侧边栏壁纸
博主头像
分享你我博主等级

行动起来,活在当下

  • 累计撰写 106 篇文章
  • 累计创建 13 个标签
  • 累计收到 0 条评论

目 录CONTENT

文章目录

python selenium 封装使用(处理反爬)

管理员
2022-03-17 / 0 评论 / 0 点赞 / 2 阅读 / 33788 字
# coding=utf-8
"""
此文件为selenium常用方法二次封装文件
"""
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.action_chains import ActionChains  # 处理鼠标事件
from selenium.webdriver.support.select import Select  # 用于处理下拉框
from selenium.common.exceptions import *  # 用于处理异常
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait  # 用于处理元素等待
from PIL import Image, ImageEnhance
import pytesseract, re
import time
from urllib.parse import urlparse
import sys
class BrowserHelper(object):
    """
    selenium框架的主要类
    """
    original_window = None
    def __init__(self, browser='chrome'):
        """
        运行初始化方法默认chrome,当然,你也可以传入一个其他浏览器名称,
        """
        if browser == "firefox" or browser == "ff":
            self.driver = webdriver.Firefox()
        elif browser == "chrome":
            options = Options();
            options.add_argument("--disable-blink-features=AutomationControlled");
            options.add_experimental_option("excludeSwitches", ["enable-automation"]);
            options.add_experimental_option('useAutomationExtension', False);
            self.driver = webdriver.Chrome(options=options)
        elif browser == "internet explorer" or browser == "ie":
            self.driver = webdriver.Ie()
        elif browser == "opera":
            self.driver = webdriver.Opera()
        elif browser == "chrome_headless":
            options = Options();
            options.add_argument('--headless');
            options.add_argument("--disable-blink-features=AutomationControlled");
            options.add_experimental_option("excludeSwitches", ["enable-automation"]);
            options.add_experimental_option('useAutomationExtension', False);
            self.driver = webdriver.Chrome(options=options)
        elif browser == 'edge':
            self.driver = webdriver.Edge()
        else:
            raise NameError(
                "找不到 %s 浏览器,你应该从这里面选取一个 'ie', 'ff', 'opera', 'edge', 'chrome' or 'chrome_headless'." % browser)
    def open(self, url, title='', timeout=10):
        u"""打开浏览器,并最大化,判断title是否为预期"""
        self.driver.get(url)
        self.driver.maximize_window()
        try:
            WebDriverWait(self.driver, timeout, 1).until(EC.title_contains(title))
        except TimeoutException:
            print("open %s title error" % url)
        except Exception as msg:
            print("Error:%s" % msg)
    def find_element(self, locator, timeout=10):
        u"""定位元素,参数locator为原则"""
        try:
            element = WebDriverWait(self.driver, timeout, 1).until(EC.presence_of_element_located(locator))
            return element
        except:
            print("%s 页面未找到元素 %s" % (self, locator))
    def find_elements(self, locator, timeout=10):
        u"""定位一组元素"""
        elements = WebDriverWait(self.driver, timeout, 1).until(EC.presence_of_all_elements_located(locator))
        return elements
    def element_wait(self, by, value, secs=5):
        """
        等待元素显示
        """
        try:
            if by == "id":
                WebDriverWait(self.driver, secs, 1).until(EC.presence_of_element_located((By.ID, value)))
            elif by == "name":
                WebDriverWait(self.driver, secs, 1).until(EC.presence_of_element_located((By.NAME, value)))
            elif by == "class":
                WebDriverWait(self.driver, secs, 1).until(EC.presence_of_element_located((By.CLASS_NAME, value)))
            elif by == "link_text":
                WebDriverWait(self.driver, secs, 1).until(EC.presence_of_element_located((By.LINK_TEXT, value)))
            elif by == "xpath":
                WebDriverWait(self.driver, secs, 1).until(EC.presence_of_element_located((By.XPATH, value)))
            elif by == "css":
                WebDriverWait(self.driver, secs, 1).until(EC.presence_of_element_located((By.CSS_SELECTOR, value)))
            else:
                raise NoSuchElementException(
                    "找不到元素,请检查语法或元素")
        except TimeoutException:
            print("查找元素超时请检查元素")
    def get_element(self, css):
        """
        判断元素定位方式,并返回元素
        """
        if "=>" not in css:
            by = "css"  # 如果是css的格式是#aaa,所以在此加入判断如果不包含=>就默认是css传给上面的element_wait判断元素是否存在
            value = css
            # wait element.
            self.element_wait(by, css)
        else:
            by = css.split("=>")[0]
            value = css.split("=>")[1]
            if by == "" or value == "":
                raise NameError(
                    "语法错误,参考: 'id=>kw 或 xpath=>//*[@id='kw'].")
            self.element_wait(by, value)
        if by == "id":
            element = self.driver.find_element(By.ID, value)
        elif by == "name":
            element = self.driver.find_element(By.NAME, value)
        elif by == "class":
            element = self.driver.find_element(By.CLASS_NAME, value)
        elif by == "link_text":
            element = self.driver.find_element(By.LINK_TEXT, value)
        elif by == "xpath":
            element = self.driver.find_element(By.XPATH, value)  # 如果是xpath要以此格式传入xpath=>//*[@id='su']
        elif by == "css":
            element = self.driver.find_element(By.CSS_SELECTOR, value)
        else:
            raise NameError(
                "Please enter the correct targeting elements,'id','name','class','link_text','xpath','css'.")
        return element
    def get_elements(self, css):
        """
        判断元素定位方式,并返回元素
        """
        if "=>" not in css:
            by = "css"  # 如果是css的格式是#aaa,所以在此加入判断如果不包含=>就默认是css传给上面的element_wait判断元素是否存在
            value = css
            # wait element.
            self.element_wait(by, css)
        else:
            by = css.split("=>")[0]
            value = css.split("=>")[1]
            if by == "" or value == "":
                raise NameError(
                    "语法错误,参考: 'id=>kw 或 xpath=>//*[@id='kw'].")
            self.element_wait(by, value)
        if by == "id":
            element = self.driver.find_elements(By.ID, value)
        elif by == "name":
            element = self.driver.find_elements(By.NAME, value)
        elif by == "class":
            element = self.driver.find_elements(By.CLASS_NAME, value)
        elif by == "link_text":
            element = self.driver.find_elements(By.LINK_TEXT, value)
        elif by == "xpath":
            element = self.driver.find_elements(By.XPATH, value)  # 如果是xpath要以此格式传入xpath=>//*[@id='su']
        elif by == "css":
            element = self.driver.find_elements(By.CSS_SELECTOR, value)
        else:
            raise NameError(
                "Please enter the correct targeting elements,'id','name','class','link_text','xpath','css'.")
        return element
    def click(self, locator):
        u"""封装点击操作"""
        element = self.find_element(locator)
        element.click()
    def send_key(self, locator, text):
        u"""发送文本后清除内容"""
        element = self.find_element(locator)
        element.clear()
        element.send_keys(text)
    def is_text_in_element(self, text, locator, timeout=10):
        u"""判断是否定位到元素"""
        try:
            result = WebDriverWait(self.driver, timeout, 1).until(EC.text_to_be_present_in_element(locator, text))
        except TimeoutException:
            print
            u"元素未定位到:" + str(locator)
            return False
        else:
            return result
    def is_title(self, title, timeout=10):
        u"""判断title完全相等"""
        result = WebDriverWait(self.driver, timeout, 1).until(EC.title_is(title))
        return result
    def is_title_contains(self, title, timeout=10):
        u"""判断是否包含title"""
        result = WebDriverWait(self.driver, timeout, 1).until(EC.title_contains(title))
        return result
    def is_select(self, locator, timeout=10):
        u"""判断元素是否被选中"""
        result = WebDriverWait(self.driver, timeout, 1).until(EC.element_located_to_be_selected(locator))
        return result
    def is_select_be(self, locator, timeout=10, selected=True):
        u"""判断元素的状态"""
        return WebDriverWait(self.driver, timeout, 1).until(EC.element_located_selection_state_to_be(locator, selected))
    def is_alert_present(self, timeout=10):
        u"""判断页面有无alert弹出框,有alert返回alert,无alert返回FALSE"""
        try:
            return WebDriverWait(self.driver, timeout, 1).until(EC.alert_is_present())
        except:
            print
            "No Alert Present"
    def is_visibility(self, locator, timeout=10):
        u"""判断元素是否可见,可见返回本身,不可见返回FALSE"""
        return WebDriverWait(self.driver, timeout, 1).until(EC.visibility_of_element_located(locator))
    def is_invisibility(self, locator, timeout=10):
        u"""判断元素是否可见,不可见,未找到元素返回True"""
        return WebDriverWait(self.driver, timeout, 1).until(EC.invisibility_of_element_located(locator))
    def is_clickable(self, locator, timeout=10):
        u"""判断元素是否可以点击,可以点击返回本身,不可点击返回FALSE"""
        return WebDriverWait(self.driver, timeout, 1).until(EC.element_to_be_clickable(locator))
    def is_located(self, locator, timeout=10):
        u"""判断元素是否定位到(元素不一定是可见),如果定位到返回Element,未定位到返回FALSE"""
        return WebDriverWait(self.driver, timeout, 1).until(EC.presence_of_element_located(locator))
    def move_is_element(self, locator):
        u"""鼠标悬停操作"""
        element = self.find_element(locator)
        ActionChains(self.driver).move_to_element(element).perform()
    def back(self):
        u"""返回到旧的窗口"""
        self.driver.back()
    def forward(self):
        u"""前进到新窗口"""
        self.driver.forward()
    def close(self):
        u"""关闭窗口"""
        self.driver.close()
    def quit(self):
        u"""关闭driver和所有窗口"""
        self.driver.quit()
    def get_title(self):
        u"""获取当前窗口的title"""
        return self.driver.title
    def get_netloc(self):
        u"""获取当前窗口的title"""
        res = urlparse(self.driver.current_url)
        return res.scheme + "://" + res.netloc
    def is_netloc(self,url: str) -> bool:
        res = urlparse(url)
        if res.netloc == "":
            return True
        return False
    def get_current_url(self):
        u"""获取当前页面url"""
        return self.driver.current_url
    def get_text(self, locator):
        u"""获取文本内容"""
        return self.find_element(locator).text
    def get_browser_log_level(self):
        u"""获取浏览器错误日志级别"""
        lists = self.driver.get_log('browser')
        list_value = []
        if lists.__len__() != 0:
            for dicts in lists:
                for key, value in dicts.items():
                    list_value.append(value)
        if 'SEVERE' in list_value:
            return "SEVERE"
        elif 'WARNING' in list_value:
            return "WARNING"
        return "SUCCESS"
    def get_attribute(self, locator, name):
        u"""获取属性"""
        return self.find_element(locator).get_attribute(name)
    def js_execute(self, js):
        u"""执行js"""
        return self.driver.execute_script(js)
    def js_fours_element(self, locator):
        u"""聚焦元素"""
        element = self.find_element(locator)
        self.driver.execute_script("arguments[0].scrollIntoView();", element)
    def js_scroll_top(self):
        u"""滑动到页面顶部"""
        js = "window.scrollTo(0,0)"
        self.driver.execute_script(js)
    def js_scroll_end(self):
        u"""滑动到页面底部"""
        js = "window.scrollTo(0, document.body.scrollHeight)"
        self.driver.execute_script(js)
    def select_by_index(self, locator, index):
        u"""通过所有index,0开始,定位元素"""
        element = self.find_element(locator)
        Select(element).select_by_index(index)
    def select_by_value(self, locator, value):
        u"""通过value定位元素"""
        element = self.find_element(locator)
        Select(element).select_by_value(value)
    def select_by_text(self, locator, text):
        u"""通过text定位元素"""
        element = self.find_element(locator)
        Select(element).select_by_visible_text(text)
    def F5(self):
        """
        刷新当前页面.
        用法:
        driver.F5()
        """
        self.driver.refresh()
    def open_new_window(self, css):
        """
        打开新窗口并切换到新打开的窗口.
        用法:
        传入一个点击后会跳转的元素
        driver.open_new_window("link_text=>注册")
        """
        original_window = self.driver.current_window_handle
        el = self.get_element(css)
        el.click()
        all_handles = self.driver.window_handles
        for handle in all_handles:
            if handle != original_window:
                self.driver.switch_to.window(handle)
    def get_verify_code(self, locator):
        u"""获取图片验证码"""
        # 验证码图片保存地址
        screenImg = "D:/image/verifyCode.png"
        # 浏览器页面截图
        self.driver.get_screenshot_as_file(screenImg)
        # 定位验证码大小
        location = self.find_element(locator).location
        size = self.find_element(locator).size
        left = location['x']
        top = location['y']
        right = location['x'] + size['width']
        bottom = location['y'] + size['height']
        # 从文件读取截图,截取验证码位置再次保存
        img = Image.open(screenImg).crop((left, top, right, bottom))
        img.convert('L')  # 转换模式:L|RGB
        img = ImageEnhance.Contrast(img)  # 增加对比度
        img = img.enhance(2.0)  # 增加饱和度
        img.save(screenImg)
        # 再次读取验证码
        img = Image.open(screenImg)
        time.sleep(1)
        code = pytesseract.image_to_string(img)
        return code
    def open_tab(self):
        u"""打开新选项卡并切换到新选项卡"""
        self.driver.switch_to.new_window('tab')
    def open_window(self):
        u"""打开一个新窗口并切换到新窗口"""
        self.driver.switch_to.new_window('window')
    def switch_tab_or_window(self,original_window:any):
        u"""切换回旧选项卡或窗口"""
        self.driver.switch_to.window(original_window)
if __name__ == '__main__':
    driver = BrowserHelper("chrome");
    # 检查是否处理反爬
    driver.open("https://bot.sannysoft.com/");


反爬结果:

image.png

0

评论区