通过selenium登陆,并完成滑动验证码验证来获取登陆cookie

在爬取该网站的数据时,需要先登录账户,拿到 cookie 中携带的参数,之后向网站发起的请求才能成功。
通过selenium登陆,并完成滑动验证码验证来获取登陆cookie_第1张图片
参数 access_token 和 salt 携带在登录账户后生成的 cookie 中,因此首先需要通过 selenium 登录账户。
通过selenium登陆,并完成滑动验证码验证来获取登陆cookie_第2张图片
首先打开网页,定位用户名输入框和密码输入框,输入账号和密码后点击登录按钮,页面会弹出验证码。
代码如下:

    def __init__(self, u, p):
        """
        Store the credentials and start a headless Chrome session behind a proxy.

        :param u: account user name typed into the login form
        :param p: account password typed into the login form
        """
        self.u = u
        self.p = p
        self.LoginClass = 'login'
        self.page_count = []
        self.url = 'https://www.shujuling.com/login/logining'

        # Tunables for the captcha image comparison.
        self.threshold = 60  # per-channel RGB difference treated as "different"
        self.left = 50  # first x column compared (right edge of the slider piece)
        self.BORDER = 6  # pixels subtracted from the detected gap column

        # presumably random_proxy() returns a "host:port" style address -- confirm
        proxy = random_proxy()
        chrome_options = Options()
        chrome_options.add_argument('--headless')
        chrome_options.add_argument('--no-sandbox')
        chrome_options.add_argument('--proxy-server=%s' % proxy)
        # ``options=`` replaces the deprecated ``chrome_options=`` keyword,
        # which newer Selenium releases no longer accept.
        self.browser = webdriver.Chrome(options=chrome_options)
        self.wait = WebDriverWait(self.browser, 80)
        self.browser.set_page_load_timeout(40)

    def close(self):
        """
        Shut the browser session down.

        ``quit()`` is used rather than ``close()`` so that the driver session
        is ended together with the window instead of being left running.
        """
        self.browser.quit()

    def open(self):
        """
        Maximize the browser window and load the login page (``self.url``).
        """
        driver = self.browser
        driver.maximize_window()
        driver.get(self.url)


    def login_web(self):
        """
        Fill in the login form and submit it.

        Waits until the user-name and password inputs are present, types the
        stored credentials into them, then calls ``login_in``.
        """
        name_locator = (By.CSS_SELECTOR, "input[name='loginName']")
        pwd_locator = (By.CSS_SELECTOR, "input[name='loginPwd']")
        name_box = self.wait.until(EC.presence_of_element_located(name_locator))
        pwd_box = self.wait.until(EC.presence_of_element_located(pwd_locator))
        name_box.send_keys(self.u)
        pwd_box.send_keys(self.p)
        self.login_in()

浏览器设置为无头模式;输入账号和密码并点击登录后,页面会弹出验证码。
通过selenium登陆,并完成滑动验证码验证来获取登陆cookie_第3张图片
验证码为极验(Geetest)滑动验证码,验证码图片由 JS 绘制在 canvas 上,因此可以通过执行 JS 直接导出图片。

1,获取完整验证码和有缺口的验证码图片

    def get_images(self):
        """
        Dump the two captcha canvases to disk and load them as images.

        After a random 3-5 s pause each canvas is exported in the page through
        ``toDataURL``, base64-decoded and written to ``bg.png`` / ``fullbg.png``
        in the current working directory.

        :return: tuple ``(bg_image, fullbg_image)`` opened from those files
                 (presumably the picture with the gap and the complete
                 picture, judging by the canvas class names -- confirm)
        """
        time.sleep(round(random.uniform(3, 5), 1))

        exports = (
            ('bg.png',
             'return document.getElementsByClassName("geetest_canvas_bg geetest_absolute")[0].toDataURL("image/png");'),
            ('fullbg.png',
             'return document.getElementsByClassName("geetest_canvas_fullbg geetest_fade geetest_absolute")[0].toDataURL("image/png");'),
        )
        for filename, script in exports:
            data_url = self.browser.execute_script(script)
            # Keep only the part after the first comma: the base64 payload.
            payload = data_url.split(',')[1]
            with open(filename, 'wb') as out:
                out.write(base64.b64decode(payload))

        return Image.open('bg.png'), Image.open('fullbg.png')

2,通过比对两张验证码图片计算出缺口位置

    def get_distance(self, image1, image2):
        """
        Find how far the slider has to travel by diffing the two captcha images.

        Columns are scanned left to right starting at ``self.left``; the first
        pixel whose R, G or B channel differs by ``self.threshold`` or more
        marks the gap. The comparison uses absolute differences, so the order
        of the two images does not matter.

        :param image1: first image (must expose ``size`` and ``load()``)
        :param image2: second image, at least as large as ``image1``
        :return: x of the first differing column minus ``self.BORDER``; when no
                 pixel differs, the last column scanned (0 if none was scanned)
        """
        # Fetch the pixel accessors once instead of once per pixel.
        pixels1 = image1.load()
        pixels2 = image2.load()
        width, height = image1.size
        i = 0
        for i in range(self.left, width):
            for j in range(height):
                rgb1 = pixels1[i, j]
                rgb2 = pixels2[i, j]
                if any(abs(rgb1[c] - rgb2[c]) >= self.threshold for c in range(3)):
                    return i - self.BORDER  # compensate for the slider's border
        logging.debug('未识别出验证码中的不同位置,或图片定位出现异常')
        return i

3,根据缺口位置模拟计算出滑块滑动轨迹

    def get_tracks(self, distance):
        """
        Build the list of per-step x offsets that drags the slider ``distance`` px.

        The simulated movement speeds up with a random acceleration in [3, 5]
        over the first 80 % of the way and slows down over the rest.

        :param distance: total number of pixels to move
        :return: list of integer offsets whose sum equals ``round(distance)``;
                 an empty list when ``distance`` is not positive
        """
        track = []
        if distance <= 0:
            return track
        mid = distance * 0.8  # switch-over point from speeding up to braking
        jiansu = distance - mid  # length of the braking stretch
        t = 0.2  # duration of one simulated step
        v = 0  # current speed
        current = 0  # simulated (un-rounded) position
        while current < distance:
            v0 = v
            if current < mid:
                a = round(random.uniform(3, 5), 2)
                v = v0 + a * t
                move = v0 * t + 1 / 2 * a * t * t
            else:
                a = -1 * (v0 * v0) / (2 * jiansu)
                v = v0 + a * t
                # While braking always advance at least 1 px: a stalled or
                # backwards step can drop the position below ``mid`` again and
                # leave the loop oscillating without ever reaching ``distance``.
                move = max(1, v0 * t + 1 / 2 * a * t * t - 1)
            current += move
            track.append(round(move))
        # Rounding every step (and overshooting on the last one) lets the summed
        # offsets drift from the target; one corrective step lands exactly on it.
        error = round(distance) - sum(track)
        if error:
            track.append(error)
        return track

4,定位到滑块按钮

    def get_slider(self):
        """
        Wait for the captcha's slider handle and hand it back.

        :return: the element with class ``geetest_slider_button`` once it is clickable
        """
        locator = (By.CLASS_NAME, 'geetest_slider_button')
        return self.wait.until(EC.element_to_be_clickable(locator))

5,滑动滑块,完成验证码验证

    def move_to_gap(self, slider, track):
        """
        Drag the slider along the given track and let go.

        :param slider: slider element to press and hold
        :param track: iterable of x offsets, applied one at a time
        :return: None
        """
        driver = self.browser
        ActionChains(driver).click_and_hold(slider).perform()
        for step in track:
            ActionChains(driver).move_by_offset(xoffset=step, yoffset=0).perform()
        # Rest briefly on the target before letting go of the mouse button.
        time.sleep(0.5)
        ActionChains(driver).release().perform()

6,验证码滑动成功后,从登录 cookie 中获取 access_token 和 salt 两个参数

# Nudge the pointer 3 px back and then 3 px forward, pause, then release.
for nudge in (-3, 3):
    ActionChains(self.browser).move_by_offset(xoffset=nudge, yoffset=0).perform()
time.sleep(0.21)
ActionChains(self.browser).release().perform()
# Wait a random 2-4 s before reading the cookies.
times = round(random.uniform(2, 4), 1)
time.sleep(times)
# Pull the two values out of the login cookies.
access_token, salt = (self.browser.get_cookie(key)['value'] for key in ('access_token', 'salt'))
print(access_token,salt)

附完整代码:

#!/usr/bin/env python
# __*__ coding: utf-8 __*__
"""
__author__: Nine
@file: login.py
@time: 2019/8/15 13:11
@func:通过登陆,滑动验证码来获取cookie中的参数 access_token 和 salt
"""
import json
import logging
from PIL import Image
from bs4 import BeautifulSoup
import PIL.Image as image
import random, base64, re
from selenium import webdriver
from selenium.webdriver import ActionChains
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.options import Options
import time
from unit.get_proxy import random_proxy

class Loginweb(object):
    """
    Log in to shujuling.com through Selenium, solve the slide captcha and
    read the ``access_token`` / ``salt`` cookies set by the login.
    """

    def __init__(self, u, p):
        """
        Store the credentials and start a headless Chrome session behind a proxy.

        :param u: account user name typed into the login form
        :param p: account password typed into the login form
        """
        self.u = u
        self.p = p
        self.LoginClass = 'login'
        self.page_count = []
        self.url = 'https://www.shujuling.com/login/logining'

        # Tunables for the captcha image comparison.
        self.threshold = 60  # per-channel RGB difference treated as "different"
        self.left = 50  # first x column compared (right edge of the slider piece)
        self.BORDER = 6  # pixels subtracted from the detected gap column

        # presumably random_proxy() returns a "host:port" style address -- confirm
        proxy = random_proxy()
        chrome_options = Options()
        chrome_options.add_argument('--headless')
        chrome_options.add_argument('--no-sandbox')
        chrome_options.add_argument('--proxy-server=%s' % proxy)
        # ``options=`` replaces the deprecated ``chrome_options=`` keyword,
        # which newer Selenium releases no longer accept.
        self.browser = webdriver.Chrome(options=chrome_options)
        self.wait = WebDriverWait(self.browser, 80)
        self.browser.set_page_load_timeout(40)

    def close(self):
        """
        Shut the browser session down.

        ``quit()`` is used rather than ``close()`` so that the driver session
        is ended together with the window instead of being left running.
        """
        self.browser.quit()

    def open(self):
        """
        Maximize the browser window and load the login page (``self.url``).
        """
        self.browser.maximize_window()
        self.browser.get(self.url)

    def login_web(self):
        """
        Fill in the login form and submit it.

        Waits until the user-name and password inputs are present, types the
        stored credentials into them, then calls ``login_in``.
        """
        user = self.wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "input[name='loginName']")))
        password = self.wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "input[name='loginPwd']")))
        user.send_keys(self.u)
        password.send_keys(self.p)
        self.login_in()

    def login_in(self):
        """
        Wait for the submit button (id ``embed-submit``) and click it.
        """
        button = self.wait.until(EC.presence_of_element_located((By.ID, "embed-submit")))
        button.click()

    def get_images(self):
        """
        Dump the two captcha canvases to disk and load them as images.

        After a random 3-5 s pause each canvas is exported in the page through
        ``toDataURL``, base64-decoded and written to ``bg.png`` / ``fullbg.png``
        in the current working directory.

        :return: tuple ``(bg_image, fullbg_image)`` opened from those files
                 (presumably the picture with the gap and the complete
                 picture, judging by the canvas class names -- confirm)
        """
        time.sleep(round(random.uniform(3, 5), 1))

        exports = (
            ('bg.png',
             'return document.getElementsByClassName("geetest_canvas_bg geetest_absolute")[0].toDataURL("image/png");'),
            ('fullbg.png',
             'return document.getElementsByClassName("geetest_canvas_fullbg geetest_fade geetest_absolute")[0].toDataURL("image/png");'),
        )
        for filename, script in exports:
            data_url = self.browser.execute_script(script)
            # Keep only the part after the first comma: the base64 payload.
            payload = data_url.split(',')[1]
            with open(filename, 'wb') as out:
                out.write(base64.b64decode(payload))

        return Image.open('bg.png'), Image.open('fullbg.png')

    def get_merge_image(self, filename, location_list):
        """
        Reassemble a scrambled captcha image from 10 px wide strips.

        Strips whose ``y`` is -58 form the upper row of the result, strips
        whose ``y`` is 0 the lower row; each row is filled left to right in
        list order. Entries with any other ``y`` are ignored.

        :param filename: path of the scrambled image; overwritten with the result
        :param location_list: dicts with ``x`` and ``y`` keys, one per strip
        :return: the reassembled 260x116 RGB image
        """
        source = image.open(filename)
        upper_strips = []
        lower_strips = []
        for location in location_list:
            y = location['y']
            if y not in (-58, 0):
                continue
            x = abs(location['x'])
            if y == -58:
                # NOTE(review): 166 exceeds the 116 px height of the output
                # image; presumably 116 was intended -- confirm.
                upper_strips.append(source.crop((x, 58, x + 10, 166)))
            else:
                lower_strips.append(source.crop((x, 0, x + 10, 58)))

        new_im = image.new('RGB', (260, 116))
        for top, strips in ((0, upper_strips), (58, lower_strips)):
            x_offset = 0
            for strip in strips:
                new_im.paste(strip, (x_offset, top))
                x_offset += strip.size[0]

        new_im.save(filename)
        return new_im

    def is_pixel_equal(self, img1, img2, x, y):
        """
        Tell whether two images have (nearly) the same colour at one position.

        :param img1: first image
        :param img2: second image
        :param x: x coordinate of the pixel
        :param y: y coordinate of the pixel
        :return: True when every RGB channel differs by less than 80
        """
        pix1 = img1.load()[x, y]
        pix2 = img2.load()[x, y]
        threshold = 80
        # abs() wraps only the difference; wrapping the whole comparison would
        # make any large negative difference count as "equal".
        return all(abs(pix1[c] - pix2[c]) < threshold for c in range(3))

    def get_gap(self, img1, img2):
        """
        Locate the gap by scanning columns from x = 43 for a differing pixel.

        :param img1: first image
        :param img2: second image
        :return: x of the first column containing a differing pixel, or 43
                 when the images match everywhere that was scanned
        """
        left = 43
        for i in range(left, img1.size[0]):
            for j in range(img1.size[1]):
                if not self.is_pixel_equal(img1, img2, i, j):
                    return i
        return left

    def get_track(self, distance):
        """
        Build a drag track with a randomized switch-over point.

        The movement speeds up with a random acceleration in [3, 5] until a
        point chosen at 60-70 % of the way, then brakes; while braking every
        step advances at least 1 px.

        :param distance: total number of pixels to move
        :return: list of integer offsets whose sum equals ``round(distance)``;
                 an empty list when ``distance`` is not positive
        """
        track = []
        if distance <= 0:
            return track
        mid = int(distance * round(random.uniform(0.6, 0.7), 2))
        jiansu = distance - mid  # length of the braking stretch
        # Constant pull-back applied to every braking step; larger for longer drags.
        if distance > 120:
            brake_bias = 1.5
        elif distance >= 60:
            brake_bias = 1
        else:
            brake_bias = 0.5
        t = 0.2  # duration of one simulated step
        v = 0  # current speed
        current = 0  # simulated (un-rounded) position
        while current < distance:
            v0 = v
            if current < mid:
                a = round(random.uniform(3, 5), 2)
                v = v0 + a * t
                move = v0 * t + 1 / 2 * a * t * t + 0.5
            else:
                a = -1 * (v0 * v0) / (2 * jiansu)
                v = v0 + a * t
                move = max(1, v0 * t + 1 / 2 * a * t * t - brake_bias)
            current += move
            track.append(round(move))
        # Rounding every step (and overshooting on the last one) lets the summed
        # offsets drift from the target; one corrective step lands exactly on it.
        error = round(distance) - sum(track)
        if error:
            track.append(error)
        return track

    def get_slider(self):
        """
        Wait for the captcha's slider handle and hand it back.

        :return: the element with class ``geetest_slider_button`` once it is clickable
        """
        slider = self.wait.until(EC.element_to_be_clickable((By.CLASS_NAME, 'geetest_slider_button')))
        return slider

    def move_to_gap(self, slider, track):
        """
        Drag the slider along the given track and let go.

        :param slider: slider element to press and hold
        :param track: iterable of x offsets, applied one at a time
        :return: None
        """
        ActionChains(self.browser).click_and_hold(slider).perform()
        for x in track:
            ActionChains(self.browser).move_by_offset(xoffset=x, yoffset=0).perform()
        # Rest briefly on the target before letting go of the mouse button.
        time.sleep(0.5)
        ActionChains(self.browser).release().perform()

    def check_html(self, html):
        """
        Decide from the page source whether the slide succeeded.

        :param html: page source to inspect
        :return: True when the marker text is present in ``html``
        """
        return '数据翎' in html

    def get_distance(self, image1, image2):
        """
        Find how far the slider has to travel by diffing the two captcha images.

        Columns are scanned left to right starting at ``self.left``; the first
        pixel whose R, G or B channel differs by ``self.threshold`` or more
        marks the gap. The comparison uses absolute differences, so the order
        of the two images does not matter.

        :param image1: first image (must expose ``size`` and ``load()``)
        :param image2: second image, at least as large as ``image1``
        :return: x of the first differing column minus ``self.BORDER``; when no
                 pixel differs, the last column scanned (0 if none was scanned)
        """
        # Fetch the pixel accessors once instead of once per pixel.
        pixels1 = image1.load()
        pixels2 = image2.load()
        width, height = image1.size
        i = 0
        for i in range(self.left, width):
            for j in range(height):
                rgb1 = pixels1[i, j]
                rgb2 = pixels2[i, j]
                if any(abs(rgb1[c] - rgb2[c]) >= self.threshold for c in range(3)):
                    return i - self.BORDER  # compensate for the slider's border
        logging.debug('未识别出验证码中的不同位置,或图片定位出现异常')
        return i

    def get_tracks(self, distance):
        """
        Build the list of per-step x offsets that drags the slider ``distance`` px.

        The simulated movement speeds up with a random acceleration in [3, 5]
        over the first 80 % of the way and slows down over the rest.

        :param distance: total number of pixels to move
        :return: list of integer offsets whose sum equals ``round(distance)``;
                 an empty list when ``distance`` is not positive
        """
        track = []
        if distance <= 0:
            return track
        mid = distance * 0.8  # switch-over point from speeding up to braking
        jiansu = distance - mid  # length of the braking stretch
        t = 0.2  # duration of one simulated step
        v = 0  # current speed
        current = 0  # simulated (un-rounded) position
        while current < distance:
            v0 = v
            if current < mid:
                a = round(random.uniform(3, 5), 2)
                v = v0 + a * t
                move = v0 * t + 1 / 2 * a * t * t
            else:
                a = -1 * (v0 * v0) / (2 * jiansu)
                v = v0 + a * t
                # While braking always advance at least 1 px: a stalled or
                # backwards step can drop the position below ``mid`` again and
                # leave the loop oscillating without ever reaching ``distance``.
                move = max(1, v0 * t + 1 / 2 * a * t * t - 1)
            current += move
            track.append(round(move))
        # Rounding every step (and overshooting on the last one) lets the summed
        # offsets drift from the target; one corrective step lands exactly on it.
        error = round(distance) - sum(track)
        if error:
            track.append(error)
        return track

    def crack(self):
        """
        Run the whole flow: open the page, log in, solve the captcha, read cookies.

        The browser is always shut down before this method returns, including
        when an exception propagates out of it.

        :return: JSON string with the keys ``access_token`` (prefixed with
                 ``'Bearer '``) and ``salt`` on success; ``None`` when the
                 success check fails or either cookie is missing
        """
        try:
            self.open()
            time.sleep(5)
            self.login_web()
            # Grab both captcha pictures and work out the drag distance.
            image1, image2 = self.get_images()
            distance = self.get_distance(image1, image2)
            track = self.get_tracks(distance)
            # Grab the slider handle and drag it to the gap.
            slider = self.get_slider()
            self.move_to_gap(slider, track)
            # NOTE(review): move_to_gap() has already released the button, so
            # this nudge and the second release presumably act on a free
            # pointer -- confirm the intent before changing it.
            for nudge in (-3, 3):
                ActionChains(self.browser).move_by_offset(xoffset=nudge, yoffset=0).perform()
            time.sleep(0.21)
            ActionChains(self.browser).release().perform()
            time.sleep(round(random.uniform(2, 4), 1))

            res = self.check_html(self.browser.page_source)
            print("滑动是否成功:",res)
            if not res:
                return None

            token_cookie = self.browser.get_cookie('access_token')
            salt_cookie = self.browser.get_cookie('salt')
            if token_cookie is None or salt_cookie is None:
                # Treat a missing cookie as a failed login instead of crashing
                # on ``None['value']``.
                logging.debug('login cookies access_token/salt not found')
                return None
            dit = {"access_token": 'Bearer ' + token_cookie['value'], "salt": salt_cookie['value']}
            return json.dumps(dit)
        finally:
            self.close()

if __name__ == '__main__':
    # Placeholder user name and password; replace with a real account.
    u = '----'
    p = '-----'
    crack = Loginweb(u, p)
    # Bound to ``result`` rather than ``str`` so the builtin is not shadowed.
    result = crack.crack()
    print(result)

你可能感兴趣的:(通过selenium登陆,并完成滑动验证码验证来获取登陆cookie)