破解bilibili滑块验证码

基本逻辑:

logging ──》 get_image(下载带缺口的图片和不带缺口的图片) ──》 recover_image(还原被打乱的图片)

        ──》 get_diff_x(比对x轴RGB差值获取缺口x坐标) ──》 get_track(生成滑动轨迹) ──》 simulate_drag(模拟拖动滑块)

话不多说,直接贴代码

#!/usr/bin/python3
# -*- coding: utf-8 -*-

"""
author NJ

破解 哔哩哔哩 滑块验证码
"""
import os
import random
import urllib.request

import requests
import logging
import time
import re
from selenium import webdriver
from PIL import Image
from io import BytesIO
from selenium.webdriver.common.action_chains import ActionChains


class Bilibili(object):
    """Drive Chrome through the slider captcha on the bilibili login page.

    Workflow (see :meth:`logging`): download the scrambled "full" and
    "gapped" captcha images, reassemble both, locate the gap by comparing
    pixels, build a drag track and replay it on the slider knob.
    """

    # JavaScript that expires every cookie visible to the current page.
    # Raw string: the regex escape ``\=`` must reach the browser verbatim and
    # is not a valid Python escape sequence.
    js = r"""var keys=document.cookie.match(/[^ =;]+(?=\=)/g);
            if (keys) {
            for (var i = keys.length; i--;)
            document.cookie=keys[i]+'=0;expires=' + new Date( 0).toUTCString()
            } """

    # Inline style of one captcha slice: (image url, x offset, y offset).
    _STYLE_PATTERN = re.compile(
        r'background-image: url\("(.*?)"\); background-position: (.*)px (.*)px;')

    IMAGE_WIDTH = 260      # width of the reassembled captcha image, in pixels
    IMAGE_HEIGHT = 116     # height of the reassembled captcha image, in pixels
    ROW_HEIGHT = 58        # the image is made of two rows of this height
    SLICE_WIDTH = 10       # width of every slice in the scrambled image
    PIXEL_TOLERANCE = 50   # per-channel difference at which pixels count as different

    def __init__(self):
        super(Bilibili, self).__init__()
        # Bind the attribute first so __del__ is safe even if Chrome fails to start.
        self.browser = None
        self.browser = webdriver.Chrome()
        self.browser.set_page_load_timeout(20)   # page load timeout, seconds
        self.browser.implicitly_wait(10)         # implicit wait for element lookups, seconds

    def __del__(self):
        """Quit the browser when the object is garbage collected."""
        browser = getattr(self, 'browser', None)
        if browser is not None:
            browser.quit()

    def get_image(self, class_name):
        """
        Download the scrambled captcha image referenced by the page and restore it.

        The raw download is also saved under ``image/`` in the current
        working directory.

        :param class_name: class name of the slice elements whose inline style
            carries the image url and the slice offsets
        :return: the reassembled image, or None when no slice element is found
            or the first slice's style cannot be parsed
        """
        image_list = self.browser.find_elements_by_class_name(class_name)
        if not image_list:
            print('未抓取到图片的classname')
            return None

        image_url_position = self._STYLE_PATTERN.findall(
            image_list[0].get_attribute('style'))
        if not image_url_position:
            return None
        print(image_url_position[0])
        # The page references a webp file; the jpg variant is requested instead.
        image_url = image_url_position[0][0].replace('webp', 'jpg')
        image_name = image_url.split('/')[-1]

        # One {'x': ..., 'y': ...} offset per slice, in page order.
        location_list = []
        for element in image_list:
            x_y = self._STYLE_PATTERN.findall(element.get_attribute('style'))
            location_list.append({'x': int(x_y[0][1]), 'y': int(x_y[0][-1])})

        print(image_url)
        # A timeout keeps a stalled download from hanging the whole run.
        response = requests.get(image_url, timeout=20)
        response.raise_for_status()
        image = Image.open(BytesIO(response.content))

        # Create exactly the directory that is written to; exist_ok makes the
        # second call (full image, then gapped image) safe.
        os.makedirs('image', exist_ok=True)
        with open(os.path.join('image', image_name), 'wb') as fp:
            fp.write(response.content)

        return self.recover_image(image, location_list)

    def recover_image(self, image, location_list):
        """
        Reassemble a scrambled captcha image.

        Slices whose y offset is ``-ROW_HEIGHT`` are cut from the lower half of
        the download and pasted left to right into the upper row of the result;
        slices whose y offset is 0 are cut from the upper half and pasted into
        the lower row. Slices with any other y offset are ignored.

        :param image: the downloaded scrambled image
        :param location_list: list of ``{'x': int, 'y': int}`` slice offsets
        :return: the restored IMAGE_WIDTH x IMAGE_HEIGHT RGB image
        """
        row = self.ROW_HEIGHT
        new_im = Image.new('RGB', (self.IMAGE_WIDTH, self.IMAGE_HEIGHT))
        upper_strips = []
        lower_strips = []
        for location in location_list:
            left = abs(location['x'])
            right = left + self.SLICE_WIDTH
            if location['y'] == -row:
                upper_strips.append(image.crop((left, row, right, self.IMAGE_HEIGHT)))
            elif location['y'] == 0:
                lower_strips.append(image.crop((left, 0, right, row)))

        for top, strips in ((0, upper_strips), (row, lower_strips)):
            x_offset = 0
            for strip in strips:
                new_im.paste(strip, (x_offset, top))
                x_offset += strip.size[0]
        print('===', new_im)
        return new_im

    def __is_similar(self, full_image, bg_image, x, y):
        """
        Tell whether the pixel at (x, y) is similar in both images.

        Two pixels are similar when each of their first three channels differs
        by less than PIXEL_TOLERANCE.

        :param full_image: the complete image
        :param bg_image: the image with the gap
        :param x: x coordinate
        :param y: y coordinate
        :return: bool
        """
        pixel_full = full_image.getpixel((x, y))
        pixel_bg = bg_image.getpixel((x, y))
        return all(
            abs(pixel_full[channel] - pixel_bg[channel]) < self.PIXEL_TOLERANCE
            for channel in range(3)
        )

    def get_diff_x(self, full_image, bg_image):
        """
        Find the x coordinate of the gap.

        Pixels are scanned column by column; the first column containing a
        pixel that is not similar in both images is the left edge of the gap.

        :param full_image: the complete image
        :param bg_image: the image with the gap
        :return: x coordinate of the gap, or None when the images never differ
        """
        # Scan only the area both images cover.
        width = min(full_image.size[0], bg_image.size[0])
        height = min(full_image.size[1], bg_image.size[1])
        for x in range(width):
            for y in range(height):
                if not self.__is_similar(full_image, bg_image, x, y):
                    return x
        return None

    def get_track(self, diff_x):
        """
        Build a randomised drag track towards the gap.

        The track covers ``diff_x - 6`` pixels in total: first coarse steps of
        random length with no pause, then single-pixel steps with a short
        random pause each.

        :param diff_x: x coordinate of the gap (int)
        :return: list of ``[dx, dy, pause_seconds]`` steps; empty when
            ``diff_x - 6`` is not positive
        """
        print('diff_x', diff_x)
        track = []
        length = diff_x - 6
        x = random.randint(1, 5)
        # Coarse phase: stop while more than 4 pixels would still remain.
        while length - x > 4:
            track.append([x, 0, 0])
            length = length - x
            x = random.randint(1, 15)
        # Fine phase: the pause range depends only on diff_x, so pick it once.
        pause_low, pause_high = (10, 12) if diff_x > 47 else (13, 14)
        for _ in range(length):
            track.append([1, 0, random.randint(pause_low, pause_high) / 100.0])
        return track

    def simulate_drag(self, track):
        """
        Press the slider knob, replay the track and release.

        :param track: list of ``[dx, dy, pause_seconds]`` steps
        :return: text of the captcha result element after the drag
        """
        gt_slider = self.browser.find_element_by_xpath('//*[@class="gt_slider"]/div[2]')
        ActionChains(self.browser).click_and_hold(on_element=gt_slider).perform()
        print(track)
        for x, y, pause in track:
            # Per the original author: the knob is 44x44 and the offset is
            # measured from the knob's top-left corner, while the distance to
            # travel is measured from its centre, hence the +22.
            ActionChains(self.browser).move_to_element_with_offset(
                to_element=gt_slider, xoffset=x + 22, yoffset=y + 22).perform()
            time.sleep(pause)
        time.sleep(0.9)
        ActionChains(self.browser).release(on_element=gt_slider).perform()
        time.sleep(1)
        return_text = self.browser.find_element_by_class_name('gt_info_type')
        return return_text.text

    def logging(self, username, passwd):
        """
        Open the login page, fill in the credentials and solve the captcha.

        The captcha is retried until the result text reports success or an
        unrecognised result is returned.

        :param username: account name typed into the login form
        :param passwd: password typed into the login form
        :return: True when the captcha was passed, False otherwise
        """
        self.browser.get('https://passport.bilibili.com/login')
        username_id = self.browser.find_element_by_id('login-username')
        passwd_id = self.browser.find_element_by_id('login-passwd')
        username_id.send_keys(username)
        passwd_id.send_keys(passwd)

        flag_success = False
        while not flag_success:
            # Complete image first, then the image with the gap.
            img_full_bg = self.get_image('gt_cut_fullbg_slice')
            img_bg = self.get_image('gt_cut_bg_slice')
            if img_full_bg is None or img_bg is None:
                # Without both images there is nothing to compare.
                break
            diff_x = self.get_diff_x(img_full_bg, img_bg)
            if diff_x is None:
                # No gap detected: fetch a fresh captcha instead of crashing.
                # NOTE(review): a reload presumably clears the form fields too,
                # as it does in the error branches below - confirm.
                self.browser.execute_script('location.reload()')
                continue
            track = self.get_track(diff_x)
            result = self.simulate_drag(track)

            print(result)
            # The order of these checks matters: a result may contain several
            # of the keywords.
            if u'验证通过' in result:
                flag_success = True
            elif u'出现错误' in result:
                self.browser.execute_script('location.reload()')
            elif u'再' in result:
                time.sleep(4)
            elif u'吃' in result:
                time.sleep(5)
            elif u'失败' in result:
                self.browser.execute_script('location.reload()')
            else:
                break

        if flag_success:
            time.sleep(random.uniform(1.5, 2))
            self.browser.execute_script(self.js)
        return flag_success

if __name__ == '__main__':
    # Credentials come from the environment, falling back to an interactive
    # prompt; they were previously referenced without ever being defined.
    import getpass

    username = os.environ.get('BILIBILI_USERNAME') or input('username: ')
    passwd = os.environ.get('BILIBILI_PASSWD') or getpass.getpass('password: ')

    bilibili = Bilibili()
    bilibili.logging(username, passwd)

 

你可能感兴趣的:(爬虫)