在当今互联网环境中,滑块验证已成为阻挡自动化程序的主要防线之一。本文将通过Python+ddddocr实现一套完整的自动化解决方案,突破多缺口滑块验证,内容涵盖技术原理、实现细节和实战技巧。
多缺口滑块验证是传统滑块验证的升级版,通过设置多个干扰项增加识别难度:
技术难点分析:
pip install ddddocr  # 安装命令
特性优势:
pip install selenium
配套组件:
import random
import time

# NOTE: the PyPI package is spelled `ddddocr` (four d's). The alias keeps the
# existing `dddocr.` references in this file working.
import ddddocr as dddocr
from selenium import webdriver
from selenium.webdriver import ActionChains
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from webdriver_manager.chrome import ChromeDriverManager
# Initialize the browser
def init_browser():
    """Start a Chrome session with the automation switches disabled.

    Returns:
        The started ``webdriver.Chrome`` instance.
    """
    # Selenium 4 receives the driver binary through a Service object; passing
    # the path positionally collides with the ``options`` keyword argument.
    from selenium.webdriver.chrome.service import Service

    options = webdriver.ChromeOptions()
    options.add_argument("--disable-blink-features=AutomationControlled")
    options.add_experimental_option("excludeSwitches", ["enable-automation"])
    driver = webdriver.Chrome(
        service=Service(ChromeDriverManager().install()),
        options=options,
    )
    # execute_script acts on the document that is loaded at call time.
    driver.execute_script("Object.defineProperty(navigator, 'webdriver', {get: () => undefined})")
    return driver
def detect_gaps(bg_path, gap_path):
    """Locate the gap in a background image for a given slider piece.

    Args:
        bg_path: Path of the background image that contains the gap(s).
        gap_path: Path of the slider-piece (template) image.

    Returns:
        The selected match as a dict whose ``'target'`` entry is
        ``[x1, y1, x2, y2]``, or ``None`` when nothing usable was found.
    """
    # Imported here under its real name so the function does not depend on
    # how the module-level import is spelled.
    import ddddocr

    # Only template matching is needed, so skip loading the OCR/detection models.
    matcher = ddddocr.DdddOcr(det=False, ocr=False, show_ad=False)

    with open(bg_path, 'rb') as f:
        background_bytes = f.read()
    with open(gap_path, 'rb') as f:
        target_bytes = f.read()

    # slide_match takes the slider piece first and the background second.
    match = matcher.slide_match(target_bytes, background_bytes, simple_target=True)

    # ddddocr returns a single dict; a list of dicts is accepted as well so
    # alternative matchers that yield several candidates keep working.
    if isinstance(match, dict):
        candidates = [match]
    else:
        candidates = list(match or [])

    # Drop low-confidence candidates; a candidate that reports no confidence
    # at all (ddddocr's case) is kept.
    valid_gaps = [
        candidate for candidate in candidates
        if candidate.get('target') and candidate.get('confidence', 1.0) > 0.7
    ]
    if not valid_gaps:
        return None

    # Prefer the candidate with the smallest X coordinate (left-most gap).
    return min(valid_gaps, key=lambda candidate: candidate['target'][0])
def generate_move_track(distance):
    """Build a list of integer X steps that accelerate, then decelerate.

    The motion model is unchanged (acceleration 2 for the first 80% of the
    distance, deceleration -3 afterwards, time step 0.2). Steps are derived
    from the rounded cumulative position rather than rounding each step on
    its own, so rounding errors cannot accumulate.

    Args:
        distance: Total horizontal distance to cover, in pixels.

    Returns:
        A list of ints whose sum equals ``round(distance)``. The list is empty
        when the rounded distance is not positive.
    """
    track = []
    target = round(distance)
    if target <= 0:
        return track

    current = 0.0  # ideal (floating point) position
    emitted = 0    # integer pixels already handed out
    mid = distance * 0.8
    t = 0.2
    v = 0.0
    while current < distance:
        a = 2 if current < mid else -3  # accelerate first, then brake
        v0 = v
        v = v0 + a * t
        current += v0 * t + 0.5 * a * t * t
        position = round(current)
        track.append(position - emitted)
        emitted = position

    # The last step may overshoot; pull back so the track ends on target.
    if emitted != target:
        track.append(target - emitted)
    return track
def execute_slide_verification(driver, gap_element, track):
    """Drag the slider element along the given track.

    Args:
        driver: Selenium WebDriver controlling the page.
        gap_element: The slider handle element to press and drag.
        track: Iterable of horizontal offsets, one per move.
    """
    # A fresh ActionChains is used for every perform() call: on bindings that
    # do not clear the queue after perform(), a reused chain would replay all
    # earlier actions on each call.
    ActionChains(driver).click_and_hold(gap_element).perform()
    try:
        # Move in small segments with a random vertical offset and pause.
        for move in track:
            ActionChains(driver).move_by_offset(move, random.randint(-2, 2)).perform()
            time.sleep(random.uniform(0.01, 0.05))
        # Final random jitter before letting go.
        ActionChains(driver).move_by_offset(
            random.randint(-3, 3), random.randint(-3, 3)
        ).perform()
        time.sleep(0.1)
    finally:
        # Always release, so a failed move does not leave the button held.
        ActionChains(driver).release().perform()
def filter_valid_gaps(results, bg_width):
    """Filter gap candidates by confidence, position, size and spacing.

    Args:
        results: Iterable of dicts with a ``'target'`` entry shaped
            ``[x1, y1, x2, y2]`` and an optional ``'confidence'`` entry.
        bg_width: Width of the background image, in the same units as the
            target coordinates.

    Returns:
        The candidates that pass every filter, in their original order.
    """
    valid_gaps = []
    for res in results:
        left, right = res['target'][0], res['target'][2]
        # 1. Confidence filter; candidates without a score are not rejected.
        if res.get('confidence', 1.0) < 0.65:
            continue
        # 2. Boundary filter: ignore the outer 10% on either side.
        if left < bg_width * 0.1 or left > bg_width * 0.9:
            continue
        # 3. Size filter: accepted gap width range is 40..70.
        if not (40 <= right - left <= 70):
            continue
        # 4. Spacing filter: skip candidates within 20 of an accepted one.
        if any(abs(left - kept['target'][0]) < 20 for kept in valid_gaps):
            continue
        valid_gaps.append(res)
    return valid_gaps
def select_most_likely_gap(gaps, previous_gaps=None):
    """Pick one gap, using the previously chosen gap as a hint when available.

    Args:
        gaps: Candidate gap dicts, each with ``'target'`` = ``[x1, ...]``.
        previous_gaps: Optional history of earlier selections; only the last
            entry is consulted.

    Returns:
        The chosen gap dict, or ``None`` when ``gaps`` is empty.
    """
    if not gaps:
        return None
    # With history: take the candidate horizontally closest to the last pick.
    if previous_gaps:
        last_x = previous_gaps[-1]['target'][0]
        return min(gaps, key=lambda gap: abs(gap['target'][0] - last_x))
    # Without history: fall back to the left-most candidate.
    return min(gaps, key=lambda gap: gap['target'][0])
def solve_slide_captcha(driver, max_retry=3):
    """Run the slider flow, retrying up to ``max_retry`` times.

    Every unsuccessful attempt counts against ``max_retry``, whether it ended
    in an exception or simply was not confirmed by the page.

    Args:
        driver: Selenium WebDriver positioned on the page with the captcha.
        max_retry: Maximum number of attempts.

    Returns:
        ``True`` once the page source contains the success marker, ``False``
        after all attempts are used up.
    """
    for attempt in range(1, max_retry + 1):
        try:
            # 1. Locate the elements.
            bg_element = WebDriverWait(driver, 10).until(
                EC.presence_of_element_located((By.CLASS_NAME, "captcha-bg"))
            )
            gap_element = WebDriverWait(driver, 10).until(
                EC.presence_of_element_located((By.CLASS_NAME, "captcha-gap"))
            )
            # 2. Capture the page.
            bg_location = bg_element.location
            bg_size = bg_element.size
            driver.save_screenshot("screenshot.png")
            # 3. Crop the background region out of the screenshot.
            crop_image("screenshot.png", "bg.png",
                       bg_location['x'], bg_location['y'],
                       bg_size['width'], bg_size['height'])
            # 4. Detect the gap.
            target_gap = detect_gaps("bg.png", "gap_template.png")
            if not target_gap:
                raise Exception("No valid gap detected")
            # 5. Convert the image X coordinate into an on-page distance.
            # NOTE(review): when the match carries no 'background' size the
            # ratio falls back to 1.0, i.e. the cropped image is assumed to
            # be the same size as the element. Confirm on high-DPI displays.
            background = target_gap.get('background')
            scale_ratio = bg_size['width'] / background[0] if background else 1.0
            move_distance = target_gap['target'][0] * scale_ratio - 25
            # 6. Build the track and 7. perform the drag.
            track = generate_move_track(move_distance)
            execute_slide_verification(driver, gap_element, track)
            # 8. Give the page time to react, then check the outcome.
            time.sleep(2)
            if "验证成功" in driver.page_source:
                return True
            print(f"Attempt {attempt} failed: verification not confirmed")
        except Exception as e:
            print(f"Attempt {attempt} failed: {str(e)}")

        # Request a fresh challenge; a missing refresh button must not abort
        # the remaining attempts.
        try:
            driver.find_element(By.CLASS_NAME, "captcha-refresh").click()
        except Exception as e:
            print(f"Refresh after attempt {attempt} failed: {str(e)}")
        time.sleep(1)
    return False
def mask_browser_fingerprint(driver):
    """Override a few JS-visible properties in the current document.

    The WebGL vendor/renderer strings, ``screen.width``/``screen.height`` and
    the ``timeZone`` reported by ``Intl.DateTimeFormat`` are replaced.

    Args:
        driver: Selenium WebDriver whose current page is patched.
    """
    # WebGL vendor/renderer (parameters 37445 / 37446). The original method
    # lives on the prototype and must be invoked with the calling context as
    # ``this``; otherwise every other getParameter call on the page throws.
    driver.execute_script(
        "const getParameter = WebGLRenderingContext.prototype.getParameter;"
        "WebGLRenderingContext.prototype.getParameter = function(parameter) {"
        "  if (parameter === 37445) return 'Intel Inc.';"
        "  if (parameter === 37446) return 'Intel Iris OpenGL Engine';"
        "  return getParameter.call(this, parameter);"
        "};"
    )
    # Screen resolution.
    driver.execute_script(
        "Object.defineProperty(screen, 'width', {get: () => 1920});"
        "Object.defineProperty(screen, 'height', {get: () => 1080});"
    )
    # Time zone: keep the other resolved options (locale, calendar, ...) and
    # override only ``timeZone``.
    driver.execute_script(
        "const resolvedOptions = Intl.DateTimeFormat.prototype.resolvedOptions;"
        "Object.defineProperty(Intl.DateTimeFormat.prototype, 'resolvedOptions', {"
        "  value: function() {"
        "    return Object.assign(resolvedOptions.call(this), { timeZone: 'Asia/Shanghai' });"
        "  }"
        "});"
    )
def human_like_mouse_movement(driver, element):
    """Move the pointer towards ``element`` along a generated curve.

    Args:
        driver: Selenium WebDriver controlling the page.
        element: Target element; its scrolled-into-view location is the
            end point handed to the curve generator.
    """
    chain = ActionChains(driver)
    target = element.location_once_scrolled_into_view

    # Random starting offset.
    origin_x = random.randint(100, 300)
    origin_y = random.randint(300, 500)
    chain.move_by_offset(origin_x, origin_y).perform()
    time.sleep(random.uniform(0.2, 0.5))

    # NOTE(review): generate_bezier_curve is defined outside this view. Its
    # points are passed straight to move_by_offset, which expects relative
    # offsets - confirm that the generator does not return absolute points.
    curve = generate_bezier_curve(
        origin_x, origin_y,
        target['x'] + 10, target['y'] + 10,
        points=30
    )
    for waypoint in curve:
        chain.move_by_offset(waypoint[0], waypoint[1]).perform()
        time.sleep(random.uniform(0.01, 0.03))
def universal_captcha_solver(driver):
    """Detect the captcha type on the page and hand off to its solver.

    Args:
        driver: Selenium WebDriver positioned on the page with the captcha.

    Returns:
        Whatever the selected solver returns.

    Raises:
        Exception: If the detected type is not SLIDE, TEXT or CLICK.
    """
    kind = identify_captcha_type(driver)
    if kind == "SLIDE":
        return solve_slide_captcha(driver)
    if kind == "TEXT":
        return solve_text_captcha(driver)
    if kind == "CLICK":
        return solve_click_captcha(driver)
    raise Exception("Unsupported captcha type")
问题1:缺口识别错误率高
# Image enhancement preprocessing
def enhance_image(image_path, output_path="enhanced.png"):
    """Write an edge map of an image (grayscale -> 3x3 Gaussian blur -> Canny).

    Args:
        image_path: Path of the image to process.
        output_path: Where to write the result; defaults to ``enhanced.png``.

    Returns:
        ``output_path``.

    Raises:
        OSError: If the input cannot be read or the output cannot be written.
    """
    # Imported here so the helper is self-contained.
    import cv2

    img = cv2.imread(image_path)
    # imread signals failure by returning None instead of raising.
    if img is None:
        raise OSError(f"Cannot read image: {image_path}")
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    blurred = cv2.GaussianBlur(gray, (3, 3), 0)
    edges = cv2.Canny(blurred, 100, 200)
    if not cv2.imwrite(output_path, edges):
        raise OSError(f"Cannot write image: {output_path}")
    return output_path
问题2:被检测为自动化程序
# Use undetected_chromedriver instead of the stock Selenium Chrome driver
import undetected_chromedriver as uc
# NOTE(review): version_main=105 is hard-coded; presumably it has to match the
# major version of the locally installed Chrome - confirm before running.
driver = uc.Chrome(version_main=105)
# NOTE(review): looks like a placeholder URL - replace with the page under test.
driver.get('https://target-website.com')
问题3:移动轨迹被识别
# Add human-like jitter
def humanize_track(track):
    """Insert three zero-length pauses and pair each step with a Y jitter.

    The input list is left untouched. For tracks shorter than 10 steps the
    pauses may land anywhere, because the usual "at least 5 from either end"
    window would be empty.

    Args:
        track: Sequence of horizontal step sizes.

    Returns:
        A new list of ``(dx, dy)`` tuples with ``dy`` in ``{-1, 0, 1}``;
        an empty list for an empty track.
    """
    steps = list(track)  # work on a copy so the caller's list is not mutated
    if not steps:
        return []
    # Randomly insert micro-pauses.
    for _ in range(3):
        low, high = 5, len(steps) - 5
        if low > high:
            low, high = 0, len(steps)
        steps.insert(random.randint(low, high), 0)
    # Add vertical jitter.
    return [(dx, random.randint(-1, 1)) for dx in steps]
def collect_training_data():
    """Record a failed solve attempt, then trigger retraining.

    Relies on names that are not defined in the visible code (``driver``,
    ``bg_image``, ``gap_image``, ``gap_position``, ``save_failed_case``,
    ``retrain_dddocr_model``).
    """
    solved = solve_slide_captcha(driver)
    if not solved:
        save_failed_case(bg_image, gap_image, gap_position)
    # NOTE(review): the original indentation was lost; retraining is assumed
    # to run regardless of the outcome above - confirm.
    retrain_dddocr_model()
def identify_captcha_type(driver):
    """Classify the captcha on the page by probing for known elements.

    Args:
        driver: Selenium WebDriver positioned on the page to inspect.

    Returns:
        ``"SLIDE"``, ``"TEXT"`` or ``"CLICK"`` for the first probe that finds
        at least one element, otherwise ``"UNKNOWN"``.
    """
    # Probes are tried in this order; the first hit wins.
    probes = (
        (By.CLASS_NAME, "slider-captcha", "SLIDE"),
        (By.ID, "captcha-image", "TEXT"),
        (By.CLASS_NAME, "point-captcha", "CLICK"),
    )
    for strategy, locator, label in probes:
        if driver.find_elements(strategy, locator):
            return label
    return "UNKNOWN"
重要注意事项:
合法使用场景:
随着AI对抗的升级,滑块验证技术也在持续进化:
本文实现的Python+ddddocr解决方案,通过以下关键技术点突破多缺口验证:
终极建议:在实际应用中,建议结合代理IP池和硬件模拟技术,构建更健壮的自动化系统。同时持续关注ddddocr的更新,其最新版本已支持更复杂的验证码类型识别。