实现了自动识别形状验证码

This commit is contained in:
灼眼者 2024-06-01 23:24:48 +08:00
parent 6d70fe03a3
commit 84bb57e53e
8 changed files with 375 additions and 8 deletions

View File

@ -1,3 +1,55 @@
## 20240601 更新
### 更新内容
- 加入了自动验证形状码的方法
### 配置步骤
步骤较繁琐,我们需要拿到几个重要的坐标,请加油!!!
#### 1、需要获取以下三个坐标
```
# 截取文字区域的左上和右下坐标
small_img_top_left_x, small_img_top_left_y = 500, 534 # 左上角坐标
small_img_bottom_right_x, small_img_bottom_right_y = 800, 559 # 右下角坐标
# 形状图的左上角坐标
backend_top_left_x, backend_top_left_y = 505, 340
```
#### 2、运行脚本
```commandline
python locate_tool4shape.py
```
- 运行后,等待浏览器自动完成滑块验证并进入形状验证码页面;此后脚本会捕获鼠标的点击事件。
- 按下图1,2,3顺序点击,获取三个坐标
- 点1位于形状图片最左上角的位置,用于计算形状的坐标
- 点2、点3用于截取文字区域的图片并进行识别
![PNG](./img/sharp_click.png)
- 运行情况如下图
![PNG](./img/run_loate4shape.png)
#### 3、添加配置
编辑config.py填入获取到的值
```commandline
# 是否自动形状验证码识别,有时不准,就关掉吧
auto_shape_recognition = True
# 形状图的左上角坐标
backend_top_left_x, backend_top_left_y = 505, 340
# 截取文字区域的左上和右下坐标
# 左上角坐标
small_img_top_left_x, small_img_top_left_y = 500, 534
# 右下角坐标
small_img_bottom_right_x, small_img_bottom_right_y = 800, 559
```
# MyJdCOOKIE
### 介绍
@ -19,6 +71,7 @@
- 写使用文档(已实现)
- 加一些通知如钉钉等
- 添加获取滑块x,y坐标的工具(已实现)
- 加入了自动验证形状码的方法(已实现)
## 使用文档
### 安装依赖
@ -39,6 +92,9 @@ python locate_tool.py
![GIF](./img/get_location.gif)
### 配置形状验证码的内容
#### **详见20240601 更新**
### 添加配置
- 复制config_example.py, 重命名为config.py, 我们基于这个config.py运行程序;
- slide_x_position, slide_y_position用locate_tool.py拿到;
@ -50,4 +106,7 @@ python locate_tool.py
### 运行脚本
```commandline
python main.py
```
```
### 特别感谢
- 感谢 **https://github.com/sml2h3/ddddocr** 项目,牛逼项目

View File

@ -47,7 +47,7 @@ class QlApi(object):
async with aiohttp.ClientSession() as session:
async with session.get(f"{self.url}/{QlUri.envs.value}", headers=self.headers) as response:
if response.status == 200:
logger.info("Get Envs successful. Token obtained.")
logger.info("Get Envs successful.")
data = await response.json()
return data
else:
@ -57,7 +57,7 @@ class QlApi(object):
async with aiohttp.ClientSession() as session:
async with session.put(f"{self.url}/{QlUri.envs.value}", data=json.dumps(data), headers=self.headers) as response:
if response.status == 200:
logger.info("Set Envs successful. Token obtained.")
logger.info("Set Envs successful.")
data = await response.json()
return data
else:
@ -67,7 +67,7 @@ class QlApi(object):
async with aiohttp.ClientSession() as session:
async with session.put(f"{self.url}/{QlUri.envs_enable.value}", data=data, headers=self.headers) as response:
if response.status == 200:
logger.info("enable Envs successful. Token obtained.")
logger.info("enable Envs successful.")
data = await response.json()
return data
else:
@ -77,7 +77,7 @@ class QlApi(object):
async with aiohttp.ClientSession() as session:
async with session.put(f"{self.url}/{QlUri.envs_disable.value}", data=data, headers=self.headers) as response:
if response.status == 200:
logger.info("disable Envs successful. Token obtained.")
logger.info("disable Envs successful.")
data = await response.json()
return data
else:

View File

@ -28,3 +28,15 @@ slide_difference = 10
# 是否自动识别移动滑块,有时不准,就关掉吧
auto_move = True
# Whether to solve the shape captcha automatically; recognition is sometimes
# inaccurate, so set this to False to turn it off.
auto_shape_recognition = True
# Coordinates of the top-left corner of the shape image.
backend_top_left_x, backend_top_left_y = 505, 340
# Top-left and bottom-right corners of the region that contains the prompt text.
# Top-left corner.
small_img_top_left_x, small_img_top_left_y = 500, 534
# Bottom-right corner.
small_img_bottom_right_x, small_img_bottom_right_y = 800, 559

62
locate_tool4shape.py Normal file
View File

@ -0,0 +1,62 @@
import asyncio
from pynput import mouse
from config import jd_login_url
from main import auto_move_slide
from playwright.async_api import async_playwright
import random
"""
This script is used to capture the coordinates required by the shape captcha.
"""
def get_position(x_name, y_name, detail):
    """
    Ask the user to click somewhere and print the coordinates of that click.

    The output line has the form ``<x_name>, <y_name> = <x>, <y>``.
    The call blocks until the first mouse-button press is observed.

    :param x_name: label printed for the x value
    :param y_name: label printed for the y value
    :param detail: description of the spot to click, shown in the prompt
    """
    print(f"请点击{detail},将获取坐标 ...")

    def report_and_stop(pos_x, pos_y, _button, is_down):
        # Release events are ignored; only the press is reported.
        if not is_down:
            return None
        print(f"{x_name}, {y_name} = {pos_x}, {pos_y}")
        # Returning False stops the listener.
        return False

    with mouse.Listener(on_click=report_and_stop) as click_listener:
        click_listener.join()
async def main():
    """
    Drive the JD login page to the shape captcha, then record three clicks.

    A visible Chromium window is opened, dummy credentials ("1" / "1") are
    submitted and ``auto_move_slide`` is run. The user is then asked for three
    clicks, whose coordinates are printed by ``get_position``.

    Any failure is reported with a traceback instead of being silently
    discarded, so a broken run can be diagnosed. The function still returns
    normally in that case.
    """
    import traceback  # local import: only needed for failure reporting

    async with async_playwright() as playwright:
        try:
            browser = await playwright.chromium.launch(headless=False)
            context = await browser.new_context()
            page = await context.new_page()

            # Hide the webdriver property to bypass webdriver detection.
            js = """Object.defineProperties(navigator, {webdriver:{get:()=>undefined}});"""
            await page.add_init_script(js)

            await page.goto(jd_login_url)
            await page.get_by_text("账号密码登录").click()

            username_input = page.get_by_placeholder("账号名/邮箱/手机号")
            await username_input.click()
            await username_input.type("1")
            await asyncio.sleep(random.random() / 10)

            password_input = page.get_by_placeholder("请输入密码")
            await password_input.click()
            await password_input.type('1')

            await page.get_by_role("checkbox").check()
            await page.get_by_text("登 录").click()

            await auto_move_slide(page, retry_times=5)
            await asyncio.sleep(random.random() / 10)

            # Each call blocks until the user clicks once.
            get_position("backend_top_left_x", "backend_top_left_y", "形状图的左上角")
            get_position("small_img_top_left_x", "small_img_top_left_y", "文字区域的左上角")
            get_position("small_img_bottom_right_x", "small_img_bottom_right_y", "文字区域的右下角")

            await context.close()
            await browser.close()
        except Exception:
            # Best-effort tool: do not crash, but show what went wrong.
            traceback.print_exc()
            return
if __name__ == '__main__':
asyncio.run(main())

100
main.py
View File

@ -1,13 +1,40 @@
import asyncio
from api.qinglong import QlApi
from config import slide_difference, slide_x_position, slide_y_position, auto_move, qinglong_data, user_datas, jd_login_url
from config import (
slide_difference,
slide_x_position,
slide_y_position,
auto_move,
qinglong_data,
user_datas,
jd_login_url,
auto_shape_recognition,
backend_top_left_x,
backend_top_left_y,
small_img_top_left_x,
small_img_top_left_y,
small_img_bottom_right_x,
small_img_bottom_right_y
)
from loguru import logger
import time
from playwright.async_api import Playwright, async_playwright
import random
import traceback
from typing import Union
from utils.tools import base_move, get_img_bytes, get_forbidden_users_dict, filter_forbidden_users
from utils.consts import supported_types
from utils.tools import (
base_move,
get_img_bytes,
get_forbidden_users_dict,
filter_forbidden_users,
save_img,
get_ocr,
get_word,
save_screenshot_img,
get_shape_location_by_type,
click_by_autogui
)
"""
基于playwright做的
@ -42,6 +69,70 @@ async def auto_move_slide(page, retry_times: int=2):
time.sleep(3)
async def auto_shape(page, retry_times: int=5):
    """
    Try to solve the shape captcha automatically.

    Each attempt does the following:
      1. Wait for ``#cpc_img``. If it does not appear, the captcha is treated
         as gone and the loop stops.
      2. Screenshot the prompt-text region and OCR it.
      3. Extract the requested shape name from the prompt.
      4. Locate that shape in the background image, click it, then confirm.
    Prompts that cannot be handled trigger a click on the refresh button.

    :param page: playwright page that shows the captcha
    :param retry_times: maximum number of attempts
    """
    ocr = get_ocr(beta=True)
    prompt_prefix = '请选出图中的'

    for i in range(retry_times):
        logger.info(f'{i}次自动识别形状中...')
        try:
            # Look for the captcha image.
            await page.wait_for_selector('#cpc_img', state='visible', timeout=3000)
        except Exception:
            # Element not found: treat as success and leave the loop.
            logger.info('未找到形状图,退出识别状态')
            break

        # Image source plus the confirm and refresh buttons.
        background_src = await page.locator('#cpc_img').get_attribute('src')
        button = page.locator('div.captcha_footer button.sure_btn')
        refresh_button = page.locator('div.captcha_header img.jcap_refresh')

        # Screenshot of the prompt-text region, saved to disk.
        small_img_path = save_screenshot_img(
            small_img_top_left_x,
            small_img_top_left_y,
            small_img_bottom_right_x,
            small_img_bottom_right_y,
            'small_img',
        )

        # Download and save the background image.
        background_img_bytes = get_img_bytes(background_src)
        background_img_path = save_img('background_img', background_img_bytes)

        # OCR the prompt text.
        word = get_word(ocr, small_img_path)

        # Colour-based prompts are not supported. The previous check,
        # ``word.find('') > 0``, was always False, so this branch never ran.
        if '色' in word:
            logger.info('不支持颜色,刷新中......')
            await refresh_button.click()
            await asyncio.sleep(random.random() * 2)
            continue

        # OCR may miss the prompt prefix; refresh instead of raising IndexError.
        parts = word.split(prompt_prefix)
        if len(parts) < 2:
            logger.info('未识别到题目文字,刷新中......')
            await refresh_button.click()
            await asyncio.sleep(random.random() * 2)
            continue

        shape_type = parts[1]
        if shape_type not in supported_types:
            logger.info('不支持该类型形状,刷新中......')
            await refresh_button.click()
            await asyncio.sleep(random.random() * 3)
            continue

        logger.info('已找到图形,点击中......')
        # Centre of the shape, relative to the background image.
        center_x, center_y = get_shape_location_by_type(background_img_path, shape_type)
        if center_x is None or center_y is None:
            logger.info('识别失败,刷新中......')
            await refresh_button.click()
            continue

        # Offset by the image's top-left corner to get the click position.
        x, y = backend_top_left_x + center_x, backend_top_left_y + center_y
        click_by_autogui(x, y)
        await asyncio.sleep(random.random() * 3)

        # Confirm; the next iteration checks whether the captcha is still shown.
        await button.click()
        await asyncio.sleep(3)
async def get_jd_pt_key(playwright: Playwright, user) -> Union[dict, None]:
"""
获取jd的pt_key
@ -77,6 +168,11 @@ async def get_jd_pt_key(playwright: Playwright, user) -> Union[dict, None]:
time.sleep(2)
await auto_move_slide(page, retry_times=5)
# 自动验证形状验证码
if auto_shape_recognition:
time.sleep(2)
await auto_shape(page, retry_times=30)
# 等待滑块验证码通过
await page.wait_for_selector('#msShortcutMenu', state='visible', timeout=120000)

View File

@ -3,4 +3,5 @@ requests
aiohttp
playwright
loguru
pyautogui
pyautogui
pynput

9
utils/consts.py Normal file
View File

@ -0,0 +1,9 @@
# Shape names that the automatic shape recognition can handle.
supported_types = [
"三角形",  # triangle
"正方形",  # square
"长方形",  # rectangle
"五角星",  # five-pointed star
"六边形",  # hexagon
"圆形",  # circle
"梯形"  # trapezoid
]

View File

@ -1,8 +1,19 @@
import base64
import cv2
import ddddocr
import io
import pyautogui
import random
import os
from PIL import Image, ImageGrab
import re
import time
def get_tmp_dir(tmp_dir: str = './tmp'):
    """
    Make sure the temporary directory exists and return its path.

    :param tmp_dir: directory to create if missing (parents included)
    :return: ``tmp_dir`` unchanged
    :raises FileExistsError: if ``tmp_dir`` exists but is not a directory
    """
    # exist_ok avoids the race between "check" and "create" that the
    # previous os.path.exists() guard had.
    os.makedirs(tmp_dir, exist_ok=True)
    return tmp_dir
def ddddocr_find_files_pic(target_file, background_file) -> int:
@ -41,6 +52,41 @@ def get_img_bytes(img_src: str) -> bytes:
raise "image is empty"
def get_ocr(**kwargs):
    """
    Create a ``ddddocr.DdddOcr`` instance with ``show_ad=False``.

    :param kwargs: extra keyword arguments forwarded to ``DdddOcr``
    :return: the constructed recognizer
    """
    options = dict(show_ad=False, **kwargs)
    return ddddocr.DdddOcr(**options)
def save_img(img_name, img_bytes):
    """
    Decode image bytes with Pillow and store them as ``<img_name>.png``.

    :param img_name: file name without extension
    :param img_bytes: encoded image data
    :return: path of the written file inside the tmp directory
    """
    target_path = os.path.join(get_tmp_dir(), f'{img_name}.png')
    # The image is opened and re-saved through Pillow, not written as raw bytes.
    with Image.open(io.BytesIO(img_bytes)) as decoded:
        decoded.save(target_path)
    return target_path
def get_word(ocr, img_path):
    """
    Run OCR on an image file and return the recognised text.

    :param ocr: object exposing ``classification(image_bytes, png_fix=...)``
    :param img_path: path of the image file to read
    :return: whatever ``ocr.classification`` returns for the file's bytes
    """
    # Context manager closes the handle; the bare open().read() leaked it.
    with open(img_path, "rb") as image_file:
        image_bytes = image_file.read()
    return ocr.classification(image_bytes, png_fix=True)
def save_screenshot_img(left, top, right, bottom, img_name):
    """
    Grab a rectangular screen region and save it as ``<img_name>.png``.

    :param left: x of the region's top-left corner
    :param top: y of the region's top-left corner
    :param right: x of the region's bottom-right corner
    :param bottom: y of the region's bottom-right corner
    :param img_name: file name without extension
    :return: path of the written file inside the tmp directory
    """
    output_path = os.path.join(get_tmp_dir(), f'{img_name}.png')
    # Short pause so the region to capture is ready.
    time.sleep(2)
    region = (left, top, right, bottom)
    ImageGrab.grab(bbox=region).save(output_path)
    return output_path
def slide_by_autogui(x, y, offset):
"""
使用pyautogui实现滑块并自定义轨迹方程
@ -92,3 +138,85 @@ def base_move(slide_x_position, slide_y_position, small_img_bytes, background_im
x = ddddocr_find_bytes_pic(small_img_bytes, background_img_bytes) + slide_difference
slide_by_autogui(slide_x_position, slide_y_position, x)
def sort_rectangle_vertices(vertices):
    """
    Order four vertices as top-left, top-right, bottom-right, bottom-left.

    :param vertices: four points, each indexable as ``[x, y]``
    :return: list of the same points in the order described above
    """
    # The two points with the smallest y form the top edge, the rest the bottom.
    ordered_by_y = sorted(vertices, key=lambda point: point[1])
    upper_pair, lower_pair = ordered_by_y[:2], ordered_by_y[2:]

    # Within each edge, the smaller x is the left-hand point.
    left_top, right_top = sorted(upper_pair, key=lambda point: point[0])
    left_bottom, right_bottom = sorted(lower_pair, key=lambda point: point[0])

    return [left_top, right_top, right_bottom, left_bottom]
def is_trapezoid(vertices):
    """
    Decide whether a quadrilateral counts as a trapezoid.

    The test is purely "top edge narrower than bottom edge", measured along x.

    :param vertices: four points ordered top-left, top-right, bottom-right,
        bottom-left, each indexable as ``[x, y]``
    :return: True when the top width is strictly smaller than the bottom width
    """
    upper_span = abs(vertices[1][0] - vertices[0][0])
    lower_span = abs(vertices[2][0] - vertices[3][0])
    return lower_span > upper_span
def _classify_contour(approx, w, h):
    """Return the shape name for an approximated contour, by corner count."""
    corner_num = len(approx)
    if corner_num == 3:
        return "三角形"
    if corner_num == 4:
        # Equal bounding-box sides are treated as a square.
        if w == h:
            return "正方形"
        ordered = sort_rectangle_vertices([vertex[0] for vertex in approx])
        return "梯形" if is_trapezoid(ordered) else "长方形"
    if corner_num == 6:
        return "六边形"
    if corner_num == 20:
        return "五角星"
    if corner_num > 4:
        # Every other count above 4 (5, 7-19, 21+) is treated as a circle.
        return "圆形"
    return "未知"


def get_shape_location_by_type(img_path, type: str):
    """
    Find the first contour of the requested shape and return its centre.

    :param img_path: path of the image to analyse
    :param type: shape name to look for (one of the names in
        ``_classify_contour``)
    :return: ``(center_x, center_y)`` of the matching contour's bounding box
        in image pixels, or ``(None, None)`` when the image cannot be read
        or no contour matches
    """
    img = cv2.imread(img_path)
    if img is None:
        # Unreadable file: report "not found" so the caller can retry,
        # rather than crashing inside cvtColor.
        return None, None

    # NOTE(review): cv2.imread is documented to return BGR data, so
    # COLOR_BGR2GRAY may be what was intended. Left unchanged because the
    # thresholds below were presumably tuned against this conversion.
    img_gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)  # grayscale
    img_blur = cv2.GaussianBlur(img_gray, (5, 5), 1)  # Gaussian blur
    img_canny = cv2.Canny(img_blur, 60, 60)  # Canny edge detection
    contours, _ = cv2.findContours(img_canny, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)

    for obj in contours:
        perimeter = cv2.arcLength(obj, True)  # contour perimeter
        approx = cv2.approxPolyDP(obj, 0.02 * perimeter, True)  # corner points
        x, y, w, h = cv2.boundingRect(approx)  # bounding box

        if _classify_contour(approx, w, h) == type:
            # Centre of the bounding box.
            return x + w // 2, y + h // 2

    # Nothing matched.
    return None, None
def click_by_autogui(x, y):
    """
    Move the mouse pointer to ``(x, y)`` and perform a click there.

    :param x: target x coordinate
    :param y: target y coordinate
    """
    # Position the pointer first, then click at the current location.
    pyautogui.moveTo(x, y)
    pyautogui.click()