版权声明:本文为博主原创文章,遵循 CC 4.0 BY-SA 版权协议,转载请附上原文出处链接和本声明。
本文链接:https://blog.csdn.net/weixin_40313634/article/details/84591042
环境
- python3.6.7
- OCR识别库: OCR工具(exe)安装:选择语言库,其他默认。 tesserocr库安装:pip3 install tesserocr pillow PIL库安装:pip install pillow
- Sublime Text编辑器安装
- ChromeDriver 安装 可能遇到的问题: 问题1:tesserocr安装失败,报错如下: install --record C:\Users\Zy129\AppData\Local\Temp\pip-record-r5t7l43t\install-record.txt --single-version-externally-managed --compile" failed with error code 1 in C:\Users\Zy129\AppData\Local\Temp\pip-install-5z4ljbyj\tesserocr\ 解决办法:手动下载和python版本对应的原始whl文件安装即可(tesserocr-2.3.1-cp36-cp36m-win_amd64.whl)。 whl 文件:https://github.com/simonflueckiger/tesserocr-windows_build/releases 方法参考:http://www.mamicode.com/info-detail-2353336.html 建议:通常情况下,不要安装最新版本的python,因为可能某库还没有升级,导致无法使用。 问题2:调用tesserocr.image_to_text(image)方法时,报如下错: RuntimeError: Failed to init API, possibly an invalid tessdata path: C:\Users\Zy129\AppData\Local\Programs\Python\Python36/tessdata/ 解决:拷贝OCR库文件夹tessdata(默认路径:C:\Program Files (x86)\Tesseract-OCR)到python安装路径下(默认路径:C:\Users\Zy129\AppData\Local\Programs\Python\Python36)即可。 参考方法:https://www.jianshu.com/p/dcad7ee3b162
图形验证
1. version1.0
import tesserocr
from PIL import Image

# Version 1.0: run OCR directly on the unprocessed captcha image.
image = Image.open('code.jpg')
# image_to_text returns the text recognised in the image.
result = tesserocr.image_to_text(image)
print(result)
效果:
2. version2.0 问题:部分图片背景上有条纹,可能导致图片识别错误。 改进点:将图片转化为灰度图像,设定阈值,小于阈值的像素点置为0,大于的置为1。即把彩色图片转化为黑白图片,忽略掉背景色的影响,从而增大验证码识别率。
import tesserocr
from PIL import Image

# Version 2.0: binarise the captcha before OCR so that background stripes
# do not disturb recognition.
image = Image.open('code.jpg')
# Convert to a grayscale image.
image = image.convert('L')
# Binarisation threshold; 8-bit grayscale values lie in 0-255.
threshold = 127
# Lookup table with one entry per gray level: levels below the threshold
# map to 0, all others map to 1.
table = [0 if level < threshold else 1 for level in range(256)]
image = image.point(table, '1')
# Display the converted image.
image.show()
result = tesserocr.image_to_text(image)
print(result)
效果:
滑块验证
version:
import time
from io import BytesIO

from PIL import Image
from selenium import webdriver
from selenium.common.exceptions import TimeoutException
from selenium.webdriver import ActionChains
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
# Account e-mail; CrackGeetest.__init__ copies it to self.email and open() types it into the login form.
EMAIL = 'cqc@cuiqingcai.com'
# Account password; CrackGeetest.__init__ copies it to self.password. Left empty here.
PASSWORD = ''
# Pixel offset that crack() subtracts from the detected gap position before building the drag track.
BORDER = 6
# Not referenced by the code shown; get_gap() starts its scan at a hard-coded 60,
# presumably the same value -- TODO confirm.
INIT_LEFT = 60
class CrackGeetest():
    """Drive a Chrome browser through the Geetest slider captcha on the login page.

    The flow implemented by ``crack()`` is: open the login page, trigger the
    captcha, screenshot it before and after the gap is revealed, locate the
    gap by comparing the two screenshots, and drag the slider along a
    generated track.
    """

    def __init__(self):
        """Start Chrome and store the target URL, the wait helper and the credentials."""
        self.url = 'https://account.geetest.com/login'
        self.browser = webdriver.Chrome()
        # Explicit waits below time out after 20 seconds.
        self.wait = WebDriverWait(self.browser, 20)
        self.email = EMAIL
        self.password = PASSWORD

    def __del__(self):
        """Shut the browser down when the object is collected."""
        # __init__ may have failed before self.browser was assigned, so do
        # not assume the attribute exists.
        browser = getattr(self, 'browser', None)
        if browser is not None:
            # quit() ends the whole driver session; close() would only
            # close the current window.
            browser.quit()

    def get_geetest_button(self):
        """
        Get the initial "click to verify" button.
        :return: the button element, once it is clickable
        """
        button = self.wait.until(EC.element_to_be_clickable((By.CLASS_NAME, 'geetest_radar_tip')))
        return button

    def get_position(self):
        """
        Get the position of the captcha image on the page.
        :return: tuple ``(top, bottom, left, right)``
        """
        img = self.wait.until(EC.presence_of_element_located((By.CLASS_NAME, 'geetest_canvas_img')))
        # Pause before reading the element's location and size.
        time.sleep(2)
        location = img.location
        size = img.size
        top = location['y']
        bottom = location['y'] + size['height']
        left = location['x']
        right = location['x'] + size['width']
        return (top, bottom, left, right)

    def get_screenshot(self):
        """
        Take a screenshot of the page.
        :return: the screenshot as a PIL image
        """
        screenshot = self.browser.get_screenshot_as_png()
        screenshot = Image.open(BytesIO(screenshot))
        return screenshot

    def get_slider(self):
        """
        Get the slider handle.
        :return: the slider element, once it is clickable
        """
        slider = self.wait.until(EC.element_to_be_clickable((By.CLASS_NAME, 'geetest_slider_button')))
        return slider

    def get_geetest_image(self, name='captcha.png'):
        """
        Crop the captcha out of a page screenshot and save it to disk.
        :param name: file name the cropped image is saved under
        :return: the cropped captcha image
        """
        top, bottom, left, right = self.get_position()
        print('验证码位置', top, bottom, left, right)
        screenshot = self.get_screenshot()
        # PIL's crop box is ordered (left, upper, right, lower).
        captcha = screenshot.crop((left, top, right, bottom))
        captcha.save(name)
        return captcha

    def open(self):
        """
        Open the login page and type in the e-mail and password.
        :return: None
        """
        self.browser.get(self.url)
        email = self.wait.until(EC.presence_of_element_located((By.ID, 'email')))
        password = self.wait.until(EC.presence_of_element_located((By.ID, 'password')))
        email.send_keys(self.email)
        password.send_keys(self.password)

    def get_gap(self, image1, image2):
        """
        Find the x offset of the gap by comparing the two captcha images.
        :param image1: image without the gap
        :param image2: image with the gap
        :return: x of the first column (scanning from x=60) that contains a
                 differing pixel, or 60 when no pixel differs
        """
        # Columns left of this offset are skipped when scanning.
        left = 60
        # Load the pixel data once instead of once per compared pixel.
        pixels1 = image1.load()
        pixels2 = image2.load()
        width, height = image1.size[0], image1.size[1]
        for x in range(left, width):
            for y in range(height):
                if not self._pixels_similar(pixels1[x, y], pixels2[x, y]):
                    return x
        return left

    @staticmethod
    def _pixels_similar(pixel1, pixel2, threshold=60):
        """Return True when the first three channels each differ by less than ``threshold``."""
        return all(abs(pixel1[c] - pixel2[c]) < threshold for c in range(3))

    def is_pixel_equal(self, image1, image2, x, y):
        """
        Check whether two images have (nearly) the same pixel at one position.
        :param image1: first image
        :param image2: second image
        :param x: x coordinate
        :param y: y coordinate
        :return: True when each of the first three channels differs by less than 60
        """
        pixel1 = image1.load()[x, y]
        pixel2 = image2.load()[x, y]
        return self._pixels_similar(pixel1, pixel2)

    def get_track(self, distance):
        """
        Build the list of per-step moves for dragging the slider.

        The motion accelerates for the first 4/5 of the distance and
        decelerates afterwards.
        :param distance: total offset to cover
        :return: list of rounded step lengths; empty when distance <= 0
        """
        # Per-step moves.
        track = []
        # Distance covered so far.
        current = 0
        # Point at which deceleration starts.
        mid = distance * 4 / 5
        # Time step.
        t = 0.2
        # Current velocity.
        v = 0
        while current < distance:
            if current < mid:
                # Accelerate.
                a = 2
            else:
                # Decelerate.
                a = -3
            # Velocity at the start of this step.
            v0 = v
            # v = v0 + a*t
            v = v0 + a * t
            # x = v0*t + 1/2 * a * t^2
            move = v0 * t + 1 / 2 * a * t * t
            # Accumulate the unrounded displacement.
            current += move
            track.append(round(move))
        return track

    def move_to_gap(self, slider, track):
        """
        Drag the slider along the given track and release it.
        :param slider: slider element
        :param track: list of x offsets, one per step
        :return: None
        """
        ActionChains(self.browser).click_and_hold(slider).perform()
        for x in track:
            ActionChains(self.browser).move_by_offset(xoffset=x, yoffset=0).perform()
        # Pause briefly before releasing the mouse button.
        time.sleep(0.5)
        ActionChains(self.browser).release().perform()

    def login(self):
        """
        Click the login button.
        :return: None
        """
        submit = self.wait.until(EC.element_to_be_clickable((By.CLASS_NAME, 'login-btn')))
        submit.click()
        time.sleep(10)
        print('登录成功')

    def crack(self):
        """Run one full attempt; start over if verification does not succeed."""
        # Open the page and enter the credentials.
        self.open()
        # Click the verification button.
        button = self.get_geetest_button()
        button.click()
        # Capture the captcha before the gap is shown.
        image1 = self.get_geetest_image('captcha1.png')
        # Click the slider to reveal the gap.
        slider = self.get_slider()
        slider.click()
        # Capture the captcha with the gap.
        image2 = self.get_geetest_image('captcha2.png')
        # Locate the gap.
        gap = self.get_gap(image1, image2)
        print('缺口位置', gap)
        # Subtract the border offset.
        gap -= BORDER
        # Build the drag track.
        track = self.get_track(gap)
        print('滑动轨迹', track)
        # Drag the slider.
        self.move_to_gap(slider, track)
        # wait.until raises TimeoutException instead of returning a falsy
        # value, so the failure case has to be caught for the retry to run.
        try:
            success = self.wait.until(
                EC.text_to_be_present_in_element((By.CLASS_NAME, 'geetest_success_radar_tip_content'), '验证成功'))
        except TimeoutException:
            success = False
        print(success)
        # Retry from the start on failure, otherwise log in.
        if not success:
            self.crack()
        else:
            self.login()
# Script entry point: build the cracker and run one attempt.
if __name__ == '__main__':
    crack = CrackGeetest()
    crack.crack()
错误1. 报错:[17672:21484:0802/155619.485:ERROR:install_util.cc(603)] Failed to read HKLM\SOFTWARE\Policies\Google\Chrome\MachineLevelUserCloudPolicyEnrollmentToken: 系统找不到指定的文件。 (0x2) 解决:命令行运行「reg add HKLM\SOFTWARE\Policies\Google\Chrome /v MachineLevelUserCloudPolicyEnrollmentToken /t REG_SZ」,添加报错信息中提示的注册表值。