目录
使用到的工具
使用步骤
公式识别接口申请
公式识别示例代码
实现截图示例代码
最终代码与效果
同类的工具有:妙手OCR、Mathpix等。有些收费,有些不好用,这里自己随便实现一个。 (个人比较喜欢妙手OCR,不限次数又好用,虽然偶尔识别不准)
使用到的工具
讯飞公式识别API:个人开发者每天500次免费额度,一般够用了。
PrScrn.dll:旧版微信的截图dll(新版微信已更改)。
相关库:pyperclip、PIL、ctypes、re、os、time、requests、datetime、hashlib、base64、hmac、json
使用步骤
公式识别接口申请
1、先去讯飞申请免费接口
公式识别 - 图像识别 - 讯飞开放平台 (xfyun.cn)
申请完记下密钥
接口文档:
公式识别 API 文档 | 讯飞开放平台文档中心 (xfyun.cn)
公式识别示例代码
使用示例代码测试,注意先把上面的密钥填入
api的示例代码formulaRecognition.py:
代码语言:python
#!/usr/bin/env python
# -*- coding:utf-8 -*-
# 1.接口文档(必看):https://www.xfyun.cn/doc/words/formula-discern/API.html
# 2.错误码链接:https://www.xfyun.cn/document/error-code (错误码code为5位数字)
#
import requests
import datetime
import hashlib
import base64
import hmac
import json
class FormulaRecognition(object):
    """Client for the xfyun (iFLYTEK) formula-recognition REST endpoint.

    Builds the JSON body and the HMAC-SHA256 signed headers, then posts a
    base64-encoded image to ``https://rest-api.xfyun.cn/v2/itr``.
    Fill in ``APPID``, ``Secret`` and ``APIKey`` before calling ``call_url``.
    """

    def __init__(self):
        # Application ID (from the xfyun console).
        self.APPID = ""
        # API secret (from the formula-recognition service page of the console).
        self.Secret = ""
        # API key (from the formula-recognition service page of the console).
        self.APIKey = ""

        # Target of the POST request.
        self.Host = "rest-api.xfyun.cn"
        self.RequestUri = "/v2/itr"
        self.url = "https://" + self.Host + self.RequestUri
        self.HttpMethod = "POST"
        self.Algorithm = "hmac-sha256"
        self.HttpProto = "HTTP/1.1"

        # The Date header value is computed once here and is also part of the
        # signed string, so every request from this instance reuses it.
        curTime_utc = datetime.datetime.now(datetime.timezone.utc)
        self.Date = self.httpdate(curTime_utc)

        self.BusinessArgs = {
            "ent": "teach-photo-print",
            "aue": "raw",
        }

    def imgRead(self, path):
        """Return the raw bytes of the file at *path*."""
        with open(path, 'rb') as fo:
            return fo.read()

    def hashlib_256(self, res):
        """Return the Digest header value for the string *res*.

        The value is ``"SHA-256="`` followed by the base64-encoded SHA-256
        hash of the UTF-8 encoded input.
        """
        m = hashlib.sha256(res.encode(encoding='utf-8')).digest()
        result = "SHA-256=" + base64.b64encode(m).decode(encoding='utf-8')
        return result

    def httpdate(self, dt):
        """
        Return a string representation of a date according to RFC 1123
        (HTTP/1.1).
        The supplied date must be in UTC.
        """
        weekday = ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"][dt.weekday()]
        month = ["Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep",
                 "Oct", "Nov", "Dec"][dt.month - 1]
        return "%s, %02d %s %04d %02d:%02d:%02d GMT" % (
            weekday, dt.day, month, dt.year, dt.hour, dt.minute, dt.second)

    def generateSignature(self, digest):
        """Return the base64 HMAC-SHA256 signature for a request.

        The signed string has four newline-separated parts, in the order
        announced by the Authorization header: host, date, request-line,
        digest. *digest* is the value produced by ``hashlib_256``.
        """
        signatureStr = "host: " + self.Host + "\n"
        signatureStr += "date: " + self.Date + "\n"
        signatureStr += (self.HttpMethod + " " + self.RequestUri
                         + " " + self.HttpProto + "\n")
        signatureStr += "digest: " + digest
        signature = hmac.new(self.Secret.encode(encoding='utf-8'),
                             signatureStr.encode(encoding='utf-8'),
                             digestmod=hashlib.sha256).digest()
        result = base64.b64encode(signature)
        return result.decode(encoding='utf-8')

    def init_header(self, data):
        """Return the HTTP headers (including Authorization) for body *data*."""
        digest = self.hashlib_256(data)
        sign = self.generateSignature(digest)
        authHeader = ('api_key="%s", algorithm="%s", '
                      'headers="host date request-line digest", '
                      'signature="%s"'
                      % (self.APIKey, self.Algorithm, sign))
        headers = {
            "Content-Type": "application/json",
            "Accept": "application/json",
            "Method": "POST",
            "Host": self.Host,
            "Date": self.Date,
            "Digest": digest,
            "Authorization": authHeader
        }
        return headers

    def get_body(self, image_path):
        """Return the JSON request body carrying the image at *image_path*.

        The image bytes are base64-encoded and placed under ``data.image``.
        """
        imageData = self.imgRead(image_path)
        content = base64.b64encode(imageData).decode(encoding='utf-8')
        postdata = {
            "common": {"app_id": self.APPID},
            "business": self.BusinessArgs,
            "data": {
                "image": content,
            }
        }
        body = json.dumps(postdata)
        return body

    def call_url(self, image_path):
        """Send the image at *image_path* to the service.

        Returns the decoded JSON response, or ``None`` (after printing a
        hint) when any of APPID / APIKey / Secret is still empty.
        Diagnostics for HTTP or API errors are printed; the response body is
        still decoded and returned so the caller can inspect it, which
        raises if the body is not valid JSON.
        """
        if self.APPID == '' or self.APIKey == '' or self.Secret == '':
            print('Appid 或APIKey 或APISecret 为空!请打开demo代码,填写相关信息。')
            return None

        body = self.get_body(image_path)
        headers = self.init_header(body)
        response = requests.post(self.url, data=body, headers=headers, timeout=8)
        status_code = response.status_code
        if status_code != 200:
            # Authentication (or another HTTP-level step) failed.
            print("Http请求失败,状态码:" + str(status_code) + ",错误信息:" + response.text)
            print("请根据错误信息检查代码,接口文档:https://www.xfyun.cn/doc/words/formula-discern/API.html")
        else:
            # HTTP call succeeded; a non-zero API code still signals an error.
            respData = json.loads(response.text)
            code = str(respData["code"])
            if code != '0':
                print("请前往https://www.xfyun.cn/document/error-code?code=" + code + "查询解决办法")
        return response.json()
if __name__ == '__main__':
    # Demo entry point: send the sample image 01.png to the service.
    gClass = FormulaRecognition()
    gClass.call_url('01.png')
测试图片:
实现截图示例代码
先导入库
代码语言:python
import re
import time
import pyperclip
from PIL import Image, ImageGrab
import os
from formulaRecognition import FormulaRecognition
import ctypes
截图功能可以简单地直接使用“旧版的QQ、微信的dll”来实现,相比之下,微信的dll更好用点。Python中可以使用ctypes来调用dll,但可能会遇到一些问题,详见:
Python调用动态链接库DLL文件_小锋学长生活大爆炸的博客-CSDN博客简单用用dll
由于本机python为64位,dll为32位,因此这里使用rundll32来实现,如下:
代码语言:python
os.system('RUNDLL32.EXE PrScrn.dll PrScrn')
为了在截图时候隐藏当前的窗口,可以使用如下方式:
代码语言:python
window_handle = ctypes.windll.kernel32.GetConsoleWindow()
# 0:隐藏;6:最小化
# 1:显示;3:最大化
ctypes.windll.user32.ShowWindow(window_handle, 0)
time.sleep(0.5)
os.system('RUNDLL32.EXE PrScrn.dll PrScrn')
ctypes.windll.user32.ShowWindow(window_handle, 1)
微信dll截图后的图片存放在剪切板,因此需要从剪切板上读取后,再使用讯飞API来识别,由于pyperclip只支持文字,因此对于图片类的,需要使用PIL库。如下:
代码语言:python
img = ImageGrab.grabclipboard()
if not isinstance(img, Image.Image):
return
print('>> 识别中...')
image_path = "formula_recognition_temp.png"
img.save(image_path)
recognition = FormulaRecognition()
res = recognition.call_url(image_path)
os.remove(image_path)
if res['code'] != 0:
return
content = res['data']['region'][0]['recog']['content']
content = re.sub(r'ifly-latex-begin', '', content)
content = re.sub(r'ifly-latex-end', '', content)
# 将结果复制到剪切板
pyperclip.copy(content)
print(content)
最终代码与效果
代码语言:python
import re
import time
import pyperclip
from PIL import Image, ImageGrab
import os
from formulaRecognition import FormulaRecognition
import ctypes
def _extract_latex(res):
    """Return the LaTeX text of the first recognised region, or None.

    Strips the ``ifly-latex-begin`` / ``ifly-latex-end`` markers from the
    content. Returns None when the response lacks the expected
    ``data.region[0].recog.content`` path.
    """
    try:
        content = res['data']['region'][0]['recog']['content']
    except (KeyError, IndexError, TypeError):
        return None
    content = re.sub(r'ifly-latex-begin', '', content)
    content = re.sub(r'ifly-latex-end', '', content)
    return content


def start():
    """Run one capture-and-recognise round.

    Hides the console window, launches the PrScrn.dll screenshot tool via
    rundll32, reads the captured image from the clipboard, sends it to the
    formula-recognition service and copies the resulting LaTeX to the
    clipboard (also printing it). Returns None in every case; returns early
    when the clipboard holds no image or recognition yields no result.
    """
    window_handle = ctypes.windll.kernel32.GetConsoleWindow()
    # ShowWindow codes: 0 = hide, 1 = show, 3 = maximise, 6 = minimise.
    ctypes.windll.user32.ShowWindow(window_handle, 0)
    try:
        # Short pause before the capture tool starts.
        time.sleep(0.5)
        os.system('RUNDLL32.EXE PrScrn.dll PrScrn')
    finally:
        # Always bring the console back, even if the capture step raised.
        ctypes.windll.user32.ShowWindow(window_handle, 1)

    # The screenshot tool leaves its result on the clipboard.
    img = ImageGrab.grabclipboard()
    if not isinstance(img, Image.Image):
        return

    print('>> 识别中...')
    image_path = "formula_recognition_temp.png"
    try:
        img.save(image_path)
        recognition = FormulaRecognition()
        res = recognition.call_url(image_path)
    finally:
        # Remove the temp file whether or not the request succeeded.
        if os.path.exists(image_path):
            os.remove(image_path)

    # call_url returns None when credentials are missing; a non-zero code
    # means the service reported an error (already printed by call_url).
    if not isinstance(res, dict) or res.get('code') != 0:
        return

    content = _extract_latex(res)
    if content is None:
        print('>> 未识别到公式')
        return

    # Copy the result to the clipboard.
    pyperclip.copy(content)
    print(content)
if __name__ == '__main__':
    # Interactive loop: Enter runs one capture-and-recognise round, "q" quits.
    while True:
        if input('>> 按回车启动,按q退出: ').strip().lower() == 'q':
            break
        print('>> 启动中...')
        try:
            start()
        except Exception as e:
            # Top-level boundary: report the failure and keep the loop alive.
            print(e)
运行中:
识别结果:
代码语言:latex
\frac {c}{a} \sin A= \frac {bc}{a^ {2}} \sin ^ {2} A= \frac {20}{21} \times \frac {3}{4} = \frac {5}{7}
放入markdown看下效果: