利用百度 AI 开放平台的 OCR 文字识别 API 识别并提取图片中的文字。首先需注册并获取调用 API 所需的 ID 和 Key,步骤如下:
打开百度AI开放平台,进入控制台中的文字识别应用(需要有百度账号)。
创建一个应用,并进入管理应用,记下 AppID、API Key、Secret Key,调用 API 时需要用到。
py2,py3均适用
# coding=utf-8
import sys,time,json,base64,fitz
# 保证兼容python2以及python3
IS_PY3 = sys.version_info.major == 3
if IS_PY3:
from urllib.request import urlopen,Request
from urllib.error import URLError
from urllib.parse import urlencode,quote_plus
else:
import urllib2
from urllib import quote_plus,urlencode
from urllib2 import urlopen,URLError,Request
# Work around HTTPS certificate verification failures by disabling
# verification for the whole process.
# NOTE(review): this replaces ssl's default HTTPS context factory with the
# unverified one, so connections are no longer protected against
# man-in-the-middle attacks -- confirm this is acceptable outside experiments.
import ssl
ssl._create_default_https_context = ssl._create_unverified_context
class IMG_OCR():
    """Client for Baidu's ``accurate_basic`` OCR REST endpoint.

    Constructing an instance exchanges the API key / secret key for an OAuth
    access token and builds the request URL that :meth:`main` posts to.
    """

    def __init__(self, api_key='aq', secret_key='1u'):
        """Fetch an access token and prepare the OCR request URL.

        Args:
            api_key: Baidu application API key. Defaults to the placeholder
                value the code originally hard-coded.
            secret_key: Baidu application secret key (placeholder default).
        """
        self.API_KEY = api_key
        self.SECRET_KEY = secret_key
        self.OCR_URL = "https://aip.baidubce.com/rest/2.0/ocr/v1/accurate_basic"
        # OAuth token endpoint.
        self.TOKEN_URL = 'https://aip.baidubce.com/oauth/2.0/token'
        # Obtain the access token.
        self.token = self.fetch_token()
        # Build the final OCR URL carrying the token.
        self.image_url = self.OCR_URL + "?access_token=" + self.token

    def fetch_token(self):
        """Request an access token from ``TOKEN_URL``.

        Returns:
            The ``access_token`` string from the token response.

        Raises:
            SystemExit: if the request fails, the response lacks
                ``access_token``/``scope``, or the scope does not contain
                ``brain_all_scope``. A message is printed first and the exit
                status is 1.
        """
        params = {
            'grant_type': 'client_credentials',
            'client_id': self.API_KEY,
            'client_secret': self.SECRET_KEY
        }
        post_data = urlencode(params)
        if IS_PY3:
            post_data = post_data.encode('utf-8')
        req = Request(self.TOKEN_URL, post_data)
        try:
            f = urlopen(req, timeout=5)
            try:
                result_str = f.read()
            finally:
                f.close()
        except URLError as err:
            # Without a response there is nothing to parse; stop here instead
            # of falling through to an undefined result_str.
            print(err)
            sys.exit(1)
        if IS_PY3:
            result_str = result_str.decode()
        result = json.loads(result_str)
        if 'access_token' in result and 'scope' in result:
            if 'brain_all_scope' not in result['scope'].split(' '):
                print('please ensure has check the ability')
                sys.exit(1)
            return result['access_token']
        print('please overwrite the correct API_KEY and SECRET_KEY')
        sys.exit(1)

    def read_file(self, image_path):
        """Return the raw bytes of ``image_path``, or ``None`` if unreadable.

        A message is printed when the file cannot be opened or read.
        """
        try:
            with open(image_path, 'rb') as f:
                return f.read()
        except (IOError, OSError):
            print('read image file fail')
            return None

    def request(self, url, data):
        """POST ``data`` (a urlencoded text string) to ``url``.

        Returns:
            The response body as text, or ``None`` when the request fails
            with ``URLError`` (the error is printed).
        """
        req = Request(url, data.encode('utf-8'))
        try:
            f = urlopen(req)
            try:
                result_str = f.read()
            finally:
                f.close()
        except URLError as err:
            print(err)
            return None
        if IS_PY3:
            result_str = result_str.decode()
        return result_str

    def main(self, file_content):
        """Run OCR on raw image bytes and return the recognized lines.

        Args:
            file_content: image bytes, e.g. the result of :meth:`read_file`.

        Returns:
            A list with the ``words`` value of every ``words_result`` entry.
            The list is empty when there is no image content, the request
            failed, or the response carries no ``words_result`` (for example
            an error payload).
        """
        if not file_content:
            return []
        # Call the OCR service with the base64-encoded image.
        result = self.request(
            self.image_url,
            urlencode({'image': base64.b64encode(file_content)}))
        if result is None:
            return []
        # Parse the response; the raw payload is printed as before so that
        # service-side errors remain visible.
        result_json = json.loads(result)
        print(result_json)
        words_result = result_json.get("words_result")
        if words_result is None:
            return []
        return [item["words"] for item in words_result]
class PDF_OCR():
    """Render each page of a PDF to an image file and OCR it with IMG_OCR."""

    def __init__(self):
        """Create the OCR client that page images are sent through."""
        self.img_ocr = IMG_OCR()

    def pdf_image(self, pdfPath, imgPath, zoom_x=5, zoom_y=5, rotation_angle=0):
        """Render every page of a PDF to ``imgPath + <page index> + ".jpg"``.

        Args:
            pdfPath: path of the PDF file.
            imgPath: prefix (typically a folder path ending in a separator)
                that the page index and ".jpg" are appended to.
            zoom_x: scale factor in the x direction.
            zoom_y: scale factor in the y direction.
            rotation_angle: rotation angle applied to each page.

        Returns:
            The number of pages in the PDF.
        """
        # Open the PDF; the finally clause closes it even if rendering fails.
        pdf = fitz.open(pdfPath)
        try:
            page_count = pdf.pageCount
            # The zoom/rotation matrix is the same for every page.
            trans = fitz.Matrix(zoom_x, zoom_y).preRotate(rotation_angle)
            for pg in range(page_count):
                pm = pdf[pg].getPixmap(matrix=trans, alpha=False)
                # NOTE(review): writePNG presumably emits PNG data even though
                # the name ends in ".jpg"; the suffix is kept because main()
                # reads the files back under exactly these names.
                pm.writePNG(imgPath + str(pg) + ".jpg")
        finally:
            pdf.close()
        return page_count

    def main(self, pdfPath, imgPath):
        """OCR a whole PDF and return the recognized lines of all pages.

        Pages whose rendered image cannot be read back are skipped.

        Args:
            pdfPath: path of the PDF file.
            imgPath: prefix under which the page images are written.

        Returns:
            A flat list of recognized text lines in page order.
        """
        page_count = self.pdf_image(pdfPath, imgPath)
        text_list = []
        for page in range(page_count):
            file_content = self.img_ocr.read_file(imgPath + '%s.jpg' % page)
            if file_content is None:
                # read_file already reported the failure; nothing to OCR.
                continue
            text_list.extend(self.img_ocr.main(file_content))
        return text_list
if __name__ == '__main__':
    # Demo: OCR a single page image and print the recognized lines.
    img_ocr = IMG_OCR()
    # Read the book page image.
    file_content = img_ocr.read_file('./zlgl_img/2020-01-10.jpg')
    if file_content is None:
        # read_file already printed the reason; there is nothing to recognize.
        sys.exit(1)
    text = img_ocr.main(file_content)
    print(text)
安装百度 AI 接口的 Python 库:
pip install baidu-aip
以下是代码实现,需将所有识别的图片放进名为 picture 的文件夹。
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
利用百度api实现图片文本识别
@author:
"""
import glob
from os import path
import os
from aip import AipOcr
from PIL import Image
def convertimg(picfile, outdir, max_pixels=4000000):
    """Shrink an oversized image and save the result into ``outdir``.

    The width and height are halved repeatedly until their product no longer
    exceeds ``max_pixels``. Images already within the limit are re-saved at
    their original size so that every input ends up in ``outdir``.

    Args:
        picfile: path of the source image.
        outdir: directory the output image is written to, under the same
            base name as ``picfile``.
        max_pixels: largest allowed pixel count. Per the original author's
            note, the default yields files of a bit over 200 KB.
    """
    # The context manager releases the underlying file handle when done.
    with Image.open(picfile) as img:
        width, height = img.size
        # Never let a dimension reach 0 (extremely thin images), and stop
        # once the image cannot get any smaller.
        while width * height > max_pixels and (width > 1 or height > 1):
            width = max(1, width // 2)
            height = max(1, height // 2)
        new_img = img.resize((width, height), Image.BILINEAR)
        new_img.save(path.join(outdir, os.path.basename(picfile)))
def baiduOCR(picfile, outfile):
    """Recognize the text in an image with the Baidu API and append it to a file.

    Args:
        picfile: path of the image file.
        outfile: path of the text file the result block is appended to.

    When the response contains no ``words_result`` (for example an error
    payload), the response is printed and nothing is written to ``outfile``.
    """
    filename = path.basename(picfile)

    APP_ID = '******'  # the ID obtained during registration; same for the keys below
    API_KEY = '******'
    SECRET_KEY = '******'
    client = AipOcr(APP_ID, API_KEY, SECRET_KEY)

    with open(picfile, 'rb') as image_file:
        img = image_file.read()

    print("正在识别图片:\t" + filename)
    # General text recognition (original author's note: 50,000 free calls per
    # day). client.basicAccurate(img) is the high-accuracy alternative
    # (original author's note: 800 free calls per day).
    message = client.basicGeneral(img)
    words_result = message.get('words_result')
    if words_result is None:
        # Report the failure instead of claiming success and then crashing
        # while iterating over None.
        print("识别失败:\t" + str(message))
        print()
        return
    print("识别成功!")

    lines = [
        "+" * 60 + '\n',
        "识别图片:\t" + filename + "\n" * 2,
        "文本内容:\n",
    ]
    # One output line per recognized text line.
    lines.extend(text.get('words') + '\n' for text in words_result)
    lines.append('\n' * 2)
    # Explicit UTF-8 so the Chinese text does not depend on the platform's
    # default encoding.
    with open(outfile, 'a+', encoding='utf-8') as fo:
        fo.writelines(lines)
    print("文本导出成功!")
    print()
if __name__ == "__main__":
    # Batch driver: shrink every image under picture/ into a temporary
    # folder, OCR each one into export.txt, then remove the temporary files.
    outfile = 'export.txt'
    outdir = 'tmp'
    # Start from a fresh export file.
    if path.exists(outfile):
        os.remove(outfile)
    if not path.exists(outdir):
        os.mkdir(outdir)
    print("压缩过大的图片...")
    # Shrink oversized images first to speed up recognition; the results are
    # stored in the temporary folder.
    for picfile in glob.glob("picture/*"):
        convertimg(picfile, outdir)
    print("图片识别...")
    for picfile in glob.glob(path.join(outdir, "*")):
        baiduOCR(picfile, outfile)
        os.remove(picfile)
    print('图片文本提取结束!文本输出结果位于 %s 文件中。' % outfile)
    os.removedirs(outdir)
github:https://github.com/Baidu-AIP/QuickStart