您当前的位置：首页 > 计算机 > 编程开发 > Python

Python调用百度文字识别API识别并提取图片中文字

时间：08-24来源：作者：点击数：20

利用百度 AI 开放平台的 OCR 文字识别 API 识别并提取图片中的文字。首先需注册并获取调用 API 所需的 ID 和 Key，步骤如下：

打开百度 AI 开放平台，进入控制台中的文字识别应用（需要有百度账号）。

创建一个应用，并进入管理应用，记下 AppID、API Key、Secret Key，调用 API 时需要用到。

方法1完整代码

py2，py3均适用

# coding=utf-8
import sys,time,json,base64,fitz
# 保证兼容python2以及python3
IS_PY3 = sys.version_info.major == 3
if IS_PY3:
    from urllib.request import urlopen,Request
    from urllib.error import URLError
    from urllib.parse import urlencode,quote_plus
else:
    import urllib2
    from urllib import quote_plus,urlencode
    from urllib2 import urlopen,URLError,Request
# Work around HTTPS certificate validation failures.
# WARNING: this swaps the default HTTPS context factory for the unverified
# one, so certificate checks are skipped for HTTPS requests that rely on the
# default context from here on (process-wide). Acceptable for a demo only.
import ssl
ssl._create_default_https_context = ssl._create_unverified_context


class IMG_OCR():
    """Client for the Baidu ``accurate_basic`` OCR REST endpoint.

    Creating an instance immediately requests an OAuth access token with the
    configured API key / secret key, so construction performs network I/O and
    exits the process (``SystemExit``) when no usable token can be obtained.
    """

    def __init__(self):
        # Credentials of the Baidu AI application; replace with real values.
        self.API_KEY = 'aq'
        self.SECRET_KEY = '1u'
        self.OCR_URL = "https://aip.baidubce.com/rest/2.0/ocr/v1/accurate_basic"
        self.TOKEN_URL = 'https://aip.baidubce.com/oauth/2.0/token'
        # Fetch the access token.
        self.token = self.fetch_token()
        # OCR request URL with the token attached as a query parameter.
        self.image_url = self.OCR_URL + "?access_token=" + self.token

    def fetch_token(self):
        """Request an OAuth access token and return it.

        Prints a diagnostic and exits the process with status 1 when the
        token endpoint cannot be reached, when the response carries no
        ``access_token``/``scope``, or when the scope lacks
        ``brain_all_scope``.
        """
        params = {
            'grant_type': 'client_credentials',
            'client_id': self.API_KEY,
            'client_secret': self.SECRET_KEY
        }
        post_data = urlencode(params)
        if IS_PY3:
            post_data = post_data.encode('utf-8')
        req = Request(self.TOKEN_URL, post_data)
        try:
            f = urlopen(req, timeout=5)
            try:
                result_str = f.read()
            finally:
                f.close()
        except URLError as err:
            # There is no response to parse, so stop here instead of falling
            # through with an unbound result.
            print(err)
            sys.exit(1)
        if IS_PY3:
            result_str = result_str.decode()

        result = json.loads(result_str)
        if 'access_token' in result and 'scope' in result:
            if 'brain_all_scope' not in result['scope'].split(' '):
                print('please ensure has check the ability')
                sys.exit(1)
            return result['access_token']
        print('please overwrite the correct API_KEY and SECRET_KEY')
        sys.exit(1)

    def read_file(self, image_path):
        """Return the raw bytes of ``image_path``, or ``None`` if it cannot be read."""
        try:
            with open(image_path, 'rb') as f:
                return f.read()
        except (IOError, OSError, TypeError, ValueError):
            # TypeError/ValueError cover malformed path arguments (e.g. None).
            print('read image file fail')
            return None

    def request(self, url, data):
        """POST the url-encoded string ``data`` to ``url``.

        Returns the response body (decoded to text on Python 3), or ``None``
        after printing the error when the request fails with ``URLError``.
        """
        req = Request(url, data.encode('utf-8'))
        try:
            f = urlopen(req)
            try:
                result_str = f.read()
            finally:
                f.close()
        except URLError as err:
            print(err)
            return None
        if IS_PY3:
            result_str = result_str.decode()
        return result_str

    def main(self, file_content):
        """Run OCR on raw image bytes and return the recognised text lines.

        Returns a list with one string per recognised line. The list is empty
        when ``file_content`` is ``None`` or empty, when the request fails, or
        when the response has no ``words_result`` entry (the parsed response
        is printed in any case, so an error payload stays visible).
        """
        if not file_content:
            return []
        # Call the OCR service with the base64-encoded image.
        result = self.request(self.image_url, urlencode({'image': base64.b64encode(file_content)}))
        if result is None:
            return []
        # Parse the response.
        result_json = json.loads(result)
        print(result_json)
        return [words_result["words"] for words_result in result_json.get("words_result", [])]


class PDF_OCR():
    """Extract text from a PDF by rendering each page to an image and running OCR on it."""

    def __init__(self):
        # The OCR client requests its access token on construction.
        self.img_ocr = IMG_OCR()

    def pdf_image(self, pdfPath, imgPath, zoom_x=5, zoom_y=5, rotation_angle=0):
        """Render every page of a PDF to an image file.

        Args:
            pdfPath: path of the PDF file.
            imgPath: prefix of the output files; page ``n`` (0-based) is
                written to ``imgPath + str(n) + ".jpg"``, so pass a trailing
                path separator to write into a folder.
            zoom_x: zoom factor in the x direction.
            zoom_y: zoom factor in the y direction.
            rotation_angle: rotation angle applied to the page matrix.

        Returns:
            The number of pages in the PDF.
        """
        # NOTE(review): pageCount / preRotate / getPixmap / writePNG are the
        # camelCase PyMuPDF names; confirm the installed version still
        # provides them.
        pdf = fitz.open(pdfPath)
        try:
            page_count = pdf.pageCount
            # The same zoom/rotation applies to every page, so build it once.
            trans = fitz.Matrix(zoom_x, zoom_y).preRotate(rotation_angle)
            for pg in range(page_count):
                pm = pdf[pg].getPixmap(matrix=trans, alpha=False)
                # Written with writePNG under a ".jpg" name; main() reads the
                # file back under exactly this name.
                pm.writePNG(imgPath + str(pg) + ".jpg")
        finally:
            # Close the document even if rendering a page fails.
            pdf.close()

        return page_count

    def main(self, pdfPath, imgPath):
        """OCR a whole PDF and return the recognised lines of all pages, in page order.

        Pages whose image file cannot be read are skipped.
        """
        page_count = self.pdf_image(pdfPath, imgPath)
        text_list = []
        for page in range(page_count):
            file_content = self.img_ocr.read_file(imgPath + '%s.jpg' % page)
            if file_content is None:
                # read_file already reported the failure.
                continue
            # extend() appends in place instead of rebuilding the list per page.
            text_list.extend(self.img_ocr.main(file_content))
        return text_list
    

if __name__ == '__main__':
    # Recognise the text of one sample page image and print the extracted lines.
    ocr_client = IMG_OCR()
    image_bytes = ocr_client.read_file('./zlgl_img/2020-01-10.jpg')
    print(ocr_client.main(image_bytes))

方法2

安装百度 AI 接口的 Python 库

pip install baidu-aip

以下是代码实现，需将所有待识别的图片放进名为 picture 的文件夹。

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
利用百度api实现图片文本识别
@author: 
"""

import glob
from os import path
import os
from aip import AipOcr
from PIL import Image

def convertimg(picfile, outdir, max_pixels=4000000):
    """Save a copy of an image in ``outdir``, downscaled if it is too large.

    Width and height are halved repeatedly until the pixel count is at most
    ``max_pixels``; the image is then resized with bilinear resampling and
    saved under its original file name.

    Args:
        picfile: path of the source image.
        outdir: directory that receives the (possibly shrunk) copy.
        max_pixels: largest allowed ``width * height``. The original author
            notes that the default yields files of roughly 200-odd KB.
    """
    # The context manager releases the underlying file handle when done.
    with Image.open(picfile) as img:
        width, height = img.size
        while width * height > max_pixels and (width > 1 or height > 1):
            # Never let a side collapse to 0 on extremely elongated images.
            width = max(1, width // 2)
            height = max(1, height // 2)
        new_img = img.resize((width, height), Image.BILINEAR)
        new_img.save(path.join(outdir, os.path.basename(picfile)))
    
def baiduOCR(picfile, outfile):
    """Recognise the text in one image with the Baidu OCR API and append it to a file.

    On success a delimited record (separator line, image name, recognised
    lines) is appended to ``outfile`` as UTF-8. When the response has no
    ``words_result`` entry the response is printed and nothing is written.

    Args:
        picfile: path of the image file.
        outfile: path of the text file the result is appended to.
    """
    filename = path.basename(picfile)

    APP_ID = '******'  # credentials of the Baidu AI application created earlier
    API_KEY = '******'
    SECRET_KEY = '******'
    client = AipOcr(APP_ID, API_KEY, SECRET_KEY)

    with open(picfile, 'rb') as image_file:
        img = image_file.read()
    print("正在识别图片：\t" + filename)
    # General text recognition; client.basicAccurate(img) is the high-accuracy
    # variant (the original note lists 50,000 vs 800 free calls per day).
    message = client.basicGeneral(img)

    words_result = message.get('words_result')
    if words_result is None:
        # No 'words_result' key (presumably an API error payload): report it
        # instead of claiming success and failing on None below.
        print("识别失败：\t" + str(message))
        return
    print("识别成功！")

    lines = [
        "+" * 60 + '\n',
        "识别图片：\t" + filename + "\n" * 2,
        "文本内容：\n",
    ]
    # One output line per recognised text line.
    lines.extend(text.get('words', '') + '\n' for text in words_result)
    lines.append('\n' * 2)
    # Explicit UTF-8 so recognised text never fails to encode under the
    # platform's default locale encoding.
    with open(outfile, 'a+', encoding='utf-8') as fo:
        fo.writelines(lines)
    print("文本导出成功！")
    print()

if __name__ == "__main__":
    # Batch-OCR every image in ./picture and collect the text in export.txt.
    outfile = 'export.txt'
    outdir = 'tmp'
    # Start from a fresh output file; baiduOCR appends to it.
    if path.exists(outfile):
        os.remove(outfile)
    if not path.exists(outdir):
        os.mkdir(outdir)
    print("压缩过大的图片...")
    # First shrink oversized images to speed up recognition; the copies are
    # stored in the temporary folder.
    for picfile in sorted(glob.glob("picture/*")):
        convertimg(picfile, outdir)
    print("图片识别...")
    # Sorted so the records in the output file follow a stable order.
    for picfile in sorted(glob.glob(path.join(outdir, "*"))):
        baiduOCR(picfile, outfile)
        os.remove(picfile)
    print('图片文本提取结束！文本输出结果位于 %s 文件中。' % outfile)
    os.removedirs(outdir)