python --读pdf详解
装包
pip install pdfminer3k
详解
# -*- encoding: utf-8 -*-
from io import StringIO
from pdfminer.pdfinterp import PDFResourceManager, process_pdf
from pdfminer.converter import TextConverter
from pdfminer.layout import LAParams
# 读取pdf的函数,返回内容
def readPdf(pdf_file):
rsrcmgr = PDFResourceManager()
retstr = StringIO()
laparams = LAParams()
device = TextConverter(rsrcmgr=rsrcmgr, outfp=retstr, laparams=laparams)
process_pdf(rsrcmgr=rsrcmgr, device=device, fp=pdf_file)
device.close()
content = retstr.getvalue()
retstr.close()
return content
pdf_file = open(r'C:\Users\EDY\Desktop\2.pdf', 'rb')
content = readPdf(pdf_file)
print(content)
pdf_file.close()