推测是 PDF 转图片时缩放系数过小,导致生成的图片分辨率过低。可修改 ppocr/utils/utility.py,从第 109 行开始:
elif os.path.basename(img_path)[-3:].lower() == 'pdf':
import fitz
from PIL import Image, ImageFilter
imgs = []
with fitz.open(img_path) as pdf:
for pg in range(0, pdf.page_count):
page = pdf[pg]
要提高 PDF 转换为图片时的分辨率,可以调整 fitz.Matrix 中的缩放因子。原代码中的缩放因子设置为 2(即下方被注释掉的 fitz.Matrix(2, 2)),这意味着渲染出的图片宽、高均为 PDF 页面默认渲染尺寸的两倍。
# mat = fitz.Matrix(2, 2)
mat = fitz.Matrix(1, 1)
pm = page.get_pixmap(matrix=mat, alpha=False)
# if width and height > 2000 pixels, don't enlarge the image
if pm.width > 2000 and pm.height > 2000:
pm = page.get_pixmap(matrix=fitz.Matrix(1, 1), alpha=False)
else:
scale = int(max(2000/pm.width,2000/pm.height))
pm = page.get_pixmap(matrix=fitz.Matrix(scale, scale), alpha=False)
print('convert PDF to image with width {} , height {}'.format(pm.width,pm.height))
img = Image.frombytes("RGB", [pm.width, pm.height], pm.samples)
# 应用锐化滤波器
img = img.filter(ImageFilter.SHARPEN)
img = cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR)
imgs.append(img)
return imgs, False, True