# ebookify-backend2 / pdf_to_image.py
# Geetansh
# Added constraint of processing only 2 pages at max
# b25434a
from pdf2image import convert_from_path
from pdf2image.exceptions import (
PDFInfoNotInstalledError,
PDFPageCountError,
PDFSyntaxError
)
# poppler_path = r"./Poppler/poppler-24.07.0/Library/bin"
# def pdfToImg(pdfPath, outputPath):
# '''
# 1)Images stored in output folder
# 2)It returns path to stored images
# '''
# images_paths = convert_from_path(pdfPath, 200, outputPath, fmt="jpeg", poppler_path=poppler_path, paths_only=True, last_page=4) # Only processing 4 pages at max
# return images_paths
# def pdfToImg2(pdfPath):
# '''
# 1)Returns a list of Pillow images
# '''
# images = convert_from_path(pdfPath, 200, fmt="jpeg", poppler_path=poppler_path, last_page=4) # Only processing 4 pages at max
# return images
# Modified version of the commented-out code above, for deployment on Hugging Face Spaces: no explicit poppler_path is passed.
def pdfToImg(pdfPath, outputPath, maxPages=2, dpi=200):
    """Render the leading pages of a PDF to JPEG files and return their paths.

    Args:
        pdfPath: Path of the PDF file to convert.
        outputPath: Folder handed to pdf2image as ``output_folder``; the
            rendered images are stored there.
        maxPages: Number of the last page to render (passed as
            ``last_page``). Defaults to 2 so that only 2 pages are processed
            at most; ``None`` lets pdf2image render every page.
        dpi: Rendering resolution passed to pdf2image. Defaults to 200.

    Returns:
        The paths of the stored images (``paths_only=True``), not image
        objects.

    Exceptions raised by ``convert_from_path`` are not caught here and
    propagate to the caller.
    """
    return convert_from_path(
        pdfPath,
        dpi=dpi,
        output_folder=outputPath,
        fmt="jpeg",
        paths_only=True,
        last_page=maxPages,  # caps the amount of work per document
    )
def pdfToImg2(pdfPath, maxPages=2, dpi=200):
    """Render the leading pages of a PDF and return them as Pillow images.

    Args:
        pdfPath: Path of the PDF file to convert.
        maxPages: Number of the last page to render (passed as
            ``last_page``). Defaults to 2 so that only 2 pages are processed
            at most; ``None`` lets pdf2image render every page.
        dpi: Rendering resolution passed to pdf2image. Defaults to 200.

    Returns:
        A list of Pillow images, one per rendered page. No output folder is
        given, so nothing is requested to be stored on disk by this call.

    Exceptions raised by ``convert_from_path`` are not caught here and
    propagate to the caller.
    """
    return convert_from_path(
        pdfPath,
        dpi=dpi,
        fmt="jpeg",
        last_page=maxPages,  # caps the amount of work per document
    )