File size: 1,341 Bytes
6604d8f
 
 
 
 
 
 
 
 
 
 
 
 
 
78e4afe
6604d8f
 
 
 
 
 
78e4afe
6604d8f
 
 
 
 
 
 
 
b25434a
6604d8f
 
 
 
 
 
b25434a
6604d8f
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
from pdf2image import convert_from_path
from pdf2image.exceptions import (
    PDFInfoNotInstalledError,
    PDFPageCountError,
    PDFSyntaxError
)

# poppler_path = r"./Poppler/poppler-24.07.0/Library/bin"

# def pdfToImg(pdfPath, outputPath):
#     '''
#     1)Images stored in output folder
#     2)It returns path to stored images 
#     '''
#     images_paths = convert_from_path(pdfPath, 200, outputPath, fmt="jpeg", poppler_path=poppler_path, paths_only=True, last_page=4) # Only processing 4 pages at max 
#     return images_paths

# def pdfToImg2(pdfPath):
#     '''
#     1)Returns a list of Pillow images
#     '''
#     images = convert_from_path(pdfPath, 200, fmt="jpeg", poppler_path=poppler_path, last_page=4) # Only processing 4 pages at max 
#     return images

# Changed version of above code for deployment on huggingface spaces 
def pdfToImg(pdfPath, outputPath, *, dpi=200, max_pages=2):
    '''
    Render the leading pages of a PDF to JPEG files on disk.

    1)Images stored in output folder
    2)It returns path to stored images

    Args:
        pdfPath: Path of the PDF file to convert.
        outputPath: Folder handed to pdf2image as its output folder.
        dpi: Rendering resolution forwarded to pdf2image (default 200).
        max_pages: Number of the last page to convert (default 2, i.e. only
            the first two pages). Pass None to let pdf2image convert every page.

    Returns:
        The paths of the generated images, as returned by
        convert_from_path(..., paths_only=True).
    '''
    # Keyword arguments are used so the call does not depend on the positional
    # order of convert_from_path's parameters.
    images_paths = convert_from_path(
        pdfPath,
        dpi=dpi,
        output_folder=outputPath,
        fmt="jpeg",
        paths_only=True,
        last_page=max_pages,  # caps the amount of work done per document
    )
    return images_paths

def pdfToImg2(pdfPath, *, dpi=200, max_pages=2):
    '''
    Render the leading pages of a PDF to in-memory images.

    1)Returns a list of Pillow images

    Args:
        pdfPath: Path of the PDF file to convert.
        dpi: Rendering resolution forwarded to pdf2image (default 200).
        max_pages: Number of the last page to convert (default 2, i.e. only
            the first two pages). Pass None to let pdf2image convert every page.

    Returns:
        The list of images produced by convert_from_path, one per
        converted page.
    '''
    # No output folder is given, so nothing is requested to be stored on disk;
    # the images are returned directly to the caller.
    images = convert_from_path(
        pdfPath,
        dpi=dpi,
        fmt="jpeg",
        last_page=max_pages,  # caps the amount of work done per document
    )
    return images