Tejas1206 committed on
Commit
c755fa6
·
1 Parent(s): 294dbbc
Files changed (2) hide show
  1. app.py +102 -0
  2. requirements.txt +6 -0
app.py ADDED
@@ -0,0 +1,102 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+ import io
3
+ import torch
4
+ import gradio as gr
5
+ from PIL import Image
6
+ from qwen_vl_utils import process_vision_info
7
+ from transformers import Qwen2VLForConditionalGeneration, AutoTokenizer, AutoProcessor
8
+
9
def OCRmodel(model_id="Qwen/Qwen2-VL-2B-Instruct"):
    """Load the Qwen2-VL model and its matching processor.

    Args:
        model_id: Checkpoint identifier handed to both ``from_pretrained``
            calls. Defaults to the 2B instruct checkpoint, so existing
            zero-argument callers behave exactly as before.

    Returns:
        A ``(model, processor)`` tuple. The model has already been moved to
        CUDA when ``torch.cuda.is_available()`` is true, otherwise to CPU.
    """
    # Model and processor must come from the same checkpoint, so the id is
    # taken from one place instead of being repeated as two literals.
    model = Qwen2VLForConditionalGeneration.from_pretrained(
        model_id,
        trust_remote_code=True,
        torch_dtype=torch.float32,
    )
    processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True)

    # Prefer the GPU when one is present; fall back to CPU otherwise.
    device = "cuda" if torch.cuda.is_available() else "cpu"
    model.to(device)

    return model, processor


# Load once at import time so every request reuses the same model/processor.
model, processor = OCRmodel()
23
+
24
+ # Function to read the image and process it for OCR
25
def ocr(image_data):
    """Extract the text visible in an image using the loaded Qwen2-VL model.

    Args:
        image_data: The image to read. It is placed in the chat message as
            the ``"image"`` entry and resolved by ``process_vision_info``;
            the Gradio UI in this file supplies a ``PIL.Image.Image``.

    Returns:
        The decoded model reply as a single string with surrounding
        whitespace stripped.

    Raises:
        ValueError: If ``image_data`` is ``None``.
    """
    if image_data is None:
        # Fail early with a clear message instead of deep inside preprocessing.
        raise ValueError("ocr() requires an image, got None")

    text_query = "Extract all the text in Sanskrit and English from the image."

    # Single-turn chat: one user message carrying the image and the instruction.
    messages = [
        {
            "role": "user",
            "content": [
                {"type": "image", "image": image_data},
                {"type": "text", "text": text_query},
            ],
        }
    ]

    # Render the chat prompt and pull the image inputs out of the messages.
    prompt = processor.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    image_inputs, _ = process_vision_info(messages)

    inputs = processor(
        text=[prompt],
        images=image_inputs,
        padding=True,
        return_tensors="pt",
    )
    # Follow the model's actual device rather than re-deriving it, so the
    # inputs can never end up on a different device than the weights.
    inputs = inputs.to(model.device)

    # Inference only: no gradients are needed.
    with torch.no_grad():
        generated_ids = model.generate(
            **inputs, max_new_tokens=2000, no_repeat_ngram_size=3, temperature=0.7
        )

    # Slice off the prompt-length prefix of each output sequence so that only
    # the newly generated tokens are decoded.
    generated_ids_trimmed = [
        out_ids[len(in_ids):]
        for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
    ]
    output_text = processor.batch_decode(
        generated_ids_trimmed,
        skip_special_tokens=True,
        clean_up_tokenization_spaces=False,
    )
    return " ".join(output_text).strip()
70
+
71
+ # Function to highlight search terms in the text
72
def highlight_keywords(text, keywords):
    """Return *text* as HTML with every keyword occurrence wrapped in ``<mark>``.

    Matching is case-insensitive and literal (keywords are regex-escaped).
    All text, matched or not, is HTML-escaped so that characters such as
    ``<`` or ``&`` in the input cannot be interpreted as markup.

    Args:
        text: Plain text to search.
        keywords: Iterable of search terms. Empty strings are ignored.

    Returns:
        An HTML string. When no usable keyword is given, this is simply the
        escaped text with no ``<mark>`` tags.
    """
    import html

    # Drop empty terms (an empty alternative matches between every character)
    # and try longer terms first so "foobar" is not pre-empted by "foo".
    terms = sorted(
        dict.fromkeys(keyword for keyword in keywords if keyword),
        key=len,
        reverse=True,
    )
    if not terms:
        return html.escape(text, quote=False)

    pattern = re.compile(
        "|".join(re.escape(term) for term in terms), flags=re.IGNORECASE
    )

    pieces = []
    cursor = 0
    for match in pattern.finditer(text):
        # Escape the untouched text before the match, then the match itself.
        pieces.append(html.escape(text[cursor:match.start()], quote=False))
        pieces.append(
            '<mark style="background-color:red;">'
            + html.escape(match.group(0), quote=False)
            + "</mark>"
        )
        cursor = match.end()
    pieces.append(html.escape(text[cursor:], quote=False))
    return "".join(pieces)
76
+
77
+ # Gradio interface function
78
def process_image(image, search_query):
    """Gradio callback: OCR the uploaded image and highlight search terms.

    Args:
        image: The uploaded image as delivered by the ``gr.Image`` input, or
            ``None`` when nothing was uploaded.
        search_query: Whitespace-separated keywords to highlight; may be
            empty.

    Returns:
        HTML for the output component: the extracted text with keyword
        matches highlighted, or the escaped extracted text when the query
        contains no keywords.

    Raises:
        gr.Error: If no image was provided, so the UI shows a readable
            message instead of a stack trace from the model pipeline.
    """
    import html

    if image is None:
        raise gr.Error("Please upload an image before submitting.")

    extracted_text = ocr(image)

    # split() on a whitespace-only query yields [], so test the resulting
    # list rather than the raw string before asking for highlighting.
    keywords = search_query.split() if search_query else []
    if keywords:
        return highlight_keywords(extracted_text, keywords)

    # The result is rendered by an HTML component; escape the raw OCR text so
    # characters like "<" or "&" are shown literally.
    return html.escape(extracted_text, quote=False)
90
+
91
+ # Gradio Interface
92
# Wire the callback to its widgets: an image upload plus a keyword box in,
# one HTML panel out.
application = gr.Interface(
    process_image,
    [
        gr.Image(type="pil", label="Upload Image"),
        gr.Textbox(label="Enter search keywords"),
    ],
    gr.HTML(label="Extracted and Highlighted Text"),
)

# Start serving the app, requesting a public share link.
application.launch(share=True)
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
# The "+cu121" builds of torch/torchvision are published on the PyTorch wheel
# index rather than PyPI, so pip needs this extra index to resolve them.
--extra-index-url https://download.pytorch.org/whl/cu121
git+https://github.com/huggingface/transformers
torch==2.4.1+cu121
torchvision==0.19.1+cu121
gradio==4.44.0
qwen-vl-utils==0.0.8
pillow==10.4.0