omkar-surve126's picture
Upload app.py
ff49c2d verified
raw
history blame
3.98 kB
from transformers import AutoModel, AutoTokenizer, Qwen2VLForConditionalGeneration, AutoProcessor
import streamlit as st
import os
from PIL import Image
import torch
import re
@st.cache_resource
def init_model():
tokenizer = AutoTokenizer.from_pretrained('srimanth-d/GOT_CPU', trust_remote_code=True)
model = AutoModel.from_pretrained('srimanth-d/GOT_CPU', trust_remote_code=True, use_safetensors=True, pad_token_id=tokenizer.eos_token_id)
model = model.eval()
return model, tokenizer
# def init_gpu_model():
# tokenizer = AutoTokenizer.from_pretrained('ucaslcl/GOT-OCR2_0', trust_remote_code=True)
# model = AutoModel.from_pretrained('ucaslcl/GOT-OCR2_0', trust_remote_code=True, low_cpu_mem_usage=True, device_map='cuda', use_safetensors=True, pad_token_id=tokenizer.eos_token_id)
# model = model.eval().cuda()
# return model, tokenizer
def init_qwen_model():
model = Qwen2VLForConditionalGeneration.from_pretrained("Qwen/Qwen2-VL-2B-Instruct", device_map="cpu", torch_dtype=torch.float16)
processor = AutoProcessor.from_pretrained("Qwen/Qwen2-VL-2B-Instruct")
return model, processor
def get_quen_op(image_file, model, processor):
try:
image = Image.open(image_file).convert('RGB')
conversation = [
{
"role":"user",
"content":[
{
"type":"image",
},
{
"type":"text",
"text":"Extract text from this image."
}
]
}
]
text_prompt = processor.apply_chat_template(conversation, add_generation_prompt=True)
inputs = processor(text=[text_prompt], images=[image], padding=True, return_tensors="pt")
inputs = {k: v.to(torch.float32) if torch.is_floating_point(v) else v for k, v in inputs.items()}
generation_config = {
"max_new_tokens": 32,
"do_sample": False,
"top_k": 20,
"top_p": 0.90,
"temperature": 0.4,
"num_return_sequences": 1,
"pad_token_id": processor.tokenizer.pad_token_id,
"eos_token_id": processor.tokenizer.eos_token_id,
}
output_ids = model.generate(**inputs, **generation_config)
if 'input_ids' in inputs:
generated_ids = output_ids[:, inputs['input_ids'].shape[1]:]
else:
generated_ids = output_ids
output_text = processor.batch_decode(generated_ids, skip_special_tokens=True, clean_up_tokenization_spaces=True)
return output_text[:] if output_text else "No text extracted from the image."
except Exception as e:
return f"An error occurred: {str(e)}"
@st.cache_data
def get_text(image_file, _model, _tokenizer):
res = _model.chat(_tokenizer, image_file, ocr_type='ocr')
return res
def highlight_text(text, search_term):
if not search_term:
return text
pattern = re.compile(re.escape(search_term), re.IGNORECASE)
return pattern.sub(lambda m: f'<span style="background-color: yellow;">{m.group()}</span>', text)
st.title("GOT-OCR2.0")
st.write("Upload an image")
MODEL, PROCESSOR = init_model()
image_file = st.file_uploader("Upload Image", type=['jpg', 'png', 'jpeg'])
if image_file:
if not os.path.exists("images"):
os.makedirs("images")
with open(f"images/{image_file.name}", "wb") as f:
f.write(image_file.getbuffer())
image_file = f"images/{image_file.name}"
text = get_text(image_file, MODEL, PROCESSOR)
print(text)
# Add search functionality
search_term = st.text_input("Enter a word or phrase to search:")
highlighted_text = highlight_text(text, search_term)
st.markdown(highlighted_text, unsafe_allow_html=True)