"""Gradio demo that captions an uploaded image with a GIT-style model.

The script loads a processor and a causal language model at import time,
wraps them in ``predict``, and serves the function through a Gradio
``Interface``.
"""

import re

import gradio as gr
from PIL import Image
from transformers import AutoProcessor, AutoModelForCausalLM
import spacy

# Device the model is placed on when the script starts.
device = 'cpu'

# The processor comes from the base GIT checkpoint, while the weights come
# from the "nkasmanoff/sky-scribe" checkpoint.
processor = AutoProcessor.from_pretrained("microsoft/git-base")
model = AutoModelForCausalLM.from_pretrained("nkasmanoff/sky-scribe").to(device)


def predict(image, max_length: int = 50, device: str = 'cpu') -> str:
    """Generate a caption for ``image``.

    Args:
        image: The image to caption. The Gradio input below is configured
            with ``type='pil'`` and ``optional=True``, so this may be
            ``None`` when the user submits without uploading anything.
        max_length: Forwarded to ``model.generate`` as ``max_length``.
        device: Device the processed inputs are moved to before generation.
            This parameter shadows the module-level ``device``; callers
            that move the model elsewhere must pass a matching value.

    Returns:
        The first decoded caption, with special tokens stripped, or a short
        notice when no image was supplied.
    """
    # The upload is optional, so guard against a missing image instead of
    # letting the processor fail on ``None``.
    if image is None:
        return "No image provided."

    encoded = processor(images=image, return_tensors="pt").to(device)
    generated_ids = model.generate(
        pixel_values=encoded.pixel_values,
        max_length=max_length,
    )
    captions = processor.batch_decode(generated_ids, skip_special_tokens=True)
    return captions[0]


# Named ``image_input`` rather than ``input`` so the builtin is not shadowed.
image_input = gr.inputs.Image(
    label="Please upload an image",
    type='pil',
    optional=True,
)

output = gr.outputs.Textbox(type="text", label="Captions")

title = "Satellite Image Captioning"

description = "Provide an image, receive a description of the event with predictions about location, date, observing instrument, and possible event type. For best results, please consider using images only from NASA Earth."

interface = gr.Interface(
    fn=predict,
    inputs=image_input,
    theme="grass",
    outputs=output,
    title=title,
    description=description,
    flagging_callback=gr.SimpleCSVLogger(),
)

interface.launch(debug=True)