import re 
import gradio as gr
from PIL import Image
from transformers import AutoProcessor, AutoModelForCausalLM
import spacy

# Device the captioning model is placed on when this module is loaded.
device = "cpu"

# The processor is loaded from the "microsoft/git-base" checkpoint, while the
# model weights are loaded from the "nkasmanoff/sky-scribe" checkpoint.
processor = AutoProcessor.from_pretrained("microsoft/git-base")
model = AutoModelForCausalLM.from_pretrained("nkasmanoff/sky-scribe")
model = model.to(device)


def predict(image, max_length: int = 50, device: str = 'cpu') -> str:
    """Generate a text caption for ``image`` using the module-level model.

    Args:
        image: Image to caption; forwarded to ``processor(images=...)``.
            ``None`` is accepted (the upload widget is declared optional)
            and produces an empty caption instead of an error.
        max_length: Forwarded to ``model.generate`` as ``max_length``.
        device: Device the processed inputs are moved to before generation.
            This parameter shadows the module-level ``device`` name inside
            the function.

    Returns:
        The first decoded sequence, with special tokens skipped, or ``""``
        when no image was supplied.
    """
    # Nothing to caption: return early rather than handing None to the
    # processor.
    if image is None:
        return ""

    inputs = processor(images=image, return_tensors="pt").to(device)
    generated_ids = model.generate(
        pixel_values=inputs.pixel_values,
        max_length=max_length,
    )
    # batch_decode returns one string per generated sequence; a single image
    # was submitted, so only the first entry is used.
    captions = processor.batch_decode(generated_ids, skip_special_tokens=True)
    return captions[0]


# Input/output widgets for the demo. The image widget is bound to
# ``image_input`` (not ``input``) so the ``input`` builtin is not shadowed.
image_input = gr.inputs.Image(
    label="Please upload an image",
    type="pil",
    optional=True,
)
output = gr.outputs.Textbox(type="text", label="Captions")


# Text passed to the interface as its title and description.
title = "Satellite Image Captioning"
description = (
    "Provide an image, receive a description of the event with predictions "
    "about location, date, observing instrument, and possible event type. "
    "For best results, please consider using images only from NASA Earth."
)

# Wire ``predict`` to the widgets above and start the app.
interface = gr.Interface(
    fn=predict,
    inputs=image_input,
    outputs=output,
    theme="grass",
    title=title,
    description=description,
    flagging_callback=gr.SimpleCSVLogger(),
)
interface.launch(debug=True)