# Spaces: Sleeping
import base64
import mimetypes
import os
import time

import gradio as gr
import requests
from dotenv import load_dotenv

# Pull variables from a local .env file (if present) into the environment.
load_dotenv()

# The key is sent as the Bearer token of every prediction request, so the app
# refuses to start without it.
phospho_api_key = os.environ.get("PHOSPHO_API_KEY")
if not phospho_api_key:
    # Explicit raise instead of `assert`: assertions are stripped under
    # `python -O`, which would let the app start with no key.
    raise RuntimeError("Please set the PHOSPHO_API_KEY environment variable")


# Image to Base 64 Converter
def image_to_base64(image_path):
    """Return the contents of the file at *image_path* as base64 text.

    Args:
        image_path: Path of the file to read (opened in binary mode).

    Returns:
        The base64 encoding of the file's bytes, decoded to a ``str``.
    """
    with open(image_path, "rb") as image_file:
        raw_bytes = image_file.read()
    return base64.b64encode(raw_bytes).decode("utf-8")


# Function that takes User Inputs and displays it on ChatUI
def query_message(history, txt, img):
    """Append the user's turn to the chat history.

    Args:
        history: List of ``(user_message, bot_message)`` tuples. It is
            extended in place; ``None`` is treated as an empty history.
        txt: Text entered by the user.
        img: Path of the image file, or a falsy value when no image was given.

    Returns:
        The history with one ``(user_message, None)`` entry appended. When an
        image is given, the user message is the text followed by a Markdown
        image whose source is a base64 data URL of the file.
    """
    if history is None:
        history = []

    if not img:
        history.append((txt, None))
        return history

    # Label the data URL with the file's actual image type instead of always
    # claiming JPEG; keep JPEG as the fallback when the type cannot be guessed.
    mime_type, _ = mimetypes.guess_type(img)
    if not mime_type or not mime_type.startswith("image/"):
        mime_type = "image/jpeg"

    # Named so that it does not shadow the imported `base64` module.
    encoded_image = image_to_base64(img)
    data_url = f"data:{mime_type};base64,{encoded_image}"
    history.append((f"{txt} ![]({data_url})", None))
    return history


# Function that takes User Inputs, generates Response and displays on Chat UI
def llm_response(history, text, img):
    """Ask the phospho multimodal model about the image and append its reply.

    Args:
        history: List of ``(user_message, bot_message)`` tuples. It is
            extended in place; ``None`` is treated as an empty history.
        text: The user's question about the image.
        img: Path of the image file, or a falsy value when no image was given.

    Returns:
        The history with one ``(None, bot_message)`` entry appended. The bot
        message is the model's description on success, a request to provide
        an image when *img* is falsy, or a generic apology when the request
        fails, returns a non-200 status, or returns an unexpected body.
    """
    if history is None:
        history = []

    if not img:
        history.append(
            (None, "Please provide an image, otherwise I cannot answer.")
        )
        return history

    failure_message = "Sorry, I couldn't process the image. Please try again."
    # Upper bound on the HTTP call so a stalled request cannot block the UI
    # handler forever.
    request_timeout_s = 60

    url = "https://api.phospho.ai/v2/predict"
    headers = {
        "accept": "application/json",
        "Authorization": f"Bearer {phospho_api_key}",
        "Content-Type": "application/json",
    }
    data = {
        "inputs": [
            {
                "text": text,
                # The raw base64 string (no "data:" prefix) is what gets sent.
                # NOTE(review): confirm this is the format the API expects
                # for `image_url`.
                "image_url": image_to_base64(img),
                "temperature": 0.2,
                "top_p": 0.9,
                "max_new_tokens": 100,
            }
        ],
        "model": "phospho-multimodal",
    }

    try:
        response = requests.post(
            url, json=data, headers=headers, timeout=request_timeout_s
        )
    except requests.RequestException as exc:
        # Connection errors and timeouts get the same friendly message as a
        # non-200 status instead of crashing the handler.
        print(f"phospho request failed: {exc}")
        history.append((None, failure_message))
        return history

    # Check if the response is successful
    if response.status_code != 200:
        history.append((None, failure_message))
        return history

    try:
        payload = response.json()
        print(payload)
        answer = payload["predictions"][0]["description"]
    except (ValueError, KeyError, IndexError, TypeError) as exc:
        # Body was not JSON or did not have the expected shape.
        print(f"unexpected phospho response: {exc!r}")
        history.append((None, failure_message))
        return history

    history.append((None, answer))
    return history


# Interface Code
# NOTE(review): the nesting below was reconstructed from a source whose
# indentation was lost — confirm which components belong inside the Row.
with gr.Blocks() as app:
    gr.Markdown("# LLM with vision")
    gr.Markdown(
        "Upload an image and ask a question about it. "
        "Your experience might be slow as we experience many requests. "
        "Learn more about phospho multimodal LLM "
        "[here](https://docs.phospho.ai/models/multimodal). "
        "\n\n This model is not censored or moderated."
    )

    with gr.Row():
        image_box = gr.Image(type="filepath")
        chatbot = gr.Chatbot(scale=2, height=500)

    text_box = gr.Textbox(
        placeholder="What is your question about the image?",
        container=False,
    )
    btn = gr.Button("Submit")

    # On click: first show the user's turn, then append the model's answer.
    # Both handlers read the same three components and write to the chatbot.
    handler_inputs = [chatbot, text_box, image_box]
    clicked = btn.click(
        fn=query_message, inputs=handler_inputs, outputs=chatbot
    ).then(fn=llm_response, inputs=handler_inputs, outputs=chatbot)

app.queue()
app.launch(debug=True)