Spaces:

phospho-ai
/

phospho-multimodal

Sleeping

App Files Files Community

phospho-multimodal / app.py

PLB

Styled the gradio

efac96d 6 months ago

raw

history blame contribute delete

2.96 kB

	import gradio as gr
	import base64
	import time
	import os
	from dotenv import load_dotenv
	import requests


	# Load settings from a local .env file (if present) before reading the environment.
	load_dotenv()

	# API key sent as the Bearer token on every request to the phospho API.
	phospho_api_key = os.environ.get("PHOSPHO_API_KEY")

	# Raise explicitly instead of using ``assert``: assertions are removed when
	# Python runs with -O, which would let the app start without a key.
	if not phospho_api_key:
	    raise RuntimeError("Please set the PHOSPHO_API_KEY environment variable")


	# Image to Base 64 Converter
	def image_to_base64(image_path):
	    """Return the contents of the file at ``image_path`` as Base64 text.

	    The file is read in binary mode, its bytes are Base64-encoded, and the
	    encoded bytes are decoded as UTF-8 so the caller receives a ``str``.
	    """
	    with open(image_path, "rb") as image_file:
	        raw_bytes = image_file.read()
	    return base64.b64encode(raw_bytes).decode("utf-8")


	# Function that takes User Inputs and displays it on ChatUI
	def query_message(history, txt, img):
	    """Append the user's turn to the chat history and return the history.

	    Args:
	        history: Chat history list taken from the chatbot component; it is
	            extended in place.
	        txt: Text typed by the user.
	        img: Path of the uploaded image, or a falsy value when none was given.

	    Returns:
	        The same ``history`` list with one ``(user_message, None)`` entry
	        appended. When an image is given, the user message is the text
	        followed by a Markdown image tag whose source is a Base64 data URL.
	    """
	    if not img:
	        history += [(txt, None)]
	        return history

	    # Imported here so the function does not rely on a file-level import.
	    import mimetypes

	    # Label the data URL with the file's actual type instead of always
	    # claiming JPEG; keep JPEG as the fallback when the type is unknown.
	    mime_type, _ = mimetypes.guess_type(img)
	    if not mime_type or not mime_type.startswith("image/"):
	        mime_type = "image/jpeg"

	    # Named ``encoded_image`` so the local does not shadow the ``base64`` module.
	    encoded_image = image_to_base64(img)
	    data_url = f"data:{mime_type};base64,{encoded_image}"
	    history += [(f"{txt} ![]({data_url})", None)]
	    return history


	# Function that takes User Inputs, generates Response and displays on Chat UI
	def llm_response(history, text, img):
	    """Query the phospho multimodal model and append its answer to the chat.

	    Args:
	        history: Chat history list taken from the chatbot component; it is
	            extended in place.
	        text: The user's question about the image.
	        img: Path of the uploaded image, or a falsy value when none was given.

	    Returns:
	        The same ``history`` list with one ``(None, bot_message)`` entry
	        appended. The bot message is the ``description`` of the first
	        prediction on success, a request for an image when ``img`` is
	        missing, or a generic apology when the API call fails.
	    """
	    if not img:
	        history += [(None, "Please provide an image, otherwise I cannot answer.")]
	        return history

	    failure_message = "Sorry, I couldn't process the image. Please try again."

	    url = "https://api.phospho.ai/v2/predict"
	    headers = {
	        "accept": "application/json",
	        "Authorization": f"Bearer {phospho_api_key}",
	        "Content-Type": "application/json",
	    }
	    data = {
	        "inputs": [
	            {
	                "text": text,
	                "image_url": image_to_base64(img),
	                "temperature": 0.2,
	                "top_p": 0.9,
	                "max_new_tokens": 100,
	            }
	        ],
	        "model": "phospho-multimodal",
	    }

	    try:
	        # requests has no default timeout; without one a stalled connection
	        # would block this handler forever. (connect, read) in seconds, with a
	        # generous read budget because the model can be slow under load.
	        response = requests.post(
	            url, json=data, headers=headers, timeout=(10, 120)
	        )
	    except requests.RequestException as exc:
	        # Network failures are reported in the chat instead of crashing the handler.
	        print(f"Request to {url} failed: {exc}")
	        history += [(None, failure_message)]
	        return history

	    # Check if the response is successful
	    if response.status_code != 200:
	        history += [(None, failure_message)]
	        return history

	    try:
	        payload = response.json()
	        print(payload)
	        answer = payload["predictions"][0]["description"]
	    except (ValueError, KeyError, IndexError, TypeError) as exc:
	        # Body was not JSON or did not have the expected shape.
	        print(f"Unexpected response from {url}: {exc}")
	        history += [(None, failure_message)]
	        return history

	    history += [(None, answer)]
	    return history


	# Interface Code
	# Gradio UI: a title, a description, an image upload, a chat window, a
	# question box and a submit button wired to the two handlers above.
	# NOTE(review): the nesting indentation is not visible in this copy of the
	# file; `scale=2` suggests the chatbot shares the Row with the image — confirm.
	with gr.Blocks() as app:
	gr.Markdown("# LLM with vision")
	gr.Markdown(
	"Upload an image and ask a question about it. Your experience might be slow as we experience many requests. Learn more about phospho multimodal LLM [here](https://docs.phospho.ai/models/multimodal). \n\n This model is not censored or moderated."
	)
	with gr.Row():
	# type="filepath": the handlers receive the upload as a path and open it themselves.
	image_box = gr.Image(type="filepath")

	chatbot = gr.Chatbot(scale=2, height=500)
	text_box = gr.Textbox(
	placeholder="What is your question about the image?",
	container=False,
	)

	btn = gr.Button("Submit")
	# On click: first echo the user's message into the chat (query_message),
	# then call the model and append its answer (llm_response). Both steps take
	# the chatbot, text box and image as inputs and write back to the chatbot.
	# NOTE(review): `clicked` is not used anywhere in the visible code.
	clicked = btn.click(query_message, [chatbot, text_box, image_box], chatbot).then(
	llm_response, [chatbot, text_box, image_box], chatbot
	)
	# Enable request queuing, then start the server with debug output on.
	app.queue()
	app.launch(debug=True)