Lake-1-Pro / app.py
import gradio as gr
from huggingface_hub import InferenceClient
# Initialize Inference Clients for all models
paligemma224_client = InferenceClient("MoritzLaurer/mDeBERTa-v3-base-mnli-xnli")
paligemma448_client = InferenceClient("microsoft/LLM2CLIP-Llama-3-8B-Instruct-CC-Finetuned")
paligemma896_client = InferenceClient("ProsusAI/finbert")
paligemma28b_client = InferenceClient("Skywork/Skywork-o1-Open-PRM-Qwen-2.5-1.5B")
llama_client = InferenceClient("llama/3.3-1b")
deepseek_client = InferenceClient("deepseek-ai/deepseek-vl2")
omniparser_client = InferenceClient("microsoft/OmniParser")
pixtral_client = InferenceClient("mistralai/Pixtral-12B-2409")
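
# All model calls below go through InferenceClient.text_generation(), which sends a
# prompt to the hosted endpoint and returns the generated string. This is a sketch of
# the call pattern, assuming every endpoint above actually serves the text-generation
# task; endpoints serving other tasks (e.g. classification) would need the matching
# client method instead. Illustration only (hypothetical model id):
#
#     client = InferenceClient("some-org/some-text-generation-model")
#     text = client.text_generation("Hello", max_new_tokens=64)  # -> generated string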

def enhance_prompt(prompt: str) -> str:
    # Use the three Paligemma-named clients for prompt enhancement
    prompt_224 = paligemma224_client.text_generation(prompt)
    prompt_448 = paligemma448_client.text_generation(prompt)
    prompt_896 = paligemma896_client.text_generation(prompt)
    # Combine all enhanced prompts into a single one
    enhanced_prompt = (
        f"Enhanced (224): {prompt_224}\n"
        f"Enhanced (448): {prompt_448}\n"
        f"Enhanced (896): {prompt_896}"
    )
    # Ultra-enhance the combined prompt using the Paligemma 28b client
    ultra_enhanced_prompt = paligemma28b_client.text_generation(enhanced_prompt)
    return ultra_enhanced_prompt

def generate_answer(enhanced_prompt: str) -> str:
    # Generate answers with the three answer models: Llama, DeepSeek and OmniParser
    llama_answer = llama_client.text_generation(enhanced_prompt)
    deepseek_answer = deepseek_client.text_generation(enhanced_prompt)
    omniparser_answer = omniparser_client.text_generation(enhanced_prompt)
    # Combine the answers from all three models
    combined_answer = (
        f"Llama: {llama_answer}\n"
        f"Deepseek: {deepseek_answer}\n"
        f"Omniparser: {omniparser_answer}"
    )
    return combined_answer

def enhance_answer(answer: str) -> str:
    # Polish the combined answer using the Pixtral model
    enhanced_answer = pixtral_client.text_generation(answer)
    return enhanced_answer

def process(message: str) -> str:
    # Step 1: Enhance the prompt using the Paligemma clients
    enhanced_prompt = enhance_prompt(message)
    # Step 2: Generate an answer using the three answer models
    answer = generate_answer(enhanced_prompt)
    # Step 3: Enhance the generated answer using Pixtral
    final_answer = enhance_answer(answer)
    return final_answer
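
# Example (sketch): the pipeline can be exercised outside Gradio, assuming the
# endpoints above are reachable and a valid HF token is configured if required.
# The prompt below is purely illustrative:
#
#     answer = process("Summarise the benefits of combining multiple models.")
#     print(answer)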

# Chat callback used by the Gradio interface
def respond(message, history: list[tuple[str, str]], system_message, max_tokens, temperature, top_p):
    # Rebuild the conversation from the system message and history.
    # Note: process() currently uses only the latest user message; the history and
    # the sampling controls are accepted from the UI but not forwarded to the models.
    messages = [{"role": "system", "content": system_message}]
    for val in history:
        if val[0]:
            messages.append({"role": "user", "content": val[0]})
        if val[1]:
            messages.append({"role": "assistant", "content": val[1]})
    # Get the final enhanced response
    final_answer = process(message)
    # Stream the response character by character so the UI updates progressively
    response = ""
    for char in final_answer:
        response += char
        yield response

# Gradio interface setup
demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Textbox(value="You are a helpful assistant.", label="System message"),
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
    ],
)

if __name__ == "__main__":
    demo.launch()