import gradio as gr
from huggingface_hub import InferenceClient

# Initialize Inference Clients for all models.
# NOTE: the variable names do not match the repos they point at (e.g.
# paligemma224_client wraps an mDeBERTa NLI model, not PaliGemma), and
# several of these repos are not text-generation models, so generation
# calls against them may fail. The ids are kept as-is from the original.
paligemma224_client = InferenceClient("MoritzLaurer/mDeBERTa-v3-base-mnli-xnli")
paligemma448_client = InferenceClient("microsoft/LLM2CLIP-Llama-3-8B-Instruct-CC-Finetuned")
paligemma896_client = InferenceClient("ProsusAI/finbert")
paligemma28b_client = InferenceClient("Skywork/Skywork-o1-Open-PRM-Qwen-2.5-1.5B")
# NOTE: "llama/3.3-1b" is not a valid Hub repo id; a real id such as
# "meta-llama/Llama-3.2-1B-Instruct" would be needed here.
llama_client = InferenceClient("llama/3.3-1b")
deepseek_client = InferenceClient("deepseek-ai/deepseek-vl2")
omniparser_client = InferenceClient("microsoft/OmniParser")
pixtral_client = InferenceClient("mistralai/Pixtral-12B-2409")
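
# A hedged configuration sketch (not in the original code): gated or
# rate-limited endpoints usually need an access token, which
# InferenceClient accepts via its `token` parameter, e.g.:
#
#   import os
#   pixtral_client = InferenceClient(
#       "mistralai/Pixtral-12B-2409", token=os.getenv("HF_TOKEN")
#   )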
def enhance_prompt(prompt: str) -> str:
    # Run the prompt through the three prompt-enhancement clients.
    # InferenceClient instances are not callable; text_generation() is the
    # text-in/text-out method and returns the generated string directly.
    prompt_224 = paligemma224_client.text_generation(prompt)
    prompt_448 = paligemma448_client.text_generation(prompt)
    prompt_896 = paligemma896_client.text_generation(prompt)
    # Combine all enhanced prompts into a single one.
    enhanced_prompt = (
        f"Enhanced (224): {prompt_224}\n"
        f"Enhanced (448): {prompt_448}\n"
        f"Enhanced (896): {prompt_896}"
    )
    # Ultra-enhance the combined prompt with the fourth client.
    return paligemma28b_client.text_generation(enhanced_prompt)
def generate_answer(enhanced_prompt: str) -> str:
    # Generate answers with the llama, deepseek, and omniparser clients.
    llama_answer = llama_client.text_generation(enhanced_prompt)
    deepseek_answer = deepseek_client.text_generation(enhanced_prompt)
    omniparser_answer = omniparser_client.text_generation(enhanced_prompt)
    # Combine the answers from all three models.
    return (
        f"Llama: {llama_answer}\n"
        f"Deepseek: {deepseek_answer}\n"
        f"Omniparser: {omniparser_answer}"
    )
def enhance_answer(answer: str) -> str:
    # Polish the combined answer with the Pixtral client.
    return pixtral_client.text_generation(answer)
def process(message: str) -> str:
    # Step 1: Enhance the prompt using the prompt-enhancement clients.
    enhanced_prompt = enhance_prompt(message)
    # Step 2: Generate an answer using the three answer models.
    answer = generate_answer(enhanced_prompt)
    # Step 3: Enhance the generated answer using Pixtral.
    return enhance_answer(answer)
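
# Quick smoke test for the pipeline (a sketch; assumes the endpoints above
# are reachable and actually support text generation):
#
#   print(process("Summarize the theory of relativity in one sentence."))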
# Chat callback for the Gradio interface.
def respond(message, history: list[tuple[str, str]], system_message, max_tokens, temperature, top_p):
    # Build the conversation from the system message and history.
    # NOTE: the pipeline above takes a bare string, so these messages and
    # the sampling parameters (max_tokens, temperature, top_p) are
    # collected here but currently unused.
    messages = [{"role": "system", "content": system_message}]
    for user_msg, assistant_msg in history:
        if user_msg:
            messages.append({"role": "user", "content": user_msg})
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})
    # Run the full enhance -> answer -> enhance pipeline.
    final_answer = process(message)
    # Stream the answer to the chat UI one character at a time.
    response = ""
    for char in final_answer:
        response += char
        yield response
# Gradio interface setup.
demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Textbox(value="You are a helpful assistant.", label="System message"),
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
    ],
)
if __name__ == "__main__":
    demo.launch()
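
# Once the Space is running, it can also be queried programmatically with
# gradio_client. A sketch, with assumptions: "user/space-name" is a
# placeholder id, "/chat" is the default ChatInterface endpoint, and the
# keyword names mirror respond()'s parameters.
#
#   from gradio_client import Client
#
#   client = Client("user/space-name")
#   result = client.predict(
#       message="Hello!",
#       system_message="You are a helpful assistant.",
#       max_tokens=512,
#       temperature=0.7,
#       top_p=0.95,
#       api_name="/chat",
#   )
#   print(result)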