# Refer to the Hugging Face llama recipes for more on the Inference API:
# https://github.com/huggingface/huggingface-llama-recipes/blob/main/inference-api.ipynb
# huggingface-llama-recipes repo: https://github.com/huggingface/huggingface-llama-recipes/tree/main
import gradio as gr
from openai import OpenAI
import os
ACCESS_TOKEN = os.getenv("myHFtoken")
print("Access token loaded." if ACCESS_TOKEN else "Warning: myHFtoken is not set.")
client = OpenAI(
    base_url="https://api-inference.huggingface.co/v1/",
    api_key=ACCESS_TOKEN,
)
print("Client initialized.")
def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
    model_name,  # model selected from the dropdown
):
    print(f"Received message: {message}")
    print(f"History: {history}")
    print(f"System message: {system_message}")
    print(f"Max tokens: {max_tokens}, Temperature: {temperature}, Top-P: {top_p}")
    print(f"Selected model: {model_name}")

    # Rebuild the conversation as OpenAI-style chat messages.
    messages = [{"role": "system", "content": system_message}]
    for user_msg, assistant_msg in history:
        if user_msg:
            messages.append({"role": "user", "content": user_msg})
            print(f"Added user message to context: {user_msg}")
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})
            print(f"Added assistant message to context: {assistant_msg}")
    messages.append({"role": "user", "content": message})

    response = ""
    print("Sending request to OpenAI API.")
    # Stream the completion and yield the accumulated text so the UI updates live.
    for chunk in client.chat.completions.create(
        model=model_name,  # use the selected model
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
        messages=messages,
    ):
        token = chunk.choices[0].delta.content
        # Some stream chunks (e.g. the final one) carry no content; skip them
        # so `response += token` never sees None.
        if token:
            print(f"Received token: {token}")
            response += token
            yield response
    print("Completed response generation.")
chatbot = gr.Chatbot(height=600)
print("Chatbot interface created.")
# Define the list of models
models = [
    "PowerInfer/SmallThinker-3B-Preview",  # OK
    "Qwen/QwQ-32B-Preview",  # OK
    "Qwen/Qwen2.5-Coder-32B-Instruct",  # OK
    "meta-llama/Llama-3.2-3B-Instruct",  # OK
    # "Qwen/Qwen2.5-32B-Instruct",  # fail: too large
    # "microsoft/Phi-3-mini-128k-instruct",  # fail
    # "microsoft/Phi-3-medium-128k-instruct",  # fail
    # "microsoft/phi-4",  # fail: too large to be loaded automatically (29 GB > 10 GB)
    # "meta-llama/Llama-3.3-70B-Instruct",  # fail: needs an HF Pro subscription
]
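
# The OK/fail notes above were recorded by hand. A hedged sketch for re-checking
# availability (assumption: an unavailable model makes the completions call raise,
# which matches how the failures above surfaced; this is not an HF-documented probe):
# def model_is_available(name: str) -> bool:
#     try:
#         client.chat.completions.create(
#             model=name,
#             messages=[{"role": "user", "content": "ping"}],
#             max_tokens=1,
#         )
#         return True
#     except Exception as exc:
#         print(f"{name}: {exc}")
#         return False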
# Add a title and place the model dropdown above the chat interface
with gr.Blocks() as demo:
    gr.Markdown("# LLM Test (HF API)")  # title at the top of the UI
    # Model dropdown rendered above the chatbot
    model_dropdown = gr.Dropdown(choices=models, value=models[0], label="Select Model")
    # Reuse the stock ChatInterface for the chat loop itself
    gr.ChatInterface(
        respond,
        additional_inputs=[
            gr.Textbox(value="", label="System message"),
            gr.Slider(minimum=1, maximum=4096, value=1024, step=1, label="Max new tokens"),
            gr.Slider(minimum=0.1, maximum=1.0, value=0.3, step=0.1, label="Temperature"),
            gr.Slider(
                minimum=0.1,
                maximum=1.0,
                value=0.95,
                step=0.05,
                label="Top-P",
            ),
            model_dropdown,  # pass the dropdown as an additional input
        ],
        fill_height=True,
        chatbot=chatbot,
    )
print("Gradio interface initialized.")
if __name__ == "__main__":
    print("Launching the demo application.")
    demo.launch()