# HF-LLMs / app.py
# SkyNetWalker: "Update app.py" (commit b597dd2, verified)
# Refer to the llama recipes for more info:
#   https://github.com/huggingface/huggingface-llama-recipes/blob/main/inference-api.ipynb
# huggingface-llama-recipes repository:
#   https://github.com/huggingface/huggingface-llama-recipes/tree/main
import gradio as gr
from openai import OpenAI
import os
# Access token for the inference endpoint, read from the "myHFtoken"
# environment variable.
ACCESS_TOKEN = os.getenv("myHFtoken")
if ACCESS_TOKEN:
    print("Access token loaded.")
else:
    # os.getenv returns None when the variable is unset; report that instead
    # of claiming the token was loaded.
    print("Warning: environment variable 'myHFtoken' is not set.")

# OpenAI-compatible client pointed at the Hugging Face inference endpoint.
client = OpenAI(
    base_url="https://api-inference.huggingface.co/v1/",
    api_key=ACCESS_TOKEN,
)
print("Client initialized.")
# Preset system prompts, keyed by the label offered in the
# "Select System Prompt:" dropdown.
SYSTEM_PROMPTS = {
    "zh-HK": "用香港的廣東話(Cantonese)對話. No chatty. Answer in simple but accurate way.",
    "zh-TW": (
        "Chat by Traditional Chinese language of Taiwan (zh-TW). "
        "No chatty. Answer in simple but accurate way."
    ),
    "EN: General Assistant": (
        "You are a helpful, respectful and honest assistant. "
        "Always provide accurate information and admit when you're not sure about something."
    ),
    "EN: Code Helper": (
        "You are a programming assistant. "
        "Help users with coding questions, debugging, and best practices. "
        "Provide clear explanations and code examples when appropriate."
    ),
    "EN: Creative Writer": (
        "You are a creative writing assistant. "
        "Help users with storytelling, character development, and creative writing techniques. "
        "Be imaginative and encouraging."
    ),
}
def respond(
    message,
    history: list[tuple[str, str]],
    preset_prompt,
    custom_prompt,
    max_tokens,
    temperature,
    top_p,
    model_name,
):
    """Stream the model's reply to ``message``, yielding the text so far.

    Args:
        message: New user message to answer.
        history: Previous turns as ``(user_text, assistant_text)`` pairs.
            Empty or ``None`` halves of a pair are left out of the context.
        preset_prompt: Key into ``SYSTEM_PROMPTS``; used only when
            ``custom_prompt`` is blank.
        custom_prompt: System prompt that takes precedence over the preset
            when it contains non-whitespace text. ``None`` counts as blank.
        max_tokens: Passed through as ``max_tokens`` to the completion call.
        temperature: Passed through as ``temperature``.
        top_p: Passed through as ``top_p``.
        model_name: Passed through as ``model``.

    Yields:
        The accumulated response text after each streamed chunk whose
        content is not ``None``.

    Raises:
        KeyError: If ``custom_prompt`` is blank and ``preset_prompt`` is not
            a key of ``SYSTEM_PROMPTS``.
    """
    print(f"Received message: {message}")
    print(f"History: {history}")

    # A blank (or missing) custom prompt falls back to the selected preset.
    if custom_prompt and custom_prompt.strip():
        system_message = custom_prompt
    else:
        system_message = SYSTEM_PROMPTS[preset_prompt]
    print(f"System message: {system_message}")
    print(f"Max tokens: {max_tokens}, Temperature: {temperature}, Top-P: {top_p}")
    print(f"Selected model: {model_name}")

    # Rebuild the conversation: system prompt, prior turns, then the new message.
    messages = [{"role": "system", "content": system_message}]
    for user_text, assistant_text in history:
        if user_text:
            messages.append({"role": "user", "content": user_text})
            print(f"Added user message to context: {user_text}")
        if assistant_text:
            messages.append({"role": "assistant", "content": assistant_text})
            print(f"Added assistant message to context: {assistant_text}")
    messages.append({"role": "user", "content": message})

    response = ""
    print("Sending request to OpenAI API.")
    stream = client.chat.completions.create(
        model=model_name,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
        messages=messages,
    )
    # The loop variable is named ``chunk`` so it does not shadow the
    # ``message`` parameter.
    for chunk in stream:
        # A streamed chunk may arrive without any choices; there is nothing
        # to append in that case.
        if not chunk.choices:
            continue
        token = chunk.choices[0].delta.content
        print(f"Received token: {token}")
        # delta.content is None on chunks that carry no text (for example a
        # role-only or finishing chunk); concatenating it would raise TypeError.
        if token is None:
            continue
        response += token
        yield response
    print("Completed response generation.")
# Model identifiers offered in the "Select Model:" dropdown; the first entry
# is the default selection.
models = [
    "PowerInfer/SmallThinker-3B-Preview",
    "Qwen/QwQ-32B-Preview",
    "Qwen/Qwen2.5-Coder-32B-Instruct",
    "meta-llama/Llama-3.2-3B-Instruct",
    "microsoft/Phi-3-mini-128k-instruct",
]
# Assemble the Gradio UI: model picker, chat window, generation settings,
# and the event wiring between them.
with gr.Blocks() as demo:
    gr.Markdown("# LLM Test")

    with gr.Row():
        model_dropdown = gr.Dropdown(
            label="Select Model:",
            choices=models,
            value=models[0],
        )

    # Chat components are created individually so they can be wired by hand below.
    chatbot = gr.Chatbot(height=500)
    msg = gr.Textbox(
        placeholder="Enter text and press enter",
        show_label=False,
        container=False,
    )
    clear = gr.Button("Clear")

    # Extra generation inputs, kept in an accordion that starts closed.
    with gr.Accordion("Configuration", open=False):
        preset_prompt = gr.Dropdown(
            label="Select System Prompt:",
            choices=list(SYSTEM_PROMPTS.keys()),
            value=list(SYSTEM_PROMPTS.keys())[0],
        )
        custom_prompt = gr.Textbox(
            label="Custom System Prompt (leaves blank to use preset):",
            value="",
            lines=2,
        )
        max_tokens = gr.Slider(
            label="Max new tokens:",
            minimum=1,
            maximum=8192,
            step=1,
            value=2048,
        )
        temperature = gr.Slider(
            label="Temperature:",
            minimum=0.1,
            maximum=1.0,
            step=0.1,
            value=0.3,
        )
        top_p = gr.Slider(
            label="Top-P:",
            minimum=0.1,
            maximum=1.0,
            step=0.05,
            value=0.95,
        )

    # Chat callbacks.
    def user(user_message, history):
        """Return an empty textbox value and the history with a new unanswered turn."""
        pending_turn = [user_message, None]
        return "", history + [pending_turn]

    def bot(
        history,
        preset_prompt,
        custom_prompt,
        max_tokens,
        temperature,
        top_p,
        model_name,
    ):
        """Fill the last turn of ``history`` with the reply as it streams in.

        Yields the whole history after every partial reply produced by
        ``respond``.
        """
        current_turn = history[-1]
        current_turn[1] = ""
        reply_stream = respond(
            current_turn[0],
            history[:-1],
            preset_prompt,
            custom_prompt,
            max_tokens,
            temperature,
            top_p,
            model_name,
        )
        for partial_reply in reply_stream:
            current_turn[1] = partial_reply
            yield history

    # On submit: record the user's turn first (unqueued), then stream the reply.
    msg.submit(
        fn=user,
        inputs=[msg, chatbot],
        outputs=[msg, chatbot],
        queue=False,
    ).then(
        fn=bot,
        inputs=[
            chatbot,
            preset_prompt,
            custom_prompt,
            max_tokens,
            temperature,
            top_p,
            model_dropdown,
        ],
        outputs=chatbot,
    )

    # The Clear button resets the chat window by sending None to the chatbot.
    clear.click(fn=lambda: None, inputs=None, outputs=chatbot, queue=False)

print("Gradio interface initialized.")
# Launch the Gradio app only when this file is executed as a script.
if __name__ == "__main__":
    print("Launching the demo application.")
    demo.launch()