Update app.py

app.py CHANGED
@@ -2,11 +2,9 @@ import gradio as gr
 from openai import OpenAI
 import os
 
-# Retrieve the access token from the environment variable
 ACCESS_TOKEN = os.getenv("HF_TOKEN")
 print("Access token loaded.")
 
-# Initialize the OpenAI client with the Hugging Face Inference API endpoint
 client = OpenAI(
     base_url="https://api-inference.huggingface.co/v1/",
     api_key=ACCESS_TOKEN,
@@ -25,18 +23,6 @@ def respond(
     seed,
     custom_model
 ):
-    """
-    This function handles the chatbot response. It takes in:
-    - message: the user's new message
-    - history: the list of previous messages, each as a tuple (user_msg, assistant_msg)
-    - system_message: the system prompt
-    - max_tokens: the maximum number of tokens to generate in the response
-    - temperature: sampling temperature
-    - top_p: top-p (nucleus) sampling
-    - frequency_penalty: penalize repeated tokens in the output
-    - seed: a fixed seed for reproducibility; -1 will mean 'random'
-    - custom_model: the final model name in use, which may be set by selecting from the Featured Models radio or by typing a custom model
-    """
 
     print(f"Received message: {message}")
     print(f"History: {history}")
@@ -49,14 +35,13 @@ def respond(
     if seed == -1:
         seed = None
 
-    # Construct the messages array required by the API
     messages = [{"role": "system", "content": system_message}]
     print("Initial messages array constructed.")
 
     # Add conversation history to the context
     for val in history:
-        user_part = val[0]
-        assistant_part = val[1]
+        user_part = val[0]
+        assistant_part = val[1]
         if user_part:
             messages.append({"role": "user", "content": user_part})
             print(f"Added user message to context: {user_part}")
@@ -76,7 +61,6 @@ def respond(
     response = ""
     print("Sending request to OpenAI API.")
 
-    # Make the streaming request to the HF Inference API via openai-like client
     for message_chunk in client.chat.completions.create(
         model=model_to_use,
         max_tokens=max_tokens,
@@ -87,7 +71,6 @@ def respond(
         seed=seed,
         messages=messages,
     ):
-        # Extract the token text from the response chunk
         token_text = message_chunk.choices[0].delta.content
         print(f"Received token: {token_text}")
         response += token_text
@@ -95,17 +78,12 @@
 
     print("Completed response generation.")
 
+# GRADIO UI
 
-# -------------------------
-# GRADIO UI CONFIGURATION
-# -------------------------
-
-# Create a Chatbot component with a specified height
 chatbot = gr.Chatbot(height=600, show_copy_button=True, placeholder="Select a model and begin chatting", likeable=True, layout="panel")
 print("Chatbot interface created.")
 
-
-system_message_box = gr.Textbox(value="", label="System message")
+system_message_box = gr.Textbox(value="", placeholder="You are a helpful assistant.", label="System Prompt")
 
 max_tokens_slider = gr.Slider(
     minimum=1,
@@ -147,7 +125,8 @@ seed_slider = gr.Slider(
 custom_model_box = gr.Textbox(
     value="",
     label="Custom Model",
-    info="(Optional) Provide a custom Hugging Face model path. Overrides any selected featured model."
+    info="(Optional) Provide a custom Hugging Face model path. Overrides any selected featured model.",
+    placeholder="meta-llama/Llama-3.3-70B-Instruct"
 )
 
 def set_custom_model_from_radio(selected):
@@ -158,10 +137,6 @@ def set_custom_model_from_radio(selected):
     print(f"Featured model selected: {selected}")
     return selected
 
-# IMPORTANT: Because we have 1 main user input + 7 additional inputs,
-# each example should be an 8-item list: [user_text, system_prompt, max_tokens,
-# temperature, top_p, frequency_penalty, seed, custom_model].
-
 demo = gr.ChatInterface(
     fn=respond,
     additional_inputs=[
@@ -176,46 +151,11 @@ demo = gr.ChatInterface(
     fill_height=True,
     chatbot=chatbot,
     theme="Nymbo/Nymbo_Theme",
-    examples=[
-        # Example 1
-        [
-            "Howdy, partner!",   # user_text
-            "",                  # system_prompt
-            512,                 # max_tokens
-            0.7,                 # temperature
-            0.95,                # top_p
-            0.0,                 # frequency_penalty
-            -1,                  # seed
-            ""                   # custom_model
-        ],
-        # Example 2
-        [
-            "What's your model name and who trained you?",
-            "",
-            512,
-            0.7,
-            0.95,
-            0.0,
-            -1,
-            ""
-        ],
-        # Example 3
-        [
-            "How many R's are there in 'Strawberry'?",
-            "",
-            512,
-            0.7,
-            0.95,
-            0.0,
-            -1,
-            ""
-        ],
-    ],
 )
 print("ChatInterface object created.")
 
 with demo:
-    with gr.Accordion("
+    with gr.Accordion("Model Selection", open=False):
         model_search_box = gr.Textbox(
             label="Filter Models",
             placeholder="Search for a featured model...",
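
For reference, the streaming pattern the file relies on looks roughly like the sketch below. This is a minimal reconstruction, not the Space's exact code: stream=True is assumed (the flag sits in a region the hunks above elide, but iterating over the response chunk by chunk only works with streaming enabled), and the None check on delta.content is an added precaution, since the bare `response += token_text` in app.py raises a TypeError whenever a chunk carries no content.

# Minimal sketch of the streaming loop, reconstructed around the hunks above.
# stream=True is assumed (it sits outside the visible hunks); the None guard
# is an addition, not something app.py currently does.
import os
from openai import OpenAI

client = OpenAI(
    base_url="https://api-inference.huggingface.co/v1/",
    api_key=os.getenv("HF_TOKEN"),
)

response = ""
for message_chunk in client.chat.completions.create(
    model="meta-llama/Llama-3.3-70B-Instruct",  # the placeholder model named in the diff
    max_tokens=512,
    temperature=0.7,
    top_p=0.95,
    seed=None,
    stream=True,  # assumed: required for the chunk-by-chunk iteration below
    messages=[
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Howdy, partner!"},
    ],
):
    token_text = message_chunk.choices[0].delta.content
    if token_text:  # delta.content can be None, e.g. on the final chunk
        response += token_text

print(response)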
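One constraint from the removed IMPORTANT comment is worth preserving somewhere: gr.ChatInterface matches each example row against the user message plus every entry in additional_inputs, so with this app's seven extra inputs every row needs exactly eight items. A trimmed, hypothetical sketch of that shape, reduced to two additional inputs:

# Hypothetical, reduced sketch of the example-row shape the removed comment
# described; the real app wires seven additional inputs, so its rows need
# eight items instead of the three shown here.
import gradio as gr

def respond(message, history, system_message, max_tokens):
    return f"echo: {message}"

demo = gr.ChatInterface(
    fn=respond,
    additional_inputs=[
        gr.Textbox(value="", label="System message"),
        gr.Slider(minimum=1, maximum=4096, value=512, label="Max tokens"),
    ],
    # One user message + two additional inputs = 3-item example rows:
    # [user_text, system_prompt, max_tokens]
    examples=[
        ["Howdy, partner!", "", 512],
    ],
)

demo.launch()

With the real app's seven additional inputs, each row would instead list [user_text, system_prompt, max_tokens, temperature, top_p, frequency_penalty, seed, custom_model], exactly as the removed comment spelled out.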