Spaces:
Running
Running
OKAY LETS SIMPLIFY THS LOL
Browse files
app.py
CHANGED
@@ -21,21 +21,29 @@ def respond(
|
|
21 |
temperature,
|
22 |
top_p,
|
23 |
frequency_penalty,
|
24 |
-
seed
|
25 |
-
model,
|
26 |
-
custom_model
|
27 |
):
|
28 |
"""
|
29 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
30 |
"""
|
|
|
31 |
print(f"Received message: {message}")
|
32 |
print(f"History: {history}")
|
33 |
print(f"System message: {system_message}")
|
34 |
-
print(f"
|
|
|
35 |
|
36 |
-
#
|
37 |
-
|
38 |
-
|
39 |
|
40 |
# Construct the messages array required by the API
|
41 |
messages = [{"role": "system", "content": system_message}]
|
@@ -58,107 +66,95 @@ def respond(
|
|
58 |
response = ""
|
59 |
print("Sending request to OpenAI API.")
|
60 |
|
61 |
-
# Make the streaming request to the HF Inference API via
|
62 |
for message_chunk in client.chat.completions.create(
|
63 |
-
model=
|
64 |
max_tokens=max_tokens,
|
65 |
-
stream=True,
|
66 |
temperature=temperature,
|
67 |
top_p=top_p,
|
68 |
-
frequency_penalty=frequency_penalty,
|
69 |
-
seed=seed
|
70 |
messages=messages,
|
71 |
):
|
72 |
# Extract the token text from the response chunk
|
73 |
token_text = message_chunk.choices[0].delta.content
|
74 |
print(f"Received token: {token_text}")
|
75 |
response += token_text
|
|
|
76 |
yield response
|
77 |
|
78 |
print("Completed response generation.")
|
79 |
|
80 |
-
# Create a Chatbot component
|
81 |
chatbot = gr.Chatbot(height=600)
|
82 |
print("Chatbot interface created.")
|
83 |
|
84 |
-
|
85 |
-
|
86 |
-
"
|
87 |
-
"bigscience/bloom-176b",
|
88 |
-
"gpt-j-6b",
|
89 |
-
"opt-30b",
|
90 |
-
"flan-t5-xxl",
|
91 |
]
|
92 |
|
93 |
-
# Function to filter models based on user input
|
94 |
def filter_models(search_term):
|
95 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
96 |
|
97 |
-
# Gradio interface
|
98 |
with gr.Blocks(theme="Nymbo/Nymbo_Theme") as demo:
|
99 |
-
|
100 |
-
|
101 |
-
|
102 |
-
|
103 |
-
|
104 |
-
|
105 |
-
|
106 |
-
|
107 |
-
|
108 |
-
|
109 |
-
|
110 |
-
|
111 |
-
|
112 |
-
|
113 |
-
|
114 |
-
|
115 |
-
|
116 |
-
|
117 |
-
|
118 |
-
|
119 |
-
|
120 |
-
|
121 |
-
|
122 |
-
|
123 |
-
|
124 |
-
|
125 |
-
|
126 |
-
|
127 |
-
|
128 |
-
|
129 |
-
|
130 |
-
<tr><td>flan-t5-xxl</td><td>Google's Flan-tuned T5 XXL</td></tr>
|
131 |
-
</table>
|
132 |
-
"""
|
133 |
-
)
|
134 |
-
with gr.Accordion("Parameters Overview", open=False):
|
135 |
-
gr.Markdown(
|
136 |
-
"""
|
137 |
-
### Parameters Overview
|
138 |
-
- **Max Tokens**: Maximum number of tokens in the response.
|
139 |
-
- **Temperature**: Controls the randomness of responses. Lower values make the output more deterministic.
|
140 |
-
- **Top-P**: Controls the diversity of responses by limiting the token selection to a probability mass.
|
141 |
-
- **Frequency Penalty**: Penalizes repeated tokens in the output.
|
142 |
-
- **Seed**: Fixes randomness for reproducibility. Use -1 for a random seed.
|
143 |
-
"""
|
144 |
-
)
|
145 |
-
|
146 |
-
run_button.click(
|
147 |
-
respond,
|
148 |
-
inputs=[
|
149 |
-
user_input,
|
150 |
-
chatbot.state,
|
151 |
system_message,
|
152 |
max_tokens,
|
153 |
temperature,
|
154 |
top_p,
|
155 |
frequency_penalty,
|
156 |
-
seed
|
157 |
-
model,
|
158 |
-
custom_model
|
159 |
],
|
160 |
-
|
|
|
|
|
|
|
|
|
161 |
)
|
162 |
|
163 |
-
print("
|
164 |
-
|
|
|
|
|
|
|
|
21 |
temperature,
|
22 |
top_p,
|
23 |
frequency_penalty,
|
24 |
+
seed
|
|
|
|
|
25 |
):
|
26 |
"""
|
27 |
+
This function handles the chatbot response. It takes in:
|
28 |
+
- message: the user's new message
|
29 |
+
- history: the list of previous messages, each as a tuple (user_msg, assistant_msg)
|
30 |
+
- system_message: the system prompt
|
31 |
+
- max_tokens: the maximum number of tokens to generate in the response
|
32 |
+
- temperature: sampling temperature
|
33 |
+
- top_p: top-p (nucleus) sampling
|
34 |
+
- frequency_penalty: penalize repeated tokens in the output
|
35 |
+
- seed: a fixed seed for reproducibility; -1 will mean 'random'
|
36 |
"""
|
37 |
+
|
38 |
print(f"Received message: {message}")
|
39 |
print(f"History: {history}")
|
40 |
print(f"System message: {system_message}")
|
41 |
+
print(f"Max tokens: {max_tokens}, Temperature: {temperature}, Top-P: {top_p}")
|
42 |
+
print(f"Frequency Penalty: {frequency_penalty}, Seed: {seed}")
|
43 |
|
44 |
+
# Convert seed to None if -1 (meaning random)
|
45 |
+
if seed == -1:
|
46 |
+
seed = None
|
47 |
|
48 |
# Construct the messages array required by the API
|
49 |
messages = [{"role": "system", "content": system_message}]
|
|
|
66 |
response = ""
|
67 |
print("Sending request to OpenAI API.")
|
68 |
|
69 |
+
# Make the streaming request to the HF Inference API via openai-like client
|
70 |
for message_chunk in client.chat.completions.create(
|
71 |
+
model="meta-llama/Llama-3.3-70B-Instruct", # You can update this to your specific model
|
72 |
max_tokens=max_tokens,
|
73 |
+
stream=True, # Stream the response
|
74 |
temperature=temperature,
|
75 |
top_p=top_p,
|
76 |
+
frequency_penalty=frequency_penalty, # <-- NEW
|
77 |
+
seed=seed, # <-- NEW
|
78 |
messages=messages,
|
79 |
):
|
80 |
# Extract the token text from the response chunk
|
81 |
token_text = message_chunk.choices[0].delta.content
|
82 |
print(f"Received token: {token_text}")
|
83 |
response += token_text
|
84 |
+
# As streaming progresses, yield partial output
|
85 |
yield response
|
86 |
|
87 |
print("Completed response generation.")
|
88 |
|
89 |
+
# Chat display widget, fixed at 600 px tall. It is built here, ahead of the
# layout code, and handed to gr.ChatInterface via its `chatbot=` argument.
chatbot = gr.Chatbot(height=600)
print("Chatbot interface created.")

# Placeholder catalogue of featured models: it supplies the choices (and the
# default, entry 0) of the "Featured Models" radio and is what filter_models()
# searches through.
MODELS_LIST = [
    "meta-llama/Llama-3.1-8B-Instruct",
    "microsoft/Phi-3.5-mini-instruct",
]
|
97 |
|
|
|
98 |
def filter_models(search_term):
    """Narrow the featured-model choices to those matching a search string.

    Args:
        search_term: Text from the "Filter Models" textbox. ``None`` and
            whitespace-only input are treated as an empty query, which
            matches every entry of ``MODELS_LIST``.

    Returns:
        The value of ``gr.update(choices=...)``, where the choices are the
        entries of ``MODELS_LIST`` whose name contains the query, compared
        case-insensitively.
    """
    # Normalise once, outside the comprehension. The `or ""` guard keeps a
    # None value from raising AttributeError on .lower(), and strip() stops
    # stray leading/trailing spaces from hiding every model.
    query = (search_term or "").strip().lower()
    filtered_models = [m for m in MODELS_LIST if query in m.lower()]
    return gr.update(choices=filtered_models)
|
104 |
+
|
105 |
+
# --------------------------------------
# REBUILD THE INTERFACE USING BLOCKS
# --------------------------------------
print("Building Gradio interface with Blocks...")

with gr.Blocks(theme="Nymbo/Nymbo_Theme") as demo:
    # Title
    gr.Markdown("# Serverless-TextGen-Hub")

    # Accordion: Parameters (sliders, etc.)
    # These components are created here and then passed to the ChatInterface
    # below as `additional_inputs`, in the same order as the matching
    # parameters of respond().
    with gr.Accordion("Parameters", open=True):
        system_message = gr.Textbox(value="", label="System message")
        max_tokens = gr.Slider(minimum=1, maximum=4096, value=512, step=1, label="Max new tokens")
        temperature = gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature")
        top_p = gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-P")
        frequency_penalty = gr.Slider(minimum=-2.0, maximum=2.0, value=0.0, step=0.1, label="Frequency Penalty")
        # respond() turns -1 into None before calling the API.
        seed = gr.Slider(minimum=-1, maximum=65535, value=-1, step=1, label="Seed (-1 for random)")

    # Accordion: Featured Models (Below the parameters)
    # NOTE(review): model_radio is not among the ChatInterface's
    # additional_inputs and respond() hard-codes its model name, so this
    # selection currently has no effect on generation. Wiring it through
    # requires adding a parameter to respond().
    with gr.Accordion("Featured Models", open=False):
        model_search = gr.Textbox(
            label="Filter Models",
            placeholder="Search for a featured model...",
            lines=1,
        )
        model_radio = gr.Radio(
            label="Select a model below",
            value=MODELS_LIST[0],  # default
            choices=MODELS_LIST,
            interactive=True,
        )
        # Re-filter the radio choices whenever the search text changes.
        model_search.change(filter_models, inputs=model_search, outputs=model_radio)

    # The main ChatInterface
    # `title=` and `theme=` are deliberately not passed: the heading is
    # already rendered by the gr.Markdown above (passing `title=` would show
    # it a second time), and the theme comes from the enclosing gr.Blocks.
    chat_interface = gr.ChatInterface(
        fn=respond,
        additional_inputs=[
            system_message,
            max_tokens,
            temperature,
            top_p,
            frequency_penalty,
            seed,
        ],
        fill_height=True,
        chatbot=chatbot,
        description="A comprehensive UI for text generation using the HF Inference API.",
    )

print("Gradio interface initialized.")

if __name__ == "__main__":
    print("Launching the demo application.")
    demo.launch()
|