Nymbo committed on
Commit 9b9dccd · verified · 1 Parent(s): ce12e24

Update app.py

Files changed (1)
  1. app.py +57 -171
app.py CHANGED
@@ -22,8 +22,7 @@ def respond(
     top_p,
     frequency_penalty,
     seed,
-    custom_model,
-    featured_model
+    custom_model
 ):
     """
     This function handles the chatbot response. It takes in:
@@ -36,7 +35,6 @@ def respond(
     - frequency_penalty: penalize repeated tokens in the output
     - seed: a fixed seed for reproducibility; -1 will mean 'random'
     - custom_model: the user-provided custom model name (if any)
-    - featured_model: the model selected from the "Featured Models" radio
     """

     print(f"Received message: {message}")
@@ -45,7 +43,6 @@ def respond(
     print(f"Max tokens: {max_tokens}, Temperature: {temperature}, Top-P: {top_p}")
     print(f"Frequency Penalty: {frequency_penalty}, Seed: {seed}")
     print(f"Custom model: {custom_model}")
-    print(f"Featured model: {featured_model}")

     # Convert seed to None if -1 (meaning random)
     if seed == -1:
@@ -68,15 +65,8 @@ def respond(
     # Append the latest user message
     messages.append({"role": "user", "content": message})

-    # Determine which model to use
-    # If custom_model is provided, that overrides everything.
-    # Otherwise, use the selected featured_model.
-    # If featured_model is empty, fall back on the default.
-    if custom_model.strip() != "":
-        model_to_use = custom_model.strip()
-    else:
-        model_to_use = featured_model.strip() if featured_model.strip() != "" else "meta-llama/Llama-3.3-70B-Instruct"
-
+    # Determine which model to use: either custom_model or a default
+    model_to_use = custom_model.strip() if custom_model.strip() != "" else "meta-llama/Llama-3.3-70B-Instruct"
     print(f"Model selected for inference: {model_to_use}")

     # Start with an empty string to build the response as tokens stream in
@@ -85,9 +75,9 @@ def respond(

     # Make the streaming request to the HF Inference API via openai-like client
     for message_chunk in client.chat.completions.create(
-        model=model_to_use,
+        model=model_to_use,  # Use either the user-provided custom model or default
         max_tokens=max_tokens,
-        stream=True,
+        stream=True,  # Stream the response
         temperature=temperature,
         top_p=top_p,
         frequency_penalty=frequency_penalty,
@@ -98,6 +88,7 @@ def respond(
         token_text = message_chunk.choices[0].delta.content
         print(f"Received token: {token_text}")
         response += token_text
+        # Yield the partial response to Gradio so it can display in real-time
         yield response

     print("Completed response generation.")
@@ -106,162 +97,57 @@ def respond(
 chatbot = gr.Chatbot(height=600)
 print("Chatbot interface created.")

-####################################
-#          GRADIO UI SETUP         #
-####################################
-
-# 1) We'll create a set of placeholder featured models.
-all_featured_models = [
-    "meta-llama/Llama-2-7B-Chat-hf",
-    "meta-llama/Llama-2-13B-Chat-hf",
-    "bigscience/bloom",
-    "google/flan-t5-xxl",
-    "meta-llama/Llama-3.3-70B-Instruct"
-]
-
-def filter_featured_models(search_term):
-    """
-    Helper function to filter featured models by search text.
-    """
-    filtered = [m for m in all_featured_models if search_term.lower() in m.lower()]
-    # We'll return an update with the filtered list
-    return gr.update(choices=filtered)
-
-# 2) Create the ChatInterface with additional inputs
-with gr.Blocks(theme="Nymbo/Nymbo_Theme") as demo:
-    gr.Markdown("# Serverless Text Generation Hub")
-
-    # We'll organize content in tabs similar to the ImgGen-Hub
-    with gr.Tab("Chat"):
-        gr.Markdown("## Chat Interface")
-        chat_interface = gr.ChatInterface(
-            fn=respond,
-            additional_inputs=[
-                gr.Textbox(value="", label="System message"),
-                gr.Slider(
-                    minimum=1,
-                    maximum=4096,
-                    value=512,
-                    step=1,
-                    label="Max new tokens"
-                ),
-                gr.Slider(
-                    minimum=0.1,
-                    maximum=4.0,
-                    value=0.7,
-                    step=0.1,
-                    label="Temperature"
-                ),
-                gr.Slider(
-                    minimum=0.1,
-                    maximum=1.0,
-                    value=0.95,
-                    step=0.05,
-                    label="Top-P"
-                ),
-                gr.Slider(
-                    minimum=-2.0,
-                    maximum=2.0,
-                    value=0.0,
-                    step=0.1,
-                    label="Frequency Penalty"
-                ),
-                gr.Slider(
-                    minimum=-1,
-                    maximum=65535,
-                    value=-1,
-                    step=1,
-                    label="Seed (-1 for random)"
-                ),
-                gr.Textbox(
-                    value="",
-                    label="Custom Model",
-                    info="(Optional) Provide a custom Hugging Face model path. This overrides the featured model if not empty."
-                ),
-            ],
-            fill_height=True,
-            chatbot=chatbot
-        )
-
-        # We'll add a new accordion for "Featured Models" within the Chat tab
-        with gr.Accordion("Featured Models", open=True):
-            gr.Markdown("Pick one of the placeholder featured models below, or search for more.")
-            featured_model_search = gr.Textbox(
-                label="Filter Models",
-                placeholder="Type to filter featured models..."
-            )
-            featured_model_radio = gr.Radio(
-                label="Select a featured model",
-                choices=all_featured_models,
-                value="meta-llama/Llama-3.3-70B-Instruct"
-            )
-            # Connect the search box to the filter function
-            featured_model_search.change(
-                filter_featured_models,
-                inputs=featured_model_search,
-                outputs=featured_model_radio
-            )
-
-    # We must connect the featured_model_radio to the chat interface
-    # We'll pass it as the last argument in the respond function.
-    chat_interface.add_variable(featured_model_radio, "featured_model")
-
-    # 3) Create the "Information" tab, containing:
-    #    - A "Featured Models" accordion with a table
-    #    - A "Parameters Overview" accordion with markdown
-    with gr.Tab("Information"):
-        gr.Markdown("## Additional Information and Help")
-        with gr.Accordion("Featured Models (Table)", open=False):
-            gr.Markdown("""
-            Here is a table of some placeholder featured models:
-            <table style="width:100%; text-align:center; margin:auto;">
-                <tr>
-                    <th>Model</th>
-                    <th>Description</th>
-                </tr>
-                <tr>
-                    <td>meta-llama/Llama-2-7B-Chat-hf</td>
-                    <td>A 7B parameter Llama 2 Chat model</td>
-                </tr>
-                <tr>
-                    <td>meta-llama/Llama-2-13B-Chat-hf</td>
-                    <td>A 13B parameter Llama 2 Chat model</td>
-                </tr>
-                <tr>
-                    <td>bigscience/bloom</td>
-                    <td>Large-scale multilingual model</td>
-                </tr>
-                <tr>
-                    <td>google/flan-t5-xxl</td>
-                    <td>A large instruction-tuned T5 model</td>
-                </tr>
-                <tr>
-                    <td>meta-llama/Llama-3.3-70B-Instruct</td>
-                    <td>70B parameter Llama 3.3 instruct model</td>
-                </tr>
-            </table>
-            """)
-
-        with gr.Accordion("Parameters Overview", open=False):
-            gr.Markdown("""
-            **Here’s a quick breakdown of the main parameters you’ll find in this interface:**
-
-            - **Max New Tokens**: This controls the maximum number of tokens (words or subwords) in the generated response.
-            - **Temperature**: Adjusts how 'creative' or random the model's output is. A low temperature keeps it more predictable; a high temperature makes it more varied or 'wacky.'
-            - **Top-P**: Also known as nucleus sampling. Controls how the model decides which words to include. Lower means more conservative, higher means more open.
-            - **Frequency Penalty**: A value to penalize repeated words or phrases. Higher penalty means the model will avoid repeating itself.
-            - **Seed**: Fix a random seed for reproducibility. If set to -1, a random seed is used each time.
-            - **Custom Model**: Provide the full Hugging Face model path (like `bigscience/bloom`) if you'd like to override the default or the featured model you selected above.
-
-            ### Usage Tips
-            1. If you’d like to use one of the featured models, simply select it from the list in the **Featured Models** accordion.
-            2. If you’d like to override the featured models, type your own custom path in **Custom Model**.
-            3. Adjust your parameters (temperature, top-p, etc.) if you want different styles of results.
-            4. You can provide a **System message** to guide the overall behavior or 'role' of the AI. For example, you can say "You are a helpful coding assistant" or something else to set the context.
-
-            Feel free to play around with these settings, and if you have any questions, check out the Hugging Face docs or ask in the community spaces!
-            """)
-
+# Create the Gradio ChatInterface
+# We add two new sliders for Frequency Penalty, Seed, and now a new "Custom Model" text box.
+demo = gr.ChatInterface(
+    fn=respond,
+    additional_inputs=[
+        gr.Textbox(value="", label="System message"),
+        gr.Slider(
+            minimum=1,
+            maximum=4096,
+            value=512,
+            step=1,
+            label="Max new tokens"
+        ),
+        gr.Slider(
+            minimum=0.1,
+            maximum=4.0,
+            value=0.7,
+            step=0.1,
+            label="Temperature"
+        ),
+        gr.Slider(
+            minimum=0.1,
+            maximum=1.0,
+            value=0.95,
+            step=0.05,
+            label="Top-P"
+        ),
+        gr.Slider(
+            minimum=-2.0,
+            maximum=2.0,
+            value=0.0,
+            step=0.1,
+            label="Frequency Penalty"
+        ),
+        gr.Slider(
+            minimum=-1,
+            maximum=65535,  # Arbitrary upper limit for demonstration
+            value=-1,
+            step=1,
+            label="Seed (-1 for random)"
+        ),
+        gr.Textbox(
+            value="",
+            label="Custom Model",
+            info="(Optional) Provide a custom Hugging Face model path. This will override the default model if not empty."
+        ),
+    ],
+    fill_height=True,
+    chatbot=chatbot,
+    theme="Nymbo/Nymbo_Theme",
+)
 print("Gradio interface initialized.")

 if __name__ == "__main__":
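
The hunks above reference pieces of app.py that the diff does not display: the import block, the module-level `client` used by `client.chat.completions.create(...)`, and the leading parameters of `respond` (which `gr.ChatInterface` fills positionally, appending `additional_inputs` in order after `message` and `history`). Below is a minimal sketch of how the updated code plausibly fits together, not the committed file; the imports, the `HF_TOKEN` environment variable, and the client construction against the Hugging Face Inference API are assumptions inferred from the "openai-like client" comment in the diff.

    # Minimal sketch, NOT the committed file. Assumed: imports, HF_TOKEN,
    # and the OpenAI-compatible client pointed at the HF Inference API.
    import os
    import gradio as gr
    from openai import OpenAI

    client = OpenAI(
        base_url="https://api-inference.huggingface.co/v1/",  # assumed endpoint
        api_key=os.environ.get("HF_TOKEN"),                   # assumed token source
    )

    def respond(message, history, system_message, max_tokens,
                temperature, top_p, frequency_penalty, seed, custom_model):
        # -1 means "random"; the API expects None in that case
        if seed == -1:
            seed = None

        # Rebuild the conversation from Gradio's (user, assistant) history pairs
        messages = [{"role": "system", "content": system_message}]
        for user_msg, assistant_msg in history:
            if user_msg:
                messages.append({"role": "user", "content": user_msg})
            if assistant_msg:
                messages.append({"role": "assistant", "content": assistant_msg})
        messages.append({"role": "user", "content": message})

        # The commit's simplified selection: custom model if given, else default
        model_to_use = custom_model.strip() or "meta-llama/Llama-3.3-70B-Instruct"

        response = ""
        for chunk in client.chat.completions.create(
            model=model_to_use,
            messages=messages,
            max_tokens=max_tokens,
            stream=True,
            temperature=temperature,
            top_p=top_p,
            frequency_penalty=frequency_penalty,
            seed=seed,
        ):
            # delta.content can be None on some stream chunks; guard before appending
            token_text = chunk.choices[0].delta.content or ""
            response += token_text
            yield response

Note that `custom_model.strip() or default` behaves the same as the diff's explicit `!= ""` check, and the `or ""` guard covers stream chunks whose `delta.content` is `None`; the committed loop appends `token_text` without that guard.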