Nymbo committed · verified
Commit ce12e24 · 1 Parent(s): 98674ca

featured models and info tab

Files changed (1): app.py (+171 −57)
app.py CHANGED
@@ -22,7 +22,8 @@ def respond(
     top_p,
     frequency_penalty,
     seed,
-    custom_model
+    custom_model,
+    featured_model
 ):
     """
     This function handles the chatbot response. It takes in:
@@ -35,6 +36,7 @@ def respond(
     - frequency_penalty: penalize repeated tokens in the output
     - seed: a fixed seed for reproducibility; -1 will mean 'random'
     - custom_model: the user-provided custom model name (if any)
+    - featured_model: the model selected from the "Featured Models" radio
     """

     print(f"Received message: {message}")
@@ -43,6 +45,7 @@ def respond(
     print(f"Max tokens: {max_tokens}, Temperature: {temperature}, Top-P: {top_p}")
     print(f"Frequency Penalty: {frequency_penalty}, Seed: {seed}")
     print(f"Custom model: {custom_model}")
+    print(f"Featured model: {featured_model}")

     # Convert seed to None if -1 (meaning random)
     if seed == -1:
@@ -65,8 +68,15 @@ def respond(
     # Append the latest user message
     messages.append({"role": "user", "content": message})

-    # Determine which model to use: either custom_model or a default
-    model_to_use = custom_model.strip() if custom_model.strip() != "" else "meta-llama/Llama-3.3-70B-Instruct"
+    # Determine which model to use
+    # If custom_model is provided, that overrides everything.
+    # Otherwise, use the selected featured_model.
+    # If featured_model is empty, fall back on the default.
+    if custom_model.strip() != "":
+        model_to_use = custom_model.strip()
+    else:
+        model_to_use = featured_model.strip() if featured_model.strip() != "" else "meta-llama/Llama-3.3-70B-Instruct"
+
     print(f"Model selected for inference: {model_to_use}")

     # Start with an empty string to build the response as tokens stream in
@@ -75,9 +85,9 @@ def respond(

     # Make the streaming request to the HF Inference API via openai-like client
     for message_chunk in client.chat.completions.create(
-        model=model_to_use,  # Use either the user-provided custom model or default
+        model=model_to_use,
         max_tokens=max_tokens,
-        stream=True,  # Stream the response
+        stream=True,
         temperature=temperature,
         top_p=top_p,
         frequency_penalty=frequency_penalty,
@@ -88,7 +98,6 @@ def respond(
         token_text = message_chunk.choices[0].delta.content
         print(f"Received token: {token_text}")
         response += token_text
-        # Yield the partial response to Gradio so it can display in real-time
         yield response

     print("Completed response generation.")
@@ -97,57 +106,162 @@ def respond(
 chatbot = gr.Chatbot(height=600)
 print("Chatbot interface created.")

-# Create the Gradio ChatInterface
-# We add two new sliders for Frequency Penalty, Seed, and now a new "Custom Model" text box.
-demo = gr.ChatInterface(
-    fn=respond,
-    additional_inputs=[
-        gr.Textbox(value="", label="System message"),
-        gr.Slider(
-            minimum=1,
-            maximum=4096,
-            value=512,
-            step=1,
-            label="Max new tokens"
-        ),
-        gr.Slider(
-            minimum=0.1,
-            maximum=4.0,
-            value=0.7,
-            step=0.1,
-            label="Temperature"
-        ),
-        gr.Slider(
-            minimum=0.1,
-            maximum=1.0,
-            value=0.95,
-            step=0.05,
-            label="Top-P"
-        ),
-        gr.Slider(
-            minimum=-2.0,
-            maximum=2.0,
-            value=0.0,
-            step=0.1,
-            label="Frequency Penalty"
-        ),
-        gr.Slider(
-            minimum=-1,
-            maximum=65535,  # Arbitrary upper limit for demonstration
-            value=-1,
-            step=1,
-            label="Seed (-1 for random)"
-        ),
-        gr.Textbox(
-            value="",
-            label="Custom Model",
-            info="(Optional) Provide a custom Hugging Face model path. This will override the default model if not empty."
-        ),
-    ],
-    fill_height=True,
-    chatbot=chatbot,
-    theme="Nymbo/Nymbo_Theme",
-)
+####################################
+#          GRADIO UI SETUP         #
+####################################
+
+# 1) We'll create a set of placeholder featured models.
+all_featured_models = [
+    "meta-llama/Llama-2-7B-Chat-hf",
+    "meta-llama/Llama-2-13B-Chat-hf",
+    "bigscience/bloom",
+    "google/flan-t5-xxl",
+    "meta-llama/Llama-3.3-70B-Instruct"
+]
+
+def filter_featured_models(search_term):
+    """
+    Helper function to filter featured models by search text.
+    """
+    filtered = [m for m in all_featured_models if search_term.lower() in m.lower()]
+    # We'll return an update with the filtered list
+    return gr.update(choices=filtered)
+
+# 2) Create the ChatInterface with additional inputs
+with gr.Blocks(theme="Nymbo/Nymbo_Theme") as demo:
+    gr.Markdown("# Serverless Text Generation Hub")
+
+    # We'll organize content in tabs similar to the ImgGen-Hub
+    with gr.Tab("Chat"):
+        gr.Markdown("## Chat Interface")
+        chat_interface = gr.ChatInterface(
+            fn=respond,
+            additional_inputs=[
+                gr.Textbox(value="", label="System message"),
+                gr.Slider(
+                    minimum=1,
+                    maximum=4096,
+                    value=512,
+                    step=1,
+                    label="Max new tokens"
+                ),
+                gr.Slider(
+                    minimum=0.1,
+                    maximum=4.0,
+                    value=0.7,
+                    step=0.1,
+                    label="Temperature"
+                ),
+                gr.Slider(
+                    minimum=0.1,
+                    maximum=1.0,
+                    value=0.95,
+                    step=0.05,
+                    label="Top-P"
+                ),
+                gr.Slider(
+                    minimum=-2.0,
+                    maximum=2.0,
+                    value=0.0,
+                    step=0.1,
+                    label="Frequency Penalty"
+                ),
+                gr.Slider(
+                    minimum=-1,
+                    maximum=65535,
+                    value=-1,
+                    step=1,
+                    label="Seed (-1 for random)"
+                ),
+                gr.Textbox(
+                    value="",
+                    label="Custom Model",
+                    info="(Optional) Provide a custom Hugging Face model path. This overrides the featured model if not empty."
+                ),
+            ],
+            fill_height=True,
+            chatbot=chatbot
+        )
+
+        # We'll add a new accordion for "Featured Models" within the Chat tab
+        with gr.Accordion("Featured Models", open=True):
+            gr.Markdown("Pick one of the placeholder featured models below, or search for more.")
+            featured_model_search = gr.Textbox(
+                label="Filter Models",
+                placeholder="Type to filter featured models..."
+            )
+            featured_model_radio = gr.Radio(
+                label="Select a featured model",
+                choices=all_featured_models,
+                value="meta-llama/Llama-3.3-70B-Instruct"
+            )
+            # Connect the search box to the filter function
+            featured_model_search.change(
+                filter_featured_models,
+                inputs=featured_model_search,
+                outputs=featured_model_radio
+            )
+
+        # We must connect the featured_model_radio to the chat interface
+        # We'll pass it as the last argument in the respond function.
+        chat_interface.add_variable(featured_model_radio, "featured_model")
+
+    # 3) Create the "Information" tab, containing:
+    #    - A "Featured Models" accordion with a table
+    #    - A "Parameters Overview" accordion with markdown
+    with gr.Tab("Information"):
+        gr.Markdown("## Additional Information and Help")
+        with gr.Accordion("Featured Models (Table)", open=False):
+            gr.Markdown("""
+            Here is a table of some placeholder featured models:
+            <table style="width:100%; text-align:center; margin:auto;">
+                <tr>
+                    <th>Model</th>
+                    <th>Description</th>
+                </tr>
+                <tr>
+                    <td>meta-llama/Llama-2-7B-Chat-hf</td>
+                    <td>A 7B parameter Llama 2 Chat model</td>
+                </tr>
+                <tr>
+                    <td>meta-llama/Llama-2-13B-Chat-hf</td>
+                    <td>A 13B parameter Llama 2 Chat model</td>
+                </tr>
+                <tr>
+                    <td>bigscience/bloom</td>
+                    <td>Large-scale multilingual model</td>
+                </tr>
+                <tr>
+                    <td>google/flan-t5-xxl</td>
+                    <td>A large instruction-tuned T5 model</td>
+                </tr>
+                <tr>
+                    <td>meta-llama/Llama-3.3-70B-Instruct</td>
+                    <td>70B parameter Llama 3.3 instruct model</td>
+                </tr>
+            </table>
+            """)
+
+        with gr.Accordion("Parameters Overview", open=False):
+            gr.Markdown("""
+            **Here’s a quick breakdown of the main parameters you’ll find in this interface:**
+
+            - **Max New Tokens**: This controls the maximum number of tokens (words or subwords) in the generated response.
+            - **Temperature**: Adjusts how 'creative' or random the model's output is. A low temperature keeps it more predictable; a high temperature makes it more varied or 'wacky.'
+            - **Top-P**: Also known as nucleus sampling. Controls how the model decides which words to include. Lower means more conservative, higher means more open.
+            - **Frequency Penalty**: A value to penalize repeated words or phrases. Higher penalty means the model will avoid repeating itself.
+            - **Seed**: Fix a random seed for reproducibility. If set to -1, a random seed is used each time.
+            - **Custom Model**: Provide the full Hugging Face model path (like `bigscience/bloom`) if you'd like to override the default or the featured model you selected above.
+
+            ### Usage Tips
+            1. If you’d like to use one of the featured models, simply select it from the list in the **Featured Models** accordion.
+            2. If you’d like to override the featured models, type your own custom path in **Custom Model**.
+            3. Adjust your parameters (temperature, top-p, etc.) if you want different styles of results.
+            4. You can provide a **System message** to guide the overall behavior or 'role' of the AI. For example, you can say "You are a helpful coding assistant" or something else to set the context.
+
+            Feel free to play around with these settings, and if you have any questions, check out the Hugging Face docs or ask in the community spaces!
+            """)
+
 print("Gradio interface initialized.")

 if __name__ == "__main__":
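
The precedence rule this commit introduces is: a non-empty Custom Model box wins, then the Featured Models radio selection, then the hard-coded default. A minimal, self-contained sketch of that rule follows; resolve_model and DEFAULT_MODEL are illustrative names for this sketch only, since the commit inlines the logic directly inside respond():

# Sketch of the model-selection precedence used in respond() above.
# resolve_model and DEFAULT_MODEL are hypothetical names, not
# identifiers from the committed file.
DEFAULT_MODEL = "meta-llama/Llama-3.3-70B-Instruct"

def resolve_model(custom_model: str, featured_model: str) -> str:
    if custom_model.strip():        # a user-typed path overrides everything
        return custom_model.strip()
    if featured_model.strip():      # otherwise use the radio selection
        return featured_model.strip()
    return DEFAULT_MODEL            # final fallback

# Usage:
assert resolve_model("", "") == DEFAULT_MODEL
assert resolve_model("", "bigscience/bloom") == "bigscience/bloom"
assert resolve_model("my-org/my-model", "bigscience/bloom") == "my-org/my-model"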