Nymbo committed (verified)
Commit 62429d1 · Parent(s): 204f2fd

adding info tab with featured models table and parameters overview

Files changed (1)
  1. app.py +252 -154
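
In outline, the commit wraps the existing chat UI in a gr.Blocks layout with two tabs. The skeleton below is distilled from the additions in the diff; the "..." bodies stand in for elided contents and are not part of the committed file:

import gradio as gr

# Skeleton of the new layout introduced by this commit (bodies elided)
with gr.Blocks(theme="Nymbo/Nymbo_Theme") as demo:
    with gr.Tab("Information"):
        with gr.Accordion("Featured Models (WiP)", open=False):
            ...  # HTML table of featured models
        with gr.Accordion("Parameters Overview", open=False):
            ...  # Markdown describing the generation parameters
    with gr.Tab("Chat"):
        ...  # chatbot, parameter sliders, ChatInterface, and the Featured Models accordion
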
app.py CHANGED
@@ -2,11 +2,26 @@ import gradio as gr
 from openai import OpenAI
 import os
 
+# -------------------
+# SERVERLESS-TEXTGEN-HUB
+# -------------------
+#
+# This version has been updated to include an "Information" tab above the Chat tab.
+# The Information tab has two accordions:
+# - "Featured Models" which displays a simple table
+# - "Parameters Overview" which contains markdown describing the settings
+#
+# The Chat tab contains the existing chatbot UI.
+
+# -------------------
+# SETUP AND CONFIG
+# -------------------
+
 # Retrieve the access token from the environment variable
 ACCESS_TOKEN = os.getenv("HF_TOKEN")
 print("Access token loaded.")
 
-# Initialize the OpenAI client with the Hugging Face Inference API endpoint
+# Initialize the OpenAI-like client (Hugging Face Inference API) with your token
 client = OpenAI(
     base_url="https://api-inference.huggingface.co/v1/",
     api_key=ACCESS_TOKEN,
@@ -48,19 +63,19 @@ def respond(
     if seed == -1:
         seed = None
 
-    # Construct the messages array required by the API
+    # Construct the messages array required by the HF Inference API
     messages = [{"role": "system", "content": system_message}]
     print("Initial messages array constructed.")
 
     # Add conversation history to the context
     for val in history:
         user_part = val[0]  # Extract user message from the tuple
-        assistant_part = val[1]  # Extract assistant message from the tuple
+        assistant_part = val[1]  # Extract assistant message
         if user_part:
-            messages.append({"role": "user", "content": user_part})  # Append user message
+            messages.append({"role": "user", "content": user_part})
             print(f"Added user message to context: {user_part}")
         if assistant_part:
-            messages.append({"role": "assistant", "content": assistant_part})  # Append assistant message
+            messages.append({"role": "assistant", "content": assistant_part})
             print(f"Added assistant message to context: {assistant_part}")
 
     # Append the latest user message
@@ -71,179 +86,262 @@
     model_to_use = custom_model.strip() if custom_model.strip() != "" else "meta-llama/Llama-3.3-70B-Instruct"
     print(f"Model selected for inference: {model_to_use}")
 
-    # Start with an empty string to build the response as tokens stream in
-    response = ""
-    print("Sending request to OpenAI API.")
+    # Start with an empty string to build the streamed response
+    response_text = ""
+    print("Sending request to Hugging Face Inference API via OpenAI-like client...")
 
-    # Make the streaming request to the HF Inference API via openai-like client
+    # Make the streaming request to the HF Inference API
     for message_chunk in client.chat.completions.create(
-        model=model_to_use,  # Use either the user-provided or default model
-        max_tokens=max_tokens,  # Maximum tokens for the response
-        stream=True,  # Enable streaming responses
-        temperature=temperature,  # Adjust randomness in response
-        top_p=top_p,  # Control diversity in response generation
-        frequency_penalty=frequency_penalty,  # Penalize repeated phrases
-        seed=seed,  # Set random seed for reproducibility
-        messages=messages,  # Contextual conversation messages
+        model=model_to_use,
+        max_tokens=max_tokens,
+        stream=True,
+        temperature=temperature,
+        top_p=top_p,
+        frequency_penalty=frequency_penalty,
+        seed=seed,
+        messages=messages,
     ):
         # Extract the token text from the response chunk
         token_text = message_chunk.choices[0].delta.content
         print(f"Received token: {token_text}")
-        response += token_text
+        response_text += token_text
         # Yield the partial response to Gradio so it can display in real-time
-        yield response
+        yield response_text
 
     print("Completed response generation.")
 
-# -------------------------
-# GRADIO UI CONFIGURATION
-# -------------------------
-
-# Create a Chatbot component with a specified height
-chatbot = gr.Chatbot(height=600)  # Define the height of the chatbot interface
-print("Chatbot interface created.")
-
-# Create textboxes and sliders for system prompt, tokens, and other parameters
-system_message_box = gr.Textbox(value="", label="System message")  # Input box for system message
-
-max_tokens_slider = gr.Slider(
-    minimum=1,  # Minimum allowable tokens
-    maximum=4096,  # Maximum allowable tokens
-    value=512,  # Default value
-    step=1,  # Increment step size
-    label="Max new tokens"  # Slider label
-)
-temperature_slider = gr.Slider(
-    minimum=0.1,  # Minimum temperature
-    maximum=4.0,  # Maximum temperature
-    value=0.7,  # Default value
-    step=0.1,  # Increment step size
-    label="Temperature"  # Slider label
-)
-top_p_slider = gr.Slider(
-    minimum=0.1,  # Minimum top-p value
-    maximum=1.0,  # Maximum top-p value
-    value=0.95,  # Default value
-    step=0.05,  # Increment step size
-    label="Top-P"  # Slider label
-)
-frequency_penalty_slider = gr.Slider(
-    minimum=-2.0,  # Minimum penalty
-    maximum=2.0,  # Maximum penalty
-    value=0.0,  # Default value
-    step=0.1,  # Increment step size
-    label="Frequency Penalty"  # Slider label
-)
-seed_slider = gr.Slider(
-    minimum=-1,  # -1 for random seed
-    maximum=65535,  # Maximum seed value
-    value=-1,  # Default value
-    step=1,  # Increment step size
-    label="Seed (-1 for random)"  # Slider label
-)
-
-# The custom_model_box is what the respond function sees as "custom_model"
-custom_model_box = gr.Textbox(
-    value="",  # Default value
-    label="Custom Model",  # Label for the textbox
-    info="(Optional) Provide a custom Hugging Face model path. Overrides any selected featured model."  # Additional info
-)
-
-# Define a function that updates the custom model box when a featured model is selected
-def set_custom_model_from_radio(selected):
-    """
-    This function will get triggered whenever someone picks a model from the 'Featured Models' radio.
-    We will update the Custom Model text box with that selection automatically.
-    """
-    print(f"Featured model selected: {selected}")  # Log selected model
-    return selected
-
-# Create the main ChatInterface object
-demo = gr.ChatInterface(
-    fn=respond,  # The function to handle responses
-    additional_inputs=[
-        system_message_box,  # System message input
-        max_tokens_slider,  # Max tokens slider
-        temperature_slider,  # Temperature slider
-        top_p_slider,  # Top-P slider
-        frequency_penalty_slider,  # Frequency penalty slider
-        seed_slider,  # Seed slider
-        custom_model_box  # Custom model input
-    ],
-    fill_height=True,  # Allow the chatbot to fill the container height
-    chatbot=chatbot,  # Chatbot UI component
-    theme="Nymbo/Nymbo_Theme",  # Theme for the interface
-)
-
-print("ChatInterface object created.")
-
-# -----------
-# ADDING THE "FEATURED MODELS" ACCORDION
-# -----------
-with demo:
-    with gr.Accordion("Featured Models", open=False):  # Collapsible section for featured models
-        model_search_box = gr.Textbox(
-            label="Filter Models",  # Label for the search box
-            placeholder="Search for a featured model...",  # Placeholder text
-            lines=1  # Single-line input
-        )
-        print("Model search box created.")
-
-        # Sample list of popular text models
-        models_list = [
-            "meta-llama/Llama-3.3-70B-Instruct",
-            "meta-llama/Llama-3.2-3B-Instruct",
-            "meta-llama/Llama-3.2-1B-Instruct",
-            "meta-llama/Llama-3.1-8B-Instruct",
-            "NousResearch/Hermes-3-Llama-3.1-8B",
-            "google/gemma-2-27b-it",
-            "google/gemma-2-9b-it",
-            "google/gemma-2-2b-it",
-            "mistralai/Mistral-Nemo-Instruct-2407",
-            "mistralai/Mixtral-8x7B-Instruct-v0.1",
-            "mistralai/Mistral-7B-Instruct-v0.3",
-            "Qwen/Qwen2.5-72B-Instruct",
-            "Qwen/QwQ-32B-Preview",
-            "PowerInfer/SmallThinker-3B-Preview",
-            "HuggingFaceTB/SmolLM2-1.7B-Instruct",
-            "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
-            "microsoft/Phi-3.5-mini-instruct",
-        ]
-        print("Models list initialized.")
-
-        featured_model_radio = gr.Radio(
-            label="Select a model below",  # Label for the radio buttons
-            choices=models_list,  # List of available models
-            value="meta-llama/Llama-3.3-70B-Instruct",  # Default selection
-            interactive=True  # Allow user interaction
-        )
-        print("Featured models radio button created.")
-
-        # Filter function for the radio button list
-        def filter_models(search_term):
-            print(f"Filtering models with search term: {search_term}")  # Log the search term
-            filtered = [m for m in models_list if search_term.lower() in m.lower()]  # Filter by search term
-            print(f"Filtered models: {filtered}")  # Log filtered models
-            return gr.update(choices=filtered)
-
-        # Update the radio list when the search box value changes
-        model_search_box.change(
-            fn=filter_models,  # Function to filter models
-            inputs=model_search_box,  # Input: search box value
-            outputs=featured_model_radio  # Output: update radio button list
-        )
-        print("Model search box change event linked.")
-
-        # Update the custom model textbox when a featured model is selected
-        featured_model_radio.change(
-            fn=set_custom_model_from_radio,  # Function to set custom model
-            inputs=featured_model_radio,  # Input: selected model
-            outputs=custom_model_box  # Output: update custom model textbox
-        )
-        print("Featured model radio button change event linked.")
+# ----------------------
+# BUILDING THE INTERFACE
+# ----------------------
+
+# We will use a "Blocks" layout with two tabs:
+# 1) "Information" tab, which shows helpful info and a table of "Featured Models"
+# 2) "Chat" tab, which holds our ChatInterface and associated controls
+
+with gr.Blocks(theme="Nymbo/Nymbo_Theme") as demo:
+
+    # -----------------
+    # TAB: INFORMATION
+    # -----------------
+    with gr.Tab("Information"):
+        # You can add instructions, disclaimers, or helpful text here
+        gr.Markdown("## Welcome to Serverless-TextGen-Hub - Information")
+
+        # Accordion for Featured Models (table)
+        with gr.Accordion("Featured Models (WiP)", open=False):
+            gr.HTML(
+                """
+                <p><a href="https://huggingface.co/models?inference=warm&pipeline_tag=chat&sort=trending" target="_blank">See all available text models on Hugging Face</a></p>
+                <table style="width:100%; text-align:center; margin:auto;">
+                    <tr>
+                        <th>Model Name</th>
+                        <th>Supported</th>
+                        <th>Notes</th>
+                    </tr>
+                    <tr>
+                        <td>meta-llama/Llama-3.3-70B-Instruct</td>
+                        <td>✅</td>
+                        <td>Default model, if none is provided in the 'Custom Model' box.</td>
+                    </tr>
+                    <tr>
+                        <td>meta-llama/Llama-3.2-3B-Instruct</td>
+                        <td>✅</td>
+                        <td>Smaller Llama-based instruct model for faster responses.</td>
+                    </tr>
+                    <tr>
+                        <td>microsoft/Phi-3.5-mini-instruct</td>
+                        <td>✅</td>
+                        <td>A smaller instruct model from Microsoft.</td>
+                    </tr>
+                    <tr>
+                        <td>Qwen/Qwen2.5-72B-Instruct</td>
+                        <td>✅</td>
+                        <td>Large-scale Qwen-based model.</td>
+                    </tr>
+                </table>
+                """
+            )
+
+        # Accordion for Parameters Overview
+        with gr.Accordion("Parameters Overview", open=False):
+            gr.Markdown(
+                """
+                **Here is a brief overview of the main parameters for text generation:**
+
+                - **Max Tokens**: The maximum number of tokens (think of these as word-pieces) the model will generate in its response.
+                - **Temperature**: Controls how "creative" or random the output is. Lower values = more deterministic, higher values = more varied.
+                - **Top-P**: Similar to temperature, but uses nucleus sampling. Top-P defines the probability mass of the tokens to sample from. For example, `top_p=0.9` means "use the top 90% probable tokens."
+                - **Frequency Penalty**: A higher penalty discourages repeated tokens, helping reduce repetitive answers.
+                - **Seed**: You can set a seed for deterministic results. `-1` means random each time.
+
+                **Featured Models** can also be selected. If you want to override the model, you may specify a custom Hugging Face model path in the "Custom Model" text box.
+
+                ---
+                If you are new to text-generation parameters, the defaults are a great place to start!
+                """
+            )
+
+    # -----------
+    # TAB: CHAT
+    # -----------
+    with gr.Tab("Chat"):
+        gr.Markdown("## Chat with the TextGen Model")
+
+        # Create a Chatbot component with a specified height
+        chatbot = gr.Chatbot(height=600)
+        print("Chatbot interface created.")
+
+        # Create textboxes and sliders for system prompt, tokens, and other parameters
+        system_message_box = gr.Textbox(
+            value="",
+            label="System message",
+            info="You can use this to provide instructions or context to the assistant. Leave empty if not needed."
+        )
+
+        max_tokens_slider = gr.Slider(
+            minimum=1,
+            maximum=4096,
+            value=512,
+            step=1,
+            label="Max new tokens",
+            info="Controls the maximum length of the output. Keep an eye on your usage!"
+        )
+
+        temperature_slider = gr.Slider(
+            minimum=0.1,
+            maximum=4.0,
+            value=0.7,
+            step=0.1,
+            label="Temperature",
+            info="Controls creativity. Higher values = more random replies, lower = more deterministic."
+        )
+
+        top_p_slider = gr.Slider(
+            minimum=0.1,
+            maximum=1.0,
+            value=0.95,
+            step=0.05,
+            label="Top-P",
+            info="Use nucleus sampling with probability mass cutoff. 1.0 includes all tokens."
+        )
+
+        frequency_penalty_slider = gr.Slider(
+            minimum=-2.0,
+            maximum=2.0,
+            value=0.0,
+            step=0.1,
+            label="Frequency Penalty",
+            info="Penalize repeated tokens to avoid repetition in output."
+        )
+
+        seed_slider = gr.Slider(
+            minimum=-1,
+            maximum=65535,
+            value=-1,
+            step=1,
+            label="Seed (-1 for random)",
+            info="Fixing a seed (0 to 65535) can make results reproducible. -1 picks a random seed each time."
+        )
+
+        # The custom_model_box is what the respond function sees as "custom_model"
+        custom_model_box = gr.Textbox(
+            value="",
+            label="Custom Model",
+            info="(Optional) Provide a custom Hugging Face model path. Overrides any selected featured model."
+        )
+
+        # Function to update the custom model box when a featured model is selected
+        def set_custom_model_from_radio(selected):
+            print(f"Featured model selected: {selected}")
+            return selected
+
+        print("ChatInterface object created.")
+
+        # The main ChatInterface call
+        chat_interface = gr.ChatInterface(
+            fn=respond,  # The function to handle responses
+            additional_inputs=[
+                system_message_box,
+                max_tokens_slider,
+                temperature_slider,
+                top_p_slider,
+                frequency_penalty_slider,
+                seed_slider,
+                custom_model_box
+            ],
+            fill_height=True,  # Let the chatbot fill the container height
+            chatbot=chatbot,  # The Chatbot UI component
+            theme="Nymbo/Nymbo_Theme",
+        )
+
+        print("Gradio interface for Chat created.")
+
+        # -----------
+        # ADDING THE "FEATURED MODELS" ACCORDION (Same logic as before)
+        # -----------
+        with gr.Accordion("Featured Models", open=False):
+            model_search_box = gr.Textbox(
+                label="Filter Models",
+                placeholder="Search for a featured model...",
+                lines=1
+            )
+            print("Model search box created.")
+
+            # Sample list of popular text models
+            models_list = [
+                "meta-llama/Llama-3.3-70B-Instruct",
+                "meta-llama/Llama-3.2-3B-Instruct",
+                "meta-llama/Llama-3.2-1B-Instruct",
+                "meta-llama/Llama-3.1-8B-Instruct",
+                "NousResearch/Hermes-3-Llama-3.1-8B",
+                "google/gemma-2-27b-it",
+                "google/gemma-2-9b-it",
+                "google/gemma-2-2b-it",
+                "mistralai/Mistral-Nemo-Instruct-2407",
+                "mistralai/Mixtral-8x7B-Instruct-v0.1",
+                "mistralai/Mistral-7B-Instruct-v0.3",
+                "Qwen/Qwen2.5-72B-Instruct",
+                "Qwen/QwQ-32B-Preview",
+                "PowerInfer/SmallThinker-3B-Preview",
+                "HuggingFaceTB/SmolLM2-1.7B-Instruct",
+                "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
+                "microsoft/Phi-3.5-mini-instruct",
+            ]
+            print("Models list initialized.")
+
+            featured_model_radio = gr.Radio(
+                label="Select a model below",
+                choices=models_list,
+                value="meta-llama/Llama-3.3-70B-Instruct",
+                interactive=True
+            )
+            print("Featured models radio button created.")
+
+            def filter_models(search_term):
+                print(f"Filtering models with search term: {search_term}")
+                filtered = [m for m in models_list if search_term.lower() in m.lower()]
+                print(f"Filtered models: {filtered}")
+                return gr.update(choices=filtered)
+
+            model_search_box.change(
+                fn=filter_models,
+                inputs=model_search_box,
+                outputs=featured_model_radio
+            )
+            print("Model search box change event linked.")
+
+            featured_model_radio.change(
+                fn=set_custom_model_from_radio,
+                inputs=featured_model_radio,
+                outputs=custom_model_box
+            )
+            print("Featured model radio button change event linked.")
 
 print("Gradio interface initialized.")
 
+# ------------------------
+# MAIN ENTRY POINT
+# ------------------------
 if __name__ == "__main__":
     print("Launching the demo application.")
     demo.launch()
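
The core of the change to respond() is the streaming loop above: tokens arrive chunk by chunk and the accumulated text is re-yielded so Gradio can render it live. Stripped of the UI, the pattern reduces to the following minimal sketch. It assumes a valid HF_TOKEN in the environment and the same api-inference.huggingface.co/v1/ endpoint used in the diff; the None check on the delta is an extra precaution not present in the committed code, since the final chunk of a stream may carry no content.

import os
from openai import OpenAI

# Same OpenAI-compatible client setup as in app.py
client = OpenAI(
    base_url="https://api-inference.huggingface.co/v1/",
    api_key=os.getenv("HF_TOKEN"),
)

stream = client.chat.completions.create(
    model="meta-llama/Llama-3.3-70B-Instruct",  # the app's default model
    messages=[
        {"role": "system", "content": "You are a concise assistant."},
        {"role": "user", "content": "Say hello in one sentence."},
    ],
    max_tokens=64,
    temperature=0.7,       # defaults exposed by the app's sliders
    top_p=0.95,
    frequency_penalty=0.0,
    seed=None,             # None = random, mirroring the app's seed == -1 case
    stream=True,           # deliver the completion incrementally
)

response_text = ""
for chunk in stream:
    delta = chunk.choices[0].delta.content
    if delta is not None:  # the final chunk may carry no content
        response_text += delta
print(response_text)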
 
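The "Featured Models" accordion keeps its previous behavior: the search box's change event rewrites the radio's choices through gr.update, and picking a radio option copies the model path into the custom-model textbox. In isolation, that wiring reduces to a sketch like the one below; the shortened model list and component names are placeholders for this example, and a plain lambda stands in for set_custom_model_from_radio.

import gradio as gr

# Placeholder stand-in for the app's longer models_list
MODELS = [
    "meta-llama/Llama-3.3-70B-Instruct",
    "google/gemma-2-9b-it",
    "Qwen/Qwen2.5-72B-Instruct",
]

def filter_models(search_term):
    # Case-insensitive substring filter, as in the diff
    filtered = [m for m in MODELS if search_term.lower() in m.lower()]
    return gr.update(choices=filtered)

with gr.Blocks() as demo:
    search = gr.Textbox(label="Filter Models")
    radio = gr.Radio(label="Select a model below", choices=MODELS)
    custom = gr.Textbox(label="Custom Model")
    # Typing re-filters the radio; selecting copies the path into the textbox
    search.change(fn=filter_models, inputs=search, outputs=radio)
    radio.change(fn=lambda selected: selected, inputs=radio, outputs=custom)

if __name__ == "__main__":
    demo.launch()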