Nymbo committed
Commit a8fc89d · verified · 1 Parent(s): 62429d1

Update app.py

Files changed (1):
  app.py +264 -229
app.py CHANGED
@@ -2,26 +2,11 @@ import gradio as gr
  from openai import OpenAI
  import os

- # -------------------
- # SERVERLESS-TEXTGEN-HUB
- # -------------------
- #
- # This version has been updated to include an "Information" tab above the Chat tab.
- # The Information tab has two accordions:
- # - "Featured Models" which displays a simple table
- # - "Parameters Overview" which contains markdown describing the settings
- #
- # The Chat tab contains the existing chatbot UI.
-
- # -------------------
- # SETUP AND CONFIG
- # -------------------
-
  # Retrieve the access token from the environment variable
  ACCESS_TOKEN = os.getenv("HF_TOKEN")
  print("Access token loaded.")

- # Initialize the OpenAI-like client (Hugging Face Inference API) with your token
  client = OpenAI(
      base_url="https://api-inference.huggingface.co/v1/",
      api_key=ACCESS_TOKEN,
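For reference, a minimal sketch of a single non-streaming request through a client configured this way (assuming `HF_TOKEN` is set and the example model is available on the serverless Inference API):

```python
import os
from openai import OpenAI

client = OpenAI(
    base_url="https://api-inference.huggingface.co/v1/",
    api_key=os.getenv("HF_TOKEN"),
)

# One-off, non-streaming request; the model name is just an example.
completion = client.chat.completions.create(
    model="meta-llama/Llama-3.3-70B-Instruct",
    messages=[{"role": "user", "content": "Say hello in five words."}],
    max_tokens=32,
)
print(completion.choices[0].message.content)
```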
@@ -63,19 +48,19 @@ def respond(
      if seed == -1:
          seed = None

-     # Construct the messages array required by the HF Inference API
      messages = [{"role": "system", "content": system_message}]
      print("Initial messages array constructed.")

      # Add conversation history to the context
      for val in history:
          user_part = val[0]  # Extract user message from the tuple
-         assistant_part = val[1]  # Extract assistant message
          if user_part:
-             messages.append({"role": "user", "content": user_part})
              print(f"Added user message to context: {user_part}")
          if assistant_part:
-             messages.append({"role": "assistant", "content": assistant_part})
              print(f"Added assistant message to context: {assistant_part}")

      # Append the latest user message
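For clarity, a small worked example (hypothetical values) of how a one-turn Gradio history of (user, assistant) tuples unrolls into this messages list:

```python
# Hypothetical one-turn history, in Gradio's tuple format.
history = [("What is Gradio?", "Gradio is a Python library for building ML demos.")]
system_message = "You are a helpful assistant."

messages = [{"role": "system", "content": system_message}]
for user_part, assistant_part in history:
    if user_part:
        messages.append({"role": "user", "content": user_part})
    if assistant_part:
        messages.append({"role": "assistant", "content": assistant_part})
messages.append({"role": "user", "content": "Who maintains it?"})

# messages is now ordered: system -> user -> assistant -> user,
# which is the shape the chat completions endpoint expects.
```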
@@ -86,262 +71,312 @@ def respond(
      model_to_use = custom_model.strip() if custom_model.strip() != "" else "meta-llama/Llama-3.3-70B-Instruct"
      print(f"Model selected for inference: {model_to_use}")

-     # Start with an empty string to build the streamed response
-     response_text = ""
-     print("Sending request to Hugging Face Inference API via OpenAI-like client...")

-     # Make the streaming request to the HF Inference API
      for message_chunk in client.chat.completions.create(
-         model=model_to_use,
-         max_tokens=max_tokens,
-         stream=True,
-         temperature=temperature,
-         top_p=top_p,
-         frequency_penalty=frequency_penalty,
-         seed=seed,
-         messages=messages,
      ):
          # Extract the token text from the response chunk
          token_text = message_chunk.choices[0].delta.content
          print(f"Received token: {token_text}")
-         response_text += token_text
          # Yield the partial response to Gradio so it can display in real-time
-         yield response_text

      print("Completed response generation.")

- # ----------------------
- # BUILDING THE INTERFACE
- # ----------------------

- # We will use a "Blocks" layout with two tabs:
- # 1) "Information" tab, which shows helpful info and a table of "Featured Models"
- # 2) "Chat" tab, which holds our ChatInterface and associated controls

- with gr.Blocks(theme="Nymbo/Nymbo_Theme") as demo:
-
-     # -----------------
-     # TAB: INFORMATION
-     # -----------------
-     with gr.Tab("Information"):
-         # You can add instructions, disclaimers, or helpful text here
-         gr.Markdown("## Welcome to Serverless-TextGen-Hub - Information")

-         # Accordion for Featured Models (table)
-         with gr.Accordion("Featured Models (WiP)", open=False):
-             gr.HTML(
-                 """
-     <p><a href="https://huggingface.co/models?inference=warm&pipeline_tag=chat&sort=trending" target="_blank">See all available text models on Hugging Face</a></p>
      <table style="width:100%; text-align:center; margin:auto;">
        <tr>
          <th>Model Name</th>
-         <th>Supported</th>
          <th>Notes</th>
        </tr>
        <tr>
          <td>meta-llama/Llama-3.3-70B-Instruct</td>
          <td>✅</td>
-         <td>Default model, if none is provided in the 'Custom Model' box.</td>
        </tr>
        <tr>
          <td>meta-llama/Llama-3.2-3B-Instruct</td>
          <td>✅</td>
-         <td>Smaller Llama-based instruct model for faster responses.</td>
        </tr>
        <tr>
-         <td>microsoft/Phi-3.5-mini-instruct</td>
          <td>✅</td>
-         <td>A smaller instruct model from Microsoft.</td>
        </tr>
        <tr>
          <td>Qwen/Qwen2.5-72B-Instruct</td>
          <td>✅</td>
-         <td>Large-scale Qwen-based model.</td>
        </tr>
      </table>
      """
-             )
-
-         # Accordion for Parameters Overview
-         with gr.Accordion("Parameters Overview", open=False):
-             gr.Markdown(
-                 """
-     **Here is a brief overview of the main parameters for text generation:**
-
-     - **Max Tokens**: The maximum number of tokens (think of these as word-pieces) the model will generate in its response.
-     - **Temperature**: Controls how "creative" or random the output is. Lower values = more deterministic, higher values = more varied.
-     - **Top-P**: Similar to temperature, but uses nucleus sampling. Top-P defines the probability mass of the tokens to sample from. For example, `top_p=0.9` means "use the top 90% probable tokens."
-     - **Frequency Penalty**: A higher penalty discourages repeated tokens, helping reduce repetitive answers.
-     - **Seed**: You can set a seed for deterministic results. `-1` means random each time.
-
-     **Featured Models** can also be selected. If you want to override the model, you may specify a custom Hugging Face model path in the "Custom Model" text box.

-     ---
-     If you are new to text-generation parameters, the defaults are a great place to start!

      """
-             )
-
-         # -----------
-         # TAB: CHAT
-         # -----------
-     with gr.Tab("Chat"):
-         gr.Markdown("## Chat with the TextGen Model")
-
-         # Create a Chatbot component with a specified height
-         chatbot = gr.Chatbot(height=600)
-         print("Chatbot interface created.")
-
-         # Create textboxes and sliders for system prompt, tokens, and other parameters
-         system_message_box = gr.Textbox(
-             value="",
-             label="System message",
-             info="You can use this to provide instructions or context to the assistant. Leave empty if not needed."
-         )

-         max_tokens_slider = gr.Slider(
-             minimum=1,
-             maximum=4096,
-             value=512,
-             step=1,
-             label="Max new tokens",
-             info="Controls the maximum length of the output. Keep an eye on your usage!"
-         )

-         temperature_slider = gr.Slider(
-             minimum=0.1,
-             maximum=4.0,
-             value=0.7,
-             step=0.1,
-             label="Temperature",
-             info="Controls creativity. Higher values = more random replies, lower = more deterministic."
-         )
-
-         top_p_slider = gr.Slider(
-             minimum=0.1,
-             maximum=1.0,
-             value=0.95,
-             step=0.05,
-             label="Top-P",
-             info="Use nucleus sampling with probability mass cutoff. 1.0 includes all tokens."
-         )

-         frequency_penalty_slider = gr.Slider(
-             minimum=-2.0,
-             maximum=2.0,
-             value=0.0,
-             step=0.1,
-             label="Frequency Penalty",
-             info="Penalize repeated tokens to avoid repetition in output."
-         )

-         seed_slider = gr.Slider(
-             minimum=-1,
-             maximum=65535,
-             value=-1,
-             step=1,
-             label="Seed (-1 for random)",
-             info="Fixing a seed (0 to 65535) can make results reproducible. -1 picks a random seed each time."
-         )

-         # The custom_model_box is what the respond function sees as "custom_model"
-         custom_model_box = gr.Textbox(
-             value="",
-             label="Custom Model",
-             info="(Optional) Provide a custom Hugging Face model path. Overrides any selected featured model."
-         )

-         # Function to update the custom model box when a featured model is selected
-         def set_custom_model_from_radio(selected):
-             print(f"Featured model selected: {selected}")
-             return selected
-
-         print("ChatInterface object created.")
-
-         # The main ChatInterface call
-         chat_interface = gr.ChatInterface(
-             fn=respond,                   # The function to handle responses
-             additional_inputs=[
-                 system_message_box,
-                 max_tokens_slider,
-                 temperature_slider,
-                 top_p_slider,
-                 frequency_penalty_slider,
-                 seed_slider,
-                 custom_model_box
-             ],
-             fill_height=True,             # Let the chatbot fill the container height
-             chatbot=chatbot,              # The Chatbot UI component
-             theme="Nymbo/Nymbo_Theme",
-         )

-         print("Gradio interface for Chat created.")
-
-         # -----------
-         # ADDING THE "FEATURED MODELS" ACCORDION (Same logic as before)
-         # -----------
-         with gr.Accordion("Featured Models", open=False):
-             model_search_box = gr.Textbox(
-                 label="Filter Models",
-                 placeholder="Search for a featured model...",
-                 lines=1
-             )
-             print("Model search box created.")
-
-             # Sample list of popular text models
-             models_list = [
-                 "meta-llama/Llama-3.3-70B-Instruct",
-                 "meta-llama/Llama-3.2-3B-Instruct",
-                 "meta-llama/Llama-3.2-1B-Instruct",
-                 "meta-llama/Llama-3.1-8B-Instruct",
-                 "NousResearch/Hermes-3-Llama-3.1-8B",
-                 "google/gemma-2-27b-it",
-                 "google/gemma-2-9b-it",
-                 "google/gemma-2-2b-it",
-                 "mistralai/Mistral-Nemo-Instruct-2407",
-                 "mistralai/Mixtral-8x7B-Instruct-v0.1",
-                 "mistralai/Mistral-7B-Instruct-v0.3",
-                 "Qwen/Qwen2.5-72B-Instruct",
-                 "Qwen/QwQ-32B-Preview",
-                 "PowerInfer/SmallThinker-3B-Preview",
-                 "HuggingFaceTB/SmolLM2-1.7B-Instruct",
-                 "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
-                 "microsoft/Phi-3.5-mini-instruct",
-             ]
-             print("Models list initialized.")
-
-             featured_model_radio = gr.Radio(
-                 label="Select a model below",
-                 choices=models_list,
-                 value="meta-llama/Llama-3.3-70B-Instruct",
-                 interactive=True
-             )
-             print("Featured models radio button created.")
-
-             def filter_models(search_term):
-                 print(f"Filtering models with search term: {search_term}")
-                 filtered = [m for m in models_list if search_term.lower() in m.lower()]
-                 print(f"Filtered models: {filtered}")
-                 return gr.update(choices=filtered)
-
-             model_search_box.change(
-                 fn=filter_models,
-                 inputs=model_search_box,
-                 outputs=featured_model_radio
-             )
-             print("Model search box change event linked.")
-
-             featured_model_radio.change(
-                 fn=set_custom_model_from_radio,
-                 inputs=featured_model_radio,
-                 outputs=custom_model_box
-             )
-             print("Featured model radio button change event linked.")

  print("Gradio interface initialized.")

- # ------------------------
- # MAIN ENTRY POINT
- # ------------------------
  if __name__ == "__main__":
      print("Launching the demo application.")
      demo.launch()
 
  from openai import OpenAI
  import os

  # Retrieve the access token from the environment variable
  ACCESS_TOKEN = os.getenv("HF_TOKEN")
  print("Access token loaded.")

+ # Initialize the OpenAI client with the Hugging Face Inference API endpoint
  client = OpenAI(
      base_url="https://api-inference.huggingface.co/v1/",
      api_key=ACCESS_TOKEN,
 
      if seed == -1:
          seed = None

+     # Construct the messages array required by the API
      messages = [{"role": "system", "content": system_message}]
      print("Initial messages array constructed.")

      # Add conversation history to the context
      for val in history:
          user_part = val[0]       # Extract user message from the tuple
+         assistant_part = val[1]  # Extract assistant message from the tuple
          if user_part:
+             messages.append({"role": "user", "content": user_part})  # Append user message
              print(f"Added user message to context: {user_part}")
          if assistant_part:
+             messages.append({"role": "assistant", "content": assistant_part})  # Append assistant message
              print(f"Added assistant message to context: {assistant_part}")

      # Append the latest user message
 
      model_to_use = custom_model.strip() if custom_model.strip() != "" else "meta-llama/Llama-3.3-70B-Instruct"
      print(f"Model selected for inference: {model_to_use}")

+     # Start with an empty string to build the response as tokens stream in
+     response = ""
+     print("Sending request to OpenAI API.")

+     # Make the streaming request to the HF Inference API via the OpenAI-like client
      for message_chunk in client.chat.completions.create(
+         model=model_to_use,                   # Use either the user-provided or default model
+         max_tokens=max_tokens,                # Maximum tokens for the response
+         stream=True,                          # Enable streaming responses
+         temperature=temperature,              # Adjust randomness in the response
+         top_p=top_p,                          # Control diversity in response generation
+         frequency_penalty=frequency_penalty,  # Penalize repeated phrases
+         seed=seed,                            # Set random seed for reproducibility
+         messages=messages,                    # Contextual conversation messages
      ):
          # Extract the token text from the response chunk
          token_text = message_chunk.choices[0].delta.content
          print(f"Received token: {token_text}")
+         response += token_text
          # Yield the partial response to Gradio so it can display in real-time
+         yield response

      print("Completed response generation.")

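One caveat about the streaming loop above: in OpenAI-style streaming, `delta.content` can be `None` on some chunks (typically the final one), in which case `response += token_text` raises a `TypeError`. A defensive variant of the accumulation step (a sketch; `stream_text` is a hypothetical helper, not part of the commit):

```python
def stream_text(stream):
    """Defensively accumulate an OpenAI-style stream, i.e. the object
    returned by client.chat.completions.create(..., stream=True)."""
    response = ""
    for message_chunk in stream:
        token_text = message_chunk.choices[0].delta.content
        if token_text:  # some chunks (e.g. the final one) may carry None content
            response += token_text
            yield response
```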
+ # -------------------------
+ # GRADIO UI CONFIGURATION
+ # -------------------------

+ # Create a Chatbot component with a specified height
+ chatbot = gr.Chatbot(height=600)  # Define the height of the chatbot interface
+ print("Chatbot interface created.")

+ # Create textboxes and sliders for the system prompt, tokens, and other parameters
+ system_message_box = gr.Textbox(value="", label="System message")  # Input box for the system message

+ max_tokens_slider = gr.Slider(
+     minimum=1,        # Minimum allowable tokens
+     maximum=4096,     # Maximum allowable tokens
+     value=512,        # Default value
+     step=1,           # Increment step size
+     label="Max new tokens"  # Slider label
+ )
+ temperature_slider = gr.Slider(
+     minimum=0.1,      # Minimum temperature
+     maximum=4.0,      # Maximum temperature
+     value=0.7,        # Default value
+     step=0.1,         # Increment step size
+     label="Temperature"  # Slider label
+ )
+ top_p_slider = gr.Slider(
+     minimum=0.1,      # Minimum top-p value
+     maximum=1.0,      # Maximum top-p value
+     value=0.95,       # Default value
+     step=0.05,        # Increment step size
+     label="Top-P"     # Slider label
+ )
+ frequency_penalty_slider = gr.Slider(
+     minimum=-2.0,     # Minimum penalty
+     maximum=2.0,      # Maximum penalty
+     value=0.0,        # Default value
+     step=0.1,         # Increment step size
+     label="Frequency Penalty"  # Slider label
+ )
+ seed_slider = gr.Slider(
+     minimum=-1,       # -1 for a random seed
+     maximum=65535,    # Maximum seed value
+     value=-1,         # Default value
+     step=1,           # Increment step size
+     label="Seed (-1 for random)"  # Slider label
+ )
+
+ # The custom_model_box is what the respond function sees as "custom_model"
+ custom_model_box = gr.Textbox(
+     value="",              # Default value
+     label="Custom Model",  # Label for the textbox
+     info="(Optional) Provide a custom Hugging Face model path. Overrides any selected featured model."  # Additional info
+ )
+
+ # Define a function that updates the custom model box when a featured model is selected
+ def set_custom_model_from_radio(selected):
+     """
+     Triggered whenever someone picks a model from the 'Featured Models' radio.
+     Updates the Custom Model text box with that selection automatically.
+     """
+     print(f"Featured model selected: {selected}")  # Log the selected model
+     return selected
+
+ # Create the main ChatInterface object
+ demo = gr.ChatInterface(
+     fn=respond,                    # The function to handle responses
+     additional_inputs=[
+         system_message_box,        # System message input
+         max_tokens_slider,         # Max tokens slider
+         temperature_slider,        # Temperature slider
+         top_p_slider,              # Top-P slider
+         frequency_penalty_slider,  # Frequency penalty slider
+         seed_slider,               # Seed slider
+         custom_model_box           # Custom model input
+     ],
+     fill_height=True,              # Allow the chatbot to fill the container height
+     chatbot=chatbot,               # Chatbot UI component
+     theme="Nymbo/Nymbo_Theme",     # Theme for the interface
+ )
+
+ print("ChatInterface object created.")
+
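Gradio's `ChatInterface` passes each component in `additional_inputs` to `fn` positionally, after the message and history arguments, so the order above must match the parameter order of `respond`. A sketch of the signature this wiring assumes (parameter names taken from the calls above; the exact history format depends on the Gradio version):

```python
def respond(
    message,            # current user input from the chat textbox
    history,            # prior (user, assistant) tuples maintained by Gradio
    system_message,     # <- system_message_box
    max_tokens,         # <- max_tokens_slider
    temperature,        # <- temperature_slider
    top_p,              # <- top_p_slider
    frequency_penalty,  # <- frequency_penalty_slider
    seed,               # <- seed_slider
    custom_model,       # <- custom_model_box
):
    ...  # body as defined earlier in app.py
```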
+ # -----------
+ # ADDING THE "FEATURED MODELS" ACCORDION
+ # -----------
+ with demo:
+     with gr.Accordion("Featured Models", open=False):  # Collapsible section for featured models
+         model_search_box = gr.Textbox(
+             label="Filter Models",                         # Label for the search box
+             placeholder="Search for a featured model...",  # Placeholder text
+             lines=1                                        # Single-line input
+         )
+         print("Model search box created.")
+
+         # Sample list of popular text models
+         models_list = [
+             "meta-llama/Llama-3.3-70B-Instruct",
+             "meta-llama/Llama-3.2-3B-Instruct",
+             "meta-llama/Llama-3.2-1B-Instruct",
+             "meta-llama/Llama-3.1-8B-Instruct",
+             "NousResearch/Hermes-3-Llama-3.1-8B",
+             "google/gemma-2-27b-it",
+             "google/gemma-2-9b-it",
+             "google/gemma-2-2b-it",
+             "mistralai/Mistral-Nemo-Instruct-2407",
+             "mistralai/Mixtral-8x7B-Instruct-v0.1",
+             "mistralai/Mistral-7B-Instruct-v0.3",
+             "Qwen/Qwen2.5-72B-Instruct",
+             "Qwen/QwQ-32B-Preview",
+             "PowerInfer/SmallThinker-3B-Preview",
+             "HuggingFaceTB/SmolLM2-1.7B-Instruct",
+             "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
+             "microsoft/Phi-3.5-mini-instruct",
+         ]
+         print("Models list initialized.")
+
+         featured_model_radio = gr.Radio(
+             label="Select a model below",               # Label for the radio buttons
+             choices=models_list,                        # List of available models
+             value="meta-llama/Llama-3.3-70B-Instruct",  # Default selection
+             interactive=True                            # Allow user interaction
+         )
+         print("Featured models radio button created.")
+
+         # Filter function for the radio button list
+         def filter_models(search_term):
+             print(f"Filtering models with search term: {search_term}")  # Log the search term
+             filtered = [m for m in models_list if search_term.lower() in m.lower()]  # Filter models by the search term
+             print(f"Filtered models: {filtered}")  # Log the filtered models
+             return gr.update(choices=filtered)
+
+         # Update the radio list when the search box value changes
+         model_search_box.change(
+             fn=filter_models,             # Function to filter models
+             inputs=model_search_box,      # Input: search box value
+             outputs=featured_model_radio  # Output: update the radio button list
+         )
+         print("Model search box change event linked.")
+
+         # Update the custom model textbox when a featured model is selected
+         featured_model_radio.change(
+             fn=set_custom_model_from_radio,  # Function to set the custom model
+             inputs=featured_model_radio,     # Input: selected model
+             outputs=custom_model_box         # Output: update the custom model textbox
+         )
+         print("Featured model radio button change event linked.")
+
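As a quick illustration of the substring filter (a hypothetical standalone call, outside Gradio):

```python
models_list = [
    "meta-llama/Llama-3.3-70B-Instruct",
    "mistralai/Mistral-Nemo-Instruct-2407",
    "mistralai/Mistral-7B-Instruct-v0.3",
]

search_term = "mistral"
filtered = [m for m in models_list if search_term.lower() in m.lower()]
print(filtered)
# ['mistralai/Mistral-Nemo-Instruct-2407', 'mistralai/Mistral-7B-Instruct-v0.3']
```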
+     # -----------
+     # ADDING THE "INFORMATION" TAB
+     # -----------
+     with gr.Tab("Information"):
+         with gr.Row():
+             # Accordion for Featured Models
+             with gr.Accordion("Featured Models", open=False):
+                 gr.HTML(
+                     """
      <table style="width:100%; text-align:center; margin:auto;">
        <tr>
          <th>Model Name</th>
+         <th>Typography</th>
          <th>Notes</th>
        </tr>
        <tr>
          <td>meta-llama/Llama-3.3-70B-Instruct</td>
          <td>✅</td>
+         <td></td>
        </tr>
        <tr>
          <td>meta-llama/Llama-3.2-3B-Instruct</td>
          <td>✅</td>
+         <td></td>
        </tr>
        <tr>
+         <td>meta-llama/Llama-3.2-1B-Instruct</td>
+         <td>✅</td>
+         <td></td>
+       </tr>
+       <tr>
+         <td>meta-llama/Llama-3.1-8B-Instruct</td>
+         <td>✅</td>
+         <td></td>
+       </tr>
+       <tr>
+         <td>NousResearch/Hermes-3-Llama-3.1-8B</td>
          <td>✅</td>
+         <td></td>
+       </tr>
+       <tr>
+         <td>google/gemma-2-27b-it</td>
+         <td>✅</td>
+         <td></td>
+       </tr>
+       <tr>
+         <td>google/gemma-2-9b-it</td>
+         <td>✅</td>
+         <td></td>
+       </tr>
+       <tr>
+         <td>google/gemma-2-2b-it</td>
+         <td>✅</td>
+         <td></td>
+       </tr>
+       <tr>
+         <td>mistralai/Mistral-Nemo-Instruct-2407</td>
+         <td>✅</td>
+         <td></td>
+       </tr>
+       <tr>
+         <td>mistralai/Mixtral-8x7B-Instruct-v0.1</td>
+         <td>✅</td>
+         <td></td>
+       </tr>
+       <tr>
+         <td>mistralai/Mistral-7B-Instruct-v0.3</td>
+         <td>✅</td>
+         <td></td>
+       </tr>
        <tr>
          <td>Qwen/Qwen2.5-72B-Instruct</td>
          <td>✅</td>
+         <td></td>
+       </tr>
+       <tr>
+         <td>Qwen/QwQ-32B-Preview</td>
+         <td>✅</td>
+         <td></td>
+       </tr>
+       <tr>
+         <td>PowerInfer/SmallThinker-3B-Preview</td>
+         <td>✅</td>
+         <td></td>
+       </tr>
+       <tr>
+         <td>HuggingFaceTB/SmolLM2-1.7B-Instruct</td>
+         <td>✅</td>
+         <td></td>
+       </tr>
+       <tr>
+         <td>TinyLlama/TinyLlama-1.1B-Chat-v1.0</td>
+         <td>✅</td>
+         <td></td>
+       </tr>
+       <tr>
+         <td>microsoft/Phi-3.5-mini-instruct</td>
+         <td>✅</td>
+         <td></td>
+       </tr>
      </table>
      """
+                 )
 
+             # Accordion for Parameters Overview
+             with gr.Accordion("Parameters Overview", open=False):
+                 gr.Markdown(
      """
+ ## System Message
+ ###### This box is for setting the initial context or instructions for the AI. It helps guide the AI on how to respond to your inputs.

+ ## Max New Tokens
+ ###### This slider allows you to specify the maximum number of tokens (words or parts of words) the AI can generate in a single response. The default value is 512, and the maximum is 4096.

+ ## Temperature
+ ###### Temperature controls the randomness of the AI's responses. A higher temperature makes the responses more creative and varied, while a lower temperature makes them more predictable and focused. The default value is 0.7.

+ ## Top-P (Nucleus Sampling)
+ ###### Top-P sampling is another way to control the diversity of the AI's responses. It ensures that the AI only considers the most likely tokens up to a cumulative probability of P. The default value is 0.95.

+ ## Frequency Penalty
+ ###### This penalty discourages the AI from repeating the same tokens (words or phrases) in its responses. A higher penalty reduces repetition. The default value is 0.0.

+ ## Seed
+ ###### The seed is a number that makes the AI's responses reproducible. If you set a specific seed, the AI will generate the same response every time for the same input. If you set it to -1, a random seed is used each time.

+ ## Custom Model
+ ###### You can specify a custom Hugging Face model path here. This will override any selected featured model and lets you use models not listed in the featured models. This field is optional.

+ ### Remember, these settings are all about giving you control over the text generation process. Feel free to experiment and see what each one does. And if you're ever in doubt, the default settings are a great place to start. Happy creating!
+ """
+                 )
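To make the Seed section concrete, a sketch of seed-controlled reproducibility (hypothetical `generate` helper; assumes the endpoint honors the `seed` parameter, which not every backend does):

```python
import os
from openai import OpenAI

client = OpenAI(
    base_url="https://api-inference.huggingface.co/v1/",
    api_key=os.getenv("HF_TOKEN"),
)

def generate(seed):
    completion = client.chat.completions.create(
        model="meta-llama/Llama-3.3-70B-Instruct",  # example model
        messages=[{"role": "user", "content": "Name a color."}],
        max_tokens=8,
        temperature=0.7,
        seed=seed,  # fixed integer -> repeatable sampling; None -> varies per call
    )
    return completion.choices[0].message.content

print(generate(seed=42) == generate(seed=42))  # expected True, given backend support
print(generate(seed=None))                     # may differ from call to call
```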

  print("Gradio interface initialized.")

  if __name__ == "__main__":
      print("Launching the demo application.")
      demo.launch()