Nymbo committed · verified · Commit b56d11c · 1 Parent(s): 86297f5

Update app.py

Files changed (1)
  1. app.py +163 -107
app.py CHANGED
@@ -22,8 +22,7 @@ def respond(
     top_p,
     frequency_penalty,
     seed,
-    model,
-    custom_model
+    model
 ):
     """
     This function handles the chatbot response. It takes in:
@@ -35,8 +34,7 @@ def respond(
     - top_p: top-p (nucleus) sampling
     - frequency_penalty: penalize repeated tokens in the output
     - seed: a fixed seed for reproducibility; -1 will mean 'random'
-    - model: the selected model from the featured list
-    - custom_model: a custom model specified by the user
+    - model: the selected model for text generation
     """
 
     print(f"Received message: {message}")
@@ -44,15 +42,7 @@ def respond(
     print(f"System message: {system_message}")
     print(f"Max tokens: {max_tokens}, Temperature: {temperature}, Top-P: {top_p}")
     print(f"Frequency Penalty: {frequency_penalty}, Seed: {seed}")
-    print(f"Model: {model}, Custom Model: {custom_model}")
-
-    # Determine the model to use
-    if custom_model.strip() != "":
-        selected_model = custom_model.strip()
-    else:
-        selected_model = model
-
-    print(f"Selected model for inference: {selected_model}")
+    print(f"Model: {model}")
 
     # Convert seed to None if -1 (meaning random)
     if seed == -1:
@@ -77,25 +67,24 @@ def respond(
 
     # Start with an empty string to build the response as tokens stream in
     response = ""
-    print(f"Sending request to OpenAI API using model: {selected_model}.")
+    print("Sending request to OpenAI API.")
 
     # Make the streaming request to the HF Inference API via openai-like client
     for message_chunk in client.chat.completions.create(
-        model=selected_model,
+        model=model,  # Use the selected model
         max_tokens=max_tokens,
         stream=True,  # Stream the response
         temperature=temperature,
         top_p=top_p,
-        frequency_penalty=frequency_penalty,
-        seed=seed,
+        frequency_penalty=frequency_penalty,  # <-- NEW
+        seed=seed,  # <-- NEW
         messages=messages,
     ):
         # Extract the token text from the response chunk
         token_text = message_chunk.choices[0].delta.content
-        if token_text is not None:
-            print(f"Received token: {token_text}")
-            response += token_text
-            yield response
+        print(f"Received token: {token_text}")
+        response += token_text
+        yield response
 
     print("Completed response generation.")
 
@@ -103,108 +92,175 @@ def respond(
 chatbot = gr.Chatbot(height=600)
 print("Chatbot interface created.")
 
-# Define featured models
-featured_models_list = [
-    "meta-llama/Llama-3.3-70B-Instruct",
-    "mistralai/Mistral-7B-v0.1",
-    "google/gemma-7b",
-]
-
 # Create the Gradio ChatInterface
-with gr.Blocks(theme="Nymbo/Nymbo_Theme") as demo:
-    with gr.Tab("Chat"):
+# We add two new sliders for Frequency Penalty and Seed
+demo = gr.ChatInterface(
+    respond,
+    additional_inputs=[
+        gr.Textbox(value="", label="System message"),
+        gr.Slider(minimum=1, maximum=4096, value=512, step=1, label="Max new tokens"),
+        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
+        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-P"),
+        gr.Slider(
+            minimum=-2.0,
+            maximum=2.0,
+            value=0.0,
+            step=0.1,
+            label="Frequency Penalty"
+        ),
+        gr.Slider(
+            minimum=-1,
+            maximum=65535,  # Arbitrary upper limit for demonstration
+            value=-1,
+            step=1,
+            label="Seed (-1 for random)"
+        ),
+        gr.Textbox(label="Custom Model", info="Model Hugging Face path (optional)", placeholder="meta-llama/Llama-3.3-70B-Instruct"),
+    ],
+    fill_height=True,
+    chatbot=chatbot,
+    theme="Nymbo/Nymbo_Theme",
+)
+print("Gradio interface initialized.")
+
+# Custom CSS to hide the footer in the interface
+css = """
+* {}
+footer {visibility: hidden !important;}
+"""
+
+print("Initializing Gradio interface...")  # Debug log
+
+# Define the Gradio interface
+with gr.Blocks(theme='Nymbo/Nymbo_Theme_5') as textgen:
+    # Tab for basic settings
+    with gr.Tab("Basic Settings"):
         with gr.Row():
-            with gr.Column():
-                # Chat interface
-                gr.ChatInterface(
-                    respond,
-                    additional_inputs=[
-                        gr.Textbox(value="", label="System message"),
-                        gr.Slider(minimum=1, maximum=4096, value=512, step=1, label="Max new tokens"),
-                        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
-                        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-P"),
-                        gr.Slider(minimum=-2.0, maximum=2.0, value=0.0, step=0.1, label="Frequency Penalty"),
-                        gr.Slider(minimum=-1, maximum=65535, value=-1, step=1, label="Seed (-1 for random)"),
-                        gr.Dropdown(label="Featured Models", choices=featured_models_list, value="meta-llama/Llama-3.3-70B-Instruct", interactive=True),
-                        gr.Textbox(value="", label="Custom Model (Optional)"),
-                    ],
-                    fill_height=True,
-                    chatbot=chatbot,
-                )
-            with gr.Column():
-                # Featured models accordion
-                with gr.Accordion("Featured Models", open=True):
-                    model_search = gr.Textbox(label="Filter Models", placeholder="Search for a featured model...", lines=1)
-                    model_radio = gr.Radio(label="Select a model below", choices=featured_models_list, value="meta-llama/Llama-3.3-70B-Instruct", interactive=True)
-
-                    def filter_models(search_term):
-                        filtered_models = [m for m in featured_models_list if search_term.lower() in m.lower()]
-                        return gr.update(choices=filtered_models)
-
-                    model_search.change(filter_models, inputs=model_search, outputs=model_radio)
-
-                # Custom model textbox
-                custom_model_textbox = gr.Textbox(label="Custom Model", placeholder="Enter a custom model path here (optional)", lines=1)
+            with gr.Column(elem_id="prompt-container"):
+                with gr.Row():
+                    # Textbox for user to input the prompt
+                    text_prompt = gr.Textbox(label="Prompt", placeholder="Enter a prompt here", lines=3, elem_id="prompt-text-input")
+                with gr.Row():
+                    # Textbox for custom model input
+                    custom_model = gr.Textbox(label="Custom Model", info="Model Hugging Face path (optional)", placeholder="meta-llama/Llama-3.3-70B-Instruct")
+                with gr.Row():
+                    # Accordion for selecting the model
+                    with gr.Accordion("Featured Models", open=True):
+                        # Textbox for searching models
+                        model_search = gr.Textbox(label="Filter Models", placeholder="Search for a featured model...", lines=1, elem_id="model-search-input")
+                        models_list = (
+                            "meta-llama/Llama-3.3-70B-Instruct",
+                            "meta-llama/Llama-3.3-30B-Instruct",
+                            "meta-llama/Llama-3.3-13B-Instruct",
+                            "meta-llama/Llama-3.3-7B-Instruct",
+                        )
+
+                        # Radio buttons to select the desired model
+                        model = gr.Radio(label="Select a model below", value="meta-llama/Llama-3.3-70B-Instruct", choices=models_list, interactive=True, elem_id="model-radio")
+
+                        # Filtering models based on search input
+                        def filter_models(search_term):
+                            filtered_models = [m for m in models_list if search_term.lower() in m.lower()]
+                            return gr.update(choices=filtered_models)
+
+                        # Update model list when search box is used
+                        model_search.change(filter_models, inputs=model_search, outputs=model)
+
+    # Tab for advanced settings
+    with gr.Tab("Advanced Settings"):
+        with gr.Row():
+            # Slider for setting the maximum number of new tokens
+            max_tokens = gr.Slider(label="Max new tokens", value=512, minimum=1, maximum=4096, step=1)
+        with gr.Row():
+            # Slider for adjusting the temperature
+            temperature = gr.Slider(label="Temperature", value=0.7, minimum=0.1, maximum=4.0, step=0.1)
+        with gr.Row():
+            # Slider for adjusting the top-p (nucleus) sampling
+            top_p = gr.Slider(label="Top-P", value=0.95, minimum=0.1, maximum=1.0, step=0.05)
+        with gr.Row():
+            # Slider for adjusting the frequency penalty
+            frequency_penalty = gr.Slider(label="Frequency Penalty", value=0.0, minimum=-2.0, maximum=2.0, step=0.1)
+        with gr.Row():
+            # Slider for setting the seed for reproducibility
+            seed = gr.Slider(label="Seed", value=-1, minimum=-1, maximum=65535, step=1)
 
+    # Tab to provide information to the user
     with gr.Tab("Information"):
-        with gr.Accordion("Featured Models", open=False):
+        with gr.Row():
+            # Display a sample prompt for guidance
+            gr.Textbox(label="Sample prompt", value="{prompt} | ultra detail, ultra elaboration, ultra quality, perfect.")
+
+        # Accordion displaying featured models
+        with gr.Accordion("Featured Models (WiP)", open=False):
             gr.HTML(
                 """
-                <p><a href="https://huggingface.co/models?pipeline_tag=text-generation&sort=trending">See all available models</a></p>
-                <table style="width:100%; text-align:center; margin:auto;">
-                    <tr>
-                        <th>Model Name</th>
-                        <th>Notes</th>
-                    </tr>
-                    <tr>
-                        <td>meta-llama/Llama-3.3-70B-Instruct</td>
-                        <td>Powerful large language model.</td>
-                    </tr>
-                    <tr>
-                        <td>mistralai/Mistral-7B-v0.1</td>
-                        <td>A smaller, efficient model.</td>
-                    </tr>
-                    <tr>
-                        <td>google/gemma-7b</td>
-                        <td>Google's language model.</td>
-                    </tr>
-                </table>
-                """
+                <p><a href="https://huggingface.co/models?inference=warm&pipeline_tag=text-generation&sort=trending">See all available models</a></p>
+                <table style="width:100%; text-align:center; margin:auto;">
+                    <tr>
+                        <th>Model Name</th>
+                        <th>Typography</th>
+                        <th>Notes</th>
+                    </tr>
+                    <tr>
+                        <td>meta-llama/Llama-3.3-70B-Instruct</td>
+                        <td>✅</td>
+                        <td></td>
+                    </tr>
+                    <tr>
+                        <td>meta-llama/Llama-3.3-30B-Instruct</td>
+                        <td>✅</td>
+                        <td></td>
+                    </tr>
+                    <tr>
+                        <td>meta-llama/Llama-3.3-13B-Instruct</td>
+                        <td>✅</td>
+                        <td></td>
+                    </tr>
+                    <tr>
+                        <td>meta-llama/Llama-3.3-7B-Instruct</td>
+                        <td>✅</td>
+                        <td></td>
+                    </tr>
+                </table>
+                """
             )
 
+        # Accordion providing an overview of advanced settings
         with gr.Accordion("Parameters Overview", open=False):
             gr.Markdown(
-                """
-                ## Parameters Overview
-
-                ### System Message
-                The system message is an initial instruction or context that you provide to the chatbot. It sets the stage for the conversation and can be used to guide the chatbot's behavior or persona.
-
-                ### Max New Tokens
-                This parameter limits the length of the chatbot's response. It specifies the maximum number of tokens (words or subwords) that the chatbot can generate in a single response.
-
-                ### Temperature
-                Temperature controls the randomness of the chatbot's responses. A higher temperature (e.g., 1.0) makes the output more random and creative, while a lower temperature (e.g., 0.2) makes the output more focused and deterministic.
-
-                ### Top-P
-                Top-P, also known as nucleus sampling, is another way to control the randomness of the responses. It sets a threshold for the cumulative probability of the most likely tokens. The chatbot will only consider tokens whose cumulative probability is below this threshold.
-
-                ### Frequency Penalty
-                This parameter discourages the chatbot from repeating the same tokens or phrases too often. A higher value (e.g., 1.0) penalizes repetition more strongly, while a lower value (e.g., 0.0) has no penalty.
-
-                ### Seed
-                The seed is a number that initializes the random number generator used by the chatbot. If you set a specific seed, you will get the same response every time you run the chatbot with the same parameters. If you set the seed to -1, a random seed will be used, resulting in different responses each time.
-
-                ### Featured Models
-                You can select a featured model from the dropdown list. These models have been pre-selected for their performance and capabilities.
-
-                ### Custom Model
-                If you have a specific model that you want to use, you can enter its path in the Custom Model textbox. This allows you to use models that are not included in the featured list.
-                """
+                """
+                ## System Message
+                ###### This box is for setting the system prompt, which guides the AI's behavior and context.
+
+                ## Max New Tokens
+                ###### This slider allows you to specify the maximum number of tokens (words or parts of words) the AI will generate in response to your prompt. The default value is 512.
+
+                ## Temperature
+                ###### Temperature controls the randomness of the AI's output. A higher temperature makes the output more random and creative, while a lower temperature makes it more predictable and focused.
+
+                ## Top-P (Nucleus Sampling)
+                ###### Top-P sampling is a technique that selects the smallest set of top tokens whose cumulative probability exceeds a threshold (p). This helps in generating more coherent and relevant responses.
+
+                ## Frequency Penalty
+                ###### This parameter penalizes repeated tokens in the output, encouraging the AI to generate more diverse responses. A higher value means more penalty for repetition.
+
+                ## Seed
+                ###### The seed is a value that ensures reproducibility. If you set a specific seed, the AI will generate the same output for the same input. Setting it to -1 means the seed will be random.
+
+                ### Remember, these settings are all about giving you control over the text generation process. Feel free to experiment and see what each one does. And if you're ever in doubt, the default settings are a great place to start. Happy creating!
+                """
             )
 
-print("Gradio interface initialized.")
+    # Row containing the 'Run' button to trigger the text generation
+    with gr.Row():
+        text_button = gr.Button("Run", variant='primary', elem_id="gen-button")
+    # Row for displaying the generated text output
+    with gr.Row():
+        text_output = gr.Textbox(label="Text Output", elem_id="text-output")
+
+    # Set up button click event to call the respond function
+    text_button.click(respond, inputs=[text_prompt, chatbot, gr.Textbox(value="", label="System message"), max_tokens, temperature, top_p, frequency_penalty, seed, model], outputs=text_output)
 
-if __name__ == "__main__":
-    print("Launching the demo application.")
-    demo.launch()
+print("Launching Gradio interface...")  # Debug log
+# Launch the Gradio interface without showing the API or sharing externally
+textgen.launch(show_api=False, share=False)
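
The hunks above stream from `client.chat.completions.create(...)`, but the construction of `client` sits outside the diff context. Below is a minimal sketch of the wiring this code appears to assume: the "openai-like client" mentioned in the comments, pointed at the Hugging Face Inference API's OpenAI-compatible route with an `HF_TOKEN` environment variable. The `base_url`, token name, and example prompt are assumptions; none of those lines appear in this commit. The model string reuses the placeholder shown in the diff.

import os
from openai import OpenAI  # the "openai-like client" the comments refer to

# Assumed setup (not part of this commit): HF's OpenAI-compatible
# endpoint plus an API token read from the environment.
client = OpenAI(
    base_url="https://api-inference.huggingface.co/v1/",
    api_key=os.getenv("HF_TOKEN"),
)

# Consuming a stream the same way respond() does. Note the None check:
# the final chunk of a stream typically carries delta.content == None,
# which the new loop in this commit concatenates without a guard.
for chunk in client.chat.completions.create(
    model="meta-llama/Llama-3.3-70B-Instruct",
    messages=[{"role": "user", "content": "Say hello."}],
    stream=True,
):
    token_text = chunk.choices[0].delta.content
    if token_text is not None:
        print(token_text, end="")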