Nymbo committed on
Commit
10ffb1d
·
verified ·
1 Parent(s): 8696822

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +164 -191
app.py CHANGED
@@ -2,10 +2,6 @@ import gradio as gr
2
  from openai import OpenAI
3
  import os
4
 
5
- # =============================
6
- # GLOBAL SETUP / CLIENT
7
- # =============================
8
-
9
  # Retrieve the access token from the environment variable
10
  ACCESS_TOKEN = os.getenv("HF_TOKEN")
11
  print("Access token loaded.")
@@ -17,25 +13,23 @@ client = OpenAI(
17
  )
18
  print("OpenAI client initialized.")
19
 
20
- # =============================
21
- # MODEL CONFIG / LOGIC
22
- # =============================
23
-
24
- # Sample placeholder list of "featured" models for demonstration
25
- featured_models_list = [
26
- "meta-llama/Llama-2-13B-chat-hf",
27
- "bigscience/bloom",
28
- "microsoft/DialoGPT-large",
29
- "OpenAssistant/oasst-sft-1-pythia-12b",
30
- "tiiuae/falcon-7b-instruct",
31
- "meta-llama/Llama-3.3-70B-Instruct"
32
  ]
33
 
34
- def filter_featured_models(search_term: str):
35
  """
36
- Returns a list of models that contain the search term (case-insensitive).
 
 
37
  """
38
- filtered = [m for m in featured_models_list if search_term.lower() in m.lower()]
39
  return gr.update(choices=filtered)
40
 
41
 
@@ -49,31 +43,36 @@ def respond(
49
  frequency_penalty,
50
  seed,
51
  custom_model,
52
- selected_featured_model
53
  ):
54
  """
55
  This function handles the chatbot response. It takes in:
56
  - message: the user's new message
57
  - history: the list of previous messages, each as a tuple (user_msg, assistant_msg)
58
  - system_message: the system prompt
59
- - max_tokens, temperature, top_p, frequency_penalty, seed: generation params
60
- - custom_model: user-provided custom model path/name
61
- - selected_featured_model: model chosen from the featured radio list
 
 
 
 
62
  """
 
63
  print(f"Received message: {message}")
64
  print(f"History: {history}")
65
  print(f"System message: {system_message}")
66
  print(f"Max tokens: {max_tokens}, Temperature: {temperature}, Top-P: {top_p}")
67
  print(f"Frequency Penalty: {frequency_penalty}, Seed: {seed}")
68
  print(f"Custom model: {custom_model}")
69
- print(f"Selected featured model: {selected_featured_model}")
70
 
71
  # Convert seed to None if -1 (meaning random)
72
  if seed == -1:
73
  seed = None
74
 
75
  # Construct the messages array required by the API
76
- messages = [{"role": "system", "content": system_message}] if system_message.strip() else []
77
 
78
  # Add conversation history to the context
79
  for val in history:
@@ -89,19 +88,20 @@ def respond(
89
  # Append the latest user message
90
  messages.append({"role": "user", "content": message})
91
 
92
- # Determine which model to use:
93
- # 1) If custom_model is non-empty, it overrides everything.
94
- # 2) Otherwise, use the selected featured model from the radio button if available.
95
- # 3) If both are empty, fall back to the default.
96
- model_to_use = "meta-llama/Llama-3.3-70B-Instruct" # Default
97
  if custom_model.strip() != "":
98
  model_to_use = custom_model.strip()
99
- elif selected_featured_model.strip() != "":
100
- model_to_use = selected_featured_model.strip()
 
 
101
 
102
  print(f"Model selected for inference: {model_to_use}")
103
 
104
- # Start building the streaming response
105
  response = ""
106
  print("Sending request to OpenAI API.")
107
 
@@ -109,7 +109,7 @@ def respond(
109
  for message_chunk in client.chat.completions.create(
110
  model=model_to_use,
111
  max_tokens=max_tokens,
112
- stream=True, # Stream the response
113
  temperature=temperature,
114
  top_p=top_p,
115
  frequency_penalty=frequency_penalty,
@@ -118,168 +118,141 @@ def respond(
118
  ):
119
  # Extract the token text from the response chunk
120
  token_text = message_chunk.choices[0].delta.content
121
- print(f"Received token: {token_text}", flush=True)
122
  response += token_text
123
  # Yield the partial response to Gradio so it can display in real-time
124
  yield response
125
 
126
  print("Completed response generation.")
127
 
128
- # =============================
129
- # MAIN UI
130
- # =============================
131
 
132
- def build_app():
133
- """
134
- Build the Gradio Blocks interface containing:
135
- - A Chat tab (ChatInterface)
136
- - A Featured Models tab
137
- - An Information tab
138
- """
139
- with gr.Blocks(theme="Nymbo/Nymbo_Theme") as main_interface:
140
-
141
- # We define a Gr.State to hold the user's chosen featured model
142
- selected_featured_model_state = gr.State("")
143
-
144
- with gr.Tab("Chat Interface"):
145
- gr.Markdown("## Serverless-TextGen-Hub")
146
-
147
- # Here we embed the ChatInterface for streaming conversation
148
- # We add extra inputs for "Selected Featured Model" as hidden,
149
- # so the user can't directly edit but it flows into respond().
150
- demo = gr.ChatInterface(
151
- fn=respond,
152
- additional_inputs=[
153
- gr.Textbox(value="", label="System message", lines=2),
154
- gr.Slider(minimum=1, maximum=4096, value=512, step=1, label="Max new tokens"),
155
- gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
156
- gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-P"),
157
- gr.Slider(minimum=-2.0, maximum=2.0, value=0.0, step=0.1, label="Frequency Penalty"),
158
- gr.Slider(minimum=-1, maximum=65535, value=-1, step=1, label="Seed (-1 for random)"),
159
- gr.Textbox(value="", label="Custom Model", info="(Optional) Provide a custom HF model path"),
160
- gr.Textbox(value="", label="Selected Featured Model (from tab)", visible=False),
161
- ],
162
- fill_height=True,
163
- chatbot=gr.Chatbot(height=600),
164
- theme="Nymbo/Nymbo_Theme",
165
- )
166
-
167
- # We want to connect the selected_featured_model_state to that hidden text box
168
- def set_featured_model_in_chatbox(val):
169
- return val
170
-
171
- # Whenever the selected_featured_model_state changes, update the hidden field in the ChatInterface
172
- selected_featured_model_state.change(
173
- fn=set_featured_model_in_chatbox,
174
- inputs=selected_featured_model_state,
175
- outputs=demo.additional_inputs[-1], # The last additional input is the "Selected Featured Model"
176
- )
177
-
178
- # ==========================
179
- # Featured Models Tab
180
- # ==========================
181
- with gr.Tab("Featured Models"):
182
- gr.Markdown("### Choose from our Featured Models")
183
-
184
- # A text box for searching/filtering
185
- model_search = gr.Textbox(
186
- label="Filter Models",
187
- placeholder="Search for a featured model..."
188
- )
189
-
190
- # A radio component listing the featured models (default to first)
191
- model_radio = gr.Radio(
192
- choices=featured_models_list,
193
- label="Select a model below",
194
- value=featured_models_list[0],
195
- interactive=True
196
- )
197
-
198
- # Define how to update the radio choices when the search box changes
199
- model_search.change(
200
- fn=filter_featured_models,
201
- inputs=model_search,
202
- outputs=model_radio
203
- )
204
-
205
- # Button to confirm the selection
206
- def select_featured_model(radio_val):
207
- """
208
- Updates the hidden state with the user-chosen featured model.
209
- """
210
- return radio_val
211
-
212
- choose_btn = gr.Button("Use this Featured Model", variant="primary")
213
-
214
- choose_btn.click(
215
- fn=select_featured_model,
216
- inputs=model_radio,
217
- outputs=selected_featured_model_state
218
- )
219
-
220
- gr.Markdown(
221
- """
222
- **Tip**: If you type a Custom Model in the "Chat Interface" tab, it overrides the
223
- featured model you selected here.
224
- """
225
- )
226
-
227
- # ==========================
228
- # Information Tab
229
- # ==========================
230
- with gr.Tab("Information"):
231
- gr.Markdown("## Learn More About These Models and Parameters")
232
-
233
- with gr.Accordion("Featured Models (Table)", open=False):
234
- gr.HTML(
235
- """
236
- <p>Below is a small sample table showing some featured models.</p>
237
- <table style="width:100%; text-align:center; margin:auto;">
238
- <tr>
239
- <th>Model Name</th>
240
- <th>Type</th>
241
- <th>Notes</th>
242
- </tr>
243
- <tr>
244
- <td>meta-llama/Llama-2-13B-chat-hf</td>
245
- <td>Chat</td>
246
- <td>Good for multi-turn dialogue.</td>
247
- </tr>
248
- <tr>
249
- <td>bigscience/bloom</td>
250
- <td>Language Model</td>
251
- <td>Large multilingual model.</td>
252
- </tr>
253
- <tr>
254
- <td>microsoft/DialoGPT-large</td>
255
- <td>Chat</td>
256
- <td>Well-known smaller chat model.</td>
257
- </tr>
258
- </table>
259
- """
260
- )
261
-
262
- with gr.Accordion("Parameters Overview", open=False):
263
- gr.Markdown(
264
- """
265
- ### Explanation of Key Parameters
266
-
267
- - **System Message**: Provides context or initial instructions to the model.
268
- - **Max Tokens**: The maximum number of tokens (roughly pieces of words) in the generated response.
269
- - **Temperature**: Higher values produce more random/creative outputs, while lower values make the output more focused and deterministic.
270
- - **Top-P**: Controls nucleus sampling. The model considers only the tokens whose probability mass exceeds this value.
271
- - **Frequency Penalty**: Penalizes repeated tokens. Positive values (like 1.0) reduce repetition in the output. Negative values can increase repetition.
272
- - **Seed**: Determines reproducibility. Set it to a fixed integer for consistent results; `-1` is random each time.
273
- - **Custom Model**: Overwrites the featured model. Provide the Hugging Face path (e.g., `openai/whisper-base`) for your own usage.
274
-
275
- Use these settings to guide how the model generates text. If in doubt, stick to defaults and experiment in small increments.
276
- """
277
- )
278
-
279
- return main_interface
280
-
281
- # If run as a standalone script, just launch.
282
  if __name__ == "__main__":
283
- print("Building and launching the Serverless-TextGen-Hub interface...")
284
- ui = build_app()
285
- ui.launch()
 
2
  from openai import OpenAI
3
  import os
4
 
 
 
 
 
5
  # Retrieve the access token from the environment variable
6
  ACCESS_TOKEN = os.getenv("HF_TOKEN")
7
  print("Access token loaded.")
 
13
  )
14
  print("OpenAI client initialized.")
15
 
16
+ # We'll define a list of placeholder featured models for demonstration.
17
+ # In real usage, replace them with actual model names available on Hugging Face.
18
+ models_list = [
19
+ "PlaceholderModel1",
20
+ "PlaceholderModel2",
21
+ "PlaceholderModel3",
22
+ "PlaceholderModel4",
23
+ "PlaceholderModel5"
 
 
 
 
24
  ]
25
 
26
def filter_featured_models(search_term):
    """
    Filter 'models_list' by the text entered in the search box.

    The match is a case-insensitive substring test. A missing search term
    (None) is treated like an empty string, so every model is kept.

    Returns a gr.update object that changes the choices available
    in the 'featured_models_radio'.
    """
    # Normalise once, outside the comprehension; `or ""` guards against None,
    # which would otherwise raise AttributeError on .lower().
    needle = (search_term or "").lower()
    filtered = [m for m in models_list if needle in m.lower()]
    return gr.update(choices=filtered)
34
 
35
 
 
43
  frequency_penalty,
44
  seed,
45
  custom_model,
46
+ selected_model
47
  ):
48
  """
49
  This function handles the chatbot response. It takes in:
50
  - message: the user's new message
51
  - history: the list of previous messages, each as a tuple (user_msg, assistant_msg)
52
  - system_message: the system prompt
53
+ - max_tokens: the maximum number of tokens to generate in the response
54
+ - temperature: sampling temperature
55
+ - top_p: top-p (nucleus) sampling
56
+ - frequency_penalty: penalize repeated tokens in the output
57
+ - seed: a fixed seed for reproducibility; -1 will mean 'random'
58
+ - custom_model: a custom Hugging Face model name (if any)
59
+ - selected_model: a model name chosen from the featured models radio button
60
  """
61
+
62
  print(f"Received message: {message}")
63
  print(f"History: {history}")
64
  print(f"System message: {system_message}")
65
  print(f"Max tokens: {max_tokens}, Temperature: {temperature}, Top-P: {top_p}")
66
  print(f"Frequency Penalty: {frequency_penalty}, Seed: {seed}")
67
  print(f"Custom model: {custom_model}")
68
+ print(f"Selected featured model: {selected_model}")
69
 
70
  # Convert seed to None if -1 (meaning random)
71
  if seed == -1:
72
  seed = None
73
 
74
  # Construct the messages array required by the API
75
+ messages = [{"role": "system", "content": system_message}]
76
 
77
  # Add conversation history to the context
78
  for val in history:
 
88
  # Append the latest user message
89
  messages.append({"role": "user", "content": message})
90
 
91
+ # Decide which model to use:
92
+ # 1) If the user provided a custom model, use it.
93
+ # 2) Else if they chose a featured model, use it.
94
+ # 3) Otherwise, fall back to a default model.
 
95
  if custom_model.strip() != "":
96
  model_to_use = custom_model.strip()
97
+ elif selected_model is not None and selected_model.strip() != "":
98
+ model_to_use = selected_model.strip()
99
+ else:
100
+ model_to_use = "meta-llama/Llama-3.3-70B-Instruct" # Default fallback
101
 
102
  print(f"Model selected for inference: {model_to_use}")
103
 
104
+ # Start with an empty string to build the response as tokens stream in
105
  response = ""
106
  print("Sending request to OpenAI API.")
107
 
 
109
  for message_chunk in client.chat.completions.create(
110
  model=model_to_use,
111
  max_tokens=max_tokens,
112
+ stream=True,
113
  temperature=temperature,
114
  top_p=top_p,
115
  frequency_penalty=frequency_penalty,
 
118
  ):
119
  # Extract the token text from the response chunk
120
  token_text = message_chunk.choices[0].delta.content
121
+ print(f"Received token: {token_text}")
122
  response += token_text
123
  # Yield the partial response to Gradio so it can display in real-time
124
  yield response
125
 
126
  print("Completed response generation.")
127
 
 
 
 
128
 
129
+ ########################
130
+ # GRADIO APP LAYOUT
131
+ ########################
132
+
133
+ # We’ll build a custom Blocks layout so we can have:
134
+ # - A Featured Models accordion with a search box
135
+ # - Our ChatInterface to handle the conversation
136
+ # - Additional sliders and textboxes for settings (like the original code)
137
+ ########################
138
+
139
+ with gr.Blocks(theme="Nymbo/Nymbo_Theme") as demo:
140
+ gr.Markdown("## Serverless Text Generation Hub")
141
+ gr.Markdown(
142
+ "An all-in-one UI for chatting with text-generation models on Hugging Face's Inference API."
143
+ )
144
+
145
+ # We keep a Chatbot component for the conversation display
146
+ chatbot = gr.Chatbot(height=600, label="Chat Preview")
147
+
148
+ # Textbox for system message
149
+ system_message_box = gr.Textbox(
150
+ value="",
151
+ label="System Message",
152
+ placeholder="Enter a system prompt if you want (optional).",
153
+ )
154
+
155
+ # Slider for max_tokens
156
+ max_tokens_slider = gr.Slider(
157
+ minimum=1,
158
+ maximum=4096,
159
+ value=512,
160
+ step=1,
161
+ label="Max new tokens",
162
+ )
163
+
164
+ # Slider for temperature
165
+ temperature_slider = gr.Slider(
166
+ minimum=0.1,
167
+ maximum=4.0,
168
+ value=0.7,
169
+ step=0.1,
170
+ label="Temperature",
171
+ )
172
+
173
+ # Slider for top_p
174
+ top_p_slider = gr.Slider(
175
+ minimum=0.1,
176
+ maximum=1.0,
177
+ value=0.95,
178
+ step=0.05,
179
+ label="Top-P",
180
+ )
181
+
182
+ # Slider for frequency penalty
183
+ freq_penalty_slider = gr.Slider(
184
+ minimum=-2.0,
185
+ maximum=2.0,
186
+ value=0.0,
187
+ step=0.1,
188
+ label="Frequency Penalty",
189
+ )
190
+
191
+ # Slider for seed
192
+ seed_slider = gr.Slider(
193
+ minimum=-1,
194
+ maximum=65535, # Arbitrary upper limit for demonstration
195
+ value=-1,
196
+ step=1,
197
+ label="Seed (-1 for random)",
198
+ )
199
+
200
+ # Custom Model textbox
201
+ custom_model_box = gr.Textbox(
202
+ value="",
203
+ label="Custom Model",
204
+ info="(Optional) Provide a custom Hugging Face model path. This will override the selected Featured Model if not empty."
205
+ )
206
+
207
+ # Accordion for featured models
208
+ with gr.Accordion("Featured Models", open=False):
209
+ # Textbox for filtering the featured models
210
+ model_search_box = gr.Textbox(
211
+ label="Filter Models",
212
+ placeholder="Search for a featured model...",
213
+ lines=1,
214
+ )
215
+ # Radio for selecting the desired model
216
+ featured_models_radio = gr.Radio(
217
+ label="Select a featured model below",
218
+ choices=models_list, # Start with the entire list
219
+ value=None, # No default
220
+ interactive=True
221
+ )
222
+
223
+ # We connect the model_search_box to the filter function
224
+ model_search_box.change(
225
+ filter_featured_models,
226
+ inputs=model_search_box,
227
+ outputs=featured_models_radio
228
+ )
229
+
230
+ # Now we create our ChatInterface
231
+ # We pass all the extra components as additional_inputs
232
+ interface = gr.ChatInterface(
233
+ fn=respond,
234
+ chatbot=chatbot,
235
+ additional_inputs=[
236
+ system_message_box,
237
+ max_tokens_slider,
238
+ temperature_slider,
239
+ top_p_slider,
240
+ freq_penalty_slider,
241
+ seed_slider,
242
+ custom_model_box,
243
+ featured_models_radio
244
+ ],
245
+ theme="Nymbo/Nymbo_Theme",
246
+ title="Serverless TextGen Hub with Featured Models",
247
+ description=(
248
+ "Use the sliders and textboxes to control generation parameters. "
249
+ "Pick a model from 'Featured Models' or specify a custom model path."
250
+ ),
251
+ # Fill the screen height
252
+ fill_height=True
253
+ )
254
+
255
+ # If you want the script to be directly executable, launch the demo here:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
256
  if __name__ == "__main__":
257
+ print("Launching the demo application...")
258
+ demo.launch()