Nymbo committed
Commit 77298b9 · verified · 1 parent: d3123eb

Update app.py

Files changed (1)
  1. app.py +241 -215
app.py CHANGED
@@ -1,6 +1,10 @@
  import gradio as gr
- from openai import OpenAI
  import os

  # Retrieve the access token from the environment variable
  ACCESS_TOKEN = os.getenv("HF_TOKEN")
@@ -13,291 +17,313 @@ client = OpenAI(
  )
  print("OpenAI client initialized.")

  def respond(
-     message,
-     history: list[tuple[str, str]],
      system_message,
      max_tokens,
      temperature,
      top_p,
      frequency_penalty,
      seed,
-     custom_model,
-     selected_featured_model
  ):
      """
      This function handles the chatbot response. It takes in:
-     - message: the user's new message
-     - history: the list of previous messages, each as a tuple (user_msg, assistant_msg)
      - system_message: the system prompt
      - max_tokens: the maximum number of tokens to generate in the response
      - temperature: sampling temperature
      - top_p: top-p (nucleus) sampling
      - frequency_penalty: penalize repeated tokens in the output
      - seed: a fixed seed for reproducibility; -1 will mean 'random'
-     - custom_model: the user-provided custom model name (if any)
-     - selected_featured_model: the model selected from featured models
      """

-     print(f"Received message: {message}")
-     print(f"History: {history}")
      print(f"System message: {system_message}")
-     print(f"Max tokens: {max_tokens}, Temperature: {temperature}, Top-P: {top_p}")
-     print(f"Frequency Penalty: {frequency_penalty}, Seed: {seed}")
-     print(f"Custom model: {custom_model}")
-     print(f"Selected featured model: {selected_featured_model}")

      # Convert seed to None if -1 (meaning random)
      if seed == -1:
          seed = None

-     # Determine which model to use: either custom_model or selected featured model
-     if custom_model.strip() != "":
          model_to_use = custom_model.strip()
-         print(f"Using Custom Model: {model_to_use}")
      else:
-         model_to_use = selected_featured_model
-         print(f"Using Featured Model: {model_to_use}")

-     # Construct the messages array required by the API
      messages = [{"role": "system", "content": system_message}]

-     # Add conversation history to the context
-     for val in history:
-         user_part = val[0]
-         assistant_part = val[1]
-         if user_part:
-             messages.append({"role": "user", "content": user_part})
-             print(f"Added user message to context: {user_part}")
-         if assistant_part:
-             messages.append({"role": "assistant", "content": assistant_part})
-             print(f"Added assistant message to context: {assistant_part}")
-
-     # Append the latest user message
-     messages.append({"role": "user", "content": message})
-
-     # Start with an empty string to build the response as tokens stream in
-     response = ""
-     print("Sending request to OpenAI API.")
-
-     try:
-         # Make the streaming request to the HF Inference API via openai-like client
-         for message_chunk in client.chat.completions.create(
-             model=model_to_use,  # Use either the user-provided custom model or selected featured model
-             max_tokens=max_tokens,
-             stream=True,  # Stream the response
-             temperature=temperature,
-             top_p=top_p,
-             frequency_penalty=frequency_penalty,
-             seed=seed,
-             messages=messages,
-         ):
-             # Extract the token text from the response chunk
-             token_text = message_chunk.choices[0].delta.content
-             print(f"Received token: {token_text}")
-             response += token_text
-             # Yield the partial response to Gradio so it can display in real-time
-             yield response
-     except Exception as e:
-         print(f"Error during API call: {e}")
-         yield f"An error occurred: {e}"

      print("Completed response generation.")

- # Create a Chatbot component with a specified height
- chatbot = gr.Chatbot(height=600)
- print("Chatbot interface created.")

- # Placeholder featured models list
- FEATURED_MODELS_LIST = [
-     "meta-llama/Llama-3.1-8B-Instruct",
-     "microsoft/Phi-3.5-mini-instruct",
-     "mistralai/Mistral-7B-Instruct-v0.3",
-     "Qwen/Qwen2.5-72B-Instruct",
  ]

- # Define the Gradio Blocks interface
  with gr.Blocks(theme="Nymbo/Nymbo_Theme") as demo:
-     gr.Markdown("# Serverless-TextGen-Hub 📝🤖")
      gr.Markdown(
          """
-         Welcome to the **Serverless-TextGen-Hub**! Chat with your favorite models seamlessly.
          """
      )
-
-     with gr.Row():
-         # Chatbot component
-         chatbot_component = gr.Chatbot(height=600)

-     with gr.Row():
-         # System message input
          system_message = gr.Textbox(
-             value="You are a helpful assistant.",
              label="System Message",
-             placeholder="Enter system message here...",
              lines=2,
          )

-     with gr.Row():
-         # User message input
-         user_message = gr.Textbox(
-             label="Your Message",
-             placeholder="Type your message here...",
-             lines=2,
          )
-         # Run button
-         run_button = gr.Button("Send", variant="primary")

-     with gr.Row():
-         # Additional settings
-         with gr.Column(scale=1):
-             max_tokens = gr.Slider(
-                 minimum=1,
-                 maximum=4096,
-                 value=512,
-                 step=1,
-                 label="Max New Tokens",
-             )
-             temperature = gr.Slider(
-                 minimum=0.1,
-                 maximum=4.0,
-                 value=0.7,
-                 step=0.1,
-                 label="Temperature",
-             )
-             top_p = gr.Slider(
-                 minimum=0.1,
-                 maximum=1.0,
-                 value=0.95,
-                 step=0.05,
-                 label="Top-P",
-             )
-             frequency_penalty = gr.Slider(
-                 minimum=-2.0,
-                 maximum=2.0,
-                 value=0.0,
-                 step=0.1,
-                 label="Frequency Penalty",
-             )
-             seed = gr.Slider(
-                 minimum=-1,
-                 maximum=65535,  # Arbitrary upper limit for demonstration
-                 value=-1,
-                 step=1,
-                 label="Seed (-1 for random)",
-             )
-             custom_model = gr.Textbox(
-                 value="",
-                 label="Custom Model",
-                 info="(Optional) Provide a custom Hugging Face model path. This will override the selected featured model if not empty.",
-                 placeholder="e.g., meta-llama/Llama-3.3-70B-Instruct",
-             )
-
-     with gr.Accordion("Featured Models", open=True):
-         with gr.Column():
-             model_search = gr.Textbox(
-                 label="Filter Models",
-                 placeholder="Search for a featured model...",
-                 lines=1,
-             )
-             featured_model = gr.Radio(
-                 label="Select a model below",
-                 value=FEATURED_MODELS_LIST[0],
-                 choices=FEATURED_MODELS_LIST,
-                 interactive=True,
-             )
-
-     # Function to filter featured models based on search input
-     def filter_featured_models(search_term):
-         if not search_term:
-             return gr.update(choices=FEATURED_MODELS_LIST, value=FEATURED_MODELS_LIST[0])
-         filtered = [model for model in FEATURED_MODELS_LIST if search_term.lower() in model.lower()]
-         if not filtered:
-             return gr.update(choices=[], value=None)
-         return gr.update(choices=filtered, value=filtered[0])
-
-     # Update featured_model choices based on search
-     model_search.change(
-         fn=filter_featured_models,
-         inputs=model_search,
-         outputs=featured_model,
      )

-     # Function to handle the chatbot response
-     def handle_response(message, history, system_msg, max_tok, temp, tp, freq_pen, sd, custom_mod, selected_feat_mod):
-         # Append user message to history
-         history = history or []
-         history.append((message, None))
-         # Generate response using the respond function
-         response = respond(
-             message=message,
-             history=history,
-             system_message=system_msg,
-             max_tokens=max_tok,
-             temperature=temp,
-             top_p=tp,
-             frequency_penalty=freq_pen,
-             seed=sd,
-             custom_model=custom_mod,
-             selected_featured_model=selected_feat_mod,
          )
-         return response, history + [(message, response)]

-     # Handle button click
-     run_button.click(
-         fn=handle_response,
          inputs=[
-             user_message,
-             chatbot_component,  # history
              system_message,
              max_tokens,
              temperature,
              top_p,
              frequency_penalty,
              seed,
-             custom_model,
-             featured_model,
-         ],
-         outputs=[
-             chatbot_component,
-             chatbot_component,  # Updated history
          ],
      )

-     # Allow pressing Enter to send the message
-     user_message.submit(
-         fn=handle_response,
          inputs=[
-             user_message,
-             chatbot_component,  # history
              system_message,
              max_tokens,
              temperature,
              top_p,
              frequency_penalty,
              seed,
-             custom_model,
-             featured_model,
-         ],
-         outputs=[
-             chatbot_component,
-             chatbot_component,  # Updated history
          ],
      )

-     # Custom CSS to enhance the UI
-     demo.load(lambda: None, None, None, _js="""
-         () => {
-             const style = document.createElement('style');
-             style.innerHTML = `
-                 footer {visibility: hidden !important;}
-                 .gradio-container {background-color: #f9f9f9;}
-             `;
-             document.head.appendChild(style);
-         }
      """)

- print("Launching Gradio interface...")  # Debug log
-
- # Launch the Gradio interface without showing the API or sharing externally
- demo.launch(show_api=False, share=False)
 
  import gradio as gr
  import os
+ from openai import OpenAI
+
+ ################################################
+ # INITIAL SETUP
+ ################################################

  # Retrieve the access token from the environment variable
  ACCESS_TOKEN = os.getenv("HF_TOKEN")

  )
  print("OpenAI client initialized.")

+ # Our main response-generating function
  def respond(
+     user_message,
+     history,
      system_message,
      max_tokens,
      temperature,
      top_p,
      frequency_penalty,
      seed,
+     featured_model,
+     custom_model
  ):
      """
      This function handles the chatbot response. It takes in:
+     - user_message: the user's new message
+     - history: the list of previous messages, each as [user_text, assistant_text]
      - system_message: the system prompt
      - max_tokens: the maximum number of tokens to generate in the response
      - temperature: sampling temperature
      - top_p: top-p (nucleus) sampling
      - frequency_penalty: penalize repeated tokens in the output
      - seed: a fixed seed for reproducibility; -1 will mean 'random'
+     - featured_model: the user-chosen model from the radio button
+     - custom_model: a user-specified custom model that overrides featured_model if not empty
      """

+     print(f"New user message: {user_message}")
+     print(f"History so far: {history}")
      print(f"System message: {system_message}")
+     print(f"max_tokens: {max_tokens}, temperature: {temperature}, top_p: {top_p}")
+     print(f"frequency_penalty: {frequency_penalty}, seed: {seed}")
+     print(f"Featured Model: {featured_model}")
+     print(f"Custom Model: {custom_model}")

      # Convert seed to None if -1 (meaning random)
      if seed == -1:
          seed = None

+     # Determine which model to use
+     # If the user typed something in custom_model, that overrides the featured model
+     # Otherwise we use the model selected in the radio. If neither, default to the example "meta-llama..."
+     model_to_use = None
+     if custom_model.strip():
          model_to_use = custom_model.strip()
+     elif featured_model is not None and featured_model.strip():
+         model_to_use = featured_model.strip()
      else:
+         model_to_use = "meta-llama/Llama-3.3-70B-Instruct"

+     print(f"Model selected for inference: {model_to_use}")
+
+     # Construct the conversation messages for the HF Inference API
      messages = [{"role": "system", "content": system_message}]
+     for user_text, assistant_text in history:
+         if user_text:
+             messages.append({"role": "user", "content": user_text})
+         if assistant_text:
+             messages.append({"role": "assistant", "content": assistant_text})
+     messages.append({"role": "user", "content": user_message})
+
+     # We'll collect and stream the response
+     response_so_far = ""

+     # Make the streaming request to the HF Inference API
+     print("Sending request to OpenAI/Hugging Face Inference API...")
+     for message_chunk in client.chat.completions.create(
+         model=model_to_use,
+         max_tokens=max_tokens,
+         stream=True,
+         temperature=temperature,
+         top_p=top_p,
+         frequency_penalty=frequency_penalty,
+         seed=seed,
+         messages=messages,
+     ):
+         # The content for the partial chunk; it can be None (e.g. on the final chunk),
+         # so fall back to "" to avoid a TypeError during concatenation
+         token_text = message_chunk.choices[0].delta.content or ""
+         response_so_far += token_text
+         # Return partial response to Gradio to display in real-time
+         yield response_so_far

      print("Completed response generation.")

+ ################################################
+ # GRADIO UI + STATE MANAGEMENT
+ ################################################
+
+ def user_submit(user_message, history):
+     """
+     This function is called when the user sends a message.
+     We simply add the user message to the conversation history.
+     """
+     print("user_submit triggered.")
+     # Append the new user message to history
+     if not history:
+         history = []
+     history = history + [[user_message, None]]
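+     # Returning "" as the second output clears the user's input textbox in the UI.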
+     return history, ""
+
+ def bot_reply(history, system_message, max_tokens, temperature, top_p,
+               frequency_penalty, seed, featured_model, custom_model):
+     """
+     This function is triggered to produce the bot's response after the user has submitted.
+     We call 'respond' for streaming text.
+     """
+     print("bot_reply triggered.")
+
+     # The last conversation item has user_message, None
+     user_message = history[-1][0]
+
+     # We will stream the partial responses from 'respond'
+     bot_response = respond(
+         user_message=user_message,
+         history=history[:-1],  # all items except the last, because we pass the last user msg separately
+         system_message=system_message,
+         max_tokens=max_tokens,
+         temperature=temperature,
+         top_p=top_p,
+         frequency_penalty=frequency_penalty,
+         seed=seed,
+         featured_model=featured_model,
+         custom_model=custom_model
+     )
+
+     # As we yield from the generator, we update the last item in history with the partial response
+     # Gradio streaming logic: yield the partial updates as they come in
+     for partial_text in bot_response:
+         history[-1][1] = partial_text
+         yield history

+ # We define a small list of placeholder featured models for demonstration
+ models_list = [
+     "meta-llama/Llama-2-13B-Chat-hf",
+     "bigscience/bloom",
+     "EleutherAI/gpt-neo-2.7B",
+     "meta-llama/Llama-3.3-70B-Instruct"
  ]

+ def filter_models(search_term):
+     """
+     Filter function triggered when user types in the model_search box.
+     Returns an updated list of models that contain the search term.
+     """
+     filtered = [m for m in models_list if search_term.lower() in m.lower()]
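+     # Only the choices are updated here; the radio's current selection is left unchanged.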
+     return gr.update(choices=filtered)
+
+
+ ################################################
+ # BUILDING THE GRADIO LAYOUT
+ ################################################
+
  with gr.Blocks(theme="Nymbo/Nymbo_Theme") as demo:
      gr.Markdown(
          """
+         # Serverless-TextGen-Hub
+         **A UI for text generation using Hugging Face's Inference API.**
+
+         Below is a simple chat interface. You can pick from **Featured Models** or specify a **Custom Model**
+         to override the choice. If you're not sure, just use the default.
          """
      )

+     # State to hold the conversation history, as a list of [user, bot] pairs
+     conversation_state = gr.State([])
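+     # (gr.State is stored per browser session, so each visitor keeps an independent history.)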
+
+     # Accordion for the system message + advanced settings
+     with gr.Accordion("Advanced Settings", open=False):
          system_message = gr.Textbox(
              label="System Message",
+             value="You are a helpful assistant.",
              lines=2,
+             info="Provides background or personality instructions to the model."
+         )
+         max_tokens = gr.Slider(
+             minimum=1,
+             maximum=4096,
+             value=512,
+             step=1,
+             label="Max new tokens"
+         )
+         temperature = gr.Slider(
+             minimum=0.1,
+             maximum=4.0,
+             value=0.7,
+             step=0.1,
+             label="Temperature"
+         )
+         top_p = gr.Slider(
+             minimum=0.1,
+             maximum=1.0,
+             value=0.95,
+             step=0.05,
+             label="Top-P"
+         )
+         frequency_penalty = gr.Slider(
+             minimum=-2.0,
+             maximum=2.0,
+             value=0.0,
+             step=0.1,
+             label="Frequency Penalty"
+         )
+         seed = gr.Slider(
+             minimum=-1,
+             maximum=65535,
+             value=-1,
+             step=1,
+             label="Seed (-1 for random)"
          )

+     # Featured Models + filtering
+     with gr.Accordion("Featured Models", open=False):
+         model_search = gr.Textbox(
+             label="Filter Models",
+             placeholder="Search for a featured model...",
+             lines=1
+         )
+         featured_model_radio = gr.Radio(
+             label="Select a featured model below",
+             choices=models_list,
+             value=models_list[0],  # default selection
+             interactive=True
+         )
+         model_search.change(
+             filter_models,
+             inputs=model_search,
+             outputs=featured_model_radio
          )

+     # This is the Custom Model box (overrides Featured Models if not empty)
+     custom_model = gr.Textbox(
+         label="Custom Model",
+         value="",
+         info="(Optional) Provide a custom HF model path. If not empty, it overrides the Featured Model."
      )

+     # The main Chatbot interface
+     chatbot = gr.Chatbot(height=600)
+
+     # Textbox for the user to type a new message
+     with gr.Row():
+         user_input = gr.Textbox(
+             show_label=False,
+             placeholder="Type your message here (press enter or click 'Submit')",
+             lines=2
          )
+         submit_btn = gr.Button("Submit", variant="primary")
+
+     # The user submits -> we update the conversation state
+     submit_btn.click(
+         fn=user_submit,
+         inputs=[user_input, conversation_state],
+         outputs=[conversation_state, user_input],
+     )
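+     # Note: user_submit (above) and bot_reply (below) are two separate listeners
+     # on the same click, registered so the message is stored before the reply
+     # streams; a chained .then() call would make that ordering explicit.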
 
+     # Then the bot replies, streaming the output
+     # We pass all required arguments from the advanced settings, plus the model selection boxes
+     submit_btn.click(
+         fn=bot_reply,
          inputs=[
+             conversation_state,
              system_message,
              max_tokens,
              temperature,
              top_p,
              frequency_penalty,
              seed,
+             featured_model_radio,
+             custom_model
          ],
+         outputs=[chatbot],
+         # 'bot_reply' is a generator; enabling the queue lets Gradio stream its partial outputs
+         queue=True
      )

+     # We also allow pressing Enter in user_input to do the same thing
+     user_input.submit(
+         fn=user_submit,
+         inputs=[user_input, conversation_state],
+         outputs=[conversation_state, user_input],
+     )
+     user_input.submit(
+         fn=bot_reply,
          inputs=[
+             conversation_state,
              system_message,
              max_tokens,
              temperature,
              top_p,
              frequency_penalty,
              seed,
+             featured_model_radio,
+             custom_model
          ],
+         outputs=[chatbot],
+         queue=True
      )

+     gr.HTML("""
+     <br>
+     <p style='text-align:center;'>
+         Developed by <strong>Nymbo</strong>.
+         Powered by <strong>Hugging Face Inference API</strong>.
+     </p>
      """)

+ # Finally, launch the app
+ if __name__ == "__main__":
+     print("Launching the Serverless-TextGen-Hub application...")
+     demo.launch()
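+     # (Run locally with `python app.py` after setting the HF_TOKEN environment variable.)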