Nymbo committed (verified)
Commit 27c8b8d · 1 Parent(s): 7de1759

Update app.py

Files changed (1): app.py (+114, -266)
app.py CHANGED
@@ -14,297 +14,145 @@ client = OpenAI(
 print("OpenAI client initialized.")
 
 def respond(
-    user_message,
-    chat_history,
-    system_msg,
+    message,
+    history: list[tuple[str, str]],
+    system_message,
     max_tokens,
     temperature,
     top_p,
     frequency_penalty,
     seed,
-    featured_model,
-    custom_model
+    custom_model,
+    selected_model
 ):
     """
-    This function handles the chatbot response. It takes in:
-    - user_message: the user's newly typed message
-    - chat_history: the list of (user, assistant) message pairs
-    - system_msg: the system instruction or system-level context
-    - max_tokens: the maximum number of tokens to generate
-    - temperature: sampling temperature
-    - top_p: top-p (nucleus) sampling
-    - frequency_penalty: penalize repeated tokens in the output
-    - seed: a fixed seed for reproducibility; -1 means 'random'
-    - featured_model: the chosen model name from 'Featured Models' radio
-    - custom_model: the optional custom model that overrides the featured one if provided
+    Handles the chatbot response generation.
     """
-
-    print(f"Received user message: {user_message}")
-    print(f"System message: {system_msg}")
-    print(f"Max tokens: {max_tokens}, Temperature: {temperature}, Top-P: {top_p}, Freq-Penalty: {frequency_penalty}, Seed: {seed}")
-    print(f"Featured model: {featured_model}")
+    print(f"Received message: {message}")
+    print(f"History: {history}")
+    print(f"System message: {system_message}")
+    print(f"Max tokens: {max_tokens}, Temperature: {temperature}, Top-P: {top_p}")
+    print(f"Frequency Penalty: {frequency_penalty}, Seed: {seed}")
     print(f"Custom model: {custom_model}")
+    print(f"Selected model: {selected_model}")
 
-    # Convert the seed to None if user set it to -1 (meaning random)
+    # Convert seed to None if -1 (meaning random)
     if seed == -1:
         seed = None
 
-    # Decide which model to actually use
-    # If custom_model is non-empty, use that; otherwise use the chosen featured_model
-    model_to_use = custom_model.strip() if custom_model.strip() != "" else featured_model
-    # Provide a default fallback if for some reason both are empty
-    if model_to_use.strip() == "":
-        model_to_use = "meta-llama/Llama-3.3-70B-Instruct"
-
+    # Construct the messages array required by the API
+    messages = [{"role": "system", "content": system_message}]
+
+    # Add conversation history to the context
+    for val in history:
+        user_part = val[0]
+        assistant_part = val[1]
+        if user_part:
+            messages.append({"role": "user", "content": user_part})
+            print(f"Added user message to context: {user_part}")
+        if assistant_part:
+            messages.append({"role": "assistant", "content": assistant_part})
+            print(f"Added assistant message to context: {assistant_part}")
+
+    # Append the latest user message
+    messages.append({"role": "user", "content": message})
+
+    # Determine which model to use
+    model_to_use = (
+        custom_model.strip()
+        if custom_model.strip() != ""
+        else selected_model.strip()
+    )
     print(f"Model selected for inference: {model_to_use}")
 
-    # Construct the conversation history in the format required by HF's Inference API
-    messages = []
-    if system_msg.strip():
-        messages.append({"role": "system", "content": system_msg.strip()})
-
-    # Add the conversation history
-    for user_text, assistant_text in chat_history:
-        if user_text:
-            messages.append({"role": "user", "content": user_text})
-        if assistant_text:
-            messages.append({"role": "assistant", "content": assistant_text})
-
-    # Add the new user message to the conversation
-    messages.append({"role": "user", "content": user_message})
-
-    # We'll build the response token-by-token in a streaming loop
-    response_so_far = ""
-    print("Sending request to the Hugging Face Inference API...")
-
-    # Make the streaming request to the HF Inference API
-    try:
-        for resp_chunk in client.chat.completions.create(
-            model=model_to_use,
-            max_tokens=max_tokens,
-            stream=True,
-            temperature=temperature,
-            top_p=top_p,
-            frequency_penalty=frequency_penalty,
-            seed=seed,
-            messages=messages,
-        ):
-            token_text = resp_chunk.choices[0].delta.content
-            response_so_far += token_text
-            # We yield back the updated message to display partial progress in the chatbot
-            yield response_so_far
-    except Exception as e:
-        # If there's an error, let's at least show it in the chat
-        error_text = f"[ERROR] {str(e)}"
-        print(error_text)
-        yield response_so_far + "\n\n" + error_text
+    # Start with an empty string to build the response as tokens stream in
+    response = ""
+    print("Sending request to OpenAI API.")
+
+    # Make the streaming request to the HF Inference API via openai-like client
+    for message_chunk in client.chat.completions.create(
+        model=model_to_use,
+        max_tokens=max_tokens,
+        stream=True,
+        temperature=temperature,
+        top_p=top_p,
+        frequency_penalty=frequency_penalty,
+        seed=seed,
+        messages=messages,
+    ):
+        # Extract the token text from the response chunk
+        token_text = message_chunk.choices[0].delta.content
+        print(f"Received token: {token_text}")
+        response += token_text
+        yield response
 
     print("Completed response generation.")
 
-#
-# BUILDING THE GRADIO INTERFACE BELOW
-#
-
-# List of featured models; adjust or replace these placeholders with real text-generation models
+# Predefined list of placeholder models for the Featured Models accordion
 models_list = [
     "meta-llama/Llama-3.3-70B-Instruct",
-    "meta-llama/Llama-2-13B-chat-hf",
-    "bigscience/bloom",
-    "openlm-research/open_llama_7b",
-    "facebook/opt-6.7b",
-    "google/flan-t5-xxl",
+    "bigscience/bloom-7b1",
+    "EleutherAI/gpt-neo-2.7B",
+    "OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5",
+    "HuggingFace/distilgpt2",
 ]
 
+# Function to filter models based on search input
 def filter_models(search_term):
-    """Filters the models_list by the given search_term and returns an update for the Radio component."""
-    filtered = [m for m in models_list if search_term.lower() in m.lower()]
-    return gr.update(choices=filtered)
-
-with gr.Blocks(theme="Nymbo/Nymbo_Theme_5") as demo:
-    gr.Markdown("# Serverless-TextGen-Hub (Enhanced)")
-    gr.Markdown("**A comprehensive UI for text generation with a featured-models dropdown and a custom override**.")
-
-    # We keep track of the conversation in a Gradio state variable (list of tuples)
-    chat_history = gr.State([])
-
-    # Tabs for organization
-    with gr.Tab("Basic Settings"):
-        with gr.Row():
-            with gr.Column(elem_id="prompt-container"):
-                # System Message
-                system_msg = gr.Textbox(
-                    label="System message",
-                    placeholder="Enter system-level instructions or context here.",
-                    lines=2
-                )
-                # Accordion for featured models
-                with gr.Accordion("Featured Models", open=True):
-                    model_search = gr.Textbox(
-                        label="Filter Models",
-                        placeholder="Search for a featured model...",
-                        lines=1
-                    )
-                    # The radio that lists our featured models
-                    model_radio = gr.Radio(
-                        label="Select a featured model below",
-                        choices=models_list,
-                        value=models_list[0],  # default
-                        interactive=True
-                    )
-                    # Link the search box to update the model_radio choices
-                    model_search.change(filter_models, inputs=model_search, outputs=model_radio)
-
-                # Custom Model
-                custom_model_box = gr.Textbox(
-                    label="Custom Model (Optional)",
-                    info="If provided, overrides the featured model above. e.g. 'meta-llama/Llama-3.3-70B-Instruct'",
-                    placeholder="Your huggingface.co/username/model_name path"
-                )
-
-    with gr.Tab("Advanced Settings"):
-        with gr.Row():
-            max_tokens_slider = gr.Slider(
-                minimum=1,
-                maximum=4096,
-                value=512,
-                step=1,
-                label="Max new tokens"
-            )
-            temperature_slider = gr.Slider(
-                minimum=0.1,
-                maximum=4.0,
-                value=0.7,
-                step=0.1,
-                label="Temperature"
-            )
-            top_p_slider = gr.Slider(
-                minimum=0.1,
-                maximum=1.0,
-                value=0.95,
-                step=0.05,
-                label="Top-P"
-            )
-        with gr.Row():
-            freq_penalty_slider = gr.Slider(
-                minimum=-2.0,
-                maximum=2.0,
-                value=0.0,
-                step=0.1,
-                label="Frequency Penalty"
-            )
-            seed_slider = gr.Slider(
-                minimum=-1,
-                maximum=65535,
-                value=-1,
-                step=1,
-                label="Seed (-1 for random)"
-            )
-
-    # Chat interface area: user input -> assistant output
-    with gr.Row():
-        chatbot = gr.Chatbot(
-            label="TextGen Chat",
-            height=500
-        )
-
-    # The user types a message here
-    user_input = gr.Textbox(
-        label="Your message",
-        placeholder="Type your text prompt here..."
-    )
-
-    # "Send" button triggers our respond() function, updates the chatbot
-    send_button = gr.Button("Send")
-
-    # A Clear Chat button to reset the conversation
-    clear_button = gr.Button("Clear Chat")
-
-    # Define how the Send button updates the state and chatbot
-    def user_submission(user_text, history):
-        """
-        This function gets called first to add the user's message to the chat.
-        We return the updated chat_history with the user's message appended,
-        plus an empty string for the next user input box.
-        """
-        if user_text.strip() == "":
-            return history, ""
-        # Append user message to chat
-        history = history + [(user_text, None)]
-        return history, ""
-
-    send_button.click(
-        fn=user_submission,
-        inputs=[user_input, chat_history],
-        outputs=[chat_history, user_input]
-    )
-
-    # Then we run the respond function (streaming) to generate the assistant message
-    def bot_response(
-        history,
-        system_msg,
-        max_tokens,
-        temperature,
-        top_p,
-        freq_penalty,
-        seed,
-        featured_model,
-        custom_model
-    ):
-        """
-        This function is called to generate the assistant's response
-        based on the conversation so far, system message, etc.
-        We do the streaming here.
-        """
-        if not history:
-            yield history
-        # The last user message is in history[-1][0]
-        user_message = history[-1][0] if history else ""
-        # We pass everything to respond() generator
-        bot_stream = respond(
-            user_message=user_message,
-            chat_history=history[:-1],  # all except the newly appended user message
-            system_msg=system_msg,
-            max_tokens=max_tokens,
-            temperature=temperature,
-            top_p=top_p,
-            frequency_penalty=freq_penalty,
-            seed=seed,
-            featured_model=featured_model,
-            custom_model=custom_model
-        )
-        partial_text = ""
-        for partial_text in bot_stream:
-            # We'll keep updating the last message in the conversation with partial_text
-            updated_history = history[:-1] + [(history[-1][0], partial_text)]
-            yield updated_history
-
-    send_button.click(
-        fn=bot_response,
-        inputs=[
-            chat_history,
-            system_msg,
-            max_tokens_slider,
-            temperature_slider,
-            top_p_slider,
-            freq_penalty_slider,
-            seed_slider,
-            model_radio,
-            custom_model_box
-        ],
-        outputs=chatbot
-    )
-
-    # Clear chat just resets the state
-    def clear_chat():
-        return [], ""
-
-    clear_button.click(
-        fn=clear_chat,
-        inputs=[],
-        outputs=[chat_history, user_input]
-    )
-
-# Launch the application
+    filtered_models = [m for m in models_list if search_term.lower() in m.lower()]
+    return gr.update(choices=filtered_models)
+
+# Create a Chatbot component with a specified height
+chatbot = gr.Chatbot(height=600)
+print("Chatbot interface created.")
+
+# Create the Gradio ChatInterface
+# Added "Featured Models" accordion and integrated filtering
+demo = gr.Interface(
+    fn=respond,
+    inputs=[
+        gr.Textbox(value="", label="System message"),
+        gr.Slider(minimum=1, maximum=4096, value=512, step=1, label="Max new tokens"),
+        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
+        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-P"),
+        gr.Slider(
+            minimum=-2.0,
+            maximum=2.0,
+            value=0.0,
+            step=0.1,
+            label="Frequency Penalty"
+        ),
+        gr.Slider(
+            minimum=-1,
+            maximum=65535,  # Arbitrary upper limit for demonstration
+            value=-1,
+            step=1,
+            label="Seed (-1 for random)"
+        ),
+        gr.Textbox(
+            value="",
+            label="Custom Model",
+            info="(Optional) Provide a custom Hugging Face model path. This will override the default model if not empty.",
+        ),
+        # Add Featured Models accordion
+        gr.Accordion("Featured Models", open=True, children=[
+            gr.Textbox(label="Filter Models", placeholder="Search for a featured model...", lines=1).change(
+                filter_models, inputs=["value"], outputs="choices"
+            ),
+            gr.Radio(
+                label="Select a featured model",
+                value="meta-llama/Llama-3.3-70B-Instruct",
+                choices=models_list,
+                elem_id="model-radio",
+            )
+        ]),
+    ],
+    outputs=gr.Chatbot(height=600),
+    theme="Nymbo/Nymbo_Theme",
+)
+
+print("Gradio interface initialized.")
 
 if __name__ == "__main__":
-    print("Launching the Serverless-TextGen-Hub with Featured Models & Custom Model override.")
+    print("Launching the demo application.")
     demo.launch()
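
For context, the hunk header references the client = OpenAI( initialization that sits just above line 14 and is unchanged by this commit. A minimal sketch of what that setup typically looks like for a Space driving the Hugging Face Inference API through the OpenAI SDK; the base_url and the HF_TOKEN variable name are assumptions, since the diff does not show them:

import os
from openai import OpenAI

# Sketch (assumed, not shown in this diff): an OpenAI-compatible client
# pointed at the Hugging Face Inference API endpoint.
client = OpenAI(
    base_url="https://api-inference.huggingface.co/v1/",  # assumed endpoint
    api_key=os.environ.get("HF_TOKEN"),  # assumed token env var
)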
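
One caveat in the new streaming loop: with OpenAI-style streaming, the first and last chunks can carry a None delta (a role-only chunk and a finish chunk), so response += token_text can raise a TypeError when the stream opens or ends. A minimal sketch of the same loop with a guard, factored into a standalone helper so it is runnable on its own; the name stream_chat is illustrative, not from the commit:

from collections.abc import Iterator

def stream_chat(client, model_to_use: str, messages: list[dict], **params) -> Iterator[str]:
    """Yield the growing response text, skipping empty stream deltas."""
    response = ""
    for chunk in client.chat.completions.create(
        model=model_to_use,
        stream=True,
        messages=messages,
        **params,  # e.g. max_tokens, temperature, top_p, frequency_penalty, seed
    ):
        token_text = chunk.choices[0].delta.content
        # Role-only and finish chunks have no content; skip them so that
        # string concatenation never sees None.
        if not token_text:
            continue
        response += token_text
        yield response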
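
As committed, the gr.Interface wiring does not line up with respond(): the inputs list omits the message and history arguments the function expects, and gr.Accordion is a layout container, not an input component (it takes no children= argument, and .change(filter_models, inputs=["value"], outputs="choices") is not a valid event binding). A hedged sketch of one way to connect this exact signature, using gr.ChatInterface with additional_inputs inside a gr.Blocks context; it reuses respond, models_list, and filter_models from the diff, while the component variable names are illustrative:

import gradio as gr

# Sketch: respond(message, history, system_message, max_tokens, temperature,
# top_p, frequency_penalty, seed, custom_model, selected_model) matches
# ChatInterface's (message, history, *additional_inputs) calling convention.
with gr.Blocks(theme="Nymbo/Nymbo_Theme") as demo:
    with gr.Accordion("Featured Models", open=True):
        model_search = gr.Textbox(label="Filter Models",
                                  placeholder="Search for a featured model...")
        model_radio = gr.Radio(label="Select a featured model",
                               choices=models_list, value=models_list[0])
        # Re-filter the radio choices as the user types
        model_search.change(filter_models, inputs=model_search, outputs=model_radio)

    custom_model_box = gr.Textbox(
        label="Custom Model",
        info="(Optional) Overrides the selection above if not empty.",
    )

    gr.ChatInterface(
        fn=respond,
        chatbot=gr.Chatbot(height=600),
        additional_inputs=[
            gr.Textbox(value="", label="System message"),
            gr.Slider(minimum=1, maximum=4096, value=512, step=1, label="Max new tokens"),
            gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
            gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-P"),
            gr.Slider(minimum=-2.0, maximum=2.0, value=0.0, step=0.1, label="Frequency Penalty"),
            gr.Slider(minimum=-1, maximum=65535, value=-1, step=1, label="Seed (-1 for random)"),
            custom_model_box,   # maps to respond(..., custom_model, ...)
            model_radio,        # maps to respond(..., selected_model)
        ],
    )

Passing already-rendered components to additional_inputs keeps the accordion and filter box where they are laid out in the Blocks context, while ChatInterface supplies the chat box, the message textbox, and the streaming updates from the respond generator.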