Nymbo committed on
Commit 7de1759 · verified · 1 Parent(s): 77298b9

Update app.py

Files changed (1): app.py (+234 −253)
app.py CHANGED
@@ -1,10 +1,6 @@
  import gradio as gr
- import os
  from openai import OpenAI
-
- ################################################
- # INITIAL SETUP
- ################################################

  # Retrieve the access token from the environment variable
  ACCESS_TOKEN = os.getenv("HF_TOKEN")
@@ -17,11 +13,10 @@ client = OpenAI(
  )
  print("OpenAI client initialized.")

- # Our main response-generating function
  def respond(
      user_message,
-     history,
-     system_message,
      max_tokens,
      temperature,
      top_p,
@@ -32,298 +27,284 @@ def respond(
  ):
      """
      This function handles the chatbot response. It takes in:
-     - user_message: the user's new message
-     - history: the list of previous messages, each as [user_text, assistant_text]
-     - system_message: the system prompt
-     - max_tokens: the maximum number of tokens to generate in the response
      - temperature: sampling temperature
      - top_p: top-p (nucleus) sampling
      - frequency_penalty: penalize repeated tokens in the output
-     - seed: a fixed seed for reproducibility; -1 will mean 'random'
-     - featured_model: the user-chosen model from the radio button
-     - custom_model: a user-specified custom model that overrides featured_model if not empty
      """

-     print(f"New user message: {user_message}")
-     print(f"History so far: {history}")
-     print(f"System message: {system_message}")
-     print(f"max_tokens: {max_tokens}, temperature: {temperature}, top_p: {top_p}")
-     print(f"frequency_penalty: {frequency_penalty}, seed: {seed}")
-     print(f"Featured Model: {featured_model}")
-     print(f"Custom Model: {custom_model}")

-     # Convert seed to None if -1 (meaning random)
      if seed == -1:
          seed = None

-     # Determine which model to use
-     # If the user typed something in custom_model, that overrides the featured model
-     # Otherwise we use the model selected in the radio. If neither, default to the example "meta-llama..."
-     model_to_use = None
-     if custom_model.strip():
-         model_to_use = custom_model.strip()
-     elif featured_model is not None and featured_model.strip():
-         model_to_use = featured_model.strip()
-     else:
          model_to_use = "meta-llama/Llama-3.3-70B-Instruct"

      print(f"Model selected for inference: {model_to_use}")

-     # Construct the conversation messages for the HF Inference API
-     messages = [{"role": "system", "content": system_message}]
-     for user_text, assistant_text in history:
          if user_text:
              messages.append({"role": "user", "content": user_text})
          if assistant_text:
              messages.append({"role": "assistant", "content": assistant_text})
      messages.append({"role": "user", "content": user_message})

-     # We'll collect and stream the response
      response_so_far = ""

      # Make the streaming request to the HF Inference API
-     print("Sending request to OpenAI/Hugging Face Inference API...")
-     for message_chunk in client.chat.completions.create(
-         model=model_to_use,
-         max_tokens=max_tokens,
-         stream=True,
-         temperature=temperature,
-         top_p=top_p,
-         frequency_penalty=frequency_penalty,
-         seed=seed,
-         messages=messages,
-     ):
-         # The content for the partial chunk
-         token_text = message_chunk.choices[0].delta.content
-         response_so_far += token_text
-         # Return partial response to Gradio to display in real-time
-         yield response_so_far

      print("Completed response generation.")

- ################################################
- # GRADIO UI + STATE MANAGEMENT
- ################################################
-
- def user_submit(user_message, history):
-     """
-     This function is called when the user sends a message.
-     We simply add the user message to the conversation history.
-     """
-     print("user_submit triggered.")
-     # Append the new user message to history
-     if not history:
-         history = []
-     history = history + [[user_message, None]]
-     return history, ""
-
- def bot_reply(history, system_message, max_tokens, temperature, top_p,
-               frequency_penalty, seed, featured_model, custom_model):
-     """
-     This function is triggered to produce the bot's response after the user has submitted.
-     We call 'respond' for streaming text.
-     """
-     print("bot_reply triggered.")
-
-     # The last conversation item has user_message, None
-     user_message = history[-1][0]
-
-     # We will stream the partial responses from 'respond'
-     bot_response = respond(
-         user_message=user_message,
-         history=history[:-1],  # all items except the last, because we pass the last user msg separately
-         system_message=system_message,
-         max_tokens=max_tokens,
-         temperature=temperature,
-         top_p=top_p,
-         frequency_penalty=frequency_penalty,
-         seed=seed,
-         featured_model=featured_model,
-         custom_model=custom_model
-     )

-     # As we yield from the generator, we update the last item in history with the partial response
-     # Gradio streaming logic: yield the partial updates as they come in
-     for partial_text in bot_response:
-         history[-1][1] = partial_text
-         yield history
-
- # We define a small list of placeholder featured models for demonstration
  models_list = [
-     "meta-llama/Llama-2-13B-Chat-hf",
      "bigscience/bloom",
-     "EleutherAI/gpt-neo-2.7B",
-     "meta-llama/Llama-3.3-70B-Instruct"
  ]

  def filter_models(search_term):
-     """
-     Filter function triggered when user types in the model_search box.
-     Returns an updated list of models that contain the search term.
-     """
      filtered = [m for m in models_list if search_term.lower() in m.lower()]
      return gr.update(choices=filtered)

-
- ################################################
- # BUILDING THE GRADIO LAYOUT
- ################################################
-
- with gr.Blocks(theme="Nymbo/Nymbo_Theme") as demo:
-     gr.Markdown(
-         """
-         # Serverless-TextGen-Hub
-         **A UI for text generation using Hugging Face's Inference API.**
-
-         Below is a simple chat interface. You can pick from **Featured Models** or specify a **Custom Model**
-         to override the choice. If you're not sure, just use the default.
-         """
-     )
-
-     # State to hold the conversation history, will be a list of [user, bot]
-     conversation_state = gr.State([])
-
-     # Row for system message + advanced settings
-     with gr.Accordion("Advanced Settings", open=False):
-         system_message = gr.Textbox(
-             label="System Message",
-             value="You are a helpful assistant.",
-             lines=2,
-             info="Provides background or personality instructions to the model."
-         )
-         max_tokens = gr.Slider(
-             minimum=1,
-             maximum=4096,
-             value=512,
-             step=1,
-             label="Max new tokens"
-         )
-         temperature = gr.Slider(
-             minimum=0.1,
-             maximum=4.0,
-             value=0.7,
-             step=0.1,
-             label="Temperature"
-         )
-         top_p = gr.Slider(
-             minimum=0.1,
-             maximum=1.0,
-             value=0.95,
-             step=0.05,
-             label="Top-P"
-         )
-         frequency_penalty = gr.Slider(
-             minimum=-2.0,
-             maximum=2.0,
-             value=0.0,
-             step=0.1,
-             label="Frequency Penalty"
-         )
-         seed = gr.Slider(
-             minimum=-1,
-             maximum=65535,
-             value=-1,
-             step=1,
-             label="Seed (-1 for random)"
-         )
-
-     # Featured Models + filtering
-     with gr.Accordion("Featured Models", open=False):
-         model_search = gr.Textbox(
-             label="Filter Models",
-             placeholder="Search for a featured model...",
-             lines=1
-         )
-         featured_model_radio = gr.Radio(
-             label="Select a featured model below",
-             choices=models_list,
-             value=models_list[0],  # default selection
-             interactive=True
-         )
-         model_search.change(
-             filter_models,
-             inputs=model_search,
-             outputs=featured_model_radio
          )

-     # This is the Custom Model box (overrides Featured Models if not empty)
-     custom_model = gr.Textbox(
-         label="Custom Model",
-         value="",
-         info="(Optional) Provide a custom HF model path. If not empty, it overrides the Featured Model."
      )

-     # The main Chatbot interface
-     chatbot = gr.Chatbot(height=600)

-     # Textbox for the user to type a new message
-     with gr.Row():
-         user_input = gr.Textbox(
-             show_label=False,
-             placeholder="Type your message here (press enter or click 'Submit')",
-             lines=2
-         )
-         submit_btn = gr.Button("Submit", variant="primary")

-     # The user submits -> we update the conversation state
-     submit_btn.click(
-         fn=user_submit,
-         inputs=[user_input, conversation_state],
-         outputs=[conversation_state, user_input],
      )

-     # Then the bot replies, streaming the output
-     # We pass all required arguments from the advanced settings, plus the model selection boxes
-     submit_btn.click(
-         fn=bot_reply,
          inputs=[
-             conversation_state,
-             system_message,
-             max_tokens,
-             temperature,
-             top_p,
-             frequency_penalty,
-             seed,
-             featured_model_radio,
-             custom_model
          ],
-         outputs=[chatbot],
-         # 'bot_reply' is a generator, so we set streaming=True:
-         queue=True
      )

-     # We also allow pressing Enter in user_input to do the same thing
-     user_input.submit(
-         fn=user_submit,
-         inputs=[user_input, conversation_state],
-         outputs=[conversation_state, user_input],
-     )
-     user_input.submit(
-         fn=bot_reply,
-         inputs=[
-             conversation_state,
-             system_message,
-             max_tokens,
-             temperature,
-             top_p,
-             frequency_penalty,
-             seed,
-             featured_model_radio,
-             custom_model
-         ],
-         outputs=[chatbot],
-         queue=True
-     )

-     gr.HTML("""
-     <br>
-     <p style='text-align:center;'>
-         Developed by <strong>Nymbo</strong>.
-         Powered by <strong>Hugging Face Inference API</strong>.
-     </p>
-     """)

-     # Finally, launch the app
  if __name__ == "__main__":
-     print("Launching the Serverless-TextGen-Hub application...")
      demo.launch()
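
Both the old and the new respond() flatten the stored history pairs into OpenAI-style role/content dicts before calling the API. A minimal illustration of the resulting messages list (hypothetical one-turn history, not part of the commit):

    history = [("Hi there", "Hello! How can I help?")]
    # After the history loop plus the trailing append of the new user
    # message "What is Gradio?", `messages` would be:
    # [
    #     {"role": "system", "content": "You are a helpful assistant."},
    #     {"role": "user", "content": "Hi there"},
    #     {"role": "assistant", "content": "Hello! How can I help?"},
    #     {"role": "user", "content": "What is Gradio?"},
    # ]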
 
app.py (updated file):

  import gradio as gr
  from openai import OpenAI
+ import os

  # Retrieve the access token from the environment variable
  ACCESS_TOKEN = os.getenv("HF_TOKEN")

  ... (unchanged lines omitted) ...

  )
  print("OpenAI client initialized.")

  def respond(
      user_message,
+     chat_history,
+     system_msg,
      max_tokens,
      temperature,
      top_p,

  ... (unchanged lines omitted) ...

  ):
      """
      This function handles the chatbot response. It takes in:
+     - user_message: the user's newly typed message
+     - chat_history: the list of (user, assistant) message pairs
+     - system_msg: the system instruction or system-level context
+     - max_tokens: the maximum number of tokens to generate
      - temperature: sampling temperature
      - top_p: top-p (nucleus) sampling
      - frequency_penalty: penalize repeated tokens in the output
+     - seed: a fixed seed for reproducibility; -1 means 'random'
+     - featured_model: the chosen model name from 'Featured Models' radio
+     - custom_model: the optional custom model that overrides the featured one if provided
      """

+     print(f"Received user message: {user_message}")
+     print(f"System message: {system_msg}")
+     print(f"Max tokens: {max_tokens}, Temperature: {temperature}, Top-P: {top_p}, Freq-Penalty: {frequency_penalty}, Seed: {seed}")
+     print(f"Featured model: {featured_model}")
+     print(f"Custom model: {custom_model}")

+     # Convert the seed to None if user set it to -1 (meaning random)
      if seed == -1:
          seed = None

+     # Decide which model to actually use
+     # If custom_model is non-empty, use that; otherwise use the chosen featured_model
+     model_to_use = custom_model.strip() if custom_model.strip() != "" else featured_model
+     # Provide a default fallback if for some reason both are empty
+     if model_to_use.strip() == "":
          model_to_use = "meta-llama/Llama-3.3-70B-Instruct"

      print(f"Model selected for inference: {model_to_use}")

+     # Construct the conversation history in the format required by HF's Inference API
+     messages = []
+     if system_msg.strip():
+         messages.append({"role": "system", "content": system_msg.strip()})
+
+     # Add the conversation history
+     for user_text, assistant_text in chat_history:
          if user_text:
              messages.append({"role": "user", "content": user_text})
          if assistant_text:
              messages.append({"role": "assistant", "content": assistant_text})
+
+     # Add the new user message to the conversation
      messages.append({"role": "user", "content": user_message})

+     # We'll build the response token-by-token in a streaming loop
      response_so_far = ""
+     print("Sending request to the Hugging Face Inference API...")

      # Make the streaming request to the HF Inference API
+     try:
+         for resp_chunk in client.chat.completions.create(
+             model=model_to_use,
+             max_tokens=max_tokens,
+             stream=True,
+             temperature=temperature,
+             top_p=top_p,
+             frequency_penalty=frequency_penalty,
+             seed=seed,
+             messages=messages,
+         ):
+             token_text = resp_chunk.choices[0].delta.content
+             response_so_far += token_text
+             # We yield back the updated message to display partial progress in the chatbot
+             yield response_so_far
+     except Exception as e:
+         # If there's an error, let's at least show it in the chat
+         error_text = f"[ERROR] {str(e)}"
+         print(error_text)
+         yield response_so_far + "\n\n" + error_text

      print("Completed response generation.")
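
One caveat about the streaming loop above: with the OpenAI client, a streamed chunk's delta.content can be None (for example, a final chunk that carries only a finish reason), in which case response_so_far += token_text raises a TypeError. A defensive variant of the chunk handling (a sketch, not part of the commit):

    token_text = resp_chunk.choices[0].delta.content
    if token_text is None:
        # Skip chunks that carry no text, e.g. a final finish_reason-only chunk
        continue
    response_so_far += token_text
    yield response_so_far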
 
+ #
+ # BUILDING THE GRADIO INTERFACE BELOW
+ #

+ # List of featured models; adjust or replace these placeholders with real text-generation models
  models_list = [
+     "meta-llama/Llama-3.3-70B-Instruct",
+     "meta-llama/Llama-2-13B-chat-hf",
      "bigscience/bloom",
+     "openlm-research/open_llama_7b",
+     "facebook/opt-6.7b",
+     "google/flan-t5-xxl",
  ]

  def filter_models(search_term):
+     """Filters the models_list by the given search_term and returns an update for the Radio component."""
      filtered = [m for m in models_list if search_term.lower() in m.lower()]
      return gr.update(choices=filtered)
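
As an illustration of the filter (using the models_list above), typing "llama" would narrow the radio choices to:

    >>> [m for m in models_list if "llama" in m.lower()]
    ['meta-llama/Llama-3.3-70B-Instruct',
     'meta-llama/Llama-2-13B-chat-hf',
     'openlm-research/open_llama_7b']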
 
+ with gr.Blocks(theme="Nymbo/Nymbo_Theme_5") as demo:
+     gr.Markdown("# Serverless-TextGen-Hub (Enhanced)")
+     gr.Markdown("**A comprehensive UI for text generation with a featured-models dropdown and a custom override**.")
+
+     # We keep track of the conversation in a Gradio state variable (list of tuples)
+     chat_history = gr.State([])
+
+     # Tabs for organization
+     with gr.Tab("Basic Settings"):
+         with gr.Row():
+             with gr.Column(elem_id="prompt-container"):
+                 # System Message
+                 system_msg = gr.Textbox(
+                     label="System message",
+                     placeholder="Enter system-level instructions or context here.",
+                     lines=2
+                 )
+                 # Accordion for featured models
+                 with gr.Accordion("Featured Models", open=True):
+                     model_search = gr.Textbox(
+                         label="Filter Models",
+                         placeholder="Search for a featured model...",
+                         lines=1
+                     )
+                     # The radio that lists our featured models
+                     model_radio = gr.Radio(
+                         label="Select a featured model below",
+                         choices=models_list,
+                         value=models_list[0],  # default
+                         interactive=True
+                     )
+                     # Link the search box to update the model_radio choices
+                     model_search.change(filter_models, inputs=model_search, outputs=model_radio)
+
+                 # Custom Model
+                 custom_model_box = gr.Textbox(
+                     label="Custom Model (Optional)",
+                     info="If provided, overrides the featured model above. e.g. 'meta-llama/Llama-3.3-70B-Instruct'",
+                     placeholder="Your huggingface.co/username/model_name path"
+                 )
+
+     with gr.Tab("Advanced Settings"):
+         with gr.Row():
+             max_tokens_slider = gr.Slider(
+                 minimum=1,
+                 maximum=4096,
+                 value=512,
+                 step=1,
+                 label="Max new tokens"
+             )
+             temperature_slider = gr.Slider(
+                 minimum=0.1,
+                 maximum=4.0,
+                 value=0.7,
+                 step=0.1,
+                 label="Temperature"
+             )
+             top_p_slider = gr.Slider(
+                 minimum=0.1,
+                 maximum=1.0,
+                 value=0.95,
+                 step=0.05,
+                 label="Top-P"
+             )
+         with gr.Row():
+             freq_penalty_slider = gr.Slider(
+                 minimum=-2.0,
+                 maximum=2.0,
+                 value=0.0,
+                 step=0.1,
+                 label="Frequency Penalty"
+             )
+             seed_slider = gr.Slider(
+                 minimum=-1,
+                 maximum=65535,
+                 value=-1,
+                 step=1,
+                 label="Seed (-1 for random)"
+             )
+
+     # Chat interface area: user input -> assistant output
+     with gr.Row():
+         chatbot = gr.Chatbot(
+             label="TextGen Chat",
+             height=500
          )

+     # The user types a message here
+     user_input = gr.Textbox(
+         label="Your message",
+         placeholder="Type your text prompt here..."
      )

+     # "Send" button triggers our respond() function, updates the chatbot
+     send_button = gr.Button("Send")

+     # A Clear Chat button to reset the conversation
+     clear_button = gr.Button("Clear Chat")

+     # Define how the Send button updates the state and chatbot
+     def user_submission(user_text, history):
+         """
+         This function gets called first to add the user's message to the chat.
+         We return the updated chat_history with the user's message appended,
+         plus an empty string for the next user input box.
+         """
+         if user_text.strip() == "":
+             return history, ""
+         # Append user message to chat
+         history = history + [(user_text, None)]
+         return history, ""
+
+     send_button.click(
+         fn=user_submission,
+         inputs=[user_input, chat_history],
+         outputs=[chat_history, user_input]
      )

+     # Then we run the respond function (streaming) to generate the assistant message
+     def bot_response(
+         history,
+         system_msg,
+         max_tokens,
+         temperature,
+         top_p,
+         freq_penalty,
+         seed,
+         featured_model,
+         custom_model
+     ):
+         """
+         This function is called to generate the assistant's response
+         based on the conversation so far, system message, etc.
+         We do the streaming here.
+         """
+         if not history:
+             yield history
+         # The last user message is in history[-1][0]
+         user_message = history[-1][0] if history else ""
+         # We pass everything to the respond() generator
+         bot_stream = respond(
+             user_message=user_message,
+             chat_history=history[:-1],  # all except the newly appended user message
+             system_msg=system_msg,
+             max_tokens=max_tokens,
+             temperature=temperature,
+             top_p=top_p,
+             frequency_penalty=freq_penalty,
+             seed=seed,
+             featured_model=featured_model,
+             custom_model=custom_model
+         )
+         partial_text = ""
+         for partial_text in bot_stream:
+             # We'll keep updating the last message in the conversation with partial_text
+             updated_history = history[:-1] + [(history[-1][0], partial_text)]
+             yield updated_history
+
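
Note that when history is empty, bot_response yields once but then falls through to the loop, where history[-1] would raise an IndexError. A safer version of that guard returns immediately (a sketch, not part of the commit):

    if not history:
        yield history
        return  # no user message to answer yet; stop the generator here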
+     send_button.click(
+         fn=bot_response,
          inputs=[
+             chat_history,
+             system_msg,
+             max_tokens_slider,
+             temperature_slider,
+             top_p_slider,
+             freq_penalty_slider,
+             seed_slider,
+             model_radio,
+             custom_model_box
          ],
+         outputs=chatbot
      )

+     # Clear chat just resets the state
+     def clear_chat():
+         return [], ""

+     clear_button.click(
+         fn=clear_chat,
+         inputs=[],
+         outputs=[chat_history, user_input]
+     )

+ # Launch the application
  if __name__ == "__main__":
+     print("Launching the Serverless-TextGen-Hub with Featured Models & Custom Model override.")
      demo.launch()
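
A quick way to smoke-test the respond() generator outside the UI (a sketch; it assumes HF_TOKEN is set in the environment and that this file is importable as a module named app):

    from app import respond

    for partial in respond(
        user_message="Hello!",
        chat_history=[],
        system_msg="You are a helpful assistant.",
        max_tokens=64,
        temperature=0.7,
        top_p=0.95,
        frequency_penalty=0.0,
        seed=-1,
        featured_model="meta-llama/Llama-3.3-70B-Instruct",
        custom_model="",
    ):
        print(partial)

Each yielded value is the cumulative response so far, so the last printed line is the full reply.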