Nymbo committed
Commit fab24df · verified · 1 Parent(s): 7255410

Update app.py

Files changed (1): app.py +187 -70
app.py CHANGED
@@ -1,6 +1,6 @@
 import gradio as gr
-from openai import OpenAI
 import os
+from openai import OpenAI

 # Retrieve the access token from the environment variable
 ACCESS_TOKEN = os.getenv("HF_TOKEN")
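Reviewer note: this hunk only reorders the `from openai import OpenAI` import. The client itself is constructed in the unchanged lines the diff does not show. A minimal sketch of that setup, assuming the Hugging Face OpenAI-compatible endpoint (the `base_url` below is an assumption, not a line from this commit):

```python
# Hypothetical sketch of the client setup in the unchanged lines of app.py;
# the exact base_url is an assumption and may differ in the real file.
import os
from openai import OpenAI

ACCESS_TOKEN = os.getenv("HF_TOKEN")

client = OpenAI(
    base_url="https://api-inference.huggingface.co/v1/",  # assumed HF OpenAI-compatible endpoint
    api_key=ACCESS_TOKEN,
)
```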
@@ -22,7 +22,8 @@ def respond(
     top_p,
     frequency_penalty,
     seed,
-    custom_model
+    custom_model,
+    featured_model
 ):
     """
     This function handles the chatbot response. It takes in:
@@ -34,7 +35,8 @@ def respond(
     - top_p: top-p (nucleus) sampling
     - frequency_penalty: penalize repeated tokens in the output
     - seed: a fixed seed for reproducibility; -1 will mean 'random'
-    - custom_model: the user-provided custom model name (if any)
+    - custom_model: a user-provided custom model name (if any)
+    - featured_model: the user-selected model from the radio
     """

     print(f"Received message: {message}")
@@ -43,15 +45,16 @@ def respond(
     print(f"Max tokens: {max_tokens}, Temperature: {temperature}, Top-P: {top_p}")
     print(f"Frequency Penalty: {frequency_penalty}, Seed: {seed}")
     print(f"Custom model: {custom_model}")
+    print(f"Featured model: {featured_model}")

-    # Convert seed to None if -1 (meaning random)
+    # Convert seed to None if -1 (meaning "random")
     if seed == -1:
         seed = None

-    # Construct the messages array required by the API
-    messages = [{"role": "system", "content": system_message}]
+    # Construct the conversation array required by the HF Inference API
+    messages = [{"role": "system", "content": system_message or ""}]

-    # Add conversation history to the context
+    # Add conversation history
     for val in history:
         user_part = val[0]
         assistant_part = val[1]
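Reviewer note: across this hunk and the next, Gradio's `(user, assistant)` history tuples are flattened into the flat role/content list that chat-completion endpoints expect. An illustrative example of the resulting shape for a one-turn history plus a new message (values made up):

```python
# Illustrative values only; this is the shape `messages` ends up with.
messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "Hi there"},
    {"role": "assistant", "content": "Hello! How can I help?"},
    {"role": "user", "content": "Summarize this repo."},  # the latest user message
]
```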
@@ -62,22 +65,26 @@ def respond(
             messages.append({"role": "assistant", "content": assistant_part})
             print(f"Added assistant message to context: {assistant_part}")

-    # Append the latest user message
+    # The latest user message
     messages.append({"role": "user", "content": message})

-    # Determine which model to use: either custom_model or a default
-    model_to_use = custom_model.strip() if custom_model.strip() != "" else "meta-llama/Llama-3.3-70B-Instruct"
+    # If custom_model is not empty, it overrides the featured model
+    model_to_use = custom_model.strip() if custom_model.strip() != "" else featured_model.strip()
+    # If somehow both are empty, default to an example model
+    if model_to_use == "":
+        model_to_use = "meta-llama/Llama-3.3-70B-Instruct"
+
     print(f"Model selected for inference: {model_to_use}")

-    # Start with an empty string to build the response as tokens stream in
+    # Build the response from the streaming tokens
     response = ""
     print("Sending request to OpenAI API.")

-    # Make the streaming request to the HF Inference API via openai-like client
+    # Streaming request to the HF Inference API
     for message_chunk in client.chat.completions.create(
-        model=model_to_use,  # Use either the user-provided custom model or default
+        model=model_to_use,
         max_tokens=max_tokens,
-        stream=True,  # Stream the response
+        stream=True,
         temperature=temperature,
         top_p=top_p,
         frequency_penalty=frequency_penalty,
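Reviewer note: the model selection added in this hunk is a three-level fallback: a non-empty Custom Model box wins, then the featured-model radio, then a hard-coded default. A compact restatement using a hypothetical helper (not in the commit), relying on empty strings being falsy:

```python
def pick_model(custom_model: str, featured_model: str) -> str:
    # `or` walks the fallback chain:
    # custom override -> featured radio choice -> hard-coded default.
    return (
        custom_model.strip()
        or featured_model.strip()
        or "meta-llama/Llama-3.3-70B-Instruct"
    )
```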
@@ -88,68 +95,178 @@ def respond(
         token_text = message_chunk.choices[0].delta.content
         print(f"Received token: {token_text}")
         response += token_text
-        # Yield the partial response to Gradio so it can display in real-time
+        # Yield partial response so Gradio can display in real-time
         yield response

     print("Completed response generation.")

-# Create a Chatbot component with a specified height
-chatbot = gr.Chatbot(height=600)
-print("Chatbot interface created.")
-
-# Create the Gradio ChatInterface
-# We add two new sliders for Frequency Penalty, Seed, and now a new "Custom Model" text box.
-demo = gr.ChatInterface(
-    fn=respond,
-    additional_inputs=[
-        gr.Textbox(value="", label="System message"),
-        gr.Slider(
-            minimum=1,
-            maximum=4096,
-            value=512,
-            step=1,
-            label="Max new tokens"
-        ),
-        gr.Slider(
-            minimum=0.1,
-            maximum=4.0,
-            value=0.7,
-            step=0.1,
-            label="Temperature"
-        ),
-        gr.Slider(
-            minimum=0.1,
-            maximum=1.0,
-            value=0.95,
-            step=0.05,
-            label="Top-P"
-        ),
-        gr.Slider(
-            minimum=-2.0,
-            maximum=2.0,
-            value=0.0,
-            step=0.1,
-            label="Frequency Penalty"
-        ),
-        gr.Slider(
-            minimum=-1,
-            maximum=65535,  # Arbitrary upper limit for demonstration
-            value=-1,
-            step=1,
-            label="Seed (-1 for random)"
-        ),
-        gr.Textbox(
-            value="",
-            label="Custom Model",
-            info="(Optional) Provide a custom Hugging Face model path. This will override the default model if not empty."
-        ),
-    ],
-    fill_height=True,
-    chatbot=chatbot,
-    theme="Nymbo/Nymbo_Theme",
+#
+# Building the Gradio interface below
+#
+print("Building the Gradio interface with advanced features...")
+
+# --- Create a list of 'Featured Models' for demonstration. You can customize as you wish. ---
+models_list = (
+    "meta-llama/Llama-3.3-70B-Instruct",
+    "BigScience/bloom",
+    "openai/gpt-4",
+    "google/flan-t5-xxl",
+    "EleutherAI/gpt-j-6B",
+    "YourSpecialModel/awesome-13B",
 )
-print("Gradio interface initialized.")

+# This function filters the above models_list by a given search term:
+def filter_models(search_term):
+    filtered = [m for m in models_list if search_term.lower() in m.lower()]
+    return gr.update(choices=filtered)
+
+# We'll create a Chatbot in a Blocks layout to incorporate an Accordion for "Featured Models"
+with gr.Blocks(theme="Nymbo/Nymbo_Theme") as demo:
+    gr.Markdown("## Serverless-TextGen-Hub\nA comprehensive UI for text generation, including featured models and custom model overrides.")
+
+    # The Chatbot itself
+    chatbot = gr.Chatbot(label="TextGen Chatbot", height=600)
+
+    with gr.Row():
+        with gr.Column(scale=1):
+            # We create interactive UI elements that will feed into the 'respond' function
+
+            # System message
+            system_message = gr.Textbox(label="System message", placeholder="Set the system role instructions here.")
+
+            # Accordion for selecting the model
+            with gr.Accordion("Featured Models", open=True):
+                model_search = gr.Textbox(
+                    label="Filter Models",
+                    placeholder="Search for a featured model...",
+                    lines=1
+                )
+                featured_model = gr.Radio(
+                    label="Select a Featured Model Below",
+                    choices=models_list,
+                    value="meta-llama/Llama-3.3-70B-Instruct",  # default
+                    interactive=True,
+                )
+                # Link the search box to filter the radio model choices
+                model_search.change(filter_models, inputs=model_search, outputs=featured_model)
+
+            # A text box to optionally override the featured model
+            custom_model = gr.Textbox(
+                label="Custom Model",
+                info="(Optional) Provide a custom HF model path. If non-empty, it overrides your featured model choice."
+            )
+
+            # Sliders
+            max_tokens = gr.Slider(
+                minimum=1,
+                maximum=4096,
+                value=512,
+                step=1,
+                label="Max new tokens"
+            )
+            temperature = gr.Slider(
+                minimum=0.1,
+                maximum=4.0,
+                value=0.7,
+                step=0.1,
+                label="Temperature"
+            )
+            top_p = gr.Slider(
+                minimum=0.1,
+                maximum=1.0,
+                value=0.95,
+                step=0.05,
+                label="Top-P"
+            )
+            frequency_penalty = gr.Slider(
+                minimum=-2.0,
+                maximum=2.0,
+                value=0.0,
+                step=0.1,
+                label="Frequency Penalty"
+            )
+            seed = gr.Slider(
+                minimum=-1,
+                maximum=65535,
+                value=-1,
+                step=1,
+                label="Seed (-1 for random)"
+            )
+
+        # The "chat" Column
+        with gr.Column(scale=2):
+            # We store the conversation history in a state variable
+            state = gr.State([])  # Each element in state is (user_message, assistant_message)
+
+            # Chat input box for the user
+            with gr.Row():
+                txt = gr.Textbox(
+                    label="Enter your message",
+                    placeholder="Type your request here, then press 'Submit'",
+                    lines=3
+                )
+
+            # Button to submit the message
+            submit_btn = gr.Button("Submit", variant="primary")
+
+            #
+            # The 'respond' function is tied to the chatbot display.
+            # We'll define a small wrapper that updates the 'history' (state) each time.
+            #
+
+            def user_submit(user_message, chat_history):
+                """
+                This function just adds the user message to the history and returns it.
+                The actual text generation will come from 'bot_respond' next.
+                """
+                # Append new user message to the existing conversation
+                chat_history = chat_history + [(user_message, None)]
+                return "", chat_history
+
+            def bot_respond(chat_history, sys_msg, max_t, temp, top, freq_pen, s, custom_mod, feat_model):
+                """
+                This function calls our 'respond' generator to get the text.
+                It updates the last message in chat_history with the bot's response as it streams.
+                """
+                user_message = chat_history[-1][0] if len(chat_history) > 0 else ""
+                # We call the generator
+                bot_messages = respond(
+                    user_message,
+                    chat_history[:-1],  # all but the last user message
+                    sys_msg,
+                    max_t,
+                    temp,
+                    top,
+                    freq_pen,
+                    s,
+                    custom_mod,
+                    feat_model,
+                )
+
+                # Stream the tokens back
+                final_bot_msg = ""
+                for token_text in bot_messages:
+                    final_bot_msg = token_text
+                    # We'll update the chatbot in real-time
+                    chat_history[-1] = (user_message, final_bot_msg)
+                    yield chat_history
+
+            # Tie the Submit button to the user_submit function, and then to bot_respond
+            submit_btn.click(
+                user_submit,
+                inputs=[txt, state],
+                outputs=[txt, state],
+                queue=False
+            ).then(
+                bot_respond,
+                inputs=[state, system_message, max_tokens, temperature, top_p, frequency_penalty, seed, custom_model, featured_model],
+                outputs=[chatbot],
+                queue=True
+            )
+
+    print("Interface construction complete. Ready to launch!")
+
+# Launch the Gradio Blocks interface
 if __name__ == "__main__":
     print("Launching the demo application.")
     demo.launch()
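Reviewer note: `filter_models` does a case-insensitive substring match over `models_list` and returns `gr.update(choices=...)`, so the Radio's choices are swapped in place whenever the search box changes. Expected behavior with the six-entry `models_list` from this commit:

```python
# Expected behavior of filter_models with the models_list in this commit:
filter_models("llama")  # -> gr.update(choices=["meta-llama/Llama-3.3-70B-Instruct"])
filter_models("gpt")    # -> gr.update(choices=["openai/gpt-4", "EleutherAI/gpt-j-6B"])
filter_models("")       # empty search matches everything -> all six choices
```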
 
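Reviewer note: the submit wiring follows the common Gradio streaming pattern, an unqueued `.click()` that records the user turn and clears the textbox, chained with `.then()` into a queued generator that streams the reply into the Chatbot. A minimal self-contained sketch of the same pattern (component names are hypothetical, not from app.py):

```python
import gradio as gr

def add_user_turn(msg, history):
    # Record the user turn immediately; the bot slot stays None for now.
    return "", history + [(msg, None)]

def stream_bot_turn(history):
    # Generator: fill the bot slot of the last turn piece by piece (dummy tokens).
    user_msg = history[-1][0]
    partial = ""
    for tok in ["Echo: ", user_msg]:
        partial += tok
        history[-1] = (user_msg, partial)
        yield history

with gr.Blocks() as demo:
    chat = gr.Chatbot()
    state = gr.State([])
    box = gr.Textbox(label="Message")
    btn = gr.Button("Submit")
    # Step 1 runs unqueued for a snappy UI; step 2 streams through the queue.
    btn.click(add_user_turn, inputs=[box, state], outputs=[box, state], queue=False).then(
        stream_bot_turn, inputs=[state], outputs=[chat]
    )

if __name__ == "__main__":
    demo.launch()
```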