Nymbo committed · verified
Commit 7d3730f · 1 Parent(s): 21137c4

Update app.py

Files changed (1): app.py +84 -125
app.py CHANGED
@@ -22,193 +22,152 @@ def respond(
     top_p,
     frequency_penalty,
     seed,
-    model
+    model_selection,
+    custom_model
 ):
     """
-    This function handles the chatbot response. It takes in:
-    - message: the user's new message
-    - history: the list of previous messages, each as a tuple (user_msg, assistant_msg)
-    - system_message: the system prompt
-    - max_tokens: the maximum number of tokens to generate in the response
-    - temperature: sampling temperature
-    - top_p: top-p (nucleus) sampling
-    - frequency_penalty: penalize repeated tokens in the output
-    - seed: a fixed seed for reproducibility; -1 will mean 'random'
-    - model: the selected model for text generation
+    This function handles the chatbot response.
     """
-
-    print(f"Received message: {message}")
-    print(f"History: {history}")
-    print(f"System message: {system_message}")
-    print(f"Max tokens: {max_tokens}, Temperature: {temperature}, Top-P: {top_p}")
-    print(f"Frequency Penalty: {frequency_penalty}, Seed: {seed}, Model: {model}")
-
-    # Convert seed to None if -1 (meaning random)
+    selected_model = custom_model if custom_model.strip() != "" else model_selection
+    print(f"Selected model: {selected_model}")
+
     if seed == -1:
         seed = None
 
-    # Construct the messages array required by the API
     messages = [{"role": "system", "content": system_message}]
-
-    # Add conversation history to the context
     for val in history:
-        user_part = val[0]
-        assistant_part = val[1]
-        if user_part:
-            messages.append({"role": "user", "content": user_part})
-            print(f"Added user message to context: {user_part}")
-        if assistant_part:
-            messages.append({"role": "assistant", "content": assistant_part})
-            print(f"Added assistant message to context: {assistant_part}")
-
-    # Append the latest user message
+        if val[0]:
+            messages.append({"role": "user", "content": val[0]})
+        if val[1]:
+            messages.append({"role": "assistant", "content": val[1]})
     messages.append({"role": "user", "content": message})
 
-    # Start with an empty string to build the response as tokens stream in
     response = ""
-    print("Sending request to OpenAI API.")
-
-    # Make the streaming request to the HF Inference API via openai-like client
     for message_chunk in client.chat.completions.create(
-        model=model, # Use the selected model
+        model=selected_model,
         max_tokens=max_tokens,
-        stream=True, # Stream the response
+        stream=True,
         temperature=temperature,
         top_p=top_p,
         frequency_penalty=frequency_penalty,
        seed=seed,
         messages=messages,
     ):
-        # Extract the token text from the response chunk
         token_text = message_chunk.choices[0].delta.content
-        print(f"Received token: {token_text}")
         response += token_text
         yield response
 
-    print("Completed response generation.")
-
 # Create a Chatbot component with a specified height
 chatbot = gr.Chatbot(height=600)
-print("Chatbot interface created.")
 
-# List of featured models (placeholder models for now)
+# Define placeholder models
 featured_models = [
     "meta-llama/Llama-3.3-70B-Instruct",
-    "gpt-3.5-turbo",
-    "gpt-4",
-    "mistralai/Mistral-7B-Instruct-v0.1",
-    "tiiuae/falcon-40b-instruct"
+    "gpt2",
+    "bert-base-uncased",
+    "facebook/bart-base",
+    "google/flan-t5-base"
 ]
 
-# Function to filter models based on search input
-def filter_models(search_term):
-    filtered_models = [m for m in featured_models if search_term.lower() in m.lower()]
-    return gr.update(choices=filtered_models)
-
 # Create the Gradio ChatInterface
-demo = gr.ChatInterface(
-    respond,
-    additional_inputs=[
-        gr.Textbox(value="", label="System message"),
-        gr.Slider(minimum=1, maximum=4096, value=512, step=1, label="Max new tokens"),
-        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
-        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-P"),
-        gr.Slider(minimum=-2.0, maximum=2.0, value=0.0, step=0.1, label="Frequency Penalty"),
-        gr.Slider(minimum=-1, maximum=65535, value=-1, step=1, label="Seed (-1 for random)"),
-        gr.Radio(label="Select a model below", value="meta-llama/Llama-3.3-70B-Instruct", choices=featured_models, interactive=True, elem_id="model-radio")
-    ],
-    fill_height=True,
-    chatbot=chatbot,
-    theme="Nymbo/Nymbo_Theme",
-)
+with gr.Blocks(theme="Nymbo/Nymbo_Theme") as demo:
+    gr.Markdown("# Serverless Text Generation Hub")
 
-# Add a "Custom Model" text box and "Featured Models" accordion
-with demo:
-    with gr.Tab("Model Settings"):
+    with gr.Tab("Basic Settings"):
         with gr.Row():
             with gr.Column():
-                # Textbox for custom model input
-                custom_model = gr.Textbox(label="Custom Model", info="Hugging Face model path (optional)", placeholder="username/model-name")
-                # Accordion for selecting featured models
+                # Textbox for system message
+                system_message = gr.Textbox(value="", label="System message")
+        with gr.Row():
+            with gr.Column():
+                # Model selection
                 with gr.Accordion("Featured Models", open=True):
-                    # Textbox for searching models
-                    model_search = gr.Textbox(label="Filter Models", placeholder="Search for a featured model...", lines=1, elem_id="model-search-input")
-                    # Radio buttons to select the desired model
-                    model_radio = gr.Radio(label="Select a model below", value="meta-llama/Llama-3.3-70B-Instruct", choices=featured_models, interactive=True, elem_id="model-radio")
-                    # Update model list when search box is used
-                    model_search.change(filter_models, inputs=model_search, outputs=model_radio)
-
-    # Add an "Information" tab with accordions
-    with gr.Tab("Information"):
+                    model_search = gr.Textbox(label="Filter Models", placeholder="Search for a featured model...")
+                    model = gr.Radio(label="Select a model", choices=featured_models, value="meta-llama/Llama-3.3-70B-Instruct")
+
+                    def filter_models(search_term):
+                        filtered_models = [m for m in featured_models if search_term.lower() in m.lower()]
+                        return gr.update(choices=filtered_models)
+
+                    model_search.change(filter_models, inputs=model_search, outputs=model)
+        with gr.Row():
+            with gr.Column():
+                # Custom model input
+                custom_model = gr.Textbox(label="Custom Model", placeholder="Enter a custom model name")
+
+    with gr.Tab("Advanced Settings"):
+        with gr.Row():
+            max_tokens = gr.Slider(minimum=1, maximum=4096, value=512, step=1, label="Max new tokens")
+            temperature = gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature")
         with gr.Row():
-            # Accordion for "Featured Models" with a table
-            with gr.Accordion("Featured Models (WiP)", open=False):
-                gr.HTML(
-                    """
-                    <p><a href="https://huggingface.co/models?inference=warm&pipeline_tag=text-generation&sort=trending">See all available models</a></p>
+            top_p = gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-P")
+            frequency_penalty = gr.Slider(minimum=-2.0, maximum=2.0, value=0.0, step=0.1, label="Frequency Penalty")
+        with gr.Row():
+            seed = gr.Slider(minimum=-1, maximum=65535, value=-1, step=1, label="Seed (-1 for random)")
+
+    with gr.Tab("Information"):
+        with gr.Accordion("Featured Models", open=False):
+            gr.Markdown(
+                """
                 <table style="width:100%; text-align:center; margin:auto;">
                     <tr>
                         <th>Model Name</th>
-                        <th>Typical Use Case</th>
-                        <th>Notes</th>
+                        <th>Description</th>
                     </tr>
                     <tr>
                         <td>meta-llama/Llama-3.3-70B-Instruct</td>
-                        <td>General-purpose instruction following</td>
-                        <td>High-quality, large-scale model</td>
+                        <td>Highly capable Llama model</td>
                     </tr>
                     <tr>
-                        <td>gpt-3.5-turbo</td>
-                        <td>Chat and general text generation</td>
-                        <td>Fast and efficient</td>
+                        <td>gpt2</td>
+                        <td>Generative Pre-trained Transformer 2</td>
                     </tr>
                     <tr>
-                        <td>gpt-4</td>
-                        <td>Advanced text generation</td>
-                        <td>State-of-the-art performance</td>
-                    </tr>
-                    <tr>
-                        <td>mistralai/Mistral-7B-Instruct-v0.1</td>
-                        <td>Instruction following</td>
-                        <td>Lightweight and efficient</td>
-                    </tr>
-                    <tr>
-                        <td>tiiuae/falcon-40b-instruct</td>
-                        <td>Instruction following</td>
-                        <td>High-quality, large-scale model</td>
+                        <td>bert-base-uncased</td>
+                        <td>Bidirectional Encoder Representations from Transformers</td>
                     </tr>
                 </table>
                 """
-                )
-
-            # Accordion for "Parameters Overview" with markdown
-            with gr.Accordion("Parameters Overview", open=False):
-                gr.Markdown(
+            )
+        with gr.Accordion("Parameters Overview", open=False):
+            gr.Markdown(
                 """
                 ## System Message
-                ###### This is the initial prompt that sets the behavior of the model. It can be used to define the tone, style, or role of the assistant.
+                ###### Sets the behavior and tone of the assistant.
 
-                ## Max Tokens
-                ###### This controls the maximum length of the generated response. Higher values allow for longer responses but may take more time to generate.
+                ## Max New Tokens
+                ###### Determines the maximum length of the response.
 
                 ## Temperature
-                ###### This controls the randomness of the output. Lower values make the model more deterministic, while higher values make it more creative.
+                ###### Controls the randomness of the output. Lower values make the output more deterministic.
 
                 ## Top-P
-                ###### This controls the diversity of the output by limiting the model to the most likely tokens. Lower values make the output more focused, while higher values allow for more diversity.
+                ###### Used for nucleus sampling. Higher values include more tokens in consideration.
 
                 ## Frequency Penalty
-                ###### This penalizes repeated tokens in the output. Higher values discourage repetition, while lower values allow for more repetitive outputs.
+                ###### Penalizes the model for repeating the same tokens.
 
                 ## Seed
-                ###### This sets a fixed seed for reproducibility. A value of -1 means the seed is random.
-
-                ## Model
-                ###### This selects the model used for text generation. You can choose from featured models or specify a custom model.
+                ###### Ensures reproducibility of results.
                 """
-                )
-
-print("Gradio interface initialized.")
+            )
+
+    # Chat interface
+    demo = gr.ChatInterface(
+        respond,
+        additional_inputs=[
+            system_message,
+            max_tokens,
+            temperature,
+            top_p,
+            frequency_penalty,
+            seed,
+            model,
+            custom_model
+        ],
+        chatbot=chatbot,
+        theme="Nymbo/Nymbo_Theme"
+    )
 
 if __name__ == "__main__":
     print("Launching the demo application.")