Nymbo committed on
Commit f7c4208 · verified · 1 Parent(s): 7d3730f

Update app.py

Files changed (1)
  1. app.py +126 -91
app.py CHANGED
@@ -22,109 +22,164 @@ def respond(
     top_p,
     frequency_penalty,
     seed,
-    model_selection,
     custom_model
 ):
     """
-    This function handles the chatbot response.
     """
-    selected_model = custom_model if custom_model.strip() != "" else model_selection
-    print(f"Selected model: {selected_model}")
     if seed == -1:
         seed = None

     messages = [{"role": "system", "content": system_message}]
     for val in history:
-        if val[0]:
-            messages.append({"role": "user", "content": val[0]})
-        if val[1]:
-            messages.append({"role": "assistant", "content": val[1]})
     messages.append({"role": "user", "content": message})

     response = ""
     for message_chunk in client.chat.completions.create(
-        model=selected_model,
         max_tokens=max_tokens,
-        stream=True,
         temperature=temperature,
         top_p=top_p,
         frequency_penalty=frequency_penalty,
         seed=seed,
         messages=messages,
     ):
         token_text = message_chunk.choices[0].delta.content
         response += token_text
         yield response

 # Create a Chatbot component with a specified height
 chatbot = gr.Chatbot(height=600)

-# Define placeholder models
-featured_models = [
     "meta-llama/Llama-3.3-70B-Instruct",
-    "gpt2",
-    "bert-base-uncased",
-    "facebook/bart-base",
-    "google/flan-t5-base"
 ]

 # Create the Gradio ChatInterface
-with gr.Blocks(theme="Nymbo/Nymbo_Theme") as demo:
-    gr.Markdown("# Serverless Text Generation Hub")
-
-    with gr.Tab("Basic Settings"):
-        with gr.Row():
-            with gr.Column():
-                # Textbox for system message
-                system_message = gr.Textbox(value="", label="System message")
-        with gr.Row():
-            with gr.Column():
-                # Model selection
-                with gr.Accordion("Featured Models", open=True):
-                    model_search = gr.Textbox(label="Filter Models", placeholder="Search for a featured model...")
-                    model = gr.Radio(label="Select a model", choices=featured_models, value="meta-llama/Llama-3.3-70B-Instruct")
-
-                def filter_models(search_term):
-                    filtered_models = [m for m in featured_models if search_term.lower() in m.lower()]
-                    return gr.update(choices=filtered_models)
-
-                model_search.change(filter_models, inputs=model_search, outputs=model)
-        with gr.Row():
-            with gr.Column():
-                # Custom model input
-                custom_model = gr.Textbox(label="Custom Model", placeholder="Enter a custom model name")
-
-    with gr.Tab("Advanced Settings"):
-        with gr.Row():
-            max_tokens = gr.Slider(minimum=1, maximum=4096, value=512, step=1, label="Max new tokens")
-            temperature = gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature")
-        with gr.Row():
-            top_p = gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-P")
-            frequency_penalty = gr.Slider(minimum=-2.0, maximum=2.0, value=0.0, step=0.1, label="Frequency Penalty")
-        with gr.Row():
-            seed = gr.Slider(minimum=-1, maximum=65535, value=-1, step=1, label="Seed (-1 for random)")

     with gr.Tab("Information"):
         with gr.Accordion("Featured Models", open=False):
-            gr.Markdown(
                 """
                 <table style="width:100%; text-align:center; margin:auto;">
                     <tr>
                         <th>Model Name</th>
-                        <th>Description</th>
                     </tr>
                     <tr>
-                        <td>meta-llama/Llama-3.3-70B-Instruct</td>
-                        <td>Highly capable Llama model</td>
                     </tr>
                     <tr>
-                        <td>gpt2</td>
-                        <td>Generative Pre-trained Transformer 2</td>
                     </tr>
                     <tr>
-                        <td>bert-base-uncased</td>
-                        <td>Bidirectional Encoder Representations from Transformers</td>
                     </tr>
                 </table>
                 """
@@ -132,43 +187,23 @@ with gr.Blocks(theme="Nymbo/Nymbo_Theme") as demo:
         with gr.Accordion("Parameters Overview", open=False):
             gr.Markdown(
                 """
-                ## System Message
-                ###### Sets the behavior and tone of the assistant.
-
-                ## Max New Tokens
-                ###### Determines the maximum length of the response.

-                ## Temperature
-                ###### Controls the randomness of the output. Lower values make the output more deterministic.

-                ## Top-P
-                ###### Used for nucleus sampling. Higher values include more tokens in consideration.

-                ## Frequency Penalty
-                ###### Penalizes the model for repeating the same tokens.

-                ## Seed
-                ###### Ensures reproducibility of results.
                 """
             )

-    # Chat interface
-    demo = gr.ChatInterface(
-        respond,
-        additional_inputs=[
-            system_message,
-            max_tokens,
-            temperature,
-            top_p,
-            frequency_penalty,
-            seed,
-            model,
-            custom_model
-        ],
-        chatbot=chatbot,
-        theme="Nymbo/Nymbo_Theme"
-    )
-
-    if __name__ == "__main__":
-        print("Launching the demo application.")
-        demo.launch()
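
Both versions of `respond` stream from a module-level `client` that is created above this hunk and therefore never appears in the diff. For orientation, a minimal sketch of the setup that the call `client.chat.completions.create(...)` implies, assuming the OpenAI-compatible endpoint of the Hugging Face Inference API and an `HF_TOKEN` environment variable (neither is shown in this commit):

import os
from openai import OpenAI  # any OpenAI-compatible client would do; this one is an assumption

# Hypothetical setup: point an OpenAI client at the Hugging Face Inference
# API's OpenAI-compatible base URL and authenticate with a token from the
# environment.
client = OpenAI(
    base_url="https://api-inference.huggingface.co/v1/",
    api_key=os.environ["HF_TOKEN"],
)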
 
     top_p,
     frequency_penalty,
     seed,
+    model,
     custom_model
 ):
     """
+    This function handles the chatbot response. It takes in:
+    - message: the user's new message
+    - history: the list of previous messages, each as a tuple (user_msg, assistant_msg)
+    - system_message: the system prompt
+    - max_tokens: the maximum number of tokens to generate in the response
+    - temperature: sampling temperature
+    - top_p: top-p (nucleus) sampling
+    - frequency_penalty: penalizes repeated tokens in the output
+    - seed: a fixed seed for reproducibility; -1 means 'random'
+    - model: the selected model
+    - custom_model: a custom model provided by the user
     """

+    print(f"Received message: {message}")
+    print(f"History: {history}")
+    print(f"System message: {system_message}")
+    print(f"Max tokens: {max_tokens}, Temperature: {temperature}, Top-P: {top_p}")
+    print(f"Frequency Penalty: {frequency_penalty}, Seed: {seed}")
+    print(f"Model: {model}, Custom Model: {custom_model}")
+
+    # Convert seed to None if -1 (meaning random)
     if seed == -1:
         seed = None

+    # Use the custom model if provided, otherwise fall back to the selected one
+    if custom_model.strip() != "":
+        model_to_use = custom_model.strip()
+    else:
+        model_to_use = model
+
+    # Construct the messages array required by the API
     messages = [{"role": "system", "content": system_message}]
+
+    # Add conversation history to the context
     for val in history:
+        user_part = val[0]
+        assistant_part = val[1]
+        if user_part:
+            messages.append({"role": "user", "content": user_part})
+            print(f"Added user message to context: {user_part}")
+        if assistant_part:
+            messages.append({"role": "assistant", "content": assistant_part})
+            print(f"Added assistant message to context: {assistant_part}")
+
+    # Append the latest user message
     messages.append({"role": "user", "content": message})

+    # Start with an empty string and build the response as tokens stream in
     response = ""
+    print("Sending request to OpenAI API.")
+
+    # Make the streaming request to the HF Inference API via the OpenAI-style client
     for message_chunk in client.chat.completions.create(
+        model=model_to_use,  # Use the selected or custom model
         max_tokens=max_tokens,
+        stream=True,  # Stream the response
         temperature=temperature,
         top_p=top_p,
         frequency_penalty=frequency_penalty,
         seed=seed,
         messages=messages,
     ):
+        # Extract the token text from the response chunk
         token_text = message_chunk.choices[0].delta.content
+        print(f"Received token: {token_text}")
         response += token_text
         yield response

+    print("Completed response generation.")
+
 # Create a Chatbot component with a specified height
 chatbot = gr.Chatbot(height=600)
+print("Chatbot interface created.")

+# List of placeholder models for demonstration
+models_list = [
     "meta-llama/Llama-3.3-70B-Instruct",
+    "meta-llama/Llama-2-70B-chat",
+    "google/flan-t5-xl"
 ]

+# Function to filter models based on search input
+def filter_models(search_term):
+    filtered_models = [m for m in models_list if search_term.lower() in m.lower()]
+    return gr.update(choices=filtered_models)
+
 # Create the Gradio ChatInterface
+# Adding additional fields for model selection and parameters
+demo = gr.ChatInterface(
+    respond,
+    additional_inputs=[
+        gr.Textbox(value="", label="System message"),
+        gr.Slider(minimum=1, maximum=4096, value=512, step=1, label="Max new tokens"),
+        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
+        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-P"),
+        gr.Slider(
+            minimum=-2.0,
+            maximum=2.0,
+            value=0.0,
+            step=0.1,
+            label="Frequency Penalty"
+        ),
+        gr.Slider(
+            minimum=-1,
+            maximum=65535,  # Arbitrary upper limit for demonstration
+            value=-1,
+            step=1,
+            label="Seed (-1 for random)"
+        ),
+        gr.Textbox(label="Custom Model", placeholder="Enter custom model path here"),
+        gr.Accordion("Featured Models", open=True).update(
+            gr.Column([
+                gr.Textbox(label="Filter Models", placeholder="Search for a featured model...").change(
+                    filter_models, inputs="__self__", outputs="model"
+                ),
+                gr.Radio(label="Select a model below", value="meta-llama/Llama-3.3-70B-Instruct", choices=models_list, interactive=True, elem_id="model-radio")
+            ])
+        )
+    ],
+    fill_height=True,
+    chatbot=chatbot,
+    theme="Nymbo/Nymbo_Theme",
+)

+# Adding an "Information" tab with accordions for "Featured Models" and "Parameters Overview"
+with gr.Blocks(theme='Nymbo/Nymbo_Theme') as demo:
+    with gr.Tab("Chat"):
+        gr.Markdown("## Chat with the Model")
+        chatbot.render()
     with gr.Tab("Information"):
         with gr.Accordion("Featured Models", open=False):
+            gr.HTML(
                 """
+                <p><a href="https://huggingface.co/models?inference=warm&pipeline_tag=text-generation&sort=trending">See all available models</a></p>
                 <table style="width:100%; text-align:center; margin:auto;">
                     <tr>
                         <th>Model Name</th>
+                        <th>Type</th>
+                        <th>Notes</th>
                     </tr>
                     <tr>
+                        <td>Llama-3.3-70B-Instruct</td>
+                        <td>Instruction</td>
+                        <td>High performance</td>
                     </tr>
                     <tr>
+                        <td>Llama-2-70B-chat</td>
+                        <td>Chat</td>
+                        <td>Conversational</td>
                     </tr>
                     <tr>
+                        <td>Flan-T5-XL</td>
+                        <td>General</td>
+                        <td>Versatile</td>
                     </tr>
                 </table>
                 """

         with gr.Accordion("Parameters Overview", open=False):
             gr.Markdown(
                 """
+                ## Parameters Overview
+                ### Max new tokens
+                This slider controls the maximum number of tokens to generate in the response.

+                ### Temperature
+                Sampling temperature, which controls the randomness. A higher temperature makes the output more random.

+                ### Top-P
+                Top-p (nucleus) sampling, which controls the diversity. The model considers the smallest set of tokens whose cumulative probability exceeds the top-p threshold.

+                ### Frequency Penalty
+                Penalizes repeated tokens in the output, which helps to reduce repetition.

+                ### Seed
+                A fixed seed for reproducibility. Set to -1 for a random seed.
                 """
             )

+print("Launching the demo application.")
+demo.launch()
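
Two hazards in the new version are worth flagging. First, streamed chunks can arrive with `delta.content` set to `None` (the final chunk often does), and `response += token_text` then raises a `TypeError`. A guarded variant of the loop, as a sketch of one possible fix rather than what the commit does:

# Same streaming call as in the commit, with a guard for empty deltas.
for message_chunk in client.chat.completions.create(
    model=model_to_use,
    max_tokens=max_tokens,
    stream=True,
    temperature=temperature,
    top_p=top_p,
    frequency_penalty=frequency_penalty,
    seed=seed,
    messages=messages,
):
    token_text = message_chunk.choices[0].delta.content
    if token_text is None:  # skip chunks that carry no text
        continue
    response += token_text
    yield response

Second, `with gr.Blocks(theme='Nymbo/Nymbo_Theme') as demo:` rebinds the name `demo`, so the `gr.ChatInterface` assigned to the same name above it is never launched; `demo.launch()` starts the Blocks app, which re-renders the `chatbot` component but none of the ChatInterface wiring. The `gr.Accordion(...).update(...)` entry in `additional_inputs` is likewise unlikely to render as intended, since `additional_inputs` expects component instances.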