Nymbo committed on
Commit
7255410
·
verified ·
1 Parent(s): 775feaf
Files changed (1) hide show
  1. app.py +44 -46
app.py CHANGED
@@ -22,19 +22,27 @@ def respond(
22
  top_p,
23
  frequency_penalty,
24
  seed,
25
- custom_model,
26
- selected_model
27
  ):
28
  """
29
- Handles the chatbot response generation.
 
 
 
 
 
 
 
 
 
30
  """
 
31
  print(f"Received message: {message}")
32
  print(f"History: {history}")
33
  print(f"System message: {system_message}")
34
  print(f"Max tokens: {max_tokens}, Temperature: {temperature}, Top-P: {top_p}")
35
  print(f"Frequency Penalty: {frequency_penalty}, Seed: {seed}")
36
  print(f"Custom model: {custom_model}")
37
- print(f"Selected model: {selected_model}")
38
 
39
  # Convert seed to None if -1 (meaning random)
40
  if seed == -1:
@@ -57,12 +65,8 @@ def respond(
57
  # Append the latest user message
58
  messages.append({"role": "user", "content": message})
59
 
60
- # Determine which model to use
61
- model_to_use = (
62
- custom_model.strip()
63
- if custom_model.strip() != ""
64
- else selected_model.strip()
65
- )
66
  print(f"Model selected for inference: {model_to_use}")
67
 
68
  # Start with an empty string to build the response as tokens stream in
@@ -71,9 +75,9 @@ def respond(
71
 
72
  # Make the streaming request to the HF Inference API via openai-like client
73
  for message_chunk in client.chat.completions.create(
74
- model=model_to_use,
75
  max_tokens=max_tokens,
76
- stream=True,
77
  temperature=temperature,
78
  top_p=top_p,
79
  frequency_penalty=frequency_penalty,
@@ -84,36 +88,42 @@ def respond(
84
  token_text = message_chunk.choices[0].delta.content
85
  print(f"Received token: {token_text}")
86
  response += token_text
 
87
  yield response
88
 
89
  print("Completed response generation.")
90
 
91
- # Predefined list of placeholder models for the Featured Models accordion
92
- models_list = [
93
- "meta-llama/Llama-3.3-70B-Instruct",
94
- "microsoft/Phi-3.5-mini-instruct",
95
- "mistralai/Mistral-7B-Instruct-v0.3",
96
- "Qwen/Qwen2.5-72B-Instruct",
97
- ]
98
-
99
- # Function to filter models based on search input
100
- def filter_models(search_term):
101
- filtered_models = [m for m in models_list if search_term.lower() in m.lower()]
102
- return gr.update(choices=filtered_models)
103
-
104
  # Create a Chatbot component with a specified height
105
  chatbot = gr.Chatbot(height=600)
106
  print("Chatbot interface created.")
107
 
108
  # Create the Gradio ChatInterface
109
- # Added "Featured Models" accordion and integrated filtering
110
- demo = gr.Interface(
111
  fn=respond,
112
- inputs=[
113
  gr.Textbox(value="", label="System message"),
114
- gr.Slider(minimum=1, maximum=4096, value=512, step=1, label="Max new tokens"),
115
- gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
116
- gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-P"),
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
117
  gr.Slider(
118
  minimum=-2.0,
119
  maximum=2.0,
@@ -131,25 +141,13 @@ demo = gr.Interface(
131
  gr.Textbox(
132
  value="",
133
  label="Custom Model",
134
- info="(Optional) Provide a custom Hugging Face model path. This will override the default model if not empty.",
135
  ),
136
- # Add Featured Models accordion
137
- gr.Accordion("Featured Models", open=True, children=[
138
- gr.Textbox(label="Filter Models", placeholder="Search for a featured model...", lines=1).change(
139
- filter_models, inputs=["value"], outputs="choices"
140
- ),
141
- gr.Radio(
142
- label="Select a featured model",
143
- value="meta-llama/Llama-3.3-70B-Instruct",
144
- choices=models_list,
145
- elem_id="model-radio",
146
- )
147
- ]),
148
  ],
149
- outputs=gr.Chatbot(height=600),
 
150
  theme="Nymbo/Nymbo_Theme",
151
  )
152
-
153
  print("Gradio interface initialized.")
154
 
155
  if __name__ == "__main__":
 
22
  top_p,
23
  frequency_penalty,
24
  seed,
25
+ custom_model
 
26
  ):
27
  """
28
+ This function handles the chatbot response. It takes in:
29
+ - message: the user's new message
30
+ - history: the list of previous messages, each as a tuple (user_msg, assistant_msg)
31
+ - system_message: the system prompt
32
+ - max_tokens: the maximum number of tokens to generate in the response
33
+ - temperature: sampling temperature
34
+ - top_p: top-p (nucleus) sampling
35
+ - frequency_penalty: penalize repeated tokens in the output
36
+ - seed: a fixed seed for reproducibility; -1 will mean 'random'
37
+ - custom_model: the user-provided custom model name (if any)
38
  """
39
+
40
  print(f"Received message: {message}")
41
  print(f"History: {history}")
42
  print(f"System message: {system_message}")
43
  print(f"Max tokens: {max_tokens}, Temperature: {temperature}, Top-P: {top_p}")
44
  print(f"Frequency Penalty: {frequency_penalty}, Seed: {seed}")
45
  print(f"Custom model: {custom_model}")
 
46
 
47
  # Convert seed to None if -1 (meaning random)
48
  if seed == -1:
 
65
  # Append the latest user message
66
  messages.append({"role": "user", "content": message})
67
 
68
+ # Determine which model to use: either custom_model or a default
69
+ model_to_use = custom_model.strip() if custom_model.strip() != "" else "meta-llama/Llama-3.3-70B-Instruct"
 
 
 
 
70
  print(f"Model selected for inference: {model_to_use}")
71
 
72
  # Start with an empty string to build the response as tokens stream in
 
75
 
76
  # Make the streaming request to the HF Inference API via openai-like client
77
  for message_chunk in client.chat.completions.create(
78
+ model=model_to_use, # Use either the user-provided custom model or default
79
  max_tokens=max_tokens,
80
+ stream=True, # Stream the response
81
  temperature=temperature,
82
  top_p=top_p,
83
  frequency_penalty=frequency_penalty,
 
88
  token_text = message_chunk.choices[0].delta.content
89
  print(f"Received token: {token_text}")
90
  response += token_text
91
+ # Yield the partial response to Gradio so it can display in real-time
92
  yield response
93
 
94
  print("Completed response generation.")
95
 
 
 
 
 
 
 
 
 
 
 
 
 
 
96
  # Create a Chatbot component with a specified height
97
  chatbot = gr.Chatbot(height=600)
98
  print("Chatbot interface created.")
99
 
100
  # Create the Gradio ChatInterface
101
+ # We add two new sliders for Frequency Penalty, Seed, and now a new "Custom Model" text box.
102
+ demo = gr.ChatInterface(
103
  fn=respond,
104
+ additional_inputs=[
105
  gr.Textbox(value="", label="System message"),
106
+ gr.Slider(
107
+ minimum=1,
108
+ maximum=4096,
109
+ value=512,
110
+ step=1,
111
+ label="Max new tokens"
112
+ ),
113
+ gr.Slider(
114
+ minimum=0.1,
115
+ maximum=4.0,
116
+ value=0.7,
117
+ step=0.1,
118
+ label="Temperature"
119
+ ),
120
+ gr.Slider(
121
+ minimum=0.1,
122
+ maximum=1.0,
123
+ value=0.95,
124
+ step=0.05,
125
+ label="Top-P"
126
+ ),
127
  gr.Slider(
128
  minimum=-2.0,
129
  maximum=2.0,
 
141
  gr.Textbox(
142
  value="",
143
  label="Custom Model",
144
+ info="(Optional) Provide a custom Hugging Face model path. This will override the default model if not empty."
145
  ),
 
 
 
 
 
 
 
 
 
 
 
 
146
  ],
147
+ fill_height=True,
148
+ chatbot=chatbot,
149
  theme="Nymbo/Nymbo_Theme",
150
  )
 
151
  print("Gradio interface initialized.")
152
 
153
  if __name__ == "__main__":