Spaces:

Nymbo
/

Serverless-TextGen-Hub

Running

App Files Files Community

Nymbo committed 9 days ago

Commit

7255410

verified ·

1 Parent(s): 775feaf

reverting

Browse files

Files changed (1) hide show

app.py +44 -46

app.py CHANGED Viewed

@@ -22,19 +22,27 @@ def respond(
     top_p,
     frequency_penalty,
     seed,
-    custom_model,
-    selected_model
 ):
     """
-    Handles the chatbot response generation.
     """
     print(f"Received message: {message}")
     print(f"History: {history}")
     print(f"System message: {system_message}")
     print(f"Max tokens: {max_tokens}, Temperature: {temperature}, Top-P: {top_p}")
     print(f"Frequency Penalty: {frequency_penalty}, Seed: {seed}")
     print(f"Custom model: {custom_model}")
-    print(f"Selected model: {selected_model}")
     # Convert seed to None if -1 (meaning random)
     if seed == -1:
@@ -57,12 +65,8 @@ def respond(
     # Append the latest user message
     messages.append({"role": "user", "content": message})
-    # Determine which model to use
-    model_to_use = (
-        custom_model.strip()
-        if custom_model.strip() != ""
-        else selected_model.strip()
-    )
     print(f"Model selected for inference: {model_to_use}")
     # Start with an empty string to build the response as tokens stream in
@@ -71,9 +75,9 @@ def respond(
     # Make the streaming request to the HF Inference API via openai-like client
     for message_chunk in client.chat.completions.create(
-        model=model_to_use,
         max_tokens=max_tokens,
-        stream=True,
         temperature=temperature,
         top_p=top_p,
         frequency_penalty=frequency_penalty,
@@ -84,36 +88,42 @@ def respond(
         token_text = message_chunk.choices[0].delta.content
         print(f"Received token: {token_text}")
         response += token_text
         yield response
     print("Completed response generation.")
-# Predefined list of placeholder models for the Featured Models accordion
-models_list = [
-    "meta-llama/Llama-3.3-70B-Instruct",
-    "microsoft/Phi-3.5-mini-instruct",
-    "mistralai/Mistral-7B-Instruct-v0.3",
-    "Qwen/Qwen2.5-72B-Instruct",
-]
-# Function to filter models based on search input
-def filter_models(search_term):
-    filtered_models = [m for m in models_list if search_term.lower() in m.lower()]
-    return gr.update(choices=filtered_models)
 # Create a Chatbot component with a specified height
 chatbot = gr.Chatbot(height=600)
 print("Chatbot interface created.")
 # Create the Gradio ChatInterface
-# Added "Featured Models" accordion and integrated filtering
-demo = gr.Interface(
     fn=respond,
-    inputs=[
         gr.Textbox(value="", label="System message"),
-        gr.Slider(minimum=1, maximum=4096, value=512, step=1, label="Max new tokens"),
-        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
-        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-P"),
         gr.Slider(
             minimum=-2.0,
             maximum=2.0,
@@ -131,25 +141,13 @@ demo = gr.Interface(
         gr.Textbox(
             value="",
             label="Custom Model",
-            info="(Optional) Provide a custom Hugging Face model path. This will override the default model if not empty.",
         ),
-        # Add Featured Models accordion
-        gr.Accordion("Featured Models", open=True, children=[
-            gr.Textbox(label="Filter Models", placeholder="Search for a featured model...", lines=1).change(
-                filter_models, inputs=["value"], outputs="choices"
-            ),
-            gr.Radio(
-                label="Select a featured model",
-                value="meta-llama/Llama-3.3-70B-Instruct",
-                choices=models_list,
-                elem_id="model-radio",
-            )
-        ]),
     ],
-    outputs=gr.Chatbot(height=600),
     theme="Nymbo/Nymbo_Theme",
 )
 print("Gradio interface initialized.")
 if __name__ == "__main__":

     top_p,
     frequency_penalty,
     seed,
+    custom_model
 ):
     """
+    This function handles the chatbot response. It takes in:
+    - message: the user's new message
+    - history: the list of previous messages, each as a tuple (user_msg, assistant_msg)
+    - system_message: the system prompt
+    - max_tokens: the maximum number of tokens to generate in the response
+    - temperature: sampling temperature
+    - top_p: top-p (nucleus) sampling
+    - frequency_penalty: penalize repeated tokens in the output
+    - seed: a fixed seed for reproducibility; -1 will mean 'random'
+    - custom_model: the user-provided custom model name (if any)
     """
     print(f"Received message: {message}")
     print(f"History: {history}")
     print(f"System message: {system_message}")
     print(f"Max tokens: {max_tokens}, Temperature: {temperature}, Top-P: {top_p}")
     print(f"Frequency Penalty: {frequency_penalty}, Seed: {seed}")
     print(f"Custom model: {custom_model}")
     # Convert seed to None if -1 (meaning random)
     if seed == -1:
     # Append the latest user message
     messages.append({"role": "user", "content": message})
+    # Determine which model to use: either custom_model or a default
+    model_to_use = custom_model.strip() if custom_model.strip() != "" else "meta-llama/Llama-3.3-70B-Instruct"
     print(f"Model selected for inference: {model_to_use}")
     # Start with an empty string to build the response as tokens stream in
     # Make the streaming request to the HF Inference API via openai-like client
     for message_chunk in client.chat.completions.create(
+        model=model_to_use,              # Use either the user-provided custom model or default
         max_tokens=max_tokens,
+        stream=True,                     # Stream the response
         temperature=temperature,
         top_p=top_p,
         frequency_penalty=frequency_penalty,
         token_text = message_chunk.choices[0].delta.content
         print(f"Received token: {token_text}")
         response += token_text
+        # Yield the partial response to Gradio so it can display in real-time
         yield response
     print("Completed response generation.")
 # Create a Chatbot component with a specified height
 chatbot = gr.Chatbot(height=600)
 print("Chatbot interface created.")
 # Create the Gradio ChatInterface
+# We add two new sliders (Frequency Penalty and Seed) and a new "Custom Model" text box.
+demo = gr.ChatInterface(
     fn=respond,
+    additional_inputs=[
         gr.Textbox(value="", label="System message"),
+        gr.Slider(
+            minimum=1,
+            maximum=4096,
+            value=512,
+            step=1,
+            label="Max new tokens"
+        ),
+        gr.Slider(
+            minimum=0.1,
+            maximum=4.0,
+            value=0.7,
+            step=0.1,
+            label="Temperature"
+        ),
+        gr.Slider(
+            minimum=0.1,
+            maximum=1.0,
+            value=0.95,
+            step=0.05,
+            label="Top-P"
+        ),
         gr.Slider(
             minimum=-2.0,
             maximum=2.0,
         gr.Textbox(
             value="",
             label="Custom Model",
+            info="(Optional) Provide a custom Hugging Face model path. This will override the default model if not empty."
         ),
     ],
+    fill_height=True,
+    chatbot=chatbot,
     theme="Nymbo/Nymbo_Theme",
 )
 print("Gradio interface initialized.")
 if __name__ == "__main__":