Spaces:

Nymbo
/

Serverless-TextGen-Hub

Running

App Files Files Community

Nymbo committed 22 days ago

Commit

9b9dccd

verified ·

1 Parent(s): ce12e24

Update app.py

Browse files

Files changed (1) hide show

app.py +57 -171

app.py CHANGED Viewed

@@ -22,8 +22,7 @@ def respond(
     top_p,
     frequency_penalty,
     seed,
-    custom_model,
-    featured_model
 ):
     """
     This function handles the chatbot response. It takes in:
@@ -36,7 +35,6 @@ def respond(
     - frequency_penalty: penalize repeated tokens in the output
     - seed: a fixed seed for reproducibility; -1 will mean 'random'
     - custom_model: the user-provided custom model name (if any)
-    - featured_model: the model selected from the "Featured Models" radio
     """
     print(f"Received message: {message}")
@@ -45,7 +43,6 @@ def respond(
     print(f"Max tokens: {max_tokens}, Temperature: {temperature}, Top-P: {top_p}")
     print(f"Frequency Penalty: {frequency_penalty}, Seed: {seed}")
     print(f"Custom model: {custom_model}")
-    print(f"Featured model: {featured_model}")
     # Convert seed to None if -1 (meaning random)
     if seed == -1:
@@ -68,15 +65,8 @@ def respond(
     # Append the latest user message
     messages.append({"role": "user", "content": message})
-    # Determine which model to use
-    # If custom_model is provided, that overrides everything.
-    # Otherwise, use the selected featured_model.
-    # If featured_model is empty, fall back on the default.
-    if custom_model.strip() != "":
-        model_to_use = custom_model.strip()
-    else:
-        model_to_use = featured_model.strip() if featured_model.strip() != "" else "meta-llama/Llama-3.3-70B-Instruct"
     print(f"Model selected for inference: {model_to_use}")
     # Start with an empty string to build the response as tokens stream in
@@ -85,9 +75,9 @@ def respond(
     # Make the streaming request to the HF Inference API via openai-like client
     for message_chunk in client.chat.completions.create(
-        model=model_to_use,
         max_tokens=max_tokens,
-        stream=True,
         temperature=temperature,
         top_p=top_p,
         frequency_penalty=frequency_penalty,
@@ -98,6 +88,7 @@ def respond(
         token_text = message_chunk.choices[0].delta.content
         print(f"Received token: {token_text}")
         response += token_text
         yield response
     print("Completed response generation.")
@@ -106,162 +97,57 @@ def respond(
 chatbot = gr.Chatbot(height=600)
 print("Chatbot interface created.")
-####################################
-#           GRADIO UI SETUP        #
-####################################
-# 1) We'll create a set of placeholder featured models.
-all_featured_models = [
-    "meta-llama/Llama-2-7B-Chat-hf",
-    "meta-llama/Llama-2-13B-Chat-hf",
-    "bigscience/bloom",
-    "google/flan-t5-xxl",
-    "meta-llama/Llama-3.3-70B-Instruct"
-]
-def filter_featured_models(search_term):
-    """
-    Helper function to filter featured models by search text.
-    """
-    filtered = [m for m in all_featured_models if search_term.lower() in m.lower()]
-    # We'll return an update with the filtered list
-    return gr.update(choices=filtered)
-# 2) Create the ChatInterface with additional inputs
-with gr.Blocks(theme="Nymbo/Nymbo_Theme") as demo:
-    gr.Markdown("# Serverless Text Generation Hub")
-    # We'll organize content in tabs similar to the ImgGen-Hub
-    with gr.Tab("Chat"):
-        gr.Markdown("## Chat Interface")
-        chat_interface = gr.ChatInterface(
-            fn=respond,
-            additional_inputs=[
-                gr.Textbox(value="", label="System message"),
-                gr.Slider(
-                    minimum=1,
-                    maximum=4096,
-                    value=512,
-                    step=1,
-                    label="Max new tokens"
-                ),
-                gr.Slider(
-                    minimum=0.1,
-                    maximum=4.0,
-                    value=0.7,
-                    step=0.1,
-                    label="Temperature"
-                ),
-                gr.Slider(
-                    minimum=0.1,
-                    maximum=1.0,
-                    value=0.95,
-                    step=0.05,
-                    label="Top-P"
-                ),
-                gr.Slider(
-                    minimum=-2.0,
-                    maximum=2.0,
-                    value=0.0,
-                    step=0.1,
-                    label="Frequency Penalty"
-                ),
-                gr.Slider(
-                    minimum=-1,
-                    maximum=65535,
-                    value=-1,
-                    step=1,
-                    label="Seed (-1 for random)"
-                ),
-                gr.Textbox(
-                    value="",
-                    label="Custom Model",
-                    info="(Optional) Provide a custom Hugging Face model path. This overrides the featured model if not empty."
-                ),
-            ],
-            fill_height=True,
-            chatbot=chatbot
-        )
-        # We'll add a new accordion for "Featured Models" within the Chat tab
-        with gr.Accordion("Featured Models", open=True):
-            gr.Markdown("Pick one of the placeholder featured models below, or search for more.")
-            featured_model_search = gr.Textbox(
-                label="Filter Models",
-                placeholder="Type to filter featured models..."
-            )
-            featured_model_radio = gr.Radio(
-                label="Select a featured model",
-                choices=all_featured_models,
-                value="meta-llama/Llama-3.3-70B-Instruct"
-            )
-            # Connect the search box to the filter function
-            featured_model_search.change(
-                filter_featured_models,
-                inputs=featured_model_search,
-                outputs=featured_model_radio
-            )
-            # We must connect the featured_model_radio to the chat interface
-            # We'll pass it as the last argument in the respond function.
-            chat_interface.add_variable(featured_model_radio, "featured_model")
-    # 3) Create the "Information" tab, containing:
-    #    - A "Featured Models" accordion with a table
-    #    - A "Parameters Overview" accordion with markdown
-    with gr.Tab("Information"):
-        gr.Markdown("## Additional Information and Help")
-        with gr.Accordion("Featured Models (Table)", open=False):
-            gr.Markdown("""
-            Here is a table of some placeholder featured models:
-            <table style="width:100%; text-align:center; margin:auto;">
-                <tr>
-                    <th>Model</th>
-                    <th>Description</th>
-                </tr>
-                <tr>
-                    <td>meta-llama/Llama-2-7B-Chat-hf</td>
-                    <td>A 7B parameter Llama 2 Chat model</td>
-                </tr>
-                <tr>
-                    <td>meta-llama/Llama-2-13B-Chat-hf</td>
-                    <td>A 13B parameter Llama 2 Chat model</td>
-                </tr>
-                <tr>
-                    <td>bigscience/bloom</td>
-                    <td>Large-scale multilingual model</td>
-                </tr>
-                <tr>
-                    <td>google/flan-t5-xxl</td>
-                    <td>A large instruction-tuned T5 model</td>
-                </tr>
-                <tr>
-                    <td>meta-llama/Llama-3.3-70B-Instruct</td>
-                    <td>70B parameter Llama 3.3 instruct model</td>
-                </tr>
-            </table>
-            """)
-        with gr.Accordion("Parameters Overview", open=False):
-            gr.Markdown("""
-            **Here’s a quick breakdown of the main parameters you’ll find in this interface:**
-            - **Max New Tokens**: This controls the maximum number of tokens (words or subwords) in the generated response.
-            - **Temperature**: Adjusts how 'creative' or random the model's output is. A low temperature keeps it more predictable; a high temperature makes it more varied or 'wacky.'
-            - **Top-P**: Also known as nucleus sampling. Controls how the model decides which words to include. Lower means more conservative, higher means more open.
-            - **Frequency Penalty**: A value to penalize repeated words or phrases. Higher penalty means the model will avoid repeating itself.
-            - **Seed**: Fix a random seed for reproducibility. If set to -1, a random seed is used each time.
-            - **Custom Model**: Provide the full Hugging Face model path (like `bigscience/bloom`) if you'd like to override the default or the featured model you selected above.
-            ### Usage Tips
-            1. If you’d like to use one of the featured models, simply select it from the list in the **Featured Models** accordion.
-            2. If you’d like to override the featured models, type your own custom path in **Custom Model**.
-            3. Adjust your parameters (temperature, top-p, etc.) if you want different styles of results.
-            4. You can provide a **System message** to guide the overall behavior or 'role' of the AI. For example, you can say "You are a helpful coding assistant" or something else to set the context.
-            Feel free to play around with these settings, and if you have any questions, check out the Hugging Face docs or ask in the community spaces!
-            """)
 print("Gradio interface initialized.")
 if __name__ == "__main__":

     top_p,
     frequency_penalty,
     seed,
+    custom_model
 ):
     """
     This function handles the chatbot response. It takes in:
     - frequency_penalty: penalize repeated tokens in the output
     - seed: a fixed seed for reproducibility; -1 will mean 'random'
     - custom_model: the user-provided custom model name (if any)
     """
     print(f"Received message: {message}")
     print(f"Max tokens: {max_tokens}, Temperature: {temperature}, Top-P: {top_p}")
     print(f"Frequency Penalty: {frequency_penalty}, Seed: {seed}")
     print(f"Custom model: {custom_model}")
     # Convert seed to None if -1 (meaning random)
     if seed == -1:
     # Append the latest user message
     messages.append({"role": "user", "content": message})
+    # Determine which model to use: either custom_model or a default
+    model_to_use = custom_model.strip() if custom_model.strip() != "" else "meta-llama/Llama-3.3-70B-Instruct"
     print(f"Model selected for inference: {model_to_use}")
     # Start with an empty string to build the response as tokens stream in
     # Make the streaming request to the HF Inference API via openai-like client
     for message_chunk in client.chat.completions.create(
+        model=model_to_use,              # Use either the user-provided custom model or default
         max_tokens=max_tokens,
+        stream=True,                     # Stream the response
         temperature=temperature,
         top_p=top_p,
         frequency_penalty=frequency_penalty,
         token_text = message_chunk.choices[0].delta.content
         print(f"Received token: {token_text}")
         response += token_text
+        # Yield the partial response to Gradio so it can display in real-time
         yield response
     print("Completed response generation.")
 chatbot = gr.Chatbot(height=600)
 print("Chatbot interface created.")
+# Create the Gradio ChatInterface
+# We add two sliders (Frequency Penalty and Seed) and a "Custom Model" text box.
+demo = gr.ChatInterface(
+    fn=respond,
+    additional_inputs=[
+        gr.Textbox(value="", label="System message"),
+        gr.Slider(
+            minimum=1,
+            maximum=4096,
+            value=512,
+            step=1,
+            label="Max new tokens"
+        ),
+        gr.Slider(
+            minimum=0.1,
+            maximum=4.0,
+            value=0.7,
+            step=0.1,
+            label="Temperature"
+        ),
+        gr.Slider(
+            minimum=0.1,
+            maximum=1.0,
+            value=0.95,
+            step=0.05,
+            label="Top-P"
+        ),
+        gr.Slider(
+            minimum=-2.0,
+            maximum=2.0,
+            value=0.0,
+            step=0.1,
+            label="Frequency Penalty"
+        ),
+        gr.Slider(
+            minimum=-1,
+            maximum=65535,  # Arbitrary upper limit for demonstration
+            value=-1,
+            step=1,
+            label="Seed (-1 for random)"
+        ),
+        gr.Textbox(
+            value="",
+            label="Custom Model",
+            info="(Optional) Provide a custom Hugging Face model path. This will override the default model if not empty."
+        ),
+    ],
+    fill_height=True,
+    chatbot=chatbot,
+    theme="Nymbo/Nymbo_Theme",
+)
 print("Gradio interface initialized.")
 if __name__ == "__main__":