Nymbo committed · verified
Commit ce12e24 · 1 Parent(s): 98674ca

featured models and info tab

Files changed (1): app.py (+171 −57)
app.py CHANGED
@@ -22,7 +22,8 @@ def respond(
     top_p,
     frequency_penalty,
     seed,
-    custom_model
+    custom_model,
+    featured_model
 ):
     """
     This function handles the chatbot response. It takes in:
@@ -35,6 +36,7 @@ def respond(
     - frequency_penalty: penalize repeated tokens in the output
     - seed: a fixed seed for reproducibility; -1 will mean 'random'
     - custom_model: the user-provided custom model name (if any)
+    - featured_model: the model selected from the "Featured Models" radio
     """

     print(f"Received message: {message}")
@@ -43,6 +45,7 @@ def respond(
     print(f"Max tokens: {max_tokens}, Temperature: {temperature}, Top-P: {top_p}")
     print(f"Frequency Penalty: {frequency_penalty}, Seed: {seed}")
     print(f"Custom model: {custom_model}")
+    print(f"Featured model: {featured_model}")

     # Convert seed to None if -1 (meaning random)
     if seed == -1:
@@ -65,8 +68,15 @@ def respond(
     # Append the latest user message
     messages.append({"role": "user", "content": message})

-    # Determine which model to use: either custom_model or a default
-    model_to_use = custom_model.strip() if custom_model.strip() != "" else "meta-llama/Llama-3.3-70B-Instruct"
+    # Determine which model to use
+    # If custom_model is provided, that overrides everything.
+    # Otherwise, use the selected featured_model.
+    # If featured_model is empty, fall back on the default.
+    if custom_model.strip() != "":
+        model_to_use = custom_model.strip()
+    else:
+        model_to_use = featured_model.strip() if featured_model.strip() != "" else "meta-llama/Llama-3.3-70B-Instruct"
+
     print(f"Model selected for inference: {model_to_use}")

     # Start with an empty string to build the response as tokens stream in
@@ -75,9 +85,9 @@ def respond(

     # Make the streaming request to the HF Inference API via openai-like client
     for message_chunk in client.chat.completions.create(
-        model=model_to_use,  # Use either the user-provided custom model or default
+        model=model_to_use,
         max_tokens=max_tokens,
-        stream=True,  # Stream the response
+        stream=True,
         temperature=temperature,
         top_p=top_p,
         frequency_penalty=frequency_penalty,
@@ -88,7 +98,6 @@ def respond(
         token_text = message_chunk.choices[0].delta.content
         print(f"Received token: {token_text}")
         response += token_text
-        # Yield the partial response to Gradio so it can display in real-time
         yield response

     print("Completed response generation.")
@@ -97,57 +106,162 @@ def respond(
 chatbot = gr.Chatbot(height=600)
 print("Chatbot interface created.")

-# Create the Gradio ChatInterface
-# We add two new sliders for Frequency Penalty, Seed, and now a new "Custom Model" text box.
-demo = gr.ChatInterface(
-    fn=respond,
-    additional_inputs=[
-        gr.Textbox(value="", label="System message"),
-        gr.Slider(
-            minimum=1,
-            maximum=4096,
-            value=512,
-            step=1,
-            label="Max new tokens"
-        ),
-        gr.Slider(
-            minimum=0.1,
-            maximum=4.0,
-            value=0.7,
-            step=0.1,
-            label="Temperature"
-        ),
-        gr.Slider(
-            minimum=0.1,
-            maximum=1.0,
-            value=0.95,
-            step=0.05,
-            label="Top-P"
-        ),
-        gr.Slider(
-            minimum=-2.0,
-            maximum=2.0,
-            value=0.0,
-            step=0.1,
-            label="Frequency Penalty"
-        ),
-        gr.Slider(
-            minimum=-1,
-            maximum=65535,  # Arbitrary upper limit for demonstration
-            value=-1,
-            step=1,
-            label="Seed (-1 for random)"
-        ),
-        gr.Textbox(
-            value="",
-            label="Custom Model",
-            info="(Optional) Provide a custom Hugging Face model path. This will override the default model if not empty."
-        ),
-    ],
-    fill_height=True,
-    chatbot=chatbot,
-    theme="Nymbo/Nymbo_Theme",
-)
+####################################
+#          GRADIO UI SETUP         #
+####################################
+
+# 1) We'll create a set of placeholder featured models.
+all_featured_models = [
+    "meta-llama/Llama-2-7B-Chat-hf",
+    "meta-llama/Llama-2-13B-Chat-hf",
+    "bigscience/bloom",
+    "google/flan-t5-xxl",
+    "meta-llama/Llama-3.3-70B-Instruct"
+]
+
+def filter_featured_models(search_term):
+    """
+    Helper function to filter featured models by search text.
+    """
+    filtered = [m for m in all_featured_models if search_term.lower() in m.lower()]
+    # We'll return an update with the filtered list
+    return gr.update(choices=filtered)
+
+# 2) Create the ChatInterface with additional inputs
+with gr.Blocks(theme="Nymbo/Nymbo_Theme") as demo:
+    gr.Markdown("# Serverless Text Generation Hub")
+
+    # We'll organize content in tabs similar to the ImgGen-Hub
+    with gr.Tab("Chat"):
+        gr.Markdown("## Chat Interface")
+        chat_interface = gr.ChatInterface(
+            fn=respond,
+            additional_inputs=[
+                gr.Textbox(value="", label="System message"),
+                gr.Slider(
+                    minimum=1,
+                    maximum=4096,
+                    value=512,
+                    step=1,
+                    label="Max new tokens"
+                ),
+                gr.Slider(
+                    minimum=0.1,
+                    maximum=4.0,
+                    value=0.7,
+                    step=0.1,
+                    label="Temperature"
+                ),
+                gr.Slider(
+                    minimum=0.1,
+                    maximum=1.0,
+                    value=0.95,
+                    step=0.05,
+                    label="Top-P"
+                ),
+                gr.Slider(
+                    minimum=-2.0,
+                    maximum=2.0,
+                    value=0.0,
+                    step=0.1,
+                    label="Frequency Penalty"
+                ),
+                gr.Slider(
+                    minimum=-1,
+                    maximum=65535,
+                    value=-1,
+                    step=1,
+                    label="Seed (-1 for random)"
+                ),
+                gr.Textbox(
+                    value="",
+                    label="Custom Model",
+                    info="(Optional) Provide a custom Hugging Face model path. This overrides the featured model if not empty."
+                ),
+            ],
+            fill_height=True,
+            chatbot=chatbot
+        )
+
+        # We'll add a new accordion for "Featured Models" within the Chat tab
+        with gr.Accordion("Featured Models", open=True):
+            gr.Markdown("Pick one of the placeholder featured models below, or search for more.")
+            featured_model_search = gr.Textbox(
+                label="Filter Models",
+                placeholder="Type to filter featured models..."
+            )
+            featured_model_radio = gr.Radio(
+                label="Select a featured model",
+                choices=all_featured_models,
+                value="meta-llama/Llama-3.3-70B-Instruct"
+            )
+            # Connect the search box to the filter function
+            featured_model_search.change(
+                filter_featured_models,
+                inputs=featured_model_search,
+                outputs=featured_model_radio
+            )
+
+        # We must connect the featured_model_radio to the chat interface
+        # We'll pass it as the last argument in the respond function.
+        chat_interface.add_variable(featured_model_radio, "featured_model")
+
+    # 3) Create the "Information" tab, containing:
+    #    - A "Featured Models" accordion with a table
+    #    - A "Parameters Overview" accordion with markdown
+    with gr.Tab("Information"):
+        gr.Markdown("## Additional Information and Help")
+        with gr.Accordion("Featured Models (Table)", open=False):
+            gr.Markdown("""
+            Here is a table of some placeholder featured models:
+            <table style="width:100%; text-align:center; margin:auto;">
+                <tr>
+                    <th>Model</th>
+                    <th>Description</th>
+                </tr>
+                <tr>
+                    <td>meta-llama/Llama-2-7B-Chat-hf</td>
+                    <td>A 7B parameter Llama 2 Chat model</td>
+                </tr>
+                <tr>
+                    <td>meta-llama/Llama-2-13B-Chat-hf</td>
+                    <td>A 13B parameter Llama 2 Chat model</td>
+                </tr>
+                <tr>
+                    <td>bigscience/bloom</td>
+                    <td>Large-scale multilingual model</td>
+                </tr>
+                <tr>
+                    <td>google/flan-t5-xxl</td>
+                    <td>A large instruction-tuned T5 model</td>
+                </tr>
+                <tr>
+                    <td>meta-llama/Llama-3.3-70B-Instruct</td>
+                    <td>70B parameter Llama 3.3 instruct model</td>
+                </tr>
+            </table>
+            """)
+
+        with gr.Accordion("Parameters Overview", open=False):
+            gr.Markdown("""
+            **Here’s a quick breakdown of the main parameters you’ll find in this interface:**
+
+            - **Max New Tokens**: This controls the maximum number of tokens (words or subwords) in the generated response.
+            - **Temperature**: Adjusts how 'creative' or random the model's output is. A low temperature keeps it more predictable; a high temperature makes it more varied or 'wacky.'
+            - **Top-P**: Also known as nucleus sampling. Controls how the model decides which words to include. Lower means more conservative, higher means more open.
+            - **Frequency Penalty**: A value to penalize repeated words or phrases. Higher penalty means the model will avoid repeating itself.
+            - **Seed**: Fix a random seed for reproducibility. If set to -1, a random seed is used each time.
+            - **Custom Model**: Provide the full Hugging Face model path (like `bigscience/bloom`) if you'd like to override the default or the featured model you selected above.
+
+            ### Usage Tips
+            1. If you’d like to use one of the featured models, simply select it from the list in the **Featured Models** accordion.
+            2. If you’d like to override the featured models, type your own custom path in **Custom Model**.
+            3. Adjust your parameters (temperature, top-p, etc.) if you want different styles of results.
+            4. You can provide a **System message** to guide the overall behavior or 'role' of the AI. For example, you can say "You are a helpful coding assistant" or something else to set the context.
+
+            Feel free to play around with these settings, and if you have any questions, check out the Hugging Face docs or ask in the community spaces!
+            """)
+
 print("Gradio interface initialized.")

 if __name__ == "__main__":
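
The precedence rule this commit introduces is: a non-empty Custom Model box wins, then the Featured Models radio selection, then the hard-coded default. A minimal, self-contained sketch of that rule follows; resolve_model and DEFAULT_MODEL are illustrative names for this sketch only, since the commit inlines the logic directly inside respond():

# Sketch of the model-selection precedence used in respond() above.
# resolve_model and DEFAULT_MODEL are hypothetical names, not
# identifiers from the committed file.
DEFAULT_MODEL = "meta-llama/Llama-3.3-70B-Instruct"

def resolve_model(custom_model: str, featured_model: str) -> str:
    if custom_model.strip():        # a user-typed path overrides everything
        return custom_model.strip()
    if featured_model.strip():      # otherwise use the radio selection
        return featured_model.strip()
    return DEFAULT_MODEL            # final fallback

# Usage:
assert resolve_model("", "") == DEFAULT_MODEL
assert resolve_model("", "bigscience/bloom") == "bigscience/bloom"
assert resolve_model("my-org/my-model", "bigscience/bloom") == "my-org/my-model"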