Spaces:

Nymbo
/

Serverless-TextGen-Hub

Running

App Files Community

Nymbo committed 22 days ago

Commit

62429d1

verified ·

1 Parent(s): 204f2fd

adding info tab with featured models table and parameters overview

Browse files

Files changed (1) hide show

app.py +252 -154

app.py CHANGED Viewed

@@ -2,11 +2,26 @@ import gradio as gr
 from openai import OpenAI
 import os
 # Retrieve the access token from the environment variable
 ACCESS_TOKEN = os.getenv("HF_TOKEN")
 print("Access token loaded.")
-# Initialize the OpenAI client with the Hugging Face Inference API endpoint
 client = OpenAI(
     base_url="https://api-inference.huggingface.co/v1/",
     api_key=ACCESS_TOKEN,
@@ -48,19 +63,19 @@ def respond(
     if seed == -1:
         seed = None
-    # Construct the messages array required by the API
     messages = [{"role": "system", "content": system_message}]
     print("Initial messages array constructed.")
     # Add conversation history to the context
     for val in history:
         user_part = val[0]  # Extract user message from the tuple
-        assistant_part = val[1]  # Extract assistant message from the tuple
         if user_part:
-            messages.append({"role": "user", "content": user_part})  # Append user message
             print(f"Added user message to context: {user_part}")
         if assistant_part:
-            messages.append({"role": "assistant", "content": assistant_part})  # Append assistant message
             print(f"Added assistant message to context: {assistant_part}")
     # Append the latest user message
@@ -71,179 +86,262 @@ def respond(
     model_to_use = custom_model.strip() if custom_model.strip() != "" else "meta-llama/Llama-3.3-70B-Instruct"
     print(f"Model selected for inference: {model_to_use}")
-    # Start with an empty string to build the response as tokens stream in
-    response = ""
-    print("Sending request to OpenAI API.")
-    # Make the streaming request to the HF Inference API via openai-like client
     for message_chunk in client.chat.completions.create(
-        model=model_to_use,              # Use either the user-provided or default model
-        max_tokens=max_tokens,           # Maximum tokens for the response
-        stream=True,                     # Enable streaming responses
-        temperature=temperature,         # Adjust randomness in response
-        top_p=top_p,                     # Control diversity in response generation
-        frequency_penalty=frequency_penalty,  # Penalize repeated phrases
-        seed=seed,                       # Set random seed for reproducibility
-        messages=messages,               # Contextual conversation messages
     ):
         # Extract the token text from the response chunk
         token_text = message_chunk.choices[0].delta.content
         print(f"Received token: {token_text}")
-        response += token_text
         # Yield the partial response to Gradio so it can display in real-time
-        yield response
     print("Completed response generation.")
-# -------------------------
-# GRADIO UI CONFIGURATION
-# -------------------------
-# Create a Chatbot component with a specified height
-chatbot = gr.Chatbot(height=600)  # Define the height of the chatbot interface
-print("Chatbot interface created.")
-# Create textboxes and sliders for system prompt, tokens, and other parameters
-system_message_box = gr.Textbox(value="", label="System message")  # Input box for system message
-max_tokens_slider = gr.Slider(
-    minimum=1,   # Minimum allowable tokens
-    maximum=4096,  # Maximum allowable tokens
-    value=512,   # Default value
-    step=1,      # Increment step size
-    label="Max new tokens"  # Slider label
-)
-temperature_slider = gr.Slider(
-    minimum=0.1,  # Minimum temperature
-    maximum=4.0,  # Maximum temperature
-    value=0.7,    # Default value
-    step=0.1,     # Increment step size
-    label="Temperature"  # Slider label
-)
-top_p_slider = gr.Slider(
-    minimum=0.1,  # Minimum top-p value
-    maximum=1.0,  # Maximum top-p value
-    value=0.95,   # Default value
-    step=0.05,    # Increment step size
-    label="Top-P"  # Slider label
-)
-frequency_penalty_slider = gr.Slider(
-    minimum=-2.0,  # Minimum penalty
-    maximum=2.0,   # Maximum penalty
-    value=0.0,     # Default value
-    step=0.1,      # Increment step size
-    label="Frequency Penalty"  # Slider label
-)
-seed_slider = gr.Slider(
-    minimum=-1,    # -1 for random seed
-    maximum=65535, # Maximum seed value
-    value=-1,      # Default value
-    step=1,        # Increment step size
-    label="Seed (-1 for random)"  # Slider label
-)
-# The custom_model_box is what the respond function sees as "custom_model"
-custom_model_box = gr.Textbox(
-    value="",  # Default value
-    label="Custom Model",  # Label for the textbox
-    info="(Optional) Provide a custom Hugging Face model path. Overrides any selected featured model."  # Additional info
-)
-# Define a function that updates the custom model box when a featured model is selected
-def set_custom_model_from_radio(selected):
-    """
-    This function will get triggered whenever someone picks a model from the 'Featured Models' radio.
-    We will update the Custom Model text box with that selection automatically.
-    """
-    print(f"Featured model selected: {selected}")  # Log selected model
-    return selected
-# Create the main ChatInterface object
-demo = gr.ChatInterface(
-    fn=respond,  # The function to handle responses
-    additional_inputs=[
-        system_message_box,          # System message input
-        max_tokens_slider,           # Max tokens slider
-        temperature_slider,          # Temperature slider
-        top_p_slider,                # Top-P slider
-        frequency_penalty_slider,    # Frequency penalty slider
-        seed_slider,                 # Seed slider
-        custom_model_box             # Custom model input
-    ],
-    fill_height=True,  # Allow the chatbot to fill the container height
-    chatbot=chatbot,   # Chatbot UI component
-    theme="Nymbo/Nymbo_Theme",  # Theme for the interface
-)
-print("ChatInterface object created.")
-# -----------
-# ADDING THE "FEATURED MODELS" ACCORDION
-# -----------
-with demo:
-    with gr.Accordion("Featured Models", open=False):  # Collapsible section for featured models
-        model_search_box = gr.Textbox(
-            label="Filter Models",  # Label for the search box
-            placeholder="Search for a featured model...",  # Placeholder text
-            lines=1  # Single-line input
         )
-        print("Model search box created.")
-        # Sample list of popular text models
-        models_list = [
-            "meta-llama/Llama-3.3-70B-Instruct",
-            "meta-llama/Llama-3.2-3B-Instruct",
-            "meta-llama/Llama-3.2-1B-Instruct",
-            "meta-llama/Llama-3.1-8B-Instruct",
-            "NousResearch/Hermes-3-Llama-3.1-8B",
-            "google/gemma-2-27b-it",
-            "google/gemma-2-9b-it",
-            "google/gemma-2-2b-it",
-            "mistralai/Mistral-Nemo-Instruct-2407",
-            "mistralai/Mixtral-8x7B-Instruct-v0.1",
-            "mistralai/Mistral-7B-Instruct-v0.3",
-            "Qwen/Qwen2.5-72B-Instruct",
-            "Qwen/QwQ-32B-Preview",
-            "PowerInfer/SmallThinker-3B-Preview",
-            "HuggingFaceTB/SmolLM2-1.7B-Instruct",
-            "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
-            "microsoft/Phi-3.5-mini-instruct",
-        ]
-        print("Models list initialized.")
-        featured_model_radio = gr.Radio(
-            label="Select a model below",  # Label for the radio buttons
-            choices=models_list,  # List of available models
-            value="meta-llama/Llama-3.3-70B-Instruct",  # Default selection
-            interactive=True  # Allow user interaction
         )
-        print("Featured models radio button created.")
-        # Filter function for the radio button list
-        def filter_models(search_term):
-            print(f"Filtering models with search term: {search_term}")  # Log the search term
-            filtered = [m for m in models_list if search_term.lower() in m.lower()]  # Filter models by search term
-            print(f"Filtered models: {filtered}")  # Log filtered models
-            return gr.update(choices=filtered)
-        # Update the radio list when the search box value changes
-        model_search_box.change(
-            fn=filter_models,  # Function to filter models
-            inputs=model_search_box,  # Input: search box value
-            outputs=featured_model_radio  # Output: update radio button list
         )
-        print("Model search box change event linked.")
-        # Update the custom model textbox when a featured model is selected
-        featured_model_radio.change(
-            fn=set_custom_model_from_radio,  # Function to set custom model
-            inputs=featured_model_radio,  # Input: selected model
-            outputs=custom_model_box  # Output: update custom model textbox
         )
-        print("Featured model radio button change event linked.")
 print("Gradio interface initialized.")
 if __name__ == "__main__":
     print("Launching the demo application.")
     demo.launch()

 from openai import OpenAI
 import os
+# -------------------
+# SERVERLESS-TEXTGEN-HUB
+# -------------------
+#
+# This version has been updated to include an "Information" tab above the Chat tab.
+# The Information tab has two accordions:
+#   - "Featured Models" which displays a simple table
+#   - "Parameters Overview" which contains markdown describing the settings
+#
+# The Chat tab contains the existing chatbot UI.
+# -------------------
+# SETUP AND CONFIG
+# -------------------
 # Retrieve the access token from the environment variable
 ACCESS_TOKEN = os.getenv("HF_TOKEN")
 print("Access token loaded.")
+# Initialize the OpenAI-like client (Hugging Face Inference API) with your token
 client = OpenAI(
     base_url="https://api-inference.huggingface.co/v1/",
     api_key=ACCESS_TOKEN,
     if seed == -1:
         seed = None
+    # Construct the messages array required by the HF Inference API
     messages = [{"role": "system", "content": system_message}]
     print("Initial messages array constructed.")
     # Add conversation history to the context
     for val in history:
         user_part = val[0]  # Extract user message from the tuple
+        assistant_part = val[1]  # Extract assistant message
         if user_part:
+            messages.append({"role": "user", "content": user_part})
             print(f"Added user message to context: {user_part}")
         if assistant_part:
+            messages.append({"role": "assistant", "content": assistant_part})
             print(f"Added assistant message to context: {assistant_part}")
     # Append the latest user message
     model_to_use = custom_model.strip() if custom_model.strip() != "" else "meta-llama/Llama-3.3-70B-Instruct"
     print(f"Model selected for inference: {model_to_use}")
+    # Start with an empty string to build the streamed response
+    response_text = ""
+    print("Sending request to Hugging Face Inference API via OpenAI-like client...")
+    # Make the streaming request to the HF Inference API
     for message_chunk in client.chat.completions.create(
+        model=model_to_use,
+        max_tokens=max_tokens,
+        stream=True,
+        temperature=temperature,
+        top_p=top_p,
+        frequency_penalty=frequency_penalty,
+        seed=seed,
+        messages=messages,
     ):
         # Extract the token text from the response chunk
         token_text = message_chunk.choices[0].delta.content
         print(f"Received token: {token_text}")
+        response_text += token_text
         # Yield the partial response to Gradio so it can display in real-time
+        yield response_text
     print("Completed response generation.")
+# ----------------------
+# BUILDING THE INTERFACE
+# ----------------------
+# We will use a "Blocks" layout with two tabs:
+#   1) "Information" tab, which shows helpful info and a table of "Featured Models"
+#   2) "Chat" tab, which holds our ChatInterface and associated controls
+with gr.Blocks(theme="Nymbo/Nymbo_Theme") as demo:
+    # -----------------
+    # TAB: INFORMATION
+    # -----------------
+    with gr.Tab("Information"):
+        # You can add instructions, disclaimers, or helpful text here
+        gr.Markdown("## Welcome to Serverless-TextGen-Hub - Information")
+        # Accordion for Featured Models (table)
+        with gr.Accordion("Featured Models (WiP)", open=False):
+            gr.HTML(
+                """
+                <p><a href="https://huggingface.co/models?inference=warm&pipeline_tag=chat&sort=trending" target="_blank">See all available text models on Hugging Face</a></p>
+                <table style="width:100%; text-align:center; margin:auto;">
+                    <tr>
+                        <th>Model Name</th>
+                        <th>Supported</th>
+                        <th>Notes</th>
+                    </tr>
+                    <tr>
+                        <td>meta-llama/Llama-3.3-70B-Instruct</td>
+                        <td>✅</td>
+                        <td>Default model, if none is provided in the 'Custom Model' box.</td>
+                    </tr>
+                    <tr>
+                        <td>meta-llama/Llama-3.2-3B-Instruct</td>
+                        <td>✅</td>
+                        <td>Smaller Llama-based instruct model for faster responses.</td>
+                    </tr>
+                    <tr>
+                        <td>microsoft/Phi-3.5-mini-instruct</td>
+                        <td>✅</td>
+                        <td>A smaller instruct model from Microsoft.</td>
+                    </tr>
+                    <tr>
+                        <td>Qwen/Qwen2.5-72B-Instruct</td>
+                        <td>✅</td>
+                        <td>Large-scale Qwen-based model.</td>
+                    </tr>
+                </table>
+                """
+            )
+        # Accordion for Parameters Overview
+        with gr.Accordion("Parameters Overview", open=False):
+            gr.Markdown(
+                """
+                **Here is a brief overview of the main parameters for text generation:**
+                - **Max Tokens**: The maximum number of tokens (think of these as word-pieces) the model will generate in its response.
+                - **Temperature**: Controls how "creative" or random the output is. Lower values = more deterministic, higher values = more varied.
+                - **Top-P**: Similar to temperature, but uses nucleus sampling. Top-P defines the probability mass of the tokens to sample from. For example, `top_p=0.9` means "use the top 90% probable tokens."
+                - **Frequency Penalty**: A higher penalty discourages repeated tokens, helping reduce repetitive answers.
+                - **Seed**: You can set a seed for deterministic results. `-1` means random each time.
+                **Featured Models** can also be selected. If you want to override the model, you may specify a custom Hugging Face model path in the "Custom Model" text box.
+                ---
+                If you are new to text-generation parameters, the defaults are a great place to start!
+                """
+            )
+    # -----------
+    # TAB: CHAT
+    # -----------
+    with gr.Tab("Chat"):
+        gr.Markdown("## Chat with the TextGen Model")
+        # Create a Chatbot component with a specified height
+        chatbot = gr.Chatbot(height=600)
+        print("Chatbot interface created.")
+        # Create textboxes and sliders for system prompt, tokens, and other parameters
+        system_message_box = gr.Textbox(
+            value="",
+            label="System message",
+            info="You can use this to provide instructions or context to the assistant. Leave empty if not needed."
         )
+        max_tokens_slider = gr.Slider(
+            minimum=1,
+            maximum=4096,
+            value=512,
+            step=1,
+            label="Max new tokens",
+            info="Controls the maximum length of the output. Keep an eye on your usage!"
         )
+        temperature_slider = gr.Slider(
+            minimum=0.1,
+            maximum=4.0,
+            value=0.7,
+            step=0.1,
+            label="Temperature",
+            info="Controls creativity. Higher values = more random replies, lower = more deterministic."
         )
+        top_p_slider = gr.Slider(
+            minimum=0.1,
+            maximum=1.0,
+            value=0.95,
+            step=0.05,
+            label="Top-P",
+            info="Use nucleus sampling with probability mass cutoff. 1.0 includes all tokens."
         )
+        frequency_penalty_slider = gr.Slider(
+            minimum=-2.0,
+            maximum=2.0,
+            value=0.0,
+            step=0.1,
+            label="Frequency Penalty",
+            info="Penalize repeated tokens to avoid repetition in output."
+        )
+        seed_slider = gr.Slider(
+            minimum=-1,
+            maximum=65535,
+            value=-1,
+            step=1,
+            label="Seed (-1 for random)",
+            info="Fixing a seed (0 to 65535) can make results reproducible. -1 picks a random seed each time."
+        )
+        # The custom_model_box is what the respond function sees as "custom_model"
+        custom_model_box = gr.Textbox(
+            value="",
+            label="Custom Model",
+            info="(Optional) Provide a custom Hugging Face model path. Overrides any selected featured model."
+        )
+        # Function to update the custom model box when a featured model is selected
+        def set_custom_model_from_radio(selected):
+            print(f"Featured model selected: {selected}")
+            return selected
+        print("ChatInterface object created.")
+        # The main ChatInterface call
+        chat_interface = gr.ChatInterface(
+            fn=respond,  # The function to handle responses
+            additional_inputs=[
+                system_message_box,
+                max_tokens_slider,
+                temperature_slider,
+                top_p_slider,
+                frequency_penalty_slider,
+                seed_slider,
+                custom_model_box
+            ],
+            fill_height=True,  # Let the chatbot fill the container height
+            chatbot=chatbot,   # The Chatbot UI component
+            theme="Nymbo/Nymbo_Theme",
+        )
+        print("Gradio interface for Chat created.")
+        # -----------
+        # ADDING THE "FEATURED MODELS" ACCORDION (Same logic as before)
+        # -----------
+        with gr.Accordion("Featured Models", open=False):
+            model_search_box = gr.Textbox(
+                label="Filter Models",
+                placeholder="Search for a featured model...",
+                lines=1
+            )
+            print("Model search box created.")
+            # Sample list of popular text models
+            models_list = [
+                "meta-llama/Llama-3.3-70B-Instruct",
+                "meta-llama/Llama-3.2-3B-Instruct",
+                "meta-llama/Llama-3.2-1B-Instruct",
+                "meta-llama/Llama-3.1-8B-Instruct",
+                "NousResearch/Hermes-3-Llama-3.1-8B",
+                "google/gemma-2-27b-it",
+                "google/gemma-2-9b-it",
+                "google/gemma-2-2b-it",
+                "mistralai/Mistral-Nemo-Instruct-2407",
+                "mistralai/Mixtral-8x7B-Instruct-v0.1",
+                "mistralai/Mistral-7B-Instruct-v0.3",
+                "Qwen/Qwen2.5-72B-Instruct",
+                "Qwen/QwQ-32B-Preview",
+                "PowerInfer/SmallThinker-3B-Preview",
+                "HuggingFaceTB/SmolLM2-1.7B-Instruct",
+                "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
+                "microsoft/Phi-3.5-mini-instruct",
+            ]
+            print("Models list initialized.")
+            featured_model_radio = gr.Radio(
+                label="Select a model below",
+                choices=models_list,
+                value="meta-llama/Llama-3.3-70B-Instruct",
+                interactive=True
+            )
+            print("Featured models radio button created.")
+            def filter_models(search_term):
+                print(f"Filtering models with search term: {search_term}")
+                filtered = [m for m in models_list if search_term.lower() in m.lower()]
+                print(f"Filtered models: {filtered}")
+                return gr.update(choices=filtered)
+            model_search_box.change(
+                fn=filter_models,
+                inputs=model_search_box,
+                outputs=featured_model_radio
+            )
+            print("Model search box change event linked.")
+            featured_model_radio.change(
+                fn=set_custom_model_from_radio,
+                inputs=featured_model_radio,
+                outputs=custom_model_box
+            )
+            print("Featured model radio button change event linked.")
 print("Gradio interface initialized.")
+# ------------------------
+# MAIN ENTRY POINT
+# ------------------------
 if __name__ == "__main__":
     print("Launching the demo application.")
     demo.launch()