Update app.py
app.py CHANGED
@@ -2,10 +2,6 @@ import gradio as gr
 from openai import OpenAI
 import os
 
-# =============================
-# GLOBAL SETUP / CLIENT
-# =============================
-
 # Retrieve the access token from the environment variable
 ACCESS_TOKEN = os.getenv("HF_TOKEN")
 print("Access token loaded.")
@@ -17,25 +13,23 @@ client = OpenAI(
 )
 print("OpenAI client initialized.")
 
-#
-#
-
-
-
-
-    "
-    "
-    "microsoft/DialoGPT-large",
-    "OpenAssistant/oasst-sft-1-pythia-12b",
-    "tiiuae/falcon-7b-instruct",
-    "meta-llama/Llama-3.3-70B-Instruct"
+# We'll define a list of placeholder featured models for demonstration.
+# In real usage, replace them with actual model names available on Hugging Face.
+models_list = [
+    "PlaceholderModel1",
+    "PlaceholderModel2",
+    "PlaceholderModel3",
+    "PlaceholderModel4",
+    "PlaceholderModel5"
 ]
 
-def filter_featured_models(search_term
+def filter_featured_models(search_term):
     """
-
+    Filters the 'models_list' based on text entered in the search box.
+    Returns a gr.update object that changes the choices available
+    in the 'featured_models_radio'.
     """
-    filtered = [m for m in
+    filtered = [m for m in models_list if search_term.lower() in m.lower()]
     return gr.update(choices=filtered)
 
 
@@ -49,31 +43,36 @@ def respond(
     frequency_penalty,
     seed,
     custom_model,
-
+    selected_model
 ):
     """
     This function handles the chatbot response. It takes in:
     - message: the user's new message
     - history: the list of previous messages, each as a tuple (user_msg, assistant_msg)
     - system_message: the system prompt
-    - max_tokens
-    -
-    -
+    - max_tokens: the maximum number of tokens to generate in the response
+    - temperature: sampling temperature
+    - top_p: top-p (nucleus) sampling
+    - frequency_penalty: penalize repeated tokens in the output
+    - seed: a fixed seed for reproducibility; -1 will mean 'random'
+    - custom_model: a custom Hugging Face model name (if any)
+    - selected_model: a model name chosen from the featured models radio button
     """
+
     print(f"Received message: {message}")
     print(f"History: {history}")
     print(f"System message: {system_message}")
     print(f"Max tokens: {max_tokens}, Temperature: {temperature}, Top-P: {top_p}")
     print(f"Frequency Penalty: {frequency_penalty}, Seed: {seed}")
     print(f"Custom model: {custom_model}")
-    print(f"Selected featured model: {
+    print(f"Selected featured model: {selected_model}")
 
     # Convert seed to None if -1 (meaning random)
     if seed == -1:
         seed = None
 
     # Construct the messages array required by the API
-    messages = [{"role": "system", "content": system_message}]
+    messages = [{"role": "system", "content": system_message}]
 
     # Add conversation history to the context
     for val in history:
@@ -89,19 +88,20 @@ def respond(
     # Append the latest user message
     messages.append({"role": "user", "content": message})
 
-    #
-    # 1) If
-    # 2)
-    # 3)
-    model_to_use = "meta-llama/Llama-3.3-70B-Instruct"  # Default
+    # Decide which model to use:
+    # 1) If the user provided a custom model, use it.
+    # 2) Else if they chose a featured model, use it.
+    # 3) Otherwise, fall back to a default model.
     if custom_model.strip() != "":
         model_to_use = custom_model.strip()
-    elif
-        model_to_use =
+    elif selected_model is not None and selected_model.strip() != "":
+        model_to_use = selected_model.strip()
+    else:
+        model_to_use = "meta-llama/Llama-3.3-70B-Instruct"  # Default fallback
 
     print(f"Model selected for inference: {model_to_use}")
 
-    # Start
+    # Start with an empty string to build the response as tokens stream in
     response = ""
     print("Sending request to OpenAI API.")
 
@@ -109,7 +109,7 @@ def respond(
     for message_chunk in client.chat.completions.create(
         model=model_to_use,
         max_tokens=max_tokens,
-        stream=True,
+        stream=True,
         temperature=temperature,
         top_p=top_p,
         frequency_penalty=frequency_penalty,
@@ -118,168 +118,141 @@ def respond(
     ):
         # Extract the token text from the response chunk
        token_text = message_chunk.choices[0].delta.content
-        print(f"Received token: {token_text}"
+        print(f"Received token: {token_text}")
         response += token_text
         # Yield the partial response to Gradio so it can display in real-time
         yield response
 
     print("Completed response generation.")
 
-# =============================
-# MAIN UI
-# =============================
 
-    [... 127 deleted lines truncated to empty in the rendered diff ...]
-            """
-        )
-
-        with gr.Accordion("Parameters Overview", open=False):
-            gr.Markdown(
-                """
-                ### Explanation of Key Parameters
-
-                - **System Message**: Provides context or initial instructions to the model.
-                - **Max Tokens**: The maximum number of tokens (roughly pieces of words) in the generated response.
-                - **Temperature**: Higher values produce more random/creative outputs, while lower values make the output more focused and deterministic.
-                - **Top-P**: Controls nucleus sampling. The model considers only the tokens whose probability mass exceeds this value.
-                - **Frequency Penalty**: Penalizes repeated tokens. Positive values (like 1.0) reduce repetition in the output. Negative values can increase repetition.
-                - **Seed**: Determines reproducibility. Set it to a fixed integer for consistent results; `-1` is random each time.
-                - **Custom Model**: Overwrites the featured model. Provide the Hugging Face path (e.g., `openai/whisper-base`) for your own usage.
-
-                Use these settings to guide how the model generates text. If in doubt, stick to defaults and experiment in small increments.
-                """
-            )
-
-    return main_interface
-
-# If run as a standalone script, just launch.
+########################
+# GRADIO APP LAYOUT
+########################
+
+# We'll build a custom Blocks layout so we can have:
+# - A Featured Models accordion with a search box
+# - Our ChatInterface to handle the conversation
+# - Additional sliders and textboxes for settings (like the original code)
+########################
+
+with gr.Blocks(theme="Nymbo/Nymbo_Theme") as demo:
+    gr.Markdown("## Serverless Text Generation Hub")
+    gr.Markdown(
+        "An all-in-one UI for chatting with text-generation models on Hugging Face's Inference API."
+    )
+
+    # We keep a Chatbot component for the conversation display
+    chatbot = gr.Chatbot(height=600, label="Chat Preview")
+
+    # Textbox for system message
+    system_message_box = gr.Textbox(
+        value="",
+        label="System Message",
+        placeholder="Enter a system prompt if you want (optional).",
+    )
+
+    # Slider for max_tokens
+    max_tokens_slider = gr.Slider(
+        minimum=1,
+        maximum=4096,
+        value=512,
+        step=1,
+        label="Max new tokens",
+    )
+
+    # Slider for temperature
+    temperature_slider = gr.Slider(
+        minimum=0.1,
+        maximum=4.0,
+        value=0.7,
+        step=0.1,
+        label="Temperature",
+    )
+
+    # Slider for top_p
+    top_p_slider = gr.Slider(
+        minimum=0.1,
+        maximum=1.0,
+        value=0.95,
+        step=0.05,
+        label="Top-P",
+    )
+
+    # Slider for frequency penalty
+    freq_penalty_slider = gr.Slider(
+        minimum=-2.0,
+        maximum=2.0,
+        value=0.0,
+        step=0.1,
+        label="Frequency Penalty",
+    )
+
+    # Slider for seed
+    seed_slider = gr.Slider(
+        minimum=-1,
+        maximum=65535,  # Arbitrary upper limit for demonstration
+        value=-1,
+        step=1,
+        label="Seed (-1 for random)",
+    )
+
+    # Custom Model textbox
+    custom_model_box = gr.Textbox(
+        value="",
+        label="Custom Model",
+        info="(Optional) Provide a custom Hugging Face model path. This will override the selected Featured Model if not empty."
+    )
+
+    # Accordion for featured models
+    with gr.Accordion("Featured Models", open=False):
+        # Textbox for filtering the featured models
+        model_search_box = gr.Textbox(
+            label="Filter Models",
+            placeholder="Search for a featured model...",
+            lines=1,
+        )
+        # Radio for selecting the desired model
+        featured_models_radio = gr.Radio(
+            label="Select a featured model below",
+            choices=models_list,  # Start with the entire list
+            value=None,  # No default
+            interactive=True
+        )
+
+        # We connect the model_search_box to the filter function
+        model_search_box.change(
+            filter_featured_models,
+            inputs=model_search_box,
+            outputs=featured_models_radio
+        )
+
+    # Now we create our ChatInterface
+    # We pass all the extra components as additional_inputs
+    interface = gr.ChatInterface(
+        fn=respond,
+        chatbot=chatbot,
+        additional_inputs=[
+            system_message_box,
+            max_tokens_slider,
+            temperature_slider,
+            top_p_slider,
+            freq_penalty_slider,
+            seed_slider,
+            custom_model_box,
+            featured_models_radio
+        ],
+        theme="Nymbo/Nymbo_Theme",
+        title="Serverless TextGen Hub with Featured Models",
+        description=(
+            "Use the sliders and textboxes to control generation parameters. "
+            "Pick a model from 'Featured Models' or specify a custom model path."
+        ),
+        # Fill the screen height
+        fill_height=True
+    )
+
+# If you want the script to be directly executable, launch the demo here:
 if __name__ == "__main__":
-    print("
-
-    ui.launch()
+    print("Launching the demo application...")
+    demo.launch()
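
A quick way to sanity-check the new filter_featured_models helper outside the UI is to call it directly. This is a minimal sketch, assuming the placeholder models_list from this commit; gr.update(...) returns a plain dict describing the component update rather than a new component:

import gradio as gr

# Same placeholder list as in the commit.
models_list = [
    "PlaceholderModel1",
    "PlaceholderModel2",
    "PlaceholderModel3",
    "PlaceholderModel4",
    "PlaceholderModel5",
]

def filter_featured_models(search_term):
    # Case-insensitive substring match over the featured models.
    filtered = [m for m in models_list if search_term.lower() in m.lower()]
    return gr.update(choices=filtered)

print(filter_featured_models("model1"))  # choices: ["PlaceholderModel1"]
print(filter_featured_models(""))        # an empty search matches every model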
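
The model-selection precedence added in respond (custom textbox, then featured radio, then default) is easy to unit-test if factored into a helper; the sketch below is illustrative, not code from the commit. The commented guard at the end flags an assumption worth checking: streamed chunks can arrive with delta.content set to None, in which case `response += token_text` in the commit's loop would raise a TypeError.

DEFAULT_MODEL = "meta-llama/Llama-3.3-70B-Instruct"

def pick_model(custom_model, selected_model, default=DEFAULT_MODEL):
    """Hypothetical helper mirroring the commit's precedence rules."""
    if custom_model.strip():
        return custom_model.strip()
    if selected_model and selected_model.strip():
        return selected_model.strip()
    return default

assert pick_model("my-org/my-model", "PlaceholderModel1") == "my-org/my-model"
assert pick_model("", "PlaceholderModel1") == "PlaceholderModel1"
assert pick_model("", None) == DEFAULT_MODEL

# Possible hardening for the streaming loop (an assumption, not in the commit):
# if token_text:  # delta.content may be None on some chunks
#     response += token_text
#     yield response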
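
One wiring detail the layout relies on: gr.ChatInterface invokes fn as fn(message, history, *additional_inputs), so the order of the additional_inputs list must match the parameter order of respond after history — which the commit's list does, ending with featured_models_radio feeding selected_model. A minimal standalone sketch of the same pattern, assuming a Gradio release where ChatInterface can be nested inside Blocks (the echo function here is hypothetical, not the commit's respond):

import gradio as gr

def echo(message, history, system_message, max_tokens):
    # Extra parameters arrive in the order of additional_inputs.
    return f"[system={system_message!r}, max_tokens={max_tokens}] {message}"

with gr.Blocks() as sketch:
    system_box = gr.Textbox(label="System Message")
    tokens_slider = gr.Slider(1, 4096, value=512, step=1, label="Max new tokens")
    gr.ChatInterface(fn=echo, additional_inputs=[system_box, tokens_slider])

if __name__ == "__main__":
    sketch.launch()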