Spaces:

Nymbo
/

Serverless-TextGen-Hub

Running

App Files Files Community

Nymbo committed 9 days ago

Commit

52ad57a

verified ·

1 Parent(s): c20c4dd

OKAY LETS SIMPLIFY THS LOL

Browse files

Files changed (1) hide show

app.py +78 -82

app.py CHANGED Viewed

@@ -21,21 +21,29 @@ def respond(
     temperature,
     top_p,
     frequency_penalty,
-    seed,
-    model,
-    custom_model
 ):
     """
-    Handles the chatbot response with given parameters.
     """
     print(f"Received message: {message}")
     print(f"History: {history}")
     print(f"System message: {system_message}")
-    print(f"Model: {model}, Custom Model: {custom_model}")
-    # Use custom model if provided, else use selected model
-    selected_model = custom_model.strip() if custom_model.strip() else model
-    print(f"Selected model: {selected_model}")
     # Construct the messages array required by the API
     messages = [{"role": "system", "content": system_message}]
@@ -58,107 +66,95 @@ def respond(
     response = ""
     print("Sending request to OpenAI API.")
-    # Make the streaming request to the HF Inference API via OpenAI-like client
     for message_chunk in client.chat.completions.create(
-        model=selected_model,
         max_tokens=max_tokens,
-        stream=True,
         temperature=temperature,
         top_p=top_p,
-        frequency_penalty=frequency_penalty,
-        seed=seed if seed != -1 else None,
         messages=messages,
     ):
         # Extract the token text from the response chunk
         token_text = message_chunk.choices[0].delta.content
         print(f"Received token: {token_text}")
         response += token_text
         yield response
     print("Completed response generation.")
-# Create a Chatbot component
 chatbot = gr.Chatbot(height=600)
 print("Chatbot interface created.")
-# Define the featured models for the dropdown
-models_list = [
-    "meta-llama/Llama-3.3-70B-Instruct",
-    "bigscience/bloom-176b",
-    "gpt-j-6b",
-    "opt-30b",
-    "flan-t5-xxl",
 ]
-# Function to filter models based on user input
 def filter_models(search_term):
-    return [m for m in models_list if search_term.lower() in m.lower()]
-# Gradio interface
 with gr.Blocks(theme="Nymbo/Nymbo_Theme") as demo:
-    with gr.Row():
-        chatbot = gr.Chatbot(height=600)
-    with gr.Tab("Chat Interface"):
-        with gr.Row():
-            user_input = gr.Textbox(label="Your Message", placeholder="Type your message here...")
-        with gr.Row():
-            system_message = gr.Textbox(value="", label="System Message")
-        with gr.Row():
-            max_tokens = gr.Slider(minimum=1, maximum=4096, value=512, step=1, label="Max Tokens")
-            temperature = gr.Slider(minimum=0.1, maximum=2.0, value=0.7, step=0.1, label="Temperature")
-        with gr.Row():
-            top_p = gr.Slider(minimum=0.1, maximum=1.0, value=0.9, step=0.05, label="Top-P")
-            frequency_penalty = gr.Slider(minimum=-2.0, maximum=2.0, value=0.0, step=0.1, label="Frequency Penalty")
-            seed = gr.Slider(minimum=-1, maximum=65535, value=-1, step=1, label="Seed (-1 for random)")
-        with gr.Row():
-            model = gr.Dropdown(label="Select a Model", choices=models_list, value="meta-llama/Llama-3.3-70B-Instruct")
-            custom_model = gr.Textbox(label="Custom Model", placeholder="Enter custom model path")
-        with gr.Row():
-            run_button = gr.Button("Generate Response")
-    with gr.Tab("Information"):
-        with gr.Accordion("Featured Models", open=False):
-            gr.HTML(
-                """
-                <table>
-                    <tr><th>Model Name</th><th>Description</th></tr>
-                    <tr><td>meta-llama/Llama-3.3-70B-Instruct</td><td>Instruction-tuned LLaMA model</td></tr>
-                    <tr><td>bigscience/bloom-176b</td><td>Multilingual large language model</td></tr>
-                    <tr><td>gpt-j-6b</td><td>Open-source GPT model</td></tr>
-                    <tr><td>opt-30b</td><td>Meta's OPT model</td></tr>
-                    <tr><td>flan-t5-xxl</td><td>Google's Flan-tuned T5 XXL</td></tr>
-                </table>
-                """
-            )
-        with gr.Accordion("Parameters Overview", open=False):
-            gr.Markdown(
-                """
-                ### Parameters Overview
-                - **Max Tokens**: Maximum number of tokens in the response.
-                - **Temperature**: Controls the randomness of responses. Lower values make the output more deterministic.
-                - **Top-P**: Controls the diversity of responses by limiting the token selection to a probability mass.
-                - **Frequency Penalty**: Penalizes repeated tokens in the output.
-                - **Seed**: Fixes randomness for reproducibility. Use -1 for a random seed.
-                """
-            )
-    run_button.click(
-        respond,
-        inputs=[
-            user_input,
-            chatbot.state,
             system_message,
             max_tokens,
             temperature,
             top_p,
             frequency_penalty,
-            seed,
-            model,
-            custom_model
         ],
-        outputs=chatbot
     )
-print("Launching the demo application.")
-demo.launch()

     temperature,
     top_p,
     frequency_penalty,
+    seed
 ):
     """
+    This function handles the chatbot response. It takes in:
+    - message: the user's new message
+    - history: the list of previous messages, each as a tuple (user_msg, assistant_msg)
+    - system_message: the system prompt
+    - max_tokens: the maximum number of tokens to generate in the response
+    - temperature: sampling temperature
+    - top_p: top-p (nucleus) sampling
+    - frequency_penalty: penalize repeated tokens in the output
+    - seed: a fixed seed for reproducibility; -1 will mean 'random'
     """
     print(f"Received message: {message}")
     print(f"History: {history}")
     print(f"System message: {system_message}")
+    print(f"Max tokens: {max_tokens}, Temperature: {temperature}, Top-P: {top_p}")
+    print(f"Frequency Penalty: {frequency_penalty}, Seed: {seed}")
+    # Convert seed to None if -1 (meaning random)
+    if seed == -1:
+        seed = None
     # Construct the messages array required by the API
     messages = [{"role": "system", "content": system_message}]
     response = ""
     print("Sending request to OpenAI API.")
+    # Make the streaming request to the HF Inference API via openai-like client
     for message_chunk in client.chat.completions.create(
+        model="meta-llama/Llama-3.3-70B-Instruct",   # You can update this to your specific model
         max_tokens=max_tokens,
+        stream=True,  # Stream the response
         temperature=temperature,
         top_p=top_p,
+        frequency_penalty=frequency_penalty,  # <-- NEW
+        seed=seed,                             # <-- NEW
         messages=messages,
     ):
         # Extract the token text from the response chunk
         token_text = message_chunk.choices[0].delta.content
         print(f"Received token: {token_text}")
         response += token_text
+        # As streaming progresses, yield partial output
         yield response
     print("Completed response generation.")
+# Create a Chatbot component with a specified height
 chatbot = gr.Chatbot(height=600)
 print("Chatbot interface created.")
+MODELS_LIST = [
+    "meta-llama/Llama-3.1-8B-Instruct",
+    "microsoft/Phi-3.5-mini-instruct",
 ]
 def filter_models(search_term):
+    """
+    Simple function to filter the placeholder model list based on the user's input
+    """
+    filtered_models = [m for m in MODELS_LIST if search_term.lower() in m.lower()]
+    return gr.update(choices=filtered_models)
+# --------------------------------------
+# REBUILD THE INTERFACE USING BLOCKS
+# --------------------------------------
+print("Building Gradio interface with Blocks...")
 with gr.Blocks(theme="Nymbo/Nymbo_Theme") as demo:
+    # Title
+    gr.Markdown("# Serverless-TextGen-Hub")
+    # Accordion: Parameters (sliders, etc.)
+    with gr.Accordion("Parameters", open=True):
+        system_message = gr.Textbox(value="", label="System message")
+        max_tokens = gr.Slider(minimum=1,   maximum=4096, value=512,   step=1,   label="Max new tokens")
+        temperature = gr.Slider(minimum=0.1, maximum=4.0,  value=0.7,  step=0.1, label="Temperature")
+        top_p = gr.Slider(minimum=0.1, maximum=1.0,  value=0.95, step=0.05, label="Top-P")
+        frequency_penalty = gr.Slider(minimum=-2.0, maximum=2.0, value=0.0, step=0.1, label="Frequency Penalty")
+        seed = gr.Slider(minimum=-1,  maximum=65535, value=-1,  step=1,    label="Seed (-1 for random)")
+    # Accordion: Featured Models (Below the parameters)
+    with gr.Accordion("Featured Models", open=False):
+        model_search = gr.Textbox(
+            label="Filter Models",
+            placeholder="Search for a featured model...",
+            lines=1
+        )
+        model_radio = gr.Radio(
+            label="Select a model below",
+            value=MODELS_LIST[0],  # default
+            choices=MODELS_LIST,
+            interactive=True
+        )
+        model_search.change(filter_models, inputs=model_search, outputs=model_radio)
+    # The main ChatInterface
+    chat_interface = gr.ChatInterface(
+        fn=respond,
+        additional_inputs=[
             system_message,
             max_tokens,
             temperature,
             top_p,
             frequency_penalty,
+            seed
         ],
+        fill_height=True,
+        chatbot=chatbot,
+        theme="Nymbo/Nymbo_Theme",
+        title="Serverless-TextGen-Hub",
+        description="A comprehensive UI for text generation using the HF Inference API."
     )
+print("Gradio interface initialized.")
+if __name__ == "__main__":
+    print("Launching the demo application.")
+    demo.launch()