Spaces:

Nymbo
/

Serverless-TextGen-Hub

Running

App Files Files Community

Nymbo committed 8 days ago

Commit

fab24df

verified ·

1 Parent(s): 7255410

Update app.py

Browse files

Files changed (1) hide show

app.py +187 -70

app.py CHANGED Viewed

@@ -1,6 +1,6 @@
 import gradio as gr
-from openai import OpenAI
 import os
 # Retrieve the access token from the environment variable
 ACCESS_TOKEN = os.getenv("HF_TOKEN")
@@ -22,7 +22,8 @@ def respond(
     top_p,
     frequency_penalty,
     seed,
-    custom_model
 ):
     """
     This function handles the chatbot response. It takes in:
@@ -34,7 +35,8 @@ def respond(
     - top_p: top-p (nucleus) sampling
     - frequency_penalty: penalize repeated tokens in the output
     - seed: a fixed seed for reproducibility; -1 will mean 'random'
-    - custom_model: the user-provided custom model name (if any)
     """
     print(f"Received message: {message}")
@@ -43,15 +45,16 @@ def respond(
     print(f"Max tokens: {max_tokens}, Temperature: {temperature}, Top-P: {top_p}")
     print(f"Frequency Penalty: {frequency_penalty}, Seed: {seed}")
     print(f"Custom model: {custom_model}")
-    # Convert seed to None if -1 (meaning random)
     if seed == -1:
         seed = None
-    # Construct the messages array required by the API
-    messages = [{"role": "system", "content": system_message}]
-    # Add conversation history to the context
     for val in history:
         user_part = val[0]
         assistant_part = val[1]
@@ -62,22 +65,26 @@ def respond(
             messages.append({"role": "assistant", "content": assistant_part})
             print(f"Added assistant message to context: {assistant_part}")
-    # Append the latest user message
     messages.append({"role": "user", "content": message})
-    # Determine which model to use: either custom_model or a default
-    model_to_use = custom_model.strip() if custom_model.strip() != "" else "meta-llama/Llama-3.3-70B-Instruct"
     print(f"Model selected for inference: {model_to_use}")
-    # Start with an empty string to build the response as tokens stream in
     response = ""
     print("Sending request to OpenAI API.")
-    # Make the streaming request to the HF Inference API via openai-like client
     for message_chunk in client.chat.completions.create(
-        model=model_to_use,              # Use either the user-provided custom model or default
         max_tokens=max_tokens,
-        stream=True,                     # Stream the response
         temperature=temperature,
         top_p=top_p,
         frequency_penalty=frequency_penalty,
@@ -88,68 +95,178 @@ def respond(
         token_text = message_chunk.choices[0].delta.content
         print(f"Received token: {token_text}")
         response += token_text
-        # Yield the partial response to Gradio so it can display in real-time
         yield response
     print("Completed response generation.")
-# Create a Chatbot component with a specified height
-chatbot = gr.Chatbot(height=600)
-print("Chatbot interface created.")
-# Create the Gradio ChatInterface
-# We add two new sliders for Frequency Penalty, Seed, and now a new "Custom Model" text box.
-demo = gr.ChatInterface(
-    fn=respond,
-    additional_inputs=[
-        gr.Textbox(value="", label="System message"),
-        gr.Slider(
-            minimum=1,
-            maximum=4096,
-            value=512,
-            step=1,
-            label="Max new tokens"
-        ),
-        gr.Slider(
-            minimum=0.1,
-            maximum=4.0,
-            value=0.7,
-            step=0.1,
-            label="Temperature"
-        ),
-        gr.Slider(
-            minimum=0.1,
-            maximum=1.0,
-            value=0.95,
-            step=0.05,
-            label="Top-P"
-        ),
-        gr.Slider(
-            minimum=-2.0,
-            maximum=2.0,
-            value=0.0,
-            step=0.1,
-            label="Frequency Penalty"
-        ),
-        gr.Slider(
-            minimum=-1,
-            maximum=65535,  # Arbitrary upper limit for demonstration
-            value=-1,
-            step=1,
-            label="Seed (-1 for random)"
-        ),
-        gr.Textbox(
-            value="",
-            label="Custom Model",
-            info="(Optional) Provide a custom Hugging Face model path. This will override the default model if not empty."
-        ),
-    ],
-    fill_height=True,
-    chatbot=chatbot,
-    theme="Nymbo/Nymbo_Theme",
 )
-print("Gradio interface initialized.")
 if __name__ == "__main__":
     print("Launching the demo application.")
     demo.launch()

 import gradio as gr
 import os
+from openai import OpenAI
 # Retrieve the access token from the environment variable
 ACCESS_TOKEN = os.getenv("HF_TOKEN")
     top_p,
     frequency_penalty,
     seed,
+    custom_model,
+    featured_model
 ):
     """
     This function handles the chatbot response. It takes in:
     - top_p: top-p (nucleus) sampling
     - frequency_penalty: penalize repeated tokens in the output
     - seed: a fixed seed for reproducibility; -1 will mean 'random'
+    - custom_model: a user-provided custom model name (if any)
+    - featured_model: the user-selected model from the radio
     """
     print(f"Received message: {message}")
     print(f"Max tokens: {max_tokens}, Temperature: {temperature}, Top-P: {top_p}")
     print(f"Frequency Penalty: {frequency_penalty}, Seed: {seed}")
     print(f"Custom model: {custom_model}")
+    print(f"Featured model: {featured_model}")
+    # Convert seed to None if -1 (meaning "random")
     if seed == -1:
         seed = None
+    # Construct the conversation array required by the HF Inference API
+    messages = [{"role": "system", "content": system_message or ""}]
+    # Add conversation history
     for val in history:
         user_part = val[0]
         assistant_part = val[1]
             messages.append({"role": "assistant", "content": assistant_part})
             print(f"Added assistant message to context: {assistant_part}")
+    # The latest user message
     messages.append({"role": "user", "content": message})
+    # If custom_model is not empty, it overrides the featured model
+    model_to_use = custom_model.strip() if custom_model.strip() != "" else featured_model.strip()
+    # If somehow both are empty, default to an example model
+    if model_to_use == "":
+        model_to_use = "meta-llama/Llama-3.3-70B-Instruct"
     print(f"Model selected for inference: {model_to_use}")
+    # Build the response from the streaming tokens
     response = ""
     print("Sending request to OpenAI API.")
+    # Streaming request to the HF Inference API
     for message_chunk in client.chat.completions.create(
+        model=model_to_use,
         max_tokens=max_tokens,
+        stream=True,
         temperature=temperature,
         top_p=top_p,
         frequency_penalty=frequency_penalty,
         token_text = message_chunk.choices[0].delta.content
         print(f"Received token: {token_text}")
         response += token_text
+        # Yield partial response so Gradio can display in real-time
         yield response
     print("Completed response generation.")
+#
+# Building the Gradio interface below
+#
+print("Building the Gradio interface with advanced features...")
+# --- Featured models offered in the radio selector (a tuple of model IDs, for demonstration). You can customize as you wish. ---
+models_list = (
+    "meta-llama/Llama-3.3-70B-Instruct",
+    "BigScience/bloom",
+    "openai/gpt-4",
+    "google/flan-t5-xxl",
+    "EleutherAI/gpt-j-6B",
+    "YourSpecialModel/awesome-13B",
 )
+# This function filters the above models_list by a given search term:
+def filter_models(search_term):
+    filtered = [m for m in models_list if search_term.lower() in m.lower()]
+    return gr.update(choices=filtered)
+# Build the UI in a Blocks layout: the Chatbot on top, then a row holding a settings column (with a "Featured Models" Accordion) and a message-input column
+with gr.Blocks(theme="Nymbo/Nymbo_Theme") as demo:
+    gr.Markdown("## Serverless-TextGen-Hub\nA comprehensive UI for text generation, including featured models and custom model overrides.")
+    # The Chatbot display; 'bot_respond' streams the updated history into it
+    chatbot = gr.Chatbot(label="TextGen Chatbot", height=600)
+    with gr.Row():
+        with gr.Column(scale=1):
+            # Settings column: these components are inputs to 'bot_respond', which forwards their values to 'respond'
+            # System message
+            system_message = gr.Textbox(label="System message", placeholder="Set the system role instructions here.")
+            # Accordion for selecting the model
+            with gr.Accordion("Featured Models", open=True):
+                model_search = gr.Textbox(
+                    label="Filter Models",
+                    placeholder="Search for a featured model...",
+                    lines=1
+                )
+                featured_model = gr.Radio(
+                    label="Select a Featured Model Below",
+                    choices=models_list,
+                    value="meta-llama/Llama-3.3-70B-Instruct",  # default
+                    interactive=True,
+                )
+                # Link the search box to 'filter_models', whose result updates the radio's choices
+                model_search.change(filter_models, inputs=model_search, outputs=featured_model)
+            # A text box to optionally override the featured model ('respond' prefers it when non-empty)
+            custom_model = gr.Textbox(
+                label="Custom Model",
+                info="(Optional) Provide a custom HF model path. If non-empty, it overrides your featured model choice."
+            )
+            # Sliders for the generation parameters forwarded to 'respond'
+            max_tokens = gr.Slider(
+                minimum=1,
+                maximum=4096,
+                value=512,
+                step=1,
+                label="Max new tokens"
+            )
+            temperature = gr.Slider(
+                minimum=0.1,
+                maximum=4.0,
+                value=0.7,
+                step=0.1,
+                label="Temperature"
+            )
+            top_p = gr.Slider(
+                minimum=0.1,
+                maximum=1.0,
+                value=0.95,
+                step=0.05,
+                label="Top-P"
+            )
+            frequency_penalty = gr.Slider(
+                minimum=-2.0,
+                maximum=2.0,
+                value=0.0,
+                step=0.1,
+                label="Frequency Penalty"
+            )
+            seed = gr.Slider(
+                minimum=-1,
+                maximum=65535,
+                value=-1,  # 'respond' converts -1 to None
+                step=1,
+                label="Seed (-1 for random)"
+            )
+        # The "chat" Column: message box and Submit button (the Chatbot display itself sits above this row)
+        with gr.Column(scale=2):
+            # The conversation history is kept in a state variable that starts out empty
+            state = gr.State([])  # Each element in state is (user_message, assistant_message); assistant_message is None until 'bot_respond' fills it in
+            # Chat input box for the user
+            with gr.Row():
+                txt = gr.Textbox(
+                    label="Enter your message",
+                    placeholder="Type your request here, then press 'Submit'",
+                    lines=3
+                )
+            # Button to submit the message
+            submit_btn = gr.Button("Submit", variant="primary")
+    #
+    # The 'respond' generator is tied to the chatbot display through two small
+    # wrappers: 'user_submit' records the new user turn in the history (state),
+    # and 'bot_respond' streams the model's reply into that turn.
+    #
+    def user_submit(user_message, chat_history):
+        """
+        This function just adds the user message to the history and returns it.
+        The actual text generation will come from 'bot_respond' next.
+        """
+        # Append new user message to the existing conversation
+        chat_history = chat_history + [(user_message, None)]
+        return "", chat_history
+    def bot_respond(chat_history, sys_msg, max_t, temp, top, freq_pen, s, custom_mod, feat_model):
+        """
+        This function calls our 'respond' generator to get the text.
+        It updates the last message in chat_history with the bot's response as it streams.
+        """
+        user_message = chat_history[-1][0] if len(chat_history) > 0 else ""
+        # We call the generator
+        bot_messages = respond(
+            user_message,
+            chat_history[:-1],     # all but the last user message
+            sys_msg,
+            max_t,
+            temp,
+            top,
+            freq_pen,
+            s,
+            custom_mod,
+            feat_model,
+        )
+        # Stream the tokens back
+        final_bot_msg = ""
+        for token_text in bot_messages:
+            final_bot_msg = token_text
+            # We'll update the chatbot in real-time
+            chat_history[-1] = (user_message, final_bot_msg)
+            yield chat_history
+    # Tie the Submit button to the user_submit function, and then to bot_respond
+    submit_btn.click(
+        user_submit,
+        inputs=[txt, state],
+        outputs=[txt, state],
+        queue=False
+    ).then(
+        bot_respond,
+        inputs=[state, system_message, max_tokens, temperature, top_p, frequency_penalty, seed, custom_model, featured_model],
+        outputs=[chatbot],
+        queue=True
+    )
+print("Interface construction complete. Ready to launch!")
+# Launch the Gradio Blocks interface
 if __name__ == "__main__":
     print("Launching the demo application.")
     demo.launch()