Spaces:

Nymbo
/

Serverless-TextGen-Hub

Running

App Files Files Community

Nymbo committed 8 days ago

Commit

c5a20a4

verified ·

1 Parent(s): fab24df

Update app.py

Browse files

Files changed (1) hide show

app.py +70 -187

app.py CHANGED Viewed

@@ -1,6 +1,6 @@
 import gradio as gr
-import os
 from openai import OpenAI
 # Retrieve the access token from the environment variable
 ACCESS_TOKEN = os.getenv("HF_TOKEN")
@@ -22,8 +22,7 @@ def respond(
     top_p,
     frequency_penalty,
     seed,
-    custom_model,
-    featured_model
 ):
     """
     This function handles the chatbot response. It takes in:
@@ -35,8 +34,7 @@ def respond(
     - top_p: top-p (nucleus) sampling
     - frequency_penalty: penalize repeated tokens in the output
     - seed: a fixed seed for reproducibility; -1 will mean 'random'
-    - custom_model: a user-provided custom model name (if any)
-    - featured_model: the user-selected model from the radio
     """
     print(f"Received message: {message}")
@@ -45,16 +43,15 @@ def respond(
     print(f"Max tokens: {max_tokens}, Temperature: {temperature}, Top-P: {top_p}")
     print(f"Frequency Penalty: {frequency_penalty}, Seed: {seed}")
     print(f"Custom model: {custom_model}")
-    print(f"Featured model: {featured_model}")
-    # Convert seed to None if -1 (meaning "random")
     if seed == -1:
         seed = None
-    # Construct the conversation array required by the HF Inference API
-    messages = [{"role": "system", "content": system_message or ""}]
-    # Add conversation history
     for val in history:
         user_part = val[0]
         assistant_part = val[1]
@@ -65,26 +62,22 @@ def respond(
             messages.append({"role": "assistant", "content": assistant_part})
             print(f"Added assistant message to context: {assistant_part}")
-    # The latest user message
     messages.append({"role": "user", "content": message})
-    # If custom_model is not empty, it overrides the featured model
-    model_to_use = custom_model.strip() if custom_model.strip() != "" else featured_model.strip()
-    # If somehow both are empty, default to an example model
-    if model_to_use == "":
-        model_to_use = "meta-llama/Llama-3.3-70B-Instruct"
     print(f"Model selected for inference: {model_to_use}")
-    # Build the response from the streaming tokens
     response = ""
     print("Sending request to OpenAI API.")
-    # Streaming request to the HF Inference API
     for message_chunk in client.chat.completions.create(
-        model=model_to_use,
         max_tokens=max_tokens,
-        stream=True,
         temperature=temperature,
         top_p=top_p,
         frequency_penalty=frequency_penalty,
@@ -95,178 +88,68 @@ def respond(
         token_text = message_chunk.choices[0].delta.content
         print(f"Received token: {token_text}")
         response += token_text
-        # Yield partial response so Gradio can display in real-time
         yield response
     print("Completed response generation.")
-#
-# Building the Gradio interface below
-#
-print("Building the Gradio interface with advanced features...")
-# --- Create a list of 'Featured Models' for demonstration. You can customize as you wish. ---
-models_list = (
-    "meta-llama/Llama-3.3-70B-Instruct",
-    "BigScience/bloom",
-    "openai/gpt-4",
-    "google/flan-t5-xxl",
-    "EleutherAI/gpt-j-6B",
-    "YourSpecialModel/awesome-13B",
 )
-# This function filters the above models_list by a given search term:
-def filter_models(search_term):
-    filtered = [m for m in models_list if search_term.lower() in m.lower()]
-    return gr.update(choices=filtered)
-# We’ll create a Chatbot in a Blocks layout to incorporate an Accordion for "Featured Models"
-with gr.Blocks(theme="Nymbo/Nymbo_Theme") as demo:
-    gr.Markdown("## Serverless-TextGen-Hub\nA comprehensive UI for text generation, including featured models and custom model overrides.")
-    # The Chatbot itself
-    chatbot = gr.Chatbot(label="TextGen Chatbot", height=600)
-    with gr.Row():
-        with gr.Column(scale=1):
-            # We create interactive UI elements that will feed into the 'respond' function
-            # System message
-            system_message = gr.Textbox(label="System message", placeholder="Set the system role instructions here.")
-            # Accordion for selecting the model
-            with gr.Accordion("Featured Models", open=True):
-                model_search = gr.Textbox(
-                    label="Filter Models",
-                    placeholder="Search for a featured model...",
-                    lines=1
-                )
-                featured_model = gr.Radio(
-                    label="Select a Featured Model Below",
-                    choices=models_list,
-                    value="meta-llama/Llama-3.3-70B-Instruct",  # default
-                    interactive=True,
-                )
-                # Link the search box to filter the radio model choices
-                model_search.change(filter_models, inputs=model_search, outputs=featured_model)
-            # A text box to optionally override the featured model
-            custom_model = gr.Textbox(
-                label="Custom Model",
-                info="(Optional) Provide a custom HF model path. If non-empty, it overrides your featured model choice."
-            )
-            # Sliders
-            max_tokens = gr.Slider(
-                minimum=1,
-                maximum=4096,
-                value=512,
-                step=1,
-                label="Max new tokens"
-            )
-            temperature = gr.Slider(
-                minimum=0.1,
-                maximum=4.0,
-                value=0.7,
-                step=0.1,
-                label="Temperature"
-            )
-            top_p = gr.Slider(
-                minimum=0.1,
-                maximum=1.0,
-                value=0.95,
-                step=0.05,
-                label="Top-P"
-            )
-            frequency_penalty = gr.Slider(
-                minimum=-2.0,
-                maximum=2.0,
-                value=0.0,
-                step=0.1,
-                label="Frequency Penalty"
-            )
-            seed = gr.Slider(
-                minimum=-1,
-                maximum=65535,
-                value=-1,
-                step=1,
-                label="Seed (-1 for random)"
-            )
-        # The "chat" Column
-        with gr.Column(scale=2):
-            # We store the conversation history in a state variable
-            state = gr.State([])  # Each element in state is (user_message, assistant_message)
-            # Chat input box for the user
-            with gr.Row():
-                txt = gr.Textbox(
-                    label="Enter your message",
-                    placeholder="Type your request here, then press 'Submit'",
-                    lines=3
-                )
-            # Button to submit the message
-            submit_btn = gr.Button("Submit", variant="primary")
-    #
-    # The 'respond' function is tied to the chatbot display.
-    # We'll define a small wrapper that updates the 'history' (state) each time.
-    #
-    def user_submit(user_message, chat_history):
-        """
-        This function just adds the user message to the history and returns it.
-        The actual text generation will come from 'bot_respond' next.
-        """
-        # Append new user message to the existing conversation
-        chat_history = chat_history + [(user_message, None)]
-        return "", chat_history
-    def bot_respond(chat_history, sys_msg, max_t, temp, top, freq_pen, s, custom_mod, feat_model):
-        """
-        This function calls our 'respond' generator to get the text.
-        It updates the last message in chat_history with the bot's response as it streams.
-        """
-        user_message = chat_history[-1][0] if len(chat_history) > 0 else ""
-        # We call the generator
-        bot_messages = respond(
-            user_message,
-            chat_history[:-1],     # all but the last user message
-            sys_msg,
-            max_t,
-            temp,
-            top,
-            freq_pen,
-            s,
-            custom_mod,
-            feat_model,
-        )
-        # Stream the tokens back
-        final_bot_msg = ""
-        for token_text in bot_messages:
-            final_bot_msg = token_text
-            # We'll update the chatbot in real-time
-            chat_history[-1] = (user_message, final_bot_msg)
-            yield chat_history
-    # Tie the Submit button to the user_submit function, and then to bot_respond
-    submit_btn.click(
-        user_submit,
-        inputs=[txt, state],
-        outputs=[txt, state],
-        queue=False
-    ).then(
-        bot_respond,
-        inputs=[state, system_message, max_tokens, temperature, top_p, frequency_penalty, seed, custom_model, featured_model],
-        outputs=[chatbot],
-        queue=True
-    )
-print("Interface construction complete. Ready to launch!")
-# Launch the Gradio Blocks interface
 if __name__ == "__main__":
     print("Launching the demo application.")
     demo.launch()

 import gradio as gr
 from openai import OpenAI
+import os
 # Retrieve the access token from the environment variable
 ACCESS_TOKEN = os.getenv("HF_TOKEN")
     top_p,
     frequency_penalty,
     seed,
+    custom_model
 ):
     """
     This function handles the chatbot response. It takes in:
     - top_p: top-p (nucleus) sampling
     - frequency_penalty: penalize repeated tokens in the output
     - seed: a fixed seed for reproducibility; -1 will mean 'random'
+    - custom_model: the user-provided custom model name (if any)
     """
     print(f"Received message: {message}")
     print(f"Max tokens: {max_tokens}, Temperature: {temperature}, Top-P: {top_p}")
     print(f"Frequency Penalty: {frequency_penalty}, Seed: {seed}")
     print(f"Custom model: {custom_model}")
+    # Convert seed to None if -1 (meaning random)
     if seed == -1:
         seed = None
+    # Construct the messages array required by the API
+    messages = [{"role": "system", "content": system_message}]
+    # Add conversation history to the context
     for val in history:
         user_part = val[0]
         assistant_part = val[1]
             messages.append({"role": "assistant", "content": assistant_part})
             print(f"Added assistant message to context: {assistant_part}")
+    # Append the latest user message
     messages.append({"role": "user", "content": message})
+    # Determine which model to use: either custom_model or a default
+    model_to_use = custom_model.strip() if custom_model.strip() != "" else "meta-llama/Llama-3.3-70B-Instruct"
     print(f"Model selected for inference: {model_to_use}")
+    # Start with an empty string to build the response as tokens stream in
     response = ""
     print("Sending request to OpenAI API.")
+    # Make the streaming request to the HF Inference API via the OpenAI-compatible client
     for message_chunk in client.chat.completions.create(
+        model=model_to_use,              # Use either the user-provided custom model or default
         max_tokens=max_tokens,
+        stream=True,                     # Stream the response
         temperature=temperature,
         top_p=top_p,
         frequency_penalty=frequency_penalty,
         token_text = message_chunk.choices[0].delta.content
         print(f"Received token: {token_text}")
         response += token_text
+        # Yield the partial response to Gradio so it can display in real-time
         yield response
     print("Completed response generation.")
+# Create a Chatbot component with a specified height
+chatbot = gr.Chatbot(height=600)
+print("Chatbot interface created.")
+# Create the Gradio ChatInterface
+# We add two sliders (Frequency Penalty and Seed) and a new "Custom Model" text box.
+demo = gr.ChatInterface(
+    fn=respond,
+    additional_inputs=[
+        gr.Textbox(value="", label="System message"),
+        gr.Slider(
+            minimum=1,
+            maximum=4096,
+            value=512,
+            step=1,
+            label="Max new tokens"
+        ),
+        gr.Slider(
+            minimum=0.1,
+            maximum=4.0,
+            value=0.7,
+            step=0.1,
+            label="Temperature"
+        ),
+        gr.Slider(
+            minimum=0.1,
+            maximum=1.0,
+            value=0.95,
+            step=0.05,
+            label="Top-P"
+        ),
+        gr.Slider(
+            minimum=-2.0,
+            maximum=2.0,
+            value=0.0,
+            step=0.1,
+            label="Frequency Penalty"
+        ),
+        gr.Slider(
+            minimum=-1,
+            maximum=65535,  # Arbitrary upper limit for demonstration
+            value=-1,
+            step=1,
+            label="Seed (-1 for random)"
+        ),
+        gr.Textbox(
+            value="",
+            label="Custom Model",
+            info="(Optional) Provide a custom Hugging Face model path. This will override the default model if not empty."
+        ),
+    ],
+    fill_height=True,
+    chatbot=chatbot,
+    theme="Nymbo/Nymbo_Theme",
 )
+print("Gradio interface initialized.")
 if __name__ == "__main__":
     print("Launching the demo application.")
     demo.launch()