Nymbo committed on
Commit
a05c183
·
verified ·
1 Parent(s): 6ee17e0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +7 -67
app.py CHANGED
@@ -2,11 +2,9 @@ import gradio as gr
2
  from openai import OpenAI
3
  import os
4
 
5
- # Retrieve the access token from the environment variable
6
  ACCESS_TOKEN = os.getenv("HF_TOKEN")
7
  print("Access token loaded.")
8
 
9
- # Initialize the OpenAI client with the Hugging Face Inference API endpoint
10
  client = OpenAI(
11
  base_url="https://api-inference.huggingface.co/v1/",
12
  api_key=ACCESS_TOKEN,
@@ -25,18 +23,6 @@ def respond(
25
  seed,
26
  custom_model
27
  ):
28
- """
29
- This function handles the chatbot response. It takes in:
30
- - message: the user's new message
31
- - history: the list of previous messages, each as a tuple (user_msg, assistant_msg)
32
- - system_message: the system prompt
33
- - max_tokens: the maximum number of tokens to generate in the response
34
- - temperature: sampling temperature
35
- - top_p: top-p (nucleus) sampling
36
- - frequency_penalty: penalize repeated tokens in the output
37
- - seed: a fixed seed for reproducibility; -1 will mean 'random'
38
- - custom_model: the final model name in use, which may be set by selecting from the Featured Models radio or by typing a custom model
39
- """
40
 
41
  print(f"Received message: {message}")
42
  print(f"History: {history}")
@@ -49,14 +35,13 @@ def respond(
49
  if seed == -1:
50
  seed = None
51
 
52
- # Construct the messages array required by the API
53
  messages = [{"role": "system", "content": system_message}]
54
  print("Initial messages array constructed.")
55
 
56
  # Add conversation history to the context
57
  for val in history:
58
- user_part = val[0] # Extract user message from the tuple
59
- assistant_part = val[1] # Extract assistant message from the tuple
60
  if user_part:
61
  messages.append({"role": "user", "content": user_part})
62
  print(f"Added user message to context: {user_part}")
@@ -76,7 +61,6 @@ def respond(
76
  response = ""
77
  print("Sending request to OpenAI API.")
78
 
79
- # Make the streaming request to the HF Inference API via openai-like client
80
  for message_chunk in client.chat.completions.create(
81
  model=model_to_use,
82
  max_tokens=max_tokens,
@@ -87,7 +71,6 @@ def respond(
87
  seed=seed,
88
  messages=messages,
89
  ):
90
- # Extract the token text from the response chunk
91
  token_text = message_chunk.choices[0].delta.content
92
  print(f"Received token: {token_text}")
93
  response += token_text
@@ -95,17 +78,12 @@ def respond(
95
 
96
  print("Completed response generation.")
97
 
 
98
 
99
- # -------------------------
100
- # GRADIO UI CONFIGURATION
101
- # -------------------------
102
-
103
- # Create a Chatbot component with a specified height
104
  chatbot = gr.Chatbot(height=600, show_copy_button=True, placeholder="Select a model and begin chatting", likeable=True, layout="panel")
105
  print("Chatbot interface created.")
106
 
107
- # Create textboxes and sliders for system prompt, tokens, and other parameters
108
- system_message_box = gr.Textbox(value="", label="System message")
109
 
110
  max_tokens_slider = gr.Slider(
111
  minimum=1,
@@ -147,7 +125,8 @@ seed_slider = gr.Slider(
147
  custom_model_box = gr.Textbox(
148
  value="",
149
  label="Custom Model",
150
- info="(Optional) Provide a custom Hugging Face model path. Overrides any selected featured model."
 
151
  )
152
 
153
  def set_custom_model_from_radio(selected):
@@ -158,10 +137,6 @@ def set_custom_model_from_radio(selected):
158
  print(f"Featured model selected: {selected}")
159
  return selected
160
 
161
- # IMPORTANT: Because we have 1 main user input + 7 additional inputs,
162
- # each example should be an 8-item list: [user_text, system_prompt, max_tokens,
163
- # temperature, top_p, frequency_penalty, seed, custom_model].
164
-
165
  demo = gr.ChatInterface(
166
  fn=respond,
167
  additional_inputs=[
@@ -176,46 +151,11 @@ demo = gr.ChatInterface(
176
  fill_height=True,
177
  chatbot=chatbot,
178
  theme="Nymbo/Nymbo_Theme",
179
- examples=[
180
- # Example 1
181
- [
182
- "Howdy, partner!", # user_text
183
- "", # system_prompt
184
- 512, # max_tokens
185
- 0.7, # temperature
186
- 0.95, # top_p
187
- 0.0, # frequency_penalty
188
- -1, # seed
189
- "" # custom_model
190
- ],
191
- # Example 2
192
- [
193
- "What's your model name and who trained you?",
194
- "",
195
- 512,
196
- 0.7,
197
- 0.95,
198
- 0.0,
199
- -1,
200
- ""
201
- ],
202
- # Example 3
203
- [
204
- "How many R's are there in 'Strawberry'?",
205
- "",
206
- 512,
207
- 0.7,
208
- 0.95,
209
- 0.0,
210
- -1,
211
- ""
212
- ],
213
- ],
214
  )
215
  print("ChatInterface object created.")
216
 
217
  with demo:
218
- with gr.Accordion("Featured Models", open=False):
219
  model_search_box = gr.Textbox(
220
  label="Filter Models",
221
  placeholder="Search for a featured model...",
 
2
  from openai import OpenAI
3
  import os
4
 
 
5
  ACCESS_TOKEN = os.getenv("HF_TOKEN")
6
  print("Access token loaded.")
7
 
 
8
  client = OpenAI(
9
  base_url="https://api-inference.huggingface.co/v1/",
10
  api_key=ACCESS_TOKEN,
 
23
  seed,
24
  custom_model
25
  ):
 
 
 
 
 
 
 
 
 
 
 
 
26
 
27
  print(f"Received message: {message}")
28
  print(f"History: {history}")
 
35
  if seed == -1:
36
  seed = None
37
 
 
38
  messages = [{"role": "system", "content": system_message}]
39
  print("Initial messages array constructed.")
40
 
41
  # Add conversation history to the context
42
  for val in history:
43
+ user_part = val[0]
44
+ assistant_part = val[1]
45
  if user_part:
46
  messages.append({"role": "user", "content": user_part})
47
  print(f"Added user message to context: {user_part}")
 
61
  response = ""
62
  print("Sending request to OpenAI API.")
63
 
 
64
  for message_chunk in client.chat.completions.create(
65
  model=model_to_use,
66
  max_tokens=max_tokens,
 
71
  seed=seed,
72
  messages=messages,
73
  ):
 
74
  token_text = message_chunk.choices[0].delta.content
75
  print(f"Received token: {token_text}")
76
  response += token_text
 
78
 
79
  print("Completed response generation.")
80
 
81
+ # GRADIO UI
82
 
 
 
 
 
 
83
  chatbot = gr.Chatbot(height=600, show_copy_button=True, placeholder="Select a model and begin chatting", likeable=True, layout="panel")
84
  print("Chatbot interface created.")
85
 
86
+ system_message_box = gr.Textbox(value="", placeholder="You are a helpful assistant.", label="System Prompt")
 
87
 
88
  max_tokens_slider = gr.Slider(
89
  minimum=1,
 
125
  custom_model_box = gr.Textbox(
126
  value="",
127
  label="Custom Model",
128
+ info="(Optional) Provide a custom Hugging Face model path. Overrides any selected featured model.",
129
+ placeholder="meta-llama/Llama-3.3-70B-Instruct"
130
  )
131
 
132
  def set_custom_model_from_radio(selected):
 
137
  print(f"Featured model selected: {selected}")
138
  return selected
139
 
 
 
 
 
140
  demo = gr.ChatInterface(
141
  fn=respond,
142
  additional_inputs=[
 
151
  fill_height=True,
152
  chatbot=chatbot,
153
  theme="Nymbo/Nymbo_Theme",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
154
  )
155
  print("ChatInterface object created.")
156
 
157
  with demo:
158
+ with gr.Accordion("Model Selection", open=False):
159
  model_search_box = gr.Textbox(
160
  label="Filter Models",
161
  placeholder="Search for a featured model...",