Spaces:

Nymbo
/

Serverless-TextGen-Hub

Running

App Files Community

Nymbo committed 7 days ago

Commit

0ef95ea

verified ·

1 Parent(s): 8d5a7cf

Update app.py

Browse files

Files changed (1) hide show

app.py +64 -78

app.py CHANGED Viewed

@@ -3,23 +3,13 @@ from openai import OpenAI
 import os
 ACCESS_TOKEN = os.getenv("HF_TOKEN")
-def show_loading_status(msg):
-    try:
-        gr.toast(msg)
-    except:
-        pass
-    print(msg)
-show_loading_status("Access token loaded.")
-# Initialize the Hugging Face Inference-based OpenAI client
 client = OpenAI(
     base_url="https://api-inference.huggingface.co/v1/",
     api_key=ACCESS_TOKEN,
 )
-show_loading_status("OpenAI client initialized.")
 def respond(
@@ -33,18 +23,20 @@ def respond(
     seed,
     custom_model
 ):
-    show_loading_status(f"Received message: {message}")
-    show_loading_status(f"History: {history}")
-    show_loading_status(f"System message: {system_message}")
-    show_loading_status(f"Max tokens: {max_tokens}, Temperature: {temperature}, Top-P: {top_p}")
-    show_loading_status(f"Frequency Penalty: {frequency_penalty}, Seed: {seed}")
-    show_loading_status(f"Selected model (custom_model): {custom_model}")
     # Convert seed to None if -1 (meaning random)
-    seed = seed if seed != -1 else random.randint(1, 1000000000),
     messages = [{"role": "system", "content": system_message}]
-    show_loading_status("Initial messages array constructed.")
     # Add conversation history to the context
     for val in history:
@@ -52,62 +44,46 @@ def respond(
         assistant_part = val[1]
         if user_part:
             messages.append({"role": "user", "content": user_part})
-            show_loading_status(f"Added user message to context: {user_part}")
         if assistant_part:
             messages.append({"role": "assistant", "content": assistant_part})
-            show_loading_status(f"Added assistant message to context: {assistant_part}")
     # Append the latest user message
     messages.append({"role": "user", "content": message})
-    show_loading_status("Latest user message appended.")
-    # If user provided a model, use that; otherwise, fall back to a default
     model_to_use = custom_model.strip() if custom_model.strip() != "" else "meta-llama/Llama-3.3-70B-Instruct"
-    show_loading_status(f"Model selected for inference: {model_to_use}")
-    response_text = ""
-    show_loading_status("Sending request to OpenAI API.")
-    try:
-        for message_chunk in client.chat.completions.create(
-            model=model_to_use,
-            max_tokens=max_tokens,
-            stream=True,
-            temperature=temperature,
-            top_p=top_p,
-            frequency_penalty=frequency_penalty,
-            seed=seed,
-            messages=messages,
-        ):
-            # Each chunk is a piece of the streaming text
-            token_text = message_chunk.choices[0].delta.content
-            show_loading_status(f"Received token: {token_text}")
-            response_text += token_text
-            yield response_text
-        show_loading_status("Completed response generation.")
-    except Exception as e:
-        show_loading_status("Error encountered during completion streaming.")
-        raise gr.Error(f"An unexpected error occurred: {str(e)}")
 # GRADIO UI
-chatbot = gr.Chatbot(
-    height=600,
-    show_copy_button=True,
-    placeholder="Select a model and begin chatting",
-    likeable=True,
-    layout="panel"
-)
-show_loading_status("Chatbot interface created.")
-system_message_box = gr.Textbox(
-    value="",
-    placeholder="You are a helpful assistant.",
-    label="System Prompt"
-)
 max_tokens_slider = gr.Slider(
     minimum=1,
@@ -139,21 +115,26 @@ frequency_penalty_slider = gr.Slider(
 )
 seed_slider = gr.Slider(
     minimum=-1,
-    maximum=1000000000,
     value=-1,
     step=1,
     label="Seed (-1 for random)"
 )
 custom_model_box = gr.Textbox(
     value="",
     label="Custom Model",
-    info="(Optional) Provide a custom Hugging Face model path. Supports Warm and Cold models.",
     placeholder="meta-llama/Llama-3.3-70B-Instruct"
 )
 def set_custom_model_from_radio(selected):
-    show_loading_status(f"Featured model selected: {selected}")
     return selected
 demo = gr.ChatInterface(
@@ -171,7 +152,7 @@ demo = gr.ChatInterface(
     chatbot=chatbot,
     theme="Nymbo/Nymbo_Theme",
 )
-show_loading_status("ChatInterface object created.")
 with demo:
     with gr.Accordion("Model Selection", open=False):
@@ -180,7 +161,7 @@ with demo:
             placeholder="Search for a featured model...",
             lines=1
         )
-        show_loading_status("Model search box created.")
         models_list = [
             "meta-llama/Llama-3.3-70B-Instruct",
@@ -188,15 +169,20 @@ with demo:
             "meta-llama/Llama-3.2-1B-Instruct",
             "meta-llama/Llama-3.1-8B-Instruct",
             "NousResearch/Hermes-3-Llama-3.1-8B",
             "mistralai/Mistral-Nemo-Instruct-2407",
             "mistralai/Mixtral-8x7B-Instruct-v0.1",
             "mistralai/Mistral-7B-Instruct-v0.3",
             "Qwen/Qwen2.5-72B-Instruct",
             "Qwen/QwQ-32B-Preview",
             "HuggingFaceTB/SmolLM2-1.7B-Instruct",
             "microsoft/Phi-3.5-mini-instruct",
         ]
-        show_loading_status("Models list initialized.")
         featured_model_radio = gr.Radio(
             label="Select a model below",
@@ -204,12 +190,12 @@ with demo:
             value="meta-llama/Llama-3.3-70B-Instruct",
             interactive=True
         )
-        show_loading_status("Featured models radio button created.")
         def filter_models(search_term):
-            show_loading_status(f"Filtering models with search term: {search_term}")
             filtered = [m for m in models_list if search_term.lower() in m.lower()]
-            show_loading_status(f"Filtered models: {filtered}")
             return gr.update(choices=filtered)
         model_search_box.change(
@@ -217,17 +203,17 @@ with demo:
             inputs=model_search_box,
             outputs=featured_model_radio
         )
-        show_loading_status("Model search box change event linked.")
         featured_model_radio.change(
             fn=set_custom_model_from_radio,
             inputs=featured_model_radio,
             outputs=custom_model_box
         )
-        show_loading_status("Featured model radio button change event linked.")
-show_loading_status("Gradio interface initialized.")
 if __name__ == "__main__":
-    show_loading_status("Launching the demo application.")
     demo.launch()

 import os
 ACCESS_TOKEN = os.getenv("HF_TOKEN")
+print("Access token loaded.")
 client = OpenAI(
     base_url="https://api-inference.huggingface.co/v1/",
     api_key=ACCESS_TOKEN,
 )
+print("OpenAI client initialized.")
 def respond(
     seed,
     custom_model
 ):
+    print(f"Received message: {message}")
+    print(f"History: {history}")
+    print(f"System message: {system_message}")
+    print(f"Max tokens: {max_tokens}, Temperature: {temperature}, Top-P: {top_p}")
+    print(f"Frequency Penalty: {frequency_penalty}, Seed: {seed}")
+    print(f"Selected model (custom_model): {custom_model}")
     # Convert seed to None if -1 (meaning random)
+    if seed == -1:
+        seed = None
     messages = [{"role": "system", "content": system_message}]
+    print("Initial messages array constructed.")
     # Add conversation history to the context
     for val in history:
         assistant_part = val[1]
         if user_part:
             messages.append({"role": "user", "content": user_part})
+            print(f"Added user message to context: {user_part}")
         if assistant_part:
             messages.append({"role": "assistant", "content": assistant_part})
+            print(f"Added assistant message to context: {assistant_part}")
     # Append the latest user message
     messages.append({"role": "user", "content": message})
+    print("Latest user message appended.")
+    # If user provided a model, use that; otherwise, fall back to a default model
     model_to_use = custom_model.strip() if custom_model.strip() != "" else "meta-llama/Llama-3.3-70B-Instruct"
+    print(f"Model selected for inference: {model_to_use}")
+    # Start with an empty string to build the response as tokens stream in
+    response = ""
+    print("Sending request to OpenAI API.")
+    for message_chunk in client.chat.completions.create(
+        model=model_to_use,
+        max_tokens=max_tokens,
+        stream=True,
+        temperature=temperature,
+        top_p=top_p,
+        frequency_penalty=frequency_penalty,
+        seed=seed,
+        messages=messages,
+    ):
+        token_text = message_chunk.choices[0].delta.content
+        print(f"Received token: {token_text}")
+        response += token_text
+        yield response
+    print("Completed response generation.")
 # GRADIO UI
+chatbot = gr.Chatbot(height=600, show_copy_button=True, placeholder="Select a model and begin chatting", likeable=True, layout="panel")
+print("Chatbot interface created.")
+system_message_box = gr.Textbox(value="", placeholder="You are a helpful assistant.", label="System Prompt")
 max_tokens_slider = gr.Slider(
     minimum=1,
 )
 seed_slider = gr.Slider(
     minimum=-1,
+    maximum=65535,
     value=-1,
     step=1,
     label="Seed (-1 for random)"
 )
+# The custom_model_box is what the respond function sees as "custom_model"
 custom_model_box = gr.Textbox(
     value="",
     label="Custom Model",
+    info="(Optional) Provide a custom Hugging Face model path. Overrides any selected featured model.",
     placeholder="meta-llama/Llama-3.3-70B-Instruct"
 )
 def set_custom_model_from_radio(selected):
+    """
+    This function will get triggered whenever someone picks a model from the 'Featured Models' radio.
+    We will update the Custom Model text box with that selection automatically.
+    """
+    print(f"Featured model selected: {selected}")
     return selected
 demo = gr.ChatInterface(
     chatbot=chatbot,
     theme="Nymbo/Nymbo_Theme",
 )
+print("ChatInterface object created.")
 with demo:
     with gr.Accordion("Model Selection", open=False):
             placeholder="Search for a featured model...",
             lines=1
         )
+        print("Model search box created.")
         models_list = [
             "meta-llama/Llama-3.3-70B-Instruct",
             "meta-llama/Llama-3.2-1B-Instruct",
             "meta-llama/Llama-3.1-8B-Instruct",
             "NousResearch/Hermes-3-Llama-3.1-8B",
+            "google/gemma-2-27b-it",
+            "google/gemma-2-9b-it",
+            "google/gemma-2-2b-it",
             "mistralai/Mistral-Nemo-Instruct-2407",
             "mistralai/Mixtral-8x7B-Instruct-v0.1",
             "mistralai/Mistral-7B-Instruct-v0.3",
             "Qwen/Qwen2.5-72B-Instruct",
             "Qwen/QwQ-32B-Preview",
+            "PowerInfer/SmallThinker-3B-Preview",
             "HuggingFaceTB/SmolLM2-1.7B-Instruct",
+            "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
             "microsoft/Phi-3.5-mini-instruct",
         ]
+        print("Models list initialized.")
         featured_model_radio = gr.Radio(
             label="Select a model below",
             value="meta-llama/Llama-3.3-70B-Instruct",
             interactive=True
         )
+        print("Featured models radio button created.")
         def filter_models(search_term):
+            print(f"Filtering models with search term: {search_term}")
             filtered = [m for m in models_list if search_term.lower() in m.lower()]
+            print(f"Filtered models: {filtered}")
             return gr.update(choices=filtered)
         model_search_box.change(
             inputs=model_search_box,
             outputs=featured_model_radio
         )
+        print("Model search box change event linked.")
         featured_model_radio.change(
             fn=set_custom_model_from_radio,
             inputs=featured_model_radio,
             outputs=custom_model_box
         )
+        print("Featured model radio button change event linked.")
+print("Gradio interface initialized.")
 if __name__ == "__main__":
+    print("Launching the demo application.")
     demo.launch()