Spaces:

Nymbo
/

Serverless-TextGen-Hub

Running

App Files Files Community

Nymbo committed 8 days ago

Commit

db00df1

verified ·

1 Parent(s): c6bdd15

Update app.py

Browse files

Files changed (1) hide show

app.py +43 -33

app.py CHANGED Viewed

@@ -2,26 +2,22 @@ import gradio as gr
 from openai import OpenAI
 import os
-# A helper function to show pop-up (toast) messages in the Gradio interface
-# and also keep them in the console for debugging.
-# Note: gr.toast() only works during or after a Gradio event has started.
-#       If this code runs at the global level (on import), the pop-ups may
-#       not appear. They *will* appear for any messages triggered during
-#       a Gradio event (e.g. when the user sends a message).
 def show_loading_status(msg):
-    # Attempt to show pop-up via gr.toast (works when called inside a running Gradio event).
     try:
         gr.toast(msg)
     except:
-        # If gr.toast() fails (e.g. called outside of an event), just ignore or pass
         pass
-    # Also print to console for debugging
     print(msg)
-ACCESS_TOKEN = os.getenv("HF_TOKEN")
 show_loading_status("Access token loaded.")
 client = OpenAI(
     base_url="https://api-inference.huggingface.co/v1/",
     api_key=ACCESS_TOKEN,
@@ -40,7 +36,6 @@ def respond(
     seed,
     custom_model
 ):
     show_loading_status(f"Received message: {message}")
     show_loading_status(f"History: {history}")
     show_loading_status(f"System message: {system_message}")
@@ -70,37 +65,53 @@ def respond(
     messages.append({"role": "user", "content": message})
     show_loading_status("Latest user message appended.")
-    # If user provided a model, use that; otherwise, fall back to a default model
     model_to_use = custom_model.strip() if custom_model.strip() != "" else "meta-llama/Llama-3.3-70B-Instruct"
     show_loading_status(f"Model selected for inference: {model_to_use}")
-    # Start with an empty string to build the response as tokens stream in
-    response = ""
     show_loading_status("Sending request to OpenAI API.")
-    for message_chunk in client.chat.completions.create(
-        model=model_to_use,
-        max_tokens=max_tokens,
-        stream=True,
-        temperature=temperature,
-        top_p=top_p,
-        frequency_penalty=frequency_penalty,
-        seed=seed,
-        messages=messages,
-    ):
-        token_text = message_chunk.choices[0].delta.content
-        show_loading_status(f"Received token: {token_text}")
-        response += token_text
-        yield response
-    show_loading_status("Completed response generation.")
 # GRADIO UI
-chatbot = gr.Chatbot(height=600, show_copy_button=True, placeholder="Select a model and begin chatting", likeable=True, layout="panel")
 show_loading_status("Chatbot interface created.")
-system_message_box = gr.Textbox(value="", placeholder="You are a helpful assistant.", label="System Prompt")
 max_tokens_slider = gr.Slider(
     minimum=1,
@@ -138,7 +149,6 @@ seed_slider = gr.Slider(
     label="Seed (-1 for random)"
 )
-# The custom_model_box is what the respond function sees as "custom_model"
 custom_model_box = gr.Textbox(
     value="",
     label="Custom Model",

 from openai import OpenAI
 import os
+ACCESS_TOKEN = os.getenv("HF_TOKEN")
 def show_loading_status(msg):
+    """
+    This helper function attempts to show a pop-up (toast) message if called
+    during an active Gradio event. If that fails, we at least log to console.
+    """
     try:
         gr.toast(msg)
     except:
         pass
     print(msg)
 show_loading_status("Access token loaded.")
+# Initialize the Hugging Face Inference-based OpenAI client
 client = OpenAI(
     base_url="https://api-inference.huggingface.co/v1/",
     api_key=ACCESS_TOKEN,
     seed,
     custom_model
 ):
     show_loading_status(f"Received message: {message}")
     show_loading_status(f"History: {history}")
     show_loading_status(f"System message: {system_message}")
     messages.append({"role": "user", "content": message})
     show_loading_status("Latest user message appended.")
+    # If user provided a model, use that; otherwise, fall back to a default
     model_to_use = custom_model.strip() if custom_model.strip() != "" else "meta-llama/Llama-3.3-70B-Instruct"
     show_loading_status(f"Model selected for inference: {model_to_use}")
+    response_text = ""
     show_loading_status("Sending request to OpenAI API.")
+    try:
+        for message_chunk in client.chat.completions.create(
+            model=model_to_use,
+            max_tokens=max_tokens,
+            stream=True,
+            temperature=temperature,
+            top_p=top_p,
+            frequency_penalty=frequency_penalty,
+            seed=seed,
+            messages=messages,
+        ):
+            # Each chunk is a piece of the streaming text
+            token_text = message_chunk.choices[0].delta.content
+            show_loading_status(f"Received token: {token_text}")
+            response_text += token_text
+            yield response_text
+        show_loading_status("Completed response generation.")
+    except Exception as e:
+        show_loading_status("Error encountered during completion streaming.")
+        raise gr.Error(f"An unexpected error occurred: {str(e)}")
 # GRADIO UI
+chatbot = gr.Chatbot(
+    height=600,
+    show_copy_button=True,
+    placeholder="Select a model and begin chatting",
+    likeable=True,
+    layout="panel"
+)
 show_loading_status("Chatbot interface created.")
+system_message_box = gr.Textbox(
+    value="",
+    placeholder="You are a helpful assistant.",
+    label="System Prompt"
+)
 max_tokens_slider = gr.Slider(
     minimum=1,
     label="Seed (-1 for random)"
 )
 custom_model_box = gr.Textbox(
     value="",
     label="Custom Model",