Nymbo committed on
Commit db00df1 · verified · 1 Parent(s): c6bdd15

Update app.py

Files changed (1):
  1. app.py +43 -33
app.py CHANGED
@@ -2,26 +2,22 @@ import gradio as gr
 from openai import OpenAI
 import os
 
-# A helper function to show pop-up (toast) messages in the Gradio interface
-# and also keep them in the console for debugging.
-# Note: gr.toast() only works during or after a Gradio event has started.
-# If this code runs at the global level (on import), the pop-ups may
-# not appear. They *will* appear for any messages triggered during
-# a Gradio event (e.g. when the user sends a message).
+ACCESS_TOKEN = os.getenv("HF_TOKEN")
 
 def show_loading_status(msg):
-    # Attempt to show pop-up via gr.toast (works when called inside a running Gradio event).
+    """
+    This helper function attempts to show a pop-up (toast) message if called
+    during an active Gradio event. If that fails, we at least log to console.
+    """
     try:
         gr.toast(msg)
     except:
-        # If gr.toast() fails (e.g. called outside of an event), just ignore or pass
        pass
-    # Also print to console for debugging
     print(msg)
 
-ACCESS_TOKEN = os.getenv("HF_TOKEN")
 show_loading_status("Access token loaded.")
 
+# Initialize the Hugging Face Inference-based OpenAI client
 client = OpenAI(
     base_url="https://api-inference.huggingface.co/v1/",
     api_key=ACCESS_TOKEN,
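A note on the helper kept in this hunk: gr.toast does not appear in Gradio's documented API (recent releases expose toast notifications as gr.Info and gr.Warning), which is likely why the call is wrapped in a bare except. A minimal sketch of the same helper written against gr.Info, assuming a recent Gradio version; toasts still only render while an event is running, so the guard stays:

```python
import gradio as gr

def show_loading_status(msg):
    # gr.Info shows a toast when called inside a running Gradio event;
    # outside an event it may raise or be silently dropped, so we guard it.
    try:
        gr.Info(msg)
    except Exception:
        pass
    # Always log to the console so messages survive even without a toast.
    print(msg)
```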
@@ -40,7 +36,6 @@ def respond(
     seed,
     custom_model
 ):
-
     show_loading_status(f"Received message: {message}")
     show_loading_status(f"History: {history}")
     show_loading_status(f"System message: {system_message}")
@@ -70,37 +65,53 @@ def respond(
     messages.append({"role": "user", "content": message})
     show_loading_status("Latest user message appended.")
 
-    # If user provided a model, use that; otherwise, fall back to a default model
+    # If user provided a model, use that; otherwise, fall back to a default
     model_to_use = custom_model.strip() if custom_model.strip() != "" else "meta-llama/Llama-3.3-70B-Instruct"
     show_loading_status(f"Model selected for inference: {model_to_use}")
 
-    # Start with an empty string to build the response as tokens stream in
-    response = ""
+    response_text = ""
     show_loading_status("Sending request to OpenAI API.")
 
-    for message_chunk in client.chat.completions.create(
-        model=model_to_use,
-        max_tokens=max_tokens,
-        stream=True,
-        temperature=temperature,
-        top_p=top_p,
-        frequency_penalty=frequency_penalty,
-        seed=seed,
-        messages=messages,
-    ):
-        token_text = message_chunk.choices[0].delta.content
-        show_loading_status(f"Received token: {token_text}")
-        response += token_text
-        yield response
-
-    show_loading_status("Completed response generation.")
+    try:
+        for message_chunk in client.chat.completions.create(
+            model=model_to_use,
+            max_tokens=max_tokens,
+            stream=True,
+            temperature=temperature,
+            top_p=top_p,
+            frequency_penalty=frequency_penalty,
+            seed=seed,
+            messages=messages,
+        ):
+            # Each chunk is a piece of the streaming text
+            token_text = message_chunk.choices[0].delta.content
+            show_loading_status(f"Received token: {token_text}")
+            response_text += token_text
+            yield response_text
+
+        show_loading_status("Completed response generation.")
+
+    except Exception as e:
+        show_loading_status("Error encountered during completion streaming.")
+        raise gr.Error(f"An unexpected error occurred: {str(e)}")
+
 
 # GRADIO UI
 
-chatbot = gr.Chatbot(height=600, show_copy_button=True, placeholder="Select a model and begin chatting", likeable=True, layout="panel")
+chatbot = gr.Chatbot(
+    height=600,
+    show_copy_button=True,
+    placeholder="Select a model and begin chatting",
+    likeable=True,
+    layout="panel"
+)
 show_loading_status("Chatbot interface created.")
 
-system_message_box = gr.Textbox(value="", placeholder="You are a helpful assistant.", label="System Prompt")
+system_message_box = gr.Textbox(
+    value="",
+    placeholder="You are a helpful assistant.",
+    label="System Prompt"
+)
 
 max_tokens_slider = gr.Slider(
     minimum=1,
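One caveat on the new streaming loop: with OpenAI-style streaming, delta.content can be None on some chunks (the first chunk often carries only the role), and the += would then raise a TypeError that surfaces as the generic gr.Error above. A defensive variant of the loop body, reusing the same names as the diff:

```python
for message_chunk in client.chat.completions.create(
    model=model_to_use,
    max_tokens=max_tokens,
    stream=True,
    temperature=temperature,
    top_p=top_p,
    frequency_penalty=frequency_penalty,
    seed=seed,
    messages=messages,
):
    # delta.content is None for role-only or terminal chunks; coerce to "".
    token_text = message_chunk.choices[0].delta.content or ""
    if token_text:
        response_text += token_text
        yield response_text
```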
@@ -138,7 +149,6 @@ seed_slider = gr.Slider(
     label="Seed (-1 for random)"
 )
 
-# The custom_model_box is what the respond function sees as "custom_model"
 custom_model_box = gr.Textbox(
     value="",
     label="Custom Model",
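The components above are defined standalone, so presumably they get wired into a gr.ChatInterface further down the file, outside this diff. A hedged sketch of that wiring: the slider names not visible in these hunks (temperature_slider, top_p_slider, frequency_penalty_slider) are guesses, and additional_inputs must match respond's parameter order:

```python
# Hypothetical assembly; the actual code is outside this diff. The
# *_slider names not shown in the hunks above are assumed.
demo = gr.ChatInterface(
    fn=respond,
    chatbot=chatbot,
    additional_inputs=[
        system_message_box,
        max_tokens_slider,
        temperature_slider,
        top_p_slider,
        frequency_penalty_slider,
        seed_slider,
        custom_model_box,
    ],
)

if __name__ == "__main__":
    demo.launch()
```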
 
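Finally, for anyone sanity-checking the relocated ACCESS_TOKEN and the client construction, a one-off non-streaming request against the same base_url works as a smoke test (the model name is just the diff's default):

```python
from openai import OpenAI
import os

client = OpenAI(
    base_url="https://api-inference.huggingface.co/v1/",
    api_key=os.getenv("HF_TOKEN"),
)

# Minimal non-streaming call: if this prints text, the token and routing work.
reply = client.chat.completions.create(
    model="meta-llama/Llama-3.3-70B-Instruct",
    messages=[{"role": "user", "content": "Say hello."}],
    max_tokens=16,
)
print(reply.choices[0].message.content)
```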