import gradio as gr
import os
from openai import OpenAI
################################################
# INITIAL SETUP
################################################
# Retrieve the access token from the environment variable
ACCESS_TOKEN = os.getenv("HF_TOKEN")
if ACCESS_TOKEN:
    print("Access token loaded.")
else:
    print("Warning: HF_TOKEN is not set; requests to the Inference API will fail.")
# Initialize the OpenAI client with the Hugging Face Inference API endpoint
client = OpenAI(
    base_url="https://api-inference.huggingface.co/v1/",
    api_key=ACCESS_TOKEN,
)
print("OpenAI client initialized.")
# Our main response-generating function
def respond(
    user_message,
    history,
    system_message,
    max_tokens,
    temperature,
    top_p,
    frequency_penalty,
    seed,
    featured_model,
    custom_model
):
"""
This function handles the chatbot response. It takes in:
- user_message: the user's new message
- history: the list of previous messages, each as [user_text, assistant_text]
- system_message: the system prompt
- max_tokens: the maximum number of tokens to generate in the response
- temperature: sampling temperature
- top_p: top-p (nucleus) sampling
- frequency_penalty: penalize repeated tokens in the output
- seed: a fixed seed for reproducibility; -1 will mean 'random'
- featured_model: the user-chosen model from the radio button
- custom_model: a user-specified custom model that overrides featured_model if not empty
"""
print(f"New user message: {user_message}")
print(f"History so far: {history}")
print(f"System message: {system_message}")
print(f"max_tokens: {max_tokens}, temperature: {temperature}, top_p: {top_p}")
print(f"frequency_penalty: {frequency_penalty}, seed: {seed}")
print(f"Featured Model: {featured_model}")
print(f"Custom Model: {custom_model}")
# Convert seed to None if -1 (meaning random)
if seed == -1:
seed = None
# Determine which model to use
# If the user typed something in custom_model, that overrides the featured model
# Otherwise we use the model selected in the radio. If neither, default to the example "meta-llama..."
model_to_use = None
if custom_model.strip():
model_to_use = custom_model.strip()
elif featured_model is not None and featured_model.strip():
model_to_use = featured_model.strip()
else:
model_to_use = "meta-llama/Llama-3.3-70B-Instruct"
print(f"Model selected for inference: {model_to_use}")
# Construct the conversation messages for the HF Inference API
messages = [{"role": "system", "content": system_message}]
for user_text, assistant_text in history:
if user_text:
messages.append({"role": "user", "content": user_text})
if assistant_text:
messages.append({"role": "assistant", "content": assistant_text})
messages.append({"role": "user", "content": user_message})
    # We'll collect and stream the response
    response_so_far = ""

    # Make the streaming request to the HF Inference API
    print("Sending request to OpenAI/Hugging Face Inference API...")
    for message_chunk in client.chat.completions.create(
        model=model_to_use,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
        frequency_penalty=frequency_penalty,
        seed=seed,
        messages=messages,
    ):
        # The content of the partial chunk; delta.content can be None (e.g. on
        # the final chunk that only carries a finish_reason), so guard before
        # concatenating.
        token_text = message_chunk.choices[0].delta.content
        if token_text is not None:
            response_so_far += token_text
            # Yield the partial response so Gradio can display it in real time
            yield response_so_far

    print("Completed response generation.")
################################################
# GRADIO UI + STATE MANAGEMENT
################################################
def user_submit(user_message, history):
    """
    This function is called when the user sends a message.
    We simply add the user message to the conversation history.
    """
    print("user_submit triggered.")
    # Append the new user message to history
    if not history:
        history = []
    history = history + [[user_message, None]]
    return history, ""
def bot_reply(history, system_message, max_tokens, temperature, top_p,
              frequency_penalty, seed, featured_model, custom_model):
    """
    This function is triggered to produce the bot's response after the user has submitted.
    We call 'respond' to stream the text.
    """
    print("bot_reply triggered.")
    # The last conversation item holds [user_message, None]
    user_message = history[-1][0]
    # We will stream the partial responses from 'respond'
    bot_response = respond(
        user_message=user_message,
        history=history[:-1],  # all items except the last, because we pass the last user msg separately
        system_message=system_message,
        max_tokens=max_tokens,
        temperature=temperature,
        top_p=top_p,
        frequency_penalty=frequency_penalty,
        seed=seed,
        featured_model=featured_model,
        custom_model=custom_model
    )
    # As we yield from the generator, we update the last item in history with
    # the partial response; Gradio streams each yielded update as it comes in.
    for partial_text in bot_response:
        history[-1][1] = partial_text
        yield history
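# Note on streaming: each yield above re-renders the Chatbot, so the last pair
# grows as chunks arrive, e.g. [["Hi", "He"]] -> [["Hi", "Hello"]] -> ...
# Because history[-1] is mutated in place and gr.State hands functions the
# stored list by reference, conversation_state also ends up holding the
# finished reply.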
# We define a small list of placeholder featured models for demonstration
models_list = [
    "meta-llama/Llama-2-13B-Chat-hf",
    "bigscience/bloom",
    "EleutherAI/gpt-neo-2.7B",
    "meta-llama/Llama-3.3-70B-Instruct"
]
def filter_models(search_term):
    """
    Filter function triggered when the user types in the model_search box.
    Returns an updated list of models that contain the search term.
    """
    filtered = [m for m in models_list if search_term.lower() in m.lower()]
    return gr.update(choices=filtered)
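# For example, filter_models("llama") matches "meta-llama/Llama-2-13B-Chat-hf"
# and "meta-llama/Llama-3.3-70B-Instruct" (matching is case-insensitive) and
# returns a gr.update(...) that replaces the Radio's choices with just those.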
################################################
# BUILDING THE GRADIO LAYOUT
################################################
with gr.Blocks(theme="Nymbo/Nymbo_Theme") as demo:
    gr.Markdown(
        """
# Serverless-TextGen-Hub
**A UI for text generation using Hugging Face's Inference API.**

Below is a simple chat interface. You can pick from **Featured Models** or specify a **Custom Model**
to override the choice. If you're not sure, just use the default.
"""
    )

    # State to hold the conversation history; will be a list of [user, bot] pairs
    conversation_state = gr.State([])
    # Accordion for the system message + advanced settings
with gr.Accordion("Advanced Settings", open=False):
system_message = gr.Textbox(
label="System Message",
value="You are a helpful assistant.",
lines=2,
info="Provides background or personality instructions to the model."
)
max_tokens = gr.Slider(
minimum=1,
maximum=4096,
value=512,
step=1,
label="Max new tokens"
)
temperature = gr.Slider(
minimum=0.1,
maximum=4.0,
value=0.7,
step=0.1,
label="Temperature"
)
top_p = gr.Slider(
minimum=0.1,
maximum=1.0,
value=0.95,
step=0.05,
label="Top-P"
)
frequency_penalty = gr.Slider(
minimum=-2.0,
maximum=2.0,
value=0.0,
step=0.1,
label="Frequency Penalty"
)
seed = gr.Slider(
minimum=-1,
maximum=65535,
value=-1,
step=1,
label="Seed (-1 for random)"
)
    # Featured Models + filtering
    with gr.Accordion("Featured Models", open=False):
        model_search = gr.Textbox(
            label="Filter Models",
            placeholder="Search for a featured model...",
            lines=1
        )
        featured_model_radio = gr.Radio(
            label="Select a featured model below",
            choices=models_list,
            value=models_list[0],  # default selection
            interactive=True
        )
        model_search.change(
            filter_models,
            inputs=model_search,
            outputs=featured_model_radio
        )
    # This is the Custom Model box (overrides Featured Models if not empty)
    custom_model = gr.Textbox(
        label="Custom Model",
        value="",
        info="(Optional) Provide a custom HF model path. If not empty, it overrides the Featured Model."
    )

    # The main Chatbot interface
    chatbot = gr.Chatbot(height=600)

    # Textbox for the user to type a new message
    with gr.Row():
        user_input = gr.Textbox(
            show_label=False,
            placeholder="Type your message here (press Enter or click 'Submit')",
            lines=2
        )
        submit_btn = gr.Button("Submit", variant="primary")
    # The user submits -> update the conversation state, then stream the bot's
    # reply. Chaining with .then() ensures bot_reply runs only after
    # user_submit has updated the state; two independent .click() handlers
    # are not guaranteed to run in order.
    submit_btn.click(
        fn=user_submit,
        inputs=[user_input, conversation_state],
        outputs=[conversation_state, user_input],
    ).then(
        fn=bot_reply,
        inputs=[
            conversation_state,
            system_message,
            max_tokens,
            temperature,
            top_p,
            frequency_penalty,
            seed,
            featured_model_radio,
            custom_model
        ],
        outputs=[chatbot],
        # 'bot_reply' is a generator, so Gradio streams its yields; queue=True
        # routes the request through the queue, which generators require.
        queue=True
    )
    # We also allow pressing Enter in user_input to trigger the same chain
    user_input.submit(
        fn=user_submit,
        inputs=[user_input, conversation_state],
        outputs=[conversation_state, user_input],
    ).then(
        fn=bot_reply,
        inputs=[
            conversation_state,
            system_message,
            max_tokens,
            temperature,
            top_p,
            frequency_penalty,
            seed,
            featured_model_radio,
            custom_model
        ],
        outputs=[chatbot],
        queue=True
    )
gr.HTML("""
<br>
<p style='text-align:center;'>
Developed by <strong>Nymbo</strong>.
Powered by <strong>Hugging Face Inference API</strong>.
</p>
""")
# Finally, launch the app
if __name__ == "__main__":
    print("Launching the Serverless-TextGen-Hub application...")
    demo.launch()