Nymbo commited on
Commit
52ad57a
·
verified ·
1 Parent(s): c20c4dd

OKAY LETS SIMPLIFY THIS LOL

Browse files
Files changed (1)
  1. app.py +78 -82
app.py CHANGED
@@ -21,21 +21,29 @@ def respond(
21
  temperature,
22
  top_p,
23
  frequency_penalty,
24
- seed,
25
- model,
26
- custom_model
27
  ):
28
  """
29
- Handles the chatbot response with given parameters.
 
 
 
 
 
 
 
 
30
  """
 
31
  print(f"Received message: {message}")
32
  print(f"History: {history}")
33
  print(f"System message: {system_message}")
34
- print(f"Model: {model}, Custom Model: {custom_model}")
 
35
 
36
- # Use custom model if provided, else use selected model
37
- selected_model = custom_model.strip() if custom_model.strip() else model
38
- print(f"Selected model: {selected_model}")
39
 
40
  # Construct the messages array required by the API
41
  messages = [{"role": "system", "content": system_message}]
@@ -58,107 +66,95 @@ def respond(
58
  response = ""
59
  print("Sending request to OpenAI API.")
60
 
61
- # Make the streaming request to the HF Inference API via OpenAI-like client
62
  for message_chunk in client.chat.completions.create(
63
- model=selected_model,
64
  max_tokens=max_tokens,
65
- stream=True,
66
  temperature=temperature,
67
  top_p=top_p,
68
- frequency_penalty=frequency_penalty,
69
- seed=seed if seed != -1 else None,
70
  messages=messages,
71
  ):
72
  # Extract the token text from the response chunk
73
  token_text = message_chunk.choices[0].delta.content
74
  print(f"Received token: {token_text}")
75
  response += token_text
 
76
  yield response
77
 
78
  print("Completed response generation.")
79
 
80
- # Create a Chatbot component
81
  chatbot = gr.Chatbot(height=600)
82
  print("Chatbot interface created.")
83
 
84
- # Define the featured models for the dropdown
85
- models_list = [
86
- "meta-llama/Llama-3.3-70B-Instruct",
87
- "bigscience/bloom-176b",
88
- "gpt-j-6b",
89
- "opt-30b",
90
- "flan-t5-xxl",
91
  ]
92
 
93
- # Function to filter models based on user input
94
  def filter_models(search_term):
95
- return [m for m in models_list if search_term.lower() in m.lower()]
 
 
 
 
 
 
 
 
 
96
 
97
- # Gradio interface
98
  with gr.Blocks(theme="Nymbo/Nymbo_Theme") as demo:
99
- with gr.Row():
100
- chatbot = gr.Chatbot(height=600)
101
-
102
- with gr.Tab("Chat Interface"):
103
- with gr.Row():
104
- user_input = gr.Textbox(label="Your Message", placeholder="Type your message here...")
105
- with gr.Row():
106
- system_message = gr.Textbox(value="", label="System Message")
107
- with gr.Row():
108
- max_tokens = gr.Slider(minimum=1, maximum=4096, value=512, step=1, label="Max Tokens")
109
- temperature = gr.Slider(minimum=0.1, maximum=2.0, value=0.7, step=0.1, label="Temperature")
110
- with gr.Row():
111
- top_p = gr.Slider(minimum=0.1, maximum=1.0, value=0.9, step=0.05, label="Top-P")
112
- frequency_penalty = gr.Slider(minimum=-2.0, maximum=2.0, value=0.0, step=0.1, label="Frequency Penalty")
113
- seed = gr.Slider(minimum=-1, maximum=65535, value=-1, step=1, label="Seed (-1 for random)")
114
- with gr.Row():
115
- model = gr.Dropdown(label="Select a Model", choices=models_list, value="meta-llama/Llama-3.3-70B-Instruct")
116
- custom_model = gr.Textbox(label="Custom Model", placeholder="Enter custom model path")
117
- with gr.Row():
118
- run_button = gr.Button("Generate Response")
119
-
120
- with gr.Tab("Information"):
121
- with gr.Accordion("Featured Models", open=False):
122
- gr.HTML(
123
- """
124
- <table>
125
- <tr><th>Model Name</th><th>Description</th></tr>
126
- <tr><td>meta-llama/Llama-3.3-70B-Instruct</td><td>Instruction-tuned LLaMA model</td></tr>
127
- <tr><td>bigscience/bloom-176b</td><td>Multilingual large language model</td></tr>
128
- <tr><td>gpt-j-6b</td><td>Open-source GPT model</td></tr>
129
- <tr><td>opt-30b</td><td>Meta's OPT model</td></tr>
130
- <tr><td>flan-t5-xxl</td><td>Google's Flan-tuned T5 XXL</td></tr>
131
- </table>
132
- """
133
- )
134
- with gr.Accordion("Parameters Overview", open=False):
135
- gr.Markdown(
136
- """
137
- ### Parameters Overview
138
- - **Max Tokens**: Maximum number of tokens in the response.
139
- - **Temperature**: Controls the randomness of responses. Lower values make the output more deterministic.
140
- - **Top-P**: Controls the diversity of responses by limiting the token selection to a probability mass.
141
- - **Frequency Penalty**: Penalizes repeated tokens in the output.
142
- - **Seed**: Fixes randomness for reproducibility. Use -1 for a random seed.
143
- """
144
- )
145
-
146
- run_button.click(
147
- respond,
148
- inputs=[
149
- user_input,
150
- chatbot.state,
151
  system_message,
152
  max_tokens,
153
  temperature,
154
  top_p,
155
  frequency_penalty,
156
- seed,
157
- model,
158
- custom_model
159
  ],
160
- outputs=chatbot
 
 
 
 
161
  )
162
 
163
- print("Launching the demo application.")
164
- demo.launch()
 
 
 
 
21
  temperature,
22
  top_p,
23
  frequency_penalty,
24
+ seed
 
 
25
  ):
26
  """
27
+ This function handles the chatbot response. It takes in:
28
+ - message: the user's new message
29
+ - history: the list of previous messages, each as a tuple (user_msg, assistant_msg)
30
+ - system_message: the system prompt
31
+ - max_tokens: the maximum number of tokens to generate in the response
32
+ - temperature: sampling temperature
33
+ - top_p: top-p (nucleus) sampling
34
+ - frequency_penalty: penalize repeated tokens in the output
35
+ - seed: a fixed seed for reproducibility; -1 will mean 'random'
36
  """
37
+
38
  print(f"Received message: {message}")
39
  print(f"History: {history}")
40
  print(f"System message: {system_message}")
41
+ print(f"Max tokens: {max_tokens}, Temperature: {temperature}, Top-P: {top_p}")
42
+ print(f"Frequency Penalty: {frequency_penalty}, Seed: {seed}")
43
 
44
+ # Convert seed to None if -1 (meaning random)
45
+ if seed == -1:
46
+ seed = None
47
 
48
  # Construct the messages array required by the API
49
  messages = [{"role": "system", "content": system_message}]
 
66
  response = ""
67
  print("Sending request to OpenAI API.")
68
 
69
+ # Make the streaming request to the HF Inference API via openai-like client
70
  for message_chunk in client.chat.completions.create(
71
+ model="meta-llama/Llama-3.3-70B-Instruct", # You can update this to your specific model
72
  max_tokens=max_tokens,
73
+ stream=True, # Stream the response
74
  temperature=temperature,
75
  top_p=top_p,
76
+ frequency_penalty=frequency_penalty, # <-- NEW
77
+ seed=seed, # <-- NEW
78
  messages=messages,
79
  ):
80
  # Extract the token text from the response chunk
81
  token_text = message_chunk.choices[0].delta.content
82
  print(f"Received token: {token_text}")
83
  response += token_text
84
+ # As streaming progresses, yield partial output
85
  yield response
86
 
87
  print("Completed response generation.")
88
 
89
+ # Create a Chatbot component with a specified height
90
  chatbot = gr.Chatbot(height=600)
91
  print("Chatbot interface created.")
92
 
93
+ MODELS_LIST = [
94
+ "meta-llama/Llama-3.1-8B-Instruct",
95
+ "microsoft/Phi-3.5-mini-instruct",
 
 
 
 
96
  ]
97
 
 
98
  def filter_models(search_term):
99
+ """
100
+ Simple function to filter the placeholder model list based on the user's input
101
+ """
102
+ filtered_models = [m for m in MODELS_LIST if search_term.lower() in m.lower()]
103
+ return gr.update(choices=filtered_models)
104
+
105
+ # --------------------------------------
106
+ # REBUILD THE INTERFACE USING BLOCKS
107
+ # --------------------------------------
108
+ print("Building Gradio interface with Blocks...")
109
 
 
110
  with gr.Blocks(theme="Nymbo/Nymbo_Theme") as demo:
111
+ # Title
112
+ gr.Markdown("# Serverless-TextGen-Hub")
113
+
114
+ # Accordion: Parameters (sliders, etc.)
115
+ with gr.Accordion("Parameters", open=True):
116
+ system_message = gr.Textbox(value="", label="System message")
117
+ max_tokens = gr.Slider(minimum=1, maximum=4096, value=512, step=1, label="Max new tokens")
118
+ temperature = gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature")
119
+ top_p = gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-P")
120
+ frequency_penalty = gr.Slider(minimum=-2.0, maximum=2.0, value=0.0, step=0.1, label="Frequency Penalty")
121
+ seed = gr.Slider(minimum=-1, maximum=65535, value=-1, step=1, label="Seed (-1 for random)")
122
+
123
+ # Accordion: Featured Models (Below the parameters)
124
+ with gr.Accordion("Featured Models", open=False):
125
+ model_search = gr.Textbox(
126
+ label="Filter Models",
127
+ placeholder="Search for a featured model...",
128
+ lines=1
129
+ )
130
+ model_radio = gr.Radio(
131
+ label="Select a model below",
132
+ value=MODELS_LIST[0], # default
133
+ choices=MODELS_LIST,
134
+ interactive=True
135
+ )
136
+ model_search.change(filter_models, inputs=model_search, outputs=model_radio)
137
+
138
+ # The main ChatInterface
139
+ chat_interface = gr.ChatInterface(
140
+ fn=respond,
141
+ additional_inputs=[
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
142
  system_message,
143
  max_tokens,
144
  temperature,
145
  top_p,
146
  frequency_penalty,
147
+ seed
 
 
148
  ],
149
+ fill_height=True,
150
+ chatbot=chatbot,
151
+ theme="Nymbo/Nymbo_Theme",
152
+ title="Serverless-TextGen-Hub",
153
+ description="A comprehensive UI for text generation using the HF Inference API."
154
  )
155
 
156
+ print("Gradio interface initialized.")
157
+
158
+ if __name__ == "__main__":
159
+ print("Launching the demo application.")
160
+ demo.launch()