update generation type names
app.py (changed)
@@ -11,7 +11,7 @@ import pandas as pd
 
 # benchmark order: pytorch, tf eager, tf xla; units = ms
 BENCHMARK_DATA = {
-    "Greedy
+    "Greedy Decoding": {
         "DistilGPT2": {
             "T4": [336.22, 3976.23, 115.84],
             "3090": [158.38, 1835.82, 46.56],
@@ -53,7 +53,7 @@ BENCHMARK_DATA = {
             "A100": [1801.68, 16707.71, 411.93],
         },
     },
-    "
+    "Sampling": {
         "DistilGPT2": {
             "T4": [617.40, 6078.81, 221.65],
             "3090": [310.37, 2843.73, 85.44],
@@ -184,8 +184,8 @@ with demo:
         """
     )
     with gr.Tabs():
-        with gr.TabItem("Greedy
-            plot_fn = functools.partial(get_plot, generate_type="Greedy
+        with gr.TabItem("Greedy Decoding"):
+            plot_fn = functools.partial(get_plot, generate_type="Greedy Decoding")
             with gr.Row():
                 with gr.Column():
                     model_selector = gr.Dropdown(
@@ -202,7 +202,7 @@ with demo:
                     )
                     gr.Markdown(
                         """
-                        ### Greedy
+                        ### Greedy Decoding benchmark parameters
                         - `max_new_tokens = 64`;
                         - `pad_to_multiple_of = 64` for Tensorflow XLA models. Others do not pad (input prompts between 2 and 33 tokens).
                         """
@@ -210,8 +210,8 @@ with demo:
             plot = gr.Image(value=plot_fn("T5 Small", "Yes")) # Show plot when the gradio app is initialized
             model_selector.change(fn=plot_fn, inputs=[model_selector, eager_enabler], outputs=plot)
             eager_enabler.change(fn=plot_fn, inputs=[model_selector, eager_enabler], outputs=plot)
-        with gr.TabItem("
-            plot_fn = functools.partial(get_plot, generate_type="
+        with gr.TabItem("Sampling"):
+            plot_fn = functools.partial(get_plot, generate_type="Sampling")
             with gr.Row():
                 with gr.Column():
                     model_selector = gr.Dropdown(
@@ -228,7 +228,7 @@ with demo:
                     )
                     gr.Markdown(
                         """
-                        ###
+                        ### Sampling benchmark parameters
                         - `max_new_tokens = 128`;
                         - `temperature = 2.0`;
                         - `top_k = 50`;
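The renamed top-level keys of `BENCHMARK_DATA` ("Greedy Decoding" and "Sampling") are the same strings used as tab labels and as the `generate_type` argument, so the rename has to be applied consistently in all of those places. As a reminder of the layout, each leaf list holds three latencies in the order given by the comment: PyTorch, TF eager, TF XLA, in ms. The sketch below only illustrates how such an entry is read; `lookup_latencies` and `FRAMEWORKS` are hypothetical names, not part of the app.

```python
# Illustrative only: lookup_latencies and FRAMEWORKS are hypothetical,
# shown to make the BENCHMARK_DATA layout explicit.
BENCHMARK_DATA = {
    "Greedy Decoding": {
        "DistilGPT2": {
            "T4": [336.22, 3976.23, 115.84],  # PyTorch, TF eager, TF XLA (ms)
        },
    },
}

FRAMEWORKS = ("PyTorch", "TF eager", "TF XLA")

def lookup_latencies(generate_type, model, gpu):
    """Return {framework: latency_ms} for one benchmark entry."""
    return dict(zip(FRAMEWORKS, BENCHMARK_DATA[generate_type][model][gpu]))

print(lookup_latencies("Greedy Decoding", "DistilGPT2", "T4"))
# {'PyTorch': 336.22, 'TF eager': 3976.23, 'TF XLA': 115.84}
```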
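On the UI side, both tabs follow the same pattern: `functools.partial` binds the tab's `generate_type`, and the dropdown's `change` event re-renders the plot through that bound function. The following is a simplified sketch of that wiring under the new names, not the Space's actual code: `get_plot` is stubbed out, the `eager_enabler` checkbox and the image output are omitted, and the tabs are built in a loop for brevity.

```python
# Minimal sketch of the tab/partial wiring pattern, with get_plot stubbed out.
import functools
import gradio as gr

def get_plot(model_name, generate_type):
    # Stub: the real get_plot builds a benchmark chart from BENCHMARK_DATA.
    return f"{generate_type} results for {model_name}"

with gr.Blocks() as demo:
    with gr.Tabs():
        # The tab labels and the bound generate_type must match the
        # BENCHMARK_DATA keys ("Greedy Decoding", "Sampling").
        for generate_type in ("Greedy Decoding", "Sampling"):
            with gr.TabItem(generate_type):
                plot_fn = functools.partial(get_plot, generate_type=generate_type)
                model_selector = gr.Dropdown(
                    choices=["DistilGPT2", "T5 Small"], value="T5 Small"
                )
                output = gr.Textbox(value=plot_fn("T5 Small"))  # rendered at startup
                model_selector.change(fn=plot_fn, inputs=model_selector, outputs=output)

if __name__ == "__main__":
    demo.launch()
```

Because the partial is created inside each `with gr.TabItem(...)` block, every tab gets its own `plot_fn` with the matching `generate_type` already bound, and the `change` callback only needs to pass the dropdown value.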