de-Rodrigo committed on
Commit
d163769
·
1 Parent(s): 472c6ff

Update Space Visualization

app.py CHANGED
@@ -9,11 +9,17 @@ import json
 import re
 import logging
 from datasets import load_dataset
+import os
 
 # Logging configuration
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
 
+
+# Paths to the static image and GIF
+README_IMAGE_PATH = os.path.join("figs", "saliencies-merit-dataset.png")
+GIF_PATH = os.path.join("figs", "demo-samples.gif")
+
 # Global variables for Donut model, processor, and dataset
 donut_model = None
 donut_processor = None
@@ -126,45 +132,68 @@ if __name__ == "__main__":
     models.append("de-Rodrigo/donut-merit")
 
     with gr.Blocks() as demo:
-        gr.Markdown("# Document Understanding with Donut")
-        gr.Markdown(
-            "Select a model and an image from the dataset, or upload your own image."
-        )
+        gr.Markdown("# Saliency Maps with the MERIT Dataset 🎒📃🏆")
 
-        with gr.Row():
-            with gr.Column():
-                model_dropdown = gr.Dropdown(choices=models, label="Select Model")
-                dataset_slider = gr.Slider(
-                    minimum=0,
-                    maximum=len(dataset) - 1,
-                    step=1,
-                    label="Dataset Image Index",
-                )
-                upload_image = gr.Image(type="pil", label="Or Upload Your Own Image")
+        gr.Image(value=README_IMAGE_PATH, label="Example Document")
 
-            preview_image = gr.Image(label="Selected/Uploaded Image")
+        with gr.Tab("Introduction"):
+            gr.Markdown(
+                """
+                ## Welcome to Saliency Maps with the [MERIT Dataset](https://huggingface.co/datasets/de-Rodrigo/merit)
 
-        process_button = gr.Button("Process Image")
+                This space demonstrates the capabilities of different Vision Language models
+                for document understanding tasks.
 
-        with gr.Row():
-            output_image = gr.Image(label="Processed Image")
-            output_text = gr.Textbox(label="Result")
+                ### Key Features:
+                - Process images from the [MERIT Dataset](https://huggingface.co/datasets/de-Rodrigo/merit) or upload your own image.
+                - Use a fine-tuned version of the available models to extract grades from documents.
+                - Visualize saliency maps to understand where the model is looking (WIP 🛠️).
+                """
+            )
 
-        # Update preview image when slider changes
-        dataset_slider.change(
-            fn=update_image, inputs=[dataset_slider], outputs=[preview_image]
-        )
+            gr.Image(value=GIF_PATH, label="Document Understanding Process")
 
-        # Update preview image when an image is uploaded
-        upload_image.change(
-            fn=lambda x: x, inputs=[upload_image], outputs=[preview_image]
-        )
+        with gr.Tab("Try It Yourself"):
+            gr.Markdown(
+                "Select a model and an image from the dataset, or upload your own image."
+            )
 
-        # Process image when button is clicked
-        process_button.click(
-            fn=process_image,
-            inputs=[model_dropdown, upload_image, dataset_slider],
-            outputs=[output_image, output_text],
-        )
+            with gr.Row():
+                with gr.Column():
+                    model_dropdown = gr.Dropdown(choices=models, label="Select Model")
+                    dataset_slider = gr.Slider(
+                        minimum=0,
+                        maximum=len(dataset) - 1,
+                        step=1,
+                        label="Dataset Image Index",
+                    )
+                    upload_image = gr.Image(
+                        type="pil", label="Or Upload Your Own Image"
+                    )
+
+                preview_image = gr.Image(label="Selected/Uploaded Image")
+
+            process_button = gr.Button("Process Image")
+
+            with gr.Row():
+                output_image = gr.Image(label="Processed Image")
+                output_text = gr.Textbox(label="Result")
+
+            # Update preview image when slider changes
+            dataset_slider.change(
+                fn=update_image, inputs=[dataset_slider], outputs=[preview_image]
+            )
+
+            # Update preview image when an image is uploaded
+            upload_image.change(
+                fn=lambda x: x, inputs=[upload_image], outputs=[preview_image]
+            )
+
+            # Process image when button is clicked
+            process_button.click(
+                fn=process_image,
+                inputs=[model_dropdown, upload_image, dataset_slider],
+                outputs=[output_image, output_text],
+            )
 
     demo.launch()
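
The Blocks wiring above references two callbacks, update_image and process_image, whose definitions sit earlier in app.py and are not part of this hunk. For orientation only, a minimal sketch of what such callbacks could look like is given below; it assumes the module-level dataset, donut_model, and donut_processor globals declared in the file, and the actual implementations in the Space may differ:

from PIL import Image

def update_image(index: int) -> Image.Image:
    # Hypothetical preview callback: return the dataset sample at the slider index.
    return dataset[int(index)]["image"]

def process_image(model_name: str, uploaded: Image.Image, index: int):
    # Hypothetical processing callback: prefer an uploaded image, otherwise
    # fall back to the dataset sample selected with the slider.
    image = uploaded if uploaded is not None else dataset[int(index)]["image"]

    # Run the already-loaded Donut model and decode its generated sequence.
    pixel_values = donut_processor(image, return_tensors="pt").pixel_values
    output_ids = donut_model.generate(pixel_values, max_length=512)
    result_text = donut_processor.batch_decode(output_ids, skip_special_tokens=True)[0]

    # The interface expects an (image, text) pair: the processed image
    # (saliency overlays are marked WIP in this commit) and the extracted text.
    return image, result_text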
figs/demo-samples.gif ADDED

Git LFS Details

  • SHA256: 398431edff9a4c57af24050b53ff00cfb9eb9cd1c2a6ab5f53a77760fed80067
  • Pointer size: 134 Bytes
  • Size of remote file: 166 MB
figs/saliencies-merit-dataset.png ADDED

Git LFS Details

  • SHA256: 609136140ff255106f6bf25fe27f6f0e6407bb90b62aae7bd915e82fb5e9d745
  • Pointer size: 132 Bytes
  • Size of remote file: 1.47 MB
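
Both figures are tracked with Git LFS and loaded through the constants added in app.py. As an illustrative sketch (not part of this commit, constant names assumed to mirror the ones above), a small startup check like the following can confirm that the committed filenames and the paths used in the code agree before demo.launch() runs:

import logging
import os

logger = logging.getLogger(__name__)

# Paths mirroring the constants introduced in app.py (assumed names).
README_IMAGE_PATH = os.path.join("figs", "saliencies-merit-dataset.png")
GIF_PATH = os.path.join("figs", "demo-samples.gif")

for asset in (README_IMAGE_PATH, GIF_PATH):
    if not os.path.isfile(asset):
        # A warning here usually means the filename committed under figs/
        # and the string in the code differ (e.g. hyphen vs. underscore).
        logger.warning("Static asset not found: %s", asset)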