leonard-dls
committed on
Commit
·
4eff8a8
1
Parent(s):
c0b1d6c
Add application file
Browse files- app.py +85 -0
- filtered_big_gsm8k_output.jsonl +0 -0
app.py
ADDED
@@ -0,0 +1,85 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import json
|
2 |
+
import random
|
3 |
+
|
4 |
+
import gradio as gr
|
5 |
+
from difflib import SequenceMatcher
|
6 |
+
|
7 |
+
# Path of the JSON-lines dataset read at start-up.
# NOTE(review): the commit that adds this script also adds a file named
# "filtered_big_gsm8k_output.jsonl" -- confirm this path is the intended one.
file_path = "big_gsm8k_output.jsonl"
# Only records whose "similarity_ratio" is strictly above this value are kept.
similarity_threshold = 0.85
# Index of the example used for the initial values of the UI components.
current_index = 0
|
10 |
+
|
11 |
+
|
12 |
+
def find_similar_chunks(original, output):
    """Split ``output`` into chunks flagged by whether they also occur in ``original``.

    The chunks are produced with ``difflib.SequenceMatcher`` and are meant to
    be fed to a highlighted-text widget.

    Args:
        original: Reference text the output is compared against.
        output: Text to segment.

    Returns:
        A list of ``(text, label)`` tuples that, concatenated in order,
        reproduce ``output``. ``label`` is ``1`` for a chunk that matches a
        block of ``original`` and ``None`` for an unmatched chunk. Empty
        chunks are never emitted.
    """
    matcher = SequenceMatcher(None, original, output)
    highlighted_sequence = []
    left = 0  # end (exclusive) of the part of ``output`` already emitted
    for _, j, n in matcher.get_matching_blocks():
        if left < j:
            # Gap between the previous match and this one: unmatched text.
            highlighted_sequence.append((output[left:j], None))
        if n:
            # The final block is always a zero-length sentinel located at
            # len(output); skipping it avoids emitting an empty highlighted
            # chunk, while the gap check above has already flushed any
            # unmatched tail.
            highlighted_sequence.append((output[j:j + n], 1))
        left = j + n
    return highlighted_sequence
|
25 |
+
|
26 |
+
# Load the dataset once at import time, keeping only the records whose
# precomputed "similarity_ratio" is strictly above the threshold.
with open(file_path, "r", encoding="utf-8") as file:
    # Decode every line exactly once; blank lines (e.g. a trailing newline at
    # the end of the file) are skipped instead of raising JSONDecodeError.
    records = (json.loads(line) for line in file if line.strip())
    examples = [
        record
        for record in records
        if record["similarity_ratio"] > similarity_threshold
    ]
|
28 |
+
|
29 |
+
def next_example():
    """Pick a random loaded example and return the values to display.

    Returns:
        A five-item list: the prompt, the original text, the output split
        into highlighted chunks by ``find_similar_chunks``, the similarity
        ratio and the seed of the chosen example.
    """
    picked = random.choice(examples)
    marked_output = find_similar_chunks(picked["original"], picked["output"])
    values = [picked["prompt"], picked["original"], marked_output]
    values.append(picked["similarity_ratio"])
    values.append(picked["seed"])
    return values
|
42 |
+
|
43 |
+
# Build the UI: a prompt box, the original text next to the highlighted
# output, the similarity ratio and seed, and a button that swaps in another
# randomly chosen example. All components start out showing
# examples[current_index].
with gr.Blocks() as demo:
    prompt = gr.Textbox(
        label="Prompt",
        interactive=False,
        value=examples[current_index]["prompt"],
    )
    with gr.Row():
        with gr.Column(scale=4):
            original = gr.Textbox(
                label="Original",
                interactive=False,
                value=examples[current_index]["original"],
            )
        with gr.Column(scale=4):
            output = gr.HighlightedText(
                label="Output",
                color_map={"1": "yellow"},
                value=find_similar_chunks(examples[current_index]["original"],
                                          examples[current_index]["output"]),
            )

    with gr.Row():
        with gr.Column(scale=1):
            similarity = gr.Textbox(
                label="Similarity ratio",
                interactive=False,
                value=examples[current_index]["similarity_ratio"],
            )
        with gr.Column(scale=1):
            seed = gr.Textbox(
                label="Seed",
                interactive=False,
                value=examples[current_index]["seed"],
            )

    # Label typo fixed ("Anoter" -> "Another").
    next_btn = gr.Button("Another example")

    # The handler takes no inputs; the order of its returned list matches the
    # order of the components listed in `outputs`.
    next_btn.click(fn=next_example,
                   outputs=[prompt, original, output, similarity, seed])


demo.launch()
|
filtered_big_gsm8k_output.jsonl
ADDED
The diff for this file is too large to render.
See raw diff
|
|