leonard-dls committed on
Commit
4eff8a8
·
1 Parent(s): c0b1d6c

Add application file

Browse files
Files changed (2) hide show
  1. app.py +85 -0
  2. filtered_big_gsm8k_output.jsonl +0 -0
app.py ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import random
3
+
4
+ import gradio as gr
5
+ from difflib import SequenceMatcher
6
+
7
# Position in the loaded example list of the example shown when the page
# is first built.
current_index = 0

# Only examples whose "similarity_ratio" is strictly greater than this value
# are kept when the file is loaded.
similarity_threshold = 0.85

# File read at start-up, one JSON object per line.
# NOTE(review): the commit that introduces this script adds a data file named
# "filtered_big_gsm8k_output.jsonl" -- confirm that the path below exists.
file_path = "big_gsm8k_output.jsonl"
10
+
11
+
12
def find_similar_chunks(original, output):
    """Split ``output`` into segments flagged by whether they match ``original``.

    The two sequences are compared with ``difflib.SequenceMatcher``. Every
    matching block it reports becomes a segment labelled ``1``; the stretches
    of ``output`` between matching blocks become segments labelled ``None``.
    Concatenating the segment texts in order reproduces ``output``.

    Args:
        original: Reference sequence (the callers in this file pass the
            "original" field of an example).
        output: Sequence to annotate (the callers in this file pass the
            "output" field of an example).

    Returns:
        A list of ``(segment, label)`` tuples in ``output`` order, where
        ``label`` is ``1`` for a matched segment and ``None`` otherwise.
        Zero-length segments are never emitted; an empty ``output`` yields
        an empty list.
    """
    matcher = SequenceMatcher(None, original, output)
    highlighted_sequence = []
    left = 0  # index in ``output`` of the first element not yet emitted
    for _, j, n in matcher.get_matching_blocks():
        if left < j:
            # Unmatched stretch between the previous block and this one.
            highlighted_sequence.append((output[left:j], None))
        # get_matching_blocks() always terminates with a zero-length sentinel
        # block positioned at len(output). It flushes any unmatched tail via
        # the branch above, but must not produce an empty highlighted segment.
        if n:
            highlighted_sequence.append((output[j:j + n], 1))
        left = j + n

    return highlighted_sequence
25
+
26
# Load the examples to browse: one JSON object per line of ``file_path``,
# keeping only those whose "similarity_ratio" is strictly above
# ``similarity_threshold``.
examples = []
with open(file_path, "r", encoding="utf-8") as file:
    for line in file:
        # Tolerate blank lines (e.g. a trailing newline at end of file)
        # instead of failing inside json.loads.
        if not line.strip():
            continue
        # Parse each line once and reuse the result for both the filter
        # and the stored example.
        record = json.loads(line)
        if record["similarity_ratio"] > similarity_threshold:
            examples.append(record)
28
+
29
def next_example():
    """Pick a random loaded example and return the values to display for it.

    Returns:
        A five-item list: the example's "prompt", its "original" text, its
        "output" annotated by ``find_similar_chunks``, its
        "similarity_ratio", and its "seed" -- the same order as the
        components passed as ``outputs`` to the button's click handler.
    """
    picked = random.choice(examples)
    marked_output = find_similar_chunks(picked["original"], picked["output"])

    return [
        picked["prompt"],
        picked["original"],
        marked_output,
        picked["similarity_ratio"],
        picked["seed"],
    ]
42
+
43
# Example shown when the page is first built; looked up once so that every
# component below starts from the same record.
initial_example = examples[current_index]

# Page layout: the prompt on top, the original text next to the annotated
# output, then the similarity ratio and seed, then a button that swaps in
# another randomly chosen example.
with gr.Blocks() as demo:
    prompt = gr.Textbox(
        label="Prompt",
        interactive=False,
        value=initial_example["prompt"],
    )
    with gr.Row():
        with gr.Column(scale=4):
            original = gr.Textbox(
                label="Original",
                interactive=False,
                value=initial_example["original"],
            )
        with gr.Column(scale=4):
            # NOTE(review): color_map is keyed by the string "1" while
            # find_similar_chunks labels matched segments with the integer
            # 1 -- confirm Gradio applies this colour to those segments.
            output = gr.HighlightedText(
                label="Output",
                color_map={"1": "yellow"},
                value=find_similar_chunks(
                    initial_example["original"],
                    initial_example["output"],
                ),
            )

    with gr.Row():
        with gr.Column(scale=1):
            similarity = gr.Textbox(
                label="Similarity ratio",
                interactive=False,
                value=initial_example["similarity_ratio"],
            )
        with gr.Column(scale=1):
            seed = gr.Textbox(
                label="Seed",
                interactive=False,
                value=initial_example["seed"],
            )

    # Label corrected from the misspelled "Anoter example".
    next_btn = gr.Button("Another example")

    # next_example returns one value per component, in this order.
    next_btn.click(
        fn=next_example,
        outputs=[prompt, original, output, similarity, seed],
    )


demo.launch()
filtered_big_gsm8k_output.jsonl ADDED
The diff for this file is too large to render. See raw diff