"""Gradio viewer for outputs that closely resemble their original text.

Records are read from a JSON Lines file, filtered to those whose
``similarity_ratio`` exceeds ``similarity_threshold``, and shown one at a
time with the parts of the output that also occur in the original
highlighted.
"""
import json
import random
from difflib import SequenceMatcher

import gradio as gr

file_path = "filtered_big_gsm8k_output.jsonl"
similarity_threshold = 0.85
current_index = 0


def find_similar_chunks(original, output):
    """Split *output* into chunks marked by whether they match *original*.

    Args:
        original: Reference text to compare against.
        output: Text to split up and annotate.

    Returns:
        A list of ``(text, label)`` tuples covering *output* from left to
        right. ``label`` is ``1`` for a chunk that ``SequenceMatcher``
        reports as a matching block and ``None`` for the text in between.
        Zero-length chunks are never emitted.
    """
    highlighted_sequence = []
    left = 0
    matcher = SequenceMatcher(None, original, output)
    for _, j, n in matcher.get_matching_blocks():
        if left < j:
            # Text between the previous match and this one is unmatched.
            highlighted_sequence.append((output[left:j], None))
        if n:
            # The final block is always the zero-length sentinel
            # (len(original), len(output), 0); skipping it avoids emitting
            # an empty highlighted chunk. Because its j equals len(output),
            # the branch above has already flushed any unmatched tail, so
            # no handling is needed after the loop.
            highlighted_sequence.append((output[j:j + n], 1))
        left = j + n
    return highlighted_sequence


def _load_examples(path, threshold):
    """Return the records in the JSONL file at *path* above *threshold*.

    Each non-blank line is parsed once as JSON; a record is kept when its
    ``"similarity_ratio"`` value is strictly greater than *threshold*.
    """
    selected = []
    with open(path, "r", encoding="utf-8") as file:
        for line in file:
            if not line.strip():
                # Tolerate blank lines (e.g. a trailing newline at EOF).
                continue
            record = json.loads(line)
            if record["similarity_ratio"] > threshold:
                selected.append(record)
    return selected


examples = _load_examples(file_path, similarity_threshold)
if not examples:
    # Fail with an explanation instead of an IndexError further down.
    raise ValueError(
        f"No examples in {file_path!r} have a similarity_ratio above "
        f"{similarity_threshold}"
    )


def next_example():
    """Pick a random example and return the values for the UI components.

    Returns:
        A list ordered as ``[prompt, original, highlighted output,
        similarity ratio, seed]``, matching the ``outputs`` wired to the
        button below.
    """
    new_example = random.choice(examples)
    highlighted_output = find_similar_chunks(
        new_example["original"], new_example["output"]
    )
    return [
        new_example["prompt"],
        new_example["original"],
        highlighted_output,
        new_example["similarity_ratio"],
        new_example["seed"],
    ]


# Example shown when the page first loads.
initial_example = examples[current_index]

with gr.Blocks() as demo:
    prompt = gr.Textbox(
        label="Prompt",
        interactive=False,
        value=initial_example["prompt"],
    )
    with gr.Row():
        with gr.Column(scale=4):
            original = gr.Textbox(
                label="Original",
                interactive=False,
                value=initial_example["original"],
            )
        with gr.Column(scale=4):
            # NOTE(review): chunks are labelled with the integer 1 while the
            # color map is keyed by the string "1" -- confirm against the
            # Gradio HighlightedText docs that the yellow color is applied.
            output = gr.HighlightedText(
                label="Output",
                color_map={"1": "yellow"},
                value=find_similar_chunks(
                    initial_example["original"], initial_example["output"]
                ),
            )
    with gr.Row():
        with gr.Column(scale=1):
            similarity = gr.Textbox(
                label="Similarity ratio",
                interactive=False,
                value=initial_example["similarity_ratio"],
            )
        with gr.Column(scale=1):
            seed = gr.Textbox(
                label="Seed",
                interactive=False,
                value=initial_example["seed"],
            )
    next_btn = gr.Button("Another example")
    next_btn.click(
        fn=next_example,
        outputs=[prompt, original, output, similarity, seed],
    )

demo.launch()