import gradio as gr
import os
from huggingface_hub import HfApi, snapshot_download
from apscheduler.schedulers.background import BackgroundScheduler
from datasets import load_dataset
from src.utils import load_all_data, prep_df, sort_by_category
from src.md import ABOUT_TEXT, TOP_TEXT
from src.css import custom_css
import numpy as np

api = HfApi()

COLLAB_TOKEN = os.environ.get("COLLAB_TOKEN")
evals_repo = "allenai/href_results"

eval_set_repo = "allenai/href_validation"
local_result_dir = "./results/"

def restart_space():
    api.restart_space(repo_id="allenai/href", token=COLLAB_TOKEN)

print("Pulling evaluation results")
repo = snapshot_download(
    local_dir=local_result_dir,
    ignore_patterns=[],
    repo_id=evals_repo,
    use_auth_token=COLLAB_TOKEN,
    tqdm_class=None, 
    etag_timeout=30,
    repo_type="dataset",
)
    
href_data_greedy = prep_df(load_all_data(local_result_dir, subdir="temperature=0.0"))
href_data_nongreedy = prep_df(load_all_data(local_result_dir, subdir="temperature=1.0"))


col_types_href = ["number"] + ["markdown"] + ["number"] * int((len(href_data_greedy.columns) - 1) / 2)
col_types_href_hidden = ["number"] + ["markdown"] + ["number"] * (len(href_data_greedy.columns) - 1)
categories = ['Average', 'Brainstorm', 'Open QA', 'Closed QA', 'Extract', 'Generation', 'Rewrite', 'Summarize', 'Classify', "Reasoning Over Numerical Data", "Multi-Document Synthesis", "Fact Checking or Attributed QA"]
# categories = ['Average', 'Brainstorm', 'Open QA', 'Closed QA', 'Extract', 'Generation', 'Rewrite', 'Summarize', 'Classify']

# for showing random samples
eval_set = load_dataset(eval_set_repo, use_auth_token=COLLAB_TOKEN, split="dev")
def random_sample(r: gr.Request, category):
    if category is None or category == []:
        sample_index = np.random.randint(0, len(eval_set) - 1)
        sample = eval_set[sample_index]
    else: # filter by category (can be list)
        if isinstance(category, str):
            category = [category]
        # filter down dataset to only include the category(s)
        eval_set_filtered = eval_set.filter(lambda x: x["category"] in category)
        sample_index = np.random.randint(0, len(eval_set_filtered) - 1)
        sample = eval_set_filtered[sample_index]

    markdown_text = '\n\n'.join([f"**{key}**:\n\n{value}" for key, value in sample.items()])
    return markdown_text

subsets = eval_set.unique("category")


def regex_table(dataframe, regex, selected_category, style=True):
    """
    Takes a model name as a regex, then returns only the rows that has that in it.
    """
    dataframe = sort_by_category(dataframe, selected_category)

    # Split regex statement by comma and trim whitespace around regexes
    regex_list = [x.strip() for x in regex.split(",")]
    # Join the list into a single regex pattern with '|' acting as OR
    combined_regex = '|'.join(regex_list)

    # Filter the dataframe such that 'model' contains any of the regex patterns
    data = dataframe[dataframe["Model"].str.contains(combined_regex, case=False, na=False)]

    data.reset_index(drop=True, inplace=True)

    if style:
         # Format for different columns
        format_dict = {col: "{:.1f}" for col in data.columns if col not in ['Average', 'Model', 'Rank', '95% CI']}
        format_dict['Average'] = "{:.2f}"
        data = data.style.format(format_dict, na_rep='').set_properties(**{'text-align': 'right'})
    return data


total_models = len(regex_table(href_data_greedy.copy(), "", "Average", style=False).values)

with gr.Blocks(css=custom_css) as app:
    # create tabs for the app, moving the current table to one titled "rewardbench" and the benchmark_text to a tab called "About"
    with gr.Row():
        with gr.Column(scale=8):
            gr.Markdown(TOP_TEXT.format(str(total_models)))
        with gr.Column(scale=2):
            # search = gr.Textbox(label="Model Search (delimit with , )", placeholder="Regex search for a model")
            # filter_button = gr.Checkbox(label="Include AI2 training runs (or type ai2 above).", interactive=True)
            # img = gr.Image(value="https://private-user-images.githubusercontent.com/10695622/310698241-24ed272a-0844-451f-b414-fde57478703e.png", width=500)
            gr.Markdown("""
                        <img src="file/src/logo.png" height="130">
                        """)
    with gr.Tabs(elem_classes="tab-buttons") as tabs:
        with gr.TabItem("🏆 HREF Leaderboard"):
            with gr.Row():
                search_1 = gr.Textbox(label="Model Search (delimit with , )", 
                                    #   placeholder="Model Search (delimit with , )",
                                      show_label=True)
                category_selector_1 = gr.Dropdown(categories, label="Sorted By", value="Average", multiselect=False, show_label=True, elem_id="category_selector", elem_classes="category_selector_class")
            with gr.Row():
                # reference data
                rewardbench_table_hidden = gr.Dataframe(
                    href_data_greedy.values,
                    datatype=col_types_href_hidden,
                    headers=href_data_greedy.columns.tolist(),
                    visible=False,
                )
                rewardbench_table = gr.Dataframe(
                    regex_table(href_data_greedy.copy(), "", "Average"),
                    datatype=col_types_href,
                    headers=href_data_greedy.columns.tolist(),
                    elem_id="href_data_greedy",
                    interactive=False,
                    height=1000,
                )
        # with gr.TabItem("Non-Greedy"):
        #     with gr.Row():
        #         search_2 = gr.Textbox(label="Model Search (delimit with , )", 
        #                             #   placeholder="Model Search (delimit with , )",
        #                               show_label=True)
        #         category_selector_2 = gr.Dropdown(categories, label="Sorted By", value="Average",
        #                                             multiselect=False, show_label=True, elem_id="category_selector")
        #     with gr.Row():
        #         # reference data
        #         rewardbench_table_hidden_nongreedy = gr.Dataframe(
        #             href_data_nongreedy.values,
        #             datatype=col_types_href_hidden,
        #             headers=href_data_nongreedy.columns.tolist(),
        #             visible=False,
        #         )
        #         rewardbench_table_nongreedy = gr.Dataframe(
        #             regex_table(href_data_nongreedy.copy(), "", "Average"),
        #             datatype=col_types_href,
        #             headers=href_data_nongreedy.columns.tolist(),
        #             elem_id="href_data_nongreedy",
        #             interactive=False,
        #             height=1000,
        #         )
        with gr.TabItem("About"):
            with gr.Row():
                gr.Markdown(ABOUT_TEXT)

        with gr.TabItem("Dataset Viewer"):
            with gr.Row():
                # loads one sample
                gr.Markdown("""## Random Dataset Sample Viewer""")
                subset_selector = gr.Dropdown(subsets, label="Category", value=None, multiselect=True)
                button = gr.Button("Show Random Sample")

            with gr.Row():
                sample_display = gr.Markdown("{sampled data loads here}")

            button.click(fn=random_sample, inputs=[subset_selector], outputs=[sample_display])

    search_1.change(regex_table, inputs=[rewardbench_table_hidden, search_1, category_selector_1], outputs=rewardbench_table)  
    category_selector_1.change(regex_table, inputs=[rewardbench_table_hidden, search_1, category_selector_1], outputs=rewardbench_table)
    # search_2.change(regex_table, inputs=[rewardbench_table_hidden_nongreedy, search_2, category_selector_2], outputs=rewardbench_table_nongreedy)  
    # category_selector_2.change(regex_table, inputs=[rewardbench_table_hidden_nongreedy, search_2, category_selector_2], outputs=rewardbench_table_nongreedy)

    with gr.Row():
        with gr.Accordion("📚 Citation", open=False):
            citation_button = gr.Textbox(
                value=r"""@article{lyu2024href,
      title={HREF: Human Response-Guided Evaluation of Instruction Following in Language Models}, 
      author={Xinxi Lyu and Yizhong Wang and Hannaneh Hajishirzi and Pradeep Dasigi},
      journal={arXiv preprint arXiv:2412.15524},
      year={2024} 
}""",
                lines=7,
                label="Copy the following to cite these results.",
                elem_id="citation-button",
                show_copy_button=True,
            )


scheduler = BackgroundScheduler()
scheduler.add_job(restart_space, "interval", seconds=10800) # restarted every 3h
scheduler.start()
app.launch(allowed_paths=['src/']) # had .queue() before launch before... not sure if that's necessary