# chat.py
import json
from functools import lru_cache

import gradio as gr
import numpy as np
import pandas as pd
import promptquality as pq

project_name = "agent-lb-v1"
PROJECT_ID = pq.get_project_from_name(project_name).id

# Placeholder fragments shared by every "nothing to show" code path.
_NO_DATA_HTML = "\nNo data available\n"
_NO_METRICS_HTML = "\nNo metrics available\n"
_NO_TOOLS_HTML = "\nNo tool information available\n"


@lru_cache(maxsize=1000)
def get_model_score_for_dataset(model, dataset):
    """Fetch tool-selection-quality metrics for one model/dataset run.

    The run is looked up by the name ``"{model} {dataset}"`` inside
    ``PROJECT_ID`` and at most 1000 rows are requested. Rows whose rationale
    is falsy are dropped from all three returned lists, so the lists stay
    aligned with each other.

    Results are memoized per ``(model, dataset)``; the returned dict is the
    cached object itself, so callers should treat it as read-only.

    Returns:
        A dict with keys ``mean_score`` (mean of the rounded scores, rounded
        to 2 places; NaN when no row qualifies), ``scores``, ``rationales``
        and ``explanations``.
    """
    print(f"Getting metrics for {model} {project_name} for dataset {dataset}")
    run_name = f"{model} {dataset}"
    run_id = pq.get_run_from_name(run_name, PROJECT_ID).id
    rows = pq.get_rows(
        project_id=PROJECT_ID,
        run_id=run_id,
        task_type=None,
        config=None,
        starting_token=0,
        limit=1000,
    )

    # Keep only rows that carry a rationale, in a single pass.
    kept = [
        row.metrics
        for row in rows
        if row.metrics.tool_selection_quality_rationale
    ]
    scores = [round(m.tool_selection_quality, 2) for m in kept]
    rationales = [m.tool_selection_quality_rationale for m in kept]
    explanations = [m.tool_selection_quality_explanation for m in kept]

    # np.mean([]) warns before yielding NaN; return NaN directly instead.
    mean_score = round(np.mean(scores), 2) if scores else float("nan")
    return {
        "mean_score": mean_score,
        "scores": scores,
        "rationales": rationales,
        "explanations": explanations,
    }


def get_updated_df(df, data):
    """Attach ``rationale``, ``explanation`` and ``score`` columns to *df*.

    *df* is modified in place and also returned. Each list in *data* must
    have exactly ``len(df)`` entries, otherwise pandas raises ``ValueError``.
    """
    df["rationale"] = data["rationales"]
    df["explanation"] = data["explanations"]
    df["score"] = data["scores"]
    return df


def get_chat_and_score_df(model, dataset):
    """Load ``datasets/{dataset}.parquet`` and merge in the run's metrics."""
    data = get_model_score_for_dataset(model, dataset)
    df = pd.read_parquet(f"datasets/{dataset}.parquet")
    return get_updated_df(df, data)


def format_chat_message(role, content):
    """Format an individual chat message: role, then content."""
    return f"\n{role}\n{content}\n"


def format_tool_info(tools):
    """Format tool information for display.

    *tools* may be a JSON string or an already-decoded iterable of dicts
    with optional ``name``, ``description`` and ``parameters`` keys. Invalid
    JSON or an empty value yields the "no tool information" placeholder.
    """
    if isinstance(tools, str):
        try:
            tools = json.loads(tools)
        except json.JSONDecodeError:
            return _NO_TOOLS_HTML

    if not tools:
        return _NO_TOOLS_HTML

    tool_html = "".join(
        f"\n{tool.get('name', 'Unnamed Tool')}"
        f"\n{tool.get('description', 'No description available')}"
        f"\n{format_parameters(tool.get('parameters', {}))}\n"
        for tool in tools
    )
    return f"\n{tool_html}\n"


def format_parameters(parameters):
    """Format a ``{name: description}`` mapping, one entry per line."""
    if not parameters:
        return "\nNo parameters\n"
    return "".join(f"\n{name}: {desc}\n" for name, desc in parameters.items())


def format_metrics(score, rationale, explanation):
    """Format the score (2 decimals), rationale and explanation sections."""
    return (
        f"\n\nScore\n\n{score:.2f}\n"
        f"\nRationale\n\n{rationale}\n"
        f"\nExplanation\n\n{explanation}\n"
    )


def update_chat_display(df, index):
    """Update the chat visualization for a specific index.

    Returns:
        A ``(chat_html, metrics_html, tool_html)`` tuple. Placeholder
        fragments are returned when *df* is ``None``/empty or *index* is
        past the last row.
    """
    if df is None or df.empty or index >= len(df):
        return (_NO_DATA_HTML, _NO_METRICS_HTML, _NO_TOOLS_HTML)

    row = df.iloc[index]

    # Format chat messages
    messages = json.loads(row["conversation"])
    rendered = "".join(
        format_chat_message(msg["role"], msg["content"]) for msg in messages
    )
    chat_html = f"\n{rendered}\n"

    # Format metrics
    metrics_html = format_metrics(row["score"], row["rationale"], row["explanation"])

    # Format tool info
    tool_html = format_tool_info(row["tools_langchain"])

    return chat_html, metrics_html, tool_html


def _empty_display(message):
    """Build the 5-tuple shown when there is nothing to display."""
    return (
        message,
        _NO_METRICS_HTML,
        _NO_TOOLS_HTML,
        gr.update(maximum=0, value=0),
        "0/0",
    )


def filter_and_update_display(model, dataset, selected_scores, current_index):
    """Filter a run's rows by score and render the row at *current_index*.

    Returns:
        ``(chat_html, metrics_html, tool_html, index_update, position_label)``
        where ``index_update`` is a ``gr.update`` carrying the new maximum
        and value, and ``position_label`` reads like ``"3/17"``. Any
        exception is reported in the first element instead of propagating.
    """
    try:
        # Get data and filter by scores
        df_chat = get_chat_and_score_df(model, dataset)
        if selected_scores:
            df_chat = df_chat[df_chat["score"].isin(selected_scores)]

        if df_chat.empty:
            return _empty_display("\nNo data available for selected filters\n")

        # Update index bounds: coerce to int (iloc rejects floats) and clamp
        # on both sides so a negative value cannot wrap around to the end.
        max_index = len(df_chat) - 1
        current_index = max(0, min(int(current_index), max_index))

        # Get displays for current index
        chat_html, metrics_html, tool_html = update_chat_display(df_chat, current_index)

        return (
            chat_html,
            metrics_html,
            tool_html,
            gr.update(maximum=max_index, value=current_index),
            f"{current_index + 1}/{len(df_chat)}",
        )
    except Exception as e:
        # UI boundary: surface the failure in the page rather than crash.
        print(f"Error in filter_and_update_display: {str(e)}")
        return _empty_display(f"\nError: {str(e)}\n")