# chat.py
import gradio as gr
import json
import pandas as pd
import numpy as np
from functools import lru_cache
import promptquality as pq
# Name of the promptquality project this module reads from; also echoed in
# the log line printed by get_model_score_for_dataset.
project_name = "agent-lb-v1"
# Project id resolved once, when the module is imported, and reused by
# get_model_score_for_dataset for its run and row lookups.
# NOTE(review): this presumably performs a remote lookup at import time, so
# importing the module would fail if the project cannot be resolved — confirm.
PROJECT_ID = pq.get_project_from_name(project_name).id
@lru_cache(maxsize=1000)
def get_model_score_for_dataset(model, dataset):
    """Collect tool-selection-quality metrics for one model/dataset run.

    The run is looked up by the name ``"{model} {dataset}"`` inside
    ``PROJECT_ID``. Rows whose rationale is falsy are dropped, so the three
    returned lists are aligned with each other. Results are memoised per
    ``(model, dataset)`` pair by ``lru_cache``.

    Args:
        model: Model name; first half of the run name.
        dataset: Dataset name; second half of the run name.

    Returns:
        dict: ``"mean_score"`` (mean of the kept scores, rounded to two
        decimals), plus the parallel lists ``"scores"`` (each rounded to two
        decimals), ``"rationales"`` and ``"explanations"``.
    """
    print(f"Getting metrics for {model} {project_name} for dataset {dataset}")
    run = pq.get_run_from_name(f"{model} {dataset}", PROJECT_ID)
    fetched_rows = pq.get_rows(
        project_id=PROJECT_ID,
        run_id=run.id,
        task_type=None,
        config=None,
        starting_token=0,
        limit=1000,
    )

    kept_scores = []
    kept_rationales = []
    kept_explanations = []
    for row in fetched_rows:
        metrics = row.metrics
        rationale = metrics.tool_selection_quality_rationale
        # Rows without a rationale are excluded from every output list.
        if not rationale:
            continue
        kept_scores.append(round(metrics.tool_selection_quality, 2))
        kept_rationales.append(rationale)
        kept_explanations.append(metrics.tool_selection_quality_explanation)

    return {
        "mean_score": round(np.mean(kept_scores), 2),
        "scores": kept_scores,
        "rationales": kept_rationales,
        "explanations": kept_explanations,
    }
def get_updated_df(df, data):
    """Attach the metric columns from ``data`` to ``df`` and return ``df``.

    The frame is modified in place: ``data["rationales"]``,
    ``data["explanations"]`` and ``data["scores"]`` are written, in that
    order, to the columns ``rationale``, ``explanation`` and ``score``.

    Args:
        df: DataFrame to extend.
        data: Mapping holding the three lists named above.

    Returns:
        The same ``df`` object that was passed in.
    """
    # (target column, key in ``data``) — order fixes the column order.
    column_sources = (
        ("rationale", "rationales"),
        ("explanation", "explanations"),
        ("score", "scores"),
    )
    for column, key in column_sources:
        df[column] = data[key]
    return df
def get_chat_and_score_df(model, dataset):
    """Load a dataset's conversations and join the model's metrics onto them.

    Metrics are fetched first via ``get_model_score_for_dataset``; the
    conversations are then read from ``datasets/{dataset}.parquet`` and
    extended with the metric columns by ``get_updated_df``.

    Args:
        model: Model name forwarded to the metrics lookup.
        dataset: Dataset name; used for the lookup and the parquet path.

    Returns:
        The DataFrame read from parquet, with the metric columns added.
    """
    metrics = get_model_score_for_dataset(model, dataset)
    conversations = pd.read_parquet(f"datasets/{dataset}.parquet")
    return get_updated_df(conversations, metrics)
def format_chat_message(role, content):
    """Return the display fragment for a single chat message.

    NOTE(review): the template in this revision contains nothing but a line
    break, so the result is always ``"\\n"`` and neither ``role`` nor
    ``content`` is rendered. The markup looks like it was lost — confirm
    against the intended template before relying on this output.

    Args:
        role: Speaker role; must support ``.lower()``.
        content: Message text (currently not used by the template).

    Returns:
        str: The formatted fragment.
    """
    # Lower-cased role; computed but not interpolated by the current template.
    role_style = role.lower()
    fragment = "\n"
    return fragment
def format_tool_info(tools):
    """Return the display fragment for a row's tool definitions.

    Args:
        tools: Either an iterable of tool descriptions or a JSON string that
            decodes to one.

    Returns:
        str: ``"No tool information available\\n"`` when ``tools`` is empty,
        ``None`` or a string that is not valid JSON; otherwise one fragment
        per tool followed by a trailing line break.
    """
    placeholder = "No tool information available\n"

    if isinstance(tools, str):
        try:
            tools = json.loads(tools)
        except ValueError:
            # json.JSONDecodeError is a ValueError subclass; catching it
            # specifically keeps KeyboardInterrupt/SystemExit propagating.
            return placeholder

    if not tools:
        return placeholder

    # NOTE(review): the per-tool template in this revision is only a line
    # break, so individual tool fields are not rendered — confirm whether
    # markup was lost here.
    tool_html = "".join("\n" for _ in tools)
    return f"{tool_html}\n"
def format_parameters(parameters):
    """Return one ``name: description`` entry per parameter.

    Args:
        parameters: Mapping of parameter name to description, or a falsy
            value (``None``, ``{}``) when there are none.

    Returns:
        str: ``"No parameters\\n"`` when ``parameters`` is falsy; otherwise
        the entries concatenated in mapping order, each wrapped in line
        breaks.
    """
    if not parameters:
        return "No parameters\n"
    # Join once instead of growing a string with += inside the loop.
    return "".join(f"\n{name}: {desc}\n" for name, desc in parameters.items())
def format_metrics(score, rationale, explanation):
    """Return the metrics panel text for one row.

    The result is an ``Explanation`` heading followed by ``explanation``,
    each on its own line, with a leading and trailing line break.

    NOTE(review): ``score`` and ``rationale`` are accepted but not rendered
    by the template in this revision — confirm whether that is intended.

    Args:
        score: Row score (currently unused).
        rationale: Row rationale (currently unused).
        explanation: Text shown under the heading.

    Returns:
        str: The formatted panel.
    """
    panel_lines = ("", "Explanation", f"{explanation}", "")
    return "\n".join(panel_lines)
def update_chat_display(df, index):
    """Build the chat, metrics and tool fragments for one row of ``df``.

    Args:
        df: DataFrame with the columns ``conversation`` (JSON-encoded list of
            messages carrying ``role`` and ``content``), ``score``,
            ``rationale``, ``explanation`` and ``tools_langchain``; may be
            ``None``.
        index: Positional row index to display.

    Returns:
        tuple[str, str, str]: ``(chat_html, metrics_html, tool_html)``. When
        ``df`` is ``None`` or empty, or ``index`` is past the last row, three
        placeholder strings are returned instead.
    """
    if df is None or df.empty or index >= len(df):
        return (
            "No data available\n",
            "No metrics available\n",
            "No tool information available\n",
        )

    row = df.iloc[index]

    # Chat messages: the conversation is stored as a JSON string.
    messages = json.loads(row["conversation"])
    rendered_messages = "".join(
        format_chat_message(msg["role"], msg["content"]) for msg in messages
    )
    chat_html = f"\n{rendered_messages}\n"

    # Metrics panel.
    metrics_html = format_metrics(row["score"], row["rationale"], row["explanation"])

    # Tool definitions.
    tool_html = format_tool_info(row["tools_langchain"])

    return chat_html, metrics_html, tool_html
def _fallback_display(message):
    """Build the five-element result used when no row can be shown."""
    return (
        message,
        "No metrics available\n",
        "No tool information available\n",
        gr.update(maximum=0, value=0),
        "0/0",
    )


def filter_and_update_display(model, dataset, selected_scores, current_index):
    """Filter a model/dataset's rows by score and render the current one.

    Args:
        model: Model name passed to ``get_chat_and_score_df``.
        dataset: Dataset name passed to ``get_chat_and_score_df``.
        selected_scores: Scores to keep; a falsy value disables filtering.
        current_index: Requested row position; capped at the last row of the
            filtered frame.

    Returns:
        tuple: ``(chat_html, metrics_html, tool_html, index_update, label)``
        where ``index_update`` is a ``gr.update`` carrying the new maximum
        and value, and ``label`` is ``"<position>/<total>"``. When nothing
        matches the filter, or any exception is raised, placeholder strings,
        a zeroed ``gr.update`` and ``"0/0"`` are returned; in the exception
        case the first element carries the error text.
    """
    try:
        # Get data and filter by scores.
        df_chat = get_chat_and_score_df(model, dataset)
        if selected_scores:
            df_chat = df_chat[df_chat["score"].isin(selected_scores)]

        if df_chat.empty:
            return _fallback_display("No data available for selected filters\n")

        # Keep the requested index within the filtered frame.
        max_index = len(df_chat) - 1
        current_index = min(current_index, max_index)

        chat_html, metrics_html, tool_html = update_chat_display(df_chat, current_index)
        return (
            chat_html,
            metrics_html,
            tool_html,
            gr.update(maximum=max_index, value=current_index),
            f"{current_index + 1}/{len(df_chat)}",
        )
    except Exception as e:
        # UI boundary: report the failure in the panel rather than raising.
        print(f"Error in filter_and_update_display: {str(e)}")
        return _fallback_display(f"Error: {str(e)}\n")