Spaces:
Running
Running
# chat.py | |
import gradio as gr | |
import json | |
import pandas as pd | |
import numpy as np | |
from functools import lru_cache | |
import promptquality as pq | |
# Name of the promptquality project that the leaderboard runs are read from.
project_name = "agent-lb-v1"

# Resolve the project once at import time; its id is reused by every
# run/row query in this module.
# NOTE(review): presumably a remote lookup -- confirm credentials are
# configured before this module is imported.
_project = pq.get_project_from_name(project_name)
PROJECT_ID = _project.id
def get_model_score_for_dataset(model, dataset):
    """Collect tool-selection-quality metrics for one model/dataset run.

    The run is looked up by the name ``"{model} {dataset}"`` inside the
    project identified by ``PROJECT_ID``; up to 1000 rows are requested.
    Rows whose rationale is falsy are dropped from every returned list.

    Args:
        model: Model name; first half of the run name.
        dataset: Dataset name; second half of the run name.

    Returns:
        A dict with keys ``"mean_score"`` (mean of the kept scores, rounded
        to 2 decimals), ``"scores"`` (per-row scores rounded to 2 decimals),
        ``"rationales"`` and ``"explanations"``.
    """
    print(f"Getting metrics for {model} {project_name} for dataset {dataset}")

    run = pq.get_run_from_name(f"{model} {dataset}", PROJECT_ID)
    rows = pq.get_rows(
        project_id=PROJECT_ID,
        run_id=run.id,
        task_type=None,
        config=None,
        starting_token=0,
        limit=1000,
    )

    all_rationales = [row.metrics.tool_selection_quality_rationale for row in rows]

    # Keep score/explanation only for rows that actually carry a rationale.
    scores = []
    explanations = []
    for row, rationale in zip(rows, all_rationales):
        if not rationale:
            continue
        scores.append(round(row.metrics.tool_selection_quality, 2))
        explanations.append(row.metrics.tool_selection_quality_explanation)

    kept_rationales = [rationale for rationale in all_rationales if rationale]

    # NOTE: with no kept rows, np.mean([]) yields nan (and a RuntimeWarning).
    result = {
        "mean_score": round(np.mean(scores), 2),
        "scores": scores,
        "rationales": kept_rationales,
        "explanations": explanations,
    }
    return result
def get_updated_df(df, data):
    """Attach rationale, explanation and score columns to ``df`` in place.

    Args:
        df: DataFrame to extend; it is mutated and also returned.
        data: Mapping with ``"rationales"``, ``"explanations"`` and
            ``"scores"`` entries, each assignable as a column of ``df``.

    Returns:
        The same ``df`` object with the three columns set.
    """
    # (target column, source key) pairs, assigned in this order.
    column_sources = (
        ("rationale", "rationales"),
        ("explanation", "explanations"),
        ("score", "scores"),
    )
    for column, key in column_sources:
        df[column] = data[key]
    return df
def get_chat_and_score_df(model, dataset):
    """Load a dataset's rows and join them with the model's metrics.

    Metrics are fetched first via ``get_model_score_for_dataset``; the
    dataset is then read from ``datasets/{dataset}.parquet`` and extended
    with the metric columns by ``get_updated_df``.

    Args:
        model: Model name used to locate the scored run.
        dataset: Dataset name; selects both the run and the parquet file.

    Returns:
        The dataset DataFrame with ``rationale``, ``explanation`` and
        ``score`` columns added.
    """
    metrics = get_model_score_for_dataset(model, dataset)
    conversations = pd.read_parquet(f"datasets/{dataset}.parquet")
    return get_updated_df(conversations, metrics)
def format_chat_message(role, content):
    """Format a single chat message as an HTML fragment.

    Role and content are HTML-escaped before interpolation so that message
    text containing ``<``, ``>``, ``&`` or quotes is shown literally instead
    of being interpreted as markup by the page.

    Args:
        role: Speaker role (a string); its lower-cased form is used to build
            the CSS class names.
        content: Message body. Non-string values are rendered through
            ``str()`` (so ``None`` is shown as ``None``).

    Returns:
        An HTML string with a role badge and the message content.
    """
    import html  # function-local so this block needs no file-level import change

    role_style = html.escape(role.lower())
    safe_role = html.escape(role)
    safe_content = html.escape(str(content))
    return f"""
        <div class="message {role_style}">
            <div class="role-badge {role_style}-role">{safe_role}</div>
            <div class="content">{safe_content}</div>
        </div>
    """
def format_tool_info(tools):
    """Format tool information as an HTML panel.

    Args:
        tools: Either an iterable of tool dicts (read keys: ``name``,
            ``description``, ``parameters``) or a JSON string encoding one.

    Returns:
        A ``tool-info-panel`` HTML string with one section per tool, or a
        "No tool information available" placeholder when ``tools`` is
        falsy or is a string that is not valid JSON.
    """
    no_tools = "<div>No tool information available</div>"

    if isinstance(tools, str):
        try:
            tools = json.loads(tools)
        except ValueError:
            # json.JSONDecodeError subclasses ValueError; catching only this
            # keeps KeyboardInterrupt/SystemExit from being swallowed.
            return no_tools

    if not tools:
        return no_tools

    sections = [
        f"""
            <div class="tool-section">
                <div class="tool-name">{tool.get('name', 'Unnamed Tool')}</div>
                <div class="tool-description">{tool.get('description', 'No description available')}</div>
                <div class="tool-parameters">
                    {format_parameters(tool.get('parameters', {}))}
                </div>
            </div>
        """
        for tool in tools
    ]
    tool_html = "".join(sections)
    return f'<div class="tool-info-panel">{tool_html}</div>'
def format_parameters(parameters):
    """Render a tool's parameters mapping as HTML rows.

    Args:
        parameters: Mapping of parameter name to its description/value.

    Returns:
        One ``parameter`` div per entry, concatenated in mapping order, or a
        "No parameters" placeholder when ``parameters`` is falsy.
    """
    if not parameters:
        return "<div>No parameters</div>"

    return "".join(
        f"""
            <div class="parameter">
                <span class="param-name">{param_name}:</span> {param_desc}
            </div>
        """
        for param_name, param_desc in parameters.items()
    )
def format_metrics(score, rationale, explanation):
    """Build the metrics panel HTML for one row.

    Args:
        score: Numeric score; rendered with two decimal places.
        rationale: Text shown under the "Rationale" heading.
        explanation: Text shown under the "Explanation" heading.

    Returns:
        An HTML string with Score, Rationale and Explanation sections.
    """
    score_text = format(score, ".2f")
    return f"""
        <div class="metrics-panel">
            <div class="metric-section">
                <h3>Score</h3>
                <div class="score-display">{score_text}</div>
            </div>
            <div class="metric-section">
                <h3>Rationale</h3>
                <div class="explanation-text">{rationale}</div>
            </div>
            <div class="metric-section">
                <h3>Explanation</h3>
                <div class="explanation-text">{explanation}</div>
            </div>
        </div>
    """
def update_chat_display(df, index):
    """Render the chat, metrics and tool panels for the row at ``index``.

    Args:
        df: DataFrame with ``conversation`` (JSON-encoded list of messages
            with ``role``/``content``), ``score``, ``rationale``,
            ``explanation`` and ``tools_langchain`` columns; may be ``None``.
        index: Positional row index to display.

    Returns:
        A ``(chat_html, metrics_html, tool_html)`` tuple. Placeholder HTML
        is returned when ``df`` is ``None``, empty, or ``index`` is past the
        last row.
    """
    placeholders = (
        "<div>No data available</div>",
        "<div>No metrics available</div>",
        "<div>No tool information available</div>",
    )
    if df is None:
        return placeholders
    if df.empty or index >= len(df):
        return placeholders

    record = df.iloc[index]

    # Chat transcript: one formatted fragment per message, in order.
    rendered_messages = "".join(
        format_chat_message(message["role"], message["content"])
        for message in json.loads(record["conversation"])
    )
    chat_html = f"""
        <div class="chat-panel">
            {rendered_messages}
        </div>
    """

    metrics_html = format_metrics(
        record["score"], record["rationale"], record["explanation"]
    )
    tool_html = format_tool_info(record["tools_langchain"])
    return chat_html, metrics_html, tool_html
def filter_and_update_display(model, dataset, selected_scores, current_index):
    """Load, filter and render one row for the chat viewer.

    Args:
        model: Model name passed to ``get_chat_and_score_df``.
        dataset: Dataset name passed to ``get_chat_and_score_df``.
        selected_scores: Score values to keep; a falsy value disables the
            filter.
        current_index: Requested row position; clamped to the last
            available row after filtering.

    Returns:
        A 5-tuple ``(chat_html, metrics_html, tool_html, index_update,
        position_label)`` where ``index_update`` is a ``gr.update`` carrying
        the new maximum/value and ``position_label`` looks like ``"3/10"``.
        Any exception is printed and reported in the first element, with
        placeholders and ``"0/0"`` for the rest.
    """
    no_metrics = "<div>No metrics available</div>"
    no_tools = "<div>No tool information available</div>"

    try:
        frame = get_chat_and_score_df(model, dataset)
        if selected_scores:
            frame = frame[frame["score"].isin(selected_scores)]

        if not frame.empty:
            # Clamp the requested position to the filtered row count.
            last_index = len(frame) - 1
            position = min(current_index, last_index)

            chat_html, metrics_html, tool_html = update_chat_display(frame, position)
            return (
                chat_html,
                metrics_html,
                tool_html,
                gr.update(maximum=last_index, value=position),
                f"{position + 1}/{len(frame)}",
            )

        return (
            "<div>No data available for selected filters</div>",
            no_metrics,
            no_tools,
            gr.update(maximum=0, value=0),
            "0/0",
        )
    except Exception as exc:
        # UI boundary: report the failure in the panel instead of raising.
        print(f"Error in filter_and_update_display: {exc}")
        return (
            f"<div>Error: {exc}</div>",
            no_metrics,
            no_tools,
            gr.update(maximum=0, value=0),
            "0/0",
        )