Spaces:

galileo-ai
/

agent-leaderboard

Running

Pratik Bhavsar

refactoring and auto theme

4a46abc about 15 hours ago

5.93 kB

	# chat.py
	import html
	import json
	from functools import lru_cache

	import gradio as gr
	import numpy as np
	import pandas as pd
	import promptquality as pq

	# Name of the promptquality project whose runs are queried by this module.
	project_name = "agent-lb-v1"
	# Project id looked up by name through promptquality when this statement runs;
	# get_model_score_for_dataset passes it to every run/row query.
	PROJECT_ID = pq.get_project_from_name(project_name).id


	@lru_cache(maxsize=1000)
	def get_model_score_for_dataset(model, dataset):
	    """Fetch tool-selection-quality metrics for one model/dataset run.

	    Looks up the run named ``"{model} {dataset}"`` in the project identified
	    by ``PROJECT_ID`` and requests up to 1000 of its rows. Rows whose
	    ``tool_selection_quality_rationale`` is falsy are skipped, so the three
	    returned lists always have equal length and line up index-for-index.

	    Results are memoised per ``(model, dataset)`` pair; the returned dict is
	    the cached object itself, so callers should treat it as read-only.

	    Args:
	        model: Model name; first part of the run name.
	        dataset: Dataset name; second part of the run name.

	    Returns:
	        A dict with keys ``"mean_score"`` (mean of ``"scores"`` rounded to
	        two decimals, or NaN when no row has a rationale), ``"scores"``
	        (per-row scores rounded to two decimals), ``"rationales"`` and
	        ``"explanations"``.
	    """
	    print(f"Getting metrics for {model} {project_name} for dataset {dataset}")
	    run_name = f"{model} {dataset}"
	    run_id = pq.get_run_from_name(run_name, PROJECT_ID).id
	    rows = pq.get_rows(
	        project_id=PROJECT_ID,
	        run_id=run_id,
	        task_type=None,
	        config=None,
	        starting_token=0,
	        limit=1000,
	    )

	    # One pass over the rows keeps the three lists aligned by construction
	    # instead of relying on three separately filtered comprehensions.
	    scores, rationales, explanations = [], [], []
	    for row in rows:
	        metrics = row.metrics
	        rationale = metrics.tool_selection_quality_rationale
	        if not rationale:
	            continue
	        scores.append(round(metrics.tool_selection_quality, 2))
	        rationales.append(rationale)
	        explanations.append(metrics.tool_selection_quality_explanation)

	    # np.mean([]) evaluates to NaN but also emits a RuntimeWarning; produce
	    # the same NaN directly when nothing was scored.
	    mean_score = round(np.mean(scores), 2) if scores else float("nan")
	    return {
	        "mean_score": mean_score,
	        "scores": scores,
	        "rationales": rationales,
	        "explanations": explanations,
	    }


	def get_updated_df(df, data):
	    """Attach the per-row metric lists in ``data`` to ``df`` as new columns.

	    ``df`` is modified in place and the same object is returned. The columns
	    are written in the order ``rationale``, ``explanation``, ``score`` from
	    the ``"rationales"``, ``"explanations"`` and ``"scores"`` entries of
	    ``data``.
	    """
	    # Target column name -> key in `data`; dict order fixes the column order.
	    column_sources = {
	        "rationale": "rationales",
	        "explanation": "explanations",
	        "score": "scores",
	    }
	    for column, key in column_sources.items():
	        df[column] = data[key]
	    return df


	def get_chat_and_score_df(model, dataset):
	    """Load a dataset's conversations and join the model's metrics onto them.

	    The metrics are fetched first via ``get_model_score_for_dataset``; the
	    frame is then read from ``datasets/{dataset}.parquet`` and extended with
	    the metric columns by ``get_updated_df``.
	    """
	    metrics = get_model_score_for_dataset(model, dataset)
	    conversations = pd.read_parquet(f"datasets/{dataset}.parquet")
	    return get_updated_df(conversations, metrics)


	def format_chat_message(role, content):
	    """Render a single chat message as an HTML snippet.

	    The lower-cased role is used to build the CSS class names and the role
	    as given is shown in the badge. Role and content are converted to text
	    and HTML-escaped so that characters such as ``<`` or ``&`` in a message
	    are displayed literally instead of being parsed as markup.

	    Args:
	        role: Speaker label of the message.
	        content: Message body; any value is rendered through ``str()``.

	    Returns:
	        The HTML string for the message.
	    """
	    role_text = str(role)
	    # quote=True (the default) also escapes quotes, which matters because
	    # the role ends up inside a class attribute.
	    role_style = html.escape(role_text.lower())
	    role_label = html.escape(role_text)
	    body = html.escape(str(content))
	    return f"""
	<div class="message {role_style}">
	<div class="role-badge {role_style}-role">{role_label}</div>
	<div class="content">{body}</div>
	</div>
	"""


	def format_tool_info(tools):
	    """Render the tools available to the agent as an HTML panel.

	    Args:
	        tools: A sequence of tool dicts, a single tool dict, or a JSON string
	            encoding either. Each tool dict may carry ``name``,
	            ``description`` and ``parameters`` entries.

	    Returns:
	        An HTML string: the tool panel, or a placeholder ``<div>`` when the
	        input is empty or is a string that cannot be parsed as JSON.
	    """
	    no_info = "<div>No tool information available</div>"

	    if isinstance(tools, str):
	        try:
	            tools = json.loads(tools)
	        except (ValueError, RecursionError):
	            # json.JSONDecodeError is a ValueError; anything else (e.g.
	            # KeyboardInterrupt) should not be swallowed here.
	            return no_info

	    if not tools:
	        return no_info

	    # A lone tool dict would otherwise be iterated key by key.
	    if isinstance(tools, dict):
	        tools = [tools]

	    sections = []
	    for tool in tools:
	        # Escape free text so markup characters in tool metadata are shown
	        # literally rather than interpreted by the browser.
	        name = html.escape(str(tool.get("name", "Unnamed Tool")))
	        description = html.escape(
	            str(tool.get("description", "No description available"))
	        )
	        parameters_html = format_parameters(tool.get("parameters", {}))
	        sections.append(f"""
	<div class="tool-section">
	<div class="tool-name">{name}</div>
	<div class="tool-description">{description}</div>
	<div class="tool-parameters">
	{parameters_html}
	</div>
	</div>
	""")
	    return f'<div class="tool-info-panel">{"".join(sections)}</div>'


	def format_parameters(parameters):
	    """Render a tool's parameters mapping as HTML.

	    Args:
	        parameters: Mapping of parameter name to its description (any value;
	            rendered through ``str()``). May be empty or ``None``.

	    Returns:
	        One ``<div class="parameter">`` snippet per entry, concatenated in
	        mapping order, or a placeholder ``<div>`` when there are no
	        parameters. Names and descriptions are HTML-escaped.
	    """
	    if not parameters:
	        return "<div>No parameters</div>"

	    # join() over a generator avoids repeated string concatenation.
	    return "".join(
	        f"""
	<div class="parameter">
	<span class="param-name">{html.escape(str(name))}:</span> {html.escape(str(desc))}
	</div>
	"""
	        for name, desc in parameters.items()
	    )


	def format_metrics(score, rationale, explanation):
	    """Render the score, rationale and explanation as an HTML metrics panel.

	    Args:
	        score: Numeric score; formatted with two decimals, so it must
	            support the ``.2f`` format specification.
	        rationale: Rationale text for the score.
	        explanation: Explanation text for the score.

	    Returns:
	        The HTML string for the panel. Rationale and explanation are
	        converted to text and HTML-escaped so markup characters in them are
	        displayed literally.
	    """
	    rationale_text = html.escape(str(rationale))
	    explanation_text = html.escape(str(explanation))
	    return f"""
	<div class="metrics-panel">
	<div class="metric-section">
	<h3>Score</h3>
	<div class="score-display">{score:.2f}</div>
	</div>
	<div class="metric-section">
	<h3>Rationale</h3>
	<div class="explanation-text">{rationale_text}</div>
	</div>
	<div class="metric-section">
	<h3>Explanation</h3>
	<div class="explanation-text">{explanation_text}</div>
	</div>
	</div>
	"""


	def update_chat_display(df, index):
	    """Build the chat, metrics and tool HTML for the row at ``index``.

	    Returns a 3-tuple ``(chat_html, metrics_html, tool_html)``. When ``df``
	    is ``None`` or empty, or ``index`` is at or past the end of ``df``,
	    three placeholder ``<div>`` strings are returned instead.
	    """
	    placeholders = (
	        "<div>No data available</div>",
	        "<div>No metrics available</div>",
	        "<div>No tool information available</div>",
	    )
	    if df is None or df.empty:
	        return placeholders
	    if index >= len(df):
	        return placeholders

	    record = df.iloc[index]

	    # The conversation column holds a JSON-encoded list of role/content dicts.
	    conversation = json.loads(record["conversation"])
	    rendered_messages = "".join(
	        format_chat_message(message["role"], message["content"])
	        for message in conversation
	    )
	    chat_html = f"""
	<div class="chat-panel">
	{rendered_messages}
	</div>
	"""

	    metrics_html = format_metrics(
	        record["score"], record["rationale"], record["explanation"]
	    )
	    tool_html = format_tool_info(record["tools_langchain"])

	    return chat_html, metrics_html, tool_html


	def _fallback_display(message):
	    """Build the five-output placeholder tuple used when no row can be shown."""
	    return (
	        message,
	        "<div>No metrics available</div>",
	        "<div>No tool information available</div>",
	        gr.update(maximum=0, value=0),
	        "0/0",
	    )


	def filter_and_update_display(model, dataset, selected_scores, current_index):
	    """Load a model/dataset run, filter it by score and render one row.

	    Args:
	        model: Model name passed to ``get_chat_and_score_df``.
	        dataset: Dataset name passed to ``get_chat_and_score_df``.
	        selected_scores: Score values to keep; a falsy value disables the
	            filter.
	        current_index: Requested row position within the filtered frame. It
	            is coerced to an int (``None`` counts as 0) and clamped to the
	            valid range.

	    Returns:
	        A 5-tuple ``(chat_html, metrics_html, tool_html, index_update,
	        position_label)`` where ``index_update`` is a ``gr.update`` carrying
	        the new maximum and value for the index control and
	        ``position_label`` is ``"<position>/<total>"``. If the filtered frame
	        is empty or any step raises, placeholder HTML with a ``"0/0"`` label
	        is returned instead of propagating the error.
	    """
	    try:
	        df_chat = get_chat_and_score_df(model, dataset)
	        if selected_scores:
	            df_chat = df_chat[df_chat["score"].isin(selected_scores)]

	        if df_chat.empty:
	            return _fallback_display(
	                "<div>No data available for selected filters</div>"
	            )

	        # Clamp on both sides: a negative position would otherwise select a
	        # row from the end while the label shows a non-positive position.
	        max_index = len(df_chat) - 1
	        current_index = max(0, min(int(current_index or 0), max_index))

	        chat_html, metrics_html, tool_html = update_chat_display(
	            df_chat, current_index
	        )
	        return (
	            chat_html,
	            metrics_html,
	            tool_html,
	            gr.update(maximum=max_index, value=current_index),
	            f"{current_index + 1}/{len(df_chat)}",
	        )
	    except Exception as e:
	        # Deliberately broad: this is the UI boundary, so any failure is
	        # logged and shown in the panel rather than raised to the caller.
	        print(f"Error in filter_and_update_display: {e}")
	        # Exception text can contain markup characters; escape before display.
	        return _fallback_display(f"<div>Error: {html.escape(str(e))}</div>")