Spaces:

galileo-ai
/

agent-leaderboard

Running

File size: 6,506 Bytes

10ad72f

import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import plotly.graph_objects as go


def setup_matplotlib():
    """Prepare matplotlib for use by the chart builders in this module.

    Switches matplotlib to the "Agg" backend and then closes every figure
    currently registered with pyplot.
    """
    backend_name = "Agg"
    matplotlib.use(backend_name)
    # Drop any figures left over from earlier rendering.
    plt.close("all")


def get_performance_chart(df, category_name="Overall"):
    """Build a horizontal bar chart of models ordered by "Category Score".

    Args:
        df: DataFrame providing the "Model", "Model Type" and
            "Category Score" columns. Each "Model Type" value must be
            "Private" or "Open source"; any other value raises ``KeyError``
            when bar colours are looked up.
        category_name: Text appended to the chart title.

    Returns:
        The matplotlib ``Figure`` holding the chart. The figure is closed
        with ``plt.close(fig)`` before it is handed back, so it is no longer
        registered with pyplot; the returned object itself is still usable
        by the caller.
    """
    # Discard every figure pyplot is still tracking before drawing a new one.
    plt.close("all")
    score_column = "Category Score"
    df_sorted = df.sort_values(score_column, ascending=True)
    colors = {"Private": "#4F46E5", "Open source": "#16A34A"}

    # Grow the figure with the number of rows, never going below 8 inches.
    height = max(8, len(df_sorted) * 0.8)
    fig, ax = plt.subplots(figsize=(16, height))
    # NOTE: this mutates the process-wide rcParams (not just this figure);
    # kept unchanged because other plots may already rely on it.
    plt.rcParams.update({"font.size": 12})

    try:
        positions = np.arange(len(df_sorted))
        scores = df_sorted[score_column]

        ax.barh(
            positions,
            scores,
            height=0.6,
            color=[colors[t] for t in df_sorted["Model Type"]],
        )

        ax.set_title(
            f"Model Performance Comparison - {category_name}",
            pad=20,
            fontsize=20,
            fontweight="bold",
        )
        ax.set_xlabel("Average Score", fontsize=14, labelpad=10)
        ax.set_xlim(0.0, 1.0)

        ax.set_yticks(positions)
        ax.set_yticklabels(df_sorted["Model"], fontsize=12)

        # Operate on this figure explicitly instead of pyplot's implicit
        # "current figure", which is shared global state.
        fig.subplots_adjust(left=0.35)

        # Print each score just to the right of the end of its bar.
        for i, v in enumerate(scores):
            ax.text(
                v + 0.01, i, f"{v:.3f}", va="center", fontsize=12, fontweight="bold"
            )

        ax.grid(True, axis="x", linestyle="--", alpha=0.2)
        ax.spines[["top", "right"]].set_visible(False)

        # Proxy rectangles: the legend shows one entry per model type, not
        # one per bar.
        legend_elements = [
            plt.Rectangle((0, 0), 1, 1, facecolor=color, label=label)
            for label, color in colors.items()
        ]
        ax.legend(
            handles=legend_elements,
            title="Model Type",
            loc="lower right",
            fontsize=12,
            title_fontsize=14,
        )

        fig.tight_layout()
        return fig
    finally:
        # Runs on success and on error alike, so the figure never stays
        # registered with pyplot after this function exits.
        plt.close(fig)


def create_radar_plot(df, model_names):
    """Build a plotly radar (polar) chart comparing the given models.

    The radar axes are the DataFrame columns from positional index 7
    onward, excluding "IO Cost".
    NOTE(review): this presumes the first seven columns are non-score
    metadata -- confirm against the DataFrame schema.

    Args:
        df: DataFrame with a "Model" column plus the per-dataset columns
            described above. The plotted values are formatted with
            ``:.3f``, so they must be numeric.
        model_names: Iterable of model names to draw, one trace each. Names
            with no matching row in ``df`` are skipped instead of raising.

    Returns:
        A ``plotly.graph_objects.Figure`` with one ``Scatterpolar`` trace per
        model that was found, with the layout applied.
    """
    datasets = [col for col in df.columns[7:] if col != "IO Cost"]
    # Repeat the first axis at the end so every trace is a closed polygon.
    # Slicing (rather than indexing [0]) keeps this safe when no dataset
    # columns are present.
    datasets_plot = datasets + datasets[:1]

    fig = go.Figure()

    colors = ["rgba(99, 102, 241, 0.3)", "rgba(34, 197, 94, 0.3)"]
    line_colors = ["#4F46E5", "#16A34A"]

    for idx, model_name in enumerate(model_names):
        matches = df[df["Model"] == model_name]
        if matches.empty:
            # Requested model is not in the table; draw the rest.
            continue

        # When several rows share a name, the first one is used.
        model_data = matches.iloc[0]
        values = [model_data[m] for m in datasets]
        values += values[:1]

        fig.add_trace(
            go.Scatterpolar(
                r=values,
                theta=datasets_plot,
                fill="toself",
                # The two-colour palettes are cycled by position in
                # model_names.
                fillcolor=colors[idx % len(colors)],
                line=dict(color=line_colors[idx % len(line_colors)], width=2),
                name=model_name,
                text=[f"{val:.3f}" for val in values],
                textposition="middle right",
                mode="lines+markers+text",
            )
        )

    fig.update_layout(
        polar=dict(
            radialaxis=dict(
                visible=True, range=[0, 1], showline=False, tickfont=dict(size=12)
            ),
            angularaxis=dict(
                tickfont=dict(size=13, family="Arial"),
                rotation=90,
                direction="clockwise",
            ),
        ),
        showlegend=True,
        legend=dict(
            orientation="h",
            yanchor="bottom",
            y=-0.2,
            xanchor="center",
            x=0.5,
            font=dict(size=14),
        ),
        title=dict(
            text="Model Comparison",
            x=0.5,
            y=0.95,
            font=dict(size=24, family="Arial", color="#1F2937"),
        ),
        paper_bgcolor="white",
        plot_bgcolor="white",
        height=700,
        width=900,
        margin=dict(t=100, b=100, l=80, r=80),
    )

    return fig


def get_performance_cost_chart(df, category_name="Overall"):
    """Build a scatter plot of model score against I/O cost.

    Each row of ``df`` becomes one point: x is the "IO Cost" value (drawn on
    a log axis) and y is "Category Score" multiplied by 100. Every point is
    annotated with the model name and its cost.

    Args:
        df: DataFrame providing the "Model", "Model Type", "IO Cost" and
            "Category Score" columns. Each "Model Type" value must be
            "Private" or "Open source"; any other value raises ``KeyError``.
        category_name: Text appended to the chart title.

    Returns:
        The matplotlib ``Figure`` holding the chart. As in
        ``get_performance_chart``, the figure is closed with
        ``plt.close(fig)`` before it is handed back so repeated calls do not
        leave figures registered with pyplot; the returned object itself is
        still usable by the caller.
    """
    fig, ax = plt.subplots(figsize=(12, 8), dpi=300)

    try:
        # Configure plot style
        ax.grid(True, linestyle="--", alpha=0.15, which="both")
        ax.set_facecolor("white")
        fig.patch.set_facecolor("white")

        colors = {"Private": "#4F46E5", "Open source": "#16A34A"}
        performance_colors = ["#DCFCE7", "#FEF9C3", "#FEE2E2"]

        score_column = "Category Score"

        # Shared by every annotation, so build it once outside the loop.
        bbox_props = dict(boxstyle="round,pad=0.3", fc="white", ec="none", alpha=0.8)

        # Plot data points
        for _, row in df.iterrows():
            color = colors[row["Model Type"]]
            # Scores above 0.85 get a slightly larger marker.
            size = 100 if row[score_column] > 0.85 else 80
            edge_color = "#3730A3" if row["Model Type"] == "Private" else "#166534"
            point = (row["IO Cost"], row[score_column] * 100)

            ax.scatter(
                point[0],
                point[1],
                c=color,
                s=size,
                alpha=0.9,
                edgecolor=edge_color,
                linewidth=1,
                zorder=5,  # Ensure points are above grid
            )

            # Label the point with the model name and its cost.
            ax.annotate(
                f"{row['Model']}\n(${row['IO Cost']:.2f})",
                point,
                xytext=(5, 5),
                textcoords="offset points",
                fontsize=8,
                bbox=bbox_props,
                zorder=6,
            )

        # Configure axes. The limits are fixed, so points outside
        # x in [0.08, 40] or y in [60, 95] fall outside the visible area.
        ax.set_xscale("log")
        ax.set_xlim(0.08, 40)  # Adjust based on your data range
        ax.set_ylim(60, 95)

        # Customize axis labels
        ax.set_xlabel("I/O Cost per Million Tokens ($)", fontsize=10, labelpad=10)
        ax.set_ylabel("Model Performance Score", fontsize=10, labelpad=10)

        # Legend proxies: empty scatters created on this axes explicitly
        # rather than through pyplot's implicit "current axes".
        legend_elements = [
            ax.scatter([], [], c=color, label=label, s=80)
            for label, color in colors.items()
        ]
        ax.legend(
            handles=legend_elements,
            loc="upper right",
            frameon=True,
            facecolor="white",
            edgecolor="none",
            fontsize=9,
        )

        # Set title
        ax.set_title(
            f"AI Language Model Performance vs. Cost - {category_name}",
            fontsize=12,
            pad=15,
        )

        # Shaded horizontal bands for 85-95, 75-85 and 60-75, drawn beneath
        # the points (zorder=1 vs. 5).
        for y1, y2, color in zip([85, 75, 60], [95, 85, 75], performance_colors):
            ax.axhspan(y1, y2, alpha=0.2, color=color, zorder=1)

        # Customize tick parameters
        ax.tick_params(axis="both", which="major", labelsize=9)
        ax.tick_params(axis="both", which="minor", labelsize=8)

        # Add minor ticks for log scale
        ax.xaxis.set_minor_locator(
            plt.LogLocator(base=10.0, subs=np.arange(2, 10) * 0.1)
        )

        # Lay out this figure explicitly instead of pyplot's current figure.
        fig.tight_layout()

        return fig
    finally:
        # Runs on success and on error alike, so the figure never stays
        # registered with pyplot after this function exits.
        plt.close(fig)