David Pomerenke
Basic Gradio setup
63202a2
raw
history blame
2.25 kB
import gradio as gr
import json
import pandas as pd
import plotly.graph_objects as go
# Load and process results
with open("results.json") as f:
results = json.load(f)
def create_model_comparison_plot(results):
# Extract all unique models
models = set()
for lang in results:
for score in lang["scores"]:
models.add(score["model"])
models = list(models)
# Create traces for each model
traces = []
for model in models:
x_vals = [] # languages
y_vals = [] # BLEU scores
for lang in results:
model_score = next((s["bleu"] for s in lang["scores"] if s["model"] == model), None)
if model_score is not None:
x_vals.append(lang["language_name"])
y_vals.append(model_score)
traces.append(go.Bar(
name=model.split('/')[-1],
x=x_vals,
y=y_vals,
))
fig = go.Figure(data=traces)
fig.update_layout(
title="BLEU Scores by Model and Language",
xaxis_title="Language",
yaxis_title="BLEU Score",
barmode='group',
height=500
)
return fig
def create_results_df(results):
# Create a list to store flattened data
flat_data = []
for lang in results:
row = {
"Language": lang["language_name"],
"Speakers (M)": round(lang["speakers"] / 1_000_000, 1),
"Average BLEU": round(lang["bleu"], 3),
}
# Add individual model scores
for score in lang["scores"]:
model_name = score["model"].split('/')[-1]
row[f"{model_name} BLEU"] = round(score["bleu"], 3)
flat_data.append(row)
return pd.DataFrame(flat_data)
# Create the visualization components
with gr.Blocks(title="AI Language Translation Benchmark") as demo:
gr.Markdown("# AI Language Translation Benchmark")
gr.Markdown("Comparing translation performance across different AI models and languages")
df = create_results_df(results)
plot = create_model_comparison_plot(results)
gr.DataFrame(value=df, label="Translation Results")
gr.Plot(value=plot, label="Model Comparison")
demo.launch()