File size: 2,253 Bytes
63202a2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
import gradio as gr
import json
import pandas as pd
import plotly.graph_objects as go

# Load and process results
with open("results.json") as f:
    results = json.load(f)

def create_model_comparison_plot(results):
    # Extract all unique models
    models = set()
    for lang in results:
        for score in lang["scores"]:
            models.add(score["model"])
    models = list(models)
    
    # Create traces for each model
    traces = []
    for model in models:
        x_vals = []  # languages
        y_vals = []  # BLEU scores
        
        for lang in results:
            model_score = next((s["bleu"] for s in lang["scores"] if s["model"] == model), None)
            if model_score is not None:
                x_vals.append(lang["language_name"])
                y_vals.append(model_score)
        
        traces.append(go.Bar(
            name=model.split('/')[-1],
            x=x_vals,
            y=y_vals,
        ))
    
    fig = go.Figure(data=traces)
    fig.update_layout(
        title="BLEU Scores by Model and Language",
        xaxis_title="Language",
        yaxis_title="BLEU Score",
        barmode='group',
        height=500
    )
    return fig

def create_results_df(results):
    # Create a list to store flattened data
    flat_data = []
    
    for lang in results:
        row = {
            "Language": lang["language_name"],
            "Speakers (M)": round(lang["speakers"] / 1_000_000, 1),
            "Average BLEU": round(lang["bleu"], 3),
        }
        # Add individual model scores
        for score in lang["scores"]:
            model_name = score["model"].split('/')[-1]
            row[f"{model_name} BLEU"] = round(score["bleu"], 3)
        
        flat_data.append(row)
    
    return pd.DataFrame(flat_data)

# Create the visualization components
with gr.Blocks(title="AI Language Translation Benchmark") as demo:
    gr.Markdown("# AI Language Translation Benchmark")
    gr.Markdown("Comparing translation performance across different AI models and languages")
    
    df = create_results_df(results)
    plot = create_model_comparison_plot(results)
    
    gr.DataFrame(value=df, label="Translation Results")
    gr.Plot(value=plot, label="Model Comparison")

demo.launch()