David Pomerenke
committed on
Commit
·
d178010
1
Parent(s):
160ce91
Improve language chart
Browse files
- index.html +27 -5
- languagebench.py +1 -1
- results.json +40 -0
- results_summary.json +5 -0
index.html
CHANGED
@@ -22,6 +22,8 @@
|
|
22 |
margin: 0;
|
23 |
}
|
24 |
</style>
|
|
|
|
|
25 |
</head>
|
26 |
|
27 |
<body>
|
@@ -37,24 +39,44 @@
|
|
37 |
|
38 |
const summary = await fetch('results_summary.json');
|
39 |
const summaryData = await summary.json();
|
|
|
40 |
|
41 |
// Create summary plot
|
42 |
const summaryPlot = Plot.plot({
|
43 |
width: 800,
|
44 |
height: 400,
|
45 |
-
|
|
|
46 |
y: { label: "BLEU Score (average across models)" },
|
47 |
marks: [
|
|
|
48 |
Plot.rectY(summaryData, Plot.stackX({
|
49 |
x: "speakers",
|
50 |
order: "bleu",
|
51 |
reverse: true,
|
52 |
y2: "bleu", // y2 to avoid stacking by y
|
53 |
-
title:
|
54 |
-
|
55 |
-
insetRight: 0.2
|
56 |
})),
|
57 |
-
Plot.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
58 |
]
|
59 |
});
|
60 |
|
|
|
22 |
margin: 0;
|
23 |
}
|
24 |
</style>
|
25 |
+
<link rel="icon"
|
26 |
+
href="data:image/svg+xml,<svg xmlns=%22http://www.w3.org/2000/svg%22 viewBox=%220 0 100 100%22><text y=%22.9em%22 font-size=%2290%22 fill=%22black%22>🌍</text></svg>">
|
27 |
</head>
|
28 |
|
29 |
<body>
|
|
|
39 |
|
40 |
const summary = await fetch('results_summary.json');
|
41 |
const summaryData = await summary.json();
|
42 |
+
// Builds the multi-line tooltip text for one data row `d`:
//   line 1: d.target_language_name
//   line 2: d.speakers expressed in millions, truncated to one decimal place
//   line 3: d.bleu formatted with one decimal place
const formatTitle = (d) => {
  // Truncate to tenths of a million numerically. parseInt() would stringify
  // the quotient first and misread values rendered in exponent notation
  // (e.g. parseInt(5e-7) === 5).
  const millionSpeakers = Math.trunc(d.speakers / 100_000) / 10;
  return `${d.target_language_name}\n${millionSpeakers}M speakers\nBLEU score: ${d.bleu.toFixed(1)}`;
};
|
43 |
|
44 |
// Create summary plot
|
45 |
const summaryPlot = Plot.plot({
|
46 |
width: 800,
|
47 |
height: 400,
|
48 |
+
marginBottom: 100,
|
49 |
+
x: { label: "Number of speakers", axis: null },
|
50 |
y: { label: "BLEU Score (average across models)" },
|
51 |
marks: [
|
52 |
+
|
53 |
Plot.rectY(summaryData, Plot.stackX({
|
54 |
x: "speakers",
|
55 |
order: "bleu",
|
56 |
reverse: true,
|
57 |
y2: "bleu", // y2 to avoid stacking by y
|
58 |
+
title: formatTitle,
|
59 |
+
tip: true,
|
|
|
60 |
})),
|
61 |
+
Plot.rectY(summaryData, Plot.pointerX(Plot.stackX({
|
62 |
+
x: "speakers",
|
63 |
+
order: "bleu",
|
64 |
+
reverse: true,
|
65 |
+
y2: "bleu", // y2 to avoid stacking by y
|
66 |
+
fill: "grey",
|
67 |
+
}))),
|
68 |
+
Plot.text(summaryData, Plot.stackX({
|
69 |
+
filter: (d) => d.speakers > 1_000_000,
|
70 |
+
x: "speakers",
|
71 |
+
y2: "bleu",
|
72 |
+
order: "bleu",
|
73 |
+
reverse: true,
|
74 |
+
text: "target_language_name",
|
75 |
+
frameAnchor: "bottom",
|
76 |
+
textAnchor: "end",
|
77 |
+
dy: 10,
|
78 |
+
rotate: 270
|
79 |
+
}))
|
80 |
]
|
81 |
});
|
82 |
|
languagebench.py
CHANGED
@@ -25,7 +25,7 @@ original_language = "eng_Latn"
|
|
25 |
dataset = "floresp-v2.0-rc.3/dev"
|
26 |
random.seed(42)
|
27 |
target_languages = [f.split(".")[1] for f in os.listdir(dataset)]
|
28 |
-
target_languages = random.choices(target_languages, k=
|
29 |
# target_languages = [
|
30 |
# "eng_Latn",
|
31 |
# "deu_Latn",
|
|
|
25 |
dataset = "floresp-v2.0-rc.3/dev"
|
26 |
random.seed(42)
|
27 |
target_languages = [f.split(".")[1] for f in os.listdir(dataset)]
|
28 |
+
target_languages = random.choices(target_languages, k=9)
|
29 |
# target_languages = [
|
30 |
# "eng_Latn",
|
31 |
# "deu_Latn",
|
results.json
CHANGED
@@ -318,5 +318,45 @@
|
|
318 |
"target_language_name": "Czech",
|
319 |
"speakers": 10700000,
|
320 |
"bleu": 60.25088578142904
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
321 |
}
|
322 |
]
|
|
|
318 |
"target_language_name": "Czech",
|
319 |
"speakers": 10700000,
|
320 |
"bleu": 60.25088578142904
|
321 |
+
},
|
322 |
+
{
|
323 |
+
"model": "openai/gpt-4o-mini",
|
324 |
+
"original_language": "eng_Latn",
|
325 |
+
"target_language": "sag_Latn",
|
326 |
+
"target_language_name": "Sango",
|
327 |
+
"speakers": 4600000,
|
328 |
+
"bleu": 2.2745290486034833
|
329 |
+
},
|
330 |
+
{
|
331 |
+
"model": "google/gemini-flash-1.5",
|
332 |
+
"original_language": "eng_Latn",
|
333 |
+
"target_language": "sag_Latn",
|
334 |
+
"target_language_name": "Sango",
|
335 |
+
"speakers": 4600000,
|
336 |
+
"bleu": 5.131617554505083
|
337 |
+
},
|
338 |
+
{
|
339 |
+
"model": "anthropic/claude-3.5-sonnet",
|
340 |
+
"original_language": "eng_Latn",
|
341 |
+
"target_language": "sag_Latn",
|
342 |
+
"target_language_name": "Sango",
|
343 |
+
"speakers": 4600000,
|
344 |
+
"bleu": 22.265544703760973
|
345 |
+
},
|
346 |
+
{
|
347 |
+
"model": "qwen/qwen-2.5-72b-instruct",
|
348 |
+
"original_language": "eng_Latn",
|
349 |
+
"target_language": "sag_Latn",
|
350 |
+
"target_language_name": "Sango",
|
351 |
+
"speakers": 4600000,
|
352 |
+
"bleu": 1.1524444505654738
|
353 |
+
},
|
354 |
+
{
|
355 |
+
"model": "meta-llama/llama-3.1-8b-instruct",
|
356 |
+
"original_language": "eng_Latn",
|
357 |
+
"target_language": "sag_Latn",
|
358 |
+
"target_language_name": "Sango",
|
359 |
+
"speakers": 4600000,
|
360 |
+
"bleu": 1.437953401517244
|
361 |
}
|
362 |
]
|
results_summary.json
CHANGED
@@ -38,5 +38,10 @@
|
|
38 |
"target_language_name":"Polish",
|
39 |
"bleu":59.3540779188,
|
40 |
"speakers":40200000.0
|
|
|
|
|
|
|
|
|
|
|
41 |
}
|
42 |
]
|
|
|
38 |
"target_language_name":"Polish",
|
39 |
"bleu":59.3540779188,
|
40 |
"speakers":40200000.0
|
41 |
+
},
|
42 |
+
{
|
43 |
+
"target_language_name":"Sango",
|
44 |
+
"bleu":6.4524178318,
|
45 |
+
"speakers":4600000.0
|
46 |
}
|
47 |
]
|