David Pomerenke
Display all languages and translate from multiple languages
6b6f157
raw
history blame
5.63 kB
<!DOCTYPE html>
<html>
<head>
<title>Language Bench</title>
<style>
/* Page container: cap the width at 1200px and center it horizontally. */
body {
max-width: 1200px;
margin: 0 auto;
padding: 20px;
font-family: sans-serif;
}
/* Wrapper created by the script for each language's heading and speaker line. */
.language-header {
margin-bottom: 10px;
}
/* Smaller, grey "…M speakers" line placed under each language heading. */
.speaker-count {
font-size: 0.8em;
color: #666;
font-weight: normal;
margin: 0;
}
</style>
<link rel="icon"
href="data:image/svg+xml,<svg xmlns=%22http://www.w3.org/2000/svg%22 viewBox=%220 0 100 100%22><text y=%22.9em%22 font-size=%2290%22 fill=%22black%22>🌍</text></svg>">
</head>
<body>
<h1>Language Bench</h1>
<div id="charts"></div>
<script type="module">
// Import Plot using ESM
import * as Plot from "https://cdn.jsdelivr.net/npm/@observablehq/plot@0.6/+esm";
/**
 * Formats a speaker count in millions with one decimal place.
 * Shared by the summary tooltip and the per-language headers so both
 * always show the same (rounded) figure.
 *
 * @param {number} speakers - Absolute number of speakers.
 * @returns {string} e.g. "1.3M speakers" for 1_260_000.
 */
function formatSpeakers(speakers) {
  return `${(speakers / 1_000_000).toFixed(1)}M speakers`;
}

/**
 * Formats a score for display in a tooltip.
 *
 * @param {number} score - Score value; anything not greater than 0 is treated as "no benchmark".
 * @returns {string} The score with two decimals, or a placeholder message.
 */
function formatScore(score) {
  return score > 0 ? score.toFixed(2) : "No benchmark available!";
}

/**
 * Derives a short axis label from a model identifier.
 * "org/name-variant" becomes "name"; an identifier without an organisation
 * prefix ("name-variant") is handled too instead of throwing.
 *
 * @param {string} code - Model identifier.
 * @returns {string} The part of the model name before its first "-".
 */
function describeModel(code) {
  const [first, second] = code.split("/");
  return (second ?? first).split("-")[0];
}

/**
 * Loads `results.json` and renders the page: one summary chart over all
 * languages at the top of `#charts`, followed by a header (and, when more than
 * one model score exists, a bar chart) for every language, ordered by speaker
 * count descending.
 *
 * @returns {Promise<void>}
 * @throws {Error} If `results.json` cannot be fetched successfully.
 */
async function init() {
  const scoreKey = "bleu";
  const scoreName = "BLEU Score";
  const chartsDiv = document.getElementById("charts");

  const response = await fetch("results.json");
  if (!response.ok) {
    // Fail with a clear message rather than a JSON parse error on an error page.
    throw new Error(
      `Failed to load results.json: ${response.status} ${response.statusText}`,
    );
  }
  const data = await response.json();

  // Tooltip text for one language in the summary plot.
  const formatTitle = (d) =>
    `${d.language_name}\n${formatSpeakers(d.speakers)}\n${scoreName}: ${formatScore(d[scoreKey])}`;

  // Options shared by all summary marks: bar widths are stacked along x by
  // speaker count, ordered by score (reversed). The score is passed as y2
  // (not y) to avoid stacking by y.
  const stacked = {
    x: "speakers",
    y2: scoreKey,
    order: scoreKey,
    reverse: true,
  };

  const summaryPlot = Plot.plot({
    width: chartsDiv.clientWidth,
    height: 400,
    marginBottom: 100,
    x: { label: "Number of speakers", axis: null },
    y: { label: `${scoreName} (average across models)` },
    marks: [
      // One bar per language; languages without a positive score are pink.
      Plot.rectY(data, Plot.stackX({
        ...stacked,
        title: formatTitle,
        tip: true,
        fill: (d) => (d[scoreKey] > 0 ? "black" : "pink"),
      })),
      // Grey overlay on the bar selected by the pointer transform.
      Plot.rectY(data, Plot.pointerX(Plot.stackX({
        ...stacked,
        fill: "grey",
      }))),
      // Rotated language names; only shown for languages above 50M speakers.
      Plot.text(data, Plot.stackX({
        ...stacked,
        text: "language_name",
        frameAnchor: "bottom",
        textAnchor: "end",
        dy: 10,
        rotate: 270,
        opacity: (d) => (d.speakers > 50_000_000 ? 1 : 0),
      })),
    ],
  });
  // The summary goes before anything already inside the container.
  chartsDiv.insertBefore(summaryPlot, chartsDiv.firstChild);

  // Keep the first record seen for each language name (single pass, so no
  // per-language rescans of `data` are needed later).
  const firstRecordByLanguage = new Map();
  for (const record of data) {
    if (!firstRecordByLanguage.has(record.language_name)) {
      firstRecordByLanguage.set(record.language_name, record);
    }
  }
  const records = [...firstRecordByLanguage.values()]
    .sort((a, b) => b.speakers - a.speakers);

  for (const record of records) {
    const headerDiv = document.createElement("div");
    headerDiv.className = "language-header";

    const h2 = document.createElement("h2");
    h2.textContent = record.language_name;
    h2.style.marginBottom = "5px";

    const speakerP = document.createElement("p");
    speakerP.className = "speaker-count";
    speakerP.textContent = formatSpeakers(record.speakers);

    headerDiv.appendChild(h2);
    headerDiv.appendChild(speakerP);
    chartsDiv.appendChild(headerDiv);

    // Per-model chart; skipped when there are fewer than two model scores.
    const scores = record.scores;
    if (scores?.length > 1) {
      const plot = Plot.plot({
        width: 400,
        height: 200,
        margin: 30,
        y: {
          domain: [0, 1],
          label: scoreName,
        },
        marks: [
          Plot.barY(scores, {
            x: (d) => describeModel(d.model),
            y: scoreKey,
          }),
        ],
      });
      chartsDiv.appendChild(plot);
    }
  }
}
// Start rendering. Failures are logged and shown on the page instead of being
// left as an unhandled promise rejection with an empty chart area.
init().catch((error) => {
  console.error("Failed to render Language Bench:", error);
  const container = document.getElementById("charts");
  if (container) {
    const message = document.createElement("p");
    message.textContent = "Could not load benchmark results.";
    container.appendChild(message);
  }
});
</script>
</body>
</html>