David Pomerenke
Add chart with #speakers vs BLEU
b909c3a
raw
history blame
4.13 kB
<!DOCTYPE html>
<html>
<head>
<title>Language Bench</title>
<style>
/* Page shell: cap the content width at 1200px and center it horizontally. */
body {
max-width: 1200px;
margin: 0 auto;
padding: 20px;
font-family: sans-serif;
}
/* Wrapper div that the script creates around each language's heading and speaker count. */
.language-header {
margin-bottom: 10px;
}
/* Speaker-count line placed under a language heading: smaller, grey, regular weight, no margin. */
.speaker-count {
font-size: 0.8em;
color: #666;
font-weight: normal;
margin: 0;
}
</style>
</head>
<body>
<h1>Language Bench</h1>
<div id="charts"></div>
<script type="module">
// Import Plot using ESM
import * as Plot from "https://cdn.jsdelivr.net/npm/@observablehq/plot@0.6/+esm";
/**
 * Fetch a URL and parse the response body as JSON.
 *
 * @param {string} url - Location of the JSON resource.
 * @returns {Promise<any>} The parsed JSON payload.
 * @throws {Error} If the response status is not in the 2xx range, so that a
 *   missing file is reported as such instead of as a JSON parse failure.
 */
async function fetchJson(url) {
  const response = await fetch(url);
  if (!response.ok) {
    throw new Error(`Failed to load ${url}: HTTP ${response.status}`);
  }
  return response.json();
}

/**
 * Derive a short axis label from a model identifier.
 *
 * For "org/model-variant" this yields "model". Identifiers without a "/"
 * are used as a whole (previously they raised a TypeError), and only the
 * part before the first "-" is kept.
 *
 * @param {string} code - Model identifier as found in the `model` field.
 * @returns {string} The shortened label.
 */
function modelLabel(code) {
  const [first, second] = code.split("/");
  const name = second ?? first;
  return name.split("-")[0];
}

/**
 * Build the overview chart from the summary rows.
 *
 * Rectangles are stacked along x by `speakers`, ordered by `bleu`
 * (reversed), and reach up to `bleu` on the y axis.
 *
 * @param {Array<object>} summaryData - Rows with `speakers`, `bleu` and
 *   `target_language_name` fields.
 * @returns {*} Whatever `Plot.plot` returns (inserted into the DOM by the caller).
 */
function createSummaryPlot(summaryData) {
  return Plot.plot({
    width: 800,
    height: 400,
    x: { label: "Number of speakers" },
    y: { label: "BLEU Score (average across models)" },
    marks: [
      Plot.rectY(
        summaryData,
        Plot.stackX({
          x: "speakers",
          order: "bleu",
          reverse: true,
          y2: "bleu", // y2 to avoid stacking by y
          title: (d) => `${d.target_language_name}\n${d.bleu.toFixed(1)}`,
          insetLeft: 0.2,
          insetRight: 0.2,
        }),
      ),
      Plot.ruleY([0]),
    ],
  });
}

/**
 * Group result rows by target language in a single pass.
 *
 * The speaker count recorded for a language is the one on its first row;
 * rows keep their original relative order within each group.
 *
 * @param {Array<object>} results - Rows with `target_language_name` and `speakers`.
 * @returns {Map<string, {speakers: number, rows: Array<object>}>}
 */
function groupByLanguage(results) {
  const groups = new Map();
  for (const row of results) {
    const language = row.target_language_name;
    if (!groups.has(language)) {
      groups.set(language, { speakers: row.speakers, rows: [] });
    }
    groups.get(language).rows.push(row);
  }
  return groups;
}

/**
 * Append one language section (heading, speaker count, per-model bar chart)
 * to the given container.
 *
 * @param {object} container - Element that receives the header and the plot.
 * @param {string} language - Language name shown in the heading.
 * @param {number} speakers - Speaker count, displayed in millions.
 * @param {Array<object>} rows - Result rows for this language (`model`, `bleu`).
 */
function appendLanguageSection(container, language, speakers, rows) {
  const headerDiv = document.createElement("div");
  headerDiv.className = "language-header";

  const heading = document.createElement("h2");
  heading.textContent = language;
  heading.style.marginBottom = "5px";

  const speakerLine = document.createElement("p");
  speakerLine.className = "speaker-count";
  const millions = (speakers / 1_000_000).toFixed(1);
  speakerLine.textContent = `${millions}M speakers`;

  headerDiv.appendChild(heading);
  headerDiv.appendChild(speakerLine);
  container.appendChild(headerDiv);

  const plot = Plot.plot({
    width: 400,
    height: 200,
    margin: 30,
    y: {
      domain: [0, 100],
      label: "BLEU",
    },
    marks: [
      Plot.barY(rows, {
        x: (d) => modelLabel(d.model),
        y: "bleu",
      }),
    ],
  });
  container.appendChild(plot);
}

/**
 * Load the benchmark data and render all charts into the `#charts` element:
 * the summary chart first, then one section per language ordered by speaker
 * count (descending).
 *
 * @returns {Promise<void>} Resolves once every chart has been appended.
 * @throws {Error} If either JSON file cannot be loaded.
 */
async function init() {
  const chartsDiv = document.getElementById("charts");

  // The summary is rendered before results.json is requested, so the
  // overview is shown even if loading the per-language data fails.
  const summaryData = await fetchJson("results_summary.json");
  chartsDiv.insertBefore(createSummaryPlot(summaryData), chartsDiv.firstChild);

  const results = await fetchJson("results.json");
  const sections = [...groupByLanguage(results)].sort(
    ([, a], [, b]) => b.speakers - a.speakers,
  );
  for (const [language, { speakers, rows }] of sections) {
    appendLanguageSection(chartsDiv, language, speakers, rows);
  }
}
// Kick off rendering. Failures (network errors, malformed data) are logged
// and shown on the page instead of being left as an unhandled rejection.
init().catch((error) => {
  console.error("Failed to render Language Bench charts:", error);
  const notice = document.createElement("p");
  notice.textContent = `Could not load benchmark results: ${error.message}`;
  document.getElementById("charts").appendChild(notice);
});
</script>
</body>
</html>