David Pomerenke
committed on
Commit
·
7f54946
1
Parent(s):
086a421
Only show top languages in bar chart
Browse files
- app.py +12 -31
- language-chart.js +0 -68
app.py
CHANGED
@@ -4,6 +4,7 @@ import gradio as gr
|
|
4 |
import numpy as np
|
5 |
import pandas as pd
|
6 |
import plotly.graph_objects as go
|
|
|
7 |
import pycountry
|
8 |
|
9 |
with open("results.json") as f:
|
@@ -127,36 +128,14 @@ def create_leaderboard_df(results):
|
|
127 |
|
128 |
|
129 |
def create_model_comparison_plot(results):
|
130 |
-
|
131 |
-
|
132 |
-
|
133 |
-
for
|
134 |
-
|
135 |
-
|
136 |
-
|
137 |
-
|
138 |
-
traces = []
|
139 |
-
for model in models:
|
140 |
-
x_vals = [] # languages
|
141 |
-
y_vals = [] # BLEU scores
|
142 |
-
|
143 |
-
for lang in results:
|
144 |
-
model_score = next(
|
145 |
-
(s["bleu"] for s in lang["scores"] if s["model"] == model), None
|
146 |
-
)
|
147 |
-
if model_score is not None:
|
148 |
-
x_vals.append(lang["language_name"])
|
149 |
-
y_vals.append(model_score)
|
150 |
-
|
151 |
-
traces.append(
|
152 |
-
go.Bar(
|
153 |
-
name=model.split("/")[-1],
|
154 |
-
x=x_vals,
|
155 |
-
y=y_vals,
|
156 |
-
)
|
157 |
-
)
|
158 |
-
|
159 |
-
fig = go.Figure(data=traces)
|
160 |
fig.update_layout(
|
161 |
title="BLEU Scores by Model and Language",
|
162 |
xaxis_title=None,
|
@@ -231,7 +210,9 @@ def create_language_stats_df(results):
|
|
231 |
def create_scatter_plot(results):
|
232 |
fig = go.Figure()
|
233 |
|
234 |
-
x_vals = [
|
|
|
|
|
235 |
y_vals = [lang["bleu"] for lang in results]
|
236 |
labels = [lang["language_name"] for lang in results]
|
237 |
|
|
|
4 |
import numpy as np
|
5 |
import pandas as pd
|
6 |
import plotly.graph_objects as go
|
7 |
+
import plotly.express as px
|
8 |
import pycountry
|
9 |
|
10 |
with open("results.json") as f:
|
|
|
128 |
|
129 |
|
130 |
def create_model_comparison_plot(results):
|
131 |
+
top_languages = sorted(results, key=lambda x: x["speakers"], reverse=True)[:10]
|
132 |
+
scores_flat = [
|
133 |
+
{"language": lang["language_name"], "model": score["model"], "bleu": score["bleu"]}
|
134 |
+
for lang in top_languages
|
135 |
+
for score in lang["scores"]
|
136 |
+
]
|
137 |
+
df = pd.DataFrame(scores_flat)
|
138 |
+
fig = px.bar(df, x="language", y="bleu", color="model", barmode="group")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
139 |
fig.update_layout(
|
140 |
title="BLEU Scores by Model and Language",
|
141 |
xaxis_title=None,
|
|
|
210 |
def create_scatter_plot(results):
|
211 |
fig = go.Figure()
|
212 |
|
213 |
+
x_vals = [
|
214 |
+
lang["speakers"] / 1_000_000 for lang in results if lang["speakers"] >= 10_000
|
215 |
+
] # Convert to millions
|
216 |
y_vals = [lang["bleu"] for lang in results]
|
217 |
labels = [lang["language_name"] for lang in results]
|
218 |
|
language-chart.js
DELETED
@@ -1,68 +0,0 @@
|
|
1 |
-
import * as Plot from "npm:@observablehq/plot";
|
2 |
-
|
3 |
-
export function languageChart(
|
4 |
-
languageData,
|
5 |
-
{ width, height, scoreKey, scoreName } = {}
|
6 |
-
) {
|
7 |
-
// Format captions
|
8 |
-
const formatScore = (score) =>
|
9 |
-
score > 0 ? score.toFixed(2) : "No benchmark available!";
|
10 |
-
const formatTitle = (d) =>
|
11 |
-
d.language_name +
|
12 |
-
"\n" +
|
13 |
-
parseInt(d.speakers / 1_000_00) / 10 +
|
14 |
-
"M speakers\n" +
|
15 |
-
scoreName +
|
16 |
-
": " +
|
17 |
-
formatScore(d[scoreKey]);
|
18 |
-
|
19 |
-
return Plot.plot({
|
20 |
-
width: width,
|
21 |
-
height: height,
|
22 |
-
marginBottom: 100,
|
23 |
-
x: { label: "Number of speakers", axis: null },
|
24 |
-
y: { label: `${scoreName} (average across models)` },
|
25 |
-
// color: { scheme: "BrBG" },
|
26 |
-
marks: [
|
27 |
-
Plot.rectY(
|
28 |
-
languageData,
|
29 |
-
Plot.stackX({
|
30 |
-
x: "speakers",
|
31 |
-
order: scoreKey,
|
32 |
-
reverse: true,
|
33 |
-
y2: scoreKey, // y2 to avoid stacking by y
|
34 |
-
title: formatTitle,
|
35 |
-
tip: true,
|
36 |
-
fill: (d) => (d[scoreKey] > 0 ? "black" : "pink"),
|
37 |
-
})
|
38 |
-
),
|
39 |
-
Plot.rectY(
|
40 |
-
languageData,
|
41 |
-
Plot.pointerX(
|
42 |
-
Plot.stackX({
|
43 |
-
x: "speakers",
|
44 |
-
order: scoreKey,
|
45 |
-
reverse: true,
|
46 |
-
y2: scoreKey, // y2 to avoid stacking by y
|
47 |
-
fill: "grey",
|
48 |
-
})
|
49 |
-
)
|
50 |
-
),
|
51 |
-
Plot.text(
|
52 |
-
languageData,
|
53 |
-
Plot.stackX({
|
54 |
-
x: "speakers",
|
55 |
-
y2: scoreKey,
|
56 |
-
order: scoreKey,
|
57 |
-
reverse: true,
|
58 |
-
text: "language_name",
|
59 |
-
frameAnchor: "bottom",
|
60 |
-
textAnchor: "end",
|
61 |
-
dy: 10,
|
62 |
-
rotate: 270,
|
63 |
-
opacity: (d) => (d.speakers > 50_000_000 ? 1 : 0),
|
64 |
-
})
|
65 |
-
),
|
66 |
-
],
|
67 |
-
});
|
68 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|