David Pomerenke committed on
Commit
7f54946
·
1 Parent(s): 086a421

Only show top languages in bar chart

Browse files
Files changed (2) hide show
  1. app.py +12 -31
  2. language-chart.js +0 -68
app.py CHANGED
@@ -4,6 +4,7 @@ import gradio as gr
4
  import numpy as np
5
  import pandas as pd
6
  import plotly.graph_objects as go
 
7
  import pycountry
8
 
9
  with open("results.json") as f:
@@ -127,36 +128,14 @@ def create_leaderboard_df(results):
127
 
128
 
129
  def create_model_comparison_plot(results):
130
- # Extract all unique models
131
- models = set()
132
- for lang in results:
133
- for score in lang["scores"]:
134
- models.add(score["model"])
135
- models = list(models)
136
-
137
- # Create traces for each model
138
- traces = []
139
- for model in models:
140
- x_vals = [] # languages
141
- y_vals = [] # BLEU scores
142
-
143
- for lang in results:
144
- model_score = next(
145
- (s["bleu"] for s in lang["scores"] if s["model"] == model), None
146
- )
147
- if model_score is not None:
148
- x_vals.append(lang["language_name"])
149
- y_vals.append(model_score)
150
-
151
- traces.append(
152
- go.Bar(
153
- name=model.split("/")[-1],
154
- x=x_vals,
155
- y=y_vals,
156
- )
157
- )
158
-
159
- fig = go.Figure(data=traces)
160
  fig.update_layout(
161
  title="BLEU Scores by Model and Language",
162
  xaxis_title=None,
@@ -231,7 +210,9 @@ def create_language_stats_df(results):
231
  def create_scatter_plot(results):
232
  fig = go.Figure()
233
 
234
- x_vals = [lang["speakers"] / 1_000_000 for lang in results if lang["speakers"] >= 10_000] # Convert to millions
 
 
235
  y_vals = [lang["bleu"] for lang in results]
236
  labels = [lang["language_name"] for lang in results]
237
 
 
4
  import numpy as np
5
  import pandas as pd
6
  import plotly.graph_objects as go
7
+ import plotly.express as px
8
  import pycountry
9
 
10
  with open("results.json") as f:
 
128
 
129
 
130
  def create_model_comparison_plot(results):
131
+ top_languages = sorted(results, key=lambda x: x["speakers"], reverse=True)[:10]
132
+ scores_flat = [
133
+ {"language": lang["language_name"], "model": score["model"], "bleu": score["bleu"]}
134
+ for lang in top_languages
135
+ for score in lang["scores"]
136
+ ]
137
+ df = pd.DataFrame(scores_flat)
138
+ fig = px.bar(df, x="language", y="bleu", color="model", barmode="group")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
139
  fig.update_layout(
140
  title="BLEU Scores by Model and Language",
141
  xaxis_title=None,
 
210
  def create_scatter_plot(results):
211
  fig = go.Figure()
212
 
213
+ x_vals = [
214
+ lang["speakers"] / 1_000_000 for lang in results if lang["speakers"] >= 10_000
215
+ ] # Convert to millions
216
  y_vals = [lang["bleu"] for lang in results]
217
  labels = [lang["language_name"] for lang in results]
218
 
language-chart.js DELETED
@@ -1,68 +0,0 @@
1
- import * as Plot from "npm:@observablehq/plot";
2
-
3
- export function languageChart(
4
- languageData,
5
- { width, height, scoreKey, scoreName } = {}
6
- ) {
7
- // Format captions
8
- const formatScore = (score) =>
9
- score > 0 ? score.toFixed(2) : "No benchmark available!";
10
- const formatTitle = (d) =>
11
- d.language_name +
12
- "\n" +
13
- parseInt(d.speakers / 1_000_00) / 10 +
14
- "M speakers\n" +
15
- scoreName +
16
- ": " +
17
- formatScore(d[scoreKey]);
18
-
19
- return Plot.plot({
20
- width: width,
21
- height: height,
22
- marginBottom: 100,
23
- x: { label: "Number of speakers", axis: null },
24
- y: { label: `${scoreName} (average across models)` },
25
- // color: { scheme: "BrBG" },
26
- marks: [
27
- Plot.rectY(
28
- languageData,
29
- Plot.stackX({
30
- x: "speakers",
31
- order: scoreKey,
32
- reverse: true,
33
- y2: scoreKey, // y2 to avoid stacking by y
34
- title: formatTitle,
35
- tip: true,
36
- fill: (d) => (d[scoreKey] > 0 ? "black" : "pink"),
37
- })
38
- ),
39
- Plot.rectY(
40
- languageData,
41
- Plot.pointerX(
42
- Plot.stackX({
43
- x: "speakers",
44
- order: scoreKey,
45
- reverse: true,
46
- y2: scoreKey, // y2 to avoid stacking by y
47
- fill: "grey",
48
- })
49
- )
50
- ),
51
- Plot.text(
52
- languageData,
53
- Plot.stackX({
54
- x: "speakers",
55
- y2: scoreKey,
56
- order: scoreKey,
57
- reverse: true,
58
- text: "language_name",
59
- frameAnchor: "bottom",
60
- textAnchor: "end",
61
- dy: 10,
62
- rotate: 270,
63
- opacity: (d) => (d.speakers > 50_000_000 ? 1 : 0),
64
- })
65
- ),
66
- ],
67
- });
68
- }