David Pomerenke committed on
Commit
723f963
·
1 Parent(s): b4a0c57

Process data for country map

Browse files
evals/countries.py ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+ import xml.etree.ElementTree as ET
3
+ from collections import defaultdict
4
+
5
+ from language_data.population_data import LANGUAGE_SPEAKING_POPULATION
6
+ from language_data.util import data_filename
7
+
8
+
9
def get_population_data(filename=None):
    """Read per-territory population figures from a supplemental-data XML file.

    Args:
        filename: Path of the XML file to read. When omitted, the path is
            resolved with ``data_filename("supplementalData.xml")``.

    Returns:
        A dict mapping the ``type`` attribute of every
        ``territoryInfo/territory`` element to its ``population`` attribute
        converted to ``float``.

    Raises:
        KeyError: If a territory element lacks ``type`` or ``population``.
        ValueError: If a ``population`` attribute is not a valid number.
    """
    if filename is None:
        filename = data_filename("supplementalData.xml")
    # ET.parse opens and closes the file itself and decodes it according to
    # the XML declaration, so no file handle is leaked and the result does
    # not depend on the locale's default text encoding.
    root = ET.parse(filename).getroot()
    return {
        territory.attrib["type"]: float(territory.attrib["population"])
        for territory in root.findall("./territoryInfo/territory")
    }
20
+
21
+
22
def population(bcp_47, populations=None):
    """Return the per-region entries recorded for one language tag.

    A key of the population table is selected when it consists of exactly
    ``bcp_47``, a hyphen, and two uppercase ASCII letters (the region code).

    Args:
        bcp_47: Language tag to look up, e.g. ``"en"`` or ``"az-Arab"``.
        populations: Mapping from tag to population value. Defaults to
            ``LANGUAGE_SPEAKING_POPULATION``.

    Returns:
        A dict mapping each two-letter region code to the value stored for
        ``"<bcp_47>-<region>"``; empty when nothing matches.
    """
    if populations is None:
        populations = LANGUAGE_SPEAKING_POPULATION
    # Escape the tag so it is matched literally, and capture the region so
    # the key is always the bare region code -- stripping only the first
    # lowercase subtag would leave "Arab-IQ" for a tag such as "az-Arab".
    pattern = re.compile(rf"{re.escape(bcp_47)}-([A-Z]{{2}})")
    regions = {}
    for tag, value in populations.items():
        match = pattern.fullmatch(tag)
        if match:
            regions[match.group(1)] = value
    return regions
30
+
31
def make_country_table(language_table):
    """Group languages by country and compute a weighted score per country.

    Args:
        language_table: Object whose ``itertuples()`` yields rows exposing
            ``bcp_47``, ``language_name`` and ``average`` attributes.

    Returns:
        A dict mapping each country code to
        ``{"score": ..., "languages": [...]}``. ``languages`` lists one
        ``{"name", "bcp_47", "population", "score"}`` entry per language
        found for that country by ``population()``. ``score`` is the mean of
        the language scores weighted by ``population``, or ``None`` when the
        summed population is zero.
    """
    by_country = defaultdict(list)
    for lang in language_table.itertuples():
        for country, pop in population(lang.bcp_47).items():
            by_country[country].append(
                {
                    "name": lang.language_name,
                    "bcp_47": lang.bcp_47,
                    "population": pop,
                    "score": lang.average,
                }
            )

    # NOTE(review): a NaN ``average`` propagates into the country score
    # unchanged -- confirm whether unscored languages should be excluded.
    # Build a separate plain dict instead of rebinding values of the
    # defaultdict being iterated, so a later lookup of an unknown country
    # raises KeyError rather than silently inserting an empty list.
    table = {}
    for country, languages in by_country.items():
        total = sum(entry["population"] for entry in languages)
        weighted = sum(entry["score"] * entry["population"] for entry in languages)
        table[country] = {
            # Guard against a zero total, which would raise ZeroDivisionError.
            "score": weighted / total if total else None,
            "languages": languages,
        }
    return table
evals/languages.py CHANGED
@@ -46,15 +46,6 @@ scripts = pd.read_csv("data/ScriptCodes.csv").rename(
46
  columns={"Code": "iso15924", "English Name": "script_name"}
47
  )
48
 
49
-
50
- def population(bcp_47):
51
- items = {
52
- re.sub(r"^[a-z]+-", "", lang): pop
53
- for lang, pop in LANGUAGE_SPEAKING_POPULATION.items()
54
- if re.match(rf"^{bcp_47}-[A-Z]{{2}}$", lang)
55
- }
56
- return items
57
-
58
  def script_name(iso15924):
59
  return scripts[scripts["iso15924"] == iso15924]["script_name"].values[0]
60
 
 
46
  columns={"Code": "iso15924", "English Name": "script_name"}
47
  )
48
 
 
 
 
 
 
 
 
 
 
49
  def script_name(iso15924):
50
  return scripts[scripts["iso15924"] == iso15924]["script_name"].values[0]
51
 
evals/main.py CHANGED
@@ -3,11 +3,12 @@ import json
3
 
4
  import numpy as np
5
  import pandas as pd
6
- from rich import print
7
- from tqdm.asyncio import tqdm_asyncio
8
  from languages import languages
 
 
9
  from tasks import tasks
10
- from models import models, model_fast
11
 
12
  # ===== config =====
13
 
@@ -91,7 +92,20 @@ def make_model_table(df):
91
  df["provider"] = df["model"].str.split("/").str[0].apply(fmt_name)
92
  df["model"] = df["model"].str.split("/").str[1].apply(fmt_name)
93
  df["rank"] = df.index + 1
94
- df = df[["rank", "provider", "model", "hf_id", "creation_date", "size", "type", "license", "average", *task_metrics]]
 
 
 
 
 
 
 
 
 
 
 
 
 
95
  return df
96
 
97
 
@@ -99,15 +113,31 @@ def make_language_table(df):
99
  df["task_metric"] = df["task"] + "_" + df["metric"]
100
  df = df.drop(columns=["task", "metric"])
101
  task_metrics = df["task_metric"].unique()
102
- df = df.pivot(index="bcp_47", columns="task_metric", values="score").fillna(0).reset_index()
 
 
 
 
103
  df["average"] = df[task_metrics].mean(axis=1)
104
  for row in [*task_metrics, "average"]:
105
  df[row] = df[row].round(2)
106
  df = pd.merge(languages, df, on="bcp_47", how="outer")
107
  df = df.sort_values(by="speakers", ascending=False)
108
- df = df[["language_name", "autonym", "speakers", "family", "average", "in_benchmark", *task_metrics]]
 
 
 
 
 
 
 
 
 
 
 
109
  return df
110
 
 
111
  async def main():
112
  results = await evaluate()
113
  results, lang_results, model_results, task_results = aggregate(results)
@@ -121,10 +151,12 @@ async def main():
121
  json.dump(all_results, f, indent=2, ensure_ascii=False)
122
 
123
  datasets_df = pd.read_json("data/datasets.json")
 
124
  all_tables = {
125
  "model_table": serialize(make_model_table(model_results)),
126
- "language_table": serialize(make_language_table(lang_results)),
127
  "dataset_table": serialize(datasets_df),
 
128
  }
129
  with open("frontend/public/results.json", "w") as f:
130
  json.dump(all_tables, f, indent=2, ensure_ascii=False)
 
3
 
4
  import numpy as np
5
  import pandas as pd
6
+ from countries import make_country_table
 
7
  from languages import languages
8
+ from models import model_fast, models
9
+ from rich import print
10
  from tasks import tasks
11
+ from tqdm.asyncio import tqdm_asyncio
12
 
13
  # ===== config =====
14
 
 
92
  df["provider"] = df["model"].str.split("/").str[0].apply(fmt_name)
93
  df["model"] = df["model"].str.split("/").str[1].apply(fmt_name)
94
  df["rank"] = df.index + 1
95
+ df = df[
96
+ [
97
+ "rank",
98
+ "provider",
99
+ "model",
100
+ "hf_id",
101
+ "creation_date",
102
+ "size",
103
+ "type",
104
+ "license",
105
+ "average",
106
+ *task_metrics,
107
+ ]
108
+ ]
109
  return df
110
 
111
 
 
113
  df["task_metric"] = df["task"] + "_" + df["metric"]
114
  df = df.drop(columns=["task", "metric"])
115
  task_metrics = df["task_metric"].unique()
116
+ df = (
117
+ df.pivot(index="bcp_47", columns="task_metric", values="score")
118
+ .fillna(0)
119
+ .reset_index()
120
+ )
121
  df["average"] = df[task_metrics].mean(axis=1)
122
  for row in [*task_metrics, "average"]:
123
  df[row] = df[row].round(2)
124
  df = pd.merge(languages, df, on="bcp_47", how="outer")
125
  df = df.sort_values(by="speakers", ascending=False)
126
+ df = df[
127
+ [
128
+ "bcp_47",
129
+ "language_name",
130
+ "autonym",
131
+ "speakers",
132
+ "family",
133
+ "average",
134
+ "in_benchmark",
135
+ *task_metrics,
136
+ ]
137
+ ]
138
  return df
139
 
140
+
141
  async def main():
142
  results = await evaluate()
143
  results, lang_results, model_results, task_results = aggregate(results)
 
151
  json.dump(all_results, f, indent=2, ensure_ascii=False)
152
 
153
  datasets_df = pd.read_json("data/datasets.json")
154
+ language_table = make_language_table(lang_results)
155
  all_tables = {
156
  "model_table": serialize(make_model_table(model_results)),
157
+ "language_table": serialize(language_table),
158
  "dataset_table": serialize(datasets_df),
159
+ "countries": make_country_table(language_table),
160
  }
161
  with open("frontend/public/results.json", "w") as f:
162
  json.dump(all_tables, f, indent=2, ensure_ascii=False)
frontend/public/results.json CHANGED
The diff for this file is too large to render. See raw diff
 
results.json CHANGED
@@ -616,7 +616,7 @@
616
  "family": "Indo-European",
617
  "flores_path": null,
618
  "fleurs_tag": null,
619
- "commonvoice_hours": 13.0,
620
  "commonvoice_locale": "an",
621
  "in_benchmark": false,
622
  "task": null,
@@ -1992,7 +1992,7 @@
1992
  "family": "Indo-European",
1993
  "flores_path": "cat_Latn",
1994
  "fleurs_tag": "ca_es",
1995
- "commonvoice_hours": 2842.0,
1996
  "commonvoice_locale": "ca",
1997
  "in_benchmark": true,
1998
  "task": null,
@@ -2584,7 +2584,7 @@
2584
  "family": "Indo-European",
2585
  "flores_path": "deu_Latn",
2586
  "fleurs_tag": "de_de",
2587
- "commonvoice_hours": 1359.0,
2588
  "commonvoice_locale": "de",
2589
  "in_benchmark": true,
2590
  "task": null,
@@ -2904,7 +2904,7 @@
2904
  "family": "Indo-European",
2905
  "flores_path": "eng_Latn",
2906
  "fleurs_tag": "en_us",
2907
- "commonvoice_hours": 2651.0,
2908
  "commonvoice_locale": "en",
2909
  "in_benchmark": true,
2910
  "task": "classification",
@@ -2920,7 +2920,7 @@
2920
  "family": "Indo-European",
2921
  "flores_path": "eng_Latn",
2922
  "fleurs_tag": "en_us",
2923
- "commonvoice_hours": 2651.0,
2924
  "commonvoice_locale": "en",
2925
  "in_benchmark": true,
2926
  "task": "language_modeling",
@@ -2936,7 +2936,7 @@
2936
  "family": "Indo-European",
2937
  "flores_path": "eng_Latn",
2938
  "fleurs_tag": "en_us",
2939
- "commonvoice_hours": 2651.0,
2940
  "commonvoice_locale": "en",
2941
  "in_benchmark": true,
2942
  "task": "translation",
@@ -2952,7 +2952,7 @@
2952
  "family": "Indo-European",
2953
  "flores_path": "eng_Latn",
2954
  "fleurs_tag": "en_us",
2955
- "commonvoice_hours": 2651.0,
2956
  "commonvoice_locale": "en",
2957
  "in_benchmark": true,
2958
  "task": "translation",
@@ -3320,7 +3320,7 @@
3320
  "family": "Indo-European",
3321
  "flores_path": "fra_Latn",
3322
  "fleurs_tag": "fr_fr",
3323
- "commonvoice_hours": 1052.0,
3324
  "commonvoice_locale": "fr",
3325
  "in_benchmark": true,
3326
  "task": "classification",
@@ -3336,7 +3336,7 @@
3336
  "family": "Indo-European",
3337
  "flores_path": "fra_Latn",
3338
  "fleurs_tag": "fr_fr",
3339
- "commonvoice_hours": 1052.0,
3340
  "commonvoice_locale": "fr",
3341
  "in_benchmark": true,
3342
  "task": "language_modeling",
@@ -3352,7 +3352,7 @@
3352
  "family": "Indo-European",
3353
  "flores_path": "fra_Latn",
3354
  "fleurs_tag": "fr_fr",
3355
- "commonvoice_hours": 1052.0,
3356
  "commonvoice_locale": "fr",
3357
  "in_benchmark": true,
3358
  "task": "translation",
@@ -3368,7 +3368,7 @@
3368
  "family": "Indo-European",
3369
  "flores_path": "fra_Latn",
3370
  "fleurs_tag": "fr_fr",
3371
- "commonvoice_hours": 1052.0,
3372
  "commonvoice_locale": "fr",
3373
  "in_benchmark": true,
3374
  "task": "translation",
@@ -4952,7 +4952,7 @@
4952
  "family": "Abkhaz-Adyge",
4953
  "flores_path": null,
4954
  "fleurs_tag": null,
4955
- "commonvoice_hours": 16.0,
4956
  "commonvoice_locale": "kbd",
4957
  "in_benchmark": false,
4958
  "task": null,
@@ -5752,7 +5752,7 @@
5752
  "family": "Indo-European",
5753
  "flores_path": null,
5754
  "fleurs_tag": null,
5755
- "commonvoice_hours": 2.9,
5756
  "commonvoice_locale": "kw",
5757
  "in_benchmark": false,
5758
  "task": null,
@@ -6296,7 +6296,7 @@
6296
  "family": "Indo-European",
6297
  "flores_path": "ltg_Latn",
6298
  "fleurs_tag": null,
6299
- "commonvoice_hours": 28.0,
6300
  "commonvoice_locale": "ltg",
6301
  "in_benchmark": true,
6302
  "task": null,
@@ -8360,7 +8360,7 @@
8360
  "family": "Indo-European",
8361
  "flores_path": null,
8362
  "fleurs_tag": "ps_af",
8363
- "commonvoice_hours": 79.0,
8364
  "commonvoice_locale": "ps",
8365
  "in_benchmark": false,
8366
  "task": null,
@@ -8952,7 +8952,7 @@
8952
  "family": "Turkic",
8953
  "flores_path": null,
8954
  "fleurs_tag": null,
8955
- "commonvoice_hours": 9.5,
8956
  "commonvoice_locale": "sah",
8957
  "in_benchmark": false,
8958
  "task": null,
@@ -9224,7 +9224,7 @@
9224
  "family": null,
9225
  "flores_path": null,
9226
  "fleurs_tag": null,
9227
- "commonvoice_hours": 1.2,
9228
  "commonvoice_locale": "sei",
9229
  "in_benchmark": false,
9230
  "task": null,
@@ -9352,7 +9352,7 @@
9352
  "family": "Indo-European",
9353
  "flores_path": "slk_Latn",
9354
  "fleurs_tag": "sk_sk",
9355
- "commonvoice_hours": 46.0,
9356
  "commonvoice_locale": "sk",
9357
  "in_benchmark": true,
9358
  "task": null,
@@ -10200,7 +10200,7 @@
10200
  "family": "Afro-Asiatic",
10201
  "flores_path": null,
10202
  "fleurs_tag": null,
10203
- "commonvoice_hours": 7.3,
10204
  "commonvoice_locale": "tig",
10205
  "in_benchmark": false,
10206
  "task": null,
@@ -10712,7 +10712,7 @@
10712
  "family": "Turkic",
10713
  "flores_path": "uig_Arab",
10714
  "fleurs_tag": null,
10715
- "commonvoice_hours": 364.0,
10716
  "commonvoice_locale": "ug",
10717
  "in_benchmark": true,
10718
  "task": null,
@@ -10728,7 +10728,7 @@
10728
  "family": "Indo-European",
10729
  "flores_path": "ukr_Cyrl",
10730
  "fleurs_tag": "uk_ua",
10731
- "commonvoice_hours": 98.0,
10732
  "commonvoice_locale": "uk",
10733
  "in_benchmark": true,
10734
  "task": null,
 
616
  "family": "Indo-European",
617
  "flores_path": null,
618
  "fleurs_tag": null,
619
+ "commonvoice_hours": 14.0,
620
  "commonvoice_locale": "an",
621
  "in_benchmark": false,
622
  "task": null,
 
1992
  "family": "Indo-European",
1993
  "flores_path": "cat_Latn",
1994
  "fleurs_tag": "ca_es",
1995
+ "commonvoice_hours": 2844.0,
1996
  "commonvoice_locale": "ca",
1997
  "in_benchmark": true,
1998
  "task": null,
 
2584
  "family": "Indo-European",
2585
  "flores_path": "deu_Latn",
2586
  "fleurs_tag": "de_de",
2587
+ "commonvoice_hours": 1360.0,
2588
  "commonvoice_locale": "de",
2589
  "in_benchmark": true,
2590
  "task": null,
 
2904
  "family": "Indo-European",
2905
  "flores_path": "eng_Latn",
2906
  "fleurs_tag": "en_us",
2907
+ "commonvoice_hours": 2653.0,
2908
  "commonvoice_locale": "en",
2909
  "in_benchmark": true,
2910
  "task": "classification",
 
2920
  "family": "Indo-European",
2921
  "flores_path": "eng_Latn",
2922
  "fleurs_tag": "en_us",
2923
+ "commonvoice_hours": 2653.0,
2924
  "commonvoice_locale": "en",
2925
  "in_benchmark": true,
2926
  "task": "language_modeling",
 
2936
  "family": "Indo-European",
2937
  "flores_path": "eng_Latn",
2938
  "fleurs_tag": "en_us",
2939
+ "commonvoice_hours": 2653.0,
2940
  "commonvoice_locale": "en",
2941
  "in_benchmark": true,
2942
  "task": "translation",
 
2952
  "family": "Indo-European",
2953
  "flores_path": "eng_Latn",
2954
  "fleurs_tag": "en_us",
2955
+ "commonvoice_hours": 2653.0,
2956
  "commonvoice_locale": "en",
2957
  "in_benchmark": true,
2958
  "task": "translation",
 
3320
  "family": "Indo-European",
3321
  "flores_path": "fra_Latn",
3322
  "fleurs_tag": "fr_fr",
3323
+ "commonvoice_hours": 1053.0,
3324
  "commonvoice_locale": "fr",
3325
  "in_benchmark": true,
3326
  "task": "classification",
 
3336
  "family": "Indo-European",
3337
  "flores_path": "fra_Latn",
3338
  "fleurs_tag": "fr_fr",
3339
+ "commonvoice_hours": 1053.0,
3340
  "commonvoice_locale": "fr",
3341
  "in_benchmark": true,
3342
  "task": "language_modeling",
 
3352
  "family": "Indo-European",
3353
  "flores_path": "fra_Latn",
3354
  "fleurs_tag": "fr_fr",
3355
+ "commonvoice_hours": 1053.0,
3356
  "commonvoice_locale": "fr",
3357
  "in_benchmark": true,
3358
  "task": "translation",
 
3368
  "family": "Indo-European",
3369
  "flores_path": "fra_Latn",
3370
  "fleurs_tag": "fr_fr",
3371
+ "commonvoice_hours": 1053.0,
3372
  "commonvoice_locale": "fr",
3373
  "in_benchmark": true,
3374
  "task": "translation",
 
4952
  "family": "Abkhaz-Adyge",
4953
  "flores_path": null,
4954
  "fleurs_tag": null,
4955
+ "commonvoice_hours": 18.0,
4956
  "commonvoice_locale": "kbd",
4957
  "in_benchmark": false,
4958
  "task": null,
 
5752
  "family": "Indo-European",
5753
  "flores_path": null,
5754
  "fleurs_tag": null,
5755
+ "commonvoice_hours": 3.4,
5756
  "commonvoice_locale": "kw",
5757
  "in_benchmark": false,
5758
  "task": null,
 
6296
  "family": "Indo-European",
6297
  "flores_path": "ltg_Latn",
6298
  "fleurs_tag": null,
6299
+ "commonvoice_hours": 29.0,
6300
  "commonvoice_locale": "ltg",
6301
  "in_benchmark": true,
6302
  "task": null,
 
8360
  "family": "Indo-European",
8361
  "flores_path": null,
8362
  "fleurs_tag": "ps_af",
8363
+ "commonvoice_hours": 80.0,
8364
  "commonvoice_locale": "ps",
8365
  "in_benchmark": false,
8366
  "task": null,
 
8952
  "family": "Turkic",
8953
  "flores_path": null,
8954
  "fleurs_tag": null,
8955
+ "commonvoice_hours": 11.0,
8956
  "commonvoice_locale": "sah",
8957
  "in_benchmark": false,
8958
  "task": null,
 
9224
  "family": null,
9225
  "flores_path": null,
9226
  "fleurs_tag": null,
9227
+ "commonvoice_hours": 1.4,
9228
  "commonvoice_locale": "sei",
9229
  "in_benchmark": false,
9230
  "task": null,
 
9352
  "family": "Indo-European",
9353
  "flores_path": "slk_Latn",
9354
  "fleurs_tag": "sk_sk",
9355
+ "commonvoice_hours": 47.0,
9356
  "commonvoice_locale": "sk",
9357
  "in_benchmark": true,
9358
  "task": null,
 
10200
  "family": "Afro-Asiatic",
10201
  "flores_path": null,
10202
  "fleurs_tag": null,
10203
+ "commonvoice_hours": 11.0,
10204
  "commonvoice_locale": "tig",
10205
  "in_benchmark": false,
10206
  "task": null,
 
10712
  "family": "Turkic",
10713
  "flores_path": "uig_Arab",
10714
  "fleurs_tag": null,
10715
+ "commonvoice_hours": 365.0,
10716
  "commonvoice_locale": "ug",
10717
  "in_benchmark": true,
10718
  "task": null,
 
10728
  "family": "Indo-European",
10729
  "flores_path": "ukr_Cyrl",
10730
  "fleurs_tag": "uk_ua",
10731
+ "commonvoice_hours": 99.0,
10732
  "commonvoice_locale": "uk",
10733
  "in_benchmark": true,
10734
  "task": null,