David Pomerenke
committed on
Commit
·
723f963
1
Parent(s):
b4a0c57
Process data for country map
Browse files- evals/countries.py +50 -0
- evals/languages.py +0 -9
- evals/main.py +39 -7
- frontend/public/results.json +0 -0
- results.json +21 -21
evals/countries.py
ADDED
@@ -0,0 +1,50 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import re
|
2 |
+
import xml.etree.ElementTree as ET
|
3 |
+
from collections import defaultdict
|
4 |
+
|
5 |
+
from language_data.population_data import LANGUAGE_SPEAKING_POPULATION
|
6 |
+
from language_data.util import data_filename
|
7 |
+
|
8 |
+
|
9 |
+
def get_population_data(filename=None):
    """Return the population of every territory listed in the supplemental data.

    Args:
        filename: Path to an XML file containing
            ``territoryInfo/territory`` elements. When omitted, the
            ``supplementalData.xml`` file located through
            ``data_filename`` is used (the original behaviour).

    Returns:
        A dict mapping each territory's ``type`` attribute to its
        ``population`` attribute converted to ``float``.

    Raises:
        KeyError: If a territory element lacks ``type`` or ``population``.
        ValueError: If a ``population`` attribute is not a valid number.
    """
    if filename is None:
        filename = data_filename("supplementalData.xml")
    # ET.parse opens and closes the file itself and honours the encoding
    # declared in the XML prolog. The previous open(...).read() never closed
    # the handle and decoded with the platform default encoding.
    root = ET.parse(filename).getroot()
    return {
        territory.attrib["type"]: float(territory.attrib["population"])
        for territory in root.findall("./territoryInfo/territory")
    }
|
20 |
+
|
21 |
+
|
22 |
+
def population(bcp_47, populations=None):
    """Return the speaker population of a language, broken down by region.

    Only tags of the exact form ``<bcp_47>-<REGION>`` are considered, where
    ``<REGION>`` is two uppercase ASCII letters. The bare language tag and
    tags with further subtags between language and region are ignored.

    Args:
        bcp_47: Language tag to look up, e.g. ``"de"`` or ``"zh-Hans"``.
        populations: Mapping of language tag to speaker count. Defaults to
            ``LANGUAGE_SPEAKING_POPULATION``.

    Returns:
        A dict mapping the two-letter region code to the speaker count.
    """
    if populations is None:
        populations = LANGUAGE_SPEAKING_POPULATION
    # Match the tag literally instead of interpolating it into a regex, so
    # characters that are regex metacharacters cannot act as wildcards.
    prefix = f"{bcp_47}-"
    by_region = {}
    for tag, speakers in populations.items():
        if not tag.startswith(prefix):
            continue
        # Key on everything after the full prefix. Stripping only the leading
        # lowercase subtag left keys such as "Hans-CN" for tags with a script.
        region = tag[len(prefix):]
        if re.fullmatch(r"[A-Z]{2}", region):
            by_region[region] = speakers
    return by_region
|
29 |
+
|
30 |
+
|
31 |
+
def make_country_table(language_table):
    """Aggregate per-language scores into one population-weighted score per country.

    Args:
        language_table: Table whose ``itertuples()`` rows expose ``bcp_47``,
            ``language_name`` and ``average``.

    Returns:
        A dict keyed by the region codes returned by ``population``. Each
        value is ``{"score": <weighted mean>, "languages": [<entry>, ...]}``,
        where every entry holds ``name``, ``bcp_47``, ``population`` and
        ``score`` for one language spoken in that country.
    """
    import math

    languages_by_country = defaultdict(list)
    for lang in language_table.itertuples():
        score = lang.average
        # A missing average is counted as 0, the same convention as the
        # fillna(0) applied to missing task scores in the language table.
        # Left as NaN it would poison the weighted sum and be written out
        # as the non-standard JSON token NaN.
        if score is None or math.isnan(score):
            score = 0.0
        for country, pop in population(lang.bcp_47).items():
            languages_by_country[country].append(
                {
                    "name": lang.language_name,
                    "bcp_47": lang.bcp_47,
                    "population": pop,
                    "score": score,
                }
            )

    # Build a fresh plain dict rather than overwriting the defaultdict while
    # iterating it, so the result has one value shape and no default factory.
    countries = {}
    for country, entries in languages_by_country.items():
        total_pop = sum(entry["population"] for entry in entries)
        weighted = sum(entry["score"] * entry["population"] for entry in entries)
        countries[country] = {
            # Guard against a country whose listed populations sum to zero.
            "score": weighted / total_pop if total_pop else 0.0,
            "languages": entries,
        }
    return countries
|
evals/languages.py
CHANGED
@@ -46,15 +46,6 @@ scripts = pd.read_csv("data/ScriptCodes.csv").rename(
|
|
46 |
columns={"Code": "iso15924", "English Name": "script_name"}
|
47 |
)
|
48 |
|
49 |
-
|
50 |
-
def population(bcp_47):
|
51 |
-
items = {
|
52 |
-
re.sub(r"^[a-z]+-", "", lang): pop
|
53 |
-
for lang, pop in LANGUAGE_SPEAKING_POPULATION.items()
|
54 |
-
if re.match(rf"^{bcp_47}-[A-Z]{{2}}$", lang)
|
55 |
-
}
|
56 |
-
return items
|
57 |
-
|
58 |
def script_name(iso15924):
|
59 |
return scripts[scripts["iso15924"] == iso15924]["script_name"].values[0]
|
60 |
|
|
|
46 |
columns={"Code": "iso15924", "English Name": "script_name"}
|
47 |
)
|
48 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
49 |
def script_name(iso15924):
|
50 |
return scripts[scripts["iso15924"] == iso15924]["script_name"].values[0]
|
51 |
|
evals/main.py
CHANGED
@@ -3,11 +3,12 @@ import json
|
|
3 |
|
4 |
import numpy as np
|
5 |
import pandas as pd
|
6 |
-
from
|
7 |
-
from tqdm.asyncio import tqdm_asyncio
|
8 |
from languages import languages
|
|
|
|
|
9 |
from tasks import tasks
|
10 |
-
from
|
11 |
|
12 |
# ===== config =====
|
13 |
|
@@ -91,7 +92,20 @@ def make_model_table(df):
|
|
91 |
df["provider"] = df["model"].str.split("/").str[0].apply(fmt_name)
|
92 |
df["model"] = df["model"].str.split("/").str[1].apply(fmt_name)
|
93 |
df["rank"] = df.index + 1
|
94 |
-
df = df[
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
95 |
return df
|
96 |
|
97 |
|
@@ -99,15 +113,31 @@ def make_language_table(df):
|
|
99 |
df["task_metric"] = df["task"] + "_" + df["metric"]
|
100 |
df = df.drop(columns=["task", "metric"])
|
101 |
task_metrics = df["task_metric"].unique()
|
102 |
-
df =
|
|
|
|
|
|
|
|
|
103 |
df["average"] = df[task_metrics].mean(axis=1)
|
104 |
for row in [*task_metrics, "average"]:
|
105 |
df[row] = df[row].round(2)
|
106 |
df = pd.merge(languages, df, on="bcp_47", how="outer")
|
107 |
df = df.sort_values(by="speakers", ascending=False)
|
108 |
-
df = df[
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
109 |
return df
|
110 |
|
|
|
111 |
async def main():
|
112 |
results = await evaluate()
|
113 |
results, lang_results, model_results, task_results = aggregate(results)
|
@@ -121,10 +151,12 @@ async def main():
|
|
121 |
json.dump(all_results, f, indent=2, ensure_ascii=False)
|
122 |
|
123 |
datasets_df = pd.read_json("data/datasets.json")
|
|
|
124 |
all_tables = {
|
125 |
"model_table": serialize(make_model_table(model_results)),
|
126 |
-
"language_table": serialize(
|
127 |
"dataset_table": serialize(datasets_df),
|
|
|
128 |
}
|
129 |
with open("frontend/public/results.json", "w") as f:
|
130 |
json.dump(all_tables, f, indent=2, ensure_ascii=False)
|
|
|
3 |
|
4 |
import numpy as np
|
5 |
import pandas as pd
|
6 |
+
from countries import make_country_table
|
|
|
7 |
from languages import languages
|
8 |
+
from models import model_fast, models
|
9 |
+
from rich import print
|
10 |
from tasks import tasks
|
11 |
+
from tqdm.asyncio import tqdm_asyncio
|
12 |
|
13 |
# ===== config =====
|
14 |
|
|
|
92 |
df["provider"] = df["model"].str.split("/").str[0].apply(fmt_name)
|
93 |
df["model"] = df["model"].str.split("/").str[1].apply(fmt_name)
|
94 |
df["rank"] = df.index + 1
|
95 |
+
df = df[
|
96 |
+
[
|
97 |
+
"rank",
|
98 |
+
"provider",
|
99 |
+
"model",
|
100 |
+
"hf_id",
|
101 |
+
"creation_date",
|
102 |
+
"size",
|
103 |
+
"type",
|
104 |
+
"license",
|
105 |
+
"average",
|
106 |
+
*task_metrics,
|
107 |
+
]
|
108 |
+
]
|
109 |
return df
|
110 |
|
111 |
|
|
|
113 |
df["task_metric"] = df["task"] + "_" + df["metric"]
|
114 |
df = df.drop(columns=["task", "metric"])
|
115 |
task_metrics = df["task_metric"].unique()
|
116 |
+
df = (
|
117 |
+
df.pivot(index="bcp_47", columns="task_metric", values="score")
|
118 |
+
.fillna(0)
|
119 |
+
.reset_index()
|
120 |
+
)
|
121 |
df["average"] = df[task_metrics].mean(axis=1)
|
122 |
for row in [*task_metrics, "average"]:
|
123 |
df[row] = df[row].round(2)
|
124 |
df = pd.merge(languages, df, on="bcp_47", how="outer")
|
125 |
df = df.sort_values(by="speakers", ascending=False)
|
126 |
+
df = df[
|
127 |
+
[
|
128 |
+
"bcp_47",
|
129 |
+
"language_name",
|
130 |
+
"autonym",
|
131 |
+
"speakers",
|
132 |
+
"family",
|
133 |
+
"average",
|
134 |
+
"in_benchmark",
|
135 |
+
*task_metrics,
|
136 |
+
]
|
137 |
+
]
|
138 |
return df
|
139 |
|
140 |
+
|
141 |
async def main():
|
142 |
results = await evaluate()
|
143 |
results, lang_results, model_results, task_results = aggregate(results)
|
|
|
151 |
json.dump(all_results, f, indent=2, ensure_ascii=False)
|
152 |
|
153 |
datasets_df = pd.read_json("data/datasets.json")
|
154 |
+
language_table = make_language_table(lang_results)
|
155 |
all_tables = {
|
156 |
"model_table": serialize(make_model_table(model_results)),
|
157 |
+
"language_table": serialize(language_table),
|
158 |
"dataset_table": serialize(datasets_df),
|
159 |
+
"countries": make_country_table(language_table),
|
160 |
}
|
161 |
with open("frontend/public/results.json", "w") as f:
|
162 |
json.dump(all_tables, f, indent=2, ensure_ascii=False)
|
frontend/public/results.json
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
results.json
CHANGED
@@ -616,7 +616,7 @@
|
|
616 |
"family": "Indo-European",
|
617 |
"flores_path": null,
|
618 |
"fleurs_tag": null,
|
619 |
-
"commonvoice_hours":
|
620 |
"commonvoice_locale": "an",
|
621 |
"in_benchmark": false,
|
622 |
"task": null,
|
@@ -1992,7 +1992,7 @@
|
|
1992 |
"family": "Indo-European",
|
1993 |
"flores_path": "cat_Latn",
|
1994 |
"fleurs_tag": "ca_es",
|
1995 |
-
"commonvoice_hours":
|
1996 |
"commonvoice_locale": "ca",
|
1997 |
"in_benchmark": true,
|
1998 |
"task": null,
|
@@ -2584,7 +2584,7 @@
|
|
2584 |
"family": "Indo-European",
|
2585 |
"flores_path": "deu_Latn",
|
2586 |
"fleurs_tag": "de_de",
|
2587 |
-
"commonvoice_hours":
|
2588 |
"commonvoice_locale": "de",
|
2589 |
"in_benchmark": true,
|
2590 |
"task": null,
|
@@ -2904,7 +2904,7 @@
|
|
2904 |
"family": "Indo-European",
|
2905 |
"flores_path": "eng_Latn",
|
2906 |
"fleurs_tag": "en_us",
|
2907 |
-
"commonvoice_hours":
|
2908 |
"commonvoice_locale": "en",
|
2909 |
"in_benchmark": true,
|
2910 |
"task": "classification",
|
@@ -2920,7 +2920,7 @@
|
|
2920 |
"family": "Indo-European",
|
2921 |
"flores_path": "eng_Latn",
|
2922 |
"fleurs_tag": "en_us",
|
2923 |
-
"commonvoice_hours":
|
2924 |
"commonvoice_locale": "en",
|
2925 |
"in_benchmark": true,
|
2926 |
"task": "language_modeling",
|
@@ -2936,7 +2936,7 @@
|
|
2936 |
"family": "Indo-European",
|
2937 |
"flores_path": "eng_Latn",
|
2938 |
"fleurs_tag": "en_us",
|
2939 |
-
"commonvoice_hours":
|
2940 |
"commonvoice_locale": "en",
|
2941 |
"in_benchmark": true,
|
2942 |
"task": "translation",
|
@@ -2952,7 +2952,7 @@
|
|
2952 |
"family": "Indo-European",
|
2953 |
"flores_path": "eng_Latn",
|
2954 |
"fleurs_tag": "en_us",
|
2955 |
-
"commonvoice_hours":
|
2956 |
"commonvoice_locale": "en",
|
2957 |
"in_benchmark": true,
|
2958 |
"task": "translation",
|
@@ -3320,7 +3320,7 @@
|
|
3320 |
"family": "Indo-European",
|
3321 |
"flores_path": "fra_Latn",
|
3322 |
"fleurs_tag": "fr_fr",
|
3323 |
-
"commonvoice_hours":
|
3324 |
"commonvoice_locale": "fr",
|
3325 |
"in_benchmark": true,
|
3326 |
"task": "classification",
|
@@ -3336,7 +3336,7 @@
|
|
3336 |
"family": "Indo-European",
|
3337 |
"flores_path": "fra_Latn",
|
3338 |
"fleurs_tag": "fr_fr",
|
3339 |
-
"commonvoice_hours":
|
3340 |
"commonvoice_locale": "fr",
|
3341 |
"in_benchmark": true,
|
3342 |
"task": "language_modeling",
|
@@ -3352,7 +3352,7 @@
|
|
3352 |
"family": "Indo-European",
|
3353 |
"flores_path": "fra_Latn",
|
3354 |
"fleurs_tag": "fr_fr",
|
3355 |
-
"commonvoice_hours":
|
3356 |
"commonvoice_locale": "fr",
|
3357 |
"in_benchmark": true,
|
3358 |
"task": "translation",
|
@@ -3368,7 +3368,7 @@
|
|
3368 |
"family": "Indo-European",
|
3369 |
"flores_path": "fra_Latn",
|
3370 |
"fleurs_tag": "fr_fr",
|
3371 |
-
"commonvoice_hours":
|
3372 |
"commonvoice_locale": "fr",
|
3373 |
"in_benchmark": true,
|
3374 |
"task": "translation",
|
@@ -4952,7 +4952,7 @@
|
|
4952 |
"family": "Abkhaz-Adyge",
|
4953 |
"flores_path": null,
|
4954 |
"fleurs_tag": null,
|
4955 |
-
"commonvoice_hours":
|
4956 |
"commonvoice_locale": "kbd",
|
4957 |
"in_benchmark": false,
|
4958 |
"task": null,
|
@@ -5752,7 +5752,7 @@
|
|
5752 |
"family": "Indo-European",
|
5753 |
"flores_path": null,
|
5754 |
"fleurs_tag": null,
|
5755 |
-
"commonvoice_hours":
|
5756 |
"commonvoice_locale": "kw",
|
5757 |
"in_benchmark": false,
|
5758 |
"task": null,
|
@@ -6296,7 +6296,7 @@
|
|
6296 |
"family": "Indo-European",
|
6297 |
"flores_path": "ltg_Latn",
|
6298 |
"fleurs_tag": null,
|
6299 |
-
"commonvoice_hours":
|
6300 |
"commonvoice_locale": "ltg",
|
6301 |
"in_benchmark": true,
|
6302 |
"task": null,
|
@@ -8360,7 +8360,7 @@
|
|
8360 |
"family": "Indo-European",
|
8361 |
"flores_path": null,
|
8362 |
"fleurs_tag": "ps_af",
|
8363 |
-
"commonvoice_hours":
|
8364 |
"commonvoice_locale": "ps",
|
8365 |
"in_benchmark": false,
|
8366 |
"task": null,
|
@@ -8952,7 +8952,7 @@
|
|
8952 |
"family": "Turkic",
|
8953 |
"flores_path": null,
|
8954 |
"fleurs_tag": null,
|
8955 |
-
"commonvoice_hours":
|
8956 |
"commonvoice_locale": "sah",
|
8957 |
"in_benchmark": false,
|
8958 |
"task": null,
|
@@ -9224,7 +9224,7 @@
|
|
9224 |
"family": null,
|
9225 |
"flores_path": null,
|
9226 |
"fleurs_tag": null,
|
9227 |
-
"commonvoice_hours": 1.
|
9228 |
"commonvoice_locale": "sei",
|
9229 |
"in_benchmark": false,
|
9230 |
"task": null,
|
@@ -9352,7 +9352,7 @@
|
|
9352 |
"family": "Indo-European",
|
9353 |
"flores_path": "slk_Latn",
|
9354 |
"fleurs_tag": "sk_sk",
|
9355 |
-
"commonvoice_hours":
|
9356 |
"commonvoice_locale": "sk",
|
9357 |
"in_benchmark": true,
|
9358 |
"task": null,
|
@@ -10200,7 +10200,7 @@
|
|
10200 |
"family": "Afro-Asiatic",
|
10201 |
"flores_path": null,
|
10202 |
"fleurs_tag": null,
|
10203 |
-
"commonvoice_hours":
|
10204 |
"commonvoice_locale": "tig",
|
10205 |
"in_benchmark": false,
|
10206 |
"task": null,
|
@@ -10712,7 +10712,7 @@
|
|
10712 |
"family": "Turkic",
|
10713 |
"flores_path": "uig_Arab",
|
10714 |
"fleurs_tag": null,
|
10715 |
-
"commonvoice_hours":
|
10716 |
"commonvoice_locale": "ug",
|
10717 |
"in_benchmark": true,
|
10718 |
"task": null,
|
@@ -10728,7 +10728,7 @@
|
|
10728 |
"family": "Indo-European",
|
10729 |
"flores_path": "ukr_Cyrl",
|
10730 |
"fleurs_tag": "uk_ua",
|
10731 |
-
"commonvoice_hours":
|
10732 |
"commonvoice_locale": "uk",
|
10733 |
"in_benchmark": true,
|
10734 |
"task": null,
|
|
|
616 |
"family": "Indo-European",
|
617 |
"flores_path": null,
|
618 |
"fleurs_tag": null,
|
619 |
+
"commonvoice_hours": 14.0,
|
620 |
"commonvoice_locale": "an",
|
621 |
"in_benchmark": false,
|
622 |
"task": null,
|
|
|
1992 |
"family": "Indo-European",
|
1993 |
"flores_path": "cat_Latn",
|
1994 |
"fleurs_tag": "ca_es",
|
1995 |
+
"commonvoice_hours": 2844.0,
|
1996 |
"commonvoice_locale": "ca",
|
1997 |
"in_benchmark": true,
|
1998 |
"task": null,
|
|
|
2584 |
"family": "Indo-European",
|
2585 |
"flores_path": "deu_Latn",
|
2586 |
"fleurs_tag": "de_de",
|
2587 |
+
"commonvoice_hours": 1360.0,
|
2588 |
"commonvoice_locale": "de",
|
2589 |
"in_benchmark": true,
|
2590 |
"task": null,
|
|
|
2904 |
"family": "Indo-European",
|
2905 |
"flores_path": "eng_Latn",
|
2906 |
"fleurs_tag": "en_us",
|
2907 |
+
"commonvoice_hours": 2653.0,
|
2908 |
"commonvoice_locale": "en",
|
2909 |
"in_benchmark": true,
|
2910 |
"task": "classification",
|
|
|
2920 |
"family": "Indo-European",
|
2921 |
"flores_path": "eng_Latn",
|
2922 |
"fleurs_tag": "en_us",
|
2923 |
+
"commonvoice_hours": 2653.0,
|
2924 |
"commonvoice_locale": "en",
|
2925 |
"in_benchmark": true,
|
2926 |
"task": "language_modeling",
|
|
|
2936 |
"family": "Indo-European",
|
2937 |
"flores_path": "eng_Latn",
|
2938 |
"fleurs_tag": "en_us",
|
2939 |
+
"commonvoice_hours": 2653.0,
|
2940 |
"commonvoice_locale": "en",
|
2941 |
"in_benchmark": true,
|
2942 |
"task": "translation",
|
|
|
2952 |
"family": "Indo-European",
|
2953 |
"flores_path": "eng_Latn",
|
2954 |
"fleurs_tag": "en_us",
|
2955 |
+
"commonvoice_hours": 2653.0,
|
2956 |
"commonvoice_locale": "en",
|
2957 |
"in_benchmark": true,
|
2958 |
"task": "translation",
|
|
|
3320 |
"family": "Indo-European",
|
3321 |
"flores_path": "fra_Latn",
|
3322 |
"fleurs_tag": "fr_fr",
|
3323 |
+
"commonvoice_hours": 1053.0,
|
3324 |
"commonvoice_locale": "fr",
|
3325 |
"in_benchmark": true,
|
3326 |
"task": "classification",
|
|
|
3336 |
"family": "Indo-European",
|
3337 |
"flores_path": "fra_Latn",
|
3338 |
"fleurs_tag": "fr_fr",
|
3339 |
+
"commonvoice_hours": 1053.0,
|
3340 |
"commonvoice_locale": "fr",
|
3341 |
"in_benchmark": true,
|
3342 |
"task": "language_modeling",
|
|
|
3352 |
"family": "Indo-European",
|
3353 |
"flores_path": "fra_Latn",
|
3354 |
"fleurs_tag": "fr_fr",
|
3355 |
+
"commonvoice_hours": 1053.0,
|
3356 |
"commonvoice_locale": "fr",
|
3357 |
"in_benchmark": true,
|
3358 |
"task": "translation",
|
|
|
3368 |
"family": "Indo-European",
|
3369 |
"flores_path": "fra_Latn",
|
3370 |
"fleurs_tag": "fr_fr",
|
3371 |
+
"commonvoice_hours": 1053.0,
|
3372 |
"commonvoice_locale": "fr",
|
3373 |
"in_benchmark": true,
|
3374 |
"task": "translation",
|
|
|
4952 |
"family": "Abkhaz-Adyge",
|
4953 |
"flores_path": null,
|
4954 |
"fleurs_tag": null,
|
4955 |
+
"commonvoice_hours": 18.0,
|
4956 |
"commonvoice_locale": "kbd",
|
4957 |
"in_benchmark": false,
|
4958 |
"task": null,
|
|
|
5752 |
"family": "Indo-European",
|
5753 |
"flores_path": null,
|
5754 |
"fleurs_tag": null,
|
5755 |
+
"commonvoice_hours": 3.4,
|
5756 |
"commonvoice_locale": "kw",
|
5757 |
"in_benchmark": false,
|
5758 |
"task": null,
|
|
|
6296 |
"family": "Indo-European",
|
6297 |
"flores_path": "ltg_Latn",
|
6298 |
"fleurs_tag": null,
|
6299 |
+
"commonvoice_hours": 29.0,
|
6300 |
"commonvoice_locale": "ltg",
|
6301 |
"in_benchmark": true,
|
6302 |
"task": null,
|
|
|
8360 |
"family": "Indo-European",
|
8361 |
"flores_path": null,
|
8362 |
"fleurs_tag": "ps_af",
|
8363 |
+
"commonvoice_hours": 80.0,
|
8364 |
"commonvoice_locale": "ps",
|
8365 |
"in_benchmark": false,
|
8366 |
"task": null,
|
|
|
8952 |
"family": "Turkic",
|
8953 |
"flores_path": null,
|
8954 |
"fleurs_tag": null,
|
8955 |
+
"commonvoice_hours": 11.0,
|
8956 |
"commonvoice_locale": "sah",
|
8957 |
"in_benchmark": false,
|
8958 |
"task": null,
|
|
|
9224 |
"family": null,
|
9225 |
"flores_path": null,
|
9226 |
"fleurs_tag": null,
|
9227 |
+
"commonvoice_hours": 1.4,
|
9228 |
"commonvoice_locale": "sei",
|
9229 |
"in_benchmark": false,
|
9230 |
"task": null,
|
|
|
9352 |
"family": "Indo-European",
|
9353 |
"flores_path": "slk_Latn",
|
9354 |
"fleurs_tag": "sk_sk",
|
9355 |
+
"commonvoice_hours": 47.0,
|
9356 |
"commonvoice_locale": "sk",
|
9357 |
"in_benchmark": true,
|
9358 |
"task": null,
|
|
|
10200 |
"family": "Afro-Asiatic",
|
10201 |
"flores_path": null,
|
10202 |
"fleurs_tag": null,
|
10203 |
+
"commonvoice_hours": 11.0,
|
10204 |
"commonvoice_locale": "tig",
|
10205 |
"in_benchmark": false,
|
10206 |
"task": null,
|
|
|
10712 |
"family": "Turkic",
|
10713 |
"flores_path": "uig_Arab",
|
10714 |
"fleurs_tag": null,
|
10715 |
+
"commonvoice_hours": 365.0,
|
10716 |
"commonvoice_locale": "ug",
|
10717 |
"in_benchmark": true,
|
10718 |
"task": null,
|
|
|
10728 |
"family": "Indo-European",
|
10729 |
"flores_path": "ukr_Cyrl",
|
10730 |
"fleurs_tag": "uk_ua",
|
10731 |
+
"commonvoice_hours": 99.0,
|
10732 |
"commonvoice_locale": "uk",
|
10733 |
"in_benchmark": true,
|
10734 |
"task": null,
|