David Pomerenke
committed on
Commit
·
8190782
1
Parent(s):
d5fc8b3
Add links to add CommonVoice recordings
Browse files
- app.py +3 -2
- evals.py +23 -7
- results.json +20 -10
app.py
CHANGED
@@ -178,6 +178,7 @@ def create_language_stats_df(results):
|
|
178 |
model = best_score['model']
|
179 |
model_name = model.split('/')[-1] if model else "N/A"
|
180 |
model_link = f"<a href='https://openrouter.ai/{model}' style='text-decoration: none; color: inherit;'>{model_name}</a>" if model else "N/A"
|
|
|
181 |
row = {
|
182 |
"Language": f"**{lang['language_name']}**",
|
183 |
"Speakers (M)": round(lang["speakers"] / 1_000_000, 1),
|
@@ -189,7 +190,7 @@ def create_language_stats_df(results):
|
|
189 |
"Best Model BLEU": round(best_score["bleu"], 3)
|
190 |
if best_score["bleu"] is not None
|
191 |
else "N/A",
|
192 |
-
"CommonVoice Hours":
|
193 |
}
|
194 |
flat_data.append(row)
|
195 |
|
@@ -198,7 +199,7 @@ def create_language_stats_df(results):
|
|
198 |
value=df,
|
199 |
label="Language Results",
|
200 |
show_search="search",
|
201 |
-
datatype=["markdown", "number", "number", "number", "markdown", "number"],
|
202 |
)
|
203 |
|
204 |
|
|
|
178 |
model = best_score['model']
|
179 |
model_name = model.split('/')[-1] if model else "N/A"
|
180 |
model_link = f"<a href='https://openrouter.ai/{model}' style='text-decoration: none; color: inherit;'>{model_name}</a>" if model else "N/A"
|
181 |
+
commonvoice_link = f"<!--{lang['commonvoice_hours']:07} (for sorting)--> <a href='https://commonvoice.mozilla.org/{lang['commonvoice_locale']}/speak' style='text-decoration: none; color: inherit;'>🎙️ {lang['commonvoice_hours']}</a>" if lang["commonvoice_hours"] else "N/A"
|
182 |
row = {
|
183 |
"Language": f"**{lang['language_name']}**",
|
184 |
"Speakers (M)": round(lang["speakers"] / 1_000_000, 1),
|
|
|
190 |
"Best Model BLEU": round(best_score["bleu"], 3)
|
191 |
if best_score["bleu"] is not None
|
192 |
else "N/A",
|
193 |
+
"CommonVoice Hours": commonvoice_link,
|
194 |
}
|
195 |
flat_data.append(row)
|
196 |
|
|
|
199 |
value=df,
|
200 |
label="Language Results",
|
201 |
show_search="search",
|
202 |
+
datatype=["markdown", "number", "number", "number", "markdown", "number", "markdown"],
|
203 |
)
|
204 |
|
205 |
|
evals.py
CHANGED
@@ -61,11 +61,15 @@ languages = pd.DataFrame(list(languages.items()), columns=["bcp_47", "speakers"]
|
|
61 |
languages["name"] = languages["bcp_47"].apply(lambda x: Language.get(x).display_name())
|
62 |
|
63 |
# load script codes and names
|
64 |
-
scripts = pd.read_csv("data/ScriptCodes.csv").rename(
|
|
|
|
|
|
|
65 |
|
66 |
def script_name(iso15924):
|
67 |
return scripts[scripts["iso15924"] == iso15924]["script_name"].values[0]
|
68 |
|
|
|
69 |
# load benchmark languages and scripts
|
70 |
benchmark_dir = "data/floresp-v2.0-rc.3/dev"
|
71 |
benchmark_languages = pd.DataFrame(
|
@@ -94,16 +98,20 @@ def get_commonvoice_stats(date: date):
|
|
94 |
|
95 |
|
96 |
commonvoice_stats = pd.DataFrame(get_commonvoice_stats(date.today())).rename(
|
97 |
-
columns={"locale": "
|
98 |
-
)[["
|
99 |
# ignore country (language is language) (in practice this is only relevant to zh-CN/zh-TW/zh-HK)
|
100 |
-
commonvoice_stats["bcp_47"] = commonvoice_stats["
|
101 |
lambda x: re.sub(r"-[A-Z]{2}$", "", x)
|
102 |
)
|
103 |
commonvoice_stats["bcp_47"] = commonvoice_stats["bcp_47"].apply(
|
104 |
lambda x: standardize_tag(x, macro=True)
|
105 |
) # this does not really seem to get macrolanguages though, e.g. not for Quechua
|
106 |
-
commonvoice_stats =
|
|
|
|
|
|
|
|
|
107 |
|
108 |
# merge data
|
109 |
languages = pd.merge(
|
@@ -149,6 +157,7 @@ async def complete(**kwargs):
|
|
149 |
raise Exception(response)
|
150 |
return response
|
151 |
|
|
|
152 |
async def translate(model, target_language, sentence):
|
153 |
script = script_name(target_language.iso15924)
|
154 |
reply = await complete(
|
@@ -170,7 +179,9 @@ def mean(l):
|
|
170 |
|
171 |
|
172 |
def load_sentences(language):
|
173 |
-
return open(
|
|
|
|
|
174 |
|
175 |
|
176 |
# evaluation!
|
@@ -196,7 +207,11 @@ async def main():
|
|
196 |
original_sentences, target_languages.itertuples()
|
197 |
)
|
198 |
]
|
199 |
-
predictions = await tqdm_asyncio.gather(
|
|
|
|
|
|
|
|
|
200 |
target_sentences = [
|
201 |
load_sentences(lang)[i]
|
202 |
for i, lang in enumerate(target_languages.itertuples())
|
@@ -227,6 +242,7 @@ async def main():
|
|
227 |
"bleu": mean([s["bleu"] for s in scores]) if scores else None,
|
228 |
# "bert_score": mean([s["bert_score"] for s in scores]),
|
229 |
"commonvoice_hours": language.commonvoice_hours,
|
|
|
230 |
}
|
231 |
)
|
232 |
with open("results.json", "w") as f:
|
|
|
61 |
languages["name"] = languages["bcp_47"].apply(lambda x: Language.get(x).display_name())
|
62 |
|
63 |
# load script codes and names
|
64 |
+
scripts = pd.read_csv("data/ScriptCodes.csv").rename(
|
65 |
+
columns={"Code": "iso15924", "English Name": "script_name"}
|
66 |
+
)
|
67 |
+
|
68 |
|
69 |
def script_name(iso15924):
|
70 |
return scripts[scripts["iso15924"] == iso15924]["script_name"].values[0]
|
71 |
|
72 |
+
|
73 |
# load benchmark languages and scripts
|
74 |
benchmark_dir = "data/floresp-v2.0-rc.3/dev"
|
75 |
benchmark_languages = pd.DataFrame(
|
|
|
98 |
|
99 |
|
100 |
commonvoice_stats = pd.DataFrame(get_commonvoice_stats(date.today())).rename(
|
101 |
+
columns={"locale": "commonvoice_locale", "validatedHours": "commonvoice_hours"}
|
102 |
+
)[["commonvoice_locale", "commonvoice_hours"]]
|
103 |
# ignore country (language is language) (in practice this is only relevant to zh-CN/zh-TW/zh-HK)
|
104 |
+
commonvoice_stats["bcp_47"] = commonvoice_stats["commonvoice_locale"].apply(
|
105 |
lambda x: re.sub(r"-[A-Z]{2}$", "", x)
|
106 |
)
|
107 |
commonvoice_stats["bcp_47"] = commonvoice_stats["bcp_47"].apply(
|
108 |
lambda x: standardize_tag(x, macro=True)
|
109 |
) # this does not really seem to get macrolanguages though, e.g. not for Quechua
|
110 |
+
commonvoice_stats = (
|
111 |
+
commonvoice_stats.groupby("bcp_47")
|
112 |
+
.agg({"commonvoice_hours": "sum", "commonvoice_locale": "first"})
|
113 |
+
.reset_index()
|
114 |
+
)
|
115 |
|
116 |
# merge data
|
117 |
languages = pd.merge(
|
|
|
157 |
raise Exception(response)
|
158 |
return response
|
159 |
|
160 |
+
|
161 |
async def translate(model, target_language, sentence):
|
162 |
script = script_name(target_language.iso15924)
|
163 |
reply = await complete(
|
|
|
179 |
|
180 |
|
181 |
def load_sentences(language):
|
182 |
+
return open(
|
183 |
+
f"{benchmark_dir}/dev.{language.iso639_3}_{language.iso15924}"
|
184 |
+
).readlines()
|
185 |
|
186 |
|
187 |
# evaluation!
|
|
|
207 |
original_sentences, target_languages.itertuples()
|
208 |
)
|
209 |
]
|
210 |
+
predictions = await tqdm_asyncio.gather(
|
211 |
+
*predictions,
|
212 |
+
miniters=1,
|
213 |
+
desc=f"{language.name} {model.split('/')[0]}",
|
214 |
+
)
|
215 |
target_sentences = [
|
216 |
load_sentences(lang)[i]
|
217 |
for i, lang in enumerate(target_languages.itertuples())
|
|
|
242 |
"bleu": mean([s["bleu"] for s in scores]) if scores else None,
|
243 |
# "bert_score": mean([s["bert_score"] for s in scores]),
|
244 |
"commonvoice_hours": language.commonvoice_hours,
|
245 |
+
"commonvoice_locale": language.commonvoice_locale,
|
246 |
}
|
247 |
)
|
248 |
with open("results.json", "w") as f:
|
results.json
CHANGED
@@ -10,7 +10,8 @@
|
|
10 |
}
|
11 |
],
|
12 |
"bleu": 0.4931825583688982,
|
13 |
-
"commonvoice_hours": 2649.0
|
|
|
14 |
},
|
15 |
{
|
16 |
"language_name": "Chinese",
|
@@ -43,7 +44,8 @@
|
|
43 |
}
|
44 |
],
|
45 |
"bleu": 0.4356399559223496,
|
46 |
-
"commonvoice_hours": 422.0
|
|
|
47 |
},
|
48 |
{
|
49 |
"language_name": "Hindi",
|
@@ -56,7 +58,8 @@
|
|
56 |
}
|
57 |
],
|
58 |
"bleu": 0.42910938007537924,
|
59 |
-
"commonvoice_hours": 16.0
|
|
|
60 |
},
|
61 |
{
|
62 |
"language_name": "Spanish",
|
@@ -69,7 +72,8 @@
|
|
69 |
}
|
70 |
],
|
71 |
"bleu": 0.3335615012680206,
|
72 |
-
"commonvoice_hours": 446.0
|
|
|
73 |
},
|
74 |
{
|
75 |
"language_name": "Arabic",
|
@@ -82,7 +86,8 @@
|
|
82 |
}
|
83 |
],
|
84 |
"bleu": 0.19072998559991275,
|
85 |
-
"commonvoice_hours": 91.0
|
|
|
86 |
},
|
87 |
{
|
88 |
"language_name": "Urdu",
|
@@ -115,7 +120,8 @@
|
|
115 |
}
|
116 |
],
|
117 |
"bleu": 0.32276445473356513,
|
118 |
-
"commonvoice_hours": 76.0
|
|
|
119 |
},
|
120 |
{
|
121 |
"language_name": "French",
|
@@ -128,7 +134,8 @@
|
|
128 |
}
|
129 |
],
|
130 |
"bleu": 0.40595466651226686,
|
131 |
-
"commonvoice_hours": 1051.0
|
|
|
132 |
},
|
133 |
{
|
134 |
"language_name": "Bangla",
|
@@ -141,7 +148,8 @@
|
|
141 |
}
|
142 |
],
|
143 |
"bleu": 0.30570858536443696,
|
144 |
-
"commonvoice_hours": 49.0
|
|
|
145 |
},
|
146 |
{
|
147 |
"language_name": "Portuguese",
|
@@ -174,7 +182,8 @@
|
|
174 |
}
|
175 |
],
|
176 |
"bleu": 0.3778453994295843,
|
177 |
-
"commonvoice_hours": 176.0
|
|
|
178 |
},
|
179 |
{
|
180 |
"language_name": "Punjabi",
|
@@ -187,6 +196,7 @@
|
|
187 |
}
|
188 |
],
|
189 |
"bleu": 0.34311946995454473,
|
190 |
-
"commonvoice_hours": 2.3
|
|
|
191 |
}
|
192 |
]
|
|
|
10 |
}
|
11 |
],
|
12 |
"bleu": 0.4931825583688982,
|
13 |
+
"commonvoice_hours": 2649.0,
|
14 |
+
"commonvoice_locale": "en"
|
15 |
},
|
16 |
{
|
17 |
"language_name": "Chinese",
|
|
|
44 |
}
|
45 |
],
|
46 |
"bleu": 0.4356399559223496,
|
47 |
+
"commonvoice_hours": 422.0,
|
48 |
+
"commonvoice_locale": "zh-TW"
|
49 |
},
|
50 |
{
|
51 |
"language_name": "Hindi",
|
|
|
58 |
}
|
59 |
],
|
60 |
"bleu": 0.42910938007537924,
|
61 |
+
"commonvoice_hours": 16.0,
|
62 |
+
"commonvoice_locale": "hi-IN"
|
63 |
},
|
64 |
{
|
65 |
"language_name": "Spanish",
|
|
|
72 |
}
|
73 |
],
|
74 |
"bleu": 0.3335615012680206,
|
75 |
+
"commonvoice_hours": 446.0,
|
76 |
+
"commonvoice_locale": "es"
|
77 |
},
|
78 |
{
|
79 |
"language_name": "Arabic",
|
|
|
86 |
}
|
87 |
],
|
88 |
"bleu": 0.19072998559991275,
|
89 |
+
"commonvoice_hours": 91.0,
|
90 |
+
"commonvoice_locale": "ar"
|
91 |
},
|
92 |
{
|
93 |
"language_name": "Urdu",
|
|
|
120 |
}
|
121 |
],
|
122 |
"bleu": 0.32276445473356513,
|
123 |
+
"commonvoice_hours": 76.0,
|
124 |
+
"commonvoice_locale": "ur"
|
125 |
},
|
126 |
{
|
127 |
"language_name": "French",
|
|
|
134 |
}
|
135 |
],
|
136 |
"bleu": 0.40595466651226686,
|
137 |
+
"commonvoice_hours": 1051.0,
|
138 |
+
"commonvoice_locale": "fr"
|
139 |
},
|
140 |
{
|
141 |
"language_name": "Bangla",
|
|
|
148 |
}
|
149 |
],
|
150 |
"bleu": 0.30570858536443696,
|
151 |
+
"commonvoice_hours": 49.0,
|
152 |
+
"commonvoice_locale": "bn"
|
153 |
},
|
154 |
{
|
155 |
"language_name": "Portuguese",
|
|
|
182 |
}
|
183 |
],
|
184 |
"bleu": 0.3778453994295843,
|
185 |
+
"commonvoice_hours": 176.0,
|
186 |
+
"commonvoice_locale": "pt"
|
187 |
},
|
188 |
{
|
189 |
"language_name": "Punjabi",
|
|
|
196 |
}
|
197 |
],
|
198 |
"bleu": 0.34311946995454473,
|
199 |
+
"commonvoice_hours": 2.3,
|
200 |
+
"commonvoice_locale": "pa-IN"
|
201 |
}
|
202 |
]
|