Spaces:

fair-forward
/

evals-for-every-language

Running

App Files Files Community

David Pomerenke committed on Feb 21

Commit

8190782

1 Parent(s): d5fc8b3

Add links to add CommonVoice recordings

Browse files

Files changed (3) hide show

app.py +3 -2
evals.py +23 -7
results.json +20 -10

app.py CHANGED Viewed

@@ -178,6 +178,7 @@ def create_language_stats_df(results):
         model = best_score['model']
         model_name = model.split('/')[-1] if model else "N/A"
         model_link = f"<a href='https://openrouter.ai/{model}' style='text-decoration: none; color: inherit;'>{model_name}</a>" if model else "N/A"
         row = {
             "Language": f"**{lang['language_name']}**",
             "Speakers (M)": round(lang["speakers"] / 1_000_000, 1),
@@ -189,7 +190,7 @@ def create_language_stats_df(results):
             "Best Model BLEU": round(best_score["bleu"], 3)
             if best_score["bleu"] is not None
             else "N/A",
-            "CommonVoice Hours": lang["commonvoice_hours"],
         }
         flat_data.append(row)
@@ -198,7 +199,7 @@ def create_language_stats_df(results):
         value=df,
         label="Language Results",
         show_search="search",
-        datatype=["markdown", "number", "number", "number", "markdown", "number"],
     )

         model = best_score['model']
         model_name = model.split('/')[-1] if model else "N/A"
         model_link = f"<a href='https://openrouter.ai/{model}' style='text-decoration: none; color: inherit;'>{model_name}</a>" if model else "N/A"
+        commonvoice_link = f"<!--{lang['commonvoice_hours']:07} (for sorting)--> <a href='https://commonvoice.mozilla.org/{lang['commonvoice_locale']}/speak' style='text-decoration: none; color: inherit;'>🎙️ {lang['commonvoice_hours']}</a>" if lang["commonvoice_hours"] else "N/A"
         row = {
             "Language": f"**{lang['language_name']}**",
             "Speakers (M)": round(lang["speakers"] / 1_000_000, 1),
             "Best Model BLEU": round(best_score["bleu"], 3)
             if best_score["bleu"] is not None
             else "N/A",
+            "CommonVoice Hours": commonvoice_link,
         }
         flat_data.append(row)
         value=df,
         label="Language Results",
         show_search="search",
+        datatype=["markdown", "number", "number", "number", "markdown", "number", "markdown"],
     )

evals.py CHANGED Viewed

@@ -61,11 +61,15 @@ languages = pd.DataFrame(list(languages.items()), columns=["bcp_47", "speakers"]
 languages["name"] = languages["bcp_47"].apply(lambda x: Language.get(x).display_name())
 # load script codes and names
-scripts = pd.read_csv("data/ScriptCodes.csv").rename(columns={"Code": "iso15924", "English Name": "script_name"})
 def script_name(iso15924):
     """Return the script name for an ISO 15924 code from the module-level ``scripts`` table.

     Filters ``scripts`` to the rows whose ``iso15924`` column equals the
     argument and returns the ``script_name`` value of the first matching row.
     If no row matches, indexing ``[0]`` on the empty result raises
     ``IndexError``.
     """
     return scripts[scripts["iso15924"] == iso15924]["script_name"].values[0]
 # load benchmark languages and scripts
 benchmark_dir = "data/floresp-v2.0-rc.3/dev"
 benchmark_languages = pd.DataFrame(
@@ -94,16 +98,20 @@ def get_commonvoice_stats(date: date):
 commonvoice_stats = pd.DataFrame(get_commonvoice_stats(date.today())).rename(
-    columns={"locale": "bcp_47", "validatedHours": "commonvoice_hours"}
-)[["bcp_47", "commonvoice_hours"]]
 # ignore country (language is language) (in practice this is only relevant to zh-CN/zh-TW/zh-HK)
-commonvoice_stats["bcp_47"] = commonvoice_stats["bcp_47"].apply(
     lambda x: re.sub(r"-[A-Z]{2}$", "", x)
 )
 commonvoice_stats["bcp_47"] = commonvoice_stats["bcp_47"].apply(
     lambda x: standardize_tag(x, macro=True)
 )  # this does not really seem to get macrolanguages though, e.g. not for Quechua
-commonvoice_stats = commonvoice_stats.groupby("bcp_47").sum().reset_index()
 # merge data
 languages = pd.merge(
@@ -149,6 +157,7 @@ async def complete(**kwargs):
         raise Exception(response)
     return response
 async def translate(model, target_language, sentence):
     script = script_name(target_language.iso15924)
     reply = await complete(
@@ -170,7 +179,9 @@ def mean(l):
 def load_sentences(language):
-    return open(f"{benchmark_dir}/dev.{language.iso639_3}_{language.iso15924}").readlines()
 # evaluation!
@@ -196,7 +207,11 @@ async def main():
                         original_sentences, target_languages.itertuples()
                     )
                 ]
-                predictions = await tqdm_asyncio.gather(*predictions, miniters=1, desc=f"{language.name} {model.split('/')[0]}")
                 target_sentences = [
                     load_sentences(lang)[i]
                     for i, lang in enumerate(target_languages.itertuples())
@@ -227,6 +242,7 @@ async def main():
                 "bleu": mean([s["bleu"] for s in scores]) if scores else None,
                 # "bert_score": mean([s["bert_score"] for s in scores]),
                 "commonvoice_hours": language.commonvoice_hours,
             }
         )
     with open("results.json", "w") as f:

 languages["name"] = languages["bcp_47"].apply(lambda x: Language.get(x).display_name())
 # load script codes and names
+scripts = pd.read_csv("data/ScriptCodes.csv").rename(
+    columns={"Code": "iso15924", "English Name": "script_name"}
+)
 def script_name(iso15924):
     """Return the script name for an ISO 15924 code from the module-level ``scripts`` table.

     Filters ``scripts`` to the rows whose ``iso15924`` column equals the
     argument and returns the ``script_name`` value of the first matching row.
     If no row matches, indexing ``[0]`` on the empty result raises
     ``IndexError``.
     """
     return scripts[scripts["iso15924"] == iso15924]["script_name"].values[0]
 # load benchmark languages and scripts
 benchmark_dir = "data/floresp-v2.0-rc.3/dev"
 benchmark_languages = pd.DataFrame(
 commonvoice_stats = pd.DataFrame(get_commonvoice_stats(date.today())).rename(
+    columns={"locale": "commonvoice_locale", "validatedHours": "commonvoice_hours"}
+)[["commonvoice_locale", "commonvoice_hours"]]
 # ignore country (language is language) (in practice this is only relevant to zh-CN/zh-TW/zh-HK)
+commonvoice_stats["bcp_47"] = commonvoice_stats["commonvoice_locale"].apply(
     lambda x: re.sub(r"-[A-Z]{2}$", "", x)
 )
 commonvoice_stats["bcp_47"] = commonvoice_stats["bcp_47"].apply(
     lambda x: standardize_tag(x, macro=True)
 )  # this does not really seem to get macrolanguages though, e.g. not for Quechua
+commonvoice_stats = (
+    commonvoice_stats.groupby("bcp_47")
+    .agg({"commonvoice_hours": "sum", "commonvoice_locale": "first"})
+    .reset_index()
+)
 # merge data
 languages = pd.merge(
         raise Exception(response)
     return response
 async def translate(model, target_language, sentence):
     script = script_name(target_language.iso15924)
     reply = await complete(
 def load_sentences(language):
+    return open(
+        f"{benchmark_dir}/dev.{language.iso639_3}_{language.iso15924}"
+    ).readlines()
 # evaluation!
                         original_sentences, target_languages.itertuples()
                     )
                 ]
+                predictions = await tqdm_asyncio.gather(
+                    *predictions,
+                    miniters=1,
+                    desc=f"{language.name} {model.split('/')[0]}",
+                )
                 target_sentences = [
                     load_sentences(lang)[i]
                     for i, lang in enumerate(target_languages.itertuples())
                 "bleu": mean([s["bleu"] for s in scores]) if scores else None,
                 # "bert_score": mean([s["bert_score"] for s in scores]),
                 "commonvoice_hours": language.commonvoice_hours,
+                "commonvoice_locale": language.commonvoice_locale,
             }
         )
     with open("results.json", "w") as f:

results.json CHANGED Viewed

@@ -10,7 +10,8 @@
       }
     ],
     "bleu": 0.4931825583688982,
-    "commonvoice_hours": 2649.0
   },
   {
     "language_name": "Chinese",
@@ -43,7 +44,8 @@
       }
     ],
     "bleu": 0.4356399559223496,
-    "commonvoice_hours": 422.0
   },
   {
     "language_name": "Hindi",
@@ -56,7 +58,8 @@
       }
     ],
     "bleu": 0.42910938007537924,
-    "commonvoice_hours": 16.0
   },
   {
     "language_name": "Spanish",
@@ -69,7 +72,8 @@
       }
     ],
     "bleu": 0.3335615012680206,
-    "commonvoice_hours": 446.0
   },
   {
     "language_name": "Arabic",
@@ -82,7 +86,8 @@
       }
     ],
     "bleu": 0.19072998559991275,
-    "commonvoice_hours": 91.0
   },
   {
     "language_name": "Urdu",
@@ -115,7 +120,8 @@
       }
     ],
     "bleu": 0.32276445473356513,
-    "commonvoice_hours": 76.0
   },
   {
     "language_name": "French",
@@ -128,7 +134,8 @@
       }
     ],
     "bleu": 0.40595466651226686,
-    "commonvoice_hours": 1051.0
   },
   {
     "language_name": "Bangla",
@@ -141,7 +148,8 @@
       }
     ],
     "bleu": 0.30570858536443696,
-    "commonvoice_hours": 49.0
   },
   {
     "language_name": "Portuguese",
@@ -174,7 +182,8 @@
       }
     ],
     "bleu": 0.3778453994295843,
-    "commonvoice_hours": 176.0
   },
   {
     "language_name": "Punjabi",
@@ -187,6 +196,7 @@
       }
     ],
     "bleu": 0.34311946995454473,
-    "commonvoice_hours": 2.3
   }
 ]

       }
     ],
     "bleu": 0.4931825583688982,
+    "commonvoice_hours": 2649.0,
+    "commonvoice_locale": "en"
   },
   {
     "language_name": "Chinese",
       }
     ],
     "bleu": 0.4356399559223496,
+    "commonvoice_hours": 422.0,
+    "commonvoice_locale": "zh-TW"
   },
   {
     "language_name": "Hindi",
       }
     ],
     "bleu": 0.42910938007537924,
+    "commonvoice_hours": 16.0,
+    "commonvoice_locale": "hi-IN"
   },
   {
     "language_name": "Spanish",
       }
     ],
     "bleu": 0.3335615012680206,
+    "commonvoice_hours": 446.0,
+    "commonvoice_locale": "es"
   },
   {
     "language_name": "Arabic",
       }
     ],
     "bleu": 0.19072998559991275,
+    "commonvoice_hours": 91.0,
+    "commonvoice_locale": "ar"
   },
   {
     "language_name": "Urdu",
       }
     ],
     "bleu": 0.32276445473356513,
+    "commonvoice_hours": 76.0,
+    "commonvoice_locale": "ur"
   },
   {
     "language_name": "French",
       }
     ],
     "bleu": 0.40595466651226686,
+    "commonvoice_hours": 1051.0,
+    "commonvoice_locale": "fr"
   },
   {
     "language_name": "Bangla",
       }
     ],
     "bleu": 0.30570858536443696,
+    "commonvoice_hours": 49.0,
+    "commonvoice_locale": "bn"
   },
   {
     "language_name": "Portuguese",
       }
     ],
     "bleu": 0.3778453994295843,
+    "commonvoice_hours": 176.0,
+    "commonvoice_locale": "pt"
   },
   {
     "language_name": "Punjabi",
       }
     ],
     "bleu": 0.34311946995454473,
+    "commonvoice_hours": 2.3,
+    "commonvoice_locale": "pa-IN"
   }
 ]