David Pomerenke committed on
Commit
80a0827
·
1 Parent(s): b1da19b

Run on 50 languages

Browse files
Files changed (3) hide show
  1. app.py +5 -4
  2. evals.py +12 -7
  3. results.json +710 -300
app.py CHANGED
@@ -137,17 +137,18 @@ def create_leaderboard_df(model_type, metric=None):
137
  "model_": "Model",
138
  }
139
  )
 
140
  df = df.sort_values(metric["label"], ascending=False)
141
  df["Rank"] = range(1, len(df) + 1)
142
  df["Rank"] = df["Rank"].apply(
143
  lambda x: "🥇" if x == 1 else "🥈" if x == 2 else "🥉" if x == 3 else str(x)
144
  )
145
- df = df[["Rank", "Model", metric["label"], "Languages Tested"]]
146
  return gr.DataFrame(
147
  value=df,
148
  label="Model Leaderboard",
149
  show_search=False,
150
- datatype=["number", "markdown", "number", "number"],
151
  )
152
 
153
 
@@ -321,7 +322,7 @@ def create_scatter_plot(metric):
321
  sorted(set(data["family"] for data in scatter_data)), color_pallette
322
  )
323
  }
324
- c_vals = [color_mapping[data["family"]] for data in scatter_data]
325
  labels = [data["language"] for data in scatter_data]
326
  hover_template = f"<b>%{{text}}</b><br>Speakers: %{{x:.1f}}M<br>{metric['label']}: %{{y:.3f}}<extra></extra>"
327
  fig.add_trace(
@@ -447,7 +448,7 @@ def create_world_map(metric):
447
  hover_texts = []
448
 
449
  for country_code, data in country_data.items():
450
- weighted_avg = data["weighted_score_sum"] / data["total_speakers"]
451
 
452
  try:
453
  country_name = pycountry.countries.get(alpha_3=country_code).name
 
137
  "model_": "Model",
138
  }
139
  )
140
+ df[metric["label"]] = df[metric["label"]].round(3)
141
  df = df.sort_values(metric["label"], ascending=False)
142
  df["Rank"] = range(1, len(df) + 1)
143
  df["Rank"] = df["Rank"].apply(
144
  lambda x: "🥇" if x == 1 else "🥈" if x == 2 else "🥉" if x == 3 else str(x)
145
  )
146
+ df = df[["Rank", "Model", metric["label"]]]
147
  return gr.DataFrame(
148
  value=df,
149
  label="Model Leaderboard",
150
  show_search=False,
151
+ datatype=["number", "markdown", "number"],
152
  )
153
 
154
 
 
322
  sorted(set(data["family"] for data in scatter_data)), color_pallette
323
  )
324
  }
325
+ c_vals = [color_mapping.get(data["family"], "LightGray") for data in scatter_data]
326
  labels = [data["language"] for data in scatter_data]
327
  hover_template = f"<b>%{{text}}</b><br>Speakers: %{{x:.1f}}M<br>{metric['label']}: %{{y:.3f}}<extra></extra>"
328
  fig.add_trace(
 
448
  hover_texts = []
449
 
450
  for country_code, data in country_data.items():
451
+ weighted_avg = data["weighted_score_sum"] / data["total_speakers"] if data["total_speakers"] > 0 else None
452
 
453
  try:
454
  country_name = pycountry.countries.get(alpha_3=country_code).name
evals.py CHANGED
@@ -36,9 +36,13 @@ models = [
36
  "google/gemini-2.0-flash-001", # 0.4$/M tokens
37
  # "qwen/qwen-turbo", # 0.2$/M tokens; recognizes "inappropriate content"
38
  # "deepseek/deepseek-chat", # 0.9$/M tokens
39
- "microsoft/phi-4", # 0.07$/M tokens; only 16k tokens context
40
  ]
41
  model_fast = "meta-llama/llama-3.3-70b-instruct"
 
 
 
 
42
  transcription_models = [
43
  "elevenlabs/scribe_v1",
44
  "openai/whisper-large-v3",
@@ -46,9 +50,8 @@ transcription_models = [
46
  # "facebook/seamless-m4t-v2-large",
47
  ]
48
  transcription_model_fast = "elevenlabs/scribe_v1"
49
- n_sentences = 30
50
- n_languages = 10
51
- n_detailed_languages = 5
52
 
53
  # ===== setup =====
54
 
@@ -195,6 +198,8 @@ target_languages = languages[languages["in_benchmark"]].sample(
195
  )
196
  langs_eval = languages.iloc[:n_languages]
197
  langs_eval_detailed = languages.iloc[:n_detailed_languages]
 
 
198
 
199
 
200
  def download_file(url, path):
@@ -205,7 +210,7 @@ def download_file(url, path):
205
 
206
  def download_fleurs():
207
  # the huggingface loader does not allow loading only the dev set, so do it manually
208
- for language in langs_eval.itertuples():
209
  tar_url = f"https://huggingface.co/datasets/google/fleurs/resolve/main/data/{language.fleurs_tag}/audio/dev.tar.gz"
210
  tar_path = Path(f"data/fleurs/{language.fleurs_tag}/audio/dev.tar.gz")
211
  audio_path = Path(f"data/fleurs/{language.fleurs_tag}/audio")
@@ -496,12 +501,12 @@ async def main():
496
  transcription_scores = [
497
  transcribe_and_evaluate(model, language.bcp_47, i)
498
  for i in range(n_sentences)
499
- for language in langs_eval.itertuples()
500
  for model in transcription_models
501
  if language.in_benchmark
502
  and (
503
  model == transcription_model_fast
504
- or language.bcp_47 in langs_eval_detailed.bcp_47.values
505
  )
506
  ]
507
  transcription_scores = await tqdm_asyncio.gather(*transcription_scores, miniters=1)
 
36
  "google/gemini-2.0-flash-001", # 0.4$/M tokens
37
  # "qwen/qwen-turbo", # 0.2$/M tokens; recognizes "inappropriate content"
38
  # "deepseek/deepseek-chat", # 0.9$/M tokens
39
+ # "microsoft/phi-4", # 0.07$/M tokens; only 16k tokens context
40
  ]
41
  model_fast = "meta-llama/llama-3.3-70b-instruct"
42
+ n_languages = 50
43
+ n_detailed_languages = 10
44
+ n_sentences = 30
45
+
46
  transcription_models = [
47
  "elevenlabs/scribe_v1",
48
  "openai/whisper-large-v3",
 
50
  # "facebook/seamless-m4t-v2-large",
51
  ]
52
  transcription_model_fast = "elevenlabs/scribe_v1"
53
+ transcription_n_languages = 10
54
+ transcription_n_detailed_languages = 5
 
55
 
56
  # ===== setup =====
57
 
 
198
  )
199
  langs_eval = languages.iloc[:n_languages]
200
  langs_eval_detailed = languages.iloc[:n_detailed_languages]
201
+ transcription_langs_eval = languages.iloc[:transcription_n_languages]
202
+ transcription_langs_eval_detailed = languages.iloc[:transcription_n_detailed_languages]
203
 
204
 
205
  def download_file(url, path):
 
210
 
211
  def download_fleurs():
212
  # the huggingface loader does not allow loading only the dev set, so do it manually
213
+ for language in transcription_langs_eval.itertuples():
214
  tar_url = f"https://huggingface.co/datasets/google/fleurs/resolve/main/data/{language.fleurs_tag}/audio/dev.tar.gz"
215
  tar_path = Path(f"data/fleurs/{language.fleurs_tag}/audio/dev.tar.gz")
216
  audio_path = Path(f"data/fleurs/{language.fleurs_tag}/audio")
 
501
  transcription_scores = [
502
  transcribe_and_evaluate(model, language.bcp_47, i)
503
  for i in range(n_sentences)
504
+ for language in transcription_langs_eval.itertuples()
505
  for model in transcription_models
506
  if language.in_benchmark
507
  and (
508
  model == transcription_model_fast
509
+ or language.bcp_47 in transcription_langs_eval_detailed.bcp_47.values
510
  )
511
  ]
512
  transcription_scores = await tqdm_asyncio.gather(*transcription_scores, miniters=1)
results.json CHANGED
@@ -40,15 +40,6 @@
40
  "mlm_chrf": 0.9820612175447262,
41
  "t2t_score": 0.819029567439605
42
  },
43
- {
44
- "model": "microsoft/phi-4",
45
- "model_type": "text-to-text",
46
- "mt_bleu": 0.37572897166941227,
47
- "mt_chrf": 0.5070735300311086,
48
- "cls_acc": 0.6666666666666666,
49
- "mlm_chrf": 0.969030413937307,
50
- "t2t_score": 0.714256870211694
51
- },
52
  {
53
  "model": "elevenlabs/scribe_v1",
54
  "model_type": "speech-to-text",
@@ -224,13 +215,13 @@
224
  "ZW": 6109446
225
  },
226
  "language_family": "Indo-European",
227
- "mt_bleu": 0.41833744305054205,
228
- "mt_chrf": 0.5363758767655269,
229
- "cls_acc": 0.6066666666666667,
230
- "mlm_chrf": 0.9311734868119783,
231
  "asr_wer": 0.2883431971452567,
232
  "asr_chrf": 0.8410097119423834,
233
- "t2t_score": 0.691405343414724,
234
  "s2t_score": 0.5646764545438201
235
  },
236
  {
@@ -274,15 +265,6 @@
274
  "mlm_chrf": 0.9353777720326482,
275
  "t2t_score": 0.7876025558161427
276
  },
277
- {
278
- "model": "microsoft/phi-4",
279
- "model_type": "text-to-text",
280
- "mt_bleu": 0.3516110848479108,
281
- "mt_chrf": 0.5536738005415017,
282
- "cls_acc": 0.7333333333333333,
283
- "mlm_chrf": 0.9080915180096581,
284
- "t2t_score": 0.7316995506281644
285
- },
286
  {
287
  "model": "elevenlabs/scribe_v1",
288
  "model_type": "speech-to-text",
@@ -323,13 +305,13 @@
323
  "VN": 1085934
324
  },
325
  "language_family": "Sino-Tibetan",
326
- "mt_bleu": 0.3771247416861843,
327
- "mt_chrf": 0.5565088711888644,
328
- "cls_acc": 0.6599999999999999,
329
- "mlm_chrf": 0.9067663411187363,
330
  "asr_wer": 1.0,
331
  "asr_chrf": 0.7412819691487745,
332
- "t2t_score": 0.7077584041025334,
333
  "s2t_score": 0.8706409845743872
334
  },
335
  {
@@ -373,15 +355,6 @@
373
  "mlm_chrf": 0.9648559476590244,
374
  "t2t_score": 0.7852166232038682
375
  },
376
- {
377
- "model": "microsoft/phi-4",
378
- "model_type": "text-to-text",
379
- "mt_bleu": 0.27124364337229007,
380
- "mt_chrf": 0.43849820754378105,
381
- "cls_acc": 0.8,
382
- "mlm_chrf": 0.9387109966783314,
383
- "t2t_score": 0.7257364014073708
384
- },
385
  {
386
  "model": "elevenlabs/scribe_v1",
387
  "model_type": "speech-to-text",
@@ -408,13 +381,13 @@
408
  "ZA": 1129272
409
  },
410
  "language_family": "Indo-European",
411
- "mt_bleu": 0.2883532753161887,
412
- "mt_chrf": 0.4523345971484474,
413
- "cls_acc": 0.6466666666666667,
414
- "mlm_chrf": 0.9329724602243828,
415
  "asr_wer": 0.3581497443457955,
416
  "asr_chrf": 0.7041899877791161,
417
- "t2t_score": 0.6773245746798323,
418
  "s2t_score": 0.5311698660624558
419
  },
420
  {
@@ -458,15 +431,6 @@
458
  "mlm_chrf": 0.9816202595213187,
459
  "t2t_score": 0.7836003741246967
460
  },
461
- {
462
- "model": "microsoft/phi-4",
463
- "model_type": "text-to-text",
464
- "mt_bleu": 0.2422835449771352,
465
- "mt_chrf": 0.4339464294262581,
466
- "cls_acc": 0.7666666666666667,
467
- "mlm_chrf": 0.9650899128314744,
468
- "t2t_score": 0.7219010029747998
469
- },
470
  {
471
  "model": "elevenlabs/scribe_v1",
472
  "model_type": "speech-to-text",
@@ -526,13 +490,13 @@
526
  "VE": 23488572
527
  },
528
  "language_family": "Indo-European",
529
- "mt_bleu": 0.28722441435034374,
530
- "mt_chrf": 0.4567381907318164,
531
- "cls_acc": 0.6799999999999999,
532
- "mlm_chrf": 0.9629878273867962,
533
  "asr_wer": 0.18417222540761574,
534
  "asr_chrf": 0.9063759698635353,
535
- "t2t_score": 0.6999086727062043,
536
  "s2t_score": 0.5452740976355754
537
  },
538
  {
@@ -576,15 +540,6 @@
576
  "mlm_chrf": 0.97921999148367,
577
  "t2t_score": 0.7974861134300695
578
  },
579
- {
580
- "model": "microsoft/phi-4",
581
- "model_type": "text-to-text",
582
- "mt_bleu": 0.2419401543819311,
583
- "mt_chrf": 0.4175700747114916,
584
- "cls_acc": 0.7,
585
- "mlm_chrf": 0.9332394461837199,
586
- "t2t_score": 0.6836031736317372
587
- },
588
  {
589
  "model": "elevenlabs/scribe_v1",
590
  "model_type": "speech-to-text",
@@ -643,13 +598,13 @@
643
  "YE": 22114456
644
  },
645
  "language_family": "Afro-Asiatic",
646
- "mt_bleu": 0.26654405723980623,
647
- "mt_chrf": 0.44762080785480374,
648
- "cls_acc": 0.6733333333333332,
649
- "mlm_chrf": 0.9416886014071657,
650
  "asr_wer": 0.23569148982352306,
651
  "asr_chrf": 0.8475974658647911,
652
- "t2t_score": 0.6875475808651009,
653
  "s2t_score": 0.5416444778441571
654
  },
655
  {
@@ -657,6 +612,15 @@
657
  "bcp_47": "ur",
658
  "speakers": 290790290,
659
  "scores": [
 
 
 
 
 
 
 
 
 
660
  {
661
  "model": "meta-llama/llama-3.3-70b-instruct",
662
  "model_type": "text-to-text",
@@ -666,6 +630,24 @@
666
  "mlm_chrf": 0.9330700955297437,
667
  "t2t_score": 0.6011143080087092
668
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
669
  {
670
  "model": "elevenlabs/scribe_v1",
671
  "model_type": "speech-to-text",
@@ -684,13 +666,13 @@
684
  "PK": 221825950
685
  },
686
  "language_family": "Indo-European",
687
- "mt_bleu": 0.21847309277555946,
688
- "mt_chrf": 0.4036061618297173,
689
- "cls_acc": 0.4666666666666667,
690
- "mlm_chrf": 0.9330700955297437,
691
  "asr_wer": 0.33586677704198,
692
  "asr_chrf": 0.7430938264813908,
693
- "t2t_score": 0.6011143080087092,
694
  "s2t_score": 0.5394803017616854
695
  },
696
  {
@@ -698,6 +680,15 @@
698
  "bcp_47": "fr",
699
  "speakers": 278611507,
700
  "scores": [
 
 
 
 
 
 
 
 
 
701
  {
702
  "model": "meta-llama/llama-3.3-70b-instruct",
703
  "model_type": "text-to-text",
@@ -707,6 +698,24 @@
707
  "mlm_chrf": 0.9639910578331403,
708
  "t2t_score": 0.6753073934678575
709
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
710
  {
711
  "model": "elevenlabs/scribe_v1",
712
  "model_type": "speech-to-text",
@@ -782,13 +791,13 @@
782
  "YT": 110580
783
  },
784
  "language_family": "Indo-European",
785
- "mt_bleu": 0.32618133837885355,
786
- "mt_chrf": 0.4952644559037655,
787
- "cls_acc": 0.5666666666666667,
788
- "mlm_chrf": 0.9639910578331403,
789
  "asr_wer": 0.2610754929736961,
790
  "asr_chrf": 0.8775590287945104,
791
- "t2t_score": 0.6753073934678575,
792
  "s2t_score": 0.5693172608841033
793
  },
794
  {
@@ -796,6 +805,15 @@
796
  "bcp_47": "bn",
797
  "speakers": 267193288,
798
  "scores": [
 
 
 
 
 
 
 
 
 
799
  {
800
  "model": "meta-llama/llama-3.3-70b-instruct",
801
  "model_type": "text-to-text",
@@ -805,6 +823,24 @@
805
  "mlm_chrf": 0.8995877938471141,
806
  "t2t_score": 0.5622979523454997
807
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
808
  {
809
  "model": "elevenlabs/scribe_v1",
810
  "model_type": "speech-to-text",
@@ -823,13 +859,13 @@
823
  "NP": 28508
824
  },
825
  "language_family": "Indo-European",
826
- "mt_bleu": 0.23230854865261916,
827
- "mt_chrf": 0.42063939652271853,
828
- "cls_acc": 0.36666666666666664,
829
- "mlm_chrf": 0.8995877938471141,
830
  "asr_wer": 0.3066054718228631,
831
  "asr_chrf": 0.8006938100379883,
832
- "t2t_score": 0.5622979523454997,
833
  "s2t_score": 0.5536496409304257
834
  },
835
  {
@@ -837,6 +873,15 @@
837
  "bcp_47": "pt",
838
  "speakers": 237496885,
839
  "scores": [
 
 
 
 
 
 
 
 
 
840
  {
841
  "model": "meta-llama/llama-3.3-70b-instruct",
842
  "model_type": "text-to-text",
@@ -846,6 +891,24 @@
846
  "mlm_chrf": 0.9630716853128435,
847
  "t2t_score": 0.6609618763871179
848
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
849
  {
850
  "model": "elevenlabs/scribe_v1",
851
  "model_type": "speech-to-text",
@@ -875,13 +938,13 @@
875
  "TL": 816395
876
  },
877
  "language_family": "Indo-European",
878
- "mt_bleu": 0.2806919135940658,
879
- "mt_chrf": 0.4531472771818437,
880
- "cls_acc": 0.5666666666666667,
881
- "mlm_chrf": 0.9630716853128435,
882
  "asr_wer": 0.22800492332171055,
883
  "asr_chrf": 0.8922038015648965,
884
- "t2t_score": 0.6609618763871179,
885
  "s2t_score": 0.5601043624433035
886
  },
887
  {
@@ -889,6 +952,15 @@
889
  "bcp_47": "pa",
890
  "speakers": 203571210,
891
  "scores": [
 
 
 
 
 
 
 
 
 
892
  {
893
  "model": "meta-llama/llama-3.3-70b-instruct",
894
  "model_type": "text-to-text",
@@ -898,6 +970,24 @@
898
  "mlm_chrf": 0.8966325892385384,
899
  "t2t_score": 0.5942784813918421
900
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
901
  {
902
  "model": "elevenlabs/scribe_v1",
903
  "model_type": "speech-to-text",
@@ -917,20 +1007,30 @@
917
  "SG": 9314
918
  },
919
  "language_family": "Indo-European",
920
- "mt_bleu": 0.3260979917168239,
921
- "mt_chrf": 0.48620285493698817,
922
- "cls_acc": 0.4,
923
- "mlm_chrf": 0.8966325892385384,
924
  "asr_wer": 0.2622994225519276,
925
  "asr_chrf": 0.7896064299629604,
926
- "t2t_score": 0.5942784813918421,
927
  "s2t_score": 0.525952926257444
928
  },
929
  {
930
  "language_name": "Russian",
931
  "bcp_47": "ru",
932
  "speakers": 195841151,
933
- "scores": [],
 
 
 
 
 
 
 
 
 
 
934
  "commonvoice_hours": 242.0,
935
  "commonvoice_locale": "ru",
936
  "population": {
@@ -959,20 +1059,30 @@
959
  "UZ": 4279156
960
  },
961
  "language_family": "Indo-European",
962
- "mt_bleu": null,
963
- "mt_chrf": null,
964
- "cls_acc": null,
965
- "mlm_chrf": null,
966
  "asr_wer": null,
967
  "asr_chrf": null,
968
- "t2t_score": null,
969
  "s2t_score": null
970
  },
971
  {
972
  "language_name": "Swahili",
973
  "bcp_47": "sw",
974
  "speakers": 171610296,
975
- "scores": [],
 
 
 
 
 
 
 
 
 
 
976
  "commonvoice_hours": 411.0,
977
  "commonvoice_locale": "sw",
978
  "population": {
@@ -987,20 +1097,30 @@
987
  "ZA": 1016
988
  },
989
  "language_family": "Atlantic-Congo",
990
- "mt_bleu": null,
991
- "mt_chrf": null,
992
- "cls_acc": null,
993
- "mlm_chrf": null,
994
  "asr_wer": null,
995
  "asr_chrf": null,
996
- "t2t_score": null,
997
  "s2t_score": null
998
  },
999
  {
1000
  "language_name": "Indonesian",
1001
  "bcp_47": "id",
1002
  "speakers": 171207687,
1003
- "scores": [],
 
 
 
 
 
 
 
 
 
 
1004
  "commonvoice_hours": 33.0,
1005
  "commonvoice_locale": "id",
1006
  "population": {
@@ -1008,20 +1128,30 @@
1008
  "NL": 311047
1009
  },
1010
  "language_family": "Austronesian",
1011
- "mt_bleu": null,
1012
- "mt_chrf": null,
1013
- "cls_acc": null,
1014
- "mlm_chrf": null,
1015
  "asr_wer": null,
1016
  "asr_chrf": null,
1017
- "t2t_score": null,
1018
  "s2t_score": null
1019
  },
1020
  {
1021
  "language_name": "German",
1022
  "bcp_47": "de",
1023
  "speakers": 136350226,
1024
- "scores": [],
 
 
 
 
 
 
 
 
 
 
1025
  "commonvoice_hours": 1359.0,
1026
  "commonvoice_locale": "de",
1027
  "population": {
@@ -1054,20 +1184,30 @@
1054
  "US": 1563403
1055
  },
1056
  "language_family": "Indo-European",
1057
- "mt_bleu": null,
1058
- "mt_chrf": null,
1059
- "cls_acc": null,
1060
- "mlm_chrf": null,
1061
  "asr_wer": null,
1062
  "asr_chrf": null,
1063
- "t2t_score": null,
1064
  "s2t_score": null
1065
  },
1066
  {
1067
  "language_name": "Japanese",
1068
  "bcp_47": "ja",
1069
  "speakers": 119729026,
1070
- "scores": [],
 
 
 
 
 
 
 
 
 
 
1071
  "commonvoice_hours": 222.0,
1072
  "commonvoice_locale": "ja",
1073
  "population": {
@@ -1076,33 +1216,43 @@
1076
  "JP": 119231650
1077
  },
1078
  "language_family": "Japonic",
1079
- "mt_bleu": null,
1080
- "mt_chrf": null,
1081
- "cls_acc": null,
1082
- "mlm_chrf": null,
1083
  "asr_wer": null,
1084
  "asr_chrf": null,
1085
- "t2t_score": null,
1086
  "s2t_score": null
1087
  },
1088
  {
1089
  "language_name": "Telugu",
1090
  "bcp_47": "te",
1091
  "speakers": 95478480,
1092
- "scores": [],
 
 
 
 
 
 
 
 
 
 
1093
  "commonvoice_hours": 0.3,
1094
  "commonvoice_locale": "te",
1095
  "population": {
1096
  "IN": 95478480
1097
  },
1098
  "language_family": "Dravidian",
1099
- "mt_bleu": null,
1100
- "mt_chrf": null,
1101
- "cls_acc": null,
1102
- "mlm_chrf": null,
1103
  "asr_wer": null,
1104
  "asr_chrf": null,
1105
- "t2t_score": null,
1106
  "s2t_score": null
1107
  },
1108
  {
@@ -1130,27 +1280,47 @@
1130
  "language_name": "Marathi",
1131
  "bcp_47": "mr",
1132
  "speakers": 92826300,
1133
- "scores": [],
 
 
 
 
 
 
 
 
 
 
1134
  "commonvoice_hours": 20.0,
1135
  "commonvoice_locale": "mr",
1136
  "population": {
1137
  "IN": 92826300
1138
  },
1139
  "language_family": "Indo-European",
1140
- "mt_bleu": null,
1141
- "mt_chrf": null,
1142
- "cls_acc": null,
1143
- "mlm_chrf": null,
1144
  "asr_wer": null,
1145
  "asr_chrf": null,
1146
- "t2t_score": null,
1147
  "s2t_score": null
1148
  },
1149
  {
1150
  "language_name": "Javanese",
1151
  "bcp_47": "jv",
1152
  "speakers": 91180665,
1153
- "scores": [],
 
 
 
 
 
 
 
 
 
 
1154
  "commonvoice_hours": 0.0,
1155
  "commonvoice_locale": "jv",
1156
  "population": {
@@ -1158,20 +1328,30 @@
1158
  "MY": 391825
1159
  },
1160
  "language_family": "Austronesian",
1161
- "mt_bleu": null,
1162
- "mt_chrf": null,
1163
- "cls_acc": null,
1164
- "mlm_chrf": null,
1165
  "asr_wer": null,
1166
  "asr_chrf": null,
1167
- "t2t_score": null,
1168
  "s2t_score": null
1169
  },
1170
  {
1171
  "language_name": "Vietnamese",
1172
  "bcp_47": "vi",
1173
  "speakers": 86222962,
1174
- "scores": [],
 
 
 
 
 
 
 
 
 
 
1175
  "commonvoice_hours": 5.9,
1176
  "commonvoice_locale": "vi",
1177
  "population": {
@@ -1181,20 +1361,30 @@
1181
  "VN": 84900318
1182
  },
1183
  "language_family": "Austroasiatic",
1184
- "mt_bleu": null,
1185
- "mt_chrf": null,
1186
- "cls_acc": null,
1187
- "mlm_chrf": null,
1188
  "asr_wer": null,
1189
  "asr_chrf": null,
1190
- "t2t_score": null,
1191
  "s2t_score": null
1192
  },
1193
  {
1194
  "language_name": "Tamil",
1195
  "bcp_47": "ta",
1196
  "speakers": 85616159,
1197
- "scores": [],
 
 
 
 
 
 
 
 
 
 
1198
  "commonvoice_hours": 234.0,
1199
  "commonvoice_locale": "ta",
1200
  "population": {
@@ -1208,20 +1398,30 @@
1208
  "SG": 130403
1209
  },
1210
  "language_family": "Dravidian",
1211
- "mt_bleu": null,
1212
- "mt_chrf": null,
1213
- "cls_acc": null,
1214
- "mlm_chrf": null,
1215
  "asr_wer": null,
1216
  "asr_chrf": null,
1217
- "t2t_score": null,
1218
  "s2t_score": null
1219
  },
1220
  {
1221
  "language_name": "Persian",
1222
  "bcp_47": "fa",
1223
  "speakers": 84710459,
1224
- "scores": [],
 
 
 
 
 
 
 
 
 
 
1225
  "commonvoice_hours": 370.0,
1226
  "commonvoice_locale": "fa",
1227
  "population": {
@@ -1236,13 +1436,13 @@
1236
  "TJ": 69215
1237
  },
1238
  "language_family": "Indo-European",
1239
- "mt_bleu": null,
1240
- "mt_chrf": null,
1241
- "cls_acc": null,
1242
- "mlm_chrf": null,
1243
  "asr_wer": null,
1244
  "asr_chrf": null,
1245
- "t2t_score": null,
1246
  "s2t_score": null
1247
  },
1248
  {
@@ -1269,7 +1469,17 @@
1269
  "language_name": "Turkish",
1270
  "bcp_47": "tr",
1271
  "speakers": 80360704,
1272
- "scores": [],
 
 
 
 
 
 
 
 
 
 
1273
  "commonvoice_hours": 127.0,
1274
  "commonvoice_locale": "tr",
1275
  "population": {
@@ -1287,20 +1497,30 @@
1287
  "UZ": 232297
1288
  },
1289
  "language_family": "Turkic",
1290
- "mt_bleu": null,
1291
- "mt_chrf": null,
1292
- "cls_acc": null,
1293
- "mlm_chrf": null,
1294
  "asr_wer": null,
1295
  "asr_chrf": null,
1296
- "t2t_score": null,
1297
  "s2t_score": null
1298
  },
1299
  {
1300
  "language_name": "Cantonese",
1301
  "bcp_47": "yue",
1302
  "speakers": 79654759,
1303
- "scores": [],
 
 
 
 
 
 
 
 
 
 
1304
  "commonvoice_hours": 203.0,
1305
  "commonvoice_locale": "yue",
1306
  "population": {
@@ -1309,20 +1529,30 @@
1309
  "HK": 6524919
1310
  },
1311
  "language_family": "Sino-Tibetan",
1312
- "mt_bleu": null,
1313
- "mt_chrf": null,
1314
- "cls_acc": null,
1315
- "mlm_chrf": null,
1316
  "asr_wer": null,
1317
  "asr_chrf": null,
1318
- "t2t_score": null,
1319
  "s2t_score": null
1320
  },
1321
  {
1322
  "language_name": "Korean",
1323
  "bcp_47": "ko",
1324
  "speakers": 78357046,
1325
- "scores": [],
 
 
 
 
 
 
 
 
 
 
1326
  "commonvoice_hours": 1.7,
1327
  "commonvoice_locale": "ko",
1328
  "population": {
@@ -1335,20 +1565,30 @@
1335
  "US": 997917
1336
  },
1337
  "language_family": "Koreanic",
1338
- "mt_bleu": null,
1339
- "mt_chrf": null,
1340
- "cls_acc": null,
1341
- "mlm_chrf": null,
1342
  "asr_wer": null,
1343
  "asr_chrf": null,
1344
- "t2t_score": null,
1345
  "s2t_score": null
1346
  },
1347
  {
1348
  "language_name": "Italian",
1349
  "bcp_47": "it",
1350
  "speakers": 70247060,
1351
- "scores": [],
 
 
 
 
 
 
 
 
 
 
1352
  "commonvoice_hours": 362.0,
1353
  "commonvoice_locale": "it",
1354
  "population": {
@@ -1369,20 +1609,30 @@
1369
  "VA": 820
1370
  },
1371
  "language_family": "Indo-European",
1372
- "mt_bleu": null,
1373
- "mt_chrf": null,
1374
- "cls_acc": null,
1375
- "mlm_chrf": null,
1376
  "asr_wer": null,
1377
  "asr_chrf": null,
1378
- "t2t_score": null,
1379
  "s2t_score": null
1380
  },
1381
  {
1382
  "language_name": "Filipino",
1383
  "bcp_47": "fil",
1384
  "speakers": 67471096,
1385
- "scores": [],
 
 
 
 
 
 
 
 
 
 
1386
  "commonvoice_hours": 0.0,
1387
  "commonvoice_locale": "tl",
1388
  "population": {
@@ -1391,40 +1641,60 @@
1391
  "US": 1397084
1392
  },
1393
  "language_family": "Austronesian",
1394
- "mt_bleu": null,
1395
- "mt_chrf": null,
1396
- "cls_acc": null,
1397
- "mlm_chrf": null,
1398
  "asr_wer": null,
1399
  "asr_chrf": null,
1400
- "t2t_score": null,
1401
  "s2t_score": null
1402
  },
1403
  {
1404
  "language_name": "Egyptian Arabic",
1405
  "bcp_47": "arz",
1406
  "speakers": 66639360,
1407
- "scores": [],
 
 
 
 
 
 
 
 
 
 
1408
  "commonvoice_hours": null,
1409
  "commonvoice_locale": null,
1410
  "population": {
1411
  "EG": 66639360
1412
  },
1413
  "language_family": "Afro-Asiatic",
1414
- "mt_bleu": null,
1415
- "mt_chrf": null,
1416
- "cls_acc": null,
1417
- "mlm_chrf": null,
1418
  "asr_wer": null,
1419
  "asr_chrf": null,
1420
- "t2t_score": null,
1421
  "s2t_score": null
1422
  },
1423
  {
1424
  "language_name": "Gujarati",
1425
  "bcp_47": "gu",
1426
  "speakers": 61721799,
1427
- "scores": [],
 
 
 
 
 
 
 
 
 
 
1428
  "commonvoice_hours": 0.0,
1429
  "commonvoice_locale": "gu-IN",
1430
  "population": {
@@ -1434,33 +1704,43 @@
1434
  "KE": 4978
1435
  },
1436
  "language_family": "Indo-European",
1437
- "mt_bleu": null,
1438
- "mt_chrf": null,
1439
- "cls_acc": null,
1440
- "mlm_chrf": null,
1441
  "asr_wer": null,
1442
  "asr_chrf": null,
1443
- "t2t_score": null,
1444
  "s2t_score": null
1445
  },
1446
  {
1447
  "language_name": "Thai",
1448
  "bcp_47": "th",
1449
  "speakers": 55181920,
1450
- "scores": [],
 
 
 
 
 
 
 
 
 
 
1451
  "commonvoice_hours": 172.0,
1452
  "commonvoice_locale": "th",
1453
  "population": {
1454
  "TH": 55181920
1455
  },
1456
  "language_family": "Tai-Kadai",
1457
- "mt_bleu": null,
1458
- "mt_chrf": null,
1459
- "cls_acc": null,
1460
- "mlm_chrf": null,
1461
  "asr_wer": null,
1462
  "asr_chrf": null,
1463
- "t2t_score": null,
1464
  "s2t_score": null
1465
  },
1466
  {
@@ -1490,20 +1770,30 @@
1490
  "language_name": "Kannada",
1491
  "bcp_47": "kn",
1492
  "speakers": 49065330,
1493
- "scores": [],
 
 
 
 
 
 
 
 
 
 
1494
  "commonvoice_hours": 0.0,
1495
  "commonvoice_locale": "kn",
1496
  "population": {
1497
  "IN": 49065330
1498
  },
1499
  "language_family": "Dravidian",
1500
- "mt_bleu": null,
1501
- "mt_chrf": null,
1502
- "cls_acc": null,
1503
- "mlm_chrf": null,
1504
  "asr_wer": null,
1505
  "asr_chrf": null,
1506
- "t2t_score": null,
1507
  "s2t_score": null
1508
  },
1509
  {
@@ -1530,7 +1820,17 @@
1530
  "language_name": "Malayalam",
1531
  "bcp_47": "ml",
1532
  "speakers": 43257484,
1533
- "scores": [],
 
 
 
 
 
 
 
 
 
 
1534
  "commonvoice_hours": 2.8,
1535
  "commonvoice_locale": "ml",
1536
  "population": {
@@ -1543,40 +1843,60 @@
1543
  "SG": 9935
1544
  },
1545
  "language_family": "Dravidian",
1546
- "mt_bleu": null,
1547
- "mt_chrf": null,
1548
- "cls_acc": null,
1549
- "mlm_chrf": null,
1550
  "asr_wer": null,
1551
  "asr_chrf": null,
1552
- "t2t_score": null,
1553
  "s2t_score": null
1554
  },
1555
  {
1556
  "language_name": "Odia",
1557
  "bcp_47": "or",
1558
  "speakers": 42434880,
1559
- "scores": [],
 
 
 
 
 
 
 
 
 
 
1560
  "commonvoice_hours": 2.8,
1561
  "commonvoice_locale": "or",
1562
  "population": {
1563
  "IN": 42434880
1564
  },
1565
  "language_family": "Indo-European",
1566
- "mt_bleu": null,
1567
- "mt_chrf": null,
1568
- "cls_acc": null,
1569
- "mlm_chrf": null,
1570
  "asr_wer": null,
1571
  "asr_chrf": null,
1572
- "t2t_score": null,
1573
  "s2t_score": null
1574
  },
1575
  {
1576
  "language_name": "Polish",
1577
  "bcp_47": "pl",
1578
  "speakers": 41077399,
1579
- "scores": [],
 
 
 
 
 
 
 
 
 
 
1580
  "commonvoice_hours": 174.0,
1581
  "commonvoice_locale": "pl",
1582
  "population": {
@@ -1591,13 +1911,13 @@
1591
  "UA": 1054150
1592
  },
1593
  "language_family": "Indo-European",
1594
- "mt_bleu": null,
1595
- "mt_chrf": null,
1596
- "cls_acc": null,
1597
- "mlm_chrf": null,
1598
  "asr_wer": null,
1599
  "asr_chrf": null,
1600
- "t2t_score": null,
1601
  "s2t_score": null
1602
  },
1603
  {
@@ -1624,7 +1944,17 @@
1624
  "language_name": "Hausa",
1625
  "bcp_47": "ha",
1626
  "speakers": 40411882,
1627
- "scores": [],
 
 
 
 
 
 
 
 
 
 
1628
  "commonvoice_hours": 4.1,
1629
  "commonvoice_locale": "ha",
1630
  "population": {
@@ -1635,20 +1965,30 @@
1635
  "SD": 820109
1636
  },
1637
  "language_family": "Afro-Asiatic",
1638
- "mt_bleu": null,
1639
- "mt_chrf": null,
1640
- "cls_acc": null,
1641
- "mlm_chrf": null,
1642
  "asr_wer": null,
1643
  "asr_chrf": null,
1644
- "t2t_score": null,
1645
  "s2t_score": null
1646
  },
1647
  {
1648
  "language_name": "Sindhi",
1649
  "bcp_47": "sd",
1650
  "speakers": 40329510,
1651
- "scores": [],
 
 
 
 
 
 
 
 
 
 
1652
  "commonvoice_hours": 0.4,
1653
  "commonvoice_locale": "sd",
1654
  "population": {
@@ -1656,13 +1996,13 @@
1656
  "PK": 35025150
1657
  },
1658
  "language_family": "Indo-European",
1659
- "mt_bleu": null,
1660
- "mt_chrf": null,
1661
- "cls_acc": null,
1662
- "mlm_chrf": null,
1663
  "asr_wer": null,
1664
  "asr_chrf": null,
1665
- "t2t_score": null,
1666
  "s2t_score": null
1667
  },
1668
  {
@@ -1694,7 +2034,17 @@
1694
  "language_name": "Malay",
1695
  "bcp_47": "ms",
1696
  "speakers": 38097307,
1697
- "scores": [],
 
 
 
 
 
 
 
 
 
 
1698
  "commonvoice_hours": 0.0,
1699
  "commonvoice_locale": "ms",
1700
  "population": {
@@ -1705,20 +2055,30 @@
1705
  "SG": 869352
1706
  },
1707
  "language_family": "Austronesian",
1708
- "mt_bleu": null,
1709
- "mt_chrf": null,
1710
- "cls_acc": null,
1711
- "mlm_chrf": null,
1712
  "asr_wer": null,
1713
  "asr_chrf": null,
1714
- "t2t_score": null,
1715
  "s2t_score": null
1716
  },
1717
  {
1718
  "language_name": "Burmese",
1719
  "bcp_47": "my",
1720
  "speakers": 36559231,
1721
- "scores": [],
 
 
 
 
 
 
 
 
 
 
1722
  "commonvoice_hours": 0.0,
1723
  "commonvoice_locale": "my",
1724
  "population": {
@@ -1726,20 +2086,30 @@
1726
  "MM": 36217664
1727
  },
1728
  "language_family": "Sino-Tibetan",
1729
- "mt_bleu": null,
1730
- "mt_chrf": null,
1731
- "cls_acc": null,
1732
- "mlm_chrf": null,
1733
  "asr_wer": null,
1734
  "asr_chrf": null,
1735
- "t2t_score": null,
1736
  "s2t_score": null
1737
  },
1738
  {
1739
  "language_name": "Amharic",
1740
  "bcp_47": "am",
1741
  "speakers": 35728475,
1742
- "scores": [],
 
 
 
 
 
 
 
 
 
 
1743
  "commonvoice_hours": 1.8,
1744
  "commonvoice_locale": "am",
1745
  "population": {
@@ -1747,13 +2117,13 @@
1747
  "IL": 51185
1748
  },
1749
  "language_family": "Afro-Asiatic",
1750
- "mt_bleu": null,
1751
- "mt_chrf": null,
1752
- "cls_acc": null,
1753
- "mlm_chrf": null,
1754
  "asr_wer": null,
1755
  "asr_chrf": null,
1756
- "t2t_score": null,
1757
  "s2t_score": null
1758
  },
1759
  {
@@ -1780,7 +2150,17 @@
1780
  "language_name": "Oromo",
1781
  "bcp_47": "om",
1782
  "speakers": 34897121,
1783
- "scores": [],
 
 
 
 
 
 
 
 
 
 
1784
  "commonvoice_hours": 0.0,
1785
  "commonvoice_locale": "om",
1786
  "population": {
@@ -1789,20 +2169,30 @@
1789
  "SO": 49380
1790
  },
1791
  "language_family": "Afro-Asiatic",
1792
- "mt_bleu": null,
1793
- "mt_chrf": null,
1794
- "cls_acc": null,
1795
- "mlm_chrf": null,
1796
  "asr_wer": null,
1797
  "asr_chrf": null,
1798
- "t2t_score": null,
1799
  "s2t_score": null
1800
  },
1801
  {
1802
  "language_name": "Bhojpuri",
1803
  "bcp_47": "bho",
1804
  "speakers": 32934797,
1805
- "scores": [],
 
 
 
 
 
 
 
 
 
 
1806
  "commonvoice_hours": null,
1807
  "commonvoice_locale": null,
1808
  "population": {
@@ -1811,20 +2201,30 @@
1811
  "NP": 2062297
1812
  },
1813
  "language_family": "Indo-European",
1814
- "mt_bleu": null,
1815
- "mt_chrf": null,
1816
- "cls_acc": null,
1817
- "mlm_chrf": null,
1818
  "asr_wer": null,
1819
  "asr_chrf": null,
1820
- "t2t_score": null,
1821
  "s2t_score": null
1822
  },
1823
  {
1824
  "language_name": "Uzbek",
1825
  "bcp_47": "uz",
1826
  "speakers": 32792780,
1827
- "scores": [],
 
 
 
 
 
 
 
 
 
 
1828
  "commonvoice_hours": 100.0,
1829
  "commonvoice_locale": "uz",
1830
  "population": {
@@ -1835,20 +2235,30 @@
1835
  "UZ": 30565400
1836
  },
1837
  "language_family": "Turkic",
1838
- "mt_bleu": null,
1839
- "mt_chrf": null,
1840
- "cls_acc": null,
1841
- "mlm_chrf": null,
1842
  "asr_wer": null,
1843
  "asr_chrf": null,
1844
- "t2t_score": null,
1845
  "s2t_score": null
1846
  },
1847
  {
1848
  "language_name": "Azerbaijani",
1849
  "bcp_47": "az",
1850
  "speakers": 32446682,
1851
- "scores": [],
 
 
 
 
 
 
 
 
 
 
1852
  "commonvoice_hours": 0.5,
1853
  "commonvoice_locale": "az",
1854
  "population": {
@@ -1860,13 +2270,13 @@
1860
  "TR": 1140044
1861
  },
1862
  "language_family": "Turkic",
1863
- "mt_bleu": null,
1864
- "mt_chrf": null,
1865
- "cls_acc": null,
1866
- "mlm_chrf": null,
1867
  "asr_wer": null,
1868
  "asr_chrf": null,
1869
- "t2t_score": null,
1870
  "s2t_score": null
1871
  },
1872
  {
@@ -3267,7 +3677,7 @@
3267
  "bcp_47": "ug",
3268
  "speakers": 8052967,
3269
  "scores": [],
3270
- "commonvoice_hours": 363.0,
3271
  "commonvoice_locale": "ug",
3272
  "population": {
3273
  "AF": 3005,
@@ -3615,7 +4025,7 @@
3615
  "bcp_47": "sk",
3616
  "speakers": 6680269,
3617
  "scores": [],
3618
- "commonvoice_hours": 44.0,
3619
  "commonvoice_locale": "sk",
3620
  "population": {
3621
  "CZ": 1712400,
@@ -6647,7 +7057,7 @@
6647
  "bcp_47": "ga",
6648
  "speakers": 1237487,
6649
  "scores": [],
6650
- "commonvoice_hours": 5.8,
6651
  "commonvoice_locale": "ga-IE",
6652
  "population": {
6653
  "GB": 98642,
@@ -6969,7 +7379,7 @@
6969
  "bcp_47": "tig",
6970
  "speakers": 1094616,
6971
  "scores": [],
6972
- "commonvoice_hours": 2.2,
6973
  "commonvoice_locale": "tig",
6974
  "population": {
6975
  "ER": 1094616
@@ -7030,7 +7440,7 @@
7030
  "bcp_47": "kbd",
7031
  "speakers": 1070873,
7032
  "scores": [],
7033
- "commonvoice_hours": 15.0,
7034
  "commonvoice_locale": "kbd",
7035
  "population": {
7036
  "RU": 439338,
@@ -8812,7 +9222,7 @@
8812
  "bcp_47": "sah",
8813
  "speakers": 453510,
8814
  "scores": [],
8815
- "commonvoice_hours": 8.5,
8816
  "commonvoice_locale": "sah",
8817
  "population": {
8818
  "RU": 453510
@@ -8913,7 +9323,7 @@
8913
  "bcp_47": "xmf",
8914
  "speakers": 439670,
8915
  "scores": [],
8916
- "commonvoice_hours": 9.1,
8917
  "commonvoice_locale": "xmf",
8918
  "population": {
8919
  "GE": 439670
@@ -11222,7 +11632,7 @@
11222
  "bcp_47": "trw",
11223
  "speakers": 123756,
11224
  "scores": [],
11225
- "commonvoice_hours": 18.0,
11226
  "commonvoice_locale": "trw",
11227
  "population": {
11228
  "PK": 123756
@@ -11502,7 +11912,7 @@
11502
  "bcp_47": "ab",
11503
  "speakers": 91953,
11504
  "scores": [],
11505
- "commonvoice_hours": 68.0,
11506
  "commonvoice_locale": "ab",
11507
  "population": {
11508
  "GE": 87934,
@@ -13239,7 +13649,7 @@
13239
  "bcp_47": "ik",
13240
  "speakers": 7983,
13241
  "scores": [],
13242
- "commonvoice_hours": 2.6,
13243
  "commonvoice_locale": "ipk",
13244
  "population": {
13245
  "US": 7983
@@ -13539,7 +13949,7 @@
13539
  "bcp_47": "trv",
13540
  "speakers": 4721,
13541
  "scores": [],
13542
- "commonvoice_hours": 4.7,
13543
  "commonvoice_locale": "trv",
13544
  "population": {
13545
  "TW": 4721
@@ -13899,7 +14309,7 @@
13899
  "bcp_47": "gv",
13900
  "speakers": 1719,
13901
  "scores": [],
13902
- "commonvoice_hours": 2.7,
13903
  "commonvoice_locale": "gv",
13904
  "population": {
13905
  "IM": 1719
@@ -14019,7 +14429,7 @@
14019
  "bcp_47": "sei",
14020
  "speakers": 901,
14021
  "scores": [],
14022
- "commonvoice_hours": 1.0,
14023
  "commonvoice_locale": "sei",
14024
  "population": {
14025
  "MX": 901
 
40
  "mlm_chrf": 0.9820612175447262,
41
  "t2t_score": 0.819029567439605
42
  },
 
 
 
 
 
 
 
 
 
43
  {
44
  "model": "elevenlabs/scribe_v1",
45
  "model_type": "speech-to-text",
 
215
  "ZW": 6109446
216
  },
217
  "language_family": "Indo-European",
218
+ "mt_bleu": 0.42898956089582446,
219
+ "mt_chrf": 0.5437014634491315,
220
+ "cls_acc": 0.5916666666666667,
221
+ "mlm_chrf": 0.9217092550306462,
222
  "asr_wer": 0.2883431971452567,
223
  "asr_chrf": 0.8410097119423834,
224
+ "t2t_score": 0.6856924617154815,
225
  "s2t_score": 0.5646764545438201
226
  },
227
  {
 
265
  "mlm_chrf": 0.9353777720326482,
266
  "t2t_score": 0.7876025558161427
267
  },
 
 
 
 
 
 
 
 
 
268
  {
269
  "model": "elevenlabs/scribe_v1",
270
  "model_type": "speech-to-text",
 
305
  "VN": 1085934
306
  },
307
  "language_family": "Sino-Tibetan",
308
+ "mt_bleu": 0.3835031558957527,
309
+ "mt_chrf": 0.557217638850705,
310
+ "cls_acc": 0.6416666666666666,
311
+ "mlm_chrf": 0.9064350468960058,
312
  "asr_wer": 1.0,
313
  "asr_chrf": 0.7412819691487745,
314
+ "t2t_score": 0.7017731174711257,
315
  "s2t_score": 0.8706409845743872
316
  },
317
  {
 
355
  "mlm_chrf": 0.9648559476590244,
356
  "t2t_score": 0.7852166232038682
357
  },
 
 
 
 
 
 
 
 
 
358
  {
359
  "model": "elevenlabs/scribe_v1",
360
  "model_type": "speech-to-text",
 
381
  "ZA": 1129272
382
  },
383
  "language_family": "Indo-European",
384
+ "mt_bleu": 0.29263068330216335,
385
+ "mt_chrf": 0.45579369454961405,
386
+ "cls_acc": 0.6083333333333334,
387
+ "mlm_chrf": 0.9315378261108958,
388
  "asr_wer": 0.3581497443457955,
389
  "asr_chrf": 0.7041899877791161,
390
+ "t2t_score": 0.6652216179979477,
391
  "s2t_score": 0.5311698660624558
392
  },
393
  {
 
431
  "mlm_chrf": 0.9816202595213187,
432
  "t2t_score": 0.7836003741246967
433
  },
 
 
 
 
 
 
 
 
 
434
  {
435
  "model": "elevenlabs/scribe_v1",
436
  "model_type": "speech-to-text",
 
490
  "VE": 23488572
491
  },
492
  "language_family": "Indo-European",
493
+ "mt_bleu": 0.29845963169364587,
494
+ "mt_chrf": 0.4624361310582059,
495
+ "cls_acc": 0.6583333333333333,
496
+ "mlm_chrf": 0.9624623060256268,
497
  "asr_wer": 0.18417222540761574,
498
  "asr_chrf": 0.9063759698635353,
499
+ "t2t_score": 0.6944105901390554,
500
  "s2t_score": 0.5452740976355754
501
  },
502
  {
 
540
  "mlm_chrf": 0.97921999148367,
541
  "t2t_score": 0.7974861134300695
542
  },
 
 
 
 
 
 
 
 
 
543
  {
544
  "model": "elevenlabs/scribe_v1",
545
  "model_type": "speech-to-text",
 
598
  "YE": 22114456
599
  },
600
  "language_family": "Afro-Asiatic",
601
+ "mt_bleu": 0.272695032954275,
602
+ "mt_chrf": 0.45513349114063173,
603
+ "cls_acc": 0.6666666666666666,
604
+ "mlm_chrf": 0.9438008902130272,
605
  "asr_wer": 0.23569148982352306,
606
  "asr_chrf": 0.8475974658647911,
607
+ "t2t_score": 0.6885336826734418,
608
  "s2t_score": 0.5416444778441571
609
  },
610
  {
 
612
  "bcp_47": "ur",
613
  "speakers": 290790290,
614
  "scores": [
615
+ {
616
+ "model": "openai/gpt-4o-mini",
617
+ "model_type": "text-to-text",
618
+ "mt_bleu": 0.25651711220915696,
619
+ "mt_chrf": 0.434940424205634,
620
+ "cls_acc": 0.43333333333333335,
621
+ "mlm_chrf": 0.9405185966090683,
622
+ "t2t_score": 0.6029307847160119
623
+ },
624
  {
625
  "model": "meta-llama/llama-3.3-70b-instruct",
626
  "model_type": "text-to-text",
 
630
  "mlm_chrf": 0.9330700955297437,
631
  "t2t_score": 0.6011143080087092
632
  },
633
+ {
634
+ "model": "mistralai/mistral-small-24b-instruct-2501",
635
+ "model_type": "text-to-text",
636
+ "mt_bleu": 0.18142367436048185,
637
+ "mt_chrf": 0.3459866651047097,
638
+ "cls_acc": 0.6666666666666666,
639
+ "mlm_chrf": 0.9320431438157005,
640
+ "t2t_score": 0.6482321585290256
641
+ },
642
+ {
643
+ "model": "google/gemini-2.0-flash-001",
644
+ "model_type": "text-to-text",
645
+ "mt_bleu": 0.31728190361318,
646
+ "mt_chrf": 0.48818783571496116,
647
+ "cls_acc": 0.9,
648
+ "mlm_chrf": 0.9548190343172376,
649
+ "t2t_score": 0.7810022900107328
650
+ },
651
  {
652
  "model": "elevenlabs/scribe_v1",
653
  "model_type": "speech-to-text",
 
666
  "PK": 221825950
667
  },
668
  "language_family": "Indo-European",
669
+ "mt_bleu": 0.24342394573959458,
670
+ "mt_chrf": 0.41818027171375555,
671
+ "cls_acc": 0.6166666666666667,
672
+ "mlm_chrf": 0.9401127175679376,
673
  "asr_wer": 0.33586677704198,
674
  "asr_chrf": 0.7430938264813908,
675
+ "t2t_score": 0.6583198853161198,
676
  "s2t_score": 0.5394803017616854
677
  },
678
  {
 
680
  "bcp_47": "fr",
681
  "speakers": 278611507,
682
  "scores": [
683
+ {
684
+ "model": "openai/gpt-4o-mini",
685
+ "model_type": "text-to-text",
686
+ "mt_bleu": 0.3330934985862475,
687
+ "mt_chrf": 0.48898780285384186,
688
+ "cls_acc": 0.6666666666666666,
689
+ "mlm_chrf": 0.9702783834061439,
690
+ "t2t_score": 0.7086442843088842
691
+ },
692
  {
693
  "model": "meta-llama/llama-3.3-70b-instruct",
694
  "model_type": "text-to-text",
 
698
  "mlm_chrf": 0.9639910578331403,
699
  "t2t_score": 0.6753073934678575
700
  },
701
+ {
702
+ "model": "mistralai/mistral-small-24b-instruct-2501",
703
+ "model_type": "text-to-text",
704
+ "mt_bleu": 0.27503766788302847,
705
+ "mt_chrf": 0.441500491331595,
706
+ "cls_acc": 0.7666666666666667,
707
+ "mlm_chrf": 0.9709359436365964,
708
+ "t2t_score": 0.7263677005449528
709
+ },
710
+ {
711
+ "model": "google/gemini-2.0-flash-001",
712
+ "model_type": "text-to-text",
713
+ "mt_bleu": 0.3407733406148989,
714
+ "mt_chrf": 0.5123861936301051,
715
+ "cls_acc": 0.8666666666666667,
716
+ "mlm_chrf": 0.981866385354237,
717
+ "t2t_score": 0.7869730818836697
718
+ },
719
  {
720
  "model": "elevenlabs/scribe_v1",
721
  "model_type": "speech-to-text",
 
791
  "YT": 110580
792
  },
793
  "language_family": "Indo-European",
794
+ "mt_bleu": 0.3187714613657571,
795
+ "mt_chrf": 0.4845347359298269,
796
+ "cls_acc": 0.7166666666666667,
797
+ "mlm_chrf": 0.9717679425575294,
798
  "asr_wer": 0.2610754929736961,
799
  "asr_chrf": 0.8775590287945104,
800
+ "t2t_score": 0.7243231150513411,
801
  "s2t_score": 0.5693172608841033
802
  },
803
  {
 
805
  "bcp_47": "bn",
806
  "speakers": 267193288,
807
  "scores": [
808
+ {
809
+ "model": "openai/gpt-4o-mini",
810
+ "model_type": "text-to-text",
811
+ "mt_bleu": 0.20957267610789623,
812
+ "mt_chrf": 0.38143415739652864,
813
+ "cls_acc": 0.3333333333333333,
814
+ "mlm_chrf": 0.8956278857774084,
815
+ "t2t_score": 0.5367984588357567
816
+ },
817
  {
818
  "model": "meta-llama/llama-3.3-70b-instruct",
819
  "model_type": "text-to-text",
 
823
  "mlm_chrf": 0.8995877938471141,
824
  "t2t_score": 0.5622979523454997
825
  },
826
+ {
827
+ "model": "mistralai/mistral-small-24b-instruct-2501",
828
+ "model_type": "text-to-text",
829
+ "mt_bleu": 0.17813156872400576,
830
+ "mt_chrf": 0.34058339171576163,
831
+ "cls_acc": 0.4,
832
+ "mlm_chrf": 0.8686306022759065,
833
+ "t2t_score": 0.5364046646638894
834
+ },
835
+ {
836
+ "model": "google/gemini-2.0-flash-001",
837
+ "model_type": "text-to-text",
838
+ "mt_bleu": 0.29469375187054625,
839
+ "mt_chrf": 0.4790136299628091,
840
+ "cls_acc": 0.9,
841
+ "mlm_chrf": 0.9213788493667834,
842
+ "t2t_score": 0.7667974931098641
843
+ },
844
  {
845
  "model": "elevenlabs/scribe_v1",
846
  "model_type": "speech-to-text",
 
859
  "NP": 28508
860
  },
861
  "language_family": "Indo-European",
862
+ "mt_bleu": 0.22867663633876686,
863
+ "mt_chrf": 0.4054176438994545,
864
+ "cls_acc": 0.5,
865
+ "mlm_chrf": 0.896306282816803,
866
  "asr_wer": 0.3066054718228631,
867
  "asr_chrf": 0.8006938100379883,
868
+ "t2t_score": 0.6005746422387525,
869
  "s2t_score": 0.5536496409304257
870
  },
871
  {
 
873
  "bcp_47": "pt",
874
  "speakers": 237496885,
875
  "scores": [
876
+ {
877
+ "model": "openai/gpt-4o-mini",
878
+ "model_type": "text-to-text",
879
+ "mt_bleu": 0.3303753782527083,
880
+ "mt_chrf": 0.486171419442025,
881
+ "cls_acc": 0.6333333333333333,
882
+ "mlm_chrf": 0.976316236530384,
883
+ "t2t_score": 0.6986069964352475
884
+ },
885
  {
886
  "model": "meta-llama/llama-3.3-70b-instruct",
887
  "model_type": "text-to-text",
 
891
  "mlm_chrf": 0.9630716853128435,
892
  "t2t_score": 0.6609618763871179
893
  },
894
+ {
895
+ "model": "mistralai/mistral-small-24b-instruct-2501",
896
+ "model_type": "text-to-text",
897
+ "mt_bleu": 0.3025841449283074,
898
+ "mt_chrf": 0.4573560946306324,
899
+ "cls_acc": 0.6,
900
+ "mlm_chrf": 0.9304952225388448,
901
+ "t2t_score": 0.662617105723159
902
+ },
903
+ {
904
+ "model": "google/gemini-2.0-flash-001",
905
+ "model_type": "text-to-text",
906
+ "mt_bleu": 0.37947802751730003,
907
+ "mt_chrf": 0.534900245316462,
908
+ "cls_acc": 0.8,
909
+ "mlm_chrf": 0.9836940585303217,
910
+ "t2t_score": 0.7728647679489279
911
+ },
912
  {
913
  "model": "elevenlabs/scribe_v1",
914
  "model_type": "speech-to-text",
 
938
  "TL": 816395
939
  },
940
  "language_family": "Indo-European",
941
+ "mt_bleu": 0.32328236607309535,
942
+ "mt_chrf": 0.48289375914274074,
943
+ "cls_acc": 0.6499999999999999,
944
+ "mlm_chrf": 0.9633943007280985,
945
  "asr_wer": 0.22800492332171055,
946
  "asr_chrf": 0.8922038015648965,
947
+ "t2t_score": 0.6987626866236131,
948
  "s2t_score": 0.5601043624433035
949
  },
950
  {
 
952
  "bcp_47": "pa",
953
  "speakers": 203571210,
954
  "scores": [
955
+ {
956
+ "model": "openai/gpt-4o-mini",
957
+ "model_type": "text-to-text",
958
+ "mt_bleu": 0.31712197840223066,
959
+ "mt_chrf": 0.46859907170069276,
960
+ "cls_acc": 0.3,
961
+ "mlm_chrf": 0.8891660761810072,
962
+ "t2t_score": 0.5525883826272333
963
+ },
964
  {
965
  "model": "meta-llama/llama-3.3-70b-instruct",
966
  "model_type": "text-to-text",
 
970
  "mlm_chrf": 0.8966325892385384,
971
  "t2t_score": 0.5942784813918421
972
  },
973
+ {
974
+ "model": "mistralai/mistral-small-24b-instruct-2501",
975
+ "model_type": "text-to-text",
976
+ "mt_bleu": 0.2187115929653566,
977
+ "mt_chrf": 0.3524175363784819,
978
+ "cls_acc": 0.3,
979
+ "mlm_chrf": 0.8850095336403061,
980
+ "t2t_score": 0.5124756900062627
981
+ },
982
+ {
983
+ "model": "google/gemini-2.0-flash-001",
984
+ "model_type": "text-to-text",
985
+ "mt_bleu": 0.3778935484806146,
986
+ "mt_chrf": 0.5344045974435822,
987
+ "cls_acc": 0.8666666666666667,
988
+ "mlm_chrf": 0.9084722630859033,
989
+ "t2t_score": 0.7698478423987174
990
+ },
991
  {
992
  "model": "elevenlabs/scribe_v1",
993
  "model_type": "speech-to-text",
 
1007
  "SG": 9314
1008
  },
1009
  "language_family": "Indo-European",
1010
+ "mt_bleu": 0.30995627789125646,
1011
+ "mt_chrf": 0.4604060151149363,
1012
+ "cls_acc": 0.4666666666666667,
1013
+ "mlm_chrf": 0.8948201155364387,
1014
  "asr_wer": 0.2622994225519276,
1015
  "asr_chrf": 0.7896064299629604,
1016
+ "t2t_score": 0.6072975991060139,
1017
  "s2t_score": 0.525952926257444
1018
  },
1019
  {
1020
  "language_name": "Russian",
1021
  "bcp_47": "ru",
1022
  "speakers": 195841151,
1023
+ "scores": [
1024
+ {
1025
+ "model": "meta-llama/llama-3.3-70b-instruct",
1026
+ "model_type": "text-to-text",
1027
+ "mt_bleu": 0.21923866610511508,
1028
+ "mt_chrf": 0.4198761129214811,
1029
+ "cls_acc": 0.6333333333333333,
1030
+ "mlm_chrf": 0.952484813474142,
1031
+ "t2t_score": 0.6685647532429854
1032
+ }
1033
+ ],
1034
  "commonvoice_hours": 242.0,
1035
  "commonvoice_locale": "ru",
1036
  "population": {
 
1059
  "UZ": 4279156
1060
  },
1061
  "language_family": "Indo-European",
1062
+ "mt_bleu": 0.21923866610511508,
1063
+ "mt_chrf": 0.4198761129214811,
1064
+ "cls_acc": 0.6333333333333333,
1065
+ "mlm_chrf": 0.952484813474142,
1066
  "asr_wer": null,
1067
  "asr_chrf": null,
1068
+ "t2t_score": 0.6685647532429854,
1069
  "s2t_score": null
1070
  },
1071
  {
1072
  "language_name": "Swahili",
1073
  "bcp_47": "sw",
1074
  "speakers": 171610296,
1075
+ "scores": [
1076
+ {
1077
+ "model": "meta-llama/llama-3.3-70b-instruct",
1078
+ "model_type": "text-to-text",
1079
+ "mt_bleu": 0.2687578645492076,
1080
+ "mt_chrf": 0.43597304968281303,
1081
+ "cls_acc": 0.6,
1082
+ "mlm_chrf": 0.9381149118648788,
1083
+ "t2t_score": 0.6580293205158972
1084
+ }
1085
+ ],
1086
  "commonvoice_hours": 411.0,
1087
  "commonvoice_locale": "sw",
1088
  "population": {
 
1097
  "ZA": 1016
1098
  },
1099
  "language_family": "Atlantic-Congo",
1100
+ "mt_bleu": 0.2687578645492076,
1101
+ "mt_chrf": 0.43597304968281303,
1102
+ "cls_acc": 0.6,
1103
+ "mlm_chrf": 0.9381149118648788,
1104
  "asr_wer": null,
1105
  "asr_chrf": null,
1106
+ "t2t_score": 0.6580293205158972,
1107
  "s2t_score": null
1108
  },
1109
  {
1110
  "language_name": "Indonesian",
1111
  "bcp_47": "id",
1112
  "speakers": 171207687,
1113
+ "scores": [
1114
+ {
1115
+ "model": "meta-llama/llama-3.3-70b-instruct",
1116
+ "model_type": "text-to-text",
1117
+ "mt_bleu": 0.2848563065102788,
1118
+ "mt_chrf": 0.45614479099339716,
1119
+ "cls_acc": 0.6,
1120
+ "mlm_chrf": 0.9430041038670292,
1121
+ "t2t_score": 0.6663829649534755
1122
+ }
1123
+ ],
1124
  "commonvoice_hours": 33.0,
1125
  "commonvoice_locale": "id",
1126
  "population": {
 
1128
  "NL": 311047
1129
  },
1130
  "language_family": "Austronesian",
1131
+ "mt_bleu": 0.2848563065102788,
1132
+ "mt_chrf": 0.45614479099339716,
1133
+ "cls_acc": 0.6,
1134
+ "mlm_chrf": 0.9430041038670292,
1135
  "asr_wer": null,
1136
  "asr_chrf": null,
1137
+ "t2t_score": 0.6663829649534755,
1138
  "s2t_score": null
1139
  },
1140
  {
1141
  "language_name": "German",
1142
  "bcp_47": "de",
1143
  "speakers": 136350226,
1144
+ "scores": [
1145
+ {
1146
+ "model": "meta-llama/llama-3.3-70b-instruct",
1147
+ "model_type": "text-to-text",
1148
+ "mt_bleu": 0.31823711841849867,
1149
+ "mt_chrf": 0.4917464754926922,
1150
+ "cls_acc": 0.6666666666666666,
1151
+ "mlm_chrf": 0.9691031216506188,
1152
+ "t2t_score": 0.7091720879366593
1153
+ }
1154
+ ],
1155
  "commonvoice_hours": 1359.0,
1156
  "commonvoice_locale": "de",
1157
  "population": {
 
1184
  "US": 1563403
1185
  },
1186
  "language_family": "Indo-European",
1187
+ "mt_bleu": 0.31823711841849867,
1188
+ "mt_chrf": 0.4917464754926922,
1189
+ "cls_acc": 0.6666666666666666,
1190
+ "mlm_chrf": 0.9691031216506188,
1191
  "asr_wer": null,
1192
  "asr_chrf": null,
1193
+ "t2t_score": 0.7091720879366593,
1194
  "s2t_score": null
1195
  },
1196
  {
1197
  "language_name": "Japanese",
1198
  "bcp_47": "ja",
1199
  "speakers": 119729026,
1200
+ "scores": [
1201
+ {
1202
+ "model": "meta-llama/llama-3.3-70b-instruct",
1203
+ "model_type": "text-to-text",
1204
+ "mt_bleu": 0.2683560704622462,
1205
+ "mt_chrf": 0.4411714629040184,
1206
+ "cls_acc": 0.5333333333333333,
1207
+ "mlm_chrf": 0.9391550198474721,
1208
+ "t2t_score": 0.6378866053616079
1209
+ }
1210
+ ],
1211
  "commonvoice_hours": 222.0,
1212
  "commonvoice_locale": "ja",
1213
  "population": {
 
1216
  "JP": 119231650
1217
  },
1218
  "language_family": "Japonic",
1219
+ "mt_bleu": 0.2683560704622462,
1220
+ "mt_chrf": 0.4411714629040184,
1221
+ "cls_acc": 0.5333333333333333,
1222
+ "mlm_chrf": 0.9391550198474721,
1223
  "asr_wer": null,
1224
  "asr_chrf": null,
1225
+ "t2t_score": 0.6378866053616079,
1226
  "s2t_score": null
1227
  },
1228
  {
1229
  "language_name": "Telugu",
1230
  "bcp_47": "te",
1231
  "speakers": 95478480,
1232
+ "scores": [
1233
+ {
1234
+ "model": "meta-llama/llama-3.3-70b-instruct",
1235
+ "model_type": "text-to-text",
1236
+ "mt_bleu": 0.26607890997092576,
1237
+ "mt_chrf": 0.4454927673606575,
1238
+ "cls_acc": 0.36666666666666664,
1239
+ "mlm_chrf": 0.9537042790563578,
1240
+ "t2t_score": 0.5886212376945607
1241
+ }
1242
+ ],
1243
  "commonvoice_hours": 0.3,
1244
  "commonvoice_locale": "te",
1245
  "population": {
1246
  "IN": 95478480
1247
  },
1248
  "language_family": "Dravidian",
1249
+ "mt_bleu": 0.26607890997092576,
1250
+ "mt_chrf": 0.4454927673606575,
1251
+ "cls_acc": 0.36666666666666664,
1252
+ "mlm_chrf": 0.9537042790563578,
1253
  "asr_wer": null,
1254
  "asr_chrf": null,
1255
+ "t2t_score": 0.5886212376945607,
1256
  "s2t_score": null
1257
  },
1258
  {
 
1280
  "language_name": "Marathi",
1281
  "bcp_47": "mr",
1282
  "speakers": 92826300,
1283
+ "scores": [
1284
+ {
1285
+ "model": "meta-llama/llama-3.3-70b-instruct",
1286
+ "model_type": "text-to-text",
1287
+ "mt_bleu": 0.22832077978859452,
1288
+ "mt_chrf": 0.42452032963429065,
1289
+ "cls_acc": 0.6333333333333333,
1290
+ "mlm_chrf": 0.9408962186478436,
1291
+ "t2t_score": 0.6662499605384892
1292
+ }
1293
+ ],
1294
  "commonvoice_hours": 20.0,
1295
  "commonvoice_locale": "mr",
1296
  "population": {
1297
  "IN": 92826300
1298
  },
1299
  "language_family": "Indo-European",
1300
+ "mt_bleu": 0.22832077978859452,
1301
+ "mt_chrf": 0.42452032963429065,
1302
+ "cls_acc": 0.6333333333333333,
1303
+ "mlm_chrf": 0.9408962186478436,
1304
  "asr_wer": null,
1305
  "asr_chrf": null,
1306
+ "t2t_score": 0.6662499605384892,
1307
  "s2t_score": null
1308
  },
1309
  {
1310
  "language_name": "Javanese",
1311
  "bcp_47": "jv",
1312
  "speakers": 91180665,
1313
+ "scores": [
1314
+ {
1315
+ "model": "meta-llama/llama-3.3-70b-instruct",
1316
+ "model_type": "text-to-text",
1317
+ "mt_bleu": 0.25461973194021953,
1318
+ "mt_chrf": 0.42962098287756895,
1319
+ "cls_acc": 0.5666666666666667,
1320
+ "mlm_chrf": 0.9314748203362577,
1321
+ "t2t_score": 0.6425874899601645
1322
+ }
1323
+ ],
1324
  "commonvoice_hours": 0.0,
1325
  "commonvoice_locale": "jv",
1326
  "population": {
 
1328
  "MY": 391825
1329
  },
1330
  "language_family": "Austronesian",
1331
+ "mt_bleu": 0.25461973194021953,
1332
+ "mt_chrf": 0.42962098287756895,
1333
+ "cls_acc": 0.5666666666666667,
1334
+ "mlm_chrf": 0.9314748203362577,
1335
  "asr_wer": null,
1336
  "asr_chrf": null,
1337
+ "t2t_score": 0.6425874899601645,
1338
  "s2t_score": null
1339
  },
1340
  {
1341
  "language_name": "Vietnamese",
1342
  "bcp_47": "vi",
1343
  "speakers": 86222962,
1344
+ "scores": [
1345
+ {
1346
+ "model": "meta-llama/llama-3.3-70b-instruct",
1347
+ "model_type": "text-to-text",
1348
+ "mt_bleu": 0.23107984716515417,
1349
+ "mt_chrf": 0.4169594776564998,
1350
+ "cls_acc": 0.4666666666666667,
1351
+ "mlm_chrf": 0.9518257555072703,
1352
+ "t2t_score": 0.6118172999434789
1353
+ }
1354
+ ],
1355
  "commonvoice_hours": 5.9,
1356
  "commonvoice_locale": "vi",
1357
  "population": {
 
1361
  "VN": 84900318
1362
  },
1363
  "language_family": "Austroasiatic",
1364
+ "mt_bleu": 0.23107984716515417,
1365
+ "mt_chrf": 0.4169594776564998,
1366
+ "cls_acc": 0.4666666666666667,
1367
+ "mlm_chrf": 0.9518257555072703,
1368
  "asr_wer": null,
1369
  "asr_chrf": null,
1370
+ "t2t_score": 0.6118172999434789,
1371
  "s2t_score": null
1372
  },
1373
  {
1374
  "language_name": "Tamil",
1375
  "bcp_47": "ta",
1376
  "speakers": 85616159,
1377
+ "scores": [
1378
+ {
1379
+ "model": "meta-llama/llama-3.3-70b-instruct",
1380
+ "model_type": "text-to-text",
1381
+ "mt_bleu": 0.2275590311337094,
1382
+ "mt_chrf": 0.39686916122496285,
1383
+ "cls_acc": 0.43333333333333335,
1384
+ "mlm_chrf": 0.9472514842672666,
1385
+ "t2t_score": 0.5924846596085209
1386
+ }
1387
+ ],
1388
  "commonvoice_hours": 234.0,
1389
  "commonvoice_locale": "ta",
1390
  "population": {
 
1398
  "SG": 130403
1399
  },
1400
  "language_family": "Dravidian",
1401
+ "mt_bleu": 0.2275590311337094,
1402
+ "mt_chrf": 0.39686916122496285,
1403
+ "cls_acc": 0.43333333333333335,
1404
+ "mlm_chrf": 0.9472514842672666,
1405
  "asr_wer": null,
1406
  "asr_chrf": null,
1407
+ "t2t_score": 0.5924846596085209,
1408
  "s2t_score": null
1409
  },
1410
  {
1411
  "language_name": "Persian",
1412
  "bcp_47": "fa",
1413
  "speakers": 84710459,
1414
+ "scores": [
1415
+ {
1416
+ "model": "meta-llama/llama-3.3-70b-instruct",
1417
+ "model_type": "text-to-text",
1418
+ "mt_bleu": 0.2516753344674677,
1419
+ "mt_chrf": 0.4448545956789697,
1420
+ "cls_acc": 0.4666666666666667,
1421
+ "mlm_chrf": 0.9507199271394501,
1422
+ "t2t_score": 0.6207470631616955
1423
+ }
1424
+ ],
1425
  "commonvoice_hours": 370.0,
1426
  "commonvoice_locale": "fa",
1427
  "population": {
 
1436
  "TJ": 69215
1437
  },
1438
  "language_family": "Indo-European",
1439
+ "mt_bleu": 0.2516753344674677,
1440
+ "mt_chrf": 0.4448545956789697,
1441
+ "cls_acc": 0.4666666666666667,
1442
+ "mlm_chrf": 0.9507199271394501,
1443
  "asr_wer": null,
1444
  "asr_chrf": null,
1445
+ "t2t_score": 0.6207470631616955,
1446
  "s2t_score": null
1447
  },
1448
  {
 
1469
  "language_name": "Turkish",
1470
  "bcp_47": "tr",
1471
  "speakers": 80360704,
1472
+ "scores": [
1473
+ {
1474
+ "model": "meta-llama/llama-3.3-70b-instruct",
1475
+ "model_type": "text-to-text",
1476
+ "mt_bleu": 0.28856187360515456,
1477
+ "mt_chrf": 0.44838344659789414,
1478
+ "cls_acc": 0.5333333333333333,
1479
+ "mlm_chrf": 0.942880312425148,
1480
+ "t2t_score": 0.6415323641187918
1481
+ }
1482
+ ],
1483
  "commonvoice_hours": 127.0,
1484
  "commonvoice_locale": "tr",
1485
  "population": {
 
1497
  "UZ": 232297
1498
  },
1499
  "language_family": "Turkic",
1500
+ "mt_bleu": 0.28856187360515456,
1501
+ "mt_chrf": 0.44838344659789414,
1502
+ "cls_acc": 0.5333333333333333,
1503
+ "mlm_chrf": 0.942880312425148,
1504
  "asr_wer": null,
1505
  "asr_chrf": null,
1506
+ "t2t_score": 0.6415323641187918,
1507
  "s2t_score": null
1508
  },
1509
  {
1510
  "language_name": "Cantonese",
1511
  "bcp_47": "yue",
1512
  "speakers": 79654759,
1513
+ "scores": [
1514
+ {
1515
+ "model": "meta-llama/llama-3.3-70b-instruct",
1516
+ "model_type": "text-to-text",
1517
+ "mt_bleu": 0.2429350313522061,
1518
+ "mt_chrf": 0.43687518387422897,
1519
+ "cls_acc": 0.6,
1520
+ "mlm_chrf": 0.908223660437837,
1521
+ "t2t_score": 0.6483662814373553
1522
+ }
1523
+ ],
1524
  "commonvoice_hours": 203.0,
1525
  "commonvoice_locale": "yue",
1526
  "population": {
 
1529
  "HK": 6524919
1530
  },
1531
  "language_family": "Sino-Tibetan",
1532
+ "mt_bleu": 0.2429350313522061,
1533
+ "mt_chrf": 0.43687518387422897,
1534
+ "cls_acc": 0.6,
1535
+ "mlm_chrf": 0.908223660437837,
1536
  "asr_wer": null,
1537
  "asr_chrf": null,
1538
+ "t2t_score": 0.6483662814373553,
1539
  "s2t_score": null
1540
  },
1541
  {
1542
  "language_name": "Korean",
1543
  "bcp_47": "ko",
1544
  "speakers": 78357046,
1545
+ "scores": [
1546
+ {
1547
+ "model": "meta-llama/llama-3.3-70b-instruct",
1548
+ "model_type": "text-to-text",
1549
+ "mt_bleu": 0.21969579072372622,
1550
+ "mt_chrf": 0.4134343535369621,
1551
+ "cls_acc": 0.5666666666666667,
1552
+ "mlm_chrf": 0.9227146395635537,
1553
+ "t2t_score": 0.6342718865890609
1554
+ }
1555
+ ],
1556
  "commonvoice_hours": 1.7,
1557
  "commonvoice_locale": "ko",
1558
  "population": {
 
1565
  "US": 997917
1566
  },
1567
  "language_family": "Koreanic",
1568
+ "mt_bleu": 0.21969579072372622,
1569
+ "mt_chrf": 0.4134343535369621,
1570
+ "cls_acc": 0.5666666666666667,
1571
+ "mlm_chrf": 0.9227146395635537,
1572
  "asr_wer": null,
1573
  "asr_chrf": null,
1574
+ "t2t_score": 0.6342718865890609,
1575
  "s2t_score": null
1576
  },
1577
  {
1578
  "language_name": "Italian",
1579
  "bcp_47": "it",
1580
  "speakers": 70247060,
1581
+ "scores": [
1582
+ {
1583
+ "model": "meta-llama/llama-3.3-70b-instruct",
1584
+ "model_type": "text-to-text",
1585
+ "mt_bleu": 0.2774810104440749,
1586
+ "mt_chrf": 0.46396483435604213,
1587
+ "cls_acc": 0.6333333333333333,
1588
+ "mlm_chrf": 0.9708848195292886,
1589
+ "t2t_score": 0.689394329072888
1590
+ }
1591
+ ],
1592
  "commonvoice_hours": 362.0,
1593
  "commonvoice_locale": "it",
1594
  "population": {
 
1609
  "VA": 820
1610
  },
1611
  "language_family": "Indo-European",
1612
+ "mt_bleu": 0.2774810104440749,
1613
+ "mt_chrf": 0.46396483435604213,
1614
+ "cls_acc": 0.6333333333333333,
1615
+ "mlm_chrf": 0.9708848195292886,
1616
  "asr_wer": null,
1617
  "asr_chrf": null,
1618
+ "t2t_score": 0.689394329072888,
1619
  "s2t_score": null
1620
  },
1621
  {
1622
  "language_name": "Filipino",
1623
  "bcp_47": "fil",
1624
  "speakers": 67471096,
1625
+ "scores": [
1626
+ {
1627
+ "model": "meta-llama/llama-3.3-70b-instruct",
1628
+ "model_type": "text-to-text",
1629
+ "mt_bleu": 0.3248927726984041,
1630
+ "mt_chrf": 0.4689020729383555,
1631
+ "cls_acc": 0.4666666666666667,
1632
+ "mlm_chrf": 0.954910715229284,
1633
+ "t2t_score": 0.6301598182781021
1634
+ }
1635
+ ],
1636
  "commonvoice_hours": 0.0,
1637
  "commonvoice_locale": "tl",
1638
  "population": {
 
1641
  "US": 1397084
1642
  },
1643
  "language_family": "Austronesian",
1644
+ "mt_bleu": 0.3248927726984041,
1645
+ "mt_chrf": 0.4689020729383555,
1646
+ "cls_acc": 0.4666666666666667,
1647
+ "mlm_chrf": 0.954910715229284,
1648
  "asr_wer": null,
1649
  "asr_chrf": null,
1650
+ "t2t_score": 0.6301598182781021,
1651
  "s2t_score": null
1652
  },
1653
  {
1654
  "language_name": "Egyptian Arabic",
1655
  "bcp_47": "arz",
1656
  "speakers": 66639360,
1657
+ "scores": [
1658
+ {
1659
+ "model": "meta-llama/llama-3.3-70b-instruct",
1660
+ "model_type": "text-to-text",
1661
+ "mt_bleu": 0.20127691717583832,
1662
+ "mt_chrf": 0.3878019842631749,
1663
+ "cls_acc": 0.5666666666666667,
1664
+ "mlm_chrf": 0.9209179960794297,
1665
+ "t2t_score": 0.6251288823364237
1666
+ }
1667
+ ],
1668
  "commonvoice_hours": null,
1669
  "commonvoice_locale": null,
1670
  "population": {
1671
  "EG": 66639360
1672
  },
1673
  "language_family": "Afro-Asiatic",
1674
+ "mt_bleu": 0.20127691717583832,
1675
+ "mt_chrf": 0.3878019842631749,
1676
+ "cls_acc": 0.5666666666666667,
1677
+ "mlm_chrf": 0.9209179960794297,
1678
  "asr_wer": null,
1679
  "asr_chrf": null,
1680
+ "t2t_score": 0.6251288823364237,
1681
  "s2t_score": null
1682
  },
1683
  {
1684
  "language_name": "Gujarati",
1685
  "bcp_47": "gu",
1686
  "speakers": 61721799,
1687
+ "scores": [
1688
+ {
1689
+ "model": "meta-llama/llama-3.3-70b-instruct",
1690
+ "model_type": "text-to-text",
1691
+ "mt_bleu": 0.24812610549809738,
1692
+ "mt_chrf": 0.4318359636701651,
1693
+ "cls_acc": 0.4666666666666667,
1694
+ "mlm_chrf": 0.9077297218306815,
1695
+ "t2t_score": 0.6020774507225044
1696
+ }
1697
+ ],
1698
  "commonvoice_hours": 0.0,
1699
  "commonvoice_locale": "gu-IN",
1700
  "population": {
 
1704
  "KE": 4978
1705
  },
1706
  "language_family": "Indo-European",
1707
+ "mt_bleu": 0.24812610549809738,
1708
+ "mt_chrf": 0.4318359636701651,
1709
+ "cls_acc": 0.4666666666666667,
1710
+ "mlm_chrf": 0.9077297218306815,
1711
  "asr_wer": null,
1712
  "asr_chrf": null,
1713
+ "t2t_score": 0.6020774507225044,
1714
  "s2t_score": null
1715
  },
1716
  {
1717
  "language_name": "Thai",
1718
  "bcp_47": "th",
1719
  "speakers": 55181920,
1720
+ "scores": [
1721
+ {
1722
+ "model": "meta-llama/llama-3.3-70b-instruct",
1723
+ "model_type": "text-to-text",
1724
+ "mt_bleu": 0.2267380896222089,
1725
+ "mt_chrf": 0.4196149454731818,
1726
+ "cls_acc": 0.5333333333333333,
1727
+ "mlm_chrf": 0.9450374989027414,
1728
+ "t2t_score": 0.6326619259030855
1729
+ }
1730
+ ],
1731
  "commonvoice_hours": 172.0,
1732
  "commonvoice_locale": "th",
1733
  "population": {
1734
  "TH": 55181920
1735
  },
1736
  "language_family": "Tai-Kadai",
1737
+ "mt_bleu": 0.2267380896222089,
1738
+ "mt_chrf": 0.4196149454731818,
1739
+ "cls_acc": 0.5333333333333333,
1740
+ "mlm_chrf": 0.9450374989027414,
1741
  "asr_wer": null,
1742
  "asr_chrf": null,
1743
+ "t2t_score": 0.6326619259030855,
1744
  "s2t_score": null
1745
  },
1746
  {
 
1770
  "language_name": "Kannada",
1771
  "bcp_47": "kn",
1772
  "speakers": 49065330,
1773
+ "scores": [
1774
+ {
1775
+ "model": "meta-llama/llama-3.3-70b-instruct",
1776
+ "model_type": "text-to-text",
1777
+ "mt_bleu": 0.25507181761037034,
1778
+ "mt_chrf": 0.4455588394992276,
1779
+ "cls_acc": 0.4,
1780
+ "mlm_chrf": 0.9511289423946643,
1781
+ "t2t_score": 0.5988959272979639
1782
+ }
1783
+ ],
1784
  "commonvoice_hours": 0.0,
1785
  "commonvoice_locale": "kn",
1786
  "population": {
1787
  "IN": 49065330
1788
  },
1789
  "language_family": "Dravidian",
1790
+ "mt_bleu": 0.25507181761037034,
1791
+ "mt_chrf": 0.4455588394992276,
1792
+ "cls_acc": 0.4,
1793
+ "mlm_chrf": 0.9511289423946643,
1794
  "asr_wer": null,
1795
  "asr_chrf": null,
1796
+ "t2t_score": 0.5988959272979639,
1797
  "s2t_score": null
1798
  },
1799
  {
 
1820
  "language_name": "Malayalam",
1821
  "bcp_47": "ml",
1822
  "speakers": 43257484,
1823
+ "scores": [
1824
+ {
1825
+ "model": "meta-llama/llama-3.3-70b-instruct",
1826
+ "model_type": "text-to-text",
1827
+ "mt_bleu": 0.2011475124105192,
1828
+ "mt_chrf": 0.39404171184956394,
1829
+ "cls_acc": 0.43333333333333335,
1830
+ "mlm_chrf": 0.9231097391098181,
1831
+ "t2t_score": 0.5834949280975718
1832
+ }
1833
+ ],
1834
  "commonvoice_hours": 2.8,
1835
  "commonvoice_locale": "ml",
1836
  "population": {
 
1843
  "SG": 9935
1844
  },
1845
  "language_family": "Dravidian",
1846
+ "mt_bleu": 0.2011475124105192,
1847
+ "mt_chrf": 0.39404171184956394,
1848
+ "cls_acc": 0.43333333333333335,
1849
+ "mlm_chrf": 0.9231097391098181,
1850
  "asr_wer": null,
1851
  "asr_chrf": null,
1852
+ "t2t_score": 0.5834949280975718,
1853
  "s2t_score": null
1854
  },
1855
  {
1856
  "language_name": "Odia",
1857
  "bcp_47": "or",
1858
  "speakers": 42434880,
1859
+ "scores": [
1860
+ {
1861
+ "model": "meta-llama/llama-3.3-70b-instruct",
1862
+ "model_type": "text-to-text",
1863
+ "mt_bleu": 0.2521285657827072,
1864
+ "mt_chrf": 0.4422326291663303,
1865
+ "cls_acc": 0.4666666666666667,
1866
+ "mlm_chrf": 0.9217590917135282,
1867
+ "t2t_score": 0.6102194625155084
1868
+ }
1869
+ ],
1870
  "commonvoice_hours": 2.8,
1871
  "commonvoice_locale": "or",
1872
  "population": {
1873
  "IN": 42434880
1874
  },
1875
  "language_family": "Indo-European",
1876
+ "mt_bleu": 0.2521285657827072,
1877
+ "mt_chrf": 0.4422326291663303,
1878
+ "cls_acc": 0.4666666666666667,
1879
+ "mlm_chrf": 0.9217590917135282,
1880
  "asr_wer": null,
1881
  "asr_chrf": null,
1882
+ "t2t_score": 0.6102194625155084,
1883
  "s2t_score": null
1884
  },
1885
  {
1886
  "language_name": "Polish",
1887
  "bcp_47": "pl",
1888
  "speakers": 41077399,
1889
+ "scores": [
1890
+ {
1891
+ "model": "meta-llama/llama-3.3-70b-instruct",
1892
+ "model_type": "text-to-text",
1893
+ "mt_bleu": 0.26987204535648013,
1894
+ "mt_chrf": 0.4562492816384855,
1895
+ "cls_acc": 0.6,
1896
+ "mlm_chrf": 0.9584939056009891,
1897
+ "t2t_score": 0.6715810624131583
1898
+ }
1899
+ ],
1900
  "commonvoice_hours": 174.0,
1901
  "commonvoice_locale": "pl",
1902
  "population": {
 
1911
  "UA": 1054150
1912
  },
1913
  "language_family": "Indo-European",
1914
+ "mt_bleu": 0.26987204535648013,
1915
+ "mt_chrf": 0.4562492816384855,
1916
+ "cls_acc": 0.6,
1917
+ "mlm_chrf": 0.9584939056009891,
1918
  "asr_wer": null,
1919
  "asr_chrf": null,
1920
+ "t2t_score": 0.6715810624131583,
1921
  "s2t_score": null
1922
  },
1923
  {
 
1944
  "language_name": "Hausa",
1945
  "bcp_47": "ha",
1946
  "speakers": 40411882,
1947
+ "scores": [
1948
+ {
1949
+ "model": "meta-llama/llama-3.3-70b-instruct",
1950
+ "model_type": "text-to-text",
1951
+ "mt_bleu": 0.14767816277169446,
1952
+ "mt_chrf": 0.3435397334736881,
1953
+ "cls_acc": 0.6,
1954
+ "mlm_chrf": 0.9296807495100402,
1955
+ "t2t_score": 0.6244068276612428
1956
+ }
1957
+ ],
1958
  "commonvoice_hours": 4.1,
1959
  "commonvoice_locale": "ha",
1960
  "population": {
 
1965
  "SD": 820109
1966
  },
1967
  "language_family": "Afro-Asiatic",
1968
+ "mt_bleu": 0.14767816277169446,
1969
+ "mt_chrf": 0.3435397334736881,
1970
+ "cls_acc": 0.6,
1971
+ "mlm_chrf": 0.9296807495100402,
1972
  "asr_wer": null,
1973
  "asr_chrf": null,
1974
+ "t2t_score": 0.6244068276612428,
1975
  "s2t_score": null
1976
  },
1977
  {
1978
  "language_name": "Sindhi",
1979
  "bcp_47": "sd",
1980
  "speakers": 40329510,
1981
+ "scores": [
1982
+ {
1983
+ "model": "meta-llama/llama-3.3-70b-instruct",
1984
+ "model_type": "text-to-text",
1985
+ "mt_bleu": 0.21679684560539594,
1986
+ "mt_chrf": 0.4130326388570074,
1987
+ "cls_acc": 0.3333333333333333,
1988
+ "mlm_chrf": 0.9140884463880482,
1989
+ "t2t_score": 0.5534848061927963
1990
+ }
1991
+ ],
1992
  "commonvoice_hours": 0.4,
1993
  "commonvoice_locale": "sd",
1994
  "population": {
 
1996
  "PK": 35025150
1997
  },
1998
  "language_family": "Indo-European",
1999
+ "mt_bleu": 0.21679684560539594,
2000
+ "mt_chrf": 0.4130326388570074,
2001
+ "cls_acc": 0.3333333333333333,
2002
+ "mlm_chrf": 0.9140884463880482,
2003
  "asr_wer": null,
2004
  "asr_chrf": null,
2005
+ "t2t_score": 0.5534848061927963,
2006
  "s2t_score": null
2007
  },
2008
  {
 
2034
  "language_name": "Malay",
2035
  "bcp_47": "ms",
2036
  "speakers": 38097307,
2037
+ "scores": [
2038
+ {
2039
+ "model": "meta-llama/llama-3.3-70b-instruct",
2040
+ "model_type": "text-to-text",
2041
+ "mt_bleu": 0.2640387431669489,
2042
+ "mt_chrf": 0.45903065670305865,
2043
+ "cls_acc": 0.6,
2044
+ "mlm_chrf": 0.953241903654886,
2045
+ "t2t_score": 0.6707575201193148
2046
+ }
2047
+ ],
2048
  "commonvoice_hours": 0.0,
2049
  "commonvoice_locale": "ms",
2050
  "population": {
 
2055
  "SG": 869352
2056
  },
2057
  "language_family": "Austronesian",
2058
+ "mt_bleu": 0.2640387431669489,
2059
+ "mt_chrf": 0.45903065670305865,
2060
+ "cls_acc": 0.6,
2061
+ "mlm_chrf": 0.953241903654886,
2062
  "asr_wer": null,
2063
  "asr_chrf": null,
2064
+ "t2t_score": 0.6707575201193148,
2065
  "s2t_score": null
2066
  },
2067
  {
2068
  "language_name": "Burmese",
2069
  "bcp_47": "my",
2070
  "speakers": 36559231,
2071
+ "scores": [
2072
+ {
2073
+ "model": "meta-llama/llama-3.3-70b-instruct",
2074
+ "model_type": "text-to-text",
2075
+ "mt_bleu": 0.18617334539824332,
2076
+ "mt_chrf": 0.37050602802081317,
2077
+ "cls_acc": 0.4,
2078
+ "mlm_chrf": 0.9435884643372262,
2079
+ "t2t_score": 0.5713648307860132
2080
+ }
2081
+ ],
2082
  "commonvoice_hours": 0.0,
2083
  "commonvoice_locale": "my",
2084
  "population": {
 
2086
  "MM": 36217664
2087
  },
2088
  "language_family": "Sino-Tibetan",
2089
+ "mt_bleu": 0.18617334539824332,
2090
+ "mt_chrf": 0.37050602802081317,
2091
+ "cls_acc": 0.4,
2092
+ "mlm_chrf": 0.9435884643372262,
2093
  "asr_wer": null,
2094
  "asr_chrf": null,
2095
+ "t2t_score": 0.5713648307860132,
2096
  "s2t_score": null
2097
  },
2098
  {
2099
  "language_name": "Amharic",
2100
  "bcp_47": "am",
2101
  "speakers": 35728475,
2102
+ "scores": [
2103
+ {
2104
+ "model": "meta-llama/llama-3.3-70b-instruct",
2105
+ "model_type": "text-to-text",
2106
+ "mt_bleu": 0.15002522598066087,
2107
+ "mt_chrf": 0.3344555209113584,
2108
+ "cls_acc": 0.36666666666666664,
2109
+ "mlm_chrf": 0.9209607335412187,
2110
+ "t2t_score": 0.540694307039748
2111
+ }
2112
+ ],
2113
  "commonvoice_hours": 1.8,
2114
  "commonvoice_locale": "am",
2115
  "population": {
 
2117
  "IL": 51185
2118
  },
2119
  "language_family": "Afro-Asiatic",
2120
+ "mt_bleu": 0.15002522598066087,
2121
+ "mt_chrf": 0.3344555209113584,
2122
+ "cls_acc": 0.36666666666666664,
2123
+ "mlm_chrf": 0.9209607335412187,
2124
  "asr_wer": null,
2125
  "asr_chrf": null,
2126
+ "t2t_score": 0.540694307039748,
2127
  "s2t_score": null
2128
  },
2129
  {
 
2150
  "language_name": "Oromo",
2151
  "bcp_47": "om",
2152
  "speakers": 34897121,
2153
+ "scores": [
2154
+ {
2155
+ "model": "meta-llama/llama-3.3-70b-instruct",
2156
+ "model_type": "text-to-text",
2157
+ "mt_bleu": 0.06509147151730071,
2158
+ "mt_chrf": 0.22674886804446034,
2159
+ "cls_acc": 0.4666666666666667,
2160
+ "mlm_chrf": 0.935213305660654,
2161
+ "t2t_score": 0.542876280123927
2162
+ }
2163
+ ],
2164
  "commonvoice_hours": 0.0,
2165
  "commonvoice_locale": "om",
2166
  "population": {
 
2169
  "SO": 49380
2170
  },
2171
  "language_family": "Afro-Asiatic",
2172
+ "mt_bleu": 0.06509147151730071,
2173
+ "mt_chrf": 0.22674886804446034,
2174
+ "cls_acc": 0.4666666666666667,
2175
+ "mlm_chrf": 0.935213305660654,
2176
  "asr_wer": null,
2177
  "asr_chrf": null,
2178
+ "t2t_score": 0.542876280123927,
2179
  "s2t_score": null
2180
  },
2181
  {
2182
  "language_name": "Bhojpuri",
2183
  "bcp_47": "bho",
2184
  "speakers": 32934797,
2185
+ "scores": [
2186
+ {
2187
+ "model": "meta-llama/llama-3.3-70b-instruct",
2188
+ "model_type": "text-to-text",
2189
+ "mt_bleu": 0.24092898437545654,
2190
+ "mt_chrf": 0.41894143077328727,
2191
+ "cls_acc": 0.3333333333333333,
2192
+ "mlm_chrf": 0.9323077688040071,
2193
+ "t2t_score": 0.5615275109702093
2194
+ }
2195
+ ],
2196
  "commonvoice_hours": null,
2197
  "commonvoice_locale": null,
2198
  "population": {
 
2201
  "NP": 2062297
2202
  },
2203
  "language_family": "Indo-European",
2204
+ "mt_bleu": 0.24092898437545654,
2205
+ "mt_chrf": 0.41894143077328727,
2206
+ "cls_acc": 0.3333333333333333,
2207
+ "mlm_chrf": 0.9323077688040071,
2208
  "asr_wer": null,
2209
  "asr_chrf": null,
2210
+ "t2t_score": 0.5615275109702093,
2211
  "s2t_score": null
2212
  },
2213
  {
2214
  "language_name": "Uzbek",
2215
  "bcp_47": "uz",
2216
  "speakers": 32792780,
2217
+ "scores": [
2218
+ {
2219
+ "model": "meta-llama/llama-3.3-70b-instruct",
2220
+ "model_type": "text-to-text",
2221
+ "mt_bleu": 0.21766825893077735,
2222
+ "mt_chrf": 0.41950879947755915,
2223
+ "cls_acc": 0.5,
2224
+ "mlm_chrf": 0.9467806955511296,
2225
+ "t2t_score": 0.6220964983428963
2226
+ }
2227
+ ],
2228
  "commonvoice_hours": 100.0,
2229
  "commonvoice_locale": "uz",
2230
  "population": {
 
2235
  "UZ": 30565400
2236
  },
2237
  "language_family": "Turkic",
2238
+ "mt_bleu": 0.21766825893077735,
2239
+ "mt_chrf": 0.41950879947755915,
2240
+ "cls_acc": 0.5,
2241
+ "mlm_chrf": 0.9467806955511296,
2242
  "asr_wer": null,
2243
  "asr_chrf": null,
2244
+ "t2t_score": 0.6220964983428963,
2245
  "s2t_score": null
2246
  },
2247
  {
2248
  "language_name": "Azerbaijani",
2249
  "bcp_47": "az",
2250
  "speakers": 32446682,
2251
+ "scores": [
2252
+ {
2253
+ "model": "meta-llama/llama-3.3-70b-instruct",
2254
+ "model_type": "text-to-text",
2255
+ "mt_bleu": 0.1836488163977562,
2256
+ "mt_chrf": 0.37916693488563025,
2257
+ "cls_acc": 0.5333333333333333,
2258
+ "mlm_chrf": 0.9328142884587273,
2259
+ "t2t_score": 0.615104852225897
2260
+ }
2261
+ ],
2262
  "commonvoice_hours": 0.5,
2263
  "commonvoice_locale": "az",
2264
  "population": {
 
2270
  "TR": 1140044
2271
  },
2272
  "language_family": "Turkic",
2273
+ "mt_bleu": 0.1836488163977562,
2274
+ "mt_chrf": 0.37916693488563025,
2275
+ "cls_acc": 0.5333333333333333,
2276
+ "mlm_chrf": 0.9328142884587273,
2277
  "asr_wer": null,
2278
  "asr_chrf": null,
2279
+ "t2t_score": 0.615104852225897,
2280
  "s2t_score": null
2281
  },
2282
  {
 
3677
  "bcp_47": "ug",
3678
  "speakers": 8052967,
3679
  "scores": [],
3680
+ "commonvoice_hours": 364.0,
3681
  "commonvoice_locale": "ug",
3682
  "population": {
3683
  "AF": 3005,
 
4025
  "bcp_47": "sk",
4026
  "speakers": 6680269,
4027
  "scores": [],
4028
+ "commonvoice_hours": 45.0,
4029
  "commonvoice_locale": "sk",
4030
  "population": {
4031
  "CZ": 1712400,
 
7057
  "bcp_47": "ga",
7058
  "speakers": 1237487,
7059
  "scores": [],
7060
+ "commonvoice_hours": 5.9,
7061
  "commonvoice_locale": "ga-IE",
7062
  "population": {
7063
  "GB": 98642,
 
7379
  "bcp_47": "tig",
7380
  "speakers": 1094616,
7381
  "scores": [],
7382
+ "commonvoice_hours": 3.0,
7383
  "commonvoice_locale": "tig",
7384
  "population": {
7385
  "ER": 1094616
 
7440
  "bcp_47": "kbd",
7441
  "speakers": 1070873,
7442
  "scores": [],
7443
+ "commonvoice_hours": 16.0,
7444
  "commonvoice_locale": "kbd",
7445
  "population": {
7446
  "RU": 439338,
 
9222
  "bcp_47": "sah",
9223
  "speakers": 453510,
9224
  "scores": [],
9225
+ "commonvoice_hours": 8.8,
9226
  "commonvoice_locale": "sah",
9227
  "population": {
9228
  "RU": 453510
 
9323
  "bcp_47": "xmf",
9324
  "speakers": 439670,
9325
  "scores": [],
9326
+ "commonvoice_hours": 9.5,
9327
  "commonvoice_locale": "xmf",
9328
  "population": {
9329
  "GE": 439670
 
11632
  "bcp_47": "trw",
11633
  "speakers": 123756,
11634
  "scores": [],
11635
+ "commonvoice_hours": 19.0,
11636
  "commonvoice_locale": "trw",
11637
  "population": {
11638
  "PK": 123756
 
11912
  "bcp_47": "ab",
11913
  "speakers": 91953,
11914
  "scores": [],
11915
+ "commonvoice_hours": 67.0,
11916
  "commonvoice_locale": "ab",
11917
  "population": {
11918
  "GE": 87934,
 
13649
  "bcp_47": "ik",
13650
  "speakers": 7983,
13651
  "scores": [],
13652
+ "commonvoice_hours": 2.8,
13653
  "commonvoice_locale": "ipk",
13654
  "population": {
13655
  "US": 7983
 
13949
  "bcp_47": "trv",
13950
  "speakers": 4721,
13951
  "scores": [],
13952
+ "commonvoice_hours": 5.7,
13953
  "commonvoice_locale": "trv",
13954
  "population": {
13955
  "TW": 4721
 
14309
  "bcp_47": "gv",
14310
  "speakers": 1719,
14311
  "scores": [],
14312
+ "commonvoice_hours": 4.5,
14313
  "commonvoice_locale": "gv",
14314
  "population": {
14315
  "IM": 1719
 
14429
  "bcp_47": "sei",
14430
  "speakers": 901,
14431
  "scores": [],
14432
+ "commonvoice_hours": 1.2,
14433
  "commonvoice_locale": "sei",
14434
  "population": {
14435
  "MX": 901