David Pomerenke committed on
Commit
e223525
·
1 Parent(s): 3d9cde9

Better separation of ttt/stt in results format

Browse files
Files changed (2) hide show
  1. evals.py +73 -44
  2. results.json +63 -84
evals.py CHANGED
@@ -377,7 +377,9 @@ async def transcribe_elevenlabs(path, model):
377
  client = AsyncElevenLabs(api_key=getenv("ELEVENLABS_API_KEY"))
378
  async with elevenlabs_rate_limit:
379
  with open(path, "rb") as file:
380
- response = await client.speech_to_text.convert(model_id=modelname, file=file)
 
 
381
  return response.text
382
 
383
 
@@ -400,13 +402,26 @@ async def transcribe(path, model="elevenlabs/scribe_v1"):
400
  raise ValueError(f"Model {model} not supported")
401
 
402
 
 
403
  async def transcribe_and_evaluate(model, language_bcp_47, nr):
404
  language = languages[languages["bcp_47"] == language_bcp_47].iloc[0]
405
- fleurs = pd.read_csv(f"data/fleurs/{language.fleurs_tag}/dev.tsv", sep="\t", names=["id", "fname", "raw_transcription", "transcription", "words", "id2", "gender"])
 
 
 
 
 
 
 
 
 
 
 
 
406
  item = fleurs.iloc[nr]
407
  path = f"data/fleurs/{language.fleurs_tag}/audio/dev/{item.fname}"
408
  pred = await transcribe(path, model=model)
409
- score = wer.compute(predictions=[pred], references=[item.transcription])
410
  return {
411
  "model": model,
412
  "bcp_47": language["bcp_47"],
@@ -419,7 +434,7 @@ async def transcribe_and_evaluate(model, language_bcp_47, nr):
419
 
420
 
421
  def mean(lst):
422
- return sum(lst) / len(lst) if lst else 0
423
 
424
 
425
  async def main():
@@ -474,7 +489,7 @@ async def main():
474
  all_results = []
475
  for language in languages.itertuples():
476
  results = []
477
- for model in models + transcription_models:
478
  scores_mt = [
479
  score
480
  for score in translation_scores
@@ -490,54 +505,68 @@ async def main():
490
  for score in mlm_scores
491
  if score["bcp_47"] == language.bcp_47 and score["model"] == model
492
  ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
493
  scores_asr = [
494
  score
495
  for score in transcription_scores
496
  if score["bcp_47"] == language.bcp_47 and score["model"] == model
497
  ]
498
- mt_bleu = mean([s["mt_bleu"] for s in scores_mt])
499
- mt_chrf = mean([s["mt_chrf"] for s in scores_mt])
500
- cls_acc = mean([s["true"] == s["pred"] for s in scores_cls])
501
- mlm_chrf = mean([s["mlm_chrf"] for s in scores_mlm])
502
  asr_wer = mean([s["asr_wer"] for s in scores_asr])
503
- overall_score = (mt_chrf / 100 + cls_acc + mlm_chrf / 100) / 3
504
- if scores_mt or scores_asr:
505
- results.append(
506
- {
507
- "model": model,
508
- "mt_bleu": mt_bleu,
509
- "mt_chrf": mt_chrf,
510
- "cls_acc": cls_acc,
511
- "mlm_chrf": mlm_chrf,
512
- "asr_wer": asr_wer,
513
- "overall_score": overall_score,
514
- }
515
- )
516
- if results:
517
- all_results.append(
518
  {
519
- "language_name": language.language_name,
520
- "bcp_47": language.bcp_47,
521
- "speakers": language.speakers,
522
- "scores": results,
523
- "mt_bleu": mean([s["mt_bleu"] for s in results]),
524
- "mt_chrf": mean([s["mt_chrf"] for s in results]),
525
- "cls_acc": mean([s["cls_acc"] for s in results]),
526
- "mlm_chrf": mean([s["mlm_chrf"] for s in results]),
527
- "asr_wer": mean([s["asr_wer"] for s in results]),
528
- "overall_score": mean([s["overall_score"] for s in results]),
529
- "commonvoice_hours": language.commonvoice_hours
530
- if not pd.isna(language.commonvoice_hours)
531
- else None,
532
- "commonvoice_locale": language.commonvoice_locale
533
- if not pd.isna(language.commonvoice_locale)
534
- else None,
535
- "population": population(language.bcp_47),
536
- "language_family": language_family(
537
- language.flores_path.split("_")[0]
538
- ),
539
  }
540
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
541
  with open("results.json", "w") as f:
542
  json.dump(all_results, f, indent=2, ensure_ascii=False)
543
 
 
377
  client = AsyncElevenLabs(api_key=getenv("ELEVENLABS_API_KEY"))
378
  async with elevenlabs_rate_limit:
379
  with open(path, "rb") as file:
380
+ response = await client.speech_to_text.convert(
381
+ model_id=modelname, file=file
382
+ )
383
  return response.text
384
 
385
 
 
402
  raise ValueError(f"Model {model} not supported")
403
 
404
 
405
+ @cache
406
  async def transcribe_and_evaluate(model, language_bcp_47, nr):
407
  language = languages[languages["bcp_47"] == language_bcp_47].iloc[0]
408
+ fleurs = pd.read_csv(
409
+ f"data/fleurs/{language.fleurs_tag}/dev.tsv",
410
+ sep="\t",
411
+ names=[
412
+ "id",
413
+ "fname",
414
+ "raw_transcription",
415
+ "transcription",
416
+ "words",
417
+ "id2",
418
+ "gender",
419
+ ],
420
+ )
421
  item = fleurs.iloc[nr]
422
  path = f"data/fleurs/{language.fleurs_tag}/audio/dev/{item.fname}"
423
  pred = await transcribe(path, model=model)
424
+ score = wer.compute(predictions=[pred], references=[item.transcription])
425
  return {
426
  "model": model,
427
  "bcp_47": language["bcp_47"],
 
434
 
435
 
436
  def mean(lst):
437
+ return sum(lst) / len(lst) if lst else None
438
 
439
 
440
  async def main():
 
489
  all_results = []
490
  for language in languages.itertuples():
491
  results = []
492
+ for model in models:
493
  scores_mt = [
494
  score
495
  for score in translation_scores
 
505
  for score in mlm_scores
506
  if score["bcp_47"] == language.bcp_47 and score["model"] == model
507
  ]
508
+ if not scores_mt:
509
+ continue
510
+ mt_bleu = mean([s["mt_bleu"] for s in scores_mt])
511
+ mt_chrf = mean([s["mt_chrf"] for s in scores_mt])
512
+ cls_acc = mean([s["true"] == s["pred"] for s in scores_cls])
513
+ mlm_chrf = mean([s["mlm_chrf"] for s in scores_mlm])
514
+ overall_score = (mt_chrf / 100 + cls_acc + mlm_chrf / 100) / 3
515
+ results.append(
516
+ {
517
+ "model": model,
518
+ "model_type": "text-to-text",
519
+ "mt_bleu": mt_bleu,
520
+ "mt_chrf": mt_chrf,
521
+ "cls_acc": cls_acc,
522
+ "mlm_chrf": mlm_chrf,
523
+ "overall_score": overall_score,
524
+ }
525
+ )
526
+ for model in transcription_models:
527
  scores_asr = [
528
  score
529
  for score in transcription_scores
530
  if score["bcp_47"] == language.bcp_47 and score["model"] == model
531
  ]
532
+ if not scores_asr:
533
+ continue
 
 
534
  asr_wer = mean([s["asr_wer"] for s in scores_asr])
535
+ results.append(
 
 
 
 
 
 
 
 
 
 
 
 
 
 
536
  {
537
+ "model": model,
538
+ "model_type": "speech-to-text",
539
+ "asr_wer": asr_wer,
540
+ "overall_score": asr_wer,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
541
  }
542
  )
543
+ if results:
544
+ language_results = {
545
+ "language_name": language.language_name,
546
+ "bcp_47": language.bcp_47,
547
+ "speakers": language.speakers,
548
+ "scores": results,
549
+ "commonvoice_hours": language.commonvoice_hours
550
+ if not pd.isna(language.commonvoice_hours)
551
+ else None,
552
+ "commonvoice_locale": language.commonvoice_locale
553
+ if not pd.isna(language.commonvoice_locale)
554
+ else None,
555
+ "population": population(language.bcp_47),
556
+ "language_family": language_family(language.flores_path.split("_")[0]),
557
+ }
558
+ for score in [
559
+ "mt_bleu",
560
+ "mt_chrf",
561
+ "cls_acc",
562
+ "mlm_chrf",
563
+ "asr_wer",
564
+ "overall_score",
565
+ ]:
566
+ language_results[score] = mean(
567
+ [s[score] for s in results if score in s]
568
+ )
569
+ all_results.append(language_results)
570
  with open("results.json", "w") as f:
571
  json.dump(all_results, f, indent=2, ensure_ascii=False)
572
 
results.json CHANGED
@@ -6,74 +6,62 @@
6
  "scores": [
7
  {
8
  "model": "openai/gpt-4o-mini",
 
9
  "mt_bleu": 0.5245466124037277,
10
  "mt_chrf": 65.25187717981981,
11
  "cls_acc": 0.5666666666666667,
12
  "mlm_chrf": 97.84704595784264,
13
- "asr_wer": 0,
14
  "overall_score": 0.7325519660144305
15
  },
16
  {
17
  "model": "meta-llama/llama-3.3-70b-instruct",
 
18
  "mt_bleu": 0.48750797044187216,
19
  "mt_chrf": 63.24229348441665,
20
  "cls_acc": 0.6,
21
  "mlm_chrf": 93.62602669879945,
22
- "asr_wer": 0,
23
  "overall_score": 0.7228944006107203
24
  },
25
  {
26
  "model": "mistralai/mistral-small-24b-instruct-2501",
 
27
  "mt_bleu": 0.486501959595472,
28
  "mt_chrf": 63.8187259254881,
29
  "cls_acc": 0.5333333333333333,
30
  "mlm_chrf": 79.91140615317198,
31
- "asr_wer": 0,
32
  "overall_score": 0.656878218039978
33
  },
34
  {
35
  "model": "google/gemini-2.0-flash-001",
 
36
  "mt_bleu": 0.6060954569411976,
37
  "mt_chrf": 71.2288943066563,
38
  "cls_acc": 0.8666666666666667,
39
  "mlm_chrf": 98.79868693366329,
40
- "asr_wer": 0,
41
  "overall_score": 0.8556474930232877
42
  },
43
  {
44
  "model": "microsoft/phi-4",
 
45
  "mt_bleu": 0.5199836121545649,
46
  "mt_chrf": 66.05410510011644,
47
  "cls_acc": 0.7,
48
  "mlm_chrf": 97.2290729316734,
49
- "asr_wer": 0,
50
  "overall_score": 0.7776105934392995
51
  },
52
  {
53
  "model": "elevenlabs/scribe_v1",
54
- "mt_bleu": 0,
55
- "mt_chrf": 0,
56
- "cls_acc": 0,
57
- "mlm_chrf": 0,
58
- "asr_wer": 0.3384224589434353,
59
- "overall_score": 0.0
60
  },
61
  {
62
  "model": "openai/whisper-large-v3-turbo",
63
- "mt_bleu": 0,
64
- "mt_chrf": 0,
65
- "cls_acc": 0,
66
- "mlm_chrf": 0,
67
  "asr_wer": 0.2670470041301468,
68
- "overall_score": 0.0
69
  }
70
  ],
71
- "mt_bleu": 0.37494794450526203,
72
- "mt_chrf": 47.08512799949961,
73
- "cls_acc": 0.4666666666666667,
74
- "mlm_chrf": 66.77317695359297,
75
- "asr_wer": 0.08649563758194032,
76
- "overall_score": 0.5350832387325309,
77
  "commonvoice_hours": 2651.0,
78
  "commonvoice_locale": "en",
79
  "population": {
@@ -233,7 +221,13 @@
233
  "ZM": 2788256,
234
  "ZW": 6109446
235
  },
236
- "language_family": "Indo-European"
 
 
 
 
 
 
237
  },
238
  {
239
  "language_name": "Chinese",
@@ -242,74 +236,62 @@
242
  "scores": [
243
  {
244
  "model": "openai/gpt-4o-mini",
 
245
  "mt_bleu": 0.4887591978835982,
246
  "mt_chrf": 70.23584222732792,
247
  "cls_acc": 0.43333333333333335,
248
  "mlm_chrf": 91.54177259254774,
249
- "asr_wer": 0,
250
  "overall_score": 0.6837031605106967
251
  },
252
  {
253
  "model": "meta-llama/llama-3.3-70b-instruct",
 
254
  "mt_bleu": 0.38557580495281013,
255
  "mt_chrf": 61.11151378837755,
256
  "cls_acc": 0.5666666666666667,
257
  "mlm_chrf": 94.55849047452216,
258
- "asr_wer": 0,
259
  "overall_score": 0.7077889030985546
260
  },
261
  {
262
  "model": "mistralai/mistral-small-24b-instruct-2501",
 
263
  "mt_bleu": 0.48838341708028316,
264
  "mt_chrf": 71.48446350451079,
265
  "cls_acc": 0.7666666666666667,
266
  "mlm_chrf": 91.82889304045095,
267
- "asr_wer": 0,
268
  "overall_score": 0.799933410705428
269
  },
270
  {
271
  "model": "google/gemini-2.0-flash-001",
 
272
  "mt_bleu": 0.392664198976078,
273
  "mt_chrf": 58.52460212908651,
274
  "cls_acc": 0.8666666666666667,
275
  "mlm_chrf": 94.26439448464122,
276
- "asr_wer": 0,
277
  "overall_score": 0.7981855442679814
278
  },
279
  {
280
  "model": "microsoft/phi-4",
 
281
  "mt_bleu": 0.4892212172376435,
282
  "mt_chrf": 69.23023880157619,
283
  "cls_acc": 0.7,
284
  "mlm_chrf": 93.53779735205161,
285
- "asr_wer": 0,
286
  "overall_score": 0.7758934538454261
287
  },
288
  {
289
  "model": "elevenlabs/scribe_v1",
290
- "mt_bleu": 0,
291
- "mt_chrf": 0,
292
- "cls_acc": 0,
293
- "mlm_chrf": 0,
294
  "asr_wer": 1.0,
295
- "overall_score": 0.0
296
  },
297
  {
298
  "model": "openai/whisper-large-v3-turbo",
299
- "mt_bleu": 0,
300
- "mt_chrf": 0,
301
- "cls_acc": 0,
302
- "mlm_chrf": 0,
303
  "asr_wer": 1.0,
304
- "overall_score": 0.0
305
  }
306
  ],
307
- "mt_bleu": 0.3206576908757733,
308
- "mt_chrf": 47.226665778697,
309
- "cls_acc": 0.47619047619047616,
310
- "mlm_chrf": 66.53304970631623,
311
- "asr_wer": 0.2857142857142857,
312
- "overall_score": 0.5379292103468696,
313
  "commonvoice_hours": 422.0,
314
  "commonvoice_locale": "zh-TW",
315
  "population": {
@@ -334,7 +316,13 @@
334
  "US": 2295209,
335
  "VN": 1085934
336
  },
337
- "language_family": "Sino-Tibetan"
 
 
 
 
 
 
338
  },
339
  {
340
  "language_name": "Hindi",
@@ -343,29 +331,20 @@
343
  "scores": [
344
  {
345
  "model": "meta-llama/llama-3.3-70b-instruct",
 
346
  "mt_bleu": 0.32404902340686065,
347
  "mt_chrf": 53.54085104449268,
348
  "cls_acc": 0.5,
349
  "mlm_chrf": 96.17240172798218,
350
- "asr_wer": 0,
351
  "overall_score": 0.6657108425749162
352
  },
353
  {
354
  "model": "openai/whisper-large-v3-turbo",
355
- "mt_bleu": 0,
356
- "mt_chrf": 0,
357
- "cls_acc": 0,
358
- "mlm_chrf": 0,
359
  "asr_wer": 0.40237814140706235,
360
- "overall_score": 0.0
361
  }
362
  ],
363
- "mt_bleu": 0.16202451170343032,
364
- "mt_chrf": 26.77042552224634,
365
- "cls_acc": 0.25,
366
- "mlm_chrf": 48.08620086399109,
367
- "asr_wer": 0.20118907070353118,
368
- "overall_score": 0.3328554212874581,
369
  "commonvoice_hours": 16.0,
370
  "commonvoice_locale": "hi-IN",
371
  "population": {
@@ -376,7 +355,13 @@
376
  "UG": 2206,
377
  "ZA": 1129272
378
  },
379
- "language_family": "Indo-European"
 
 
 
 
 
 
380
  },
381
  {
382
  "language_name": "Spanish",
@@ -385,29 +370,20 @@
385
  "scores": [
386
  {
387
  "model": "meta-llama/llama-3.3-70b-instruct",
 
388
  "mt_bleu": 0.31587937116142056,
389
  "mt_chrf": 52.142851262301726,
390
  "cls_acc": 0.5,
391
  "mlm_chrf": 96.92768852306384,
392
- "asr_wer": 0,
393
  "overall_score": 0.6635684659512185
394
  },
395
  {
396
  "model": "openai/whisper-large-v3-turbo",
397
- "mt_bleu": 0,
398
- "mt_chrf": 0,
399
- "cls_acc": 0,
400
- "mlm_chrf": 0,
401
  "asr_wer": 0.16641846898275137,
402
- "overall_score": 0.0
403
  }
404
  ],
405
- "mt_bleu": 0.15793968558071028,
406
- "mt_chrf": 26.071425631150863,
407
- "cls_acc": 0.25,
408
- "mlm_chrf": 48.46384426153192,
409
- "asr_wer": 0.08320923449137568,
410
- "overall_score": 0.3317842329756093,
411
  "commonvoice_hours": 446.0,
412
  "commonvoice_locale": "es",
413
  "population": {
@@ -451,7 +427,13 @@
451
  "UY": 2981097,
452
  "VE": 23488572
453
  },
454
- "language_family": "Indo-European"
 
 
 
 
 
 
455
  },
456
  {
457
  "language_name": "Arabic",
@@ -460,29 +442,20 @@
460
  "scores": [
461
  {
462
  "model": "meta-llama/llama-3.3-70b-instruct",
 
463
  "mt_bleu": 0.39547934933771334,
464
  "mt_chrf": 57.51652731936118,
465
  "cls_acc": 0.4666666666666667,
466
  "mlm_chrf": 94.97026443937914,
467
- "asr_wer": 0,
468
  "overall_score": 0.6638448614180232
469
  },
470
  {
471
  "model": "openai/whisper-large-v3-turbo",
472
- "mt_bleu": 0,
473
- "mt_chrf": 0,
474
- "cls_acc": 0,
475
- "mlm_chrf": 0,
476
  "asr_wer": 0.17249214841393012,
477
- "overall_score": 0.0
478
  }
479
  ],
480
- "mt_bleu": 0.19773967466885667,
481
- "mt_chrf": 28.75826365968059,
482
- "cls_acc": 0.23333333333333334,
483
- "mlm_chrf": 47.48513221968957,
484
- "asr_wer": 0.08624607420696506,
485
- "overall_score": 0.3319224307090116,
486
  "commonvoice_hours": 91.0,
487
  "commonvoice_locale": "ar",
488
  "population": {
@@ -525,6 +498,12 @@
525
  "TR": 459298,
526
  "YE": 22114456
527
  },
528
- "language_family": "Afro-Asiatic"
 
 
 
 
 
 
529
  }
530
  ]
 
6
  "scores": [
7
  {
8
  "model": "openai/gpt-4o-mini",
9
+ "model_type": "text-to-text",
10
  "mt_bleu": 0.5245466124037277,
11
  "mt_chrf": 65.25187717981981,
12
  "cls_acc": 0.5666666666666667,
13
  "mlm_chrf": 97.84704595784264,
 
14
  "overall_score": 0.7325519660144305
15
  },
16
  {
17
  "model": "meta-llama/llama-3.3-70b-instruct",
18
+ "model_type": "text-to-text",
19
  "mt_bleu": 0.48750797044187216,
20
  "mt_chrf": 63.24229348441665,
21
  "cls_acc": 0.6,
22
  "mlm_chrf": 93.62602669879945,
 
23
  "overall_score": 0.7228944006107203
24
  },
25
  {
26
  "model": "mistralai/mistral-small-24b-instruct-2501",
27
+ "model_type": "text-to-text",
28
  "mt_bleu": 0.486501959595472,
29
  "mt_chrf": 63.8187259254881,
30
  "cls_acc": 0.5333333333333333,
31
  "mlm_chrf": 79.91140615317198,
 
32
  "overall_score": 0.656878218039978
33
  },
34
  {
35
  "model": "google/gemini-2.0-flash-001",
36
+ "model_type": "text-to-text",
37
  "mt_bleu": 0.6060954569411976,
38
  "mt_chrf": 71.2288943066563,
39
  "cls_acc": 0.8666666666666667,
40
  "mlm_chrf": 98.79868693366329,
 
41
  "overall_score": 0.8556474930232877
42
  },
43
  {
44
  "model": "microsoft/phi-4",
45
+ "model_type": "text-to-text",
46
  "mt_bleu": 0.5199836121545649,
47
  "mt_chrf": 66.05410510011644,
48
  "cls_acc": 0.7,
49
  "mlm_chrf": 97.2290729316734,
 
50
  "overall_score": 0.7776105934392995
51
  },
52
  {
53
  "model": "elevenlabs/scribe_v1",
54
+ "model_type": "speech-to-text",
55
+ "asr_wer": 0.34916319968417603,
56
+ "overall_score": 0.34916319968417603
 
 
 
57
  },
58
  {
59
  "model": "openai/whisper-large-v3-turbo",
60
+ "model_type": "speech-to-text",
 
 
 
61
  "asr_wer": 0.2670470041301468,
62
+ "overall_score": 0.2670470041301468
63
  }
64
  ],
 
 
 
 
 
 
65
  "commonvoice_hours": 2651.0,
66
  "commonvoice_locale": "en",
67
  "population": {
 
221
  "ZM": 2788256,
222
  "ZW": 6109446
223
  },
224
+ "language_family": "Indo-European",
225
+ "mt_bleu": 0.5249271223073668,
226
+ "mt_chrf": 65.91917919929946,
227
+ "cls_acc": 0.6533333333333333,
228
+ "mlm_chrf": 93.48244773503015,
229
+ "asr_wer": 0.30810510190716145,
230
+ "overall_score": 0.6231132678488628
231
  },
232
  {
233
  "language_name": "Chinese",
 
236
  "scores": [
237
  {
238
  "model": "openai/gpt-4o-mini",
239
+ "model_type": "text-to-text",
240
  "mt_bleu": 0.4887591978835982,
241
  "mt_chrf": 70.23584222732792,
242
  "cls_acc": 0.43333333333333335,
243
  "mlm_chrf": 91.54177259254774,
 
244
  "overall_score": 0.6837031605106967
245
  },
246
  {
247
  "model": "meta-llama/llama-3.3-70b-instruct",
248
+ "model_type": "text-to-text",
249
  "mt_bleu": 0.38557580495281013,
250
  "mt_chrf": 61.11151378837755,
251
  "cls_acc": 0.5666666666666667,
252
  "mlm_chrf": 94.55849047452216,
 
253
  "overall_score": 0.7077889030985546
254
  },
255
  {
256
  "model": "mistralai/mistral-small-24b-instruct-2501",
257
+ "model_type": "text-to-text",
258
  "mt_bleu": 0.48838341708028316,
259
  "mt_chrf": 71.48446350451079,
260
  "cls_acc": 0.7666666666666667,
261
  "mlm_chrf": 91.82889304045095,
 
262
  "overall_score": 0.799933410705428
263
  },
264
  {
265
  "model": "google/gemini-2.0-flash-001",
266
+ "model_type": "text-to-text",
267
  "mt_bleu": 0.392664198976078,
268
  "mt_chrf": 58.52460212908651,
269
  "cls_acc": 0.8666666666666667,
270
  "mlm_chrf": 94.26439448464122,
 
271
  "overall_score": 0.7981855442679814
272
  },
273
  {
274
  "model": "microsoft/phi-4",
275
+ "model_type": "text-to-text",
276
  "mt_bleu": 0.4892212172376435,
277
  "mt_chrf": 69.23023880157619,
278
  "cls_acc": 0.7,
279
  "mlm_chrf": 93.53779735205161,
 
280
  "overall_score": 0.7758934538454261
281
  },
282
  {
283
  "model": "elevenlabs/scribe_v1",
284
+ "model_type": "speech-to-text",
 
 
 
285
  "asr_wer": 1.0,
286
+ "overall_score": 1.0
287
  },
288
  {
289
  "model": "openai/whisper-large-v3-turbo",
290
+ "model_type": "speech-to-text",
 
 
 
291
  "asr_wer": 1.0,
292
+ "overall_score": 1.0
293
  }
294
  ],
 
 
 
 
 
 
295
  "commonvoice_hours": 422.0,
296
  "commonvoice_locale": "zh-TW",
297
  "population": {
 
316
  "US": 2295209,
317
  "VN": 1085934
318
  },
319
+ "language_family": "Sino-Tibetan",
320
+ "mt_bleu": 0.4489207672260826,
321
+ "mt_chrf": 66.1173320901758,
322
+ "cls_acc": 0.6666666666666666,
323
+ "mlm_chrf": 93.14626958884273,
324
+ "asr_wer": 1.0,
325
+ "overall_score": 0.8236434960611553
326
  },
327
  {
328
  "language_name": "Hindi",
 
331
  "scores": [
332
  {
333
  "model": "meta-llama/llama-3.3-70b-instruct",
334
+ "model_type": "text-to-text",
335
  "mt_bleu": 0.32404902340686065,
336
  "mt_chrf": 53.54085104449268,
337
  "cls_acc": 0.5,
338
  "mlm_chrf": 96.17240172798218,
 
339
  "overall_score": 0.6657108425749162
340
  },
341
  {
342
  "model": "openai/whisper-large-v3-turbo",
343
+ "model_type": "speech-to-text",
 
 
 
344
  "asr_wer": 0.40237814140706235,
345
+ "overall_score": 0.40237814140706235
346
  }
347
  ],
 
 
 
 
 
 
348
  "commonvoice_hours": 16.0,
349
  "commonvoice_locale": "hi-IN",
350
  "population": {
 
355
  "UG": 2206,
356
  "ZA": 1129272
357
  },
358
+ "language_family": "Indo-European",
359
+ "mt_bleu": 0.32404902340686065,
360
+ "mt_chrf": 53.54085104449268,
361
+ "cls_acc": 0.5,
362
+ "mlm_chrf": 96.17240172798218,
363
+ "asr_wer": 0.40237814140706235,
364
+ "overall_score": 0.5340444919909892
365
  },
366
  {
367
  "language_name": "Spanish",
 
370
  "scores": [
371
  {
372
  "model": "meta-llama/llama-3.3-70b-instruct",
373
+ "model_type": "text-to-text",
374
  "mt_bleu": 0.31587937116142056,
375
  "mt_chrf": 52.142851262301726,
376
  "cls_acc": 0.5,
377
  "mlm_chrf": 96.92768852306384,
 
378
  "overall_score": 0.6635684659512185
379
  },
380
  {
381
  "model": "openai/whisper-large-v3-turbo",
382
+ "model_type": "speech-to-text",
 
 
 
383
  "asr_wer": 0.16641846898275137,
384
+ "overall_score": 0.16641846898275137
385
  }
386
  ],
 
 
 
 
 
 
387
  "commonvoice_hours": 446.0,
388
  "commonvoice_locale": "es",
389
  "population": {
 
427
  "UY": 2981097,
428
  "VE": 23488572
429
  },
430
+ "language_family": "Indo-European",
431
+ "mt_bleu": 0.31587937116142056,
432
+ "mt_chrf": 52.142851262301726,
433
+ "cls_acc": 0.5,
434
+ "mlm_chrf": 96.92768852306384,
435
+ "asr_wer": 0.16641846898275137,
436
+ "overall_score": 0.41499346746698496
437
  },
438
  {
439
  "language_name": "Arabic",
 
442
  "scores": [
443
  {
444
  "model": "meta-llama/llama-3.3-70b-instruct",
445
+ "model_type": "text-to-text",
446
  "mt_bleu": 0.39547934933771334,
447
  "mt_chrf": 57.51652731936118,
448
  "cls_acc": 0.4666666666666667,
449
  "mlm_chrf": 94.97026443937914,
 
450
  "overall_score": 0.6638448614180232
451
  },
452
  {
453
  "model": "openai/whisper-large-v3-turbo",
454
+ "model_type": "speech-to-text",
 
 
 
455
  "asr_wer": 0.17249214841393012,
456
+ "overall_score": 0.17249214841393012
457
  }
458
  ],
 
 
 
 
 
 
459
  "commonvoice_hours": 91.0,
460
  "commonvoice_locale": "ar",
461
  "population": {
 
498
  "TR": 459298,
499
  "YE": 22114456
500
  },
501
+ "language_family": "Afro-Asiatic",
502
+ "mt_bleu": 0.39547934933771334,
503
+ "mt_chrf": 57.51652731936118,
504
+ "cls_acc": 0.4666666666666667,
505
+ "mlm_chrf": 94.97026443937914,
506
+ "asr_wer": 0.17249214841393012,
507
+ "overall_score": 0.4181685049159767
508
  }
509
  ]