David Pomerenke committed on
Commit
1b634f3
·
1 Parent(s): 1167b2d

For classification use number + few-shot

Browse files

(rather than label, where the label is in English)

Files changed (2) hide show
  1. evals.py +12 -12
  2. results.json +342 -79
evals.py CHANGED
@@ -27,7 +27,7 @@ models = [
27
  "mistralai/mistral-small-24b-instruct-2501", # 0.14$/M tokens
28
  "google/gemini-2.0-flash-001", # 0.4$/M tokens
29
  # "qwen/qwen-turbo", # 0.2$/M tokens; recognizes "inappropriate content"
30
- "deepseek/deepseek-chat", # 0.9$/M tokens
31
  "microsoft/phi-4", # 0.07$/M tokens
32
  ]
33
  fast_model = "meta-llama/llama-3.3-70b-instruct"
@@ -139,7 +139,7 @@ languages = pd.merge(
139
  ) # "left" because keep it simple for now
140
  languages["in_benchmark"] = languages["bcp_47"].isin(benchmark_languages["bcp_47"])
141
 
142
- languages = languages.sort_values(by="speakers", ascending=False).iloc[:10]
143
 
144
  # sample languages to translate to
145
  target_languages = languages[languages["in_benchmark"]].sample(
@@ -238,16 +238,13 @@ async def classify_and_evaluate(model, language_bcp_47, nr):
238
  frac=1, random_state=42
239
  )
240
  test_paragraph = test_paragraphs.iloc[nr]
241
- messages = [
242
- {
243
- "role": "system",
244
- "content": f"Categories: {'; '.join(examples['topic'].drop_duplicates())}.",
245
- }
246
- ]
247
  for example in examples.itertuples():
248
  messages += [
249
  {"role": "user", "content": example.text},
250
- {"role": "assistant", "content": example.topic},
251
  ]
252
  reply = await complete(
253
  model=model,
@@ -259,13 +256,16 @@ async def classify_and_evaluate(model, language_bcp_47, nr):
259
  },
260
  ],
261
  temperature=0,
262
- max_tokens=1024,
263
  )
264
- prediction = reply.choices[0].message.content.strip()
 
 
 
265
  return {
266
  "model": model,
267
  "bcp_47": language["bcp_47"],
268
- "true": test_paragraph.topic,
269
  "pred": prediction,
270
  "sentence_nr": nr,
271
  }
 
27
  "mistralai/mistral-small-24b-instruct-2501", # 0.14$/M tokens
28
  "google/gemini-2.0-flash-001", # 0.4$/M tokens
29
  # "qwen/qwen-turbo", # 0.2$/M tokens; recognizes "inappropriate content"
30
+ # "deepseek/deepseek-chat", # 0.9$/M tokens
31
  "microsoft/phi-4", # 0.07$/M tokens
32
  ]
33
  fast_model = "meta-llama/llama-3.3-70b-instruct"
 
139
  ) # "left" because keep it simple for now
140
  languages["in_benchmark"] = languages["bcp_47"].isin(benchmark_languages["bcp_47"])
141
 
142
+ languages = languages.sort_values(by="speakers", ascending=False).iloc[:20]
143
 
144
  # sample languages to translate to
145
  target_languages = languages[languages["in_benchmark"]].sample(
 
238
  frac=1, random_state=42
239
  )
240
  test_paragraph = test_paragraphs.iloc[nr]
241
+ def topic_to_number(topic):
242
+ return top_topics.get_loc(topic)
243
+ messages = []
 
 
 
244
  for example in examples.itertuples():
245
  messages += [
246
  {"role": "user", "content": example.text},
247
+ {"role": "assistant", "content": str(topic_to_number(example.topic))},
248
  ]
249
  reply = await complete(
250
  model=model,
 
256
  },
257
  ],
258
  temperature=0,
259
+ max_tokens=5,
260
  )
261
+ try:
262
+ prediction = int(reply.choices[0].message.content.strip())
263
+ except ValueError:
264
+ prediction = -1
265
  return {
266
  "model": model,
267
  "bcp_47": language["bcp_47"],
268
+ "true": topic_to_number(test_paragraph.topic),
269
  "pred": prediction,
270
  "sentence_nr": nr,
271
  }
results.json CHANGED
@@ -4,18 +4,46 @@
4
  "bcp_47": "en",
5
  "speakers": 1636485840,
6
  "scores": [
 
 
 
 
 
 
 
7
  {
8
  "model": "meta-llama/llama-3.3-70b-instruct",
9
  "bleu": 0.4351349353198866,
10
  "chrf": 54.9504915580248,
11
- "accuracy": 1.0,
12
- "overall_score": 0.7175674676599433
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
  }
14
  ],
15
- "bleu": 0.4351349353198866,
16
- "chrf": 54.9504915580248,
17
- "accuracy": 1.0,
18
- "overall_score": 0.7175674676599433,
19
  "commonvoice_hours": 2651.0,
20
  "commonvoice_locale": "en",
21
  "population": {
@@ -185,14 +213,14 @@
185
  "model": "meta-llama/llama-3.3-70b-instruct",
186
  "bleu": 0.3977775857451761,
187
  "chrf": 57.672913792439125,
188
- "accuracy": 1.0,
189
- "overall_score": 0.698888792872588
190
  }
191
  ],
192
  "bleu": 0.3977775857451761,
193
  "chrf": 57.672913792439125,
194
- "accuracy": 1.0,
195
- "overall_score": 0.698888792872588,
196
  "commonvoice_hours": 422.0,
197
  "commonvoice_locale": "zh-TW",
198
  "population": {
@@ -227,14 +255,14 @@
227
  "model": "meta-llama/llama-3.3-70b-instruct",
228
  "bleu": 0.333521621016373,
229
  "chrf": 50.48364584189306,
230
- "accuracy": 0.9333333333333333,
231
- "overall_score": 0.6334274771748531
232
  }
233
  ],
234
  "bleu": 0.333521621016373,
235
  "chrf": 50.48364584189306,
236
- "accuracy": 0.9333333333333333,
237
- "overall_score": 0.6334274771748531,
238
  "commonvoice_hours": 16.0,
239
  "commonvoice_locale": "hi-IN",
240
  "population": {
@@ -255,14 +283,14 @@
255
  "model": "meta-llama/llama-3.3-70b-instruct",
256
  "bleu": 0.29160032861883095,
257
  "chrf": 47.668399832701844,
258
- "accuracy": 0.9666666666666667,
259
- "overall_score": 0.6291334976427488
260
  }
261
  ],
262
  "bleu": 0.29160032861883095,
263
  "chrf": 47.668399832701844,
264
- "accuracy": 0.9666666666666667,
265
- "overall_score": 0.6291334976427488,
266
  "commonvoice_hours": 446.0,
267
  "commonvoice_locale": "es",
268
  "population": {
@@ -316,14 +344,14 @@
316
  "model": "meta-llama/llama-3.3-70b-instruct",
317
  "bleu": 0.277257629790728,
318
  "chrf": 46.62779335380641,
319
- "accuracy": 0.9333333333333333,
320
- "overall_score": 0.6052954815620306
321
  }
322
  ],
323
  "bleu": 0.277257629790728,
324
  "chrf": 46.62779335380641,
325
- "accuracy": 0.9333333333333333,
326
- "overall_score": 0.6052954815620306,
327
  "commonvoice_hours": 91.0,
328
  "commonvoice_locale": "ar",
329
  "population": {
@@ -376,14 +404,14 @@
376
  "model": "meta-llama/llama-3.3-70b-instruct",
377
  "bleu": 0.2659144372728079,
378
  "chrf": 44.14831240898717,
379
- "accuracy": 0.8333333333333334,
380
- "overall_score": 0.5496238853030706
381
  }
382
  ],
383
  "bleu": 0.2659144372728079,
384
  "chrf": 44.14831240898717,
385
- "accuracy": 0.8333333333333334,
386
- "overall_score": 0.5496238853030706,
387
  "commonvoice_hours": 77.0,
388
  "commonvoice_locale": "ur",
389
  "population": {
@@ -403,14 +431,14 @@
403
  "model": "meta-llama/llama-3.3-70b-instruct",
404
  "bleu": 0.315663773358301,
405
  "chrf": 49.253978669350964,
406
- "accuracy": 0.9666666666666667,
407
- "overall_score": 0.6411652200124838
408
  }
409
  ],
410
  "bleu": 0.315663773358301,
411
  "chrf": 49.253978669350964,
412
- "accuracy": 0.9666666666666667,
413
- "overall_score": 0.6411652200124838,
414
  "commonvoice_hours": 1052.0,
415
  "commonvoice_locale": "fr",
416
  "population": {
@@ -487,14 +515,14 @@
487
  "model": "meta-llama/llama-3.3-70b-instruct",
488
  "bleu": 0.21265887286151353,
489
  "chrf": 41.501657722373686,
490
- "accuracy": 0.9333333333333333,
491
- "overall_score": 0.5729961030974234
492
  }
493
  ],
494
  "bleu": 0.21265887286151353,
495
  "chrf": 41.501657722373686,
496
- "accuracy": 0.9333333333333333,
497
- "overall_score": 0.5729961030974234,
498
  "commonvoice_hours": 49.0,
499
  "commonvoice_locale": "bn",
500
  "population": {
@@ -510,53 +538,18 @@
510
  "bcp_47": "pt",
511
  "speakers": 237496885,
512
  "scores": [
513
- {
514
- "model": "openai/gpt-4o-mini",
515
- "bleu": 0.37370265193281843,
516
- "chrf": 57.010201314973216,
517
- "accuracy": 0.9666666666666667,
518
- "overall_score": 0.6701846592997426
519
- },
520
  {
521
  "model": "meta-llama/llama-3.3-70b-instruct",
522
  "bleu": 0.27514792195783394,
523
  "chrf": 45.901248962808694,
524
- "accuracy": 0.9666666666666667,
525
- "overall_score": 0.6209072943122503
526
- },
527
- {
528
- "model": "mistralai/mistral-small-24b-instruct-2501",
529
- "bleu": 0.3691905380990064,
530
- "chrf": 54.842418095352954,
531
- "accuracy": 0.9666666666666667,
532
- "overall_score": 0.6679286023828366
533
- },
534
- {
535
- "model": "google/gemini-2.0-flash-001",
536
- "bleu": 0.4020145367576223,
537
- "chrf": 60.73156386707501,
538
- "accuracy": 0.9,
539
- "overall_score": 0.6510072683788112
540
- },
541
- {
542
- "model": "deepseek/deepseek-chat",
543
- "bleu": 0.39831859400698993,
544
- "chrf": 59.99225659809846,
545
- "accuracy": 0.9666666666666667,
546
- "overall_score": 0.6824926303368283
547
- },
548
- {
549
- "model": "microsoft/phi-4",
550
- "bleu": 0.35576182901107084,
551
- "chrf": 56.05856754270042,
552
- "accuracy": 0.9,
553
- "overall_score": 0.6278809145055354
554
  }
555
  ],
556
- "bleu": 0.36235601196089035,
557
- "chrf": 55.756042730168126,
558
- "accuracy": 0.9444444444444445,
559
- "overall_score": 0.6534002282026674,
560
  "commonvoice_hours": 177.0,
561
  "commonvoice_locale": "pt",
562
  "population": {
@@ -587,14 +580,14 @@
587
  "model": "meta-llama/llama-3.3-70b-instruct",
588
  "bleu": 0.3048037308116852,
589
  "chrf": 48.4304965568793,
590
- "accuracy": 0.9666666666666667,
591
- "overall_score": 0.6357351987391759
592
  }
593
  ],
594
  "bleu": 0.3048037308116852,
595
  "chrf": 48.4304965568793,
596
- "accuracy": 0.9666666666666667,
597
- "overall_score": 0.6357351987391759,
598
  "commonvoice_hours": 2.3,
599
  "commonvoice_locale": "pa-IN",
600
  "population": {
@@ -605,5 +598,275 @@
605
  "PK": 163450700,
606
  "SG": 9314
607
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
608
  }
609
  ]
 
4
  "bcp_47": "en",
5
  "speakers": 1636485840,
6
  "scores": [
7
+ {
8
+ "model": "openai/gpt-4o-mini",
9
+ "bleu": 0.89404322120213,
10
+ "chrf": 92.53933977489264,
11
+ "accuracy": 0.5666666666666667,
12
+ "overall_score": 0.7303549439343984
13
+ },
14
  {
15
  "model": "meta-llama/llama-3.3-70b-instruct",
16
  "bleu": 0.4351349353198866,
17
  "chrf": 54.9504915580248,
18
+ "accuracy": 0.6,
19
+ "overall_score": 0.5175674676599433
20
+ },
21
+ {
22
+ "model": "mistralai/mistral-small-24b-instruct-2501",
23
+ "bleu": 0.8800468872938262,
24
+ "chrf": 94.30164664106223,
25
+ "accuracy": 0.5333333333333333,
26
+ "overall_score": 0.7066901103135798
27
+ },
28
+ {
29
+ "model": "google/gemini-2.0-flash-001",
30
+ "bleu": 0.8489646963773831,
31
+ "chrf": 92.73129066280984,
32
+ "accuracy": 0.8666666666666667,
33
+ "overall_score": 0.8578156815220249
34
+ },
35
+ {
36
+ "model": "microsoft/phi-4",
37
+ "bleu": 0.8230104823079876,
38
+ "chrf": 91.69043412576788,
39
+ "accuracy": 0.7,
40
+ "overall_score": 0.7615052411539938
41
  }
42
  ],
43
+ "bleu": 0.7762400445002428,
44
+ "chrf": 85.24264055251147,
45
+ "accuracy": 0.6533333333333333,
46
+ "overall_score": 0.714786688916788,
47
  "commonvoice_hours": 2651.0,
48
  "commonvoice_locale": "en",
49
  "population": {
 
213
  "model": "meta-llama/llama-3.3-70b-instruct",
214
  "bleu": 0.3977775857451761,
215
  "chrf": 57.672913792439125,
216
+ "accuracy": 0.5666666666666667,
217
+ "overall_score": 0.48222212620592136
218
  }
219
  ],
220
  "bleu": 0.3977775857451761,
221
  "chrf": 57.672913792439125,
222
+ "accuracy": 0.5666666666666667,
223
+ "overall_score": 0.48222212620592136,
224
  "commonvoice_hours": 422.0,
225
  "commonvoice_locale": "zh-TW",
226
  "population": {
 
255
  "model": "meta-llama/llama-3.3-70b-instruct",
256
  "bleu": 0.333521621016373,
257
  "chrf": 50.48364584189306,
258
+ "accuracy": 0.5,
259
+ "overall_score": 0.4167608105081865
260
  }
261
  ],
262
  "bleu": 0.333521621016373,
263
  "chrf": 50.48364584189306,
264
+ "accuracy": 0.5,
265
+ "overall_score": 0.4167608105081865,
266
  "commonvoice_hours": 16.0,
267
  "commonvoice_locale": "hi-IN",
268
  "population": {
 
283
  "model": "meta-llama/llama-3.3-70b-instruct",
284
  "bleu": 0.29160032861883095,
285
  "chrf": 47.668399832701844,
286
+ "accuracy": 0.5,
287
+ "overall_score": 0.39580016430941545
288
  }
289
  ],
290
  "bleu": 0.29160032861883095,
291
  "chrf": 47.668399832701844,
292
+ "accuracy": 0.5,
293
+ "overall_score": 0.39580016430941545,
294
  "commonvoice_hours": 446.0,
295
  "commonvoice_locale": "es",
296
  "population": {
 
344
  "model": "meta-llama/llama-3.3-70b-instruct",
345
  "bleu": 0.277257629790728,
346
  "chrf": 46.62779335380641,
347
+ "accuracy": 0.4666666666666667,
348
+ "overall_score": 0.37196214822869733
349
  }
350
  ],
351
  "bleu": 0.277257629790728,
352
  "chrf": 46.62779335380641,
353
+ "accuracy": 0.4666666666666667,
354
+ "overall_score": 0.37196214822869733,
355
  "commonvoice_hours": 91.0,
356
  "commonvoice_locale": "ar",
357
  "population": {
 
404
  "model": "meta-llama/llama-3.3-70b-instruct",
405
  "bleu": 0.2659144372728079,
406
  "chrf": 44.14831240898717,
407
+ "accuracy": 0.43333333333333335,
408
+ "overall_score": 0.34962388530307065
409
  }
410
  ],
411
  "bleu": 0.2659144372728079,
412
  "chrf": 44.14831240898717,
413
+ "accuracy": 0.43333333333333335,
414
+ "overall_score": 0.34962388530307065,
415
  "commonvoice_hours": 77.0,
416
  "commonvoice_locale": "ur",
417
  "population": {
 
431
  "model": "meta-llama/llama-3.3-70b-instruct",
432
  "bleu": 0.315663773358301,
433
  "chrf": 49.253978669350964,
434
+ "accuracy": 0.5666666666666667,
435
+ "overall_score": 0.4411652200124838
436
  }
437
  ],
438
  "bleu": 0.315663773358301,
439
  "chrf": 49.253978669350964,
440
+ "accuracy": 0.5666666666666667,
441
+ "overall_score": 0.4411652200124838,
442
  "commonvoice_hours": 1052.0,
443
  "commonvoice_locale": "fr",
444
  "population": {
 
515
  "model": "meta-llama/llama-3.3-70b-instruct",
516
  "bleu": 0.21265887286151353,
517
  "chrf": 41.501657722373686,
518
+ "accuracy": 0.4,
519
+ "overall_score": 0.3063294364307568
520
  }
521
  ],
522
  "bleu": 0.21265887286151353,
523
  "chrf": 41.501657722373686,
524
+ "accuracy": 0.4,
525
+ "overall_score": 0.3063294364307568,
526
  "commonvoice_hours": 49.0,
527
  "commonvoice_locale": "bn",
528
  "population": {
 
538
  "bcp_47": "pt",
539
  "speakers": 237496885,
540
  "scores": [
 
 
 
 
 
 
 
541
  {
542
  "model": "meta-llama/llama-3.3-70b-instruct",
543
  "bleu": 0.27514792195783394,
544
  "chrf": 45.901248962808694,
545
+ "accuracy": 0.5666666666666667,
546
+ "overall_score": 0.42090729431225027
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
547
  }
548
  ],
549
+ "bleu": 0.27514792195783394,
550
+ "chrf": 45.901248962808694,
551
+ "accuracy": 0.5666666666666667,
552
+ "overall_score": 0.42090729431225027,
553
  "commonvoice_hours": 177.0,
554
  "commonvoice_locale": "pt",
555
  "population": {
 
580
  "model": "meta-llama/llama-3.3-70b-instruct",
581
  "bleu": 0.3048037308116852,
582
  "chrf": 48.4304965568793,
583
+ "accuracy": 0.5333333333333333,
584
+ "overall_score": 0.41906853207250927
585
  }
586
  ],
587
  "bleu": 0.3048037308116852,
588
  "chrf": 48.4304965568793,
589
+ "accuracy": 0.5333333333333333,
590
+ "overall_score": 0.41906853207250927,
591
  "commonvoice_hours": 2.3,
592
  "commonvoice_locale": "pa-IN",
593
  "population": {
 
598
  "PK": 163450700,
599
  "SG": 9314
600
  }
601
+ },
602
+ {
603
+ "language_name": "Russian",
604
+ "bcp_47": "ru",
605
+ "speakers": 195841151,
606
+ "scores": [
607
+ {
608
+ "model": "meta-llama/llama-3.3-70b-instruct",
609
+ "bleu": 0.26108507692625094,
610
+ "chrf": 45.063308940468154,
611
+ "accuracy": 0.5666666666666667,
612
+ "overall_score": 0.4138758717964588
613
+ }
614
+ ],
615
+ "bleu": 0.26108507692625094,
616
+ "chrf": 45.063308940468154,
617
+ "accuracy": 0.5666666666666667,
618
+ "overall_score": 0.4138758717964588,
619
+ "commonvoice_hours": 242.0,
620
+ "commonvoice_locale": "ru",
621
+ "population": {
622
+ "BG": 1602387,
623
+ "BY": 1137350,
624
+ "CA": 211087,
625
+ "CN": 13940,
626
+ "DE": 4809582,
627
+ "EE": 688027,
628
+ "FI": 45131,
629
+ "GE": 359730,
630
+ "IL": 954303,
631
+ "KG": 2147364,
632
+ "KZ": 13746168,
633
+ "LT": 2185168,
634
+ "LV": 714867,
635
+ "MD": 100935,
636
+ "MN": 4118,
637
+ "PL": 6890814,
638
+ "RU": 133218680,
639
+ "SJ": 1200,
640
+ "TJ": 1064840,
641
+ "TM": 663436,
642
+ "UA": 20204534,
643
+ "US": 798334,
644
+ "UZ": 4279156
645
+ }
646
+ },
647
+ {
648
+ "language_name": "Swahili",
649
+ "bcp_47": "sw",
650
+ "speakers": 171610296,
651
+ "scores": [
652
+ {
653
+ "model": "meta-llama/llama-3.3-70b-instruct",
654
+ "bleu": 0.2709203338132304,
655
+ "chrf": 44.36399636969686,
656
+ "accuracy": 0.5,
657
+ "overall_score": 0.3854601669066152
658
+ }
659
+ ],
660
+ "bleu": 0.2709203338132304,
661
+ "chrf": 44.36399636969686,
662
+ "accuracy": 0.5,
663
+ "overall_score": 0.3854601669066152,
664
+ "commonvoice_hours": 411.0,
665
+ "commonvoice_locale": "sw",
666
+ "population": {
667
+ "BI": 6408,
668
+ "CD": 50890000,
669
+ "KE": 35328414,
670
+ "MZ": 9330,
671
+ "SO": 235142,
672
+ "TZ": 52697520,
673
+ "UG": 32439750,
674
+ "YT": 2716,
675
+ "ZA": 1016
676
+ }
677
+ },
678
+ {
679
+ "language_name": "Indonesian",
680
+ "bcp_47": "id",
681
+ "speakers": 171207687,
682
+ "scores": [
683
+ {
684
+ "model": "meta-llama/llama-3.3-70b-instruct",
685
+ "bleu": 0.27441353638286026,
686
+ "chrf": 46.025445629112156,
687
+ "accuracy": 0.6,
688
+ "overall_score": 0.4372067681914301
689
+ }
690
+ ],
691
+ "bleu": 0.27441353638286026,
692
+ "chrf": 46.025445629112156,
693
+ "accuracy": 0.6,
694
+ "overall_score": 0.4372067681914301,
695
+ "commonvoice_hours": 33.0,
696
+ "commonvoice_locale": "id",
697
+ "population": {
698
+ "ID": 170896640,
699
+ "NL": 311047
700
+ }
701
+ },
702
+ {
703
+ "language_name": "German",
704
+ "bcp_47": "de",
705
+ "speakers": 136350226,
706
+ "scores": [
707
+ {
708
+ "model": "meta-llama/llama-3.3-70b-instruct",
709
+ "bleu": 0.3338682761061998,
710
+ "chrf": 50.216731068308064,
711
+ "accuracy": 0.5666666666666667,
712
+ "overall_score": 0.4502674713864332
713
+ }
714
+ ],
715
+ "bleu": 0.3338682761061998,
716
+ "chrf": 50.216731068308064,
717
+ "accuracy": 0.5666666666666667,
718
+ "overall_score": 0.4502674713864332,
719
+ "commonvoice_hours": 1358.0,
720
+ "commonvoice_locale": "de",
721
+ "population": {
722
+ "AT": 8593666,
723
+ "BE": 2578554,
724
+ "BG": 557352,
725
+ "BR": 1778414,
726
+ "CA": 294014,
727
+ "CH": 6134913,
728
+ "CZ": 1605375,
729
+ "DE": 72945327,
730
+ "DK": 2758623,
731
+ "FI": 1002901,
732
+ "FR": 3392410,
733
+ "GB": 5918499,
734
+ "GR": 530355,
735
+ "HU": 1758929,
736
+ "IT": 998443,
737
+ "KZ": 1221882,
738
+ "LI": 39137,
739
+ "LT": 382404,
740
+ "LU": 395880,
741
+ "NA": 23671,
742
+ "NL": 12269084,
743
+ "PL": 7273637,
744
+ "PY": 208559,
745
+ "RO": 44736,
746
+ "SI": 883126,
747
+ "SK": 1196932,
748
+ "US": 1563403
749
+ }
750
+ },
751
+ {
752
+ "language_name": "Japanese",
753
+ "bcp_47": "ja",
754
+ "speakers": 119729026,
755
+ "scores": [
756
+ {
757
+ "model": "meta-llama/llama-3.3-70b-instruct",
758
+ "bleu": 0.2940100667664714,
759
+ "chrf": 46.403097021492236,
760
+ "accuracy": 0.6,
761
+ "overall_score": 0.4470050333832357
762
+ }
763
+ ],
764
+ "bleu": 0.2940100667664714,
765
+ "chrf": 46.403097021492236,
766
+ "accuracy": 0.6,
767
+ "overall_score": 0.4470050333832357,
768
+ "commonvoice_hours": 222.0,
769
+ "commonvoice_locale": "ja",
770
+ "population": {
771
+ "BR": 444604,
772
+ "CA": 52772,
773
+ "JP": 119231650
774
+ }
775
+ },
776
+ {
777
+ "language_name": "Telugu",
778
+ "bcp_47": "te",
779
+ "speakers": 95478480,
780
+ "scores": [
781
+ {
782
+ "model": "meta-llama/llama-3.3-70b-instruct",
783
+ "bleu": 0.2750887189010237,
784
+ "chrf": 46.31463752811596,
785
+ "accuracy": 0.4,
786
+ "overall_score": 0.33754435945051187
787
+ }
788
+ ],
789
+ "bleu": 0.2750887189010237,
790
+ "chrf": 46.31463752811596,
791
+ "accuracy": 0.4,
792
+ "overall_score": 0.33754435945051187,
793
+ "commonvoice_hours": 0.3,
794
+ "commonvoice_locale": "te",
795
+ "population": {
796
+ "IN": 95478480
797
+ }
798
+ },
799
+ {
800
+ "language_name": "Marathi",
801
+ "bcp_47": "mr",
802
+ "speakers": 92826300,
803
+ "scores": [
804
+ {
805
+ "model": "meta-llama/llama-3.3-70b-instruct",
806
+ "bleu": 0.2584800238292114,
807
+ "chrf": 44.69889855306244,
808
+ "accuracy": 0.5666666666666667,
809
+ "overall_score": 0.41257334524793904
810
+ }
811
+ ],
812
+ "bleu": 0.2584800238292114,
813
+ "chrf": 44.69889855306244,
814
+ "accuracy": 0.5666666666666667,
815
+ "overall_score": 0.41257334524793904,
816
+ "commonvoice_hours": 20.0,
817
+ "commonvoice_locale": "mr",
818
+ "population": {
819
+ "IN": 92826300
820
+ }
821
+ },
822
+ {
823
+ "language_name": "Javanese",
824
+ "bcp_47": "jv",
825
+ "speakers": 91180665,
826
+ "scores": [
827
+ {
828
+ "model": "meta-llama/llama-3.3-70b-instruct",
829
+ "bleu": 0.23082586428104943,
830
+ "chrf": 41.42591471734489,
831
+ "accuracy": 0.4666666666666667,
832
+ "overall_score": 0.34874626547385806
833
+ }
834
+ ],
835
+ "bleu": 0.23082586428104943,
836
+ "chrf": 41.42591471734489,
837
+ "accuracy": 0.4666666666666667,
838
+ "overall_score": 0.34874626547385806,
839
+ "commonvoice_hours": 0.0,
840
+ "commonvoice_locale": "jv",
841
+ "population": {
842
+ "ID": 90788840,
843
+ "MY": 391825
844
+ }
845
+ },
846
+ {
847
+ "language_name": "Vietnamese",
848
+ "bcp_47": "vi",
849
+ "speakers": 86222962,
850
+ "scores": [
851
+ {
852
+ "model": "meta-llama/llama-3.3-70b-instruct",
853
+ "bleu": 0.252552287345529,
854
+ "chrf": 43.351007120897606,
855
+ "accuracy": 0.5333333333333333,
856
+ "overall_score": 0.3929428103394311
857
+ }
858
+ ],
859
+ "bleu": 0.252552287345529,
860
+ "chrf": 43.351007120897606,
861
+ "accuracy": 0.5333333333333333,
862
+ "overall_score": 0.3929428103394311,
863
+ "commonvoice_hours": 5.9,
864
+ "commonvoice_locale": "vi",
865
+ "population": {
866
+ "CA": 184701,
867
+ "CN": 6970,
868
+ "US": 1130973,
869
+ "VN": 84900318
870
+ }
871
  }
872
  ]