David Pomerenke committed on
Commit
07dcc45
·
1 Parent(s): 3a698ab

Don't translate a language to itself

Browse files

This makes the language comparison fairer

Files changed (2) hide show
  1. languagebench.py +8 -3
  2. results.json +56 -424
languagebench.py CHANGED
@@ -96,12 +96,13 @@ languages["in_benchmark"] = languages["in_benchmark"].fillna(False)
96
  languages = languages.sort_values(by="speakers", ascending=False)
97
 
98
  # sample languages to translate from
 
99
  original_languages = languages[languages["in_benchmark"]].sample(
100
- n=n_sentences, weights="speakers", replace=True, random_state=42
101
  )
102
  # sample languages to analyze with all models
103
  detailed_target_languages = languages[languages["in_benchmark"]].sample(
104
- n=25, random_state=42
105
  )
106
 
107
 
@@ -176,9 +177,13 @@ async def main():
176
  not in detailed_target_languages.language_code.values
177
  ):
178
  continue
 
 
 
 
179
  original_sentences = [
180
  load_sentences(lang)[i]
181
- for i, lang in enumerate(original_languages.itertuples())
182
  ]
183
  print(model)
184
  predictions = [
 
96
  languages = languages.sort_values(by="speakers", ascending=False)
97
 
98
  # sample languages to translate from
99
+ # when translating e.g. to Mandarin, we drop Mandarin from the sample and use the next samples from the list instead; therefore we need to sample more than n_sentences
100
  original_languages = languages[languages["in_benchmark"]].sample(
101
+ n=n_sentences * 2, weights="speakers", replace=True, random_state=42
102
  )
103
  # sample languages to analyze with all models
104
  detailed_target_languages = languages[languages["in_benchmark"]].sample(
105
+ n=3, random_state=42
106
  )
107
 
108
 
 
177
  not in detailed_target_languages.language_code.values
178
  ):
179
  continue
180
+ # drop the target language from the original languages sample
181
+ _original_languages = original_languages[
182
+ original_languages.language_code != language.language_code
183
+ ].iloc[:n_sentences]
184
  original_sentences = [
185
  load_sentences(lang)[i]
186
+ for i, lang in enumerate(_original_languages.itertuples())
187
  ]
188
  print(model)
189
  predictions = [
results.json CHANGED
@@ -6,10 +6,10 @@
6
  "scores": [
7
  {
8
  "model": "anthropic/claude-3.5-sonnet",
9
- "bleu": 0.44035095716211076
10
  }
11
  ],
12
- "bleu": 0.44035095716211076
13
  },
14
  {
15
  "language_name": "Mandarin Chinese",
@@ -18,10 +18,10 @@
18
  "scores": [
19
  {
20
  "model": "anthropic/claude-3.5-sonnet",
21
- "bleu": 0.4863759231444335
22
  }
23
  ],
24
- "bleu": 0.4863759231444335
25
  },
26
  {
27
  "language_name": "Spanish",
@@ -30,10 +30,10 @@
30
  "scores": [
31
  {
32
  "model": "anthropic/claude-3.5-sonnet",
33
- "bleu": 0.31421313286521146
34
  }
35
  ],
36
- "bleu": 0.31421313286521146
37
  },
38
  {
39
  "language_name": "Hindi",
@@ -42,10 +42,10 @@
42
  "scores": [
43
  {
44
  "model": "anthropic/claude-3.5-sonnet",
45
- "bleu": 0.3904883925679598
46
  }
47
  ],
48
- "bleu": 0.3904883925679598
49
  },
50
  {
51
  "language_name": "Bengali",
@@ -54,10 +54,10 @@
54
  "scores": [
55
  {
56
  "model": "anthropic/claude-3.5-sonnet",
57
- "bleu": 0.3297736038355109
58
  }
59
  ],
60
- "bleu": 0.3297736038355109
61
  },
62
  {
63
  "language_name": "Portuguese",
@@ -78,10 +78,10 @@
78
  "scores": [
79
  {
80
  "model": "anthropic/claude-3.5-sonnet",
81
- "bleu": 0.4387903665095541
82
  }
83
  ],
84
- "bleu": 0.4387903665095541
85
  },
86
  {
87
  "language_name": "Indonesian",
@@ -114,10 +114,10 @@
114
  "scores": [
115
  {
116
  "model": "anthropic/claude-3.5-sonnet",
117
- "bleu": 0.3324495319019684
118
  }
119
  ],
120
- "bleu": 0.3324495319019684
121
  },
122
  {
123
  "language_name": "Eastern Punjabi",
@@ -126,10 +126,10 @@
126
  "scores": [
127
  {
128
  "model": "anthropic/claude-3.5-sonnet",
129
- "bleu": 0.3928664264809312
130
  }
131
  ],
132
- "bleu": 0.3928664264809312
133
  },
134
  {
135
  "language_name": "Standard German",
@@ -212,28 +212,12 @@
212
  "language_code": "mar",
213
  "speakers": 83100000.0,
214
  "scores": [
215
- {
216
- "model": "openai/gpt-4o",
217
- "bleu": 0.22522575757065674
218
- },
219
  {
220
  "model": "anthropic/claude-3.5-sonnet",
221
  "bleu": 0.2333407635240652
222
- },
223
- {
224
- "model": "meta-llama/llama-3.1-405b-instruct",
225
- "bleu": 0.10203588314680621
226
- },
227
- {
228
- "model": "mistralai/mistral-large",
229
- "bleu": 0.16554398005927695
230
- },
231
- {
232
- "model": "qwen/qwen-2.5-72b-instruct",
233
- "bleu": 0.0743622715348743
234
  }
235
  ],
236
- "bleu": 0.16010173116713589
237
  },
238
  {
239
  "language_name": "Swahili",
@@ -254,10 +238,10 @@
254
  "scores": [
255
  {
256
  "model": "anthropic/claude-3.5-sonnet",
257
- "bleu": 0.3391095199409747
258
  }
259
  ],
260
- "bleu": 0.3391095199409747
261
  },
262
  {
263
  "language_name": "Telugu",
@@ -285,10 +269,10 @@
285
  "scores": [
286
  {
287
  "model": "anthropic/claude-3.5-sonnet",
288
- "bleu": 0.3327815413417786
289
  }
290
  ],
291
- "bleu": 0.3327815413417786
292
  },
293
  {
294
  "language_name": "Vietnamese",
@@ -357,10 +341,10 @@
357
  "scores": [
358
  {
359
  "model": "anthropic/claude-3.5-sonnet",
360
- "bleu": 0.2760973768717978
361
  }
362
  ],
363
- "bleu": 0.2760973768717978
364
  },
365
  {
366
  "language_name": "Bhojpuri",
@@ -414,28 +398,12 @@
414
  "language_code": "apc",
415
  "speakers": 44000000.0,
416
  "scores": [
417
- {
418
- "model": "openai/gpt-4o",
419
- "bleu": 0.3018366486807633
420
- },
421
  {
422
  "model": "anthropic/claude-3.5-sonnet",
423
  "bleu": 0.21013619903144296
424
- },
425
- {
426
- "model": "meta-llama/llama-3.1-405b-instruct",
427
- "bleu": 0.1499005259127334
428
- },
429
- {
430
- "model": "mistralai/mistral-large",
431
- "bleu": 0.18169602550114305
432
- },
433
- {
434
- "model": "qwen/qwen-2.5-72b-instruct",
435
- "bleu": 0.02549625724666183
436
  }
437
  ],
438
- "bleu": 0.17381313127454892
439
  },
440
  {
441
  "language_name": "Hausa",
@@ -515,10 +483,10 @@
515
  "scores": [
516
  {
517
  "model": "anthropic/claude-3.5-sonnet",
518
- "bleu": 0.4072481008205514
519
  }
520
  ],
521
- "bleu": 0.4072481008205514
522
  },
523
  {
524
  "language_name": "Xiang Chinese",
@@ -653,10 +621,10 @@
653
  "scores": [
654
  {
655
  "model": "anthropic/claude-3.5-sonnet",
656
- "bleu": 0.1643090786763395
657
  }
658
  ],
659
- "bleu": 0.1643090786763395
660
  },
661
  {
662
  "language_name": "Northern Uzbek",
@@ -699,28 +667,12 @@
699
  "language_code": "apc",
700
  "speakers": 24600000.0,
701
  "scores": [
702
- {
703
- "model": "openai/gpt-4o",
704
- "bleu": 0.3018366486807633
705
- },
706
  {
707
  "model": "anthropic/claude-3.5-sonnet",
708
  "bleu": 0.21013619903144296
709
- },
710
- {
711
- "model": "meta-llama/llama-3.1-405b-instruct",
712
- "bleu": 0.1499005259127334
713
- },
714
- {
715
- "model": "mistralai/mistral-large",
716
- "bleu": 0.18169602550114305
717
- },
718
- {
719
- "model": "qwen/qwen-2.5-72b-instruct",
720
- "bleu": 0.02549625724666183
721
  }
722
  ],
723
- "bleu": 0.17381313127454892
724
  },
725
  {
726
  "language_name": "Romanian",
@@ -917,28 +869,12 @@
917
  "language_code": "zul",
918
  "speakers": 15700000.0,
919
  "scores": [
920
- {
921
- "model": "openai/gpt-4o",
922
- "bleu": 0.22516622902472544
923
- },
924
  {
925
  "model": "anthropic/claude-3.5-sonnet",
926
  "bleu": 0.20245371733247658
927
- },
928
- {
929
- "model": "meta-llama/llama-3.1-405b-instruct",
930
- "bleu": 0.0378198916279553
931
- },
932
- {
933
- "model": "mistralai/mistral-large",
934
- "bleu": 0.0515062321283951
935
- },
936
- {
937
- "model": "qwen/qwen-2.5-72b-instruct",
938
- "bleu": 0.01701042472693127
939
  }
940
  ],
941
- "bleu": 0.10679129896809673
942
  },
943
  {
944
  "language_name": "Mesopotamian Arabic",
@@ -1104,28 +1040,12 @@
1104
  "language_code": "kaz",
1105
  "speakers": 13161980.0,
1106
  "scores": [
1107
- {
1108
- "model": "openai/gpt-4o",
1109
- "bleu": 0.2865687127063326
1110
- },
1111
  {
1112
  "model": "anthropic/claude-3.5-sonnet",
1113
  "bleu": 0.25054345947985385
1114
- },
1115
- {
1116
- "model": "meta-llama/llama-3.1-405b-instruct",
1117
- "bleu": 0.181288673149438
1118
- },
1119
- {
1120
- "model": "mistralai/mistral-large",
1121
- "bleu": 0.188276270677333
1122
- },
1123
- {
1124
- "model": "qwen/qwen-2.5-72b-instruct",
1125
- "bleu": 0.054576267347929
1126
  }
1127
  ],
1128
- "bleu": 0.19225067667217727
1129
  },
1130
  {
1131
  "language_name": "Chittagonian",
@@ -1139,28 +1059,12 @@
1139
  "language_code": "tso",
1140
  "speakers": 13000000.0,
1141
  "scores": [
1142
- {
1143
- "model": "openai/gpt-4o",
1144
- "bleu": 0.17202786716122928
1145
- },
1146
  {
1147
  "model": "anthropic/claude-3.5-sonnet",
1148
  "bleu": 0.1952038863089787
1149
- },
1150
- {
1151
- "model": "meta-llama/llama-3.1-405b-instruct",
1152
- "bleu": 0.0866195039237461
1153
- },
1154
- {
1155
- "model": "mistralai/mistral-large",
1156
- "bleu": 0.06062634407793842
1157
- },
1158
- {
1159
- "model": "qwen/qwen-2.5-72b-instruct",
1160
- "bleu": 0.014720823522746878
1161
  }
1162
  ],
1163
- "bleu": 0.10583968499892786
1164
  },
1165
  {
1166
  "language_name": "Hungarian",
@@ -1179,56 +1083,24 @@
1179
  "language_code": "kin",
1180
  "speakers": 12100000.0,
1181
  "scores": [
1182
- {
1183
- "model": "openai/gpt-4o",
1184
- "bleu": 0.22728268497187282
1185
- },
1186
  {
1187
  "model": "anthropic/claude-3.5-sonnet",
1188
  "bleu": 0.21847668603031067
1189
- },
1190
- {
1191
- "model": "meta-llama/llama-3.1-405b-instruct",
1192
- "bleu": 0.10074185841058973
1193
- },
1194
- {
1195
- "model": "mistralai/mistral-large",
1196
- "bleu": 0.059392901501730454
1197
- },
1198
- {
1199
- "model": "qwen/qwen-2.5-72b-instruct",
1200
- "bleu": 0.0074286483802763524
1201
  }
1202
  ],
1203
- "bleu": 0.12266455585895601
1204
  },
1205
  {
1206
  "language_name": "Chichewa",
1207
  "language_code": "nya",
1208
  "speakers": 12000000.0,
1209
  "scores": [
1210
- {
1211
- "model": "openai/gpt-4o",
1212
- "bleu": 0.1895725455230325
1213
- },
1214
  {
1215
  "model": "anthropic/claude-3.5-sonnet",
1216
  "bleu": 0.17401113784791736
1217
- },
1218
- {
1219
- "model": "meta-llama/llama-3.1-405b-instruct",
1220
- "bleu": 0.09658218667531897
1221
- },
1222
- {
1223
- "model": "mistralai/mistral-large",
1224
- "bleu": 0.04964676481672043
1225
- },
1226
- {
1227
- "model": "qwen/qwen-2.5-72b-instruct",
1228
- "bleu": 0.02040785086927957
1229
  }
1230
  ],
1231
- "bleu": 0.10604409714645376
1232
  },
1233
  {
1234
  "language_name": "Tunisian Arabic",
@@ -1394,28 +1266,12 @@
1394
  "language_code": "hat",
1395
  "speakers": 9600000.0,
1396
  "scores": [
1397
- {
1398
- "model": "openai/gpt-4o",
1399
- "bleu": 0.288129435659592
1400
- },
1401
  {
1402
  "model": "anthropic/claude-3.5-sonnet",
1403
- "bleu": 0.3053888119917926
1404
- },
1405
- {
1406
- "model": "meta-llama/llama-3.1-405b-instruct",
1407
- "bleu": 0.26523811773703404
1408
- },
1409
- {
1410
- "model": "mistralai/mistral-large",
1411
- "bleu": 0.20181127876225555
1412
- },
1413
- {
1414
- "model": "qwen/qwen-2.5-72b-instruct",
1415
- "bleu": 0.10821493612429332
1416
  }
1417
  ],
1418
- "bleu": 0.23375651605499342
1419
  },
1420
  {
1421
  "language_name": "Dari",
@@ -1458,28 +1314,12 @@
1458
  "language_code": "azj",
1459
  "speakers": 9220610.0,
1460
  "scores": [
1461
- {
1462
- "model": "openai/gpt-4o",
1463
- "bleu": 0.20542741434113335
1464
- },
1465
  {
1466
  "model": "anthropic/claude-3.5-sonnet",
1467
  "bleu": 0.24029548337141315
1468
- },
1469
- {
1470
- "model": "meta-llama/llama-3.1-405b-instruct",
1471
- "bleu": 0.1985709579217795
1472
- },
1473
- {
1474
- "model": "mistralai/mistral-large",
1475
- "bleu": 0.16426254772371085
1476
- },
1477
- {
1478
- "model": "qwen/qwen-2.5-72b-instruct",
1479
- "bleu": 0.029108557467416214
1480
  }
1481
  ],
1482
- "bleu": 0.1675329921650906
1483
  },
1484
  {
1485
  "language_name": "Congo Swahili",
@@ -1697,28 +1537,12 @@
1697
  "language_code": "kas",
1698
  "speakers": 6900000.0,
1699
  "scores": [
1700
- {
1701
- "model": "openai/gpt-4o",
1702
- "bleu": 0.058464446367472944
1703
- },
1704
  {
1705
  "model": "anthropic/claude-3.5-sonnet",
1706
  "bleu": 0.06589195125918151
1707
- },
1708
- {
1709
- "model": "meta-llama/llama-3.1-405b-instruct",
1710
- "bleu": 0.04553335750357841
1711
- },
1712
- {
1713
- "model": "mistralai/mistral-large",
1714
- "bleu": 0.0441481215103373
1715
- },
1716
- {
1717
- "model": "qwen/qwen-2.5-72b-instruct",
1718
- "bleu": 0.038143033479956526
1719
  }
1720
  ],
1721
- "bleu": 0.05043618202410534
1722
  },
1723
  {
1724
  "language_name": "Armenian",
@@ -1739,26 +1563,26 @@
1739
  "scores": [
1740
  {
1741
  "model": "openai/gpt-4o",
1742
- "bleu": 0.028795542086220486
1743
  },
1744
  {
1745
  "model": "anthropic/claude-3.5-sonnet",
1746
- "bleu": 0.10774605838812318
1747
  },
1748
  {
1749
  "model": "meta-llama/llama-3.1-405b-instruct",
1750
- "bleu": 0.022737318691929916
1751
  },
1752
  {
1753
  "model": "mistralai/mistral-large",
1754
- "bleu": 0.06144543752598748
1755
  },
1756
  {
1757
  "model": "qwen/qwen-2.5-72b-instruct",
1758
- "bleu": 0.028275143039527356
1759
  }
1760
  ],
1761
- "bleu": 0.04979989994635768
1762
  },
1763
  {
1764
  "language_name": "Central Pashto",
@@ -2133,28 +1957,12 @@
2133
  "language_code": "lit",
2134
  "speakers": 4000000.0,
2135
  "scores": [
2136
- {
2137
- "model": "openai/gpt-4o",
2138
- "bleu": 0.2826991552063554
2139
- },
2140
  {
2141
  "model": "anthropic/claude-3.5-sonnet",
2142
  "bleu": 0.30370845804188434
2143
- },
2144
- {
2145
- "model": "meta-llama/llama-3.1-405b-instruct",
2146
- "bleu": 0.2787991559825463
2147
- },
2148
- {
2149
- "model": "mistralai/mistral-large",
2150
- "bleu": 0.22260546031191955
2151
- },
2152
- {
2153
- "model": "qwen/qwen-2.5-72b-instruct",
2154
- "bleu": 0.13004923167033042
2155
  }
2156
  ],
2157
- "bleu": 0.2435722922426072
2158
  },
2159
  {
2160
  "language_name": "Tok Pisin",
@@ -2292,28 +2100,12 @@
2292
  "language_code": "ace",
2293
  "speakers": 3500032.0,
2294
  "scores": [
2295
- {
2296
- "model": "openai/gpt-4o",
2297
- "bleu": 0.025447626712218067
2298
- },
2299
  {
2300
  "model": "anthropic/claude-3.5-sonnet",
2301
  "bleu": 0.06711853873605253
2302
- },
2303
- {
2304
- "model": "meta-llama/llama-3.1-405b-instruct",
2305
- "bleu": 0.002679704493921361
2306
- },
2307
- {
2308
- "model": "mistralai/mistral-large",
2309
- "bleu": 0.013442877254370728
2310
- },
2311
- {
2312
- "model": "qwen/qwen-2.5-72b-instruct",
2313
- "bleu": 0.005062086703614178
2314
  }
2315
  ],
2316
- "bleu": 0.022750166780035372
2317
  },
2318
  {
2319
  "language_name": "Banjar",
@@ -2405,28 +2197,12 @@
2405
  "language_code": "ewe",
2406
  "speakers": 3000000.0,
2407
  "scores": [
2408
- {
2409
- "model": "openai/gpt-4o",
2410
- "bleu": 0.01735238801571977
2411
- },
2412
  {
2413
  "model": "anthropic/claude-3.5-sonnet",
2414
  "bleu": 0.08106169448483001
2415
- },
2416
- {
2417
- "model": "meta-llama/llama-3.1-405b-instruct",
2418
- "bleu": 0.010779616104049154
2419
- },
2420
- {
2421
- "model": "mistralai/mistral-large",
2422
- "bleu": 0.037190408434750306
2423
- },
2424
- {
2425
- "model": "qwen/qwen-2.5-72b-instruct",
2426
- "bleu": 0.006988505107902836
2427
  }
2428
  ],
2429
- "bleu": 0.030674522429450412
2430
  },
2431
  {
2432
  "language_name": "Tosk Albanian",
@@ -3158,28 +2934,12 @@
3158
  "language_code": "srd",
3159
  "speakers": 1300000.0,
3160
  "scores": [
3161
- {
3162
- "model": "openai/gpt-4o",
3163
- "bleu": 0.039786729911513496
3164
- },
3165
  {
3166
  "model": "anthropic/claude-3.5-sonnet",
3167
  "bleu": 0.01817225070836904
3168
- },
3169
- {
3170
- "model": "meta-llama/llama-3.1-405b-instruct",
3171
- "bleu": 0.0
3172
- },
3173
- {
3174
- "model": "mistralai/mistral-large",
3175
- "bleu": 0.03789690570026145
3176
- },
3177
- {
3178
- "model": "qwen/qwen-2.5-72b-instruct",
3179
- "bleu": 0.028031956319831585
3180
  }
3181
  ],
3182
- "bleu": 0.024777568527995116
3183
  },
3184
  {
3185
  "language_name": "Emilian",
@@ -3285,28 +3045,12 @@
3285
  "language_code": "ekk",
3286
  "speakers": 1164770.0,
3287
  "scores": [
3288
- {
3289
- "model": "openai/gpt-4o",
3290
- "bleu": 0.32857986618400864
3291
- },
3292
  {
3293
  "model": "anthropic/claude-3.5-sonnet",
3294
  "bleu": 0.3107627601397992
3295
- },
3296
- {
3297
- "model": "meta-llama/llama-3.1-405b-instruct",
3298
- "bleu": 0.264677035091384
3299
- },
3300
- {
3301
- "model": "mistralai/mistral-large",
3302
- "bleu": 0.23351285522302628
3303
- },
3304
- {
3305
- "model": "qwen/qwen-2.5-72b-instruct",
3306
- "bleu": 0.1151818587731128
3307
  }
3308
  ],
3309
- "bleu": 0.2505428750822662
3310
  },
3311
  {
3312
  "language_name": "Muong",
@@ -3548,28 +3292,12 @@
3548
  "language_code": "quy",
3549
  "speakers": 918200.0,
3550
  "scores": [
3551
- {
3552
- "model": "openai/gpt-4o",
3553
- "bleu": 0.04264750796981389
3554
- },
3555
  {
3556
  "model": "anthropic/claude-3.5-sonnet",
3557
  "bleu": 0.07871425240461387
3558
- },
3559
- {
3560
- "model": "meta-llama/llama-3.1-405b-instruct",
3561
- "bleu": 0.016883187863719572
3562
- },
3563
- {
3564
- "model": "mistralai/mistral-large",
3565
- "bleu": 0.04439823175345493
3566
- },
3567
- {
3568
- "model": "qwen/qwen-2.5-72b-instruct",
3569
- "bleu": 0.013066506239359036
3570
  }
3571
  ],
3572
- "bleu": 0.039141937246192256
3573
  },
3574
  {
3575
  "language_name": "Nuer",
@@ -4145,28 +3873,12 @@
4145
  "language_code": "szl",
4146
  "speakers": 522000.0,
4147
  "scores": [
4148
- {
4149
- "model": "openai/gpt-4o",
4150
- "bleu": 0.1560774913216495
4151
- },
4152
  {
4153
  "model": "anthropic/claude-3.5-sonnet",
4154
  "bleu": 0.17422519619712065
4155
- },
4156
- {
4157
- "model": "meta-llama/llama-3.1-405b-instruct",
4158
- "bleu": 0.12400774609673239
4159
- },
4160
- {
4161
- "model": "mistralai/mistral-large",
4162
- "bleu": 0.09674030113339326
4163
- },
4164
- {
4165
- "model": "qwen/qwen-2.5-72b-instruct",
4166
- "bleu": 0.096266902845627
4167
  }
4168
  ],
4169
- "bleu": 0.12946352751890455
4170
  },
4171
  {
4172
  "language_name": "Flaaitaal",
@@ -4408,28 +4120,12 @@
4408
  "language_code": "smo",
4409
  "speakers": 415720.0,
4410
  "scores": [
4411
- {
4412
- "model": "openai/gpt-4o",
4413
- "bleu": 0.21326629811705108
4414
- },
4415
  {
4416
  "model": "anthropic/claude-3.5-sonnet",
4417
  "bleu": 0.2024917287660747
4418
- },
4419
- {
4420
- "model": "meta-llama/llama-3.1-405b-instruct",
4421
- "bleu": 0.16385599822945093
4422
- },
4423
- {
4424
- "model": "mistralai/mistral-large",
4425
- "bleu": 0.08705749106630675
4426
- },
4427
- {
4428
- "model": "qwen/qwen-2.5-72b-instruct",
4429
- "bleu": 0.021765197503505862
4430
  }
4431
  ],
4432
- "bleu": 0.1376873427364779
4433
  },
4434
  {
4435
  "language_name": "Sranan Tongo",
@@ -4682,28 +4378,12 @@
4682
  "language_code": "pap",
4683
  "speakers": 321300.0,
4684
  "scores": [
4685
- {
4686
- "model": "openai/gpt-4o",
4687
- "bleu": 0.22785468046191032
4688
- },
4689
  {
4690
  "model": "anthropic/claude-3.5-sonnet",
4691
  "bleu": 0.28092666579128994
4692
- },
4693
- {
4694
- "model": "meta-llama/llama-3.1-405b-instruct",
4695
- "bleu": 0.22002899232464
4696
- },
4697
- {
4698
- "model": "mistralai/mistral-large",
4699
- "bleu": 0.11261357394303675
4700
- },
4701
- {
4702
- "model": "qwen/qwen-2.5-72b-instruct",
4703
- "bleu": 0.06527147003401967
4704
  }
4705
  ],
4706
- "bleu": 0.18133907651097933
4707
  },
4708
  {
4709
  "language_name": "Kiembu",
@@ -5196,28 +4876,12 @@
5196
  "language_code": "ltg",
5197
  "speakers": 200000.0,
5198
  "scores": [
5199
- {
5200
- "model": "openai/gpt-4o",
5201
- "bleu": 0.06220005456851305
5202
- },
5203
  {
5204
  "model": "anthropic/claude-3.5-sonnet",
5205
  "bleu": 0.16898752975227693
5206
- },
5207
- {
5208
- "model": "meta-llama/llama-3.1-405b-instruct",
5209
- "bleu": 0.07373773186072476
5210
- },
5211
- {
5212
- "model": "mistralai/mistral-large",
5213
- "bleu": 0.04077981475688863
5214
- },
5215
- {
5216
- "model": "qwen/qwen-2.5-72b-instruct",
5217
- "bleu": 0.02988351640091225
5218
  }
5219
  ],
5220
- "bleu": 0.07511772946786313
5221
  },
5222
  {
5223
  "language_name": "Dongxiang",
@@ -13903,28 +13567,12 @@
13903
  "language_code": "arb",
13904
  "speakers": 0.0,
13905
  "scores": [
13906
- {
13907
- "model": "openai/gpt-4o",
13908
- "bleu": 0.09526855781979184
13909
- },
13910
  {
13911
  "model": "anthropic/claude-3.5-sonnet",
13912
  "bleu": 0.1160671201312185
13913
- },
13914
- {
13915
- "model": "meta-llama/llama-3.1-405b-instruct",
13916
- "bleu": 0.04096256526230796
13917
- },
13918
- {
13919
- "model": "mistralai/mistral-large",
13920
- "bleu": 0.07433538408053277
13921
- },
13922
- {
13923
- "model": "qwen/qwen-2.5-72b-instruct",
13924
- "bleu": 0.012873795681693922
13925
  }
13926
  ],
13927
- "bleu": 0.067901484595109
13928
  },
13929
  {
13930
  "language_name": "ocm",
@@ -22297,28 +21945,12 @@
22297
  "language_code": "dik",
22298
  "speakers": 0,
22299
  "scores": [
22300
- {
22301
- "model": "openai/gpt-4o",
22302
- "bleu": 0.0071179726075512725
22303
- },
22304
  {
22305
  "model": "anthropic/claude-3.5-sonnet",
22306
  "bleu": 0.04004591158378547
22307
- },
22308
- {
22309
- "model": "meta-llama/llama-3.1-405b-instruct",
22310
- "bleu": 0.012071851713508174
22311
- },
22312
- {
22313
- "model": "mistralai/mistral-large",
22314
- "bleu": 0.021620964225193613
22315
- },
22316
- {
22317
- "model": "qwen/qwen-2.5-72b-instruct",
22318
- "bleu": 0.009725228948559986
22319
  }
22320
  ],
22321
- "bleu": 0.018116385815719706
22322
  },
22323
  {
22324
  "language_name": "Dilling",
 
6
  "scores": [
7
  {
8
  "model": "anthropic/claude-3.5-sonnet",
9
+ "bleu": 0.438607997913414
10
  }
11
  ],
12
+ "bleu": 0.438607997913414
13
  },
14
  {
15
  "language_name": "Mandarin Chinese",
 
18
  "scores": [
19
  {
20
  "model": "anthropic/claude-3.5-sonnet",
21
+ "bleu": 0.28859709196576455
22
  }
23
  ],
24
+ "bleu": 0.28859709196576455
25
  },
26
  {
27
  "language_name": "Spanish",
 
30
  "scores": [
31
  {
32
  "model": "anthropic/claude-3.5-sonnet",
33
+ "bleu": 0.31529908667129014
34
  }
35
  ],
36
+ "bleu": 0.31529908667129014
37
  },
38
  {
39
  "language_name": "Hindi",
 
42
  "scores": [
43
  {
44
  "model": "anthropic/claude-3.5-sonnet",
45
+ "bleu": 0.3063790221508274
46
  }
47
  ],
48
+ "bleu": 0.3063790221508274
49
  },
50
  {
51
  "language_name": "Bengali",
 
54
  "scores": [
55
  {
56
  "model": "anthropic/claude-3.5-sonnet",
57
+ "bleu": 0.2932161392776923
58
  }
59
  ],
60
+ "bleu": 0.2932161392776923
61
  },
62
  {
63
  "language_name": "Portuguese",
 
78
  "scores": [
79
  {
80
  "model": "anthropic/claude-3.5-sonnet",
81
+ "bleu": 0.4225557942193369
82
  }
83
  ],
84
+ "bleu": 0.4225557942193369
85
  },
86
  {
87
  "language_name": "Indonesian",
 
114
  "scores": [
115
  {
116
  "model": "anthropic/claude-3.5-sonnet",
117
+ "bleu": 0.3224776412158195
118
  }
119
  ],
120
+ "bleu": 0.3224776412158195
121
  },
122
  {
123
  "language_name": "Eastern Punjabi",
 
126
  "scores": [
127
  {
128
  "model": "anthropic/claude-3.5-sonnet",
129
+ "bleu": 0.3441710075977166
130
  }
131
  ],
132
+ "bleu": 0.3441710075977166
133
  },
134
  {
135
  "language_name": "Standard German",
 
212
  "language_code": "mar",
213
  "speakers": 83100000.0,
214
  "scores": [
 
 
 
 
215
  {
216
  "model": "anthropic/claude-3.5-sonnet",
217
  "bleu": 0.2333407635240652
 
 
 
 
 
 
 
 
 
 
 
 
218
  }
219
  ],
220
+ "bleu": 0.2333407635240652
221
  },
222
  {
223
  "language_name": "Swahili",
 
238
  "scores": [
239
  {
240
  "model": "anthropic/claude-3.5-sonnet",
241
+ "bleu": 0.31845792720586547
242
  }
243
  ],
244
+ "bleu": 0.31845792720586547
245
  },
246
  {
247
  "language_name": "Telugu",
 
269
  "scores": [
270
  {
271
  "model": "anthropic/claude-3.5-sonnet",
272
+ "bleu": 0.28861844740070713
273
  }
274
  ],
275
+ "bleu": 0.28861844740070713
276
  },
277
  {
278
  "language_name": "Vietnamese",
 
341
  "scores": [
342
  {
343
  "model": "anthropic/claude-3.5-sonnet",
344
+ "bleu": 0.27067019149599314
345
  }
346
  ],
347
+ "bleu": 0.27067019149599314
348
  },
349
  {
350
  "language_name": "Bhojpuri",
 
398
  "language_code": "apc",
399
  "speakers": 44000000.0,
400
  "scores": [
 
 
 
 
401
  {
402
  "model": "anthropic/claude-3.5-sonnet",
403
  "bleu": 0.21013619903144296
 
 
 
 
 
 
 
 
 
 
 
 
404
  }
405
  ],
406
+ "bleu": 0.21013619903144296
407
  },
408
  {
409
  "language_name": "Hausa",
 
483
  "scores": [
484
  {
485
  "model": "anthropic/claude-3.5-sonnet",
486
+ "bleu": 0.4055817497511186
487
  }
488
  ],
489
+ "bleu": 0.4055817497511186
490
  },
491
  {
492
  "language_name": "Xiang Chinese",
 
621
  "scores": [
622
  {
623
  "model": "anthropic/claude-3.5-sonnet",
624
+ "bleu": 0.17452025039334695
625
  }
626
  ],
627
+ "bleu": 0.17452025039334695
628
  },
629
  {
630
  "language_name": "Northern Uzbek",
 
667
  "language_code": "apc",
668
  "speakers": 24600000.0,
669
  "scores": [
 
 
 
 
670
  {
671
  "model": "anthropic/claude-3.5-sonnet",
672
  "bleu": 0.21013619903144296
 
 
 
 
 
 
 
 
 
 
 
 
673
  }
674
  ],
675
+ "bleu": 0.21013619903144296
676
  },
677
  {
678
  "language_name": "Romanian",
 
869
  "language_code": "zul",
870
  "speakers": 15700000.0,
871
  "scores": [
 
 
 
 
872
  {
873
  "model": "anthropic/claude-3.5-sonnet",
874
  "bleu": 0.20245371733247658
 
 
 
 
 
 
 
 
 
 
 
 
875
  }
876
  ],
877
+ "bleu": 0.20245371733247658
878
  },
879
  {
880
  "language_name": "Mesopotamian Arabic",
 
1040
  "language_code": "kaz",
1041
  "speakers": 13161980.0,
1042
  "scores": [
 
 
 
 
1043
  {
1044
  "model": "anthropic/claude-3.5-sonnet",
1045
  "bleu": 0.25054345947985385
 
 
 
 
 
 
 
 
 
 
 
 
1046
  }
1047
  ],
1048
+ "bleu": 0.25054345947985385
1049
  },
1050
  {
1051
  "language_name": "Chittagonian",
 
1059
  "language_code": "tso",
1060
  "speakers": 13000000.0,
1061
  "scores": [
 
 
 
 
1062
  {
1063
  "model": "anthropic/claude-3.5-sonnet",
1064
  "bleu": 0.1952038863089787
 
 
 
 
 
 
 
 
 
 
 
 
1065
  }
1066
  ],
1067
+ "bleu": 0.1952038863089787
1068
  },
1069
  {
1070
  "language_name": "Hungarian",
 
1083
  "language_code": "kin",
1084
  "speakers": 12100000.0,
1085
  "scores": [
 
 
 
 
1086
  {
1087
  "model": "anthropic/claude-3.5-sonnet",
1088
  "bleu": 0.21847668603031067
 
 
 
 
 
 
 
 
 
 
 
 
1089
  }
1090
  ],
1091
+ "bleu": 0.21847668603031067
1092
  },
1093
  {
1094
  "language_name": "Chichewa",
1095
  "language_code": "nya",
1096
  "speakers": 12000000.0,
1097
  "scores": [
 
 
 
 
1098
  {
1099
  "model": "anthropic/claude-3.5-sonnet",
1100
  "bleu": 0.17401113784791736
 
 
 
 
 
 
 
 
 
 
 
 
1101
  }
1102
  ],
1103
+ "bleu": 0.17401113784791736
1104
  },
1105
  {
1106
  "language_name": "Tunisian Arabic",
 
1266
  "language_code": "hat",
1267
  "speakers": 9600000.0,
1268
  "scores": [
 
 
 
 
1269
  {
1270
  "model": "anthropic/claude-3.5-sonnet",
1271
+ "bleu": 0.2780257097562799
 
 
 
 
 
 
 
 
 
 
 
 
1272
  }
1273
  ],
1274
+ "bleu": 0.2780257097562799
1275
  },
1276
  {
1277
  "language_name": "Dari",
 
1314
  "language_code": "azj",
1315
  "speakers": 9220610.0,
1316
  "scores": [
 
 
 
 
1317
  {
1318
  "model": "anthropic/claude-3.5-sonnet",
1319
  "bleu": 0.24029548337141315
 
 
 
 
 
 
 
 
 
 
 
 
1320
  }
1321
  ],
1322
+ "bleu": 0.24029548337141315
1323
  },
1324
  {
1325
  "language_name": "Congo Swahili",
 
1537
  "language_code": "kas",
1538
  "speakers": 6900000.0,
1539
  "scores": [
 
 
 
 
1540
  {
1541
  "model": "anthropic/claude-3.5-sonnet",
1542
  "bleu": 0.06589195125918151
 
 
 
 
 
 
 
 
 
 
 
 
1543
  }
1544
  ],
1545
+ "bleu": 0.06589195125918151
1546
  },
1547
  {
1548
  "language_name": "Armenian",
 
1563
  "scores": [
1564
  {
1565
  "model": "openai/gpt-4o",
1566
+ "bleu": 0.0315377397347853
1567
  },
1568
  {
1569
  "model": "anthropic/claude-3.5-sonnet",
1570
+ "bleu": 0.09333641225693347
1571
  },
1572
  {
1573
  "model": "meta-llama/llama-3.1-405b-instruct",
1574
+ "bleu": 0.030121023774013433
1575
  },
1576
  {
1577
  "model": "mistralai/mistral-large",
1578
+ "bleu": 0.04230836190600749
1579
  },
1580
  {
1581
  "model": "qwen/qwen-2.5-72b-instruct",
1582
+ "bleu": 0.027727350391206936
1583
  }
1584
  ],
1585
+ "bleu": 0.04500617761258932
1586
  },
1587
  {
1588
  "language_name": "Central Pashto",
 
1957
  "language_code": "lit",
1958
  "speakers": 4000000.0,
1959
  "scores": [
 
 
 
 
1960
  {
1961
  "model": "anthropic/claude-3.5-sonnet",
1962
  "bleu": 0.30370845804188434
 
 
 
 
 
 
 
 
 
 
 
 
1963
  }
1964
  ],
1965
+ "bleu": 0.30370845804188434
1966
  },
1967
  {
1968
  "language_name": "Tok Pisin",
 
2100
  "language_code": "ace",
2101
  "speakers": 3500032.0,
2102
  "scores": [
 
 
 
 
2103
  {
2104
  "model": "anthropic/claude-3.5-sonnet",
2105
  "bleu": 0.06711853873605253
 
 
 
 
 
 
 
 
 
 
 
 
2106
  }
2107
  ],
2108
+ "bleu": 0.06711853873605253
2109
  },
2110
  {
2111
  "language_name": "Banjar",
 
2197
  "language_code": "ewe",
2198
  "speakers": 3000000.0,
2199
  "scores": [
 
 
 
 
2200
  {
2201
  "model": "anthropic/claude-3.5-sonnet",
2202
  "bleu": 0.08106169448483001
 
 
 
 
 
 
 
 
 
 
 
 
2203
  }
2204
  ],
2205
+ "bleu": 0.08106169448483001
2206
  },
2207
  {
2208
  "language_name": "Tosk Albanian",
 
2934
  "language_code": "srd",
2935
  "speakers": 1300000.0,
2936
  "scores": [
 
 
 
 
2937
  {
2938
  "model": "anthropic/claude-3.5-sonnet",
2939
  "bleu": 0.01817225070836904
 
 
 
 
 
 
 
 
 
 
 
 
2940
  }
2941
  ],
2942
+ "bleu": 0.01817225070836904
2943
  },
2944
  {
2945
  "language_name": "Emilian",
 
3045
  "language_code": "ekk",
3046
  "speakers": 1164770.0,
3047
  "scores": [
 
 
 
 
3048
  {
3049
  "model": "anthropic/claude-3.5-sonnet",
3050
  "bleu": 0.3107627601397992
 
 
 
 
 
 
 
 
 
 
 
 
3051
  }
3052
  ],
3053
+ "bleu": 0.3107627601397992
3054
  },
3055
  {
3056
  "language_name": "Muong",
 
3292
  "language_code": "quy",
3293
  "speakers": 918200.0,
3294
  "scores": [
 
 
 
 
3295
  {
3296
  "model": "anthropic/claude-3.5-sonnet",
3297
  "bleu": 0.07871425240461387
 
 
 
 
 
 
 
 
 
 
 
 
3298
  }
3299
  ],
3300
+ "bleu": 0.07871425240461387
3301
  },
3302
  {
3303
  "language_name": "Nuer",
 
3873
  "language_code": "szl",
3874
  "speakers": 522000.0,
3875
  "scores": [
 
 
 
 
3876
  {
3877
  "model": "anthropic/claude-3.5-sonnet",
3878
  "bleu": 0.17422519619712065
 
 
 
 
 
 
 
 
 
 
 
 
3879
  }
3880
  ],
3881
+ "bleu": 0.17422519619712065
3882
  },
3883
  {
3884
  "language_name": "Flaaitaal",
 
4120
  "language_code": "smo",
4121
  "speakers": 415720.0,
4122
  "scores": [
 
 
 
 
4123
  {
4124
  "model": "anthropic/claude-3.5-sonnet",
4125
  "bleu": 0.2024917287660747
 
 
 
 
 
 
 
 
 
 
 
 
4126
  }
4127
  ],
4128
+ "bleu": 0.2024917287660747
4129
  },
4130
  {
4131
  "language_name": "Sranan Tongo",
 
4378
  "language_code": "pap",
4379
  "speakers": 321300.0,
4380
  "scores": [
 
 
 
 
4381
  {
4382
  "model": "anthropic/claude-3.5-sonnet",
4383
  "bleu": 0.28092666579128994
 
 
 
 
 
 
 
 
 
 
 
 
4384
  }
4385
  ],
4386
+ "bleu": 0.28092666579128994
4387
  },
4388
  {
4389
  "language_name": "Kiembu",
 
4876
  "language_code": "ltg",
4877
  "speakers": 200000.0,
4878
  "scores": [
 
 
 
 
4879
  {
4880
  "model": "anthropic/claude-3.5-sonnet",
4881
  "bleu": 0.16898752975227693
 
 
 
 
 
 
 
 
 
 
 
 
4882
  }
4883
  ],
4884
+ "bleu": 0.16898752975227693
4885
  },
4886
  {
4887
  "language_name": "Dongxiang",
 
13567
  "language_code": "arb",
13568
  "speakers": 0.0,
13569
  "scores": [
 
 
 
 
13570
  {
13571
  "model": "anthropic/claude-3.5-sonnet",
13572
  "bleu": 0.1160671201312185
 
 
 
 
 
 
 
 
 
 
 
 
13573
  }
13574
  ],
13575
+ "bleu": 0.1160671201312185
13576
  },
13577
  {
13578
  "language_name": "ocm",
 
21945
  "language_code": "dik",
21946
  "speakers": 0,
21947
  "scores": [
 
 
 
 
21948
  {
21949
  "model": "anthropic/claude-3.5-sonnet",
21950
  "bleu": 0.04004591158378547
 
 
 
 
 
 
 
 
 
 
 
 
21951
  }
21952
  ],
21953
+ "bleu": 0.04004591158378547
21954
  },
21955
  {
21956
  "language_name": "Dilling",