David Pomerenke
committed on
Commit
·
07dcc45
1
Parent(s):
3a698ab
Don't translate a language to itself
Browse files
This makes the language comparison fairer.
- languagebench.py +8 -3
- results.json +56 -424
languagebench.py
CHANGED
@@ -96,12 +96,13 @@ languages["in_benchmark"] = languages["in_benchmark"].fillna(False)
|
|
96 |
languages = languages.sort_values(by="speakers", ascending=False)
|
97 |
|
98 |
# sample languages to translate from
|
|
|
99 |
original_languages = languages[languages["in_benchmark"]].sample(
|
100 |
-
n=n_sentences, weights="speakers", replace=True, random_state=42
|
101 |
)
|
102 |
# sample languages to analyze with all models
|
103 |
detailed_target_languages = languages[languages["in_benchmark"]].sample(
|
104 |
-
n=
|
105 |
)
|
106 |
|
107 |
|
@@ -176,9 +177,13 @@ async def main():
|
|
176 |
not in detailed_target_languages.language_code.values
|
177 |
):
|
178 |
continue
|
|
|
|
|
|
|
|
|
179 |
original_sentences = [
|
180 |
load_sentences(lang)[i]
|
181 |
-
for i, lang in enumerate(
|
182 |
]
|
183 |
print(model)
|
184 |
predictions = [
|
|
|
96 |
languages = languages.sort_values(by="speakers", ascending=False)
|
97 |
|
98 |
# sample languages to translate from
|
99 |
+
# when translating e.g. to Mandarin, we drop Mandarin from the sample and use the next samples from the list instead; therefore we need to sample more than n_sentences
|
100 |
original_languages = languages[languages["in_benchmark"]].sample(
|
101 |
+
n=n_sentences * 2, weights="speakers", replace=True, random_state=42
|
102 |
)
|
103 |
# sample languages to analyze with all models
|
104 |
detailed_target_languages = languages[languages["in_benchmark"]].sample(
|
105 |
+
n=3, random_state=42
|
106 |
)
|
107 |
|
108 |
|
|
|
177 |
not in detailed_target_languages.language_code.values
|
178 |
):
|
179 |
continue
|
180 |
+
# drop the target language from the original languages sample
|
181 |
+
_original_languages = original_languages[
|
182 |
+
original_languages.language_code != language.language_code
|
183 |
+
].iloc[:n_sentences]
|
184 |
original_sentences = [
|
185 |
load_sentences(lang)[i]
|
186 |
+
for i, lang in enumerate(_original_languages.itertuples())
|
187 |
]
|
188 |
print(model)
|
189 |
predictions = [
|
results.json
CHANGED
@@ -6,10 +6,10 @@
|
|
6 |
"scores": [
|
7 |
{
|
8 |
"model": "anthropic/claude-3.5-sonnet",
|
9 |
-
"bleu": 0.
|
10 |
}
|
11 |
],
|
12 |
-
"bleu": 0.
|
13 |
},
|
14 |
{
|
15 |
"language_name": "Mandarin Chinese",
|
@@ -18,10 +18,10 @@
|
|
18 |
"scores": [
|
19 |
{
|
20 |
"model": "anthropic/claude-3.5-sonnet",
|
21 |
-
"bleu": 0.
|
22 |
}
|
23 |
],
|
24 |
-
"bleu": 0.
|
25 |
},
|
26 |
{
|
27 |
"language_name": "Spanish",
|
@@ -30,10 +30,10 @@
|
|
30 |
"scores": [
|
31 |
{
|
32 |
"model": "anthropic/claude-3.5-sonnet",
|
33 |
-
"bleu": 0.
|
34 |
}
|
35 |
],
|
36 |
-
"bleu": 0.
|
37 |
},
|
38 |
{
|
39 |
"language_name": "Hindi",
|
@@ -42,10 +42,10 @@
|
|
42 |
"scores": [
|
43 |
{
|
44 |
"model": "anthropic/claude-3.5-sonnet",
|
45 |
-
"bleu": 0.
|
46 |
}
|
47 |
],
|
48 |
-
"bleu": 0.
|
49 |
},
|
50 |
{
|
51 |
"language_name": "Bengali",
|
@@ -54,10 +54,10 @@
|
|
54 |
"scores": [
|
55 |
{
|
56 |
"model": "anthropic/claude-3.5-sonnet",
|
57 |
-
"bleu": 0.
|
58 |
}
|
59 |
],
|
60 |
-
"bleu": 0.
|
61 |
},
|
62 |
{
|
63 |
"language_name": "Portuguese",
|
@@ -78,10 +78,10 @@
|
|
78 |
"scores": [
|
79 |
{
|
80 |
"model": "anthropic/claude-3.5-sonnet",
|
81 |
-
"bleu": 0.
|
82 |
}
|
83 |
],
|
84 |
-
"bleu": 0.
|
85 |
},
|
86 |
{
|
87 |
"language_name": "Indonesian",
|
@@ -114,10 +114,10 @@
|
|
114 |
"scores": [
|
115 |
{
|
116 |
"model": "anthropic/claude-3.5-sonnet",
|
117 |
-
"bleu": 0.
|
118 |
}
|
119 |
],
|
120 |
-
"bleu": 0.
|
121 |
},
|
122 |
{
|
123 |
"language_name": "Eastern Punjabi",
|
@@ -126,10 +126,10 @@
|
|
126 |
"scores": [
|
127 |
{
|
128 |
"model": "anthropic/claude-3.5-sonnet",
|
129 |
-
"bleu": 0.
|
130 |
}
|
131 |
],
|
132 |
-
"bleu": 0.
|
133 |
},
|
134 |
{
|
135 |
"language_name": "Standard German",
|
@@ -212,28 +212,12 @@
|
|
212 |
"language_code": "mar",
|
213 |
"speakers": 83100000.0,
|
214 |
"scores": [
|
215 |
-
{
|
216 |
-
"model": "openai/gpt-4o",
|
217 |
-
"bleu": 0.22522575757065674
|
218 |
-
},
|
219 |
{
|
220 |
"model": "anthropic/claude-3.5-sonnet",
|
221 |
"bleu": 0.2333407635240652
|
222 |
-
},
|
223 |
-
{
|
224 |
-
"model": "meta-llama/llama-3.1-405b-instruct",
|
225 |
-
"bleu": 0.10203588314680621
|
226 |
-
},
|
227 |
-
{
|
228 |
-
"model": "mistralai/mistral-large",
|
229 |
-
"bleu": 0.16554398005927695
|
230 |
-
},
|
231 |
-
{
|
232 |
-
"model": "qwen/qwen-2.5-72b-instruct",
|
233 |
-
"bleu": 0.0743622715348743
|
234 |
}
|
235 |
],
|
236 |
-
"bleu": 0.
|
237 |
},
|
238 |
{
|
239 |
"language_name": "Swahili",
|
@@ -254,10 +238,10 @@
|
|
254 |
"scores": [
|
255 |
{
|
256 |
"model": "anthropic/claude-3.5-sonnet",
|
257 |
-
"bleu": 0.
|
258 |
}
|
259 |
],
|
260 |
-
"bleu": 0.
|
261 |
},
|
262 |
{
|
263 |
"language_name": "Telugu",
|
@@ -285,10 +269,10 @@
|
|
285 |
"scores": [
|
286 |
{
|
287 |
"model": "anthropic/claude-3.5-sonnet",
|
288 |
-
"bleu": 0.
|
289 |
}
|
290 |
],
|
291 |
-
"bleu": 0.
|
292 |
},
|
293 |
{
|
294 |
"language_name": "Vietnamese",
|
@@ -357,10 +341,10 @@
|
|
357 |
"scores": [
|
358 |
{
|
359 |
"model": "anthropic/claude-3.5-sonnet",
|
360 |
-
"bleu": 0.
|
361 |
}
|
362 |
],
|
363 |
-
"bleu": 0.
|
364 |
},
|
365 |
{
|
366 |
"language_name": "Bhojpuri",
|
@@ -414,28 +398,12 @@
|
|
414 |
"language_code": "apc",
|
415 |
"speakers": 44000000.0,
|
416 |
"scores": [
|
417 |
-
{
|
418 |
-
"model": "openai/gpt-4o",
|
419 |
-
"bleu": 0.3018366486807633
|
420 |
-
},
|
421 |
{
|
422 |
"model": "anthropic/claude-3.5-sonnet",
|
423 |
"bleu": 0.21013619903144296
|
424 |
-
},
|
425 |
-
{
|
426 |
-
"model": "meta-llama/llama-3.1-405b-instruct",
|
427 |
-
"bleu": 0.1499005259127334
|
428 |
-
},
|
429 |
-
{
|
430 |
-
"model": "mistralai/mistral-large",
|
431 |
-
"bleu": 0.18169602550114305
|
432 |
-
},
|
433 |
-
{
|
434 |
-
"model": "qwen/qwen-2.5-72b-instruct",
|
435 |
-
"bleu": 0.02549625724666183
|
436 |
}
|
437 |
],
|
438 |
-
"bleu": 0.
|
439 |
},
|
440 |
{
|
441 |
"language_name": "Hausa",
|
@@ -515,10 +483,10 @@
|
|
515 |
"scores": [
|
516 |
{
|
517 |
"model": "anthropic/claude-3.5-sonnet",
|
518 |
-
"bleu": 0.
|
519 |
}
|
520 |
],
|
521 |
-
"bleu": 0.
|
522 |
},
|
523 |
{
|
524 |
"language_name": "Xiang Chinese",
|
@@ -653,10 +621,10 @@
|
|
653 |
"scores": [
|
654 |
{
|
655 |
"model": "anthropic/claude-3.5-sonnet",
|
656 |
-
"bleu": 0.
|
657 |
}
|
658 |
],
|
659 |
-
"bleu": 0.
|
660 |
},
|
661 |
{
|
662 |
"language_name": "Northern Uzbek",
|
@@ -699,28 +667,12 @@
|
|
699 |
"language_code": "apc",
|
700 |
"speakers": 24600000.0,
|
701 |
"scores": [
|
702 |
-
{
|
703 |
-
"model": "openai/gpt-4o",
|
704 |
-
"bleu": 0.3018366486807633
|
705 |
-
},
|
706 |
{
|
707 |
"model": "anthropic/claude-3.5-sonnet",
|
708 |
"bleu": 0.21013619903144296
|
709 |
-
},
|
710 |
-
{
|
711 |
-
"model": "meta-llama/llama-3.1-405b-instruct",
|
712 |
-
"bleu": 0.1499005259127334
|
713 |
-
},
|
714 |
-
{
|
715 |
-
"model": "mistralai/mistral-large",
|
716 |
-
"bleu": 0.18169602550114305
|
717 |
-
},
|
718 |
-
{
|
719 |
-
"model": "qwen/qwen-2.5-72b-instruct",
|
720 |
-
"bleu": 0.02549625724666183
|
721 |
}
|
722 |
],
|
723 |
-
"bleu": 0.
|
724 |
},
|
725 |
{
|
726 |
"language_name": "Romanian",
|
@@ -917,28 +869,12 @@
|
|
917 |
"language_code": "zul",
|
918 |
"speakers": 15700000.0,
|
919 |
"scores": [
|
920 |
-
{
|
921 |
-
"model": "openai/gpt-4o",
|
922 |
-
"bleu": 0.22516622902472544
|
923 |
-
},
|
924 |
{
|
925 |
"model": "anthropic/claude-3.5-sonnet",
|
926 |
"bleu": 0.20245371733247658
|
927 |
-
},
|
928 |
-
{
|
929 |
-
"model": "meta-llama/llama-3.1-405b-instruct",
|
930 |
-
"bleu": 0.0378198916279553
|
931 |
-
},
|
932 |
-
{
|
933 |
-
"model": "mistralai/mistral-large",
|
934 |
-
"bleu": 0.0515062321283951
|
935 |
-
},
|
936 |
-
{
|
937 |
-
"model": "qwen/qwen-2.5-72b-instruct",
|
938 |
-
"bleu": 0.01701042472693127
|
939 |
}
|
940 |
],
|
941 |
-
"bleu": 0.
|
942 |
},
|
943 |
{
|
944 |
"language_name": "Mesopotamian Arabic",
|
@@ -1104,28 +1040,12 @@
|
|
1104 |
"language_code": "kaz",
|
1105 |
"speakers": 13161980.0,
|
1106 |
"scores": [
|
1107 |
-
{
|
1108 |
-
"model": "openai/gpt-4o",
|
1109 |
-
"bleu": 0.2865687127063326
|
1110 |
-
},
|
1111 |
{
|
1112 |
"model": "anthropic/claude-3.5-sonnet",
|
1113 |
"bleu": 0.25054345947985385
|
1114 |
-
},
|
1115 |
-
{
|
1116 |
-
"model": "meta-llama/llama-3.1-405b-instruct",
|
1117 |
-
"bleu": 0.181288673149438
|
1118 |
-
},
|
1119 |
-
{
|
1120 |
-
"model": "mistralai/mistral-large",
|
1121 |
-
"bleu": 0.188276270677333
|
1122 |
-
},
|
1123 |
-
{
|
1124 |
-
"model": "qwen/qwen-2.5-72b-instruct",
|
1125 |
-
"bleu": 0.054576267347929
|
1126 |
}
|
1127 |
],
|
1128 |
-
"bleu": 0.
|
1129 |
},
|
1130 |
{
|
1131 |
"language_name": "Chittagonian",
|
@@ -1139,28 +1059,12 @@
|
|
1139 |
"language_code": "tso",
|
1140 |
"speakers": 13000000.0,
|
1141 |
"scores": [
|
1142 |
-
{
|
1143 |
-
"model": "openai/gpt-4o",
|
1144 |
-
"bleu": 0.17202786716122928
|
1145 |
-
},
|
1146 |
{
|
1147 |
"model": "anthropic/claude-3.5-sonnet",
|
1148 |
"bleu": 0.1952038863089787
|
1149 |
-
},
|
1150 |
-
{
|
1151 |
-
"model": "meta-llama/llama-3.1-405b-instruct",
|
1152 |
-
"bleu": 0.0866195039237461
|
1153 |
-
},
|
1154 |
-
{
|
1155 |
-
"model": "mistralai/mistral-large",
|
1156 |
-
"bleu": 0.06062634407793842
|
1157 |
-
},
|
1158 |
-
{
|
1159 |
-
"model": "qwen/qwen-2.5-72b-instruct",
|
1160 |
-
"bleu": 0.014720823522746878
|
1161 |
}
|
1162 |
],
|
1163 |
-
"bleu": 0.
|
1164 |
},
|
1165 |
{
|
1166 |
"language_name": "Hungarian",
|
@@ -1179,56 +1083,24 @@
|
|
1179 |
"language_code": "kin",
|
1180 |
"speakers": 12100000.0,
|
1181 |
"scores": [
|
1182 |
-
{
|
1183 |
-
"model": "openai/gpt-4o",
|
1184 |
-
"bleu": 0.22728268497187282
|
1185 |
-
},
|
1186 |
{
|
1187 |
"model": "anthropic/claude-3.5-sonnet",
|
1188 |
"bleu": 0.21847668603031067
|
1189 |
-
},
|
1190 |
-
{
|
1191 |
-
"model": "meta-llama/llama-3.1-405b-instruct",
|
1192 |
-
"bleu": 0.10074185841058973
|
1193 |
-
},
|
1194 |
-
{
|
1195 |
-
"model": "mistralai/mistral-large",
|
1196 |
-
"bleu": 0.059392901501730454
|
1197 |
-
},
|
1198 |
-
{
|
1199 |
-
"model": "qwen/qwen-2.5-72b-instruct",
|
1200 |
-
"bleu": 0.0074286483802763524
|
1201 |
}
|
1202 |
],
|
1203 |
-
"bleu": 0.
|
1204 |
},
|
1205 |
{
|
1206 |
"language_name": "Chichewa",
|
1207 |
"language_code": "nya",
|
1208 |
"speakers": 12000000.0,
|
1209 |
"scores": [
|
1210 |
-
{
|
1211 |
-
"model": "openai/gpt-4o",
|
1212 |
-
"bleu": 0.1895725455230325
|
1213 |
-
},
|
1214 |
{
|
1215 |
"model": "anthropic/claude-3.5-sonnet",
|
1216 |
"bleu": 0.17401113784791736
|
1217 |
-
},
|
1218 |
-
{
|
1219 |
-
"model": "meta-llama/llama-3.1-405b-instruct",
|
1220 |
-
"bleu": 0.09658218667531897
|
1221 |
-
},
|
1222 |
-
{
|
1223 |
-
"model": "mistralai/mistral-large",
|
1224 |
-
"bleu": 0.04964676481672043
|
1225 |
-
},
|
1226 |
-
{
|
1227 |
-
"model": "qwen/qwen-2.5-72b-instruct",
|
1228 |
-
"bleu": 0.02040785086927957
|
1229 |
}
|
1230 |
],
|
1231 |
-
"bleu": 0.
|
1232 |
},
|
1233 |
{
|
1234 |
"language_name": "Tunisian Arabic",
|
@@ -1394,28 +1266,12 @@
|
|
1394 |
"language_code": "hat",
|
1395 |
"speakers": 9600000.0,
|
1396 |
"scores": [
|
1397 |
-
{
|
1398 |
-
"model": "openai/gpt-4o",
|
1399 |
-
"bleu": 0.288129435659592
|
1400 |
-
},
|
1401 |
{
|
1402 |
"model": "anthropic/claude-3.5-sonnet",
|
1403 |
-
"bleu": 0.
|
1404 |
-
},
|
1405 |
-
{
|
1406 |
-
"model": "meta-llama/llama-3.1-405b-instruct",
|
1407 |
-
"bleu": 0.26523811773703404
|
1408 |
-
},
|
1409 |
-
{
|
1410 |
-
"model": "mistralai/mistral-large",
|
1411 |
-
"bleu": 0.20181127876225555
|
1412 |
-
},
|
1413 |
-
{
|
1414 |
-
"model": "qwen/qwen-2.5-72b-instruct",
|
1415 |
-
"bleu": 0.10821493612429332
|
1416 |
}
|
1417 |
],
|
1418 |
-
"bleu": 0.
|
1419 |
},
|
1420 |
{
|
1421 |
"language_name": "Dari",
|
@@ -1458,28 +1314,12 @@
|
|
1458 |
"language_code": "azj",
|
1459 |
"speakers": 9220610.0,
|
1460 |
"scores": [
|
1461 |
-
{
|
1462 |
-
"model": "openai/gpt-4o",
|
1463 |
-
"bleu": 0.20542741434113335
|
1464 |
-
},
|
1465 |
{
|
1466 |
"model": "anthropic/claude-3.5-sonnet",
|
1467 |
"bleu": 0.24029548337141315
|
1468 |
-
},
|
1469 |
-
{
|
1470 |
-
"model": "meta-llama/llama-3.1-405b-instruct",
|
1471 |
-
"bleu": 0.1985709579217795
|
1472 |
-
},
|
1473 |
-
{
|
1474 |
-
"model": "mistralai/mistral-large",
|
1475 |
-
"bleu": 0.16426254772371085
|
1476 |
-
},
|
1477 |
-
{
|
1478 |
-
"model": "qwen/qwen-2.5-72b-instruct",
|
1479 |
-
"bleu": 0.029108557467416214
|
1480 |
}
|
1481 |
],
|
1482 |
-
"bleu": 0.
|
1483 |
},
|
1484 |
{
|
1485 |
"language_name": "Congo Swahili",
|
@@ -1697,28 +1537,12 @@
|
|
1697 |
"language_code": "kas",
|
1698 |
"speakers": 6900000.0,
|
1699 |
"scores": [
|
1700 |
-
{
|
1701 |
-
"model": "openai/gpt-4o",
|
1702 |
-
"bleu": 0.058464446367472944
|
1703 |
-
},
|
1704 |
{
|
1705 |
"model": "anthropic/claude-3.5-sonnet",
|
1706 |
"bleu": 0.06589195125918151
|
1707 |
-
},
|
1708 |
-
{
|
1709 |
-
"model": "meta-llama/llama-3.1-405b-instruct",
|
1710 |
-
"bleu": 0.04553335750357841
|
1711 |
-
},
|
1712 |
-
{
|
1713 |
-
"model": "mistralai/mistral-large",
|
1714 |
-
"bleu": 0.0441481215103373
|
1715 |
-
},
|
1716 |
-
{
|
1717 |
-
"model": "qwen/qwen-2.5-72b-instruct",
|
1718 |
-
"bleu": 0.038143033479956526
|
1719 |
}
|
1720 |
],
|
1721 |
-
"bleu": 0.
|
1722 |
},
|
1723 |
{
|
1724 |
"language_name": "Armenian",
|
@@ -1739,26 +1563,26 @@
|
|
1739 |
"scores": [
|
1740 |
{
|
1741 |
"model": "openai/gpt-4o",
|
1742 |
-
"bleu": 0.
|
1743 |
},
|
1744 |
{
|
1745 |
"model": "anthropic/claude-3.5-sonnet",
|
1746 |
-
"bleu": 0.
|
1747 |
},
|
1748 |
{
|
1749 |
"model": "meta-llama/llama-3.1-405b-instruct",
|
1750 |
-
"bleu": 0.
|
1751 |
},
|
1752 |
{
|
1753 |
"model": "mistralai/mistral-large",
|
1754 |
-
"bleu": 0.
|
1755 |
},
|
1756 |
{
|
1757 |
"model": "qwen/qwen-2.5-72b-instruct",
|
1758 |
-
"bleu": 0.
|
1759 |
}
|
1760 |
],
|
1761 |
-
"bleu": 0.
|
1762 |
},
|
1763 |
{
|
1764 |
"language_name": "Central Pashto",
|
@@ -2133,28 +1957,12 @@
|
|
2133 |
"language_code": "lit",
|
2134 |
"speakers": 4000000.0,
|
2135 |
"scores": [
|
2136 |
-
{
|
2137 |
-
"model": "openai/gpt-4o",
|
2138 |
-
"bleu": 0.2826991552063554
|
2139 |
-
},
|
2140 |
{
|
2141 |
"model": "anthropic/claude-3.5-sonnet",
|
2142 |
"bleu": 0.30370845804188434
|
2143 |
-
},
|
2144 |
-
{
|
2145 |
-
"model": "meta-llama/llama-3.1-405b-instruct",
|
2146 |
-
"bleu": 0.2787991559825463
|
2147 |
-
},
|
2148 |
-
{
|
2149 |
-
"model": "mistralai/mistral-large",
|
2150 |
-
"bleu": 0.22260546031191955
|
2151 |
-
},
|
2152 |
-
{
|
2153 |
-
"model": "qwen/qwen-2.5-72b-instruct",
|
2154 |
-
"bleu": 0.13004923167033042
|
2155 |
}
|
2156 |
],
|
2157 |
-
"bleu": 0.
|
2158 |
},
|
2159 |
{
|
2160 |
"language_name": "Tok Pisin",
|
@@ -2292,28 +2100,12 @@
|
|
2292 |
"language_code": "ace",
|
2293 |
"speakers": 3500032.0,
|
2294 |
"scores": [
|
2295 |
-
{
|
2296 |
-
"model": "openai/gpt-4o",
|
2297 |
-
"bleu": 0.025447626712218067
|
2298 |
-
},
|
2299 |
{
|
2300 |
"model": "anthropic/claude-3.5-sonnet",
|
2301 |
"bleu": 0.06711853873605253
|
2302 |
-
},
|
2303 |
-
{
|
2304 |
-
"model": "meta-llama/llama-3.1-405b-instruct",
|
2305 |
-
"bleu": 0.002679704493921361
|
2306 |
-
},
|
2307 |
-
{
|
2308 |
-
"model": "mistralai/mistral-large",
|
2309 |
-
"bleu": 0.013442877254370728
|
2310 |
-
},
|
2311 |
-
{
|
2312 |
-
"model": "qwen/qwen-2.5-72b-instruct",
|
2313 |
-
"bleu": 0.005062086703614178
|
2314 |
}
|
2315 |
],
|
2316 |
-
"bleu": 0.
|
2317 |
},
|
2318 |
{
|
2319 |
"language_name": "Banjar",
|
@@ -2405,28 +2197,12 @@
|
|
2405 |
"language_code": "ewe",
|
2406 |
"speakers": 3000000.0,
|
2407 |
"scores": [
|
2408 |
-
{
|
2409 |
-
"model": "openai/gpt-4o",
|
2410 |
-
"bleu": 0.01735238801571977
|
2411 |
-
},
|
2412 |
{
|
2413 |
"model": "anthropic/claude-3.5-sonnet",
|
2414 |
"bleu": 0.08106169448483001
|
2415 |
-
},
|
2416 |
-
{
|
2417 |
-
"model": "meta-llama/llama-3.1-405b-instruct",
|
2418 |
-
"bleu": 0.010779616104049154
|
2419 |
-
},
|
2420 |
-
{
|
2421 |
-
"model": "mistralai/mistral-large",
|
2422 |
-
"bleu": 0.037190408434750306
|
2423 |
-
},
|
2424 |
-
{
|
2425 |
-
"model": "qwen/qwen-2.5-72b-instruct",
|
2426 |
-
"bleu": 0.006988505107902836
|
2427 |
}
|
2428 |
],
|
2429 |
-
"bleu": 0.
|
2430 |
},
|
2431 |
{
|
2432 |
"language_name": "Tosk Albanian",
|
@@ -3158,28 +2934,12 @@
|
|
3158 |
"language_code": "srd",
|
3159 |
"speakers": 1300000.0,
|
3160 |
"scores": [
|
3161 |
-
{
|
3162 |
-
"model": "openai/gpt-4o",
|
3163 |
-
"bleu": 0.039786729911513496
|
3164 |
-
},
|
3165 |
{
|
3166 |
"model": "anthropic/claude-3.5-sonnet",
|
3167 |
"bleu": 0.01817225070836904
|
3168 |
-
},
|
3169 |
-
{
|
3170 |
-
"model": "meta-llama/llama-3.1-405b-instruct",
|
3171 |
-
"bleu": 0.0
|
3172 |
-
},
|
3173 |
-
{
|
3174 |
-
"model": "mistralai/mistral-large",
|
3175 |
-
"bleu": 0.03789690570026145
|
3176 |
-
},
|
3177 |
-
{
|
3178 |
-
"model": "qwen/qwen-2.5-72b-instruct",
|
3179 |
-
"bleu": 0.028031956319831585
|
3180 |
}
|
3181 |
],
|
3182 |
-
"bleu": 0.
|
3183 |
},
|
3184 |
{
|
3185 |
"language_name": "Emilian",
|
@@ -3285,28 +3045,12 @@
|
|
3285 |
"language_code": "ekk",
|
3286 |
"speakers": 1164770.0,
|
3287 |
"scores": [
|
3288 |
-
{
|
3289 |
-
"model": "openai/gpt-4o",
|
3290 |
-
"bleu": 0.32857986618400864
|
3291 |
-
},
|
3292 |
{
|
3293 |
"model": "anthropic/claude-3.5-sonnet",
|
3294 |
"bleu": 0.3107627601397992
|
3295 |
-
},
|
3296 |
-
{
|
3297 |
-
"model": "meta-llama/llama-3.1-405b-instruct",
|
3298 |
-
"bleu": 0.264677035091384
|
3299 |
-
},
|
3300 |
-
{
|
3301 |
-
"model": "mistralai/mistral-large",
|
3302 |
-
"bleu": 0.23351285522302628
|
3303 |
-
},
|
3304 |
-
{
|
3305 |
-
"model": "qwen/qwen-2.5-72b-instruct",
|
3306 |
-
"bleu": 0.1151818587731128
|
3307 |
}
|
3308 |
],
|
3309 |
-
"bleu": 0.
|
3310 |
},
|
3311 |
{
|
3312 |
"language_name": "Muong",
|
@@ -3548,28 +3292,12 @@
|
|
3548 |
"language_code": "quy",
|
3549 |
"speakers": 918200.0,
|
3550 |
"scores": [
|
3551 |
-
{
|
3552 |
-
"model": "openai/gpt-4o",
|
3553 |
-
"bleu": 0.04264750796981389
|
3554 |
-
},
|
3555 |
{
|
3556 |
"model": "anthropic/claude-3.5-sonnet",
|
3557 |
"bleu": 0.07871425240461387
|
3558 |
-
},
|
3559 |
-
{
|
3560 |
-
"model": "meta-llama/llama-3.1-405b-instruct",
|
3561 |
-
"bleu": 0.016883187863719572
|
3562 |
-
},
|
3563 |
-
{
|
3564 |
-
"model": "mistralai/mistral-large",
|
3565 |
-
"bleu": 0.04439823175345493
|
3566 |
-
},
|
3567 |
-
{
|
3568 |
-
"model": "qwen/qwen-2.5-72b-instruct",
|
3569 |
-
"bleu": 0.013066506239359036
|
3570 |
}
|
3571 |
],
|
3572 |
-
"bleu": 0.
|
3573 |
},
|
3574 |
{
|
3575 |
"language_name": "Nuer",
|
@@ -4145,28 +3873,12 @@
|
|
4145 |
"language_code": "szl",
|
4146 |
"speakers": 522000.0,
|
4147 |
"scores": [
|
4148 |
-
{
|
4149 |
-
"model": "openai/gpt-4o",
|
4150 |
-
"bleu": 0.1560774913216495
|
4151 |
-
},
|
4152 |
{
|
4153 |
"model": "anthropic/claude-3.5-sonnet",
|
4154 |
"bleu": 0.17422519619712065
|
4155 |
-
},
|
4156 |
-
{
|
4157 |
-
"model": "meta-llama/llama-3.1-405b-instruct",
|
4158 |
-
"bleu": 0.12400774609673239
|
4159 |
-
},
|
4160 |
-
{
|
4161 |
-
"model": "mistralai/mistral-large",
|
4162 |
-
"bleu": 0.09674030113339326
|
4163 |
-
},
|
4164 |
-
{
|
4165 |
-
"model": "qwen/qwen-2.5-72b-instruct",
|
4166 |
-
"bleu": 0.096266902845627
|
4167 |
}
|
4168 |
],
|
4169 |
-
"bleu": 0.
|
4170 |
},
|
4171 |
{
|
4172 |
"language_name": "Flaaitaal",
|
@@ -4408,28 +4120,12 @@
|
|
4408 |
"language_code": "smo",
|
4409 |
"speakers": 415720.0,
|
4410 |
"scores": [
|
4411 |
-
{
|
4412 |
-
"model": "openai/gpt-4o",
|
4413 |
-
"bleu": 0.21326629811705108
|
4414 |
-
},
|
4415 |
{
|
4416 |
"model": "anthropic/claude-3.5-sonnet",
|
4417 |
"bleu": 0.2024917287660747
|
4418 |
-
},
|
4419 |
-
{
|
4420 |
-
"model": "meta-llama/llama-3.1-405b-instruct",
|
4421 |
-
"bleu": 0.16385599822945093
|
4422 |
-
},
|
4423 |
-
{
|
4424 |
-
"model": "mistralai/mistral-large",
|
4425 |
-
"bleu": 0.08705749106630675
|
4426 |
-
},
|
4427 |
-
{
|
4428 |
-
"model": "qwen/qwen-2.5-72b-instruct",
|
4429 |
-
"bleu": 0.021765197503505862
|
4430 |
}
|
4431 |
],
|
4432 |
-
"bleu": 0.
|
4433 |
},
|
4434 |
{
|
4435 |
"language_name": "Sranan Tongo",
|
@@ -4682,28 +4378,12 @@
|
|
4682 |
"language_code": "pap",
|
4683 |
"speakers": 321300.0,
|
4684 |
"scores": [
|
4685 |
-
{
|
4686 |
-
"model": "openai/gpt-4o",
|
4687 |
-
"bleu": 0.22785468046191032
|
4688 |
-
},
|
4689 |
{
|
4690 |
"model": "anthropic/claude-3.5-sonnet",
|
4691 |
"bleu": 0.28092666579128994
|
4692 |
-
},
|
4693 |
-
{
|
4694 |
-
"model": "meta-llama/llama-3.1-405b-instruct",
|
4695 |
-
"bleu": 0.22002899232464
|
4696 |
-
},
|
4697 |
-
{
|
4698 |
-
"model": "mistralai/mistral-large",
|
4699 |
-
"bleu": 0.11261357394303675
|
4700 |
-
},
|
4701 |
-
{
|
4702 |
-
"model": "qwen/qwen-2.5-72b-instruct",
|
4703 |
-
"bleu": 0.06527147003401967
|
4704 |
}
|
4705 |
],
|
4706 |
-
"bleu": 0.
|
4707 |
},
|
4708 |
{
|
4709 |
"language_name": "Kiembu",
|
@@ -5196,28 +4876,12 @@
|
|
5196 |
"language_code": "ltg",
|
5197 |
"speakers": 200000.0,
|
5198 |
"scores": [
|
5199 |
-
{
|
5200 |
-
"model": "openai/gpt-4o",
|
5201 |
-
"bleu": 0.06220005456851305
|
5202 |
-
},
|
5203 |
{
|
5204 |
"model": "anthropic/claude-3.5-sonnet",
|
5205 |
"bleu": 0.16898752975227693
|
5206 |
-
},
|
5207 |
-
{
|
5208 |
-
"model": "meta-llama/llama-3.1-405b-instruct",
|
5209 |
-
"bleu": 0.07373773186072476
|
5210 |
-
},
|
5211 |
-
{
|
5212 |
-
"model": "mistralai/mistral-large",
|
5213 |
-
"bleu": 0.04077981475688863
|
5214 |
-
},
|
5215 |
-
{
|
5216 |
-
"model": "qwen/qwen-2.5-72b-instruct",
|
5217 |
-
"bleu": 0.02988351640091225
|
5218 |
}
|
5219 |
],
|
5220 |
-
"bleu": 0.
|
5221 |
},
|
5222 |
{
|
5223 |
"language_name": "Dongxiang",
|
@@ -13903,28 +13567,12 @@
|
|
13903 |
"language_code": "arb",
|
13904 |
"speakers": 0.0,
|
13905 |
"scores": [
|
13906 |
-
{
|
13907 |
-
"model": "openai/gpt-4o",
|
13908 |
-
"bleu": 0.09526855781979184
|
13909 |
-
},
|
13910 |
{
|
13911 |
"model": "anthropic/claude-3.5-sonnet",
|
13912 |
"bleu": 0.1160671201312185
|
13913 |
-
},
|
13914 |
-
{
|
13915 |
-
"model": "meta-llama/llama-3.1-405b-instruct",
|
13916 |
-
"bleu": 0.04096256526230796
|
13917 |
-
},
|
13918 |
-
{
|
13919 |
-
"model": "mistralai/mistral-large",
|
13920 |
-
"bleu": 0.07433538408053277
|
13921 |
-
},
|
13922 |
-
{
|
13923 |
-
"model": "qwen/qwen-2.5-72b-instruct",
|
13924 |
-
"bleu": 0.012873795681693922
|
13925 |
}
|
13926 |
],
|
13927 |
-
"bleu": 0.
|
13928 |
},
|
13929 |
{
|
13930 |
"language_name": "ocm",
|
@@ -22297,28 +21945,12 @@
|
|
22297 |
"language_code": "dik",
|
22298 |
"speakers": 0,
|
22299 |
"scores": [
|
22300 |
-
{
|
22301 |
-
"model": "openai/gpt-4o",
|
22302 |
-
"bleu": 0.0071179726075512725
|
22303 |
-
},
|
22304 |
{
|
22305 |
"model": "anthropic/claude-3.5-sonnet",
|
22306 |
"bleu": 0.04004591158378547
|
22307 |
-
},
|
22308 |
-
{
|
22309 |
-
"model": "meta-llama/llama-3.1-405b-instruct",
|
22310 |
-
"bleu": 0.012071851713508174
|
22311 |
-
},
|
22312 |
-
{
|
22313 |
-
"model": "mistralai/mistral-large",
|
22314 |
-
"bleu": 0.021620964225193613
|
22315 |
-
},
|
22316 |
-
{
|
22317 |
-
"model": "qwen/qwen-2.5-72b-instruct",
|
22318 |
-
"bleu": 0.009725228948559986
|
22319 |
}
|
22320 |
],
|
22321 |
-
"bleu": 0.
|
22322 |
},
|
22323 |
{
|
22324 |
"language_name": "Dilling",
|
|
|
6 |
"scores": [
|
7 |
{
|
8 |
"model": "anthropic/claude-3.5-sonnet",
|
9 |
+
"bleu": 0.438607997913414
|
10 |
}
|
11 |
],
|
12 |
+
"bleu": 0.438607997913414
|
13 |
},
|
14 |
{
|
15 |
"language_name": "Mandarin Chinese",
|
|
|
18 |
"scores": [
|
19 |
{
|
20 |
"model": "anthropic/claude-3.5-sonnet",
|
21 |
+
"bleu": 0.28859709196576455
|
22 |
}
|
23 |
],
|
24 |
+
"bleu": 0.28859709196576455
|
25 |
},
|
26 |
{
|
27 |
"language_name": "Spanish",
|
|
|
30 |
"scores": [
|
31 |
{
|
32 |
"model": "anthropic/claude-3.5-sonnet",
|
33 |
+
"bleu": 0.31529908667129014
|
34 |
}
|
35 |
],
|
36 |
+
"bleu": 0.31529908667129014
|
37 |
},
|
38 |
{
|
39 |
"language_name": "Hindi",
|
|
|
42 |
"scores": [
|
43 |
{
|
44 |
"model": "anthropic/claude-3.5-sonnet",
|
45 |
+
"bleu": 0.3063790221508274
|
46 |
}
|
47 |
],
|
48 |
+
"bleu": 0.3063790221508274
|
49 |
},
|
50 |
{
|
51 |
"language_name": "Bengali",
|
|
|
54 |
"scores": [
|
55 |
{
|
56 |
"model": "anthropic/claude-3.5-sonnet",
|
57 |
+
"bleu": 0.2932161392776923
|
58 |
}
|
59 |
],
|
60 |
+
"bleu": 0.2932161392776923
|
61 |
},
|
62 |
{
|
63 |
"language_name": "Portuguese",
|
|
|
78 |
"scores": [
|
79 |
{
|
80 |
"model": "anthropic/claude-3.5-sonnet",
|
81 |
+
"bleu": 0.4225557942193369
|
82 |
}
|
83 |
],
|
84 |
+
"bleu": 0.4225557942193369
|
85 |
},
|
86 |
{
|
87 |
"language_name": "Indonesian",
|
|
|
114 |
"scores": [
|
115 |
{
|
116 |
"model": "anthropic/claude-3.5-sonnet",
|
117 |
+
"bleu": 0.3224776412158195
|
118 |
}
|
119 |
],
|
120 |
+
"bleu": 0.3224776412158195
|
121 |
},
|
122 |
{
|
123 |
"language_name": "Eastern Punjabi",
|
|
|
126 |
"scores": [
|
127 |
{
|
128 |
"model": "anthropic/claude-3.5-sonnet",
|
129 |
+
"bleu": 0.3441710075977166
|
130 |
}
|
131 |
],
|
132 |
+
"bleu": 0.3441710075977166
|
133 |
},
|
134 |
{
|
135 |
"language_name": "Standard German",
|
|
|
212 |
"language_code": "mar",
|
213 |
"speakers": 83100000.0,
|
214 |
"scores": [
|
|
|
|
|
|
|
|
|
215 |
{
|
216 |
"model": "anthropic/claude-3.5-sonnet",
|
217 |
"bleu": 0.2333407635240652
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
218 |
}
|
219 |
],
|
220 |
+
"bleu": 0.2333407635240652
|
221 |
},
|
222 |
{
|
223 |
"language_name": "Swahili",
|
|
|
238 |
"scores": [
|
239 |
{
|
240 |
"model": "anthropic/claude-3.5-sonnet",
|
241 |
+
"bleu": 0.31845792720586547
|
242 |
}
|
243 |
],
|
244 |
+
"bleu": 0.31845792720586547
|
245 |
},
|
246 |
{
|
247 |
"language_name": "Telugu",
|
|
|
269 |
"scores": [
|
270 |
{
|
271 |
"model": "anthropic/claude-3.5-sonnet",
|
272 |
+
"bleu": 0.28861844740070713
|
273 |
}
|
274 |
],
|
275 |
+
"bleu": 0.28861844740070713
|
276 |
},
|
277 |
{
|
278 |
"language_name": "Vietnamese",
|
|
|
341 |
"scores": [
|
342 |
{
|
343 |
"model": "anthropic/claude-3.5-sonnet",
|
344 |
+
"bleu": 0.27067019149599314
|
345 |
}
|
346 |
],
|
347 |
+
"bleu": 0.27067019149599314
|
348 |
},
|
349 |
{
|
350 |
"language_name": "Bhojpuri",
|
|
|
398 |
"language_code": "apc",
|
399 |
"speakers": 44000000.0,
|
400 |
"scores": [
|
|
|
|
|
|
|
|
|
401 |
{
|
402 |
"model": "anthropic/claude-3.5-sonnet",
|
403 |
"bleu": 0.21013619903144296
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
404 |
}
|
405 |
],
|
406 |
+
"bleu": 0.21013619903144296
|
407 |
},
|
408 |
{
|
409 |
"language_name": "Hausa",
|
|
|
483 |
"scores": [
|
484 |
{
|
485 |
"model": "anthropic/claude-3.5-sonnet",
|
486 |
+
"bleu": 0.4055817497511186
|
487 |
}
|
488 |
],
|
489 |
+
"bleu": 0.4055817497511186
|
490 |
},
|
491 |
{
|
492 |
"language_name": "Xiang Chinese",
|
|
|
621 |
"scores": [
|
622 |
{
|
623 |
"model": "anthropic/claude-3.5-sonnet",
|
624 |
+
"bleu": 0.17452025039334695
|
625 |
}
|
626 |
],
|
627 |
+
"bleu": 0.17452025039334695
|
628 |
},
|
629 |
{
|
630 |
"language_name": "Northern Uzbek",
|
|
|
667 |
"language_code": "apc",
|
668 |
"speakers": 24600000.0,
|
669 |
"scores": [
|
|
|
|
|
|
|
|
|
670 |
{
|
671 |
"model": "anthropic/claude-3.5-sonnet",
|
672 |
"bleu": 0.21013619903144296
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
673 |
}
|
674 |
],
|
675 |
+
"bleu": 0.21013619903144296
|
676 |
},
|
677 |
{
|
678 |
"language_name": "Romanian",
|
|
|
869 |
"language_code": "zul",
|
870 |
"speakers": 15700000.0,
|
871 |
"scores": [
|
|
|
|
|
|
|
|
|
872 |
{
|
873 |
"model": "anthropic/claude-3.5-sonnet",
|
874 |
"bleu": 0.20245371733247658
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
875 |
}
|
876 |
],
|
877 |
+
"bleu": 0.20245371733247658
|
878 |
},
|
879 |
{
|
880 |
"language_name": "Mesopotamian Arabic",
|
|
|
1040 |
"language_code": "kaz",
|
1041 |
"speakers": 13161980.0,
|
1042 |
"scores": [
|
|
|
|
|
|
|
|
|
1043 |
{
|
1044 |
"model": "anthropic/claude-3.5-sonnet",
|
1045 |
"bleu": 0.25054345947985385
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1046 |
}
|
1047 |
],
|
1048 |
+
"bleu": 0.25054345947985385
|
1049 |
},
|
1050 |
{
|
1051 |
"language_name": "Chittagonian",
|
|
|
1059 |
"language_code": "tso",
|
1060 |
"speakers": 13000000.0,
|
1061 |
"scores": [
|
|
|
|
|
|
|
|
|
1062 |
{
|
1063 |
"model": "anthropic/claude-3.5-sonnet",
|
1064 |
"bleu": 0.1952038863089787
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1065 |
}
|
1066 |
],
|
1067 |
+
"bleu": 0.1952038863089787
|
1068 |
},
|
1069 |
{
|
1070 |
"language_name": "Hungarian",
|
|
|
1083 |
"language_code": "kin",
|
1084 |
"speakers": 12100000.0,
|
1085 |
"scores": [
|
|
|
|
|
|
|
|
|
1086 |
{
|
1087 |
"model": "anthropic/claude-3.5-sonnet",
|
1088 |
"bleu": 0.21847668603031067
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1089 |
}
|
1090 |
],
|
1091 |
+
"bleu": 0.21847668603031067
|
1092 |
},
|
1093 |
{
|
1094 |
"language_name": "Chichewa",
|
1095 |
"language_code": "nya",
|
1096 |
"speakers": 12000000.0,
|
1097 |
"scores": [
|
|
|
|
|
|
|
|
|
1098 |
{
|
1099 |
"model": "anthropic/claude-3.5-sonnet",
|
1100 |
"bleu": 0.17401113784791736
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1101 |
}
|
1102 |
],
|
1103 |
+
"bleu": 0.17401113784791736
|
1104 |
},
|
1105 |
{
|
1106 |
"language_name": "Tunisian Arabic",
|
|
|
1266 |
"language_code": "hat",
|
1267 |
"speakers": 9600000.0,
|
1268 |
"scores": [
|
|
|
|
|
|
|
|
|
1269 |
{
|
1270 |
"model": "anthropic/claude-3.5-sonnet",
|
1271 |
+
"bleu": 0.2780257097562799
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1272 |
}
|
1273 |
],
|
1274 |
+
"bleu": 0.2780257097562799
|
1275 |
},
|
1276 |
{
|
1277 |
"language_name": "Dari",
|
|
|
1314 |
"language_code": "azj",
|
1315 |
"speakers": 9220610.0,
|
1316 |
"scores": [
|
|
|
|
|
|
|
|
|
1317 |
{
|
1318 |
"model": "anthropic/claude-3.5-sonnet",
|
1319 |
"bleu": 0.24029548337141315
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1320 |
}
|
1321 |
],
|
1322 |
+
"bleu": 0.24029548337141315
|
1323 |
},
|
1324 |
{
|
1325 |
"language_name": "Congo Swahili",
|
|
|
1537 |
"language_code": "kas",
|
1538 |
"speakers": 6900000.0,
|
1539 |
"scores": [
|
|
|
|
|
|
|
|
|
1540 |
{
|
1541 |
"model": "anthropic/claude-3.5-sonnet",
|
1542 |
"bleu": 0.06589195125918151
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1543 |
}
|
1544 |
],
|
1545 |
+
"bleu": 0.06589195125918151
|
1546 |
},
|
1547 |
{
|
1548 |
"language_name": "Armenian",
|
|
|
1563 |
"scores": [
|
1564 |
{
|
1565 |
"model": "openai/gpt-4o",
|
1566 |
+
"bleu": 0.0315377397347853
|
1567 |
},
|
1568 |
{
|
1569 |
"model": "anthropic/claude-3.5-sonnet",
|
1570 |
+
"bleu": 0.09333641225693347
|
1571 |
},
|
1572 |
{
|
1573 |
"model": "meta-llama/llama-3.1-405b-instruct",
|
1574 |
+
"bleu": 0.030121023774013433
|
1575 |
},
|
1576 |
{
|
1577 |
"model": "mistralai/mistral-large",
|
1578 |
+
"bleu": 0.04230836190600749
|
1579 |
},
|
1580 |
{
|
1581 |
"model": "qwen/qwen-2.5-72b-instruct",
|
1582 |
+
"bleu": 0.027727350391206936
|
1583 |
}
|
1584 |
],
|
1585 |
+
"bleu": 0.04500617761258932
|
1586 |
},
|
1587 |
{
|
1588 |
"language_name": "Central Pashto",
|
|
|
1957 |
"language_code": "lit",
|
1958 |
"speakers": 4000000.0,
|
1959 |
"scores": [
|
|
|
|
|
|
|
|
|
1960 |
{
|
1961 |
"model": "anthropic/claude-3.5-sonnet",
|
1962 |
"bleu": 0.30370845804188434
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1963 |
}
|
1964 |
],
|
1965 |
+
"bleu": 0.30370845804188434
|
1966 |
},
|
1967 |
{
|
1968 |
"language_name": "Tok Pisin",
|
|
|
2100 |
"language_code": "ace",
|
2101 |
"speakers": 3500032.0,
|
2102 |
"scores": [
|
|
|
|
|
|
|
|
|
2103 |
{
|
2104 |
"model": "anthropic/claude-3.5-sonnet",
|
2105 |
"bleu": 0.06711853873605253
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2106 |
}
|
2107 |
],
|
2108 |
+
"bleu": 0.06711853873605253
|
2109 |
},
|
2110 |
{
|
2111 |
"language_name": "Banjar",
|
|
|
2197 |
"language_code": "ewe",
|
2198 |
"speakers": 3000000.0,
|
2199 |
"scores": [
|
|
|
|
|
|
|
|
|
2200 |
{
|
2201 |
"model": "anthropic/claude-3.5-sonnet",
|
2202 |
"bleu": 0.08106169448483001
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2203 |
}
|
2204 |
],
|
2205 |
+
"bleu": 0.08106169448483001
|
2206 |
},
|
2207 |
{
|
2208 |
"language_name": "Tosk Albanian",
|
|
|
2934 |
"language_code": "srd",
|
2935 |
"speakers": 1300000.0,
|
2936 |
"scores": [
|
|
|
|
|
|
|
|
|
2937 |
{
|
2938 |
"model": "anthropic/claude-3.5-sonnet",
|
2939 |
"bleu": 0.01817225070836904
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2940 |
}
|
2941 |
],
|
2942 |
+
"bleu": 0.01817225070836904
|
2943 |
},
|
2944 |
{
|
2945 |
"language_name": "Emilian",
|
|
|
3045 |
"language_code": "ekk",
|
3046 |
"speakers": 1164770.0,
|
3047 |
"scores": [
|
|
|
|
|
|
|
|
|
3048 |
{
|
3049 |
"model": "anthropic/claude-3.5-sonnet",
|
3050 |
"bleu": 0.3107627601397992
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
3051 |
}
|
3052 |
],
|
3053 |
+
"bleu": 0.3107627601397992
|
3054 |
},
|
3055 |
{
|
3056 |
"language_name": "Muong",
|
|
|
3292 |
"language_code": "quy",
|
3293 |
"speakers": 918200.0,
|
3294 |
"scores": [
|
|
|
|
|
|
|
|
|
3295 |
{
|
3296 |
"model": "anthropic/claude-3.5-sonnet",
|
3297 |
"bleu": 0.07871425240461387
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
3298 |
}
|
3299 |
],
|
3300 |
+
"bleu": 0.07871425240461387
|
3301 |
},
|
3302 |
{
|
3303 |
"language_name": "Nuer",
|
|
|
3873 |
"language_code": "szl",
|
3874 |
"speakers": 522000.0,
|
3875 |
"scores": [
|
|
|
|
|
|
|
|
|
3876 |
{
|
3877 |
"model": "anthropic/claude-3.5-sonnet",
|
3878 |
"bleu": 0.17422519619712065
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
3879 |
}
|
3880 |
],
|
3881 |
+
"bleu": 0.17422519619712065
|
3882 |
},
|
3883 |
{
|
3884 |
"language_name": "Flaaitaal",
|
|
|
4120 |
"language_code": "smo",
|
4121 |
"speakers": 415720.0,
|
4122 |
"scores": [
|
|
|
|
|
|
|
|
|
4123 |
{
|
4124 |
"model": "anthropic/claude-3.5-sonnet",
|
4125 |
"bleu": 0.2024917287660747
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
4126 |
}
|
4127 |
],
|
4128 |
+
"bleu": 0.2024917287660747
|
4129 |
},
|
4130 |
{
|
4131 |
"language_name": "Sranan Tongo",
|
|
|
4378 |
"language_code": "pap",
|
4379 |
"speakers": 321300.0,
|
4380 |
"scores": [
|
|
|
|
|
|
|
|
|
4381 |
{
|
4382 |
"model": "anthropic/claude-3.5-sonnet",
|
4383 |
"bleu": 0.28092666579128994
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
4384 |
}
|
4385 |
],
|
4386 |
+
"bleu": 0.28092666579128994
|
4387 |
},
|
4388 |
{
|
4389 |
"language_name": "Kiembu",
|
|
|
4876 |
"language_code": "ltg",
|
4877 |
"speakers": 200000.0,
|
4878 |
"scores": [
|
|
|
|
|
|
|
|
|
4879 |
{
|
4880 |
"model": "anthropic/claude-3.5-sonnet",
|
4881 |
"bleu": 0.16898752975227693
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
4882 |
}
|
4883 |
],
|
4884 |
+
"bleu": 0.16898752975227693
|
4885 |
},
|
4886 |
{
|
4887 |
"language_name": "Dongxiang",
|
|
|
13567 |
"language_code": "arb",
|
13568 |
"speakers": 0.0,
|
13569 |
"scores": [
|
|
|
|
|
|
|
|
|
13570 |
{
|
13571 |
"model": "anthropic/claude-3.5-sonnet",
|
13572 |
"bleu": 0.1160671201312185
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
13573 |
}
|
13574 |
],
|
13575 |
+
"bleu": 0.1160671201312185
|
13576 |
},
|
13577 |
{
|
13578 |
"language_name": "ocm",
|
|
|
21945 |
"language_code": "dik",
|
21946 |
"speakers": 0,
|
21947 |
"scores": [
|
|
|
|
|
|
|
|
|
21948 |
{
|
21949 |
"model": "anthropic/claude-3.5-sonnet",
|
21950 |
"bleu": 0.04004591158378547
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
21951 |
}
|
21952 |
],
|
21953 |
+
"bleu": 0.04004591158378547
|
21954 |
},
|
21955 |
{
|
21956 |
"language_name": "Dilling",
|