David Pomerenke committed on
Commit
86b8b3a
·
1 Parent(s): 49ffc1d

Add bert_score evaluation

Browse files
Files changed (6) hide show
  1. index.html +13 -11
  2. languagebench.py +26 -16
  3. pyproject.toml +1 -0
  4. results.json +514 -274
  5. results_summary.json +400 -195
  6. uv.lock +0 -0
index.html CHANGED
@@ -35,12 +35,14 @@
35
  import * as Plot from "https://cdn.jsdelivr.net/npm/@observablehq/plot@0.6/+esm";
36
 
37
  async function init() {
 
 
38
  const chartsDiv = document.getElementById('charts');
39
 
40
  const summary = await fetch('results_summary.json');
41
  const summaryData = await summary.json();
42
  // Format captions
43
- const formatTitle = d => d.target_language_name + "\n" + parseInt(d.speakers / 1_000_00) / 10 + "M speakers\nBLEU score: " + d.bleu.toFixed(1)
44
 
45
  // Create summary plot
46
  const summaryPlot = Plot.plot({
@@ -48,29 +50,29 @@
48
  height: 400,
49
  marginBottom: 100,
50
  x: { label: "Number of speakers", axis: null },
51
- y: { label: "BLEU Score (average across models)" },
52
  // color: { scheme: "BrBG" },
53
  marks: [
54
  Plot.rectY(summaryData, Plot.stackX({
55
  x: "speakers",
56
- order: "bleu",
57
  reverse: true,
58
- y2: "bleu", // y2 to avoid stacking by y
59
  title: formatTitle,
60
  tip: true,
61
  // fill: d => -d.bleu
62
  })),
63
  Plot.rectY(summaryData, Plot.pointerX(Plot.stackX({
64
  x: "speakers",
65
- order: "bleu",
66
  reverse: true,
67
- y2: "bleu", // y2 to avoid stacking by y
68
  fill: "grey",
69
  }))),
70
  Plot.text(summaryData, Plot.stackX({
71
  x: "speakers",
72
- y2: "bleu",
73
- order: "bleu",
74
  reverse: true,
75
  text: "target_language_name",
76
  frameAnchor: "bottom",
@@ -135,13 +137,13 @@
135
  height: 200,
136
  margin: 30,
137
  y: {
138
- domain: [0, 100],
139
- label: "BLEU"
140
  },
141
  marks: [
142
  Plot.barY(languageData, {
143
  x: d => descriptor(d.model),
144
- y: "bleu"
145
  })
146
  ]
147
  });
 
35
  import * as Plot from "https://cdn.jsdelivr.net/npm/@observablehq/plot@0.6/+esm";
36
 
37
  async function init() {
38
+ const scoreKey = "bleu"
39
+ const scoreName = "BLEU Score"
40
  const chartsDiv = document.getElementById('charts');
41
 
42
  const summary = await fetch('results_summary.json');
43
  const summaryData = await summary.json();
44
  // Format captions
45
+ const formatTitle = d => (d.target_language_name + "\n" + parseInt(d.speakers / 1_000_00) / 10 + "M speakers\n" + scoreName + ": " + d[scoreKey].toFixed(1))
46
 
47
  // Create summary plot
48
  const summaryPlot = Plot.plot({
 
50
  height: 400,
51
  marginBottom: 100,
52
  x: { label: "Number of speakers", axis: null },
53
+ y: { label: `${scoreName} (average across models)` },
54
  // color: { scheme: "BrBG" },
55
  marks: [
56
  Plot.rectY(summaryData, Plot.stackX({
57
  x: "speakers",
58
+ order: scoreKey,
59
  reverse: true,
60
+ y2: scoreKey, // y2 to avoid stacking by y
61
  title: formatTitle,
62
  tip: true,
63
  // fill: d => -d.bleu
64
  })),
65
  Plot.rectY(summaryData, Plot.pointerX(Plot.stackX({
66
  x: "speakers",
67
+ order: scoreKey,
68
  reverse: true,
69
+ y2: scoreKey, // y2 to avoid stacking by y
70
  fill: "grey",
71
  }))),
72
  Plot.text(summaryData, Plot.stackX({
73
  x: "speakers",
74
+ y2: scoreKey,
75
+ order: scoreKey,
76
  reverse: true,
77
  text: "target_language_name",
78
  frameAnchor: "bottom",
 
137
  height: 200,
138
  margin: 30,
139
  y: {
140
+ domain: [0, 1],
141
+ label: scoreName
142
  },
143
  marks: [
144
  Plot.barY(languageData, {
145
  x: d => descriptor(d.model),
146
+ y: scoreKey
147
  })
148
  ]
149
  });
languagebench.py CHANGED
@@ -37,9 +37,23 @@ client = AsyncOpenAI(
37
  )
38
  cache = Memory(location=".cache", verbose=0).cache
39
  bleu = evaluate.load("sacrebleu")
 
40
  rate_limit = AsyncLimiter(max_rate=15, time_period=1)
41
 
42
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43
  def check_rate_limit():
44
  print(
45
  requests.get(
@@ -64,18 +78,6 @@ async def complete(**kwargs):
64
  return response
65
 
66
 
67
- def reorder(language_name):
68
- if "," in language_name and "(" not in language_name:
69
- return language_name.split(",")[1] + " " + language_name.split(",")[0]
70
- return language_name
71
-
72
-
73
- language_names = pd.read_csv("LanguageCodes.tab", sep="\t")
74
- language_names["Name"] = language_names["Name"].apply(reorder)
75
- language_stats = pd.read_csv("languages.tsv", sep="\t")
76
- script_names = pd.read_csv("ScriptCodes.csv")
77
-
78
-
79
  @cache
80
  async def translate(model, target_language, target_script, sentence):
81
  reply = await complete(
@@ -112,13 +114,16 @@ def get_language_stats(language_code):
112
  return stats
113
 
114
 
 
 
 
 
 
115
  async def main():
116
  n = 30
117
  results = []
118
  original_sentences = open(f"{dataset}/dev.{original_language}").readlines()
119
  for target_language in target_languages:
120
- if target_language == original_language:
121
- continue
122
  target_sentences = open(f"{dataset}/dev.{target_language}").readlines()
123
  for model in models:
124
  if model != fast_model and target_language not in detailed_target_languages:
@@ -135,7 +140,11 @@ async def main():
135
  references=target_sentences[:n],
136
  tokenize="char",
137
  )
138
-
 
 
 
 
139
  results.append(
140
  {
141
  "model": model,
@@ -144,12 +153,13 @@ async def main():
144
  "target_language_name": stats["name"],
145
  "speakers": int(stats.get("maxSpeakers", 0)),
146
  "bleu": metrics["score"],
 
147
  }
148
  )
149
  with open("results.json", "w") as f:
150
  json.dump(results, f, indent=2, ensure_ascii=False)
151
  pd.DataFrame(results).groupby("target_language_name").agg(
152
- {"bleu": "mean", "speakers": "mean"}
153
  ).reset_index().to_json("results_summary.json", indent=2, orient="records")
154
 
155
 
 
37
  )
38
  cache = Memory(location=".cache", verbose=0).cache
39
  bleu = evaluate.load("sacrebleu")
40
+ bertscore = evaluate.load("bertscore")
41
  rate_limit = AsyncLimiter(max_rate=15, time_period=1)
42
 
43
 
44
+ def reorder(language_name):
45
+ if "," in language_name and "(" not in language_name:
46
+ return language_name.split(",")[1] + " " + language_name.split(",")[0]
47
+ return language_name
48
+
49
+
50
+ language_names = pd.read_csv("LanguageCodes.tab", sep="\t")
51
+ language_names["Name"] = language_names["Name"].apply(reorder).str.strip()
52
+ language_stats = pd.read_csv("languages.tsv", sep="\t")
53
+ script_names = pd.read_csv("ScriptCodes.csv")
54
+
55
+
56
+ # utils
57
  def check_rate_limit():
58
  print(
59
  requests.get(
 
78
  return response
79
 
80
 
 
 
 
 
 
 
 
 
 
 
 
 
81
  @cache
82
  async def translate(model, target_language, target_script, sentence):
83
  reply = await complete(
 
114
  return stats
115
 
116
 
117
+ def mean(l):
118
+ return sum(l) / len(l)
119
+
120
+
121
+ # evaluation!
122
  async def main():
123
  n = 30
124
  results = []
125
  original_sentences = open(f"{dataset}/dev.{original_language}").readlines()
126
  for target_language in target_languages:
 
 
127
  target_sentences = open(f"{dataset}/dev.{target_language}").readlines()
128
  for model in models:
129
  if model != fast_model and target_language not in detailed_target_languages:
 
140
  references=target_sentences[:n],
141
  tokenize="char",
142
  )
143
+ bert_metrics = bertscore.compute(
144
+ predictions=predictions,
145
+ references=target_sentences[:n],
146
+ model_type="distilbert-base-uncased",
147
+ )
148
  results.append(
149
  {
150
  "model": model,
 
153
  "target_language_name": stats["name"],
154
  "speakers": int(stats.get("maxSpeakers", 0)),
155
  "bleu": metrics["score"],
156
+ "bert_score": mean(bert_metrics["f1"]),
157
  }
158
  )
159
  with open("results.json", "w") as f:
160
  json.dump(results, f, indent=2, ensure_ascii=False)
161
  pd.DataFrame(results).groupby("target_language_name").agg(
162
+ {"bleu": "mean", "bert_score": "mean", "speakers": "mean"}
163
  ).reset_index().to_json("results_summary.json", indent=2, orient="records")
164
 
165
 
pyproject.toml CHANGED
@@ -6,6 +6,7 @@ readme = "README.md"
6
  requires-python = ">=3.10"
7
  dependencies = [
8
  "aiolimiter>=1.1.0",
 
9
  "evaluate>=0.4.3",
10
  "joblib>=1.4.2",
11
  "openai>=1.52.2",
 
6
  requires-python = ">=3.10"
7
  dependencies = [
8
  "aiolimiter>=1.1.0",
9
+ "bert-score>=0.3.13",
10
  "evaluate>=0.4.3",
11
  "joblib>=1.4.2",
12
  "openai>=1.52.2",
results.json CHANGED
@@ -5,15 +5,17 @@
5
  "target_language": "tur_Latn",
6
  "target_language_name": "Turkish",
7
  "speakers": 82231620,
8
- "bleu": 67.16006256755001
 
9
  },
10
  {
11
  "model": "anthropic/claude-3.5-sonnet",
12
  "original_language": "eng_Latn",
13
  "target_language": "ary_Arab",
14
- "target_language_name": " Moroccan Arabic",
15
  "speakers": 27500000,
16
- "bleu": 48.56078739780091
 
17
  },
18
  {
19
  "model": "anthropic/claude-3.5-sonnet",
@@ -21,7 +23,8 @@
21
  "target_language": "fij_Latn",
22
  "target_language_name": "Fijian",
23
  "speakers": 341270,
24
- "bleu": 58.28926672457303
 
25
  },
26
  {
27
  "model": "anthropic/claude-3.5-sonnet",
@@ -29,7 +32,8 @@
29
  "target_language": "lug_Latn",
30
  "target_language_name": "Ganda",
31
  "speakers": 4100000,
32
- "bleu": 45.86933229358203
 
33
  },
34
  {
35
  "model": "anthropic/claude-3.5-sonnet",
@@ -37,7 +41,8 @@
37
  "target_language": "kin_Latn",
38
  "target_language_name": "Kinyarwanda",
39
  "speakers": 12100000,
40
- "bleu": 57.241062675597036
 
41
  },
42
  {
43
  "model": "openai/gpt-4o-mini",
@@ -45,7 +50,8 @@
45
  "target_language": "ind_Latn",
46
  "target_language_name": "Indonesian",
47
  "speakers": 198996550,
48
- "bleu": 76.69607051201163
 
49
  },
50
  {
51
  "model": "anthropic/claude-3.5-sonnet",
@@ -53,7 +59,8 @@
53
  "target_language": "ind_Latn",
54
  "target_language_name": "Indonesian",
55
  "speakers": 198996550,
56
- "bleu": 75.20007995551391
 
57
  },
58
  {
59
  "model": "meta-llama/llama-3.1-70b-instruct",
@@ -61,7 +68,8 @@
61
  "target_language": "ind_Latn",
62
  "target_language_name": "Indonesian",
63
  "speakers": 198996550,
64
- "bleu": 74.1959714053824
 
65
  },
66
  {
67
  "model": "mistralai/mistral-nemo",
@@ -69,7 +77,8 @@
69
  "target_language": "ind_Latn",
70
  "target_language_name": "Indonesian",
71
  "speakers": 198996550,
72
- "bleu": 65.59558256613556
 
73
  },
74
  {
75
  "model": "qwen/qwen-2.5-72b-instruct",
@@ -77,7 +86,8 @@
77
  "target_language": "ind_Latn",
78
  "target_language_name": "Indonesian",
79
  "speakers": 198996550,
80
- "bleu": 72.85582869172275
 
81
  },
82
  {
83
  "model": "anthropic/claude-3.5-sonnet",
@@ -85,7 +95,8 @@
85
  "target_language": "nus_Latn",
86
  "target_language_name": "Nuer",
87
  "speakers": 900000,
88
- "bleu": 16.57969879511241
 
89
  },
90
  {
91
  "model": "anthropic/claude-3.5-sonnet",
@@ -93,23 +104,26 @@
93
  "target_language": "szl_Latn",
94
  "target_language_name": "Silesian",
95
  "speakers": 522000,
96
- "bleu": 56.78363920686616
 
97
  },
98
  {
99
  "model": "anthropic/claude-3.5-sonnet",
100
  "original_language": "eng_Latn",
101
  "target_language": "azj_Latn",
102
- "target_language_name": " North Azerbaijani",
103
  "speakers": 9220610,
104
- "bleu": 55.00459613512087
 
105
  },
106
  {
107
  "model": "anthropic/claude-3.5-sonnet",
108
  "original_language": "eng_Latn",
109
  "target_language": "dik_Latn",
110
- "target_language_name": " Southwestern Dinka",
111
  "speakers": 0,
112
- "bleu": 12.497832051373198
 
113
  },
114
  {
115
  "model": "anthropic/claude-3.5-sonnet",
@@ -117,7 +131,8 @@
117
  "target_language": "smo_Latn",
118
  "target_language_name": "Samoan",
119
  "speakers": 415720,
120
- "bleu": 56.71388314225869
 
121
  },
122
  {
123
  "model": "anthropic/claude-3.5-sonnet",
@@ -125,7 +140,8 @@
125
  "target_language": "heb_Hebr",
126
  "target_language_name": "Hebrew",
127
  "speakers": 9303950,
128
- "bleu": 72.0702990513479
 
129
  },
130
  {
131
  "model": "anthropic/claude-3.5-sonnet",
@@ -133,7 +149,8 @@
133
  "target_language": "lao_Laoo",
134
  "target_language_name": "Lao",
135
  "speakers": 5225552,
136
- "bleu": 60.02109096770294
 
137
  },
138
  {
139
  "model": "anthropic/claude-3.5-sonnet",
@@ -141,7 +158,8 @@
141
  "target_language": "bul_Cyrl",
142
  "target_language_name": "Bulgarian",
143
  "speakers": 9000000,
144
- "bleu": 72.9695925130979
 
145
  },
146
  {
147
  "model": "anthropic/claude-3.5-sonnet",
@@ -149,7 +167,8 @@
149
  "target_language": "dgo_Deva",
150
  "target_language_name": "Dogri",
151
  "speakers": 2000000,
152
- "bleu": 44.91535352779186
 
153
  },
154
  {
155
  "model": "anthropic/claude-3.5-sonnet",
@@ -157,15 +176,17 @@
157
  "target_language": "epo_Latn",
158
  "target_language_name": "Esperanto",
159
  "speakers": 2000000,
160
- "bleu": 69.60565775541012
 
161
  },
162
  {
163
  "model": "anthropic/claude-3.5-sonnet",
164
  "original_language": "eng_Latn",
165
  "target_language": "azb_Arab",
166
- "target_language_name": " South Azerbaijani",
167
  "speakers": 15000000,
168
- "bleu": 43.53622667084785
 
169
  },
170
  {
171
  "model": "anthropic/claude-3.5-sonnet",
@@ -173,7 +194,8 @@
173
  "target_language": "pap_Latn",
174
  "target_language_name": "Papiamentu",
175
  "speakers": 321300,
176
- "bleu": 69.79553281331113
 
177
  },
178
  {
179
  "model": "anthropic/claude-3.5-sonnet",
@@ -181,7 +203,8 @@
181
  "target_language": "ces_Latn",
182
  "target_language_name": "Czech",
183
  "speakers": 10700000,
184
- "bleu": 69.71122905993063
 
185
  },
186
  {
187
  "model": "anthropic/claude-3.5-sonnet",
@@ -189,7 +212,8 @@
189
  "target_language": "gle_Latn",
190
  "target_language_name": "Irish",
191
  "speakers": 1030000,
192
- "bleu": 69.97251945242859
 
193
  },
194
  {
195
  "model": "anthropic/claude-3.5-sonnet",
@@ -197,7 +221,8 @@
197
  "target_language": "cym_Latn",
198
  "target_language_name": "Welsh",
199
  "speakers": 977366,
200
- "bleu": 83.34377244735965
 
201
  },
202
  {
203
  "model": "anthropic/claude-3.5-sonnet",
@@ -205,7 +230,8 @@
205
  "target_language": "war_Latn",
206
  "target_language_name": "Waray-Waray",
207
  "speakers": 3100000,
208
- "bleu": 66.38502312428538
 
209
  },
210
  {
211
  "model": "anthropic/claude-3.5-sonnet",
@@ -213,7 +239,8 @@
213
  "target_language": "tuk_Latn",
214
  "target_language_name": "Turkmen",
215
  "speakers": 16000000,
216
- "bleu": 60.559370593640274
 
217
  },
218
  {
219
  "model": "anthropic/claude-3.5-sonnet",
@@ -221,7 +248,8 @@
221
  "target_language": "kea_Latn",
222
  "target_language_name": "Kabuverdianu",
223
  "speakers": 871000,
224
- "bleu": 65.11060103907447
 
225
  },
226
  {
227
  "model": "anthropic/claude-3.5-sonnet",
@@ -229,7 +257,8 @@
229
  "target_language": "swe_Latn",
230
  "target_language_name": "Swedish",
231
  "speakers": 9244250,
232
- "bleu": 77.42161024703672
 
233
  },
234
  {
235
  "model": "anthropic/claude-3.5-sonnet",
@@ -237,7 +266,8 @@
237
  "target_language": "mni_Mtei",
238
  "target_language_name": "Meitei",
239
  "speakers": 1470000,
240
- "bleu": 41.90750872077243
 
241
  },
242
  {
243
  "model": "anthropic/claude-3.5-sonnet",
@@ -245,15 +275,17 @@
245
  "target_language": "kan_Knda",
246
  "target_language_name": "Kannada",
247
  "speakers": 43600000,
248
- "bleu": 60.01420283321725
 
249
  },
250
  {
251
  "model": "anthropic/claude-3.5-sonnet",
252
  "original_language": "eng_Latn",
253
  "target_language": "plt_Latn",
254
- "target_language_name": " Merina Malagasy",
255
  "speakers": 0,
256
- "bleu": 60.430438804745
 
257
  },
258
  {
259
  "model": "anthropic/claude-3.5-sonnet",
@@ -261,7 +293,8 @@
261
  "target_language": "ewe_Latn",
262
  "target_language_name": "Éwé",
263
  "speakers": 3000000,
264
- "bleu": 41.6614038790914
 
265
  },
266
  {
267
  "model": "anthropic/claude-3.5-sonnet",
@@ -269,7 +302,8 @@
269
  "target_language": "rus_Cyrl",
270
  "target_language_name": "Russian",
271
  "speakers": 171428900,
272
- "bleu": 71.14894410390329
 
273
  },
274
  {
275
  "model": "anthropic/claude-3.5-sonnet",
@@ -277,7 +311,8 @@
277
  "target_language": "bjn_Arab",
278
  "target_language_name": "Banjar",
279
  "speakers": 3500000,
280
- "bleu": 36.7812759423696
 
281
  },
282
  {
283
  "model": "anthropic/claude-3.5-sonnet",
@@ -285,7 +320,8 @@
285
  "target_language": "kmb_Latn",
286
  "target_language_name": "Kimbundu",
287
  "speakers": 0,
288
- "bleu": 5.85234572235619
 
289
  },
290
  {
291
  "model": "anthropic/claude-3.5-sonnet",
@@ -293,15 +329,17 @@
293
  "target_language": "vec_Latn",
294
  "target_language_name": "Venetian",
295
  "speakers": 2000000,
296
- "bleu": 60.61408762705794
 
297
  },
298
  {
299
  "model": "anthropic/claude-3.5-sonnet",
300
  "original_language": "eng_Latn",
301
  "target_language": "aeb_Arab",
302
- "target_language_name": " Tunisian Arabic",
303
  "speakers": 11600000,
304
- "bleu": 49.87359800112665
 
305
  },
306
  {
307
  "model": "anthropic/claude-3.5-sonnet",
@@ -309,7 +347,8 @@
309
  "target_language": "lit_Latn",
310
  "target_language_name": "Lithuanian",
311
  "speakers": 4000000,
312
- "bleu": 67.16256955707802
 
313
  },
314
  {
315
  "model": "anthropic/claude-3.5-sonnet",
@@ -317,7 +356,8 @@
317
  "target_language": "swh_Latn",
318
  "target_language_name": "Swahili",
319
  "speakers": 82300000,
320
- "bleu": 73.51990421418041
 
321
  },
322
  {
323
  "model": "anthropic/claude-3.5-sonnet",
@@ -325,15 +365,17 @@
325
  "target_language": "bug_Latn",
326
  "target_language_name": "Bugis",
327
  "speakers": 5017800,
328
- "bleu": 44.838817003109384
 
329
  },
330
  {
331
  "model": "anthropic/claude-3.5-sonnet",
332
  "original_language": "eng_Latn",
333
  "target_language": "apc_Arab_nort3139",
334
- "target_language_name": " Levantine Arabic",
335
  "speakers": 44000000,
336
- "bleu": 56.66525774941823
 
337
  },
338
  {
339
  "model": "anthropic/claude-3.5-sonnet",
@@ -341,7 +383,8 @@
341
  "target_language": "lus_Latn",
342
  "target_language_name": "Mizo",
343
  "speakers": 500000,
344
- "bleu": 51.65580174875804
 
345
  },
346
  {
347
  "model": "anthropic/claude-3.5-sonnet",
@@ -349,7 +392,8 @@
349
  "target_language": "lim_Latn",
350
  "target_language_name": "Limburgish",
351
  "speakers": 1600000,
352
- "bleu": 59.44855049817084
 
353
  },
354
  {
355
  "model": "anthropic/claude-3.5-sonnet",
@@ -357,7 +401,8 @@
357
  "target_language": "mri_Latn",
358
  "target_language_name": "Maori",
359
  "speakers": 160000,
360
- "bleu": 54.831993564329125
 
361
  },
362
  {
363
  "model": "anthropic/claude-3.5-sonnet",
@@ -365,7 +410,8 @@
365
  "target_language": "kam_Latn",
366
  "target_language_name": "Kamba",
367
  "speakers": 3893000,
368
- "bleu": 41.73348967095708
 
369
  },
370
  {
371
  "model": "anthropic/claude-3.5-sonnet",
@@ -373,15 +419,17 @@
373
  "target_language": "ban_Latn",
374
  "target_language_name": "Bali (Indonesia)",
375
  "speakers": 4000000,
376
- "bleu": 52.87524191594727
 
377
  },
378
  {
379
  "model": "anthropic/claude-3.5-sonnet",
380
  "original_language": "eng_Latn",
381
  "target_language": "pan_Guru",
382
- "target_language_name": " Eastern Punjabi",
383
  "speakers": 125000000,
384
- "bleu": 60.833539867728966
 
385
  },
386
  {
387
  "model": "anthropic/claude-3.5-sonnet",
@@ -389,7 +437,8 @@
389
  "target_language": "por_Latn",
390
  "target_language_name": "Portuguese",
391
  "speakers": 254300000,
392
- "bleu": 77.49780742224304
 
393
  },
394
  {
395
  "model": "anthropic/claude-3.5-sonnet",
@@ -397,7 +446,8 @@
397
  "target_language": "crh_Latn",
398
  "target_language_name": "Crimean Tatar",
399
  "speakers": 552740,
400
- "bleu": 52.705024944759934
 
401
  },
402
  {
403
  "model": "anthropic/claude-3.5-sonnet",
@@ -405,7 +455,8 @@
405
  "target_language": "srp_Cyrl",
406
  "target_language_name": "Serbian",
407
  "speakers": 9000000,
408
- "bleu": 69.96913961762156
 
409
  },
410
  {
411
  "model": "openai/gpt-4o-mini",
@@ -413,7 +464,8 @@
413
  "target_language": "kas_Deva",
414
  "target_language_name": "Kashmiri",
415
  "speakers": 6900000,
416
- "bleu": 22.94872648513265
 
417
  },
418
  {
419
  "model": "anthropic/claude-3.5-sonnet",
@@ -421,7 +473,8 @@
421
  "target_language": "kas_Deva",
422
  "target_language_name": "Kashmiri",
423
  "speakers": 6900000,
424
- "bleu": 27.525562771983658
 
425
  },
426
  {
427
  "model": "meta-llama/llama-3.1-70b-instruct",
@@ -429,7 +482,8 @@
429
  "target_language": "kas_Deva",
430
  "target_language_name": "Kashmiri",
431
  "speakers": 6900000,
432
- "bleu": 7.999053096113321
 
433
  },
434
  {
435
  "model": "mistralai/mistral-nemo",
@@ -437,7 +491,8 @@
437
  "target_language": "kas_Deva",
438
  "target_language_name": "Kashmiri",
439
  "speakers": 6900000,
440
- "bleu": 15.126083511737422
 
441
  },
442
  {
443
  "model": "qwen/qwen-2.5-72b-instruct",
@@ -445,15 +500,17 @@
445
  "target_language": "kas_Deva",
446
  "target_language_name": "Kashmiri",
447
  "speakers": 6900000,
448
- "bleu": 21.928943407791756
 
449
  },
450
  {
451
  "model": "anthropic/claude-3.5-sonnet",
452
  "original_language": "eng_Latn",
453
  "target_language": "bod_Tibt",
454
- "target_language_name": " Central Tibetan",
455
  "speakers": 1200000,
456
- "bleu": 51.76205985600136
 
457
  },
458
  {
459
  "model": "anthropic/claude-3.5-sonnet",
@@ -461,15 +518,17 @@
461
  "target_language": "slv_Latn",
462
  "target_language_name": "Slovene",
463
  "speakers": 2400000,
464
- "bleu": 72.56912707571306
 
465
  },
466
  {
467
  "model": "anthropic/claude-3.5-sonnet",
468
  "original_language": "eng_Latn",
469
  "target_language": "ars_Arab",
470
- "target_language_name": " Najdi Arabic",
471
  "speakers": 0,
472
- "bleu": 47.7692581451387
 
473
  },
474
  {
475
  "model": "anthropic/claude-3.5-sonnet",
@@ -477,7 +536,8 @@
477
  "target_language": "cat_Latn",
478
  "target_language_name": "Catalan",
479
  "speakers": 5100000,
480
- "bleu": 74.45950079317922
 
481
  },
482
  {
483
  "model": "anthropic/claude-3.5-sonnet",
@@ -485,15 +545,17 @@
485
  "target_language": "zul_Latn",
486
  "target_language_name": "Zulu",
487
  "speakers": 15700000,
488
- "bleu": 59.176207838896076
 
489
  },
490
  {
491
  "model": "anthropic/claude-3.5-sonnet",
492
  "original_language": "eng_Latn",
493
  "target_language": "pes_Arab",
494
- "target_language_name": " Iranian Persian",
495
  "speakers": 52800000,
496
- "bleu": 57.466690672020064
 
497
  },
498
  {
499
  "model": "anthropic/claude-3.5-sonnet",
@@ -501,7 +563,8 @@
501
  "target_language": "taq_Latn",
502
  "target_language_name": "Tamasheq",
503
  "speakers": 500000,
504
- "bleu": 25.866944911127725
 
505
  },
506
  {
507
  "model": "anthropic/claude-3.5-sonnet",
@@ -509,7 +572,8 @@
509
  "target_language": "snd_Deva",
510
  "target_language_name": "Sindhi",
511
  "speakers": 25000000,
512
- "bleu": 40.04493401977834
 
513
  },
514
  {
515
  "model": "anthropic/claude-3.5-sonnet",
@@ -517,7 +581,8 @@
517
  "target_language": "ssw_Latn",
518
  "target_language_name": "Swati",
519
  "speakers": 2034200,
520
- "bleu": 52.77460964391619
 
521
  },
522
  {
523
  "model": "anthropic/claude-3.5-sonnet",
@@ -525,7 +590,8 @@
525
  "target_language": "mkd_Cyrl",
526
  "target_language_name": "Macedonian",
527
  "speakers": 2000000,
528
- "bleu": 72.27334714365769
 
529
  },
530
  {
531
  "model": "openai/gpt-4o-mini",
@@ -533,7 +599,8 @@
533
  "target_language": "pol_Latn",
534
  "target_language_name": "Polish",
535
  "speakers": 40200000,
536
- "bleu": 63.79524354957938
 
537
  },
538
  {
539
  "model": "anthropic/claude-3.5-sonnet",
@@ -541,7 +608,8 @@
541
  "target_language": "pol_Latn",
542
  "target_language_name": "Polish",
543
  "speakers": 40200000,
544
- "bleu": 65.97562270518736
 
545
  },
546
  {
547
  "model": "meta-llama/llama-3.1-70b-instruct",
@@ -549,7 +617,8 @@
549
  "target_language": "pol_Latn",
550
  "target_language_name": "Polish",
551
  "speakers": 40200000,
552
- "bleu": 62.09512880944625
 
553
  },
554
  {
555
  "model": "mistralai/mistral-nemo",
@@ -557,7 +626,8 @@
557
  "target_language": "pol_Latn",
558
  "target_language_name": "Polish",
559
  "speakers": 40200000,
560
- "bleu": 56.42877796144466
 
561
  },
562
  {
563
  "model": "qwen/qwen-2.5-72b-instruct",
@@ -565,7 +635,8 @@
565
  "target_language": "pol_Latn",
566
  "target_language_name": "Polish",
567
  "speakers": 40200000,
568
- "bleu": 61.08942681151859
 
569
  },
570
  {
571
  "model": "anthropic/claude-3.5-sonnet",
@@ -573,15 +644,17 @@
573
  "target_language": "srd_Latn",
574
  "target_language_name": "Sardinian",
575
  "speakers": 1300000,
576
- "bleu": 62.69039147714039
 
577
  },
578
  {
579
  "model": "anthropic/claude-3.5-sonnet",
580
  "original_language": "eng_Latn",
581
  "target_language": "arb_Latn",
582
- "target_language_name": " Standard Arabic",
583
  "speakers": 0,
584
- "bleu": 47.790331753697075
 
585
  },
586
  {
587
  "model": "anthropic/claude-3.5-sonnet",
@@ -589,7 +662,8 @@
589
  "target_language": "twi_Latn_asan1239",
590
  "target_language_name": "Twi",
591
  "speakers": 3000000,
592
- "bleu": 44.44337481309101
 
593
  },
594
  {
595
  "model": "anthropic/claude-3.5-sonnet",
@@ -597,7 +671,8 @@
597
  "target_language": "tum_Latn",
598
  "target_language_name": "Tumbuka",
599
  "speakers": 2680000,
600
- "bleu": 44.0490017392109
 
601
  },
602
  {
603
  "model": "anthropic/claude-3.5-sonnet",
@@ -605,7 +680,8 @@
605
  "target_language": "fur_Latn",
606
  "target_language_name": "Friulian",
607
  "speakers": 300000,
608
- "bleu": 66.54880923723718
 
609
  },
610
  {
611
  "model": "anthropic/claude-3.5-sonnet",
@@ -613,7 +689,8 @@
613
  "target_language": "lua_Latn",
614
  "target_language_name": "Luba-Kasai",
615
  "speakers": 6300000,
616
- "bleu": 45.065529165477344
 
617
  },
618
  {
619
  "model": "anthropic/claude-3.5-sonnet",
@@ -621,7 +698,8 @@
621
  "target_language": "fil_Latn",
622
  "target_language_name": "Filipino",
623
  "speakers": 90000000,
624
- "bleu": 70.19284983784472
 
625
  },
626
  {
627
  "model": "anthropic/claude-3.5-sonnet",
@@ -629,7 +707,8 @@
629
  "target_language": "afr_Latn",
630
  "target_language_name": "Afrikaans",
631
  "speakers": 10300000,
632
- "bleu": 76.89005407773752
 
633
  },
634
  {
635
  "model": "anthropic/claude-3.5-sonnet",
@@ -637,7 +716,8 @@
637
  "target_language": "bos_Latn",
638
  "target_language_name": "Bosnian",
639
  "speakers": 3500000,
640
- "bleu": 72.54880271311463
 
641
  },
642
  {
643
  "model": "anthropic/claude-3.5-sonnet",
@@ -645,15 +725,17 @@
645
  "target_language": "ltg_Latn",
646
  "target_language_name": "Latgalian",
647
  "speakers": 200000,
648
- "bleu": 56.484355652391756
 
649
  },
650
  {
651
  "model": "anthropic/claude-3.5-sonnet",
652
  "original_language": "eng_Latn",
653
  "target_language": "acq_Arab",
654
- "target_language_name": " Ta’izzi-Adeni Arabic",
655
  "speakers": 10500000,
656
- "bleu": 48.67285742584471
 
657
  },
658
  {
659
  "model": "anthropic/claude-3.5-sonnet",
@@ -661,7 +743,8 @@
661
  "target_language": "mag_Deva",
662
  "target_language_name": "Magahi",
663
  "speakers": 20700000,
664
- "bleu": 58.54742215461198
 
665
  },
666
  {
667
  "model": "anthropic/claude-3.5-sonnet",
@@ -669,7 +752,8 @@
669
  "target_language": "min_Latn",
670
  "target_language_name": "Minangkabau",
671
  "speakers": 5530000,
672
- "bleu": 64.0323403919738
 
673
  },
674
  {
675
  "model": "anthropic/claude-3.5-sonnet",
@@ -677,15 +761,17 @@
677
  "target_language": "kor_Hang",
678
  "target_language_name": "Korean",
679
  "speakers": 77300000,
680
- "bleu": 43.68722859743311
 
681
  },
682
  {
683
  "model": "anthropic/claude-3.5-sonnet",
684
  "original_language": "eng_Latn",
685
  "target_language": "zsm_Latn",
686
- "target_language_name": " Standard Malay",
687
  "speakers": 0,
688
- "bleu": 75.58682556964494
 
689
  },
690
  {
691
  "model": "anthropic/claude-3.5-sonnet",
@@ -693,15 +779,17 @@
693
  "target_language": "mar_Deva",
694
  "target_language_name": "Marathi",
695
  "speakers": 83100000,
696
- "bleu": 57.44340907113495
 
697
  },
698
  {
699
  "model": "anthropic/claude-3.5-sonnet",
700
  "original_language": "eng_Latn",
701
  "target_language": "pbt_Arab",
702
- "target_language_name": " Southern Pashto",
703
  "speakers": 10900000,
704
- "bleu": 38.548696222210154
 
705
  },
706
  {
707
  "model": "anthropic/claude-3.5-sonnet",
@@ -709,15 +797,17 @@
709
  "target_language": "lij_Latn",
710
  "target_language_name": "Ligurian",
711
  "speakers": 500000,
712
- "bleu": 55.85306363017816
 
713
  },
714
  {
715
  "model": "anthropic/claude-3.5-sonnet",
716
  "original_language": "eng_Latn",
717
  "target_language": "knc_Latn",
718
- "target_language_name": " Yerwa Kanuri",
719
  "speakers": 0,
720
- "bleu": 22.241385961047616
 
721
  },
722
  {
723
  "model": "anthropic/claude-3.5-sonnet",
@@ -725,7 +815,8 @@
725
  "target_language": "chv_Cyrl",
726
  "target_language_name": "Chuvash",
727
  "speakers": 1279650,
728
- "bleu": 45.05466587233098
 
729
  },
730
  {
731
  "model": "anthropic/claude-3.5-sonnet",
@@ -733,7 +824,8 @@
733
  "target_language": "asm_Beng",
734
  "target_language_name": "Assamese",
735
  "speakers": 15300000,
736
- "bleu": 47.03513316051628
 
737
  },
738
  {
739
  "model": "anthropic/claude-3.5-sonnet",
@@ -741,7 +833,8 @@
741
  "target_language": "ace_Arab",
742
  "target_language_name": "Aceh",
743
  "speakers": 3500032,
744
- "bleu": 23.349267705271775
 
745
  },
746
  {
747
  "model": "anthropic/claude-3.5-sonnet",
@@ -749,7 +842,8 @@
749
  "target_language": "tha_Thai",
750
  "target_language_name": "Thai",
751
  "speakers": 40000000,
752
- "bleu": 62.81253609435389
 
753
  },
754
  {
755
  "model": "anthropic/claude-3.5-sonnet",
@@ -757,7 +851,8 @@
757
  "target_language": "fao_Latn",
758
  "target_language_name": "Faroese",
759
  "speakers": 69150,
760
- "bleu": 65.91479024829229
 
761
  },
762
  {
763
  "model": "anthropic/claude-3.5-sonnet",
@@ -765,7 +860,8 @@
765
  "target_language": "nqo_Nkoo",
766
  "target_language_name": "N’Ko",
767
  "speakers": 0,
768
- "bleu": 32.48349079900792
 
769
  },
770
  {
771
  "model": "anthropic/claude-3.5-sonnet",
@@ -773,7 +869,8 @@
773
  "target_language": "ilo_Latn",
774
  "target_language_name": "Ilocano",
775
  "speakers": 9100000,
776
- "bleu": 62.605886459379576
 
777
  },
778
  {
779
  "model": "anthropic/claude-3.5-sonnet",
@@ -781,15 +878,17 @@
781
  "target_language": "kat_Geor",
782
  "target_language_name": "Georgian",
783
  "speakers": 3700000,
784
- "bleu": 61.016636144241765
 
785
  },
786
  {
787
  "model": "anthropic/claude-3.5-sonnet",
788
  "original_language": "eng_Latn",
789
  "target_language": "ayr_Latn",
790
- "target_language_name": " Central Aymara",
791
  "speakers": 0,
792
- "bleu": 40.77379070593447
 
793
  },
794
  {
795
  "model": "anthropic/claude-3.5-sonnet",
@@ -797,7 +896,8 @@
797
  "target_language": "dan_Latn",
798
  "target_language_name": "Danish",
799
  "speakers": 6000000,
800
- "bleu": 78.0935433283814
 
801
  },
802
  {
803
  "model": "anthropic/claude-3.5-sonnet",
@@ -805,7 +905,8 @@
805
  "target_language": "brx_Deva",
806
  "target_language_name": "Boro (India)",
807
  "speakers": 1482929,
808
- "bleu": 36.11004749691388
 
809
  },
810
  {
811
  "model": "anthropic/claude-3.5-sonnet",
@@ -813,15 +914,17 @@
813
  "target_language": "sag_Latn",
814
  "target_language_name": "Sango",
815
  "speakers": 4600000,
816
- "bleu": 34.875422265717596
 
817
  },
818
  {
819
  "model": "anthropic/claude-3.5-sonnet",
820
  "original_language": "eng_Latn",
821
  "target_language": "lvs_Latn",
822
- "target_language_name": " Standard Latvian",
823
  "speakers": 0,
824
- "bleu": 64.89951881611144
 
825
  },
826
  {
827
  "model": "anthropic/claude-3.5-sonnet",
@@ -829,7 +932,8 @@
829
  "target_language": "jpn_Jpan",
830
  "target_language_name": "Japanese",
831
  "speakers": 128000000,
832
- "bleu": 49.91661356931259
 
833
  },
834
  {
835
  "model": "anthropic/claude-3.5-sonnet",
@@ -837,7 +941,8 @@
837
  "target_language": "uig_Arab",
838
  "target_language_name": "Uyghur",
839
  "speakers": 10400000,
840
- "bleu": 53.53468771030665
 
841
  },
842
  {
843
  "model": "anthropic/claude-3.5-sonnet",
@@ -845,7 +950,8 @@
845
  "target_language": "fra_Latn",
846
  "target_language_name": "French",
847
  "speakers": 208157220,
848
- "bleu": 79.3023871219446
 
849
  },
850
  {
851
  "model": "anthropic/claude-3.5-sonnet",
@@ -853,7 +959,8 @@
853
  "target_language": "jav_Latn",
854
  "target_language_name": "Javanese",
855
  "speakers": 84308740,
856
- "bleu": 60.44033529900538
 
857
  },
858
  {
859
  "model": "anthropic/claude-3.5-sonnet",
@@ -861,7 +968,8 @@
861
  "target_language": "sun_Latn",
862
  "target_language_name": "Sunda",
863
  "speakers": 32400000,
864
- "bleu": 56.40659991041485
 
865
  },
866
  {
867
  "model": "anthropic/claude-3.5-sonnet",
@@ -869,7 +977,8 @@
869
  "target_language": "umb_Latn",
870
  "target_language_name": "Umbundu",
871
  "speakers": 6000000,
872
- "bleu": 21.080277559665817
 
873
  },
874
  {
875
  "model": "anthropic/claude-3.5-sonnet",
@@ -877,7 +986,8 @@
877
  "target_language": "bel_Cyrl",
878
  "target_language_name": "Belarusian",
879
  "speakers": 7900000,
880
- "bleu": 54.51951664423131
 
881
  },
882
  {
883
  "model": "anthropic/claude-3.5-sonnet",
@@ -885,15 +995,17 @@
885
  "target_language": "cjk_Latn",
886
  "target_language_name": "Chokwe",
887
  "speakers": 0,
888
- "bleu": 10.186407416077753
 
889
  },
890
  {
891
  "model": "anthropic/claude-3.5-sonnet",
892
  "original_language": "eng_Latn",
893
  "target_language": "yue_Hant",
894
- "target_language_name": " Yue Chinese",
895
  "speakers": 73100000,
896
- "bleu": 34.25609770821219
 
897
  },
898
  {
899
  "model": "anthropic/claude-3.5-sonnet",
@@ -901,15 +1013,17 @@
901
  "target_language": "hat_Latn",
902
  "target_language_name": "Haitian Creole",
903
  "speakers": 9600000,
904
- "bleu": 63.85321875910916
 
905
  },
906
  {
907
  "model": "anthropic/claude-3.5-sonnet",
908
  "original_language": "eng_Latn",
909
  "target_language": "kmr_Latn",
910
- "target_language_name": " Northern Kurdish",
911
  "speakers": 14600000,
912
- "bleu": 55.00856002671919
 
913
  },
914
  {
915
  "model": "anthropic/claude-3.5-sonnet",
@@ -917,7 +1031,8 @@
917
  "target_language": "ceb_Latn",
918
  "target_language_name": "Cebuano",
919
  "speakers": 15900000,
920
- "bleu": 69.455795865483
 
921
  },
922
  {
923
  "model": "anthropic/claude-3.5-sonnet",
@@ -925,15 +1040,17 @@
925
  "target_language": "dzo_Tibt",
926
  "target_language_name": "Dzongkha",
927
  "speakers": 237080,
928
- "bleu": 44.35738140173861
 
929
  },
930
  {
931
  "model": "anthropic/claude-3.5-sonnet",
932
  "original_language": "eng_Latn",
933
  "target_language": "deu_Latn",
934
- "target_language_name": " Standard German",
935
  "speakers": 105000000,
936
- "bleu": 77.22130383692244
 
937
  },
938
  {
939
  "model": "anthropic/claude-3.5-sonnet",
@@ -941,7 +1058,8 @@
941
  "target_language": "ibo_Latn",
942
  "target_language_name": "Igbo",
943
  "speakers": 27000000,
944
- "bleu": 46.40173449341075
 
945
  },
946
  {
947
  "model": "anthropic/claude-3.5-sonnet",
@@ -949,23 +1067,26 @@
949
  "target_language": "vie_Latn",
950
  "target_language_name": "Vietnamese",
951
  "speakers": 76000000,
952
- "bleu": 70.35607494641172
 
953
  },
954
  {
955
  "model": "anthropic/claude-3.5-sonnet",
956
  "original_language": "eng_Latn",
957
  "target_language": "quy_Latn",
958
- "target_language_name": " Ayacucho Quechua",
959
  "speakers": 918200,
960
- "bleu": 46.24441292595311
 
961
  },
962
  {
963
  "model": "anthropic/claude-3.5-sonnet",
964
  "original_language": "eng_Latn",
965
  "target_language": "cmn_Hant",
966
- "target_language_name": " Mandarin Chinese",
967
  "speakers": 1074000000,
968
- "bleu": 41.46450104859355
 
969
  },
970
  {
971
  "model": "anthropic/claude-3.5-sonnet",
@@ -973,7 +1094,8 @@
973
  "target_language": "mai_Deva",
974
  "target_language_name": "Maithili",
975
  "speakers": 33900000,
976
- "bleu": 54.65300713908629
 
977
  },
978
  {
979
  "model": "anthropic/claude-3.5-sonnet",
@@ -981,7 +1103,8 @@
981
  "target_language": "gla_Latn",
982
  "target_language_name": "Scottish Gaelic",
983
  "speakers": 60130,
984
- "bleu": 62.604437133773324
 
985
  },
986
  {
987
  "model": "anthropic/claude-3.5-sonnet",
@@ -989,7 +1112,8 @@
989
  "target_language": "urd_Arab",
990
  "target_language_name": "Urdu",
991
  "speakers": 94022900,
992
- "bleu": 61.12554572717868
 
993
  },
994
  {
995
  "model": "anthropic/claude-3.5-sonnet",
@@ -997,7 +1121,8 @@
997
  "target_language": "shn_Mymr",
998
  "target_language_name": "Shan",
999
  "speakers": 3000000,
1000
- "bleu": 29.21299485766884
 
1001
  },
1002
  {
1003
  "model": "anthropic/claude-3.5-sonnet",
@@ -1005,15 +1130,17 @@
1005
  "target_language": "wol_Latn",
1006
  "target_language_name": "Wolof",
1007
  "speakers": 3700000,
1008
- "bleu": 42.64301275691043
 
1009
  },
1010
  {
1011
  "model": "anthropic/claude-3.5-sonnet",
1012
  "original_language": "eng_Latn",
1013
  "target_language": "cmn_Hans",
1014
- "target_language_name": " Mandarin Chinese",
1015
  "speakers": 1074000000,
1016
- "bleu": 43.72017219180501
 
1017
  },
1018
  {
1019
  "model": "anthropic/claude-3.5-sonnet",
@@ -1021,15 +1148,17 @@
1021
  "target_language": "guj_Gujr",
1022
  "target_language_name": "Gujarati",
1023
  "speakers": 56400000,
1024
- "bleu": 55.588451345198735
 
1025
  },
1026
  {
1027
  "model": "anthropic/claude-3.5-sonnet",
1028
  "original_language": "eng_Latn",
1029
  "target_language": "ekk_Latn",
1030
- "target_language_name": " Standard Estonian",
1031
  "speakers": 1164770,
1032
- "bleu": 68.06935269432235
 
1033
  },
1034
  {
1035
  "model": "anthropic/claude-3.5-sonnet",
@@ -1037,7 +1166,8 @@
1037
  "target_language": "luo_Latn",
1038
  "target_language_name": "Dholuo",
1039
  "speakers": 3000000,
1040
- "bleu": 46.41194790710186
 
1041
  },
1042
  {
1043
  "model": "anthropic/claude-3.5-sonnet",
@@ -1045,15 +1175,17 @@
1045
  "target_language": "hrv_Latn",
1046
  "target_language_name": "Croatian",
1047
  "speakers": 7000000,
1048
- "bleu": 69.54569836615161
 
1049
  },
1050
  {
1051
  "model": "anthropic/claude-3.5-sonnet",
1052
  "original_language": "eng_Latn",
1053
  "target_language": "uzn_Latn",
1054
- "target_language_name": " Northern Uzbek",
1055
  "speakers": 26912410,
1056
- "bleu": 64.07804482004295
 
1057
  },
1058
  {
1059
  "model": "anthropic/claude-3.5-sonnet",
@@ -1061,7 +1193,8 @@
1061
  "target_language": "ben_Beng",
1062
  "target_language_name": "Bengali",
1063
  "speakers": 300000000,
1064
- "bleu": 57.14175888160181
 
1065
  },
1066
  {
1067
  "model": "anthropic/claude-3.5-sonnet",
@@ -1069,7 +1202,8 @@
1069
  "target_language": "nya_Latn",
1070
  "target_language_name": "Chichewa",
1071
  "speakers": 12000000,
1072
- "bleu": 59.76016801606614
 
1073
  },
1074
  {
1075
  "model": "anthropic/claude-3.5-sonnet",
@@ -1077,7 +1211,8 @@
1077
  "target_language": "tsn_Latn",
1078
  "target_language_name": "Setswana",
1079
  "speakers": 4500000,
1080
- "bleu": 55.22888902281337
 
1081
  },
1082
  {
1083
  "model": "anthropic/claude-3.5-sonnet",
@@ -1085,15 +1220,17 @@
1085
  "target_language": "fin_Latn",
1086
  "target_language_name": "Finnish",
1087
  "speakers": 5413380,
1088
- "bleu": 70.94250295175219
 
1089
  },
1090
  {
1091
  "model": "anthropic/claude-3.5-sonnet",
1092
  "original_language": "eng_Latn",
1093
  "target_language": "nso_Latn",
1094
- "target_language_name": " Northern Sotho",
1095
  "speakers": 4100000,
1096
- "bleu": 63.048250806570664
 
1097
  },
1098
  {
1099
  "model": "anthropic/claude-3.5-sonnet",
@@ -1101,7 +1238,8 @@
1101
  "target_language": "sna_Latn",
1102
  "target_language_name": "Shona",
1103
  "speakers": 9023000,
1104
- "bleu": 51.55921914049446
 
1105
  },
1106
  {
1107
  "model": "anthropic/claude-3.5-sonnet",
@@ -1109,7 +1247,8 @@
1109
  "target_language": "snd_Arab",
1110
  "target_language_name": "Sindhi",
1111
  "speakers": 25000000,
1112
- "bleu": 56.33027730975489
 
1113
  },
1114
  {
1115
  "model": "anthropic/claude-3.5-sonnet",
@@ -1117,7 +1256,8 @@
1117
  "target_language": "xho_Latn",
1118
  "target_language_name": "Xhosa",
1119
  "speakers": 11000000,
1120
- "bleu": 55.46880910094653
 
1121
  },
1122
  {
1123
  "model": "anthropic/claude-3.5-sonnet",
@@ -1125,7 +1265,8 @@
1125
  "target_language": "kik_Latn",
1126
  "target_language_name": "Gikuyu",
1127
  "speakers": 6623000,
1128
- "bleu": 40.92882752909001
 
1129
  },
1130
  {
1131
  "model": "anthropic/claude-3.5-sonnet",
@@ -1133,7 +1274,8 @@
1133
  "target_language": "tso_Latn",
1134
  "target_language_name": "Tsonga",
1135
  "speakers": 13000000,
1136
- "bleu": 58.35165735971044
 
1137
  },
1138
  {
1139
  "model": "anthropic/claude-3.5-sonnet",
@@ -1141,7 +1283,8 @@
1141
  "target_language": "tat_Cyrl",
1142
  "target_language_name": "Tatar",
1143
  "speakers": 5427318,
1144
- "bleu": 60.3447467212788
 
1145
  },
1146
  {
1147
  "model": "anthropic/claude-3.5-sonnet",
@@ -1149,15 +1292,17 @@
1149
  "target_language": "awa_Deva",
1150
  "target_language_name": "Awadhi",
1151
  "speakers": 22000000,
1152
- "bleu": 46.0797144146192
 
1153
  },
1154
  {
1155
  "model": "anthropic/claude-3.5-sonnet",
1156
  "original_language": "eng_Latn",
1157
  "target_language": "gom_Deva",
1158
- "target_language_name": " Goan Konkani",
1159
  "speakers": 3633900,
1160
- "bleu": 46.88835079678478
 
1161
  },
1162
  {
1163
  "model": "anthropic/claude-3.5-sonnet",
@@ -1165,7 +1310,8 @@
1165
  "target_language": "amh_Ethi",
1166
  "target_language_name": "Amharic",
1167
  "speakers": 25000000,
1168
- "bleu": 43.15445686971015
 
1169
  },
1170
  {
1171
  "model": "anthropic/claude-3.5-sonnet",
@@ -1173,7 +1319,8 @@
1173
  "target_language": "tam_Taml",
1174
  "target_language_name": "Tamil",
1175
  "speakers": 75000000,
1176
- "bleu": 65.78632210538115
 
1177
  },
1178
  {
1179
  "model": "openai/gpt-4o-mini",
@@ -1181,7 +1328,8 @@
1181
  "target_language": "isl_Latn",
1182
  "target_language_name": "Icelandic",
1183
  "speakers": 358000,
1184
- "bleu": 61.13552606922321
 
1185
  },
1186
  {
1187
  "model": "anthropic/claude-3.5-sonnet",
@@ -1189,7 +1337,8 @@
1189
  "target_language": "isl_Latn",
1190
  "target_language_name": "Icelandic",
1191
  "speakers": 358000,
1192
- "bleu": 66.67473000551618
 
1193
  },
1194
  {
1195
  "model": "meta-llama/llama-3.1-70b-instruct",
@@ -1197,7 +1346,8 @@
1197
  "target_language": "isl_Latn",
1198
  "target_language_name": "Icelandic",
1199
  "speakers": 358000,
1200
- "bleu": 58.60923195347865
 
1201
  },
1202
  {
1203
  "model": "mistralai/mistral-nemo",
@@ -1205,7 +1355,8 @@
1205
  "target_language": "isl_Latn",
1206
  "target_language_name": "Icelandic",
1207
  "speakers": 358000,
1208
- "bleu": 45.58482442810681
 
1209
  },
1210
  {
1211
  "model": "qwen/qwen-2.5-72b-instruct",
@@ -1213,7 +1364,8 @@
1213
  "target_language": "isl_Latn",
1214
  "target_language_name": "Icelandic",
1215
  "speakers": 358000,
1216
- "bleu": 40.16071522003955
 
1217
  },
1218
  {
1219
  "model": "anthropic/claude-3.5-sonnet",
@@ -1221,15 +1373,17 @@
1221
  "target_language": "san_Deva",
1222
  "target_language_name": "Sanskrit",
1223
  "speakers": 49736,
1224
- "bleu": 32.78132499113236
 
1225
  },
1226
  {
1227
  "model": "anthropic/claude-3.5-sonnet",
1228
  "original_language": "eng_Latn",
1229
  "target_language": "als_Latn",
1230
- "target_language_name": " Tosk Albanian",
1231
  "speakers": 3000000,
1232
- "bleu": 69.24285560147364
 
1233
  },
1234
  {
1235
  "model": "anthropic/claude-3.5-sonnet",
@@ -1237,7 +1391,8 @@
1237
  "target_language": "ron_Latn",
1238
  "target_language_name": "Romanian",
1239
  "speakers": 24300000,
1240
- "bleu": 76.4907159034647
 
1241
  },
1242
  {
1243
  "model": "anthropic/claude-3.5-sonnet",
@@ -1245,7 +1400,8 @@
1245
  "target_language": "kaz_Cyrl",
1246
  "target_language_name": "Kazakh",
1247
  "speakers": 13161980,
1248
- "bleu": 61.12516213751114
 
1249
  },
1250
  {
1251
  "model": "anthropic/claude-3.5-sonnet",
@@ -1253,7 +1409,8 @@
1253
  "target_language": "sat_Olck",
1254
  "target_language_name": "Santhali",
1255
  "speakers": 7200000,
1256
- "bleu": 31.51192472690372
 
1257
  },
1258
  {
1259
  "model": "anthropic/claude-3.5-sonnet",
@@ -1261,15 +1418,17 @@
1261
  "target_language": "ukr_Cyrl",
1262
  "target_language_name": "Ukrainian",
1263
  "speakers": 34710100,
1264
- "bleu": 68.09762325436868
 
1265
  },
1266
  {
1267
  "model": "anthropic/claude-3.5-sonnet",
1268
  "original_language": "eng_Latn",
1269
  "target_language": "khk_Cyrl",
1270
- "target_language_name": " Halh Mongolian",
1271
  "speakers": 2704030,
1272
- "bleu": 59.14263972986465
 
1273
  },
1274
  {
1275
  "model": "anthropic/claude-3.5-sonnet",
@@ -1277,7 +1436,8 @@
1277
  "target_language": "bjn_Latn",
1278
  "target_language_name": "Banjar",
1279
  "speakers": 3500000,
1280
- "bleu": 56.309519555010915
 
1281
  },
1282
  {
1283
  "model": "anthropic/claude-3.5-sonnet",
@@ -1285,7 +1445,8 @@
1285
  "target_language": "fon_Latn",
1286
  "target_language_name": "Fon",
1287
  "speakers": 1935500,
1288
- "bleu": 25.279777366609945
 
1289
  },
1290
  {
1291
  "model": "anthropic/claude-3.5-sonnet",
@@ -1293,7 +1454,8 @@
1293
  "target_language": "sin_Sinh",
1294
  "target_language_name": "Sinhala",
1295
  "speakers": 15300000,
1296
- "bleu": 56.75673117959971
 
1297
  },
1298
  {
1299
  "model": "anthropic/claude-3.5-sonnet",
@@ -1301,7 +1463,8 @@
1301
  "target_language": "nno_Latn",
1302
  "target_language_name": "nno",
1303
  "speakers": 0,
1304
- "bleu": 71.86156462958435
 
1305
  },
1306
  {
1307
  "model": "anthropic/claude-3.5-sonnet",
@@ -1309,7 +1472,8 @@
1309
  "target_language": "hau_Latn",
1310
  "target_language_name": "Hausa",
1311
  "speakers": 43900000,
1312
- "bleu": 56.34319579006431
 
1313
  },
1314
  {
1315
  "model": "anthropic/claude-3.5-sonnet",
@@ -1317,7 +1481,8 @@
1317
  "target_language": "prs_Arab",
1318
  "target_language_name": "Dari",
1319
  "speakers": 9600000,
1320
- "bleu": 52.55397957953147
 
1321
  },
1322
  {
1323
  "model": "anthropic/claude-3.5-sonnet",
@@ -1325,7 +1490,8 @@
1325
  "target_language": "ell_Grek",
1326
  "target_language_name": "Greek",
1327
  "speakers": 15000000,
1328
- "bleu": 66.23477821529342
 
1329
  },
1330
  {
1331
  "model": "anthropic/claude-3.5-sonnet",
@@ -1333,7 +1499,8 @@
1333
  "target_language": "tpi_Latn",
1334
  "target_language_name": "Tok Pisin",
1335
  "speakers": 4000000,
1336
- "bleu": 56.54077603673191
 
1337
  },
1338
  {
1339
  "model": "anthropic/claude-3.5-sonnet",
@@ -1341,7 +1508,8 @@
1341
  "target_language": "hye_Armn",
1342
  "target_language_name": "Armenian",
1343
  "speakers": 6700000,
1344
- "bleu": 64.68044008058686
 
1345
  },
1346
  {
1347
  "model": "anthropic/claude-3.5-sonnet",
@@ -1349,7 +1517,8 @@
1349
  "target_language": "eus_Latn",
1350
  "target_language_name": "Basque",
1351
  "speakers": 750000,
1352
- "bleu": 65.89687213771296
 
1353
  },
1354
  {
1355
  "model": "anthropic/claude-3.5-sonnet",
@@ -1357,7 +1526,8 @@
1357
  "target_language": "nob_Latn",
1358
  "target_language_name": "Bokmål",
1359
  "speakers": 4000000,
1360
- "bleu": 77.49395130155645
 
1361
  },
1362
  {
1363
  "model": "anthropic/claude-3.5-sonnet",
@@ -1365,15 +1535,17 @@
1365
  "target_language": "slk_Latn",
1366
  "target_language_name": "Slovak",
1367
  "speakers": 6000000,
1368
- "bleu": 67.92848040860814
 
1369
  },
1370
  {
1371
  "model": "anthropic/claude-3.5-sonnet",
1372
  "original_language": "eng_Latn",
1373
  "target_language": "knc_Arab",
1374
- "target_language_name": " Yerwa Kanuri",
1375
  "speakers": 0,
1376
- "bleu": 14.714612284264362
 
1377
  },
1378
  {
1379
  "model": "openai/gpt-4o-mini",
@@ -1381,7 +1553,8 @@
1381
  "target_language": "lin_Latn",
1382
  "target_language_name": "Lingala",
1383
  "speakers": 20000000,
1384
- "bleu": 50.384710146677506
 
1385
  },
1386
  {
1387
  "model": "anthropic/claude-3.5-sonnet",
@@ -1389,7 +1562,8 @@
1389
  "target_language": "lin_Latn",
1390
  "target_language_name": "Lingala",
1391
  "speakers": 20000000,
1392
- "bleu": 56.735518064625495
 
1393
  },
1394
  {
1395
  "model": "meta-llama/llama-3.1-70b-instruct",
@@ -1397,7 +1571,8 @@
1397
  "target_language": "lin_Latn",
1398
  "target_language_name": "Lingala",
1399
  "speakers": 20000000,
1400
- "bleu": 19.732953348932526
 
1401
  },
1402
  {
1403
  "model": "mistralai/mistral-nemo",
@@ -1405,7 +1580,8 @@
1405
  "target_language": "lin_Latn",
1406
  "target_language_name": "Lingala",
1407
  "speakers": 20000000,
1408
- "bleu": 8.64985622273109
 
1409
  },
1410
  {
1411
  "model": "qwen/qwen-2.5-72b-instruct",
@@ -1413,7 +1589,8 @@
1413
  "target_language": "lin_Latn",
1414
  "target_language_name": "Lingala",
1415
  "speakers": 20000000,
1416
- "bleu": 16.658410482633357
 
1417
  },
1418
  {
1419
  "model": "anthropic/claude-3.5-sonnet",
@@ -1421,15 +1598,17 @@
1421
  "target_language": "bam_Latn",
1422
  "target_language_name": "Bamanankan",
1423
  "speakers": 2700000,
1424
- "bleu": 38.693909140769804
 
1425
  },
1426
  {
1427
  "model": "anthropic/claude-3.5-sonnet",
1428
  "original_language": "eng_Latn",
1429
  "target_language": "sot_Latn",
1430
- "target_language_name": " Southern Sotho",
1431
  "speakers": 6000000,
1432
- "bleu": 57.153751026567605
 
1433
  },
1434
  {
1435
  "model": "anthropic/claude-3.5-sonnet",
@@ -1437,23 +1616,26 @@
1437
  "target_language": "min_Arab",
1438
  "target_language_name": "Minangkabau",
1439
  "speakers": 5530000,
1440
- "bleu": 37.44925084737469
 
1441
  },
1442
  {
1443
  "model": "anthropic/claude-3.5-sonnet",
1444
  "original_language": "eng_Latn",
1445
  "target_language": "zgh_Tfng",
1446
- "target_language_name": " Standard Moroccan Tamazight",
1447
  "speakers": 0,
1448
- "bleu": 36.02110203894128
 
1449
  },
1450
  {
1451
  "model": "anthropic/claude-3.5-sonnet",
1452
  "original_language": "eng_Latn",
1453
  "target_language": "gug_Latn",
1454
- "target_language_name": " Paraguayan Guaraní",
1455
  "speakers": 0,
1456
- "bleu": 42.90235038974312
 
1457
  },
1458
  {
1459
  "model": "anthropic/claude-3.5-sonnet",
@@ -1461,7 +1643,8 @@
1461
  "target_language": "lmo_Latn",
1462
  "target_language_name": "Lombard",
1463
  "speakers": 3900000,
1464
- "bleu": 46.38844026736926
 
1465
  },
1466
  {
1467
  "model": "anthropic/claude-3.5-sonnet",
@@ -1469,7 +1652,8 @@
1469
  "target_language": "yor_Latn",
1470
  "target_language_name": "Yoruba",
1471
  "speakers": 40000000,
1472
- "bleu": 34.264254226792296
 
1473
  },
1474
  {
1475
  "model": "anthropic/claude-3.5-sonnet",
@@ -1477,7 +1661,8 @@
1477
  "target_language": "taq_Tfng",
1478
  "target_language_name": "Tamasheq",
1479
  "speakers": 500000,
1480
- "bleu": 10.997033033155907
 
1481
  },
1482
  {
1483
  "model": "anthropic/claude-3.5-sonnet",
@@ -1485,15 +1670,17 @@
1485
  "target_language": "tgk_Cyrl",
1486
  "target_language_name": "Tajik",
1487
  "speakers": 14000000,
1488
- "bleu": 60.97836841576954
 
1489
  },
1490
  {
1491
  "model": "anthropic/claude-3.5-sonnet",
1492
  "original_language": "eng_Latn",
1493
  "target_language": "fuv_Latn",
1494
- "target_language_name": " Nigerian Fulfulde",
1495
  "speakers": 14500000,
1496
- "bleu": 28.24585718309509
 
1497
  },
1498
  {
1499
  "model": "anthropic/claude-3.5-sonnet",
@@ -1501,7 +1688,8 @@
1501
  "target_language": "mal_Mlym",
1502
  "target_language_name": "Malayalam",
1503
  "speakers": 37100000,
1504
- "bleu": 64.06558940908465
 
1505
  },
1506
  {
1507
  "model": "anthropic/claude-3.5-sonnet",
@@ -1509,15 +1697,17 @@
1509
  "target_language": "tel_Telu",
1510
  "target_language_name": "Telugu",
1511
  "speakers": 82000000,
1512
- "bleu": 61.635245762892694
 
1513
  },
1514
  {
1515
  "model": "anthropic/claude-3.5-sonnet",
1516
  "original_language": "eng_Latn",
1517
  "target_language": "arb_Arab",
1518
- "target_language_name": " Standard Arabic",
1519
  "speakers": 0,
1520
- "bleu": 65.45362559909618
 
1521
  },
1522
  {
1523
  "model": "anthropic/claude-3.5-sonnet",
@@ -1525,7 +1715,8 @@
1525
  "target_language": "khm_Khmr",
1526
  "target_language_name": "Khmer",
1527
  "speakers": 16600000,
1528
- "bleu": 49.209825704340375
 
1529
  },
1530
  {
1531
  "model": "anthropic/claude-3.5-sonnet",
@@ -1533,7 +1724,8 @@
1533
  "target_language": "scn_Latn",
1534
  "target_language_name": "Sicilian",
1535
  "speakers": 4700000,
1536
- "bleu": 58.589535944250635
 
1537
  },
1538
  {
1539
  "model": "anthropic/claude-3.5-sonnet",
@@ -1541,7 +1733,8 @@
1541
  "target_language": "ltz_Latn",
1542
  "target_language_name": "Luxembourgish",
1543
  "speakers": 391200,
1544
- "bleu": 70.8338190437548
 
1545
  },
1546
  {
1547
  "model": "anthropic/claude-3.5-sonnet",
@@ -1549,7 +1742,8 @@
1549
  "target_language": "pag_Latn",
1550
  "target_language_name": "Pangasinan",
1551
  "speakers": 1100000,
1552
- "bleu": 56.00481838266269
 
1553
  },
1554
  {
1555
  "model": "anthropic/claude-3.5-sonnet",
@@ -1557,7 +1751,8 @@
1557
  "target_language": "kab_Latn",
1558
  "target_language_name": "Kabyle",
1559
  "speakers": 5586000,
1560
- "bleu": 41.14429925869902
 
1561
  },
1562
  {
1563
  "model": "anthropic/claude-3.5-sonnet",
@@ -1565,7 +1760,8 @@
1565
  "target_language": "bak_Cyrl",
1566
  "target_language_name": "Bashkort",
1567
  "speakers": 1200000,
1568
- "bleu": 57.54538429274717
 
1569
  },
1570
  {
1571
  "model": "anthropic/claude-3.5-sonnet",
@@ -1573,7 +1769,8 @@
1573
  "target_language": "twi_Latn_akua1239",
1574
  "target_language_name": "Twi",
1575
  "speakers": 3000000,
1576
- "bleu": 45.1519376004116
 
1577
  },
1578
  {
1579
  "model": "anthropic/claude-3.5-sonnet",
@@ -1581,7 +1778,8 @@
1581
  "target_language": "hin_Deva",
1582
  "target_language_name": "Hindi",
1583
  "speakers": 341000000,
1584
- "bleu": 64.936216689785
 
1585
  },
1586
  {
1587
  "model": "anthropic/claude-3.5-sonnet",
@@ -1589,7 +1787,8 @@
1589
  "target_language": "kas_Arab",
1590
  "target_language_name": "Kashmiri",
1591
  "speakers": 6900000,
1592
- "bleu": 38.28328065553461
 
1593
  },
1594
  {
1595
  "model": "anthropic/claude-3.5-sonnet",
@@ -1597,7 +1796,8 @@
1597
  "target_language": "mlt_Latn",
1598
  "target_language_name": "Maltese",
1599
  "speakers": 570000,
1600
- "bleu": 80.08667772627608
 
1601
  },
1602
  {
1603
  "model": "anthropic/claude-3.5-sonnet",
@@ -1605,7 +1805,8 @@
1605
  "target_language": "som_Latn",
1606
  "target_language_name": "Somali",
1607
  "speakers": 16200000,
1608
- "bleu": 55.370649647294535
 
1609
  },
1610
  {
1611
  "model": "anthropic/claude-3.5-sonnet",
@@ -1613,7 +1814,8 @@
1613
  "target_language": "hne_Deva",
1614
  "target_language_name": "Chhattisgarhi",
1615
  "speakers": 16300000,
1616
- "bleu": 47.979750130407254
 
1617
  },
1618
  {
1619
  "model": "anthropic/claude-3.5-sonnet",
@@ -1621,7 +1823,8 @@
1621
  "target_language": "glg_Latn",
1622
  "target_language_name": "Galician",
1623
  "speakers": 2500000,
1624
- "bleu": 68.70247869041181
 
1625
  },
1626
  {
1627
  "model": "anthropic/claude-3.5-sonnet",
@@ -1629,7 +1832,8 @@
1629
  "target_language": "ory_Orya",
1630
  "target_language_name": "Odia",
1631
  "speakers": 34500000,
1632
- "bleu": 57.362809651798656
 
1633
  },
1634
  {
1635
  "model": "anthropic/claude-3.5-sonnet",
@@ -1637,15 +1841,17 @@
1637
  "target_language": "nld_Latn",
1638
  "target_language_name": "Dutch",
1639
  "speakers": 23100000,
1640
- "bleu": 71.18493263152928
 
1641
  },
1642
  {
1643
  "model": "anthropic/claude-3.5-sonnet",
1644
  "original_language": "eng_Latn",
1645
  "target_language": "apc_Arab_sout3123",
1646
- "target_language_name": " Levantine Arabic",
1647
  "speakers": 44000000,
1648
- "bleu": 55.666484540051364
 
1649
  },
1650
  {
1651
  "model": "anthropic/claude-3.5-sonnet",
@@ -1653,7 +1859,8 @@
1653
  "target_language": "oci_Latn",
1654
  "target_language_name": "Occitan",
1655
  "speakers": 542000,
1656
- "bleu": 71.53274018395614
 
1657
  },
1658
  {
1659
  "model": "anthropic/claude-3.5-sonnet",
@@ -1661,7 +1868,8 @@
1661
  "target_language": "mni_Beng",
1662
  "target_language_name": "Meitei",
1663
  "speakers": 1470000,
1664
- "bleu": 40.61648039338993
 
1665
  },
1666
  {
1667
  "model": "anthropic/claude-3.5-sonnet",
@@ -1669,7 +1877,8 @@
1669
  "target_language": "hun_Latn",
1670
  "target_language_name": "Hungarian",
1671
  "speakers": 12600000,
1672
- "bleu": 66.13011194084116
 
1673
  },
1674
  {
1675
  "model": "anthropic/claude-3.5-sonnet",
@@ -1677,7 +1886,8 @@
1677
  "target_language": "bho_Deva",
1678
  "target_language_name": "Bhojpuri",
1679
  "speakers": 52200000,
1680
- "bleu": 44.54123379070156
 
1681
  },
1682
  {
1683
  "model": "anthropic/claude-3.5-sonnet",
@@ -1685,7 +1895,8 @@
1685
  "target_language": "mya_Mymr",
1686
  "target_language_name": "Burmese",
1687
  "speakers": 32900000,
1688
- "bleu": 55.723591167735165
 
1689
  },
1690
  {
1691
  "model": "anthropic/claude-3.5-sonnet",
@@ -1693,7 +1904,8 @@
1693
  "target_language": "run_Latn",
1694
  "target_language_name": "Rundi",
1695
  "speakers": 10800000,
1696
- "bleu": 48.94351362900039
 
1697
  },
1698
  {
1699
  "model": "anthropic/claude-3.5-sonnet",
@@ -1701,7 +1913,8 @@
1701
  "target_language": "ast_Latn",
1702
  "target_language_name": "Asturian",
1703
  "speakers": 450000,
1704
- "bleu": 71.34456234933488
 
1705
  },
1706
  {
1707
  "model": "anthropic/claude-3.5-sonnet",
@@ -1709,7 +1922,8 @@
1709
  "target_language": "tir_Ethi",
1710
  "target_language_name": "Tigrigna",
1711
  "speakers": 7507780,
1712
- "bleu": 32.87119617033141
 
1713
  },
1714
  {
1715
  "model": "anthropic/claude-3.5-sonnet",
@@ -1717,31 +1931,35 @@
1717
  "target_language": "kac_Latn",
1718
  "target_language_name": "Jingpho",
1719
  "speakers": 940000,
1720
- "bleu": 43.550058140290794
 
1721
  },
1722
  {
1723
  "model": "anthropic/claude-3.5-sonnet",
1724
  "original_language": "eng_Latn",
1725
  "target_language": "acm_Arab",
1726
- "target_language_name": " Mesopotamian Arabic",
1727
  "speakers": 15700000,
1728
- "bleu": 50.086563426937225
 
1729
  },
1730
  {
1731
  "model": "anthropic/claude-3.5-sonnet",
1732
  "original_language": "eng_Latn",
1733
  "target_language": "arz_Arab",
1734
- "target_language_name": " Egyptian Arabic",
1735
  "speakers": 100542400,
1736
- "bleu": 54.06827805197923
 
1737
  },
1738
  {
1739
  "model": "anthropic/claude-3.5-sonnet",
1740
  "original_language": "eng_Latn",
1741
  "target_language": "mhr_Cyrl",
1742
- "target_language_name": " Meadow Mari",
1743
  "speakers": 482000,
1744
- "bleu": 49.560611727256386
 
1745
  },
1746
  {
1747
  "model": "anthropic/claude-3.5-sonnet",
@@ -1749,7 +1967,8 @@
1749
  "target_language": "ita_Latn",
1750
  "target_language_name": "Italian",
1751
  "speakers": 64819790,
1752
- "bleu": 69.15883435724486
 
1753
  },
1754
  {
1755
  "model": "anthropic/claude-3.5-sonnet",
@@ -1757,23 +1976,35 @@
1757
  "target_language": "spa_Latn",
1758
  "target_language_name": "Spanish",
1759
  "speakers": 485000000,
1760
- "bleu": 63.84670733785411
 
1761
  },
1762
  {
1763
  "model": "anthropic/claude-3.5-sonnet",
1764
  "original_language": "eng_Latn",
1765
  "target_language": "ydd_Hebr",
1766
- "target_language_name": " Eastern Yiddish",
1767
  "speakers": 0,
1768
- "bleu": 49.97450075850061
 
1769
  },
1770
  {
1771
  "model": "anthropic/claude-3.5-sonnet",
1772
  "original_language": "eng_Latn",
1773
  "target_language": "gaz_Latn",
1774
- "target_language_name": " West Central Oromo",
1775
  "speakers": 0,
1776
- "bleu": 47.19114355070805
 
 
 
 
 
 
 
 
 
 
1777
  },
1778
  {
1779
  "model": "anthropic/claude-3.5-sonnet",
@@ -1781,7 +2012,8 @@
1781
  "target_language": "ktu_Latn",
1782
  "target_language_name": "Kituba (Democratic Republic of the Congo)",
1783
  "speakers": 0,
1784
- "bleu": 52.848460160201476
 
1785
  },
1786
  {
1787
  "model": "anthropic/claude-3.5-sonnet",
@@ -1789,7 +2021,8 @@
1789
  "target_language": "dyu_Latn",
1790
  "target_language_name": "Jula",
1791
  "speakers": 2700000,
1792
- "bleu": 29.541518029704612
 
1793
  },
1794
  {
1795
  "model": "anthropic/claude-3.5-sonnet",
@@ -1797,15 +2030,17 @@
1797
  "target_language": "ace_Latn",
1798
  "target_language_name": "Aceh",
1799
  "speakers": 3500032,
1800
- "bleu": 54.98266447488466
 
1801
  },
1802
  {
1803
  "model": "anthropic/claude-3.5-sonnet",
1804
  "original_language": "eng_Latn",
1805
  "target_language": "ckb_Arab",
1806
- "target_language_name": " Central Kurdish",
1807
  "speakers": 7250000,
1808
- "bleu": 58.304911173323845
 
1809
  },
1810
  {
1811
  "model": "anthropic/claude-3.5-sonnet",
@@ -1813,7 +2048,8 @@
1813
  "target_language": "mos_Latn",
1814
  "target_language_name": "Moore",
1815
  "speakers": 7600000,
1816
- "bleu": 32.845809798291015
 
1817
  },
1818
  {
1819
  "model": "anthropic/claude-3.5-sonnet",
@@ -1821,7 +2057,8 @@
1821
  "target_language": "kir_Cyrl",
1822
  "target_language_name": "Kyrgyz",
1823
  "speakers": 4568480,
1824
- "bleu": 57.082442245268965
 
1825
  },
1826
  {
1827
  "model": "anthropic/claude-3.5-sonnet",
@@ -1829,7 +2066,8 @@
1829
  "target_language": "npi_Deva",
1830
  "target_language_name": "Nepali",
1831
  "speakers": 0,
1832
- "bleu": 55.29193473519464
 
1833
  },
1834
  {
1835
  "model": "anthropic/claude-3.5-sonnet",
@@ -1837,7 +2075,8 @@
1837
  "target_language": "kbp_Latn",
1838
  "target_language_name": "Kabiyè",
1839
  "speakers": 1000000,
1840
- "bleu": 22.549850465519835
 
1841
  },
1842
  {
1843
  "model": "anthropic/claude-3.5-sonnet",
@@ -1845,6 +2084,7 @@
1845
  "target_language": "bem_Latn",
1846
  "target_language_name": "Bemba",
1847
  "speakers": 3600000,
1848
- "bleu": 47.80685489558032
 
1849
  }
1850
  ]
 
5
  "target_language": "tur_Latn",
6
  "target_language_name": "Turkish",
7
  "speakers": 82231620,
8
+ "bleu": 67.16006256755001,
9
+ "bert_score": 0.9309494157632192
10
  },
11
  {
12
  "model": "anthropic/claude-3.5-sonnet",
13
  "original_language": "eng_Latn",
14
  "target_language": "ary_Arab",
15
+ "target_language_name": "Moroccan Arabic",
16
  "speakers": 27500000,
17
+ "bleu": 49.308297678095336,
18
+ "bert_score": 0.9317501008510589
19
  },
20
  {
21
  "model": "anthropic/claude-3.5-sonnet",
 
23
  "target_language": "fij_Latn",
24
  "target_language_name": "Fijian",
25
  "speakers": 341270,
26
+ "bleu": 58.28926672457303,
27
+ "bert_score": 0.9183188617229462
28
  },
29
  {
30
  "model": "anthropic/claude-3.5-sonnet",
 
32
  "target_language": "lug_Latn",
33
  "target_language_name": "Ganda",
34
  "speakers": 4100000,
35
+ "bleu": 45.86933229358203,
36
+ "bert_score": 0.8834402799606323
37
  },
38
  {
39
  "model": "anthropic/claude-3.5-sonnet",
 
41
  "target_language": "kin_Latn",
42
  "target_language_name": "Kinyarwanda",
43
  "speakers": 12100000,
44
+ "bleu": 57.241062675597036,
45
+ "bert_score": 0.9069234490394592
46
  },
47
  {
48
  "model": "openai/gpt-4o-mini",
 
50
  "target_language": "ind_Latn",
51
  "target_language_name": "Indonesian",
52
  "speakers": 198996550,
53
+ "bleu": 76.69607051201163,
54
+ "bert_score": 0.9437516768773396
55
  },
56
  {
57
  "model": "anthropic/claude-3.5-sonnet",
 
59
  "target_language": "ind_Latn",
60
  "target_language_name": "Indonesian",
61
  "speakers": 198996550,
62
+ "bleu": 75.20007995551391,
63
+ "bert_score": 0.9425927738348643
64
  },
65
  {
66
  "model": "meta-llama/llama-3.1-70b-instruct",
 
68
  "target_language": "ind_Latn",
69
  "target_language_name": "Indonesian",
70
  "speakers": 198996550,
71
+ "bleu": 74.1959714053824,
72
+ "bert_score": 0.9384953796863555
73
  },
74
  {
75
  "model": "mistralai/mistral-nemo",
 
77
  "target_language": "ind_Latn",
78
  "target_language_name": "Indonesian",
79
  "speakers": 198996550,
80
+ "bleu": 65.59558256613556,
81
+ "bert_score": 0.894232589006424
82
  },
83
  {
84
  "model": "qwen/qwen-2.5-72b-instruct",
 
86
  "target_language": "ind_Latn",
87
  "target_language_name": "Indonesian",
88
  "speakers": 198996550,
89
+ "bleu": 72.85582869172275,
90
+ "bert_score": 0.9316293319066365
91
  },
92
  {
93
  "model": "anthropic/claude-3.5-sonnet",
 
95
  "target_language": "nus_Latn",
96
  "target_language_name": "Nuer",
97
  "speakers": 900000,
98
+ "bleu": 16.57969879511241,
99
+ "bert_score": 0.8528214693069458
100
  },
101
  {
102
  "model": "anthropic/claude-3.5-sonnet",
 
104
  "target_language": "szl_Latn",
105
  "target_language_name": "Silesian",
106
  "speakers": 522000,
107
+ "bleu": 56.78363920686616,
108
+ "bert_score": 0.9106028735637665
109
  },
110
  {
111
  "model": "anthropic/claude-3.5-sonnet",
112
  "original_language": "eng_Latn",
113
  "target_language": "azj_Latn",
114
+ "target_language_name": "North Azerbaijani",
115
  "speakers": 9220610,
116
+ "bleu": 55.52651070626997,
117
+ "bert_score": 0.9145456314086914
118
  },
119
  {
120
  "model": "anthropic/claude-3.5-sonnet",
121
  "original_language": "eng_Latn",
122
  "target_language": "dik_Latn",
123
+ "target_language_name": "Southwestern Dinka",
124
  "speakers": 0,
125
+ "bleu": 17.591328140326063,
126
+ "bert_score": 0.8016291638215383
127
  },
128
  {
129
  "model": "anthropic/claude-3.5-sonnet",
 
131
  "target_language": "smo_Latn",
132
  "target_language_name": "Samoan",
133
  "speakers": 415720,
134
+ "bleu": 56.71388314225869,
135
+ "bert_score": 0.9166683554649353
136
  },
137
  {
138
  "model": "anthropic/claude-3.5-sonnet",
 
140
  "target_language": "heb_Hebr",
141
  "target_language_name": "Hebrew",
142
  "speakers": 9303950,
143
+ "bleu": 72.0702990513479,
144
+ "bert_score": 0.9640646179517111
145
  },
146
  {
147
  "model": "anthropic/claude-3.5-sonnet",
 
149
  "target_language": "lao_Laoo",
150
  "target_language_name": "Lao",
151
  "speakers": 5225552,
152
+ "bleu": 60.02109096770294,
153
+ "bert_score": 0.904438438018163
154
  },
155
  {
156
  "model": "anthropic/claude-3.5-sonnet",
 
158
  "target_language": "bul_Cyrl",
159
  "target_language_name": "Bulgarian",
160
  "speakers": 9000000,
161
+ "bleu": 72.9695925130979,
162
+ "bert_score": 0.9545443177223205
163
  },
164
  {
165
  "model": "anthropic/claude-3.5-sonnet",
 
167
  "target_language": "dgo_Deva",
168
  "target_language_name": "Dogri",
169
  "speakers": 2000000,
170
+ "bleu": 44.91535352779186,
171
+ "bert_score": 0.9340701540311177
172
  },
173
  {
174
  "model": "anthropic/claude-3.5-sonnet",
 
176
  "target_language": "epo_Latn",
177
  "target_language_name": "Esperanto",
178
  "speakers": 2000000,
179
+ "bleu": 69.60565775541012,
180
+ "bert_score": 0.9302131255467733
181
  },
182
  {
183
  "model": "anthropic/claude-3.5-sonnet",
184
  "original_language": "eng_Latn",
185
  "target_language": "azb_Arab",
186
+ "target_language_name": "South Azerbaijani",
187
  "speakers": 15000000,
188
+ "bleu": 44.37128043020771,
189
+ "bert_score": 0.9420697371164958
190
  },
191
  {
192
  "model": "anthropic/claude-3.5-sonnet",
 
194
  "target_language": "pap_Latn",
195
  "target_language_name": "Papiamentu",
196
  "speakers": 321300,
197
+ "bleu": 69.79553281331113,
198
+ "bert_score": 0.9325902322928111
199
  },
200
  {
201
  "model": "anthropic/claude-3.5-sonnet",
 
203
  "target_language": "ces_Latn",
204
  "target_language_name": "Czech",
205
  "speakers": 10700000,
206
+ "bleu": 69.71122905993063,
207
+ "bert_score": 0.9384464244047801
208
  },
209
  {
210
  "model": "anthropic/claude-3.5-sonnet",
 
212
  "target_language": "gle_Latn",
213
  "target_language_name": "Irish",
214
  "speakers": 1030000,
215
+ "bleu": 69.97251945242859,
216
+ "bert_score": 0.9440232972304027
217
  },
218
  {
219
  "model": "anthropic/claude-3.5-sonnet",
 
221
  "target_language": "cym_Latn",
222
  "target_language_name": "Welsh",
223
  "speakers": 977366,
224
+ "bleu": 83.34377244735965,
225
+ "bert_score": 0.9662299533685048
226
  },
227
  {
228
  "model": "anthropic/claude-3.5-sonnet",
 
230
  "target_language": "war_Latn",
231
  "target_language_name": "Waray-Waray",
232
  "speakers": 3100000,
233
+ "bleu": 66.38502312428538,
234
+ "bert_score": 0.920412007967631
235
  },
236
  {
237
  "model": "anthropic/claude-3.5-sonnet",
 
239
  "target_language": "tuk_Latn",
240
  "target_language_name": "Turkmen",
241
  "speakers": 16000000,
242
+ "bleu": 60.559370593640274,
243
+ "bert_score": 0.9125106473763783
244
  },
245
  {
246
  "model": "anthropic/claude-3.5-sonnet",
 
248
  "target_language": "kea_Latn",
249
  "target_language_name": "Kabuverdianu",
250
  "speakers": 871000,
251
+ "bleu": 65.11060103907447,
252
+ "bert_score": 0.921340302626292
253
  },
254
  {
255
  "model": "anthropic/claude-3.5-sonnet",
 
257
  "target_language": "swe_Latn",
258
  "target_language_name": "Swedish",
259
  "speakers": 9244250,
260
+ "bleu": 77.42161024703672,
261
+ "bert_score": 0.9571870168050131
262
  },
263
  {
264
  "model": "anthropic/claude-3.5-sonnet",
 
266
  "target_language": "mni_Mtei",
267
  "target_language_name": "Meitei",
268
  "speakers": 1470000,
269
+ "bleu": 41.90750872077243,
270
+ "bert_score": 0.9731392125288646
271
  },
272
  {
273
  "model": "anthropic/claude-3.5-sonnet",
 
275
  "target_language": "kan_Knda",
276
  "target_language_name": "Kannada",
277
  "speakers": 43600000,
278
+ "bleu": 60.01420283321725,
279
+ "bert_score": 0.9730932295322419
280
  },
281
  {
282
  "model": "anthropic/claude-3.5-sonnet",
283
  "original_language": "eng_Latn",
284
  "target_language": "plt_Latn",
285
+ "target_language_name": "Merina Malagasy",
286
  "speakers": 0,
287
+ "bleu": 61.096843454641544,
288
+ "bert_score": 0.9032936791578928
289
  },
290
  {
291
  "model": "anthropic/claude-3.5-sonnet",
 
293
  "target_language": "ewe_Latn",
294
  "target_language_name": "Éwé",
295
  "speakers": 3000000,
296
+ "bleu": 41.6614038790914,
297
+ "bert_score": 0.8829316159089406
298
  },
299
  {
300
  "model": "anthropic/claude-3.5-sonnet",
 
302
  "target_language": "rus_Cyrl",
303
  "target_language_name": "Russian",
304
  "speakers": 171428900,
305
+ "bleu": 71.14894410390329,
306
+ "bert_score": 0.9518508851528168
307
  },
308
  {
309
  "model": "anthropic/claude-3.5-sonnet",
 
311
  "target_language": "bjn_Arab",
312
  "target_language_name": "Banjar",
313
  "speakers": 3500000,
314
+ "bleu": 36.7812759423696,
315
+ "bert_score": 0.9300645053386688
316
  },
317
  {
318
  "model": "anthropic/claude-3.5-sonnet",
 
320
  "target_language": "kmb_Latn",
321
  "target_language_name": "Kimbundu",
322
  "speakers": 0,
323
+ "bleu": 5.85234572235619,
324
+ "bert_score": 0.6849321782588959
325
  },
326
  {
327
  "model": "anthropic/claude-3.5-sonnet",
 
329
  "target_language": "vec_Latn",
330
  "target_language_name": "Venetian",
331
  "speakers": 2000000,
332
+ "bleu": 60.61408762705794,
333
+ "bert_score": 0.9080212533473968
334
  },
335
  {
336
  "model": "anthropic/claude-3.5-sonnet",
337
  "original_language": "eng_Latn",
338
  "target_language": "aeb_Arab",
339
+ "target_language_name": "Tunisian Arabic",
340
  "speakers": 11600000,
341
+ "bleu": 49.67140907439696,
342
+ "bert_score": 0.9337966998418172
343
  },
344
  {
345
  "model": "anthropic/claude-3.5-sonnet",
 
347
  "target_language": "lit_Latn",
348
  "target_language_name": "Lithuanian",
349
  "speakers": 4000000,
350
+ "bleu": 67.16256955707802,
351
+ "bert_score": 0.915470290184021
352
  },
353
  {
354
  "model": "anthropic/claude-3.5-sonnet",
 
356
  "target_language": "swh_Latn",
357
  "target_language_name": "Swahili",
358
  "speakers": 82300000,
359
+ "bleu": 73.51990421418041,
360
+ "bert_score": 0.9450787365436554
361
  },
362
  {
363
  "model": "anthropic/claude-3.5-sonnet",
 
365
  "target_language": "bug_Latn",
366
  "target_language_name": "Bugis",
367
  "speakers": 5017800,
368
+ "bleu": 44.838817003109384,
369
+ "bert_score": 0.864792396624883
370
  },
371
  {
372
  "model": "anthropic/claude-3.5-sonnet",
373
  "original_language": "eng_Latn",
374
  "target_language": "apc_Arab_nort3139",
375
+ "target_language_name": "Levantine Arabic",
376
  "speakers": 44000000,
377
+ "bleu": 55.944015565916516,
378
+ "bert_score": 0.9422544419765473
379
  },
380
  {
381
  "model": "anthropic/claude-3.5-sonnet",
 
383
  "target_language": "lus_Latn",
384
  "target_language_name": "Mizo",
385
  "speakers": 500000,
386
+ "bleu": 51.65580174875804,
387
+ "bert_score": 0.8875152905782063
388
  },
389
  {
390
  "model": "anthropic/claude-3.5-sonnet",
 
392
  "target_language": "lim_Latn",
393
  "target_language_name": "Limburgish",
394
  "speakers": 1600000,
395
+ "bleu": 59.44855049817084,
396
+ "bert_score": 0.8987095455328623
397
  },
398
  {
399
  "model": "anthropic/claude-3.5-sonnet",
 
401
  "target_language": "mri_Latn",
402
  "target_language_name": "Maori",
403
  "speakers": 160000,
404
+ "bleu": 54.831993564329125,
405
+ "bert_score": 0.9185245017210643
406
  },
407
  {
408
  "model": "anthropic/claude-3.5-sonnet",
 
410
  "target_language": "kam_Latn",
411
  "target_language_name": "Kamba",
412
  "speakers": 3893000,
413
+ "bleu": 41.73348967095708,
414
+ "bert_score": 0.8780206362406413
415
  },
416
  {
417
  "model": "anthropic/claude-3.5-sonnet",
 
419
  "target_language": "ban_Latn",
420
  "target_language_name": "Bali (Indonesia)",
421
  "speakers": 4000000,
422
+ "bleu": 52.87524191594727,
423
+ "bert_score": 0.8934772113958994
424
  },
425
  {
426
  "model": "anthropic/claude-3.5-sonnet",
427
  "original_language": "eng_Latn",
428
  "target_language": "pan_Guru",
429
+ "target_language_name": "Eastern Punjabi",
430
  "speakers": 125000000,
431
+ "bleu": 60.46844110900072,
432
+ "bert_score": 0.9882440070311228
433
  },
434
  {
435
  "model": "anthropic/claude-3.5-sonnet",
 
437
  "target_language": "por_Latn",
438
  "target_language_name": "Portuguese",
439
  "speakers": 254300000,
440
+ "bleu": 77.49780742224304,
441
+ "bert_score": 0.9494876027107239
442
  },
443
  {
444
  "model": "anthropic/claude-3.5-sonnet",
 
446
  "target_language": "crh_Latn",
447
  "target_language_name": "Crimean Tatar",
448
  "speakers": 552740,
449
+ "bleu": 52.705024944759934,
450
+ "bert_score": 0.8972040812174479
451
  },
452
  {
453
  "model": "anthropic/claude-3.5-sonnet",
 
455
  "target_language": "srp_Cyrl",
456
  "target_language_name": "Serbian",
457
  "speakers": 9000000,
458
+ "bleu": 69.96913961762156,
459
+ "bert_score": 0.9582955678304036
460
  },
461
  {
462
  "model": "openai/gpt-4o-mini",
 
464
  "target_language": "kas_Deva",
465
  "target_language_name": "Kashmiri",
466
  "speakers": 6900000,
467
+ "bleu": 22.94872648513265,
468
+ "bert_score": 0.9032864511013031
469
  },
470
  {
471
  "model": "anthropic/claude-3.5-sonnet",
 
473
  "target_language": "kas_Deva",
474
  "target_language_name": "Kashmiri",
475
  "speakers": 6900000,
476
+ "bleu": 27.525562771983658,
477
+ "bert_score": 0.9144352018833161
478
  },
479
  {
480
  "model": "meta-llama/llama-3.1-70b-instruct",
 
482
  "target_language": "kas_Deva",
483
  "target_language_name": "Kashmiri",
484
  "speakers": 6900000,
485
+ "bleu": 7.999053096113321,
486
+ "bert_score": 0.891643617550532
487
  },
488
  {
489
  "model": "mistralai/mistral-nemo",
 
491
  "target_language": "kas_Deva",
492
  "target_language_name": "Kashmiri",
493
  "speakers": 6900000,
494
+ "bleu": 15.126083511737422,
495
+ "bert_score": 0.8491478403409322
496
  },
497
  {
498
  "model": "qwen/qwen-2.5-72b-instruct",
 
500
  "target_language": "kas_Deva",
501
  "target_language_name": "Kashmiri",
502
  "speakers": 6900000,
503
+ "bleu": 21.928943407791756,
504
+ "bert_score": 0.9002275844415029
505
  },
506
  {
507
  "model": "anthropic/claude-3.5-sonnet",
508
  "original_language": "eng_Latn",
509
  "target_language": "bod_Tibt",
510
+ "target_language_name": "Central Tibetan",
511
  "speakers": 1200000,
512
+ "bleu": 51.34907527401463,
513
+ "bert_score": 0.9671575029691061
514
  },
515
  {
516
  "model": "anthropic/claude-3.5-sonnet",
 
518
  "target_language": "slv_Latn",
519
  "target_language_name": "Slovene",
520
  "speakers": 2400000,
521
+ "bleu": 72.56912707571306,
522
+ "bert_score": 0.9432346244653066
523
  },
524
  {
525
  "model": "anthropic/claude-3.5-sonnet",
526
  "original_language": "eng_Latn",
527
  "target_language": "ars_Arab",
528
+ "target_language_name": "Najdi Arabic",
529
  "speakers": 0,
530
+ "bleu": 46.41024303772761,
531
+ "bert_score": 0.9332984228928883
532
  },
533
  {
534
  "model": "anthropic/claude-3.5-sonnet",
 
536
  "target_language": "cat_Latn",
537
  "target_language_name": "Catalan",
538
  "speakers": 5100000,
539
+ "bleu": 74.45950079317922,
540
+ "bert_score": 0.9464139262835185
541
  },
542
  {
543
  "model": "anthropic/claude-3.5-sonnet",
 
545
  "target_language": "zul_Latn",
546
  "target_language_name": "Zulu",
547
  "speakers": 15700000,
548
+ "bleu": 59.176207838896076,
549
+ "bert_score": 0.9099391102790833
550
  },
551
  {
552
  "model": "anthropic/claude-3.5-sonnet",
553
  "original_language": "eng_Latn",
554
  "target_language": "pes_Arab",
555
+ "target_language_name": "Iranian Persian",
556
  "speakers": 52800000,
557
+ "bleu": 57.64441696979444,
558
+ "bert_score": 0.9476486345132192
559
  },
560
  {
561
  "model": "anthropic/claude-3.5-sonnet",
 
563
  "target_language": "taq_Latn",
564
  "target_language_name": "Tamasheq",
565
  "speakers": 500000,
566
+ "bleu": 25.866944911127725,
567
+ "bert_score": 0.8280646105607351
568
  },
569
  {
570
  "model": "anthropic/claude-3.5-sonnet",
 
572
  "target_language": "snd_Deva",
573
  "target_language_name": "Sindhi",
574
  "speakers": 25000000,
575
+ "bleu": 40.04493401977834,
576
+ "bert_score": 0.9263754248619079
577
  },
578
  {
579
  "model": "anthropic/claude-3.5-sonnet",
 
581
  "target_language": "ssw_Latn",
582
  "target_language_name": "Swati",
583
  "speakers": 2034200,
584
+ "bleu": 52.77460964391619,
585
+ "bert_score": 0.8899940272172292
586
  },
587
  {
588
  "model": "anthropic/claude-3.5-sonnet",
 
590
  "target_language": "mkd_Cyrl",
591
  "target_language_name": "Macedonian",
592
  "speakers": 2000000,
593
+ "bleu": 72.27334714365769,
594
+ "bert_score": 0.9558346649010976
595
  },
596
  {
597
  "model": "openai/gpt-4o-mini",
 
599
  "target_language": "pol_Latn",
600
  "target_language_name": "Polish",
601
  "speakers": 40200000,
602
+ "bleu": 63.79524354957938,
603
+ "bert_score": 0.9248784482479095
604
  },
605
  {
606
  "model": "anthropic/claude-3.5-sonnet",
 
608
  "target_language": "pol_Latn",
609
  "target_language_name": "Polish",
610
  "speakers": 40200000,
611
+ "bleu": 65.97562270518736,
612
+ "bert_score": 0.9295462707678477
613
  },
614
  {
615
  "model": "meta-llama/llama-3.1-70b-instruct",
 
617
  "target_language": "pol_Latn",
618
  "target_language_name": "Polish",
619
  "speakers": 40200000,
620
+ "bleu": 62.09512880944625,
621
+ "bert_score": 0.918810615936915
622
  },
623
  {
624
  "model": "mistralai/mistral-nemo",
 
626
  "target_language": "pol_Latn",
627
  "target_language_name": "Polish",
628
  "speakers": 40200000,
629
+ "bleu": 56.42877796144466,
630
+ "bert_score": 0.8988153100013733
631
  },
632
  {
633
  "model": "qwen/qwen-2.5-72b-instruct",
 
635
  "target_language": "pol_Latn",
636
  "target_language_name": "Polish",
637
  "speakers": 40200000,
638
+ "bleu": 61.08942681151859,
639
+ "bert_score": 0.9175748288631439
640
  },
641
  {
642
  "model": "anthropic/claude-3.5-sonnet",
 
644
  "target_language": "srd_Latn",
645
  "target_language_name": "Sardinian",
646
  "speakers": 1300000,
647
+ "bleu": 62.69039147714039,
648
+ "bert_score": 0.9118991812070211
649
  },
650
  {
651
  "model": "anthropic/claude-3.5-sonnet",
652
  "original_language": "eng_Latn",
653
  "target_language": "arb_Latn",
654
+ "target_language_name": "Standard Arabic",
655
  "speakers": 0,
656
+ "bleu": 48.72485475113871,
657
+ "bert_score": 0.8800670305887858
658
  },
659
  {
660
  "model": "anthropic/claude-3.5-sonnet",
 
662
  "target_language": "twi_Latn_asan1239",
663
  "target_language_name": "Twi",
664
  "speakers": 3000000,
665
+ "bleu": 44.44337481309101,
666
+ "bert_score": 0.8881823480129242
667
  },
668
  {
669
  "model": "anthropic/claude-3.5-sonnet",
 
671
  "target_language": "tum_Latn",
672
  "target_language_name": "Tumbuka",
673
  "speakers": 2680000,
674
+ "bleu": 44.0490017392109,
675
+ "bert_score": 0.8865564326445262
676
  },
677
  {
678
  "model": "anthropic/claude-3.5-sonnet",
 
680
  "target_language": "fur_Latn",
681
  "target_language_name": "Friulian",
682
  "speakers": 300000,
683
+ "bleu": 66.54880923723718,
684
+ "bert_score": 0.9255799611409505
685
  },
686
  {
687
  "model": "anthropic/claude-3.5-sonnet",
 
689
  "target_language": "lua_Latn",
690
  "target_language_name": "Luba-Kasai",
691
  "speakers": 6300000,
692
+ "bleu": 45.065529165477344,
693
+ "bert_score": 0.8749240279197693
694
  },
695
  {
696
  "model": "anthropic/claude-3.5-sonnet",
 
698
  "target_language": "fil_Latn",
699
  "target_language_name": "Filipino",
700
  "speakers": 90000000,
701
+ "bleu": 70.19284983784472,
702
+ "bert_score": 0.92694251537323
703
  },
704
  {
705
  "model": "anthropic/claude-3.5-sonnet",
 
707
  "target_language": "afr_Latn",
708
  "target_language_name": "Afrikaans",
709
  "speakers": 10300000,
710
+ "bleu": 76.89005407773752,
711
+ "bert_score": 0.9481831173102061
712
  },
713
  {
714
  "model": "anthropic/claude-3.5-sonnet",
 
716
  "target_language": "bos_Latn",
717
  "target_language_name": "Bosnian",
718
  "speakers": 3500000,
719
+ "bleu": 72.54880271311463,
720
+ "bert_score": 0.94769393603007
721
  },
722
  {
723
  "model": "anthropic/claude-3.5-sonnet",
 
725
  "target_language": "ltg_Latn",
726
  "target_language_name": "Latgalian",
727
  "speakers": 200000,
728
+ "bleu": 56.484355652391756,
729
+ "bert_score": 0.9078494012355804
730
  },
731
  {
732
  "model": "anthropic/claude-3.5-sonnet",
733
  "original_language": "eng_Latn",
734
  "target_language": "acq_Arab",
735
+ "target_language_name": "Ta’izzi-Adeni Arabic",
736
  "speakers": 10500000,
737
+ "bleu": 49.413933528139225,
738
+ "bert_score": 0.9354432185490926
739
  },
740
  {
741
  "model": "anthropic/claude-3.5-sonnet",
 
743
  "target_language": "mag_Deva",
744
  "target_language_name": "Magahi",
745
  "speakers": 20700000,
746
+ "bleu": 58.54742215461198,
747
+ "bert_score": 0.9458349565664927
748
  },
749
  {
750
  "model": "anthropic/claude-3.5-sonnet",
 
752
  "target_language": "min_Latn",
753
  "target_language_name": "Minangkabau",
754
  "speakers": 5530000,
755
+ "bleu": 64.0323403919738,
756
+ "bert_score": 0.9164695799350738
757
  },
758
  {
759
  "model": "anthropic/claude-3.5-sonnet",
 
761
  "target_language": "kor_Hang",
762
  "target_language_name": "Korean",
763
  "speakers": 77300000,
764
+ "bleu": 43.68722859743311,
765
+ "bert_score": 0.9579092760880789
766
  },
767
  {
768
  "model": "anthropic/claude-3.5-sonnet",
769
  "original_language": "eng_Latn",
770
  "target_language": "zsm_Latn",
771
+ "target_language_name": "Standard Malay",
772
  "speakers": 0,
773
+ "bleu": 74.2657232797885,
774
+ "bert_score": 0.9445500493049621
775
  },
776
  {
777
  "model": "anthropic/claude-3.5-sonnet",
 
779
  "target_language": "mar_Deva",
780
  "target_language_name": "Marathi",
781
  "speakers": 83100000,
782
+ "bleu": 57.44340907113495,
783
+ "bert_score": 0.9421781261761983
784
  },
785
  {
786
  "model": "anthropic/claude-3.5-sonnet",
787
  "original_language": "eng_Latn",
788
  "target_language": "pbt_Arab",
789
+ "target_language_name": "Southern Pashto",
790
  "speakers": 10900000,
791
+ "bleu": 38.3124819373775,
792
+ "bert_score": 0.9212681790192921
793
  },
794
  {
795
  "model": "anthropic/claude-3.5-sonnet",
 
797
  "target_language": "lij_Latn",
798
  "target_language_name": "Ligurian",
799
  "speakers": 500000,
800
+ "bleu": 55.85306363017816,
801
+ "bert_score": 0.904762077331543
802
  },
803
  {
804
  "model": "anthropic/claude-3.5-sonnet",
805
  "original_language": "eng_Latn",
806
  "target_language": "knc_Latn",
807
+ "target_language_name": "Yerwa Kanuri",
808
  "speakers": 0,
809
+ "bleu": 22.062110974228755,
810
+ "bert_score": 0.8124950389067332
811
  },
812
  {
813
  "model": "anthropic/claude-3.5-sonnet",
 
815
  "target_language": "chv_Cyrl",
816
  "target_language_name": "Chuvash",
817
  "speakers": 1279650,
818
+ "bleu": 45.05466587233098,
819
+ "bert_score": 0.9203916192054749
820
  },
821
  {
822
  "model": "anthropic/claude-3.5-sonnet",
 
824
  "target_language": "asm_Beng",
825
  "target_language_name": "Assamese",
826
  "speakers": 15300000,
827
+ "bleu": 47.03513316051628,
828
+ "bert_score": 0.9281195739905039
829
  },
830
  {
831
  "model": "anthropic/claude-3.5-sonnet",
 
833
  "target_language": "ace_Arab",
834
  "target_language_name": "Aceh",
835
  "speakers": 3500032,
836
+ "bleu": 23.349267705271775,
837
+ "bert_score": 0.8943273961544037
838
  },
839
  {
840
  "model": "anthropic/claude-3.5-sonnet",
 
842
  "target_language": "tha_Thai",
843
  "target_language_name": "Thai",
844
  "speakers": 40000000,
845
+ "bleu": 62.81253609435389,
846
+ "bert_score": 0.9225328485171
847
  },
848
  {
849
  "model": "anthropic/claude-3.5-sonnet",
 
851
  "target_language": "fao_Latn",
852
  "target_language_name": "Faroese",
853
  "speakers": 69150,
854
+ "bleu": 65.91479024829229,
855
+ "bert_score": 0.9332413752873738
856
  },
857
  {
858
  "model": "anthropic/claude-3.5-sonnet",
 
860
  "target_language": "nqo_Nkoo",
861
  "target_language_name": "N’Ko",
862
  "speakers": 0,
863
+ "bleu": 32.48349079900792,
864
+ "bert_score": 0.9823745767275492
865
  },
866
  {
867
  "model": "anthropic/claude-3.5-sonnet",
 
869
  "target_language": "ilo_Latn",
870
  "target_language_name": "Ilocano",
871
  "speakers": 9100000,
872
+ "bleu": 62.605886459379576,
873
+ "bert_score": 0.9115280091762543
874
  },
875
  {
876
  "model": "anthropic/claude-3.5-sonnet",
 
878
  "target_language": "kat_Geor",
879
  "target_language_name": "Georgian",
880
  "speakers": 3700000,
881
+ "bleu": 61.016636144241765,
882
+ "bert_score": 0.9546662310759226
883
  },
884
  {
885
  "model": "anthropic/claude-3.5-sonnet",
886
  "original_language": "eng_Latn",
887
  "target_language": "ayr_Latn",
888
+ "target_language_name": "Central Aymara",
889
  "speakers": 0,
890
+ "bleu": 42.769843666920714,
891
+ "bert_score": 0.8625142018000285
892
  },
893
  {
894
  "model": "anthropic/claude-3.5-sonnet",
 
896
  "target_language": "dan_Latn",
897
  "target_language_name": "Danish",
898
  "speakers": 6000000,
899
+ "bleu": 78.0935433283814,
900
+ "bert_score": 0.9506490747133891
901
  },
902
  {
903
  "model": "anthropic/claude-3.5-sonnet",
 
905
  "target_language": "brx_Deva",
906
  "target_language_name": "Boro (India)",
907
  "speakers": 1482929,
908
+ "bleu": 36.11004749691388,
909
+ "bert_score": 0.9251878539721171
910
  },
911
  {
912
  "model": "anthropic/claude-3.5-sonnet",
 
914
  "target_language": "sag_Latn",
915
  "target_language_name": "Sango",
916
  "speakers": 4600000,
917
+ "bleu": 34.875422265717596,
918
+ "bert_score": 0.8720244228839874
919
  },
920
  {
921
  "model": "anthropic/claude-3.5-sonnet",
922
  "original_language": "eng_Latn",
923
  "target_language": "lvs_Latn",
924
+ "target_language_name": "Standard Latvian",
925
  "speakers": 0,
926
+ "bleu": 65.08332100371138,
927
+ "bert_score": 0.9217625757058462
928
  },
929
  {
930
  "model": "anthropic/claude-3.5-sonnet",
 
932
  "target_language": "jpn_Jpan",
933
  "target_language_name": "Japanese",
934
  "speakers": 128000000,
935
+ "bleu": 49.91661356931259,
936
+ "bert_score": 0.9425287286440531
937
  },
938
  {
939
  "model": "anthropic/claude-3.5-sonnet",
 
941
  "target_language": "uig_Arab",
942
  "target_language_name": "Uyghur",
943
  "speakers": 10400000,
944
+ "bleu": 53.53468771030665,
945
+ "bert_score": 0.9397906005382538
946
  },
947
  {
948
  "model": "anthropic/claude-3.5-sonnet",
 
950
  "target_language": "fra_Latn",
951
  "target_language_name": "French",
952
  "speakers": 208157220,
953
+ "bleu": 79.3023871219446,
954
+ "bert_score": 0.9554367423057556
955
  },
956
  {
957
  "model": "anthropic/claude-3.5-sonnet",
 
959
  "target_language": "jav_Latn",
960
  "target_language_name": "Javanese",
961
  "speakers": 84308740,
962
+ "bleu": 60.44033529900538,
963
+ "bert_score": 0.9125308076540629
964
  },
965
  {
966
  "model": "anthropic/claude-3.5-sonnet",
 
968
  "target_language": "sun_Latn",
969
  "target_language_name": "Sunda",
970
  "speakers": 32400000,
971
+ "bleu": 56.40659991041485,
972
+ "bert_score": 0.9077177822589875
973
  },
974
  {
975
  "model": "anthropic/claude-3.5-sonnet",
 
977
  "target_language": "umb_Latn",
978
  "target_language_name": "Umbundu",
979
  "speakers": 6000000,
980
+ "bleu": 21.080277559665817,
981
+ "bert_score": 0.8461364289124806
982
  },
983
  {
984
  "model": "anthropic/claude-3.5-sonnet",
 
986
  "target_language": "bel_Cyrl",
987
  "target_language_name": "Belarusian",
988
  "speakers": 7900000,
989
+ "bleu": 54.51951664423131,
990
+ "bert_score": 0.9329862594604492
991
  },
992
  {
993
  "model": "anthropic/claude-3.5-sonnet",
 
995
  "target_language": "cjk_Latn",
996
  "target_language_name": "Chokwe",
997
  "speakers": 0,
998
+ "bleu": 10.186407416077753,
999
+ "bert_score": 0.727788798014323
1000
  },
1001
  {
1002
  "model": "anthropic/claude-3.5-sonnet",
1003
  "original_language": "eng_Latn",
1004
  "target_language": "yue_Hant",
1005
+ "target_language_name": "Yue Chinese",
1006
  "speakers": 73100000,
1007
+ "bleu": 34.561465122815996,
1008
+ "bert_score": 0.9634495397408803
1009
  },
1010
  {
1011
  "model": "anthropic/claude-3.5-sonnet",
 
1013
  "target_language": "hat_Latn",
1014
  "target_language_name": "Haitian Creole",
1015
  "speakers": 9600000,
1016
+ "bleu": 63.85321875910916,
1017
+ "bert_score": 0.9323611199855805
1018
  },
1019
  {
1020
  "model": "anthropic/claude-3.5-sonnet",
1021
  "original_language": "eng_Latn",
1022
  "target_language": "kmr_Latn",
1023
+ "target_language_name": "Northern Kurdish",
1024
  "speakers": 14600000,
1025
+ "bleu": 55.79658782267678,
1026
+ "bert_score": 0.9104436457157135
1027
  },
1028
  {
1029
  "model": "anthropic/claude-3.5-sonnet",
 
1031
  "target_language": "ceb_Latn",
1032
  "target_language_name": "Cebuano",
1033
  "speakers": 15900000,
1034
+ "bleu": 69.455795865483,
1035
+ "bert_score": 0.932128123442332
1036
  },
1037
  {
1038
  "model": "anthropic/claude-3.5-sonnet",
 
1040
  "target_language": "dzo_Tibt",
1041
  "target_language_name": "Dzongkha",
1042
  "speakers": 237080,
1043
+ "bleu": 44.35738140173861,
1044
+ "bert_score": 0.9664796074231465
1045
  },
1046
  {
1047
  "model": "anthropic/claude-3.5-sonnet",
1048
  "original_language": "eng_Latn",
1049
  "target_language": "deu_Latn",
1050
+ "target_language_name": "Standard German",
1051
  "speakers": 105000000,
1052
+ "bleu": 77.19665151067797,
1053
+ "bert_score": 0.9468763132890066
1054
  },
1055
  {
1056
  "model": "anthropic/claude-3.5-sonnet",
 
1058
  "target_language": "ibo_Latn",
1059
  "target_language_name": "Igbo",
1060
  "speakers": 27000000,
1061
+ "bleu": 46.40173449341075,
1062
+ "bert_score": 0.9137314637502034
1063
  },
1064
  {
1065
  "model": "anthropic/claude-3.5-sonnet",
 
1067
  "target_language": "vie_Latn",
1068
  "target_language_name": "Vietnamese",
1069
  "speakers": 76000000,
1070
+ "bleu": 70.35607494641172,
1071
+ "bert_score": 0.9527418712774912
1072
  },
1073
  {
1074
  "model": "anthropic/claude-3.5-sonnet",
1075
  "original_language": "eng_Latn",
1076
  "target_language": "quy_Latn",
1077
+ "target_language_name": "Ayacucho Quechua",
1078
  "speakers": 918200,
1079
+ "bleu": 45.653492702784725,
1080
+ "bert_score": 0.8731370111306508
1081
  },
1082
  {
1083
  "model": "anthropic/claude-3.5-sonnet",
1084
  "original_language": "eng_Latn",
1085
  "target_language": "cmn_Hant",
1086
+ "target_language_name": "Mandarin Chinese",
1087
  "speakers": 1074000000,
1088
+ "bleu": 41.61854589430943,
1089
+ "bert_score": 0.9654350221157074
1090
  },
1091
  {
1092
  "model": "anthropic/claude-3.5-sonnet",
 
1094
  "target_language": "mai_Deva",
1095
  "target_language_name": "Maithili",
1096
  "speakers": 33900000,
1097
+ "bleu": 54.65300713908629,
1098
+ "bert_score": 0.9433513383070627
1099
  },
1100
  {
1101
  "model": "anthropic/claude-3.5-sonnet",
 
1103
  "target_language": "gla_Latn",
1104
  "target_language_name": "Scottish Gaelic",
1105
  "speakers": 60130,
1106
+ "bleu": 62.604437133773324,
1107
+ "bert_score": 0.9264988481998444
1108
  },
1109
  {
1110
  "model": "anthropic/claude-3.5-sonnet",
 
1112
  "target_language": "urd_Arab",
1113
  "target_language_name": "Urdu",
1114
  "speakers": 94022900,
1115
+ "bleu": 61.12554572717868,
1116
+ "bert_score": 0.9538880089918772
1117
  },
1118
  {
1119
  "model": "anthropic/claude-3.5-sonnet",
 
1121
  "target_language": "shn_Mymr",
1122
  "target_language_name": "Shan",
1123
  "speakers": 3000000,
1124
+ "bleu": 29.21299485766884,
1125
+ "bert_score": 0.9378574808438619
1126
  },
1127
  {
1128
  "model": "anthropic/claude-3.5-sonnet",
 
1130
  "target_language": "wol_Latn",
1131
  "target_language_name": "Wolof",
1132
  "speakers": 3700000,
1133
+ "bleu": 42.64301275691043,
1134
+ "bert_score": 0.8762976408004761
1135
  },
1136
  {
1137
  "model": "anthropic/claude-3.5-sonnet",
1138
  "original_language": "eng_Latn",
1139
  "target_language": "cmn_Hans",
1140
+ "target_language_name": "Mandarin Chinese",
1141
  "speakers": 1074000000,
1142
+ "bleu": 43.441487462618014,
1143
+ "bert_score": 0.9615364015102387
1144
  },
1145
  {
1146
  "model": "anthropic/claude-3.5-sonnet",
 
1148
  "target_language": "guj_Gujr",
1149
  "target_language_name": "Gujarati",
1150
  "speakers": 56400000,
1151
+ "bleu": 55.588451345198735,
1152
+ "bert_score": 0.9753397226333618
1153
  },
1154
  {
1155
  "model": "anthropic/claude-3.5-sonnet",
1156
  "original_language": "eng_Latn",
1157
  "target_language": "ekk_Latn",
1158
+ "target_language_name": "Standard Estonian",
1159
  "speakers": 1164770,
1160
+ "bleu": 67.41569195167845,
1161
+ "bert_score": 0.9277306814988454
1162
  },
1163
  {
1164
  "model": "anthropic/claude-3.5-sonnet",
 
1166
  "target_language": "luo_Latn",
1167
  "target_language_name": "Dholuo",
1168
  "speakers": 3000000,
1169
+ "bleu": 46.41194790710186,
1170
+ "bert_score": 0.8803233822186788
1171
  },
1172
  {
1173
  "model": "anthropic/claude-3.5-sonnet",
 
1175
  "target_language": "hrv_Latn",
1176
  "target_language_name": "Croatian",
1177
  "speakers": 7000000,
1178
+ "bleu": 69.54569836615161,
1179
+ "bert_score": 0.9444877982139588
1180
  },
1181
  {
1182
  "model": "anthropic/claude-3.5-sonnet",
1183
  "original_language": "eng_Latn",
1184
  "target_language": "uzn_Latn",
1185
+ "target_language_name": "Northern Uzbek",
1186
  "speakers": 26912410,
1187
+ "bleu": 63.20557385096708,
1188
+ "bert_score": 0.9120756924152374
1189
  },
1190
  {
1191
  "model": "anthropic/claude-3.5-sonnet",
 
1193
  "target_language": "ben_Beng",
1194
  "target_language_name": "Bengali",
1195
  "speakers": 300000000,
1196
+ "bleu": 57.14175888160181,
1197
+ "bert_score": 0.9483523646990458
1198
  },
1199
  {
1200
  "model": "anthropic/claude-3.5-sonnet",
 
1202
  "target_language": "nya_Latn",
1203
  "target_language_name": "Chichewa",
1204
  "speakers": 12000000,
1205
+ "bleu": 59.76016801606614,
1206
+ "bert_score": 0.9069253901640574
1207
  },
1208
  {
1209
  "model": "anthropic/claude-3.5-sonnet",
 
1211
  "target_language": "tsn_Latn",
1212
  "target_language_name": "Setswana",
1213
  "speakers": 4500000,
1214
+ "bleu": 55.22888902281337,
1215
+ "bert_score": 0.9117900888125102
1216
  },
1217
  {
1218
  "model": "anthropic/claude-3.5-sonnet",
 
1220
  "target_language": "fin_Latn",
1221
  "target_language_name": "Finnish",
1222
  "speakers": 5413380,
1223
+ "bleu": 70.94250295175219,
1224
+ "bert_score": 0.9320579687754313
1225
  },
1226
  {
1227
  "model": "anthropic/claude-3.5-sonnet",
1228
  "original_language": "eng_Latn",
1229
  "target_language": "nso_Latn",
1230
+ "target_language_name": "Northern Sotho",
1231
  "speakers": 4100000,
1232
+ "bleu": 62.87694016917069,
1233
+ "bert_score": 0.9261207898457845
1234
  },
1235
  {
1236
  "model": "anthropic/claude-3.5-sonnet",
 
1238
  "target_language": "sna_Latn",
1239
  "target_language_name": "Shona",
1240
  "speakers": 9023000,
1241
+ "bleu": 51.55921914049446,
1242
+ "bert_score": 0.8798740128676097
1243
  },
1244
  {
1245
  "model": "anthropic/claude-3.5-sonnet",
 
1247
  "target_language": "snd_Arab",
1248
  "target_language_name": "Sindhi",
1249
  "speakers": 25000000,
1250
+ "bleu": 56.33027730975489,
1251
+ "bert_score": 0.9470286051432292
1252
  },
1253
  {
1254
  "model": "anthropic/claude-3.5-sonnet",
 
1256
  "target_language": "xho_Latn",
1257
  "target_language_name": "Xhosa",
1258
  "speakers": 11000000,
1259
+ "bleu": 55.46880910094653,
1260
+ "bert_score": 0.9008744815985362
1261
  },
1262
  {
1263
  "model": "anthropic/claude-3.5-sonnet",
 
1265
  "target_language": "kik_Latn",
1266
  "target_language_name": "Gikuyu",
1267
  "speakers": 6623000,
1268
+ "bleu": 40.92882752909001,
1269
+ "bert_score": 0.8850945432980856
1270
  },
1271
  {
1272
  "model": "anthropic/claude-3.5-sonnet",
 
1274
  "target_language": "tso_Latn",
1275
  "target_language_name": "Tsonga",
1276
  "speakers": 13000000,
1277
+ "bleu": 58.35165735971044,
1278
+ "bert_score": 0.9134832978248596
1279
  },
1280
  {
1281
  "model": "anthropic/claude-3.5-sonnet",
 
1283
  "target_language": "tat_Cyrl",
1284
  "target_language_name": "Tatar",
1285
  "speakers": 5427318,
1286
+ "bleu": 60.3447467212788,
1287
+ "bert_score": 0.9364115715026855
1288
  },
1289
  {
1290
  "model": "anthropic/claude-3.5-sonnet",
 
1292
  "target_language": "awa_Deva",
1293
  "target_language_name": "Awadhi",
1294
  "speakers": 22000000,
1295
+ "bleu": 46.0797144146192,
1296
+ "bert_score": 0.9333642820517222
1297
  },
1298
  {
1299
  "model": "anthropic/claude-3.5-sonnet",
1300
  "original_language": "eng_Latn",
1301
  "target_language": "gom_Deva",
1302
+ "target_language_name": "Goan Konkani",
1303
  "speakers": 3633900,
1304
+ "bleu": 47.108401794496295,
1305
+ "bert_score": 0.931428724527359
1306
  },
1307
  {
1308
  "model": "anthropic/claude-3.5-sonnet",
 
1310
  "target_language": "amh_Ethi",
1311
  "target_language_name": "Amharic",
1312
  "speakers": 25000000,
1313
+ "bleu": 43.15445686971015,
1314
+ "bert_score": 0.9891169210275014
1315
  },
1316
  {
1317
  "model": "anthropic/claude-3.5-sonnet",
 
1319
  "target_language": "tam_Taml",
1320
  "target_language_name": "Tamil",
1321
  "speakers": 75000000,
1322
+ "bleu": 65.78632210538115,
1323
+ "bert_score": 0.9536473691463471
1324
  },
1325
  {
1326
  "model": "openai/gpt-4o-mini",
 
1328
  "target_language": "isl_Latn",
1329
  "target_language_name": "Icelandic",
1330
  "speakers": 358000,
1331
+ "bleu": 61.13552606922321,
1332
+ "bert_score": 0.9258354703585306
1333
  },
1334
  {
1335
  "model": "anthropic/claude-3.5-sonnet",
 
1337
  "target_language": "isl_Latn",
1338
  "target_language_name": "Icelandic",
1339
  "speakers": 358000,
1340
+ "bleu": 66.67473000551618,
1341
+ "bert_score": 0.938145500421524
1342
  },
1343
  {
1344
  "model": "meta-llama/llama-3.1-70b-instruct",
 
1346
  "target_language": "isl_Latn",
1347
  "target_language_name": "Icelandic",
1348
  "speakers": 358000,
1349
+ "bleu": 58.60923195347865,
1350
+ "bert_score": 0.9203641970952352
1351
  },
1352
  {
1353
  "model": "mistralai/mistral-nemo",
 
1355
  "target_language": "isl_Latn",
1356
  "target_language_name": "Icelandic",
1357
  "speakers": 358000,
1358
+ "bleu": 45.58482442810681,
1359
+ "bert_score": 0.8916285157203674
1360
  },
1361
  {
1362
  "model": "qwen/qwen-2.5-72b-instruct",
 
1364
  "target_language": "isl_Latn",
1365
  "target_language_name": "Icelandic",
1366
  "speakers": 358000,
1367
+ "bleu": 40.16071522003955,
1368
+ "bert_score": 0.8842564543088277
1369
  },
1370
  {
1371
  "model": "anthropic/claude-3.5-sonnet",
 
1373
  "target_language": "san_Deva",
1374
  "target_language_name": "Sanskrit",
1375
  "speakers": 49736,
1376
+ "bleu": 32.78132499113236,
1377
+ "bert_score": 0.8987655500570934
1378
  },
1379
  {
1380
  "model": "anthropic/claude-3.5-sonnet",
1381
  "original_language": "eng_Latn",
1382
  "target_language": "als_Latn",
1383
+ "target_language_name": "Tosk Albanian",
1384
  "speakers": 3000000,
1385
+ "bleu": 69.4218765092482,
1386
+ "bert_score": 0.940268095334371
1387
  },
1388
  {
1389
  "model": "anthropic/claude-3.5-sonnet",
 
1391
  "target_language": "ron_Latn",
1392
  "target_language_name": "Romanian",
1393
  "speakers": 24300000,
1394
+ "bleu": 76.4907159034647,
1395
+ "bert_score": 0.9455295324325561
1396
  },
1397
  {
1398
  "model": "anthropic/claude-3.5-sonnet",
 
1400
  "target_language": "kaz_Cyrl",
1401
  "target_language_name": "Kazakh",
1402
  "speakers": 13161980,
1403
+ "bleu": 61.12516213751114,
1404
+ "bert_score": 0.9379647115866343
1405
  },
1406
  {
1407
  "model": "anthropic/claude-3.5-sonnet",
 
1409
  "target_language": "sat_Olck",
1410
  "target_language_name": "Santhali",
1411
  "speakers": 7200000,
1412
+ "bleu": 31.51192472690372,
1413
+ "bert_score": 0.9440957049528758
1414
  },
1415
  {
1416
  "model": "anthropic/claude-3.5-sonnet",
 
1418
  "target_language": "ukr_Cyrl",
1419
  "target_language_name": "Ukrainian",
1420
  "speakers": 34710100,
1421
+ "bleu": 68.09762325436868,
1422
+ "bert_score": 0.9468558847904205
1423
  },
1424
  {
1425
  "model": "anthropic/claude-3.5-sonnet",
1426
  "original_language": "eng_Latn",
1427
  "target_language": "khk_Cyrl",
1428
+ "target_language_name": "Halh Mongolian",
1429
  "speakers": 2704030,
1430
+ "bleu": 58.50377899708198,
1431
+ "bert_score": 0.9380823055903117
1432
  },
1433
  {
1434
  "model": "anthropic/claude-3.5-sonnet",
 
1436
  "target_language": "bjn_Latn",
1437
  "target_language_name": "Banjar",
1438
  "speakers": 3500000,
1439
+ "bleu": 56.309519555010915,
1440
+ "bert_score": 0.901931474606196
1441
  },
1442
  {
1443
  "model": "anthropic/claude-3.5-sonnet",
 
1445
  "target_language": "fon_Latn",
1446
  "target_language_name": "Fon",
1447
  "speakers": 1935500,
1448
+ "bleu": 25.279777366609945,
1449
+ "bert_score": 0.8664443592230479
1450
  },
1451
  {
1452
  "model": "anthropic/claude-3.5-sonnet",
 
1454
  "target_language": "sin_Sinh",
1455
  "target_language_name": "Sinhala",
1456
  "speakers": 15300000,
1457
+ "bleu": 56.75673117959971,
1458
+ "bert_score": 0.9713358581066132
1459
  },
1460
  {
1461
  "model": "anthropic/claude-3.5-sonnet",
 
1463
  "target_language": "nno_Latn",
1464
  "target_language_name": "nno",
1465
  "speakers": 0,
1466
+ "bleu": 71.86156462958435,
1467
+ "bert_score": 0.9335320313771566
1468
  },
1469
  {
1470
  "model": "anthropic/claude-3.5-sonnet",
 
1472
  "target_language": "hau_Latn",
1473
  "target_language_name": "Hausa",
1474
  "speakers": 43900000,
1475
+ "bleu": 56.34319579006431,
1476
+ "bert_score": 0.9012877802054088
1477
  },
1478
  {
1479
  "model": "anthropic/claude-3.5-sonnet",
 
1481
  "target_language": "prs_Arab",
1482
  "target_language_name": "Dari",
1483
  "speakers": 9600000,
1484
+ "bleu": 52.55397957953147,
1485
+ "bert_score": 0.9466466506322225
1486
  },
1487
  {
1488
  "model": "anthropic/claude-3.5-sonnet",
 
1490
  "target_language": "ell_Grek",
1491
  "target_language_name": "Greek",
1492
  "speakers": 15000000,
1493
+ "bleu": 66.23477821529342,
1494
+ "bert_score": 0.9577525118986766
1495
  },
1496
  {
1497
  "model": "anthropic/claude-3.5-sonnet",
 
1499
  "target_language": "tpi_Latn",
1500
  "target_language_name": "Tok Pisin",
1501
  "speakers": 4000000,
1502
+ "bleu": 56.54077603673191,
1503
+ "bert_score": 0.903118242820104
1504
  },
1505
  {
1506
  "model": "anthropic/claude-3.5-sonnet",
 
1508
  "target_language": "hye_Armn",
1509
  "target_language_name": "Armenian",
1510
  "speakers": 6700000,
1511
+ "bleu": 64.68044008058686,
1512
+ "bert_score": 0.9550812800725301
1513
  },
1514
  {
1515
  "model": "anthropic/claude-3.5-sonnet",
 
1517
  "target_language": "eus_Latn",
1518
  "target_language_name": "Basque",
1519
  "speakers": 750000,
1520
+ "bleu": 65.89687213771296,
1521
+ "bert_score": 0.9192741294701894
1522
  },
1523
  {
1524
  "model": "anthropic/claude-3.5-sonnet",
 
1526
  "target_language": "nob_Latn",
1527
  "target_language_name": "Bokmål",
1528
  "speakers": 4000000,
1529
+ "bleu": 77.49395130155645,
1530
+ "bert_score": 0.9550971885522207
1531
  },
1532
  {
1533
  "model": "anthropic/claude-3.5-sonnet",
 
1535
  "target_language": "slk_Latn",
1536
  "target_language_name": "Slovak",
1537
  "speakers": 6000000,
1538
+ "bleu": 67.92848040860814,
1539
+ "bert_score": 0.9360236605008443
1540
  },
1541
  {
1542
  "model": "anthropic/claude-3.5-sonnet",
1543
  "original_language": "eng_Latn",
1544
  "target_language": "knc_Arab",
1545
+ "target_language_name": "Yerwa Kanuri",
1546
  "speakers": 0,
1547
+ "bleu": 14.954246536984446,
1548
+ "bert_score": 0.8674997230370839
1549
  },
1550
  {
1551
  "model": "openai/gpt-4o-mini",
 
1553
  "target_language": "lin_Latn",
1554
  "target_language_name": "Lingala",
1555
  "speakers": 20000000,
1556
+ "bleu": 50.384710146677506,
1557
+ "bert_score": 0.887447464466095
1558
  },
1559
  {
1560
  "model": "anthropic/claude-3.5-sonnet",
 
1562
  "target_language": "lin_Latn",
1563
  "target_language_name": "Lingala",
1564
  "speakers": 20000000,
1565
+ "bleu": 56.735518064625495,
1566
+ "bert_score": 0.900246125459671
1567
  },
1568
  {
1569
  "model": "meta-llama/llama-3.1-70b-instruct",
 
1571
  "target_language": "lin_Latn",
1572
  "target_language_name": "Lingala",
1573
  "speakers": 20000000,
1574
+ "bleu": 19.732953348932526,
1575
+ "bert_score": 0.8526991029580434
1576
  },
1577
  {
1578
  "model": "mistralai/mistral-nemo",
 
1580
  "target_language": "lin_Latn",
1581
  "target_language_name": "Lingala",
1582
  "speakers": 20000000,
1583
+ "bleu": 8.64985622273109,
1584
+ "bert_score": 0.8075945158799489
1585
  },
1586
  {
1587
  "model": "qwen/qwen-2.5-72b-instruct",
 
1589
  "target_language": "lin_Latn",
1590
  "target_language_name": "Lingala",
1591
  "speakers": 20000000,
1592
+ "bleu": 16.658410482633357,
1593
+ "bert_score": 0.8286310772101084
1594
  },
1595
  {
1596
  "model": "anthropic/claude-3.5-sonnet",
 
1598
  "target_language": "bam_Latn",
1599
  "target_language_name": "Bamanankan",
1600
  "speakers": 2700000,
1601
+ "bleu": 38.693909140769804,
1602
+ "bert_score": 0.887204376856486
1603
  },
1604
  {
1605
  "model": "anthropic/claude-3.5-sonnet",
1606
  "original_language": "eng_Latn",
1607
  "target_language": "sot_Latn",
1608
+ "target_language_name": "Southern Sotho",
1609
  "speakers": 6000000,
1610
+ "bleu": 56.73529955399566,
1611
+ "bert_score": 0.9102749407291413
1612
  },
1613
  {
1614
  "model": "anthropic/claude-3.5-sonnet",
 
1616
  "target_language": "min_Arab",
1617
  "target_language_name": "Minangkabau",
1618
  "speakers": 5530000,
1619
+ "bleu": 37.44925084737469,
1620
+ "bert_score": 0.9340883692105612
1621
  },
1622
  {
1623
  "model": "anthropic/claude-3.5-sonnet",
1624
  "original_language": "eng_Latn",
1625
  "target_language": "zgh_Tfng",
1626
+ "target_language_name": "Standard Moroccan Tamazight",
1627
  "speakers": 0,
1628
+ "bleu": 35.62476481092257,
1629
+ "bert_score": 0.9847298423449199
1630
  },
1631
  {
1632
  "model": "anthropic/claude-3.5-sonnet",
1633
  "original_language": "eng_Latn",
1634
  "target_language": "gug_Latn",
1635
+ "target_language_name": "Paraguayan Guaraní",
1636
  "speakers": 0,
1637
+ "bleu": 41.79298637071421,
1638
+ "bert_score": 0.8764786461989085
1639
  },
1640
  {
1641
  "model": "anthropic/claude-3.5-sonnet",
 
1643
  "target_language": "lmo_Latn",
1644
  "target_language_name": "Lombard",
1645
  "speakers": 3900000,
1646
+ "bleu": 46.38844026736926,
1647
+ "bert_score": 0.8643471499284109
1648
  },
1649
  {
1650
  "model": "anthropic/claude-3.5-sonnet",
 
1652
  "target_language": "yor_Latn",
1653
  "target_language_name": "Yoruba",
1654
  "speakers": 40000000,
1655
+ "bleu": 34.264254226792296,
1656
+ "bert_score": 0.9001545011997223
1657
  },
1658
  {
1659
  "model": "anthropic/claude-3.5-sonnet",
 
1661
  "target_language": "taq_Tfng",
1662
  "target_language_name": "Tamasheq",
1663
  "speakers": 500000,
1664
+ "bleu": 10.997033033155907,
1665
+ "bert_score": 0.8574198484420776
1666
  },
1667
  {
1668
  "model": "anthropic/claude-3.5-sonnet",
 
1670
  "target_language": "tgk_Cyrl",
1671
  "target_language_name": "Tajik",
1672
  "speakers": 14000000,
1673
+ "bleu": 60.97836841576954,
1674
+ "bert_score": 0.9378365337848663
1675
  },
1676
  {
1677
  "model": "anthropic/claude-3.5-sonnet",
1678
  "original_language": "eng_Latn",
1679
  "target_language": "fuv_Latn",
1680
+ "target_language_name": "Nigerian Fulfulde",
1681
  "speakers": 14500000,
1682
+ "bleu": 28.17610559128895,
1683
+ "bert_score": 0.8343587597211202
1684
  },
1685
  {
1686
  "model": "anthropic/claude-3.5-sonnet",
 
1688
  "target_language": "mal_Mlym",
1689
  "target_language_name": "Malayalam",
1690
  "speakers": 37100000,
1691
+ "bleu": 64.06558940908465,
1692
+ "bert_score": 0.9803075671195984
1693
  },
1694
  {
1695
  "model": "anthropic/claude-3.5-sonnet",
 
1697
  "target_language": "tel_Telu",
1698
  "target_language_name": "Telugu",
1699
  "speakers": 82000000,
1700
+ "bleu": 61.635245762892694,
1701
+ "bert_score": 0.9790697515010833
1702
  },
1703
  {
1704
  "model": "anthropic/claude-3.5-sonnet",
1705
  "original_language": "eng_Latn",
1706
  "target_language": "arb_Arab",
1707
+ "target_language_name": "Standard Arabic",
1708
  "speakers": 0,
1709
+ "bleu": 65.04139779040646,
1710
+ "bert_score": 0.9535989860693613
1711
  },
1712
  {
1713
  "model": "anthropic/claude-3.5-sonnet",
 
1715
  "target_language": "khm_Khmr",
1716
  "target_language_name": "Khmer",
1717
  "speakers": 16600000,
1718
+ "bleu": 49.209825704340375,
1719
+ "bert_score": 0.8907732884089152
1720
  },
1721
  {
1722
  "model": "anthropic/claude-3.5-sonnet",
 
1724
  "target_language": "scn_Latn",
1725
  "target_language_name": "Sicilian",
1726
  "speakers": 4700000,
1727
+ "bleu": 58.589535944250635,
1728
+ "bert_score": 0.9042834500471751
1729
  },
1730
  {
1731
  "model": "anthropic/claude-3.5-sonnet",
 
1733
  "target_language": "ltz_Latn",
1734
  "target_language_name": "Luxembourgish",
1735
  "speakers": 391200,
1736
+ "bleu": 70.8338190437548,
1737
+ "bert_score": 0.9297492106755575
1738
  },
1739
  {
1740
  "model": "anthropic/claude-3.5-sonnet",
 
1742
  "target_language": "pag_Latn",
1743
  "target_language_name": "Pangasinan",
1744
  "speakers": 1100000,
1745
+ "bleu": 56.00481838266269,
1746
+ "bert_score": 0.890628065665563
1747
  },
1748
  {
1749
  "model": "anthropic/claude-3.5-sonnet",
 
1751
  "target_language": "kab_Latn",
1752
  "target_language_name": "Kabyle",
1753
  "speakers": 5586000,
1754
+ "bleu": 41.14429925869902,
1755
+ "bert_score": 0.8803219795227051
1756
  },
1757
  {
1758
  "model": "anthropic/claude-3.5-sonnet",
 
1760
  "target_language": "bak_Cyrl",
1761
  "target_language_name": "Bashkort",
1762
  "speakers": 1200000,
1763
+ "bleu": 57.54538429274717,
1764
+ "bert_score": 0.9298217594623566
1765
  },
1766
  {
1767
  "model": "anthropic/claude-3.5-sonnet",
 
1769
  "target_language": "twi_Latn_akua1239",
1770
  "target_language_name": "Twi",
1771
  "speakers": 3000000,
1772
+ "bleu": 45.1519376004116,
1773
+ "bert_score": 0.8945407330989837
1774
  },
1775
  {
1776
  "model": "anthropic/claude-3.5-sonnet",
 
1778
  "target_language": "hin_Deva",
1779
  "target_language_name": "Hindi",
1780
  "speakers": 341000000,
1781
+ "bleu": 64.936216689785,
1782
+ "bert_score": 0.9463364283243815
1783
  },
1784
  {
1785
  "model": "anthropic/claude-3.5-sonnet",
 
1787
  "target_language": "kas_Arab",
1788
  "target_language_name": "Kashmiri",
1789
  "speakers": 6900000,
1790
+ "bleu": 38.28328065553461,
1791
+ "bert_score": 0.9321333905061086
1792
  },
1793
  {
1794
  "model": "anthropic/claude-3.5-sonnet",
 
1796
  "target_language": "mlt_Latn",
1797
  "target_language_name": "Maltese",
1798
  "speakers": 570000,
1799
+ "bleu": 80.08667772627608,
1800
+ "bert_score": 0.9520254651705424
1801
  },
1802
  {
1803
  "model": "anthropic/claude-3.5-sonnet",
 
1805
  "target_language": "som_Latn",
1806
  "target_language_name": "Somali",
1807
  "speakers": 16200000,
1808
+ "bleu": 55.370649647294535,
1809
+ "bert_score": 0.9085715929667155
1810
  },
1811
  {
1812
  "model": "anthropic/claude-3.5-sonnet",
 
1814
  "target_language": "hne_Deva",
1815
  "target_language_name": "Chhattisgarhi",
1816
  "speakers": 16300000,
1817
+ "bleu": 47.979750130407254,
1818
+ "bert_score": 0.9363766014575958
1819
  },
1820
  {
1821
  "model": "anthropic/claude-3.5-sonnet",
 
1823
  "target_language": "glg_Latn",
1824
  "target_language_name": "Galician",
1825
  "speakers": 2500000,
1826
+ "bleu": 68.70247869041181,
1827
+ "bert_score": 0.9283550182978312
1828
  },
1829
  {
1830
  "model": "anthropic/claude-3.5-sonnet",
 
1832
  "target_language": "ory_Orya",
1833
  "target_language_name": "Odia",
1834
  "speakers": 34500000,
1835
+ "bleu": 57.362809651798656,
1836
+ "bert_score": 0.9768644154071808
1837
  },
1838
  {
1839
  "model": "anthropic/claude-3.5-sonnet",
 
1841
  "target_language": "nld_Latn",
1842
  "target_language_name": "Dutch",
1843
  "speakers": 23100000,
1844
+ "bleu": 71.18493263152928,
1845
+ "bert_score": 0.9376831948757172
1846
  },
1847
  {
1848
  "model": "anthropic/claude-3.5-sonnet",
1849
  "original_language": "eng_Latn",
1850
  "target_language": "apc_Arab_sout3123",
1851
+ "target_language_name": "Levantine Arabic",
1852
  "speakers": 44000000,
1853
+ "bleu": 56.235711236638,
1854
+ "bert_score": 0.9452390710512797
1855
  },
1856
  {
1857
  "model": "anthropic/claude-3.5-sonnet",
 
1859
  "target_language": "oci_Latn",
1860
  "target_language_name": "Occitan",
1861
  "speakers": 542000,
1862
+ "bleu": 71.53274018395614,
1863
+ "bert_score": 0.9337525626023611
1864
  },
1865
  {
1866
  "model": "anthropic/claude-3.5-sonnet",
 
1868
  "target_language": "mni_Beng",
1869
  "target_language_name": "Meitei",
1870
  "speakers": 1470000,
1871
+ "bleu": 40.61648039338993,
1872
+ "bert_score": 0.9325185577074687
1873
  },
1874
  {
1875
  "model": "anthropic/claude-3.5-sonnet",
 
1877
  "target_language": "hun_Latn",
1878
  "target_language_name": "Hungarian",
1879
  "speakers": 12600000,
1880
+ "bleu": 66.13011194084116,
1881
+ "bert_score": 0.924921864271164
1882
  },
1883
  {
1884
  "model": "anthropic/claude-3.5-sonnet",
 
1886
  "target_language": "bho_Deva",
1887
  "target_language_name": "Bhojpuri",
1888
  "speakers": 52200000,
1889
+ "bleu": 44.54123379070156,
1890
+ "bert_score": 0.9288184980551402
1891
  },
1892
  {
1893
  "model": "anthropic/claude-3.5-sonnet",
 
1895
  "target_language": "mya_Mymr",
1896
  "target_language_name": "Burmese",
1897
  "speakers": 32900000,
1898
+ "bleu": 55.723591167735165,
1899
+ "bert_score": 0.975975106159846
1900
  },
1901
  {
1902
  "model": "anthropic/claude-3.5-sonnet",
 
1904
  "target_language": "run_Latn",
1905
  "target_language_name": "Rundi",
1906
  "speakers": 10800000,
1907
+ "bleu": 48.94351362900039,
1908
+ "bert_score": 0.8933652222156525
1909
  },
1910
  {
1911
  "model": "anthropic/claude-3.5-sonnet",
 
1913
  "target_language": "ast_Latn",
1914
  "target_language_name": "Asturian",
1915
  "speakers": 450000,
1916
+ "bleu": 71.34456234933488,
1917
+ "bert_score": 0.931475841999054
1918
  },
1919
  {
1920
  "model": "anthropic/claude-3.5-sonnet",
 
1922
  "target_language": "tir_Ethi",
1923
  "target_language_name": "Tigrigna",
1924
  "speakers": 7507780,
1925
+ "bleu": 32.87119617033141,
1926
+ "bert_score": 0.9852415164311726
1927
  },
1928
  {
1929
  "model": "anthropic/claude-3.5-sonnet",
 
1931
  "target_language": "kac_Latn",
1932
  "target_language_name": "Jingpho",
1933
  "speakers": 940000,
1934
+ "bleu": 43.550058140290794,
1935
+ "bert_score": 0.8727998991807302
1936
  },
1937
  {
1938
  "model": "anthropic/claude-3.5-sonnet",
1939
  "original_language": "eng_Latn",
1940
  "target_language": "acm_Arab",
1941
+ "target_language_name": "Mesopotamian Arabic",
1942
  "speakers": 15700000,
1943
+ "bleu": 49.51848652969806,
1944
+ "bert_score": 0.9382626354694367
1945
  },
1946
  {
1947
  "model": "anthropic/claude-3.5-sonnet",
1948
  "original_language": "eng_Latn",
1949
  "target_language": "arz_Arab",
1950
+ "target_language_name": "Egyptian Arabic",
1951
  "speakers": 100542400,
1952
+ "bleu": 53.68180810376352,
1953
+ "bert_score": 0.9394114673137665
1954
  },
1955
  {
1956
  "model": "anthropic/claude-3.5-sonnet",
1957
  "original_language": "eng_Latn",
1958
  "target_language": "mhr_Cyrl",
1959
+ "target_language_name": "Meadow Mari",
1960
  "speakers": 482000,
1961
+ "bleu": 49.791168058232124,
1962
+ "bert_score": 0.929511696100235
1963
  },
1964
  {
1965
  "model": "anthropic/claude-3.5-sonnet",
 
1967
  "target_language": "ita_Latn",
1968
  "target_language_name": "Italian",
1969
  "speakers": 64819790,
1970
+ "bleu": 69.15883435724486,
1971
+ "bert_score": 0.9358606537183126
1972
  },
1973
  {
1974
  "model": "anthropic/claude-3.5-sonnet",
 
1976
  "target_language": "spa_Latn",
1977
  "target_language_name": "Spanish",
1978
  "speakers": 485000000,
1979
+ "bleu": 63.84670733785411,
1980
+ "bert_score": 0.922440630197525
1981
  },
1982
  {
1983
  "model": "anthropic/claude-3.5-sonnet",
1984
  "original_language": "eng_Latn",
1985
  "target_language": "ydd_Hebr",
1986
+ "target_language_name": "Eastern Yiddish",
1987
  "speakers": 0,
1988
+ "bleu": 47.55620093253461,
1989
+ "bert_score": 0.9590989410877228
1990
  },
1991
  {
1992
  "model": "anthropic/claude-3.5-sonnet",
1993
  "original_language": "eng_Latn",
1994
  "target_language": "gaz_Latn",
1995
+ "target_language_name": "West Central Oromo",
1996
  "speakers": 0,
1997
+ "bleu": 46.909035002775134,
1998
+ "bert_score": 0.8845542371273041
1999
+ },
2000
+ {
2001
+ "model": "anthropic/claude-3.5-sonnet",
2002
+ "original_language": "eng_Latn",
2003
+ "target_language": "eng_Latn",
2004
+ "target_language_name": "English",
2005
+ "speakers": 1132366680,
2006
+ "bleu": 75.35011734860745,
2007
+ "bert_score": 0.880733436346054
2008
  },
2009
  {
2010
  "model": "anthropic/claude-3.5-sonnet",
 
2012
  "target_language": "ktu_Latn",
2013
  "target_language_name": "Kituba (Democratic Republic of the Congo)",
2014
  "speakers": 0,
2015
+ "bleu": 52.848460160201476,
2016
+ "bert_score": 0.9017938395341237
2017
  },
2018
  {
2019
  "model": "anthropic/claude-3.5-sonnet",
 
2021
  "target_language": "dyu_Latn",
2022
  "target_language_name": "Jula",
2023
  "speakers": 2700000,
2024
+ "bleu": 29.541518029704612,
2025
+ "bert_score": 0.8223321119944255
2026
  },
2027
  {
2028
  "model": "anthropic/claude-3.5-sonnet",
 
2030
  "target_language": "ace_Latn",
2031
  "target_language_name": "Aceh",
2032
  "speakers": 3500032,
2033
+ "bleu": 54.98266447488466,
2034
+ "bert_score": 0.9054659227530162
2035
  },
2036
  {
2037
  "model": "anthropic/claude-3.5-sonnet",
2038
  "original_language": "eng_Latn",
2039
  "target_language": "ckb_Arab",
2040
+ "target_language_name": "Central Kurdish",
2041
  "speakers": 7250000,
2042
+ "bleu": 59.19279106924606,
2043
+ "bert_score": 0.933256882429123
2044
  },
2045
  {
2046
  "model": "anthropic/claude-3.5-sonnet",
 
2048
  "target_language": "mos_Latn",
2049
  "target_language_name": "Moore",
2050
  "speakers": 7600000,
2051
+ "bleu": 32.845809798291015,
2052
+ "bert_score": 0.8583020627498626
2053
  },
2054
  {
2055
  "model": "anthropic/claude-3.5-sonnet",
 
2057
  "target_language": "kir_Cyrl",
2058
  "target_language_name": "Kyrgyz",
2059
  "speakers": 4568480,
2060
+ "bleu": 57.082442245268965,
2061
+ "bert_score": 0.9317750076452891
2062
  },
2063
  {
2064
  "model": "anthropic/claude-3.5-sonnet",
 
2066
  "target_language": "npi_Deva",
2067
  "target_language_name": "Nepali",
2068
  "speakers": 0,
2069
+ "bleu": 55.29193473519464,
2070
+ "bert_score": 0.9358912428220113
2071
  },
2072
  {
2073
  "model": "anthropic/claude-3.5-sonnet",
 
2075
  "target_language": "kbp_Latn",
2076
  "target_language_name": "Kabiyè",
2077
  "speakers": 1000000,
2078
+ "bleu": 22.549850465519835,
2079
+ "bert_score": 0.8587520639101665
2080
  },
2081
  {
2082
  "model": "anthropic/claude-3.5-sonnet",
 
2084
  "target_language": "bem_Latn",
2085
  "target_language_name": "Bemba",
2086
  "speakers": 3600000,
2087
+ "bleu": 47.80685489558032,
2088
+ "bert_score": 0.889907830953598
2089
  }
2090
  ]
results_summary.json CHANGED
@@ -1,997 +1,1202 @@
1
  [
2
- {
3
- "target_language_name":" Ayacucho Quechua",
4
- "bleu":46.244412926,
5
- "speakers":918200.0
6
- },
7
- {
8
- "target_language_name":" Central Aymara",
9
- "bleu":40.7737907059,
10
- "speakers":0.0
11
- },
12
- {
13
- "target_language_name":" Central Kurdish",
14
- "bleu":58.3049111733,
15
- "speakers":7250000.0
16
- },
17
- {
18
- "target_language_name":" Central Tibetan",
19
- "bleu":51.762059856,
20
- "speakers":1200000.0
21
- },
22
- {
23
- "target_language_name":" Eastern Punjabi",
24
- "bleu":60.8335398677,
25
- "speakers":125000000.0
26
- },
27
- {
28
- "target_language_name":" Eastern Yiddish",
29
- "bleu":49.9745007585,
30
- "speakers":0.0
31
- },
32
- {
33
- "target_language_name":" Egyptian Arabic",
34
- "bleu":54.068278052,
35
- "speakers":100542400.0
36
- },
37
- {
38
- "target_language_name":" Goan Konkani",
39
- "bleu":46.8883507968,
40
- "speakers":3633900.0
41
- },
42
- {
43
- "target_language_name":" Halh Mongolian",
44
- "bleu":59.1426397299,
45
- "speakers":2704030.0
46
- },
47
- {
48
- "target_language_name":" Iranian Persian",
49
- "bleu":57.466690672,
50
- "speakers":52800000.0
51
- },
52
- {
53
- "target_language_name":" Levantine Arabic",
54
- "bleu":56.1658711447,
55
- "speakers":44000000.0
56
- },
57
- {
58
- "target_language_name":" Mandarin Chinese",
59
- "bleu":42.5923366202,
60
- "speakers":1074000000.0
61
- },
62
- {
63
- "target_language_name":" Meadow Mari",
64
- "bleu":49.5606117273,
65
- "speakers":482000.0
66
- },
67
- {
68
- "target_language_name":" Merina Malagasy",
69
- "bleu":60.4304388047,
70
- "speakers":0.0
71
- },
72
- {
73
- "target_language_name":" Mesopotamian Arabic",
74
- "bleu":50.0865634269,
75
- "speakers":15700000.0
76
- },
77
- {
78
- "target_language_name":" Moroccan Arabic",
79
- "bleu":48.5607873978,
80
- "speakers":27500000.0
81
- },
82
- {
83
- "target_language_name":" Najdi Arabic",
84
- "bleu":47.7692581451,
85
- "speakers":0.0
86
- },
87
- {
88
- "target_language_name":" Nigerian Fulfulde",
89
- "bleu":28.2458571831,
90
- "speakers":14500000.0
91
- },
92
- {
93
- "target_language_name":" North Azerbaijani",
94
- "bleu":55.0045961351,
95
- "speakers":9220610.0
96
- },
97
- {
98
- "target_language_name":" Northern Kurdish",
99
- "bleu":55.0085600267,
100
- "speakers":14600000.0
101
- },
102
- {
103
- "target_language_name":" Northern Sotho",
104
- "bleu":63.0482508066,
105
- "speakers":4100000.0
106
- },
107
- {
108
- "target_language_name":" Northern Uzbek",
109
- "bleu":64.07804482,
110
- "speakers":26912410.0
111
- },
112
- {
113
- "target_language_name":" Paraguayan Guaran\u00ed",
114
- "bleu":42.9023503897,
115
- "speakers":0.0
116
- },
117
- {
118
- "target_language_name":" South Azerbaijani",
119
- "bleu":43.5362266708,
120
- "speakers":15000000.0
121
- },
122
- {
123
- "target_language_name":" Southern Pashto",
124
- "bleu":38.5486962222,
125
- "speakers":10900000.0
126
- },
127
- {
128
- "target_language_name":" Southern Sotho",
129
- "bleu":57.1537510266,
130
- "speakers":6000000.0
131
- },
132
- {
133
- "target_language_name":" Southwestern Dinka",
134
- "bleu":12.4978320514,
135
- "speakers":0.0
136
- },
137
- {
138
- "target_language_name":" Standard Arabic",
139
- "bleu":56.6219786764,
140
- "speakers":0.0
141
- },
142
- {
143
- "target_language_name":" Standard Estonian",
144
- "bleu":68.0693526943,
145
- "speakers":1164770.0
146
- },
147
- {
148
- "target_language_name":" Standard German",
149
- "bleu":77.2213038369,
150
- "speakers":105000000.0
151
- },
152
- {
153
- "target_language_name":" Standard Latvian",
154
- "bleu":64.8995188161,
155
- "speakers":0.0
156
- },
157
- {
158
- "target_language_name":" Standard Malay",
159
- "bleu":75.5868255696,
160
- "speakers":0.0
161
- },
162
- {
163
- "target_language_name":" Standard Moroccan Tamazight",
164
- "bleu":36.0211020389,
165
- "speakers":0.0
166
- },
167
- {
168
- "target_language_name":" Ta\u2019izzi-Adeni Arabic",
169
- "bleu":48.6728574258,
170
- "speakers":10500000.0
171
- },
172
- {
173
- "target_language_name":" Tosk Albanian",
174
- "bleu":69.2428556015,
175
- "speakers":3000000.0
176
- },
177
- {
178
- "target_language_name":" Tunisian Arabic",
179
- "bleu":49.8735980011,
180
- "speakers":11600000.0
181
- },
182
- {
183
- "target_language_name":" West Central Oromo",
184
- "bleu":47.1911435507,
185
- "speakers":0.0
186
- },
187
- {
188
- "target_language_name":" Yerwa Kanuri",
189
- "bleu":18.4779991227,
190
- "speakers":0.0
191
- },
192
- {
193
- "target_language_name":" Yue Chinese",
194
- "bleu":34.2560977082,
195
- "speakers":73100000.0
196
- },
197
  {
198
  "target_language_name":"Aceh",
199
  "bleu":39.1659660901,
 
200
  "speakers":3500032.0
201
  },
202
  {
203
  "target_language_name":"Afrikaans",
204
  "bleu":76.8900540777,
 
205
  "speakers":10300000.0
206
  },
207
  {
208
  "target_language_name":"Amharic",
209
  "bleu":43.1544568697,
 
210
  "speakers":25000000.0
211
  },
212
  {
213
  "target_language_name":"Armenian",
214
  "bleu":64.6804400806,
 
215
  "speakers":6700000.0
216
  },
217
  {
218
  "target_language_name":"Assamese",
219
  "bleu":47.0351331605,
 
220
  "speakers":15300000.0
221
  },
222
  {
223
  "target_language_name":"Asturian",
224
  "bleu":71.3445623493,
 
225
  "speakers":450000.0
226
  },
227
  {
228
  "target_language_name":"Awadhi",
229
  "bleu":46.0797144146,
 
230
  "speakers":22000000.0
231
  },
 
 
 
 
 
 
232
  {
233
  "target_language_name":"Bali (Indonesia)",
234
  "bleu":52.8752419159,
 
235
  "speakers":4000000.0
236
  },
237
  {
238
  "target_language_name":"Bamanankan",
239
  "bleu":38.6939091408,
 
240
  "speakers":2700000.0
241
  },
242
  {
243
  "target_language_name":"Banjar",
244
  "bleu":46.5453977487,
 
245
  "speakers":3500000.0
246
  },
247
  {
248
  "target_language_name":"Bashkort",
249
  "bleu":57.5453842927,
 
250
  "speakers":1200000.0
251
  },
252
  {
253
  "target_language_name":"Basque",
254
  "bleu":65.8968721377,
 
255
  "speakers":750000.0
256
  },
257
  {
258
  "target_language_name":"Belarusian",
259
  "bleu":54.5195166442,
 
260
  "speakers":7900000.0
261
  },
262
  {
263
  "target_language_name":"Bemba",
264
  "bleu":47.8068548956,
 
265
  "speakers":3600000.0
266
  },
267
  {
268
  "target_language_name":"Bengali",
269
  "bleu":57.1417588816,
 
270
  "speakers":300000000.0
271
  },
272
  {
273
  "target_language_name":"Bhojpuri",
274
  "bleu":44.5412337907,
 
275
  "speakers":52200000.0
276
  },
277
  {
278
  "target_language_name":"Bokm\u00e5l",
279
  "bleu":77.4939513016,
 
280
  "speakers":4000000.0
281
  },
282
  {
283
  "target_language_name":"Boro (India)",
284
  "bleu":36.1100474969,
 
285
  "speakers":1482929.0
286
  },
287
  {
288
  "target_language_name":"Bosnian",
289
  "bleu":72.5488027131,
 
290
  "speakers":3500000.0
291
  },
292
  {
293
  "target_language_name":"Bugis",
294
  "bleu":44.8388170031,
 
295
  "speakers":5017800.0
296
  },
297
  {
298
  "target_language_name":"Bulgarian",
299
  "bleu":72.9695925131,
 
300
  "speakers":9000000.0
301
  },
302
  {
303
  "target_language_name":"Burmese",
304
  "bleu":55.7235911677,
 
305
  "speakers":32900000.0
306
  },
307
  {
308
  "target_language_name":"Catalan",
309
  "bleu":74.4595007932,
 
310
  "speakers":5100000.0
311
  },
312
  {
313
  "target_language_name":"Cebuano",
314
  "bleu":69.4557958655,
 
315
  "speakers":15900000.0
316
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
317
  {
318
  "target_language_name":"Chhattisgarhi",
319
  "bleu":47.9797501304,
 
320
  "speakers":16300000.0
321
  },
322
  {
323
  "target_language_name":"Chichewa",
324
  "bleu":59.7601680161,
 
325
  "speakers":12000000.0
326
  },
327
  {
328
  "target_language_name":"Chokwe",
329
  "bleu":10.1864074161,
 
330
  "speakers":0.0
331
  },
332
  {
333
  "target_language_name":"Chuvash",
334
  "bleu":45.0546658723,
 
335
  "speakers":1279650.0
336
  },
337
  {
338
  "target_language_name":"Crimean Tatar",
339
  "bleu":52.7050249448,
 
340
  "speakers":552740.0
341
  },
342
  {
343
  "target_language_name":"Croatian",
344
  "bleu":69.5456983662,
 
345
  "speakers":7000000.0
346
  },
347
  {
348
  "target_language_name":"Czech",
349
  "bleu":69.7112290599,
 
350
  "speakers":10700000.0
351
  },
352
  {
353
  "target_language_name":"Danish",
354
  "bleu":78.0935433284,
 
355
  "speakers":6000000.0
356
  },
357
  {
358
  "target_language_name":"Dari",
359
  "bleu":52.5539795795,
 
360
  "speakers":9600000.0
361
  },
362
  {
363
  "target_language_name":"Dholuo",
364
  "bleu":46.4119479071,
 
365
  "speakers":3000000.0
366
  },
367
  {
368
  "target_language_name":"Dogri",
369
  "bleu":44.9153535278,
 
370
  "speakers":2000000.0
371
  },
372
  {
373
  "target_language_name":"Dutch",
374
  "bleu":71.1849326315,
 
375
  "speakers":23100000.0
376
  },
377
  {
378
  "target_language_name":"Dzongkha",
379
  "bleu":44.3573814017,
 
380
  "speakers":237080.0
381
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
382
  {
383
  "target_language_name":"Esperanto",
384
  "bleu":69.6056577554,
 
385
  "speakers":2000000.0
386
  },
387
  {
388
  "target_language_name":"Faroese",
389
  "bleu":65.9147902483,
 
390
  "speakers":69150.0
391
  },
392
  {
393
  "target_language_name":"Fijian",
394
  "bleu":58.2892667246,
 
395
  "speakers":341270.0
396
  },
397
  {
398
  "target_language_name":"Filipino",
399
  "bleu":70.1928498378,
 
400
  "speakers":90000000.0
401
  },
402
  {
403
  "target_language_name":"Finnish",
404
  "bleu":70.9425029518,
 
405
  "speakers":5413380.0
406
  },
407
  {
408
  "target_language_name":"Fon",
409
  "bleu":25.2797773666,
 
410
  "speakers":1935500.0
411
  },
412
  {
413
  "target_language_name":"French",
414
  "bleu":79.3023871219,
 
415
  "speakers":208157220.0
416
  },
417
  {
418
  "target_language_name":"Friulian",
419
  "bleu":66.5488092372,
 
420
  "speakers":300000.0
421
  },
422
  {
423
  "target_language_name":"Galician",
424
  "bleu":68.7024786904,
 
425
  "speakers":2500000.0
426
  },
427
  {
428
  "target_language_name":"Ganda",
429
  "bleu":45.8693322936,
 
430
  "speakers":4100000.0
431
  },
432
  {
433
  "target_language_name":"Georgian",
434
  "bleu":61.0166361442,
 
435
  "speakers":3700000.0
436
  },
437
  {
438
  "target_language_name":"Gikuyu",
439
  "bleu":40.9288275291,
 
440
  "speakers":6623000.0
441
  },
 
 
 
 
 
 
442
  {
443
  "target_language_name":"Greek",
444
  "bleu":66.2347782153,
 
445
  "speakers":15000000.0
446
  },
447
  {
448
  "target_language_name":"Gujarati",
449
  "bleu":55.5884513452,
 
450
  "speakers":56400000.0
451
  },
452
  {
453
  "target_language_name":"Haitian Creole",
454
  "bleu":63.8532187591,
 
455
  "speakers":9600000.0
456
  },
 
 
 
 
 
 
457
  {
458
  "target_language_name":"Hausa",
459
  "bleu":56.3431957901,
 
460
  "speakers":43900000.0
461
  },
462
  {
463
  "target_language_name":"Hebrew",
464
  "bleu":72.0702990513,
 
465
  "speakers":9303950.0
466
  },
467
  {
468
  "target_language_name":"Hindi",
469
  "bleu":64.9362166898,
 
470
  "speakers":341000000.0
471
  },
472
  {
473
  "target_language_name":"Hungarian",
474
  "bleu":66.1301119408,
 
475
  "speakers":12600000.0
476
  },
477
  {
478
  "target_language_name":"Icelandic",
479
  "bleu":54.4330055353,
 
480
  "speakers":358000.0
481
  },
482
  {
483
  "target_language_name":"Igbo",
484
  "bleu":46.4017344934,
 
485
  "speakers":27000000.0
486
  },
487
  {
488
  "target_language_name":"Ilocano",
489
  "bleu":62.6058864594,
 
490
  "speakers":9100000.0
491
  },
492
  {
493
  "target_language_name":"Indonesian",
494
  "bleu":72.9087066262,
 
495
  "speakers":198996550.0
496
  },
 
 
 
 
 
 
497
  {
498
  "target_language_name":"Irish",
499
  "bleu":69.9725194524,
 
500
  "speakers":1030000.0
501
  },
502
  {
503
  "target_language_name":"Italian",
504
  "bleu":69.1588343572,
 
505
  "speakers":64819790.0
506
  },
507
  {
508
  "target_language_name":"Japanese",
509
  "bleu":49.9166135693,
 
510
  "speakers":128000000.0
511
  },
512
  {
513
  "target_language_name":"Javanese",
514
  "bleu":60.440335299,
 
515
  "speakers":84308740.0
516
  },
517
  {
518
  "target_language_name":"Jingpho",
519
  "bleu":43.5500581403,
 
520
  "speakers":940000.0
521
  },
522
  {
523
  "target_language_name":"Jula",
524
  "bleu":29.5415180297,
 
525
  "speakers":2700000.0
526
  },
527
  {
528
  "target_language_name":"Kabiy\u00e8",
529
  "bleu":22.5498504655,
 
530
  "speakers":1000000.0
531
  },
532
  {
533
  "target_language_name":"Kabuverdianu",
534
  "bleu":65.1106010391,
 
535
  "speakers":871000.0
536
  },
537
  {
538
  "target_language_name":"Kabyle",
539
  "bleu":41.1442992587,
 
540
  "speakers":5586000.0
541
  },
542
  {
543
  "target_language_name":"Kamba",
544
  "bleu":41.733489671,
 
545
  "speakers":3893000.0
546
  },
547
  {
548
  "target_language_name":"Kannada",
549
  "bleu":60.0142028332,
 
550
  "speakers":43600000.0
551
  },
552
  {
553
  "target_language_name":"Kashmiri",
554
  "bleu":22.3019416547,
 
555
  "speakers":6900000.0
556
  },
557
  {
558
  "target_language_name":"Kazakh",
559
  "bleu":61.1251621375,
 
560
  "speakers":13161980.0
561
  },
562
  {
563
  "target_language_name":"Khmer",
564
  "bleu":49.2098257043,
 
565
  "speakers":16600000.0
566
  },
567
  {
568
  "target_language_name":"Kimbundu",
569
  "bleu":5.8523457224,
 
570
  "speakers":0.0
571
  },
572
  {
573
  "target_language_name":"Kinyarwanda",
574
  "bleu":57.2410626756,
 
575
  "speakers":12100000.0
576
  },
577
  {
578
  "target_language_name":"Kituba (Democratic Republic of the Congo)",
579
  "bleu":52.8484601602,
 
580
  "speakers":0.0
581
  },
582
  {
583
  "target_language_name":"Korean",
584
  "bleu":43.6872285974,
 
585
  "speakers":77300000.0
586
  },
587
  {
588
  "target_language_name":"Kyrgyz",
589
  "bleu":57.0824422453,
 
590
  "speakers":4568480.0
591
  },
592
  {
593
  "target_language_name":"Lao",
594
  "bleu":60.0210909677,
 
595
  "speakers":5225552.0
596
  },
597
  {
598
  "target_language_name":"Latgalian",
599
  "bleu":56.4843556524,
 
600
  "speakers":200000.0
601
  },
 
 
 
 
 
 
602
  {
603
  "target_language_name":"Ligurian",
604
  "bleu":55.8530636302,
 
605
  "speakers":500000.0
606
  },
607
  {
608
  "target_language_name":"Limburgish",
609
  "bleu":59.4485504982,
 
610
  "speakers":1600000.0
611
  },
612
  {
613
  "target_language_name":"Lingala",
614
  "bleu":30.4322896531,
 
615
  "speakers":20000000.0
616
  },
617
  {
618
  "target_language_name":"Lithuanian",
619
  "bleu":67.1625695571,
 
620
  "speakers":4000000.0
621
  },
622
  {
623
  "target_language_name":"Lombard",
624
  "bleu":46.3884402674,
 
625
  "speakers":3900000.0
626
  },
627
  {
628
  "target_language_name":"Luba-Kasai",
629
  "bleu":45.0655291655,
 
630
  "speakers":6300000.0
631
  },
632
  {
633
  "target_language_name":"Luxembourgish",
634
  "bleu":70.8338190438,
 
635
  "speakers":391200.0
636
  },
637
  {
638
  "target_language_name":"Macedonian",
639
  "bleu":72.2733471437,
 
640
  "speakers":2000000.0
641
  },
642
  {
643
  "target_language_name":"Magahi",
644
  "bleu":58.5474221546,
 
645
  "speakers":20700000.0
646
  },
647
  {
648
  "target_language_name":"Maithili",
649
  "bleu":54.6530071391,
 
650
  "speakers":33900000.0
651
  },
652
  {
653
  "target_language_name":"Malayalam",
654
  "bleu":64.0655894091,
 
655
  "speakers":37100000.0
656
  },
657
  {
658
  "target_language_name":"Maltese",
659
  "bleu":80.0866777263,
 
660
  "speakers":570000.0
661
  },
 
 
 
 
 
 
662
  {
663
  "target_language_name":"Maori",
664
  "bleu":54.8319935643,
 
665
  "speakers":160000.0
666
  },
667
  {
668
  "target_language_name":"Marathi",
669
  "bleu":57.4434090711,
 
670
  "speakers":83100000.0
671
  },
 
 
 
 
 
 
672
  {
673
  "target_language_name":"Meitei",
674
  "bleu":41.2619945571,
 
675
  "speakers":1470000.0
676
  },
 
 
 
 
 
 
 
 
 
 
 
 
677
  {
678
  "target_language_name":"Minangkabau",
679
  "bleu":50.7407956197,
 
680
  "speakers":5530000.0
681
  },
682
  {
683
  "target_language_name":"Mizo",
684
  "bleu":51.6558017488,
 
685
  "speakers":500000.0
686
  },
687
  {
688
  "target_language_name":"Moore",
689
  "bleu":32.8458097983,
 
690
  "speakers":7600000.0
691
  },
 
 
 
 
 
 
 
 
 
 
 
 
692
  {
693
  "target_language_name":"Nepali",
694
  "bleu":55.2919347352,
 
695
  "speakers":0.0
696
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
697
  {
698
  "target_language_name":"Nuer",
699
  "bleu":16.5796987951,
 
700
  "speakers":900000.0
701
  },
702
  {
703
  "target_language_name":"N\u2019Ko",
704
  "bleu":32.483490799,
 
705
  "speakers":0.0
706
  },
707
  {
708
  "target_language_name":"Occitan",
709
  "bleu":71.532740184,
 
710
  "speakers":542000.0
711
  },
712
  {
713
  "target_language_name":"Odia",
714
  "bleu":57.3628096518,
 
715
  "speakers":34500000.0
716
  },
717
  {
718
  "target_language_name":"Pangasinan",
719
  "bleu":56.0048183827,
 
720
  "speakers":1100000.0
721
  },
722
  {
723
  "target_language_name":"Papiamentu",
724
  "bleu":69.7955328133,
 
725
  "speakers":321300.0
726
  },
 
 
 
 
 
 
727
  {
728
  "target_language_name":"Polish",
729
  "bleu":61.8768399674,
 
730
  "speakers":40200000.0
731
  },
732
  {
733
  "target_language_name":"Portuguese",
734
  "bleu":77.4978074222,
 
735
  "speakers":254300000.0
736
  },
737
  {
738
  "target_language_name":"Romanian",
739
  "bleu":76.4907159035,
 
740
  "speakers":24300000.0
741
  },
742
  {
743
  "target_language_name":"Rundi",
744
  "bleu":48.943513629,
 
745
  "speakers":10800000.0
746
  },
747
  {
748
  "target_language_name":"Russian",
749
  "bleu":71.1489441039,
 
750
  "speakers":171428900.0
751
  },
752
  {
753
  "target_language_name":"Samoan",
754
  "bleu":56.7138831423,
 
755
  "speakers":415720.0
756
  },
757
  {
758
  "target_language_name":"Sango",
759
  "bleu":34.8754222657,
 
760
  "speakers":4600000.0
761
  },
762
  {
763
  "target_language_name":"Sanskrit",
764
  "bleu":32.7813249911,
 
765
  "speakers":49736.0
766
  },
767
  {
768
  "target_language_name":"Santhali",
769
  "bleu":31.5119247269,
 
770
  "speakers":7200000.0
771
  },
772
  {
773
  "target_language_name":"Sardinian",
774
  "bleu":62.6903914771,
 
775
  "speakers":1300000.0
776
  },
777
  {
778
  "target_language_name":"Scottish Gaelic",
779
  "bleu":62.6044371338,
 
780
  "speakers":60130.0
781
  },
782
  {
783
  "target_language_name":"Serbian",
784
  "bleu":69.9691396176,
 
785
  "speakers":9000000.0
786
  },
787
  {
788
  "target_language_name":"Setswana",
789
  "bleu":55.2288890228,
 
790
  "speakers":4500000.0
791
  },
792
  {
793
  "target_language_name":"Shan",
794
  "bleu":29.2129948577,
 
795
  "speakers":3000000.0
796
  },
797
  {
798
  "target_language_name":"Shona",
799
  "bleu":51.5592191405,
 
800
  "speakers":9023000.0
801
  },
802
  {
803
  "target_language_name":"Sicilian",
804
  "bleu":58.5895359443,
 
805
  "speakers":4700000.0
806
  },
807
  {
808
  "target_language_name":"Silesian",
809
  "bleu":56.7836392069,
 
810
  "speakers":522000.0
811
  },
812
  {
813
  "target_language_name":"Sindhi",
814
  "bleu":48.1876056648,
 
815
  "speakers":25000000.0
816
  },
817
  {
818
  "target_language_name":"Sinhala",
819
  "bleu":56.7567311796,
 
820
  "speakers":15300000.0
821
  },
822
  {
823
  "target_language_name":"Slovak",
824
  "bleu":67.9284804086,
 
825
  "speakers":6000000.0
826
  },
827
  {
828
  "target_language_name":"Slovene",
829
  "bleu":72.5691270757,
 
830
  "speakers":2400000.0
831
  },
832
  {
833
  "target_language_name":"Somali",
834
  "bleu":55.3706496473,
 
835
  "speakers":16200000.0
836
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
837
  {
838
  "target_language_name":"Spanish",
839
  "bleu":63.8467073379,
 
840
  "speakers":485000000.0
841
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
842
  {
843
  "target_language_name":"Sunda",
844
  "bleu":56.4065999104,
 
845
  "speakers":32400000.0
846
  },
847
  {
848
  "target_language_name":"Swahili",
849
  "bleu":73.5199042142,
 
850
  "speakers":82300000.0
851
  },
852
  {
853
  "target_language_name":"Swati",
854
  "bleu":52.7746096439,
 
855
  "speakers":2034200.0
856
  },
857
  {
858
  "target_language_name":"Swedish",
859
  "bleu":77.421610247,
 
860
  "speakers":9244250.0
861
  },
862
  {
863
  "target_language_name":"Tajik",
864
  "bleu":60.9783684158,
 
865
  "speakers":14000000.0
866
  },
867
  {
868
  "target_language_name":"Tamasheq",
869
  "bleu":18.4319889721,
 
870
  "speakers":500000.0
871
  },
872
  {
873
  "target_language_name":"Tamil",
874
  "bleu":65.7863221054,
 
875
  "speakers":75000000.0
876
  },
877
  {
878
  "target_language_name":"Tatar",
879
  "bleu":60.3447467213,
 
880
  "speakers":5427318.0
881
  },
 
 
 
 
 
 
882
  {
883
  "target_language_name":"Telugu",
884
  "bleu":61.6352457629,
 
885
  "speakers":82000000.0
886
  },
887
  {
888
  "target_language_name":"Thai",
889
  "bleu":62.8125360944,
 
890
  "speakers":40000000.0
891
  },
892
  {
893
  "target_language_name":"Tigrigna",
894
  "bleu":32.8711961703,
 
895
  "speakers":7507780.0
896
  },
897
  {
898
  "target_language_name":"Tok Pisin",
899
  "bleu":56.5407760367,
 
900
  "speakers":4000000.0
901
  },
 
 
 
 
 
 
902
  {
903
  "target_language_name":"Tsonga",
904
  "bleu":58.3516573597,
 
905
  "speakers":13000000.0
906
  },
907
  {
908
  "target_language_name":"Tumbuka",
909
  "bleu":44.0490017392,
 
910
  "speakers":2680000.0
911
  },
 
 
 
 
 
 
912
  {
913
  "target_language_name":"Turkish",
914
  "bleu":67.1600625676,
 
915
  "speakers":82231620.0
916
  },
917
  {
918
  "target_language_name":"Turkmen",
919
  "bleu":60.5593705936,
 
920
  "speakers":16000000.0
921
  },
922
  {
923
  "target_language_name":"Twi",
924
  "bleu":44.7976562068,
 
925
  "speakers":3000000.0
926
  },
927
  {
928
  "target_language_name":"Ukrainian",
929
  "bleu":68.0976232544,
 
930
  "speakers":34710100.0
931
  },
932
  {
933
  "target_language_name":"Umbundu",
934
  "bleu":21.0802775597,
 
935
  "speakers":6000000.0
936
  },
937
  {
938
  "target_language_name":"Urdu",
939
  "bleu":61.1255457272,
 
940
  "speakers":94022900.0
941
  },
942
  {
943
  "target_language_name":"Uyghur",
944
  "bleu":53.5346877103,
 
945
  "speakers":10400000.0
946
  },
947
  {
948
  "target_language_name":"Venetian",
949
  "bleu":60.6140876271,
 
950
  "speakers":2000000.0
951
  },
952
  {
953
  "target_language_name":"Vietnamese",
954
  "bleu":70.3560749464,
 
955
  "speakers":76000000.0
956
  },
957
  {
958
  "target_language_name":"Waray-Waray",
959
  "bleu":66.3850231243,
 
960
  "speakers":3100000.0
961
  },
962
  {
963
  "target_language_name":"Welsh",
964
  "bleu":83.3437724474,
 
965
  "speakers":977366.0
966
  },
 
 
 
 
 
 
967
  {
968
  "target_language_name":"Wolof",
969
  "bleu":42.6430127569,
 
970
  "speakers":3700000.0
971
  },
972
  {
973
  "target_language_name":"Xhosa",
974
  "bleu":55.4688091009,
 
975
  "speakers":11000000.0
976
  },
 
 
 
 
 
 
977
  {
978
  "target_language_name":"Yoruba",
979
  "bleu":34.2642542268,
 
980
  "speakers":40000000.0
981
  },
 
 
 
 
 
 
982
  {
983
  "target_language_name":"Zulu",
984
  "bleu":59.1762078389,
 
985
  "speakers":15700000.0
986
  },
987
  {
988
  "target_language_name":"nno",
989
  "bleu":71.8615646296,
 
990
  "speakers":0.0
991
  },
992
  {
993
  "target_language_name":"\u00c9w\u00e9",
994
  "bleu":41.6614038791,
 
995
  "speakers":3000000.0
996
  }
997
  ]
 
1
  [
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  {
3
  "target_language_name":"Aceh",
4
  "bleu":39.1659660901,
5
+ "bert_score":0.8998966595,
6
  "speakers":3500032.0
7
  },
8
  {
9
  "target_language_name":"Afrikaans",
10
  "bleu":76.8900540777,
11
+ "bert_score":0.9481831173,
12
  "speakers":10300000.0
13
  },
14
  {
15
  "target_language_name":"Amharic",
16
  "bleu":43.1544568697,
17
+ "bert_score":0.989116921,
18
  "speakers":25000000.0
19
  },
20
  {
21
  "target_language_name":"Armenian",
22
  "bleu":64.6804400806,
23
+ "bert_score":0.9550812801,
24
  "speakers":6700000.0
25
  },
26
  {
27
  "target_language_name":"Assamese",
28
  "bleu":47.0351331605,
29
+ "bert_score":0.928119574,
30
  "speakers":15300000.0
31
  },
32
  {
33
  "target_language_name":"Asturian",
34
  "bleu":71.3445623493,
35
+ "bert_score":0.931475842,
36
  "speakers":450000.0
37
  },
38
  {
39
  "target_language_name":"Awadhi",
40
  "bleu":46.0797144146,
41
+ "bert_score":0.9333642821,
42
  "speakers":22000000.0
43
  },
44
+ {
45
+ "target_language_name":"Ayacucho Quechua",
46
+ "bleu":45.6534927028,
47
+ "bert_score":0.8731370111,
48
+ "speakers":918200.0
49
+ },
50
  {
51
  "target_language_name":"Bali (Indonesia)",
52
  "bleu":52.8752419159,
53
+ "bert_score":0.8934772114,
54
  "speakers":4000000.0
55
  },
56
  {
57
  "target_language_name":"Bamanankan",
58
  "bleu":38.6939091408,
59
+ "bert_score":0.8872043769,
60
  "speakers":2700000.0
61
  },
62
  {
63
  "target_language_name":"Banjar",
64
  "bleu":46.5453977487,
65
+ "bert_score":0.91599799,
66
  "speakers":3500000.0
67
  },
68
  {
69
  "target_language_name":"Bashkort",
70
  "bleu":57.5453842927,
71
+ "bert_score":0.9298217595,
72
  "speakers":1200000.0
73
  },
74
  {
75
  "target_language_name":"Basque",
76
  "bleu":65.8968721377,
77
+ "bert_score":0.9192741295,
78
  "speakers":750000.0
79
  },
80
  {
81
  "target_language_name":"Belarusian",
82
  "bleu":54.5195166442,
83
+ "bert_score":0.9329862595,
84
  "speakers":7900000.0
85
  },
86
  {
87
  "target_language_name":"Bemba",
88
  "bleu":47.8068548956,
89
+ "bert_score":0.889907831,
90
  "speakers":3600000.0
91
  },
92
  {
93
  "target_language_name":"Bengali",
94
  "bleu":57.1417588816,
95
+ "bert_score":0.9483523647,
96
  "speakers":300000000.0
97
  },
98
  {
99
  "target_language_name":"Bhojpuri",
100
  "bleu":44.5412337907,
101
+ "bert_score":0.9288184981,
102
  "speakers":52200000.0
103
  },
104
  {
105
  "target_language_name":"Bokm\u00e5l",
106
  "bleu":77.4939513016,
107
+ "bert_score":0.9550971886,
108
  "speakers":4000000.0
109
  },
110
  {
111
  "target_language_name":"Boro (India)",
112
  "bleu":36.1100474969,
113
+ "bert_score":0.925187854,
114
  "speakers":1482929.0
115
  },
116
  {
117
  "target_language_name":"Bosnian",
118
  "bleu":72.5488027131,
119
+ "bert_score":0.947693936,
120
  "speakers":3500000.0
121
  },
122
  {
123
  "target_language_name":"Bugis",
124
  "bleu":44.8388170031,
125
+ "bert_score":0.8647923966,
126
  "speakers":5017800.0
127
  },
128
  {
129
  "target_language_name":"Bulgarian",
130
  "bleu":72.9695925131,
131
+ "bert_score":0.9545443177,
132
  "speakers":9000000.0
133
  },
134
  {
135
  "target_language_name":"Burmese",
136
  "bleu":55.7235911677,
137
+ "bert_score":0.9759751062,
138
  "speakers":32900000.0
139
  },
140
  {
141
  "target_language_name":"Catalan",
142
  "bleu":74.4595007932,
143
+ "bert_score":0.9464139263,
144
  "speakers":5100000.0
145
  },
146
  {
147
  "target_language_name":"Cebuano",
148
  "bleu":69.4557958655,
149
+ "bert_score":0.9321281234,
150
  "speakers":15900000.0
151
  },
152
+ {
153
+ "target_language_name":"Central Aymara",
154
+ "bleu":42.7698436669,
155
+ "bert_score":0.8625142018,
156
+ "speakers":0.0
157
+ },
158
+ {
159
+ "target_language_name":"Central Kurdish",
160
+ "bleu":59.1927910692,
161
+ "bert_score":0.9332568824,
162
+ "speakers":7250000.0
163
+ },
164
+ {
165
+ "target_language_name":"Central Tibetan",
166
+ "bleu":51.349075274,
167
+ "bert_score":0.967157503,
168
+ "speakers":1200000.0
169
+ },
170
  {
171
  "target_language_name":"Chhattisgarhi",
172
  "bleu":47.9797501304,
173
+ "bert_score":0.9363766015,
174
  "speakers":16300000.0
175
  },
176
  {
177
  "target_language_name":"Chichewa",
178
  "bleu":59.7601680161,
179
+ "bert_score":0.9069253902,
180
  "speakers":12000000.0
181
  },
182
  {
183
  "target_language_name":"Chokwe",
184
  "bleu":10.1864074161,
185
+ "bert_score":0.727788798,
186
  "speakers":0.0
187
  },
188
  {
189
  "target_language_name":"Chuvash",
190
  "bleu":45.0546658723,
191
+ "bert_score":0.9203916192,
192
  "speakers":1279650.0
193
  },
194
  {
195
  "target_language_name":"Crimean Tatar",
196
  "bleu":52.7050249448,
197
+ "bert_score":0.8972040812,
198
  "speakers":552740.0
199
  },
200
  {
201
  "target_language_name":"Croatian",
202
  "bleu":69.5456983662,
203
+ "bert_score":0.9444877982,
204
  "speakers":7000000.0
205
  },
206
  {
207
  "target_language_name":"Czech",
208
  "bleu":69.7112290599,
209
+ "bert_score":0.9384464244,
210
  "speakers":10700000.0
211
  },
212
  {
213
  "target_language_name":"Danish",
214
  "bleu":78.0935433284,
215
+ "bert_score":0.9506490747,
216
  "speakers":6000000.0
217
  },
218
  {
219
  "target_language_name":"Dari",
220
  "bleu":52.5539795795,
221
+ "bert_score":0.9466466506,
222
  "speakers":9600000.0
223
  },
224
  {
225
  "target_language_name":"Dholuo",
226
  "bleu":46.4119479071,
227
+ "bert_score":0.8803233822,
228
  "speakers":3000000.0
229
  },
230
  {
231
  "target_language_name":"Dogri",
232
  "bleu":44.9153535278,
233
+ "bert_score":0.934070154,
234
  "speakers":2000000.0
235
  },
236
  {
237
  "target_language_name":"Dutch",
238
  "bleu":71.1849326315,
239
+ "bert_score":0.9376831949,
240
  "speakers":23100000.0
241
  },
242
  {
243
  "target_language_name":"Dzongkha",
244
  "bleu":44.3573814017,
245
+ "bert_score":0.9664796074,
246
  "speakers":237080.0
247
  },
248
+ {
249
+ "target_language_name":"Eastern Punjabi",
250
+ "bleu":60.468441109,
251
+ "bert_score":0.988244007,
252
+ "speakers":125000000.0
253
+ },
254
+ {
255
+ "target_language_name":"Eastern Yiddish",
256
+ "bleu":47.5562009325,
257
+ "bert_score":0.9590989411,
258
+ "speakers":0.0
259
+ },
260
+ {
261
+ "target_language_name":"Egyptian Arabic",
262
+ "bleu":53.6818081038,
263
+ "bert_score":0.9394114673,
264
+ "speakers":100542400.0
265
+ },
266
+ {
267
+ "target_language_name":"English",
268
+ "bleu":75.3501173486,
269
+ "bert_score":0.8807334363,
270
+ "speakers":1132366680.0
271
+ },
272
  {
273
  "target_language_name":"Esperanto",
274
  "bleu":69.6056577554,
275
+ "bert_score":0.9302131255,
276
  "speakers":2000000.0
277
  },
278
  {
279
  "target_language_name":"Faroese",
280
  "bleu":65.9147902483,
281
+ "bert_score":0.9332413753,
282
  "speakers":69150.0
283
  },
284
  {
285
  "target_language_name":"Fijian",
286
  "bleu":58.2892667246,
287
+ "bert_score":0.9183188617,
288
  "speakers":341270.0
289
  },
290
  {
291
  "target_language_name":"Filipino",
292
  "bleu":70.1928498378,
293
+ "bert_score":0.9269425154,
294
  "speakers":90000000.0
295
  },
296
  {
297
  "target_language_name":"Finnish",
298
  "bleu":70.9425029518,
299
+ "bert_score":0.9320579688,
300
  "speakers":5413380.0
301
  },
302
  {
303
  "target_language_name":"Fon",
304
  "bleu":25.2797773666,
305
+ "bert_score":0.8664443592,
306
  "speakers":1935500.0
307
  },
308
  {
309
  "target_language_name":"French",
310
  "bleu":79.3023871219,
311
+ "bert_score":0.9554367423,
312
  "speakers":208157220.0
313
  },
314
  {
315
  "target_language_name":"Friulian",
316
  "bleu":66.5488092372,
317
+ "bert_score":0.9255799611,
318
  "speakers":300000.0
319
  },
320
  {
321
  "target_language_name":"Galician",
322
  "bleu":68.7024786904,
323
+ "bert_score":0.9283550183,
324
  "speakers":2500000.0
325
  },
326
  {
327
  "target_language_name":"Ganda",
328
  "bleu":45.8693322936,
329
+ "bert_score":0.88344028,
330
  "speakers":4100000.0
331
  },
332
  {
333
  "target_language_name":"Georgian",
334
  "bleu":61.0166361442,
335
+ "bert_score":0.9546662311,
336
  "speakers":3700000.0
337
  },
338
  {
339
  "target_language_name":"Gikuyu",
340
  "bleu":40.9288275291,
341
+ "bert_score":0.8850945433,
342
  "speakers":6623000.0
343
  },
344
+ {
345
+ "target_language_name":"Goan Konkani",
346
+ "bleu":47.1084017945,
347
+ "bert_score":0.9314287245,
348
+ "speakers":3633900.0
349
+ },
350
  {
351
  "target_language_name":"Greek",
352
  "bleu":66.2347782153,
353
+ "bert_score":0.9577525119,
354
  "speakers":15000000.0
355
  },
356
  {
357
  "target_language_name":"Gujarati",
358
  "bleu":55.5884513452,
359
+ "bert_score":0.9753397226,
360
  "speakers":56400000.0
361
  },
362
  {
363
  "target_language_name":"Haitian Creole",
364
  "bleu":63.8532187591,
365
+ "bert_score":0.93236112,
366
  "speakers":9600000.0
367
  },
368
+ {
369
+ "target_language_name":"Halh Mongolian",
370
+ "bleu":58.5037789971,
371
+ "bert_score":0.9380823056,
372
+ "speakers":2704030.0
373
+ },
374
  {
375
  "target_language_name":"Hausa",
376
  "bleu":56.3431957901,
377
+ "bert_score":0.9012877802,
378
  "speakers":43900000.0
379
  },
380
  {
381
  "target_language_name":"Hebrew",
382
  "bleu":72.0702990513,
383
+ "bert_score":0.964064618,
384
  "speakers":9303950.0
385
  },
386
  {
387
  "target_language_name":"Hindi",
388
  "bleu":64.9362166898,
389
+ "bert_score":0.9463364283,
390
  "speakers":341000000.0
391
  },
392
  {
393
  "target_language_name":"Hungarian",
394
  "bleu":66.1301119408,
395
+ "bert_score":0.9249218643,
396
  "speakers":12600000.0
397
  },
398
  {
399
  "target_language_name":"Icelandic",
400
  "bleu":54.4330055353,
401
+ "bert_score":0.9120460276,
402
  "speakers":358000.0
403
  },
404
  {
405
  "target_language_name":"Igbo",
406
  "bleu":46.4017344934,
407
+ "bert_score":0.9137314638,
408
  "speakers":27000000.0
409
  },
410
  {
411
  "target_language_name":"Ilocano",
412
  "bleu":62.6058864594,
413
+ "bert_score":0.9115280092,
414
  "speakers":9100000.0
415
  },
416
  {
417
  "target_language_name":"Indonesian",
418
  "bleu":72.9087066262,
419
+ "bert_score":0.9301403503,
420
  "speakers":198996550.0
421
  },
422
+ {
423
+ "target_language_name":"Iranian Persian",
424
+ "bleu":57.6444169698,
425
+ "bert_score":0.9476486345,
426
+ "speakers":52800000.0
427
+ },
428
  {
429
  "target_language_name":"Irish",
430
  "bleu":69.9725194524,
431
+ "bert_score":0.9440232972,
432
  "speakers":1030000.0
433
  },
434
  {
435
  "target_language_name":"Italian",
436
  "bleu":69.1588343572,
437
+ "bert_score":0.9358606537,
438
  "speakers":64819790.0
439
  },
440
  {
441
  "target_language_name":"Japanese",
442
  "bleu":49.9166135693,
443
+ "bert_score":0.9425287286,
444
  "speakers":128000000.0
445
  },
446
  {
447
  "target_language_name":"Javanese",
448
  "bleu":60.440335299,
449
+ "bert_score":0.9125308077,
450
  "speakers":84308740.0
451
  },
452
  {
453
  "target_language_name":"Jingpho",
454
  "bleu":43.5500581403,
455
+ "bert_score":0.8727998992,
456
  "speakers":940000.0
457
  },
458
  {
459
  "target_language_name":"Jula",
460
  "bleu":29.5415180297,
461
+ "bert_score":0.822332112,
462
  "speakers":2700000.0
463
  },
464
  {
465
  "target_language_name":"Kabiy\u00e8",
466
  "bleu":22.5498504655,
467
+ "bert_score":0.8587520639,
468
  "speakers":1000000.0
469
  },
470
  {
471
  "target_language_name":"Kabuverdianu",
472
  "bleu":65.1106010391,
473
+ "bert_score":0.9213403026,
474
  "speakers":871000.0
475
  },
476
  {
477
  "target_language_name":"Kabyle",
478
  "bleu":41.1442992587,
479
+ "bert_score":0.8803219795,
480
  "speakers":5586000.0
481
  },
482
  {
483
  "target_language_name":"Kamba",
484
  "bleu":41.733489671,
485
+ "bert_score":0.8780206362,
486
  "speakers":3893000.0
487
  },
488
  {
489
  "target_language_name":"Kannada",
490
  "bleu":60.0142028332,
491
+ "bert_score":0.9730932295,
492
  "speakers":43600000.0
493
  },
494
  {
495
  "target_language_name":"Kashmiri",
496
  "bleu":22.3019416547,
497
+ "bert_score":0.8984790143,
498
  "speakers":6900000.0
499
  },
500
  {
501
  "target_language_name":"Kazakh",
502
  "bleu":61.1251621375,
503
+ "bert_score":0.9379647116,
504
  "speakers":13161980.0
505
  },
506
  {
507
  "target_language_name":"Khmer",
508
  "bleu":49.2098257043,
509
+ "bert_score":0.8907732884,
510
  "speakers":16600000.0
511
  },
512
  {
513
  "target_language_name":"Kimbundu",
514
  "bleu":5.8523457224,
515
+ "bert_score":0.6849321783,
516
  "speakers":0.0
517
  },
518
  {
519
  "target_language_name":"Kinyarwanda",
520
  "bleu":57.2410626756,
521
+ "bert_score":0.906923449,
522
  "speakers":12100000.0
523
  },
524
  {
525
  "target_language_name":"Kituba (Democratic Republic of the Congo)",
526
  "bleu":52.8484601602,
527
+ "bert_score":0.9017938395,
528
  "speakers":0.0
529
  },
530
  {
531
  "target_language_name":"Korean",
532
  "bleu":43.6872285974,
533
+ "bert_score":0.9579092761,
534
  "speakers":77300000.0
535
  },
536
  {
537
  "target_language_name":"Kyrgyz",
538
  "bleu":57.0824422453,
539
+ "bert_score":0.9317750076,
540
  "speakers":4568480.0
541
  },
542
  {
543
  "target_language_name":"Lao",
544
  "bleu":60.0210909677,
545
+ "bert_score":0.904438438,
546
  "speakers":5225552.0
547
  },
548
  {
549
  "target_language_name":"Latgalian",
550
  "bleu":56.4843556524,
551
+ "bert_score":0.9078494012,
552
  "speakers":200000.0
553
  },
554
+ {
555
+ "target_language_name":"Levantine Arabic",
556
+ "bleu":56.0898634013,
557
+ "bert_score":0.9437467565,
558
+ "speakers":44000000.0
559
+ },
560
  {
561
  "target_language_name":"Ligurian",
562
  "bleu":55.8530636302,
563
+ "bert_score":0.9047620773,
564
  "speakers":500000.0
565
  },
566
  {
567
  "target_language_name":"Limburgish",
568
  "bleu":59.4485504982,
569
+ "bert_score":0.8987095455,
570
  "speakers":1600000.0
571
  },
572
  {
573
  "target_language_name":"Lingala",
574
  "bleu":30.4322896531,
575
+ "bert_score":0.8553236572,
576
  "speakers":20000000.0
577
  },
578
  {
579
  "target_language_name":"Lithuanian",
580
  "bleu":67.1625695571,
581
+ "bert_score":0.9154702902,
582
  "speakers":4000000.0
583
  },
584
  {
585
  "target_language_name":"Lombard",
586
  "bleu":46.3884402674,
587
+ "bert_score":0.8643471499,
588
  "speakers":3900000.0
589
  },
590
  {
591
  "target_language_name":"Luba-Kasai",
592
  "bleu":45.0655291655,
593
+ "bert_score":0.8749240279,
594
  "speakers":6300000.0
595
  },
596
  {
597
  "target_language_name":"Luxembourgish",
598
  "bleu":70.8338190438,
599
+ "bert_score":0.9297492107,
600
  "speakers":391200.0
601
  },
602
  {
603
  "target_language_name":"Macedonian",
604
  "bleu":72.2733471437,
605
+ "bert_score":0.9558346649,
606
  "speakers":2000000.0
607
  },
608
  {
609
  "target_language_name":"Magahi",
610
  "bleu":58.5474221546,
611
+ "bert_score":0.9458349566,
612
  "speakers":20700000.0
613
  },
614
  {
615
  "target_language_name":"Maithili",
616
  "bleu":54.6530071391,
617
+ "bert_score":0.9433513383,
618
  "speakers":33900000.0
619
  },
620
  {
621
  "target_language_name":"Malayalam",
622
  "bleu":64.0655894091,
623
+ "bert_score":0.9803075671,
624
  "speakers":37100000.0
625
  },
626
  {
627
  "target_language_name":"Maltese",
628
  "bleu":80.0866777263,
629
+ "bert_score":0.9520254652,
630
  "speakers":570000.0
631
  },
632
+ {
633
+ "target_language_name":"Mandarin Chinese",
634
+ "bleu":42.5300166785,
635
+ "bert_score":0.9634857118,
636
+ "speakers":1074000000.0
637
+ },
638
  {
639
  "target_language_name":"Maori",
640
  "bleu":54.8319935643,
641
+ "bert_score":0.9185245017,
642
  "speakers":160000.0
643
  },
644
  {
645
  "target_language_name":"Marathi",
646
  "bleu":57.4434090711,
647
+ "bert_score":0.9421781262,
648
  "speakers":83100000.0
649
  },
650
+ {
651
+ "target_language_name":"Meadow Mari",
652
+ "bleu":49.7911680582,
653
+ "bert_score":0.9295116961,
654
+ "speakers":482000.0
655
+ },
656
  {
657
  "target_language_name":"Meitei",
658
  "bleu":41.2619945571,
659
+ "bert_score":0.9528288851,
660
  "speakers":1470000.0
661
  },
662
+ {
663
+ "target_language_name":"Merina Malagasy",
664
+ "bleu":61.0968434546,
665
+ "bert_score":0.9032936792,
666
+ "speakers":0.0
667
+ },
668
+ {
669
+ "target_language_name":"Mesopotamian Arabic",
670
+ "bleu":49.5184865297,
671
+ "bert_score":0.9382626355,
672
+ "speakers":15700000.0
673
+ },
674
  {
675
  "target_language_name":"Minangkabau",
676
  "bleu":50.7407956197,
677
+ "bert_score":0.9252789746,
678
  "speakers":5530000.0
679
  },
680
  {
681
  "target_language_name":"Mizo",
682
  "bleu":51.6558017488,
683
+ "bert_score":0.8875152906,
684
  "speakers":500000.0
685
  },
686
  {
687
  "target_language_name":"Moore",
688
  "bleu":32.8458097983,
689
+ "bert_score":0.8583020627,
690
  "speakers":7600000.0
691
  },
692
+ {
693
+ "target_language_name":"Moroccan Arabic",
694
+ "bleu":49.3082976781,
695
+ "bert_score":0.9317501009,
696
+ "speakers":27500000.0
697
+ },
698
+ {
699
+ "target_language_name":"Najdi Arabic",
700
+ "bleu":46.4102430377,
701
+ "bert_score":0.9332984229,
702
+ "speakers":0.0
703
+ },
704
  {
705
  "target_language_name":"Nepali",
706
  "bleu":55.2919347352,
707
+ "bert_score":0.9358912428,
708
  "speakers":0.0
709
  },
710
+ {
711
+ "target_language_name":"Nigerian Fulfulde",
712
+ "bleu":28.1761055913,
713
+ "bert_score":0.8343587597,
714
+ "speakers":14500000.0
715
+ },
716
+ {
717
+ "target_language_name":"North Azerbaijani",
718
+ "bleu":55.5265107063,
719
+ "bert_score":0.9145456314,
720
+ "speakers":9220610.0
721
+ },
722
+ {
723
+ "target_language_name":"Northern Kurdish",
724
+ "bleu":55.7965878227,
725
+ "bert_score":0.9104436457,
726
+ "speakers":14600000.0
727
+ },
728
+ {
729
+ "target_language_name":"Northern Sotho",
730
+ "bleu":62.8769401692,
731
+ "bert_score":0.9261207898,
732
+ "speakers":4100000.0
733
+ },
734
+ {
735
+ "target_language_name":"Northern Uzbek",
736
+ "bleu":63.205573851,
737
+ "bert_score":0.9120756924,
738
+ "speakers":26912410.0
739
+ },
740
  {
741
  "target_language_name":"Nuer",
742
  "bleu":16.5796987951,
743
+ "bert_score":0.8528214693,
744
  "speakers":900000.0
745
  },
746
  {
747
  "target_language_name":"N\u2019Ko",
748
  "bleu":32.483490799,
749
+ "bert_score":0.9823745767,
750
  "speakers":0.0
751
  },
752
  {
753
  "target_language_name":"Occitan",
754
  "bleu":71.532740184,
755
+ "bert_score":0.9337525626,
756
  "speakers":542000.0
757
  },
758
  {
759
  "target_language_name":"Odia",
760
  "bleu":57.3628096518,
761
+ "bert_score":0.9768644154,
762
  "speakers":34500000.0
763
  },
764
  {
765
  "target_language_name":"Pangasinan",
766
  "bleu":56.0048183827,
767
+ "bert_score":0.8906280657,
768
  "speakers":1100000.0
769
  },
770
  {
771
  "target_language_name":"Papiamentu",
772
  "bleu":69.7955328133,
773
+ "bert_score":0.9325902323,
774
  "speakers":321300.0
775
  },
776
+ {
777
+ "target_language_name":"Paraguayan Guaran\u00ed",
778
+ "bleu":41.7929863707,
779
+ "bert_score":0.8764786462,
780
+ "speakers":0.0
781
+ },
782
  {
783
  "target_language_name":"Polish",
784
  "bleu":61.8768399674,
785
+ "bert_score":0.9179250948,
786
  "speakers":40200000.0
787
  },
788
  {
789
  "target_language_name":"Portuguese",
790
  "bleu":77.4978074222,
791
+ "bert_score":0.9494876027,
792
  "speakers":254300000.0
793
  },
794
  {
795
  "target_language_name":"Romanian",
796
  "bleu":76.4907159035,
797
+ "bert_score":0.9455295324,
798
  "speakers":24300000.0
799
  },
800
  {
801
  "target_language_name":"Rundi",
802
  "bleu":48.943513629,
803
+ "bert_score":0.8933652222,
804
  "speakers":10800000.0
805
  },
806
  {
807
  "target_language_name":"Russian",
808
  "bleu":71.1489441039,
809
+ "bert_score":0.9518508852,
810
  "speakers":171428900.0
811
  },
812
  {
813
  "target_language_name":"Samoan",
814
  "bleu":56.7138831423,
815
+ "bert_score":0.9166683555,
816
  "speakers":415720.0
817
  },
818
  {
819
  "target_language_name":"Sango",
820
  "bleu":34.8754222657,
821
+ "bert_score":0.8720244229,
822
  "speakers":4600000.0
823
  },
824
  {
825
  "target_language_name":"Sanskrit",
826
  "bleu":32.7813249911,
827
+ "bert_score":0.8987655501,
828
  "speakers":49736.0
829
  },
830
  {
831
  "target_language_name":"Santhali",
832
  "bleu":31.5119247269,
833
+ "bert_score":0.944095705,
834
  "speakers":7200000.0
835
  },
836
  {
837
  "target_language_name":"Sardinian",
838
  "bleu":62.6903914771,
839
+ "bert_score":0.9118991812,
840
  "speakers":1300000.0
841
  },
842
  {
843
  "target_language_name":"Scottish Gaelic",
844
  "bleu":62.6044371338,
845
+ "bert_score":0.9264988482,
846
  "speakers":60130.0
847
  },
848
  {
849
  "target_language_name":"Serbian",
850
  "bleu":69.9691396176,
851
+ "bert_score":0.9582955678,
852
  "speakers":9000000.0
853
  },
854
  {
855
  "target_language_name":"Setswana",
856
  "bleu":55.2288890228,
857
+ "bert_score":0.9117900888,
858
  "speakers":4500000.0
859
  },
860
  {
861
  "target_language_name":"Shan",
862
  "bleu":29.2129948577,
863
+ "bert_score":0.9378574808,
864
  "speakers":3000000.0
865
  },
866
  {
867
  "target_language_name":"Shona",
868
  "bleu":51.5592191405,
869
+ "bert_score":0.8798740129,
870
  "speakers":9023000.0
871
  },
872
  {
873
  "target_language_name":"Sicilian",
874
  "bleu":58.5895359443,
875
+ "bert_score":0.90428345,
876
  "speakers":4700000.0
877
  },
878
  {
879
  "target_language_name":"Silesian",
880
  "bleu":56.7836392069,
881
+ "bert_score":0.9106028736,
882
  "speakers":522000.0
883
  },
884
  {
885
  "target_language_name":"Sindhi",
886
  "bleu":48.1876056648,
887
+ "bert_score":0.936702015,
888
  "speakers":25000000.0
889
  },
890
  {
891
  "target_language_name":"Sinhala",
892
  "bleu":56.7567311796,
893
+ "bert_score":0.9713358581,
894
  "speakers":15300000.0
895
  },
896
  {
897
  "target_language_name":"Slovak",
898
  "bleu":67.9284804086,
899
+ "bert_score":0.9360236605,
900
  "speakers":6000000.0
901
  },
902
  {
903
  "target_language_name":"Slovene",
904
  "bleu":72.5691270757,
905
+ "bert_score":0.9432346245,
906
  "speakers":2400000.0
907
  },
908
  {
909
  "target_language_name":"Somali",
910
  "bleu":55.3706496473,
911
+ "bert_score":0.908571593,
912
  "speakers":16200000.0
913
  },
914
+ {
915
+ "target_language_name":"South Azerbaijani",
916
+ "bleu":44.3712804302,
917
+ "bert_score":0.9420697371,
918
+ "speakers":15000000.0
919
+ },
920
+ {
921
+ "target_language_name":"Southern Pashto",
922
+ "bleu":38.3124819374,
923
+ "bert_score":0.921268179,
924
+ "speakers":10900000.0
925
+ },
926
+ {
927
+ "target_language_name":"Southern Sotho",
928
+ "bleu":56.735299554,
929
+ "bert_score":0.9102749407,
930
+ "speakers":6000000.0
931
+ },
932
+ {
933
+ "target_language_name":"Southwestern Dinka",
934
+ "bleu":17.5913281403,
935
+ "bert_score":0.8016291638,
936
+ "speakers":0.0
937
+ },
938
  {
939
  "target_language_name":"Spanish",
940
  "bleu":63.8467073379,
941
+ "bert_score":0.9224406302,
942
  "speakers":485000000.0
943
  },
944
+ {
945
+ "target_language_name":"Standard Arabic",
946
+ "bleu":56.8831262708,
947
+ "bert_score":0.9168330083,
948
+ "speakers":0.0
949
+ },
950
+ {
951
+ "target_language_name":"Standard Estonian",
952
+ "bleu":67.4156919517,
953
+ "bert_score":0.9277306815,
954
+ "speakers":1164770.0
955
+ },
956
+ {
957
+ "target_language_name":"Standard German",
958
+ "bleu":77.1966515107,
959
+ "bert_score":0.9468763133,
960
+ "speakers":105000000.0
961
+ },
962
+ {
963
+ "target_language_name":"Standard Latvian",
964
+ "bleu":65.0833210037,
965
+ "bert_score":0.9217625757,
966
+ "speakers":0.0
967
+ },
968
+ {
969
+ "target_language_name":"Standard Malay",
970
+ "bleu":74.2657232798,
971
+ "bert_score":0.9445500493,
972
+ "speakers":0.0
973
+ },
974
+ {
975
+ "target_language_name":"Standard Moroccan Tamazight",
976
+ "bleu":35.6247648109,
977
+ "bert_score":0.9847298423,
978
+ "speakers":0.0
979
+ },
980
  {
981
  "target_language_name":"Sunda",
982
  "bleu":56.4065999104,
983
+ "bert_score":0.9077177823,
984
  "speakers":32400000.0
985
  },
986
  {
987
  "target_language_name":"Swahili",
988
  "bleu":73.5199042142,
989
+ "bert_score":0.9450787365,
990
  "speakers":82300000.0
991
  },
992
  {
993
  "target_language_name":"Swati",
994
  "bleu":52.7746096439,
995
+ "bert_score":0.8899940272,
996
  "speakers":2034200.0
997
  },
998
  {
999
  "target_language_name":"Swedish",
1000
  "bleu":77.421610247,
1001
+ "bert_score":0.9571870168,
1002
  "speakers":9244250.0
1003
  },
1004
  {
1005
  "target_language_name":"Tajik",
1006
  "bleu":60.9783684158,
1007
+ "bert_score":0.9378365338,
1008
  "speakers":14000000.0
1009
  },
1010
  {
1011
  "target_language_name":"Tamasheq",
1012
  "bleu":18.4319889721,
1013
+ "bert_score":0.8427422295,
1014
  "speakers":500000.0
1015
  },
1016
  {
1017
  "target_language_name":"Tamil",
1018
  "bleu":65.7863221054,
1019
+ "bert_score":0.9536473691,
1020
  "speakers":75000000.0
1021
  },
1022
  {
1023
  "target_language_name":"Tatar",
1024
  "bleu":60.3447467213,
1025
+ "bert_score":0.9364115715,
1026
  "speakers":5427318.0
1027
  },
1028
+ {
1029
+ "target_language_name":"Ta\u2019izzi-Adeni Arabic",
1030
+ "bleu":49.4139335281,
1031
+ "bert_score":0.9354432185,
1032
+ "speakers":10500000.0
1033
+ },
1034
  {
1035
  "target_language_name":"Telugu",
1036
  "bleu":61.6352457629,
1037
+ "bert_score":0.9790697515,
1038
  "speakers":82000000.0
1039
  },
1040
  {
1041
  "target_language_name":"Thai",
1042
  "bleu":62.8125360944,
1043
+ "bert_score":0.9225328485,
1044
  "speakers":40000000.0
1045
  },
1046
  {
1047
  "target_language_name":"Tigrigna",
1048
  "bleu":32.8711961703,
1049
+ "bert_score":0.9852415164,
1050
  "speakers":7507780.0
1051
  },
1052
  {
1053
  "target_language_name":"Tok Pisin",
1054
  "bleu":56.5407760367,
1055
+ "bert_score":0.9031182428,
1056
  "speakers":4000000.0
1057
  },
1058
+ {
1059
+ "target_language_name":"Tosk Albanian",
1060
+ "bleu":69.4218765092,
1061
+ "bert_score":0.9402680953,
1062
+ "speakers":3000000.0
1063
+ },
1064
  {
1065
  "target_language_name":"Tsonga",
1066
  "bleu":58.3516573597,
1067
+ "bert_score":0.9134832978,
1068
  "speakers":13000000.0
1069
  },
1070
  {
1071
  "target_language_name":"Tumbuka",
1072
  "bleu":44.0490017392,
1073
+ "bert_score":0.8865564326,
1074
  "speakers":2680000.0
1075
  },
1076
+ {
1077
+ "target_language_name":"Tunisian Arabic",
1078
+ "bleu":49.6714090744,
1079
+ "bert_score":0.9337966998,
1080
+ "speakers":11600000.0
1081
+ },
1082
  {
1083
  "target_language_name":"Turkish",
1084
  "bleu":67.1600625676,
1085
+ "bert_score":0.9309494158,
1086
  "speakers":82231620.0
1087
  },
1088
  {
1089
  "target_language_name":"Turkmen",
1090
  "bleu":60.5593705936,
1091
+ "bert_score":0.9125106474,
1092
  "speakers":16000000.0
1093
  },
1094
  {
1095
  "target_language_name":"Twi",
1096
  "bleu":44.7976562068,
1097
+ "bert_score":0.8913615406,
1098
  "speakers":3000000.0
1099
  },
1100
  {
1101
  "target_language_name":"Ukrainian",
1102
  "bleu":68.0976232544,
1103
+ "bert_score":0.9468558848,
1104
  "speakers":34710100.0
1105
  },
1106
  {
1107
  "target_language_name":"Umbundu",
1108
  "bleu":21.0802775597,
1109
+ "bert_score":0.8461364289,
1110
  "speakers":6000000.0
1111
  },
1112
  {
1113
  "target_language_name":"Urdu",
1114
  "bleu":61.1255457272,
1115
+ "bert_score":0.953888009,
1116
  "speakers":94022900.0
1117
  },
1118
  {
1119
  "target_language_name":"Uyghur",
1120
  "bleu":53.5346877103,
1121
+ "bert_score":0.9397906005,
1122
  "speakers":10400000.0
1123
  },
1124
  {
1125
  "target_language_name":"Venetian",
1126
  "bleu":60.6140876271,
1127
+ "bert_score":0.9080212533,
1128
  "speakers":2000000.0
1129
  },
1130
  {
1131
  "target_language_name":"Vietnamese",
1132
  "bleu":70.3560749464,
1133
+ "bert_score":0.9527418713,
1134
  "speakers":76000000.0
1135
  },
1136
  {
1137
  "target_language_name":"Waray-Waray",
1138
  "bleu":66.3850231243,
1139
+ "bert_score":0.920412008,
1140
  "speakers":3100000.0
1141
  },
1142
  {
1143
  "target_language_name":"Welsh",
1144
  "bleu":83.3437724474,
1145
+ "bert_score":0.9662299534,
1146
  "speakers":977366.0
1147
  },
1148
+ {
1149
+ "target_language_name":"West Central Oromo",
1150
+ "bleu":46.9090350028,
1151
+ "bert_score":0.8845542371,
1152
+ "speakers":0.0
1153
+ },
1154
  {
1155
  "target_language_name":"Wolof",
1156
  "bleu":42.6430127569,
1157
+ "bert_score":0.8762976408,
1158
  "speakers":3700000.0
1159
  },
1160
  {
1161
  "target_language_name":"Xhosa",
1162
  "bleu":55.4688091009,
1163
+ "bert_score":0.9008744816,
1164
  "speakers":11000000.0
1165
  },
1166
+ {
1167
+ "target_language_name":"Yerwa Kanuri",
1168
+ "bleu":18.5081787556,
1169
+ "bert_score":0.839997381,
1170
+ "speakers":0.0
1171
+ },
1172
  {
1173
  "target_language_name":"Yoruba",
1174
  "bleu":34.2642542268,
1175
+ "bert_score":0.9001545012,
1176
  "speakers":40000000.0
1177
  },
1178
+ {
1179
+ "target_language_name":"Yue Chinese",
1180
+ "bleu":34.5614651228,
1181
+ "bert_score":0.9634495397,
1182
+ "speakers":73100000.0
1183
+ },
1184
  {
1185
  "target_language_name":"Zulu",
1186
  "bleu":59.1762078389,
1187
+ "bert_score":0.9099391103,
1188
  "speakers":15700000.0
1189
  },
1190
  {
1191
  "target_language_name":"nno",
1192
  "bleu":71.8615646296,
1193
+ "bert_score":0.9335320314,
1194
  "speakers":0.0
1195
  },
1196
  {
1197
  "target_language_name":"\u00c9w\u00e9",
1198
  "bleu":41.6614038791,
1199
+ "bert_score":0.8829316159,
1200
  "speakers":3000000.0
1201
  }
1202
  ]
uv.lock CHANGED
The diff for this file is too large to render. See raw diff