David Pomerenke committed on
Commit
0e5691e
·
1 Parent(s): 50128d8

Improve language and script names and speaker data

Browse files
Files changed (9) hide show
  1. .gitignore +2 -0
  2. README.md +1 -1
  3. data.txt +4 -0
  4. index.html +8 -1
  5. languagebench.py +63 -25
  6. languages.rq +1 -2
  7. languages.tsv +0 -0
  8. results.json +200 -120
  9. results_summary.json +34 -0
.gitignore CHANGED
@@ -1,4 +1,6 @@
1
  floresp-*
 
 
2
  .cache
3
  .env
4
 
 
1
  floresp-*
2
+ LanguageCodes.tab
3
+ ScriptCodes.csv
4
  .cache
5
  .env
6
 
README.md CHANGED
@@ -6,4 +6,4 @@ Sources:
6
 
7
  1. For AI models: [OpenRouter](https://openrouter.ai/)
8
  2. For language benchmarks: [FLORES+](https://github.com/openlanguagedata/flores)
9
- 3. For language statistics: [Wikidata](https://gist.github.com/unhammer/3e8f2e0f79972bf5008a4c970081502d) (Potential alternative: [Ethnologue](https://www.ethnologue.com/browse/names/))
 
6
 
7
  1. For AI models: [OpenRouter](https://openrouter.ai/)
8
  2. For language benchmarks: [FLORES+](https://github.com/openlanguagedata/flores)
9
+ 3. For language statistics: [Wikidata](https://gist.github.com/unhammer/3e8f2e0f79972bf5008a4c970081502d) (And [Ethnologue](https://www.ethnologue.com/browse/names/) for additional language names)
data.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ floresp-v2.0-rc.3: https://github.com/openlanguagedata/flores
2
+ languages.tsv: generated from https://query.wikidata.org/ using the languages.rq query
3
+ LanguageCodes.tab: https://www.ethnologue.com/
4
+ ScriptCodes.csv: https://www.unicode.org/iso15924/iso15924-codes.html
index.html CHANGED
@@ -33,6 +33,8 @@
33
  import * as Plot from "https://cdn.jsdelivr.net/npm/@observablehq/plot@0.6/+esm";
34
 
35
  async function init() {
 
 
36
  const response = await fetch('results.json');
37
  const results = await response.json();
38
 
@@ -70,6 +72,11 @@
70
 
71
  const languageData = results.filter(r => r.target_language_name === language);
72
 
 
 
 
 
 
73
  // Create plot using the more idiomatic Observable Plot approach
74
  const plot = Plot.plot({
75
  width: 400,
@@ -81,7 +88,7 @@
81
  },
82
  marks: [
83
  Plot.barY(languageData, {
84
- x: d => d.model.split('/')[0],
85
  y: "bleu"
86
  })
87
  ]
 
33
  import * as Plot from "https://cdn.jsdelivr.net/npm/@observablehq/plot@0.6/+esm";
34
 
35
  async function init() {
36
+ const summary = await fetch('results_summary.json');
37
+
38
  const response = await fetch('results.json');
39
  const results = await response.json();
40
 
 
72
 
73
  const languageData = results.filter(r => r.target_language_name === language);
74
 
75
+ const descriptor = code => {
76
+ let [org, model] = code.split("/")
77
+ return model.split("-")[0]
78
+ }
79
+
80
  // Create plot using the more idiomatic Observable Plot approach
81
  const plot = Plot.plot({
82
  width: 400,
 
88
  },
89
  marks: [
90
  Plot.barY(languageData, {
91
+ x: d => descriptor(d.model),
92
  y: "bleu"
93
  })
94
  ]
languagebench.py CHANGED
@@ -1,6 +1,7 @@
1
  import asyncio
2
  import json
3
  import os
 
4
  from os import getenv
5
 
6
  import evaluate
@@ -9,6 +10,7 @@ from dotenv import load_dotenv
9
  from joblib.memory import Memory
10
  from openai import AsyncOpenAI
11
  from tqdm.asyncio import tqdm_asyncio
 
12
 
13
  # config
14
  models = [
@@ -21,15 +23,17 @@ models = [
21
  # models = ["gpt-4o-mini"]
22
  original_language = "eng_Latn"
23
  dataset = "floresp-v2.0-rc.3/dev"
24
- target_languages = sorted([f.split(".")[1] for f in os.listdir(dataset)][:10])
25
- target_languages = [
26
- "eng_Latn",
27
- "deu_Latn",
28
- "fra_Latn",
29
- "spa_Latn",
30
- "cmn_Hans",
31
- "cmn_Hant",
32
- ]
 
 
33
 
34
  # setup
35
  load_dotenv()
@@ -40,17 +44,33 @@ client = AsyncOpenAI(
40
  )
41
  cache = Memory(location=".cache", verbose=0).cache
42
  bleu = evaluate.load("sacrebleu")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43
  language_stats = pd.read_csv("languages.tsv", sep="\t")
 
44
 
45
 
46
  @cache
47
- async def translate(model, target_language, sentence):
48
- reply = await client.chat.completions.create(
49
  model=model,
50
  messages=[
51
  {
52
  "role": "user",
53
- "content": f"Translate the following text from {original_language} to {target_language}:\n\n{sentence}",
54
  }
55
  ],
56
  temperature=0,
@@ -61,9 +81,15 @@ async def translate(model, target_language, sentence):
61
  def get_language_stats(language_code):
62
  lang, script = language_code.split("_")
63
  stats = language_stats[language_stats["iso639_3"] == lang]
64
- if stats.empty:
65
- return dict()
66
- return stats.iloc[0].to_dict()
 
 
 
 
 
 
67
 
68
 
69
  async def main():
@@ -71,31 +97,43 @@ async def main():
71
  results = []
72
  original_sentences = open(f"{dataset}/dev.{original_language}").readlines()
73
  for target_language in target_languages:
 
 
74
  target_sentences = open(f"{dataset}/dev.{target_language}").readlines()
75
  for model in models:
76
- print(f"{model} -> {target_language}")
77
- predictions = await tqdm_asyncio.gather(
78
- *[
79
- translate(model, target_language, sentence)
80
- for sentence in original_sentences[:n]
81
- ],
82
- )
 
 
 
 
83
  metrics = bleu.compute(
84
- predictions=predictions, references=target_sentences[:n]
 
 
85
  )
86
- stats = get_language_stats(target_language)
87
  results.append(
88
  {
89
  "model": model,
90
  "original_language": original_language,
91
  "target_language": target_language,
92
- "target_language_name": stats.get("itemLabel_en", target_language),
93
  "speakers": stats.get("maxSpeakers"),
94
  "bleu": metrics["score"],
95
  }
96
  )
97
  with open("results.json", "w") as f:
98
  json.dump(results, f, indent=2, ensure_ascii=False)
 
 
 
 
99
 
100
 
101
  if __name__ == "__main__":
 
1
  import asyncio
2
  import json
3
  import os
4
+ import random
5
  from os import getenv
6
 
7
  import evaluate
 
10
  from joblib.memory import Memory
11
  from openai import AsyncOpenAI
12
  from tqdm.asyncio import tqdm_asyncio
13
+ from tqdm.auto import tqdm
14
 
15
  # config
16
  models = [
 
23
  # models = ["gpt-4o-mini"]
24
  original_language = "eng_Latn"
25
  dataset = "floresp-v2.0-rc.3/dev"
26
+ random.seed(42)
27
+ target_languages = [f.split(".")[1] for f in os.listdir(dataset)]
28
+ target_languages = random.choices(target_languages, k=10)
29
+ # target_languages = [
30
+ # "eng_Latn",
31
+ # "deu_Latn",
32
+ # "fra_Latn",
33
+ # "spa_Latn",
34
+ # "cmn_Hans",
35
+ # "cmn_Hant",
36
+ # ]
37
 
38
  # setup
39
  load_dotenv()
 
44
  )
45
  cache = Memory(location=".cache", verbose=0).cache
46
  bleu = evaluate.load("sacrebleu")
47
+
48
+
49
+ @cache
50
+ async def complete(**kwargs):
51
+ return await client.chat.completions.create(**kwargs)
52
+
53
+
54
+ def reorder(language_name):
55
+ if "," in language_name and "(" not in language_name:
56
+ return language_name.split(",")[1] + " " + language_name.split(",")[0]
57
+ return language_name
58
+
59
+
60
+ language_names = pd.read_csv("LanguageCodes.tab", sep="\t")
61
+ language_names["Name"] = language_names["Name"].apply(reorder)
62
  language_stats = pd.read_csv("languages.tsv", sep="\t")
63
+ script_names = pd.read_csv("ScriptCodes.csv")
64
 
65
 
66
  @cache
67
+ async def translate(model, target_language, target_script, sentence):
68
+ reply = await complete(
69
  model=model,
70
  messages=[
71
  {
72
  "role": "user",
73
+ "content": f"Translate the following text to {target_language} (script: {target_script}):\n\n{sentence}",
74
  }
75
  ],
76
  temperature=0,
 
81
  def get_language_stats(language_code):
82
  lang, script = language_code.split("_")
83
  stats = language_stats[language_stats["iso639_3"] == lang]
84
+ if not stats.empty:
85
+ stats = stats.iloc[0].to_dict()
86
+ else:
87
+ stats = dict()
88
+ stats["script"] = script_names[script_names["Code"] == script]["English Name"].iloc[
89
+ 0
90
+ ]
91
+ stats["name"] = language_names[language_names["LangID"] == lang]["Name"].iloc[0]
92
+ return stats
93
 
94
 
95
  async def main():
 
97
  results = []
98
  original_sentences = open(f"{dataset}/dev.{original_language}").readlines()
99
  for target_language in target_languages:
100
+ if target_language == original_language:
101
+ continue
102
  target_sentences = open(f"{dataset}/dev.{target_language}").readlines()
103
  for model in models:
104
+ stats = get_language_stats(target_language)
105
+ print(f"{model} -> {stats['name']}")
106
+ # predictions = [
107
+ # await translate(model, stats["name"], stats["script"], sentence)
108
+ # for sentence in tqdm(original_sentences[:n])
109
+ # ]
110
+ predictions = [
111
+ translate(model, stats["name"], stats["script"], sentence)
112
+ for sentence in tqdm(original_sentences[:n])
113
+ ]
114
+ predictions = await tqdm_asyncio.gather(*predictions)
115
  metrics = bleu.compute(
116
+ predictions=predictions,
117
+ references=target_sentences[:n],
118
+ tokenize="char",
119
  )
120
+
121
  results.append(
122
  {
123
  "model": model,
124
  "original_language": original_language,
125
  "target_language": target_language,
126
+ "target_language_name": stats["name"],
127
  "speakers": stats.get("maxSpeakers"),
128
  "bleu": metrics["score"],
129
  }
130
  )
131
  with open("results.json", "w") as f:
132
  json.dump(results, f, indent=2, ensure_ascii=False)
133
+ # compute mean bleu for each target language
134
+ pd.DataFrame(results).groupby("target_language_name").agg(
135
+ {"bleu": "mean"}
136
+ ).reset_index().to_json("results_summary.json", indent=2, orient="records")
137
 
138
 
139
  if __name__ == "__main__":
languages.rq CHANGED
@@ -1,8 +1,7 @@
1
  # https://query.wikidata.org/
2
  SELECT DISTINCT ?item (MAX(?numberOfSpeakers) AS ?maxSpeakers) (MAX(?speakersTime) AS ?mostRecentTime) ?iso639_1 ?iso639_3 ?itemLabel ?itemLabel_en
3
  WHERE {
4
- ?item wdt:P31 wd:Q34770; # General "language" type to include all languages
5
- wdt:P220 ?iso639_3. # Language with ISO 639-3 code
6
 
7
  ?item p:P1098 ?numberOfSpeakersStatement.
8
  ?numberOfSpeakersStatement ps:P1098 ?numberOfSpeakers.
 
1
  # https://query.wikidata.org/
2
  SELECT DISTINCT ?item (MAX(?numberOfSpeakers) AS ?maxSpeakers) (MAX(?speakersTime) AS ?mostRecentTime) ?iso639_1 ?iso639_3 ?itemLabel ?itemLabel_en
3
  WHERE {
4
+ ?item wdt:P220 ?iso639_3. # Language with ISO 639-3 code
 
5
 
6
  ?item p:P1098 ?numberOfSpeakersStatement.
7
  ?numberOfSpeakersStatement ps:P1098 ?numberOfSpeakers.
languages.tsv CHANGED
The diff for this file is too large to render. See raw diff
 
results.json CHANGED
@@ -2,241 +2,321 @@
2
  {
3
  "model": "openai/gpt-4o-mini",
4
  "original_language": "eng_Latn",
5
- "target_language": "eng_Latn",
6
- "target_language_name": "English",
7
- "speakers": 1132366680,
8
- "bleu": 96.0187510193446
9
  },
10
  {
11
  "model": "google/gemini-flash-1.5",
12
  "original_language": "eng_Latn",
13
- "target_language": "eng_Latn",
14
- "target_language_name": "English",
15
- "speakers": 1132366680,
16
- "bleu": 79.64837722618887
17
  },
18
  {
19
  "model": "anthropic/claude-3.5-sonnet",
20
  "original_language": "eng_Latn",
21
- "target_language": "eng_Latn",
22
- "target_language_name": "English",
23
- "speakers": 1132366680,
24
- "bleu": 47.89694173473209
25
  },
26
  {
27
  "model": "qwen/qwen-2.5-72b-instruct",
28
  "original_language": "eng_Latn",
29
- "target_language": "eng_Latn",
30
- "target_language_name": "English",
31
- "speakers": 1132366680,
32
- "bleu": 57.08253125905762
33
  },
34
  {
35
  "model": "meta-llama/llama-3.1-8b-instruct",
36
  "original_language": "eng_Latn",
37
- "target_language": "eng_Latn",
38
- "target_language_name": "English",
39
- "speakers": 1132366680,
40
- "bleu": 71.3986619616758
41
  },
42
  {
43
  "model": "openai/gpt-4o-mini",
44
  "original_language": "eng_Latn",
45
- "target_language": "deu_Latn",
46
- "target_language_name": "German",
47
- "speakers": 105000000,
48
- "bleu": 42.76912386979146
49
  },
50
  {
51
  "model": "google/gemini-flash-1.5",
52
  "original_language": "eng_Latn",
53
- "target_language": "deu_Latn",
54
- "target_language_name": "German",
55
- "speakers": 105000000,
56
- "bleu": 48.166702527508484
57
  },
58
  {
59
  "model": "anthropic/claude-3.5-sonnet",
60
  "original_language": "eng_Latn",
61
- "target_language": "deu_Latn",
62
- "target_language_name": "German",
63
- "speakers": 105000000,
64
- "bleu": 47.56638188073429
65
  },
66
  {
67
  "model": "qwen/qwen-2.5-72b-instruct",
68
  "original_language": "eng_Latn",
69
- "target_language": "deu_Latn",
70
- "target_language_name": "German",
71
- "speakers": 105000000,
72
- "bleu": 38.8670415108337
73
  },
74
  {
75
  "model": "meta-llama/llama-3.1-8b-instruct",
76
  "original_language": "eng_Latn",
77
- "target_language": "deu_Latn",
78
- "target_language_name": "German",
79
- "speakers": 105000000,
80
- "bleu": 32.2942935571844
81
  },
82
  {
83
  "model": "openai/gpt-4o-mini",
84
  "original_language": "eng_Latn",
85
- "target_language": "fra_Latn",
86
- "target_language_name": "French",
87
- "speakers": 208157220,
88
- "bleu": 47.70220301445618
89
  },
90
  {
91
  "model": "google/gemini-flash-1.5",
92
  "original_language": "eng_Latn",
93
- "target_language": "fra_Latn",
94
- "target_language_name": "French",
95
- "speakers": 208157220,
96
- "bleu": 49.50529382461407
97
  },
98
  {
99
  "model": "anthropic/claude-3.5-sonnet",
100
  "original_language": "eng_Latn",
101
- "target_language": "fra_Latn",
102
- "target_language_name": "French",
103
- "speakers": 208157220,
104
- "bleu": 50.55719906730571
105
  },
106
  {
107
  "model": "qwen/qwen-2.5-72b-instruct",
108
  "original_language": "eng_Latn",
109
- "target_language": "fra_Latn",
110
- "target_language_name": "French",
111
- "speakers": 208157220,
112
- "bleu": 43.43766704709355
113
  },
114
  {
115
  "model": "meta-llama/llama-3.1-8b-instruct",
116
  "original_language": "eng_Latn",
117
- "target_language": "fra_Latn",
118
- "target_language_name": "French",
119
- "speakers": 208157220,
120
- "bleu": 37.38013101452594
121
  },
122
  {
123
  "model": "openai/gpt-4o-mini",
124
  "original_language": "eng_Latn",
125
- "target_language": "spa_Latn",
126
- "target_language_name": "Spanish",
127
- "speakers": 485000000,
128
- "bleu": 34.65606074843554
129
  },
130
  {
131
  "model": "google/gemini-flash-1.5",
132
  "original_language": "eng_Latn",
133
- "target_language": "spa_Latn",
134
- "target_language_name": "Spanish",
135
- "speakers": 485000000,
136
- "bleu": 34.49205632717459
137
  },
138
  {
139
  "model": "anthropic/claude-3.5-sonnet",
140
  "original_language": "eng_Latn",
141
- "target_language": "spa_Latn",
142
- "target_language_name": "Spanish",
143
- "speakers": 485000000,
144
- "bleu": 34.58637890527096
145
  },
146
  {
147
  "model": "qwen/qwen-2.5-72b-instruct",
148
  "original_language": "eng_Latn",
149
- "target_language": "spa_Latn",
150
- "target_language_name": "Spanish",
151
- "speakers": 485000000,
152
- "bleu": 33.41419407814188
153
  },
154
  {
155
  "model": "meta-llama/llama-3.1-8b-instruct",
156
  "original_language": "eng_Latn",
157
- "target_language": "spa_Latn",
158
- "target_language_name": "Spanish",
159
- "speakers": 485000000,
160
- "bleu": 29.470460185415075
161
  },
162
  {
163
  "model": "openai/gpt-4o-mini",
164
  "original_language": "eng_Latn",
165
- "target_language": "cmn_Hans",
166
- "target_language_name": "Mandarin",
167
- "speakers": 1074000000,
168
- "bleu": 0.7678283495493847
169
  },
170
  {
171
  "model": "google/gemini-flash-1.5",
172
  "original_language": "eng_Latn",
173
- "target_language": "cmn_Hans",
174
- "target_language_name": "Mandarin",
175
- "speakers": 1074000000,
176
- "bleu": 0.3178534804335777
177
  },
178
  {
179
  "model": "anthropic/claude-3.5-sonnet",
180
  "original_language": "eng_Latn",
181
- "target_language": "cmn_Hans",
182
- "target_language_name": "Mandarin",
183
- "speakers": 1074000000,
184
- "bleu": 0.8670958769249191
185
  },
186
  {
187
  "model": "qwen/qwen-2.5-72b-instruct",
188
  "original_language": "eng_Latn",
189
- "target_language": "cmn_Hans",
190
- "target_language_name": "Mandarin",
191
- "speakers": 1074000000,
192
- "bleu": 0.6796400550094367
193
  },
194
  {
195
  "model": "meta-llama/llama-3.1-8b-instruct",
196
  "original_language": "eng_Latn",
197
- "target_language": "cmn_Hans",
198
- "target_language_name": "Mandarin",
199
- "speakers": 1074000000,
200
- "bleu": 0.027154305073795664
201
  },
202
  {
203
  "model": "openai/gpt-4o-mini",
204
  "original_language": "eng_Latn",
205
- "target_language": "cmn_Hant",
206
- "target_language_name": "Mandarin",
207
- "speakers": 1074000000,
208
- "bleu": 2.175042632198715
209
  },
210
  {
211
  "model": "google/gemini-flash-1.5",
212
  "original_language": "eng_Latn",
213
- "target_language": "cmn_Hant",
214
- "target_language_name": "Mandarin",
215
- "speakers": 1074000000,
216
- "bleu": 0.3480387797702917
217
  },
218
  {
219
  "model": "anthropic/claude-3.5-sonnet",
220
  "original_language": "eng_Latn",
221
- "target_language": "cmn_Hant",
222
- "target_language_name": "Mandarin",
223
- "speakers": 1074000000,
224
- "bleu": 3.8196828383724886
225
  },
226
  {
227
  "model": "qwen/qwen-2.5-72b-instruct",
228
  "original_language": "eng_Latn",
229
- "target_language": "cmn_Hant",
230
- "target_language_name": "Mandarin",
231
- "speakers": 1074000000,
232
- "bleu": 2.1029807575075994
233
  },
234
  {
235
  "model": "meta-llama/llama-3.1-8b-instruct",
236
  "original_language": "eng_Latn",
237
- "target_language": "cmn_Hant",
238
- "target_language_name": "Mandarin",
239
- "speakers": 1074000000,
240
- "bleu": 0.017008567925605175
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
241
  }
242
  ]
 
2
  {
3
  "model": "openai/gpt-4o-mini",
4
  "original_language": "eng_Latn",
5
+ "target_language": "isl_Latn",
6
+ "target_language_name": "Icelandic",
7
+ "speakers": "358000",
8
+ "bleu": 61.64765463591684
9
  },
10
  {
11
  "model": "google/gemini-flash-1.5",
12
  "original_language": "eng_Latn",
13
+ "target_language": "isl_Latn",
14
+ "target_language_name": "Icelandic",
15
+ "speakers": "358000",
16
+ "bleu": 58.800114937175536
17
  },
18
  {
19
  "model": "anthropic/claude-3.5-sonnet",
20
  "original_language": "eng_Latn",
21
+ "target_language": "isl_Latn",
22
+ "target_language_name": "Icelandic",
23
+ "speakers": "358000",
24
+ "bleu": 63.29310272337262
25
  },
26
  {
27
  "model": "qwen/qwen-2.5-72b-instruct",
28
  "original_language": "eng_Latn",
29
+ "target_language": "isl_Latn",
30
+ "target_language_name": "Icelandic",
31
+ "speakers": "358000",
32
+ "bleu": 28.81229691079592
33
  },
34
  {
35
  "model": "meta-llama/llama-3.1-8b-instruct",
36
  "original_language": "eng_Latn",
37
+ "target_language": "isl_Latn",
38
+ "target_language_name": "Icelandic",
39
+ "speakers": "358000",
40
+ "bleu": 35.26511601216665
41
  },
42
  {
43
  "model": "openai/gpt-4o-mini",
44
  "original_language": "eng_Latn",
45
+ "target_language": "ind_Latn",
46
+ "target_language_name": "Indonesian",
47
+ "speakers": "198996550",
48
+ "bleu": 77.046561794683
49
  },
50
  {
51
  "model": "google/gemini-flash-1.5",
52
  "original_language": "eng_Latn",
53
+ "target_language": "ind_Latn",
54
+ "target_language_name": "Indonesian",
55
+ "speakers": "198996550",
56
+ "bleu": 73.29149877209336
57
  },
58
  {
59
  "model": "anthropic/claude-3.5-sonnet",
60
  "original_language": "eng_Latn",
61
+ "target_language": "ind_Latn",
62
+ "target_language_name": "Indonesian",
63
+ "speakers": "198996550",
64
+ "bleu": 71.2935146236652
65
  },
66
  {
67
  "model": "qwen/qwen-2.5-72b-instruct",
68
  "original_language": "eng_Latn",
69
+ "target_language": "ind_Latn",
70
+ "target_language_name": "Indonesian",
71
+ "speakers": "198996550",
72
+ "bleu": 71.24329964015463
73
  },
74
  {
75
  "model": "meta-llama/llama-3.1-8b-instruct",
76
  "original_language": "eng_Latn",
77
+ "target_language": "ind_Latn",
78
+ "target_language_name": "Indonesian",
79
+ "speakers": "198996550",
80
+ "bleu": 68.73551514908719
81
  },
82
  {
83
  "model": "openai/gpt-4o-mini",
84
  "original_language": "eng_Latn",
85
+ "target_language": "pol_Latn",
86
+ "target_language_name": "Polish",
87
+ "speakers": "40200000",
88
+ "bleu": 65.22066652766671
89
  },
90
  {
91
  "model": "google/gemini-flash-1.5",
92
  "original_language": "eng_Latn",
93
+ "target_language": "pol_Latn",
94
+ "target_language_name": "Polish",
95
+ "speakers": "40200000",
96
+ "bleu": 64.9142026045634
97
  },
98
  {
99
  "model": "anthropic/claude-3.5-sonnet",
100
  "original_language": "eng_Latn",
101
+ "target_language": "pol_Latn",
102
+ "target_language_name": "Polish",
103
+ "speakers": "40200000",
104
+ "bleu": 62.0169794404058
105
  },
106
  {
107
  "model": "qwen/qwen-2.5-72b-instruct",
108
  "original_language": "eng_Latn",
109
+ "target_language": "pol_Latn",
110
+ "target_language_name": "Polish",
111
+ "speakers": "40200000",
112
+ "bleu": 50.22137732799528
113
  },
114
  {
115
  "model": "meta-llama/llama-3.1-8b-instruct",
116
  "original_language": "eng_Latn",
117
+ "target_language": "pol_Latn",
118
+ "target_language_name": "Polish",
119
+ "speakers": "40200000",
120
+ "bleu": 54.39716369344653
121
  },
122
  {
123
  "model": "openai/gpt-4o-mini",
124
  "original_language": "eng_Latn",
125
+ "target_language": "kas_Deva",
126
+ "target_language_name": "Kashmiri",
127
+ "speakers": "6900000",
128
+ "bleu": 22.853844625462184
129
  },
130
  {
131
  "model": "google/gemini-flash-1.5",
132
  "original_language": "eng_Latn",
133
+ "target_language": "kas_Deva",
134
+ "target_language_name": "Kashmiri",
135
+ "speakers": "6900000",
136
+ "bleu": 1.6028583744267129
137
  },
138
  {
139
  "model": "anthropic/claude-3.5-sonnet",
140
  "original_language": "eng_Latn",
141
+ "target_language": "kas_Deva",
142
+ "target_language_name": "Kashmiri",
143
+ "speakers": "6900000",
144
+ "bleu": 10.24928534626491
145
  },
146
  {
147
  "model": "qwen/qwen-2.5-72b-instruct",
148
  "original_language": "eng_Latn",
149
+ "target_language": "kas_Deva",
150
+ "target_language_name": "Kashmiri",
151
+ "speakers": "6900000",
152
+ "bleu": 18.289957619904254
153
  },
154
  {
155
  "model": "meta-llama/llama-3.1-8b-instruct",
156
  "original_language": "eng_Latn",
157
+ "target_language": "kas_Deva",
158
+ "target_language_name": "Kashmiri",
159
+ "speakers": "6900000",
160
+ "bleu": 0.8669151440746464
161
  },
162
  {
163
  "model": "openai/gpt-4o-mini",
164
  "original_language": "eng_Latn",
165
+ "target_language": "lin_Latn",
166
+ "target_language_name": "Lingala",
167
+ "speakers": "20000000",
168
+ "bleu": 50.873508850595044
169
  },
170
  {
171
  "model": "google/gemini-flash-1.5",
172
  "original_language": "eng_Latn",
173
+ "target_language": "lin_Latn",
174
+ "target_language_name": "Lingala",
175
+ "speakers": "20000000",
176
+ "bleu": 6.943431977023627
177
  },
178
  {
179
  "model": "anthropic/claude-3.5-sonnet",
180
  "original_language": "eng_Latn",
181
+ "target_language": "lin_Latn",
182
+ "target_language_name": "Lingala",
183
+ "speakers": "20000000",
184
+ "bleu": 45.048992005653
185
  },
186
  {
187
  "model": "qwen/qwen-2.5-72b-instruct",
188
  "original_language": "eng_Latn",
189
+ "target_language": "lin_Latn",
190
+ "target_language_name": "Lingala",
191
+ "speakers": "20000000",
192
+ "bleu": 2.9809864732757902
193
  },
194
  {
195
  "model": "meta-llama/llama-3.1-8b-instruct",
196
  "original_language": "eng_Latn",
197
+ "target_language": "lin_Latn",
198
+ "target_language_name": "Lingala",
199
+ "speakers": "20000000",
200
+ "bleu": 0.5682271000080301
201
  },
202
  {
203
  "model": "openai/gpt-4o-mini",
204
  "original_language": "eng_Latn",
205
+ "target_language": "bjn_Latn",
206
+ "target_language_name": "Banjar",
207
+ "speakers": "3500000",
208
+ "bleu": 50.02000892713302
209
  },
210
  {
211
  "model": "google/gemini-flash-1.5",
212
  "original_language": "eng_Latn",
213
+ "target_language": "bjn_Latn",
214
+ "target_language_name": "Banjar",
215
+ "speakers": "3500000",
216
+ "bleu": 5.960932185623333
217
  },
218
  {
219
  "model": "anthropic/claude-3.5-sonnet",
220
  "original_language": "eng_Latn",
221
+ "target_language": "bjn_Latn",
222
+ "target_language_name": "Banjar",
223
+ "speakers": "3500000",
224
+ "bleu": 46.23236901760108
225
  },
226
  {
227
  "model": "qwen/qwen-2.5-72b-instruct",
228
  "original_language": "eng_Latn",
229
+ "target_language": "bjn_Latn",
230
+ "target_language_name": "Banjar",
231
+ "speakers": "3500000",
232
+ "bleu": 21.478973248564643
233
  },
234
  {
235
  "model": "meta-llama/llama-3.1-8b-instruct",
236
  "original_language": "eng_Latn",
237
+ "target_language": "bjn_Latn",
238
+ "target_language_name": "Banjar",
239
+ "speakers": "3500000",
240
+ "bleu": 22.371002216375594
241
+ },
242
+ {
243
+ "model": "openai/gpt-4o-mini",
244
+ "original_language": "eng_Latn",
245
+ "target_language": "bho_Deva",
246
+ "target_language_name": "Bhojpuri",
247
+ "speakers": "52200000",
248
+ "bleu": 43.46871320382143
249
+ },
250
+ {
251
+ "model": "google/gemini-flash-1.5",
252
+ "original_language": "eng_Latn",
253
+ "target_language": "bho_Deva",
254
+ "target_language_name": "Bhojpuri",
255
+ "speakers": "52200000",
256
+ "bleu": 18.253876118905147
257
+ },
258
+ {
259
+ "model": "anthropic/claude-3.5-sonnet",
260
+ "original_language": "eng_Latn",
261
+ "target_language": "bho_Deva",
262
+ "target_language_name": "Bhojpuri",
263
+ "speakers": "52200000",
264
+ "bleu": 23.447356116551486
265
+ },
266
+ {
267
+ "model": "qwen/qwen-2.5-72b-instruct",
268
+ "original_language": "eng_Latn",
269
+ "target_language": "bho_Deva",
270
+ "target_language_name": "Bhojpuri",
271
+ "speakers": "52200000",
272
+ "bleu": 29.81946758376717
273
+ },
274
+ {
275
+ "model": "meta-llama/llama-3.1-8b-instruct",
276
+ "original_language": "eng_Latn",
277
+ "target_language": "bho_Deva",
278
+ "target_language_name": "Bhojpuri",
279
+ "speakers": "52200000",
280
+ "bleu": 3.2486895985868474
281
+ },
282
+ {
283
+ "model": "openai/gpt-4o-mini",
284
+ "original_language": "eng_Latn",
285
+ "target_language": "ces_Latn",
286
+ "target_language_name": "Czech",
287
+ "speakers": "10700000",
288
+ "bleu": 69.01583822081993
289
+ },
290
+ {
291
+ "model": "google/gemini-flash-1.5",
292
+ "original_language": "eng_Latn",
293
+ "target_language": "ces_Latn",
294
+ "target_language_name": "Czech",
295
+ "speakers": "10700000",
296
+ "bleu": 69.4260447999661
297
+ },
298
+ {
299
+ "model": "anthropic/claude-3.5-sonnet",
300
+ "original_language": "eng_Latn",
301
+ "target_language": "ces_Latn",
302
+ "target_language_name": "Czech",
303
+ "speakers": "10700000",
304
+ "bleu": 68.6109083634317
305
+ },
306
+ {
307
+ "model": "qwen/qwen-2.5-72b-instruct",
308
+ "original_language": "eng_Latn",
309
+ "target_language": "ces_Latn",
310
+ "target_language_name": "Czech",
311
+ "speakers": "10700000",
312
+ "bleu": 59.72501366200287
313
+ },
314
+ {
315
+ "model": "meta-llama/llama-3.1-8b-instruct",
316
+ "original_language": "eng_Latn",
317
+ "target_language": "ces_Latn",
318
+ "target_language_name": "Czech",
319
+ "speakers": "10700000",
320
+ "bleu": 60.25088578142904
321
  }
322
  ]
results_summary.json ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "target_language_name":"Banjar",
4
+ "bleu":29.2126571191
5
+ },
6
+ {
7
+ "target_language_name":"Bhojpuri",
8
+ "bleu":23.6476205243
9
+ },
10
+ {
11
+ "target_language_name":"Czech",
12
+ "bleu":65.4057381655
13
+ },
14
+ {
15
+ "target_language_name":"Icelandic",
16
+ "bleu":49.5636570439
17
+ },
18
+ {
19
+ "target_language_name":"Indonesian",
20
+ "bleu":72.3220779959
21
+ },
22
+ {
23
+ "target_language_name":"Kashmiri",
24
+ "bleu":10.772572222
25
+ },
26
+ {
27
+ "target_language_name":"Lingala",
28
+ "bleu":21.2830292813
29
+ },
30
+ {
31
+ "target_language_name":"Polish",
32
+ "bleu":59.3540779188
33
+ }
34
+ ]