David Pomerenke
committed on
Commit
·
d5fc8b3
1
Parent(s):
8beab26
Use langcodes for language matching
Browse files- evals.py +79 -80
- pyproject.toml +1 -0
- results.json +71 -461
- uv.lock +81 -0
evals.py
CHANGED
@@ -1,6 +1,7 @@
|
|
1 |
import asyncio
|
2 |
import json
|
3 |
import os
|
|
|
4 |
from os import getenv
|
5 |
|
6 |
import evaluate
|
@@ -14,17 +15,19 @@ from tqdm.asyncio import tqdm_asyncio
|
|
14 |
from transformers import NllbTokenizer
|
15 |
from datetime import date
|
16 |
from requests import get
|
|
|
|
|
17 |
|
18 |
# config
|
19 |
models = [
|
20 |
-
"openai/gpt-4o-mini",
|
21 |
# "anthropic/claude-3.5-haiku", # 4$/M tokens -> too expensive
|
22 |
-
"meta-llama/llama-3.3-70b-instruct",
|
23 |
-
"mistralai/mistral-small-24b-instruct-2501",
|
24 |
-
"google/gemini-2.0-flash-001",
|
25 |
# "qwen/qwen-turbo", # 0.2$/M tokens; recognizes "inappropriate content"
|
26 |
-
"deepseek/deepseek-chat",
|
27 |
-
"microsoft/phi-4",
|
28 |
]
|
29 |
fast_model = "meta-llama/llama-3.3-70b-instruct"
|
30 |
n_sentences = 30
|
@@ -47,73 +50,79 @@ def reorder(language_name):
|
|
47 |
return language_name.split(",")[1] + " " + language_name.split(",")[0]
|
48 |
return language_name
|
49 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
50 |
# load benchmark languages and scripts
|
51 |
benchmark_dir = "data/floresp-v2.0-rc.3/dev"
|
52 |
benchmark_languages = pd.DataFrame(
|
53 |
[f.split(".")[1].split("_", 1) for f in os.listdir(benchmark_dir)],
|
54 |
-
columns=["
|
55 |
)
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
# load Ethnologue language names
|
61 |
-
language_names = (
|
62 |
-
pd.read_csv("data/LanguageCodes.tab", sep="\t")
|
63 |
-
.rename(columns={"LangID": "language_code", "Name": "language_name"})[
|
64 |
-
["language_code", "language_name"]
|
65 |
-
]
|
66 |
-
.assign(language_name=lambda df: df["language_name"].apply(reorder).str.strip())
|
67 |
)
|
68 |
-
|
69 |
-
|
70 |
-
|
71 |
-
|
72 |
-
|
73 |
-
|
74 |
-
|
75 |
-
.
|
76 |
)
|
77 |
-
language_stats["speakers"] = pd.to_numeric(language_stats["speakers"], errors="coerce")
|
78 |
-
ignored_languages = [
|
79 |
-
"zho", # Chinese -> use Mandarin (cmn) instead
|
80 |
-
"ara", # Arabic -> use Standard Arabic (arb) instead
|
81 |
-
"pus", # Pashto -> use Nothern / Central / Southern Pashto instead (pbt / pst / pbu)
|
82 |
-
"fas", # Persian -> use Iranian Persian (pes) instead
|
83 |
-
"msa", # Malay -> use Indonesian (ind) instead
|
84 |
-
]
|
85 |
-
language_stats = language_stats[
|
86 |
-
~language_stats["language_code"].isin(ignored_languages)
|
87 |
-
]
|
88 |
-
|
89 |
-
# load unicode script names
|
90 |
-
script_names = pd.read_csv("data/ScriptCodes.csv").rename(
|
91 |
-
columns={"Code": "script_code", "English Name": "script_name"}
|
92 |
-
)[["script_code", "script_name"]]
|
93 |
|
94 |
-
# merge data
|
95 |
-
languages = pd.merge(language_stats, language_names, on="language_code", how="outer")
|
96 |
-
languages = pd.merge(benchmark_languages, languages, on="language_code", how="outer")
|
97 |
-
languages = pd.merge(languages, script_names, on="script_code", how="left")
|
98 |
-
languages["in_benchmark"] = languages["in_benchmark"].fillna(False)
|
99 |
-
languages = languages.sort_values(by="speakers", ascending=False)
|
100 |
-
languages = languages.iloc[:30]
|
101 |
|
102 |
-
#
|
103 |
-
@cache
|
104 |
def get_commonvoice_stats(date: date):
|
105 |
return get("https://commonvoice.mozilla.org/api/v1/stats/languages").json()
|
106 |
|
107 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
108 |
|
109 |
# sample languages to translate to
|
110 |
target_languages = languages[languages["in_benchmark"]].sample(
|
111 |
n=n_sentences, weights="speakers", replace=True, random_state=42
|
112 |
)
|
113 |
# sample languages to analyze with all models
|
114 |
-
detailed_languages = languages[languages["in_benchmark"]].sample(
|
115 |
-
n=10, random_state=42
|
116 |
-
)
|
117 |
|
118 |
|
119 |
# utils
|
@@ -140,15 +149,14 @@ async def complete(**kwargs):
|
|
140 |
raise Exception(response)
|
141 |
return response
|
142 |
|
143 |
-
|
144 |
-
|
145 |
-
async def translate(model, target_language, target_script, sentence):
|
146 |
reply = await complete(
|
147 |
model=model,
|
148 |
messages=[
|
149 |
{
|
150 |
"role": "user",
|
151 |
-
"content": f"Translate the following text to the {target_language} language; use the {
|
152 |
}
|
153 |
],
|
154 |
temperature=0,
|
@@ -162,40 +170,33 @@ def mean(l):
|
|
162 |
|
163 |
|
164 |
def load_sentences(language):
|
165 |
-
return open(
|
166 |
-
f"{benchmark_dir}/dev.{language.language_code}_{language.script_code}"
|
167 |
-
).readlines()
|
168 |
|
169 |
|
170 |
# evaluation!
|
171 |
async def main():
|
172 |
results = []
|
173 |
for language in list(languages.itertuples()):
|
174 |
-
name = (
|
175 |
-
language.language_name
|
176 |
-
if not pd.isna(language.language_name)
|
177 |
-
else language.language_code
|
178 |
-
)
|
179 |
-
print(name)
|
180 |
scores = []
|
181 |
if language.in_benchmark:
|
182 |
original_sentences = load_sentences(language)[:n_sentences]
|
183 |
for model in models:
|
184 |
if (
|
185 |
model != fast_model
|
186 |
-
and language.
|
187 |
-
not in detailed_languages.language_code.values
|
188 |
):
|
189 |
continue
|
190 |
-
|
191 |
-
print(model)
|
192 |
predictions = [
|
193 |
translate(
|
194 |
-
model,
|
|
|
|
|
|
|
|
|
|
|
195 |
)
|
196 |
-
for sentence, language in zip(original_sentences, target_languages.itertuples())
|
197 |
]
|
198 |
-
predictions = await tqdm_asyncio.gather(*predictions, miniters=1)
|
199 |
target_sentences = [
|
200 |
load_sentences(lang)[i]
|
201 |
for i, lang in enumerate(target_languages.itertuples())
|
@@ -217,17 +218,15 @@ async def main():
|
|
217 |
# "bert_score": mean(metrics_bert["f1"]),
|
218 |
}
|
219 |
)
|
220 |
-
commonvoice_hours = commonvoice_stats[commonvoice_stats["locale"] == language.iso639_1]["validatedHours"].values
|
221 |
-
commonvoice_hours = commonvoice_hours[0] if commonvoice_hours.size > 0 else "N/A"
|
222 |
results.append(
|
223 |
{
|
224 |
-
"language_name": name,
|
225 |
-
"
|
226 |
"speakers": language.speakers if not pd.isna(language.speakers) else 0,
|
227 |
"scores": scores,
|
228 |
"bleu": mean([s["bleu"] for s in scores]) if scores else None,
|
229 |
# "bert_score": mean([s["bert_score"] for s in scores]),
|
230 |
-
"commonvoice_hours": commonvoice_hours,
|
231 |
}
|
232 |
)
|
233 |
with open("results.json", "w") as f:
|
|
|
1 |
import asyncio
|
2 |
import json
|
3 |
import os
|
4 |
+
import re
|
5 |
from os import getenv
|
6 |
|
7 |
import evaluate
|
|
|
15 |
from transformers import NllbTokenizer
|
16 |
from datetime import date
|
17 |
from requests import get
|
18 |
+
from language_data.population_data import LANGUAGE_SPEAKING_POPULATION
|
19 |
+
from langcodes import standardize_tag, Language
|
20 |
|
21 |
# config
|
22 |
models = [
|
23 |
+
"openai/gpt-4o-mini", # 0.6$/M tokens
|
24 |
# "anthropic/claude-3.5-haiku", # 4$/M tokens -> too expensive
|
25 |
+
"meta-llama/llama-3.3-70b-instruct", # 0.3$/M tokens
|
26 |
+
"mistralai/mistral-small-24b-instruct-2501", # 0.14$/M tokens
|
27 |
+
"google/gemini-2.0-flash-001", # 0.4$/M tokens
|
28 |
# "qwen/qwen-turbo", # 0.2$/M tokens; recognizes "inappropriate content"
|
29 |
+
"deepseek/deepseek-chat", # 0.9$/M tokens
|
30 |
+
"microsoft/phi-4", # 0.07$/M tokens
|
31 |
]
|
32 |
fast_model = "meta-llama/llama-3.3-70b-instruct"
|
33 |
n_sentences = 30
|
|
|
50 |
return language_name.split(",")[1] + " " + language_name.split(",")[0]
|
51 |
return language_name
|
52 |
|
53 |
+
|
54 |
+
# load general language data
|
55 |
+
languages = {
|
56 |
+
lang: pop
|
57 |
+
for lang, pop in LANGUAGE_SPEAKING_POPULATION.items()
|
58 |
+
if not re.match(r".*-[A-Z]{2}$", lang)
|
59 |
+
}
|
60 |
+
languages = pd.DataFrame(list(languages.items()), columns=["bcp_47", "speakers"])
|
61 |
+
languages["name"] = languages["bcp_47"].apply(lambda x: Language.get(x).display_name())
|
62 |
+
|
63 |
+
# load script codes and names
|
64 |
+
scripts = pd.read_csv("data/ScriptCodes.csv").rename(columns={"Code": "iso15924", "English Name": "script_name"})
|
65 |
+
|
66 |
+
def script_name(iso15924):
|
67 |
+
return scripts[scripts["iso15924"] == iso15924]["script_name"].values[0]
|
68 |
+
|
69 |
# load benchmark languages and scripts
|
70 |
benchmark_dir = "data/floresp-v2.0-rc.3/dev"
|
71 |
benchmark_languages = pd.DataFrame(
|
72 |
[f.split(".")[1].split("_", 1) for f in os.listdir(benchmark_dir)],
|
73 |
+
columns=["iso639_3", "iso15924"],
|
74 |
)
|
75 |
+
benchmark_languages["bcp_47"] = benchmark_languages.apply(
|
76 |
+
lambda row: standardize_tag(row["iso639_3"] + "-" + row["iso15924"], macro=True),
|
77 |
+
axis=1,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
78 |
)
|
79 |
+
# ignore script (language is language)
|
80 |
+
benchmark_languages["bcp_47"] = benchmark_languages["bcp_47"].apply(
|
81 |
+
lambda x: re.sub(r"-[A-Z][a-z]+$", "", x)
|
82 |
+
)
|
83 |
+
benchmark_languages = (
|
84 |
+
benchmark_languages.groupby("bcp_47")
|
85 |
+
.agg({"iso639_3": "first", "iso15924": "first"})
|
86 |
+
.reset_index()
|
87 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
88 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
89 |
|
90 |
+
# load CommonVoice stats
|
91 |
+
@cache # cache for 1 day
|
92 |
def get_commonvoice_stats(date: date):
|
93 |
return get("https://commonvoice.mozilla.org/api/v1/stats/languages").json()
|
94 |
|
95 |
+
|
96 |
+
commonvoice_stats = pd.DataFrame(get_commonvoice_stats(date.today())).rename(
|
97 |
+
columns={"locale": "bcp_47", "validatedHours": "commonvoice_hours"}
|
98 |
+
)[["bcp_47", "commonvoice_hours"]]
|
99 |
+
# ignore country (language is language) (in practice this is only relevant to zh-CN/zh-TW/zh-HK)
|
100 |
+
commonvoice_stats["bcp_47"] = commonvoice_stats["bcp_47"].apply(
|
101 |
+
lambda x: re.sub(r"-[A-Z]{2}$", "", x)
|
102 |
+
)
|
103 |
+
commonvoice_stats["bcp_47"] = commonvoice_stats["bcp_47"].apply(
|
104 |
+
lambda x: standardize_tag(x, macro=True)
|
105 |
+
) # this does not really seem to get macrolanguages though, e.g. not for Quechua
|
106 |
+
commonvoice_stats = commonvoice_stats.groupby("bcp_47").sum().reset_index()
|
107 |
+
|
108 |
+
# merge data
|
109 |
+
languages = pd.merge(
|
110 |
+
languages, benchmark_languages, on="bcp_47", how="left"
|
111 |
+
) # "left" because keep it simple for now
|
112 |
+
languages = pd.merge(
|
113 |
+
languages, commonvoice_stats, on="bcp_47", how="left"
|
114 |
+
) # "left" because keep it simple for now
|
115 |
+
languages["in_benchmark"] = languages["bcp_47"].isin(benchmark_languages["bcp_47"])
|
116 |
+
|
117 |
+
languages = languages.sort_values(by="speakers", ascending=False)
|
118 |
+
languages = languages.iloc[:10]
|
119 |
|
120 |
# sample languages to translate to
|
121 |
target_languages = languages[languages["in_benchmark"]].sample(
|
122 |
n=n_sentences, weights="speakers", replace=True, random_state=42
|
123 |
)
|
124 |
# sample languages to analyze with all models
|
125 |
+
detailed_languages = languages[languages["in_benchmark"]].sample(n=3, random_state=42)
|
|
|
|
|
126 |
|
127 |
|
128 |
# utils
|
|
|
149 |
raise Exception(response)
|
150 |
return response
|
151 |
|
152 |
+
async def translate(model, target_language, sentence):
|
153 |
+
script = script_name(target_language.iso15924)
|
|
|
154 |
reply = await complete(
|
155 |
model=model,
|
156 |
messages=[
|
157 |
{
|
158 |
"role": "user",
|
159 |
+
"content": f"Translate the following text to the {target_language.name} language; use the {script} script; reply only with the translation:\n\n{sentence}",
|
160 |
}
|
161 |
],
|
162 |
temperature=0,
|
|
|
170 |
|
171 |
|
172 |
def load_sentences(language):
|
173 |
+
return open(f"{benchmark_dir}/dev.{language.iso639_3}_{language.iso15924}").readlines()
|
|
|
|
|
174 |
|
175 |
|
176 |
# evaluation!
|
177 |
async def main():
|
178 |
results = []
|
179 |
for language in list(languages.itertuples()):
|
|
|
|
|
|
|
|
|
|
|
|
|
180 |
scores = []
|
181 |
if language.in_benchmark:
|
182 |
original_sentences = load_sentences(language)[:n_sentences]
|
183 |
for model in models:
|
184 |
if (
|
185 |
model != fast_model
|
186 |
+
and language.bcp_47 not in detailed_languages.bcp_47.values
|
|
|
187 |
):
|
188 |
continue
|
|
|
|
|
189 |
predictions = [
|
190 |
translate(
|
191 |
+
model,
|
192 |
+
language,
|
193 |
+
sentence,
|
194 |
+
)
|
195 |
+
for sentence, language in zip(
|
196 |
+
original_sentences, target_languages.itertuples()
|
197 |
)
|
|
|
198 |
]
|
199 |
+
predictions = await tqdm_asyncio.gather(*predictions, miniters=1, desc=f"{language.name} {model.split('/')[0]}")
|
200 |
target_sentences = [
|
201 |
load_sentences(lang)[i]
|
202 |
for i, lang in enumerate(target_languages.itertuples())
|
|
|
218 |
# "bert_score": mean(metrics_bert["f1"]),
|
219 |
}
|
220 |
)
|
|
|
|
|
221 |
results.append(
|
222 |
{
|
223 |
+
"language_name": language.name,
|
224 |
+
"bcp_47": language.bcp_47,
|
225 |
"speakers": language.speakers if not pd.isna(language.speakers) else 0,
|
226 |
"scores": scores,
|
227 |
"bleu": mean([s["bleu"] for s in scores]) if scores else None,
|
228 |
# "bert_score": mean([s["bert_score"] for s in scores]),
|
229 |
+
"commonvoice_hours": language.commonvoice_hours,
|
230 |
}
|
231 |
)
|
232 |
with open("results.json", "w") as f:
|
pyproject.toml
CHANGED
@@ -16,6 +16,7 @@ dev-dependencies = [
|
|
16 |
"bert-score>=0.3.13",
|
17 |
"evaluate==0.4.0",
|
18 |
"joblib>=1.4.2",
|
|
|
19 |
"openai>=1.52.2",
|
20 |
"protobuf>=5.28.3",
|
21 |
"python-dotenv>=1.0.1",
|
|
|
16 |
"bert-score>=0.3.13",
|
17 |
"evaluate==0.4.0",
|
18 |
"joblib>=1.4.2",
|
19 |
+
"langcodes>=3.5.0",
|
20 |
"openai>=1.52.2",
|
21 |
"protobuf>=5.28.3",
|
22 |
"python-dotenv>=1.0.1",
|
results.json
CHANGED
@@ -1,582 +1,192 @@
|
|
1 |
[
|
2 |
{
|
3 |
"language_name": "English",
|
4 |
-
"
|
5 |
-
"speakers":
|
6 |
"scores": [
|
7 |
-
{
|
8 |
-
"model": "openai/gpt-4o-mini",
|
9 |
-
"bleu": 0.47104084248165595
|
10 |
-
},
|
11 |
{
|
12 |
"model": "meta-llama/llama-3.3-70b-instruct",
|
13 |
-
"bleu": 0.
|
14 |
-
},
|
15 |
-
{
|
16 |
-
"model": "mistralai/mistral-small-24b-instruct-2501",
|
17 |
-
"bleu": 0.4642719176436136
|
18 |
-
},
|
19 |
-
{
|
20 |
-
"model": "google/gemini-2.0-flash-001",
|
21 |
-
"bleu": 0.5237470882988915
|
22 |
-
},
|
23 |
-
{
|
24 |
-
"model": "deepseek/deepseek-chat",
|
25 |
-
"bleu": 0.516570670982587
|
26 |
-
},
|
27 |
-
{
|
28 |
-
"model": "microsoft/phi-4",
|
29 |
-
"bleu": 0.44668905281921456
|
30 |
}
|
31 |
],
|
32 |
-
"bleu": 0.
|
33 |
"commonvoice_hours": 2649.0
|
34 |
},
|
35 |
{
|
36 |
-
"language_name": "
|
37 |
-
"
|
38 |
-
"speakers":
|
39 |
-
"scores": [
|
40 |
-
{
|
41 |
-
"model": "meta-llama/llama-3.3-70b-instruct",
|
42 |
-
"bleu": 0.48254866511762295
|
43 |
-
}
|
44 |
-
],
|
45 |
-
"bleu": 0.48254866511762295,
|
46 |
-
"commonvoice_hours": "N/A"
|
47 |
-
},
|
48 |
-
{
|
49 |
-
"language_name": "Spanish",
|
50 |
-
"language_code": "spa",
|
51 |
-
"speakers": 485000000.0,
|
52 |
-
"scores": [
|
53 |
-
{
|
54 |
-
"model": "meta-llama/llama-3.3-70b-instruct",
|
55 |
-
"bleu": 0.31606621368361204
|
56 |
-
}
|
57 |
-
],
|
58 |
-
"bleu": 0.31606621368361204,
|
59 |
-
"commonvoice_hours": 446.0
|
60 |
-
},
|
61 |
-
{
|
62 |
-
"language_name": "Hindi",
|
63 |
-
"language_code": "hin",
|
64 |
-
"speakers": 341000000.0,
|
65 |
-
"scores": [
|
66 |
-
{
|
67 |
-
"model": "meta-llama/llama-3.3-70b-instruct",
|
68 |
-
"bleu": 0.3273225856613046
|
69 |
-
}
|
70 |
-
],
|
71 |
-
"bleu": 0.3273225856613046,
|
72 |
-
"commonvoice_hours": 16.0
|
73 |
-
},
|
74 |
-
{
|
75 |
-
"language_name": "Bengali",
|
76 |
-
"language_code": "ben",
|
77 |
-
"speakers": 300000000.0,
|
78 |
-
"scores": [
|
79 |
-
{
|
80 |
-
"model": "meta-llama/llama-3.3-70b-instruct",
|
81 |
-
"bleu": 0.23110496173302814
|
82 |
-
}
|
83 |
-
],
|
84 |
-
"bleu": 0.23110496173302814,
|
85 |
-
"commonvoice_hours": 49.0
|
86 |
-
},
|
87 |
-
{
|
88 |
-
"language_name": "Portuguese",
|
89 |
-
"language_code": "por",
|
90 |
-
"speakers": 254300000.0,
|
91 |
-
"scores": [
|
92 |
-
{
|
93 |
-
"model": "meta-llama/llama-3.3-70b-instruct",
|
94 |
-
"bleu": 0.35032125995743685
|
95 |
-
}
|
96 |
-
],
|
97 |
-
"bleu": 0.35032125995743685,
|
98 |
-
"commonvoice_hours": 176.0
|
99 |
-
},
|
100 |
-
{
|
101 |
-
"language_name": "French",
|
102 |
-
"language_code": "fra",
|
103 |
-
"speakers": 208157220.0,
|
104 |
-
"scores": [
|
105 |
-
{
|
106 |
-
"model": "meta-llama/llama-3.3-70b-instruct",
|
107 |
-
"bleu": 0.31625053573185663
|
108 |
-
}
|
109 |
-
],
|
110 |
-
"bleu": 0.31625053573185663,
|
111 |
-
"commonvoice_hours": 1051.0
|
112 |
-
},
|
113 |
-
{
|
114 |
-
"language_name": "Indonesian",
|
115 |
-
"language_code": "ind",
|
116 |
-
"speakers": 198996550.0,
|
117 |
-
"scores": [
|
118 |
-
{
|
119 |
-
"model": "meta-llama/llama-3.3-70b-instruct",
|
120 |
-
"bleu": 0.3112185444311794
|
121 |
-
}
|
122 |
-
],
|
123 |
-
"bleu": 0.3112185444311794,
|
124 |
-
"commonvoice_hours": 33.0
|
125 |
-
},
|
126 |
-
{
|
127 |
-
"language_name": "Russian",
|
128 |
-
"language_code": "rus",
|
129 |
-
"speakers": 171428900.0,
|
130 |
"scores": [
|
131 |
{
|
132 |
"model": "openai/gpt-4o-mini",
|
133 |
-
"bleu": 0.
|
134 |
},
|
135 |
{
|
136 |
"model": "meta-llama/llama-3.3-70b-instruct",
|
137 |
-
"bleu": 0.
|
138 |
},
|
139 |
{
|
140 |
"model": "mistralai/mistral-small-24b-instruct-2501",
|
141 |
-
"bleu": 0.
|
142 |
},
|
143 |
{
|
144 |
"model": "google/gemini-2.0-flash-001",
|
145 |
-
"bleu": 0.
|
146 |
},
|
147 |
{
|
148 |
"model": "deepseek/deepseek-chat",
|
149 |
-
"bleu": 0.
|
150 |
},
|
151 |
{
|
152 |
"model": "microsoft/phi-4",
|
153 |
-
"bleu": 0.
|
154 |
}
|
155 |
],
|
156 |
-
"bleu": 0.
|
157 |
-
"commonvoice_hours":
|
158 |
},
|
159 |
{
|
160 |
-
"language_name": "
|
161 |
-
"
|
162 |
-
"speakers":
|
163 |
"scores": [
|
164 |
-
{
|
165 |
-
"model": "openai/gpt-4o-mini",
|
166 |
-
"bleu": 0.28991739992953497
|
167 |
-
},
|
168 |
{
|
169 |
"model": "meta-llama/llama-3.3-70b-instruct",
|
170 |
-
"bleu": 0.
|
171 |
-
},
|
172 |
-
{
|
173 |
-
"model": "mistralai/mistral-small-24b-instruct-2501",
|
174 |
-
"bleu": 0.21348802780641032
|
175 |
-
},
|
176 |
-
{
|
177 |
-
"model": "google/gemini-2.0-flash-001",
|
178 |
-
"bleu": 0.3345265427223546
|
179 |
-
},
|
180 |
-
{
|
181 |
-
"model": "deepseek/deepseek-chat",
|
182 |
-
"bleu": 0.3101203037558905
|
183 |
-
},
|
184 |
-
{
|
185 |
-
"model": "microsoft/phi-4",
|
186 |
-
"bleu": 0.2585222780278109
|
187 |
}
|
188 |
],
|
189 |
-
"bleu": 0.
|
190 |
-
"commonvoice_hours":
|
191 |
},
|
192 |
{
|
193 |
-
"language_name": "
|
194 |
-
"
|
195 |
-
"speakers":
|
196 |
"scores": [
|
197 |
{
|
198 |
"model": "meta-llama/llama-3.3-70b-instruct",
|
199 |
-
"bleu": 0.
|
200 |
}
|
201 |
],
|
202 |
-
"bleu": 0.
|
203 |
-
"commonvoice_hours":
|
204 |
},
|
205 |
{
|
206 |
-
"language_name": "
|
207 |
-
"
|
208 |
-
"speakers":
|
209 |
"scores": [
|
210 |
-
{
|
211 |
-
"model": "openai/gpt-4o-mini",
|
212 |
-
"bleu": 0.39019323183176663
|
213 |
-
},
|
214 |
{
|
215 |
"model": "meta-llama/llama-3.3-70b-instruct",
|
216 |
-
"bleu": 0.
|
217 |
-
},
|
218 |
-
{
|
219 |
-
"model": "mistralai/mistral-small-24b-instruct-2501",
|
220 |
-
"bleu": 0.3647632576435612
|
221 |
-
},
|
222 |
-
{
|
223 |
-
"model": "google/gemini-2.0-flash-001",
|
224 |
-
"bleu": 0.4466723425292597
|
225 |
-
},
|
226 |
-
{
|
227 |
-
"model": "deepseek/deepseek-chat",
|
228 |
-
"bleu": 0.4045496243095387
|
229 |
-
},
|
230 |
-
{
|
231 |
-
"model": "microsoft/phi-4",
|
232 |
-
"bleu": 0.36047992103881465
|
233 |
}
|
234 |
],
|
235 |
-
"bleu": 0.
|
236 |
-
"commonvoice_hours":
|
237 |
-
},
|
238 |
-
{
|
239 |
-
"language_name": "Egyptian Arabic",
|
240 |
-
"language_code": "arz",
|
241 |
-
"speakers": 100542400.0,
|
242 |
-
"scores": [
|
243 |
-
{
|
244 |
-
"model": "openai/gpt-4o-mini",
|
245 |
-
"bleu": 0.2339779422333898
|
246 |
-
},
|
247 |
-
{
|
248 |
-
"model": "meta-llama/llama-3.3-70b-instruct",
|
249 |
-
"bleu": 0.20475486619797384
|
250 |
-
},
|
251 |
-
{
|
252 |
-
"model": "mistralai/mistral-small-24b-instruct-2501",
|
253 |
-
"bleu": 0.20783660453505234
|
254 |
-
},
|
255 |
-
{
|
256 |
-
"model": "google/gemini-2.0-flash-001",
|
257 |
-
"bleu": 0.2840808045687292
|
258 |
-
},
|
259 |
-
{
|
260 |
-
"model": "deepseek/deepseek-chat",
|
261 |
-
"bleu": 0.2786287793608212
|
262 |
-
},
|
263 |
-
{
|
264 |
-
"model": "microsoft/phi-4",
|
265 |
-
"bleu": 0.19969813973959594
|
266 |
-
}
|
267 |
-
],
|
268 |
-
"bleu": 0.23482952277259375,
|
269 |
-
"commonvoice_hours": "N/A"
|
270 |
},
|
271 |
{
|
272 |
"language_name": "Urdu",
|
273 |
-
"
|
274 |
-
"speakers":
|
275 |
"scores": [
|
276 |
{
|
277 |
"model": "openai/gpt-4o-mini",
|
278 |
-
"bleu": 0.
|
279 |
},
|
280 |
{
|
281 |
"model": "meta-llama/llama-3.3-70b-instruct",
|
282 |
-
"bleu": 0.
|
283 |
},
|
284 |
{
|
285 |
"model": "mistralai/mistral-small-24b-instruct-2501",
|
286 |
-
"bleu": 0.
|
287 |
},
|
288 |
{
|
289 |
"model": "google/gemini-2.0-flash-001",
|
290 |
-
"bleu": 0.
|
291 |
},
|
292 |
{
|
293 |
"model": "deepseek/deepseek-chat",
|
294 |
-
"bleu": 0.
|
295 |
},
|
296 |
{
|
297 |
"model": "microsoft/phi-4",
|
298 |
-
"bleu": 0.
|
299 |
}
|
300 |
],
|
301 |
-
"bleu": 0.
|
302 |
"commonvoice_hours": 76.0
|
303 |
},
|
304 |
{
|
305 |
-
"language_name": "
|
306 |
-
"
|
307 |
-
"speakers":
|
308 |
-
"scores": [
|
309 |
-
{
|
310 |
-
"model": "meta-llama/llama-3.3-70b-instruct",
|
311 |
-
"bleu": 0.33268969497468076
|
312 |
-
}
|
313 |
-
],
|
314 |
-
"bleu": 0.33268969497468076,
|
315 |
-
"commonvoice_hours": "N/A"
|
316 |
-
},
|
317 |
-
{
|
318 |
-
"language_name": "Javanese",
|
319 |
-
"language_code": "jav",
|
320 |
-
"speakers": 84308740.0,
|
321 |
-
"scores": [
|
322 |
-
{
|
323 |
-
"model": "meta-llama/llama-3.3-70b-instruct",
|
324 |
-
"bleu": 0.2528746866064681
|
325 |
-
}
|
326 |
-
],
|
327 |
-
"bleu": 0.2528746866064681,
|
328 |
-
"commonvoice_hours": 0.0
|
329 |
-
},
|
330 |
-
{
|
331 |
-
"language_name": "Marathi",
|
332 |
-
"language_code": "mar",
|
333 |
-
"speakers": 83100000.0,
|
334 |
-
"scores": [
|
335 |
-
{
|
336 |
-
"model": "meta-llama/llama-3.3-70b-instruct",
|
337 |
-
"bleu": 0.24876051941895777
|
338 |
-
}
|
339 |
-
],
|
340 |
-
"bleu": 0.24876051941895777,
|
341 |
-
"commonvoice_hours": 20.0
|
342 |
-
},
|
343 |
-
{
|
344 |
-
"language_name": "Swahili",
|
345 |
-
"language_code": "swh",
|
346 |
-
"speakers": 82300000.0,
|
347 |
-
"scores": [
|
348 |
-
{
|
349 |
-
"model": "openai/gpt-4o-mini",
|
350 |
-
"bleu": 0.34863560100932933
|
351 |
-
},
|
352 |
-
{
|
353 |
-
"model": "meta-llama/llama-3.3-70b-instruct",
|
354 |
-
"bleu": 0.30524292832054034
|
355 |
-
},
|
356 |
-
{
|
357 |
-
"model": "mistralai/mistral-small-24b-instruct-2501",
|
358 |
-
"bleu": 0.23580256234118713
|
359 |
-
},
|
360 |
-
{
|
361 |
-
"model": "google/gemini-2.0-flash-001",
|
362 |
-
"bleu": 0.3871437234807849
|
363 |
-
},
|
364 |
-
{
|
365 |
-
"model": "deepseek/deepseek-chat",
|
366 |
-
"bleu": 0.3476225063617937
|
367 |
-
},
|
368 |
-
{
|
369 |
-
"model": "microsoft/phi-4",
|
370 |
-
"bleu": 0.21803176063271826
|
371 |
-
}
|
372 |
-
],
|
373 |
-
"bleu": 0.3070798470243923,
|
374 |
-
"commonvoice_hours": "N/A"
|
375 |
-
},
|
376 |
-
{
|
377 |
-
"language_name": "Turkish",
|
378 |
-
"language_code": "tur",
|
379 |
-
"speakers": 82231620.0,
|
380 |
-
"scores": [
|
381 |
-
{
|
382 |
-
"model": "meta-llama/llama-3.3-70b-instruct",
|
383 |
-
"bleu": 0.29874140544434125
|
384 |
-
}
|
385 |
-
],
|
386 |
-
"bleu": 0.29874140544434125,
|
387 |
-
"commonvoice_hours": 127.0
|
388 |
-
},
|
389 |
-
{
|
390 |
-
"language_name": "Telugu",
|
391 |
-
"language_code": "tel",
|
392 |
-
"speakers": 82000000.0,
|
393 |
-
"scores": [
|
394 |
-
{
|
395 |
-
"model": "meta-llama/llama-3.3-70b-instruct",
|
396 |
-
"bleu": 0.28869836899054496
|
397 |
-
}
|
398 |
-
],
|
399 |
-
"bleu": 0.28869836899054496,
|
400 |
-
"commonvoice_hours": 0.3
|
401 |
-
},
|
402 |
-
{
|
403 |
-
"language_name": "Wu Chinese",
|
404 |
-
"language_code": "wuu",
|
405 |
-
"speakers": 81400000.0,
|
406 |
-
"scores": [],
|
407 |
-
"bleu": null,
|
408 |
-
"commonvoice_hours": "N/A"
|
409 |
-
},
|
410 |
-
{
|
411 |
-
"language_name": "Korean",
|
412 |
-
"language_code": "kor",
|
413 |
-
"speakers": 77300000.0,
|
414 |
-
"scores": [
|
415 |
-
{
|
416 |
-
"model": "meta-llama/llama-3.3-70b-instruct",
|
417 |
-
"bleu": 0.2566453806044083
|
418 |
-
}
|
419 |
-
],
|
420 |
-
"bleu": 0.2566453806044083,
|
421 |
-
"commonvoice_hours": 1.7
|
422 |
-
},
|
423 |
-
{
|
424 |
-
"language_name": "Vietnamese",
|
425 |
-
"language_code": "vie",
|
426 |
-
"speakers": 76000000.0,
|
427 |
-
"scores": [
|
428 |
-
{
|
429 |
-
"model": "openai/gpt-4o-mini",
|
430 |
-
"bleu": 0.3104431723374164
|
431 |
-
},
|
432 |
-
{
|
433 |
-
"model": "meta-llama/llama-3.3-70b-instruct",
|
434 |
-
"bleu": 0.3098478561790782
|
435 |
-
},
|
436 |
-
{
|
437 |
-
"model": "mistralai/mistral-small-24b-instruct-2501",
|
438 |
-
"bleu": 0.28074941515909896
|
439 |
-
},
|
440 |
-
{
|
441 |
-
"model": "google/gemini-2.0-flash-001",
|
442 |
-
"bleu": 0.37327273228460267
|
443 |
-
},
|
444 |
-
{
|
445 |
-
"model": "deepseek/deepseek-chat",
|
446 |
-
"bleu": 0.3487726531917602
|
447 |
-
},
|
448 |
-
{
|
449 |
-
"model": "microsoft/phi-4",
|
450 |
-
"bleu": 0.18355331419148843
|
451 |
-
}
|
452 |
-
],
|
453 |
-
"bleu": 0.3011065238905742,
|
454 |
-
"commonvoice_hours": 5.9
|
455 |
-
},
|
456 |
-
{
|
457 |
-
"language_name": "Tamil",
|
458 |
-
"language_code": "tam",
|
459 |
-
"speakers": 75000000.0,
|
460 |
-
"scores": [
|
461 |
-
{
|
462 |
-
"model": "openai/gpt-4o-mini",
|
463 |
-
"bleu": 0.24593649157372188
|
464 |
-
},
|
465 |
-
{
|
466 |
-
"model": "meta-llama/llama-3.3-70b-instruct",
|
467 |
-
"bleu": 0.24009996232522382
|
468 |
-
},
|
469 |
-
{
|
470 |
-
"model": "mistralai/mistral-small-24b-instruct-2501",
|
471 |
-
"bleu": 0.16785828803139252
|
472 |
-
},
|
473 |
-
{
|
474 |
-
"model": "google/gemini-2.0-flash-001",
|
475 |
-
"bleu": 0.3411457686951495
|
476 |
-
},
|
477 |
-
{
|
478 |
-
"model": "deepseek/deepseek-chat",
|
479 |
-
"bleu": 0.2875340171253509
|
480 |
-
},
|
481 |
-
{
|
482 |
-
"model": "microsoft/phi-4",
|
483 |
-
"bleu": 0.12646276530642359
|
484 |
-
}
|
485 |
-
],
|
486 |
-
"bleu": 0.23483954884287706,
|
487 |
-
"commonvoice_hours": 234.0
|
488 |
-
},
|
489 |
-
{
|
490 |
-
"language_name": "Yue Chinese",
|
491 |
-
"language_code": "yue",
|
492 |
-
"speakers": 73100000.0,
|
493 |
"scores": [
|
494 |
{
|
495 |
"model": "meta-llama/llama-3.3-70b-instruct",
|
496 |
-
"bleu": 0.
|
497 |
}
|
498 |
],
|
499 |
-
"bleu": 0.
|
500 |
-
"commonvoice_hours":
|
501 |
},
|
502 |
{
|
503 |
-
"language_name": "
|
504 |
-
"
|
505 |
-
"speakers":
|
506 |
"scores": [
|
507 |
{
|
508 |
"model": "meta-llama/llama-3.3-70b-instruct",
|
509 |
-
"bleu": 0.
|
510 |
}
|
511 |
],
|
512 |
-
"bleu": 0.
|
513 |
-
"commonvoice_hours":
|
514 |
},
|
515 |
{
|
516 |
-
"language_name": "
|
517 |
-
"
|
518 |
-
"speakers":
|
519 |
"scores": [
|
520 |
{
|
521 |
"model": "openai/gpt-4o-mini",
|
522 |
-
"bleu": 0.
|
523 |
},
|
524 |
{
|
525 |
"model": "meta-llama/llama-3.3-70b-instruct",
|
526 |
-
"bleu": 0.
|
527 |
},
|
528 |
{
|
529 |
"model": "mistralai/mistral-small-24b-instruct-2501",
|
530 |
-
"bleu": 0.
|
531 |
},
|
532 |
{
|
533 |
"model": "google/gemini-2.0-flash-001",
|
534 |
-
"bleu": 0.
|
535 |
},
|
536 |
{
|
537 |
"model": "deepseek/deepseek-chat",
|
538 |
-
"bleu": 0.
|
539 |
},
|
540 |
{
|
541 |
"model": "microsoft/phi-4",
|
542 |
-
"bleu": 0.
|
543 |
}
|
544 |
],
|
545 |
-
"bleu": 0.
|
546 |
-
"commonvoice_hours":
|
547 |
-
},
|
548 |
-
{
|
549 |
-
"language_name": "Iranian Persian",
|
550 |
-
"language_code": "pes",
|
551 |
-
"speakers": 52800000.0,
|
552 |
-
"scores": [
|
553 |
-
{
|
554 |
-
"model": "meta-llama/llama-3.3-70b-instruct",
|
555 |
-
"bleu": 0.28359916806993934
|
556 |
-
}
|
557 |
-
],
|
558 |
-
"bleu": 0.28359916806993934,
|
559 |
-
"commonvoice_hours": "N/A"
|
560 |
},
|
561 |
{
|
562 |
-
"language_name": "
|
563 |
-
"
|
564 |
-
"speakers":
|
565 |
"scores": [
|
566 |
{
|
567 |
"model": "meta-llama/llama-3.3-70b-instruct",
|
568 |
-
"bleu": 0.
|
569 |
}
|
570 |
],
|
571 |
-
"bleu": 0.
|
572 |
-
"commonvoice_hours":
|
573 |
-
},
|
574 |
-
{
|
575 |
-
"language_name": "Hakka Chinese",
|
576 |
-
"language_code": "hak",
|
577 |
-
"speakers": 48200000.0,
|
578 |
-
"scores": [],
|
579 |
-
"bleu": null,
|
580 |
-
"commonvoice_hours": "N/A"
|
581 |
}
|
582 |
]
|
|
|
1 |
[
|
2 |
{
|
3 |
"language_name": "English",
|
4 |
+
"bcp_47": "en",
|
5 |
+
"speakers": 1636485840,
|
6 |
"scores": [
|
|
|
|
|
|
|
|
|
7 |
{
|
8 |
"model": "meta-llama/llama-3.3-70b-instruct",
|
9 |
+
"bleu": 0.4931825583688982
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
10 |
}
|
11 |
],
|
12 |
+
"bleu": 0.4931825583688982,
|
13 |
"commonvoice_hours": 2649.0
|
14 |
},
|
15 |
{
|
16 |
+
"language_name": "Chinese",
|
17 |
+
"bcp_47": "zh",
|
18 |
+
"speakers": 1304678914,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
19 |
"scores": [
|
20 |
{
|
21 |
"model": "openai/gpt-4o-mini",
|
22 |
+
"bleu": 0.4807599914028467
|
23 |
},
|
24 |
{
|
25 |
"model": "meta-llama/llama-3.3-70b-instruct",
|
26 |
+
"bleu": 0.48224897154012053
|
27 |
},
|
28 |
{
|
29 |
"model": "mistralai/mistral-small-24b-instruct-2501",
|
30 |
+
"bleu": 0.2688927547323512
|
31 |
},
|
32 |
{
|
33 |
"model": "google/gemini-2.0-flash-001",
|
34 |
+
"bleu": 0.4876059353172742
|
35 |
},
|
36 |
{
|
37 |
"model": "deepseek/deepseek-chat",
|
38 |
+
"bleu": 0.46126489333496423
|
39 |
},
|
40 |
{
|
41 |
"model": "microsoft/phi-4",
|
42 |
+
"bleu": 0.43306718920654086
|
43 |
}
|
44 |
],
|
45 |
+
"bleu": 0.4356399559223496,
|
46 |
+
"commonvoice_hours": 422.0
|
47 |
},
|
48 |
{
|
49 |
+
"language_name": "Hindi",
|
50 |
+
"bcp_47": "hi",
|
51 |
+
"speakers": 546882144,
|
52 |
"scores": [
|
|
|
|
|
|
|
|
|
53 |
{
|
54 |
"model": "meta-llama/llama-3.3-70b-instruct",
|
55 |
+
"bleu": 0.42910938007537924
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
56 |
}
|
57 |
],
|
58 |
+
"bleu": 0.42910938007537924,
|
59 |
+
"commonvoice_hours": 16.0
|
60 |
},
|
61 |
{
|
62 |
+
"language_name": "Spanish",
|
63 |
+
"bcp_47": "es",
|
64 |
+
"speakers": 493528077,
|
65 |
"scores": [
|
66 |
{
|
67 |
"model": "meta-llama/llama-3.3-70b-instruct",
|
68 |
+
"bleu": 0.3335615012680206
|
69 |
}
|
70 |
],
|
71 |
+
"bleu": 0.3335615012680206,
|
72 |
+
"commonvoice_hours": 446.0
|
73 |
},
|
74 |
{
|
75 |
+
"language_name": "Arabic",
|
76 |
+
"bcp_47": "ar",
|
77 |
+
"speakers": 351664197,
|
78 |
"scores": [
|
|
|
|
|
|
|
|
|
79 |
{
|
80 |
"model": "meta-llama/llama-3.3-70b-instruct",
|
81 |
+
"bleu": 0.19072998559991275
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
82 |
}
|
83 |
],
|
84 |
+
"bleu": 0.19072998559991275,
|
85 |
+
"commonvoice_hours": 91.0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
86 |
},
|
87 |
{
|
88 |
"language_name": "Urdu",
|
89 |
+
"bcp_47": "ur",
|
90 |
+
"speakers": 290790290,
|
91 |
"scores": [
|
92 |
{
|
93 |
"model": "openai/gpt-4o-mini",
|
94 |
+
"bleu": 0.3223557428811336
|
95 |
},
|
96 |
{
|
97 |
"model": "meta-llama/llama-3.3-70b-instruct",
|
98 |
+
"bleu": 0.3361392064611452
|
99 |
},
|
100 |
{
|
101 |
"model": "mistralai/mistral-small-24b-instruct-2501",
|
102 |
+
"bleu": 0.30361668093990973
|
103 |
},
|
104 |
{
|
105 |
"model": "google/gemini-2.0-flash-001",
|
106 |
+
"bleu": 0.38811035932918286
|
107 |
},
|
108 |
{
|
109 |
"model": "deepseek/deepseek-chat",
|
110 |
+
"bleu": 0.33221997814253806
|
111 |
},
|
112 |
{
|
113 |
"model": "microsoft/phi-4",
|
114 |
+
"bleu": 0.2541447606474814
|
115 |
}
|
116 |
],
|
117 |
+
"bleu": 0.32276445473356513,
|
118 |
"commonvoice_hours": 76.0
|
119 |
},
|
120 |
{
|
121 |
+
"language_name": "French",
|
122 |
+
"bcp_47": "fr",
|
123 |
+
"speakers": 278611507,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
124 |
"scores": [
|
125 |
{
|
126 |
"model": "meta-llama/llama-3.3-70b-instruct",
|
127 |
+
"bleu": 0.40595466651226686
|
128 |
}
|
129 |
],
|
130 |
+
"bleu": 0.40595466651226686,
|
131 |
+
"commonvoice_hours": 1051.0
|
132 |
},
|
133 |
{
|
134 |
+
"language_name": "Bangla",
|
135 |
+
"bcp_47": "bn",
|
136 |
+
"speakers": 267193288,
|
137 |
"scores": [
|
138 |
{
|
139 |
"model": "meta-llama/llama-3.3-70b-instruct",
|
140 |
+
"bleu": 0.30570858536443696
|
141 |
}
|
142 |
],
|
143 |
+
"bleu": 0.30570858536443696,
|
144 |
+
"commonvoice_hours": 49.0
|
145 |
},
|
146 |
{
|
147 |
+
"language_name": "Portuguese",
|
148 |
+
"bcp_47": "pt",
|
149 |
+
"speakers": 237496885,
|
150 |
"scores": [
|
151 |
{
|
152 |
"model": "openai/gpt-4o-mini",
|
153 |
+
"bleu": 0.4122096638493346
|
154 |
},
|
155 |
{
|
156 |
"model": "meta-llama/llama-3.3-70b-instruct",
|
157 |
+
"bleu": 0.39250552075952033
|
158 |
},
|
159 |
{
|
160 |
"model": "mistralai/mistral-small-24b-instruct-2501",
|
161 |
+
"bleu": 0.22643923104785263
|
162 |
},
|
163 |
{
|
164 |
"model": "google/gemini-2.0-flash-001",
|
165 |
+
"bleu": 0.42197093736929103
|
166 |
},
|
167 |
{
|
168 |
"model": "deepseek/deepseek-chat",
|
169 |
+
"bleu": 0.42783260235353093
|
170 |
},
|
171 |
{
|
172 |
"model": "microsoft/phi-4",
|
173 |
+
"bleu": 0.38611444119797594
|
174 |
}
|
175 |
],
|
176 |
+
"bleu": 0.3778453994295843,
|
177 |
+
"commonvoice_hours": 176.0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
178 |
},
|
179 |
{
|
180 |
+
"language_name": "Punjabi",
|
181 |
+
"bcp_47": "pa",
|
182 |
+
"speakers": 203571210,
|
183 |
"scores": [
|
184 |
{
|
185 |
"model": "meta-llama/llama-3.3-70b-instruct",
|
186 |
+
"bleu": 0.34311946995454473
|
187 |
}
|
188 |
],
|
189 |
+
"bleu": 0.34311946995454473,
|
190 |
+
"commonvoice_hours": 2.3
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
191 |
}
|
192 |
]
|
uv.lock
CHANGED
@@ -898,6 +898,30 @@ wheels = [
|
|
898 |
{ url = "https://files.pythonhosted.org/packages/ea/8b/d7497df4a1cae9367adf21665dd1f896c2a7aeb8769ad77b662c5e2bcce7/kiwisolver-1.4.7-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:eb542fe7933aa09d8d8f9d9097ef37532a7df6497819d16efe4359890a2f417a", size = 55715 },
|
899 |
]
|
900 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
901 |
[[package]]
|
902 |
name = "languagebench"
|
903 |
version = "0.1.0"
|
@@ -914,6 +938,7 @@ dev = [
|
|
914 |
{ name = "bert-score" },
|
915 |
{ name = "evaluate" },
|
916 |
{ name = "joblib" },
|
|
|
917 |
{ name = "openai" },
|
918 |
{ name = "protobuf" },
|
919 |
{ name = "python-dotenv" },
|
@@ -937,6 +962,7 @@ dev = [
|
|
937 |
{ name = "bert-score", specifier = ">=0.3.13" },
|
938 |
{ name = "evaluate", specifier = "==0.4.0" },
|
939 |
{ name = "joblib", specifier = ">=1.4.2" },
|
|
|
940 |
{ name = "openai", specifier = ">=1.52.2" },
|
941 |
{ name = "protobuf", specifier = ">=5.28.3" },
|
942 |
{ name = "python-dotenv", specifier = ">=1.0.1" },
|
@@ -1029,6 +1055,61 @@ wheels = [
|
|
1029 |
{ url = "https://files.pythonhosted.org/packages/ba/b2/6a22fb5c0885da3b00e116aee81f0b829ec9ac8f736cd414b4a09413fc7d/lxml-5.3.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:6e91cf736959057f7aac7adfc83481e03615a8e8dd5758aa1d95ea69e8931dba", size = 3487557 },
|
1030 |
]
|
1031 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1032 |
[[package]]
|
1033 |
name = "markdown-it-py"
|
1034 |
version = "3.0.0"
|
|
|
898 |
{ url = "https://files.pythonhosted.org/packages/ea/8b/d7497df4a1cae9367adf21665dd1f896c2a7aeb8769ad77b662c5e2bcce7/kiwisolver-1.4.7-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:eb542fe7933aa09d8d8f9d9097ef37532a7df6497819d16efe4359890a2f417a", size = 55715 },
|
899 |
]
|
900 |
|
901 |
+
[[package]]
|
902 |
+
name = "langcodes"
|
903 |
+
version = "3.5.0"
|
904 |
+
source = { registry = "https://pypi.org/simple" }
|
905 |
+
dependencies = [
|
906 |
+
{ name = "language-data" },
|
907 |
+
]
|
908 |
+
sdist = { url = "https://files.pythonhosted.org/packages/3a/7a/5a97e327063409a5caa21541e6d08ae4a0f2da328447e9f2c7b39e179226/langcodes-3.5.0.tar.gz", hash = "sha256:1eef8168d07e51e131a2497ffecad4b663f6208e7c3ae3b8dc15c51734a6f801", size = 191030 }
|
909 |
+
wheels = [
|
910 |
+
{ url = "https://files.pythonhosted.org/packages/c3/6b/068c2ea7a712bf805c62445bd9e9c06d7340358ef2824150eceac027444b/langcodes-3.5.0-py3-none-any.whl", hash = "sha256:853c69d1a35e0e13da2f427bb68fb2fa4a8f4fb899e0c62ad8df8d073dcfed33", size = 182974 },
|
911 |
+
]
|
912 |
+
|
913 |
+
[[package]]
|
914 |
+
name = "language-data"
|
915 |
+
version = "1.3.0"
|
916 |
+
source = { registry = "https://pypi.org/simple" }
|
917 |
+
dependencies = [
|
918 |
+
{ name = "marisa-trie" },
|
919 |
+
]
|
920 |
+
sdist = { url = "https://files.pythonhosted.org/packages/dd/ce/3f144716a9f2cbf42aa86ebc8b085a184be25c80aa453eea17c294d239c1/language_data-1.3.0.tar.gz", hash = "sha256:7600ef8aa39555145d06c89f0c324bf7dab834ea0b0a439d8243762e3ebad7ec", size = 5129310 }
|
921 |
+
wheels = [
|
922 |
+
{ url = "https://files.pythonhosted.org/packages/5d/e9/5a5ffd9b286db82be70d677d0a91e4d58f7912bb8dd026ddeeb4abe70679/language_data-1.3.0-py3-none-any.whl", hash = "sha256:e2ee943551b5ae5f89cd0e801d1fc3835bb0ef5b7e9c3a4e8e17b2b214548fbf", size = 5385760 },
|
923 |
+
]
|
924 |
+
|
925 |
[[package]]
|
926 |
name = "languagebench"
|
927 |
version = "0.1.0"
|
|
|
938 |
{ name = "bert-score" },
|
939 |
{ name = "evaluate" },
|
940 |
{ name = "joblib" },
|
941 |
+
{ name = "langcodes" },
|
942 |
{ name = "openai" },
|
943 |
{ name = "protobuf" },
|
944 |
{ name = "python-dotenv" },
|
|
|
962 |
{ name = "bert-score", specifier = ">=0.3.13" },
|
963 |
{ name = "evaluate", specifier = "==0.4.0" },
|
964 |
{ name = "joblib", specifier = ">=1.4.2" },
|
965 |
+
{ name = "langcodes", specifier = ">=3.5.0" },
|
966 |
{ name = "openai", specifier = ">=1.52.2" },
|
967 |
{ name = "protobuf", specifier = ">=5.28.3" },
|
968 |
{ name = "python-dotenv", specifier = ">=1.0.1" },
|
|
|
1055 |
{ url = "https://files.pythonhosted.org/packages/ba/b2/6a22fb5c0885da3b00e116aee81f0b829ec9ac8f736cd414b4a09413fc7d/lxml-5.3.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:6e91cf736959057f7aac7adfc83481e03615a8e8dd5758aa1d95ea69e8931dba", size = 3487557 },
|
1056 |
]
|
1057 |
|
1058 |
+
[[package]]
|
1059 |
+
name = "marisa-trie"
|
1060 |
+
version = "1.2.1"
|
1061 |
+
source = { registry = "https://pypi.org/simple" }
|
1062 |
+
dependencies = [
|
1063 |
+
{ name = "setuptools" },
|
1064 |
+
]
|
1065 |
+
sdist = { url = "https://files.pythonhosted.org/packages/31/15/9d9743897e4450b2de199ee673b50cb018980c4ced477d41cf91304a85e3/marisa_trie-1.2.1.tar.gz", hash = "sha256:3a27c408e2aefc03e0f1d25b2ff2afb85aac3568f6fa2ae2a53b57a2e87ce29d", size = 416124 }
|
1066 |
+
wheels = [
|
1067 |
+
{ url = "https://files.pythonhosted.org/packages/e4/83/ccf5b33f2123f3110705c608f8e0caa82002626511aafafc58f82e50d322/marisa_trie-1.2.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:a2eb41d2f9114d8b7bd66772c237111e00d2bae2260824560eaa0a1e291ce9e8", size = 362200 },
|
1068 |
+
{ url = "https://files.pythonhosted.org/packages/9d/74/f7ce1fc2ee480c7f8ceadd9b992caceaba442a97e5e99d6aea00d3635a0b/marisa_trie-1.2.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:9e956e6a46f604b17d570901e66f5214fb6f658c21e5e7665deace236793cef6", size = 192309 },
|
1069 |
+
{ url = "https://files.pythonhosted.org/packages/e4/52/5dbbc13e57ce54c2ef0d04962d7d8f66edc69ed34310c734a2913199a581/marisa_trie-1.2.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:bd45142501300e7538b2e544905580918b67b1c82abed1275fe4c682c95635fa", size = 174713 },
|
1070 |
+
{ url = "https://files.pythonhosted.org/packages/57/49/2580372f3f980aea95c23d05b2c1d3bbb9ee1ab8cfd441545153e44f1be7/marisa_trie-1.2.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a8443d116c612cfd1961fbf76769faf0561a46d8e317315dd13f9d9639ad500c", size = 1314808 },
|
1071 |
+
{ url = "https://files.pythonhosted.org/packages/5a/ba/e12a4d450f265414cc68df6a116a78beece72b95f774f04d29cd48e08d19/marisa_trie-1.2.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:875a6248e60fbb48d947b574ffa4170f34981f9e579bde960d0f9a49ea393ecc", size = 1346678 },
|
1072 |
+
{ url = "https://files.pythonhosted.org/packages/b2/81/8e130cb1eea741fd17694d821096f7ec9841f0e3d3c69b740257f5eeafa8/marisa_trie-1.2.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:746a7c60a17fccd3cfcfd4326926f02ea4fcdfc25d513411a0c4fc8e4a1ca51f", size = 1307254 },
|
1073 |
+
{ url = "https://files.pythonhosted.org/packages/d7/d0/3deb5ea2bf7e4d845339875dbb31f3c3f66c8d6568723db1d137fb08a91c/marisa_trie-1.2.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:e70869737cc0e5bd903f620667da6c330d6737048d1f44db792a6af68a1d35be", size = 2194712 },
|
1074 |
+
{ url = "https://files.pythonhosted.org/packages/9c/5f/b38d728dd30954816497b53425cfaddaf7b93ac0912db5911888f191b07a/marisa_trie-1.2.1-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:06b099dd743676dbcd8abd8465ceac8f6d97d8bfaabe2c83b965495523b4cef2", size = 2355625 },
|
1075 |
+
{ url = "https://files.pythonhosted.org/packages/7e/4f/61c0faa9ae9e53600a1b7a0c367bc9db1a4fdc625402ec232c755a05e094/marisa_trie-1.2.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:d2a82eb21afdaf22b50d9b996472305c05ca67fc4ff5a026a220320c9c961db6", size = 2290290 },
|
1076 |
+
{ url = "https://files.pythonhosted.org/packages/7c/7d/713b970fb3043248881ed776dbf4d54918398aa5dde843a38711d0d62c8f/marisa_trie-1.2.1-cp310-cp310-win32.whl", hash = "sha256:8951e7ce5d3167fbd085703b4cbb3f47948ed66826bef9a2173c379508776cf5", size = 130743 },
|
1077 |
+
{ url = "https://files.pythonhosted.org/packages/cc/94/3d619cc82c30daeacd18a88674f4e6540ebfb7b4b7752ca0552793be80cf/marisa_trie-1.2.1-cp310-cp310-win_amd64.whl", hash = "sha256:5685a14b3099b1422c4f59fa38b0bf4b5342ee6cc38ae57df9666a0b28eeaad3", size = 151891 },
|
1078 |
+
{ url = "https://files.pythonhosted.org/packages/4a/93/ffb01dfa22b6eee918e798e0bc3487427036c608aa4c065725f31aaf4104/marisa_trie-1.2.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:ed3fb4ed7f2084597e862bcd56c56c5529e773729a426c083238682dba540e98", size = 362823 },
|
1079 |
+
{ url = "https://files.pythonhosted.org/packages/6d/1d/5c36500ac350c278c9bdfd88e17fa846fa4136d75597c167141ed973cdf2/marisa_trie-1.2.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:0fe69fb9ffb2767746181f7b3b29bbd3454d1d24717b5958e030494f3d3cddf3", size = 192741 },
|
1080 |
+
{ url = "https://files.pythonhosted.org/packages/e8/04/87dd0840f3f720e511eba56193c02bf64d7d96df1ca9f6d19994f55154be/marisa_trie-1.2.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:4728ed3ae372d1ea2cdbd5eaa27b8f20a10e415d1f9d153314831e67d963f281", size = 174995 },
|
1081 |
+
{ url = "https://files.pythonhosted.org/packages/c9/51/9e903a7e13b7593e2e675d0ec4c390ca076dc5df1c1a0d5e85a513b886a3/marisa_trie-1.2.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8cf4f25cf895692b232f49aa5397af6aba78bb679fb917a05fce8d3cb1ee446d", size = 1384728 },
|
1082 |
+
{ url = "https://files.pythonhosted.org/packages/e8/3f/7362a5ac60c2b0aad0f52cd57e7bd0c708f20d2660d8df85360f3d8f1c4b/marisa_trie-1.2.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7cca7f96236ffdbf49be4b2e42c132e3df05968ac424544034767650913524de", size = 1412620 },
|
1083 |
+
{ url = "https://files.pythonhosted.org/packages/1f/bc/aaa3eaf6875f78a204a8da9692d56e3a36f89997dad2c388628385614576/marisa_trie-1.2.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d7eb20bf0e8b55a58d2a9b518aabc4c18278787bdba476c551dd1c1ed109e509", size = 1361555 },
|
1084 |
+
{ url = "https://files.pythonhosted.org/packages/18/98/e11b5a6206c5d110f32adab37fa84a85410d684e9c731acdd5c9250e2ce4/marisa_trie-1.2.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:b1ec93f0d1ee6d7ab680a6d8ea1a08bf264636358e92692072170032dda652ba", size = 2257717 },
|
1085 |
+
{ url = "https://files.pythonhosted.org/packages/d2/9d/6b4a40867875e738a67c5b29f83e2e490a66bd9067ace3dd9a5c497e2b7f/marisa_trie-1.2.1-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:e2699255d7ac610dee26d4ae7bda5951d05c7d9123a22e1f7c6a6f1964e0a4e4", size = 2417044 },
|
1086 |
+
{ url = "https://files.pythonhosted.org/packages/fe/61/e25613c72f2931757334b8bcf6b501569ef713f5ee9c6c7688ec460bd720/marisa_trie-1.2.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:c484410911182457a8a1a0249d0c09c01e2071b78a0a8538cd5f7fa45589b13a", size = 2351960 },
|
1087 |
+
{ url = "https://files.pythonhosted.org/packages/19/0a/a90ccaf3eb476d13ec261f80c6c52defaf10ebc7f35eb2bcd7dfb533aef7/marisa_trie-1.2.1-cp311-cp311-win32.whl", hash = "sha256:ad548117744b2bcf0e3d97374608be0a92d18c2af13d98b728d37cd06248e571", size = 130446 },
|
1088 |
+
{ url = "https://files.pythonhosted.org/packages/fc/98/574b4e143e0a2f5f71af8716b6c4a8a46220f75a6e0847ce7d11ee0ba4aa/marisa_trie-1.2.1-cp311-cp311-win_amd64.whl", hash = "sha256:436f62d27714970b9cdd3b3c41bdad046f260e62ebb0daa38125ef70536fc73b", size = 152037 },
|
1089 |
+
{ url = "https://files.pythonhosted.org/packages/4e/bf/8bd4ac8436b33fd46c9e1ffe3c2a131cd9744cc1649dbbe13308f744ef2b/marisa_trie-1.2.1-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:638506eacf20ca503fff72221a7e66a6eadbf28d6a4a6f949fcf5b1701bb05ec", size = 360041 },
|
1090 |
+
{ url = "https://files.pythonhosted.org/packages/ab/dd/4d3151e302e66ae387885f6ec265bd189e096b0c43c1379bfd9a3b9d2543/marisa_trie-1.2.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:de1665eaafefa48a308e4753786519888021740501a15461c77bdfd57638e6b4", size = 190520 },
|
1091 |
+
{ url = "https://files.pythonhosted.org/packages/00/28/ae5991c74fb90b173167a366a634c83445f948ad044d37287b478d6b457e/marisa_trie-1.2.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:f713af9b8aa66a34cd3a78c7d150a560a75734713abe818a69021fd269e927fa", size = 174175 },
|
1092 |
+
{ url = "https://files.pythonhosted.org/packages/5a/6a/fbfa89a8680eaabc6847a6c421e65427c43182db0c4bdb60e1516c81c822/marisa_trie-1.2.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b2a7d00f53f4945320b551bccb826b3fb26948bde1a10d50bb9802fabb611b10", size = 1354995 },
|
1093 |
+
{ url = "https://files.pythonhosted.org/packages/9e/4c/2ba0b385e5f64ca4ddb0c10ec52ddf881bc4521f135948786fc339d1d6c8/marisa_trie-1.2.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:98042040d1d6085792e8d0f74004fc0f5f9ca6091c298f593dd81a22a4643854", size = 1390989 },
|
1094 |
+
{ url = "https://files.pythonhosted.org/packages/6b/22/0791ed3045c91d0938345a86be472fc7c188b894f16c5dfad2ef31e7f882/marisa_trie-1.2.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6532615111eec2c79e711965ece0bc95adac1ff547a7fff5ffca525463116deb", size = 1328810 },
|
1095 |
+
{ url = "https://files.pythonhosted.org/packages/9d/7d/3f566e563abae6efce7fc311c63282a447c611739b3cd66c0e36077c86f8/marisa_trie-1.2.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:20948e40ab2038e62b7000ca6b4a913bc16c91a2c2e6da501bd1f917eeb28d51", size = 2230222 },
|
1096 |
+
{ url = "https://files.pythonhosted.org/packages/a5/0b/38fbb4611b5d1030242ddc2aa62e524438c8076e26f87395dbbf222dc62d/marisa_trie-1.2.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:66b23e5b35dd547f85bf98db7c749bc0ffc57916ade2534a6bbc32db9a4abc44", size = 2383620 },
|
1097 |
+
{ url = "https://files.pythonhosted.org/packages/ae/17/4553c63de29904d5d2521a24cad817bc7883cfa90506ab702ec4dae59a7b/marisa_trie-1.2.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:6704adf0247d2dda42e876b793be40775dff46624309ad99bc7537098bee106d", size = 2329202 },
|
1098 |
+
{ url = "https://files.pythonhosted.org/packages/45/08/6307a630e63cd763fe77ac56516faa67fa9cd342060691e40fabc84be6b0/marisa_trie-1.2.1-cp312-cp312-win32.whl", hash = "sha256:3ad356442c2fea4c2a6f514738ddf213d23930f942299a2b2c05df464a00848a", size = 129652 },
|
1099 |
+
{ url = "https://files.pythonhosted.org/packages/a1/fe/67c357bfd92710d95a16b86e1453c663d565415d7f7838781c79ff7e1a7e/marisa_trie-1.2.1-cp312-cp312-win_amd64.whl", hash = "sha256:f2806f75817392cedcacb24ac5d80b0350dde8d3861d67d045c1d9b109764114", size = 150845 },
|
1100 |
+
{ url = "https://files.pythonhosted.org/packages/2a/a4/a110cd9952f0e72da7bafea1f0084b18b9e03952110d9083bfda52279f5c/marisa_trie-1.2.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:b5ea16e69bfda0ac028c921b58de1a4aaf83d43934892977368579cd3c0a2554", size = 354439 },
|
1101 |
+
{ url = "https://files.pythonhosted.org/packages/3c/a5/a6099eb1c3fd8d7e93408c45501e1d08536ac57dfef02ec331f78e1ace18/marisa_trie-1.2.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:9f627f4e41be710b6cb6ed54b0128b229ac9d50e2054d9cde3af0fef277c23cf", size = 188187 },
|
1102 |
+
{ url = "https://files.pythonhosted.org/packages/7c/cc/f637127e2beffa920d21f7fc45b4029575bcd1b28a90c0d90cb2b08c2205/marisa_trie-1.2.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:5e649f3dc8ab5476732094f2828cc90cac3be7c79bc0c8318b6fda0c1d248db4", size = 171484 },
|
1103 |
+
{ url = "https://files.pythonhosted.org/packages/6d/0f/29f2ad7260b956570f69f25a542efa51ba76eb76ecd53c63ee9d21987c3d/marisa_trie-1.2.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:46e528ee71808c961baf8c3ce1c46a8337ec7a96cc55389d11baafe5b632f8e9", size = 1319770 },
|
1104 |
+
{ url = "https://files.pythonhosted.org/packages/f2/12/0b69ed61fba59551a5f3d569af367afae614db7214ce1da12946ba9a433a/marisa_trie-1.2.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:36aa4401a1180615f74d575571a6550081d84fc6461e9aefc0bb7b2427af098e", size = 1356488 },
|
1105 |
+
{ url = "https://files.pythonhosted.org/packages/33/23/483b110db7ffe8729d6ebea2bf74258aef51f10fef5775f99e4bac7aef69/marisa_trie-1.2.1-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ce59bcd2cda9bb52b0e90cc7f36413cd86c3d0ce7224143447424aafb9f4aa48", size = 1302334 },
|
1106 |
+
{ url = "https://files.pythonhosted.org/packages/1c/6f/46c2be99ce925985127fdf78900f1673bce8cb72debfebee6dccd11032c6/marisa_trie-1.2.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:f4cd800704a5fc57e53c39c3a6b0c9b1519ebdbcb644ede3ee67a06eb542697d", size = 2202624 },
|
1107 |
+
{ url = "https://files.pythonhosted.org/packages/fd/b6/ef642327dbd4ec35be55d5682520b8f70fca98a54024f441ef2732f6b305/marisa_trie-1.2.1-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:2428b495003c189695fb91ceeb499f9fcced3a2dce853e17fa475519433c67ff", size = 2364206 },
|
1108 |
+
{ url = "https://files.pythonhosted.org/packages/69/04/ef8197a79d0ab5043b781cc9b457bd11b81d4204fe78adf7625a67f48c21/marisa_trie-1.2.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:735c363d9aaac82eaf516a28f7c6b95084c2e176d8231c87328dc80e112a9afa", size = 2304801 },
|
1109 |
+
{ url = "https://files.pythonhosted.org/packages/03/72/f87564d653daf31d8f33d9bf0121e99ccc21f18f5c485fb404ba06abc10e/marisa_trie-1.2.1-cp313-cp313-win32.whl", hash = "sha256:eba6ca45500ca1a042466a0684aacc9838e7f20fe2605521ee19f2853062798f", size = 128799 },
|
1110 |
+
{ url = "https://files.pythonhosted.org/packages/27/40/5f9eb8b73030cc4b0d6817176e66079a62a2ddd9d5530da54f8011473428/marisa_trie-1.2.1-cp313-cp313-win_amd64.whl", hash = "sha256:aa7cd17e1c690ce96c538b2f4aae003d9a498e65067dd433c52dd069009951d4", size = 149035 },
|
1111 |
+
]
|
1112 |
+
|
1113 |
[[package]]
|
1114 |
name = "markdown-it-py"
|
1115 |
version = "3.0.0"
|