davidpomerenke committed on
Commit
913253a
·
verified ·
1 Parent(s): 7fce0be

Upload from GitHub Actions: Use FLORES+ via Huggingface

Browse files
README.md CHANGED
@@ -43,6 +43,15 @@ For tag meaning, see https://huggingface.co/spaces/leaderboards/LeaderboardsExpl
43
 
44
  _Tracking language proficiency of AI models for every language_
45
 
 
 
 
 
 
 
 
 
46
  ```bash
47
- uv run evals/main.py
 
48
  ```
 
43
 
44
  _Tracking language proficiency of AI models for every language_
45
 
46
+ ## Evaluate
47
+
48
+ ```bash
49
+ uv run --extra dev evals/main.py
50
+ ```
51
+
52
+ ## Explore
53
+
54
  ```bash
55
+ uv run evals/backend.py
56
+ cd frontend && npm i && npm start
57
  ```
evals/backend.py CHANGED
@@ -23,7 +23,7 @@ def mean(lst):
23
  task_metrics = [
24
  "translation_from_bleu",
25
  "translation_to_bleu",
26
- # "classification_accuracy",
27
  "mmlu_accuracy",
28
  ]
29
 
 
23
  task_metrics = [
24
  "translation_from_bleu",
25
  "translation_to_bleu",
26
+ "classification_accuracy",
27
  "mmlu_accuracy",
28
  ]
29
 
evals/datasets_/flores.py CHANGED
@@ -1,15 +1,19 @@
1
- from langcodes import Language, standardize_tag
2
- import pandas as pd
3
- import os
4
  import re
5
 
6
- flores_dir = "data/floresp-v2.0-rc.3/dev"
 
 
 
 
 
 
7
 
8
- def flores_sentences(language) -> list[str] | None:
9
- try:
10
- return open(f"{flores_dir}/dev.{language.flores_path}").readlines()
11
- except FileNotFoundError:
12
  return None
 
 
13
 
14
  def aggregate_flores_paths(flores_paths):
15
  # takes a list of paths from the same language but different scripts
@@ -22,20 +26,15 @@ def aggregate_flores_paths(flores_paths):
22
  ]
23
  return flores_paths.values[populations.index(max(populations))]
24
 
25
- flores = pd.DataFrame(
26
- [f.split(".")[1] for f in os.listdir(flores_dir)],
27
- columns=["flores_path"],
28
- )
29
  flores["bcp_47"] = flores["flores_path"].apply(
30
  lambda x: standardize_tag(x, macro=True),
31
  )
32
  # ignore script (language is language)
33
  flores["bcp_47"] = flores["bcp_47"].apply(
34
- lambda x: re.sub(r"-[A-Z][a-z]+$", "", x)
35
  )
36
  flores = (
37
- flores.groupby("bcp_47")
38
- .agg({"flores_path": aggregate_flores_paths})
39
- .reset_index()
40
  )
41
-
 
 
 
 
1
  import re
2
 
3
+ import pandas as pd
4
+ from datasets_.util import _get_dataset_config_names, _load_dataset
5
+ from langcodes import Language, standardize_tag
6
+
7
+ slug = "openlanguagedata/flores_plus"
8
+ splits = _get_dataset_config_names(slug)
9
+ splits.remove("default")
10
 
11
+
12
+ def flores_sentences(language) -> pd.DataFrame | None:
13
+ if language.flores_path not in splits:
 
14
  return None
15
+ return _load_dataset(slug, subset=language.flores_path, split="dev").to_pandas()
16
+
17
 
18
  def aggregate_flores_paths(flores_paths):
19
  # takes a list of paths from the same language but different scripts
 
26
  ]
27
  return flores_paths.values[populations.index(max(populations))]
28
 
29
+
30
+ flores = pd.DataFrame(splits, columns=["flores_path"])
 
 
31
  flores["bcp_47"] = flores["flores_path"].apply(
32
  lambda x: standardize_tag(x, macro=True),
33
  )
34
  # ignore script (language is language)
35
  flores["bcp_47"] = flores["bcp_47"].apply(
36
+ lambda x: re.sub(r"-[A-Z][a-z0-9\-]+$", "", x)
37
  )
38
  flores = (
39
+ flores.groupby("bcp_47").agg({"flores_path": aggregate_flores_paths}).reset_index()
 
 
40
  )
 
evals/download_data.py CHANGED
@@ -24,9 +24,6 @@ DATA_DIR = project_root / "data"
24
  FLEURS_BASE_URL = "https://huggingface.co/datasets/google/fleurs/resolve/main/data"
25
  FLEURS_TARGET_DIR = DATA_DIR / "fleurs"
26
 
27
- FLORES_PLUS_HF_ID = "openlanguagedata/flores_plus"
28
- FLORES_TARGET_DIR = DATA_DIR / "floresp-v2.0-rc.3" / "dev_parquet" # Note: Saving as parquet
29
-
30
  GLOTTOLOG_URL = "https://cdstar.shh.mpg.de/bitstreams/EAEA0-B44E-8CEC-EA65-0/glottolog_languoid.zip" # Assumed direct link from https://glottolog.org/meta/downloads
31
  GLOTTOLOG_TARGET_DIR = DATA_DIR / "glottolog_languoid.csv"
32
  GLOTTOLOG_CSV_NAME = "languoid.csv"
@@ -142,37 +139,6 @@ def download_fleurs_data():
142
  else:
143
  print(f"Found extracted audio: {audio_extracted_marker}")
144
 
145
- def download_flores_plus_data():
146
- """Downloads Flores+ data using Hugging Face datasets library."""
147
- print("\n--- Downloading Flores+ Data (requires HF login & accepted terms) ---")
148
- FLORES_TARGET_DIR.mkdir(parents=True, exist_ok=True)
149
-
150
- try:
151
- # Check login status first
152
- token = huggingface_hub.HfFolder.get_token()
153
- if not token:
154
- print("Hugging Face token not found. Please log in using `huggingface-cli login`.")
155
- print("You also need to accept the terms for 'openlanguagedata/flores_plus' on the HF website.")
156
- return
157
-
158
- print(f"Attempting to download '{FLORES_PLUS_HF_ID}' (dev split)...")
159
- # Load only the 'dev' split
160
- ds = load_dataset(FLORES_PLUS_HF_ID, split='dev', verification_mode='no_checks')
161
-
162
- # Save as parquet files, potentially one per language if needed later
163
- # For simplicity now, save the whole dev split as one parquet file
164
- target_file = FLORES_TARGET_DIR / "dev_split.parquet"
165
- print(f"Saving dev split to {target_file}...")
166
- ds.to_parquet(target_file)
167
- print("Flores+ dev split downloaded and saved as parquet.")
168
-
169
- except huggingface_hub.utils.GatedRepoError:
170
- print(f"Error: Access to '{FLORES_PLUS_HF_ID}' is gated.")
171
- print("Please ensure you are logged in (`huggingface-cli login`) and have accepted the terms ")
172
- print(f"on the dataset page: https://huggingface.co/datasets/{FLORES_PLUS_HF_ID}")
173
- except Exception as e:
174
- print(f"An error occurred downloading or saving Flores+: {e}")
175
-
176
 
177
  def download_glottolog_data():
178
  """Downloads and extracts Glottolog languoid CSV."""
@@ -227,53 +193,6 @@ def download_spbleu_data():
227
  else:
228
  print(f"Found: {target_dict_file}")
229
 
230
- # --- Conversion Function ---
231
-
232
- def convert_flores_parquet_to_text():
233
- """Converts the downloaded Flores+ parquet dev split to text files."""
234
- print("\n--- Converting Flores+ Parquet to Text Files ---")
235
- parquet_file = FLORES_TARGET_DIR / "dev_split.parquet"
236
- text_dir = project_root / "data" / "floresp-v2.0-rc.3" / "dev" # Original expected dir
237
-
238
- if not parquet_file.exists():
239
- print(f"Parquet file not found: {parquet_file}. Skipping conversion.")
240
- return
241
-
242
- try:
243
- print(f"Reading parquet file: {parquet_file}")
244
- df = pd.read_parquet(parquet_file)
245
- print(f"Read {len(df)} rows from parquet.")
246
-
247
- if not all(col in df.columns for col in ['iso_639_3', 'iso_15924', 'text']):
248
- print("Error: Parquet file missing required columns (iso_639_3, iso_15924, text).")
249
- return
250
-
251
- text_dir.mkdir(parents=True, exist_ok=True)
252
- print(f"Target directory for text files: {text_dir}")
253
-
254
- # Group by language and script to create individual files
255
- grouped = df.groupby(['iso_639_3', 'iso_15924'])
256
- count = 0
257
- for (lang, script), group in grouped:
258
- target_filename = f"dev.{lang}_{script}"
259
- target_path = text_dir / target_filename
260
- print(f"Writing {len(group)} sentences to {target_path}...")
261
- try:
262
- with open(target_path, 'w', encoding='utf-8') as f:
263
- for sentence in group['text']:
264
- f.write(sentence + '\n')
265
- count += 1
266
- except Exception as e:
267
- print(f"Error writing file {target_path}: {e}")
268
-
269
- print(f"Successfully wrote {count} language/script files to {text_dir}.")
270
-
271
- except ImportError:
272
- print("Error: pandas or pyarrow might be missing. Cannot read parquet.")
273
- print("Please install them: pip install pandas pyarrow")
274
- except Exception as e:
275
- print(f"An error occurred during parquet conversion: {e}")
276
-
277
 
278
  # --- Main Execution ---
279
 
@@ -282,8 +201,6 @@ def main():
282
  print("Starting data download process...")
283
  DATA_DIR.mkdir(exist_ok=True)
284
 
285
- download_flores_plus_data()
286
- convert_flores_parquet_to_text()
287
  #download_fleurs_data()
288
  download_glottolog_data()
289
  download_scriptcodes_data()
 
24
  FLEURS_BASE_URL = "https://huggingface.co/datasets/google/fleurs/resolve/main/data"
25
  FLEURS_TARGET_DIR = DATA_DIR / "fleurs"
26
 
 
 
 
27
  GLOTTOLOG_URL = "https://cdstar.shh.mpg.de/bitstreams/EAEA0-B44E-8CEC-EA65-0/glottolog_languoid.zip" # Assumed direct link from https://glottolog.org/meta/downloads
28
  GLOTTOLOG_TARGET_DIR = DATA_DIR / "glottolog_languoid.csv"
29
  GLOTTOLOG_CSV_NAME = "languoid.csv"
 
139
  else:
140
  print(f"Found extracted audio: {audio_extracted_marker}")
141
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
142
 
143
  def download_glottolog_data():
144
  """Downloads and extracts Glottolog languoid CSV."""
 
193
  else:
194
  print(f"Found: {target_dict_file}")
195
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
196
 
197
  # --- Main Execution ---
198
 
 
201
  print("Starting data download process...")
202
  DATA_DIR.mkdir(exist_ok=True)
203
 
 
 
204
  #download_fleurs_data()
205
  download_glottolog_data()
206
  download_scriptcodes_data()
evals/main.py CHANGED
@@ -9,8 +9,8 @@ from tqdm.asyncio import tqdm_asyncio
9
  # ===== config =====
10
 
11
  n_sentences = 10
12
- n_languages = 20
13
- n_models = 30
14
 
15
  # ===== run evaluation and aggregate results =====
16
 
@@ -31,9 +31,8 @@ async def evaluate():
31
  ]
32
  # filter out combinations that have already been evaluated
33
  combis = pd.DataFrame(combis, columns=["model", "bcp_47", "task"])
34
- combis = combis.merge(old_results, on=["model", "bcp_47", "task"], how="left")
35
- combis = combis[combis["metric"].isna()][["model", "bcp_47", "task"]]
36
- print(combis["model"].unique())
37
  # run evaluations
38
  results = [
39
  tasks[task_name](model, bcp_47, i)
@@ -51,7 +50,7 @@ async def evaluate():
51
  .reset_index()
52
  )
53
  # save results
54
- results = pd.concat([old_results, results])
55
  results = results.sort_values(by=["model", "bcp_47", "task", "metric"])
56
  results.to_json("results.json", **args)
57
 
 
9
  # ===== config =====
10
 
11
  n_sentences = 10
12
+ n_languages = 10
13
+ n_models = 10
14
 
15
  # ===== run evaluation and aggregate results =====
16
 
 
31
  ]
32
  # filter out combinations that have already been evaluated
33
  combis = pd.DataFrame(combis, columns=["model", "bcp_47", "task"])
34
+ # combis = combis.merge(old_results, on=["model", "bcp_47", "task"], how="left")
35
+ # combis = combis[combis["metric"].isna()][["model", "bcp_47", "task"]]
 
36
  # run evaluations
37
  results = [
38
  tasks[task_name](model, bcp_47, i)
 
50
  .reset_index()
51
  )
52
  # save results
53
+ # results = pd.concat([old_results, results])
54
  results = results.sort_values(by=["model", "bcp_47", "task", "metric"])
55
  results.to_json("results.json", **args)
56
 
evals/models.py CHANGED
@@ -12,6 +12,7 @@ from huggingface_hub import AsyncInferenceClient, HfApi
12
  from joblib.memory import Memory
13
  from openai import AsyncOpenAI
14
  from requests import HTTPError, get
 
15
 
16
  # for development purposes, all languages will be evaluated on the fast models
17
  # and only a sample of languages will be evaluated on all models
@@ -111,11 +112,17 @@ huggingface_rate_limit = AsyncLimiter(max_rate=5, time_period=1)
111
  @cache
112
  async def complete(**kwargs):
113
  async with openrouter_rate_limit:
114
- response = await client.chat.completions.create(**kwargs)
 
 
 
 
 
 
 
115
  if not response.choices:
116
  raise Exception(response)
117
- return response
118
-
119
 
120
  @cache
121
  async def transcribe_elevenlabs(path, model):
@@ -199,12 +206,13 @@ def get_cost(row):
199
 
200
  @cache
201
  def load_models(date: date):
202
- popular_models = (
203
- get_historical_popular_models(date.today())[:15]
204
- + get_current_popular_models(date.today())[:15]
205
- )
206
- popular_models = [m["slug"] for m in popular_models]
207
- models = set(important_models + popular_models) - set(blocklist)
 
208
  models = pd.DataFrame(sorted(list(models)), columns=["id"])
209
  or_metadata = models["id"].apply(get_or_metadata)
210
  hf_metadata = or_metadata.apply(get_hf_metadata)
 
12
  from joblib.memory import Memory
13
  from openai import AsyncOpenAI
14
  from requests import HTTPError, get
15
+ from openai import PermissionDeniedError
16
 
17
  # for development purposes, all languages will be evaluated on the fast models
18
  # and only a sample of languages will be evaluated on all models
 
112
  @cache
113
  async def complete(**kwargs):
114
  async with openrouter_rate_limit:
115
+ try:
116
+ response = await client.chat.completions.create(**kwargs)
117
+ except PermissionDeniedError as e:
118
+ if e["error"]["metadata"]["reason"] in ["violence", "hate", "sexual", "self-harm", "harassment"]:
119
+ print(e)
120
+ return None
121
+ else:
122
+ raise e
123
  if not response.choices:
124
  raise Exception(response)
125
+ return response.choices[0].message.content.strip()
 
126
 
127
  @cache
128
  async def transcribe_elevenlabs(path, model):
 
206
 
207
  @cache
208
  def load_models(date: date):
209
+ # popular_models = (
210
+ # get_historical_popular_models(date.today())[:15]
211
+ # + get_current_popular_models(date.today())[:15]
212
+ # )
213
+ # popular_models = [m["slug"] for m in popular_models]
214
+ # models = set(important_models + popular_models) - set(blocklist)
215
+ models = set(important_models) - set(blocklist)
216
  models = pd.DataFrame(sorted(list(models)), columns=["id"])
217
  or_metadata = models["id"].apply(get_or_metadata)
218
  hf_metadata = or_metadata.apply(get_hf_metadata)
evals/tasks.py CHANGED
@@ -30,12 +30,12 @@ async def translate_and_evaluate(model, bcp_47, sentence_nr, mode="from"):
30
  pass
31
  case "to":
32
  original_language, target_language = target_language, original_language
33
- if not flores_sentences(original_language) or not flores_sentences(target_language):
34
  return []
35
- original_sentence = flores_sentences(original_language)[sentence_nr].strip()
36
- target_sentence = flores_sentences(target_language)[sentence_nr].strip()
37
  script = script_name(target_language.flores_path.split("_")[1])
38
- reply = await complete(
39
  model=model,
40
  messages=[
41
  {
@@ -46,8 +46,7 @@ async def translate_and_evaluate(model, bcp_47, sentence_nr, mode="from"):
46
  temperature=0,
47
  max_tokens=1024,
48
  )
49
- prediction = reply.choices[0].message.content.strip()
50
- if prediction.strip():
51
  bleu_score = bleu.compute(
52
  predictions=[prediction],
53
  references=[target_sentence],
@@ -71,21 +70,15 @@ async def translate_and_evaluate(model, bcp_47, sentence_nr, mode="from"):
71
  )
72
  ]
73
 
74
-
75
- # metadata = pd.read_csv("data/floresp-v2.0-rc.3/metadata_dev.tsv", sep="\t")
76
-
77
-
78
  async def classify_and_evaluate(model, bcp_47, nr):
79
  language = languages[languages["bcp_47"] == bcp_47].iloc[0]
80
  sentences = flores_sentences(language)
81
- if not sentences:
82
  return []
83
- sentences = pd.DataFrame(sentences, columns=["text"])
84
- sentences = pd.concat([metadata, sentences], axis=1)
85
  sentences = sentences.dropna(subset=["topic"])
86
  sentences["topic"] = sentences["topic"].str.lower()
87
  paragraphs = (
88
- sentences.groupby("URL").agg({"text": " ".join, "topic": "first"}).reset_index()
89
  )
90
  top_topics = paragraphs.value_counts("topic").head(5).index
91
  paragraphs = paragraphs[paragraphs["topic"].isin(top_topics)]
@@ -95,7 +88,7 @@ async def classify_and_evaluate(model, bcp_47, nr):
95
  for t in top_topics
96
  ]
97
  ).sample(frac=1, random_state=nr)
98
- test_paragraphs = paragraphs[~paragraphs["URL"].isin(examples["URL"])].sample(
99
  frac=1, random_state=42
100
  )
101
  test_paragraph = test_paragraphs.iloc[nr]
@@ -112,7 +105,7 @@ async def classify_and_evaluate(model, bcp_47, nr):
112
  # some models have poor tokenization for some languages, and the prompt for this task is relatively long, so it sometimes exceeds the context window
113
  # this is not just to blame on the context window but mostly on the model's tokenization, so we assign 0 accuracy in this case
114
  try:
115
- reply = await complete(
116
  model=model,
117
  messages=[
118
  *messages,
@@ -124,12 +117,11 @@ async def classify_and_evaluate(model, bcp_47, nr):
124
  temperature=0,
125
  max_tokens=30,
126
  )
127
- response = reply.choices[0].message.content.strip().lower()
128
  true = test_paragraph.topic
129
  others = [t for t in top_topics if t != true]
130
  acc = int(
131
- response.startswith(true)
132
- or (true in response and not any(o in response for o in others))
133
  )
134
  except Exception as e:
135
  if "`inputs` tokens + `max_new_tokens` must be <= 4097" in str(e):
@@ -160,7 +152,7 @@ def corrupt_sentence(sentence):
160
  async def mlm_and_evaluate(model, language_bcp_47, nr):
161
  language = languages[languages["bcp_47"] == language_bcp_47].iloc[0]
162
  sentences = flores_sentences(language)
163
- if not sentences:
164
  return []
165
  sentences = pd.DataFrame(sentences, columns=["text"])
166
  sentences["corrupt_text"] = sentences["text"].apply(corrupt_sentence)
@@ -175,7 +167,7 @@ async def mlm_and_evaluate(model, language_bcp_47, nr):
175
  {"role": "user", "content": example.corrupt_text},
176
  {"role": "assistant", "content": example.text},
177
  ]
178
- reply = await complete(
179
  model=model,
180
  messages=[
181
  *messages,
@@ -187,7 +179,6 @@ async def mlm_and_evaluate(model, language_bcp_47, nr):
187
  temperature=0,
188
  max_tokens=1024,
189
  )
190
- prediction = reply.choices[0].message.content.strip()
191
  chrf_score = chrf.compute(predictions=[prediction], references=[test_sentence.text])
192
  return [
193
  {
@@ -224,13 +215,13 @@ async def mmlu_and_evaluate(model, language_bcp_47, nr):
224
  ]
225
  messages += [{"role": "user", "content": format_item(task)}]
226
  try:
227
- reply = await complete(
228
  model=model,
229
  messages=messages,
230
  temperature=0,
231
  max_tokens=1,
232
  )
233
- acc = int(reply.choices[0].message.content[:1].strip() == task["answer"])
234
  except Exception as e:
235
  if "ResponsibleAIPolicyViolation" in str(e):
236
  acc = 0
@@ -282,7 +273,7 @@ async def transcribe_and_evaluate(model, language_bcp_47, nr):
282
  tasks = {
283
  "translation_from": partial(translate_and_evaluate, mode="from"),
284
  "translation_to": partial(translate_and_evaluate, mode="to"),
285
- # "classification": classify_and_evaluate,
286
  # "mlm": mlm_and_evaluate,
287
  "mmlu": mmlu_and_evaluate,
288
  # "asr": transcribe_and_evaluate,
 
30
  pass
31
  case "to":
32
  original_language, target_language = target_language, original_language
33
+ if flores_sentences(original_language) is None or flores_sentences(target_language) is None:
34
  return []
35
+ original_sentence = flores_sentences(original_language)["text"][sentence_nr].strip()
36
+ target_sentence = flores_sentences(target_language)["text"][sentence_nr].strip()
37
  script = script_name(target_language.flores_path.split("_")[1])
38
+ prediction = await complete(
39
  model=model,
40
  messages=[
41
  {
 
46
  temperature=0,
47
  max_tokens=1024,
48
  )
49
+ if prediction:
 
50
  bleu_score = bleu.compute(
51
  predictions=[prediction],
52
  references=[target_sentence],
 
70
  )
71
  ]
72
 
 
 
 
 
73
  async def classify_and_evaluate(model, bcp_47, nr):
74
  language = languages[languages["bcp_47"] == bcp_47].iloc[0]
75
  sentences = flores_sentences(language)
76
+ if sentences is None:
77
  return []
 
 
78
  sentences = sentences.dropna(subset=["topic"])
79
  sentences["topic"] = sentences["topic"].str.lower()
80
  paragraphs = (
81
+ sentences.groupby("url").agg({"text": " ".join, "topic": "first"}).reset_index()
82
  )
83
  top_topics = paragraphs.value_counts("topic").head(5).index
84
  paragraphs = paragraphs[paragraphs["topic"].isin(top_topics)]
 
88
  for t in top_topics
89
  ]
90
  ).sample(frac=1, random_state=nr)
91
+ test_paragraphs = paragraphs[~paragraphs["url"].isin(examples["url"])].sample(
92
  frac=1, random_state=42
93
  )
94
  test_paragraph = test_paragraphs.iloc[nr]
 
105
  # some models have poor tokenization for some languages, and the prompt for this task is relatively long, so it sometimes exceeds the context window
106
  # this is not just to blame on the context window but mostly on the model's tokenization, so we assign 0 accuracy in this case
107
  try:
108
+ pred = await complete(
109
  model=model,
110
  messages=[
111
  *messages,
 
117
  temperature=0,
118
  max_tokens=30,
119
  )
 
120
  true = test_paragraph.topic
121
  others = [t for t in top_topics if t != true]
122
  acc = int(
123
+ pred.startswith(true)
124
+ or (true in pred and not any(o in pred for o in others))
125
  )
126
  except Exception as e:
127
  if "`inputs` tokens + `max_new_tokens` must be <= 4097" in str(e):
 
152
  async def mlm_and_evaluate(model, language_bcp_47, nr):
153
  language = languages[languages["bcp_47"] == language_bcp_47].iloc[0]
154
  sentences = flores_sentences(language)
155
+ if sentences is None:
156
  return []
157
  sentences = pd.DataFrame(sentences, columns=["text"])
158
  sentences["corrupt_text"] = sentences["text"].apply(corrupt_sentence)
 
167
  {"role": "user", "content": example.corrupt_text},
168
  {"role": "assistant", "content": example.text},
169
  ]
170
+ prediction = await complete(
171
  model=model,
172
  messages=[
173
  *messages,
 
179
  temperature=0,
180
  max_tokens=1024,
181
  )
 
182
  chrf_score = chrf.compute(predictions=[prediction], references=[test_sentence.text])
183
  return [
184
  {
 
215
  ]
216
  messages += [{"role": "user", "content": format_item(task)}]
217
  try:
218
+ response = await complete(
219
  model=model,
220
  messages=messages,
221
  temperature=0,
222
  max_tokens=1,
223
  )
224
+ acc = int(response[:1].strip() == task["answer"])
225
  except Exception as e:
226
  if "ResponsibleAIPolicyViolation" in str(e):
227
  acc = 0
 
273
  tasks = {
274
  "translation_from": partial(translate_and_evaluate, mode="from"),
275
  "translation_to": partial(translate_and_evaluate, mode="to"),
276
+ "classification": classify_and_evaluate,
277
  # "mlm": mlm_and_evaluate,
278
  "mmlu": mmlu_and_evaluate,
279
  # "asr": transcribe_and_evaluate,
languages.json CHANGED
@@ -485,7 +485,7 @@
485
  "language_name":"North Levantine Arabic",
486
  "autonym":"العامية",
487
  "family":"Afro-Asiatic",
488
- "flores_path":"apc_Arab",
489
  "fleurs_tag":null,
490
  "commonvoice_hours":null,
491
  "commonvoice_locale":null,
@@ -876,10 +876,10 @@
876
  "in_benchmark":true
877
  },
878
  {
879
- "bcp_47":"mwr",
880
  "speakers":15913080,
881
- "language_name":"Marwari",
882
- "autonym":"Marwari",
883
  "family":"Indo-European",
884
  "flores_path":null,
885
  "fleurs_tag":null,
@@ -888,10 +888,10 @@
888
  "in_benchmark":false
889
  },
890
  {
891
- "bcp_47":"bgc",
892
  "speakers":15913080,
893
- "language_name":"Haryanvi",
894
- "autonym":"हरियाणवी",
895
  "family":"Indo-European",
896
  "flores_path":null,
897
  "fleurs_tag":null,
@@ -1073,7 +1073,7 @@
1073
  "language_name":"Akan",
1074
  "autonym":"Akan",
1075
  "family":"Atlantic-Congo",
1076
- "flores_path":"twi_Latn",
1077
  "fleurs_tag":null,
1078
  "commonvoice_hours":0.2,
1079
  "commonvoice_locale":"tw",
@@ -1171,7 +1171,7 @@
1171
  "family":"Afro-Asiatic",
1172
  "flores_path":"tir_Ethi",
1173
  "fleurs_tag":null,
1174
- "commonvoice_hours":0.0,
1175
  "commonvoice_locale":"ti",
1176
  "in_benchmark":true
1177
  },
@@ -1195,7 +1195,7 @@
1195
  "family":"Atlantic-Congo",
1196
  "flores_path":"lua_Latn",
1197
  "fleurs_tag":null,
1198
- "commonvoice_hours":1.9,
1199
  "commonvoice_locale":"lua",
1200
  "in_benchmark":true
1201
  },
@@ -1955,18 +1955,6 @@
1955
  "commonvoice_locale":"gom",
1956
  "in_benchmark":true
1957
  },
1958
- {
1959
- "bcp_47":"kln",
1960
- "speakers":4068120,
1961
- "language_name":"Kalenjin",
1962
- "autonym":"Kalenjin",
1963
- "family":"Nilotic",
1964
- "flores_path":null,
1965
- "fleurs_tag":null,
1966
- "commonvoice_hours":43.0,
1967
- "commonvoice_locale":"kln",
1968
- "in_benchmark":false
1969
- },
1970
  {
1971
  "bcp_47":"kam",
1972
  "speakers":4068120,
@@ -1979,6 +1967,18 @@
1979
  "commonvoice_locale":"kam",
1980
  "in_benchmark":true
1981
  },
 
 
 
 
 
 
 
 
 
 
 
 
1982
  {
1983
  "bcp_47":"bjn",
1984
  "speakers":4010288,
@@ -2124,10 +2124,10 @@
2124
  "in_benchmark":true
2125
  },
2126
  {
2127
- "bcp_47":"gbm",
2128
  "speakers":3580443,
2129
- "language_name":"Garhwali",
2130
- "autonym":"Garhwali",
2131
  "family":"Indo-European",
2132
  "flores_path":null,
2133
  "fleurs_tag":null,
@@ -2136,10 +2136,10 @@
2136
  "in_benchmark":false
2137
  },
2138
  {
2139
- "bcp_47":"lmn",
2140
  "speakers":3580443,
2141
- "language_name":"Lambadi",
2142
- "autonym":"Lambadi",
2143
  "family":"Indo-European",
2144
  "flores_path":null,
2145
  "fleurs_tag":null,
@@ -2352,27 +2352,27 @@
2352
  "in_benchmark":true
2353
  },
2354
  {
2355
- "bcp_47":"efi",
2356
  "speakers":2996392,
2357
- "language_name":"Efik",
2358
- "autonym":"Efik",
2359
  "family":"Atlantic-Congo",
2360
  "flores_path":null,
2361
  "fleurs_tag":null,
2362
- "commonvoice_hours":null,
2363
- "commonvoice_locale":null,
2364
  "in_benchmark":false
2365
  },
2366
  {
2367
- "bcp_47":"ibb",
2368
  "speakers":2996392,
2369
- "language_name":"Ibibio",
2370
- "autonym":"Ibibio",
2371
  "family":"Atlantic-Congo",
2372
  "flores_path":null,
2373
  "fleurs_tag":null,
2374
- "commonvoice_hours":11.0,
2375
- "commonvoice_locale":"ibb",
2376
  "in_benchmark":false
2377
  },
2378
  {
@@ -2544,11 +2544,11 @@
2544
  "in_benchmark":false
2545
  },
2546
  {
2547
- "bcp_47":"wbq",
2548
  "speakers":2386962,
2549
- "language_name":"Waddar",
2550
- "autonym":"Waddar",
2551
- "family":"Dravidian",
2552
  "flores_path":null,
2553
  "fleurs_tag":null,
2554
  "commonvoice_hours":null,
@@ -2556,11 +2556,11 @@
2556
  "in_benchmark":false
2557
  },
2558
  {
2559
- "bcp_47":"sck",
2560
  "speakers":2386962,
2561
- "language_name":"Sadri",
2562
- "autonym":"Sadri",
2563
- "family":"Indo-European",
2564
  "flores_path":null,
2565
  "fleurs_tag":null,
2566
  "commonvoice_hours":null,
@@ -2724,10 +2724,10 @@
2724
  "in_benchmark":false
2725
  },
2726
  {
2727
- "bcp_47":"khn",
2728
  "speakers":1989135,
2729
- "language_name":"Khandesi",
2730
- "autonym":"Khandesi",
2731
  "family":"Indo-European",
2732
  "flores_path":null,
2733
  "fleurs_tag":null,
@@ -2748,10 +2748,10 @@
2748
  "in_benchmark":false
2749
  },
2750
  {
2751
- "bcp_47":"wbr",
2752
  "speakers":1989135,
2753
- "language_name":"Wagdi",
2754
- "autonym":"Wagdi",
2755
  "family":"Indo-European",
2756
  "flores_path":null,
2757
  "fleurs_tag":null,
@@ -3535,7 +3535,7 @@
3535
  "family":null,
3536
  "flores_path":"eus_Latn",
3537
  "fleurs_tag":null,
3538
- "commonvoice_hours":377.0,
3539
  "commonvoice_locale":"eu",
3540
  "in_benchmark":true
3541
  },
@@ -3559,7 +3559,7 @@
3559
  "family":"Abkhaz-Adyge",
3560
  "flores_path":null,
3561
  "fleurs_tag":null,
3562
- "commonvoice_hours":61.0,
3563
  "commonvoice_locale":"kbd",
3564
  "in_benchmark":false
3565
  },
@@ -3684,10 +3684,10 @@
3684
  "in_benchmark":true
3685
  },
3686
  {
3687
- "bcp_47":"ksb",
3688
  "speakers":995398,
3689
- "language_name":"Shambala",
3690
- "autonym":"Kishambaa",
3691
  "family":"Atlantic-Congo",
3692
  "flores_path":null,
3693
  "fleurs_tag":null,
@@ -3696,10 +3696,10 @@
3696
  "in_benchmark":false
3697
  },
3698
  {
3699
- "bcp_47":"bez",
3700
  "speakers":995398,
3701
- "language_name":"Bena",
3702
- "autonym":"Hibena",
3703
  "family":"Atlantic-Congo",
3704
  "flores_path":null,
3705
  "fleurs_tag":null,
@@ -4512,27 +4512,27 @@
4512
  "in_benchmark":false
4513
  },
4514
  {
4515
- "bcp_47":"nhw",
4516
  "speakers":501735,
4517
- "language_name":"Western Huasteca Nahuatl",
4518
- "autonym":"Western Huasteca Nahuatl",
4519
  "family":"Uto-Aztecan",
4520
  "flores_path":null,
4521
  "fleurs_tag":null,
4522
- "commonvoice_hours":null,
4523
- "commonvoice_locale":null,
4524
  "in_benchmark":false
4525
  },
4526
  {
4527
- "bcp_47":"nhe",
4528
  "speakers":501735,
4529
- "language_name":"Eastern Huasteca Nahuatl",
4530
- "autonym":"Eastern Huasteca Nahuatl",
4531
  "family":"Uto-Aztecan",
4532
  "flores_path":null,
4533
  "fleurs_tag":null,
4534
- "commonvoice_hours":0.0,
4535
- "commonvoice_locale":"nhe",
4536
  "in_benchmark":false
4537
  },
4538
  {
@@ -4553,11 +4553,11 @@
4553
  "language_name":"Kara-Kalpak",
4554
  "autonym":"Kara-Kalpak",
4555
  "family":"Turkic",
4556
- "flores_path":null,
4557
  "fleurs_tag":null,
4558
  "commonvoice_hours":0.0,
4559
  "commonvoice_locale":"kaa",
4560
- "in_benchmark":false
4561
  },
4562
  {
4563
  "bcp_47":"gju",
@@ -4715,18 +4715,6 @@
4715
  "commonvoice_locale":null,
4716
  "in_benchmark":false
4717
  },
4718
- {
4719
- "bcp_47":"jmc",
4720
- "speakers":433291,
4721
- "language_name":"Machame",
4722
- "autonym":"Kimachame",
4723
- "family":"Atlantic-Congo",
4724
- "flores_path":null,
4725
- "fleurs_tag":null,
4726
- "commonvoice_hours":null,
4727
- "commonvoice_locale":null,
4728
- "in_benchmark":false
4729
- },
4730
  {
4731
  "bcp_47":"vun",
4732
  "speakers":433291,
@@ -4747,10 +4735,22 @@
4747
  "family":"Atlantic-Congo",
4748
  "flores_path":null,
4749
  "fleurs_tag":null,
4750
- "commonvoice_hours":1.2,
4751
  "commonvoice_locale":"rof",
4752
  "in_benchmark":false
4753
  },
 
 
 
 
 
 
 
 
 
 
 
 
4754
  {
4755
  "bcp_47":"kjg",
4756
  "speakers":431949,
@@ -5124,27 +5124,27 @@
5124
  "in_benchmark":false
5125
  },
5126
  {
5127
- "bcp_47":"bas",
5128
  "speakers":332940,
5129
- "language_name":"Basaa",
5130
- "autonym":"Ɓàsàa",
5131
  "family":"Atlantic-Congo",
5132
  "flores_path":null,
5133
  "fleurs_tag":null,
5134
- "commonvoice_hours":12.0,
5135
- "commonvoice_locale":"bas",
5136
  "in_benchmark":false
5137
  },
5138
  {
5139
- "bcp_47":"bax",
5140
  "speakers":332940,
5141
- "language_name":"Bamun",
5142
- "autonym":"Bamun",
5143
  "family":"Atlantic-Congo",
5144
  "flores_path":null,
5145
  "fleurs_tag":null,
5146
- "commonvoice_hours":11.0,
5147
- "commonvoice_locale":"bax",
5148
  "in_benchmark":false
5149
  },
5150
  {
@@ -5232,11 +5232,11 @@
5232
  "in_benchmark":false
5233
  },
5234
  {
5235
- "bcp_47":"bfq",
5236
  "speakers":305001,
5237
- "language_name":"Badaga",
5238
- "autonym":"Badaga",
5239
- "family":"Dravidian",
5240
  "flores_path":null,
5241
  "fleurs_tag":null,
5242
  "commonvoice_hours":null,
@@ -5244,11 +5244,11 @@
5244
  "in_benchmark":false
5245
  },
5246
  {
5247
- "bcp_47":"njo",
5248
  "speakers":305001,
5249
- "language_name":"Ao Naga",
5250
- "autonym":"Ao Naga",
5251
- "family":"Sino-Tibetan",
5252
  "flores_path":null,
5253
  "fleurs_tag":null,
5254
  "commonvoice_hours":null,
@@ -5388,10 +5388,10 @@
5388
  "in_benchmark":false
5389
  },
5390
  {
5391
- "bcp_47":"tdd",
5392
  "speakers":264864,
5393
- "language_name":"Tai Nüa",
5394
- "autonym":"Tai Nüa",
5395
  "family":"Tai-Kadai",
5396
  "flores_path":null,
5397
  "fleurs_tag":null,
@@ -5400,10 +5400,10 @@
5400
  "in_benchmark":false
5401
  },
5402
  {
5403
- "bcp_47":"khb",
5404
  "speakers":264864,
5405
- "language_name":"",
5406
- "autonym":"",
5407
  "family":"Tai-Kadai",
5408
  "flores_path":null,
5409
  "fleurs_tag":null,
@@ -5508,10 +5508,10 @@
5508
  "in_benchmark":true
5509
  },
5510
  {
5511
- "bcp_47":"sxn",
5512
  "speakers":245664,
5513
- "language_name":"Sangir",
5514
- "autonym":"Sangir",
5515
  "family":"Austronesian",
5516
  "flores_path":null,
5517
  "fleurs_tag":null,
@@ -5520,10 +5520,10 @@
5520
  "in_benchmark":false
5521
  },
5522
  {
5523
- "bcp_47":"mdr",
5524
  "speakers":245664,
5525
- "language_name":"Mandar",
5526
- "autonym":"Mandar",
5527
  "family":"Austronesian",
5528
  "flores_path":null,
5529
  "fleurs_tag":null,
@@ -5904,10 +5904,10 @@
5904
  "in_benchmark":false
5905
  },
5906
  {
5907
- "bcp_47":"bss",
5908
  "speakers":149823,
5909
- "language_name":"Akoose",
5910
- "autonym":"Akoose",
5911
  "family":"Atlantic-Congo",
5912
  "flores_path":null,
5913
  "fleurs_tag":null,
@@ -5916,10 +5916,10 @@
5916
  "in_benchmark":false
5917
  },
5918
  {
5919
- "bcp_47":"kkj",
5920
  "speakers":149823,
5921
- "language_name":"Kako",
5922
- "autonym":"Kakɔ",
5923
  "family":"Atlantic-Congo",
5924
  "flores_path":null,
5925
  "fleurs_tag":null,
@@ -6367,7 +6367,7 @@
6367
  "family":"Indo-European",
6368
  "flores_path":null,
6369
  "fleurs_tag":null,
6370
- "commonvoice_hours":3.1,
6371
  "commonvoice_locale":"btv",
6372
  "in_benchmark":false
6373
  },
@@ -7272,11 +7272,11 @@
7272
  "in_benchmark":false
7273
  },
7274
  {
7275
- "bcp_47":"bku",
7276
  "speakers":7970,
7277
- "language_name":"Buhid",
7278
- "autonym":"Buhid",
7279
- "family":"Austronesian",
7280
  "flores_path":null,
7281
  "fleurs_tag":null,
7282
  "commonvoice_hours":null,
@@ -7284,11 +7284,11 @@
7284
  "in_benchmark":false
7285
  },
7286
  {
7287
- "bcp_47":"twq",
7288
  "speakers":7970,
7289
- "language_name":"Tasawaq",
7290
- "autonym":"Tasawaq Senni",
7291
- "family":"Songhay",
7292
  "flores_path":null,
7293
  "fleurs_tag":null,
7294
  "commonvoice_hours":null,
@@ -7836,11 +7836,11 @@
7836
  "in_benchmark":false
7837
  },
7838
  {
7839
- "bcp_47":"crl",
7840
  "speakers":377,
7841
- "language_name":"Northern East Cree",
7842
- "autonym":"Northern East Cree",
7843
- "family":"Algic",
7844
  "flores_path":null,
7845
  "fleurs_tag":null,
7846
  "commonvoice_hours":null,
@@ -7848,11 +7848,11 @@
7848
  "in_benchmark":false
7849
  },
7850
  {
7851
- "bcp_47":"kwk",
7852
  "speakers":377,
7853
- "language_name":"Kwakʼwala",
7854
- "autonym":"KwakʼWala",
7855
- "family":"Wakashan",
7856
  "flores_path":null,
7857
  "fleurs_tag":null,
7858
  "commonvoice_hours":null,
@@ -7968,11 +7968,11 @@
7968
  "in_benchmark":false
7969
  },
7970
  {
7971
- "bcp_47":"sgs",
7972
  "speakers":0,
7973
- "language_name":"Samogitian",
7974
- "autonym":"Samogitian",
7975
- "family":"Indo-European",
7976
  "flores_path":null,
7977
  "fleurs_tag":null,
7978
  "commonvoice_hours":null,
@@ -7980,11 +7980,11 @@
7980
  "in_benchmark":false
7981
  },
7982
  {
7983
- "bcp_47":"rgn",
7984
  "speakers":0,
7985
- "language_name":"Romagnol",
7986
- "autonym":"Romagnol",
7987
- "family":"Indo-European",
7988
  "flores_path":null,
7989
  "fleurs_tag":null,
7990
  "commonvoice_hours":null,
@@ -7992,22 +7992,22 @@
7992
  "in_benchmark":false
7993
  },
7994
  {
7995
- "bcp_47":"ann",
7996
  "speakers":0,
7997
- "language_name":"Obolo",
7998
- "autonym":"Obolo",
7999
- "family":"Atlantic-Congo",
8000
  "flores_path":null,
8001
  "fleurs_tag":null,
8002
- "commonvoice_hours":null,
8003
- "commonvoice_locale":null,
8004
  "in_benchmark":false
8005
  },
8006
  {
8007
- "bcp_47":"pfl",
8008
  "speakers":0,
8009
- "language_name":"Palatine German",
8010
- "autonym":"Palatine German",
8011
  "family":"Indo-European",
8012
  "flores_path":null,
8013
  "fleurs_tag":null,
@@ -8016,23 +8016,23 @@
8016
  "in_benchmark":false
8017
  },
8018
  {
8019
- "bcp_47":"osa",
8020
  "speakers":0,
8021
- "language_name":"Osage",
8022
- "autonym":"𐓏𐓘𐓻𐓘𐓻𐓟",
8023
- "family":"Siouan",
8024
  "flores_path":null,
8025
  "fleurs_tag":null,
8026
- "commonvoice_hours":null,
8027
- "commonvoice_locale":null,
8028
  "in_benchmark":false
8029
  },
8030
  {
8031
- "bcp_47":"lzh",
8032
  "speakers":0,
8033
- "language_name":"Literary Chinese",
8034
- "autonym":"Literary Chinese",
8035
- "family":"Sino-Tibetan",
8036
  "flores_path":null,
8037
  "fleurs_tag":null,
8038
  "commonvoice_hours":null,
@@ -8040,23 +8040,23 @@
8040
  "in_benchmark":false
8041
  },
8042
  {
8043
- "bcp_47":"jbo",
8044
  "speakers":0,
8045
- "language_name":"Lojban",
8046
- "autonym":"La .Lojban.",
8047
- "family":"Artificial Language",
8048
  "flores_path":null,
8049
  "fleurs_tag":null,
8050
- "commonvoice_hours":0.0,
8051
- "commonvoice_locale":"jbo",
8052
  "in_benchmark":false
8053
  },
8054
  {
8055
- "bcp_47":"io",
8056
  "speakers":0,
8057
- "language_name":"Ido",
8058
- "autonym":"Ido",
8059
- "family":"Artificial Language",
8060
  "flores_path":null,
8061
  "fleurs_tag":null,
8062
  "commonvoice_hours":null,
@@ -8064,10 +8064,10 @@
8064
  "in_benchmark":false
8065
  },
8066
  {
8067
- "bcp_47":"jut",
8068
  "speakers":0,
8069
- "language_name":"Jutish",
8070
- "autonym":"Jutish",
8071
  "family":"Indo-European",
8072
  "flores_path":null,
8073
  "fleurs_tag":null,
@@ -8076,11 +8076,11 @@
8076
  "in_benchmark":false
8077
  },
8078
  {
8079
- "bcp_47":"gez",
8080
  "speakers":0,
8081
- "language_name":"Geez",
8082
- "autonym":"Geez",
8083
- "family":"Afro-Asiatic",
8084
  "flores_path":null,
8085
  "fleurs_tag":null,
8086
  "commonvoice_hours":null,
@@ -8088,11 +8088,11 @@
8088
  "in_benchmark":false
8089
  },
8090
  {
8091
- "bcp_47":"cu",
8092
  "speakers":0,
8093
- "language_name":"Church Slavic",
8094
- "autonym":"Church Slavic",
8095
- "family":"Indo-European",
8096
  "flores_path":null,
8097
  "fleurs_tag":null,
8098
  "commonvoice_hours":null,
@@ -8112,23 +8112,23 @@
8112
  "in_benchmark":false
8113
  },
8114
  {
8115
- "bcp_47":"vot",
8116
  "speakers":0,
8117
- "language_name":"Votic",
8118
- "autonym":"Votic",
8119
- "family":"Uralic",
8120
  "flores_path":null,
8121
  "fleurs_tag":null,
8122
- "commonvoice_hours":0.1,
8123
- "commonvoice_locale":"vot",
8124
  "in_benchmark":false
8125
  },
8126
  {
8127
- "bcp_47":"cad",
8128
  "speakers":0,
8129
- "language_name":"Caddo",
8130
- "autonym":"Caddo",
8131
- "family":"Caddoan",
8132
  "flores_path":null,
8133
  "fleurs_tag":null,
8134
  "commonvoice_hours":null,
 
485
  "language_name":"North Levantine Arabic",
486
  "autonym":"العامية",
487
  "family":"Afro-Asiatic",
488
+ "flores_path":"apc_Arab_nort3139",
489
  "fleurs_tag":null,
490
  "commonvoice_hours":null,
491
  "commonvoice_locale":null,
 
876
  "in_benchmark":true
877
  },
878
  {
879
+ "bcp_47":"bgc",
880
  "speakers":15913080,
881
+ "language_name":"Haryanvi",
882
+ "autonym":"हरियाणवी",
883
  "family":"Indo-European",
884
  "flores_path":null,
885
  "fleurs_tag":null,
 
888
  "in_benchmark":false
889
  },
890
  {
891
+ "bcp_47":"mwr",
892
  "speakers":15913080,
893
+ "language_name":"Marwari",
894
+ "autonym":"Marwari",
895
  "family":"Indo-European",
896
  "flores_path":null,
897
  "fleurs_tag":null,
 
1073
  "language_name":"Akan",
1074
  "autonym":"Akan",
1075
  "family":"Atlantic-Congo",
1076
+ "flores_path":"twi_Latn_akua1239",
1077
  "fleurs_tag":null,
1078
  "commonvoice_hours":0.2,
1079
  "commonvoice_locale":"tw",
 
1171
  "family":"Afro-Asiatic",
1172
  "flores_path":"tir_Ethi",
1173
  "fleurs_tag":null,
1174
+ "commonvoice_hours":0.1,
1175
  "commonvoice_locale":"ti",
1176
  "in_benchmark":true
1177
  },
 
1195
  "family":"Atlantic-Congo",
1196
  "flores_path":"lua_Latn",
1197
  "fleurs_tag":null,
1198
+ "commonvoice_hours":2.2,
1199
  "commonvoice_locale":"lua",
1200
  "in_benchmark":true
1201
  },
 
1955
  "commonvoice_locale":"gom",
1956
  "in_benchmark":true
1957
  },
 
 
 
 
 
 
 
 
 
 
 
 
1958
  {
1959
  "bcp_47":"kam",
1960
  "speakers":4068120,
 
1967
  "commonvoice_locale":"kam",
1968
  "in_benchmark":true
1969
  },
1970
+ {
1971
+ "bcp_47":"kln",
1972
+ "speakers":4068120,
1973
+ "language_name":"Kalenjin",
1974
+ "autonym":"Kalenjin",
1975
+ "family":"Nilotic",
1976
+ "flores_path":null,
1977
+ "fleurs_tag":null,
1978
+ "commonvoice_hours":43.0,
1979
+ "commonvoice_locale":"kln",
1980
+ "in_benchmark":false
1981
+ },
1982
  {
1983
  "bcp_47":"bjn",
1984
  "speakers":4010288,
 
2124
  "in_benchmark":true
2125
  },
2126
  {
2127
+ "bcp_47":"lmn",
2128
  "speakers":3580443,
2129
+ "language_name":"Lambadi",
2130
+ "autonym":"Lambadi",
2131
  "family":"Indo-European",
2132
  "flores_path":null,
2133
  "fleurs_tag":null,
 
2136
  "in_benchmark":false
2137
  },
2138
  {
2139
+ "bcp_47":"gbm",
2140
  "speakers":3580443,
2141
+ "language_name":"Garhwali",
2142
+ "autonym":"Garhwali",
2143
  "family":"Indo-European",
2144
  "flores_path":null,
2145
  "fleurs_tag":null,
 
2352
  "in_benchmark":true
2353
  },
2354
  {
2355
+ "bcp_47":"ibb",
2356
  "speakers":2996392,
2357
+ "language_name":"Ibibio",
2358
+ "autonym":"Ibibio",
2359
  "family":"Atlantic-Congo",
2360
  "flores_path":null,
2361
  "fleurs_tag":null,
2362
+ "commonvoice_hours":11.0,
2363
+ "commonvoice_locale":"ibb",
2364
  "in_benchmark":false
2365
  },
2366
  {
2367
+ "bcp_47":"efi",
2368
  "speakers":2996392,
2369
+ "language_name":"Efik",
2370
+ "autonym":"Efik",
2371
  "family":"Atlantic-Congo",
2372
  "flores_path":null,
2373
  "fleurs_tag":null,
2374
+ "commonvoice_hours":null,
2375
+ "commonvoice_locale":null,
2376
  "in_benchmark":false
2377
  },
2378
  {
 
2544
  "in_benchmark":false
2545
  },
2546
  {
2547
+ "bcp_47":"sck",
2548
  "speakers":2386962,
2549
+ "language_name":"Sadri",
2550
+ "autonym":"Sadri",
2551
+ "family":"Indo-European",
2552
  "flores_path":null,
2553
  "fleurs_tag":null,
2554
  "commonvoice_hours":null,
 
2556
  "in_benchmark":false
2557
  },
2558
  {
2559
+ "bcp_47":"wbq",
2560
  "speakers":2386962,
2561
+ "language_name":"Waddar",
2562
+ "autonym":"Waddar",
2563
+ "family":"Dravidian",
2564
  "flores_path":null,
2565
  "fleurs_tag":null,
2566
  "commonvoice_hours":null,
 
2724
  "in_benchmark":false
2725
  },
2726
  {
2727
+ "bcp_47":"wbr",
2728
  "speakers":1989135,
2729
+ "language_name":"Wagdi",
2730
+ "autonym":"Wagdi",
2731
  "family":"Indo-European",
2732
  "flores_path":null,
2733
  "fleurs_tag":null,
 
2748
  "in_benchmark":false
2749
  },
2750
  {
2751
+ "bcp_47":"khn",
2752
  "speakers":1989135,
2753
+ "language_name":"Khandesi",
2754
+ "autonym":"Khandesi",
2755
  "family":"Indo-European",
2756
  "flores_path":null,
2757
  "fleurs_tag":null,
 
3535
  "family":null,
3536
  "flores_path":"eus_Latn",
3537
  "fleurs_tag":null,
3538
+ "commonvoice_hours":379.0,
3539
  "commonvoice_locale":"eu",
3540
  "in_benchmark":true
3541
  },
 
3559
  "family":"Abkhaz-Adyge",
3560
  "flores_path":null,
3561
  "fleurs_tag":null,
3562
+ "commonvoice_hours":62.0,
3563
  "commonvoice_locale":"kbd",
3564
  "in_benchmark":false
3565
  },
 
3684
  "in_benchmark":true
3685
  },
3686
  {
3687
+ "bcp_47":"bez",
3688
  "speakers":995398,
3689
+ "language_name":"Bena",
3690
+ "autonym":"Hibena",
3691
  "family":"Atlantic-Congo",
3692
  "flores_path":null,
3693
  "fleurs_tag":null,
 
3696
  "in_benchmark":false
3697
  },
3698
  {
3699
+ "bcp_47":"ksb",
3700
  "speakers":995398,
3701
+ "language_name":"Shambala",
3702
+ "autonym":"Kishambaa",
3703
  "family":"Atlantic-Congo",
3704
  "flores_path":null,
3705
  "fleurs_tag":null,
 
4512
  "in_benchmark":false
4513
  },
4514
  {
4515
+ "bcp_47":"nhe",
4516
  "speakers":501735,
4517
+ "language_name":"Eastern Huasteca Nahuatl",
4518
+ "autonym":"Eastern Huasteca Nahuatl",
4519
  "family":"Uto-Aztecan",
4520
  "flores_path":null,
4521
  "fleurs_tag":null,
4522
+ "commonvoice_hours":0.0,
4523
+ "commonvoice_locale":"nhe",
4524
  "in_benchmark":false
4525
  },
4526
  {
4527
+ "bcp_47":"nhw",
4528
  "speakers":501735,
4529
+ "language_name":"Western Huasteca Nahuatl",
4530
+ "autonym":"Western Huasteca Nahuatl",
4531
  "family":"Uto-Aztecan",
4532
  "flores_path":null,
4533
  "fleurs_tag":null,
4534
+ "commonvoice_hours":null,
4535
+ "commonvoice_locale":null,
4536
  "in_benchmark":false
4537
  },
4538
  {
 
4553
  "language_name":"Kara-Kalpak",
4554
  "autonym":"Kara-Kalpak",
4555
  "family":"Turkic",
4556
+ "flores_path":"kaa_Latn",
4557
  "fleurs_tag":null,
4558
  "commonvoice_hours":0.0,
4559
  "commonvoice_locale":"kaa",
4560
+ "in_benchmark":true
4561
  },
4562
  {
4563
  "bcp_47":"gju",
 
4715
  "commonvoice_locale":null,
4716
  "in_benchmark":false
4717
  },
 
 
 
 
 
 
 
 
 
 
 
 
4718
  {
4719
  "bcp_47":"vun",
4720
  "speakers":433291,
 
4735
  "family":"Atlantic-Congo",
4736
  "flores_path":null,
4737
  "fleurs_tag":null,
4738
+ "commonvoice_hours":2.5,
4739
  "commonvoice_locale":"rof",
4740
  "in_benchmark":false
4741
  },
4742
+ {
4743
+ "bcp_47":"jmc",
4744
+ "speakers":433291,
4745
+ "language_name":"Machame",
4746
+ "autonym":"Kimachame",
4747
+ "family":"Atlantic-Congo",
4748
+ "flores_path":null,
4749
+ "fleurs_tag":null,
4750
+ "commonvoice_hours":null,
4751
+ "commonvoice_locale":null,
4752
+ "in_benchmark":false
4753
+ },
4754
  {
4755
  "bcp_47":"kjg",
4756
  "speakers":431949,
 
5124
  "in_benchmark":false
5125
  },
5126
  {
5127
+ "bcp_47":"bax",
5128
  "speakers":332940,
5129
+ "language_name":"Bamun",
5130
+ "autonym":"Bamun",
5131
  "family":"Atlantic-Congo",
5132
  "flores_path":null,
5133
  "fleurs_tag":null,
5134
+ "commonvoice_hours":11.0,
5135
+ "commonvoice_locale":"bax",
5136
  "in_benchmark":false
5137
  },
5138
  {
5139
+ "bcp_47":"bas",
5140
  "speakers":332940,
5141
+ "language_name":"Basaa",
5142
+ "autonym":"Ɓàsàa",
5143
  "family":"Atlantic-Congo",
5144
  "flores_path":null,
5145
  "fleurs_tag":null,
5146
+ "commonvoice_hours":12.0,
5147
+ "commonvoice_locale":"bas",
5148
  "in_benchmark":false
5149
  },
5150
  {
 
5232
  "in_benchmark":false
5233
  },
5234
  {
5235
+ "bcp_47":"njo",
5236
  "speakers":305001,
5237
+ "language_name":"Ao Naga",
5238
+ "autonym":"Ao Naga",
5239
+ "family":"Sino-Tibetan",
5240
  "flores_path":null,
5241
  "fleurs_tag":null,
5242
  "commonvoice_hours":null,
 
5244
  "in_benchmark":false
5245
  },
5246
  {
5247
+ "bcp_47":"bfq",
5248
  "speakers":305001,
5249
+ "language_name":"Badaga",
5250
+ "autonym":"Badaga",
5251
+ "family":"Dravidian",
5252
  "flores_path":null,
5253
  "fleurs_tag":null,
5254
  "commonvoice_hours":null,
 
5388
  "in_benchmark":false
5389
  },
5390
  {
5391
+ "bcp_47":"khb",
5392
  "speakers":264864,
5393
+ "language_name":"",
5394
+ "autonym":"",
5395
  "family":"Tai-Kadai",
5396
  "flores_path":null,
5397
  "fleurs_tag":null,
 
5400
  "in_benchmark":false
5401
  },
5402
  {
5403
+ "bcp_47":"tdd",
5404
  "speakers":264864,
5405
+ "language_name":"Tai Nüa",
5406
+ "autonym":"Tai Nüa",
5407
  "family":"Tai-Kadai",
5408
  "flores_path":null,
5409
  "fleurs_tag":null,
 
5508
  "in_benchmark":true
5509
  },
5510
  {
5511
+ "bcp_47":"mdr",
5512
  "speakers":245664,
5513
+ "language_name":"Mandar",
5514
+ "autonym":"Mandar",
5515
  "family":"Austronesian",
5516
  "flores_path":null,
5517
  "fleurs_tag":null,
 
5520
  "in_benchmark":false
5521
  },
5522
  {
5523
+ "bcp_47":"sxn",
5524
  "speakers":245664,
5525
+ "language_name":"Sangir",
5526
+ "autonym":"Sangir",
5527
  "family":"Austronesian",
5528
  "flores_path":null,
5529
  "fleurs_tag":null,
 
5904
  "in_benchmark":false
5905
  },
5906
  {
5907
+ "bcp_47":"kkj",
5908
  "speakers":149823,
5909
+ "language_name":"Kako",
5910
+ "autonym":"Kakɔ",
5911
  "family":"Atlantic-Congo",
5912
  "flores_path":null,
5913
  "fleurs_tag":null,
 
5916
  "in_benchmark":false
5917
  },
5918
  {
5919
+ "bcp_47":"bss",
5920
  "speakers":149823,
5921
+ "language_name":"Akoose",
5922
+ "autonym":"Akoose",
5923
  "family":"Atlantic-Congo",
5924
  "flores_path":null,
5925
  "fleurs_tag":null,
 
6367
  "family":"Indo-European",
6368
  "flores_path":null,
6369
  "fleurs_tag":null,
6370
+ "commonvoice_hours":4.6,
6371
  "commonvoice_locale":"btv",
6372
  "in_benchmark":false
6373
  },
 
7272
  "in_benchmark":false
7273
  },
7274
  {
7275
+ "bcp_47":"twq",
7276
  "speakers":7970,
7277
+ "language_name":"Tasawaq",
7278
+ "autonym":"Tasawaq Senni",
7279
+ "family":"Songhay",
7280
  "flores_path":null,
7281
  "fleurs_tag":null,
7282
  "commonvoice_hours":null,
 
7284
  "in_benchmark":false
7285
  },
7286
  {
7287
+ "bcp_47":"bku",
7288
  "speakers":7970,
7289
+ "language_name":"Buhid",
7290
+ "autonym":"Buhid",
7291
+ "family":"Austronesian",
7292
  "flores_path":null,
7293
  "fleurs_tag":null,
7294
  "commonvoice_hours":null,
 
7836
  "in_benchmark":false
7837
  },
7838
  {
7839
+ "bcp_47":"kwk",
7840
  "speakers":377,
7841
+ "language_name":"Kwakʼwala",
7842
+ "autonym":"KwakʼWala",
7843
+ "family":"Wakashan",
7844
  "flores_path":null,
7845
  "fleurs_tag":null,
7846
  "commonvoice_hours":null,
 
7848
  "in_benchmark":false
7849
  },
7850
  {
7851
+ "bcp_47":"crl",
7852
  "speakers":377,
7853
+ "language_name":"Northern East Cree",
7854
+ "autonym":"Northern East Cree",
7855
+ "family":"Algic",
7856
  "flores_path":null,
7857
  "fleurs_tag":null,
7858
  "commonvoice_hours":null,
 
7968
  "in_benchmark":false
7969
  },
7970
  {
7971
+ "bcp_47":"lzh",
7972
  "speakers":0,
7973
+ "language_name":"Literary Chinese",
7974
+ "autonym":"Literary Chinese",
7975
+ "family":"Sino-Tibetan",
7976
  "flores_path":null,
7977
  "fleurs_tag":null,
7978
  "commonvoice_hours":null,
 
7980
  "in_benchmark":false
7981
  },
7982
  {
7983
+ "bcp_47":"io",
7984
  "speakers":0,
7985
+ "language_name":"Ido",
7986
+ "autonym":"Ido",
7987
+ "family":"Artificial Language",
7988
  "flores_path":null,
7989
  "fleurs_tag":null,
7990
  "commonvoice_hours":null,
 
7992
  "in_benchmark":false
7993
  },
7994
  {
7995
+ "bcp_47":"jbo",
7996
  "speakers":0,
7997
+ "language_name":"Lojban",
7998
+ "autonym":"La .Lojban.",
7999
+ "family":"Artificial Language",
8000
  "flores_path":null,
8001
  "fleurs_tag":null,
8002
+ "commonvoice_hours":0.0,
8003
+ "commonvoice_locale":"jbo",
8004
  "in_benchmark":false
8005
  },
8006
  {
8007
+ "bcp_47":"jut",
8008
  "speakers":0,
8009
+ "language_name":"Jutish",
8010
+ "autonym":"Jutish",
8011
  "family":"Indo-European",
8012
  "flores_path":null,
8013
  "fleurs_tag":null,
 
8016
  "in_benchmark":false
8017
  },
8018
  {
8019
+ "bcp_47":"vot",
8020
  "speakers":0,
8021
+ "language_name":"Votic",
8022
+ "autonym":"Votic",
8023
+ "family":"Uralic",
8024
  "flores_path":null,
8025
  "fleurs_tag":null,
8026
+ "commonvoice_hours":0.1,
8027
+ "commonvoice_locale":"vot",
8028
  "in_benchmark":false
8029
  },
8030
  {
8031
+ "bcp_47":"gez",
8032
  "speakers":0,
8033
+ "language_name":"Geez",
8034
+ "autonym":"Geez",
8035
+ "family":"Afro-Asiatic",
8036
  "flores_path":null,
8037
  "fleurs_tag":null,
8038
  "commonvoice_hours":null,
 
8040
  "in_benchmark":false
8041
  },
8042
  {
8043
+ "bcp_47":"osa",
8044
  "speakers":0,
8045
+ "language_name":"Osage",
8046
+ "autonym":"𐓏𐓘𐓻𐓘𐓻𐓟",
8047
+ "family":"Siouan",
8048
  "flores_path":null,
8049
  "fleurs_tag":null,
8050
+ "commonvoice_hours":null,
8051
+ "commonvoice_locale":null,
8052
  "in_benchmark":false
8053
  },
8054
  {
8055
+ "bcp_47":"rgn",
8056
  "speakers":0,
8057
+ "language_name":"Romagnol",
8058
+ "autonym":"Romagnol",
8059
+ "family":"Indo-European",
8060
  "flores_path":null,
8061
  "fleurs_tag":null,
8062
  "commonvoice_hours":null,
 
8064
  "in_benchmark":false
8065
  },
8066
  {
8067
+ "bcp_47":"cu",
8068
  "speakers":0,
8069
+ "language_name":"Church Slavic",
8070
+ "autonym":"Church Slavic",
8071
  "family":"Indo-European",
8072
  "flores_path":null,
8073
  "fleurs_tag":null,
 
8076
  "in_benchmark":false
8077
  },
8078
  {
8079
+ "bcp_47":"sgs",
8080
  "speakers":0,
8081
+ "language_name":"Samogitian",
8082
+ "autonym":"Samogitian",
8083
+ "family":"Indo-European",
8084
  "flores_path":null,
8085
  "fleurs_tag":null,
8086
  "commonvoice_hours":null,
 
8088
  "in_benchmark":false
8089
  },
8090
  {
8091
+ "bcp_47":"ann",
8092
  "speakers":0,
8093
+ "language_name":"Obolo",
8094
+ "autonym":"Obolo",
8095
+ "family":"Atlantic-Congo",
8096
  "flores_path":null,
8097
  "fleurs_tag":null,
8098
  "commonvoice_hours":null,
 
8112
  "in_benchmark":false
8113
  },
8114
  {
8115
+ "bcp_47":"cad",
8116
  "speakers":0,
8117
+ "language_name":"Caddo",
8118
+ "autonym":"Caddo",
8119
+ "family":"Caddoan",
8120
  "flores_path":null,
8121
  "fleurs_tag":null,
8122
+ "commonvoice_hours":null,
8123
+ "commonvoice_locale":null,
8124
  "in_benchmark":false
8125
  },
8126
  {
8127
+ "bcp_47":"pfl",
8128
  "speakers":0,
8129
+ "language_name":"Palatine German",
8130
+ "autonym":"Palatine German",
8131
+ "family":"Indo-European",
8132
  "flores_path":null,
8133
  "fleurs_tag":null,
8134
  "commonvoice_hours":null,
models.json CHANGED
@@ -32,28 +32,6 @@
32
  "license":"Mit",
33
  "creation_date":1742774400000
34
  },
35
- {
36
- "id":"deepseek\/deepseek-r1",
37
- "name":"R1 (free)",
38
- "provider_name":"DeepSeek",
39
- "cost":0.0,
40
- "hf_id":"deepseek-ai\/DeepSeek-R1",
41
- "size":684531386000.0,
42
- "type":"Open",
43
- "license":"Mit",
44
- "creation_date":1737331200000
45
- },
46
- {
47
- "id":"google\/gemini-2.0-flash-001",
48
- "name":"Gemini 2.0 Flash",
49
- "provider_name":"Google",
50
- "cost":0.4,
51
- "hf_id":null,
52
- "size":null,
53
- "type":"Commercial",
54
- "license":null,
55
- "creation_date":1738713600000
56
- },
57
  {
58
  "id":"google\/gemini-2.0-flash-lite-001",
59
  "name":"Gemini 2.0 Flash Lite",
@@ -76,28 +54,6 @@
76
  "license":null,
77
  "creation_date":1744848000000
78
  },
79
- {
80
- "id":"google\/gemini-flash-1.5",
81
- "name":"Gemini 1.5 Flash ",
82
- "provider_name":"Google",
83
- "cost":0.3,
84
- "hf_id":null,
85
- "size":null,
86
- "type":"Commercial",
87
- "license":null,
88
- "creation_date":1715644800000
89
- },
90
- {
91
- "id":"google\/gemini-flash-1.5-8b",
92
- "name":"Gemini 1.5 Flash 8B",
93
- "provider_name":"Google",
94
- "cost":0.15,
95
- "hf_id":null,
96
- "size":null,
97
- "type":"Commercial",
98
- "license":null,
99
- "creation_date":1727913600000
100
- },
101
  {
102
  "id":"google\/gemma-3-27b-it",
103
  "name":"Gemma 3 27B (free)",
@@ -109,17 +65,6 @@
109
  "license":"Gemma",
110
  "creation_date":1740787200000
111
  },
112
- {
113
- "id":"gryphe\/mythomax-l2-13b",
114
- "name":"MythoMax 13B",
115
- "provider_name":"MythoMax 13B",
116
- "cost":0.07,
117
- "hf_id":"Gryphe\/MythoMax-L2-13b",
118
- "size":null,
119
- "type":"Open",
120
- "license":"Other",
121
- "creation_date":1691625600000
122
- },
123
  {
124
  "id":"meta-llama\/llama-3-70b-instruct",
125
  "name":"Llama 3 70B Instruct",
 
32
  "license":"Mit",
33
  "creation_date":1742774400000
34
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
35
  {
36
  "id":"google\/gemini-2.0-flash-lite-001",
37
  "name":"Gemini 2.0 Flash Lite",
 
54
  "license":null,
55
  "creation_date":1744848000000
56
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
57
  {
58
  "id":"google\/gemma-3-27b-it",
59
  "name":"Gemma 3 27B (free)",
 
65
  "license":"Gemma",
66
  "creation_date":1740787200000
67
  },
 
 
 
 
 
 
 
 
 
 
 
68
  {
69
  "id":"meta-llama\/llama-3-70b-instruct",
70
  "name":"Llama 3 70B Instruct",
results.json CHANGED
The diff for this file is too large to render. See raw diff