David Pomerenke committed on
Commit
29c8ef6
·
1 Parent(s): 56081d8

Make a map

Browse files
Files changed (6) hide show
  1. app.py +215 -8
  2. evals.py +16 -2
  3. pyproject.toml +1 -0
  4. requirements.txt +2 -0
  5. results.json +571 -29
  6. uv.lock +11 -0
app.py CHANGED
@@ -1,8 +1,10 @@
1
  import json
2
 
3
  import gradio as gr
 
4
  import pandas as pd
5
  import plotly.graph_objects as go
 
6
 
7
  with open("results.json") as f:
8
  results = json.load(f)
@@ -157,10 +159,17 @@ def create_model_comparison_plot(results):
157
  fig = go.Figure(data=traces)
158
  fig.update_layout(
159
  title="BLEU Scores by Model and Language",
160
- xaxis_title="Language",
161
  yaxis_title="BLEU Score",
162
  barmode="group",
163
  height=500,
 
 
 
 
 
 
 
164
  )
165
  return fig
166
 
@@ -175,10 +184,18 @@ def create_language_stats_df(results):
175
  lang["scores"] or [{"bleu": None, "model": None}], key=lambda x: x["bleu"]
176
  )
177
 
178
- model = best_score['model']
179
- model_name = model.split('/')[-1] if model else "N/A"
180
- model_link = f"<a href='https://openrouter.ai/{model}' style='text-decoration: none; color: inherit;'>{model_name}</a>" if model else "N/A"
181
- commonvoice_link = f"<!--{lang['commonvoice_hours']:07} (for sorting)--> <a href='https://commonvoice.mozilla.org/{lang['commonvoice_locale']}/speak' style='text-decoration: none; color: inherit;'>🎙️ {lang['commonvoice_hours']}</a>" if lang["commonvoice_hours"] else "N/A"
 
 
 
 
 
 
 
 
182
  row = {
183
  "Language": f"**{lang['language_name']}**",
184
  "Speakers (M)": round(lang["speakers"] / 1_000_000, 1),
@@ -199,7 +216,15 @@ def create_language_stats_df(results):
199
  value=df,
200
  label="Language Results",
201
  show_search="search",
202
- datatype=["markdown", "number", "number", "number", "markdown", "number", "markdown"],
 
 
 
 
 
 
 
 
203
  )
204
 
205
 
@@ -224,7 +249,7 @@ def create_scatter_plot(results):
224
  )
225
 
226
  fig.update_layout(
227
- title="Language Coverage: Speakers vs BLEU Score",
228
  xaxis_title="Number of Speakers (Millions)",
229
  yaxis_title="Average BLEU Score",
230
  height=500,
@@ -237,6 +262,186 @@ def create_scatter_plot(results):
237
  return fig
238
 
239
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
240
  # Create the visualization components
241
  with gr.Blocks(title="AI Language Translation Benchmark") as demo:
242
  gr.Markdown("# AI Language Translation Benchmark")
@@ -246,11 +451,13 @@ with gr.Blocks(title="AI Language Translation Benchmark") as demo:
246
 
247
  bar_plot = create_model_comparison_plot(results)
248
  scatter_plot = create_scatter_plot(results)
 
249
 
250
  create_leaderboard_df(results)
251
  gr.Plot(value=bar_plot, label="Model Comparison")
252
  create_language_stats_df(results)
253
- gr.Plot(value=scatter_plot, label="Language Coverage")
 
254
 
255
  gr.Markdown(
256
  """
 
1
  import json
2
 
3
  import gradio as gr
4
+ import numpy as np
5
  import pandas as pd
6
  import plotly.graph_objects as go
7
+ import pycountry
8
 
9
  with open("results.json") as f:
10
  results = json.load(f)
 
159
  fig = go.Figure(data=traces)
160
  fig.update_layout(
161
  title="BLEU Scores by Model and Language",
162
+ xaxis_title=None,
163
  yaxis_title="BLEU Score",
164
  barmode="group",
165
  height=500,
166
+ legend=dict(
167
+ orientation="h", # horizontal orientation
168
+ yanchor="bottom",
169
+ y=-0.3, # position below plot
170
+ xanchor="center",
171
+ x=0.5, # center horizontally
172
+ ),
173
  )
174
  return fig
175
 
 
184
  lang["scores"] or [{"bleu": None, "model": None}], key=lambda x: x["bleu"]
185
  )
186
 
187
+ model = best_score["model"]
188
+ model_name = model.split("/")[-1] if model else "N/A"
189
+ model_link = (
190
+ f"<a href='https://openrouter.ai/{model}' style='text-decoration: none; color: inherit;'>{model_name}</a>"
191
+ if model
192
+ else "N/A"
193
+ )
194
+ commonvoice_link = (
195
+ f"<!--{lang['commonvoice_hours']:07} (for sorting)--> <a href='https://commonvoice.mozilla.org/{lang['commonvoice_locale']}/speak' style='text-decoration: none; color: inherit;'>🎙️ {lang['commonvoice_hours']}</a>"
196
+ if lang["commonvoice_hours"]
197
+ else "N/A"
198
+ )
199
  row = {
200
  "Language": f"**{lang['language_name']}**",
201
  "Speakers (M)": round(lang["speakers"] / 1_000_000, 1),
 
216
  value=df,
217
  label="Language Results",
218
  show_search="search",
219
+ datatype=[
220
+ "markdown",
221
+ "number",
222
+ "number",
223
+ "number",
224
+ "markdown",
225
+ "number",
226
+ "markdown",
227
+ ],
228
  )
229
 
230
 
 
249
  )
250
 
251
  fig.update_layout(
252
+ title=None,
253
  xaxis_title="Number of Speakers (Millions)",
254
  yaxis_title="Average BLEU Score",
255
  height=500,
 
262
  return fig
263
 
264
 
265
def format_number(n):
    """Format a number compactly with a K (thousands) or M (millions) suffix.

    Millions are shown with one decimal place ("1.5M"), thousands with none
    ("12K"), and anything below 1,000 is returned via ``str(n)``.

    Args:
        n: The number to format.

    Returns:
        The formatted string.
    """
    # Switch to "M" from 999,500 upwards: those values would round to "1000K"
    # in the thousands branch, whereas they render as "1.0M" here.
    if n >= 999_500:
        return f"{n / 1_000_000:.1f}M"
    if n >= 1_000:
        return f"{n / 1_000:.0f}K"
    return str(n)
272
+
273
+
274
def create_world_map(results):
    """Build a choropleth map of speaker-weighted BLEU scores per country.

    Each entry of ``results`` that has a ``"population"`` mapping (alpha-2
    country code -> speaker count) and a non-``None`` ``"bleu"`` contributes to
    every country it lists. A country's score is the mean of the BLEU scores of
    its languages, weighted by the speaker counts in that country.

    Args:
        results: Iterable of per-language dicts; the keys read here are
            ``"population"``, ``"bleu"`` and ``"language_name"``.

    Returns:
        A ``go.Figure`` holding one ``go.Choropleth`` trace keyed by ISO-3
        country codes, with an HTML hover text per country.
    """
    # Collect all country data, keyed by alpha-3 code
    country_data = {}
    for lang in results:
        if "population" not in lang or lang["bleu"] is None:
            continue

        for country_code, speakers in lang["population"].items():
            try:
                # Convert alpha_2 (2-letter) to alpha_3 (3-letter) code
                country = pycountry.countries.get(alpha_2=country_code)
                if country is None:
                    continue

                iso3_code = country.alpha_3
                if iso3_code not in country_data:
                    country_data[iso3_code] = {
                        "total_speakers": 0,
                        "weighted_bleu_sum": 0,
                        "languages": [],
                    }

                country_data[iso3_code]["total_speakers"] += speakers
                country_data[iso3_code]["weighted_bleu_sum"] += speakers * lang["bleu"]
                country_data[iso3_code]["languages"].append(
                    {
                        "name": lang["language_name"],
                        "speakers": speakers,
                        "bleu": lang["bleu"],
                    }
                )
            except (KeyError, AttributeError):
                # Skip invalid or unrecognized country codes
                continue

    # Calculate final weighted averages and prepare hover text
    countries = []
    bleu_scores = []
    hover_texts = []

    def make_black_bar(value, max_width=10):
        """Render ``value`` (a 0-1 fraction) as a bar of black/white squares."""
        # Clamp like make_colored_bar so out-of-range input cannot produce a
        # bar that is longer or shorter than max_width.
        filled = max(0, min(int(value * max_width), max_width))
        return "⬛️" * filled + "⬜️" * (max_width - filled)

    def make_colored_bar(value, max_width=10):
        """Create a colored bar using Unicode blocks

        The color depends on ``value`` exactly as passed in:
        🟦 when value > 0.35
        🟨 when 0.25 < value <= 0.35
        🟥 otherwise
        ⬜ for empty space

        NOTE(review): every caller in this function passes BLEU rescaled from
        0.2-0.4 to 0-1, so the thresholds act on the rescaled value, not on the
        raw BLEU score -- confirm this is the intended color mapping.
        """
        filled = int(value * max_width)
        filled = max(0, min(filled, max_width))
        empty = max_width - filled

        if value > 0.35:
            return "🟦" * filled + "⬜" * empty
        elif value > 0.25:
            return "🟨" * filled + "⬜" * empty
        else:
            return "🟥" * filled + "⬜" * empty

    for country_code, data in country_data.items():
        # A country whose speaker counts sum to zero has no defined weighted
        # average (and no meaningful shares); skip it instead of dividing by 0.
        if data["total_speakers"] <= 0:
            continue

        weighted_avg = data["weighted_bleu_sum"] / data["total_speakers"]

        try:
            country_name = pycountry.countries.get(alpha_3=country_code).name
        except AttributeError:
            # Lookup returned None; fall back to showing the code itself
            country_name = country_code

        # Sort languages by number of speakers
        langs = sorted(data["languages"], key=lambda x: x["speakers"], reverse=True)
        total_speakers = sum(lang["speakers"] for lang in langs)

        # Take top 5 languages and summarize the rest
        main_langs = langs[:5]
        other_langs = langs[5:]

        # Create language rows with bars
        lang_rows = []
        for lang in main_langs:
            percentage = (lang["speakers"] / total_speakers) * 100
            speaker_bar = make_black_bar(percentage / 100)
            bleu_bar = make_colored_bar((lang["bleu"] - 0.2) / 0.2)

            lang_rows.append(
                f"<b>{lang['name']}</b><br>"
                f"{speaker_bar} {format_number(lang['speakers'])} speakers<br>"
                f"{bleu_bar} {lang['bleu']:.3f} BLEU<br>"
            )

        # Add summary for other languages if any
        if other_langs:
            other_speakers = sum(lang["speakers"] for lang in other_langs)
            other_percentage = (other_speakers / total_speakers) * 100
            # Plain (unweighted) mean of the remaining languages' BLEU scores
            other_avg_bleu = sum(lang["bleu"] for lang in other_langs) / len(
                other_langs
            )

            speaker_bar = make_black_bar(other_percentage / 100)
            bleu_bar = make_colored_bar((other_avg_bleu - 0.2) / 0.2)

            lang_rows.append(
                f"<b>+{len(other_langs)} other languages</b><br>"
                f"{speaker_bar} {format_number(other_speakers)} speakers<br>"
                f"{bleu_bar} {other_avg_bleu:.3f} BLEU<br>"
            )

        # Create overall BLEU visualization
        bleu_percentage = (weighted_avg - 0.2) / 0.2  # Scale from 0.2-0.4 to 0-1
        overall_bleu_bar = make_colored_bar(bleu_percentage)

        hover_text = (
            f"<b>{country_name}</b><br><br>"
            f"{format_number(data['total_speakers'])} speakers*<br>"
            f"{overall_bleu_bar} {weighted_avg:.3f} BLEU<br><br>"
            f"<b>Languages:</b><br><br>"
            f"{'<br>'.join(lang_rows)}"
        )

        countries.append(country_code)
        bleu_scores.append(weighted_avg)
        hover_texts.append(hover_text)

    # Create the choropleth map
    fig = go.Figure(
        data=go.Choropleth(
            locations=countries,
            locationmode="ISO-3",
            z=bleu_scores,
            text=hover_texts,
            hoverinfo="text",
            colorscale=[[0, "#ff9999"], [1, "#99ccff"]],
            colorbar=dict(
                title="BLEU Score",
                orientation="h",  # horizontal orientation
                y=-0.2,  # position below map
                yanchor="bottom",
                len=0.5,  # length of colorbar
                x=0.5,  # center horizontally
                xanchor="center",
                thickness=20,  # make it a bit thicker when horizontal
            ),
            zmin=0.2,
            zmax=0.5,
        )
    )

    fig.update_layout(
        title=dict(text="BLEU Score by Country", x=0.5, xanchor="center"),
        geo=dict(
            showframe=True,
            showcoastlines=True,
            projection_type="equal earth",
            showland=True,
            landcolor="#f8f9fa",
            coastlinecolor="#e0e0e0",
            countrycolor="#e0e0e0",
        ),
        height=600,
        margin=dict(l=0, r=0, t=30, b=0),
        paper_bgcolor="white",
        hoverlabel=dict(
            bgcolor="beige",
            font_size=12,
        ),
    )

    return fig
443
+
444
+
445
  # Create the visualization components
446
  with gr.Blocks(title="AI Language Translation Benchmark") as demo:
447
  gr.Markdown("# AI Language Translation Benchmark")
 
451
 
452
  bar_plot = create_model_comparison_plot(results)
453
  scatter_plot = create_scatter_plot(results)
454
+ world_map = create_world_map(results)
455
 
456
  create_leaderboard_df(results)
457
  gr.Plot(value=bar_plot, label="Model Comparison")
458
  create_language_stats_df(results)
459
+ gr.Plot(value=scatter_plot, label="Speaker population vs BLEU")
460
+ gr.Plot(value=world_map, container=False, elem_classes="fullwidth-plot")
461
 
462
  gr.Markdown(
463
  """
evals.py CHANGED
@@ -62,6 +62,15 @@ scripts = pd.read_csv("data/ScriptCodes.csv").rename(
62
  )
63
 
64
 
 
 
 
 
 
 
 
 
 
65
  def script_name(iso15924):
66
  return scripts[scripts["iso15924"] == iso15924]["script_name"].values[0]
67
 
@@ -246,8 +255,13 @@ async def main():
246
  "speakers": language.speakers,
247
  "scores": results_for_language,
248
  "bleu": mean([s["bleu"] for s in results_for_language]),
249
- "commonvoice_hours": language.commonvoice_hours,
250
- "commonvoice_locale": language.commonvoice_locale,
 
 
 
 
 
251
  }
252
  )
253
  with open("results.json", "w") as f:
 
62
  )
63
 
64
 
65
def population(bcp_47, populations=None):
    """Return the per-region speaker counts recorded for one language tag.

    Scans the population table for keys of the form ``<bcp_47>-<REGION>``,
    where ``<REGION>`` is exactly two uppercase letters, and maps each region
    code to the value stored under that key.

    Args:
        bcp_47: Language tag to look up (e.g. ``"en"``). It is matched
            literally, not as a regular expression.
        populations: Optional mapping from locale tag to speaker count.
            Defaults to the module-level ``LANGUAGE_SPEAKING_POPULATION``.

    Returns:
        Dict mapping region code to the table value; empty if nothing matches.
    """
    if populations is None:
        populations = LANGUAGE_SPEAKING_POPULATION
    # Escape the tag so regex metacharacters in it cannot widen the match, and
    # capture the region directly: stripping a leading ``[a-z]+-`` prefix would
    # leave the script subtag in the key for tags such as ``pa-Guru``.
    pattern = re.compile(rf"^{re.escape(bcp_47)}-([A-Z]{{2}})$")
    items = {}
    for locale, pop in populations.items():
        match = pattern.match(locale)
        if match:
            items[match.group(1)] = pop
    return items
72
+
73
+
74
  def script_name(iso15924):
75
  return scripts[scripts["iso15924"] == iso15924]["script_name"].values[0]
76
 
 
255
  "speakers": language.speakers,
256
  "scores": results_for_language,
257
  "bleu": mean([s["bleu"] for s in results_for_language]),
258
+ "commonvoice_hours": language.commonvoice_hours
259
+ if not pd.isna(language.commonvoice_hours)
260
+ else None,
261
+ "commonvoice_locale": language.commonvoice_locale
262
+ if not pd.isna(language.commonvoice_locale)
263
+ else None,
264
+ "population": population(language.bcp_47),
265
  }
266
  )
267
  with open("results.json", "w") as f:
pyproject.toml CHANGED
@@ -8,6 +8,7 @@ dependencies = [
8
  "gradio>=5.16.2",
9
  "pandas>=2.2.3",
10
  "plotly>=6.0.0",
 
11
  ]
12
 
13
  [tool.uv]
 
8
  "gradio>=5.16.2",
9
  "pandas>=2.2.3",
10
  "plotly>=6.0.0",
11
+ "pycountry>=24.6.1",
12
  ]
13
 
14
  [tool.uv]
requirements.txt CHANGED
@@ -88,6 +88,8 @@ pillow==11.1.0
88
  # via gradio
89
  plotly==6.0.0
90
  # via languagebench (pyproject.toml)
 
 
91
  pydantic==2.10.6
92
  # via
93
  # fastapi
 
88
  # via gradio
89
  plotly==6.0.0
90
  # via languagebench (pyproject.toml)
91
+ pycountry==24.6.1
92
+ # via languagebench (pyproject.toml)
93
  pydantic==2.10.6
94
  # via
95
  # fastapi
results.json CHANGED
@@ -31,7 +31,164 @@
31
  ],
32
  "bleu": 0.5035795595158651,
33
  "commonvoice_hours": 2649.0,
34
- "commonvoice_locale": "en"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
35
  },
36
  {
37
  "language_name": "Chinese",
@@ -45,7 +202,29 @@
45
  ],
46
  "bleu": 0.35763875438716014,
47
  "commonvoice_hours": 422.0,
48
- "commonvoice_locale": "zh-HK"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
49
  },
50
  {
51
  "language_name": "Hindi",
@@ -59,7 +238,15 @@
59
  ],
60
  "bleu": 0.33760351976648345,
61
  "commonvoice_hours": 16.0,
62
- "commonvoice_locale": "hi"
 
 
 
 
 
 
 
 
63
  },
64
  {
65
  "language_name": "Spanish",
@@ -73,7 +260,48 @@
73
  ],
74
  "bleu": 0.3600460831160618,
75
  "commonvoice_hours": 446.0,
76
- "commonvoice_locale": "es"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
77
  },
78
  {
79
  "language_name": "Arabic",
@@ -87,7 +315,47 @@
87
  ],
88
  "bleu": 0.3046598747480405,
89
  "commonvoice_hours": 91.0,
90
- "commonvoice_locale": "ar"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
91
  },
92
  {
93
  "language_name": "Urdu",
@@ -101,7 +369,14 @@
101
  ],
102
  "bleu": 0.331647033312127,
103
  "commonvoice_hours": 76.0,
104
- "commonvoice_locale": "ur"
 
 
 
 
 
 
 
105
  },
106
  {
107
  "language_name": "French",
@@ -115,7 +390,71 @@
115
  ],
116
  "bleu": 0.3141809404018014,
117
  "commonvoice_hours": 1051.0,
118
- "commonvoice_locale": "fr"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
119
  },
120
  {
121
  "language_name": "Bangla",
@@ -129,7 +468,14 @@
129
  ],
130
  "bleu": 0.27472181972977344,
131
  "commonvoice_hours": 49.0,
132
- "commonvoice_locale": "bn"
 
 
 
 
 
 
 
133
  },
134
  {
135
  "language_name": "Portuguese",
@@ -163,7 +509,25 @@
163
  ],
164
  "bleu": 0.367787171884892,
165
  "commonvoice_hours": 176.0,
166
- "commonvoice_locale": "pt"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
167
  },
168
  {
169
  "language_name": "Punjabi",
@@ -197,7 +561,15 @@
197
  ],
198
  "bleu": 0.31594664710428266,
199
  "commonvoice_hours": 2.3,
200
- "commonvoice_locale": "pa-IN"
 
 
 
 
 
 
 
 
201
  },
202
  {
203
  "language_name": "Russian",
@@ -211,7 +583,32 @@
211
  ],
212
  "bleu": 0.2920291935463745,
213
  "commonvoice_hours": 241.0,
214
- "commonvoice_locale": "ru"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
215
  },
216
  {
217
  "language_name": "Swahili",
@@ -245,7 +642,18 @@
245
  ],
246
  "bleu": 0.3018786362743097,
247
  "commonvoice_hours": 411.0,
248
- "commonvoice_locale": "sw"
 
 
 
 
 
 
 
 
 
 
 
249
  },
250
  {
251
  "language_name": "Indonesian",
@@ -279,7 +687,11 @@
279
  ],
280
  "bleu": 0.31132422822400946,
281
  "commonvoice_hours": 33.0,
282
- "commonvoice_locale": "id"
 
 
 
 
283
  },
284
  {
285
  "language_name": "German",
@@ -313,7 +725,36 @@
313
  ],
314
  "bleu": 0.3992689214831344,
315
  "commonvoice_hours": 1357.0,
316
- "commonvoice_locale": "de"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
317
  },
318
  {
319
  "language_name": "Japanese",
@@ -327,7 +768,12 @@
327
  ],
328
  "bleu": 0.2954810072264808,
329
  "commonvoice_hours": 222.0,
330
- "commonvoice_locale": "ja"
 
 
 
 
 
331
  },
332
  {
333
  "language_name": "Telugu",
@@ -341,7 +787,10 @@
341
  ],
342
  "bleu": 0.37949545228579734,
343
  "commonvoice_hours": 0.3,
344
- "commonvoice_locale": "te"
 
 
 
345
  },
346
  {
347
  "language_name": "Marathi",
@@ -355,7 +804,10 @@
355
  ],
356
  "bleu": 0.2852384896861461,
357
  "commonvoice_hours": 20.0,
358
- "commonvoice_locale": "mr"
 
 
 
359
  },
360
  {
361
  "language_name": "Javanese",
@@ -389,7 +841,11 @@
389
  ],
390
  "bleu": 0.2505244065073906,
391
  "commonvoice_hours": 0.0,
392
- "commonvoice_locale": "jv"
 
 
 
 
393
  },
394
  {
395
  "language_name": "Vietnamese",
@@ -403,7 +859,13 @@
403
  ],
404
  "bleu": 0.2956750563565745,
405
  "commonvoice_hours": 5.9,
406
- "commonvoice_locale": "vi"
 
 
 
 
 
 
407
  },
408
  {
409
  "language_name": "Tamil",
@@ -417,7 +879,17 @@
417
  ],
418
  "bleu": 0.27547489589987734,
419
  "commonvoice_hours": 234.0,
420
- "commonvoice_locale": "ta"
 
 
 
 
 
 
 
 
 
 
421
  },
422
  {
423
  "language_name": "Persian",
@@ -431,7 +903,18 @@
431
  ],
432
  "bleu": 0.2858012364771329,
433
  "commonvoice_hours": 370.0,
434
- "commonvoice_locale": "fa"
 
 
 
 
 
 
 
 
 
 
 
435
  },
436
  {
437
  "language_name": "Turkish",
@@ -465,7 +948,21 @@
465
  ],
466
  "bleu": 0.30402386618673855,
467
  "commonvoice_hours": 127.0,
468
- "commonvoice_locale": "tr"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
469
  },
470
  {
471
  "language_name": "Cantonese",
@@ -499,7 +996,12 @@
499
  ],
500
  "bleu": 0.27975991005230577,
501
  "commonvoice_hours": 203.0,
502
- "commonvoice_locale": "yue"
 
 
 
 
 
503
  },
504
  {
505
  "language_name": "Korean",
@@ -513,7 +1015,16 @@
513
  ],
514
  "bleu": 0.24501349273295708,
515
  "commonvoice_hours": 1.7,
516
- "commonvoice_locale": "ko"
 
 
 
 
 
 
 
 
 
517
  },
518
  {
519
  "language_name": "Italian",
@@ -527,7 +1038,24 @@
527
  ],
528
  "bleu": 0.3273249067267197,
529
  "commonvoice_hours": 362.0,
530
- "commonvoice_locale": "it"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
531
  },
532
  {
533
  "language_name": "Filipino",
@@ -561,7 +1089,12 @@
561
  ],
562
  "bleu": 0.3353425581350746,
563
  "commonvoice_hours": 0.0,
564
- "commonvoice_locale": "tl"
 
 
 
 
 
565
  },
566
  {
567
  "language_name": "Egyptian Arabic",
@@ -574,8 +1107,11 @@
574
  }
575
  ],
576
  "bleu": 0.23431638822117362,
577
- "commonvoice_hours": NaN,
578
- "commonvoice_locale": NaN
 
 
 
579
  },
580
  {
581
  "language_name": "Gujarati",
@@ -589,6 +1125,12 @@
589
  ],
590
  "bleu": 0.27834507803114356,
591
  "commonvoice_hours": 0.0,
592
- "commonvoice_locale": "gu-IN"
 
 
 
 
 
 
593
  }
594
  ]
 
31
  ],
32
  "bleu": 0.5035795595158651,
33
  "commonvoice_hours": 2649.0,
34
+ "commonvoice_locale": "en",
35
+ "population": {
36
+ "AC": 931,
37
+ "AE": 4996040,
38
+ "AG": 84434,
39
+ "AI": 17186,
40
+ "AQ": 300,
41
+ "AR": 3183537,
42
+ "AS": 47954,
43
+ "AT": 6467398,
44
+ "AU": 24447840,
45
+ "AW": 2986,
46
+ "BA": 1726016,
47
+ "BB": 294560,
48
+ "BD": 29277180,
49
+ "BE": 6915213,
50
+ "BG": 1741725,
51
+ "BI": 6289,
52
+ "BM": 66010,
53
+ "BN": 7896,
54
+ "BR": 16937280,
55
+ "BS": 337721,
56
+ "BT": 86055,
57
+ "BV": 1,
58
+ "BW": 1876956,
59
+ "BZ": 399598,
60
+ "CA": 32416926,
61
+ "CC": 101,
62
+ "CH": 5126434,
63
+ "CK": 8574,
64
+ "CL": 1727746,
65
+ "CM": 10543100,
66
+ "CN": 62731,
67
+ "CP": 1,
68
+ "CQ": 482,
69
+ "CX": 1389,
70
+ "CY": 924676,
71
+ "CZ": 2889675,
72
+ "DE": 51302208,
73
+ "DG": 495,
74
+ "DK": 5047693,
75
+ "DM": 69788,
76
+ "DO": 7980,
77
+ "DZ": 3008103,
78
+ "EE": 614310,
79
+ "EG": 36443400,
80
+ "ER": 3587908,
81
+ "ES": 12003792,
82
+ "ET": 46488590,
83
+ "FI": 3900169,
84
+ "FJ": 879816,
85
+ "FK": 2814,
86
+ "FM": 58389,
87
+ "FR": 26460798,
88
+ "GB": 64445878,
89
+ "GD": 108570,
90
+ "GG": 67052,
91
+ "GH": 6161442,
92
+ "GI": 23665,
93
+ "GM": 869600,
94
+ "GR": 5409621,
95
+ "GS": 20,
96
+ "GU": 153321,
97
+ "GY": 750204,
98
+ "HK": 3697454,
99
+ "HM": 1,
100
+ "HN": 40635,
101
+ "HR": 2071598,
102
+ "HU": 1954366,
103
+ "IE": 5073039,
104
+ "IL": 7374158,
105
+ "IM": 90499,
106
+ "IN": 251957100,
107
+ "IO": 3500,
108
+ "IQ": 13605445,
109
+ "IT": 21216918,
110
+ "JE": 96019,
111
+ "JM": 2752399,
112
+ "JO": 4869270,
113
+ "KE": 10170301,
114
+ "KI": 111796,
115
+ "KN": 52745,
116
+ "KY": 60705,
117
+ "KZ": 2863785,
118
+ "LB": 2187844,
119
+ "LC": 149838,
120
+ "LK": 2288920,
121
+ "LR": 4210839,
122
+ "LS": 531719,
123
+ "LT": 1037955,
124
+ "LU": 351893,
125
+ "LV": 865366,
126
+ "MA": 4978638,
127
+ "MG": 4852026,
128
+ "MH": 72463,
129
+ "MO": 14133,
130
+ "MP": 49890,
131
+ "MS": 3492,
132
+ "MT": 402395,
133
+ "MU": 993146,
134
+ "MV": 293928,
135
+ "MW": 13353858,
136
+ "MX": 16724500,
137
+ "MY": 6856941,
138
+ "NA": 184105,
139
+ "NF": 1678,
140
+ "NG": 113434840,
141
+ "NL": 15552360,
142
+ "NP": 909837,
143
+ "NR": 9350,
144
+ "NU": 1120,
145
+ "NZ": 4826970,
146
+ "PA": 545171,
147
+ "PG": 3629730,
148
+ "PH": 69875840,
149
+ "PK": 116750500,
150
+ "PL": 12633159,
151
+ "PM": 187,
152
+ "PN": 46,
153
+ "PR": 1562644,
154
+ "PT": 2781729,
155
+ "PW": 1887,
156
+ "RO": 6603899,
157
+ "RW": 1906860,
158
+ "SB": 685097,
159
+ "SC": 36473,
160
+ "SD": 27792576,
161
+ "SE": 8774150,
162
+ "SG": 5774984,
163
+ "SH": 5425,
164
+ "SI": 1240581,
165
+ "SK": 1414556,
166
+ "SL": 2318726,
167
+ "SS": 2851524,
168
+ "SX": 29816,
169
+ "SZ": 883584,
170
+ "TA": 272,
171
+ "TC": 54807,
172
+ "TH": 18623898,
173
+ "TK": 1285,
174
+ "TO": 29707,
175
+ "TR": 13942975,
176
+ "TT": 1063735,
177
+ "TV": 1066,
178
+ "TZ": 40401432,
179
+ "UG": 1686867,
180
+ "UM": 316,
181
+ "US": 319333440,
182
+ "VC": 97334,
183
+ "VG": 36633,
184
+ "VI": 79676,
185
+ "VU": 247616,
186
+ "WS": 4279,
187
+ "YE": 2689596,
188
+ "ZA": 17503716,
189
+ "ZM": 2788256,
190
+ "ZW": 6109446
191
+ }
192
  },
193
  {
194
  "language_name": "Chinese",
 
202
  ],
203
  "bleu": 0.35763875438716014,
204
  "commonvoice_hours": 422.0,
205
+ "commonvoice_locale": "zh-TW",
206
+ "population": {
207
+ "AU": 534796,
208
+ "BN": 51093,
209
+ "CA": 678494,
210
+ "CN": 1254618000,
211
+ "GB": 197283,
212
+ "GF": 4988,
213
+ "HK": 7249910,
214
+ "ID": 2456639,
215
+ "MN": 44352,
216
+ "MO": 632892,
217
+ "MY": 5550857,
218
+ "PA": 5841,
219
+ "PF": 23019,
220
+ "PH": 797021,
221
+ "SG": 4781438,
222
+ "SR": 6705,
223
+ "TH": 1241593,
224
+ "TW": 22422850,
225
+ "US": 2295209,
226
+ "VN": 1085934
227
+ }
228
  },
229
  {
230
  "language_name": "Hindi",
 
238
  ],
239
  "bleu": 0.33760351976648345,
240
  "commonvoice_hours": 16.0,
241
+ "commonvoice_locale": "hi-IN",
242
+ "population": {
243
+ "CA": 188470,
244
+ "FJ": 411829,
245
+ "IN": 545022990,
246
+ "NP": 127377,
247
+ "UG": 2206,
248
+ "ZA": 1129272
249
+ }
250
  },
251
  {
252
  "language_name": "Spanish",
 
260
  ],
261
  "bleu": 0.3600460831160618,
262
  "commonvoice_hours": 446.0,
263
+ "commonvoice_locale": "es",
264
+ "population": {
265
+ "AD": 33110,
266
+ "AR": 45479100,
267
+ "BO": 7100339,
268
+ "BR": 76218,
269
+ "BZ": 111887,
270
+ "CA": 603106,
271
+ "CL": 17823064,
272
+ "CO": 45648864,
273
+ "CR": 4843090,
274
+ "CU": 11059100,
275
+ "CW": 5751,
276
+ "DE": 4809582,
277
+ "DO": 8189766,
278
+ "EA": 147000,
279
+ "EC": 16228704,
280
+ "ES": 49515642,
281
+ "FR": 8820266,
282
+ "GB": 5260888,
283
+ "GI": 14790,
284
+ "GQ": 727475,
285
+ "GT": 15952569,
286
+ "HN": 7203565,
287
+ "IC": 2056618,
288
+ "MA": 23115,
289
+ "MX": 106779500,
290
+ "NI": 4838683,
291
+ "PA": 2686915,
292
+ "PE": 23297950,
293
+ "PH": 33846110,
294
+ "PR": 2774491,
295
+ "PT": 1030270,
296
+ "PY": 230134,
297
+ "RO": 2130290,
298
+ "SV": 5768179,
299
+ "SX": 4823,
300
+ "TT": 4110,
301
+ "US": 31933344,
302
+ "UY": 2981097,
303
+ "VE": 23488572
304
+ }
305
  },
306
  {
307
  "language_name": "Arabic",
 
315
  ],
316
  "bleu": 0.3046598747480405,
317
  "commonvoice_hours": 91.0,
318
+ "commonvoice_locale": "ar",
319
+ "population": {
320
+ "AE": 7793822,
321
+ "BH": 1309350,
322
+ "CA": 565412,
323
+ "CM": 108206,
324
+ "CY": 1267,
325
+ "DJ": 67292,
326
+ "DZ": 31799946,
327
+ "EG": 97876560,
328
+ "EH": 652271,
329
+ "ER": 297979,
330
+ "GB": 197283,
331
+ "IL": 1735096,
332
+ "IQ": 26433436,
333
+ "IR": 1698466,
334
+ "JO": 10820600,
335
+ "KE": 24623,
336
+ "KM": 558545,
337
+ "KW": 2993710,
338
+ "LB": 4703865,
339
+ "LY": 5099000,
340
+ "MA": 22048254,
341
+ "ML": 175981,
342
+ "MR": 3404658,
343
+ "NE": 47822,
344
+ "NG": 151960,
345
+ "OM": 3778520,
346
+ "PS": 4818260,
347
+ "QA": 2175311,
348
+ "SA": 34173500,
349
+ "SD": 27792576,
350
+ "SO": 3997414,
351
+ "SS": 2851524,
352
+ "SY": 15518720,
353
+ "TD": 2869158,
354
+ "TJ": 976,
355
+ "TN": 10549080,
356
+ "TR": 459298,
357
+ "YE": 22114456
358
+ }
359
  },
360
  {
361
  "language_name": "Urdu",
 
369
  ],
370
  "bleu": 0.331647033312127,
371
  "commonvoice_hours": 76.0,
372
+ "commonvoice_locale": "ur",
373
+ "population": {
374
+ "CA": 286475,
375
+ "GB": 2301638,
376
+ "IN": 66304500,
377
+ "MU": 71727,
378
+ "PK": 221825950
379
+ }
380
  },
381
  {
382
  "language_name": "French",
 
390
  ],
391
  "bleu": 0.3141809404018014,
392
  "commonvoice_hours": 1051.0,
393
+ "commonvoice_locale": "fr",
394
+ "population": {
395
+ "AD": 5775,
396
+ "AT": 974540,
397
+ "BE": 4453866,
398
+ "BF": 4583788,
399
+ "BI": 7000822,
400
+ "BJ": 4502610,
401
+ "BL": 6837,
402
+ "CA": 11308230,
403
+ "CD": 3867640,
404
+ "CF": 2935521,
405
+ "CG": 4446179,
406
+ "CH": 1764838,
407
+ "CI": 13465739,
408
+ "CM": 18866600,
409
+ "CY": 88668,
410
+ "DE": 14428746,
411
+ "DJ": 19358,
412
+ "DZ": 8594580,
413
+ "FR": 67169718,
414
+ "GA": 1405473,
415
+ "GB": 15125053,
416
+ "GF": 153622,
417
+ "GN": 3632946,
418
+ "GP": 407498,
419
+ "GQ": 73584,
420
+ "GR": 954639,
421
+ "HT": 520187,
422
+ "HU": 293155,
423
+ "IE": 880017,
424
+ "IT": 3931370,
425
+ "KM": 473917,
426
+ "LB": 20238,
427
+ "LU": 546691,
428
+ "MA": 7112340,
429
+ "MC": 38610,
430
+ "MF": 32556,
431
+ "MG": 18599433,
432
+ "ML": 8994564,
433
+ "MQ": 427408,
434
+ "MR": 680932,
435
+ "MT": 50299,
436
+ "MU": 41381,
437
+ "NC": 278409,
438
+ "NE": 6603996,
439
+ "NL": 5011316,
440
+ "PF": 180024,
441
+ "PM": 5133,
442
+ "PT": 1545405,
443
+ "RE": 700950,
444
+ "RO": 3621493,
445
+ "RW": 2288,
446
+ "SC": 57589,
447
+ "SN": 6137196,
448
+ "SY": 1144506,
449
+ "TD": 4388124,
450
+ "TF": 140,
451
+ "TG": 5251148,
452
+ "TN": 8673688,
453
+ "US": 1862778,
454
+ "VU": 149166,
455
+ "WF": 7610,
456
+ "YT": 110580
457
+ }
458
  },
459
  {
460
  "language_name": "Bangla",
 
468
  ],
469
  "bleu": 0.27472181972977344,
470
  "commonvoice_hours": 49.0,
471
+ "commonvoice_locale": "bn",
472
+ "population": {
473
+ "BD": 159397980,
474
+ "CA": 90466,
475
+ "GB": 263044,
476
+ "IN": 107413290,
477
+ "NP": 28508
478
+ }
479
  },
480
  {
481
  "language_name": "Portuguese",
 
509
  ],
510
  "bleu": 0.367787171884892,
511
  "commonvoice_hours": 176.0,
512
+ "commonvoice_locale": "pt",
513
+ "population": {
514
+ "AG": 1571,
515
+ "AO": 21789941,
516
+ "BR": 192661560,
517
+ "CA": 229934,
518
+ "CH": 285736,
519
+ "CV": 443274,
520
+ "FR": 882027,
521
+ "GB": 131522,
522
+ "GQ": 1,
523
+ "GW": 1927100,
524
+ "LU": 100541,
525
+ "MO": 30723,
526
+ "MZ": 8126514,
527
+ "PT": 9890592,
528
+ "ST": 179454,
529
+ "TL": 816395
530
+ }
531
  },
532
  {
533
  "language_name": "Punjabi",
 
561
  ],
562
  "bleu": 0.31594664710428266,
563
  "commonvoice_hours": 2.3,
564
+ "commonvoice_locale": "pa-IN",
565
+ "population": {
566
+ "CA": 603106,
567
+ "GB": 2367400,
568
+ "IN": 37130520,
569
+ "KE": 10170,
570
+ "PK": 163450700,
571
+ "SG": 9314
572
+ }
573
  },
574
  {
575
  "language_name": "Russian",
 
583
  ],
584
  "bleu": 0.2920291935463745,
585
  "commonvoice_hours": 241.0,
586
+ "commonvoice_locale": "ru",
587
+ "population": {
588
+ "BG": 1602387,
589
+ "BY": 1137350,
590
+ "CA": 211087,
591
+ "CN": 13940,
592
+ "DE": 4809582,
593
+ "EE": 688027,
594
+ "FI": 45131,
595
+ "GE": 359730,
596
+ "IL": 954303,
597
+ "KG": 2147364,
598
+ "KZ": 13746168,
599
+ "LT": 2185168,
600
+ "LV": 714867,
601
+ "MD": 100935,
602
+ "MN": 4118,
603
+ "PL": 6890814,
604
+ "RU": 133218680,
605
+ "SJ": 1200,
606
+ "TJ": 1064840,
607
+ "TM": 663436,
608
+ "UA": 20204534,
609
+ "US": 798334,
610
+ "UZ": 4279156
611
+ }
612
  },
613
  {
614
  "language_name": "Swahili",
 
642
  ],
643
  "bleu": 0.3018786362743097,
644
  "commonvoice_hours": 411.0,
645
+ "commonvoice_locale": "sw",
646
+ "population": {
647
+ "BI": 6408,
648
+ "CD": 50890000,
649
+ "KE": 35328414,
650
+ "MZ": 9330,
651
+ "SO": 235142,
652
+ "TZ": 52697520,
653
+ "UG": 32439750,
654
+ "YT": 2716,
655
+ "ZA": 1016
656
+ }
657
  },
658
  {
659
  "language_name": "Indonesian",
 
687
  ],
688
  "bleu": 0.31132422822400946,
689
  "commonvoice_hours": 33.0,
690
+ "commonvoice_locale": "id",
691
+ "population": {
692
+ "ID": 170896640,
693
+ "NL": 311047
694
+ }
695
  },
696
  {
697
  "language_name": "German",
 
725
  ],
726
  "bleu": 0.3992689214831344,
727
  "commonvoice_hours": 1357.0,
728
+ "commonvoice_locale": "de",
729
+ "population": {
730
+ "AT": 8593666,
731
+ "BE": 2578554,
732
+ "BG": 557352,
733
+ "BR": 1778414,
734
+ "CA": 294014,
735
+ "CH": 6134913,
736
+ "CZ": 1605375,
737
+ "DE": 72945327,
738
+ "DK": 2758623,
739
+ "FI": 1002901,
740
+ "FR": 3392410,
741
+ "GB": 5918499,
742
+ "GR": 530355,
743
+ "HU": 1758929,
744
+ "IT": 998443,
745
+ "KZ": 1221882,
746
+ "LI": 39137,
747
+ "LT": 382404,
748
+ "LU": 395880,
749
+ "NA": 23671,
750
+ "NL": 12269084,
751
+ "PL": 7273637,
752
+ "PY": 208559,
753
+ "RO": 44736,
754
+ "SI": 883126,
755
+ "SK": 1196932,
756
+ "US": 1563403
757
+ }
758
  },
759
  {
760
  "language_name": "Japanese",
 
768
  ],
769
  "bleu": 0.2954810072264808,
770
  "commonvoice_hours": 222.0,
771
+ "commonvoice_locale": "ja",
772
+ "population": {
773
+ "BR": 444604,
774
+ "CA": 52772,
775
+ "JP": 119231650
776
+ }
777
  },
778
  {
779
  "language_name": "Telugu",
 
787
  ],
788
  "bleu": 0.37949545228579734,
789
  "commonvoice_hours": 0.3,
790
+ "commonvoice_locale": "te",
791
+ "population": {
792
+ "IN": 95478480
793
+ }
794
  },
795
  {
796
  "language_name": "Marathi",
 
804
  ],
805
  "bleu": 0.2852384896861461,
806
  "commonvoice_hours": 20.0,
807
+ "commonvoice_locale": "mr",
808
+ "population": {
809
+ "IN": 92826300
810
+ }
811
  },
812
  {
813
  "language_name": "Javanese",
 
841
  ],
842
  "bleu": 0.2505244065073906,
843
  "commonvoice_hours": 0.0,
844
+ "commonvoice_locale": "jv",
845
+ "population": {
846
+ "ID": 90788840,
847
+ "MY": 391825
848
+ }
849
  },
850
  {
851
  "language_name": "Vietnamese",
 
859
  ],
860
  "bleu": 0.2956750563565745,
861
  "commonvoice_hours": 5.9,
862
+ "commonvoice_locale": "vi",
863
+ "population": {
864
+ "CA": 184701,
865
+ "CN": 6970,
866
+ "US": 1130973,
867
+ "VN": 84900318
868
+ }
869
  },
870
  {
871
  "language_name": "Tamil",
 
879
  ],
880
  "bleu": 0.27547489589987734,
881
  "commonvoice_hours": 234.0,
882
+ "commonvoice_locale": "ta",
883
+ "population": {
884
+ "CA": 184701,
885
+ "GB": 2104355,
886
+ "IN": 78239310,
887
+ "LK": 3433380,
888
+ "MU": 34484,
889
+ "MY": 1371388,
890
+ "RE": 118138,
891
+ "SG": 130403
892
+ }
893
  },
894
  {
895
  "language_name": "Persian",
 
903
  ],
904
  "bleu": 0.2858012364771329,
905
  "commonvoice_hours": 370.0,
906
+ "commonvoice_locale": "fa",
907
+ "population": {
908
+ "AE": 189850,
909
+ "AF": 18321900,
910
+ "CA": 245012,
911
+ "IQ": 338192,
912
+ "IR": 63692475,
913
+ "OM": 43849,
914
+ "PK": 1541107,
915
+ "QA": 268859,
916
+ "TJ": 69215
917
+ }
918
  },
919
  {
920
  "language_name": "Turkish",
 
948
  ],
949
  "bleu": 0.30402386618673855,
950
  "commonvoice_hours": 127.0,
951
+ "commonvoice_locale": "tr",
952
+ "population": {
953
+ "BG": 766359,
954
+ "CA": 37694,
955
+ "CY": 291336,
956
+ "DE": 2003992,
957
+ "GB": 131522,
958
+ "GR": 127285,
959
+ "MK": 74409,
960
+ "NL": 207365,
961
+ "RO": 27694,
962
+ "TR": 76276275,
963
+ "UA": 184476,
964
+ "UZ": 232297
965
+ }
966
  },
967
  {
968
  "language_name": "Cantonese",
 
996
  ],
997
  "bleu": 0.27975991005230577,
998
  "commonvoice_hours": 203.0,
999
+ "commonvoice_locale": "yue",
1000
+ "population": {
1001
+ "CA": 640800,
1002
+ "CN": 72489040,
1003
+ "HK": 6524919
1004
+ }
1005
  },
1006
  {
1007
  "language_name": "Korean",
 
1015
  ],
1016
  "bleu": 0.24501349273295708,
1017
  "commonvoice_hours": 1.7,
1018
+ "commonvoice_locale": "ko",
1019
+ "population": {
1020
+ "BR": 44460,
1021
+ "CA": 169623,
1022
+ "CN": 2091030,
1023
+ "JP": 652636,
1024
+ "KP": 22566280,
1025
+ "KR": 51835100,
1026
+ "US": 997917
1027
+ }
1028
  },
1029
  {
1030
  "language_name": "Italian",
 
1038
  ],
1039
  "bleu": 0.3273249067267197,
1040
  "commonvoice_hours": 362.0,
1041
+ "commonvoice_locale": "it",
1042
+ "population": {
1043
+ "AT": 797350,
1044
+ "AU": 483864,
1045
+ "BR": 592805,
1046
+ "CA": 343016,
1047
+ "CH": 361372,
1048
+ "DE": 5611179,
1049
+ "FR": 1153419,
1050
+ "GB": 131522,
1051
+ "HR": 67644,
1052
+ "IT": 59282565,
1053
+ "MT": 256070,
1054
+ "SI": 3995,
1055
+ "SM": 30466,
1056
+ "US": 1130973,
1057
+ "VA": 820
1058
+ }
1059
  },
1060
  {
1061
  "language_name": "Filipino",
 
1089
  ],
1090
  "bleu": 0.3353425581350746,
1091
  "commonvoice_hours": 0.0,
1092
+ "commonvoice_locale": "tl",
1093
+ "population": {
1094
+ "CA": 565412,
1095
+ "PH": 65508600,
1096
+ "US": 1397084
1097
+ }
1098
  },
1099
  {
1100
  "language_name": "Egyptian Arabic",
 
1107
  }
1108
  ],
1109
  "bleu": 0.23431638822117362,
1110
+ "commonvoice_hours": null,
1111
+ "commonvoice_locale": null,
1112
+ "population": {
1113
+ "EG": 66639360
1114
+ }
1115
  },
1116
  {
1117
  "language_name": "Gujarati",
 
1125
  ],
1126
  "bleu": 0.27834507803114356,
1127
  "commonvoice_hours": 0.0,
1128
+ "commonvoice_locale": "gu-IN",
1129
+ "population": {
1130
+ "CA": 135699,
1131
+ "GB": 1907072,
1132
+ "IN": 59674050,
1133
+ "KE": 4978
1134
+ }
1135
  }
1136
  ]
uv.lock CHANGED
@@ -930,6 +930,7 @@ dependencies = [
930
  { name = "gradio" },
931
  { name = "pandas" },
932
  { name = "plotly" },
 
933
  ]
934
 
935
  [package.dev-dependencies]
@@ -954,6 +955,7 @@ requires-dist = [
954
  { name = "gradio", specifier = ">=5.16.2" },
955
  { name = "pandas", specifier = ">=2.2.3" },
956
  { name = "plotly", specifier = ">=6.0.0" },
 
957
  ]
958
 
959
  [package.metadata.requires-dev]
@@ -1871,6 +1873,15 @@ wheels = [
1871
  { url = "https://files.pythonhosted.org/packages/92/a2/81c1dd744b322c0c548f793deb521bf23500806d754128ddf6f978736dff/pyarrow-18.0.0-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:b46591222c864e7da7faa3b19455196416cd8355ff6c2cc2e65726a760a3c420", size = 40006508 },
1872
  ]
1873
 
 
 
 
 
 
 
 
 
 
1874
  [[package]]
1875
  name = "pydantic"
1876
  version = "2.9.2"
 
930
  { name = "gradio" },
931
  { name = "pandas" },
932
  { name = "plotly" },
933
+ { name = "pycountry" },
934
  ]
935
 
936
  [package.dev-dependencies]
 
955
  { name = "gradio", specifier = ">=5.16.2" },
956
  { name = "pandas", specifier = ">=2.2.3" },
957
  { name = "plotly", specifier = ">=6.0.0" },
958
+ { name = "pycountry" },
959
  ]
960
 
961
  [package.metadata.requires-dev]
 
1873
  { url = "https://files.pythonhosted.org/packages/92/a2/81c1dd744b322c0c548f793deb521bf23500806d754128ddf6f978736dff/pyarrow-18.0.0-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:b46591222c864e7da7faa3b19455196416cd8355ff6c2cc2e65726a760a3c420", size = 40006508 },
1874
  ]
1875
 
1876
+ [[package]]
1877
+ name = "pycountry"
1878
+ version = "24.6.1"
1879
+ source = { registry = "https://pypi.org/simple" }
1880
+ sdist = { url = "https://files.pythonhosted.org/packages/76/57/c389fa68c50590881a75b7883eeb3dc15e9e73a0fdc001cdd45c13290c92/pycountry-24.6.1.tar.gz", hash = "sha256:b61b3faccea67f87d10c1f2b0fc0be714409e8fcdcc1315613174f6466c10221", size = 6043910 }
1881
+ wheels = [
1882
+ { url = "https://files.pythonhosted.org/packages/b1/ec/1fb891d8a2660716aadb2143235481d15ed1cbfe3ad669194690b0604492/pycountry-24.6.1-py3-none-any.whl", hash = "sha256:f1a4fb391cd7214f8eefd39556d740adcc233c778a27f8942c8dca351d6ce06f", size = 6335189 },
1883
+ ]
1884
+
1885
  [[package]]
1886
  name = "pydantic"
1887
  version = "2.9.2"