ai-language-proficiency-monitor

Running

App Files Files Community

David Pomerenke committed on Mar 16

Commit

11c32ae

1 Parent(s): c5278dd

Datasets table

Browse files

Files changed (8) hide show

data/datasets.json +387 -0
datasets.json +387 -0
evals/main.py +2 -1
frontend/public/results.json +440 -0
frontend/src/App.css +4 -0
frontend/src/App.js +7 -4
frontend/src/components/DatasetTable.js +72 -0
frontend/src/components/LanguageTable.js +5 -5

data/datasets.json ADDED Viewed

	@@ -0,0 +1,387 @@

+[
+    {
+        "name": "FLORES+",
+        "author": "Meta",
+        "url": "https://huggingface.co/datasets/openlanguagedata/flores_plus",
+        "n_languages": 200,
+        "tasks": [
+            "translation",
+            "classification",
+            "language_modeling"
+        ],
+        "parallel": true,
+        "base": "FLORES",
+        "implemented": true
+    },
+    {
+        "name": "FLEURS",
+        "author": "Google",
+        "url": "https://huggingface.co/datasets/google/fleurs",
+        "n_languages": 102,
+        "tasks": [
+            "speech_recognition"
+        ],
+        "parallel": true,
+        "base": "FLORES",
+        "implemented": true
+    },
+    {
+        "name": "CommonVoice",
+        "author": "Mozilla",
+        "url": "https://huggingface.co/datasets/mozilla-foundation/common_voice_17_0",
+        "n_languages": 124,
+        "tasks": [
+            "speech_recognition"
+        ],
+        "parallel": null
+    },
+    {
+        "name": "MMMLU",
+        "author": "OpenAI",
+        "url": "https://huggingface.co/datasets/openai/MMMLU",
+        "n_languages": "14",
+        "tasks": [
+            "question_answering"
+        ],
+        "parallel": true,
+        "base": "MMLU"
+    },
+    {
+        "name": "AfriMMLU",
+        "author": "Masakhane",
+        "url": "https://huggingface.co/datasets/masakhane/afrimmlu",
+        "n_languages": "17",
+        "tasks": [
+            "question_answering"
+        ],
+        "parallel": true,
+        "base": "MMLU"
+    },
+    {
+        "name": "Okapi MMLU",
+        "author": "Okapi",
+        "url": "https://huggingface.co/datasets/jon-tow/okapi_mmlu",
+        "n_languages": 16,
+        "tasks": [
+            "question_answering"
+        ],
+        "parallel": true,
+        "base": "MMLU"
+    },
+    {
+        "name": "Global MMLU",
+        "author": "Cohere",
+        "url": "https://huggingface.co/datasets/CohereForAI/Global-MMLU",
+        "n_languages": 42,
+        "tasks": [
+            "question_answering"
+        ],
+        "parallel": true,
+        "base": "MMLU"
+    },
+    {
+        "name": "MGSM",
+        "author": "Google",
+        "url": "https://huggingface.co/datasets/juletxara/mgsm",
+        "n_languages": 10,
+        "tasks": [
+            "math"
+        ],
+        "parallel": true,
+        "base": "MGSM"
+    },
+    {
+        "name": "AfriMGSM",
+        "author": "Masakhane",
+        "url": "https://huggingface.co/datasets/masakhane/afrimgsm",
+        "n_languages": 18,
+        "tasks": [
+            "math"
+        ],
+        "parallel": true,
+        "base": "MGSM"
+    },
+    {
+        "name": "Okapi ARC Challenge",
+        "author": "Okapi",
+        "url": "https://huggingface.co/datasets/jon-tow/okapi_arc_challenge",
+        "n_languages": 31,
+        "tasks": [
+            "question_answering"
+        ],
+        "parallel": true,
+        "base": "AI2 ARC"
+    },
+    {
+        "name": "Uhura ARC Easy",
+        "author": "Masakhane",
+        "url": "https://huggingface.co/datasets/masakhane/uhura-arc-easy",
+        "n_languages": 6,
+        "tasks": [
+            "question_answering"
+        ],
+        "parallel": true,
+        "base": "AI2 ARC"
+    },
+    {
+        "name": "Okapi TruthfulQA",
+        "author": "Okapi",
+        "url": "https://huggingface.co/datasets/jon-tow/okapi_truthfulqa/tree/main/data",
+        "n_languages": 31,
+        "tasks": [
+            "question_answering"
+        ],
+        "parallel": true,
+        "base": "TruthfulQA"
+    },
+    {
+        "name": "Uhura TruthfulQA",
+        "author": "Masakhane",
+        "url": "https://huggingface.co/datasets/masakhane/uhura-truthfulqa",
+        "n_languages": 6,
+        "tasks": [
+            "question_answering"
+        ],
+        "parallel": true,
+        "base": "TruthfulQA"
+    },
+    {
+        "name": "XNLI",
+        "author": "Meta",
+        "url": "https://huggingface.co/datasets/facebook/xnli",
+        "n_languages": 14,
+        "tasks": [
+            "classification"
+        ],
+        "parallel": true,
+        "base": "XNLI"
+    },
+    {
+        "name": "AfriXNLI",
+        "author": "Masakhane",
+        "url": "https://huggingface.co/datasets/masakhane/afrixnli",
+        "n_languages": 18,
+        "tasks": [
+            "classification"
+        ],
+        "parallel": true,
+        "base": "XNLI"
+    },
+    {
+        "name": "Okapi HellaSwag",
+        "author": "Okapi",
+        "url": "https://huggingface.co/datasets/jon-tow/okapi_hellaswag",
+        "n_languages": 31,
+        "tasks": [
+            "question_answering"
+        ],
+        "parallel": true,
+        "base": "HellaSwag"
+    },
+    {
+        "name": "WikiANN / PAN-X",
+        "author": "Academic",
+        "url": "https://huggingface.co/datasets/unimelb-nlp/wikiann",
+        "n_languages": 176,
+        "tasks": [
+            "ner"
+        ],
+        "parallel": false
+    },
+    {
+        "name": "MSVAMP",
+        "author": "Microsoft",
+        "url": "https://huggingface.co/datasets/Mathoctopus/MSVAMP",
+        "n_languages": 10,
+        "tasks": [
+            "math"
+        ],
+        "parallel": true
+    },
+    {
+        "name": "XLSUM",
+        "author": "Academic",
+        "url": "https://huggingface.co/datasets/csebuetnlp/xlsum",
+        "n_languages": 45,
+        "tasks": [
+            "summarization"
+        ],
+        "parallel": true
+    },
+    {
+        "name": "SEA-IFEVAL",
+        "author": "AI Singapore",
+        "url": "https://huggingface.co/datasets/aisingapore/instruction_following-ifeval",
+        "n_languages": 7,
+        "tasks": [
+            "instruction_following"
+        ],
+        "parallel": true,
+        "base": "IFEVAL"
+    },
+    {
+        "name": "XTREME",
+        "author": "Google",
+        "url": "https://huggingface.co/datasets/google/xtreme",
+        "n_languages": 40,
+        "tasks": [
+            "translation",
+            "classification",
+            "question_answering",
+            "ner"
+        ],
+        "parallel": null
+    },
+    {
+        "name": "XGLUE",
+        "author": "Microsoft",
+        "url": "https://huggingface.co/datasets/microsoft/xglue",
+        "n_languages": 18,
+        "tasks": [
+            "pos"
+        ],
+        "parallel": null,
+        "base": "GLUE"
+    },
+    {
+        "name": "IndicGLUE",
+        "author": "AI4Bharat",
+        "url": "https://huggingface.co/datasets/ai4bharat/indic_glue",
+        "n_languages": 11,
+        "tasks": [
+            "question_answering"
+        ],
+        "parallel": null,
+        "base": "GLUE"
+    },
+    {
+        "name": "Opus Gnome",
+        "author": "Helsinki NLP",
+        "url": "https://huggingface.co/datasets/Helsinki-NLP/opus_gnome",
+        "n_languages": 187,
+        "tasks": [
+            "translation"
+        ],
+        "parallel": true
+    },
+    {
+        "name": "Opus Paracrawl",
+        "author": "Helsinki NLP",
+        "url": "https://huggingface.co/datasets/Helsinki-NLP/opus_paracrawl",
+        "n_languages": 43,
+        "tasks": [
+            "translation"
+        ],
+        "parallel": false
+    },
+    {
+        "name": "CCAligned",
+        "author": "Meta",
+        "url": "https://huggingface.co/datasets/ahelk/ccaligned_multilingual",
+        "n_languages": 137,
+        "tasks": [
+            "translation"
+        ],
+        "parallel": false
+    },
+    {
+        "name": "OPUS Collection",
+        "author": "Helsinki NLP",
+        "url": "https://opus.nlpl.eu/",
+        "n_languages": 747,
+        "tasks": [
+            "translation"
+        ],
+        "parallel": false
+    },
+    {
+        "name": "MasakhaNER",
+        "author": "Masakhane",
+        "url": "https://huggingface.co/datasets/masakhane/masakhaner",
+        "n_languages": 10,
+        "tasks": [
+            "ner"
+        ],
+        "parallel": null
+    },
+    {
+        "name": "Multilingual Sentiments",
+        "url": "https://huggingface.co/datasets/tyqiangz/multilingual-sentiments",
+        "n_languages": 12,
+        "tasks": [
+            "sentiment_analysis"
+        ],
+        "parallel": null
+    },
+    {
+        "name": "CulturaX",
+        "author": "Academic",
+        "url": "https://huggingface.co/datasets/uonlp/CulturaX",
+        "n_languages": 167,
+        "tasks": [
+            "language_modeling"
+        ],
+        "parallel": false
+    },
+    {
+        "name": "Tülu 3 SFT Mixture",
+        "author": "AllenAI",
+        "url": "https://huggingface.co/datasets/allenai/tulu-3-sft-mixture",
+        "n_languages": 70,
+        "tasks": [
+            "instruction_following"
+        ],
+        "parallel": false
+    },
+    {
+        "name": "xP3",
+        "author": "BigScience",
+        "url": "https://huggingface.co/datasets/bigscience/xP3",
+        "n_languages": 46,
+        "tasks": [
+            "instruction_following"
+        ],
+        "parallel": false
+    },
+    {
+        "name": "Aya",
+        "author": "Cohere",
+        "url": "https://huggingface.co/datasets/CohereForAI/aya_dataset",
+        "n_languages": 65,
+        "tasks": [
+            "instruction_following"
+        ],
+        "parallel": null
+    },
+    {
+        "name": "Lanfrica",
+        "author": "Lanfrica",
+        "url": "https://lanfrica.com/records?language=yor&task=machine%20translation",
+        "n_languages": 2200,
+        "tasks": [
+            "datasets"
+        ],
+        "parallel": null
+    },
+    {
+        "name": "HuggingFace Languages",
+        "author": "HuggingFace",
+        "url": "https://huggingface.co/languages",
+        "n_languages": 4680,
+        "tasks": [
+            "datasets",
+            "models"
+        ],
+        "parallel": null
+    },
+    {
+        "name": "HuggingFace Multilingual Datasets",
+        "author": "HuggingFace",
+        "url": "https://huggingface.co/datasets?other=multilinguality:multilingual",
+        "n_languages": null,
+        "tasks": [
+            "datasets"
+        ],
+        "parallel": false
+    }
+]

datasets.json ADDED Viewed

	@@ -0,0 +1,387 @@

+[
+  {
+    "name": "FLORES+",
+    "author": "Meta",
+    "url": "https://huggingface.co/datasets/openlanguagedata/flores_plus",
+    "n_languages": 185,
+    "tasks": [
+      "translation",
+      "classification",
+      "language_modeling"
+    ],
+    "parallel": true,
+    "base": "FLORES",
+    "implemented": true
+  },
+  {
+    "name": "FLEURS",
+    "author": "Google",
+    "url": "https://huggingface.co/datasets/google/fleurs",
+    "n_languages": 102,
+    "tasks": [
+      "speech_recognition"
+    ],
+    "parallel": true,
+    "base": "FLORES",
+    "implemented": true
+  },
+  {
+    "name": "CommonVoice",
+    "author": "Mozilla",
+    "url": "https://huggingface.co/datasets/mozilla-foundation/common_voice_1_0",
+    "n_languages": 231,
+    "tasks": [
+      "speech_recognition"
+    ],
+    "parallel": null
+  },
+  {
+    "name": "MMMLU",
+    "author": "OpenAI",
+    "url": "https://huggingface.co/datasets/openai/MMMLU",
+    "n_languages": "14",
+    "tasks": [
+      "question_answering"
+    ],
+    "parallel": true,
+    "base": "MMLU"
+  },
+  {
+    "name": "AfriMMLU",
+    "author": "Masakhane",
+    "url": "https://huggingface.co/datasets/masakhane/afrimmlu",
+    "n_languages": "17",
+    "tasks": [
+      "question_answering"
+    ],
+    "parallel": true,
+    "base": "MMLU"
+  },
+  {
+    "name": "Okapi MMLU",
+    "author": "Okapi",
+    "url": "https://huggingface.co/datasets/jon-tow/okapi_mmlu",
+    "n_languages": 16,
+    "tasks": [
+      "question_answering"
+    ],
+    "parallel": true,
+    "base": "MMLU"
+  },
+  {
+    "name": "Global MMLU",
+    "author": "Cohere",
+    "url": "https://huggingface.co/datasets/CohereForAI/Global-MMLU",
+    "n_languages": 42,
+    "tasks": [
+      "question_answering"
+    ],
+    "parallel": true,
+    "base": "MMLU"
+  },
+  {
+    "name": "MGSM",
+    "author": "Google",
+    "url": "https://huggingface.co/datasets/juletxara/mgsm",
+    "n_languages": 10,
+    "tasks": [
+      "math"
+    ],
+    "parallel": true,
+    "base": "MGSM"
+  },
+  {
+    "name": "AfriMGSM",
+    "author": "Masakhane",
+    "url": "https://huggingface.co/datasets/masakhane/afrimgsm",
+    "n_languages": 18,
+    "tasks": [
+      "math"
+    ],
+    "parallel": true,
+    "base": "MGSM"
+  },
+  {
+    "name": "Okapi ARC Challenge",
+    "author": "Okapi",
+    "url": "https://huggingface.co/datasets/jon-tow/okapi_arc_challenge",
+    "n_languages": 31,
+    "tasks": [
+      "question_answering"
+    ],
+    "parallel": true,
+    "base": "AI2 ARC"
+  },
+  {
+    "name": "Uhura ARC Easy",
+    "author": "Masakhane",
+    "url": "https://huggingface.co/datasets/masakhane/uhura-arc-easy",
+    "n_languages": 6,
+    "tasks": [
+      "question_answering"
+    ],
+    "parallel": true,
+    "base": "AI2 ARC"
+  },
+  {
+    "name": "Okapi TruthfulQA",
+    "author": "Okapi",
+    "url": "https://huggingface.co/datasets/jon-tow/okapi_truthfulqa/tree/main/data",
+    "n_languages": 31,
+    "tasks": [
+      "question_answering"
+    ],
+    "parallel": true,
+    "base": "TruthfulQA"
+  },
+  {
+    "name": "Uhura TruthfulQA",
+    "author": "Masakhane",
+    "url": "https://huggingface.co/datasets/masakhane/uhura-truthfulqa",
+    "n_languages": 6,
+    "tasks": [
+      "question_answering"
+    ],
+    "parallel": true,
+    "base": "TruthfulQA"
+  },
+  {
+    "name": "XNLI",
+    "author": "Meta",
+    "url": "https://huggingface.co/datasets/facebook/xnli",
+    "n_languages": 14,
+    "tasks": [
+      "classification"
+    ],
+    "parallel": true,
+    "base": "XNLI"
+  },
+  {
+    "name": "AfriXNLI",
+    "author": "Masakhane",
+    "url": "https://huggingface.co/datasets/masakhane/afrixnli",
+    "n_languages": 18,
+    "tasks": [
+      "classification"
+    ],
+    "parallel": true,
+    "base": "XNLI"
+  },
+  {
+    "name": "Okapi HellaSwag",
+    "author": "Okapi",
+    "url": "https://huggingface.co/datasets/jon-tow/okapi_hellaswag",
+    "n_languages": 31,
+    "tasks": [
+      "question_answering"
+    ],
+    "parallel": true,
+    "base": "HellaSwag"
+  },
+  {
+    "name": "WikiANN / PAN-X",
+    "author": "Academic",
+    "url": "https://huggingface.co/datasets/unimelb-nlp/wikiann",
+    "n_languages": 176,
+    "tasks": [
+      "ner"
+    ],
+    "parallel": false
+  },
+  {
+    "name": "MSVAMP",
+    "author": "Microsoft",
+    "url": "https://huggingface.co/datasets/Mathoctopus/MSVAMP",
+    "n_languages": 10,
+    "tasks": [
+      "math"
+    ],
+    "parallel": true
+  },
+  {
+    "name": "XLSUM",
+    "author": "Academic",
+    "url": "https://huggingface.co/datasets/csebuetnlp/xlsum",
+    "n_languages": 45,
+    "tasks": [
+      "summarization"
+    ],
+    "parallel": true
+  },
+  {
+    "name": "SEA-IFEVAL",
+    "author": "AI Singapore",
+    "url": "https://huggingface.co/datasets/aisingapore/instruction_following-ifeval",
+    "n_languages": 7,
+    "tasks": [
+      "instruction_following"
+    ],
+    "parallel": true,
+    "base": "IFEVAL"
+  },
+  {
+    "name": "XTREME",
+    "author": "Google",
+    "url": "https://huggingface.co/datasets/google/xtreme",
+    "n_languages": 40,
+    "tasks": [
+      "translation",
+      "classification",
+      "question_answering",
+      "ner"
+    ],
+    "parallel": null
+  },
+  {
+    "name": "XGLUE",
+    "author": "Microsoft",
+    "url": "https://huggingface.co/datasets/microsoft/xglue",
+    "n_languages": 18,
+    "tasks": [
+      "pos"
+    ],
+    "parallel": null,
+    "base": "GLUE"
+  },
+  {
+    "name": "IndicGLUE",
+    "author": "AI4Bharat",
+    "url": "https://huggingface.co/datasets/ai4bharat/indic_glue",
+    "n_languages": 11,
+    "tasks": [
+      "question_answering"
+    ],
+    "parallel": null,
+    "base": "GLUE"
+  },
+  {
+    "name": "Opus Gnome",
+    "author": "Helsinki NLP",
+    "url": "https://huggingface.co/datasets/Helsinki-NLP/opus_gnome",
+    "n_languages": 187,
+    "tasks": [
+      "translation"
+    ],
+    "parallel": true
+  },
+  {
+    "name": "Opus Paracrawl",
+    "author": "Helsinki NLP",
+    "url": "https://huggingface.co/datasets/Helsinki-NLP/opus_paracrawl",
+    "n_languages": 43,
+    "tasks": [
+      "translation"
+    ],
+    "parallel": false
+  },
+  {
+    "name": "CCAligned",
+    "author": "Meta",
+    "url": "https://huggingface.co/datasets/ahelk/ccaligned_multilingual",
+    "n_languages": 137,
+    "tasks": [
+      "translation"
+    ],
+    "parallel": false
+  },
+  {
+    "name": "OPUS Collection",
+    "author": "Helsinki NLP",
+    "url": "https://opus.nlpl.eu/",
+    "n_languages": 747,
+    "tasks": [
+      "translation"
+    ],
+    "parallel": false
+  },
+  {
+    "name": "MasakhaNER",
+    "author": "Masakhane",
+    "url": "https://huggingface.co/datasets/masakhane/masakhaner",
+    "n_languages": 10,
+    "tasks": [
+      "ner"
+    ],
+    "parallel": null
+  },
+  {
+    "name": "Multilingual Sentiments",
+    "url": "https://huggingface.co/datasets/tyqiangz/multilingual-sentiments",
+    "n_languages": 12,
+    "tasks": [
+      "sentiment_analysis"
+    ],
+    "parallel": null
+  },
+  {
+    "name": "CulturaX",
+    "author": "Academic",
+    "url": "https://huggingface.co/datasets/uonlp/CulturaX",
+    "n_languages": 167,
+    "tasks": [
+      "language_modeling"
+    ],
+    "parallel": false
+  },
+  {
+    "name": "T\u00fclu 3 SFT Mixture",
+    "author": "AllenAI",
+    "url": "https://huggingface.co/datasets/allenai/tulu-3-sft-mixture",
+    "n_languages": 70,
+    "tasks": [
+      "instruction_following"
+    ],
+    "parallel": false
+  },
+  {
+    "name": "xP3",
+    "author": "BigScience",
+    "url": "https://huggingface.co/datasets/bigscience/xP3",
+    "n_languages": 46,
+    "tasks": [
+      "instruction_following"
+    ],
+    "parallel": false
+  },
+  {
+    "name": "Aya",
+    "author": "Cohere",
+    "url": "https://huggingface.co/datasets/CohereForAI/aya_dataset",
+    "n_languages": 65,
+    "tasks": [
+      "instruction_following"
+    ],
+    "parallel": null
+  },
+  {
+    "name": "Lanfrica",
+    "author": "Lanfrica",
+    "url": "https://lanfrica.com/records?language=yor&task=machine%20translation",
+    "n_languages": 2200,
+    "tasks": [
+      "datasets"
+    ],
+    "parallel": null
+  },
+  {
+    "name": "HuggingFace Languages",
+    "author": "HuggingFace",
+    "url": "https://huggingface.co/languages",
+    "n_languages": 4680,
+    "tasks": [
+      "datasets",
+      "models"
+    ],
+    "parallel": null
+  },
+  {
+    "name": "HuggingFace Multilingual Datasets",
+    "author": "HuggingFace",
+    "url": "https://huggingface.co/datasets?other=multilinguality:multilingual",
+    "n_languages": null,
+    "tasks": [
+      "datasets"
+    ],
+    "parallel": false
+  }
+]

evals/main.py CHANGED Viewed

@@ -108,7 +108,6 @@ def make_language_table(df):
     df = df[["language_name", "speakers", "family", "average", "in_benchmark", *task_metrics]]
     return df
 async def main():
     results = await evaluate()
     results, lang_results, model_results, task_results = aggregate(results)
@@ -121,9 +120,11 @@ async def main():
     with open("results.json", "w") as f:
         json.dump(all_results, f, indent=2, ensure_ascii=False)
     all_tables = {
         "model_table": serialize(make_model_table(model_results)),
         "language_table": serialize(make_language_table(lang_results)),
     }
     with open("frontend/public/results.json", "w") as f:
         json.dump(all_tables, f, indent=2, ensure_ascii=False)

     df = df[["language_name", "speakers", "family", "average", "in_benchmark", *task_metrics]]
     return df
 async def main():
     results = await evaluate()
     results, lang_results, model_results, task_results = aggregate(results)
     with open("results.json", "w") as f:
         json.dump(all_results, f, indent=2, ensure_ascii=False)
+    datasets_df = pd.read_json("data/datasets.json")
     all_tables = {
         "model_table": serialize(make_model_table(model_results)),
         "language_table": serialize(make_language_table(lang_results)),
+        "dataset_table": serialize(datasets_df),
     }
     with open("frontend/public/results.json", "w") as f:
         json.dump(all_tables, f, indent=2, ensure_ascii=False)

frontend/public/results.json CHANGED Viewed

@@ -8318,5 +8318,445 @@
       "translation_bleu": 0.0,
       "translation_chrf": 0.0
     }
   ]
 }

       "translation_bleu": 0.0,
       "translation_chrf": 0.0
     }
+  ],
+  "dataset_table": [
+    {
+      "name": "FLORES+",
+      "author": "Meta",
+      "url": "https://huggingface.co/datasets/openlanguagedata/flores_plus",
+      "n_languages": 200.0,
+      "tasks": [
+        "translation",
+        "classification",
+        "language_modeling"
+      ],
+      "parallel": 1.0,
+      "base": "FLORES",
+      "implemented": 1.0
+    },
+    {
+      "name": "FLEURS",
+      "author": "Google",
+      "url": "https://huggingface.co/datasets/google/fleurs",
+      "n_languages": 102.0,
+      "tasks": [
+        "speech_recognition"
+      ],
+      "parallel": 1.0,
+      "base": "FLORES",
+      "implemented": 1.0
+    },
+    {
+      "name": "CommonVoice",
+      "author": "Mozilla",
+      "url": "https://huggingface.co/datasets/mozilla-foundation/common_voice_17_0",
+      "n_languages": 124.0,
+      "tasks": [
+        "speech_recognition"
+      ],
+      "parallel": null,
+      "base": null,
+      "implemented": null
+    },
+    {
+      "name": "MMMLU",
+      "author": "OpenAI",
+      "url": "https://huggingface.co/datasets/openai/MMMLU",
+      "n_languages": 14.0,
+      "tasks": [
+        "question_answering"
+      ],
+      "parallel": 1.0,
+      "base": "MMLU",
+      "implemented": null
+    },
+    {
+      "name": "AfriMMLU",
+      "author": "Masakhane",
+      "url": "https://huggingface.co/datasets/masakhane/afrimmlu",
+      "n_languages": 17.0,
+      "tasks": [
+        "question_answering"
+      ],
+      "parallel": 1.0,
+      "base": "MMLU",
+      "implemented": null
+    },
+    {
+      "name": "Okapi MMLU",
+      "author": "Okapi",
+      "url": "https://huggingface.co/datasets/jon-tow/okapi_mmlu",
+      "n_languages": 16.0,
+      "tasks": [
+        "question_answering"
+      ],
+      "parallel": 1.0,
+      "base": "MMLU",
+      "implemented": null
+    },
+    {
+      "name": "Global MMLU",
+      "author": "Cohere",
+      "url": "https://huggingface.co/datasets/CohereForAI/Global-MMLU",
+      "n_languages": 42.0,
+      "tasks": [
+        "question_answering"
+      ],
+      "parallel": 1.0,
+      "base": "MMLU",
+      "implemented": null
+    },
+    {
+      "name": "MGSM",
+      "author": "Google",
+      "url": "https://huggingface.co/datasets/juletxara/mgsm",
+      "n_languages": 10.0,
+      "tasks": [
+        "math"
+      ],
+      "parallel": 1.0,
+      "base": "MGSM",
+      "implemented": null
+    },
+    {
+      "name": "AfriMGSM",
+      "author": "Masakhane",
+      "url": "https://huggingface.co/datasets/masakhane/afrimgsm",
+      "n_languages": 18.0,
+      "tasks": [
+        "math"
+      ],
+      "parallel": 1.0,
+      "base": "MGSM",
+      "implemented": null
+    },
+    {
+      "name": "Okapi ARC Challenge",
+      "author": "Okapi",
+      "url": "https://huggingface.co/datasets/jon-tow/okapi_arc_challenge",
+      "n_languages": 31.0,
+      "tasks": [
+        "question_answering"
+      ],
+      "parallel": 1.0,
+      "base": "AI2 ARC",
+      "implemented": null
+    },
+    {
+      "name": "Uhura ARC Easy",
+      "author": "Masakhane",
+      "url": "https://huggingface.co/datasets/masakhane/uhura-arc-easy",
+      "n_languages": 6.0,
+      "tasks": [
+        "question_answering"
+      ],
+      "parallel": 1.0,
+      "base": "AI2 ARC",
+      "implemented": null
+    },
+    {
+      "name": "Okapi TruthfulQA",
+      "author": "Okapi",
+      "url": "https://huggingface.co/datasets/jon-tow/okapi_truthfulqa/tree/main/data",
+      "n_languages": 31.0,
+      "tasks": [
+        "question_answering"
+      ],
+      "parallel": 1.0,
+      "base": "TruthfulQA",
+      "implemented": null
+    },
+    {
+      "name": "Uhura TruthfulQA",
+      "author": "Masakhane",
+      "url": "https://huggingface.co/datasets/masakhane/uhura-truthfulqa",
+      "n_languages": 6.0,
+      "tasks": [
+        "question_answering"
+      ],
+      "parallel": 1.0,
+      "base": "TruthfulQA",
+      "implemented": null
+    },
+    {
+      "name": "XNLI",
+      "author": "Meta",
+      "url": "https://huggingface.co/datasets/facebook/xnli",
+      "n_languages": 14.0,
+      "tasks": [
+        "classification"
+      ],
+      "parallel": 1.0,
+      "base": "XNLI",
+      "implemented": null
+    },
+    {
+      "name": "AfriXNLI",
+      "author": "Masakhane",
+      "url": "https://huggingface.co/datasets/masakhane/afrixnli",
+      "n_languages": 18.0,
+      "tasks": [
+        "classification"
+      ],
+      "parallel": 1.0,
+      "base": "XNLI",
+      "implemented": null
+    },
+    {
+      "name": "Okapi HellaSwag",
+      "author": "Okapi",
+      "url": "https://huggingface.co/datasets/jon-tow/okapi_hellaswag",
+      "n_languages": 31.0,
+      "tasks": [
+        "question_answering"
+      ],
+      "parallel": 1.0,
+      "base": "HellaSwag",
+      "implemented": null
+    },
+    {
+      "name": "WikiANN / PAN-X",
+      "author": "Academic",
+      "url": "https://huggingface.co/datasets/unimelb-nlp/wikiann",
+      "n_languages": 176.0,
+      "tasks": [
+        "ner"
+      ],
+      "parallel": 0.0,
+      "base": null,
+      "implemented": null
+    },
+    {
+      "name": "MSVAMP",
+      "author": "Microsoft",
+      "url": "https://huggingface.co/datasets/Mathoctopus/MSVAMP",
+      "n_languages": 10.0,
+      "tasks": [
+        "math"
+      ],
+      "parallel": 1.0,
+      "base": null,
+      "implemented": null
+    },
+    {
+      "name": "XLSUM",
+      "author": "Academic",
+      "url": "https://huggingface.co/datasets/csebuetnlp/xlsum",
+      "n_languages": 45.0,
+      "tasks": [
+        "summarization"
+      ],
+      "parallel": 1.0,
+      "base": null,
+      "implemented": null
+    },
+    {
+      "name": "SEA-IFEVAL",
+      "author": "AI Singapore",
+      "url": "https://huggingface.co/datasets/aisingapore/instruction_following-ifeval",
+      "n_languages": 7.0,
+      "tasks": [
+        "instruction_following"
+      ],
+      "parallel": 1.0,
+      "base": "IFEVAL",
+      "implemented": null
+    },
+    {
+      "name": "XTREME",
+      "author": "Google",
+      "url": "https://huggingface.co/datasets/google/xtreme",
+      "n_languages": 40.0,
+      "tasks": [
+        "translation",
+        "classification",
+        "question_answering",
+        "ner"
+      ],
+      "parallel": null,
+      "base": null,
+      "implemented": null
+    },
+    {
+      "name": "XGLUE",
+      "author": "Microsoft",
+      "url": "https://huggingface.co/datasets/microsoft/xglue",
+      "n_languages": 18.0,
+      "tasks": [
+        "pos"
+      ],
+      "parallel": null,
+      "base": "GLUE",
+      "implemented": null
+    },
+    {
+      "name": "IndicGLUE",
+      "author": "AI4Bharat",
+      "url": "https://huggingface.co/datasets/ai4bharat/indic_glue",
+      "n_languages": 11.0,
+      "tasks": [
+        "question_answering"
+      ],
+      "parallel": null,
+      "base": "GLUE",
+      "implemented": null
+    },
+    {
+      "name": "Opus Gnome",
+      "author": "Helsinki NLP",
+      "url": "https://huggingface.co/datasets/Helsinki-NLP/opus_gnome",
+      "n_languages": 187.0,
+      "tasks": [
+        "translation"
+      ],
+      "parallel": 1.0,
+      "base": null,
+      "implemented": null
+    },
+    {
+      "name": "Opus Paracrawl",
+      "author": "Helsinki NLP",
+      "url": "https://huggingface.co/datasets/Helsinki-NLP/opus_paracrawl",
+      "n_languages": 43.0,
+      "tasks": [
+        "translation"
+      ],
+      "parallel": 0.0,
+      "base": null,
+      "implemented": null
+    },
+    {
+      "name": "CCAligned",
+      "author": "Meta",
+      "url": "https://huggingface.co/datasets/ahelk/ccaligned_multilingual",
+      "n_languages": 137.0,
+      "tasks": [
+        "translation"
+      ],
+      "parallel": 0.0,
+      "base": null,
+      "implemented": null
+    },
+    {
+      "name": "OPUS Collection",
+      "author": "Helsinki NLP",
+      "url": "https://opus.nlpl.eu/",
+      "n_languages": 747.0,
+      "tasks": [
+        "translation"
+      ],
+      "parallel": 0.0,
+      "base": null,
+      "implemented": null
+    },
+    {
+      "name": "MasakhaNER",
+      "author": "Masakhane",
+      "url": "https://huggingface.co/datasets/masakhane/masakhaner",
+      "n_languages": 10.0,
+      "tasks": [
+        "ner"
+      ],
+      "parallel": null,
+      "base": null,
+      "implemented": null
+    },
+    {
+      "name": "Multilingual Sentiments",
+      "author": null,
+      "url": "https://huggingface.co/datasets/tyqiangz/multilingual-sentiments",
+      "n_languages": 12.0,
+      "tasks": [
+        "sentiment_analysis"
+      ],
+      "parallel": null,
+      "base": null,
+      "implemented": null
+    },
+    {
+      "name": "CulturaX",
+      "author": "Academic",
+      "url": "https://huggingface.co/datasets/uonlp/CulturaX",
+      "n_languages": 167.0,
+      "tasks": [
+        "language_modeling"
+      ],
+      "parallel": 0.0,
+      "base": null,
+      "implemented": null
+    },
+    {
+      "name": "Tülu 3 SFT Mixture",
+      "author": "AllenAI",
+      "url": "https://huggingface.co/datasets/allenai/tulu-3-sft-mixture",
+      "n_languages": 70.0,
+      "tasks": [
+        "instruction_following"
+      ],
+      "parallel": 0.0,
+      "base": null,
+      "implemented": null
+    },
+    {
+      "name": "xP3",
+      "author": "BigScience",
+      "url": "https://huggingface.co/datasets/bigscience/xP3",
+      "n_languages": 46.0,
+      "tasks": [
+        "instruction_following"
+      ],
+      "parallel": 0.0,
+      "base": null,
+      "implemented": null
+    },
+    {
+      "name": "Aya",
+      "author": "Cohere",
+      "url": "https://huggingface.co/datasets/CohereForAI/aya_dataset",
+      "n_languages": 65.0,
+      "tasks": [
+        "instruction_following"
+      ],
+      "parallel": null,
+      "base": null,
+      "implemented": null
+    },
+    {
+      "name": "Lanfrica",
+      "author": "Lanfrica",
+      "url": "https://lanfrica.com/records?language=yor&task=machine%20translation",
+      "n_languages": 2200.0,
+      "tasks": [
+        "datasets"
+      ],
+      "parallel": null,
+      "base": null,
+      "implemented": null
+    },
+    {
+      "name": "HuggingFace Languages",
+      "author": "HuggingFace",
+      "url": "https://huggingface.co/languages",
+      "n_languages": 4680.0,
+      "tasks": [
+        "datasets",
+        "models"
+      ],
+      "parallel": null,
+      "base": null,
+      "implemented": null
+    },
+    {
+      "name": "HuggingFace Multilingual Datasets",
+      "author": "HuggingFace",
+      "url": "https://huggingface.co/datasets?other=multilinguality:multilingual",
+      "n_languages": null,
+      "tasks": [
+        "datasets"
+      ],
+      "parallel": 0.0,
+      "base": null,
+      "implemented": null
+    }
   ]
 }

frontend/src/App.css CHANGED Viewed

@@ -37,3 +37,7 @@ p {
   color: #555;
   margin-top: 0;
 }

   color: #555;
   margin-top: 0;
 }
+/* Universal selector: set a 10pt font size on every element. */
+* {
+  font-size: 10pt;
+}

frontend/src/App.js CHANGED Viewed

@@ -4,7 +4,7 @@ import { PrimeReactProvider } from 'primereact/api'
 import 'primereact/resources/themes/lara-light-cyan/theme.css'
 import ModelTable from './components/ModelTable'
 import LanguageTable from './components/LanguageTable'
 function App () {
   const [data, setData] = useState(null)
   const [loading, setLoading] = useState(true)
@@ -48,9 +48,12 @@ function App () {
             {loading && <p>...</p>}
             {error && <p>Error: {error}</p>}
             {data && (
-              <div style={{ display: 'flex', flexDirection: 'row', gap: '2rem' }}>
-                <ModelTable data={data} />
-                <LanguageTable data={data} />
               </div>
             )}
           </PrimeReactProvider>

 import 'primereact/resources/themes/lara-light-cyan/theme.css'
 import ModelTable from './components/ModelTable'
 import LanguageTable from './components/LanguageTable'
+import DatasetTable from './components/DatasetTable'
 function App () {
   const [data, setData] = useState(null)
   const [loading, setLoading] = useState(true)
             {loading && <p>...</p>}
             {error && <p>Error: {error}</p>}
             {data && (
+              <div style={{ display: 'flex', flexDirection: 'column', gap: '2rem', alignItems: 'center', width: '100%' }}>
+                <div style={{ display: 'flex', flexDirection: 'row', gap: '2rem' }}>
+                  <ModelTable data={data} />
+                  <LanguageTable data={data} />
+                </div>
+                <DatasetTable data={data} />
               </div>
             )}
           </PrimeReactProvider>

frontend/src/components/DatasetTable.js ADDED Viewed

	@@ -0,0 +1,72 @@

+import { DataTable } from 'primereact/datatable'
+import { Column } from 'primereact/column'
+import { FilterMatchMode } from 'primereact/api'
+import { useState } from 'react'
+const DatasetTable = ({ data }) => {
+  const [filters, setFilters] = useState({
+    name: { value: null, matchMode: FilterMatchMode.CONTAINS },
+    author: { value: null, matchMode: FilterMatchMode.IN },
+    n_languages: { value: null, matchMode: FilterMatchMode.BETWEEN },
+    tasks: { value: null, matchMode: FilterMatchMode.IN },
+    parallel: { value: null, matchMode: FilterMatchMode.EQUALS },
+    base: { value: null, matchMode: FilterMatchMode.IN },
+    implemented: { value: null, matchMode: FilterMatchMode.EQUALS },
+  })
+  const table = data.dataset_table
+  const nameBodyTemplate = rowData => {
+    return <div style={{ fontWeight: 'bold' }}>{rowData.name}</div>
+  }
+  return (
+    <DataTable
+      value={table}
+      header={<>Datasets</>}
+      removableSort
+      filters={filters}
+      filterDisplay='menu'
+      scrollable
+      scrollHeight='500px'
+      style={{ minWidth: '200px', width: "50%" }}
+    >
+      {/* <Column
+        field='implemented'
+        header='Implemented'
+        filter
+        style={{ minWidth: '5rem' }}
+      /> */}
+      <Column
+        field='author'
+        header='Author'
+        filter
+        showFilterMatchModes={false}
+        style={{ minWidth: '5rem' }}
+      />
+      <Column
+        field='name'
+        header='Name'
+        body={nameBodyTemplate}
+        filter
+        style={{ minWidth: '5rem' }}
+        frozen
+      />
+      <Column
+        field='tasks'
+        header='Tasks'
+        filter
+        style={{ minWidth: '5rem', maxWidth: '10rem' }}
+      />
+      <Column
+        field='n_languages'
+        header='#Languages'
+        filter
+        sortable
+        style={{ minWidth: '10rem' }}
+      />
+    </DataTable>
+  )
+}
+export default DatasetTable

frontend/src/components/LanguageTable.js CHANGED Viewed

@@ -157,7 +157,7 @@ const LanguageTable = ({ data }) => {
         field='average'
         header='Average'
         sortable
-        body={scoreBodyTemplate('average', { minScore: 0.4, maxScore: 0.8 })}
         style={{ minWidth: '5rem', maxWidth: '10rem' }}
       />
       <Column
@@ -165,8 +165,8 @@ const LanguageTable = ({ data }) => {
         header='Translation'
         sortable
         body={scoreBodyTemplate('translation_chrf', {
-          minScore: 0.4,
-          maxScore: 0.7
         })}
         style={{ minWidth: '5rem', maxWidth: '10rem' }}
       />
@@ -175,8 +175,8 @@ const LanguageTable = ({ data }) => {
         header='Classification'
         sortable
         body={scoreBodyTemplate('classification_accuracy', {
-          minScore: 0.4,
-          maxScore: 1
         })}
         style={{ minWidth: '5rem', maxWidth: '10rem' }}
       />

         field='average'
         header='Average'
         sortable
+        body={scoreBodyTemplate('average', { minScore: 0.2, maxScore: 0.5 })}
         style={{ minWidth: '5rem', maxWidth: '10rem' }}
       />
       <Column
         header='Translation'
         sortable
         body={scoreBodyTemplate('translation_chrf', {
+          minScore: 0.3,
+          maxScore: 0.6
         })}
         style={{ minWidth: '5rem', maxWidth: '10rem' }}
       />
         header='Classification'
         sortable
         body={scoreBodyTemplate('classification_accuracy', {
+          minScore: 0.3,
+          maxScore: 0.7
         })}
         style={{ minWidth: '5rem', maxWidth: '10rem' }}
       />