Spaces:

fair-forward
/

evals-for-every-language

Running

App Files Files Community

David Pomerenke committed on Apr 6

Commit

9051509

1 Parent(s): 51cb38c

Dataset table grouping

Browse files

Files changed (3) hide show

datasets.json +525 -482
frontend/src/components/DatasetTable.js +5 -0
frontend/src/components/LanguageTable.js +1 -1

datasets.json CHANGED Viewed

@@ -1,484 +1,527 @@
 [
-  {
-      "name": "FLORES+",
-      "author": "Meta",
-      "author_url": "https://ai.meta.com",
-      "url": "https://huggingface.co/datasets/openlanguagedata/flores_plus",
-      "n_languages": 200,
-      "tasks": [
-          "translation",
-          "classification",
-          "language_modeling"
-      ],
-      "parallel": true,
-      "base": "FLORES",
-      "implemented": true
-  },
-  {
-      "name": "FLEURS",
-      "author": "Meta",
-      "author_url": "https://ai.meta.com",
-      "url": "https://huggingface.co/datasets/google/fleurs",
-      "n_languages": 102,
-      "tasks": [
-          "speech_recognition"
-      ],
-      "parallel": true,
-      "base": "FLORES",
-      "implemented": true
-  },
-  {
-      "name": "CommonVoice",
-      "author": "Mozilla",
-      "author_url": "https://blog.mozilla.ai",
-      "url": "https://huggingface.co/datasets/mozilla-foundation/common_voice_17_0",
-      "n_languages": 124,
-      "tasks": [
-          "speech_recognition"
-      ],
-      "parallel": null
-  },
-  {
-      "name": "MMMLU",
-      "author": "OpenAI",
-      "author_url": "https://openai.com",
-      "url": "https://huggingface.co/datasets/openai/MMMLU",
-      "n_languages": "14",
-      "tasks": [
-          "question_answering"
-      ],
-      "parallel": true,
-      "base": "MMLU"
-  },
-  {
-      "name": "AfriMMLU",
-      "author": "Masakhane",
-      "author_url": "https://www.masakhane.io",
-      "url": "https://huggingface.co/datasets/masakhane/afrimmlu",
-      "n_languages": "17",
-      "tasks": [
-          "question_answering"
-      ],
-      "parallel": true,
-      "base": "MMLU"
-  },
-  {
-      "name": "Okapi MMLU",
-      "author": "Academic",
-      "author_url": null,
-      "url": "https://huggingface.co/datasets/jon-tow/okapi_mmlu",
-      "n_languages": 16,
-      "tasks": [
-          "question_answering"
-      ],
-      "parallel": true,
-      "base": "MMLU"
-  },
-  {
-      "name": "MMLU-X",
-      "author": "OpenGPT-X",
-      "author_url": null,
-      "url": "https://huggingface.co/datasets/openGPT-X/mmlux",
-      "n_languages": 20,
-      "tasks": [
-          "question_answering"
-      ],
-      "parallel": true,
-      "base": "MMLU"
-  },
-  {
-      "name": "Global MMLU",
-      "author": "Cohere",
-      "author_url": "https://cohere.com",
-      "url": "https://huggingface.co/datasets/CohereForAI/Global-MMLU",
-      "n_languages": 42,
-      "tasks": [
-          "question_answering"
-      ],
-      "parallel": true,
-      "base": "MMLU"
-  },
-  {
-      "name": "MGSM",
-      "author": "Google",
-      "author_url": "https://google.com",
-      "url": "https://huggingface.co/datasets/juletxara/mgsm",
-      "n_languages": 10,
-      "tasks": [
-          "math"
-      ],
-      "parallel": true,
-      "base": "MGSM"
-  },
-  {
-      "name": "AfriMGSM",
-      "author": "Masakhane",
-      "author_url": "https://www.masakhane.io",
-      "url": "https://huggingface.co/datasets/masakhane/afrimgsm",
-      "n_languages": 18,
-      "tasks": [
-          "math"
-      ],
-      "parallel": true,
-      "base": "MGSM"
-  },
-  {
-      "name": "GSM8K-X",
-      "author": "OpenGPT-X",
-      "author_url": null,
-      "url": "https://huggingface.co/datasets/openGPT-X/gsm8kx",
-      "n_languages": 20,
-      "tasks": [
-          "math"
-      ],
-      "parallel": true,
-      "base": "MGSM"
-  },
-  {
-      "name": "Okapi ARC Challenge",
-      "author": "Academic",
-      "author_url": null,
-      "url": "https://huggingface.co/datasets/jon-tow/okapi_arc_challenge",
-      "n_languages": 31,
-      "tasks": [
-          "question_answering"
-      ],
-      "parallel": true,
-      "base": "AI2 ARC"
-  },
-  {
-      "name": "Uhuru ARC Easy",
-      "author": "Masakhane",
-      "author_url": "https://www.masakhane.io",
-      "url": "https://huggingface.co/datasets/masakhane/uhura-arc-easy",
-      "n_languages": 6,
-      "tasks": [
-          "question_answering"
-      ],
-      "parallel": true,
-      "base": "AI2 ARC"
-  },
-  {
-      "name": "Arc-X",
-      "author": "OpenGPT-X",
-      "author_url": null,
-      "url": "https://huggingface.co/datasets/openGPT-X/arcx",
-      "n_languages": 20,
-      "tasks": [
-          "question_answering"
-      ],
-      "parallel": true,
-      "base": "AI2 ARC"
-  },
-  {
-      "name": "Okapi TruthfulQA",
-      "author": "Academic",
-      "author_url": null,
-      "url": "https://huggingface.co/datasets/jon-tow/okapi_truthfulqa/tree/main/data",
-      "n_languages": 31,
-      "tasks": [
-          "question_answering"
-      ],
-      "parallel": true,
-      "base": "TruthfulQA"
-  },
-  {
-      "name": "Uhura TruthfulQA",
-      "author": "Masakhane",
-      "author_url": "https://www.masakhane.io",
-      "url": "https://huggingface.co/datasets/masakhane/uhura-truthfulqa",
-      "n_languages": 6,
-      "tasks": [
-          "question_answering"
-      ],
-      "parallel": true,
-      "base": "TruthfulQA"
-  },
-  {
-      "name": "TruthfulQA-X",
-      "author": "OpenGPT-X",
-      "author_url": null,
-      "url": "https://huggingface.co/datasets/openGPT-X/truthfulqax",
-      "n_languages": 20,
-      "tasks": [
-          "question_answering"
-      ],
-      "parallel": true,
-      "base": "TruthfulQA"
-  },
-  {
-      "name": "XNLI",
-      "author": "Meta",
-      "author_url": "https://ai.meta.com",
-      "url": "https://huggingface.co/datasets/facebook/xnli",
-      "n_languages": 14,
-      "tasks": [
-          "classification"
-      ],
-      "parallel": true,
-      "base": "XNLI"
-  },
-  {
-      "name": "AfriXNLI",
-      "author": "Masakhane",
-      "author_url": "https://www.masakhane.io",
-      "url": "https://huggingface.co/datasets/masakhane/afrixnli",
-      "n_languages": 18,
-      "tasks": [
-          "classification"
-      ],
-      "parallel": true,
-      "base": "XNLI"
-  },
-  {
-      "name": "Okapi HellaSwag",
-      "author": "Academic",
-      "author_url": null,
-      "url": "https://huggingface.co/datasets/jon-tow/okapi_hellaswag",
-      "n_languages": 31,
-      "tasks": [
-          "question_answering"
-      ],
-      "parallel": true,
-      "base": "HellaSwag"
-  },
-  {
-      "name": "HellaSwag-X",
-      "author": "OpenGPT-X",
-      "author_url": null,
-      "url": "https://huggingface.co/datasets/openGPT-X/hellaswagx",
-      "n_languages": 20,
-      "tasks": [
-          "question_answering"
-      ],
-      "parallel": true,
-      "base": "HellaSwag"
-  },
-  {
-      "name": "WikiANN / PAN-X",
-      "author": "Academic",
-      "author_url": null,
-      "url": "https://huggingface.co/datasets/unimelb-nlp/wikiann",
-      "n_languages": 176,
-      "tasks": [
-          "ner"
-      ],
-      "parallel": false
-  },
-  {
-      "name": "MSVAMP",
-      "author": "Microsoft",
-      "author_url": "https://microsoft.ai",
-      "url": "https://huggingface.co/datasets/Mathoctopus/MSVAMP",
-      "n_languages": 10,
-      "tasks": [
-          "math"
-      ],
-      "parallel": true
-  },
-  {
-      "name": "XLSUM",
-      "author": "Academic",
-      "author_url": null,
-      "url": "https://huggingface.co/datasets/csebuetnlp/xlsum",
-      "n_languages": 45,
-      "tasks": [
-          "summarization"
-      ],
-      "parallel": true
-  },
-  {
-      "name": "SEA-IFEVAL",
-      "author": "AI Singapore",
-      "author_url": "https://aisingapore.org",
-      "url": "https://huggingface.co/datasets/aisingapore/instruction_following-ifeval",
-      "n_languages": 7,
-      "tasks": [
-          "instruction_following"
-      ],
-      "parallel": true,
-      "base": "IFEVAL"
-  },
-  {
-      "name": "XTREME",
-      "author": "Google",
-      "author_url": "https://google.com",
-      "url": "https://huggingface.co/datasets/google/xtreme",
-      "n_languages": 40,
-      "tasks": [
-          "translation",
-          "classification",
-          "question_answering",
-          "ner"
-      ],
-      "parallel": null
-  },
-  {
-      "name": "XGLUE",
-      "author": "Microsoft",
-      "author_url": "https://microsoft.ai",
-      "url": "https://huggingface.co/datasets/microsoft/xglue",
-      "n_languages": 18,
-      "tasks": [
-          "pos"
-      ],
-      "parallel": null,
-      "base": "GLUE"
-  },
-  {
-      "name": "IndicGLUE",
-      "author": "AI4Bharat",
-      "author_url": "https://models.ai4bharat.org",
-      "url": "https://huggingface.co/datasets/ai4bharat/indic_glue",
-      "n_languages": 11,
-      "tasks": [
-          "question_answering"
-      ],
-      "parallel": null,
-      "base": "GLUE"
-  },
-  {
-      "name": "Opus Gnome",
-      "author": "Helsinki NLP",
-      "author_url": null,
-      "url": "https://huggingface.co/datasets/Helsinki-NLP/opus_gnome",
-      "n_languages": 187,
-      "tasks": [
-          "translation"
-      ],
-      "parallel": true
-  },
-  {
-      "name": "Opus Paracrawl",
-      "author": "Helsinki NLP",
-      "author_url": null,
-      "url": "https://huggingface.co/datasets/Helsinki-NLP/opus_paracrawl",
-      "n_languages": 43,
-      "tasks": [
-          "translation"
-      ],
-      "parallel": false
-  },
-  {
-      "name": "CCAligned",
-      "author": "Meta",
-      "author_url": "https://ai.meta.com",
-      "url": "https://huggingface.co/datasets/ahelk/ccaligned_multilingual",
-      "n_languages": 137,
-      "tasks": [
-          "translation"
-      ],
-      "parallel": false
-  },
-  {
-      "name": "OPUS Collection",
-      "author": "Helsinki NLP",
-      "author_url": null,
-      "url": "https://opus.nlpl.eu",
-      "n_languages": 747,
-      "tasks": [
-          "translation"
-      ],
-      "parallel": false
-  },
-  {
-      "name": "MasakhaNER",
-      "author": "Masakhane",
-      "author_url": "https://www.masakhane.io",
-      "url": "https://huggingface.co/datasets/masakhane/masakhaner",
-      "n_languages": 10,
-      "tasks": [
-          "ner"
-      ],
-      "parallel": null
-  },
-  {
-      "name": "Multilingual Sentiments",
-      "author": "Academic",
-      "author_url": null,
-      "url": "https://huggingface.co/datasets/tyqiangz/multilingual-sentiments",
-      "n_languages": 12,
-      "tasks": [
-          "sentiment_analysis"
-      ],
-      "parallel": null
-  },
-  {
-      "name": "CulturaX",
-      "author": "Academic",
-      "author_url": null,
-      "url": "https://huggingface.co/datasets/uonlp/CulturaX",
-      "n_languages": 167,
-      "tasks": [
-          "language_modeling"
-      ],
-      "parallel": false
-  },
-  {
-      "name": "Tülu 3 SFT Mixture",
-      "author": "AllenAI",
-      "author_url": "https://allenai.org",
-      "url": "https://huggingface.co/datasets/allenai/tulu-3-sft-mixture",
-      "n_languages": 70,
-      "tasks": [
-          "instruction_following"
-      ],
-      "parallel": false
-  },
-  {
-      "name": "xP3",
-      "author": "BigScience",
-      "author_url": "https://bigscience.huggingface.co",
-      "url": "https://huggingface.co/datasets/bigscience/xP3",
-      "n_languages": 46,
-      "tasks": [
-          "instruction_following"
-      ],
-      "parallel": false
-  },
-  {
-      "name": "Aya",
-      "author": "Cohere",
-      "author_url": "https://cohere.com",
-      "url": "https://huggingface.co/datasets/CohereForAI/aya_dataset",
-      "n_languages": 65,
-      "tasks": [
-          "instruction_following"
-      ],
-      "parallel": null
-  },
-  {
-      "name": "Lanfrica",
-      "author": "Lanfrica",
-      "author_url": "https://lanfrica.com",
-      "url": "https://lanfrica.com/records?language=yor&task=machine%20translation",
-      "n_languages": 2200,
-      "tasks": [
-          "datasets"
-      ],
-      "parallel": null
-  },
-  {
-      "name": "HuggingFace Languages",
-      "author": "HuggingFace",
-      "author_url": "https://huggingface.co",
-      "url": "https://huggingface.co/languages",
-      "n_languages": 4680,
-      "tasks": [
-          "datasets",
-          "models"
-      ],
-      "parallel": null
-  },
-  {
-      "name": "HuggingFace Multilingual Datasets",
-      "author": "HuggingFace",
-      "author_url": "https://huggingface.co",
-      "url": "https://huggingface.co/datasets?other=multilinguality:multilingual",
-      "n_languages": 2012,
-      "tasks": [
-          "datasets"
-      ],
-      "parallel": false
-  }
 ]

 [
+    {
+        "name": "FLORES+",
+        "author": "Meta",
+        "author_url": "https://ai.meta.com",
+        "url": "https://huggingface.co/datasets/openlanguagedata/flores_plus",
+        "n_languages": 200,
+        "tasks": [
+            "translation",
+            "classification",
+            "language_modeling"
+        ],
+        "parallel": true,
+        "base": "FLORES",
+        "implemented": true,
+        "group": "Low-Resource Languages"
+    },
+    {
+        "name": "FLEURS",
+        "author": "Meta",
+        "author_url": "https://ai.meta.com",
+        "url": "https://huggingface.co/datasets/google/fleurs",
+        "n_languages": 102,
+        "tasks": [
+            "speech_recognition"
+        ],
+        "parallel": true,
+        "base": "FLORES",
+        "implemented": true,
+        "group": "Low-Resource Languages"
+    },
+    {
+        "name": "CommonVoice",
+        "author": "Mozilla",
+        "author_url": "https://blog.mozilla.ai",
+        "url": "https://huggingface.co/datasets/mozilla-foundation/common_voice_17_0",
+        "n_languages": 124,
+        "tasks": [
+            "speech_recognition"
+        ],
+        "parallel": null,
+        "group": "Low-Resource Languages"
+    },
+    {
+        "name": "MMMLU",
+        "author": "OpenAI",
+        "author_url": "https://openai.com",
+        "url": "https://huggingface.co/datasets/openai/MMMLU",
+        "n_languages": "14",
+        "tasks": [
+            "question_answering"
+        ],
+        "parallel": true,
+        "base": "MMLU",
+        "group": "Multitask Language Understanding"
+    },
+    {
+        "name": "AfriMMLU",
+        "author": "Masakhane",
+        "author_url": "https://www.masakhane.io",
+        "url": "https://huggingface.co/datasets/masakhane/afrimmlu",
+        "n_languages": "17",
+        "tasks": [
+            "question_answering"
+        ],
+        "parallel": true,
+        "base": "MMLU",
+        "group": "Multitask Language Understanding"
+    },
+    {
+        "name": "Okapi MMLU",
+        "author": "Academic",
+        "author_url": null,
+        "url": "https://huggingface.co/datasets/jon-tow/okapi_mmlu",
+        "n_languages": 16,
+        "tasks": [
+            "question_answering"
+        ],
+        "parallel": true,
+        "base": "MMLU",
+        "group": "Multitask Language Understanding"
+    },
+    {
+        "name": "MMLU-X",
+        "author": "OpenGPT-X",
+        "author_url": null,
+        "url": "https://huggingface.co/datasets/openGPT-X/mmlux",
+        "n_languages": 20,
+        "tasks": [
+            "question_answering"
+        ],
+        "parallel": true,
+        "base": "MMLU",
+        "group": "Multitask Language Understanding"
+    },
+    {
+        "name": "Global MMLU",
+        "author": "Cohere",
+        "author_url": "https://cohere.com",
+        "url": "https://huggingface.co/datasets/CohereForAI/Global-MMLU",
+        "n_languages": 42,
+        "tasks": [
+            "question_answering"
+        ],
+        "parallel": true,
+        "base": "MMLU",
+        "group": "Multitask Language Understanding"
+    },
+    {
+        "name": "Okapi ARC Challenge",
+        "author": "Academic",
+        "author_url": null,
+        "url": "https://huggingface.co/datasets/jon-tow/okapi_arc_challenge",
+        "n_languages": 31,
+        "tasks": [
+            "question_answering"
+        ],
+        "parallel": true,
+        "base": "AI2 ARC",
+        "group": "Abstract Reasoning"
+    },
+    {
+        "name": "Uhura ARC Easy",
+        "author": "Masakhane",
+        "author_url": "https://www.masakhane.io",
+        "url": "https://huggingface.co/datasets/masakhane/uhura-arc-easy",
+        "n_languages": 6,
+        "tasks": [
+            "question_answering"
+        ],
+        "parallel": true,
+        "base": "AI2 ARC",
+        "group": "Abstract Reasoning"
+    },
+    {
+        "name": "Arc-X",
+        "author": "OpenGPT-X",
+        "author_url": null,
+        "url": "https://huggingface.co/datasets/openGPT-X/arcx",
+        "n_languages": 20,
+        "tasks": [
+            "question_answering"
+        ],
+        "parallel": true,
+        "base": "AI2 ARC",
+        "group": "Abstract Reasoning"
+    },
+    {
+        "name": "Okapi TruthfulQA",
+        "author": "Academic",
+        "author_url": null,
+        "url": "https://huggingface.co/datasets/jon-tow/okapi_truthfulqa/tree/main/data",
+        "n_languages": 31,
+        "tasks": [
+            "question_answering"
+        ],
+        "parallel": true,
+        "base": "TruthfulQA",
+        "group": "Truthfulness"
+    },
+    {
+        "name": "Uhura TruthfulQA",
+        "author": "Masakhane",
+        "author_url": "https://www.masakhane.io",
+        "url": "https://huggingface.co/datasets/masakhane/uhura-truthfulqa",
+        "n_languages": 6,
+        "tasks": [
+            "question_answering"
+        ],
+        "parallel": true,
+        "base": "TruthfulQA",
+        "group": "Truthfulness"
+    },
+    {
+        "name": "TruthfulQA-X",
+        "author": "OpenGPT-X",
+        "author_url": null,
+        "url": "https://huggingface.co/datasets/openGPT-X/truthfulqax",
+        "n_languages": 20,
+        "tasks": [
+            "question_answering"
+        ],
+        "parallel": true,
+        "base": "TruthfulQA",
+        "group": "Truthfulness"
+    },
+    {
+        "name": "XNLI",
+        "author": "Meta",
+        "author_url": "https://ai.meta.com",
+        "url": "https://huggingface.co/datasets/facebook/xnli",
+        "n_languages": 14,
+        "tasks": [
+            "classification",
+            "logic"
+        ],
+        "parallel": true,
+        "base": "MNLI",
+        "group": "Natural Language Inference"
+    },
+    {
+        "name": "AfriXNLI",
+        "author": "Masakhane",
+        "author_url": "https://www.masakhane.io",
+        "url": "https://huggingface.co/datasets/masakhane/afrixnli",
+        "n_languages": 18,
+        "tasks": [
+            "classification",
+            "logic"
+        ],
+        "parallel": true,
+        "base": "MNLI",
+        "group": "Natural Language Inference"
+    },
+    {
+        "name": "XGLUE",
+        "author": "Microsoft",
+        "author_url": "https://microsoft.ai",
+        "url": "https://huggingface.co/datasets/microsoft/xglue",
+        "n_languages": 18,
+        "tasks": [
+            "pos"
+        ],
+        "parallel": null,
+        "base": "GLUE",
+        "group": "General Language Understanding"
+    },
+    {
+        "name": "IndicGLUE",
+        "author": "AI4Bharat",
+        "author_url": "https://models.ai4bharat.org",
+        "url": "https://huggingface.co/datasets/ai4bharat/indic_glue",
+        "n_languages": 11,
+        "tasks": [
+            "question_answering"
+        ],
+        "parallel": null,
+        "base": "GLUE",
+        "group": "General Language Understanding"
+    },
+    {
+        "name": "Okapi HellaSwag",
+        "author": "Academic",
+        "author_url": null,
+        "url": "https://huggingface.co/datasets/jon-tow/okapi_hellaswag",
+        "n_languages": 31,
+        "tasks": [
+            "question_answering"
+        ],
+        "parallel": true,
+        "base": "HellaSwag",
+        "group": "Adversarial Language Modelling"
+    },
+    {
+        "name": "HellaSwag-X",
+        "author": "OpenGPT-X",
+        "author_url": null,
+        "url": "https://huggingface.co/datasets/openGPT-X/hellaswagx",
+        "n_languages": 20,
+        "tasks": [
+            "question_answering"
+        ],
+        "parallel": true,
+        "base": "HellaSwag",
+        "group": "Adversarial Language Modelling"
+    },
+    {
+        "name": "MGSM",
+        "author": "Google",
+        "author_url": "https://google.com",
+        "url": "https://huggingface.co/datasets/juletxara/mgsm",
+        "n_languages": 10,
+        "tasks": [
+            "math"
+        ],
+        "parallel": true,
+        "base": "MGSM",
+        "group": "Grade School Math"
+    },
+    {
+        "name": "AfriMGSM",
+        "author": "Masakhane",
+        "author_url": "https://www.masakhane.io",
+        "url": "https://huggingface.co/datasets/masakhane/afrimgsm",
+        "n_languages": 18,
+        "tasks": [
+            "math"
+        ],
+        "parallel": true,
+        "base": "MGSM",
+        "group": "Grade School Math"
+    },
+    {
+        "name": "GSM8K-X",
+        "author": "OpenGPT-X",
+        "author_url": null,
+        "url": "https://huggingface.co/datasets/openGPT-X/gsm8kx",
+        "n_languages": 20,
+        "tasks": [
+            "math"
+        ],
+        "parallel": true,
+        "base": "MGSM",
+        "group": "Grade School Math"
+    },
+    {
+        "name": "WikiANN / PAN-X",
+        "author": "Academic",
+        "author_url": null,
+        "url": "https://huggingface.co/datasets/unimelb-nlp/wikiann",
+        "n_languages": 176,
+        "tasks": [
+            "ner"
+        ],
+        "parallel": false,
+        "group": "Named Entity Recognition"
+    },
+    {
+        "name": "MasakhaNER",
+        "author": "Masakhane",
+        "author_url": "https://www.masakhane.io",
+        "url": "https://huggingface.co/datasets/masakhane/masakhaner",
+        "n_languages": 10,
+        "tasks": [
+            "ner"
+        ],
+        "parallel": null,
+        "group": "Named Entity Recognition"
+    },
+    {
+        "name": "Opus Gnome",
+        "author": "Helsinki NLP",
+        "author_url": null,
+        "url": "https://huggingface.co/datasets/Helsinki-NLP/opus_gnome",
+        "n_languages": 187,
+        "tasks": [
+            "translation"
+        ],
+        "parallel": true,
+        "group": "Parallel Corpora"
+    },
+    {
+        "name": "Opus Paracrawl",
+        "author": "Helsinki NLP",
+        "author_url": null,
+        "url": "https://huggingface.co/datasets/Helsinki-NLP/opus_paracrawl",
+        "n_languages": 43,
+        "tasks": [
+            "translation"
+        ],
+        "parallel": false,
+        "group": "Parallel Corpora"
+    },
+    {
+        "name": "CCAligned",
+        "author": "Meta",
+        "author_url": "https://ai.meta.com",
+        "url": "https://huggingface.co/datasets/ahelk/ccaligned_multilingual",
+        "n_languages": 137,
+        "tasks": [
+            "translation"
+        ],
+        "parallel": false,
+        "group": "Parallel Corpora"
+    },
+    {
+        "name": "OPUS Collection",
+        "author": "Helsinki NLP",
+        "author_url": null,
+        "url": "https://opus.nlpl.eu",
+        "n_languages": 747,
+        "tasks": [
+            "translation"
+        ],
+        "parallel": false,
+        "group": "Parallel Corpora"
+    },
+    {
+        "name": "Tülu 3 SFT Mixture",
+        "author": "AllenAI",
+        "author_url": "https://allenai.org",
+        "url": "https://huggingface.co/datasets/allenai/tulu-3-sft-mixture",
+        "n_languages": 70,
+        "tasks": [
+            "instruction_following"
+        ],
+        "parallel": false,
+        "group": "Instruction Following"
+    },
+    {
+        "name": "xP3",
+        "author": "BigScience",
+        "author_url": "https://bigscience.huggingface.co",
+        "url": "https://huggingface.co/datasets/bigscience/xP3",
+        "n_languages": 46,
+        "tasks": [
+            "instruction_following"
+        ],
+        "parallel": false,
+        "group": "Instruction Following"
+    },
+    {
+        "name": "Aya",
+        "author": "Cohere",
+        "author_url": "https://cohere.com",
+        "url": "https://huggingface.co/datasets/CohereForAI/aya_dataset",
+        "n_languages": 65,
+        "tasks": [
+            "instruction_following"
+        ],
+        "parallel": null,
+        "group": "Instruction Following"
+    },
+    {
+        "name": "SEA-IFEVAL",
+        "author": "AI Singapore",
+        "author_url": "https://aisingapore.org",
+        "url": "https://huggingface.co/datasets/aisingapore/instruction_following-ifeval",
+        "n_languages": 7,
+        "tasks": [
+            "instruction_following"
+        ],
+        "parallel": true,
+        "base": "IFEVAL",
+        "group": "Instruction Following"
+    },
+    {
+        "name": "CulturaX",
+        "author": "Academic",
+        "author_url": null,
+        "url": "https://huggingface.co/datasets/uonlp/CulturaX",
+        "n_languages": 167,
+        "tasks": [
+            "language_modeling"
+        ],
+        "parallel": false,
+        "group": "Other Tasks"
+    },
+    {
+        "name": "XTREME",
+        "author": "Google",
+        "author_url": "https://google.com",
+        "url": "https://huggingface.co/datasets/google/xtreme",
+        "n_languages": 40,
+        "tasks": [
+            "translation",
+            "classification",
+            "question_answering",
+            "ner"
+        ],
+        "parallel": null,
+        "group": "Other Tasks"
+    },
+    {
+        "name": "XLSUM",
+        "author": "Academic",
+        "author_url": null,
+        "url": "https://huggingface.co/datasets/csebuetnlp/xlsum",
+        "n_languages": 45,
+        "tasks": [
+            "summarization"
+        ],
+        "parallel": true,
+        "group": "Other Tasks"
+    },
+    {
+        "name": "MSVAMP",
+        "author": "Microsoft",
+        "author_url": "https://microsoft.ai",
+        "url": "https://huggingface.co/datasets/Mathoctopus/MSVAMP",
+        "n_languages": 10,
+        "tasks": [
+            "math"
+        ],
+        "parallel": true,
+        "group": "Other Tasks"
+    },
+    {
+        "name": "Multilingual Sentiments",
+        "author": "Academic",
+        "author_url": null,
+        "url": "https://huggingface.co/datasets/tyqiangz/multilingual-sentiments",
+        "n_languages": 12,
+        "tasks": [
+            "sentiment_analysis"
+        ],
+        "parallel": null,
+        "group": "Other Tasks"
+    },
+    {
+        "name": "Lanfrica",
+        "author": "Lanfrica",
+        "author_url": "https://lanfrica.com",
+        "url": "https://lanfrica.com/records?language=yor&task=machine%20translation",
+        "n_languages": 2200,
+        "tasks": [
+            "datasets"
+        ],
+        "parallel": null,
+        "group": "Dataset Collections"
+    },
+    {
+        "name": "HuggingFace Languages",
+        "author": "HuggingFace",
+        "author_url": "https://huggingface.co",
+        "url": "https://huggingface.co/languages",
+        "n_languages": 4680,
+        "tasks": [
+            "datasets",
+            "models"
+        ],
+        "parallel": null,
+        "group": "Dataset Collections"
+    },
+    {
+        "name": "HuggingFace Multilingual Datasets",
+        "author": "HuggingFace",
+        "author_url": "https://huggingface.co",
+        "url": "https://huggingface.co/datasets?other=multilinguality:multilingual",
+        "n_languages": 2012,
+        "tasks": [
+            "datasets"
+        ],
+        "parallel": false,
+        "group": "Dataset Collections"
+    }
 ]

frontend/src/components/DatasetTable.js CHANGED Viewed

@@ -70,6 +70,11 @@ const DatasetTable = ({ data }) => {
   return (
     <DataTable
       value={table}
       header={<>Datasets</>}
       removableSort
       filters={filters}

   return (
     <DataTable
       value={table}
+      rowGroupMode='subheader'
+      rowGroupHeaderTemplate={rowData => {
+        return <div style={{ fontWeight: 'bold' }}>{rowData.group}</div>
+      }}
+      groupRowsBy='group'
       header={<>Datasets</>}
       removableSort
       filters={filters}

frontend/src/components/LanguageTable.js CHANGED Viewed

@@ -134,7 +134,7 @@ const LanguageTable = ({ data, selectedLanguages, setSelectedLanguages }) => {
       selection={selectedLanguages}
       onSelectionChange={e => setSelectedLanguages(e.value)}
       frozenValue={selectedLanguages}
-      virtualScrollerOptions={{ itemSize: 100 }}
       scrollable
       scrollHeight='600px'
       id='language-table'

       selection={selectedLanguages}
       onSelectionChange={e => setSelectedLanguages(e.value)}
       frozenValue={selectedLanguages}
+      virtualScrollerOptions={{ itemSize: 60 }}
       scrollable
       scrollHeight='600px'
       id='language-table'