David Pomerenke
Add Babel-670
7283eaa
[
{
"name": "FLORES+",
"author": "Meta",
"author_url": "https://ai.meta.com",
"url": "https://huggingface.co/datasets/openlanguagedata/flores_plus",
"n_languages": 200,
"tasks": [
"translation",
"classification"
],
"parallel": true,
"translation": "human",
"base": "FLORES",
"implemented": true,
"group": "Translation"
},
{
"name": "CCAligned",
"author": "Meta",
"author_url": "https://ai.meta.com",
"url": "https://huggingface.co/datasets/ahelk/ccaligned_multilingual",
"n_languages": 137,
"tasks": [
"translation"
],
"parallel": false,
"group": "Translation"
},
{
"name": "OPUS Collection",
"author": "Helsinki NLP",
"author_url": null,
"url": "https://opus.nlpl.eu",
"n_languages": 747,
"tasks": [
"translation"
],
"parallel": false,
"group": "Translation"
},
{
"name": "Global MMLU",
"author": "Cohere",
"author_url": "https://cohere.com",
"url": "https://huggingface.co/datasets/CohereForAI/Global-MMLU",
"n_languages": 42,
"languages": [
"am",
"ar",
"bn",
"cs",
"de",
"el",
"en",
"es",
"fa",
"fil",
"fr",
"ha",
"he",
"hi",
"id",
"ig",
"it",
"ja",
"ko",
"ky",
"lt",
"mg",
"ms",
"ne",
"nl",
"ny",
"pl",
"pt",
"ro",
"ru",
"si",
"sn",
"so",
"sr",
"sv",
"sw",
"te",
"tr",
"uk",
"vi",
"yo",
"zh"
],
"tasks": [
"question_answering"
],
"parallel": true,
"translation": "mixed",
"base": "MMLU",
"implemented": true,
"group": "Multitask Language Understanding"
},
{
"name": "MMMLU",
"author": "OpenAI",
"author_url": "https://openai.com",
"url": "https://huggingface.co/datasets/openai/MMMLU",
"n_languages": 14,
"languages": [
"ar",
"bn",
"de",
"es",
"fr",
"hi",
"id",
"it",
"ja",
"ko",
"pt",
"sw",
"yo",
"zh"
],
"tasks": [
"question_answering"
],
"parallel": true,
"translation": "human",
"base": "MMLU",
"implemented": true,
"group": "Multitask Language Understanding"
},
{
"name": "AfriMMLU",
"author": "Masakhane",
"author_url": "https://www.masakhane.io",
"url": "https://huggingface.co/datasets/masakhane/afrimmlu",
"n_languages": 18,
"languages": [
"am",
"en",
"ee",
"fr",
"ha",
"ig",
"rw",
"ln",
"lg",
"om",
"sn",
"st",
"sw",
"tw",
"wo",
"xh",
"yo",
"zu"
],
"tasks": [
"question_answering"
],
"parallel": true,
"translation": "human",
"base": "MMLU",
"implemented": true,
"group": "Multitask Language Understanding"
},
{
"name": "Okapi MMLU",
"author": "Academic",
"author_url": null,
"url": "https://huggingface.co/datasets/jon-tow/okapi_mmlu",
"n_languages": 31,
"languages": [
"ar",
"bn",
"ca",
"da",
"de",
"es",
"eu",
"fr",
"gu",
"hi",
"hr",
"hu",
"hy",
"id",
"it",
"kn",
"ml",
"mr",
"ne",
"nl",
"pt",
"ro",
"ru",
"sk",
"sr",
"sv",
"ta",
"te",
"uk",
"vi",
"zh"
],
"tasks": [
"question_answering"
],
"parallel": true,
"translation": "machine",
"base": "MMLU",
"implemented": true,
"group": "Multitask Language Understanding"
},
{
"name": "MMLU-X",
"author": "OpenGPT-X",
"author_url": "https://opengpt-x.de",
"url": "https://huggingface.co/datasets/openGPT-X/mmlux",
"n_languages": 20,
"languages": [
"bg",
"cs",
"da",
"de",
"el",
"es",
"et",
"fi",
"fr",
"hu",
"it",
"lt",
"lv",
"nl",
"pl",
"pt",
"ro",
"sk",
"sl",
"sv"
],
"tasks": [
"question_answering"
],
"parallel": true,
"translation": "machine",
"base": "MMLU",
"implemented": false,
"group": "Multitask Language Understanding"
},
{
"name": "FLEURS",
"author": "Google",
"author_url": "https://google.com",
"url": "https://huggingface.co/datasets/google/fleurs",
"n_languages": 102,
"tasks": [
"speech_recognition"
],
"parallel": true,
"translation": "human",
"base": "FLORES",
"implemented": false,
"group": "Speech Recognition"
},
{
"name": "CommonVoice",
"author": "Mozilla",
"author_url": "https://blog.mozilla.ai",
"url": "https://huggingface.co/datasets/mozilla-foundation/common_voice_17_0",
"n_languages": 124,
"tasks": [
"speech_recognition"
],
"parallel": null,
"translation": "human",
"group": "Speech Recognition"
},
{
"name": "WorldCuisines",
"author": "Academic",
"author_url": "https://worldcuisines.github.io",
"url": "https://huggingface.co/datasets/worldcuisines/vqa",
"n_languages": 30,
"tasks": [
"visual_question_answering"
],
"parallel": null,
"group": "Visual Question Answering"
},
{
"name": "CVQA",
"author": "Academic",
"author_url": null,
"url": "https://huggingface.co/datasets/afaji/cvqa",
"n_languages": 39,
"tasks": [
"visual_question_answering"
],
"parallel": null,
"group": "Visual Question Answering"
},
{
"name": "Uhura ARC Easy",
"author": "Masakhane",
"author_url": "https://www.masakhane.io",
"url": "https://huggingface.co/datasets/masakhane/uhura-arc-easy",
"n_languages": 6,
"tasks": [
"question_answering"
],
"parallel": true,
"translation": "human",
"base": "AI2 ARC",
"implemented": false,
"group": "Abstract Reasoning"
},
{
"name": "Okapi ARC Challenge",
"author": "Academic",
"author_url": null,
"url": "https://huggingface.co/datasets/jon-tow/okapi_arc_challenge",
"n_languages": 31,
"tasks": [
"question_answering"
],
"parallel": true,
"translation": "machine",
"base": "AI2 ARC",
"implemented": false,
"group": "Abstract Reasoning"
},
{
"name": "Arc-X",
"author": "OpenGPT-X",
"author_url": "https://opengpt-x.de",
"url": "https://huggingface.co/datasets/openGPT-X/arcx",
"n_languages": 20,
"tasks": [
"question_answering"
],
"parallel": true,
"translation": "machine",
"base": "AI2 ARC",
"implemented": false,
"group": "Abstract Reasoning"
},
{
"name": "Uhura TruthfulQA",
"author": "Masakhane",
"author_url": "https://www.masakhane.io",
"url": "https://huggingface.co/datasets/masakhane/uhura-truthfulqa",
"n_languages": 6,
"tasks": [
"question_answering"
],
"parallel": true,
"translation": "human",
"base": "TruthfulQA",
"implemented": false,
"group": "Truthfulness"
},
{
"name": "Okapi TruthfulQA",
"author": "Academic",
"author_url": null,
"url": "https://huggingface.co/datasets/jon-tow/okapi_truthfulqa/tree/main/data",
"n_languages": 31,
"tasks": [
"question_answering"
],
"parallel": true,
"translation": "machine",
"base": "TruthfulQA",
"implemented": false,
"group": "Truthfulness"
},
{
"name": "TruthfulQA-X",
"author": "OpenGPT-X",
"author_url": "https://opengpt-x.de",
"url": "https://huggingface.co/datasets/openGPT-X/truthfulqax",
"n_languages": 20,
"tasks": [
"question_answering"
],
"parallel": true,
"translation": "machine",
"base": "TruthfulQA",
"implemented": false,
"group": "Truthfulness"
},
{
"name": "XNLI",
"author": "Meta",
"author_url": "https://ai.meta.com",
"url": "https://huggingface.co/datasets/facebook/xnli",
"n_languages": 14,
"tasks": [
"classification",
"logic"
],
"parallel": true,
"base": "MNLI",
"group": "Natural Language Inference"
},
{
"name": "AfriXNLI",
"author": "Masakhane",
"author_url": "https://www.masakhane.io",
"url": "https://huggingface.co/datasets/masakhane/afrixnli",
"n_languages": 18,
"tasks": [
"classification",
"logic"
],
"parallel": true,
"translation": "human",
"base": "MNLI",
"implemented": false,
"group": "Natural Language Inference"
},
{
"name": "XGLUE",
"author": "Microsoft",
"author_url": "https://microsoft.ai",
"url": "https://huggingface.co/datasets/microsoft/xglue",
"n_languages": 18,
"tasks": [
"pos"
],
"parallel": null,
"base": "GLUE",
"group": "General Language Understanding"
},
{
"name": "IndicGLUE",
"author": "AI4Bharat",
"author_url": "https://models.ai4bharat.org",
"url": "https://huggingface.co/datasets/ai4bharat/indic_glue",
"n_languages": 11,
"tasks": [
"question_answering"
],
"parallel": null,
"base": "GLUE",
"group": "General Language Understanding"
},
{
"name": "Okapi HellaSwag",
"author": "Academic",
"author_url": null,
"url": "https://huggingface.co/datasets/jon-tow/okapi_hellaswag",
"n_languages": 31,
"tasks": [
"question_answering"
],
"parallel": true,
"translation": "machine",
"base": "HellaSwag",
"implemented": false,
"group": "Adversarial Language Modelling"
},
{
"name": "HellaSwag-X",
"author": "OpenGPT-X",
"author_url": "https://opengpt-x.de",
"url": "https://huggingface.co/datasets/openGPT-X/hellaswagx",
"n_languages": 20,
"tasks": [
"question_answering"
],
"parallel": true,
"translation": "machine",
"base": "HellaSwag",
"implemented": false,
"group": "Adversarial Language Modelling"
},
{
"name": "MGSM",
"author": "Google",
"author_url": "https://google.com",
"url": "https://huggingface.co/datasets/juletxara/mgsm",
"n_languages": 10,
"tasks": [
"math"
],
"parallel": true,
"base": "MGSM",
"group": "Grade School Math"
},
{
"name": "AfriMGSM",
"author": "Masakhane",
"author_url": "https://www.masakhane.io",
"url": "https://huggingface.co/datasets/masakhane/afrimgsm",
"n_languages": 18,
"tasks": [
"math"
],
"parallel": true,
"translation": "human",
"base": "MGSM",
"implemented": false,
"group": "Grade School Math"
},
{
"name": "GSM8K-X",
"author": "OpenGPT-X",
"author_url": "https://opengpt-x.de",
"url": "https://huggingface.co/datasets/openGPT-X/gsm8kx",
"n_languages": 20,
"tasks": [
"math"
],
"parallel": true,
"translation": "machine",
"base": "MGSM",
"implemented": false,
"group": "Grade School Math"
},
{
"name": "WikiANN / PAN-X",
"author": "Academic",
"author_url": null,
"url": "https://huggingface.co/datasets/unimelb-nlp/wikiann",
"n_languages": 176,
"tasks": [
"ner"
],
"parallel": false,
"group": "Named Entity Recognition"
},
{
"name": "MasakhaNER",
"author": "Masakhane",
"author_url": "https://www.masakhane.io",
"url": "https://huggingface.co/datasets/masakhane/masakhaner",
"n_languages": 10,
"tasks": [
"ner"
],
"parallel": null,
"group": "Named Entity Recognition"
},
{
"name": "Tülu 3 SFT Mixture",
"author": "AllenAI",
"author_url": "https://allenai.org",
"url": "https://huggingface.co/datasets/allenai/tulu-3-sft-mixture",
"n_languages": 70,
"tasks": [
"instruction_following"
],
"parallel": false,
"group": "Instruction Following"
},
{
"name": "xP3",
"author": "BigScience",
"author_url": "https://bigscience.huggingface.co",
"url": "https://huggingface.co/datasets/bigscience/xP3",
"n_languages": 46,
"tasks": [
"instruction_following"
],
"parallel": false,
"group": "Instruction Following"
},
{
"name": "Aya",
"author": "Cohere",
"author_url": "https://cohere.com",
"url": "https://huggingface.co/datasets/CohereForAI/aya_dataset",
"n_languages": 65,
"tasks": [
"instruction_following"
],
"parallel": null,
"group": "Instruction Following"
},
{
"name": "SEA-IFEVAL",
"author": "AI Singapore",
"author_url": "https://aisingapore.org",
"url": "https://huggingface.co/datasets/aisingapore/instruction_following-ifeval",
"n_languages": 7,
"tasks": [
"instruction_following"
],
"parallel": true,
"base": "IFEVAL",
"group": "Instruction Following"
},
{
"name": "Babel-670",
"author": "Academic",
"author_url": null,
"url": "https://github.com/UBC-NLP/Babel-670-Language-Identification",
"n_languages": 670,
"tasks": [
"language_identification"
],
"parallel": false,
"group": "Other Tasks"
},
{
"name": "CulturaX",
"author": "Academic",
"author_url": null,
"url": "https://huggingface.co/datasets/uonlp/CulturaX",
"n_languages": 167,
"tasks": [
"language_modeling"
],
"parallel": false,
"group": "Other Tasks"
},
{
"name": "XTREME",
"author": "Google",
"author_url": "https://google.com",
"url": "https://huggingface.co/datasets/google/xtreme",
"n_languages": 40,
"tasks": [
"translation",
"classification",
"question_answering",
"ner"
],
"parallel": null,
"group": "Other Tasks"
},
{
"name": "XLSUM",
"author": "Academic",
"author_url": null,
"url": "https://huggingface.co/datasets/csebuetnlp/xlsum",
"n_languages": 45,
"tasks": [
"summarization"
],
"parallel": true,
"group": "Other Tasks"
},
{
"name": "MSVAMP",
"author": "Microsoft",
"author_url": "https://microsoft.ai",
"url": "https://huggingface.co/datasets/Mathoctopus/MSVAMP",
"n_languages": 10,
"tasks": [
"math"
],
"parallel": true,
"group": "Other Tasks"
},
{
"name": "Multilingual Sentiments",
"author": "Academic",
"author_url": null,
"url": "https://huggingface.co/datasets/tyqiangz/multilingual-sentiments",
"n_languages": 12,
"tasks": [
"sentiment_analysis"
],
"parallel": null,
"group": "Other Tasks"
},
{
"name": "Lanfrica",
"author": "Lanfrica",
"author_url": "https://lanfrica.com",
"url": "https://lanfrica.com/records?language=yor&task=machine%20translation",
"n_languages": 2200,
"tasks": [
"datasets"
],
"parallel": null,
"group": "Dataset Collections"
},
{
"name": "HuggingFace Languages",
"author": "HuggingFace",
"author_url": "https://huggingface.co",
"url": "https://huggingface.co/languages",
"n_languages": 4680,
"tasks": [
"datasets",
"models"
],
"parallel": null,
"group": "Dataset Collections"
},
{
"name": "HuggingFace Multilingual Datasets",
"author": "HuggingFace",
"author_url": "https://huggingface.co",
"url": "https://huggingface.co/datasets?other=multilinguality:multilingual",
"n_languages": 2012,
"tasks": [
"datasets"
],
"parallel": false,
"group": "Dataset Collections"
}
]