|
[ |
|
{ |
|
"name": "FLORES+", |
|
"author": "Meta", |
|
"author_url": "https://ai.meta.com", |
|
"url": "https://huggingface.co/datasets/openlanguagedata/flores_plus", |
|
"n_languages": 200, |
|
"tasks": [ |
|
"translation", |
|
"classification" |
|
], |
|
"parallel": true, |
|
"translation": "human", |
|
"base": "FLORES", |
|
"implemented": true, |
|
"group": "Translation" |
|
}, |
|
{ |
|
"name": "CCAligned", |
|
"author": "Meta", |
|
"author_url": "https://ai.meta.com", |
|
"url": "https://huggingface.co/datasets/ahelk/ccaligned_multilingual", |
|
"n_languages": 137, |
|
"tasks": [ |
|
"translation" |
|
], |
|
"parallel": false, |
|
"group": "Translation" |
|
}, |
|
{ |
|
"name": "OPUS Collection", |
|
"author": "Helsinki NLP", |
|
"author_url": null, |
|
"url": "https://opus.nlpl.eu", |
|
"n_languages": 747, |
|
"tasks": [ |
|
"translation" |
|
], |
|
"parallel": false, |
|
"group": "Translation" |
|
}, |
|
{ |
|
"name": "Global MMLU", |
|
"author": "Cohere", |
|
"author_url": "https://cohere.com", |
|
"url": "https://huggingface.co/datasets/CohereForAI/Global-MMLU", |
|
"n_languages": 42, |
|
"languages": [ |
|
"am", |
|
"ar", |
|
"bn", |
|
"cs", |
|
"de", |
|
"el", |
|
"en", |
|
"es", |
|
"fa", |
|
"fil", |
|
"fr", |
|
"ha", |
|
"he", |
|
"hi", |
|
"id", |
|
"ig", |
|
"it", |
|
"ja", |
|
"ko", |
|
"ky", |
|
"lt", |
|
"mg", |
|
"ms", |
|
"ne", |
|
"nl", |
|
"ny", |
|
"pl", |
|
"pt", |
|
"ro", |
|
"ru", |
|
"si", |
|
"sn", |
|
"so", |
|
"sr", |
|
"sv", |
|
"sw", |
|
"te", |
|
"tr", |
|
"uk", |
|
"vi", |
|
"yo", |
|
"zh" |
|
], |
|
"tasks": [ |
|
"question_answering" |
|
], |
|
"parallel": true, |
|
"translation": "mixed", |
|
"base": "MMLU", |
|
"implemented": true, |
|
"group": "Multitask Language Understanding" |
|
}, |
|
{ |
|
"name": "MMMLU", |
|
"author": "OpenAI", |
|
"author_url": "https://openai.com", |
|
"url": "https://huggingface.co/datasets/openai/MMMLU", |
|
"n_languages": 14, |
|
"languages": [ |
|
"ar", |
|
"bn", |
|
"de", |
|
"es", |
|
"fr", |
|
"hi", |
|
"id", |
|
"it", |
|
"ja", |
|
"ko", |
|
"pt", |
|
"sw", |
|
"yo", |
|
"zh" |
|
], |
|
"tasks": [ |
|
"question_answering" |
|
], |
|
"parallel": true, |
|
"translation": "human", |
|
"base": "MMLU", |
|
"implemented": true, |
|
"group": "Multitask Language Understanding" |
|
}, |
|
{ |
|
"name": "AfriMMLU", |
|
"author": "Masakhane", |
|
"author_url": "https://www.masakhane.io", |
|
"url": "https://huggingface.co/datasets/masakhane/afrimmlu", |
|
"n_languages": 18, |
|
"languages": [ |
|
"am", |
|
"en", |
|
"ee", |
|
"fr", |
|
"ha", |
|
"ig", |
|
"rw", |
|
"ln", |
|
"lg", |
|
"om", |
|
"sn", |
|
"st", |
|
"sw", |
|
"tw", |
|
"wo", |
|
"xh", |
|
"yo", |
|
"zu" |
|
], |
|
"tasks": [ |
|
"question_answering" |
|
], |
|
"parallel": true, |
|
"translation": "human", |
|
"base": "MMLU", |
|
"implemented": true, |
|
"group": "Multitask Language Understanding" |
|
}, |
|
{ |
|
"name": "Okapi MMLU", |
|
"author": "Academic", |
|
"author_url": null, |
|
"url": "https://huggingface.co/datasets/jon-tow/okapi_mmlu", |
|
"n_languages": 31, |
|
"languages": [ |
|
"ar", |
|
"bn", |
|
"ca", |
|
"da", |
|
"de", |
|
"es", |
|
"eu", |
|
"fr", |
|
"gu", |
|
"hi", |
|
"hr", |
|
"hu", |
|
"hy", |
|
"id", |
|
"it", |
|
"kn", |
|
"ml", |
|
"mr", |
|
"ne", |
|
"nl", |
|
"pt", |
|
"ro", |
|
"ru", |
|
"sk", |
|
"sr", |
|
"sv", |
|
"ta", |
|
"te", |
|
"uk", |
|
"vi", |
|
"zh" |
|
], |
|
"tasks": [ |
|
"question_answering" |
|
], |
|
"parallel": true, |
|
"translation": "machine", |
|
"base": "MMLU", |
|
"implemented": true, |
|
"group": "Multitask Language Understanding" |
|
}, |
|
{ |
|
"name": "MMLU-X", |
|
"author": "OpenGPT-X", |
|
"author_url": "https://opengpt-x.de", |
|
"url": "https://huggingface.co/datasets/openGPT-X/mmlux", |
|
"n_languages": 20, |
|
"languages": [ |
|
"bg", |
|
"cs", |
|
"da", |
|
"de", |
|
"el", |
|
"es", |
|
"et", |
|
"fi", |
|
"fr", |
|
"hu", |
|
"it", |
|
"lt", |
|
"lv", |
|
"nl", |
|
"pl", |
|
"pt", |
|
"ro", |
|
"sk", |
|
"sl", |
|
"sv" |
|
], |
|
"tasks": [ |
|
"question_answering" |
|
], |
|
"parallel": true, |
|
"translation": "machine", |
|
"base": "MMLU", |
|
"implemented": false, |
|
"group": "Multitask Language Understanding" |
|
}, |
|
{ |
|
"name": "FLEURS", |
|
"author": "Google", |
|
"author_url": "https://google.com", |
|
"url": "https://huggingface.co/datasets/google/fleurs", |
|
"n_languages": 102, |
|
"tasks": [ |
|
"speech_recognition" |
|
], |
|
"parallel": true, |
|
"translation": "human", |
|
"base": "FLORES", |
|
"implemented": false, |
|
"group": "Speech Recognition" |
|
}, |
|
{ |
|
"name": "CommonVoice", |
|
"author": "Mozilla", |
|
"author_url": "https://blog.mozilla.ai", |
|
"url": "https://huggingface.co/datasets/mozilla-foundation/common_voice_17_0", |
|
"n_languages": 124, |
|
"tasks": [ |
|
"speech_recognition" |
|
], |
|
"parallel": null, |
|
"translation": "human", |
|
"group": "Speech Recognition" |
|
}, |
|
{ |
|
"name": "WorldCuisines", |
|
"author": "Academic", |
|
"author_url": "https://worldcuisines.github.io", |
|
"url": "https://huggingface.co/datasets/worldcuisines/vqa", |
|
"n_languages": 30, |
|
"tasks": [ |
|
"visual_question_answering" |
|
], |
|
"parallel": null, |
|
"group": "Visual Question Answering" |
|
}, |
|
{ |
|
"name": "CVQA", |
|
"author": "Academic", |
|
"author_url": null, |
|
"url": "https://huggingface.co/datasets/afaji/cvqa", |
|
"n_languages": 39, |
|
"tasks": [ |
|
"visual_question_answering" |
|
], |
|
"parallel": null, |
|
"group": "Visual Question Answering" |
|
}, |
|
{ |
|
"name": "Uhura ARC Easy", |
|
"author": "Masakhane", |
|
"author_url": "https://www.masakhane.io", |
|
"url": "https://huggingface.co/datasets/masakhane/uhura-arc-easy", |
|
"n_languages": 6, |
|
"tasks": [ |
|
"question_answering" |
|
], |
|
"parallel": true, |
|
"translation": "human", |
|
"base": "AI2 ARC", |
|
"implemented": false, |
|
"group": "Abstract Reasoning" |
|
}, |
|
{ |
|
"name": "Okapi ARC Challenge", |
|
"author": "Academic", |
|
"author_url": null, |
|
"url": "https://huggingface.co/datasets/jon-tow/okapi_arc_challenge", |
|
"n_languages": 31, |
|
"tasks": [ |
|
"question_answering" |
|
], |
|
"parallel": true, |
|
"translation": "machine", |
|
"base": "AI2 ARC", |
|
"implemented": false, |
|
"group": "Abstract Reasoning" |
|
}, |
|
{ |
|
"name": "Arc-X", |
|
"author": "OpenGPT-X", |
|
"author_url": "https://opengpt-x.de", |
|
"url": "https://huggingface.co/datasets/openGPT-X/arcx", |
|
"n_languages": 20, |
|
"tasks": [ |
|
"question_answering" |
|
], |
|
"parallel": true, |
|
"translation": "machine", |
|
"base": "AI2 ARC", |
|
"implemented": false, |
|
"group": "Abstract Reasoning" |
|
}, |
|
{ |
|
"name": "Uhura TruthfulQA", |
|
"author": "Masakhane", |
|
"author_url": "https://www.masakhane.io", |
|
"url": "https://huggingface.co/datasets/masakhane/uhura-truthfulqa", |
|
"n_languages": 6, |
|
"tasks": [ |
|
"question_answering" |
|
], |
|
"parallel": true, |
|
"translation": "human", |
|
"base": "TruthfulQA", |
|
"implemented": false, |
|
"group": "Truthfulness" |
|
}, |
|
{ |
|
"name": "Okapi TruthfulQA", |
|
"author": "Academic", |
|
"author_url": null, |
|
"url": "https://huggingface.co/datasets/jon-tow/okapi_truthfulqa/tree/main/data", |
|
"n_languages": 31, |
|
"tasks": [ |
|
"question_answering" |
|
], |
|
"parallel": true, |
|
"translation": "machine", |
|
"base": "TruthfulQA", |
|
"implemented": false, |
|
"group": "Truthfulness" |
|
}, |
|
{ |
|
"name": "TruthfulQA-X", |
|
"author": "OpenGPT-X", |
|
"author_url": "https://opengpt-x.de", |
|
"url": "https://huggingface.co/datasets/openGPT-X/truthfulqax", |
|
"n_languages": 20, |
|
"tasks": [ |
|
"question_answering" |
|
], |
|
"parallel": true, |
|
"translation": "machine", |
|
"base": "TruthfulQA", |
|
"implemented": false, |
|
"group": "Truthfulness" |
|
}, |
|
{ |
|
"name": "XNLI", |
|
"author": "Meta", |
|
"author_url": "https://ai.meta.com", |
|
"url": "https://huggingface.co/datasets/facebook/xnli", |
|
"n_languages": 14, |
|
"tasks": [ |
|
"classification", |
|
"logic" |
|
], |
|
"parallel": true, |
|
"base": "MNLI", |
|
"group": "Natural Language Inference" |
|
}, |
|
{ |
|
"name": "AfriXNLI", |
|
"author": "Masakhane", |
|
"author_url": "https://www.masakhane.io", |
|
"url": "https://huggingface.co/datasets/masakhane/afrixnli", |
|
"n_languages": 18, |
|
"tasks": [ |
|
"classification", |
|
"logic" |
|
], |
|
"parallel": true, |
|
"translation": "human", |
|
"base": "MNLI", |
|
"implemented": false, |
|
"group": "Natural Language Inference" |
|
}, |
|
{ |
|
"name": "XGLUE", |
|
"author": "Microsoft", |
|
"author_url": "https://microsoft.ai", |
|
"url": "https://huggingface.co/datasets/microsoft/xglue", |
|
"n_languages": 18, |
|
"tasks": [ |
|
"pos" |
|
], |
|
"parallel": null, |
|
"base": "GLUE", |
|
"group": "General Language Understanding" |
|
}, |
|
{ |
|
"name": "IndicGLUE", |
|
"author": "AI4Bharat", |
|
"author_url": "https://models.ai4bharat.org", |
|
"url": "https://huggingface.co/datasets/ai4bharat/indic_glue", |
|
"n_languages": 11, |
|
"tasks": [ |
|
"question_answering" |
|
], |
|
"parallel": null, |
|
"base": "GLUE", |
|
"group": "General Language Understanding" |
|
}, |
|
{ |
|
"name": "Okapi HellaSwag", |
|
"author": "Academic", |
|
"author_url": null, |
|
"url": "https://huggingface.co/datasets/jon-tow/okapi_hellaswag", |
|
"n_languages": 31, |
|
"tasks": [ |
|
"question_answering" |
|
], |
|
"parallel": true, |
|
"translation": "machine", |
|
"base": "HellaSwag", |
|
"implemented": false, |
|
"group": "Adversarial Language Modelling" |
|
}, |
|
{ |
|
"name": "HellaSwag-X", |
|
"author": "OpenGPT-X", |
|
"author_url": "https://opengpt-x.de", |
|
"url": "https://huggingface.co/datasets/openGPT-X/hellaswagx", |
|
"n_languages": 20, |
|
"tasks": [ |
|
"question_answering" |
|
], |
|
"parallel": true, |
|
"translation": "machine", |
|
"base": "HellaSwag", |
|
"implemented": false, |
|
"group": "Adversarial Language Modelling" |
|
}, |
|
{ |
|
"name": "MGSM", |
|
"author": "Google", |
|
"author_url": "https://google.com", |
|
"url": "https://huggingface.co/datasets/juletxara/mgsm", |
|
"n_languages": 10, |
|
"tasks": [ |
|
"math" |
|
], |
|
"parallel": true, |
|
"base": "GSM8K", |
|
"group": "Grade School Math" |
|
}, |
|
{ |
|
"name": "AfriMGSM", |
|
"author": "Masakhane", |
|
"author_url": "https://www.masakhane.io", |
|
"url": "https://huggingface.co/datasets/masakhane/afrimgsm", |
|
"n_languages": 18, |
|
"tasks": [ |
|
"math" |
|
], |
|
"parallel": true, |
|
"translation": "human", |
|
"base": "MGSM", |
|
"implemented": false, |
|
"group": "Grade School Math" |
|
}, |
|
{ |
|
"name": "GSM8K-X", |
|
"author": "OpenGPT-X", |
|
"author_url": "https://opengpt-x.de", |
|
"url": "https://huggingface.co/datasets/openGPT-X/gsm8kx", |
|
"n_languages": 20, |
|
"tasks": [ |
|
"math" |
|
], |
|
"parallel": true, |
|
"translation": "machine", |
|
"base": "GSM8K", |
|
"implemented": false, |
|
"group": "Grade School Math" |
|
}, |
|
{ |
|
"name": "WikiANN / PAN-X", |
|
"author": "Academic", |
|
"author_url": null, |
|
"url": "https://huggingface.co/datasets/unimelb-nlp/wikiann", |
|
"n_languages": 176, |
|
"tasks": [ |
|
"ner" |
|
], |
|
"parallel": false, |
|
"group": "Named Entity Recognition" |
|
}, |
|
{ |
|
"name": "MasakhaNER", |
|
"author": "Masakhane", |
|
"author_url": "https://www.masakhane.io", |
|
"url": "https://huggingface.co/datasets/masakhane/masakhaner", |
|
"n_languages": 10, |
|
"tasks": [ |
|
"ner" |
|
], |
|
"parallel": null, |
|
"group": "Named Entity Recognition" |
|
}, |
|
{ |
|
"name": "Tülu 3 SFT Mixture", |
|
"author": "AllenAI", |
|
"author_url": "https://allenai.org", |
|
"url": "https://huggingface.co/datasets/allenai/tulu-3-sft-mixture", |
|
"n_languages": 70, |
|
"tasks": [ |
|
"instruction_following" |
|
], |
|
"parallel": false, |
|
"group": "Instruction Following" |
|
}, |
|
{ |
|
"name": "xP3", |
|
"author": "BigScience", |
|
"author_url": "https://bigscience.huggingface.co", |
|
"url": "https://huggingface.co/datasets/bigscience/xP3", |
|
"n_languages": 46, |
|
"tasks": [ |
|
"instruction_following" |
|
], |
|
"parallel": false, |
|
"group": "Instruction Following" |
|
}, |
|
{ |
|
"name": "Aya", |
|
"author": "Cohere", |
|
"author_url": "https://cohere.com", |
|
"url": "https://huggingface.co/datasets/CohereForAI/aya_dataset", |
|
"n_languages": 65, |
|
"tasks": [ |
|
"instruction_following" |
|
], |
|
"parallel": null, |
|
"group": "Instruction Following" |
|
}, |
|
{ |
|
"name": "SEA-IFEVAL", |
|
"author": "AI Singapore", |
|
"author_url": "https://aisingapore.org", |
|
"url": "https://huggingface.co/datasets/aisingapore/instruction_following-ifeval", |
|
"n_languages": 7, |
|
"tasks": [ |
|
"instruction_following" |
|
], |
|
"parallel": true, |
|
"base": "IFEVAL", |
|
"group": "Instruction Following" |
|
}, |
|
{ |
|
"name": "Babel-670", |
|
"author": "Academic", |
|
"author_url": null, |
|
"url": "https://github.com/UBC-NLP/Babel-670-Language-Identification", |
|
"n_languages": 670, |
|
"tasks": [ |
|
"language_identification" |
|
], |
|
"parallel": false, |
|
"group": "Other Tasks" |
|
}, |
|
{ |
|
"name": "CulturaX", |
|
"author": "Academic", |
|
"author_url": null, |
|
"url": "https://huggingface.co/datasets/uonlp/CulturaX", |
|
"n_languages": 167, |
|
"tasks": [ |
|
"language_modeling" |
|
], |
|
"parallel": false, |
|
"group": "Other Tasks" |
|
}, |
|
{ |
|
"name": "XTREME", |
|
"author": "Google", |
|
"author_url": "https://google.com", |
|
"url": "https://huggingface.co/datasets/google/xtreme", |
|
"n_languages": 40, |
|
"tasks": [ |
|
"translation", |
|
"classification", |
|
"question_answering", |
|
"ner" |
|
], |
|
"parallel": null, |
|
"group": "Other Tasks" |
|
}, |
|
{ |
|
"name": "XLSUM", |
|
"author": "Academic", |
|
"author_url": null, |
|
"url": "https://huggingface.co/datasets/csebuetnlp/xlsum", |
|
"n_languages": 45, |
|
"tasks": [ |
|
"summarization" |
|
], |
|
"parallel": true, |
|
"group": "Other Tasks" |
|
}, |
|
{ |
|
"name": "MSVAMP", |
|
"author": "Microsoft", |
|
"author_url": "https://microsoft.ai", |
|
"url": "https://huggingface.co/datasets/Mathoctopus/MSVAMP", |
|
"n_languages": 10, |
|
"tasks": [ |
|
"math" |
|
], |
|
"parallel": true, |
|
"group": "Other Tasks" |
|
}, |
|
{ |
|
"name": "Multilingual Sentiments", |
|
"author": "Academic", |
|
"author_url": null, |
|
"url": "https://huggingface.co/datasets/tyqiangz/multilingual-sentiments", |
|
"n_languages": 12, |
|
"tasks": [ |
|
"sentiment_analysis" |
|
], |
|
"parallel": null, |
|
"group": "Other Tasks" |
|
}, |
|
{ |
|
"name": "Lanfrica", |
|
"author": "Lanfrica", |
|
"author_url": "https://lanfrica.com", |
|
"url": "https://lanfrica.com/records?language=yor&task=machine%20translation", |
|
"n_languages": 2200, |
|
"tasks": [ |
|
"datasets" |
|
], |
|
"parallel": null, |
|
"group": "Dataset Collections" |
|
}, |
|
{ |
|
"name": "HuggingFace Languages", |
|
"author": "HuggingFace", |
|
"author_url": "https://huggingface.co", |
|
"url": "https://huggingface.co/languages", |
|
"n_languages": 4680, |
|
"tasks": [ |
|
"datasets", |
|
"models" |
|
], |
|
"parallel": null, |
|
"group": "Dataset Collections" |
|
}, |
|
{ |
|
"name": "HuggingFace Multilingual Datasets", |
|
"author": "HuggingFace", |
|
"author_url": "https://huggingface.co", |
|
"url": "https://huggingface.co/datasets?other=multilinguality:multilingual", |
|
"n_languages": 2012, |
|
"tasks": [ |
|
"datasets" |
|
], |
|
"parallel": false, |
|
"group": "Dataset Collections" |
|
} |
|
] |