David Pomerenke
committed on
Commit
·
276ec94
1
Parent(s):
d8f2dee
Add visual QA, reorder datasets
Browse files - datasets.json +76 -53
datasets.json
CHANGED
@@ -7,14 +7,61 @@
|
|
7 |
"n_languages": 200,
|
8 |
"tasks": [
|
9 |
"translation",
|
10 |
-
"classification",
|
11 |
-
"language_modeling"
|
12 |
],
|
13 |
"parallel": true,
|
14 |
"translation": "human",
|
15 |
"base": "FLORES",
|
16 |
"implemented": true,
|
17 |
-
"group": "
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
18 |
},
|
19 |
{
|
20 |
"name": "FLEURS",
|
@@ -29,7 +76,7 @@
|
|
29 |
"translation": "human",
|
30 |
"base": "FLORES",
|
31 |
"implemented": false,
|
32 |
-
"group": "
|
33 |
},
|
34 |
{
|
35 |
"name": "CommonVoice",
|
@@ -42,7 +89,31 @@
|
|
42 |
],
|
43 |
"parallel": null,
|
44 |
"translation": "human",
|
45 |
-
"group": "
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
46 |
},
|
47 |
{
|
48 |
"name": "Global MMLU",
|
@@ -497,54 +568,6 @@
|
|
497 |
"parallel": null,
|
498 |
"group": "Named Entity Recognition"
|
499 |
},
|
500 |
-
{
|
501 |
-
"name": "CCAligned",
|
502 |
-
"author": "Meta",
|
503 |
-
"author_url": "https://ai.meta.com",
|
504 |
-
"url": "https://huggingface.co/datasets/ahelk/ccaligned_multilingual",
|
505 |
-
"n_languages": 137,
|
506 |
-
"tasks": [
|
507 |
-
"translation"
|
508 |
-
],
|
509 |
-
"parallel": false,
|
510 |
-
"group": "Parallel Corpora"
|
511 |
-
},
|
512 |
-
{
|
513 |
-
"name": "Opus Gnome",
|
514 |
-
"author": "Helsinki NLP",
|
515 |
-
"author_url": null,
|
516 |
-
"url": "https://huggingface.co/datasets/Helsinki-NLP/opus_gnome",
|
517 |
-
"n_languages": 187,
|
518 |
-
"tasks": [
|
519 |
-
"translation"
|
520 |
-
],
|
521 |
-
"parallel": true,
|
522 |
-
"group": "Parallel Corpora"
|
523 |
-
},
|
524 |
-
{
|
525 |
-
"name": "Opus Paracrawl",
|
526 |
-
"author": "Helsinki NLP",
|
527 |
-
"author_url": null,
|
528 |
-
"url": "https://huggingface.co/datasets/Helsinki-NLP/opus_paracrawl",
|
529 |
-
"n_languages": 43,
|
530 |
-
"tasks": [
|
531 |
-
"translation"
|
532 |
-
],
|
533 |
-
"parallel": false,
|
534 |
-
"group": "Parallel Corpora"
|
535 |
-
},
|
536 |
-
{
|
537 |
-
"name": "OPUS Collection",
|
538 |
-
"author": "Helsinki NLP",
|
539 |
-
"author_url": null,
|
540 |
-
"url": "https://opus.nlpl.eu",
|
541 |
-
"n_languages": 747,
|
542 |
-
"tasks": [
|
543 |
-
"translation"
|
544 |
-
],
|
545 |
-
"parallel": false,
|
546 |
-
"group": "Parallel Corpora"
|
547 |
-
},
|
548 |
{
|
549 |
"name": "Tülu 3 SFT Mixture",
|
550 |
"author": "AllenAI",
|
|
|
7 |
"n_languages": 200,
|
8 |
"tasks": [
|
9 |
"translation",
|
10 |
+
"classification"
|
|
|
11 |
],
|
12 |
"parallel": true,
|
13 |
"translation": "human",
|
14 |
"base": "FLORES",
|
15 |
"implemented": true,
|
16 |
+
"group": "Translation"
|
17 |
+
},
|
18 |
+
{
|
19 |
+
"name": "CCAligned",
|
20 |
+
"author": "Meta",
|
21 |
+
"author_url": "https://ai.meta.com",
|
22 |
+
"url": "https://huggingface.co/datasets/ahelk/ccaligned_multilingual",
|
23 |
+
"n_languages": 137,
|
24 |
+
"tasks": [
|
25 |
+
"translation"
|
26 |
+
],
|
27 |
+
"parallel": false,
|
28 |
+
"group": "Translation"
|
29 |
+
},
|
30 |
+
{
|
31 |
+
"name": "Opus Gnome",
|
32 |
+
"author": "Helsinki NLP",
|
33 |
+
"author_url": null,
|
34 |
+
"url": "https://huggingface.co/datasets/Helsinki-NLP/opus_gnome",
|
35 |
+
"n_languages": 187,
|
36 |
+
"tasks": [
|
37 |
+
"translation"
|
38 |
+
],
|
39 |
+
"parallel": true,
|
40 |
+
"group": "Translation"
|
41 |
+
},
|
42 |
+
{
|
43 |
+
"name": "Opus Paracrawl",
|
44 |
+
"author": "Helsinki NLP",
|
45 |
+
"author_url": null,
|
46 |
+
"url": "https://huggingface.co/datasets/Helsinki-NLP/opus_paracrawl",
|
47 |
+
"n_languages": 43,
|
48 |
+
"tasks": [
|
49 |
+
"translation"
|
50 |
+
],
|
51 |
+
"parallel": false,
|
52 |
+
"group": "Translation"
|
53 |
+
},
|
54 |
+
{
|
55 |
+
"name": "OPUS Collection",
|
56 |
+
"author": "Helsinki NLP",
|
57 |
+
"author_url": null,
|
58 |
+
"url": "https://opus.nlpl.eu",
|
59 |
+
"n_languages": 747,
|
60 |
+
"tasks": [
|
61 |
+
"translation"
|
62 |
+
],
|
63 |
+
"parallel": false,
|
64 |
+
"group": "Translation"
|
65 |
},
|
66 |
{
|
67 |
"name": "FLEURS",
|
|
|
76 |
"translation": "human",
|
77 |
"base": "FLORES",
|
78 |
"implemented": false,
|
79 |
+
"group": "Speech Recognition"
|
80 |
},
|
81 |
{
|
82 |
"name": "CommonVoice",
|
|
|
89 |
],
|
90 |
"parallel": null,
|
91 |
"translation": "human",
|
92 |
+
"group": "Speech Recognition"
|
93 |
+
},
|
94 |
+
{
|
95 |
+
"name": "WorldCuisines",
|
96 |
+
"author": "Academic",
|
97 |
+
"author_url": "https://worldcuisines.github.io",
|
98 |
+
"url": "https://huggingface.co/datasets/worldcuisines/vqa",
|
99 |
+
"n_languages": 30,
|
100 |
+
"tasks": [
|
101 |
+
"visual_question_answering"
|
102 |
+
],
|
103 |
+
"parallel": null,
|
104 |
+
"group": "Visual Question Answering"
|
105 |
+
},
|
106 |
+
{
|
107 |
+
"name": "CVQA",
|
108 |
+
"author": "Academic",
|
109 |
+
"author_url": null,
|
110 |
+
"url": "https://huggingface.co/datasets/afaji/cvqa",
|
111 |
+
"n_languages": 39,
|
112 |
+
"tasks": [
|
113 |
+
"visual_question_answering"
|
114 |
+
],
|
115 |
+
"parallel": null,
|
116 |
+
"group": "Visual Question Answering"
|
117 |
},
|
118 |
{
|
119 |
"name": "Global MMLU",
|
|
|
568 |
"parallel": null,
|
569 |
"group": "Named Entity Recognition"
|
570 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
571 |
{
|
572 |
"name": "Tülu 3 SFT Mixture",
|
573 |
"author": "AllenAI",
|