David Pomerenke committed on
Commit
276ec94
·
1 Parent(s): d8f2dee

Add visual QA, reorder datasets

Browse files
Files changed (1) hide show
  1. datasets.json +76 -53
datasets.json CHANGED
@@ -7,14 +7,61 @@
7
  "n_languages": 200,
8
  "tasks": [
9
  "translation",
10
- "classification",
11
- "language_modeling"
12
  ],
13
  "parallel": true,
14
  "translation": "human",
15
  "base": "FLORES",
16
  "implemented": true,
17
- "group": "Low-Resource Languages"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
  },
19
  {
20
  "name": "FLEURS",
@@ -29,7 +76,7 @@
29
  "translation": "human",
30
  "base": "FLORES",
31
  "implemented": false,
32
- "group": "Low-Resource Languages"
33
  },
34
  {
35
  "name": "CommonVoice",
@@ -42,7 +89,31 @@
42
  ],
43
  "parallel": null,
44
  "translation": "human",
45
- "group": "Low-Resource Languages"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
46
  },
47
  {
48
  "name": "Global MMLU",
@@ -497,54 +568,6 @@
497
  "parallel": null,
498
  "group": "Named Entity Recognition"
499
  },
500
- {
501
- "name": "CCAligned",
502
- "author": "Meta",
503
- "author_url": "https://ai.meta.com",
504
- "url": "https://huggingface.co/datasets/ahelk/ccaligned_multilingual",
505
- "n_languages": 137,
506
- "tasks": [
507
- "translation"
508
- ],
509
- "parallel": false,
510
- "group": "Parallel Corpora"
511
- },
512
- {
513
- "name": "Opus Gnome",
514
- "author": "Helsinki NLP",
515
- "author_url": null,
516
- "url": "https://huggingface.co/datasets/Helsinki-NLP/opus_gnome",
517
- "n_languages": 187,
518
- "tasks": [
519
- "translation"
520
- ],
521
- "parallel": true,
522
- "group": "Parallel Corpora"
523
- },
524
- {
525
- "name": "Opus Paracrawl",
526
- "author": "Helsinki NLP",
527
- "author_url": null,
528
- "url": "https://huggingface.co/datasets/Helsinki-NLP/opus_paracrawl",
529
- "n_languages": 43,
530
- "tasks": [
531
- "translation"
532
- ],
533
- "parallel": false,
534
- "group": "Parallel Corpora"
535
- },
536
- {
537
- "name": "OPUS Collection",
538
- "author": "Helsinki NLP",
539
- "author_url": null,
540
- "url": "https://opus.nlpl.eu",
541
- "n_languages": 747,
542
- "tasks": [
543
- "translation"
544
- ],
545
- "parallel": false,
546
- "group": "Parallel Corpora"
547
- },
548
  {
549
  "name": "Tülu 3 SFT Mixture",
550
  "author": "AllenAI",
 
7
  "n_languages": 200,
8
  "tasks": [
9
  "translation",
10
+ "classification"
 
11
  ],
12
  "parallel": true,
13
  "translation": "human",
14
  "base": "FLORES",
15
  "implemented": true,
16
+ "group": "Translation"
17
+ },
18
+ {
19
+ "name": "CCAligned",
20
+ "author": "Meta",
21
+ "author_url": "https://ai.meta.com",
22
+ "url": "https://huggingface.co/datasets/ahelk/ccaligned_multilingual",
23
+ "n_languages": 137,
24
+ "tasks": [
25
+ "translation"
26
+ ],
27
+ "parallel": false,
28
+ "group": "Translation"
29
+ },
30
+ {
31
+ "name": "Opus Gnome",
32
+ "author": "Helsinki NLP",
33
+ "author_url": null,
34
+ "url": "https://huggingface.co/datasets/Helsinki-NLP/opus_gnome",
35
+ "n_languages": 187,
36
+ "tasks": [
37
+ "translation"
38
+ ],
39
+ "parallel": true,
40
+ "group": "Translation"
41
+ },
42
+ {
43
+ "name": "Opus Paracrawl",
44
+ "author": "Helsinki NLP",
45
+ "author_url": null,
46
+ "url": "https://huggingface.co/datasets/Helsinki-NLP/opus_paracrawl",
47
+ "n_languages": 43,
48
+ "tasks": [
49
+ "translation"
50
+ ],
51
+ "parallel": false,
52
+ "group": "Translation"
53
+ },
54
+ {
55
+ "name": "OPUS Collection",
56
+ "author": "Helsinki NLP",
57
+ "author_url": null,
58
+ "url": "https://opus.nlpl.eu",
59
+ "n_languages": 747,
60
+ "tasks": [
61
+ "translation"
62
+ ],
63
+ "parallel": false,
64
+ "group": "Translation"
65
  },
66
  {
67
  "name": "FLEURS",
 
76
  "translation": "human",
77
  "base": "FLORES",
78
  "implemented": false,
79
+ "group": "Speech Recognition"
80
  },
81
  {
82
  "name": "CommonVoice",
 
89
  ],
90
  "parallel": null,
91
  "translation": "human",
92
+ "group": "Speech Recognition"
93
+ },
94
+ {
95
+ "name": "WorldCuisines",
96
+ "author": "Academic",
97
+ "author_url": "https://worldcuisines.github.io",
98
+ "url": "https://huggingface.co/datasets/worldcuisines/vqa",
99
+ "n_languages": 30,
100
+ "tasks": [
101
+ "visual_question_answering"
102
+ ],
103
+ "parallel": null,
104
+ "group": "Visual Question Answering"
105
+ },
106
+ {
107
+ "name": "CVQA",
108
+ "author": "Academic",
109
+ "author_url": null,
110
+ "url": "https://huggingface.co/datasets/afaji/cvqa",
111
+ "n_languages": 39,
112
+ "tasks": [
113
+ "visual_question_answering"
114
+ ],
115
+ "parallel": null,
116
+ "group": "Visual Question Answering"
117
  },
118
  {
119
  "name": "Global MMLU",
 
568
  "parallel": null,
569
  "group": "Named Entity Recognition"
570
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
571
  {
572
  "name": "Tülu 3 SFT Mixture",
573
  "author": "AllenAI",