David Pomerenke committed on
Commit
9051509
·
1 Parent(s): 51cb38c

Dataset table grouping

Browse files
datasets.json CHANGED
@@ -1,484 +1,527 @@
1
  [
2
- {
3
- "name": "FLORES+",
4
- "author": "Meta",
5
- "author_url": "https://ai.meta.com",
6
- "url": "https://huggingface.co/datasets/openlanguagedata/flores_plus",
7
- "n_languages": 200,
8
- "tasks": [
9
- "translation",
10
- "classification",
11
- "language_modeling"
12
- ],
13
- "parallel": true,
14
- "base": "FLORES",
15
- "implemented": true
16
- },
17
- {
18
- "name": "FLEURS",
19
- "author": "Meta",
20
- "author_url": "https://ai.meta.com",
21
- "url": "https://huggingface.co/datasets/google/fleurs",
22
- "n_languages": 102,
23
- "tasks": [
24
- "speech_recognition"
25
- ],
26
- "parallel": true,
27
- "base": "FLORES",
28
- "implemented": true
29
- },
30
- {
31
- "name": "CommonVoice",
32
- "author": "Mozilla",
33
- "author_url": "https://blog.mozilla.ai",
34
- "url": "https://huggingface.co/datasets/mozilla-foundation/common_voice_17_0",
35
- "n_languages": 124,
36
- "tasks": [
37
- "speech_recognition"
38
- ],
39
- "parallel": null
40
- },
41
- {
42
- "name": "MMMLU",
43
- "author": "OpenAI",
44
- "author_url": "https://openai.com",
45
- "url": "https://huggingface.co/datasets/openai/MMMLU",
46
- "n_languages": "14",
47
- "tasks": [
48
- "question_answering"
49
- ],
50
- "parallel": true,
51
- "base": "MMLU"
52
- },
53
- {
54
- "name": "AfriMMLU",
55
- "author": "Masakhane",
56
- "author_url": "https://www.masakhane.io",
57
- "url": "https://huggingface.co/datasets/masakhane/afrimmlu",
58
- "n_languages": "17",
59
- "tasks": [
60
- "question_answering"
61
- ],
62
- "parallel": true,
63
- "base": "MMLU"
64
- },
65
- {
66
- "name": "Okapi MMLU",
67
- "author": "Academic",
68
- "author_url": null,
69
- "url": "https://huggingface.co/datasets/jon-tow/okapi_mmlu",
70
- "n_languages": 16,
71
- "tasks": [
72
- "question_answering"
73
- ],
74
- "parallel": true,
75
- "base": "MMLU"
76
- },
77
- {
78
- "name": "MMLU-X",
79
- "author": "OpenGPT-X",
80
- "author_url": null,
81
- "url": "https://huggingface.co/datasets/openGPT-X/mmlux",
82
- "n_languages": 20,
83
- "tasks": [
84
- "question_answering"
85
- ],
86
- "parallel": true,
87
- "base": "MMLU"
88
- },
89
- {
90
- "name": "Global MMLU",
91
- "author": "Cohere",
92
- "author_url": "https://cohere.com",
93
- "url": "https://huggingface.co/datasets/CohereForAI/Global-MMLU",
94
- "n_languages": 42,
95
- "tasks": [
96
- "question_answering"
97
- ],
98
- "parallel": true,
99
- "base": "MMLU"
100
- },
101
- {
102
- "name": "MGSM",
103
- "author": "Google",
104
- "author_url": "https://google.com",
105
- "url": "https://huggingface.co/datasets/juletxara/mgsm",
106
- "n_languages": 10,
107
- "tasks": [
108
- "math"
109
- ],
110
- "parallel": true,
111
- "base": "MGSM"
112
- },
113
- {
114
- "name": "AfriMGSM",
115
- "author": "Masakhane",
116
- "author_url": "https://www.masakhane.io",
117
- "url": "https://huggingface.co/datasets/masakhane/afrimgsm",
118
- "n_languages": 18,
119
- "tasks": [
120
- "math"
121
- ],
122
- "parallel": true,
123
- "base": "MGSM"
124
- },
125
- {
126
- "name": "GSM8K-X",
127
- "author": "OpenGPT-X",
128
- "author_url": null,
129
- "url": "https://huggingface.co/datasets/openGPT-X/gsm8kx",
130
- "n_languages": 20,
131
- "tasks": [
132
- "math"
133
- ],
134
- "parallel": true,
135
- "base": "MGSM"
136
- },
137
- {
138
- "name": "Okapi ARC Challenge",
139
- "author": "Academic",
140
- "author_url": null,
141
- "url": "https://huggingface.co/datasets/jon-tow/okapi_arc_challenge",
142
- "n_languages": 31,
143
- "tasks": [
144
- "question_answering"
145
- ],
146
- "parallel": true,
147
- "base": "AI2 ARC"
148
- },
149
- {
150
- "name": "Uhuru ARC Easy",
151
- "author": "Masakhane",
152
- "author_url": "https://www.masakhane.io",
153
- "url": "https://huggingface.co/datasets/masakhane/uhura-arc-easy",
154
- "n_languages": 6,
155
- "tasks": [
156
- "question_answering"
157
- ],
158
- "parallel": true,
159
- "base": "AI2 ARC"
160
- },
161
- {
162
- "name": "Arc-X",
163
- "author": "OpenGPT-X",
164
- "author_url": null,
165
- "url": "https://huggingface.co/datasets/openGPT-X/arcx",
166
- "n_languages": 20,
167
- "tasks": [
168
- "question_answering"
169
- ],
170
- "parallel": true,
171
- "base": "AI2 ARC"
172
- },
173
- {
174
- "name": "Okapi TruthfulQA",
175
- "author": "Academic",
176
- "author_url": null,
177
- "url": "https://huggingface.co/datasets/jon-tow/okapi_truthfulqa/tree/main/data",
178
- "n_languages": 31,
179
- "tasks": [
180
- "question_answering"
181
- ],
182
- "parallel": true,
183
- "base": "TruthfulQA"
184
- },
185
- {
186
- "name": "Uhura TruthfulQA",
187
- "author": "Masakhane",
188
- "author_url": "https://www.masakhane.io",
189
- "url": "https://huggingface.co/datasets/masakhane/uhura-truthfulqa",
190
- "n_languages": 6,
191
- "tasks": [
192
- "question_answering"
193
- ],
194
- "parallel": true,
195
- "base": "TruthfulQA"
196
- },
197
- {
198
- "name": "TruthfulQA-X",
199
- "author": "OpenGPT-X",
200
- "author_url": null,
201
- "url": "https://huggingface.co/datasets/openGPT-X/truthfulqax",
202
- "n_languages": 20,
203
- "tasks": [
204
- "question_answering"
205
- ],
206
- "parallel": true,
207
- "base": "TruthfulQA"
208
- },
209
- {
210
- "name": "XNLI",
211
- "author": "Meta",
212
- "author_url": "https://ai.meta.com",
213
- "url": "https://huggingface.co/datasets/facebook/xnli",
214
- "n_languages": 14,
215
- "tasks": [
216
- "classification"
217
- ],
218
- "parallel": true,
219
- "base": "XNLI"
220
- },
221
- {
222
- "name": "AfriXNLI",
223
- "author": "Masakhane",
224
- "author_url": "https://www.masakhane.io",
225
- "url": "https://huggingface.co/datasets/masakhane/afrixnli",
226
- "n_languages": 18,
227
- "tasks": [
228
- "classification"
229
- ],
230
- "parallel": true,
231
- "base": "XNLI"
232
- },
233
- {
234
- "name": "Okapi HellaSwag",
235
- "author": "Academic",
236
- "author_url": null,
237
- "url": "https://huggingface.co/datasets/jon-tow/okapi_hellaswag",
238
- "n_languages": 31,
239
- "tasks": [
240
- "question_answering"
241
- ],
242
- "parallel": true,
243
- "base": "HellaSwag"
244
- },
245
- {
246
- "name": "HellaSwag-X",
247
- "author": "OpenGPT-X",
248
- "author_url": null,
249
- "url": "https://huggingface.co/datasets/openGPT-X/hellaswagx",
250
- "n_languages": 20,
251
- "tasks": [
252
- "question_answering"
253
- ],
254
- "parallel": true,
255
- "base": "HellaSwag"
256
- },
257
- {
258
- "name": "WikiANN / PAN-X",
259
- "author": "Academic",
260
- "author_url": null,
261
- "url": "https://huggingface.co/datasets/unimelb-nlp/wikiann",
262
- "n_languages": 176,
263
- "tasks": [
264
- "ner"
265
- ],
266
- "parallel": false
267
- },
268
- {
269
- "name": "MSVAMP",
270
- "author": "Microsoft",
271
- "author_url": "https://microsoft.ai",
272
- "url": "https://huggingface.co/datasets/Mathoctopus/MSVAMP",
273
- "n_languages": 10,
274
- "tasks": [
275
- "math"
276
- ],
277
- "parallel": true
278
- },
279
- {
280
- "name": "XLSUM",
281
- "author": "Academic",
282
- "author_url": null,
283
- "url": "https://huggingface.co/datasets/csebuetnlp/xlsum",
284
- "n_languages": 45,
285
- "tasks": [
286
- "summarization"
287
- ],
288
- "parallel": true
289
- },
290
- {
291
- "name": "SEA-IFEVAL",
292
- "author": "AI Singapore",
293
- "author_url": "https://aisingapore.org",
294
- "url": "https://huggingface.co/datasets/aisingapore/instruction_following-ifeval",
295
- "n_languages": 7,
296
- "tasks": [
297
- "instruction_following"
298
- ],
299
- "parallel": true,
300
- "base": "IFEVAL"
301
- },
302
- {
303
- "name": "XTREME",
304
- "author": "Google",
305
- "author_url": "https://google.com",
306
- "url": "https://huggingface.co/datasets/google/xtreme",
307
- "n_languages": 40,
308
- "tasks": [
309
- "translation",
310
- "classification",
311
- "question_answering",
312
- "ner"
313
- ],
314
- "parallel": null
315
- },
316
- {
317
- "name": "XGLUE",
318
- "author": "Microsoft",
319
- "author_url": "https://microsoft.ai",
320
- "url": "https://huggingface.co/datasets/microsoft/xglue",
321
- "n_languages": 18,
322
- "tasks": [
323
- "pos"
324
- ],
325
- "parallel": null,
326
- "base": "GLUE"
327
- },
328
- {
329
- "name": "IndicGLUE",
330
- "author": "AI4Bharat",
331
- "author_url": "https://models.ai4bharat.org",
332
- "url": "https://huggingface.co/datasets/ai4bharat/indic_glue",
333
- "n_languages": 11,
334
- "tasks": [
335
- "question_answering"
336
- ],
337
- "parallel": null,
338
- "base": "GLUE"
339
- },
340
- {
341
- "name": "Opus Gnome",
342
- "author": "Helsinki NLP",
343
- "author_url": null,
344
- "url": "https://huggingface.co/datasets/Helsinki-NLP/opus_gnome",
345
- "n_languages": 187,
346
- "tasks": [
347
- "translation"
348
- ],
349
- "parallel": true
350
- },
351
- {
352
- "name": "Opus Paracrawl",
353
- "author": "Helsinki NLP",
354
- "author_url": null,
355
- "url": "https://huggingface.co/datasets/Helsinki-NLP/opus_paracrawl",
356
- "n_languages": 43,
357
- "tasks": [
358
- "translation"
359
- ],
360
- "parallel": false
361
- },
362
- {
363
- "name": "CCAligned",
364
- "author": "Meta",
365
- "author_url": "https://ai.meta.com",
366
- "url": "https://huggingface.co/datasets/ahelk/ccaligned_multilingual",
367
- "n_languages": 137,
368
- "tasks": [
369
- "translation"
370
- ],
371
- "parallel": false
372
- },
373
- {
374
- "name": "OPUS Collection",
375
- "author": "Helsinki NLP",
376
- "author_url": null,
377
- "url": "https://opus.nlpl.eu",
378
- "n_languages": 747,
379
- "tasks": [
380
- "translation"
381
- ],
382
- "parallel": false
383
- },
384
- {
385
- "name": "MasakhaNER",
386
- "author": "Masakhane",
387
- "author_url": "https://www.masakhane.io",
388
- "url": "https://huggingface.co/datasets/masakhane/masakhaner",
389
- "n_languages": 10,
390
- "tasks": [
391
- "ner"
392
- ],
393
- "parallel": null
394
- },
395
- {
396
- "name": "Multilingual Sentiments",
397
- "author": "Academic",
398
- "author_url": null,
399
- "url": "https://huggingface.co/datasets/tyqiangz/multilingual-sentiments",
400
- "n_languages": 12,
401
- "tasks": [
402
- "sentiment_analysis"
403
- ],
404
- "parallel": null
405
- },
406
- {
407
- "name": "CulturaX",
408
- "author": "Academic",
409
- "author_url": null,
410
- "url": "https://huggingface.co/datasets/uonlp/CulturaX",
411
- "n_languages": 167,
412
- "tasks": [
413
- "language_modeling"
414
- ],
415
- "parallel": false
416
- },
417
- {
418
- "name": "Tülu 3 SFT Mixture",
419
- "author": "AllenAI",
420
- "author_url": "https://allenai.org",
421
- "url": "https://huggingface.co/datasets/allenai/tulu-3-sft-mixture",
422
- "n_languages": 70,
423
- "tasks": [
424
- "instruction_following"
425
- ],
426
- "parallel": false
427
- },
428
- {
429
- "name": "xP3",
430
- "author": "BigScience",
431
- "author_url": "https://bigscience.huggingface.co",
432
- "url": "https://huggingface.co/datasets/bigscience/xP3",
433
- "n_languages": 46,
434
- "tasks": [
435
- "instruction_following"
436
- ],
437
- "parallel": false
438
- },
439
- {
440
- "name": "Aya",
441
- "author": "Cohere",
442
- "author_url": "https://cohere.com",
443
- "url": "https://huggingface.co/datasets/CohereForAI/aya_dataset",
444
- "n_languages": 65,
445
- "tasks": [
446
- "instruction_following"
447
- ],
448
- "parallel": null
449
- },
450
- {
451
- "name": "Lanfrica",
452
- "author": "Lanfrica",
453
- "author_url": "https://lanfrica.com",
454
- "url": "https://lanfrica.com/records?language=yor&task=machine%20translation",
455
- "n_languages": 2200,
456
- "tasks": [
457
- "datasets"
458
- ],
459
- "parallel": null
460
- },
461
- {
462
- "name": "HuggingFace Languages",
463
- "author": "HuggingFace",
464
- "author_url": "https://huggingface.co",
465
- "url": "https://huggingface.co/languages",
466
- "n_languages": 4680,
467
- "tasks": [
468
- "datasets",
469
- "models"
470
- ],
471
- "parallel": null
472
- },
473
- {
474
- "name": "HuggingFace Multilingual Datasets",
475
- "author": "HuggingFace",
476
- "author_url": "https://huggingface.co",
477
- "url": "https://huggingface.co/datasets?other=multilinguality:multilingual",
478
- "n_languages": 2012,
479
- "tasks": [
480
- "datasets"
481
- ],
482
- "parallel": false
483
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
484
  ]
 
1
  [
2
+ {
3
+ "name": "FLORES+",
4
+ "author": "Meta",
5
+ "author_url": "https://ai.meta.com",
6
+ "url": "https://huggingface.co/datasets/openlanguagedata/flores_plus",
7
+ "n_languages": 200,
8
+ "tasks": [
9
+ "translation",
10
+ "classification",
11
+ "language_modeling"
12
+ ],
13
+ "parallel": true,
14
+ "base": "FLORES",
15
+ "implemented": true,
16
+ "group": "Low-Resource Languages"
17
+ },
18
+ {
19
+ "name": "FLEURS",
20
+ "author": "Google",
21
+ "author_url": "https://google.com",
22
+ "url": "https://huggingface.co/datasets/google/fleurs",
23
+ "n_languages": 102,
24
+ "tasks": [
25
+ "speech_recognition"
26
+ ],
27
+ "parallel": true,
28
+ "base": "FLORES",
29
+ "implemented": true,
30
+ "group": "Low-Resource Languages"
31
+ },
32
+ {
33
+ "name": "CommonVoice",
34
+ "author": "Mozilla",
35
+ "author_url": "https://blog.mozilla.ai",
36
+ "url": "https://huggingface.co/datasets/mozilla-foundation/common_voice_17_0",
37
+ "n_languages": 124,
38
+ "tasks": [
39
+ "speech_recognition"
40
+ ],
41
+ "parallel": null,
42
+ "group": "Low-Resource Languages"
43
+ },
44
+ {
45
+ "name": "MMMLU",
46
+ "author": "OpenAI",
47
+ "author_url": "https://openai.com",
48
+ "url": "https://huggingface.co/datasets/openai/MMMLU",
49
+ "n_languages": 14,
50
+ "tasks": [
51
+ "question_answering"
52
+ ],
53
+ "parallel": true,
54
+ "base": "MMLU",
55
+ "group": "Multitask Language Understanding"
56
+ },
57
+ {
58
+ "name": "AfriMMLU",
59
+ "author": "Masakhane",
60
+ "author_url": "https://www.masakhane.io",
61
+ "url": "https://huggingface.co/datasets/masakhane/afrimmlu",
62
+ "n_languages": 17,
63
+ "tasks": [
64
+ "question_answering"
65
+ ],
66
+ "parallel": true,
67
+ "base": "MMLU",
68
+ "group": "Multitask Language Understanding"
69
+ },
70
+ {
71
+ "name": "Okapi MMLU",
72
+ "author": "Academic",
73
+ "author_url": null,
74
+ "url": "https://huggingface.co/datasets/jon-tow/okapi_mmlu",
75
+ "n_languages": 16,
76
+ "tasks": [
77
+ "question_answering"
78
+ ],
79
+ "parallel": true,
80
+ "base": "MMLU",
81
+ "group": "Multitask Language Understanding"
82
+ },
83
+ {
84
+ "name": "MMLU-X",
85
+ "author": "OpenGPT-X",
86
+ "author_url": null,
87
+ "url": "https://huggingface.co/datasets/openGPT-X/mmlux",
88
+ "n_languages": 20,
89
+ "tasks": [
90
+ "question_answering"
91
+ ],
92
+ "parallel": true,
93
+ "base": "MMLU",
94
+ "group": "Multitask Language Understanding"
95
+ },
96
+ {
97
+ "name": "Global MMLU",
98
+ "author": "Cohere",
99
+ "author_url": "https://cohere.com",
100
+ "url": "https://huggingface.co/datasets/CohereForAI/Global-MMLU",
101
+ "n_languages": 42,
102
+ "tasks": [
103
+ "question_answering"
104
+ ],
105
+ "parallel": true,
106
+ "base": "MMLU",
107
+ "group": "Multitask Language Understanding"
108
+ },
109
+ {
110
+ "name": "Okapi ARC Challenge",
111
+ "author": "Academic",
112
+ "author_url": null,
113
+ "url": "https://huggingface.co/datasets/jon-tow/okapi_arc_challenge",
114
+ "n_languages": 31,
115
+ "tasks": [
116
+ "question_answering"
117
+ ],
118
+ "parallel": true,
119
+ "base": "AI2 ARC",
120
+ "group": "Abstract Reasoning"
121
+ },
122
+ {
123
+ "name": "Uhura ARC Easy",
124
+ "author": "Masakhane",
125
+ "author_url": "https://www.masakhane.io",
126
+ "url": "https://huggingface.co/datasets/masakhane/uhura-arc-easy",
127
+ "n_languages": 6,
128
+ "tasks": [
129
+ "question_answering"
130
+ ],
131
+ "parallel": true,
132
+ "base": "AI2 ARC",
133
+ "group": "Abstract Reasoning"
134
+ },
135
+ {
136
+ "name": "Arc-X",
137
+ "author": "OpenGPT-X",
138
+ "author_url": null,
139
+ "url": "https://huggingface.co/datasets/openGPT-X/arcx",
140
+ "n_languages": 20,
141
+ "tasks": [
142
+ "question_answering"
143
+ ],
144
+ "parallel": true,
145
+ "base": "AI2 ARC",
146
+ "group": "Abstract Reasoning"
147
+ },
148
+ {
149
+ "name": "Okapi TruthfulQA",
150
+ "author": "Academic",
151
+ "author_url": null,
152
+ "url": "https://huggingface.co/datasets/jon-tow/okapi_truthfulqa/tree/main/data",
153
+ "n_languages": 31,
154
+ "tasks": [
155
+ "question_answering"
156
+ ],
157
+ "parallel": true,
158
+ "base": "TruthfulQA",
159
+ "group": "Truthfulness"
160
+ },
161
+ {
162
+ "name": "Uhura TruthfulQA",
163
+ "author": "Masakhane",
164
+ "author_url": "https://www.masakhane.io",
165
+ "url": "https://huggingface.co/datasets/masakhane/uhura-truthfulqa",
166
+ "n_languages": 6,
167
+ "tasks": [
168
+ "question_answering"
169
+ ],
170
+ "parallel": true,
171
+ "base": "TruthfulQA",
172
+ "group": "Truthfulness"
173
+ },
174
+ {
175
+ "name": "TruthfulQA-X",
176
+ "author": "OpenGPT-X",
177
+ "author_url": null,
178
+ "url": "https://huggingface.co/datasets/openGPT-X/truthfulqax",
179
+ "n_languages": 20,
180
+ "tasks": [
181
+ "question_answering"
182
+ ],
183
+ "parallel": true,
184
+ "base": "TruthfulQA",
185
+ "group": "Truthfulness"
186
+ },
187
+ {
188
+ "name": "XNLI",
189
+ "author": "Meta",
190
+ "author_url": "https://ai.meta.com",
191
+ "url": "https://huggingface.co/datasets/facebook/xnli",
192
+ "n_languages": 14,
193
+ "tasks": [
194
+ "classification",
195
+ "logic"
196
+ ],
197
+ "parallel": true,
198
+ "base": "MNLI",
199
+ "group": "Natural Language Inference"
200
+ },
201
+ {
202
+ "name": "AfriXNLI",
203
+ "author": "Masakhane",
204
+ "author_url": "https://www.masakhane.io",
205
+ "url": "https://huggingface.co/datasets/masakhane/afrixnli",
206
+ "n_languages": 18,
207
+ "tasks": [
208
+ "classification",
209
+ "logic"
210
+ ],
211
+ "parallel": true,
212
+ "base": "MNLI",
213
+ "group": "Natural Language Inference"
214
+ },
215
+ {
216
+ "name": "XGLUE",
217
+ "author": "Microsoft",
218
+ "author_url": "https://microsoft.ai",
219
+ "url": "https://huggingface.co/datasets/microsoft/xglue",
220
+ "n_languages": 18,
221
+ "tasks": [
222
+ "pos"
223
+ ],
224
+ "parallel": null,
225
+ "base": "GLUE",
226
+ "group": "General Language Understanding"
227
+ },
228
+ {
229
+ "name": "IndicGLUE",
230
+ "author": "AI4Bharat",
231
+ "author_url": "https://models.ai4bharat.org",
232
+ "url": "https://huggingface.co/datasets/ai4bharat/indic_glue",
233
+ "n_languages": 11,
234
+ "tasks": [
235
+ "question_answering"
236
+ ],
237
+ "parallel": null,
238
+ "base": "GLUE",
239
+ "group": "General Language Understanding"
240
+ },
241
+ {
242
+ "name": "Okapi HellaSwag",
243
+ "author": "Academic",
244
+ "author_url": null,
245
+ "url": "https://huggingface.co/datasets/jon-tow/okapi_hellaswag",
246
+ "n_languages": 31,
247
+ "tasks": [
248
+ "question_answering"
249
+ ],
250
+ "parallel": true,
251
+ "base": "HellaSwag",
252
+ "group": "Adversarial Language Modelling"
253
+ },
254
+ {
255
+ "name": "HellaSwag-X",
256
+ "author": "OpenGPT-X",
257
+ "author_url": null,
258
+ "url": "https://huggingface.co/datasets/openGPT-X/hellaswagx",
259
+ "n_languages": 20,
260
+ "tasks": [
261
+ "question_answering"
262
+ ],
263
+ "parallel": true,
264
+ "base": "HellaSwag",
265
+ "group": "Adversarial Language Modelling"
266
+ },
267
+ {
268
+ "name": "MGSM",
269
+ "author": "Google",
270
+ "author_url": "https://google.com",
271
+ "url": "https://huggingface.co/datasets/juletxara/mgsm",
272
+ "n_languages": 10,
273
+ "tasks": [
274
+ "math"
275
+ ],
276
+ "parallel": true,
277
+ "base": "MGSM",
278
+ "group": "Grade School Math"
279
+ },
280
+ {
281
+ "name": "AfriMGSM",
282
+ "author": "Masakhane",
283
+ "author_url": "https://www.masakhane.io",
284
+ "url": "https://huggingface.co/datasets/masakhane/afrimgsm",
285
+ "n_languages": 18,
286
+ "tasks": [
287
+ "math"
288
+ ],
289
+ "parallel": true,
290
+ "base": "MGSM",
291
+ "group": "Grade School Math"
292
+ },
293
+ {
294
+ "name": "GSM8K-X",
295
+ "author": "OpenGPT-X",
296
+ "author_url": null,
297
+ "url": "https://huggingface.co/datasets/openGPT-X/gsm8kx",
298
+ "n_languages": 20,
299
+ "tasks": [
300
+ "math"
301
+ ],
302
+ "parallel": true,
303
+ "base": "MGSM",
304
+ "group": "Grade School Math"
305
+ },
306
+ {
307
+ "name": "WikiANN / PAN-X",
308
+ "author": "Academic",
309
+ "author_url": null,
310
+ "url": "https://huggingface.co/datasets/unimelb-nlp/wikiann",
311
+ "n_languages": 176,
312
+ "tasks": [
313
+ "ner"
314
+ ],
315
+ "parallel": false,
316
+ "group": "Named Entity Recognition"
317
+ },
318
+ {
319
+ "name": "MasakhaNER",
320
+ "author": "Masakhane",
321
+ "author_url": "https://www.masakhane.io",
322
+ "url": "https://huggingface.co/datasets/masakhane/masakhaner",
323
+ "n_languages": 10,
324
+ "tasks": [
325
+ "ner"
326
+ ],
327
+ "parallel": null,
328
+ "group": "Named Entity Recognition"
329
+ },
330
+ {
331
+ "name": "Opus Gnome",
332
+ "author": "Helsinki NLP",
333
+ "author_url": null,
334
+ "url": "https://huggingface.co/datasets/Helsinki-NLP/opus_gnome",
335
+ "n_languages": 187,
336
+ "tasks": [
337
+ "translation"
338
+ ],
339
+ "parallel": true,
340
+ "group": "Parallel Corpora"
341
+ },
342
+ {
343
+ "name": "Opus Paracrawl",
344
+ "author": "Helsinki NLP",
345
+ "author_url": null,
346
+ "url": "https://huggingface.co/datasets/Helsinki-NLP/opus_paracrawl",
347
+ "n_languages": 43,
348
+ "tasks": [
349
+ "translation"
350
+ ],
351
+ "parallel": false,
352
+ "group": "Parallel Corpora"
353
+ },
354
+ {
355
+ "name": "CCAligned",
356
+ "author": "Meta",
357
+ "author_url": "https://ai.meta.com",
358
+ "url": "https://huggingface.co/datasets/ahelk/ccaligned_multilingual",
359
+ "n_languages": 137,
360
+ "tasks": [
361
+ "translation"
362
+ ],
363
+ "parallel": false,
364
+ "group": "Parallel Corpora"
365
+ },
366
+ {
367
+ "name": "OPUS Collection",
368
+ "author": "Helsinki NLP",
369
+ "author_url": null,
370
+ "url": "https://opus.nlpl.eu",
371
+ "n_languages": 747,
372
+ "tasks": [
373
+ "translation"
374
+ ],
375
+ "parallel": false,
376
+ "group": "Parallel Corpora"
377
+ },
378
+ {
379
+ "name": "Tülu 3 SFT Mixture",
380
+ "author": "AllenAI",
381
+ "author_url": "https://allenai.org",
382
+ "url": "https://huggingface.co/datasets/allenai/tulu-3-sft-mixture",
383
+ "n_languages": 70,
384
+ "tasks": [
385
+ "instruction_following"
386
+ ],
387
+ "parallel": false,
388
+ "group": "Instruction Following"
389
+ },
390
+ {
391
+ "name": "xP3",
392
+ "author": "BigScience",
393
+ "author_url": "https://bigscience.huggingface.co",
394
+ "url": "https://huggingface.co/datasets/bigscience/xP3",
395
+ "n_languages": 46,
396
+ "tasks": [
397
+ "instruction_following"
398
+ ],
399
+ "parallel": false,
400
+ "group": "Instruction Following"
401
+ },
402
+ {
403
+ "name": "Aya",
404
+ "author": "Cohere",
405
+ "author_url": "https://cohere.com",
406
+ "url": "https://huggingface.co/datasets/CohereForAI/aya_dataset",
407
+ "n_languages": 65,
408
+ "tasks": [
409
+ "instruction_following"
410
+ ],
411
+ "parallel": null,
412
+ "group": "Instruction Following"
413
+ },
414
+ {
415
+ "name": "SEA-IFEVAL",
416
+ "author": "AI Singapore",
417
+ "author_url": "https://aisingapore.org",
418
+ "url": "https://huggingface.co/datasets/aisingapore/instruction_following-ifeval",
419
+ "n_languages": 7,
420
+ "tasks": [
421
+ "instruction_following"
422
+ ],
423
+ "parallel": true,
424
+ "base": "IFEVAL",
425
+ "group": "Instruction Following"
426
+ },
427
+ {
428
+ "name": "CulturaX",
429
+ "author": "Academic",
430
+ "author_url": null,
431
+ "url": "https://huggingface.co/datasets/uonlp/CulturaX",
432
+ "n_languages": 167,
433
+ "tasks": [
434
+ "language_modeling"
435
+ ],
436
+ "parallel": false,
437
+ "group": "Other Tasks"
438
+ },
439
+ {
440
+ "name": "XTREME",
441
+ "author": "Google",
442
+ "author_url": "https://google.com",
443
+ "url": "https://huggingface.co/datasets/google/xtreme",
444
+ "n_languages": 40,
445
+ "tasks": [
446
+ "translation",
447
+ "classification",
448
+ "question_answering",
449
+ "ner"
450
+ ],
451
+ "parallel": null,
452
+ "group": "Other Tasks"
453
+ },
454
+ {
455
+ "name": "XLSUM",
456
+ "author": "Academic",
457
+ "author_url": null,
458
+ "url": "https://huggingface.co/datasets/csebuetnlp/xlsum",
459
+ "n_languages": 45,
460
+ "tasks": [
461
+ "summarization"
462
+ ],
463
+ "parallel": true,
464
+ "group": "Other Tasks"
465
+ },
466
+ {
467
+ "name": "MSVAMP",
468
+ "author": "Microsoft",
469
+ "author_url": "https://microsoft.ai",
470
+ "url": "https://huggingface.co/datasets/Mathoctopus/MSVAMP",
471
+ "n_languages": 10,
472
+ "tasks": [
473
+ "math"
474
+ ],
475
+ "parallel": true,
476
+ "group": "Other Tasks"
477
+ },
478
+ {
479
+ "name": "Multilingual Sentiments",
480
+ "author": "Academic",
481
+ "author_url": null,
482
+ "url": "https://huggingface.co/datasets/tyqiangz/multilingual-sentiments",
483
+ "n_languages": 12,
484
+ "tasks": [
485
+ "sentiment_analysis"
486
+ ],
487
+ "parallel": null,
488
+ "group": "Other Tasks"
489
+ },
490
+ {
491
+ "name": "Lanfrica",
492
+ "author": "Lanfrica",
493
+ "author_url": "https://lanfrica.com",
494
+ "url": "https://lanfrica.com/records?language=yor&task=machine%20translation",
495
+ "n_languages": 2200,
496
+ "tasks": [
497
+ "datasets"
498
+ ],
499
+ "parallel": null,
500
+ "group": "Dataset Collections"
501
+ },
502
+ {
503
+ "name": "HuggingFace Languages",
504
+ "author": "HuggingFace",
505
+ "author_url": "https://huggingface.co",
506
+ "url": "https://huggingface.co/languages",
507
+ "n_languages": 4680,
508
+ "tasks": [
509
+ "datasets",
510
+ "models"
511
+ ],
512
+ "parallel": null,
513
+ "group": "Dataset Collections"
514
+ },
515
+ {
516
+ "name": "HuggingFace Multilingual Datasets",
517
+ "author": "HuggingFace",
518
+ "author_url": "https://huggingface.co",
519
+ "url": "https://huggingface.co/datasets?other=multilinguality:multilingual",
520
+ "n_languages": 2012,
521
+ "tasks": [
522
+ "datasets"
523
+ ],
524
+ "parallel": false,
525
+ "group": "Dataset Collections"
526
+ }
527
  ]
frontend/src/components/DatasetTable.js CHANGED
@@ -70,6 +70,11 @@ const DatasetTable = ({ data }) => {
70
  return (
71
  <DataTable
72
  value={table}
 
 
 
 
 
73
  header={<>Datasets</>}
74
  removableSort
75
  filters={filters}
 
70
  return (
71
  <DataTable
72
  value={table}
73
+ rowGroupMode='subheader'
74
+ rowGroupHeaderTemplate={rowData => {
75
+ return <div style={{ fontWeight: 'bold' }}>{rowData.group}</div>
76
+ }}
77
+ groupRowsBy='group'
78
  header={<>Datasets</>}
79
  removableSort
80
  filters={filters}
frontend/src/components/LanguageTable.js CHANGED
@@ -134,7 +134,7 @@ const LanguageTable = ({ data, selectedLanguages, setSelectedLanguages }) => {
134
  selection={selectedLanguages}
135
  onSelectionChange={e => setSelectedLanguages(e.value)}
136
  frozenValue={selectedLanguages}
137
- virtualScrollerOptions={{ itemSize: 100 }}
138
  scrollable
139
  scrollHeight='600px'
140
  id='language-table'
 
134
  selection={selectedLanguages}
135
  onSelectionChange={e => setSelectedLanguages(e.value)}
136
  frozenValue={selectedLanguages}
137
+ virtualScrollerOptions={{ itemSize: 60 }}
138
  scrollable
139
  scrollHeight='600px'
140
  id='language-table'