David Pomerenke committed on
Commit
c1db7ba
·
1 Parent(s): 2c21cf7

Add indic datasets

Browse files
Files changed (1) hide show
  1. datasets.json +73 -49
datasets.json CHANGED
@@ -167,6 +167,17 @@
167
  "parallel": true,
168
  "base": "XNLI"
169
  },
 
 
 
 
 
 
 
 
 
 
 
170
  {
171
  "name": "Okapi HellaSwag",
172
  "author": "Okapi",
@@ -179,51 +190,23 @@
179
  "base": "HellaSwag"
180
  },
181
  {
182
- "name": "WikiANN / PAN-X",
183
- "author": "Academic",
184
- "url": "https://huggingface.co/datasets/unimelb-nlp/wikiann",
185
- "n_languages": 176,
186
  "tasks": [
 
 
 
187
  "ner"
188
  ],
189
- "parallel": false
190
- },
191
- {
192
- "name": "MSVAMP",
193
- "author": "Microsoft",
194
- "url": "https://huggingface.co/datasets/Mathoctopus/MSVAMP",
195
- "n_languages": 10,
196
- "tasks": [
197
- "math"
198
- ],
199
- "parallel": true
200
- },
201
- {
202
- "name": "XLSUM",
203
- "author": "Academic",
204
- "url": "https://huggingface.co/datasets/csebuetnlp/xlsum",
205
- "n_languages": 45,
206
- "tasks": [
207
- "summarization"
208
- ],
209
- "parallel": true
210
- },
211
- {
212
- "name": "SEA-IFEVAL",
213
- "author": "AI Singapore",
214
- "url": "https://huggingface.co/datasets/aisingapore/instruction_following-ifeval",
215
- "n_languages": 7,
216
- "tasks": [
217
- "instruction_following"
218
- ],
219
- "parallel": true,
220
- "base": "IFEVAL"
221
  },
222
  {
223
- "name": "XTREME",
224
- "author": "Google",
225
- "url": "https://huggingface.co/datasets/google/xtreme",
226
- "n_languages": 40,
227
  "tasks": [
228
  "translation",
229
  "classification",
@@ -254,6 +237,16 @@
254
  "parallel": null,
255
  "base": "GLUE"
256
  },
 
 
 
 
 
 
 
 
 
 
257
  {
258
  "name": "Opus Gnome",
259
  "author": "Helsinki NLP",
@@ -275,25 +268,56 @@
275
  "parallel": false
276
  },
277
  {
278
- "name": "CCAligned",
279
- "author": "Meta",
280
- "url": "https://huggingface.co/datasets/ahelk/ccaligned_multilingual",
281
- "n_languages": 137,
282
  "tasks": [
283
  "translation"
284
  ],
285
  "parallel": false
286
  },
287
  {
288
- "name": "OPUS Collection",
289
- "author": "Helsinki NLP",
290
- "url": "https://opus.nlpl.eu/",
291
- "n_languages": 747,
292
  "tasks": [
293
- "translation"
294
  ],
295
  "parallel": false
296
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
297
  {
298
  "name": "MasakhaNER",
299
  "author": "Masakhane",
 
167
  "parallel": true,
168
  "base": "XNLI"
169
  },
170
+ {
171
+ "name": "IndicXNLI",
172
+ "author": "AI4Bharat",
173
+ "url": "https://huggingface.co/datasets/Divyanshu/indicxnli",
174
+ "n_languages": 11,
175
+ "tasks": [
176
+ "classification"
177
+ ],
178
+ "parallel": true,
179
+ "base": "XNLI"
180
+ },
181
  {
182
  "name": "Okapi HellaSwag",
183
  "author": "Okapi",
 
190
  "base": "HellaSwag"
191
  },
192
  {
193
+ "name": "XTREME",
194
+ "author": "Google",
195
+ "url": "https://huggingface.co/datasets/google/xtreme",
196
+ "n_languages": 40,
197
  "tasks": [
198
+ "translation",
199
+ "classification",
200
+ "question_answering",
201
  "ner"
202
  ],
203
+ "parallel": null
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
204
  },
205
  {
206
+ "name": "IndicXTREME",
207
+ "author": "AI4Bharat",
208
+ "url": "https://huggingface.co/collections/ai4bharat/indicxtreme-66c59f576386ba2955650030",
209
+ "n_languages": 20,
210
  "tasks": [
211
  "translation",
212
  "classification",
 
237
  "parallel": null,
238
  "base": "GLUE"
239
  },
240
+ {
241
+ "name": "CCAligned",
242
+ "author": "Meta",
243
+ "url": "https://huggingface.co/datasets/ahelk/ccaligned_multilingual",
244
+ "n_languages": 137,
245
+ "tasks": [
246
+ "translation"
247
+ ],
248
+ "parallel": false
249
+ },
250
  {
251
  "name": "Opus Gnome",
252
  "author": "Helsinki NLP",
 
268
  "parallel": false
269
  },
270
  {
271
+ "name": "OPUS Collection",
272
+ "author": "Helsinki NLP",
273
+ "url": "https://opus.nlpl.eu/",
274
+ "n_languages": 747,
275
  "tasks": [
276
  "translation"
277
  ],
278
  "parallel": false
279
  },
280
  {
281
+ "name": "WikiANN / PAN-X",
282
+ "author": "Academic",
283
+ "url": "https://huggingface.co/datasets/unimelb-nlp/wikiann",
284
+ "n_languages": 176,
285
  "tasks": [
286
+ "ner"
287
  ],
288
  "parallel": false
289
  },
290
+ {
291
+ "name": "MSVAMP",
292
+ "author": "Microsoft",
293
+ "url": "https://huggingface.co/datasets/Mathoctopus/MSVAMP",
294
+ "n_languages": 10,
295
+ "tasks": [
296
+ "math"
297
+ ],
298
+ "parallel": true
299
+ },
300
+ {
301
+ "name": "XLSUM",
302
+ "author": "Academic",
303
+ "url": "https://huggingface.co/datasets/csebuetnlp/xlsum",
304
+ "n_languages": 45,
305
+ "tasks": [
306
+ "summarization"
307
+ ],
308
+ "parallel": true
309
+ },
310
+ {
311
+ "name": "SEA-IFEVAL",
312
+ "author": "AI Singapore",
313
+ "url": "https://huggingface.co/datasets/aisingapore/instruction_following-ifeval",
314
+ "n_languages": 7,
315
+ "tasks": [
316
+ "instruction_following"
317
+ ],
318
+ "parallel": true,
319
+ "base": "IFEVAL"
320
+ },
321
  {
322
  "name": "MasakhaNER",
323
  "author": "Masakhane",