David Pomerenke committed on
Commit
43057f8
·
1 Parent(s): 9cd1249

Add OpenGPT-X

Browse files
data/datasets.json CHANGED
@@ -74,6 +74,18 @@
74
  "parallel": true,
75
  "base": "MMLU"
76
  },
 
 
 
 
 
 
 
 
 
 
 
 
77
  {
78
  "name": "Global MMLU",
79
  "author": "Cohere",
@@ -110,6 +122,18 @@
110
  "parallel": true,
111
  "base": "MGSM"
112
  },
 
 
 
 
 
 
 
 
 
 
 
 
113
  {
114
  "name": "Okapi ARC Challenge",
115
  "author": "Academic",
@@ -134,6 +158,18 @@
134
  "parallel": true,
135
  "base": "AI2 ARC"
136
  },
 
 
 
 
 
 
 
 
 
 
 
 
137
  {
138
  "name": "Okapi TruthfulQA",
139
  "author": "Academic",
@@ -158,6 +194,18 @@
158
  "parallel": true,
159
  "base": "TruthfulQA"
160
  },
 
 
 
 
 
 
 
 
 
 
 
 
161
  {
162
  "name": "XNLI",
163
  "author": "Meta",
@@ -194,6 +242,18 @@
194
  "parallel": true,
195
  "base": "HellaSwag"
196
  },
 
 
 
 
 
 
 
 
 
 
 
 
197
  {
198
  "name": "WikiANN / PAN-X",
199
  "author": "Academic",
@@ -208,7 +268,7 @@
208
  {
209
  "name": "MSVAMP",
210
  "author": "Microsoft",
211
- "author_url": "https://microsoft.com",
212
  "url": "https://huggingface.co/datasets/Mathoctopus/MSVAMP",
213
  "n_languages": 10,
214
  "tasks": [
@@ -256,7 +316,7 @@
256
  {
257
  "name": "XGLUE",
258
  "author": "Microsoft",
259
- "author_url": "https://microsoft.com",
260
  "url": "https://huggingface.co/datasets/microsoft/xglue",
261
  "n_languages": 18,
262
  "tasks": [
 
74
  "parallel": true,
75
  "base": "MMLU"
76
  },
77
+ {
78
+ "name": "MMLU-X",
79
+ "author": "OpenGPT-X",
80
+ "author_url": null,
81
+ "url": "https://huggingface.co/datasets/openGPT-X/mmlux",
82
+ "n_languages": 20,
83
+ "tasks": [
84
+ "question_answering"
85
+ ],
86
+ "parallel": true,
87
+ "base": "MMLU"
88
+ },
89
  {
90
  "name": "Global MMLU",
91
  "author": "Cohere",
 
122
  "parallel": true,
123
  "base": "MGSM"
124
  },
125
+ {
126
+ "name": "GSM8K-X",
127
+ "author": "OpenGPT-X",
128
+ "author_url": null,
129
+ "url": "https://huggingface.co/datasets/openGPT-X/gsm8kx",
130
+ "n_languages": 20,
131
+ "tasks": [
132
+ "math"
133
+ ],
134
+ "parallel": true,
135
+ "base": "MGSM"
136
+ },
137
  {
138
  "name": "Okapi ARC Challenge",
139
  "author": "Academic",
 
158
  "parallel": true,
159
  "base": "AI2 ARC"
160
  },
161
+ {
162
+ "name": "Arc-X",
163
+ "author": "OpenGPT-X",
164
+ "author_url": null,
165
+ "url": "https://huggingface.co/datasets/openGPT-X/arcx",
166
+ "n_languages": 20,
167
+ "tasks": [
168
+ "question_answering"
169
+ ],
170
+ "parallel": true,
171
+ "base": "AI2 ARC"
172
+ },
173
  {
174
  "name": "Okapi TruthfulQA",
175
  "author": "Academic",
 
194
  "parallel": true,
195
  "base": "TruthfulQA"
196
  },
197
+ {
198
+ "name": "TruthfulQA-X",
199
+ "author": "OpenGPT-X",
200
+ "author_url": null,
201
+ "url": "https://huggingface.co/datasets/openGPT-X/truthfulqax",
202
+ "n_languages": 20,
203
+ "tasks": [
204
+ "question_answering"
205
+ ],
206
+ "parallel": true,
207
+ "base": "TruthfulQA"
208
+ },
209
  {
210
  "name": "XNLI",
211
  "author": "Meta",
 
242
  "parallel": true,
243
  "base": "HellaSwag"
244
  },
245
+ {
246
+ "name": "HellaSwag-X",
247
+ "author": "OpenGPT-X",
248
+ "author_url": null,
249
+ "url": "https://huggingface.co/datasets/openGPT-X/hellaswagx",
250
+ "n_languages": 20,
251
+ "tasks": [
252
+ "question_answering"
253
+ ],
254
+ "parallel": true,
255
+ "base": "HellaSwag"
256
+ },
257
  {
258
  "name": "WikiANN / PAN-X",
259
  "author": "Academic",
 
268
  {
269
  "name": "MSVAMP",
270
  "author": "Microsoft",
271
+ "author_url": "https://microsoft.ai",
272
  "url": "https://huggingface.co/datasets/Mathoctopus/MSVAMP",
273
  "n_languages": 10,
274
  "tasks": [
 
316
  {
317
  "name": "XGLUE",
318
  "author": "Microsoft",
319
+ "author_url": "https://microsoft.ai",
320
  "url": "https://huggingface.co/datasets/microsoft/xglue",
321
  "n_languages": 18,
322
  "tasks": [
evals/models.py CHANGED
@@ -28,6 +28,7 @@ models = [
28
  # "microsoft/phi-4", # 0.07$/M tokens; only 16k tokens context
29
  "microsoft/phi-4-multimodal-instruct",
30
  "amazon/nova-micro-v1", # 0.09$/M tokens
 
31
  ]
32
  model_fast = "meta-llama/llama-3.3-70b-instruct"
33
 
 
28
  # "microsoft/phi-4", # 0.07$/M tokens; only 16k tokens context
29
  "microsoft/phi-4-multimodal-instruct",
30
  "amazon/nova-micro-v1", # 0.09$/M tokens
31
+ # "openGPT-X/Teuken-7B-instruct-research-v0.4", # not on OpenRouter
32
  ]
33
  model_fast = "meta-llama/llama-3.3-70b-instruct"
34
 
frontend/public/results.json CHANGED
@@ -9756,6 +9756,19 @@
9756
  "base": "MMLU",
9757
  "implemented": null
9758
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
9759
  {
9760
  "name": "Global MMLU",
9761
  "author": "Cohere",
@@ -9795,6 +9808,19 @@
9795
  "base": "MGSM",
9796
  "implemented": null
9797
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
9798
  {
9799
  "name": "Okapi ARC Challenge",
9800
  "author": "Academic",
@@ -9821,6 +9847,19 @@
9821
  "base": "AI2 ARC",
9822
  "implemented": null
9823
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
9824
  {
9825
  "name": "Okapi TruthfulQA",
9826
  "author": "Academic",
@@ -9847,6 +9886,19 @@
9847
  "base": "TruthfulQA",
9848
  "implemented": null
9849
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
9850
  {
9851
  "name": "XNLI",
9852
  "author": "Meta",
@@ -9886,6 +9938,19 @@
9886
  "base": "HellaSwag",
9887
  "implemented": null
9888
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
9889
  {
9890
  "name": "WikiANN / PAN-X",
9891
  "author": "Academic",
@@ -9902,7 +9967,7 @@
9902
  {
9903
  "name": "MSVAMP",
9904
  "author": "Microsoft",
9905
- "author_url": "https://microsoft.com",
9906
  "url": "https://huggingface.co/datasets/Mathoctopus/MSVAMP",
9907
  "n_languages": 10,
9908
  "tasks": [
@@ -9957,7 +10022,7 @@
9957
  {
9958
  "name": "XGLUE",
9959
  "author": "Microsoft",
9960
- "author_url": "https://microsoft.com",
9961
  "url": "https://huggingface.co/datasets/microsoft/xglue",
9962
  "n_languages": 18,
9963
  "tasks": [
 
9756
  "base": "MMLU",
9757
  "implemented": null
9758
  },
9759
+ {
9760
+ "name": "MMLU-X",
9761
+ "author": "OpenGPT-X",
9762
+ "author_url": null,
9763
+ "url": "https://huggingface.co/datasets/openGPT-X/mmlux",
9764
+ "n_languages": 20,
9765
+ "tasks": [
9766
+ "question_answering"
9767
+ ],
9768
+ "parallel": 1.0,
9769
+ "base": "MMLU",
9770
+ "implemented": null
9771
+ },
9772
  {
9773
  "name": "Global MMLU",
9774
  "author": "Cohere",
 
9808
  "base": "MGSM",
9809
  "implemented": null
9810
  },
9811
+ {
9812
+ "name": "GSM8K-X",
9813
+ "author": "OpenGPT-X",
9814
+ "author_url": null,
9815
+ "url": "https://huggingface.co/datasets/openGPT-X/gsm8kx",
9816
+ "n_languages": 20,
9817
+ "tasks": [
9818
+ "math"
9819
+ ],
9820
+ "parallel": 1.0,
9821
+ "base": "MGSM",
9822
+ "implemented": null
9823
+ },
9824
  {
9825
  "name": "Okapi ARC Challenge",
9826
  "author": "Academic",
 
9847
  "base": "AI2 ARC",
9848
  "implemented": null
9849
  },
9850
+ {
9851
+ "name": "Arc-X",
9852
+ "author": "OpenGPT-X",
9853
+ "author_url": null,
9854
+ "url": "https://huggingface.co/datasets/openGPT-X/arcx",
9855
+ "n_languages": 20,
9856
+ "tasks": [
9857
+ "question_answering"
9858
+ ],
9859
+ "parallel": 1.0,
9860
+ "base": "AI2 ARC",
9861
+ "implemented": null
9862
+ },
9863
  {
9864
  "name": "Okapi TruthfulQA",
9865
  "author": "Academic",
 
9886
  "base": "TruthfulQA",
9887
  "implemented": null
9888
  },
9889
+ {
9890
+ "name": "TruthfulQA-X",
9891
+ "author": "OpenGPT-X",
9892
+ "author_url": null,
9893
+ "url": "https://huggingface.co/datasets/openGPT-X/truthfulqax",
9894
+ "n_languages": 20,
9895
+ "tasks": [
9896
+ "question_answering"
9897
+ ],
9898
+ "parallel": 1.0,
9899
+ "base": "TruthfulQA",
9900
+ "implemented": null
9901
+ },
9902
  {
9903
  "name": "XNLI",
9904
  "author": "Meta",
 
9938
  "base": "HellaSwag",
9939
  "implemented": null
9940
  },
9941
+ {
9942
+ "name": "HellaSwag-X",
9943
+ "author": "OpenGPT-X",
9944
+ "author_url": null,
9945
+ "url": "https://huggingface.co/datasets/openGPT-X/hellaswagx",
9946
+ "n_languages": 20,
9947
+ "tasks": [
9948
+ "question_answering"
9949
+ ],
9950
+ "parallel": 1.0,
9951
+ "base": "HellaSwag",
9952
+ "implemented": null
9953
+ },
9954
  {
9955
  "name": "WikiANN / PAN-X",
9956
  "author": "Academic",
 
9967
  {
9968
  "name": "MSVAMP",
9969
  "author": "Microsoft",
9970
+ "author_url": "https://microsoft.ai",
9971
  "url": "https://huggingface.co/datasets/Mathoctopus/MSVAMP",
9972
  "n_languages": 10,
9973
  "tasks": [
 
10022
  {
10023
  "name": "XGLUE",
10024
  "author": "Microsoft",
10025
+ "author_url": "https://microsoft.ai",
10026
  "url": "https://huggingface.co/datasets/microsoft/xglue",
10027
  "n_languages": 18,
10028
  "tasks": [