|
task,metric,value,err,version
|
|
anli_r1,acc,0.33,0.014876872027456732,0
|
|
anli_r2,acc,0.337,0.014955087918653598,0
|
|
anli_r3,acc,0.3375,0.013655897185463657,0
|
|
arc_challenge,acc,0.27986348122866894,0.013119040897725922,0
|
|
arc_challenge,acc_norm,0.2986348122866894,0.01337407861506875,0
|
|
arc_easy,acc,0.6056397306397306,0.010028176038393,0
|
|
arc_easy,acc_norm,0.5808080808080808,0.010124905282491183,0
|
|
boolq,acc,0.5825688073394495,0.008624990050216684,1
|
|
cb,acc,0.44642857142857145,0.06703189227942398,1
|
|
cb,f1,0.2706349206349206,,1
|
|
copa,acc,0.78,0.04163331998932261,0
|
|
hellaswag,acc,0.43487353116908983,0.004947272454226208,0
|
|
hellaswag,acc_norm,0.5603465445130452,0.004953305461311746,0
|
|
piqa,acc,0.7149075081610446,0.010533270588738937,0
|
|
piqa,acc_norm,0.7110990206746464,0.010575111841364908,0
|
|
rte,acc,0.5270758122743683,0.030052303463143706,0
|
|
sciq,acc,0.875,0.010463483381956722,0
|
|
sciq,acc_norm,0.842,0.011539894677559559,0
|
|
storycloze_2016,acc,0.7033671833244255,0.010562819181563227,0
|
|
winogrande,acc,0.5785319652722968,0.013878072377497606,0
|
|
|