lewtun HF Staff committed on
Commit
f930f2a
·
1 Parent(s): 0a8a8b8

Add pass@1

Browse files
Files changed (1) hide show
  1. app.py +27 -6
app.py CHANGED
@@ -82,12 +82,33 @@ def get_leaderboard_df():
82
  df.loc[model_revision, task] = float(value)
83
  # AIME24 and 25 report pass@1
84
  elif task.lower() in ["aime24", "aime25"]:
85
- value = (
86
- data["results"]["all"]["math_pass@1:32_samples"]
87
- if "math_pass@1:32_samples" in data["results"]["all"]
88
- else -1
89
- )
90
- df.loc[model_revision, task] = float(value)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
91
  # MATH reports qem
92
  elif task.lower() in ["aimo_kaggle", "math_deepseek_cot", "math_deepseek_rl_cot"]:
93
  value = data["results"]["all"]["qem"]
 
82
  df.loc[model_revision, task] = float(value)
83
  # AIME24 and 25 report pass@1
84
  elif task.lower() in ["aime24", "aime25"]:
85
+ # Check for 32 samples
86
+ if "math_pass@1:32_samples" in data["results"]["all"]:
87
+ value = data["results"]["all"]["math_pass@1:32_samples"]
88
+ df.loc[model_revision, f"{task} (n=32)"] = float(value)
89
+
90
+ # Check for 64 samples
91
+ if "math_pass@1:64_samples" in data["results"]["all"]:
92
+ value = data["results"]["all"]["math_pass@1:64_samples"]
93
+ df.loc[model_revision, f"{task} (n=64)"] = float(value)
94
+
95
+ # For backward compatibility, also store in the original column name if any value exists
96
+ if "math_pass@1:32_samples" in data["results"]["all"]:
97
+ df.loc[model_revision, task] = float(data["results"]["all"]["math_pass@1:32_samples"])
98
+ elif "math_pass@1:64_samples" in data["results"]["all"]:
99
+ df.loc[model_revision, task] = float(data["results"]["all"]["math_pass@1:64_samples"])
100
+ # GPQA now reports pass@1
101
+ elif task.lower() == "gpqa":
102
+ # Check for 8 samples
103
+ if "gpqa_pass@1:8_samples" in data["results"]["all"]:
104
+ value = data["results"]["all"]["gpqa_pass@1:8_samples"]
105
+ df.loc[model_revision, f"{task} (n=8)"] = float(value)
106
+
107
+ # For backward compatibility, also store in the original column name if any value exists
108
+ if "extractive_match" in data["results"]["all"]:
109
+ df.loc[model_revision, task] = float(data["results"]["all"]["extractive_match"])
110
+ elif "gpqa_pass@1:8_samples" in data["results"]["all"]:
111
+ df.loc[model_revision, task] = float(data["results"]["all"]["gpqa_pass@1:8_samples"])
112
  # MATH reports qem
113
  elif task.lower() in ["aimo_kaggle", "math_deepseek_cot", "math_deepseek_rl_cot"]:
114
  value = data["results"]["all"]["qem"]