Spaces:
Running
Running
Commit
·
640136e
1
Parent(s):
866d755
update dataset name and citation
Browse files
app.py
CHANGED
@@ -20,7 +20,7 @@ api = HfApi()
|
|
20 |
COLLAB_TOKEN = os.environ.get("COLLAB_TOKEN")
|
21 |
evals_repo = "allenai/reward-bench-v2-results"
|
22 |
|
23 |
-
eval_set_repo = "allenai/reward-bench-
|
24 |
eval_set_repo_v1 = "allenai/reward-bench"
|
25 |
|
26 |
repo_dir_rewardbench = "./evals/rewardbench/"
|
@@ -42,6 +42,7 @@ repo = snapshot_download(
|
|
42 |
|
43 |
|
44 |
def avg_over_rewardbench_v2(dataframe_core):
|
|
|
45 |
domain_cols = ["factuality", "precise if", "math", "safety", "chat", "ties"]
|
46 |
domain_weights = [1, 1, 1, 1, 1, 1]
|
47 |
new_df = dataframe_core.copy()
|
@@ -536,7 +537,7 @@ with gr.Blocks(theme=theme, css=custom_css) as app:
|
|
536 |
title={RewardBench 2: Advancing Reward Model Evaluation},
|
537 |
author={Malik, Saumya and Pyatkin, Valentina and Land, Sander and Morrison, Jacob and Smith, Noah A. and Hajishirzi, Hannaneh and Lambert, Nathan},
|
538 |
year={2025},
|
539 |
-
howpublished={\url{https://huggingface.co/spaces/allenai/reward-bench
|
540 |
}
|
541 |
|
542 |
@misc{RewardBench,
|
|
|
20 |
COLLAB_TOKEN = os.environ.get("COLLAB_TOKEN")
|
21 |
evals_repo = "allenai/reward-bench-v2-results"
|
22 |
|
23 |
+
eval_set_repo = "allenai/reward-bench-2"
|
24 |
eval_set_repo_v1 = "allenai/reward-bench"
|
25 |
|
26 |
repo_dir_rewardbench = "./evals/rewardbench/"
|
|
|
42 |
|
43 |
|
44 |
def avg_over_rewardbench_v2(dataframe_core):
|
45 |
+
# TODO: update the domain column names below once the evals are rerun on new data that uses different names
|
46 |
domain_cols = ["factuality", "precise if", "math", "safety", "chat", "ties"]
|
47 |
domain_weights = [1, 1, 1, 1, 1, 1]
|
48 |
new_df = dataframe_core.copy()
|
|
|
537 |
title={RewardBench 2: Advancing Reward Model Evaluation},
|
538 |
author={Malik, Saumya and Pyatkin, Valentina and Land, Sander and Morrison, Jacob and Smith, Noah A. and Hajishirzi, Hannaneh and Lambert, Nathan},
|
539 |
year={2025},
|
540 |
+
howpublished={\url{https://huggingface.co/spaces/allenai/reward-bench}},
|
541 |
}
|
542 |
|
543 |
@misc{RewardBench,
|