saumyamalik committed on
Commit
640136e
·
1 Parent(s): 866d755

update dataset name and citation

Browse files
Files changed (1) hide show
  1. app.py +3 -2
app.py CHANGED
@@ -20,7 +20,7 @@ api = HfApi()
20
  COLLAB_TOKEN = os.environ.get("COLLAB_TOKEN")
21
  evals_repo = "allenai/reward-bench-v2-results"
22
 
23
- eval_set_repo = "allenai/reward-bench-v2-v0"
24
  eval_set_repo_v1 = "allenai/reward-bench"
25
 
26
  repo_dir_rewardbench = "./evals/rewardbench/"
@@ -42,6 +42,7 @@ repo = snapshot_download(
42
 
43
 
44
  def avg_over_rewardbench_v2(dataframe_core):
 
45
  domain_cols = ["factuality", "precise if", "math", "safety", "chat", "ties"]
46
  domain_weights = [1, 1, 1, 1, 1, 1]
47
  new_df = dataframe_core.copy()
@@ -536,7 +537,7 @@ with gr.Blocks(theme=theme, css=custom_css) as app:
536
  title={RewardBench 2: Advancing Reward Model Evaluation},
537
  author={Malik, Saumya and Pyatkin, Valentina and Land, Sander and Morrison, Jacob and Smith, Noah A. and Hajishirzi, Hannaneh and Lambert, Nathan},
538
  year={2025},
539
- howpublished={\url{https://huggingface.co/spaces/allenai/reward-bench-2}},
540
  }
541
 
542
  @misc{RewardBench,
 
20
  COLLAB_TOKEN = os.environ.get("COLLAB_TOKEN")
21
  evals_repo = "allenai/reward-bench-v2-results"
22
 
23
+ eval_set_repo = "allenai/reward-bench-2"
24
  eval_set_repo_v1 = "allenai/reward-bench"
25
 
26
  repo_dir_rewardbench = "./evals/rewardbench/"
 
42
 
43
 
44
  def avg_over_rewardbench_v2(dataframe_core):
45
+ # TODO: change this when I run on new data with different names
46
  domain_cols = ["factuality", "precise if", "math", "safety", "chat", "ties"]
47
  domain_weights = [1, 1, 1, 1, 1, 1]
48
  new_df = dataframe_core.copy()
 
537
  title={RewardBench 2: Advancing Reward Model Evaluation},
538
  author={Malik, Saumya and Pyatkin, Valentina and Land, Sander and Morrison, Jacob and Smith, Noah A. and Hajishirzi, Hannaneh and Lambert, Nathan},
539
  year={2025},
540
+ howpublished={\url{https://huggingface.co/spaces/allenai/reward-bench}},
541
  }
542
 
543
  @misc{RewardBench,