Spaces:
Running
Running
Shiyu Zhao
committed on
Commit
·
ea1c498
1
Parent(s):
438e395
Update space
Browse files
app.py
CHANGED
@@ -171,7 +171,8 @@ model_types = {
|
|
171 |
'Small Dense Retrievers': ['DPR (roberta)', 'ANCE (roberta)', 'QAGNN (roberta)'],
|
172 |
'LLM-based Dense Retrievers': ['ada-002', 'voyage-l2-instruct', 'LLM2Vec', 'GritLM-7b'],
|
173 |
'Multivector Retrievers': ['multi-ada-002', 'ColBERTv2'],
|
174 |
-
'LLM Rerankers': ['Claude3 Reranker', 'GPT4 Reranker']
|
|
|
175 |
}
|
176 |
|
177 |
# Submission form validation functions
|
@@ -381,21 +382,25 @@ def scan_submissions_directory():
|
|
381 |
return None
|
382 |
|
383 |
def initialize_leaderboard():
|
384 |
-
"""
|
385 |
-
Initialize the leaderboard with baseline results and submitted results.
|
386 |
-
"""
|
387 |
global df_synthesized_full, df_synthesized_10, df_human_generated
|
388 |
|
389 |
try:
|
390 |
-
#
|
391 |
df_synthesized_full = pd.DataFrame(data_synthesized_full)
|
392 |
df_synthesized_10 = pd.DataFrame(data_synthesized_10)
|
393 |
df_human_generated = pd.DataFrame(data_human_generated)
|
394 |
|
395 |
print("Initialized with baseline results")
|
396 |
|
397 |
-
#
|
398 |
-
scan_submissions_directory()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
399 |
|
400 |
print("Leaderboard initialization complete")
|
401 |
|
@@ -577,26 +582,26 @@ def format_evaluation_results(results):
|
|
577 |
|
578 |
def process_submission(
|
579 |
method_name, team_name, dataset, split, contact_email,
|
580 |
-
code_repo, csv_file, model_description, hardware, paper_link
|
581 |
):
|
582 |
"""Process and validate submission"""
|
583 |
temp_files = []
|
584 |
try:
|
585 |
# Input validation
|
586 |
-
if not all([method_name, team_name, dataset, split, contact_email, code_repo, csv_file]):
|
587 |
return "Error: Please fill in all required fields"
|
588 |
|
589 |
-
|
590 |
-
|
591 |
-
|
592 |
-
if
|
593 |
-
|
594 |
-
if not
|
595 |
-
return "Error:
|
596 |
-
|
597 |
-
|
598 |
-
|
599 |
-
# Create metadata
|
600 |
meta_data = {
|
601 |
"Method Name": method_name,
|
602 |
"Team Name": team_name,
|
@@ -606,7 +611,8 @@ def process_submission(
|
|
606 |
"Code Repository": code_repo,
|
607 |
"Model Description": model_description,
|
608 |
"Hardware": hardware,
|
609 |
-
"(Optional) Paper link": paper_link
|
|
|
610 |
}
|
611 |
|
612 |
# Generate folder name and timestamp
|
@@ -755,7 +761,9 @@ def process_submission(
|
|
755 |
def filter_by_model_type(df, selected_types):
|
756 |
if not selected_types:
|
757 |
return df.head(0)
|
758 |
-
selected_models = [
|
|
|
|
|
759 |
return df[df['Method'].isin(selected_models)]
|
760 |
|
761 |
def format_dataframe(df, dataset):
|
@@ -872,6 +880,11 @@ with gr.Blocks(css=css) as demo:
|
|
872 |
)
|
873 |
|
874 |
with gr.Column():
|
|
|
|
|
|
|
|
|
|
|
875 |
code_repo = gr.Textbox(
|
876 |
label="Code Repository*",
|
877 |
placeholder="https://github.com/snap-stanford/stark-leaderboard"
|
@@ -911,7 +924,7 @@ with gr.Blocks(css=css) as demo:
|
|
911 |
fn=process_submission,
|
912 |
inputs=[
|
913 |
method_name, team_name, dataset, split, contact_email,
|
914 |
-
code_repo, csv_file, model_description, hardware, paper_link
|
915 |
],
|
916 |
outputs=result
|
917 |
).success( # Add a success handler to update tables after successful submission
|
|
|
171 |
'Small Dense Retrievers': ['DPR (roberta)', 'ANCE (roberta)', 'QAGNN (roberta)'],
|
172 |
'LLM-based Dense Retrievers': ['ada-002', 'voyage-l2-instruct', 'LLM2Vec', 'GritLM-7b'],
|
173 |
'Multivector Retrievers': ['multi-ada-002', 'ColBERTv2'],
|
174 |
+
'LLM Rerankers': ['Claude3 Reranker', 'GPT4 Reranker'],
|
175 |
+
'Others': [] # Will be populated dynamically with submitted models
|
176 |
}
|
177 |
|
178 |
# Submission form validation functions
|
|
|
382 |
return None
|
383 |
|
384 |
def initialize_leaderboard():
|
|
|
|
|
|
|
385 |
global df_synthesized_full, df_synthesized_10, df_human_generated
|
386 |
|
387 |
try:
|
388 |
+
# Initialize with baseline results
|
389 |
df_synthesized_full = pd.DataFrame(data_synthesized_full)
|
390 |
df_synthesized_10 = pd.DataFrame(data_synthesized_10)
|
391 |
df_human_generated = pd.DataFrame(data_human_generated)
|
392 |
|
393 |
print("Initialized with baseline results")
|
394 |
|
395 |
+
# Scan submissions directory and update 'Others' category
|
396 |
+
submissions = scan_submissions_directory()
|
397 |
+
if submissions:
|
398 |
+
for split_submissions in submissions.values():
|
399 |
+
for submission in split_submissions:
|
400 |
+
method_name = submission.get('Method Name')
|
401 |
+
method_exists = any(method_name in models for models in model_types.values())
|
402 |
+
if not method_exists:
|
403 |
+
model_types['Others'].append(method_name)
|
404 |
|
405 |
print("Leaderboard initialization complete")
|
406 |
|
|
|
582 |
|
583 |
def process_submission(
|
584 |
method_name, team_name, dataset, split, contact_email,
|
585 |
+
code_repo, csv_file, model_description, hardware, paper_link, model_type
|
586 |
):
|
587 |
"""Process and validate submission"""
|
588 |
temp_files = []
|
589 |
try:
|
590 |
# Input validation
|
591 |
+
if not all([method_name, team_name, dataset, split, contact_email, code_repo, csv_file, model_type]):
|
592 |
return "Error: Please fill in all required fields"
|
593 |
|
594 |
+
if model_type not in model_types:
|
595 |
+
return "Error: Invalid model type selected"
|
596 |
+
|
597 |
+
# Add model to 'Others' category if it's a new model
|
598 |
+
method_exists = any(method_name in models for models in model_types.values())
|
599 |
+
if not method_exists and model_type != 'Others':
|
600 |
+
return "Error: New models must be submitted under 'Others' category"
|
601 |
+
elif not method_exists and model_type == 'Others':
|
602 |
+
model_types['Others'].append(method_name)
|
603 |
+
|
604 |
+
# Create metadata
|
605 |
meta_data = {
|
606 |
"Method Name": method_name,
|
607 |
"Team Name": team_name,
|
|
|
611 |
"Code Repository": code_repo,
|
612 |
"Model Description": model_description,
|
613 |
"Hardware": hardware,
|
614 |
+
"(Optional) Paper link": paper_link,
|
615 |
+
"Model Type": model_type
|
616 |
}
|
617 |
|
618 |
# Generate folder name and timestamp
|
|
|
761 |
def filter_by_model_type(df, selected_types):
    """Return the rows of ``df`` whose method belongs to a selected model type.

    Args:
        df: DataFrame with a 'Method' column.
        selected_types: Iterable of keys of the module-level ``model_types``
            mapping (type name -> list of method names). May be empty.

    Returns:
        The rows of ``df`` whose 'Method' value is listed under any of the
        selected types. When nothing is selected, a zero-row frame with the
        same columns as ``df``.
    """
    if not selected_types:
        # head(0) yields zero rows but preserves the columns.
        return df.head(0)
    # Unknown type names contribute no models instead of raising KeyError;
    # the loop variable avoids shadowing the builtin ``type``.
    selected_models = [
        model
        for model_type in selected_types
        for model in model_types.get(model_type, ())
    ]
    return df[df['Method'].isin(selected_models)]
|
768 |
|
769 |
def format_dataframe(df, dataset):
|
|
|
880 |
)
|
881 |
|
882 |
with gr.Column():
|
883 |
+
model_type = gr.Dropdown(
|
884 |
+
choices=list(model_types.keys()),
|
885 |
+
label="Model Type*",
|
886 |
+
value="Others"
|
887 |
+
)
|
888 |
code_repo = gr.Textbox(
|
889 |
label="Code Repository*",
|
890 |
placeholder="https://github.com/snap-stanford/stark-leaderboard"
|
|
|
924 |
fn=process_submission,
|
925 |
inputs=[
|
926 |
method_name, team_name, dataset, split, contact_email,
|
927 |
+
code_repo, csv_file, model_description, hardware, paper_link, model_type
|
928 |
],
|
929 |
outputs=result
|
930 |
).success( # Add a success handler to update tables after successful submission
|