Commit c4923ca · Parent: e050fd8
Shiyu Zhao committed · Update space
app.py
CHANGED
@@ -84,23 +84,44 @@ def compute_metrics(csv_path: str, dataset: str, split: str, num_workers: int =
         all_indices = split_idx[split].tolist()
 
         results_list = []
-        query_ids = []
-
-        # Prepare args for each worker
-        args = [(idx, eval_csv, qa_dataset, evaluator, eval_metrics) for idx in all_indices]
-
-        with ProcessPoolExecutor(max_workers=num_workers) as executor:
-            futures = [executor.submit(process_single_instance, arg) for arg in args]
-            for future in tqdm(as_completed(futures), total=len(futures)):
-                result = future.result()  # This will raise an error if the worker encountered one
-                results_list.append(result)
-                query_ids.append(result['query_id'])
-
-        # Concatenate results and compute final metrics
-        eval_csv = pd.concat([eval_csv, pd.DataFrame(results_list)], ignore_index=True)
+        # query_ids = []
+
+        # # Prepare args for each worker
+        # args = [(idx, eval_csv, qa_dataset, evaluator, eval_metrics) for idx in all_indices]
+
+        # with ProcessPoolExecutor(max_workers=num_workers) as executor:
+        #     futures = [executor.submit(process_single_instance, arg) for arg in args]
+        #     for future in tqdm(as_completed(futures), total=len(futures)):
+        #         result = future.result()  # This will raise an error if the worker encountered one
+        #         results_list.append(result)
+        #         query_ids.append(result['query_id'])
+
+        # # Concatenate results and compute final metrics
+        # eval_csv = pd.concat([eval_csv, pd.DataFrame(results_list)], ignore_index=True)
+        # final_results = {
+        #     metric: np.mean(eval_csv[eval_csv['query_id'].isin(query_ids)][metric]) for metric in eval_metrics
+        # }
+        # return final_result
+        batch_size = 100
+        for i in range(0, len(all_indices), batch_size):
+            max_ind = min(i+batch_size, len(all_indices))
+            batch_indices = all_indices[i:max_ind]
+            args = [(idx, eval_csv, qa_dataset, evaluator, eval_metrics)
+                    for idx in batch_indices]
+
+            with ProcessPoolExecutor(max_workers=num_workers) as executor:
+                futures = [executor.submit(process_single_instance, arg)
+                           for arg in args]
+                for future in as_completed(futures):
+                    results_list.append(future.result())
+
+        # Compute final metrics
+        results_df = pd.DataFrame(results_list)
         final_results = {
-            metric: np.mean(eval_csv[eval_csv['query_id'].isin(query_ids)][metric]) for metric in eval_metrics
+            metric: results_df[metric].mean()
+            for metric in eval_metrics
         }
+
         return final_results
 
     except pd.errors.EmptyDataError:
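The hunk above swaps one large ProcessPoolExecutor pass over every split index for fixed-size batches of 100, so only one batch of pending futures (and their pickled arguments) is in flight at a time. A minimal, self-contained sketch of that pattern, with a toy fake_eval standing in for the app's process_single_instance worker:

# A minimal sketch of the batching pattern; fake_eval is a hypothetical
# stand-in for the app's process_single_instance.
from concurrent.futures import ProcessPoolExecutor, as_completed

def fake_eval(idx):
    # Pretend to evaluate one query and return a row of metrics.
    return {"query_id": idx, "mrr": 1.0 / (idx + 1)}

def run_in_batches(indices, batch_size=100, num_workers=2):
    results = []
    for i in range(0, len(indices), batch_size):
        batch = indices[i:i + batch_size]
        # A fresh executor per batch caps how many pending futures (and
        # their pickled arguments) are alive at once, bounding peak memory.
        with ProcessPoolExecutor(max_workers=num_workers) as executor:
            futures = [executor.submit(fake_eval, idx) for idx in batch]
            for future in as_completed(futures):
                results.append(future.result())  # re-raises worker errors
    return results

if __name__ == "__main__":
    print(len(run_in_batches(list(range(250)))))  # 250

Creating a fresh executor per batch trades some worker-startup overhead for a bounded memory footprint, which suits the small CPU instances Spaces typically run on.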
@@ -616,144 +637,108 @@ def process_submission(
     method_name, team_name, dataset, split, contact_email,
     code_repo, csv_file, model_description, hardware, paper_link, model_type
 ):
-    """Process
+    """Process submission with progress updates"""
     try:
-        # 1. Initial validation
+        # 1. Initial validation
+        yield "Validating submission details..."
         if not all([method_name, team_name, dataset, split, contact_email, code_repo, csv_file, model_type]):
             return "Error: Please fill in all required fields"
 
-
-
-
-        if
-
-
-        # 2. Validate model type
-        is_valid, message = validate_model_type(method_name, model_type)
-        if not is_valid:
-            return f"Error: {message}"
-
-        # 3. Create temporary directory for processing
-        with tempfile.TemporaryDirectory() as temp_dir:
-            # Copy CSV file to temp directory
-            temp_csv_path = os.path.join(temp_dir, "submission.csv")
-            if isinstance(csv_file, str):
-                shutil.copy2(csv_file, temp_csv_path)
-            else:
-                with open(temp_csv_path, 'wb') as temp_file:
-                    if hasattr(csv_file, 'seek'):
-                        csv_file.seek(0)
-                    if hasattr(csv_file, 'read'):
-                        shutil.copyfileobj(csv_file, temp_file)
-                    else:
-                        temp_file.write(csv_file)
-
-            # 4. Validate CSV format
-            is_valid_csv, csv_message = validate_csv(temp_csv_path)
-            if not is_valid_csv:
-                return f"Error validating CSV: {csv_message}"
-
-            # 5. Compute metrics with progress indication
-            print(f"Computing metrics for {dataset.lower()} dataset...")
-            results = compute_metrics(
-                csv_path=temp_csv_path,
-                dataset=dataset.lower(),
-                split=split,
-                num_workers=4
-            )
-
-            if isinstance(results, str):
-                return f"Evaluation error: {results}"
-
-            # 6. Process results
-            processed_results = {
-                metric: round(value * 100, 2)
-                for metric, value in results.items()
-            }
-
-            # 7. Prepare submission data
-            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
-            folder_name = f"{sanitize_name(method_name)}_{sanitize_name(team_name)}"
-
-            submission_data = {
-                "Method Name": method_name,
-                "Team Name": team_name,
-                "Dataset": dataset,
-                "Split": split,
-                "Contact Email(s)": contact_email,
-                "Code Repository": code_repo,
-                "Model Description": model_description,
-                "Hardware": hardware,
-                "(Optional) Paper link": paper_link,
-                "Model Type": model_type,
-                "results": processed_results,
-                "status": "pending_review",
-                "submission_date": datetime.now().strftime("%Y-%m-%d %H:%M:%S")
-            }
-
-            # 8. Save to HuggingFace Hub with error handling
+        # 2. Process CSV
+        yield "Processing CSV file..."
+        temp_csv_path = None
+        if isinstance(csv_file, str):
+            temp_csv_path = csv_file
+        else:
             try:
-
-                csv_path_in_repo = f"submissions/{folder_name}/predictions_{timestamp}.csv"
-                hub_storage.save_to_hub(
-                    file_content=temp_csv_path,
-                    path_in_repo=csv_path_in_repo,
-                    commit_message=f"Add submission CSV: {method_name} by {team_name}"
-                )
-                submission_data["csv_path"] = csv_path_in_repo
-
-                # Save metadata
-                metadata_path = f"submissions/{folder_name}/metadata_{timestamp}.json"
-                with tempfile.NamedTemporaryFile(mode='w', suffix='.json') as tmp:
-                    json.dump(submission_data, tmp, indent=4)
-                    tmp.flush()
-                    hub_storage.save_to_hub(
-                        file_content=tmp.name,
-                        path_in_repo=metadata_path,
-                        commit_message=f"Add metadata: {method_name} by {team_name}"
-                    )
-
-                # Update latest.json
-                latest_path = f"submissions/{folder_name}/latest.json"
-                latest_info = {
-                    "latest_submission": timestamp,
-                    "status": "pending_review",
-                    "method_name": method_name
-                }
-                with tempfile.NamedTemporaryFile(mode='w', suffix='.json') as tmp:
-                    json.dump(latest_info, tmp, indent=4)
-                    tmp.flush()
-                    hub_storage.save_to_hub(
-                        file_content=tmp.name,
-                        path_in_repo=latest_path,
-                        commit_message=f"Update latest submission info for {method_name}"
-                    )
-
+                temp_fd, temp_csv_path = tempfile.mkstemp(suffix='.csv')
+                os.close(temp_fd)
+                shutil.copy2(csv_file.name, temp_csv_path)
             except Exception as e:
-                return f"
+                return f"Error processing CSV file: {str(e)}"
+
+        # 3. Validate CSV format
+        yield "Validating CSV format..."
+        try:
+            df = pd.read_csv(temp_csv_path)
+            if 'query_id' not in df.columns or 'pred_rank' not in df.columns:
+                return "Error: CSV must contain 'query_id' and 'pred_rank' columns"
+        except Exception as e:
+            return f"Error reading CSV: {str(e)}"
+
+        # 4. Compute metrics with reduced workers
+        yield f"Computing metrics for {dataset}..."
+        results = compute_metrics(
+            csv_path=temp_csv_path,
+            dataset=dataset.lower(),
+            split=split,
+            num_workers=2  # Reduced from 4 to 2
+        )
+
+        if isinstance(results, str):
+            return f"Evaluation error: {results}"
+
+        # 5. Process results
+        yield "Processing results..."
+        processed_results = {
+            "hit@1": round(results['hit@1'] * 100, 2),
+            "hit@5": round(results['hit@5'] * 100, 2),
+            "recall@20": round(results['recall@20'] * 100, 2),
+            "mrr": round(results['mrr'] * 100, 2)
+        }
 
-
-
+        # 6. Save submission
+        yield "Saving submission..."
+        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+        folder_name = f"{sanitize_name(method_name)}_{sanitize_name(team_name)}"
+
+        submission_data = {
+            "Method Name": method_name,
+            "Team Name": team_name,
+            "Dataset": dataset,
+            "Split": split,
+            "Contact Email(s)": contact_email,
+            "Code Repository": code_repo,
+            "Model Description": model_description,
+            "Hardware": hardware,
+            "Paper link": paper_link,
+            "Model Type": model_type,
+            "results": processed_results
+        }
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+        try:
+            # Save to HuggingFace Hub
+            csv_path_in_repo = f"submissions/{folder_name}/predictions_{timestamp}.csv"
+            hub_storage.save_to_hub(
+                file_content=temp_csv_path,
+                path_in_repo=csv_path_in_repo,
+                commit_message=f"Add submission: {method_name}"
+            )
+        except Exception as e:
+            return f"Error saving to HuggingFace Hub: {str(e)}"
+
+        # 7. Update leaderboard
+        yield "Updating leaderboard..."
+        update_leaderboard_data(submission_data)
+
+        return f"""
+        Submission successful!
+
+        Evaluation Results:
+        Hit@1: {processed_results['hit@1']:.2f}%
+        Hit@5: {processed_results['hit@5']:.2f}%
+        Recall@20: {processed_results['recall@20']:.2f}%
+        MRR: {processed_results['mrr']:.2f}%
+
+        Your submission will appear in the leaderboard after review.
+        """
 
     except Exception as e:
-        return f"Error
+        return f"Error: {str(e)}"
+    finally:
+        # Cleanup
+        if temp_csv_path and os.path.exists(temp_csv_path):
+            os.unlink(temp_csv_path)
 
 def filter_by_model_type(df, selected_types):
     """
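The rewritten process_submission streams progress by yielding status strings: Gradio treats a generator event handler as a stream of updates to its output component. A minimal sketch with placeholder component names rather than the app's real layout (note that once a handler contains yield, Python makes it a generator and a plain return value is not delivered to the UI, so this sketch yields its final message too):

# Minimal sketch of the yield-based progress pattern; names are illustrative.
import time
import gradio as gr

def process(method_name):
    # Each yielded string replaces the contents of the output component.
    yield "Validating submission details..."
    time.sleep(0.5)
    yield "Computing metrics..."
    time.sleep(0.5)
    yield f"Submission successful: {method_name}"

with gr.Blocks() as demo:
    method_name = gr.Textbox(label="Method Name")
    result = gr.Textbox(label="Status")
    gr.Button("Submit").click(fn=process, inputs=method_name, outputs=result)

if __name__ == "__main__":
    demo.launch()

The save step goes through hub_storage, an app-local helper that is not shown in this diff. A hypothetical sketch of what such a helper might wrap, using the huggingface_hub client (the repo_id below is a placeholder, not the app's actual repository):

# Hypothetical sketch of a save_to_hub-style helper built on huggingface_hub.
from huggingface_hub import HfApi

def save_to_hub(file_content, path_in_repo, commit_message):
    HfApi().upload_file(
        path_or_fileobj=file_content,           # local path to the file
        path_in_repo=path_in_repo,              # e.g. submissions/<name>/...
        repo_id="org/leaderboard-submissions",  # placeholder repo_id
        repo_type="dataset",
        commit_message=commit_message,
    )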
@@ -952,8 +937,9 @@ with gr.Blocks(css=css) as demo:
         method_name, team_name, dataset, split, contact_email,
         code_repo, csv_file, model_description, hardware, paper_link, model_type
     ],
-        outputs=result
-
+        outputs=result,
+        api_name="submit"
+    ).success(  # Add success handler to update tables
         fn=update_tables,
         inputs=[model_type_filter],
         outputs=all_dfs
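The event chain above gains api_name="submit" and a .success() handler, so the leaderboard tables refresh only after a submission completes without raising. A minimal sketch with placeholder handlers:

# Minimal sketch of .success() chaining; handlers are placeholders.
import gradio as gr

def submit(method_name):
    if not method_name:
        # Raising gr.Error marks the event as failed, so .success() is skipped.
        raise gr.Error("Please fill in all required fields")
    return f"Submitted: {method_name}"

def update_tables():
    return [["ExampleMethod", 42.0]]

with gr.Blocks() as demo:
    method_name = gr.Textbox(label="Method Name")
    result = gr.Textbox(label="Status")
    table = gr.Dataframe(headers=["method", "score"])
    gr.Button("Submit").click(
        fn=submit, inputs=method_name, outputs=result, api_name="submit"
    ).success(fn=update_tables, outputs=table)

if __name__ == "__main__":
    demo.launch()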
|