Shiyu Zhao committed on
Commit
b16c4b6
·
1 Parent(s): df3974d

Update space

Browse files
Files changed (1) hide show
  1. app.py +150 -121
app.py CHANGED
@@ -14,11 +14,14 @@ from email.mime.text import MIMEText
14
  from huggingface_hub import HfApi
15
  import shutil
16
  import tempfile
17
- import time
18
- from concurrent.futures import ThreadPoolExecutor, as_completed
19
- from queue import Queue
20
- import threading
21
- from threading import Lock
 
 
 
22
  from stark_qa import load_qa
23
  from stark_qa.evaluator import Evaluator
24
 
@@ -32,8 +35,6 @@ try:
32
  except Exception as e:
33
  raise RuntimeError(f"Failed to initialize HuggingFace Hub storage: {e}")
34
 
35
- # Global lock for thread-safe operations
36
- result_lock = Lock()
37
 
38
  def process_single_instance(args):
39
  """Process a single instance with improved prediction handling"""
@@ -210,7 +211,6 @@ def compute_metrics(csv_path: str, dataset: str, split: str, num_workers: int =
210
  print(error_msg)
211
  return error_msg
212
 
213
-
214
  # Data dictionaries for leaderboard
215
  data_synthesized_full = {
216
  'Method': ['BM25', 'DPR (roberta)', 'ANCE (roberta)', 'QAGNN (roberta)', 'ada-002', 'voyage-l2-instruct', 'LLM2Vec', 'GritLM-7b', 'multi-ada-002', 'ColBERTv2'],
@@ -289,32 +289,23 @@ def validate_github_url(url):
289
  )
290
  return bool(github_pattern.match(url))
291
 
292
- def validate_csv(file_path):
293
- """Validate CSV file format and content with better error handling"""
294
  try:
295
- df = pd.read_csv(file_path)
296
  required_cols = ['query_id', 'pred_rank']
297
 
298
- # Check for required columns
299
- missing_cols = [col for col in required_cols if col not in df.columns]
300
- if missing_cols:
301
- return False, f"Missing required columns: {', '.join(missing_cols)}"
302
-
303
- # Validate first few rows to ensure proper format
304
- for idx, row in df.head().iterrows():
305
- try:
306
- rank_list = eval(row['pred_rank']) if isinstance(row['pred_rank'], str) else row['pred_rank']
307
- if not isinstance(rank_list, list):
308
- return False, f"pred_rank must be a list (row {idx})"
309
- if len(rank_list) < 20:
310
- return False, f"pred_rank must contain at least 20 candidates (row {idx})"
311
- except Exception as e:
312
- return False, f"Invalid pred_rank format in row {idx}: {str(e)}"
313
-
314
  return True, "Valid CSV file"
315
-
316
- except pd.errors.EmptyDataError:
317
- return False, "CSV file is empty"
318
  except Exception as e:
319
  return False, f"Error processing CSV: {str(e)}"
320
 
@@ -574,7 +565,7 @@ def save_submission(submission_data, csv_file):
574
  def update_leaderboard_data(submission_data):
575
  """
576
  Update leaderboard data with new submission results
577
- Only updates the specific dataset submitted, preventing empty rows
578
  """
579
  global df_synthesized_full, df_synthesized_10, df_human_generated
580
 
@@ -586,32 +577,26 @@ def update_leaderboard_data(submission_data):
586
  }
587
 
588
  df_to_update = split_to_df[submission_data['Split']]
589
- dataset = submission_data['Dataset'].upper()
590
 
591
- # Prepare new row data with only the relevant dataset columns
592
  new_row = {
593
- 'Method': submission_data['Method Name']
 
 
 
 
594
  }
595
- # Only add metrics for the submitted dataset
596
- new_row.update({
597
- f'STARK-{dataset}_Hit@1': submission_data['results']['hit@1'],
598
- f'STARK-{dataset}_Hit@5': submission_data['results']['hit@5'],
599
- f'STARK-{dataset}_R@20': submission_data['results']['recall@20'],
600
- f'STARK-{dataset}_MRR': submission_data['results']['mrr']
601
- })
602
 
603
  # Check if method already exists
604
  method_mask = df_to_update['Method'] == submission_data['Method Name']
605
  if method_mask.any():
606
- # Update only the columns for the submitted dataset
607
  for col in new_row:
608
  df_to_update.loc[method_mask, col] = new_row[col]
609
  else:
610
- # For new methods, create a row with only the submitted dataset's values
611
  df_to_update.loc[len(df_to_update)] = new_row
612
 
613
-
614
-
615
  # Function to get emails from meta_data
616
  def get_emails_from_metadata(meta_data):
617
  """
@@ -716,93 +701,143 @@ def process_submission(
716
  method_name, team_name, dataset, split, contact_email,
717
  code_repo, csv_file, model_description, hardware, paper_link, model_type
718
  ):
719
- """Process submission with progress updates"""
720
  temp_files = []
721
- start_time = time.time()
722
  try:
723
- # 1. Initial validation
724
- yield "Validating submission details..."
725
  if not all([method_name, team_name, dataset, split, contact_email, code_repo, csv_file, model_type]):
726
  return "Error: Please fill in all required fields"
727
 
728
- # 2. Process CSV
729
- yield "Processing CSV file..."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
730
  temp_csv_path = None
731
  if isinstance(csv_file, str):
732
  temp_csv_path = csv_file
733
  else:
734
- try:
735
- temp_fd, temp_csv_path = tempfile.mkstemp(suffix='.csv')
736
- temp_files.append(temp_csv_path)
737
- os.close(temp_fd)
 
738
  shutil.copy2(csv_file.name, temp_csv_path)
739
- except Exception as e:
740
- return f"Error processing CSV file: {str(e)}"
 
 
 
 
 
 
741
 
742
- # 3. Validate CSV format
743
- yield "Validating CSV format..."
744
- try:
745
- df = pd.read_csv(temp_csv_path)
746
- if 'query_id' not in df.columns or 'pred_rank' not in df.columns:
747
- return "Error: CSV must contain 'query_id' and 'pred_rank' columns"
748
- except Exception as e:
749
- return f"Error reading CSV: {str(e)}"
750
 
751
- # 4. Compute metrics with reduced workers
752
- yield f"Computing metrics for {dataset}..."
753
  results = compute_metrics(
754
  csv_path=temp_csv_path,
755
  dataset=dataset.lower(),
756
  split=split,
757
- num_threads=4
758
  )
759
 
760
  if isinstance(results, str):
 
761
  return f"Evaluation error: {results}"
762
 
763
- # 5. Process results
764
- yield "Processing results..."
765
  processed_results = {
766
  "hit@1": round(results['hit@1'] * 100, 2),
767
  "hit@5": round(results['hit@5'] * 100, 2),
768
  "recall@20": round(results['recall@20'] * 100, 2),
769
  "mrr": round(results['mrr'] * 100, 2)
770
  }
771
-
772
- # 6. Save submission
773
- yield "Saving submission..."
774
- timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
775
- folder_name = f"{sanitize_name(method_name)}_{sanitize_name(team_name)}"
776
 
777
- submission_data = {
778
- "Method Name": method_name,
779
- "Team Name": team_name,
780
- "Dataset": dataset,
781
- "Split": split,
782
- "Contact Email(s)": contact_email,
783
- "Code Repository": code_repo,
784
- "Model Description": model_description,
785
- "Hardware": hardware,
786
- "Paper link": paper_link,
787
- "Model Type": model_type,
788
- "results": processed_results
789
- }
790
-
791
  try:
792
- # Save to HuggingFace Hub
793
- csv_path_in_repo = f"submissions/{folder_name}/predictions_{timestamp}.csv"
 
794
  hub_storage.save_to_hub(
795
  file_content=temp_csv_path,
796
  path_in_repo=csv_path_in_repo,
797
- commit_message=f"Add submission: {method_name}"
798
  )
799
- except Exception as e:
800
- return f"Error saving to HuggingFace Hub: {str(e)}"
801
 
802
- # 7. Update leaderboard
803
- yield "Updating leaderboard..."
804
- update_leaderboard_data(submission_data)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
805
 
 
 
 
 
 
 
 
 
806
  return f"""
807
  Submission successful!
808
 
@@ -812,20 +847,29 @@ def process_submission(
812
  Recall@20: {processed_results['recall@20']:.2f}%
813
  MRR: {processed_results['mrr']:.2f}%
814
 
815
- Your submission will appear in the leaderboard after review.
 
 
 
 
 
 
816
  """
817
-
818
  except Exception as e:
819
- total_time = time.time() - start_time
820
- return f"Error ({total_time:.1f}s): {str(e)}"
 
821
  finally:
 
822
  for temp_file in temp_files:
823
  try:
824
  if os.path.exists(temp_file):
825
  os.unlink(temp_file)
826
  except Exception as e:
827
- print(f"Warning: Failed to delete {temp_file}: {str(e)}")
828
-
 
829
  def filter_by_model_type(df, selected_types):
830
  """
831
  Filter DataFrame by selected model types, including submitted models.
@@ -842,24 +886,10 @@ def filter_by_model_type(df, selected_types):
842
  return df[df['Method'].isin(selected_models)]
843
 
844
  def format_dataframe(df, dataset):
845
- """
846
- Format DataFrame for display, removing rows with no data for the selected dataset
847
- """
848
- # Select relevant columns
849
  columns = ['Method'] + [col for col in df.columns if dataset in col]
850
  filtered_df = df[columns].copy()
851
-
852
- # Remove rows where all metric columns are empty/NaN for this dataset
853
- metric_columns = [col for col in filtered_df.columns if col != 'Method']
854
- filtered_df = filtered_df.dropna(subset=metric_columns, how='all')
855
-
856
- # Rename columns to remove dataset prefix
857
  filtered_df.columns = [col.split('_')[-1] if '_' in col else col for col in filtered_df.columns]
858
-
859
- # Sort by MRR
860
- if 'MRR' in filtered_df.columns:
861
- filtered_df = filtered_df.sort_values('MRR', ascending=False)
862
-
863
  return filtered_df
864
 
865
  def update_tables(selected_types):
@@ -1023,9 +1053,8 @@ with gr.Blocks(css=css) as demo:
1023
  method_name, team_name, dataset, split, contact_email,
1024
  code_repo, csv_file, model_description, hardware, paper_link, model_type
1025
  ],
1026
- outputs=result,
1027
- api_name="submit"
1028
- ).success( # Add success handler to update tables
1029
  fn=update_tables,
1030
  inputs=[model_type_filter],
1031
  outputs=all_dfs
 
14
  from huggingface_hub import HfApi
15
  import shutil
16
  import tempfile
17
+
18
+ from stark_qa import load_qa
19
+ from stark_qa.evaluator import Evaluator
20
+
21
+ from utils.hub_storage import HubStorage
22
+ from utils.token_handler import TokenHandler
23
+
24
+
25
  from stark_qa import load_qa
26
  from stark_qa.evaluator import Evaluator
27
 
 
35
  except Exception as e:
36
  raise RuntimeError(f"Failed to initialize HuggingFace Hub storage: {e}")
37
 
 
 
38
 
39
  def process_single_instance(args):
40
  """Process a single instance with improved prediction handling"""
 
211
  print(error_msg)
212
  return error_msg
213
 
 
214
  # Data dictionaries for leaderboard
215
  data_synthesized_full = {
216
  'Method': ['BM25', 'DPR (roberta)', 'ANCE (roberta)', 'QAGNN (roberta)', 'ada-002', 'voyage-l2-instruct', 'LLM2Vec', 'GritLM-7b', 'multi-ada-002', 'ColBERTv2'],
 
289
  )
290
  return bool(github_pattern.match(url))
291
 
292
+ def validate_csv(file_obj):
293
+ """Validate CSV file format and content"""
294
  try:
295
+ df = pd.read_csv(file_obj.name)
296
  required_cols = ['query_id', 'pred_rank']
297
 
298
+ if not all(col in df.columns for col in required_cols):
299
+ return False, "CSV must contain 'query_id' and 'pred_rank' columns"
300
+
301
+ try:
302
+ first_rank = eval(df['pred_rank'].iloc[0]) if isinstance(df['pred_rank'].iloc[0], str) else df['pred_rank'].iloc[0]
303
+ if not isinstance(first_rank, list) or len(first_rank) < 20:
304
+ return False, "pred_rank must be a list with at least 20 candidates"
305
+ except:
306
+ return False, "Invalid pred_rank format"
307
+
 
 
 
 
 
 
308
  return True, "Valid CSV file"
 
 
 
309
  except Exception as e:
310
  return False, f"Error processing CSV: {str(e)}"
311
 
 
565
  def update_leaderboard_data(submission_data):
566
  """
567
  Update leaderboard data with new submission results
568
+ Only uses model name in the displayed table
569
  """
570
  global df_synthesized_full, df_synthesized_10, df_human_generated
571
 
 
577
  }
578
 
579
  df_to_update = split_to_df[submission_data['Split']]
 
580
 
581
+ # Prepare new row data
582
  new_row = {
583
+ 'Method': submission_data['Method Name'], # Only use method name in table
584
+ f'STARK-{submission_data["Dataset"].upper()}_Hit@1': submission_data['results']['hit@1'],
585
+ f'STARK-{submission_data["Dataset"].upper()}_Hit@5': submission_data['results']['hit@5'],
586
+ f'STARK-{submission_data["Dataset"].upper()}_R@20': submission_data['results']['recall@20'],
587
+ f'STARK-{submission_data["Dataset"].upper()}_MRR': submission_data['results']['mrr']
588
  }
 
 
 
 
 
 
 
589
 
590
  # Check if method already exists
591
  method_mask = df_to_update['Method'] == submission_data['Method Name']
592
  if method_mask.any():
593
+ # Update existing row
594
  for col in new_row:
595
  df_to_update.loc[method_mask, col] = new_row[col]
596
  else:
597
+ # Add new row
598
  df_to_update.loc[len(df_to_update)] = new_row
599
 
 
 
600
  # Function to get emails from meta_data
601
  def get_emails_from_metadata(meta_data):
602
  """
 
701
  method_name, team_name, dataset, split, contact_email,
702
  code_repo, csv_file, model_description, hardware, paper_link, model_type
703
  ):
704
+ """Process and validate submission"""
705
  temp_files = []
 
706
  try:
707
+ # Input validation
 
708
  if not all([method_name, team_name, dataset, split, contact_email, code_repo, csv_file, model_type]):
709
  return "Error: Please fill in all required fields"
710
 
711
+ # Validate model type
712
+ is_valid, message = validate_model_type(method_name, model_type)
713
+ if not is_valid:
714
+ return f"Error: {message}"
715
+
716
+ # Create metadata
717
+ meta_data = {
718
+ "Method Name": method_name,
719
+ "Team Name": team_name,
720
+ "Dataset": dataset,
721
+ "Split": split,
722
+ "Contact Email(s)": contact_email,
723
+ "Code Repository": code_repo,
724
+ "Model Description": model_description,
725
+ "Hardware": hardware,
726
+ "(Optional) Paper link": paper_link,
727
+ "Model Type": model_type
728
+ }
729
+
730
+ # Generate folder name and timestamp
731
+ timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
732
+ folder_name = f"{sanitize_name(method_name)}_{sanitize_name(team_name)}"
733
+
734
+ # Process CSV file
735
  temp_csv_path = None
736
  if isinstance(csv_file, str):
737
  temp_csv_path = csv_file
738
  else:
739
+ temp_fd, temp_csv_path = tempfile.mkstemp(suffix='.csv')
740
+ temp_files.append(temp_csv_path)
741
+ os.close(temp_fd)
742
+
743
+ if hasattr(csv_file, 'name'):
744
  shutil.copy2(csv_file.name, temp_csv_path)
745
+ else:
746
+ with open(temp_csv_path, 'wb') as temp_file:
747
+ if hasattr(csv_file, 'seek'):
748
+ csv_file.seek(0)
749
+ if hasattr(csv_file, 'read'):
750
+ shutil.copyfileobj(csv_file, temp_file)
751
+ else:
752
+ temp_file.write(csv_file)
753
 
754
+ if not os.path.exists(temp_csv_path):
755
+ raise FileNotFoundError(f"Failed to create temporary CSV file at {temp_csv_path}")
 
 
 
 
 
 
756
 
757
+ # Compute metrics
 
758
  results = compute_metrics(
759
  csv_path=temp_csv_path,
760
  dataset=dataset.lower(),
761
  split=split,
762
+ num_workers=4
763
  )
764
 
765
  if isinstance(results, str):
766
+ # send_error_notification(meta_data, results)
767
  return f"Evaluation error: {results}"
768
 
769
+ # Process results
 
770
  processed_results = {
771
  "hit@1": round(results['hit@1'] * 100, 2),
772
  "hit@5": round(results['hit@5'] * 100, 2),
773
  "recall@20": round(results['recall@20'] * 100, 2),
774
  "mrr": round(results['mrr'] * 100, 2)
775
  }
 
 
 
 
 
776
 
777
+ # Save files to HuggingFace Hub
 
 
 
 
 
 
 
 
 
 
 
 
 
778
  try:
779
+ # 1. Save CSV file
780
+ csv_filename = f"predictions_{timestamp}.csv"
781
+ csv_path_in_repo = f"submissions/{folder_name}/{csv_filename}"
782
  hub_storage.save_to_hub(
783
  file_content=temp_csv_path,
784
  path_in_repo=csv_path_in_repo,
785
+ commit_message=f"Add submission: {method_name} by {team_name}"
786
  )
 
 
787
 
788
+ # 2. Save metadata
789
+ submission_data = {
790
+ **meta_data,
791
+ "results": processed_results,
792
+ "status": "approved", # or "pending_review"
793
+ "submission_date": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
794
+ "csv_path": csv_path_in_repo
795
+ }
796
+
797
+ metadata_fd, temp_metadata_path = tempfile.mkstemp(suffix='.json')
798
+ temp_files.append(temp_metadata_path)
799
+ os.close(metadata_fd)
800
+
801
+ with open(temp_metadata_path, 'w') as f:
802
+ json.dump(submission_data, f, indent=4)
803
+
804
+ metadata_path = f"submissions/{folder_name}/metadata_{timestamp}.json"
805
+ hub_storage.save_to_hub(
806
+ file_content=temp_metadata_path,
807
+ path_in_repo=metadata_path,
808
+ commit_message=f"Add metadata: {method_name} by {team_name}"
809
+ )
810
+
811
+ # 3. Create or update latest.json
812
+ latest_info = {
813
+ "latest_submission": timestamp,
814
+ "status": "approved", # or "pending_review"
815
+ "method_name": method_name,
816
+ "team_name": team_name
817
+ }
818
+
819
+ latest_fd, temp_latest_path = tempfile.mkstemp(suffix='.json')
820
+ temp_files.append(temp_latest_path)
821
+ os.close(latest_fd)
822
+
823
+ with open(temp_latest_path, 'w') as f:
824
+ json.dump(latest_info, f, indent=4)
825
+
826
+ latest_path = f"submissions/{folder_name}/latest.json"
827
+ hub_storage.save_to_hub(
828
+ file_content=temp_latest_path,
829
+ path_in_repo=latest_path,
830
+ commit_message=f"Update latest submission info for {method_name}"
831
+ )
832
 
833
+ except Exception as e:
834
+ raise RuntimeError(f"Failed to save files to HuggingFace Hub: {str(e)}")
835
+
836
+ # Send confirmation email and update leaderboard data
837
+ # send_submission_confirmation(meta_data, processed_results)
838
+ update_leaderboard_data(submission_data)
839
+
840
+ # Return success message
841
  return f"""
842
  Submission successful!
843
 
 
847
  Recall@20: {processed_results['recall@20']:.2f}%
848
  MRR: {processed_results['mrr']:.2f}%
849
 
850
+ Your submission has been saved and a confirmation email has been sent to {contact_email}.
851
+ Once approved, your results will appear in the leaderboard under: {method_name}
852
+
853
+ You can find your submission at:
854
+ https://huggingface.co/spaces/{REPO_ID}/tree/main/submissions/{folder_name}
855
+
856
+ Please refresh the page to see your submission in the leaderboard.
857
  """
858
+
859
  except Exception as e:
860
+ error_message = f"Error processing submission: {str(e)}"
861
+ # send_error_notification(meta_data, error_message)
862
+ return error_message
863
  finally:
864
+ # Clean up temporary files
865
  for temp_file in temp_files:
866
  try:
867
  if os.path.exists(temp_file):
868
  os.unlink(temp_file)
869
  except Exception as e:
870
+ print(f"Warning: Failed to delete temporary file {temp_file}: {str(e)}")
871
+
872
+
873
  def filter_by_model_type(df, selected_types):
874
  """
875
  Filter DataFrame by selected model types, including submitted models.
 
886
  return df[df['Method'].isin(selected_models)]
887
 
888
  def format_dataframe(df, dataset):
 
 
 
 
889
  columns = ['Method'] + [col for col in df.columns if dataset in col]
890
  filtered_df = df[columns].copy()
 
 
 
 
 
 
891
  filtered_df.columns = [col.split('_')[-1] if '_' in col else col for col in filtered_df.columns]
892
+ filtered_df = filtered_df.sort_values('MRR', ascending=False)
 
 
 
 
893
  return filtered_df
894
 
895
  def update_tables(selected_types):
 
1053
  method_name, team_name, dataset, split, contact_email,
1054
  code_repo, csv_file, model_description, hardware, paper_link, model_type
1055
  ],
1056
+ outputs=result
1057
+ ).success( # Add a success handler to update tables after successful submission
 
1058
  fn=update_tables,
1059
  inputs=[model_type_filter],
1060
  outputs=all_dfs