Shiyu Zhao committed on
Commit
b16c4b6
·
1 Parent(s): df3974d

Update space

Browse files
Files changed (1) hide show
  1. app.py +150 -121
app.py CHANGED
@@ -14,11 +14,14 @@ from email.mime.text import MIMEText
14
  from huggingface_hub import HfApi
15
  import shutil
16
  import tempfile
17
- import time
18
- from concurrent.futures import ThreadPoolExecutor, as_completed
19
- from queue import Queue
20
- import threading
21
- from threading import Lock
 
 
 
22
  from stark_qa import load_qa
23
  from stark_qa.evaluator import Evaluator
24
 
@@ -32,8 +35,6 @@ try:
32
  except Exception as e:
33
  raise RuntimeError(f"Failed to initialize HuggingFace Hub storage: {e}")
34
 
35
- # Global lock for thread-safe operations
36
- result_lock = Lock()
37
 
38
  def process_single_instance(args):
39
  """Process a single instance with improved prediction handling"""
@@ -210,7 +211,6 @@ def compute_metrics(csv_path: str, dataset: str, split: str, num_workers: int =
210
  print(error_msg)
211
  return error_msg
212
 
213
-
214
  # Data dictionaries for leaderboard
215
  data_synthesized_full = {
216
  'Method': ['BM25', 'DPR (roberta)', 'ANCE (roberta)', 'QAGNN (roberta)', 'ada-002', 'voyage-l2-instruct', 'LLM2Vec', 'GritLM-7b', 'multi-ada-002', 'ColBERTv2'],
@@ -289,32 +289,23 @@ def validate_github_url(url):
289
  )
290
  return bool(github_pattern.match(url))
291
 
292
- def validate_csv(file_path):
293
- """Validate CSV file format and content with better error handling"""
294
  try:
295
- df = pd.read_csv(file_path)
296
  required_cols = ['query_id', 'pred_rank']
297
 
298
- # Check for required columns
299
- missing_cols = [col for col in required_cols if col not in df.columns]
300
- if missing_cols:
301
- return False, f"Missing required columns: {', '.join(missing_cols)}"
302
-
303
- # Validate first few rows to ensure proper format
304
- for idx, row in df.head().iterrows():
305
- try:
306
- rank_list = eval(row['pred_rank']) if isinstance(row['pred_rank'], str) else row['pred_rank']
307
- if not isinstance(rank_list, list):
308
- return False, f"pred_rank must be a list (row {idx})"
309
- if len(rank_list) < 20:
310
- return False, f"pred_rank must contain at least 20 candidates (row {idx})"
311
- except Exception as e:
312
- return False, f"Invalid pred_rank format in row {idx}: {str(e)}"
313
-
314
  return True, "Valid CSV file"
315
-
316
- except pd.errors.EmptyDataError:
317
- return False, "CSV file is empty"
318
  except Exception as e:
319
  return False, f"Error processing CSV: {str(e)}"
320
 
@@ -574,7 +565,7 @@ def save_submission(submission_data, csv_file):
574
  def update_leaderboard_data(submission_data):
575
  """
576
  Update leaderboard data with new submission results
577
- Only updates the specific dataset submitted, preventing empty rows
578
  """
579
  global df_synthesized_full, df_synthesized_10, df_human_generated
580
 
@@ -586,32 +577,26 @@ def update_leaderboard_data(submission_data):
586
  }
587
 
588
  df_to_update = split_to_df[submission_data['Split']]
589
- dataset = submission_data['Dataset'].upper()
590
 
591
- # Prepare new row data with only the relevant dataset columns
592
  new_row = {
593
- 'Method': submission_data['Method Name']
 
 
 
 
594
  }
595
- # Only add metrics for the submitted dataset
596
- new_row.update({
597
- f'STARK-{dataset}_Hit@1': submission_data['results']['hit@1'],
598
- f'STARK-{dataset}_Hit@5': submission_data['results']['hit@5'],
599
- f'STARK-{dataset}_R@20': submission_data['results']['recall@20'],
600
- f'STARK-{dataset}_MRR': submission_data['results']['mrr']
601
- })
602
 
603
  # Check if method already exists
604
  method_mask = df_to_update['Method'] == submission_data['Method Name']
605
  if method_mask.any():
606
- # Update only the columns for the submitted dataset
607
  for col in new_row:
608
  df_to_update.loc[method_mask, col] = new_row[col]
609
  else:
610
- # For new methods, create a row with only the submitted dataset's values
611
  df_to_update.loc[len(df_to_update)] = new_row
612
 
613
-
614
-
615
  # Function to get emails from meta_data
616
  def get_emails_from_metadata(meta_data):
617
  """
@@ -716,93 +701,143 @@ def process_submission(
716
  method_name, team_name, dataset, split, contact_email,
717
  code_repo, csv_file, model_description, hardware, paper_link, model_type
718
  ):
719
- """Process submission with progress updates"""
720
  temp_files = []
721
- start_time = time.time()
722
  try:
723
- # 1. Initial validation
724
- yield "Validating submission details..."
725
  if not all([method_name, team_name, dataset, split, contact_email, code_repo, csv_file, model_type]):
726
  return "Error: Please fill in all required fields"
727
 
728
- # 2. Process CSV
729
- yield "Processing CSV file..."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
730
  temp_csv_path = None
731
  if isinstance(csv_file, str):
732
  temp_csv_path = csv_file
733
  else:
734
- try:
735
- temp_fd, temp_csv_path = tempfile.mkstemp(suffix='.csv')
736
- temp_files.append(temp_csv_path)
737
- os.close(temp_fd)
 
738
  shutil.copy2(csv_file.name, temp_csv_path)
739
- except Exception as e:
740
- return f"Error processing CSV file: {str(e)}"
 
 
 
 
 
 
741
 
742
- # 3. Validate CSV format
743
- yield "Validating CSV format..."
744
- try:
745
- df = pd.read_csv(temp_csv_path)
746
- if 'query_id' not in df.columns or 'pred_rank' not in df.columns:
747
- return "Error: CSV must contain 'query_id' and 'pred_rank' columns"
748
- except Exception as e:
749
- return f"Error reading CSV: {str(e)}"
750
 
751
- # 4. Compute metrics with reduced workers
752
- yield f"Computing metrics for {dataset}..."
753
  results = compute_metrics(
754
  csv_path=temp_csv_path,
755
  dataset=dataset.lower(),
756
  split=split,
757
- num_threads=4
758
  )
759
 
760
  if isinstance(results, str):
 
761
  return f"Evaluation error: {results}"
762
 
763
- # 5. Process results
764
- yield "Processing results..."
765
  processed_results = {
766
  "hit@1": round(results['hit@1'] * 100, 2),
767
  "hit@5": round(results['hit@5'] * 100, 2),
768
  "recall@20": round(results['recall@20'] * 100, 2),
769
  "mrr": round(results['mrr'] * 100, 2)
770
  }
771
-
772
- # 6. Save submission
773
- yield "Saving submission..."
774
- timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
775
- folder_name = f"{sanitize_name(method_name)}_{sanitize_name(team_name)}"
776
 
777
- submission_data = {
778
- "Method Name": method_name,
779
- "Team Name": team_name,
780
- "Dataset": dataset,
781
- "Split": split,
782
- "Contact Email(s)": contact_email,
783
- "Code Repository": code_repo,
784
- "Model Description": model_description,
785
- "Hardware": hardware,
786
- "Paper link": paper_link,
787
- "Model Type": model_type,
788
- "results": processed_results
789
- }
790
-
791
  try:
792
- # Save to HuggingFace Hub
793
- csv_path_in_repo = f"submissions/{folder_name}/predictions_{timestamp}.csv"
 
794
  hub_storage.save_to_hub(
795
  file_content=temp_csv_path,
796
  path_in_repo=csv_path_in_repo,
797
- commit_message=f"Add submission: {method_name}"
798
  )
799
- except Exception as e:
800
- return f"Error saving to HuggingFace Hub: {str(e)}"
801
 
802
- # 7. Update leaderboard
803
- yield "Updating leaderboard..."
804
- update_leaderboard_data(submission_data)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
805
 
 
 
 
 
 
 
 
 
806
  return f"""
807
  Submission successful!
808
 
@@ -812,20 +847,29 @@ def process_submission(
812
  Recall@20: {processed_results['recall@20']:.2f}%
813
  MRR: {processed_results['mrr']:.2f}%
814
 
815
- Your submission will appear in the leaderboard after review.
 
 
 
 
 
 
816
  """
817
-
818
  except Exception as e:
819
- total_time = time.time() - start_time
820
- return f"Error ({total_time:.1f}s): {str(e)}"
 
821
  finally:
 
822
  for temp_file in temp_files:
823
  try:
824
  if os.path.exists(temp_file):
825
  os.unlink(temp_file)
826
  except Exception as e:
827
- print(f"Warning: Failed to delete {temp_file}: {str(e)}")
828
-
 
829
  def filter_by_model_type(df, selected_types):
830
  """
831
  Filter DataFrame by selected model types, including submitted models.
@@ -842,24 +886,10 @@ def filter_by_model_type(df, selected_types):
842
  return df[df['Method'].isin(selected_models)]
843
 
844
  def format_dataframe(df, dataset):
845
- """
846
- Format DataFrame for display, removing rows with no data for the selected dataset
847
- """
848
- # Select relevant columns
849
  columns = ['Method'] + [col for col in df.columns if dataset in col]
850
  filtered_df = df[columns].copy()
851
-
852
- # Remove rows where all metric columns are empty/NaN for this dataset
853
- metric_columns = [col for col in filtered_df.columns if col != 'Method']
854
- filtered_df = filtered_df.dropna(subset=metric_columns, how='all')
855
-
856
- # Rename columns to remove dataset prefix
857
  filtered_df.columns = [col.split('_')[-1] if '_' in col else col for col in filtered_df.columns]
858
-
859
- # Sort by MRR
860
- if 'MRR' in filtered_df.columns:
861
- filtered_df = filtered_df.sort_values('MRR', ascending=False)
862
-
863
  return filtered_df
864
 
865
  def update_tables(selected_types):
@@ -1023,9 +1053,8 @@ with gr.Blocks(css=css) as demo:
1023
  method_name, team_name, dataset, split, contact_email,
1024
  code_repo, csv_file, model_description, hardware, paper_link, model_type
1025
  ],
1026
- outputs=result,
1027
- api_name="submit"
1028
- ).success( # Add success handler to update tables
1029
  fn=update_tables,
1030
  inputs=[model_type_filter],
1031
  outputs=all_dfs
 
14
  from huggingface_hub import HfApi
15
  import shutil
16
  import tempfile
17
+
18
+ from stark_qa import load_qa
19
+ from stark_qa.evaluator import Evaluator
20
+
21
+ from utils.hub_storage import HubStorage
22
+ from utils.token_handler import TokenHandler
23
+
24
+
25
  from stark_qa import load_qa
26
  from stark_qa.evaluator import Evaluator
27
 
 
35
  except Exception as e:
36
  raise RuntimeError(f"Failed to initialize HuggingFace Hub storage: {e}")
37
 
 
 
38
 
39
  def process_single_instance(args):
40
  """Process a single instance with improved prediction handling"""
 
211
  print(error_msg)
212
  return error_msg
213
 
 
214
  # Data dictionaries for leaderboard
215
  data_synthesized_full = {
216
  'Method': ['BM25', 'DPR (roberta)', 'ANCE (roberta)', 'QAGNN (roberta)', 'ada-002', 'voyage-l2-instruct', 'LLM2Vec', 'GritLM-7b', 'multi-ada-002', 'ColBERTv2'],
 
289
  )
290
  return bool(github_pattern.match(url))
291
 
292
+ def validate_csv(file_obj):
293
+ """Validate CSV file format and content"""
294
  try:
295
+ df = pd.read_csv(file_obj.name)
296
  required_cols = ['query_id', 'pred_rank']
297
 
298
+ if not all(col in df.columns for col in required_cols):
299
+ return False, "CSV must contain 'query_id' and 'pred_rank' columns"
300
+
301
+ try:
302
+ first_rank = eval(df['pred_rank'].iloc[0]) if isinstance(df['pred_rank'].iloc[0], str) else df['pred_rank'].iloc[0]
303
+ if not isinstance(first_rank, list) or len(first_rank) < 20:
304
+ return False, "pred_rank must be a list with at least 20 candidates"
305
+ except:
306
+ return False, "Invalid pred_rank format"
307
+
 
 
 
 
 
 
308
  return True, "Valid CSV file"
 
 
 
309
  except Exception as e:
310
  return False, f"Error processing CSV: {str(e)}"
311
 
 
565
  def update_leaderboard_data(submission_data):
566
  """
567
  Update leaderboard data with new submission results
568
+ Only uses model name in the displayed table
569
  """
570
  global df_synthesized_full, df_synthesized_10, df_human_generated
571
 
 
577
  }
578
 
579
  df_to_update = split_to_df[submission_data['Split']]
 
580
 
581
+ # Prepare new row data
582
  new_row = {
583
+ 'Method': submission_data['Method Name'], # Only use method name in table
584
+ f'STARK-{submission_data["Dataset"].upper()}_Hit@1': submission_data['results']['hit@1'],
585
+ f'STARK-{submission_data["Dataset"].upper()}_Hit@5': submission_data['results']['hit@5'],
586
+ f'STARK-{submission_data["Dataset"].upper()}_R@20': submission_data['results']['recall@20'],
587
+ f'STARK-{submission_data["Dataset"].upper()}_MRR': submission_data['results']['mrr']
588
  }
 
 
 
 
 
 
 
589
 
590
  # Check if method already exists
591
  method_mask = df_to_update['Method'] == submission_data['Method Name']
592
  if method_mask.any():
593
+ # Update existing row
594
  for col in new_row:
595
  df_to_update.loc[method_mask, col] = new_row[col]
596
  else:
597
+ # Add new row
598
  df_to_update.loc[len(df_to_update)] = new_row
599
 
 
 
600
  # Function to get emails from meta_data
601
  def get_emails_from_metadata(meta_data):
602
  """
 
701
  method_name, team_name, dataset, split, contact_email,
702
  code_repo, csv_file, model_description, hardware, paper_link, model_type
703
  ):
704
+ """Process and validate submission"""
705
  temp_files = []
 
706
  try:
707
+ # Input validation
 
708
  if not all([method_name, team_name, dataset, split, contact_email, code_repo, csv_file, model_type]):
709
  return "Error: Please fill in all required fields"
710
 
711
+ # Validate model type
712
+ is_valid, message = validate_model_type(method_name, model_type)
713
+ if not is_valid:
714
+ return f"Error: {message}"
715
+
716
+ # Create metadata
717
+ meta_data = {
718
+ "Method Name": method_name,
719
+ "Team Name": team_name,
720
+ "Dataset": dataset,
721
+ "Split": split,
722
+ "Contact Email(s)": contact_email,
723
+ "Code Repository": code_repo,
724
+ "Model Description": model_description,
725
+ "Hardware": hardware,
726
+ "(Optional) Paper link": paper_link,
727
+ "Model Type": model_type
728
+ }
729
+
730
+ # Generate folder name and timestamp
731
+ timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
732
+ folder_name = f"{sanitize_name(method_name)}_{sanitize_name(team_name)}"
733
+
734
+ # Process CSV file
735
  temp_csv_path = None
736
  if isinstance(csv_file, str):
737
  temp_csv_path = csv_file
738
  else:
739
+ temp_fd, temp_csv_path = tempfile.mkstemp(suffix='.csv')
740
+ temp_files.append(temp_csv_path)
741
+ os.close(temp_fd)
742
+
743
+ if hasattr(csv_file, 'name'):
744
  shutil.copy2(csv_file.name, temp_csv_path)
745
+ else:
746
+ with open(temp_csv_path, 'wb') as temp_file:
747
+ if hasattr(csv_file, 'seek'):
748
+ csv_file.seek(0)
749
+ if hasattr(csv_file, 'read'):
750
+ shutil.copyfileobj(csv_file, temp_file)
751
+ else:
752
+ temp_file.write(csv_file)
753
 
754
+ if not os.path.exists(temp_csv_path):
755
+ raise FileNotFoundError(f"Failed to create temporary CSV file at {temp_csv_path}")
 
 
 
 
 
 
756
 
757
+ # Compute metrics
 
758
  results = compute_metrics(
759
  csv_path=temp_csv_path,
760
  dataset=dataset.lower(),
761
  split=split,
762
+ num_workers=4
763
  )
764
 
765
  if isinstance(results, str):
766
+ # send_error_notification(meta_data, results)
767
  return f"Evaluation error: {results}"
768
 
769
+ # Process results
 
770
  processed_results = {
771
  "hit@1": round(results['hit@1'] * 100, 2),
772
  "hit@5": round(results['hit@5'] * 100, 2),
773
  "recall@20": round(results['recall@20'] * 100, 2),
774
  "mrr": round(results['mrr'] * 100, 2)
775
  }
 
 
 
 
 
776
 
777
+ # Save files to HuggingFace Hub
 
 
 
 
 
 
 
 
 
 
 
 
 
778
  try:
779
+ # 1. Save CSV file
780
+ csv_filename = f"predictions_{timestamp}.csv"
781
+ csv_path_in_repo = f"submissions/{folder_name}/{csv_filename}"
782
  hub_storage.save_to_hub(
783
  file_content=temp_csv_path,
784
  path_in_repo=csv_path_in_repo,
785
+ commit_message=f"Add submission: {method_name} by {team_name}"
786
  )
 
 
787
 
788
+ # 2. Save metadata
789
+ submission_data = {
790
+ **meta_data,
791
+ "results": processed_results,
792
+ "status": "approved", # or "pending_review"
793
+ "submission_date": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
794
+ "csv_path": csv_path_in_repo
795
+ }
796
+
797
+ metadata_fd, temp_metadata_path = tempfile.mkstemp(suffix='.json')
798
+ temp_files.append(temp_metadata_path)
799
+ os.close(metadata_fd)
800
+
801
+ with open(temp_metadata_path, 'w') as f:
802
+ json.dump(submission_data, f, indent=4)
803
+
804
+ metadata_path = f"submissions/{folder_name}/metadata_{timestamp}.json"
805
+ hub_storage.save_to_hub(
806
+ file_content=temp_metadata_path,
807
+ path_in_repo=metadata_path,
808
+ commit_message=f"Add metadata: {method_name} by {team_name}"
809
+ )
810
+
811
+ # 3. Create or update latest.json
812
+ latest_info = {
813
+ "latest_submission": timestamp,
814
+ "status": "approved", # or "pending_review"
815
+ "method_name": method_name,
816
+ "team_name": team_name
817
+ }
818
+
819
+ latest_fd, temp_latest_path = tempfile.mkstemp(suffix='.json')
820
+ temp_files.append(temp_latest_path)
821
+ os.close(latest_fd)
822
+
823
+ with open(temp_latest_path, 'w') as f:
824
+ json.dump(latest_info, f, indent=4)
825
+
826
+ latest_path = f"submissions/{folder_name}/latest.json"
827
+ hub_storage.save_to_hub(
828
+ file_content=temp_latest_path,
829
+ path_in_repo=latest_path,
830
+ commit_message=f"Update latest submission info for {method_name}"
831
+ )
832
 
833
+ except Exception as e:
834
+ raise RuntimeError(f"Failed to save files to HuggingFace Hub: {str(e)}")
835
+
836
+ # Send confirmation email and update leaderboard data
837
+ # send_submission_confirmation(meta_data, processed_results)
838
+ update_leaderboard_data(submission_data)
839
+
840
+ # Return success message
841
  return f"""
842
  Submission successful!
843
 
 
847
  Recall@20: {processed_results['recall@20']:.2f}%
848
  MRR: {processed_results['mrr']:.2f}%
849
 
850
+ Your submission has been saved and a confirmation email has been sent to {contact_email}.
851
+ Once approved, your results will appear in the leaderboard under: {method_name}
852
+
853
+ You can find your submission at:
854
+ https://huggingface.co/spaces/{REPO_ID}/tree/main/submissions/{folder_name}
855
+
856
+ Please refresh the page to see your submission in the leaderboard.
857
  """
858
+
859
  except Exception as e:
860
+ error_message = f"Error processing submission: {str(e)}"
861
+ # send_error_notification(meta_data, error_message)
862
+ return error_message
863
  finally:
864
+ # Clean up temporary files
865
  for temp_file in temp_files:
866
  try:
867
  if os.path.exists(temp_file):
868
  os.unlink(temp_file)
869
  except Exception as e:
870
+ print(f"Warning: Failed to delete temporary file {temp_file}: {str(e)}")
871
+
872
+
873
  def filter_by_model_type(df, selected_types):
874
  """
875
  Filter DataFrame by selected model types, including submitted models.
 
886
  return df[df['Method'].isin(selected_models)]
887
 
888
  def format_dataframe(df, dataset):
 
 
 
 
889
  columns = ['Method'] + [col for col in df.columns if dataset in col]
890
  filtered_df = df[columns].copy()
 
 
 
 
 
 
891
  filtered_df.columns = [col.split('_')[-1] if '_' in col else col for col in filtered_df.columns]
892
+ filtered_df = filtered_df.sort_values('MRR', ascending=False)
 
 
 
 
893
  return filtered_df
894
 
895
  def update_tables(selected_types):
 
1053
  method_name, team_name, dataset, split, contact_email,
1054
  code_repo, csv_file, model_description, hardware, paper_link, model_type
1055
  ],
1056
+ outputs=result
1057
+ ).success( # Add a success handler to update tables after successful submission
 
1058
  fn=update_tables,
1059
  inputs=[model_type_filter],
1060
  outputs=all_dfs