Shiyu Zhao committed
Commit d38a2a4 · 1 Parent(s): e6f9c92

Update space

Files changed (1)
  1. app.py +45 -42
app.py CHANGED
@@ -36,36 +36,35 @@ except Exception as e:
 result_lock = Lock()
 
 def process_single_instance(args):
-    idx, eval_csv, qa_dataset, evaluator, eval_metrics = args
+    idx, eval_dict, qa_dataset, evaluator, eval_metrics = args
     query, query_id, answer_ids, meta_info = qa_dataset[idx]
+
     try:
-        # Using loc instead of direct boolean indexing for thread safety
-        with result_lock:
-            matching_rows = eval_csv.loc[eval_csv['query_id'] == query_id]
-            if matching_rows.empty:
-                raise IndexError(f'Error when processing query_id={query_id}, please make sure the predicted results exist for this query.')
-            pred_rank = matching_rows['pred_rank'].iloc[0]
-    except IndexError:
-        raise IndexError(f'Error when processing query_id={query_id}, please make sure the predicted results exist for this query.')
+        # Access prediction using dictionary instead of DataFrame
+        if query_id not in eval_dict:
+            raise IndexError(f'Error when processing query_id={query_id}, please make sure the predicted results exist for this query.')
+
+        pred_rank = eval_dict[query_id]
+
+        if isinstance(pred_rank, str):
+            try:
+                pred_rank = eval(pred_rank)
+            except SyntaxError as e:
+                raise ValueError(f'Failed to parse pred_rank as a list for query_id={query_id}: {e}')
+
+        if not isinstance(pred_rank, list):
+            raise TypeError(f'Error when processing query_id={query_id}, expected pred_rank to be a list but got {type(pred_rank)}.')
+
+        pred_dict = {pred_rank[i]: -i for i in range(min(100, len(pred_rank)))}
+        answer_ids = torch.LongTensor(answer_ids)
+
+        # Evaluate metrics
+        result = evaluator.evaluate(pred_dict, answer_ids, metrics=eval_metrics)
+        result["idx"], result["query_id"] = idx, query_id
+        return result
+
     except Exception as e:
-        raise RuntimeError(f'Unexpected error occurred while fetching prediction rank for query_id={query_id}: {e}')
-
-    if isinstance(pred_rank, str):
-        try:
-            pred_rank = eval(pred_rank)
-        except SyntaxError as e:
-            raise ValueError(f'Failed to parse pred_rank as a list for query_id={query_id}: {e}')
-
-    if not isinstance(pred_rank, list):
-        raise TypeError(f'Error when processing query_id={query_id}, expected pred_rank to be a list but got {type(pred_rank)}.')
-
-    pred_dict = {pred_rank[i]: -i for i in range(min(100, len(pred_rank)))}
-    answer_ids = torch.LongTensor(answer_ids)
-
-    # Evaluate metrics
-    result = evaluator.evaluate(pred_dict, answer_ids, metrics=eval_metrics)
-    result["idx"], result["query_id"] = idx, query_id
-    return result
+        raise RuntimeError(f'Error processing query_id={query_id}: {str(e)}')
 
 def compute_metrics(csv_path: str, dataset: str, split: str, num_threads: int = 4):
     candidate_ids_dict = {
@@ -82,8 +81,8 @@ def compute_metrics(csv_path: str, dataset: str, split: str, num_threads: int =
     if 'pred_rank' not in eval_csv.columns:
         raise ValueError('No `pred_rank` column found in the submitted csv.')
 
-    # Filter required columns
-    eval_csv = eval_csv[['query_id', 'pred_rank']]
+    # Convert DataFrame to dictionary for thread-safe access
+    eval_dict = dict(zip(eval_csv['query_id'], eval_csv['pred_rank']))
 
     # Validate input parameters
     if dataset not in candidate_ids_dict:
@@ -100,13 +99,14 @@ def compute_metrics(csv_path: str, dataset: str, split: str, num_threads: int =
     split_idx = qa_dataset.get_idx_split()
     all_indices = split_idx[split].tolist()
 
-    # Thread-safe containers
+    # Thread-safe containers for results
     results_list = []
-    query_ids = []
     results_lock = Lock()
 
     # Prepare args for each thread
-    args = [(idx, eval_csv, qa_dataset, evaluator, eval_metrics) for idx in all_indices]
+    args = [(idx, eval_dict, qa_dataset, evaluator, eval_metrics) for idx in all_indices]
+
+    failed_queries = [] # Track failed queries
 
     # Process using threads
     with ThreadPoolExecutor(max_workers=num_threads) as executor:
@@ -117,19 +117,23 @@ def compute_metrics(csv_path: str, dataset: str, split: str, num_threads: int =
                 result = future.result()
                 with results_lock:
                     results_list.append(result)
-                    query_ids.append(result['query_id'])
             except Exception as e:
+                query_id = str(e).split('query_id=')[-1].split(':')[0]
+                failed_queries.append(query_id)
                 print(f"Error processing instance: {str(e)}")
 
-    # Concatenate results and compute final metrics
-    with result_lock:
-        results_df = pd.DataFrame(results_list)
-        eval_csv = pd.concat([eval_csv, results_df], ignore_index=True)
+    if failed_queries:
+        print(f"\nFailed to process {len(failed_queries)} queries.")
+        print(f"First few failed query_ids: {failed_queries[:5]}")
 
-    final_results = {
-        metric: np.mean(eval_csv[eval_csv['query_id'].isin(query_ids)][metric])
-        for metric in eval_metrics
-    }
+    if not results_list:
+        raise ValueError("No results were successfully processed")
+
+    # Compute final metrics
+    results_df = pd.DataFrame(results_list)
+    final_results = {
+        metric: np.mean(results_df[metric]) for metric in eval_metrics
+    }
 
     return final_results
 
@@ -139,7 +143,6 @@ def compute_metrics(csv_path: str, dataset: str, split: str, num_threads: int =
         return f"Error: The file {csv_path} could not be found. Please check the file path and try again."
     except Exception as error:
         return f"{error}"
-
 
 
 # Data dictionaries for leaderboard
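
For orientation, below is a minimal usage sketch of the compute_metrics entry point touched by this commit. The CSV path, dataset key, and split name are hypothetical placeholders, not values taken from the committed code.

# Minimal usage sketch (assumed placeholder values, not part of the commit).
# The submitted CSV must contain `query_id` and `pred_rank` columns, matching
# the checks performed inside compute_metrics.
if __name__ == "__main__":
    metrics = compute_metrics(
        csv_path="submission.csv",  # hypothetical submission file
        dataset="amazon",           # assumed dataset key; must be present in candidate_ids_dict
        split="test",               # assumed split name known to qa_dataset.get_idx_split()
        num_threads=4,
    )
    print(metrics)  # dict mapping each metric in eval_metrics to its mean value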