Shiyu Zhao committed · Commit d38a2a4 · Parent(s): e6f9c92

Update space
app.py CHANGED
@@ -36,36 +36,35 @@ except Exception as e:
 result_lock = Lock()
 
 def process_single_instance(args):
-    idx, …
+    idx, eval_dict, qa_dataset, evaluator, eval_metrics = args
     query, query_id, answer_ids, meta_info = qa_dataset[idx]
+
     try:
-        # …
-        …
+        # Access prediction using dictionary instead of DataFrame
+        if query_id not in eval_dict:
+            raise IndexError(f'Error when processing query_id={query_id}, please make sure the predicted results exist for this query.')
+
+        pred_rank = eval_dict[query_id]
+
+        if isinstance(pred_rank, str):
+            try:
+                pred_rank = eval(pred_rank)
+            except SyntaxError as e:
+                raise ValueError(f'Failed to parse pred_rank as a list for query_id={query_id}: {e}')
+
+        if not isinstance(pred_rank, list):
+            raise TypeError(f'Error when processing query_id={query_id}, expected pred_rank to be a list but got {type(pred_rank)}.')
+
+        pred_dict = {pred_rank[i]: -i for i in range(min(100, len(pred_rank)))}
+        answer_ids = torch.LongTensor(answer_ids)
+
+        # Evaluate metrics
+        result = evaluator.evaluate(pred_dict, answer_ids, metrics=eval_metrics)
+        result["idx"], result["query_id"] = idx, query_id
+        return result
+
     except Exception as e:
-        raise RuntimeError(f'…
-
-    if isinstance(pred_rank, str):
-        try:
-            pred_rank = eval(pred_rank)
-        except SyntaxError as e:
-            raise ValueError(f'Failed to parse pred_rank as a list for query_id={query_id}: {e}')
-
-    if not isinstance(pred_rank, list):
-        raise TypeError(f'Error when processing query_id={query_id}, expected pred_rank to be a list but got {type(pred_rank)}.')
-
-    pred_dict = {pred_rank[i]: -i for i in range(min(100, len(pred_rank)))}
-    answer_ids = torch.LongTensor(answer_ids)
-
-    # Evaluate metrics
-    result = evaluator.evaluate(pred_dict, answer_ids, metrics=eval_metrics)
-    result["idx"], result["query_id"] = idx, query_id
-    return result
+        raise RuntimeError(f'Error processing query_id={query_id}: {str(e)}')
 
 def compute_metrics(csv_path: str, dataset: str, split: str, num_threads: int = 4):
     candidate_ids_dict = {
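Note: the patched function still parses pred_rank with the built-in eval, which will execute arbitrary expressions from the submitted CSV and only guards against SyntaxError. A minimal sketch of a safer parser using ast.literal_eval; the helper name parse_pred_rank is hypothetical, not part of this commit:

```python
import ast

def parse_pred_rank(raw):
    """Parse a stringified ranking such as "[3, 1, 2]" into a list."""
    if isinstance(raw, str):
        try:
            # literal_eval only accepts Python literals, so a malicious
            # submission cannot execute code during parsing.
            raw = ast.literal_eval(raw)
        except (SyntaxError, ValueError) as e:
            raise ValueError(f'Failed to parse pred_rank: {e}')
    if not isinstance(raw, list):
        raise TypeError(f'Expected pred_rank to be a list but got {type(raw)}.')
    return raw
```

For example, parse_pred_rank("[3, 1, 2]") returns [3, 1, 2], while parse_pred_rank("__import__('os')") raises ValueError instead of importing a module.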
@@ -82,8 +81,8 @@ def compute_metrics(csv_path: str, dataset: str, split: str, num_threads: int =
     if 'pred_rank' not in eval_csv.columns:
         raise ValueError('No `pred_rank` column found in the submitted csv.')
 
-    # …
-    …
+    # Convert DataFrame to dictionary for thread-safe access
+    eval_dict = dict(zip(eval_csv['query_id'], eval_csv['pred_rank']))
 
     # Validate input parameters
     if dataset not in candidate_ids_dict:
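The dict(zip(...)) conversion trades one pass over the DataFrame for O(1) lookups in the worker threads, so workers no longer filter a shared pandas object per query. A self-contained illustration with made-up rows (the real frame comes from the submitted CSV); one caveat is that duplicate query_id rows would silently keep the last occurrence:

```python
import pandas as pd

# Toy stand-in for the submitted CSV.
eval_csv = pd.DataFrame({
    'query_id': [11, 42],
    'pred_rank': ['[3, 1, 2]', '[7, 5]'],
})

# One pass builds the lookup table; each worker then does a dict hit
# instead of a boolean-mask scan over the shared DataFrame.
eval_dict = dict(zip(eval_csv['query_id'], eval_csv['pred_rank']))
assert eval_dict[42] == '[7, 5]'
```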
@@ -100,13 +99,14 @@ def compute_metrics(csv_path: str, dataset: str, split: str, num_threads: int =
     split_idx = qa_dataset.get_idx_split()
     all_indices = split_idx[split].tolist()
 
-    # Thread-safe containers
+    # Thread-safe containers for results
     results_list = []
-    query_ids = []
     results_lock = Lock()
 
     # Prepare args for each thread
-    args = [(idx, …
+    args = [(idx, eval_dict, qa_dataset, evaluator, eval_metrics) for idx in all_indices]
+
+    failed_queries = []  # Track failed queries
 
     # Process using threads
     with ThreadPoolExecutor(max_workers=num_threads) as executor:
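The submit/collect body between this hunk and the next is unchanged and not shown in the diff, but the surrounding lines suggest the usual submit/as_completed pattern. A runnable sketch of that pattern, with a toy worker standing in for process_single_instance:

```python
from concurrent.futures import ThreadPoolExecutor, as_completed

def square(args):
    # Toy worker; the real one unpacks (idx, eval_dict, qa_dataset, ...).
    (x,) = args
    return x * x

args = [(i,) for i in range(8)]
results = []
with ThreadPoolExecutor(max_workers=4) as executor:
    futures = [executor.submit(square, a) for a in args]
    for future in as_completed(futures):
        # result() re-raises any exception from the worker, which is
        # what the except branch in the next hunk handles.
        results.append(future.result())
print(sorted(results))
```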
@@ -117,19 +117,23 @@ def compute_metrics(csv_path: str, dataset: str, split: str, num_threads: int =
                 result = future.result()
                 with results_lock:
                     results_list.append(result)
-                    query_ids.append(result['query_id'])
             except Exception as e:
+                query_id = str(e).split('query_id=')[-1].split(':')[0]
+                failed_queries.append(query_id)
                 print(f"Error processing instance: {str(e)}")
 
-    …
-    eval_csv = pd.concat([eval_csv, results_df], ignore_index=True)
+    if failed_queries:
+        print(f"\nFailed to process {len(failed_queries)} queries.")
+        print(f"First few failed query_ids: {failed_queries[:5]}")
 
-    …
+    if not results_list:
+        raise ValueError("No results were successfully processed")
+
+    # Compute final metrics
+    results_df = pd.DataFrame(results_list)
+    final_results = {
+        metric: np.mean(results_df[metric]) for metric in eval_metrics
+    }
 
     return final_results
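Recovering the query id with str(e).split('query_id=')[-1].split(':')[0] is fragile: it only works while every worker exception keeps that exact message shape. One alternative, sketched here with toy data in place of the real args, keeps a future-to-input mapping so failures are attributed without string parsing:

```python
from concurrent.futures import ThreadPoolExecutor, as_completed

def might_fail(args):
    idx, value = args
    if value < 0:
        raise ValueError(f'bad value for idx={idx}')
    return value * 2

args = [(0, 5), (1, -3), (2, 7)]
results, failed = [], []
with ThreadPoolExecutor(max_workers=2) as executor:
    # Keeping a future -> input mapping lets a failure be attributed
    # to its input directly, with no parsing of str(e).
    future_to_idx = {executor.submit(might_fail, a): a[0] for a in args}
    for future in as_completed(future_to_idx):
        try:
            results.append(future.result())
        except Exception:
            failed.append(future_to_idx[future])
print(sorted(results), sorted(failed))  # [10, 14] [1]
```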
@@ -139,7 +143,6 @@ def compute_metrics(csv_path: str, dataset: str, split: str, num_threads: int =
         return f"Error: The file {csv_path} could not be found. Please check the file path and try again."
     except Exception as error:
         return f"{error}"
-
 
 
 # Data dictionaries for leaderboard
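The new tail of compute_metrics macro-averages each requested metric over the per-query result rows. A small self-contained illustration with invented metric names and scores (the real rows come from evaluator.evaluate):

```python
import numpy as np
import pandas as pd

# Invented per-query rows; real ones come from evaluator.evaluate.
eval_metrics = ['hit@1', 'mrr']
results_df = pd.DataFrame([
    {'hit@1': 1.0, 'mrr': 1.0, 'idx': 0, 'query_id': 11},
    {'hit@1': 0.0, 'mrr': 0.5, 'idx': 1, 'query_id': 42},
])

# Mean of each metric column, ignoring the bookkeeping columns.
final_results = {metric: np.mean(results_df[metric]) for metric in eval_metrics}
print(final_results['hit@1'], final_results['mrr'])  # 0.5 0.75
```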