Shiyu Zhao committed · Commit 5bf37f4
1 Parent(s): 5f78509
Update space

app.py CHANGED
@@ -242,11 +242,9 @@ def read_json_from_hub(api: HfApi, repo_id: str, file_path: str) -> dict:
 
 def scan_submissions_directory():
     """
-    Scans the submissions directory and updates the
-
+    Scans the submissions directory and updates the model types dictionary
+    with submitted models.
     """
-    global df_synthesized_full, df_synthesized_10, df_human_generated
-
     try:
         # Initialize HuggingFace API
         api = HfApi()
@@ -264,117 +262,39 @@ def scan_submissions_directory():
                 repo_id=REPO_ID,
                 repo_type="space"
             )
-            # Filter for files in submissions directory
             repo_files = [f for f in all_files if f.startswith('submissions/')]
         except Exception as e:
             print(f"Error listing repository contents: {str(e)}")
             return submissions_by_split
-
-
-            print("No submissions directory found or empty")
-            return submissions_by_split
-
-        # Group files by team folders
-        folder_files = {}
+
+        # Process submissions and update model types
         for filepath in repo_files:
-
-
-
-
-
-
-                folder_files[folder_name] = []
-            folder_files[folder_name].append(filepath)
-
-        # Process each team folder
-        for folder_name, files in folder_files.items():
-            try:
-                # Find latest.json in this folder
-                latest_file = next((f for f in files if f.endswith('latest.json')), None)
-                if not latest_file:
-                    print(f"No latest.json found in {folder_name}")
-                    continue
-
-                # Read latest.json
-                latest_info = read_json_from_hub(api, REPO_ID, latest_file)
-                if not latest_info:
-                    print(f"Failed to read latest.json for {folder_name}")
-                    continue
-
-                # Check submission status
-                if latest_info.get('status') != 'approved':
-                    print(f"Skipping unapproved submission in {folder_name}")
-                    continue
-
-                timestamp = latest_info.get('latest_submission')
-                if not timestamp:
-                    print(f"No timestamp found in latest.json for {folder_name}")
-                    continue
-
-                # Find metadata file
-                metadata_file = next(
-                    (f for f in files if f.endswith(f'metadata_{timestamp}.json')),
-                    None
-                )
-                if not metadata_file:
-                    print(f"No matching metadata file found for {folder_name} timestamp {timestamp}")
-                    continue
-
-                # Read metadata file
-                submission_data = read_json_from_hub(api, REPO_ID, metadata_file)
-                if not submission_data:
-                    print(f"Failed to read metadata for {folder_name}")
-                    continue
-
-                # Map the split name if necessary
-                split = submission_data.get('Split')
-                if split in submissions_by_split:
-                    submissions_by_split[split].append(submission_data)
-
-                    # Update the appropriate DataFrame based on the split
-                    if split == 'test':
-                        df_to_update = df_synthesized_full
-                    elif split == 'test-0.1':
-                        df_to_update = df_synthesized_10
-                    else:  # human_generated_eval
-                        df_to_update = df_human_generated
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-                    else:
-                        df_to_update.loc[len(df_to_update)] = new_row
-
-                    print(f"Successfully added submission from {folder_name} to {split} leaderboard")
-                else:
-                    print(f"Invalid split '{split}' found in {folder_name}")
-
-            except Exception as e:
-                print(f"Error processing folder {folder_name}: {str(e)}")
-                continue
-
-
-
-
-
-                df.sort_values(by=mrr_cols[0], ascending=False, inplace=True)
-
-        # Print summary
-        print("\nLeaderboard initialization summary:")
-        for split, submissions in submissions_by_split.items():
-            print(f"{split}: {len(submissions)} submissions")
-
+            if filepath.endswith('metadata.json'):
+                try:
+                    submission_data = read_json_from_hub(api, REPO_ID, filepath)
+                    if submission_data:
+                        method_name = submission_data.get('Method Name')
+                        model_type = submission_data.get('Model Type')
+
+                        # If model type is specified in metadata, use it
+                        if method_name and model_type:
+                            # Check if method exists in any other category
+                            existing_type = get_model_type_for_method(method_name)
+
+                            # If method doesn't exist in any category, add it to the specified category
+                            if existing_type == 'Others' and model_type in model_types:
+                                if method_name not in model_types[model_type]:
+                                    model_types[model_type].append(method_name)
+
+                        # Add submission to appropriate split
+                        split = submission_data.get('Split')
+                        if split in submissions_by_split:
+                            submissions_by_split[split].append(submission_data)
+
+                except Exception as e:
+                    print(f"Error processing metadata file {filepath}: {str(e)}")
+                    continue
+
         return submissions_by_split
 
     except Exception as e:
@@ -579,7 +499,38 @@ def format_evaluation_results(results):
     """
     result_lines = [f"{metric}: {value}" for metric, value in results.items()]
    return "\n".join(result_lines)
-
+
+def get_model_type_for_method(method_name):
+    """
+    Find the model type category for a given method name.
+    Returns 'Others' if not found in predefined categories.
+    """
+    for type_name, methods in model_types.items():
+        if method_name in methods:
+            return type_name
+    return 'Others'
+
+def validate_model_type(method_name, selected_type):
+    """
+    Validate if the selected model type is appropriate for the method name.
+    Returns (is_valid, message).
+    """
+    # Check if method exists in any category
+    existing_type = None
+    for type_name, methods in model_types.items():
+        if method_name in methods:
+            existing_type = type_name
+            break
+
+    # If method exists, it must be submitted under its predefined category
+    if existing_type:
+        if existing_type != selected_type:
+            return False, f"This method name is already registered under '{existing_type}'. Please use the correct category."
+        return True, "Valid model type"
+
+    # For new methods, any category is valid
+    return True, "Valid model type"
+
 def process_submission(
     method_name, team_name, dataset, split, contact_email,
     code_repo, csv_file, model_description, hardware, paper_link, model_type
@@ -591,15 +542,10 @@ def process_submission(
     if not all([method_name, team_name, dataset, split, contact_email, code_repo, csv_file, model_type]):
         return "Error: Please fill in all required fields"
 
-
-
-
-
-    method_exists = any(method_name in models for models in model_types.values())
-    if not method_exists and model_type != 'Others':
-        return "Error: New models must be submitted under 'Others' category"
-    elif not method_exists and model_type == 'Others':
-        model_types['Others'].append(method_name)
+    # Validate model type
+    is_valid, message = validate_model_type(method_name, model_type)
+    if not is_valid:
+        return f"Error: {message}"
 
     # Create metadata
     meta_data = {
@@ -774,6 +720,13 @@ def format_dataframe(df, dataset):
     return filtered_df
 
 def update_tables(selected_types):
+    """
+    Update tables based on selected model types.
+    Include all models from selected categories.
+    """
+    if not selected_types:
+        return [df.head(0) for df in [df_synthesized_full, df_synthesized_10, df_human_generated]]
+
     filtered_df_full = filter_by_model_type(df_synthesized_full, selected_types)
     filtered_df_10 = filter_by_model_type(df_synthesized_10, selected_types)
     filtered_df_human = filter_by_model_type(df_human_generated, selected_types)
@@ -785,7 +738,6 @@ def update_tables(selected_types):
 
     return outputs
 
-
 css = """
 table > thead {
     white-space: normal
@@ -878,26 +830,27 @@ with gr.Blocks(css=css) as demo:
                     label="Contact Email(s)*",
                     placeholder="email@example.com; another@example.com"
                 )
-
-            with gr.Column():
                 model_type = gr.Dropdown(
                     choices=list(model_types.keys()),
                     label="Model Type*",
-                    value="Others"
+                    value="Others",
+                    info="Select the appropriate category for your model"
+                )
+                model_description = gr.Textbox(
+                    label="Model Description*",
+                    lines=3,
+                    placeholder="Briefly describe how your retriever model works..."
                 )
                 code_repo = gr.Textbox(
                     label="Code Repository*",
                     placeholder="https://github.com/snap-stanford/stark-leaderboard"
                 )
+
+            with gr.Column():
                 csv_file = gr.File(
                     label="Prediction CSV*",
                     file_types=[".csv"],
-                    type="filepath"
-                )
-                model_description = gr.Textbox(
-                    label="Model Description*",
-                    lines=3,
-                    placeholder="Briefly describe how your retriever model works..."
+                    type="filepath"
                 )
                 hardware = gr.Textbox(
                     label="Hardware Specifications*",
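
For reference, a minimal, self-contained sketch of how the two helpers added in this commit behave. The `model_types` mapping and the print calls below are made-up stand-ins (the real dictionary is defined elsewhere in app.py and is not part of this diff); the two function bodies are copied from the hunk above.

# Illustrative sketch only -- `model_types` here is a hypothetical stand-in,
# not the real mapping defined in app.py.
model_types = {
    "Sparse Retriever": ["BM25"],
    "Dense Retriever": ["ada-002"],
    "Others": [],
}

def get_model_type_for_method(method_name):
    """
    Find the model type category for a given method name.
    Returns 'Others' if not found in predefined categories.
    """
    for type_name, methods in model_types.items():
        if method_name in methods:
            return type_name
    return 'Others'

def validate_model_type(method_name, selected_type):
    """
    Validate if the selected model type is appropriate for the method name.
    Returns (is_valid, message).
    """
    # Check if method exists in any category
    existing_type = None
    for type_name, methods in model_types.items():
        if method_name in methods:
            existing_type = type_name
            break

    # If method exists, it must be submitted under its predefined category
    if existing_type:
        if existing_type != selected_type:
            return False, f"This method name is already registered under '{existing_type}'. Please use the correct category."
        return True, "Valid model type"

    # For new methods, any category is valid
    return True, "Valid model type"

print(validate_model_type("BM25", "Dense Retriever"))    # (False, "... registered under 'Sparse Retriever' ...")
print(validate_model_type("BM25", "Sparse Retriever"))   # (True, 'Valid model type')
print(validate_model_type("MyNewRetriever", "Others"))   # (True, 'Valid model type')
print(get_model_type_for_method("MyNewRetriever"))       # 'Others'

Compared with the removed check in process_submission, which forced every previously unseen method into 'Others', validate_model_type only pins already-registered methods to their category and lets new methods choose any category; scan_submissions_directory then records the category declared in each metadata.json found under submissions/.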