Shiyu Zhao committed on
Commit
518f6cc
·
1 Parent(s): e65749f

Update space

Browse files
Files changed (1) hide show
  1. app.py +44 -33
app.py CHANGED
@@ -223,14 +223,21 @@ def scan_submissions_directory():
223
  # Initialize HuggingFace API
224
  api = HfApi()
225
 
226
- # Get submissions directory content from HuggingFace hub
 
 
 
 
 
 
 
227
  try:
228
- repo_files = api.list_repo_files(
229
  repo_id=REPO_ID,
230
  repo_type="space"
231
  )
232
  # Filter for files in submissions directory
233
- repo_files = [f for f in repo_files if f.startswith('submissions/')]
234
  except Exception as e:
235
  print(f"Error listing repository contents: {str(e)}")
236
  return
@@ -238,61 +245,60 @@ def scan_submissions_directory():
238
  if not repo_files:
239
  print("No submissions directory found or empty")
240
  return
241
-
242
- # Track submissions for each split
243
- submissions_by_split = {
244
- 'test': [],
245
- 'test-0.1': [],
246
- 'human_generated_eval': []
247
- }
248
 
249
  # Group files by team folders
250
  folder_files = {}
251
- for file_info in repo_files:
252
- path = file_info.path
253
- if not path.startswith('submissions/'):
254
  continue
255
 
256
- parts = path.split('/')
257
- if len(parts) < 3: # submissions/folder_name/file
258
- continue
259
-
260
- folder_name = parts[1]
261
  if folder_name not in folder_files:
262
  folder_files[folder_name] = []
263
- folder_files[folder_name].append(path)
264
 
265
  # Process each team folder
266
  for folder_name, files in folder_files.items():
267
  try:
268
- # Look for latest.json
269
  latest_file = next((f for f in files if f.endswith('latest.json')), None)
270
  if not latest_file:
 
271
  continue
272
-
273
  # Read latest.json
274
  try:
275
- latest_content = hub_storage.get_file_content(latest_file)
 
 
 
 
 
276
  latest_info = json.loads(latest_content)
277
  except Exception as e:
278
  print(f"Error reading latest.json for {folder_name}: {str(e)}")
279
  continue
280
 
 
281
  if latest_info.get('status') != 'approved':
 
282
  continue
283
-
284
  timestamp = latest_info.get('latest_submission')
285
  if not timestamp:
 
286
  continue
287
-
288
- # Find corresponding metadata file
289
  metadata_file = next(
290
  (f for f in files if f.endswith(f'metadata_{timestamp}.json')),
291
  None
292
  )
293
  if not metadata_file:
 
294
  continue
295
-
296
  # Read metadata file
297
  try:
298
  metadata_content = api.hf_hub_download(
@@ -306,19 +312,21 @@ def scan_submissions_directory():
306
  print(f"Error reading metadata for {folder_name}: {str(e)}")
307
  continue
308
 
 
309
  split = submission_data.get('Split')
310
  if split in submissions_by_split:
311
  submissions_by_split[split].append(submission_data)
312
-
313
- # Update corresponding DataFrame
314
  update_leaderboard_data(submission_data)
315
- print(f"Added submission from {folder_name} to {split} leaderboard")
 
 
316
 
317
  except Exception as e:
318
  print(f"Error processing folder {folder_name}: {str(e)}")
319
  continue
320
 
321
- print("Leaderboard initialized with existing submissions:")
 
322
  for split, submissions in submissions_by_split.items():
323
  print(f"{split}: {len(submissions)} submissions")
324
 
@@ -844,6 +852,11 @@ with gr.Blocks(css=css) as demo:
844
  gr.Markdown("# Semi-structured Retrieval Benchmark (STaRK) Leaderboard")
845
  gr.Markdown("Refer to the [STaRK paper](https://arxiv.org/pdf/2404.13207) for details on metrics, tasks and models.")
846
 
 
 
 
 
 
847
  # Model type filter
848
  model_type_filter = gr.CheckboxGroup(
849
  choices=list(model_types.keys()),
@@ -934,9 +947,7 @@ with gr.Blocks(css=css) as demo:
934
 
935
  submit_btn = gr.Button("Submit", variant="primary")
936
  result = gr.Textbox(label="Submission Status", interactive=False)
937
-
938
- # Initialize leaderboard at startup
939
- initialize_leaderboard()
940
 
941
  # Set up event handlers
942
  model_type_filter.change(
 
223
  # Initialize HuggingFace API
224
  api = HfApi()
225
 
226
+ # Track submissions for each split
227
+ submissions_by_split = {
228
+ 'test': [],
229
+ 'test-0.1': [],
230
+ 'human_generated_eval': []
231
+ }
232
+
233
+ # Get all files from repository
234
  try:
235
+ all_files = api.list_repo_files(
236
  repo_id=REPO_ID,
237
  repo_type="space"
238
  )
239
  # Filter for files in submissions directory
240
+ repo_files = [f for f in all_files if f.startswith('submissions/')]
241
  except Exception as e:
242
  print(f"Error listing repository contents: {str(e)}")
243
  return
 
245
  if not repo_files:
246
  print("No submissions directory found or empty")
247
  return
 
 
 
 
 
 
 
248
 
249
  # Group files by team folders
250
  folder_files = {}
251
+ for filepath in repo_files:
252
+ parts = filepath.split('/')
253
+ if len(parts) < 3: # Need at least submissions/team_folder/file
254
  continue
255
 
256
+ folder_name = parts[1] # team_folder name
 
 
 
 
257
  if folder_name not in folder_files:
258
  folder_files[folder_name] = []
259
+ folder_files[folder_name].append(filepath)
260
 
261
  # Process each team folder
262
  for folder_name, files in folder_files.items():
263
  try:
264
+ # Find latest.json in this folder
265
  latest_file = next((f for f in files if f.endswith('latest.json')), None)
266
  if not latest_file:
267
+ print(f"No latest.json found in {folder_name}")
268
  continue
269
+
270
  # Read latest.json
271
  try:
272
+ latest_content = api.hf_hub_download(
273
+ repo_id=REPO_ID,
274
+ repo_type="space",
275
+ filename=latest_file,
276
+ text=True
277
+ )
278
  latest_info = json.loads(latest_content)
279
  except Exception as e:
280
  print(f"Error reading latest.json for {folder_name}: {str(e)}")
281
  continue
282
 
283
+ # Check submission status
284
  if latest_info.get('status') != 'approved':
285
+ print(f"Skipping unapproved submission in {folder_name}")
286
  continue
287
+
288
  timestamp = latest_info.get('latest_submission')
289
  if not timestamp:
290
+ print(f"No timestamp found in latest.json for {folder_name}")
291
  continue
292
+
293
+ # Find metadata file
294
  metadata_file = next(
295
  (f for f in files if f.endswith(f'metadata_{timestamp}.json')),
296
  None
297
  )
298
  if not metadata_file:
299
+ print(f"No matching metadata file found for {folder_name} timestamp {timestamp}")
300
  continue
301
+
302
  # Read metadata file
303
  try:
304
  metadata_content = api.hf_hub_download(
 
312
  print(f"Error reading metadata for {folder_name}: {str(e)}")
313
  continue
314
 
315
+ # Update leaderboard
316
  split = submission_data.get('Split')
317
  if split in submissions_by_split:
318
  submissions_by_split[split].append(submission_data)
 
 
319
  update_leaderboard_data(submission_data)
320
+ print(f"Successfully added submission from {folder_name} to {split} leaderboard")
321
+ else:
322
+ print(f"Invalid split '{split}' found in {folder_name}")
323
 
324
  except Exception as e:
325
  print(f"Error processing folder {folder_name}: {str(e)}")
326
  continue
327
 
328
+ # Print summary
329
+ print("\nLeaderboard initialization summary:")
330
  for split, submissions in submissions_by_split.items():
331
  print(f"{split}: {len(submissions)} submissions")
332
 
 
852
  gr.Markdown("# Semi-structured Retrieval Benchmark (STaRK) Leaderboard")
853
  gr.Markdown("Refer to the [STaRK paper](https://arxiv.org/pdf/2404.13207) for details on metrics, tasks and models.")
854
 
855
+ # Initialize leaderboard at startup
856
+ print("Starting leaderboard initialization...")
857
+ initialize_leaderboard()
858
+ print("Leaderboard initialization finished")
859
+
860
  # Model type filter
861
  model_type_filter = gr.CheckboxGroup(
862
  choices=list(model_types.keys()),
 
947
 
948
  submit_btn = gr.Button("Submit", variant="primary")
949
  result = gr.Textbox(label="Submission Status", interactive=False)
950
+
 
 
951
 
952
  # Set up event handlers
953
  model_type_filter.change(