Lakoc committed
Commit: 7fc9a28
Parent: 84a47e4

Leaderboard split into 4 categories, updates of the logic and GT added, simplified CER for Mandarin
Files changed:
- app.py (+18 -12)
- content.py (+8 -1)
- leaderboard_server.py (+172 -73)
- references/multi_channel_gt_diar/aishell4.json (+0 -0)
- references/multi_channel_gt_diar/alimeeting.json (+0 -0)
- references/multi_channel_gt_diar/ami-mdm.json (+0 -0)
- references/multi_channel_gt_diar/chime6-mdm.json (+0 -0)
- references/multi_channel_gt_diar/notsofar1-small-mdm.json (+0 -0)
- references/multi_channel_real_diar/aishell4.json (+0 -0)
- references/multi_channel_real_diar/alimeeting.json (+0 -0)
- references/multi_channel_real_diar/ami-mdm.json (+0 -0)
- references/multi_channel_real_diar/chime6-mdm.json (+0 -0)
- references/multi_channel_real_diar/notsofar1-small-mdm.json (+0 -0)
- requirements.txt (+2 -1)
- tasks_metadata.json (+6 -0)
app.py CHANGED

@@ -3,7 +3,7 @@ import os
 import gradio as gr
 from gradio_modal import Modal
 
-from content import HEADER_MARKDOWN, LEADERBOARD_TAB_TITLE_MARKDOWN, SUBMISSION_TAB_TITLE_MARKDOWN, ADDITIONAL_NOTES_MARKDOWN
+from content import HEADER_MARKDOWN, LEADERBOARD_TAB_TITLE_MARKDOWN, SUBMISSION_TAB_TITLE_MARKDOWN, ADDITIONAL_NOTES_MARKDOWN, LEADERBOARD_CSS
 from leaderboard_server import LeaderboardServer
 
 # Initialize server and task list
@@ -23,11 +23,11 @@ def update_datasets(task):
     return gr.CheckboxGroup(choices=get_datasets_for_task(task), value=get_datasets_for_task(task))
 
 
-def submit_model(task, datasets, hyp_file, submitted_by, model_id, token, normalize):
+def submit_model(task, datasets, hyp_file, submitted_by, model_id, model_link_input, token, normalize):
     if not hyp_file:
         return gr.update(visible=True, value="⚠️ Please upload a hypothesis file.")
 
-    if not submitted_by.strip() or not model_id.strip() or not token.strip():
+    if not submitted_by.strip() or not model_id.strip() or not model_link_input.strip() or not token.strip():
         return gr.update(visible=True, value="⚠️ All fields are required.")
 
     if token.strip() != EXPECTED_TOKEN:
@@ -36,10 +36,11 @@ def submit_model(task, datasets, hyp_file, submitted_by, model_id, token, normal
     metadata = {
         "submitted_by": submitted_by.strip(),
         "model_id": model_id.strip(),
+        "model_link": model_link_input.strip(),
         "normalize": normalize  # Include normalization info in metadata if needed
     }
 
-    leaderboard_df = server.get_leaderboard()
+    leaderboard_df = server.get_leaderboard(task)
     if len(leaderboard_df) > 0:
         existing = leaderboard_df[
             (leaderboard_df["Submitted by"] == submitted_by.strip()) &
@@ -52,27 +53,30 @@ def submit_model(task, datasets, hyp_file, submitted_by, model_id, token, normal
         server.prepare_model_for_submission(
             hyp_file.name, metadata, task, datasets, normalize=normalize
         )
-        server.update_leaderboard()
         return gr.update(visible=True, value="✅ Submission successful!")
     except Exception as e:
         print(e)
         return gr.update(visible=True, value=f"❌ Error: {str(e)}")
 
-def get_leaderboard_df():
-    return server.get_leaderboard()
+def get_leaderboard_df(task):
+    return server.get_leaderboard(task)
 
 # Gradio UI
-with gr.Blocks() as demo:
+with gr.Blocks(css=LEADERBOARD_CSS) as demo:
     gr.Markdown(HEADER_MARKDOWN)
 
     with gr.Tabs(selected=0) as tabs:
         with gr.Tab("📈 Leaderboard"):
             gr.Markdown(LEADERBOARD_TAB_TITLE_MARKDOWN)
-            ...
+            leaderboard_task_dropdown = gr.Dropdown(choices=TASKS, value=TASKS[0], label="Select Task for Leaderboard")
+            leaderboard_output = gr.components.Dataframe(
+                datatype=["markdown", "markdown", "float", "float", "float", "float", "float", "float"],
+                value=lambda: get_leaderboard_df(TASKS[0]),
                 interactive=False,
                 label="Leaderboard"
             )
+            leaderboard_task_dropdown.change(fn=get_leaderboard_df, inputs=leaderboard_task_dropdown,
+                                             outputs=leaderboard_output)
 
         with gr.Tab("📤 Submit"):
             gr.Markdown(SUBMISSION_TAB_TITLE_MARKDOWN)
@@ -85,6 +89,7 @@ with gr.Blocks() as demo:
             with gr.Row():
                 submitted_by_input = gr.Text(label="Submitted by")
                 model_id_input = gr.Text(label="Model Identifier")
+                model_link_input = gr.Text(label="Model Link", placeholder="Link to model or code repository")
                 token_input = gr.Text(label="Submission Token", type="password")
 
             hyp_file_upload = gr.File(label="Upload Hypothesis JSON", file_types=[".json"])
@@ -103,10 +108,11 @@ with gr.Blocks() as demo:
             ).then(
                 fn=submit_model,
                 inputs=[task_dropdown, dataset_checkboxes, hyp_file_upload,
-                        submitted_by_input, model_id_input, token_input, normalize_checkbox],
+                        submitted_by_input, model_id_input, model_link_input, token_input, normalize_checkbox],
                 outputs=[feedback_text],
             ).then(
-                lambda: ...
+                fn=lambda task: get_leaderboard_df(task),
+                inputs=task_dropdown,
                 outputs=leaderboard_output
             )
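Note on the change above: the leaderboard tab is now driven by a task dropdown whose change event re-queries the server. A minimal, self-contained sketch of that dropdown-to-Dataframe refresh pattern follows; the task names mirror tasks_metadata.json, while the stand-in get_leaderboard_df and its dummy row are illustrative only (the real app calls server.get_leaderboard(task)).

import gradio as gr
import pandas as pd

TASKS = ["single_channel_gt_diar", "single_channel_real_diar",
         "multi_channel_gt_diar", "multi_channel_real_diar"]

def get_leaderboard_df(task: str) -> pd.DataFrame:
    # Stand-in for server.get_leaderboard(task); returns a dummy row for the selected task.
    return pd.DataFrame([{"Model ID": "demo-model", "Submitted by": "demo", "Task": task}])

with gr.Blocks() as demo:
    task_dropdown = gr.Dropdown(choices=TASKS, value=TASKS[0], label="Select Task for Leaderboard")
    table = gr.Dataframe(value=get_leaderboard_df(TASKS[0]), interactive=False, label="Leaderboard")
    # Re-render the table whenever the selected task changes.
    task_dropdown.change(fn=get_leaderboard_df, inputs=task_dropdown, outputs=table)

if __name__ == "__main__":
    demo.launch()

The same function supplies both the initial table value and the change handler, which is the wiring the diff above uses for app.py.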
content.py CHANGED

@@ -10,7 +10,9 @@ Welcome to the official leaderboard for benchmarking **multi-talker ASR systems*
 LEADERBOARD_TAB_TITLE_MARKDOWN = """
 ## Leaderboard
 
-Below you’ll find the latest results submitted to the benchmark. Models are evaluated using **`meeteval`** with **TCP-WER [%] (collar=5s)**.
+Below you’ll find the latest results submitted to the benchmark. Models are evaluated using **`meeteval`** with **TCP-WER [%] (collar=5s)**.
+
+For AISHELL-4 and AliMeeting conversion to simplified Mandarin is applied, and tcpCER [%] is used.
 """
 
 SUBMISSION_TAB_TITLE_MARKDOWN = """
@@ -43,3 +45,8 @@ You can choose to disable this using the checkbox above.
 """
 
 
+LEADERBOARD_CSS = """
+#leaderboard-table th .header-content {
+    white-space: nowrap;
+}
+"""
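The tcpCER note added above corresponds to the Mandarin handling in leaderboard_server.py: hypotheses and references are run through OpenCC and split into space-separated characters, so meeteval's word-level tcpWER over those tokens behaves like a character error rate. A small sketch of that preprocessing, reusing the same OpenCC config string as the diff; the sample sentence is made up.

import opencc

# Same converter configuration as in leaderboard_server.py.
converter = opencc.OpenCC('s2t.json')

def to_char_tokens(text: str) -> str:
    # Convert the script, then split into individual characters so that a
    # word-level tcpWER computed over these tokens is effectively a tcpCER.
    converted = converter.convert(text)
    return " ".join(list(converted))

print(to_char_tokens("欢迎参加会议"))  # made-up example utterance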
leaderboard_server.py CHANGED

@@ -1,110 +1,209 @@
 import json
-import os
+from pathlib import Path
+from typing import Dict, List
 
 import meeteval.io
 import pandas as pd
-from utils import calc_wer, aggregate_wer_metrics
-from txt_norm import get_text_norm
-
-
-
-TASKS_METADATA_PATH = os.path.abspath("tasks_metadata.json")
-
-def list_files(startpath):
-    for root, dirs, files in os.walk(startpath):
-        level = root.replace(startpath, '').count(os.sep)
-        indent = ' ' * 4 * (level)
-        print('{}{}/'.format(indent, os.path.basename(root)))
-        subindent = ' ' * 4 * (level + 1)
-        for f in files:
-            print('{}{}'.format(subindent, f))
+
+from txt_norm import get_text_norm
+from utils import calc_wer, aggregate_wer_metrics
+
 
 class LeaderboardServer:
-    ...
+    """Manages ASR model submissions and leaderboard generation."""
+
+    def __init__(self,
+                 reference_base_path: str = "references",
+                 tasks_metadata_path: str = "tasks_metadata.json",
+                 local_leaderboard_path: str = "submissions"):
+        """Initialize the leaderboard server.
+
+        Args:
+            reference_base_path: Base path for reference files
+            tasks_metadata_path: Path to tasks metadata JSON file
+            local_leaderboard_path: Directory for storing submissions
+        """
+        self.reference_base_path = Path(reference_base_path).resolve()
+        self.tasks_metadata_path = Path(tasks_metadata_path).resolve()
+        self.local_leaderboard = Path(local_leaderboard_path).resolve()
+
+        # Load tasks metadata
+        self.tasks_metadata = self._load_tasks_metadata()
+
+        # Initialize storage
+        self.local_leaderboard.mkdir(exist_ok=True)
         self.text_normalizer = get_text_norm("whisper_nsf")
 
-    def ...
-        ...
-        with open(results_path) as f:
-            ...
+    def _load_tasks_metadata(self) -> Dict:
+        """Load tasks metadata from JSON file."""
+        try:
+            with open(self.tasks_metadata_path) as f:
+                return json.load(f)["tasks"]
+        except (FileNotFoundError, KeyError, json.JSONDecodeError) as e:
+            raise ValueError(f"Failed to load tasks metadata: {e}")
+
+    def _get_results_file_path(self, task: str) -> Path:
+        """Get the path to the results file for a specific task."""
+        return self.local_leaderboard / f"{task}_results.json"
+
+    def _create_submission_id(self, metadata: Dict[str, str]) -> str:
+        """Create a unique submission ID from metadata."""
+        return f"{metadata['submitted_by']}_{metadata['model_id']}"
+
+    def _normalize_text_if_needed(self, segment: Dict, normalize: bool) -> Dict:
+        """Apply text normalization to a segment if requested."""
+        if normalize:
+            return {**segment, "words": self.text_normalizer(segment["words"])}
+        return segment
+
+    def _evaluate_dataset(self,
+                          hyp_seglst,
+                          ref_seglst,
+                          normalize: bool = False) -> Dict:
+        """Evaluate WER for a single dataset."""
+        # Apply normalization if requested
+        if normalize:
+            ref_seglst = ref_seglst.map(lambda seg: self._normalize_text_if_needed(seg, True))
+            hyp_seglst = hyp_seglst.map(lambda seg: self._normalize_text_if_needed(seg, True))
+
+        # Calculate WER metrics
+        per_session_wers = calc_wer(
+            tcp_hyp_seglst=hyp_seglst,
+            ref_seglst=ref_seglst,
+            collar=5,
+            metrics_list=["tcp_wer"]
+        )
+
+        return aggregate_wer_metrics(per_session_wers, ["tcp_wer"])
+
+    def _load_existing_results(self, task: str) -> Dict:
+        """Load existing results for a task, or return empty dict."""
+        results_path = self._get_results_file_path(task)
+        if results_path.exists():
+            with open(results_path) as f:
+                return json.load(f)
+        return {}
 
+    def _save_results(self, task: str, results: Dict) -> None:
+        """Save results to the task results file."""
+        results_path = self._get_results_file_path(task)
+        with open(results_path, "w") as f:
+            json.dump(results, f, indent=2)
+
+    def _save_hypothesis_file(self,
+                              task: str,
+                              submission_id: str,
+                              source_file: str) -> None:
+        """Save the hypothesis file for future reference."""
+        hyp_filename = f"{task}_{submission_id}_hyp.json"
+        hyp_filepath = self.local_leaderboard / hyp_filename
+
+        with open(hyp_filepath, "w") as out_f:
+            with open(source_file, "r") as in_f:
+                out_f.write(in_f.read())
+
+    def prepare_model_for_submission(self,
+                                     file: str,
+                                     metadata: Dict[str, str],
+                                     task: str,
+                                     datasets: List[str],
+                                     normalize: bool = False) -> None:
+        """Prepare and evaluate a model submission.
+
+        Args:
+            file: Path to the hypothesis file
+            metadata: Submission metadata containing 'submitted_by' and 'model_id'
+            task: Task name
+            datasets: List of dataset names to evaluate on
+            normalize: Whether to apply text normalization
+        """
+        submission_id = self._create_submission_id(metadata)
+
+        # Load hypothesis segments
         hyp_seglst = meeteval.io.load(file)
 
+        # Evaluate on each dataset
+        results = {}
         for dataset in datasets:
-            ref_path = ...
+            ref_path = self.reference_base_path / task / f"{dataset}.json"
+
+            if not ref_path.exists():
+                raise FileNotFoundError(f"Reference file not found: {ref_path}")
+
             ref_seglst = meeteval.io.load(ref_path)
             sessions = ref_seglst.unique('session_id')
+
+            # Filter hypotheses to match reference sessions
             local_hyps = hyp_seglst.filter(lambda seg: seg['session_id'] in sessions)
-            ref_seglst = ref_seglst.map(lambda seg: {**seg, "words":self.text_normalizer(seg["words"]) if normalize else seg["words"]})
-            local_hyps = local_hyps.map(lambda seg: {**seg, "words":self.text_normalizer(seg["words"]) if normalize else seg["words"]})
-            per_session_wers = calc_wer(tcp_hyp_seglst=local_hyps, ref_seglst=ref_seglst, collar=5, metrics_list=["tcp_wer"])
-            metrics = aggregate_wer_metrics(per_session_wers, ["tcp_wer"])
-            results[dataset] = metrics
 
-        ...
+            if "alimeeting" in dataset or "aishell4" in dataset:
+                import opencc
+                converter = opencc.OpenCC('s2t.json')
+                local_hyps = local_hyps.map(lambda seg: {**seg, "words": " ".join(list(converter.convert(seg["words"])))})
+                ref_seglst = ref_seglst.map(lambda seg: {**seg, "words": " ".join(list(converter.convert(seg["words"])))})
+
+            # Evaluate this dataset
+            results[dataset] = self._evaluate_dataset(local_hyps, ref_seglst, normalize)
 
+        # Update results file
+        all_results = self._load_existing_results(task)
+        all_results[submission_id] = {
+            "model_link": metadata["model_link"],
+            "model_id": metadata["model_id"],
+            "submitted_by": metadata["submitted_by"],
             "results": results
         }
 
-        ...
+        self._save_results(task, all_results)
+        self._save_hypothesis_file(task, submission_id, file)
 
-    def ...
-        ...
+    @staticmethod
+    def make_clickable_model(model_name, link):
+        return f'<a target="_blank" href="{link}" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">{model_name}</a>'
+
+    def get_leaderboard(self, task: str) -> pd.DataFrame:
+        """Generate leaderboard DataFrame for a specific task.
+
+        Args:
+            task: Task name
+
+        Returns:
+            DataFrame containing leaderboard results
+        """
+        results_path = self._get_results_file_path(task)
+
+        if not results_path.exists():
             return pd.DataFrame(columns=["No submissions yet"])
 
         with open(results_path) as f:
             results = json.load(f)
 
+        if not results:
+            return pd.DataFrame(columns=["No submissions yet"])
+
+        # Build rows for DataFrame
         rows = []
         for content in results.values():
-            row = {
-                ...
+            row = {
+                "Model ID": self.make_clickable_model(content["model_id"], content["model_link"]),
+                "Submitted by": content["submitted_by"]
+            }
+
+            # Add dataset results
+            for dataset, metrics in content["results"].items():
+                row[dataset] = metrics.get("tcp_wer")
+
            rows.append(row)
 
-        df = ...
+        df = pd.DataFrame(rows)
+
+        if df.empty:
+            return df
+
+        # Convert WER to percentage and format
+        numeric_columns = df.select_dtypes(include=['number']).columns
+        df[numeric_columns] *= 100.0
         df = df.round(2)
+        df = df.fillna("-")
 
         return df

(Removed lines shown as "..." were not rendered in the extracted diff.)
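For orientation, a short usage sketch of the reworked LeaderboardServer API defined above; all paths, metadata values, and dataset choices are placeholders (in the Space these calls are made from app.py).

from leaderboard_server import LeaderboardServer

server = LeaderboardServer()  # defaults: references/, tasks_metadata.json, submissions/

metadata = {
    "submitted_by": "example-team",       # placeholder
    "model_id": "example-asr-v1",         # placeholder
    "model_link": "https://example.org",  # placeholder
}

# Score a hypothesis file on one task's datasets and persist the results.
server.prepare_model_for_submission(
    "hyp.json",                           # placeholder hypothesis path
    metadata,
    task="multi_channel_gt_diar",
    datasets=["ami-mdm", "notsofar1-small-mdm"],
    normalize=True,
)

# Per-task leaderboard as a DataFrame (TCP-WER / tcpCER in %).
print(server.get_leaderboard("multi_channel_gt_diar"))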
references/multi_channel_gt_diar/aishell4.json ADDED (diff too large to render; see raw diff)
references/multi_channel_gt_diar/alimeeting.json ADDED (diff too large to render; see raw diff)
references/multi_channel_gt_diar/ami-mdm.json ADDED (diff too large to render; see raw diff)
references/multi_channel_gt_diar/chime6-mdm.json ADDED (diff too large to render; see raw diff)
references/multi_channel_gt_diar/notsofar1-small-mdm.json ADDED (diff too large to render; see raw diff)
references/multi_channel_real_diar/aishell4.json ADDED (diff too large to render; see raw diff)
references/multi_channel_real_diar/alimeeting.json ADDED (diff too large to render; see raw diff)
references/multi_channel_real_diar/ami-mdm.json ADDED (diff too large to render; see raw diff)
references/multi_channel_real_diar/chime6-mdm.json ADDED (diff too large to render; see raw diff)
references/multi_channel_real_diar/notsofar1-small-mdm.json ADDED (diff too large to render; see raw diff)
requirements.txt CHANGED

@@ -9,4 +9,5 @@ simplejson
 more-itertools
 meeteval
 gradio_modal
-regex
+regex
+opencc
tasks_metadata.json CHANGED

@@ -5,6 +5,12 @@
     },
     "single_channel_real_diar": {
       "name": "Single Channel - Real Diarization"
+    },
+    "multi_channel_gt_diar": {
+      "name": "Multi Channel - Ground Truth Diarization"
+    },
+    "multi_channel_real_diar": {
+      "name": "Multi Channel - Real Diarization"
     }
   }
 }
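With the two new entries above, the metadata file now describes four task categories. A sketch of how they can be listed follows; app.py derives its TASKS list from this file via LeaderboardServer, but that code is outside this diff, so the snippet is only an assumed equivalent.

import json

# Lists the task categories defined in tasks_metadata.json (four after this commit);
# the "tasks" key matches what LeaderboardServer._load_tasks_metadata reads.
with open("tasks_metadata.json") as f:
    tasks = json.load(f)["tasks"]

for task_id, info in tasks.items():
    print(task_id, "->", info["name"])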