David Pomerenke committed
Commit 8f4448c · 1 Parent(s): e223525

Display ASR-WER in app

Files changed (1)
  1. app.py +19 -3
app.py CHANGED
@@ -60,6 +60,17 @@ METRICS = {
         between predicted and actual text. Higher scores indicate better language understanding.
         """,
     },
+    "asr_wer": {
+        "display_name": "Automatic Speech Recognition (WER)",
+        "field_name": "asr_wer",
+        "label": "WER",
+        "explanation": """
+        **Automatic Speech Recognition Word Error Rate**: Measures the accuracy of speech-to-text transcription.
+        It calculates the minimum number of word edits (insertions, deletions, substitutions) needed to transform the
+        transcription into the reference text, divided by the number of words in the reference.
+        Lower scores indicate better performance, with 0 being perfect transcription.
+        """,
+    },
 }
 
 
@@ -195,6 +206,8 @@ def create_model_comparison_plot(metric):
     for lang in top_languages:
         for score in lang["scores"]:
             # Get the value directly using the field name
+            if metric["field_name"] not in score:
+                continue
             value = score[metric["field_name"]]
             if value is not None:
                 scores_flat.append(
@@ -254,15 +267,18 @@ def create_language_stats_df(metric):
             "Overall": round(lang["overall_score"], 3)
             if lang["overall_score"] is not None
             else "N/A",
-            "Trans-lation": round(lang["mt_bleu"], 3)
+            "Translation": round(lang["mt_bleu"], 3)
             if lang["mt_bleu"] is not None
             else "N/A",
-            "Classi-fication": round(lang["cls_acc"], 3)
+            "Classification": round(lang["cls_acc"], 3)
             if lang["cls_acc"] is not None
             else "N/A",
             "MLM": round(lang["mlm_chrf"], 3)
             if lang["mlm_chrf"] is not None
             else "N/A",
+            "ASR": round(lang["asr_wer"], 3)
+            if lang["asr_wer"] is not None
+            else "N/A",
             "Best Model": model_link,
             "CommonVoice Hours": commonvoice_link,
         }
@@ -299,7 +315,7 @@ def create_scatter_plot(metric):
         scores = [
             score[metric["field_name"]]
             for score in lang["scores"]
-            if score[metric["field_name"]] is not None
+            if metric["field_name"] in score and score[metric["field_name"]] is not None
         ]
         if scores:  # Only include if we have valid scores
            avg_score = sum(scores) / len(scores)
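The WER definition quoted in the new metric's explanation can be made concrete with a small worked example. The sketch below is illustrative only and is not part of this commit: app.py only displays precomputed asr_wer values, and the word_error_rate helper name and its whitespace tokenization are assumptions.

# Illustrative sketch of the WER formula described in the explanation above.
# Hypothetical helper; assumes simple whitespace tokenization.
def word_error_rate(reference: str, hypothesis: str) -> float:
    ref, hyp = reference.split(), hypothesis.split()
    if not ref:
        return 0.0 if not hyp else float(len(hyp))
    # Word-level Levenshtein (edit) distance via dynamic programming.
    d = [[0] * (len(hyp) + 1) for _ in range(len(ref) + 1)]
    for i in range(len(ref) + 1):
        d[i][0] = i  # i deletions to reach an empty hypothesis
    for j in range(len(hyp) + 1):
        d[0][j] = j  # j insertions from an empty reference
    for i in range(1, len(ref) + 1):
        for j in range(1, len(hyp) + 1):
            cost = 0 if ref[i - 1] == hyp[j - 1] else 1  # substitution cost
            d[i][j] = min(
                d[i - 1][j] + 1,         # deletion
                d[i][j - 1] + 1,         # insertion
                d[i - 1][j - 1] + cost,  # substitution or match
            )
    # Minimum number of word edits divided by the number of reference words.
    return d[len(ref)][len(hyp)] / len(ref)

# One substitution ("dawn" for "down") out of four reference words -> WER = 0.25
print(word_error_rate("the cat sat down", "the cat sat dawn"))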