Ritesh-hf committed on
Commit
19eb6d4
·
verified ·
1 Parent(s): a80b9b2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +85 -23
app.py CHANGED
@@ -2,7 +2,10 @@ import gradio as gr
2
  from transformers import pipeline
3
  import numpy as np
4
  import pandas as pd
5
-
 
 
 
6
 
7
  MODEL_NAME = "openai/whisper-large-v3"
8
  BATCH_SIZE = 8
@@ -15,14 +18,62 @@ pipe = pipeline(
15
  # device=device,
16
  )
17
 
18
- # eng_classifier = pipeline("text-classification", model="Hate-speech-CNERG/bert-base-uncased-hatexplain")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
 
20
  def format_output_to_list(data):
21
  formatted_list = "\n".join([f"{item['timestamp'][0]}s - {item['timestamp'][1]}s \t : {item['text']}" for item in data])
22
  return formatted_list
23
 
24
- def transcribe(inputs, task, timestamp_type):
25
- if inputs is None:
26
  raise gr.Error("No audio file submitted! Please upload or record an audio file before submitting your request.")
27
 
28
  if timestamp_type == "sentence":
@@ -30,33 +81,42 @@ def transcribe(inputs, task, timestamp_type):
30
  else:
31
  timestamp_type = "word"
32
 
33
- output = pipe(inputs, batch_size=BATCH_SIZE, return_timestamps=timestamp_type, generate_kwargs={"task": task})
34
  text = output['text']
 
35
  timestamps = format_output_to_list(output['chunks'])
36
- return [text, timestamps]
 
 
 
 
 
 
 
37
 
38
  examples = [
39
- ["arabic_english_audios/audios/arabic_audio_1.wav"],
40
- ["arabic_english_audios/audios/arabic_audio_2.wav"],
41
- ["arabic_english_audios/audios/arabic_audio_3.wav"],
42
- ["arabic_english_audios/audios/arabic_audio_4.wav"],
43
- ["arabic_english_audios/audios/arabic_hate_audio_1.mp3"],
44
- ["arabic_english_audios/audios/arabic_hate_audio_2.mp3"],
45
- ["arabic_english_audios/audios/arabic_hate_audio_3.mp3"],
46
- ["arabic_english_audios/audios/english_audio_1.wav"],
47
- ["arabic_english_audios/audios/english_audio_2.mp3"],
48
- ["arabic_english_audios/audios/english_audio_3.mp3"],
49
- ["arabic_english_audios/audios/english_audio_4.mp3"],
50
- ["arabic_english_audios/audios/english_audio_5.mp3"],
51
- ["arabic_english_audios/audios/english_audio_6.wav"]
52
  ]
53
 
54
  with gr.Blocks(theme=gr.themes.Default()) as demo:
55
  gr.HTML("<h2 style='text-align: center;'>Transcribing Audio with Timestamps using whisper-large-v3</h2>")
56
- gr.Markdown("")
57
  with gr.Row():
58
  with gr.Column():
59
  audio_input = gr.Audio(sources=["upload", 'microphone'], type="filepath", label="Audio file")
 
60
  task = gr.Radio(["transcribe", "translate"], label="Task")
61
  timestamp_type = gr.Radio(["sentence", "word"], label="Timestamp Type")
62
  with gr.Row():
@@ -66,11 +126,13 @@ with gr.Blocks(theme=gr.themes.Default()) as demo:
66
  with gr.Column():
67
  transcript_output = gr.Text(label="Transcript")
68
  timestamp_output = gr.Text(label="Timestamps")
 
 
69
 
70
- examples = gr.Examples(examples, inputs=[audio_input, task, timestamp_type], outputs=[transcript_output, timestamp_output], fn=transcribe, examples_per_page=20)
71
 
72
- submit_button.click(fn=transcribe, inputs=[audio_input, task, timestamp_type], outputs=[transcript_output, timestamp_output])
73
- clear_button.add([audio_input, task, timestamp_type, transcript_output, timestamp_output])
74
 
75
 
76
  if __name__ == "__main__":
 
2
  from transformers import pipeline
3
  import numpy as np
4
  import pandas as pd
5
+ import re
6
+ from pydub import AudioSegment
7
+ from pydub.generators import Sine
8
+ import io
9
 
10
  MODEL_NAME = "openai/whisper-large-v3"
11
  BATCH_SIZE = 8
 
18
  # device=device,
19
  )
20
 
21
+ arabic_bad_Words = pd.read_csv("arabic_bad_words_dataset.csv")
22
+ english_bad_Words = pd.read_csv("english_bad_words_dataset.csv")
23
+
24
+
25
def clean_text(text):
    """Trim non-word characters from both ends of ``text``.

    Leading and trailing runs of whitespace, punctuation, other non-word
    characters and underscores are removed. Characters in the interior of the
    string (for example the apostrophe in "it's") are left untouched.
    """
    edge_noise = re.compile(r'^[\s\W_]+|[\s\W_]+$')
    return edge_noise.sub('', text)
29
+
30
def classifier(word_list_with_timestamp, language):
    """Label every transcribed chunk as "negative" or "positive".

    A chunk is "negative" when its text, after ``clean_text`` has trimmed the
    surrounding punctuation/whitespace, is an exact member of the "words"
    column of the bad-word table for the selected language.

    Args:
        word_list_with_timestamp: iterable of mappings that provide a 'text'
            entry and a 'timestamp' entry.
        language: "English" selects ``english_bad_Words``; any other value
            selects ``arabic_bad_Words``.

    Returns:
        A two-element list ``[output, negative_timestamps]`` where ``output``
        is a list of ``(original_text, label)`` tuples in input order and
        ``negative_timestamps`` holds the 'timestamp' value of every chunk
        labelled "negative".
    """
    if language == "English":
        bad_words_table = english_bad_Words
    else:
        # This branch used to read the English table as well, which meant the
        # Arabic word list was never consulted.
        bad_words_table = arabic_bad_Words
    list_to_search = set(bad_words_table["words"])

    output = []
    negative_timestamps = []
    for item in word_list_with_timestamp:
        original_text = item['text']
        if clean_text(original_text) in list_to_search:
            output.append((original_text, "negative"))
            negative_timestamps.append(item['timestamp'])
        else:
            output.append((original_text, "positive"))
    return [output, negative_timestamps]
47
+
48
def generate_bleep(duration_ms, frequency=1000):
    """Return a sine-tone audio segment used as the censor bleep.

    Args:
        duration_ms: value passed as the ``duration`` of the generated segment.
        frequency: value passed to the ``Sine`` generator (defaults to 1000).
    """
    return Sine(frequency).to_audio_segment(duration=duration_ms)
52
+
53
def mute_audio_range(audio_filepath, ranges, bleep_frequency=800):
    """Load an audio file and overwrite the given time ranges with a bleep.

    Args:
        audio_filepath: path handed to ``AudioSegment.from_file``.
        ranges: iterable of sequences whose first and last entries are the
            start and end of a span, in seconds. Each span is widened by 0.1 s
            on both sides before it is replaced.
        bleep_frequency: frequency forwarded to ``generate_bleep``.

    Returns:
        The resulting ``AudioSegment``. Every in-bounds span is replaced by a
        bleep of the same duration.
    """
    audio = AudioSegment.from_file(audio_filepath)

    for time_range in ranges:
        start_s = time_range[0]
        end_s = time_range[-1]
        if start_s is None:
            # Without a start there is nothing to anchor the bleep to.
            continue

        # pydub indexes in milliseconds
        total_ms = len(audio)

        # Clamp to the clip: a negative slice bound is interpreted by pydub as
        # an offset from the END of the audio, which would splice the bleep
        # into the wrong place for words spoken in the first 0.1 s.
        start_ms = max(0, (start_s - 0.1) * 1000)
        if end_s is None:
            # NOTE(review): the ASR pipeline presumably reports None when it
            # cannot place the end of the final chunk; bleep to the end of
            # the clip in that case - confirm against the pipeline docs.
            end_ms = total_ms
        else:
            end_ms = min(total_ms, (end_s + 0.1) * 1000)

        duration_ms = end_ms - start_ms
        if duration_ms <= 0:
            # Span lies entirely outside the clip (or is inverted); skip it.
            continue

        # Generate the bleep sound
        bleep_sound = generate_bleep(duration_ms, bleep_frequency)

        # Swap the span for a bleep of equal length so that the timestamps of
        # the remaining ranges still line up with the audio.
        audio = audio[:start_ms] + bleep_sound + audio[end_ms:]

    return audio
70
 
71
  def format_output_to_list(data):
72
  formatted_list = "\n".join([f"{item['timestamp'][0]}s - {item['timestamp'][1]}s \t : {item['text']}" for item in data])
73
  return formatted_list
74
 
75
+ def transcribe(input_audio, audio_language, task, timestamp_type):
76
+ if input_audio is None:
77
  raise gr.Error("No audio file submitted! Please upload or record an audio file before submitting your request.")
78
 
79
  if timestamp_type == "sentence":
 
81
  else:
82
  timestamp_type = "word"
83
 
84
+ output = pipe(input_audio, batch_size=BATCH_SIZE, return_timestamps=timestamp_type, generate_kwargs={"task": task})
85
  text = output['text']
86
+
87
  timestamps = format_output_to_list(output['chunks'])
88
+
89
+ classifier_output, negative_timestamps = classifier(output['chunks'], audio_language)
90
+
91
+ audio_output = mute_audio_range(input_audio, negative_timestamps)
92
+ output_path = "output_audio.wav"
93
+ audio_output.export(output_path, format="wav")
94
+
95
+ return [text, timestamps, classifier_output, output_path]
96
 
97
  examples = [
98
+ ["arabic_english_audios/audios/arabic_audio_1.wav", 'Arabic', 'transcribe', 'word'],
99
+ ["arabic_english_audios/audios/arabic_audio_2.wav", 'Arabic', 'transcribe', 'word'],
100
+ ["arabic_english_audios/audios/arabic_audio_3.wav", 'Arabic', 'transcribe', 'word'],
101
+ ["arabic_english_audios/audios/arabic_audio_4.wav", 'Arabic', 'transcribe', 'word'],
102
+ ["arabic_english_audios/audios/arabic_hate_audio_1.mp3", 'Arabic', 'transcribe', 'word'],
103
+ ["arabic_english_audios/audios/arabic_hate_audio_2.mp3", 'Arabic', 'transcribe', 'word'],
104
+ ["arabic_english_audios/audios/arabic_hate_audio_3.mp3", 'Arabic', 'transcribe', 'word'],
105
+ ["arabic_english_audios/audios/english_audio_1.wav", 'English', 'transcribe', 'word'],
106
+ ["arabic_english_audios/audios/english_audio_2.mp3", 'English', 'transcribe', 'word'],
107
+ ["arabic_english_audios/audios/english_audio_3.mp3", 'English', 'transcribe', 'word'],
108
+ ["arabic_english_audios/audios/english_audio_4.mp3", 'English', 'transcribe', 'word'],
109
+ ["arabic_english_audios/audios/english_audio_5.mp3", 'English', 'transcribe', 'word'],
110
+ ["arabic_english_audios/audios/english_audio_6.wav", 'English', 'transcribe', 'word']
111
  ]
112
 
113
  with gr.Blocks(theme=gr.themes.Default()) as demo:
114
  gr.HTML("<h2 style='text-align: center;'>Transcribing Audio with Timestamps using whisper-large-v3</h2>")
115
+ # gr.Markdown("")
116
  with gr.Row():
117
  with gr.Column():
118
  audio_input = gr.Audio(sources=["upload", 'microphone'], type="filepath", label="Audio file")
119
+ audio_language = gr.Radio(["Arabic", "English"], label="Audio Language")
120
  task = gr.Radio(["transcribe", "translate"], label="Task")
121
  timestamp_type = gr.Radio(["sentence", "word"], label="Timestamp Type")
122
  with gr.Row():
 
126
  with gr.Column():
127
  transcript_output = gr.Text(label="Transcript")
128
  timestamp_output = gr.Text(label="Timestamps")
129
+ highlighted_output = gr.HighlightedText(label="Words Classification", combine_adjacent=True, show_legend=True, color_map={"negative": "red", "positive": "green"})
130
+ output_audio = gr.Audio(label="Output Audio")
131
 
132
+ examples = gr.Examples(examples, inputs=[audio_input, audio_language, task, timestamp_type], outputs=[transcript_output, timestamp_output], fn=transcribe, examples_per_page=20)
133
 
134
+ submit_button.click(fn=transcribe, inputs=[audio_input, audio_language, task, timestamp_type], outputs=[transcript_output, timestamp_output, highlighted_output, output_audio])
135
+ clear_button.add([audio_input, audio_language, task, timestamp_type, transcript_output, timestamp_output, highlighted_output, output_audio])
136
 
137
 
138
  if __name__ == "__main__":