Update app.py
Browse files
app.py
CHANGED
@@ -34,16 +34,15 @@ def classifier(word_list_with_timestamp, language):
|
|
34 |
else:
|
35 |
list_to_search = set(english_bad_Words["words"])
|
36 |
|
37 |
-
|
38 |
negative_timestamps = []
|
39 |
for item in word_list_with_timestamp:
|
40 |
word = clean_text(item['text'])
|
41 |
if word in list_to_search:
|
42 |
-
|
|
|
43 |
negative_timestamps.append(item['timestamp'])
|
44 |
-
|
45 |
-
output.append((item['text'], "positive"))
|
46 |
-
return [output, negative_timestamps]
|
47 |
|
48 |
def generate_bleep(duration_ms, frequency=1000):
|
49 |
sine_wave = Sine(frequency)
|
@@ -86,13 +85,16 @@ def transcribe(input_audio, audio_language, task, timestamp_type):
|
|
86 |
|
87 |
timestamps = format_output_to_list(output['chunks'])
|
88 |
|
89 |
-
|
90 |
|
|
|
|
|
|
|
91 |
audio_output = mute_audio_range(input_audio, negative_timestamps)
|
92 |
output_path = "output_audio.wav"
|
93 |
audio_output.export(output_path, format="wav")
|
94 |
|
95 |
-
return [text, timestamps,
|
96 |
|
97 |
examples = [
|
98 |
["arabic_english_audios/audios/arabic_audio_1.wav", 'Arabic', 'transcribe', 'word'],
|
@@ -126,10 +128,10 @@ with gr.Blocks(theme=gr.themes.Default()) as demo:
|
|
126 |
with gr.Column():
|
127 |
transcript_output = gr.Text(label="Transcript")
|
128 |
timestamp_output = gr.Text(label="Timestamps")
|
129 |
-
|
130 |
output_audio = gr.Audio(label="Output Audio")
|
131 |
|
132 |
-
examples = gr.Examples(examples, inputs=[audio_input, audio_language, task, timestamp_type], outputs=[transcript_output, timestamp_output], fn=transcribe, examples_per_page=20)
|
133 |
|
134 |
submit_button.click(fn=transcribe, inputs=[audio_input, audio_language, task, timestamp_type], outputs=[transcript_output, timestamp_output, highlighted_output, output_audio])
|
135 |
clear_button.add([audio_input, audio_language, task, timestamp_type, transcript_output, timestamp_output, highlighted_output, output_audio])
|
|
|
34 |
else:
|
35 |
list_to_search = set(english_bad_Words["words"])
|
36 |
|
37 |
+
foul_words = []
|
38 |
negative_timestamps = []
|
39 |
for item in word_list_with_timestamp:
|
40 |
word = clean_text(item['text'])
|
41 |
if word in list_to_search:
|
42 |
+
if word not in foul_words:
|
43 |
+
foul_words.append(word)
|
44 |
negative_timestamps.append(item['timestamp'])
|
45 |
+
return [foul_words, negative_timestamps]
|
|
|
|
|
46 |
|
47 |
def generate_bleep(duration_ms, frequency=1000):
|
48 |
sine_wave = Sine(frequency)
|
|
|
85 |
|
86 |
timestamps = format_output_to_list(output['chunks'])
|
87 |
|
88 |
+
foul_words_list, negative_timestamps = classifier(output['chunks'], audio_language)
|
89 |
|
90 |
+
foul_words_list = ", ".join(foul_words_list)
|
91 |
+
|
92 |
+
|
93 |
audio_output = mute_audio_range(input_audio, negative_timestamps)
|
94 |
output_path = "output_audio.wav"
|
95 |
audio_output.export(output_path, format="wav")
|
96 |
|
97 |
+
return [text, timestamps, foul_words_list, output_path]
|
98 |
|
99 |
examples = [
|
100 |
["arabic_english_audios/audios/arabic_audio_1.wav", 'Arabic', 'transcribe', 'word'],
|
|
|
128 |
with gr.Column():
|
129 |
transcript_output = gr.Text(label="Transcript")
|
130 |
timestamp_output = gr.Text(label="Timestamps")
|
131 |
+
foul_words_output = gr.Text(label="Foul words in Audio")
|
132 |
output_audio = gr.Audio(label="Output Audio")
|
133 |
|
134 |
+
examples = gr.Examples(examples, inputs=[audio_input, audio_language, task, timestamp_type], outputs=[transcript_output, timestamp_output, foul_words_output, output_audio], fn=transcribe, examples_per_page=20)
|
135 |
|
136 |
# Wire the submit button to transcribe(). transcribe() returns
# [text, timestamps, foul_words_list, output_path], so the third output must be
# the "Foul words in Audio" textbox (foul_words_output) -- the same outputs list
# that gr.Examples passes for this function.
submit_button.click(
    fn=transcribe,
    inputs=[audio_input, audio_language, task, timestamp_type],
    outputs=[transcript_output, timestamp_output, foul_words_output, output_audio],
)
|
137 |
# Register all input and output components with the clear button.
# foul_words_output (the "Foul words in Audio" textbox) is listed in place of
# highlighted_output, which is not defined in the visible layout of this version.
# NOTE(review): presumably clear_button is a gr.ClearButton -- confirm.
clear_button.add([
    audio_input,
    audio_language,
    task,
    timestamp_type,
    transcript_output,
    timestamp_output,
    foul_words_output,
    output_audio,
])
|