import gradio as gr
from transformers import pipeline

MODEL_NAME = "openai/whisper-tiny"
BATCH_SIZE = 8

# Uncomment to run on GPU when one is available (requires torch):
# import torch
# device = 0 if torch.cuda.is_available() else "cpu"

pipe = pipeline(
    task="automatic-speech-recognition",
    model=MODEL_NAME,
    chunk_length_s=30,
    # device=device,
)

# eng_classifier = pipeline("text-classification", model="Hate-speech-CNERG/bert-base-uncased-hatexplain")


def format_output_to_list(data):
    """Render the pipeline's word-level chunks as one 'start - end : text' line each."""
    return "\n".join(
        f"{item['timestamp'][0]}s - {item['timestamp'][1]}s \t : {item['text']}"
        for item in data
    )


def transcribe(inputs, task="transcribe"):
    if inputs is None:
        raise gr.Error("No audio file submitted! Please upload or record an audio file before submitting your request.")

    output = pipe(inputs, batch_size=BATCH_SIZE, return_timestamps="word", generate_kwargs={"task": task})
    text = output["text"]
    timestamps = format_output_to_list(output["chunks"])
    return text, timestamps


examples = [
    ["arabic_english_audios/audios/arabic_audio_1.wav"],
    ["arabic_english_audios/audios/arabic_audio_2.wav"],
    ["arabic_english_audios/audios/arabic_audio_3.wav"],
    ["arabic_english_audios/audios/arabic_audio_4.wav"],
    ["arabic_english_audios/audios/arabic_hate_audio_1.mp3"],
    ["arabic_english_audios/audios/arabic_hate_audio_2.mp3"],
    ["arabic_english_audios/audios/arabic_hate_audio_3.mp3"],
    ["arabic_english_audios/audios/english_audio_1.wav"],
    ["arabic_english_audios/audios/english_audio_2.mp3"],
    ["arabic_english_audios/audios/english_audio_3.mp3"],
    ["arabic_english_audios/audios/english_audio_4.mp3"],
    ["arabic_english_audios/audios/english_audio_5.mp3"],
    ["arabic_english_audios/audios/english_audio_6.wav"],
]

with gr.Blocks(theme=gr.themes.Default()) as demo:
    # Heading reflects the model actually loaded above.
    gr.HTML(f"<h1 style='text-align: center;'>Transcribe Audio with Timestamps using {MODEL_NAME}</h1>")

    with gr.Row():
        with gr.Column():
            audio_input = gr.Audio(sources=["upload", "microphone"], type="filepath", label="Audio file")
            task = gr.Radio(["transcribe", "translate"], label="Task", value="transcribe")
            with gr.Row():
                clear_button = gr.ClearButton(value="Clear")
                submit_button = gr.Button("Submit", variant="primary")
        with gr.Column():
            transcript_output = gr.Text(label="Transcript")
            timestamp_output = gr.Text(label="Timestamp")

    gr.Examples(examples, inputs=audio_input, outputs=[transcript_output, timestamp_output],
                fn=transcribe, examples_per_page=20)

    # Wire both the audio file and the selected task into transcribe().
    submit_button.click(fn=transcribe, inputs=[audio_input, task],
                        outputs=[transcript_output, timestamp_output])
    clear_button.add([audio_input, transcript_output, timestamp_output])

if __name__ == "__main__":
    demo.launch()