import gradio as gr
from transformers import pipeline
import numpy as np
import librosa
import pandas as pd

MODEL_NAME = "openai/whisper-tiny"
BATCH_SIZE = 8
# device = 0 if torch.cuda.is_available() else "cpu"  # requires `import torch`

# Whisper ASR pipeline; long audio is split into 30-second chunks.
pipe = pipeline(
    task="automatic-speech-recognition",
    model=MODEL_NAME,
    chunk_length_s=30,
    # device=device,
)

# eng_classifier = pipeline("text-classification", model="Hate-speech-CNERG/bert-base-uncased-hatexplain")


def format_output_to_list(data):
    """Render each word-level chunk as a "start s - end s : text" line."""
    formatted_list = "\n".join(
        [f"{item['timestamp'][0]}s - {item['timestamp'][1]}s \t : {item['text']}" for item in data]
    )
    return formatted_list


def transcribe(inputs, task):
    if inputs is None:
        raise gr.Error("No audio file submitted! Please upload or record an audio file before submitting your request.")

    # `task` is "transcribe" or "translate" for Whisper models.
    output = pipe(inputs, batch_size=BATCH_SIZE, return_timestamps="word", generate_kwargs={"task": task})
    text = output['text']
    timestamps = format_output_to_list(output['chunks'])
    return [text, timestamps]


examples = [
    ["arabic_english_audios/audios/arabic_audio_1.wav"],
    ["arabic_english_audios/audios/arabic_audio_2.wav"],
    ["arabic_english_audios/audios/arabic_audio_3.wav"],
    ["arabic_english_audios/audios/arabic_audio_4.wav"],
    ["arabic_english_audios/audios/arabic_hate_audio_1.mp3"],
    ["arabic_english_audios/audios/arabic_hate_audio_2.mp3"],
    ["arabic_english_audios/audios/arabic_hate_audio_3.mp3"],
    ["arabic_english_audios/audios/english_audio_1.wav"],
    ["arabic_english_audios/audios/english_audio_2.mp3"],
    ["arabic_english_audios/audios/english_audio_3.mp3"],
    ["arabic_english_audios/audios/english_audio_4.mp3"],
    ["arabic_english_audios/audios/english_audio_5.mp3"],
    ["arabic_english_audios/audios/english_audio_6.wav"],
]

with gr.Blocks(theme=gr.themes.Default()) as demo:
    gr.HTML("