import whisper
import torch
import torchaudio
from transformers import pipeline


# 🔹 Whisper transcription (forced to Lithuanian)
def transcribe_text(audio_path):
    model = whisper.load_model("base")
    result = model.transcribe(audio_path, language="lt")
    return result.get("text", "").strip()


# 🔹 Whisper language detection (with an additional keyword check)
def recognize_language(audio_path):
    model = whisper.load_model("base")
    result = model.transcribe(audio_path)
    text = result.get("text", "").strip()
    lang_code = result.get("language", "unknown")
    lower_text = text.lower()

    # Keyword heuristics take priority over Whisper's own detected language.
    if any(word in lower_text for word in ["labas", "ačiū", "draugas", "vardas", "sekasi", "prašau"]):
        return "lt"
    elif any(word in lower_text for word in ["hello", "name", "how are you", "friend", "please"]):
        return "en"
    elif any(word in lower_text for word in ["hallo", "danke", "freund", "ich", "bitte"]):
        return "de"
    else:
        return lang_code


# 🔸 Wav2Vec2 transcription (with language selection)
def transcribe_text_wav2vec(audio_path, kalba):
    kalbos_modeliai = {
        "lt": "DeividasM/wav2vec2-large-xlsr-53-lithuanian",
        "en": "facebook/wav2vec2-base-960h",
        "de": "jonatasgrosman/wav2vec2-large-xlsr-53-german",
    }
    if kalba not in kalbos_modeliai:
        raise ValueError(f"Unsupported language: {kalba}")

    pipe = pipeline(
        "automatic-speech-recognition",
        model=kalbos_modeliai[kalba],
    )

    # Load the audio and resample to the 16 kHz rate wav2vec2 models expect.
    speech_array, sampling_rate = torchaudio.load(audio_path)
    if sampling_rate != 16000:
        resampler = torchaudio.transforms.Resample(orig_freq=sampling_rate, new_freq=16000)
        speech_array = resampler(speech_array)

    # Use the first channel as a mono numpy array for the pipeline.
    speech = speech_array[0].numpy()
    result = pipe(speech)
    return result["text"]
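

# --- Usage sketch (not part of the original script): shows how the three
# functions above fit together. The file name "pavyzdys.wav" is hypothetical;
# any audio file readable by torchaudio will do.
if __name__ == "__main__":
    audio_file = "pavyzdys.wav"  # hypothetical example file

    # Detect the language first, then pick the matching Wav2Vec2 model.
    detected = recognize_language(audio_file)
    print(f"Detected language: {detected}")

    # Whisper transcription (forced to Lithuanian by transcribe_text).
    print("Whisper:", transcribe_text(audio_file))

    # Wav2Vec2 transcription only supports the three configured languages,
    # so guard against other codes returned by recognize_language.
    if detected in ("lt", "en", "de"):
        print("Wav2Vec2:", transcribe_text_wav2vec(audio_file, detected))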