projektas1-demo-hf1 / kalbos_nustatymas.py
Elanas's picture
Upload kalbos_nustatymas.py
10a86b9 verified
raw
history blame
1.82 kB
import functools
import re

import torch
import torchaudio
import whisper
from transformers import pipeline
@functools.lru_cache(maxsize=None)
def _load_whisper_model(name: str = "base"):
    """Load and cache a Whisper model so repeated calls don't reload weights."""
    return whisper.load_model(name)


# 🔹 Whisper transcription (Lithuanian)
def transcribe_text(audio_path):
    """Transcribe a Lithuanian audio file with Whisper.

    Args:
        audio_path: Path to an audio file readable by Whisper (via ffmpeg).

    Returns:
        The transcribed text with surrounding whitespace stripped;
        "" when Whisper produces no text.
    """
    # Cached loader: the original reloaded the "base" model on every call.
    model = _load_whisper_model("base")
    # Decoding is forced to Lithuanian; callers are expected to pass Lithuanian audio.
    result = model.transcribe(audio_path, language='lt')
    return result.get("text", "").strip()
# Keyword lists used to override Whisper's language guess, checked in order
# (Lithuanian first, then English, then German).
_LANGUAGE_KEYWORDS = {
    "lt": ["labas", "ačiū", "draugas", "vardas", "sekasi", "prašau"],
    "en": ["hello", "name", "how are you", "friend", "please"],
    "de": ["hallo", "danke", "freund", "ich", "bitte"],
}


def _detect_language_from_text(text, fallback):
    """Return a language code from keyword hits in *text*, else *fallback*.

    Keywords are matched as whole words/phrases: the previous plain substring
    search produced false positives such as German "ich" inside English
    "which", or English "name" inside "username".
    """
    lower_text = text.lower()
    for code, keywords in _LANGUAGE_KEYWORDS.items():
        for keyword in keywords:
            if re.search(r"\b" + re.escape(keyword) + r"\b", lower_text):
                return code
    return fallback


# 🔹 Whisper language recognition (with extra keyword checking)
def recognize_language(audio_path):
    """Detect the spoken language of an audio file.

    Whisper's own language detection is used as the fallback; a small
    keyword heuristic overrides it for Lithuanian, English and German.

    Args:
        audio_path: Path to an audio file readable by Whisper (via ffmpeg).

    Returns:
        A language code string: "lt", "en", "de", Whisper's detected code,
        or "unknown" when Whisper reports no language.
    """
    model = whisper.load_model("base")
    result = model.transcribe(audio_path)
    text = result.get("text", "").strip()
    lang_code = result.get("language", "unknown")
    return _detect_language_from_text(text, lang_code)
# 🔸 Wav2Vec2 transcription (with language selection)
def transcribe_text_wav2vec(audio_path, kalba):
    """Transcribe an audio file with a language-specific Wav2Vec2 model.

    Args:
        audio_path: Path to an audio file loadable by torchaudio.
        kalba: Language code selecting the model: "lt", "en" or "de".

    Returns:
        The recognized text as produced by the ASR pipeline.

    Raises:
        ValueError: If *kalba* is not one of the supported language codes.
    """
    kalbos_modeliai = {
        "lt": "DeividasM/wav2vec2-large-xlsr-53-lithuanian",
        "en": "facebook/wav2vec2-base-960h",
        "de": "jonatasgrosman/wav2vec2-large-xlsr-53-german",
    }

    # Guard clause: reject unsupported language codes up front.
    model_name = kalbos_modeliai.get(kalba)
    if model_name is None:
        raise ValueError(f"Nepalaikoma kalba: {kalba}")

    asr = pipeline(
        "automatic-speech-recognition",
        model=model_name,
    )

    waveform, sr = torchaudio.load(audio_path)
    if sr != 16000:
        # Wav2Vec2 models expect 16 kHz input; resample when needed.
        waveform = torchaudio.transforms.Resample(orig_freq=sr, new_freq=16000)(waveform)

    # Feed the first channel only, as a 1-D numpy array.
    mono = waveform[0].numpy()
    return asr(mono)["text"]