Spaces:
Sleeping
Sleeping
Upload kalbetojai_analize.py
Browse files- kalbetojai_analize.py +154 -0
kalbetojai_analize.py
ADDED
@@ -0,0 +1,154 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import torchaudio
|
3 |
+
import time
|
4 |
+
import json
|
5 |
+
import shutil
|
6 |
+
from pyannote.audio import Pipeline
|
7 |
+
import whisper
|
8 |
+
from kalbos_nustatymas import (
|
9 |
+
transcribe_text,
|
10 |
+
transcribe_text_wav2vec,
|
11 |
+
recognize_language
|
12 |
+
)
|
13 |
+
|
14 |
+
def _kalbetojo_etikete(idx):
    """Return a spreadsheet-style label for a 0-based speaker index.

    0 -> "A", 25 -> "Z", 26 -> "AA", ... so labels stay alphabetic even when
    more than 26 speakers are detected.
    """
    etikete = ""
    idx += 1
    while idx > 0:
        idx, liekana = divmod(idx - 1, 26)
        etikete = chr(65 + liekana) + etikete
    return etikete


def _transkribuoti_whisper(modelis, kelias):
    """Transcribe one clip with Whisper.

    Returns a ``(text, language, error)`` tuple; ``error`` is ``None`` on
    success, otherwise the caught exception (text is then a placeholder).
    """
    try:
        result = modelis.transcribe(kelias)
        text = result["text"].strip()
        lang = result["language"].strip().lower()

        # A clip detected as Russian is transcribed again with Lithuanian
        # forced. NOTE(review): presumably works around Lithuanian being
        # misdetected as Russian - confirm with the author.
        if lang == "ru":
            retry = modelis.transcribe(kelias, language="lt")
            text = retry["text"].strip()
            lang = "lt (forced)"
    except Exception as err:
        return "[KLAIDA Whisper transkripcijoje]", "unknown", err
    return text, lang, None


def _transkribuoti_wav2vec(kelias, kalbos=("lt", "en", "de")):
    """Transcribe one clip with Wav2Vec2, trying each candidate language.

    The language whose transcript is longest wins. Returns a
    ``(text, language, error)`` tuple; ``error`` is only set when every
    attempt raised, in which case the text is an error placeholder.
    """
    best_text = ""
    best_lang = "unknown"
    pavyko = False
    last_error = None
    for kalba in kalbos:
        try:
            txt = transcribe_text_wav2vec(kelias, kalba=kalba).strip()
        except Exception as err:  # not a bare except: let Ctrl+C / SystemExit through
            last_error = err
            continue
        pavyko = True
        if len(txt) > len(best_text):
            best_text = txt
            best_lang = kalba

    if not pavyko:
        return "[KLAIDA Wav2Vec2 transkripcijoje]", "unknown", last_error
    return best_text, best_lang, None


def analizuoti_kalbetojus(pasirinktas_modelis="Wav2Vec2", failas="/tmp/ivestis.wav"):
    """Diarize an audio file, transcribe every speaker turn and save a JSON report.

    Steps: run the pyannote speaker-diarization pipeline on ``failas``, cut each
    turn of at least one second into its own WAV clip, transcribe the clip with
    the selected model, accumulate per-speaker speaking time per language, and
    write the result to ``rezultatai/<model name in lower case>.json``.

    Args:
        pasirinktas_modelis: ``"Whisper"`` or ``"Wav2Vec2"``. Any other value
            still runs diarization but marks every segment as an unknown model.
        failas: Path of the audio file to analyse.

    Returns:
        A 3-tuple ``(log, lithuanian_text, segments)``:
        the newline-joined progress log; the concatenated Lithuanian text of
        the speaker with the most Lithuanian speaking time (empty if none);
        and a list of dicts with keys ``kalbetojas``, ``modelis``, ``kalba``,
        ``tekstas`` and ``trukme``, grouped by speaker.
    """
    start_time = time.time()
    tekstas = []
    visi_segmentai = []

    tekstas.append("🔁 Įkeliama diarizacijos sistema...")
    diar_pipeline = Pipeline.from_pretrained(
        "pyannote/speaker-diarization",
        use_auth_token=os.getenv("HF_TOKEN"),
    )
    tekstas.append("✅ Diarizacijos modelis paruoštas.")

    tekstas.append(f"🔁 Įkeliamas modelis: {pasirinktas_modelis}...")
    whisper_modelis = None
    if pasirinktas_modelis == "Whisper":
        whisper_modelis = whisper.load_model("medium")
    tekstas.append(f"✅ Modelis {pasirinktas_modelis} paruoštas.")

    tekstas.append("🧠 Atliekama diarizacija...")
    diar_result = diar_pipeline(failas)
    speaker_segments = list(diar_result.itertracks(yield_label=True))

    skaiciuokle = {}     # speaker -> {language: accumulated seconds}
    transkripcijos = {}  # speaker -> list of per-segment result dicts

    waveform, sample_rate = torchaudio.load(failas)

    # Scratch folder for the per-segment clips. It is wiped at the start of
    # every run; the clips of the current run are left on disk afterwards.
    TEMP_FOLDER = "/tmp/temp_segmentai"
    if os.path.exists(TEMP_FOLDER):
        shutil.rmtree(TEMP_FOLDER)
    os.makedirs(TEMP_FOLDER, exist_ok=True)

    for i, (segment, _, speaker) in enumerate(speaker_segments):
        # Turns shorter than one second are skipped entirely.
        if (segment.end - segment.start) < 1.0:
            continue

        output_path = os.path.join(TEMP_FOLDER, f"segment_{i}_{speaker}.wav")
        start_sample = int(segment.start * sample_rate)
        end_sample = int(segment.end * sample_rate)
        torchaudio.save(output_path, waveform[:, start_sample:end_sample], sample_rate)

        if pasirinktas_modelis == "Whisper":
            text, lang, klaida = _transkribuoti_whisper(whisper_modelis, output_path)
        elif pasirinktas_modelis == "Wav2Vec2":
            text, lang, klaida = _transkribuoti_wav2vec(output_path)
        else:
            text, lang, klaida = "[Nežinomas modelis]", "unknown", None

        if klaida is not None:
            # Surface the failure in the log instead of swallowing it silently.
            tekstas.append(f"⚠️ Segmento {i} transkripcijos klaida: {klaida}")

        trukme = round(segment.end - segment.start, 2)
        # Statistics are keyed by the bare language code, without the
        # " (forced)" marker that the Whisper retry adds.
        lang_clean = lang.replace(" (forced)", "")
        kalbos = skaiciuokle.setdefault(speaker, {})
        kalbos[lang_clean] = kalbos.get(lang_clean, 0) + trukme

        transkripcijos.setdefault(speaker, []).append(
            {"tekstas": text, "kalba": lang, "trukme": trukme}
        )

    # Speaker with the most accumulated Lithuanian speech (None if nobody
    # spoke Lithuanian; the first speaker wins a tie).
    max_lt_seconds = 0
    kalbetojas_lt = None
    for speaker, kalbos in skaiciuokle.items():
        lt_trukme = kalbos.get("lt", 0)
        if lt_trukme > max_lt_seconds:
            max_lt_seconds = lt_trukme
            kalbetojas_lt = speaker

    lietuviski_sakiniai = []
    for idx, speaker in enumerate(sorted(skaiciuokle)):
        etikete = _kalbetojo_etikete(idx)  # A, B, C, ...
        for sak in transkripcijos[speaker]:
            # Substring test so that "lt (forced)" also counts as Lithuanian.
            if speaker == kalbetojas_lt and "lt" in sak["kalba"]:
                lietuviski_sakiniai.append(sak["tekstas"])

            visi_segmentai.append({
                "kalbetojas": etikete,
                "modelis": pasirinktas_modelis,
                "kalba": sak["kalba"],
                "tekstas": sak["tekstas"],
                "trukme": sak["trukme"],
            })
    lietuviskas_tekstas = " ".join(lietuviski_sakiniai).strip()

    elapsed_time = round(time.time() - start_time, 2)
    minutes, seconds = divmod(int(elapsed_time), 60)
    formatted_time = f"{minutes} min. {seconds} sek."

    os.makedirs("rezultatai", exist_ok=True)
    # Separate name so the input-path parameter `failas` is not overwritten.
    json_kelias = os.path.join("rezultatai", f"{pasirinktas_modelis.lower()}.json")

    try:
        with open(json_kelias, "w", encoding="utf-8") as f:
            json.dump({
                "modelis": pasirinktas_modelis,
                "apdorojimo_laikas": elapsed_time,
                "apdorojimo_laikas_tekstu": formatted_time,
                "segmentai": visi_segmentai,
            }, f, ensure_ascii=False, indent=2)
        tekstas.append(f"✅ JSON failas įrašytas: {json_kelias}")
    except Exception as e:
        # Best effort: a failed report write is logged, not raised.
        tekstas.append(f"❌ Nepavyko įrašyti JSON: {str(e)}")

    tekstas.append(f"⏱️ Programos vykdymo trukmė: {formatted_time}")
    return "\n".join(tekstas), lietuviskas_tekstas, visi_segmentai
|