Elanas committed on
Commit
9db06b6
·
verified ·
1 Parent(s): 74fd48f

Upload kalbetojai_analize.py

Browse files
Files changed (1) hide show
  1. kalbetojai_analize.py +154 -0
kalbetojai_analize.py ADDED
@@ -0,0 +1,154 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import torchaudio
3
+ import time
4
+ import json
5
+ import shutil
6
+ from pyannote.audio import Pipeline
7
+ import whisper
8
+ from kalbos_nustatymas import (
9
+ transcribe_text,
10
+ transcribe_text_wav2vec,
11
+ recognize_language
12
+ )
13
+
14
def _transkribuoti_whisper(modelis, kelias):
    """Transcribe one segment file with an already loaded Whisper model.

    Returns a ``(text, language)`` tuple. When Whisper reports Russian, the
    segment is transcribed a second time with the language forced to
    Lithuanian and the language is reported as ``"lt (forced)"``. Any
    exception yields the error marker text and ``"unknown"``.
    """
    try:
        result = modelis.transcribe(kelias)
        text = result["text"].strip()
        lang = result["language"].strip().lower()

        if lang == "ru":
            retry = modelis.transcribe(kelias, language="lt")
            text = retry["text"].strip()
            lang = "lt (forced)"
    except Exception:
        return "[KLAIDA Whisper transkripcijoje]", "unknown"
    return text, lang


def _transkribuoti_wav2vec(kelias, kalbos=("lt", "en", "de")):
    """Transcribe one segment file with Wav2Vec2, trying each language.

    The longest non-empty transcription wins and its language code is
    returned with it; ties keep the earlier language. If every attempt
    raises, the error marker text and ``"unknown"`` are returned. If the
    attempts succeed but all produce empty text, ``("", "unknown")`` is
    returned.
    """
    best_text = ""
    best_lang = "unknown"
    pavyko = False  # True once at least one language attempt did not raise
    for kalba in kalbos:
        try:
            txt = transcribe_text_wav2vec(kelias, kalba=kalba).strip()
        except Exception:
            # Best effort: a failure for one language must not abort the
            # remaining attempts (but Ctrl-C / SystemExit still propagate).
            continue
        pavyko = True
        if len(txt) > len(best_text):
            best_text = txt
            best_lang = kalba

    if not pavyko:
        return "[KLAIDA Wav2Vec2 transkripcijoje]", "unknown"
    return best_text, best_lang


def analizuoti_kalbetojus(pasirinktas_modelis="Wav2Vec2", failas="/tmp/ivestis.wav"):
    """Diarize an audio file and transcribe every speaker segment.

    Steps: load the pyannote diarization pipeline, split ``failas`` into
    per-speaker segments, write each segment of at least one second to
    ``/tmp/temp_segmentai`` and transcribe it with the selected model, then
    save the result as JSON under ``rezultatai/``.

    Args:
        pasirinktas_modelis: ``"Whisper"`` or ``"Wav2Vec2"``. Any other value
            produces segments whose text is the "unknown model" marker.
        failas: Path of the audio file to analyse.

    Returns:
        A 3-tuple ``(log, lietuviskas_tekstas, visi_segmentai)``:
        the progress log joined with newlines; the concatenated text of the
        Lithuanian segments of the speaker with the most Lithuanian speech
        time; and a list of dicts with the keys ``kalbetojas``, ``modelis``,
        ``kalba``, ``tekstas`` and ``trukme``.
    """
    start_time = time.time()
    tekstas = []
    visi_segmentai = []

    tekstas.append("🔁 Įkeliama diarizacijos sistema...")
    diar_pipeline = Pipeline.from_pretrained(
        "pyannote/speaker-diarization",
        use_auth_token=os.getenv("HF_TOKEN")
    )
    tekstas.append("✅ Diarizacijos modelis paruoštas.")

    tekstas.append(f"🔁 Įkeliamas modelis: {pasirinktas_modelis}...")
    whisper_modelis = None
    if pasirinktas_modelis == "Whisper":
        whisper_modelis = whisper.load_model("medium")
    tekstas.append(f"✅ Modelis {pasirinktas_modelis} paruoštas.")

    tekstas.append("🧠 Atliekama diarizacija...")
    diar_result = diar_pipeline(failas)
    speaker_segments = list(diar_result.itertracks(yield_label=True))

    skaiciuokle = {}      # speaker -> {language code: total seconds}
    transkripcijos = {}   # speaker -> list of per-segment result dicts

    waveform, sample_rate = torchaudio.load(failas)

    # Start from an empty segment folder. The files written below stay on
    # disk until the next call wipes the folder again.
    TEMP_FOLDER = "/tmp/temp_segmentai"
    if os.path.exists(TEMP_FOLDER):
        shutil.rmtree(TEMP_FOLDER)
    os.makedirs(TEMP_FOLDER, exist_ok=True)

    for i, (segment, _, speaker) in enumerate(speaker_segments):
        # Segments shorter than one second are skipped entirely.
        if (segment.end - segment.start) < 1.0:
            continue

        output_path = os.path.join(TEMP_FOLDER, f"segment_{i}_{speaker}.wav")
        start_sample = int(segment.start * sample_rate)
        end_sample = int(segment.end * sample_rate)
        torchaudio.save(output_path, waveform[:, start_sample:end_sample], sample_rate)

        if pasirinktas_modelis == "Whisper":
            text, lang = _transkribuoti_whisper(whisper_modelis, output_path)
        elif pasirinktas_modelis == "Wav2Vec2":
            text, lang = _transkribuoti_wav2vec(output_path)
        else:
            text, lang = "[Nežinomas modelis]", "unknown"

        trukme = round(segment.end - segment.start, 2)
        # Forced Lithuanian counts as plain "lt" in the per-language totals.
        lang_clean = lang.replace(" (forced)", "")
        kalbu_trukmes = skaiciuokle.setdefault(speaker, {})
        kalbu_trukmes[lang_clean] = kalbu_trukmes.get(lang_clean, 0) + trukme

        transkripcijos.setdefault(speaker, []).append(
            {"tekstas": text, "kalba": lang, "trukme": trukme}
        )

    # Pick the speaker with the most Lithuanian speech; stays None when
    # nobody has any Lithuanian time.
    max_lt_seconds = 0
    kalbetojas_lt = None
    for speaker, kalbos in skaiciuokle.items():
        lt_trukme = kalbos.get("lt", 0)
        if lt_trukme > max_lt_seconds:
            max_lt_seconds = lt_trukme
            kalbetojas_lt = speaker

    lietuviski_gabalai = []
    for idx, speaker in enumerate(sorted(skaiciuokle)):
        etikete = chr(65 + idx)  # A, B, C, ...
        for sak in transkripcijos[speaker]:
            # The substring test also matches the "lt (forced)" label.
            if speaker == kalbetojas_lt and "lt" in sak["kalba"]:
                lietuviski_gabalai.append(sak["tekstas"])

            visi_segmentai.append({
                "kalbetojas": etikete,
                "modelis": pasirinktas_modelis,
                "kalba": sak["kalba"],
                "tekstas": sak["tekstas"],
                "trukme": sak["trukme"]
            })
    lietuviskas_tekstas = " ".join(lietuviski_gabalai).strip()

    elapsed_time = round(time.time() - start_time, 2)
    minutes, seconds = divmod(int(elapsed_time), 60)
    formatted_time = f"{minutes} min. {seconds} sek."

    os.makedirs("rezultatai", exist_ok=True)
    # Separate name so the input-path parameter `failas` is not overwritten.
    json_kelias = os.path.join("rezultatai", f"{pasirinktas_modelis.lower()}.json")

    try:
        with open(json_kelias, "w", encoding="utf-8") as f:
            json.dump({
                "modelis": pasirinktas_modelis,
                "apdorojimo_laikas": elapsed_time,
                "apdorojimo_laikas_tekstu": formatted_time,
                "segmentai": visi_segmentai
            }, f, ensure_ascii=False, indent=2)
        tekstas.append(f"✅ JSON failas įrašytas: {json_kelias}")
    except Exception as e:
        # Saving is best effort: the failure is reported in the log and the
        # results are still returned to the caller.
        tekstas.append(f"❌ Nepavyko įrašyti JSON: {str(e)}")

    tekstas.append(f"⏱️ Programos vykdymo trukmė: {formatted_time}")
    return "\n".join(tekstas), lietuviskas_tekstas, visi_segmentai