Update app.py
Browse files
app.py
CHANGED
@@ -31,9 +31,6 @@ pipe = pipeline(
|
|
31 |
# Pin the decoder prompt so Whisper transcribes in the configured `language`
# (task="transcribe", i.e. no translation). NOTE(review): assumes `pipe` is a
# transformers ASR pipeline and `language` is defined earlier — confirm upstream.
pipe.model.config.forced_decoder_ids = pipe.tokenizer.get_decoder_prompt_ids(language=language, task="transcribe")
|
32 |
|
33 |
# Similarity check to remove repeated phrases
|
34 |
-
def is_similar(a, b, threshold=0.8):
    """Return True when the similarity ratio of *a* and *b* exceeds *threshold*.

    The ratio is difflib's SequenceMatcher measure in [0.0, 1.0]; the default
    cutoff of 0.8 treats near-duplicate phrases as "similar".
    """
    ratio = SequenceMatcher(None, a, b).ratio()
    return ratio > threshold
|
36 |
-
|
37 |
def remove_repeated_phrases(text):
|
38 |
sentences = re.split(r'(?<=[。!?])', text)
|
39 |
cleaned_sentences = []
|
@@ -42,23 +39,22 @@ def remove_repeated_phrases(text):
|
|
42 |
cleaned_sentences.append(sentence.strip())
|
43 |
return " ".join(cleaned_sentences)
|
44 |
|
45 |
-
# Remove punctuation
|
46 |
def remove_punctuation(text):
    """Strip punctuation: drop every character that is neither a word
    character (letter, digit, underscore) nor whitespace."""
    pattern = re.compile(r'[^\w\s]')
    return pattern.sub('', text)
|
48 |
|
49 |
-
# Transcription function (adjusted for punctuation and repetition removal)
|
50 |
def transcribe_audio(audio_path):
|
51 |
waveform, sample_rate = torchaudio.load(audio_path)
|
52 |
|
53 |
-
|
54 |
-
|
|
|
55 |
|
56 |
-
waveform = waveform.squeeze(0).numpy()
|
57 |
|
58 |
duration = waveform.shape[0] / sample_rate
|
59 |
if duration > 60:
|
60 |
-
chunk_size = sample_rate * 55
|
61 |
-
step_size = sample_rate * 50
|
62 |
results = []
|
63 |
|
64 |
for start in range(0, waveform.shape[0], step_size):
|
|
|
31 |
pipe.model.config.forced_decoder_ids = pipe.tokenizer.get_decoder_prompt_ids(language=language, task="transcribe")
|
32 |
|
33 |
# Similarity check to remove repeated phrases
|
|
|
|
|
|
|
34 |
def remove_repeated_phrases(text):
|
35 |
sentences = re.split(r'(?<=[。!?])', text)
|
36 |
cleaned_sentences = []
|
|
|
39 |
cleaned_sentences.append(sentence.strip())
|
40 |
return " ".join(cleaned_sentences)
|
41 |
|
|
|
42 |
def remove_punctuation(text):
    """Return *text* with all punctuation removed.

    Keeps word characters and whitespace only; everything else (commas,
    periods, CJK punctuation, symbols) is deleted.
    """
    cleaned = re.sub(r'[^\w\s]', '', text)
    return cleaned
|
44 |
|
|
|
45 |
def transcribe_audio(audio_path):
|
46 |
waveform, sample_rate = torchaudio.load(audio_path)
|
47 |
|
48 |
+
# Convert stereo to mono (if needed)
|
49 |
+
if waveform.shape[0] > 1: # More than 1 channel
|
50 |
+
waveform = torch.mean(waveform, dim=0, keepdim=True) # Average the channels
|
51 |
|
52 |
+
waveform = waveform.squeeze(0).numpy() # Convert to NumPy (1D array)
|
53 |
|
54 |
duration = waveform.shape[0] / sample_rate
|
55 |
if duration > 60:
|
56 |
+
chunk_size = sample_rate * 55 # 55 seconds
|
57 |
+
step_size = sample_rate * 50 # 50 seconds overlap
|
58 |
results = []
|
59 |
|
60 |
for start in range(0, waveform.shape[0], step_size):
|