hamza2923 committed
Commit b00efa4 · verified · Parent: 7323fd3

Update app.py

Files changed (1): app.py (+24, -72)
app.py CHANGED
@@ -23,7 +23,7 @@ compute_type = "float16" if device == "cuda" else "int8"
 print(f"Using device: {device} with compute_type: {compute_type}")
 
 # Faster Whisper setup with optimized parameters for long audio
-beamsize = 5  # Slightly larger beam size can help with long-form accuracy
+beamsize = 2  # Smaller beam size trades some long-form accuracy for lower latency
 wmodel = WhisperModel(
     "guillaumekln/faster-whisper-small",
     device=device,
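The commit drops beam_size from 5 to 2, trading a little long-form accuracy for faster decoding. A minimal sketch of how one might measure that trade-off locally with faster-whisper; the sample.wav path and the CPU/int8 settings are illustrative assumptions, not part of this commit:

```python
# Hypothetical benchmark sketch -- sample.wav and the cpu/int8 settings are
# assumptions for illustration only.
import time
from faster_whisper import WhisperModel

model = WhisperModel("guillaumekln/faster-whisper-small",
                     device="cpu", compute_type="int8")

for beam in (5, 2):
    start = time.time()
    segments, _ = model.transcribe("sample.wav", beam_size=beam, vad_filter=True)
    text = " ".join(s.text for s in segments)  # consuming the generator runs the decode
    print(f"beam_size={beam}: {time.time() - start:.2f}s, {len(text)} chars")
```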
@@ -74,97 +74,49 @@ def whisper_transcribe():
     global active_requests
 
     if not request_semaphore.acquire(blocking=False):
-        return jsonify({
-            'status': 'Server busy',
-            'message': f'Currently processing {active_requests} requests',
-            'suggestion': 'Please try again shortly'
-        }), 503
+        return jsonify({'error': 'Server busy'}), 503
 
     active_requests += 1
-    print(f"Starting transcription (Active requests: {active_requests})")
-
+    start_time = time.time()
     temp_file_path = None
 
     try:
         if 'audio' not in request.files:
             return jsonify({'error': 'No file provided'}), 400
 
         audio_file = request.files['audio']
         if not (audio_file and allowed_file(audio_file.filename)):
             return jsonify({'error': 'Invalid file format'}), 400
 
-        # Save to temporary file for large audio processing
         temp_file_path = os.path.join(TEMPORARY_FOLDER, secure_filename(audio_file.filename))
         audio_file.save(temp_file_path)
 
-        # Get processing parameters from request
-        language = request.form.get('language', None)
-        task = request.form.get('task', 'transcribe')  # 'transcribe' or 'translate'
-        vad_filter = request.form.get('vad_filter', 'true').lower() == 'true'
-        word_timestamps = request.form.get('word_timestamps', 'false').lower() == 'true'
-
-        try:
-            start_time = time.time()
-
-            # Process in chunks with VAD for long audio
-            segments, info = wmodel.transcribe(
-                temp_file_path,
-                beam_size=beamsize,
-                language=language,
-                task=task,
-                vad_filter=vad_filter,
-                word_timestamps=word_timestamps,
-                chunk_length=30  # Process in 30-second chunks
-            )
-
-            # Stream results as they become available
-            results = []
-            for segment in segments:
-                if time.time() - start_time > MAX_AUDIO_DURATION:
-                    raise TimeoutError(f"Transcription exceeded maximum allowed duration of {MAX_AUDIO_DURATION} seconds")
-
-                result = {
-                    'text': segment.text,
-                    'start': segment.start,
-                    'end': segment.end
-                }
-
-                if word_timestamps and segment.words:
-                    result['words'] = [{
-                        'word': word.word,
-                        'start': word.start,
-                        'end': word.end,
-                        'probability': word.probability
-                    } for word in segment.words]
-
-                results.append(result)
-
-            processing_time = time.time() - start_time
-            print(f"Transcription completed in {processing_time:.2f} seconds")
-
-            return jsonify({
-                'segments': results,
-                'summary': {
-                    'processing_time': processing_time,
-                    'language': info.language,
-                    'language_probability': info.language_probability,
-                    'duration': sum(seg.end - seg.start for seg in results if hasattr(seg, 'end'))
-                }
-            })
-
-        except TimeoutError as te:
-            print(f"Transcription timeout: {str(te)}")
-            return jsonify({'error': str(te)}), 504
-        except Exception as e:
-            print(f"Transcription error: {str(e)}")
-            return jsonify({'error': 'Transcription failed', 'details': str(e)}), 500
-
+        segments, _ = wmodel.transcribe(
+            temp_file_path,
+            beam_size=beamsize,
+            vad_filter=True,
+            without_timestamps=True,  # Ensure timestamps are not included
+            compression_ratio_threshold=2.4,
+            word_timestamps=False
+        )
+
+        full_text = " ".join(segment.text for segment in segments)
+
+        return Response(
+            response=full_text,
+            status=200,
+            mimetype='text/plain'
+        )
+
+    except Exception as e:
+        return jsonify({'error': str(e)}), 500
+
     finally:
         if temp_file_path:
             cleanup_temp_files(temp_file_path)
         active_requests -= 1
         request_semaphore.release()
-        print(f"Request completed (Active requests: {active_requests})")
+        print(f"Processed in {time.time()-start_time:.2f}s (Active: {active_requests})")
 
 if __name__ == "__main__":
     # Create temporary folder if it doesn't exist
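After this change the endpoint returns the transcript as text/plain rather than a JSON payload of segments, so a client only needs response.text. A minimal client sketch; the URL, port, and audio file name are assumptions, since the route decorator is not shown in this diff, but the 'audio' form field and the 503/500 JSON errors come from the handler above:

```python
# Hypothetical client sketch -- the URL, port, and file name are assumptions.
import requests

with open("meeting.mp3", "rb") as f:
    resp = requests.post(
        "http://localhost:7860/whisper_transcribe",  # assumed route; not shown in the diff
        files={"audio": f},                          # field name matches request.files['audio']
    )

if resp.status_code == 200:
    print(resp.text)                     # plain-text transcript
else:
    print(resp.status_code, resp.text)   # e.g. 503 {"error": "Server busy"}
```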
 
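For context, the handler relies on module-level pieces defined above the diffed region: request_semaphore, active_requests, TEMPORARY_FOLDER, and cleanup_temp_files. A rough sketch of what that setup could look like; the concrete values and the MAX_CONCURRENT_REQUESTS constant are assumptions, only the names used in the diff are taken from the source:

```python
# Hypothetical module-level setup -- names come from the diff, values are assumed.
import os
import threading

MAX_CONCURRENT_REQUESTS = 2                               # assumed concurrency cap
request_semaphore = threading.Semaphore(MAX_CONCURRENT_REQUESTS)
active_requests = 0

TEMPORARY_FOLDER = "/tmp/whisper_uploads"                 # assumed upload location

def cleanup_temp_files(path: str) -> None:
    """Best-effort removal of the uploaded temp file."""
    try:
        os.remove(path)
    except OSError:
        pass
```

The non-blocking acquire at the top of the handler is what produces the fast 503 response when all semaphore slots are in use, instead of queuing the request.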