hamza2923 committed · verified
Commit 922901f · 1 Parent(s): 27eb3e4

Update app.py

Files changed (1):
  1. app.py +52 -43
app.py CHANGED
@@ -3,9 +3,8 @@ from faster_whisper import WhisperModel
 import torch
 import io
 import time
-from threading import Lock
-from queue import Queue
 import datetime
+from threading import Semaphore
 
 app = Flask(__name__)
 
@@ -18,11 +17,20 @@ print(f"Using device: {device} with compute_type: {compute_type}")
 beamsize = 2
 wmodel = WhisperModel("guillaumekln/faster-whisper-small", device=device, compute_type=compute_type)
 
-# Server status tracking
+# Concurrency control
+MAX_CONCURRENT_REQUESTS = 2  # Adjust based on your server capacity
+request_semaphore = Semaphore(MAX_CONCURRENT_REQUESTS)
 active_requests = 0
-request_queue = Queue()
-status_lock = Lock()
-MAX_CONCURRENT_REQUESTS = 2  # Adjust based on your server capacity
+
+# Warm up the model (important for CUDA)
+print("Warming up the model...")
+try:
+    dummy_audio = io.BytesIO(b'')  # Empty audio for warmup
+    segments, info = wmodel.transcribe(dummy_audio, beam_size=beamsize)
+    _ = [segment.text for segment in segments]  # Force execution
+    print("Model warmup complete")
+except Exception as e:
+    print(f"Model warmup failed: {str(e)}")
 
 @app.route("/health", methods=["GET"])
 def health_check():
@@ -31,27 +39,18 @@ def health_check():
         'status': 'API is running',
         'timestamp': datetime.datetime.now().isoformat(),
         'device': device,
-        'compute_type': compute_type
+        'compute_type': compute_type,
+        'active_requests': active_requests
     })
 
 @app.route("/status/busy", methods=["GET"])
 def server_busy():
     """Endpoint to check if server is busy"""
-    with status_lock:
-        is_busy = active_requests >= MAX_CONCURRENT_REQUESTS
-        return jsonify({
-            'is_busy': is_busy,
-            'active_requests': active_requests,
-            'max_capacity': MAX_CONCURRENT_REQUESTS,
-            'queue_size': request_queue.qsize()
-        })
-
-@app.route("/status/queue", methods=["GET"])
-def queue_status():
-    """Endpoint to get current queue size"""
+    is_busy = active_requests >= MAX_CONCURRENT_REQUESTS
     return jsonify({
-        'queue_size': request_queue.qsize(),
-        'active_requests': active_requests
+        'is_busy': is_busy,
+        'active_requests': active_requests,
+        'max_capacity': MAX_CONCURRENT_REQUESTS
     })
 
 @app.route("/whisper_transcribe", methods=["POST"])
@@ -59,16 +58,15 @@ def whisper_transcribe():
     global active_requests
 
     # Check if server is at capacity
-    with status_lock:
-        if active_requests >= MAX_CONCURRENT_REQUESTS:
-            request_queue.put(datetime.datetime.now())
-            return jsonify({
-                'status': 'Server busy',
-                'message': f'Currently processing {active_requests} requests',
-                'queue_position': request_queue.qsize()
-            }), 503
-
-        active_requests += 1
+    if not request_semaphore.acquire(blocking=False):
+        return jsonify({
+            'status': 'Server busy',
+            'message': f'Currently processing {active_requests} requests',
+            'suggestion': 'Please try again shortly'
+        }), 503
+
+    active_requests += 1
+    print(f"Starting transcription (Active requests: {active_requests})")
 
     try:
         if 'audio' not in request.files:
@@ -79,30 +77,41 @@ def whisper_transcribe():
         if not (audio_file and audio_file.filename.lower().split('.')[-1] in allowed_extensions):
             return jsonify({'error': 'Invalid file format'}), 400
 
-        print(f"Transcribing audio on {device} (Active requests: {active_requests})")
         audio_bytes = audio_file.read()
         audio_file = io.BytesIO(audio_bytes)
 
         try:
+            # Timeout handling (60 seconds max processing time)
+            start_time = time.time()
             segments, info = wmodel.transcribe(audio_file, beam_size=beamsize)
+
             text = ''
-            starttime = time.time()
             for segment in segments:
+                if time.time() - start_time > 60:  # Timeout after 60 seconds
+                    raise TimeoutError("Transcription took too long")
                 text += segment.text
-            print(f"Time to transcribe: {time.time() - starttime} seconds")
-            return jsonify({'transcription': text})
+
+            processing_time = time.time() - start_time
+            print(f"Transcription completed in {processing_time:.2f} seconds")
+
+            return jsonify({
+                'transcription': text,
+                'processing_time': processing_time,
+                'language': info.language,
+                'language_probability': info.language_probability
+            })
+
+        except TimeoutError:
+            print("Transcription timeout")
+            return jsonify({'error': 'Transcription timeout'}), 504
         except Exception as e:
            print(f"Transcription error: {str(e)}")
            return jsonify({'error': 'Transcription failed'}), 500
+
    finally:
-        with status_lock:
-            active_requests -= 1
-            # Remove oldest queued request if any
-            if not request_queue.empty():
-                try:
-                    request_queue.get_nowait()
-                except:
-                    pass
+        active_requests -= 1
+        request_semaphore.release()
+        print(f"Request completed (Active requests: {active_requests})")
 
 if __name__ == "__main__":
     app.run(host="0.0.0.0", debug=True, port=7860, threaded=True)
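For reference, a minimal client sketch against the endpoints in this commit. The base URL, the test file name, and the assumption that "wav" is among allowed_extensions are placeholders for illustration, not part of the commit:

    import requests

    BASE_URL = "http://localhost:7860"  # assumed local deployment

    def transcribe(path):
        # The upload field must be named 'audio', matching request.files['audio']
        with open(path, "rb") as f:
            resp = requests.post(f"{BASE_URL}/whisper_transcribe", files={"audio": f})
        if resp.status_code == 503:    # rejected by the semaphore gate
            print("Server busy:", resp.json().get("message"))
        elif resp.status_code == 504:  # per-request 60-second timeout hit
            print("Transcription timed out")
        elif resp.ok:
            data = resp.json()
            print(data["transcription"])
            print(f"{data['language']} ({data['language_probability']:.2f}), "
                  f"{data['processing_time']:.2f}s")
        else:
            print("Error:", resp.status_code, resp.json())

    transcribe("sample.wav")  # hypothetical test file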
 
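To sanity-check the new semaphore gate, one could fire more simultaneous uploads than MAX_CONCURRENT_REQUESTS and confirm the overflow requests receive the 503 response. A rough sketch, again assuming a local server and a small test file:

    import concurrent.futures
    import requests

    URL = "http://localhost:7860/whisper_transcribe"  # assumed local deployment

    def post(_):
        # Each worker uploads the same file; field name 'audio' matches the API
        with open("sample.wav", "rb") as f:
            return requests.post(URL, files={"audio": f}).status_code

    # With MAX_CONCURRENT_REQUESTS = 2, four parallel posts should yield
    # roughly two 200s and two 503s (timing-dependent).
    with concurrent.futures.ThreadPoolExecutor(max_workers=4) as pool:
        print(list(pool.map(post, range(4))))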