# Source: app.py, uploaded by hamza2923 (commit 27eb3e4, "Update app.py", verified), 3.62 kB.
import datetime
import io
import time
from queue import Empty, Queue
from threading import Lock

import torch
from faster_whisper import WhisperModel
from flask import Flask, request, jsonify

# Flask application object that the route decorators below attach to.
app = Flask(__name__)

# Device check for faster-whisper: float16 on CUDA, int8 on CPU.
if torch.cuda.is_available():
    device = "cuda"
    compute_type = "float16"
else:
    device = "cpu"
    compute_type = "int8"
print(f"Using device: {device} with compute_type: {compute_type}")

# Faster Whisper setup: the model is created once, at import time, and the
# same instance is used by every request handler in this module.
beamsize = 2
wmodel = WhisperModel(
    "guillaumekln/faster-whisper-small",
    device=device,
    compute_type=compute_type,
)

# Server status tracking. `active_requests` counts transcriptions in flight,
# `request_queue` holds a timestamp for each request turned away while the
# server was full, and `status_lock` guards updates to both.
active_requests = 0
request_queue = Queue()
status_lock = Lock()
MAX_CONCURRENT_REQUESTS = 2  # Adjust based on your server capacity
@app.route("/health", methods=["GET"])
def health_check():
    """Liveness probe.

    Returns a JSON object with a fixed status string, the server's current
    local time in ISO-8601 form, and the device / compute type chosen at
    start-up.
    """
    payload = {
        'status': 'API is running',
        'timestamp': datetime.datetime.now().isoformat(),
        'device': device,
        'compute_type': compute_type,
    }
    return jsonify(payload)
@app.route("/status/busy", methods=["GET"])
def server_busy():
    """Report whether the server has reached its concurrency limit.

    Returns a JSON object with the busy flag, the number of requests in
    flight, the configured maximum, and the current size of the rejection
    queue.
    """
    # The capacity comparison is made while holding the status lock.
    with status_lock:
        at_capacity = MAX_CONCURRENT_REQUESTS <= active_requests

    payload = {
        'is_busy': at_capacity,
        'active_requests': active_requests,
        'max_capacity': MAX_CONCURRENT_REQUESTS,
        'queue_size': request_queue.qsize(),
    }
    return jsonify(payload)
@app.route("/status/queue", methods=["GET"])
def queue_status():
    """Return the current queue size and in-flight request count as JSON."""
    pending = request_queue.qsize()
    return jsonify({'queue_size': pending, 'active_requests': active_requests})
@app.route("/whisper_transcribe", methods=["POST"])
def whisper_transcribe():
    """Transcribe an uploaded audio file with the shared Whisper model.

    Expects a multipart form upload under the field name ``audio`` whose
    filename ends in ``.mp3``, ``.wav``, ``.ogg`` or ``.m4a``.

    Returns:
        200 with ``{'transcription': <text>}`` on success.
        400 when no file is supplied or the filename has no allowed extension.
        500 when the model raises while transcribing.
        503 when ``MAX_CONCURRENT_REQUESTS`` transcriptions are already
        running. The rejected request is only recorded (as a timestamp) in
        ``request_queue``; it is not resumed later, so the client must retry.
    """
    global active_requests

    # Admission control: claim a slot or turn the request away, atomically.
    with status_lock:
        if active_requests >= MAX_CONCURRENT_REQUESTS:
            request_queue.put(datetime.datetime.now())
            return jsonify({
                'status': 'Server busy',
                'message': f'Currently processing {active_requests} requests',
                'queue_position': request_queue.qsize()
            }), 503
        active_requests += 1

    try:
        if 'audio' not in request.files:
            return jsonify({'error': 'No file provided'}), 400

        audio_file = request.files['audio']
        allowed_extensions = {'mp3', 'wav', 'ogg', 'm4a'}

        # Require a real "." separator so that a file literally named "mp3"
        # (no extension at all) is rejected instead of slipping through.
        filename = audio_file.filename or ''
        _, separator, extension = filename.rpartition('.')
        if not separator or extension.lower() not in allowed_extensions:
            return jsonify({'error': 'Invalid file format'}), 400

        print(f"Transcribing audio on {device} (Active requests: {active_requests})")

        # Buffer the upload in memory and hand the model a seekable stream.
        audio_stream = io.BytesIO(audio_file.read())

        try:
            # Start the clock before transcribe() so that any work it does up
            # front is included in the reported duration.
            started = time.perf_counter()
            segments, info = wmodel.transcribe(audio_stream, beam_size=beamsize)
            text = ''.join(segment.text for segment in segments)
            print(f"Time to transcribe: {time.perf_counter() - started} seconds")
            return jsonify({'transcription': text})
        except Exception as e:
            # Request boundary: log the failure and answer with a generic 500
            # rather than leaking internals to the client.
            print(f"Transcription error: {str(e)}")
            return jsonify({'error': 'Transcription failed'}), 500
    finally:
        # Always release the slot, whichever return path was taken above.
        with status_lock:
            active_requests -= 1
            # Remove oldest queued request if any. Only an empty queue is an
            # expected outcome here; anything else should propagate.
            try:
                request_queue.get_nowait()
            except Empty:
                pass
if __name__ == "__main__":
    import os

    # The Werkzeug debugger allows arbitrary code execution from the browser,
    # so it must not be on by default while listening on all interfaces.
    # Opt in explicitly for local development with FLASK_DEBUG=1.
    debug_enabled = os.environ.get("FLASK_DEBUG", "").strip().lower() in {"1", "true", "yes", "on"}
    app.run(host="0.0.0.0", debug=debug_enabled, port=7860, threaded=True)