"""Flask API for audio transcription backed by faster-whisper.

Endpoints:
    GET  /health              -- liveness + configuration info
    GET  /status/busy         -- whether the server is at capacity
    POST /whisper_transcribe  -- transcribe an uploaded audio file
"""

import datetime
import io
import os
import tempfile
import time
import uuid
from threading import Lock, Semaphore

import torch
from flask import Flask, Response, jsonify, request  # Response kept for API parity
from faster_whisper import WhisperModel
from werkzeug.utils import secure_filename

app = Flask(__name__)

# --- Configuration -----------------------------------------------------------
MAX_CONCURRENT_REQUESTS = 2          # Adjust based on your server capacity.
MAX_AUDIO_DURATION = 60 * 30         # 30 minutes; advertised via /health only —
                                     # NOTE(review): not actually enforced anywhere.
TEMPORARY_FOLDER = tempfile.gettempdir()
ALLOWED_EXTENSIONS = {'mp3', 'wav', 'ogg', 'm4a', 'flac'}

# Device check for faster-whisper: fp16 on GPU, int8 quantization on CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
compute_type = "float16" if device == "cuda" else "int8"
print(f"Using device: {device} with compute_type: {compute_type}")

# Slightly larger beam size can help with long-form accuracy.
beamsize = 2
wmodel = WhisperModel(
    "guillaumekln/faster-whisper-small",
    device=device,
    compute_type=compute_type,
    download_root="./model_cache",   # Cache model to avoid re-downloading.
)

# --- Concurrency control -----------------------------------------------------
request_semaphore = Semaphore(MAX_CONCURRENT_REQUESTS)
active_requests = 0
# BUGFIX: `active_requests += 1` is a non-atomic read-modify-write; with
# threaded=True two handlers can interleave and corrupt the counter, so all
# updates go through this lock.
_active_requests_lock = Lock()


def allowed_file(filename):
    """Return True if *filename* has an extension in ALLOWED_EXTENSIONS."""
    return '.' in filename and \
        filename.rsplit('.', 1)[1].lower() in ALLOWED_EXTENSIONS


def cleanup_temp_files(file_path):
    """Best-effort removal of a temporary file; logs (never raises) on failure."""
    try:
        if os.path.exists(file_path):
            os.remove(file_path)
    except Exception as e:
        print(f"Error cleaning up temp file {file_path}: {str(e)}")


@app.route("/health", methods=["GET"])
def health_check():
    """Endpoint to check if API is running."""
    return jsonify({
        'status': 'API is running',
        'timestamp': datetime.datetime.now().isoformat(),
        'device': device,
        'compute_type': compute_type,
        'active_requests': active_requests,
        'max_duration_supported': MAX_AUDIO_DURATION
    })


@app.route("/status/busy", methods=["GET"])
def server_busy():
    """Endpoint to check if server is busy (at its concurrency limit)."""
    is_busy = active_requests >= MAX_CONCURRENT_REQUESTS
    return jsonify({
        'is_busy': is_busy,
        'active_requests': active_requests,
        'max_capacity': MAX_CONCURRENT_REQUESTS
    })


@app.route("/whisper_transcribe", methods=["POST"])
def whisper_transcribe():
    """Transcribe an uploaded audio file (multipart form field 'audio').

    Returns:
        503 when the server is at capacity,
        400 for a missing file or disallowed extension,
        500 if transcription raises,
        200 with {'transcription': <text>} on success.
    """
    global active_requests

    # Reject immediately rather than queueing when at capacity.
    if not request_semaphore.acquire(blocking=False):
        return jsonify({'error': 'Server busy'}), 503

    with _active_requests_lock:
        active_requests += 1
    start_time = time.time()
    temp_file_path = None

    try:
        if 'audio' not in request.files:
            return jsonify({'error': 'No file provided'}), 400

        audio_file = request.files['audio']
        if not (audio_file and allowed_file(audio_file.filename)):
            return jsonify({'error': 'Invalid file format'}), 400

        # BUGFIX: prefix the sanitized name with a unique token so two
        # concurrent uploads sharing a filename cannot clobber each other's
        # temp files (secure_filename may also return "" for exotic names,
        # which previously yielded a path equal to the temp directory).
        unique_name = f"{uuid.uuid4().hex}_{secure_filename(audio_file.filename)}"
        temp_file_path = os.path.join(TEMPORARY_FOLDER, unique_name)
        audio_file.save(temp_file_path)

        segments, _ = wmodel.transcribe(
            temp_file_path,
            beam_size=beamsize,
            vad_filter=True,                    # Skip long non-speech regions.
            without_timestamps=True,            # Ensure timestamps are not included.
            compression_ratio_threshold=2.4,    # Guard against repetition loops.
            word_timestamps=False,
        )
        # segments is a lazy generator; joining it here drives the decode.
        full_text = " ".join(segment.text for segment in segments)
        return jsonify({'transcription': full_text}), 200

    except Exception as e:
        return jsonify({'error': str(e)}), 500

    finally:
        if temp_file_path:
            cleanup_temp_files(temp_file_path)
        with _active_requests_lock:
            active_requests -= 1
        request_semaphore.release()
        print(f"Processed in {time.time() - start_time:.2f}s (Active: {active_requests})")


if __name__ == "__main__":
    # tempfile.gettempdir() normally exists; guard kept for overridden paths.
    if not os.path.exists(TEMPORARY_FOLDER):
        os.makedirs(TEMPORARY_FOLDER)
    app.run(host="0.0.0.0", port=7860, threaded=True)