"""Flask service exposing a faster-whisper speech-to-text endpoint.

POST an audio file under the multipart field ``audio`` to
``/whisper_transcribe`` and receive ``{"transcription": "<text>"}`` as JSON.
"""
import io
import os
import time

import torch
from faster_whisper import WhisperModel
from flask import Flask, jsonify, request

app = Flask(__name__)

# Device check for faster-whisper: float16 when CUDA is available, int8 on CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
compute_type = "float16" if device == "cuda" else "int8"
print(f"Using device: {device} with compute_type: {compute_type}")

# Faster Whisper setup. The model is loaded once at import time and shared
# by every request.
beamsize = 2
wmodel = WhisperModel(
    "guillaumekln/faster-whisper-small",
    device=device,
    compute_type=compute_type,
)

# File extensions (lower-case, without the dot) accepted by the endpoint.
ALLOWED_EXTENSIONS = frozenset({"mp3", "wav", "ogg", "m4a"})


def _has_allowed_extension(filename):
    """Return True when *filename* ends in ``.<ext>`` with an allowed ext.

    A name without any dot (e.g. ``"mp3"``) has no extension and is rejected,
    as is an empty or missing filename.
    """
    if not filename:
        return False
    _, dot, extension = filename.rpartition(".")
    return bool(dot) and extension.lower() in ALLOWED_EXTENSIONS


@app.route("/whisper_transcribe", methods=["POST"])
def whisper_transcribe():
    """Transcribe the uploaded ``audio`` file and return the text as JSON.

    Returns:
        200 with ``{"transcription": text}`` on success.
        400 with ``{"error": ...}`` when the ``audio`` field is missing, the
        filename has no allowed extension, or the upload is empty.
        500 with ``{"error": "Transcription failed"}`` when the model raises.
    """
    if 'audio' not in request.files:
        return jsonify({'error': 'No file provided'}), 400

    upload = request.files['audio']
    if not _has_allowed_extension(upload.filename):
        return jsonify({'error': 'Invalid file format'}), 400

    audio_bytes = upload.read()
    if not audio_bytes:
        # An empty body is a client error; don't let it surface as a
        # misleading 500 from the decoder.
        return jsonify({'error': 'Empty file'}), 400

    print(f"Transcribing audio on {device}")
    audio_stream = io.BytesIO(audio_bytes)

    # Top-level request boundary: any model/decoder failure becomes a 500.
    try:
        # Start the clock before transcribe() so the call itself is included
        # in the reported time. Per the faster-whisper docs, `segments` is a
        # lazy generator, so the rest of the work runs while it is consumed
        # by the join below.
        started = time.perf_counter()
        segments, _info = wmodel.transcribe(audio_stream, beam_size=beamsize)
        text = "".join(segment.text for segment in segments)
        print(f"Time to transcribe: {time.perf_counter() - started} seconds")
        return jsonify({'transcription': text})
    except Exception as e:
        print(f"Transcription error: {str(e)}")
        return jsonify({'error': 'Transcription failed'}), 500


if __name__ == "__main__":
    # The Werkzeug debugger allows arbitrary code execution from the browser,
    # so it must not be enabled by default while listening on all interfaces.
    # Opt in explicitly for local development with FLASK_DEBUG=1.
    debug_enabled = os.environ.get("FLASK_DEBUG", "").lower() in {"1", "true", "yes"}
    app.run(host="0.0.0.0", debug=debug_enabled, port=7860)