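# NOTE: the lines below are residue from the Hugging Face file viewer, kept as
# comments so that the module parses.
# Nusri7's picture
# Initial commit with FastAPI + Gradio app
# 20acaf7
# raw
# history blame
# 4.02 kB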
import io
import os

import gradio as gr
import numpy as np
import torch
import torchaudio
from fastapi import FastAPI, HTTPException, File, UploadFile
from fastapi.responses import JSONResponse
from speechbrain.inference import SpeakerRecognition
# Build the shared speaker-verification model once, at import time.
# `source` names the pretrained ECAPA checkpoint and `savedir` the local
# directory handed to SpeechBrain for its files.
speaker_verification = SpeakerRecognition.from_hparams(
    savedir="tmp_model",
    source="speechbrain/spkrec-ecapa-voxceleb",
)
# Sample rate (Hz) that waveforms are converted to before they reach the model.
# It matches the default `sample_rate` of `get_similarity`.
# NOTE(review): the pretrained ECAPA checkpoint is documented as trained on
# 16 kHz audio -- confirm against the model card if the checkpoint changes.
_MODEL_SAMPLE_RATE = 16000


def _prepare_waveform(audio, sample_rate):
    """Return *audio* as a float32 mono tensor of shape (1, N) at the model rate.

    Args:
        audio: Array-like or tensor of samples, 1-D (N,) or 2-D
            (channels, N) / (N, channels).
        sample_rate: Sample rate of *audio* in Hz.

    Raises:
        ValueError: If the input is empty or has more than two dimensions.
    """
    signal = torch.as_tensor(audio)
    if signal.numel() == 0:
        raise ValueError("Audio input is empty.")
    if signal.ndimension() > 2:
        raise ValueError(
            f"Expected 1-D or 2-D audio, got {signal.ndimension()} dimensions."
        )

    if signal.is_floating_point():
        signal = signal.to(torch.float32)
    else:
        # Integer PCM (e.g. int16 samples) is scaled into [-1, 1] so that it
        # is comparable with float waveforms.
        full_scale = float(torch.iinfo(signal.dtype).max)
        signal = signal.to(torch.float32) / full_scale

    if signal.ndimension() == 2:
        # Downmix to mono. The channel axis is taken to be the shorter one,
        # which covers both (channels, N) and (N, channels) layouts.
        channel_axis = 0 if signal.shape[0] <= signal.shape[1] else 1
        signal = signal.mean(dim=channel_axis)
    signal = signal.unsqueeze(0)

    if int(sample_rate) != _MODEL_SAMPLE_RATE:
        signal = torchaudio.functional.resample(
            signal, int(sample_rate), _MODEL_SAMPLE_RATE
        )
    return signal


def _split_rate_and_samples(audio):
    """Return (sample_rate, samples) for a supported input, or None otherwise.

    A bare numpy array is assumed to already be at the model rate; a
    two-element (sample_rate, numpy array) pair carries its own rate.
    """
    if isinstance(audio, np.ndarray):
        return _MODEL_SAMPLE_RATE, audio
    if (
        isinstance(audio, (tuple, list))
        and len(audio) == 2
        and isinstance(audio[0], (int, np.integer))
        and isinstance(audio[1], np.ndarray)
    ):
        return int(audio[0]), audio[1]
    return None


# Function to calculate similarity score
def get_similarity(audio1, audio2, sample_rate=16000):
    """Score how likely two recordings come from the same speaker.

    Args:
        audio1: Samples of the first recording (array-like or tensor).
        audio2: Samples of the second recording (array-like or tensor).
        sample_rate: Sample rate in Hz shared by both inputs. Inputs at any
            other rate than the model rate are resampled first.

    Returns:
        ``(score, "Yes" | "No")`` on success, where ``score`` is a float and
        the string reflects the model's same-speaker prediction. On any
        failure returns ``(None, error_message)`` instead of raising.
    """
    try:
        signal1 = _prepare_waveform(audio1, sample_rate)
        signal2 = _prepare_waveform(audio2, sample_rate)
        # Get similarity score and prediction
        score, prediction = speaker_verification.verify_batch(signal1, signal2)
        return float(score), "Yes" if prediction else "No"
    except Exception as e:
        # Callers rely on receiving the failure as a message, not an exception.
        return None, str(e)


# API function to compare voices
def compare_voices(file1, file2):
    """Compare two audio inputs and report the result as a JSON-ready dict.

    Args:
        file1: First recording, either a numpy array or a
            ``(sample_rate, numpy array)`` pair.
        file2: Second recording, in the same accepted formats.

    Returns:
        ``{"Similarity Score": "<4-decimal score>", "Same User Prediction":
        "Yes" | "No"}`` on success, or ``{"error": message}`` when the inputs
        are unsupported or processing fails.
    """
    try:
        # Debugging: Check the types of inputs
        print(f"Received file1: {type(file1)}")
        print(f"Received file2: {type(file2)}")

        parsed1 = _split_rate_and_samples(file1)
        parsed2 = _split_rate_and_samples(file2)
        if parsed1 is None or parsed2 is None:
            return {
                "error": (
                    "Invalid input format. Both inputs must be numpy arrays "
                    "or (sample_rate, numpy array) tuples."
                )
            }

        # Each input is normalised with its own rate, so recordings captured
        # at different rates can still be compared.
        audio1 = _prepare_waveform(parsed1[1], parsed1[0])
        audio2 = _prepare_waveform(parsed2[1], parsed2[0])

        score, is_same_user = get_similarity(audio1, audio2)
        if score is None:
            # On failure the second element carries the error message.
            return {"error": is_same_user}
        return {"Similarity Score": f"{score:.4f}", "Same User Prediction": is_same_user}
    except Exception as e:
        # Handle unexpected errors
        return {"error": str(e)}
# FastAPI app
app = FastAPI()


def _decode_upload(raw_bytes, target_rate=16000):
    """Decode uploaded audio bytes into a mono numpy array at *target_rate* Hz.

    Args:
        raw_bytes: Full contents of the uploaded audio file.
        target_rate: Sample rate in Hz to resample the decoded audio to.

    Returns:
        A numpy array of shape (1, N).
    """
    waveform, source_rate = torchaudio.load(io.BytesIO(raw_bytes))
    # torchaudio.load yields (channels, frames); average the channels so a
    # stereo upload is not passed on as two separate signals.
    if waveform.shape[0] > 1:
        waveform = waveform.mean(dim=0, keepdim=True)
    # The decoded rate was previously discarded; resample so that uploads at
    # other rates are compared on the same time base.
    if source_rate != target_rate:
        waveform = torchaudio.functional.resample(waveform, source_rate, target_rate)
    return waveform.numpy()


@app.post("/compare_voices/")
async def compare_voices_api(file1: UploadFile = File(...), file2: UploadFile = File(...)):
    """
    Compare two audio files and return the similarity score and prediction.

    Both uploads are decoded, downmixed to mono and resampled before being
    passed to `compare_voices`, whose dict result is returned as the response.

    Raises:
        HTTPException: Status 400 carrying the error text if reading or
            decoding either upload fails.
    """
    try:
        audio1 = _decode_upload(await file1.read())
        audio2 = _decode_upload(await file2.read())
        # Compare the two audio files and return the result
        return compare_voices(audio1, audio2)
    except Exception as e:
        raise HTTPException(status_code=400, detail=str(e)) from e
# Gradio interface function
def gradio_interface():
    """Build the Gradio interface that feeds two audio inputs to `compare_voices`.

    Returns:
        A `gr.Interface` with two numpy-typed audio inputs and JSON output,
        with live updates disabled.
    """
    audio_inputs = [
        gr.Audio(type="numpy", label=caption)
        for caption in ("First Audio File", "Second Audio File")
    ]
    return gr.Interface(
        fn=compare_voices,
        inputs=audio_inputs,
        outputs="json",  # results are rendered as JSON
        live=False,  # run only on explicit submit
    )
# Launch Gradio as a web interface
@app.on_event("startup")
async def startup():
    """Start the Gradio UI alongside the FastAPI app when the server boots.

    The interface comes from `gradio_interface()` so its definition lives in
    one place. `prevent_thread_lock=True` makes `launch()` return once the
    Gradio server is running; without it the call blocks its caller, so this
    startup hook would never finish and the API would never begin serving.
    """
    gradio_interface().launch(share=True, inline=True, prevent_thread_lock=True)
# Script entry point: serve the FastAPI app (its startup hook brings up Gradio)
if __name__ == "__main__":
    import uvicorn

    # Listen on all interfaces, port 5000.
    uvicorn.run(app, port=5000, host="0.0.0.0")