Spaces:

Nusri7
/

voice_comparison

Sleeping

App Files Files Community

voice_comparison / app.py

Nusri7

Initial commit with FastAPI + Gradio app

20acaf7 8 months ago

raw

history blame

4.02 kB

	import io
	import os

	import torchaudio
	import gradio as gr
	import torch
	from fastapi import FastAPI, HTTPException, File, UploadFile
	from speechbrain.inference import SpeakerRecognition
	from fastapi.responses import JSONResponse
	import numpy as np

	# Load the pretrained speaker-verification model at import time; the
	# downloaded files are kept in the local "tmp_model" directory.
	speaker_verification = SpeakerRecognition.from_hparams(
	    savedir="tmp_model",
	    source="speechbrain/spkrec-ecapa-voxceleb",
	)

	# Helper: normalise one raw audio input into what verify_batch is given
	def _prepare_signal(audio, sample_rate):
	    """Return *audio* as a mono float32 tensor of shape ``(1, N)``.

	    Args:
	        audio: 1-D ``(N,)`` or 2-D ``(channels, N)`` samples (array-like or tensor).
	        sample_rate: Sampling rate of *audio* in Hz.

	    Raises:
	        ValueError: If the audio is empty or is not 1-D / 2-D.
	    """
	    signal = torch.as_tensor(audio)
	    if signal.numel() == 0:
	        raise ValueError("Audio input is empty.")

	    if signal.is_floating_point():
	        # float64 (NumPy's default) would not match float32 model weights.
	        signal = signal.to(torch.float32)
	    else:
	        # Integer PCM: rescale by the dtype's full-scale value.
	        full_scale = float(torch.iinfo(signal.dtype).max) + 1.0
	        signal = signal.to(torch.float32) / full_scale

	    if signal.ndimension() == 1:
	        signal = signal.unsqueeze(0)
	    elif signal.ndimension() == 2:
	        # Channels-first layout (as torchaudio.load returns): average the
	        # channels so the first axis is a batch of one, not one row per channel.
	        signal = signal.mean(dim=0, keepdim=True)
	    else:
	        raise ValueError(
	            f"Audio must be 1-D or 2-D, got {signal.ndimension()}-D."
	        )

	    # NOTE(review): 16 kHz is this function's default rate and is assumed to
	    # be what the model expects; confirm against the model card.
	    if sample_rate != 16000:
	        signal = torchaudio.functional.resample(signal, int(sample_rate), 16000)
	    return signal


	# Function to calculate similarity score
	def get_similarity(audio1, audio2, sample_rate=16000):
	    """Score how likely two recordings come from the same speaker.

	    Args:
	        audio1: First recording, 1-D ``(N,)`` or 2-D ``(channels, N)``.
	        audio2: Second recording, same accepted layouts as *audio1*.
	        sample_rate: Sampling rate in Hz shared by both recordings; audio
	            at any other rate than 16000 is resampled to 16000 first.

	    Returns:
	        ``(score, "Yes" | "No")`` on success, or ``(None, error_message)``
	        if anything fails (callers test ``score is None``).
	    """
	    try:
	        signal1 = _prepare_signal(audio1, sample_rate)
	        signal2 = _prepare_signal(audio2, sample_rate)

	        # Inference only; no autograd bookkeeping is needed.
	        with torch.no_grad():
	            score, prediction = speaker_verification.verify_batch(signal1, signal2)
	        return float(score), "Yes" if prediction else "No"
	    except Exception as e:
	        # Deliberate best-effort boundary: report the failure as text.
	        return None, str(e)

	# Helper: accept both plain arrays and Gradio's (rate, samples) tuples
	def _coerce_audio(value):
	    """Return *value* as an ndarray ready for ``get_similarity``, or ``None``.

	    * ``np.ndarray`` inputs are returned unchanged.
	    * ``(sample_rate, ndarray)`` tuples, the value format of
	      ``gr.Audio(type="numpy")``, are converted to mono float32 and
	      resampled to 16 kHz.
	    * Anything else yields ``None`` so the caller can report bad input.
	    """
	    if isinstance(value, np.ndarray):
	        return value
	    is_rate_samples_pair = (
	        isinstance(value, tuple)
	        and len(value) == 2
	        and isinstance(value[1], np.ndarray)
	    )
	    if not is_rate_samples_pair:
	        return None

	    rate, samples = value
	    if np.issubdtype(samples.dtype, np.integer):
	        # Integer PCM: rescale by the dtype's full-scale value.
	        full_scale = float(np.iinfo(samples.dtype).max) + 1.0
	        samples = samples.astype(np.float32) / full_scale
	    else:
	        samples = samples.astype(np.float32)

	    if samples.ndim == 2:
	        # Gradio lays multi-channel audio out as (samples, channels).
	        samples = samples.mean(axis=1)

	    # NOTE(review): 16 kHz is get_similarity's default rate and is assumed
	    # to be what the model expects; confirm against the model card.
	    if rate != 16000:
	        resampled = torchaudio.functional.resample(
	            torch.from_numpy(samples), int(rate), 16000
	        )
	        samples = resampled.numpy()
	    return samples


	# API function to compare voices
	def compare_voices(file1, file2):
	    """Compare two recordings and report whether they share a speaker.

	    Args:
	        file1: First recording, either an ``np.ndarray`` or a
	            ``(sample_rate, np.ndarray)`` tuple.
	        file2: Second recording, same accepted forms as *file1*.

	    Returns:
	        ``{"Similarity Score": "<4 decimals>", "Same User Prediction": "Yes"|"No"}``
	        on success, otherwise ``{"error": <message>}``. Never raises.
	    """
	    try:
	        # Debugging: Check the types of inputs
	        print(f"Received file1: {type(file1)}")
	        print(f"Received file2: {type(file2)}")

	        audio1 = _coerce_audio(file1)
	        audio2 = _coerce_audio(file2)
	        if audio1 is None or audio2 is None:
	            return {"error": "Invalid input format. Both inputs must be numpy arrays."}

	        score, is_same_user = get_similarity(audio1, audio2)

	        if score is None:
	            # On failure get_similarity puts the error text in the second slot.
	            return {"error": is_same_user}

	        return {"Similarity Score": f"{score:.4f}", "Same User Prediction": is_same_user}

	    except Exception as e:
	        # Handle unexpected errors
	        return {"error": str(e)}

	# FastAPI application instance; the route and startup-hook decorators
	# further down register their handlers on it.
	app = FastAPI()

	@app.post("/compare_voices/")
	async def compare_voices_api(file1: UploadFile = File(...), file2: UploadFile = File(...)):
	    """
	    Compare two audio files and return the similarity score and prediction.

	    Each upload is decoded with torchaudio, averaged down to a single
	    channel and resampled to 16 kHz before being handed to
	    ``compare_voices``.

	    Raises:
	        HTTPException: 400 if an upload is empty, cannot be decoded, or
	            processing fails.
	    """
	    try:
	        waveforms = []
	        for upload in (file1, file2):
	            payload = await upload.read()
	            if not payload:
	                raise ValueError(f"Uploaded file '{upload.filename}' is empty.")

	            # torchaudio.load returns (Tensor[channels, time], sample_rate).
	            waveform, source_rate = torchaudio.load(io.BytesIO(payload))

	            # Downmix so a stereo file is not mistaken for a batch of two.
	            waveform = waveform.mean(dim=0, keepdim=True)

	            # NOTE(review): 16 kHz is get_similarity's default rate and is
	            # assumed to be what the model expects; confirm against the model card.
	            if source_rate != 16000:
	                waveform = torchaudio.functional.resample(waveform, source_rate, 16000)

	            waveforms.append(waveform.numpy())

	        # Compare the two audio files and return the result
	        return compare_voices(waveforms[0], waveforms[1])

	    except Exception as e:
	        raise HTTPException(status_code=400, detail=str(e)) from e

	# Gradio interface function
	def gradio_interface():
	    """Build the two-audio-input, JSON-output Gradio UI around ``compare_voices``."""
	    # Both audio components are configured with type="numpy".
	    audio_inputs = [
	        gr.Audio(type="numpy", label="First Audio File"),
	        gr.Audio(type="numpy", label="Second Audio File"),
	    ]
	    interface = gr.Interface(
	        fn=compare_voices,
	        inputs=audio_inputs,
	        outputs="json",  # results are rendered as JSON
	        live=False,  # live mode disabled
	    )
	    return interface

	# Launch Gradio as a web interface
	@app.on_event("startup")
	async def startup():
	    """Start the Gradio UI when the FastAPI application starts.

	    ``prevent_thread_lock=True`` makes ``launch()`` return once the Gradio
	    server is up. Without it, ``launch()`` blocks the calling thread, which
	    here is the event loop running this startup hook.
	    """
	    # Reuse the shared builder instead of constructing a second, identical
	    # interface inline.
	    gradio_interface().launch(share=True, inline=True, prevent_thread_lock=True)

	# Script entry point: serve the FastAPI app with uvicorn.
	if __name__ == "__main__":
	    import uvicorn

	    server_options = {"host": "0.0.0.0", "port": 5000}
	    uvicorn.run(app, **server_options)