Spaces:

Nusri7
/

voice_comparison

Sleeping

App Files Files Community

Nusri7 committed on Dec 11, 2024

Commit

c812287

1 Parent(s): 20acaf7

Initial commit with FastAPI + Gradio app

Browse files

Files changed (1) hide show

app.py +43 -68

app.py CHANGED Viewed

@@ -1,87 +1,62 @@
-import os
-import torchaudio
-import gradio as gr
 import torch
-from fastapi import FastAPI, HTTPException, File, UploadFile
 from speechbrain.inference import SpeakerRecognition
 from fastapi.responses import JSONResponse
-import numpy as np
 # Initialize the speaker verification model
-speaker_verification = SpeakerRecognition.from_hparams(
     source="speechbrain/spkrec-ecapa-voxceleb",
     savedir="tmp_model"
 )
 # Function to calculate similarity score
-def get_similarity(audio1, audio2, sample_rate=16000):
     try:
-        # Ensure audio1 and audio2 are numpy arrays
-        signal1 = torch.tensor(audio1)
-        signal2 = torch.tensor(audio2)
-        # Make sure the signals are in the right shape (2D tensor: (1, N))
-        if signal1.ndimension() == 1:
-            signal1 = signal1.unsqueeze(0)
-        if signal2.ndimension() == 1:
-            signal2 = signal2.unsqueeze(0)
-        # Get similarity score and prediction
-        score, prediction = speaker_verification.verify_batch(signal1, signal2)
-        return float(score), "Yes" if prediction else "No"
     except Exception as e:
-        return None, str(e)  # Return error message if any exception
-# API function to compare voices
-def compare_voices(file1, file2):
-    try:
-        # Debugging: Check the types of inputs
-        print(f"Received file1: {type(file1)}")
-        print(f"Received file2: {type(file2)}")
-        # Ensure file1 and file2 are numpy arrays
-        if isinstance(file1, np.ndarray) and isinstance(file2, np.ndarray):
-            audio1, audio2 = file1, file2
-        else:
-            return {"error": "Invalid input format. Both inputs must be numpy arrays."}
-        # Get similarity score
-        score, is_same_user = get_similarity(audio1, audio2)
-        if score is None:
-            # Return the error message if processing fails
-            return {"error": is_same_user}
-        # Return a dictionary with the similarity score and prediction
-        return {"Similarity Score": f"{score:.4f}", "Same User Prediction": is_same_user}
-    except Exception as e:
-        # Handle unexpected errors
         return {"error": str(e)}
-# FastAPI app
-app = FastAPI()
 @app.post("/compare_voices/")
 async def compare_voices_api(file1: UploadFile = File(...), file2: UploadFile = File(...)):
     """
     Compare two audio files and return the similarity score and prediction.
     """
     try:
-        # Process the audio files and return them as numpy arrays
         file1_data = await file1.read()
         file2_data = await file2.read()
-        # Assuming the audio is decoded into numpy arrays here (e.g., using torchaudio)
-        # For example:
-        audio1, _ = torchaudio.load(io.BytesIO(file1_data))  # (Tensor, sample_rate)
-        audio2, _ = torchaudio.load(io.BytesIO(file2_data))  # (Tensor, sample_rate)
-        audio1 = audio1.numpy()
-        audio2 = audio2.numpy()
-        # Compare the two audio files and return the result
-        return compare_voices(audio1, audio2)
     except Exception as e:
         raise HTTPException(status_code=400, detail=str(e))
@@ -89,21 +64,21 @@ async def compare_voices_api(file1: UploadFile = File(...), file2: UploadFile =
 # Gradio interface function
 def gradio_interface():
     return gr.Interface(
-        fn=compare_voices,
         inputs=[
-            gr.Audio(type="numpy", label="First Audio File"),  # Gradio now gives numpy arrays
-            gr.Audio(type="numpy", label="Second Audio File")  # Gradio now gives numpy arrays
         ],
-        outputs="json",  # Output results as JSON
         live=False  # No live interface, just the API
     )
-# Launch Gradio as a web interface
 @app.on_event("startup")
 async def startup():
-    gr.Interface(fn=compare_voices, inputs=[
-        gr.Audio(type="numpy", label="First Audio File"),  # Gradio now gives numpy arrays
-        gr.Audio(type="numpy", label="Second Audio File")  # Gradio now gives numpy arrays
     ], outputs="json", live=False).launch(share=True, inline=True)
 # Running the FastAPI app with Gradio

+import io
+import tempfile
 import torch
+import gradio as gr
+from fastapi import FastAPI, File, UploadFile, HTTPException
 from speechbrain.inference import SpeakerRecognition
 from fastapi.responses import JSONResponse
 # Initialize the speaker verification model
+verification = SpeakerRecognition.from_hparams(
     source="speechbrain/spkrec-ecapa-voxceleb",
     savedir="tmp_model"
 )
+# FastAPI app
+app = FastAPI()
 # Function to calculate similarity score
+def get_similarity(file1_data, file2_data):
     try:
+        # Create temporary files for the uploaded audio
+        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmpfile1, \
+             tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmpfile2:
+            # Write audio data to the temporary files
+            tmpfile1.write(file1_data)
+            tmpfile2.write(file2_data)
+            # Get the file paths
+            file1_path = tmpfile1.name
+            file2_path = tmpfile2.name
+        # Use `verify_files` to compare the audio files
+        score, prediction = verification.verify_files(file1_path, file2_path)
+        # Return the result as a dictionary
+        return {
+            "Similarity Score": f"{score:.4f}",
+            "Same User Prediction": "Yes" if prediction else "No"
+        }
     except Exception as e:
         return {"error": str(e)}
# API function to compare voices
@app.post("/compare_voices/")
async def compare_voices_api(file1: UploadFile = File(...), file2: UploadFile = File(...)):
    """
    Score the speaker similarity of two uploaded audio files.

    Both uploads are read fully into memory and handed to ``get_similarity``;
    the dict it produces is returned unchanged. Any exception raised while
    reading or comparing is reported to the client as an HTTP 400 whose
    detail is the exception message.
    """
    try:
        first_bytes = await file1.read()
        second_bytes = await file2.read()
        return get_similarity(first_bytes, second_bytes)
    except Exception as exc:
        raise HTTPException(status_code=400, detail=str(exc))
 # Gradio interface function
 def gradio_interface():
     return gr.Interface(
+        fn=compare_voices_api,  # FastAPI function is wrapped here
         inputs=[
+            gr.Audio(type="file", label="First Audio File"),  # Audio file input
+            gr.Audio(type="file", label="Second Audio File")  # Audio file input
         ],
+        outputs="json",  # Output as JSON
         live=False  # No live interface, just the API
     )
# Launch Gradio interface
@app.on_event("startup")
async def startup():
    """Start the Gradio UI when the FastAPI application starts.

    The interface comes from ``gradio_interface()`` so it is defined in one
    place only. ``prevent_thread_lock=True`` makes ``launch()`` return once
    the Gradio server is running; by default it blocks the calling thread,
    which would keep this startup hook from ever finishing.
    """
    gradio_interface().launch(share=True, inline=True, prevent_thread_lock=True)
 # Running the FastAPI app with Gradio