Spaces:

Nusri7
/

voice_comparison

Sleeping

App Files Files Community

Nusri7 commited on Dec 11, 2024

Commit

fec10d3

1 Parent(s): acbe8cd

Initial commit with FastAPI + Gradio app

Browse files

Files changed (1) hide show

app.py +36 -39

app.py CHANGED Viewed

@@ -1,6 +1,7 @@
 import os
 import torchaudio
 import gradio as gr
 from fastapi import FastAPI, HTTPException, File, UploadFile
 from speechbrain.inference import SpeakerRecognition
 from fastapi.responses import JSONResponse
@@ -11,42 +12,33 @@ speaker_verification = SpeakerRecognition.from_hparams(
     savedir="tmp_model"
 )
-# Temporary folder to save uploaded files
-UPLOAD_FOLDER = "uploaded_audio"
-os.makedirs(UPLOAD_FOLDER, exist_ok=True)
 # Function to calculate similarity score
-def get_similarity(audio_path1: str, audio_path2: str):
     try:
-        # Load audio files
-        signal1, _ = torchaudio.load(audio_path1)
-        signal2, _ = torchaudio.load(audio_path2)
         # Get similarity score and prediction
         score, prediction = speaker_verification.verify_batch(signal1, signal2)
         return float(score), "Yes" if prediction else "No"
     except Exception as e:
         return str(e), None
-    finally:
-        # Clean up temporary files
-        if os.path.exists(audio_path1):
-            os.remove(audio_path1)
-        if os.path.exists(audio_path2):
-            os.remove(audio_path2)
 # API function to compare voices
 def compare_voices(file1, file2):
-    # Save uploaded files temporarily
-    file1_path = os.path.join(UPLOAD_FOLDER, file1.name)
-    file2_path = os.path.join(UPLOAD_FOLDER, file2.name)
-    with open(file1_path, "wb") as f1:
-        f1.write(file1.read())
-    with open(file2_path, "wb") as f2:
-        f2.write(file2.read())
     # Get similarity score
-    score, is_same_user = get_similarity(file1_path, file2_path)
     if is_same_user is None:
         return "Error: " + score  # This will return the error message
@@ -61,30 +53,35 @@ async def compare_voices_api(file1: UploadFile = File(...), file2: UploadFile =
     """
     Compare two audio files and return the similarity score and prediction.
     """
-    # Save uploaded files temporarily
-    file1_path = os.path.join(UPLOAD_FOLDER, file1.filename)
-    file2_path = os.path.join(UPLOAD_FOLDER, file2.filename)
-    with open(file1_path, "wb") as f1:
-        f1.write(await file1.read())
-    with open(file2_path, "wb") as f2:
-        f2.write(await file2.read())
-    # Get similarity score
-    score, is_same_user = get_similarity(file1_path, file2_path)
-    if is_same_user is None:
-        raise HTTPException(status_code=500, detail="Error in processing files: " + score)
-    return JSONResponse(content={"Similarity Score": f"{score:.4f}", "Same User Prediction": is_same_user})
 # Gradio interface function
 def gradio_interface():
     return gr.Interface(
         fn=compare_voices,
         inputs=[
-            gr.Audio(type="numpy", label="First Audio File"),  # Updated to use `type="numpy"`
-            gr.Audio(type="numpy", label="Second Audio File")  # Updated to use `type="numpy"`
         ],
         outputs="json",  # Output results as JSON
         live=False  # No live interface, just the API
@@ -94,8 +91,8 @@ def gradio_interface():
 @app.on_event("startup")
 async def startup():
     gr.Interface(fn=compare_voices, inputs=[
-        gr.Audio(type="numpy", label="First Audio File"),  # Updated to use `type="numpy"`
-        gr.Audio(type="numpy", label="Second Audio File")  # Updated to use `type="numpy"`
     ], outputs="json", live=False).launch(share=True, inline=True)
 # Running the FastAPI app with Gradio

 import os
 import torchaudio
 import gradio as gr
+import torch
 from fastapi import FastAPI, HTTPException, File, UploadFile
 from speechbrain.inference import SpeakerRecognition
 from fastapi.responses import JSONResponse
     savedir="tmp_model"
 )
 # Function to calculate similarity score
+def get_similarity(audio1, audio2, sample_rate=16000):
     try:
+        # Convert numpy arrays to tensors
+        signal1 = torch.tensor(audio1)
+        signal2 = torch.tensor(audio2)
+        # Make sure the signals are in the right shape (2D tensor: (1, N))
+        if signal1.ndimension() == 1:
+            signal1 = signal1.unsqueeze(0)
+        if signal2.ndimension() == 1:
+            signal2 = signal2.unsqueeze(0)
         # Get similarity score and prediction
         score, prediction = speaker_verification.verify_batch(signal1, signal2)
         return float(score), "Yes" if prediction else "No"
     except Exception as e:
         return str(e), None
 # API function to compare voices
 def compare_voices(file1, file2):
+    # Gradio Audio returns a tuple of (audio, sample_rate)
+    audio1, _ = file1  # Audio1 is a tuple (numpy_array, sample_rate)
+    audio2, _ = file2  # Audio2 is a tuple (numpy_array, sample_rate)
     # Get similarity score
+    score, is_same_user = get_similarity(audio1, audio2)
     if is_same_user is None:
         return "Error: " + score  # This will return the error message
     """
     Compare two audio files and return the similarity score and prediction.
     """
+    # Gradio uses numpy arrays directly, so no need to save the files
+    # You'd need to process the audio files here, but in FastAPI you need to convert file to numpy first.
+    try:
+        file1_data = await file1.read()
+        file2_data = await file2.read()
+        # Convert these file data into numpy arrays (this part is pseudo-code as we need to decode the file data)
+        # Typically, you would use a library like torchaudio or librosa to decode the audio from raw file data.
+        # Assuming audio data is in correct format for the speaker model
+        # Example:
+        # numpy1 = torchaudio.load(io.BytesIO(file1_data))[0].numpy()
+        # numpy2 = torchaudio.load(io.BytesIO(file2_data))[0].numpy()
+        # For this example, the audio should be pre-converted to numpy arrays before processing.
+        # Use a conversion library (like torchaudio or librosa) to decode the audio
+        return {"message": "Processing files directly (no save to disk)"}
+    except Exception as e:
+        raise HTTPException(status_code=400, detail=str(e))
 # Gradio interface function
 def gradio_interface():
     return gr.Interface(
         fn=compare_voices,
         inputs=[
+            gr.Audio(type="numpy", label="First Audio File"),  # Gradio now gives numpy arrays
+            gr.Audio(type="numpy", label="Second Audio File")  # Gradio now gives numpy arrays
         ],
         outputs="json",  # Output results as JSON
         live=False  # No live interface, just the API
 @app.on_event("startup")
 async def startup():
     gr.Interface(fn=compare_voices, inputs=[
+        gr.Audio(type="numpy", label="First Audio File"),  # Gradio now gives numpy arrays
+        gr.Audio(type="numpy", label="Second Audio File")  # Gradio now gives numpy arrays
     ], outputs="json", live=False).launch(share=True, inline=True)
 # Running the FastAPI app with Gradio