import io
import os

import gradio as gr
import numpy as np
import torch
import torchaudio
from fastapi import FastAPI, HTTPException, File, UploadFile
from fastapi.responses import JSONResponse
from speechbrain.inference import SpeakerRecognition

# Load the pretrained speaker-verification model once, at import time, so every
# request shares the same instance. Files fetched for `source` are stored
# under `savedir`.
speaker_verification = SpeakerRecognition.from_hparams(
    savedir="tmp_model",
    source="speechbrain/spkrec-ecapa-voxceleb",
)

# Function to calculate similarity score
def _prepare_signal(audio, sample_rate, target_rate=16000):
    """Turn raw audio into a mono float32 tensor of shape (1, N) at target_rate.

    Raises:
        ValueError: if the audio is empty or is not 1-D / 2-D.
    """
    signal = torch.as_tensor(audio)
    if signal.numel() == 0:
        raise ValueError("Audio input is empty.")

    if torch.is_floating_point(signal):
        signal = signal.to(torch.float32)
    elif signal.dtype == torch.uint8:
        # 8-bit PCM is unsigned and centred on 128.
        signal = (signal.to(torch.float32) - 128.0) / 128.0
    else:
        # Signed integer PCM: divide by full scale so samples land in [-1, 1).
        full_scale = float(torch.iinfo(signal.dtype).max) + 1.0
        signal = signal.to(torch.float32) / full_scale

    if signal.ndim == 1:
        signal = signal.unsqueeze(0)
    elif signal.ndim == 2:
        # Heuristic: audio has far more samples than channels, so the shorter
        # axis is taken to be the channel axis. This accepts both
        # (channels, samples) and (samples, channels) layouts.
        if signal.shape[0] > signal.shape[1]:
            signal = signal.transpose(0, 1)
        # Downmix to mono so the model sees a batch of exactly one utterance.
        signal = signal.mean(dim=0, keepdim=True)
    else:
        raise ValueError(
            f"Expected 1-D or 2-D audio, got {signal.ndim} dimensions."
        )

    if sample_rate != target_rate:
        signal = torchaudio.functional.resample(signal, sample_rate, target_rate)
    return signal


def get_similarity(audio1, audio2, sample_rate=16000):
    """Score how likely two recordings come from the same speaker.

    Both inputs are converted to mono float32 tensors of shape (1, N) and
    resampled from ``sample_rate`` to 16 kHz before being handed to
    ``speaker_verification.verify_batch``.
    NOTE(review): 16 kHz mono is what the pretrained checkpoint is expected
    to receive per its model card -- confirm if the model is swapped.

    Args:
        audio1: First recording (numpy array, tensor, or nested sequence),
            1-D mono or 2-D multi-channel.
        audio2: Second recording, same conventions as ``audio1``.
        sample_rate: Sampling rate in Hz shared by both recordings.

    Returns:
        ``(score, "Yes" | "No")`` on success, or ``(None, error_message)``
        when anything goes wrong. Errors are reported, not raised, because
        callers branch on ``score is None``.
    """
    try:
        signal1 = _prepare_signal(audio1, sample_rate)
        signal2 = _prepare_signal(audio2, sample_rate)

        # Get similarity score and prediction
        score, prediction = speaker_verification.verify_batch(signal1, signal2)
        return float(score), "Yes" if bool(prediction) else "No"
    except Exception as e:
        return None, str(e)  # Return error message if any exception

# API function to compare voices
def _coerce_audio(audio, target_rate=16000):
    """Normalise one input to a numpy array, or return None if unsupported.

    Two input forms are accepted:

    * a bare ``np.ndarray``, returned unchanged;
    * a ``(sample_rate, np.ndarray)`` pair, which is how Gradio's
      ``gr.Audio(type="numpy")`` delivers audio (integer PCM, shaped
      ``(samples,)`` or ``(samples, channels)`` per the Gradio docs). The
      pair is converted to mono float32 and resampled to ``target_rate``.
    """
    if isinstance(audio, np.ndarray):
        return audio

    is_pair = isinstance(audio, (tuple, list)) and len(audio) == 2
    if not is_pair or not isinstance(audio[1], np.ndarray):
        return None

    rate, samples = audio
    rate = int(rate)

    if np.issubdtype(samples.dtype, np.integer):
        # Scale integer PCM by full scale so samples land in [-1, 1).
        full_scale = float(np.iinfo(samples.dtype).max) + 1.0
        samples = samples.astype(np.float32) / full_scale
    else:
        samples = samples.astype(np.float32)

    if samples.ndim == 2:
        # (samples, channels) -> mono
        samples = samples.mean(axis=1)

    if rate != target_rate and samples.size:
        resampled = torchaudio.functional.resample(
            torch.from_numpy(samples), rate, target_rate
        )
        samples = resampled.numpy()
    return samples


def compare_voices(file1, file2):
    """Compare two recordings and report whether they share a speaker.

    Args:
        file1: First recording, either a numpy array or a
            ``(sample_rate, numpy array)`` pair.
        file2: Second recording, same conventions as ``file1``.

    Returns:
        A dict with ``"Similarity Score"`` (formatted to 4 decimals) and
        ``"Same User Prediction"`` on success, or ``{"error": message}``
        on failure. This function never raises.
    """
    try:
        # Debugging: Check the types of inputs
        print(f"Received file1: {type(file1)}")
        print(f"Received file2: {type(file2)}")

        audio1 = _coerce_audio(file1)
        audio2 = _coerce_audio(file2)
        if audio1 is None or audio2 is None:
            return {
                "error": "Invalid input format. Both inputs must be numpy "
                         "arrays or (sample_rate, numpy array) tuples."
            }

        # Get similarity score
        score, is_same_user = get_similarity(audio1, audio2)

        if score is None:
            # On failure get_similarity puts the error text in the second slot.
            return {"error": is_same_user}

        return {"Similarity Score": f"{score:.4f}", "Same User Prediction": is_same_user}

    except Exception as e:
        # Handle unexpected errors
        return {"error": str(e)}

# FastAPI app
app = FastAPI()

@app.post("/compare_voices/")
async def compare_voices_api(file1: UploadFile = File(...), file2: UploadFile = File(...)):
    """
    Compare two uploaded audio files and return the similarity score and prediction.

    Each upload is decoded with torchaudio, downmixed to mono and resampled
    to 16 kHz, then passed to ``compare_voices`` as a 1-D numpy array.

    Returns:
        Whatever ``compare_voices`` returns (a result dict or ``{"error": ...}``).

    Raises:
        HTTPException: status 400 if reading, decoding or preparing either
            upload fails.
    """
    try:
        signals = []
        for upload in (file1, file2):
            payload = await upload.read()

            # torchaudio.load returns (waveform, sample_rate), with the
            # waveform shaped (channels, samples) by default.
            waveform, rate = torchaudio.load(io.BytesIO(payload))

            # Downmix to mono; otherwise each channel would be treated as a
            # separate batch item downstream.
            waveform = waveform.mean(dim=0)

            if rate != 16000:
                waveform = torchaudio.functional.resample(waveform, rate, 16000)

            signals.append(waveform.numpy())

        # Compare the two audio files and return the result
        return compare_voices(signals[0], signals[1])

    except Exception as e:
        raise HTTPException(status_code=400, detail=str(e))

# Gradio interface function
def gradio_interface():
    """Build the Gradio UI that feeds two audio inputs into ``compare_voices``.

    Returns:
        A ``gr.Interface`` with two ``type="numpy"`` audio inputs and JSON
        output. It is not launched here, and live mode is off.
    """
    captions = ("First Audio File", "Second Audio File")
    audio_inputs = [gr.Audio(type="numpy", label=caption) for caption in captions]
    return gr.Interface(
        fn=compare_voices,
        inputs=audio_inputs,
        outputs="json",
        live=False,
    )

# Launch Gradio as a web interface
@app.on_event("startup")
async def startup():
    """Start the Gradio UI alongside the FastAPI app.

    The interface is built by ``gradio_interface()`` so its definition lives
    in one place.
    """
    # launch() blocks the calling thread by default when run from a script.
    # Inside this startup hook that would stall the event loop, so FastAPI
    # would never finish starting. prevent_thread_lock=True makes it return.
    gradio_interface().launch(share=True, inline=True, prevent_thread_lock=True)

# Script entry point: serve the FastAPI app with uvicorn on all interfaces,
# port 5000. The Gradio UI is started by the app's startup hook.
if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, port=5000, host="0.0.0.0")