Nusri7 committed
Commit fec10d3 · 1 Parent(s): acbe8cd

Initial commit with FastAPI + Gradio app

Files changed (1)
  1. app.py +36 -39
app.py CHANGED
@@ -1,6 +1,7 @@
  import os
  import torchaudio
  import gradio as gr
+ import torch
  from fastapi import FastAPI, HTTPException, File, UploadFile
  from speechbrain.inference import SpeakerRecognition
  from fastapi.responses import JSONResponse
@@ -11,42 +12,33 @@ speaker_verification = SpeakerRecognition.from_hparams(
      savedir="tmp_model"
  )

- # Temporary folder to save uploaded files
- UPLOAD_FOLDER = "uploaded_audio"
- os.makedirs(UPLOAD_FOLDER, exist_ok=True)
-
  # Function to calculate similarity score
- def get_similarity(audio_path1: str, audio_path2: str):
+ def get_similarity(audio1, audio2, sample_rate=16000):
      try:
-         # Load audio files
-         signal1, _ = torchaudio.load(audio_path1)
-         signal2, _ = torchaudio.load(audio_path2)
+         # Convert numpy arrays to tensors
+         signal1 = torch.tensor(audio1)
+         signal2 = torch.tensor(audio2)
+
+         # Make sure the signals are in the right shape (2D tensor: (1, N))
+         if signal1.ndimension() == 1:
+             signal1 = signal1.unsqueeze(0)
+         if signal2.ndimension() == 1:
+             signal2 = signal2.unsqueeze(0)

          # Get similarity score and prediction
          score, prediction = speaker_verification.verify_batch(signal1, signal2)
          return float(score), "Yes" if prediction else "No"
      except Exception as e:
          return str(e), None
-     finally:
-         # Clean up temporary files
-         if os.path.exists(audio_path1):
-             os.remove(audio_path1)
-         if os.path.exists(audio_path2):
-             os.remove(audio_path2)

  # API function to compare voices
  def compare_voices(file1, file2):
-     # Save uploaded files temporarily
-     file1_path = os.path.join(UPLOAD_FOLDER, file1.name)
-     file2_path = os.path.join(UPLOAD_FOLDER, file2.name)
-
-     with open(file1_path, "wb") as f1:
-         f1.write(file1.read())
-     with open(file2_path, "wb") as f2:
-         f2.write(file2.read())
+     # Gradio Audio returns a tuple of (audio, sample_rate)
+     audio1, _ = file1  # Audio1 is a tuple (numpy_array, sample_rate)
+     audio2, _ = file2  # Audio2 is a tuple (numpy_array, sample_rate)

      # Get similarity score
-     score, is_same_user = get_similarity(file1_path, file2_path)
+     score, is_same_user = get_similarity(audio1, audio2)

      if is_same_user is None:
          return "Error: " + score  # This will return the error message
@@ -61,30 +53,35 @@ async def compare_voices_api(file1: UploadFile = File(...), file2: UploadFile =
      """
      Compare two audio files and return the similarity score and prediction.
      """
-     # Save uploaded files temporarily
-     file1_path = os.path.join(UPLOAD_FOLDER, file1.filename)
-     file2_path = os.path.join(UPLOAD_FOLDER, file2.filename)
-
-     with open(file1_path, "wb") as f1:
-         f1.write(await file1.read())
-     with open(file2_path, "wb") as f2:
-         f2.write(await file2.read())
-
-     # Get similarity score
-     score, is_same_user = get_similarity(file1_path, file2_path)
-
-     if is_same_user is None:
-         raise HTTPException(status_code=500, detail="Error in processing files: " + score)
-
-     return JSONResponse(content={"Similarity Score": f"{score:.4f}", "Same User Prediction": is_same_user})
+     # Gradio uses numpy arrays directly, so no need to save the files
+     # You'd need to process the audio files here, but in FastAPI you need to convert file to numpy first.
+     try:
+         file1_data = await file1.read()
+         file2_data = await file2.read()
+
+         # Convert these file data into numpy arrays (this part is pseudo-code as we need to decode the file data)
+         # Typically, you would use a library like torchaudio or librosa to decode the audio from raw file data.
+
+         # Assuming audio data is in correct format for the speaker model
+         # Example:
+         # numpy1 = torchaudio.load(io.BytesIO(file1_data))[0].numpy()
+         # numpy2 = torchaudio.load(io.BytesIO(file2_data))[0].numpy()
+
+         # For this example, the audio should be pre-converted to numpy arrays before processing.
+
+         # Use a conversion library (like torchaudio or librosa) to decode the audio
+         return {"message": "Processing files directly (no save to disk)"}
+
+     except Exception as e:
+         raise HTTPException(status_code=400, detail=str(e))

  # Gradio interface function
  def gradio_interface():
      return gr.Interface(
          fn=compare_voices,
          inputs=[
-             gr.Audio(type="numpy", label="First Audio File"),  # Updated to use `type="numpy"`
-             gr.Audio(type="numpy", label="Second Audio File")  # Updated to use `type="numpy"`
+             gr.Audio(type="numpy", label="First Audio File"),  # Gradio now gives numpy arrays
+             gr.Audio(type="numpy", label="Second Audio File")  # Gradio now gives numpy arrays
          ],
          outputs="json",  # Output results as JSON
          live=False  # No live interface, just the API
@@ -94,8 +91,8 @@ def gradio_interface():
  @app.on_event("startup")
  async def startup():
      gr.Interface(fn=compare_voices, inputs=[
-         gr.Audio(type="numpy", label="First Audio File"),  # Updated to use `type="numpy"`
-         gr.Audio(type="numpy", label="Second Audio File")  # Updated to use `type="numpy"`
+         gr.Audio(type="numpy", label="First Audio File"),  # Gradio now gives numpy arrays
+         gr.Audio(type="numpy", label="Second Audio File")  # Gradio now gives numpy arrays
      ], outputs="json", live=False).launch(share=True, inline=True)

  # Running the FastAPI app with Gradio
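
Note that the rewritten compare_voices_api handler in this commit is left as pseudo-code: it reads the uploaded bytes but never decodes them or calls the speaker model. Below is a minimal sketch of how that endpoint could be completed, assuming the installed torchaudio backend can decode file-like objects, that the loaded model is speechbrain/spkrec-ecapa-voxceleb (the diff only shows savedir="tmp_model", so the model source is an assumption), and that the route path and the decode_upload helper are illustrative names, not part of the commit.

import io

import torch
import torchaudio
from fastapi import FastAPI, File, HTTPException, UploadFile
from fastapi.responses import JSONResponse
from speechbrain.inference import SpeakerRecognition

app = FastAPI()
speaker_verification = SpeakerRecognition.from_hparams(
    source="speechbrain/spkrec-ecapa-voxceleb",  # assumed model source; the diff only shows savedir
    savedir="tmp_model",
)

def decode_upload(data: bytes) -> torch.Tensor:
    """Decode raw upload bytes into a mono, 16 kHz waveform tensor of shape (1, N)."""
    waveform, sample_rate = torchaudio.load(io.BytesIO(data))
    if waveform.size(0) > 1:  # mix multi-channel audio down to mono
        waveform = waveform.mean(dim=0, keepdim=True)
    if sample_rate != 16000:  # speaker-verification models are typically trained on 16 kHz speech
        waveform = torchaudio.functional.resample(waveform, sample_rate, 16000)
    return waveform

@app.post("/compare-voices/")  # assumed route path; the decorator is not shown in the diff
async def compare_voices_api(file1: UploadFile = File(...), file2: UploadFile = File(...)):
    """Compare two uploaded audio files and return the similarity score and prediction."""
    try:
        signal1 = decode_upload(await file1.read())
        signal2 = decode_upload(await file2.read())
    except Exception as e:
        raise HTTPException(status_code=400, detail=f"Could not decode audio: {e}")

    score, prediction = speaker_verification.verify_batch(signal1, signal2)
    return JSONResponse(content={
        "Similarity Score": f"{float(score):.4f}",
        "Same User Prediction": "Yes" if bool(prediction) else "No",
    })

With a sketch like this, a request such as curl -X POST -F "file1=@a.wav" -F "file2=@b.wav" http://localhost:7860/compare-voices/ would return the score and prediction as JSON (the port depends on how the app is served).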