fdaudens (HF Staff) committed
Commit 2286fa1 · verified · 1 parent: 84a1f8f

Update app.py

Files changed (1):
  app.py +29 -22
app.py CHANGED
@@ -29,26 +29,27 @@ def load_model():
     return model, processor
 
 def chunk_audio(audio_file_path, chunk_length=30):
-    """Split audio into 30s mono chunks at 16kHz"""
+    """Split audio into chunks"""
     try:
+        # audio_file_path is already a string path when type="filepath"
+        audio = AudioSegment.from_file(audio_file_path)
+
+        audios = []
         target_rate = 16000
         chunk_length_ms = chunk_length * 1000
 
-        # Load and resample
-        audio = AudioSegment.from_file(audio_file_path)
-        audio = audio.set_channels(1).set_frame_rate(target_rate)
-
-        chunks = []
         for i in range(0, len(audio), chunk_length_ms):
             chunk = audio[i:i + chunk_length_ms]
+            chunk = chunk.set_channels(1).set_frame_rate(target_rate)
 
-            # Convert to NumPy array
-            samples = np.array(chunk.get_array_of_samples())
-            chunks.append(samples)
+            buf = io.BytesIO()
+            chunk.export(buf, format="wav")
+            buf.seek(0)
+
+            rate, data = wavfile.read(buf)
+            audios.append(data)
 
-        print(f"Audio file split into {len(chunks)} chunks of ~{chunk_length}s")
-        return chunks
-
+        return audios
     except Exception as e:
         raise gr.Error(f"Error processing audio file: {str(e)}. Make sure ffmpeg is installed.")
 
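Note on the new chunking path: each pydub chunk is now normalized to mono 16 kHz and round-tripped through an in-memory WAV so that wavfile.read returns a plain NumPy array. This assumes `import io` and `from scipy.io import wavfile` are already present at the top of app.py (not shown in this hunk). A minimal, self-contained sketch of the same round-trip, using a synthetic test tone in place of an uploaded file:

import io
import numpy as np
from pydub import AudioSegment
from scipy.io import wavfile

# Build a 2-second 440 Hz test tone as 16-bit mono PCM (stand-in for an upload).
sr = 44100
t = np.linspace(0, 2, 2 * sr, endpoint=False)
pcm = (0.5 * np.sin(2 * np.pi * 440 * t) * 32767).astype(np.int16)
tone = AudioSegment(pcm.tobytes(), frame_rate=sr, sample_width=2, channels=1)

# Same round-trip as the patched loop: mono 16 kHz, export to an in-memory
# WAV, read it back as a NumPy array.
chunk = tone[:1000].set_channels(1).set_frame_rate(16000)  # first second
buf = io.BytesIO()
chunk.export(buf, format="wav")
buf.seek(0)
rate, data = wavfile.read(buf)
print(rate, data.shape, data.dtype)  # 16000 (16000,) int16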
@@ -95,7 +96,6 @@ def search_audio(query, embeddings, audios, top_k=5):
 
     # Score against all embeddings
     scores = processor.score_multi_vector(query_embeddings, embeddings)
-    top_k = min(top_k, scores[0].shape[0])
    top_indices = scores[0].topk(top_k).indices.tolist()
 
    # Move model back to CPU
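One caveat with dropping the clamp: Tensor.topk raises a RuntimeError when k exceeds the number of scored chunks, so callers now need to keep top_k at or below the chunk count. A quick illustration:

import torch

scores = torch.tensor([0.9, 0.4, 0.7])
print(scores.topk(2).indices.tolist())  # [0, 2]
# scores.topk(5)  # RuntimeError: selected index k out of range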
@@ -115,10 +115,10 @@ def audio_to_base64(data, rate=16000):
 def process_audio_rag(audio_file_path, query, chunk_length=30, use_openai=False, openai_key=None):
     """Main processing function"""
     if not audio_file_path:
-        return "Please upload an audio file", None
+        return "Please upload an audio file", None, None
 
     if not query:
-        return "Please enter a search query", None
+        return "Please enter a search query", None, None
 
     try:
         # Chunk audio
@@ -132,8 +132,7 @@ def process_audio_rag(audio_file_path, query, chunk_length=30, use_openai=False,
 
         # Prepare results
         result_text = f"Found {len(top_indices)} relevant audio chunks:\n"
-        result_text += f"Chunk indices: {top_indices}\n"
-        result_text += f"Total chunks in audio: {len(audios)}\n\n"
+        result_text += f"Chunk indices: {top_indices}\n\n"
 
         # Save first result as audio file
         first_chunk_path = "result_chunk.wav"
@@ -141,7 +140,6 @@ def process_audio_rag(audio_file_path, query, chunk_length=30, use_openai=False,
 
         # Optional: Use OpenAI for answer generation
         if use_openai and openai_key:
-            result_text += "Generating textual answer from retrieved audio chunks...\n\n"
             from openai import OpenAI
             client = OpenAI(api_key=openai_key)
 
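For reference, the `content` passed to the completion call below is built earlier in this function (outside these hunks) and follows the chat-completions audio-input shape: a text part plus one input_audio part per retrieved chunk, each carrying base64-encoded WAV data. A minimal sketch of that payload shape; the prompt text and dummy bytes are illustrative only:

import base64

b64_wav = base64.b64encode(b"\x00\x00" * 16000).decode("utf-8")  # dummy bytes, not a real WAV
content = [
    {"type": "text", "text": "Answer the question using these audio clips."},
    {"type": "input_audio", "input_audio": {"data": b64_wav, "format": "wav"}},
]
messages = [{"role": "user", "content": content}]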
@@ -164,14 +162,23 @@ def process_audio_rag(audio_file_path, query, chunk_length=30, use_openai=False,
                     model="gpt-4o-audio-preview",
                     messages=[{"role": "user", "content": content}]
                 )
-                result_text += f"OpenAI Answer: {completion.choices[0].message.content}"
+                result_text += f"\nOpenAI Answer: {completion.choices[0].message.content}"
             except Exception as e:
-                result_text += f"OpenAI Error: {str(e)}"
+                result_text += f"\nOpenAI Error: {str(e)}"
+
+        # Create audio visualization
+        import matplotlib.pyplot as plt
+        fig, ax = plt.subplots(figsize=(10, 4))
+        ax.plot(audios[top_indices[0]])
+        ax.set_title(f"Waveform of top matching chunk (#{top_indices[0]})")
+        ax.set_xlabel("Samples")
+        ax.set_ylabel("Amplitude")
+        plt.tight_layout()
 
-        return result_text, first_chunk_path
+        return result_text, first_chunk_path, fig
 
     except Exception as e:
-        return f"Error: {str(e)}", None
+        return f"Error: {str(e)}", None, None
 
 # Create Gradio interface
 with gr.Blocks(title="AudioRAG Demo") as demo:
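Since process_audio_rag now returns a 3-tuple (result text, audio path, matplotlib figure), the Blocks UI needs a third output component such as gr.Plot. A sketch of the wiring under that assumption; the component names and the stub function are illustrative, not from this commit:

import gradio as gr

def process_audio_rag_stub(audio_file_path, query):
    # Stand-in for the app's process_audio_rag; returns the same 3-tuple shape.
    return "demo results", None, None

with gr.Blocks(title="AudioRAG Demo") as demo:
    audio_in = gr.Audio(type="filepath", label="Audio file")
    query_in = gr.Textbox(label="Search query")
    search_btn = gr.Button("Search")
    result_out = gr.Textbox(label="Results")
    chunk_out = gr.Audio(label="Top matching chunk")
    plot_out = gr.Plot(label="Waveform")

    search_btn.click(
        process_audio_rag_stub,
        inputs=[audio_in, query_in],
        outputs=[result_out, chunk_out, plot_out],  # one slot per returned value
    )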