thecollabagepatch committed on
Commit
0e896d0
·
1 Parent(s): 6e56362

loudness issues

Browse files
Files changed (1) hide show
  1. app.py +26 -15
app.py CHANGED
@@ -197,23 +197,34 @@ def continue_music(input_audio_path, prompt_duration, musicgen_model, output_dur
197
  generated_audio_segment = AudioSegment.from_wav(filename_with_extension)
198
  file_paths_for_cleanup.append(filename_with_extension)
199
 
200
- # VOLUME MATCHING: Apply consistent normalization
201
-
202
- # 1. Remove prompt duration from original (no overlap)
203
  prompt_duration_ms = int(prompt_duration * 1000)
204
  original_minus_prompt = original_audio[:-prompt_duration_ms]
205
-
206
- # 2. Normalize both segments to same peak level
207
- target_peak_dbfs = -6.0 # Professional level with headroom
208
-
209
- # Normalize original segment
210
- original_normalized = original_minus_prompt.normalize(headroom=abs(target_peak_dbfs))
211
-
212
- # Normalize generated segment
213
- generated_normalized = generated_audio_segment.normalize(headroom=abs(target_peak_dbfs))
214
-
215
- # 3. Combine seamlessly
216
- combined_audio = original_normalized + generated_normalized
 
 
 
 
 
 
 
 
 
 
 
217
 
218
  # Save final result
219
  combined_audio_filename = f"extended_audio_{random.randint(1000, 9999)}.wav"
 
197
  generated_audio_segment = AudioSegment.from_wav(filename_with_extension)
198
  file_paths_for_cleanup.append(filename_with_extension)
199
 
200
+ # VOLUME MATCHING: Use RMS instead of peak normalization
201
+
202
+ # 1. Remove prompt duration from original (no overlap)
203
  prompt_duration_ms = int(prompt_duration * 1000)
204
  original_minus_prompt = original_audio[:-prompt_duration_ms]
205
+
206
+ # 2. Calculate RMS levels for perceived loudness
207
+ original_rms = original_minus_prompt.rms
208
+ generated_rms = generated_audio_segment.rms
209
+
210
+ print(f"🔊 Volume analysis:")
211
+ print(f" Original RMS: {original_rms}")
212
+ print(f" Generated RMS: {generated_rms}")
213
+
214
+ # 3. Match generated segment to original's RMS level
215
+ if generated_rms > 0: # Avoid division by zero
216
+ # Calculate dB adjustment needed
217
+ from pydub.utils import ratio_to_db
218
+ volume_adjustment = ratio_to_db(original_rms / generated_rms)
219
+ print(f" Applying {volume_adjustment:.1f}dB to generated audio")
220
+
221
+ # Apply volume adjustment
222
+ generated_matched = generated_audio_segment + volume_adjustment
223
+ else:
224
+ generated_matched = generated_audio_segment
225
+
226
+ # 4. Combine seamlessly
227
+ combined_audio = original_minus_prompt + generated_matched
228
 
229
  # Save final result
230
  combined_audio_filename = f"extended_audio_{random.randint(1000, 9999)}.wav"