thecollabagepatch committed on
Commit
6e56362
·
1 Parent(s): 1277288

loudness issues

Browse files
Files changed (1) hide show
  1. app.py +22 -7
app.py CHANGED
@@ -163,7 +163,6 @@ def continue_music(input_audio_path, prompt_duration, musicgen_model, output_dur
163
 
164
  # Load original audio as AudioSegment for easier manipulation
165
  original_audio = AudioSegment.from_wav(input_audio_path)
166
- current_audio = original_audio
167
  file_paths_for_cleanup = []
168
 
169
  # Get the last `prompt_duration` seconds as the prompt
@@ -183,10 +182,11 @@ def continue_music(input_audio_path, prompt_duration, musicgen_model, output_dur
183
  if len(output.size()) > 2:
184
  output = output.squeeze()
185
 
186
- # Save the generated audio
187
  filename_without_extension = f'continue_extension_{random.randint(1000, 9999)}'
188
  filename_with_extension = f'{filename_without_extension}.wav'
189
- audio_write(filename_without_extension, output, model_continue.sample_rate, strategy="clip")
 
190
 
191
  # Handle the double .wav extension issue
192
  correct_filename = f'{filename_without_extension}.wav.wav'
@@ -197,10 +197,25 @@ def continue_music(input_audio_path, prompt_duration, musicgen_model, output_dur
197
  generated_audio_segment = AudioSegment.from_wav(filename_with_extension)
198
  file_paths_for_cleanup.append(filename_with_extension)
199
 
200
- # Combine original + new audio
201
- prompt_duration_ms = prompt_duration * 1000 # Convert to milliseconds for AudioSegment
202
- original_minus_prompt = current_audio[:-prompt_duration_ms] # Remove last X seconds
203
- combined_audio = original_minus_prompt + generated_audio_segment # Seamless join
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
204
  combined_audio_filename = f"extended_audio_{random.randint(1000, 9999)}.wav"
205
  combined_audio.export(combined_audio_filename, format="wav")
206
 
 
163
 
164
  # Load original audio as AudioSegment for easier manipulation
165
  original_audio = AudioSegment.from_wav(input_audio_path)
 
166
  file_paths_for_cleanup = []
167
 
168
  # Get the last `prompt_duration` seconds as the prompt
 
182
  if len(output.size()) > 2:
183
  output = output.squeeze()
184
 
185
+ # Save the generated audio WITHOUT aggressive loudness processing
186
  filename_without_extension = f'continue_extension_{random.randint(1000, 9999)}'
187
  filename_with_extension = f'{filename_without_extension}.wav'
188
+ audio_write(filename_without_extension, output, model_continue.sample_rate,
189
+ strategy="clip") # Just prevent clipping, no loudness changes
190
 
191
  # Handle the double .wav extension issue
192
  correct_filename = f'{filename_without_extension}.wav.wav'
 
197
  generated_audio_segment = AudioSegment.from_wav(filename_with_extension)
198
  file_paths_for_cleanup.append(filename_with_extension)
199
 
200
+ # VOLUME MATCHING: Apply consistent normalization
201
+
202
+ # 1. Remove prompt duration from original (no overlap)
203
+ prompt_duration_ms = int(prompt_duration * 1000)
204
+ original_minus_prompt = original_audio[:-prompt_duration_ms]
205
+
206
+ # 2. Normalize both segments to same peak level
207
+ target_peak_dbfs = -6.0 # Professional level with headroom
208
+
209
+ # Normalize original segment
210
+ original_normalized = original_minus_prompt.normalize(headroom=abs(target_peak_dbfs))
211
+
212
+ # Normalize generated segment
213
+ generated_normalized = generated_audio_segment.normalize(headroom=abs(target_peak_dbfs))
214
+
215
+ # 3. Combine seamlessly
216
+ combined_audio = original_normalized + generated_normalized
217
+
218
+ # Save final result
219
  combined_audio_filename = f"extended_audio_{random.randint(1000, 9999)}.wav"
220
  combined_audio.export(combined_audio_filename, format="wav")
221