Surn committed on
Commit
f476be0
·
1 Parent(s): ac77b7e

Variable Segment Length

Browse files
Files changed (1) hide show
  1. app.py +42 -39
app.py CHANGED
@@ -44,11 +44,10 @@ MAX_PROMPT_INDEX = 0
44
  git = os.environ.get('GIT', "git")
45
  #s.environ["CUDA_LAUNCH_BLOCKING"] = "1"
46
  os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:128,expandable_segments:True"
47
- # os.environ['TF_ENABLE_ONEDNN_OPTS'] = '0'
48
- # os.environ['CUDA_MODULE_LOADING']='LAZY'
49
- # os.environ['USE_FLASH_ATTENTION'] = '1'
50
- # os.environ['XFORMERS_FORCE_DISABLE_TRITON']= '1'
51
-
52
 
53
  def interrupt_callback():
54
  return INTERRUPTED
@@ -134,7 +133,7 @@ def git_tag():
134
  except Exception:
135
  return "<none>"
136
 
137
- def load_melody_filepath(melody_filepath, title, assigned_model,topp, temperature, cfg_coef):
138
  # get melody filename
139
  #$Union[str, os.PathLike]
140
  symbols = ['_', '.', '-']
@@ -161,14 +160,14 @@ def load_melody_filepath(melody_filepath, title, assigned_model,topp, temperatur
161
  # get melody length in number of segments and modify the UI
162
  melody = get_melody(melody_filepath)
163
  sr, melody_data = melody[0], melody[1]
164
- segment_samples = sr * 30
165
  total_melodys = max(min((len(melody_data) // segment_samples), 25), 0)
166
  print(f"Melody length: {len(melody_data)}, Melody segments: {total_melodys}\n")
167
  MAX_PROMPT_INDEX = total_melodys
168
 
169
  return gr.update(value=melody_name), gr.update(maximum=MAX_PROMPT_INDEX, value=0), gr.update(value=assigned_model, interactive=True), gr.update(value=topp), gr.update(value=temperature), gr.update(value=cfg_coef)
170
 
171
- def predict(model, text, melody_filepath, duration, dimension, topk, topp, temperature, cfg_coef, background, title, settings_font, settings_font_color, seed, overlap=1, prompt_index = 0, include_title = True, include_settings = True, harmony_only = False, profile = gr.OAuthProfile, progress=gr.Progress(track_tqdm=True)):
172
  global MODEL, INTERRUPTED, INTERRUPTING, MOVE_TO_CPU
173
  output_segments = None
174
  melody_name = "Not Used"
@@ -219,6 +218,8 @@ def predict(model, text, melody_filepath, duration, dimension, topk, topp, tempe
219
  segment_duration = duration + overlap
220
  else:
221
  segment_duration = MODEL.lm.cfg.dataset.segment_duration
 
 
222
  # implement seed
223
  if seed < 0:
224
  seed = random.randint(0, 0xffff_ffff_ffff)
@@ -243,7 +244,7 @@ def predict(model, text, melody_filepath, duration, dimension, topk, topp, tempe
243
  if melody and ("melody" in model):
244
  # return excess duration, load next model and continue in loop structure building up output_segments
245
  if duration > MODEL.lm.cfg.dataset.segment_duration:
246
- output_segments, duration = generate_music_segments(text, melody, seed, MODEL, duration, overlap, MODEL.lm.cfg.dataset.segment_duration, prompt_index, harmony_only=False, progress=gr.Progress(track_tqdm=True))
247
  else:
248
  # pure original code
249
  sr, melody = melody[0], torch.from_numpy(melody[1]).to(MODEL.device).float().t().unsqueeze(0)
@@ -449,10 +450,12 @@ def ui(**kwargs):
449
  with gr.Row():
450
  with gr.Column():
451
  with gr.Row():
452
- text = gr.Text(label="Describe your music", interactive=True, value="4/4 100bpm 320kbps 48khz, Industrial/Electronic Soundtrack, Dark, Intense, Sci-Fi, soft fade-in, soft fade-out")
453
  with gr.Column():
454
- duration = gr.Slider(minimum=1, maximum=720, value=10, label="Duration (s)", interactive=True)
455
- model = gr.Radio(["melody", "medium", "small", "large", "melody-large", "stereo-small", "stereo-medium", "stereo-large", "stereo-melody", "stereo-melody-large"], label="AI Model", value="medium", interactive=True)
 
 
 
456
  with gr.Row():
457
  submit = gr.Button("Generate", elem_id="btn-generate")
458
  # Adapted from https://github.com/rkfg/audiocraft/blob/long/app.py, MIT license.
@@ -460,42 +463,44 @@ def ui(**kwargs):
460
  with gr.Row():
461
  with gr.Column():
462
  radio = gr.Radio(["file", "mic"], value="file", label="Condition on a melody (optional) File or Mic")
463
- melody_filepath = gr.Audio(sources=["upload"], type="filepath", label="Melody Condition (optional)", interactive=True, elem_id="melody-input")
464
  with gr.Column():
465
- harmony_only = gr.Radio(label="Use Harmony Only",choices=["No", "Yes"], value="No", interactive=True, info="Remove Drums?")
466
- prompt_index = gr.Slider(label="Melody Condition Sample Segment", minimum=-1, maximum=MAX_PROMPT_INDEX, step=1, value=0, interactive=True, info="Which 30 second segment to condition with, - 1 condition each segment independantly")
467
  with gr.Accordion("Video", open=False):
468
  with gr.Row():
469
- background= gr.Image(value="./assets/background.png", sources=["upload"], label="Background", width=768, height=512, type="filepath", interactive=True)
470
  with gr.Column():
471
- include_title = gr.Checkbox(label="Add Title", value=True, interactive=True)
472
- include_settings = gr.Checkbox(label="Add Settings to background", value=True, interactive=True)
473
  with gr.Row():
474
- title = gr.Textbox(label="Title", value="UnlimitedMusicGen", interactive=True)
475
  settings_font = gr.Text(label="Settings Font", value="./assets/arial.ttf", interactive=True)
476
- settings_font_color = gr.ColorPicker(label="Settings Font Color", value="#c87f05", interactive=True)
477
  with gr.Accordion("Expert", open=False):
478
  with gr.Row():
479
- overlap = gr.Slider(minimum=0, maximum=15, value=2, step=1, label="Verse Overlap", interactive=True)
 
480
  dimension = gr.Slider(minimum=-2, maximum=2, value=2, step=1, label="Dimension", info="determines which direction to add new segements of audio. (1 = stack tracks, 2 = lengthen, -2..0 = ?)", interactive=True)
481
  with gr.Row():
482
- topk = gr.Number(label="Top-k", value=280, precision=0, interactive=True)
483
- topp = gr.Number(label="Top-p", value=1150, precision=0, interactive=True, info="overwrites Top-k if not zero")
484
- temperature = gr.Number(label="Randomness Temperature", value=0.7, precision=None, interactive=True)
485
- cfg_coef = gr.Number(label="Classifier Free Guidance", value=3.5, precision=None, interactive=True)
486
  with gr.Row():
487
- seed = gr.Number(label="Seed", value=-1, precision=0, interactive=True)
488
  gr.Button('\U0001f3b2\ufe0f', elem_classes="small-btn").click(fn=lambda: -1, outputs=[seed], queue=False)
489
  reuse_seed = gr.Button('\u267b\ufe0f', elem_classes="small-btn")
490
  with gr.Column() as c:
491
- output = gr.Video(label="Generated Music")
492
  wave_file = gr.File(label=".wav file", elem_id="output_wavefile", interactive=True)
493
  seed_used = gr.Number(label='Seed used', value=-1, interactive=False)
494
 
495
  radio.change(toggle_audio_src, radio, [melody_filepath], queue=False, show_progress=False)
496
- melody_filepath.change(load_melody_filepath, inputs=[melody_filepath, title, model,topp, temperature, cfg_coef], outputs=[title, prompt_index , model, topp, temperature, cfg_coef], api_name="melody_filepath_change", queue=False)
497
- reuse_seed.click(fn=lambda x: x, inputs=[seed_used], outputs=[seed], queue=False, api_name="reuse_seed")
498
-
 
499
  gr.Examples(
500
  examples=[
501
  [
@@ -505,7 +510,7 @@ def ui(**kwargs):
505
  "80s Pop Synth",
506
  950,
507
  0.6,
508
- 3.0
509
  ],
510
  [
511
  "4/4 120bpm 320kbps 48khz, A cheerful country song with acoustic guitars",
@@ -514,7 +519,7 @@ def ui(**kwargs):
514
  "Country Guitar",
515
  750,
516
  0.7,
517
- 3.75
518
  ],
519
  [
520
  "4/4 120bpm 320kbps 48khz, 90s rock song with electric guitar and heavy drums",
@@ -523,7 +528,7 @@ def ui(**kwargs):
523
  "90s Rock Guitar",
524
  1150,
525
  0.7,
526
- 3.5
527
  ],
528
  [
529
  "4/4 120bpm 320kbps 48khz, a light and cheerly EDM track, with syncopated drums, aery pads, and strong emotions",
@@ -532,7 +537,7 @@ def ui(**kwargs):
532
  "EDM my Bach",
533
  500,
534
  0.7,
535
- 3.5
536
  ],
537
  [
538
  "4/4 320kbps 48khz, lofi slow bpm electro chill with organic samples",
@@ -563,8 +568,8 @@ def ui(**kwargs):
563
  api_name="submit"
564
  ).then(
565
  predict,
566
- inputs=[model, text,melody_filepath, duration, dimension, topk, topp, temperature, cfg_coef, background, title, settings_font, settings_font_color, seed, overlap, prompt_index, include_title, include_settings, harmony_only, user_profile],
567
- outputs=[output, wave_file, seed_used])
568
 
569
  # Show the interface
570
  launch_kwargs = {}
@@ -578,11 +583,9 @@ def ui(**kwargs):
578
  launch_kwargs['server_port'] = server_port
579
  if share:
580
  launch_kwargs['share'] = share
581
- launch_kwargs['favicon_path']= "./assets/favicon.ico"
582
-
583
 
584
 
585
- demo.queue(max_size=10, api_open=False).launch(**launch_kwargs)
586
 
587
  if __name__ == "__main__":
588
  parser = argparse.ArgumentParser()
 
44
  git = os.environ.get('GIT', "git")
45
  #s.environ["CUDA_LAUNCH_BLOCKING"] = "1"
46
  os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:128,expandable_segments:True"
47
+ os.environ['TF_ENABLE_ONEDNN_OPTS'] = '0'
48
+ os.environ['CUDA_MODULE_LOADING']='LAZY'
49
+ os.environ['USE_FLASH_ATTENTION'] = '1'
50
+ os.environ['XFORMERS_FORCE_DISABLE_TRITON']= '1'
 
51
 
52
  def interrupt_callback():
53
  return INTERRUPTED
 
133
  except Exception:
134
  return "<none>"
135
 
136
+ def load_melody_filepath(melody_filepath, title, assigned_model,topp, temperature, cfg_coef, segment_duration = 30):
137
  # get melody filename
138
  #$Union[str, os.PathLike]
139
  symbols = ['_', '.', '-']
 
160
  # get melody length in number of segments and modify the UI
161
  melody = get_melody(melody_filepath)
162
  sr, melody_data = melody[0], melody[1]
163
+ segment_samples = sr * segment_duration
164
  total_melodys = max(min((len(melody_data) // segment_samples), 25), 0)
165
  print(f"Melody length: {len(melody_data)}, Melody segments: {total_melodys}\n")
166
  MAX_PROMPT_INDEX = total_melodys
167
 
168
  return gr.update(value=melody_name), gr.update(maximum=MAX_PROMPT_INDEX, value=0), gr.update(value=assigned_model, interactive=True), gr.update(value=topp), gr.update(value=temperature), gr.update(value=cfg_coef)
169
 
170
+ def predict(model, text, melody_filepath, duration, dimension, topk, topp, temperature, cfg_coef, background, title, settings_font, settings_font_color, seed, overlap=1, prompt_index = 0, include_title = True, include_settings = True, harmony_only = False, profile = gr.OAuthProfile, segment_length = 30, progress=gr.Progress(track_tqdm=True)):
171
  global MODEL, INTERRUPTED, INTERRUPTING, MOVE_TO_CPU
172
  output_segments = None
173
  melody_name = "Not Used"
 
218
  segment_duration = duration + overlap
219
  else:
220
  segment_duration = MODEL.lm.cfg.dataset.segment_duration
221
+ if (segment_length + overlap) < segment_duration:
222
+ segment_duration = segment_length + overlap
223
  # implement seed
224
  if seed < 0:
225
  seed = random.randint(0, 0xffff_ffff_ffff)
 
244
  if melody and ("melody" in model):
245
  # return excess duration, load next model and continue in loop structure building up output_segments
246
  if duration > MODEL.lm.cfg.dataset.segment_duration:
247
+ output_segments, duration = generate_music_segments(text, melody, seed, MODEL, duration, overlap, MODEL.lm.cfg.dataset.segment_duration, prompt_index, harmony_only, progress=gr.Progress(track_tqdm=True))
248
  else:
249
  # pure original code
250
  sr, melody = melody[0], torch.from_numpy(melody[1]).to(MODEL.device).float().t().unsqueeze(0)
 
450
  with gr.Row():
451
  with gr.Column():
452
  with gr.Row():
 
453
  with gr.Column():
454
+ text = gr.Text(label="Describe your music", interactive=True, value="4/4 100bpm 320kbps 48khz, Industrial/Electronic Soundtrack, Dark, Intense, Sci-Fi, soft fade-in, soft fade-out", key="prompt", lines=4)
455
+ autoplay_cb = gr.Checkbox(value=False, label="Autoplay?", key="autoplay_cb")
456
+ with gr.Column():
457
+ duration = gr.Slider(minimum=1, maximum=720, value=10, label="Duration (s)", interactive=True, key="total_duration")
458
+ model = gr.Radio(["melody", "medium", "small", "large", "melody-large", "stereo-small", "stereo-medium", "stereo-large", "stereo-melody", "stereo-melody-large"], label="AI Model", value="medium", interactive=True, key="chosen_model")
459
  with gr.Row():
460
  submit = gr.Button("Generate", elem_id="btn-generate")
461
  # Adapted from https://github.com/rkfg/audiocraft/blob/long/app.py, MIT license.
 
463
  with gr.Row():
464
  with gr.Column():
465
  radio = gr.Radio(["file", "mic"], value="file", label="Condition on a melody (optional) File or Mic")
466
+ melody_filepath = gr.Audio(sources=["upload"], type="filepath", label="Melody Condition (optional)", interactive=True, elem_id="melody-input", key="melody_input")
467
  with gr.Column():
468
+ harmony_only = gr.Radio(label="Use Harmony Only",choices=["No", "Yes"], value="No", interactive=True, info="Remove Drums?", key="use_harmony")
469
+ prompt_index = gr.Slider(label="Melody Condition Sample Segment", minimum=-1, maximum=MAX_PROMPT_INDEX, step=1, value=0, interactive=True, info="Which 15-30 second segment to condition with, - 1 = align with conditioning melody", key="melody_index")
470
  with gr.Accordion("Video", open=False):
471
  with gr.Row():
472
+ background= gr.Image(value="./assets/background.png", sources=["upload"], label="Background", width=768, height=512, type="filepath", interactive=True, key="background_imagepath")
473
  with gr.Column():
474
+ include_title = gr.Checkbox(label="Add Title", value=True, interactive=True,key="add_title")
475
+ include_settings = gr.Checkbox(label="Add Settings to background", value=True, interactive=True, key="add_settings")
476
  with gr.Row():
477
+ title = gr.Textbox(label="Title", value="UnlimitedMusicGen", interactive=True, key="song_title")
478
  settings_font = gr.Text(label="Settings Font", value="./assets/arial.ttf", interactive=True)
479
+ settings_font_color = gr.ColorPicker(label="Settings Font Color", value="#c87f05", interactive=True, key="settings_font_color")
480
  with gr.Accordion("Expert", open=False):
481
  with gr.Row():
482
+ segment_duration = gr.Slider(minimum=10, maximum=30, value=30, step =1,label="Music Generation Segment Length (s)", interactive=True)
483
+ overlap = gr.Slider(minimum=0, maximum=15, value=1, step=1, label="Segment Overlap", interactive=True)
484
  dimension = gr.Slider(minimum=-2, maximum=2, value=2, step=1, label="Dimension", info="determines which direction to add new segements of audio. (1 = stack tracks, 2 = lengthen, -2..0 = ?)", interactive=True)
485
  with gr.Row():
486
+ topk = gr.Number(label="Top-k", value=280, precision=0, interactive=True, info="more structured", key="topk")
487
+ topp = gr.Number(label="Top-p", value=1150, precision=0, interactive=True, info="more variation, overwrites Top-k if not zero", key="topp")
488
+ temperature = gr.Number(label="Randomness Temperature", value=0.7, precision=None, interactive=True, info="less than one to follow Melody Condition song closely", key="temperature")
489
+ cfg_coef = gr.Number(label="Classifier Free Guidance", value=3.75, precision=None, interactive=True, info="3.0-4.0, stereo and small need more", key="cfg_coef")
490
  with gr.Row():
491
+ seed = gr.Number(label="Seed", value=-1, precision=0, interactive=True, key="seed")
492
  gr.Button('\U0001f3b2\ufe0f', elem_classes="small-btn").click(fn=lambda: -1, outputs=[seed], queue=False)
493
  reuse_seed = gr.Button('\u267b\ufe0f', elem_classes="small-btn")
494
  with gr.Column() as c:
495
+ output = gr.Video(label="Generated Music", interactive=False, show_download_button=True, show_share_button=True, autoplay=False)
496
  wave_file = gr.File(label=".wav file", elem_id="output_wavefile", interactive=True)
497
  seed_used = gr.Number(label='Seed used', value=-1, interactive=False)
498
 
499
  radio.change(toggle_audio_src, radio, [melody_filepath], queue=False, show_progress=False)
500
+ melody_filepath.change(load_melody_filepath, inputs=[melody_filepath, title, model,topp, temperature, cfg_coef, segment_duration], outputs=[title, prompt_index , model, topp, temperature, cfg_coef], api_name="melody_filepath_change", queue=False)
501
+ reuse_seed.click(fn=lambda x: x, inputs=[seed_used], outputs=[seed], queue=False, api_name="reuse_seed_click")
502
+ autoplay_cb.change(fn=lambda x: gr.update(autoplay=x), inputs=[autoplay_cb], outputs=[output], queue=False, api_name="autoplay_cb_change")
503
+
504
  gr.Examples(
505
  examples=[
506
  [
 
510
  "80s Pop Synth",
511
  950,
512
  0.6,
513
+ 3.5
514
  ],
515
  [
516
  "4/4 120bpm 320kbps 48khz, A cheerful country song with acoustic guitars",
 
519
  "Country Guitar",
520
  750,
521
  0.7,
522
+ 4.0
523
  ],
524
  [
525
  "4/4 120bpm 320kbps 48khz, 90s rock song with electric guitar and heavy drums",
 
528
  "90s Rock Guitar",
529
  1150,
530
  0.7,
531
+ 3.75
532
  ],
533
  [
534
  "4/4 120bpm 320kbps 48khz, a light and cheerly EDM track, with syncopated drums, aery pads, and strong emotions",
 
537
  "EDM my Bach",
538
  500,
539
  0.7,
540
+ 3.75
541
  ],
542
  [
543
  "4/4 320kbps 48khz, lofi slow bpm electro chill with organic samples",
 
568
  api_name="submit"
569
  ).then(
570
  predict,
571
+ inputs=[model, text,melody_filepath, duration, dimension, topk, topp, temperature, cfg_coef, background, title, settings_font, settings_font_color, seed, overlap, prompt_index, include_title, include_settings, harmony_only, user_profile, segment_duration],
572
+ outputs=[output, wave_file, seed_used], scroll_to_output=True)
573
 
574
  # Show the interface
575
  launch_kwargs = {}
 
583
  launch_kwargs['server_port'] = server_port
584
  if share:
585
  launch_kwargs['share'] = share
 
 
586
 
587
 
588
+ demo.queue(max_size=10, api_open=False).launch(**launch_kwargs, allowed_paths=["assets","./assets","images","./images", 'e:/TMP'], favicon_path="./assets/favicon.ico")
589
 
590
  if __name__ == "__main__":
591
  parser = argparse.ArgumentParser()