Spaces:
Running
on
T4
Running
on
T4
Variable Segment Length
Browse files
app.py
CHANGED
@@ -44,11 +44,10 @@ MAX_PROMPT_INDEX = 0
|
|
44 |
git = os.environ.get('GIT', "git")
|
45 |
#os.environ["CUDA_LAUNCH_BLOCKING"] = "1"
|
46 |
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:128,expandable_segments:True"
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
|
53 |
def interrupt_callback():
|
54 |
return INTERRUPTED
|
@@ -134,7 +133,7 @@ def git_tag():
|
|
134 |
except Exception:
|
135 |
return "<none>"
|
136 |
|
137 |
-
def load_melody_filepath(melody_filepath, title, assigned_model,topp, temperature, cfg_coef):
|
138 |
# get melody filename
|
139 |
# Union[str, os.PathLike]
|
140 |
symbols = ['_', '.', '-']
|
@@ -161,14 +160,14 @@ def load_melody_filepath(melody_filepath, title, assigned_model,topp, temperatur
|
|
161 |
# get melody length in number of segments and modify the UI
|
162 |
melody = get_melody(melody_filepath)
|
163 |
sr, melody_data = melody[0], melody[1]
|
164 |
-
segment_samples = sr *
|
165 |
total_melodys = max(min((len(melody_data) // segment_samples), 25), 0)
|
166 |
print(f"Melody length: {len(melody_data)}, Melody segments: {total_melodys}\n")
|
167 |
MAX_PROMPT_INDEX = total_melodys
|
168 |
|
169 |
return gr.update(value=melody_name), gr.update(maximum=MAX_PROMPT_INDEX, value=0), gr.update(value=assigned_model, interactive=True), gr.update(value=topp), gr.update(value=temperature), gr.update(value=cfg_coef)
|
170 |
|
171 |
-
def predict(model, text, melody_filepath, duration, dimension, topk, topp, temperature, cfg_coef, background, title, settings_font, settings_font_color, seed, overlap=1, prompt_index = 0, include_title = True, include_settings = True, harmony_only = False, profile = gr.OAuthProfile, progress=gr.Progress(track_tqdm=True)):
|
172 |
global MODEL, INTERRUPTED, INTERRUPTING, MOVE_TO_CPU
|
173 |
output_segments = None
|
174 |
melody_name = "Not Used"
|
@@ -219,6 +218,8 @@ def predict(model, text, melody_filepath, duration, dimension, topk, topp, tempe
|
|
219 |
segment_duration = duration + overlap
|
220 |
else:
|
221 |
segment_duration = MODEL.lm.cfg.dataset.segment_duration
|
|
|
|
|
222 |
# implement seed
|
223 |
if seed < 0:
|
224 |
seed = random.randint(0, 0xffff_ffff_ffff)
|
@@ -243,7 +244,7 @@ def predict(model, text, melody_filepath, duration, dimension, topk, topp, tempe
|
|
243 |
if melody and ("melody" in model):
|
244 |
# return excess duration, load next model and continue in loop structure building up output_segments
|
245 |
if duration > MODEL.lm.cfg.dataset.segment_duration:
|
246 |
-
output_segments, duration = generate_music_segments(text, melody, seed, MODEL, duration, overlap, MODEL.lm.cfg.dataset.segment_duration, prompt_index, harmony_only
|
247 |
else:
|
248 |
# pure original code
|
249 |
sr, melody = melody[0], torch.from_numpy(melody[1]).to(MODEL.device).float().t().unsqueeze(0)
|
@@ -449,10 +450,12 @@ def ui(**kwargs):
|
|
449 |
with gr.Row():
|
450 |
with gr.Column():
|
451 |
with gr.Row():
|
452 |
-
text = gr.Text(label="Describe your music", interactive=True, value="4/4 100bpm 320kbps 48khz, Industrial/Electronic Soundtrack, Dark, Intense, Sci-Fi, soft fade-in, soft fade-out")
|
453 |
with gr.Column():
|
454 |
-
|
455 |
-
|
|
|
|
|
|
|
456 |
with gr.Row():
|
457 |
submit = gr.Button("Generate", elem_id="btn-generate")
|
458 |
# Adapted from https://github.com/rkfg/audiocraft/blob/long/app.py, MIT license.
|
@@ -460,42 +463,44 @@ def ui(**kwargs):
|
|
460 |
with gr.Row():
|
461 |
with gr.Column():
|
462 |
radio = gr.Radio(["file", "mic"], value="file", label="Condition on a melody (optional) File or Mic")
|
463 |
-
melody_filepath = gr.Audio(sources=["upload"], type="filepath", label="Melody Condition (optional)", interactive=True, elem_id="melody-input")
|
464 |
with gr.Column():
|
465 |
-
harmony_only = gr.Radio(label="Use Harmony Only",choices=["No", "Yes"], value="No", interactive=True, info="Remove Drums?")
|
466 |
-
prompt_index = gr.Slider(label="Melody Condition Sample Segment", minimum=-1, maximum=MAX_PROMPT_INDEX, step=1, value=0, interactive=True, info="Which 30 second segment to condition with, - 1
|
467 |
with gr.Accordion("Video", open=False):
|
468 |
with gr.Row():
|
469 |
-
background= gr.Image(value="./assets/background.png", sources=["upload"], label="Background", width=768, height=512, type="filepath", interactive=True)
|
470 |
with gr.Column():
|
471 |
-
include_title = gr.Checkbox(label="Add Title", value=True, interactive=True)
|
472 |
-
include_settings = gr.Checkbox(label="Add Settings to background", value=True, interactive=True)
|
473 |
with gr.Row():
|
474 |
-
title = gr.Textbox(label="Title", value="UnlimitedMusicGen", interactive=True)
|
475 |
settings_font = gr.Text(label="Settings Font", value="./assets/arial.ttf", interactive=True)
|
476 |
-
settings_font_color = gr.ColorPicker(label="Settings Font Color", value="#c87f05", interactive=True)
|
477 |
with gr.Accordion("Expert", open=False):
|
478 |
with gr.Row():
|
479 |
-
|
|
|
480 |
dimension = gr.Slider(minimum=-2, maximum=2, value=2, step=1, label="Dimension", info="determines which direction to add new segements of audio. (1 = stack tracks, 2 = lengthen, -2..0 = ?)", interactive=True)
|
481 |
with gr.Row():
|
482 |
-
topk = gr.Number(label="Top-k", value=280, precision=0, interactive=True)
|
483 |
-
topp = gr.Number(label="Top-p", value=1150, precision=0, interactive=True, info="overwrites Top-k if not zero")
|
484 |
-
temperature = gr.Number(label="Randomness Temperature", value=0.7, precision=None, interactive=True)
|
485 |
-
cfg_coef = gr.Number(label="Classifier Free Guidance", value=3.
|
486 |
with gr.Row():
|
487 |
-
seed = gr.Number(label="Seed", value=-1, precision=0, interactive=True)
|
488 |
gr.Button('\U0001f3b2\ufe0f', elem_classes="small-btn").click(fn=lambda: -1, outputs=[seed], queue=False)
|
489 |
reuse_seed = gr.Button('\u267b\ufe0f', elem_classes="small-btn")
|
490 |
with gr.Column() as c:
|
491 |
-
output = gr.Video(label="Generated Music")
|
492 |
wave_file = gr.File(label=".wav file", elem_id="output_wavefile", interactive=True)
|
493 |
seed_used = gr.Number(label='Seed used', value=-1, interactive=False)
|
494 |
|
495 |
radio.change(toggle_audio_src, radio, [melody_filepath], queue=False, show_progress=False)
|
496 |
-
melody_filepath.change(load_melody_filepath, inputs=[melody_filepath, title, model,topp, temperature, cfg_coef], outputs=[title, prompt_index , model, topp, temperature, cfg_coef], api_name="melody_filepath_change", queue=False)
|
497 |
-
reuse_seed.click(fn=lambda x: x, inputs=[seed_used], outputs=[seed], queue=False, api_name="
|
498 |
-
|
|
|
499 |
gr.Examples(
|
500 |
examples=[
|
501 |
[
|
@@ -505,7 +510,7 @@ def ui(**kwargs):
|
|
505 |
"80s Pop Synth",
|
506 |
950,
|
507 |
0.6,
|
508 |
-
3.
|
509 |
],
|
510 |
[
|
511 |
"4/4 120bpm 320kbps 48khz, A cheerful country song with acoustic guitars",
|
@@ -514,7 +519,7 @@ def ui(**kwargs):
|
|
514 |
"Country Guitar",
|
515 |
750,
|
516 |
0.7,
|
517 |
-
|
518 |
],
|
519 |
[
|
520 |
"4/4 120bpm 320kbps 48khz, 90s rock song with electric guitar and heavy drums",
|
@@ -523,7 +528,7 @@ def ui(**kwargs):
|
|
523 |
"90s Rock Guitar",
|
524 |
1150,
|
525 |
0.7,
|
526 |
-
3.
|
527 |
],
|
528 |
[
|
529 |
"4/4 120bpm 320kbps 48khz, a light and cheerly EDM track, with syncopated drums, aery pads, and strong emotions",
|
@@ -532,7 +537,7 @@ def ui(**kwargs):
|
|
532 |
"EDM my Bach",
|
533 |
500,
|
534 |
0.7,
|
535 |
-
3.
|
536 |
],
|
537 |
[
|
538 |
"4/4 320kbps 48khz, lofi slow bpm electro chill with organic samples",
|
@@ -563,8 +568,8 @@ def ui(**kwargs):
|
|
563 |
api_name="submit"
|
564 |
).then(
|
565 |
predict,
|
566 |
-
inputs=[model, text,melody_filepath, duration, dimension, topk, topp, temperature, cfg_coef, background, title, settings_font, settings_font_color, seed, overlap, prompt_index, include_title, include_settings, harmony_only, user_profile],
|
567 |
-
outputs=[output, wave_file, seed_used])
|
568 |
|
569 |
# Show the interface
|
570 |
launch_kwargs = {}
|
@@ -578,11 +583,9 @@ def ui(**kwargs):
|
|
578 |
launch_kwargs['server_port'] = server_port
|
579 |
if share:
|
580 |
launch_kwargs['share'] = share
|
581 |
-
launch_kwargs['favicon_path']= "./assets/favicon.ico"
|
582 |
-
|
583 |
|
584 |
|
585 |
-
demo.queue(max_size=10, api_open=False).launch(**launch_kwargs)
|
586 |
|
587 |
if __name__ == "__main__":
|
588 |
parser = argparse.ArgumentParser()
|
|
|
44 |
git = os.environ.get('GIT', "git")
|
45 |
#os.environ["CUDA_LAUNCH_BLOCKING"] = "1"
|
46 |
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:128,expandable_segments:True"
|
47 |
+
os.environ['TF_ENABLE_ONEDNN_OPTS'] = '0'
|
48 |
+
os.environ['CUDA_MODULE_LOADING']='LAZY'
|
49 |
+
os.environ['USE_FLASH_ATTENTION'] = '1'
|
50 |
+
os.environ['XFORMERS_FORCE_DISABLE_TRITON']= '1'
|
|
|
51 |
|
52 |
def interrupt_callback():
|
53 |
return INTERRUPTED
|
|
|
133 |
except Exception:
|
134 |
return "<none>"
|
135 |
|
136 |
+
def load_melody_filepath(melody_filepath, title, assigned_model,topp, temperature, cfg_coef, segment_duration = 30):
|
137 |
# get melody filename
|
138 |
# Union[str, os.PathLike]
|
139 |
symbols = ['_', '.', '-']
|
|
|
160 |
# get melody length in number of segments and modify the UI
|
161 |
melody = get_melody(melody_filepath)
|
162 |
sr, melody_data = melody[0], melody[1]
|
163 |
+
segment_samples = sr * segment_duration
|
164 |
total_melodys = max(min((len(melody_data) // segment_samples), 25), 0)
|
165 |
print(f"Melody length: {len(melody_data)}, Melody segments: {total_melodys}\n")
|
166 |
MAX_PROMPT_INDEX = total_melodys
|
167 |
|
168 |
return gr.update(value=melody_name), gr.update(maximum=MAX_PROMPT_INDEX, value=0), gr.update(value=assigned_model, interactive=True), gr.update(value=topp), gr.update(value=temperature), gr.update(value=cfg_coef)
|
169 |
|
170 |
+
def predict(model, text, melody_filepath, duration, dimension, topk, topp, temperature, cfg_coef, background, title, settings_font, settings_font_color, seed, overlap=1, prompt_index = 0, include_title = True, include_settings = True, harmony_only = False, profile = gr.OAuthProfile, segment_length = 30, progress=gr.Progress(track_tqdm=True)):
|
171 |
global MODEL, INTERRUPTED, INTERRUPTING, MOVE_TO_CPU
|
172 |
output_segments = None
|
173 |
melody_name = "Not Used"
|
|
|
218 |
segment_duration = duration + overlap
|
219 |
else:
|
220 |
segment_duration = MODEL.lm.cfg.dataset.segment_duration
|
221 |
+
if (segment_length + overlap) < segment_duration:
|
222 |
+
segment_duration = segment_length + overlap
|
223 |
# implement seed
|
224 |
if seed < 0:
|
225 |
seed = random.randint(0, 0xffff_ffff_ffff)
|
|
|
244 |
if melody and ("melody" in model):
|
245 |
# return excess duration, load next model and continue in loop structure building up output_segments
|
246 |
if duration > MODEL.lm.cfg.dataset.segment_duration:
|
247 |
+
output_segments, duration = generate_music_segments(text, melody, seed, MODEL, duration, overlap, MODEL.lm.cfg.dataset.segment_duration, prompt_index, harmony_only, progress=gr.Progress(track_tqdm=True))
|
248 |
else:
|
249 |
# pure original code
|
250 |
sr, melody = melody[0], torch.from_numpy(melody[1]).to(MODEL.device).float().t().unsqueeze(0)
|
|
|
450 |
with gr.Row():
|
451 |
with gr.Column():
|
452 |
with gr.Row():
|
|
|
453 |
with gr.Column():
|
454 |
+
text = gr.Text(label="Describe your music", interactive=True, value="4/4 100bpm 320kbps 48khz, Industrial/Electronic Soundtrack, Dark, Intense, Sci-Fi, soft fade-in, soft fade-out", key="prompt", lines=4)
|
455 |
+
autoplay_cb = gr.Checkbox(value=False, label="Autoplay?", key="autoplay_cb")
|
456 |
+
with gr.Column():
|
457 |
+
duration = gr.Slider(minimum=1, maximum=720, value=10, label="Duration (s)", interactive=True, key="total_duration")
|
458 |
+
model = gr.Radio(["melody", "medium", "small", "large", "melody-large", "stereo-small", "stereo-medium", "stereo-large", "stereo-melody", "stereo-melody-large"], label="AI Model", value="medium", interactive=True, key="chosen_model")
|
459 |
with gr.Row():
|
460 |
submit = gr.Button("Generate", elem_id="btn-generate")
|
461 |
# Adapted from https://github.com/rkfg/audiocraft/blob/long/app.py, MIT license.
|
|
|
463 |
with gr.Row():
|
464 |
with gr.Column():
|
465 |
radio = gr.Radio(["file", "mic"], value="file", label="Condition on a melody (optional) File or Mic")
|
466 |
+
melody_filepath = gr.Audio(sources=["upload"], type="filepath", label="Melody Condition (optional)", interactive=True, elem_id="melody-input", key="melody_input")
|
467 |
with gr.Column():
|
468 |
+
harmony_only = gr.Radio(label="Use Harmony Only",choices=["No", "Yes"], value="No", interactive=True, info="Remove Drums?", key="use_harmony")
|
469 |
+
prompt_index = gr.Slider(label="Melody Condition Sample Segment", minimum=-1, maximum=MAX_PROMPT_INDEX, step=1, value=0, interactive=True, info="Which 15-30 second segment to condition with, - 1 = align with conditioning melody", key="melody_index")
|
470 |
with gr.Accordion("Video", open=False):
|
471 |
with gr.Row():
|
472 |
+
background= gr.Image(value="./assets/background.png", sources=["upload"], label="Background", width=768, height=512, type="filepath", interactive=True, key="background_imagepath")
|
473 |
with gr.Column():
|
474 |
+
include_title = gr.Checkbox(label="Add Title", value=True, interactive=True,key="add_title")
|
475 |
+
include_settings = gr.Checkbox(label="Add Settings to background", value=True, interactive=True, key="add_settings")
|
476 |
with gr.Row():
|
477 |
+
title = gr.Textbox(label="Title", value="UnlimitedMusicGen", interactive=True, key="song_title")
|
478 |
settings_font = gr.Text(label="Settings Font", value="./assets/arial.ttf", interactive=True)
|
479 |
+
settings_font_color = gr.ColorPicker(label="Settings Font Color", value="#c87f05", interactive=True, key="settings_font_color")
|
480 |
with gr.Accordion("Expert", open=False):
|
481 |
with gr.Row():
|
482 |
+
segment_duration = gr.Slider(minimum=10, maximum=30, value=30, step =1,label="Music Generation Segment Length (s)", interactive=True)
|
483 |
+
overlap = gr.Slider(minimum=0, maximum=15, value=1, step=1, label="Segment Overlap", interactive=True)
|
484 |
dimension = gr.Slider(minimum=-2, maximum=2, value=2, step=1, label="Dimension", info="determines which direction to add new segements of audio. (1 = stack tracks, 2 = lengthen, -2..0 = ?)", interactive=True)
|
485 |
with gr.Row():
|
486 |
+
topk = gr.Number(label="Top-k", value=280, precision=0, interactive=True, info="more structured", key="topk")
|
487 |
+
topp = gr.Number(label="Top-p", value=1150, precision=0, interactive=True, info="more variation, overwrites Top-k if not zero", key="topp")
|
488 |
+
temperature = gr.Number(label="Randomness Temperature", value=0.7, precision=None, interactive=True, info="less than one to follow Melody Condition song closely", key="temperature")
|
489 |
+
cfg_coef = gr.Number(label="Classifier Free Guidance", value=3.75, precision=None, interactive=True, info="3.0-4.0, stereo and small need more", key="cfg_coef")
|
490 |
with gr.Row():
|
491 |
+
seed = gr.Number(label="Seed", value=-1, precision=0, interactive=True, key="seed")
|
492 |
gr.Button('\U0001f3b2\ufe0f', elem_classes="small-btn").click(fn=lambda: -1, outputs=[seed], queue=False)
|
493 |
reuse_seed = gr.Button('\u267b\ufe0f', elem_classes="small-btn")
|
494 |
with gr.Column() as c:
|
495 |
+
output = gr.Video(label="Generated Music", interactive=False, show_download_button=True, show_share_button=True, autoplay=False)
|
496 |
wave_file = gr.File(label=".wav file", elem_id="output_wavefile", interactive=True)
|
497 |
seed_used = gr.Number(label='Seed used', value=-1, interactive=False)
|
498 |
|
499 |
radio.change(toggle_audio_src, radio, [melody_filepath], queue=False, show_progress=False)
|
500 |
+
melody_filepath.change(load_melody_filepath, inputs=[melody_filepath, title, model,topp, temperature, cfg_coef, segment_duration], outputs=[title, prompt_index , model, topp, temperature, cfg_coef], api_name="melody_filepath_change", queue=False)
|
501 |
+
reuse_seed.click(fn=lambda x: x, inputs=[seed_used], outputs=[seed], queue=False, api_name="reuse_seed_click")
|
502 |
+
autoplay_cb.change(fn=lambda x: gr.update(autoplay=x), inputs=[autoplay_cb], outputs=[output], queue=False, api_name="autoplay_cb_change")
|
503 |
+
|
504 |
gr.Examples(
|
505 |
examples=[
|
506 |
[
|
|
|
510 |
"80s Pop Synth",
|
511 |
950,
|
512 |
0.6,
|
513 |
+
3.5
|
514 |
],
|
515 |
[
|
516 |
"4/4 120bpm 320kbps 48khz, A cheerful country song with acoustic guitars",
|
|
|
519 |
"Country Guitar",
|
520 |
750,
|
521 |
0.7,
|
522 |
+
4.0
|
523 |
],
|
524 |
[
|
525 |
"4/4 120bpm 320kbps 48khz, 90s rock song with electric guitar and heavy drums",
|
|
|
528 |
"90s Rock Guitar",
|
529 |
1150,
|
530 |
0.7,
|
531 |
+
3.75
|
532 |
],
|
533 |
[
|
534 |
"4/4 120bpm 320kbps 48khz, a light and cheerly EDM track, with syncopated drums, aery pads, and strong emotions",
|
|
|
537 |
"EDM my Bach",
|
538 |
500,
|
539 |
0.7,
|
540 |
+
3.75
|
541 |
],
|
542 |
[
|
543 |
"4/4 320kbps 48khz, lofi slow bpm electro chill with organic samples",
|
|
|
568 |
api_name="submit"
|
569 |
).then(
|
570 |
predict,
|
571 |
+
inputs=[model, text,melody_filepath, duration, dimension, topk, topp, temperature, cfg_coef, background, title, settings_font, settings_font_color, seed, overlap, prompt_index, include_title, include_settings, harmony_only, user_profile, segment_duration],
|
572 |
+
outputs=[output, wave_file, seed_used], scroll_to_output=True)
|
573 |
|
574 |
# Show the interface
|
575 |
launch_kwargs = {}
|
|
|
583 |
launch_kwargs['server_port'] = server_port
|
584 |
if share:
|
585 |
launch_kwargs['share'] = share
|
|
|
|
|
586 |
|
587 |
|
588 |
+
demo.queue(max_size=10, api_open=False).launch(**launch_kwargs, allowed_paths=["assets","./assets","images","./images", 'e:/TMP'], favicon_path="./assets/favicon.ico")
|
589 |
|
590 |
if __name__ == "__main__":
|
591 |
parser = argparse.ArgumentParser()
|