import gradio as gr
import json
from datetime import datetime
import yaml
import time
import os.path as op

from download import download_model
from levo_inference import LeVoInference


APP_DIR = op.dirname(op.abspath(__file__))

download_model(APP_DIR)
print("Successfully downloaded model.")
MODEL = LeVoInference(op.join(APP_DIR, "ckpt/songgeneration_base_zn/"))
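
# Example lyrics pre-filled in the UI. Each section starts with a structure tag
# and ends with a blank line, matching the format described in the textbox
# placeholder below.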
EXAMPLE_LYRICS = """
[intro-short]

[verse]
雪花舞动在无尽的天际
情缘如同雪花般轻轻逝去
希望与真挚
永不磨灭
你的忧虑
随风而逝

[chorus]
我怀抱着守护这片梦境
在这世界中寻找爱与虚幻
苦辣酸甜
我们一起品尝
在雪的光芒中
紧紧相拥

[inst-short]

[verse]
雪花再次在风中飘扬
情愿如同雪花般消失无踪
希望与真挚
永不消失
在痛苦与喧嚣中
你找到解脱

[chorus]
我环绕着守护这片梦境
在这世界中感受爱与虚假
苦辣酸甜
我们一起分享
在白银的光芒中
我们同在

[outro-short]
""".strip()
with open(op.join(APP_DIR, 'conf/vocab.yaml'), 'r', encoding='utf-8') as file:
    STRUCTS = yaml.safe_load(file)


def generate_song(lyric, description=None, prompt_audio=None, genre=None, cfg_coef=None, temperature=None, top_k=None, progress=gr.Progress(track_tqdm=True)):
    global MODEL
    global STRUCTS
    # Keep only the sampling parameters the user actually set.
    params = {'cfg_coef': cfg_coef, 'temperature': temperature, 'top_k': top_k}
    params = {k: v for k, v in params.items() if v is not None}
    sample_rate = MODEL.cfg.sample_rate
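
    # Normalise the lyrics into a single line before passing them to the model:
    # bare [intro]/[inst]/[outro] tags become their -short variants, blank lines
    # between sections become " ; " separators, and line breaks within a section
    # become ".".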
    lyric = lyric.replace("[intro]", "[intro-short]").replace("[inst]", "[inst-short]").replace("[outro]", "[outro-short]")
    lyric = lyric.replace("\n\n", " ; ")
    for s in STRUCTS:
        lyric = lyric.replace(f"{s}\n", f"{s} ")
    lyric = lyric.replace("\n", ".")
    lyric = lyric.replace(". ; ", " ; ")
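
    # Only one conditioning input is used, in order of priority:
    # audio prompt > text description > genre label.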
    if prompt_audio is not None:
        genre = None
        description = None
    elif description is not None and description != "":
        genre = None
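
    # Run inference and time it; the output tensor is moved to the CPU and
    # transposed to the (samples, channels) layout Gradio expects for numpy audio.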
    progress(0.0, "Starting generation")
    start = time.time()

    audio_data = MODEL(lyric, description, prompt_audio, genre, op.join(APP_DIR, "ckpt/prompt.pt"), params).cpu().permute(1, 0).float().numpy()

    end = time.time()
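
    # Record the resolved inputs and timing; this JSON is printed to the log and
    # returned to the UI alongside the audio.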
    input_config = {
        "lyric": lyric,
        "genre": genre,
        "prompt_audio": prompt_audio,
        "description": description,
        "params": params,
        "inference_duration": end - start,
        "timestamp": datetime.now().isoformat(),
    }
    print(input_config)

    return (sample_rate, audio_data), json.dumps(input_config, indent=2)
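

# Gradio UI: lyrics input with optional genre / audio-prompt / text-prompt
# conditioning, advanced sampling controls, and audio + JSON outputs.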
with gr.Blocks(title="SongGeneration Demo Space") as demo:
    gr.Markdown("# 🎵 SongGeneration Demo Space")
    gr.Markdown("Demo interface for the song generation model. Provide lyrics and, optionally, an audio or text prompt to generate a custom song.")
    with gr.Row():
        with gr.Column():
            lyric = gr.Textbox(
                label="Lyrics",
                lines=5,
                max_lines=15,
                value=EXAMPLE_LYRICS,
                info="Supports lyric structure tags such as [verse], [chorus], and [bridge] to separate the different parts of the lyrics. Use [intro], [outro], and [inst] to generate instrumental music.",
                placeholder="""Lyric Format
'''
[structure tag]
lyrics

[structure tag]
lyrics
'''
1. One paragraph represents one section, starting with a structure tag and ending with a blank line.
2. One line represents one lyric line; punctuation is not recommended inside a line.
3. Structure tags can be chosen from the following list:
- '[verse]'
- '[chorus]'
- '[bridge]'
- '[intro-short]'
- '[intro-medium]'
- '[intro-long]'
- '[outro-short]'
- '[outro-medium]'
- '[outro-long]'
- '[inst-short]'
- '[inst-medium]'
- '[inst-long]'
- '[silence]'
"""
            )
            with gr.Tabs(elem_id="extra-tabs"):
                with gr.Tab("Genre Select"):
                    genre = gr.Radio(
                        choices=["Auto", "Pop", "R&B", "Dance", "Jazz", "Folk", "Rock", "Chinese Style", "Chinese Tradition", "Metal", "Reggae", "Chinese Opera"],
                        label="Genre Select (Optional)",
                        value="Auto",
                        interactive=True,
                        elem_id="single-select-radio"
                    )
                with gr.Tab("Audio Prompt"):
                    prompt_audio = gr.Audio(
                        label="Prompt Audio (Optional)",
                        type="filepath",
                        elem_id="audio-prompt"
                    )
                with gr.Tab("Text Prompt"):
                    description = gr.Textbox(
                        label="Song Description (Optional)",
                        info="Describe the singer's gender, the timbre, genre, emotion, instruments, and BPM of the song.",
                        placeholder="female, dark, pop, sad, piano and drums, the bpm is 125.",
                        lines=1,
                        max_lines=2
                    )
            with gr.Accordion("Advanced Config", open=False):
                cfg_coef = gr.Slider(
                    label="CFG Coefficient",
                    minimum=0.1,
                    maximum=3.0,
                    step=0.1,
                    value=1.5,
                    interactive=True,
                    elem_id="cfg-coef",
                )
                temperature = gr.Slider(
                    label="Temperature",
                    minimum=0.1,
                    maximum=2.0,
                    step=0.1,
                    value=0.9,
                    interactive=True,
                    elem_id="temperature",
                )
                top_k = gr.Slider(
                    label="Top-K",
                    minimum=1,
                    maximum=100,
                    step=1,
                    value=50,
                    interactive=True,
                    elem_id="top_k",
                )
            generate_btn = gr.Button("Generate Song", variant="primary")
        with gr.Column():
            output_audio = gr.Audio(label="Generated Song", type="numpy")
            output_json = gr.JSON(label="Input Configuration")
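
    # Wire the button to the generation function; results go to the audio player
    # and the JSON configuration panel.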
    generate_btn.click(
        fn=generate_song,
        inputs=[lyric, description, prompt_audio, genre, cfg_coef, temperature, top_k],
        outputs=[output_audio, output_json]
    )
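
# Listen on all interfaces so the demo is reachable when run in a container or Space.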
if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=7860)