---
# Catalogue of selectable models and melody sources.
# Every entry carries a machine-readable `id` and a human-readable `name`.

# Speech-recognition (ASR) model options.
asr_models:
  - id: openai/whisper-large-v3-turbo
    name: Whisper large-v3-turbo
  - id: openai/whisper-large-v3
    name: Whisper large-v3
  - id: openai/whisper-medium
    name: Whisper medium
  - id: openai/whisper-small
    name: Whisper small
  - id: funasr/paraformer-zh
    name: Paraformer-zh

# Language-model (LLM) options.
llm_models:
  - id: gemini-2.5-flash
    name: Gemini 2.5 Flash
  - id: google/gemma-2-2b
    name: Gemma 2 2B
  - id: meta-llama/Llama-3.2-3B-Instruct
    name: Llama 3.2 3B Instruct
  - id: meta-llama/Llama-3.1-8B-Instruct
    name: Llama 3.1 8B Instruct
  - id: Qwen/Qwen3-8B
    name: Qwen3 8B
  - id: Qwen/Qwen3-30B-A3B
    name: Qwen3 30B A3B

# Singing-voice-synthesis (SVS) model options.
# Each `id` is the entry's `lang` value, a hyphen, then its `model_path`, so
# one checkpoint can be listed once per language.
# `voices` maps a voice label to either a `.npy` file path or an integer.
# NOTE(review): presumably the paths are speaker-embedding files and the
# integers are speaker indices inside the checkpoint - confirm with the loader.
svs_models:
  - id: mandarin-espnet/mixdata_svs_visinger2_spkemb_lang_pretrained_avg
    name: Visinger2 (Bilingual)-zh
    model_path: espnet/mixdata_svs_visinger2_spkemb_lang_pretrained_avg
    lang: mandarin
    voices:
      voice1: resources/singer/singer_embedding_ace-2.npy
      voice2: resources/singer/singer_embedding_ace-8.npy
      voice3: resources/singer/singer_embedding_itako.npy
      voice4: resources/singer/singer_embedding_kising_orange.npy
      voice5: resources/singer/singer_embedding_m4singer_Alto-4.npy
  - id: japanese-espnet/mixdata_svs_visinger2_spkemb_lang_pretrained_avg
    name: Visinger2 (Bilingual)-jp
    model_path: espnet/mixdata_svs_visinger2_spkemb_lang_pretrained_avg
    lang: japanese
    voices:
      voice1: resources/singer/singer_embedding_ace-2.npy
      voice2: resources/singer/singer_embedding_ace-8.npy
      voice3: resources/singer/singer_embedding_itako.npy
      voice4: resources/singer/singer_embedding_kising_orange.npy
      voice5: resources/singer/singer_embedding_m4singer_Alto-4.npy
  - id: mandarin-espnet/aceopencpop_svs_visinger2_40singer_pretrain
    name: Visinger2 (Chinese)
    model_path: espnet/aceopencpop_svs_visinger2_40singer_pretrain
    lang: mandarin
    # Left unquoted on purpose so these values load as integers.
    voices:
      voice1: 5
      voice2: 8
      voice3: 12
      voice4: 15
      voice5: 29

# Melody source options; `desc` is the explanatory text for each choice.
# NOTE(review): the dataset name is spelled both "KiSing" and "Kising" in the
# strings below - confirm the intended spelling before normalizing.
melody_sources:
  - id: gen-random-none
    name: Random Generation
    desc: "Melody is generated without any structure or reference."
  - id: sample-note-kising
    name: Sampled Melody (KiSing)
    desc: "Melody is retrieved from KiSing dataset."
  - id: sample-note-touhou
    name: Sampled Melody (Touhou)
    desc: "Melody is retrieved from Touhou dataset."
  - id: sample-lyric-kising
    name: Sampled Melody with Lyrics (Kising)
    desc: "Melody with aligned lyrics are sampled from Kising dataset."
  # NOTE(review): this desc is identical to the one for sample-lyric-kising
  # although the name says "Synthetic" - looks like a copy-paste leftover;
  # confirm the intended dataset and update the text.
  - id: sample-lyric-genre
    name: Sampled Melody with Lyrics (Synthetic)
    desc: "Melody with aligned lyrics are sampled from Kising dataset."