# Page chrome captured together with this file (not part of the program):
#   Spaces status: Running
#   File size: 6,194 bytes
#   Revisions shown in the viewer gutter: 14be0b9, 29e22da
import gradio as gr
import datetime
import base64
import numpy as np
import dashscope
import os
# API key for the speech service, read from the environment once at import.
# A missing variable raises KeyError here, before the UI is built.
API_KEY = os.environ["API_KEY"]
# Labels offered in the voice dropdown, in menu order. Every label has the
# form "<API voice name> / <localized name>".
_VOICE_LABELS = (
    "Cherry / 芊悦",
    "Serena / 苏瑶",
    "Ethan / 晨煦",
    "Chelsie / 千雪",
    "Momo / 茉兔",
    "Vivian / 十三",
    "Moon / 月白",
    "Maia / 四月",
    "Kai / 凯",
    "Nofish / 不吃鱼",
    "Bella / 萌宝",
    "Jennifer / 詹妮弗",
    "Ryan / 甜茶",
    "Katerina / 卡捷琳娜",
    "Aiden / 艾登",
    "Bodega / 西班牙语-博德加",
    "Alek / 俄语-阿列克",
    "Dolce / 意大利语-多尔切",
    "Sohee / 韩语-素熙",
    "Ono Anna / 日语-小野杏",
    "Lenn / 德语-莱恩",
    "Sonrisa / 西班牙语拉美-索尼莎",
    "Emilien / 法语-埃米尔安",
    "Andre / 葡萄牙语欧-安德雷",
    "Radio Gol / 葡萄牙语巴-拉迪奥·戈尔",
    "Eldric Sage / 精品百人-沧明子",
    "Mia / 精品百人-乖小妹",
    "Mochi / 精品百人-沙小弥",
    "Bellona / 精品百人-燕铮莺",
    "Vincent / 精品百人-田叔",
    "Bunny / 精品百人-萌小姬",
    "Neil / 精品百人-阿闻",
    "Elias / 墨讲师",
    "Arthur / 精品百人-徐大爷",
    "Nini / 精品百人-邻家妹妹",
    "Ebona / 精品百人-诡婆婆",
    "Seren / 精品百人-小婉",
    "Pip / 精品百人-调皮小新",
    "Stella / 精品百人-美少女阿月",
    "Li / 南京-老李",
    "Marcus / 陕西-秦川",
    "Roy / 闽南-阿杰",
    "Peter / 天津-李彼得",
    "Eric / 四川-程川",
    "Rocky / 粤语-阿强",
    "Kiki / 粤语-阿清",
    "Sunny / 四川-晴儿",
    "Jada / 上海-阿珍",
    "Dylan / 北京-晓东",
)

# Maps each dropdown label to the voice name handed to the TTS call, which is
# the part of the label before " / ". If a voice ever needs an API name that
# differs from its label prefix, switch back to an explicit dict literal.
VOICE_OPTIONS = {label: label.partition(" / ")[0] for label in _VOICE_LABELS}

# Label preselected in the voice dropdown.
DEFAULT_VOICE = "Cherry / 芊悦"
# Maps each language label shown in the dropdown to the value passed to the
# TTS call as ``language_type``.
LANGUAGE_MAP = {
    "Auto / 自动": "Auto",
    "English / 英文": "English",
    "Chinese / 中文": "Chinese",
    "German / 德语": "German",
    "Italian / 意大利语": "Italian",
    "Portuguese / 葡萄牙语": "Portuguese",
    "Spanish / 西班牙语": "Spanish",
    "Japanese / 日语": "Japanese",
    "Korean / 韩语": "Korean",
    "French / 法语": "French",
    "Russian / 俄语": "Russian",
}

# Dropdown choices in display order; derived from the mapping's keys so the
# two cannot drift apart.
LANGUAGE_OPTIONS = list(LANGUAGE_MAP)
def tts_interface(text, voice_display, language_display):
    """Synthesize speech for ``text`` and return it in Gradio's audio format.

    The streamed response chunks carry base64-encoded audio, which this
    function treats as 16-bit integer PCM at 24 kHz.

    Args:
        text: Text to synthesize.
        voice_display: Voice dropdown label; must be a key of ``VOICE_OPTIONS``.
        language_display: Language dropdown label; must be a key of
            ``LANGUAGE_MAP``.

    Returns:
        ``(24000, samples)`` where ``samples`` is a 1-D float32 array obtained
        by dividing the int16 samples by 32768, or ``None`` when ``text`` is
        blank or the service returned no audio.

    Raises:
        KeyError: If either label is not a known option.
    """
    voice_name = VOICE_OPTIONS[voice_display]
    # Convert the displayed language label into the API parameter.
    language = LANGUAGE_MAP[language_display]
    print(f"text: {text}, {voice_name}, {language} time: {datetime.datetime.now()}\n")

    # Nothing to synthesize: skip the remote call entirely.
    if not text or not text.strip():
        return None

    responses = dashscope.MultiModalConversation.call(
        api_key=API_KEY,
        model="qwen3-tts-flash-2025-11-27",
        text=text,
        voice=voice_name,
        stream=True,
        language_type=language,
    )

    # Collect raw bytes first and convert once at the end, so a sample that is
    # split across two chunks is still decoded correctly.
    pcm = bytearray()
    for chunk in responses:
        try:
            audio_string = chunk.output.audio.data
        except Exception:
            # Best effort: a chunk without an audio payload (e.g. an error
            # response) is logged and skipped. Unlike a bare ``except``, this
            # lets KeyboardInterrupt/SystemExit propagate.
            print(chunk)
            continue
        if not audio_string:
            continue
        pcm += base64.b64decode(audio_string)

    # int16 samples are two bytes each; ignore a dangling trailing byte.
    sample_count = len(pcm) // 2
    if sample_count == 0:
        # Gradio's Audio output accepts None for "no audio"; a tuple holding
        # None in place of the sample array is not a valid value.
        return None

    full_audio = (
        np.frombuffer(pcm, dtype=np.int16, count=sample_count).astype(np.float32)
        / 32768.0
    )
    sample_rate = 24000
    return (sample_rate, full_audio)
# Page theme and CSS override, named so the Blocks header stays readable.
demo_theme = gr.themes.Soft(
    font=[gr.themes.GoogleFont("Source Sans Pro"), "Arial", "sans-serif"]
)
demo_css = ".gradio-container {max-width: none !important;}"

# NOTE(review): the nesting below was reconstructed from a copy of this file
# that had lost its indentation. Confirm that the examples panel belongs below
# the two-column row rather than inside the right-hand column.
with gr.Blocks(theme=demo_theme, css=demo_css) as demo:
    gr.Markdown("# 🎤 Qwen3-TTS Demo")

    with gr.Row():
        with gr.Column():
            # Text to synthesize (bilingual label, English first).
            text_input = gr.Textbox(
                label="Input Text / 输入文本",
                placeholder="Enter text to synthesis here... / 在此输入要合成为语音的文本...",
                lines=4,
                max_lines=8,
            )
            # Voice picker (bilingual label, English first).
            voice_select = gr.Dropdown(
                label="Select Voice / 选择发音人",
                choices=list(VOICE_OPTIONS),
                value=DEFAULT_VOICE,
            )
            # Text-language picker (bilingual label, English first).
            language_select = gr.Dropdown(
                label="Select Text Language / 选择文本语言",
                choices=LANGUAGE_OPTIONS,
                value="Auto / 自动",
            )
            # Button that triggers synthesis (bilingual label, English first).
            generate_btn = gr.Button("Generate Speech / 生成语音", variant="primary")

        with gr.Column():
            # Read-only player for the generated audio.
            audio_output = gr.Audio(label="Generated Speech / 生成的语音", interactive=False)

    # The examples panel and the button feed the same three input components.
    tts_inputs = [text_input, voice_select, language_select]

    # Sample inputs (text, voice label, language label) the user can load.
    examples = gr.Examples(
        examples=[
            ["你好,我是通义千问,很高兴认识你。", "Cherry / 芊悦", "Chinese / 中文"],
            ["你好,我是通义千问,很高兴认识你。", "Dylan / 北京-晓东", "Chinese / 中文"],
            ["Hello, this is a text-to-speech demo", "Jennifer / 詹妮弗", "English / 英文"],
            ["こんにちは、これはデモです", "Cherry / 芊悦", "Japanese / 日语"],
        ],
        inputs=tts_inputs,
        label="Examples / 示例文本",
    )

    # Run synthesis on click and send the result to the audio player.
    generate_btn.click(
        fn=tts_interface,
        inputs=tts_inputs,
        outputs=audio_output,
    )
# Start the Gradio server only when this file is run as a script, not when it
# is imported.
if __name__ == "__main__":
    demo.launch()