Text-To-Speech

Runtime error

App Files Files Community

pikto committed on Sep 21, 2023

Commit

0cb208e

1 Parent(s): b1372f9

Update app.py

Browse files

Files changed (1) hide show

app.py +64 -35

app.py CHANGED Viewed

@@ -1,17 +1,29 @@
-"""
-TTS interactive demo
-"""
 import logging
 from typing import cast
 import gradio as gr
 from balacoon_tts import TTS
 from huggingface_hub import hf_hub_download, list_repo_files
 # global tts module, initialized from the selected model
 tts = None
 def main():
@@ -20,23 +32,22 @@ def main():
     with gr.Blocks() as demo:
         gr.Markdown(
             """
-            <h1 align="center">Text-to-Speech</h1>
             1. Write an utterance to generate,
             2. Select the model to synthesize with
-            3. Select the speaker
             4. Hit "Generate" and listen to the result!
-            When you select a Model for the first time,
-            it will take a little time to download it.
             """
         )
         with gr.Row(variant="panel"):
-            text = gr.Textbox(label="Text", placeholder="Insert your article here...")
         with gr.Row():
             with gr.Column(variant="panel"):
-                repo_files = list_repo_files(repo_id="balacoon/tts")
                 model_files = [x for x in repo_files if x.endswith("_cpu.addon")]
                 model_name = gr.Dropdown(
                     label="Model",
@@ -47,16 +58,25 @@ def main():
             def set_model(model_name_str: str):
                 """
-                gets value from `model_name`, loads model,
-                re-initializes tts object, gets list of
-                speakers that model supports and set them to `speaker`
                 """
-                model_path = hf_hub_download(
-                    repo_id="balacoon/tts", filename=model_name_str
-                )
-                global tts
-                tts = TTS(model_path)
-                speakers = tts.get_speakers()
                 value = speakers[-1]
                 return gr.Dropdown.update(
                     choices=speakers, value=value, visible=True
@@ -69,26 +89,35 @@ def main():
         with gr.Row(variant="panel"):
             audio = gr.Audio()
-        def synthesize_audio(text_str: str, speaker_str: str = ""):
             """
             gets utterance to synthesize from `text` Textbox
             and speaker name from `speaker` dropdown list.
             speaker name might be empty for single-speaker models.
             Synthesizes the waveform and updates `audio` with it.
             """
-            if not text_str:
-                logging.info("text or speaker are not provided")
                 return None
-            global tts
-            if len(text_str) > 1024:
-                text_str = text_str[:1024]
-            samples = cast(TTS, tts).synthesize(text_str, speaker_str)
-            return gr.Audio.update(value=(cast(TTS, tts).get_sampling_rate(), samples))
-        generate.click(synthesize_audio, inputs=[text, speaker], outputs=audio)
-    demo.launch()
 if __name__ == "__main__":
-    main()

+import os
+import glob
 import logging
 from typing import cast
+from threading import Lock
 import gradio as gr
 from balacoon_tts import TTS
 from huggingface_hub import hf_hub_download, list_repo_files
+# lock that prevents access to the tts object from more than one thread at a time
+locker = Lock()
 # global tts module, initialized from the selected model
 tts = None
+# path to the model that is currently used in tts
+cur_model_path = None
+# cache of speakers, maps model name to speaker list
+model_to_speakers = dict()
+model_repo_dir = "/data"
+for name in list_repo_files(repo_id="balacoon/tts"):
+    if not os.path.isfile(os.path.join(model_repo_dir, name)):
+        hf_hub_download(
+            repo_id="balacoon/tts",
+            filename=name,
+            local_dir=model_repo_dir,
+        )
 def main():
     with gr.Blocks() as demo:
         gr.Markdown(
             """
+            <h1 align="center">Balacoon🦝 Text-to-Speech</h1>
             1. Write an utterance to generate,
             2. Select the model to synthesize with
+            3. Select speaker
             4. Hit "Generate" and listen to the result!
+            You can learn more about models available
+            [here](https://huggingface.co/balacoon/tts).
+            Visit [Balacoon website](https://balacoon.com/) for more info.
             """
         )
         with gr.Row(variant="panel"):
+            text = gr.Textbox(label="Text", placeholder="Type something here...")
         with gr.Row():
             with gr.Column(variant="panel"):
+                repo_files = os.listdir(model_repo_dir)
                 model_files = [x for x in repo_files if x.endswith("_cpu.addon")]
                 model_name = gr.Dropdown(
                     label="Model",
             def set_model(model_name_str: str):
                 """
+                gets value from `model_name`. either
+                uses cached list of speakers for the given model name
+                or loads the addon and checks what are the speakers.
                 """
+                global model_to_speakers
+                if model_name_str in model_to_speakers:
+                    speakers = model_to_speakers[model_name_str]
+                else:
+                    global tts, cur_model_path, locker
+                    with locker:
+                        # need to load this model to learn the list of speakers
+                        model_path = os.path.join(model_repo_dir, model_name_str)
+                        if tts is not None:
+                            del tts
+                        tts = TTS(model_path)
+                        cur_model_path = model_path
+                        speakers = tts.get_speakers()
+                        model_to_speakers[model_name_str] = speakers
                 value = speakers[-1]
                 return gr.Dropdown.update(
                     choices=speakers, value=value, visible=True
         with gr.Row(variant="panel"):
             audio = gr.Audio()
+        def synthesize_audio(text_str: str, model_name_str: str, speaker_str: str):
             """
             gets utterance to synthesize from `text` Textbox
             and speaker name from `speaker` dropdown list.
             speaker name might be empty for single-speaker models.
             Synthesizes the waveform and updates `audio` with it.
             """
+            if not text_str or not model_name_str or not speaker_str:
+                logging.info("text, model name or speaker are not provided")
                 return None
+            expected_model_path = os.path.join(model_repo_dir, model_name_str)
+            global tts, cur_model_path, locker
+            with locker:
+                if expected_model_path != cur_model_path:
+                    # reload model
+                    if tts is not None:
+                        del tts
+                    tts = TTS(expected_model_path)
+                    cur_model_path = expected_model_path
+                if len(text_str) > 1024:
+                    # truncate the text
+                    text_str = text_str[:1024]
+                samples = tts.synthesize(text_str, speaker_str)
+            return gr.Audio.update(value=(tts.get_sampling_rate(), samples))
+        generate.click(synthesize_audio, inputs=[text, model_name, speaker], outputs=audio)
+    demo.queue(concurrency_count=1).launch()
 if __name__ == "__main__":
+    main()