Spaces:
Running
Running
zach
committed on
Commit
·
e9bcee8
1
Parent(s):
e560bf3
Update Hume integration to randomly pick a voice from a predefined list of top voices
Browse files- src/app.py +19 -19
- src/config.py +8 -8
- src/integrations/anthropic_api.py +21 -21
- src/integrations/elevenlabs_api.py +21 -21
- src/integrations/hume_api.py +39 -20
- src/sample_prompts.py +17 -17
- src/utils.py +12 -12
src/app.py
CHANGED
|
@@ -42,14 +42,14 @@ def process_prompt(prompt: str) -> str:
|
|
| 42 |
Returns:
|
| 43 |
tuple: The generated text and audio data from both Hume and ElevenLabs.
|
| 44 |
"""
|
| 45 |
-
logger.info(f
|
| 46 |
try:
|
| 47 |
# Validate prompt length before processing
|
| 48 |
validate_prompt_length(prompt, PROMPT_MAX_LENGTH, PROMPT_MIN_LENGTH)
|
| 49 |
|
| 50 |
# Generate text with Claude API
|
| 51 |
generated_text = generate_text_with_claude(prompt)
|
| 52 |
-
logger.info(f
|
| 53 |
|
| 54 |
# Run TTS requests in parallel
|
| 55 |
with ThreadPoolExecutor(max_workers=2) as executor:
|
|
@@ -60,15 +60,15 @@ def process_prompt(prompt: str) -> str:
|
|
| 60 |
hume_audio = hume_future.result()
|
| 61 |
elevenlabs_audio = elevenlabs_future.result()
|
| 62 |
|
| 63 |
-
logger.info(f
|
| 64 |
return generated_text, hume_audio, elevenlabs_audio
|
| 65 |
|
| 66 |
except ValueError as ve:
|
| 67 |
-
logger.warning(f
|
| 68 |
return str(ve), None, None # Return validation error directly to the UI
|
| 69 |
except Exception as e:
|
| 70 |
-
logger.error(f
|
| 71 |
-
return
|
| 72 |
|
| 73 |
|
| 74 |
def build_gradio_interface() -> gr.Blocks:
|
|
@@ -81,16 +81,16 @@ def build_gradio_interface() -> gr.Blocks:
|
|
| 81 |
with gr.Blocks() as demo:
|
| 82 |
gr.Markdown("# TTS Arena")
|
| 83 |
gr.Markdown(
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
|
| 87 |
)
|
| 88 |
|
| 89 |
with gr.Row():
|
| 90 |
# Dropdown for predefined prompts
|
| 91 |
sample_prompt_dropdown = gr.Dropdown(
|
| 92 |
choices=list(SAMPLE_PROMPTS.keys()),
|
| 93 |
-
label=
|
| 94 |
value=None,
|
| 95 |
interactive=True
|
| 96 |
)
|
|
@@ -98,26 +98,26 @@ def build_gradio_interface() -> gr.Blocks:
|
|
| 98 |
with gr.Row():
|
| 99 |
# Custom prompt input
|
| 100 |
prompt_input = gr.Textbox(
|
| 101 |
-
label=
|
| 102 |
-
placeholder=
|
| 103 |
lines=2,
|
| 104 |
)
|
| 105 |
|
| 106 |
with gr.Row():
|
| 107 |
-
generate_button = gr.Button(
|
| 108 |
|
| 109 |
# Display the generated text and audio side by side
|
| 110 |
with gr.Row():
|
| 111 |
output_text = gr.Textbox(
|
| 112 |
-
label=
|
| 113 |
interactive=False,
|
| 114 |
lines=12,
|
| 115 |
max_lines=24,
|
| 116 |
scale=2,
|
| 117 |
)
|
| 118 |
with gr.Column(scale=1):
|
| 119 |
-
hume_audio_output = gr.Audio(label=
|
| 120 |
-
elevenlabs_audio_output = gr.Audio(label=
|
| 121 |
|
| 122 |
# Auto-fill the text input when a sample is selected
|
| 123 |
sample_prompt_dropdown.change(
|
|
@@ -133,11 +133,11 @@ def build_gradio_interface() -> gr.Blocks:
|
|
| 133 |
outputs=[output_text, hume_audio_output, elevenlabs_audio_output],
|
| 134 |
)
|
| 135 |
|
| 136 |
-
logger.debug(
|
| 137 |
return demo
|
| 138 |
|
| 139 |
|
| 140 |
-
if __name__ ==
|
| 141 |
-
logger.info(
|
| 142 |
demo = build_gradio_interface()
|
| 143 |
demo.launch()
|
|
|
|
| 42 |
Returns:
|
| 43 |
tuple: The generated text and audio data from both Hume and ElevenLabs.
|
| 44 |
"""
|
| 45 |
+
logger.info(f'Processing prompt: {truncate_text(prompt, max_length=100)}')
|
| 46 |
try:
|
| 47 |
# Validate prompt length before processing
|
| 48 |
validate_prompt_length(prompt, PROMPT_MAX_LENGTH, PROMPT_MIN_LENGTH)
|
| 49 |
|
| 50 |
# Generate text with Claude API
|
| 51 |
generated_text = generate_text_with_claude(prompt)
|
| 52 |
+
logger.info(f'Generated text (length={len(generated_text)} characters).')
|
| 53 |
|
| 54 |
# Run TTS requests in parallel
|
| 55 |
with ThreadPoolExecutor(max_workers=2) as executor:
|
|
|
|
| 60 |
hume_audio = hume_future.result()
|
| 61 |
elevenlabs_audio = elevenlabs_future.result()
|
| 62 |
|
| 63 |
+
logger.info(f'TTS audio generated successfully: Hume={len(hume_audio)} bytes, ElevenLabs={len(elevenlabs_audio)} bytes')
|
| 64 |
return generated_text, hume_audio, elevenlabs_audio
|
| 65 |
|
| 66 |
except ValueError as ve:
|
| 67 |
+
logger.warning(f'Validation error: {ve}')
|
| 68 |
return str(ve), None, None # Return validation error directly to the UI
|
| 69 |
except Exception as e:
|
| 70 |
+
logger.error(f'Unexpected error during processing: {e}')
|
| 71 |
+
return 'An unexpected error occurred. Please try again.', None, None
|
| 72 |
|
| 73 |
|
| 74 |
def build_gradio_interface() -> gr.Blocks:
|
|
|
|
| 81 |
with gr.Blocks() as demo:
|
| 82 |
gr.Markdown("# TTS Arena")
|
| 83 |
gr.Markdown(
|
| 84 |
+
'Generate text from a prompt using **Claude by Anthropic**, '
|
| 85 |
+
'and listen to the generated text-to-speech using **Hume TTS API** '
|
| 86 |
+
'and **ElevenLabs TTS API** for comparison.'
|
| 87 |
)
|
| 88 |
|
| 89 |
with gr.Row():
|
| 90 |
# Dropdown for predefined prompts
|
| 91 |
sample_prompt_dropdown = gr.Dropdown(
|
| 92 |
choices=list(SAMPLE_PROMPTS.keys()),
|
| 93 |
+
label='Choose a Sample Prompt (or enter your own below)',
|
| 94 |
value=None,
|
| 95 |
interactive=True
|
| 96 |
)
|
|
|
|
| 98 |
with gr.Row():
|
| 99 |
# Custom prompt input
|
| 100 |
prompt_input = gr.Textbox(
|
| 101 |
+
label='Enter your prompt',
|
| 102 |
+
placeholder='Or type your own prompt here...',
|
| 103 |
lines=2,
|
| 104 |
)
|
| 105 |
|
| 106 |
with gr.Row():
|
| 107 |
+
generate_button = gr.Button('Generate')
|
| 108 |
|
| 109 |
# Display the generated text and audio side by side
|
| 110 |
with gr.Row():
|
| 111 |
output_text = gr.Textbox(
|
| 112 |
+
label='Generated Text',
|
| 113 |
interactive=False,
|
| 114 |
lines=12,
|
| 115 |
max_lines=24,
|
| 116 |
scale=2,
|
| 117 |
)
|
| 118 |
with gr.Column(scale=1):
|
| 119 |
+
hume_audio_output = gr.Audio(label='Hume TTS Audio', type='filepath')
|
| 120 |
+
elevenlabs_audio_output = gr.Audio(label='ElevenLabs TTS Audio', type='filepath')
|
| 121 |
|
| 122 |
# Auto-fill the text input when a sample is selected
|
| 123 |
sample_prompt_dropdown.change(
|
|
|
|
| 133 |
outputs=[output_text, hume_audio_output, elevenlabs_audio_output],
|
| 134 |
)
|
| 135 |
|
| 136 |
+
logger.debug('Gradio interface built successfully')
|
| 137 |
return demo
|
| 138 |
|
| 139 |
|
| 140 |
+
if __name__ == '__main__':
|
| 141 |
+
logger.info('Launching TTS Arena Gradio app...')
|
| 142 |
demo = build_gradio_interface()
|
| 143 |
demo.launch()
|
src/config.py
CHANGED
|
@@ -25,10 +25,10 @@ load_dotenv()
|
|
| 25 |
|
| 26 |
|
| 27 |
# Enable debugging mode based on an environment variable
|
| 28 |
-
debug_raw = os.getenv(
|
| 29 |
-
if debug_raw not in {
|
| 30 |
-
print(f
|
| 31 |
-
DEBUG = debug_raw ==
|
| 32 |
|
| 33 |
|
| 34 |
# Configure the logger
|
|
@@ -36,8 +36,8 @@ logging.basicConfig(
|
|
| 36 |
level=logging.DEBUG if DEBUG else logging.INFO,
|
| 37 |
format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
|
| 38 |
)
|
| 39 |
-
logger: logging.Logger = logging.getLogger(
|
| 40 |
-
logger.info(f
|
| 41 |
|
| 42 |
|
| 43 |
# Log environment variables
|
|
@@ -49,7 +49,7 @@ def log_env_variable(var_name: str, value: str) -> None:
|
|
| 49 |
var_name (str): The name of the environment variable.
|
| 50 |
value (str): The value of the environment variable.
|
| 51 |
"""
|
| 52 |
-
logger.debug(f
|
| 53 |
|
| 54 |
if DEBUG:
|
| 55 |
-
logger.debug(f
|
|
|
|
| 25 |
|
| 26 |
|
| 27 |
# Enable debugging mode based on an environment variable
|
| 28 |
+
debug_raw = os.getenv('DEBUG', 'false').lower()
|
| 29 |
+
if debug_raw not in {'true', 'false'}:
|
| 30 |
+
print(f'Warning: Invalid DEBUG value "{debug_raw}". Defaulting to "false".')
|
| 31 |
+
DEBUG = debug_raw == 'true'
|
| 32 |
|
| 33 |
|
| 34 |
# Configure the logger
|
|
|
|
| 36 |
level=logging.DEBUG if DEBUG else logging.INFO,
|
| 37 |
format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
|
| 38 |
)
|
| 39 |
+
logger: logging.Logger = logging.getLogger('tts_arena')
|
| 40 |
+
logger.info(f'Debug mode is {"enabled" if DEBUG else "disabled"}.')
|
| 41 |
|
| 42 |
|
| 43 |
# Log environment variables
|
|
|
|
| 49 |
var_name (str): The name of the environment variable.
|
| 50 |
value (str): The value of the environment variable.
|
| 51 |
"""
|
| 52 |
+
logger.debug(f'Environment variable "{var_name}" validated with value: {value}')
|
| 53 |
|
| 54 |
if DEBUG:
|
| 55 |
+
logger.debug(f'DEBUG mode enabled.')
|
src/integrations/anthropic_api.py
CHANGED
|
@@ -38,8 +38,8 @@ class AnthropicConfig:
|
|
| 38 |
Immutable configuration for interacting with the Anthropic API.
|
| 39 |
Includes client initialization for encapsulation.
|
| 40 |
"""
|
| 41 |
-
api_key: str = validate_env_var(
|
| 42 |
-
model: ModelParam =
|
| 43 |
max_tokens: int = 300 # Max tokens for API response
|
| 44 |
system_prompt: str = """You are a highly creative and articulate assistant specialized in generating vivid, engaging, and well-written content.
|
| 45 |
|
|
@@ -62,13 +62,13 @@ Always keep your responses concise, unless explicitly instructed to elaborate.""
|
|
| 62 |
def __post_init__(self):
|
| 63 |
# Validate that required attributes are set
|
| 64 |
if not self.api_key:
|
| 65 |
-
raise ValueError(
|
| 66 |
if not self.model:
|
| 67 |
-
raise ValueError(
|
| 68 |
if not self.max_tokens:
|
| 69 |
-
raise ValueError(
|
| 70 |
if not self.system_prompt:
|
| 71 |
-
raise ValueError(
|
| 72 |
|
| 73 |
@property
|
| 74 |
def client(self) -> Anthropic:
|
|
@@ -119,44 +119,44 @@ def generate_text_with_claude(prompt: str) -> str:
|
|
| 119 |
>>> generate_text_with_claude("")
|
| 120 |
"The prompt exceeds the maximum allowed length of 500 characters. Your prompt contains 512 characters."
|
| 121 |
"""
|
| 122 |
-
logger.debug(f
|
| 123 |
|
| 124 |
try:
|
| 125 |
response: Message = anthropic_config.client.messages.create(
|
| 126 |
model=anthropic_config.model,
|
| 127 |
max_tokens=anthropic_config.max_tokens,
|
| 128 |
system=anthropic_config.system_prompt,
|
| 129 |
-
messages=[{
|
| 130 |
)
|
| 131 |
-
logger.debug(f
|
| 132 |
|
| 133 |
# Validate response content
|
| 134 |
-
if not hasattr(response,
|
| 135 |
logger.error("Response is missing 'content'. Response: %s", response)
|
| 136 |
-
raise AnthropicError(
|
| 137 |
|
| 138 |
# Process response content
|
| 139 |
blocks: Union[List[TextBlock], TextBlock, None] = response.content
|
| 140 |
|
| 141 |
if isinstance(blocks, list):
|
| 142 |
-
result =
|
| 143 |
-
logger.debug(f
|
| 144 |
return result
|
| 145 |
if isinstance(blocks, TextBlock):
|
| 146 |
-
logger.debug(f
|
| 147 |
return blocks.text
|
| 148 |
|
| 149 |
-
logger.warning(f
|
| 150 |
-
return str(blocks or
|
| 151 |
|
| 152 |
except Exception as e:
|
| 153 |
-
logger.exception(f
|
| 154 |
raise AnthropicError(
|
| 155 |
message=(
|
| 156 |
-
f
|
| 157 |
-
f
|
| 158 |
-
f
|
| 159 |
-
f
|
| 160 |
),
|
| 161 |
original_exception=e,
|
| 162 |
)
|
|
|
|
| 38 |
Immutable configuration for interacting with the Anthropic API.
|
| 39 |
Includes client initialization for encapsulation.
|
| 40 |
"""
|
| 41 |
+
api_key: str = validate_env_var('ANTHROPIC_API_KEY')
|
| 42 |
+
model: ModelParam = 'claude-3-5-sonnet-latest' # Valid predefined model
|
| 43 |
max_tokens: int = 300 # Max tokens for API response
|
| 44 |
system_prompt: str = """You are a highly creative and articulate assistant specialized in generating vivid, engaging, and well-written content.
|
| 45 |
|
|
|
|
| 62 |
def __post_init__(self):
|
| 63 |
# Validate that required attributes are set
|
| 64 |
if not self.api_key:
|
| 65 |
+
raise ValueError('Anthropic API key is not set.')
|
| 66 |
if not self.model:
|
| 67 |
+
raise ValueError('Anthropic Model is not set.')
|
| 68 |
if not self.max_tokens:
|
| 69 |
+
raise ValueError('Anthropic Max Tokens is not set.')
|
| 70 |
if not self.system_prompt:
|
| 71 |
+
raise ValueError('Anthropic System Prompt is not set.')
|
| 72 |
|
| 73 |
@property
|
| 74 |
def client(self) -> Anthropic:
|
|
|
|
| 119 |
>>> generate_text_with_claude("")
|
| 120 |
"The prompt exceeds the maximum allowed length of 500 characters. Your prompt contains 512 characters."
|
| 121 |
"""
|
| 122 |
+
logger.debug(f'Generating text with Claude. Prompt length: {len(prompt)} characters.')
|
| 123 |
|
| 124 |
try:
|
| 125 |
response: Message = anthropic_config.client.messages.create(
|
| 126 |
model=anthropic_config.model,
|
| 127 |
max_tokens=anthropic_config.max_tokens,
|
| 128 |
system=anthropic_config.system_prompt,
|
| 129 |
+
messages=[{'role': 'user', 'content': prompt}],
|
| 130 |
)
|
| 131 |
+
logger.debug(f'API response received: {truncate_text(str(response))}')
|
| 132 |
|
| 133 |
# Validate response content
|
| 134 |
+
if not hasattr(response, 'content'):
|
| 135 |
logger.error("Response is missing 'content'. Response: %s", response)
|
| 136 |
+
raise AnthropicError('Invalid API response: Missing "content".')
|
| 137 |
|
| 138 |
# Process response content
|
| 139 |
blocks: Union[List[TextBlock], TextBlock, None] = response.content
|
| 140 |
|
| 141 |
if isinstance(blocks, list):
|
| 142 |
+
result = '\n\n'.join(block.text for block in blocks if isinstance(block, TextBlock))
|
| 143 |
+
logger.debug(f'Processed response from list: {truncate_text(result)}')
|
| 144 |
return result
|
| 145 |
if isinstance(blocks, TextBlock):
|
| 146 |
+
logger.debug(f'Processed response from single TextBlock: {truncate_text(blocks.text)}')
|
| 147 |
return blocks.text
|
| 148 |
|
| 149 |
+
logger.warning(f'Unexpected response type: {type(blocks)}')
|
| 150 |
+
return str(blocks or 'No content generated.')
|
| 151 |
|
| 152 |
except Exception as e:
|
| 153 |
+
logger.exception(f'Error generating text with Claude: {e}')
|
| 154 |
raise AnthropicError(
|
| 155 |
message=(
|
| 156 |
+
f'Error generating text with Claude: {e}. '
|
| 157 |
+
f'HTTP Status: {getattr(response, "status", "N/A")}. '
|
| 158 |
+
f'Prompt (truncated): {truncate_text(prompt)}. '
|
| 159 |
+
f'Model: {anthropic_config.model}, Max tokens: {anthropic_config.max_tokens}'
|
| 160 |
),
|
| 161 |
original_exception=e,
|
| 162 |
)
|
src/integrations/elevenlabs_api.py
CHANGED
|
@@ -34,26 +34,26 @@ from src.utils import validate_env_var, truncate_text
|
|
| 34 |
@dataclass(frozen=True)
|
| 35 |
class ElevenLabsConfig:
|
| 36 |
"""Immutable configuration for interacting with the ElevenLabs TTS API."""
|
| 37 |
-
api_key: str = validate_env_var(
|
| 38 |
-
model_id: str =
|
| 39 |
-
output_format: str =
|
| 40 |
top_voices: list[str] = (
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
)
|
| 46 |
|
| 47 |
def __post_init__(self):
|
| 48 |
# Validate that required attributes are set
|
| 49 |
if not self.api_key:
|
| 50 |
-
raise ValueError(
|
| 51 |
if not self.model_id:
|
| 52 |
-
raise ValueError(
|
| 53 |
if not self.output_format:
|
| 54 |
-
raise ValueError(
|
| 55 |
if not self.top_voices:
|
| 56 |
-
raise ValueError(
|
| 57 |
|
| 58 |
@property
|
| 59 |
def client(self) -> ElevenLabs:
|
|
@@ -103,7 +103,7 @@ def text_to_speech_with_elevenlabs(text: str) -> bytes:
|
|
| 103 |
Raises:
|
| 104 |
ElevenLabsException: If there is an error communicating with the ElevenLabs API or processing the response.
|
| 105 |
"""
|
| 106 |
-
logger.debug(f
|
| 107 |
|
| 108 |
try:
|
| 109 |
# Generate audio using the ElevenLabs SDK
|
|
@@ -115,24 +115,24 @@ def text_to_speech_with_elevenlabs(text: str) -> bytes:
|
|
| 115 |
)
|
| 116 |
|
| 117 |
# Ensure the response is an iterator
|
| 118 |
-
if not hasattr(audio_iterator,
|
| 119 |
-
logger.error(
|
| 120 |
-
raise ElevenLabsException(
|
| 121 |
|
| 122 |
# Combine chunks into a single bytes object
|
| 123 |
-
audio = b
|
| 124 |
|
| 125 |
# Validate audio
|
| 126 |
if not audio:
|
| 127 |
-
logger.error(
|
| 128 |
-
raise ElevenLabsException(
|
| 129 |
|
| 130 |
-
logger.info(f
|
| 131 |
return audio
|
| 132 |
|
| 133 |
except Exception as e:
|
| 134 |
-
logger.exception(f
|
| 135 |
raise ElevenLabsException(
|
| 136 |
-
message=f
|
| 137 |
original_exception=e,
|
| 138 |
)
|
|
|
|
| 34 |
@dataclass(frozen=True)
|
| 35 |
class ElevenLabsConfig:
|
| 36 |
"""Immutable configuration for interacting with the ElevenLabs TTS API."""
|
| 37 |
+
api_key: str = validate_env_var('ELEVENLABS_API_KEY')
|
| 38 |
+
model_id: str = 'eleven_multilingual_v2' # ElevenLabs' most emotionally expressive model
|
| 39 |
+
output_format: str = 'mp3_44100_128' # Output format of the generated audio.
|
| 40 |
top_voices: list[str] = (
|
| 41 |
+
'pNInz6obpgDQGcFmaJgB', # Adam
|
| 42 |
+
'ErXwobaYiN019PkySvjV', # Antoni
|
| 43 |
+
'21m00Tcm4TlvDq8ikWAM', # Rachel
|
| 44 |
+
'XrExE9yKIg1WjnnlVkGX', # Matilda
|
| 45 |
)
|
| 46 |
|
| 47 |
def __post_init__(self):
|
| 48 |
# Validate that required attributes are set
|
| 49 |
if not self.api_key:
|
| 50 |
+
raise ValueError('ElevenLabs API key is not set.')
|
| 51 |
if not self.model_id:
|
| 52 |
+
raise ValueError('ElevenLabs Model ID is not set.')
|
| 53 |
if not self.output_format:
|
| 54 |
+
raise ValueError('ElevenLabs Output Format is not set.')
|
| 55 |
if not self.top_voices:
|
| 56 |
+
raise ValueError('ElevenLabs Top Voices are not set.')
|
| 57 |
|
| 58 |
@property
|
| 59 |
def client(self) -> ElevenLabs:
|
|
|
|
| 103 |
Raises:
|
| 104 |
ElevenLabsException: If there is an error communicating with the ElevenLabs API or processing the response.
|
| 105 |
"""
|
| 106 |
+
logger.debug(f'Generating speech with ElevenLabs. Text length: {len(text)} characters.')
|
| 107 |
|
| 108 |
try:
|
| 109 |
# Generate audio using the ElevenLabs SDK
|
|
|
|
| 115 |
)
|
| 116 |
|
| 117 |
# Ensure the response is an iterator
|
| 118 |
+
if not hasattr(audio_iterator, '__iter__') or not hasattr(audio_iterator, '__next__'):
|
| 119 |
+
logger.error('Invalid audio iterator response.')
|
| 120 |
+
raise ElevenLabsException('Invalid audio iterator received from ElevenLabs API.')
|
| 121 |
|
| 122 |
# Combine chunks into a single bytes object
|
| 123 |
+
audio = b''.join(chunk for chunk in audio_iterator)
|
| 124 |
|
| 125 |
# Validate audio
|
| 126 |
if not audio:
|
| 127 |
+
logger.error('No audio data received from ElevenLabs API.')
|
| 128 |
+
raise ElevenLabsException('Empty audio data received from ElevenLabs API.')
|
| 129 |
|
| 130 |
+
logger.info(f'Received ElevenLabs audio ({len(audio)} bytes).')
|
| 131 |
return audio
|
| 132 |
|
| 133 |
except Exception as e:
|
| 134 |
+
logger.exception(f'Error generating speech: {e}')
|
| 135 |
raise ElevenLabsException(
|
| 136 |
+
message=f'Failed to generate audio with ElevenLabs: {e}',
|
| 137 |
original_exception=e,
|
| 138 |
)
|
src/integrations/hume_api.py
CHANGED
|
@@ -21,7 +21,8 @@ Functions:
|
|
| 21 |
# Standard Library Imports
|
| 22 |
from dataclasses import dataclass
|
| 23 |
import logging
|
| 24 |
-
|
|
|
|
| 25 |
# Third-Party Library Imports
|
| 26 |
import requests
|
| 27 |
from tenacity import retry, stop_after_attempt, wait_fixed, before_log, after_log
|
|
@@ -32,20 +33,38 @@ from src.utils import validate_env_var, truncate_text
|
|
| 32 |
|
| 33 |
@dataclass(frozen=True)
|
| 34 |
class HumeConfig:
|
| 35 |
-
"""Immutable configuration for interacting with the TTS API."""
|
| 36 |
-
tts_endpoint_url: str =
|
| 37 |
-
api_key: str = validate_env_var(
|
| 38 |
-
|
| 39 |
audio_format: str = 'wav'
|
| 40 |
-
headers: dict = None
|
| 41 |
|
| 42 |
def __post_init__(self):
|
| 43 |
-
#
|
| 44 |
-
|
| 45 |
-
'
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 46 |
'Content-Type': 'application/json',
|
| 47 |
})
|
| 48 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 49 |
|
| 50 |
class HumeException(Exception):
|
| 51 |
"""Custom exception for errors related to the Hume TTS API."""
|
|
@@ -78,11 +97,11 @@ def text_to_speech_with_hume(prompt: str, text: str) -> bytes:
|
|
| 78 |
Raises:
|
| 79 |
HumeException: If there is an error communicating with the Hume TTS API.
|
| 80 |
"""
|
| 81 |
-
logger.debug(f
|
| 82 |
|
| 83 |
request_body = {
|
| 84 |
-
|
| 85 |
-
|
| 86 |
# "voice_description": prompt, # <-- breaking request!?
|
| 87 |
# "format": hume_config.audio_format, # <-- breaking request!?
|
| 88 |
}
|
|
@@ -96,26 +115,26 @@ def text_to_speech_with_hume(prompt: str, text: str) -> bytes:
|
|
| 96 |
|
| 97 |
# Validate response
|
| 98 |
if response.status_code != 200:
|
| 99 |
-
logger.error(f
|
| 100 |
-
raise HumeException(f
|
| 101 |
|
| 102 |
# Process audio response
|
| 103 |
-
if response.headers.get(
|
| 104 |
audio_data = response.content # Raw binary audio data
|
| 105 |
-
logger.info(f
|
| 106 |
return audio_data
|
| 107 |
|
| 108 |
# Unexpected content type
|
| 109 |
-
raise HumeException(f
|
| 110 |
|
| 111 |
except requests.exceptions.RequestException as e:
|
| 112 |
-
logger.exception(
|
| 113 |
raise HumeException(
|
| 114 |
-
message=f
|
| 115 |
original_exception=e,
|
| 116 |
)
|
| 117 |
except Exception as e:
|
| 118 |
-
logger.exception(
|
| 119 |
raise HumeException(
|
| 120 |
message=f"Unexpected error while processing the Hume TTS response: {e}",
|
| 121 |
original_exception=e,
|
|
|
|
| 21 |
# Standard Library Imports
|
| 22 |
from dataclasses import dataclass
|
| 23 |
import logging
|
| 24 |
+
import random
|
| 25 |
+
from typing import List, Optional
|
| 26 |
# Third-Party Library Imports
|
| 27 |
import requests
|
| 28 |
from tenacity import retry, stop_after_attempt, wait_fixed, before_log, after_log
|
|
|
|
| 33 |
|
| 34 |
@dataclass(frozen=True)
|
| 35 |
class HumeConfig:
|
| 36 |
+
"""Immutable configuration for interacting with the Hume TTS API."""
|
| 37 |
+
tts_endpoint_url: str = 'https://api.hume.ai/v0/tts'
|
| 38 |
+
api_key: str = validate_env_var('HUME_API_KEY')
|
| 39 |
+
voices: List[str] = ('ITO', 'KORA', 'DACHER') # List of available Hume voices
|
| 40 |
audio_format: str = 'wav'
|
| 41 |
+
headers: dict = None # Headers for the API requests
|
| 42 |
|
| 43 |
def __post_init__(self):
|
| 44 |
+
# Validate required attributes
|
| 45 |
+
if not self.api_key:
|
| 46 |
+
raise ValueError('Hume API key is not set.')
|
| 47 |
+
if not self.voices:
|
| 48 |
+
raise ValueError('Hume voices list is empty. Please provide at least one voice.')
|
| 49 |
+
if not self.audio_format:
|
| 50 |
+
raise ValueError('Hume audio format is not set.')
|
| 51 |
+
|
| 52 |
+
# Set headers dynamically after validation
|
| 53 |
+
object.__setattr__(self, 'headers', {
|
| 54 |
+
'X-Hume-Api-Key': f'{self.api_key}',
|
| 55 |
'Content-Type': 'application/json',
|
| 56 |
})
|
| 57 |
|
| 58 |
+
@property
|
| 59 |
+
def random_voice(self) -> str:
|
| 60 |
+
"""
|
| 61 |
+
Randomly selects a voice from the available voices.
|
| 62 |
+
|
| 63 |
+
Returns:
|
| 64 |
+
str: A randomly chosen voice name.
|
| 65 |
+
"""
|
| 66 |
+
return random.choice(self.voices)
|
| 67 |
+
|
| 68 |
|
| 69 |
class HumeException(Exception):
|
| 70 |
"""Custom exception for errors related to the Hume TTS API."""
|
|
|
|
| 97 |
Raises:
|
| 98 |
HumeException: If there is an error communicating with the Hume TTS API.
|
| 99 |
"""
|
| 100 |
+
logger.debug(f'Processing TTS with Hume. Prompt length: {len(prompt)} characters. Text length: {len(text)} characters.')
|
| 101 |
|
| 102 |
request_body = {
|
| 103 |
+
'text': text,
|
| 104 |
+
'voice': {'name': hume_config.random_voice},
|
| 105 |
# "voice_description": prompt, # <-- breaking request!?
|
| 106 |
# "format": hume_config.audio_format, # <-- breaking request!?
|
| 107 |
}
|
|
|
|
| 115 |
|
| 116 |
# Validate response
|
| 117 |
if response.status_code != 200:
|
| 118 |
+
logger.error(f'Hume TTS API Error: {response.status_code} - {response.text[:200]}... (truncated)')
|
| 119 |
+
raise HumeException(f'Hume TTS API responded with status {response.status_code}: {response.text}')
|
| 120 |
|
| 121 |
# Process audio response
|
| 122 |
+
if response.headers.get('Content-Type', '').startswith('audio/'):
|
| 123 |
audio_data = response.content # Raw binary audio data
|
| 124 |
+
logger.info(f'Received audio data from Hume ({len(response.content)} bytes).')
|
| 125 |
return audio_data
|
| 126 |
|
| 127 |
# Unexpected content type
|
| 128 |
+
raise HumeException(f'Unexpected Content-Type: {response.headers.get("Content-Type", "Unknown")}')
|
| 129 |
|
| 130 |
except requests.exceptions.RequestException as e:
|
| 131 |
+
logger.exception('Request to Hume TTS API failed.')
|
| 132 |
raise HumeException(
|
| 133 |
+
message=f'Failed to communicate with Hume TTS API: {e}',
|
| 134 |
original_exception=e,
|
| 135 |
)
|
| 136 |
except Exception as e:
|
| 137 |
+
logger.exception('Request to Hume TTS API failed.')
|
| 138 |
raise HumeException(
|
| 139 |
message=f"Unexpected error while processing the Hume TTS response: {e}",
|
| 140 |
original_exception=e,
|
src/sample_prompts.py
CHANGED
|
@@ -6,26 +6,26 @@ These prompts are structured to highlight different aspects of emotional tone, p
|
|
| 6 |
"""
|
| 7 |
|
| 8 |
SAMPLE_PROMPTS = {
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
| 13 |
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
|
| 18 |
"🐱 Whimsical Children's Story (Talking Cat)":
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
}
|
|
|
|
| 6 |
"""
|
| 7 |
|
| 8 |
SAMPLE_PROMPTS = {
|
| 9 |
+
'🚀 Dramatic Monologue (Stranded Astronaut)':
|
| 10 |
+
'Write a short dramatic monologue from a lone astronaut stranded on Mars, '
|
| 11 |
+
'speaking to mission control for the last time. The tone should be reflective, '
|
| 12 |
+
'filled with awe and resignation as they describe the Martian landscape and their final thoughts.',
|
| 13 |
|
| 14 |
+
'📜 Poetic Sonnet (The Passage of Time)':
|
| 15 |
+
'Compose a sonnet about the passage of time, using vivid imagery and a flowing, melodic rhythm. '
|
| 16 |
+
'The poem should contrast fleeting moments with eternity, capturing both beauty and melancholy.',
|
| 17 |
|
| 18 |
"🐱 Whimsical Children's Story (Talking Cat)":
|
| 19 |
+
'Tell a short bedtime story about a mischievous talking cat who sneaks into a grand wizard’s library '
|
| 20 |
+
'at night and accidentally casts a spell that brings the books to life. '
|
| 21 |
+
'Make the tone playful, whimsical, and filled with wonder.',
|
| 22 |
|
| 23 |
+
'🔥 Intense Speech (Freedom & Justice)':
|
| 24 |
+
'Write a powerful speech delivered by a rebel leader rallying their people against a tyrant. '
|
| 25 |
+
'The speech should be passionate, filled with urgency and conviction, calling for freedom and justice.',
|
| 26 |
|
| 27 |
+
'👻 Mysterious Horror Scene (Haunted Lighthouse)':
|
| 28 |
+
'Describe a chilling ghostly encounter in an abandoned lighthouse on a foggy night. '
|
| 29 |
+
'The protagonist, alone and cold, begins hearing whispers from the shadows, '
|
| 30 |
+
'telling them secrets they were never meant to know.'
|
| 31 |
}
|
src/utils.py
CHANGED
|
@@ -40,14 +40,14 @@ def truncate_text(text: str, max_length: int = 50) -> str:
|
|
| 40 |
''
|
| 41 |
"""
|
| 42 |
if max_length <= 0:
|
| 43 |
-
logger.warning(f
|
| 44 |
-
return
|
| 45 |
|
| 46 |
is_truncated = len(text) > max_length
|
| 47 |
if is_truncated:
|
| 48 |
-
logger.debug(f
|
| 49 |
|
| 50 |
-
return text[:max_length] + (
|
| 51 |
|
| 52 |
|
| 53 |
def validate_env_var(var_name: str) -> str:
|
|
@@ -74,9 +74,9 @@ def validate_env_var(var_name: str) -> str:
|
|
| 74 |
...
|
| 75 |
ValueError: MISSING_VAR is not set. Please ensure it is defined in your environment variables.
|
| 76 |
"""
|
| 77 |
-
value = os.environ.get(var_name,
|
| 78 |
if not value:
|
| 79 |
-
raise ValueError(f
|
| 80 |
return value
|
| 81 |
|
| 82 |
|
|
@@ -99,21 +99,21 @@ def validate_prompt_length(prompt: str, max_length: int, min_length: int) -> Non
|
|
| 99 |
>>> validate_prompt_length("", max_length=500, min_length=1)
|
| 100 |
# Raises ValueError: "Prompt must be at least 1 character(s) long."
|
| 101 |
"""
|
| 102 |
-
logger.debug(f
|
| 103 |
|
| 104 |
# Check if prompt is empty or too short
|
| 105 |
stripped_prompt = prompt.strip()
|
| 106 |
if len(stripped_prompt) < min_length:
|
| 107 |
raise ValueError(
|
| 108 |
-
f
|
| 109 |
-
f
|
| 110 |
)
|
| 111 |
|
| 112 |
# Check if prompt is too long
|
| 113 |
if len(stripped_prompt) > max_length:
|
| 114 |
raise ValueError(
|
| 115 |
-
f
|
| 116 |
-
f
|
| 117 |
)
|
| 118 |
|
| 119 |
-
logger.debug(f
|
|
|
|
| 40 |
''
|
| 41 |
"""
|
| 42 |
if max_length <= 0:
|
| 43 |
+
logger.warning(f'Invalid max_length={max_length}. Returning empty string.')
|
| 44 |
+
return ''
|
| 45 |
|
| 46 |
is_truncated = len(text) > max_length
|
| 47 |
if is_truncated:
|
| 48 |
+
logger.debug(f'Truncated text to {max_length} characters.')
|
| 49 |
|
| 50 |
+
return text[:max_length] + ('...' if is_truncated else '')
|
| 51 |
|
| 52 |
|
| 53 |
def validate_env_var(var_name: str) -> str:
|
|
|
|
| 74 |
...
|
| 75 |
ValueError: MISSING_VAR is not set. Please ensure it is defined in your environment variables.
|
| 76 |
"""
|
| 77 |
+
value = os.environ.get(var_name, '')
|
| 78 |
if not value:
|
| 79 |
+
raise ValueError(f'{var_name} is not set. Please ensure it is defined in your environment variables.')
|
| 80 |
return value
|
| 81 |
|
| 82 |
|
|
|
|
| 99 |
>>> validate_prompt_length("", max_length=500, min_length=1)
|
| 100 |
# Raises ValueError: "Prompt must be at least 1 character(s) long."
|
| 101 |
"""
|
| 102 |
+
logger.debug(f'Prompt length being validated: {len(prompt)} characters')
|
| 103 |
|
| 104 |
# Check if prompt is empty or too short
|
| 105 |
stripped_prompt = prompt.strip()
|
| 106 |
if len(stripped_prompt) < min_length:
|
| 107 |
raise ValueError(
|
| 108 |
+
f'Prompt must be at least {min_length} character(s) long. '
|
| 109 |
+
f'Received only {len(stripped_prompt)}.'
|
| 110 |
)
|
| 111 |
|
| 112 |
# Check if prompt is too long
|
| 113 |
if len(stripped_prompt) > max_length:
|
| 114 |
raise ValueError(
|
| 115 |
+
f'The prompt exceeds the maximum allowed length of {max_length} characters. '
|
| 116 |
+
f'Your prompt contains {len(stripped_prompt)} characters.'
|
| 117 |
)
|
| 118 |
|
| 119 |
+
logger.debug(f'Prompt length validation passed for prompt: {truncate_text(stripped_prompt)}')
|