FocusFlowAI / voice.py
avaliev's picture
Initial commit
0491e54 verified
raw
history blame
6.39 kB
"""
ElevenLabs Voice Integration for FocusFlow.
Provides optional voice feedback for focus agent and Pomodoro timer.
Gracefully falls back to text-only mode if API key is missing or quota exceeded.
"""
import os
import tempfile
from typing import Optional, Dict
from pathlib import Path
class VoiceGenerator:
    """
    Handles text-to-speech generation using the ElevenLabs API.

    Designed for graceful degradation: when voice is unavailable or a request
    fails, methods return ``None`` (or an error dict from ``test_voice``)
    instead of raising.
    """

    def __init__(self):
        """Initialize the ElevenLabs client if an API key is available.

        Reads ``DEMO_ELEVEN_API_KEY`` first and falls back to
        ``ELEVEN_API_KEY``. Any failure (missing key, missing package, client
        construction error) is reported via ``print`` and leaves
        ``self.available`` set to ``False``.
        """
        # ElevenLabs client object; stays None when initialization fails.
        self.client = None
        # True only after the client has been constructed successfully.
        self.available = False
        self.voice_id = "JBFqnCBsd6RMkjVDRZzb"  # George - friendly, clear voice
        self.model_id = "eleven_turbo_v2_5"  # Fast, low-latency model

        try:
            # Check for API key (demo key first, then user key). The demo key
            # is read once so the reported key type always matches the key
            # that was actually used.
            demo_key = os.getenv("DEMO_ELEVEN_API_KEY")
            api_key = demo_key or os.getenv("ELEVEN_API_KEY")
            if not api_key:
                print("ℹ️ ElevenLabs: No API key found. Voice feedback disabled (text-only mode).")
                return

            # Imported lazily so a missing package only disables voice.
            from elevenlabs.client import ElevenLabs

            self.client = ElevenLabs(api_key=api_key)
            self.available = True
            key_type = "demo" if demo_key else "user"
            print(f"✅ ElevenLabs voice initialized ({key_type} key)")
        except ImportError:
            print("⚠️ ElevenLabs: Package not installed. Run: pip install elevenlabs")
        except Exception as e:
            print(f"⚠️ ElevenLabs: Initialization failed: {e}")

    def text_to_speech(self, text: str, emotion: str = "neutral") -> Optional[str]:
        """
        Convert text to speech and return path to temporary audio file.

        Args:
            text: Text to convert to speech. Empty, whitespace-only or
                non-string input is rejected without calling the API.
            emotion: Emotion hint (not used in current implementation)

        Returns:
            Path to temporary MP3 file, or None if voice is disabled,
            unavailable, the input is empty, or generation fails. The file is
            created with ``delete=False``; the caller owns its cleanup.
        """
        # Check if voice is enabled globally
        if os.getenv("VOICE_ENABLED", "true").lower() == "false":
            return None
        if not self.available or not self.client:
            return None
        # Nothing to speak: avoid spending an API request on empty input.
        if not isinstance(text, str) or not text.strip():
            return None

        temp_path = None
        try:
            # Generate audio using ElevenLabs API
            audio = self.client.text_to_speech.convert(
                text=text,
                voice_id=self.voice_id,
                model_id=self.model_id,
                output_format="mp3_44100_128",
            )
            # Convert generator/stream to bytes
            audio_bytes = b"".join(audio)
            if not audio_bytes:
                # A zero-byte MP3 is not playable; report it as a failure.
                print("⚠️ ElevenLabs: TTS failed: empty audio response")
                return None

            # Save to temporary file (Gradio expects file path, not data URL).
            # The context manager guarantees the handle is closed even if the
            # write raises.
            with tempfile.NamedTemporaryFile(
                delete=False,
                suffix=".mp3",
                prefix="focusflow_voice_",
            ) as temp_file:
                temp_path = temp_file.name
                temp_file.write(audio_bytes)
            return temp_path
        except Exception as e:
            # Graceful degradation - log error but don't crash
            print(f"⚠️ ElevenLabs: TTS failed: {e}")
            if temp_path is not None:
                # Do not leave a partially written file behind.
                try:
                    os.unlink(temp_path)
                except OSError:
                    pass
            return None

    def get_focus_message_audio(self, verdict: str, message: str) -> Optional[str]:
        """
        Generate voice feedback for focus check results.

        Args:
            verdict: "On Track", "Distracted", or "Idle"
            message: Text message to speak

        Returns:
            Path to temporary audio file or None
        """
        if not self.available:
            return None

        # Add emotion/tone based on verdict (for future voice modulation)
        emotion_map = {
            "On Track": "cheerful",
            "Distracted": "concerned",
            "Idle": "motivating",
        }
        # Unknown verdicts fall back to a neutral tone.
        emotion = emotion_map.get(verdict, "neutral")
        return self.text_to_speech(message, emotion=emotion)

    def get_pomodoro_audio(self, event_type: str) -> Optional[str]:
        """
        Generate voice alerts for Pomodoro timer events.

        Args:
            event_type: "work_complete" or "break_complete"; any other value
                produces a generic "Timer complete!" alert.

        Returns:
            Path to temporary audio file or None
        """
        if not self.available:
            return None

        messages = {
            "work_complete": "Great work! Time for a 5-minute break. You've earned it!",
            "break_complete": "Break's over! Let's get back to work and stay focused!",
        }
        message = messages.get(event_type, "Timer complete!")
        return self.text_to_speech(message, emotion="cheerful")

    def test_voice(self) -> Dict[str, Optional[str]]:
        """
        Test voice generation (for setup/debugging).

        Returns:
            Dict with keys "status" ("unavailable", "success" or "error"),
            "message" (human-readable description) and "audio" (path to the
            generated file, or None).
        """
        if not self.available:
            return {
                "status": "unavailable",
                "message": "Voice not available (no API key or initialization failed)",
                "audio": None,
            }

        try:
            test_message = "Hello! FocusFlow voice is working perfectly!"
            audio = self.text_to_speech(test_message)
        except Exception as e:
            return {
                "status": "error",
                "message": f"Voice test failed: {str(e)}",
                "audio": None,
            }

        if audio:
            return {
                "status": "success",
                "message": "Voice test successful!",
                "audio": audio,
            }
        return {
            "status": "error",
            "message": "Voice generation failed",
            "audio": None,
        }
# Global voice generator instance, constructed once when this module is imported.
# get_voice_status() below reads its `available` flag.
voice_generator = VoiceGenerator()
def get_voice_status() -> str:
    """
    Describe the current voice state as a short label for the UI.

    Returns:
        "✅ ElevenLabs Voice Enabled" when the module-level voice generator
        reports itself available, otherwise
        "ℹ️ Voice Disabled (text-only mode)".
    """
    enabled_label = "✅ ElevenLabs Voice Enabled"
    disabled_label = "ℹ️ Voice Disabled (text-only mode)"
    return enabled_label if voice_generator.available else disabled_label