from transformers import pipeline
import torch
import tempfile
import os
from gtts import gTTS
import base64


class SpeechProcessor:
    """Speech <-> text helper.

    Wraps a Hugging Face ASR pipeline (Whisper tiny, CPU) for
    speech-to-text and gTTS for text-to-speech. Both directions are
    best-effort: failures are logged and a safe fallback value is
    returned instead of raising.
    """

    def __init__(self):
        # Use a lighter model for STT; device=-1 forces CPU to save memory.
        self.stt = pipeline(
            "automatic-speech-recognition",
            model="openai/whisper-tiny",  # Lighter than wav2vec2
            device=-1,
        )
        self.tts_available = True

    def speech_to_text(self, audio_path: str) -> str:
        """Transcribe the audio file at *audio_path*.

        Returns the recognized text, or the placeholder string
        "Audio processed" if transcription fails (best-effort contract).
        """
        try:
            result = self.stt(audio_path)
            return result.get('text', '')
        except Exception as e:
            print(f"❌ STT Error: {e}")
            return "Audio processed"

    def text_to_speech(self, text: str, filename: str) -> str:
        """Synthesize *text* to an MP3 file and return its path.

        *filename* is the base name (without extension) for the output
        file, created in the system temp directory. Arabic text is
        detected by codepoint range and voiced with the Arabic gTTS
        voice; everything else uses English. On failure an empty .wav
        placeholder is created and its path returned instead.
        """
        try:
            # FIX: the filename parameter was previously ignored and the
            # path hard-coded, so every call overwrote the same file.
            # tempfile.gettempdir() replaces the non-portable "/tmp".
            output_path = os.path.join(tempfile.gettempdir(), f"{filename}.mp3")
            # Detect Arabic by presence of any char in the U+0600-U+06FF block.
            if any('\u0600' <= char <= '\u06FF' for char in text):
                tts = gTTS(text=text, lang='ar')
            else:
                tts = gTTS(text=text, lang='en')
            tts.save(output_path)
            print(f"✅ TTS generated: {output_path}")
            return output_path
        except Exception as e:
            print(f"❌ TTS Error: {e}")
            # Fallback: create an empty audio file so callers still get a path.
            output_path = os.path.join(tempfile.gettempdir(), f"{filename}.wav")
            open(output_path, 'a').close()  # Create empty file
            return output_path