# utils/speech.py
from transformers import pipeline
import torch
import tempfile
import os
from gtts import gTTS
import base64
class SpeechProcessor:
    def __init__(self):
        # Use a lighter Whisper model for speech-to-text
        self.stt = pipeline(
            "automatic-speech-recognition",
            model="openai/whisper-tiny",  # lighter than wav2vec2
            device=-1  # run on CPU to save memory
        )
        self.tts_available = True
    def speech_to_text(self, audio_path: str) -> str:
        """Transcribe an audio file to text using the Whisper pipeline."""
        try:
            result = self.stt(audio_path)
            return result.get('text', '')
        except Exception as e:
            print(f"❌ STT Error: {e}")
            # Fallback placeholder so callers still receive a string
            return "Audio processed"
    def text_to_speech(self, text: str, filename: str) -> str:
        """Synthesize speech with gTTS, choosing Arabic or English by script."""
        try:
            output_path = f"/tmp/{filename}.mp3"
            # Pick the language: Arabic if the text contains Arabic-block characters
            if any('\u0600' <= char <= '\u06FF' for char in text):
                tts = gTTS(text=text, lang='ar')
            else:
                tts = gTTS(text=text, lang='en')
            tts.save(output_path)
            print(f"✅ TTS generated: {output_path}")
            return output_path
        except Exception as e:
            print(f"❌ TTS Error: {e}")
            # Fallback: create an empty placeholder file so callers still get a path
            output_path = f"/tmp/{filename}.wav"
            open(output_path, 'a').close()
            return output_path
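

# Minimal usage sketch, assuming gTTS and transformers are installed and that
# "sample.wav" is a hypothetical audio file on disk; adjust paths as needed.
if __name__ == "__main__":
    processor = SpeechProcessor()
    transcript = processor.speech_to_text("sample.wav")  # hypothetical input file
    print(f"Transcript: {transcript}")
    audio_path = processor.text_to_speech("Hello from the speech processor", "demo")
    print(f"Synthesized audio saved to: {audio_path}")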