Spaces:
Sleeping
Sleeping
File size: 1,550 Bytes
f0b1a8e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 |
from transformers import pipeline
import torch
import tempfile
import os
from gtts import gTTS
import base64
class SpeechProcessor:
    """Speech-to-text and text-to-speech helper.

    Transcription goes through a Hugging Face ``automatic-speech-recognition``
    pipeline loaded with ``openai/whisper-tiny``; synthesis goes through gTTS.
    Both public methods are best-effort: failures are printed and a fallback
    value is returned instead of propagating the exception.
    """

    # Directory that receives generated audio files. Kept as a class attribute
    # so instances created without running ``__init__`` still have a value.
    output_dir = "/tmp"

    def __init__(self, output_dir: str = "/tmp"):
        """Load the speech-recognition pipeline and record the output directory.

        Args:
            output_dir: Directory where ``text_to_speech`` writes its files.
                Defaults to ``/tmp``, the location that was previously
                hard-coded.
        """
        # whisper-tiny is chosen as a lightweight model; device=-1 keeps the
        # pipeline on the CPU to save memory.
        self.stt = pipeline(
            "automatic-speech-recognition",
            model="openai/whisper-tiny",
            device=-1,
        )
        self.tts_available = True
        self.output_dir = output_dir

    def speech_to_text(self, audio_path: str) -> str:
        """Transcribe the audio file at ``audio_path``.

        Args:
            audio_path: Path handed unchanged to the recognition pipeline.

        Returns:
            The ``'text'`` entry of the pipeline result (``''`` when the key is
            absent), or the placeholder ``"Audio processed"`` when
            transcription raises.
        """
        try:
            result = self.stt(audio_path)
            return result.get('text', '')
        except Exception as e:
            # Deliberate best-effort boundary: report the failure and hand the
            # caller a placeholder rather than raising.
            print(f"❌ STT Error: {e}")
            return "Audio processed"

    @staticmethod
    def _contains_arabic(text: str) -> bool:
        """Return True if ``text`` has a character in U+0600..U+06FF."""
        return any('\u0600' <= char <= '\u06FF' for char in text)

    def text_to_speech(self, text: str, filename: str) -> str:
        """Synthesize ``text`` to an audio file using gTTS.

        The language is ``'ar'`` when ``text`` contains any character in the
        range U+0600..U+06FF and ``'en'`` otherwise.

        Args:
            text: Text to speak.
            filename: Output file name without extension; the file is placed
                in ``self.output_dir``.

        Returns:
            Path of the generated ``.mp3`` file. If synthesis fails, the path
            of an empty ``.wav`` placeholder created (if missing) in the same
            directory.
        """
        base_path = f"{self.output_dir}/{filename}"
        output_path = f"{base_path}.mp3"
        try:
            lang = 'ar' if self._contains_arabic(text) else 'en'
            gTTS(text=text, lang=lang).save(output_path)
            print(f"✅ TTS generated: {output_path}")
            return output_path
        except Exception as e:
            print(f"❌ TTS Error: {e}")
            # Fallback: make sure a (possibly empty) file exists so the caller
            # still receives a usable path. Append mode avoids truncating an
            # existing file.
            output_path = f"{base_path}.wav"
            with open(output_path, 'a'):
                pass
            return output_path