# Hugging Face Space: Mr-HASSAN/arabic-sign-language-yolo (status: Sleeping)
# File size: 1,550 Bytes
# Revision: f0b1a8e

from transformers import pipeline
import torch
import tempfile
import os
from gtts import gTTS
import base64


class SpeechProcessor:
    """Speech-to-text and text-to-speech helper.

    Transcription uses a ``transformers`` automatic-speech-recognition
    pipeline; synthesis uses gTTS, choosing Arabic or English depending on
    the script of the input text.
    """

    # Returned by ``speech_to_text`` when transcription fails. It is a
    # placeholder, not a real transcript; kept so existing callers that rely
    # on a non-empty string keep working.
    STT_FALLBACK_TEXT = "Audio processed"

    # Directory that receives generated audio files. ``__init__`` overrides
    # this per instance; the class-level value is the historical default.
    output_dir = "/tmp"

    # Inclusive character ranges treated as Arabic script.
    _ARABIC_RANGES = (
        ("\u0600", "\u06FF"),  # Arabic
        ("\u0750", "\u077F"),  # Arabic Supplement
        ("\u08A0", "\u08FF"),  # Arabic Extended-A
        ("\uFB50", "\uFDFF"),  # Arabic Presentation Forms-A
        # Presentation Forms-B, stopping at U+FEFC so that U+FEFF (the byte
        # order mark) in otherwise English text is not mistaken for Arabic.
        ("\uFE70", "\uFEFC"),
    )

    def __init__(
        self,
        model_name: str = "openai/whisper-tiny",
        output_dir: str = "/tmp",
    ) -> None:
        """Load the speech-recognition pipeline.

        Args:
            model_name: Model identifier passed to the pipeline. The default
                is the small Whisper checkpoint (lighter than wav2vec2).
            output_dir: Existing directory where ``text_to_speech`` writes
                its files.
        """
        # Use a lighter model for STT
        self.stt = pipeline(
            "automatic-speech-recognition",
            model=model_name,
            device=-1,  # Use CPU to save memory
        )
        self.tts_available = True
        self.output_dir = output_dir

    @classmethod
    def _detect_language(cls, text: str) -> str:
        """Return ``'ar'`` if *text* contains any Arabic character, else ``'en'``."""
        for char in text:
            if any(low <= char <= high for low, high in cls._ARABIC_RANGES):
                return "ar"
        return "en"

    def speech_to_text(self, audio_path: str) -> str:
        """Transcribe the audio file at *audio_path*.

        Returns:
            The ``'text'`` entry of the pipeline result (``''`` if absent),
            or ``STT_FALLBACK_TEXT`` if the pipeline raises.
        """
        try:
            result = self.stt(audio_path)
            return result.get('text', '')
        except Exception as e:
            # Best-effort: report the failure and hand back the placeholder
            # instead of propagating the error to the caller.
            print(f"❌ STT Error: {e}")
            return self.STT_FALLBACK_TEXT

    def text_to_speech(self, text: str, filename: str) -> str:
        """Synthesise *text* to an audio file and return the file's path.

        Args:
            text: Text to speak; Arabic script selects the Arabic voice,
                anything else the English one.
            filename: Base name of the output file, without extension. Only
                the final path component is used.

        Returns:
            Path of ``<output_dir>/<filename>.mp3`` on success. If synthesis
            fails, an empty ``<output_dir>/<filename>.wav`` placeholder is
            created (or left untouched if it already exists) and its path is
            returned instead.
        """
        # Drop any directory components so the output always stays inside
        # ``output_dir`` (e.g. "../x" or "/etc/x" both become "x").
        stem = os.path.basename(str(filename))

        try:
            # Use gTTS for reliable TTS
            output_path = os.path.join(self.output_dir, f"{stem}.mp3")
            tts = gTTS(text=text, lang=self._detect_language(text))
            tts.save(output_path)
            print(f"✅ TTS generated: {output_path}")
            return output_path

        except Exception as e:
            print(f"❌ TTS Error: {e}")
            # Fallback: create empty audio file. Append mode creates the file
            # when missing without truncating one that is already there.
            output_path = os.path.join(self.output_dir, f"{stem}.wav")
            with open(output_path, 'a'):
                pass
            return output_path