# utils/speech.py
from transformers import pipeline
import torch
import tempfile
import os
from gtts import gTTS
import base64
class SpeechProcessor:
    def __init__(self):
        # Use a lighter Whisper model for speech-to-text
        self.stt = pipeline(
            "automatic-speech-recognition",
            model="openai/whisper-tiny",  # lighter than wav2vec2
            device=-1  # run on CPU to save memory
        )
        self.tts_available = True
    def speech_to_text(self, audio_path: str) -> str:
        """Transcribe an audio file to text using the Whisper pipeline."""
        try:
            result = self.stt(audio_path)
            return result.get('text', '')
        except Exception as e:
            print(f"❌ STT Error: {e}")
            # Fallback placeholder so callers still receive a string
            return "Audio processed"
    def text_to_speech(self, text: str, filename: str) -> str:
        """Synthesize speech with gTTS, choosing Arabic or English by script."""
        try:
            output_path = f"/tmp/{filename}.mp3"
            # Pick the language: Arabic if the text contains Arabic-block characters
            if any('\u0600' <= char <= '\u06FF' for char in text):
                tts = gTTS(text=text, lang='ar')
            else:
                tts = gTTS(text=text, lang='en')
            tts.save(output_path)
            print(f"✅ TTS generated: {output_path}")
            return output_path
        except Exception as e:
            print(f"❌ TTS Error: {e}")
            # Fallback: create an empty placeholder file so callers still get a path
            output_path = f"/tmp/{filename}.wav"
            open(output_path, 'a').close()
            return output_path
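

# Minimal usage sketch, assuming gTTS and transformers are installed and that
# "sample.wav" is a hypothetical audio file on disk; adjust paths as needed.
if __name__ == "__main__":
    processor = SpeechProcessor()
    transcript = processor.speech_to_text("sample.wav")  # hypothetical input file
    print(f"Transcript: {transcript}")
    audio_path = processor.text_to_speech("Hello from the speech processor", "demo")
    print(f"Synthesized audio saved to: {audio_path}")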