Update app.py
Browse files
app.py
CHANGED
|
@@ -127,29 +127,15 @@ def ai_detect_language(text: str) -> Dict[str, Any]:
|
|
| 127 |
'detection_method': 'unicode_detection'
|
| 128 |
}
|
| 129 |
|
| 130 |
-
#
|
| 131 |
-
|
| 132 |
-
|
| 133 |
-
|
| 134 |
-
|
| 135 |
-
|
| 136 |
-
|
| 137 |
-
|
| 138 |
-
|
| 139 |
-
sync_translator = Translator()
|
| 140 |
-
detection_result = sync_translator.detect(text)
|
| 141 |
-
|
| 142 |
-
# Handle both old and new googletrans response formats
|
| 143 |
-
if hasattr(detection_result, 'lang'):
|
| 144 |
-
detected_lang = detection_result.lang
|
| 145 |
-
confidence = getattr(detection_result, 'confidence', 0.8)
|
| 146 |
-
elif isinstance(detection_result, dict):
|
| 147 |
-
detected_lang = detection_result.get('lang', 'en')
|
| 148 |
-
confidence = detection_result.get('confidence', 0.8)
|
| 149 |
-
else:
|
| 150 |
-
logger.error(f"Unexpected detection result format: {type(detection_result)}")
|
| 151 |
-
detected_lang = 'en'
|
| 152 |
-
confidence = 0.5
|
| 153 |
|
| 154 |
# Check if it's romanized Sinhala based on content analysis
|
| 155 |
is_romanized_sinhala = (
|
|
@@ -338,58 +324,19 @@ def rule_based_transliterate_singlish_to_sinhala(text: str) -> str:
|
|
| 338 |
return text # Return original text if transliteration fails
|
| 339 |
|
| 340 |
def translate_text(text: str, target_language: str = 'en') -> str:
|
| 341 |
-
"""
|
| 342 |
try:
|
| 343 |
-
|
| 344 |
-
|
| 345 |
-
|
| 346 |
-
|
| 347 |
-
# Try multiple approaches for translation
|
| 348 |
-
for attempt in range(3):
|
| 349 |
-
try:
|
| 350 |
-
# Attempt 1: Direct translation
|
| 351 |
-
if attempt == 0:
|
| 352 |
-
result = translator.translate(text, dest=target_language)
|
| 353 |
-
# Attempt 2: Reinitialize translator
|
| 354 |
-
elif attempt == 1:
|
| 355 |
-
logger.info("Reinitializing translator for attempt 2")
|
| 356 |
-
from googletrans import Translator
|
| 357 |
-
new_translator = Translator()
|
| 358 |
-
result = new_translator.translate(text, dest=target_language)
|
| 359 |
-
# Attempt 3: Use a different service endpoint
|
| 360 |
-
else:
|
| 361 |
-
logger.info("Using alternative translator service for attempt 3")
|
| 362 |
-
from googletrans import Translator
|
| 363 |
-
alt_translator = Translator(service_urls=['translate.google.com'])
|
| 364 |
-
result = alt_translator.translate(text, dest=target_language)
|
| 365 |
-
|
| 366 |
-
# Handle async results
|
| 367 |
-
if hasattr(result, '__await__'):
|
| 368 |
-
logger.warning(f"Attempt {attempt + 1}: Got coroutine, skipping")
|
| 369 |
-
continue
|
| 370 |
-
|
| 371 |
-
# Extract text from result
|
| 372 |
-
if hasattr(result, 'text'):
|
| 373 |
-
translated_text = result.text
|
| 374 |
-
logger.info(f"Successfully translated on attempt {attempt + 1}")
|
| 375 |
-
return translated_text
|
| 376 |
-
elif isinstance(result, dict) and 'text' in result:
|
| 377 |
-
translated_text = result['text']
|
| 378 |
-
logger.info(f"Successfully translated on attempt {attempt + 1}")
|
| 379 |
-
return translated_text
|
| 380 |
-
else:
|
| 381 |
-
logger.warning(f"Attempt {attempt + 1}: Unexpected result format: {type(result)}")
|
| 382 |
-
continue
|
| 383 |
-
|
| 384 |
-
except Exception as attempt_error:
|
| 385 |
-
logger.warning(f"Translation attempt {attempt + 1} failed: {attempt_error}")
|
| 386 |
-
if attempt == 2: # Last attempt
|
| 387 |
-
raise attempt_error
|
| 388 |
-
continue
|
| 389 |
|
| 390 |
-
|
| 391 |
-
logger.error("All translation attempts failed")
|
| 392 |
return text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 393 |
|
| 394 |
except Exception as e:
|
| 395 |
logger.error(f"Translation error: {e}")
|
|
@@ -665,12 +612,12 @@ def generate_response_with_rag(user_message: str, session_id: str) -> Dict[str,
|
|
| 665 |
# Create a prompt with conversation history and retrieved context
|
| 666 |
language_instruction = ""
|
| 667 |
if original_language == 'si':
|
| 668 |
-
language_instruction = "\n\nIMPORTANT: The user asked in Sinhala. Please respond in Sinhala using proper Sinhala script and formal language appropriate for policy discussions."
|
| 669 |
elif original_language == 'singlish':
|
| 670 |
if transliteration_used:
|
| 671 |
-
language_instruction = "\n\nNote: The user used Romanized Sinhala (transliterated via Swabhasha). Please respond in Sinhala using proper Sinhala script and formal language appropriate for policy discussions."
|
| 672 |
else:
|
| 673 |
-
language_instruction = "\n\nNote: The user used Singlish (Sinhala words in English letters). You may respond in English but consider using some familiar Sri Lankan terminology where appropriate."
|
| 674 |
|
| 675 |
prompt = f"""You are a helpful assistant for budget proposals in Sri Lanka. You can communicate in English, Sinhala, and understand Singlish.
|
| 676 |
|
|
|
|
| 127 |
'detection_method': 'unicode_detection'
|
| 128 |
}
|
| 129 |
|
| 130 |
+
# TEMPORARY: Skip Google Translate (googletrans) detection; in this environment its calls appear to return un-awaited coroutines instead of results
|
| 131 |
+
# Use enhanced rule-based detection instead
|
| 132 |
+
logger.info("Skipping Google Translate detection (async issue), using enhanced rule-based")
|
| 133 |
+
return enhanced_rule_based_detection(text)
|
| 134 |
+
|
| 135 |
+
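# NOTE(review): the return above appears to make the romanized-Sinhala check below unreachable; confirm this is intended
# TODO: Implement working language detection: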
|
| 136 |
+
# - Use a different language detection library
|
| 137 |
+
# - Call Google Translate API directly with requests
|
| 138 |
+
# - Use the working parts of enhanced_rule_based_detection
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 139 |
|
| 140 |
# Check if it's romanized Sinhala based on content analysis
|
| 141 |
is_romanized_sinhala = (
|
|
|
|
| 324 |
return text # Return original text if transliteration fails
|
| 325 |
|
| 326 |
def translate_text(text: str, target_language: str = 'en') -> str:
|
| 327 |
+
"""Temporary workaround: return the input text unchanged. Translation is skipped due to googletrans async issues in the HF environment, so target_language is currently ignored."""
|
| 328 |
try:
|
| 329 |
+
# TEMPORARY: Due to googletrans async issues in HF environment,
|
| 330 |
+
# we'll skip translation and return original text
|
| 331 |
+
# The chatbot can still work in the original language
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 332 |
|
| 333 |
+
logger.info(f"Translation skipped (async issue): '{text}' -> returning original")
|
|
|
|
| 334 |
return text
|
| 335 |
+
|
| 336 |
+
# TODO: Replace with a working translation service:
|
| 337 |
+
# - Use requests to call Google Translate API directly
|
| 338 |
+
# - Use a different translation library
|
| 339 |
+
# - Implement custom translation logic
|
| 340 |
|
| 341 |
except Exception as e:
|
| 342 |
logger.error(f"Translation error: {e}")
|
|
|
|
| 612 |
# Create a prompt with conversation history and retrieved context
|
| 613 |
language_instruction = ""
|
| 614 |
if original_language == 'si':
|
| 615 |
+
language_instruction = "\n\nIMPORTANT: The user asked in Sinhala. Please respond in the same language (Sinhala) using proper Sinhala script and formal language appropriate for policy discussions. The question was: '{}'".format(user_message)
|
| 616 |
elif original_language == 'singlish':
|
| 617 |
if transliteration_used:
|
| 618 |
+
language_instruction = "\n\nNote: The user used Romanized Sinhala (transliterated via Swabhasha). Please respond in Sinhala using proper Sinhala script and formal language appropriate for policy discussions. The original question was: '{}'".format(user_message)
|
| 619 |
else:
|
| 620 |
+
language_instruction = "\n\nNote: The user used Singlish (Sinhala words in English letters). You may respond in English but consider using some familiar Sri Lankan terminology where appropriate. The original question was: '{}'".format(user_message)
|
| 621 |
|
| 622 |
prompt = f"""You are a helpful assistant for budget proposals in Sri Lanka. You can communicate in English, Sinhala, and understand Singlish.
|
| 623 |
|