danulr05 committed on
Commit
6f7b77d
·
verified ·
1 Parent(s): 2216d7e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +20 -10
app.py CHANGED
@@ -637,7 +637,8 @@ def extract_sources_from_search_context(search_context: str, user_language: str
637
  found_files = set()
638
 
639
  # Pattern to match "From filename.pdf (category):" or "From filename.docx (category):"
640
- from_pattern = r'From\s+([^:\s]+\.(?:pdf|docx))\s*\('
 
641
  matches = re.findall(from_pattern, search_context)
642
 
643
  for match in matches:
@@ -650,38 +651,47 @@ def extract_sources_from_search_context(search_context: str, user_language: str
650
  if pdf in search_context:
651
  found_files.add(pdf)
652
 
653
- # Filter by user language preference
654
- language_filtered_files = set()
655
 
 
656
  for pdf in found_files:
657
  # Determine document language from filename
658
  doc_language = get_document_language(pdf)
659
 
660
  # Language matching logic
 
661
  if user_language == 'en' or user_language == 'singlish':
662
  # English users can see all documents, but prefer English versions
663
  if doc_language in ['en', 'english'] or user_language == 'singlish':
664
- language_filtered_files.add(pdf)
665
  elif not any(f for f in found_files if get_document_language(f) in ['en', 'english']):
666
  # If no English version available, show other languages
667
- language_filtered_files.add(pdf)
668
  elif user_language == 'si' or user_language == 'sinhala':
669
  # Sinhala users prefer Sinhala documents
670
  if doc_language in ['si', 'sinhala']:
671
- language_filtered_files.add(pdf)
672
  elif not any(f for f in found_files if get_document_language(f) in ['si', 'sinhala']):
673
  # If no Sinhala version available, show other languages
674
- language_filtered_files.add(pdf)
675
  elif user_language == 'ta' or user_language == 'tamil':
676
  # Tamil users prefer Tamil documents
677
  if doc_language in ['ta', 'tamil']:
678
- language_filtered_files.add(pdf)
679
  elif not any(f for f in found_files if get_document_language(f) in ['ta', 'tamil']):
680
  # If no Tamil version available, show other languages
681
- language_filtered_files.add(pdf)
682
  else:
683
  # Default: show all documents
684
- language_filtered_files.add(pdf)
 
 
 
 
 
 
 
685
 
686
  # Convert to list with short names
687
  for pdf in language_filtered_files:
 
637
  found_files = set()
638
 
639
  # Pattern to match "From filename.pdf (category):" or "From filename.docx (category):"
640
+ # Updated to handle assets/pdfs/ prefix and empty parentheses, and stop at the colon
641
+ from_pattern = r'From\s+assets/pdfs/([^:]+\.(?:pdf|docx))\s*\([^)]*\)'
642
  matches = re.findall(from_pattern, search_context)
643
 
644
  for match in matches:
 
651
  if pdf in search_context:
652
  found_files.add(pdf)
653
 
654
+ # Filter by user language preference and prioritize by relevance
655
+ language_filtered_files = []
656
 
657
+ # First, collect all language-appropriate documents
658
  for pdf in found_files:
659
  # Determine document language from filename
660
  doc_language = get_document_language(pdf)
661
 
662
  # Language matching logic
663
+ should_include = False
664
  if user_language == 'en' or user_language == 'singlish':
665
  # English users can see all documents, but prefer English versions
666
  if doc_language in ['en', 'english'] or user_language == 'singlish':
667
+ should_include = True
668
  elif not any(f for f in found_files if get_document_language(f) in ['en', 'english']):
669
  # If no English version available, show other languages
670
+ should_include = True
671
  elif user_language == 'si' or user_language == 'sinhala':
672
  # Sinhala users prefer Sinhala documents
673
  if doc_language in ['si', 'sinhala']:
674
+ should_include = True
675
  elif not any(f for f in found_files if get_document_language(f) in ['si', 'sinhala']):
676
  # If no Sinhala version available, show other languages
677
+ should_include = True
678
  elif user_language == 'ta' or user_language == 'tamil':
679
  # Tamil users prefer Tamil documents
680
  if doc_language in ['ta', 'tamil']:
681
+ should_include = True
682
  elif not any(f for f in found_files if get_document_language(f) in ['ta', 'tamil']):
683
  # If no Tamil version available, show other languages
684
+ should_include = True
685
  else:
686
  # Default: show all documents
687
+ should_include = True
688
+
689
+ if should_include:
690
+ language_filtered_files.append(pdf)
691
+
692
+ # Keep only the first document (NOTE: found_files is a set, so "first" is arbitrary rather than most relevant)
693
+ if language_filtered_files:
694
+ language_filtered_files = [language_filtered_files[0]]
695
 
696
  # Convert to list with short names
697
  for pdf in language_filtered_files: