import wx
import os
import re
import json
import threading
from pathlib import Path
from rapidfuzz import fuzz, process
from typing import List, Tuple, Any, Optional


# --- Engine ---

class TextProcessor:
    """Load a file as raw bytes and keep a character-to-byte offset table.

    The constructor immediately calls ``load_and_process_file``, which fills:

    * ``text_bytes``   - the raw file content
    * ``decoded_text`` - the content decoded as UTF-8; bytes that cannot be
      decoded are preserved through the ``surrogateescape`` error handler
    * ``char_to_byte`` - list whose entry ``i`` is the byte offset at which
      character ``i`` starts; the last entry is the total encoded length
    """

    def __init__(self, file_path: str):
        self.file_path = Path(file_path)
        self.text_bytes = self.decoded_text = self.char_to_byte = None
        self.load_and_process_file()

    def load_and_process_file(self):
        """Read the file, decode it and rebuild the offset table.

        Raises:
            RuntimeError: if any of the three steps fails.
        """
        try:
            raw = self.file_path.read_bytes()
            self.text_bytes = raw
            self.decoded_text = raw.decode("utf-8", errors="surrogateescape")
            self._build_char_to_byte_mapping()
        except Exception as e:
            raise RuntimeError(f"Failed to read file {self.file_path}: {str(e)}")

    def _build_char_to_byte_mapping(self):
        """Populate ``char_to_byte`` with cumulative encoded character widths."""
        offsets = self.char_to_byte = [0]
        consumed = 0
        for ch in self.decoded_text:
            # Re-encoding one character tells how many bytes it occupies.
            consumed += len(ch.encode("utf-8", errors="surrogateescape"))
            offsets.append(consumed)


class Match:
    """A single pattern hit, located by character offsets and byte offsets."""

    def __init__(self, pattern, text, start_char, end_char):
        # The pattern that produced the hit and the text that was found.
        self.pattern, self.text = pattern, text
        # Span of the hit, in characters.
        self.start_char, self.end_char = start_char, end_char
        # Byte offsets remain None until set_byte_positions() is called.
        self.byte_start = self.byte_end = None

    def set_byte_positions(self, char_to_byte_map):
        """Look up both ends of the character span in *char_to_byte_map*."""
        offsets = char_to_byte_map
        self.byte_start = offsets[self.start_char]
        self.byte_end = offsets[self.end_char]


class SnippetExtractor:
    # Pre-compiled regex patterns for performance
    _regex_cache = {}

    # -------------
    # wildcard part
    # -------------
    @staticmethod
    def wildcards_to_regex(pattern: str) -> str:
        """
        Convert wildcard pattern to regex with caching.
        - '?'  → matches exactly one character of any type
        - '*'  → matches zero or more non-whitespace chars
        """
        try:
            # Use cache for better performance
            if pattern in SnippetExtractor._regex_cache:
                return SnippetExtractor._regex_cache[pattern]
            
            regex_parts = []
            i = 0
            while i < len(pattern):
                ch = pattern[i]
                if ch == '?':
                    regex_parts.append('.')
                    i += 1
                elif ch == '*':
                    regex_parts.append(r'(?:\S*)')
                    i += 1
                else:
                    regex_parts.append(re.escape(ch))
                    i += 1

            result = "".join(regex_parts)
            SnippetExtractor._regex_cache[pattern] = result
            return result
        except Exception as e:
            raise RuntimeError(f"Failed to convert wildcard pattern '{pattern}' to regex: {str(e)}")


    @staticmethod
    # handle wildcard patterns '?' and '*'
    def expand_to_word_boundaries(text: str, start_char: int, end_char: int, pattern: str):
        """
        Expand match boundaries depending on '*' position.
        """
        try:
            # Exact match for '?' only patterns
            if '?' in pattern and '*' not in pattern:
                return text[start_char:end_char], start_char, end_char

            expanded_start = start_char
            expanded_end = end_char

            if '*' in pattern:
                if pattern.startswith('*') and not pattern.endswith('*'):
                    # expand LEFT until whitespace - optimized with backward search
                    while expanded_start > 0 and not text[expanded_start - 1].isspace():
                        expanded_start -= 1

                elif pattern.endswith('*') and not pattern.startswith('*'):
                    # expand RIGHT until whitespace - optimized forward search
                    while expanded_end < len(text) and not text[expanded_end].isspace():
                        expanded_end += 1

                else:
                    # '*' is inside → expand both sides until visible character
                    if expanded_start > 0:
                        expanded_start -= 1
                    if expanded_end < len(text):
                        expanded_end += 1

            return text[expanded_start:expanded_end], expanded_start, expanded_end

        except Exception as e:
            raise RuntimeError(f"Failed to expand word boundaries for pattern '{pattern}': {str(e)}")


    # find wildcard matches
    @staticmethod
    def find_matches(patterns, decoded_text: str, char_to_byte_map):
        """
        Find all matches. These are also passed on to fuzzy match.
        """	
        try:
            matches = []
            
            # Pre-compile all patterns once - cached version
            compiled_patterns = {}
            for pattern in patterns:
                if not pattern:
                    continue
                if '*' in pattern or '?' in pattern:
                    regex_pattern = SnippetExtractor.wildcards_to_regex(pattern)
                    compiled_patterns[pattern] = re.compile(regex_pattern, re.IGNORECASE | re.DOTALL)
                else:
                    escaped_pattern = re.escape(pattern)
                    regex_pattern = r'\b' + escaped_pattern + r'\b'
                    compiled_patterns[pattern] = re.compile(regex_pattern, re.IGNORECASE)

            for pattern, compiled_pattern in compiled_patterns.items():
                try:
                    # Check stop event before each iteration
                    for match in compiled_pattern.finditer(decoded_text):
                        start_pos, end_pos = match.start(), match.end()
                        match_text = decoded_text[start_pos:end_pos]

                        if '*' in pattern or '?' in pattern:
                            expanded_match_text, expanded_start, expanded_end = SnippetExtractor.expand_to_word_boundaries(
                                decoded_text, start_pos, end_pos, pattern
                            )
                            match_text = expanded_match_text
                            start_pos = expanded_start
                            end_pos = expanded_end

                        match_obj = Match(pattern, match_text, start_pos, end_pos)
                        match_obj.set_byte_positions(char_to_byte_map)
                        matches.append(match_obj)
                except re.error as e:
                    raise RuntimeError(f"Regex compilation error for pattern '{pattern}': {str(e)}")

            return matches
        except Exception as e:
            raise RuntimeError(f"Failed to find matches: {str(e)}")


    # distance check of all found matches
    @staticmethod
    def filter_by_distance(matches, distance: int, buzzwords):
        """
        filter matches by distance limit given by user input.
        """
        try:
            if not matches:
                return []
            
            # Use sets for faster membership checks and avoid redundant lookups
            pattern_positions = {word: set() for word in buzzwords}
            for m in matches:
                if m.pattern in pattern_positions:
                    pattern_positions[m.pattern].add((m.start_char, m.end_char))
                    
            if any(not pos_set for pos_set in pattern_positions.values()):
                return []
                
            combined_spans = []
            first_word = list(buzzwords)[0]
            
            for start1, end1 in pattern_positions[first_word]:
                span_candidates = [(start1, end1)]
                for other_word in buzzwords:
                    if other_word == first_word:
                        continue
                    best_match = None
                    min_distance = float('inf')
                    
                    # Direct set iteration - much faster than list lookup
                    for start2, end2 in pattern_positions[other_word]:
                        dist = abs(start1 - start2)
                        if dist <= distance and dist < min_distance:
                            min_distance = dist
                            best_match = (start2, end2)
                            
                    if best_match:
                        span_candidates.append(best_match)
                        
                if len(span_candidates) == len(buzzwords):
                    min_pos = min(s for s, _ in span_candidates)
                    max_pos = max(e for _, e in span_candidates)
                    combined_spans.append((min_pos, max_pos))
                    
            return combined_spans
        except Exception as e:
            raise RuntimeError(f"Failed to filter by distance: {str(e)}")

    # snippet extraction, pre_ratio and post_ratio given from user
    @staticmethod
    def extract_snippets(matches, snippet_size, pre_ratio, post_ratio, decoded_text):
        try:
            snippets = []
            for start, end in matches:
                pre_chars = int(snippet_size * pre_ratio)
                post_chars = int(snippet_size * post_ratio)
                snippet_start = max(0, start - pre_chars)
                snippet_end = min(len(decoded_text), end + post_chars)
                snippets.append((snippet_start, snippet_end))
            return snippets
        except Exception as e:
            raise RuntimeError(f"Failed to extract snippets: {str(e)}")
    
    # merge snippet if overlapping
    @staticmethod
    def merge_snippets(snippets):
        try:
            if not snippets:
                return [], 0
                
            total_snippets = len(snippets)
            
            # Sort once instead of repeatedly during merging
            sorted_snippets = sorted(snippets, key=lambda x: x[0])
            merged = [sorted_snippets[0]]
            
            for current in sorted_snippets[1:]:
                last_end = merged[-1][1]
                if current[0] <= last_end:
                    # Fast merge - no need to check all previous ones
                    merged[-1] = (merged[-1][0], max(last_end, current[1]))
                else:
                    merged.append(current)
                    
            return merged, total_snippets
        except Exception as e:
            raise RuntimeError(f"Failed to merge snippets: {str(e)}")


    # ----------
    # Fuzzy part
    # ----------
    # use results of wildcard find_matches for fuzzy search
    @staticmethod
    def find_fuzzy_matches(decoded_text: str, wildcard_matches: List[Match], threshold: float, stop_event=None):
        """
        Search the entire text using matches from wildcard search as fuzzily searched words.
        Returns list of tuples (match_start, match_end, score, original_word) where score >= threshold.
        """
        try:
            fuzzy_results = []
            
            # Get all unique texts from wildcard matches to use as buzzwords
            buzzwords = [match.text for match in wildcard_matches if match.text.strip()]
            
            if not buzzwords:
                return fuzzy_results
                
            # Use rapidfuzz.process.extract for efficient fuzzy matching
            # Process each word in the text against our buzzwords
            words = decoded_text.split()
            processed_words = []
            
            # Create a list of (word, start_pos, end_pos) tuples to track positions
            current_pos = 0
            for word in words:
                if stop_event and stop_event.is_set():
                    raise RuntimeError("Fuzzy search was aborted")
                    
                # Find exact position of this word in original text
                try:
                    pos = decoded_text.index(word, current_pos)
                    processed_words.append((word, pos, pos + len(word)))
                    current_pos = pos + len(word)
                except ValueError:
                    # Word not found - skip it
                    continue
            
            # For each word in the document, check fuzzy matches against our buzzwords
            for word, start_pos, end_pos in processed_words:
                if stop_event and stop_event.is_set():
                    raise RuntimeError("Fuzzy search was aborted")
                    
                # Find best match among buzzwords using rapidfuzz
                try:
                    # Get top match with score >= threshold
                    matches = process.extract(
                        word, 
                        buzzwords, 
                        limit=1,
                        scorer=fuzz.ratio,
                        score_cutoff=threshold
                    )
                    
                    if matches and len(matches) > 0:
                        best_match_text, score, _ = matches[0]
                        # Add the position of this match in original text + the actual word that was matched
                        fuzzy_results.append((start_pos, end_pos, score, word))
                        
                except Exception as e:
                    # Continue with other words if one fails
                    continue
                    
            return fuzzy_results
            
        except Exception as e:
            raise RuntimeError(f"Failed to find fuzzy matches: {str(e)}")



    # filter by distance for fuzzy matches when the search type is "AND"
    # filter by distance for fuzzy matches - NEW IMPLEMENTATION
    @staticmethod
    def filter_by_distance_fuzzy(fuzzy_matches, distance_threshold):
        """
        Filter fuzzy matches requiring all buzzwords within distance threshold.
        Groups matching words together and only keeps groups where all required 
        buzzwords appear within the specified distance.
        
        Args:
            fuzzy_matches: List of tuples (start_pos, end_pos, score, original_word)
            distance_threshold: Maximum character distance between matches
            
        Returns:
            List of filtered fuzzy match tuples
        """
        try:
            if not fuzzy_matches:
                return []
                
            # Group matches by their original word (buzzword)
            word_groups = {}
            for start, end, score, word in fuzzy_matches:
                if word not in word_groups:
                    word_groups[word] = []
                word_groups[word].append((start, end, score))
            
            # Debugging output
            print(f"DEBUG: Processing {len(word_groups)} unique words from fuzzy matches")
            for word, positions in word_groups.items():
                print(f"  Word '{word}': {len(positions)} matches at positions {[pos[0] for pos in positions]}")
            
            # Get all buzzwords that were actually found
            found_buzzwords = list(word_groups.keys())
            
            if len(found_buzzwords) < 2:
                print("DEBUG: Only one unique word found - returning all matches")
                return fuzzy_matches
            
            # For multiple words, create sliding windows to find valid groups
            # This approach checks each possible combination of positions for different words
            results = []
            
            # Sort all positions by start position to make grouping easier
            all_positions = []
            for word, pos_list in word_groups.items():
                for start, end, score in pos_list:
                    all_positions.append((start, end, score, word))
            
            all_positions.sort(key=lambda x: x[0])  # Sort by start position
            
            print(f"DEBUG: Total positions to process: {len(all_positions)}")
            
            # Try to find groups where multiple buzzwords appear within distance
            i = 0
            while i < len(all_positions):
                current_start = all_positions[i][0]
                current_end = all_positions[i][1]
                
                # Create a window around this position
                window_end = current_start + distance_threshold
                
                # Collect all words in this window
                window_words = {}
                j = i
                while j < len(all_positions) and all_positions[j][0] <= window_end:
                    pos_start, pos_end, score, word = all_positions[j]
                    if word not in window_words:
                        window_words[word] = []
                    window_words[word].append((pos_start, pos_end, score))
                    j += 1
                
                # Check if we have matches for ALL required buzzwords
                if len(window_words) >= 2:  # At least two different words found together
                    # For now, just return all the original matches from this window
                    # This is a simpler approach - you could get more sophisticated later
                    print(f"DEBUG: Found group with {len(window_words)} words in range [{current_start}, {window_end}]")
                    for word, positions in window_words.items():
                        print(f"  Word '{word}': {[pos[0] for pos in positions]}")
                    
                    # Add all matches from this valid window
                    for word, positions in window_words.items():
                        for start, end, score in positions:
                            results.append((start, end, score, word))
                else:
                    print(f"DEBUG: Window [{current_start}, {window_end}] only had {len(window_words)} unique words")
                
                i = j
            
            # Remove duplicates while preserving order
            seen = set()
            final_results = []
            for item in results:
                if item not in seen:
                    seen.add(item)
                    final_results.append(item)
            
            print(f"DEBUG: Final filtered results count: {len(final_results)}")
            return final_results
            
        except Exception as e:
            raise RuntimeError(f"Failed to filter fuzzy matches by distance: {str(e)}")





    # extract fuzzy snippets
    @staticmethod
    def extract_snippets_fuzzy(matches, snippet_size, pre_ratio, post_ratio, decoded_text):
        """
        Extract snippets from fuzzy matches.
        """
        try:
            snippets = []
            for start, end, score, original_word in matches:
                # Apply ratio-based padding to include more context
                pre_chars = int(snippet_size * pre_ratio)
                post_chars = int(snippet_size * post_ratio)
                snippet_start = max(0, start - pre_chars)
                snippet_end = min(len(decoded_text), end + post_chars)
                
                snippets.append((snippet_start, snippet_end, score, original_word))
            return snippets
        except Exception as e:
            raise RuntimeError(f"Failed to extract fuzzy snippets: {str(e)}")

    

    # merge fuzzy snippets
    @staticmethod
    def merge_snippets_fuzzy(snippets):
        """
        Merge overlapping or adjacent fuzzy snippets.
        """
        try:
            if not snippets:
                return [], 0
                
            total_snippets = len(snippets)
            
            # Sort by start position
            sorted_snippets = sorted(snippets, key=lambda x: x[0])
            merged = [sorted_snippets[0]]
            
            for current in sorted_snippets[1:]:
                last_end = merged[-1][1]
                
                if current[0] <= last_end:
                    # Merge overlapping or adjacent snippets
                    new_start = merged[-1][0]
                    new_end = max(last_end, current[1])
                    
                    # Update the score to be average of both scores (or keep highest)
                    avg_score = (merged[-1][2] + current[2]) / 2.0
                    
                    merged[-1] = (new_start, new_end, avg_score, merged[-1][3])  # Keep original word from first
                else:
                    merged.append(current)
                    
            return merged, total_snippets
        except Exception as e:
            raise RuntimeError(f"Failed to merge fuzzy snippets: {str(e)}")




# --- Main search function ---

def run_search_for_file(file_path: str, config: dict, stop_event: threading.Event):
    """
    Run the wildcard search and the fuzzy search on a single file.

    Nothing is written to disk; both result texts are returned to the caller.

    Config keys read (default in parentheses): ``buzzwords`` ([]),
    ``search_type`` ("AND"), ``distance_match`` (100), ``snippet_size``
    (2000), ``pre_ratio`` (0.3), ``post_ratio`` (0.7) and
    ``fuzzy_threshold`` (96; also used when the configured value is not a
    number between 0 and 100).

    Args:
        file_path: path of the file to search.
        config: search settings, see above.
        stop_event: when set, the search is aborted.

    Returns:
        (wildcard_text, fuzzy_text): each is a sequence of JSON blocks, one
        per merged snippet, separated by blank lines.

    Raises:
        RuntimeError: if any step fails or the search is aborted.
    """
    try:
        processor = TextProcessor(file_path)
        text = processor.decoded_text
        offsets = processor.char_to_byte

        # Unique, non-blank buzzwords in the order they were given.
        buzzwords = list(dict.fromkeys(
            bw for bw in config.get("buzzwords", []) if bw.strip()
        ))
        require_all = config.get("search_type", "AND") == "AND"
        distance = config.get("distance_match", 100)
        snippet_size = config.get("snippet_size", 2000)
        pre_ratio = config.get("pre_ratio", 0.3)
        post_ratio = config.get("post_ratio", 0.7)

        def snippet_content(start, end):
            """Return the file text of [start, end) with whitespace runs collapsed."""
            raw = processor.text_bytes[offsets[start]:offsets[end]]
            return re.sub(r'\s+', ' ', raw.decode("utf-8", errors="surrogateescape"))

        def check_abort():
            if stop_event.is_set():
                raise RuntimeError("Search was aborted")

        # --- wildcard part ---
        all_matches = SnippetExtractor.find_matches(buzzwords, text, offsets)

        if require_all:
            final_matches = SnippetExtractor.filter_by_distance(all_matches, distance, buzzwords)
        else:
            final_matches = [(m.start_char, m.end_char) for m in all_matches]

        snippets = SnippetExtractor.extract_snippets(
            final_matches, snippet_size, pre_ratio, post_ratio, text
        )
        merged_snippets, _ = SnippetExtractor.merge_snippets(snippets)

        wildcard_blocks = []
        for idx, (start, end) in enumerate(merged_snippets, start=1):
            check_abort()

            # First match (in find_matches order) lying fully inside the snippet.
            first_hit = next(
                (m for m in all_matches if start <= m.start_char and end >= m.end_char),
                None,
            )
            block = [
                {"Excerpt": idx},
                {"Match Text": first_hit.text if first_hit is not None else None},
                {"Start position, match_text": first_hit.byte_start if first_hit is not None else None},
                {"Content": snippet_content(start, end)},
            ]
            wildcard_blocks.append(json.dumps(block, ensure_ascii=False, indent=1))

        wildcard_text = "\n\n".join(wildcard_blocks)

        # --- fuzzy part, same pipeline as the wildcard part ---
        ft = config.get("fuzzy_threshold", 96)
        if not isinstance(ft, (int, float)) or not (0 <= ft <= 100):
            ft = 96.0  # default threshold

        # The wildcard hits are the search terms. Passing stop_event lets the
        # user abort during the scan of the whole text.
        fuzzy_matches = SnippetExtractor.find_fuzzy_matches(
            text, all_matches, ft, stop_event
        )

        if require_all and len(buzzwords) > 1:
            filtered_fuzzy_matches = SnippetExtractor.filter_by_distance_fuzzy(
                fuzzy_matches, distance
            )
        else:
            # OR search, or a single buzzword: nothing to pair up.
            filtered_fuzzy_matches = fuzzy_matches

        fuzzy_snippets = SnippetExtractor.extract_snippets_fuzzy(
            filtered_fuzzy_matches, snippet_size, pre_ratio, post_ratio, text
        )
        merged_fuzzy_snippets, _ = SnippetExtractor.merge_snippets_fuzzy(fuzzy_snippets)

        fuzzy_blocks = []
        for idx, (start, end, score, original_word) in enumerate(merged_fuzzy_snippets, start=1):
            check_abort()

            # Byte offset of the first unfiltered fuzzy match of this word
            # that lies inside the merged snippet.
            match_byte_start = next(
                (
                    offsets[fm[0]]
                    for fm in fuzzy_matches
                    if fm[3] == original_word and fm[0] >= start and fm[1] <= end
                ),
                None,
            )
            block = [
                {"Excerpt": idx},
                {"Match Text": original_word},
                {"Score": score},
                {"Start Byte Position": match_byte_start},
                {"Content": snippet_content(start, end)},
            ]
            fuzzy_blocks.append(json.dumps(block, ensure_ascii=False, indent=1))

        fuzzy_text = "\n\n".join(fuzzy_blocks)

        return wildcard_text, fuzzy_text
    except Exception as e:
        raise RuntimeError(f"Search failed for file {file_path}: {str(e)}") from e

# ---
# GUI
# ---

class SearchThread(threading.Thread):
    """Worker thread that searches a list of files one after another.

    The outcome is delivered through ``on_complete(wildcard_text,
    fuzzy_text, finished_ok)``, which is invoked from this thread.
    """

    def __init__(self, paths, config, stop_event, on_complete):
        super().__init__()
        self.paths, self.config = paths, config
        self.stop_event = stop_event
        self.on_complete = on_complete  # callback(wildcard_text, fuzzy_text, finished_ok)

    def run(self):
        """Search every path and report the combined result.

        - stop event set before a file starts: report ("", "", False)
        - a file fails while the stop event is not set: report the error in
          both texts with finished_ok False and stop
        - a file fails while the stop event is set: the error is not
          reported; the loop moves on to the next abort check
        - otherwise: report the per-file results joined by a boundary marker
        """
        try:
            wild_parts, fuzzy_parts = [], []
            for p in self.paths:
                if self.stop_event.is_set():
                    self.on_complete("", "", False)
                    return
                try:
                    w, f = run_search_for_file(p, self.config, self.stop_event)
                except Exception as e:
                    if self.stop_event.is_set():
                        # Aborted: stay silent here.
                        continue
                    self.on_complete(f"ERROR processing {p}: {str(e)}", f"ERROR processing {p}: {str(e)}", False)
                    return
                else:
                    wild_parts.append(w)
                    fuzzy_parts.append(f)

            separator = "\n\n--- FILE BOUNDARY ---\n\n"
            self.on_complete(separator.join(wild_parts), separator.join(fuzzy_parts), True)
        except Exception as e:
            # Anything unexpected in the thread itself, including the callback.
            self.on_complete(f"THREAD ERROR: {str(e)}", f"THREAD ERROR: {str(e)}", False)

class MainFrame(wx.Frame):
    def __init__(self):
        """Create the main window and all of its child widgets.

        Layout, top to bottom: a row with file/directory pickers and the
        selected-path label; a row with the four buzzword inputs (AND/OR
        toggle between each pair) next to the Start/Abort buttons and the
        numeric configuration fields; and the two read-only result panes.
        """
        super().__init__(None, title="Text Search by Sevenof9 (v3_alpha)", size=(1200, 1000))

        # Worker bookkeeping: the running search thread and its cancel flag.
        self.worker = None
        self.stop_event = threading.Event()

        root = wx.Panel(self)
        left = wx.ALL | wx.ALIGN_LEFT  # flag combination used by most sizer items

        # --- Top row: pickers plus the label showing the chosen path ---
        self.file_picker = wx.FilePickerCtrl(root, style=wx.FLP_OPEN | wx.FLP_FILE_MUST_EXIST)
        self.dir_picker = wx.DirPickerCtrl(root)
        self.path_label = wx.StaticText(root, label="No file/folder selected")
        self.file_picker.Bind(wx.EVT_FILEPICKER_CHANGED, self.on_path_change)
        self.dir_picker.Bind(wx.EVT_DIRPICKER_CHANGED, self.on_path_change)

        picker_row = wx.BoxSizer(wx.HORIZONTAL)
        for widget, border in ((self.file_picker, 4), (self.dir_picker, 4), (self.path_label, 6)):
            picker_row.Add(widget, 0, left, border)

        # --- Middle left: four buzzword inputs with a toggle between neighbours ---
        self.buzz_inputs = []
        self.toggle_buttons = []
        buzz_column = wx.BoxSizer(wx.VERTICAL)
        input_count = 4
        for index in range(input_count):
            entry = wx.TextCtrl(root, size=(250, -1))
            self.buzz_inputs.append(entry)
            buzz_column.Add(entry, 0, left, 2)
            if index == input_count - 1:
                continue  # nothing follows the last input
            toggle = wx.Button(root, label="AND", size=(80, 24))
            toggle.Bind(wx.EVT_BUTTON, self.on_toggle)
            self.toggle_buttons.append(toggle)
            buzz_column.Add(toggle, 0, left, 2)

        # --- Middle right: Start/Abort buttons and numeric configuration ---
        controls_column = wx.BoxSizer(wx.VERTICAL)
        self.start_button = wx.Button(root, label="Start Search")
        self.abort_button = wx.Button(root, label="Abort")
        self.abort_button.Disable()  # nothing to abort until a search runs
        self.start_button.Bind(wx.EVT_BUTTON, self.on_start)
        self.abort_button.Bind(wx.EVT_BUTTON, self.on_abort)
        for button in (self.start_button, self.abort_button):
            controls_column.Add(button, 0, left, 4)

        self.cfg_fields = {}
        initial_values = (
            ("snippet_size", "2000"),
            ("pre_ratio", "0.3"),
            ("post_ratio", "0.7"),
            ("distance_match", "300"),
            ("fuzzy_threshold", "96"),
        )
        for name, initial in initial_values:
            caption = wx.StaticText(root, label=f"{name}:")
            field = wx.TextCtrl(root, value=initial, size=(50, -1))
            # Every field is re-validated whenever it loses keyboard focus.
            field.Bind(wx.EVT_KILL_FOCUS, self.on_field_focus_lost)
            self.cfg_fields[name] = field

            line = wx.BoxSizer(wx.HORIZONTAL)
            line.Add(caption, 0, left, 2)
            line.Add(field, 0, left, 2)
            controls_column.Add(line, 0, left, 2)

        middle_row = wx.BoxSizer(wx.HORIZONTAL)
        middle_row.Add(buzz_column, 0, left, 6)
        middle_row.Add(controls_column, 0, left, 6)

        # --- Bottom: wildcard and fuzzy result panes, full width ---
        wildcard_caption = wx.StaticText(root, label="Wildcard Results (output_snippets.txt):")
        self.wildcard_box = wx.TextCtrl(root, style=wx.TE_MULTILINE | wx.TE_READONLY, size=(-1, 220))
        fuzzy_caption = wx.StaticText(root, label="Fuzzy Results (output_fuzzy_snippets.txt):")
        self.fuzzy_box = wx.TextCtrl(root, style=wx.TE_MULTILINE | wx.TE_READONLY, size=(-1, 220))

        results_column = wx.BoxSizer(wx.VERTICAL)
        for caption, pane in ((wildcard_caption, self.wildcard_box), (fuzzy_caption, self.fuzzy_box)):
            results_column.Add(caption, 0, left, 2)
            results_column.Add(pane, 1, wx.EXPAND | wx.ALL, 4)

        # --- Overall vertical stacking; only the results area stretches ---
        outer = wx.BoxSizer(wx.VERTICAL)
        outer.Add(picker_row, 0, left, 6)
        outer.Add(middle_row, 0, left, 6)
        outer.Add(results_column, 1, wx.EXPAND | wx.ALL, 6)
        root.SetSizer(outer)

    def on_field_focus_lost(self, evt):
        """Handle EVT_KILL_FOCUS from a config field by re-validating all fields."""
        self.validate_all_fields()
        # Pass the event on so wx can finish its own focus handling.
        evt.Skip()

    def validate_all_fields(self):
        """Normalise all configuration fields and write the results back.

        Rules applied, in this order:

        * ``snippet_size``: truncated to an integer, clamped to 0..999999.
        * ``pre_ratio`` / ``post_ratio``: rounded to one decimal place and
          clamped to 0.1..0.9. If they do not sum to 1.0, ``post_ratio`` is
          recomputed as ``1.0 - pre_ratio`` (``pre_ratio`` always wins).
        * ``distance_match``: truncated to an integer, clamped to
          0..``snippet_size``.
        * ``fuzzy_threshold``: truncated to an integer, clamped to 1..100.

        An empty field is treated as its default value. Fields are only
        rewritten when every value parsed successfully; if any value is not
        numeric, a message box is shown and no field is modified.
        """
        defaults = {
            "snippet_size": "2000",
            "pre_ratio": "0.3",
            "post_ratio": "0.7",
            "distance_match": "300",
            "fuzzy_threshold": "96",
        }

        def clamp(value, low, high):
            # Equivalent to max(low, min(high, value)).
            return max(low, min(high, value))

        try:
            # Blank input falls back to the default for that field.
            raw = {
                name: self.cfg_fields[name].GetValue().strip() or fallback
                for name, fallback in defaults.items()
            }

            # int(float(...)) accepts "2000.0"-style input and truncates toward zero.
            snippet_size = clamp(int(float(raw["snippet_size"])), 0, 999999)

            pre_ratio = clamp(round(float(raw["pre_ratio"]), 1), 0.1, 0.9)
            post_ratio = clamp(round(float(raw["post_ratio"]), 1), 0.1, 0.9)
            # Tolerate tiny floating point error when checking the sum.
            if abs(pre_ratio + post_ratio - 1.0) > 0.001:
                post_ratio = round(1.0 - pre_ratio, 1)

            # Clamping to snippet_size already guarantees
            # distance_match <= snippet_size, so no further adjustment is needed.
            distance_match = clamp(int(float(raw["distance_match"])), 0, snippet_size)

            fuzzy_threshold = clamp(int(float(raw["fuzzy_threshold"])), 1, 100)
        except (ValueError, OverflowError) as e:
            # Non-numeric, NaN or infinite input: tell the user, leave fields as typed.
            wx.MessageBox(f"Validation Error: {str(e)}", "Error")
            return

        validated = {
            "snippet_size": snippet_size,
            "pre_ratio": pre_ratio,
            "post_ratio": post_ratio,
            "distance_match": distance_match,
            "fuzzy_threshold": fuzzy_threshold,
        }
        for name, value in validated.items():
            self.cfg_fields[name].SetValue(str(value))

    def on_path_change(self, evt):
        """Show the path picked in the file or directory picker in the path label."""
        self.path_label.SetLabel(evt.GetPath())

    def on_toggle(self, evt):
        """Flip the AND/OR mode and keep every toggle button in sync.

        The search configuration built in ``on_start`` carries a single
        ``search_type`` taken from the first toggle button only. Letting the
        buttons differ would therefore show a combination the search does not
        use, so a click on any toggle switches all of them to the new mode.
        """
        clicked = evt.GetEventObject()
        new_label = "OR" if clicked.GetLabel() == "AND" else "AND"
        for button in self.toggle_buttons:
            button.SetLabel(new_label)

    def on_abort(self, evt):
        """Request cancellation of the running search.

        Only the stop event is set here; the worker notices it on its own
        schedule. The Start button deliberately stays disabled until the
        worker reports back through ``on_search_complete`` (which re-enables
        it). Re-enabling it here would allow a new search to clear the shared
        stop event while the old worker is still running, un-cancelling it.
        """
        if self.worker is None or not self.worker.is_alive():
            return

        # Signal the worker to stop as soon as it checks the event.
        self.stop_event.set()

        # Prevent repeated abort clicks while the worker winds down.
        self.abort_button.Disable()

        # This handler runs on the GUI thread, so update the panes directly.
        # Deferring via wx.CallAfter could overwrite a final result that the
        # worker has already queued.
        self.wildcard_box.SetValue("Aborting...")
        self.fuzzy_box.SetValue("Aborting...")

    def on_start(self, evt):
        """Validate the inputs and launch a background search.

        Steps: refuse to start while a previous worker is still alive; resolve
        the selected path to a list of ``.txt`` files; read the numeric
        configuration; truncate both output files; switch the UI to the
        "running" state; start a ``SearchThread``.

        Every failure before the thread starts is reported with a message box
        and leaves the UI in its idle state.
        """
        # A previous worker may still be winding down after an abort. Starting
        # now would clear the shared stop event and let it continue.
        if self.worker is not None and self.worker.is_alive():
            wx.MessageBox("A search is still running. Please wait for it to finish.", "Error")
            return

        # The path label doubles as storage for the current selection.
        path = self.path_label.GetLabel()
        if not path or path == "No file/folder selected":
            wx.MessageBox("Please select a file or folder first.", "Error")
            return

        try:
            if os.path.isdir(path):
                # Direct children only, name-sorted, regular files ending in .txt.
                paths = [str(Path(path) / f) for f in sorted(os.listdir(path))
                         if f.lower().endswith(".txt") and os.path.isfile(os.path.join(path, f))]
                if not paths:
                    wx.MessageBox("Selected folder contains no .txt files.", "Error")
                    return
            elif not os.path.isfile(path):
                wx.MessageBox("Selected path is not a file.", "Error")
                return
            elif not path.lower().endswith(".txt"):
                wx.MessageBox("Please select a .txt file.", "Error")
                return
            else:
                paths = [path]
        except OSError as e:
            wx.MessageBox(f"Failed to access path: {str(e)}", "Error")
            return

        # prepare config
        try:
            cfg = {
                "snippet_size": int(self.cfg_fields["snippet_size"].GetValue().strip()),
                "pre_ratio": float(self.cfg_fields["pre_ratio"].GetValue().strip()),
                "post_ratio": float(self.cfg_fields["post_ratio"].GetValue().strip()),
                "distance_match": int(self.cfg_fields["distance_match"].GetValue().strip()),
                "fuzzy_threshold": float(self.cfg_fields["fuzzy_threshold"].GetValue().strip()),
            }
        except ValueError:
            wx.MessageBox("Please check numeric configuration values.", "Error")
            return

        cfg["buzzwords"] = [t.GetValue().strip() for t in self.buzz_inputs]
        # NOTE: the configuration carries one search_type; only the first
        # toggle button is consulted.
        cfg["search_type"] = "AND" if self.toggle_buttons[0].GetLabel() == "AND" else "OR"

        # Truncate the output files before touching the UI state, so an I/O
        # failure cannot leave the window stuck in "Running..." with the
        # Start button disabled and no worker to re-enable it.
        try:
            for output_name in ("output_snippets.txt", "output_fuzzy_snippets.txt"):
                Path(output_name).write_text("", encoding="utf-8", errors="surrogateescape")
        except OSError as e:
            wx.MessageBox(f"Failed to reset output files: {str(e)}", "Error")
            return

        # UI state
        self.start_button.Disable()
        self.abort_button.Enable()
        self.wildcard_box.SetValue("Running...")
        self.fuzzy_box.SetValue("Running...")

        # reset stop_event and start thread
        self.stop_event.clear()
        self.worker = SearchThread(paths, cfg, self.stop_event, self.on_search_complete)
        self.worker.start()

    def on_search_complete(self, wildcard_text, fuzzy_text, finished_ok):
        """Receive the worker's result and publish it on the GUI thread.

        Called from the worker thread, so all widget access is deferred to the
        GUI thread with ``wx.CallAfter``.

        Args:
            wildcard_text: Aggregated wildcard output, or an error message.
            fuzzy_text: Aggregated fuzzy output, or an error message.
            finished_ok: True when the search completed; False on abort/error.
        """
        def _update():
            try:
                if finished_ok:
                    self.wildcard_box.SetValue(wildcard_text)
                    self.fuzzy_box.SetValue(fuzzy_text)

                    # The texts already aggregate all processed files, so each
                    # output file is overwritten with the complete result.
                    try:
                        Path("output_snippets.txt").write_text(
                            wildcard_text, encoding="utf-8", errors="surrogateescape")
                        Path("output_fuzzy_snippets.txt").write_text(
                            fuzzy_text, encoding="utf-8", errors="surrogateescape")
                    except (OSError, UnicodeError) as e:
                        wx.MessageBox(f"Failed to write output files: {str(e)}", "Error")
                else:
                    # signals either error or aborted
                    self.wildcard_box.SetValue(wildcard_text or "Aborted / Error")
                    self.fuzzy_box.SetValue(fuzzy_text or "Aborted / Error")
            finally:
                # Always return the controls to the idle state, even if
                # publishing the result failed; otherwise Start would stay
                # disabled with no worker left to re-enable it.
                self.stop_event.clear()
                self.start_button.Enable()
                self.abort_button.Disable()

        wx.CallAfter(_update)

if __name__ == "__main__":
    # Script entry point: create the wx application, show the main window,
    # and hand control to the event loop until the window is closed.
    application = wx.App(False)
    main_window = MainFrame()
    main_window.Show()
    application.MainLoop()