# %%writefile smart_breed_filter.py """ Smart Breed Filter - 智慧品種過濾系統 設計原則: 1. 只對「真正危害用戶」的情況進行干預 2. 無傷大雅的偏好差異維持原有評分邏輯 3. 所有規則基於通用性設計,不針對特定品種硬編碼 危害類型: - 安全風險:幼童 + 高風險行為特徵 - 生活品質嚴重影響:噪音零容忍 + 焦慮/警戒吠叫品種 """ from typing import Dict, List, Tuple, Optional, Set from dataclasses import dataclass from breed_noise_info import breed_noise_info @dataclass class UserPriorityContext: """用戶優先級上下文""" noise_intolerance: bool = False # 噪音零容忍 has_young_children: bool = False # 有幼童 is_beginner: bool = False # 新手 is_senior: bool = False # 老年人 priority_dimensions: Dict[str, str] = None # 各維度優先級 def __post_init__(self): if self.priority_dimensions is None: self.priority_dimensions = {} class PriorityParser: """ 優先級語意解析器 識別用戶是否對某些維度有「絕對需求」vs「一般偏好」 只在用戶明確強調時才觸發嚴格約束 """ # 絕對需求信號詞 ABSOLUTE_SIGNALS = [ 'most importantly', 'absolutely need', 'must have', 'essential', 'critical', 'cannot', "can't", 'no way', 'zero tolerance', 'very noise sensitive', 'neighbors complain', 'thin walls' ] # 主要需求信號詞 PRIMARY_SIGNALS = [ 'first', 'primarily', 'main priority', 'most important', 'first priority', 'number one' ] # 維度關鍵詞 DIMENSION_KEYWORDS = { 'noise': ['quiet', 'noise', 'bark', 'silent', 'neighbors', 'thin walls', 'apartment noise', 'loud', 'vocal'], 'children': ['kids', 'children', 'child', 'toddler', 'baby', 'infant', 'young kids', 'aged 1', 'aged 2', 'aged 3', 'aged 4', 'aged 5', 'preschool'], 'exercise': ['active', 'exercise', 'running', 'hiking', 'energetic', 'athletic', 'jogging', 'outdoor activities'], 'grooming': ['maintenance', 'grooming', 'shedding', 'brush', 'coat', 'low maintenance', 'easy care'], } def parse(self, user_input: str) -> UserPriorityContext: """解析用戶輸入,提取優先級上下文""" text = user_input.lower() context = UserPriorityContext() # 檢測噪音零容忍 context.noise_intolerance = self._detect_noise_intolerance(text) # 檢測是否有幼童 context.has_young_children = self._detect_young_children(text) # 檢測各維度優先級 context.priority_dimensions = self._detect_dimension_priorities(text) return context def _detect_noise_intolerance(self, text: str) -> bool: """ 檢測噪音零容忍 只有當用戶明確表達噪音是嚴重問題時才觸發 例如:thin walls, neighbors complain, noise sensitive neighbors """ # 強烈噪音敏感信號 strong_signals = [ 'thin walls', 'noise sensitive', 'neighbors complain', 'zero tolerance', 'cannot bark', "can't bark", 'absolutely quiet', 'must be quiet', 'noise restriction' ] # 需要同時出現「噪音相關詞」+「強調詞」 noise_words = ['quiet', 'noise', 'bark', 'silent', 'loud'] emphasis_words = ['most importantly', 'absolutely', 'must', 'essential', 'critical', 'very', 'extremely', 'cannot', "can't"] # 檢查強烈信號 if any(signal in text for signal in strong_signals): return True # 檢查組合:噪音詞 + 強調詞 has_noise_word = any(w in text for w in noise_words) has_emphasis = any(w in text for w in emphasis_words) return has_noise_word and has_emphasis def _detect_young_children(self, text: str) -> bool: """ 檢測是否有幼童或一般兒童 對於兒童安全,我們採取保守策略: - 明確提到 kids/children 就視為有兒童風險需要考慮 - 因為牧羊本能的 nipping 對任何年齡兒童都有風險 """ # 任何提到兒童的情況都需要考慮安全 child_signals = [ 'kids', 'children', 'child', 'toddler', 'baby', 'infant', 'young kids', 'young children', 'aged 1', 'aged 2', 'aged 3', 'aged 4', 'aged 5', '1 year', '2 year', '3 year', '4 year', '5 year', 'preschool', 'newborn', 'family with' ] return any(signal in text for signal in child_signals) def _detect_dimension_priorities(self, text: str) -> Dict[str, str]: """檢測各維度的優先級""" priorities = {} for dimension, keywords in self.DIMENSION_KEYWORDS.items(): if any(kw in text for kw in keywords): # 檢查是否有絕對需求信號 if any(signal in text for signal in self.ABSOLUTE_SIGNALS): # 檢查信號是否與該維度相關(在附近) for signal in self.ABSOLUTE_SIGNALS: if signal in text: signal_pos = text.find(signal) for kw in keywords: if kw in text: kw_pos = text.find(kw) # 如果信號詞和維度關鍵詞距離在50字符內 if abs(signal_pos - kw_pos) < 80: priorities[dimension] = 'ABSOLUTE' break if dimension in priorities: break # 檢查是否有主要需求信號 if dimension not in priorities: if any(signal in text for signal in self.PRIMARY_SIGNALS): priorities[dimension] = 'PRIMARY' else: priorities[dimension] = 'PREFERENCE' return priorities class BreedRiskAnalyzer: """ 品種風險分析器 只分析「真正的危害風險」,不對一般偏好差異進行干預 """ # 焦慮相關觸發詞(會導致持續吠叫的真正問題) ANXIETY_TRIGGERS = ['anxiety', 'separation anxiety', 'loneliness'] # 高警戒觸發詞(會導致頻繁吠叫) HIGH_ALERT_TRIGGERS = ['stranger alerts', 'strangers approaching', 'suspicious activity', 'territorial defense', 'protecting territory'] # 牧羊/追逐本能(對幼童有 nipping 風險) HERDING_INDICATORS = ['herding instincts', 'herding', 'nipping'] # 獵物驅動(可能追逐小孩) PREY_DRIVE_INDICATORS = ['prey drive', 'prey sighting', 'chase'] def analyze_noise_risk(self, breed_info: Dict, noise_info: Dict) -> Dict: """ 分析品種的噪音風險 只標記「真正會造成問題」的品種: - 有焦慮吠叫傾向(持續性問題) - 高度警戒吠叫(頻繁問題) 不標記: - 偶爾興奮吠叫(正常狗行為) - 打招呼吠叫(短暫且可控) """ noise_notes = noise_info.get('noise_notes', '').lower() noise_level = noise_info.get('noise_level', 'Moderate').lower() temperament = breed_info.get('Temperament', '').lower() risk_factors = [] # 1. 焦慮觸發 - 這是真正的問題(持續性吠叫) has_anxiety = any(t in noise_notes for t in self.ANXIETY_TRIGGERS) if has_anxiety: risk_factors.append('anxiety_barking') # 2. 高度警戒 - 頻繁吠叫風險 has_high_alert = any(t in noise_notes for t in self.HIGH_ALERT_TRIGGERS) if has_high_alert: risk_factors.append('high_alert_barking') # 3. 敏感性格 + 焦慮觸發的組合(更嚴重) is_sensitive = 'sensitive' in temperament if is_sensitive and has_anxiety: risk_factors.append('sensitive_anxiety_combo') # 4. 基礎噪音等級高 if noise_level in ['high', 'moderate-high', 'moderate to high']: risk_factors.append('high_base_noise') # 計算風險等級 # 只有真正問題的組合才是 HIGH if 'sensitive_anxiety_combo' in risk_factors: risk_level = 'HIGH' elif 'anxiety_barking' in risk_factors and 'high_alert_barking' in risk_factors: risk_level = 'HIGH' elif 'anxiety_barking' in risk_factors or len(risk_factors) >= 2: risk_level = 'MODERATE' elif len(risk_factors) >= 1: risk_level = 'LOW' else: risk_level = 'NONE' return { 'risk_level': risk_level, 'risk_factors': risk_factors } def analyze_child_safety_risk(self, breed_info: Dict, noise_info: Dict) -> Dict: """ 分析品種對幼童的安全風險 只標記「真正的安全風險」: - 牧羊本能(nipping 風險) - 高獵物驅動 + 大體型(追逐風險) - Good with Children = No 且有其他風險因素 不標記: - 只是體型大但性格溫和 - 活力高但無追逐/牧羊本能 """ temperament = breed_info.get('Temperament', '').lower() description = breed_info.get('Description', '').lower() noise_notes = noise_info.get('noise_notes', '').lower() size = breed_info.get('Size', '').lower() good_with_children = breed_info.get('Good with Children', 'Yes') exercise = breed_info.get('Exercise Needs', '').lower() risk_factors = [] # 1. 牧羊本能 - 真正的 nipping 風險 has_herding = any(ind in noise_notes or ind in description for ind in self.HERDING_INDICATORS) if has_herding: risk_factors.append('herding_instinct') # 2. 獵物驅動 - 追逐風險 has_prey_drive = any(ind in noise_notes or ind in description for ind in self.PREY_DRIVE_INDICATORS) if has_prey_drive: risk_factors.append('prey_drive') # 3. Good with Children = No 是強烈信號 if good_with_children == 'No': risk_factors.append('not_child_friendly') # 4. 大體型 + 高驅動 + 牧羊/獵物本能的組合才是風險 is_large = size in ['large', 'giant'] is_very_high_energy = 'very high' in exercise if is_large and (has_herding or has_prey_drive) and is_very_high_energy: risk_factors.append('large_high_drive_instinct') # 計算風險等級 # 只有真正危險的組合才是 HIGH if 'not_child_friendly' in risk_factors and len(risk_factors) >= 2: risk_level = 'HIGH' elif 'large_high_drive_instinct' in risk_factors: risk_level = 'HIGH' elif 'herding_instinct' in risk_factors and is_very_high_energy: # 牧羊本能 + 高能量 = 對兒童的真正風險(nipping + 控制不住) risk_level = 'HIGH' elif 'herding_instinct' in risk_factors or 'prey_drive' in risk_factors: # 單獨的牧羊或獵物本能仍是中等風險 risk_level = 'MODERATE' elif 'not_child_friendly' in risk_factors: risk_level = 'MODERATE' elif len(risk_factors) >= 1: risk_level = 'LOW' else: risk_level = 'NONE' return { 'risk_level': risk_level, 'risk_factors': risk_factors } class SmartBreedFilter: """ 智慧品種過濾器 整合優先級解析和風險分析,只對真正危害用戶的情況進行干預 """ def __init__(self): self.priority_parser = PriorityParser() self.risk_analyzer = BreedRiskAnalyzer() def analyze_user_context(self, user_input: str) -> UserPriorityContext: """分析用戶輸入,提取優先級上下文""" return self.priority_parser.parse(user_input) def should_exclude_breed(self, breed_info: Dict, noise_info: Dict, user_context: UserPriorityContext) -> Tuple[bool, str]: """ 判斷是否應該排除該品種 返回: (是否排除, 排除原因) """ # 1. 噪音零容忍 + 高噪音風險 if user_context.noise_intolerance: noise_risk = self.risk_analyzer.analyze_noise_risk(breed_info, noise_info) if noise_risk['risk_level'] == 'HIGH': return True, f"High noise risk ({', '.join(noise_risk['risk_factors'])}) conflicts with noise intolerance" # 2. 有幼童 + 高兒童安全風險 if user_context.has_young_children: child_risk = self.risk_analyzer.analyze_child_safety_risk(breed_info, noise_info) if child_risk['risk_level'] == 'HIGH': return True, f"Child safety risk ({', '.join(child_risk['risk_factors'])}) with young children" return False, "" def calculate_risk_penalty(self, breed_info: Dict, noise_info: Dict, user_context: UserPriorityContext) -> float: """ 計算風險懲罰分數 只對中等風險進行輕微降權,不排除 返回: 懲罰係數 (0.0 - 0.3) """ penalty = 0.0 # 噪音相關懲罰(只在用戶關注噪音時) if 'noise' in user_context.priority_dimensions: noise_risk = self.risk_analyzer.analyze_noise_risk(breed_info, noise_info) if noise_risk['risk_level'] == 'MODERATE': penalty += 0.1 elif noise_risk['risk_level'] == 'HIGH' and not user_context.noise_intolerance: penalty += 0.15 # 兒童安全相關懲罰(只在用戶有孩子時) if 'children' in user_context.priority_dimensions or user_context.has_young_children: child_risk = self.risk_analyzer.analyze_child_safety_risk(breed_info, noise_info) if child_risk['risk_level'] == 'MODERATE': penalty += 0.1 elif child_risk['risk_level'] == 'HIGH' and not user_context.has_young_children: penalty += 0.15 return min(penalty, 0.3) # 最大懲罰 30% def filter_and_adjust_recommendations(self, recommendations: List[Dict], user_input: str) -> List[Dict]: """ 過濾並調整推薦結果 這是主要入口函數,整合所有過濾和調整邏輯 """ user_context = self.analyze_user_context(user_input) filtered_recommendations = [] for rec in recommendations: breed = rec.get('breed', '') # 智能獲取品種資訊:優先從 info 欄位,否則從 rec 本身,最後從資料庫 breed_info = rec.get('info') if not breed_info: # 嘗試從 rec 中構建標準化的 breed_info(處理大小寫差異) breed_info = { 'Temperament': rec.get('Temperament', rec.get('temperament', '')), 'Description': rec.get('Description', rec.get('description', '')), 'Size': rec.get('Size', rec.get('size', '')), 'Exercise Needs': rec.get('Exercise Needs', rec.get('exercise_needs', '')), 'Good with Children': rec.get('Good with Children', rec.get('good_with_children', 'Yes')), 'Care Level': rec.get('Care Level', rec.get('care_level', '')), } # 如果關鍵資訊缺失,從資料庫獲取 if not breed_info['Temperament'] and not breed_info['Description']: from dog_database import get_dog_description db_info = get_dog_description(breed.replace(' ', '_')) if db_info: breed_info = db_info # 獲取噪音資訊(嘗試兩種品種名稱格式) noise_info = breed_noise_info.get(breed) or breed_noise_info.get(breed.replace(' ', '_'), { 'noise_notes': '', 'noise_level': 'Moderate' }) # 檢查是否應該排除 should_exclude, reason = self.should_exclude_breed( breed_info, noise_info, user_context ) if should_exclude: print(f" [SmartFilter] Excluded {breed}: {reason}") continue # 計算風險懲罰 penalty = self.calculate_risk_penalty(breed_info, noise_info, user_context) if penalty > 0: original_score = rec.get('final_score', rec.get('overall_score', 0.8)) adjusted_score = original_score * (1 - penalty) rec['final_score'] = adjusted_score rec['risk_penalty'] = penalty filtered_recommendations.append(rec) # 重新排序 filtered_recommendations.sort(key=lambda x: -x.get('final_score', 0)) # 更新排名 for i, rec in enumerate(filtered_recommendations): rec['rank'] = i + 1 return filtered_recommendations # 模組級便捷函數 _smart_filter = None def get_smart_filter() -> SmartBreedFilter: """獲取單例過濾器""" global _smart_filter if _smart_filter is None: _smart_filter = SmartBreedFilter() return _smart_filter def apply_smart_filtering(recommendations: List[Dict], user_input: str) -> List[Dict]: """便捷函數:應用智慧過濾""" return get_smart_filter().filter_and_adjust_recommendations(recommendations, user_input)