Spaces:
Running
on
Zero
Running
on
Zero
| # %%writefile smart_breed_filter.py | |
| """ | |
| Smart Breed Filter - 智慧品種過濾系統 | |
| 設計原則: | |
| 1. 只對「真正危害用戶」的情況進行干預 | |
| 2. 無傷大雅的偏好差異維持原有評分邏輯 | |
| 3. 所有規則基於通用性設計,不針對特定品種硬編碼 | |
| 危害類型: | |
| - 安全風險:幼童 + 高風險行為特徵 | |
| - 生活品質嚴重影響:噪音零容忍 + 焦慮/警戒吠叫品種 | |
| """ | |
import re
from dataclasses import dataclass
from typing import Dict, List, Tuple, Optional, Set

from breed_noise_info import breed_noise_info
@dataclass
class UserPriorityContext:
    """Priority signals extracted from a user's free-text request.

    The ``@dataclass`` decorator is required: it generates ``__init__`` and is
    what invokes ``__post_init__``, which replaces a ``None``
    ``priority_dimensions`` with a fresh per-instance dict.
    """

    noise_intolerance: bool = False   # zero tolerance for noise
    has_young_children: bool = False  # young children in the household
    is_beginner: bool = False         # novice owner
    is_senior: bool = False           # elderly owner
    # Priority level per dimension; None is normalised to {} in __post_init__
    # so that instances never share one mutable default.
    priority_dimensions: Optional[Dict[str, str]] = None

    def __post_init__(self):
        if self.priority_dimensions is None:
            self.priority_dimensions = {}
class PriorityParser:
    """Semantic priority parser for free-text user requests.

    Distinguishes an "absolute requirement" from a "general preference" for
    each dimension, so that strict constraints are only triggered when the
    user explicitly emphasises them.
    """

    # Phrases signalling an absolute requirement
    ABSOLUTE_SIGNALS = [
        'most importantly', 'absolutely need', 'must have', 'essential',
        'critical', 'cannot', "can't", 'no way', 'zero tolerance',
        'very noise sensitive', 'neighbors complain', 'thin walls'
    ]

    # Phrases signalling a primary (but not absolute) requirement
    PRIMARY_SIGNALS = [
        'first', 'primarily', 'main priority', 'most important',
        'first priority', 'number one'
    ]

    # Keywords that indicate a dimension is being discussed
    DIMENSION_KEYWORDS = {
        'noise': ['quiet', 'noise', 'bark', 'silent', 'neighbors',
                  'thin walls', 'apartment noise', 'loud', 'vocal'],
        'children': ['kids', 'children', 'child', 'toddler', 'baby',
                     'infant', 'young kids', 'aged 1', 'aged 2', 'aged 3',
                     'aged 4', 'aged 5', 'preschool'],
        'exercise': ['active', 'exercise', 'running', 'hiking', 'energetic',
                     'athletic', 'jogging', 'outdoor activities'],
        'grooming': ['maintenance', 'grooming', 'shedding', 'brush', 'coat',
                     'low maintenance', 'easy care'],
    }

    # Phrases that by themselves mean noise is a hard constraint
    STRONG_NOISE_SIGNALS = [
        'thin walls', 'noise sensitive', 'neighbors complain',
        'zero tolerance', 'cannot bark', "can't bark",
        'absolutely quiet', 'must be quiet', 'noise restriction'
    ]

    # Weaker evidence: a noise word only counts together with an emphasis word
    NOISE_WORDS = ['quiet', 'noise', 'bark', 'silent', 'loud']
    EMPHASIS_WORDS = ['most importantly', 'absolutely', 'must', 'essential',
                      'critical', 'very', 'extremely', 'cannot', "can't"]

    # Emphasis words are matched as whole words. A plain substring test makes
    # 'very' fire inside "every" and 'must' inside "mustard", which wrongly
    # flags ordinary requests as noise-intolerant.
    _EMPHASIS_PATTERN = re.compile(
        r"\b(?:" + "|".join(re.escape(word) for word in EMPHASIS_WORDS) + r")\b"
    )

    # Maximum character distance between an absolute signal and a dimension
    # keyword for the signal to be attributed to that dimension.
    PROXIMITY_WINDOW = 80

    def parse(self, user_input: str) -> "UserPriorityContext":
        """Parse the user's text and return the extracted priority context.

        Args:
            user_input: Raw request text; ``None`` or empty is treated as "".

        Returns:
            A ``UserPriorityContext`` with ``noise_intolerance``,
            ``has_young_children`` and ``priority_dimensions`` filled in.
        """
        text = (user_input or "").lower()
        context = UserPriorityContext()
        context.noise_intolerance = self._detect_noise_intolerance(text)
        context.has_young_children = self._detect_young_children(text)
        context.priority_dimensions = self._detect_dimension_priorities(text)
        return context

    def _detect_noise_intolerance(self, text: str) -> bool:
        """Return True when the (lower-cased) text marks noise as a serious problem.

        Triggers on either:
        - one of ``STRONG_NOISE_SIGNALS`` (e.g. "thin walls"), or
        - a noise word together with a whole-word emphasis term.
        """
        if any(signal in text for signal in self.STRONG_NOISE_SIGNALS):
            return True

        has_noise_word = any(word in text for word in self.NOISE_WORDS)
        has_emphasis = self._EMPHASIS_PATTERN.search(text) is not None
        return has_noise_word and has_emphasis

    def _detect_young_children(self, text: str) -> bool:
        """Return True when the (lower-cased) text mentions children at all.

        Deliberately conservative for child safety: any mention of
        kids/children counts, because herding-style nipping is treated as a
        risk for children of any age.
        """
        child_signals = [
            'kids', 'children', 'child', 'toddler', 'baby', 'infant',
            'young kids', 'young children',
            'aged 1', 'aged 2', 'aged 3', 'aged 4', 'aged 5',
            '1 year', '2 year', '3 year', '4 year', '5 year',
            'preschool', 'newborn', 'family with'
        ]
        return any(signal in text for signal in child_signals)

    @staticmethod
    def _positions(text: str, needle: str) -> List[int]:
        """Return the start index of every occurrence of ``needle`` in ``text``."""
        found = []
        start = text.find(needle)
        while start != -1:
            found.append(start)
            start = text.find(needle, start + 1)
        return found

    def _detect_dimension_priorities(self, text: str) -> Dict[str, str]:
        """Classify every mentioned dimension as ABSOLUTE, PRIMARY or PREFERENCE.

        A dimension is only reported when one of its keywords occurs in the
        (lower-cased) text. It is:
        - 'ABSOLUTE' when any absolute signal occurs within
          ``PROXIMITY_WINDOW`` characters of any occurrence of one of its
          keywords (all occurrences are considered, not just the first);
        - 'PRIMARY' when any primary signal occurs anywhere in the text;
        - 'PREFERENCE' otherwise.
        """
        # Signal positions do not depend on the dimension, so compute them once.
        signal_positions = [
            pos
            for signal in self.ABSOLUTE_SIGNALS
            for pos in self._positions(text, signal)
        ]
        has_primary_signal = any(signal in text for signal in self.PRIMARY_SIGNALS)

        priorities = {}
        for dimension, keywords in self.DIMENSION_KEYWORDS.items():
            keyword_positions = [
                pos for kw in keywords for pos in self._positions(text, kw)
            ]
            if not keyword_positions:
                continue  # dimension not mentioned at all

            is_absolute = any(
                abs(signal_pos - kw_pos) < self.PROXIMITY_WINDOW
                for signal_pos in signal_positions
                for kw_pos in keyword_positions
            )
            if is_absolute:
                priorities[dimension] = 'ABSOLUTE'
            elif has_primary_signal:
                priorities[dimension] = 'PRIMARY'
            else:
                priorities[dimension] = 'PREFERENCE'
        return priorities
class BreedRiskAnalyzer:
    """Breed risk analyzer.

    Flags only genuine harm risks (persistent barking, child-safety issues);
    ordinary preference mismatches are left to the regular scoring logic.
    """

    # Anxiety-related triggers (lead to persistent barking)
    ANXIETY_TRIGGERS = ['anxiety', 'separation anxiety', 'loneliness']

    # High-alert triggers (lead to frequent barking)
    HIGH_ALERT_TRIGGERS = ['stranger alerts', 'strangers approaching',
                           'suspicious activity', 'territorial defense',
                           'protecting territory']

    # Herding / chasing instinct (nipping risk around children)
    HERDING_INDICATORS = ['herding instincts', 'herding', 'nipping']

    # Prey drive (may chase children)
    PREY_DRIVE_INDICATORS = ['prey drive', 'prey sighting', 'chase']

    @staticmethod
    def _lower_text(record: Dict, key: str, default: str = '') -> str:
        """Return ``record[key]`` as lower-cased text.

        A missing key or an explicit ``None`` value falls back to ``default``
        so that incomplete breed records do not raise ``AttributeError``.
        """
        value = record.get(key)
        if value is None:
            value = default
        return str(value).lower()

    def analyze_noise_risk(self, breed_info: Dict, noise_info: Dict) -> Dict:
        """Assess the noise risk of a breed.

        Flagged factors:
        - 'anxiety_barking': an anxiety trigger appears in the noise notes;
        - 'high_alert_barking': a high-alert trigger appears in the noise notes;
        - 'sensitive_anxiety_combo': 'sensitive' temperament plus anxiety;
        - 'high_base_noise': noise level is high / moderate-high.

        Args:
            breed_info: Breed record; reads 'Temperament'.
            noise_info: Noise record; reads 'noise_notes' and 'noise_level'
                (default 'Moderate').

        Returns:
            ``{'risk_level': 'HIGH'|'MODERATE'|'LOW'|'NONE',
            'risk_factors': [...]}``.
        """
        noise_notes = self._lower_text(noise_info, 'noise_notes')
        noise_level = self._lower_text(noise_info, 'noise_level', 'Moderate')
        temperament = self._lower_text(breed_info, 'Temperament')

        risk_factors = []

        # 1. Anxiety triggers: persistent barking
        has_anxiety = any(t in noise_notes for t in self.ANXIETY_TRIGGERS)
        if has_anxiety:
            risk_factors.append('anxiety_barking')

        # 2. High alertness: frequent barking
        if any(t in noise_notes for t in self.HIGH_ALERT_TRIGGERS):
            risk_factors.append('high_alert_barking')

        # 3. Sensitive temperament combined with anxiety is more severe
        if has_anxiety and 'sensitive' in temperament:
            risk_factors.append('sensitive_anxiety_combo')

        # 4. High baseline noise level
        if noise_level in ('high', 'moderate-high', 'moderate to high'):
            risk_factors.append('high_base_noise')

        # Only genuinely problematic combinations are rated HIGH.
        if 'sensitive_anxiety_combo' in risk_factors:
            risk_level = 'HIGH'
        elif 'anxiety_barking' in risk_factors and 'high_alert_barking' in risk_factors:
            risk_level = 'HIGH'
        elif 'anxiety_barking' in risk_factors or len(risk_factors) >= 2:
            risk_level = 'MODERATE'
        elif risk_factors:
            risk_level = 'LOW'
        else:
            risk_level = 'NONE'

        return {
            'risk_level': risk_level,
            'risk_factors': risk_factors
        }

    def analyze_child_safety_risk(self, breed_info: Dict, noise_info: Dict) -> Dict:
        """Assess the safety risk of a breed around children.

        Flagged factors:
        - 'herding_instinct': herding/nipping wording in notes or description;
        - 'prey_drive': prey-drive/chase wording in notes or description;
        - 'not_child_friendly': 'Good with Children' is "No" (any casing);
        - 'large_high_drive_instinct': large/giant size, very high exercise
          needs and a herding or prey instinct together.

        Size or energy alone is never flagged.

        Args:
            breed_info: Breed record; reads 'Description', 'Size',
                'Good with Children' (default 'Yes') and 'Exercise Needs'.
            noise_info: Noise record; reads 'noise_notes'.

        Returns:
            ``{'risk_level': 'HIGH'|'MODERATE'|'LOW'|'NONE',
            'risk_factors': [...]}``.
        """
        description = self._lower_text(breed_info, 'Description')
        noise_notes = self._lower_text(noise_info, 'noise_notes')
        size = self._lower_text(breed_info, 'Size')
        exercise = self._lower_text(breed_info, 'Exercise Needs')
        good_with_children = self._lower_text(
            breed_info, 'Good with Children', 'Yes'
        ).strip()

        risk_factors = []

        # 1. Herding instinct: genuine nipping risk
        has_herding = any(ind in noise_notes or ind in description
                          for ind in self.HERDING_INDICATORS)
        if has_herding:
            risk_factors.append('herding_instinct')

        # 2. Prey drive: chasing risk
        has_prey_drive = any(ind in noise_notes or ind in description
                             for ind in self.PREY_DRIVE_INDICATORS)
        if has_prey_drive:
            risk_factors.append('prey_drive')

        # 3. An explicit "not good with children" is a strong signal
        if good_with_children == 'no':
            risk_factors.append('not_child_friendly')

        # 4. Only large size + very high drive + an instinct counts as a risk
        is_large = size in ('large', 'giant')
        is_very_high_energy = 'very high' in exercise
        if is_large and (has_herding or has_prey_drive) and is_very_high_energy:
            risk_factors.append('large_high_drive_instinct')

        # Only genuinely dangerous combinations are rated HIGH.
        if 'not_child_friendly' in risk_factors and len(risk_factors) >= 2:
            risk_level = 'HIGH'
        elif 'large_high_drive_instinct' in risk_factors:
            risk_level = 'HIGH'
        elif 'herding_instinct' in risk_factors and is_very_high_energy:
            # Herding instinct + very high energy: nipping that is hard to control
            risk_level = 'HIGH'
        elif 'herding_instinct' in risk_factors or 'prey_drive' in risk_factors:
            # A lone herding or prey instinct is still a moderate risk
            risk_level = 'MODERATE'
        elif 'not_child_friendly' in risk_factors:
            risk_level = 'MODERATE'
        elif risk_factors:
            # Not reachable with the factors defined above; kept as a safe
            # fallback should further factors be added.
            risk_level = 'LOW'
        else:
            risk_level = 'NONE'

        return {
            'risk_level': risk_level,
            'risk_factors': risk_factors
        }
class SmartBreedFilter:
    """Smart breed filter.

    Combines priority parsing with risk analysis and only intervenes
    (exclusion or score penalty) where a breed would genuinely harm the user.
    """

    # Score assumed for a recommendation that carries neither 'final_score'
    # nor 'overall_score'.
    _DEFAULT_SCORE = 0.8

    def __init__(self):
        self.priority_parser = PriorityParser()
        self.risk_analyzer = BreedRiskAnalyzer()

    def analyze_user_context(self, user_input: str) -> "UserPriorityContext":
        """Parse the user's text into a priority context."""
        return self.priority_parser.parse(user_input)

    def should_exclude_breed(self, breed_info: Dict, noise_info: Dict,
                             user_context: "UserPriorityContext") -> Tuple[bool, str]:
        """Decide whether a breed must be dropped entirely.

        A breed is excluded when the user is noise-intolerant and the breed's
        noise risk is HIGH, or when the user has children and the breed's
        child-safety risk is HIGH.

        Returns:
            ``(excluded, reason)``; ``reason`` is "" when not excluded.
        """
        # 1. Noise intolerance + high noise risk
        if user_context.noise_intolerance:
            noise_risk = self.risk_analyzer.analyze_noise_risk(breed_info, noise_info)
            if noise_risk['risk_level'] == 'HIGH':
                return True, f"High noise risk ({', '.join(noise_risk['risk_factors'])}) conflicts with noise intolerance"

        # 2. Children present + high child-safety risk
        if user_context.has_young_children:
            child_risk = self.risk_analyzer.analyze_child_safety_risk(breed_info, noise_info)
            if child_risk['risk_level'] == 'HIGH':
                return True, f"Child safety risk ({', '.join(child_risk['risk_factors'])}) with young children"

        return False, ""

    def calculate_risk_penalty(self, breed_info: Dict, noise_info: Dict,
                               user_context: "UserPriorityContext") -> float:
        """Compute a mild down-weighting factor for non-excluding risks.

        MODERATE risk in a dimension the user cares about costs 0.1; HIGH risk
        costs 0.15 when the matching hard-exclusion flag is not set.

        Returns:
            Penalty coefficient in the range 0.0 - 0.3.
        """
        # A context whose priority_dimensions was never populated counts as
        # "no stated priorities" instead of raising TypeError on `in None`.
        dimensions = user_context.priority_dimensions or {}
        penalty = 0.0

        # Noise penalty (only when the user mentioned noise)
        if 'noise' in dimensions:
            noise_risk = self.risk_analyzer.analyze_noise_risk(breed_info, noise_info)
            if noise_risk['risk_level'] == 'MODERATE':
                penalty += 0.1
            elif noise_risk['risk_level'] == 'HIGH' and not user_context.noise_intolerance:
                penalty += 0.15

        # Child-safety penalty (only when children are relevant)
        if 'children' in dimensions or user_context.has_young_children:
            child_risk = self.risk_analyzer.analyze_child_safety_risk(breed_info, noise_info)
            if child_risk['risk_level'] == 'MODERATE':
                penalty += 0.1
            elif child_risk['risk_level'] == 'HIGH' and not user_context.has_young_children:
                penalty += 0.15

        return min(penalty, 0.3)  # cap the penalty at 30%

    @classmethod
    def _effective_score(cls, rec: Dict) -> float:
        """Return the score used for both penalising and ranking ``rec``.

        Uses 'final_score', then 'overall_score', then ``_DEFAULT_SCORE``.
        Sharing this lookup keeps unpenalised entries that only carry
        'overall_score' from being ranked as if they scored 0.
        """
        return rec.get('final_score', rec.get('overall_score', cls._DEFAULT_SCORE))

    def _resolve_breed_info(self, rec: Dict, breed: str) -> Dict:
        """Find breed details: rec['info'], then rec's own fields, then the database."""
        breed_info = rec.get('info')
        if breed_info:
            return breed_info

        # Build a normalised record from rec itself (handles key-case variants)
        breed_info = {
            'Temperament': rec.get('Temperament', rec.get('temperament', '')),
            'Description': rec.get('Description', rec.get('description', '')),
            'Size': rec.get('Size', rec.get('size', '')),
            'Exercise Needs': rec.get('Exercise Needs', rec.get('exercise_needs', '')),
            'Good with Children': rec.get('Good with Children', rec.get('good_with_children', 'Yes')),
            'Care Level': rec.get('Care Level', rec.get('care_level', '')),
        }

        # Key fields missing: fall back to the breed database.
        # NOTE(review): presumably get_dog_description returns a dict with the
        # same capitalised keys - confirm against dog_database.
        if not breed_info['Temperament'] and not breed_info['Description']:
            from dog_database import get_dog_description
            db_info = get_dog_description(breed.replace(' ', '_'))
            if db_info:
                breed_info = db_info
        return breed_info

    @staticmethod
    def _resolve_noise_info(breed: str) -> Dict:
        """Look up noise data under the breed name as given, then with underscores."""
        return breed_noise_info.get(breed) or breed_noise_info.get(
            breed.replace(' ', '_'),
            {'noise_notes': '', 'noise_level': 'Moderate'},
        )

    def filter_and_adjust_recommendations(self, recommendations: List[Dict],
                                          user_input: str) -> List[Dict]:
        """Filter and re-rank recommendations; main entry point.

        For each recommendation: resolve breed and noise data, drop the breed
        if it must be excluded, otherwise apply any risk penalty by writing
        'final_score' and 'risk_penalty' onto the recommendation dict. The
        survivors are sorted by score (descending) and given a 1-based 'rank'.

        Args:
            recommendations: Recommendation dicts; each is mutated in place.
            user_input: The user's free-text request.

        Returns:
            The surviving recommendations in ranked order.
        """
        user_context = self.analyze_user_context(user_input)
        kept = []

        for rec in recommendations:
            breed = rec.get('breed', '')
            breed_info = self._resolve_breed_info(rec, breed)
            noise_info = self._resolve_noise_info(breed)

            excluded, reason = self.should_exclude_breed(
                breed_info, noise_info, user_context
            )
            if excluded:
                print(f" [SmartFilter] Excluded {breed}: {reason}")
                continue

            penalty = self.calculate_risk_penalty(breed_info, noise_info, user_context)
            if penalty > 0:
                rec['final_score'] = self._effective_score(rec) * (1 - penalty)
                rec['risk_penalty'] = penalty

            kept.append(rec)

        # Re-rank with the same score lookup the penalty step used
        kept.sort(key=lambda item: -self._effective_score(item))
        for position, rec in enumerate(kept, start=1):
            rec['rank'] = position

        return kept
# Module-level convenience helpers
_smart_filter = None


def get_smart_filter() -> SmartBreedFilter:
    """Return the shared SmartBreedFilter, creating it on first use."""
    global _smart_filter
    if _smart_filter is not None:
        return _smart_filter
    _smart_filter = SmartBreedFilter()
    return _smart_filter
def apply_smart_filtering(recommendations: List[Dict], user_input: str) -> List[Dict]:
    """Convenience wrapper: run the shared filter over the recommendations."""
    shared_filter = get_smart_filter()
    return shared_filter.filter_and_adjust_recommendations(recommendations, user_input)