class SemanticBreedRecommender:
    """Enhanced SBERT-based semantic dog-breed recommender.

    The class wires together a ``SemanticVectorManager`` (embeddings), a
    ``UserQueryAnalyzer`` (text analysis) and a ``MatchingScoreCalculator``
    (scoring helpers).  Most underscore-prefixed one-line methods are thin
    delegating wrappers around those collaborators.  When the optional
    "enhanced" components (query engine, constraint manager, multi-head
    scorer, score calibrator) fail to initialise they are set to ``None`` and
    the recommendation methods fall back to simpler scoring paths.
    """

    def __init__(self):
        """Initialise the vector manager, analyzers and optional enhanced components."""
        # Semantic vector manager (owns the SBERT model and breed vectors).
        self.vector_manager = SemanticVectorManager()

        # User query analyzer.
        self.query_analyzer = UserQueryAnalyzer(self.vector_manager.get_breed_list())

        # Matching score calculator.
        self.score_calculator = MatchingScoreCalculator(self.vector_manager.get_breed_list())

        self.model_name = self.vector_manager.model_name
        self.sbert_model = self.vector_manager.get_sbert_model()
        self.breed_vectors = self.vector_manager.get_breed_vectors()
        self.breed_list = self.vector_manager.get_breed_list()
        self.comparative_keywords = self.query_analyzer.comparative_keywords

        # Initialise the enhanced system components (if available).
        try:
            self.query_engine = QueryUnderstandingEngine()
            print("QueryUnderstandingEngine initialized")
            self.constraint_manager = ConstraintManager()
            print("ConstraintManager initialized")
            self.multi_head_scorer = None
            self.score_calibrator = ScoreCalibrator()
            print("ScoreCalibrator initialized")
            self.config_manager = get_config_manager()

            # The multi-head scorer needs the SBERT model.
            if self.sbert_model:
                self.multi_head_scorer = MultiHeadScorer(self.sbert_model)
                print("Multi-head scorer initialized with SBERT model")
            else:
                print("WARNING: SBERT model not available, multi_head_scorer will be None")
        except Exception as e:
            # Best effort: any failure disables ALL enhanced components so the
            # recommender can still run through its fallback paths.
            print(f"Error initializing enhanced system components: {str(e)}")
            print(traceback.format_exc())
            self.query_engine = None
            self.constraint_manager = None
            self.multi_head_scorer = None
            self.score_calibrator = None
            self.config_manager = None

    @staticmethod
    def _stable_uniform(key: str, low: float, high: float) -> float:
        """Return a deterministic pseudo-random float in ``[low, high]`` for ``key``.

        ``hash(str)`` is salted per interpreter process, so seeding with it does
        not give repeatable results across runs.  A SHA-256 digest of the key
        seeds a *private* ``random.Random`` instead, which also leaves the
        global ``random`` state untouched.
        """
        digest = hashlib.sha256(key.encode('utf-8')).digest()
        seed = int.from_bytes(digest[:8], 'big')
        return random.Random(seed).uniform(low, high)

    def _parse_comparative_preferences(self, user_input: str) -> Dict[str, float]:
        """Parse comparative preference expressions (delegates to the query analyzer)."""
        return self.query_analyzer.parse_comparative_preferences(user_input)

    def _extract_lifestyle_keywords(self, user_input: str) -> Dict[str, List[str]]:
        """Extract lifestyle keywords (delegates to the query analyzer)."""
        return self.query_analyzer.extract_lifestyle_keywords(user_input)

    def _apply_size_distribution_correction(self, recommendations: List[Dict]) -> List[Dict]:
        """Apply size-distribution correction (delegates to the score calculator)."""
        return self.score_calculator.apply_size_distribution_correction(recommendations)

    def _normalize_breed_size(self, size: str) -> str:
        """Normalise a breed size label (delegates to the score calculator)."""
        return self.score_calculator._normalize_breed_size(size)

    def _parse_user_requirements(self, user_input: str) -> Dict[str, Any]:
        """Parse user requirements (delegates to the query analyzer)."""
        return self.query_analyzer.parse_user_requirements(user_input)

    def _apply_hard_constraints(self, breed: str, user_input: str, breed_characteristics: Dict[str, Any]) -> float:
        """Return the hard-constraint penalty for ``breed`` (delegates to the score calculator)."""
        return self.score_calculator.apply_hard_constraints(breed, user_input, breed_characteristics)

    def _calculate_lifestyle_bonus(self, breed_characteristics: Dict[str, Any],
                                   lifestyle_keywords: Dict[str, List[str]]) -> float:
        """Return the lifestyle-match bonus (delegates to the score calculator)."""
        return self.score_calculator.calculate_lifestyle_bonus(breed_characteristics, lifestyle_keywords)

    def _apply_intelligent_trait_matching(self, recommendations: List[Dict], user_input: str) -> List[Dict]:
        """Apply intelligent trait matching (delegates to the score calculator)."""
        return self.score_calculator.apply_intelligent_trait_matching(recommendations, user_input)

    def _get_breed_info_from_standardized(self, standardized_info) -> Dict[str, Any]:
        """Convert standardised breed info to a dict (delegates to the score calculator)."""
        return self.score_calculator.get_breed_info_from_standardized(standardized_info)

    def _get_fallback_recommendations(self, top_k: int = 15) -> List[Dict[str, Any]]:
        """Return fallback recommendations (delegates to the score calculator)."""
        return self.score_calculator.get_fallback_recommendations(top_k)

    def _get_fallback_scoring_with_constraints(self, user_input: str, passed_breeds: set,
                                               dimensions: 'QueryDimensions',
                                               top_k: int = 15) -> List[Dict[str, Any]]:
        """Score constraint-filtered breeds when ``multi_head_scorer`` is unavailable.

        Args:
            user_input: The user's natural-language description.
            passed_breeds: Breeds that survived constraint filtering.
            dimensions: Parsed query dimensions (only used for weight priorities).
            top_k: Maximum number of recommendations to return.

        Returns:
            Up to ``top_k`` recommendation dicts sorted by descending
            ``final_score``, with ``rank`` filled in (1-based).
        """
        print(f"Fallback scoring for {len(passed_breeds)} filtered breeds")

        recommendations = []
        user_text = user_input.lower()

        # Keywords describing the user's lifestyle needs.
        lifestyle_keywords = self._extract_lifestyle_keywords(user_input)

        # Weights depend only on the query, so compute them once.
        weights = self._get_dimension_weights_from_query(user_text, dimensions)

        for breed in passed_breeds:
            breed_info = get_dog_description(breed.replace(' ', '_')) or {}
            if not breed_info:
                # No data for this breed: it cannot be scored.
                continue

            # Per-dimension match scores.
            dimension_scores = self._calculate_comprehensive_dimension_scores(
                breed, breed_info, user_text, dimensions, lifestyle_keywords
            )

            # Weighted average of the dimension scores.
            weighted_sum = sum(dimension_scores.get(dim, 0.7) * weights.get(dim, 1.0) for dim in dimension_scores)
            total_weight = sum(weights.get(dim, 1.0) for dim in dimension_scores)
            final_score = weighted_sum / total_weight if total_weight > 0 else 0.7

            # Keep the score in a sensible range (very good matches may exceed 90%).
            final_score = max(0.45, min(0.98, final_score))
            dimension_scores['overall'] = final_score

            recommendation = {
                'breed': breed.replace('_', ' '),
                'rank': 0,
                'overall_score': final_score,
                'final_score': final_score,
                'scores': dimension_scores,
                'size': breed_info.get('Size', 'Unknown'),
                'temperament': breed_info.get('Temperament', ''),
                'exercise_needs': breed_info.get('Exercise Needs', 'Moderate'),
                'grooming_needs': breed_info.get('Grooming Needs', 'Moderate'),
                'good_with_children': breed_info.get('Good with Children', 'Yes'),
                'lifespan': breed_info.get('Lifespan', '10-12 years'),
                'description': breed_info.get('Description', ''),
                'search_type': 'fallback_with_constraints',
            }
            recommendations.append(recommendation)

        # Highest score first.
        recommendations.sort(key=lambda x: -x['final_score'])

        # Assign ranks to the entries that will be returned.
        for i, rec in enumerate(recommendations[:top_k]):
            rec['rank'] = i + 1

        print(f"Generated {len(recommendations[:top_k])} fallback recommendations")
        return recommendations[:top_k]

    def _calculate_comprehensive_dimension_scores(self, breed: str, breed_info: Dict, user_text: str,
                                                  dimensions, lifestyle_keywords: Dict) -> Dict[str, float]:
        """Compute keyword-driven scores for seven compatibility dimensions.

        Args:
            breed: Breed name (used to look up noise information).
            breed_info: Breed attribute dict (``Size``, ``Temperament``, ...).
            user_text: The user's query; callers pass it lower-cased.
            dimensions: Unused here; kept for interface compatibility.
            lifestyle_keywords: Unused here; kept for interface compatibility.

        Returns:
            Dict with keys ``space``, ``exercise``, ``grooming``, ``noise``,
            ``family``, ``experience`` and ``health``.
        """
        scores = {}

        temperament = breed_info.get('Temperament', '').lower()
        size = breed_info.get('Size', 'Medium').lower()
        exercise_needs = breed_info.get('Exercise Needs', 'Moderate').lower()
        grooming_needs = breed_info.get('Grooming Needs', 'Moderate').lower()
        good_with_children = breed_info.get('Good with Children', 'Yes')
        care_level = breed_info.get('Care Level', 'Moderate').lower()

        # 1. Space compatibility
        space_score = 0.7
        if 'apartment' in user_text or 'small space' in user_text:
            if 'small' in size or 'toy' in size:
                space_score = 0.96
            elif 'medium' in size:
                space_score = 0.78
            elif 'large' in size:
                space_score = 0.52
            else:
                space_score = 0.45
        elif 'house' in user_text or 'yard' in user_text:
            if 'large' in size:
                space_score = 0.92
            elif 'medium' in size:
                space_score = 0.88
            else:
                space_score = 0.82
        scores['space'] = space_score

        # 2. Exercise compatibility
        exercise_score = 0.7
        user_wants_high = any(w in user_text for w in ['energetic', 'active', 'running', 'hiking', 'athletic'])
        user_wants_low = any(w in user_text for w in ['low maintenance', 'relaxed', 'calm', 'couch'])

        if user_wants_high:
            # 'very high' must be tested before 'high' (substring match).
            if 'very high' in exercise_needs:
                exercise_score = 0.98
            elif 'high' in exercise_needs:
                exercise_score = 0.92
            elif 'moderate' in exercise_needs:
                exercise_score = 0.68
            else:
                exercise_score = 0.48
        elif user_wants_low:
            if 'low' in exercise_needs:
                exercise_score = 0.96
            elif 'moderate' in exercise_needs:
                exercise_score = 0.78
            elif 'high' in exercise_needs:
                exercise_score = 0.52
            else:
                exercise_score = 0.42
        else:
            # No stated preference: favour moderate exercise needs.
            if 'moderate' in exercise_needs:
                exercise_score = 0.88
            elif 'low' in exercise_needs or 'high' in exercise_needs:
                exercise_score = 0.72
            else:
                exercise_score = 0.65
        scores['exercise'] = exercise_score

        # 3. Grooming compatibility
        grooming_score = 0.7
        user_wants_low_maintenance = any(w in user_text for w in ['low maintenance', 'easy care', 'minimal grooming'])

        if user_wants_low_maintenance:
            if 'low' in grooming_needs or 'minimal' in grooming_needs:
                grooming_score = 0.96
            elif 'moderate' in grooming_needs:
                grooming_score = 0.75
            else:
                grooming_score = 0.50
        else:
            if 'low' in grooming_needs:
                grooming_score = 0.85
            elif 'moderate' in grooming_needs:
                grooming_score = 0.78
            else:
                grooming_score = 0.70
        scores['grooming'] = grooming_score

        # 4. Noise compatibility
        noise_score = 0.7
        user_wants_quiet = any(w in user_text for w in ['quiet', 'silent', 'noise', 'bark', 'neighbors'])

        if user_wants_quiet:
            # Look the breed up in breed_noise_info.
            noise_info = breed_noise_info.get(breed.replace(' ', '_'), {})
            noise_level = noise_info.get('noise_level', 'Moderate').lower()

            if 'low' in noise_level or 'quiet' in noise_level:
                noise_score = 0.97
            elif 'moderate' in noise_level:
                noise_score = 0.72
            elif 'high' in noise_level:
                noise_score = 0.45
            else:
                # Unrecognised noise level: infer from temperament.
                # NOTE(review): this else is assumed to belong to the
                # noise-level chain (not to `if user_wants_quiet`) - confirm.
                if any(w in temperament for w in ['calm', 'quiet', 'gentle', 'reserved']):
                    noise_score = 0.88
                elif any(w in temperament for w in ['alert', 'vocal', 'energetic']):
                    noise_score = 0.55
                else:
                    noise_score = 0.70
        scores['noise'] = noise_score

        # 5. Family compatibility
        family_score = 0.7
        has_family_context = any(w in user_text for w in ['kids', 'children', 'family', 'child'])

        if has_family_context:
            if good_with_children == 'Yes':
                family_score = 0.94
                # Extra credit for a gentle temperament.
                if any(w in temperament for w in ['gentle', 'friendly', 'patient', 'loving']):
                    family_score = min(0.98, family_score + 0.04)
            elif good_with_children == 'No':
                family_score = 0.32
            else:
                family_score = 0.62
        else:
            family_score = 0.76 if good_with_children == 'Yes' else 0.70
        scores['family'] = family_score

        # 6. Experience compatibility
        experience_score = 0.7
        is_beginner = any(w in user_text for w in ['first dog', 'first time', 'beginner', 'new owner', 'never had'])

        if is_beginner:
            # How beginner-friendly is the breed?
            if 'low' in care_level or 'easy' in care_level:
                experience_score = 0.94
            elif 'moderate' in care_level:
                experience_score = 0.78
            else:
                experience_score = 0.52

            # Temperament adjustments.
            if any(w in temperament for w in ['eager to please', 'trainable', 'intelligent', 'friendly']):
                experience_score = min(0.98, experience_score + 0.08)
            if any(w in temperament for w in ['stubborn', 'independent', 'strong-willed']):
                experience_score = max(0.38, experience_score - 0.18)
        else:
            experience_score = 0.80
        scores['experience'] = experience_score

        # 7. Health score (derived from the average of the lifespan range)
        health_score = 0.75
        lifespan = breed_info.get('Lifespan', '10-12 years')
        try:
            years = [int(y) for y in lifespan.replace(' years', '').split('-') if y.strip().isdigit()]
            if years:
                avg_lifespan = sum(years) / len(years)
                if avg_lifespan >= 14:
                    health_score = 0.94
                elif avg_lifespan >= 12:
                    health_score = 0.85
                elif avg_lifespan >= 10:
                    health_score = 0.75
                else:
                    health_score = 0.62
        except (AttributeError, TypeError, ValueError):
            # Missing or malformed lifespan: keep the neutral default.
            pass
        scores['health'] = health_score

        return scores

    def _get_dimension_weights_from_query(self, user_text: str, dimensions) -> Dict[str, float]:
        """Compute per-dimension weights from the query text and parsed priorities.

        Args:
            user_text: The user's query; callers pass it lower-cased.
            dimensions: Object that may expose a ``dimension_priorities``
                mapping of dimension name -> weight; anything else is ignored.

        Returns:
            Weight per scoring dimension (``space``, ``exercise``, ``grooming``,
            ``noise``, ``family``, ``experience``, ``health``).
        """
        weights = {
            'space': 1.0,
            'exercise': 1.0,
            'grooming': 1.0,
            'noise': 1.0,
            'family': 1.0,
            'experience': 1.0,
            'health': 0.8
        }

        # Apply explicit priorities carried by the parsed dimensions.
        if hasattr(dimensions, 'dimension_priorities'):
            priority_map = getattr(dimensions, 'dimension_priorities', {})
            for dim, priority in priority_map.items():
                if dim in weights:
                    weights[dim] = priority
                # The query layer calls the space dimension 'size'.
                if dim == 'size':
                    weights['space'] = max(weights['space'], priority)

        # Keyword-based reinforcement (never lowers an existing weight).
        if any(w in user_text for w in ['quiet', 'noise', 'bark', 'neighbors', 'thin walls']):
            weights['noise'] = max(weights['noise'], 2.2)
        if any(w in user_text for w in ['kids', 'children', 'family', 'child']):
            weights['family'] = max(weights['family'], 2.0)
        if any(w in user_text for w in ['first', 'beginner', 'new owner']):
            weights['experience'] = max(weights['experience'], 2.0)
        if any(w in user_text for w in ['apartment', 'small space', 'studio']):
            weights['space'] = max(weights['space'], 1.8)
        if any(w in user_text for w in ['energetic', 'active', 'running', 'hiking']):
            weights['exercise'] = max(weights['exercise'], 2.0)
        if any(w in user_text for w in ['low maintenance', 'easy care']):
            weights['grooming'] = max(weights['grooming'], 1.8)

        return weights

    def _calculate_real_dimension_scores(self, breed: str, breed_info: Dict, user_input: str,
                                         overall_score: float) -> Dict[str, float]:
        """Compute dimension scores from breed traits and the user's request.

        Args:
            breed: Breed name.
            breed_info: Breed attribute dict; a falsy value is treated as ``{}``.
            user_input: The user's text (lower-cased internally).
            overall_score: Overall score, copied into the result as ``overall``.

        Returns:
            Dict with keys ``space``, ``exercise``, ``grooming``,
            ``experience``, ``noise``, ``family`` and ``overall``.
        """
        if not breed_info:
            breed_info = {}

        user_text = user_input.lower()
        temperament = breed_info.get('Temperament', '').lower()
        size = breed_info.get('Size', 'Medium').lower()
        exercise_needs = breed_info.get('Exercise Needs', 'Moderate').lower()
        grooming_needs = breed_info.get('Grooming Needs', 'Moderate').lower()
        good_with_children = breed_info.get('Good with Children', 'Yes')
        care_level = breed_info.get('Care Level', 'Moderate').lower()

        scores = {}

        # 1. Space compatibility
        space_score = 0.7
        if 'apartment' in user_text or 'small' in user_text:
            if 'small' in size:
                space_score = 0.9
            elif 'medium' in size:
                space_score = 0.7
            elif 'large' in size:
                space_score = 0.5
            elif 'giant' in size:
                space_score = 0.3
        elif 'house' in user_text or 'yard' in user_text:
            if 'large' in size or 'giant' in size:
                space_score = 0.85
            else:
                space_score = 0.8
        scores['space'] = space_score

        # 2. Exercise compatibility
        exercise_score = 0.7
        if 'low' in exercise_needs or 'minimal' in exercise_needs:
            if any(term in user_text for term in ['work full time', 'busy', 'low exercise', 'not much exercise']):
                exercise_score = 0.9
            else:
                exercise_score = 0.75
        elif 'high' in exercise_needs or 'very high' in exercise_needs:
            if any(term in user_text for term in ['active', 'running', 'hiking', 'exercise']):
                exercise_score = 0.9
            elif any(term in user_text for term in ['work full time', 'busy']):
                exercise_score = 0.5
            else:
                exercise_score = 0.65
        else:  # moderate
            exercise_score = 0.75
        scores['exercise'] = exercise_score

        # 3. Grooming / maintenance compatibility
        grooming_score = 0.7
        if 'low' in grooming_needs:
            if any(term in user_text for term in ['low maintenance', 'low-maintenance', 'easy care', 'minimal grooming']):
                grooming_score = 0.9
            else:
                grooming_score = 0.8
        elif 'high' in grooming_needs:
            if any(term in user_text for term in ['low maintenance', 'low-maintenance', 'easy care']):
                grooming_score = 0.4
            else:
                grooming_score = 0.6

        # Sensitive breeds need extra care.
        if 'sensitive' in temperament:
            grooming_score -= 0.1

        # Certain breeds need special care.
        breed_lower = breed.lower()
        if any(term in breed_lower for term in ['italian', 'greyhound', 'whippet', 'hairless']):
            if any(term in user_text for term in ['low maintenance', 'low-maintenance', 'easy']):
                grooming_score -= 0.15

        scores['grooming'] = max(0.2, grooming_score)

        # 4. Experience compatibility - a key dimension
        experience_score = 0.7
        is_beginner = any(term in user_text for term in ['first dog', 'first time', 'beginner', 'new to dogs', 'never owned', 'never had'])

        if is_beginner:
            # Beginner assessment based on care level.
            if 'low' in care_level:
                experience_score = 0.85
            elif 'moderate' in care_level:
                experience_score = 0.65
            elif 'high' in care_level:
                experience_score = 0.45

            # Temperament penalties - these matter a lot for beginners.
            difficult_traits = ['sensitive', 'stubborn', 'independent', 'dominant', 'aggressive', 'nervous', 'shy', 'timid', 'alert']
            for trait in difficult_traits:
                if trait in temperament:
                    if trait == 'sensitive':
                        experience_score -= 0.15  # sensitive dogs are hard for beginners
                    elif trait == 'aggressive':
                        experience_score -= 0.25
                    elif trait in ['stubborn', 'independent', 'dominant']:
                        experience_score -= 0.12
                    else:
                        experience_score -= 0.08

            # Bonus for easy-going traits.
            easy_traits = ['friendly', 'gentle', 'eager to please', 'patient', 'calm', 'outgoing']
            for trait in easy_traits:
                if trait in temperament:
                    experience_score += 0.08

            # Trainability adjustment when the user asks for it.
            if any(term in user_text for term in ['easy to train', 'trainable']):
                if any(term in temperament for term in ['eager to please', 'intelligent', 'trainable']):
                    experience_score += 0.1
                elif any(term in temperament for term in ['stubborn', 'independent']):
                    experience_score -= 0.1
        else:
            # Experienced owner.
            experience_score = 0.8

        scores['experience'] = max(0.2, min(0.95, experience_score))

        # 5. Noise compatibility
        noise_score = 0.75
        if any(term in user_text for term in ['quiet', 'apartment', 'neighbors']):
            if any(term in temperament for term in ['quiet', 'calm', 'gentle']):
                noise_score = 0.9
            elif any(term in temperament for term in ['alert', 'vocal', 'barking']):
                noise_score = 0.5
        scores['noise'] = noise_score

        # 6. Family compatibility
        family_score = 0.7
        if any(term in user_text for term in ['children', 'kids', 'family']):
            if good_with_children == 'Yes' or good_with_children == True:
                family_score = 0.9
                if any(term in temperament for term in ['gentle', 'patient', 'friendly']):
                    family_score = 0.95
            else:
                family_score = 0.35
        scores['family'] = family_score

        # 7. Overall
        scores['overall'] = overall_score

        return scores

    def get_enhanced_semantic_recommendations(self, user_input: str, top_k: int = 15) -> List[Dict[str, Any]]:
        """Multi-stage enhanced recommendation pipeline.

        Stages: query understanding -> constraint filtering -> multi-head
        scoring -> score calibration -> result assembly -> size-distribution
        correction -> intelligent trait matching.  Any exception (including
        the ``ValueError`` raised when no breed passes the constraints) is
        logged and answered with ``get_semantic_recommendations``.

        Args:
            user_input: The user's natural-language description.
            top_k: Number of recommendations to return.

        Returns:
            List of recommendation dicts.
        """
        try:
            # Stage 1: query understanding
            if self.query_engine:
                dimensions = self.query_engine.analyze_query(user_input)
                print(f"Query dimensions detected: {len(dimensions.spatial_constraints + dimensions.activity_level + dimensions.noise_preferences + dimensions.size_preferences + dimensions.family_context + dimensions.maintenance_level + dimensions.special_requirements)} total dimensions")
            else:
                print("Query engine not available, using basic analysis")
                return self.get_semantic_recommendations(user_input, top_k)

            # Stage 2: apply constraints
            if self.constraint_manager:
                filter_result = self.constraint_manager.apply_constraints(dimensions, min_candidates=max(8, top_k))
                print(f"Constraint filtering: {len(self.breed_list)} -> {len(filter_result.passed_breeds)} candidates")

                if not filter_result.passed_breeds:
                    error_msg = f"No dog breeds match your requirements after applying constraints. Applied constraints: {filter_result.applied_constraints}. Consider relaxing some requirements."
                    print(f"ERROR: {error_msg}")
                    raise ValueError(error_msg)
            else:
                print("Constraint manager not available, using all breeds")
                # Ad-hoc stand-in exposing the attributes read below.
                filter_result = type('FilterResult', (), {
                    'passed_breeds': self.breed_list,
                    'applied_constraints': [],
                    'relaxed_constraints': [],
                    'warnings': []
                })()

            # Stage 3: multi-head scoring
            if self.multi_head_scorer:
                breed_scores = self.multi_head_scorer.score_breeds(filter_result.passed_breeds, dimensions)
                print(f"Multi-head scoring completed for {len(breed_scores)} breeds")

                # Debug: show the first five scores with their breakdown.
                for bs in breed_scores[:5]:
                    print(f"  {bs.breed_name}: final={bs.final_score:.3f}, breakdown={bs.dimensional_breakdown}")
            else:
                # Fallback scoring that still honours the constraint filter.
                print("Multi-head scorer not available, using fallback scoring with constraint filtering")
                fallback_results = self._get_fallback_scoring_with_constraints(
                    user_input, filter_result.passed_breeds, dimensions, top_k
                )
                return fallback_results

            # Stage 4: score calibration
            if self.score_calibrator:
                breed_score_tuples = [(score.breed_name, score.final_score) for score in breed_scores]
                calibration_result = self.score_calibrator.calibrate_scores(breed_score_tuples)
                print(f"Score calibration: method={calibration_result.calibration_method}")
            else:
                print("Score calibrator not available, using raw scores")
                calibration_result = type('CalibrationResult', (), {
                    'score_mapping': {score.breed_name: score.final_score for score in breed_scores},
                    'calibration_method': 'none'
                })()

            # Stage 5: build the final recommendations
            final_recommendations = []

            for i, breed_score in enumerate(breed_scores[:top_k]):
                breed_name = breed_score.breed_name

                # Calibrated score (raw score when the breed is not in the mapping).
                calibrated_score = calibration_result.score_mapping.get(breed_name, breed_score.final_score)

                # Standardised breed info, falling back to the raw description.
                if self.config_manager:
                    standardized_info = get_standardized_breed_data(breed_name.replace(' ', '_'))
                    if standardized_info:
                        breed_info = self._get_breed_info_from_standardized(standardized_info)
                    else:
                        breed_info = get_dog_description(breed_name.replace(' ', '_')) or {}
                else:
                    breed_info = get_dog_description(breed_name.replace(' ', '_')) or {}

                # Translate dimensional_breakdown into the score keys the UI expects.
                breakdown = breed_score.dimensional_breakdown or {}
                ui_scores = {
                    'space': breakdown.get('spatial_compatibility', 0.7),
                    'exercise': breakdown.get('activity_compatibility', 0.7),
                    'grooming': breakdown.get('maintenance_compatibility', 0.7),
                    'experience': breakdown.get('experience_compatibility', 0.7),
                    'noise': breakdown.get('noise_compatibility', 0.7),
                    'family': breakdown.get('family_compatibility', 0.7),
                    'health': breakdown.get('health_compatibility', 0.7),
                    'overall': calibrated_score
                }

                recommendation = {
                    'breed': breed_name,
                    'rank': i + 1,
                    'overall_score': calibrated_score,
                    'final_score': calibrated_score,
                    'semantic_score': breed_score.semantic_component,
                    'attribute_score': breed_score.attribute_component,
                    'bidirectional_bonus': breed_score.bidirectional_bonus,
                    'confidence_score': breed_score.confidence_score,
                    'dimensional_breakdown': breed_score.dimensional_breakdown,
                    'scores': ui_scores,  # format required by the UI
                    'explanation': breed_score.explanation,
                    'size': breed_info.get('Size', 'Unknown'),
                    'temperament': breed_info.get('Temperament', ''),
                    'exercise_needs': breed_info.get('Exercise Needs', 'Moderate'),
                    'grooming_needs': breed_info.get('Grooming Needs', 'Moderate'),
                    'good_with_children': breed_info.get('Good with Children', 'Yes'),
                    'lifespan': breed_info.get('Lifespan', '10-12 years'),
                    'description': breed_info.get('Description', ''),
                    'search_type': 'enhanced_description',
                    'calibration_method': calibration_result.calibration_method,
                    'applied_constraints': filter_result.applied_constraints,
                    'relaxed_constraints': filter_result.relaxed_constraints,
                    'warnings': filter_result.warnings
                }

                final_recommendations.append(recommendation)

            # Size-distribution correction
            corrected_recommendations = self._apply_size_distribution_correction(final_recommendations)

            # Stage 6: intelligent trait matching
            intelligence_enhanced_recommendations = self._apply_intelligent_trait_matching(corrected_recommendations, user_input)

            print(f"Generated {len(intelligence_enhanced_recommendations)} enhanced semantic recommendations with intelligent trait matching")
            return intelligence_enhanced_recommendations

        except Exception as e:
            print(f"Error in enhanced semantic recommendations: {str(e)}")
            print(traceback.format_exc())
            # Fall back to the basic semantic method.
            return self.get_semantic_recommendations(user_input, top_k)

    def get_semantic_recommendations(self, user_input: str, top_k: int = 15) -> List[Dict[str, Any]]:
        """Recommend breeds from a natural-language description.

        Combines SBERT cosine similarity with keyword feature matching,
        lifestyle/comparative bonuses and per-dimension scores.  The small
        per-breed jitter used to break ties is derived from a stable digest, so
        results are repeatable across interpreter runs.

        Args:
            user_input: The user's natural-language description.
            top_k: Number of recommendations to return.

        Returns:
            List of recommendation dicts; ``[]`` on any failure (the error and
            traceback are printed, including when the SBERT model is missing).
        """
        try:
            print(f"Processing user input: {user_input}")

            # The SBERT model is mandatory for this path.
            if self.sbert_model is None:
                error_msg = "SBERT model not available. This could be due to:\n• Model download failed\n• Insufficient memory\n• Network connectivity issues\n\nPlease check your environment and try again."
                print(f"ERROR: {error_msg}")
                raise RuntimeError(error_msg)

            # Embed the user input.
            user_embedding = self.vector_manager.encode_text(user_input)

            # Comparative preferences ("X more than Y").
            comparative_prefs = self._parse_comparative_preferences(user_input)

            # Lifestyle keywords.
            lifestyle_keywords = self._extract_lifestyle_keywords(user_input)

            # Similarity against every breed, with hard constraints applied.
            similarities = []

            for breed, breed_vector in self.breed_vectors.items():
                # Hard constraints first.
                constraint_penalty = self._apply_hard_constraints(breed, user_input, breed_vector.characteristics)

                # Skip breeds that violate a critical constraint.
                if constraint_penalty <= -1.0:  # fully disqualified
                    continue

                # Base semantic similarity.
                semantic_score = cosine_similarity(
                    [user_embedding],
                    [breed_vector.embedding]
                )[0][0]

                # Comparative preference bonus.
                comparative_bonus = comparative_prefs.get(breed, 0.0)

                # Lifestyle match bonus, reduced by the constraint penalty.
                lifestyle_bonus = self._calculate_lifestyle_bonus(
                    breed_vector.characteristics,
                    lifestyle_keywords
                )
                lifestyle_bonus += constraint_penalty

                # Combined score.  The similarity is clamped at 0 because a
                # negative base raised to a fractional power is NaN/complex.
                base_semantic = max(float(semantic_score), 0.0) ** 0.8  # mildly compress high scores
                enhanced_lifestyle = lifestyle_bonus * 2.0  # amplify lifestyle match
                enhanced_comparative = comparative_bonus * 1.5  # amplify breed preference

                final_score = (
                    base_semantic * 0.55 +
                    enhanced_comparative * 0.30 +
                    enhanced_lifestyle * 0.15
                )

                # Small, repeatable per-breed jitter to break ties naturally.
                final_score += self._stable_uniform(breed, -0.03, 0.03)

                # Never exceed 1.0.
                final_score = min(1.0, final_score)

                similarities.append({
                    'breed': breed,
                    'score': final_score,
                    'semantic_score': semantic_score,
                    'comparative_bonus': comparative_bonus,
                    'lifestyle_bonus': lifestyle_bonus
                })

            if not similarities:
                # Every breed was disqualified (or there are no vectors);
                # avoid taking the mean of an empty list.
                print("Generated 0 semantic recommendations")
                return []

            # Normalised display scores with a balanced distribution.
            breed_display_scores = []

            # Statistics of the semantic scores, used for normalisation.
            all_semantic_scores = [breed_data['semantic_score'] for breed_data in similarities]
            semantic_mean = np.mean(all_semantic_scores)
            semantic_std = np.std(all_semantic_scores) if len(all_semantic_scores) > 1 else 1.0

            user_text = user_input.lower()

            for breed_data in similarities:
                breed = breed_data['breed']
                base_semantic = breed_data['semantic_score']

                # Z-score the semantic score to tame outliers.
                if semantic_std > 0:
                    normalized_semantic = (base_semantic - semantic_mean) / semantic_std
                    normalized_semantic = max(-2.0, min(2.0, normalized_semantic))  # clip to 2 std devs
                    scaled_semantic = 0.5 + (normalized_semantic * 0.1)  # maps to 0.3-0.7
                else:
                    scaled_semantic = 0.5

                # Breed attributes (looked up once and reused below).
                breed_info = (get_dog_description(breed) if breed != 'Unknown' else {}) or {}
                breed_size = breed_info.get('Size', '').lower()
                exercise_needs = breed_info.get('Exercise Needs', '').lower()

                # Feature-match score (weighted more than raw similarity).
                feature_score = 0.0

                # Size and space requirements (high weight).
                if any(term in user_text for term in ['apartment', 'small', 'limited space']):
                    if 'small' in breed_size:
                        feature_score += 0.25
                    elif 'medium' in breed_size:
                        feature_score += 0.05
                    elif 'large' in breed_size or 'giant' in breed_size:
                        feature_score -= 0.30

                # Exercise requirements (high weight).
                if any(term in user_text for term in ['low exercise', 'minimal exercise', "doesn't need", 'not much']):
                    if 'low' in exercise_needs or 'minimal' in exercise_needs:
                        feature_score += 0.20
                    elif 'high' in exercise_needs or 'very high' in exercise_needs:
                        feature_score -= 0.25
                elif any(term in user_text for term in ['active', 'high exercise', 'running', 'hiking']):
                    if 'high' in exercise_needs:
                        feature_score += 0.20
                    elif 'low' in exercise_needs:
                        feature_score -= 0.15

                # Family compatibility.
                if any(term in user_text for term in ['children', 'kids', 'family']):
                    good_with_children = breed_info.get('Good with Children', '')
                    if good_with_children == 'Yes':
                        feature_score += 0.10
                    elif good_with_children == 'No':
                        feature_score -= 0.20

                # Balanced weighted combination.
                final_score = (
                    scaled_semantic * 0.35 +   # reduced semantic weight
                    feature_score * 0.45 +     # increased feature-match weight
                    breed_data['lifestyle_bonus'] * 0.15 +
                    breed_data['comparative_bonus'] * 0.05
                )

                base_compatibility = final_score

                # Map the compatibility into a display band.
                if base_compatibility >= 0.9:  # exceptional match
                    score_range = (0.92, 0.98)
                    position = (base_compatibility - 0.9) / 0.1
                elif base_compatibility >= 0.75:  # excellent match
                    score_range = (0.85, 0.91)
                    position = (base_compatibility - 0.75) / 0.15
                elif base_compatibility >= 0.6:  # good match
                    score_range = (0.75, 0.84)
                    position = (base_compatibility - 0.6) / 0.15
                elif base_compatibility >= 0.45:  # fair match
                    score_range = (0.65, 0.74)
                    position = (base_compatibility - 0.45) / 0.15
                elif base_compatibility >= 0.3:  # poor match
                    score_range = (0.55, 0.64)
                    position = (base_compatibility - 0.3) / 0.15
                else:  # very poor match
                    score_range = (0.45, 0.54)
                    position = max(0, base_compatibility / 0.3)

                score_span = score_range[1] - score_range[0]
                base_score = score_range[0] + (position * score_span)

                # Controlled, repeatable jitter keyed on breed + query prefix.
                variation = self._stable_uniform(breed + user_input[:15], -0.015, 0.015)
                display_score = round(max(0.45, min(0.98, base_score + variation)), 3)

                breed_display_scores.append({
                    'breed': breed,
                    'breed_info': breed_info,
                    'display_score': display_score,
                    'semantic_score': base_semantic,
                    'comparative_bonus': breed_data['comparative_bonus'],
                    'lifestyle_bonus': breed_data['lifestyle_bonus']
                })

            # Fold the per-dimension scores into the ranking.
            for breed_data in breed_display_scores:
                breed = breed_data['breed']
                real_scores = self._calculate_real_dimension_scores(
                    breed, breed_data['breed_info'], user_input, breed_data['display_score']
                )
                breed_data['real_scores'] = real_scores

                # Ranking score: 50% display score, 50% mean dimension score.
                dim_scores = [real_scores.get('space', 0.7), real_scores.get('exercise', 0.7),
                              real_scores.get('grooming', 0.7), real_scores.get('experience', 0.7),
                              real_scores.get('noise', 0.7)]
                avg_dim_score = sum(dim_scores) / len(dim_scores)

                # Penalise a very weak dimension.
                min_dim_score = min(dim_scores)
                penalty = 0
                if min_dim_score < 0.5:
                    penalty = (0.5 - min_dim_score) * 0.3

                breed_data['adjusted_score'] = (
                    breed_data['display_score'] * 0.5 +
                    avg_dim_score * 0.5 -
                    penalty
                )

            # Sort by the adjusted score.
            breed_display_scores.sort(key=lambda x: x['adjusted_score'], reverse=True)
            top_breeds = breed_display_scores[:top_k]

            # Convert to the standard recommendation format.
            recommendations = []
            for i, breed_data in enumerate(top_breeds):
                breed = breed_data['breed']
                adjusted_score = breed_data['adjusted_score']
                real_scores = breed_data['real_scores']
                breed_info = breed_data['breed_info']

                recommendation = {
                    'breed': breed.replace('_', ' '),
                    'rank': i + 1,
                    'overall_score': adjusted_score,  # use the adjusted score
                    'final_score': adjusted_score,  # keep final_score equal to overall_score
                    'semantic_score': breed_data['semantic_score'],
                    'comparative_bonus': breed_data['comparative_bonus'],
                    'lifestyle_bonus': breed_data['lifestyle_bonus'],
                    'size': breed_info.get('Size', 'Unknown'),
                    'temperament': breed_info.get('Temperament', ''),
                    'exercise_needs': breed_info.get('Exercise Needs', 'Moderate'),
                    'grooming_needs': breed_info.get('Grooming Needs', 'Moderate'),
                    'good_with_children': breed_info.get('Good with Children', 'Yes'),
                    'lifespan': breed_info.get('Lifespan', '10-12 years'),
                    'description': breed_info.get('Description', ''),
                    'search_type': 'description',
                    'scores': real_scores  # real per-dimension scores
                }

                recommendations.append(recommendation)

            print(f"Generated {len(recommendations)} semantic recommendations")
            return recommendations

        except Exception as e:
            print(f"Failed to generate semantic recommendations: {str(e)}")
            print(traceback.format_exc())
            return []

    def get_enhanced_recommendations_with_unified_scoring(self, user_input: str, top_k: int = 15) -> List[Dict[str, Any]]:
        """Entry point that delegates to ``get_enhanced_semantic_recommendations``.

        Args:
            user_input: The user's natural-language description.
            top_k: Number of recommendations to return.

        Returns:
            List of recommendation dicts.

        Raises:
            RuntimeError: If the delegated call raises; the original exception
                is chained as the cause.
        """
        try:
            print(f"Processing enhanced recommendation with multi-head scoring: {user_input[:50]}...")

            # Use the full enhanced pipeline (includes multi_head_scorer).
            return self.get_enhanced_semantic_recommendations(user_input, top_k)

        except Exception as e:
            error_msg = f"Enhanced recommendation error: {str(e)}. Please check your description."
            print(f"ERROR: {error_msg}")
            print(traceback.format_exc())
            raise RuntimeError(error_msg) from e

    def _analyze_user_description_enhanced(self, user_description: str) -> Dict[str, Any]:
        """Analyse the user description (delegates to the query analyzer)."""
        return self.query_analyzer.analyze_user_description_enhanced(user_description)

    def _create_user_preferences_from_analysis_enhanced(self, analysis: Dict[str, Any]) -> 'UserPreferences':
        """Build a user-preferences object from an analysis (delegates to the query analyzer)."""
        return self.query_analyzer.create_user_preferences_from_analysis_enhanced(analysis)

    def _get_candidate_breeds_enhanced(self, analysis: Dict[str, Any]) -> List[str]:
        """Return candidate breeds for an analysis (delegates to the query analyzer)."""
        return self.query_analyzer.get_candidate_breeds_enhanced(analysis)

    def _apply_constraint_filtering_enhanced(self, breed: str, analysis: Dict[str, Any]) -> float:
        """Return a score adjustment for ``breed`` from the analysis' constraints.

        Args:
            breed: Breed name as used by ``get_dog_description``.
            analysis: Analysis dict; its optional ``'constraint_requirements'``
                collection may contain ``'low_noise'``, ``'apartment_suitable'``
                and ``'child_friendly'``.

        Returns:
            Sum of the adjustments (negative = penalty, positive = bonus);
            ``0.0`` when the breed has no description or no requirement applies.
        """
        constraint_penalty = 0.0

        breed_info = get_dog_description(breed)
        if not breed_info:
            return constraint_penalty

        # A missing key means "no constraints" rather than an error.
        requirements = analysis.get('constraint_requirements') or ()

        # Low-noise requirement
        if 'low_noise' in requirements:
            noise_info = breed_noise_info.get(breed, {})
            noise_level = noise_info.get('noise_level', 'moderate').lower()
            if 'high' in noise_level:
                constraint_penalty -= 0.3  # heavy penalty
            elif 'low' in noise_level:
                constraint_penalty += 0.1  # small bonus

        # Apartment suitability
        if 'apartment_suitable' in requirements:
            size = breed_info.get('Size', '').lower()
            exercise_needs = breed_info.get('Exercise Needs', '').lower()

            if size in ['large', 'giant']:
                constraint_penalty -= 0.2
            elif size in ['small', 'tiny']:
                constraint_penalty += 0.1

            if 'high' in exercise_needs:
                constraint_penalty -= 0.15

        # Child friendliness
        if 'child_friendly' in requirements:
            good_with_children = breed_info.get('Good with Children', 'Unknown')
            if good_with_children == 'Yes':
                constraint_penalty += 0.15
            elif good_with_children == 'No':
                constraint_penalty -= 0.4  # heavy penalty

        return constraint_penalty

    def _get_breed_characteristics_enhanced(self, breed: str) -> Dict[str, Any]:
        """Return breed characteristics (delegates to the score calculator)."""
        return self.score_calculator.get_breed_characteristics_enhanced(breed)

    def get_hybrid_recommendations(self, user_description: str,
                                   user_preferences: Optional[Any] = None,
                                   top_k: int = 15) -> List[Dict[str, Any]]:
        """Blend semantic recommendations with the traditional compatibility score.

        Args:
            user_description: The user's natural-language description.
            user_preferences: Optional structured preferences; when falsy the
                plain semantic recommendations are returned.
            top_k: Number of recommendations to return.

        Returns:
            Up to ``top_k`` recommendation dicts ranked by the hybrid score
            (40% semantic, 60% traditional).  On failure the plain semantic
            recommendations are returned instead.
        """
        try:
            # Fetch extra semantic candidates so re-ranking has room to work.
            semantic_recommendations = self.get_semantic_recommendations(user_description, top_k * 2)

            if not user_preferences:
                return semantic_recommendations[:top_k]

            # Combine with the traditional score.
            hybrid_results = []

            for semantic_rec in semantic_recommendations:
                breed_name = semantic_rec['breed'].replace(' ', '_')

                traditional_score = calculate_compatibility_score(user_preferences, breed_name)

                # Hybrid score (semantic 40% + traditional 60%).
                hybrid_score = (
                    semantic_rec['overall_score'] * 0.4 +
                    traditional_score * 0.6
                )

                semantic_rec['hybrid_score'] = hybrid_score
                semantic_rec['traditional_score'] = traditional_score
                hybrid_results.append(semantic_rec)

            # Re-rank by the hybrid score.
            hybrid_results.sort(key=lambda x: x['hybrid_score'], reverse=True)

            # Refresh rank and scores; final_score is kept equal to
            # overall_score, as in the other recommendation paths.
            for i, result in enumerate(hybrid_results[:top_k]):
                result['rank'] = i + 1
                result['overall_score'] = result['hybrid_score']
                result['final_score'] = result['hybrid_score']

            return hybrid_results[:top_k]

        except Exception as e:
            print(f"Hybrid recommendation failed: {str(e)}")
            print(traceback.format_exc())
            return self.get_semantic_recommendations(user_description, top_k)
print(f"ERROR: {error_msg}") print(traceback.format_exc()) raise RuntimeError(error_msg) from e def get_enhanced_recommendations_with_unified_scoring(user_description: str, top_k: int = 15) -> List[Dict[str, Any]]: """ 模組層級便利函數 - 使用完整的多頭評分系統 這個函數呼叫 SemanticBreedRecommender 的增強推薦方法,使用: - QueryUnderstandingEngine: 解析用戶意圖 - PriorityDetector: 檢測維度優先級 - MultiHeadScorer: 多維度評分 - DynamicWeightCalculator: 動態權重分配 - SmartBreedFilter: 智慧風險過濾(只對真正危害用戶的情況干預) 如果增強系統失敗,會自動回退到基本語義推薦 """ try: print(f"Processing description-based recommendation with multi-head scoring: {user_description[:50]}...") # 創建推薦器實例 recommender = SemanticBreedRecommender() # 檢查 SBERT 模型是否可用 if not recommender.vector_manager.is_model_available(): print("SBERT model not available, using basic text matching...") results = _get_basic_text_matching_recommendations(user_description, top_k, recommender) # 應用智慧過濾 results = apply_smart_filtering(results, user_description) return results # 嘗試使用完整的增強語義推薦系統 try: results = recommender.get_enhanced_semantic_recommendations(user_description, top_k) if results: # 應用智慧過濾 results = apply_smart_filtering(results, user_description) return results else: print("Enhanced recommendations returned empty, falling back to basic semantic...") except Exception as enhanced_error: print(f"Enhanced recommendation failed: {str(enhanced_error)}, falling back to basic semantic...") print(traceback.format_exc()) # 回退到基本語義推薦 try: results = recommender.get_semantic_recommendations(user_description, top_k) if results: # 應用智慧過濾 results = apply_smart_filtering(results, user_description) return results except Exception as semantic_error: print(f"Basic semantic recommendation also failed: {str(semantic_error)}") # 最後回退到基本文字匹配 print("All semantic methods failed, using basic text matching as last resort...") results = _get_basic_text_matching_recommendations(user_description, top_k, recommender) # 應用智慧過濾 results = apply_smart_filtering(results, user_description) return results except Exception as e: 
error_msg = f"Error in semantic recommendation system: {str(e)}. Please check your input and try again." print(f"ERROR: {error_msg}") print(traceback.format_exc()) raise RuntimeError(error_msg) from e def _get_basic_text_matching_recommendations(user_description: str, top_k: int = 15, recommender=None) -> List[Dict[str, Any]]: """基本文字匹配推薦(SBERT 不可用時的後備方案)""" try: print("Using basic text matching as fallback...") # 如果沒有提供 recommender,創建一個新的 if recommender is None: recommender = SemanticBreedRecommender() # 基本關鍵字匹配 keywords = user_description.lower().split() breed_scores = [] # 從數據庫獲取品種清單或使用預設清單 try: conn = sqlite3.connect('animal_detector.db') cursor = conn.cursor() cursor.execute("SELECT DISTINCT Breed FROM AnimalCatalog LIMIT 50") basic_breeds = [row[0] for row in cursor.fetchall()] cursor.close() conn.close() # 過濾掉野生動物品種 basic_breeds = [breed for breed in basic_breeds if breed != 'Dhole'] except Exception as e: print(f"Could not load breed list from database: {str(e)}") # 後備品種清單 basic_breeds = [ 'Labrador_Retriever', 'Golden_Retriever', 'German_Shepherd', 'French_Bulldog', 'Border_Collie', 'Poodle', 'Beagle', 'Rottweiler', 'Yorkshire_Terrier', 'Dachshund', 'Boxer', 'Siberian_Husky', 'Great_Dane', 'Pomeranian', 'Shih_Tzu', 'Maltese_Dog', 'Chihuahua', 'Cavalier_King_Charles_Spaniel', 'Boston_Terrier', 'Japanese_Spaniel', 'Toy_Terrier', 'Affenpinscher', 'Pekingese', 'Lhasa' ] # 應用約束過濾 - 關鍵修復! 
try: from constraint_manager import ConstraintManager from query_understanding import QueryUnderstandingEngine query_engine = QueryUnderstandingEngine() dimensions = query_engine.analyze_query(user_description) constraint_manager = ConstraintManager() filter_result = constraint_manager.apply_constraints(dimensions) # 只保留通過約束的品種 allowed_breeds = filter_result.passed_breeds filtered_count = len(basic_breeds) basic_breeds = [b for b in basic_breeds if b in allowed_breeds] print(f"Constraint filtering: {filtered_count} -> {len(basic_breeds)} breeds") # 記錄被過濾的原因(用於調試) for breed, reason in filter_result.filtered_breeds.items(): if breed in ['Italian_Greyhound', 'Rottweiler', 'Malinois']: print(f" Filtered {breed}: {reason}") except Exception as e: print(f"Warning: Could not apply constraints: {str(e)}") for breed in basic_breeds: breed_info = get_dog_description(breed) or {} breed_text = f"{breed} {breed_info.get('Temperament', '')} {breed_info.get('Size', '')} {breed_info.get('Description', '')}".lower() # 計算關鍵字匹配分數 matches = sum(1 for keyword in keywords if keyword in breed_text) base_score = min(0.95, 0.3 + (matches / len(keywords)) * 0.6) # 應用增強匹配邏輯 enhanced_score = recommender.score_calculator.calculate_enhanced_matching_score( breed, breed_info, user_description, base_score ) breed_scores.append((breed, enhanced_score['final_score'], breed_info, enhanced_score)) # 按分數排序 breed_scores.sort(key=lambda x: x[1], reverse=True) recommendations = [] for i, (breed, final_score, breed_info, enhanced_score) in enumerate(breed_scores[:top_k]): recommendation = { 'breed': breed.replace('_', ' '), 'rank': i + 1, 'overall_score': final_score, 'final_score': final_score, 'semantic_score': enhanced_score.get('weighted_score', final_score), 'comparative_bonus': enhanced_score.get('lifestyle_bonus', 0.0), 'lifestyle_bonus': enhanced_score.get('lifestyle_bonus', 0.0), 'size': breed_info.get('Size', 'Unknown'), 'temperament': breed_info.get('Temperament', 'Unknown'), 'exercise_needs': 
breed_info.get('Exercise Needs', 'Moderate'), 'grooming_needs': breed_info.get('Grooming Needs', 'Moderate'), 'good_with_children': breed_info.get('Good with Children', 'Unknown'), 'lifespan': breed_info.get('Lifespan', '10-12 years'), 'description': breed_info.get('Description', 'No description available'), 'search_type': 'description', 'scores': enhanced_score.get('dimension_scores', { 'space': final_score * 0.9, 'exercise': final_score * 0.85, 'grooming': final_score * 0.8, 'experience': final_score * 0.75, 'noise': final_score * 0.7, 'family': final_score * 0.65 }) } recommendations.append(recommendation) return recommendations except Exception as e: error_msg = f"Error in basic text matching: {str(e)}" print(f"ERROR: {error_msg}") raise RuntimeError(error_msg) from e