# PawMatchAI / semantic_breed_recommender.py
# DawnC's picture
# Update semantic_breed_recommender.py
# d5249d6 verified
# %%writefile semantic_breed_recommender.py
import random
import hashlib
import numpy as np
import sqlite3
import re
import traceback
from typing import List, Dict, Tuple, Optional, Any
from dataclasses import dataclass
from sentence_transformers import SentenceTransformer
import torch
from sklearn.metrics.pairwise import cosine_similarity
from dog_database import get_dog_description
from breed_health_info import breed_health_info
from breed_noise_info import breed_noise_info
from scoring_calculation_system import UserPreferences, calculate_compatibility_score, UnifiedScoringSystem, calculate_unified_breed_scores
from query_understanding import QueryUnderstandingEngine, analyze_user_query
from constraint_manager import ConstraintManager, apply_breed_constraints
from multi_head_scorer import MultiHeadScorer, score_breed_candidates, BreedScore
from score_calibrator import ScoreCalibrator, calibrate_breed_scores
from config_manager import get_config_manager, get_standardized_breed_data
from semantic_vector_manager import SemanticVectorManager, BreedDescriptionVector
from user_query_analyzer import UserQueryAnalyzer
from matching_score_calculator import MatchingScoreCalculator
from smart_breed_filter import apply_smart_filtering
class SemanticBreedRecommender:
"""
增強的基於 SBERT 的語義品種推薦系統
"""
def __init__(self):
    """Build the recommender's collaborators.

    Creates the semantic vector manager, then the query analyzer and the
    score calculator (each receives the manager's breed list), and caches
    the manager's model name, SBERT model, breed vectors and breed list on
    the instance. The enhanced-pipeline components are created afterwards;
    if any of them raises, the error and traceback are printed and all five
    enhanced attributes are reset to ``None``.
    """
    manager = SemanticVectorManager()
    self.vector_manager = manager
    self.query_analyzer = UserQueryAnalyzer(manager.get_breed_list())
    self.score_calculator = MatchingScoreCalculator(manager.get_breed_list())

    # Convenience copies of what the vector manager exposes.
    self.model_name = manager.model_name
    self.sbert_model = manager.get_sbert_model()
    self.breed_vectors = manager.get_breed_vectors()
    self.breed_list = manager.get_breed_list()
    self.comparative_keywords = self.query_analyzer.comparative_keywords

    # Enhanced components are optional: any failure disables all of them.
    try:
        self.query_engine = QueryUnderstandingEngine()
        print("QueryUnderstandingEngine initialized")
        self.constraint_manager = ConstraintManager()
        print("ConstraintManager initialized")
        self.multi_head_scorer = None
        self.score_calibrator = ScoreCalibrator()
        print("ScoreCalibrator initialized")
        self.config_manager = get_config_manager()
        # The multi-head scorer is only built when an SBERT model is present.
        if not self.sbert_model:
            print("WARNING: SBERT model not available, multi_head_scorer will be None")
        else:
            self.multi_head_scorer = MultiHeadScorer(self.sbert_model)
            print("Multi-head scorer initialized with SBERT model")
    except Exception as init_error:
        print(f"Error initializing enhanced system components: {str(init_error)}")
        print(traceback.format_exc())
        for component in ('query_engine', 'constraint_manager', 'multi_head_scorer',
                          'score_calibrator', 'config_manager'):
            setattr(self, component, None)
def _parse_comparative_preferences(self, user_input: str) -> Dict[str, float]:
"""解析比較性偏好表達"""
return self.query_analyzer.parse_comparative_preferences(user_input)
def _extract_lifestyle_keywords(self, user_input: str) -> Dict[str, List[str]]:
"""增強的生活方式關鍵字提取,具有更好的模式匹配"""
return self.query_analyzer.extract_lifestyle_keywords(user_input)
def _apply_size_distribution_correction(self, recommendations: List[Dict]) -> List[Dict]:
"""應用尺寸分佈修正以防止大型品種偏差"""
return self.score_calculator.apply_size_distribution_correction(recommendations)
def _normalize_breed_size(self, size: str) -> str:
"""標準化品種尺寸到標準分類"""
return self.score_calculator._normalize_breed_size(size)
def _parse_user_requirements(self, user_input: str) -> Dict[str, Any]:
"""更準確地解析用戶需求"""
return self.query_analyzer.parse_user_requirements(user_input)
def _apply_hard_constraints(self, breed: str, user_input: str, breed_characteristics: Dict[str, Any]) -> float:
"""增強硬約束,具有更嚴格的懲罰"""
return self.score_calculator.apply_hard_constraints(breed, user_input, breed_characteristics)
def _calculate_lifestyle_bonus(self, breed_characteristics: Dict[str, Any],
lifestyle_keywords: Dict[str, List[str]]) -> float:
"""增強生活方式匹配獎勵計算"""
return self.score_calculator.calculate_lifestyle_bonus(breed_characteristics, lifestyle_keywords)
def _apply_intelligent_trait_matching(self, recommendations: List[Dict], user_input: str) -> List[Dict]:
"""基於增強關鍵字提取和數據庫挖掘應用智能特徵匹配"""
return self.score_calculator.apply_intelligent_trait_matching(recommendations, user_input)
def _get_breed_info_from_standardized(self, standardized_info) -> Dict[str, Any]:
"""將標準化品種信息轉換為字典格式"""
return self.score_calculator.get_breed_info_from_standardized(standardized_info)
def _get_fallback_recommendations(self, top_k: int = 15) -> List[Dict[str, Any]]:
"""當增強系統失敗時獲取備用推薦"""
return self.score_calculator.get_fallback_recommendations(top_k)
def _get_fallback_scoring_with_constraints(self, user_input: str,
passed_breeds: set,
dimensions: 'QueryDimensions',
top_k: int = 15) -> List[Dict[str, Any]]:
"""
當 multi_head_scorer 不可用時的回退評分方法
仍然用 constraint_manager 的過濾結果,並產生自然分佈的分數
"""
print(f"Fallback scoring for {len(passed_breeds)} filtered breeds")
recommendations = []
user_text = user_input.lower()
# 提取用戶需求關鍵詞
lifestyle_keywords = self._extract_lifestyle_keywords(user_input)
for breed in passed_breeds:
breed_info = get_dog_description(breed.replace(' ', '_')) or {}
if not breed_info:
continue
# 計算多維度匹配分數
dimension_scores = self._calculate_comprehensive_dimension_scores(
breed, breed_info, user_text, dimensions, lifestyle_keywords
)
# 基於維度分數計算加權總分
weights = self._get_dimension_weights_from_query(user_text, dimensions)
weighted_sum = sum(dimension_scores.get(dim, 0.7) * weights.get(dim, 1.0)
for dim in dimension_scores)
total_weight = sum(weights.get(dim, 1.0) for dim in dimension_scores)
final_score = weighted_sum / total_weight if total_weight > 0 else 0.7
# 確保分數在合理範圍內(允許高分,非常契合的品種可超過 90%)
final_score = max(0.45, min(0.98, final_score))
dimension_scores['overall'] = final_score
recommendation = {
'breed': breed.replace('_', ' '),
'rank': 0,
'overall_score': final_score,
'final_score': final_score,
'scores': dimension_scores,
'size': breed_info.get('Size', 'Unknown'),
'temperament': breed_info.get('Temperament', ''),
'exercise_needs': breed_info.get('Exercise Needs', 'Moderate'),
'grooming_needs': breed_info.get('Grooming Needs', 'Moderate'),
'good_with_children': breed_info.get('Good with Children', 'Yes'),
'lifespan': breed_info.get('Lifespan', '10-12 years'),
'description': breed_info.get('Description', ''),
'search_type': 'fallback_with_constraints',
}
recommendations.append(recommendation)
# 按分數排序
recommendations.sort(key=lambda x: -x['final_score'])
# 更新排名
for i, rec in enumerate(recommendations[:top_k]):
rec['rank'] = i + 1
print(f"Generated {len(recommendations[:top_k])} fallback recommendations")
return recommendations[:top_k]
def _calculate_comprehensive_dimension_scores(self, breed: str, breed_info: Dict,
user_text: str, dimensions,
lifestyle_keywords: Dict) -> Dict[str, float]:
"""
計算全面的維度分數,產生自然分佈的評分
"""
scores = {}
temperament = breed_info.get('Temperament', '').lower()
size = breed_info.get('Size', 'Medium').lower()
exercise_needs = breed_info.get('Exercise Needs', 'Moderate').lower()
grooming_needs = breed_info.get('Grooming Needs', 'Moderate').lower()
good_with_children = breed_info.get('Good with Children', 'Yes')
care_level = breed_info.get('Care Level', 'Moderate').lower()
description = breed_info.get('Description', '').lower()
# 1. 空間相容性
space_score = 0.7
if 'apartment' in user_text or 'small space' in user_text:
if 'small' in size or 'toy' in size:
space_score = 0.96
elif 'medium' in size:
space_score = 0.78
elif 'large' in size:
space_score = 0.52
else:
space_score = 0.45
elif 'house' in user_text or 'yard' in user_text:
if 'large' in size:
space_score = 0.92
elif 'medium' in size:
space_score = 0.88
else:
space_score = 0.82
scores['space'] = space_score
# 2. 運動相容性
exercise_score = 0.7
user_wants_high = any(w in user_text for w in ['energetic', 'active', 'running', 'hiking', 'athletic'])
user_wants_low = any(w in user_text for w in ['low maintenance', 'relaxed', 'calm', 'couch'])
if user_wants_high:
if 'very high' in exercise_needs:
exercise_score = 0.98
elif 'high' in exercise_needs:
exercise_score = 0.92
elif 'moderate' in exercise_needs:
exercise_score = 0.68
else:
exercise_score = 0.48
elif user_wants_low:
if 'low' in exercise_needs:
exercise_score = 0.96
elif 'moderate' in exercise_needs:
exercise_score = 0.78
elif 'high' in exercise_needs:
exercise_score = 0.52
else:
exercise_score = 0.42
else:
# 中等運動需求
if 'moderate' in exercise_needs:
exercise_score = 0.88
elif 'low' in exercise_needs or 'high' in exercise_needs:
exercise_score = 0.72
else:
exercise_score = 0.65
scores['exercise'] = exercise_score
# 3. 美容需求相容性
grooming_score = 0.7
user_wants_low_maintenance = any(w in user_text for w in ['low maintenance', 'easy care', 'minimal grooming'])
if user_wants_low_maintenance:
if 'low' in grooming_needs or 'minimal' in grooming_needs:
grooming_score = 0.96
elif 'moderate' in grooming_needs:
grooming_score = 0.75
else:
grooming_score = 0.50
else:
if 'low' in grooming_needs:
grooming_score = 0.85
elif 'moderate' in grooming_needs:
grooming_score = 0.78
else:
grooming_score = 0.70
scores['grooming'] = grooming_score
# 4. 噪音相容性
noise_score = 0.7
user_wants_quiet = any(w in user_text for w in ['quiet', 'silent', 'noise', 'bark', 'neighbors'])
if user_wants_quiet:
# 從 breed_noise_info 獲取噪音資訊
noise_info = breed_noise_info.get(breed.replace(' ', '_'), {})
noise_level = noise_info.get('noise_level', 'Moderate').lower()
if 'low' in noise_level or 'quiet' in noise_level:
noise_score = 0.97
elif 'moderate' in noise_level:
noise_score = 0.72
elif 'high' in noise_level:
noise_score = 0.45
else:
# 根據性格推斷
if any(w in temperament for w in ['calm', 'quiet', 'gentle', 'reserved']):
noise_score = 0.88
elif any(w in temperament for w in ['alert', 'vocal', 'energetic']):
noise_score = 0.55
else:
noise_score = 0.70
scores['noise'] = noise_score
# 5. 家庭相容性
family_score = 0.7
has_family_context = any(w in user_text for w in ['kids', 'children', 'family', 'child'])
if has_family_context:
if good_with_children == 'Yes':
family_score = 0.94
# 額外加分:溫和性格
if any(w in temperament for w in ['gentle', 'friendly', 'patient', 'loving']):
family_score = min(0.98, family_score + 0.04)
elif good_with_children == 'No':
family_score = 0.32
else:
family_score = 0.62
else:
family_score = 0.76 if good_with_children == 'Yes' else 0.70
scores['family'] = family_score
# 6. 經驗相容性
experience_score = 0.7
is_beginner = any(w in user_text for w in ['first dog', 'first time', 'beginner', 'new owner', 'never had'])
if is_beginner:
# 評估品種對新手的友好程度
if 'low' in care_level or 'easy' in care_level:
experience_score = 0.94
elif 'moderate' in care_level:
experience_score = 0.78
else:
experience_score = 0.52
# 性格調整
if any(w in temperament for w in ['eager to please', 'trainable', 'intelligent', 'friendly']):
experience_score = min(0.98, experience_score + 0.08)
if any(w in temperament for w in ['stubborn', 'independent', 'strong-willed']):
experience_score = max(0.38, experience_score - 0.18)
else:
experience_score = 0.80
scores['experience'] = experience_score
# 7. 健康分數(基於壽命和品種特性)
health_score = 0.75
lifespan = breed_info.get('Lifespan', '10-12 years')
try:
# 解析壽命
years = [int(y) for y in lifespan.replace(' years', '').split('-') if y.strip().isdigit()]
if years:
avg_lifespan = sum(years) / len(years)
if avg_lifespan >= 14:
health_score = 0.94
elif avg_lifespan >= 12:
health_score = 0.85
elif avg_lifespan >= 10:
health_score = 0.75
else:
health_score = 0.62
except:
pass
scores['health'] = health_score
return scores
def _get_dimension_weights_from_query(self, user_text: str, dimensions) -> Dict[str, float]:
"""
根據用戶查詢動態計算維度權重
"""
weights = {
'space': 1.0,
'exercise': 1.0,
'grooming': 1.0,
'noise': 1.0,
'family': 1.0,
'experience': 1.0,
'health': 0.8
}
# 根據 dimensions 的 priority 調整權重
if hasattr(dimensions, 'dimension_priorities'):
priority_map = getattr(dimensions, 'dimension_priorities', {})
for dim, priority in priority_map.items():
if dim in weights:
weights[dim] = priority
# 映射不同名稱
if dim == 'size':
weights['space'] = max(weights['space'], priority)
if dim == 'family':
weights['family'] = max(weights['family'], priority)
# 根據關鍵詞強化權重
if any(w in user_text for w in ['quiet', 'noise', 'bark', 'neighbors', 'thin walls']):
weights['noise'] = max(weights['noise'], 2.2)
if any(w in user_text for w in ['kids', 'children', 'family', 'child']):
weights['family'] = max(weights['family'], 2.0)
if any(w in user_text for w in ['first', 'beginner', 'new owner']):
weights['experience'] = max(weights['experience'], 2.0)
if any(w in user_text for w in ['apartment', 'small space', 'studio']):
weights['space'] = max(weights['space'], 1.8)
if any(w in user_text for w in ['energetic', 'active', 'running', 'hiking']):
weights['exercise'] = max(weights['exercise'], 2.0)
if any(w in user_text for w in ['low maintenance', 'easy care']):
weights['grooming'] = max(weights['grooming'], 1.8)
return weights
def _calculate_real_dimension_scores(self, breed: str, breed_info: Dict,
user_input: str, overall_score: float) -> Dict[str, float]:
"""
計算真實的維度分數(基於品種特性和用戶需求)
這個方法取代了假分數生成器,提供真實的評分
Args:
breed: 品種名稱
breed_info: 品種資訊字典
user_input: 用戶輸入文字
overall_score: 總體分數
Returns:
Dict[str, float]: 維度分數字典
"""
if not breed_info:
breed_info = {}
user_text = user_input.lower()
temperament = breed_info.get('Temperament', '').lower()
size = breed_info.get('Size', 'Medium').lower()
exercise_needs = breed_info.get('Exercise Needs', 'Moderate').lower()
grooming_needs = breed_info.get('Grooming Needs', 'Moderate').lower()
good_with_children = breed_info.get('Good with Children', 'Yes')
care_level = breed_info.get('Care Level', 'Moderate').lower()
scores = {}
# 1. Space Compatibility (空間相容性)
space_score = 0.7
if 'apartment' in user_text or 'small' in user_text:
if 'small' in size:
space_score = 0.9
elif 'medium' in size:
space_score = 0.7
elif 'large' in size:
space_score = 0.5
elif 'giant' in size:
space_score = 0.3
elif 'house' in user_text or 'yard' in user_text:
if 'large' in size or 'giant' in size:
space_score = 0.85
else:
space_score = 0.8
scores['space'] = space_score
# 2. Exercise Compatibility (運動相容性)
exercise_score = 0.7
if 'low' in exercise_needs or 'minimal' in exercise_needs:
if any(term in user_text for term in ['work full time', 'busy', 'low exercise', 'not much exercise']):
exercise_score = 0.9
else:
exercise_score = 0.75
elif 'high' in exercise_needs or 'very high' in exercise_needs:
if any(term in user_text for term in ['active', 'running', 'hiking', 'exercise']):
exercise_score = 0.9
elif any(term in user_text for term in ['work full time', 'busy']):
exercise_score = 0.5
else:
exercise_score = 0.65
else: # moderate
exercise_score = 0.75
scores['exercise'] = exercise_score
# 3. Grooming/Maintenance Compatibility (美容/維護相容性)
grooming_score = 0.7
if 'low' in grooming_needs:
if any(term in user_text for term in ['low maintenance', 'low-maintenance', 'easy care', 'minimal grooming']):
grooming_score = 0.9
else:
grooming_score = 0.8
elif 'high' in grooming_needs:
if any(term in user_text for term in ['low maintenance', 'low-maintenance', 'easy care']):
grooming_score = 0.4
else:
grooming_score = 0.6
# 敏感品種需要額外照顧
if 'sensitive' in temperament:
grooming_score -= 0.1
# 特殊品種需要額外護理
breed_lower = breed.lower()
if any(term in breed_lower for term in ['italian', 'greyhound', 'whippet', 'hairless']):
if any(term in user_text for term in ['low maintenance', 'low-maintenance', 'easy']):
grooming_score -= 0.15
scores['grooming'] = max(0.2, grooming_score)
# 4. Experience Compatibility (經驗相容性) - 關鍵維度!
experience_score = 0.7
is_beginner = any(term in user_text for term in ['first dog', 'first time', 'beginner', 'new to dogs', 'never owned', 'never had'])
if is_beginner:
# 新手評估
if 'low' in care_level:
experience_score = 0.85
elif 'moderate' in care_level:
experience_score = 0.65
elif 'high' in care_level:
experience_score = 0.45
# 性格懲罰 - 對新手很重要
difficult_traits = ['sensitive', 'stubborn', 'independent', 'dominant', 'aggressive', 'nervous', 'shy', 'timid', 'alert']
for trait in difficult_traits:
if trait in temperament:
if trait == 'sensitive':
experience_score -= 0.15 # 敏感性格對新手很具挑戰
elif trait == 'aggressive':
experience_score -= 0.25
elif trait in ['stubborn', 'independent', 'dominant']:
experience_score -= 0.12
else:
experience_score -= 0.08
# 友善性格獎勵
easy_traits = ['friendly', 'gentle', 'eager to please', 'patient', 'calm', 'outgoing']
for trait in easy_traits:
if trait in temperament:
experience_score += 0.08
# 易於訓練的加分
if any(term in user_text for term in ['easy to train', 'trainable']):
if any(term in temperament for term in ['eager to please', 'intelligent', 'trainable']):
experience_score += 0.1
elif any(term in temperament for term in ['stubborn', 'independent']):
experience_score -= 0.1
else:
# 有經驗的飼主
experience_score = 0.8
scores['experience'] = max(0.2, min(0.95, experience_score))
# 5. Noise Compatibility (噪音相容性)
noise_score = 0.75
if any(term in user_text for term in ['quiet', 'apartment', 'neighbors']):
if any(term in temperament for term in ['quiet', 'calm', 'gentle']):
noise_score = 0.9
elif any(term in temperament for term in ['alert', 'vocal', 'barking']):
noise_score = 0.5
scores['noise'] = noise_score
# 6. Family Compatibility (家庭相容性)
family_score = 0.7
if any(term in user_text for term in ['children', 'kids', 'family']):
if good_with_children == 'Yes' or good_with_children == True:
family_score = 0.9
if any(term in temperament for term in ['gentle', 'patient', 'friendly']):
family_score = 0.95
else:
family_score = 0.35
scores['family'] = family_score
# 7. Overall
scores['overall'] = overall_score
return scores
def get_enhanced_semantic_recommendations(self, user_input: str, top_k: int = 15) -> List[Dict[str, Any]]:
    """
    Multi-stage breed recommendation: query understanding, constraint
    filtering, multi-head scoring, calibration and post-processing.

    Args:
        user_input: The user's natural-language description.
        top_k: Number of recommendations to produce.

    Returns:
        List of recommendation dicts. When the query engine is missing, or
        any stage raises (including the ValueError raised below when no
        breed passes the constraints), the result of
        ``get_semantic_recommendations`` is returned instead. When only the
        multi-head scorer is missing, the constraint-aware fallback scoring
        is returned.
    """
    try:
        # Stage 1: query understanding
        if not self.query_engine:
            print("Query engine not available, using basic analysis")
            return self.get_semantic_recommendations(user_input, top_k)
        dimensions = self.query_engine.analyze_query(user_input)
        detected = (dimensions.spatial_constraints + dimensions.activity_level
                    + dimensions.noise_preferences + dimensions.size_preferences
                    + dimensions.family_context + dimensions.maintenance_level
                    + dimensions.special_requirements)
        print(f"Query dimensions detected: {len(detected)} total dimensions")

        # Stage 2: constraint filtering
        if not self.constraint_manager:
            print("Constraint manager not available, using all breeds")
            # Ad-hoc stand-in exposing the attributes read further down.
            filter_result = type('FilterResult', (), {
                'passed_breeds': self.breed_list,
                'applied_constraints': [],
                'relaxed_constraints': [],
                'warnings': []
            })()
        else:
            filter_result = self.constraint_manager.apply_constraints(dimensions, min_candidates=max(8, top_k))
            print(f"Constraint filtering: {len(self.breed_list)} -> {len(filter_result.passed_breeds)} candidates")
            if not filter_result.passed_breeds:
                error_msg = f"No dog breeds match your requirements after applying constraints. Applied constraints: {filter_result.applied_constraints}. Consider relaxing some requirements."
                print(f"ERROR: {error_msg}")
                raise ValueError(error_msg)

        # Stage 3: multi-head scoring
        if not self.multi_head_scorer:
            # Fallback scoring still respects the constraint filter result.
            print("Multi-head scorer not available, using fallback scoring with constraint filtering")
            return self._get_fallback_scoring_with_constraints(
                user_input, filter_result.passed_breeds, dimensions, top_k
            )
        breed_scores = self.multi_head_scorer.score_breeds(filter_result.passed_breeds, dimensions)
        print(f"Multi-head scoring completed for {len(breed_scores)} breeds")
        # Debug: score and breakdown of the first five entries.
        for bs in breed_scores[:5]:
            print(f" {bs.breed_name}: final={bs.final_score:.3f}, breakdown={bs.dimensional_breakdown}")

        # Stage 4: score calibration
        if not self.score_calibrator:
            print("Score calibrator not available, using raw scores")
            calibration_result = type('CalibrationResult', (), {
                'score_mapping': {score.breed_name: score.final_score for score in breed_scores},
                'calibration_method': 'none'
            })()
        else:
            name_score_pairs = [(score.breed_name, score.final_score) for score in breed_scores]
            calibration_result = self.score_calibrator.calibrate_scores(name_score_pairs)
            print(f"Score calibration: method={calibration_result.calibration_method}")

        # Stage 5: build the final recommendation dicts
        # UI score key -> key in the scorer's dimensional breakdown.
        ui_to_breakdown = (
            ('space', 'spatial_compatibility'),
            ('exercise', 'activity_compatibility'),
            ('grooming', 'maintenance_compatibility'),
            ('experience', 'experience_compatibility'),
            ('noise', 'noise_compatibility'),
            ('family', 'family_compatibility'),
            ('health', 'health_compatibility'),
        )
        final_recommendations = []
        for rank, breed_score in enumerate(breed_scores[:top_k], start=1):
            breed_name = breed_score.breed_name
            calibrated_score = calibration_result.score_mapping.get(breed_name, breed_score.final_score)

            # Prefer standardized breed data; otherwise use the plain description.
            breed_key = breed_name.replace(' ', '_')
            standardized_info = get_standardized_breed_data(breed_key) if self.config_manager else None
            if standardized_info:
                breed_info = self._get_breed_info_from_standardized(standardized_info)
            else:
                breed_info = get_dog_description(breed_key) or {}

            # Convert the dimensional breakdown into the score format the UI reads.
            breakdown = breed_score.dimensional_breakdown or {}
            ui_scores = {ui_key: breakdown.get(source_key, 0.7)
                         for ui_key, source_key in ui_to_breakdown}
            ui_scores['overall'] = calibrated_score

            final_recommendations.append({
                'breed': breed_name,
                'rank': rank,
                'overall_score': calibrated_score,
                'final_score': calibrated_score,
                'semantic_score': breed_score.semantic_component,
                'attribute_score': breed_score.attribute_component,
                'bidirectional_bonus': breed_score.bidirectional_bonus,
                'confidence_score': breed_score.confidence_score,
                'dimensional_breakdown': breed_score.dimensional_breakdown,
                'scores': ui_scores,
                'explanation': breed_score.explanation,
                'size': breed_info.get('Size', 'Unknown'),
                'temperament': breed_info.get('Temperament', ''),
                'exercise_needs': breed_info.get('Exercise Needs', 'Moderate'),
                'grooming_needs': breed_info.get('Grooming Needs', 'Moderate'),
                'good_with_children': breed_info.get('Good with Children', 'Yes'),
                'lifespan': breed_info.get('Lifespan', '10-12 years'),
                'description': breed_info.get('Description', ''),
                'search_type': 'enhanced_description',
                'calibration_method': calibration_result.calibration_method,
                'applied_constraints': filter_result.applied_constraints,
                'relaxed_constraints': filter_result.relaxed_constraints,
                'warnings': filter_result.warnings
            })

        # Size-distribution correction, then stage 6: intelligent trait matching.
        corrected_recommendations = self._apply_size_distribution_correction(final_recommendations)
        intelligence_enhanced_recommendations = self._apply_intelligent_trait_matching(corrected_recommendations, user_input)
        print(f"Generated {len(intelligence_enhanced_recommendations)} enhanced semantic recommendations with intelligent trait matching")
        return intelligence_enhanced_recommendations
    except Exception as e:
        print(f"Error in enhanced semantic recommendations: {str(e)}")
        print(traceback.format_exc())
        # Fall back to the basic semantic method.
        return self.get_semantic_recommendations(user_input, top_k)
def get_semantic_recommendations(self, user_input: str, top_k: int = 15) -> List[Dict[str, Any]]:
    """
    Recommend breeds from a natural-language description.

    Each breed that is not disqualified by the hard constraints gets a
    display score built from its normalized semantic similarity, keyword
    feature matches, and the lifestyle and comparative bonuses. That score
    is then averaged with the rule-based dimension scores (minus a penalty
    for a very low dimension) to produce the ranking score.

    Args:
        user_input: The user's natural-language description.
        top_k: Maximum number of recommendations to return.

    Returns:
        Up to ``top_k`` recommendation dicts, best adjusted score first.
        An empty list is returned if anything raises, including the
        RuntimeError raised here when the SBERT model is unavailable.
    """
    def stable_jitter(key: str, spread: float) -> float:
        """Return a reproducible value in [-spread, spread] derived from key."""
        # hash() of a str is salted per interpreter process, so it cannot give
        # the same tie-breaking across runs. A SHA-256 digest is stable, and a
        # private Random leaves the global random state untouched.
        digest = hashlib.sha256(key.encode('utf-8', 'surrogatepass')).digest()
        return random.Random(int.from_bytes(digest[:8], 'big')).uniform(-spread, spread)

    # (compatibility lower bound, display range start, display range end, band width)
    score_tiers = (
        (0.9, 0.92, 0.98, 0.1),     # exceptional match
        (0.75, 0.85, 0.91, 0.15),   # excellent match
        (0.6, 0.75, 0.84, 0.15),    # good match
        (0.45, 0.65, 0.74, 0.15),   # fair match
        (0.3, 0.55, 0.64, 0.15),    # poor match
    )

    try:
        print(f"Processing user input: {user_input}")
        # Without the model there is nothing to compare against.
        if self.sbert_model is None:
            error_msg = "SBERT model not available. This could be due to:\n• Model download failed\n• Insufficient memory\n• Network connectivity issues\n\nPlease check your environment and try again."
            print(f"ERROR: {error_msg}")
            raise RuntimeError(error_msg)

        user_embedding = self.vector_manager.encode_text(user_input)
        comparative_prefs = self._parse_comparative_preferences(user_input)
        lifestyle_keywords = self._extract_lifestyle_keywords(user_input)
        user_text = user_input.lower()

        # Pass 1: similarity and bonuses for every breed that passes the hard constraints.
        # (The blended 'score' the original computed here was never read
        # afterwards, so it is no longer calculated.)
        candidates = []
        for breed, breed_vector in self.breed_vectors.items():
            constraint_penalty = self._apply_hard_constraints(breed, user_input, breed_vector.characteristics)
            if constraint_penalty <= -1.0:  # fully disqualified
                continue
            semantic_score = cosine_similarity(
                [user_embedding],
                [breed_vector.embedding]
            )[0][0]
            lifestyle_bonus = self._calculate_lifestyle_bonus(
                breed_vector.characteristics,
                lifestyle_keywords
            )
            # The constraint penalty is folded into the lifestyle bonus.
            lifestyle_bonus += constraint_penalty
            candidates.append({
                'breed': breed,
                'semantic_score': semantic_score,
                'comparative_bonus': comparative_prefs.get(breed, 0.0),
                'lifestyle_bonus': lifestyle_bonus
            })

        if not candidates:
            # Nothing survived the constraints; avoid statistics on an empty list.
            print("Generated 0 semantic recommendations")
            return []

        # Statistics used to normalize semantic scores across candidates.
        all_semantic_scores = [candidate['semantic_score'] for candidate in candidates]
        semantic_mean = np.mean(all_semantic_scores)
        semantic_std = np.std(all_semantic_scores) if len(all_semantic_scores) > 1 else 1.0

        # Pass 2: display score, dimension scores and ranking score per breed.
        breed_display_scores = []
        for candidate in candidates:
            breed = candidate['breed']
            base_semantic = candidate['semantic_score']

            # Z-score clipped to two standard deviations, mapped into 0.3-0.7.
            if semantic_std > 0:
                normalized_semantic = (base_semantic - semantic_mean) / semantic_std
                normalized_semantic = max(-2.0, min(2.0, normalized_semantic))
                scaled_semantic = 0.5 + (normalized_semantic * 0.1)
            else:
                scaled_semantic = 0.5

            # Looked up once per breed and reused for scoring and output.
            breed_info = (get_dog_description(breed) if breed != 'Unknown' else None) or {}
            breed_size = breed_info.get('Size', '').lower()
            exercise_needs = breed_info.get('Exercise Needs', '').lower()

            # Keyword feature matching (weighted above raw semantic similarity).
            feature_score = 0.0
            # Size and space requirements.
            if any(term in user_text for term in ['apartment', 'small', 'limited space']):
                if 'small' in breed_size:
                    feature_score += 0.25
                elif 'medium' in breed_size:
                    feature_score += 0.05
                elif 'large' in breed_size or 'giant' in breed_size:
                    feature_score -= 0.30
            # Exercise requirements.
            if any(term in user_text for term in ['low exercise', 'minimal exercise', "doesn't need", 'not much']):
                if 'low' in exercise_needs or 'minimal' in exercise_needs:
                    feature_score += 0.20
                elif 'high' in exercise_needs or 'very high' in exercise_needs:
                    feature_score -= 0.25
            elif any(term in user_text for term in ['active', 'high exercise', 'running', 'hiking']):
                if 'high' in exercise_needs:
                    feature_score += 0.20
                elif 'low' in exercise_needs:
                    feature_score -= 0.15
            # Family compatibility.
            if any(term in user_text for term in ['children', 'kids', 'family']):
                good_with_children = breed_info.get('Good with Children', '')
                if good_with_children == 'Yes':
                    feature_score += 0.10
                elif good_with_children == 'No':
                    feature_score -= 0.20

            base_compatibility = (
                scaled_semantic * 0.35 +
                feature_score * 0.45 +
                candidate['lifestyle_bonus'] * 0.15 +
                candidate['comparative_bonus'] * 0.05
            )

            # Map the raw compatibility into a display band.
            for lower_bound, range_start, range_end, width in score_tiers:
                if base_compatibility >= lower_bound:
                    position = (base_compatibility - lower_bound) / width
                    break
            else:
                # Very poor match.
                range_start, range_end = 0.45, 0.54
                position = max(0, base_compatibility / 0.3)
            base_score = range_start + (position * (range_end - range_start))

            # Small reproducible variation so ties rank naturally.
            variation = stable_jitter(breed + user_input[:15], 0.015)
            display_score = round(max(0.45, min(0.98, base_score + variation)), 3)

            # NOTE(review): real_scores['overall'] carries display_score, while
            # the recommendation's overall_score below is the adjusted score.
            real_scores = self._calculate_real_dimension_scores(
                breed, breed_info, user_input, display_score
            )

            # Ranking score: 50% display score, 50% mean of five dimensions,
            # minus a penalty when the weakest of them is below 0.5.
            dim_scores = [real_scores.get('space', 0.7), real_scores.get('exercise', 0.7),
                          real_scores.get('grooming', 0.7), real_scores.get('experience', 0.7),
                          real_scores.get('noise', 0.7)]
            avg_dim_score = sum(dim_scores) / len(dim_scores)
            min_dim_score = min(dim_scores)
            penalty = 0
            if min_dim_score < 0.5:
                penalty = (0.5 - min_dim_score) * 0.3

            breed_display_scores.append({
                'breed': breed,
                'breed_info': breed_info,
                'display_score': display_score,
                'semantic_score': base_semantic,
                'comparative_bonus': candidate['comparative_bonus'],
                'lifestyle_bonus': candidate['lifestyle_bonus'],
                'real_scores': real_scores,
                'adjusted_score': display_score * 0.5 + avg_dim_score * 0.5 - penalty
            })

        breed_display_scores.sort(key=lambda item: item['adjusted_score'], reverse=True)

        # Convert the best entries into the standard recommendation format.
        recommendations = []
        for rank, breed_data in enumerate(breed_display_scores[:top_k], start=1):
            breed_info = breed_data['breed_info']
            adjusted_score = breed_data['adjusted_score']
            recommendations.append({
                'breed': breed_data['breed'].replace('_', ' '),
                'rank': rank,
                'overall_score': adjusted_score,
                'final_score': adjusted_score,  # kept equal to overall_score
                'semantic_score': breed_data['semantic_score'],
                'comparative_bonus': breed_data['comparative_bonus'],
                'lifestyle_bonus': breed_data['lifestyle_bonus'],
                'size': breed_info.get('Size', 'Unknown'),
                'temperament': breed_info.get('Temperament', ''),
                'exercise_needs': breed_info.get('Exercise Needs', 'Moderate'),
                'grooming_needs': breed_info.get('Grooming Needs', 'Moderate'),
                'good_with_children': breed_info.get('Good with Children', 'Yes'),
                'lifespan': breed_info.get('Lifespan', '10-12 years'),
                'description': breed_info.get('Description', ''),
                'search_type': 'description',
                'scores': breed_data['real_scores']
            })
        print(f"Generated {len(recommendations)} semantic recommendations")
        return recommendations
    except Exception as e:
        print(f"Failed to generate semantic recommendations: {str(e)}")
        print(traceback.format_exc())
        return []
def get_enhanced_recommendations_with_unified_scoring(self, user_input: str, top_k: int = 15) -> List[Dict[str, Any]]:
    """
    Entry point for enhanced recommendations.

    Forwards to ``get_enhanced_semantic_recommendations`` and returns its
    result.

    Raises:
        RuntimeError: If the forwarded call (or the progress print) raises;
            the original exception is chained as the cause.
    """
    try:
        print(f"Processing enhanced recommendation with multi-head scoring: {user_input[:50]}...")
        results = self.get_enhanced_semantic_recommendations(user_input, top_k)
    except Exception as exc:
        failure = f"Enhanced recommendation error: {str(exc)}. Please check your description."
        print(f"ERROR: {failure}")
        print(traceback.format_exc())
        raise RuntimeError(failure) from exc
    return results
def _analyze_user_description_enhanced(self, user_description: str) -> Dict[str, Any]:
"""增強用戶描述分析"""
return self.query_analyzer.analyze_user_description_enhanced(user_description)
def _create_user_preferences_from_analysis_enhanced(self, analysis: Dict[str, Any]) -> UserPreferences:
    """Build a user-preferences object from an analysis by delegating to the query analyzer.

    Returns whatever
    ``self.query_analyzer.create_user_preferences_from_analysis_enhanced``
    returns.
    """
    analyzer = self.query_analyzer
    return analyzer.create_user_preferences_from_analysis_enhanced(analysis)
def _get_candidate_breeds_enhanced(self, analysis: Dict[str, Any]) -> List[str]:
"""獲取候選品種列表"""
return self.query_analyzer.get_candidate_breeds_enhanced(analysis)
def _apply_constraint_filtering_enhanced(self, breed: str, analysis: Dict[str, Any]) -> float:
    """
    Compute a score adjustment for ``breed`` from the user's constraint requirements.

    A basic in-place implementation: the original author notes that no
    dedicated method for this exists on the score calculator.

    Args:
        breed: Breed key used for ``get_dog_description`` and ``breed_noise_info``.
        analysis: Analysis result; ``analysis['constraint_requirements']`` is
            checked for 'low_noise', 'apartment_suitable' and 'child_friendly'.

    Returns:
        The accumulated adjustment (negative = penalty, positive = bonus);
        0.0 when no breed description is available.
    """
    adjustment = 0.0
    profile = get_dog_description(breed)
    if not profile:
        return adjustment

    requested = analysis['constraint_requirements']

    # Quiet-home requirement
    if 'low_noise' in requested:
        loudness = breed_noise_info.get(breed, {}).get('noise_level', 'moderate').lower()
        if 'high' in loudness:
            adjustment -= 0.3  # heavy penalty
        elif 'low' in loudness:
            adjustment += 0.1  # small bonus

    # Apartment suitability
    if 'apartment_suitable' in requested:
        breed_size = profile.get('Size', '').lower()
        activity = profile.get('Exercise Needs', '').lower()
        if breed_size in ('large', 'giant'):
            adjustment -= 0.2
        elif breed_size in ('small', 'tiny'):
            adjustment += 0.1
        if 'high' in activity:
            adjustment -= 0.15

    # Child friendliness
    if 'child_friendly' in requested:
        kid_rating = profile.get('Good with Children', 'Unknown')
        if kid_rating == 'Yes':
            adjustment += 0.15
        elif kid_rating == 'No':
            adjustment -= 0.4  # heavy penalty

    return adjustment
def _get_breed_characteristics_enhanced(self, breed: str) -> Dict[str, Any]:
"""獲取品種特徵"""
return self.score_calculator.get_breed_characteristics_enhanced(breed)
def get_hybrid_recommendations(self, user_description: str,
                               user_preferences: Optional[Any] = None,
                               top_k: int = 15) -> List[Dict[str, Any]]:
    """
    Hybrid recommendation: blend semantic matching with the traditional score.

    Args:
        user_description: The user's natural-language description.
        user_preferences: Optional structured preferences; when falsy, the
            semantic results are returned unblended.
        top_k: Number of recommendations to return.

    Returns:
        Up to ``top_k`` recommendation dicts. When preferences are given, each
        dict gains 'hybrid_score' and 'traditional_score', and the returned
        entries have 'rank' and 'overall_score' rewritten from the hybrid
        score. On any exception, the plain semantic recommendations are
        returned instead.
    """
    try:
        # Over-fetch semantic candidates so re-ranking has room to reorder.
        candidates = self.get_semantic_recommendations(user_description, top_k * 2)
        if not user_preferences:
            return candidates[:top_k]

        blended = []
        for rec in candidates:
            lookup_name = rec['breed'].replace(' ', '_')
            # Traditional compatibility score
            classic = calculate_compatibility_score(user_preferences, lookup_name)
            # Blend: 40% semantic + 60% traditional
            rec['hybrid_score'] = (
                rec['overall_score'] * 0.4 +
                classic * 0.6
            )
            rec['traditional_score'] = classic
            blended.append(rec)

        # Re-rank by the blended score, best first.
        ranked = sorted(blended, key=lambda rec: rec['hybrid_score'], reverse=True)
        top = ranked[:top_k]
        for position, rec in enumerate(top, start=1):
            rec['rank'] = position
            rec['overall_score'] = rec['hybrid_score']
        return top
    except Exception as exc:
        print(f"Hybrid recommendation failed: {str(exc)}")
        print(traceback.format_exc())
        return self.get_semantic_recommendations(user_description, top_k)
def get_breed_recommendations_by_description(user_description: str,
                                             user_preferences: Optional[Any] = None,
                                             top_k: int = 15) -> List[Dict[str, Any]]:
    """
    Main interface function for description-based breed recommendations.

    Tries, in order: the unified-scoring enhanced method, the enhanced
    semantic method, then the standard system (hybrid when
    ``user_preferences`` is truthy, plain semantic otherwise). A later stage
    is attempted only when the previous one returned no results.

    Args:
        user_description: The user's natural-language description.
        user_preferences: Optional structured preferences for the hybrid fallback.
        top_k: Number of recommendations requested.

    Returns:
        The first non-empty list of recommendation dicts.

    Raises:
        RuntimeError: If any stage raises or every stage returns no results;
            the underlying exception is chained as the cause.
    """
    def _non_empty(recs):
        return bool(recs) and len(recs) > 0

    try:
        print("Initializing Enhanced SemanticBreedRecommender...")
        engine = SemanticBreedRecommender()

        # Preferred path: enhanced recommendations with unified scoring.
        print("Using enhanced recommendation system with unified scoring")
        unified = engine.get_enhanced_recommendations_with_unified_scoring(user_description, top_k)
        if _non_empty(unified):
            print(f"Generated {len(unified)} enhanced recommendations successfully")
            return unified

        # Second attempt: the original enhanced semantic system.
        print("Enhanced unified system returned no results, trying original enhanced system")
        enhanced = engine.get_enhanced_semantic_recommendations(user_description, top_k)
        if _non_empty(enhanced):
            return enhanced

        # Last resort: the standard system.
        print("All enhanced systems failed, using standard system")
        if user_preferences:
            standard = engine.get_hybrid_recommendations(user_description, user_preferences, top_k)
        else:
            standard = engine.get_semantic_recommendations(user_description, top_k)

        if not standard:
            failure = "All recommendation systems failed to generate results. Please check your input description and try again. Error details may be in the console."
            print(f"ERROR: {failure}")
            raise RuntimeError(failure)
        return standard
    except Exception as exc:
        message = f"Critical error in recommendation system: {str(exc)}. Please check your input and system configuration."
        print(f"ERROR: {message}")
        print(traceback.format_exc())
        raise RuntimeError(message) from exc
def get_enhanced_recommendations_with_unified_scoring(user_description: str, top_k: int = 15) -> List[Dict[str, Any]]:
    """
    Module-level convenience function using the multi-head scoring system.

    Creates a ``SemanticBreedRecommender`` and walks a fallback chain:
    enhanced semantic recommendations, then basic semantic recommendations,
    then basic text matching. Text matching is used immediately when the
    SBERT model is unavailable. Every returned result list is passed through
    ``apply_smart_filtering``.

    Per the original author's notes, the enhanced path involves
    QueryUnderstandingEngine, PriorityDetector, MultiHeadScorer,
    DynamicWeightCalculator, and SmartBreedFilter (intervening only on
    cases that would genuinely harm the user).

    Args:
        user_description: The user's natural-language description.
        top_k: Number of recommendations requested.

    Returns:
        The filtered recommendation list from the first stage that produced results.

    Raises:
        RuntimeError: If an error escapes the fallback chain; the original
            exception is chained as the cause.
    """
    def _filtered(candidates):
        # Smart filtering is applied to every result set before returning it.
        return apply_smart_filtering(candidates, user_description)

    try:
        print(f"Processing description-based recommendation with multi-head scoring: {user_description[:50]}...")
        engine = SemanticBreedRecommender()

        # Without an SBERT model only keyword matching is possible.
        if not engine.vector_manager.is_model_available():
            print("SBERT model not available, using basic text matching...")
            keyword_hits = _get_basic_text_matching_recommendations(user_description, top_k, engine)
            return _filtered(keyword_hits)

        # Stage 1: full enhanced semantic recommendations.
        try:
            enhanced = engine.get_enhanced_semantic_recommendations(user_description, top_k)
            if enhanced:
                return _filtered(enhanced)
            print("Enhanced recommendations returned empty, falling back to basic semantic...")
        except Exception as enhanced_error:
            print(f"Enhanced recommendation failed: {str(enhanced_error)}, falling back to basic semantic...")
            print(traceback.format_exc())

        # Stage 2: basic semantic recommendations.
        try:
            semantic = engine.get_semantic_recommendations(user_description, top_k)
            if semantic:
                return _filtered(semantic)
        except Exception as semantic_error:
            print(f"Basic semantic recommendation also failed: {str(semantic_error)}")

        # Stage 3: keyword matching as the last resort.
        print("All semantic methods failed, using basic text matching as last resort...")
        last_resort = _get_basic_text_matching_recommendations(user_description, top_k, engine)
        return _filtered(last_resort)
    except Exception as exc:
        message = f"Error in semantic recommendation system: {str(exc)}. Please check your input and try again."
        print(f"ERROR: {message}")
        print(traceback.format_exc())
        raise RuntimeError(message) from exc
def _get_basic_text_matching_recommendations(user_description: str, top_k: int = 15, recommender=None) -> List[Dict[str, Any]]:
    """
    Basic keyword-matching recommendations (fallback when SBERT is unavailable).

    Steps:
      1. Load up to 50 distinct breed names from the ``AnimalCatalog`` table of
         ``animal_detector.db``; if that fails, use a built-in breed list.
      2. Best-effort constraint filtering through ``QueryUnderstandingEngine``
         and ``ConstraintManager``; a failure here only logs a warning.
      3. Score each breed by the fraction of description keywords found in its
         name/temperament/size/description text, then refine the score with
         ``recommender.score_calculator.calculate_enhanced_matching_score``.
      4. Return the ``top_k`` best breeds as recommendation dicts.

    Args:
        user_description: The user's natural-language description. An empty or
            whitespace-only description yields the minimum base score (0.3)
            for every breed rather than an error.
        top_k: Number of recommendations to return.
        recommender: Optional ``SemanticBreedRecommender``; one is created
            when omitted.

    Returns:
        Recommendation dicts ordered by descending final score.

    Raises:
        RuntimeError: Wraps any unexpected exception; the original is chained.
    """
    # Local import: contextlib is not among this module's file-level imports.
    from contextlib import closing

    try:
        print("Using basic text matching as fallback...")
        # Create a recommender if the caller did not supply one.
        if recommender is None:
            recommender = SemanticBreedRecommender()

        # Basic keyword matching
        keywords = user_description.lower().split()
        keyword_count = len(keywords)
        breed_scores = []

        # Load the breed list from the database, or fall back to a default list.
        try:
            # closing() guarantees the cursor and connection are released even
            # when the query fails (e.g. missing table); previously a failure
            # between connect() and close() leaked the connection.
            with closing(sqlite3.connect('animal_detector.db')) as conn:
                with closing(conn.cursor()) as cursor:
                    cursor.execute("SELECT DISTINCT Breed FROM AnimalCatalog LIMIT 50")
                    basic_breeds = [row[0] for row in cursor.fetchall()]
            # Exclude the wild-animal entry.
            basic_breeds = [breed for breed in basic_breeds if breed != 'Dhole']
        except Exception as e:
            print(f"Could not load breed list from database: {str(e)}")
            # Fallback breed list
            basic_breeds = [
                'Labrador_Retriever', 'Golden_Retriever', 'German_Shepherd', 'French_Bulldog',
                'Border_Collie', 'Poodle', 'Beagle', 'Rottweiler', 'Yorkshire_Terrier',
                'Dachshund', 'Boxer', 'Siberian_Husky', 'Great_Dane', 'Pomeranian', 'Shih_Tzu',
                'Maltese_Dog', 'Chihuahua', 'Cavalier_King_Charles_Spaniel', 'Boston_Terrier',
                'Japanese_Spaniel', 'Toy_Terrier', 'Affenpinscher', 'Pekingese', 'Lhasa'
            ]

        # Apply constraint filtering (best effort: failures only log a warning).
        try:
            from constraint_manager import ConstraintManager
            from query_understanding import QueryUnderstandingEngine
            query_engine = QueryUnderstandingEngine()
            dimensions = query_engine.analyze_query(user_description)
            constraint_manager = ConstraintManager()
            filter_result = constraint_manager.apply_constraints(dimensions)
            # Keep only breeds that passed the constraints.
            allowed_breeds = filter_result.passed_breeds
            count_before = len(basic_breeds)
            basic_breeds = [b for b in basic_breeds if b in allowed_breeds]
            print(f"Constraint filtering: {count_before} -> {len(basic_breeds)} breeds")
            # Log why selected breeds were filtered out (debugging aid).
            for breed, reason in filter_result.filtered_breeds.items():
                if breed in ['Italian_Greyhound', 'Rottweiler', 'Malinois']:
                    print(f"  Filtered {breed}: {reason}")
        except Exception as e:
            print(f"Warning: Could not apply constraints: {str(e)}")

        for breed in basic_breeds:
            breed_info = get_dog_description(breed) or {}
            breed_text = f"{breed} {breed_info.get('Temperament', '')} {breed_info.get('Size', '')} {breed_info.get('Description', '')}".lower()
            # Keyword match score. With no keywords at all the ratio is 0,
            # which avoids the ZeroDivisionError an empty description caused.
            matches = sum(1 for keyword in keywords if keyword in breed_text)
            match_ratio = matches / keyword_count if keyword_count else 0.0
            base_score = min(0.95, 0.3 + match_ratio * 0.6)
            # Apply the enhanced matching logic.
            enhanced_score = recommender.score_calculator.calculate_enhanced_matching_score(
                breed, breed_info, user_description, base_score
            )
            breed_scores.append((breed, enhanced_score['final_score'], breed_info, enhanced_score))

        # Sort by score, best first.
        breed_scores.sort(key=lambda x: x[1], reverse=True)

        recommendations = []
        for i, (breed, final_score, breed_info, enhanced_score) in enumerate(breed_scores[:top_k]):
            recommendation = {
                'breed': breed.replace('_', ' '),
                'rank': i + 1,
                'overall_score': final_score,
                'final_score': final_score,
                'semantic_score': enhanced_score.get('weighted_score', final_score),
                'comparative_bonus': enhanced_score.get('lifestyle_bonus', 0.0),
                'lifestyle_bonus': enhanced_score.get('lifestyle_bonus', 0.0),
                'size': breed_info.get('Size', 'Unknown'),
                'temperament': breed_info.get('Temperament', 'Unknown'),
                'exercise_needs': breed_info.get('Exercise Needs', 'Moderate'),
                'grooming_needs': breed_info.get('Grooming Needs', 'Moderate'),
                'good_with_children': breed_info.get('Good with Children', 'Unknown'),
                'lifespan': breed_info.get('Lifespan', '10-12 years'),
                'description': breed_info.get('Description', 'No description available'),
                'search_type': 'description',
                # When no per-dimension scores are provided, derive
                # placeholder values by scaling the final score.
                'scores': enhanced_score.get('dimension_scores', {
                    'space': final_score * 0.9,
                    'exercise': final_score * 0.85,
                    'grooming': final_score * 0.8,
                    'experience': final_score * 0.75,
                    'noise': final_score * 0.7,
                    'family': final_score * 0.65
                })
            }
            recommendations.append(recommendation)
        return recommendations
    except Exception as e:
        error_msg = f"Error in basic text matching: {str(e)}"
        print(f"ERROR: {error_msg}")
        raise RuntimeError(error_msg) from e