PawMatchAI / adaptive_score_distribution.py
DawnC's picture
Upload 19 files
1b3ab7b verified
# %%writefile adaptive_score_distribution.py
import numpy as np
from typing import List, Tuple, Dict, Optional, Any
from dataclasses import dataclass, field
import traceback
@dataclass
class GradientAnalysis:
"""梯度分析結果"""
top_score: float
bottom_score: float
score_range: float
top5_std: float
top5_range: float
gradient_type: str # 'steep', 'moderate', 'flat'
score_distribution: List[float] = field(default_factory=list)
@dataclass
class ScenarioClassification:
"""情境分類結果"""
scenario_type: str # 'perfect_match', 'good_choices', 'moderate_fit', 'challenging'
confidence: float
reasoning: str
@dataclass
class DistributionResult:
"""分數分佈結果"""
final_scores: List[Tuple[str, float]] = field(default_factory=list)
gradient_analysis: Optional[GradientAnalysis] = None
scenario_classification: Optional[ScenarioClassification] = None
adjustment_applied: str = 'none'
adjustment_notes: List[str] = field(default_factory=list)
class AdaptiveScoreDistribution:
"""
自適應分數分佈系統
根據情境梯度自然形成分數分佈,不強制固定範圍
核心理念:
- 完美匹配 → 自然高分 (90+)
- 多個選擇 → 自然接近 (差距2-5分)
- 不適合 → 自然偏低 (60-70)
- 保證最低分 >= 60
"""
def __init__(self):
"""初始化自適應分數分佈系統"""
self.min_score = 0.60 # 全域最低分(觸底保護)
self.no_intervention_threshold = 0.10
self.gradient_thresholds = {
'steep_std': 0.04,
'steep_range': 0.12,
'flat_std': 0.02,
'flat_range': 0.05
}
def distribute_scores(self,
raw_scores: List[Tuple[str, float]]) -> DistributionResult:
"""
自適應分數分佈
Args:
raw_scores: 原始分數列表 [(breed_name, score), ...]
Returns:
DistributionResult: 分佈結果
"""
try:
if not raw_scores:
return DistributionResult()
# Step 1: 分析梯度
gradient_analysis = self._analyze_gradient(raw_scores)
# Step 2: 判斷情境
scenario = self._classify_scenario(gradient_analysis)
# Step 3: 決定調整策略
adjusted_scores, adjustment_type, notes = self._apply_adaptive_strategy(
raw_scores, scenario, gradient_analysis
)
# Step 4: 應用最低分保護
final_scores = self._apply_floor_protection(adjusted_scores)
return DistributionResult(
final_scores=final_scores,
gradient_analysis=gradient_analysis,
scenario_classification=scenario,
adjustment_applied=adjustment_type,
adjustment_notes=notes
)
except Exception as e:
print(f"Error distributing scores: {str(e)}")
print(traceback.format_exc())
return DistributionResult(
final_scores=raw_scores,
adjustment_applied='error_fallback'
)
def _analyze_gradient(self,
scores: List[Tuple[str, float]]) -> GradientAnalysis:
"""
分析分數梯度特徵
Args:
scores: 分數列表
Returns:
GradientAnalysis: 梯度分析結果
"""
try:
sorted_scores = sorted(scores, key=lambda x: x[1], reverse=True)
score_values = [s[1] for s in sorted_scores]
top_score = score_values[0] if score_values else 0.5
bottom_score = score_values[-1] if score_values else 0.5
score_range = top_score - bottom_score
# 前5名統計
top5_scores = score_values[:min(5, len(score_values))]
top5_std = float(np.std(top5_scores)) if len(top5_scores) > 1 else 0.0
top5_range = top5_scores[0] - top5_scores[-1] if len(top5_scores) >= 2 else 0.0
# 梯度類型判斷
if top5_std > self.gradient_thresholds['steep_std'] or \
top5_range > self.gradient_thresholds['steep_range']:
gradient_type = 'steep'
elif top5_std < self.gradient_thresholds['flat_std'] or \
top5_range < self.gradient_thresholds['flat_range']:
gradient_type = 'flat'
else:
gradient_type = 'moderate'
return GradientAnalysis(
top_score=top_score,
bottom_score=bottom_score,
score_range=score_range,
top5_std=top5_std,
top5_range=top5_range,
gradient_type=gradient_type,
score_distribution=score_values
)
except Exception as e:
print(f"Error analyzing gradient: {str(e)}")
return GradientAnalysis(
top_score=0.5,
bottom_score=0.5,
score_range=0.0,
top5_std=0.0,
top5_range=0.0,
gradient_type='moderate',
score_distribution=[]
)
def _classify_scenario(self,
gradient_analysis: GradientAnalysis) -> ScenarioClassification:
"""
根據梯度分析分類情境
情境類型:
1. perfect_match: 完美匹配(第1名分數高且梯度陡峭)
2. good_choices: 多個好選擇(前5名分數都高且梯度平坦)
3. moderate_fit: 中等匹配(第1名分數中等)
4. challenging: 挑戰情境(第1名分數偏低)
Args:
gradient_analysis: 梯度分析結果
Returns:
ScenarioClassification: 情境分類結果
"""
top_score = gradient_analysis.top_score
gradient_type = gradient_analysis.gradient_type
if top_score >= 0.88 and gradient_type == 'steep': # Increased from 0.85
return ScenarioClassification(
scenario_type='perfect_match',
confidence=0.9,
reasoning="High top score with clear differentiation indicates perfect match"
)
elif top_score >= 0.78 and gradient_type == 'flat': # Increased from 0.75
return ScenarioClassification(
scenario_type='good_choices',
confidence=0.85,
reasoning="Multiple high-scoring breeds with similar fitness"
)
elif top_score >= 0.68: # Reduced from 0.70 to be less inflating
return ScenarioClassification(
scenario_type='moderate_fit',
confidence=0.75,
reasoning="Moderate match quality with acceptable options"
)
else:
return ScenarioClassification(
scenario_type='challenging',
confidence=0.65,
reasoning="Lower overall match quality, may need requirement adjustment"
)
def _apply_adaptive_strategy(self,
raw_scores: List[Tuple[str, float]],
scenario: ScenarioClassification,
gradient_analysis: GradientAnalysis) -> Tuple[List[Tuple[str, float]], str, List[str]]:
"""
根據情境類型應用不同的調整策略
Args:
raw_scores: 原始分數
scenario: 情境分類
gradient_analysis: 梯度分析
Returns:
Tuple: (調整後分數, 調整類型, 調整註記)
"""
sorted_scores = sorted(raw_scores, key=lambda x: x[1], reverse=True)
notes = []
if scenario.scenario_type == 'perfect_match':
# 完美匹配: 不調整,保持自然
notes.append("Perfect match scenario: No adjustment needed")
return sorted_scores, 'no_adjustment', notes
elif scenario.scenario_type == 'good_choices':
# 多個好選擇: 確保最小區分度
adjusted, adjustment_notes = self._ensure_minimum_differentiation(
sorted_scores, gradient_analysis
)
notes.extend(adjustment_notes)
return adjusted, 'minimum_differentiation', notes
elif scenario.scenario_type == 'moderate_fit':
# 中等匹配: 溫和提升
adjusted, adjustment_notes = self._gentle_uplift(
sorted_scores, target_top=0.80
)
notes.extend(adjustment_notes)
return adjusted, 'gentle_uplift', notes
elif scenario.scenario_type == 'challenging':
# 挑戰情境: 適度提升但不過度
adjusted, adjustment_notes = self._moderate_uplift(
sorted_scores, target_top=0.72
)
notes.extend(adjustment_notes)
return adjusted, 'moderate_uplift', notes
return sorted_scores, 'no_adjustment', notes
def _ensure_minimum_differentiation(self,
scores: List[Tuple[str, float]],
gradient_analysis: GradientAnalysis) -> Tuple[List[Tuple[str, float]], List[str]]:
"""
確保最小區分度(當分數過於接近時)
Args:
scores: 分數列表
gradient_analysis: 梯度分析
Returns:
Tuple: (調整後分數, 註記)
"""
notes = []
top5_range = gradient_analysis.top5_range
# 如果前5名差距 >= 5%,不需要調整
if top5_range >= 0.05:
notes.append(f"Differentiation sufficient (range: {top5_range:.3f})")
return scores, notes
# 需要擴展區分度
top5 = scores[:5]
rest = scores[5:]
target_range = 0.05
current_top = top5[0][1] if top5 else 0.5
current_bottom = top5[-1][1] if len(top5) > 0 else 0.5
adjusted_top5 = []
for i, (breed, score) in enumerate(top5):
if len(top5) > 1:
position = i / (len(top5) - 1)
new_score = current_top - (position * target_range)
else:
new_score = score
adjusted_top5.append((breed, new_score))
notes.append(f"Expanded top 5 differentiation to {target_range:.1%}")
return adjusted_top5 + rest, notes
def _gentle_uplift(self,
scores: List[Tuple[str, float]],
target_top: float = 0.75) -> Tuple[List[Tuple[str, float]], List[str]]:
"""
溫和提升(保持分數分佈形狀)
Args:
scores: 分數列表
target_top: 目標第1名分數 (reduced from 0.80 to 0.75)
Returns:
Tuple: (調整後分數, 註記)
"""
notes = []
if not scores:
return scores, notes
current_top = scores[0][1]
if current_top >= target_top:
notes.append(f"Top score already sufficient ({current_top:.3f})")
return scores, notes
# 計算提升量
uplift = target_top - current_top
# 所有品種統一提升
adjusted = [(breed, min(1.0, score + uplift)) for breed, score in scores]
notes.append(f"Applied gentle uplift: +{uplift:.3f} to all breeds")
return adjusted, notes
def _moderate_uplift(self,
scores: List[Tuple[str, float]],
target_top: float = 0.68) -> Tuple[List[Tuple[str, float]], List[str]]:
"""
適度提升(挑戰情境)
Args:
scores: 分數列表
target_top: 目標第1名分數 (reduced from 0.72 to 0.68)
Returns:
Tuple: (調整後分數, 註記)
"""
notes = []
if not scores:
return scores, notes
current_top = scores[0][1]
current_bottom = scores[-1][1] if scores else 0.5
adjusted = []
for breed, score in scores:
# 非線性提升: 分數越高提升越多
if current_top > current_bottom:
relative_position = (score - current_bottom) / (current_top - current_bottom + 0.001)
else:
relative_position = 1.0
uplift_factor = 1.0 + (relative_position * 0.12) # 最多提升12% (reduced from 15%)
new_score = min(1.0, score * uplift_factor)
adjusted.append((breed, new_score))
notes.append("Applied moderate uplift with position-based scaling")
return adjusted, notes
def _apply_floor_protection(self,
scores: List[Tuple[str, float]]) -> List[Tuple[str, float]]:
"""
應用最低分保護(確保沒有品種低於60分)
Args:
scores: 分數列表
Returns:
List[Tuple[str, float]]: 保護後分數
"""
protected = []
for breed, score in scores:
protected_score = max(self.min_score, score)
protected.append((breed, protected_score))
return protected
def get_distribution_summary(self, result: DistributionResult) -> Dict[str, Any]:
"""
獲取分佈摘要
Args:
result: 分佈結果
Returns:
Dict[str, Any]: 分佈摘要
"""
if not result.final_scores:
return {'error': 'No scores to summarize'}
score_values = [s[1] for s in result.final_scores]
return {
'scenario_type': result.scenario_classification.scenario_type if result.scenario_classification else 'unknown',
'adjustment_applied': result.adjustment_applied,
'score_statistics': {
'top_score': max(score_values) if score_values else 0,
'bottom_score': min(score_values) if score_values else 0,
'mean_score': float(np.mean(score_values)) if score_values else 0,
'std_score': float(np.std(score_values)) if score_values else 0,
'range': max(score_values) - min(score_values) if score_values else 0
},
'gradient_info': {
'type': result.gradient_analysis.gradient_type if result.gradient_analysis else 'unknown',
'top5_std': result.gradient_analysis.top5_std if result.gradient_analysis else 0,
'top5_range': result.gradient_analysis.top5_range if result.gradient_analysis else 0
},
'adjustment_notes': result.adjustment_notes,
'top_3_breeds': result.final_scores[:3] if result.final_scores else []
}
def distribute_breed_scores(raw_scores: List[Tuple[str, float]]) -> DistributionResult:
"""
便利函數: 分佈品種分數
Args:
raw_scores: 原始分數列表
Returns:
DistributionResult: 分佈結果
"""
distributor = AdaptiveScoreDistribution()
return distributor.distribute_scores(raw_scores)
def get_distribution_summary(raw_scores: List[Tuple[str, float]]) -> Dict[str, Any]:
"""
便利函數: 獲取分佈摘要
Args:
raw_scores: 原始分數列表
Returns:
Dict[str, Any]: 分佈摘要
"""
distributor = AdaptiveScoreDistribution()
result = distributor.distribute_scores(raw_scores)
return distributor.get_distribution_summary(result)