"""Contradiction detection and resolution.

Detects conflicting classification types that were produced by different
confusion groups. ``CONTRADICTION_PAIRS`` lists the type pairs that may
contradict each other.
"""

from __future__ import annotations

from typing import Any

# ── Contradiction pair definitions ──────────────────────────────────────────
# Each entry names a pair of resolved types that must not both be present.
# NOTE(review): three entries share the name "division_50_25_100"; that name
# is what resolve_contradiction passes to resolve_confusion_pair, so the three
# pairs are indistinguishable there — confirm this is intended.
CONTRADICTION_PAIRS: list[dict[str, str]] = [
    {
        "name": "matching_vs_keybreak",
        "type_a": "マッチング",
        "type_b": "キーブレイク",
    },
    {
        "name": "dedup_vs_nodedup",
        "type_a": "項目チェック(重複含む)",
        "type_b": "項目チェック(重複含まず)",
    },
    {
        "name": "validation_vs_keybreak",
        "type_a": "編集処理(校验)",
        "type_b": "キーブレイク",
    },
    {
        "name": "csv_merge_vs_split",
        "type_a": "CSV合并",
        "type_b": "CSV拆分",
    },
    {
        "name": "simple_vs_two_stage",
        "type_a": "単純マッチング",
        "type_b": "二段階マッチング",
    },
    {
        "name": "pure_vs_mixed",
        "type_a": "純粋マッチング",
        "type_b": "混合マッチング",
    },
    {
        "name": "division_50_25_100",
        "type_a": "DIVIDE_50",
        "type_b": "DIVIDE_100",
    },
    {
        "name": "division_50_25_100",
        "type_a": "DIVIDE_50",
        "type_b": "DIVIDE_25",
    },
    {
        "name": "division_50_25_100",
        "type_a": "DIVIDE_100",
        "type_b": "DIVIDE_25",
    },
    {
        "name": "mn_output_mode",
        "type_a": "M:N",
        "type_b": "1:1",
    },
]

# ── Conflict priority ───────────────────────────────────────────────────────
# When two contradictory types were both resolved, the one with the higher
# priority wins. Types missing from this table are treated as priority 0.
TYPE_PRIORITY: dict[str, int] = {
    "マッチング": 10,
    "キーブレイク": 9,
    "項目チェック(重複含む)": 8,
    "項目チェック(重複含まず)": 8,
    "編集処理(校验)": 7,
    "CSV合并": 6,
    "CSV拆分": 6,
    "単純マッチング": 5,
    "二段階マッチング": 5,
    "純粋マッチング": 4,
    "混合マッチング": 4,
    "DIVIDE_50": 3,
    "DIVIDE_100": 3,
    "DIVIDE_25": 3,
    "M:N": 2,
    "1:1": 2,
}

# Sentinel for "no matching key", so that any real key (even None) is valid.
_NOT_FOUND: Any = object()


def _first_key_with_type(
    resolved_types: dict[str, Any],
    wanted: str,
    exclude: Any = _NOT_FOUND,
) -> Any:
    """Return the first key (in dict order) whose value equals *wanted*.

    Keys equal to *exclude* are skipped. Returns ``_NOT_FOUND`` when no key
    matches.
    """
    for key, resolved_type in resolved_types.items():
        if resolved_type == wanted and key != exclude:
            return key
    return _NOT_FOUND


def detect_contradictions(features: dict) -> list[dict]:
    """Detect contradictory type pairs among the already-resolved types.

    Looks at ``features["resolved_types"]`` and, for every entry of
    ``CONTRADICTION_PAIRS``, checks whether both types of the pair were
    resolved under different keys. At most one contradiction is reported per
    pair entry, using the first key (in dict order) that holds each type.

    Args:
        features: Feature dictionary; its ``"resolved_types"`` entry maps a
            source key to the type resolved for it.

    Returns:
        A list of contradictions in ``CONTRADICTION_PAIRS`` order. Each
        element has the form::

            {"name": str, "type_a": str, "type_b": str,
             "source_a": <key holding type_a>,
             "source_b": <key holding type_b>}

        An empty list is returned when ``"resolved_types"`` is missing or
        empty.
    """
    resolved_types: dict[str, str] = features.get("resolved_types", {})
    if not resolved_types:
        return []

    contradictions: list[dict] = []
    for pair in CONTRADICTION_PAIRS:
        type_a = pair["type_a"]
        type_b = pair["type_b"]

        source_a = _first_key_with_type(resolved_types, type_a)
        if source_a is _NOT_FOUND:
            continue

        # The partner type must come from a different key than type_a.
        source_b = _first_key_with_type(
            resolved_types, type_b, exclude=source_a
        )
        if source_b is _NOT_FOUND:
            continue

        contradictions.append({
            "name": pair["name"],
            "type_a": type_a,
            "type_b": type_b,
            "source_a": source_a,
            "source_b": source_b,
        })
    return contradictions


def resolve_contradiction(features: dict, contradiction: dict) -> str:
    """Resolve one contradiction and return the winning type name.

    Strategy:
        1. Pick the type with the higher ``TYPE_PRIORITY`` (unknown types
           count as priority 0).
        2. On a tie, re-evaluate the pair through
           ``confusion_groups.resolve_confusion_pair`` and accept its answer
           when its confidence is at least 0.80.
        3. Otherwise fall back to ``type_a``.

    Args:
        features: The complete feature dictionary.
        contradiction: A single element returned by
            ``detect_contradictions``; must contain ``"type_a"`` and
            ``"type_b"``, and optionally ``"name"``.

    Returns:
        The name of the winning type.

    Raises:
        KeyError: If *contradiction* lacks ``"type_a"`` or ``"type_b"``.
    """
    type_a = contradiction["type_a"]
    type_b = contradiction["type_b"]

    priority_a = TYPE_PRIORITY.get(type_a, 0)
    priority_b = TYPE_PRIORITY.get(type_b, 0)
    if priority_a > priority_b:
        return type_a
    if priority_b > priority_a:
        return type_b

    # Equal priority: try a re-evaluation via confusion_groups.
    pair_name = contradiction.get("name", "")
    if pair_name:
        # Imported only when needed. NOTE(review): the function-level import
        # is presumably there to avoid a circular import — confirm.
        from .confusion_groups import resolve_confusion_pair

        result = resolve_confusion_pair(features, pair_name)
        # Treat a missing or None confidence as 0 so it cannot raise.
        confidence = result.get("confidence") or 0
        if confidence >= 0.80:
            # A confident result without a usable type falls through to the
            # final fallback instead of raising KeyError.
            winner = result.get("resolved_type")
            if winner:
                return winner

    # Final fallback: keep type_a.
    return type_a