Files
cobol-java-v3/hina/confidence.py
NB-076 65e9919933 feat: matching program full recognition — L1 regex keyword + confidence consensus
Three-part fix for matching program classification:
1. L1 regex keyword WS-[-\w]*KEY (confidence 0.65):
   - Captures WS-KEY, WS-MAST-KEY, WS-TRAN-KEY, WS-PREV-KEY etc.
   - Matches ALL 10 matching programs including MT02 (which uses
     WS-MAST-KEY/WS-TRAN-KEY that literal 'WS-KEY' missed)
   - False positives (ST-SEARCH-ALL, VL01) overridden by rule engine
     or higher-confidence ORGANIZATION IS keyword
   - detect_keyword() extended with 're:' prefix for regex patterns

2. Consensus bonus in compute_confidence_v2:
   - When L1 keyword category matches rule engine's final category,
     context_factor boosted by +0.15
   - Pushes matching programs from manual (0.50-0.69) toward
     review (0.70-0.89) range

3. Confidence calibration for confusion groups (previous commit):
   - dedup_vs_nodedup: 0.85→0.50 for negative detection
   - validation_vs_keybreak: 0.80→0.55 for has_counter
   - simple_vs_two_stage: 0.80→0.50 for sequential OPEN

Results - matching programs:
  MT01: 0.38→0.75, MT02: 0.30→0.60, MT03: 0.30→0.60,
  MT16: 0.45→0.81, MT17: 0.36→0.65, MT18: 0.60→0.60,
  MT19: 0.30→0.60, MT20: 0.30→0.65, MT33: 0.30→0.60
  All now rule_engine (not fallback), no false negatives.

Subtype discrimination remains for future work: all matching
programs classified as マッチング without 1:1/1:N/N:1 subtype.
2026-06-21 13:25:39 +08:00

121 lines
4.1 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
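"""
Four-factor confidence calculation.

Formula: confidence = base * context_factor * consistency_factor * structure_factor

Judgment:
    >= 0.90           auto       - passes automatically
    0.70 to < 0.90    review     - needs human review
    0.50 to < 0.70    manual     - needs manual intervention
    < 0.50            impossible - cannot be determined
"""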
from __future__ import annotations
from typing import Any
def compute_confidence_v2(
    keyword_result: dict[str, Any],
    structure_features: dict[str, Any],
    contradictions: list[dict[str, Any]] | None = None,
    resolution: dict[str, Any] | None = None,
    consensus_category: str | None = None,
) -> dict[str, Any]:
    """Compute the four-factor classification confidence.

    The result is ``base * context_factor * consistency_factor *
    structure_factor``, rounded to 4 decimal places, together with the
    judgment band that the rounded value falls into.

    Args:
        keyword_result: L1 keyword detection result. The keys read here are
            ``"base_confidence"`` (default 0.7), ``"match_count"``
            (default 0) and ``"category"`` (default ``""``), e.g.
            ``{"category": "DB操作", "base_confidence": 0.95, "match_count": 3}``.
        structure_features: Structure analysis result. Only
            ``"structure_match_score"`` (default 0) is read.
        contradictions: Detected contradictions; each entry is a dict whose
            ``"resolved"`` flag (default ``False``) is inspected. ``None``
            is treated as an empty list.
        resolution: Accepted for interface compatibility; this function
            does not read it.
        consensus_category: Final category to compare against the keyword
            category. When both are non-empty and equal, the context factor
            receives a +0.15 consensus bonus, capped at 1.0.

    Returns:
        A dict with the keys ``"confidence"``, ``"base"``,
        ``"context_factor"``, ``"consistency_factor"``,
        ``"structure_factor"``, ``"judgment"`` (one of ``"auto"``,
        ``"review"``, ``"manual"``, ``"impossible"``) and ``"needs_review"``
        (``False`` only for ``"auto"``).
    """
    # 1. Base confidence comes straight from the keyword stage.
    base = keyword_result.get("base_confidence", 0.7)

    # 2. Context factor: more keyword matches give a higher factor.
    match_count = keyword_result.get("match_count", 0)
    if match_count >= 3:
        context_factor = 1.0
    elif match_count == 2:
        context_factor = 0.95
    elif match_count == 1:
        context_factor = 0.90
    else:
        context_factor = 0.50

    # Consensus bonus when the keyword category equals the supplied final
    # category; empty or missing categories never count as agreement.
    kw_category = keyword_result.get("category", "")
    if consensus_category and kw_category and kw_category == consensus_category:
        context_factor = min(context_factor + 0.15, 1.0)

    # 3. Consistency factor from the contradiction list.
    found = contradictions or []
    unresolved_count = sum(1 for item in found if not item.get("resolved", False))
    if not found:
        consistency_factor = 1.0
    elif unresolved_count == 0:
        # Contradictions were found, but every one of them is resolved.
        consistency_factor = 0.90
    elif len(found) >= 3:
        # At least one unresolved contradiction. Note that this threshold
        # is applied to the TOTAL number of contradictions, not to the
        # number of unresolved ones.
        consistency_factor = 0.50
    else:
        # At least one unresolved contradiction, fewer than 3 in total.
        consistency_factor = 0.80

    # 4. Structure factor. ``>= 5`` (rather than ``== 5``) keeps the mapping
    # monotonic: a score above 5 must not rank below a score of exactly 5.
    structure_score = structure_features.get("structure_match_score", 0)
    if structure_score >= 5:
        structure_factor = 1.0
    elif structure_score >= 3:
        structure_factor = 0.7
    elif structure_score >= 1:
        structure_factor = 0.5
    else:
        structure_factor = 0.3

    # Combine; the judgment thresholds are applied to the rounded value.
    confidence = round(
        base * context_factor * consistency_factor * structure_factor, 4
    )

    if confidence >= 0.90:
        judgment = "auto"
    elif confidence >= 0.70:
        judgment = "review"
    elif confidence >= 0.50:
        judgment = "manual"
    else:
        judgment = "impossible"

    return {
        "confidence": confidence,
        "base": base,
        "context_factor": context_factor,
        "consistency_factor": consistency_factor,
        "structure_factor": structure_factor,
        "judgment": judgment,
        # Every band except "auto" requires a human to look at the result.
        "needs_review": judgment != "auto",
    }