feat: Phase 2 complete — 13 Phases of COBOL type classification and test benchmark

P0.6: gcov infrastructure; P1: extract_structure output expansion (11 new feature fields); P2: Confusion group rule engine (8 pairs + contradiction + backtrack); P3: 4-factor confidence calculation + quality gate update; P4: 33+2 COBOL program type test samples (22 files, 7 categories); P5: parametrized/ test data generation engine; P6: japanese_data.py lookup tables; P7-10: Type-specific test suites (~159 parametrized tests); P11: Full classification pipeline (classify_program) + orchestrator integration; P12: Documentation (module-interfaces, test-plan v3.0, coverage-matrix). Architecture decisions: classification_pipeline/ merged to hina/pipeline/; parametrized/ as independent module; japanese_data.py as root-level file; hina/__all__ only exports classify_program(). Co-Authored-By: Claude <noreply@anthropic.com>
2026-06-19 23:51:55 +08:00
parent 63b5284715
commit bc1d56d1a4
129 changed files with 19378 additions and 261 deletions
@@ -0,0 +1,419 @@
+"""
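+Full program-type classification pipeline — classify_program()
+
+Flow:
+  1. In parallel: detect_keyword() + extract_structure()
+  2. keyword confidence >= 90% -> emit the result directly
+  3. keyword 50-89% -> rule engine + confidence calculation + contradiction backtracking
+  4. keyword < 50% -> LLM-assisted + rule-engine verification
+  5. Emit the final JSON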
+"""
+
+from __future__ import annotations
+
+import logging
+from concurrent.futures import ThreadPoolExecutor, as_completed
+from typing import Any
+
+from hina.classifier import detect_keyword
+from hina.confidence import compute_confidence_v2
+from hina.rule_engine.confusion_groups import resolve_confusion_pair
+from hina.rule_engine.contradiction import (
+    CONTRADICTION_PAIRS,
+    detect_contradictions,
+    resolve_contradiction,
+)
+from cobol_testgen import extract_structure
+
+logger = logging.getLogger(__name__)
+
+# Names of all confusion pairs the pipeline attempts; each name is passed to
+# resolve_confusion_pair() in turn, in this order.
+_PAIR_NAMES = [
+    "matching_vs_keybreak",
+    "dedup_vs_nodedup",
+    "validation_vs_keybreak",
+    "csv_merge_vs_split",
+    "simple_vs_two_stage",
+    "pure_vs_mixed",
+    "division_50_25_100",
+    "mn_output_mode",
+]
+
+
+# ── Internal helpers ──────────────────────────────────────────────────────────
+
+
+def _get_best_keyword_match(matches: list) -> dict | None:
+    """从 L1 关键字匹配结果中找出最佳匹配。
+
+    Args:
+        matches: detect_keyword() 返回的 list[tuple[str, float, str]]
+
+    Returns:
+        dict | None: {"category", "confidence", "keyword", "all_matches"}
+    """
+    if not matches:
+        return None
+    best = max(matches, key=lambda m: m[1])  # (category, confidence, keyword)
+    return {
+        "category": best[0],
+        "confidence": best[1],
+        "keyword": best[2],
+        "all_matches": matches,
+    }
+
+
+def _compute_structure_match_score(structure: dict) -> int:
+    """计算结构匹配度评分 (0-5)，供 compute_confidence_v2 使用。"""
+    return min(
+        5,
+        bool(structure.get("total_paragraphs", 0))       # 有段落
+        + bool(structure.get("file_count", 0))            # 有文件
+        + bool(len(structure.get("decision_points", []))) # 有决策点
+        + bool(structure.get("if_types", {}).get("total", 0))  # 有 IF
+        + bool(structure.get("branch_tree_obj") is not None),  # 有分支树
+    )
+
+
+def _build_structure_summary(structure: dict) -> dict:
+    """从完整结构中提取调试摘要。"""
+    return {
+        "paragraph_count": structure.get("total_paragraphs", 0),
+        "file_count": structure.get("file_count", 0),
+        "decision_count": len(structure.get("decision_points", [])),
+        "has_call": structure.get("has_call", False),
+        "has_divide": structure.get("has_divide", False),
+    }
+
+
+def _build_keyword_result_for_v2(keyword_info: dict | None) -> dict:
+    """构建 compute_confidence_v2 所需的 keyword_result。"""
+    if keyword_info:
+        return {
+            "base_confidence": keyword_info["confidence"],
+            "match_count": len(keyword_info["all_matches"]),
+        }
+    return {"base_confidence": 0.0, "match_count": 0}
+
+
+def _build_structure_features(structure: dict) -> dict:
+    """构建 compute_confidence_v2 所需的 structure_features。"""
+    return {
+        "structure_match_score": _compute_structure_match_score(structure),
+        "total_paragraphs": structure.get("total_paragraphs", 0),
+    }
+
+
+# ── Per-path logic ────────────────────────────────────────────────────────────
+
+
+def _path_keyword_direct(
+    keyword_info: dict,
+    structure: dict,
+) -> dict:
+    """路径 A: keyword confidence >= 90%, 直接输出。
+
+    仍会计算 v2 确信度用于最终 validation，但结果来源标记为 "keyword"。
+    """
+    keyword_result_v2 = _build_keyword_result_for_v2(keyword_info)
+    structure_features = _build_structure_features(structure)
+
+    v2_conf = compute_confidence_v2(
+        keyword_result=keyword_result_v2,
+        structure_features=structure_features,
+        contradictions=[],
+        resolution={"resolved_count": 0, "total_count": 0},
+    )
+
+    return {
+        "category": keyword_info["category"],
+        "confidence": v2_conf["confidence"],
+        "needs_review": v2_conf["needs_review"],
+        "method": "keyword",
+        "source": "l1",
+        "judgment": v2_conf["judgment"],
+        "matches": keyword_info["all_matches"],
+        "contradictions": [],
+        "v2_confidence": v2_conf,
+        "structure": _build_structure_summary(structure),
+    }
+
+
+def _path_rule_engine(
+    keyword_info: dict | None,
+    structure: dict,
+) -> dict:
+    """路径 B: keyword 50-89%, 规则引擎 + 确信度计算 + 矛盾回溯。
+
+    流程:
+      1. 用 structure 特征构建 features dict
+      2. 遍历所有混淆组解析器, 收集 resolved_types
+      3. 检测矛盾并解决
+      4. 确定最终分类
+      5. 计算 4 因子确信度
+    """
+    # 1. 结构特征直接作为 features
+    features = dict(structure)
+
+    # 2. 运行所有混淆组解析器
+    resolved_types: dict[str, str] = {}
+    for pair_name in _PAIR_NAMES:
+        try:
+            result = resolve_confusion_pair(features, pair_name)
+            if result["resolved_type"] != "unknown" and result["confidence"] > 0:
+                resolved_types[pair_name] = result["resolved_type"]
+        except Exception as e:
+            logger.debug("[pipeline] 混淆对 %s 解析异常: %s", pair_name, e)
+
+    features["resolved_types"] = resolved_types
+
+    # 3. 矛盾检测与解决
+    contradictions = detect_contradictions(features)
+    resolution_map: dict[str, Any] = {
+        "resolved_count": 0,
+        "total_count": len(contradictions),
+    }
+    for c in contradictions:
+        try:
+            winner = resolve_contradiction(features, c)
+            if winner:
+                resolution_map[c.get("name", "unknown")] = winner
+                resolution_map["resolved_count"] += 1
+        except Exception as e:
+            logger.debug("[pipeline] 矛盾解决异常: %s", e)
+
+    # 4. 确定最终分类与基础置信度
+    final_category = "unknown"
+    final_base_confidence = 0.0
+
+    # 优先采纳 keyword 判定
+    if keyword_info:
+        final_category = keyword_info["category"]
+        final_base_confidence = keyword_info["confidence"]
+
+    # 如果规则引擎有更高置信度的结果, 则采纳
+    best_resolved_type = None
+    best_resolved_conf = 0.0
+    for pair_name, rtype in resolved_types.items():
+        try:
+            rr = resolve_confusion_pair(features, pair_name)
+            if rr["confidence"] > best_resolved_conf:
+                best_resolved_conf = rr["confidence"]
+                best_resolved_type = rtype
+        except Exception:
+            continue
+
+    if best_resolved_type and best_resolved_conf > final_base_confidence:
+        final_category = best_resolved_type
+        final_base_confidence = best_resolved_conf
+
+    # 5. 计算 4 因子确信度
+    keyword_result_v2 = _build_keyword_result_for_v2(keyword_info)
+    keyword_result_v2["base_confidence"] = final_base_confidence
+
+    structure_features = _build_structure_features(structure)
+
+    v2_confidence = compute_confidence_v2(
+        keyword_result=keyword_result_v2,
+        structure_features=structure_features,
+        contradictions=contradictions,
+        resolution=resolution_map,
+    )
+
+    # 6. 组装结果
+    return {
+        "category": final_category,
+        "confidence": v2_confidence["confidence"],
+        "needs_review": v2_confidence["needs_review"],
+        "method": "rule_engine",
+        "source": "pipeline",
+        "judgment": v2_confidence["judgment"],
+        "matches": keyword_info["all_matches"] if keyword_info else [],
+        "contradictions": contradictions,
+        "contradiction_resolution": resolution_map,
+        "resolved_types": resolved_types,
+        "v2_confidence": v2_confidence,
+        "structure": _build_structure_summary(structure),
+    }
+
+
+def _path_llm_assisted(
+    keyword_info: dict | None,
+    structure: dict,
+    llm: Any,
+) -> dict:
+    """路径 C: keyword < 50%, LLM 辅助 + 规则引擎验证。
+
+    流程:
+      1. 调用 classify_with_llm 获取 LLM 分类
+      2. 规则引擎验证 LLM 结果
+      3. 矛盾检测
+      4. 确信度计算
+    """
+    from hina.hina_agent import classify_with_llm
+
+    # 1. LLM 分类
+    llm_result = classify_with_llm(structure, llm)
+    llm_category = llm_result.get("category", "unknown")
+    llm_confidence = llm_result.get("confidence", 0.5)
+
+    # 2. 规则引擎验证 LLM 分类
+    features = dict(structure)
+    validated_category = llm_category
+    validated_confidence = llm_confidence
+
+    for pair_name in _PAIR_NAMES:
+        try:
+            pair_result = resolve_confusion_pair(features, pair_name)
+            if (pair_result["resolved_type"] != "unknown"
+                    and pair_result["confidence"] > validated_confidence):
+                validated_category = pair_result["resolved_type"]
+                validated_confidence = pair_result["confidence"]
+        except Exception:
+            continue
+
+    # 3. 矛盾检测
+    resolved_types: dict[str, str] = {}
+    for pair_name in _PAIR_NAMES:
+        try:
+            rr = resolve_confusion_pair(features, pair_name)
+            if rr["resolved_type"] != "unknown":
+                resolved_types[pair_name] = rr["resolved_type"]
+        except Exception:
+            continue
+
+    features["resolved_types"] = resolved_types
+    contradictions = detect_contradictions(features)
+
+    # 4. 确信度计算
+    keyword_result_v2 = _build_keyword_result_for_v2(keyword_info)
+    keyword_result_v2["base_confidence"] = validated_confidence
+
+    structure_features = _build_structure_features(structure)
+
+    v2_confidence = compute_confidence_v2(
+        keyword_result=keyword_result_v2,
+        structure_features=structure_features,
+        contradictions=contradictions,
+        resolution={"resolved_count": 0, "total_count": len(contradictions)},
+    )
+
+    return {
+        "category": validated_category,
+        "confidence": v2_confidence["confidence"],
+        "needs_review": v2_confidence["needs_review"],
+        "method": "llm",
+        "source": "pipeline",
+        "judgment": v2_confidence["judgment"],
+        "matches": keyword_info["all_matches"] if keyword_info else [],
+        "contradictions": contradictions,
+        "llm_raw": llm_result,
+        "v2_confidence": v2_confidence,
+        "structure": _build_structure_summary(structure),
+    }
+
+
+# ── Main entry point ──────────────────────────────────────────────────────────
+
+
+def classify_program(cobol_source: str, llm: Any = None) -> dict:
+    """完整程序类型判定管道。
+
+    流程:
+      1. 并行: detect_keyword() + extract_structure()
+      2. keyword confidence >= 90% -> 直接输出
+      3. keyword 50-89% -> 规则引擎 + 确信度计算 + 矛盾回溯
+      4. keyword < 50% -> LLM 辅助 + 规则引擎验证
+      5. 输出最终 JSON
+
+    Args:
+        cobol_source: COBOL 程序源码文本。
+        llm: 可选的 LLM 客户端实例。
+             在 keyword confidence < 50% 路径中用于 LLM 辅助分类。
+             若为 None 且 keyword < 50%, 则使用规则引擎兜底。
+
+    Returns:
+        dict: {
+            "category": str,           # 程序分类名称
+            "confidence": float,       # 综合确信度 (0.0 ~ 1.0)
+            "needs_review": bool,      # 是否需要人工审核
+            "method": str,             # "keyword" | "rule_engine" | "llm"
+            "source": str,             # 结果来源: "l1" / "pipeline"
+            "judgment": str,           # auto / review / manual / impossible
+            "matches": list,           # L1 关键字匹配详情
+            "contradictions": list,    # 矛盾列表
+            "v2_confidence": dict,     # 4 因子确信度详情
+            "structure": dict,         # 结构特征摘要（调试用）
+        }
+
+    Raises:
+        ValueError: 如果 cobol_source 为空或无效。
+    """
+    if not cobol_source or not cobol_source.strip():
+        return {
+            "category": "unknown",
+            "confidence": 0.0,
+            "needs_review": True,
+            "method": "none",
+            "source": "error",
+            "judgment": "impossible",
+            "matches": [],
+            "contradictions": [],
+            "v2_confidence": {},
+            "structure": {},
+        }
+
+    # ── 第 1 步: 并行执行 keyword 检测和结构提取 ──
+    keyword_matches: list = []
+    structure: dict = {}
+
+    with ThreadPoolExecutor(max_workers=2) as executor:
+        future_keyword = executor.submit(detect_keyword, cobol_source)
+        future_structure = executor.submit(extract_structure, cobol_source)
+
+        for future in as_completed([future_keyword, future_structure]):
+            if future == future_keyword:
+                try:
+                    keyword_matches = future.result()
+                except Exception as e:
+                    logger.warning("[pipeline] detect_keyword 失败: %s", e)
+            elif future == future_structure:
+                try:
+                    structure = future.result()
+                except Exception as e:
+                    logger.warning("[pipeline] extract_structure 失败: %s", e)
+
+    # ── 第 2 步: 分析关键字结果, 确定路径 ──
+    keyword_info = _get_best_keyword_match(keyword_matches)
+    max_keyword_confidence = keyword_info["confidence"] if keyword_info else 0.0
+
+    logger.info(
+        "[pipeline] keyword matches=%d, max_confidence=%.2f, paragraphs=%d, files=%d",
+        len(keyword_matches),
+        max_keyword_confidence,
+        structure.get("total_paragraphs", 0),
+        structure.get("file_count", 0),
+    )
+
+    # ── 第 3 步: 根据确信度分路径 ──
+
+    # 路径 A: keyword >= 90% -> 直接输出
+    if max_keyword_confidence >= 0.90:
+        logger.info("[pipeline] 路径 A: keyword 高确信度 (%.2f)", max_keyword_confidence)
+        return _path_keyword_direct(keyword_info, structure)
+
+    # 路径 B: keyword 50-89% -> 规则引擎
+    if max_keyword_confidence >= 0.50:
+        logger.info("[pipeline] 路径 B: keyword 中确信度 (%.2f) -> 规则引擎", max_keyword_confidence)
+        return _path_rule_engine(keyword_info, structure)
+
+    # 路径 C: keyword < 50% -> LLM 辅助
+    if llm is not None:
+        logger.info("[pipeline] 路径 C: keyword 低确信度 (%.2f) -> LLM 辅助", max_keyword_confidence)
+        return _path_llm_assisted(keyword_info, structure, llm)
+
+    # LLM 不可用: 使用规则引擎兜底
+    logger.info("[pipeline] 路径 C(fallback): keyword 低确信度 (%.2f) -> 规则引擎兜底", max_keyword_confidence)
+    result = _path_rule_engine(keyword_info, structure)
+    result["method"] = "rule_engine_fallback"
+    return result