feat: Phase 2 complete — 13 phases of COBOL type classification and test benchmark

P0.6: gcov infrastructure
P1: extract_structure output expansion (11 new feature fields)
P2: Confusion group rule engine (8 pairs + contradiction + backtrack)
P3: 4-factor confidence calculation + quality gate update
P4: 33+2 COBOL program type test samples (22 files, 7 categories)
P5: parametrized/ test data generation engine
P6: japanese_data.py lookup tables
P7-10: Type-specific test suites (~159 parametrized tests)
P11: Full classification pipeline (classify_program) + orchestrator integration
P12: Documentation (module-interfaces, test-plan v3.0, coverage-matrix)

Architecture decisions:
- classification_pipeline/ merged into hina/pipeline/
- parametrized/ as an independent module
- japanese_data.py as a root-level file
- hina/__all__ exports only classify_program()

Co-Authored-By: Claude <noreply@anthropic.com>
2026-06-19 23:51:55 +08:00
parent 63b5284715
commit bc1d56d1a4
129 changed files with 19378 additions and 261 deletions
@@ -4,26 +4,14 @@ from data.field_tree import FieldTree
 from data.test_case import TestSuite, SparkConfig, TestCase
 from data.diff_result import VerificationRun, FieldResult
 from runners.runner import Runner
-from runners.native_java_runner import NativeJavaRunner
-from runners.spark_java_runner import SparkJavaRunner
-from runners.cobol_runner import CobolRunner
-from runners.data_writer import DataWriter
-from agents.agent1_parser import Agent1Parser
-from agents.agent2_data import Agent2Data
-from agents.agent3_diagnostic import Agent3Diagnostic
-from agents.llm import LLMClient
-from comparator.aligner import align_records
-from comparator.field_compare import compare_field
-from comparator.cobol_binary_reader import CobolBinaryReader
-from report.generator import ReportGenerator
-from storage.bundle import TestDataBundle
+from runners import NativeJavaRunner, SparkJavaRunner, CobolRunner, DataWriter
+from agents import Agent1Parser, Agent2Data, Agent3Diagnostic, LLMClient
+from comparator import align_records, compare_field, CobolBinaryReader
+from report import ReportGenerator
+from storage import TestDataBundle
 from config import Config
-from cobol_testgen import extract_structure, generate_data, incremental_supplement
-from cobol_testgen.coverage import check_coverage
-from hina.gate import check as gate_check
-from hina.classifier import compute_confidence
-from hina.hina_agent import classify_with_llm
-from hina.strategy import supplement as strategy_supplement
+from cobol_testgen import extract_structure, generate_data, incremental_supplement, check_coverage
+from hina import classify_program, gate_check, supplement as strategy_supplement

 logger = logging.getLogger(__name__)

@@ -63,23 +51,21 @@ def run_pipeline(cfg: Config, cpath: str, cbl: str, java: str, map_path: str) ->
            for i, rec in enumerate(base_records):
                complete_tests.append(TestCase(id=f"CTG-{i+1:04d}", fields=dict(rec)))

-            # HINA Agent 类型判定
-            hina_result = {}
+            # HINA 完整类型判定管道（Keyword / 规则引擎 / LLM 辅助三路径）
+            classification: dict = {}
            try:
-                hina_result = compute_confidence(cobol_src_text, structure)
-                if hina_result.get("confidence", 0) < 0.7 and structure:
-                    llm_hina = classify_with_llm(structure, llm)
-                    if llm_hina.get("confidence", 0) > hina_result.get("confidence", 0):
-                        hina_result = llm_hina
-                vr.hina_type = hina_result.get("category", "")
-                vr.hina_confidence = hina_result.get("confidence", 0.0)
-                vr.debug["hina_result"] = hina_result
+                classification = classify_program(cobol_src_text, llm=llm)
+                vr.hina_type = classification["category"]
+                vr.hina_confidence = classification["confidence"]
+                vr.debug["classification"] = classification
+                if classification["needs_review"]:
+                    vr.quality_warn = f"类型判定确信度过低({classification['confidence']:.0%})"
            except Exception as e:
-                vr.debug["hina_agent_error"] = str(e)
-                logger.warning(f"[orchestrator] HINA Agent 判定失败: {e}")
+                vr.debug["hina_classify_error"] = str(e)
+                logger.warning(f"[orchestrator] HINA 类型判定失败: {e}")

            # 策略 Agent 补充（追加标记记录，统一为 TestCase 格式）
-            for m in strategy_supplement([], hina_result):
+            for m in strategy_supplement([], classification):
                complete_tests.append(TestCase(
                    id=m.get("id", f"STG-{len(complete_tests)+1:04d}"),
                    fields=m.get("fields", {}),
@@ -90,7 +76,7 @@ def run_pipeline(cfg: Config, cpath: str, cbl: str, java: str, map_path: str) ->
            cov = check_coverage(structure, base_records)
            for attempt in range(cfg.max_quality_retries):
                gate_result = gate_check(
-                    complete_tests, hina_result, cov,
+                    complete_tests, classification, cov,
                    decision_threshold=cfg.quality_gate_decision_threshold,
                    paragraph_threshold=cfg.quality_gate_paragraph_threshold,
                )