feat: Phase 2 complete — 13 phases of COBOL type classification and test benchmarking

P0.6: gcov infrastructure
P1: extract_structure output expansion (11 new feature fields)
P2: Confusion group rule engine (8 pairs + contradiction + backtrack)
P3: 4-factor confidence calculation + quality gate update
P4: 33+2 COBOL program type test samples (22 files, 7 categories)
P5: parametrized/ test data generation engine
P6: japanese_data.py lookup tables
P7-10: Type-specific test suites (~159 parametrized tests)
P11: Full classification pipeline (classify_program) + orchestrator integration
P12: Documentation (module-interfaces, test-plan v3.0, coverage-matrix)

Architecture decisions:
- classification_pipeline/ is merged into hina/pipeline/
- parametrized/ is kept as an independent module
- japanese_data.py is kept as a root-level file
- hina.__all__ exports only classify_program() (the orchestrator still imports gate_check and supplement from hina by name)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
hangshuo652
2026-06-19 23:51:55 +08:00
parent 63b5284715
commit bc1d56d1a4
129 changed files with 19378 additions and 261 deletions
+19 -33
View File
@@ -4,26 +4,14 @@ from data.field_tree import FieldTree
from data.test_case import TestSuite, SparkConfig, TestCase
from data.diff_result import VerificationRun, FieldResult
from runners.runner import Runner
from runners.native_java_runner import NativeJavaRunner
from runners.spark_java_runner import SparkJavaRunner
from runners.cobol_runner import CobolRunner
from runners.data_writer import DataWriter
from agents.agent1_parser import Agent1Parser
from agents.agent2_data import Agent2Data
from agents.agent3_diagnostic import Agent3Diagnostic
from agents.llm import LLMClient
from comparator.aligner import align_records
from comparator.field_compare import compare_field
from comparator.cobol_binary_reader import CobolBinaryReader
from report.generator import ReportGenerator
from storage.bundle import TestDataBundle
from runners import NativeJavaRunner, SparkJavaRunner, CobolRunner, DataWriter
from agents import Agent1Parser, Agent2Data, Agent3Diagnostic, LLMClient
from comparator import align_records, compare_field, CobolBinaryReader
from report import ReportGenerator
from storage import TestDataBundle
from config import Config
from cobol_testgen import extract_structure, generate_data, incremental_supplement
from cobol_testgen.coverage import check_coverage
from hina.gate import check as gate_check
from hina.classifier import compute_confidence
from hina.hina_agent import classify_with_llm
from hina.strategy import supplement as strategy_supplement
from cobol_testgen import extract_structure, generate_data, incremental_supplement, check_coverage
from hina import classify_program, gate_check, supplement as strategy_supplement
logger = logging.getLogger(__name__)
@@ -63,23 +51,21 @@ def run_pipeline(cfg: Config, cpath: str, cbl: str, java: str, map_path: str) ->
for i, rec in enumerate(base_records):
complete_tests.append(TestCase(id=f"CTG-{i+1:04d}", fields=dict(rec)))
# HINA Agent 类型判定
hina_result = {}
# HINA 完整类型判定管道(Keyword / 规则引擎 / LLM 辅助三路径)
classification: dict = {}
try:
hina_result = compute_confidence(cobol_src_text, structure)
if hina_result.get("confidence", 0) < 0.7 and structure:
llm_hina = classify_with_llm(structure, llm)
if llm_hina.get("confidence", 0) > hina_result.get("confidence", 0):
hina_result = llm_hina
vr.hina_type = hina_result.get("category", "")
vr.hina_confidence = hina_result.get("confidence", 0.0)
vr.debug["hina_result"] = hina_result
classification = classify_program(cobol_src_text, llm=llm)
vr.hina_type = classification["category"]
vr.hina_confidence = classification["confidence"]
vr.debug["classification"] = classification
if classification["needs_review"]:
vr.quality_warn = f"类型判定确信度过低({classification['confidence']:.0%})"
except Exception as e:
vr.debug["hina_agent_error"] = str(e)
logger.warning(f"[orchestrator] HINA Agent 判定失败: {e}")
vr.debug["hina_classify_error"] = str(e)
logger.warning(f"[orchestrator] HINA 类型判定失败: {e}")
# 策略 Agent 补充(追加标记记录,统一为 TestCase 格式)
for m in strategy_supplement([], hina_result):
for m in strategy_supplement([], classification):
complete_tests.append(TestCase(
id=m.get("id", f"STG-{len(complete_tests)+1:04d}"),
fields=m.get("fields", {}),
@@ -90,7 +76,7 @@ def run_pipeline(cfg: Config, cpath: str, cbl: str, java: str, map_path: str) ->
cov = check_coverage(structure, base_records)
for attempt in range(cfg.max_quality_retries):
gate_result = gate_check(
complete_tests, hina_result, cov,
complete_tests, classification, cov,
decision_threshold=cfg.quality_gate_decision_threshold,
paragraph_threshold=cfg.quality_gate_paragraph_threshold,
)