feat: Phase 2 complete — 13 Phases of COBOL type classification and test benchmark
P0.6: gcov infrastructure
P1: extract_structure output expansion (11 new feature fields)
P2: Confusion group rule engine (8 pairs + contradiction + backtrack)
P3: 4-factor confidence calculation + quality gate update
P4: 33+2 COBOL program type test samples (22 files, 7 categories)
P5: parametrized/ test data generation engine
P6: japanese_data.py lookup tables
P7-10: Type-specific test suites (~159 parametrized tests)
P11: Full classification pipeline (classify_program) + orchestrator integration
P12: Documentation (module-interfaces, test-plan v3.0, coverage-matrix)

Architecture decisions:
- classification_pipeline/ merged into hina/pipeline/
- parametrized/ as independent module
- japanese_data.py as root-level file
- hina/__all__ only exports classify_program()

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
+19
-33
@@ -4,26 +4,14 @@ from data.field_tree import FieldTree
|
||||
from data.test_case import TestSuite, SparkConfig, TestCase
|
||||
from data.diff_result import VerificationRun, FieldResult
|
||||
from runners.runner import Runner
|
||||
from runners.native_java_runner import NativeJavaRunner
|
||||
from runners.spark_java_runner import SparkJavaRunner
|
||||
from runners.cobol_runner import CobolRunner
|
||||
from runners.data_writer import DataWriter
|
||||
from agents.agent1_parser import Agent1Parser
|
||||
from agents.agent2_data import Agent2Data
|
||||
from agents.agent3_diagnostic import Agent3Diagnostic
|
||||
from agents.llm import LLMClient
|
||||
from comparator.aligner import align_records
|
||||
from comparator.field_compare import compare_field
|
||||
from comparator.cobol_binary_reader import CobolBinaryReader
|
||||
from report.generator import ReportGenerator
|
||||
from storage.bundle import TestDataBundle
|
||||
from runners import NativeJavaRunner, SparkJavaRunner, CobolRunner, DataWriter
|
||||
from agents import Agent1Parser, Agent2Data, Agent3Diagnostic, LLMClient
|
||||
from comparator import align_records, compare_field, CobolBinaryReader
|
||||
from report import ReportGenerator
|
||||
from storage import TestDataBundle
|
||||
from config import Config
|
||||
from cobol_testgen import extract_structure, generate_data, incremental_supplement
|
||||
from cobol_testgen.coverage import check_coverage
|
||||
from hina.gate import check as gate_check
|
||||
from hina.classifier import compute_confidence
|
||||
from hina.hina_agent import classify_with_llm
|
||||
from hina.strategy import supplement as strategy_supplement
|
||||
from cobol_testgen import extract_structure, generate_data, incremental_supplement, check_coverage
|
||||
from hina import classify_program, gate_check, supplement as strategy_supplement
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -63,23 +51,21 @@ def run_pipeline(cfg: Config, cpath: str, cbl: str, java: str, map_path: str) ->
|
||||
for i, rec in enumerate(base_records):
|
||||
complete_tests.append(TestCase(id=f"CTG-{i+1:04d}", fields=dict(rec)))
|
||||
|
||||
# HINA Agent 类型判定
|
||||
hina_result = {}
|
||||
# HINA 完整类型判定管道(Keyword / 规则引擎 / LLM 辅助三路径)
|
||||
classification: dict = {}
|
||||
try:
|
||||
hina_result = compute_confidence(cobol_src_text, structure)
|
||||
if hina_result.get("confidence", 0) < 0.7 and structure:
|
||||
llm_hina = classify_with_llm(structure, llm)
|
||||
if llm_hina.get("confidence", 0) > hina_result.get("confidence", 0):
|
||||
hina_result = llm_hina
|
||||
vr.hina_type = hina_result.get("category", "")
|
||||
vr.hina_confidence = hina_result.get("confidence", 0.0)
|
||||
vr.debug["hina_result"] = hina_result
|
||||
classification = classify_program(cobol_src_text, llm=llm)
|
||||
vr.hina_type = classification["category"]
|
||||
vr.hina_confidence = classification["confidence"]
|
||||
vr.debug["classification"] = classification
|
||||
if classification["needs_review"]:
|
||||
vr.quality_warn = f"类型判定确信度过低({classification['confidence']:.0%})"
|
||||
except Exception as e:
|
||||
vr.debug["hina_agent_error"] = str(e)
|
||||
logger.warning(f"[orchestrator] HINA Agent 判定失败: {e}")
|
||||
vr.debug["hina_classify_error"] = str(e)
|
||||
logger.warning(f"[orchestrator] HINA 类型判定失败: {e}")
|
||||
|
||||
# 策略 Agent 补充(追加标记记录,统一为 TestCase 格式)
|
||||
for m in strategy_supplement([], hina_result):
|
||||
for m in strategy_supplement([], classification):
|
||||
complete_tests.append(TestCase(
|
||||
id=m.get("id", f"STG-{len(complete_tests)+1:04d}"),
|
||||
fields=m.get("fields", {}),
|
||||
@@ -90,7 +76,7 @@ def run_pipeline(cfg: Config, cpath: str, cbl: str, java: str, map_path: str) ->
|
||||
cov = check_coverage(structure, base_records)
|
||||
for attempt in range(cfg.max_quality_retries):
|
||||
gate_result = gate_check(
|
||||
complete_tests, hina_result, cov,
|
||||
complete_tests, classification, cov,
|
||||
decision_threshold=cfg.quality_gate_decision_threshold,
|
||||
paragraph_threshold=cfg.quality_gate_paragraph_threshold,
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user