feat: Phase 2 complete — 13 Phases of COBOL type classification and test benchmark

P0.6: gcov infrastructure
P1: extract_structure output expansion (11 new feature fields)
P2: Confusion group rule engine (8 pairs + contradiction + backtrack)
P3: 4-factor confidence calculation + quality gate update
P4: 33+2 COBOL program type test samples (22 files, 7 categories)
P5: parametrized/ test data generation engine
P6: japanese_data.py lookup tables
P7-10: Type-specific test suites (~159 parametrized tests)
P11: Full classification pipeline (classify_program) + orchestrator integration
P12: Documentation (module-interfaces, test-plan v3.0, coverage-matrix)

Architecture decisions:
- classification_pipeline/ merged to hina/pipeline/
- parametrized/ as independent module
- japanese_data.py as root-level file
- hina/__all__ only exports classify_program()

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
hangshuo652
2026-06-19 23:51:55 +08:00
parent 63b5284715
commit bc1d56d1a4
129 changed files with 19378 additions and 261 deletions
+94
View File
@@ -0,0 +1,94 @@
"""NF-01~17: 非功能测试 — 性能/并发/安全/容错(轻量级 smoke test"""
import sys, os, json, tempfile, time, threading
from pathlib import Path
from unittest.mock import patch, MagicMock
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..")))
# ── 5.1 性能 ──
def test_extract_large_coverage_timing():
"""NF-01: COBOL 解析 500+ 行完成时间"""
from cobol_testgen.read import preprocess
lines = [" MOVE 1 TO A.\n" for _ in range(500)]
src = "".join(lines)
t0 = time.time()
preprocess(src)
elapsed = time.time() - t0
assert elapsed < 10, f"500行预处理耗时 {elapsed:.2f}s > 10s"
def test_cache_speed():
"""NF-05: 缓存命中 → ≤100ms"""
from agents.llm import LLMClient
with tempfile.TemporaryDirectory() as tmp:
client = LLMClient(model="t", cache_dir=tmp)
with patch("httpx.post") as mp:
mp.return_value = MagicMock(
json=lambda: {"choices": [{"message": {"content": "x"}}]},
raise_for_status=lambda: None,
)
client.call([{"role": "user", "content": "speed"}])
t0 = time.time()
client.call([{"role": "user", "content": "speed"}])
assert time.time() - t0 < 0.5
# ── 5.2 并发 ──
def test_concurrent_task_ids():
"""NF-06: 模拟并行上传 → 不同 task_id"""
import uuid
ids = {str(uuid.uuid4())[:8] for _ in range(5)}
assert len(ids) == 5
# ── 5.3 安全 ──
def test_path_traversal_copybook():
"""NF-10: path traversal → BLOCKED"""
from cobol_testgen import extract_structure
result = extract_structure("PROCEDURE DIVISION.",
source_dir="../../../etc/passwd")
# 不崩溃,返回安全结果
assert isinstance(result, dict)
def test_api_key_missing():
"""NF-12: 无 API key → Agent fallback"""
from agents.llm import LLMClient
with patch.dict(os.environ, {}, clear=True):
with tempfile.TemporaryDirectory() as tmp:
client = LLMClient(model="test", cache_dir=tmp)
with patch("httpx.post") as mp:
mp.return_value = MagicMock(
json=lambda: {"choices": [{"message": {"content": "ok"}}]},
raise_for_status=lambda: None,
)
result = client.call([{"role": "user", "content": "hi2"}])
assert result == "ok"
# ── 5.4 容错 ──
def test_orchestrator_no_llm_key():
"""pipeline 无 LLM key → 不崩溃(orchestrator 处理)"""
from config import Config
from orchestrator import run_pipeline
with patch.dict(os.environ, {}, clear=True), \
patch("orchestrator.Path") as mock_path, \
patch("orchestrator.Agent1Parser") as mock_a1p, \
patch("orchestrator.extract_structure") as mock_s:
mock_a1p_inst = MagicMock()
tree = MagicMock()
tree.fields = []
tree.flatten.return_value = {}
mock_a1p_inst.parse.return_value = tree
mock_a1p.return_value = mock_a1p_inst
mock_s.return_value = {"total_branches": 0}
mock_path.return_value.read_text.return_value = ""
mock_path.return_value.stem = "T"
cfg = Config()
vr = run_pipeline(cfg, "/f", "/f", "/f", "/f")
assert isinstance(vr, object)