feat: Phase 2 complete — 13 Phases of COBOL type classification and test benchmark

P0.6: gcov infrastructure
P1: extract_structure output expansion (11 new feature fields)
P2: Confusion group rule engine (8 pairs + contradiction + backtrack)
P3: 4-factor confidence calculation + quality gate update
P4: 33+2 COBOL program type test samples (22 files, 7 categories)
P5: parametrized/ test data generation engine
P6: japanese_data.py lookup tables
P7-10: Type-specific test suites (~159 parametrized tests)
P11: Full classification pipeline (classify_program) + orchestrator integration
P12: Documentation (module-interfaces, test-plan v3.0, coverage-matrix)

Architecture decisions:
- classification_pipeline/ merged to hina/pipeline/
- parametrized/ as independent module
- japanese_data.py as root-level file
- hina/__all__ only exports classify_program()

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
hangshuo652
2026-06-19 23:51:55 +08:00
parent 63b5284715
commit bc1d56d1a4
129 changed files with 19378 additions and 261 deletions
+238
View File
@@ -0,0 +1,238 @@
"""Phase 8: CALL / SEARCH ALL 系测试。
测试覆盖:
- CALL 参数传递逻辑(by reference / by value / by content)
- SEARCH ALL 二分查找逻辑(找到 / 未找到 / 重复键 / 空表)
"""
from __future__ import annotations
from typing import Any
# ── CALL 模拟 ──
def _call_by_reference(param: list) -> list:
"""模拟 COBOL CALL BY REFERENCE: 修改外部变量。"""
param[0] = param[0] * 2
return param
def _call_by_value(param: int) -> int:
"""模拟 COBOL CALL BY VALUE: 传入副本。"""
return param * 2
def _call_by_content(param: list) -> list:
"""模拟 COBOL CALL BY CONTENT: 传入副本,不修改原始值。"""
copy = param.copy()
copy[0] = copy[0] * 2
return copy
def _call_with_multiple(
a: int,
b: int,
c: str = "",
) -> dict[str, Any]:
"""模拟多参数 CALL。"""
return {"sum": a + b, "concat": c * 2}
# ── SEARCH ALL 模拟 ──
def _search_all(table: list[dict], key_field: str, target: Any) -> int | None:
"""模拟 COBOL SEARCH ALL(二分查找)。
要求 table 已按 key_field 升序排列。
参数
----------
table : list[dict]
已排序的表。
key_field : str
待查找的键字段名。
target : Any
目标值。
返回
-------
int | None
找到时返回下标;未找到返回 None。
"""
lo, hi = 0, len(table) - 1
while lo <= hi:
mid = (lo + hi) // 2
val = table[mid][key_field]
if val == target:
return mid
elif val < target:
lo = mid + 1
else:
hi = mid - 1
return None
def _search_all_duplicate_keys(
    table: list[dict],
    key_field: str,
    target: Any,
) -> list[int]:
    """Return the indices of every row whose ``key_field`` equals ``target``.

    One hit is located with ``_search_all``; the run of equal keys around
    that hit is then collected by walking left and right from it. Indices
    are returned in ascending order, and the list is empty when nothing
    matches.
    """
    hit = _search_all(table, key_field, target)
    if hit is None:
        return []

    # Walk left (starting at the hit itself) until the key stops matching.
    left = hit
    while left >= 0 and table[left][key_field] == target:
        left -= 1

    # Walk right from the row after the hit until the key stops matching.
    right = hit + 1
    while right < len(table) and table[right][key_field] == target:
        right += 1

    # ``left`` and ``right`` each stopped one step outside the matching run.
    return list(range(left + 1, right))
# ── 测试: CALL ──
class TestCallByReference:
    """Parameter passing with CALL BY REFERENCE."""

    def test_by_reference_modifies_original(self):
        # The helper doubles the first element of the caller's list.
        values = [5]
        returned = _call_by_reference(values)
        assert values[0] == 10, "BY REFERENCE 应修改原始值"
        assert returned == [10]

    def test_by_reference_string(self):
        # Doubling a string element repeats it.
        words = ["hello"]
        _call_by_reference(words)
        assert words[0] == "hellohello"
class TestCallByValue:
    """Parameter passing with CALL BY VALUE."""

    def test_by_value_no_side_effect(self):
        # The local keeps its value; only the return value is doubled.
        original = 5
        doubled = _call_by_value(original)
        assert original == 5, "BY VALUE 不应修改原始值"
        assert doubled == 10

    def test_by_value_zero(self):
        outcome = _call_by_value(0)
        assert outcome == 0

    def test_by_value_negative(self):
        outcome = _call_by_value(-3)
        assert outcome == -6
class TestCallByContent:
    """Parameter passing with CALL BY CONTENT."""

    def test_by_content_preserves_original(self):
        # The helper works on a copy, so the caller's list stays unchanged.
        values = [5]
        returned = _call_by_content(values)
        assert values[0] == 5, "BY CONTENT 不应修改原始值"
        assert returned == [10]
class TestCallMultipleParameters:
    """CALL with several parameters."""

    def test_multiple_params(self):
        outcome = _call_with_multiple(3, 4)
        assert outcome["sum"] == 7

    def test_multiple_params_with_string(self):
        outcome = _call_with_multiple(1, 2, c="ab")
        assert outcome["sum"] == 3
        assert outcome["concat"] == "abab"

    def test_multiple_params_default(self):
        # With ``c`` omitted the concatenation is the empty string.
        outcome = _call_with_multiple(10, 20)
        assert outcome["concat"] == ""
# ── 测试: SEARCH ALL ──
class TestSearchAllFound:
    """SEARCH ALL: the target is present."""

    def test_search_found_first(self):
        rows = [{"K": key} for key in (1, 3, 5, 7)]
        position = _search_all(rows, "K", 1)
        assert position == 0

    def test_search_found_last(self):
        rows = [{"K": key} for key in (1, 3, 5, 7)]
        position = _search_all(rows, "K", 7)
        assert position == 3

    def test_search_found_middle(self):
        rows = [{"K": key} for key in (1, 3, 5, 7)]
        position = _search_all(rows, "K", 5)
        assert position == 2

    def test_search_string_keys(self):
        # String keys are compared with the same ordering operators.
        rows = [{"K": key} for key in ("a", "b", "c", "d")]
        position = _search_all(rows, "K", "c")
        assert position == 2
class TestSearchAllNotFound:
    """SEARCH ALL: the target is absent."""

    def test_search_not_found(self):
        # 4 falls between two existing keys.
        rows = [{"K": key} for key in (1, 3, 5)]
        position = _search_all(rows, "K", 4)
        assert position is None

    def test_search_below_all(self):
        # Target is smaller than every key.
        rows = [{"K": key} for key in (10, 20)]
        position = _search_all(rows, "K", 5)
        assert position is None

    def test_search_above_all(self):
        # Target is larger than every key.
        rows = [{"K": key} for key in (10, 20)]
        position = _search_all(rows, "K", 25)
        assert position is None
class TestSearchAllDuplicateKeys:
    """SEARCH ALL: duplicate keys."""

    def test_search_duplicate_keys(self):
        # Every row carrying the repeated key is reported, in ascending order.
        rows = [{"K": key} for key in (1, 2, 2, 2, 3)]
        matches = _search_all_duplicate_keys(rows, "K", 2)
        assert matches == [1, 2, 3]

    def test_search_no_duplicate(self):
        # A unique key yields a single-element list.
        rows = [{"K": key} for key in (1, 2, 3)]
        matches = _search_all_duplicate_keys(rows, "K", 2)
        assert matches == [1]
class TestSearchAllEdgeCases:
    """SEARCH ALL: boundary conditions."""

    def test_search_empty_table(self):
        position = _search_all([], "K", 1)
        assert position is None

    def test_search_single_element_found(self):
        rows = [{"K": 42}]
        position = _search_all(rows, "K", 42)
        assert position == 0

    def test_search_single_element_not_found(self):
        rows = [{"K": 42}]
        position = _search_all(rows, "K", 99)
        assert position is None