feat: Phase 2 complete — 13 Phases of COBOL type classification and test benchmark

P0.6: gcov infrastructure
P1: extract_structure output expansion (11 new feature fields)
P2: Confusion group rule engine (8 pairs + contradiction + backtrack)
P3: 4-factor confidence calculation + quality gate update
P4: 33+2 COBOL program type test samples (22 files, 7 categories)
P5: parametrized/ test data generation engine
P6: japanese_data.py lookup tables
P7-10: Type-specific test suites (~159 parametrized tests)
P11: Full classification pipeline (classify_program) + orchestrator integration
P12: Documentation (module-interfaces, test-plan v3.0, coverage-matrix)

Architecture decisions:
- classification_pipeline/ merged to hina/pipeline/
- parametrized/ as independent module
- japanese_data.py as root-level file
- hina/__all__ only exports classify_program()

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
hangshuo652
2026-06-19 23:51:55 +08:00
parent 63b5284715
commit bc1d56d1a4
129 changed files with 19378 additions and 261 deletions
+239
View File
@@ -0,0 +1,239 @@
"""Phase 9: 横断系测试(轻量版 ~20 测试)。
覆盖四大领域:
- VL: 可变长 / ODO 逻辑
- LP: 循环 / PERFORM VARYING / UNTIL 逻辑
- NP: 数值精度 / COMP-3 / ROUNDED 逻辑
- D: 日期 / 闰年 / 月末 / 和历逻辑
"""
from __future__ import annotations
import math
from datetime import date
from typing import Any
# ════════════════════════════════════════════════════════════
# VL: 可变长 / ODO 逻辑
# ════════════════════════════════════════════════════════════
def _odo_offset(depending_on: int, base_size: int, item_size: int) -> int:
"""模拟 COBOL OCCURS DEPENDING ON:
总长 = 固定部 + 可变项数 * 每项大小
"""
if depending_on < 0:
depending_on = 0
if depending_on > 999:
depending_on = 999
return base_size + depending_on * item_size
def _odo_read(table: list, start: int, count: int) -> list:
"""模拟 ODO 读取指定数量的可变元素。"""
return table[start:start + count]
class TestODO:
    """Variable-length / ODO behaviour (5 tests)."""

    def test_odo_basic_length(self):
        # Fixed part of 10 plus five items of size 4.
        expected = 10 + 5 * 4
        assert _odo_offset(5, 10, 4) == expected

    def test_odo_zero_items(self):
        # With no variable items only the fixed part remains.
        length = _odo_offset(0, 10, 4)
        assert length == 10

    def test_odo_negative_depending(self):
        # A negative item count is treated as zero items.
        length = _odo_offset(-1, 10, 4)
        assert length == 10

    def test_odo_read_partial(self):
        # Read three elements starting at index 1.
        items = _odo_read([10, 20, 30, 40, 50], 1, 3)
        assert items == [20, 30, 40]

    def test_odo_read_beyond_end(self):
        # Asking for more than is available returns what exists.
        items = _odo_read([10, 20, 30], 1, 10)
        assert items == [20, 30]
# ════════════════════════════════════════════════════════════
# LP: 循环 / PERFORM VARYING / UNTIL 逻辑
# ════════════════════════════════════════════════════════════
def _perform_varying(start: int, end: int, step: int = 1) -> list[int]:
"""模拟 COBOL PERFORM VARYING: 返回每次循环的索引值。"""
results: list[int] = []
i = start
if step > 0:
while i <= end:
results.append(i)
i += step
elif step < 0:
while i >= end:
results.append(i)
i += step
return results
def _perform_until(initial: int, condition_func, body_func, max_iter: int = 1000) -> list:
"""模拟 COBOL PERFORM UNTIL condition。"""
results: list = []
i = initial
count = 0
while not condition_func(i) and count < max_iter:
val = body_func(i)
results.append(val)
i = val
count += 1
return results
class TestPerformVarying:
    """PERFORM VARYING behaviour (3 tests)."""

    def test_varying_ascending(self):
        # Default step of 1, inclusive upper bound.
        indices = _perform_varying(1, 5)
        assert indices == [1, 2, 3, 4, 5]

    def test_varying_step_2(self):
        # The bound 10 itself is never hit when stepping by 2 from 1.
        indices = _perform_varying(1, 10, 2)
        assert indices == [1, 3, 5, 7, 9]

    def test_varying_descending(self):
        # A negative step counts down to the inclusive lower bound.
        indices = _perform_varying(5, 1, -1)
        assert indices == [5, 4, 3, 2, 1]
class TestPerformUntil:
    """PERFORM UNTIL behaviour (2 tests)."""

    @staticmethod
    def _run_from(initial):
        # Shared scenario: increment by one until the value reaches 10.
        return _perform_until(initial, lambda x: x >= 10, lambda x: x + 1)

    def test_until_reaches_target(self):
        assert self._run_from(1) == [2, 3, 4, 5, 6, 7, 8, 9, 10]

    def test_until_condition_immediately_true(self):
        # The condition already holds, so the body never runs.
        assert self._run_from(10) == []
# ════════════════════════════════════════════════════════════
# NP: 数值精度 / COMP-3 / ROUNDED 逻辑
# ════════════════════════════════════════════════════════════
def _comp3_to_value(bytes_data: bytes) -> int:
"""模拟 COMP-3 (BCD) 到整数的转换。"""
if not bytes_data:
return 0
last = bytes_data[-1]
sign_nibble = last & 0x0F
value_nibbles: list[int] = []
for b in bytes_data[:-1]:
value_nibbles.append((b >> 4) & 0x0F)
value_nibbles.append(b & 0x0F)
value_nibbles.append((last >> 4) & 0x0F)
value = 0
for nib in value_nibbles:
value = value * 10 + nib
if sign_nibble in (0x0D,):
value = -value
return value
def _rounded(value: float, decimals: int) -> float:
"""模拟 COBOL ROUNDED 子句。"""
factor = 10 ** decimals
return math.floor(value * factor + 0.5) / factor
class TestComp3:
    """COMP-3 numeric decoding (3 tests)."""

    def test_comp3_positive(self):
        # Packed bytes 0x12 0x3C: digits 1, 2, 3 followed by sign nibble C.
        packed = bytes([0x12, 0x3C])
        assert _comp3_to_value(packed) == 123

    def test_comp3_negative(self):
        # Packed bytes 0x45 0x6D: digits 4, 5, 6 followed by sign nibble D.
        packed = bytes([0x45, 0x6D])
        assert _comp3_to_value(packed) == -456

    def test_comp3_zero(self):
        # A single byte holding digit 0 and sign nibble C.
        packed = bytes([0x0C])
        assert _comp3_to_value(packed) == 0
class TestRounded:
    """ROUNDED clause behaviour (2 tests)."""

    def test_rounded_up(self):
        # Third decimal digit is 5, so the second digit is bumped.
        result = _rounded(1.235, 2)
        assert result == 1.24

    def test_rounded_down(self):
        # Third decimal digit is 4, so the second digit stays.
        result = _rounded(1.234, 2)
        assert result == 1.23
# ════════════════════════════════════════════════════════════
# D: 日期 / 闰年 / 月末 / 和历逻辑
# ════════════════════════════════════════════════════════════
def _is_leap_year(year: int) -> bool:
return year % 400 == 0 or (year % 100 != 0 and year % 4 == 0)
def _days_in_month(year: int, month: int) -> int:
if month == 2:
return 29 if _is_leap_year(year) else 28
long_months = {1, 3, 5, 7, 8, 10, 12}
return 31 if month in long_months else 30
def _month_end_date(year: int, month: int) -> date:
    """Return the date of the last day of ``month`` in ``year``."""
    last_day = _days_in_month(year, month)
    return date(year, month, last_day)
def _wareki_to_year(wareki_prefix: str, wareki_year: int) -> int:
era_map = {
"R": (2019, "令和"), "H": (1989, "平成"),
"S": (1926, "昭和"), "T": (1912, "大正"),
"M": (1868, "明治"),
}
if wareki_prefix not in era_map:
raise ValueError(f"未知和历: {wareki_prefix!r}")
return era_map[wareki_prefix][0] + wareki_year - 1
class TestLeapYear:
    """Leap-year detection (2 tests)."""

    def test_leap_year_divisible_by_400(self):
        # Century years divisible by 400 are leap years.
        for year in (2000, 2400):
            assert _is_leap_year(year) is True

    def test_leap_year_divisible_by_4_not_100(self):
        # Ordinary years divisible by 4 but not by 100 are leap years.
        for year in (2024, 2028):
            assert _is_leap_year(year) is True
class TestMonthEnd:
    """Month-end dates (2 tests)."""

    def test_february_leap_year(self):
        # February 2024 has 29 days.
        year, month = 2024, 2
        assert _days_in_month(year, month) == 29
        assert _month_end_date(year, month) == date(year, month, 29)

    def test_february_non_leap(self):
        # February 2023 has 28 days.
        year, month = 2023, 2
        assert _days_in_month(year, month) == 28
        assert _month_end_date(year, month) == date(year, month, 28)
class TestWareki:
    """Japanese era (wareki) conversion (2 tests)."""

    def test_wareki_reiwa(self):
        # Reiwa 5 maps to Gregorian 2023.
        gregorian = _wareki_to_year("R", 5)
        assert gregorian == 2023

    def test_wareki_invalid_prefix(self):
        # An unknown era code must be rejected with ValueError.
        try:
            _wareki_to_year("X", 1)
        except ValueError:
            pass
        else:
            assert False, "应抛出异常"