feat: Phase 2 complete — 13 Phases of COBOL type classification and test benchmark

P0.6: gcov infrastructure
P1: extract_structure output expansion (11 new feature fields)
P2: Confusion group rule engine (8 pairs + contradiction + backtrack)
P3: 4-factor confidence calculation + quality gate update
P4: 33+2 COBOL program type test samples (22 files, 7 categories)
P5: parametrized/ test data generation engine
P6: japanese_data.py lookup tables
P7-10: Type-specific test suites (~159 parametrized tests)
P11: Full classification pipeline (classify_program) + orchestrator integration
P12: Documentation (module-interfaces, test-plan v3.0, coverage-matrix)

Architecture decisions:
- classification_pipeline/ merged to hina/pipeline/
- parametrized/ as independent module
- japanese_data.py as root-level file
- hina/__all__ only exports classify_program()

Co-Authored-By: Claude <noreply@anthropic.com>
2026-06-19 23:51:55 +08:00
parent 63b5284715
commit bc1d56d1a4
129 changed files with 19378 additions and 261 deletions
@@ -0,0 +1,239 @@
+"""Phase 9: 横断系测试（轻量版 ~20 测试）。
+
+覆盖四大领域:
+  - VL: 可变长 / ODO 逻辑
+  - LP: 循环 / PERFORM VARYING / UNTIL 逻辑
+  - NP: 数值精度 / COMP-3 / ROUNDED 逻辑
+  - D:  日期 / 闰年 / 月末 / 和历逻辑
+"""
+
+from __future__ import annotations
+
+import math
+from datetime import date
+from typing import Any
+
+
+# ════════════════════════════════════════════════════════════
+# VL: 可变长 / ODO 逻辑
+# ════════════════════════════════════════════════════════════
+
+
+def _odo_offset(
+    depending_on: int,
+    base_size: int,
+    item_size: int,
+    *,
+    max_occurs: int = 999,
+) -> int:
+    """Simulate the record length of a COBOL OCCURS DEPENDING ON table.
+
+    The length is the fixed part plus the effective occurrence count
+    multiplied by the size of one occurrence.
+
+    Args:
+        depending_on: Value of the DEPENDING ON counter. It is clamped into
+            ``0..max_occurs`` before use.
+        base_size: Size of the fixed part of the record.
+        item_size: Size of one table occurrence.
+        max_occurs: Upper bound for the occurrence count. Keyword-only;
+            defaults to 999, the limit that used to be hard-coded here.
+
+    Returns:
+        ``base_size + clamped_count * item_size``.
+
+    Raises:
+        ValueError: If ``max_occurs`` is negative.
+    """
+    if max_occurs < 0:
+        raise ValueError(f"max_occurs must be non-negative, got {max_occurs!r}")
+    occurrences = min(max(depending_on, 0), max_occurs)
+    return base_size + occurrences * item_size
+
+
+def _odo_read(table: list, start: int, count: int) -> list:
+    """Simulate reading ``count`` variable occurrences of an ODO table.
+
+    Args:
+        table: The occurrences to read from.
+        start: Zero-based index of the first occurrence to read. Negative
+            values are treated as 0 so they cannot wrap around to the end
+            of the list.
+        count: Number of occurrences requested. Negative values are treated
+            as 0 so they cannot turn into a negative slice bound.
+
+    Returns:
+        A new list with at most ``count`` items; shorter when the table ends
+        before ``count`` items have been read.
+    """
+    first = max(start, 0)
+    wanted = max(count, 0)
+    return table[first:first + wanted]
+
+
+class TestODO:
+    """Variable-length / ODO behaviour of the ``_odo_*`` helpers (5 tests)."""
+
+    def test_odo_basic_length(self):
+        """Fixed part plus five occurrences of four bytes each."""
+        expected = 10 + 5 * 4
+        assert _odo_offset(5, 10, 4) == expected
+
+    def test_odo_zero_items(self):
+        """With zero occurrences only the fixed part remains."""
+        length = _odo_offset(0, 10, 4)
+        assert length == 10
+
+    def test_odo_negative_depending(self):
+        """A negative counter yields the same length as zero occurrences."""
+        length = _odo_offset(-1, 10, 4)
+        assert length == 10
+
+    def test_odo_read_partial(self):
+        """Reading three items from index 1 returns the middle of the table."""
+        items = _odo_read([10, 20, 30, 40, 50], 1, 3)
+        assert items == [20, 30, 40]
+
+    def test_odo_read_beyond_end(self):
+        """Asking for more items than remain stops at the end of the table."""
+        items = _odo_read([10, 20, 30], 1, 10)
+        assert items == [20, 30]
+
+
+# ════════════════════════════════════════════════════════════
+# LP: 循环 / PERFORM VARYING / UNTIL 逻辑
+# ════════════════════════════════════════════════════════════
+
+
+def _perform_varying(start: int, end: int, step: int = 1) -> list[int]:
+    """Simulate COBOL PERFORM VARYING and return the index of each iteration.
+
+    Args:
+        start: Initial value of the loop index (FROM).
+        end: Inclusive bound; the loop stops once the index has passed it.
+        step: Increment applied after each iteration (BY). May be negative
+            to count down.
+
+    Returns:
+        The index values in iteration order. The list is empty when the
+        bound is already passed at ``start`` or when ``step`` is 0.
+    """
+    if step > 0:
+        return list(range(start, end + 1, step))
+    if step < 0:
+        return list(range(start, end - 1, step))
+    # A zero step could never pass the bound; report no iterations instead.
+    return []
+
+
+def _perform_until(initial: int, condition_func, body_func, max_iter: int = 1000) -> list:
+    """Simulate COBOL PERFORM UNTIL with the condition tested before each pass.
+
+    Args:
+        initial: Starting state handed to the first condition check.
+        condition_func: Called with the current state; a truthy result ends
+            the loop.
+        body_func: Called with the current state; its return value is
+            recorded and becomes the next state.
+        max_iter: Cap on the number of body executions.
+
+    Returns:
+        The values returned by ``body_func``, in execution order.
+    """
+    produced: list = []
+    state = initial
+    passes = 0
+    while True:
+        # The exit condition is evaluated first, then the iteration cap.
+        if condition_func(state):
+            break
+        if passes >= max_iter:
+            break
+        state = body_func(state)
+        produced.append(state)
+        passes += 1
+    return produced
+
+
+class TestPerformVarying:
+    """PERFORM VARYING index sequences (3 tests)."""
+
+    def test_varying_ascending(self):
+        """The default step of 1 walks every value up to the bound."""
+        indices = _perform_varying(start=1, end=5)
+        assert indices == [1, 2, 3, 4, 5]
+
+    def test_varying_step_2(self):
+        """A step of 2 stops at the last value not beyond the bound."""
+        indices = _perform_varying(start=1, end=10, step=2)
+        assert indices == [1, 3, 5, 7, 9]
+
+    def test_varying_descending(self):
+        """A negative step counts down to the bound, inclusive."""
+        indices = _perform_varying(start=5, end=1, step=-1)
+        assert indices == [5, 4, 3, 2, 1]
+
+
+class TestPerformUntil:
+    """PERFORM UNTIL loop behaviour (2 tests)."""
+
+    def test_until_reaches_target(self):
+        """Starting at 1, the body runs until the state reaches 10."""
+
+        def reached_ten(x):
+            return x >= 10
+
+        def increment(x):
+            return x + 1
+
+        produced = _perform_until(1, reached_ten, increment)
+        assert produced == list(range(2, 11))
+
+    def test_until_condition_immediately_true(self):
+        """When the condition already holds, the body never runs."""
+
+        def reached_ten(x):
+            return x >= 10
+
+        def increment(x):
+            return x + 1
+
+        produced = _perform_until(10, reached_ten, increment)
+        assert produced == []
+
+
+# ════════════════════════════════════════════════════════════
+# NP: 数值精度 / COMP-3 / ROUNDED 逻辑
+# ════════════════════════════════════════════════════════════
+
+
+def _comp3_to_value(bytes_data: bytes) -> int:
+    """Decode a COMP-3 (packed decimal / BCD) field into an integer.
+
+    Each byte holds two 4-bit nibbles. Every nibble is a decimal digit
+    except the low nibble of the last byte, which carries the sign code.
+
+    Args:
+        bytes_data: The packed field. Empty input decodes to 0.
+
+    Returns:
+        The signed integer value. Sign codes 0xB and 0xD are negative; every
+        other sign code is treated as positive. Digit nibbles are not
+        validated, so a nibble above 9 is folded in as-is.
+    """
+    if not bytes_data:
+        return 0
+    last_byte = bytes_data[-1]
+    sign_nibble = last_byte & 0x0F
+    magnitude = 0
+    for byte in bytes_data[:-1]:
+        # Two digits per byte: high nibble first, then low nibble.
+        magnitude = magnitude * 100 + (byte >> 4) * 10 + (byte & 0x0F)
+    # The last byte contributes only its high nibble as a digit.
+    magnitude = magnitude * 10 + (last_byte >> 4)
+    # 0xD is the preferred minus code and 0xB the alternate one.
+    return -magnitude if sign_nibble in (0x0B, 0x0D) else magnitude
+
+
+def _rounded(value: float, decimals: int) -> float:
+    """Simulate the COBOL ROUNDED phrase (round half away from zero).
+
+    The value is converted through its shortest decimal representation so
+    that ties are decided on the digits as written (1.005 -> 1.01), not on
+    the binary approximation. Negative ties round away from zero
+    (-2.5 -> -3.0), unlike ``floor(x + 0.5)``.
+
+    Args:
+        value: Finite number to round.
+        decimals: Number of decimal places to keep. A negative value rounds
+            to tens, hundreds, and so on.
+
+    Returns:
+        The rounded value as a float.
+
+    Raises:
+        decimal.InvalidOperation: If ``value`` is infinite or NaN.
+    """
+    from decimal import ROUND_HALF_UP, Context, Decimal
+
+    exact = Decimal(str(value))
+    quantum = Decimal(1).scaleb(-decimals)
+    # Enough precision for every integer digit, the kept decimals and a carry,
+    # so large magnitudes do not overflow the default 28-digit context.
+    digits = max(28, exact.adjusted() + decimals + 2)
+    rounded = exact.quantize(
+        quantum, rounding=ROUND_HALF_UP, context=Context(prec=digits)
+    )
+    return float(rounded)
+
+
+class TestComp3:
+    """COMP-3 packed-decimal decoding (3 tests)."""
+
+    def test_comp3_positive(self):
+        """Bytes 0x12 0x3C decode to 123."""
+        packed = bytes.fromhex("123c")
+        assert _comp3_to_value(packed) == 123
+
+    def test_comp3_negative(self):
+        """Bytes 0x45 0x6D decode to -456."""
+        packed = bytes.fromhex("456d")
+        assert _comp3_to_value(packed) == -456
+
+    def test_comp3_zero(self):
+        """The single byte 0x0C decodes to 0."""
+        packed = bytes.fromhex("0c")
+        assert _comp3_to_value(packed) == 0
+
+
+class TestRounded:
+    """ROUNDED phrase behaviour (2 tests)."""
+
+    def test_rounded_up(self):
+        """A trailing 5 in the third decimal rounds the second decimal up."""
+        result = _rounded(1.235, 2)
+        assert result == 1.24
+
+    def test_rounded_down(self):
+        """A trailing 4 in the third decimal leaves the second decimal as is."""
+        result = _rounded(1.234, 2)
+        assert result == 1.23
+
+
+# ════════════════════════════════════════════════════════════
+# D: 日期 / 闰年 / 月末 / 和历逻辑
+# ════════════════════════════════════════════════════════════
+
+
+def _is_leap_year(year: int) -> bool:
+    """Return True when ``year`` is a leap year.
+
+    A year divisible by 400 is a leap year, any other year divisible by 100
+    is not, and the remaining years are leap years when divisible by 4.
+    """
+    if year % 400 == 0:
+        return True
+    if year % 100 == 0:
+        return False
+    return year % 4 == 0
+
+
+def _days_in_month(year: int, month: int) -> int:
+    """Return the number of days in ``month`` of ``year``.
+
+    Args:
+        year: Calendar year; only consulted for February.
+        month: Month number, 1 (January) through 12 (December).
+
+    Returns:
+        28 or 29 for February depending on ``_is_leap_year(year)``, 30 for
+        April, June, September and November, and 31 otherwise.
+
+    Raises:
+        ValueError: If ``month`` is outside 1..12. Such values used to fall
+            through to the 30-day branch silently.
+    """
+    if not 1 <= month <= 12:
+        raise ValueError(f"month must be in 1..12, got {month!r}")
+    if month == 2:
+        return 29 if _is_leap_year(year) else 28
+    return 30 if month in (4, 6, 9, 11) else 31
+
+
+def _month_end_date(year: int, month: int) -> date:
+    """Return the date of the last day of ``month`` in ``year``."""
+    last_day = _days_in_month(year, month)
+    return date(year=year, month=month, day=last_day)
+
+
+def _wareki_to_year(wareki_prefix: str, wareki_year: int) -> int:
+    """Convert a Japanese era (wareki) year to a Gregorian year.
+
+    Args:
+        wareki_prefix: One-letter era code: ``"R"``, ``"H"``, ``"S"``,
+            ``"T"`` or ``"M"``.
+        wareki_year: Year within the era, counted from 1.
+
+    Returns:
+        The Gregorian year, i.e. the era's first year plus
+        ``wareki_year - 1``. The upper end of an era is not checked.
+
+    Raises:
+        ValueError: If the prefix is unknown or ``wareki_year`` is below 1.
+    """
+    era_first_years = {
+        "R": 2019,  # Reiwa
+        "H": 1989,  # Heisei
+        "S": 1926,  # Showa
+        "T": 1912,  # Taisho
+        "M": 1868,  # Meiji
+    }
+    if wareki_prefix not in era_first_years:
+        raise ValueError(f"未知和历: {wareki_prefix!r}")
+    # Era years are 1-based; 0 or a negative value would land before the era.
+    if wareki_year < 1:
+        raise ValueError(f"无效和历年份: {wareki_year!r}")
+    return era_first_years[wareki_prefix] + wareki_year - 1
+
+
+class TestLeapYear:
+    """Leap-year detection (4 tests).
+
+    Covers both outcomes, so an implementation that always answers True
+    cannot pass.
+    """
+
+    def test_leap_year_divisible_by_400(self):
+        """Years divisible by 400 are leap years."""
+        assert _is_leap_year(2000) is True
+        assert _is_leap_year(2400) is True
+
+    def test_leap_year_divisible_by_4_not_100(self):
+        """Years divisible by 4 but not by 100 are leap years."""
+        assert _is_leap_year(2024) is True
+        assert _is_leap_year(2028) is True
+
+    def test_not_leap_year_divisible_by_100_not_400(self):
+        """Century years not divisible by 400 are not leap years."""
+        assert _is_leap_year(1900) is False
+        assert _is_leap_year(2100) is False
+
+    def test_not_leap_year_not_divisible_by_4(self):
+        """Years not divisible by 4 are not leap years."""
+        assert _is_leap_year(2023) is False
+        assert _is_leap_year(2025) is False
+
+
+class TestMonthEnd:
+    """Month-end dates for February (2 tests)."""
+
+    def test_february_leap_year(self):
+        """February 2024 has 29 days and ends on the 29th."""
+        day_count = _days_in_month(2024, 2)
+        last_date = _month_end_date(2024, 2)
+        assert day_count == 29
+        assert last_date == date(2024, 2, 29)
+
+    def test_february_non_leap(self):
+        """February 2023 has 28 days and ends on the 28th."""
+        day_count = _days_in_month(2023, 2)
+        last_date = _month_end_date(2023, 2)
+        assert day_count == 28
+        assert last_date == date(2023, 2, 28)
+
+
+class TestWareki:
+    """Japanese era (wareki) conversion (2 tests)."""
+
+    def test_wareki_reiwa(self):
+        """Era code "R" with year 5 converts to 2023."""
+        assert _wareki_to_year("R", 5) == 2023
+
+    def test_wareki_invalid_prefix(self):
+        """An unknown era prefix must raise ValueError."""
+        try:
+            _wareki_to_year("X", 1)
+        except ValueError:
+            return
+        # Raised explicitly instead of ``assert False`` so the failure is
+        # still reported when assertions are stripped with ``python -O``.
+        raise AssertionError("应抛出异常")