Files
cobol-java-v3/tests/parametrized/test_division.py
hangshuo652 bc1d56d1a4 feat: Phase 2 complete — 13 Phases of COBOL type classification and test benchmark
P0.6: gcov infrastructure
P1: extract_structure output expansion (11 new feature fields)
P2: Confusion group rule engine (8 pairs + contradiction + backtrack)
P3: 4-factor confidence calculation + quality gate update
P4: 33+2 COBOL program type test samples (22 files, 7 categories)
P5: parametrized/ test data generation engine
P6: japanese_data.py lookup tables
P7-10: Type-specific test suites (~159 parametrized tests)
P11: Full classification pipeline (classify_program) + orchestrator integration
P12: Documentation (module-interfaces, test-plan v3.0, coverage-matrix)

Architecture decisions:
- classification_pipeline/ merged to hina/pipeline/
- parametrized/ as independent module
- japanese_data.py as root-level file
- hina/__all__ only exports classify_program()

Co-Authored-By: Claude <noreply@anthropic.com>
2026-06-19 23:51:55 +08:00

127 lines
3.9 KiB
Python

"""Phase 7: 分割系测试 — 基于 parametrized.generate_division_data。
测试覆盖:
- 50% / 25% / 100% 分割
- 余数处理(奇偶 / 不可整除)
- 边界条件(单条记录 / 大量记录)
"""
from __future__ import annotations
import pytest
from parametrized import generate_division_data
class TestDivisionFifty:
    """50% division: the records are split across 2 files."""

    def test_50_even_split(self):
        """An even record count yields two files of equal size."""
        result = generate_division_data(50, 100)
        assert len(result) == 2
        assert len(result[0]) == 50
        assert len(result[1]) == 50
        assert sum(len(f) for f in result) == 100

    def test_50_odd_remainder(self):
        """Odd record count: the leftover record must land in the 2nd file."""
        result = generate_division_data(50, 5)
        assert len(result) == 2
        assert len(result[0]) + len(result[1]) == 5
        # Checking only the total would also accept a 3/2 split. Pin the
        # placement explicitly: the first file gets floor(5 * 0.5) = 2 and
        # the last file absorbs the remainder, consistent with the 0/1
        # split asserted in test_50_single_record.
        assert len(result[0]) == 2
        assert len(result[1]) == 3

    def test_50_single_record(self):
        """A single record leaves the 1st file empty and goes to the 2nd."""
        result = generate_division_data(50, 1)
        assert len(result) == 2
        assert len(result[0]) == 0
        assert len(result[1]) == 1

    def test_50_content_check(self):
        """Every record carries its 1-based file number and required fields."""
        result = generate_division_data(50, 10)
        for file_no, records in enumerate(result, 1):
            for rec in records:
                assert rec["FILE_NO"] == file_no
                assert rec["KEY"].startswith("DIV")
                assert "SEQ" in rec
                assert "DATA" in rec
class TestDivisionTwentyFive:
    """25% division: the records are split across 4 files."""

    def test_25_even_split(self):
        """100 records divide evenly: 100 / 4 = 25 per file."""
        files = generate_division_data(25, 100)
        assert len(files) == 4
        assert [len(chunk) for chunk in files] == [25, 25, 25, 25]

    def test_25_remainder(self):
        """When the count is not divisible by 4, the last file takes the rest."""
        files = generate_division_data(25, 10)
        assert len(files) == 4
        sizes = [len(chunk) for chunk in files]
        assert sum(sizes) == 10
        # The first 3 files hold floor(10 * 0.25) = 2 records each, which
        # leaves 4 records for the 4th file.
        assert sizes == [2, 2, 2, 4]

    def test_25_single_record(self):
        """A single record ends up in the last of the 4 files."""
        files = generate_division_data(25, 1)
        assert len(files) == 4
        assert [len(chunk) for chunk in files] == [0, 0, 0, 1]

    def test_25_content_check(self):
        """Each record's FILE_NO equals the 1-based index of its file."""
        files = generate_division_data(25, 40)
        for expected_no, chunk in enumerate(files, start=1):
            seen = [rec["FILE_NO"] for rec in chunk]
            assert seen == [expected_no] * len(chunk)
class TestDivisionOneHundred:
    """100% division (no split): everything goes into 1 file."""

    def test_100_all_in_one(self):
        """All 50 records are placed in the single output file."""
        files = generate_division_data(100, 50)
        assert len(files) == 1
        only_file = files[0]
        assert len(only_file) == 50

    def test_100_single_record(self):
        """One record produces one file holding that record, FILE_NO 1."""
        files = generate_division_data(100, 1)
        assert len(files) == 1
        only_file = files[0]
        assert len(only_file) == 1
        assert only_file[0]["FILE_NO"] == 1

    def test_100_large_count(self):
        """10000 records stay in one file with SEQ running from 1 to 10000."""
        total = 10000
        files = generate_division_data(100, total)
        assert len(files) == 1
        only_file = files[0]
        assert len(only_file) == total
        first_seq = only_file[0]["SEQ"]
        last_seq = only_file[-1]["SEQ"]
        assert (first_seq, last_seq) == (1, total)
class TestDivisionEdgeCases:
    """Boundary and error cases."""

    def test_invalid_division_type(self):
        """An unsupported division type raises ValueError naming division_type."""
        expect_error = pytest.raises(ValueError, match="division_type")
        with expect_error:
            generate_division_data(99, 50)

    def test_invalid_record_count(self):
        """A record count of 0 raises ValueError naming record_count."""
        expect_error = pytest.raises(ValueError, match="record_count")
        with expect_error:
            generate_division_data(50, 0)

    def test_sequence_global(self):
        """SEQ is ordered across all files and never repeats."""
        files = generate_division_data(25, 30)
        # Flatten SEQ values in file order, then record order.
        seqs = []
        for chunk in files:
            seqs.extend(rec["SEQ"] for rec in chunk)
        assert seqs == sorted(seqs)
        assert len(seqs) == len(set(seqs))