bc1d56d1a4
P0.6: gcov infrastructure P1: extract_structure output expansion (11 new feature fields) P2: Confusion group rule engine (8 pairs + contradiction + backtrack) P3: 4-factor confidence calculation + quality gate update P4: 33+2 COBOL program type test samples (22 files, 7 categories) P5: parametrized/ test data generation engine P6: japanese_data.py lookup tables P7-10: Type-specific test suites (~159 parametrized tests) P11: Full classification pipeline (classify_program) + orchestrator integration P12: Documentation (module-interfaces, test-plan v3.0, coverage-matrix) Architecture decisions: - classification_pipeline/ merged to hina/pipeline/ - parametrized/ as independent module - japanese_data.py as root-level file - hina/__all__ only exports classify_program() Co-Authored-By: Claude <noreply@anthropic.com>
63 lines
2.1 KiB
Python
63 lines
2.1 KiB
Python
from dataclasses import dataclass, field
|
||
from pathlib import Path
|
||
from .mapping import MappingConfig, FieldMapping
|
||
|
||
# Public names exported by this module.
__all__ = [
    "Config",  # global configuration (dataclass)
    "MappingConfig",  # field-mapping configuration
    "FieldMapping",  # mapping for a single field
]
|
||
|
||
|
||
@dataclass
|
||
class Config:
|
||
project_name: str = ""
|
||
copybook_paths: list = field(default_factory=lambda: ["./copybooks"])
|
||
dialect: str = "ibm"
|
||
llm_model: str = "gpt-4o-mini"
|
||
llm_timeout: int = 15
|
||
llm_cache_dir: str = ".cache/llm"
|
||
coverage_default: str = "boundary"
|
||
rounding_mode: str = "TRUNCATE"
|
||
tolerance: float = 0.01
|
||
runner_mode: str = "native"
|
||
spark_master: str = "local[*]"
|
||
spark_input_format: str = "json"
|
||
num_records: int = 1000
|
||
branch_pass: float = 0.80
|
||
max_llm_cost: float = 0.50
|
||
quality_gate_mode: str = "warn"
|
||
quality_gate_decision_threshold: float = 0.90
|
||
quality_gate_paragraph_threshold: float = 1.0
|
||
gcov_enabled: bool = False
|
||
gcov_work_dir: str = ".gcov_output"
|
||
gcov_threshold: float = 0.5
|
||
max_quality_retries: int = 4
|
||
|
||
@classmethod
|
||
def from_toml(cls, path="aurak.toml"):
|
||
import tomllib
|
||
try:
|
||
with open(path, "rb") as f:
|
||
d = tomllib.load(f)
|
||
except:
|
||
return cls()
|
||
c = cls()
|
||
p = d.get("project", {})
|
||
c.project_name = p.get("name", "")
|
||
c.copybook_paths = p.get("copybook_paths", c.copybook_paths)
|
||
c.dialect = p.get("dialect", "ibm")
|
||
ll = d.get("llm", {})
|
||
c.llm_model = ll.get("model", c.llm_model)
|
||
co = d.get("coverage", {})
|
||
c.coverage_default = co.get("default_target", "boundary")
|
||
cp = d.get("comparison", {})
|
||
c.rounding_mode = cp.get("rounding_mode", "TRUNCATE")
|
||
c.tolerance = cp.get("default_tolerance", c.tolerance)
|
||
r = d.get("runner", {})
|
||
c.runner_mode = r.get("mode", "native")
|
||
s = d.get("spark", {})
|
||
c.spark_master = s.get("master", "local[*]")
|
||
c.num_records = s.get("num_records", c.num_records)
|
||
return c
|