feat: complete INSPECT/SEARCH support, fix PERFORM/EVAL coverage marking

- Add INSPECT (TALLYING/REPLACING/CONVERTING) with BEFORE/AFTER INITIAL
- Add SEARCH/SEARCH ALL with element-assignment path enumeration
- Fix _mark_perform compound condition marking via evaluate_tree
- Fix EVALUATE TRUE prior_false to collect all MC/DC false sets
- Add impossible path filtering (Pass A.5) with trace-to-root conflict detection
- Fix multi-line PERFORM VARYING parsing (VARYING/FROM/BY/UNTIL on separate lines)
- Remove dead code: agents.py LLM parser (replaced by rule-based _BrParser)
- 59 unit tests passing, 5 integration programs verified
2026-06-10 22:56:22 +08:00
parent 0730045e27
commit 7ac887c776
9 changed files with 509 additions and 1005 deletions
@@ -7,10 +7,7 @@ from pathlib import Path

 # ── 配置（必须放在本地模块导入之前，避免循环导入） ──

-CONFIG = {
-    "proc_parser": "rule",   # "rule" | "ai"
-    "llm_generator": False,   # True=启用LLM路径生成; False=纯规则引擎
-}
+CONFIG = {}

 from .read import preprocess, extract_data_division, extract_procedure_division
 from .read import resolve_copybooks, parse_data_division, parse_file_section, scan_open_statements
@@ -18,7 +15,6 @@ from .core import build_branch_tree, classify_field_roles, _init_child_names
 from .cond import parse_single_condition, is_field
 from .design import enum_paths, generate_records, _filter_stop
 from .output import output_json, output_input_files
-from . import agents
 from .coverage import run_coverage, generate_coverage_index

 logger = logging.getLogger(__name__)
@@ -233,14 +229,7 @@ def main():
        assignments = {}

        if proc_div:
-            if CONFIG["proc_parser"] == "ai":
-                try:
-                    result = agents.parse_proc_division_ai(proc_div, fields_dict)
-                    branch_tree, assignments = result
-                except NotImplementedError:
-                    branch_tree, assignments = build_branch_tree(proc_div, fields_dict)
-            else:
-                branch_tree, assignments = build_branch_tree(proc_div, fields_dict)
+            branch_tree, assignments = build_branch_tree(proc_div, fields_dict)

            roles = classify_field_roles(branch_tree, assignments, fields_dict,
                                         source=preprocessed, proc_text=proc_div)
@@ -281,14 +270,14 @@ def main():
        cov_result = run_coverage(branch_tree, branch_paths_with_assigns, fields_dict,
                                  source, cov_prefix, index_relpath=index_relpath)

-        records = generate_records(branch_paths_with_assigns, fields_dict, assignments, file_sec=file_sec)
+        records, kept_path_cons = generate_records(branch_paths_with_assigns, fields_dict, assignments, file_sec=file_sec)

        # 输出 JSON（完整文件）
        outpath = outdir / (filepath.stem + '.json')
        output_json(records, outpath, roles,
                    fd_fields=fd_fields, field_to_fd=field_to_fd,
                    open_dir=open_dir,
-                    path_cons_list=[c for c, a in branch_paths_with_assigns])
+                    path_cons_list=kept_path_cons)

        # 输出入力 JSON（按 FD 拆分）
        output_input_files(records, outdir, filepath.stem, roles,
@@ -1,308 +0,0 @@
-"""AI智能体接口 — 基于DeepSeek的PROCEDURE DIVISION解析"""
-
-import json
-import os
-import re
-from pathlib import Path
-
-from .models import BrSeq, BrIf, BrEval, BrPerform, Assign, CallNode
-
-
-DEEPSEEK_API_KEY_ENV = "DEEPSEEK_API_KEY"
-DEEPSEEK_BASE_URL = "https://api.deepseek.com/v1"
-DEEPSEEK_MODEL = "deepseek-chat"
-PROMPT_FILE = Path(__file__).parent / "prompts" / "parse_proc_division.txt"
-
-
-def parse_proc_division_ai(proc_text: str, fields: list = None, spec_doc: str = ""):
-    """AI版PROCEDURE DIVISION解析：调用DeepSeek API，返回(branch_tree, assignments)."""
-    api_key = os.environ.get(DEEPSEEK_API_KEY_ENV)
-    if not api_key:
-        raise NotImplementedError(
-            f"AI agent requires {DEEPSEEK_API_KEY_ENV} environment variable"
-        )
-
-    prompt = _build_prompt(proc_text, fields)
-    response_text = _call_llm(prompt, api_key)
-    data = _extract_json(response_text)
-    if not data:
-        raise NotImplementedError("AI returned no parsable JSON")
-
-    branch_tree = _json_to_tree(data.get("tree", {}))
-    assignments = data.get("assignments", {})
-    return branch_tree, assignments
-
-
-def _build_prompt(proc_text: str, fields: list = None) -> list[dict]:
-    system = PROMPT_FILE.read_text(encoding="utf-8")
-
-    fields_json = json.dumps(fields, ensure_ascii=False, indent=2) if fields else "[]"
-
-    user = f"""## PROCEDURE DIVISION 源码
-
-```
-{proc_text}
-```
-
-## DATA DIVISION 字段列表
-
-```json
-{fields_json}
-```
-"""
-
-    return [
-        {"role": "system", "content": system},
-        {"role": "user", "content": user},
-    ]
-
-
-def _call_llm(messages: list[dict], api_key: str) -> str:
-    try:
-        from openai import OpenAI
-    except ImportError:
-        raise NotImplementedError(
-            "openai package not installed. Run: pip install openai"
-        )
-
-    client = OpenAI(api_key=api_key, base_url=DEEPSEEK_BASE_URL)
-    response = client.chat.completions.create(
-        model=DEEPSEEK_MODEL,
-        messages=messages,
-        temperature=0.1,
-        max_tokens=8192,
-    )
-    return response.choices[0].message.content or ""
-
-
-def _extract_json(text: str) -> dict | None:
-    stripped = text.strip()
-    # Try extracting from markdown code block first
-    m = re.search(r"```(?:json)?\s*\n?(.*?)\n?```", stripped, re.DOTALL)
-    if m:
-        stripped = m.group(1).strip()
-    try:
-        return json.loads(stripped)
-    except json.JSONDecodeError:
-        return None
-
-
-def _json_to_tree(data: dict):
-    node_type = data.get("type", "seq")
-
-    if node_type == "seq":
-        node = BrSeq()
-        for child_data in data.get("children", []):
-            child = _json_to_tree(child_data)
-            if child is not None:
-                node.add(child)
-        return node
-
-    if node_type == "if":
-        node = BrIf(data.get("condition", ""))
-        node.true_seq = _json_to_tree(data.get("true_seq", {"type": "seq", "children": []}))
-        node.false_seq = _json_to_tree(data.get("false_seq", {"type": "seq", "children": []}))
-        return node
-
-    if node_type == "eval":
-        node = BrEval(data.get("subject", ""))
-        for w in data.get("when_list", []):
-            node.when_list.append((w.get("value", ""), _json_to_tree(w.get("seq", {"type": "seq", "children": []}))))
-        node.other_seq = _json_to_tree(data.get("other_seq", {"type": "seq", "children": []}))
-        node.has_other = data.get("has_other", False)
-        return node
-
-    if node_type == "perform":
-        perf_type = data.get("perf_type", "para")
-        kw = {"perf_type": perf_type}
-        for k in ("condition", "target", "thru", "times",
-                   "varying_var", "varying_from", "varying_by"):
-            if k in data:
-                kw[k] = data[k]
-        node = BrPerform(**kw)
-        if "body_seq" in data:
-            node.body_seq = _json_to_tree(data["body_seq"])
-        return node
-
-    if node_type == "assign":
-        return Assign(
-            target=data.get("target", ""),
-            source_info=data.get("source_info", {}),
-        )
-
-    if node_type == "call":
-        return CallNode(
-            program_name=data.get("program_name", ""),
-            using_params=data.get("using_params", []),
-        )
-
-    return None
-
-
-# ── LLM 路径生成 ──
-
-
-def llm_generate_all_paths(tree_root, fields) -> list | None:
-    """为整个控制流树生成 MC/DC 路径。返回 [(constraints, assignments), ...] 或 None。"""
-    api_key = os.environ.get(DEEPSEEK_API_KEY_ENV)
-    if not api_key:
-        return None
-
-    tree_json = _serialize_tree_for_llm(tree_root)
-    if tree_json is None:
-        return None
-
-    level88_map = _extract_88_mapping(fields)
-    messages = _build_path_prompt(tree_json, fields, level88_map)
-
-    try:
-        response = _call_llm(messages, api_key)
-        data = _extract_json(response)
-        if data and "paths" in data:
-            return _parse_llm_paths(data["paths"])
-    except Exception:
-        pass
-    return None
-
-
-def _serialize_tree_for_llm(node):
-    if node is None:
-        return None
-    from .models import BrSeq, BrIf, BrEval, BrPerform, Assign, CallNode, ExitNode, GoTo
-
-    if isinstance(node, BrSeq):
-        children = []
-        for child in node.children:
-            s = _serialize_tree_for_llm(child)
-            if s is not None:
-                children.append(s)
-        return {"type": "seq", "children": children} if children else None
-
-    if isinstance(node, BrIf):
-        return {
-            "type": "if",
-            "condition": node.condition,
-            "true_seq": _serialize_tree_for_llm(node.true_seq) or {"type": "seq", "children": []},
-            "false_seq": _serialize_tree_for_llm(node.false_seq) or {"type": "seq", "children": []},
-        }
-
-    if isinstance(node, BrEval):
-        when_list = []
-        for val, seq in node.when_list:
-            s = _serialize_tree_for_llm(seq)
-            when_list.append({"value": val, "seq": s or {"type": "seq", "children": []}})
-        return {
-            "type": "eval",
-            "subject": node.subject,
-            "when_list": when_list,
-            "other_seq": _serialize_tree_for_llm(node.other_seq) or {"type": "seq", "children": []},
-            "has_other": node.has_other,
-        }
-
-    if isinstance(node, BrPerform):
-        result = {"type": "perform", "perf_type": node.perf_type}
-        for attr in ("condition", "target", "thru", "times",
-                      "varying_var", "varying_from", "varying_by"):
-            val = getattr(node, attr, None)
-            if val is not None:
-                result[attr] = val
-        if node.body_seq:
-            bs = _serialize_tree_for_llm(node.body_seq)
-            if bs:
-                result["body_seq"] = bs
-        return result
-
-    # Assign / CallNode / ExitNode / GoTo — 不影响路径生成，可省略
-    return None
-
-
-def _extract_88_mapping(fields):
-    mapping = {}
-    for f in fields:
-        if f.get('is_88'):
-            mapping[f['name']] = {
-                "parent": f['parent'],
-                "value": f['value'],
-                "pic_info": f.get('pic_info', {}),
-            }
-    return mapping
-
-
-def _build_path_prompt(tree_json, fields, level88_map):
-    system = ("你是 COBOL 测试路径生成专家。"
-              "请为给定的控制流树生成满足 MC/DC 覆盖的测试路径集。"
-              "只输出 JSON，不要多余文字。")
-
-    reduced_fields = []
-    for f in fields:
-        entry = {"name": f["name"], "pic": f.get("pic", "")}
-        pi = f.get("pic_info", {})
-        if pi:
-            entry["pic_info"] = {
-                "type": pi.get("type"), "digits": pi.get("digits"),
-                "decimal": pi.get("decimal"), "length": pi.get("length"),
-            }
-        if f.get("is_88"):
-            entry["is_88"] = True
-            entry["value"] = f.get("value")
-            entry["parent"] = f.get("parent")
-        reduced_fields.append(entry)
-
-    user = (
-        "## 控制流树（JSON）\n\n"
-        f"```json\n{json.dumps(tree_json, ensure_ascii=False, indent=2)}\n```\n\n"
-        "## 字段定义\n\n"
-        f"```json\n{json.dumps(reduced_fields, ensure_ascii=False, indent=2)}\n```\n\n"
-        "## 要求\n"
-        "1. 每个 IF/EVALUATE/PERFORM UNTIL 的每个分支至少被覆盖一次\n"
-        "2. 复合条件（AND/OR/NOT）需要满足 MC/DC：每个叶条件的独立影响对\n"
-        "3. 路径数尽量少（最小集优先）\n"
-        "4. 88-level 条件名要展开为实际字段比较（如 CUST-VIP → WS-CUST-LEVEL='V'）\n"
-        "5. 同一路径中的约束不能自相矛盾（同一字段不能同时等于 'A' 和等于 'B'）\n"
-        "6. 数值边界值合理（>5000 → 5001, <100 → 99）\n"
-        "7. AND 优先级高于 OR\n\n"
-        "## 输出格式\n\n"
-        "```json\n"
-        "{\n"
-        '  "paths": [\n'
-        "    {\n"
-        '      "constraints": [\n'
-        '        {"field": "WS-AMOUNT", "op": ">", "value": "5000", "want_true": true}\n'
-        "      ],\n"
-        '      "assignments": {}\n'
-        "    }\n"
-        "  ]\n"
-        "}\n"
-        "```"
-    )
-
-    return [
-        {"role": "system", "content": system},
-        {"role": "user", "content": user},
-    ]
-
-
-def _parse_llm_paths(paths_data):
-    result = []
-    for p in paths_data:
-        constraints = []
-        for c in p.get("constraints", []):
-            constraints.append((c["field"], c["op"], str(c["value"]), c["want_true"]))
-        assignments = p.get("assignments", {})
-        result.append((constraints, assignments))
-    return result
-
-
-def resolve_constraints_ai(paths, fields=None, assignments=None):
-    """AI版约束推理（未来实现）"""
-    raise NotImplementedError("AI agent not yet implemented")
-
-
-def enhance_metadata_ai(records, fields=None, spec_doc: str = ""):
-    """AI版测试用例元数据生成（未来实现）"""
-    raise NotImplementedError("AI agent not yet implemented")
-
-
-def analyze_spec_ai(spec_doc: str = ""):
-    """AI版式样书解析（未来实现）"""
-    raise NotImplementedError("AI agent not yet implemented")
@@ -3,7 +3,7 @@
 import re
 import logging
 from datetime import datetime
-from .models import BrSeq, BrIf, BrEval, BrPerform, BrSeq, CondLeaf, CondNot, ParseError, Assign, CallNode, ExitNode, GoTo
+from .models import BrSeq, BrIf, BrEval, BrPerform, BrSearch, BrSeq, CondLeaf, CondNot, ParseError, Assign, CallNode, ExitNode, GoTo
 from .cond import parse_compound_condition, parse_single_condition, collect_leaves

 logger = logging.getLogger(__name__)
@@ -12,6 +12,7 @@ logger = logging.getLogger(__name__)
 _COBOL_SCOPE_ENDERS = {
    'END-IF', 'END-EVALUATE', 'END-PERFORM', 'END-EXEC', 'END-CALL',
    'END-READ', 'END-WRITE', 'END-DELETE', 'END-REWRITE', 'END-START',
+    'END-SEARCH',
    'ELSE', 'WHEN', 'OTHER',
 }

@@ -22,22 +23,26 @@ def scan_paragraphs(raw_lines):
    while i < len(raw_lines):
        line = raw_lines[i].strip()
        m = re.match(r'^([A-Z0-9][A-Z0-9-]*)\.\s*$', line)
+        sec_m = re.match(r'^([A-Z][A-Z0-9-]*)\s+SECTION\.?\s*$', line, re.IGNORECASE)
        if m and m.group(1) not in _COBOL_SCOPE_ENDERS:
            name = m.group(1)
-            start = i + 1
-            j = i + 1
-            while j < len(raw_lines):
-                nline = raw_lines[j].strip()
-                nm = re.match(r'^([A-Z0-9][A-Z0-9-]*)\.\s*$', nline)
-                if nm and nm.group(1) not in _COBOL_SCOPE_ENDERS:
-                    break
-                if re.match(r'^[A-Z][A-Z0-9-]*\s+SECTION\.\s*$', nline, re.IGNORECASE):
-                    break
-                j += 1
-            paragraphs[name] = (start, j - 1)
-            i = j
+        elif sec_m:
+            name = sec_m.group(1).upper()
        else:
            i += 1
+            continue
+        start = i + 1
+        j = i + 1
+        while j < len(raw_lines):
+            nline = raw_lines[j].strip()
+            nm = re.match(r'^([A-Z0-9][A-Z0-9-]*)\.\s*$', nline)
+            if nm and nm.group(1) not in _COBOL_SCOPE_ENDERS:
+                break
+            if re.match(r'^[A-Z][A-Z0-9-]*\s+SECTION\.\s*$', nline, re.IGNORECASE):
+                break
+            j += 1
+        paragraphs[name] = (start, j - 1)
+        i = j
    return paragraphs


@@ -160,6 +165,10 @@ class _BrParser:
                if perf_node:
                    seq.add(perf_node)
                continue
+            m_search = re.match(r'^SEARCH\b(?:\s+(ALL))?\s+(\w[\w-]*)(?:\s+VARYING\s+(\w[\w-]*))?', line, re.IGNORECASE)
+            if m_search:
+                seq.add(self._parse_search(m_search))
+                continue
            m = re.match(r'^INITIALIZE\s+', line)
            if m:
                init_seq = self._parse_initialize()
@@ -229,6 +238,15 @@ class _BrParser:
            if m_set:
                seq.add(self._parse_set_true(m_set.group(1)))
                continue
+            m_insp = re.match(r'^INSPECT\s+', line, re.IGNORECASE)
+            if m_insp:
+                info = self._parse_inspect(line)
+                if info:
+                    tgt = info.get('tgt', '')
+                    self.assignments.setdefault(tgt, []).append(info)
+                    seq.add(Assign(tgt, info))
+                self.advance()
+                continue
            assign_node = self._record_assignment(line)
            if assign_node:
                seq.add(assign_node)
@@ -243,6 +261,81 @@ class _BrParser:
                return True
        return False

+    # ── INSPECT ──
+
+    _PIC_FIG_CONV = {'ZERO': '0', 'ZEROS': '0', 'ZEROES': '0',
+                     'SPACE': ' ', 'SPACES': ' '}
+
+    @staticmethod
+    def _expand_figurative(val):
+        if val.upper() in _BrParser._PIC_FIG_CONV:
+            return _BrParser._PIC_FIG_CONV[val.upper()]
+        return val
+
+    def _parse_inspect_phrase(self, phrase):
+        m = re.match(
+            r'TALLYING\s+(\w[\w-]*)\s+FOR\s+'
+            r'(LEADING|TRAILING|CHARACTERS)'
+            r'(?:\s+([\'"])(.*?)\3)?'
+            r'(?:\s+(BEFORE|AFTER)\s+INITIAL\s+([\'"])(.*?)\6)?\s*$',
+            phrase, re.IGNORECASE
+        )
+        if m:
+            return ('tally', {
+                'count_var': m.group(1).upper(),
+                'kind': m.group(2).upper(),
+                'char': self._expand_figurative(m.group(4) or ''),
+                'before_after': (m.group(5) or '').upper(),
+                'delimiter': self._expand_figurative(m.group(7) or ''),
+            })
+        m = re.match(
+            r'REPLACING\s+'
+            r'(ALL|LEADING|FIRST|CHARACTERS)\s+'
+            r'([\'"])(.*?)\2\s+BY\s+'
+            r'([\'"])(.*?)\4'
+            r'(?:\s+(BEFORE|AFTER)\s+INITIAL\s+([\'"])(.*?)\7)?\s*$',
+            phrase, re.IGNORECASE
+        )
+        if m:
+            return ('replace', {
+                'kind': m.group(1).upper(),
+                'src': self._expand_figurative(m.group(3)),
+                'dst': self._expand_figurative(m.group(5)),
+                'before_after': (m.group(6) or '').upper(),
+                'delimiter': self._expand_figurative(m.group(8) or ''),
+            })
+        m = re.match(
+            r'CONVERTING\s+([\'"])(.*?)\1\s+TO\s+([\'"])(.*?)\3\s*$',
+            phrase, re.IGNORECASE
+        )
+        if m:
+            return ('convert', {
+                'from_chars': self._expand_figurative(m.group(2)),
+                'to_chars': self._expand_figurative(m.group(4)),
+            })
+        return None
+
+    def _parse_inspect(self, line):
+        m = re.match(r'^INSPECT\s+(\w[\w-]*)\s+(.+)$', line, re.IGNORECASE)
+        if not m:
+            return None
+        tgt = m.group(1).upper()
+        rest = m.group(2).strip()
+        phrases = re.split(r'\s+(?=(?:TALLYING|REPLACING|CONVERTING)\b)', rest, flags=re.IGNORECASE)
+        sub_ops = []
+        for phrase in phrases:
+            sub = self._parse_inspect_phrase(phrase.strip())
+            if sub:
+                sub_ops.append(sub)
+        if not sub_ops:
+            return None
+        return {
+            'type': 'inspect',
+            'tgt': tgt,
+            'source_vars': [tgt],
+            'sub_ops': sub_ops,
+        }
+
    def _record_assignment(self, line):
        if self.assignments is None:
            return None
@@ -503,6 +596,44 @@ class _BrParser:
        vars_in = re.findall(r'[A-Z][A-Z0-9-]*', expr.upper())
        return {'type': 'compute', 'source_vars': list(set(vars_in)), 'op': None, 'const': None, 'expr': expr}

+    # ── SEARCH / SEARCH ALL ──
+
+    def _parse_search(self, m):
+        is_all = bool(m.group(1))
+        table = m.group(2).upper()
+        varying = m.group(3).upper() if m.group(3) else None
+        node = BrSearch(table, is_all=is_all, varying=varying)
+        self.advance()
+        while self.pos < len(self.lines):
+            line = self.clean()
+            if line in ('END-SEARCH', 'END-SEARCH.'):
+                self.advance()
+                return node
+            m_at = re.match(r'^AT\s+END(.+)?$', line, re.IGNORECASE)
+            if m_at:
+                self.advance()
+                rest = m_at.group(1)
+                if rest and rest.strip():
+                    self.lines.insert(self.pos, rest.strip())
+                node.at_end_seq = self.parse_seq(
+                    end_check=lambda l: re.match(r'^WHEN\b', l) or l in ('END-SEARCH',)
+                )
+                node.has_at_end = True
+                continue
+            m_when = re.match(r'^WHEN\s+(.+?)\s*$', line, re.IGNORECASE)
+            if m_when:
+                cond_upper = m_when.group(1).strip()
+                self.advance()
+                cond_tree = parse_compound_condition(cond_upper, self.fields)
+                body_seq = self.parse_seq(
+                    end_check=lambda l: re.match(r'^(WHEN|AT\s+END)\b', l) or l in ('END-SEARCH',)
+                )
+                node.when_list.append((cond_upper, body_seq))
+                node.cond_trees.append(cond_tree)
+                continue
+            self.advance()
+        return node
+
    def _parse_if(self):
        line = self.clean()
        m = re.match(r'^IF\s+(.+?)(?:THEN)?\s*$', line)
@@ -1039,6 +1170,18 @@ def _resolve_subscript(key, rec):
    return key


+def _apply_before_after(val, before_after, delimiter):
+    if not delimiter:
+        return val
+    if before_after == 'BEFORE':
+        idx = val.find(delimiter)
+        return val[:idx] if idx >= 0 else val
+    if before_after == 'AFTER':
+        idx = val.find(delimiter)
+        return val[idx + len(delimiter):] if idx >= 0 else ''
+    return val
+
+
 def propagate_assignments(rec, assignments, fields, file_sec=None):
    def raw_to_float(val, pi):
        if pi.get('type') == 'numeric':
@@ -1233,6 +1376,47 @@ def propagate_assignments(rec, assignments, fields, file_sec=None):
                    if all_found:
                        rec[resolved_tgt] = float_to_raw(total, pi_tgt)

+        # Pass 4.5: INSPECT
+        for tgt, asgn in flat_list:
+            if asgn['type'] != 'inspect':
+                continue
+            resolved_tgt = _resolve_subscript(tgt, rec)
+            if resolved_tgt not in rec:
+                continue
+            src_val = str(rec[resolved_tgt])
+            for op_type, params in asgn.get('sub_ops', []):
+                if op_type == 'tally':
+                    cv = params['count_var'].upper()
+                    cv_pi = pi_map.get(cv, {})
+                    effective = _apply_before_after(src_val, params.get('before_after'), params.get('delimiter'))
+                    cnt = 0
+                    if params['kind'] == 'LEADING':
+                        cnt = len(effective) - len(effective.lstrip(params['char']))
+                    elif params['kind'] == 'TRAILING':
+                        cnt = len(effective) - len(effective.rstrip(params['char']))
+                    else:
+                        cnt = len(effective)
+                    if cv_pi.get('type') == 'numeric':
+                        rec[cv] = float_to_raw(float(cnt), cv_pi)
+                elif op_type == 'replace':
+                    effective = _apply_before_after(src_val, params.get('before_after'), params.get('delimiter'))
+                    if params['kind'] == 'ALL':
+                        new_val = effective.replace(params['src'], params['dst'])
+                    elif params['kind'] == 'LEADING':
+                        new_val = effective
+                        while new_val.startswith(params['src']):
+                            new_val = new_val[len(params['src']):]
+                        new_val = effective.replace(params['src'], params['dst'], 1)
+                    elif params['kind'] == 'FIRST':
+                        new_val = effective.replace(params['src'], params['dst'], 1)
+                    else:
+                        new_val = params['dst'] * len(effective)
+                    rec[resolved_tgt] = new_val
+                elif op_type == 'convert':
+                    effective = _apply_before_after(src_val, params.get('before_after'), params.get('delimiter'))
+                    table = str.maketrans(params['from_chars'], params['to_chars'])
+                    rec[resolved_tgt] = effective.translate(table)
+
        # Pass 5: STRING / UNSTRING
        for tgt, asgn in flat_list:
            if asgn['type'] == 'string_concat':
@@ -6,7 +6,7 @@ from dataclasses import dataclass, field
 from pathlib import Path

 logger = logging.getLogger(__name__)
-from .models import BrSeq, BrIf, BrEval, BrPerform, CondLeaf
+from .models import BrSeq, BrIf, BrEval, BrPerform, BrSearch, CondLeaf
 from .cond import parse_single_condition, parse_compound_condition, is_field, collect_leaves, evaluate_tree


@@ -83,6 +83,26 @@ def collect_decision_points(node, fields, counter=None):
        p, l = _walk_collect(node.other_seq, fields, counter)
        points.extend(p); all_leaves.extend(l)

+    elif isinstance(node, BrSearch):
+        counter[0] += 1
+        branch_names = []
+        for cond_text, seq in node.when_list:
+            branch_names.append(f'WHEN {cond_text[:40]}')
+        if node.has_at_end:
+            branch_names.append('AT END')
+        dp = DecisionPoint(id=counter[0], kind='SEARCH',
+                           label=node.table_name, branch_names=branch_names)
+        dp.when_list = node.when_list
+        dp.cond_trees = node.cond_trees
+        dp.has_other = node.has_at_end
+        points.append(dp)
+        for cond_text, seq in node.when_list:
+            p, l = _walk_collect(seq, fields, counter)
+            points.extend(p); all_leaves.extend(l)
+        if node.has_at_end:
+            p, l = _walk_collect(node.at_end_seq, fields, counter)
+            points.extend(p); all_leaves.extend(l)
+
    elif isinstance(node, BrPerform):
        if node.perf_type in ('until', 'para_until', 'varying', 'para_varying'):
            counter[0] += 1
@@ -92,6 +112,13 @@ def collect_decision_points(node, fields, counter=None):
            simple = parse_single_condition(node.condition) if node.condition else None
            if simple and is_field(simple[0], fields):
                dp.parsed = simple
+            elif node.condition:
+                cond_tree = parse_compound_condition(node.condition, fields)
+                if cond_tree:
+                    leaves = collect_leaves(cond_tree)
+                    if leaves:
+                        dp.cond_tree = cond_tree
+                        dp.cond_leaves = list(leaves)
            points.append(dp)
        p, l = _walk_collect(node.body_seq, fields, counter)
        points.extend(p); all_leaves.extend(l)
@@ -116,9 +143,11 @@ def mark_coverage(decision_points, leaf_stats, branch_paths, fields):
            if dp.kind == 'IF':
                _mark_if(dp, cons)
            elif dp.kind == 'EVALUATE':
-                _mark_eval(dp, cons)
+                _mark_eval(dp, cons, fields)
            elif dp.kind == 'PERFORM':
                _mark_perform(dp, cons)
+            elif dp.kind == 'SEARCH':
+                _mark_search(dp, cons, fields)
        for leaf in leaf_stats:
            for c in cons:
                if _match_leaf(c, leaf):
@@ -128,7 +157,7 @@ def mark_coverage(decision_points, leaf_stats, branch_paths, fields):
                        leaf.covered_false = True

    for dp in decision_points:
-        _infer_implied(dp)
+        dp.implied_branches = set(dp.active_branches)


 def _match_constraint(c, parsed):
@@ -180,18 +209,20 @@ def _mark_if(dp, cons):
                        dp.active_branches.add('T' if c[3] else 'F')


-def _mark_eval(dp, cons):
+def _mark_eval(dp, cons, fields=None):
    if dp.label == 'TRUE':
+        matched = False
        for when_val, _ in dp.when_list:
-            parsed = parse_single_condition(when_val)
+            parsed = parse_single_condition(when_val, fields)
            if parsed:
                for c in cons:
-                    if _match_constraint(c, parsed):
+                    if _match_constraint(c, parsed) and c[3]:
                        name = f"WHEN {when_val}"
                        if name in dp.branch_names:
                            dp.active_branches.add(name)
+                            matched = True
            else:
-                cond_tree = parse_compound_condition(when_val)
+                cond_tree = parse_compound_condition(when_val, fields)
                if cond_tree and not isinstance(cond_tree, CondLeaf):
                    leaves = list(collect_leaves(cond_tree))
                    assignment = {}
@@ -205,6 +236,15 @@ def _mark_eval(dp, cons):
                            name = f"WHEN {when_val}"
                            if name in dp.branch_names:
                                dp.active_branches.add(name)
+                                matched = True
+        if not matched and 'OTHER' in dp.branch_names:
+            when_fields = set()
+            for when_val, _ in dp.when_list:
+                for c in cons:
+                    if c[0] in when_val:
+                        when_fields.add(c[0])
+            if when_fields:
+                dp.active_branches.add('OTHER')
        return
    for c in cons:
        if c[0] == dp.label and c[1] == '=':
@@ -215,6 +255,44 @@ def _mark_eval(dp, cons):
            dp.active_branches.add('OTHER')


+def _mark_search(dp, cons, fields=None):
+    branch_masks = [False] * len(dp.branch_names)
+    for i, (cond_text, body_seq) in enumerate(dp.when_list):
+        cond_tree = dp.cond_trees[i] if i < len(dp.cond_trees) else None
+        if not cond_tree:
+            continue
+        if isinstance(cond_tree, CondLeaf):
+            for c in cons:
+                if len(c) == 4:
+                    base_c = re.sub(r'\s*\(.*?\)\s*$', '', c[0])
+                    base_cond = re.sub(r'\s*\(.*?\)\s*$', '', cond_tree.field)
+                    if base_c == base_cond and c[1] == cond_tree.op \
+                       and str(c[2]) == str(cond_tree.value) and c[3]:
+                        branch_masks[i] = True
+                        break
+        else:
+            leaves = list(collect_leaves(cond_tree))
+            assignment = {}
+            for leaf in leaves:
+                for c in cons:
+                    if len(c) == 4:
+                        base_c = re.sub(r'\s*\(.*?\)\s*$', '', c[0])
+                        base_l = re.sub(r'\s*\(.*?\)\s*$', '', leaf.field)
+                        if base_c == base_l and c[1] == leaf.op and str(c[2]) == str(leaf.value):
+                            assignment[leaf] = c[3]
+                            break
+            if len(assignment) == len(leaves):
+                if evaluate_tree(cond_tree, assignment):
+                    branch_masks[i] = True
+    if dp.has_other:
+        at_end_idx = len(dp.branch_names) - 1
+        if not any(branch_masks[:at_end_idx]):
+            branch_masks[at_end_idx] = True
+    for i, m in enumerate(branch_masks):
+        if m:
+            dp.active_branches.add(dp.branch_names[i])
+
+
 def _mark_perform(dp, cons):
    simple = getattr(dp, 'parsed', None)
    if simple:
@@ -224,6 +302,18 @@ def _mark_perform(dp, cons):
                    dp.active_branches.add('Skip')
                else:
                    dp.active_branches.add('Enter')
+    elif dp.cond_tree and dp.cond_leaves:
+        assignment = {}
+        for leaf in dp.cond_leaves:
+            for c in cons:
+                if _match_leaf(c, leaf):
+                    assignment[leaf] = c[3]
+                    break
+        if len(assignment) == len(dp.cond_leaves):
+            if evaluate_tree(dp.cond_tree, assignment):
+                dp.active_branches.add('Skip')
+            else:
+                dp.active_branches.add('Enter')
    else:
        for c in cons:
            if c[0] == dp.label or any(c[0] == f for f in _get_fields_in_cond(dp.label)):
@@ -237,10 +327,6 @@ def _get_fields_in_cond(cond_text):
    return re.findall(r'[A-Z][A-Z0-9-]*', cond_text.upper())


-def _infer_implied(dp):
-    dp.implied_branches.update(dp.active_branches)
-
-
 # ── 行号定位（基于原始源文本）──

 def locate_decision_lines(decision_points, raw_source):
@@ -1,20 +1,15 @@
 """设计层：路径枚举 + 值生成 + 约束应用"""

-import os
 import re
 import logging
-from . import agents, CONFIG
-from .models import BrSeq, BrIf, BrEval, BrPerform, Assign, CallNode, CondNot, CondLeaf, ExitNode, GoTo
+from .models import BrSeq, BrIf, BrEval, BrPerform, BrSearch, Assign, CallNode, CondNot, CondLeaf, ExitNode, GoTo
 from .cond import parse_single_condition, parse_compound_condition, is_field, collect_leaves, mcdc_sets, satisfying_value
 from .core import trace_to_root, invert_through_chain, propagate_assignments, _basename

 logger = logging.getLogger(__name__)

 _STOP = ('__STOP__', '', None, True)
-_MAX_PATHS = 5000
-_FALLBACK_MAX_PATHS = 100
-_ACTIVE_MAX_PATHS = _MAX_PATHS
-_LLM_FAILED = False
+_MAX_PATHS = 10000


 def _filter_stop(cons):
@@ -22,46 +17,51 @@ def _filter_stop(cons):


 def _cap_paths(paths):
-    if len(paths) > _ACTIVE_MAX_PATHS:
-        return paths[:_ACTIVE_MAX_PATHS]
+    if len(paths) > _MAX_PATHS:  # over the cap: keep only the first _MAX_PATHS entries
+        return paths[:_MAX_PATHS]
     return paths


+def _cap_paths_fair(new_active, child_paths):
+    """Two-phase fair truncation: keep at least one child path per predecessor path, then fill the remaining quota."""
+    if len(new_active) <= _MAX_PATHS:
+        return new_active
+    k = len(child_paths)
+    if k <= 1:
+        return new_active[:_MAX_PATHS]
+    # Separate out the STOP paths (they take no part in the combination and are kept as-is)
+    stop_paths = [(p, a) for p, a in new_active if any(c is _STOP for c in p)]
+    combined = [(p, a) for p, a in new_active if not any(c is _STOP for c in p)]
+    n_pred = len(combined) // k  # assumes combined holds k consecutive entries per predecessor — TODO confirm
+    result = list(stop_paths)
+    if n_pred <= 1:
+        result.extend(combined[:_MAX_PATHS - len(result)])
+        return result[:_MAX_PATHS]
+    remaining_quota = _MAX_PATHS - len(result)
+    # Phase 1: keep at least one child path per predecessor (round-robin over the child-path indices)
+    quota = min(n_pred, remaining_quota)
+    selected = set()  # indices into combined already taken in phase 1
+    for p_idx in range(quota):
+        c_idx = p_idx % k
+        idx = p_idx * k + c_idx
+        selected.add(idx)
+        result.append(combined[idx])
+    if len(result) >= _MAX_PATHS:
+        return result[:_MAX_PATHS]
+    # Phase 2: fill the remaining quota with the other combinations
+    remaining = _MAX_PATHS - len(result)
+    for idx in range(len(combined)):
+        if idx not in selected:
+            result.append(combined[idx])
+            remaining -= 1
+            if remaining <= 0:
+                break
+    return result[:_MAX_PATHS]
+
+
 # ── 路径枚举 ──

-def _try_llm_enum_paths(node, fields):
-    global _LLM_FAILED
-    if _LLM_FAILED:
-        logger.debug("断路器已跳，跳过 LLM")
-        return None
-    if not CONFIG.get("llm_generator", True):
-        logger.debug("llm_generator 已关闭，降级规则引擎")
-        return None
-    if not os.environ.get(agents.DEEPSEEK_API_KEY_ENV):
-        logger.warning("DEEPSEEK_API_KEY 未设置，降级规则引擎")
-        return None
-    try:
-        result = agents.llm_generate_all_paths(node, fields)
-        if result is not None:
-            logger.info(f"LLM 路径生成成功，{len(result)} 条")
-            return result
-        logger.warning("LLM 返回空，降级规则引擎")
-    except Exception as e:
-        logger.error(f"LLM API 调用异常: {e}")
-    _LLM_FAILED = True
-    return None
-
-
 def enum_paths(node, fields):
-    global _ACTIVE_MAX_PATHS
-    # === LLM 优先（整体替换整个树的路径生成） ===
-    llm_result = _try_llm_enum_paths(node, fields)
-    if llm_result is not None:
-        _ACTIVE_MAX_PATHS = _MAX_PATHS
-        return llm_result
-    if _ACTIVE_MAX_PATHS == _MAX_PATHS:
-        logger.warning("降级到规则引擎（路径上限 5000 → 100）")
-        _ACTIVE_MAX_PATHS = _FALLBACK_MAX_PATHS
    """枚举路径，每条路径返回 (constraints, assignments).
    返回 list[tuple[list[tuple], dict]].
    """
@@ -86,7 +86,7 @@ def enum_paths(node, fields):
                            merged.setdefault(k, []).extend(v if isinstance(v, list) else [v])
                    merged_cons = p_cons + list(cp_cons)
                    new_active.append((merged_cons, merged))
-            paths = _cap_paths(new_active)
+            paths = _cap_paths_fair(new_active, child_paths)
        return paths

    elif isinstance(node, BrIf):
@@ -170,49 +170,56 @@ def enum_paths(node, fields):
            return paths
        if node.subject == 'TRUE':
            paths = []
-            prior_false = []
+            prior_false_sets = []  # list[list[Constraint]]
            for value, seq in node.when_list:
                cond = parse_compound_condition(value, fields)
                if cond and isinstance(cond, CondLeaf) and is_field(cond.field, fields):
-                    # Simple condition
                    sub = _cap_paths(enum_paths(seq, fields))
                    for sp_cons, sp_assign in (sub or [([], {})]):
-                        constraints = list(prior_false)
+                        constraints = [c for pf in prior_false_sets for c in pf]
                        constraints.append((cond.field, cond.op, cond.value, True))
                        paths.append((constraints + sp_cons, sp_assign))
-                    prior_false.append((cond.field, cond.op, cond.value, False))
+                    prior_false_sets.append([(cond.field, cond.op, cond.value, False)])
                elif cond:
-                    # Compound condition — use MC/DC for path generation
                    leaves = collect_leaves(cond)
                    if leaves and all(is_field(l.field, fields) for l in leaves):
                        sets = mcdc_sets(cond, fields)
                        if sets:
                            sub = _cap_paths(enum_paths(seq, fields))
-                            false_set = None
+                            new_false_sets = []
                            for cs, decision in sets:
                                if decision:
-                                    for sp_cons, sp_assign in (sub or [([], {})]):
-                                        paths.append((list(prior_false) + list(cs) + sp_cons, sp_assign))
-                                elif false_set is None:
-                                    false_set = cs
-                            if false_set is not None:
-                                prior_false.extend(false_set)
-                            else:
-                                prior_false = []
+                                    if not prior_false_sets:
+                                        for sp_cons, sp_assign in (sub or [([], {})]):
+                                            paths.append((list(cs) + sp_cons, sp_assign))
+                                    else:
+                                        for pf_set in prior_false_sets:
+                                            for sp_cons, sp_assign in (sub or [([], {})]):
+                                                paths.append((list(pf_set) + list(cs) + sp_cons, sp_assign))
+                                else:
+                                    new_false_sets.append(cs)
+                            if not new_false_sets:
+                                prior_false_sets = []
                                break
+                            combined = []
+                            for pf_set in prior_false_sets:
+                                for nf_set in new_false_sets:
+                                    combined.append(list(pf_set) + list(nf_set))
+                            prior_false_sets = combined
                        else:
-                            prior_false = []
+                            prior_false_sets = []
                            break
                    else:
-                        prior_false = []
+                        prior_false_sets = []
                        break
                else:
-                    prior_false = []
+                    prior_false_sets = []
                    break
            if node.has_other:
                sub = _cap_paths(enum_paths(node.other_seq, fields))
                for sp_cons, sp_assign in (sub or [([], {})]):
-                    paths.append((list(prior_false) + sp_cons, sp_assign))
+                    constraints = [c for pf in prior_false_sets for c in pf]
+                    paths.append((constraints + sp_cons, sp_assign))
            return paths
        if not is_field(node.subject, fields):
            return [([], {})]
@@ -228,6 +235,9 @@ def enum_paths(node, fields):
                paths.append(([(node.subject, 'not_in', case_vals, True)] + sp_cons, sp_assign))
        return paths

+    elif isinstance(node, BrSearch):
+        return _enum_search_paths(node, fields)
+
    elif isinstance(node, BrPerform):
        if node.perf_type in ('para', 'thru'):
            if node.body_seq:
@@ -743,12 +753,90 @@ def apply_occurs_depending(rec, fields):
            rec[name] = '0' * length


+def _non_match_for(cond_leaf, fields):  # pick a filler literal for cond_leaf's field based on its PIC type
+    if not fields or not cond_leaf:
+        return None
+    base = re.sub(r'\s*\(.*?\)\s*$', '', cond_leaf.field)  # drop a trailing "(...)" subscript
+    for f in fields:
+        if re.sub(r'\s*\(.*?\)\s*$', '', f['name']) == base:  # first field with the same subscript-free name wins
+            pic = f.get('pic_info', {})
+            if pic.get('type') == 'numeric':
+                return '0'
+            return ' '
+    return None  # no field with that base name
+
+
+def _enum_search_paths(node, fields):  # enumerate (constraints, assignments) paths for each WHEN body and, if present, the AT END body
+    # Infer the OCCURS count from the condition field name; e.g. WS-CODE-VAL(WS-IDX) → look for the largest j among WS-CODE-VAL(j)
+    occurs_count = 1
+    if node.when_list and node.cond_trees and node.cond_trees[0]:
+        ct = node.cond_trees[0]
+        if isinstance(ct, CondLeaf):
+            base = re.sub(r'\s*\(.*?\)\s*$', '', ct.field)
+            for f in fields:
+                m = re.match(rf'^{re.escape(base)}\((\d+)\)$', f['name'])
+                if m:
+                    occurs_count = max(occurs_count, int(m.group(1)))
+            if occurs_count <= 1:
+                # Fall back to the numeric suffixes of the fields named after the parent group (node.table_name)
+                parent = node.table_name
+                for f in fields:
+                    m = re.match(rf'^{re.escape(parent)}\((\d+)\)$', f['name'])
+                    if m:
+                        occurs_count = max(occurs_count, int(m.group(1)))
+
+    paths = []
+    for i, (cond_text, body_seq) in enumerate(node.when_list):
+        cond_tree = node.cond_trees[i] if i < len(node.cond_trees) else None
+        sub = _cap_paths(enum_paths(body_seq, fields))
+        if not sub:
+            sub = [([], {})]
+
+        extra_assign = {}
+        if cond_tree and isinstance(cond_tree, CondLeaf):
+            base = re.sub(r'\s*\(.*?\)\s*$', '', cond_tree.field)
+            matching_val = cond_tree.value
+            elem_key = f'{base}({i + 1})'  # element subscript is the WHEN clause's 1-based position
+            extra_assign[elem_key] = [{'type': 'move_literal', 'literal': matching_val}]
+            non_match = _non_match_for(cond_tree, fields) or ' '
+            for j in range(i):  # elements before it receive the non-matching literal
+                prev_key = f'{base}({j + 1})'
+                extra_assign[prev_key] = [{'type': 'move_literal', 'literal': non_match}]
+
+        for sp_cons, sp_assign in (sub or [([], {})]):
+            merged_assign = dict(extra_assign)
+            for k, v in sp_assign.items():
+                merged_assign.setdefault(k, []).extend(v if isinstance(v, list) else [v])
+            paths.append((sp_cons, merged_assign))
+
+    if node.has_at_end:
+        sub = _cap_paths(enum_paths(node.at_end_seq, fields))
+        for sp_cons, sp_assign in (sub or [([], {})]):
+            extra_assign = {}
+            non_match = ' '
+            if node.when_list:
+                ct = node.cond_trees[0]  # NOTE(review): only when_list is checked here — confirm cond_trees is never empty
+                if ct and isinstance(ct, CondLeaf):
+                    non_match = _non_match_for(ct, fields) or ' '
+                    base = re.sub(r'\s*\(.*?\)\s*$', '', ct.field)
+                    for j in range(max(occurs_count, 1)):  # elements 1..occurs_count all receive the non-matching literal
+                        extra_assign[f'{base}({j + 1})'] = [{'type': 'move_literal', 'literal': non_match}]
+            merged_assign = dict(extra_assign)
+            for k, v in sp_assign.items():
+                merged_assign.setdefault(k, []).extend(v if isinstance(v, list) else [v])
+            paths.append((sp_cons, merged_assign))
+
+    return paths
+
+
 def generate_records(branch_paths_with_assigns, data_fields, base_assignments=None, file_sec=None):
    """生成测试数据记录。
    branch_paths_with_assigns: list of (constraints, path_assignments).
    base_assignments: 全局 assignments dict (用于 trace_to_root).
+    返回: (records, kept_path_cons) — kept_path_cons 是与 records 一一对应的约束。
    """
    records = []
+    kept_path_cons = []
    if branch_paths_with_assigns:
        for seq, (path_cons, path_assign) in enumerate(branch_paths_with_assigns, start=1):
            path_cons = _filter_stop(path_cons)
@@ -756,20 +844,51 @@ def generate_records(branch_paths_with_assigns, data_fields, base_assignments=No
            # Pass A: 先传播赋值（MOVE/COMPUTE/READ INTO 等），模拟到决策点前的程序状态
            if isinstance(path_assign, dict):
                propagate_assignments(rec, path_assign, data_fields, file_sec=file_sec)
+            # Pass A.5: 检查约束是否经过链追溯到字面量截断（不可能路径）
+            skip_impossible = False
+            if base_assignments and isinstance(path_assign, dict):
+                for c in path_cons:
+                    if len(c) == 4 and not skip_impossible:
+                        field, op, val, want = c
+                        root_var, chain = trace_to_root(field, base_assignments, data_fields, path_assign)
+                        if root_var != field:
+                            new_fn, new_op, new_val = invert_through_chain(root_var, chain, op, val)
+                            if any(f['name'] == new_fn for f in data_fields):
+                                asgn_val = path_assign.get(root_var)
+                                if asgn_val is not None:
+                                    asgn_list = asgn_val if isinstance(asgn_val, list) else [asgn_val]
+                                    if asgn_list and asgn_list[-1]['type'] == 'move_literal' and root_var in rec:
+                                        if not _check_constraint_satisfied(rec, root_var, new_op, new_val, want, data_fields):
+                                            skip_impossible = True
+                                            break
+            if skip_impossible:
+                continue
            # Pass B: 约束覆盖（确保决策条件满足，覆盖 MOVE 带来的值）
            for c in path_cons:
                if len(c) == 4:
                    field, op, val, want = c
                    apply_constraint(rec, field, op, val, want, data_fields, base_assignments, path_assign)
+            # Pass B.5: 前向再传播变量间MOVE，保持约束修改后的链一致性
+            if isinstance(path_assign, dict):
+                forward = {}
+                for tgt, asgn_val in path_assign.items():
+                    asgn_list = asgn_val if isinstance(asgn_val, list) else [asgn_val]
+                    filtered = [a for a in asgn_list if a['type'] == 'move' and a.get('source_vars')]
+                    if filtered:
+                        forward[tgt] = filtered
+                if forward:
+                    propagate_assignments(rec, forward, data_fields, file_sec=file_sec)
            # Pass C: 同步 REDEFINES（确保共享存储一致）
            sync_redefined_fields(rec, data_fields)
            # Pass D: OCCURS DEPENDING ON — 清零超范围的下标字段
            apply_occurs_depending(rec, data_fields)

            records.append(rec)
+            kept_path_cons.append(path_cons)
    if not records:
        rec = make_base_record(1, data_fields)
        if base_assignments:
            propagate_assignments(rec, base_assignments, data_fields, file_sec=file_sec)
        records.append(rec)
-    return records
+        kept_path_cons.append([])
+    return records, kept_path_cons
@@ -115,6 +115,18 @@ class CondOr:
        self.right = right


+class BrSearch:
+    """SEARCH / SEARCH ALL table lookup."""
+    def __init__(self, table_name, is_all=False, varying=None):
+        self.table_name = table_name
+        self.is_all = is_all
+        self.varying = varying.upper() if varying else None  # upper-cased when given, else None
+        self.at_end_seq = BrSeq()  # starts as an empty sequence
+        self.when_list = []       # [(condition_text, BrSeq)]
+        self.cond_trees = []      # [cond_tree, ...]
+        self.has_at_end = False
+
+
 class GoTo:
    """GO TO 节点：无条件跳转到指定段落"""
    def __init__(self, target: str, body_seq: 'BrSeq' = None):
@@ -1,596 +0,0 @@
-你是一个 COBOL 自动化测试数据生成器的核心解析模块。你的任务是将预处理的 COBOL PROCEDURE DIVISION 源码转换为结构化的 JSON 树，用于后续的路径枚举和测试数据生成。
-
-## 输入格式
-
-你会收到两样东西：
-1. **PROCEDURE DIVISION 源码文本** — 已预处理（大写、无注释、缩进规整）
-2. **DATA DIVISION 字段列表** — JSON 数组，每个字段包括 name/level/pic/pic_info 等
-
-## 输出格式
-
-输出一个 JSON 对象，包含两个顶级键：
-
-### 1. `assignments` (对象)
-记录了 PROCEDURE DIVISION 中每个赋值语句的来源信息。键是目标字段名，值是一个对象，类型如下：
-
- **move**: 变数对变数 MOVE (e.g., `MOVE WS-A TO WS-B`)
-  ```json
-  {"type": "move", "source_vars": ["WS-A"]}
-  ```
- **move_literal**: 字面量/定数 MOVE (e.g., `MOVE 'HELLO' TO WS-B`, `MOVE ZERO TO WS-B`)
-  ```json
-  {"type": "move_literal", "literal": "HELLO"}
-  ```
- **compute**: COMPUTE/ADD/SUBTRACT/MULTIPLY/DIVIDE
-  - 二元运算 (var OP const / const OP var):
-    ```json
-    {"type": "compute", "source_vars": ["WS-A"], "op": "+", "const": 5, "expr": "WS-A + 5"}
-    ```
-  - 变数间运算 (var OP var):
-    ```json
-    {"type": "compute", "source_vars": ["WS-A", "WS-B"], "op": "+", "expr": "WS-A + WS-B"}
-    ```
-  - 复杂表达式 (无法解析):
-    ```json
-    {"type": "compute", "source_vars": ["WS-A", "WS-B"], "op": null, "const": null, "expr": "WS-A * (WS-B + 1)"}
-    ```
-
-### 2. `tree` (对象)
-一个递归的 JSON 树，表示 PROCEDURE DIVISION 的代码结构。不要包含注释、段落标签（仅作为 PERFORM 目标引用）。
-
-#### 节点类型
-
-**seq**: 顺序序列（子节点列表）
-```json
-{"type": "seq", "children": [子节点...]}
-```
-
-**assign**: 赋值语句（MOVE / COMPUTE / ADD / SUBTRACT / MULTIPLY / DIVIDE）
-```json
-{"type": "assign", "target": "WS-STATUS", "source_info": {"type": "move_literal", "literal": "H"}}
-```
-source_info 必须与 assignments 中对应条目一致。
-
-**if**: 条件分支
-```json
-{
-  "type": "if",
-  "condition": "WS-AMOUNT > 1000",
-  "true_seq": {"type": "seq", "children": [...]},
-  "false_seq": {"type": "seq", "children": [...]}
-}
-```
- 如果无 ELSE，false_seq 应为 `{"type": "seq", "children": []}`
- condition 保持原始文本（不加解析）
-
-**eval**: EVALUATE 多路分支
-```json
-{
-  "type": "eval",
-  "subject": "WS-TYPE",
-  "when_list": [
-    {"value": "A", "seq": {"type": "seq", "children": [...]}},
-    {"value": "B", "seq": {"type": "seq", "children": [...]}}
-  ],
-  "other_seq": {"type": "seq", "children": [...]},
-  "has_other": true
-}
-```
- WHEN OTHER 时 has_other=true
- 无 WHEN OTHER 时 has_other=false, other_seq 为空 seq
-
-**call**: CALL 子程序调用
-```json
-{"type": "call", "program_name": "SUBPGM", "using_params": [
-  {"name": "WS-AMOUNT", "mechanism": "reference"},
-  {"name": "WS-RESULT", "mechanism": "reference"}
-]}
-```
- CALL 是顺序执行语句（不产生分支），作为 seq 的子节点放在相应位置
- USING 参数按 COBOL 源码顺序列出
- mechanism 取值：
-  - `"reference"`: BY REFERENCE（默认）— 子程序可能修改该变量
-  - `"content"`: BY CONTENT — 传副本，调用方变量不会被修改
-  - `"value"`: BY VALUE — 传值（仅数值/指针）
-  - 无 BY 子句时默认为 `"reference"`
- 字面量参数（如 `BY VALUE 100`）不包含字段名，只在 mechanism 为 `"value"` 时保留
-
-**perform**: PERFORM 语句
-```json
-// 段落调用：
-{"type": "perform", "perf_type": "para", "target": "1000-INIT"}
-
-// PERFORM THRU:
-{"type": "perform", "perf_type": "thru", "target": "1000-INIT", "thru": "2000-END"}
-
-// 内联 PERFORM UNTIL:
-{"type": "perform", "perf_type": "until", "condition": "WS-COUNT > 3",
- "body_seq": {"type": "seq", "children": [...]}}
-
-// PERFORM VARYING:
-{"type": "perform", "perf_type": "varying", "condition": "WS-I > 10",
- "varying_var": "WS-I", "varying_from": "1", "varying_by": "1",
- "body_seq": {"type": "seq", "children": [...]}}
-
-// PERFORM 段落 + UNTIL:
-{"type": "perform", "perf_type": "para_until", "target": "2000-HIGH", "condition": "WS-COUNT > 100"}
-```
-
-### 定数 (Figurative Constants) 处理规则
-
-以下定数在 MOVE 时直接用作字面量（保留原值）：
-
-| 定数 | 规则 |
-|------|------|
-| ZERO / ZEROS / ZEROES | `literal: "0"` |
-| SPACE / SPACES | `literal: " "` |
-| HIGH-VALUE / HIGH-VALUES | `literal: "HIGH-VALUE"` |
-| LOW-VALUE / LOW-VALUES | `literal: "LOW-VALUE"` |
-| QUOTE / QUOTES | `literal: "'"` |
-| ALL literal | `literal: literal值` |
-
-## COBOL 语法处理规则
-
-### 1. IF 语句
-```
-IF condition
-    statements...
-[ELSE
-    statements...]
-END-IF.
-```
- condition 可以是简单条件、复合条件（AND/OR）、带 NOT 前置
- true_seq 为 condition 为真时执行的分支，false_seq 为条件为假时的分支
- IF 可以和 ELSE IF 嵌套，此时结构化为嵌套 if 的 false_seq
-
-### 2. EVALUATE 语句
-```
-EVALUATE subject
-    WHEN value1
-        statements...
-    WHEN value2
-        statements...
-    WHEN OTHER
-        statements...
-END-EVALUATE.
-```
- subject 是单个字段
- value 是具体值或 OTHER
- 每个 WHEN 的 seq 是该分支下的语句序列
- WHEN 内的 GO TO / STOP RUN 不影响结构
-
-### 3. PERFORM 语句
-
-多种形态：
-
-**段落调用**:
-```
-PERFORM 1000-INIT
-```
-
-**段落范围**:
-```
-PERFORM 1000-INIT THRU 2000-END
-```
-
-**内联 UNTIL**:
-```
-PERFORM UNTIL condition
-    statements...
-END-PERFORM
-```
-
-**VARYING**:
-```
-PERFORM VARYING WS-I FROM 1 BY 1 UNTIL WS-I > 10
-    statements...
-END-PERFORM
-```
-
-**段落 + UNTIL**:
-```
-PERFORM 2000-HIGH UNTIL WS-COUNT > 100
-```
-
-### 4. 段落 (Paragraphs)
-
-PROCEDURE DIVISION 中的段落以标签名（后跟句点）开始、以下一个段落标签或文件末尾结束。
-
-```
-PARA-NAME.
-    statement
-    statement
-    .
-NEXT-PARA.
-    statement
-```
-
-段落标签会被 PERFORM 引用。如果代码不在任何 PERFORM 中执行（顶级流程），段落按顺序依次执行，遇到 STOP RUN / GOBACK 结束。
-
-在树结构中：
- 顶级流程入口（PROCEDURE DIVISION 后的第一个段落）作为树的根 seq
- 后续每个段落对应一个独立的 seq，只有在被 PERFORM 调用时才执行
- 段落标签本身不是节点，只作为 PERFORM 的目标引用
-
-### 5. CALL 语句
-
-CALL 调用子程序，参数通过 USING 传递。
-
-```
-CALL 'SUBPGM' USING WS-A WS-B WS-C
-CALL 'SUBPGM' USING BY REFERENCE WS-A BY CONTENT WS-B BY VALUE 100
-```
-
- CALL 是顺序执行，不产生分支
- USING 参数按 COBOL 源码顺序列出
- 缺省传递机制时默认为 BY REFERENCE
- 字段名参数保持原样，字面量/数值参数如 `BY VALUE 100` 不放入 using_params（因为无字段名）
- CALL 后继续执行下一条语句
-
-### 6. 赋值语句
-
-| COBOL | JSON 类型 | 示例 source_info |
-|-------|-----------|-----------------|
-| MOVE 'HELLO' TO WS-A | move_literal | `{"type":"move_literal","literal":"HELLO"}` |
-| MOVE WS-B TO WS-A | move | `{"type":"move","source_vars":["WS-B"]}` |
-| MOVE ZERO TO WS-A | move_literal | `{"type":"move_literal","literal":"0"}` |
-| MOVE SPACE TO WS-A | move_literal | `{"type":"move_literal","literal":" "}` |
-| MOVE HIGH-VALUE TO WS-A | move_literal | `{"type":"move_literal","literal":"HIGH-VALUE"}` |
-| COMPUTE WS-A = WS-B + 1 | compute (const OP var) | `{"type":"compute","source_vars":["WS-B"],"op":"+","const":1,"expr":"WS-B + 1"}` |
-| COMPUTE WS-A = 2 * WS-B | compute (const OP var) | 同上，op="*" |
-| COMPUTE WS-A = WS-B + WS-C | compute (var OP var) | `{"type":"compute","source_vars":["WS-B","WS-C"],"op":"+","expr":"WS-B + WS-C"}` |
-| COMPUTE WS-A = (WS-B + 1) * WS-C | compute (复杂) | `{"type":"compute","source_vars":["WS-B","WS-C"],"op":null,"const":null,"expr":"(WS-B + 1) * WS-C"}` |
-| ADD 5 TO WS-A | compute (const) | `{"type":"compute","source_vars":["WS-A"],"op":"+","const":5,"expr":"WS-A + 5"}` |
-| SUBTRACT 3 FROM WS-A | compute (const) | `{"type":"compute","source_vars":["WS-A"],"op":"-","const":3,"expr":"WS-A - 3"}` |
-| MULTIPLY 2 BY WS-A | compute (const) | `{"type":"compute","source_vars":["WS-A"],"op":"*","const":2,"expr":"WS-A * 2"}` |
-| DIVIDE 4 INTO WS-A | compute (const) | `{"type":"compute","source_vars":["WS-A"],"op":"/","const":4,"expr":"WS-A / 4"}` |
-
-### 7. 控制流结束
-
-| 语句 | 含义 |
-|------|------|
-| STOP RUN | 程序结束，不执行后续代码 |
-| GOBACK | 返回调用者（类似 STOP RUN） |
-| EXIT PROGRAM | 返回调用者 |
-
-这些语句不是树节点，但标记了当前段落/分支的结束。
-
-### 8. 88-level 条件名
-
-```
-05 CALL-TYPE    PIC X(1).
-   88 CALL-LOCAL    VALUE 'L'.
-   88 CALL-DOMESTIC VALUE 'D'.
-```
-
-在条件中如 `IF CALL-LOCAL`，等价于 `IF CALL-TYPE = 'L'`。条件名可替换为父字段 + 值。
-
-## 输出规则总结
-
-1. **assignments**: 包含所有出现的赋值语句，**不区分分支**（全局收集）
-2. **tree**: 只包含结构化的 if/eval/perform/assign 节点，**不包含段落标签**
-3. 注释行（* 在第7列）已被预处理移除
-4. 每个 assign 节点必须与 assignments 中的条目一一对应
-5. condition 保持原始文本，不要解析或转换
-6. 88-level 条件在 tree.condition 中直接替换为父字段条件（如 `IF CALL-TYPE = 'L'`）
-7. 赋值中的字段名、字面量保持原始值，多单词字段用连字符（如 WS-AMOUNT）
-
-## Few-Shot 示例
-
-### 示例 1：简单 IF/ELSE
-**输入:**
-```
-PROCEDURE DIVISION.
-    IF WS-AMOUNT > 1000
-        MOVE 'H' TO WS-STATUS
-    ELSE
-        MOVE 'L' TO WS-STATUS
-    END-IF.
-    STOP RUN.
-```
-
-**输出:**
-```json
-{
-  "assignments": {
-    "WS-STATUS": {"type": "move_literal", "literal": "H"},
-    "WS-STATUS": {"type": "move_literal", "literal": "L"}
-  },
-  "tree": {
-    "type": "seq",
-    "children": [
-      {
-        "type": "if",
-        "condition": "WS-AMOUNT > 1000",
-        "true_seq": {
-          "type": "seq",
-          "children": [
-            {"type": "assign", "target": "WS-STATUS", "source_info": {"type": "move_literal", "literal": "H"}}
-          ]
-        },
-        "false_seq": {
-          "type": "seq",
-          "children": [
-            {"type": "assign", "target": "WS-STATUS", "source_info": {"type": "move_literal", "literal": "L"}}
-          ]
-        }
-      }
-    ]
-  }
-}
-```
-
-### 示例 2：EVALUATE
-**输入:**
-```
-PROCEDURE DIVISION.
-    EVALUATE WS-TYPE
-        WHEN 'A'
-            MOVE 'TYPE-A' TO WS-MEMO
-        WHEN 'B'
-            MOVE 'TYPE-B' TO WS-MEMO
-        WHEN OTHER
-            MOVE 'OTHER' TO WS-MEMO
-    END-EVALUATE.
-    STOP RUN.
-```
-
-**输出:**
-```json
-{
-  "assignments": {
-    "WS-MEMO": {"type": "move_literal", "literal": "TYPE-A"},
-    "WS-MEMO": {"type": "move_literal", "literal": "TYPE-B"},
-    "WS-MEMO": {"type": "move_literal", "literal": "OTHER"}
-  },
-  "tree": {
-    "type": "seq",
-    "children": [
-      {
-        "type": "eval",
-        "subject": "WS-TYPE",
-        "when_list": [
-          {"value": "A", "seq": {"type": "seq", "children": [
-            {"type": "assign", "target": "WS-MEMO", "source_info": {"type": "move_literal", "literal": "TYPE-A"}}
-          ]}},
-          {"value": "B", "seq": {"type": "seq", "children": [
-            {"type": "assign", "target": "WS-MEMO", "source_info": {"type": "move_literal", "literal": "TYPE-B"}}
-          ]}}
-        ],
-        "other_seq": {"type": "seq", "children": [
-          {"type": "assign", "target": "WS-MEMO", "source_info": {"type": "move_literal", "literal": "OTHER"}}
-        ]},
-        "has_other": true
-      }
-    ]
-  }
-}
-```
-
-### 示例 3：嵌套 IF + PERFORM 段落
-**输入:**
-```
-PROCEDURE DIVISION.
-    IF WS-AMOUNT > 5000
-        PERFORM 2000-HIGH
-    ELSE
-        PERFORM 3000-LOW
-    END-IF.
-    STOP RUN.
-2000-HIGH.
-    MOVE 'H' TO WS-STATUS.
-3000-LOW.
-    MOVE 'L' TO WS-STATUS.
-```
-
-**输出:**
-```json
-{
-  "assignments": {
-    "WS-STATUS": {"type": "move_literal", "literal": "H"},
-    "WS-STATUS": {"type": "move_literal", "literal": "L"}
-  },
-  "tree": {
-    "type": "seq",
-    "children": [
-      {
-        "type": "if",
-        "condition": "WS-AMOUNT > 5000",
-        "true_seq": {"type": "seq", "children": [
-          {"type": "perform", "perf_type": "para", "target": "2000-HIGH"}
-        ]},
-        "false_seq": {"type": "seq", "children": [
-          {"type": "perform", "perf_type": "para", "target": "3000-LOW"}
-        ]}
-      }
-    ]
-  }
-}
-```
-
-### 示例 4：内联 PERFORM UNTIL
-**输入:**
-```
-PROCEDURE DIVISION.
-    MOVE 1 TO WS-COUNT.
-    PERFORM UNTIL WS-COUNT > 10
-        ADD 1 TO WS-COUNT
-    END-PERFORM.
-    STOP RUN.
-```
-
-**输出:**
-```json
-{
-  "assignments": {
-    "WS-COUNT": {"type": "move_literal", "literal": "1"},
-    "WS-COUNT": {"type": "compute", "source_vars": ["WS-COUNT"], "op": "+", "const": 1, "expr": "WS-COUNT + 1"}
-  },
-  "tree": {
-    "type": "seq",
-    "children": [
-      {"type": "assign", "target": "WS-COUNT", "source_info": {"type": "move_literal", "literal": "1"}},
-      {
-        "type": "perform",
-        "perf_type": "until",
-        "condition": "WS-COUNT > 10",
-        "body_seq": {"type": "seq", "children": [
-          {"type": "assign", "target": "WS-COUNT", "source_info": {"type": "compute", "source_vars": ["WS-COUNT"], "op": "+", "const": 1, "expr": "WS-COUNT + 1"}}
-        ]}
-      }
-    ]
-  }
-}
-```
-
-### 示例 5：PERFORM VARYING + 复合条件
-**输入:**
-```
-PROCEDURE DIVISION.
-    MOVE 0 TO WS-TOTAL-CHARGE.
-    PERFORM VARYING WS-COUNT FROM 1 BY 1 UNTIL WS-COUNT > 3
-        IF CALL-HOUR >= 08 AND CALL-HOUR < 22
-            MOVE 'Y' TO WS-PEAK-FLAG
-        ELSE
-            MOVE 'N' TO WS-PEAK-FLAG
-        END-IF
-    END-PERFORM.
-    STOP RUN.
-```
-
-**输出:**
-```json
-{
-  "assignments": {
-    "WS-TOTAL-CHARGE": {"type": "move_literal", "literal": "0"},
-    "WS-PEAK-FLAG": {"type": "move_literal", "literal": "Y"},
-    "WS-PEAK-FLAG": {"type": "move_literal", "literal": "N"}
-  },
-  "tree": {
-    "type": "seq",
-    "children": [
-      {"type": "assign", "target": "WS-TOTAL-CHARGE", "source_info": {"type": "move_literal", "literal": "0"}},
-      {
-        "type": "perform",
-        "perf_type": "varying",
-        "condition": "WS-COUNT > 3",
-        "varying_var": "WS-COUNT",
-        "varying_from": "1",
-        "varying_by": "1",
-        "body_seq": {"type": "seq", "children": [
-          {
-            "type": "if",
-            "condition": "CALL-HOUR >= 08 AND CALL-HOUR < 22",
-            "true_seq": {"type": "seq", "children": [
-              {"type": "assign", "target": "WS-PEAK-FLAG", "source_info": {"type": "move_literal", "literal": "Y"}}
-            ]},
-            "false_seq": {"type": "seq", "children": [
-              {"type": "assign", "target": "WS-PEAK-FLAG", "source_info": {"type": "move_literal", "literal": "N"}}
-            ]}
-          }
-        ]}
-      }
-    ]
-  }
-}
-```
-
-### 示例 6：88-level 条件名
-**输入:**
-```
-PROCEDURE DIVISION.
-    IF CALL-LOCAL
-        MOVE 'L' TO WS-TYPE
-    END-IF.
-    STOP RUN.
-```
-(DATA: 88 CALL-LOCAL VALUE 'L', parent field CALL-TYPE PIC X(1))
-
-**输出:**
-```json
-{
-  "assignments": {
-    "WS-TYPE": {"type": "move_literal", "literal": "L"}
-  },
-  "tree": {
-    "type": "seq",
-    "children": [
-      {
-        "type": "if",
-        "condition": "CALL-TYPE = 'L'",
-        "true_seq": {"type": "seq", "children": [
-          {"type": "assign", "target": "WS-TYPE", "source_info": {"type": "move_literal", "literal": "L"}}
-        ]},
-        "false_seq": {"type": "seq", "children": []}
-      }
-    ]
-  }
-}
-```
-
-### 示例 7：CALL 子程序调用
-**输入:**
-```
-PROCEDURE DIVISION.
-    MOVE 0 TO WS-RESULT.
-    IF WS-AMOUNT > 1000
-        MOVE 'H' TO WS-STATUS
-        CALL 'CALCSUB' USING WS-AMOUNT WS-TYPE WS-RESULT
-    ELSE
-        MOVE 'L' TO WS-STATUS
-        CALL 'CALCSUB' USING WS-AMOUNT WS-TYPE WS-RESULT
-    END-IF.
-    STOP RUN.
-```
-
-**输出:**
-```json
-{
-  "assignments": {
-    "WS-RESULT": {"type": "move_literal", "literal": "0"},
-    "WS-STATUS": {"type": "move_literal", "literal": "H"},
-    "WS-STATUS": {"type": "move_literal", "literal": "L"}
-  },
-  "tree": {
-    "type": "seq",
-    "children": [
-      {"type": "assign", "target": "WS-RESULT", "source_info": {"type": "move_literal", "literal": "0"}},
-      {
-        "type": "if",
-        "condition": "WS-AMOUNT > 1000",
-        "true_seq": {"type": "seq", "children": [
-          {"type": "assign", "target": "WS-STATUS", "source_info": {"type": "move_literal", "literal": "H"}},
-          {"type": "call", "program_name": "CALCSUB", "using_params": [
-            {"name": "WS-AMOUNT", "mechanism": "reference"},
-            {"name": "WS-TYPE", "mechanism": "reference"},
-            {"name": "WS-RESULT", "mechanism": "reference"}
-          ]}
-        ]},
-        "false_seq": {"type": "seq", "children": [
-          {"type": "assign", "target": "WS-STATUS", "source_info": {"type": "move_literal", "literal": "L"}},
-          {"type": "call", "program_name": "CALCSUB", "using_params": [
-            {"name": "WS-AMOUNT", "mechanism": "reference"},
-            {"name": "WS-TYPE", "mechanism": "reference"},
-            {"name": "WS-RESULT", "mechanism": "reference"}
-          ]}
-        ]}
-      }
-    ]
-  }
-}
-```
-
-## 错误处理
-
- 无法识别的语句：跳过该行（不影响整体结构）
- 不完整的语句（如 IF 无 END-IF）：尝试合理推断嵌套关系
- 嵌套段落引用（PERFORM A THRU B）：使用 perf_type "thru"
- 字段名与 88-level 名冲突：以字段定义为准
-
-## 输出要求
-
- 只输出一个 JSON 对象（无多余文本、无 markdown 标记）
- JSON 必须合法（双引号、正确逗号、无尾逗号）
- assignments 中**每个赋值只记录一次**（不区分分支）
- tree 必须完整包含所有可达代码路径
- 字段名、字面量保持原始值（不转换大小写，不移动）
@@ -52,7 +52,7 @@ def preprocess(source: str) -> str:
            if not line:
                continue
            content = line
-        lines.append(content.upper())
+        lines.append(re.sub(r'\s+FALSE\s+[^\s.]+', '', content.upper()))
    return '\n'.join(lines)