Files
cobol-java-v3/cobol_testgen/procedure_grammar.lark
NB-076 50995d3335 chore: SETUP.md + 测试报告脚本 + 文档更新
- SETUP.md: 完整环境搭建指南(同事用)
- SETUP_QUICK.md: 快速搭环境(4步)
- s22~s26: TNA端到端、覆盖率报告、回归检查
- procedure_grammar.lark: 实验性Lark语法

Co-Authored-By: Claude <noreply@anthropic.com>
2026-06-25 08:50:17 +08:00

204 lines
6.8 KiB
Plaintext

// PROCEDURE DIVISION Lark grammar — control-flow focus.
//
// Parses a COBOL PROCEDURE DIVISION to extract decision points.
// Simple statements are treated as opaque text between control structures.
//
// Lark comments are `//` line comments; a `/* ... */` span is lexed as a
// regexp literal and prevents the grammar from loading.

// Entry point: optional division header, any mix of paragraphs and sections,
// then an optional END PROGRAM marker.
start: proc_division? (paragraph | section)* END_MARKER?

// "PROCEDURE DIVISION [USING name ...]."
proc_division: PROCEDURE_DIV (USING name_list)? DOT

// A paragraph is a label, a period, and the statements that follow.
paragraph: PARAGRAPH_NAME DOT statement*

// A section header is "<label> SECTION [.]".  NAME is used for the label:
// PARAGRAPH_NAME carries a look-ahead on what follows the label, so it can
// never match a label that is followed by the word SECTION.
section: NAME SECTION DOT? statement*
// ── Control flow statements ──
// One statement: either a modelled control-flow construct or a
// period-terminated simple/terminal statement.  `-> alias` renames the node
// produced for that alternative.
// GO TO: COBOL writes the procedure names first and the optional
// "DEPENDING ON identifier" phrase after them.
statement: if_stmt
    | evaluate_stmt
    | perform_stmt
    | read_stmt
    | write_stmt
    | sort_stmt
    | merge_stmt
    | call_stmt
    | simple_stmt DOT -> simple
    | DOT -> empty_stmt
    | EXIT DOT -> exit_stmt
    | GOBACK DOT -> goback_stmt
    | STOP RUN DOT -> stop_stmt
    | GO_TO name_list (DEPENDING_ON operand)? DOT -> goto_stmt
    | CONTINUE DOT -> continue_stmt
// ── IF statement ──
// IF <conditions> <statements> [else_clause] END-IF [.]
// Kept on a single line: a Lark rule ends at the newline unless the next
// line starts with `|`.
if_stmt: IF condition_seq statement* else_clause? END_IF DOT?

// ELSE branch.  The second alternative models a nested "ELSE IF ..." chain.
else_clause: ELSE statement*
    | ELSE IF condition_seq statement* else_clause?
// ── Conditions ──
// A chain of optionally negated conditions joined by AND / OR in any order
// (e.g. "A OR B AND C"); no precedence is modelled, the chain stays flat.
condition_seq: NOT? cond_expr ((COND_AND | COND_OR) NOT? cond_expr)*

// A single condition: a bare operand (condition name / truth value) or a
// comparison "left <op> right".
cond_expr: operand (COMBINED_COND operand)? -> simple_cond

// Operand alternatives: data reference, literal, figurative constant,
// "A AND B" treated as a single operand, or a parenthesised condition.
operand: QUAL_NAME | NUMBER | STRING_LITERAL | ZERO | SPACES | QUOTE
    | operand COND_AND operand
    | LPAREN condition_seq RPAREN
// ── EVALUATE statement ──
// EVALUATE <subjects> (WHEN ... <statements>)+ [WHEN OTHER <statements>] END-EVALUATE [.]
// Kept on a single line: a Lark rule ends at the newline unless the next
// line starts with `|`.
evaluate_stmt: EVALUATE ALSO? operand? (ALSO operand)* evaluate_when+ (WHEN OTHER statement*)? END_EVALUATE DOT?

// One WHEN branch: its selection objects (ALSO-separated) and its body.
evaluate_when: WHEN ALSO? condition_seq (ALSO condition_seq)* statement*
// ── PERFORM statement ──
// First alternative: a body of statements closed by END-PERFORM.
// Second alternative: no body, terminated directly by a period.
perform_stmt: PERFORM perform_kind? statement* END_PERFORM DOT?
    | PERFORM perform_kind? DOT

// Loop / call forms.  perform_call_varying accepts the optional FROM / BY
// phrases, the same ones perform_vary_clause accepts, and keeps them flat in
// the node so inputs without FROM / BY produce the same children as before.
perform_kind: UNTIL condition_seq -> perform_until
    | VARYING operand perform_vary_clause -> perform_varying
    | name (THRU name)? -> perform_call
    | name (THRU name)? VARYING operand (FROM operand)? (BY operand)? UNTIL condition_seq -> perform_call_varying

// Tail of a VARYING phrase: optional FROM, optional BY, mandatory UNTIL.
perform_vary_clause: (FROM operand)? (BY operand)? UNTIL condition_seq
// ── READ statement ──
// READ <file> [INTO x] [KEY x] [INVALID ...] [AT END ...] [NOT AT END ...] END-READ [.]
// Kept on a single line: a Lark rule ends at the newline unless the next
// line starts with `|`.
read_stmt: READ operand (INTO operand)? (KEY operand)? (INVALID_KEY statement*)? (AT_END statement*)? (NOT_AT_END statement*)? END_READ DOT?

// ── WRITE statement ──
// WRITE <record> [FROM x] [INVALID ...] END-WRITE [.]
write_stmt: WRITE operand (FROM operand)? (INVALID_KEY statement*)? END_WRITE DOT?
// ── SORT statement ──
// SORT <file> <key phrase>+ [INPUT PROCEDURE r] [OUTPUT PROCEDURE r] [USING ...] [GIVING ...] .
// Kept on a single line: a Lark rule ends at the newline unless the next
// line starts with `|`.
sort_stmt: SORT operand sort_order+ (INPUT_PROC procedure_range)? (OUTPUT_PROC procedure_range)? (USING name_list)? (GIVING name_list)? DOT

// One "ASCENDING|DESCENDING KEY a, b, ..." phrase.
sort_order: (ASCENDING | DESCENDING) KEY operand (COMMA operand)* -> sort_key

// Procedure range for INPUT/OUTPUT PROCEDURE; both forms yield a proc_range node.
procedure_range: THRU name -> proc_range
    | name (THRU name)? -> proc_range
// ── MERGE statement ──
// MERGE <file> <key phrases> [INPUT PROCEDURE r] [OUTPUT PROCEDURE r] [USING ...] [GIVING ...] .
// Kept on a single line: a Lark rule ends at the newline unless the next
// line starts with `|`.
merge_stmt: MERGE operand merge_keys (INPUT_PROC procedure_range)? (OUTPUT_PROC procedure_range)? (USING name_list)? (GIVING name_list)? DOT

// One or more "ASCENDING|DESCENDING KEY a, b, ..." phrases, kept flat in a
// single merge_keys node.
merge_keys: ((ASCENDING | DESCENDING) KEY operand (COMMA operand)*)+
// ── CALL statement ──
// CALL <target> [USING ...] [ON EXCEPTION ...] [NOT ON EXCEPTION ...] END-CALL [.]
call_stmt: CALL operand (USING name_list)? (ON_EXCEPTION statement*)? (NOT_ON_EXCEPTION statement*)? END_CALL DOT?
// ── Simple statement — everything not explicitly modelled ──
// A verb followed by any run of opaque tokens.  The terminating period is
// consumed by the `statement` rule (`simple_stmt DOT`), so it is not matched
// again here.  (`~` is Lark's repetition operator and needs an item and a
// count, so it cannot stand for "nothing".)
simple_stmt: verb_clause
verb_clause: VERB opaque_token*

// ── Opaque (unparsed) token sequence — becomes ignored text ──
opaque_token: NAME | NUMBER | STRING_LITERAL | QUAL_NAME
    | ZERO | SPACES | SPACE | QUOTE | ALL
    | COMPARISON | COND_AND | COND_OR | COMMA
    | LPAREN | RPAREN
    | KEY_WS | KEY_SECTION | KEY_PROCEDURE
// ── Terminals ──
// Division / section headers and the END PROGRAM marker (case-insensitive).
PROCEDURE_DIV: /PROCEDURE\s+DIVISION/i
SECTION: /SECTION/i
USING: /USING/i
END_MARKER: /END\s+PROGRAM\b/i
// Control flow keywords (all case-insensitive).
IF: /IF\b/i
ELSE: /ELSE\b/i
END_IF: /END-IF\b/i
EVALUATE: /EVALUATE\b/i
WHEN: /WHEN\b/i
// Accepts OTHER and OTHERS.
OTHER: /OTHERS?\b/i
END_EVALUATE: /END-EVALUATE\b/i
PERFORM: /PERFORM\b/i
END_PERFORM: /END-PERFORM\b/i
VARYING: /VARYING\b/i
UNTIL: /UNTIL\b/i
FROM: /FROM\b/i
BY: /BY\b/i
// Accepts THRU and THROUGH.
THRU: /THR(?:U|OUGH)\b/i
READ: /READ\b/i
WRITE: /WRITE\b/i
INTO: /INTO\b/i
KEY: /KEY\b/i
// "INVALID" optionally followed by "KEY", so the KEY word of
// "INVALID KEY" is consumed together with INVALID.
INVALID_KEY: /INVALID(?:\s+KEY)?\b/i
AT_END: /AT\s+END\b/i
NOT_AT_END: /NOT\s+AT\s+END\b/i
END_READ: /END-READ\b/i
END_WRITE: /END-WRITE\b/i
SORT: /SORT\b/i
MERGE: /MERGE\b/i
ASCENDING: /ASCENDING\b/i
DESCENDING: /DESCENDING\b/i
INPUT_PROC: /INPUT\s+PROCEDURE\s+/i
OUTPUT_PROC: /OUTPUT\s+PROCEDURE\s+/i
GIVING: /GIVING\b/i
CALL: /CALL\b/i
ON_EXCEPTION: /ON\s+EXCEPTION\b/i
NOT_ON_EXCEPTION: /NOT\s+ON\s+EXCEPTION\b/i
END_CALL: /END-CALL\b/i
EXIT: /EXIT\b/i
GOBACK: /GOBACK\b/i
STOP: /STOP\b/i
RUN: /RUN\b/i
GO_TO: /GO\s+TO\b/i
DEPENDING_ON: /DEPENDING\s+ON\b/i
CONTINUE: /CONTINUE\b/i
ALSO: /ALSO\b/i
COMMA: /,/
LPAREN: /\(/
RPAREN: /\)/
NOT: /NOT\b/i
COND_AND: /AND\b/i
COND_OR: /OR\b/i
// Relational operators.  Deliberately basic: symbolic operators plus the
// GREATER THAN / LESS THAN / EQUAL TO word forms and "NOT <op>".
COMPARISON: /[=<>]=?|GREATER\s+THAN\b|LESS\s+THAN\b|EQUAL\s+TO\b|NOT\s+[=<>]/i
// Operator used inside cond_expr.  The NOT form is listed first and has no
// leading whitespace: whitespace is %ignore'd, so the lexer is positioned
// on the N of NOT and a pattern starting with \s+ could never match there.
COMBINED_COND: /NOT\s*[=<>]=?|[=<>]=?|GREATER\s+THAN\b|LESS\s+THAN\b|EQUAL\s+TO\b/i
// Data references
// QUAL_NAME: a name optionally qualified with "OF <name>" any number of times.
QUAL_NAME: /[A-Z][A-Z0-9-]*(?:\s+OF\s+[A-Z][A-Z0-9-]*)*/i
NAME: /[A-Z][A-Z0-9-]*/i
NUMBER: /[0-9]+(?:\.[0-9]+)?/
STRING_LITERAL: /'[^']*'/ | /"[^"]*"/
// Figurative constants.  ZERO also accepts ZEROS and ZEROES.
ZERO: /ZERO(?:E?S)?/i
SPACES: /SPACES/i
SPACE: /SPACE\b/i
QUOTE: /QUOTE[S]?/i
ALL: /ALL\b/i
KEY_WS: /WORKING-STORAGE\s+SECTION/i
KEY_SECTION: /SECTION\b/i
KEY_PROCEDURE: /PROCEDURE/i

// Name helpers used by the PERFORM / SORT / MERGE / CALL / GO TO rules.
// NOTE(review): no `name` or `name_list` definition is visible in this
// grammar, and Lark rejects a grammar that references an undefined rule.
// Drop these two rules if they are already defined elsewhere.
name: NAME
name_list: name (COMMA? name)*
// Verb — any COBOL verb that starts a simple statement.
// The trailing negative look-ahead stops a verb from matching the front of a
// longer word such as ADD-TOTALS or SETUP.
VERB: /(?:ACCEPT|ADD|ALTER|CANCEL|CHAIN|CLOSE|COMMIT|COMPUTE|CONFIGURATION|DELETE|DISPLAY|DIVIDE|ENTRY|EVALUATE|EXHIBIT|GENERATE|GOBACK|GO|IF|INITIALIZE|INSPECT|MOVE|MULTIPLY|OPEN|PERFORM|READ|RECEIVE|RELEASE|RETURN|REWRITE|ROLLBACK|SEARCH|SECTION|SELECT|SEND|SET|SORT|START|STOP|STRING|SUBTRACT|TERMINATE|UNSTRING|USE|WRITE|EXIT|CONTINUE|CALL|MERGE)(?![A-Z0-9-])/i
// A period together with any whitespace around it.
DOT: /\s*\.\s*/
// Paragraph label: a name directly followed by a period (approximate).
// The look-ahead tests for the period character itself; a terminal name
// such as DOT is not expanded inside a regexp.
PARAGRAPH_NAME: /[A-Z][A-Z0-9-]*(?=\s*\.)/i
// Comment terminals.  They are discarded by the %ignore directives at the
// end of the grammar; Lark has no `-> skip` action.
COBOL_COMMENT: /\*>.*/
// The `m` flag lets ^ match at the start of every line, not just the first.
// NOTE(review): still only matches when the lexer is positioned at the very
// start of a line — confirm against indented comment lines.
COMMENT_LINE: /^\s*\*.*/m
// Ignore all whitespace, including newlines: COBOL statements routinely span
// several lines, and WS_INLINE (spaces and tabs only) would leave every line
// break that is not adjacent to a period as an unexpected character.
%import common.WS
%ignore WS
%ignore COBOL_COMMENT
%ignore COMMENT_LINE