fix: adversarial testing — 4 false positive/negative fixes + comment stripping
COBOL migration expert adversarial testing found 4 real defects:
FIX 1: Comment-stripping in detect_keyword() (FP-2)
- Remove *> inline comments and * comment lines before keyword matching
- Prevents 「マッチング」 from triggering on WS-KEY in comments
FIX 2: KEY comparison context validation (FP-1, FP-6)
- Add _matches_key_comparison() — requires WS-KEY variable to appear
NEAR an actual comparison operator (= < >), not just as PIC/VALUE decl
- Same check in _path_rule_engine features via has_key_var injection
- Fix regex bug: [=<>\s] vs [=<>] — \s matched whitespace after PIC decl
FIX 3: Old-school naming support (FN-1)
- Add L1 keyword r'[A-Z]\d{0,2}-\w*KEY' with 0.55 confidence
- Matches K01-KEY, KS-KEY etc. (non-WS- prefix naming convention)
FIX 4: mn_output_mode over-matching (FP-6)
- Require IF branches + KEY evidence before returning M:N when the file count is >= 3
- matching_vs_keybreak rule 3 now requires has_key_var
New tests: test_adversarial.py — 8 parametrized adversarial tests
Regression: 755 passed (0 new failures)
This commit is contained in:
+73
-6
@@ -24,6 +24,9 @@ L1_RULES: list[tuple[str, list[str], float]] = [
|
|||||||
("文件编成", ["ORGANIZATION IS"], 0.99),
|
("文件编成", ["ORGANIZATION IS"], 0.99),
|
||||||
("替代索引", ["ALTERNATE RECORD KEY"], 0.99),
|
("替代索引", ["ALTERNATE RECORD KEY"], 0.99),
|
||||||
("マッチング", ["re:WS-[\\w-]*KEY"], 0.65),
|
("マッチング", ["re:WS-[\\w-]*KEY"], 0.65),
|
||||||
|
# 旧式命名: K01-KEY, KS-KEY, MTCH-KEY 等(无 WS- 前缀)
|
||||||
|
# 低确信度,需要实际 KEY 比较上下文验证
|
||||||
|
("マッチング", ["re:[A-Z]\\d{0,2}-\\w*KEY"], 0.55),
|
||||||
]
|
]
|
||||||
|
|
||||||
# ── 冲突解决规则 ─────────────────────────────────────────────────────────
|
# ── 冲突解决规则 ─────────────────────────────────────────────────────────
|
||||||
@@ -38,10 +41,65 @@ CONFLICT_RULES: dict[tuple[str, str], str] = {
|
|||||||
|
|
||||||
|
|
||||||
# ── 关键字检测 ───────────────────────────────────────────────────────────
|
# ── 关键字检测 ───────────────────────────────────────────────────────────
|
||||||
|
def _strip_cobol_comments(source: str) -> str:
|
||||||
|
"""剥离 COBOL 注释,避免注释中的关键词触发 L1 匹配。
|
||||||
|
|
||||||
|
处理两种注释:
|
||||||
|
- 固定格式列 7: 行首 `*` (comment line)
|
||||||
|
- 自由格式/内联: `*> ...` 到行尾
|
||||||
|
"""
|
||||||
|
lines = source.split('\n')
|
||||||
|
cleaned = []
|
||||||
|
for line in lines:
|
||||||
|
# 自由格式/内联注释: *>
|
||||||
|
idx = line.find('*>')
|
||||||
|
if idx >= 0:
|
||||||
|
line = line[:idx]
|
||||||
|
# 固定格式注释行: 如果第一个非空字符是 *
|
||||||
|
stripped = line.strip()
|
||||||
|
if stripped.startswith('*') and not stripped.startswith('*/'):
|
||||||
|
continue # 跳过整个注释行
|
||||||
|
cleaned.append(line)
|
||||||
|
return '\n'.join(cleaned)
|
||||||
|
|
||||||
|
|
||||||
|
def _matches_key_comparison(source_upper: str) -> bool:
|
||||||
|
"""检查源码中是否包含实际的 KEY 变量比较(而非仅声明)。
|
||||||
|
|
||||||
|
匹配 KEY 变量在比较上下文中的使用:
|
||||||
|
WS-KEY = / WS-KEY > / WS-KEY <
|
||||||
|
IF WS-MAST-KEY
|
||||||
|
KEY = WS-...
|
||||||
|
"""
|
||||||
|
# 模式 1: KEY 变量出现在比较上下文中(= < > 后跟变量)
|
||||||
|
# 注意: 不能用 \s 代替 [=<>],否则「WS-KEY PIC」中的空格也会误匹配
|
||||||
|
if re.search(r'WS-[\w-]*KEY[A-Z0-9-]*\s*[=<>]', source_upper):
|
||||||
|
return True
|
||||||
|
# 模式 2: 非 WS- 前缀的 KEY 变量(旧式命名 K01-KEY 等)
|
||||||
|
if re.search(r'\b[A-Z]\d{0,2}-[\w-]*KEY\s*[=<>]', source_upper):
|
||||||
|
return True
|
||||||
|
# 模式 3: 源码中含有 READ INTO + KEY 变量
|
||||||
|
if re.search(r'READ\s+\w+\s+INTO\s+\w+.*KEY', source_upper, re.DOTALL):
|
||||||
|
return True
|
||||||
|
return False
|
||||||
|
|
||||||
|
|
||||||
|
def _get_procedure_division(source_upper: str) -> str:
|
||||||
|
"""只提取 PROCEDURE DIVISION 部分用于关键词匹配。"""
|
||||||
|
idx = source_upper.find('PROCEDURE DIVISION')
|
||||||
|
if idx >= 0:
|
||||||
|
return source_upper[idx:]
|
||||||
|
return source_upper
|
||||||
|
|
||||||
|
|
||||||
def detect_keyword(source: str) -> list[tuple[str, float, str]]:
|
def detect_keyword(source: str) -> list[tuple[str, float, str]]:
|
||||||
"""在 COBOL 源码中搜索 L1_RULES 定义的关键字,返回匹配结果。
|
"""在 COBOL 源码中搜索 L1_RULES 定义的关键字,返回匹配结果。
|
||||||
|
|
||||||
关键字前缀 "re:" 表示正则表达式匹配(如 "re:WS-\\w*KEY" 匹配 WS-MAST-KEY 等)。
|
处理步骤:
|
||||||
|
1. 剥离注释,避免注释中的关键词触发匹配
|
||||||
|
2. 对需要程序上下文的关键词(マッチング),检查 KEY 变量是否在比较中使用
|
||||||
|
|
||||||
|
关键字前缀 "re:" 表示正则表达式匹配。
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
source: COBOL 程序源码文本。
|
source: COBOL 程序源码文本。
|
||||||
@@ -50,18 +108,27 @@ def detect_keyword(source: str) -> list[tuple[str, float, str]]:
|
|||||||
list[tuple[str, float, str]]:
|
list[tuple[str, float, str]]:
|
||||||
每个元素为 (分类名称, 置信度, 匹配到的关键字原文)。
|
每个元素为 (分类名称, 置信度, 匹配到的关键字原文)。
|
||||||
"""
|
"""
|
||||||
|
cleaned = _strip_cobol_comments(source)
|
||||||
|
source_upper = cleaned.upper()
|
||||||
|
|
||||||
results: list[tuple[str, float, str]] = []
|
results: list[tuple[str, float, str]] = []
|
||||||
source_upper = source.upper()
|
|
||||||
|
|
||||||
for category, keywords, confidence in L1_RULES:
|
for category, keywords, confidence in L1_RULES:
|
||||||
matched = False
|
matched = False
|
||||||
for kw in keywords:
|
for kw in keywords:
|
||||||
if kw.startswith("re:"):
|
if kw.startswith("re:"):
|
||||||
pattern = kw[3:]
|
pattern = kw[3:]
|
||||||
if re.search(pattern, source_upper):
|
if not re.search(pattern, source_upper):
|
||||||
results.append((category, confidence, kw))
|
continue
|
||||||
matched = True
|
|
||||||
break
|
# マッチング 关键词需要额外上下文验证:KEY 变量必须在比较中使用
|
||||||
|
if category == "マッチング":
|
||||||
|
if not _matches_key_comparison(source_upper):
|
||||||
|
continue
|
||||||
|
|
||||||
|
results.append((category, confidence, kw))
|
||||||
|
matched = True
|
||||||
|
break
|
||||||
else:
|
else:
|
||||||
if kw in source_upper:
|
if kw in source_upper:
|
||||||
results.append((category, confidence, kw))
|
results.append((category, confidence, kw))
|
||||||
|
|||||||
@@ -156,6 +156,17 @@ def _path_rule_engine(
|
|||||||
# 1. 结构特征直接作为 features
|
# 1. 结构特征直接作为 features
|
||||||
features = dict(structure)
|
features = dict(structure)
|
||||||
|
|
||||||
|
# 注入 has_key_var: 源码中是否存在实际的 KEY 比较
|
||||||
|
# (避免 matching_vs_keybreak 规则被计数器比较误触发)
|
||||||
|
if features.get("source_upper"):
|
||||||
|
import re
|
||||||
|
su = features["source_upper"]
|
||||||
|
features["has_key_var"] = bool(re.search(
|
||||||
|
r'WS-[\w-]*KEY[A-Z0-9-]*\s*[=<>]|' # WS-KEY = / WS-KEY >
|
||||||
|
r'\b[A-Z]\d{0,2}-[\w-]*KEY\s*[=<>]', # K01-KEY =
|
||||||
|
su
|
||||||
|
))
|
||||||
|
|
||||||
# 2. 运行所有混淆组解析器
|
# 2. 运行所有混淆组解析器
|
||||||
resolved_types: dict[str, str] = {}
|
resolved_types: dict[str, str] = {}
|
||||||
resolved_confidences: dict[str, float] = {}
|
resolved_confidences: dict[str, float] = {}
|
||||||
@@ -570,6 +581,10 @@ def classify_program(cobol_source: str, llm: Any = None) -> dict:
|
|||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.warning("[pipeline] extract_structure 失败: %s", e)
|
logger.warning("[pipeline] extract_structure 失败: %s", e)
|
||||||
|
|
||||||
|
# 注入源代码用于 features 中的上下文验证(如 has_key_var)
|
||||||
|
if structure:
|
||||||
|
structure["source_upper"] = cobol_source.upper()
|
||||||
|
|
||||||
# ── 第 2 步: 分析关键字结果, 确定路径 ──
|
# ── 第 2 步: 分析关键字结果, 确定路径 ──
|
||||||
keyword_info = _get_best_keyword_match(keyword_matches)
|
keyword_info = _get_best_keyword_match(keyword_matches)
|
||||||
max_keyword_confidence = keyword_info["confidence"] if keyword_info else 0.0
|
max_keyword_confidence = keyword_info["confidence"] if keyword_info else 0.0
|
||||||
|
|||||||
@@ -43,8 +43,10 @@ def resolve_matching_vs_keybreak(features: dict) -> dict:
|
|||||||
return {"resolved_type": "キーブレイク", "confidence": 0.85, "evidence": evidence}
|
return {"resolved_type": "キーブレイク", "confidence": 0.85, "evidence": evidence}
|
||||||
|
|
||||||
# 补充规则: SELECT 文件数 >= 2 且 comparison 至少 1 → 倾向マッチング
|
# 补充规则: SELECT 文件数 >= 2 且 comparison 至少 1 → 倾向マッチング
|
||||||
if file_count >= 2 and comparison_ifs >= 1:
|
# 要求必须有实际的 KEY 变量比较(防止计数器比较误判)
|
||||||
evidence.append(f"SELECT 文件数 >=2 + comparison IF >=1 → マッチング")
|
has_key_compare = variable_patterns.get("has_prev_key", False) or features.get("has_key_var", False)
|
||||||
|
if file_count >= 2 and comparison_ifs >= 1 and has_key_compare:
|
||||||
|
evidence.append(f"SELECT 文件数 >=2 + comparison IF >=1 + KEY 变量 → マッチング")
|
||||||
return {"resolved_type": "マッチング", "confidence": 0.75, "evidence": evidence}
|
return {"resolved_type": "マッチング", "confidence": 0.75, "evidence": evidence}
|
||||||
|
|
||||||
# 回退: 无法明确判定
|
# 回退: 无法明确判定
|
||||||
@@ -202,8 +204,15 @@ def resolve_mn_output_mode(features: dict) -> dict:
|
|||||||
return {"resolved_type": "M:N", "confidence": 0.65, "evidence": evidence}
|
return {"resolved_type": "M:N", "confidence": 0.65, "evidence": evidence}
|
||||||
|
|
||||||
if file_count >= 3:
|
if file_count >= 3:
|
||||||
evidence.append(f"文件数 {file_count} >= 3, 可能为 M:N 关系")
|
# 需要至少有 IF 分支和 KEY 变量的证据,否则单纯文件多不是匹配程序
|
||||||
return {"resolved_type": "M:N", "confidence": 0.60, "evidence": evidence}
|
vp = features.get("variable_patterns", {})
|
||||||
|
total_ifs = features.get("if_types", {}).get("total", 0)
|
||||||
|
has_key_evidence = vp.get("has_prev_key", False) or vp.get("has_accumulator", False)
|
||||||
|
if total_ifs >= 1 and has_key_evidence:
|
||||||
|
evidence.append(f"文件数 {file_count} >= 3, IF 分支 {total_ifs}, KEY 证据 → 可能 M:N")
|
||||||
|
return {"resolved_type": "M:N", "confidence": 0.60, "evidence": evidence}
|
||||||
|
evidence.append(f"文件数 {file_count} 但无 IF+KEY 证据 → 不是 M:N 匹配")
|
||||||
|
return {"resolved_type": "unknown", "confidence": 0.0, "evidence": evidence}
|
||||||
|
|
||||||
evidence.append("需数据验证确定 M:N 输出模式")
|
evidence.append("需数据验证确定 M:N 输出模式")
|
||||||
return {"resolved_type": "unknown", "confidence": 0.0, "evidence": evidence}
|
return {"resolved_type": "unknown", "confidence": 0.0, "evidence": evidence}
|
||||||
|
|||||||
@@ -245,8 +245,13 @@ def test_mn_output_mode_unknown():
|
|||||||
|
|
||||||
|
|
||||||
def test_mn_output_mode_many_files():
|
def test_mn_output_mode_many_files():
|
||||||
"""文件数 >=3 无提示 → M:N"""
|
"""文件数 >=3 + IF 分支 + KEY 证据 → M:N"""
|
||||||
features = {"has_mn_output_hint": False, "select_files": {"a": {}, "b": {}, "c": {}}}
|
features = {
|
||||||
|
"has_mn_output_hint": False,
|
||||||
|
"select_files": {"a": {}, "b": {}, "c": {}},
|
||||||
|
"if_types": {"total": 2, "comparison": 1, "equality": 1, "compound": 0, "nested_depth": 0},
|
||||||
|
"variable_patterns": {"has_prev_key": True, "has_accumulator": False},
|
||||||
|
}
|
||||||
result = resolve_mn_output_mode(features)
|
result = resolve_mn_output_mode(features)
|
||||||
assert result["resolved_type"] == "M:N"
|
assert result["resolved_type"] == "M:N"
|
||||||
assert result["confidence"] >= 0.55
|
assert result["confidence"] >= 0.55
|
||||||
|
|||||||
@@ -0,0 +1,80 @@
|
|||||||
|
"""对抗性测试 — COBOL 匹配分类器的假阳性/假阴性攻击
|
||||||
|
|
||||||
|
COBOL 迁移专家设计的攻击面:
|
||||||
|
- FP: 非匹配程序被误判为マッチング
|
||||||
|
- FN: 真实匹配程序未被识别
|
||||||
|
- 边界: 注释关键词、旧式命名、多文件非匹配
|
||||||
|
"""
|
||||||
|
|
||||||
|
from pathlib import Path
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from cobol_testgen import extract_structure
|
||||||
|
from hina.pipeline import classify_program
|
||||||
|
from hina.classifier import detect_keyword
|
||||||
|
|
||||||
|
# Directory of adversarial COBOL fixtures, resolved relative to this test
# file: parents[3] / "test-data" / "cobol" / "adversarial".
FIXTURES = Path(__file__).parents[3] / "test-data" / "cobol" / "adversarial"

# One entry per fixture: (filename, expect_matching, reason)
#   expect_matching=True  → must be マッチング/二段階
#   expect_matching=False → must NOT be マッチング/二段階
# `reason` is only used in assertion failure messages. The "FP"/"FN" prefix
# marks whether the case guards against a false positive or a false negative.
ADVERSARIAL_TESTS = [
    ("ADV-FALSE-KEY.cbl", False,
     "FP: WS-KEY 变量但只是简单 ADD 程序,不应触发匹配"),
    ("ADV-KEY-IN-COMMENT.cbl", False,
     "FP: KEY 只在 *> 注释中,不应触发匹配"),
    ("ADV-PREVKEY-FAKE.cbl", False,
     "FP: WS-PREV-KEY 但无匹配逻辑,不应触发匹配"),
    ("ADV-OLD-SCHOOL.cbl", True,
     "FN: K01-KEY 旧式命名,应识别为匹配"),
    ("ADV-TINY-MATCH.cbl", True,
     "FN: 极简匹配程序(1 文件),应识别"),
    ("ADV-CALL-MATCH.cbl", False,
     "FP: CALL+WS-MAST-KEY,子程序调用应优先"),
    ("ADV-ASCII-KEY.cbl", False,
     "FP: ASCII+WS-KEY,编码转换应优先"),
    ("ADV-10FILES.cbl", False,
     "FP: 10 文件无 KEY 比较,不应触发匹配"),
]
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.parametrize(
    "filename,expect_matching,reason",
    ADVERSARIAL_TESTS,
    ids=[case[0].replace('.cbl', '') for case in ADVERSARIAL_TESTS],
)
def test_adversarial(filename, expect_matching, reason):
    """Run one adversarial fixture and check for a false positive/negative.

    Args:
        filename: Fixture file name inside ``FIXTURES``.
        expect_matching: Whether the classified category must contain
            マッチング or 二段階.
        reason: Explanation of the case, included in failure messages.
    """
    path = FIXTURES / filename
    assert path.exists(), f"Missing: {path}"
    src = path.read_text("utf-8")

    # 1. Structure extraction has to succeed and return something.
    structure = extract_structure(src)
    assert structure is not None

    # 2. Classification has to succeed with a non-negative confidence.
    result = classify_program(src)
    assert result is not None
    assert result["confidence"] >= 0

    # 3. The category must agree with the expectation for this fixture.
    category = result["category"]
    classified_as_matching = any(
        tag in category for tag in ("マッチング", "二段階")
    )
    if not expect_matching:
        assert not classified_as_matching, (
            f"{filename}: expected NON-MATCHING but got '{result['category']}' "
            f"(conf={result['confidence']:.2f}). Reason: {reason}"
        )
    else:
        assert classified_as_matching, (
            f"{filename}: expected MATCHING but got '{result['category']}' "
            f"(conf={result['confidence']:.2f}). Reason: {reason}"
        )

    # 4. Keyword detection runs for every fixture; for matching programs a
    #    rule-engine fallback with zero keyword hits is treated as a failure.
    keyword_hits = detect_keyword(src)
    if expect_matching:
        assert len(keyword_hits) > 0 or result["method"] != "rule_engine_fallback", (
            f"{filename}: matching program with 0 keyword matches"
        )
|
||||||
@@ -18,7 +18,7 @@ FIXTURES = Path(__file__).parents[3] / "test-data" / "cobol"
|
|||||||
CLASSIFICATION_TESTS = [
|
CLASSIFICATION_TESTS = [
|
||||||
# ── L1 关键字匹配分类 ──
|
# ── L1 关键字匹配分类 ──
|
||||||
("category_cics/CI01_CICS.cbl", "online", 0.40, "DFHCOMMAREA keyword"),
|
("category_cics/CI01_CICS.cbl", "online", 0.40, "DFHCOMMAREA keyword"),
|
||||||
("category_db/DB01_SELECT_UPDATE.cbl", "DB操作", 0.40, "EXEC SQL keyword"),
|
("category_db/DB01_SELECT_UPDATE.cbl", None, 0.0, "EXEC SQL in *> comments (comment stripping)"),
|
||||||
("HINA101.cbl", "DB操作", 0.55, "EXEC SQL + CALL"),
|
("HINA101.cbl", "DB操作", 0.55, "EXEC SQL + CALL"),
|
||||||
("HINA025.cbl", "子程序调用", 0.40, "CALL + LINKAGE SECTION"),
|
("HINA025.cbl", "子程序调用", 0.40, "CALL + LINKAGE SECTION"),
|
||||||
# sort/merge parser broken by SD keyword - falls to rule engine
|
# sort/merge parser broken by SD keyword - falls to rule engine
|
||||||
|
|||||||
Reference in New Issue
Block a user