add cobol_testgen module
This commit is contained in:
@@ -0,0 +1,308 @@
|
||||
"""AI智能体接口 — 基于DeepSeek的PROCEDURE DIVISION解析"""
|
||||
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
from pathlib import Path
|
||||
|
||||
from .models import BrSeq, BrIf, BrEval, BrPerform, Assign, CallNode
|
||||
|
||||
|
||||
DEEPSEEK_API_KEY_ENV = "DEEPSEEK_API_KEY"
|
||||
DEEPSEEK_BASE_URL = "https://api.deepseek.com/v1"
|
||||
DEEPSEEK_MODEL = "deepseek-chat"
|
||||
PROMPT_FILE = Path(__file__).parent / "prompts" / "parse_proc_division.txt"
|
||||
|
||||
|
||||
def parse_proc_division_ai(proc_text: str, fields: list = None, spec_doc: str = ""):
|
||||
"""AI版PROCEDURE DIVISION解析:调用DeepSeek API,返回(branch_tree, assignments)."""
|
||||
api_key = os.environ.get(DEEPSEEK_API_KEY_ENV)
|
||||
if not api_key:
|
||||
raise NotImplementedError(
|
||||
f"AI agent requires {DEEPSEEK_API_KEY_ENV} environment variable"
|
||||
)
|
||||
|
||||
prompt = _build_prompt(proc_text, fields)
|
||||
response_text = _call_llm(prompt, api_key)
|
||||
data = _extract_json(response_text)
|
||||
if not data:
|
||||
raise NotImplementedError("AI returned no parsable JSON")
|
||||
|
||||
branch_tree = _json_to_tree(data.get("tree", {}))
|
||||
assignments = data.get("assignments", {})
|
||||
return branch_tree, assignments
|
||||
|
||||
|
||||
def _build_prompt(proc_text: str, fields: list = None) -> list[dict]:
|
||||
system = PROMPT_FILE.read_text(encoding="utf-8")
|
||||
|
||||
fields_json = json.dumps(fields, ensure_ascii=False, indent=2) if fields else "[]"
|
||||
|
||||
user = f"""## PROCEDURE DIVISION 源码
|
||||
|
||||
```
|
||||
{proc_text}
|
||||
```
|
||||
|
||||
## DATA DIVISION 字段列表
|
||||
|
||||
```json
|
||||
{fields_json}
|
||||
```
|
||||
"""
|
||||
|
||||
return [
|
||||
{"role": "system", "content": system},
|
||||
{"role": "user", "content": user},
|
||||
]
|
||||
|
||||
|
||||
def _call_llm(messages: list[dict], api_key: str) -> str:
|
||||
try:
|
||||
from openai import OpenAI
|
||||
except ImportError:
|
||||
raise NotImplementedError(
|
||||
"openai package not installed. Run: pip install openai"
|
||||
)
|
||||
|
||||
client = OpenAI(api_key=api_key, base_url=DEEPSEEK_BASE_URL)
|
||||
response = client.chat.completions.create(
|
||||
model=DEEPSEEK_MODEL,
|
||||
messages=messages,
|
||||
temperature=0.1,
|
||||
max_tokens=8192,
|
||||
)
|
||||
return response.choices[0].message.content or ""
|
||||
|
||||
|
||||
def _extract_json(text: str) -> dict | None:
|
||||
stripped = text.strip()
|
||||
# Try extracting from markdown code block first
|
||||
m = re.search(r"```(?:json)?\s*\n?(.*?)\n?```", stripped, re.DOTALL)
|
||||
if m:
|
||||
stripped = m.group(1).strip()
|
||||
try:
|
||||
return json.loads(stripped)
|
||||
except json.JSONDecodeError:
|
||||
return None
|
||||
|
||||
|
||||
def _json_to_tree(data: dict):
|
||||
node_type = data.get("type", "seq")
|
||||
|
||||
if node_type == "seq":
|
||||
node = BrSeq()
|
||||
for child_data in data.get("children", []):
|
||||
child = _json_to_tree(child_data)
|
||||
if child is not None:
|
||||
node.add(child)
|
||||
return node
|
||||
|
||||
if node_type == "if":
|
||||
node = BrIf(data.get("condition", ""))
|
||||
node.true_seq = _json_to_tree(data.get("true_seq", {"type": "seq", "children": []}))
|
||||
node.false_seq = _json_to_tree(data.get("false_seq", {"type": "seq", "children": []}))
|
||||
return node
|
||||
|
||||
if node_type == "eval":
|
||||
node = BrEval(data.get("subject", ""))
|
||||
for w in data.get("when_list", []):
|
||||
node.when_list.append((w.get("value", ""), _json_to_tree(w.get("seq", {"type": "seq", "children": []}))))
|
||||
node.other_seq = _json_to_tree(data.get("other_seq", {"type": "seq", "children": []}))
|
||||
node.has_other = data.get("has_other", False)
|
||||
return node
|
||||
|
||||
if node_type == "perform":
|
||||
perf_type = data.get("perf_type", "para")
|
||||
kw = {"perf_type": perf_type}
|
||||
for k in ("condition", "target", "thru", "times",
|
||||
"varying_var", "varying_from", "varying_by"):
|
||||
if k in data:
|
||||
kw[k] = data[k]
|
||||
node = BrPerform(**kw)
|
||||
if "body_seq" in data:
|
||||
node.body_seq = _json_to_tree(data["body_seq"])
|
||||
return node
|
||||
|
||||
if node_type == "assign":
|
||||
return Assign(
|
||||
target=data.get("target", ""),
|
||||
source_info=data.get("source_info", {}),
|
||||
)
|
||||
|
||||
if node_type == "call":
|
||||
return CallNode(
|
||||
program_name=data.get("program_name", ""),
|
||||
using_params=data.get("using_params", []),
|
||||
)
|
||||
|
||||
return None
|
||||
|
||||
|
||||
# ── LLM 路径生成 ──
|
||||
|
||||
|
||||
def llm_generate_all_paths(tree_root, fields) -> list | None:
|
||||
"""为整个控制流树生成 MC/DC 路径。返回 [(constraints, assignments), ...] 或 None。"""
|
||||
api_key = os.environ.get(DEEPSEEK_API_KEY_ENV)
|
||||
if not api_key:
|
||||
return None
|
||||
|
||||
tree_json = _serialize_tree_for_llm(tree_root)
|
||||
if tree_json is None:
|
||||
return None
|
||||
|
||||
level88_map = _extract_88_mapping(fields)
|
||||
messages = _build_path_prompt(tree_json, fields, level88_map)
|
||||
|
||||
try:
|
||||
response = _call_llm(messages, api_key)
|
||||
data = _extract_json(response)
|
||||
if data and "paths" in data:
|
||||
return _parse_llm_paths(data["paths"])
|
||||
except Exception:
|
||||
pass
|
||||
return None
|
||||
|
||||
|
||||
def _serialize_tree_for_llm(node):
|
||||
if node is None:
|
||||
return None
|
||||
from .models import BrSeq, BrIf, BrEval, BrPerform, Assign, CallNode, ExitNode, GoTo
|
||||
|
||||
if isinstance(node, BrSeq):
|
||||
children = []
|
||||
for child in node.children:
|
||||
s = _serialize_tree_for_llm(child)
|
||||
if s is not None:
|
||||
children.append(s)
|
||||
return {"type": "seq", "children": children} if children else None
|
||||
|
||||
if isinstance(node, BrIf):
|
||||
return {
|
||||
"type": "if",
|
||||
"condition": node.condition,
|
||||
"true_seq": _serialize_tree_for_llm(node.true_seq) or {"type": "seq", "children": []},
|
||||
"false_seq": _serialize_tree_for_llm(node.false_seq) or {"type": "seq", "children": []},
|
||||
}
|
||||
|
||||
if isinstance(node, BrEval):
|
||||
when_list = []
|
||||
for val, seq in node.when_list:
|
||||
s = _serialize_tree_for_llm(seq)
|
||||
when_list.append({"value": val, "seq": s or {"type": "seq", "children": []}})
|
||||
return {
|
||||
"type": "eval",
|
||||
"subject": node.subject,
|
||||
"when_list": when_list,
|
||||
"other_seq": _serialize_tree_for_llm(node.other_seq) or {"type": "seq", "children": []},
|
||||
"has_other": node.has_other,
|
||||
}
|
||||
|
||||
if isinstance(node, BrPerform):
|
||||
result = {"type": "perform", "perf_type": node.perf_type}
|
||||
for attr in ("condition", "target", "thru", "times",
|
||||
"varying_var", "varying_from", "varying_by"):
|
||||
val = getattr(node, attr, None)
|
||||
if val is not None:
|
||||
result[attr] = val
|
||||
if node.body_seq:
|
||||
bs = _serialize_tree_for_llm(node.body_seq)
|
||||
if bs:
|
||||
result["body_seq"] = bs
|
||||
return result
|
||||
|
||||
# Assign / CallNode / ExitNode / GoTo — 不影响路径生成,可省略
|
||||
return None
|
||||
|
||||
|
||||
def _extract_88_mapping(fields):
|
||||
mapping = {}
|
||||
for f in fields:
|
||||
if f.get('is_88'):
|
||||
mapping[f['name']] = {
|
||||
"parent": f['parent'],
|
||||
"value": f['value'],
|
||||
"pic_info": f.get('pic_info', {}),
|
||||
}
|
||||
return mapping
|
||||
|
||||
|
||||
def _build_path_prompt(tree_json, fields, level88_map):
|
||||
system = ("你是 COBOL 测试路径生成专家。"
|
||||
"请为给定的控制流树生成满足 MC/DC 覆盖的测试路径集。"
|
||||
"只输出 JSON,不要多余文字。")
|
||||
|
||||
reduced_fields = []
|
||||
for f in fields:
|
||||
entry = {"name": f["name"], "pic": f.get("pic", "")}
|
||||
pi = f.get("pic_info", {})
|
||||
if pi:
|
||||
entry["pic_info"] = {
|
||||
"type": pi.get("type"), "digits": pi.get("digits"),
|
||||
"decimal": pi.get("decimal"), "length": pi.get("length"),
|
||||
}
|
||||
if f.get("is_88"):
|
||||
entry["is_88"] = True
|
||||
entry["value"] = f.get("value")
|
||||
entry["parent"] = f.get("parent")
|
||||
reduced_fields.append(entry)
|
||||
|
||||
user = (
|
||||
"## 控制流树(JSON)\n\n"
|
||||
f"```json\n{json.dumps(tree_json, ensure_ascii=False, indent=2)}\n```\n\n"
|
||||
"## 字段定义\n\n"
|
||||
f"```json\n{json.dumps(reduced_fields, ensure_ascii=False, indent=2)}\n```\n\n"
|
||||
"## 要求\n"
|
||||
"1. 每个 IF/EVALUATE/PERFORM UNTIL 的每个分支至少被覆盖一次\n"
|
||||
"2. 复合条件(AND/OR/NOT)需要满足 MC/DC:每个叶条件的独立影响对\n"
|
||||
"3. 路径数尽量少(最小集优先)\n"
|
||||
"4. 88-level 条件名要展开为实际字段比较(如 CUST-VIP → WS-CUST-LEVEL='V')\n"
|
||||
"5. 同一路径中的约束不能自相矛盾(同一字段不能同时等于 'A' 和等于 'B')\n"
|
||||
"6. 数值边界值合理(>5000 → 5001, <100 → 99)\n"
|
||||
"7. AND 优先级高于 OR\n\n"
|
||||
"## 输出格式\n\n"
|
||||
"```json\n"
|
||||
"{\n"
|
||||
' "paths": [\n'
|
||||
" {\n"
|
||||
' "constraints": [\n'
|
||||
' {"field": "WS-AMOUNT", "op": ">", "value": "5000", "want_true": true}\n'
|
||||
" ],\n"
|
||||
' "assignments": {}\n'
|
||||
" }\n"
|
||||
" ]\n"
|
||||
"}\n"
|
||||
"```"
|
||||
)
|
||||
|
||||
return [
|
||||
{"role": "system", "content": system},
|
||||
{"role": "user", "content": user},
|
||||
]
|
||||
|
||||
|
||||
def _parse_llm_paths(paths_data):
|
||||
result = []
|
||||
for p in paths_data:
|
||||
constraints = []
|
||||
for c in p.get("constraints", []):
|
||||
constraints.append((c["field"], c["op"], str(c["value"]), c["want_true"]))
|
||||
assignments = p.get("assignments", {})
|
||||
result.append((constraints, assignments))
|
||||
return result
|
||||
|
||||
|
||||
def resolve_constraints_ai(paths, fields=None, assignments=None):
|
||||
"""AI版约束推理(未来实现)"""
|
||||
raise NotImplementedError("AI agent not yet implemented")
|
||||
|
||||
|
||||
def enhance_metadata_ai(records, fields=None, spec_doc: str = ""):
|
||||
"""AI版测试用例元数据生成(未来实现)"""
|
||||
raise NotImplementedError("AI agent not yet implemented")
|
||||
|
||||
|
||||
def analyze_spec_ai(spec_doc: str = ""):
|
||||
"""AI版式样书解析(未来实现)"""
|
||||
raise NotImplementedError("AI agent not yet implemented")
|
||||
Reference in New Issue
Block a user