// File: aurak/server/src/assessment/graph/nodes/generator.node.ts
// (271 lines, 11 KiB, TypeScript)

import { ChatOpenAI } from '@langchain/openai';
import { SystemMessage, HumanMessage } from '@langchain/core/messages';
import { RunnableConfig } from '@langchain/core/runnables';
import { EvaluationState } from '../state';
import { safeParseJson } from '../../../common/json-utils';
/** Dimension assigned when the model omits the field or uses an unrecognised label. */
const DEFAULT_DIMENSION = 'workCapability';

/**
 * Maps dimension labels the model may emit (Chinese display names or the
 * canonical identifiers themselves) to canonical identifiers.
 *
 * Keys are lower-cased at construction time because lookups are performed on a
 * lower-cased, trimmed label; storing mixed-case keys would make entries such
 * as `devPattern` or `IDE协作能力` unreachable. A `Map` is used instead of a
 * plain object so labels like `constructor` cannot hit prototype members.
 */
const DIMENSION_ALIASES = new Map<string, string>(
  (
    [
      // Chinese labels
      ['技术能力-提示词', 'prompt'],
      ['提示词', 'prompt'],
      ['技术能力-LLM', 'llm'],
      ['LLM', 'llm'],
      ['IDE协作能力', 'ide'],
      ['IDE', 'ide'],
      ['AI开发范式', 'devPattern'],
      ['开发范式', 'devPattern'],
      ['工作能力-安全', 'workCapability'],
      ['工作能力', 'workCapability'],
      // Canonical identifiers map to themselves
      ['prompt', 'prompt'],
      ['llm', 'llm'],
      ['ide', 'ide'],
      ['devPattern', 'devPattern'],
      ['workCapability', 'workCapability'],
    ] as Array<[string, string]>
  ).map(([label, id]): [string, string] => [label.toLowerCase(), id]),
);

/** Values this node reads from `config.configurable`. */
interface GeneratorNodeOptions {
  /** Chat model used for generation; only `invoke` is called on it. */
  model?: { invoke(messages: unknown[]): Promise<{ content: unknown }> };
  /** Text the model must treat as its only source when writing questions. */
  knowledgeBaseContent?: string;
  /** Total number of questions wanted; a falsy value falls back to 5. */
  targetCount?: number;
}

// Fields other than `question_text` are copied through exactly as the model returned them.
// eslint-disable-next-line @typescript-eslint/no-explicit-any
type RawQuestion = Record<string, any>;

/** Renders the non-empty rules as a `1. …` numbered list, one rule per line. */
const toNumberedList = (rules: ReadonlyArray<string | null>): string =>
  rules
    .filter((rule): rule is string => Boolean(rule))
    .map((rule, index) => `${index + 1}. ${rule}`)
    .join('\n');

/** Resolves a model-supplied dimension label to a canonical identifier. */
const resolveDimension = (rawDimension: unknown): string => {
  if (rawDimension === undefined || rawDimension === null) {
    return DEFAULT_DIMENSION;
  }
  const label = String(rawDimension).toLowerCase().trim();
  return DIMENSION_ALIASES.get(label) ?? DEFAULT_DIMENSION;
};

/** True when a parsed entry is an object carrying a non-blank `question_text`. */
const isUsableQuestion = (value: unknown): value is RawQuestion =>
  typeof value === 'object' &&
  value !== null &&
  typeof (value as RawQuestion).question_text === 'string' &&
  (value as RawQuestion).question_text.trim().length > 0;

/**
 * Node responsible for generating assessment questions based on the knowledge base content.
 *
 * Builds a language-specific system prompt (zh / ja / English fallback) from the
 * state's style, difficulty distribution, keywords and already-asked questions,
 * invokes the configured model, and appends the parsed questions to the state.
 *
 * @param state - Current evaluation state; `language`, `style`,
 *   `difficultyDistribution`, `keywords` and `questions` are read.
 * @param config - Runnable config whose `configurable` supplies `model`,
 *   `knowledgeBaseContent` and optionally `targetCount` (default 5).
 * @returns A partial state whose `questions` is the existing list plus any newly
 *   generated questions. The existing list is returned unchanged when the target
 *   count is already met or when the model output cannot be parsed.
 * @throws Error when `model` or `knowledgeBaseContent` is missing; errors thrown
 *   by `model.invoke` are logged and rethrown.
 */
export const questionGeneratorNode = async (
  state: EvaluationState,
  config?: RunnableConfig,
): Promise<Partial<EvaluationState>> => {
  const { model, knowledgeBaseContent, targetCount } = (config?.configurable ??
    {}) as GeneratorNodeOptions;
  // `||` is deliberate: a target of 0 (or any falsy value) falls back to 5.
  const limitCount = targetCount || 5;

  console.log('[GeneratorNode] Starting generation...', {
    language: state.language,
    hasModel: !!model,
    contentLength: knowledgeBaseContent?.length,
    keywords: state.keywords || [],
    targetCount: limitCount,
  });

  if (!model || !knowledgeBaseContent) {
    console.error('[GeneratorNode] Missing model or knowledgeBaseContent');
    throw new Error(
      'Missing model or knowledgeBaseContent in node configuration',
    );
  }

  const isZh = state.language === 'zh';
  const isJa = state.language === 'ja';
  const style = state.style || 'technical';
  const difficultyText = state.difficultyDistribution
    ? JSON.stringify(state.difficultyDistribution)
    : isZh
      ? '随机分布'
      : isJa
        ? 'ランダム分布'
        : 'Random distribution';

  const keywords = state.keywords || [];
  const hasKeywords = keywords.length > 0;
  const keywordText = hasKeywords ? keywords.join(', ') : '';

  const rulesZh = toNumberedList([
    `**禁止重复**:绝对禁止生成与下方“禁止重复列表”中相似的题目。`,
    `**深度挖掘**:如果之前的题目考查了核心定义,新题目必须考查具体的应用案例、对比分析或隐藏的细节。`,
    hasKeywords
      ? `**关键词权重**:必须围绕关键词 (${keywordText}) 展开,但要从关键词的不同侧面(如流程、限制、优缺点、具体参数等)进行挖掘。`
      : null,
    `**随机扰动**:即使对于相同的主题或关键词,也要尝试从不同的逻辑链条(如“因为...所以...” vs “如果没有...会怎样”)出发。`,
  ]);

  const rulesJa = toNumberedList([
    `**重複禁止**:下記の「作成済み問題リスト」と類似した内容は絶対に避けてください。`,
    `**多角的アプローチ**:前回が定義だった場合は、今回は応用方法、制限事項、具体的な数値などに焦点を当ててください。`,
    hasKeywords
      ? `**キーワードの深掘り**:キーワード (${keywordText}) の異なる側面から出題してください。`
      : null,
  ]);

  const rulesEn = toNumberedList([
    `**NO REPETITION**: Strictly avoid any conceptual overlap with the "Previous Questions" list below.`,
    `**New Facets**: If previous questions were about definitions, focus on applications, edge cases, or specific details.`,
    hasKeywords
      ? `**Keyword Variety**: Center on (${keywordText}), but explore different aspects (process, pros/cons, requirements).`
      : null,
  ]);

  const existingQuestions = state.questions || [];
  if (existingQuestions.length >= limitCount) {
    console.log('[GeneratorNode] Skipping generation - enough questions from bank:', existingQuestions.length);
    return { questions: existingQuestions };
  }

  const existingQuestionsText = existingQuestions
    .map((q, i) => `Q${i + 1}: ${q.questionText}`)
    .join('\n');

  // Accepted "dimension" values in every prompt below:
  // prompt=prompt engineering, llm=LLM principles, ide=IDE collaboration,
  // devPattern=development paradigm, workCapability=work capability
  const systemPromptZh = `你是一位严格的知识评估专家。你必须**仅基于**下方提供的知识库内容来生成测试题目。
### 核心铁律(违反将导致题目无效):
1. **所有题目必须直接来源于提供的知识库内容**,每个题目必须能找到对应的原文依据
2. **绝对禁止**编造知识库内容中未提及的概念、术语、流程或数据
3. **绝对禁止**使用你自身知识库中的内容来编造题目
4. 如果知识库内容不足以出题,诚实地报告而不是编造
### 强制性语言规则:
**必须使用中文 (Simplified Chinese) 进行回复**。
### 多样性规则:
${rulesZh}
### 禁止重复列表(已出过):
${existingQuestionsText || '无'}
### 任务:
${hasKeywords ? `目标关键词:${keywordText}\n` : ''}出题风格:${style}
难度:${difficultyText}
请以 JSON 数组格式返回 1 个问题:
[
{
"question_text": "...",
"key_points": ["点1", "点2"],
"difficulty": "...",
"dimension": "prompt/llm/ide/devPattern/workCapability",
"basis": "【必须填写】从知识库中引用与此题相关的原文内容,用引号标注来源段落"
}
]`;

  const systemPromptJa = `あなたは厳格な知識評価の専門家です。提供されたナレッジベースの内容**のみ**に基づいて問題を作成してください。
### 核心鉄則(違反した問題は無効):
1. **すべての問題は提供されたナレッジベースから直接導出**し、各問題に原文の根拠が必要
2. **絶対禁止**:ナレッジベースに記載されていない概念、用語、プロセス、データを作り出すこと
3. **絶対禁止**:自身の知識ベースの内容を問題として使用すること
4. 内容が不十分な場合は、正直に報告し、捏造しないこと
### 言語ルール(最重要):
**必ず日本語で作成してください**。中国語が混ざらないように厳格に注意してください。
### 多様性ルール:
${rulesJa}
### 作成済み問題リスト:
${existingQuestionsText || 'なし'}
### 任務:
${hasKeywords ? `目標キーワード:${keywordText}\n` : ''}出題スタイル:${style}
難易度:${difficultyText}
以下のJSON配列形式で問題を1つ返してください:
[
{
"question_text": "...",
"key_points": ["ポイント1", "ポイント2"],
"difficulty": "...",
"dimension": "prompt/llm/ide/devPattern/workCapability",
"basis": "【必須】ナレッジベースから関連する原文を引用し、出典段落を明記"
}
]`;

  // The task section (keywords / style / difficulty) mirrors the zh and ja
  // prompts so English runs honour the same generation settings.
  const systemPromptEn = `You are a strict knowledge assessment expert. You MUST generate questions **ONLY** from the provided knowledge base content below.
### Core Rules (violations invalidate the question):
1. **All questions MUST directly derive from the provided content**, each question requires a verifiable source reference
2. **ABSOLUTELY FORBIDDEN**: inventing concepts, terminology, processes, or data not in the provided content
3. **ABSOLUTELY FORBIDDEN**: using your own knowledge to fabricate questions
4. If content is insufficient, honestly report rather than fabricate
### Language Rule:
**You MUST generate the question and key points in English.**
### Diversity Rules:
${rulesEn}
### Previous Questions (DO NOT REPEAT):
${existingQuestionsText || 'None'}
### Task:
${hasKeywords ? `Target keywords: ${keywordText}\n` : ''}Style: ${style}
Difficulty: ${difficultyText}
Return 1 question as a JSON array with format:
[
{
"question_text": "...",
"key_points": ["point1", "point2"],
"difficulty": "...",
"dimension": "prompt/llm/ide/devPattern/workCapability",
"basis": "【REQUIRED】Cite the specific source text from the knowledge base, noting the source paragraph"
}
]`;

  const systemPrompt = isZh
    ? systemPromptZh
    : isJa
      ? systemPromptJa
      : systemPromptEn;

  const humanMsg = isZh
    ? `【知识库内容 - 以下是你出题的唯一依据】\n\n--- 知识库开始 ---\n${knowledgeBaseContent}\n--- 知识库结束 ---\n\n请严格基于以上内容生成题目。`
    : isJa
      ? `【ナレッジベース内容 - 以下は出題の唯一の根拠です】\n\n--- ナレッジベース開始 ---\n${knowledgeBaseContent}\n--- ナレッジベース終了 ---\n\n上記の内容のみに基づいて問題を作成してください。`
      : `【Knowledge Base Content - Your ONLY source for questions】\n\n--- KB START ---\n${knowledgeBaseContent}\n--- KB END ---\n\nGenerate questions strictly from the above content only.`;

  try {
    const response = await model.invoke([
      new SystemMessage(systemPrompt),
      new HumanMessage(humanMsg),
    ]);

    // Parsing problems are treated as "no new question": log and hand back the
    // existing list rather than failing the whole graph run.
    try {
      const parsed = safeParseJson<unknown>(response.content as string);
      if (!parsed) {
        console.error('[GeneratorNode] Failed to parse JSON. Raw content:', response.content);
        throw new Error('Invalid JSON format from AI');
      }

      // Handle both array and single object
      const candidates: unknown[] = Array.isArray(parsed) ? parsed : [parsed];

      // Drop entries without a usable question_text (e.g. the model reporting
      // that the content is insufficient) instead of storing blank questions.
      const usableQuestions = candidates.filter(isUsableQuestion);
      if (usableQuestions.length < candidates.length) {
        console.warn(
          '[GeneratorNode] Skipped entries without question_text:',
          candidates.length - usableQuestions.length,
        );
      }

      const mappedNewQuestions = usableQuestions.map((q, index) => {
        const inferredDimension = resolveDimension(q.dimension);
        console.log('[GeneratorNode] Dimension mapping:', { original: q.dimension, mapped: inferredDimension });
        return {
          // Offset by the position in this batch so ids stay unique when the
          // model returns more than one question.
          id: (existingQuestions.length + index + 1).toString(),
          questionText: q.question_text,
          keyPoints: q.key_points,
          difficulty: q.difficulty,
          basis: q.basis,
          dimension: inferredDimension,
        };
      });

      const questionsToGenerate = Math.max(1, limitCount - existingQuestions.length);
      const limitedNewQuestions = mappedNewQuestions.slice(0, questionsToGenerate);
      console.log('[GeneratorNode] Generated questions:', mappedNewQuestions.length, 'Limit:', questionsToGenerate);

      return {
        questions: [...existingQuestions, ...limitedNewQuestions],
      };
    } catch (error) {
      console.error('[GeneratorNode] Parse error:', error);
      return { questions: existingQuestions };
    }
  } catch (invokeError) {
    console.error('[GeneratorNode] Invoke error:', invokeError);
    throw invokeError;
  }
};