forked from hangshuo652/aurak
271 lines
11 KiB
TypeScript
271 lines
11 KiB
TypeScript
import { ChatOpenAI } from '@langchain/openai';
|
|
import { SystemMessage, HumanMessage } from '@langchain/core/messages';
|
|
import { RunnableConfig } from '@langchain/core/runnables';
|
|
import { EvaluationState } from '../state';
|
|
import { safeParseJson } from '../../../common/json-utils';
|
|
|
|
/** Languages that have a dedicated prompt; anything else falls back to English. */
type PromptLanguage = 'zh' | 'ja' | 'en';

/** Values interpolated into the system prompt. */
interface PromptContext {
  hasKeywords: boolean;
  keywordText: string;
  style: string;
  difficultyText: string;
  existingQuestionsText: string;
}

/** Number of questions to aim for when `targetCount` is not configured. */
const DEFAULT_TARGET_COUNT = 5;

/** Dimension used when the model omits the field or returns an unknown label. */
const DEFAULT_DIMENSION = 'workCapability';

/**
 * Accepted dimension labels mapped to their canonical id.
 * dimension values: prompt=prompt engineering, llm=LLM principles, ide=IDE collaboration,
 * devPattern=development paradigm, workCapability=work capability.
 */
const DIMENSION_LABELS: ReadonlyArray<readonly [string, string]> = [
  // Chinese labels
  ['技术能力-提示词', 'prompt'],
  ['提示词', 'prompt'],
  ['技术能力-LLM', 'llm'],
  ['LLM', 'llm'],
  ['IDE协作能力', 'ide'],
  ['IDE', 'ide'],
  ['AI开发范式', 'devPattern'],
  ['开发范式', 'devPattern'],
  ['工作能力-安全', 'workCapability'],
  ['工作能力', 'workCapability'],
  // Canonical ids map to themselves
  ['prompt', 'prompt'],
  ['llm', 'llm'],
  ['ide', 'ide'],
  ['devPattern', 'devPattern'],
  ['workCapability', 'workCapability'],
];

/**
 * Lookup table keyed by the lower-cased label. The incoming value is lower-cased
 * before lookup, so the keys must be lower-cased as well; otherwise mixed-case
 * labels such as `devPattern` can never match.
 */
const DIMENSION_LOOKUP = new Map<string, string>(
  DIMENSION_LABELS.map(([label, id]): [string, string] => [label.toLowerCase(), id]),
);

/** Text used for the difficulty line when no distribution is configured. */
const RANDOM_DIFFICULTY_TEXT: Record<PromptLanguage, string> = {
  zh: '随机分布',
  ja: 'ランダム分布',
  en: 'Random distribution',
};

/** Maps a raw dimension value from the model to a canonical dimension id (case-insensitive). */
const normalizeDimension = (raw: unknown): string => {
  if (raw === undefined || raw === null) {
    return DEFAULT_DIMENSION;
  }
  const key = String(raw).toLowerCase().trim();
  return DIMENSION_LOOKUP.get(key) ?? DEFAULT_DIMENSION;
};

/** Drops disabled (null) rules and renders the rest as a numbered list. */
const numberRules = (rules: Array<string | null>): string =>
  rules
    .filter((rule): rule is string => Boolean(rule))
    .map((rule, i) => `${i + 1}. ${rule}`)
    .join('\n');

/** Builds the numbered diversity rules for the selected language only. */
const buildDiversityRules = (
  lang: PromptLanguage,
  hasKeywords: boolean,
  keywordText: string,
): string => {
  switch (lang) {
    case 'zh':
      return numberRules([
        `**禁止重复**:绝对禁止生成与下方“禁止重复列表”中相似的题目。`,
        `**深度挖掘**:如果之前的题目考查了核心定义,新题目必须考查具体的应用案例、对比分析或隐藏的细节。`,
        hasKeywords
          ? `**关键词权重**:必须围绕关键词 (${keywordText}) 展开,但要从关键词的不同侧面(如流程、限制、优缺点、具体参数等)进行挖掘。`
          : null,
        `**随机扰动**:即使对于相同的主题或关键词,也要尝试从不同的逻辑链条(如“因为...所以...” vs “如果没有...会怎样”)出发。`,
      ]);
    case 'ja':
      return numberRules([
        `**重複禁止**:下記の「作成済み問題リスト」と類似した内容は絶対に避けてください。`,
        `**多角的アプローチ**:前回が定義だった場合は、今回は応用方法、制限事項、具体的な数値などに焦点を当ててください。`,
        hasKeywords
          ? `**キーワードの深掘り**:キーワード (${keywordText}) の異なる側面から出題してください。`
          : null,
      ]);
    default:
      return numberRules([
        `**NO REPETITION**: Strictly avoid any conceptual overlap with the "Previous Questions" list below.`,
        `**New Facets**: If previous questions were about definitions, focus on applications, edge cases, or specific details.`,
        hasKeywords
          ? `**Keyword Variety**: Center on (${keywordText}), but explore different aspects (process, pros/cons, requirements).`
          : null,
      ]);
  }
};

/** Builds the system prompt (rules, previous questions, task, output format) for one language. */
const buildSystemPrompt = (lang: PromptLanguage, ctx: PromptContext): string => {
  const { hasKeywords, keywordText, style, difficultyText, existingQuestionsText } = ctx;
  const rules = buildDiversityRules(lang, hasKeywords, keywordText);

  switch (lang) {
    case 'zh':
      return `你是一位严格的知识评估专家。你必须**仅基于**下方提供的知识库内容来生成测试题目。

### 核心铁律(违反将导致题目无效):
1. **所有题目必须直接来源于提供的知识库内容**,每个题目必须能找到对应的原文依据
2. **绝对禁止**编造知识库内容中未提及的概念、术语、流程或数据
3. **绝对禁止**使用你自身知识库中的内容来编造题目
4. 如果知识库内容不足以出题,诚实地报告而不是编造

### 强制性语言规则:
**必须使用中文 (Simplified Chinese) 进行回复**。

### 多样性规则:
${rules}

### 禁止重复列表(已出过):
${existingQuestionsText || '无'}

### 任务:
${hasKeywords ? `目标关键词:${keywordText}\n` : ''}出题风格:${style}
难度:${difficultyText}

请以 JSON 数组格式返回 1 个问题:
[
  {
    "question_text": "...",
    "key_points": ["点1", "点2"],
    "difficulty": "...",
    "dimension": "prompt/llm/ide/devPattern/workCapability",
    "basis": "【必须填写】从知识库中引用与此题相关的原文内容,用引号标注来源段落"
  }
]`;
    case 'ja':
      return `あなたは厳格な知識評価の専門家です。提供されたナレッジベースの内容**のみ**に基づいて問題を作成してください。

### 核心鉄則(違反した問題は無効):
1. **すべての問題は提供されたナレッジベースから直接導出**し、各問題に原文の根拠が必要
2. **絶対禁止**:ナレッジベースに記載されていない概念、用語、プロセス、データを作り出すこと
3. **絶対禁止**:自身の知識ベースの内容を問題として使用すること
4. 内容が不十分な場合は、正直に報告し、捏造しないこと

### 言語ルール(最重要):
**必ず日本語で作成してください**。中国語が混ざらないように厳格に注意してください。

### 多様性ルール:
${rules}

### 作成済み問題リスト:
${existingQuestionsText || 'なし'}

### 任務:
${hasKeywords ? `目標キーワード:${keywordText}\n` : ''}出題スタイル:${style}
難易度:${difficultyText}

以下のJSON配列形式で問題を1つ返してください:
[
  {
    "question_text": "...",
    "key_points": ["ポイント1", "ポイント2"],
    "difficulty": "...",
    "dimension": "prompt/llm/ide/devPattern/workCapability",
    "basis": "【必須】ナレッジベースから関連する原文を引用し、出典段落を明記"
  }
]`;
    default:
      // The task section mirrors the zh/ja prompts so that keywords, style and
      // difficulty are not silently ignored for English (and fallback) runs.
      return `You are a strict knowledge assessment expert. You MUST generate questions **ONLY** from the provided knowledge base content below.

### Core Rules (violations invalidate the question):
1. **All questions MUST directly derive from the provided content**, each question requires a verifiable source reference
2. **ABSOLUTELY FORBIDDEN**: inventing concepts, terminology, processes, or data not in the provided content
3. **ABSOLUTELY FORBIDDEN**: using your own knowledge to fabricate questions
4. If content is insufficient, honestly report rather than fabricate

### Language Rule:
**You MUST generate the question and key points in English.**

### Diversity Rules:
${rules}

### Previous Questions (DO NOT REPEAT):
${existingQuestionsText || 'None'}

### Task:
${hasKeywords ? `Target keywords: ${keywordText}\n` : ''}Question style: ${style}
Difficulty: ${difficultyText}

Return 1 question as a JSON array with format:
[
  {
    "question_text": "...",
    "key_points": ["point1", "point2"],
    "difficulty": "...",
    "dimension": "prompt/llm/ide/devPattern/workCapability",
    "basis": "【REQUIRED】Cite the specific source text from the knowledge base, noting the source paragraph"
  }
]`;
  }
};

/** Wraps the knowledge base content in the language-specific human message. */
const buildHumanMessage = (lang: PromptLanguage, knowledgeBaseContent: string): string => {
  switch (lang) {
    case 'zh':
      return `【知识库内容 - 以下是你出题的唯一依据】\n\n--- 知识库开始 ---\n${knowledgeBaseContent}\n--- 知识库结束 ---\n\n请严格基于以上内容生成题目。`;
    case 'ja':
      return `【ナレッジベース内容 - 以下は出題の唯一の根拠です】\n\n--- ナレッジベース開始 ---\n${knowledgeBaseContent}\n--- ナレッジベース終了 ---\n\n上記の内容のみに基づいて問題を作成してください。`;
    default:
      return `【Knowledge Base Content - Your ONLY source for questions】\n\n--- KB START ---\n${knowledgeBaseContent}\n--- KB END ---\n\nGenerate questions strictly from the above content only.`;
  }
};

/**
 * Flattens a model response `content` value to plain text.
 * Accepts a string, or an array whose items are strings or objects with a string `text` field.
 */
const messageContentToText = (content: unknown): string => {
  if (typeof content === 'string') {
    return content;
  }
  if (Array.isArray(content)) {
    return content
      .map((part: unknown): string => {
        if (typeof part === 'string') {
          return part;
        }
        if (part !== null && typeof part === 'object') {
          const text = (part as { text?: unknown }).text;
          return typeof text === 'string' ? text : '';
        }
        return '';
      })
      .join('');
  }
  return content === undefined || content === null ? '' : String(content);
};

/** True when a parsed entry is an object carrying a non-empty `question_text` string. */
const hasQuestionText = (raw: unknown): boolean => {
  if (raw === null || typeof raw !== 'object') {
    return false;
  }
  const text = (raw as { question_text?: unknown }).question_text;
  return typeof text === 'string' && text.trim().length > 0;
};

/**
 * Node responsible for generating assessment questions based on the knowledge base content.
 *
 * Reads `model`, `knowledgeBaseContent` and `targetCount` from `config.configurable`, asks the
 * model for a new question in the language given by `state.language` ('zh', 'ja', otherwise
 * English), and appends the parsed result to the questions already present in the state.
 *
 * @param state - Current evaluation state (language, style, keywords, difficulty distribution,
 *   existing questions).
 * @param config - Runnable config whose `configurable` carries the model, the knowledge base
 *   text and the optional target question count (defaults to 5).
 * @returns A partial state with `questions` set to the existing questions plus any newly
 *   generated ones. When the target count is already reached, or the model output cannot be
 *   parsed, the existing questions are returned unchanged.
 * @throws Error when `model` or `knowledgeBaseContent` is missing; errors raised while invoking
 *   the model are logged and rethrown.
 */
export const questionGeneratorNode = async (
  state: EvaluationState,
  config?: RunnableConfig,
): Promise<Partial<EvaluationState>> => {
  // `configurable` is an untyped bag, so its fields are read loosely.
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  const { model, knowledgeBaseContent, targetCount } = (config?.configurable as any) || {};
  // `||` is intentional: a missing or zero target falls back to the default.
  const limitCount = targetCount || DEFAULT_TARGET_COUNT;

  console.log('[GeneratorNode] Starting generation...', {
    language: state.language,
    hasModel: !!model,
    contentLength: knowledgeBaseContent?.length,
    keywords: state.keywords || [],
    targetCount: limitCount,
  });

  if (!model || !knowledgeBaseContent) {
    console.error('[GeneratorNode] Missing model or knowledgeBaseContent');
    throw new Error(
      'Missing model or knowledgeBaseContent in node configuration',
    );
  }

  const existingQuestions = state.questions || [];

  // Nothing to do when the question bank already satisfies the target.
  if (existingQuestions.length >= limitCount) {
    console.log('[GeneratorNode] Skipping generation - enough questions from bank:', existingQuestions.length);
    return { questions: existingQuestions };
  }

  const lang: PromptLanguage =
    state.language === 'zh' ? 'zh' : state.language === 'ja' ? 'ja' : 'en';

  const keywords = state.keywords || [];
  const hasKeywords = keywords.length > 0;

  const systemPrompt = buildSystemPrompt(lang, {
    hasKeywords,
    keywordText: hasKeywords ? keywords.join(', ') : '',
    style: state.style || 'technical',
    difficultyText: state.difficultyDistribution
      ? JSON.stringify(state.difficultyDistribution)
      : RANDOM_DIFFICULTY_TEXT[lang],
    existingQuestionsText: existingQuestions
      .map((q, i) => `Q${i + 1}: ${q.questionText}`)
      .join('\n'),
  });
  const humanMsg = buildHumanMessage(lang, knowledgeBaseContent);

  let response: { content: unknown };
  try {
    response = await model.invoke([
      new SystemMessage(systemPrompt),
      new HumanMessage(humanMsg),
    ]);
  } catch (invokeError) {
    console.error('[GeneratorNode] Invoke error:', invokeError);
    throw invokeError;
  }

  try {
    // The parsed payload is model-generated JSON with no guaranteed shape.
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    const parsed = safeParseJson<any>(messageContentToText(response.content));

    if (!parsed) {
      console.error('[GeneratorNode] Failed to parse JSON. Raw content:', response.content);
      throw new Error('Invalid JSON format from AI');
    }

    // Handle both array and single object
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    const candidates: any[] = Array.isArray(parsed) ? parsed : [parsed];

    // Entries without question text would be stored as questions with undefined text.
    const usable = candidates.filter(hasQuestionText);
    if (usable.length < candidates.length) {
      console.warn(
        '[GeneratorNode] Dropped entries without question_text:',
        candidates.length - usable.length,
      );
    }

    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    const mappedNewQuestions = usable.map((q: any, index: number) => {
      const dimension = normalizeDimension(q.dimension);
      if (q.dimension?.toString().trim()) {
        console.log('[GeneratorNode] Dimension mapping:', { original: q.dimension, mapped: dimension });
      }
      return {
        // Offset by the position in this batch so every new question gets a distinct id.
        id: (existingQuestions.length + index + 1).toString(),
        questionText: q.question_text,
        keyPoints: q.key_points,
        difficulty: q.difficulty,
        basis: q.basis,
        dimension,
      };
    });

    const questionsToGenerate = Math.max(1, limitCount - existingQuestions.length);
    const limitedNewQuestions = mappedNewQuestions.slice(0, questionsToGenerate);

    console.log('[GeneratorNode] Generated questions:', mappedNewQuestions.length, 'Limit:', questionsToGenerate);

    return {
      questions: [...existingQuestions, ...limitedNewQuestions],
    };
  } catch (error) {
    // Best effort: an unusable model reply leaves the state unchanged instead of failing the run.
    console.error('[GeneratorNode] Parse error:', error);
    return { questions: existingQuestions };
  }
};
|