Files
aurak/server/src/assessment/graph/nodes/grader.node.ts
T
Developer 7fd2a4cda2 fix: option display + partial credit grading
- Option display: use slice(1) instead of regex to strip letter prefix
- Grader prompts: add explicit partial credit guidance (5-7 for partial, 0-2 only for off-target)
2026-05-21 13:13:21 +08:00

320 lines
12 KiB
TypeScript
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
import { ChatOpenAI } from '@langchain/openai';
import {
SystemMessage,
HumanMessage,
AIMessage,
} from '@langchain/core/messages';
import { RunnableConfig } from '@langchain/core/runnables';
import { EvaluationState } from '../state';
import { safeParseJson } from '../../../common/json-utils';
/** Locale buckets the grader has dedicated prompts and labels for. */
type GraderLocale = 'zh' | 'ja' | 'en';

/** Score recorded when the LLM call, or the parsing of its reply, fails. */
const GRADER_FALLBACK_SCORE = 5;

/** Follow-up budget used when the answer key supplies no follow-up hints. */
const GRADER_DEFAULT_MAX_FOLLOW_UPS = 2;

/** Scores at or above this value never trigger a follow-up question. */
const GRADER_NO_FOLLOW_UP_SCORE = 8;

/** Localized "Score" / "Feedback" labels used in the feedback message. */
const GRADER_LABELS: Record<GraderLocale, { score: string; feedback: string }> = {
  zh: { score: '得分', feedback: '反馈' },
  ja: { score: 'スコア', feedback: 'フィードバック' },
  en: { score: 'Score', feedback: 'Feedback' },
};

/**
 * Short CJK "I don't know" phrases. They are only honoured for very short
 * answers because fragments such as 不会 / 不明 also occur inside real answers.
 */
const GRADER_CJK_GIVE_UP_PHRASES = [
  '不知道',
  '不会',
  '不知',
  'わかりません',
  'わからん',
  '知らない',
  '不明',
  'わからない',
];
const GRADER_CJK_GIVE_UP_MAX_LENGTH = 10;

/**
 * English "I don't know" phrases. These need a larger length budget: the
 * phrase "don't know" alone is already 10 characters, so a `< 10` gate can
 * never match it.
 */
const GRADER_LATIN_GIVE_UP_PHRASES = ["don't know", 'no idea'];
const GRADER_LATIN_GIVE_UP_MAX_LENGTH = 30;

/** Maps the state's language code onto one of the supported locales (default: English). */
const resolveGraderLocale = (language: unknown): GraderLocale =>
  language === 'zh' ? 'zh' : language === 'ja' ? 'ja' : 'en';

/** Flattens the user's message content to text: strings as-is, anything else JSON-encoded. */
const graderUserText = (content: unknown): string =>
  typeof content === 'string' ? content : (JSON.stringify(content) ?? '');

/**
 * Flattens the model's reply to text. String content is returned as-is; for
 * array content the string parts and `text` fields are concatenated.
 */
const graderResponseText = (content: unknown): string => {
  if (typeof content === 'string') {
    return content;
  }
  if (Array.isArray(content)) {
    return content
      .map((part) =>
        typeof part === 'string'
          ? part
          : typeof part?.text === 'string'
            ? part.text
            : '',
      )
      .join('');
  }
  return content == null ? '' : (JSON.stringify(content) ?? '');
};

/**
 * Coerces the model-reported score to a number clamped to [0, 10].
 * Returns null when no finite number can be extracted.
 */
const normalizeGraderScore = (raw: unknown): number | null => {
  const numeric =
    typeof raw === 'number'
      ? raw
      : typeof raw === 'string'
        ? Number.parseFloat(raw)
        : Number.NaN;
  if (!Number.isFinite(numeric)) {
    return null;
  }
  return Math.min(10, Math.max(0, numeric));
};

/** True when a short answer is essentially "I don't know" (zh / ja / en). */
const isGraderGiveUpAnswer = (answer: string): boolean => {
  // Fold the typographic apostrophe so "don’t know" matches as well.
  const normalized = answer.trim().toLowerCase().replace(/\u2019/g, "'");
  const containsAny = (phrases: string[]): boolean =>
    phrases.some((phrase) => normalized.includes(phrase));
  return (
    (normalized.length < GRADER_CJK_GIVE_UP_MAX_LENGTH &&
      containsAny(GRADER_CJK_GIVE_UP_PHRASES)) ||
    (normalized.length < GRADER_LATIN_GIVE_UP_MAX_LENGTH &&
      containsAny(GRADER_LATIN_GIVE_UP_PHRASES))
  );
};

/**
 * Strips the leading "如果…追问:" instruction and one surrounding quote
 * character from an answer-key hint. The colon and the quotes are matched in
 * both ASCII and full-width / typographic forms, written as escapes so the
 * pattern survives editors that normalize look-alike characters.
 */
const cleanGraderFollowupHint = (hint: string): string =>
  hint
    .replace(/^如果.+?追问[:\uFF1A]\s*/i, '')
    .replace(
      /^["\u201C\u201D\u300C\u300E]|["\u201C\u201D\u300D\u300F]$/g,
      '',
    );

/** Builds the grading system prompt for one question in the requested locale. */
const buildGraderSystemPrompt = (
  locale: GraderLocale,
  questionText: unknown,
  keyPoints: unknown,
  judgment: unknown,
): string => {
  const keyPointsText = Array.isArray(keyPoints) ? keyPoints.join(', ') : '';

  let prompt: string;
  if (locale === 'zh') {
    prompt = `你是一位专业的考官。
请根据以下问题和关键点对用户的回答进行评分。
重要提示:
1. **你必须使用以下语言提供反馈:中文 (Simplified Chinese)**。
2. 即使用户的回答或知识库内容涉及其他语言,请确保你的反馈和解释依然严格使用中文。不要夹杂日文。
问题:${questionText}
预期的关键点:${keyPointsText}
评估标准:
1. 准确性:他们是否正确覆盖了关键点?
2. 完整性:他们是否遗漏了任何重要内容?
3. 深度:解释是否充分?
**重要:评分请给部分分数。不完全正确不等于0分——回答方向对、意思接近但不够完整时请给5-7分。完全不沾边才给0-2分。**
请提供:
1. 0 到 10 的评分。
2. 建设性的反馈。
3. 如果回答不完整或不清晰,需要进一步解释,请将 'should_follow_up' 标志设为 true。
请以 JSON 格式返回响应:
{
"score": 8,
"feedback": "...",
"should_follow_up": false
}`;
  } else if (locale === 'ja') {
    prompt = `あなたは専門的な試験官です。
以下の質問とキーポイントに基づいて、ユーザーの回答を採点してください。
重要事項:
1. **フィードバックは必ず次の言語で提供してください:日本語**。
2. ユーザーの回答やナレッジベースの内容に他の言語(中国語や英語など)が含まれている場合でも、フィードバックと説明は必ず日本語のみで行ってください。中国語が混ざらないよう厳格に注意してください。
質問:${questionText}
期待されるキーポイント:${keyPointsText}
評価基準:
1. 正確性:キーポイントを正確に網羅していますか?
2. 網羅性:重要な内容が欠落していませんか?
3. 深さ:説明は十分ですか?
**重要:点数は部分点をつけてください。完全に正解でなくても0点ではありません——方向性が合っていて、部分的に正しい場合は5〜7点を与えてください。全く見当違いの場合のみ0〜2点としてください。**
以下を提供してください:
1. 0 から 10 までのスコア。
2. 建設的なフィードバック。
3. 回答が不完全または不明確で、さらなる説明が必要な場合は、'should_follow_up' フラグを true に設定してください。
JSON 形式で回答してください:
{
"score": 8,
"feedback": "...",
"should_follow_up": false
}`;
  } else {
    prompt = `You are an expert examiner.
Grade the user's answer based on the following question and key points.
IMPORTANT:
1. **You MUST provide the feedback in English.**
2. If the user's answer or knowledge base content references other languages, ensure your feedback and explanation remain strictly in English.
QUESTION: ${questionText}
EXPECTED KEY POINTS: ${keyPointsText}
Evaluate:
1. Accuracy: Did they cover the key points correctly?
2. Completeness: Did they miss anything important?
3. Depth: Is the explanation sufficient?
**Important: Give partial credit. Incomplete answers are not 0 — if the direction is right and partially correct, give 5-7. Only give 0-2 for completely off-target answers.**
Provide:
1. A score from 0 to 10.
2. Constructive feedback.
3. A boolean flag 'should_follow_up' if the answer is incomplete or unclear and needs further clarification.
Format your response as JSON:
{
"score": 8,
"feedback": "...",
"should_follow_up": false
}`;
  }

  if (!judgment) {
    return prompt;
  }
  // The optional per-question pass criteria are appended as a grading anchor.
  const anchor =
    locale === 'zh'
      ? `\n\n【判定依据(通过标准)】${judgment}`
      : locale === 'ja'
        ? `\n\n【判定基準(合格基準)】${judgment}`
        : `\n\n【Judgment Criteria (Pass Standard)】${judgment}`;
  return prompt + anchor;
};

/**
 * Node responsible for grading the user's answer and deciding if a follow-up is needed.
 *
 * Behaviour, in order:
 * - Returns an empty update when the last message is missing or is not a `HumanMessage`.
 * - Advances `currentQuestionIndex` when no question exists at the current index.
 * - Multiple-choice questions with a `correctAnswer` are graded locally
 *   (case-insensitive comparison, 10 or 0 points) without calling the model.
 * - Other questions are graded by the model, which is asked for a JSON object
 *   `{ score, feedback, should_follow_up }`. The score is clamped to 0-10.
 *   A follow-up is suppressed once the follow-up budget is used up, when the
 *   score is 8 or higher, or when the user essentially answered "I don't know".
 * - If the model call fails or its reply cannot be parsed, a default score of
 *   5 is recorded and the node advances to the next question.
 *
 * Every returned `scores` object carries over the scores already in state, so
 * the result is the same whether the channel merges or replaces updates.
 *
 * @param state - Current evaluation state (questions, messages, scores, language, ...).
 * @param config - Runnable config; `config.configurable.model` must hold the chat model.
 * @returns The partial state update produced by grading.
 * @throws Error when no model is present in the node configuration.
 */
export const graderNode = async (
  state: EvaluationState,
  config?: RunnableConfig,
): Promise<Partial<EvaluationState>> => {
  const { model } = (config?.configurable as any) || {};
  const { questions, currentQuestionIndex, messages } = state;
  const currentFollowUpCount = state.followUpCount || 0;

  console.log('[GraderNode] Entering node...', {
    currentIndex: currentQuestionIndex,
    numMessages: messages?.length,
    questionCount: state.questionCount,
    hasQuestions: !!questions?.length,
  });

  if (!model) {
    throw new Error('Missing model in node configuration');
  }

  const lastUserMessage = messages?.[messages.length - 1];
  console.log('[GraderNode] Incoming Messages Count:', messages?.length ?? 0);

  // Nothing to grade when the history is missing or empty.
  if (!lastUserMessage) {
    console.log('[GraderNode] No last message available, skipping grading.');
    return {};
  }

  const userContentText = graderUserText(lastUserMessage.content);
  console.log(
    '[GraderNode] Last Message Type:',
    lastUserMessage.constructor.name,
  );
  console.log(
    '[GraderNode] Last Message Content:',
    userContentText.substring(0, 50),
  );

  if (!(lastUserMessage instanceof HumanMessage)) {
    console.log(
      '[GraderNode] Last message is not HumanMessage, skipping grading.',
    );
    return {};
  }

  const locale = resolveGraderLocale(state.language);
  const labels = GRADER_LABELS[locale];

  const currentQuestion = questions?.[currentQuestionIndex];
  if (!currentQuestion) {
    console.error(
      `[GraderNode] Question at index ${currentQuestionIndex} not found!`,
    );
    return { currentQuestionIndex: currentQuestionIndex + 1 };
  }

  const scoreKey = currentQuestion.id || currentQuestionIndex.toString();
  const isChoice = currentQuestion.questionType === 'MULTIPLE_CHOICE';
  const expectedAnswer = currentQuestion.correctAnswer;
  const answerKey = (state.questionAnswerKey as any)?.[currentQuestion.id];

  // --- Multiple choice: deterministic local grading, no model call. ---
  if (isChoice && expectedAnswer) {
    const userAnswer = userContentText.trim();
    const isCorrect =
      userAnswer.toUpperCase() === String(expectedAnswer).trim().toUpperCase();
    console.log('[GraderNode] Choice grading:', {
      userAnswer,
      expectedAnswer,
      isCorrect,
    });

    const choiceScore = isCorrect ? 10 : 0;
    const feedback =
      locale === 'zh'
        ? isCorrect
          ? '✅ 正确'
          : `❌ 错误,正确答案是 ${expectedAnswer}`
        : locale === 'ja'
          ? isCorrect
            ? '✅ 正解'
            : `❌ 不正解。正解は ${expectedAnswer} です`
          : isCorrect
            ? '✅ Correct'
            : `❌ Incorrect. The correct answer is ${expectedAnswer}`;
    // The "Score:/Feedback:" envelope is left as-is in case consumers parse it.
    const feedbackMessage = new AIMessage(
      `Score: ${choiceScore}\nFeedback: ${feedback}`,
    );

    return {
      messages: [feedbackMessage],
      feedbackHistory: [feedbackMessage],
      scores: { ...state.scores, [scoreKey]: choiceScore },
      shouldFollowUp: false,
      followUpCount: 0,
      currentQuestionIndex: currentQuestionIndex + 1,
    };
  }

  // Shared shape for both failure paths: default score, move to next question.
  const buildFallbackUpdate = (text: string) =>
    ({
      feedbackHistory: [new AIMessage(text)],
      scores: { ...state.scores, [scoreKey]: GRADER_FALLBACK_SCORE },
      shouldFollowUp: false,
      followUpCount: 0,
      currentQuestionIndex: currentQuestionIndex + 1,
    }) as any;

  const systemPrompt = buildGraderSystemPrompt(
    locale,
    currentQuestion.questionText,
    currentQuestion.keyPoints,
    currentQuestion.judgment,
  );

  const followupHints: string[] = Array.isArray(answerKey?.followupHints)
    ? answerKey.followupHints
    : [];
  // One follow-up per available hint; otherwise a small fixed budget.
  const maxFollowUps =
    followupHints.length > 0
      ? followupHints.length
      : GRADER_DEFAULT_MAX_FOLLOW_UPS;

  console.log('[GraderNode] === START GRADING ===');
  console.log('[GraderNode] User answer length:', userContentText.length);
  console.log(
    '[GraderNode] Question:',
    currentQuestion?.questionText?.substring(0, 100),
  );
  console.log('[GraderNode] Target dimension:', currentQuestion?.dimension);

  // --- Step 1: ask the model for a grade. ---
  let response: any;
  try {
    response = await model.invoke([
      new SystemMessage(systemPrompt),
      new HumanMessage(userContentText),
    ]);
    console.log('[GraderNode] LLM invoke completed');
  } catch (error) {
    console.error('[GraderNode] LLM grading failed:', error);
    const unavailableText =
      locale === 'zh'
        ? `评分服务暂时不可用,默认给${GRADER_FALLBACK_SCORE}分。`
        : locale === 'ja'
          ? `採点サービスが一時的に利用できないため、デフォルトで${GRADER_FALLBACK_SCORE}点としました。`
          : `The grading service is temporarily unavailable; a default score of ${GRADER_FALLBACK_SCORE} was assigned.`;
    return buildFallbackUpdate(
      `${labels.score}: ${GRADER_FALLBACK_SCORE}/10\n\n${labels.feedback}: ${unavailableText}`,
    );
  }

  // --- Step 2: parse and validate the reply, then decide on a follow-up. ---
  try {
    const rawContent = graderResponseText(response?.content);
    console.log('[GraderNode] Raw AI response length:', rawContent.length);
    console.log('[GraderNode] Raw AI response:', rawContent.substring(0, 800));

    const result = safeParseJson<any>(rawContent);
    if (!result) {
      console.error(
        '[GraderNode] Failed to parse JSON. Raw content:',
        rawContent,
      );
      throw new Error('Invalid JSON format from AI');
    }

    const score = normalizeGraderScore(result.score);
    if (score === null) {
      throw new Error('Missing or non-numeric score from AI');
    }
    const feedbackText =
      typeof result.feedback === 'string' ? result.feedback : '';

    console.log('[GraderNode] === GRADING RESULT ===');
    console.log('[GraderNode] Parsed result:', JSON.stringify(result, null, 2));
    console.log('[GraderNode] Score value:', score);
    console.log('[GraderNode] Feedback value:', feedbackText.substring(0, 200));

    const saysIDontKnow = isGraderGiveUpAnswer(userContentText);
    const shouldFollowUp =
      result.should_follow_up === true &&
      currentFollowUpCount < maxFollowUps &&
      score < GRADER_NO_FOLLOW_UP_SCORE &&
      !saysIDontKnow;

    let followupHintMsg: AIMessage | null = null;
    if (shouldFollowUp && followupHints.length > 0) {
      // Walk through the hints in order, reusing the last one if we run out.
      const rawHint =
        followupHints[
          Math.min(currentFollowUpCount, followupHints.length - 1)
        ];
      if (typeof rawHint === 'string') {
        followupHintMsg = new AIMessage(cleanGraderFollowupHint(rawHint));
      }
    }

    const feedbackMessage = new AIMessage(
      `${labels.score}: ${score}/10\n\n${labels.feedback}: ${feedbackText}`,
    );
    const nextIndex = shouldFollowUp
      ? currentQuestionIndex
      : currentQuestionIndex + 1;

    console.log('[GraderNode] Final State decision:', {
      shouldFollowUp,
      nextIndex,
      score,
      saysIDontKnow,
    });

    return {
      feedbackHistory: followupHintMsg
        ? [feedbackMessage, followupHintMsg]
        : [feedbackMessage],
      scores: { ...state.scores, [scoreKey]: score },
      shouldFollowUp,
      followUpCount: shouldFollowUp ? currentFollowUpCount + 1 : 0,
      currentQuestionIndex: nextIndex,
    } as any;
  } catch (parseError) {
    console.error('[GraderNode] Failed to parse grade:', parseError);
    const parseFailedText =
      locale === 'zh'
        ? `评分解析失败,默认给${GRADER_FALLBACK_SCORE}分。`
        : locale === 'ja'
          ? `採点結果の解析に失敗したため、デフォルトで${GRADER_FALLBACK_SCORE}点としました。`
          : `Failed to parse the grading result; a default score of ${GRADER_FALLBACK_SCORE} was assigned.`;
    return buildFallbackUpdate(
      `${labels.score}: ${GRADER_FALLBACK_SCORE}/10\n\n${parseFailedText}`,
    );
  }
};