7fd2a4cda2
- Option display: use slice(1) instead of regex to strip letter prefix - Grader prompts: add explicit partial credit guidance (5-7 for partial, 0-2 only for off-target)
320 lines
12 KiB
TypeScript
320 lines
12 KiB
TypeScript
import { ChatOpenAI } from '@langchain/openai';
|
||
import {
|
||
SystemMessage,
|
||
HumanMessage,
|
||
AIMessage,
|
||
} from '@langchain/core/messages';
|
||
import { RunnableConfig } from '@langchain/core/runnables';
|
||
import { EvaluationState } from '../state';
|
||
import { safeParseJson } from '../../../common/json-utils';
|
||
|
||
/** Languages the grader has dedicated prompts and labels for; anything else falls back to English. */
type GraderLanguage = 'zh' | 'ja' | 'en';

/** Element type of `state.questions`, derived from the state type so it stays in sync with it. */
type GraderQuestion = NonNullable<EvaluationState['questions']>[number];

/** Minimal surface of the chat model that this node relies on. */
interface GraderModel {
  invoke(
    messages: Array<SystemMessage | HumanMessage>,
  ): Promise<{ content: unknown }>;
}

/** Localized fragments used when composing grader output messages. */
interface GraderLabels {
  score: string;
  feedback: string;
  judgmentHeading: string;
  parseFailed: string;
  serviceUnavailable: string;
}

const GRADER_LABELS: Record<GraderLanguage, GraderLabels> = {
  zh: {
    score: '得分',
    feedback: '反馈',
    judgmentHeading: '【判定依据(通过标准)】',
    parseFailed: '评分解析失败,默认给5分。',
    serviceUnavailable: '评分服务暂时不可用,默认给5分。',
  },
  ja: {
    score: 'スコア',
    feedback: 'フィードバック',
    judgmentHeading: '【判定基準(合格基準)】',
    parseFailed: '採点結果の解析に失敗したため、デフォルトで5点としました。',
    serviceUnavailable:
      '採点サービスが一時的に利用できないため、デフォルトで5点としました。',
  },
  en: {
    score: 'Score',
    feedback: 'Feedback',
    judgmentHeading: '【Judgment Criteria (Pass Standard)】',
    parseFailed: 'Failed to parse the grading result; defaulting to 5 points.',
    serviceUnavailable:
      'The grading service is temporarily unavailable; defaulting to 5 points.',
  },
};

/** Score recorded when the model call fails or its reply cannot be interpreted. */
const GRADER_FALLBACK_SCORE = 5;
/** Scores at or above this value never trigger a follow-up question. */
const GRADER_NO_FOLLOW_UP_SCORE = 8;
/** Follow-up budget used when the answer key provides no follow-up hints. */
const GRADER_DEFAULT_MAX_FOLLOW_UPS = 2;

/** CJK "I don't know" phrases; only honoured for very short answers (see the length limit below). */
const GRADER_GIVE_UP_PHRASES_CJK: readonly string[] = [
  '不知道',
  '不会',
  '不知',
  'わかりません',
  'わからん',
  '知らない',
  '不明',
  'わからない',
];
/** Latin-script "I don't know" phrases (matched against lower-cased text). */
const GRADER_GIVE_UP_PHRASES_LATIN: readonly string[] = ["don't know", 'no idea'];
/** Exclusive length limit for CJK give-up answers; kept small because e.g. "不会" also occurs in real answers. */
const GRADER_GIVE_UP_MAX_LENGTH_CJK = 10;
/**
 * Exclusive length limit for Latin give-up answers. The phrase "don't know" is
 * itself 10 characters long, so the CJK limit would make it unmatchable.
 */
const GRADER_GIVE_UP_MAX_LENGTH_LATIN = 30;

/** Flattens message content (plain string or structured parts) into a string. */
function graderContentToText(content: unknown): string {
  if (typeof content === 'string') {
    return content;
  }
  // JSON.stringify yields undefined for undefined input; normalise that to ''.
  return JSON.stringify(content) ?? '';
}

/** Returns true when a short answer is just a way of saying "I don't know". */
function graderIsGiveUpAnswer(answer: string): boolean {
  const text = answer
    .trim()
    .toLowerCase()
    .replace(/[‘’]/g, "'"); // typographic apostrophes, e.g. "don’t know"
  const containsAny = (phrases: readonly string[]): boolean =>
    phrases.some((phrase) => text.includes(phrase));

  return (
    (text.length < GRADER_GIVE_UP_MAX_LENGTH_CJK &&
      containsAny(GRADER_GIVE_UP_PHRASES_CJK)) ||
    (text.length < GRADER_GIVE_UP_MAX_LENGTH_LATIN &&
      containsAny(GRADER_GIVE_UP_PHRASES_LATIN))
  );
}

/**
 * Converts the model-provided score into a number clamped to [0, 10].
 * Returns null when the value cannot be read as a finite number.
 */
function graderNormalizeScore(raw: unknown): number | null {
  const value = typeof raw === 'string' ? Number.parseFloat(raw) : raw;
  if (typeof value !== 'number' || !Number.isFinite(value)) {
    return null;
  }
  return Math.min(10, Math.max(0, value));
}

/** Reads the string follow-up hints for one question out of the answer-key map, if any. */
function graderReadFollowupHints(
  answerKeys: unknown,
  questionId: unknown,
): string[] {
  if (answerKeys === null || typeof answerKeys !== 'object') {
    return [];
  }
  const entry = (answerKeys as Record<string, unknown>)[String(questionId)];
  if (entry === null || typeof entry !== 'object') {
    return [];
  }
  const hints = (entry as Record<string, unknown>).followupHints;
  return Array.isArray(hints)
    ? hints.filter((hint): hint is string => typeof hint === 'string')
    : [];
}

/**
 * Strips an authoring prefix of the form "如果…追问:" and surrounding quotes
 * from a follow-up hint so that only the question itself is shown.
 */
function graderCleanFollowupHint(hint: string): string {
  return hint
    .replace(/^如果.+?追问[::]\s*/, '')
    .replace(/^["“”「『]|["“”」』]$/g, '')
    .trim();
}

/** Builds the grading system prompt for the given language and question. */
function graderBuildSystemPrompt(
  language: GraderLanguage,
  question: GraderQuestion,
): string {
  const keyPoints = Array.isArray(question.keyPoints)
    ? question.keyPoints.join(', ')
    : '';

  let prompt: string;
  switch (language) {
    case 'zh':
      prompt = `你是一位专业的考官。
请根据以下问题和关键点对用户的回答进行评分。

重要提示:
1. **你必须使用以下语言提供反馈:中文 (Simplified Chinese)**。
2. 即使用户的回答或知识库内容涉及其他语言,请确保你的反馈和解释依然严格使用中文。不要夹杂日文。

问题:${question.questionText}
预期的关键点:${keyPoints}

评估标准:
1. 准确性:他们是否正确覆盖了关键点?
2. 完整性:他们是否遗漏了任何重要内容?
3. 深度:解释是否充分?

**重要:评分请给部分分数。不完全正确不等于0分——回答方向对、意思接近但不够完整时请给5-7分。完全不沾边才给0-2分。**

请提供:
1. 0 到 10 的评分。
2. 建设性的反馈。
3. 如果回答不完整或不清晰,需要进一步解释,请将 'should_follow_up' 标志设为 true。

请以 JSON 格式返回响应:
{
  "score": 8,
  "feedback": "...",
  "should_follow_up": false
}`;
      break;
    case 'ja':
      prompt = `あなたは専門的な試験官です。
以下の質問とキーポイントに基づいて、ユーザーの回答を採点してください。

重要事項:
1. **フィードバックは必ず次の言語で提供してください:日本語**。
2. ユーザーの回答やナレッジベースの内容に他の言語(中国語や英語など)が含まれている場合でも、フィードバックと説明は必ず日本語のみで行ってください。中国語が混ざらないよう厳格に注意してください。

質問:${question.questionText}
期待されるキーポイント:${keyPoints}

評価基準:
1. 正確性:キーポイントを正確に網羅していますか?
2. 網羅性:重要な内容が欠落していませんか?
3. 深さ:説明は十分ですか?

**重要:点数は部分点をつけてください。完全に正解でなくても0点ではありません——方向性が合っていて、部分的に正しい場合は5〜7点を与えてください。全く見当違いの場合のみ0〜2点としてください。**

以下を提供してください:
1. 0 から 10 までのスコア。
2. 建設的なフィードバック。
3. 回答が不完全または不明確で、さらなる説明が必要な場合は、'should_follow_up' フラグを true に設定してください。

JSON 形式で回答してください:
{
  "score": 8,
  "feedback": "...",
  "should_follow_up": false
}`;
      break;
    default:
      prompt = `You are an expert examiner.
Grade the user's answer based on the following question and key points.

IMPORTANT:
1. **You MUST provide the feedback in English.**
2. If the user's answer or knowledge base content references other languages, ensure your feedback and explanation remain strictly in English.

QUESTION: ${question.questionText}
EXPECTED KEY POINTS: ${keyPoints}

Evaluate:
1. Accuracy: Did they cover the key points correctly?
2. Completeness: Did they miss anything important?
3. Depth: Is the explanation sufficient?

**Important: Give partial credit. Incomplete answers are not 0 — if the direction is right and partially correct, give 5-7. Only give 0-2 for completely off-target answers.**

Provide:
1. A score from 0 to 10.
2. Constructive feedback.
3. A boolean flag 'should_follow_up' if the answer is incomplete or unclear and needs further clarification.

Format your response as JSON:
{
  "score": 8,
  "feedback": "...",
  "should_follow_up": false
}`;
      break;
  }

  // Optional per-question pass criteria are appended as an extra anchor for the grader.
  if (question.judgment) {
    prompt += `\n\n${GRADER_LABELS[language].judgmentHeading}${question.judgment}`;
  }
  return prompt;
}

/**
 * Node responsible for grading the user's answer and deciding if a follow-up is needed.
 *
 * Flow:
 * 1. If the last message is not a HumanMessage, nothing is graded and `{}` is returned.
 * 2. If there is no question at `currentQuestionIndex`, the index is advanced.
 * 3. MULTIPLE_CHOICE questions with a `correctAnswer` are graded locally by a
 *    case-insensitive comparison (10 or 0 points).
 * 4. Otherwise the configured chat model grades the answer and replies with JSON
 *    (`score`, `feedback`, `should_follow_up`). A follow-up is suppressed when the
 *    follow-up budget is used up, the score is 8 or higher, or the user effectively
 *    answered "I don't know".
 * 5. If the model call fails or its reply cannot be interpreted, a default score
 *    of 5 is recorded and the node moves on to the next question.
 *
 * @param state  Current evaluation graph state.
 * @param config Runnable config; `config.configurable.model` must hold the chat model.
 * @returns The partial state update produced by grading.
 * @throws Error when no model is present in the node configuration.
 */
export const graderNode = async (
  state: EvaluationState,
  config?: RunnableConfig,
): Promise<Partial<EvaluationState>> => {
  const model = config?.configurable?.model as GraderModel | undefined;
  const { currentQuestionIndex } = state;
  // The entry log below already treats these as possibly undefined, so guard them.
  const questions = state.questions ?? [];
  const messages = state.messages ?? [];
  const currentFollowUpCount = state.followUpCount ?? 0;

  console.log('[GraderNode] Entering node...', {
    currentIndex: currentQuestionIndex,
    numMessages: state.messages?.length,
    questionCount: state.questionCount,
    hasQuestions: questions.length > 0,
  });

  if (!model) {
    throw new Error('Missing model in node configuration');
  }

  const lastUserMessage = messages[messages.length - 1];

  console.log('[GraderNode] Incoming Messages Count:', messages.length);
  if (lastUserMessage) {
    console.log(
      '[GraderNode] Last Message Type:',
      lastUserMessage.constructor.name,
    );
    console.log(
      '[GraderNode] Last Message Content:',
      graderContentToText(lastUserMessage.content).substring(0, 50),
    );
  }

  if (!(lastUserMessage instanceof HumanMessage)) {
    console.log(
      '[GraderNode] Last message is not HumanMessage, skipping grading.',
    );
    return {};
  }

  const language: GraderLanguage =
    state.language === 'zh' ? 'zh' : state.language === 'ja' ? 'ja' : 'en';
  const labels = GRADER_LABELS[language];

  const currentQuestion = questions[currentQuestionIndex];
  if (!currentQuestion) {
    console.error(
      `[GraderNode] Question at index ${currentQuestionIndex} not found!`,
    );
    return { currentQuestionIndex: currentQuestionIndex + 1 };
  }

  const scoreKey = currentQuestion.id || currentQuestionIndex.toString();
  // Content may be structured (non-string); always work on its flattened text.
  const userContentText = graderContentToText(lastUserMessage.content);

  /**
   * State update that records `score` and moves on to the next question.
   * Existing scores are carried over on every path so that the result is the
   * same whether the state merges or replaces the `scores` map.
   */
  const finishQuestion = (
    message: AIMessage,
    score: number,
  ): Partial<EvaluationState> => ({
    feedbackHistory: [message],
    scores: { ...state.scores, [scoreKey]: score },
    shouldFollowUp: false,
    followUpCount: 0,
    currentQuestionIndex: currentQuestionIndex + 1,
  });

  const isChoice = currentQuestion.questionType === 'MULTIPLE_CHOICE';
  const expectedAnswer = currentQuestion.correctAnswer;

  if (isChoice && expectedAnswer) {
    const userAnswer = userContentText.trim();
    const isCorrect =
      userAnswer.toUpperCase() === String(expectedAnswer).trim().toUpperCase();
    const choiceScore = isCorrect ? 10 : 0;

    console.log('[GraderNode] Choice grading:', {
      userAnswer,
      expectedAnswer,
      isCorrect,
    });

    // NOTE(review): this message is always Chinese with English labels, regardless of
    // state.language. It is left unchanged because consumers may parse this exact
    // format — confirm before localizing.
    const feedback = isCorrect
      ? '✅ 正确'
      : `❌ 错误,正确答案是 ${expectedAnswer}`;
    const feedbackMessage = new AIMessage(
      `Score: ${choiceScore}\nFeedback: ${feedback}`,
    );

    return {
      messages: [feedbackMessage],
      ...finishQuestion(feedbackMessage, choiceScore),
    };
  }

  const systemPrompt = graderBuildSystemPrompt(language, currentQuestion);

  const followupHints = graderReadFollowupHints(
    state.questionAnswerKey,
    currentQuestion.id,
  );
  const maxFollowUps =
    followupHints.length > 0
      ? followupHints.length
      : GRADER_DEFAULT_MAX_FOLLOW_UPS;

  console.log('[GraderNode] === START GRADING ===');
  console.log('[GraderNode] User answer length:', userContentText.length);
  console.log(
    '[GraderNode] Question:',
    currentQuestion.questionText?.substring(0, 100),
  );
  console.log('[GraderNode] Target dimension:', currentQuestion.dimension);

  let response: { content: unknown };
  try {
    response = await model.invoke([
      new SystemMessage(systemPrompt),
      new HumanMessage(userContentText),
    ]);
    console.log('[GraderNode] LLM invoke completed');
  } catch (error: unknown) {
    console.error('[GraderNode] LLM grading failed:', error);
    const fallbackMsg = new AIMessage(
      `${labels.score}: ${GRADER_FALLBACK_SCORE}/10\n\n${labels.feedback}: ${labels.serviceUnavailable}`,
    );
    return finishQuestion(fallbackMsg, GRADER_FALLBACK_SCORE);
  }

  try {
    const rawContent = graderContentToText(response.content);
    console.log('[GraderNode] Raw AI response length:', rawContent.length);
    console.log('[GraderNode] Raw AI response:', rawContent.substring(0, 800));

    const result = safeParseJson<Record<string, unknown>>(rawContent);
    if (!result || typeof result !== 'object') {
      console.error(
        '[GraderNode] Failed to parse JSON. Raw content:',
        rawContent,
      );
      throw new Error('Invalid JSON format from AI');
    }

    // The model may return the score as a string or outside 0-10; normalise it
    // before it is stored or compared.
    const score = graderNormalizeScore(result.score);
    if (score === null) {
      throw new Error(
        `Invalid score from AI: ${graderContentToText(result.score)}`,
      );
    }
    const feedback =
      typeof result.feedback === 'string'
        ? result.feedback
        : result.feedback == null
          ? ''
          : graderContentToText(result.feedback);

    console.log('[GraderNode] === GRADING RESULT ===');
    console.log('[GraderNode] Parsed result:', JSON.stringify(result, null, 2));
    console.log('[GraderNode] Score value:', score);
    console.log('[GraderNode] Feedback value:', feedback.substring(0, 200));

    const saysIDontKnow = graderIsGiveUpAnswer(userContentText);
    const shouldFollowUp =
      result.should_follow_up === true &&
      currentFollowUpCount < maxFollowUps &&
      score < GRADER_NO_FOLLOW_UP_SCORE &&
      !saysIDontKnow;

    const feedbackMessage = new AIMessage(
      `${labels.score}: ${score}/10\n\n${labels.feedback}: ${feedback}`,
    );
    const feedbackHistoryMessages: AIMessage[] = [feedbackMessage];

    if (shouldFollowUp && followupHints.length > 0) {
      // One hint per follow-up round; clamp so the last hint is reused if needed.
      const hintIndex = Math.min(currentFollowUpCount, followupHints.length - 1);
      const hint = graderCleanFollowupHint(followupHints[hintIndex] ?? '');
      if (hint) {
        feedbackHistoryMessages.push(new AIMessage(hint));
      }
    }

    const nextIndex = shouldFollowUp
      ? currentQuestionIndex
      : currentQuestionIndex + 1;

    console.log('[GraderNode] Final State decision:', {
      shouldFollowUp,
      nextIndex,
      score,
      saysIDontKnow,
    });

    return {
      feedbackHistory: feedbackHistoryMessages,
      scores: { ...state.scores, [scoreKey]: score },
      shouldFollowUp,
      followUpCount: shouldFollowUp ? currentFollowUpCount + 1 : 0,
      currentQuestionIndex: nextIndex,
    };
  } catch (parseError: unknown) {
    console.error('[GraderNode] Failed to parse grade:', parseError);
    const fallbackMsg = new AIMessage(
      `${labels.score}: ${GRADER_FALLBACK_SCORE}/10\n\n${labels.parseFailed}`,
    );
    return finishQuestion(fallbackMsg, GRADER_FALLBACK_SCORE);
  }
};
|