import { ChatOpenAI } from '@langchain/openai';
import {
  SystemMessage,
  HumanMessage,
  AIMessage,
} from '@langchain/core/messages';
import { RunnableConfig } from '@langchain/core/runnables';
import { EvaluationState } from '../state';
import { safeParseJson } from '../../../common/json-utils';

/** Languages the grader can produce feedback in; anything else falls back to English. */
type FeedbackLanguage = 'zh' | 'ja' | 'en';

/** The only capability this node needs from the configured chat model. */
interface GradingModel {
  invoke(
    messages: Array<SystemMessage | HumanMessage>,
  ): Promise<{ content: unknown }>;
}

/** User-facing strings for one feedback language. */
interface GraderText {
  scoreLabel: string;
  feedbackLabel: string;
  choiceCorrect: string;
  choiceWrong: (expected: string) => string;
  parseFailed: string;
  serviceUnavailable: string;
}

const MAX_SCORE = 10;
/** Score recorded when the model call or the parsing of its reply fails. */
const FALLBACK_SCORE = 5;
/** Answers scored at or above this value never trigger a follow-up. */
const NO_FOLLOW_UP_SCORE = 8;
/** Follow-up budget used when the answer key provides no follow-up hints. */
const DEFAULT_MAX_FOLLOW_UPS = 2;

// "I don't know" detection only applies to short answers. Latin phrases need a
// larger budget than CJK ones: "don't know" alone is already 10 characters, so
// a single 10-character limit can never match it.
const DONT_KNOW_MAX_LENGTH_CJK = 10;
const DONT_KNOW_MAX_LENGTH_LATIN = 25;
const DONT_KNOW_PHRASES_CJK = [
  '不知道',
  '不会',
  '不知',
  'わかりません',
  'わからん',
  '知らない',
  '不明',
  'わからない',
] as const;
const DONT_KNOW_PHRASES_LATIN = ["don't know", 'no idea'] as const;

const GRADER_TEXT: Record<FeedbackLanguage, GraderText> = {
  zh: {
    scoreLabel: '得分',
    feedbackLabel: '反馈',
    choiceCorrect: '✅ 正确',
    choiceWrong: (expected) => `❌ 错误,正确答案是 ${expected}`,
    parseFailed: `评分解析失败,默认给${FALLBACK_SCORE}分。`,
    serviceUnavailable: `评分服务暂时不可用,默认给${FALLBACK_SCORE}分。`,
  },
  ja: {
    scoreLabel: 'スコア',
    feedbackLabel: 'フィードバック',
    choiceCorrect: '✅ 正解',
    choiceWrong: (expected) => `❌ 不正解です。正解は ${expected} です`,
    parseFailed: `採点結果の解析に失敗したため、既定の${FALLBACK_SCORE}点としました。`,
    serviceUnavailable: `採点サービスが一時的に利用できないため、既定の${FALLBACK_SCORE}点としました。`,
  },
  en: {
    scoreLabel: 'Score',
    feedbackLabel: 'Feedback',
    choiceCorrect: '✅ Correct',
    choiceWrong: (expected) => `❌ Incorrect. The correct answer is ${expected}`,
    parseFailed: `Failed to parse the grading result; a default score of ${FALLBACK_SCORE} was assigned.`,
    serviceUnavailable: `The grading service is temporarily unavailable; a default score of ${FALLBACK_SCORE} was assigned.`,
  },
};

/** JSON reply shape shown to the model at the end of every system prompt. */
const RESPONSE_FORMAT_EXAMPLE =
  '{ "score": 8, "feedback": "...", "should_follow_up": false }';

/** Maps the state's language value onto a supported feedback language. */
const resolveLanguage = (language: unknown): FeedbackLanguage =>
  language === 'zh' || language === 'ja' ? language : 'en';

/**
 * Turns message content into plain text. Strings are returned as-is; for an
 * array, the string `text` fields of its elements are concatenated; anything
 * else (or an array without text) is JSON-serialised.
 */
function extractText(content: unknown): string {
  if (typeof content === 'string') {
    return content;
  }
  if (Array.isArray(content)) {
    const parts = content.flatMap((block: unknown): string[] => {
      if (typeof block === 'string') {
        return [block];
      }
      if (typeof block === 'object' && block !== null && 'text' in block) {
        const text = (block as { text?: unknown }).text;
        return typeof text === 'string' ? [text] : [];
      }
      return [];
    });
    if (parts.length > 0) {
      return parts.join('');
    }
  }
  // JSON.stringify yields undefined for undefined input at runtime.
  return JSON.stringify(content) ?? '';
}

/** Coerces a model-provided score to a number within 0..MAX_SCORE, or null if unusable. */
function normalizeScore(raw: unknown): number | null {
  let value = Number.NaN;
  if (typeof raw === 'number') {
    value = raw;
  } else if (typeof raw === 'string' && raw.trim() !== '') {
    value = Number(raw.trim());
  }
  if (!Number.isFinite(value)) {
    return null;
  }
  return Math.min(MAX_SCORE, Math.max(0, value));
}

/** True when the answer is a short "I don't know"-style reply in zh, ja or en. */
function isDontKnowAnswer(answer: string): boolean {
  const normalized = answer
    .trim()
    .toLowerCase()
    // Treat typographic apostrophes like the ASCII one.
    .replace(/[\u2018\u2019]/g, "'");
  const containsAny = (phrases: readonly string[]): boolean =>
    phrases.some((phrase) => normalized.includes(phrase));

  return (
    (normalized.length < DONT_KNOW_MAX_LENGTH_CJK &&
      containsAny(DONT_KNOW_PHRASES_CJK)) ||
    (normalized.length < DONT_KNOW_MAX_LENGTH_LATIN &&
      containsAny(DONT_KNOW_PHRASES_LATIN))
  );
}

/**
 * Removes a leading "如果…追问:" instruction prefix and one wrapping quote
 * character on each side of a follow-up hint. Full-width colon and curly
 * quotes are accepted in addition to their ASCII forms.
 */
function cleanFollowUpHint(hint: string): string {
  return hint
    .replace(/^如果.+?追问[:\uFF1A]\s*/i, '')
    .replace(/^["\u201C\u201D「『]|["\u201C\u201D」』]$/g, '');
}

/** Builds the grading system prompt for the given language and question data. */
function buildSystemPrompt(
  language: FeedbackLanguage,
  questionText: string | null | undefined,
  keyPoints: readonly string[] | null | undefined,
  judgment: string | null | undefined,
): string {
  const keyPointList = (keyPoints ?? []).join(', ');

  let segments: string[];
  let judgmentHeading: string;
  if (language === 'zh') {
    judgmentHeading = '【判定依据(通过标准)】';
    segments = [
      '你是一位专业的考官。',
      '请根据以下问题和关键点对用户的回答进行评分。',
      '重要提示:',
      '1. **你必须使用以下语言提供反馈:中文 (Simplified Chinese)**。',
      '2. 即使用户的回答或知识库内容涉及其他语言,请确保你的反馈和解释依然严格使用中文。不要夹杂日文。',
      `问题:${questionText}`,
      `预期的关键点:${keyPointList}`,
      '评估标准:',
      '1. 准确性:他们是否正确覆盖了关键点?',
      '2. 完整性:他们是否遗漏了任何重要内容?',
      '3. 深度:解释是否充分?',
      '**重要:评分请给部分分数。不完全正确不等于0分——回答方向对、意思接近但不够完整时请给5-7分。完全不沾边才给0-2分。**',
      '请提供:',
      '1. 0 到 10 的评分。',
      '2. 建设性的反馈。',
      // The line break after "3." is part of the existing prompt text.
      "3. \n如果回答不完整或不清晰,需要进一步解释,请将 'should_follow_up' 标志设为 true。",
      '请以 JSON 格式返回响应:',
      RESPONSE_FORMAT_EXAMPLE,
    ];
  } else if (language === 'ja') {
    judgmentHeading = '【判定基準(合格基準)】';
    segments = [
      'あなたは専門的な試験官です。',
      '以下の質問とキーポイントに基づいて、ユーザーの回答を採点してください。',
      '重要事項:',
      '1. **フィードバックは必ず次の言語で提供してください:日本語**。',
      '2. ユーザーの回答やナレッジベースの内容に他の言語(中国語や英語など)が含まれている場合でも、フィードバックと説明は必ず日本語のみで行ってください。中国語が混ざらないよう厳格に注意してください。',
      `質問:${questionText}`,
      `期待されるキーポイント:${keyPointList}`,
      '評価基準:',
      '1. 正確性:キーポイントを正確に網羅していますか?',
      '2. 網羅性:重要な内容が欠落していませんか?',
      '3. 深さ:説明は十分ですか?',
      '**重要:点数は部分点をつけてください。完全に正解でなくても0点ではありません——方向性が合っていて、部分的に正しい場合は5〜7点を与えてください。全く見当違いの場合のみ0〜2点としてください。**',
      '以下を提供してください:',
      '1. 0 から 10 までのスコア。',
      '2. 建設的なフィードバック。',
      "3. 回答が不完全または不明確で、さらなる説明が必要な場合は、'should_follow_up' フラグを true に設定してください。",
      'JSON 形式で回答してください:',
      RESPONSE_FORMAT_EXAMPLE,
    ];
  } else {
    judgmentHeading = '【Judgment Criteria (Pass Standard)】';
    segments = [
      'You are an expert examiner.',
      "Grade the user's answer based on the following question and key points.",
      'IMPORTANT:',
      '1. **You MUST provide the feedback in English.**',
      "2. If the user's answer or knowledge base content references other languages, ensure your feedback and explanation remain strictly in English.",
      `QUESTION: ${questionText}`,
      `EXPECTED KEY POINTS: ${keyPointList}`,
      'Evaluate:',
      '1. Accuracy: Did they cover the key points correctly?',
      '2. Completeness: Did they miss anything important?',
      '3. Depth: Is the explanation sufficient?',
      '**Important: Give partial credit. Incomplete answers are not 0 — if the direction is right and partially correct, give 5-7. Only give 0-2 for completely off-target answers.**',
      'Provide:',
      '1. A score from 0 to 10.',
      '2. Constructive feedback.',
      "3. A boolean flag 'should_follow_up' if the answer is incomplete or unclear and needs further clarification.",
      'Format your response as JSON:',
      RESPONSE_FORMAT_EXAMPLE,
    ];
  }

  const prompt = segments.join(' ');
  return judgment ? `${prompt}\n\n${judgmentHeading}${judgment}` : prompt;
}

/**
 * Node responsible for grading the user's answer and deciding if a follow-up is needed.
 *
 * Flow:
 * - If the last message is not a `HumanMessage`, nothing is graded and an empty
 *   update is returned.
 * - If there is no question at `currentQuestionIndex`, the index is advanced.
 * - Multiple-choice questions with a `correctAnswer` are graded locally by a
 *   case-insensitive comparison (10 or 0 points) and never trigger a follow-up.
 * - Every other question is sent to the configured model, whose JSON reply
 *   (`score`, `feedback`, `should_follow_up`) is validated. A follow-up is
 *   suppressed when the follow-up budget is used up, the score is at least 8,
 *   or the user gave a short "I don't know" style answer.
 * - If the model call fails or its reply cannot be used, a default score of 5
 *   is recorded and the node moves on to the next question.
 *
 * @param state - Current evaluation state (questions, messages, scores, language, ...).
 * @param config - Runnable config; `config.configurable.model` must hold the chat model.
 * @returns The partial state update to apply.
 * @throws Error when no model is present in the node configuration.
 */
export const graderNode = async (
  state: EvaluationState,
  config?: RunnableConfig,
): Promise<Partial<EvaluationState>> => {
  const model = (
    config?.configurable as { model?: GradingModel } | undefined
  )?.model;
  const { questions, currentQuestionIndex, messages } = state;
  const currentFollowUpCount = state.followUpCount || 0;

  console.log('[GraderNode] Entering node...', {
    currentIndex: currentQuestionIndex,
    numMessages: messages?.length,
    questionCount: state.questionCount,
    hasQuestions: !!questions?.length,
  });

  if (!model) {
    throw new Error('Missing model in node configuration');
  }

  const history = messages ?? [];
  const lastUserMessage = history[history.length - 1];
  console.log('[GraderNode] Incoming Messages Count:', history.length);
  if (lastUserMessage) {
    console.log(
      '[GraderNode] Last Message Type:',
      lastUserMessage.constructor.name,
    );
    console.log(
      '[GraderNode] Last Message Content:',
      extractText(lastUserMessage.content).substring(0, 50),
    );
  }

  if (!(lastUserMessage instanceof HumanMessage)) {
    console.log(
      '[GraderNode] Last message is not HumanMessage, skipping grading.',
    );
    return {};
  }

  const currentQuestion = questions?.[currentQuestionIndex];
  if (!currentQuestion) {
    console.error(
      `[GraderNode] Question at index ${currentQuestionIndex} not found!`,
    );
    return { currentQuestionIndex: currentQuestionIndex + 1 };
  }

  const text = GRADER_TEXT[resolveLanguage(state.language)];
  const language = resolveLanguage(state.language);
  const userContentText = extractText(lastUserMessage.content);
  const scoreKey = currentQuestion.id || currentQuestionIndex.toString();

  /** State update that records `score` and moves on to the next question. */
  const moveToNextQuestion = (
    score: number,
    feedbackHistory: AIMessage[],
  ): Partial<EvaluationState> => ({
    feedbackHistory,
    // Merge with earlier scores so the update is correct whether or not the
    // state channel itself merges score maps.
    scores: { ...state.scores, [scoreKey]: score },
    shouldFollowUp: false,
    followUpCount: 0,
    currentQuestionIndex: currentQuestionIndex + 1,
  });

  // --- Multiple choice: graded locally, no model call -----------------------
  const isChoice = currentQuestion.questionType === 'MULTIPLE_CHOICE';
  const expectedAnswer = currentQuestion.correctAnswer;
  if (isChoice && expectedAnswer) {
    const userAnswer = userContentText.trim();
    const isCorrect =
      userAnswer.toUpperCase() === String(expectedAnswer).trim().toUpperCase();
    console.log('[GraderNode] Choice grading:', {
      userAnswer,
      expectedAnswer,
      isCorrect,
    });

    const choiceScore = isCorrect ? MAX_SCORE : 0;
    const feedback = isCorrect
      ? text.choiceCorrect
      : text.choiceWrong(String(expectedAnswer));
    // The "Score:" / "Feedback:" labels of this message are kept as they were.
    const feedbackMessage = new AIMessage(
      `Score: ${choiceScore}\nFeedback: ${feedback}`,
    );
    return {
      messages: [feedbackMessage],
      ...moveToNextQuestion(choiceScore, [feedbackMessage]),
    };
  }

  // --- Open question: graded by the model -----------------------------------
  const systemPrompt = buildSystemPrompt(
    language,
    currentQuestion.questionText,
    currentQuestion.keyPoints,
    currentQuestion.judgment,
  );

  const answerKey = (
    state.questionAnswerKey as
      | Record<string, { followupHints?: unknown } | undefined>
      | null
      | undefined
  )?.[currentQuestion.id];
  const followupHints: unknown[] = Array.isArray(answerKey?.followupHints)
    ? answerKey.followupHints
    : [];
  const maxFollowUps =
    followupHints.length > 0 ? followupHints.length : DEFAULT_MAX_FOLLOW_UPS;

  console.log('[GraderNode] === START GRADING ===');
  console.log('[GraderNode] User answer length:', userContentText.length);
  console.log(
    '[GraderNode] Question:',
    currentQuestion?.questionText?.substring(0, 100),
  );
  console.log('[GraderNode] Target dimension:', currentQuestion?.dimension);

  let response: { content: unknown };
  try {
    response = await model.invoke([
      new SystemMessage(systemPrompt),
      new HumanMessage(userContentText),
    ]);
    console.log('[GraderNode] LLM invoke completed');
  } catch (error: unknown) {
    console.error('[GraderNode] LLM grading failed:', error);
    const fallbackMsg = new AIMessage(
      `${text.scoreLabel}: ${FALLBACK_SCORE}/${MAX_SCORE}\n\n${text.feedbackLabel}: ${text.serviceUnavailable}`,
    );
    return moveToNextQuestion(FALLBACK_SCORE, [fallbackMsg]);
  }

  try {
    const rawContent = extractText(response.content);
    console.log('[GraderNode] Raw AI response length:', rawContent.length);
    console.log('[GraderNode] Raw AI response:', rawContent.substring(0, 800));

    const parsed: unknown = safeParseJson(rawContent);
    if (typeof parsed !== 'object' || parsed === null) {
      console.error(
        '[GraderNode] Failed to parse JSON. Raw content:',
        rawContent,
      );
      throw new Error('Invalid JSON format from AI');
    }
    const result = parsed as {
      score?: unknown;
      feedback?: unknown;
      should_follow_up?: unknown;
    };

    // A missing, non-numeric or out-of-range score must not reach the state.
    const score = normalizeScore(result.score);
    if (score === null) {
      console.error('[GraderNode] Unusable score in AI response:', result.score);
      throw new Error('Invalid score from AI');
    }
    const feedback = typeof result.feedback === 'string' ? result.feedback : '';

    console.log('[GraderNode] === GRADING RESULT ===');
    console.log('[GraderNode] Parsed result:', JSON.stringify(result, null, 2));
    console.log('[GraderNode] Score value:', score);
    console.log('[GraderNode] Feedback value:', feedback.substring(0, 200));

    const saysIDontKnow = isDontKnowAnswer(userContentText);
    const shouldFollowUp =
      result.should_follow_up === true &&
      currentFollowUpCount < maxFollowUps &&
      score < NO_FOLLOW_UP_SCORE &&
      !saysIDontKnow;

    let followupHintMsg: AIMessage | null = null;
    if (shouldFollowUp && followupHints.length > 0) {
      const hint =
        followupHints[Math.min(currentFollowUpCount, followupHints.length - 1)];
      // Skip malformed hints instead of failing the whole grading step.
      if (typeof hint === 'string') {
        followupHintMsg = new AIMessage(cleanFollowUpHint(hint));
      }
    }

    const feedbackMessage = new AIMessage(
      `${text.scoreLabel}: ${score}/${MAX_SCORE}\n\n${text.feedbackLabel}: ${feedback}`,
    );
    const nextIndex = shouldFollowUp
      ? currentQuestionIndex
      : currentQuestionIndex + 1;

    console.log('[GraderNode] Final State decision:', {
      shouldFollowUp,
      nextIndex,
      score,
      saysIDontKnow,
    });

    return {
      feedbackHistory: followupHintMsg
        ? [feedbackMessage, followupHintMsg]
        : [feedbackMessage],
      scores: { ...state.scores, [scoreKey]: score },
      shouldFollowUp,
      followUpCount: shouldFollowUp ? currentFollowUpCount + 1 : 0,
      currentQuestionIndex: nextIndex,
    };
  } catch (parseError: unknown) {
    console.error('[GraderNode] Failed to parse grade:', parseError);
    const fallbackMsg = new AIMessage(
      `${text.scoreLabel}: ${FALLBACK_SCORE}/${MAX_SCORE}\n\n${text.parseFailed}`,
    );
    return moveToNextQuestion(FALLBACK_SCORE, [fallbackMsg]);
  }
};