import { ChatOpenAI } from '@langchain/openai';
import {
  SystemMessage,
  HumanMessage,
  AIMessage,
} from '@langchain/core/messages';
import { RunnableConfig } from '@langchain/core/runnables';
import { EvaluationState } from '../state';
import { safeParseJson } from '../../../common/json-utils';

/** Languages with dedicated prompts and strings; anything else falls back to English. */
type GraderLanguage = 'zh' | 'ja' | 'en';

/** User-facing strings emitted by the grader, per language. */
interface GraderText {
  scoreLabel: string;
  feedbackLabel: string;
  choiceCorrect: string;
  choiceWrong: (expected: string) => string;
  keyPointsCovered: string;
  parseFailed: string;
  serviceUnavailable: string;
}

const GRADER_TEXT: Record<GraderLanguage, GraderText> = {
  zh: {
    scoreLabel: '得分',
    feedbackLabel: '反馈',
    choiceCorrect: '✅ 正确',
    choiceWrong: (expected) => `❌ 错误,正确答案是 ${expected}`,
    keyPointsCovered: '回答已覆盖关键点。',
    parseFailed: '评分解析失败,默认给5分。',
    serviceUnavailable: '评分服务暂时不可用,默认给5分。',
  },
  ja: {
    scoreLabel: 'スコア',
    feedbackLabel: 'フィードバック',
    choiceCorrect: '✅ 正解',
    choiceWrong: (expected) => `❌ 不正解。正解は ${expected} です`,
    keyPointsCovered: '回答は要点をカバーしています。',
    parseFailed: '採点結果を解析できなかったため、既定の5点としました。',
    serviceUnavailable:
      '採点サービスが一時的に利用できないため、既定の5点としました。',
  },
  en: {
    scoreLabel: 'Score',
    feedbackLabel: 'Feedback',
    choiceCorrect: '✅ Correct',
    choiceWrong: (expected) =>
      `❌ Incorrect. The correct answer is ${expected}`,
    keyPointsCovered: 'Your answer covers the key points.',
    parseFailed: 'The grading result could not be parsed; defaulting to 5.',
    serviceUnavailable:
      'The grading service is temporarily unavailable; defaulting to 5.',
  },
};

/** Give-up phrases recognised by the rule-based path. */
const RULE_GIVE_UP_PHRASES: readonly string[] = [
  '不知道',
  "don't know",
  'わかりません',
];

/** Give-up phrases recognised by the LLM path. */
const LLM_GIVE_UP_PHRASES: readonly string[] = [
  '不知道',
  '不会',
  "don't know",
  'no idea',
  '不知',
  'わかりません',
  'わからん',
  '知らない',
  '不明',
  'わからない',
];

const ASCII_ONLY = /^[\x00-\x7f]+$/;
// CJK phrases keep the original "answer shorter than 10 characters" rule.
const CJK_GIVE_UP_MAX_LENGTH = 10;
// "don't know" is itself 10 characters, so a limit of 10 could never match it.
const ASCII_GIVE_UP_MAX_LENGTH = 20;

/** Shape of the structured follow-up mapping stored in `followupHints`. */
interface FollowupBranch {
  keywords?: unknown;
  score?: number;
  followup?: unknown;
}

interface FollowupMapping {
  branches: Array<FollowupBranch | null | undefined>;
  defaultScore?: number;
  defaultFollowup?: unknown;
  maxFollowups?: number;
  completionThreshold?: number;
}

/** Outcome of rule-based (keyword coverage) grading. */
interface RuleGrade {
  score: number;
  shouldFollowUp: boolean;
  followup: string;
  matchedAll: boolean;
}

/** Fields read from the grader LLM's JSON reply; all are validated before use. */
interface LlmGrade {
  score?: unknown;
  feedback?: unknown;
  should_follow_up?: unknown;
  follow_up_question?: unknown;
}

/** Maps `state.language` to a supported language, defaulting to English. */
const resolveLanguage = (language: unknown): GraderLanguage =>
  language === 'zh' || language === 'ja' ? language : 'en';

/** Flattens message content to text: strings pass through, anything else is JSON-encoded. */
const contentToText = (content: unknown): string =>
  typeof content === 'string' ? content : JSON.stringify(content) ?? '';

/**
 * Returns true when a short answer is essentially "I don't know".
 * Matching is case-insensitive and treats a typographic apostrophe like a plain one.
 */
const isGiveUpAnswer = (
  answer: string,
  phrases: readonly string[],
): boolean => {
  const text = answer.trim().toLowerCase().replace(/’/g, "'");
  return phrases.some((phrase) => {
    const limit = ASCII_ONLY.test(phrase)
      ? ASCII_GIVE_UP_MAX_LENGTH
      : CJK_GIVE_UP_MAX_LENGTH;
    return text.length < limit && text.includes(phrase);
  });
};

/** Coerces an LLM-provided score to a number clamped to 0..10, or null if unusable. */
const normalizeScore = (value: unknown): number | null => {
  const numeric = typeof value === 'string' ? Number.parseFloat(value) : value;
  if (typeof numeric !== 'number' || !Number.isFinite(numeric)) {
    return null;
  }
  return Math.min(10, Math.max(0, numeric));
};

/**
 * Reads `followupHints` (a JSON string or an object) and returns it when it has a
 * `branches` array; returns null otherwise so the caller falls back to LLM grading.
 */
const parseFollowupMapping = (hints: unknown): FollowupMapping | null => {
  let candidate: unknown = hints;
  if (typeof hints === 'string') {
    try {
      candidate = JSON.parse(hints);
    } catch (error: unknown) {
      console.warn(
        '[GraderNode] followupHints is not valid JSON, falling back to LLM grading:',
        error,
      );
      return null;
    }
  }
  if (typeof candidate !== 'object' || candidate === null) {
    return null;
  }
  const mapping = candidate as FollowupMapping;
  return Array.isArray(mapping.branches) ? mapping : null;
};

/** Scores an answer by keyword coverage against the mapping's branches. */
const gradeByRules = (
  mapping: FollowupMapping,
  answer: string,
  followUpCount: number,
): RuleGrade => {
  const loweredAnswer = answer.toLowerCase();
  let score = mapping.defaultScore ?? 5;
  let followup =
    typeof mapping.defaultFollowup === 'string' ? mapping.defaultFollowup : '';
  let matchedAll = true;

  for (const branch of mapping.branches) {
    const keywords: string[] = Array.isArray(branch?.keywords)
      ? branch.keywords.filter(
          (kw: unknown): kw is string => typeof kw === 'string',
        )
      : [];
    if (keywords.length === 0) {
      continue;
    }
    const matchCount = keywords.filter((kw) =>
      loweredAnswer.includes(kw.toLowerCase()),
    ).length;
    // A branch counts as matched when at least half of its keywords appear.
    if (matchCount >= keywords.length * 0.5) {
      score = Math.max(score, branch?.score ?? 7);
      // The last matched branch that carries a follow-up wins.
      if (typeof branch?.followup === 'string' && branch.followup) {
        followup = branch.followup;
      }
    } else if (matchCount === 0) {
      matchedAll = false;
    }
  }

  const maxFollowUps = mapping.maxFollowups ?? 2;
  const completionThreshold = mapping.completionThreshold ?? 80;
  const tooShort = answer.trim().length < 8;
  const gaveUp = isGiveUpAnswer(answer, RULE_GIVE_UP_PHRASES);

  let shouldFollowUp = false;
  if (gaveUp || tooShort) {
    score = Math.min(score, 2);
  } else if (
    score < completionThreshold / 10 &&
    followUpCount < maxFollowUps
  ) {
    // Only follow up when there is an actual question to ask.
    shouldFollowUp = followup !== '';
  }

  return { score, shouldFollowUp, followup, matchedAll };
};

/**
 * Node responsible for grading the user's answer and deciding if a follow-up is needed.
 *
 * Grading strategy, in order:
 *  1. Multiple-choice questions with a `correctAnswer` are compared case-insensitively.
 *  2. Questions whose `followupHints` contain a `branches` array are graded by keyword coverage.
 *  3. Everything else is graded by the model found in `config.configurable.model`.
 *
 * @param state  Current evaluation state; reads `questions`, `currentQuestionIndex`,
 *               `messages`, `followUpCount`, `language` and `scores`.
 * @param config Runnable config; `configurable.model` must hold the chat model.
 * @returns A partial state update. Empty when the last message is not a `HumanMessage`.
 * @throws Error when no model is present in the node configuration.
 */
export const graderNode = async (
  state: EvaluationState,
  config?: RunnableConfig,
): Promise<Partial<EvaluationState>> => {
  const model = config?.configurable?.model;
  const { questions, currentQuestionIndex, messages } = state;
  const history = messages ?? [];
  const currentFollowUpCount = state.followUpCount || 0;

  console.log('[GraderNode] Entering node...', {
    currentIndex: currentQuestionIndex,
    numMessages: messages?.length,
    questionCount: state.questionCount,
    hasQuestions: !!questions?.length,
  });

  if (!model) {
    throw new Error('Missing model in node configuration');
  }

  const lastUserMessage = history[history.length - 1];
  console.log('[GraderNode] Incoming Messages Count:', history.length);

  if (lastUserMessage) {
    console.log(
      '[GraderNode] Last Message Type:',
      lastUserMessage.constructor.name,
    );
    // Safely extract content for logging
    console.log(
      '[GraderNode] Last Message Content:',
      contentToText(lastUserMessage.content).substring(0, 50),
    );
  }

  if (!(lastUserMessage instanceof HumanMessage)) {
    console.log(
      '[GraderNode] Last message is not HumanMessage, skipping grading.',
    );
    return {};
  }

  const language = resolveLanguage(state.language);
  const text = GRADER_TEXT[language];

  const currentQuestion = questions?.[currentQuestionIndex];
  if (!currentQuestion) {
    console.error(
      `[GraderNode] Question at index ${currentQuestionIndex} not found!`,
    );
    return { currentQuestionIndex: currentQuestionIndex + 1 };
  }

  const scoreKey = currentQuestion.id || currentQuestionIndex.toString();
  const userContentText = contentToText(lastUserMessage.content);

  // Builds the state update shared by every grading path. Scores are always merged
  // into the existing map so a path never drops previously recorded results.
  const buildUpdate = (
    feedbackHistory: AIMessage[],
    score: number,
    followUp: boolean,
  ): Partial<EvaluationState> => ({
    feedbackHistory,
    scores: { ...state.scores, [scoreKey]: score },
    shouldFollowUp: followUp,
    followUpCount: followUp ? currentFollowUpCount + 1 : 0,
    currentQuestionIndex: followUp
      ? currentQuestionIndex
      : currentQuestionIndex + 1,
  });

  // ── Multiple choice: exact, case-insensitive comparison ──
  const isChoice = currentQuestion.questionType === 'MULTIPLE_CHOICE';
  const expectedAnswer = currentQuestion.correctAnswer;
  if (isChoice && expectedAnswer) {
    const userAnswer = userContentText.trim();
    const isCorrect =
      userAnswer.toUpperCase() === String(expectedAnswer).trim().toUpperCase();
    console.log('[GraderNode] Choice grading:', {
      userAnswer,
      expectedAnswer,
      isCorrect,
    });

    const choiceScore = isCorrect ? 10 : 0;
    const feedback = isCorrect
      ? text.choiceCorrect
      : text.choiceWrong(String(expectedAnswer));
    const feedbackMessage = new AIMessage(
      `Score: ${choiceScore}\nFeedback: ${feedback}`,
    );
    return {
      ...buildUpdate([feedbackMessage], choiceScore, false),
      messages: [feedbackMessage],
    };
  }

  // ── Rule-based grading: use structured followupMapping if available ──
  if (currentQuestion.followupHints) {
    const mapping = parseFollowupMapping(currentQuestion.followupHints);
    if (mapping) {
      const grade = gradeByRules(
        mapping,
        userContentText,
        currentFollowUpCount,
      );
      const feedbackMessage = new AIMessage(
        `Score: ${grade.score}/10\n\nFeedback: ${
          grade.shouldFollowUp ? grade.followup : text.keyPointsCovered
        }`,
      );
      const feedbackHistoryMessages = grade.shouldFollowUp
        ? [feedbackMessage, new AIMessage(grade.followup)]
        : [feedbackMessage];

      console.log('[GraderNode] Rule grading:', {
        score: grade.score,
        shouldFollowUp: grade.shouldFollowUp,
        matchedAll: grade.matchedAll,
        followup: grade.followup.substring(0, 60),
      });

      return buildUpdate(
        feedbackHistoryMessages,
        grade.score,
        grade.shouldFollowUp,
      );
    }
  }

  // ── LLM grading ──
  // Guarded so that a question without key points cannot crash the node.
  const keyPointsText = Array.isArray(currentQuestion.keyPoints)
    ? currentQuestion.keyPoints.join(', ')
    : '';

  // NOTE: prompt text is kept verbatim; edit with care, the model's JSON contract depends on it.
  const systemPromptZh = `你是一位考官。请评分并给出反馈。 规则: 1. 只用中文。 2. 多轮追问时,用户回答含所有轮次(第N轮回答:标记),综合判断已覆盖内容。 问题:${currentQuestion.questionText} 关键点:${keyPointsText} 评分标准:不要求深度,不要求使用特定术语,只看用户是否理解了概念。 用户理解核心概念就给分。即使没有使用关键点中的原词,只要意思到位就算覆盖。 例如关键点是"上下文窗口有限",用户说"信息太多超过AI处理长度"也是覆盖。 评分原则:往宽了给分,不确定时就给高分。明显正确就给8-10分,部分正确5-7分,完全不沾边才0-2分。 返回JSON: - score: 0-10 - feedback: 评语 - should_follow_up: true/false - follow_up_question: 追问(仅true时需要,针对未覆盖的关键点,false时null) 请以 JSON 格式返回响应: {"score":0到10,"feedback":"评语","should_follow_up":true或false,"follow_up_question":"追问或null"} 示例(需要追问): {"score":6,"feedback":"提到了安全性和性能,未说明依赖关系。","should_follow_up":true,"follow_up_question":"你如何让AI在计划中明确任务依赖关系?"} 示例(不需追问): {"score":8,"feedback":"回答完整。","should_follow_up":false,"follow_up_question":null}`;

  const systemPromptJa = `あなたは試験官です。採点とフィードバックを提供してください。 ルール: 1. 日本語のみ使用。 2. 複数ラウンドの回答は「第N輪回答:」でマークされ、全ラウンドを総合判断。 質問:${currentQuestion.questionText} キーポイント:${keyPointsText} 評価基準:正確性、網羅性、深さ。 部分点可(5〜7点)、見当違いのみ0〜2点。 JSON形式: - score: 0〜10 - feedback: 評価 - should_follow_up: true/false - follow_up_question: 追質問(true時のみ、未カバーのポイントに焦点、false時null) JSON 形式で回答してください: {"score":0から10,"feedback":"評価","should_follow_up":trueかfalse,"follow_up_question":"追質問かnull"} 例(追質問が必要): {"score":6,"feedback":"安全性と性能に言及したが、依存関係が不明。","should_follow_up":true,"follow_up_question":"AIに計画内のタスク依存関係を明示させる方法は?"} 例(不要): {"score":8,"feedback":"回答は完全。","should_follow_up":false,"follow_up_question":null}`;

  const systemPromptEn = `You are an examiner. Grade and give feedback. Rules: 1. English only. 2. Multi-round answers are tagged "第N轮回答:". Consider all rounds. Question: ${currentQuestion.questionText} Key points: ${keyPointsText} Criteria: accuracy, completeness, depth. Give partial credit (5-7 for partial), 0-2 only for off-target. \nReturn JSON: - score: 0-10 - feedback: text - should_follow_up: true/false - follow_up_question: question (only when true, target uncovered points, null when false) Format as JSON: {"score":0-10,"feedback":"...","should_follow_up":true|false,"follow_up_question":"question or null"} Example (follow-up needed): {"score":6,"feedback":"Covered security and performance, missed dependencies.","should_follow_up":true,"follow_up_question":"How would you make the AI clarify task dependencies?"} Example (no follow-up): {"score":8,"feedback":"Complete answer.","should_follow_up":false,"follow_up_question":null}`;

  let systemPrompt =
    language === 'zh'
      ? systemPromptZh
      : language === 'ja'
        ? systemPromptJa
        : systemPromptEn;

  if (currentQuestion.judgment) {
    const anchorText =
      language === 'zh'
        ? `\n\n【判定依据(通过标准)】${currentQuestion.judgment}`
        : language === 'ja'
          ? `\n\n【判定基準(合格基準)】${currentQuestion.judgment}`
          : `\n\n【Judgment Criteria (Pass Standard)】${currentQuestion.judgment}`;
    systemPrompt += anchorText;
  }

  // `maxFollowUps` is not part of the declared question type, hence the cast.
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  const configuredMaxFollowUps: unknown = (currentQuestion as any).maxFollowUps;
  const maxFollowUps =
    typeof configuredMaxFollowUps === 'number' &&
    Number.isFinite(configuredMaxFollowUps)
      ? configuredMaxFollowUps
      : 2;

  // On a follow-up round, send every answer given for this question so the model
  // can judge cumulative coverage.
  let allAnswers = userContentText;
  if (currentFollowUpCount > 0) {
    allAnswers = history
      .filter((m) => m instanceof HumanMessage)
      .slice(-(currentFollowUpCount + 1))
      .map((m, i) => `第${i + 1}轮回答:${contentToText(m.content)}`)
      .join('\n\n');
  }

  console.log('[GraderNode] === START GRADING ===');
  console.log('[GraderNode] User answer length:', userContentText.length);
  console.log(
    '[GraderNode] Question:',
    currentQuestion?.questionText?.substring(0, 100),
  );
  console.log('[GraderNode] Target dimension:', currentQuestion?.dimension);

  let rawContent: string;
  try {
    const response = await model.invoke([
      new SystemMessage(systemPrompt),
      new HumanMessage(allAnswers),
    ]);
    console.log('[GraderNode] LLM invoke completed');
    // Content may be structured (an array of parts), so never assume a string.
    rawContent = contentToText(response?.content);
  } catch (error: unknown) {
    console.error('[GraderNode] LLM grading failed:', error);
    const fallbackMsg = new AIMessage(
      `${text.scoreLabel}: 5/10\n\n${text.feedbackLabel}: ${text.serviceUnavailable}`,
    );
    return buildUpdate([fallbackMsg], 5, false);
  }

  try {
    console.log('[GraderNode] Raw AI response length:', rawContent.length);
    console.log('[GraderNode] Raw AI response:', rawContent.substring(0, 800));

    const result = safeParseJson(rawContent) as LlmGrade | null | undefined;
    if (!result) {
      console.error(
        '[GraderNode] Failed to parse JSON. Raw content:',
        rawContent,
      );
      throw new Error('Invalid JSON format from AI');
    }

    // Reject replies without a usable numeric score instead of storing garbage.
    const score = normalizeScore(result.score);
    if (score === null) {
      throw new Error('Missing or non-numeric score from AI');
    }
    const feedback = typeof result.feedback === 'string' ? result.feedback : '';

    console.log('[GraderNode] === GRADING RESULT ===');
    console.log('[GraderNode] Parsed result:', JSON.stringify(result, null, 2));
    console.log('[GraderNode] Score value:', score);
    console.log('[GraderNode] Feedback value:', feedback.substring(0, 200));

    const saysIDontKnow = isGiveUpAnswer(userContentText, LLM_GIVE_UP_PHRASES);

    // Models sometimes emit the string "null" instead of JSON null.
    const followUpQuestion =
      typeof result.follow_up_question === 'string'
        ? result.follow_up_question.trim()
        : '';
    const hasFollowUpQuestion =
      followUpQuestion !== '' && followUpQuestion.toLowerCase() !== 'null';

    const shouldFollowUp =
      result.should_follow_up === true &&
      currentFollowUpCount < maxFollowUps &&
      score < 8 &&
      !saysIDontKnow &&
      hasFollowUpQuestion;

    const feedbackMessage = new AIMessage(
      `${text.scoreLabel}: ${score}/10\n\n${text.feedbackLabel}: ${feedback}`,
    );

    console.log('[GraderNode] Final State decision:', {
      shouldFollowUp,
      nextIndex: shouldFollowUp
        ? currentQuestionIndex
        : currentQuestionIndex + 1,
      score,
      saysIDontKnow,
    });

    const feedbackHistoryMessages = shouldFollowUp
      ? [feedbackMessage, new AIMessage(followUpQuestion)]
      : [feedbackMessage];

    return buildUpdate(feedbackHistoryMessages, score, shouldFollowUp);
  } catch (parseError: unknown) {
    console.error('[GraderNode] Failed to parse grade:', parseError);
    const fallbackMsg = new AIMessage(
      `${text.scoreLabel}: 5/10\n\n${text.parseFailed}`,
    );
    return buildUpdate([fallbackMsg], 5, false);
  }
};