// aurak/server/src/assessment/graph/nodes/grader.node.ts

import { ChatOpenAI } from '@langchain/openai';
import {
  SystemMessage,
  HumanMessage,
  AIMessage,
} from '@langchain/core/messages';
import { RunnableConfig } from '@langchain/core/runnables';
import { EvaluationState } from '../state';
import { safeParseJson } from '../../../common/json-utils';

/** Languages that have a dedicated grader prompt; any other value falls back to English. */
type GraderLanguage = 'zh' | 'ja' | 'en';

/** The only part of the chat model this node relies on. */
interface GraderModel {
  invoke(messages: unknown[]): Promise<{ content?: unknown } | undefined>;
}

/** Shape the grader prompt asks the model for; every field is untrusted until validated. */
interface RawGrade {
  score?: unknown;
  feedback?: unknown;
  should_follow_up?: unknown;
}

/** User-facing strings emitted by the grader, one set per supported language. */
interface GraderText {
  scoreLabel: string;
  feedbackLabel: string;
  judgmentHeading: string;
  choiceCorrect: string;
  choiceWrong: (expected: unknown) => string;
  parseFailed: string;
  serviceUnavailable: string;
}

/** Score recorded when the model cannot be reached or its reply cannot be used. */
const FALLBACK_SCORE = 5;
/** Upper bound of the grading scale (the lower bound is 0). */
const MAX_SCORE = 10;
/** Answers scoring at least this much never trigger a follow-up question. */
const FOLLOW_UP_SKIP_SCORE = 8;
/** Follow-up budget used when the answer key supplies no explicit hints. */
const DEFAULT_MAX_FOLLOW_UPS = 2;

const GRADER_TEXT: Record<GraderLanguage, GraderText> = {
  zh: {
    scoreLabel: '得分',
    feedbackLabel: '反馈',
    judgmentHeading: '【判定依据（通过标准）】',
    choiceCorrect: '✅ 正确',
    choiceWrong: (expected) => `❌ 错误，正确答案是 ${String(expected)}`,
    parseFailed: `评分解析失败，默认给${FALLBACK_SCORE}分。`,
    serviceUnavailable: `评分服务暂时不可用，默认给${FALLBACK_SCORE}分。`,
  },
  ja: {
    scoreLabel: 'スコア',
    feedbackLabel: 'フィードバック',
    judgmentHeading: '【判定基準（合格基準）】',
    choiceCorrect: '✅ 正解',
    choiceWrong: (expected) => `❌ 不正解です。正解は ${String(expected)} です`,
    parseFailed: `採点結果を解析できなかったため、暫定的に${FALLBACK_SCORE}点としました。`,
    serviceUnavailable: `採点サービスが一時的に利用できないため、暫定的に${FALLBACK_SCORE}点としました。`,
  },
  en: {
    scoreLabel: 'Score',
    feedbackLabel: 'Feedback',
    judgmentHeading: '【Judgment Criteria (Pass Standard)】',
    choiceCorrect: '✅ Correct',
    choiceWrong: (expected) =>
      `❌ Incorrect. The correct answer is ${String(expected)}`,
    parseFailed: `The grading result could not be parsed; a default score of ${FALLBACK_SCORE} was assigned.`,
    serviceUnavailable: `The grading service is temporarily unavailable; a default score of ${FALLBACK_SCORE} was assigned.`,
  },
};

/** Phrases that mark an answer as "I don't know" (matched on lower-cased text). */
const DONT_KNOW_PHRASES: readonly string[] = [
  '不知道',
  '不会',
  "don't know",
  'dont know',
  'no idea',
  '不知',
  'わかりません',
  'わからん',
  '知らない',
  '不明',
  'わからない',
];

/** Extra characters tolerated around a "don't know" phrase ("I ", "sorry, ", punctuation). */
const DONT_KNOW_SLACK = 8;

/** Maps the free-form `state.language` value onto a supported prompt language. */
const resolveGraderLanguage = (language: unknown): GraderLanguage =>
  language === 'zh' || language === 'ja' ? language : 'en';

/** Flattens a user message's content to text; non-string content is JSON-encoded. */
const userContentToText = (content: unknown): string =>
  typeof content === 'string' ? content : (JSON.stringify(content) ?? '');

/**
 * Flattens a model reply to text. Content-block arrays are reduced to the
 * concatenation of their string / `text` parts so the JSON payload can be parsed.
 */
const responseContentToText = (content: unknown): string => {
  if (typeof content === 'string') {
    return content;
  }
  if (Array.isArray(content)) {
    return content
      .map((part) =>
        typeof part === 'string'
          ? part
          : typeof part?.text === 'string'
            ? part.text
            : '',
      )
      .join('');
  }
  return content == null ? '' : (JSON.stringify(content) ?? '');
};

/**
 * Coerces the model-provided score to a number clamped to [0, MAX_SCORE].
 * Returns `null` when no usable number can be extracted.
 */
const normalizeScore = (raw: unknown): number | null => {
  const value = typeof raw === 'string' ? Number.parseFloat(raw) : raw;
  if (typeof value !== 'number' || !Number.isFinite(value)) {
    return null;
  }
  return Math.min(MAX_SCORE, Math.max(0, value));
};

/**
 * Detects short answers that amount to "I don't know".
 *
 * The length budget scales with the matched phrase: a flat `length < 10` limit
 * can never match the 10-character phrase "don't know". Everything that matched
 * under the flat limit still matches.
 */
const isDontKnowAnswer = (answer: string): boolean => {
  // Mobile keyboards often emit typographic apostrophes; fold them to ASCII.
  const normalized = answer
    .trim()
    .toLowerCase()
    .replace(/[\u2018\u2019]/g, "'");
  return DONT_KNOW_PHRASES.some(
    (phrase) =>
      normalized.includes(phrase) &&
      normalized.length < Math.max(10, phrase.length + DONT_KNOW_SLACK),
  );
};

/** Strips the "如果…追问：" lead-in and surrounding quotes from an answer-key hint. */
const cleanFollowUpHint = (hint: string): string =>
  hint
    .replace(/^如果.+?追问[：:]\s*/i, '')
    // Straight, curly and CJK corner quotes are all removed.
    .replace(/^["“”「『]|["“”」』]$/g, '')
    .trim();

/** Builds the system prompt for the requested language (only that one is rendered). */
const buildGraderPrompt = (
  language: GraderLanguage,
  questionText: unknown,
  keyPoints: unknown,
): string => {
  const question = String(questionText);
  // A question without key points is graded against an empty list instead of crashing.
  const keyPointList = Array.isArray(keyPoints) ? keyPoints.join(', ') : '';

  switch (language) {
    case 'zh':
      return `你是一位专业的考官。
请根据以下问题和关键点对用户的回答进行评分。

重要提示：
1. **你必须使用以下语言提供反馈：中文 (Simplified Chinese)**。
2. 即使用户的回答或知识库内容涉及其他语言，请确保你的反馈和解释依然严格使用中文。不要夹杂日文。

问题：${question}
预期的关键点：${keyPointList}

评估标准：
1. 准确性：他们是否正确覆盖了关键点？
2. 完整性：他们是否遗漏了任何重要内容？
3. 深度：解释是否充分？

**重要：评分请给部分分数。不完全正确不等于0分——回答方向对、意思接近但不够完整时请给5-7分。完全不沾边才给0-2分。**

请提供：
1. 0 到 10 的评分。
2. 建设性的反馈。
3. 如果回答不完整或不清晰，需要进一步解释，请将 'should_follow_up' 标志设为 true。

请以 JSON 格式返回响应：
{
  "score": 8,
  "feedback": "...",
  "should_follow_up": false
}`;
    case 'ja':
      return `あなたは専門的な試験官です。
以下の質問とキーポイントに基づいて、ユーザーの回答を採点してください。

重要事項：
1. **フィードバックは必ず次の言語で提供してください：日本語**。
2. ユーザーの回答やナレッジベースの内容に他の言語（中国語や英語など）が含まれている場合でも、フィードバックと説明は必ず日本語のみで行ってください。中国語が混ざらないよう厳格に注意してください。

質問：${question}
期待されるキーポイント：${keyPointList}

評価基準：
1. 正確性：キーポイントを正確に網羅していますか？
2. 網羅性：重要な内容が欠落していませんか？
3. 深さ：説明は十分ですか？

**重要：点数は部分点をつけてください。完全に正解でなくても0点ではありません——方向性が合っていて、部分的に正しい場合は5〜7点を与えてください。全く見当違いの場合のみ0〜2点としてください。**

以下を提供してください：
1. 0 から 10 までのスコア。
2. 建設的なフィードバック。
3. 回答が不完全または不明確で、さらなる説明が必要な場合は、'should_follow_up' フラグを true に設定してください。

JSON 形式で回答してください：
{
  "score": 8,
  "feedback": "...",
  "should_follow_up": false
}`;
    default:
      return `You are an expert examiner.
Grade the user's answer based on the following question and key points.

IMPORTANT:
1. **You MUST provide the feedback in English.**
2. If the user's answer or knowledge base content references other languages, ensure your feedback and explanation remain strictly in English.

QUESTION: ${question}
EXPECTED KEY POINTS: ${keyPointList}

Evaluate:
1. Accuracy: Did they cover the key points correctly?
2. Completeness: Did they miss anything important?
3. Depth: Is the explanation sufficient?

**Important: Give partial credit. Incomplete answers are not 0 — if the direction is right and partially correct, give 5-7. Only give 0-2 for completely off-target answers.**

Provide:
1. A score from 0 to 10.
2. Constructive feedback.
3. A boolean flag 'should_follow_up' if the answer is incomplete or unclear and needs further clarification.

Format your response as JSON:
{
  "score": 8,
  "feedback": "...",
  "should_follow_up": false
}`;
  }
};

/**
 * Node responsible for grading the user's answer and deciding if a follow-up is needed.
 *
 * Behaviour:
 * - Returns `{}` when the newest message is not a `HumanMessage`.
 * - Skips to the next index when no question exists at `currentQuestionIndex`.
 * - Multiple-choice questions with a `correctAnswer` are graded locally by a
 *   case-insensitive comparison (10 or 0 points) without calling the model.
 * - Everything else is graded by the model, which must reply with JSON
 *   `{ score, feedback, should_follow_up }`. The score is coerced to a number
 *   and clamped to 0–10.
 * - A follow-up is suppressed when the follow-up budget is used up, the score
 *   is at least 8, or the answer is a short "I don't know".
 * - If the model call fails or its reply is unusable, a score of 5 is recorded
 *   and the node advances to the next question.
 *
 * Every branch returns `scores` merged with the scores already in `state`, so
 * the update is correct whether the channel merges or replaces.
 *
 * @param state - Current evaluation state (questions, messages, scores, language…).
 * @param config - Runnable config; `config.configurable.model` must be a chat model.
 * @returns The partial state update produced by grading.
 * @throws Error when no model is supplied in `config.configurable`.
 */
export const graderNode = async (
  state: EvaluationState,
  config?: RunnableConfig,
): Promise<Partial<EvaluationState>> => {
  const model = (config?.configurable as { model?: GraderModel } | undefined)
    ?.model;
  const { questions, currentQuestionIndex, messages } = state;
  const currentFollowUpCount = state.followUpCount ?? 0;
  const history = messages ?? [];

  console.log('[GraderNode] Entering node...', {
    currentIndex: currentQuestionIndex,
    numMessages: messages?.length,
    questionCount: state.questionCount,
    hasQuestions: !!questions?.length,
  });

  if (!model) {
    throw new Error('Missing model in node configuration');
  }

  const lastUserMessage = history[history.length - 1];

  console.log('[GraderNode] Incoming Messages Count:', history.length);
  if (lastUserMessage) {
    console.log(
      '[GraderNode] Last Message Type:',
      lastUserMessage.constructor.name,
    );
    console.log(
      '[GraderNode] Last Message Content:',
      userContentToText(lastUserMessage.content).substring(0, 50),
    );
  }

  if (!(lastUserMessage instanceof HumanMessage)) {
    console.log(
      '[GraderNode] Last message is not HumanMessage, skipping grading.',
    );
    return {};
  }

  const currentQuestion = questions?.[currentQuestionIndex];
  if (!currentQuestion) {
    console.error(
      `[GraderNode] Question at index ${currentQuestionIndex} not found!`,
    );
    return { currentQuestionIndex: currentQuestionIndex + 1 };
  }

  const language = resolveGraderLanguage(state.language);
  const text = GRADER_TEXT[language];
  const userContentText = userContentToText(lastUserMessage.content);
  const scoreKey = currentQuestion.id || currentQuestionIndex.toString();

  /** State update that records `score`, clears follow-up state and advances. */
  const advanceWith = (
    score: number,
    feedbackHistory: AIMessage[],
  ): Partial<EvaluationState> => ({
    feedbackHistory,
    scores: { ...state.scores, [scoreKey]: score },
    shouldFollowUp: false,
    followUpCount: 0,
    currentQuestionIndex: currentQuestionIndex + 1,
  });

  const expectedAnswer = currentQuestion.correctAnswer;
  if (currentQuestion.questionType === 'MULTIPLE_CHOICE' && expectedAnswer) {
    const userAnswer = userContentText.trim();
    const isCorrect =
      userAnswer.toUpperCase() === String(expectedAnswer).trim().toUpperCase();

    console.log('[GraderNode] Choice grading:', {
      userAnswer,
      expectedAnswer,
      isCorrect,
    });

    const choiceScore = isCorrect ? MAX_SCORE : 0;
    const choiceFeedback = isCorrect
      ? text.choiceCorrect
      : text.choiceWrong(expectedAnswer);
    // The "Score:/Feedback:" envelope is unchanged; only the sentence is localized.
    const choiceMessage = new AIMessage(
      `Score: ${choiceScore}\nFeedback: ${choiceFeedback}`,
    );

    return {
      messages: [choiceMessage],
      ...advanceWith(choiceScore, [choiceMessage]),
    };
  }

  let systemPrompt = buildGraderPrompt(
    language,
    currentQuestion.questionText,
    currentQuestion.keyPoints,
  );
  if (currentQuestion.judgment) {
    systemPrompt += `\n\n${text.judgmentHeading}${currentQuestion.judgment}`;
  }

  // eslint-disable-next-line @typescript-eslint/no-explicit-any -- answer-key shape is not declared here
  const answerKey = (state.questionAnswerKey as any)?.[currentQuestion.id];
  const rawHints: unknown = answerKey?.followupHints;
  const followupHints: string[] = Array.isArray(rawHints)
    ? rawHints.filter((hint): hint is string => typeof hint === 'string')
    : [];
  const maxFollowUps =
    followupHints.length > 0 ? followupHints.length : DEFAULT_MAX_FOLLOW_UPS;

  console.log('[GraderNode] === START GRADING ===');
  console.log('[GraderNode] User answer length:', userContentText.length);
  console.log(
    '[GraderNode] Question:',
    String(currentQuestion.questionText).substring(0, 100),
  );
  console.log('[GraderNode] Target dimension:', currentQuestion.dimension);

  let response: { content?: unknown } | undefined;
  try {
    response = await model.invoke([
      new SystemMessage(systemPrompt),
      new HumanMessage(userContentText),
    ]);
    console.log('[GraderNode] LLM invoke completed');
  } catch (error) {
    console.error('[GraderNode] LLM grading failed:', error);
    return advanceWith(FALLBACK_SCORE, [
      new AIMessage(
        `${text.scoreLabel}: ${FALLBACK_SCORE}/${MAX_SCORE}\n\n${text.feedbackLabel}: ${text.serviceUnavailable}`,
      ),
    ]);
  }

  try {
    const rawContent = responseContentToText(response?.content);
    console.log('[GraderNode] Raw AI response length:', rawContent.length);
    console.log('[GraderNode] Raw AI response:', rawContent.substring(0, 800));

    const parsed = safeParseJson<RawGrade>(rawContent);
    if (!parsed || typeof parsed !== 'object') {
      console.error(
        '[GraderNode] Failed to parse JSON. Raw content:',
        rawContent,
      );
      throw new Error('Invalid JSON format from AI');
    }

    const score = normalizeScore(parsed.score);
    if (score === null) {
      throw new Error(
        `Invalid score from AI: ${String(JSON.stringify(parsed.score))}`,
      );
    }
    const feedback = typeof parsed.feedback === 'string' ? parsed.feedback : '';

    console.log('[GraderNode] === GRADING RESULT ===');
    console.log('[GraderNode] Parsed result:', JSON.stringify(parsed, null, 2));
    console.log('[GraderNode] Score value:', score);
    console.log('[GraderNode] Feedback value:', feedback.substring(0, 200));

    const saysIDontKnow = isDontKnowAnswer(userContentText);
    const shouldFollowUp =
      parsed.should_follow_up === true &&
      currentFollowUpCount < maxFollowUps &&
      score < FOLLOW_UP_SKIP_SCORE &&
      !saysIDontKnow;

    const feedbackHistory: AIMessage[] = [
      new AIMessage(
        `${text.scoreLabel}: ${score}/${MAX_SCORE}\n\n${text.feedbackLabel}: ${feedback}`,
      ),
    ];

    if (shouldFollowUp && followupHints.length > 0) {
      // Once the hints run out, the last one is reused.
      const hintIndex = Math.min(currentFollowUpCount, followupHints.length - 1);
      const hint = cleanFollowUpHint(followupHints[hintIndex] ?? '');
      if (hint) {
        feedbackHistory.push(new AIMessage(hint));
      }
    }

    const nextIndex = shouldFollowUp
      ? currentQuestionIndex
      : currentQuestionIndex + 1;

    console.log('[GraderNode] Final State decision:', {
      shouldFollowUp,
      nextIndex,
      score,
      saysIDontKnow,
    });

    return {
      feedbackHistory,
      scores: { ...state.scores, [scoreKey]: score },
      shouldFollowUp,
      followUpCount: shouldFollowUp ? currentFollowUpCount + 1 : 0,
      currentQuestionIndex: nextIndex,
    };
  } catch (parseError) {
    console.error('[GraderNode] Failed to parse grade:', parseError);
    return advanceWith(FALLBACK_SCORE, [
      new AIMessage(
        `${text.scoreLabel}: ${FALLBACK_SCORE}/${MAX_SCORE}\n\n${text.feedbackLabel}: ${text.parseFailed}`,
      ),
    ]);
  }
};