From 02f4ab23f716c752221aa9278df815d38fe1eadb Mon Sep 17 00:00:00 2001 From: Developer Date: Thu, 21 May 2026 14:18:14 +0800 Subject: [PATCH] feat: LLM-generated adaptive follow-up questions - Grader: LLM outputs follow_up_question targeting uncovered keyPoints - Remove static followupHints usage in grading flow - maxFollowUps sourced from question.maxFollowUps (hints.length) - Clean answerKey: remove followupHints field - Three-language prompt update with examples and bad examples - Grader spec: add follow_up_question to mock responses --- server/src/assessment/assessment.service.ts | 6 +- .../graph/nodes/grader.node.spec.ts | 6 +- .../src/assessment/graph/nodes/grader.node.ts | 65 ++++++++++++------- 3 files changed, 48 insertions(+), 29 deletions(-) diff --git a/server/src/assessment/assessment.service.ts b/server/src/assessment/assessment.service.ts index eb4f9f2..6de006b 100644 --- a/server/src/assessment/assessment.service.ts +++ b/server/src/assessment/assessment.service.ts @@ -532,16 +532,16 @@ private async getModel(tenantId: string): Promise { difficulty: item.difficulty, dimension: item.dimension, basis: item.basis, + maxFollowUps: item.followupHints?.length || 0, }; }); - const answerKey: Record = {}; + const answerKey: Record = {}; selectedItems.forEach(item => { - if (item.correctAnswer || item.judgment || item.followupHints) { + if (item.correctAnswer || item.judgment) { answerKey[item.id] = { correctAnswer: item.correctAnswer, judgment: item.judgment, - followupHints: item.followupHints, }; } }); diff --git a/server/src/assessment/graph/nodes/grader.node.spec.ts b/server/src/assessment/graph/nodes/grader.node.spec.ts index 4fcf426..45b900d 100644 --- a/server/src/assessment/graph/nodes/grader.node.spec.ts +++ b/server/src/assessment/graph/nodes/grader.node.spec.ts @@ -45,7 +45,7 @@ describe('graderNode', () => { describe('breakout logic (shouldFollowUp overrides)', () => { it('should NOT follow up when followUpCount >= 2 even if LLM says follow up', async () => { - const model = mockModel({ score: 5, feedback: 'needs work', should_follow_up: true }); + const model = mockModel({ score: 5, feedback: 'needs work', should_follow_up: true, follow_up_question: 'More?' }); const state = baseState({ followUpCount: 2 }); const result = await graderNode(state, { configurable: { model } } as any); expect(result.shouldFollowUp).toBe(false); @@ -66,7 +66,7 @@ describe('graderNode', () => { }); it('should allow follow up when conditions are met', async () => { - const model = mockModel({ score: 5, feedback: 'incomplete', should_follow_up: true }); + const model = mockModel({ score: 5, feedback: 'incomplete', should_follow_up: true, follow_up_question: 'Can you elaborate?' }); const state = baseState({ followUpCount: 0 }); const result = await graderNode(state, { configurable: { model } } as any); expect(result.shouldFollowUp).toBe(true); @@ -92,7 +92,7 @@ describe('graderNode', () => { }); it('should keep currentQuestionIndex when following up', async () => { - const model = mockModel({ score: 5, feedback: 'needs work', should_follow_up: true }); + const model = mockModel({ score: 5, feedback: 'needs work', should_follow_up: true, follow_up_question: 'Can you clarify?' }); const state = baseState({ followUpCount: 0 }); const result = await graderNode(state, { configurable: { model } } as any); expect(result.currentQuestionIndex).toBe(0); diff --git a/server/src/assessment/graph/nodes/grader.node.ts b/server/src/assessment/graph/nodes/grader.node.ts index 8cedaae..1fde32b 100644 --- a/server/src/assessment/graph/nodes/grader.node.ts +++ b/server/src/assessment/graph/nodes/grader.node.ts @@ -69,7 +69,6 @@ export const graderNode = async ( const isChoice = currentQuestion.questionType === 'MULTIPLE_CHOICE'; const expectedAnswer = currentQuestion.correctAnswer; - const answerKey = (state.questionAnswerKey as any)?.[currentQuestion.id]; if (isChoice && expectedAnswer) { const userAnswer = (lastUserMessage.content as string).trim(); @@ -113,13 +112,20 @@ export const graderNode = async ( 1. 0 到 10 的评分。 2. 建设性的反馈。 3. 如果回答不完整或不清晰,需要进一步解释,请将 'should_follow_up' 标志设为 true。 +4. follow_up_question:当 should_follow_up 为 true 时必须填写——针对用户尚未覆盖的关键点提问,不得提问已涵盖的内容。false 时填 null。 请以 JSON 格式返回响应: -{ - "score": 8, - "feedback": "...", - "should_follow_up": false -}`; +{"score":0到10,"feedback":"评语","should_follow_up":true或false,"follow_up_question":"追问或null"} + +示例(需要追问): +{"score":6,"feedback":"提到了安全性和性能,未说明依赖关系。","should_follow_up":true,"follow_up_question":"你如何让AI在计划中明确任务依赖关系?"} + +示例(不需追问): +{"score":8,"feedback":"回答完整。","should_follow_up":false,"follow_up_question":null} + +反面示例(禁止这样做): +{"should_follow_up":true,"follow_up_question":"除了这些还有什么?"} +↑ 用户已列出安全性、性能具体内容,不应再泛泛追问"还有什么"。`; const systemPromptJa = `あなたは専門的な試験官です。 以下の質問とキーポイントに基づいて、ユーザーの回答を採点してください。 @@ -142,13 +148,20 @@ export const graderNode = async ( 1. 0 から 10 までのスコア。 2. 建設的なフィードバック。 3. 回答が不完全または不明確で、さらなる説明が必要な場合は、'should_follow_up' フラグを true に設定してください。 +4. follow_up_question:should_follow_up が true の場合必須——ユーザーがまだカバーしていないキーポイントに焦点を当て、既に回答済みの内容は質問しないこと。false の場合は null。 JSON 形式で回答してください: -{ - "score": 8, - "feedback": "...", - "should_follow_up": false -}`; +{"score":0から10,"feedback":"評価","should_follow_up":trueかfalse,"follow_up_question":"追質問かnull"} + +例(追質問が必要): +{"score":6,"feedback":"安全性と性能に言及したが、依存関係が不明。","should_follow_up":true,"follow_up_question":"AIに計画内のタスク依存関係を明示させる方法は?"} + +例(不要): +{"score":8,"feedback":"回答は完全。","should_follow_up":false,"follow_up_question":null} + +悪い例: +{"should_follow_up":true,"follow_up_question":"他に何かありますか?"} +↑ ユーザーが既に具体的内容を挙げているのに「他に何か」と聞くのは不適切。`; const systemPromptEn = `You are an expert examiner. Grade the user's answer based on the following question and key points. @@ -171,13 +184,20 @@ Provide: 1. A score from 0 to 10. 2. Constructive feedback. 3. A boolean flag 'should_follow_up' if the answer is incomplete or unclear and needs further clarification. +4. follow_up_question: Required when should_follow_up is true—target key points the user hasn't covered, do not ask about already-answered content. Set to null when false. -Format your response as JSON: -{ - "score": 8, - "feedback": "...", - "should_follow_up": false -}`; +Format as JSON: +{"score":0-10,"feedback":"...","should_follow_up":true|false,"follow_up_question":"question or null"} + +Example (follow-up needed): +{"score":6,"feedback":"Covered security and performance, missed dependencies.","should_follow_up":true,"follow_up_question":"How would you make the AI clarify task dependencies?"} + +Example (no follow-up): +{"score":8,"feedback":"Complete answer.","should_follow_up":false,"follow_up_question":null} + +Bad example: +{"should_follow_up":true,"follow_up_question":"Anything else?"} +↑ User already provided details, vague "anything else" is unacceptable.`; let systemPrompt = isZh ? systemPromptZh @@ -194,8 +214,7 @@ Format your response as JSON: systemPrompt += anchorText; } - const followupHints: string[] = answerKey?.followupHints || []; - const maxFollowUps = followupHints.length > 0 ? followupHints.length : 2; + const maxFollowUps = (currentQuestion as any).maxFollowUps ?? 2; const userContentText = typeof lastUserMessage.content === 'string' @@ -259,10 +278,10 @@ Format your response as JSON: } let followupHintMsg: AIMessage | null = null; - if (shouldFollowUp && followupHints.length > 0) { - let hint = followupHints[Math.min(currentFollowUpCount, followupHints.length - 1)]; - hint = hint.replace(/^如果.+?追问[::]\s*/i, '').replace(/^[""「『]|[""」』]$/g, ''); - followupHintMsg = new AIMessage(hint); + if (shouldFollowUp && result.follow_up_question && result.follow_up_question.trim()) { + followupHintMsg = new AIMessage(result.follow_up_question.trim()); + } else if (shouldFollowUp) { + shouldFollowUp = false; } const feedbackMessage = new AIMessage(