From 35b1c6c37d42e0736f2df872c01969b2e07717f3 Mon Sep 17 00:00:00 2001 From: Developer Date: Thu, 21 May 2026 10:18:15 +0800 Subject: [PATCH] feat: judgment-anchored grading and per-question results MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Grader: inject judgment as pass criteria anchor in LLM prompt - Grader: use followupHints for follow-up direction (not generic text) - Grader: follow-up limit from followupHints.length instead of hardcoded 2 - Session: correctAnswer/judgment stored in questions, stripped during assessment - Frontend: per-question results panel with choice ✅/❌ + judgment display --- server/src/assessment/assessment.service.ts | 30 ++++++--- .../src/assessment/graph/nodes/grader.node.ts | 39 ++++++++---- web/components/views/AssessmentView.tsx | 62 ++++++++++++++++++- 3 files changed, 111 insertions(+), 20 deletions(-) diff --git a/server/src/assessment/assessment.service.ts b/server/src/assessment/assessment.service.ts index 0a2fa09..e4debbc 100644 --- a/server/src/assessment/assessment.service.ts +++ b/server/src/assessment/assessment.service.ts @@ -501,6 +501,8 @@ private async getModel(tenantId: string): Promise { questionText: item.questionText, questionType: item.questionType, options: item.options, + correctAnswer: item.correctAnswer, + judgment: item.judgment, keyPoints: item.keyPoints, difficulty: item.difficulty, dimension: item.dimension, @@ -768,7 +770,10 @@ const initialState: Partial = { } await this.sessionRepository.save(session); - const mappedData: any = this.sanitizeStateForClient({ ...finalData }); + const mappedData: any = this.sanitizeStateForClient( + { ...finalData }, + session.status !== AssessmentStatus.COMPLETED, + ); mappedData.messages = this.mapMessages(finalData.messages); mappedData.feedbackHistory = this.mapMessages( finalData.feedbackHistory || [], @@ -1139,7 +1144,10 @@ const initialState: Partial = { } await this.sessionRepository.save(session); - const mappedData: any = this.sanitizeStateForClient({ ...finalData }); + const mappedData: any = this.sanitizeStateForClient( + { ...finalData }, + session.status !== AssessmentStatus.COMPLETED, + ); mappedData.messages = this.mapMessages(finalData.messages); mappedData.feedbackHistory = this.mapMessages( finalData.feedbackHistory || [], @@ -1185,7 +1193,10 @@ const initialState: Partial = { values.feedbackHistory = this.mapMessages(values.feedbackHistory); } - return this.sanitizeStateForClient(values); + return this.sanitizeStateForClient( + values, + session.status !== AssessmentStatus.COMPLETED, + ); } /** @@ -1394,14 +1405,19 @@ const initialState: Partial = { /** * Strips sensitive fields before sending state to frontend. */ - private sanitizeStateForClient(data: any): any { + private sanitizeStateForClient(data: any, stripAnswers = true): any { if (!data) return data; const sanitized = { ...data }; - delete sanitized.questionAnswerKey; + if (stripAnswers) { + delete sanitized.questionAnswerKey; + } if (Array.isArray(sanitized.questions)) { sanitized.questions = sanitized.questions.map((q: any) => { - const { correctAnswer, judgment, followupHints, ...rest } = q; - return rest; + if (stripAnswers) { + const { correctAnswer, judgment, followupHints, ...rest } = q; + return rest; + } + return q; }); } return sanitized; diff --git a/server/src/assessment/graph/nodes/grader.node.ts b/server/src/assessment/graph/nodes/grader.node.ts index 3ffb521..92b4e68 100644 --- a/server/src/assessment/graph/nodes/grader.node.ts +++ b/server/src/assessment/graph/nodes/grader.node.ts @@ -68,10 +68,10 @@ export const graderNode = async ( } const isChoice = currentQuestion.questionType === 'MULTIPLE_CHOICE'; + const expectedAnswer = currentQuestion.correctAnswer; const answerKey = (state.questionAnswerKey as any)?.[currentQuestion.id]; - if (isChoice || answerKey?.correctAnswer) { - const expectedAnswer = answerKey?.correctAnswer || currentQuestion.correctAnswer; + if (isChoice && expectedAnswer) { const userAnswer = (lastUserMessage.content as string).trim(); const isCorrect = userAnswer.toUpperCase() === expectedAnswer?.toUpperCase(); @@ -173,12 +173,24 @@ Format your response as JSON: "should_follow_up": false }`; - const systemPrompt = isZh + let systemPrompt = isZh ? systemPromptZh : isJa ? systemPromptJa : systemPromptEn; + if (currentQuestion.judgment) { + const anchorText = isZh + ? `\n\n【判定依据(通过标准)】${currentQuestion.judgment}` + : isJa + ? `\n\n【判定基準(合格基準)】${currentQuestion.judgment}` + : `\n\n【Judgment Criteria (Pass Standard)】${currentQuestion.judgment}`; + systemPrompt += anchorText; + } + + const followupHints: string[] = answerKey?.followupHints || []; + const maxFollowUps = followupHints.length > 0 ? followupHints.length : 2; + const userContentText = typeof lastUserMessage.content === 'string' ? lastUserMessage.content @@ -212,10 +224,7 @@ Format your response as JSON: const scoreLabel = isZh ? '得分' : isJa ? 'スコア' : 'Score'; const feedbackLabel = isZh ? '反馈' : isJa ? 'フィードバック' : 'Feedback'; - - const feedbackMessage = new AIMessage( - `${scoreLabel}: ${result.score}/10\n\n${feedbackLabel}: ${result.feedback}`, - ); + let enhancedFeedback: string = result.feedback; const newScores = { ...state.scores, @@ -224,10 +233,6 @@ Format your response as JSON: let shouldFollowUp = result.should_follow_up === true; - // Breakout logic: - // 1. Max 1 follow-up per question - // 2. If score is decent (>= 8), don't follow up - // 3. If answer is short "don't know", don't follow up const normalizedContent = userContentText.trim().toLowerCase(); const saysIDontKnow = normalizedContent.length < 10 && @@ -242,10 +247,20 @@ Format your response as JSON: normalizedContent.includes('不明') || normalizedContent.includes('わからない')); - if (currentFollowUpCount >= 2 || result.score >= 8 || saysIDontKnow) { + if (currentFollowUpCount >= maxFollowUps || result.score >= 8 || saysIDontKnow) { shouldFollowUp = false; } + if (shouldFollowUp && followupHints.length > 0) { + const hint = followupHints[Math.min(currentFollowUpCount, followupHints.length - 1)]; + const hintLabel = isZh ? '追问方向' : isJa ? '追加の方向性' : 'Follow-up hint'; + enhancedFeedback = `${result.feedback}\n\n${hintLabel}: ${hint}`; + } + + const feedbackMessage = new AIMessage( + `${scoreLabel}: ${result.score}/10\n\n${feedbackLabel}: ${enhancedFeedback}`, + ); + console.log('[GraderNode] Final State decision:', { shouldFollowUp, nextIndex: shouldFollowUp diff --git a/web/components/views/AssessmentView.tsx b/web/components/views/AssessmentView.tsx index 401e531..16558bb 100644 --- a/web/components/views/AssessmentView.tsx +++ b/web/components/views/AssessmentView.tsx @@ -13,7 +13,8 @@ import { Star, Award, Trophy, - Trash2 + Trash2, + XCircle } from 'lucide-react'; import { motion, AnimatePresence } from 'framer-motion'; import { useLanguage } from '../../contexts/LanguageContext'; @@ -823,6 +824,65 @@ export const AssessmentView: React.FC = ({
+ {state?.questions && state.questions.length > 0 && ( +
+

+ + 每题详情 +

+
+ {state.questions.map((q: any, i: number) => { + const score = state.scores?.[q.id || (i + 1).toString()]; + const isChoice = q.questionType === 'MULTIPLE_CHOICE'; + const isCorrect = isChoice && q.correctAnswer && score >= 10; + return ( +
+
+
+ {isChoice + ? (isCorrect ? : ) + : {score !== undefined ? score : '?'} + } +
+
+

{q.questionText}

+ {isChoice && ( +
+ {q.options?.map((opt: string, oi: number) => { + const letter = String.fromCharCode(65 + oi); + const isAnswer = letter === q.correctAnswer; + return ( + + {letter}. {opt} + + ); + })} +
+ )} + {q.judgment && ( +
+

{q.judgment}

+
+ )} + {!isChoice && score !== undefined && ( + 得分: {score}/10 + )} +
+
+
+ ); + })} +
+
+ )}