feat: judgment-anchored grading and per-question results
- Grader: inject the question's judgment into the LLM system prompt as the pass-criteria anchor
- Grader: use followupHints for the follow-up direction (instead of generic text)
- Grader: take the follow-up limit from followupHints.length instead of a hardcoded 2 (falls back to 2 when no hints are defined)
- Session: correctAnswer/judgment are stored in questions and stripped from client payloads until the assessment is completed
- Frontend: per-question results panel with choice ✅/❌ + judgment display
This commit is contained in:
@@ -501,6 +501,8 @@ private async getModel(tenantId: string): Promise<ChatOpenAI> {
|
|||||||
questionText: item.questionText,
|
questionText: item.questionText,
|
||||||
questionType: item.questionType,
|
questionType: item.questionType,
|
||||||
options: item.options,
|
options: item.options,
|
||||||
|
correctAnswer: item.correctAnswer,
|
||||||
|
judgment: item.judgment,
|
||||||
keyPoints: item.keyPoints,
|
keyPoints: item.keyPoints,
|
||||||
difficulty: item.difficulty,
|
difficulty: item.difficulty,
|
||||||
dimension: item.dimension,
|
dimension: item.dimension,
|
||||||
@@ -768,7 +770,10 @@ const initialState: Partial<EvaluationState> = {
|
|||||||
}
|
}
|
||||||
await this.sessionRepository.save(session);
|
await this.sessionRepository.save(session);
|
||||||
|
|
||||||
const mappedData: any = this.sanitizeStateForClient({ ...finalData });
|
const mappedData: any = this.sanitizeStateForClient(
|
||||||
|
{ ...finalData },
|
||||||
|
session.status !== AssessmentStatus.COMPLETED,
|
||||||
|
);
|
||||||
mappedData.messages = this.mapMessages(finalData.messages);
|
mappedData.messages = this.mapMessages(finalData.messages);
|
||||||
mappedData.feedbackHistory = this.mapMessages(
|
mappedData.feedbackHistory = this.mapMessages(
|
||||||
finalData.feedbackHistory || [],
|
finalData.feedbackHistory || [],
|
||||||
@@ -1139,7 +1144,10 @@ const initialState: Partial<EvaluationState> = {
|
|||||||
}
|
}
|
||||||
await this.sessionRepository.save(session);
|
await this.sessionRepository.save(session);
|
||||||
|
|
||||||
const mappedData: any = this.sanitizeStateForClient({ ...finalData });
|
const mappedData: any = this.sanitizeStateForClient(
|
||||||
|
{ ...finalData },
|
||||||
|
session.status !== AssessmentStatus.COMPLETED,
|
||||||
|
);
|
||||||
mappedData.messages = this.mapMessages(finalData.messages);
|
mappedData.messages = this.mapMessages(finalData.messages);
|
||||||
mappedData.feedbackHistory = this.mapMessages(
|
mappedData.feedbackHistory = this.mapMessages(
|
||||||
finalData.feedbackHistory || [],
|
finalData.feedbackHistory || [],
|
||||||
@@ -1185,7 +1193,10 @@ const initialState: Partial<EvaluationState> = {
|
|||||||
values.feedbackHistory = this.mapMessages(values.feedbackHistory);
|
values.feedbackHistory = this.mapMessages(values.feedbackHistory);
|
||||||
}
|
}
|
||||||
|
|
||||||
return this.sanitizeStateForClient(values);
|
return this.sanitizeStateForClient(
|
||||||
|
values,
|
||||||
|
session.status !== AssessmentStatus.COMPLETED,
|
||||||
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -1394,14 +1405,19 @@ const initialState: Partial<EvaluationState> = {
|
|||||||
/**
|
/**
|
||||||
* Strips sensitive fields before sending state to frontend.
|
* Strips sensitive fields before sending state to frontend.
|
||||||
*/
|
*/
|
||||||
private sanitizeStateForClient(data: any): any {
|
private sanitizeStateForClient(data: any, stripAnswers = true): any {
|
||||||
if (!data) return data;
|
if (!data) return data;
|
||||||
const sanitized = { ...data };
|
const sanitized = { ...data };
|
||||||
delete sanitized.questionAnswerKey;
|
if (stripAnswers) {
|
||||||
|
delete sanitized.questionAnswerKey;
|
||||||
|
}
|
||||||
if (Array.isArray(sanitized.questions)) {
|
if (Array.isArray(sanitized.questions)) {
|
||||||
sanitized.questions = sanitized.questions.map((q: any) => {
|
sanitized.questions = sanitized.questions.map((q: any) => {
|
||||||
const { correctAnswer, judgment, followupHints, ...rest } = q;
|
if (stripAnswers) {
|
||||||
return rest;
|
const { correctAnswer, judgment, followupHints, ...rest } = q;
|
||||||
|
return rest;
|
||||||
|
}
|
||||||
|
return q;
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
return sanitized;
|
return sanitized;
|
||||||
|
|||||||
@@ -68,10 +68,10 @@ export const graderNode = async (
|
|||||||
}
|
}
|
||||||
|
|
||||||
const isChoice = currentQuestion.questionType === 'MULTIPLE_CHOICE';
|
const isChoice = currentQuestion.questionType === 'MULTIPLE_CHOICE';
|
||||||
|
const expectedAnswer = currentQuestion.correctAnswer;
|
||||||
const answerKey = (state.questionAnswerKey as any)?.[currentQuestion.id];
|
const answerKey = (state.questionAnswerKey as any)?.[currentQuestion.id];
|
||||||
|
|
||||||
if (isChoice || answerKey?.correctAnswer) {
|
if (isChoice && expectedAnswer) {
|
||||||
const expectedAnswer = answerKey?.correctAnswer || currentQuestion.correctAnswer;
|
|
||||||
const userAnswer = (lastUserMessage.content as string).trim();
|
const userAnswer = (lastUserMessage.content as string).trim();
|
||||||
const isCorrect = userAnswer.toUpperCase() === expectedAnswer?.toUpperCase();
|
const isCorrect = userAnswer.toUpperCase() === expectedAnswer?.toUpperCase();
|
||||||
|
|
||||||
@@ -173,12 +173,24 @@ Format your response as JSON:
|
|||||||
"should_follow_up": false
|
"should_follow_up": false
|
||||||
}`;
|
}`;
|
||||||
|
|
||||||
const systemPrompt = isZh
|
let systemPrompt = isZh
|
||||||
? systemPromptZh
|
? systemPromptZh
|
||||||
: isJa
|
: isJa
|
||||||
? systemPromptJa
|
? systemPromptJa
|
||||||
: systemPromptEn;
|
: systemPromptEn;
|
||||||
|
|
||||||
|
if (currentQuestion.judgment) {
|
||||||
|
const anchorText = isZh
|
||||||
|
? `\n\n【判定依据(通过标准)】${currentQuestion.judgment}`
|
||||||
|
: isJa
|
||||||
|
? `\n\n【判定基準(合格基準)】${currentQuestion.judgment}`
|
||||||
|
: `\n\n【Judgment Criteria (Pass Standard)】${currentQuestion.judgment}`;
|
||||||
|
systemPrompt += anchorText;
|
||||||
|
}
|
||||||
|
|
||||||
|
const followupHints: string[] = answerKey?.followupHints || [];
|
||||||
|
const maxFollowUps = followupHints.length > 0 ? followupHints.length : 2;
|
||||||
|
|
||||||
const userContentText =
|
const userContentText =
|
||||||
typeof lastUserMessage.content === 'string'
|
typeof lastUserMessage.content === 'string'
|
||||||
? lastUserMessage.content
|
? lastUserMessage.content
|
||||||
@@ -212,10 +224,7 @@ Format your response as JSON:
|
|||||||
|
|
||||||
const scoreLabel = isZh ? '得分' : isJa ? 'スコア' : 'Score';
|
const scoreLabel = isZh ? '得分' : isJa ? 'スコア' : 'Score';
|
||||||
const feedbackLabel = isZh ? '反馈' : isJa ? 'フィードバック' : 'Feedback';
|
const feedbackLabel = isZh ? '反馈' : isJa ? 'フィードバック' : 'Feedback';
|
||||||
|
let enhancedFeedback: string = result.feedback;
|
||||||
const feedbackMessage = new AIMessage(
|
|
||||||
`${scoreLabel}: ${result.score}/10\n\n${feedbackLabel}: ${result.feedback}`,
|
|
||||||
);
|
|
||||||
|
|
||||||
const newScores = {
|
const newScores = {
|
||||||
...state.scores,
|
...state.scores,
|
||||||
@@ -224,10 +233,6 @@ Format your response as JSON:
|
|||||||
|
|
||||||
let shouldFollowUp = result.should_follow_up === true;
|
let shouldFollowUp = result.should_follow_up === true;
|
||||||
|
|
||||||
// Breakout logic:
|
|
||||||
// 1. Max 1 follow-up per question
|
|
||||||
// 2. If score is decent (>= 8), don't follow up
|
|
||||||
// 3. If answer is short "don't know", don't follow up
|
|
||||||
const normalizedContent = userContentText.trim().toLowerCase();
|
const normalizedContent = userContentText.trim().toLowerCase();
|
||||||
const saysIDontKnow =
|
const saysIDontKnow =
|
||||||
normalizedContent.length < 10 &&
|
normalizedContent.length < 10 &&
|
||||||
@@ -242,10 +247,20 @@ Format your response as JSON:
|
|||||||
normalizedContent.includes('不明') ||
|
normalizedContent.includes('不明') ||
|
||||||
normalizedContent.includes('わからない'));
|
normalizedContent.includes('わからない'));
|
||||||
|
|
||||||
if (currentFollowUpCount >= 2 || result.score >= 8 || saysIDontKnow) {
|
if (currentFollowUpCount >= maxFollowUps || result.score >= 8 || saysIDontKnow) {
|
||||||
shouldFollowUp = false;
|
shouldFollowUp = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (shouldFollowUp && followupHints.length > 0) {
|
||||||
|
const hint = followupHints[Math.min(currentFollowUpCount, followupHints.length - 1)];
|
||||||
|
const hintLabel = isZh ? '追问方向' : isJa ? '追加の方向性' : 'Follow-up hint';
|
||||||
|
enhancedFeedback = `${result.feedback}\n\n${hintLabel}: ${hint}`;
|
||||||
|
}
|
||||||
|
|
||||||
|
const feedbackMessage = new AIMessage(
|
||||||
|
`${scoreLabel}: ${result.score}/10\n\n${feedbackLabel}: ${enhancedFeedback}`,
|
||||||
|
);
|
||||||
|
|
||||||
console.log('[GraderNode] Final State decision:', {
|
console.log('[GraderNode] Final State decision:', {
|
||||||
shouldFollowUp,
|
shouldFollowUp,
|
||||||
nextIndex: shouldFollowUp
|
nextIndex: shouldFollowUp
|
||||||
|
|||||||
@@ -13,7 +13,8 @@ import {
|
|||||||
Star,
|
Star,
|
||||||
Award,
|
Award,
|
||||||
Trophy,
|
Trophy,
|
||||||
Trash2
|
Trash2,
|
||||||
|
XCircle
|
||||||
} from 'lucide-react';
|
} from 'lucide-react';
|
||||||
import { motion, AnimatePresence } from 'framer-motion';
|
import { motion, AnimatePresence } from 'framer-motion';
|
||||||
import { useLanguage } from '../../contexts/LanguageContext';
|
import { useLanguage } from '../../contexts/LanguageContext';
|
||||||
@@ -823,6 +824,65 @@ export const AssessmentView: React.FC<AssessmentViewProps> = ({
|
|||||||
</div>
|
</div>
|
||||||
|
|
||||||
<div className="space-y-8">
|
<div className="space-y-8">
|
||||||
|
{state?.questions && state.questions.length > 0 && (
|
||||||
|
<div>
|
||||||
|
<h4 className="flex items-center gap-2.5 text-lg font-black text-slate-900 mb-4">
|
||||||
|
<CheckCircle size={20} className="text-indigo-600" />
|
||||||
|
每题详情
|
||||||
|
</h4>
|
||||||
|
<div className="space-y-4">
|
||||||
|
{state.questions.map((q: any, i: number) => {
|
||||||
|
const score = state.scores?.[q.id || (i + 1).toString()];
|
||||||
|
const isChoice = q.questionType === 'MULTIPLE_CHOICE';
|
||||||
|
const isCorrect = isChoice && q.correctAnswer && score >= 10;
|
||||||
|
return (
|
||||||
|
<div key={q.id || i} className="bg-white border border-slate-200 rounded-2xl p-5">
|
||||||
|
<div className="flex items-start gap-3">
|
||||||
|
<div className={cn(
|
||||||
|
"w-10 h-10 rounded-xl flex items-center justify-center shrink-0",
|
||||||
|
isChoice
|
||||||
|
? (isCorrect ? "bg-emerald-100 text-emerald-600" : "bg-red-100 text-red-600")
|
||||||
|
: score !== undefined ? "bg-indigo-100 text-indigo-600" : "bg-slate-100 text-slate-400"
|
||||||
|
)}>
|
||||||
|
{isChoice
|
||||||
|
? (isCorrect ? <CheckCircle size={20} /> : <XCircle size={20} />)
|
||||||
|
: <span className="text-sm font-black">{score !== undefined ? score : '?'}</span>
|
||||||
|
}
|
||||||
|
</div>
|
||||||
|
<div className="flex-1 min-w-0">
|
||||||
|
<p className="font-bold text-slate-800 text-sm leading-relaxed">{q.questionText}</p>
|
||||||
|
{isChoice && (
|
||||||
|
<div className="mt-2 flex flex-wrap gap-2 text-xs">
|
||||||
|
{q.options?.map((opt: string, oi: number) => {
|
||||||
|
const letter = String.fromCharCode(65 + oi);
|
||||||
|
const isAnswer = letter === q.correctAnswer;
|
||||||
|
return (
|
||||||
|
<span key={oi} className={cn(
|
||||||
|
"px-3 py-1 rounded-lg font-medium",
|
||||||
|
isAnswer ? "bg-emerald-100 text-emerald-700 border border-emerald-200" : "bg-slate-50 text-slate-500"
|
||||||
|
)}>
|
||||||
|
{letter}. {opt}
|
||||||
|
</span>
|
||||||
|
);
|
||||||
|
})}
|
||||||
|
</div>
|
||||||
|
)}
|
||||||
|
{q.judgment && (
|
||||||
|
<div className="mt-3 bg-blue-50/50 border border-blue-100 rounded-xl p-3">
|
||||||
|
<p className="text-xs text-slate-600 leading-relaxed">{q.judgment}</p>
|
||||||
|
</div>
|
||||||
|
)}
|
||||||
|
{!isChoice && score !== undefined && (
|
||||||
|
<span className="inline-block mt-2 text-xs text-slate-400">得分: {score}/10</span>
|
||||||
|
)}
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
);
|
||||||
|
})}
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
)}
|
||||||
<div>
|
<div>
|
||||||
<h4 className="flex items-center gap-2.5 text-lg font-black text-slate-900 mb-4">
|
<h4 className="flex items-center gap-2.5 text-lg font-black text-slate-900 mb-4">
|
||||||
<FileText size={20} className="text-indigo-600" />
|
<FileText size={20} className="text-indigo-600" />
|
||||||
|
|||||||
Reference in New Issue
Block a user