feat: judgment-anchored grading and per-question results

- Grader: inject the question's judgment into the LLM system prompt as the pass-criteria anchor
- Grader: use followupHints for follow-up direction (not generic text)
- Grader: follow-up limit taken from followupHints.length (falls back to 2 when no hints exist) instead of a hardcoded 2
- Session: correctAnswer/judgment stored in questions; stripped from client-facing state until the assessment is completed
- Frontend: per-question results panel with choice correct/incorrect marks + judgment display
This commit is contained in:
Developer
2026-05-21 10:18:15 +08:00
parent 3993099907
commit 35b1c6c37d
3 changed files with 111 additions and 20 deletions
+23 -7
View File
@@ -501,6 +501,8 @@ private async getModel(tenantId: string): Promise<ChatOpenAI> {
questionText: item.questionText,
questionType: item.questionType,
options: item.options,
correctAnswer: item.correctAnswer,
judgment: item.judgment,
keyPoints: item.keyPoints,
difficulty: item.difficulty,
dimension: item.dimension,
@@ -768,7 +770,10 @@ const initialState: Partial<EvaluationState> = {
}
await this.sessionRepository.save(session);
const mappedData: any = this.sanitizeStateForClient({ ...finalData });
const mappedData: any = this.sanitizeStateForClient(
{ ...finalData },
session.status !== AssessmentStatus.COMPLETED,
);
mappedData.messages = this.mapMessages(finalData.messages);
mappedData.feedbackHistory = this.mapMessages(
finalData.feedbackHistory || [],
@@ -1139,7 +1144,10 @@ const initialState: Partial<EvaluationState> = {
}
await this.sessionRepository.save(session);
const mappedData: any = this.sanitizeStateForClient({ ...finalData });
const mappedData: any = this.sanitizeStateForClient(
{ ...finalData },
session.status !== AssessmentStatus.COMPLETED,
);
mappedData.messages = this.mapMessages(finalData.messages);
mappedData.feedbackHistory = this.mapMessages(
finalData.feedbackHistory || [],
@@ -1185,7 +1193,10 @@ const initialState: Partial<EvaluationState> = {
values.feedbackHistory = this.mapMessages(values.feedbackHistory);
}
return this.sanitizeStateForClient(values);
return this.sanitizeStateForClient(
values,
session.status !== AssessmentStatus.COMPLETED,
);
}
/**
@@ -1394,14 +1405,19 @@ const initialState: Partial<EvaluationState> = {
/**
* Strips sensitive fields before sending state to frontend.
*/
private sanitizeStateForClient(data: any): any {
private sanitizeStateForClient(data: any, stripAnswers = true): any {
if (!data) return data;
const sanitized = { ...data };
delete sanitized.questionAnswerKey;
if (stripAnswers) {
delete sanitized.questionAnswerKey;
}
if (Array.isArray(sanitized.questions)) {
sanitized.questions = sanitized.questions.map((q: any) => {
const { correctAnswer, judgment, followupHints, ...rest } = q;
return rest;
if (stripAnswers) {
const { correctAnswer, judgment, followupHints, ...rest } = q;
return rest;
}
return q;
});
}
return sanitized;
@@ -68,10 +68,10 @@ export const graderNode = async (
}
const isChoice = currentQuestion.questionType === 'MULTIPLE_CHOICE';
const expectedAnswer = currentQuestion.correctAnswer;
const answerKey = (state.questionAnswerKey as any)?.[currentQuestion.id];
if (isChoice || answerKey?.correctAnswer) {
const expectedAnswer = answerKey?.correctAnswer || currentQuestion.correctAnswer;
if (isChoice && expectedAnswer) {
const userAnswer = (lastUserMessage.content as string).trim();
const isCorrect = userAnswer.toUpperCase() === expectedAnswer?.toUpperCase();
@@ -173,12 +173,24 @@ Format your response as JSON:
"should_follow_up": false
}`;
const systemPrompt = isZh
let systemPrompt = isZh
? systemPromptZh
: isJa
? systemPromptJa
: systemPromptEn;
if (currentQuestion.judgment) {
const anchorText = isZh
? `\n\n【判定依据(通过标准)】${currentQuestion.judgment}`
: isJa
? `\n\n【判定基準(合格基準)】${currentQuestion.judgment}`
: `\n\n【Judgment Criteria (Pass Standard)】${currentQuestion.judgment}`;
systemPrompt += anchorText;
}
const followupHints: string[] = answerKey?.followupHints || [];
const maxFollowUps = followupHints.length > 0 ? followupHints.length : 2;
const userContentText =
typeof lastUserMessage.content === 'string'
? lastUserMessage.content
@@ -212,10 +224,7 @@ Format your response as JSON:
const scoreLabel = isZh ? '得分' : isJa ? 'スコア' : 'Score';
const feedbackLabel = isZh ? '反馈' : isJa ? 'フィードバック' : 'Feedback';
const feedbackMessage = new AIMessage(
`${scoreLabel}: ${result.score}/10\n\n${feedbackLabel}: ${result.feedback}`,
);
let enhancedFeedback: string = result.feedback;
const newScores = {
...state.scores,
@@ -224,10 +233,6 @@ Format your response as JSON:
let shouldFollowUp = result.should_follow_up === true;
// Breakout logic:
// 1. Max 1 follow-up per question
// 2. If score is decent (>= 8), don't follow up
// 3. If answer is short "don't know", don't follow up
const normalizedContent = userContentText.trim().toLowerCase();
const saysIDontKnow =
normalizedContent.length < 10 &&
@@ -242,10 +247,20 @@ Format your response as JSON:
normalizedContent.includes('不明') ||
normalizedContent.includes('わからない'));
if (currentFollowUpCount >= 2 || result.score >= 8 || saysIDontKnow) {
if (currentFollowUpCount >= maxFollowUps || result.score >= 8 || saysIDontKnow) {
shouldFollowUp = false;
}
if (shouldFollowUp && followupHints.length > 0) {
const hint = followupHints[Math.min(currentFollowUpCount, followupHints.length - 1)];
const hintLabel = isZh ? '追问方向' : isJa ? '追加の方向性' : 'Follow-up hint';
enhancedFeedback = `${result.feedback}\n\n${hintLabel}: ${hint}`;
}
const feedbackMessage = new AIMessage(
`${scoreLabel}: ${result.score}/10\n\n${feedbackLabel}: ${enhancedFeedback}`,
);
console.log('[GraderNode] Final State decision:', {
shouldFollowUp,
nextIndex: shouldFollowUp