feat: judgment-anchored grading and per-question results
- Grader: inject judgment as the pass-criteria anchor in the LLM prompt
- Grader: use followupHints for follow-up direction (instead of generic text)
- Grader: derive the follow-up limit from followupHints.length instead of a hardcoded 2 (falls back to 2 when no hints are provided)
- Session: correctAnswer/judgment stored in questions, stripped during assessment
- Frontend: per-question results panel with choice ✅/❌ + judgment display
This commit is contained in:
@@ -68,10 +68,10 @@ export const graderNode = async (
|
||||
}
|
||||
|
||||
const isChoice = currentQuestion.questionType === 'MULTIPLE_CHOICE';
|
||||
const expectedAnswer = currentQuestion.correctAnswer;
|
||||
const answerKey = (state.questionAnswerKey as any)?.[currentQuestion.id];
|
||||
|
||||
if (isChoice || answerKey?.correctAnswer) {
|
||||
const expectedAnswer = answerKey?.correctAnswer || currentQuestion.correctAnswer;
|
||||
if (isChoice && expectedAnswer) {
|
||||
const userAnswer = (lastUserMessage.content as string).trim();
|
||||
const isCorrect = userAnswer.toUpperCase() === expectedAnswer?.toUpperCase();
|
||||
|
||||
@@ -173,12 +173,24 @@ Format your response as JSON:
|
||||
"should_follow_up": false
|
||||
}`;
|
||||
|
||||
const systemPrompt = isZh
|
||||
let systemPrompt = isZh
|
||||
? systemPromptZh
|
||||
: isJa
|
||||
? systemPromptJa
|
||||
: systemPromptEn;
|
||||
|
||||
if (currentQuestion.judgment) {
|
||||
const anchorText = isZh
|
||||
? `\n\n【判定依据(通过标准)】${currentQuestion.judgment}`
|
||||
: isJa
|
||||
? `\n\n【判定基準(合格基準)】${currentQuestion.judgment}`
|
||||
: `\n\n【Judgment Criteria (Pass Standard)】${currentQuestion.judgment}`;
|
||||
systemPrompt += anchorText;
|
||||
}
|
||||
|
||||
const followupHints: string[] = answerKey?.followupHints || [];
|
||||
const maxFollowUps = followupHints.length > 0 ? followupHints.length : 2;
|
||||
|
||||
const userContentText =
|
||||
typeof lastUserMessage.content === 'string'
|
||||
? lastUserMessage.content
|
||||
@@ -212,10 +224,7 @@ Format your response as JSON:
|
||||
|
||||
const scoreLabel = isZh ? '得分' : isJa ? 'スコア' : 'Score';
|
||||
const feedbackLabel = isZh ? '反馈' : isJa ? 'フィードバック' : 'Feedback';
|
||||
|
||||
const feedbackMessage = new AIMessage(
|
||||
`${scoreLabel}: ${result.score}/10\n\n${feedbackLabel}: ${result.feedback}`,
|
||||
);
|
||||
let enhancedFeedback: string = result.feedback;
|
||||
|
||||
const newScores = {
|
||||
...state.scores,
|
||||
@@ -224,10 +233,6 @@ Format your response as JSON:
|
||||
|
||||
let shouldFollowUp = result.should_follow_up === true;
|
||||
|
||||
// Breakout logic:
|
||||
// 1. Cap follow-ups per question at maxFollowUps (followupHints.length, or 2 when there are no hints)
|
||||
// 2. If score is decent (>= 8), don't follow up
|
||||
// 3. If answer is short "don't know", don't follow up
|
||||
const normalizedContent = userContentText.trim().toLowerCase();
|
||||
const saysIDontKnow =
|
||||
normalizedContent.length < 10 &&
|
||||
@@ -242,10 +247,20 @@ Format your response as JSON:
|
||||
normalizedContent.includes('不明') ||
|
||||
normalizedContent.includes('わからない'));
|
||||
|
||||
if (currentFollowUpCount >= 2 || result.score >= 8 || saysIDontKnow) {
|
||||
if (currentFollowUpCount >= maxFollowUps || result.score >= 8 || saysIDontKnow) {
|
||||
shouldFollowUp = false;
|
||||
}
|
||||
|
||||
if (shouldFollowUp && followupHints.length > 0) {
|
||||
const hint = followupHints[Math.min(currentFollowUpCount, followupHints.length - 1)];
|
||||
const hintLabel = isZh ? '追问方向' : isJa ? '追加の方向性' : 'Follow-up hint';
|
||||
enhancedFeedback = `${result.feedback}\n\n${hintLabel}: ${hint}`;
|
||||
}
|
||||
|
||||
const feedbackMessage = new AIMessage(
|
||||
`${scoreLabel}: ${result.score}/10\n\n${feedbackLabel}: ${enhancedFeedback}`,
|
||||
);
|
||||
|
||||
console.log('[GraderNode] Final State decision:', {
|
||||
shouldFollowUp,
|
||||
nextIndex: shouldFollowUp
|
||||
|
||||
Reference in New Issue
Block a user