forked from hangshuo652/aurak
0a9588abb7
- Add pagination support to findAll (page, limit query params) - Add findByTemplateId method to service - Add GET /by-template/:templateId endpoint to controller - Service already includes CRUD for QuestionBank and QuestionBankItem
411 lines
11 KiB
TypeScript
411 lines
11 KiB
TypeScript
import { Injectable, Logger } from '@nestjs/common';
|
|
import { I18nService } from '../i18n/i18n.service';
|
|
import { ConfigService } from '@nestjs/config';
|
|
import { ChatOpenAI } from '@langchain/openai';
|
|
import { HumanMessage } from '@langchain/core/messages';
|
|
import * as fs from 'fs/promises';
|
|
import {
|
|
VisionAnalysisResult,
|
|
VisionModelConfig,
|
|
BatchAnalysisResult,
|
|
ImageDescription,
|
|
} from './vision.interface';
|
|
|
|
/**
 * Analyzes document page images with an OpenAI-compatible vision model.
 *
 * Each image is read from disk, sent to the model as a base64 data URL together
 * with the `visionSystemPrompt` i18n message, and the reply is parsed into a
 * {@link VisionAnalysisResult}. Transient failures (rate limits, 5xx responses,
 * network errors) are retried a fixed number of times.
 */
@Injectable()
export class VisionService {
  /** Total attempts per image, including the first one. */
  private static readonly MAX_ATTEMPTS = 3;
  /** Fixed part of the wait between attempts, in milliseconds. */
  private static readonly BASE_RETRY_DELAY_MS = 3000;
  /** Upper bound of the random jitter added to the wait, in milliseconds. */
  private static readonly RETRY_JITTER_MS = 2000;
  /** Flat per-image price used for the batch cost estimate (assumed $0.01). */
  private static readonly COST_PER_IMAGE_USD = 0.01;
  /** MIME type used when a file extension is missing or unknown. */
  private static readonly DEFAULT_MIME_TYPE = 'image/jpeg';

  /** MIME types accepted by {@link isImageFile}. */
  private static readonly SUPPORTED_MIME_TYPES: ReadonlySet<string> = new Set([
    'image/jpeg',
    'image/jpg',
    'image/png',
    'image/gif',
    'image/bmp',
    'image/webp',
  ]);

  /**
   * Extension -> MIME type lookup. A Map (not an object literal) so that
   * extensions such as `constructor` cannot resolve to inherited
   * `Object.prototype` members.
   */
  private static readonly MIME_BY_EXTENSION: ReadonlyMap<string, string> =
    new Map([
      ['jpg', 'image/jpeg'],
      ['jpeg', 'image/jpeg'],
      ['png', 'image/png'],
      ['gif', 'image/gif'],
      ['bmp', 'image/bmp'],
      ['webp', 'image/webp'],
    ]);

  /** Node.js system error codes that indicate a transient network problem. */
  private static readonly TRANSIENT_NETWORK_CODES: ReadonlySet<string> =
    new Set([
      'ECONNRESET',
      'ECONNREFUSED',
      'ECONNABORTED',
      'ETIMEDOUT',
      'EPIPE',
      'EAI_AGAIN',
    ]);

  private readonly logger = new Logger(VisionService.name);

  constructor(
    private configService: ConfigService,
    private i18nService: I18nService,
  ) {}

  /**
   * Analyze a single image (document page), retrying transient failures.
   *
   * @param imagePath - Path of the image file to analyze.
   * @param modelConfig - Model id, API key and optional base URL of the vision model.
   * @param pageIndex - Page number used for logging and copied into the result.
   * @returns The parsed analysis result.
   * @throws Error with the `visionAnalysisFailed` i18n message when the error is
   *   not retryable or the last attempt failed; the original error is attached
   *   as `cause`.
   */
  async analyzeImage(
    imagePath: string,
    modelConfig: VisionModelConfig,
    pageIndex?: number,
  ): Promise<VisionAnalysisResult> {
    const maxRetries = VisionService.MAX_ATTEMPTS;

    for (let attempt = 1; attempt <= maxRetries; attempt++) {
      try {
        return await this.performAnalysis(imagePath, modelConfig, pageIndex);
      } catch (error: unknown) {
        const reason = this.describeError(error);

        if (attempt === maxRetries || !this.isRetryableError(error)) {
          const wrapped: Error & { cause?: unknown } = new Error(
            this.i18nService.formatMessage('visionAnalysisFailed', {
              message: reason,
            }),
          );
          // Keep the original error (status code, stack) reachable for callers.
          wrapped.cause = error;
          throw wrapped;
        }

        // 3-5 second random delay
        const delay =
          VisionService.BASE_RETRY_DELAY_MS +
          Math.random() * VisionService.RETRY_JITTER_MS;
        // `??` rather than `||` so that page 0 is reported as 0, not '?'.
        this.logger.warn(
          `⚠️ Failed to analyze page ${pageIndex ?? '?'} (${attempt}/${maxRetries}), retrying in ${delay.toFixed(0)}ms: ${reason}`,
        );

        await this.sleep(delay);
      }
    }

    // Unreachable in practice; present so every code path returns or throws.
    throw new Error(this.i18nService.getMessage('retryMechanismError'));
  }

  /**
   * Perform one analysis attempt: read the file, call the model, parse the reply.
   *
   * Errors are logged and re-thrown unchanged so that {@link analyzeImage} can
   * decide whether to retry.
   */
  private async performAnalysis(
    imagePath: string,
    modelConfig: VisionModelConfig,
    pageIndex?: number,
  ): Promise<VisionAnalysisResult> {
    try {
      // Load image and convert to a base64 data URL
      const imageBuffer = await fs.readFile(imagePath);
      const base64Image = imageBuffer.toString('base64');
      const mimeType = this.getMimeType(imagePath);

      // Create vision model instance
      const model = new ChatOpenAI({
        apiKey: modelConfig.apiKey,
        model: modelConfig.modelId,
        configuration: {
          baseURL: modelConfig.baseUrl,
        },
        temperature: 0.1, // Reduce randomness, increase consistency
      });

      const systemPrompt = this.i18nService.getMessage('visionSystemPrompt');

      const message = new HumanMessage({
        content: [
          {
            type: 'text',
            text: systemPrompt,
          },
          {
            type: 'image_url',
            image_url: {
              url: `data:${mimeType};base64,${base64Image}`,
            },
          },
        ],
      });

      // Call model
      this.logger.log(
        this.i18nService.formatMessage('visionModelCall', {
          model: modelConfig.modelId,
          page: pageIndex ?? 'single',
        }),
      );
      const response = await model.invoke([message]);

      // Message content may be a plain string or an array of content parts.
      const content = this.extractTextContent(response.content);
      const result = this.parseModelResponse(content, imagePath, pageIndex);

      this.logger.log(
        this.i18nService.formatMessage('visionAnalysisSuccess', {
          path: imagePath,
          page: pageIndex !== undefined ? ` (page ${pageIndex})` : '',
          textLen: result.text.length,
          imgCount: result.images.length,
          layout: result.layout,
          confidence: (result.confidence * 100).toFixed(1),
        }),
      );

      return result;
    } catch (error: unknown) {
      this.logger.error(
        this.i18nService.formatMessage('visionAnalysisFailed', {
          message: this.describeError(error),
        }),
      );
      this.logger.error(
        `Vision analysis error details: ${error instanceof Error ? error.stack : String(error)}`,
      );
      throw error; // Re-throw error for retry mechanism
    }
  }

  /**
   * Flatten model message content into a single string.
   *
   * Strings are returned as-is; for arrays, string items and the `text` field
   * of object items are concatenated and every other part is ignored.
   */
  private extractTextContent(content: unknown): string {
    if (typeof content === 'string') {
      return content;
    }
    if (Array.isArray(content)) {
      return content
        .map((part: unknown) => {
          if (typeof part === 'string') return part;
          if (typeof part === 'object' && part !== null) {
            const text = (part as { text?: unknown }).text;
            return typeof text === 'string' ? text : '';
          }
          return '';
        })
        .join('');
    }
    return content == null ? '' : String(content);
  }

  /**
   * Turn the model's reply into a {@link VisionAnalysisResult}.
   *
   * Markdown code fences are stripped first. If the remainder is a JSON object
   * its `text`, `images`, `layout` and `confidence` fields are used (with
   * defaults `''`, `[]`, `'unknown'` and `0.8`); anything else - invalid JSON or
   * JSON that is not an object - is returned as raw text with confidence `0.5`.
   */
  private parseModelResponse(
    rawContent: string,
    imagePath: string,
    pageIndex?: number,
  ): VisionAnalysisResult {
    // Clean up markdown code block tags
    const cleaned = rawContent
      .replace(/```json/gi, '')
      .replace(/```/g, '')
      .trim();

    let parsed: unknown;
    try {
      parsed = JSON.parse(cleaned);
    } catch {
      parsed = undefined;
    }

    if (typeof parsed !== 'object' || parsed === null || Array.isArray(parsed)) {
      // If parsing fails, treat entire content as text
      this.logger.warn(
        `Failed to parse JSON response for ${imagePath}, using raw text`,
      );
      return {
        text: cleaned,
        images: [],
        layout: 'unknown' as VisionAnalysisResult['layout'],
        confidence: 0.5,
        pageIndex,
      };
    }

    const fields = parsed as Record<string, unknown>;
    const layout =
      typeof fields.layout === 'string' && fields.layout !== ''
        ? fields.layout
        : 'unknown';

    return {
      text: this.coerceText(fields.text),
      images: (Array.isArray(fields.images)
        ? fields.images
        : []) as VisionAnalysisResult['images'],
      layout: layout as VisionAnalysisResult['layout'],
      confidence: this.coerceConfidence(fields.confidence),
      pageIndex,
    };
  }

  /** Convert a parsed `text` field of any JSON type into a string. */
  private coerceText(value: unknown): string {
    if (typeof value === 'string') return value;
    if (value === null || value === undefined) return '';
    if (Array.isArray(value)) {
      return value.map((item: unknown) => this.coerceText(item)).join('\n');
    }
    return typeof value === 'object' ? JSON.stringify(value) : String(value);
  }

  /**
   * Convert a parsed `confidence` field into a number in [0, 1].
   * Numeric strings are accepted; anything non-finite falls back to 0.8.
   */
  private coerceConfidence(value: unknown): number {
    const numeric =
      typeof value === 'string' && value.trim() !== '' ? Number(value) : value;
    if (typeof numeric !== 'number' || !Number.isFinite(numeric)) {
      return 0.8;
    }
    return Math.min(1, Math.max(0, numeric));
  }

  /** Best-effort human-readable message for any thrown value. */
  private describeError(error: unknown): string {
    if (error instanceof Error) {
      return error.message;
    }
    if (typeof error === 'object' && error !== null) {
      const message = (error as { message?: unknown }).message;
      if (typeof message === 'string') return message;
    }
    return String(error);
  }

  /**
   * Determine if an error is worth retrying.
   *
   * Retryable: HTTP 429 / rate-limit messages, HTTP 5xx, known transient
   * socket error codes, and messages mentioning timeout, network or connection.
   * Never throws, whatever value was thrown (including `null` or a string).
   */
  private isRetryableError(error: unknown): boolean {
    const details = (
      typeof error === 'object' && error !== null ? error : {}
    ) as { message?: unknown; status?: unknown; code?: unknown };

    const rawMessage =
      typeof error === 'string'
        ? error
        : typeof details.message === 'string'
          ? details.message
          : '';
    const errorMessage = rawMessage.toLowerCase();

    // `status` / `code` may carry an HTTP status as a number or numeric string.
    const httpStatus = [details.status, details.code]
      .map((value) => {
        if (typeof value === 'number') return value;
        if (typeof value === 'string' && value.trim() !== '') {
          return Number(value);
        }
        return Number.NaN;
      })
      .find((value) => Number.isFinite(value));

    const systemCode =
      typeof details.code === 'string' ? details.code.toUpperCase() : '';

    // 429 rate limit error
    if (
      httpStatus === 429 ||
      errorMessage.includes('rate limit') ||
      errorMessage.includes('too many requests')
    ) {
      return true;
    }

    // 5xx server error
    if (httpStatus !== undefined && httpStatus >= 500 && httpStatus < 600) {
      return true;
    }

    // Network related error
    return (
      VisionService.TRANSIENT_NETWORK_CODES.has(systemCode) ||
      errorMessage.includes('timeout') ||
      errorMessage.includes('network') ||
      errorMessage.includes('connection')
    );
  }

  /** Resolve after `ms` milliseconds. */
  private sleep(ms: number): Promise<void> {
    return new Promise((resolve) => setTimeout(resolve, ms));
  }

  /**
   * Analyze multiple images sequentially.
   *
   * For every image `onProgress` is called once before any work is done
   * (without a result) and once more after a successful analysis (with the
   * result). Images that fail the quality check are skipped and counted as
   * failed. A failing analysis is logged and counted, and the batch continues.
   *
   * An exception thrown by the post-success `onProgress` call is logged and
   * ignored so that it cannot turn an already-counted success into a failure.
   *
   * @param imagePaths - Image files, one per page, in page order.
   * @param modelConfig - Vision model configuration passed to {@link analyzeImage}.
   * @param options - `startIndex` is the page number of the first image
   *   (default 1); `skipQualityCheck` disables {@link checkImageQuality}.
   * @returns Results of successful pages plus counters and an estimated cost
   *   of `successCount * 0.01`.
   */
  async batchAnalyze(
    imagePaths: string[],
    modelConfig: VisionModelConfig,
    options: {
      startIndex?: number;
      skipQualityCheck?: boolean;
      onProgress?: (
        current: number,
        total: number,
        pageResult?: VisionAnalysisResult,
      ) => void;
    } = {},
  ): Promise<BatchAnalysisResult> {
    const { startIndex = 1, skipQualityCheck = false, onProgress } = options;
    const total = imagePaths.length;
    const results: VisionAnalysisResult[] = [];
    let successCount = 0;
    let failedCount = 0;

    this.logger.log(
      this.i18nService.formatMessage('batchAnalysisStarted', {
        count: total,
      }),
    );
    this.logger.log(
      `🔧 Model config: ${modelConfig.modelId} (${modelConfig.baseUrl || 'OpenAI'})`,
    );

    for (let i = 0; i < total; i++) {
      const imagePath = imagePaths[i];
      const pageIndex = startIndex + i;
      const progress = Math.round(((i + 1) / total) * 100);

      this.logger.log(
        `🖼️ Analyzing page ${pageIndex} (${i + 1}/${total}, ${progress}%)`,
      );

      // Call progress callback
      if (onProgress) {
        onProgress(i + 1, total);
      }

      // Quality check (pages that fail it are not analyzed)
      if (!skipQualityCheck) {
        const quality = await this.checkImageQuality(imagePath);
        if (!quality.isGood) {
          this.logger.warn(
            `⚠️ Skipped page ${pageIndex} (poor quality): ${quality.reason}`,
          );
          failedCount++;
          continue;
        }
        this.logger.log(
          `✅ Page ${pageIndex} quality check passed (score: ${(quality.score || 0).toFixed(2)})`,
        );
      }

      let result: VisionAnalysisResult;
      let duration: string;
      try {
        this.logger.log(`🔍 Analyzing page ${pageIndex} with Vision model...`);
        const startTime = Date.now();
        result = await this.analyzeImage(imagePath, modelConfig, pageIndex);
        duration = ((Date.now() - startTime) / 1000).toFixed(1);
      } catch (error: unknown) {
        this.logger.error(
          this.i18nService.formatMessage('pageAnalysisFailed', {
            page: pageIndex,
          }) + `: ${this.describeError(error)}`,
        );
        failedCount++;
        continue;
      }

      results.push(result);
      successCount++;

      this.logger.log(
        `✅ Page ${pageIndex} analysis completed (time: ${duration}s, ` +
          `text: ${result.text.length} chars, ` +
          `images: ${result.images.length}, ` +
          `confidence: ${(result.confidence * 100).toFixed(1)}%)`,
      );

      // Call progress callback with result. Kept outside the analysis try/catch:
      // a throwing callback must not count this page as failed as well.
      if (onProgress) {
        try {
          onProgress(i + 1, total, result);
        } catch (callbackError: unknown) {
          this.logger.warn(
            `Progress callback failed for page ${pageIndex}: ${this.describeError(callbackError)}`,
          );
        }
      }
    }

    // Calculate estimated cost (assuming $0.01 per image)
    const estimatedCost = successCount * VisionService.COST_PER_IMAGE_USD;

    this.logger.log(
      `🎉 Vision batch analysis completed! ` +
        `✅ Success: ${successCount} pages, ❌ Failed: ${failedCount} pages, ` +
        `💰 Estimated cost: $${estimatedCost.toFixed(2)}`,
    );

    return {
      results,
      totalPages: total,
      successCount,
      failedCount,
      estimatedCost,
    };
  }

  /**
   * Check image quality using the file size only.
   *
   * Files smaller than 5KB or larger than 10MB (10240KB) are rejected with
   * score 0. Accepted files start at 0.5 and gain 0.2 above 50KB, 0.2 above
   * 100KB and 0.1 above 500KB, capped at 1.0. If the file cannot be stat'ed
   * the result is `isGood: false` with the `imageLoadError` i18n message.
   */
  async checkImageQuality(
    imagePath: string,
  ): Promise<{ isGood: boolean; reason?: string; score?: number }> {
    try {
      const stats = await fs.stat(imagePath);
      const sizeKB = stats.size / 1024;

      // Check file size (5KB+)
      if (sizeKB < 5) {
        return {
          isGood: false,
          reason: `File too small (${sizeKB.toFixed(2)}KB)`,
          score: 0,
        };
      }

      // Check file size limit (10MB)
      if (sizeKB > 10240) {
        return {
          isGood: false,
          reason: `File too large (${sizeKB.toFixed(2)}KB)`,
          score: 0,
        };
      }

      // Simple quality scoring
      let score = 0.5;
      if (sizeKB > 50) score += 0.2;
      if (sizeKB > 100) score += 0.2;
      if (sizeKB > 500) score += 0.1;

      score = Math.min(score, 1.0);

      return { isGood: true, score };
    } catch (error: unknown) {
      return {
        isGood: false,
        reason: this.i18nService.formatMessage('imageLoadError', {
          message: this.describeError(error),
        }),
        score: 0,
      };
    }
  }

  /**
   * Check if a MIME type is a supported image format.
   *
   * The comparison ignores case and surrounding whitespace, since media type
   * names are case-insensitive.
   */
  isImageFile(mimetype: string): boolean {
    if (typeof mimetype !== 'string') {
      return false;
    }
    return VisionService.SUPPORTED_MIME_TYPES.has(
      mimetype.trim().toLowerCase(),
    );
  }

  /**
   * Get the MIME type for a file path from its extension (case-insensitive).
   * Falls back to `image/jpeg` when the extension is missing or unknown.
   */
  private getMimeType(filePath: string): string {
    const ext = filePath.toLowerCase().split('.').pop();
    if (!ext) return VisionService.DEFAULT_MIME_TYPE;

    return (
      VisionService.MIME_BY_EXTENSION.get(ext) ??
      VisionService.DEFAULT_MIME_TYPE
    );
  }

  /**
   * Legacy interface compatibility: extract content from a single image.
   *
   * @returns The `text` field of the {@link analyzeImage} result.
   */
  async extractImageContent(
    imagePath: string,
    modelConfig: { baseUrl: string; apiKey: string; modelId: string },
  ): Promise<string> {
    const result = await this.analyzeImage(imagePath, modelConfig);
    return result.text;
  }
}
|