Files
aurak/server/src/vision/vision.service.ts
T
Developer 0a9588abb7 feat: implement QuestionBank CRUD with pagination and template query
- Add pagination support to findAll (page, limit query params)
- Add findByTemplateId method to service
- Add GET /by-template/:templateId endpoint to controller
- Service already includes CRUD for QuestionBank and QuestionBankItem
2026-04-23 17:19:11 +08:00

411 lines
11 KiB
TypeScript

import { Injectable, Logger } from '@nestjs/common';
import { I18nService } from '../i18n/i18n.service';
import { ConfigService } from '@nestjs/config';
import { ChatOpenAI } from '@langchain/openai';
import { HumanMessage } from '@langchain/core/messages';
import * as fs from 'fs/promises';
import {
VisionAnalysisResult,
VisionModelConfig,
BatchAnalysisResult,
ImageDescription,
} from './vision.interface';
/**
 * Analyzes document page images with an OpenAI-compatible vision chat model.
 *
 * Each image is read from disk, sent to the model as a base64 data URL
 * together with the `visionSystemPrompt` message, and the reply is parsed
 * into a `VisionAnalysisResult`. Transient failures (rate limits, 5xx,
 * network errors) are retried a fixed number of times.
 */
@Injectable()
export class VisionService {
  /** Total attempts (the first try included) made per image. */
  private static readonly MAX_ATTEMPTS = 3;
  /** Fixed part of the pause between attempts, in milliseconds. */
  private static readonly RETRY_BASE_DELAY_MS = 3000;
  /** Upper bound of the random jitter added to the pause, in milliseconds. */
  private static readonly RETRY_JITTER_MS = 2000;
  /** Files smaller than this (in KB) are rejected by the quality check. */
  private static readonly MIN_IMAGE_SIZE_KB = 5;
  /** Files larger than this (in KB, i.e. 10MB) are rejected by the quality check. */
  private static readonly MAX_IMAGE_SIZE_KB = 10240;
  /** Flat per-image price (USD) used for the cost estimate of a batch. */
  private static readonly ESTIMATED_COST_PER_IMAGE = 0.01;
  /** MIME type used when the file extension is missing or unknown. */
  private static readonly DEFAULT_MIME_TYPE = 'image/jpeg';
  /** Matches markdown code-fence markers, with or without a `json` tag. */
  private static readonly CODE_FENCE = /```(?:json)?/gi;

  /**
   * Extension -> MIME type. A Map (not an object literal) so that extensions
   * such as `constructor` cannot resolve to inherited prototype members.
   */
  private static readonly MIME_TYPE_BY_EXTENSION: ReadonlyMap<string, string> =
    new Map([
      ['jpg', 'image/jpeg'],
      ['jpeg', 'image/jpeg'],
      ['png', 'image/png'],
      ['gif', 'image/gif'],
      ['bmp', 'image/bmp'],
      ['webp', 'image/webp'],
    ]);

  /** MIME types accepted by `isImageFile` (lower-case). */
  private static readonly SUPPORTED_IMAGE_MIME_TYPES: ReadonlySet<string> =
    new Set([
      'image/jpeg',
      'image/jpg',
      'image/png',
      'image/gif',
      'image/bmp',
      'image/webp',
    ]);

  /** Node.js system error codes that indicate a transient network problem. */
  private static readonly TRANSIENT_NETWORK_CODES: ReadonlySet<string> =
    new Set(['ECONNRESET', 'ECONNREFUSED', 'ETIMEDOUT', 'EAI_AGAIN', 'EPIPE']);

  private readonly logger = new Logger(VisionService.name);

  constructor(
    private configService: ConfigService,
    private i18nService: I18nService,
  ) {}

  /**
   * Analyze a single image (document page), retrying transient failures.
   *
   * @param imagePath Path of the image file on disk.
   * @param modelConfig Connection settings of the vision model.
   * @param pageIndex Optional page number; copied into the result and used in logs.
   * @returns The parsed analysis of the page.
   * @throws Error with the localized `visionAnalysisFailed` message once the
   *   error is not retryable or all attempts are used up. The original error
   *   is attached as `cause`.
   */
  async analyzeImage(
    imagePath: string,
    modelConfig: VisionModelConfig,
    pageIndex?: number,
  ): Promise<VisionAnalysisResult> {
    const maxAttempts = VisionService.MAX_ATTEMPTS;

    for (let attempt = 1; attempt <= maxAttempts; attempt++) {
      try {
        return await this.performAnalysis(imagePath, modelConfig, pageIndex);
      } catch (error: unknown) {
        const message = VisionService.describeError(error);

        if (attempt === maxAttempts || !this.isRetryableError(error)) {
          const failure = new Error(
            this.i18nService.formatMessage('visionAnalysisFailed', {
              message,
            }),
          );
          // Keep the underlying error reachable for callers and debuggers.
          (failure as Error & { cause?: unknown }).cause = error;
          throw failure;
        }

        // 3-5 second pause: fixed base plus random jitter.
        const delay =
          VisionService.RETRY_BASE_DELAY_MS +
          Math.random() * VisionService.RETRY_JITTER_MS;
        this.logger.warn(
          `⚠️ Failed to analyze page ${pageIndex ?? '?'} (${attempt}/${maxAttempts}), retrying in ${delay.toFixed(0)}ms: ${message}`,
        );
        await this.sleep(delay);
      }
    }

    // Unreachable in practice: the last attempt either returns or throws above.
    throw new Error(this.i18nService.getMessage('retryMechanismError'));
  }

  /**
   * Perform one analysis attempt: read the image, call the model and parse
   * its reply. Errors are logged and re-thrown so `analyzeImage` can decide
   * whether to retry.
   */
  private async performAnalysis(
    imagePath: string,
    modelConfig: VisionModelConfig,
    pageIndex?: number,
  ): Promise<VisionAnalysisResult> {
    try {
      // Load image and convert to a base64 data URL payload
      const imageBuffer = await fs.readFile(imagePath);
      const base64Image = imageBuffer.toString('base64');
      const mimeType = this.getMimeType(imagePath);

      // Create vision model instance
      const model = new ChatOpenAI({
        apiKey: modelConfig.apiKey,
        model: modelConfig.modelId,
        configuration: {
          baseURL: modelConfig.baseUrl,
        },
        temperature: 0.1, // Reduce randomness, increase consistency
      });

      // Build document analysis prompt: instructions first, then the image
      const systemPrompt = this.i18nService.getMessage('visionSystemPrompt');
      const message = new HumanMessage({
        content: [
          {
            type: 'text',
            text: systemPrompt,
          },
          {
            type: 'image_url',
            image_url: {
              url: `data:${mimeType};base64,${base64Image}`,
            },
          },
        ],
      });

      this.logger.log(
        this.i18nService.formatMessage('visionModelCall', {
          model: modelConfig.modelId,
          // `??` so that page 0 is reported as 0 rather than 'single'
          page: pageIndex ?? 'single',
        }),
      );

      const response = await model.invoke([message]);

      // The reply may be a plain string or a list of content parts; flatten
      // it to text, then drop markdown code-fence markers around the JSON.
      const cleaned = VisionService.contentToText(response.content)
        .replace(VisionService.CODE_FENCE, '')
        .trim();

      const fields = VisionService.parseJsonObject(cleaned);
      let result: VisionAnalysisResult;
      if (fields) {
        // Validate every field: the model output is untrusted input.
        result = {
          text: typeof fields.text === 'string' ? fields.text : '',
          images: (Array.isArray(fields.images)
            ? fields.images
            : []) as VisionAnalysisResult['images'],
          layout: (typeof fields.layout === 'string' && fields.layout !== ''
            ? fields.layout
            : 'unknown') as VisionAnalysisResult['layout'],
          confidence:
            typeof fields.confidence === 'number' &&
            Number.isFinite(fields.confidence)
              ? fields.confidence
              : 0.8,
          pageIndex,
        };
      } else {
        // If no JSON object can be recovered, treat entire content as text
        this.logger.warn(
          `Failed to parse JSON response for ${imagePath}, using raw text`,
        );
        result = {
          text: cleaned,
          images: [],
          layout: 'unknown',
          confidence: 0.5,
          pageIndex,
        };
      }

      this.logger.log(
        this.i18nService.formatMessage('visionAnalysisSuccess', {
          path: imagePath,
          page: pageIndex !== undefined ? ` (page ${pageIndex})` : '',
          textLen: result.text.length,
          imgCount: result.images.length,
          layout: result.layout,
          confidence: (result.confidence * 100).toFixed(1),
        }),
      );
      return result;
    } catch (error: unknown) {
      this.logger.error(
        this.i18nService.formatMessage('visionAnalysisFailed', {
          message: VisionService.describeError(error),
        }),
      );
      const stack = error instanceof Error ? error.stack : undefined;
      this.logger.error(`Vision analysis error details: ${stack}`);
      throw error; // Re-throw error for retry mechanism
    }
  }

  /**
   * Determine if error is retryable: HTTP 429, HTTP 5xx, a transient Node.js
   * network error code, or a message mentioning rate limiting / timeouts /
   * network / connection problems.
   */
  private isRetryableError(error: unknown): boolean {
    const message = VisionService.describeError(error).toLowerCase();
    const details = (
      typeof error === 'object' && error !== null ? error : {}
    ) as { status?: unknown; code?: unknown };

    // HTTP status may arrive as a number or as a numeric string.
    const rawCode = details.status || details.code;
    let httpStatus: number | undefined;
    if (typeof rawCode === 'number') {
      httpStatus = rawCode;
    } else if (typeof rawCode === 'string' && /^\d{3}$/.test(rawCode)) {
      httpStatus = Number(rawCode);
    }

    // 429 rate limit error
    if (
      httpStatus === 429 ||
      message.includes('rate limit') ||
      message.includes('too many requests')
    ) {
      return true;
    }

    // 5xx server error
    if (httpStatus !== undefined && httpStatus >= 500 && httpStatus < 600) {
      return true;
    }

    // Network related error, identified by system error code...
    if (
      typeof details.code === 'string' &&
      VisionService.TRANSIENT_NETWORK_CODES.has(details.code)
    ) {
      return true;
    }

    // ...or by message text
    return (
      message.includes('timeout') ||
      message.includes('network') ||
      message.includes('connection')
    );
  }

  /**
   * Resolve after `ms` milliseconds.
   */
  private sleep(ms: number): Promise<void> {
    return new Promise((resolve) => setTimeout(resolve, ms));
  }

  /**
   * Batch analyze multiple images sequentially.
   *
   * Pages that fail the quality check or whose analysis throws are counted
   * as failed and skipped; the batch itself keeps going.
   *
   * @param imagePaths Image files, one per page, in page order.
   * @param modelConfig Connection settings of the vision model.
   * @param options `startIndex` is the page number of the first image
   *   (default 1); `skipQualityCheck` bypasses `checkImageQuality`;
   *   `onProgress` is called before each page is processed and again, with
   *   the page result, after a successful analysis.
   * @returns Successful results plus success/failure counts and a cost estimate.
   */
  async batchAnalyze(
    imagePaths: string[],
    modelConfig: VisionModelConfig,
    options: {
      startIndex?: number;
      skipQualityCheck?: boolean;
      onProgress?: (
        current: number,
        total: number,
        pageResult?: VisionAnalysisResult,
      ) => void;
    } = {},
  ): Promise<BatchAnalysisResult> {
    const { startIndex = 1, skipQualityCheck = false, onProgress } = options;
    const total = imagePaths.length;
    const results: VisionAnalysisResult[] = [];
    let successCount = 0;
    let failedCount = 0;

    this.logger.log(
      this.i18nService.formatMessage('batchAnalysisStarted', {
        count: total,
      }),
    );
    this.logger.log(
      `🔧 Model config: ${modelConfig.modelId} (${modelConfig.baseUrl || 'OpenAI'})`,
    );

    for (const [i, imagePath] of imagePaths.entries()) {
      const pageIndex = startIndex + i;
      const position = i + 1;
      const progress = Math.round((position / total) * 100);
      this.logger.log(
        `🖼️ Analyzing page ${pageIndex} (${position}/${total}, ${progress}%)`,
      );

      // Announce that this page is being processed (no result yet)
      if (onProgress) {
        onProgress(position, total);
      }

      // Quality check (pages that fail it are not sent to the model)
      if (!skipQualityCheck) {
        const quality = await this.checkImageQuality(imagePath);
        if (!quality.isGood) {
          this.logger.warn(
            `⚠️ Skipped page ${pageIndex} (poor quality): ${quality.reason}`,
          );
          failedCount++;
          continue;
        }
        this.logger.log(
          `✅ Page ${pageIndex} quality check passed (score: ${(quality.score ?? 0).toFixed(2)})`,
        );
      }

      let result: VisionAnalysisResult;
      const startTime = Date.now();
      try {
        this.logger.log(`🔍 Analyzing page ${pageIndex} with Vision model...`);
        result = await this.analyzeImage(imagePath, modelConfig, pageIndex);
      } catch (error: unknown) {
        this.logger.error(
          this.i18nService.formatMessage('pageAnalysisFailed', {
            page: pageIndex,
          }) + `: ${VisionService.describeError(error)}`,
        );
        failedCount++;
        continue;
      }

      const duration = ((Date.now() - startTime) / 1000).toFixed(1);
      results.push(result);
      successCount++;
      this.logger.log(
        `✅ Page ${pageIndex} analysis completed (time: ${duration}s, ` +
          `text: ${result.text.length} chars, ` +
          `images: ${result.images.length}, ` +
          `confidence: ${(result.confidence * 100).toFixed(1)}%)`,
      );

      // Report the finished page. A failing listener must not turn an
      // already-recorded success into a failure, so it is handled separately
      // from the analysis itself.
      if (onProgress) {
        try {
          onProgress(position, total, result);
        } catch (error: unknown) {
          this.logger.warn(
            `⚠️ Progress callback failed for page ${pageIndex}: ${VisionService.describeError(error)}`,
          );
        }
      }
    }

    // Calculate estimated cost (assuming a flat price per analyzed image)
    const estimatedCost = successCount * VisionService.ESTIMATED_COST_PER_IMAGE;
    this.logger.log(
      `🎉 Vision batch analysis completed! ` +
        `✅ Success: ${successCount} pages, ❌ Failed: ${failedCount} pages, ` +
        `💰 Estimated cost: $${estimatedCost.toFixed(2)}`,
    );

    return {
      results,
      totalPages: total,
      successCount,
      failedCount,
      estimatedCost,
    };
  }

  /**
   * Check image quality using the file size only.
   *
   * Files below 5KB or above 10MB are rejected; otherwise a score between
   * 0.5 and 1.0 is derived from the size. Never throws: a file that cannot
   * be inspected is reported as not good, with the error as the reason.
   */
  async checkImageQuality(
    imagePath: string,
  ): Promise<{ isGood: boolean; reason?: string; score?: number }> {
    try {
      const stats = await fs.stat(imagePath);
      const sizeKB = stats.size / 1024;

      // Check minimum file size (5KB+)
      if (sizeKB < VisionService.MIN_IMAGE_SIZE_KB) {
        return {
          isGood: false,
          reason: `File too small (${sizeKB.toFixed(2)}KB)`,
          score: 0,
        };
      }

      // Check file size limit (10MB)
      if (sizeKB > VisionService.MAX_IMAGE_SIZE_KB) {
        return {
          isGood: false,
          reason: `File too large (${sizeKB.toFixed(2)}KB)`,
          score: 0,
        };
      }

      // Simple quality scoring: larger files score higher, capped at 1.0
      let score = 0.5;
      if (sizeKB > 50) score += 0.2;
      if (sizeKB > 100) score += 0.2;
      if (sizeKB > 500) score += 0.1;
      score = Math.min(score, 1.0);

      return { isGood: true, score };
    } catch (error: unknown) {
      return {
        isGood: false,
        reason: this.i18nService.formatMessage('imageLoadError', {
          message: VisionService.describeError(error),
        }),
        score: 0,
      };
    }
  }

  /**
   * Check if a MIME type is a supported image format. The comparison ignores
   * letter case and surrounding whitespace; non-string input yields `false`.
   */
  isImageFile(mimetype: string): boolean {
    if (typeof mimetype !== 'string') {
      return false;
    }
    return VisionService.SUPPORTED_IMAGE_MIME_TYPES.has(
      mimetype.trim().toLowerCase(),
    );
  }

  /**
   * Get MIME type from the file extension, falling back to `image/jpeg`
   * when the extension is missing or not recognized.
   */
  private getMimeType(filePath: string): string {
    const ext = filePath.toLowerCase().split('.').pop();
    if (!ext) {
      return VisionService.DEFAULT_MIME_TYPE;
    }
    return (
      VisionService.MIME_TYPE_BY_EXTENSION.get(ext) ??
      VisionService.DEFAULT_MIME_TYPE
    );
  }

  /**
   * Legacy interface compatibility: extract the text content of a single image.
   */
  async extractImageContent(
    imagePath: string,
    modelConfig: { baseUrl: string; apiKey: string; modelId: string },
  ): Promise<string> {
    const result = await this.analyzeImage(imagePath, modelConfig);
    return result.text;
  }

  /** Best-effort human-readable message for any thrown value. */
  private static describeError(error: unknown): string {
    if (error instanceof Error) {
      return error.message;
    }
    if (typeof error === 'object' && error !== null) {
      const message = (error as { message?: unknown }).message;
      if (typeof message === 'string') {
        return message;
      }
    }
    return String(error);
  }

  /**
   * Flatten a chat message `content` value to plain text. Accepts a string
   * or a list of parts, keeping string parts and the `text` of object parts.
   */
  private static contentToText(content: unknown): string {
    if (typeof content === 'string') {
      return content;
    }
    if (Array.isArray(content)) {
      return content
        .map((part: unknown) => {
          if (typeof part === 'string') {
            return part;
          }
          if (typeof part === 'object' && part !== null) {
            const text = (part as { text?: unknown }).text;
            return typeof text === 'string' ? text : '';
          }
          return '';
        })
        .join('');
    }
    return content == null ? '' : String(content);
  }

  /**
   * Recover a JSON object from model output. The whole text is tried first;
   * failing that, the span from the first `{` to the last `}` is tried, and
   * accepted only when it carries a string `text` field, so that ordinary
   * prose containing braces is not mistaken for a structured answer.
   */
  private static parseJsonObject(
    text: string,
  ): Record<string, unknown> | undefined {
    const whole = VisionService.tryParseObject(text);
    if (whole) {
      return whole;
    }

    const start = text.indexOf('{');
    const end = text.lastIndexOf('}');
    const spansWholeText = start === 0 && end === text.length - 1;
    if (start === -1 || end <= start || spansWholeText) {
      return undefined;
    }

    const embedded = VisionService.tryParseObject(text.slice(start, end + 1));
    return embedded && typeof embedded.text === 'string' ? embedded : undefined;
  }

  /** Parse `text` as JSON; return it only if it is a non-array object. */
  private static tryParseObject(
    text: string,
  ): Record<string, unknown> | undefined {
    try {
      const parsed: unknown = JSON.parse(text);
      return typeof parsed === 'object' &&
        parsed !== null &&
        !Array.isArray(parsed)
        ? (parsed as Record<string, unknown>)
        : undefined;
    } catch {
      return undefined;
    }
  }
}