feat: implement QuestionBank CRUD with pagination and template query

- Add pagination support to findAll (page, limit query params)
- Add findByTemplateId method to service
- Add GET /by-template/:templateId endpoint to controller
- Service already includes CRUD for QuestionBank and QuestionBankItem
This commit is contained in:
Developer
2026-04-23 17:19:11 +08:00
commit 0a9588abb7
492 changed files with 112453 additions and 0 deletions
@@ -0,0 +1,11 @@
import { Module } from '@nestjs/common';
import { TypeOrmModule } from '@nestjs/typeorm';
import { CostControlService } from './cost-control.service';
import { User } from '../user/user.entity';
/**
 * Nest module for quota / cost accounting.
 *
 * Makes the `User` repository injectable inside this module and exports
 * `CostControlService` so importing modules can use it.
 */
@Module({
  imports: [
    TypeOrmModule.forFeature([User]),
  ],
  providers: [
    CostControlService,
  ],
  exports: [
    CostControlService,
  ],
})
export class CostControlModule {}
@@ -0,0 +1,261 @@
/**
* Cost control and quota management service
* Used to manage API call costs for Vision Pipeline
*/
import { Injectable, Logger } from '@nestjs/common';
import { ConfigService } from '@nestjs/config';
import { InjectRepository } from '@nestjs/typeorm';
import { Repository } from 'typeorm';
import { User } from '../user/user.entity';
/**
 * Snapshot of one user's spending measured against their monthly limit.
 */
export interface UserQuota {
  /** Identifier of the user the quota belongs to. */
  userId: string;

  /** Cost already used in the current month. */
  monthlyCost: number;

  /** Maximum cost allowed per month. */
  maxCost: number;

  /** Cost still available in the current month. */
  remaining: number;

  /** Time of the last monthly reset. */
  lastReset: Date;
}
/**
 * Projected cost and duration for processing a document.
 */
export interface CostEstimate {
  /** Estimated total cost. */
  estimatedCost: number;

  /** Estimated processing time, in seconds. */
  estimatedTime: number;

  /** Per-page cost breakdown. */
  pageBreakdown: Array<{
    pageIndex: number;
    cost: number;
  }>;
}
/**
 * Tracks per-user Vision Pipeline spending and enforces a monthly cost limit.
 *
 * Quota state is read from and written to the `User` row through
 * `userRepository` (`monthlyCost`, `maxCost`, `lastQuotaReset`).
 */
@Injectable()
export class CostControlService {
  private readonly logger = new Logger(CostControlService.name);
  private readonly COST_PER_PAGE = 0.01; // Cost per page (USD)
  private readonly DEFAULT_MONTHLY_LIMIT = 100; // Default monthly limit (USD)
  // Cost coefficient applied for each quality tier.
  private readonly QUALITY_MULTIPLIER = {
    low: 0.5,
    medium: 1.0,
    high: 1.5,
  };

  constructor(
    private configService: ConfigService,
    @InjectRepository(User)
    private userRepository: Repository<User>,
  ) {}

  /**
   * Estimate the processing cost and duration for a number of pages.
   *
   * @param pageCount Number of pages to process. Non-finite values count as 0;
   *   negative values are clamped to 0 and fractions are floored, so the total
   *   and the per-page breakdown always describe the same number of pages.
   * @param quality Quality tier that scales the per-page cost.
   * @returns Total cost, estimated time (3 seconds per page) and a 1-based
   *   per-page breakdown.
   */
  estimateCost(
    pageCount: number,
    quality: 'low' | 'medium' | 'high' = 'medium',
  ): CostEstimate {
    const pages = Number.isFinite(pageCount)
      ? Math.max(0, Math.floor(pageCount))
      : 0;
    const multiplier = this.QUALITY_MULTIPLIER[quality];

    const baseCost = pages * this.COST_PER_PAGE * multiplier;
    const estimatedTime = pages * 3; // Approximately 3 seconds per page
    const pageBreakdown = Array.from({ length: pages }, (_, i) => ({
      pageIndex: i + 1,
      cost: this.COST_PER_PAGE * multiplier,
    }));

    return {
      estimatedCost: baseCost,
      estimatedTime,
      pageBreakdown,
    };
  }

  /**
   * Check whether the user's remaining quota covers an estimated cost.
   *
   * Applies the monthly rollover first, and waits for it to be persisted, so
   * the decision is made against the current month's figures.
   *
   * @returns `allowed: false` together with a human-readable `reason` when the
   *   remaining quota is smaller than `estimatedCost`.
   * @throws Error when the user does not exist.
   */
  async checkQuota(
    userId: string,
    estimatedCost: number,
  ): Promise<{
    allowed: boolean;
    quota: UserQuota;
    reason?: string;
  }> {
    const quota = await this.getUserQuota(userId);

    // Awaited: the reset must be persisted before callers read or deduct.
    await this.checkAndResetMonthlyQuota(quota);

    if (quota.remaining < estimatedCost) {
      this.logger.warn(
        `User ${userId} quota insufficient: remaining $${quota.remaining.toFixed(2)}, required $${estimatedCost.toFixed(2)}`,
      );
      return {
        allowed: false,
        quota,
        reason: `Insufficient quota: remaining $${quota.remaining.toFixed(2)}, required $${estimatedCost.toFixed(2)}`,
      };
    }

    return {
      allowed: true,
      quota,
    };
  }

  /**
   * Add an actual cost to the user's monthly usage and persist it.
   *
   * The monthly rollover is applied first so a charge made in a new month is
   * not stacked on top of the previous month's total. `lastQuotaReset` is
   * written together with the cost; for a user that had no reset timestamp
   * yet this starts the monthly window, which otherwise would never roll over.
   *
   * NOTE(review): this is a read-modify-write; concurrent deductions for the
   * same user could lose an update. Consider an atomic increment if that
   * matters for callers.
   *
   * @throws Error when the user does not exist.
   */
  async deductQuota(userId: string, actualCost: number): Promise<void> {
    const quota = await this.getUserQuota(userId);
    await this.checkAndResetMonthlyQuota(quota);

    quota.monthlyCost += actualCost;
    quota.remaining = quota.maxCost - quota.monthlyCost;

    await this.userRepository.update(userId, {
      monthlyCost: quota.monthlyCost,
      lastQuotaReset: quota.lastReset,
    });

    this.logger.log(
      `Deducted $${actualCost.toFixed(2)} from user ${userId} quota. Remaining: $${quota.remaining.toFixed(2)}`,
    );
  }

  /**
   * Load the user's quota snapshot. Does not apply the monthly rollover.
   *
   * Missing or zero values fall back to defaults: `monthlyCost` to 0,
   * `maxCost` to the default monthly limit, `lastReset` to the current time.
   *
   * @throws Error when the user does not exist.
   */
  async getUserQuota(userId: string): Promise<UserQuota> {
    const user = await this.userRepository.findOne({ where: { id: userId } });
    if (!user) {
      throw new Error(`User ${userId} does not exist`);
    }

    // Number(...) is defensive: some database drivers hand decimal columns
    // back as strings, which would turn `+=` into string concatenation.
    // NOTE(review): confirm the column types on the User entity.
    const monthlyCost = Number(user.monthlyCost) || 0;
    const maxCost = Number(user.maxCost) || this.DEFAULT_MONTHLY_LIMIT;
    const lastReset = user.lastQuotaReset || new Date();

    return {
      userId,
      monthlyCost,
      maxCost,
      remaining: maxCost - monthlyCost,
      lastReset,
    };
  }

  /**
   * Reset the quota when the calendar month (server local time) has changed
   * since `quota.lastReset`.
   *
   * Mutates `quota` in place and waits for the database update, so a failed
   * write surfaces to the caller instead of becoming an unhandled rejection.
   */
  private async checkAndResetMonthlyQuota(quota: UserQuota): Promise<void> {
    const now = new Date();
    const lastReset = quota.lastReset;

    const sameMonth =
      now.getMonth() === lastReset.getMonth() &&
      now.getFullYear() === lastReset.getFullYear();
    if (sameMonth) {
      return;
    }

    this.logger.log(`Reset monthly quota for user ${quota.userId}`);

    quota.monthlyCost = 0;
    quota.remaining = quota.maxCost;
    quota.lastReset = now;

    await this.userRepository.update(quota.userId, {
      monthlyCost: 0,
      lastQuotaReset: now,
    });
  }

  /**
   * Set the user's monthly cost limit.
   *
   * Note that `getUserQuota` treats a stored limit of 0 as "not set" and
   * falls back to the default monthly limit.
   */
  async setQuotaLimit(userId: string, maxCost: number): Promise<void> {
    await this.userRepository.update(userId, { maxCost });
    this.logger.log(`Set quota limit to $${maxCost} for user ${userId}`);
  }

  /**
   * Build a cost report from the current quota snapshot.
   *
   * No usage history is queried yet: every figure is derived from the current
   * month's accumulated cost, and `totalPages` is inferred from the base
   * per-page cost.
   *
   * @param days Window used for the daily average (values below 1 count as 1).
   * @throws Error when the user does not exist.
   */
  async getCostReport(
    userId: string,
    days = 30,
  ): Promise<{
    totalCost: number;
    dailyAverage: number;
    pageStats: {
      totalPages: number;
      avgCostPerPage: number;
    };
    quotaUsage: number; // Percentage
  }> {
    const quota = await this.getUserQuota(userId);
    const usagePercent = (quota.monthlyCost / quota.maxCost) * 100;

    return {
      totalCost: quota.monthlyCost,
      dailyAverage: quota.monthlyCost / Math.max(days, 1),
      pageStats: {
        // The epsilon absorbs float drift: 0.29 / 0.01 evaluates to
        // 28.999999999999996 and would otherwise floor to 28.
        totalPages: Math.floor(quota.monthlyCost / this.COST_PER_PAGE + 1e-9),
        avgCostPerPage: this.COST_PER_PAGE,
      },
      quotaUsage: usagePercent,
    };
  }

  /**
   * Report whether the user's usage has reached a warning threshold
   * (75% or 90% of the monthly limit).
   *
   * @throws Error when the user does not exist.
   */
  async checkWarningThreshold(userId: string): Promise<{
    shouldWarn: boolean;
    message: string;
  }> {
    const quota = await this.getUserQuota(userId);
    const usagePercent = (quota.monthlyCost / quota.maxCost) * 100;

    if (usagePercent >= 90) {
      return {
        shouldWarn: true,
        message: `⚠️ Quota usage reached ${usagePercent.toFixed(1)}%. Remaining: $${quota.remaining.toFixed(2)}`,
      };
    }

    if (usagePercent >= 75) {
      return {
        shouldWarn: true,
        message: `💡 Quota usage at ${usagePercent.toFixed(1)}%. Please monitor your costs carefully`,
      };
    }

    return {
      shouldWarn: false,
      message: '',
    };
  }

  /**
   * Format a cost as a dollar amount with two decimals.
   */
  formatCost(cost: number): string {
    return `$${cost.toFixed(2)}`;
  }

  /**
   * Format a duration in seconds as `Ns` or `Mm Ns`.
   *
   * The value is rounded to whole seconds before being split, so fractional
   * inputs can no longer produce "60s" or "1m 60s".
   */
  formatTime(seconds: number): string {
    const totalSeconds = Math.round(seconds);
    if (totalSeconds < 60) {
      return `${totalSeconds}s`;
    }
    const minutes = Math.floor(totalSeconds / 60);
    const remainingSeconds = totalSeconds % 60;
    return `${minutes}m ${remainingSeconds}s`;
  }
}
@@ -0,0 +1,341 @@
/**
* Vision Pipeline Service (with cost control)
* This is an extended version of vision-pipeline.service.ts with integrated cost control
*/
import { Injectable, Logger } from '@nestjs/common';
import { ConfigService } from '@nestjs/config';
import * as fs from 'fs/promises';
import * as path from 'path';
import { LibreOfficeService } from '../libreoffice/libreoffice.service';
import { Pdf2ImageService } from '../pdf2image/pdf2image.service';
import { VisionService } from '../vision/vision.service';
import { ElasticsearchService } from '../elasticsearch/elasticsearch.service';
import { ModelConfigService } from '../model-config/model-config.service';
import {
PreciseModeOptions,
PipelineResult,
ProcessingStatus,
ModeRecommendation,
} from './vision-pipeline.interface';
import {
VisionModelConfig,
VisionAnalysisResult,
} from '../vision/vision.interface';
import { CostControlService } from './cost-control.service';
import { I18nService } from '../i18n/i18n.service';
/**
 * Vision pipeline variant that estimates cost, checks the user's quota before
 * running the Vision model, and deducts the actual cost afterwards.
 */
@Injectable()
export class VisionPipelineCostAwareService {
  private readonly logger = new Logger(VisionPipelineCostAwareService.name);

  constructor(
    private libreOffice: LibreOfficeService,
    private pdf2Image: Pdf2ImageService,
    private vision: VisionService,
    private elasticsearch: ElasticsearchService,
    private modelConfigService: ModelConfigService,
    private configService: ConfigService,
    private costControl: CostControlService,
    private i18nService: I18nService,
  ) {}

  /**
   * Main processing flow: precise mode with cost control.
   *
   * Steps: convert to PDF, render pages to images, estimate cost and check
   * quota, load the model config, analyze the pages, deduct the actual cost,
   * then clean up temporary files.
   *
   * Never rejects: any failure is logged and reported as `success: false`.
   * Temporary-file cleanup is best-effort in both paths, so a cleanup problem
   * after a successful (and already billed) analysis does not discard the
   * results.
   *
   * @param filePath Path of the source document.
   * @param options Processing options; a non-positive `maxPages` means
   *   "no limit".
   */
  async processPreciseMode(
    filePath: string,
    options: PreciseModeOptions,
  ): Promise<PipelineResult> {
    const startTime = Date.now();
    const results: VisionAnalysisResult[] = [];
    let processedPages = 0;
    let failedPages = 0;
    let totalCost = 0;
    let pdfPath = filePath;
    // Element type is owned by Pdf2ImageService; only `.path` is read here.
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    let imagesToProcess: any[] = [];

    this.logger.log(
      `Starting precise mode processing for ${options.fileName} (user: ${options.userId})`,
    );

    try {
      // Step 1: Convert format
      this.updateStatus('converting', 10, 'Converting document format...');
      pdfPath = await this.convertToPDF(filePath);

      // Step 2: Convert PDF to images
      this.updateStatus('splitting', 30, 'Converting PDF to images...');
      const conversionResult = await this.pdf2Image.convertToImages(pdfPath, {
        density: 300,
        quality: 85,
        format: 'jpeg',
      });
      if (conversionResult.images.length === 0) {
        throw new Error(
          this.i18nService.getMessage('pdfToImageConversionFailed'),
        );
      }

      // Limit processing pages. A negative limit must not reach slice(),
      // where it would silently drop pages from the end.
      const pageLimit = options.maxPages;
      imagesToProcess =
        pageLimit && pageLimit > 0
          ? conversionResult.images.slice(0, pageLimit)
          : conversionResult.images;
      const pageCount = imagesToProcess.length;

      // Step 3: Cost estimation and quota check
      this.updateStatus(
        'checking',
        40,
        'Checking quota and estimating cost...',
      );
      const costEstimate = this.costControl.estimateCost(pageCount);
      this.logger.log(
        `Estimated cost: $${costEstimate.estimatedCost.toFixed(2)}, Estimated time: ${this.costControl.formatTime(costEstimate.estimatedTime)}`,
      );

      const quotaCheck = await this.costControl.checkQuota(
        options.userId,
        costEstimate.estimatedCost,
      );
      if (!quotaCheck.allowed) {
        // `reason` is optional on the result type; never throw an empty error.
        throw new Error(quotaCheck.reason || 'Insufficient quota');
      }

      // Cost warning check
      const warning = await this.costControl.checkWarningThreshold(
        options.userId,
      );
      if (warning.shouldWarn) {
        this.logger.warn(warning.message);
      }

      // Step 4: Get Vision model config
      const modelConfig = await this.getVisionModelConfig(
        options.userId,
        options.modelId,
        options.tenantId,
      );

      // Step 5: VL model analysis
      this.updateStatus(
        'analyzing',
        50,
        'Analyzing pages with Vision model...',
      );
      const batchResult = await this.vision.batchAnalyze(
        imagesToProcess.map((img) => img.path),
        modelConfig,
        {
          startIndex: 1,
          skipQualityCheck: options.skipQualityCheck,
        },
      );
      totalCost = batchResult.estimatedCost;
      processedPages = batchResult.successCount;
      failedPages = batchResult.failedCount;
      results.push(...batchResult.results);

      // Step 6: Subtract actual cost
      if (totalCost > 0) {
        await this.costControl.deductQuota(options.userId, totalCost);
        this.logger.log(`Actual cost deducted: $${totalCost.toFixed(2)}`);
      }

      // Step 7: Cleanup temp files (best-effort)
      this.updateStatus(
        'completed',
        100,
        'Processing completed. Cleaning up temp files...',
      );
      await this.cleanupTempFiles(filePath, pdfPath, imagesToProcess);

      const duration = (Date.now() - startTime) / 1000;
      this.logger.log(
        `Precise mode completed: ${processedPages} pages processed, ` +
          `cost: $${totalCost.toFixed(2)}, duration: ${duration.toFixed(1)}s`,
      );

      return {
        success: true,
        fileId: options.fileId,
        fileName: options.fileName,
        totalPages: conversionResult.totalPages,
        processedPages,
        failedPages,
        results,
        cost: totalCost,
        duration,
        mode: 'precise',
      };
    } catch (error) {
      this.logger.error(`Precise mode failed: ${this.describeError(error)}`);

      await this.cleanupTempFiles(filePath, pdfPath, imagesToProcess);

      return {
        success: false,
        fileId: options.fileId,
        fileName: options.fileName,
        totalPages: 0,
        processedPages,
        failedPages,
        results: [],
        cost: totalCost,
        duration: (Date.now() - startTime) / 1000,
        mode: 'precise',
      };
    }
  }

  /**
   * Remove the rendered page images and, when a conversion produced one, the
   * intermediate PDF.
   *
   * Each step is attempted independently and failures are only logged, so one
   * failing step neither skips the other nor propagates to the caller.
   */
  private async cleanupTempFiles(
    filePath: string,
    pdfPath: string,
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    images: any[],
  ): Promise<void> {
    if (images.length > 0) {
      try {
        await this.pdf2Image.cleanupImages(images);
      } catch (error) {
        this.logger.warn(
          `Failed to cleanup page images: ${this.describeError(error)}`,
        );
      }
    }

    // Only delete the PDF when it is a converted copy, never the source file.
    if (pdfPath !== filePath) {
      try {
        await fs.unlink(pdfPath);
      } catch (error) {
        this.logger.warn(
          `Failed to cleanup converted PDF: ${this.describeError(error)}`,
        );
      }
    }
  }

  /**
   * Turn a caught value into a loggable message without assuming it is an
   * `Error`.
   */
  private describeError(error: unknown): string {
    return error instanceof Error ? error.message : String(error);
  }

  /**
   * Get Vision model configuration.
   *
   * `userId` and `tenantId` are accepted but not used by the lookup at
   * present; only `modelId` is passed to the model config service.
   *
   * @throws Error when no configuration exists for `modelId`.
   */
  private async getVisionModelConfig(
    userId: string,
    modelId: string,
    tenantId?: string,
  ): Promise<VisionModelConfig> {
    const config = await this.modelConfigService.findOne(modelId);
    if (!config) {
      throw new Error(`Model config not found: ${modelId}`);
    }

    // API key is optional - allows local models
    return {
      baseUrl: config.baseUrl || '',
      apiKey: config.apiKey || '',
      modelId: config.modelId,
    };
  }

  /**
   * Convert a document to PDF; files that already have a `.pdf` extension are
   * returned unchanged.
   */
  private async convertToPDF(filePath: string): Promise<string> {
    const ext = path.extname(filePath).toLowerCase();
    if (ext === '.pdf') {
      return filePath;
    }

    // Call LibreOffice to convert
    return await this.libreOffice.convertToPDF(filePath);
  }

  /**
   * Format detection and mode recommendation (with cost estimation).
   *
   * The page count is approximated from the file size (two pages per MB, at
   * least one page), so the returned cost and time are rough estimates.
   */
  async recommendMode(filePath: string): Promise<ModeRecommendation> {
    const ext = path.extname(filePath).toLowerCase();
    const stats = await fs.stat(filePath);
    const sizeMB = stats.size / (1024 * 1024);

    const supportedFormats = [
      '.pdf',
      '.doc',
      '.docx',
      '.ppt',
      '.pptx',
      '.xls',
      '.xlsx',
    ];
    const preciseFormats = ['.pdf', '.doc', '.docx', '.ppt', '.pptx'];

    if (!supportedFormats.includes(ext)) {
      return {
        recommendedMode: 'fast',
        reason: `Unsupported file format: ${ext}`,
        warnings: ['Using fast mode (text extraction only)'],
      };
    }

    if (!preciseFormats.includes(ext)) {
      return {
        recommendedMode: 'fast',
        reason: `Format ${ext} does not support precise mode`,
        warnings: ['Using fast mode (text extraction only)'],
      };
    }

    // Estimate page count based on file size
    const estimatedPages = Math.max(1, Math.ceil(sizeMB * 2));
    const costEstimate = this.costControl.estimateCost(estimatedPages);

    // Large files get an extra warning about processing time
    if (sizeMB > 50) {
      return {
        recommendedMode: 'precise',
        reason:
          'File is large, recommend precise mode to preserve full content',
        estimatedCost: costEstimate.estimatedCost,
        estimatedTime: costEstimate.estimatedTime,
        warnings: [
          'Processing time may be longer',
          'API costs will be incurred',
        ],
      };
    }

    return {
      recommendedMode: 'precise',
      reason:
        'Precise mode available. Can preserve mixed text and image content',
      estimatedCost: costEstimate.estimatedCost,
      estimatedTime: costEstimate.estimatedTime,
      warnings: ['API costs will be incurred'],
    };
  }

  /**
   * Get user quota information: the quota snapshot plus the cost report and
   * the current warning state.
   */
  async getUserQuotaInfo(userId: string) {
    // The three lookups are independent reads, so run them together.
    const [quota, report, warnings] = await Promise.all([
      this.costControl.getUserQuota(userId),
      this.costControl.getCostReport(userId),
      this.costControl.checkWarningThreshold(userId),
    ]);

    return {
      ...quota,
      report,
      warnings,
    };
  }

  /**
   * Update processing status (for real-time feedback). Currently only logs.
   */
  private updateStatus(
    status: ProcessingStatus['status'],
    progress: number,
    message: string,
  ): void {
    this.logger.log(`[${status}] ${progress}% - ${message}`);
  }
}
@@ -0,0 +1,56 @@
/**
* Vision Pipeline Interface Definitions
*/
import { VisionAnalysisResult } from '../vision/vision.interface';
/**
 * Options accepted by the precise-mode processing flow.
 */
export interface PreciseModeOptions {
  /** User on whose behalf the document is processed. */
  userId: string;

  /** Tenant the request belongs to. */
  tenantId: string;

  /** Identifier used to look up the Vision model configuration. */
  modelId: string;

  /** Identifier of the file, echoed back in the result. */
  fileId: string;

  /** Name of the file, echoed back in the result and used in logs. */
  fileName: string;

  /** Forwarded to the Vision batch analysis as `skipQualityCheck`. */
  skipQualityCheck?: boolean;

  /** Upper bound on the number of pages to analyze. */
  maxPages?: number;
}
/**
 * Outcome of a precise-mode pipeline run.
 */
export interface PipelineResult {
  /** Whether the run finished without error. */
  success: boolean;

  /** Identifier of the processed file. */
  fileId: string;

  /** Name of the processed file. */
  fileName: string;

  /** Total number of pages in the document. */
  totalPages: number;

  /** Number of pages analyzed successfully. */
  processedPages: number;

  /** Number of pages whose analysis failed. */
  failedPages: number;

  /** Per-page analysis results. */
  results: VisionAnalysisResult[];

  /** Cost of the run. */
  cost: number;

  /** Wall-clock duration of the run, in seconds. */
  duration: number;

  /** Processing mode that produced this result. */
  mode: 'precise';
}
/**
 * Progress report emitted while a document moves through the pipeline.
 */
export interface ProcessingStatus {
  /** Current pipeline stage. */
  status:
    | 'converting'
    | 'splitting'
    | 'checking'
    | 'analyzing'
    | 'indexing'
    | 'completed'
    | 'failed';

  /** Progress value reported alongside the stage. */
  progress: number;

  /** Human-readable description of the current stage. */
  message: string;

  /** Cost associated with the run, when available. */
  cost?: number;
}
/**
 * Describes how a file format is handled.
 */
export interface FileFormat {
  /** File extension identifying the format. */
  extension: string;

  /** Whether the format has to be converted before processing. */
  needsConversion: boolean;

  /** Whether the format is supported at all. */
  supported: boolean;
}
/**
 * Recommendation of a processing mode for a given file.
 */
export interface ModeRecommendation {
  /** Mode the caller is advised to use. */
  recommendedMode: 'precise' | 'fast';

  /** Human-readable justification for the recommendation. */
  reason: string;

  /** Estimated cost of processing, when one was computed. */
  estimatedCost?: number;

  /** Estimated processing time in seconds, when one was computed. */
  estimatedTime?: number;

  /** Caveats the user should be aware of. */
  warnings?: string[];
}
@@ -0,0 +1,18 @@
import { Module } from '@nestjs/common';
import { VisionPipelineService } from './vision-pipeline.service';
import { LibreOfficeModule } from '../libreoffice/libreoffice.module';
import { Pdf2ImageModule } from '../pdf2image/pdf2image.module';
import { VisionModule } from '../vision/vision.module';
import { ModelConfigModule } from '../model-config/model-config.module';
/**
 * Nest module for the Vision pipeline.
 *
 * Imports the LibreOffice, PDF-to-image, Vision and model-config modules and
 * exports `VisionPipelineService` for use by other modules.
 */
@Module({
  imports: [LibreOfficeModule, Pdf2ImageModule, VisionModule, ModelConfigModule],
  providers: [
    VisionPipelineService,
  ],
  exports: [
    VisionPipelineService,
  ],
})
export class VisionPipelineModule {}
@@ -0,0 +1,364 @@
import { Injectable, Logger } from '@nestjs/common';
import { ConfigService } from '@nestjs/config';
import * as fs from 'fs/promises';
import * as path from 'path';
import { LibreOfficeService } from '../libreoffice/libreoffice.service';
import { Pdf2ImageService } from '../pdf2image/pdf2image.service';
import { VisionService } from '../vision/vision.service';
import { ModelConfigService } from '../model-config/model-config.service';
import {
ModeRecommendation,
PipelineResult,
PreciseModeOptions,
ProcessingStatus,
} from './vision-pipeline.interface';
import {
VisionAnalysisResult,
VisionModelConfig,
} from '../vision/vision.interface';
import { I18nService } from '../i18n/i18n.service';
/**
 * Vision pipeline: converts a document to PDF, renders its pages to images
 * and analyzes them with a Vision model. Vectorization and indexing of the
 * results are left to the caller.
 */
@Injectable()
export class VisionPipelineService {
  private readonly logger = new Logger(VisionPipelineService.name);

  constructor(
    private libreOffice: LibreOfficeService,
    private pdf2Image: Pdf2ImageService,
    private vision: VisionService,
    private modelConfigService: ModelConfigService,
    private configService: ConfigService,
    private i18nService: I18nService,
  ) {}

  /**
   * Main processing flow: precise mode.
   * Returns the processing result; the caller is responsible for
   * vectorization and indexing.
   *
   * Never rejects: any failure is logged and reported as `success: false`.
   * Temporary-file cleanup is best-effort in both paths, so a cleanup problem
   * after a successful analysis does not discard the results.
   *
   * @param filePath Path of the source document.
   * @param options Processing options; a non-positive `maxPages` means
   *   "no limit".
   */
  async processPreciseMode(
    filePath: string,
    options: PreciseModeOptions,
  ): Promise<PipelineResult> {
    const startTime = Date.now();
    const results: VisionAnalysisResult[] = [];
    let processedPages = 0;
    let failedPages = 0;
    let totalCost = 0;
    let pdfPath = filePath;
    // Element type is owned by Pdf2ImageService; only `.path` is read here.
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    let imagesToProcess: any[] = [];

    this.logger.log(
      `🚀 Starting precise mode processing: ${options.fileName} (User: ${options.userId})`,
    );

    try {
      // Step 1: Unification of formats
      this.logger.log('📄 Step 1/4: Unification of formats');
      this.updateStatus('converting', 10, 'Converting document format...');
      try {
        pdfPath = await this.convertToPDF(filePath);
        this.logger.log(`✅ Format conversion completed: ${pdfPath}`);
      } catch (convertError) {
        this.logger.error(
          `❌ Format conversion failed: ${this.describeError(convertError)}`,
        );
        throw convertError;
      }

      // Step 2: Conversion from PDF to images
      this.logger.log('🖼️ Step 2/4: Conversion from PDF to images');
      this.updateStatus('splitting', 30, 'Converting PDF to images...');
      const conversionResult = await this.renderPages(pdfPath);
      if (conversionResult.images.length === 0) {
        throw new Error(
          this.i18nService.getMessage('pdfToImageConversionFailed'),
        );
      }
      this.logger.log(
        `✅ PDF to image conversion completed: Total ${conversionResult.totalPages} pages, ${conversionResult.images.length} images generated`,
      );

      // Limit the number of pages to process. A negative limit must not reach
      // slice(), where it would silently drop pages from the end.
      const pageLimit = options.maxPages;
      const limited = Boolean(pageLimit && pageLimit > 0);
      imagesToProcess = limited
        ? conversionResult.images.slice(0, pageLimit)
        : conversionResult.images;
      this.logger.log(
        `📊 Processing ${imagesToProcess.length} pages (${limited ? 'limited' : 'all'})`,
      );

      // Step 3: Get Vision model configuration
      this.logger.log('🤖 Step 3/4: Preparation of Vision model');
      const modelConfig = await this.getVisionModelConfig(options.modelId);
      this.logger.log(
        `✅ Vision model configuration completed: ${modelConfig.modelId}`,
      );

      // Step 4: VL model analysis
      this.logger.log('🔍 Step 4/4: Vision model analysis');
      this.updateStatus(
        'analyzing',
        50,
        'Analyzing pages using Vision model...',
      );
      this.logger.log(
        `Starting analysis of ${imagesToProcess.length} page contents...`,
      );
      const batchResult = await this.vision.batchAnalyze(
        imagesToProcess.map((img) => img.path),
        modelConfig,
        {
          startIndex: 1,
          skipQualityCheck: options.skipQualityCheck,
          // Log the progress of each page as it completes.
          // eslint-disable-next-line @typescript-eslint/no-explicit-any
          onProgress: (current: number, total: number, pageResult?: any) => {
            const progress = Math.round((current / total) * 100);
            this.logger.log(
              `📄 Processing progress: ${current}/${total} (${progress}%) ${pageResult ? `- Page ${pageResult.pageIndex} completed` : ''}`,
            );
          },
        },
      );
      totalCost = batchResult.estimatedCost;
      processedPages = batchResult.successCount;
      failedPages = batchResult.failedCount;
      results.push(...batchResult.results);
      this.logger.log(
        `✅ Vision analysis completed: Success ${processedPages} pages, Fail ${failedPages} pages, Cost $${totalCost.toFixed(2)}`,
      );

      // Step 5: Cleanup of temporary files (best-effort)
      this.logger.log('🧹 Cleaning up temporary files...');
      this.updateStatus(
        'completed',
        100,
        'Processing completed. Cleaning up temporary files...',
      );
      await this.cleanupTempFiles(filePath, pdfPath, imagesToProcess);

      const duration = (Date.now() - startTime) / 1000;
      this.logger.log(
        `🎉 Precise mode processing completed! ` +
          `📊 Statistics: ${processedPages}/${imagesToProcess.length} pages success, ` +
          `💰 Cost: $${totalCost.toFixed(2)}, ` +
          `⏱️ Duration: ${duration.toFixed(1)}s`,
      );

      return {
        success: true,
        fileId: options.fileId,
        fileName: options.fileName,
        totalPages: conversionResult.totalPages,
        processedPages,
        failedPages,
        results,
        cost: totalCost,
        duration,
        mode: 'precise',
      };
    } catch (error) {
      this.logger.error(
        `❌ Precise mode processing failed: ${this.describeError(error)}`,
      );

      // Attempt cleanup of whatever temporary files were produced so far.
      const cleaned = await this.cleanupTempFiles(
        filePath,
        pdfPath,
        imagesToProcess,
      );
      if (cleaned) {
        this.logger.log('🧹 Cleaned up temporary files');
      } else {
        this.logger.warn('⚠️ Failed to clean up temporary files');
      }

      return {
        success: false,
        fileId: options.fileId,
        fileName: options.fileName,
        totalPages: 0,
        processedPages,
        failedPages,
        results: [],
        cost: totalCost,
        duration: (Date.now() - startTime) / 1000,
        mode: 'precise',
      };
    }
  }

  /**
   * Render the PDF's pages to JPEG images, logging and rethrowing any
   * conversion error.
   */
  private async renderPages(pdfPath: string) {
    try {
      return await this.pdf2Image.convertToImages(pdfPath, {
        density: 300,
        quality: 85,
        format: 'jpeg',
      });
    } catch (imageError) {
      this.logger.error(
        `❌ PDF to image conversion failed: ${this.describeError(imageError)}`,
      );
      throw imageError;
    }
  }

  /**
   * Remove the rendered page images and, when a conversion produced one, the
   * intermediate PDF.
   *
   * Each step is attempted independently and failures are only logged, so one
   * failing step neither skips the other nor propagates to the caller.
   *
   * @returns `true` when every attempted step succeeded.
   */
  private async cleanupTempFiles(
    filePath: string,
    pdfPath: string,
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    images: any[],
  ): Promise<boolean> {
    let allCleaned = true;

    if (images.length > 0) {
      try {
        await this.pdf2Image.cleanupImages(images);
      } catch (error) {
        allCleaned = false;
        this.logger.warn(
          `⚠️ Failed to clean up page images: ${this.describeError(error)}`,
        );
      }
    }

    // Only delete the PDF when it is a converted copy, never the source file.
    if (pdfPath !== filePath) {
      try {
        await fs.unlink(pdfPath);
        this.logger.log('🗑️ Cleaned up converted PDF file');
      } catch (error) {
        allCleaned = false;
        this.logger.warn(
          `⚠️ Failed to clean up converted PDF: ${this.describeError(error)}`,
        );
      }
    }

    return allCleaned;
  }

  /**
   * Turn a caught value into a loggable message without assuming it is an
   * `Error`.
   */
  private describeError(error: unknown): string {
    return error instanceof Error ? error.message : String(error);
  }

  /**
   * Get Vision model configuration.
   *
   * @throws Error when no configuration exists for `modelId`.
   */
  private async getVisionModelConfig(
    modelId: string,
  ): Promise<VisionModelConfig> {
    const config = await this.modelConfigService.findOne(modelId);
    if (!config) {
      throw new Error(`Model configuration not found: ${modelId}`);
    }

    // API key is optional - allows local models
    return {
      baseUrl: config.baseUrl || '',
      apiKey: config.apiKey || '',
      modelId: config.modelId,
    };
  }

  /**
   * Convert a document to PDF and return a path this process can read.
   *
   * Files that already have a `.pdf` extension are returned unchanged.
   * Otherwise LibreOffice performs the conversion; a returned path starting
   * with `/uploads/` is rewritten to `../uploads/...`.
   *
   * @throws Error when the converted file cannot be accessed.
   */
  private async convertToPDF(filePath: string): Promise<string> {
    const ext = path.extname(filePath).toLowerCase();
    if (ext === '.pdf') {
      return filePath;
    }

    // Call LibreOffice to convert
    const containerPdfPath = await this.libreOffice.convertToPDF(filePath);

    // Inside container: /uploads/xxx.pdf -> Host machine: ../uploads/xxx.pdf
    const hostPdfPath = containerPdfPath.startsWith('/uploads/')
      ? path.join('..', containerPdfPath)
      : containerPdfPath;
    this.logger.log(`Path conversion: ${containerPdfPath} -> ${hostPdfPath}`);

    // Verify that the converted file actually exists before handing it on.
    try {
      await fs.access(hostPdfPath);
    } catch {
      this.logger.error(`PDF file does not exist: ${hostPdfPath}`);
      throw new Error(`PDF file does not exist: ${hostPdfPath}`);
    }
    return hostPdfPath;
  }

  /**
   * Placeholder that always throws: indexing needs the embedding service and
   * must be done by the knowledge-base service. This class only processes
   * documents and returns the results.
   *
   * @throws Error always.
   */
  private async indexResults(
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    results: any[],
    options: PreciseModeOptions,
  ): Promise<void> {
    this.logger.log(
      `indexResults called with ${results.length} results - should be handled by knowledge-base service`,
    );
    throw new Error(
      'VisionPipelineService.indexResults should not be called directly. Use knowledge-base service instead.',
    );
  }

  /**
   * Format detection and mode recommendation.
   *
   * Cost and time are rough estimates derived from the file size only.
   */
  async recommendMode(filePath: string): Promise<ModeRecommendation> {
    const ext = path.extname(filePath).toLowerCase();
    const stats = await fs.stat(filePath);
    const sizeMB = stats.size / (1024 * 1024);

    const supportedFormats = [
      '.pdf',
      '.doc',
      '.docx',
      '.ppt',
      '.pptx',
      '.xls',
      '.xlsx',
    ];
    const preciseFormats = ['.pdf', '.doc', '.docx', '.ppt', '.pptx'];

    if (!supportedFormats.includes(ext)) {
      return {
        recommendedMode: 'fast',
        reason: `Unsupported file format: ${ext}`,
        warnings: ['Using fast mode (text extraction only)'],
      };
    }

    if (!preciseFormats.includes(ext)) {
      return {
        recommendedMode: 'fast',
        reason: `Format ${ext} does not support precise mode`,
        warnings: ['Using fast mode (text extraction only)'],
      };
    }

    // Large files: same recommendation, with a longer time estimate and an
    // extra warning.
    if (sizeMB > 50) {
      return {
        recommendedMode: 'precise',
        reason:
          'The file is large, so precise mode is recommended to retain full information.',
        estimatedCost: sizeMB * 0.01, // Rough estimate
        estimatedTime: sizeMB * 12, // Approx. 12 seconds per 1MB
        warnings: ['Processing time may be long', 'API costs will occur'],
      };
    }

    return {
      recommendedMode: 'precise',
      reason:
        'Precise mode is available. Can retain mixed content of text and images.',
      estimatedCost: sizeMB * 0.01,
      estimatedTime: sizeMB * 10,
      warnings: ['API costs will occur'],
    };
  }

  /**
   * Update processing status (for real-time feedback). Currently only logs;
   * a WebSocket message or database update could be sent from here.
   */
  private updateStatus(
    status: ProcessingStatus['status'],
    progress: number,
    message: string,
  ): void {
    this.logger.log(`[${status}] ${progress}% - ${message}`);
  }
}