feat: implement QuestionBank CRUD with pagination and template query

- Add pagination support to findAll (page, limit query params)
- Add findByTemplateId method to service
- Add GET /by-template/:templateId endpoint to controller
- Service already includes CRUD for QuestionBank and QuestionBankItem
This commit is contained in:
Developer
2026-04-23 17:19:11 +08:00
commit 0a9588abb7
492 changed files with 112453 additions and 0 deletions
@@ -0,0 +1,393 @@
import { Injectable, Logger, BadRequestException } from '@nestjs/common';
import { ConfigService } from '@nestjs/config';
import { ModelConfigService } from '../model-config/model-config.service';
import { TenantService } from '../tenant/tenant.service';
// import { UserSettingService } from '../user-setting/user-setting.service';
/**
* Chunk config service
* Responsible for validating and managing chunk parameters to ensure they conform to model limits and environment variable settings
*
* Priority of limits:
* 1. Environment variables (MAX_CHUNK_SIZE, MAX_OVERLAP_SIZE)
* 2. Model settings in database (maxInputTokens, maxBatchSize)
* 3. Default values
*/
import {
DEFAULT_CHUNK_SIZE,
MIN_CHUNK_SIZE,
DEFAULT_CHUNK_OVERLAP,
MIN_CHUNK_OVERLAP,
DEFAULT_MAX_OVERLAP_RATIO,
DEFAULT_MAX_BATCH_SIZE,
DEFAULT_VECTOR_DIMENSIONS,
} from '../common/constants';
import { I18nService } from '../i18n/i18n.service';
@Injectable()
export class ChunkConfigService {
private readonly logger = new Logger(ChunkConfigService.name);

/** Fallback values used when a request, tenant or model config does not supply one. */
private readonly DEFAULTS = {
  chunkSize: DEFAULT_CHUNK_SIZE,
  chunkOverlap: DEFAULT_CHUNK_OVERLAP,
  minChunkSize: MIN_CHUNK_SIZE,
  minChunkOverlap: MIN_CHUNK_OVERLAP,
  maxOverlapRatio: DEFAULT_MAX_OVERLAP_RATIO, // Largest overlap allowed, as a fraction of the chunk size
  maxBatchSize: DEFAULT_MAX_BATCH_SIZE, // Default batch limit
  expectedDimensions: DEFAULT_VECTOR_DIMENSIONS, // Default vector dimensions
};

// Upper limits set by environment variables (used first)
private readonly envMaxChunkSize: number;
private readonly envMaxOverlapSize: number;

/**
 * Reads the global MAX_CHUNK_SIZE / MAX_OVERLAP_SIZE limits once at construction time.
 *
 * @param configService Source of the environment-level limits.
 * @param modelConfigService Used to look up per-model limits.
 * @param i18nService Used to build localized warning and error messages.
 * @param tenantService Used to read tenant-level default chunk settings.
 */
constructor(
  private configService: ConfigService,
  private modelConfigService: ModelConfigService,
  private i18nService: I18nService,
  private tenantService: TenantService,
) {
  // Load global limit settings from environment variables
  this.envMaxChunkSize = this.readIntSetting('MAX_CHUNK_SIZE', 8191, 1);
  this.envMaxOverlapSize = this.readIntSetting('MAX_OVERLAP_SIZE', 2000, 0);
  this.logger.log(
    `Environment variable limits: MAX_CHUNK_SIZE=${this.envMaxChunkSize}, MAX_OVERLAP_SIZE=${this.envMaxOverlapSize}`,
  );
}

/**
 * Read an integer setting, falling back to `fallback` when the configured
 * value is not a base-10 integer or is smaller than `min`.
 *
 * Without this guard a malformed value parses to NaN, and every
 * `Math.min` / comparison that uses the limit silently stops clamping.
 */
private readIntSetting(key: string, fallback: number, min: number): number {
  const raw = this.configService.get<string>(key, String(fallback));
  const parsed = parseInt(String(raw), 10);
  if (Number.isNaN(parsed) || parsed < min) {
    this.logger.warn(
      `Invalid ${key} value "${raw}"; using default ${fallback}`,
    );
    return fallback;
  }
  return parsed;
}
/**
 * Resolve the effective limits of an embedding model from its stored config.
 *
 * Falsy config fields fall back to: the environment chunk-size limit for
 * `maxInputTokens`, `DEFAULTS.maxBatchSize` for `maxBatchSize`, the
 * DEFAULT_VECTOR_DIMENSIONS setting for `expectedDimensions`, `'unknown'`
 * for `providerName` and `false` for `isVectorModel`.
 *
 * @param modelId Identifier passed to `ModelConfigService.findOne`.
 * @returns The resolved limits; they are also written to the log.
 * @throws BadRequestException when no config is found or its type is not `embedding`.
 */
async getModelLimits(modelId: string): Promise<{
  maxInputTokens: number;
  maxBatchSize: number;
  expectedDimensions: number;
  providerName: string;
  isVectorModel: boolean;
}> {
  const config = await this.modelConfigService.findOne(modelId);
  if (!config || config.type !== 'embedding') {
    const notFoundMessage = this.i18nService.formatMessage(
      'embeddingModelNotFound',
      { id: modelId },
    );
    throw new BadRequestException(notFoundMessage);
  }

  // Stored values win; the fallbacks are only evaluated for falsy fields.
  const resolved = {
    maxInputTokens: config.maxInputTokens || this.envMaxChunkSize,
    maxBatchSize: config.maxBatchSize || this.DEFAULTS.maxBatchSize,
    expectedDimensions:
      config.dimensions ||
      parseInt(
        this.configService.get(
          'DEFAULT_VECTOR_DIMENSIONS',
          String(this.DEFAULTS.expectedDimensions),
        ),
      ),
    providerName: config.providerName || 'unknown',
    isVectorModel: config.isVectorModel || false,
  };

  const reportLines = [
    this.i18nService.formatMessage('configLoaded', {
      name: config.name,
      id: config.modelId,
    }),
    ` - Provider: ${resolved.providerName}`,
    ` - Token limit: ${resolved.maxInputTokens}`,
    ` - Batch limit: ${resolved.maxBatchSize}`,
    ` - Vector dimensions: ${resolved.expectedDimensions}`,
    ` - Is vector model: ${resolved.isVectorModel}`,
  ];
  this.logger.log(reportLines.join('\n'));

  return resolved;
}
/**
 * Validate a requested chunk size / overlap pair against the effective limits,
 * clamping out-of-range values and recording a warning for each adjustment.
 *
 * The checks run in a fixed order and each one sees the values already
 * clamped by the earlier ones:
 *  - chunk size upper limit (smaller of the environment limit and the model limit)
 *  - chunk size lower limit (`DEFAULTS.minChunkSize`)
 *  - overlap upper limit (smaller of the environment limit and the ratio of the max chunk size)
 *  - overlap relative to the (possibly clamped) chunk size
 *  - overlap lower limit (`DEFAULTS.minChunkOverlap`)
 * The batch-safety and chunk-count checks only add warnings; they never change a value.
 *
 * @param chunkSize Requested chunk size; compared against the token limits.
 * @param chunkOverlap Requested overlap between chunks.
 * @param modelId Embedding model whose limits apply (resolved via `getModelLimits`).
 * @returns The adjusted values, the collected warnings and the effective upper limits used.
 * @throws BadRequestException propagated from `getModelLimits` when the model is missing or not an embedding model.
 */
async validateChunkConfig(
chunkSize: number,
chunkOverlap: number,
modelId: string,
): Promise<{
chunkSize: number;
chunkOverlap: number;
warnings: string[];
effectiveMaxChunkSize: number;
effectiveMaxOverlapSize: number;
}> {
const warnings: string[] = [];
const limits = await this.getModelLimits(modelId);
// 1. Calculate final limits (choose smaller of env var and model limit)
const effectiveMaxChunkSize = Math.min(
this.envMaxChunkSize,
limits.maxInputTokens,
);
// The overlap ceiling is additionally bounded by the ratio applied to the max chunk size
const effectiveMaxOverlapSize = Math.min(
this.envMaxOverlapSize,
Math.floor(effectiveMaxChunkSize * this.DEFAULTS.maxOverlapRatio),
);
// 2. Validate chunk size upper limit
if (chunkSize > effectiveMaxChunkSize) {
// Name whichever limit is the binding one in the warning text
const reason =
this.envMaxChunkSize < limits.maxInputTokens
? `${this.i18nService.getMessage('environmentLimit')} ${this.envMaxChunkSize}`
: `${this.i18nService.getMessage('modelLimit')} ${limits.maxInputTokens}`;
warnings.push(
this.i18nService.formatMessage('chunkOverflow', {
size: chunkSize,
max: effectiveMaxChunkSize,
reason,
}),
);
chunkSize = effectiveMaxChunkSize;
}
// 3. Validate chunk size lower limit
if (chunkSize < this.DEFAULTS.minChunkSize) {
warnings.push(
this.i18nService.formatMessage('chunkUnderflow', {
size: chunkSize,
min: this.DEFAULTS.minChunkSize,
}),
);
chunkSize = this.DEFAULTS.minChunkSize;
}
// 4. Validate overlap size upper limit (env var first)
if (chunkOverlap > effectiveMaxOverlapSize) {
warnings.push(
this.i18nService.formatMessage('overlapOverflow', {
size: chunkOverlap,
max: effectiveMaxOverlapSize,
}),
);
chunkOverlap = effectiveMaxOverlapSize;
}
// 5. Validate overlap doesn't exceed the allowed ratio of the (already clamped) chunk size
const maxOverlapByRatio = Math.floor(
chunkSize * this.DEFAULTS.maxOverlapRatio,
);
if (chunkOverlap > maxOverlapByRatio) {
warnings.push(
this.i18nService.formatMessage('overlapRatioExceeded', {
size: chunkOverlap,
max: maxOverlapByRatio,
}),
);
chunkOverlap = maxOverlapByRatio;
}
// Overlap lower limit; runs last, so it can raise a value lowered by the ratio check above
if (chunkOverlap < this.DEFAULTS.minChunkOverlap) {
warnings.push(
this.i18nService.formatMessage('overlapUnderflow', {
size: chunkOverlap,
min: this.DEFAULTS.minChunkOverlap,
}),
);
chunkOverlap = this.DEFAULTS.minChunkOverlap;
}
// 6. Add safety check for batch processing
// During batch processing, ensure total length of multiple texts doesn't exceed model limits
// (warning only: chunkSize is not reduced here)
const safetyMargin = 0.8; // 80% safety margin to leave space for batch processing
const safeChunkSize = Math.floor(effectiveMaxChunkSize * safetyMargin);
if (chunkSize > safeChunkSize) {
warnings.push(
this.i18nService.formatMessage('batchOverflowWarning', {
safeSize: safeChunkSize,
size: chunkSize,
percent: Math.round(safetyMargin * 100),
}),
);
}
// 7. Check if estimated chunk count is reasonable (warning only)
const estimatedChunkCount = this.estimateChunkCount(
1000000, // Assume 1MB text
chunkSize,
);
if (estimatedChunkCount > 50000) {
warnings.push(
this.i18nService.formatMessage('estimatedChunkCountExcessive', {
count: estimatedChunkCount,
}),
);
}
return {
chunkSize,
chunkOverlap,
warnings,
effectiveMaxChunkSize,
effectiveMaxOverlapSize,
};
}
/**
 * Recommend a batch size for the given embedding model.
 *
 * The result is the smallest of the caller's batch size, the model's batch
 * limit and a hard safety cap of 200, raised to a floor of 10. The floor is
 * itself capped at the model's limit, so a model that accepts fewer than 10
 * inputs per request is never given a batch it would reject.
 *
 * @param modelId Embedding model whose limits apply (resolved via `getModelLimits`).
 * @param currentBatchSize Batch size the caller would like to use.
 * @returns The recommended batch size.
 * @throws BadRequestException propagated from `getModelLimits`.
 */
async getRecommendedBatchSize(
  modelId: string,
  currentBatchSize: number = 100,
): Promise<number> {
  const limits = await this.getModelLimits(modelId);
  // Choose smaller of configured value and model limit
  const recommended = Math.min(
    currentBatchSize,
    limits.maxBatchSize,
    200, // Safety upper limit
  );
  if (recommended < currentBatchSize) {
    this.logger.warn(
      this.i18nService.formatMessage('batchSizeAdjusted', {
        old: currentBatchSize,
        new: recommended,
        limit: limits.maxBatchSize,
      }),
    );
  }
  // Minimum 10, but never above what the model itself accepts (and never below 1)
  const floor = Math.max(1, Math.min(10, limits.maxBatchSize));
  return Math.max(floor, recommended);
}
/**
 * Estimate how many chunks a text of `textLength` characters produces.
 *
 * Uses the rough conversion of 4 characters per token and rounds up.
 * Overlap is not taken into account.
 *
 * @param textLength Length of the text in characters.
 * @param chunkSize Chunk size in tokens.
 * @returns The estimated number of chunks.
 */
estimateChunkCount(textLength: number, chunkSize: number): number {
  const charsPerChunk = chunkSize * 4; // 1 token ≈ 4 chars
  const exactChunks = textLength / charsPerChunk;
  return Math.ceil(exactChunks);
}
/**
 * Check that a vector's dimension count matches what the model is expected to produce.
 *
 * A mismatch is logged as a warning; it does not throw.
 *
 * @param modelId Embedding model whose expected dimensions are looked up.
 * @param actualDimensions Dimension count observed by the caller.
 * @returns `true` when the counts are equal, otherwise `false`.
 * @throws BadRequestException propagated from `getModelLimits`.
 */
async validateDimensions(
  modelId: string,
  actualDimensions: number,
): Promise<boolean> {
  const { expectedDimensions } = await this.getModelLimits(modelId);
  if (actualDimensions === expectedDimensions) {
    return true;
  }
  const mismatchMessage = this.i18nService.formatMessage('dimensionMismatch', {
    id: modelId,
    expected: expectedDimensions,
    actual: actualDimensions,
  });
  this.logger.warn(mismatchMessage);
  return false;
}
/**
 * Build a one-line, comma-separated summary of a chunk configuration (for logging).
 *
 * The chunk size and overlap are echoed as given; they are not validated here.
 *
 * @param chunkSize Chunk size in tokens.
 * @param chunkOverlap Overlap size in tokens.
 * @param modelId Embedding model whose limits are included in the summary.
 * @returns The summary string.
 * @throws BadRequestException propagated from `getModelLimits`.
 */
async getConfigSummary(
  chunkSize: number,
  chunkOverlap: number,
  modelId: string,
): Promise<string> {
  const { maxInputTokens, maxBatchSize, expectedDimensions } =
    await this.getModelLimits(modelId);
  const modelPart = `Model: ${modelId}`;
  const chunkPart = `Chunk size: ${chunkSize} tokens (limit: ${maxInputTokens})`;
  const overlapPart = `Overlap size: ${chunkOverlap} tokens`;
  const batchPart = `Batch size: ${maxBatchSize}`;
  const dimensionPart = `Vector dimensions: ${expectedDimensions}`;
  return `${modelPart}, ${chunkPart}, ${overlapPart}, ${batchPart}, ${dimensionPart}`;
}
/**
 * Compute the chunk-setting limits and defaults exposed to the frontend
 * (e.g. as slider bounds).
 *
 * Maximums are the smaller of the environment limit and the model limit.
 * Defaults come from the tenant settings when `tenantId` is given and the
 * tenant has truthy values, otherwise from `DEFAULTS`; they are then clamped
 * to the computed maximums.
 *
 * @param modelId Embedding model whose limits apply.
 * @param userId Accepted for interface compatibility; not read by this method.
 * @param tenantId Optional tenant whose settings supply the defaults.
 * @returns The limits, the defaults and basic model information.
 * @throws BadRequestException propagated from `getModelLimits`.
 */
async getFrontendLimits(
  modelId: string,
  userId: string,
  tenantId?: string,
): Promise<{
  maxChunkSize: number;
  maxOverlapSize: number;
  minOverlapSize: number;
  defaultChunkSize: number;
  defaultOverlapSize: number;
  modelInfo: {
    name: string;
    maxInputTokens: number;
    maxBatchSize: number;
    expectedDimensions: number;
  };
}> {
  const modelLimits = await this.getModelLimits(modelId);

  // Final limits: smaller of the environment variable and the model limit
  const chunkCeiling = Math.min(
    this.envMaxChunkSize,
    modelLimits.maxInputTokens,
  );
  const ratioCeiling = Math.floor(chunkCeiling * this.DEFAULTS.maxOverlapRatio);
  const overlapCeiling = Math.min(this.envMaxOverlapSize, ratioCeiling);

  // The limits object carries no display name, so the config is read again for it
  const storedConfig = await this.modelConfigService.findOne(modelId);

  // Tenant settings override the built-in defaults when present and truthy
  let preferredChunkSize = this.DEFAULTS.chunkSize;
  let preferredOverlap = this.DEFAULTS.chunkOverlap;
  if (tenantId) {
    const tenantSettings = await this.tenantService.getSettings(tenantId);
    if (tenantSettings?.chunkSize) {
      preferredChunkSize = tenantSettings.chunkSize;
    }
    if (tenantSettings?.chunkOverlap) {
      preferredOverlap = tenantSettings.chunkOverlap;
    }
  }

  const cappedOverlap = Math.min(preferredOverlap, overlapCeiling);
  return {
    maxChunkSize: chunkCeiling,
    maxOverlapSize: overlapCeiling,
    minOverlapSize: this.DEFAULTS.minChunkOverlap,
    defaultChunkSize: Math.min(preferredChunkSize, chunkCeiling),
    defaultOverlapSize: Math.max(this.DEFAULTS.minChunkOverlap, cappedOverlap),
    modelInfo: {
      name: storedConfig?.name || 'Unknown',
      maxInputTokens: modelLimits.maxInputTokens,
      maxBatchSize: modelLimits.maxBatchSize,
      expectedDimensions: modelLimits.expectedDimensions,
    },
  };
}
}
@@ -0,0 +1,11 @@
import { IsNotEmpty, IsOptional, IsString } from 'class-validator';
/**
 * Request payload for creating a knowledge base.
 * Field constraints are declared with class-validator decorators.
 */
export class CreateKnowledgeBaseDto {
// Required: must be a non-empty string
@IsString()
@IsNotEmpty()
name: string;
// Optional: validated as a string only when provided
@IsString()
@IsOptional()
description?: string;
}
@@ -0,0 +1,286 @@
import { Injectable, Logger } from '@nestjs/common';
import { ConfigService } from '@nestjs/config';
import { ModelConfigService } from '../model-config/model-config.service';
import { I18nService } from '../i18n/i18n.service';
/**
 * Shape the embeddings endpoint's JSON response body is parsed into
 * (see `EmbeddingService.getEmbeddingsForBatch`).
 */
export interface EmbeddingResponse {
// One entry per embedded input; only `embedding` is read by this service
data: Array<{
embedding: number[];
// NOTE(review): presumably the position of the corresponding input text — confirm against the provider API
index: number;
}>;
model: string;
// Token accounting reported by the provider; not read by this service
usage: {
prompt_tokens: number;
total_tokens: number;
};
}
@Injectable()
export class EmbeddingService {
private readonly logger = new Logger(EmbeddingService.name);

/** Dimension count read from DEFAULT_VECTOR_DIMENSIONS ('2560' when unset). */
private readonly defaultDimensions: number;

/**
 * Reads the default vector dimension setting once and logs it.
 *
 * @param modelConfigService Used to look up embedding model configurations.
 * @param configService Source of the DEFAULT_VECTOR_DIMENSIONS setting.
 * @param i18nService Used to build localized error messages.
 */
constructor(
  private modelConfigService: ModelConfigService,
  private configService: ConfigService,
  private i18nService: I18nService,
) {
  const configuredDimensions = this.configService.get<string>(
    'DEFAULT_VECTOR_DIMENSIONS',
    '2560',
  );
  this.defaultDimensions = parseInt(configuredDimensions);
  this.logger.log(
    `Default vector dimensions set to ${this.defaultDimensions}`,
  );
}
/**
 * Generate embeddings for a list of texts with the given embedding model.
 *
 * Inputs larger than the model's batch limit are sent as consecutive batches
 * with a 100ms pause between them; results are returned in input order.
 * An empty input list returns an empty result without calling the API.
 *
 * @param texts Texts to embed.
 * @param embeddingModelConfigId Id of the model config to use.
 * @returns One embedding vector per input text.
 * @throws Error when the model config is missing, is not an embedding model,
 *   is disabled, has no `baseUrl`, or when the API call ultimately fails.
 */
async getEmbeddings(
  texts: string[],
  embeddingModelConfigId: string,
): Promise<number[][]> {
  this.logger.log(`Generating embeddings for ${texts.length} texts`);
  const modelConfig = await this.modelConfigService.findOne(
    embeddingModelConfigId,
  );
  if (!modelConfig || modelConfig.type !== 'embedding') {
    throw new Error(
      this.i18nService.formatMessage('embeddingModelNotFound', {
        id: embeddingModelConfigId,
      }),
    );
  }
  if (modelConfig.isEnabled === false) {
    throw new Error(
      `Model ${modelConfig.name} is disabled and cannot generate embeddings`,
    );
  }
  if (!modelConfig.baseUrl) {
    throw new Error(
      `Model ${modelConfig.name} does not have baseUrl configured`,
    );
  }

  // Nothing to embed: skip the request (config errors above are still reported)
  if (texts.length === 0) {
    return [];
  }

  // Determine max batch size based on model name. Clamp to a whole number >= 1:
  // a zero, negative or fractional step would keep the batching loop below
  // from ever advancing correctly.
  const maxBatchSize = Math.max(
    1,
    Math.floor(
      this.getMaxBatchSizeForModel(
        modelConfig.modelId,
        modelConfig.maxBatchSize,
      ),
    ),
  );

  // Normal processing (within batch size)
  if (texts.length <= maxBatchSize) {
    return await this.getEmbeddingsForBatch(texts, modelConfig, maxBatchSize);
  }

  // Split processing if batch size exceeds limit
  this.logger.log(
    `Splitting ${texts.length} texts into batches (model batch limit: ${maxBatchSize})`,
  );
  const allEmbeddings: number[][] = [];
  for (let i = 0; i < texts.length; i += maxBatchSize) {
    const batch = texts.slice(i, i + maxBatchSize);
    const batchEmbeddings = await this.getEmbeddingsForBatch(
      batch,
      modelConfig,
      maxBatchSize,
    );
    allEmbeddings.push(...batchEmbeddings);
    // Wait briefly to avoid API rate limiting (not needed after the last batch)
    if (i + maxBatchSize < texts.length) {
      await new Promise((resolve) => setTimeout(resolve, 100)); // Wait 100ms
    }
  }
  return allEmbeddings;
}
/**
 * Pick the maximum number of texts to send per request for a model.
 *
 * The model id is matched by substring against two known families; each
 * family has its own cap, and every other model is capped at 100. A
 * configured batch size only ever lowers the cap.
 *
 * @param modelId Model identifier to match.
 * @param configuredMaxBatchSize Optional batch size from the model config; falsy values are ignored.
 * @returns The batch size to use.
 */
private getMaxBatchSizeForModel(
  modelId: string,
  configuredMaxBatchSize?: number,
): number {
  const idContainsAny = (fragments: string[]): boolean =>
    fragments.some((fragment) => modelId.includes(fragment));

  // Family capped at 10 (the original comment attributes this to a Google limit)
  const cappedAtTen = [
    'text-embedding-004',
    'text-embedding-v4',
    'text-embedding-ada-002',
  ];
  if (idContainsAny(cappedAtTen)) {
    return Math.min(10, configuredMaxBatchSize || 100);
  }

  // Family capped at 2048 (the original comment attributes this to the OpenAI v3 limit)
  const cappedAt2048 = ['text-embedding-3', 'text-embedding-003'];
  if (idContainsAny(cappedAt2048)) {
    return Math.min(2048, configuredMaxBatchSize || 2048);
  }

  // Default: smaller of configured max or 100
  return Math.min(configuredMaxBatchSize || 100, 100);
}
/**
 * Request embeddings for a single batch of texts.
 *
 * Transient failures (network errors, timeouts, HTTP 429 and 5xx) are retried
 * up to three attempts with exponential backoff. Failures that cannot succeed
 * on a retry (context-length errors, other 4xx responses, and errors coming
 * out of a split sub-batch that has already used its own retries) are thrown
 * immediately. Retrying them at every recursion level would multiply the
 * number of requests.
 *
 * When the API reports what looks like a batch-size problem and the batch has
 * more than one text, the batch is halved and each half is requested
 * recursively. Authentication failures (401/403) are never treated as
 * batch-size problems, even if their body contains the word "invalid".
 *
 * @param texts Texts to embed in one request.
 * @param modelConfig Model configuration; `baseUrl`, `apiKey`, `modelId`, `name` and `maxInputTokens` are read.
 * @param maxBatchSize Batch limit in effect; used for logging and halved on each split.
 * @returns One embedding vector per input text, in the order returned by the API.
 * @throws Error when the request fails and no retry is possible or left.
 */
private async getEmbeddingsForBatch(
  texts: string[],
  modelConfig: any,
  maxBatchSize: number,
): Promise<number[][]> {
  const apiUrl = modelConfig.baseUrl.endsWith('/embeddings')
    ? modelConfig.baseUrl
    : `${modelConfig.baseUrl}/embeddings`;
  const MAX_RETRIES = 3;

  // Tag an error so the retry loop below rethrows it instead of retrying.
  const asFinal = (cause: unknown): Error => {
    const finalError =
      cause instanceof Error ? cause : new Error(String(cause));
    (finalError as Error & { retryable?: boolean }).retryable = false;
    return finalError;
  };

  let lastError: unknown;
  for (let attempt = 1; attempt <= MAX_RETRIES; attempt++) {
    try {
      const controller = new AbortController();
      const timeoutId = setTimeout(() => {
        controller.abort();
        this.logger.error(`Embedding API timeout after 60s: ${apiUrl}`);
      }, 60000); // 60s timeout
      this.logger.log(
        `[Model call] Type: Embedding, Model: ${modelConfig.name} (${modelConfig.modelId}), Text count: ${texts.length}`,
      );
      this.logger.log(
        `Calling embedding API (attempt ${attempt}/${MAX_RETRIES}): ${apiUrl}`,
      );
      let response;
      try {
        response = await fetch(apiUrl, {
          method: 'POST',
          headers: {
            'Content-Type': 'application/json',
            Authorization: `Bearer ${modelConfig.apiKey}`,
          },
          body: JSON.stringify({
            encoding_format: 'float',
            input: texts,
            model: modelConfig.modelId,
          }),
          signal: controller.signal,
        });
      } finally {
        // Always release the timer, whether the request settled or threw
        clearTimeout(timeoutId);
      }

      if (!response.ok) {
        const errorText = await response.text();

        // Detect batch size limit error. Auth failures are excluded because
        // bodies such as "invalid_api_key" would otherwise match 'invalid'.
        const isAuthFailure =
          response.status === 401 || response.status === 403;
        const looksLikeBatchLimit =
          !isAuthFailure &&
          (errorText.includes('batch size is invalid') ||
            errorText.includes('batch_size') ||
            errorText.includes('invalid') ||
            errorText.includes('larger than'));
        if (looksLikeBatchLimit && texts.length > 1) {
          const halvedLimit = Math.floor(maxBatchSize / 2);
          this.logger.warn(
            `Batch size limit error detected. Splitting batch in half and retrying: ${maxBatchSize} -> ${halvedLimit}`,
          );
          // Split batch into smaller units and retry
          const midPoint = Math.floor(texts.length / 2);
          const firstHalf = texts.slice(0, midPoint);
          const secondHalf = texts.slice(midPoint);
          try {
            const firstResult = await this.getEmbeddingsForBatch(
              firstHalf,
              modelConfig,
              halvedLimit,
            );
            const secondResult = await this.getEmbeddingsForBatch(
              secondHalf,
              modelConfig,
              halvedLimit,
            );
            return [...firstResult, ...secondResult];
          } catch (splitError) {
            // The sub-batches already retried on their own; do not retry them again here
            throw asFinal(splitError);
          }
        }

        // Detect context length excess error (cannot succeed on a retry)
        if (
          errorText.includes('context length') ||
          errorText.includes('exceeds')
        ) {
          const totalLength = texts.reduce((s, t) => s + t.length, 0);
          const avgLength = totalLength / texts.length;
          this.logger.error(
            `Text length exceeds limit: ${texts.length} texts, ` +
              `total ${totalLength} characters, average ${Math.round(avgLength)} characters, ` +
              `model limit: ${modelConfig.maxInputTokens || 8192} tokens`,
          );
          throw asFinal(
            new Error(
              `Text length exceeds model limit. ` +
                `Current: ${texts.length} texts with total ${totalLength} characters, ` +
                `model limit: ${modelConfig.maxInputTokens || 8192} tokens. ` +
                `Advice: Reduce chunk size or batch size`,
            ),
          );
        }

        // Retry on 429 (Too Many Requests) or 5xx (Server Error)
        if (response.status === 429 || response.status >= 500) {
          this.logger.warn(
            `Temporary error from embedding API (${response.status}): ${errorText}`,
          );
          throw new Error(`API Error ${response.status}: ${errorText}`);
        }

        // Any other error response is treated as permanent
        this.logger.error(`Embedding API error details: ${errorText}`);
        this.logger.error(
          `Request parameters: model=${modelConfig.modelId}, inputLength=${texts[0]?.length}`,
        );
        throw asFinal(
          new Error(
            `Embedding API call failed: ${response.statusText} - ${errorText}`,
          ),
        );
      }

      const data: EmbeddingResponse = await response.json();
      const embeddings = data.data.map((item) => item.embedding);
      // Get dimensions from actual response (used for logging only)
      const actualDimensions =
        embeddings[0]?.length || this.defaultDimensions;
      this.logger.log(
        `Got ${embeddings.length} embedding vectors from ${modelConfig.name}. Dimensions: ${actualDimensions}`,
      );
      return embeddings;
    } catch (error) {
      lastError = error;
      const isRetryable =
        (error as { retryable?: boolean } | null | undefined)?.retryable !==
        false;
      if (!isRetryable || attempt >= MAX_RETRIES) {
        break;
      }
      const delay = Math.pow(2, attempt - 1) * 1000; // 1s after attempt 1, 2s after attempt 2
      const reason = error instanceof Error ? error.message : String(error);
      this.logger.warn(
        `Embedding request failed. Retrying after ${delay}ms: ${reason}`,
      );
      await new Promise((resolve) => setTimeout(resolve, delay));
    }
  }
  throw lastError;
}
/**
 * Return the dimension count to assume for a model.
 *
 * @param modelId Not read: every model gets the same configured default.
 * @returns The default dimension count loaded in the constructor.
 */
private getEstimatedDimensions(modelId: string): number {
  const { defaultDimensions } = this;
  return defaultDimensions;
}
}
@@ -0,0 +1,362 @@
import {
  Body,
  Controller,
  Delete,
  Get,
  HttpException,
  InternalServerErrorException,
  NotFoundException,
  Param,
  Post,
  Query,
  Request,
  Res,
  UseGuards,
} from '@nestjs/common';
import { Response } from 'express';
import * as path from 'path';
import { Logger } from '@nestjs/common';
import { KnowledgeBaseService } from './knowledge-base.service';
import { CombinedAuthGuard } from '../auth/combined-auth.guard';
import { RolesGuard } from '../auth/roles.guard';
import { Roles } from '../auth/roles.decorator';
import { UserRole } from '../user/user-role.enum';
import { Public } from '../auth/public.decorator';
import { KnowledgeBase } from './knowledge-base.entity';
import { ChunkConfigService } from './chunk-config.service';
import { KnowledgeGroupService } from '../knowledge-group/knowledge-group.service';
import { I18nService } from '../i18n/i18n.service';
@Controller('knowledge-bases')
@UseGuards(CombinedAuthGuard, RolesGuard)
export class KnowledgeBaseController {
private readonly logger = new Logger(KnowledgeBaseController.name);
/**
 * @param knowledgeBaseService File listing, search, deletion and PDF/page preview operations.
 * @param chunkConfigService Supplies chunk-setting limits for the frontend.
 * @param knowledgeGroupService Adds files to and removes files from groups.
 * @param i18nService Supplies localized response and error messages.
 */
constructor(
private readonly knowledgeBaseService: KnowledgeBaseService,
private readonly chunkConfigService: ChunkConfigService,
private readonly knowledgeGroupService: KnowledgeGroupService,
private readonly i18nService: I18nService,
) {}
/**
 * List knowledge base entries for the authenticated user and tenant.
 *
 * @param req Request carrying the authenticated `user` (`id`, `tenantId`).
 * @returns The entries returned by `KnowledgeBaseService.findAll`.
 */
@Get()
@UseGuards(CombinedAuthGuard)
async findAll(@Request() req): Promise<KnowledgeBase[]> {
  const { id: userId, tenantId } = req.user;
  return this.knowledgeBaseService.findAll(userId, tenantId);
}
/**
 * Return entry counts for the authenticated user and tenant.
 *
 * @param req Request carrying the authenticated `user` (`id`, `tenantId`).
 * @returns The `total` and `uncategorized` counts from `KnowledgeBaseService.getStats`.
 */
@Get('stats')
@UseGuards(CombinedAuthGuard)
async getStats(
  @Request() req,
): Promise<{ total: number; uncategorized: number }> {
  const { id: userId, tenantId } = req.user;
  return this.knowledgeBaseService.getStats(userId, tenantId);
}
/**
 * Clear the knowledge base for the authenticated user and tenant.
 * Restricted to tenant admins and super admins.
 *
 * @param req Request carrying the authenticated `user` (`id`, `tenantId`).
 * @returns A localized confirmation message.
 */
@Delete('clear')
@Roles(UserRole.TENANT_ADMIN, UserRole.SUPER_ADMIN)
async clearAll(@Request() req): Promise<{ message: string }> {
  const { id: userId, tenantId } = req.user;
  await this.knowledgeBaseService.clearAll(userId, tenantId);
  const message = this.i18nService.getMessage('kbCleared');
  return { message };
}
/**
 * Search the knowledge base for the authenticated user and tenant.
 *
 * @param req Request carrying the authenticated `user` (`id`, `tenantId`).
 * @param body `query` to search for and optional `topK`; a falsy `topK` becomes 5.
 * @returns The result of `KnowledgeBaseService.searchKnowledge`.
 */
@Post('search')
async search(@Request() req, @Body() body: { query: string; topK?: number }) {
  const { id: userId, tenantId } = req.user;
  const resultLimit = body.topK || 5;
  return this.knowledgeBaseService.searchKnowledge(
    userId,
    tenantId,
    body.query,
    resultLimit,
  );
}
/**
 * Run a RAG search for the authenticated user and tenant.
 *
 * @param req Request carrying the authenticated `user` (`id`, `tenantId`).
 * @param body `query` plus a `settings` object passed through unchanged to the service.
 * @returns The result of `KnowledgeBaseService.ragSearch`.
 */
@Post('rag-search')
async ragSearch(
  @Request() req,
  @Body() body: { query: string; settings: any },
) {
  const { id: userId, tenantId } = req.user;
  const { query, settings } = body;
  return this.knowledgeBaseService.ragSearch(userId, tenantId, query, settings);
}
/**
 * Delete one file. Restricted to tenant admins and super admins.
 *
 * @param req Request carrying the authenticated `user` (`id`, `tenantId`).
 * @param fileId Id of the file, taken from the route.
 * @returns A localized confirmation message.
 */
@Delete(':id')
@Roles(UserRole.TENANT_ADMIN, UserRole.SUPER_ADMIN)
async deleteFile(
  @Request() req,
  @Param('id') fileId: string,
): Promise<{ message: string }> {
  const { id: userId, tenantId } = req.user;
  await this.knowledgeBaseService.deleteFile(fileId, userId, tenantId);
  const message = this.i18nService.getMessage('fileDeleted');
  return { message };
}
/**
 * Retry processing of a failed file. Restricted to tenant admins and super admins.
 *
 * @param req Request carrying the authenticated `user` (`id`, `tenantId`).
 * @param fileId Id of the file, taken from the route.
 * @returns The entry returned by `KnowledgeBaseService.retryFailedFile`.
 */
@Post(':id/retry')
@Roles(UserRole.TENANT_ADMIN, UserRole.SUPER_ADMIN)
async retryFile(
  @Request() req,
  @Param('id') fileId: string,
): Promise<KnowledgeBase> {
  const { id: userId, tenantId } = req.user;
  return this.knowledgeBaseService.retryFailedFile(fileId, userId, tenantId);
}
/**
 * Return the chunks of one file.
 *
 * @param req Request carrying the authenticated `user` (`id`, `tenantId`).
 * @param fileId Id of the file, taken from the route.
 * @returns The result of `KnowledgeBaseService.getFileChunks`.
 */
@Get(':id/chunks')
async getFileChunks(@Request() req, @Param('id') fileId: string) {
  const { id: userId, tenantId } = req.user;
  return this.knowledgeBaseService.getFileChunks(fileId, userId, tenantId);
}
/**
 * Get chunk config limits (for frontend slider settings).
 *
 * With an `embeddingModelId` the limits come from
 * `ChunkConfigService.getFrontendLimits`. Without one, a fixed fallback is
 * built from the MAX_CHUNK_SIZE, MAX_OVERLAP_SIZE and
 * DEFAULT_VECTOR_DIMENSIONS environment variables.
 *
 * @param req Request carrying the authenticated `user` (`id`, `tenantId`).
 * @param embeddingModelId Query parameter: embedding model ID (may be absent).
 * @returns Limits, defaults and model information.
 */
@Get('chunk-config/limits')
async getChunkConfigLimits(
  @Request() req,
  @Query('embeddingModelId') embeddingModelId: string,
) {
  if (embeddingModelId) {
    return await this.chunkConfigService.getFrontendLimits(
      embeddingModelId,
      req.user.id,
      req.user.tenantId,
    );
  }

  // No model selected yet: answer with environment-based fallbacks
  const envChunkLimit = parseInt(process.env.MAX_CHUNK_SIZE || '8191');
  const envOverlapLimit = parseInt(process.env.MAX_OVERLAP_SIZE || '2000');
  const envDimensions = parseInt(
    process.env.DEFAULT_VECTOR_DIMENSIONS || '2560',
  );
  return {
    maxChunkSize: envChunkLimit,
    maxOverlapSize: envOverlapLimit,
    minOverlapSize: 25,
    defaultChunkSize: 200,
    defaultOverlapSize: 40,
    modelInfo: {
      name: this.i18nService.getMessage('modelNotConfigured'),
      maxInputTokens: envChunkLimit,
      maxBatchSize: 2048,
      expectedDimensions: envDimensions,
    },
  };
}
/**
 * Add a file to one or more groups.
 * File group management requires admin permission (tenant admin or super admin).
 *
 * @param fileId Id of the file, taken from the route.
 * @param body `groupIds` of the groups to add the file to.
 * @param req Request carrying the authenticated `user` (`id`, `tenantId`).
 * @returns A localized confirmation message.
 */
@Post(':id/groups')
@Roles(UserRole.TENANT_ADMIN, UserRole.SUPER_ADMIN)
async addFileToGroups(
  @Param('id') fileId: string,
  @Body() body: { groupIds: string[] },
  @Request() req,
) {
  const { id: userId, tenantId } = req.user;
  await this.knowledgeGroupService.addFilesToGroup(
    fileId,
    body.groupIds,
    userId,
    tenantId,
  );
  const message = this.i18nService.getMessage('groupSyncSuccess');
  return { message };
}
/**
 * Remove a file from one group. Restricted to tenant admins and super admins.
 *
 * @param fileId Id of the file, taken from the route.
 * @param groupId Id of the group, taken from the route.
 * @param req Request carrying the authenticated `user` (`id`, `tenantId`).
 * @returns A localized confirmation message.
 */
@Delete(':id/groups/:groupId')
@Roles(UserRole.TENANT_ADMIN, UserRole.SUPER_ADMIN)
async removeFileFromGroup(
  @Param('id') fileId: string,
  @Param('groupId') groupId: string,
  @Request() req,
) {
  const { id: userId, tenantId } = req.user;
  await this.knowledgeGroupService.removeFileFromGroup(
    fileId,
    groupId,
    userId,
    tenantId,
  );
  const message = this.i18nService.getMessage('fileDeletedFromGroup');
  return { message };
}
/**
 * Stream the PDF preview of a file.
 *
 * The route is public: access is granted by a signed `token` query parameter
 * (issued by `getPDFUrl`) instead of the session. The token must verify
 * against JWT_SECRET, have type `pdf-access` and carry the same file id as
 * the route. The user and tenant are taken from the token.
 *
 * @param fileId Id of the file, taken from the route.
 * @param token Signed access token from the query string.
 * @param res Express response the PDF is piped into.
 * @throws NotFoundException when the token is missing or invalid, the PDF is
 *   missing or empty, or any other unexpected error occurs.
 * @throws InternalServerErrorException when JWT_SECRET is not configured.
 */
@Public()
@Get(':id/pdf')
async getPDFPreview(
  @Param('id') fileId: string,
  @Query('token') token: string,
  @Res() res: Response,
) {
  try {
    if (!token) {
      throw new NotFoundException(
        this.i18nService.getMessage('accessDeniedNoToken'),
      );
    }
    const jwt = await import('jsonwebtoken');
    const secret = process.env.JWT_SECRET;
    if (!secret) {
      throw new InternalServerErrorException(
        this.i18nService.getMessage('jwtSecretRequired'),
      );
    }
    let decoded;
    try {
      decoded = jwt.verify(token, secret) as any;
    } catch {
      throw new NotFoundException(
        this.i18nService.getMessage('invalidToken'),
      );
    }
    // The token must have been issued for PDF access to this exact file
    if (decoded.type !== 'pdf-access' || decoded.fileId !== fileId) {
      throw new NotFoundException(
        this.i18nService.getMessage('invalidToken'),
      );
    }
    const pdfPath = await this.knowledgeBaseService.ensurePDFExists(
      fileId,
      decoded.userId,
      decoded.tenantId,
    );
    const fs = await import('fs');
    if (!fs.existsSync(pdfPath)) {
      throw new NotFoundException(
        this.i18nService.getMessage('pdfFileNotFound'),
      );
    }
    const stat = fs.statSync(pdfPath);
    if (stat.size === 0) {
      this.logger.warn(`PDF file is empty: ${pdfPath}`);
      try {
        fs.unlinkSync(pdfPath); // Delete empty file
      } catch (unlinkError) {
        // Best effort: the request still fails with "empty" below
        const reason =
          unlinkError instanceof Error
            ? unlinkError.message
            : String(unlinkError);
        this.logger.warn(`Failed to delete empty PDF file: ${reason}`);
      }
      throw new NotFoundException(
        this.i18nService.getMessage('pdfFileEmpty'),
      );
    }
    res.setHeader('Content-Type', 'application/pdf');
    res.setHeader('Content-Length', stat.size);
    const stream = fs.createReadStream(pdfPath);
    // A read stream without an 'error' listener raises an unhandled error event
    // (e.g. if the file disappears between the checks above and the read).
    stream.on('error', (streamError: Error) => {
      this.logger.error(`PDF stream error: ${streamError.message}`);
      if (res.headersSent) {
        // Part of the body is already on the wire; all we can do is abort
        res.destroy(streamError);
        return;
      }
      res.removeHeader('Content-Length');
      res.status(404).json({
        statusCode: 404,
        message: this.i18nService.getMessage('pdfFileNotFound'),
      });
    });
    stream.pipe(res);
  } catch (error) {
    // Keep the status and message of exceptions raised deliberately above
    if (
      error instanceof NotFoundException ||
      error instanceof InternalServerErrorException
    ) {
      throw error;
    }
    const reason = error instanceof Error ? error.message : String(error);
    this.logger.error(`PDF preview error: ${reason}`);
    throw new NotFoundException(
      this.i18nService.getMessage('pdfConversionFailed'),
    );
  }
}
/**
 * Get a temporary PDF preview URL for a file.
 *
 * Makes sure the PDF exists (passing `force` through to the service when the
 * query value is the string 'true'), then signs a one-hour token that
 * `getPDFPreview` accepts.
 *
 * @param fileId Id of the file, taken from the route.
 * @param force Query flag; only the exact string 'true' enables it.
 * @param req Request carrying the authenticated `user` (`id`, `tenantId`).
 * @returns An object whose `url` points at the public preview route.
 * @throws HttpException Any HTTP exception raised by the service or by this
 *   method is rethrown with its original status.
 * @throws InternalServerErrorException for every other failure; errors whose
 *   message mentions LibreOffice get a dedicated localized message.
 */
@Get(':id/pdf-url')
async getPDFUrl(
  @Param('id') fileId: string,
  @Query('force') force: string,
  @Request() req,
) {
  try {
    // Trigger PDF conversion
    await this.knowledgeBaseService.ensurePDFExists(
      fileId,
      req.user.id,
      req.user.tenantId,
      force === 'true',
    );
    // Generate temporary access token
    const jwt = await import('jsonwebtoken');
    const secret = process.env.JWT_SECRET;
    if (!secret) {
      throw new InternalServerErrorException(
        this.i18nService.getMessage('jwtSecretRequired'),
      );
    }
    const token = jwt.sign(
      {
        fileId,
        userId: req.user.id,
        tenantId: req.user.tenantId,
        type: 'pdf-access',
      },
      secret,
      { expiresIn: '1h' },
    );
    return {
      url: `/api/knowledge-bases/${fileId}/pdf?token=${token}`,
    };
  } catch (error) {
    // Non-Error throwables have no usable `.message`
    const message = error instanceof Error ? error.message : String(error);
    if (message.includes('LibreOffice')) {
      throw new InternalServerErrorException(
        this.i18nService.formatMessage('pdfServiceUnavailable', {
          message,
        }),
      );
    }
    // Preserve the status of HTTP exceptions (e.g. a 404 from the service)
    // instead of turning every failure into a 500
    if (error instanceof HttpException) {
      throw error;
    }
    throw new InternalServerErrorException(message);
  }
}
/**
 * Return the PDF status of a file.
 *
 * @param fileId Id of the file, taken from the route.
 * @param req Request carrying the authenticated `user` (`id`, `tenantId`).
 * @returns The result of `KnowledgeBaseService.getPDFStatus`.
 */
@Get(':id/pdf-status')
async getPDFStatus(@Param('id') fileId: string, @Request() req) {
  const { id: userId, tenantId } = req.user;
  const status = await this.knowledgeBaseService.getPDFStatus(
    fileId,
    userId,
    tenantId,
  );
  return status;
}
/**
 * Get a specific page of a file's PDF as an image.
 *
 * @param fileId Id of the file, taken from the route.
 * @param index Page index from the route; converted with `Number()` before use.
 * @param req Request carrying the authenticated `user` (`id`, `tenantId`).
 * @param res Express response the image file is sent through.
 * @throws NotFoundException when the image file does not exist (its specific
 *   message is preserved) or when rendering fails for any other reason.
 */
@Get(':id/page/:index')
async getPageImage(
  @Param('id') fileId: string,
  @Param('index') index: number,
  @Request() req,
  @Res() res: Response,
) {
  try {
    const imagePath = await this.knowledgeBaseService.getPageAsImage(
      fileId,
      Number(index),
      req.user.id,
      req.user.tenantId,
    );
    const fs = await import('fs');
    if (!fs.existsSync(imagePath)) {
      throw new NotFoundException(
        this.i18nService.getMessage('pageImageNotFound'),
      );
    }
    res.sendFile(path.resolve(imagePath));
  } catch (error) {
    // Keep the specific not-found message instead of replacing it with the
    // generic one below (same handling as getPDFPreview)
    if (error instanceof NotFoundException) {
      throw error;
    }
    const reason = error instanceof Error ? error.message : String(error);
    this.logger.error(`Failed to get PDF page image: ${reason}`);
    throw new NotFoundException(
      this.i18nService.getMessage('pdfPageImageFailed'),
    );
  }
}
}
@@ -0,0 +1,99 @@
import {
Column,
CreateDateColumn,
Entity,
PrimaryGeneratedColumn,
UpdateDateColumn,
ManyToMany,
ManyToOne,
JoinColumn,
} from 'typeorm';
import { KnowledgeGroup } from '../knowledge-group/knowledge-group.entity';
import { Tenant } from '../tenant/tenant.entity';
/**
 * Processing state stored in `KnowledgeBase.status`.
 * New rows default to `PENDING` (see the column definition).
 */
export enum FileStatus {
PENDING = 'pending',
INDEXING = 'indexing',
EXTRACTED = 'extracted', // Text extraction completed and saved to database
VECTORIZED = 'vectorized', // Vectorization completed and indexed to ES
FAILED = 'failed',
}
/**
 * How a file's content is extracted; stored in `KnowledgeBase.processingMode`.
 * New rows default to `FAST` (see the column definition).
 */
export enum ProcessingMode {
FAST = 'fast', // Fast mode - use Tika
PRECISE = 'precise', // Precise mode - use Vision Pipeline
}
/**
 * TypeORM entity mapped to the `knowledge_bases` table.
 * NOTE(review): each row appears to describe one stored file (name, storage path,
 * size, mimetype) together with its indexing state and settings — confirm.
 */
@Entity('knowledge_bases')
export class KnowledgeBase {
@PrimaryGeneratedColumn('uuid')
id: string;
@Column({ name: 'original_name' })
originalName: string;
@Column({ nullable: true })
title: string;
@Column({ name: 'storage_path' })
storagePath: string;
@Column({ type: 'integer', default: 0 })
size: number;
@Column({ length: 100, nullable: true })
mimetype: string;
// Processing state; starts as PENDING
@Column({
type: 'simple-enum',
enum: FileStatus,
default: FileStatus.PENDING,
})
status: FileStatus;
@Column({ name: 'user_id', nullable: true }) // Temporarily allowed empty (for debugging), should be required in future
userId: string;
// Raw foreign key value; shares the `tenant_id` column with the `tenant` relation below
@Column({ name: 'tenant_id', nullable: true, type: 'text' })
tenantId: string;
// Rows are removed when the owning tenant is deleted (onDelete: CASCADE)
@ManyToOne(() => Tenant, { nullable: true, onDelete: 'CASCADE' })
@JoinColumn({ name: 'tenant_id' })
tenant: Tenant;
@Column({ type: 'text', nullable: true })
content: string; // Stores text content extracted by Tika
// Index setting parameters
@Column({ name: 'chunk_size', type: 'integer', default: 1000 })
chunkSize: number;
@Column({ name: 'chunk_overlap', type: 'integer', default: 200 })
chunkOverlap: number;
@Column({ name: 'embedding_model_id', nullable: true })
embeddingModelId: string;
// Extraction mode; starts as FAST
@Column({
type: 'simple-enum',
enum: ProcessingMode,
default: ProcessingMode.FAST,
name: 'processing_mode',
})
processingMode: ProcessingMode;
@Column({ type: 'json', nullable: true })
metadata: any; // Stores additional metadata (image descriptions, confidence, etc.)
@Column({ name: 'pdf_path', nullable: true })
pdfPath: string; // PDF file path (for preview)
// Inverse side of the many-to-many relation declared on KnowledgeGroup.knowledgeBases
@ManyToMany(() => KnowledgeGroup, (group) => group.knowledgeBases)
groups: KnowledgeGroup[];
@CreateDateColumn({ name: 'created_at' })
createdAt: Date;
@UpdateDateColumn({ name: 'updated_at' })
updatedAt: Date;
}
@@ -0,0 +1,52 @@
import { Module, forwardRef } from '@nestjs/common';
import { TypeOrmModule } from '@nestjs/typeorm';
import { KnowledgeBase } from './knowledge-base.entity';
import { KnowledgeGroup } from '../knowledge-group/knowledge-group.entity';
import { KnowledgeBaseService } from './knowledge-base.service';
import { KnowledgeBaseController } from './knowledge-base.controller';
import { ElasticsearchModule } from '../elasticsearch/elasticsearch.module';
import { TikaModule } from '../tika/tika.module';
import { ModelConfigModule } from '../model-config/model-config.module';
import { EmbeddingService } from './embedding.service';
import { TextChunkerService } from './text-chunker.service';
import { RagModule } from '../rag/rag.module';
import { VisionModule } from '../vision/vision.module';
import { MemoryMonitorService } from './memory-monitor.service';
import { ChunkConfigService } from './chunk-config.service';
import { LibreOfficeModule } from '../libreoffice/libreoffice.module';
import { Pdf2ImageModule } from '../pdf2image/pdf2image.module';
import { VisionPipelineModule } from '../vision-pipeline/vision-pipeline.module';
import { KnowledgeGroupModule } from '../knowledge-group/knowledge-group.module';
import { ChatModule } from '../chat/chat.module';
import { UserModule } from '../user/user.module';
import { TenantModule } from '../tenant/tenant.module';
import { CombinedAuthGuard } from '../auth/combined-auth.guard';
/**
 * NestJS module wiring the knowledge base controller, its services and the
 * modules they depend on. Only `KnowledgeBaseService` and `EmbeddingService`
 * are exported to other modules.
 */
@Module({
imports: [
TypeOrmModule.forFeature([KnowledgeBase, KnowledgeGroup]),
// forwardRef is used for the modules below that are imported lazily
// NOTE(review): presumably to break circular module dependencies — confirm
forwardRef(() => ElasticsearchModule),
TikaModule,
ModelConfigModule,
forwardRef(() => RagModule),
VisionModule,
LibreOfficeModule,
Pdf2ImageModule,
VisionPipelineModule,
forwardRef(() => KnowledgeGroupModule),
forwardRef(() => ChatModule),
UserModule,
TenantModule,
],
controllers: [KnowledgeBaseController],
providers: [
KnowledgeBaseService,
EmbeddingService,
TextChunkerService,
MemoryMonitorService,
ChunkConfigService,
CombinedAuthGuard,
],
exports: [KnowledgeBaseService, EmbeddingService],
})
export class KnowledgeBaseModule {}
@@ -0,0 +1,1826 @@
import {
Injectable,
Logger,
NotFoundException,
ForbiddenException,
Inject,
forwardRef,
} from '@nestjs/common';
import { ConfigService } from '@nestjs/config';
import { DEFAULT_LANGUAGE } from '../common/constants';
import { I18nService } from '../i18n/i18n.service';
import { InjectRepository } from '@nestjs/typeorm';
import { Repository, In } from 'typeorm';
import {
FileStatus,
KnowledgeBase,
ProcessingMode,
} from './knowledge-base.entity';
import { KnowledgeGroup } from '../knowledge-group/knowledge-group.entity';
import { ElasticsearchService } from '../elasticsearch/elasticsearch.service';
import { TikaService } from '../tika/tika.service';
import * as fs from 'fs';
import * as path from 'path';
import { EmbeddingService } from './embedding.service';
import { TextChunkerService } from './text-chunker.service';
import { ModelConfigService } from '../model-config/model-config.service';
import { RagService } from '../rag/rag.service';
import { VisionService } from '../vision/vision.service';
import { TenantService } from '../tenant/tenant.service';
import { MemoryMonitorService } from './memory-monitor.service';
import { ChunkConfigService } from './chunk-config.service';
import { VisionPipelineService } from '../vision-pipeline/vision-pipeline.service';
import { LibreOfficeService } from '../libreoffice/libreoffice.service';
import { Pdf2ImageService } from '../pdf2image/pdf2image.service';
import {
DOC_EXTENSIONS,
IMAGE_EXTENSIONS,
} from '../common/file-support.constants';
import { ChatService } from '../chat/chat.service';
import { UserSettingService } from '../user/user-setting.service';
/**
 * Envelope for a page of knowledge-base records.
 *
 * NOTE(review): the field meanings below are inferred from the names only;
 * confirm against the code that builds this object.
 */
export interface PaginatedKnowledgeBase {
// Records contained in this page.
items: KnowledgeBase[];
// Presumably the number of matching records across all pages.
total: number;
// Presumably the requested page number (base not visible here).
page: number;
// Presumably the maximum number of records per page.
limit: number;
}
@Injectable()
export class KnowledgeBaseService {
private readonly logger = new Logger(KnowledgeBaseService.name);
/**
 * Collaborators are supplied through constructor injection.
 *
 * The two repositories are bound with @InjectRepository. ElasticsearchService,
 * RagService and ChatService are injected through forwardRef().
 * NOTE(review): forwardRef() is presumably required because those services
 * depend back on this one; confirm before simplifying.
 */
constructor(
@InjectRepository(KnowledgeBase)
private kbRepository: Repository<KnowledgeBase>,
@InjectRepository(KnowledgeGroup)
private groupRepository: Repository<KnowledgeGroup>,
@Inject(forwardRef(() => ElasticsearchService))
private elasticsearchService: ElasticsearchService,
private tikaService: TikaService,
private embeddingService: EmbeddingService,
private textChunkerService: TextChunkerService,
private modelConfigService: ModelConfigService,
@Inject(forwardRef(() => RagService))
private ragService: RagService,
private visionService: VisionService,
private tenantService: TenantService,
private memoryMonitor: MemoryMonitorService,
private chunkConfigService: ChunkConfigService,
private visionPipelineService: VisionPipelineService,
private libreOfficeService: LibreOfficeService,
private pdf2ImageService: Pdf2ImageService,
private configService: ConfigService,
private i18nService: I18nService,
@Inject(forwardRef(() => ChatService))
private chatService: ChatService,
private userSettingService: UserSettingService,
) {}
/**
 * Creates the database record for an uploaded file, moves the file into its
 * per-record storage directory and starts processing it.
 *
 * @param fileInfo Upload descriptor; `originalname`, `path`, `size`,
 *   `mimetype` and `filename` are read from it.
 * @param userId Owner stored on the record.
 * @param tenantId Tenant stored on the record; also used as the storage
 *   sub-directory (falls back to `default` when empty).
 * @param config Optional settings. Recognised keys: `mode` (`'precise'`
 *   selects precise processing, anything else fast), `chunkSize`,
 *   `chunkOverlap`, `embeddingModelId`, `groupIds` and `waitForCompletion`.
 * @returns The saved record. Unless `config.waitForCompletion` is set,
 *   processing is still running in the background when this resolves.
 */
async createAndIndex(
  fileInfo: any,
  userId: string,
  tenantId: string,
  config?: any,
): Promise<KnowledgeBase> {
  const mode = config?.mode || 'fast';
  const processingMode =
    mode === 'precise' ? ProcessingMode.PRECISE : ProcessingMode.FAST;

  const kb = this.kbRepository.create({
    originalName: fileInfo.originalname,
    storagePath: fileInfo.path,
    size: fileInfo.size,
    mimetype: fileInfo.mimetype,
    status: FileStatus.PENDING,
    userId: userId,
    tenantId: tenantId,
    chunkSize: config?.chunkSize || 200,
    chunkOverlap: config?.chunkOverlap || 40,
    embeddingModelId: config?.embeddingModelId || null,
    processingMode: processingMode,
  });

  // Associate groups (restricted to groups of the same tenant)
  if (config?.groupIds && config.groupIds.length > 0) {
    const groups = await this.groupRepository.find({
      where: { id: In(config.groupIds), tenantId: tenantId },
    });
    kb.groups = groups;
  }

  const savedKb = await this.kbRepository.save(kb);
  this.logger.log(
    `Created KB record: ${savedKb.id}, mode: ${mode}, file: ${fileInfo.originalname}`,
  );

  // ---------------------------------------------------------
  // Move the file to the final partitioned directory
  // source: uploads/{tenantId}/{filename} (or wherever it was)
  // target: uploads/{tenantId}/{savedKb.id}/{filename}
  // ---------------------------------------------------------
  const uploadPath = process.env.UPLOAD_FILE_PATH || './uploads';
  try {
    // All path handling lives inside the try block: the record is already
    // saved, so a malformed `fileInfo` must not abort before processing is
    // triggered below.
    const sourcePath = fileInfo.path;
    // basename() keeps the stored name inside the record's own directory even
    // if the supplied name contains path separators.
    const storedName = path.basename(fileInfo.filename || sourcePath || '');

    if (storedName && sourcePath && fs.existsSync(sourcePath)) {
      const targetDir = path.join(
        uploadPath,
        tenantId || 'default',
        savedKb.id,
      );
      const targetPath = path.join(targetDir, storedName);

      // `recursive: true` is a no-op when the directory already exists.
      fs.mkdirSync(targetDir, { recursive: true });
      fs.renameSync(sourcePath, targetPath);

      // Update the DB record with the new path
      savedKb.storagePath = targetPath;
      await this.kbRepository.save(savedKb);
      this.logger.log(`Moved file to partitioned storage: ${targetPath}`);
    }
  } catch (fsError) {
    this.logger.error(
      `Failed to move file ${savedKb.id} to partitioned storage`,
      fsError,
    );
    // We will let it continue, but the file might be stuck in the temp/root folder
  }

  // If queue processing is requested, await completion
  if (config?.waitForCompletion) {
    await this.processFile(savedKb.id, userId, tenantId, config);
  } else {
    // Otherwise trigger asynchronously (default)
    this.processFile(savedKb.id, userId, tenantId, config).catch((err) => {
      this.logger.error(`Error processing file ${savedKb.id}`, err);
    });
  }

  return savedKb;
}
/**
 * Lists knowledge-base records, newest first, with their groups loaded.
 *
 * Records are filtered by tenant when `tenantId` is truthy and by owner
 * (`userId`) otherwise.
 */
async findAll(userId: string, tenantId?: string): Promise<KnowledgeBase[]> {
  const scope: any = tenantId ? { tenantId } : { userId };

  return this.kbRepository.find({
    where: scope,
    relations: ['groups'], // Load group relations
    order: { createdAt: 'DESC' },
  });
}
/**
 * Loads a single knowledge-base record together with its groups.
 *
 * @throws NotFoundException when no record has the given id.
 * @throws ForbiddenException when TenantService.canAccessTenant denies the
 *   caller access to the record's tenant.
 */
async findOne(
  id: string,
  userId: string,
  tenantId: string,
): Promise<KnowledgeBase> {
  const record = await this.kbRepository.findOne({
    relations: ['groups'],
    where: { id },
  });
  if (!record) {
    throw new NotFoundException(
      this.i18nService.getMessage('knowledgeBaseNotFound'),
    );
  }

  // Permission check is delegated to TenantService
  const allowed = await this.tenantService.canAccessTenant(
    userId,
    record.tenantId,
    tenantId,
  );
  if (allowed) {
    return record;
  }

  throw new ForbiddenException(
    `You do not have permission to access this knowledge base`,
  );
}
/**
 * Counts knowledge-base records in scope and how many of them belong to no
 * group.
 *
 * The scope is the tenant when `tenantId` is truthy, otherwise the owner.
 *
 * @returns `total` (all records in scope) and `uncategorized` (records in
 *   scope without any group).
 */
async getStats(
  userId: string,
  tenantId?: string,
): Promise<{ total: number; uncategorized: number }> {
  const scope: any = tenantId ? { tenantId } : { userId };
  const total = await this.kbRepository.count({ where: scope });

  // A LEFT JOIN that yields a NULL group id identifies records without groups.
  const ungroupedQuery = this.kbRepository
    .createQueryBuilder('kb')
    .leftJoin('kb.groups', 'groups');

  if (tenantId) {
    ungroupedQuery.where('kb.tenantId = :tenantId', { tenantId });
  } else {
    ungroupedQuery.where('kb.userId = :userId', { userId });
  }

  const uncategorized = await ungroupedQuery
    .andWhere('groups.id IS NULL')
    .getCount();

  return { total, uncategorized };
}
/**
 * Runs a vector search in Elasticsearch and decorates every hit with basic
 * information about the file it belongs to.
 *
 * NOTE(review): the query vector is filled with random numbers; `query` is
 * not embedded and is only echoed back in the response. Confirm that this
 * placeholder behaviour is intended before relying on result relevance.
 *
 * @param userId Passed to the Elasticsearch search.
 * @param tenantId Passed to the Elasticsearch search.
 * @param query Echoed back in the response.
 * @param topK Maximum number of hits requested (default 5).
 * @returns `{ query, results, total }`; each result carries a `file`
 *   summary, or `null` when the file record was not found.
 * @throws Rethrows any error after logging it.
 */
async searchKnowledge(
  userId: string,
  tenantId: string,
  query: string,
  topK: number = 5,
) {
  try {
    // 1. Build the placeholder query vector. The dimension comes from the
    //    environment; unusable values fall back to the default instead of
    //    silently producing an empty vector.
    const fallbackDimensions = 2560;
    const parsedDimensions = parseInt(
      process.env.DEFAULT_VECTOR_DIMENSIONS || String(fallbackDimensions),
      10,
    );
    const dimensions =
      Number.isInteger(parsedDimensions) && parsedDimensions > 0
        ? parsedDimensions
        : fallbackDimensions;
    const queryVector = Array.from(
      { length: dimensions },
      () => Math.random() - 0.5,
    );

    // 2. Search in Elasticsearch
    const searchResults = await this.elasticsearchService.searchSimilar(
      queryVector,
      userId,
      topK,
      tenantId, // Ensure shared visibility within tenant
    );

    // 3. Get file information from database. `find` + `In` replaces the
    //    deprecated `findByIds`; the query is skipped when there are no hits.
    const fileIds = [...new Set(searchResults.map((r) => r.fileId))];
    let files: KnowledgeBase[] = [];
    if (fileIds.length > 0) {
      files = await this.kbRepository.find({ where: { id: In(fileIds) } });
    }
    const fileMap = new Map(files.map((f) => [f.id, f]));

    // 4. Combine results with file info
    const results = searchResults.map((result) => {
      const file = fileMap.get(result.fileId);
      return {
        ...result,
        file: file
          ? {
              id: file.id,
              name: file.originalName,
              mimetype: file.mimetype,
              size: file.size,
              createdAt: file.createdAt,
            }
          : null,
      };
    });

    return {
      query,
      results,
      total: results.length,
    };
  } catch (error) {
    this.logger.error(
      `Metadata search failed for tenant ${tenantId}:`,
      error.stack || error.message,
    );
    throw error;
  }
}
/**
 * Performs a RAG search through RagService and assembles the hits, their
 * sources and a ready-to-use prompt.
 *
 * This method never throws on search failure: the error is logged and an
 * empty result whose `ragPrompt` is the unmodified query is returned.
 *
 * @param settings Search options; `topK`, `similarityThreshold`,
 *   `selectedEmbeddingId`, `enableFullTextSearch`, `enableRerank`,
 *   `selectedRerankId`, `rerankSimilarityThreshold` and `language` are read.
 */
async ragSearch(
  userId: string,
  tenantId: string,
  query: string,
  settings: any,
) {
  this.logger.log(
    `RAG search request: userId=${userId}, query="${query}", settings=${JSON.stringify(settings)}`,
  );

  try {
    const hits = await this.ragService.searchKnowledge(
      query,
      userId,
      settings.topK,
      settings.similarityThreshold,
      settings.selectedEmbeddingId,
      settings.enableFullTextSearch,
      settings.enableRerank,
      settings.selectedRerankId,
      undefined,
      undefined,
      settings.rerankSimilarityThreshold,
      tenantId, // Ensure shared visibility within tenant for RAG
    );

    const response = {
      searchResults: hits,
      sources: this.ragService.extractSources(hits),
      ragPrompt: this.ragService.buildRagPrompt(
        query,
        hits,
        settings.language || DEFAULT_LANGUAGE,
      ),
      hasRelevantContent: hits.length > 0,
    };

    this.logger.log(`RAG search completed: found ${hits.length} results`);
    return response;
  } catch (error) {
    this.logger.error(
      `RAG search failed for user ${userId}:`,
      error.stack || error.message,
    );

    // Degrade gracefully: callers get an empty result and the raw query.
    return {
      searchResults: [],
      sources: [],
      ragPrompt: query,
      hasRelevantContent: false,
    };
  }
}
/**
 * Deletes a file of the given tenant: the stored file, its Elasticsearch
 * documents, its group associations and finally the database record.
 *
 * Removing the stored file and the Elasticsearch documents is best-effort;
 * failures there are logged as warnings and do not stop the deletion.
 *
 * @throws NotFoundException when the tenant has no file with this id.
 * @throws Rethrows any other error after logging it.
 */
async deleteFile(
  fileId: string,
  userId: string,
  tenantId: string,
): Promise<void> {
  this.logger.log(`Deleting file ${fileId} for user ${userId}`);

  try {
    // 1. Get file info (filtered by tenant). Groups are loaded in the same
    //    query so the record does not have to be fetched a second time.
    const file = await this.kbRepository.findOne({
      where: { id: fileId, tenantId },
      relations: ['groups'],
    });
    if (!file) {
      throw new NotFoundException(
        this.i18nService.getMessage('fileNotFound'),
      );
    }

    // 2. Delete file from filesystem
    try {
      if (fs.existsSync(file.storagePath)) {
        fs.unlinkSync(file.storagePath);
        this.logger.log(`Deleted file: ${file.storagePath}`);
      }
    } catch (error) {
      this.logger.warn(`Failed to delete file ${file.storagePath}:`, error);
    }

    // 3. Delete from Elasticsearch
    try {
      await this.elasticsearchService.deleteByFileId(
        fileId,
        userId,
        tenantId,
      );
      this.logger.log(`Deleted ES documents for file ${fileId}`);
    } catch (error) {
      this.logger.warn(
        `Failed to delete ES documents for file ${fileId}:`,
        error,
      );
    }

    // 4. Remove from all groups (cleanup M2M relations)
    if (file.groups && file.groups.length > 0) {
      // Saving with an empty list removes the entries from the join table
      file.groups = [];
      await this.kbRepository.save(file);
      this.logger.log(`Cleared group associations for file ${fileId}`);
    }

    // 5. Delete the database record
    await this.kbRepository.delete({ id: fileId });
    this.logger.log(`Deleted database record for file ${fileId}`);
  } catch (error) {
    this.logger.error(`Failed to delete file ${fileId}`, error);
    throw error;
  }
}
/**
 * Deletes every file of the given tenant, one after another, via
 * `deleteFile`.
 *
 * @throws Rethrows the first error after logging it; files not yet reached
 *   stay in place.
 */
async clearAll(userId: string, tenantId: string): Promise<void> {
  this.logger.log(
    `Clearing all knowledge base data for user ${userId} in tenant ${tenantId}`,
  );

  try {
    const tenantFiles = await this.kbRepository.find({ where: { tenantId } });

    // Sequential on purpose: each deletion completes before the next starts.
    for (const { id } of tenantFiles) {
      await this.deleteFile(id, userId, tenantId);
    }

    this.logger.log(`Cleared all knowledge base data for user ${userId}`);
  } catch (error) {
    this.logger.error(
      `Failed to clear knowledge base for user ${userId}`,
      error,
    );
    throw error;
  }
}
/**
 * Processes a stored file in the mode requested by `config.mode`:
 * `'precise'` runs the precise pipeline, anything else the fast one.
 *
 * The record is marked INDEXING before work starts. Errors raised while
 * processing are logged and turn the status into FAILED; they are not
 * rethrown.
 */
private async processFile(
  kbId: string,
  userId: string,
  tenantId: string,
  config?: any,
) {
  const mode = config?.mode || 'fast';
  this.logger.log(`Starting processing for file ${kbId}, mode: ${mode}`);
  await this.updateStatus(kbId, FileStatus.INDEXING);

  try {
    const kb = await this.kbRepository.findOne({ where: { id: kbId } });
    if (!kb) {
      this.logger.error(`KB not found: ${kbId}`);
      return;
    }

    // Memory snapshot before the heavy work begins
    const { heapUsed, heapTotal } = this.memoryMonitor.getMemoryUsage();
    this.logger.log(
      `Memory state - before processing: ${heapUsed}/${heapTotal}MB`,
    );

    if (mode === 'precise') {
      // Precise mode - use Vision Pipeline
      await this.processPreciseMode(kb, userId, tenantId, config);
    } else {
      // Fast mode - use Tika
      await this.processFastMode(kb, userId, tenantId, config);
    }

    this.logger.log(`File ${kbId} processed successfully in ${mode} mode.`);
  } catch (error) {
    this.logger.error(`Failed to process file ${kbId}`, error);
    await this.updateStatus(kbId, FileStatus.FAILED);
  }
}
/**
 * Fast mode processing: extracts text with Tika (or with the tenant's vision
 * model for image files), stores it on the record, vectorizes it and then
 * kicks off title generation and PDF conversion in the background.
 *
 * A missing extraction result is treated exactly like an empty one: a
 * warning is logged and processing continues with an empty string.
 *
 * @param kb Record to process; `storagePath`, `mimetype` and `id` are read.
 * @param userId Forwarded to vectorization and PDF conversion.
 * @param tenantId Tenant whose settings select the vision model
 *   (`default` when empty).
 * @param config Forwarded unchanged to vectorization.
 */
private async processFastMode(
  kb: KnowledgeBase,
  userId: string,
  tenantId: string,
  config?: any,
) {
  // 1. Extract text using Tika
  let text = await this.tikaService.extractText(kb.storagePath);

  // Use vision model for image files
  if (this.visionService.isImageFile(kb.mimetype)) {
    const settings = await this.tenantService.getSettings(
      tenantId || 'default',
    );
    const visionModelId = settings?.selectedVisionId;
    if (visionModelId) {
      const visionModel =
        await this.modelConfigService.findOne(visionModelId);
      if (
        visionModel &&
        visionModel.type === 'vision' &&
        visionModel.isEnabled !== false
      ) {
        text = await this.visionService.extractImageContent(kb.storagePath, {
          baseUrl: visionModel.baseUrl || '',
          apiKey: visionModel.apiKey || '',
          modelId: visionModel.modelId,
        });
      }
    }
  }

  if (!text || text.trim().length === 0) {
    this.logger.warn(this.i18nService.getMessage('noTextExtracted'));
  }
  // The guard above allows for a missing result, so normalise it here;
  // dereferencing it directly below would otherwise throw a TypeError.
  const extractedText = text ?? '';

  // Check text size
  const textSizeMB = Math.round(extractedText.length / 1024 / 1024);
  if (textSizeMB > 50) {
    this.logger.warn(
      this.i18nService.formatMessage('extractedTextTooLarge', {
        size: textSizeMB,
      }),
    );
  }

  // Save text to database
  await this.kbRepository.update(kb.id, { content: extractedText });
  await this.updateStatus(kb.id, FileStatus.EXTRACTED);

  // Vectorization is awaited; the catch only guards against rejections.
  await this.vectorizeToElasticsearch(
    kb.id,
    userId,
    tenantId,
    extractedText,
    config,
  ).catch((err) => {
    this.logger.error(`Error vectorizing file ${kb.id}`, err);
  });

  // Auto-generate title (async execution)
  this.generateTitle(kb.id).catch((err) => {
    this.logger.error(`Error generating title for file ${kb.id}`, err);
  });

  // Trigger PDF conversion asynchronously (for document files)
  this.ensurePDFExists(kb.id, userId, tenantId).catch((err) => {
    this.logger.warn(
      this.i18nService.formatMessage('pdfConversionFailedDetail', {
        id: kb.id,
      }),
      err,
    );
  });
}
/**
 * Precise mode processing through the vision pipeline.
 *
 * Falls back to fast mode when the file extension is not supported, when the
 * tenant has no usable vision model, or when the pipeline fails or throws.
 * On success the combined page text and per-page metadata are stored, the
 * record is marked EXTRACTED, and indexing, PDF conversion and title
 * generation are started in the background.
 */
private async processPreciseMode(
  kb: KnowledgeBase,
  userId: string,
  tenantId: string,
  config?: any,
) {
  // Every fallback below goes through this single entry point.
  const fallbackToFast = () =>
    this.processFastMode(kb, userId, tenantId, config);

  // Extension = everything from the last dot, lower-cased.
  const ext = kb.originalName
    .toLowerCase()
    .substring(kb.originalName.lastIndexOf('.'));
  const supported = ['.pdf', '.doc', '.docx', '.ppt', '.pptx'].includes(ext);
  if (!supported) {
    this.logger.warn(
      this.i18nService.formatMessage('preciseModeUnsupported', { ext }),
    );
    return fallbackToFast();
  }

  // The tenant must have a vision model selected ...
  const settings = await this.tenantService.getSettings(
    tenantId || 'default',
  );
  const visionModelId = settings?.selectedVisionId;
  if (!visionModelId) {
    this.logger.warn(
      this.i18nService.getMessage('visionModelNotConfiguredFallback'),
    );
    return fallbackToFast();
  }

  // ... and that model must exist, be a vision model and not be disabled.
  const visionModel = await this.modelConfigService.findOne(visionModelId);
  const usable =
    visionModel &&
    visionModel.type === 'vision' &&
    visionModel.isEnabled !== false;
  if (!usable) {
    this.logger.warn(
      this.i18nService.getMessage('visionModelInvalidFallback'),
    );
    return fallbackToFast();
  }

  try {
    const result = await this.visionPipelineService.processPreciseMode(
      kb.storagePath,
      {
        userId,
        tenantId,
        modelId: visionModelId,
        fileId: kb.id,
        fileName: kb.originalName,
        skipQualityCheck: false,
      },
    );

    if (!result.success) {
      this.logger.error(`Vision pipeline failed, falling back to fast mode`);
      this.logger.warn(this.i18nService.getMessage('visionPipelineFailed'));
      return fallbackToFast();
    }

    // Persist the page texts as one document plus a per-page summary.
    const combinedText = result.results.map((page) => page.text).join('\n\n');
    const metadata = {
      processedPages: result.processedPages,
      failedPages: result.failedPages,
      cost: result.cost,
      duration: result.duration,
      results: result.results.map((page) => ({
        pageIndex: page.pageIndex,
        confidence: page.confidence,
        layout: page.layout,
        imageCount: page.images.length,
      })),
    };

    await this.kbRepository.update(kb.id, {
      content: combinedText,
      metadata: metadata as any,
    });
    await this.updateStatus(kb.id, FileStatus.EXTRACTED);

    this.logger.log(
      this.i18nService.formatMessage('preciseModeComplete', {
        pages: result.processedPages,
        cost: result.cost.toFixed(2),
      }),
    );

    // Background work: each task logs its own failure and never rejects here.
    // Every page is indexed as a separate document with its metadata.
    this.indexPreciseResults(
      kb,
      userId,
      tenantId,
      kb.embeddingModelId,
      result.results,
    ).catch((err) => {
      this.logger.error(`Error indexing precise results for ${kb.id}`, err);
    });

    this.ensurePDFExists(kb.id, userId, tenantId).catch((err) => {
      this.logger.warn(`Initial PDF conversion failed for ${kb.id}`, err);
    });

    this.generateTitle(kb.id).catch((err) => {
      this.logger.error(`Error generating title for file ${kb.id}`, err);
    });
  } catch (error) {
    this.logger.error(`Vision pipeline error: ${error.message}`, error.stack);
    this.logger.error(`Falling back to fast mode for file ${kb.id}`);
    return fallbackToFast();
  }
}
/**
 * Embeds the per-page results of precise mode and indexes every page as its
 * own Elasticsearch document (`{kbId}_page_{pageIndex}`).
 *
 * Pages are embedded in batches of `CHUNK_BATCH_SIZE` (default 50; values
 * that are not positive integers fall back to the default so the loop always
 * advances). A failing batch is logged and skipped. The record is marked
 * VECTORIZED unless batches failed and not a single page was indexed, in
 * which case it is marked FAILED.
 *
 * @param kb Record the pages belong to.
 * @param userId Stored in the metadata of every indexed page.
 * @param tenantId Stored in the metadata of every indexed page.
 * @param embeddingModelId Model used to embed the page texts.
 * @param results Page results; `text`, `pageIndex`, `images`, `layout` and
 *   `confidence` are read from each entry.
 */
private async indexPreciseResults(
  kb: KnowledgeBase,
  userId: string,
  tenantId: string,
  embeddingModelId: string,
  results: any[],
): Promise<void> {
  this.logger.log(`Indexing ${results.length} precise results for ${kb.id}`);

  // Check index existence - get actual model dimensions
  const actualDimensions = await this.getActualModelDimensions(
    embeddingModelId,
  );
  await this.elasticsearchService.createIndexIfNotExists(actualDimensions);

  // A batch size of 0 would never advance the loop and NaN would end it
  // after one empty batch, so only positive integers are accepted.
  const defaultBatchSize = 50;
  const configuredBatchSize = parseInt(
    process.env.CHUNK_BATCH_SIZE || '',
    10,
  );
  const batchSize =
    Number.isInteger(configuredBatchSize) && configuredBatchSize > 0
      ? configuredBatchSize
      : defaultBatchSize;

  let indexedPages = 0;
  let failedBatches = 0;

  for (let i = 0; i < results.length; i += batchSize) {
    const batchNumber = Math.floor(i / batchSize) + 1;
    const batch = results.slice(i, i + batchSize);
    const texts = batch.map((r) => r.text);

    try {
      // Generate vectors
      const embeddings = await this.embeddingService.getEmbeddings(
        texts,
        embeddingModelId,
      );

      // Index each result
      for (let j = 0; j < batch.length; j++) {
        const result = batch[j];
        const embedding = embeddings[j];

        if (!embedding || embedding.length === 0) {
          this.logger.warn(
            this.i18nService.formatMessage('skippingEmptyVectorPage', {
              page: result.pageIndex,
            }),
          );
          continue;
        }

        await this.elasticsearchService.indexDocument(
          `${kb.id}_page_${result.pageIndex}`,
          result.text,
          embedding,
          {
            fileId: kb.id,
            originalName: kb.originalName,
            mimetype: kb.mimetype,
            userId: userId,
            tenantId: tenantId,
            pageNumber: result.pageIndex,
            images: result.images,
            layout: result.layout,
            confidence: result.confidence,
            source: 'precise',
            mode: 'vision',
          },
        );
        indexedPages++;
      }

      this.logger.log(
        `Batch ${batchNumber} completed: ${batch.length} pages`,
      );
    } catch (error) {
      failedBatches++;
      this.logger.error(`Batch ${batchNumber} processing failed`, error);
    }
  }

  // Reporting success when every attempt failed would hide the problem and
  // leave the file without any searchable content.
  if (failedBatches > 0 && indexedPages === 0) {
    this.logger.error(
      `Precise mode indexing failed for ${kb.id}: no pages were indexed`,
    );
    await this.updateStatus(kb.id, FileStatus.FAILED);
    return;
  }
  if (failedBatches > 0) {
    this.logger.warn(
      `Precise mode indexing for ${kb.id} was partial: ${indexedPages}/${results.length} pages indexed, ${failedBatches} batch(es) failed`,
    );
  }

  await this.updateStatus(kb.id, FileStatus.VECTORIZED);
  this.logger.log(`Precise mode indexing completed: ${results.length} pages`);
}
/**
 * Renders a file's PDF to JPEG images and returns the path of the image for
 * one page.
 *
 * @param pageIndex Zero-based page index; it is matched against the
 *   converter's `pageIndex` value plus one.
 * @returns Path of the image for the requested page.
 * @throws NotFoundException when the conversion produced no image for the
 *   page.
 */
async getPageAsImage(
  fileId: string,
  pageIndex: number,
  userId: string,
  tenantId: string,
): Promise<string> {
  const pdfPath = await this.ensurePDFExists(fileId, userId, tenantId);

  const conversion = await this.pdf2ImageService.convertToImages(pdfPath, {
    density: 150,
    quality: 75,
    format: 'jpeg',
  });

  // The converter numbers pages from 1.
  const pageNumber = pageIndex + 1;
  const match = conversion.images.find((img) => img.pageIndex === pageNumber);
  if (match) {
    return match.path;
  }

  throw new NotFoundException(
    this.i18nService.formatMessage('pageImageNotFoundDetail', {
      page: pageNumber,
    }),
  );
}
/**
 * Splits the extracted text of a file into chunks, embeds them and indexes
 * every chunk in Elasticsearch (`{kbId}_chunk_{index}`), then marks the file
 * as VECTORIZED.
 *
 * Embedding runs in batches capped at the recommended batch size whenever
 * the memory monitor asks for batching or the chunk count exceeds that size;
 * otherwise all chunks are embedded in one request. If embedding fails with
 * a "context length" error, every chunk is retried on its own and chunks
 * that still fail are logged and skipped.
 *
 * Other failures are caught here: the error is logged, recorded in the
 * record's metadata (`lastError`, `failedAt`) and the file is marked FAILED.
 *
 * @param kbId Id of the record to vectorize.
 * @param userId Stored in the metadata of every indexed chunk.
 * @param tenantId Restricts the record lookup and is stored with every chunk.
 * @param text Text to chunk and embed.
 * @param config Not read by this method; kept for call-site compatibility.
 */
private async vectorizeToElasticsearch(
  kbId: string,
  userId: string,
  tenantId: string,
  text: string,
  config?: any,
) {
  try {
    const kb = await this.kbRepository.findOne({
      where: { id: kbId, tenantId },
    });
    if (!kb) return;

    // Memory monitor - pre-vectorization check
    const memBeforeChunk = this.memoryMonitor.getMemoryUsage();
    this.logger.log(
      `Pre-vectorization memory: ${memBeforeChunk.heapUsed}/${memBeforeChunk.heapTotal}MB`,
    );

    this.logger.debug(`File ${kbId}: Validating chunk config...`);
    // 1. Validate and fix chunk config (based on model limits and env vars)
    const validatedConfig = await this.chunkConfigService.validateChunkConfig(
      kb.chunkSize,
      kb.chunkOverlap,
      kb.embeddingModelId,
    );
    this.logger.debug(`File ${kbId}: Chunk config validated.`);

    // If config modified, log warning and update database
    if (validatedConfig.warnings.length > 0) {
      this.logger.warn(
        this.i18nService.formatMessage('chunkConfigCorrection', {
          warnings: validatedConfig.warnings.join(', '),
        }),
      );

      if (
        validatedConfig.chunkSize !== kb.chunkSize ||
        validatedConfig.chunkOverlap !== kb.chunkOverlap
      ) {
        await this.kbRepository.update(kbId, {
          chunkSize: validatedConfig.chunkSize,
          chunkOverlap: validatedConfig.chunkOverlap,
        });
      }
    }

    // Display config summary (including actual limits applied)
    this.logger.debug(`File ${kbId}: Getting config summary...`);
    const configSummary = await this.chunkConfigService.getConfigSummary(
      validatedConfig.chunkSize,
      validatedConfig.chunkOverlap,
      kb.embeddingModelId,
    );
    this.logger.log(`Chunk config: ${configSummary}`);
    this.logger.log(
      `Config limits: chunk=${validatedConfig.effectiveMaxChunkSize}, overlap=${validatedConfig.effectiveMaxOverlapSize}`,
    );

    // 2. Split text using validated config
    const chunks = this.textChunkerService.chunkText(
      text,
      validatedConfig.chunkSize,
      validatedConfig.chunkOverlap,
    );
    this.logger.log(`File ${kbId} split into ${chunks.length} text blocks`);

    if (chunks.length === 0) {
      this.logger.warn(
        this.i18nService.formatMessage('noChunksGenerated', { id: kbId }),
      );
      await this.updateStatus(kbId, FileStatus.VECTORIZED);
      return;
    }

    // 3. Validate chunk count is reasonable
    const estimatedChunkCount = this.chunkConfigService.estimateChunkCount(
      text.length,
      validatedConfig.chunkSize,
    );
    if (chunks.length > estimatedChunkCount * 1.2) {
      this.logger.warn(
        this.i18nService.formatMessage('chunkCountAnomaly', {
          actual: chunks.length,
          estimated: estimatedChunkCount,
        }),
      );
    }

    // 4. Get recommended batch size (based on model limits)
    const recommendedBatchSize =
      await this.chunkConfigService.getRecommendedBatchSize(
        kb.embeddingModelId,
        parseInt(process.env.CHUNK_BATCH_SIZE || '100'),
      );

    // 5. Estimate memory usage
    const avgChunkSize =
      chunks.reduce((sum, c) => sum + c.content.length, 0) / chunks.length;
    const estimatedMemory = this.memoryMonitor.estimateMemoryUsage(
      chunks.length,
      avgChunkSize,
      parseInt(process.env.DEFAULT_VECTOR_DIMENSIONS || '2560'),
    );
    this.logger.log(
      `Estimated memory usage: ${estimatedMemory}MB (batch size: ${recommendedBatchSize})`,
    );

    // 6. Get actual model dimensions and check index exists
    const actualDimensions = await this.getActualModelDimensions(
      kb.embeddingModelId,
    );
    await this.elasticsearchService.createIndexIfNotExists(actualDimensions);

    // 7. Embedding and indexing. The helpers below are shared by every path
    //    so all of them write identical metadata and log the same messages.
    type Chunk = (typeof chunks)[number];

    // Embeds one group of chunks with a single request and indexes each
    // chunk that received a non-empty vector.
    const embedAndIndex = async (
      group: Chunk[],
      verifyDimensions: boolean,
    ): Promise<void> => {
      const embeddings = await this.embeddingService.getEmbeddings(
        group.map((chunk) => chunk.content),
        kb.embeddingModelId,
      );

      if (
        verifyDimensions &&
        embeddings.length > 0 &&
        embeddings[0].length !== actualDimensions
      ) {
        this.logger.warn(
          `Vector dimension mismatch: expected ${actualDimensions}, got ${embeddings[0].length}`,
        );
      }

      for (let i = 0; i < group.length; i++) {
        const chunk = group[i];
        const embedding = embeddings[i];

        if (!embedding || embedding.length === 0) {
          this.logger.warn(
            this.i18nService.formatMessage('skippingEmptyVectorChunk', {
              index: chunk.index,
            }),
          );
          continue;
        }

        await this.elasticsearchService.indexDocument(
          `${kb.id}_chunk_${chunk.index}`,
          chunk.content,
          embedding,
          {
            fileId: kb.id,
            originalName: kb.originalName,
            mimetype: kb.mimetype,
            userId: userId,
            tenantId,
            chunkIndex: chunk.index,
            startPosition: chunk.startPosition,
            endPosition: chunk.endPosition,
          },
        );
      }
    };

    // Fallback: one embedding request per chunk. Document ids are
    // deterministic, so chunks indexed before the failure are overwritten.
    const embedIndividually = async (): Promise<void> => {
      for (let i = 0; i < chunks.length; i++) {
        const chunk = chunks[i];
        try {
          await embedAndIndex([chunk], false);

          if ((i + 1) % 10 === 0) {
            this.logger.log(
              `Single processing progress: ${i + 1}/${chunks.length}`,
            );
          }
        } catch (chunkError) {
          this.logger.error(
            this.i18nService.formatMessage('chunkProcessingFailed', {
              index: chunk.index,
              message: chunkError.message,
            }),
          );
        }
      }

      this.logger.log(
        this.i18nService.formatMessage('singleTextProcessingComplete', {
          count: chunks.length,
        }),
      );
    };

    // True when the embedding backend rejected the input as too long.
    const isContextLengthError = (err: unknown): boolean => {
      const message = (err as { message?: unknown } | null | undefined)
        ?.message;
      return typeof message === 'string' && message.includes('context length');
    };

    const useBatching = this.memoryMonitor.shouldUseBatching(
      chunks.length,
      avgChunkSize,
      actualDimensions,
    );
    const exceedsModelBatchLimit = chunks.length > recommendedBatchSize;

    try {
      if (useBatching || exceedsModelBatchLimit) {
        if (!useBatching) {
          // Memory would allow one request, but the model's batch limit
          // does not.
          this.logger.warn(
            this.i18nService.formatMessage('chunkLimitExceededForceBatch', {
              actual: chunks.length,
              limit: recommendedBatchSize,
            }),
          );
        }

        await this.processInBatches(
          chunks,
          async (batch, batchIndex) => {
            // Verify batch size not exceeding model limit
            if (batch.length > recommendedBatchSize) {
              this.logger.warn(
                this.i18nService.formatMessage('batchSizeExceeded', {
                  index: batchIndex,
                  actual: batch.length,
                  limit: recommendedBatchSize,
                }),
              );
            }

            await embedAndIndex(batch, true);

            this.logger.log(
              `Batch ${batchIndex} completed: ${batch.length} chunks`,
            );
          },
          {
            // Always pass the model-derived size; the helper's own default
            // comes from the environment and may exceed the model's limit.
            batchSize: recommendedBatchSize,
            onBatchComplete: (batchIndex, totalBatches) => {
              const mem = this.memoryMonitor.getMemoryUsage();
              this.logger.log(
                `Batch ${batchIndex}/${totalBatches} completed, memory: ${mem.heapUsed}MB`,
              );
            },
          },
        );
      } else {
        // Small enough for a single embedding request
        await embedAndIndex(chunks, false);
      }
    } catch (error) {
      if (!isContextLengthError(error)) {
        // Throw other errors directly
        throw error;
      }

      this.logger.warn(
        this.i18nService.getMessage(
          useBatching
            ? 'contextLengthErrorFallback'
            : 'batchContextLengthErrorFallback',
        ),
      );
      // Downgrade to single text processing
      await embedIndividually();
    }

    await this.updateStatus(kbId, FileStatus.VECTORIZED);
    const memAfter = this.memoryMonitor.getMemoryUsage();
    this.logger.log(
      this.i18nService.formatMessage('fileVectorizationComplete', {
        id: kbId,
        count: chunks.length,
        memory: memAfter.heapUsed,
      }),
    );
  } catch (error) {
    this.logger.error(
      this.i18nService.formatMessage('fileVectorizationFailed', { id: kbId }),
      error,
    );

    // Save error info to metadata
    try {
      const kb = await this.kbRepository.findOne({ where: { id: kbId } });
      if (kb) {
        const metadata = kb.metadata || {};
        metadata.lastError = error.message;
        metadata.failedAt = new Date().toISOString();
        await this.kbRepository.update(kbId, { metadata });
      }
    } catch (e) {
      this.logger.warn(
        `Failed to update metadata for failed file ${kbId}`,
        e,
      );
    }

    await this.updateStatus(kbId, FileStatus.FAILED);
  }
}
/**
 * Processes `items` sequentially in batches while keeping an eye on memory.
 *
 * Before every batch the method waits for the memory monitor, then sizes the
 * batch as the smaller of the monitor's dynamic size and the configured
 * upper bound. The effective size is always at least 1, so the loop is
 * guaranteed to advance even if the monitor or the environment yields 0 or a
 * non-numeric value.
 *
 * @param items Items to process; the array itself is not modified.
 * @param processor Called with each batch and its 1-based sequence number.
 * @param options `batchSize` is the upper bound per batch (defaults to
 *   `CHUNK_BATCH_SIZE` or 100). `onBatchComplete` receives the batch number
 *   and the current estimate of the total number of batches.
 */
private async processInBatches<T>(
  items: T[],
  processor: (batch: T[], batchIndex: number) => Promise<void>,
  options?: {
    batchSize?: number;
    onBatchComplete?: (batchIndex: number, totalBatches: number) => void;
  },
): Promise<void> {
  const totalItems = items.length;
  if (totalItems === 0) return;

  const startTime = Date.now();
  this.logger.log(
    this.i18nService.formatMessage('batchProcessingStarted', {
      count: totalItems,
    }),
  );

  // Use provided batch size or fallback to env/default; reject values that
  // cannot be used as a slice length.
  const defaultBatchSize = 100;
  const requestedBatchSize =
    options?.batchSize ||
    parseInt(process.env.CHUNK_BATCH_SIZE || String(defaultBatchSize), 10);
  const initialBatchSize =
    Number.isFinite(requestedBatchSize) && requestedBatchSize >= 1
      ? Math.floor(requestedBatchSize)
      : defaultBatchSize;

  // Counted explicitly: deriving the number from `i / batchSize` repeats or
  // skips numbers as soon as the batch size changes between iterations.
  let batchIndex = 0;

  for (let i = 0; i < totalItems; ) {
    // Check memory and wait
    await this.memoryMonitor.waitForMemoryAvailable();

    // The memory monitor may suggest any size; the model limit
    // (initialBatchSize) is the hard upper bound.
    const currentMem = this.memoryMonitor.getMemoryUsage().heapUsed;
    const dynamicBatchSize =
      this.memoryMonitor.getDynamicBatchSize(currentMem);
    const cappedBatchSize = Math.min(dynamicBatchSize, initialBatchSize);
    // An unusable suggestion degrades to the smallest batch that still
    // makes progress.
    const batchSize =
      Number.isFinite(cappedBatchSize) && cappedBatchSize >= 1
        ? Math.floor(cappedBatchSize)
        : 1;

    // Get current batch
    const batch = items.slice(i, i + batchSize);
    batchIndex += 1;
    // Completed batches plus what remains at the current size.
    const totalBatches =
      batchIndex - 1 + Math.ceil((totalItems - i) / batchSize);

    this.logger.log(
      this.i18nService.formatMessage('batchProcessingProgress', {
        index: batchIndex,
        total: totalBatches,
        count: batch.length,
      }),
    );

    // Process batch
    await processor(batch, batchIndex);

    // Callback notification
    if (options?.onBatchComplete) {
      options.onBatchComplete(batchIndex, totalBatches);
    }

    // Force GC (if memory is near threshold)
    if (currentMem > 800) {
      this.memoryMonitor.forceGC();
    }

    // Clear references to help GC (the batch is a copy, not `items`)
    batch.length = 0;
    i += batchSize;
  }

  const duration = ((Date.now() - startTime) / 1000).toFixed(2);
  this.logger.log(
    this.i18nService.formatMessage('batchProcessingComplete', {
      count: totalItems,
      duration,
    }),
  );
}
/**
 * Re-run vectorization for a file whose previous processing attempt failed.
 *
 * The file is looked up by id inside the given tenant, must currently be in
 * the FAILED state and must have non-blank stored content. Its status is then
 * set to INDEXING and vectorization is started without being awaited; a
 * failure of that background run is only logged here.
 *
 * @returns The file record as re-read after the status reset.
 * @throws NotFoundException when the file does not exist in the tenant.
 * @throws Error when the file is not FAILED or has no content to process.
 */
async retryFailedFile(
  fileId: string,
  userId: string,
  tenantId: string,
): Promise<KnowledgeBase> {
  this.logger.log(
    `Retrying failed file ${fileId} for user ${userId} in tenant ${tenantId}`,
  );

  // Tenant-scoped lookup, used both before and after the status reset.
  const loadFile = () =>
    this.kbRepository.findOne({
      where: { id: fileId, tenantId },
    });

  const failedFile = await loadFile();
  if (!failedFile) {
    throw new NotFoundException(this.i18nService.getMessage('fileNotFound'));
  }

  const isFailed = failedFile.status === FileStatus.FAILED;
  if (!isFailed) {
    throw new Error(
      this.i18nService.formatMessage('onlyFailedFilesRetryable', {
        status: failedFile.status,
      }),
    );
  }

  const storedText = failedFile.content;
  if (!storedText || storedText.trim().length === 0) {
    throw new Error(this.i18nService.getMessage('emptyFileRetryFailed'));
  }

  // Flag the file as being indexed again before kicking off the work.
  await this.updateStatus(fileId, FileStatus.INDEXING);

  // Fire and forget: reuse the regular vectorization pipeline with the
  // chunking/embedding settings stored on the file.
  const vectorizeOptions = {
    chunkSize: failedFile.chunkSize,
    chunkOverlap: failedFile.chunkOverlap,
    embeddingModelId: failedFile.embeddingModelId,
  };
  this.vectorizeToElasticsearch(
    fileId,
    userId,
    tenantId,
    storedText,
    vectorizeOptions,
  ).catch((err) => {
    this.logger.error(`Retry vectorization failed for file ${fileId}`, err);
  });

  // Hand back the freshly read record so the caller sees the new status.
  const refreshed = await loadFile();
  if (!refreshed) {
    throw new NotFoundException(this.i18nService.getMessage('fileNotFound'));
  }
  return refreshed;
}
/**
 * Return the stored chunks of a file together with its chunking settings.
 *
 * The file must exist within the given tenant; the chunks themselves are
 * fetched from Elasticsearch.
 *
 * @throws NotFoundException when the file does not exist in the tenant.
 */
async getFileChunks(fileId: string, userId: string, tenantId: string) {
  this.logger.log(
    `Getting chunks for file ${fileId}, user ${userId}, tenant ${tenantId}`,
  );

  // Tenant-scoped existence check
  const file = await this.kbRepository.findOne({
    where: { id: fileId, tenantId },
  });
  if (!file) {
    throw new NotFoundException(this.i18nService.getMessage('fileNotFound'));
  }

  // Load every chunk indexed for this file
  const storedChunks = await this.elasticsearchService.getFileChunks(
    fileId,
    userId,
    tenantId,
  );

  // Project each stored chunk onto the response shape
  const chunkSummaries = storedChunks.map((stored) => {
    const text = stored.content;
    return {
      index: stored.chunkIndex,
      content: text,
      contentLength: text.length,
      startPosition: stored.startPosition,
      endPosition: stored.endPosition,
    };
  });

  return {
    fileId: file.id,
    fileName: file.originalName,
    totalChunks: storedChunks.length,
    chunkSize: file.chunkSize,
    chunkOverlap: file.chunkOverlap,
    chunks: chunkSummaries,
  };
}
/** Persist a new processing status for the file with the given id. */
private async updateStatus(id: string, status: FileStatus): Promise<void> {
  const statusPatch = { status };
  await this.kbRepository.update(id, statusPatch);
}
// PDF preview related methods
/**
 * Make sure a PDF rendition of the file exists on disk and return its path.
 *
 * Files that already are PDFs are returned as-is. Other files are converted
 * through the LibreOffice service when their extension is a known document
 * or image extension; the result is stored next to the original as
 * `<basename>.pdf` and recorded on the file record.
 *
 * @param userId Not used by this method.
 * @param force When true, an existing converted PDF is deleted and rebuilt.
 * @returns Path of the PDF to serve.
 * @throws NotFoundException when the file does not exist in the tenant.
 * @throws Error when the format cannot be previewed or the conversion fails.
 */
async ensurePDFExists(
  fileId: string,
  userId: string,
  tenantId: string,
  force: boolean = false,
): Promise<string> {
  const kb = await this.kbRepository.findOne({
    where: { id: fileId, tenantId },
  });
  if (!kb) {
    throw new NotFoundException(this.i18nService.getMessage('fileNotFound'));
  }

  // A PDF upload needs no conversion: serve the stored file itself.
  if (kb.mimetype === 'application/pdf') {
    return kb.storagePath;
  }

  // Only document and image extensions can be converted for preview.
  const ext = kb.originalName.toLowerCase().split('.').pop() || '';
  const convertibleExtensions = [...DOC_EXTENSIONS, ...IMAGE_EXTENSIONS];
  if (!convertibleExtensions.includes(ext)) {
    this.logger.log(
      `Skipping PDF conversion for unsupported format: .${ext} (${kb.originalName})`,
    );
    throw new Error(this.i18nService.getMessage('pdfPreviewNotSupported'));
  }

  // Target path: same directory and base name as the original, ".pdf" suffix.
  const path = await import('path');
  const fs = await import('fs');
  const sourcePath = kb.storagePath;
  const stem = path.basename(sourcePath, path.extname(sourcePath));
  const pdfPath = path.join(path.dirname(sourcePath), `${stem}.pdf`);

  if (force) {
    // Forced regeneration: drop any previous output first (best effort).
    if (fs.existsSync(pdfPath)) {
      try {
        fs.unlinkSync(pdfPath);
        this.logger.log(`Forced regeneration: Deleted existing PDF ${pdfPath}`);
      } catch (e) {
        this.logger.warn(
          `Failed to delete existing PDF for regeneration: ${e.message}`,
        );
      }
    }
  } else if (fs.existsSync(pdfPath)) {
    // Already converted earlier: just make sure the record knows the path.
    if (!kb.pdfPath) {
      await this.kbRepository.update(kb.id, { pdfPath });
    }
    return pdfPath;
  }

  // Conversion is required
  try {
    this.logger.log(
      `Starting PDF conversion for ${kb.originalName} at ${kb.storagePath}`,
    );
    await this.libreOfficeService.convertToPDF(kb.storagePath);

    // The converter must have produced a non-empty file at the expected path.
    const outputExists = fs.existsSync(pdfPath);
    if (!outputExists) {
      throw new Error(
        `PDF conversion completed but file not found at ${pdfPath}`,
      );
    }
    if (fs.statSync(pdfPath).size === 0) {
      fs.unlinkSync(pdfPath);
      throw new Error(`PDF conversion failed: output file is empty`);
    }

    await this.kbRepository.update(kb.id, { pdfPath });
    this.logger.log(`PDF conversion successful: ${pdfPath}`);
    return pdfPath;
  } catch (error) {
    // Log the technical detail, surface a localized message to the caller.
    this.logger.error(
      `PDF conversion failed for ${fileId}: ${error.message}`,
      error.stack,
    );
    throw new Error(
      this.i18nService.formatMessage('pdfConversionFailedDetail', {
        id: fileId,
      }),
    );
  }
}
/**
 * Report whether a PDF preview of the file can be served right now.
 *
 * Returns `{ status: 'ready', url }` with a tokenized download URL when the
 * file is a PDF itself or a converted PDF already exists on disk, otherwise
 * `{ status: 'pending' }`. No conversion is started here.
 *
 * @throws NotFoundException when the file does not exist in the tenant.
 */
async getPDFStatus(fileId: string, userId: string, tenantId: string) {
  const kb = await this.kbRepository.findOne({
    where: { id: fileId, tenantId },
  });
  if (!kb) {
    throw new NotFoundException(this.i18nService.getMessage('fileNotFound'));
  }

  // Builds the "ready" answer; the access token is created at call time.
  const readyResponse = () => {
    const token = this.generateTempToken(fileId, userId, tenantId);
    return {
      status: 'ready',
      url: `/api/knowledge-bases/${fileId}/pdf?token=${token}`,
    };
  };

  // The original upload is already a PDF
  if (kb.mimetype === 'application/pdf') {
    return readyResponse();
  }

  // Expected location of the converted PDF: next to the original file.
  const path = await import('path');
  const fs = await import('fs');
  const sourcePath = kb.storagePath;
  const stem = path.basename(sourcePath, path.extname(sourcePath));
  const pdfPath = path.join(path.dirname(sourcePath), `${stem}.pdf`);

  const alreadyConverted = fs.existsSync(pdfPath);
  if (!alreadyConverted) {
    // Conversion still has to happen
    return {
      status: 'pending',
    };
  }

  // Record the path on the entity if it was not stored yet.
  if (!kb.pdfPath) {
    kb.pdfPath = pdfPath;
    await this.kbRepository.save(kb);
  }
  return readyResponse();
}
/**
 * Create a JWT, valid for one hour, carrying the file, user and tenant ids
 * with `type: 'pdf-access'`. Signed with the `JWT_SECRET` environment variable.
 *
 * @throws Error when `JWT_SECRET` is not set.
 */
private generateTempToken(
  fileId: string,
  userId: string,
  tenantId: string,
): string {
  const jwt = require('jsonwebtoken');
  const signingSecret = process.env.JWT_SECRET;
  if (!signingSecret) {
    throw new Error(this.i18nService.getMessage('jwtSecretRequired'));
  }
  const claims = { fileId, userId, tenantId, type: 'pdf-access' };
  const signOptions = { expiresIn: '1h' };
  return jwt.sign(claims, signingSecret, signOptions);
}
/**
 * Resolve the vector dimension of an embedding model.
 *
 * Resolution order:
 * 1. the `dimensions` value stored on the model config,
 * 2. the length of a vector obtained by embedding a probe text (the result
 *    is written back to the model config when a config exists),
 * 3. the `DEFAULT_VECTOR_DIMENSIONS` environment variable, or 2560 when that
 *    variable is missing or not a positive integer.
 *
 * Lookup and probe failures are logged and lead to the default; this method
 * does not throw for them.
 *
 * @param embeddingModelId Id of the embedding model to inspect.
 * @returns A positive number of dimensions.
 */
private async getActualModelDimensions(
  embeddingModelId: string,
): Promise<number> {
  // Guard the env value: an unparsable setting must not yield NaN dimensions.
  const FALLBACK_DIMENSIONS = 2560;
  const envDimensions = parseInt(
    process.env.DEFAULT_VECTOR_DIMENSIONS || '',
    10,
  );
  const defaultDimensions =
    Number.isInteger(envDimensions) && envDimensions > 0
      ? envDimensions
      : FALLBACK_DIMENSIONS;

  try {
    // 1. Prioritize getting from model config
    const modelConfig =
      await this.modelConfigService.findOne(embeddingModelId);
    if (modelConfig && modelConfig.dimensions) {
      this.logger.log(
        `Got dimensions from ${modelConfig.name} config: ${modelConfig.dimensions}`,
      );
      return modelConfig.dimensions;
    }

    // 2. Otherwise probe for dimensions
    this.logger.log(`Probing model dimensions: ${embeddingModelId}`);
    const probeEmbeddings = await this.embeddingService.getEmbeddings(
      ['probe'],
      embeddingModelId,
    );

    // An empty result or an empty vector carries no usable dimension.
    const actualDimensions = probeEmbeddings?.[0]?.length ?? 0;
    if (actualDimensions > 0) {
      this.logger.log(
        `Detected actual model dimensions: ${actualDimensions}`,
      );
      // Update model config for next use (best effort)
      if (modelConfig) {
        try {
          await this.modelConfigService.update(modelConfig.id, {
            dimensions: actualDimensions,
          });
          this.logger.log(
            `Updated model ${modelConfig.name} dimension config to ${actualDimensions}`,
          );
        } catch (updateErr) {
          this.logger.warn(
            `Failed to update model dimension config: ${updateErr.message}`,
          );
        }
      }
      return actualDimensions;
    }
  } catch (err) {
    this.logger.warn(
      `Failed to get dimensions. Using default: ${defaultDimensions}`,
      err.message,
    );
  }
  return defaultDimensions;
}
/**
 * Generate a document title with the LLM and store it on the file.
 *
 * Nothing is generated when the file is missing, has no content, or already
 * has a title (the existing title is returned). The prompt is built from the
 * first 2500 characters of the content in the language taken from the file
 * owner's user settings (`'zh'` when unset). The generated title is cleaned
 * (surrounding quotes removed, whitespace and line breaks collapsed, limited
 * to 100 characters), saved to the database and propagated to the
 * Elasticsearch chunks; an Elasticsearch failure is only logged.
 *
 * @param kbId Id of the file to title.
 * @returns The existing or newly generated title, or `null` when no title
 *   could be produced. Errors are logged, never thrown.
 */
async generateTitle(kbId: string): Promise<string | null> {
  this.logger.log(`Generating automatic title for file ${kbId}`);
  try {
    const kb = await this.kbRepository.findOne({ where: { id: kbId } });
    if (!kb || !kb.content || kb.content.trim().length === 0) {
      return null;
    }
    const tenantId = kb.tenantId;

    // Skip if title already exists
    if (kb.title) {
      return kb.title;
    }

    // Get content sample (max 2500 characters)
    const contentSample = kb.content.substring(0, 2500);

    // Language comes from the owner's user settings; missing settings must
    // not abort title generation, so fall back to the default language.
    const userSettings = await this.userSettingService.getByUser(kb.userId);
    const language = userSettings?.language || 'zh';

    // Build prompt
    const prompt = this.i18nService.getDocumentTitlePrompt(
      language,
      contentSample,
    );

    // Call LLM to generate title
    let generatedTitle: string | undefined;
    try {
      generatedTitle = await this.chatService.generateSimpleChat(
        [{ role: 'user', content: prompt }],
        kb.userId,
        kb.tenantId,
      );
    } catch (err) {
      this.logger.warn(
        `Failed to generate title for document ${kbId} due to LLM configuration issue: ${err.message}`,
      );
      return null; // Skip title generation if LLM is not configured for this tenant
    }

    // Strip one surrounding quote on each side, collapse newlines and other
    // whitespace runs into single spaces, then cap the length.
    const cleanedTitle = (generatedTitle ?? '')
      .trim()
      .replace(/^["']|["']$/g, '')
      .replace(/\s+/g, ' ')
      .trim()
      .substring(0, 100)
      .trim();

    // Never persist an empty title (e.g. the model answered with just quotes).
    if (cleanedTitle.length > 0) {
      await this.kbRepository.update(kbId, { title: cleanedTitle });

      // Also update ES chunks
      await this.elasticsearchService
        .updateTitleByFileId(kbId, cleanedTitle, tenantId)
        .catch((err) => {
          this.logger.error(
            `Failed to update title in Elasticsearch for ${kbId}`,
            err,
          );
        });
      this.logger.log(
        `Successfully generated title for ${kbId}: ${cleanedTitle}`,
      );
      return cleanedTitle;
    }
  } catch (error) {
    this.logger.error(`Failed to generate title for ${kbId}`, error);
  }
  return null;
}
}
@@ -0,0 +1,255 @@
import { Injectable, Logger } from '@nestjs/common';
/**
 * Snapshot of the process memory counters.
 * All sizes are whole megabytes (bytes / 1024 / 1024, rounded).
 */
export interface MemoryStats {
heapUsed: number; // Used heap memory (MB)
heapTotal: number; // Total heap memory (MB)
external: number; // External memory (MB)
rss: number; // RSS (Resident Set Size) (MB)
// Time at which the snapshot was taken
timestamp: Date;
}
/**
 * Tracks process heap usage and helps callers throttle batch work so that
 * memory stays under a configured budget.
 *
 * Configuration is read once, at construction, from environment variables:
 * - `MAX_MEMORY_USAGE_MB`: heap budget in MB (default 1024)
 * - `CHUNK_BATCH_SIZE`: base batch size (default 100)
 * - `GC_THRESHOLD_MB`: heap usage in MB above which batches shrink and a
 *   garbage collection is requested (default 800)
 *
 * Missing, non-numeric or non-positive values fall back to the defaults.
 */
@Injectable()
export class MemoryMonitorService {
  private readonly logger = new Logger(MemoryMonitorService.name);
  private readonly MAX_MEMORY_MB: number;
  private readonly BATCH_SIZE: number;
  private readonly GC_THRESHOLD_MB: number;

  constructor() {
    this.MAX_MEMORY_MB = MemoryMonitorService.readPositiveInt(
      process.env.MAX_MEMORY_USAGE_MB,
      1024,
    ); // 1GB limit
    this.BATCH_SIZE = MemoryMonitorService.readPositiveInt(
      process.env.CHUNK_BATCH_SIZE,
      100,
    ); // 100 chunks per batch
    this.GC_THRESHOLD_MB = MemoryMonitorService.readPositiveInt(
      process.env.GC_THRESHOLD_MB,
      800,
    ); // Trigger GC at 800MB
    this.logger.log(
      `Memory monitor initialized: limit=${this.MAX_MEMORY_MB}MB, batchSize=${this.BATCH_SIZE}, GCThreshold=${this.GC_THRESHOLD_MB}MB`,
    );
  }

  /**
   * Parse a positive base-10 integer setting.
   * Returns `fallback` when the value is missing, not numeric, or <= 0, so a
   * bad setting can never produce a NaN limit or a zero batch size.
   */
  private static readPositiveInt(
    raw: string | undefined,
    fallback: number,
  ): number {
    const parsed = Number.parseInt(raw ?? '', 10);
    return Number.isFinite(parsed) && parsed > 0 ? parsed : fallback;
  }

  /**
   * Get current memory usage of the process, in whole megabytes.
   */
  getMemoryUsage(): MemoryStats {
    const usage = process.memoryUsage();
    const toMB = (bytes: number): number => Math.round(bytes / 1024 / 1024);
    return {
      heapUsed: toMB(usage.heapUsed),
      heapTotal: toMB(usage.heapTotal),
      external: toMB(usage.external || 0),
      rss: toMB(usage.rss),
      timestamp: new Date(),
    };
  }

  /**
   * Check if used heap exceeds 85% of the configured memory limit.
   */
  isMemoryHigh(): boolean {
    const usage = this.getMemoryUsage();
    return usage.heapUsed > this.MAX_MEMORY_MB * 0.85; // 85% threshold
  }

  /**
   * Wait until heap usage drops below the high-usage threshold.
   *
   * Polls once per second and requests a garbage collection on every poll
   * when `global.gc` is exposed.
   *
   * @param timeoutMs Maximum time to wait, in milliseconds.
   * @throws Error when memory is still high after `timeoutMs`.
   */
  async waitForMemoryAvailable(timeoutMs: number = 30000): Promise<void> {
    const startTime = Date.now();
    while (this.isMemoryHigh()) {
      if (Date.now() - startTime > timeoutMs) {
        throw new Error(
          `Memory wait timeout: current ${this.getMemoryUsage().heapUsed}MB > ${this.MAX_MEMORY_MB * 0.85}MB`,
        );
      }
      this.logger.warn(
        `Memory usage too high. Waiting for release... ${this.getMemoryUsage().heapUsed}/${this.MAX_MEMORY_MB}MB`,
      );
      // Force garbage collection (if available)
      if (global.gc) {
        this.logger.log('Running forced garbage collection...');
        global.gc();
      }
      await new Promise((resolve) => setTimeout(resolve, 1000));
    }
  }

  /**
   * Force garbage collection (if `global.gc` is available) and log how much
   * heap was released. Does nothing otherwise.
   */
  forceGC(): void {
    if (global.gc) {
      const before = this.getMemoryUsage();
      global.gc();
      const after = this.getMemoryUsage();
      this.logger.log(
        `GC completed: ${before.heapUsed}MB → ${after.heapUsed}MB (${before.heapUsed - after.heapUsed}MB freed)`,
      );
    }
  }

  /**
   * Pick a batch size for the given heap usage.
   *
   * - above the GC threshold: half the base size, but at least 10
   * - below 40% of the memory limit: 1.2x the base size, capped at 200
   * - otherwise: the base size
   *
   * @param currentMemoryMB Current used heap in MB.
   */
  getDynamicBatchSize(currentMemoryMB: number): number {
    const baseBatchSize = this.BATCH_SIZE;
    if (currentMemoryMB > this.GC_THRESHOLD_MB) {
      // Memory pressure, reduce batch size
      const reduced = Math.max(10, Math.floor(baseBatchSize * 0.5));
      this.logger.warn(
        `Memory pressure (${currentMemoryMB}MB), adjusting batch size: ${baseBatchSize} → ${reduced}`,
      );
      return reduced;
    } else if (currentMemoryMB < this.MAX_MEMORY_MB * 0.4) {
      // Enough memory, increase batch size
      const increased = Math.min(200, Math.floor(baseBatchSize * 1.2));
      if (increased > baseBatchSize) {
        this.logger.log(
          `Memory available (${currentMemoryMB}MB), adjusting batch size: ${baseBatchSize} → ${increased}`,
        );
      }
      return increased;
    }
    return baseBatchSize;
  }

  /**
   * Process a large list sequentially in batches, with memory control.
   *
   * Before each batch this waits for memory to be available and re-evaluates
   * the batch size from the current heap usage.
   *
   * @param items Items to process; the array itself is not modified.
   * @param processor Called once per batch with the batch and its 1-based
   *   index; returns the results for that batch.
   * @param options.onBatchComplete Awaited after each batch with the 1-based
   *   batch index, the current estimate of the total batch count, and the
   *   batch results.
   * @param options.maxConcurrency Accepted but not used by this method.
   * @returns All batch results, concatenated in processing order.
   */
  async processInBatches<T, R>(
    items: T[],
    processor: (batch: T[], batchIndex: number) => Promise<R[]>,
    options?: {
      onBatchComplete?: (
        batchIndex: number,
        totalBatches: number,
        results: R[],
      ) => Promise<void> | void;
      maxConcurrency?: number;
    },
  ): Promise<R[]> {
    const totalItems = items.length;
    if (totalItems === 0) return [];
    const startTime = Date.now();
    this.logger.log(`Starting batch processing: ${totalItems} items`);

    const allResults: R[] = [];
    let processedCount = 0;
    let batchIndex = 0;

    for (let i = 0; i < totalItems; ) {
      // Check memory state and wait
      await this.waitForMemoryAvailable();

      // Dynamically adjust batch size. The constructor guarantees a positive
      // base size, so the loop always advances.
      const currentMem = this.getMemoryUsage().heapUsed;
      const batchSize = this.getDynamicBatchSize(currentMem);

      // Get current batch
      const batch = items.slice(i, i + batchSize);

      // Count batches explicitly: deriving the index from `i / batchSize`
      // is wrong as soon as the batch size changes between iterations. The
      // total is the finished batches plus what the remaining items need at
      // the current batch size.
      batchIndex += 1;
      const totalBatches =
        batchIndex - 1 + Math.ceil((totalItems - i) / batchSize);

      this.logger.log(
        `Processing batch ${batchIndex}/${totalBatches}: ${batch.length} items (cumulative ${processedCount}/${totalItems})`,
      );

      // Process batch
      const batchResults = await processor(batch, batchIndex);
      // Append one by one; spreading into push() can exceed the engine's
      // argument limit for very large result arrays.
      for (const result of batchResults) {
        allResults.push(result);
      }
      processedCount += batch.length;

      // Callback notification
      if (options?.onBatchComplete) {
        await options.onBatchComplete(batchIndex, totalBatches, batchResults);
      }

      // Force GC if memory was near the threshold when the batch started
      if (currentMem > this.GC_THRESHOLD_MB) {
        this.forceGC();
      }

      // Clear references to help GC
      batch.length = 0;
      i += batchSize;
    }

    const duration = ((Date.now() - startTime) / 1000).toFixed(2);
    const finalMem = this.getMemoryUsage();
    this.logger.log(
      `Batch processing completed: ${totalItems} items, duration ${duration}s, final memory ${finalMem.heapUsed}MB`,
    );
    return allResults;
  }

  /**
   * Estimate the memory required to hold `itemCount` items, in whole MB.
   *
   * @param itemCount Number of items.
   * @param itemSizeBytes Text size per item, in bytes.
   * @param vectorDim Vector dimension per item (4 bytes per component).
   */
  estimateMemoryUsage(
    itemCount: number,
    itemSizeBytes: number,
    vectorDim: number,
  ): number {
    // Text content memory
    const textMemory = itemCount * itemSizeBytes;
    // Vector memory (dimension * 4 bytes per vector)
    const vectorMemory = itemCount * vectorDim * 4;
    // Object overhead (~100 bytes per object)
    const overhead = itemCount * 100;
    const totalMB = Math.round(
      (textMemory + vectorMemory + overhead) / 1024 / 1024,
    );
    return totalMB;
  }

  /**
   * Check if batching should be used: true when the estimated memory exceeds
   * 70% of the configured memory limit.
   */
  shouldUseBatching(
    itemCount: number,
    itemSizeBytes: number,
    vectorDim: number,
  ): boolean {
    const estimatedMB = this.estimateMemoryUsage(
      itemCount,
      itemSizeBytes,
      vectorDim,
    );
    const threshold = this.MAX_MEMORY_MB * 0.7; // 70% threshold
    if (estimatedMB > threshold) {
      this.logger.warn(
        `Estimated memory ${estimatedMB}MB exceeds threshold ${threshold}MB, using batch processing`,
      );
      return true;
    }
    return false;
  }

  /**
   * Get recommended batch size so one batch stays within about 200MB,
   * clamped to the range 10-200.
   */
  getRecommendedBatchSize(itemSizeBytes: number, vectorDim: number): number {
    // Goal: max 200MB memory per batch
    const targetMemoryMB = 200;
    const targetMemoryBytes = targetMemoryMB * 1024 * 1024;
    // Memory per item = text + vector + overhead
    const singleItemMemory = itemSizeBytes + vectorDim * 4 + 100;
    const batchSize = Math.floor(targetMemoryBytes / singleItemMemory);
    // Limit between 10-200
    return Math.max(10, Math.min(200, batchSize));
  }
}
@@ -0,0 +1,106 @@
import { Injectable } from '@nestjs/common';
/**
 * One piece of a chunked text.
 */
export interface TextChunk {
// Chunk text, with surrounding whitespace trimmed
content: string;
// 0-based position of the chunk in the produced list
index: number;
// Start offset (inclusive) of the chunk within the trimmed source text
startPosition: number;
// End offset (exclusive) of the chunk within the trimmed source text
endPosition: number;
}
/**
 * Splits text into overlapping chunks, preferring to cut at sentence ends.
 * Sizes are given in tokens and converted with a fixed 4-characters-per-token
 * estimate.
 */
@Injectable()
export class TextChunkerService {
  /** Rough conversion factor: 1 token ≈ 4 chars. */
  private static readonly CHARS_PER_TOKEN = 4;

  /** ASCII and full-width (CJK) sentence terminators. */
  private static readonly SENTENCE_ENDERS: readonly string[] = [
    '.',
    '!',
    '?',
    '。',
    '！',
    '？',
  ];

  /**
   * Split `text` into chunks of roughly `chunkSize` tokens with `overlap`
   * tokens shared between neighbours.
   *
   * The text is trimmed first; positions in the result refer to the trimmed
   * text. A chunk boundary is moved back to a sentence end (or, failing that,
   * to whitespace) when one exists in the last 20% of the chunk.
   *
   * @param text Text to split. Blank input yields an empty list.
   * @param chunkSize Target chunk size in tokens; must be a positive number.
   * @param overlap Overlap between consecutive chunks in tokens. Negative or
   *   non-finite values are treated as no overlap.
   * @returns The chunks in document order.
   * @throws RangeError when `chunkSize` is not a positive finite number.
   */
  chunkText(
    text: string,
    chunkSize: number = 1000,
    overlap: number = 200,
  ): TextChunk[] {
    if (!text || text.trim().length === 0) {
      return [];
    }
    // A zero, negative or NaN chunk size would make the loop below spin
    // forever without producing chunks.
    if (!Number.isFinite(chunkSize) || chunkSize <= 0) {
      throw new RangeError(
        `chunkSize must be a positive number, received ${chunkSize}`,
      );
    }

    const cleanText = text.trim();
    const charsPerToken = TextChunkerService.CHARS_PER_TOKEN;
    // Whole character counts, at least one character per chunk.
    const chunkSizeInChars = Math.max(1, Math.floor(chunkSize * charsPerToken));
    // A NaN overlap would turn the next start into NaN and silently drop
    // everything after the first chunk.
    const overlapInChars =
      Number.isFinite(overlap) && overlap > 0
        ? Math.floor(overlap * charsPerToken)
        : 0;

    // If text length <= chunk size, return entire text as one chunk
    if (cleanText.length <= chunkSizeInChars) {
      return [
        {
          content: cleanText,
          index: 0,
          startPosition: 0,
          endPosition: cleanText.length,
        },
      ];
    }

    const chunks: TextChunk[] = [];
    let start = 0;
    let index = 0;
    while (start < cleanText.length) {
      let end = Math.min(start + chunkSizeInChars, cleanText.length);

      // Prefer a sentence boundary within the last 20% of the chunk
      if (end < cleanText.length) {
        const sentenceEnd = this.findSentenceEnd(
          cleanText,
          end,
          start + chunkSizeInChars * 0.8,
        );
        if (sentenceEnd > start) {
          end = sentenceEnd;
        }
      }

      const content = cleanText.slice(start, end).trim();
      if (content.length > 0) {
        chunks.push({
          content,
          index,
          startPosition: start,
          endPosition: end,
        });
        index++;
      }

      // Reached the end of the text: stop here.
      if (end >= cleanText.length) {
        break;
      }

      // Next chunk starts `overlap` characters before this one ended, but
      // must always move forward to avoid stalling.
      const newStart = end - overlapInChars;
      start = newStart <= start ? end : newStart;
    }
    return chunks;
  }

  /**
   * Find a cut position at or before `preferredEnd`, not lower than `minEnd`.
   *
   * Scans backwards for a sentence terminator first (cut right after it),
   * then for a space or newline (cut right before it). Returns
   * `preferredEnd` when neither is found.
   */
  private findSentenceEnd(
    text: string,
    preferredEnd: number,
    minEnd: number,
  ): number {
    const sentenceEnders = TextChunkerService.SENTENCE_ENDERS;
    for (let i = preferredEnd; i >= minEnd; i--) {
      if (sentenceEnders.includes(text[i])) {
        return i + 1;
      }
    }
    for (let i = preferredEnd; i >= minEnd; i--) {
      if (text[i] === ' ' || text[i] === '\n') {
        return i;
      }
    }
    return preferredEnd;
  }
}