feat: implement QuestionBank CRUD with pagination and template query

- Add pagination support to findAll (page, limit query params) - Add findByTemplateId method to service - Add GET /by-template/:templateId endpoint to controller - Service already includes CRUD for QuestionBank and QuestionBankItem
2026-04-23 17:19:11 +08:00
commit 0a9588abb7
492 changed files with 112453 additions and 0 deletions
@@ -0,0 +1,393 @@
+import { Injectable, Logger, BadRequestException } from '@nestjs/common';
+import { ConfigService } from '@nestjs/config';
+import { ModelConfigService } from '../model-config/model-config.service';
+import { TenantService } from '../tenant/tenant.service';
+// import { UserSettingService } from '../user-setting/user-setting.service';
+
+/**
+ * Chunk config service
+ * Responsible for validating and managing chunk parameters to ensure they conform to model limits and environment variable settings
+ *
+ * Priority of limits:
+ * 1. Environment variables (MAX_CHUNK_SIZE, MAX_OVERLAP_SIZE)
+ * 2. Model settings in database (maxInputTokens, maxBatchSize)
+ * 3. Default values
+ */
+import {
+  DEFAULT_CHUNK_SIZE,
+  MIN_CHUNK_SIZE,
+  DEFAULT_CHUNK_OVERLAP,
+  MIN_CHUNK_OVERLAP,
+  DEFAULT_MAX_OVERLAP_RATIO,
+  DEFAULT_MAX_BATCH_SIZE,
+  DEFAULT_VECTOR_DIMENSIONS,
+} from '../common/constants';
+import { I18nService } from '../i18n/i18n.service';
+
+@Injectable()
+export class ChunkConfigService {
+  private readonly logger = new Logger(ChunkConfigService.name);
+
+  // Default settings
+  private readonly DEFAULTS = {
+    chunkSize: DEFAULT_CHUNK_SIZE,
+    chunkOverlap: DEFAULT_CHUNK_OVERLAP,
+    minChunkSize: MIN_CHUNK_SIZE,
+    minChunkOverlap: MIN_CHUNK_OVERLAP,
+    maxOverlapRatio: DEFAULT_MAX_OVERLAP_RATIO, // Overlap up to 50% of chunk size
+    maxBatchSize: DEFAULT_MAX_BATCH_SIZE, // Default batch limit
+    expectedDimensions: DEFAULT_VECTOR_DIMENSIONS, // Default vector dimensions
+  };
+
+  // Upper limits set by environment variables (used first)
+  private readonly envMaxChunkSize: number;
+  private readonly envMaxOverlapSize: number;
+
+  constructor(
+    private configService: ConfigService,
+    private modelConfigService: ModelConfigService,
+    private i18nService: I18nService,
+    private tenantService: TenantService,
+  ) {
+    // Load global limit settings from environment variables
+    this.envMaxChunkSize = parseInt(
+      this.configService.get<string>('MAX_CHUNK_SIZE', '8191'),
+    );
+    this.envMaxOverlapSize = parseInt(
+      this.configService.get<string>('MAX_OVERLAP_SIZE', '2000'),
+    );
+
+    this.logger.log(
+      `Environment variable limits: MAX_CHUNK_SIZE=${this.envMaxChunkSize}, MAX_OVERLAP_SIZE=${this.envMaxOverlapSize}`,
+    );
+  }
+
+  /**
+   * Get model limit settings (read from database)
+   */
+  async getModelLimits(modelId: string): Promise<{
+    maxInputTokens: number;
+    maxBatchSize: number;
+    expectedDimensions: number;
+    providerName: string;
+    isVectorModel: boolean;
+  }> {
+    const modelConfig = await this.modelConfigService.findOne(modelId);
+
+    if (!modelConfig || modelConfig.type !== 'embedding') {
+      throw new BadRequestException(
+        this.i18nService.formatMessage('embeddingModelNotFound', {
+          id: modelId,
+        }),
+      );
+    }
+
+    // Get limits from database fields and fill with defaults
+    const maxInputTokens = modelConfig.maxInputTokens || this.envMaxChunkSize;
+    const maxBatchSize = modelConfig.maxBatchSize || this.DEFAULTS.maxBatchSize;
+    const expectedDimensions =
+      modelConfig.dimensions ||
+      parseInt(
+        this.configService.get(
+          'DEFAULT_VECTOR_DIMENSIONS',
+          String(this.DEFAULTS.expectedDimensions),
+        ),
+      );
+    const providerName = modelConfig.providerName || 'unknown';
+    const isVectorModel = modelConfig.isVectorModel || false;
+
+    this.logger.log(
+      this.i18nService.formatMessage('configLoaded', {
+        name: modelConfig.name,
+        id: modelConfig.modelId,
+      }) +
+        '\n' +
+        `  - Provider: ${providerName}\n` +
+        `  - Token limit: ${maxInputTokens}\n` +
+        `  - Batch limit: ${maxBatchSize}\n` +
+        `  - Vector dimensions: ${expectedDimensions}\n` +
+        `  - Is vector model: ${isVectorModel}`,
+    );
+
+    return {
+      maxInputTokens,
+      maxBatchSize,
+      expectedDimensions,
+      providerName,
+      isVectorModel,
+    };
+  }
+
+  /**
+   * Validate and fix chunk config
+   * Priority: Environment variable limits > Model limits > User settings
+   */
+  async validateChunkConfig(
+    chunkSize: number,
+    chunkOverlap: number,
+    modelId: string,
+  ): Promise<{
+    chunkSize: number;
+    chunkOverlap: number;
+    warnings: string[];
+    effectiveMaxChunkSize: number;
+    effectiveMaxOverlapSize: number;
+  }> {
+    const warnings: string[] = [];
+    const limits = await this.getModelLimits(modelId);
+
+    // 1. Calculate final limits (choose smaller of env var and model limit)
+    const effectiveMaxChunkSize = Math.min(
+      this.envMaxChunkSize,
+      limits.maxInputTokens,
+    );
+
+    const effectiveMaxOverlapSize = Math.min(
+      this.envMaxOverlapSize,
+      Math.floor(effectiveMaxChunkSize * this.DEFAULTS.maxOverlapRatio),
+    );
+
+    // 2. Validate chunk size upper limit
+    if (chunkSize > effectiveMaxChunkSize) {
+      const reason =
+        this.envMaxChunkSize < limits.maxInputTokens
+          ? `${this.i18nService.getMessage('environmentLimit')} ${this.envMaxChunkSize}`
+          : `${this.i18nService.getMessage('modelLimit')} ${limits.maxInputTokens}`;
+
+      warnings.push(
+        this.i18nService.formatMessage('chunkOverflow', {
+          size: chunkSize,
+          max: effectiveMaxChunkSize,
+          reason,
+        }),
+      );
+      chunkSize = effectiveMaxChunkSize;
+    }
+
+    // 3. Validate chunk size lower limit
+    if (chunkSize < this.DEFAULTS.minChunkSize) {
+      warnings.push(
+        this.i18nService.formatMessage('chunkUnderflow', {
+          size: chunkSize,
+          min: this.DEFAULTS.minChunkSize,
+        }),
+      );
+      chunkSize = this.DEFAULTS.minChunkSize;
+    }
+
+    // 4. Validate overlap size upper limit (env var first)
+    if (chunkOverlap > effectiveMaxOverlapSize) {
+      warnings.push(
+        this.i18nService.formatMessage('overlapOverflow', {
+          size: chunkOverlap,
+          max: effectiveMaxOverlapSize,
+        }),
+      );
+      chunkOverlap = effectiveMaxOverlapSize;
+    }
+
+    // 5. Validate overlap doesn't exceed 50% of chunk size
+    const maxOverlapByRatio = Math.floor(
+      chunkSize * this.DEFAULTS.maxOverlapRatio,
+    );
+    if (chunkOverlap > maxOverlapByRatio) {
+      warnings.push(
+        this.i18nService.formatMessage('overlapRatioExceeded', {
+          size: chunkOverlap,
+          max: maxOverlapByRatio,
+        }),
+      );
+      chunkOverlap = maxOverlapByRatio;
+    }
+
+    if (chunkOverlap < this.DEFAULTS.minChunkOverlap) {
+      warnings.push(
+        this.i18nService.formatMessage('overlapUnderflow', {
+          size: chunkOverlap,
+          min: this.DEFAULTS.minChunkOverlap,
+        }),
+      );
+      chunkOverlap = this.DEFAULTS.minChunkOverlap;
+    }
+
+    // 6. Add safety check for batch processing
+    // During batch processing, ensure total length of multiple texts doesn't exceed model limits
+    const safetyMargin = 0.8; // 80% safety margin to leave space for batch processing
+    const safeChunkSize = Math.floor(effectiveMaxChunkSize * safetyMargin);
+
+    if (chunkSize > safeChunkSize) {
+      warnings.push(
+        this.i18nService.formatMessage('batchOverflowWarning', {
+          safeSize: safeChunkSize,
+          size: chunkSize,
+          percent: Math.round(safetyMargin * 100),
+        }),
+      );
+    }
+
+    // 7. Check if estimated chunk count is reasonable
+    const estimatedChunkCount = this.estimateChunkCount(
+      1000000, // Assume 1MB text
+      chunkSize,
+    );
+
+    if (estimatedChunkCount > 50000) {
+      warnings.push(
+        this.i18nService.formatMessage('estimatedChunkCountExcessive', {
+          count: estimatedChunkCount,
+        }),
+      );
+    }
+
+    return {
+      chunkSize,
+      chunkOverlap,
+      warnings,
+      effectiveMaxChunkSize,
+      effectiveMaxOverlapSize,
+    };
+  }
+
+  /**
+   * Get recommended batch size
+   */
+  async getRecommendedBatchSize(
+    modelId: string,
+    currentBatchSize: number = 100,
+  ): Promise<number> {
+    const limits = await this.getModelLimits(modelId);
+
+    // Choose smaller of configured value and model limit
+    const recommended = Math.min(
+      currentBatchSize,
+      limits.maxBatchSize,
+      200, // Safety upper limit
+    );
+
+    if (recommended < currentBatchSize) {
+      this.logger.warn(
+        this.i18nService.formatMessage('batchSizeAdjusted', {
+          old: currentBatchSize,
+          new: recommended,
+          limit: limits.maxBatchSize,
+        }),
+      );
+    }
+
+    return Math.max(10, recommended); // Minimum 10
+  }
+
+  /**
+   * Estimate chunk count
+   */
+  estimateChunkCount(textLength: number, chunkSize: number): number {
+    const chunkSizeInChars = chunkSize * 4; // 1 token ≈ 4 chars
+    return Math.ceil(textLength / chunkSizeInChars);
+  }
+
+  /**
+   * Validate vector dimensions
+   */
+  async validateDimensions(
+    modelId: string,
+    actualDimensions: number,
+  ): Promise<boolean> {
+    const limits = await this.getModelLimits(modelId);
+
+    if (actualDimensions !== limits.expectedDimensions) {
+      this.logger.warn(
+        this.i18nService.formatMessage('dimensionMismatch', {
+          id: modelId,
+          expected: limits.expectedDimensions,
+          actual: actualDimensions,
+        }),
+      );
+      return false;
+    }
+
+    return true;
+  }
+
+  /**
+   * Get config summary (for logging)
+   */
+  async getConfigSummary(
+    chunkSize: number,
+    chunkOverlap: number,
+    modelId: string,
+  ): Promise<string> {
+    const limits = await this.getModelLimits(modelId);
+
+    return [
+      `Model: ${modelId}`,
+      `Chunk size: ${chunkSize} tokens (limit: ${limits.maxInputTokens})`,
+      `Overlap size: ${chunkOverlap} tokens`,
+      `Batch size: ${limits.maxBatchSize}`,
+      `Vector dimensions: ${limits.expectedDimensions}`,
+    ].join(', ');
+  }
+
+  /**
+   * Get config limits for frontend
+   * Used for frontend slider max value settings
+   */
+  async getFrontendLimits(
+    modelId: string,
+    userId: string,
+    tenantId?: string,
+  ): Promise<{
+    maxChunkSize: number;
+    maxOverlapSize: number;
+    minOverlapSize: number;
+    defaultChunkSize: number;
+    defaultOverlapSize: number;
+    modelInfo: {
+      name: string;
+      maxInputTokens: number;
+      maxBatchSize: number;
+      expectedDimensions: number;
+    };
+  }> {
+    const limits = await this.getModelLimits(modelId);
+
+    // Calculate final limits (choose smaller of env var and model limit)
+    const maxChunkSize = Math.min(this.envMaxChunkSize, limits.maxInputTokens);
+    const maxOverlapSize = Math.min(
+      this.envMaxOverlapSize,
+      Math.floor(maxChunkSize * this.DEFAULTS.maxOverlapRatio),
+    );
+
+    // Get model config name
+    const modelConfig = await this.modelConfigService.findOne(modelId);
+    const modelName = modelConfig?.name || 'Unknown';
+
+    // Get defaults from tenant or user settings
+    let defaultChunkSize = this.DEFAULTS.chunkSize;
+    let defaultOverlapSize = this.DEFAULTS.chunkOverlap;
+
+    if (tenantId) {
+      const tenantSettings = await this.tenantService.getSettings(tenantId);
+      if (tenantSettings?.chunkSize)
+        defaultChunkSize = tenantSettings.chunkSize;
+      if (tenantSettings?.chunkOverlap)
+        defaultOverlapSize = tenantSettings.chunkOverlap;
+    }
+
+    return {
+      maxChunkSize,
+      maxOverlapSize,
+      minOverlapSize: this.DEFAULTS.minChunkOverlap,
+      defaultChunkSize: Math.min(defaultChunkSize, maxChunkSize),
+      defaultOverlapSize: Math.max(
+        this.DEFAULTS.minChunkOverlap,
+        Math.min(defaultOverlapSize, maxOverlapSize),
+      ),
+      modelInfo: {
+        name: modelName,
+        maxInputTokens: limits.maxInputTokens,
+        maxBatchSize: limits.maxBatchSize,
+        expectedDimensions: limits.expectedDimensions,
+      },
+    };
+  }
+}
@@ -0,0 +1,11 @@
+import { IsNotEmpty, IsOptional, IsString } from 'class-validator';
+
+export class CreateKnowledgeBaseDto {
+  @IsString()
+  @IsNotEmpty()
+  name: string;
+
+  @IsString()
+  @IsOptional()
+  description?: string;
+}
@@ -0,0 +1,286 @@
+import { Injectable, Logger } from '@nestjs/common';
+import { ConfigService } from '@nestjs/config';
+import { ModelConfigService } from '../model-config/model-config.service';
+import { I18nService } from '../i18n/i18n.service';
+
+export interface EmbeddingResponse {
+  data: Array<{
+    embedding: number[];
+    index: number;
+  }>;
+  model: string;
+  usage: {
+    prompt_tokens: number;
+    total_tokens: number;
+  };
+}
+
+@Injectable()
+export class EmbeddingService {
+  private readonly logger = new Logger(EmbeddingService.name);
+  private readonly defaultDimensions: number;
+
+  constructor(
+    private modelConfigService: ModelConfigService,
+    private configService: ConfigService,
+    private i18nService: I18nService,
+  ) {
+    this.defaultDimensions = parseInt(
+      this.configService.get<string>('DEFAULT_VECTOR_DIMENSIONS', '2560'),
+    );
+    this.logger.log(
+      `Default vector dimensions set to ${this.defaultDimensions}`,
+    );
+  }
+
+  async getEmbeddings(
+    texts: string[],
+    embeddingModelConfigId: string,
+  ): Promise<number[][]> {
+    this.logger.log(`Generating embeddings for ${texts.length} texts`);
+
+    const modelConfig = await this.modelConfigService.findOne(
+      embeddingModelConfigId,
+    );
+    if (!modelConfig || modelConfig.type !== 'embedding') {
+      throw new Error(
+        this.i18nService.formatMessage('embeddingModelNotFound', {
+          id: embeddingModelConfigId,
+        }),
+      );
+    }
+
+    if (modelConfig.isEnabled === false) {
+      throw new Error(
+        `Model ${modelConfig.name} is disabled and cannot generate embeddings`,
+      );
+    }
+
+    if (!modelConfig.baseUrl) {
+      throw new Error(
+        `Model ${modelConfig.name} does not have baseUrl configured`,
+      );
+    }
+
+    // Determine max batch size based on model name
+    const maxBatchSize = this.getMaxBatchSizeForModel(
+      modelConfig.modelId,
+      modelConfig.maxBatchSize,
+    );
+
+    // Split processing if batch size exceeds limit
+    if (texts.length > maxBatchSize) {
+      this.logger.log(
+        `Splitting ${texts.length} texts into batches (model batch limit: ${maxBatchSize})`,
+      );
+
+      const allEmbeddings: number[][] = [];
+
+      for (let i = 0; i < texts.length; i += maxBatchSize) {
+        const batch = texts.slice(i, i + maxBatchSize);
+        const batchEmbeddings = await this.getEmbeddingsForBatch(
+          batch,
+          modelConfig,
+          maxBatchSize,
+        );
+
+        allEmbeddings.push(...batchEmbeddings);
+
+        // Wait briefly to avoid API rate limiting
+        if (i + maxBatchSize < texts.length) {
+          await new Promise((resolve) => setTimeout(resolve, 100)); // Wait 100ms
+        }
+      }
+
+      return allEmbeddings;
+    } else {
+      // Normal processing (within batch size)
+      return await this.getEmbeddingsForBatch(
+        texts,
+        modelConfig,
+        maxBatchSize,
+      );
+    }
+  }
+
+  /**
+   * Determine max batch size based on model ID
+   */
+  private getMaxBatchSizeForModel(
+    modelId: string,
+    configuredMaxBatchSize?: number,
+  ): number {
+    // Model-specific batch size limits
+    if (
+      modelId.includes('text-embedding-004') ||
+      modelId.includes('text-embedding-v4') ||
+      modelId.includes('text-embedding-ada-002')
+    ) {
+      return Math.min(10, configuredMaxBatchSize || 100); // Google limit: 10
+    } else if (
+      modelId.includes('text-embedding-3') ||
+      modelId.includes('text-embedding-003')
+    ) {
+      return Math.min(2048, configuredMaxBatchSize || 2048); // OpenAI v3 limit: 2048
+    } else {
+      // Default: smaller of configured max or 100
+      return Math.min(configuredMaxBatchSize || 100, 100);
+    }
+  }
+
+  /**
+   * Process single batch embedding
+   */
+  private async getEmbeddingsForBatch(
+    texts: string[],
+    modelConfig: any,
+    maxBatchSize: number,
+  ): Promise<number[][]> {
+    const apiUrl = modelConfig.baseUrl.endsWith('/embeddings')
+      ? modelConfig.baseUrl
+      : `${modelConfig.baseUrl}/embeddings`;
+
+    let lastError;
+    const MAX_RETRIES = 3;
+
+    for (let attempt = 1; attempt <= MAX_RETRIES; attempt++) {
+      try {
+        const controller = new AbortController();
+        const timeoutId = setTimeout(() => {
+          controller.abort();
+          this.logger.error(`Embedding API timeout after 60s: ${apiUrl}`);
+        }, 60000); // 60s timeout
+
+        this.logger.log(
+          `[Model call] Type: Embedding, Model: ${modelConfig.name} (${modelConfig.modelId}), Text count: ${texts.length}`,
+        );
+        this.logger.log(
+          `Calling embedding API (attempt ${attempt}/${MAX_RETRIES}): ${apiUrl}`,
+        );
+
+        let response;
+        try {
+          response = await fetch(apiUrl, {
+            method: 'POST',
+            headers: {
+              'Content-Type': 'application/json',
+              Authorization: `Bearer ${modelConfig.apiKey}`,
+            },
+            body: JSON.stringify({
+              encoding_format: 'float',
+              input: texts,
+              model: modelConfig.modelId,
+            }),
+            signal: controller.signal,
+          });
+        } finally {
+          clearTimeout(timeoutId);
+        }
+
+        if (!response.ok) {
+          const errorText = await response.text();
+
+          // Detect batch size limit error
+          if (
+            errorText.includes('batch size is invalid') ||
+            errorText.includes('batch_size') ||
+            errorText.includes('invalid') ||
+            errorText.includes('larger than')
+          ) {
+            this.logger.warn(
+              `Batch size limit error detected. Splitting batch in half and retrying: ${maxBatchSize} -> ${Math.floor(maxBatchSize / 2)}`,
+            );
+
+            // Split batch into smaller units and retry
+            if (texts.length > 1) {
+              const midPoint = Math.floor(texts.length / 2);
+              const firstHalf = texts.slice(0, midPoint);
+              const secondHalf = texts.slice(midPoint);
+
+              const firstResult = await this.getEmbeddingsForBatch(
+                firstHalf,
+                modelConfig,
+                Math.floor(maxBatchSize / 2),
+              );
+              const secondResult = await this.getEmbeddingsForBatch(
+                secondHalf,
+                modelConfig,
+                Math.floor(maxBatchSize / 2),
+              );
+
+              return [...firstResult, ...secondResult];
+            }
+          }
+
+          // Detect context length excess error
+          if (
+            errorText.includes('context length') ||
+            errorText.includes('exceeds')
+          ) {
+            const avgLength =
+              texts.reduce((s, t) => s + t.length, 0) / texts.length;
+            const totalLength = texts.reduce((s, t) => s + t.length, 0);
+            this.logger.error(
+              `Text length exceeds limit: ${texts.length} texts, ` +
+                `total ${totalLength} characters, average ${Math.round(avgLength)} characters, ` +
+                `model limit: ${modelConfig.maxInputTokens || 8192} tokens`,
+            );
+            throw new Error(
+              `Text length exceeds model limit. ` +
+                `Current: ${texts.length} texts with total ${totalLength} characters, ` +
+                `model limit: ${modelConfig.maxInputTokens || 8192} tokens. ` +
+                `Advice: Reduce chunk size or batch size`,
+            );
+          }
+
+          // Retry on 429 (Too Many Requests) or 5xx (Server Error)
+          if (response.status === 429 || response.status >= 500) {
+            this.logger.warn(
+              `Temporary error from embedding API (${response.status}): ${errorText}`,
+            );
+            throw new Error(`API Error ${response.status}: ${errorText}`);
+          }
+
+          this.logger.error(`Embedding API error details: ${errorText}`);
+          this.logger.error(
+            `Request parameters: model=${modelConfig.modelId}, inputLength=${texts[0]?.length}`,
+          );
+          throw new Error(
+            `Embedding API call failed: ${response.statusText} - ${errorText}`,
+          );
+        }
+
+        const data: EmbeddingResponse = await response.json();
+        const embeddings = data.data.map((item) => item.embedding);
+
+        // Get dimensions from actual response
+        const actualDimensions =
+          embeddings[0]?.length || this.defaultDimensions;
+        this.logger.log(
+          `Got ${embeddings.length} embedding vectors from ${modelConfig.name}. Dimensions: ${actualDimensions}`,
+        );
+
+        return embeddings;
+      } catch (error) {
+        lastError = error;
+
+        // If not the last attempt and error appears temporary (or for robustness on all), retry after waiting
+        if (attempt < MAX_RETRIES) {
+          const delay = Math.pow(2, attempt - 1) * 1000; // 1s, 2s, 4s
+          this.logger.warn(
+            `Embedding request failed. Retrying after ${delay}ms: ${error.message}`,
+          );
+          await new Promise((resolve) => setTimeout(resolve, delay));
+          continue;
+        }
+      }
+    }
+
+    throw lastError;
+  }
+
+  private getEstimatedDimensions(modelId: string): number {
+    // Use default dimensions from environment variable
+    return this.defaultDimensions;
+  }
+}
@@ -0,0 +1,362 @@
+import {
+  Body,
+  Controller,
+  Delete,
+  Get,
+  Param,
+  Post,
+  Query,
+  Request,
+  UseGuards,
+  Res,
+  NotFoundException,
+  InternalServerErrorException,
+} from '@nestjs/common';
+import { Response } from 'express';
+import * as path from 'path';
+import { Logger } from '@nestjs/common';
+import { KnowledgeBaseService } from './knowledge-base.service';
+import { CombinedAuthGuard } from '../auth/combined-auth.guard';
+import { RolesGuard } from '../auth/roles.guard';
+import { Roles } from '../auth/roles.decorator';
+import { UserRole } from '../user/user-role.enum';
+import { Public } from '../auth/public.decorator';
+import { KnowledgeBase } from './knowledge-base.entity';
+import { ChunkConfigService } from './chunk-config.service';
+import { KnowledgeGroupService } from '../knowledge-group/knowledge-group.service';
+import { I18nService } from '../i18n/i18n.service';
+
+@Controller('knowledge-bases')
+@UseGuards(CombinedAuthGuard, RolesGuard)
+export class KnowledgeBaseController {
+  private readonly logger = new Logger(KnowledgeBaseController.name);
+
+  constructor(
+    private readonly knowledgeBaseService: KnowledgeBaseService,
+    private readonly chunkConfigService: ChunkConfigService,
+    private readonly knowledgeGroupService: KnowledgeGroupService,
+    private readonly i18nService: I18nService,
+  ) {}
+
+  @Get()
+  @UseGuards(CombinedAuthGuard)
+  async findAll(@Request() req): Promise<KnowledgeBase[]> {
+    return this.knowledgeBaseService.findAll(req.user.id, req.user.tenantId);
+  }
+
+  @Get('stats')
+  @UseGuards(CombinedAuthGuard)
+  async getStats(
+    @Request() req,
+  ): Promise<{ total: number; uncategorized: number }> {
+    return this.knowledgeBaseService.getStats(req.user.id, req.user.tenantId);
+  }
+
+  @Delete('clear')
+  @Roles(UserRole.TENANT_ADMIN, UserRole.SUPER_ADMIN)
+  async clearAll(@Request() req): Promise<{ message: string }> {
+    await this.knowledgeBaseService.clearAll(req.user.id, req.user.tenantId);
+    return { message: this.i18nService.getMessage('kbCleared') };
+  }
+
+  @Post('search')
+  async search(@Request() req, @Body() body: { query: string; topK?: number }) {
+    return this.knowledgeBaseService.searchKnowledge(
+      req.user.id,
+      req.user.tenantId, // New
+      body.query,
+      body.topK || 5,
+    );
+  }
+
+  @Post('rag-search')
+  async ragSearch(
+    @Request() req,
+    @Body() body: { query: string; settings: any },
+  ) {
+    return this.knowledgeBaseService.ragSearch(
+      req.user.id,
+      req.user.tenantId, // New
+      body.query,
+      body.settings,
+    );
+  }
+
+  @Delete(':id')
+  @Roles(UserRole.TENANT_ADMIN, UserRole.SUPER_ADMIN)
+  async deleteFile(
+    @Request() req,
+    @Param('id') fileId: string,
+  ): Promise<{ message: string }> {
+    await this.knowledgeBaseService.deleteFile(
+      fileId,
+      req.user.id,
+      req.user.tenantId,
+    );
+    return { message: this.i18nService.getMessage('fileDeleted') };
+  }
+
+  @Post(':id/retry')
+  @Roles(UserRole.TENANT_ADMIN, UserRole.SUPER_ADMIN)
+  async retryFile(
+    @Request() req,
+    @Param('id') fileId: string,
+  ): Promise<KnowledgeBase> {
+    return this.knowledgeBaseService.retryFailedFile(
+      fileId,
+      req.user.id,
+      req.user.tenantId,
+    );
+  }
+
+  @Get(':id/chunks')
+  async getFileChunks(@Request() req, @Param('id') fileId: string) {
+    return this.knowledgeBaseService.getFileChunks(
+      fileId,
+      req.user.id,
+      req.user.tenantId,
+    );
+  }
+
+  /**
+   * Get chunk config limits (for frontend slider settings)
+   * Query parameter: embeddingModelId - embedding model ID
+   */
+  @Get('chunk-config/limits')
+  async getChunkConfigLimits(
+    @Request() req,
+    @Query('embeddingModelId') embeddingModelId: string,
+  ) {
+    if (!embeddingModelId) {
+      return {
+        maxChunkSize: parseInt(process.env.MAX_CHUNK_SIZE || '8191'),
+        maxOverlapSize: parseInt(process.env.MAX_OVERLAP_SIZE || '2000'),
+        minOverlapSize: 25,
+        defaultChunkSize: 200,
+        defaultOverlapSize: 40,
+        modelInfo: {
+          name: this.i18nService.getMessage('modelNotConfigured'),
+          maxInputTokens: parseInt(process.env.MAX_CHUNK_SIZE || '8191'),
+          maxBatchSize: 2048,
+          expectedDimensions: parseInt(
+            process.env.DEFAULT_VECTOR_DIMENSIONS || '2560',
+          ),
+        },
+      };
+    }
+
+    return await this.chunkConfigService.getFrontendLimits(
+      embeddingModelId,
+      req.user.id,
+      req.user.tenantId,
+    );
+  }
+
+  // File group management - requires admin permission
+  @Post(':id/groups')
+  @Roles(UserRole.TENANT_ADMIN, UserRole.SUPER_ADMIN)
+  async addFileToGroups(
+    @Param('id') fileId: string,
+    @Body() body: { groupIds: string[] },
+    @Request() req,
+  ) {
+    await this.knowledgeGroupService.addFilesToGroup(
+      fileId,
+      body.groupIds,
+      req.user.id,
+      req.user.tenantId,
+    );
+    return { message: this.i18nService.getMessage('groupSyncSuccess') };
+  }
+
+  @Delete(':id/groups/:groupId')
+  @Roles(UserRole.TENANT_ADMIN, UserRole.SUPER_ADMIN)
+  async removeFileFromGroup(
+    @Param('id') fileId: string,
+    @Param('groupId') groupId: string,
+    @Request() req,
+  ) {
+    await this.knowledgeGroupService.removeFileFromGroup(
+      fileId,
+      groupId,
+      req.user.id,
+      req.user.tenantId,
+    );
+    return { message: this.i18nService.getMessage('fileDeletedFromGroup') };
+  }
+
+  // PDF preview - public access
+  @Public()
+  @Get(':id/pdf')
+  async getPDFPreview(
+    @Param('id') fileId: string,
+    @Query('token') token: string,
+    @Res() res: Response,
+  ) {
+    try {
+      if (!token) {
+        throw new NotFoundException(
+          this.i18nService.getMessage('accessDeniedNoToken'),
+        );
+      }
+
+      const jwt = await import('jsonwebtoken');
+      const secret = process.env.JWT_SECRET;
+      if (!secret) {
+        throw new InternalServerErrorException(
+          this.i18nService.getMessage('jwtSecretRequired'),
+        );
+      }
+
+      let decoded;
+      try {
+        decoded = jwt.verify(token, secret) as any;
+      } catch {
+        throw new NotFoundException(
+          this.i18nService.getMessage('invalidToken'),
+        );
+      }
+
+      if (decoded.type !== 'pdf-access' || decoded.fileId !== fileId) {
+        throw new NotFoundException(
+          this.i18nService.getMessage('invalidToken'),
+        );
+      }
+
+      const pdfPath = await this.knowledgeBaseService.ensurePDFExists(
+        fileId,
+        decoded.userId,
+        decoded.tenantId, // New
+      );
+
+      const fs = await import('fs');
+      const path = await import('path');
+
+      if (!fs.existsSync(pdfPath)) {
+        throw new NotFoundException(
+          this.i18nService.getMessage('pdfFileNotFound'),
+        );
+      }
+
+      const stat = fs.statSync(pdfPath);
+      const fileName = path.basename(pdfPath);
+
+      if (stat.size === 0) {
+        this.logger.warn(`PDF file is empty: ${pdfPath}`);
+        try {
+          fs.unlinkSync(pdfPath); // Delete empty file
+        } catch (e) {}
+        throw new NotFoundException(
+          this.i18nService.getMessage('pdfFileEmpty'),
+        );
+      }
+
+      res.setHeader('Content-Type', 'application/pdf');
+      res.setHeader('Content-Length', stat.size);
+
+      const stream = fs.createReadStream(pdfPath);
+      stream.pipe(res);
+    } catch (error) {
+      if (error instanceof NotFoundException) {
+        throw error;
+      }
+      this.logger.error(`PDF preview error: ${error.message}`);
+      throw new NotFoundException(
+        this.i18nService.getMessage('pdfConversionFailed'),
+      );
+    }
+  }
+
+  // Get PDF preview URL
+  @Get(':id/pdf-url')
+  async getPDFUrl(
+    @Param('id') fileId: string,
+    @Query('force') force: string,
+    @Request() req,
+  ) {
+    try {
+      // Trigger PDF conversion
+      await this.knowledgeBaseService.ensurePDFExists(
+        fileId,
+        req.user.id,
+        req.user.tenantId,
+        force === 'true',
+      );
+
+      // Generate temporary access token
+      const jwt = await import('jsonwebtoken');
+
+      const secret = process.env.JWT_SECRET;
+      if (!secret) {
+        throw new InternalServerErrorException(
+          this.i18nService.getMessage('jwtSecretRequired'),
+        );
+      }
+
+      const token = jwt.sign(
+        {
+          fileId,
+          userId: req.user.id,
+          tenantId: req.user.tenantId,
+          type: 'pdf-access',
+        },
+        secret,
+        { expiresIn: '1h' },
+      );
+
+      return {
+        url: `/api/knowledge-bases/${fileId}/pdf?token=${token}`,
+      };
+    } catch (error) {
+      if (error.message.includes('LibreOffice')) {
+        throw new InternalServerErrorException(
+          this.i18nService.formatMessage('pdfServiceUnavailable', {
+            message: error.message,
+          }),
+        );
+      }
+      throw new InternalServerErrorException(error.message);
+    }
+  }
+
+  @Get(':id/pdf-status')
+  async getPDFStatus(@Param('id') fileId: string, @Request() req) {
+    return await this.knowledgeBaseService.getPDFStatus(
+      fileId,
+      req.user.id,
+      req.user.tenantId,
+    );
+  }
+
+  // Get specific page of PDF as image
+  @Get(':id/page/:index')
+  async getPageImage(
+    @Param('id') fileId: string,
+    @Param('index') index: number,
+    @Request() req,
+    @Res() res: Response,
+  ) {
+    try {
+      const imagePath = await this.knowledgeBaseService.getPageAsImage(
+        fileId,
+        Number(index),
+        req.user.id,
+        req.user.tenantId,
+      );
+
+      const fs = await import('fs');
+      if (!fs.existsSync(imagePath)) {
+        throw new NotFoundException(
+          this.i18nService.getMessage('pageImageNotFound'),
+        );
+      }
+
+      res.sendFile(path.resolve(imagePath));
+    } catch (error) {
+      this.logger.error(`Failed to get PDF page image: ${error.message}`);
+      throw new NotFoundException(
+        this.i18nService.getMessage('pdfPageImageFailed'),
+      );
+    }
+  }
+}
@@ -0,0 +1,99 @@
+import {
+  Column,
+  CreateDateColumn,
+  Entity,
+  PrimaryGeneratedColumn,
+  UpdateDateColumn,
+  ManyToMany,
+  ManyToOne,
+  JoinColumn,
+} from 'typeorm';
+import { KnowledgeGroup } from '../knowledge-group/knowledge-group.entity';
+import { Tenant } from '../tenant/tenant.entity';
+
+export enum FileStatus {
+  PENDING = 'pending',
+  INDEXING = 'indexing',
+  EXTRACTED = 'extracted', // Text extraction completed and saved to database
+  VECTORIZED = 'vectorized', // Vectorization completed and indexed to ES
+  FAILED = 'failed',
+}
+
+export enum ProcessingMode {
+  FAST = 'fast', // Fast mode - use Tika
+  PRECISE = 'precise', // Precise mode - use Vision Pipeline
+}
+
+@Entity('knowledge_bases')
+export class KnowledgeBase {
+  @PrimaryGeneratedColumn('uuid')
+  id: string;
+
+  @Column({ name: 'original_name' })
+  originalName: string;
+
+  @Column({ nullable: true })
+  title: string;
+
+  @Column({ name: 'storage_path' })
+  storagePath: string;
+
+  @Column({ type: 'integer', default: 0 })
+  size: number;
+
+  @Column({ length: 100, nullable: true })
+  mimetype: string;
+
+  @Column({
+    type: 'simple-enum',
+    enum: FileStatus,
+    default: FileStatus.PENDING,
+  })
+  status: FileStatus;
+
+  @Column({ name: 'user_id', nullable: true }) // Temporarily allowed empty (for debugging), should be required in future
+  userId: string;
+
+  @Column({ name: 'tenant_id', nullable: true, type: 'text' })
+  tenantId: string;
+
+  @ManyToOne(() => Tenant, { nullable: true, onDelete: 'CASCADE' })
+  @JoinColumn({ name: 'tenant_id' })
+  tenant: Tenant;
+
+  @Column({ type: 'text', nullable: true })
+  content: string; // Stores text content extracted by Tika
+
+  // Index setting parameters
+  @Column({ name: 'chunk_size', type: 'integer', default: 1000 })
+  chunkSize: number;
+
+  @Column({ name: 'chunk_overlap', type: 'integer', default: 200 })
+  chunkOverlap: number;
+
+  @Column({ name: 'embedding_model_id', nullable: true })
+  embeddingModelId: string;
+
+  @Column({
+    type: 'simple-enum',
+    enum: ProcessingMode,
+    default: ProcessingMode.FAST,
+    name: 'processing_mode',
+  })
+  processingMode: ProcessingMode;
+
+  @Column({ type: 'json', nullable: true })
+  metadata: any; // Stores additional metadata (image descriptions, confidence, etc.)
+
+  @Column({ name: 'pdf_path', nullable: true })
+  pdfPath: string; // PDF file path (for preview)
+
+  @ManyToMany(() => KnowledgeGroup, (group) => group.knowledgeBases)
+  groups: KnowledgeGroup[];
+
+  @CreateDateColumn({ name: 'created_at' })
+  createdAt: Date;
+
+  @UpdateDateColumn({ name: 'updated_at' })
+  updatedAt: Date;
+}
@@ -0,0 +1,52 @@
+import { Module, forwardRef } from '@nestjs/common';
+import { TypeOrmModule } from '@nestjs/typeorm';
+import { KnowledgeBase } from './knowledge-base.entity';
+import { KnowledgeGroup } from '../knowledge-group/knowledge-group.entity';
+import { KnowledgeBaseService } from './knowledge-base.service';
+import { KnowledgeBaseController } from './knowledge-base.controller';
+import { ElasticsearchModule } from '../elasticsearch/elasticsearch.module';
+import { TikaModule } from '../tika/tika.module';
+import { ModelConfigModule } from '../model-config/model-config.module';
+import { EmbeddingService } from './embedding.service';
+import { TextChunkerService } from './text-chunker.service';
+import { RagModule } from '../rag/rag.module';
+import { VisionModule } from '../vision/vision.module';
+import { MemoryMonitorService } from './memory-monitor.service';
+import { ChunkConfigService } from './chunk-config.service';
+import { LibreOfficeModule } from '../libreoffice/libreoffice.module';
+import { Pdf2ImageModule } from '../pdf2image/pdf2image.module';
+import { VisionPipelineModule } from '../vision-pipeline/vision-pipeline.module';
+import { KnowledgeGroupModule } from '../knowledge-group/knowledge-group.module';
+import { ChatModule } from '../chat/chat.module';
+import { UserModule } from '../user/user.module';
+import { TenantModule } from '../tenant/tenant.module';
+import { CombinedAuthGuard } from '../auth/combined-auth.guard';
+
+@Module({
+  imports: [
+    TypeOrmModule.forFeature([KnowledgeBase, KnowledgeGroup]),
+    forwardRef(() => ElasticsearchModule),
+    TikaModule,
+    ModelConfigModule,
+    forwardRef(() => RagModule),
+    VisionModule,
+    LibreOfficeModule,
+    Pdf2ImageModule,
+    VisionPipelineModule,
+    forwardRef(() => KnowledgeGroupModule),
+    forwardRef(() => ChatModule),
+    UserModule,
+    TenantModule,
+  ],
+  controllers: [KnowledgeBaseController],
+  providers: [
+    KnowledgeBaseService,
+    EmbeddingService,
+    TextChunkerService,
+    MemoryMonitorService,
+    ChunkConfigService,
+    CombinedAuthGuard,
+  ],
+  exports: [KnowledgeBaseService, EmbeddingService],
+})
+export class KnowledgeBaseModule {}
@@ -0,0 +1,1826 @@
+import {
+  Injectable,
+  Logger,
+  NotFoundException,
+  ForbiddenException,
+  Inject,
+  forwardRef,
+} from '@nestjs/common';
+import { ConfigService } from '@nestjs/config';
+import { DEFAULT_LANGUAGE } from '../common/constants';
+import { I18nService } from '../i18n/i18n.service';
+import { InjectRepository } from '@nestjs/typeorm';
+import { Repository, In } from 'typeorm';
+import {
+  FileStatus,
+  KnowledgeBase,
+  ProcessingMode,
+} from './knowledge-base.entity';
+import { KnowledgeGroup } from '../knowledge-group/knowledge-group.entity';
+import { ElasticsearchService } from '../elasticsearch/elasticsearch.service';
+import { TikaService } from '../tika/tika.service';
+import * as fs from 'fs';
+import * as path from 'path';
+import { EmbeddingService } from './embedding.service';
+import { TextChunkerService } from './text-chunker.service';
+import { ModelConfigService } from '../model-config/model-config.service';
+import { RagService } from '../rag/rag.service';
+import { VisionService } from '../vision/vision.service';
+import { TenantService } from '../tenant/tenant.service';
+import { MemoryMonitorService } from './memory-monitor.service';
+import { ChunkConfigService } from './chunk-config.service';
+import { VisionPipelineService } from '../vision-pipeline/vision-pipeline.service';
+import { LibreOfficeService } from '../libreoffice/libreoffice.service';
+import { Pdf2ImageService } from '../pdf2image/pdf2image.service';
+import {
+  DOC_EXTENSIONS,
+  IMAGE_EXTENSIONS,
+} from '../common/file-support.constants';
+import { ChatService } from '../chat/chat.service';
+import { UserSettingService } from '../user/user-setting.service';
+
+export interface PaginatedKnowledgeBase {
+  items: KnowledgeBase[];
+  total: number;
+  page: number;
+  limit: number;
+}
+
+@Injectable()
+export class KnowledgeBaseService {
+  private readonly logger = new Logger(KnowledgeBaseService.name);
+
+  constructor(
+    @InjectRepository(KnowledgeBase)
+    private kbRepository: Repository<KnowledgeBase>,
+    @InjectRepository(KnowledgeGroup)
+    private groupRepository: Repository<KnowledgeGroup>,
+    @Inject(forwardRef(() => ElasticsearchService))
+    private elasticsearchService: ElasticsearchService,
+    private tikaService: TikaService,
+    private embeddingService: EmbeddingService,
+    private textChunkerService: TextChunkerService,
+    private modelConfigService: ModelConfigService,
+    @Inject(forwardRef(() => RagService))
+    private ragService: RagService,
+    private visionService: VisionService,
+    private tenantService: TenantService,
+    private memoryMonitor: MemoryMonitorService,
+    private chunkConfigService: ChunkConfigService,
+    private visionPipelineService: VisionPipelineService,
+    private libreOfficeService: LibreOfficeService,
+    private pdf2ImageService: Pdf2ImageService,
+    private configService: ConfigService,
+    private i18nService: I18nService,
+    @Inject(forwardRef(() => ChatService))
+    private chatService: ChatService,
+    private userSettingService: UserSettingService,
+  ) {}
+
+  async createAndIndex(
+    fileInfo: any,
+    userId: string,
+    tenantId: string,
+    config?: any,
+  ): Promise<KnowledgeBase> {
+    const mode = config?.mode || 'fast';
+    const processingMode =
+      mode === 'precise' ? ProcessingMode.PRECISE : ProcessingMode.FAST;
+
+    const kb = this.kbRepository.create({
+      originalName: fileInfo.originalname,
+      storagePath: fileInfo.path,
+      size: fileInfo.size,
+      mimetype: fileInfo.mimetype,
+      status: FileStatus.PENDING,
+      userId: userId,
+      tenantId: tenantId,
+      chunkSize: config?.chunkSize || 200,
+      chunkOverlap: config?.chunkOverlap || 40,
+      embeddingModelId: config?.embeddingModelId || null,
+      processingMode: processingMode,
+    });
+
+    // Associate groups
+    if (config?.groupIds && config.groupIds.length > 0) {
+      const groups = await this.groupRepository.find({
+        where: { id: In(config.groupIds), tenantId: tenantId },
+      });
+      kb.groups = groups;
+    }
+
+    const savedKb = await this.kbRepository.save(kb);
+
+    this.logger.log(
+      `Created KB record: ${savedKb.id}, mode: ${mode}, file: ${fileInfo.originalname}`,
+    );
+
+    // ---------------------------------------------------------
+    // Move the file to the final partitioned directory
+    // source: uploads/{tenantId}/{filename} (or wherever it was)
+    // target: uploads/{tenantId}/{savedKb.id}/{filename}
+    // ---------------------------------------------------------
+    const fs = await import('fs');
+    const path = await import('path');
+    const uploadPath = process.env.UPLOAD_FILE_PATH || './uploads';
+    const targetDir = path.join(uploadPath, tenantId || 'default', savedKb.id);
+    const targetPath = path.join(targetDir, fileInfo.filename);
+
+    try {
+      if (!fs.existsSync(targetDir)) {
+        fs.mkdirSync(targetDir, { recursive: true });
+      }
+      if (fs.existsSync(fileInfo.path)) {
+        fs.renameSync(fileInfo.path, targetPath);
+        // Update the DB record with the new path
+        savedKb.storagePath = targetPath;
+        await this.kbRepository.save(savedKb);
+        this.logger.log(`Moved file to partitioned storage: ${targetPath}`);
+      }
+    } catch (fsError) {
+      this.logger.error(
+        `Failed to move file ${savedKb.id} to partitioned storage`,
+        fsError,
+      );
+      // We will let it continue, but the file might be stuck in the temp/root folder
+    }
+
+    // If queue processing is requested, await completion
+    if (config?.waitForCompletion) {
+      await this.processFile(savedKb.id, userId, tenantId, config);
+    } else {
+      // Otherwise trigger asynchronously (default)
+      this.processFile(savedKb.id, userId, tenantId, config).catch((err) => {
+        this.logger.error(`Error processing file ${savedKb.id}`, err);
+      });
+    }
+
+    return savedKb;
+  }
+
+  async findAll(userId: string, tenantId?: string): Promise<KnowledgeBase[]> {
+    const where: any = {};
+    if (tenantId) {
+      where.tenantId = tenantId;
+    } else {
+      where.userId = userId;
+    }
+    return this.kbRepository.find({
+      where,
+      relations: ['groups'], // Load group relations
+      order: { createdAt: 'DESC' },
+    });
+  }
+
+  async findOne(
+    id: string,
+    userId: string,
+    tenantId: string,
+  ): Promise<KnowledgeBase> {
+    const kb = await this.kbRepository.findOne({
+      where: { id },
+      relations: ['groups'],
+    });
+
+    if (!kb) {
+      throw new NotFoundException(
+        this.i18nService.getMessage('knowledgeBaseNotFound'),
+      );
+    }
+
+    // Check permission using TenantService
+    const hasAccess = await this.tenantService.canAccessTenant(
+      userId,
+      kb.tenantId,
+      tenantId,
+    );
+    if (!hasAccess) {
+      throw new ForbiddenException(
+        `You do not have permission to access this knowledge base`,
+      );
+    }
+
+    return kb;
+  }
+
+  async getStats(
+    userId: string,
+    tenantId?: string,
+  ): Promise<{ total: number; uncategorized: number }> {
+    const where: any = {};
+    if (tenantId) {
+      where.tenantId = tenantId;
+    } else {
+      where.userId = userId;
+    }
+
+    // Get total count
+    const total = await this.kbRepository.count({ where });
+
+    // Get uncategorized count (files with no groups)
+    // We need to use query builder to check for empty groups relation
+    const uncategorizedQuery = this.kbRepository
+      .createQueryBuilder('kb')
+      .leftJoin('kb.groups', 'groups');
+
+    // Apply where conditions
+    if (tenantId) {
+      uncategorizedQuery.where('kb.tenantId = :tenantId', { tenantId });
+    } else {
+      uncategorizedQuery.where('kb.userId = :userId', { userId });
+    }
+
+    // Count files where groups array is empty
+    const uncategorizedCount = await uncategorizedQuery
+      .andWhere('groups.id IS NULL')
+      .getCount();
+
+    return {
+      total,
+      uncategorized: uncategorizedCount,
+    };
+  }
+
+  async searchKnowledge(
+    userId: string,
+    tenantId: string,
+    query: string,
+    topK: number = 5,
+  ) {
+    try {
+      // Generate simulation vector using default dimensions from environment variable
+      const defaultDimensions = parseInt(
+        process.env.DEFAULT_VECTOR_DIMENSIONS || '2560',
+      );
+      const mockEmbedding = Array.from(
+        { length: defaultDimensions },
+        () => Math.random() - 0.5,
+      );
+      const queryVector = mockEmbedding;
+
+      // 2. Search in Elasticsearch
+      const searchResults = await this.elasticsearchService.searchSimilar(
+        queryVector,
+        userId,
+        topK,
+        tenantId, // Ensure shared visibility within tenant
+      );
+
+      // 3. Get file information from database
+      const fileIds = [...new Set(searchResults.map((r) => r.fileId))];
+      const files = await this.kbRepository.findByIds(fileIds);
+      const fileMap = new Map(files.map((f) => [f.id, f]));
+
+      // 4. Combine results with file info
+      const results = searchResults.map((result) => {
+        const file = fileMap.get(result.fileId);
+        return {
+          ...result,
+          file: file
+            ? {
+                id: file.id,
+                name: file.originalName,
+                mimetype: file.mimetype,
+                size: file.size,
+                createdAt: file.createdAt,
+              }
+            : null,
+        };
+      });
+
+      return {
+        query,
+        results,
+        total: results.length,
+      };
+    } catch (error) {
+      this.logger.error(
+        `Metadata search failed for tenant ${tenantId}:`,
+        error.stack || error.message,
+      );
+      throw error;
+    }
+  }
+
+  async ragSearch(
+    userId: string,
+    tenantId: string,
+    query: string,
+    settings: any,
+  ) {
+    this.logger.log(
+      `RAG search request: userId=${userId}, query="${query}", settings=${JSON.stringify(settings)}`,
+    );
+
+    try {
+      const ragResults = await this.ragService.searchKnowledge(
+        query,
+        userId,
+        settings.topK,
+        settings.similarityThreshold,
+        settings.selectedEmbeddingId,
+        settings.enableFullTextSearch,
+        settings.enableRerank,
+        settings.selectedRerankId,
+        undefined,
+        undefined,
+        settings.rerankSimilarityThreshold,
+        tenantId, // Ensure shared visibility within tenant for RAG
+      );
+
+      const sources = this.ragService.extractSources(ragResults);
+      const ragPrompt = this.ragService.buildRagPrompt(
+        query,
+        ragResults,
+        settings.language || DEFAULT_LANGUAGE,
+      );
+
+      const result = {
+        searchResults: ragResults,
+        sources,
+        ragPrompt,
+        hasRelevantContent: ragResults.length > 0,
+      };
+
+      this.logger.log(
+        `RAG search completed: found ${ragResults.length} results`,
+      );
+      return result;
+    } catch (error) {
+      this.logger.error(
+        `RAG search failed for user ${userId}:`,
+        error.stack || error.message,
+      );
+      // Return empty result instead of throwing error to keep system running
+      return {
+        searchResults: [],
+        sources: [],
+        ragPrompt: query, // Use original query
+        hasRelevantContent: false,
+      };
+    }
+  }
+
+  async deleteFile(
+    fileId: string,
+    userId: string,
+    tenantId: string,
+  ): Promise<void> {
+    this.logger.log(`Deleting file ${fileId} for user ${userId}`);
+
+    try {
+      // 1. Get file info
+      const file = await this.kbRepository.findOne({
+        where: { id: fileId, tenantId }, // Filter by tenantId
+      });
+      if (!file) {
+        throw new NotFoundException(
+          this.i18nService.getMessage('fileNotFound'),
+        );
+      }
+
+      // 2. Delete file from filesystem
+      const fs = await import('fs');
+      try {
+        if (fs.existsSync(file.storagePath)) {
+          fs.unlinkSync(file.storagePath);
+          this.logger.log(`Deleted file: ${file.storagePath}`);
+        }
+      } catch (error) {
+        this.logger.warn(`Failed to delete file ${file.storagePath}:`, error);
+      }
+
+      // 3. Delete from Elasticsearch
+      try {
+        await this.elasticsearchService.deleteByFileId(
+          fileId,
+          userId,
+          tenantId,
+        );
+        this.logger.log(`Deleted ES documents for file ${fileId}`);
+      } catch (error) {
+        this.logger.warn(
+          `Failed to delete ES documents for file ${fileId}:`,
+          error,
+        );
+      }
+
+      // 4. Remove from all groups (cleanup M2M relations)
+      const fileWithGroups = await this.kbRepository.findOne({
+        where: { id: fileId, tenantId },
+        relations: ['groups'],
+      });
+
+      if (
+        fileWithGroups &&
+        fileWithGroups.groups &&
+        fileWithGroups.groups.length > 0
+      ) {
+        // Clear groups to remove entries from join table
+        fileWithGroups.groups = [];
+        await this.kbRepository.save(fileWithGroups);
+        this.logger.log(`Cleared group associations for file ${fileId}`);
+      }
+
+      // 5. Delete from SQLite
+      await this.kbRepository.delete({ id: fileId });
+      this.logger.log(`Deleted database record for file ${fileId}`);
+    } catch (error) {
+      this.logger.error(`Failed to delete file ${fileId}`, error);
+      throw error;
+    }
+  }
+
+  async clearAll(userId: string, tenantId: string): Promise<void> {
+    this.logger.log(
+      `Clearing all knowledge base data for user ${userId} in tenant ${tenantId}`,
+    );
+
+    try {
+      // Get all files for the specific tenant and delete them one by one
+      const files = await this.kbRepository.find({ where: { tenantId } });
+
+      for (const file of files) {
+        await this.deleteFile(file.id, userId, tenantId);
+      }
+
+      this.logger.log(`Cleared all knowledge base data for user ${userId}`);
+    } catch (error) {
+      this.logger.error(
+        `Failed to clear knowledge base for user ${userId}`,
+        error,
+      );
+      throw error;
+    }
+  }
+
+  private async processFile(
+    kbId: string,
+    userId: string,
+    tenantId: string,
+    config?: any,
+  ) {
+    this.logger.log(
+      `Starting processing for file ${kbId}, mode: ${config?.mode || 'fast'}`,
+    );
+    await this.updateStatus(kbId, FileStatus.INDEXING);
+
+    try {
+      const kb = await this.kbRepository.findOne({ where: { id: kbId } });
+      if (!kb) {
+        this.logger.error(`KB not found: ${kbId}`);
+        return;
+      }
+
+      // Memory monitor - pre-processing check
+      const memBefore = this.memoryMonitor.getMemoryUsage();
+      this.logger.log(
+        `Memory state - before processing: ${memBefore.heapUsed}/${memBefore.heapTotal}MB`,
+      );
+
+      // Select processing flow based on mode
+      const mode = config?.mode || 'fast';
+
+      if (mode === 'precise') {
+        // Precise mode - use Vision Pipeline
+        await this.processPreciseMode(kb, userId, tenantId, config);
+      } else {
+        // Fast mode - use Tika
+        await this.processFastMode(kb, userId, tenantId, config);
+      }
+
+      this.logger.log(`File ${kbId} processed successfully in ${mode} mode.`);
+    } catch (error) {
+      this.logger.error(`Failed to process file ${kbId}`, error);
+      await this.updateStatus(kbId, FileStatus.FAILED);
+    }
+  }
+
+  /**
+   * Fast mode processing (existing flow)
+   */
+  private async processFastMode(
+    kb: KnowledgeBase,
+    userId: string,
+    tenantId: string,
+    config?: any,
+  ) {
+    // 1. Extract text using Tika
+    let text = await this.tikaService.extractText(kb.storagePath);
+
+    // Use vision model for image files
+    if (this.visionService.isImageFile(kb.mimetype)) {
+      const settings = await this.tenantService.getSettings(
+        tenantId || 'default',
+      );
+      const visionModelId = settings?.selectedVisionId;
+      if (visionModelId) {
+        const visionModel =
+          await this.modelConfigService.findOne(visionModelId);
+        if (
+          visionModel &&
+          visionModel.type === 'vision' &&
+          visionModel.isEnabled !== false
+        ) {
+          text = await this.visionService.extractImageContent(kb.storagePath, {
+            baseUrl: visionModel.baseUrl || '',
+            apiKey: visionModel.apiKey || '',
+            modelId: visionModel.modelId,
+          });
+        }
+      }
+    }
+
+    if (!text || text.trim().length === 0) {
+      this.logger.warn(this.i18nService.getMessage('noTextExtracted'));
+    }
+
+    // Check text size
+    const textSizeMB = Math.round(text.length / 1024 / 1024);
+    if (textSizeMB > 50) {
+      this.logger.warn(
+        this.i18nService.formatMessage('extractedTextTooLarge', {
+          size: textSizeMB,
+        }),
+      );
+    }
+
+    // Save text to database
+    await this.kbRepository.update(kb.id, { content: text });
+    await this.updateStatus(kb.id, FileStatus.EXTRACTED);
+
+    // Async vectorization
+    await this.vectorizeToElasticsearch(
+      kb.id,
+      userId,
+      tenantId,
+      text,
+      config,
+    ).catch((err) => {
+      this.logger.error(`Error vectorizing file ${kb.id}`, err);
+    });
+
+    // Auto-generate title (async execution)
+    this.generateTitle(kb.id).catch((err) => {
+      this.logger.error(`Error generating title for file ${kb.id}`, err);
+    });
+
+    // Trigger PDF conversion asynchronously (for document files)
+    this.ensurePDFExists(kb.id, userId, tenantId).catch((err) => {
+      this.logger.warn(
+        this.i18nService.formatMessage('pdfConversionFailedDetail', {
+          id: kb.id,
+        }),
+        err,
+      );
+    });
+  }
+
+  /**
+   * Precise mode processing (new flow)
+   */
+  private async processPreciseMode(
+    kb: KnowledgeBase,
+    userId: string,
+    tenantId: string,
+    config?: any,
+  ) {
+    // Check if precise mode is supported
+    const preciseFormats = ['.pdf', '.doc', '.docx', '.ppt', '.pptx'];
+    const ext = kb.originalName
+      .toLowerCase()
+      .substring(kb.originalName.lastIndexOf('.'));
+
+    if (!preciseFormats.includes(ext)) {
+      this.logger.warn(
+        this.i18nService.formatMessage('preciseModeUnsupported', { ext }),
+      );
+      return this.processFastMode(kb, userId, tenantId, config);
+    }
+
+    // Check if Vision model is configured
+    const settings = await this.tenantService.getSettings(
+      tenantId || 'default',
+    );
+    const visionModelId = settings?.selectedVisionId;
+    if (!visionModelId) {
+      this.logger.warn(
+        this.i18nService.getMessage('visionModelNotConfiguredFallback'),
+      );
+      return this.processFastMode(kb, userId, tenantId, config);
+    }
+
+    const visionModel = await this.modelConfigService.findOne(visionModelId);
+    if (
+      !visionModel ||
+      visionModel.type !== 'vision' ||
+      visionModel.isEnabled === false
+    ) {
+      this.logger.warn(
+        this.i18nService.getMessage('visionModelInvalidFallback'),
+      );
+      return this.processFastMode(kb, userId, tenantId, config);
+    }
+
+    // Call Vision Pipeline
+    try {
+      const result = await this.visionPipelineService.processPreciseMode(
+        kb.storagePath,
+        {
+          userId,
+          tenantId, // New
+          modelId: visionModelId,
+          fileId: kb.id,
+          fileName: kb.originalName,
+          skipQualityCheck: false,
+        },
+      );
+
+      if (!result.success) {
+        this.logger.error(`Vision pipeline failed, falling back to fast mode`);
+        this.logger.warn(this.i18nService.getMessage('visionPipelineFailed'));
+        return this.processFastMode(kb, userId, tenantId, config);
+      }
+
+      // Save text content to database
+      const combinedText = result.results.map((r) => r.text).join('\n\n');
+      const metadata = {
+        processedPages: result.processedPages,
+        failedPages: result.failedPages,
+        cost: result.cost,
+        duration: result.duration,
+        results: result.results.map((r) => ({
+          pageIndex: r.pageIndex,
+          confidence: r.confidence,
+          layout: r.layout,
+          imageCount: r.images.length,
+        })),
+      };
+      await this.kbRepository.update(kb.id, {
+        content: combinedText,
+        metadata: metadata as any,
+      });
+
+      await this.updateStatus(kb.id, FileStatus.EXTRACTED);
+      this.logger.log(
+        this.i18nService.formatMessage('preciseModeComplete', {
+          pages: result.processedPages,
+          cost: result.cost.toFixed(2),
+        }),
+      );
+
+      // Async vectorization and Elasticsearch indexing
+      // Create each page as separate document with metadata
+      this.indexPreciseResults(
+        kb,
+        userId,
+        tenantId,
+        kb.embeddingModelId,
+        result.results,
+      ).catch((err) => {
+        this.logger.error(`Error indexing precise results for ${kb.id}`, err);
+      });
+
+      // Trigger PDF conversion asynchronously
+      this.ensurePDFExists(kb.id, userId, tenantId).catch((err) => {
+        this.logger.warn(`Initial PDF conversion failed for ${kb.id}`, err);
+      });
+
+      // Auto-generate title (async execution)
+      this.generateTitle(kb.id).catch((err) => {
+        this.logger.error(`Error generating title for file ${kb.id}`, err);
+      });
+    } catch (error) {
+      this.logger.error(`Vision pipeline error: ${error.message}`, error.stack);
+      this.logger.error(`Falling back to fast mode for file ${kb.id}`);
+      return this.processFastMode(kb, userId, tenantId, config);
+    }
+  }
+
+  /**
+   * Index precise mode results
+   */
+  private async indexPreciseResults(
+    kb: KnowledgeBase,
+    userId: string,
+    tenantId: string,
+    embeddingModelId: string,
+    results: any[],
+  ): Promise<void> {
+    this.logger.log(`Indexing ${results.length} precise results for ${kb.id}`);
+
+    // Check index existence - get actual model dimensions
+    const actualDimensions = await this.getActualModelDimensions(
+      embeddingModelId,
+    );
+    await this.elasticsearchService.createIndexIfNotExists(actualDimensions);
+
+    // Batch vectorization and indexing
+    const batchSize = parseInt(process.env.CHUNK_BATCH_SIZE || '50');
+
+    for (let i = 0; i < results.length; i += batchSize) {
+      const batch = results.slice(i, i + batchSize);
+      const texts = batch.map((r) => r.text);
+
+      try {
+        // Generate vectors
+        const embeddings = await this.embeddingService.getEmbeddings(
+          texts,
+          embeddingModelId,
+        );
+
+        // Index each result
+        for (let j = 0; j < batch.length; j++) {
+          const result = batch[j];
+          const embedding = embeddings[j];
+
+          if (!embedding || embedding.length === 0) {
+            this.logger.warn(
+              this.i18nService.formatMessage('skippingEmptyVectorPage', {
+                page: result.pageIndex,
+              }),
+            );
+            continue;
+          }
+
+          await this.elasticsearchService.indexDocument(
+            `${kb.id}_page_${result.pageIndex}`,
+            result.text,
+            embedding,
+            {
+              fileId: kb.id,
+              originalName: kb.originalName,
+              mimetype: kb.mimetype,
+              userId: userId,
+              tenantId: tenantId, // New
+              pageNumber: result.pageIndex,
+              images: result.images,
+              layout: result.layout,
+              confidence: result.confidence,
+              source: 'precise',
+              mode: 'vision',
+            },
+          );
+        }
+
+        this.logger.log(
+          `Batch ${Math.floor(i / batchSize) + 1} completed: ${batch.length} pages`,
+        );
+      } catch (error) {
+        this.logger.error(
+          `Batch ${Math.floor(i / batchSize) + 1} processing failed`,
+          error,
+        );
+      }
+    }
+
+    await this.updateStatus(kb.id, FileStatus.VECTORIZED);
+    this.logger.log(`Precise mode indexing completed: ${results.length} pages`);
+  }
+
+  /**
+   * Get specific page of PDF as image
+   */
+  async getPageAsImage(
+    fileId: string,
+    pageIndex: number,
+    userId: string,
+    tenantId: string,
+  ): Promise<string> {
+    const pdfPath = await this.ensurePDFExists(fileId, userId, tenantId);
+
+    // Convert specific pages
+    const result = await this.pdf2ImageService.convertToImages(pdfPath, {
+      density: 150,
+      quality: 75,
+      format: 'jpeg',
+    });
+
+    // Find images for corresponding page numbers
+    const pageImage = result.images.find(
+      (img) => img.pageIndex === pageIndex + 1,
+    );
+    if (!pageImage) {
+      throw new NotFoundException(
+        this.i18nService.formatMessage('pageImageNotFoundDetail', {
+          page: pageIndex + 1,
+        }),
+      );
+    }
+
+    return pageImage.path;
+  }
+
+  private async vectorizeToElasticsearch(
+    kbId: string,
+    userId: string,
+    tenantId: string,
+    text: string,
+    config?: any,
+  ) {
+    try {
+      const kb = await this.kbRepository.findOne({
+        where: { id: kbId, tenantId },
+      });
+      if (!kb) return;
+
+      // Memory monitor - pre-vectorization check
+      const memBeforeChunk = this.memoryMonitor.getMemoryUsage();
+      this.logger.log(
+        `Pre-vectorization memory: ${memBeforeChunk.heapUsed}/${memBeforeChunk.heapTotal}MB`,
+      );
+
+      this.logger.debug(`File ${kbId}: Validating chunk config...`);
+      // 1. Validate and fix chunk config (based on model limits and env vars)
+      const validatedConfig = await this.chunkConfigService.validateChunkConfig(
+        kb.chunkSize,
+        kb.chunkOverlap,
+        kb.embeddingModelId,
+      );
+      this.logger.debug(`File ${kbId}: Chunk config validated.`);
+
+      // If config modified, log warning and update database
+      if (validatedConfig.warnings.length > 0) {
+        this.logger.warn(
+          this.i18nService.formatMessage('chunkConfigCorrection', {
+            warnings: validatedConfig.warnings.join(', '),
+          }),
+        );
+
+        // Update config in database
+        if (
+          validatedConfig.chunkSize !== kb.chunkSize ||
+          validatedConfig.chunkOverlap !== kb.chunkOverlap
+        ) {
+          await this.kbRepository.update(kbId, {
+            chunkSize: validatedConfig.chunkSize,
+            chunkOverlap: validatedConfig.chunkOverlap,
+          });
+        }
+      }
+
+      // Display config summary (including actual limits applied)
+      this.logger.debug(`File ${kbId}: Getting config summary...`);
+      const configSummary = await this.chunkConfigService.getConfigSummary(
+        validatedConfig.chunkSize,
+        validatedConfig.chunkOverlap,
+        kb.embeddingModelId,
+      );
+      this.logger.log(`Chunk config: ${configSummary}`);
+      this.logger.log(
+        `Config limits: chunk=${validatedConfig.effectiveMaxChunkSize}, overlap=${validatedConfig.effectiveMaxOverlapSize}`,
+      );
+
+      // 2. Split text using validated config
+      const chunks = this.textChunkerService.chunkText(
+        text,
+        validatedConfig.chunkSize,
+        validatedConfig.chunkOverlap,
+      );
+      this.logger.log(`File ${kbId} split into ${chunks.length} text blocks`);
+
+      if (chunks.length === 0) {
+        this.logger.warn(
+          this.i18nService.formatMessage('noChunksGenerated', { id: kbId }),
+        );
+        await this.updateStatus(kbId, FileStatus.VECTORIZED);
+        return;
+      }
+
+      // 3. Validate chunk count is reasonable
+      const estimatedChunkCount = this.chunkConfigService.estimateChunkCount(
+        text.length,
+        validatedConfig.chunkSize,
+      );
+      if (chunks.length > estimatedChunkCount * 1.2) {
+        this.logger.warn(
+          this.i18nService.formatMessage('chunkCountAnomaly', {
+            actual: chunks.length,
+            estimated: estimatedChunkCount,
+          }),
+        );
+      }
+
+      // 4. Get recommended batch size (based on model limits)
+      const recommendedBatchSize =
+        await this.chunkConfigService.getRecommendedBatchSize(
+          kb.embeddingModelId,
+          parseInt(process.env.CHUNK_BATCH_SIZE || '100'),
+        );
+
+      // 5. Estimate memory usage
+      const avgChunkSize =
+        chunks.reduce((sum, c) => sum + c.content.length, 0) / chunks.length;
+      const estimatedMemory = this.memoryMonitor.estimateMemoryUsage(
+        chunks.length,
+        avgChunkSize,
+        parseInt(process.env.DEFAULT_VECTOR_DIMENSIONS || '2560'),
+      );
+      this.logger.log(
+        `Estimated memory usage: ${estimatedMemory}MB (batch size: ${recommendedBatchSize})`,
+      );
+
+      // 6. Get actual model dimensions and check index exists
+      const actualDimensions = await this.getActualModelDimensions(
+        kb.embeddingModelId,
+      );
+      await this.elasticsearchService.createIndexIfNotExists(actualDimensions);
+
+      // 7. Batch vectorization and indexing
+      const useBatching = this.memoryMonitor.shouldUseBatching(
+        chunks.length,
+        avgChunkSize,
+        actualDimensions,
+      );
+
+      if (useBatching) {
+        try {
+          await this.processInBatches(
+            chunks,
+            async (batch, batchIndex) => {
+              // Verify batch size not exceeding model limit
+              if (batch.length > recommendedBatchSize) {
+                this.logger.warn(
+                  this.i18nService.formatMessage('batchSizeExceeded', {
+                    index: batchIndex,
+                    actual: batch.length,
+                    limit: recommendedBatchSize,
+                  }),
+                );
+              }
+
+              const chunkTexts = batch.map((chunk) => chunk.content);
+              const embeddings = await this.embeddingService.getEmbeddings(
+                chunkTexts,
+                kb.embeddingModelId,
+              );
+
+              // Validate dimension consistency
+              if (
+                embeddings.length > 0 &&
+                embeddings[0].length !== actualDimensions
+              ) {
+                this.logger.warn(
+                  `Vector dimension mismatch: expected ${actualDimensions}, got ${embeddings[0].length}`,
+                );
+              }
+
+              // Index this batch data immediately
+              for (let i = 0; i < batch.length; i++) {
+                const chunk = batch[i];
+                const embedding = embeddings[i];
+
+                if (!embedding || embedding.length === 0) {
+                  this.logger.warn(
+                    this.i18nService.formatMessage('skippingEmptyVectorChunk', {
+                      index: chunk.index,
+                    }),
+                  );
+                  continue;
+                }
+
+                await this.elasticsearchService.indexDocument(
+                  `${kb.id}_chunk_${chunk.index}`,
+                  chunk.content,
+                  embedding,
+                  {
+                    fileId: kb.id,
+                    originalName: kb.originalName,
+                    mimetype: kb.mimetype,
+                    userId: userId,
+                    chunkIndex: chunk.index,
+                    startPosition: chunk.startPosition,
+                    tenantId, // Passing tenantId to ES
+                  },
+                );
+              }
+
+              this.logger.log(
+                `Batch ${batchIndex} completed: ${batch.length} chunks`,
+              );
+            },
+            {
+              batchSize: recommendedBatchSize,
+              onBatchComplete: (batchIndex, totalBatches) => {
+                const mem = this.memoryMonitor.getMemoryUsage();
+                this.logger.log(
+                  `Batch ${batchIndex}/${totalBatches} completed, memory: ${mem.heapUsed}MB`,
+                );
+              },
+            },
+          );
+        } catch (error) {
+          // Detect context length error (supports Japanese/Chinese/English)
+          if (
+            error.message &&
+            (error.message.includes('context length') ||
+              error.message.includes('context length exceeded'))
+          ) {
+            this.logger.warn(
+              this.i18nService.getMessage('contextLengthErrorFallback'),
+            );
+
+            // Downgrade to single text processing
+            for (let i = 0; i < chunks.length; i++) {
+              const chunk = chunks[i];
+
+              try {
+                const embeddings = await this.embeddingService.getEmbeddings(
+                  [chunk.content], // Single text
+                  kb.embeddingModelId,
+                );
+
+                if (!embeddings[0] || embeddings[0].length === 0) {
+                  this.logger.warn(
+                    this.i18nService.formatMessage('skippingEmptyVectorChunk', {
+                      index: chunk.index,
+                    }),
+                  );
+                  continue;
+                }
+
+                await this.elasticsearchService.indexDocument(
+                  `${kb.id}_chunk_${chunk.index}`,
+                  chunk.content,
+                  embeddings[0],
+                  {
+                    fileId: kb.id,
+                    originalName: kb.originalName,
+                    mimetype: kb.mimetype,
+                    userId: userId,
+                    chunkIndex: chunk.index,
+                    startPosition: chunk.startPosition,
+                    endPosition: chunk.endPosition,
+                    tenantId,
+                  },
+                );
+
+                if ((i + 1) % 10 === 0) {
+                  this.logger.log(
+                    `Single processing progress: ${i + 1}/${chunks.length}`,
+                  );
+                }
+              } catch (chunkError) {
+                this.logger.error(
+                  `Failed to process text block ${chunk.index}. Skipping: ${chunkError.message}`,
+                );
+                continue;
+              }
+            }
+
+            this.logger.log(
+              `Single text processing completed: ${chunks.length} chunks`,
+            );
+          } else {
+            // Throw other errors directly
+            throw error;
+          }
+        }
+      } else {
+        // Small files, batch processing (but need to check batch limits)
+        const chunkTexts = chunks.map((chunk) => chunk.content);
+
+        // Force batch processing if chunk count exceeds model batch limit
+        if (chunks.length > recommendedBatchSize) {
+          this.logger.warn(
+            this.i18nService.formatMessage('chunkLimitExceededForceBatch', {
+              actual: chunks.length,
+              limit: recommendedBatchSize,
+            }),
+          );
+          try {
+            await this.processInBatches(chunks, async (batch, batchIndex) => {
+              const batchTexts = batch.map((c) => c.content);
+              const embeddings = await this.embeddingService.getEmbeddings(
+                batchTexts,
+                kb.embeddingModelId,
+              );
+
+              for (let i = 0; i < batch.length; i++) {
+                const chunk = batch[i];
+                const embedding = embeddings[i];
+
+                if (!embedding || embedding.length === 0) {
+                  this.logger.warn(
+                    `Skipping empty vector text block ${chunk.index}`,
+                  );
+                  continue;
+                }
+
+                await this.elasticsearchService.indexDocument(
+                  `${kb.id}_chunk_${chunk.index}`,
+                  chunk.content,
+                  embedding,
+                  {
+                    fileId: kb.id,
+                    originalName: kb.originalName,
+                    mimetype: kb.mimetype,
+                    userId: userId,
+                    chunkIndex: chunk.index,
+                    startPosition: chunk.startPosition,
+                    endPosition: chunk.endPosition,
+                    tenantId, // Passing tenantId to ES metadata
+                  },
+                );
+              }
+            });
+          } catch (error) {
+            // Detect context length error (supports Japanese/Chinese/English)
+            if (
+              error.message &&
+              (error.message.includes('context length') ||
+                error.message.includes('context length exceeded'))
+            ) {
+              this.logger.warn(
+                this.i18nService.getMessage('batchContextLengthErrorFallback'),
+              );
+
+              // Downgrade to single text processing
+              for (let i = 0; i < chunks.length; i++) {
+                const chunk = chunks[i];
+
+                try {
+                  const embeddings = await this.embeddingService.getEmbeddings(
+                    [chunk.content], // Single text
+                    kb.embeddingModelId,
+                  );
+
+                  if (!embeddings[0] || embeddings[0].length === 0) {
+                    this.logger.warn(
+                      this.i18nService.formatMessage(
+                        'skippingEmptyVectorChunk',
+                        { index: chunk.index },
+                      ),
+                    );
+                    continue;
+                  }
+
+                  await this.elasticsearchService.indexDocument(
+                    `${kb.id}_chunk_${chunk.index}`,
+                    chunk.content,
+                    embeddings[0],
+                    {
+                      fileId: kb.id,
+                      originalName: kb.originalName,
+                      mimetype: kb.mimetype,
+                      userId: userId,
+                      tenantId, // Added tenantId
+                      chunkIndex: chunk.index,
+                      startPosition: chunk.startPosition,
+                      endPosition: chunk.endPosition,
+                    },
+                  );
+
+                  if ((i + 1) % 10 === 0) {
+                    this.logger.log(
+                      `Single processing progress: ${i + 1}/${chunks.length}`,
+                    );
+                  }
+                } catch (chunkError) {
+                  this.logger.error(
+                    this.i18nService.formatMessage('chunkProcessingFailed', {
+                      index: chunk.index,
+                      message: chunkError.message,
+                    }),
+                  );
+                  continue;
+                }
+              }
+
+              this.logger.log(
+                this.i18nService.formatMessage('singleTextProcessingComplete', {
+                  count: chunks.length,
+                }),
+              );
+            } else {
+              // Throw other errors directly
+              throw error;
+            }
+          }
+        } else {
+          // Process if file is small enough
+          try {
+            const embeddings = await this.embeddingService.getEmbeddings(
+              chunkTexts,
+              kb.embeddingModelId,
+            );
+
+            for (let i = 0; i < chunks.length; i++) {
+              const chunk = chunks[i];
+              const embedding = embeddings[i];
+
+              if (!embedding || embedding.length === 0) {
+                this.logger.warn(
+                  this.i18nService.formatMessage('skippingEmptyVectorChunk', {
+                    index: chunk.index,
+                  }),
+                );
+                continue;
+              }
+
+              await this.elasticsearchService.indexDocument(
+                `${kb.id}_chunk_${chunk.index}`,
+                chunk.content,
+                embedding,
+                {
+                  fileId: kb.id,
+                  originalName: kb.originalName,
+                  mimetype: kb.mimetype,
+                  userId: userId,
+                  tenantId, // Added tenantId
+                  chunkIndex: chunk.index,
+                  startPosition: chunk.startPosition,
+                  endPosition: chunk.endPosition,
+                },
+              );
+            }
+          } catch (error) {
+            // Detect context length error (supports Japanese/Chinese/English)
+            if (
+              error.message &&
+              (error.message.includes('context length') ||
+                error.message.includes('context length exceeded'))
+            ) {
+              this.logger.warn(
+                this.i18nService.getMessage('batchContextLengthErrorFallback'),
+              );
+
+              // Downgrade to single text processing
+              for (let i = 0; i < chunks.length; i++) {
+                const chunk = chunks[i];
+
+                try {
+                  const embeddings = await this.embeddingService.getEmbeddings(
+                    [chunk.content], // Single text
+                    kb.embeddingModelId,
+                  );
+
+                  if (!embeddings[0] || embeddings[0].length === 0) {
+                    this.logger.warn(
+                      `Skipping empty vector text block ${chunk.index}`,
+                    );
+                    continue;
+                  }
+
+                  await this.elasticsearchService.indexDocument(
+                    `${kb.id}_chunk_${chunk.index}`,
+                    chunk.content,
+                    embeddings[0],
+                    {
+                      fileId: kb.id,
+                      originalName: kb.originalName,
+                      mimetype: kb.mimetype,
+                      userId: userId,
+                      tenantId, // Added tenantId
+                      chunkIndex: chunk.index,
+                      startPosition: chunk.startPosition,
+                      endPosition: chunk.endPosition,
+                    },
+                  );
+
+                  if ((i + 1) % 10 === 0) {
+                    this.logger.log(
+                      `Single processing progress: ${i + 1}/${chunks.length}`,
+                    );
+                  }
+                } catch (chunkError) {
+                  this.logger.error(
+                    `Failed to process text block ${chunk.index}. Skipping: ${chunkError.message}`,
+                  );
+                  continue;
+                }
+              }
+
+              this.logger.log(
+                this.i18nService.formatMessage('singleTextProcessingComplete', {
+                  count: chunks.length,
+                }),
+              );
+            } else {
+              // Throw other errors directly
+              throw error;
+            }
+          }
+        }
+      }
+
+      await this.updateStatus(kbId, FileStatus.VECTORIZED);
+      const memAfter = this.memoryMonitor.getMemoryUsage();
+      this.logger.log(
+        this.i18nService.formatMessage('fileVectorizationComplete', {
+          id: kbId,
+          count: chunks.length,
+          memory: memAfter.heapUsed,
+        }),
+      );
+    } catch (error) {
+      this.logger.error(
+        this.i18nService.formatMessage('fileVectorizationFailed', { id: kbId }),
+        error,
+      );
+
+      // Save error info to metadata
+      try {
+        const kb = await this.kbRepository.findOne({ where: { id: kbId } });
+        if (kb) {
+          const metadata = kb.metadata || {};
+          metadata.lastError = error.message;
+          metadata.failedAt = new Date().toISOString();
+          await this.kbRepository.update(kbId, { metadata });
+        }
+      } catch (e) {
+        this.logger.warn(
+          `Failed to update metadata for failed file ${kbId}`,
+          e,
+        );
+      }
+
+      await this.updateStatus(kbId, FileStatus.FAILED);
+    }
+  }
+
+  /**
+   * Batch processing with memory control
+   */
+  private async processInBatches<T>(
+    items: T[],
+    processor: (batch: T[], batchIndex: number) => Promise<void>,
+    options?: {
+      batchSize?: number;
+      onBatchComplete?: (batchIndex: number, totalBatches: number) => void;
+    },
+  ): Promise<void> {
+    const totalItems = items.length;
+    if (totalItems === 0) return;
+
+    const startTime = Date.now();
+    this.logger.log(
+      this.i18nService.formatMessage('batchProcessingStarted', {
+        count: totalItems,
+      }),
+    );
+
+    // Use provided batch size or fallback to env/default
+    const initialBatchSize =
+      options?.batchSize || parseInt(process.env.CHUNK_BATCH_SIZE || '100');
+    const totalBatches = Math.ceil(totalItems / initialBatchSize);
+
+    for (let i = 0; i < totalItems; ) {
+      // Check memory and wait
+      await this.memoryMonitor.waitForMemoryAvailable();
+
+      // Dynamically adjust batch size (start from initialBatchSize, memory monitor can reduce if needed)
+      // Note: memoryMonitor.getDynamicBatchSize may return larger values based on memory situation,
+      // but we must respect model limits (initialBatchSize)
+      const currentMem = this.memoryMonitor.getMemoryUsage().heapUsed;
+      const dynamicBatchSize =
+        this.memoryMonitor.getDynamicBatchSize(currentMem);
+
+      // Ensure we don't exceed the model's limit (initialBatchSize) even if memory allows more
+      const batchSize = Math.min(dynamicBatchSize, initialBatchSize);
+
+      // Get current batch
+      const batch = items.slice(i, i + batchSize);
+      const batchIndex = Math.floor(i / batchSize) + 1;
+
+      this.logger.log(
+        this.i18nService.formatMessage('batchProcessingProgress', {
+          index: batchIndex,
+          total: totalBatches,
+          count: batch.length,
+        }),
+      );
+
+      // Process batch
+      await processor(batch, batchIndex);
+
+      // Callback notification
+      if (options?.onBatchComplete) {
+        options.onBatchComplete(batchIndex, totalBatches);
+      }
+
+      // Force GC (if memory is near threshold)
+      if (currentMem > 800) {
+        this.memoryMonitor.forceGC();
+      }
+
+      // Clear references to help GC
+      batch.length = 0;
+
+      i += batchSize;
+    }
+
+    const duration = ((Date.now() - startTime) / 1000).toFixed(2);
+    this.logger.log(
+      this.i18nService.formatMessage('batchProcessingComplete', {
+        count: totalItems,
+        duration,
+      }),
+    );
+  }
+
+  /**
+   * Retry vectorization for failed files
+   */
+  async retryFailedFile(
+    fileId: string,
+    userId: string,
+    tenantId: string,
+  ): Promise<KnowledgeBase> {
+    this.logger.log(
+      `Retrying failed file ${fileId} for user ${userId} in tenant ${tenantId}`,
+    );
+
+    // 1. Get file with tenant restriction
+    const kb = await this.kbRepository.findOne({
+      where: { id: fileId, tenantId },
+    });
+
+    if (!kb) {
+      throw new NotFoundException(this.i18nService.getMessage('fileNotFound'));
+    }
+
+    if (kb.status !== FileStatus.FAILED) {
+      throw new Error(
+        this.i18nService.formatMessage('onlyFailedFilesRetryable', {
+          status: kb.status,
+        }),
+      );
+    }
+
+    if (!kb.content || kb.content.trim().length === 0) {
+      throw new Error(this.i18nService.getMessage('emptyFileRetryFailed'));
+    }
+
+    // 2. Reset status to INDEXING
+    await this.updateStatus(fileId, FileStatus.INDEXING);
+
+    // 3. Trigger vectorization asynchronously (reuse existing logic)
+    this.vectorizeToElasticsearch(fileId, userId, tenantId, kb.content, {
+      chunkSize: kb.chunkSize,
+      chunkOverlap: kb.chunkOverlap,
+      embeddingModelId: kb.embeddingModelId,
+    }).catch((err) => {
+      this.logger.error(`Retry vectorization failed for file ${fileId}`, err);
+    });
+
+    // 4. Return updated file status
+    const updatedKb = await this.kbRepository.findOne({
+      where: { id: fileId, tenantId },
+    });
+    if (!updatedKb) {
+      throw new NotFoundException(this.i18nService.getMessage('fileNotFound'));
+    }
+    return updatedKb;
+  }
+
+  /**
+   * Get all chunk information for a file
+   */
+  async getFileChunks(fileId: string, userId: string, tenantId: string) {
+    this.logger.log(
+      `Getting chunks for file ${fileId}, user ${userId}, tenant ${tenantId}`,
+    );
+
+    // 1. Get file with tenant check
+    const kb = await this.kbRepository.findOne({
+      where: { id: fileId, tenantId },
+    });
+
+    if (!kb) {
+      throw new NotFoundException(this.i18nService.getMessage('fileNotFound'));
+    }
+
+    // 2. Get all chunks from Elasticsearch
+    const chunks = await this.elasticsearchService.getFileChunks(
+      fileId,
+      userId,
+      tenantId,
+    );
+
+    // 3. Return chunk info
+    return {
+      fileId: kb.id,
+      fileName: kb.originalName,
+      totalChunks: chunks.length,
+      chunkSize: kb.chunkSize,
+      chunkOverlap: kb.chunkOverlap,
+      chunks: chunks.map((chunk) => ({
+        index: chunk.chunkIndex,
+        content: chunk.content,
+        contentLength: chunk.content.length,
+        startPosition: chunk.startPosition,
+        endPosition: chunk.endPosition,
+      })),
+    };
+  }
+
+  private async updateStatus(id: string, status: FileStatus) {
+    await this.kbRepository.update(id, { status });
+  }
+
+  // PDF preview related methods
+  async ensurePDFExists(
+    fileId: string,
+    userId: string,
+    tenantId: string,
+    force: boolean = false,
+  ): Promise<string> {
+    const kb = await this.kbRepository.findOne({
+      where: { id: fileId, tenantId },
+    });
+
+    if (!kb) {
+      throw new NotFoundException(this.i18nService.getMessage('fileNotFound'));
+    }
+
+    // If original file is PDF, return the original file path directly
+    if (kb.mimetype === 'application/pdf') {
+      return kb.storagePath;
+    }
+
+    // Check if preview conversion is supported (only documents or images allowed)
+    const ext = kb.originalName.toLowerCase().split('.').pop() || '';
+    const isConvertible = [...DOC_EXTENSIONS, ...IMAGE_EXTENSIONS].includes(
+      ext,
+    );
+
+    if (!isConvertible) {
+      this.logger.log(
+        `Skipping PDF conversion for unsupported format: .${ext} (${kb.originalName})`,
+      );
+      throw new Error(this.i18nService.getMessage('pdfPreviewNotSupported'));
+    }
+
+    // Generate PDF field path
+    const path = await import('path');
+    const fs = await import('fs');
+    const uploadDir = path.dirname(kb.storagePath);
+    const baseName = path.basename(
+      kb.storagePath,
+      path.extname(kb.storagePath),
+    );
+    const pdfPath = path.join(uploadDir, `${baseName}.pdf`);
+
+    // Delete if forced regeneration specified and file exists
+    if (force && fs.existsSync(pdfPath)) {
+      try {
+        fs.unlinkSync(pdfPath);
+        this.logger.log(`Forced regeneration: Deleted existing PDF ${pdfPath}`);
+      } catch (e) {
+        this.logger.warn(
+          `Failed to delete existing PDF for regeneration: ${e.message}`,
+        );
+      }
+    }
+
+    // Check if already converted and regeneration not needed
+    if (fs.existsSync(pdfPath) && !force) {
+      if (!kb.pdfPath) {
+        await this.kbRepository.update(kb.id, { pdfPath: pdfPath });
+      }
+      return pdfPath;
+    }
+
+    // Need to convert to PDF
+    try {
+      this.logger.log(
+        `Starting PDF conversion for ${kb.originalName} at ${kb.storagePath}`,
+      );
+
+      // Convert file
+      await this.libreOfficeService.convertToPDF(kb.storagePath);
+
+      // Check conversion result
+      if (!fs.existsSync(pdfPath)) {
+        throw new Error(
+          `PDF conversion completed but file not found at ${pdfPath}`,
+        );
+      }
+
+      const stats = fs.statSync(pdfPath);
+      if (stats.size === 0) {
+        fs.unlinkSync(pdfPath);
+        throw new Error(`PDF conversion failed: output file is empty`);
+      }
+
+      await this.kbRepository.update(kb.id, { pdfPath: pdfPath });
+
+      this.logger.log(`PDF conversion successful: ${pdfPath}`);
+      return pdfPath;
+    } catch (error) {
+      this.logger.error(
+        `PDF conversion failed for ${fileId}: ${error.message}`,
+        error.stack,
+      );
+      throw new Error(
+        this.i18nService.formatMessage('pdfConversionFailedDetail', {
+          id: fileId,
+        }),
+      );
+    }
+  }
+
+  async getPDFStatus(fileId: string, userId: string, tenantId: string) {
+    const kb = await this.kbRepository.findOne({
+      where: { id: fileId, tenantId },
+    });
+
+    if (!kb) {
+      throw new NotFoundException(this.i18nService.getMessage('fileNotFound'));
+    }
+
+    // If original file is PDF
+    if (kb.mimetype === 'application/pdf') {
+      const token = this.generateTempToken(fileId, userId, tenantId);
+      return {
+        status: 'ready',
+        url: `/api/knowledge-bases/${fileId}/pdf?token=${token}`,
+      };
+    }
+
+    // Generate PDF file path
+    const path = await import('path');
+    const fs = await import('fs');
+    const uploadDir = path.dirname(kb.storagePath);
+    const baseName = path.basename(
+      kb.storagePath,
+      path.extname(kb.storagePath),
+    );
+    const pdfPath = path.join(uploadDir, `${baseName}.pdf`);
+
+    // Check if converted
+    if (fs.existsSync(pdfPath)) {
+      if (!kb.pdfPath) {
+        kb.pdfPath = pdfPath;
+        await this.kbRepository.save(kb);
+      }
+      const token = this.generateTempToken(fileId, userId, tenantId);
+      return {
+        status: 'ready',
+        url: `/api/knowledge-bases/${fileId}/pdf?token=${token}`,
+      };
+    }
+
+    // Conversion needed
+    return {
+      status: 'pending',
+    };
+  }
+
+  private generateTempToken(
+    fileId: string,
+    userId: string,
+    tenantId: string,
+  ): string {
+    const jwt = require('jsonwebtoken');
+
+    const secret = process.env.JWT_SECRET;
+    if (!secret) {
+      throw new Error(this.i18nService.getMessage('jwtSecretRequired'));
+    }
+
+    return jwt.sign({ fileId, userId, tenantId, type: 'pdf-access' }, secret, {
+      expiresIn: '1h',
+    });
+  }
+
+  /**
+   * Get actual model dimensions (with cache check and probe logic)
+   */
+  private async getActualModelDimensions(
+    embeddingModelId: string,
+  ): Promise<number> {
+    const defaultDimensions = parseInt(
+      process.env.DEFAULT_VECTOR_DIMENSIONS || '2560',
+    );
+
+    try {
+      // 1. Prioritize getting from model config
+      const modelConfig =
+        await this.modelConfigService.findOne(embeddingModelId);
+
+      if (modelConfig && modelConfig.dimensions) {
+        this.logger.log(
+          `Got dimensions from ${modelConfig.name} config: ${modelConfig.dimensions}`,
+        );
+        return modelConfig.dimensions;
+      }
+
+      // 2. Otherwise probe for dimensions
+      this.logger.log(`Probing model dimensions: ${embeddingModelId}`);
+      const probeEmbeddings = await this.embeddingService.getEmbeddings(
+        ['probe'],
+        embeddingModelId,
+      );
+
+      if (probeEmbeddings.length > 0) {
+        const actualDimensions = probeEmbeddings[0].length;
+        this.logger.log(
+          `Detected actual model dimensions: ${actualDimensions}`,
+        );
+
+        // Update model config for next use
+        if (modelConfig) {
+          try {
+            await this.modelConfigService.update(modelConfig.id, {
+              dimensions: actualDimensions,
+            });
+            this.logger.log(
+              `Updated model ${modelConfig.name} dimension config to ${actualDimensions}`,
+            );
+          } catch (updateErr) {
+            this.logger.warn(
+              `Failed to update model dimension config: ${updateErr.message}`,
+            );
+          }
+        }
+
+        return actualDimensions;
+      }
+    } catch (err) {
+      this.logger.warn(
+        `Failed to get dimensions. Using default: ${defaultDimensions}`,
+        err.message,
+      );
+    }
+
+    return defaultDimensions;
+  }
+
+  /**
+   * Auto-generate document title using AI
+   */
+  async generateTitle(kbId: string): Promise<string | null> {
+    this.logger.log(`Generating automatic title for file ${kbId}`);
+
+    try {
+      const kb = await this.kbRepository.findOne({ where: { id: kbId } });
+      if (!kb || !kb.content || kb.content.trim().length === 0) {
+        return null;
+      }
+      const tenantId = kb.tenantId;
+
+      // Skip if title already exists
+      if (kb.title) {
+        return kb.title;
+      }
+
+      // Get content sample (max 2500 characters)
+      const contentSample = kb.content.substring(0, 2500);
+
+      // Get language from org settings, or use default
+      const userSettings = await this.userSettingService.getByUser(kb.userId);
+      const language = userSettings.language || 'zh';
+
+      // Build prompt
+      const prompt = this.i18nService.getDocumentTitlePrompt(
+        language,
+        contentSample,
+      );
+
+      // Call LLM to generate title
+      let generatedTitle: string | undefined;
+      try {
+        generatedTitle = await this.chatService.generateSimpleChat(
+          [{ role: 'user', content: prompt }],
+          kb.userId,
+          kb.tenantId,
+        );
+      } catch (err) {
+        this.logger.warn(
+          `Failed to generate title for document ${kbId} due to LLM configuration issue: ${err.message}`,
+        );
+        return null; // Skip title generation if LLM is not configured for this tenant
+      }
+
+      if (generatedTitle && generatedTitle.trim().length > 0) {
+        // Remove extra quotes and newlines
+        const cleanedTitle = generatedTitle
+          .trim()
+          .replace(/^["']|["']$/g, '')
+          .substring(0, 100);
+        await this.kbRepository.update(kbId, { title: cleanedTitle });
+
+        // Also update ES chunks
+        await this.elasticsearchService
+          .updateTitleByFileId(kbId, cleanedTitle, tenantId)
+          .catch((err) => {
+            this.logger.error(
+              `Failed to update title in Elasticsearch for ${kbId}`,
+              err,
+            );
+          });
+
+        this.logger.log(
+          `Successfully generated title for ${kbId}: ${cleanedTitle}`,
+        );
+        return cleanedTitle;
+      }
+    } catch (error) {
+      this.logger.error(`Failed to generate title for ${kbId}`, error);
+    }
+
+    return null;
+  }
+}
@@ -0,0 +1,255 @@
+import { Injectable, Logger } from '@nestjs/common';
+
+export interface MemoryStats {
+  heapUsed: number; // Used heap memory (MB)
+  heapTotal: number; // Total heap memory (MB)
+  external: number; // External memory (MB)
+  rss: number; // RSS (Resident Set Size) (MB)
+  timestamp: Date;
+}
+
+@Injectable()
+export class MemoryMonitorService {
+  private readonly logger = new Logger(MemoryMonitorService.name);
+  private readonly MAX_MEMORY_MB: number;
+  private readonly BATCH_SIZE: number;
+  private readonly GC_THRESHOLD_MB: number;
+
+  constructor() {
+    // Load config from env vars. Default values for memory optimization
+    this.MAX_MEMORY_MB = parseInt(process.env.MAX_MEMORY_USAGE_MB || '1024'); // 1GB limit
+    this.BATCH_SIZE = parseInt(process.env.CHUNK_BATCH_SIZE || '100'); // 100 chunks per batch
+    this.GC_THRESHOLD_MB = parseInt(process.env.GC_THRESHOLD_MB || '800'); // Trigger GC at 800MB
+
+    this.logger.log(
+      `Memory monitor initialized: limit=${this.MAX_MEMORY_MB}MB, batchSize=${this.BATCH_SIZE}, GCThreshold=${this.GC_THRESHOLD_MB}MB`,
+    );
+  }
+
+  /**
+   * Get current memory usage
+   */
+  getMemoryUsage(): MemoryStats {
+    const usage = process.memoryUsage();
+    return {
+      heapUsed: Math.round(usage.heapUsed / 1024 / 1024),
+      heapTotal: Math.round(usage.heapTotal / 1024 / 1024),
+      external: Math.round((usage.external || 0) / 1024 / 1024),
+      rss: Math.round(usage.rss / 1024 / 1024),
+      timestamp: new Date(),
+    };
+  }
+
+  /**
+   * Check if memory is approaching limit
+   */
+  isMemoryHigh(): boolean {
+    const usage = this.getMemoryUsage();
+    return usage.heapUsed > this.MAX_MEMORY_MB * 0.85; // 85% threshold
+  }
+
+  /**
+   * Wait for memory to become available (with timeout)
+   */
+  async waitForMemoryAvailable(timeoutMs: number = 30000): Promise<void> {
+    const startTime = Date.now();
+
+    while (this.isMemoryHigh()) {
+      if (Date.now() - startTime > timeoutMs) {
+        throw new Error(
+          `Memory wait timeout: current ${this.getMemoryUsage().heapUsed}MB > ${this.MAX_MEMORY_MB * 0.85}MB`,
+        );
+      }
+
+      this.logger.warn(
+        `Memory usage too high. Waiting for release... ${this.getMemoryUsage().heapUsed}/${this.MAX_MEMORY_MB}MB`,
+      );
+
+      // Force garbage collection (if available)
+      if (global.gc) {
+        this.logger.log('Running forced garbage collection...');
+        global.gc();
+      }
+
+      await new Promise((resolve) => setTimeout(resolve, 1000));
+    }
+  }
+
+  /**
+   * Force garbage collection (if available)
+   */
+  forceGC(): void {
+    if (global.gc) {
+      const before = this.getMemoryUsage();
+      global.gc();
+      const after = this.getMemoryUsage();
+      this.logger.log(
+        `GC completed: ${before.heapUsed}MB → ${after.heapUsed}MB (${before.heapUsed - after.heapUsed}MB freed)`,
+      );
+    }
+  }
+
+  /**
+   * Dynamically adjust batch size
+   */
+  getDynamicBatchSize(currentMemoryMB: number): number {
+    const baseBatchSize = this.BATCH_SIZE;
+
+    if (currentMemoryMB > this.GC_THRESHOLD_MB) {
+      // Memory pressure, reduce batch size
+      const reduced = Math.max(10, Math.floor(baseBatchSize * 0.5));
+      this.logger.warn(
+        `Memory pressure (${currentMemoryMB}MB), adjusting batch size: ${baseBatchSize} → ${reduced}`,
+      );
+      return reduced;
+    } else if (currentMemoryMB < this.MAX_MEMORY_MB * 0.4) {
+      // Enough memory, increase batch size
+      const increased = Math.min(200, Math.floor(baseBatchSize * 1.2));
+      if (increased > baseBatchSize) {
+        this.logger.log(
+          `Memory available (${currentMemoryMB}MB), adjusting batch size: ${baseBatchSize} → ${increased}`,
+        );
+      }
+      return increased;
+    }
+
+    return baseBatchSize;
+  }
+
+  /**
+   * Process large data: auto-batching and memory control
+   */
+  async processInBatches<T, R>(
+    items: T[],
+    processor: (batch: T[], batchIndex: number) => Promise<R[]>,
+    options?: {
+      onBatchComplete?: (
+        batchIndex: number,
+        totalBatches: number,
+        results: R[],
+      ) => Promise<void> | void;
+      maxConcurrency?: number;
+    },
+  ): Promise<R[]> {
+    const totalItems = items.length;
+    if (totalItems === 0) return [];
+
+    const startTime = Date.now();
+    this.logger.log(`Starting batch processing: ${totalItems} items`);
+
+    const allResults: R[] = [];
+    let processedCount = 0;
+
+    for (let i = 0; i < totalItems; ) {
+      // Check memory state and wait
+      await this.waitForMemoryAvailable();
+
+      // Dynamically adjust batch size
+      const currentMem = this.getMemoryUsage().heapUsed;
+      const batchSize = this.getDynamicBatchSize(currentMem);
+
+      // Get current batch
+      const batch = items.slice(i, i + batchSize);
+      const batchIndex = Math.floor(i / batchSize) + 1;
+      const totalBatches = Math.ceil(totalItems / batchSize);
+
+      this.logger.log(
+        `Processing batch ${batchIndex}/${totalBatches}: ${batch.length} items (cumulative ${processedCount}/${totalItems})`,
+      );
+
+      // Process batch
+      const batchResults = await processor(batch, batchIndex);
+      allResults.push(...batchResults);
+      processedCount += batch.length;
+
+      // Callback notification
+      if (options?.onBatchComplete) {
+        await options.onBatchComplete(batchIndex, totalBatches, batchResults);
+      }
+
+      // Force GC if memory near threshold
+      if (currentMem > this.GC_THRESHOLD_MB) {
+        this.forceGC();
+      }
+
+      // Clear references to help GC
+      batch.length = 0;
+
+      i += batchSize;
+    }
+
+    const duration = ((Date.now() - startTime) / 1000).toFixed(2);
+    const finalMem = this.getMemoryUsage();
+    this.logger.log(
+      `Batch processing completed: ${totalItems} items, duration ${duration}s, final memory ${finalMem.heapUsed}MB`,
+    );
+
+    return allResults;
+  }
+
+  /**
+   * Estimate memory required for processing
+   */
+  estimateMemoryUsage(
+    itemCount: number,
+    itemSizeBytes: number,
+    vectorDim: number,
+  ): number {
+    // Text content memory
+    const textMemory = itemCount * itemSizeBytes;
+
+    // Vector memory (dimension * 4 bytes per vector)
+    const vectorMemory = itemCount * vectorDim * 4;
+
+    // Object overhead (~100 bytes per object)
+    const overhead = itemCount * 100;
+
+    const totalMB = Math.round(
+      (textMemory + vectorMemory + overhead) / 1024 / 1024,
+    );
+
+    return totalMB;
+  }
+
+  /**
+   * Check if batching should be used
+   */
+  shouldUseBatching(
+    itemCount: number,
+    itemSizeBytes: number,
+    vectorDim: number,
+  ): boolean {
+    const estimatedMB = this.estimateMemoryUsage(
+      itemCount,
+      itemSizeBytes,
+      vectorDim,
+    );
+    const threshold = this.MAX_MEMORY_MB * 0.7; // 70% threshold
+
+    if (estimatedMB > threshold) {
+      this.logger.warn(
+        `Estimated memory ${estimatedMB}MB exceeds threshold ${threshold}MB, using batch processing`,
+      );
+      return true;
+    }
+
+    return false;
+  }
+
+  /**
+   * Get recommended batch size
+   */
+  getRecommendedBatchSize(itemSizeBytes: number, vectorDim: number): number {
+    // Goal: max 200MB memory per batch
+    const targetMemoryMB = 200;
+    const targetMemoryBytes = targetMemoryMB * 1024 * 1024;
+
+    // Memory per item = text + vector + overhead
+    const singleItemMemory = itemSizeBytes + vectorDim * 4 + 100;
+
+    const batchSize = Math.floor(targetMemoryBytes / singleItemMemory);
+
+    // Limit between 10-200
+    return Math.max(10, Math.min(200, batchSize));
+  }
+}
@@ -0,0 +1,106 @@
+import { Injectable } from '@nestjs/common';
+
+export interface TextChunk {
+  content: string;
+  index: number;
+  startPosition: number;
+  endPosition: number;
+}
+
+@Injectable()
+export class TextChunkerService {
+  chunkText(
+    text: string,
+    chunkSize: number = 1000,
+    overlap: number = 200,
+  ): TextChunk[] {
+    if (!text || text.trim().length === 0) {
+      return [];
+    }
+
+    const cleanText = text.trim();
+    const chunkSizeInChars = chunkSize * 4; // 1 token ≈ 4 chars
+    const overlapInChars = overlap * 4;
+
+    // If text length <= chunk size, return entire text as one chunk
+    if (cleanText.length <= chunkSizeInChars) {
+      return [
+        {
+          content: cleanText,
+          index: 0,
+          startPosition: 0,
+          endPosition: cleanText.length,
+        },
+      ];
+    }
+
+    const chunks: TextChunk[] = [];
+    let start = 0;
+    let index = 0;
+
+    while (start < cleanText.length) {
+      let end = Math.min(start + chunkSizeInChars, cleanText.length);
+
+      // Split by sentence boundaries
+      if (end < cleanText.length) {
+        const sentenceEnd = this.findSentenceEnd(
+          cleanText,
+          end,
+          start + chunkSizeInChars * 0.8,
+        );
+        if (sentenceEnd > start) {
+          end = sentenceEnd;
+        }
+      }
+
+      const content = cleanText.slice(start, end).trim();
+      if (content.length > 0) {
+        chunks.push({
+          content,
+          index,
+          startPosition: start,
+          endPosition: end,
+        });
+        index++;
+      }
+
+      // Fix infinite loop: if we reached the end, stop here.
+      if (end >= cleanText.length) {
+        break;
+      }
+
+      // Calculate start position of next chunk
+      const newStart = end - overlapInChars;
+      // Protect against infinite loop if overlap is too large or chunk too small
+      if (newStart <= start) {
+        start = end; // Force advance if overlap would cause stagnation
+      } else {
+        start = newStart;
+      }
+    }
+
+    return chunks;
+  }
+
+  private findSentenceEnd(
+    text: string,
+    preferredEnd: number,
+    minEnd: number,
+  ): number {
+    const sentenceEnders = ['.', '!', '?', '。', '！', '？'];
+
+    for (let i = preferredEnd; i >= minEnd; i--) {
+      if (sentenceEnders.includes(text[i])) {
+        return i + 1;
+      }
+    }
+
+    for (let i = preferredEnd; i >= minEnd; i--) {
+      if (text[i] === ' ' || text[i] === '\n') {
+        return i;
+      }
+    }
+
+    return preferredEnd;
+  }
+}