feat: implement QuestionBank CRUD with pagination and template query

- Add pagination support to findAll (page, limit query params)
- Add findByTemplateId method to service
- Add GET /by-template/:templateId endpoint to controller
- Service already includes CRUD for QuestionBank and QuestionBankItem
This commit is contained in:
Developer
2026-04-23 17:19:11 +08:00
commit 0a9588abb7
492 changed files with 112453 additions and 0 deletions
@@ -0,0 +1,10 @@
import { Module, forwardRef } from '@nestjs/common';
import { ElasticsearchService } from './elasticsearch.service';
import { KnowledgeGroupModule } from '../knowledge-group/knowledge-group.module';
/**
 * NestJS module that registers {@link ElasticsearchService} as a provider and
 * exports it for use by other modules.
 *
 * KnowledgeGroupModule is imported through `forwardRef` (presumably to break a
 * circular module dependency — confirm against KnowledgeGroupModule).
 */
@Module({
  imports: [forwardRef(() => KnowledgeGroupModule)],
  providers: [ElasticsearchService],
  exports: [ElasticsearchService],
})
export class ElasticsearchModule {}
@@ -0,0 +1,653 @@
import { Injectable, Logger, OnModuleInit } from '@nestjs/common';
import { ConfigService } from '@nestjs/config';
import { Client } from '@elastic/elasticsearch';
import { I18nService } from '../i18n/i18n.service';
@Injectable()
export class ElasticsearchService implements OnModuleInit {
public readonly client: Client;
private readonly logger = new Logger(ElasticsearchService.name);
private readonly indexName: string;
/**
 * Creates the Elasticsearch client from application configuration.
 *
 * Reads `ELASTICSEARCH_HOST` (required) and `ELASTICSEARCH_INDEX`
 * (falls back to `knowledge_base`).
 *
 * @param configService Source of the configuration values.
 * @param i18nService Supplies the localized error message.
 * @throws Error carrying the `elasticsearchHostRequired` message when no host
 *   is configured.
 */
constructor(
  private configService: ConfigService,
  private i18nService: I18nService,
) {
  // The setting key is ELASTICSEARCH_HOST (changed from NODE to HOST).
  const host = this.configService.get<string>('ELASTICSEARCH_HOST');
  this.indexName = this.configService.get<string>(
    'ELASTICSEARCH_INDEX',
    'knowledge_base',
  );

  if (!host) {
    const message = this.i18nService.getMessage('elasticsearchHostRequired');
    throw new Error(message);
  }

  this.client = new Client({ node: host });
}
/**
 * Module-init hook: queries cluster health and logs its status.
 *
 * A failure is logged and not rethrown. The index itself is not created here;
 * it is created on first use, once the vector dimensions are known.
 */
async onModuleInit() {
  try {
    const { status } = await this.client.cluster.health();
    this.logger.log(`Elasticsearch cluster health is: ${status}`);
  } catch (err) {
    this.logger.error('Failed to connect to Elasticsearch', err);
  }
}
/**
 * Ensures the configured index exists with a `vector` field of the requested
 * dimensionality.
 *
 * - Index missing: it is created.
 * - Index present with a different `dims` value: the index is DELETED and
 *   recreated (all indexed documents are lost).
 * - Index present with matching or unreadable `dims`: nothing is changed.
 *
 * @param vectorDimensions Number of dimensions produced by the embedding model.
 */
async createIndexIfNotExists(vectorDimensions: number) {
  const indexName = this.indexName;
  const present = await this.client.indices.exists({ index: indexName });

  if (!present) {
    this.logger.log(
      `Creating index ${indexName}. Vector dimensions: ${vectorDimensions}`,
    );
    await this.createIndex(vectorDimensions);
    return;
  }

  // Inspect the dimensions the existing index was created with.
  const currentMapping = await this.client.indices.getMapping({
    index: indexName,
  });
  const vectorField = currentMapping[indexName]?.mappings?.properties
    ?.vector as any;
  const existingDims = vectorField?.dims;

  const compatible = !existingDims || existingDims === vectorDimensions;
  if (compatible) {
    this.logger.log(
      `Index ${indexName} already exists. Vector dimensions: ${existingDims || 'Unknown'}`,
    );
    return;
  }

  this.logger.warn(
    `Vector dimensions ${existingDims} of index ${indexName} do not match the current model dimensions ${vectorDimensions}.`,
  );
  this.logger.warn(
    `Reason: The embedding model might have been changed to one with different dimensions. The system will automatically recreate the index.`,
  );

  // Drop the stale index, then build a fresh one with the new dimensions.
  await this.client.indices.delete({ index: indexName });
  this.logger.log(`Successfully deleted old index: ${indexName}`);
  await this.createIndex(vectorDimensions);
  this.logger.log(
    `Recreated index: ${indexName} (Dimensions: ${vectorDimensions})`,
  );
}
/**
 * Stores one chunk (text plus embedding) in the index under `documentId`.
 *
 * @param documentId Elasticsearch document id to write.
 * @param content Chunk text.
 * @param vector Embedding of the chunk; must be non-empty.
 * @param metadata Object from which `fileId`, `originalName`, `title`,
 *   `mimetype`, `chunkIndex`, `startPosition`, `endPosition`, `userId` and
 *   `tenantId` are read.
 * @returns The response of the client's `index` call.
 * @throws Error carrying the `vectorRequired` message when `vector` is
 *   missing or empty.
 */
async indexDocument(
  documentId: string,
  content: string,
  vector: number[],
  metadata: any,
) {
  this.logger.log(
    `Indexing document ${documentId}: content=${content.length} chars, vector=${vector?.length} dims`,
  );

  const vectorMissing = !vector || vector.length === 0;
  if (vectorMissing) {
    this.logger.error(`Invalid vector for document ${documentId}`);
    throw new Error(this.i18nService.getMessage('vectorRequired'));
  }

  const response = await this.client.index({
    index: this.indexName,
    id: documentId,
    document: {
      content,
      vector,
      fileId: metadata.fileId,
      fileName: metadata.originalName,
      // Fall back to the original file name when no title was supplied.
      title: metadata.title || metadata.originalName,
      fileMimeType: metadata.mimetype,
      chunkIndex: metadata.chunkIndex,
      startPosition: metadata.startPosition,
      endPosition: metadata.endPosition,
      userId: metadata.userId,
      tenantId: metadata.tenantId,
      createdAt: new Date(),
    },
  });

  this.logger.log(
    `Indexed document ${documentId} with ${vector.length}D vector`,
  );
  return response;
}
/**
 * Deletes every indexed chunk of a file.
 *
 * The deletion is scoped by `tenantId` when one is given, otherwise by
 * `userId`.
 *
 * @param fileId File whose chunks are removed.
 * @param userId Owner used for scoping when `tenantId` is absent.
 * @param tenantId Optional tenant used for scoping.
 */
async deleteByFileId(fileId: string, userId: string, tenantId?: string) {
  const scopeClause = tenantId
    ? { term: { tenantId } }
    : { term: { userId } };
  const filter: any[] = [{ term: { fileId } }, scopeClause];

  await this.client.deleteByQuery({
    index: this.indexName,
    query: { bool: { filter } },
  });
}
/**
 * Rewrites the `title` field on every indexed chunk of a file.
 *
 * The update is additionally restricted to `tenantId` when one is given.
 * NOTE(review): unlike deleteByFileId there is no userId fallback here, so
 * without a tenant the update matches on `fileId` alone.
 *
 * @param fileId File whose chunks are updated.
 * @param title New title value.
 * @param tenantId Optional tenant used for scoping.
 */
async updateTitleByFileId(fileId: string, title: string, tenantId?: string) {
  const fileClause = { term: { fileId } };
  const filter: any[] = tenantId
    ? [fileClause, { term: { tenantId } }]
    : [fileClause];

  await this.client.updateByQuery({
    index: this.indexName,
    query: { bool: { filter } },
    script: {
      source: 'ctx._source.title = params.title',
      params: { title },
    },
    // Refresh so the new title is visible to searches immediately.
    refresh: true,
  });
}
/**
 * Deletes every indexed document whose `userId` matches.
 *
 * Note: because this wipes all data for a user, it should likely only be
 * used by admin functionality.
 *
 * @param userId User whose documents are removed.
 */
async deleteByUserId(userId: string) {
  const query = { term: { userId } };
  await this.client.deleteByQuery({ index: this.indexName, query });
}
/**
 * kNN vector search over the `vector` field.
 *
 * Results are restricted to `tenantId` when given, otherwise to `userId`.
 * Any failure is logged and an empty array is returned.
 *
 * @param queryVector Embedding of the query; an empty or missing vector yields `[]`.
 * @param userId Owner used for scoping when `tenantId` is absent.
 * @param topK Maximum number of hits (default 5).
 * @param tenantId Optional tenant used for scoping.
 * @returns Hits with scores passed through `normalizeScore`; the stored
 *   vector is excluded from the response.
 */
async searchSimilar(
  queryVector: number[],
  userId: string,
  topK: number = 5,
  tenantId?: string,
) {
  try {
    this.logger.log(
      `Vector search: userId=${userId}, vectorDim=${queryVector?.length}, topK=${topK}`,
    );

    if (!queryVector || queryVector.length === 0) {
      this.logger.warn('Empty query vector provided');
      return [];
    }

    const scopeClauses: any[] = [
      tenantId ? { term: { tenantId } } : { term: { userId } },
    ];

    const response = await this.client.search({
      index: this.indexName,
      knn: {
        field: 'vector',
        query_vector: queryVector,
        k: topK,
        num_candidates: topK * 2,
        filter: { bool: { must: scopeClauses } },
      },
      size: topK,
      _source: { excludes: ['vector'] },
    });

    const results = response.hits.hits.map((hit: any) => {
      const source = hit._source;
      return {
        id: hit._id,
        score: this.normalizeScore(hit._score),
        content: source?.content,
        fileId: source?.fileId,
        fileName: source?.fileName,
        title: source?.title,
        chunkIndex: source?.chunkIndex,
        startPosition: source?.startPosition,
        endPosition: source?.endPosition,
      };
    });

    this.logger.log(
      `Vector search completed: found ${results.length} results`,
    );
    return results;
  } catch (err) {
    this.logger.error('Vector search failed:', err);
    return [];
  }
}
/**
 * Fuzzy full-text search on the `content` field.
 *
 * Results are restricted to `tenantId` when given, otherwise to `userId`.
 * Any failure is logged and an empty array is returned.
 *
 * @param query Search text; blank input yields `[]`.
 * @param userId Owner used for scoping when `tenantId` is absent.
 * @param topK Maximum number of hits (default 5).
 * @param tenantId Optional tenant used for scoping.
 * @returns Hits with scores passed through `normalizeScore`; the stored
 *   vector is excluded from the response.
 */
async searchFullText(
  query: string,
  userId: string,
  topK: number = 5,
  tenantId?: string,
) {
  try {
    this.logger.log(
      `Full-text search: userId=${userId}, query="${query}", topK=${topK}`,
    );

    if (!query || query.trim().length === 0) {
      this.logger.warn('Empty query provided for full-text search');
      return [];
    }

    const scopeClauses: any[] = [
      tenantId ? { term: { tenantId } } : { term: { userId } },
    ];

    const response = await this.client.search({
      index: this.indexName,
      query: {
        bool: {
          must: {
            match: {
              content: { query, fuzziness: 'AUTO' },
            },
          },
          filter: scopeClauses,
        },
      },
      size: topK,
      _source: { excludes: ['vector'] },
    });

    const results = response.hits.hits.map((hit: any) => {
      const source = hit._source;
      return {
        id: hit._id,
        score: this.normalizeScore(hit._score),
        content: source?.content,
        fileId: source?.fileId,
        fileName: source?.fileName,
        title: source?.title,
        chunkIndex: source?.chunkIndex,
        startPosition: source?.startPosition,
        endPosition: source?.endPosition,
      };
    });

    this.logger.log(
      `Full-text search completed: found ${results.length} results`,
    );
    return results;
  } catch (err) {
    this.logger.error('Full-text search failed:', err);
    return [];
  }
}
/**
 * Runs the vector search and the full-text search in parallel and merges the
 * two result lists into one weighted ranking.
 *
 * For each hit id the combined score is
 * `vectorScore * vectorWeight + textScore * (1 - vectorWeight)`, where a hit
 * missing from one of the lists contributes 0 for that list. The merged hits
 * are sorted by combined score (descending), cut to `topK`, and the reported
 * score is clamped to the range [0, 1].
 *
 * @param queryVector Embedding of the query.
 * @param query Query text for the full-text side.
 * @param userId Owner used for scoping when `tenantId` is absent.
 * @param topK Maximum number of hits requested from each search and returned (default 5).
 * @param vectorWeight Weight of the vector score; the text score gets `1 - vectorWeight` (default 0.7).
 * @param selectedGroups Deprecated and ignored. Callers should resolve groups
 *   to file ids themselves and pass them as `explicitFileIds`.
 * @param explicitFileIds Optional list of file ids to restrict the search to.
 *   An empty list short-circuits to `[]`; `undefined` means no file restriction.
 * @param tenantId Optional tenant used for scoping.
 * @returns Up to `topK` merged hits.
 */
async hybridSearch(
  queryVector: number[],
  query: string,
  userId: string,
  topK: number = 5,
  vectorWeight: number = 0.7,
  selectedGroups?: string[], // Deprecated, unused; kept for backward compatibility
  explicitFileIds?: string[], // Explicitly specified file ID list
  tenantId?: string,
) {
  const fileIds = explicitFileIds;

  if (fileIds && fileIds.length === 0) {
    this.logger.log('No search target files (count=0), skipping search');
    return [];
  }
  if (fileIds) {
    this.logger.log(`Final search target scope: ${fileIds.length} files`);
  }

  const [vectorResults, textResults] = await Promise.all([
    this.searchSimilarWithFileFilter(
      queryVector,
      userId,
      topK,
      fileIds,
      tenantId,
    ),
    this.searchFullTextWithFileFilter(query, userId, topK, fileIds, tenantId),
  ]);

  // Merge both lists keyed by document id so duplicates collapse into one entry.
  const combinedResults = new Map<string, any>();
  const textWeight = 1 - vectorWeight;

  // Seed with the vector hits; they have no text score yet.
  for (const result of vectorResults) {
    combinedResults.set(result.id, {
      ...result,
      vectorScore: result.score,
      textScore: 0,
      combinedScore: result.score * vectorWeight,
    });
  }

  // Fold in the text hits, upgrading entries that were also found by vector search.
  for (const result of textResults) {
    const existing = combinedResults.get(result.id);
    if (existing) {
      existing.textScore = result.score;
      existing.combinedScore =
        existing.vectorScore * vectorWeight + result.score * textWeight;
    } else {
      combinedResults.set(result.id, {
        ...result,
        vectorScore: 0,
        textScore: result.score,
        combinedScore: result.score * textWeight,
      });
    }
  }

  return Array.from(combinedResults.values())
    .sort((a, b) => b.combinedScore - a.combinedScore)
    .slice(0, topK)
    .map((result) => ({
      id: result.id,
      // Keep the reported score inside [0, 1].
      score: Math.max(0, Math.min(1.0, result.combinedScore)),
      content: result.content,
      fileId: result.fileId,
      fileName: result.fileName,
      title: result.title,
      chunkIndex: result.chunkIndex,
      startPosition: result.startPosition,
      endPosition: result.endPosition,
    }));
}
/**
 * Creates the configured index with the chunk mapping: analyzed `content`,
 * an indexed cosine-similarity `vector` of the given size, and keyword /
 * integer / date metadata fields.
 *
 * @param vectorDimensions Value used for the `dims` of the `vector` field.
 */
private async createIndex(vectorDimensions: number) {
  const properties: any = {
    content: {
      type: 'text',
      analyzer: 'standard',
    },
    vector: {
      type: 'dense_vector',
      dims: vectorDimensions,
      index: true,
      similarity: 'cosine',
    },
    fileId: { type: 'keyword' },
    fileName: { type: 'keyword' },
    title: { type: 'text' },
    fileMimeType: { type: 'keyword' },
    chunkIndex: { type: 'integer' },
    startPosition: { type: 'integer' },
    endPosition: { type: 'integer' },
    userId: { type: 'keyword' },
    // Tenant information (used for multi-tenant isolation)
    tenantId: { type: 'keyword' },
    // Timestamp
    createdAt: { type: 'date' },
  };
  const mappings: any = { properties };

  await this.client.indices.create({ index: this.indexName, mappings });

  this.logger.log(
    `Successfully created index ${this.indexName}. Vector dimensions: ${vectorDimensions}`,
  );
}
/**
 * Maps a raw hit score to the range [0, 1].
 *
 * Falsy or non-positive scores become 0; everything else is capped at 1.0.
 *
 * @param rawScore Score as returned by Elasticsearch.
 * @returns The clamped score.
 */
private normalizeScore(rawScore: number): number {
  const unusable = !rawScore || rawScore <= 0;
  return unusable ? 0 : Math.min(1.0, rawScore);
}
/**
 * kNN vector search, optionally restricted to a set of files.
 *
 * Results are always scoped to `tenantId` when given, otherwise to `userId`,
 * so the kNN filter is never empty. Any failure is logged and an empty array
 * is returned.
 *
 * @param queryVector Embedding of the query; an empty or missing vector yields `[]`.
 * @param userId Owner used for scoping when `tenantId` is absent.
 * @param topK Maximum number of hits (default 5).
 * @param fileIds Optional file restriction. An empty list yields `[]`;
 *   `undefined` means all files in scope.
 * @param tenantId Optional tenant used for scoping.
 * @returns Hits with scores passed through `normalizeScore`; the stored
 *   vector is excluded from the response.
 */
private async searchSimilarWithFileFilter(
  queryVector: number[],
  userId: string,
  topK: number = 5,
  fileIds?: string[],
  tenantId?: string,
) {
  try {
    this.logger.log(
      `Vector search with filter: userId=${userId}, tenantId=${tenantId}, vectorDim=${queryVector?.length}, topK=${topK}, fileIds=${fileIds?.length || 'all'}`,
    );

    if (!queryVector || queryVector.length === 0) {
      this.logger.warn('Empty query vector provided');
      return [];
    }

    if (fileIds && fileIds.length === 0) {
      this.logger.log(
        'Filter resulted in 0 files, returning empty results for vector search',
      );
      return [];
    }

    const filterClauses: any[] = [];
    // Past the guard above, a defined fileIds list is guaranteed non-empty.
    if (fileIds) {
      filterClauses.push({ terms: { fileId: fileIds } });
    }
    // Tenant isolation: enforce tenantId when provided, otherwise fall back
    // to the legacy userId-based filter.
    filterClauses.push(
      tenantId ? { term: { tenantId } } : { term: { userId } },
    );

    // filterClauses always holds the tenant/user clause, so the filter is
    // attached unconditionally.
    const queryBody: any = {
      index: this.indexName,
      knn: {
        field: 'vector',
        query_vector: queryVector,
        k: topK,
        num_candidates: topK * 2,
        filter: { bool: { must: filterClauses } },
      },
      size: topK,
      _source: {
        excludes: ['vector'],
      },
    };

    const response = await this.client.search(queryBody);

    const results = response.hits.hits.map((hit: any) => ({
      id: hit._id,
      score: this.normalizeScore(hit._score),
      content: hit._source?.content,
      fileId: hit._source?.fileId,
      fileName: hit._source?.fileName,
      title: hit._source?.title,
      chunkIndex: hit._source?.chunkIndex,
      startPosition: hit._source?.startPosition,
      endPosition: hit._source?.endPosition,
    }));

    this.logger.log(
      `Vector search completed: found ${results.length} results`,
    );
    return results;
  } catch (error) {
    this.logger.error('Vector search failed:', error);
    return [];
  }
}
/**
 * Fuzzy full-text search on `content`, optionally restricted to a set of files.
 *
 * Results are scoped to `tenantId` when given, otherwise to `userId`.
 * Any failure is logged and an empty array is returned.
 *
 * @param query Search text; blank input yields `[]`.
 * @param userId Owner used for scoping when `tenantId` is absent.
 * @param topK Maximum number of hits (default 5).
 * @param fileIds Optional file restriction. An empty list yields `[]`;
 *   `undefined` means all files in scope.
 * @param tenantId Optional tenant used for scoping.
 * @returns Hits with scores passed through `normalizeScore`; the stored
 *   vector is excluded from the response.
 */
public async searchFullTextWithFileFilter(
  query: string,
  userId: string,
  topK: number = 5,
  fileIds?: string[],
  tenantId?: string,
) {
  try {
    this.logger.log(
      `Full-text search with filter: userId=${userId}, query="${query}", topK=${topK}, fileIds=${fileIds?.length || 'all'}`,
    );

    if (!query || query.trim().length === 0) {
      this.logger.warn('Empty query provided for full-text search');
      return [];
    }

    if (fileIds && fileIds.length === 0) {
      this.logger.log(
        'Filter resulted in 0 files, returning empty results for full-text search',
      );
      return [];
    }

    // Scoring clause: fuzzy match against the chunk text.
    const contentMatch = {
      match: {
        content: { query, fuzziness: 'AUTO' },
      },
    };

    // Non-scoring clauses: optional file restriction, then tenant/user scope.
    const scope: any[] = [];
    if (fileIds && fileIds.length > 0) {
      scope.push({ terms: { fileId: fileIds } });
    }
    scope.push(tenantId ? { term: { tenantId } } : { term: { userId } });

    const request: any = {
      index: this.indexName,
      query: {
        bool: {
          must: [contentMatch],
          filter: scope,
        },
      },
      size: topK,
      _source: { excludes: ['vector'] },
    };

    const response = await this.client.search(request);

    const results = response.hits.hits.map((hit: any) => {
      const source = hit._source;
      return {
        id: hit._id,
        score: this.normalizeScore(hit._score),
        content: source?.content,
        fileId: source?.fileId,
        fileName: source?.fileName,
        title: source?.title,
        chunkIndex: source?.chunkIndex,
        startPosition: source?.startPosition,
        endPosition: source?.endPosition,
      };
    });

    this.logger.log(
      `Full-text search completed: found ${results.length} results`,
    );
    return results;
  } catch (err) {
    this.logger.error('Full-text search failed:', err);
    return [];
  }
}
/**
 * Retrieves all chunks of the specified file, ordered by `chunkIndex`.
 *
 * The lookup is scoped to `tenantId` when given, otherwise to `userId`.
 * At most 10000 chunks are requested in a single search. Any failure is
 * logged and an empty array is returned.
 *
 * @param fileId File whose chunks are fetched.
 * @param userId Owner used for scoping when `tenantId` is absent.
 * @param tenantId Optional tenant used for scoping.
 * @returns Chunk records without the stored vector.
 */
async getFileChunks(fileId: string, userId: string, tenantId?: string) {
  try {
    this.logger.log(`Getting chunks for file ${fileId}`);

    const scopeClause = tenantId
      ? { term: { tenantId } }
      : { term: { userId } };
    const filter: any[] = [{ term: { fileId } }, scopeClause];

    const response = await this.client.search({
      index: this.indexName,
      query: { bool: { filter } },
      sort: [{ chunkIndex: 'asc' }],
      size: 10000,
      _source: { excludes: ['vector'] },
    });

    const chunks = response.hits.hits.map((hit: any) => {
      const source = hit._source;
      return {
        id: hit._id,
        chunkIndex: source.chunkIndex,
        content: source.content,
        startPosition: source.startPosition,
        endPosition: source.endPosition,
        fileName: source.fileName,
      };
    });

    this.logger.log(`Found ${chunks.length} chunks for file ${fileId}`);
    return chunks;
  } catch (err) {
    this.logger.error(`Failed to get chunks for file ${fileId}`, err);
    return [];
  }
}
}