feat: implement QuestionBank CRUD with pagination and template query
- Add pagination support to findAll (page, limit query params)
- Add findByTemplateId method to service
- Add GET /by-template/:templateId endpoint to controller
- Service already includes CRUD for QuestionBank and QuestionBankItem
This commit is contained in:
@@ -0,0 +1,10 @@
|
||||
import { Module, forwardRef } from '@nestjs/common';
|
||||
import { ElasticsearchService } from './elasticsearch.service';
|
||||
import { KnowledgeGroupModule } from '../knowledge-group/knowledge-group.module';
|
||||
|
||||
/**
 * Nest module that registers ElasticsearchService as a provider and exports
 * it so that importing modules can inject it.
 *
 * KnowledgeGroupModule is imported through `forwardRef`
 * (presumably to break a circular module dependency; confirm against
 * knowledge-group.module).
 */
@Module({
  exports: [ElasticsearchService],
  providers: [ElasticsearchService],
  imports: [forwardRef(() => KnowledgeGroupModule)],
})
export class ElasticsearchModule {}
|
||||
@@ -0,0 +1,653 @@
|
||||
import { Injectable, Logger, OnModuleInit } from '@nestjs/common';
|
||||
import { ConfigService } from '@nestjs/config';
|
||||
import { Client } from '@elastic/elasticsearch';
|
||||
import { I18nService } from '../i18n/i18n.service';
|
||||
|
||||
@Injectable()
|
||||
export class ElasticsearchService implements OnModuleInit {
|
||||
public readonly client: Client;
|
||||
private readonly logger = new Logger(ElasticsearchService.name);
|
||||
private readonly indexName: string;
|
||||
|
||||
/**
 * Reads the Elasticsearch settings from configuration and builds the client.
 *
 * - `ELASTICSEARCH_HOST`: passed to the client as its `node` option (required).
 * - `ELASTICSEARCH_INDEX`: index name; falls back to `knowledge_base`.
 *
 * @throws Error carrying the i18n message `elasticsearchHostRequired` when
 *   `ELASTICSEARCH_HOST` is missing or empty.
 */
constructor(
  private configService: ConfigService,
  private i18nService: I18nService,
) {
  const host = this.configService.get<string>('ELASTICSEARCH_HOST');
  this.indexName = this.configService.get<string>(
    'ELASTICSEARCH_INDEX',
    'knowledge_base',
  );

  // Fail fast: a client without a node address cannot be constructed usefully.
  if (!host) {
    const message = this.i18nService.getMessage('elasticsearchHostRequired');
    throw new Error(message);
  }

  this.client = new Client({ node: host });
}
|
||||
|
||||
/**
 * Module-init hook: requests the cluster health once and logs its status.
 *
 * A failing request is logged and swallowed, so the error never propagates
 * out of this hook. No index is created here; the index is created on first
 * use, once the vector dimensions of the model are known.
 */
async onModuleInit() {
  try {
    const { status } = await this.client.cluster.health();
    this.logger.log(`Elasticsearch cluster health is: ${status}`);
  } catch (err) {
    this.logger.error('Failed to connect to Elasticsearch', err);
  }
}
|
||||
|
||||
/**
 * Ensures the configured index exists with a `vector` field of the given size.
 *
 * - Index missing: it is created.
 * - Index present and its `vector.dims` is unknown or equal to
 *   `vectorDimensions`: nothing is changed.
 * - Index present with a different `vector.dims`: the index is DELETED and
 *   recreated, which discards every document stored in it.
 *
 * @param vectorDimensions Dimension count the `vector` field must have.
 */
async createIndexIfNotExists(vectorDimensions: number) {
  const index = this.indexName;

  const alreadyExists = await this.client.indices.exists({ index });
  if (!alreadyExists) {
    this.logger.log(
      `Creating index ${index}. Vector dimensions: ${vectorDimensions}`,
    );
    await this.createIndex(vectorDimensions);
    return;
  }

  // Inspect the current mapping to learn the dimension count of `vector`.
  const currentMapping = await this.client.indices.getMapping({ index });
  const vectorField = currentMapping[index]?.mappings?.properties
    ?.vector as any;
  const existingDims = vectorField?.dims;

  const dimsUnknownOrEqual =
    !existingDims || existingDims === vectorDimensions;
  if (dimsUnknownOrEqual) {
    this.logger.log(
      `Index ${index} already exists. Vector dimensions: ${existingDims || 'Unknown'}`,
    );
    return;
  }

  this.logger.warn(
    `Vector dimensions ${existingDims} of index ${index} do not match the current model dimensions ${vectorDimensions}.`,
  );
  this.logger.warn(
    `Reason: The embedding model might have been changed to one with different dimensions. The system will automatically recreate the index.`,
  );

  // Drop the stale index, then build a fresh one with the new dimensions.
  await this.client.indices.delete({ index });
  this.logger.log(`Successfully deleted old index: ${index}`);

  await this.createIndex(vectorDimensions);
  this.logger.log(
    `Recreated index: ${index} (Dimensions: ${vectorDimensions})`,
  );
}
|
||||
|
||||
/**
 * Stores one chunk (text, embedding and file metadata) in the index under
 * the given document id.
 *
 * @param documentId Id used as the Elasticsearch `_id`.
 * @param content Chunk text.
 * @param vector Embedding of the chunk; must be non-empty.
 * @param metadata Source of `fileId`, `originalName`, `title`, `mimetype`,
 *   `chunkIndex`, `startPosition`, `endPosition`, `userId` and `tenantId`.
 * @returns The response of the client's `index` call.
 * @throws Error carrying the i18n message `vectorRequired` when `vector` is
 *   missing or empty.
 */
async indexDocument(
  documentId: string,
  content: string,
  vector: number[],
  metadata: any,
) {
  this.logger.log(
    `Indexing document ${documentId}: content=${content.length} chars, vector=${vector?.length} dims`,
  );

  const vectorMissing = !vector || vector.length === 0;
  if (vectorMissing) {
    this.logger.error(`Invalid vector for document ${documentId}`);
    throw new Error(this.i18nService.getMessage('vectorRequired'));
  }

  const {
    fileId,
    originalName,
    title,
    mimetype,
    chunkIndex,
    startPosition,
    endPosition,
    userId,
    tenantId,
  } = metadata;

  const response = await this.client.index({
    index: this.indexName,
    id: documentId,
    document: {
      content,
      vector,
      fileId,
      fileName: originalName,
      // Fall back to the original file name when no title was supplied.
      title: title || originalName,
      fileMimeType: mimetype,
      chunkIndex,
      startPosition,
      endPosition,
      userId,
      tenantId,
      createdAt: new Date(),
    },
  });

  this.logger.log(
    `Indexed document ${documentId} with ${vector.length}D vector`,
  );
  return response;
}
|
||||
|
||||
/**
 * Deletes every indexed document that belongs to the given file.
 *
 * The delete is scoped by `tenantId` when one is given, otherwise by `userId`.
 *
 * @param fileId File whose documents are removed.
 * @param userId Owner filter used only when `tenantId` is not provided.
 * @param tenantId Optional tenant filter; takes precedence over `userId`.
 */
async deleteByFileId(fileId: string, userId: string, tenantId?: string) {
  const filter: any[] = [
    { term: { fileId } },
    tenantId ? { term: { tenantId } } : { term: { userId } },
  ];

  await this.client.deleteByQuery({
    index: this.indexName,
    query: { bool: { filter } },
  });
}
|
||||
|
||||
/**
 * Rewrites the `title` field of every indexed document that belongs to a file.
 *
 * The update is scoped by `tenantId` when one is given. Otherwise it is
 * scoped by `userId` if the caller supplies it, matching the tenant-or-user
 * fallback used by the other methods of this service. When neither is given
 * the update matches on `fileId` alone, which is the original behaviour.
 *
 * @param fileId File whose documents are updated.
 * @param title New title written into each matching document.
 * @param tenantId Optional tenant filter; takes precedence over `userId`.
 * @param userId Optional owner filter, applied only when `tenantId` is absent.
 */
async updateTitleByFileId(
  fileId: string,
  title: string,
  tenantId?: string,
  userId?: string,
) {
  const filter: any[] = [{ term: { fileId } }];
  if (tenantId) {
    filter.push({ term: { tenantId } });
  } else if (userId) {
    filter.push({ term: { userId } });
  }

  await this.client.updateByQuery({
    index: this.indexName,
    query: {
      bool: { filter },
    },
    script: {
      // The title travels as a script parameter, never spliced into the source.
      source: 'ctx._source.title = params.title',
      params: { title },
    },
    refresh: true, // Make the new title visible to searches immediately
  });
}
|
||||
|
||||
/**
 * Deletes every indexed document whose `userId` equals the given id.
 *
 * This wipes all of a user's data from the index and applies no tenant
 * filter, so it should likely be reserved for admin functionality.
 *
 * @param userId User whose documents are removed.
 */
async deleteByUserId(userId: string) {
  const query = { term: { userId } };
  await this.client.deleteByQuery({ index: this.indexName, query });
}
|
||||
|
||||
/**
 * Runs a kNN search on the `vector` field.
 *
 * Results are restricted to `tenantId` when given, otherwise to `userId`.
 * Any failure is logged and reported as an empty result list.
 *
 * @param queryVector Embedding to search with; empty or missing yields `[]`.
 * @param userId Owner filter used only when `tenantId` is not provided.
 * @param topK Number of hits requested (`k` and `size`); candidates are `topK * 2`.
 * @param tenantId Optional tenant filter; takes precedence over `userId`.
 * @returns Hits with their score passed through `normalizeScore`, without the stored vector.
 */
async searchSimilar(
  queryVector: number[],
  userId: string,
  topK: number = 5,
  tenantId?: string,
) {
  try {
    this.logger.log(
      `Vector search: userId=${userId}, vectorDim=${queryVector?.length}, topK=${topK}`,
    );

    if (!queryVector || queryVector.length === 0) {
      this.logger.warn('Empty query vector provided');
      return [];
    }

    const scopeClauses: any[] = [
      tenantId ? { term: { tenantId } } : { term: { userId } },
    ];

    const response = await this.client.search({
      index: this.indexName,
      knn: {
        field: 'vector',
        query_vector: queryVector,
        k: topK,
        num_candidates: topK * 2,
        filter: { bool: { must: scopeClauses } },
      },
      size: topK,
      // The embedding itself is not needed by callers; leave it out of the payload.
      _source: {
        excludes: ['vector'],
      },
    });

    const toResult = (hit: any) => {
      const source = hit._source;
      return {
        id: hit._id,
        score: this.normalizeScore(hit._score),
        content: source?.content,
        fileId: source?.fileId,
        fileName: source?.fileName,
        title: source?.title,
        chunkIndex: source?.chunkIndex,
        startPosition: source?.startPosition,
        endPosition: source?.endPosition,
      };
    };
    const results = response.hits.hits.map(toResult);

    this.logger.log(
      `Vector search completed: found ${results.length} results`,
    );
    return results;
  } catch (err) {
    this.logger.error('Vector search failed:', err);
    return [];
  }
}
|
||||
|
||||
/**
 * Runs a fuzzy `match` query on the `content` field.
 *
 * Results are restricted to `tenantId` when given, otherwise to `userId`.
 * Any failure is logged and reported as an empty result list.
 *
 * @param query Search text; empty or whitespace-only yields `[]`.
 * @param userId Owner filter used only when `tenantId` is not provided.
 * @param topK Maximum number of hits returned.
 * @param tenantId Optional tenant filter; takes precedence over `userId`.
 * @returns Hits with their score passed through `normalizeScore`, without the stored vector.
 */
async searchFullText(
  query: string,
  userId: string,
  topK: number = 5,
  tenantId?: string,
) {
  try {
    this.logger.log(
      `Full-text search: userId=${userId}, query="${query}", topK=${topK}`,
    );

    if (!query || query.trim().length === 0) {
      this.logger.warn('Empty query provided for full-text search');
      return [];
    }

    const scopeClauses: any[] = [
      tenantId ? { term: { tenantId } } : { term: { userId } },
    ];

    const response = await this.client.search({
      index: this.indexName,
      query: {
        bool: {
          must: {
            match: {
              content: {
                query: query,
                fuzziness: 'AUTO',
              },
            },
          },
          filter: scopeClauses,
        },
      },
      size: topK,
      // The embedding itself is not needed by callers; leave it out of the payload.
      _source: {
        excludes: ['vector'],
      },
    });

    const toResult = (hit: any) => {
      const source = hit._source;
      return {
        id: hit._id,
        score: this.normalizeScore(hit._score),
        content: source?.content,
        fileId: source?.fileId,
        fileName: source?.fileName,
        title: source?.title,
        chunkIndex: source?.chunkIndex,
        startPosition: source?.startPosition,
        endPosition: source?.endPosition,
      };
    };
    const results = response.hits.hits.map(toResult);

    this.logger.log(
      `Full-text search completed: found ${results.length} results`,
    );
    return results;
  } catch (err) {
    this.logger.error('Full-text search failed:', err);
    return [];
  }
}
|
||||
|
||||
/**
 * Combines vector search and full-text search into one ranked result list.
 *
 * Both searches run in parallel with the same `topK`, file filter and
 * tenant/user scope. Hits are merged by document id and scored as
 * `vectorScore * vectorWeight + textScore * (1 - vectorWeight)`, where a hit
 * found by only one search contributes 0 for the other. The merged list is
 * sorted by that score (descending), cut to `topK`, and each score is
 * clamped to the range [0, 1].
 *
 * @param queryVector Embedding used for the vector search.
 * @param query Text used for the full-text search.
 * @param userId Owner filter used only when `tenantId` is not provided.
 * @param topK Maximum number of results returned.
 * @param vectorWeight Weight of the vector score; the text score gets `1 - vectorWeight`.
 * @param selectedGroups Deprecated and ignored. Callers should resolve groups
 *   to file ids themselves and pass them as `explicitFileIds`.
 * @param explicitFileIds Optional list of file ids to search within. An empty
 *   list means "no files" and returns `[]` without searching; `undefined`
 *   means no file restriction.
 * @param tenantId Optional tenant filter; takes precedence over `userId`.
 * @returns Merged hits (id, score, content and file/chunk metadata).
 */
async hybridSearch(
  queryVector: number[],
  query: string,
  userId: string,
  topK: number = 5,
  vectorWeight: number = 0.7,
  selectedGroups?: string[], // Deprecated and unused; kept for backward compatibility
  explicitFileIds?: string[], // Explicit list of file ids to search within
  tenantId?: string,
) {
  const fileIds = explicitFileIds;

  if (fileIds) {
    if (fileIds.length === 0) {
      this.logger.log('No search target files (count=0), skipping search');
      return [];
    }
    this.logger.log(`Final search target scope: ${fileIds.length} files`);
  }

  const [vectorResults, textResults] = await Promise.all([
    this.searchSimilarWithFileFilter(
      queryVector,
      userId,
      topK,
      fileIds,
      tenantId,
    ),
    this.searchFullTextWithFileFilter(query, userId, topK, fileIds, tenantId),
  ]);

  const textWeight = 1 - vectorWeight;

  // Merge both result sets, keyed by document id so duplicates collapse.
  const combinedResults = new Map<string, any>();

  for (const result of vectorResults) {
    combinedResults.set(result.id, {
      ...result,
      vectorScore: result.score,
      textScore: 0,
      combinedScore: result.score * vectorWeight,
    });
  }

  for (const result of textResults) {
    const existing = combinedResults.get(result.id);
    if (existing) {
      // Found by both searches: blend the two scores.
      existing.textScore = result.score;
      existing.combinedScore =
        existing.vectorScore * vectorWeight + result.score * textWeight;
    } else {
      combinedResults.set(result.id, {
        ...result,
        vectorScore: 0,
        textScore: result.score,
        combinedScore: result.score * textWeight,
      });
    }
  }

  return Array.from(combinedResults.values())
    .sort((a, b) => b.combinedScore - a.combinedScore)
    .slice(0, topK)
    .map((result) => ({
      id: result.id,
      // Keep the reported score inside [0, 1].
      score: Math.max(0, Math.min(1.0, result.combinedScore)),
      content: result.content,
      fileId: result.fileId,
      fileName: result.fileName,
      title: result.title,
      chunkIndex: result.chunkIndex,
      startPosition: result.startPosition,
      endPosition: result.endPosition,
    }));
}
|
||||
|
||||
/**
 * Creates the configured index with the mapping used by this service.
 *
 * @param vectorDimensions Value written to `dims` of the `vector` field.
 */
private async createIndex(vectorDimensions: number) {
  const keyword = { type: 'keyword' };
  const integer = { type: 'integer' };

  const mappings: any = {
    properties: {
      content: { type: 'text', analyzer: 'standard' },

      vector: {
        type: 'dense_vector',
        dims: vectorDimensions,
        index: true,
        similarity: 'cosine',
      },

      fileId: keyword,
      fileName: keyword,
      title: { type: 'text' },
      fileMimeType: keyword,

      chunkIndex: integer,
      startPosition: integer,
      endPosition: integer,

      userId: keyword,

      // Tenant information (used for multi-tenant isolation)
      tenantId: keyword,

      // Timestamp
      createdAt: { type: 'date' },
    },
  };

  await this.client.indices.create({ index: this.indexName, mappings });

  this.logger.log(
    `Successfully created index ${this.indexName}. Vector dimensions: ${vectorDimensions}`,
  );
}
|
||||
|
||||
/**
 * Maps a raw hit score to the range [0, 1].
 *
 * @param rawScore Score as returned on the hit.
 * @returns 0 for a missing, NaN, zero or negative score; otherwise the score capped at 1.
 */
private normalizeScore(rawScore: number): number {
  const isPositive = Boolean(rawScore) && rawScore > 0;
  return isPositive ? Math.min(1.0, rawScore) : 0;
}
|
||||
|
||||
/**
 * Runs a kNN search on the `vector` field, optionally limited to a set of files.
 *
 * Results are always restricted to `tenantId` when given, otherwise to
 * `userId`, so the kNN filter is never empty. Any failure is logged and
 * reported as an empty result list.
 *
 * @param queryVector Embedding to search with; empty or missing yields `[]`.
 * @param userId Owner filter used only when `tenantId` is not provided.
 * @param topK Number of hits requested (`k` and `size`); candidates are `topK * 2`.
 * @param fileIds Optional file restriction. An empty list yields `[]` without
 *   searching; `undefined` means no file restriction.
 * @param tenantId Optional tenant filter; takes precedence over `userId`.
 * @returns Hits with their score passed through `normalizeScore`, without the stored vector.
 */
private async searchSimilarWithFileFilter(
  queryVector: number[],
  userId: string,
  topK: number = 5,
  fileIds?: string[],
  tenantId?: string,
) {
  try {
    this.logger.log(
      `Vector search with filter: userId=${userId}, tenantId=${tenantId}, vectorDim=${queryVector?.length}, topK=${topK}, fileIds=${fileIds?.length || 'all'}`,
    );

    if (!queryVector || queryVector.length === 0) {
      this.logger.warn('Empty query vector provided');
      return [];
    }

    if (fileIds && fileIds.length === 0) {
      this.logger.log(
        'Filter resulted in 0 files, returning empty results for vector search',
      );
      return [];
    }

    const filterClauses: any[] = [];
    if (fileIds && fileIds.length > 0) {
      filterClauses.push({ terms: { fileId: fileIds } });
    }
    // Tenant isolation: enforce tenantId when provided, else fall back to the
    // legacy userId-based filter. One of the two is always present.
    if (tenantId) {
      filterClauses.push({ term: { tenantId } });
    } else {
      filterClauses.push({ term: { userId } });
    }

    const response = await this.client.search({
      index: this.indexName,
      knn: {
        field: 'vector',
        query_vector: queryVector,
        k: topK,
        num_candidates: topK * 2,
        filter: { bool: { must: filterClauses } },
      },
      size: topK,
      // The embedding itself is not needed by callers; leave it out of the payload.
      _source: {
        excludes: ['vector'],
      },
    });

    const results = response.hits.hits.map((hit: any) => ({
      id: hit._id,
      score: this.normalizeScore(hit._score),
      content: hit._source?.content,
      fileId: hit._source?.fileId,
      fileName: hit._source?.fileName,
      title: hit._source?.title,
      chunkIndex: hit._source?.chunkIndex,
      startPosition: hit._source?.startPosition,
      endPosition: hit._source?.endPosition,
    }));

    this.logger.log(
      `Vector search completed: found ${results.length} results`,
    );
    return results;
  } catch (error) {
    this.logger.error('Vector search failed:', error);
    return [];
  }
}
|
||||
|
||||
/**
 * Runs a fuzzy `match` query on the `content` field, optionally limited to a
 * set of files.
 *
 * Results are restricted to `tenantId` when given, otherwise to `userId`.
 * Any failure is logged and reported as an empty result list.
 *
 * @param query Search text; empty or whitespace-only yields `[]`.
 * @param userId Owner filter used only when `tenantId` is not provided.
 * @param topK Maximum number of hits returned.
 * @param fileIds Optional file restriction. An empty list yields `[]` without
 *   searching; `undefined` means no file restriction.
 * @param tenantId Optional tenant filter; takes precedence over `userId`.
 * @returns Hits with their score passed through `normalizeScore`, without the stored vector.
 */
public async searchFullTextWithFileFilter(
  query: string,
  userId: string,
  topK: number = 5,
  fileIds?: string[],
  tenantId?: string,
) {
  try {
    this.logger.log(
      `Full-text search with filter: userId=${userId}, query="${query}", topK=${topK}, fileIds=${fileIds?.length || 'all'}`,
    );

    if (!query || query.trim().length === 0) {
      this.logger.warn('Empty query provided for full-text search');
      return [];
    }

    if (fileIds && fileIds.length === 0) {
      this.logger.log(
        'Filter resulted in 0 files, returning empty results for full-text search',
      );
      return [];
    }

    const textMatch = {
      match: {
        content: {
          query: query,
          fuzziness: 'AUTO',
        },
      },
    };

    // File restriction first (if any), then the tenant-or-user scope.
    const scopeFilters: any[] = [];
    if (fileIds && fileIds.length > 0) {
      scopeFilters.push({ terms: { fileId: fileIds } });
    }
    scopeFilters.push(
      tenantId ? { term: { tenantId } } : { term: { userId } },
    );

    const searchRequest: any = {
      index: this.indexName,
      query: {
        bool: {
          must: [textMatch],
          filter: scopeFilters,
        },
      },
      size: topK,
      _source: {
        excludes: ['vector'],
      },
    };

    const response = await this.client.search(searchRequest);

    const toResult = (hit: any) => {
      const source = hit._source;
      return {
        id: hit._id,
        score: this.normalizeScore(hit._score),
        content: source?.content,
        fileId: source?.fileId,
        fileName: source?.fileName,
        title: source?.title,
        chunkIndex: source?.chunkIndex,
        startPosition: source?.startPosition,
        endPosition: source?.endPosition,
      };
    };
    const results = response.hits.hits.map(toResult);

    this.logger.log(
      `Full-text search completed: found ${results.length} results`,
    );
    return results;
  } catch (err) {
    this.logger.error('Full-text search failed:', err);
    return [];
  }
}
|
||||
|
||||
/**
 * Retrieves all chunks of the specified file, ordered by `chunkIndex` ascending.
 *
 * The lookup is scoped by `tenantId` when given, otherwise by `userId`.
 * A single request with `size: 10000` is issued, so at most 10000 chunks are
 * returned. Any failure is logged and reported as an empty list.
 *
 * @param fileId File whose chunks are fetched.
 * @param userId Owner filter used only when `tenantId` is not provided.
 * @param tenantId Optional tenant filter; takes precedence over `userId`.
 * @returns Chunks with id, index, content, positions and file name (no vector).
 */
async getFileChunks(fileId: string, userId: string, tenantId?: string) {
  try {
    this.logger.log(`Getting chunks for file ${fileId}`);

    const filter: any[] = [
      { term: { fileId } },
      tenantId ? { term: { tenantId } } : { term: { userId } },
    ];

    const response = await this.client.search({
      index: this.indexName,
      query: { bool: { filter } },
      sort: [{ chunkIndex: 'asc' }],
      size: 10000,
      _source: {
        excludes: ['vector'],
      },
    });

    const chunks = response.hits.hits.map((hit: any) => {
      const source = hit._source;
      return {
        id: hit._id,
        chunkIndex: source.chunkIndex,
        content: source.content,
        startPosition: source.startPosition,
        endPosition: source.endPosition,
        fileName: source.fileName,
      };
    });

    this.logger.log(`Found ${chunks.length} chunks for file ${fileId}`);
    return chunks;
  } catch (err) {
    this.logger.error(`Failed to get chunks for file ${fileId}`, err);
    return [];
  }
}
|
||||
}
|
||||
Reference in New Issue
Block a user