forked from hangshuo652/aurak
0a9588abb7
- Add pagination support to findAll (page, limit query params) - Add findByTemplateId method to service - Add GET /by-template/:templateId endpoint to controller - Service already includes CRUD for QuestionBank and QuestionBankItem
182 lines
5.0 KiB
TypeScript
182 lines
5.0 KiB
TypeScript
import { Injectable, Logger } from '@nestjs/common';
import { ConfigService } from '@nestjs/config';
import * as fs from 'fs/promises';
import * as path from 'path';
import { PDFDocument } from 'pdf-lib';
import { exec, execFile } from 'child_process';
import { promisify } from 'util';
import {
  Pdf2ImageOptions,
  ImageInfo,
  ConversionResult,
} from './pdf2image.interface';
import { I18nService } from '../i18n/i18n.service';
|
|
|
|
// execFile passes arguments as an array and never spawns a shell, so paths
// containing quotes, spaces or shell metacharacters cannot alter the command.
const execFileAsync = promisify(execFile);

/**
 * Maximum number of bytes the conversion script may write to stdout/stderr.
 * Raised above Node's default so that the JSON listing for large documents
 * is not truncated.
 */
const CONVERSION_MAX_BUFFER_BYTES = 16 * 1024 * 1024;

/** Extracts a printable message from a value caught in a `catch` clause. */
function describeError(error: unknown): string {
  return error instanceof Error ? error.message : String(error);
}

/**
 * Converts PDF documents to per-page images by delegating the rendering to
 * the `pdf_to_images.py` script and provides helpers to clean up the output.
 */
@Injectable()
export class Pdf2ImageService {
  private readonly logger = new Logger(Pdf2ImageService.name);
  private readonly tempDir: string;

  constructor(
    private readonly configService: ConfigService,
    private readonly i18nService: I18nService,
  ) {
    // Default output root; can be overridden per call through `options.outDir`.
    this.tempDir = this.configService.get<string>('TEMP_DIR', './temp');
  }

  /**
   * Convert a PDF into a list of images.
   *
   * The page count is read with pdf-lib, then `pdf_to_images.py` (resolved
   * relative to `process.cwd()`) is executed with the PDF path, a freshly
   * created output directory, a zoom factor (`density / 72`) and `quality`.
   * The script is expected to print a JSON object with `success`, `images`
   * and, on failure, `error` to stdout.
   *
   * NOTE(review): `options.format` is only logged here; it is not forwarded
   * to the script — confirm whether the script should receive it.
   *
   * @param pdfPath Path of the PDF file to convert.
   * @param options Optional density (dpi), quality, format and output root.
   * @returns The images reported by the script together with page counters.
   * @throws Error if the PDF is missing, `density` is not a positive finite
   *   number, or the conversion fails (the output directory is removed in
   *   the latter case).
   */
  async convertToImages(
    pdfPath: string,
    options: Pdf2ImageOptions = {},
  ): Promise<ConversionResult> {
    const {
      density = 300,
      quality = 85,
      format = 'jpeg',
      outDir = this.tempDir,
    } = options;

    // Validate PDF file
    try {
      await fs.access(pdfPath);
    } catch {
      throw new Error(`PDF file not found: ${pdfPath}`);
    }

    // The zoom factor is derived from density, so reject values that would
    // produce a meaningless factor before doing any work.
    if (!Number.isFinite(density) || density <= 0) {
      throw new Error(`Invalid density: ${density}`);
    }

    // Create a unique output directory. mkdtemp appends random characters to
    // the prefix, so two conversions started within the same millisecond can
    // no longer end up sharing (and later deleting) the same directory.
    await fs.mkdir(outDir, { recursive: true });
    const outputDir = await fs.mkdtemp(
      path.join(outDir, `pdf2img_${Date.now()}_`),
    );

    this.logger.log(`Converting PDF to images: ${pdfPath}`);

    try {
      // Get total page count using pdf-lib instead of pdfinfo
      const pdfBytes = await fs.readFile(pdfPath);
      const pdfDoc = await PDFDocument.load(pdfBytes, {
        ignoreEncryption: true,
      });
      const totalPages = pdfDoc.getPageCount();

      if (totalPages === 0) {
        throw new Error(this.i18nService.getMessage('pdfPageCountError'));
      }

      this.logger.log(
        `Starting PDF conversion: ${path.basename(pdfPath)} (${totalPages} pages)`,
      );
      this.logger.log(`Output directory: ${outputDir}`);
      this.logger.log(
        `Conversion parameters: density=${density}dpi, quality=${quality}%, format=${format}`,
      );

      // Convert using Python script
      const zoom = (density / 72).toFixed(2);
      const pythonScript = path.join(process.cwd(), 'pdf_to_images.py');
      const args = [pythonScript, pdfPath, outputDir, zoom, String(quality)];

      this.logger.log(`Executing conversion command: python ${args.join(' ')}`);
      const { stdout } = await execFileAsync('python', args, {
        maxBuffer: CONVERSION_MAX_BUFFER_BYTES,
      });

      const images = this.parseConversionOutput(stdout);
      const successCount = images.length;
      // Clamp so an over-reporting script cannot yield a negative count.
      const failedCount = Math.max(0, totalPages - successCount);

      this.logger.log(
        `🎉 PDF conversion completed! ✅ Success: ${successCount} pages, ❌ Failed: ${failedCount} pages, 📊 Total pages: ${totalPages}`,
      );

      return {
        images,
        totalPages,
        successCount,
        failedCount,
      };
    } catch (error: unknown) {
      // Remove the output directory together with any partially written
      // images; an "only if empty" cleanup would leave them behind.
      await this.removeOutputDirectory(outputDir);
      throw new Error(
        `PDF to image conversion failed: ${describeError(error)}`,
      );
    }
  }

  /**
   * Batch convert multiple PDFs, one after another.
   *
   * @param pdfPaths Paths of the PDF files to convert, processed in order.
   * @param options Conversion options applied to every file.
   * @returns One result per input, in input order.
   * @throws The first conversion error encountered; remaining files are not
   *   processed.
   */
  async batchConvert(
    pdfPaths: string[],
    options?: Pdf2ImageOptions,
  ): Promise<ConversionResult[]> {
    const results: ConversionResult[] = [];
    for (const pdfPath of pdfPaths) {
      try {
        results.push(await this.convertToImages(pdfPath, options));
      } catch (error: unknown) {
        this.logger.error(
          `Failed to convert ${pdfPath}: ${describeError(error)}`,
        );
        throw error;
      }
    }
    return results;
  }

  /**
   * Cleanup image files.
   *
   * Deletes every listed image (failures are logged, not thrown) and then
   * removes each distinct parent directory that has become empty.
   *
   * @param images Images whose files should be deleted.
   */
  async cleanupImages(images: ImageInfo[]): Promise<void> {
    for (const image of images) {
      try {
        await fs.unlink(image.path);
        this.logger.log(`Deleted: ${image.path}`);
      } catch (error: unknown) {
        this.logger.warn(
          `Failed to delete ${image.path}: ${describeError(error)}`,
        );
      }
    }

    // Try to cleanup empty directories. Images may come from more than one
    // conversion, so every distinct parent directory is considered.
    const parentDirs = new Set(images.map((image) => path.dirname(image.path)));
    for (const dir of parentDirs) {
      await this.cleanupDirectory(dir);
    }
  }

  /**
   * Cleanup directory: removes `dir` only when it contains no entries.
   * Any failure (including a missing directory) is logged as a warning.
   *
   * @param dir Directory to remove if empty.
   */
  async cleanupDirectory(dir: string): Promise<void> {
    try {
      const entries = await fs.readdir(dir);
      if (entries.length === 0) {
        await fs.rmdir(dir);
        this.logger.log(`Cleaned up empty directory: ${dir}`);
      }
    } catch (error: unknown) {
      this.logger.warn(
        `Failed to cleanup directory ${dir}: ${describeError(error)}`,
      );
    }
  }

  /**
   * Check if image quality is acceptable, judged solely by file size.
   *
   * @param imageInfo Image to check; `size` is divided by 1024 to get KB.
   * @param minSizeKB Minimum acceptable size in KB (default 10).
   * @returns `false` (with a warning logged) when the image is smaller than
   *   `minSizeKB`, otherwise `true`.
   */
  isImageQualityGood(imageInfo: ImageInfo, minSizeKB: number = 10): boolean {
    const sizeKB = imageInfo.size / 1024;
    if (sizeKB < minSizeKB) {
      this.logger.warn(
        `Image too small: ${sizeKB.toFixed(2)}KB < ${minSizeKB}KB`,
      );
      return false;
    }
    return true;
  }

  /** Validates the JSON printed by the conversion script and returns its image list. */
  private parseConversionOutput(stdout: string): ImageInfo[] {
    let parsed: unknown;
    try {
      parsed = JSON.parse(stdout);
    } catch {
      throw new Error('Python conversion returned invalid JSON output');
    }

    if (typeof parsed !== 'object' || parsed === null) {
      throw new Error('Python conversion returned an unexpected payload');
    }

    const payload = parsed as {
      success?: unknown;
      error?: unknown;
      images?: unknown;
    };

    if (!payload.success) {
      throw new Error(`Python conversion failed: ${String(payload.error)}`);
    }
    if (!Array.isArray(payload.images)) {
      throw new Error('Python conversion did not return an image list');
    }
    // NOTE(review): element shape is trusted to match ImageInfo — the script's
    // output schema is not visible here.
    return payload.images as ImageInfo[];
  }

  /** Recursively removes a conversion output directory, logging instead of throwing on failure. */
  private async removeOutputDirectory(dir: string): Promise<void> {
    try {
      await fs.rm(dir, { recursive: true, force: true });
    } catch (error: unknown) {
      this.logger.warn(
        `Failed to cleanup directory ${dir}: ${describeError(error)}`,
      );
    }
  }
}
|