forked from hangshuo652/aurak
0a9588abb7
- Add pagination support to findAll (page, limit query params) - Add findByTemplateId method to service - Add GET /by-template/:templateId endpoint to controller - Service already includes CRUD for QuestionBank and QuestionBankItem
60 lines
1.8 KiB
Python
60 lines
1.8 KiB
Python
import fitz # PyMuPDF
|
|
import sys
|
|
import os
|
|
import json
|
|
|
|
def convert_pdf_to_images(pdf_path, output_dir, zoom=2.0, quality=85):
    """Render every page of a PDF to a JPEG file inside ``output_dir``.

    Args:
        pdf_path: Path of the PDF document to open with PyMuPDF.
        output_dir: Directory that receives the ``page-<n>.jpg`` files. It is
            created (including missing parents) when it does not exist.
        zoom: Scale factor applied on both axes. 2.0 means 200% scaling
            (approx 144 DPI if the original is 72 DPI).
        quality: JPEG quality passed to ``Pixmap.tobytes``.

    Returns:
        On success, ``{"success": True, "images": [...], "totalPages": n}``
        where each image entry holds ``path``, the 1-based ``pageIndex`` and
        the file ``size`` in bytes. On any failure,
        ``{"success": False, "error": <message>}``; exceptions are never
        propagated to the caller.
    """
    try:
        # exist_ok avoids the check-then-create race of os.path.exists().
        os.makedirs(output_dir, exist_ok=True)

        # Matrix for scaling (DPI control).
        scale_matrix = fitz.Matrix(zoom, zoom)
        images = []

        # The context manager closes the document even when a page fails to
        # render or a file cannot be written.
        with fitz.open(pdf_path) as doc:
            for page_number, page in enumerate(doc, start=1):
                pix = page.get_pixmap(matrix=scale_matrix, colorspace=fitz.csRGB)

                output_path = os.path.join(output_dir, f"page-{page_number}.jpg")
                # In newer PyMuPDF, save() doesn't take quality; encode with
                # tobytes() and write the bytes ourselves.
                img_bytes = pix.tobytes("jpg", jpg_quality=quality)
                with open(output_path, "wb") as f:
                    f.write(img_bytes)

                images.append({
                    "path": output_path,
                    "pageIndex": page_number,
                    "size": os.path.getsize(output_path),
                })

        return {
            "success": True,
            "images": images,
            "totalPages": len(images),
        }
    except Exception as e:
        # Top-level boundary: the caller consumes a JSON-serialisable result
        # instead of handling exceptions.
        return {
            "success": False,
            "error": str(e),
        }
|
|
|
|
def main(argv):
    """Run the converter from the command line and print a JSON result.

    Args:
        argv: Full argument vector, program name first:
            ``[prog, pdf_path, output_dir, [zoom], [quality]]``.

    Returns:
        Process exit status: 1 when the arguments are missing or the optional
        ``zoom``/``quality`` values are not numeric, 0 once a conversion has
        been attempted (its outcome is reported in the printed JSON).
    """
    usage = "Usage: python pdf_to_images.py <pdf_path> <output_dir> [zoom] [quality]"

    if len(argv) < 3:
        print(json.dumps({"success": False, "error": usage}))
        return 1

    pdf_path = argv[1]
    output_dir = argv[2]

    # Report malformed numbers as JSON, like every other failure, so a caller
    # parsing stdout never receives a bare traceback.
    try:
        zoom = float(argv[3]) if len(argv) > 3 else 2.0
        quality = int(argv[4]) if len(argv) > 4 else 85
    except ValueError:
        print(json.dumps({
            "success": False,
            "error": f"Invalid zoom or quality value. {usage}",
        }))
        return 1

    result = convert_pdf_to_images(pdf_path, output_dir, zoom, quality)
    print(json.dumps(result))
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))