Files
aurak/server/pdf_to_images.py
T
Developer 0a9588abb7 feat: implement QuestionBank CRUD with pagination and template query
- Add pagination support to findAll (page, limit query params)
- Add findByTemplateId method to service
- Add GET /by-template/:templateId endpoint to controller
- Service already includes CRUD for QuestionBank and QuestionBankItem
2026-04-23 17:19:11 +08:00

60 lines
1.8 KiB
Python

import fitz # PyMuPDF
import sys
import os
import json
def convert_pdf_to_images(pdf_path, output_dir, zoom=2.0, quality=85):
    """Render every page of a PDF to a JPEG file in ``output_dir``.

    Pages are written as ``page-1.jpg``, ``page-2.jpg``, ... (1-based).

    Args:
        pdf_path: Path of the PDF document to open.
        output_dir: Directory that receives the images; created (including
            missing parents) when it does not exist yet.
        zoom: Scale factor applied on both axes. 2.0 means 200% scaling
            (approx. 144 DPI if the original is 72 DPI).
        quality: JPEG quality forwarded to the encoder as ``jpg_quality``.

    Returns:
        On success::

            {"success": True,
             "images": [{"path": str, "pageIndex": int, "size": int}, ...],
             "totalPages": int}

        where ``pageIndex`` is 1-based and ``size`` is the image size in
        bytes. On any failure the exception is not propagated; instead
        ``{"success": False, "error": "<message>"}`` is returned.
    """
    try:
        # exist_ok avoids the check-then-create race of a separate
        # os.path.exists() test.
        os.makedirs(output_dir, exist_ok=True)

        doc = fitz.open(pdf_path)
        try:
            images = []
            # Matrix for scaling (DPI control).
            mat = fitz.Matrix(zoom, zoom)

            for page_number in range(1, len(doc) + 1):
                page = doc.load_page(page_number - 1)
                pix = page.get_pixmap(matrix=mat, colorspace=fitz.csRGB)
                output_path = os.path.join(
                    output_dir, f"page-{page_number}.jpg"
                )

                # Encode in memory so the JPEG quality can be set
                # explicitly, then write the bytes out ourselves.
                img_bytes = pix.tobytes("jpg", jpg_quality=quality)
                with open(output_path, "wb") as f:
                    f.write(img_bytes)

                images.append({
                    "path": output_path,
                    "pageIndex": page_number,
                    # Same value the file has on disk, without another stat.
                    "size": len(img_bytes),
                })
        finally:
            # Release the document even when a page fails to render or a
            # file cannot be written.
            doc.close()

        return {
            "success": True,
            "images": images,
            "totalPages": len(images),
        }
    except Exception as e:
        # Boundary of the converter: callers expect a result dict rather
        # than an exception.
        return {
            "success": False,
            "error": str(e),
        }
def main(argv=None):
    """Command-line entry point; prints exactly one JSON object to stdout.

    Usage: python pdf_to_images.py <pdf_path> <output_dir> [zoom] [quality]

    Args:
        argv: Full argument vector including the program name. Defaults to
            ``sys.argv`` when omitted.

    Returns:
        Process exit code: 1 when the arguments are missing or malformed,
        0 once the conversion has been attempted (its outcome is reported
        through the ``success`` field of the printed JSON).
    """
    args = sys.argv if argv is None else argv

    if len(args) < 3:
        print(json.dumps({"success": False, "error": "Usage: python pdf_to_images.py <pdf_path> <output_dir> [zoom] [quality]"}))
        return 1

    pdf_path = args[1]
    output_dir = args[2]

    try:
        zoom = float(args[3]) if len(args) > 3 else 2.0
        quality = int(args[4]) if len(args) > 4 else 85
    except ValueError as exc:
        # Keep the JSON-on-stdout contract instead of dying with a traceback
        # when zoom/quality are not numeric.
        print(json.dumps({
            "success": False,
            "error": f"Invalid zoom/quality argument: {exc}",
        }))
        return 1

    result = convert_pdf_to_images(pdf_path, output_dir, zoom, quality)
    print(json.dumps(result))
    return 0


if __name__ == "__main__":
    sys.exit(main())