Skip to content

PDF skills

Convert PDF to image

from pathlib import Path
import numpy as np
import cv2
from pdf2image import convert_from_path

def _pil_to_cv(image) -> np.ndarray:
    """Convert a PIL image into a ``uint8`` array using OpenCV channel order.

    Two-dimensional (single-channel) data is returned untouched. Three- and
    four-channel data is reordered from RGB(A) to BGR(A). Any other channel
    count is returned as converted by NumPy, without reordering.
    """
    pixels = np.array(image, dtype=np.uint8)
    if pixels.ndim == 2:
        # Single channel: there is no channel order to swap.
        return pixels

    channel_count = pixels.shape[2]
    if channel_count == 3:
        return cv2.cvtColor(pixels, cv2.COLOR_RGB2BGR)
    if channel_count == 4:
        return cv2.cvtColor(pixels, cv2.COLOR_RGBA2BGRA)
    return pixels


def pdf_to_image(file_path: Path, dpi: int, *, grayscale: bool = True) -> list:
    """Render a PDF with pdf2image and return the result as OpenCV arrays.

    Args:
        file_path: Location of the PDF file, forwarded to
            ``convert_from_path``.
        dpi: Rendering resolution, forwarded to ``convert_from_path``.
        grayscale: Forwarded to ``convert_from_path``. Defaults to ``True``,
            which is what this function always requested before the option
            was exposed; pass ``False`` to request colour output, which is
            then reordered to BGR/BGRA for OpenCV.

    Returns:
        A list of ``numpy.ndarray`` objects with dtype ``uint8``, one for
        each image returned by ``convert_from_path``, in the same order.
    """
    rendered_images = convert_from_path(
        file_path,
        dpi=dpi,
        grayscale=grayscale,
    )
    return [_pil_to_cv(rendered) for rendered in rendered_images]

Comments