PDF-Dateien pixelbasiert miteinander vergleichen
Ausgehend von einer Anfrage auf der DANTE-Beraterkreis-Mailingliste hier einige Ideen, wie man PDF-Dateien pixelbasiert miteinander vergleichen kann.
Schritt 1: Umwandlung ins PNG-Format. Ich nutze dazu pdftoppm aus dem Poppler-Paket.
# Render 1.pdf as PNG; the image data is written to stdout and redirected into 1.png.
# NOTE(review): presumably only suitable for single-page PDFs -- with several
# pages all images would end up in the same stream; confirm, or consider
# pdftoppm's -singlefile option with an explicit output root.
pdftoppm -png 1.pdf > 1.png
Das Gleiche dann noch einmal für die zweite Datei.
Dann mit ein wenig Python-Magie die beiden PNGs vergleichen:
"""Compare two rendered PDF pages ("1.png" and "2.png") pixel by pixel.

Three independent comparisons are run one after another:

1. Pillow's ``ImageChops.difference``  -> "difference.png" (also displayed).
2. A NumPy absolute difference         -> "difference.png" plus mean/max stats.
3. The structural similarity (SSIM)    -> "ssim_diff.png" plus the SSIM score.
"""
import cv2
import numpy as np  # numerical
from PIL import Image, ImageChops  # generic
from skimage.metrics import structural_similarity as ssim

# Load each image once. Converting to RGB gives both inputs the same mode
# (ImageChops.difference rejects mismatched modes) and guarantees the three
# channels that the grayscale conversion further down expects.
img1 = Image.open("1.png").convert("RGB")
img2 = Image.open("2.png").convert("RGB")

# --- Variant 1: Pillow -----------------------------------------------------
# Compute the per-channel absolute difference.
diff = ImageChops.difference(img1, img2)

# Save and show the difference image.
diff.save("difference.png")
diff.show()

# --- Variant 2: NumPy ------------------------------------------------------
# The array-based comparisons need identical dimensions; fail with a clear
# message instead of an opaque broadcasting error.
if img1.size != img2.size:
    raise ValueError(
        f"Image sizes differ: {img1.size} vs. {img2.size}; "
        "render both PDFs with the same page size and resolution."
    )

arr1 = np.array(img1)
arr2 = np.array(img2)

# Widen to int before subtracting so uint8 arithmetic cannot wrap around.
diff = np.abs(arr1.astype(int) - arr2.astype(int))

# Save result.
Image.fromarray(diff.astype(np.uint8)).save("difference.png")

# Or compute some stats:
print("Mean difference:", diff.mean())
print("Max difference:", diff.max())

# --- Variant 3: SSIM -------------------------------------------------------
# Convert to grayscale. The arrays come from Pillow and are therefore in RGB
# channel order (not OpenCV's BGR), so the RGB conversion code is required to
# apply the correct luminance weights.
gray1 = cv2.cvtColor(arr1, cv2.COLOR_RGB2GRAY)
gray2 = cv2.cvtColor(arr2, cv2.COLOR_RGB2GRAY)

score, diff = ssim(gray1, gray2, full=True)
print("SSIM:", score)

# Local SSIM values can be slightly negative; clip to [0, 1] before scaling
# to 0-255 so the cast to uint8 cannot wrap around.
diff = (np.clip(diff, 0.0, 1.0) * 255).astype("uint8")
cv2.imwrite("ssim_diff.png", diff)