Skip to content

Instantly share code, notes, and snippets.

@sujaldev
Created June 6, 2024 03:45
Show Gist options
  • Save sujaldev/bb0fb7393969413490f5b00431b85330 to your computer and use it in GitHub Desktop.
Save sujaldev/bb0fb7393969413490f5b00431b85330 to your computer and use it in GitHub Desktop.
Removes watermark from PDF
# "Borrowed" from https://github.com/pymupdf/PyMuPDF/discussions/1855
import sys
import argparse
from pathlib import Path
import pymupdf
# Byte markers searched for in a page's (cleaned) content stream.
WATERMARK_MARKER = b"/Subtype/Watermark"
ARTIFACT_START = b"/Artifact"
ARTIFACT_END = b"EMC"
# Number of bytes in front of the "/Artifact" tag that are removed together
# with it. NOTE(review): the original recipe describes the removed span as
# "q ... EMC", so these are presumably the "q" operator and its separator
# emitted by clean_contents() -- confirm against a real cleaned stream.
ARTIFACT_PREFIX_LEN = 2


def has_watermark(stream):
    """Return True if *stream* contains the watermark subtype marker.

    A membership test is used so that a marker at offset 0 is detected too
    (``find(...) > 0`` would miss it).
    """
    return WATERMARK_MARKER in stream


def strip_artifacts(stream):
    """Return a copy of *stream* with every ``/Artifact ... EMC`` span removed.

    Each removed span starts ``ARTIFACT_PREFIX_LEN`` bytes before the
    ``/Artifact`` tag (clamped to the start of the buffer) and ends right
    after the next ``EMC`` token. Note that *all* artifact sections are
    removed, not only those tagged as watermarks.

    Args:
        stream: Content-stream bytes (``bytes`` or ``bytearray``); it is not
            modified.

    Returns:
        A new ``bytearray`` with the artifact spans cut out. An ``/Artifact``
        tag without a closing ``EMC`` is left in place.
    """
    cleaned = bytearray(stream)
    while True:
        start = cleaned.find(ARTIFACT_START)
        if start < 0:
            break
        end = cleaned.find(ARTIFACT_END, start)
        if end < 0:
            # Unterminated section: nothing can be removed safely, and
            # looping again would find the same tag forever.
            break
        # Clamp at 0 -- a negative start index would wrap around to the end
        # of the buffer and delete the wrong bytes (or nothing at all).
        cut_from = max(start - ARTIFACT_PREFIX_LEN, 0)
        del cleaned[cut_from:end + len(ARTIFACT_END)]
    return cleaned


def remove_watermarks(doc):
    """Strip artifact sections from every watermarked page of *doc* in place.

    Args:
        doc: An opened PyMuPDF document.

    Returns:
        The number of pages whose content stream was rewritten.
    """
    changed = 0
    for page in doc:
        page.clean_contents()
        xrefs = page.get_contents()
        if not xrefs:
            # Page without any content stream: nothing to inspect.
            continue
        # As in the original recipe, only the first content xref is
        # rewritten. NOTE(review): this relies on clean_contents() leaving a
        # single consolidated stream -- confirm in the PyMuPDF docs.
        xref = xrefs[0]
        cont = page.read_contents()
        if not has_watermark(cont):
            continue
        print("Watermark detected.")
        doc.update_stream(xref, strip_artifacts(cont))  # replace the original source
        changed += 1
    return changed


def main(argv=None):
    """Command-line entry point: ``watermark-remover INPUT OUTPUT``.

    Args:
        argv: Argument list to parse; ``None`` means ``sys.argv[1:]``.
    """
    parser = argparse.ArgumentParser(
        prog="watermark-remover",
        description="Remove watermark from PDF",
    )
    parser.add_argument("input")
    parser.add_argument("output")
    args = parser.parse_args(argv)

    inputfile = Path(args.input).resolve()
    outputfile = Path(args.output).resolve()
    if inputfile == outputfile:
        # Fail early with a clear message instead of erroring at save time
        # after all pages have already been processed.
        parser.error("output must be a different file than input")

    doc = pymupdf.open(inputfile)
    try:
        remove_watermarks(doc)
        doc.ez_save(outputfile)
    finally:
        doc.close()


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment