Skip to content

Instantly share code, notes, and snippets.

@codade
Forked from agarciadom/find-color-pages
Last active May 25, 2021 09:32
Show Gist options
  • Save codade/1b373181107207e9bea9ece20440b8e1 to your computer and use it in GitHub Desktop.
Save codade/1b373181107207e9bea9ece20440b8e1 to your computer and use it in GitHub Desktop.
Python 3.7+ script that lists the color pages in a PDF along with their CMYK ink mixes, as computed by the 'inkcov' device in Ghostscript 9.05+. Extended to list all color pages in the format required by the on-demand publisher BoD.
#!/usr/bin/env python3
# Simple script for finding and counting the color pages in a PDF
# Copyright (C) 2013-2019 Antonio Garcia-Dominguez
# Licensed under the GPLv3
#
# This script is based on the following thread (thanks for the tip!):
#
# http://tex.stackexchange.com/questions/53493
#
# On Mac OS X, you may need to install "gs" through "brew install ghostscript".
import logging
import re
import subprocess
from os import path, access, R_OK
VERSION = "1.0.4"
# Matches a single ink-coverage token: a leading 0 or 1, a literal decimal
# point, and at least one fractional digit. The dot is escaped and the pattern
# is anchored at the end so that tokens such as "0x123", "100" or "0.5abc" are
# rejected instead of being handed to float().
RE_FLOAT = re.compile(r"[01]\.[0-9]+\Z")
# Number of leading fields on a coverage line (cyan, magenta, yellow, black).
CMYK_NCOLORS = 4
# Only errors are shown by default; the --debug flag lowers the threshold.
logging.basicConfig(level=logging.ERROR)
def is_color(c, m, y, k):
    """Tell whether a page with the given CMYK ink coverage is a color page.

    A page counts as color as soon as any of the cyan, magenta or yellow
    coverages is above zero. The black coverage ``k`` is accepted so that a
    full CMYK tuple can be unpacked into the call, but it does not influence
    the result.
    """
    return any(ink > 0 for ink in (c, m, y))
def cmyk_per_page(pdf_file):
    """Yield one ``(c, m, y, k)`` tuple of floats per coverage line from gs.

    Runs ``gs`` with the ``inkcov`` device on ``pdf_file`` and scans its
    standard output line by line. Every line whose first four
    whitespace-separated fields look like ink-coverage numbers produces one
    tuple; all other lines (banners, warnings, ...) are skipped.

    This is a generator: the checks and the ``gs`` invocation only happen
    once iteration starts.

    Raises:
        Exception: if ``pdf_file`` is not an existing regular file, or is
            not readable by the current user.
    """
    if not path.isfile(pdf_file):
        raise Exception("{} does not exist or is not a file".format(pdf_file))
    if not access(pdf_file, R_OK):
        raise Exception("{} is not readable".format(pdf_file))

    command = ["gs", "-o", "-", "-sDEVICE=inkcov", pdf_file]
    # The context manager closes the pipe and waits for gs to exit, so no
    # file descriptor or zombie process is left behind, even when the caller
    # stops iterating early.
    with subprocess.Popen(command, stdout=subprocess.PIPE) as gs_inkcov:
        for raw_line in gs_inkcov.stdout:
            # gs may echo arbitrary bytes (e.g. in diagnostics); replace
            # undecodable ones rather than aborting the whole analysis.
            line = raw_line.decode('utf8', errors='replace').rstrip()
            logging.debug("Read line %s", line)
            fields = line.split()
            if (len(fields) >= CMYK_NCOLORS
                    and all(RE_FLOAT.match(field)
                            for field in fields[:CMYK_NCOLORS])):
                cmyk = tuple(float(value) for value in fields[:CMYK_NCOLORS])
                logging.debug("Extracted fields %s", cmyk)
                yield cmyk

    # Leaving the 'with' block has waited for the process, so returncode is
    # set. Report failures instead of silently returning partial results.
    if gs_inkcov.returncode:
        logging.error("gs exited with status %d while analyzing %s",
                      gs_inkcov.returncode, pdf_file)
def count_page_types(pdf_file):
    """Count the black-and-white and the color pages reported for a PDF.

    Every CMYK tuple produced by ``cmyk_per_page`` is classified with
    ``is_color``.

    Returns:
        A ``(nb, nc)`` tuple: the number of B/W pages followed by the number
        of color pages.
    """
    verdicts = [is_color(*cmyk) for cmyk in cmyk_per_page(pdf_file)]
    color_total = sum(1 for colored in verdicts if colored)
    bw_total = len(verdicts) - color_total
    return (bw_total, color_total)
def find_color_pages(pdf_file):
    """Yield ``(page_number, cmyk)`` for every color page of a PDF.

    Page numbers are 1-based and follow the order in which
    ``cmyk_per_page`` produces its tuples; pages that ``is_color`` rejects
    are skipped.
    """
    numbered_pages = enumerate(cmyk_per_page(pdf_file), 1)
    for page_number, cmyk in numbered_pages:
        if not is_color(*cmyk):
            continue
        logging.debug("Page %d is a color page", page_number)
        yield (page_number, cmyk)
def group_consecutive_pages(page_numbers):
    """Collapse ascending page numbers into ``[first, last]`` runs.

    A page extends the current run when it directly follows the run's last
    page; otherwise it starts a new run. For example ``[1, 2, 3, 7]``
    becomes ``[[1, 3], [7, 7]]``. An empty input yields an empty list.
    """
    runs = []
    for number in page_numbers:
        if runs and number == runs[-1][1] + 1:
            runs[-1][1] = number
        else:
            runs.append([number, number])
    return runs


def format_page_ranges(page_ranges):
    """Render ``[first, last]`` runs as text such as ``"1-3, 7, 9-10"``.

    A run covering a single page is written as that page number alone.
    """
    return ', '.join(
        f'{first}' if first == last else f'{first}-{last}'
        for first, last in page_ranges)


if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser(description="""Lists or counts the
colour pages of a PDF file on standard output. The utility
requires having the 'gs' tool from Ghostscript 9.05 or later
installed and available through the PATH.""")
    parser.add_argument("file", help="PDF file to be analyzed")
    parser.add_argument("--count", "-c", action='store_true',
                        help="Print the number of pages instead of listing them")
    parser.add_argument("--debug", "-d", action='store_true',
                        help="Enables verbose debugging output")
    parser.add_argument("--noheader", "-H", action='store_true',
                        help="Disables the first header line")
    parser.add_argument("--pcolor", "-C", metavar="PC", type=float,
                        help="Color page price (for total cost report, " +
                        "requires --pblack as well)")
    parser.add_argument("--pblack", "-B", metavar="PB", type=float,
                        help="B/W page price (for total cost report, " +
                        "requires --pcolor as well)")
    args = parser.parse_args()

    if args.debug:
        logging.getLogger('').setLevel(logging.DEBUG)

    # The two prices only make sense together. parser.error() prints the
    # usage line plus the message and exits, instead of dumping a traceback.
    if args.pcolor is not None and args.pblack is None:
        parser.error(
            "Page price was specified for color but not for B/W pages")
    if args.pblack is not None and args.pcolor is None:
        parser.error(
            "Page price was specified for B/W but not for color pages")

    if args.count:
        # Only the number of color pages is reported in --count mode.
        print(count_page_types(args.file)[1])
    elif args.pcolor is not None and args.pblack is not None:
        nb, nc = count_page_types(args.file)
        total_cost = args.pblack * nb + args.pcolor * nc
        print(("Total cost ({0:d} B/W @ {1:3.6g}/page "
               + "and {2:d} color @ {3:3.6g}/page): {4:3.6g}")
              .format(nb, args.pblack, nc, args.pcolor, total_cost))
    else:
        if not args.noheader:
            print("\t".join(("n", "c", "m", "y", "k")))

        # Rows are printed as soon as they are found; the page numbers are
        # kept so that the BoD summary can be built afterwards.
        color_page_numbers = []
        for n, cmyk in find_color_pages(args.file):
            print("\t".join(str(s) for s in (n,) + cmyk))
            color_page_numbers.append(n)

        # Summary in the format expected by BoD: total number of color
        # pages, then their positions as comma-separated pages and ranges.
        color_pos_print = format_page_ranges(
            group_consecutive_pages(color_page_numbers))
        print('\nFarbseiten Gesamt: {}'.format(len(color_page_numbers)))
        print('\nPosition Farbseiten für Druck:\n"{}"'.format(color_pos_print))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment