-
-
Save s-leroux/59c6a0c5421c034d759263d6f55757bb to your computer and use it in GitHub Desktop.
Add hyperlinks to PDFs created by Inkscape
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# svglinkify.py - Add hyperlinks to PDFs generated by Inkscape | |
# Copyright (C) 2015 Mansour Behabadi <mansour@oxplot.com> | |
# | |
# This script comes with no warranty whatsoever. Use at your own risk. | |
# If you decide to distribute verbatim or modified versions of this | |
# code, you must retain this copyright notice. | |
# | |
# Usage: svglinkify.py <svg-file> <inkscape-gen-pdf> <linkified-pdf> | |
# Requires: | |
# qpdf | |
# inkscape | |
# python 2/3 | |
# | |
# WARNING Since this script is one heck of a hack, you should follow the | |
# instructions below to the letter, or you will fail miserably. | |
# | |
# 1. Start by making an SVG that looks nice and everything and add a | |
# piece of text somewhere. | |
# | |
# 2. Select the rectangle tool and draw a box on top of the text. | |
# This box will be the clickable area of our link. Set its fill color | |
# to #ff00ff (magenta) and remove any strokes. | |
# | |
# 3. Right click the box and select "Create Link". In the "Object | |
# attributes" window that opens up, type the destination link in | |
# "Href". | |
# | |
# 4. Send the box to the back (using End key on the keyboard) so you can | |
# see your text. DO NOT move your box at any time after you've | |
# created the link. More details below. | |
# | |
# 5. Export your SVG as PDF and run svglinkify.py: | |
# | |
# $ svglinkify.py my_doc.svg my_doc.pdf my_doc_with_links.pdf | |
# | |
# So you pass your SVG file as the first arg, the exported PDF as the | |
# 2nd arg and the name of final PDF as 3rd arg. | |
# | |
# 6. If you did everything right, open my_doc_with_links.pdf and you
# should be able to click your text and open the link in a browser. You
# will also notice that the magenta box is gone. That's it. Now read the
# sections below if you hate being frustrated when things break.
# | |
# HOW IT WORKS | |
# | |
# The script looks for magenta boxes (surprise!) that have a link. It | |
# then extracts their x,y position and hyperlinks. It does the same | |
# search for magenta boxes in the generated PDF and tries to match them | |
# up by their relative locations. Therefore it's crucial to get the | |
# locations right. Since SVG is pretty damn flexible, locations aren't | |
# always simple x,y attributes. When you create a link for an object, | |
# you wrap it in a group. Groups don't have x,y, instead they are | |
# transformed using 2D matrices which means, maths calculations must be | |
# done in order to find out where the enclosed box really is. This | |
# script is too dumb to do that. That's why you should not move a box | |
# after you create a link for it. | |
# | |
# You could either delete it and draw a new one, or if you like it
# dangerous, you can enter the group (i.e. by double-clicking the box) and
# then move the box. This way, you're not moving the group so no
# transformations will be applied. You're bound to make a mistake sooner
# or later this way, so don't do it.
# | |
# If you can't get this to work after at least several attempts, email | |
# me your SVG and the PDF inkscape generated for you and I should be | |
# able to help. | |
from __future__ import unicode_literals | |
from __future__ import print_function | |
from itertools import count | |
from subprocess import call, PIPE, Popen | |
import os | |
import re | |
import sys | |
import tempfile | |
# Magic to support both Python 2 and 3.
# Each fallback catches only the exception that signals "this name does not
# exist on this interpreter", so unrelated failures are not silently hidden.
try:
    range = xrange  # Python 2: prefer the lazy xrange
except NameError:
    pass  # Python 3: xrange is gone, the builtin range is used as-is
try:
    import HTMLParser as html_parser  # Python 2 module name
except ImportError:
    import html.parser as html_parser  # Python 3 module name
_html_parser = html_parser.HTMLParser()
try:
    # Older interpreters expose unescape() as a parser method.
    html_unescape = _html_parser.unescape
except AttributeError:
    # Parsers without that method: fall back to the html module function.
    import html
    html_unescape = html.unescape
# Command line parsing: three positional arguments are required.
if len(sys.argv) < 4:
    print('Usage: %s <svg-file> <inkscape-gen-pdf> <linkified-pdf>'
          % sys.argv[0], file=sys.stderr)
    # sys.exit() rather than the exit() helper, which is only injected by
    # the site module and is meant for interactive use.
    sys.exit(1)
svg_path = sys.argv[1]      # SVG containing the magenta link boxes
pdf_in_path = sys.argv[2]   # PDF exported from that SVG
pdf_out_path = sys.argv[3]  # where the linkified PDF is written
# Load the link rects from SVG file
SVG_X_PAT = re.compile(r'\bx="([^"]+)"')
SVG_Y_PAT = re.compile(r'\by="([^"]+)"')
# An <a xlink:href="..."> immediately followed by a <rect> whose style has a
# magenta fill. Group 1 is the (HTML-escaped) href, group 2 the rect's
# attribute text. '#' is written as [#] because re.X treats a bare '#' as
# the start of a comment.
SVG_LINK_RECT_PAT = re.compile(r'''
<a[^>]*?\bxlink:href="([^"]+)"[^>]*>\s*<rect
([^>]*?\bstyle="[^"]*?\bfill:[#]ff00ff\b[^>]*)
''', re.X)


def _svg_coord(pattern, attrs):
    """Return the coordinate matched by *pattern* in *attrs* as a float.

    A rect that omits the attribute yields 0.0 (the SVG default for x and
    y) instead of failing on a missing match.
    """
    found = pattern.search(attrs)
    return float(found.group(1)) if found else 0.0


with open(svg_path, 'r') as svg_file:
    # Each entry is (url, x, y).
    svg_rects = [
        (
            html_unescape(href),
            _svg_coord(SVG_X_PAT, attrs),
            _svg_coord(SVG_Y_PAT, attrs),
        )
        for href, attrs in SVG_LINK_RECT_PAT.findall(svg_file.read())
    ]
# QDFy the input PDF & load the resulting PDF to memory.
# qpdf writes its --qdf output to a temp file; the bytes are read back into
# pdf_data and the temp file is always removed afterwards.
fd, qdf_tmppath = tempfile.mkstemp()
os.close(fd)  # only the path is needed; qpdf writes the file itself
try:
    if call(['qpdf', '--qdf', pdf_in_path, qdf_tmppath]) != 0:
        print('error: qpdf failed', file=sys.stderr)
        sys.exit(1)
    with open(qdf_tmppath, 'rb') as ps_file:
        pdf_data = ps_file.read()
finally:
    try:
        os.unlink(qdf_tmppath)
    except OSError:
        # Best-effort cleanup: a temp file that cannot be removed must not
        # mask the real outcome of the conversion.
        pass
# Load the rects and last object ID from PDF file
# A run of magenta rects looks like:
#   1 0 1 rg [/a0 gs]  x y w h re f  x y w h re f ...
# Group 1 captures the "x y w h re f" repetitions.
PDF_RECT_PAT = re.compile(br'''
\b1\s+0\s+1\s+rg(?:\s+/a0\s+gs)?
((?:\s+[\d.-]+\s+[\d.-]+\s+[\d.-]+\s+[\d.-]+\s+re\s+f)+)\b
''', re.X)
# Collect the tokens of *every* magenta run, not just the first one: the
# rewrite step later strips all runs from the PDF, so each of them needs a
# link.
rect_tokens = []
for rect_run in PDF_RECT_PAT.finditer(pdf_data):
    rect_tokens.extend(rect_run.group(1).split())
# Tokens come in groups of six (x, y, w, h, "re", "f"); keep the four numbers.
pdf_rects = [
    [float(token) for token in rect_tokens[start:start + 4]]
    for start in range(0, len(rect_tokens), 6)
]
# The two integers following the "xref" keyword; later code uses them to
# number the new link objects and to rewrite the xref header.
xref_match = re.search(br'\bxref\s+(\d+)\s+(\d+)\b', pdf_data)
if not xref_match:
    print('error: could not find last obj id', file=sys.stderr)
    sys.exit(1)
last_obj = tuple(map(int, xref_match.groups()))
# Some sanity check to ensure our matches are good: rects are paired up
# purely by position, so the two lists must have the same length.
if len(svg_rects) != len(pdf_rects):
    print('''
error: found diff # of rects in svg & ps
This can be due to number of reasons:
- you've moved the box after creating a link for it - bad move!
fix: delete it and draw a new box and DON'T MOVE it this time
- you've grouped the boxes and done some fancy things
fix: see above
- you forgot to remove the strokes from the boxes
- you have removed a box but Inkscape is still keeping it in the file
fix: do a document cleanup or close/re-open your file
'''.strip(), file=sys.stderr)
    # sys.exit() rather than the site-provided exit() helper.
    sys.exit(1)
# Pair the rects up by relative X,Y position: both lists are put in the
# same positional order so that they can be zipped together afterwards.
# FIXME there is a possibility that due to rounding errors, links get
# matched up incorrectly. Always check the final PDF before sharing.
# SVG entries are (url, x, y): order by x ascending, then y descending.
svg_rects.sort(key=lambda rect: (int(rect[1] * 100), -int(rect[2] * 100)))
# PDF entries are [x, y, w, h]: order by x ascending, then y ascending.
pdf_rects.sort(key=lambda rect: (int(rect[0] * 100), int(rect[1] * 100)))
# Generate the PDF hyperlink objects
def _pdf_escape(text):
    """Escape backslashes and parentheses for use in a PDF literal string.

    The URI is emitted between '(' and ')', so these three characters must
    be backslash-escaped or the string would end early or be misread.
    """
    return (text.replace('\\', '\\\\')
                .replace('(', '\\(')
                .replace(')', '\\)'))


pdf_link_tpl = '''
%%QDF: ignore_newline
%d %d obj
<<
/A << /S /URI /URI (%s) >>
/Border [ 0 0 0 ]
/Rect [ %f %f %f %f ]
/Subtype /Link
/Type /Annot
>>
endobj
'''.strip()
# One annotation object per matched pair. New object ids start at
# last_obj[1]; /Rect is [x, y, x + w, y + h] taken from the PDF rect.
pdf_links = '\n'.join(
    pdf_link_tpl % (
        obj_id, last_obj[0], _pdf_escape(svg_rect[0]),
        pdf_rect[0], pdf_rect[1],
        pdf_rect[0] + pdf_rect[2], pdf_rect[1] + pdf_rect[3],
    )
    for pdf_rect, svg_rect, obj_id
    in zip(pdf_rects, svg_rects, count(last_obj[1]))
)
# Remove the visual rects from PDF, write out the new hyperlink objs
pdf_data = PDF_RECT_PAT.sub(b'', pdf_data)
# Insert the link objects just before the xref header and enlarge its
# count by the number of links added.
xref_pat = (r'\bxref\s+%d\s+%d\b' % last_obj).encode('ascii')
new_xref = (pdf_links + '\nxref\n%d %d' % (
    last_obj[0], last_obj[1] + len(svg_rects)
)).encode('ascii')
# A callable replacement inserts the bytes verbatim; a string replacement
# would have its backslashes (e.g. from escaped URLs) processed by re.sub.
pdf_data = re.sub(xref_pat, lambda _match: new_xref, pdf_data)
# Reference every new link object from the first page's dictionary.
annot_refs = ' '.join(
    '%d %d R' % (last_obj[1] + offset, last_obj[0])
    for offset in range(len(svg_rects))
)
annots_entry = ('/Annots [%s] ' % annot_refs).encode('ascii')
pdf_data = re.sub(
    br'([%][%]\s+Page\s+1\s+[%][%][^\n]+\s+\d+\s+\d+\s+obj\s+<<)',
    lambda match: match.group(1) + annots_entry,
    pdf_data
)
# Optimize and save the new file.
# The edited QDF bytes are piped through fix-qdf into a temp file, which
# qpdf then compresses into the final output path.
fd, out_tmppath = tempfile.mkstemp()
os.close(fd)  # the file is reopened below for writing
try:
    with open(out_tmppath, 'wb') as out_tmpfile:
        fix_qdf_proc = Popen(['fix-qdf'], stdin=PIPE, stdout=out_tmpfile)
        fix_qdf_proc.communicate(pdf_data)
    if fix_qdf_proc.wait() != 0:
        print('error: failed writing the mod pdf', file=sys.stderr)
        sys.exit(1)
    if call([
        'qpdf', '--object-streams=generate', '--stream-data=compress',
        out_tmppath, pdf_out_path
    ]) != 0:
        print('error: failed writing the mod pdf', file=sys.stderr)
        sys.exit(1)
finally:
    try:
        os.unlink(out_tmppath)
    except OSError:
        # Best-effort cleanup of the intermediate file.
        pass
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment