Last active
December 6, 2022 09:53
-
-
Save AlisterH/bd83640397ee4ddc52fd7374225b0db9 to your computer and use it in GitHub Desktop.
gtkdialog gui for Puppy Linux to recursively find and combine pdf files
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#! /bin/bash
# gjoinpdf: gtkdialog gui for Puppy Linux to recursively find and combine pdf files.
# Requires qpdf (or pdfunite from poppler, or gs) and basename.
# See joinpdf for command-line only version.
# Version 7 by disciple, 06/12/2022.
# http://www.murga-linux.com/puppy/viewtopic.php?p=149208#149208
# Currently has NO ERROR HANDLING for the actual join operation.
# You may experience errors if you have pdfs that are corrupted.
# The use of all those symlinks is an ugly hack from when I couldn't get gs to join files with spaces in the path otherwise :(
# Maybe we should fix it if we aren't using gs anymore!
# Set defaults.
# We don't change directory, so the working directory is only captured
# to pre-fill the dialogs for the user.
INPUTFOLDER="$(pwd)"
OUTPUTFILE="$(pwd)/combined.pdf"
VIEWPROGRAM="xdg-open"
URL_HANDLER="xdg-open"
# Could use something like this if xdg-open doesn't work everywhere:
#VIEWPROGRAM="`which rox || which xdg-open || which mimeo`"
#URL_HANDLER="`which defaultbrowser || which xdg-open || which mimeo`"

# Choose the sort command (kept as a plain string; it is expanded unquoted
# by the code that uses it so that "sort -V" splits into command + option).
# Use natsort if it is available, for better sorting.
# `msort -l -w -c h` also gives good results but isn't designed for piping.
if hash natsort 2>/dev/null; then
  SORT="natsort"
else
  echo "gjoinpdf: using sort"
  echo "for better results install natsort from https://github.com/jjk-jacky/natsort"
  # Probe whether this sort implementation accepts -V by sorting one line.
  if sort -V >/dev/null 2>&1 <<<1; then
    SORT="sort -V"
  else
    SORT="sort"
    echo "gjoinpdf: sort does not support -V"
    echo "I hope you were careful with any file numbering"
  fi
fi

# Initialise filecount.
# Will need to increase this if we want to join 10000+ files!
# I have not tested to find the maximum number of files in each input directory,
# although it performed well on my system with an input directory containing 960 files.
# Note that if there are too many our pipes will break!
FILECOUNT=1000
# Full dialog shown when the script is started without arguments.
# The definition is a double-quoted shell string, so $INPUTFOLDER, $OUTPUTFILE
# and $VIEWPROGRAM are expanded NOW (at definition time) and baked into the
# <input> commands; a value containing a single quote would break them.
# NOTE(review): the unescaped "A", "1.pdf" and "B.pdf" in the help text close
# and reopen the shell string, so the quotes are dropped from the displayed
# text. Presumably they must not be escaped, because the whole text is already
# wrapped in \"...\" for gtkdialog - confirm before changing.
export MAIN_DIALOG="
<window title=\"gjoinpdf - combine pdfs\" icon-name=\"gtk-file\">
<vbox>
<edit editable=\"false\"
wrap-mode=\"3\"
cursor-visible=\"false\">
<default>
\"gjoinpdf will recursively find and join all the pdfs in a directory and any number of subdirectories in normal alphanumeric order.
Make sure you name the files and folders appropriately so they are joined in the order you want.
e.g. Pdfs in a subfolder called "A" will come after a file called "1.pdf", and before a file called "B.pdf"\"
</default>
<height>180</height>
</edit>
<frame Location of input files>
<hbox>
<entry accept=\"directory\"
fs-filters-mime=\"application/pdf\"
fs-title=\"Select folder with input files\">
<variable>INPUTFOLDER</variable>
<input>echo '$INPUTFOLDER'</input>
</entry>
<button>
<input file stock=\"gtk-open\"></input>
<action type=\"fileselect\">INPUTFOLDER</action>
</button>
</hbox>
</frame>
<frame Output file>
<hbox>
<entry accept=\"savefilename\"
fs-filters-mime=\"application/pdf\"
fs-title=\"Select output file\"
is-focus=\"true\">
<variable>OUTPUTFILE</variable>
<input>echo '$OUTPUTFILE'</input>
</entry>
<button>
<input file stock=\"gtk-new\"></input>
<action type=\"fileselect\">OUTPUTFILE</action>
</button>
</hbox>
</frame>
<frame View the file afterwards?>
<hbox>
<checkbox>
<label>View output with</label>
<default>true</default>
<variable>VIEWOUTPUT</variable>
</checkbox>
<entry accept=\"filename\"
fs-title=\"Select pdf viewer\">
<variable>VIEWPROGRAM</variable>
<input>echo '$VIEWPROGRAM'</input>
</entry>
<button>
<input file stock=\"gtk-execute\"></input>
<action type=\"fileselect\">VIEWPROGRAM</action>
</button>
</hbox>
</frame>
<hbox>
<button use-underline=\"true\">
<input file stock=\"gtk-ok\"></input>
<label>_Join pdfs</label>
<action type=\"exit\">JOIN-NOW</action>
</button>
<button use-underline=\"true\">
<input file stock=\"gtk-help\"></input>
<label>_Help</label>
<action>launch:HELP_DIALOG</action>
</button>
<button cancel></button>
</hbox>
</vbox>
</window>
"
# Help window launched from the main dialog's Help button.
# $URL_HANDLER is expanded at definition time, so it must be set before this
# point; the backticks around `sort -V` are escaped so the shell does not run
# them while building the string.
export HELP_DIALOG="
<window title=\"gjoinpdf - info\" icon-name=\"gtk-help\">
<vbox>
<edit editable=\"false\"
wrap-mode=\"3\"
cursor-visible=\"false\">
<default>
\"If you run gjoinpdf from the command line with inputs, it will pop up a dialog to ask you what you want to save the combined file as, and will then join them as you would expect.
i.e. If you specify an input folder it will recursively search for all pdfs in the folder and all subfolders, and sort them in normal alphanumeric order.
If you specify more than one input (file or folder), it will keep them in the order that you specify (folders won't be sorted before files), and for each folder it will sort the pdfs found there.
If you type in an output filename you do not need to add a .pdf extension; the script will sort that out.
gjoinpdf is the gui version - a command-line only version is also provided as joinpdf.
Files are sorted most sensibly if you install natsort, otherwise coreutils \`sort -V\` is used. If you only have busybox sort you should be careful to add leading zeros to numbered filenames, e.g. to avoid 11.pdf coming before 2.pdf.
\"
</default>
<height>400</height>
<width>440</width>
</edit>
<hbox>
<button use-underline=\"true\">
<input file stock=\"gtk-dialog-info\"></input>
<label>\"_Visit forum thread\"</label>
<action>$URL_HANDLER http://www.murga-linux.com/puppy/viewtopic.php?p=149208#149208 >/dev/null &</action>
</button>
<button has_focus=\"true\" use-stock=\"true\" label=\"gtk-ok\" >
<action>closewindow:HELP_DIALOG</action>
</button>
</hbox>
</vbox>
<variable>HELP_DIALOG</variable>
</window>
"
# Build and export OUTPUT_FILE_DIALOG, the reduced dialog that only asks for
# the output file and the viewer.
# This is a function (rather than a one-off assignment) because $OUTPUTFILE
# and $VIEWPROGRAM are expanded into the string when it is built, so calling
# it again picks up their current values.
# Globals: reads OUTPUTFILE, VIEWPROGRAM; writes/exports OUTPUT_FILE_DIALOG.
generate_output_file_dialog() {
  # The exit action must be exactly JOIN-NOW: that is the value the caller
  # compares the dialog's EXIT result against (same as the main dialog).
  export OUTPUT_FILE_DIALOG="
<window title=\"gjoinpdf - combine pdfs\" icon-name=\"gtk-file\">
<vbox>
<frame Select output file>
<hbox>
<entry accept=\"savefilename\"
fs-filters-mime=\"application/pdf\"
fs-title=\"Select output file\"
is-focus=\"true\">
<variable>OUTPUTFILE</variable>
<input>echo '$OUTPUTFILE'</input>
</entry>
<button>
<input file stock=\"gtk-new\"></input>
<action type=\"fileselect\">OUTPUTFILE</action>
</button>
</hbox>
</frame>
<frame View the file afterwards?>
<hbox>
<checkbox>
<label>View output with</label>
<default>true</default>
<variable>VIEWOUTPUT</variable>
</checkbox>
<entry accept=\"filename\"
fs-title=\"Select pdf viewer\">
<variable>VIEWPROGRAM</variable>
<input>echo '$VIEWPROGRAM'</input>
</entry>
<button>
<input file stock=\"gtk-execute\"></input>
<action type=\"fileselect\">VIEWPROGRAM</action>
</button>
</hbox>
</frame>
<hbox>
<button use-underline=\"true\">
<input file stock=\"gtk-ok\"></input>
<label>_Join pdfs</label>
<action type=\"exit\">JOIN-NOW</action>
</button>
<button cancel></button>
</hbox>
</vbox>
</window>
"
}
# Error pop-ups. Each is a minimal window with one message and an OK button.

# Shown when no pdf files were found.
export ERROR_DIALOG="
<window title=\"gjoinpdf - error\" icon-name=\"gtk-dialog-error\">
<vbox>
<text>
<label>ERROR: no pdf files were found.</label>
</text>
<button ok></button>
</vbox>
</window>
"

# Shown when the chosen output file already exists.
export ERROR_DIALOG_EXISTS="
<window title=\"gjoinpdf - file exists\" icon-name=\"gtk-dialog-error\">
<vbox>
<text>
<label>ERROR: the specified output file already exists; choose another one.</label>
</text>
<button ok></button>
</vbox>
</window>
"

# Shown when the directory of the chosen output file is not writable.
export ERROR_DIALOG_PATH="
<window title=\"gjoinpdf - save error\" icon-name=\"gtk-dialog-error\">
<vbox>
<text>
<label>ERROR: the specified output file is in a location which is read-only or does not exist; choose another one.</label>
</text>
<button ok></button>
</vbox>
</window>
"
# Show the reduced "output file" dialog and import the user's choices.
#
# The dialog result is parsed as one NAME="value" assignment per line. Each
# lookup is anchored to the start of the line so that a value which merely
# contains another variable's name (e.g. an output path with "EXIT" in it)
# cannot be mistaken for that variable.
#
# Globals: reads TEMPFOLDER;
#          writes OUTPUTFILEGUI, OUTPUTFILE, VIEWOUTPUT, VIEWPROGRAM.
# Exits the whole script with status 0 unless the dialog was closed with the
# JOIN-NOW action.
showoutputfiledialog() {
  local exit_value

  generate_output_file_dialog
  OUTPUTFILEGUI="$(gtkdialog --program=OUTPUT_FILE_DIALOG)"

  exit_value="$(sed -n 's/^EXIT="\(.*\)"$/\1/p' <<<"$OUTPUTFILEGUI")"
  if [ "$exit_value" != "JOIN-NOW" ]; then
    # If we didn't clean up the temporary directory here we would need to
    # use return rather than exit; it is simpler to duplicate the cleanup
    # than to check the return value everywhere this function is called.
    # TEMPFOLDER may not have been created yet, hence the guard.
    if [ -n "${TEMPFOLDER:-}" ]; then
      rm -rf -- "$TEMPFOLDER"
    fi
    exit 0
  fi

  OUTPUTFILE="$(sed -n 's/^OUTPUTFILE="\(.*\)"$/\1/p' <<<"$OUTPUTFILEGUI")"
  VIEWOUTPUT="$(sed -n 's/^VIEWOUTPUT="\(.*\)"$/\1/p' <<<"$OUTPUTFILEGUI")"
  VIEWPROGRAM="$(sed -n 's/^VIEWPROGRAM="\(.*\)"$/\1/p' <<<"$OUTPUTFILEGUI")"
}
# Join everything in "$TEMPFOLDER" into "$OUTPUTFILE".
#
# While the output file already exists, or its directory is not writable, an
# error dialog is shown and the user is asked again (via showoutputfiledialog,
# which exits the script if the user cancels). A re-entered name gets the same
# ".pdf" normalisation the help text promises.
# The joiner is the first available of qpdf, pdfunite, gs.
# The viewer is only started when a non-empty output file was produced.
#
# Globals: reads TEMPFOLDER, VIEWOUTPUT, VIEWPROGRAM; reads/writes OUTPUTFILE.
# Returns: the exit status of the join command.
joinfiles() {
  local join_status

  while :; do
    if [ -e "$OUTPUTFILE" ]; then
      # If the file already exists don't try to overwrite it.
      gtkdialog --program=ERROR_DIALOG_EXISTS >/dev/null
    elif [ ! -w "$(dirname -- "$OUTPUTFILE")" ]; then
      gtkdialog --program=ERROR_DIALOG_PATH >/dev/null
    else
      break
    fi
    showoutputfiledialog
    # Make sure the newly chosen name ends in exactly one .pdf extension.
    case "$OUTPUTFILE" in
      *.pdf) OUTPUTFILE="${OUTPUTFILE%.pdf}" ;;
      *.PDF) OUTPUTFILE="${OUTPUTFILE%.PDF}" ;;
    esac
    OUTPUTFILE="$OUTPUTFILE.pdf"
  done

  # pdfunite is MUCH faster than gs and less error prone.
  # qpdf is MUCH faster than pdfunite; let's hope it is also reliable.
  if hash qpdf 2>/dev/null; then
    qpdf --empty --pages "$TEMPFOLDER"/* -- "$OUTPUTFILE"
    join_status=$?
  else
    echo "gjoinpdf: qpdf is not available; combining files with pdfunite"
    echo "this is significantly slower"
    if hash pdfunite 2>/dev/null; then
      pdfunite "$TEMPFOLDER"/* "$OUTPUTFILE"
      join_status=$?
    else
      echo "gjoinpdf: pdfunite is not available; combining files with gs"
      echo "this is significantly slower and more error prone"
      gs -dBATCH -dNOPAUSE -q -sDEVICE=pdfwrite -sOutputFile="$OUTPUTFILE" "$TEMPFOLDER"/*
      join_status=$?
    fi
  fi

  if [ "$join_status" -eq 0 ]; then
    echo "created: $OUTPUTFILE"
  else
    echo "gjoinpdf: join command exited with status $join_status" >&2
  fi

  # View or postprocess output file - but only if there is one to look at.
  # (A joiner may report a non-zero status and still write usable output,
  # so the decision is based on the file, not on the status.)
  if [ -s "$OUTPUTFILE" ]; then
    if [[ "$VIEWOUTPUT" = "true" && "$VIEWPROGRAM" != "" ]]; then
      # Deliberately unquoted so VIEWPROGRAM may carry extra arguments.
      $VIEWPROGRAM "$OUTPUTFILE" &
    fi
  else
    echo "gjoinpdf: no output file was produced" >&2
  fi

  return "$join_status"
}
# ---------------------------------------------------------------------------
# Main flow: collect inputs (full gui without arguments, reduced gui with
# arguments), filter them down to real pdfs, symlink them in order into a
# temporary directory, and join them.
# ---------------------------------------------------------------------------

# Print the value of NAME from dialog output given as TEXT, which is parsed
# as one NAME="value" assignment per line (anchored, so values that contain
# another variable's name are not mismatched).
# Arguments: $1 - variable name, $2 - dialog output.
_gui_value() {
  sed -n "s/^$1=\"\\(.*\\)\"\$/\\1/p" <<<"$2"
}

# Remove the temporary directory, if one has been created.
_cleanup_tempfolder() {
  if [ -n "${TEMPFOLDER:-}" ]; then
    rm -rf -- "$TEMPFOLDER"
  fi
}
# Clean up on every exit path instead of only at the end of the script.
trap _cleanup_tempfolder EXIT

if [ "$#" -eq 0 ]; then
  # Show full gui if run without input arguments.
  MAINGUI="$(gtkdialog --program MAIN_DIALOG)"
  if [ "$(_gui_value EXIT "$MAINGUI")" != "JOIN-NOW" ]; then
    exit 0
  fi
  INPUTFOLDER="$(_gui_value INPUTFOLDER "$MAINGUI")"
  OUTPUTFILE="$(_gui_value OUTPUTFILE "$MAINGUI")"
  VIEWOUTPUT="$(_gui_value VIEWOUTPUT "$MAINGUI")"
  VIEWPROGRAM="$(_gui_value VIEWPROGRAM "$MAINGUI")"

  # Create temporary directory
  TEMPFOLDER="$(mktemp -dt joinpdf-XXX)" || exit 1

  # Find all files! (pdfs are picked out by mime type further down)
  # $SORT is deliberately unquoted: it may be "sort -V".
  find -L "$INPUTFOLDER" -type f -exec realpath {} + | $SORT >> "$TEMPFOLDER"/files0.txt
  # Note this doesn't work:
  # I read somewhere that each -exec is a test, but they don't seem to be tests,
  # as this will grep the realpath of everything, as well as the mime-types.
  # So our list will include anything named *application/pdf
  #find "$INPUTFOLDER" -type f -exec realpath {} + \
  # -exec file -F $'\t' --mime-type {} + \
  # |grep 'application/pdf$' | cut -f 1 >> "$TEMPFOLDER"/files.txt
else
  # Show reduced gui if run with input arguments.
  # Get output filename
  showoutputfiledialog

  # Create temporary directory
  TEMPFOLDER="$(mktemp -dt joinpdf-XXX)" || exit 1

  # Find all files! Inputs keep the order given on the command line; only
  # the contents of each input are sorted.
  for i in "$@"; do
    find -L "$i" -type f -exec realpath {} + | $SORT >> "$TEMPFOLDER"/files0.txt
  done
fi

# Get input files; detect pdfs by mime-type.
# This is more 'nixy and will pick up pdfs without a file extension, and not
# files with a .pdf extension that aren't actually pdfs.
# realpath is needed above to pick up symlinks to pdfs (readlink would be a
# more portable alternative - would it be slower?).
# Unfortunately (in terms of performance) we can't use the file command via
# find -exec because it doesn't get passed the realpath; see
# https://unix.stackexchange.com/questions/79222/how-can-i-efficiently-dereference-all-symlinks-in-find-output-filenames
# The redirection is on the loop so files.txt exists even with no matches.
while IFS= read -r line; do
  if [ "$(file -b --mime-type -- "$line")" = "application/pdf" ]; then
    printf '%s\n' "$line"
  fi
done < "$TEMPFOLDER"/files0.txt > "$TEMPFOLDER"/files.txt

# Make sure output file has exactly one .pdf extension.
case "$OUTPUTFILE" in
  *.pdf) OUTPUTFILE="${OUTPUTFILE%.pdf}" ;;
  *.PDF) OUTPUTFILE="${OUTPUTFILE%.PDF}" ;;
esac
OUTPUTFILE="$OUTPUTFILE.pdf"

# Symlink files for us to join. Link names are zero-padded numbers so that
# the "$TEMPFOLDER"/* glob used for joining expands in numeric order no
# matter how many files there are.
while IFS= read -r line; do
  FILECOUNT=$((FILECOUNT + 1))
  printf -v linkname '%08d' "$FILECOUNT"
  ln -s -- "$line" "$TEMPFOLDER/$linkname"
done < "$TEMPFOLDER"/files.txt

# Remove lists so only the symlinks remain in the temporary directory.
rm -f -- "$TEMPFOLDER"/files0.txt "$TEMPFOLDER"/files.txt

if [ -z "$(ls -A "$TEMPFOLDER")" ]; then
  gtkdialog --program=ERROR_DIALOG >/dev/null
else
  joinfiles
fi
# The temporary directory is removed by the EXIT trap set above.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment