Last active
January 20, 2016 07:42
-
-
Save dberkholz/44d42e65f7947162f894 to your computer and use it in GitHub Desktop.
Takes an Excel spreadsheet, fetches URLs, annotates them with spreadsheet data and combines into a single PDF
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash | |
VERSION=0.3 | |
help() { | |
echo "${0##*/} v${VERSION}" | |
echo | |
echo "${0##*/} makes expenses easier by handling receipt->PDF generation." | |
echo | |
echo "It creates an annotated PDF based on an Excel spreadsheet containing" | |
echo "arbitrary data in one column (such as receipt numbers), URLs to PDF/JPG" | |
echo "files in another column, and can handle any other data in other" | |
echo "columns, as long as it doesn't contain commas." | |
echo | |
echo "This spreadsheet could be created through an Expensify export or" | |
echo "other methods." | |
echo | |
echo "Depends on ImageMagick and csvkit" | |
echo | |
echo "Usage: ${0##*/} -i INFILE -o OUTFILE -s SHEET -d DATA_COLUMN -u URL_COLUMN [-t] [-V]" | |
echo | |
echo "-i INFILE The input Excel file (.xls, .xlsx)" | |
echo "-o OUTFILE The output PDF file" | |
echo "-s SHEET The Excel sheet name (quote if contains spaces)" | |
echo "-d DATA_COLUMN The column number containing annotation data" | |
echo "-u URL_COLUMN The column number containing the URLs to fetch (jpg, pdf)" | |
echo "-t Save the temporary working directory" | |
echo "-V Show version and exit" | |
echo | |
exit 1 | |
} | |
fetch_url() { | |
local URL=${1} | |
local FILE=${1##*/} | |
[[ -f ${FILE} ]] || curl -s -O ${URL} | |
echo ${FILE} | |
} | |
convert_pdf_to_jpg() { | |
local INFILE=${1} | |
local OUTFILES=( ${INFILE%.*}.jpg ) | |
if [[ ${INFILE##*.} == pdf ]]; then | |
convert -density 150 ${INFILE} ${OUTFILES} | |
# If there's not a single one, then it's multiple pages | |
if [[ -f ${OUTFILES[0]} ]]; then | |
echo "Single-page PDF" >/dev/stderr | |
echo ${OUTFILES[@]} | |
continue | |
fi | |
echo "Multipage PDF" >/dev/stderr | |
OUTFILES= | |
local FILE | |
for FILE in ${INFILE%.*}-[0-9].jpg ${INFILE%.*}-[0-9][0-9].jpg; do | |
[[ -f ${FILE} ]] && OUTFILES+=( ${FILE} ) | |
done | |
else | |
echo "JPG" >/dev/stderr | |
fi | |
echo ${OUTFILES[@]} | |
} | |
annotate_jpgs() { | |
local NUM=${1}; shift | |
declare -a FILES=( ${@} ) | |
local FILE=${FILES[0]} | |
local ANN=${FILE%.*}.ann.jpg | |
# Annotate the first file only | |
convert -background '#ffffff' -gravity center -fill black -size 90x90 \ | |
caption:"${NUM}" \ | |
${FILE} +swap -gravity northeast -composite ${ANN} | |
FILES[0]=${ANN} | |
echo ${FILES[@]} | |
} | |
combine_files() { | |
local -a OUT=${1}; shift | |
convert ${@} ${OUT} | |
} | |
main() { | |
local URL INFILE XLS OUTPUT_NAME SAVE_TEMP NDATA=1 NURL=9 | |
local SHEET='Expensify Report Export' | |
declare -a JPGS ANNS OUTFILES | |
[[ ${#} -eq 0 ]] && help | |
while getopts ti:o:s:d:u:V OPTS; do | |
case ${OPTS} in | |
i) | |
XLS=${OPTARG} | |
;; | |
o) | |
OUTPUT_NAME=${OPTARG} | |
;; | |
s) | |
SHEET=${OPTARG} | |
;; | |
d) | |
NDATA=${OPTARG} | |
;; | |
u) | |
NURL=${OPTARG} | |
;; | |
t) | |
SAVE_TEMP="true" | |
;; | |
V) | |
echo "${0##*/} v${VERSION}" | |
return 1 | |
;; | |
\?) | |
echo "Invalid option" >/dev/stderr | |
return 1 | |
;; | |
esac | |
done | |
TEMPDIR="tmp-${RANDOM}" | |
[[ ! -d ${TEMPDIR} ]] && mkdir ${TEMPDIR} | |
pushd ${TEMPDIR} >/dev/null | |
declare -a LINE | |
while IFS=, read -a LINE; do | |
NUM=${LINE[NDATA-1]} | |
URL=${LINE[NURL-1]} | |
[[ ${URL:0:4} == http ]] || continue | |
echo -n "Processing line ${NUM}: " >/dev/stderr | |
INFILE=$(fetch_url ${URL}) | |
JPGS=( $(convert_pdf_to_jpg ${INFILE}) ) | |
ANNS=( $(annotate_jpgs ${NUM} ${JPGS[@]}) ) | |
OUTFILES+=( ${ANNS[@]} ) | |
done \ | |
< <(in2csv --sheet "${SHEET}" ../${XLS}) | |
# Combine files into final PDF | |
echo "Creating final PDF ${OUTPUT_NAME}" >/dev/stderr | |
combine_files ../${OUTPUT_NAME} ${OUTFILES[@]} | |
popd >/dev/null | |
[[ ${SAVE_TEMP} == "true" ]] || rm -rf ${TEMPDIR} | |
} | |
main "$@" |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment