-
-
Save Zhong-z/418e11b104e1ecfff9d457f7a9de917a to your computer and use it in GitHub Desktop.
CBA PDF statement converter to PDF or CSV
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"use strict";var t=this&&this.__importDefault||function(t){return t&&t.__esModule?t:{default:t}};Object.defineProperty(exports,"__esModule",{value:!0}),exports.parsePDF=void 0;const e=t(require("fs")),n=t(require("pdf-parse")),s=require("json2csv");async function r(t,r){const l=e.default.readFileSync(t),o=(await n.default(l)).text.split("TransactionDebitCreditBalance");o.shift();let a=[],c=0;for(let t of o){const{results:e,finalBalance:n}=i(t,c);c=n,a=[...a,...e]}if(r&&r.endsWith(".json"))e.default.writeFileSync(r,JSON.stringify(a,null,2));else if(r&&r.endsWith(".csv")){const t=s.parse(a);e.default.writeFileSync(r,t)}else console.log(a)}function i(t,e=0){const{openingBalance:n,firstPassResults:s}=function(t){const e=t.split("\n");let n;e.shift();const s=[];if(function(t){return t.includes("OPENING BALANCE")}(e[0])){n=l((e.shift()||"").split("$")[1])}let r=!1;for(const t of e)if(!0!==r){if(""===t)break;if(t.includes("CLOSING BALANCE"))break;t.includes("CREDIT INTEREST EARNED")?r=!0:s.push(t)}else r=!1;return{firstPassResults:s,openingBalance:n}}(t);return function(t,e){const n=[];let s=e;for(const e of t){const t=e.substring(0,6);let[r,...i]=e.substring(6,e.length).split("$");const c=o(i[0]),u=l(i[1]),f=u-s;let g=0;if(0===c){let e=a(f);e.startsWith("-")&&(e=e.substring(1,e.length));e!==r.substring(r.length-e.length,r.length)&&(console.log("WrongDebitCalc"),console.log({newBalance:u-Math.abs(f),diff:Math.abs(f),ds:e,date:t,label:r,debit:g,credit:c,balance:u})),g=Math.abs(f),r=r.substring(0,r.length-e.length)}n.push({date:t,label:r,debit:g,credit:c,balance:u}),s=u}return{results:n,finalBalance:s}}(function(t){const e=[];let n="";for(const s of t){1===s.split("$").length?n+=s:(e.push(n+s),n="")}return e}(s),n||e)}function l(t){if(t.includes("CR"))return o(t.split("CR")[0]);if(t.includes("DR"))return-Math.abs(o(t.split("DR")[0]));throw"No Balance"}function o(t){return t?parseInt(t.replace(/,/g,"").replace(/\./g,"")):0}function a(t){const e=t.toString().startsWith("-")?"-":"",n=t.toString().replace("-","");if(1===n.length)return e+"0.0"+n;if(2===n.length)return e+"0."+n;const s=n.substring(0,n.length-2),r=n.substring(n.length-2,n.length);return parseInt(s).toLocaleString()+"."+r}process.argv[2]&&r(process.argv[2],process.argv[3]),exports.parsePDF=r |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Usage | |
Contact alshdavid@gmail.com for help | |
Download and install node from https://nodejs.org/en/download/ | |
Copy the "index.js" script below to you computer. | |
Open up your terminal, cmd, or powershell. | |
run the following once: | |
npm install pdf-parse json2csv | |
Navigate to the directory then run the following for each statement: | |
node index.js ./statement.pdf ./output.csv | |
If you want to output a JSON file | |
node index.js ./statement.pdf ./output.json |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import fs from 'fs' | |
import pdf from 'pdf-parse' | |
import { parse } from 'json2csv'; | |
export type Cents = number | |
export type Record = { | |
date: string, | |
label: string, | |
debit: Cents, | |
credit: Cents, | |
balance: Cents | |
}; | |
if (process.argv[2]) { | |
parsePDF(process.argv[2], process.argv[3]) | |
} | |
export async function parsePDF(file: string, outputFile?: string){ | |
const dataBuffer = fs.readFileSync(file); | |
const data = await pdf(dataBuffer) | |
const vs = data.text.split('TransactionDebitCreditBalance') | |
vs.shift() | |
let output: Record[] = [] | |
let previousBalance = 0 | |
for (let v of vs) { | |
const { results, finalBalance } = processTable(v, previousBalance) | |
previousBalance = finalBalance | |
output = [ ...output, ...results ] | |
} | |
if (outputFile && outputFile.endsWith('.json')) { | |
fs.writeFileSync(outputFile, JSON.stringify(output, null, 2)) | |
} else if (outputFile && outputFile.endsWith('.csv')) { | |
const csv = parse(output); | |
fs.writeFileSync(outputFile, csv) | |
} else { | |
console.log(output) | |
} | |
} | |
function isOpeningBalance(target: string) { | |
return target.includes('OPENING BALANCE') | |
} | |
function getTabletLines(target: string): { firstPassResults: string[], openingBalance: number | undefined } { | |
const l = target.split('\n') | |
l.shift() | |
let openingBalance: number | undefined | |
const p = [] | |
if (isOpeningBalance(l[0])) { | |
const openingLine = l.shift() || '' | |
openingBalance = parseBalance(openingLine.split('$')[1]) | |
} | |
let skip = false | |
for (const line of l) { | |
if (skip === true) { | |
skip = false | |
continue | |
} | |
if (line === '') { | |
break | |
} | |
if (line.includes('CLOSING BALANCE')) { | |
break | |
} | |
if (line.includes('CREDIT INTEREST EARNED')) { | |
skip = true | |
continue | |
} | |
p.push(line) | |
} | |
return { | |
firstPassResults: p, | |
openingBalance | |
} | |
} | |
function squashLines(target: string[]): string[] { | |
const p2: string[] = [] | |
let temp = '' | |
for (const line of target) { | |
const v = line.split('$') | |
if (v.length === 1) { | |
temp += line | |
} else { | |
p2.push(temp + line) | |
temp = '' | |
} | |
} | |
return p2 | |
} | |
function generateRecords(target: string[], initialBalance: number): { results: Record[], finalBalance: Cents } { | |
const p3: Record[] = [] | |
let previousBalance = initialBalance | |
for (const line of target) { | |
const date = line.substring(0, 6) | |
let [ label, ...data ] = line.substring(6, line.length).split('$') | |
const credit = moneyToCents(data[0]) | |
const balance = parseBalance(data[1]) | |
const diff = balance - previousBalance | |
let debit = 0 | |
if (credit === 0) { | |
let ds = centsToMoney(diff) | |
if (ds.startsWith('-')) { | |
ds = ds.substring(1, ds.length) | |
} | |
const diffInLabel = label.substring(label.length - ds.length, label.length) | |
if (ds !== diffInLabel) { | |
console.log('WrongDebitCalc') | |
console.log({ | |
newBalance: balance - Math.abs(diff), | |
diff: Math.abs(diff), | |
ds, | |
date, | |
label, | |
debit, | |
credit, | |
balance | |
}) | |
} | |
debit = Math.abs(diff) | |
label = label.substring(0, label.length - ds.length) | |
} | |
p3.push({ | |
date, | |
label, | |
debit, | |
credit, | |
balance | |
}) | |
previousBalance = balance | |
} | |
return { | |
results: p3, | |
finalBalance: previousBalance | |
} | |
} | |
function processTable(t: string, initialBalance: Cents = 0) { | |
const { openingBalance, firstPassResults } = getTabletLines(t) | |
const squashedLines = squashLines(firstPassResults) | |
return generateRecords(squashedLines, openingBalance || initialBalance) | |
} | |
function parseBalance(balance: string): Cents { | |
if (balance.includes('CR')) { | |
return moneyToCents(balance.split('CR')[0]) | |
} if (balance.includes('DR')) { | |
return -Math.abs(moneyToCents(balance.split('DR')[0])) | |
} | |
throw 'No Balance' | |
} | |
function moneyToCents(money: string): Cents { | |
return money ? parseInt(money.replace(/,/g, '').replace(/\./g, '')) : 0 | |
} | |
function centsToMoney(input: Cents): string { | |
const negative = input.toString().startsWith('-') ? '-' : '' | |
const iS = input.toString().replace('-', '') | |
if (iS.length === 1) { | |
return negative + '0.0'+ iS | |
} | |
if (iS.length === 2) { | |
return negative + '0.'+ iS | |
} | |
const dollars = iS.substring(0, iS.length - 2) | |
const cents = iS.substring(iS.length - 2, iS.length) | |
const localDollars = parseInt(dollars).toLocaleString() | |
return localDollars + '.' + cents | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment