Last active
February 19, 2016 21:04
-
-
Save deeenes/5fb6c128d54ce359b63c to your computer and use it in GitHub Desktop.
make a NatWest statement less verbose
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python2 | |
# -*- coding: utf-8 -*- | |
''' | |
NatWest statement privacy | |
you want to extract a list of transactions data from | |
your account, to provide evidence for a third party, | |
but you want to hide all your other transactions. | |
(ok, it clearly gives no more evidence than saying
`hey buddy, i transferred 300 quid`, but at many
points of bureaucracy, surprisingly, they like it)
1. log in to NatWest online banking | |
2. on the left, choose statements | |
3. click view transactions | |
4. select the desired time range, | |
note: in historic transactions | |
you can set any custom range | |
5. click view transactions | |
6. right click anywhere on the list | |
7. select 'this frame' > 'view source' | |
note: if you get a source without your | |
statement list table, the cache | |
is probably disabled, and it tried | |
to download it again... | |
8. select all, copy | |
9. paste into text editor, and save | |
10. set `infile` below to the file name | |
just saved | |
11. set `outfile` to the preferred output | |
file name | |
12. set the keywords, i.e. unique name
fragments of those partners whose
transactions you want to keep visible
13. save and run the script | |
14. open the output html in browser and | |
check the result | |
15. print from browser to pdf, optionally
set the right top field of the header
to whatever you want to see there
instead of the path of your local
file
''' | |
import bs4 | |
import codecs | |
# Input: the HTML source of the statement frame, saved from the browser.
infile = 'natwest_statement_012016.html'
# Output: the redacted HTML, to be opened (and printed) in a browser.
outfile = 'natwest_statement_012016.htm'
# give some *unique* string samples from the names:
do_not_hide = ['UNIV', 'IBM', 'EAST']
# When true, the last cell of every transaction row is masked as well.
hide_balance = True

# Placeholder written over every masked amount cell.
masked_amount = u'£XX'

# Read raw bytes and let BeautifulSoup detect the document encoding itself;
# this behaves the same on Python 2 and Python 3, independent of the locale.
with open(infile, 'rb') as f:
    html = f.read()

soup = bs4.BeautifulSoup(html, 'lxml')

# Remove every <script> element from the tree. An explicit loop is used
# because `map` is lazy on Python 3 and would silently remove nothing.
for script in soup.find_all('script'):
    script.extract()

for tr in soup.find_all('tr'):
    tds = tr.find_all('td')
    # Only rows with exactly six cells are processed; anything else
    # (headers, layout rows) is left untouched.
    if len(tds) != 6:
        continue
    # The third cell is matched against the keep-list; rows matching none
    # of the samples get that cell and the next two overwritten.
    # (presumably description, paid-in and paid-out columns -- verify
    # against the actual statement markup)
    if not any(sample in tds[2].text for sample in do_not_hide):
        tds[2].string = '[HIDDEN]'
        tds[3].string = masked_amount
        tds[4].string = masked_amount
    # NOTE(review): the original indentation was lost, so it is unclear
    # whether the balance was masked on every row or only on hidden rows.
    # Masking it on every row is assumed here -- confirm.
    if hide_balance:
        tds[5].string = masked_amount

with codecs.open(outfile, encoding='utf-8', mode='w') as f:
    f.write(soup.prettify())
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment