Last active
October 27, 2020 21:20
-
-
Save diatche/ad7b9727ea5cd8343a8c7360c1ec4a52 to your computer and use it in GitHub Desktop.
Converts Revolut CSV format into a CSV table format which is easier for automated processing.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/python3 | |
# Converts Revolut CSV format into a CSV table | |
# format which is easier for automated processing. | |
# | |
# Removes the following categories automatically: | |
# general, transfers | |
# | |
# Usage: | |
# | |
# 1. Install dependencies. In terminal: python3 -m pip install arrow | |
# 1. Save file as revolut-csv.py and open terminal in folder location. | |
# 2. In termial: python3 revolut-csv.py -i <path to source CSV> | |
# 3. For more info, run: python3 revolut-csv.py --help | |
# | |
# Example Output: | |
# | |
# Date,Description,Amount,Currency,Balance,Category | |
# 12/09/2018,To TransferWise CFSB,-25.0,USD,0.00, | |
# | |
# Works with Revolut CSV formats from 2018 to current (as at 2020). | |
# | |
# Author: Pavel Diatchenko <diatche@gmail.com> | |
# Licence: MIT | |
import csv | |
import re | |
import arrow | |
import argparse | |
from pathlib import Path | |
parser = argparse.ArgumentParser( | |
description=( | |
'Converts Revolut CSV format into a CSV table ' | |
'format which is easier for automated processing.\n\n' | |
'Works with Revolut CSV format as at 17 Apr 2019.\n\n' | |
'Author: Pavel Diatchenko <diatche@gmail.com>\n' | |
'Licence: MIT' | |
) | |
) | |
parser.add_argument( | |
'-i', | |
'--source', | |
type=str, | |
required=True, | |
help='Source CSV file path.', | |
) | |
parser.add_argument( | |
'-o', | |
'--destination', | |
type=str, | |
help='Destination CSV file path.', | |
) | |
parser.add_argument( | |
'-s', | |
'--suffix', | |
type=str, | |
help='Destination CSV file suffix. If destination file is ommited, this suffix is added to the source file name', | |
) | |
parser.add_argument( | |
'-d', | |
'--delimiter', | |
type=str, | |
help='Source CSV column delimiter.', | |
) | |
parser.add_argument( | |
'--verbose', | |
dest='verbose', | |
action='store_true', | |
help='Verbose mode.', | |
) | |
parser.set_defaults( | |
suffix='-parsed', | |
delimiter=';', | |
verbose=False, | |
) | |
args = parser.parse_args() | |
verbose = bool(args.verbose) | |
source_path = Path(args.source).expanduser() | |
if verbose: print(f'source_path: "{source_path}"') | |
destination_path = '' | |
if not bool(args.destination): | |
# Polyfill with_stem (new in Python 3.9) | |
def with_stem(p, stem): | |
ext = p.suffix | |
return p.with_name(f'{stem}{ext}') | |
if verbose: print(f'suffix: "{args.suffix}"') | |
destination_path = with_stem(source_path, source_path.stem + args.suffix) | |
else: | |
destination_path = Path(args.destination).expanduser() | |
if verbose: print(f'destination_path: "{destination_path}"') | |
delimiter = args.delimiter | |
if verbose: print(f'delimiter: "{delimiter}"') | |
INPUT_HEADER_REGEX = { | |
'completed': r'^Completed Date$', | |
'reference': r'^Reference$', | |
'debit': r'^Paid Out (.*)$', | |
'credit': r'^Paid In (.*)$', | |
'rateOut': r'^Exchange Out$', | |
'rateIn': r'^Exchange In$', | |
'balance': r'^ ?Balance (.*)$', | |
'rate': r'^Exchange Rate$', | |
'category': r'^Category$', | |
} | |
OPTIONAL_HEADER_KEYS = [ | |
'rate' | |
] | |
DATE_KEY = 'Date' | |
DATE_INPUT_FORMAT_2020 = 'DD MMM YYYY' | |
DATE_INPUT_FORMAT_2019 = 'YYYY MMMM D' | |
DATE_OUTPUT_FORMAT = 'DD/MM/YYYY' | |
AMOUNT_KEY = 'Amount' | |
CURRENCY_KEY = 'Currency' | |
BALANCE_KEY = 'Balance' | |
CATEGORY_KEY = 'Category' | |
REMOVE_CATEGORIES = {'general', 'transfers'} | |
ROW_KEY_MAP = { | |
DATE_KEY: 'completed', | |
'Description': 'reference', | |
AMOUNT_KEY: '', # Extra logic applied | |
CURRENCY_KEY: '', # Extra logic applied | |
BALANCE_KEY: 'balance', | |
CATEGORY_KEY: 'category' | |
} | |
OUTPUT_HEADERS = list(ROW_KEY_MAP.keys()) | |
CURRENCY_REGEX = re.compile(r'.*\((.*)\)') | |
CURRENCY_SOURCE_KEY = 'debit' | |
input_indexes = {} | |
currency = '' | |
with open(source_path, newline='') as source: | |
reader = csv.reader(source, delimiter=delimiter, quotechar='"') | |
destination_path.touch() | |
with open(destination_path, 'w', newline='') as destination: | |
writer = csv.writer(destination, delimiter=',', quotechar='"') | |
if verbose: print('begin') | |
for i, row in enumerate(reader): | |
if i == 0: | |
for header_key, header_re in INPUT_HEADER_REGEX.items(): | |
found = False | |
for j, header in enumerate(row): | |
if re.match(header_re, header): | |
input_indexes[header_key] = j | |
if verbose: print(f'header "{header_key}" column: {j}') | |
found = True | |
break | |
if not found: | |
if header_key in OPTIONAL_HEADER_KEYS: | |
if verbose: print(f'optional header "{header_key}" column not found') | |
else: | |
raise f'Header key "{header_key}" not found' | |
if len(input_indexes) < len(row): | |
print(f'Warning: Ignoring extra columns') | |
# map currency | |
csource = row[input_indexes[CURRENCY_SOURCE_KEY]] | |
cmatch = CURRENCY_REGEX.match(csource) | |
if cmatch is not None: | |
currency = cmatch.group(1) or '' | |
writer.writerow(OUTPUT_HEADERS) | |
continue | |
out_dict = {} | |
for key, i in ROW_KEY_MAP.items(): | |
if not bool(i): | |
val = '' | |
else: | |
val = row[input_indexes[i]] | |
out_dict[key] = val | |
# map date | |
try: | |
# 2020 format | |
date = arrow.get(out_dict[DATE_KEY], DATE_INPUT_FORMAT_2020) | |
except Exception: | |
# Try pre 2020 format | |
try: | |
date = arrow.get(out_dict[DATE_KEY], DATE_INPUT_FORMAT_2019) | |
except Exception: | |
# Try to prepend with current year | |
current_year = arrow.now().year | |
date_with_year = f'{current_year} {out_dict[DATE_KEY]}' | |
date = arrow.get(date_with_year, DATE_INPUT_FORMAT_2019) | |
out_dict[DATE_KEY] = date.format(DATE_OUTPUT_FORMAT) | |
# map amount | |
debit = float(str(row[input_indexes['debit']]).replace(',', '') or 0) | |
credit = float(str(row[input_indexes['credit']]).replace(',', '') or 0) | |
amount = credit - debit | |
out_dict[AMOUNT_KEY] = amount | |
# map currency | |
out_dict[CURRENCY_KEY] = currency | |
# map category | |
category = out_dict[CATEGORY_KEY] | |
if category in REMOVE_CATEGORIES: | |
out_dict[CATEGORY_KEY] = '' | |
# map balance | |
out_dict[BALANCE_KEY] = float(out_dict[BALANCE_KEY].replace(',', '') or 0) | |
outrow = [out_dict[k] for k in OUTPUT_HEADERS] | |
writer.writerow(outrow) | |
if verbose: print(outrow) | |
if verbose: print('end') |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment