Skip to content

Instantly share code, notes, and snippets.

@diatche
Last active October 27, 2020 21:20
Show Gist options
  • Save diatche/ad7b9727ea5cd8343a8c7360c1ec4a52 to your computer and use it in GitHub Desktop.
Save diatche/ad7b9727ea5cd8343a8c7360c1ec4a52 to your computer and use it in GitHub Desktop.
Converts Revolut CSV format into a CSV table format which is easier for automated processing.
#!/bin/python3
# Converts Revolut CSV format into a CSV table
# format which is easier for automated processing.
#
# Removes the following categories automatically:
# general, transfers
#
# Usage:
#
# 1. Install dependencies. In terminal: python3 -m pip install arrow
# 1. Save file as revolut-csv.py and open terminal in folder location.
# 2. In termial: python3 revolut-csv.py -i <path to source CSV>
# 3. For more info, run: python3 revolut-csv.py --help
#
# Example Output:
#
# Date,Description,Amount,Currency,Balance,Category
# 12/09/2018,To TransferWise CFSB,-25.0,USD,0.00,
#
# Works with Revolut CSV formats from 2018 to current (as at 2020).
#
# Author: Pavel Diatchenko <diatche@gmail.com>
# Licence: MIT
import csv
import re
import arrow
import argparse
from pathlib import Path
parser = argparse.ArgumentParser(
description=(
'Converts Revolut CSV format into a CSV table '
'format which is easier for automated processing.\n\n'
'Works with Revolut CSV format as at 17 Apr 2019.\n\n'
'Author: Pavel Diatchenko <diatche@gmail.com>\n'
'Licence: MIT'
)
)
parser.add_argument(
'-i',
'--source',
type=str,
required=True,
help='Source CSV file path.',
)
parser.add_argument(
'-o',
'--destination',
type=str,
help='Destination CSV file path.',
)
parser.add_argument(
'-s',
'--suffix',
type=str,
help='Destination CSV file suffix. If destination file is ommited, this suffix is added to the source file name',
)
parser.add_argument(
'-d',
'--delimiter',
type=str,
help='Source CSV column delimiter.',
)
parser.add_argument(
'--verbose',
dest='verbose',
action='store_true',
help='Verbose mode.',
)
parser.set_defaults(
suffix='-parsed',
delimiter=';',
verbose=False,
)
args = parser.parse_args()
verbose = bool(args.verbose)
source_path = Path(args.source).expanduser()
if verbose: print(f'source_path: "{source_path}"')
destination_path = ''
if not bool(args.destination):
# Polyfill with_stem (new in Python 3.9)
def with_stem(p, stem):
ext = p.suffix
return p.with_name(f'{stem}{ext}')
if verbose: print(f'suffix: "{args.suffix}"')
destination_path = with_stem(source_path, source_path.stem + args.suffix)
else:
destination_path = Path(args.destination).expanduser()
if verbose: print(f'destination_path: "{destination_path}"')
delimiter = args.delimiter
if verbose: print(f'delimiter: "{delimiter}"')
INPUT_HEADER_REGEX = {
'completed': r'^Completed Date$',
'reference': r'^Reference$',
'debit': r'^Paid Out (.*)$',
'credit': r'^Paid In (.*)$',
'rateOut': r'^Exchange Out$',
'rateIn': r'^Exchange In$',
'balance': r'^ ?Balance (.*)$',
'rate': r'^Exchange Rate$',
'category': r'^Category$',
}
OPTIONAL_HEADER_KEYS = [
'rate'
]
DATE_KEY = 'Date'
DATE_INPUT_FORMAT_2020 = 'DD MMM YYYY'
DATE_INPUT_FORMAT_2019 = 'YYYY MMMM D'
DATE_OUTPUT_FORMAT = 'DD/MM/YYYY'
AMOUNT_KEY = 'Amount'
CURRENCY_KEY = 'Currency'
BALANCE_KEY = 'Balance'
CATEGORY_KEY = 'Category'
REMOVE_CATEGORIES = {'general', 'transfers'}
ROW_KEY_MAP = {
DATE_KEY: 'completed',
'Description': 'reference',
AMOUNT_KEY: '', # Extra logic applied
CURRENCY_KEY: '', # Extra logic applied
BALANCE_KEY: 'balance',
CATEGORY_KEY: 'category'
}
OUTPUT_HEADERS = list(ROW_KEY_MAP.keys())
CURRENCY_REGEX = re.compile(r'.*\((.*)\)')
CURRENCY_SOURCE_KEY = 'debit'
input_indexes = {}
currency = ''
with open(source_path, newline='') as source:
reader = csv.reader(source, delimiter=delimiter, quotechar='"')
destination_path.touch()
with open(destination_path, 'w', newline='') as destination:
writer = csv.writer(destination, delimiter=',', quotechar='"')
if verbose: print('begin')
for i, row in enumerate(reader):
if i == 0:
for header_key, header_re in INPUT_HEADER_REGEX.items():
found = False
for j, header in enumerate(row):
if re.match(header_re, header):
input_indexes[header_key] = j
if verbose: print(f'header "{header_key}" column: {j}')
found = True
break
if not found:
if header_key in OPTIONAL_HEADER_KEYS:
if verbose: print(f'optional header "{header_key}" column not found')
else:
raise f'Header key "{header_key}" not found'
if len(input_indexes) < len(row):
print(f'Warning: Ignoring extra columns')
# map currency
csource = row[input_indexes[CURRENCY_SOURCE_KEY]]
cmatch = CURRENCY_REGEX.match(csource)
if cmatch is not None:
currency = cmatch.group(1) or ''
writer.writerow(OUTPUT_HEADERS)
continue
out_dict = {}
for key, i in ROW_KEY_MAP.items():
if not bool(i):
val = ''
else:
val = row[input_indexes[i]]
out_dict[key] = val
# map date
try:
# 2020 format
date = arrow.get(out_dict[DATE_KEY], DATE_INPUT_FORMAT_2020)
except Exception:
# Try pre 2020 format
try:
date = arrow.get(out_dict[DATE_KEY], DATE_INPUT_FORMAT_2019)
except Exception:
# Try to prepend with current year
current_year = arrow.now().year
date_with_year = f'{current_year} {out_dict[DATE_KEY]}'
date = arrow.get(date_with_year, DATE_INPUT_FORMAT_2019)
out_dict[DATE_KEY] = date.format(DATE_OUTPUT_FORMAT)
# map amount
debit = float(str(row[input_indexes['debit']]).replace(',', '') or 0)
credit = float(str(row[input_indexes['credit']]).replace(',', '') or 0)
amount = credit - debit
out_dict[AMOUNT_KEY] = amount
# map currency
out_dict[CURRENCY_KEY] = currency
# map category
category = out_dict[CATEGORY_KEY]
if category in REMOVE_CATEGORIES:
out_dict[CATEGORY_KEY] = ''
# map balance
out_dict[BALANCE_KEY] = float(out_dict[BALANCE_KEY].replace(',', '') or 0)
outrow = [out_dict[k] for k in OUTPUT_HEADERS]
writer.writerow(outrow)
if verbose: print(outrow)
if verbose: print('end')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment