diatche · October 27, 2020 21:20
diff --git a/revolut-csv.py b/revolut-csv.py
 #!/bin/python3

 # Converts Revolut CSV format into a CSV table
 # format which is easier for automated processing.
 #
 # Removes the following categories automatically:
 # general, transfers
 #
 # Usage:
 #
 # 1. Install dependencies. In terminal: python3 -m pip install arrow
 # 1. Save file as revolut-csv.py and open terminal in folder location.
 # 2. In termial: python3 revolut-csv.py -i <path to source CSV>
 # 3. For more info, run: python3 revolut-csv.py --help
 #
 # Example Output:
 #
 # Date,Description,Amount,Currency,Balance,Category
 # 12/09/2018,To TransferWise CFSB,-25.0,USD,0.00,
 #
 # Works with Revolut CSV formats from 2018 to current (as at 2020).
 #
 # Author: Pavel Diatchenko <diatche@gmail.com>
 # Licence: MIT

 import csv
 import re
 import arrow
 import argparse
 from pathlib import Path

 parser = argparse.ArgumentParser(
    description=(
                'Converts Revolut CSV format into a CSV table '
                'format which is easier for automated processing.\n\n'
                'Works with Revolut CSV format as at 17 Apr 2019.\n\n'
                'Author: Pavel Diatchenko <diatche@gmail.com>\n'
                'Licence: MIT'
    )
 )
 parser.add_argument(
    '-i',
    '--source',
    type=str,
    required=True,
    help='Source CSV file path.',
 )
 parser.add_argument(
    '-o',
    '--destination',
    type=str,
    help='Destination CSV file path.',
 )
 parser.add_argument(
    '-s',
    '--suffix',
    type=str,
    help='Destination CSV file suffix. If destination file is ommited, this suffix is added to the source file name',
 )
 parser.add_argument(
    '-d',
    '--delimiter',
    type=str,
    help='Source CSV column delimiter.',
 )
 parser.add_argument(
    '--verbose',
    dest='verbose',
    action='store_true',
    help='Verbose mode.',
 )

 parser.set_defaults(
    suffix='-parsed',
    delimiter=';',
    verbose=False,
 )
 args = parser.parse_args()
 verbose = bool(args.verbose)

 source_path = Path(args.source).expanduser()
 if verbose: print(f'source_path: "{source_path}"')

 destination_path = ''
 if not bool(args.destination):
    # Polyfill with_stem (new in Python 3.9)
    def with_stem(p, stem):
        ext = p.suffix
        return p.with_name(f'{stem}{ext}')
    
    if verbose: print(f'suffix: "{args.suffix}"')
    destination_path = with_stem(source_path, source_path.stem + args.suffix)
 else:
    destination_path = Path(args.destination).expanduser()
 if verbose: print(f'destination_path: "{destination_path}"')

 delimiter = args.delimiter
 if verbose: print(f'delimiter: "{delimiter}"')

 INPUT_HEADER_REGEX = {
    'completed':    r'^Completed Date$',
    'reference':    r'^Reference$',
    'debit':        r'^Paid Out (.*)$',
    'credit':       r'^Paid In (.*)$',
    'rateOut':      r'^Exchange Out$',
    'rateIn':       r'^Exchange In$',
    'balance':      r'^ ?Balance (.*)$',
    'rate':         r'^Exchange Rate$',
    'category':     r'^Category$',
 }
 OPTIONAL_HEADER_KEYS = [
    'rate'
 ]

 DATE_KEY = 'Date'
 DATE_INPUT_FORMAT_2020 = 'DD MMM YYYY'
 DATE_INPUT_FORMAT_2019 = 'YYYY MMMM D'
 DATE_OUTPUT_FORMAT = 'DD/MM/YYYY'
 AMOUNT_KEY = 'Amount'
 CURRENCY_KEY = 'Currency'
 BALANCE_KEY = 'Balance'
 CATEGORY_KEY = 'Category'
 REMOVE_CATEGORIES = {'general', 'transfers'}
 ROW_KEY_MAP = {
    DATE_KEY: 'completed',
    'Description': 'reference',
    AMOUNT_KEY: '', # Extra logic applied
    CURRENCY_KEY: '', # Extra logic applied
    BALANCE_KEY: 'balance',
    CATEGORY_KEY: 'category'
 }
 OUTPUT_HEADERS = list(ROW_KEY_MAP.keys())
 CURRENCY_REGEX = re.compile(r'.*\((.*)\)')
 CURRENCY_SOURCE_KEY = 'debit'
 input_indexes = {}
 currency = ''

 with open(source_path, newline='') as source:
    reader = csv.reader(source, delimiter=delimiter, quotechar='"')
    destination_path.touch()
    with open(destination_path, 'w', newline='') as destination:
        writer = csv.writer(destination, delimiter=',', quotechar='"')
        if verbose: print('begin')
        for i, row in enumerate(reader):
            if i == 0:
                for header_key, header_re in INPUT_HEADER_REGEX.items():
                    found = False
                    for j, header in enumerate(row):
                        if re.match(header_re, header):
                            input_indexes[header_key] = j
                            if verbose: print(f'header "{header_key}" column: {j}')
                            found = True
                            break
                    if not found:
                        if header_key in OPTIONAL_HEADER_KEYS:
                            if verbose: print(f'optional header "{header_key}" column not found')
                        else:
                            raise f'Header key "{header_key}" not found'
                if len(input_indexes) < len(row):
                    print(f'Warning: Ignoring extra columns')

                # map currency
                csource = row[input_indexes[CURRENCY_SOURCE_KEY]]
                cmatch = CURRENCY_REGEX.match(csource)
                if cmatch is not None:
                    currency = cmatch.group(1) or ''

                writer.writerow(OUTPUT_HEADERS)
                continue

            out_dict = {}
            for key, i in ROW_KEY_MAP.items():
                if not bool(i):
                    val = ''
                else:
                    val = row[input_indexes[i]]
                out_dict[key] = val

            # map date
            try:
                # 2020 format
                date = arrow.get(out_dict[DATE_KEY], DATE_INPUT_FORMAT_2020)
            except Exception:
                # Try pre 2020 format
                try:
                    date = arrow.get(out_dict[DATE_KEY], DATE_INPUT_FORMAT_2019)
                except Exception:
                    # Try to prepend with current year
                    current_year = arrow.now().year
                    date_with_year = f'{current_year} {out_dict[DATE_KEY]}'
                    date = arrow.get(date_with_year, DATE_INPUT_FORMAT_2019)
            out_dict[DATE_KEY] = date.format(DATE_OUTPUT_FORMAT)

            # map amount
            debit = float(str(row[input_indexes['debit']]).replace(',', '') or 0)
            credit = float(str(row[input_indexes['credit']]).replace(',', '') or 0)
            amount = credit - debit
            out_dict[AMOUNT_KEY] = amount

            # map currency
            out_dict[CURRENCY_KEY] = currency

            # map category
            category = out_dict[CATEGORY_KEY]
            if category in REMOVE_CATEGORIES:
                out_dict[CATEGORY_KEY] = ''

            # map balance
            out_dict[BALANCE_KEY] = float(out_dict[BALANCE_KEY].replace(',', '') or 0)

            outrow = [out_dict[k] for k in OUTPUT_HEADERS]
            writer.writerow(outrow)
            if verbose: print(outrow)
        if verbose: print('end')
	#!/bin/python3

	# Converts Revolut CSV format into a CSV table
	# format which is easier for automated processing.
	#
	# Removes the following categories automatically:
	# general, transfers
	#
	# Usage:
	#
	# 1. Install dependencies. In terminal: python3 -m pip install arrow
	# 1. Save file as revolut-csv.py and open terminal in folder location.
	# 2. In termial: python3 revolut-csv.py -i <path to source CSV>
	# 3. For more info, run: python3 revolut-csv.py --help
	#
	# Example Output:
	#
	# Date,Description,Amount,Currency,Balance,Category
	# 12/09/2018,To TransferWise CFSB,-25.0,USD,0.00,
	#
	# Works with Revolut CSV formats from 2018 to current (as at 2020).
	#
	# Author: Pavel Diatchenko <diatche@gmail.com>
	# Licence: MIT

	import csv
	import re
	import arrow
	import argparse
	from pathlib import Path

	parser = argparse.ArgumentParser(
	description=(
	'Converts Revolut CSV format into a CSV table '
	'format which is easier for automated processing.\n\n'
	'Works with Revolut CSV format as at 17 Apr 2019.\n\n'
	'Author: Pavel Diatchenko <diatche@gmail.com>\n'
	'Licence: MIT'
	)
	)
	parser.add_argument(
	'-i',
	'--source',
	type=str,
	required=True,
	help='Source CSV file path.',
	)
	parser.add_argument(
	'-o',
	'--destination',
	type=str,
	help='Destination CSV file path.',
	)
	parser.add_argument(
	'-s',
	'--suffix',
	type=str,
	help='Destination CSV file suffix. If destination file is ommited, this suffix is added to the source file name',
	)
	parser.add_argument(
	'-d',
	'--delimiter',
	type=str,
	help='Source CSV column delimiter.',
	)
	parser.add_argument(
	'--verbose',
	dest='verbose',
	action='store_true',
	help='Verbose mode.',
	)

	parser.set_defaults(
	suffix='-parsed',
	delimiter=';',
	verbose=False,
	)
	args = parser.parse_args()
	verbose = bool(args.verbose)

	source_path = Path(args.source).expanduser()
	if verbose: print(f'source_path: "{source_path}"')

	destination_path = ''
	if not bool(args.destination):
	# Polyfill with_stem (new in Python 3.9)
	def with_stem(p, stem):
	ext = p.suffix
	return p.with_name(f'{stem}{ext}')

	if verbose: print(f'suffix: "{args.suffix}"')
	destination_path = with_stem(source_path, source_path.stem + args.suffix)
	else:
	destination_path = Path(args.destination).expanduser()
	if verbose: print(f'destination_path: "{destination_path}"')

	delimiter = args.delimiter
	if verbose: print(f'delimiter: "{delimiter}"')

	INPUT_HEADER_REGEX = {
	'completed': r'^Completed Date$',
	'reference': r'^Reference$',
	'debit': r'^Paid Out (.*)$',
	'credit': r'^Paid In (.*)$',
	'rateOut': r'^Exchange Out$',
	'rateIn': r'^Exchange In$',
	'balance': r'^ ?Balance (.*)$',
	'rate': r'^Exchange Rate$',
	'category': r'^Category$',
	}
	OPTIONAL_HEADER_KEYS = [
	'rate'
	]

	DATE_KEY = 'Date'
	DATE_INPUT_FORMAT_2020 = 'DD MMM YYYY'
	DATE_INPUT_FORMAT_2019 = 'YYYY MMMM D'
	DATE_OUTPUT_FORMAT = 'DD/MM/YYYY'
	AMOUNT_KEY = 'Amount'
	CURRENCY_KEY = 'Currency'
	BALANCE_KEY = 'Balance'
	CATEGORY_KEY = 'Category'
	REMOVE_CATEGORIES = {'general', 'transfers'}
	ROW_KEY_MAP = {
	DATE_KEY: 'completed',
	'Description': 'reference',
	AMOUNT_KEY: '', # Extra logic applied
	CURRENCY_KEY: '', # Extra logic applied
	BALANCE_KEY: 'balance',
	CATEGORY_KEY: 'category'
	}
	OUTPUT_HEADERS = list(ROW_KEY_MAP.keys())
	CURRENCY_REGEX = re.compile(r'.\((.)\)')
	CURRENCY_SOURCE_KEY = 'debit'
	input_indexes = {}
	currency = ''

	with open(source_path, newline='') as source:
	reader = csv.reader(source, delimiter=delimiter, quotechar='"')
	destination_path.touch()
	with open(destination_path, 'w', newline='') as destination:
	writer = csv.writer(destination, delimiter=',', quotechar='"')
	if verbose: print('begin')
	for i, row in enumerate(reader):
	if i == 0:
	for header_key, header_re in INPUT_HEADER_REGEX.items():
	found = False
	for j, header in enumerate(row):
	if re.match(header_re, header):
	input_indexes[header_key] = j
	if verbose: print(f'header "{header_key}" column: {j}')
	found = True
	break
	if not found:
	if header_key in OPTIONAL_HEADER_KEYS:
	if verbose: print(f'optional header "{header_key}" column not found')
	else:
	raise f'Header key "{header_key}" not found'
	if len(input_indexes) < len(row):
	print(f'Warning: Ignoring extra columns')

	# map currency
	csource = row[input_indexes[CURRENCY_SOURCE_KEY]]
	cmatch = CURRENCY_REGEX.match(csource)
	if cmatch is not None:
	currency = cmatch.group(1) or ''

	writer.writerow(OUTPUT_HEADERS)
	continue

	out_dict = {}
	for key, i in ROW_KEY_MAP.items():
	if not bool(i):
	val = ''
	else:
	val = row[input_indexes[i]]
	out_dict[key] = val

	# map date
	try:
	# 2020 format
	date = arrow.get(out_dict[DATE_KEY], DATE_INPUT_FORMAT_2020)
	except Exception:
	# Try pre 2020 format
	try:
	date = arrow.get(out_dict[DATE_KEY], DATE_INPUT_FORMAT_2019)
	except Exception:
	# Try to prepend with current year
	current_year = arrow.now().year
	date_with_year = f'{current_year} {out_dict[DATE_KEY]}'
	date = arrow.get(date_with_year, DATE_INPUT_FORMAT_2019)
	out_dict[DATE_KEY] = date.format(DATE_OUTPUT_FORMAT)

	# map amount
	debit = float(str(row[input_indexes['debit']]).replace(',', '') or 0)
	credit = float(str(row[input_indexes['credit']]).replace(',', '') or 0)
	amount = credit - debit
	out_dict[AMOUNT_KEY] = amount

	# map currency
	out_dict[CURRENCY_KEY] = currency

	# map category
	category = out_dict[CATEGORY_KEY]
	if category in REMOVE_CATEGORIES:
	out_dict[CATEGORY_KEY] = ''

	# map balance
	out_dict[BALANCE_KEY] = float(out_dict[BALANCE_KEY].replace(',', '') or 0)

	outrow = [out_dict[k] for k in OUTPUT_HEADERS]
	writer.writerow(outrow)
	if verbose: print(outrow)
	if verbose: print('end')