Last active
October 9, 2021 01:48
-
-
Save oberrich/c2bcac8d07a9e55bf812aca2091226ce to your computer and use it in GitHub Desktop.
Extract Bitcoin transactions with historical prices and fix up time zones in Python
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
import numpy as np | |
import datetime as dt | |
import csv, glob, os | |
from currency_converter import CurrencyConverter | |
pd.set_option('precision', 9) | |
filename_tpl = 'output/temp/'+str(int(dt.datetime.utcnow().timestamp()))+'_{}.csv' | |
currency = CurrencyConverter('http://www.ecb.int/stats/eurofxref/eurofxref-hist.zip', fallback_on_missing_rate=True) | |
def convert_usd_eur(row, rate_only=False): | |
if rate_only: | |
return currency.convert(1.0, 'USD', 'EUR', date=row.name) | |
return currency.convert(row['HL_AVG'], 'USD', 'EUR', date=row.name) | |
def convert_btc_eur(row, btc_eur_df, rate_only=False): | |
btc_eur_at_hour = btc_eur_df.loc[dt.datetime(row.name.year, row.name.month, row.name.day, row.name.hour)]['BTC_EUR'] | |
if rate_only: | |
return btc_eur_at_hour | |
return row['Amount'] * btc_eur_at_hour | |
def parse_coinbase_date(d): | |
return pd.to_datetime(d, format="%Y-%m-%d %I-%p") | |
def parse_paypal_date(d): | |
return pd.to_datetime(d, format="%d/%m/%Y %H:%M:%S") | |
btc_eur_dict = { | |
'Date': 'Datum', | |
'Open': 'Eröffnung (USD)', | |
'Close': 'Schlusskurs (USD)', | |
'High': 'Stundenhoch (USD)', | |
'Low': 'Stundentief (USD)', | |
'Volume BTC': 'Volumen (BTC)', | |
'Volume USD': 'Volumen (USD)', | |
'HL_AVG': 'Mittel aus Stundenhoch/Stundentief (USD)', | |
'USD_EUR': 'Wechselkurs USD/EUR', | |
'BTC_EUR': 'Wechselkurs BTC/EUR' | |
} | |
# read and parse coinbase historical btc/usd data | |
btc_eur_df = pd.read_csv('data/Coinbase_BTCUSD_1h.csv', usecols=['Date', 'Open', 'Close', 'Low', 'High', 'Volume BTC', 'Volume USD'], parse_dates=['Date'], date_parser=parse_coinbase_date, index_col=['Date']) | |
# localize datetime index and convert to german timezone | |
btc_eur_df.index = btc_eur_df.index.tz_localize('utc').tz_convert('Europe/Berlin') | |
btc_eur_df['HL_AVG'] = btc_eur_df[['High','Low']].mean(axis=1) | |
btc_eur_df['USD_EUR'] = btc_eur_df.apply(lambda x: convert_usd_eur(x, True), axis=1) | |
btc_eur_df['BTC_EUR'] = btc_eur_df.apply(convert_usd_eur, axis=1) | |
# translate keys to german/human readable and save to disk | |
btc_eur_df.rename(columns=btc_eur_dict).to_csv(filename_tpl.format('BTCEUR_Umrechnung'), decimal=',', encoding='utf-8-sig') | |
print(btc_eur_df.head(10)) | |
bitpay_df = pd.read_csv('data/BitPay-Export.csv', usecols=['Date', 'Destination', 'Amount', 'Currency'], parse_dates=['Date'], index_col=['Date']) | |
# localize datetime index and convert to german timezone | |
bitpay_df.index = pd.to_datetime(bitpay_df.index).tz_convert('Europe/Berlin') | |
# determine transaction type from destination column | |
bitpay_df.Destination = bitpay_df.Destination.fillna('Eingang') | |
bitpay_df.loc[(bitpay_df.Destination == 'Bitcoin Network Fees'), 'Destination'] = 'Netzwerkgebühren' | |
bitpay_df.loc[(bitpay_df.Destination != 'Netzwerkgebühren') & (bitpay_df.Destination != 'Eingang'), 'Destination'] = 'Ausgang' | |
bitpay_df['BTC_EUR'] = bitpay_df.apply(lambda r: convert_btc_eur(r, btc_eur_df, True), axis=1) | |
bitpay_df['EUR_Amount'] = bitpay_df.apply(lambda r: convert_btc_eur(r, btc_eur_df), axis=1) | |
bitpay_df.index = bitpay_df.index.rename('Datum') | |
bitpay_df.rename(columns={ | |
'Destination': 'Transaktionstyp', | |
'Amount': 'Einnahme Bitcoin (BTC)', | |
'Currency': 'Währung', | |
'BTC_EUR': 'Wechselkurs BTC/EUR', | |
'EUR_Amount': 'Einnahme €' | |
}).to_csv(filename_tpl.format('BitPay_BTC_Transaktionen'), decimal=',', encoding='utf-8-sig') | |
print(bitpay_df.head(10)) | |
paypal_tx_files = glob.glob(os.path.join('data', "PayPal*.csv")) | |
# Date,"Time","TimeZone","Name","Type","Status","Currency","Gross","Fee","Net","From Email Address","To Email Address","Transaction ID","CounterParty Status","Shipping Address","Address Status","Item Title","Item ID","Shipping and Handling Amount","Insurance Amount","Sales Tax","Option 1 Name","Option 1 Value","Option 2 Name","Option 2 Value","Auction Site","Buyer ID","Item URL","Closing Date","Escrow Id","Reference Txn ID","Invoice Number","Custom Number","Quantity","Receipt ID","Balance","Address Line 1","Address Line 2/District/Neighborhood","Town/City","State/Province/Region/County/Territory/Prefecture/Republic","Zip/Postal Code","Country","Contact Phone Number","Subject","Note","Payment Source","Card Type","Transaction Event Code","Payment Tracking ID","Bank Reference ID","Transaction Buyer Country Code","Item Details","Coupons","Special Offers","Loyalty Card Number","Authorization Review Status","Protection Eligibility","Country Code","Balance Impact","Buyer Wallet","Comment 1","Comment 2","Invoice Number","PO Number","Customer Reference Number","Payflow Transaction ID (PNREF)","Tip","Discount","Seller ID","Risk Filter" | |
paypal_dfs = (pd.read_csv(f, decimal=',', thousands='.', usecols=['Date', 'Time', 'Type', 'Status', 'Currency', 'Gross', 'Fee', 'Net', 'From Email Address', 'To Email Address', 'Balance', 'Payment Source', 'Transaction Buyer Country Code', 'State/Province/Region/County/Territory/Prefecture/Republic', 'Balance Impact', 'Transaction Event Code', 'Transaction ID', 'Reference Txn ID'], date_parser=parse_paypal_date, parse_dates=[['Date', 'Time']]) for f in paypal_tx_files) | |
paypal_df = pd.concat(paypal_dfs, ignore_index=True) | |
paypal_df = paypal_df.set_index('Date_Time') | |
# localize datetime column | |
paypal_df.index = paypal_df.index.tz_localize('Europe/Berlin') | |
paypal_df.index = paypal_df.index.rename('Datum') | |
paypal_dfr = paypal_df.rename(columns={ | |
'State/Province/Region/County/Territory/Prefecture/Republic': 'Bundesstaat', | |
'From Email Address': 'Absender', | |
'To Email Address': 'Empfänger', | |
'Transaction Buyer Country Code': 'Land', | |
'Currency': 'Währung', | |
'Type': 'Transaktionstyp' | |
}) | |
paypal_dfr.to_csv(filename_tpl.format('PayPal_Transactions'), decimal=',', encoding='utf-8-sig', float_format='%.2f') | |
print(paypal_dfr) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment