Skip to content

Instantly share code, notes, and snippets.

View dzakyputra's full-sized avatar

Dzaky Widya Putra dzakyputra

View GitHub Profile
query = """
SELECT
complete.offer_type,
complete.complete_without_view_income,
complete_view.complete_with_view_income
FROM
(SELECT
offer_type,
AVG(income) complete_without_view_income
# Find the spending distribution
spending_distribution = transcript[transcript['event'] == 'transaction'].merge(profile, left_on='person', right_on='id')
# Histogram for the Male
plt.hist(spending_distribution[spending_distribution['gender'] == 'M']['amount'],
range=(0, 40),
alpha=0.5,
bins=40,
label='Male')
# Calculate the average spending for each gender
avg_spending = transcript[transcript['event'] == 'transaction'].merge(profile, left_on='person', right_on='id') \
.groupby('gender', as_index=False)['amount'] \
.mean() \
.rename(columns={'amount': 'average_spending_per_transaction'}) \
.sort_values('average_spending_per_transaction')
avg_spending['gender'] = avg_spending['gender'].map({'F': 'Female', 'M': 'Male', 'O': 'Others'})
# Start the visualization process
query = """
SELECT
complete.offer_type,
complete.gender,
complete.complete_without_view,
complete_view.complete_with_view,
(complete.complete_without_view + complete_view.complete_with_view) total_complete
FROM
(SELECT
query = """
SELECT
complete.offer_id,
complete.offer_type,
total_completed,
total_completed_without_view,
ROUND(((1.0*total_completed_without_view) / (1.0*total_completed))*100, 2) as total_completed_without_view_ratio,
100 - ROUND(((1.0*total_completed_without_view) / (1.0*total_completed))*100, 2) as total_completed_with_view_ratio,
`loss ($)`
FROM
# Reference
offer_reference = {}
offer_duration_reference = {}
for i,j in zip(portfolio['id'], portfolio['offer_type']):
offer_reference[i] = j
for i,j in zip(portfolio['id'], portfolio['duration']):
offer_duration_reference[i] = j*24
# Profile preprocessing
# Convert the became_member_on into datetime type
profile['became_member_on'] = pd.to_datetime(profile['became_member_on'], format='%Y%m%d')
# Create a new column with the value of the difference days between the column became_member_on and the max days
profile['difference_days'] = (profile['became_member_on'].max() - profile['became_member_on']).dt.days
## Find the median and mode
## Transcript preprocessing
# Convert the dictionary value into columns and concatenate with the current dataframe
value = pd.io.json.json_normalize(transcript['value'])
transcript = pd.concat([transcript, value], axis=1).drop(columns=['value'])
# Merge the offer_id column and offer id collumn so that it only has one column
transcript['offer_id'] = np.where(pd.isnull(transcript['offer_id']), transcript['offer id'], transcript['offer_id'])
transcript.drop(columns=['offer id'], inplace=True)
# Portfolio preprocessing
values = []
# Iterate the dataframe
for index, row in portfolio.iterrows():
# Set the default value
new_value = [0,0,0,0]
from matplotlib.colors import ListedColormap
from tqdm import tqdm
import matplotlib.patches as mpatches
import matplotlib.pyplot as plt
import pandasql as pdsql
import seaborn as sns
import pandas as pd
import numpy as np
import math