Skip to content

Instantly share code, notes, and snippets.

@dzakyputra
Created June 17, 2020 00:13
Show Gist options
  • Save dzakyputra/03e005e3a675e18ff2c06105f868c5dd to your computer and use it in GitHub Desktop.
Save dzakyputra/03e005e3a675e18ff2c06105f868c5dd to your computer and use it in GitHub Desktop.
# Build one record per (customer, received offer instance) describing whether
# that offer was viewed and completed, then join offer and customer attributes.
#
# Inputs (expected to exist already): `portfolio`, `transcript`, `profile`
# DataFrames, plus `pd` and `tqdm`.
# Outputs: `compiled_data`, `compiled_data_merged`, `complete_data`.

# Reference lookups keyed by offer id.
offer_reference = dict(zip(portfolio['id'], portfolio['offer_type']))
# Duration is scaled by 24 -- presumably days -> hours so it is comparable
# with transcript['time']; TODO confirm the units against the data source.
offer_duration_reference = {
    offer_id: duration * 24
    for offer_id, duration in zip(portfolio['id'], portfolio['duration'])
}

# Positions inside each record:
# [person, offer_id, viewed, completed, view_information, time_completed]
_VIEWED, _COMPLETED, _VIEW_INFORMATION, _TIME_COMPLETED = 2, 3, 4, 5

# Set the list to store all the data
full_data = []

# Iterate through each person. A single groupby pass replaces re-filtering the
# whole transcript once per customer; sort=False keeps customers in order of
# first appearance and each customer's rows in their original order.
for person, events in tqdm(transcript.groupby('person', sort=False)):

    # "<offer_id>-<n>" -> time received, for offer instances not completed yet
    # (n is the 1-based count of how often this customer received the offer).
    not_completed = {}
    # Every offer_id received so far, repeats included (used to derive n).
    received = []
    # "<offer_id>-<n>" -> record list, in order of reception.
    total_data = {}
    # Informational offer ids this customer has viewed so far.
    information = []

    # Iterate through each person activity
    for _, row in events.iterrows():
        event = row['event']

        if event == 'offer received':
            offer_id = row['offer_id']
            received.append(offer_id)
            key = offer_id + '-' + str(received.count(offer_id))
            not_completed[key] = row['time']
            total_data[key] = [person, offer_id, 0, 0, 0, 0]

        elif event == 'offer viewed':
            offer_id = row['offer_id']

            # Remember that the customer has seen an informational offer.
            if offer_reference[offer_id] == 'informational':
                information.append(offer_id)

            # Open (not yet completed) instances of this offer, oldest first.
            active = [k for k in not_completed if k.split('-')[0] == offer_id]

            if len(active) == 1:
                # Exactly one open instance: the view belongs to it.
                total_data[active[0]][_VIEWED] = 1
            else:
                # Several open instances: credit the first one that is still
                # within its duration and has not been marked viewed already.
                duration = offer_duration_reference[offer_id]
                for key in active:
                    still_valid = (row['time'] - not_completed[key]) < duration
                    if still_valid and total_data[key][_VIEWED] != 1:
                        total_data[key][_VIEWED] = 1
                        break

        elif event == 'offer completed':
            offer_id = row['offer_id']

            # Has the customer viewed any informational offer before this
            # completion?
            info = bool(information)

            # Open (not yet completed) instances of this offer, oldest first.
            active = [k for k in not_completed if k.split('-')[0] == offer_id]

            if len(active) == 1:
                # Exactly one open instance: it is the one being completed
                # (no duration check is applied in this case).
                completed_key = active[0]
            else:
                # Several (or zero) open instances: pick the first one that is
                # still within its duration, if any.
                duration = offer_duration_reference[offer_id]
                completed_key = next(
                    (k for k in active
                     if (row['time'] - not_completed[k]) < duration),
                    None,
                )

            if completed_key is not None:
                record = total_data[completed_key]
                record[_COMPLETED] = 1
                # Popping closes the instance so later events skip it.
                record[_TIME_COMPLETED] = row['time'] - not_completed.pop(completed_key)
                if info:
                    record[_VIEW_INFORMATION] = 1

    # Keep this customer's records in order of reception.
    full_data.extend(total_data.values())

# Create a dataframe based on the compile result
compiled_data = pd.DataFrame(
    full_data,
    columns=['person', 'offer_id', 'viewed', 'completed', 'view_information', 'time_completed'],
)

# Merge with the portfolio and profile dataframe (default inner joins: records
# whose offer_id / person has no match in portfolio / profile are dropped).
compiled_data_merged = compiled_data.merge(portfolio, left_on='offer_id', right_on='id').drop(columns=['id'])
complete_data = compiled_data_merged.merge(profile, left_on='person', right_on='id').drop(columns=['id'])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment