Last active
May 23, 2024 15:53
-
-
Save markrittman/5fa843ab279743771d71eab12b2d854c to your computer and use it in GitHub Desktop.
First-Click, Last-Click, Even-Click and Time Decay Attribution for GA4
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* | |
First-Click, Last-Click, Even-Click and Time Decay Attribution for GA4 | |
Replace "event_name in ('Contact Us','Contact Us Clicked','CTA Pressed','Form Submitted')" with the name(s) of your conversion event in lines 54, 57, 61 and 76 | |
*/ | |
WITH | |
events AS ( | |
SELECT | |
event_timestamp as event_ts, | |
user_pseudo_id AS user_pseudo_id, | |
user_id, | |
traffic_source.name as channel, | |
traffic_source.medium as medium, | |
traffic_source.source as source, | |
event_name, | |
(SELECT value.int_value FROM UNNEST(event_params) WHERE key = 'ga_session_id') AS session_id, | |
(SELECT value.int_value FROM UNNEST(event_params) WHERE key = 'ga_session_number') AS session_number, | |
(SELECT value.string_value FROM UNNEST(event_params) WHERE key = 'page_referrer') AS referrer, | |
(SELECT value.string_value FROM UNNEST(event_params) WHERE key = 'page_location') AS page_path, | |
(SELECT value.string_value FROM UNNEST(event_params) WHERE event_name = 'page_view' AND key = 'page_title') AS page_title, | |
device.category as device_category, | |
device.operating_system, | |
device.language, | |
device.is_limited_ad_tracking, | |
device.web_info.browser, | |
device.web_info.hostname, | |
geo.continent, | |
geo.country, | |
geo.region, | |
geo.city | |
FROM | |
`ra-development.analytics_277223877.events_*` -- modify to your project | |
), | |
id_stitching as ( | |
SELECT | |
DISTINCT user_pseudo_id as user_pseudo_id, | |
LAST_VALUE(user_id ignore nulls) OVER ( | |
partition by user_pseudo_id | |
order by event_ts | |
ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING | |
) AS user_id, | |
MIN(event_ts) OVER ( | |
PARTITION BY user_pseudo_id | |
) AS first_seen_at, | |
MAX(event_ts) OVER ( | |
PARTITION BY user_pseudo_id | |
) AS last_seen_at | |
FROM | |
events), | |
sessions AS ( | |
SELECT | |
* | |
FROM | |
events | |
WHERE | |
event_name = 'session_start' ), | |
user_stitched_sessions as ( | |
SELECT | |
sessions.*, | |
coalesce(id_stitching.user_id, sessions.user_pseudo_id) as blended_user_id | |
FROM sessions | |
LEFT JOIN id_stitching using (user_pseudo_id) | |
), | |
sessions_with_start_end_times AS ( | |
SELECT | |
* EXCEPT (event_ts), | |
TIMESTAMP_MICROS(event_ts) AS session_start_ts, | |
CAST(LEAD(TIMESTAMP_MICROS(event_ts),1) OVER (PARTITION BY CONCAT(blended_user_id) | |
ORDER BY | |
event_ts) AS timestamp) AS session_end_ts | |
FROM | |
user_stitched_sessions ), | |
converting_events AS ( | |
SELECT | |
coalesce(id_stitching.user_id, events.user_pseudo_id) as blended_user_id, | |
FIRST_VALUE(CASE WHEN event_name in ('Contact Us','Contact Us Clicked','CTA Pressed','Form Submitted') THEN session_id END) OVER (PARTITION BY coalesce(id_stitching.user_id, events.user_pseudo_id) ORDER BY event_ts ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) as session_id, | |
1 AS count_conversions, | |
event_name, | |
CAST(TIMESTAMP_MICROS(MIN(CASE WHEN event_name in ('Contact Us','Contact Us Clicked','CTA Pressed','Form Submitted') THEN event_ts END ) OVER (PARTITION BY coalesce(id_stitching.user_id, events.user_pseudo_id))) as timestamp) AS converted_ts, | |
FROM | |
events | |
LEFT JOIN id_stitching using (user_pseudo_id) | |
WHERE | |
event_name in ('Contact Us','Contact Us Clicked','CTA Pressed','Form Submitted')), | |
converting_sessions AS ( | |
SELECT | |
* | |
FROM | |
converting_events | |
GROUP BY | |
1, | |
2, | |
3, | |
4, | |
5 ), | |
converting_sessions_deduped AS ( | |
SELECT | |
blended_user_id AS blended_user_id, | |
MAX(CASE | |
WHEN event_name in ('Contact Us','Contact Us Clicked','CTA Pressed','Form Submitted') THEN session_id | |
END | |
) AS session_id, | |
MIN(converted_ts) AS converted_ts | |
FROM | |
converting_sessions | |
GROUP BY | |
1 ), | |
converting_sessions_deduped_labelled AS ( | |
SELECT | |
c.blended_user_id, | |
s.session_start_ts, | |
s.session_end_ts, | |
c.converted_ts, | |
s.session_id AS session_id, | |
s.session_number AS session_seq, | |
CASE | |
WHEN (c.converted_ts BETWEEN s.session_start_ts AND coalesce(s.session_end_ts, current_timestamp)) THEN TRUE | |
ELSE | |
FALSE | |
END | |
AS conversion_session, | |
CASE | |
WHEN (c.converted_ts BETWEEN s.session_start_ts AND coalesce(s.session_end_ts, current_timestamp)) THEN 1 | |
ELSE | |
0 | |
END | |
AS event, | |
source, | |
medium, | |
channel, | |
referrer, | |
page_path, | |
device_category, | |
operating_system, | |
LANGUAGE, | |
is_limited_ad_tracking, | |
browser, | |
hostname, | |
continent, | |
country, | |
region, | |
city | |
FROM | |
sessions_with_start_end_times s | |
JOIN | |
converting_sessions_deduped c | |
ON | |
c.blended_user_id = s.blended_user_id | |
WHERE | |
c.converted_ts >= s.session_start_ts | |
ORDER BY | |
c.blended_user_id, | |
s.session_start_ts), | |
session_attrib_pct AS ( | |
SELECT | |
*, | |
CASE | |
WHEN session_id = LAST_VALUE(session_id) OVER (PARTITION BY blended_user_id ORDER BY session_start_ts ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) THEN 1 | |
ELSE | |
0 | |
END | |
AS LAST_click_attrib_pct, | |
CASE | |
WHEN session_id = FIRST_VALUE(session_id) OVER (PARTITION BY blended_user_id ORDER BY session_start_ts ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) THEN 1 | |
ELSE | |
0 | |
END | |
AS first_click_attrib_pct, | |
1/COUNT(session_id) OVER (PARTITION BY blended_user_id ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS even_click_attrib_pct, | |
CASE | |
WHEN session_start_ts = FIRST_VALUE(session_start_ts) OVER (PARTITION BY blended_user_id ORDER BY session_start_ts ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AND MAX(event) OVER (PARTITION BY blended_user_id ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) = 1 THEN 1.1-ROW_NUMBER() OVER (PARTITION BY blended_user_id) | |
WHEN session_start_ts > LAG(session_start_ts) OVER (PARTITION BY blended_user_id ORDER BY session_start_ts) | |
AND MAX(event) OVER (PARTITION BY blended_user_id ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) = 1 THEN ROUND(1.1-1/ROW_NUMBER() OVER (PARTITION BY blended_user_id), 2) | |
ELSE | |
NULL | |
END | |
AS weights | |
FROM | |
converting_sessions_deduped_labelled), | |
session_attrib_pct_with_time_decay AS ( | |
SELECT | |
*, | |
ROUND(CASE | |
WHEN (weights=0 OR SUM(weights) OVER (PARTITION BY blended_user_id)=0) THEN 0 | |
ELSE | |
weights/SUM(weights) OVER (PARTITION BY blended_user_id) | |
END | |
, 2) AS time_decay_attrib_pct | |
FROM | |
session_attrib_pct) | |
SELECT | |
* EXCEPT (event, | |
weights, | |
converted_ts) | |
FROM | |
session_attrib_pct_with_time_decay | |
ORDER BY | |
blended_user_id, | |
session_start_ts |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment