Skip to content

Instantly share code, notes, and snippets.

@doryokujin
Created December 6, 2018 01:32
Show Gist options
  • Save doryokujin/b06e4390156402eff4e07fb1498a4b57 to your computer and use it in GitHub Desktop.
Save doryokujin/b06e4390156402eff4e07fb1498a4b57 to your computer and use it in GitHub Desktop.
『Treasure Data でアクセスログ分析の限界に挑む』その④ 〜アクセスに基づいたユーザーセグメントの作成 月次編(時間帯)〜 ref: https://qiita.com/doryokujin/items/750e3f322c2bc00371ac
/* TD_SCHEDULED_TIME() = '2018-11-23 11:11:00' */
/*
SELECT target_month, segment, COUNT(1) AS cnt
FROM
(
*/
SELECT target_month,
CASE
WHEN 50<=ratio_freq_sleeping OR 50<=ratio_pv_sleeping THEN 'sleeping'
WHEN 50<=ratio_freq_morning OR 50<=ratio_pv_morning THEN 'morning'
WHEN 50<=ratio_freq_working OR 50<=ratio_pv_working THEN 'working'
WHEN 50<=ratio_freq_night OR 50<=ratio_pv_night THEN 'night'
WHEN
(30<=ratio_freq_sleeping OR 30<=ratio_pv_sleeping) AND (30<=ratio_freq_morning OR 30<=ratio_pv_morning)
THEN 'sleeping,morning'
WHEN
(30<=ratio_freq_sleeping OR 30<=ratio_pv_sleeping) AND (30<=ratio_freq_working OR 30<=ratio_pv_working)
THEN 'sleeping,working'
WHEN
(30<=ratio_freq_sleeping OR 30<=ratio_pv_sleeping) AND (30<=ratio_freq_night OR 30<=ratio_pv_night)
THEN 'sleeping,night'
WHEN
(30<=ratio_freq_morning OR 30<=ratio_pv_morning) AND (30<=ratio_freq_working OR 30<=ratio_pv_working)
THEN 'morning,working'
WHEN
(30<=ratio_freq_morning OR 30<=ratio_pv_morning) AND (30<=ratio_freq_night OR 30<=ratio_pv_night)
THEN 'morning,night'
WHEN
(30<=ratio_freq_working OR 30<=ratio_pv_working) AND (30<=ratio_freq_night OR 30<=ratio_pv_night)
THEN 'working,night'
WHEN freq IS NULL THEN 'non_active'
ELSE 'alltime'
END AS segment,
freq, freq_sleeping, freq_morning, freq_working, freq_night,
ratio_freq_sleeping, ratio_freq_morning, ratio_freq_working, ratio_freq_night,
pv, pv_sleeping, pv_morning, pv_working, pv_night,
ratio_pv_sleeping, ratio_pv_morning, ratio_pv_working, ratio_pv_night
FROM
(
SELECT
TD_TIME_FORMAT(TD_DATE_TRUNC('month', TD_DATE_TRUNC('month', TD_SCHEDULED_TIME(),'JST')-1, 'JST'),'yyyy-MM-dd','JST') AS target_month,
IF(past_month.td_client_id IS NOT NULL, past_month.td_client_id, past_month_ago.td_client_id ) AS td_client_id,
freq, freq_sleeping, freq_morning, freq_working, freq_night,
ROUND(1.0*freq_sleeping/freq,2)*100 AS ratio_freq_sleeping,
ROUND(1.0*freq_morning /freq,2)*100 AS ratio_freq_morning,
ROUND(1.0*freq_working /freq,2)*100 AS ratio_freq_working,
ROUND(1.0*freq_night /freq,2)*100 AS ratio_freq_night,
pv, pv_sleeping, pv_morning, pv_working, pv_night,
ROUND(1.0*pv_sleeping/pv,2)*100 AS ratio_pv_sleeping,
ROUND(1.0*pv_morning /pv,2)*100 AS ratio_pv_morning,
ROUND(1.0*pv_working /pv,2)*100 AS ratio_pv_working,
ROUND(1.0*pv_night /pv,2)*100 AS ratio_pv_night,
min_time, max_time
FROM
(
SELECT
td_client_id,
SUM(IF(hour IN ('00','01','02','03','04','05'), 1, 0)) AS freq_sleeping,
SUM(IF(hour IN ('06','07','08','09'), 1, 0)) AS freq_morning,
SUM(IF(hour IN ('10','11','12','13','14','15','16','17'), 1, 0)) AS freq_working,
SUM(IF(hour IN ('18','19','20','21','22','23'), 1, 0)) AS freq_night,
COUNT(1) AS freq,
SUM(IF(hour IN ('00','01','02','03','04','05'), pv, 0)) AS pv_sleeping,
SUM(IF(hour IN ('06','07','08','09'), pv, 0)) AS pv_morning,
SUM(IF(hour IN ('10','11','12','13','14','15','16','17'), pv, 0)) AS pv_working,
SUM(IF(hour IN ('18','19','20','21','22','23'), pv, 0)) AS pv_night,
SUM(pv) AS pv,
TD_TIME_FORMAT(MIN(min_time),'yyyy-MM-dd','JST') AS min_time, TD_TIME_FORMAT(MAX(max_time),'yyyy-MM-dd','JST') AS max_time
FROM
(
SELECT td_client_id, TD_DATE_TRUNC('day',time,'JST') AS access_day, TD_TIME_FORMAT(time,'HH','JST') AS hour, COUNT(1) AS pv, MIN(time) AS min_time, MAX(time) AS max_time
FROM pageviews
WHERE TD_INTERVAL(time, '-1M', 'JST')
GROUP BY td_client_id, TD_DATE_TRUNC('day',time,'JST'), TD_TIME_FORMAT(time,'HH','JST')
)
GROUP BY td_client_id
) past_month
FULL OUTER JOIN
(
SELECT td_client_id
FROM pageviews
WHERE TD_INTERVAL(time, '-10y/-1M', 'JST') /* 1ヶ月前より過去 */
GROUP BY td_client_id
) past_month_ago
ON past_month.td_client_id = past_month_ago.td_client_id
)
/* 前月分のみのユーザーのみ取得で良いのならば past_month_ago サブクエリは不要 *//*
)
GROUP BY target_month, segment HAVING segment != 'non_active'
ORDER BY cnt DESC
*/
SELECT td_client_id, TD_DATE_TRUNC('day',time,'JST') AS access_day, TD_TIME_FORMAT(time,'HH','JST') AS hour,
COUNT(1) AS pv, MIN(time) AS min_time, MAX(time) AS max_time
FROM pageviews
WHERE TD_INTERVAL(time, '-1M', 'JST')
GROUP BY td_client_id, TD_DATE_TRUNC('day',time,'JST'), TD_TIME_FORMAT(time,'HH','JST')
SUM(IF(hour IN ('00','01','02','03','04','05'), 1, 0)) AS freq_sleeping,
SUM(IF(hour IN ('06','07','08','09'), 1, 0)) AS freq_morning,
SUM(IF(hour IN ('10','11','12','13','14','15','16','17'), 1, 0)) AS freq_working,
SUM(IF(hour IN ('18','19','20','21','22','23'), 1, 0)) AS freq_night,
ROUND(1.0*freq_sleeping/freq,2)*100 AS ratio_freq_sleeping,
ROUND(1.0*freq_morning /freq,2)*100 AS ratio_freq_morning,
ROUND(1.0*freq_working /freq,2)*100 AS ratio_freq_working,
ROUND(1.0*freq_night /freq,2)*100 AS ratio_freq_night,
FULL OUTER JOIN
(
SELECT td_client_id
FROM pageviews
WHERE TD_INTERVAL(time, '-10y/-1M', 'JST') /* 1ヶ月前より過去 */
GROUP BY td_client_id
) past_month_ago
ON past_month.td_client_id = past_month_ago.td_client_id
IF(past_month.td_client_id IS NOT NULL, past_month.td_client_id, past_month_ago.td_client_id ) AS td_client_id,
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment