Last active
May 19, 2016 13:38
-
-
Save amontalenti/8b6e33f35353a9c5fd47625dcd94d38e to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
SELECT title, views FROM [test.parselyblog] | |
-- utilities | |
-- STRFTIME_UTC_USEC(pub_date, "%Y-%m") | |
-- SUM(views)a | |
-- COUNT(distinct url) | |
-- find dupes | |
-- ---------- | |
SELECT title, COUNT(1) as num_urls | |
FROM [test.parselyblog] | |
GROUP BY title | |
ORDER BY num_urls DESC | |
LIMIT 100 | |
-- calculate aggs | |
-- -------------- | |
-- total views by author | |
SELECT author, SUM(views) as author_total_views | |
FROM [test.parselyblog] | |
GROUP BY author | |
-- total views by section | |
SELECT section, SUM(views) as section_total_views | |
FROM [test.parselyblog] | |
GROUP BY section | |
-- views by month over time | |
SELECT STRFTIME_UTC_USEC(pub_date, "%Y-%m") as month, SUM(views) as month_total_views | |
FROM [test.parselyblog] | |
GROUP BY month | |
ORDER BY month DESC | |
-- using subselects to do author, author # posts, and author views per post | |
SELECT author, author_posts, CAST(author_views / author_posts as INTEGER) as author_avg_views | |
FROM ( | |
SELECT author, SUM(views) as author_views, COUNT(1) as author_posts | |
FROM [test.parselyblog] | |
GROUP BY author) | |
-- using subselects to do author, author # posts, author # visitors, and total "man-days" spent on content! | |
SELECT author, author_posts, author_visitors, CAST(author_time / 60 / 24 as INTEGER) as author_man_days | |
FROM ( | |
SELECT author, SUM(visitors) as author_visitors, SUM(minutes) as author_time, COUNT(1) as author_posts | |
FROM [test.parselyblog] | |
GROUP BY author) | |
-- use subselects together with keyword match and filtering | |
SELECT has_keyword, total_views, total_posts, CAST(total_views / total_posts as INTEGER) as avg_views | |
FROM ( | |
SELECT LOWER(title) CONTAINS "facebook" AS has_keyword, SUM(views) as total_views, COUNT(1) as total_posts | |
FROM [test.parselyblog] | |
WHERE author <> "Andrew Montalenti" | |
GROUP BY has_keyword | |
) | |
/* BigQuery schema: | |
url:STRING, | |
title:STRING, | |
pub_date:TIMESTAMP, | |
author:STRING, | |
section:STRING, | |
tags:STRING, | |
sort_metric:INTEGER, | |
visitors:INTEGER, | |
views:INTEGER, | |
minutes:INTEGER, | |
shares:INTEGER, | |
views_desktop:INTEGER, | |
views_mobile:INTEGER, | |
views_tablet:INTEGER, | |
refs_social:INTEGER, | |
refs_search:INTEGER, | |
refs_internal:INTEGER, | |
refs_other:INTEGER, | |
refs_direct:INTEGER, | |
visitors_new:INTEGER, | |
visitors_returning:INTEGER, | |
views_new:INTEGER, | |
views_returning:INTEGER, | |
minutes_new:INTEGER, | |
minutes_returning:INTEGER | |
*/ |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment