Skip to content

Instantly share code, notes, and snippets.

@benhannel
Created July 31, 2019 22:36
Show Gist options
  • Save benhannel/ee3d4b4c67b293032363a3553f8a7af0 to your computer and use it in GitHub Desktop.
Save benhannel/ee3d4b4c67b293032363a3553f8a7af0 to your computer and use it in GitHub Desktop.
Using SQL to join the nested JSON data with CSV data
%%time
%%sql
WITH
-- # compute application statistics, MAU and percent change in MAU.
appStats AS
(
SELECT
rows.r[2][1]."name" AS app,
rows.r[2][1]."company_name" AS company,
rows.r[4][1] AS mau,
rows.r[4][4] AS mau_percent_change
FROM
app_annie_monthly a,
unnest(a."data"."table"."rows" AS r) AS rows
WHERE
a._meta.s3.path LIKE 'app\_annie/monthly/2018-05/01/data/all\_users\_top\_usage\_US\_iphone\_100\_%'
),
-- # Get list of crunchbase orgs to join with.
crunchbaseOrgs AS
(
SELECT
founded_on AS founded_on,
uuid AS company_uuid,
short_description AS short_description,
company_name as company_name
FROM
"crunchbase_organizations"
),
-- # Get the JOINED relation from the above steps.
appStatsWithCrunchbaseOrgs as
(
SELECT
appStats.app as App,
appStats.mau as mau,
appStats.mau_percent_change as mau_percent_change,
crunchbaseOrgs.company_uuid as company_uuid,
crunchbaseOrgs.company_name as company_name,
crunchbaseOrgs.founded_on as founded_on,
crunchbaseOrgs.short_description as short_description
FROM
appStats
INNER JOIN
crunchbaseOrgs
ON appStats.company = crunchbaseOrgs.company_name
),
-- # Compute companyStatus = (IPO|ACQUIRED|CLOSED|OPERATING)
-- # There may be more than one status associated with a company, so, we do the Group By and Min.
companyStatus as
(
SELECT
company_name,
min(
case
status
when
'ipo'
then
1
when
'acquired'
then
2
when
'closed'
then
3
when
'operating'
then
4
end
) as status
FROM
"crunchbase_organizations"
GROUP BY
company_name
),
-- # JOIN with companyStatus == (OPERATING), call it ventureFunded
ventureFunded as (SELECT
appStatsWithCrunchbaseOrgs.App,
appStatsWithCrunchbaseOrgs.company_name,
appStatsWithCrunchbaseOrgs.mau_percent_change,
appStatsWithCrunchbaseOrgs.mau,
appStatsWithCrunchbaseOrgs.company_uuid,
appStatsWithCrunchbaseOrgs.founded_on,
appStatsWithCrunchbaseOrgs.short_description
FROM
appStatsWithCrunchbaseOrgs
INNER JOIN
companyStatus
ON appStatsWithCrunchbaseOrgs.company_name = companyStatus.company_name
AND companyStatus.status = 4),
-- # Find the latest round that each company raised, grouped by company UUID
latestRound AS
(
SELECT
company_uuid as cuid,
max(announced_on) as announced_on,
max(raised_amount_usd) as raised_amount_usd
FROM
"crunchbase_funding_rounds"
GROUP BY
company_uuid
),
-- # Join it back with crunchbase_funding_rounds to get other details about that company
fundingRounds AS
(
SELECT
cfr.company_uuid as company_uuid,
cfr.announced_on as announced_on,
cfr.funding_round_uuid as funding_round_uuid,
cfr.company_name as company_name,
cfr.investment_type as investment_type,
cfr.raised_amount_usd as raised_amount_usd,
cfr.country_code as country_code,
cfr.state_code as state_code,
cfr.investor_names as investor_names
FROM
"crunchbase_funding_rounds" cfr
JOIN
latestRound
ON latestRound.company_uuid = cfr.company_uuid
AND latestRound.announced_on = cfr.announced_on
),
-- # Finally, select the dataset with all the fields that are interesting to us. ventureFundedAllRegions
ventureFundedAllRegions AS (
SELECT
ventureFunded.App as App,
ventureFunded.company_name as company_name,
ventureFunded.mau as mau,
ventureFunded.mau_percent_change as mau_percent_change,
ventureFunded.short_description as short_description,
fundingRounds.announced_on as last_funding,
fundingRounds.raised_amount_usd as raised_amount_usd,
fundingRounds.country_code as country_code,
fundingRounds.state_code as state_code,
fundingRounds.investor_names as investor_names,
fundingRounds.investment_type as investment_type
FROM
ventureFunded
JOIN
fundingRounds
ON fundingRounds.company_uuid = ventureFunded.company_uuid)
SELECT * FROM ventureFundedAllRegions
ORDER BY
mau_percent_change DESC LIMIT 10
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment