Skip to content

Instantly share code, notes, and snippets.

@PatrickMurphy
Last active February 9, 2018 17:46
Show Gist options
  • Save PatrickMurphy/8ddd040b65911bee2261c902fa1bc2f7 to your computer and use it in GitHub Desktop.
Save PatrickMurphy/8ddd040b65911bee2261c902fa1bc2f7 to your computer and use it in GitHub Desktop.
NFL Arrest SQL WIP
SELECT Reddit_Group_Name, count(distinct Team_name) as team_count, count(1) as arrest_count, count(1)/count(distinct Team_name) as arrest_rate FROM `ArrestDateRedditGroups`
group by Reddit_Group_Name
ORDER BY `arrest_rate` ASC;
SELECT Reddit_Group_Name, count(distinct Team_name) as team_count, count(1) as arrest_count, count(1)/count(distinct Team_name) as arrest_rate FROM `ArrestDateRedditGroups`
group by Reddit_Group_Name
ORDER BY `arrest_rate` DESC;
SELECT Reddit_Group_Name, Team_name, count(1) as arrest_count
FROM `ArrestDateRedditGroups`
group by Reddit_Group_Name, Team_name
ORDER BY `arrest_count` DESC;
SELECT Reddit_Group_Name, Team_name, count(1) as arrest_count
FROM `ArrestDateRedditGroups`
group by Reddit_Group_Name, Team_name
ORDER BY Reddit_Group_Name, `arrest_count` DESC;
SELECT Reddit_Group_Name
, count(1) as arrest_count
, count(DISTINCT Team_name) AS Team_count
, count(1)/count(DISTINCT Team_name) AS arrests_scaled
, sum(CASE WHEN `ArrestSeasonState` = 'InSeason' THEN 1 ELSE 0 END) as InSeason
, sum(CASE WHEN `ArrestSeasonState` = 'InSeason' THEN 1 ELSE 0 END)/count(DISTINCT Team_name) as InSeasonScaled
, sum(CASE WHEN `ArrestSeasonState` = 'OffSeason' THEN 1 ELSE 0 END) as OffSeason
, sum(CASE WHEN `ArrestSeasonState` = 'OffSeason' THEN 1 ELSE 0 END)/count(DISTINCT Team_name) as OffSeasonScaled
, sum(CASE WHEN `ArrestSeasonState` = 'InSeason' THEN 1 ELSE 0 END)/count(1) AS InSeasonPCT
, sum(CASE WHEN `ArrestSeasonState` = 'OffSeason' THEN 1 ELSE 0 END)/count(1) AS OffSeasonPCT
, (sum(CASE WHEN `ArrestSeasonState` = 'InSeason' THEN 1 ELSE 0 END)/count(DISTINCT Team_name))/(count(1)/count(DISTINCT Team_name)) as InSeasonPCTScaled
, (sum(CASE WHEN `ArrestSeasonState` = 'OffSeason' THEN 1 ELSE 0 END)/count(DISTINCT Team_name))/(count(1)/count(DISTINCT Team_name)) as OffSeasonPCTScaled
FROM `ArrestDateCacheRedditGroups`
group by Reddit_Group_Name
ORDER BY InSeasonPCTScaled ASC;
SELECT s.season
, superbowl_date
, superbowl_afc_team
, superbowl_nfc_team
, superbowl_winning_team
, CASE WHEN superbowl_afc_team = superbowl_winning_team THEN 'AFC' ELSE 'NFC' END AS winning_conference
, sum(CASE WHEN superbowl_afc_team = act.Team THEN 1 ELSE 0 END) AS afc_total_season_arrests
, sum(CASE WHEN superbowl_nfc_team = act.Team THEN 1 ELSE 0 END) AS nfc_total_season_arrests
, CASE
WHEN sum(CASE WHEN superbowl_afc_team = act.Team THEN 1 ELSE 0 END) > sum(CASE WHEN superbowl_nfc_team = act.Team THEN 1 ELSE 0 END) THEN 'AFC' ELSE 'NFC' END as most_arrests
, CASE WHEN CASE
WHEN sum(CASE WHEN superbowl_afc_team = act.Team THEN 1 ELSE 0 END) > sum(CASE WHEN superbowl_nfc_team = act.Team THEN 1 ELSE 0 END) THEN 'AFC' ELSE 'NFC' END = CASE WHEN superbowl_afc_team = superbowl_winning_team THEN 'AFC' ELSE 'NFC' END THEN 1 ELSE 0 END as winner_correlate
FROM `seasons` as `s`
JOIN ArrestsCacheTable as `act`
ON act.Season = s.season
JOIN ArrestsCacheTable as `act_afc`
ON act_afc.Team = s.`superbowl_afc_team`
WHERE s.season <> 2018
GROUP BY s.season
SELECT s.season
, superbowl_date
, superbowl_afc_team
, superbowl_nfc_team
, superbowl_winning_team
, CASE WHEN superbowl_afc_team = superbowl_winning_team THEN 'AFC' ELSE 'NFC' END AS winning_conference
, sum(CASE WHEN superbowl_afc_team = act.Team THEN 1 ELSE 0 END) AS afc_total_season_arrests
, sum(CASE WHEN superbowl_nfc_team = act.Team THEN 1 ELSE 0 END) AS nfc_total_season_arrests
, CASE
WHEN sum(CASE WHEN superbowl_afc_team = act.Team THEN 1 ELSE 0 END) > sum(CASE WHEN superbowl_nfc_team = act.Team THEN 1 ELSE 0 END) THEN 'AFC' ELSE 'NFC' END as most_arrests
, CASE WHEN CASE
WHEN sum(CASE WHEN superbowl_afc_team = act.Team THEN 1 ELSE 0 END) > sum(CASE WHEN superbowl_nfc_team = act.Team THEN 1 ELSE 0 END) THEN 'AFC' ELSE 'NFC' END = CASE WHEN superbowl_afc_team = superbowl_winning_team THEN 'AFC' ELSE 'NFC' END THEN 1 ELSE 0 END as winner_correlate
FROM `seasons` as `s`
JOIN ArrestsCacheTable as `act`
ON act.Season = s.season
WHERE s.season <> 2018
GROUP BY s.season
-- get rank by date
SET @prev_count = NULL;
SET @prev_date = NULL;
SET @prev_team = NULL;
SET @rank_count = 0;
SELECT
a.Date
, a.team
, a.arrest_count
, CASE
WHEN @prev_count = a.arrest_count
AND @prev_date = a.Date
AND @prev_team = a.team
THEN @rank_count
WHEN @prev_count := a.arrest_count
THEN @rank_count := (@rank_count + 1)
END AS ranking
FROM ArrestCountByDateAndTeam as a
ORDER BY `ranking` DESC
SELECT @row_num := IF(@prev_value=a.team,@row_num+1,1) AS RowNumber
,a.team
,a.Date
,a.arrest_count
,@prev_value := a.team
FROM ArrestCountByDateAndTeam as a,
(SELECT @row_num := 1) x,
(SELECT @prev_value := '') y
ORDER BY a.team, a.Date DESC
SELECT @row_num := IF(@prev_value=a.team,@row_num+1,1) AS RowNumber
,a.team
,a.Date
,a.arrest_count
,@prev_value := a.team
FROM ArrestCountByDateAndTeam as a,
(SELECT @row_num := 1) x,
(SELECT @prev_value := '') y
WHERE @row_num := IF(@prev_value=a.team,@row_num+1,1) = 1
ORDER BY a.team
-- get count of time MIN not first by date
SELECT team2, count(1) as cnt FROM (
SELECT a.Date
, a.team as team1
, b.team as team2
, a.arrest_count as team1_count
, b.arrest_count as team2_count
, CASE WHEN a.arrest_count < b.arrest_count THEN 'Less' ELSE 'More' END AS less_or_more
FROM `ArrestCountByDateAndTeam` as a
JOIN ArrestCountByDateAndTeam as b
ON a.Date = b.Date
AND b.team != 'MIN'
AND a.arrest_count <= b.arrest_count
WHERE a.team = 'MIN' AND a.Date >= '2012-01-01'
ORDER BY `b`.`Date` DESC) as t2
WHERE t2.less_or_more = 'More'
GROUP BY team2
ORDER BY count(1) DESC
SELECT
a.Date
, max(CASE WHEN a.arrest_count = max(a.arrest_count) THEN a.team ELSE NULL END) AS top_team
, max(a.arrest_count) as arrest_count
FROM ArrestCountByDateAndTeam as a
GROUP BY a.Date
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment