this shows how many downloads are normal, in 4/2024, this was 15-75 downloads per day.
should be easy to spot outliers (like 4343
on 4/12/24).
WITH DailyCounts AS (
SELECT
DATE(created_at) AS day_start,
COUNT(*) AS download_count
FROM
downloads
GROUP BY
day_start
)
SELECT
day_start,
download_count
FROM
DailyCounts
ORDER BY
day_start DESC;
use the outliers found and see if you can narrow out how to identify only the bad ones, with a query like this:
SELECT *
FROM
downloads
WHERE
DATE(created_at) = '2024-04-10'; -- replace '2024-04-10' with your desired date
the query below is dialing in a tighter match based on os
and a LIKE match on the user_agent
col:
WITH DailyCounts AS (
SELECT
DATE(created_at) AS day_start,
COUNT(*) AS download_count
FROM
downloads
WHERE
os = 'Windows 10.0'
AND user_agent LIKE 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/%'
AND created_at BETWEEN '2000-04-01' AND '2099-04-15'
GROUP BY
day_start
)
SELECT
day_start,
download_count
FROM
DailyCounts
ORDER BY
download_count DESC;
-- Identify downloads with a download_count greater than 20 for any 24-hour period
WITH DailyCounts AS (
SELECT
DATE(created_at) AS day_start,
COUNT(*) AS download_count
FROM
downloads
WHERE
os = 'Windows 10.0'
AND user_agent LIKE 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/%'
AND created_at BETWEEN '2004-04-01' AND '2094-04-15'
GROUP BY
day_start
),
ToDelete AS (
SELECT
d.id
FROM
downloads d
JOIN
DailyCounts dc ON DATE(d.created_at) = dc.day_start
WHERE
dc.download_count > 20
)
-- Delete the identified downloads
SELECT * FROM -- change to `DELETE` after verifying
downloads
WHERE
id IN (SELECT id FROM ToDelete)
AND os = 'Windows 10.0'
AND user_agent LIKE 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/%';