Find out who modified a IAM user policy
SELECT eventname,
useridentity.arn,
sourceIPAddress,
eventtime,
requestparameters
FROM db.name
WHERE eventsource = 'iam.amazonaws.com'
AND eventtime > '2018-01-01T00:00:00Z'
AND eventtime < '2018-05-09T00:00:00Z'
AND (eventname = 'AttachUserPolicy'
OR eventname = 'PutUserPolicy')
ORDER BY eventtime
Find out who modified a IAM role policy
SELECT eventname,
useridentity.arn,
sourceIPAddress,
eventtime,
requestparameters
FROM db.name
WHERE eventsource = 'iam.amazonaws.com'
AND eventtime > '2018-01-01T00:00:00Z'
AND (eventname = 'AttachRolePolicy'
OR eventname = 'PutRolePolicy')
ORDER BY eventtime
Blame user for resource:
SELECT eventname,
useridentity.arn,
sourceIPAddress,
eventtime,
requestparameters
FROM db.name
WHERE (requestparameters LIKE '%i-12345678%'
OR requestparameters LIKE '%sg-12345678%')
AND eventtime > '2018-03-01T00:00:00Z'
AND eventtime < '2018-03-20T00:00:00Z'
ORDER BY eventtime asc
Find out who started instances in time frame in AWS region
SELECT eventname,
useridentity.arn,
sourceIPAddress,
eventtime,
requestparameters
FROM db.name
WHERE eventname = 'RunInstances'
AND awsregion = 'eu-central-1'
AND eventtime > '2018-05-01T00:00:00Z'
AND eventtime < '2018-06-07T00:00:00Z'
ORDER BY eventtime asc
Create ELB table
CREATE EXTERNAL TABLE IF NOT EXISTS elb.elb_logs (
request_timestamp string,
elb_name string,
request_ip string,
request_port int,
backend_ip string,
backend_port int,
request_processing_time double,
backend_processing_time double,
client_response_time double,
elb_response_code string,
backend_response_code string,
received_bytes bigint,
sent_bytes bigint,
request_verb string,
url string,
protocol string,
user_agent string,
ssl_cipher string,
ssl_protocol string
)
ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.RegexSerDe'
WITH SERDEPROPERTIES (
'serialization.format' = '1', 'input.regex' = '([^ ]*) ([^ ]*) ([^ ]*):([0-9]*) ([^ ]*)[:\-]([0-9]*) ([-.0-9]*) ([-.0-9]*) ([-.0-9]*) (|[-0-9]*) (-|[-0-9]*) ([-0-9]*) ([-0-9]*) \\\"([^ ]*) ([^ ]*) (- |[^ ]*)\\\" (\"[^\"]*\") ([A-Z0-9-]+) ([A-Za-z0-9.-]*)$' ) LOCATION 's3://bucket/AWSLogs/account_id/elasticloadbalancing/';
Static resources in period of time
SELECT t.url,
count(t.url) AS count,
t.method
FROM
(SELECT url,
request_verb AS method
FROM "db"."elb_logs"
WHERE url LIKE '%.png'
or url LIKE '%.jpg'
or url LIKE '%.jpeg'
or url LIKE '%.css'
or url LIKE '%.js'
AND request_timestamp > '2018-07-17T00:00:00.00Z'
AND request_timestamp < '2018-07-24T00:00:00.00Z') t
GROUP BY t.url, t.method
ORDER BY count(t.url) desc
Order ELB logs by hostname
SELECT t.hostname, count(t.hostname) as count
FROM
(SELECT regexp_extract(url,
'(//)([^:]+):', 2) AS hostname, url, elb_response_code, user_agent
FROM db.elb_logs
WHERE timestamp > '2018-05-19T08:45:00.00Z') t
GROUP BY t.hostname
ORDER BY count(t.hostname) DESC
Create ALB table
CREATE EXTERNAL TABLE IF NOT EXISTS alb.acc (
type string,
time string,
elb string,
client_ip string,
client_port int,
target_ip string,
target_port int,
request_processing_time double,
target_processing_time double,
response_processing_time double,
elb_status_code string,
target_status_code string,
received_bytes bigint,
sent_bytes bigint,
request_verb string,
request_url string,
request_proto string,
user_agent string,
ssl_cipher string,
ssl_protocol string,
target_group_arn string,
trace_id string,
domain_name string,
chosen_cert_arn string,
matched_rule_priority string,
request_creation_time string,
actions_executed string,
redirect_url string,
new_field string
)
ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.RegexSerDe'
WITH SERDEPROPERTIES (
'serialization.format' = '1', 'input.regex' = '([^ ]*) ([^ ]*) ([^ ]*) ([^ ]*):([0-9]*) ([^ ]*)[:-]([0-9]*) ([-.0-9]*) ([-.0-9]*) ([-.0-9]*) (|[-0-9]*) (-|[-0-9]*) ([-0-9]*) ([-0-9]*) \"([^ ]*) ([^ ]*) (- |[^ ]*)\" \"([^\"]*)\" ([A-Z0-9-]+) ([A-Za-z0-9.-]*) ([^ ]*) \"([^\"]*)\" \"([^\"]*)\" \"([^\"]*)\" ([-.0-9]*) ([^ ]*) \"([^\"]*)\"($| \"[^ ]*\")(.*)') LOCATION 's3://bucket/AWSLogs/account_id/elasticloadbalancing/eu-central-1'
Aggregate ALB requests by IP
SELECT t.client_ip,
count(t.client_ip)
FROM
(SELECT client_ip,
time
FROM "alb"."acc"
WHERE elb LIKE 'app/LBNAME%'
AND time > '2018-07-01T00:00:00Z') t
GROUP BY t.client_ip
ORDER BY count(t.client_ip) DESC;
Find errors by time
SELECT request_timestamp,
request_ip,
url,
request_verb,
elb_status_code,
user_agent
FROM "db"."alb_logs"
WHERE time > '2018-05-24T08:45:00.00Z'
AND time < '2018-05-24T09:30:00.00Z'
AND elb_status_code LIKE '5%' limit 100
Group by errors
SELECT count(elb_status_code) as count, url
FROM db.elb_logs
WHERE time > '2022-01-24T08:45:00.00Z'
AND time < '2022-01-24T09:05:00.00Z'
AND elb_status_code LIKE '5%'
group by url
order by count desc
Some extra information about number of requests, request size, for calculation if CloudFront can safe money. Adjust according to your private agreement
SELECT sum(sent_bytes) AS sent_bytes,
count(*) AS request_count,
sum(sent_bytes)/count(*) AS average_bytes,
sum(sent_bytes)/(100*1024) AS free_requests,
(count(*) - sum(sent_bytes)/(100*1024)) as payed_requests
((count(*) - sum(sent_bytes)/(100*1024))/count(*))*100.0 as payed_requests_prcnt
FROM "db"."alb_logs"
Retrieve the ten most used domain names since a certain day, excluding CloudFront requests.
SELECT count(hostname) AS count,
hostname
FROM
(SELECT regexp_extract(request_url,
'(//)([^:]+):', 2) AS hostname, request_url
FROM "alb"."ntv_web_prod"
WHERE user_agent != 'Amazon CloudFront'
AND domain_name = '-'
AND time > '2018-12-17T00:00:00Z')t
GROUP BY t.hostname
ORDER BY count(t.hostname) DESC limit 10