How to get an estimate of the impact of writing Postgres logs with log_min_duration_statement = 0
:
-
Do
select pg_stat_statements_reset();
and wait N seconds (where N >> 60 – say 1-24 hours, covering typical busy hours). Remember when the reset happened and write the timestamp down somewhere – it will be needed later! -
Check if
select count(*) from pg_stat_statements
is lower than pg_stat_statements.max
. If it's equal to it, then raise pg_stat_statements.max
and restart from step 1. -
Get the estimate:
-- Record the reset timestamp in a psql variable (fill in the real time).
-- NB: no trailing ";" after the \set value — psql meta-commands are not
-- semicolon-terminated, so the stray ";" would become part of the variable
-- value and break the timestamp literal used below.
\set TS_PGSS_RESET 'XXXX-XX-XX XX:XX:XX'
select
  sum(calls * length(query)) as total_bytes,
  sum(calls * length(query)) / extract(epoch from now() - :'TS_PGSS_RESET') as bytes_per_sec
from pg_stat_statements;
-- this will give the number of bytes per second.
This is our estimate. The real load will be slightly higher due to:
- presence of concrete parameter values (pg_stat_statements.query doesn't have them in general),
- additional wrapping text for each query (log line prefix, keywords like
LOG: duration: XXX ms statement:
), - additional messages (connection/disconnection, autovacuum, locks, tmp files logging, error messages, warning, etc).
Also, if pg_stat_statements.track = all
, some queries might be counted multiple times.
Also, it is worth taking into account how the workload is usually distributed over a day / a week. For example, if you see that bytes_per_second
is around ~500 kB/sec, this means that during the busiest hours spikes of many MB/s may happen, and this can significantly impact disk performance.
Alternative query:
-- Alternative: derive the elapsed time from pg_stat_database.stats_reset
-- instead of a manually recorded timestamp.
-- WARNING: this will be OK only if pg_stat_reset() and
-- pg_stat_statements_reset() were last invoked at the same time !
with const(stats_since, "pg_stat_statements.max") as (
select
(select stats_reset from pg_stat_database where datname = current_database()),
(select setting from pg_settings where name = 'pg_stat_statements.max')
)
select
(select stats_since from const),
(select now() - stats_since from const) stats_age,
count(*) as query_groups,
-- if query_groups is at pg_stat_statements.max, entries were evicted and
-- the estimate below is an underestimate — see step 2 above
(select "pg_stat_statements.max" from const),
sum(calls * length(query)) as total_bytes,
sum(calls * length(query)) / extract(epoch from now() - (select stats_since from const)) as bytes_per_sec
from pg_stat_statements
;
How to reset stats (fully) and remember reset time in psql var:
-- Reset all stats in a single statement and capture the reset time:
-- \gset stores the output column into the psql variable :TS_PGSS_RESET
-- (the quoted alias fixes the variable name and its case).
select now() as "TS_PGSS_RESET"
from
pg_stat_reset(),
pg_stat_reset_shared('archiver') a,
pg_stat_reset_shared('bgwriter') b,
pg_stat_statements_reset()
--, pg_stat_kcache_reset() -- optional, uncomment if needed
\gset
select :'TS_PGSS_RESET' as reset_timestamp;
show blocked queries
-- Show blocked sessions and who blocks them: pair every ungranted lock with
-- a granted lock on the same object held by a different backend.
SELECT blocked_locks.pid AS blocked_pid,
blocked_activity.usename AS blocked_user,
blocking_locks.pid AS blocking_pid,
blocking_activity.usename AS blocking_user,
blocked_activity.query AS blocked_statement,
blocking_activity.query AS current_statement_in_blocking_process
FROM pg_catalog.pg_locks blocked_locks
JOIN pg_catalog.pg_stat_activity blocked_activity ON blocked_activity.pid = blocked_locks.pid
JOIN pg_catalog.pg_locks blocking_locks
-- same lock target: every identifying column must match; IS NOT DISTINCT FROM
-- treats two NULLs as equal (plain "=" would drop those rows)
ON blocking_locks.locktype = blocked_locks.locktype
AND blocking_locks.database IS NOT DISTINCT FROM blocked_locks.database
AND blocking_locks.relation IS NOT DISTINCT FROM blocked_locks.relation
AND blocking_locks.page IS NOT DISTINCT FROM blocked_locks.page
AND blocking_locks.tuple IS NOT DISTINCT FROM blocked_locks.tuple
AND blocking_locks.virtualxid IS NOT DISTINCT FROM blocked_locks.virtualxid
AND blocking_locks.transactionid IS NOT DISTINCT FROM blocked_locks.transactionid
AND blocking_locks.classid IS NOT DISTINCT FROM blocked_locks.classid
AND blocking_locks.objid IS NOT DISTINCT FROM blocked_locks.objid
AND blocking_locks.objsubid IS NOT DISTINCT FROM blocked_locks.objsubid
-- a backend never blocks itself
AND blocking_locks.pid != blocked_locks.pid
JOIN pg_catalog.pg_stat_activity blocking_activity ON blocking_activity.pid = blocking_locks.pid
WHERE NOT blocked_locks.granted;
How to kill pg backend
-- Soft: cancel the backend's current query, the session stays connected.
SELECT pg_cancel_backend(<pid of the process>);
-- Hard: terminate the whole backend, disconnecting the client.
SELECT pg_terminate_backend(<pid of the process>);
Show vacuum stats:
-- Progress of all running (auto)vacuums, longest-running first.
SELECT
  p.pid,
  now() - a.xact_start AS duration,
  -- 'f' = not waiting (mimics the old boolean "waiting" column)
  coalesce(wait_event_type ||'.'|| wait_event, 'f') AS waiting,
  CASE
    WHEN a.query ~* '^autovacuum.*to prevent wraparound' THEN 'wraparound'
    WHEN a.query ~* '^vacuum' THEN 'user'
    ELSE 'regular'
  END AS mode,
  p.datname AS database,
  p.relid::regclass AS table,
  p.phase,
  pg_size_pretty(p.heap_blks_total * current_setting('block_size')::int) AS table_size,
  pg_size_pretty(pg_total_relation_size(relid)) AS total_size,
  pg_size_pretty(p.heap_blks_scanned * current_setting('block_size')::int) AS scanned,
  pg_size_pretty(p.heap_blks_vacuumed * current_setting('block_size')::int) AS vacuumed,
  -- nullif() prevents division_by_zero while the totals are still 0
  -- (early phases); the percentage shows as NULL instead of erroring out
  round(100.0 * p.heap_blks_scanned / nullif(p.heap_blks_total, 0), 1) AS scanned_pct,
  round(100.0 * p.heap_blks_vacuumed / nullif(p.heap_blks_total, 0), 1) AS vacuumed_pct,
  p.index_vacuum_count,
  round(100.0 * p.num_dead_tuples / nullif(p.max_dead_tuples, 0), 1) AS dead_pct
FROM pg_stat_progress_vacuum p
JOIN pg_stat_activity a USING (pid)
ORDER BY now() - a.xact_start DESC;
Check for reindex
-- Generate one "select * from pgstatindex('schema.index')" call per index of
-- the table (pgstatindex comes from the pgstattuple extension).
select format('select * from pgstatindex(%L)', schemaname || '.' || indexname)
from pg_indexes
where tablename = '<TABLENAME>';
And then reindex if necessary
-- %I quotes identifiers when needed, so mixed-case or reserved-word
-- schema/index names produce valid statements (plain concatenation breaks).
select format('reindex index CONCURRENTLY %I.%I;', schemaname, indexname)
from pg_indexes
where tablename = '<TABLENAME>';
Show index in progress
-- Progress of CREATE INDEX [CONCURRENTLY] operations.
select index_relid::regclass,
       phase,
       lockers_total||'/'||lockers_done as "lockers total/done",
       current_locker_pid,
       -- guard the divisor, not the dividend: blocks_total / tuples_total
       -- are 0 in phases that do not report them, so testing *_done (as the
       -- original did) still left a division_by_zero window
       CASE WHEN blocks_total = 0 THEN 0
            ELSE blocks_done*100/blocks_total
       END as blocks_progress,
       CASE WHEN tuples_total = 0 THEN 0
            ELSE tuples_done*100/tuples_total
       END as tuples_progress
from pg_stat_progress_create_index;
How many WAL files are already replayed / still remaining while recovering
# WAL files still to be replayed: the "recovering <walfile>" ps entry names
# the file currently being applied; everything listed after it (ls sorts
# names, and WAL names sort chronologically) is still pending.
# NOTE(review): awk field $14 is position-dependent — verify against your
# actual ps output before trusting the number.
ls -lah /var/lib/postgresql/data/pg_wal/ | grep -A 100000 $(ps aux | grep recovering | \
grep -P ^postgres | awk '{print $14}') | wc -l
# WAL files already replayed (everything listed before the current file):
ls -lah /var/lib/postgresql/data/pg_wal/ | grep -B 100000 $(ps aux | grep recovering | \
grep -P ^postgres | awk '{print $14}') | wc -l
rebalance partitions
=# CALL partman.partition_data_proc('public.table_name');
NOTICE: Total rows moved: 0
NOTICE: Ensure to VACUUM ANALYZE the parent (and source table if used) after partitioning data
CALL
create partitions with specific time
-- NB: straight ASCII quotes — the typographic quotes the snippet was pasted
-- with (‘…’) are not valid SQL string delimiters and fail to parse.
select partman.create_partition_time('public.table_name', p_partition_times := ARRAY['2021-02-09 22:00:00'::timestamp without time zone]);
shell to container without docker|kubectl exec
# Find the node where the pod is running, e.g.:
NODE=$(kubectl get pods -n teamcity \
-o=jsonpath='{.items[0].spec.nodeName}' -l app=POD_LABEL)
# Get the external IP address to connect to the node.
# NB: JSONPath filter expressions need the current-object sigil "@" —
# "?(.type==...)" matches nothing.
ADDRESS=$(kubectl get nodes \
-o jsonpath='{.items[*].status.addresses[?(@.type=="ExternalIP")].address}' \
-l kubernetes.io/hostname=${NODE})
ssh ${ADDRESS}
sudo -s
# Resolve the container's host PID and enter all of its namespaces:
PID=$(docker ps -q | xargs docker inspect --format '{{.State.Pid}} {{.Name}}' | \
grep POD_NAME | grep -v POD | cut -d' ' -f1)
nsenter -t ${PID} -a
# If the pod doesn't have a shell you can change root
# and provide host binaries with an additional arg: --root=/
Create a flame graph
# Sample all CPUs 0-9 at 99 Hz with call graphs for 20 seconds, then render.
FILE=perf.data
perf record -F 99 -a -g -o $FILE -C0-9 -- sleep 20
perf script -i $FILE > ${FILE}_out
# The stackcollapse output is folded stacks, not SVG — name it accordingly
# (the original misleadingly called this intermediate "${FILE}_svg").
./stackcollapse-perf.pl ${FILE}_out > ${FILE}_folded
./flamegraph.pl ${FILE}_folded > ${FILE}.svg
open ${FILE}.svg
Get all ips in sorted order
PROJECT=test
# List internal 10.x addresses sorted numerically per octet. awk appends a
# "." to the IP so that "sort -t ." sees each octet as its own field.
# NB: each sort key must cover a single field ("-k 3,3n", not "-k 3,4n") —
# a key spanning two dot-separated fields does not compare octets numerically.
gcloud compute addresses list --project=${PROJECT} | \
grep " 10."| awk '{print $2 ".\t" $1 "\t" $7}'| \
sort -t . -k 2,2n -k 3,3n -k 4,4n| grep -e "\.15[0-9]"
capture http headers
# Print HTTP request/response lines and headers from live traffic on port 4080.
# The BPF expression keeps only TCP segments with a non-empty payload
# (IP total length minus IP header minus TCP header != 0); stdbuf forces
# line-buffered output so headers appear immediately.
sudo stdbuf -oL -eL /usr/sbin/tcpdump -A -s 10240 "tcp port 4080 and (((ip[2:2] - ((ip[0]&0xf)<<2)) - ((tcp[12]&0xf0)>>2)) != 0)" | egrep -a --line-buffered ".+(GET |HTTP\/|POST )|^[A-Za-z0-9-]+: " | perl -nle 'BEGIN{$|=1} { s/.*?(GET |HTTP\/[0-9.]* |POST )/\n$1/g; print }'
nginx json log
# JSON access-log format; escape=json makes nginx JSON-escape variable values.
# NB: nginx variable names cannot contain spaces — "$server port" /
# "$server protocol" in the original were invalid; the correct variables are
# $server_port and $server_protocol.
log_format logger-json-log escape=json
'{'
'"body_bytes_sent":"$body_bytes_sent",'
'"bytes_sent":"$bytes_sent",'
'"http_host":"$http_host",'
'"msec":"$msec",'
'"remote_addr":"$remote_addr",'
'"request_length":"$request_length",'
'"request_method":"$request_method",'
'"request_uri":"$request_uri",'
'"server_port":"$server_port",'
'"server_protocol":"$server_protocol",'
'"ssl_protocol":"$ssl_protocol",'
'"status":"$status",'
'"upstream_response_time":"$upstream_response_time",'
'"upstream_addr":"$upstream_addr",'
'"upstream_connect_time":"$upstream_connect_time"'
'}';
k8s rollover deployment
APP_NAME=whatever
CONTEXT=blabla
# Setting an env var with a timestamped value changes the pod template on
# every run, which triggers a rolling restart of the deployment.
kubectl --context ${CONTEXT} -n ${APP_NAME} set \
env deployment/${APP_NAME} REBALANCE_CONNECTIONS=$(whoami)_$(date "+%Y/%m/%d_%H:%M:%S")