Redshift Metadata

Redshift Tables Used

pg_table_def, stl_query, stl_querytext, stl_tr_conflict, stl_explain, stl_alert_event_log, stl_ddltext, stl_scan, stl_save, stl_hashjoin, stl_hash, stl_plan_info, stl_return, and information_schema.table_constraints.

Queries to Extract Features

In the snippets below, QUERY_ID, TABLE_ID, TABLE_NAME, QUERY_START_TIME, and QUERY_END_TIME are placeholders to be bound for the query under analysis.

  • execution time
-- subtracting timestamps yields an interval, not milliseconds; use datediff
select datediff(ms, starttime, endtime) as execution_time_in_ms 
from stl_query 
where query = QUERY_ID;
  • from tables
select replace(regexp_substr(sql_body, 'from\\s+[^\\(\\s]+'), 'from ', '') 
from (select listagg(text) within group (order by sequence) as sql_body 
      from stl_querytext 
      where query = QUERY_ID) as q;
  • joined in tables
-- note: regexp_substr returns only the first match; step the fourth
-- (occurrence) argument to pull subsequent joins
select distinct replace(regexp_substr(sql_body, 'join\\s+[^\\(\\s]+'), 'join ', '') 
from (select listagg(text) within group (order by sequence) as sql_body 
      from stl_querytext 
      where query = QUERY_ID) as q;
  • from table size (gb)
-- each stv_blocklist entry is one 1 MB block, so block count / 1024 = GB
select count(tbl) / 1024.0 as gb 
from (select distinct datname
        , id
        , name 
      from stv_tbl_perm 
      join pg_database 
      on pg_database.oid = db_id) as t 
join stv_blocklist 
on tbl = t.id 
where name = TABLE_NAME;
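An alternative sketch, assuming the cluster exposes svv_table_info (its size column is reported in 1 MB blocks):
select size / 1024.0 as gb
from svv_table_info
where "table" = TABLE_NAME;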
  • filters (this needs a lot of work)
select distinct replace(regexp_substr(sql_body, '(where\\s+[^\\(]+\\s+)[^(\\s+group\\s+by|order\\s+by|having|limit)]+'), 'where ', '') 
from (select listagg(text) within group (order by sequence) as sql_body 
      from stl_querytext 
      where query = QUERY_ID) as q;
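A rough alternative sketch: grab everything after where, then cut at the first terminating clause with regexp_instr (mixed case and subqueries are still unhandled):
select case
         when regexp_instr(where_part, 'group by|order by|having|limit') > 0
         then substring(where_part, 1, regexp_instr(where_part, 'group by|order by|having|limit') - 1)
         else where_part
       end as filters
from (select replace(regexp_substr(sql_body, 'where\\s+.+'), 'where ', '') as where_part
      from (select listagg(text) within group (order by sequence) as sql_body
            from stl_querytext
            where query = QUERY_ID) as q) as w;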
  • from table locks
select count(*) 
from stl_tr_conflict 
where table_id = TABLE_ID 
and xact_start_ts >= QUERY_START_TIME 
and abort_time <= QUERY_END_TIME;
  • number of concurrent queries
select count(*) 
from stl_query 
where starttime >= QUERY_START_TIME 
and endtime <= QUERY_END_TIME;
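As written, this only counts queries that ran entirely inside the window; a sketch that instead counts any query whose execution overlaps it:
select count(*)
from stl_query
where starttime <= QUERY_END_TIME
and endtime >= QUERY_START_TIME
and query <> QUERY_ID;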
  • user_id
select userid 
from stl_query 
where query = QUERY_ID;
  • query group
select label 
from stl_query 
where query = QUERY_ID;
  • query database
select database 
from stl_query
where query = QUERY_ID;
  • query aborted
select aborted 
from stl_query
where query = QUERY_ID;
  • explain steps (this requires some text cleanup)
select nodeid
   , plannode 
from stl_explain 
where query = QUERY_ID;
  • query-planner alerts
select event 
from stl_alert_event_log 
where query = QUERY_ID;
  • query-planner solutions
select solution 
from stl_alert_event_log
where query = QUERY_ID;
  • number of concurrent ddl statements
select count(*) 
from stl_ddltext 
where starttime >= QUERY_START_TIME
and endtime <= QUERY_END_TIME;
  • from table column encoding (PostgreSQL only)
select json_object_agg(columnname, encoding) 
from table_info 
where tablename = TABLE_NAME;
  • from table column sortkey (PostgreSQL only)
select json_object_agg(columnname, sortkey) 
from table_info 
where tablename = TABLE_NAME;
  • from table column distkey (PostgreSQL only)
select json_object_agg(columnname, distkey) 
from table_info 
where tablename = TABLE_NAME;
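json_object_agg exists only in PostgreSQL, so the three queries above assume table_info is an export of pg_table_def (or a view over it) into a PostgreSQL database. To pull the same per-column metadata directly in Redshift (note pg_table_def only lists tables in the current search_path):
select "column"
  , encoding
  , sortkey
  , distkey
from pg_table_def
where tablename = TABLE_NAME;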
  • join table column encoding (same pattern as the from-table queries above, run for each joined-in table)
...
  • join table column sortkey
...
  • join table column distkey
...
  • table constraints—i.e., foreign-to-primary key mappings
-- in redshift
create view table_constraints as (
  select 
    trim(c.conname)         as constraint_name
    , trim(cn.nspname)      as constraint_schema
    , trim(cc.relname)      as constraint_table_name
    , trim(ca.attname)      as constraint_column_name
    , current_database()    as table_catalog
    , trim(fn.nspname)      as table_schema
    , trim(fc.relname)      as table_name
    , trim(fa.attname)      as column_name
    , decode(c.contype
      , 'p', 'PRIMARY KEY'
      , 'u', 'UNIQUE'
      , 'f', 'FOREIGN KEY') as constraint_type
    from pg_constraint      as c
    left join pg_attribute  as ca 
      on c.conrelid = ca.attrelid and ca.attnum = any(c.conkey)
    left join pg_attribute  as fa 
      on c.confrelid = fa.attrelid and fa.attnum = any(c.confkey)
    left join pg_class      as cc 
      on ca.attrelid = cc.oid
    left join pg_class      as fc 
      on c.confrelid = fc.oid
    left join pg_namespace  as cn 
      on c.connamespace = cn.oid
    left join pg_namespace  as fn 
      on fc.relnamespace = fn.oid
    where c.contype in ('p','u','f')
    and cc.relkind = 'r'
    and not ca.attisdropped
    order by constraint_schema
      , constraint_name
      , decode(c.contype,'p',1,'u',2,'f',3)
      , ca.attnum);

-- in postgresql
select json_object_agg(constraint_table_name || '.' || constraint_column_name, table_name || '.' || column_name)
from table_constraints 
where constraint_table_name = TABLE_NAME;
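A row-per-mapping sketch that runs directly in Redshift, with no json_object_agg, restricted to foreign keys:
select constraint_table_name || '.' || constraint_column_name as fk_column
  , table_name || '.' || column_name as referenced_column
from table_constraints
where constraint_type = 'FOREIGN KEY'
and constraint_table_name = TABLE_NAME;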
  • number of leader-node operations
-- locus = 1 marks the leader node per the STL_PLAN_INFO docs (0 = compute node)
select count(case when locus = 1 then 1 end)
from stl_plan_info 
where query = QUERY_ID;
  • percent of operations on leader node
select 1.0 * count(case when locus = 1 then 1 end) / nullif(count(*), 0) 
from stl_plan_info 
where query = QUERY_ID;
  • max number of rows processed on leader node
select max(case when locus = 1 then rows else 0 end) 
from stl_plan_info 
where query = QUERY_ID;
  • max number of bytes processed on leader node
select max(case when locus = 1 then bytes else 0 end) 
from stl_plan_info 
where query = QUERY_ID;
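The four stl_plan_info features above can also be collected in a single scan; a sketch:
select count(case when locus = 1 then 1 end) as leader_ops
  , 1.0 * count(case when locus = 1 then 1 end) / nullif(count(*), 0) as leader_pct
  , max(case when locus = 1 then rows else 0 end) as leader_max_rows
  , max(case when locus = 1 then bytes else 0 end) as leader_max_bytes
from stl_plan_info
where query = QUERY_ID;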
  • was task diskbased (PostgreSQL only)
select json_object_agg(distinct tasknum, is_diskbased) 
from stl_save 
where query = QUERY_ID;
  • working memory used for task (PostgreSQL only)
select json_object_agg(distinct step, workmem) 
from stl_save 
where query = QUERY_ID;
  • total number of steps in query
select count(*) 
from stl_explain 
where query = QUERY_ID;
  • types of steps in query
select json_object_agg(tasknum, type) 
from stl_scan 
where query = QUERY_ID;
  • impact of filter(s)
-- rows_pre_filter >= rows, so this difference is the row count removed by filtering
select json_object_agg(tasknum, (rows_pre_filter - rows)) 
from stl_scan 
where query = QUERY_ID;
  • is vacuum needed
-- rows_pre_filter - rows_pre_user_filter = deleted (ghost) rows still being scanned
select json_object_agg(tasknum, (rows_pre_filter - rows_pre_user_filter)) 
from stl_scan 
where query = QUERY_ID;
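The three stl_scan aggregations can likewise come from a single row-level pass, leaving the JSON assembly to the client; a sketch:
select tasknum
  , type
  , rows_pre_filter - rows as rows_removed_by_filters
  , rows_pre_filter - rows_pre_user_filter as deleted_rows_scanned
from stl_scan
where query = QUERY_ID;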