Created
June 11, 2020 14:37
-
-
Save PedroMartinSteenstrup/2f728973b6a378f0348374dba4be0add to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
USE ROLE AP_TOOLS; | |
-- CAVEAT Nr 1: Snowflake only gets the history_id from Looker passed as context, so we can link to a user efficiently. | |
-- -- -- -- -- -- This is then linked to workday's data | |
-- CAVEAT Nr 2: Looker's history_id can have several Snowflake queries linked to, who each have a cost associated to. | |
-- -- -- -- -- -- therefore there is a danger of double counting. History is augmented by Looker data. | |
-- CAVEAT Nr 3: Workday's data seems to not systematically have the cost center input, though it makes little sense | |
-- -- -- -- -- -- some departments might be under-represented until we figure out why it's missing | |
-- CAVEAT Nr 4: Snowflake credits seem to be off compared to the rest of the figures | |
-- -- -- -- -- -- might have to tweak the calculation | |
select * | |
from ANALYTICS_DB.REPORTS.report_looker_usage; | |
CREATE TABLE IF NOT EXISTS ANALYTICS_DB.REPORTS.report_looker_usage | |
( | |
SNOWFLAKE_QUERY_ID VARCHAR PRIMARY KEY, | |
START_TIME TIMESTAMP_LTZ, | |
EXECUTION_STATUS VARCHAR, | |
QUERY_TYPE VARCHAR, | |
EXECUTION_TIME VARCHAR, | |
COMPILATION_TIME NUMBER, | |
QUEUED_OVERLOAD_TIME NUMBER, | |
BYTES_SCANNED NUMBER, | |
ERROR_MESSAGE VARCHAR, | |
CREDITS_USED_CLOUD_SERVICES FLOAT, | |
LOOKER_QUERY_ID NUMBER, | |
HISTORY_ID NUMBER, | |
EMAIL VARCHAR, | |
USER_ID NUMBER, | |
SOURCE VARCHAR, | |
CONNECTION_NAME VARCHAR, | |
MODEL VARCHAR, | |
LOOKER_VIEW VARCHAR, | |
FIELDS VARCHAR, | |
DASHBOARD_ID VARCHAR, | |
RESULT_SOURCE VARCHAR, | |
STATUS VARCHAR, | |
HISTORY_RUNTIME DOUBLE, | |
QUERY_RUNTIME DOUBLE, | |
WORKER_TITLE VARCHAR, | |
COST_CENTER_LVL_1 VARCHAR, | |
COST_CENTER_LVL_2 VARCHAR, | |
COST_CENTER_CODE VARCHAR, | |
WORKER_LOCATION VARCHAR, | |
WORKER_NAME VARCHAR | |
); | |
-- Get all Looker queries from Snowflake | |
MERGE INTO ANALYTICS_DB.REPORTS.report_looker_usage target USING ( | |
select JSON_EXTRACT_PATH_TEXT(looker_data, 'user_id')::int user_id, | |
JSON_EXTRACT_PATH_TEXT(looker_data, 'history_id')::int history_id, | |
QUERY_ID AS SNOWFLAKE_QUERY_ID, | |
QUERY_TYPE, | |
START_TIME, | |
EXECUTION_STATUS, | |
EXECUTION_TIME, | |
COMPILATION_TIME, | |
QUEUED_OVERLOAD_TIME, | |
BYTES_SCANNED, | |
ERROR_MESSAGE, | |
CREDITS_USED_CLOUD_SERVICES | |
from ( | |
select parse_json(split_part(qh.QUERY_TEXT, '-- Looker Query Context ', 2)) looker_data, | |
qh.QUERY_ID, | |
qh.START_TIME, | |
qh.EXECUTION_STATUS, | |
qh.EXECUTION_TIME, | |
qh.QUEUED_OVERLOAD_TIME, | |
qh.BYTES_SCANNED, | |
qh.ERROR_MESSAGE, | |
qh.COMPILATION_TIME, | |
qh.CREDITS_USED_CLOUD_SERVICES, | |
qh.QUERY_TYPE | |
from SNOWFLAKE.ACCOUNT_USAGE.QUERY_HISTORY qh | |
where qh.USER_NAME IN ('LOOKER_OBFUSCATED', 'LOOKER_UNOBFUSCATED', 'LOOKER_MONITOR') | |
and qh.QUERY_TEXT ilike '%Looker Query Context%' | |
AND START_TIME < date_trunc(day, current_timestamp) | |
AND START_TIME > DATEADD(day, -3, date_trunc(day, current_timestamp)) | |
) data | |
GROUP BY 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12 | |
) source ON source.history_id = target.HISTORY_ID | |
AND source.user_id = target.USER_ID | |
AND source.SNOWFLAKE_QUERY_ID = target.SNOWFLAKE_QUERY_ID | |
WHEN NOT MATCHED THEN INSERT | |
(user_id, history_id, SNOWFLAKE_QUERY_ID, QUERY_TYPE, START_TIME, | |
EXECUTION_STATUS, EXECUTION_TIME, COMPILATION_TIME, QUEUED_OVERLOAD_TIME, | |
BYTES_SCANNED, ERROR_MESSAGE, CREDITS_USED_CLOUD_SERVICES) | |
VALUES (source.user_id, source.history_id, source.SNOWFLAKE_QUERY_ID, source.QUERY_TYPE, source.START_TIME, | |
source.EXECUTION_STATUS, source.EXECUTION_TIME, source.COMPILATION_TIME, source.QUEUED_OVERLOAD_TIME, | |
source.BYTES_SCANNED, source.ERROR_MESSAGE, source.CREDITS_USED_CLOUD_SERVICES); | |
MERGE INTO ANALYTICS_DB.REPORTS.report_looker_usage target USING ( | |
select q.ID as LOOKER_QUERY_ID, | |
h.id as history_id, | |
u.email, | |
u.id as user_id, | |
h.source, | |
h.CONNECTION_NAME, | |
q.MODEL, | |
q.VIEW AS looker_view, | |
q.fields, | |
h.DASHBOARD_ID, | |
h.RESULT_SOURCE, | |
h.STATUS, | |
h.RUNTIME history_runtime, | |
q.RUNTIME query_runtime | |
from ANALYTICS_DB.looker.HISTORY h | |
LEFT JOIN LOOKER.QUERY q ON q.id = h.QUERY_ID | |
LEFT JOIN LOOKER.USER u ON u.ID = h.USER_ID | |
where 1 = 1 | |
AND h.CREATED_AT < date_trunc(day, current_timestamp) | |
AND h.CREATED_AT > DATEADD(day, -4, date_trunc(day, current_timestamp)) | |
GROUP BY 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14 | |
) source ON source.history_id = target.HISTORY_ID | |
AND source.user_id = target.USER_ID | |
WHEN MATCHED THEN UPDATE SET | |
LOOKER_QUERY_ID = source.LOOKER_QUERY_ID, | |
history_id = source.history_id, | |
email = source.email, | |
user_id = source.user_id, | |
source = source.source, | |
CONNECTION_NAME = source.CONNECTION_NAME, | |
MODEL = source.MODEL, | |
looker_view = source.looker_view, | |
fields = source.fields, | |
DASHBOARD_ID = source.DASHBOARD_ID, | |
RESULT_SOURCE = source.RESULT_SOURCE, | |
STATUS = source.STATUS, | |
history_runtime = source.history_runtime, | |
query_runtime = source.query_runtime | |
; | |
MERGE INTO ANALYTICS_DB.REPORTS.report_looker_usage target USING ( | |
SELECT * | |
FROM ( | |
select people.BUSINESS_TITLE as WORKER_TITLE, | |
regexp_replace( | |
regexp_replace(people.HIERARCHY_LEVEL_03, | |
regexp_substr(people.HIERARCHY_LEVEL_03, '\\(.*\\)'), ''), ' \\(\\)', | |
'') as COST_CENTER_LVL_1, | |
regexp_replace(people.COST_CENTER_NAME, '\\(DO NOT USE\\)', '') as COST_CENTER_LVL_2, | |
people.COST_CENTER_ID as COST_CENTER_CODE, | |
people.PRIMARY_WORK_EMAIL as WORKER_EMAIL, | |
people.LOCATION as WORKER_LOCATION, | |
people.WORKER as WORKER_NAME, | |
row_number() | |
over (partition by PRIMARY_WORK_EMAIL order by REPORT_EFFECTIVE_DATE_AND_TIME desc) rownr | |
from ANALYTICS_DB.WORKDAY.HEADCOUNT people | |
group by 1, 2, 3, 4, 5, 6, 7, REPORT_EFFECTIVE_DATE_AND_TIME) | |
WHERE rownr = 1 | |
) source ON lower(source.WORKER_EMAIL) = lower(target.EMAIL) AND target.WORKER_NAME is null | |
WHEN MATCHED THEN UPDATE SET | |
WORKER_TITLE = source.WORKER_TITLE, | |
COST_CENTER_LVL_1 = source.COST_CENTER_LVL_1, | |
COST_CENTER_LVL_2 = source.COST_CENTER_LVL_2, | |
COST_CENTER_CODE = source.COST_CENTER_CODE, | |
WORKER_LOCATION = source.WORKER_LOCATION, | |
WORKER_NAME = source.WORKER_NAME; | |
GRANT SELECT ON table reports.report_looker_usage TO ROLE LOOKER_ROLE_MONITOR; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment