Last active
October 5, 2022 16:55
-
-
Save jtalmi/d72973461bd8d37d32bcdc9dcc865d8d to your computer and use it in GitHub Desktop.
A script that lists the dbt models which use macros changed relative to a given git branch
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3
'''
Script to detect models downstream of changed macros, relative to a git branch.

Usage:
    $ python3 models_using_changed_macros.py --branch master --children --manifest_path /path/to/manifest.json
'''
import os
import sys
import json
import argparse
import subprocess
from typing import List, Dict
def _find_changed_macros(branch: str) -> List[str]:
    ''' Returns the file stems of changed dbt macro files relative to a git branch.

    Runs ``git diff <branch> --name-only -z`` in the current working directory
    and keeps the changed paths that start with ``macros/``, end with ``.sql``
    and still exist on disk under the current working directory.

    Args:
        branch: git revision to diff the working tree against.

    Returns:
        Base names (no directory, no ``.sql`` extension) of the changed macro
        files, in the order git reports them.

    Raises:
        subprocess.CalledProcessError: if the git command exits non-zero.
    '''
    # -z makes git emit NUL-terminated, unquoted paths. Splitting the plain
    # output on whitespace tore paths containing spaces into pieces, which
    # then failed the macros/*.sql filter and were silently dropped.
    raw_output = subprocess.check_output(["git", "diff", branch, "--name-only", "-z"])
    changed_paths = [path.decode('utf-8') for path in raw_output.split(b'\0') if path]

    changed_macros = []
    for path in changed_paths:
        if not (path.startswith('macros/') and path.endswith('.sql')):
            continue
        stem = os.path.splitext(path)[0]
        # Files that appear in the diff but are no longer on disk are skipped.
        if os.path.exists("{}/{}.sql".format(os.getcwd(), stem)):
            changed_macros.append(os.path.basename(stem))
    return changed_macros
def _fetch_manifest(manifest_path: str) -> Dict:
    ''' Loads the JSON manifest stored at manifest_path and returns it parsed.

    Args:
        manifest_path: path to the manifest JSON file.

    Returns:
        The decoded JSON document.

    Raises:
        Exception: if the file cannot be opened or read; the underlying
            IOError is chained as the cause. JSON decoding errors are not
            intercepted and propagate unchanged.
    '''
    try:
        with open(manifest_path, encoding='utf-8') as f:
            return json.load(f)
    except IOError as err:
        # Interpolate the path into the message. Passing it as a second
        # positional argument left the "%s" placeholder unformatted and made
        # the exception render as a tuple.
        raise Exception("Could not find manifest file in %s" % manifest_path) from err
def _fetch_macro_child_map(manifest: Dict) -> Dict[str, List[str]]:
    ''' Builds a mapping from macro name to the models that depend on it.

    Only entries of manifest['nodes'] whose 'resource_type' is 'model' are
    considered; every macro listed under their depends_on.macros is recorded.

    Args:
        manifest: parsed manifest containing a 'nodes' mapping.

    Returns:
        Dict whose keys are the last dot-separated segment of each macro
        identifier and whose values are lists of the node keys (from
        manifest['nodes']) that reference that macro.
    '''
    macro_child_map = {}
    for resource, node in manifest['nodes'].items():
        if node['resource_type'] != 'model':
            continue
        for macro in node.get('depends_on', {}).get('macros') or []:
            # Keep only the last dot-separated segment, presumably the bare
            # macro name in ids shaped like "macro.<package>.<name>" (confirm
            # against the manifest schema).
            macro_name = macro.split(".")[-1]
            macro_child_map.setdefault(macro_name, []).append(resource)
    return macro_child_map
def main(branch: str, children: bool, manifest_path: str) -> str:
    '''Returns the models that use macros changed relative to a git branch.

    Args:
        branch: git revision the working tree is compared against.
        children: when true, a '+' is appended to every model name.
        manifest_path: path to the manifest JSON file.

    Returns:
        Space-separated, alphabetically sorted model names (each suffixed
        with '+' when children is true), or an empty string when no macro
        changed or no model uses a changed macro.
    '''
    changed_macros = _find_changed_macros(branch)
    if not changed_macros:
        # Nothing changed: skip reading the manifest altogether.
        return ""

    macro_child_map = _fetch_macro_child_map(_fetch_manifest(manifest_path))

    # A set removes models reached through more than one changed macro.
    model_names = set()
    for macro in changed_macros:
        for unique_id in macro_child_map.get(macro, []):
            # Third dot-separated segment of the node id, presumably the
            # model name in ids shaped like "model.<project>.<name>"
            # (confirm against the manifest schema).
            model_names.add(unique_id.split('.')[2])

    # Sorted so repeated runs print the same string; set order is arbitrary.
    selectors = sorted(model_names)
    if children:
        selectors = [name + '+' for name in selectors]
    return " ".join(selectors)
if __name__ == "__main__":
    # Command-line entry point: parse the options, print the resulting model
    # list to stdout, and re-raise any failure with a script-specific message.
    parser = argparse.ArgumentParser(add_help=False)
    parser.add_argument("--branch", default='origin/master', type=str)
    # --children is still accepted and remains on by default. Because a
    # store_true flag defaulting to True can never be switched off,
    # --no-children is provided as the opt-out.
    parser.add_argument("--children", dest="children", default=True, action="store_true")
    parser.add_argument("--no-children", dest="children", action="store_false")
    parser.add_argument("--manifest_path", default='./target/manifest.json', type=str)
    parsed_args = parser.parse_args()
    try:
        models = main(parsed_args.branch, parsed_args.children, parsed_args.manifest_path)
        print(models)
    except Exception as e:
        # Format the cause into the message (the "%s" was previously left
        # unformatted) and chain the original exception for the traceback.
        raise Exception('Changed macro script failed: %s' % e) from e
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Example usage in CI: