Skip to content

Instantly share code, notes, and snippets.

@oleewere
Last active October 30, 2020 11:20
Show Gist options
  • Save oleewere/ac6a8fbab3944f6bc9b2aee0a2f59326 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
import argparse
import sys
import os
import json
import subprocess
#import datetime
#from datetime import timezone, timedelta
def parse_args(args):
    """Build the CLI option parser and parse *args* (a list of strings).

    Returns an argparse.Namespace with: action (required), profile,
    dp_profile, use_dp, skip_orphan_keys, workdir, days, mode.
    """
    arg_parser = argparse.ArgumentParser(
        description='Python script to cleanup machine users (used by CB) with cdpcli commands')
    arg_parser.add_argument('--action', type=str, required=True,
                            help='Possible actions for cleanup: dump/cleanup')
    arg_parser.add_argument('--profile', type=str, required=False,
                            help='Profile for cdpcli')
    arg_parser.add_argument('--dp-profile', type=str, required=False,
                            help='Profile for internal dp cli')
    arg_parser.add_argument("--use-dp", dest="use_dp", default=False,
                            action="store_true", help="Use internal dp cli to gather freeipa instances")
    arg_parser.add_argument("--skip-orphan-keys", dest="skip_orphan_keys", default=False,
                            action="store_true", help="Skips handling of orphan access keys")
    arg_parser.add_argument('--workdir', type=str, required=False,
                            help='Work directory for dumped JSON files')
    arg_parser.add_argument('--days', type=int, required=False,
                            help='Filter out machine users that are young (in days)')
    arg_parser.add_argument('--mode', type=str, required=False,
                            help='Mode for cleanup: freeipa / datalake / datahub / all (default == all)')
    return arg_parser.parse_args(args)
def setup_workdir(args):
    """Ensure the <workdir>/dump directory exists and return its absolute path.

    Falls back to the current working directory when --workdir was not given.
    """
    base_dir = args.workdir if args.workdir else os.getcwd()
    print("Working directory: %s" % os.path.abspath(base_dir))
    dump_dir = os.path.abspath(os.path.join(base_dir, "dump"))
    if not os.path.exists(dump_dir):
        os.makedirs(dump_dir)
    return dump_dir
def nonblank_lines(f):
    """Yield each line of iterable *f* with trailing whitespace removed,
    skipping lines that are empty after stripping."""
    return (stripped for stripped in (raw.rstrip() for raw in f) if stripped)
def run_command(cmd):
    """Run *cmd* (split on whitespace, no shell) and wait for it to finish.

    Returns a (stdout_bytes, stderr_bytes, returncode) tuple.
    """
    process = subprocess.Popen(
        cmd.split(),
        stdin=subprocess.PIPE,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )
    out, err = process.communicate()
    return out, err, process.returncode
def __cleanup_machine_user(delete_file, cdp_command, name):
    """Delete every machine user listed in *delete_file* via the cdp cli.

    For each user: unassign the DbusUploader role, delete each of its
    access keys, then delete the machine user itself. *name* is only used
    for the "nothing to delete" message.
    """
    databus_role_crn = "crn:altus:iam:us-west-1:altus:role:DbusUploader"
    with open(delete_file) as json_file:
        users = json.load(json_file)
    if not users:
        print("Not found any %s for deletion" % name)
    for user in users:
        user_name = user["name"]
        # Build the full command sequence first, then run it in order.
        commands = [
            "%s iam unassign-machine-user-role --role %s --machine-user-name %s" % (cdp_command, databus_role_crn, user_name),
        ]
        for key in (user["accessKeys"] if "accessKeys" in user else []):
            commands.append("%s iam delete-access-key --access-key-id %s" % (cdp_command, key["accessKeyId"]))
        commands.append("%s iam delete-machine-user --machine-user-name %s" % (cdp_command, user_name))
        for command in commands:
            print(command)
            os.system(command)
def cleanup(args):
    """Delete the machine users / orphan access keys previously written by
    the 'dump' action (reads the delete-*.json files from <workdir>/dump)."""
    resp_location = setup_workdir(args)
    cdp_command = create_cdp_command(args)
    mode = args.mode if args.mode else "all"
    print("Using mode: %s" % mode)
    if mode not in ("freeipa", "datalake", "datahub"):
        # Any unrecognized mode (including the default) means: process everything.
        mode = "all"
    # (json file, human readable label) pairs selected by mode.
    targets = []
    if mode in ("all", "freeipa"):
        targets.append(("delete-freeipa-fluent-users.json", "FreeIPA Fluentd machine user"))
    if mode in ("all", "datahub"):
        targets.append(("delete-datahub-fluent-users.json", "DataHub Fluentd machine user"))
        targets.append(("delete-datahub-wa-users.json", "DataHub WA machine user"))
    if mode in ("all", "datalake"):
        targets.append(("delete-datalake-fluent-users.json", "DataLake Fluentd machine user"))
    for file_name, label in targets:
        __cleanup_machine_user(os.path.join(resp_location, file_name), cdp_command, label)
    if not args.skip_orphan_keys:
        with open(os.path.join(resp_location, "delete-orphan-access-keys.json")) as json_file:
            orphan_keys = json.load(json_file)
        if not orphan_keys:
            print("Not found any orphan access keys")
        for orphan in orphan_keys:
            delete_access_key_command = "%s iam delete-access-key --access-key-id %s" % (cdp_command, orphan["accessKeyId"])
            print(delete_access_key_command)
            os.system(delete_access_key_command)
def _read_user_list(path):
    # Return the non-blank lines of *path*, stripped of trailing whitespace.
    users = []
    with open(path) as f_in:
        for line in nonblank_lines(f_in):
            users.append(line.rstrip())
    return users


def _users_with_keys(blacklisted_users, access_key_map):
    # Build the [{"name": ..., "accessKeys": [...]}] delete-list entries.
    result = []
    for user_name in blacklisted_users:
        result.append({
            "name": user_name,
            "accessKeys": access_key_map.get(user_name, []),
        })
    return result


def _orphaned_keys(access_key_map, existing_users):
    # Access keys whose owning machine user no longer exists at all.
    orphans = []
    for user_name in access_key_map:
        if user_name not in existing_users and access_key_map[user_name]:
            orphans.extend(access_key_map[user_name])
    return orphans


def _write_json(path, data):
    # Persist *data* as pretty-printed, key-sorted JSON.
    with open(path, "w") as out_file:
        json.dump(data, out_file, indent=4, sort_keys=True)


def dump(args):
    """Gather machine users / access keys with the cdp (and dp) clis and write
    the "delete-*" JSON files that the 'cleanup' action consumes.

    A machine user is considered deletable ("blacklisted") when its name
    follows one of the known CB naming conventions
    (freeipa/datahub/datalake-fluent-databus-uploader-<id>,
    datahub-wa-publisher-<id>) but no matching environment / cluster /
    datalake id exists anymore. Access keys whose owner vanished entirely
    are written to delete-orphan-access-keys.json.

    All output goes to <workdir>/dump. Exits with status 1 when --use-dp is
    not set (freeipa instance listing requires the internal dp cli).
    """
    mode = args.mode if args.mode else "all"
    if not args.use_dp:
        print("Please set --use-dp option if freeipa is included in the responses, also make sure to install dp cli, see: https://github.com/hortonworks/cb-cli")
        sys.exit(1)
    if mode == "freeipa" or mode == "datalake" or mode == "datahub":
        print("Using mode: %s" % mode)
    else:
        # Any unrecognized mode (including the default) means: process everything.
        print("Using mode: %s" % mode)
        mode = "all"
    cdp_command = create_cdp_command(args)
    resp_location = setup_workdir(args)

    # Raw dumps of every machine user and access key (jq extracts the arrays).
    allMachineUsersJson = os.path.join(resp_location, "all-machine-users.json")
    os.system("%s iam list-machine-users --max-items 10000 | jq .machineUsers > %s" % (cdp_command, allMachineUsersJson))
    allAccessKeysJson = os.path.join(resp_location, "all-access-keys.json")
    os.system("%s iam list-access-keys --max-items 10000 | jq .accessKeys > %s" % (cdp_command, allAccessKeysJson))
    allMachineUsers = os.path.join(resp_location, "all-machine-users.txt")
    os.system("cat %s | jq -r '.[].machineUserName' | sort > %s" % (allMachineUsersJson, allMachineUsers))
    with open(allAccessKeysJson) as json_file:
        allAccessKeys = json.load(json_file)

    # Bucket access keys by machine user, keyed on the CB naming convention
    # embedded in the actor CRN (last ':' segment, before any '/').
    fluentDatahubUserAccessKeys = {}
    fluentDatalakeUserAccessKeys = {}
    fluentFreeIpaUserAccessKeys = {}
    waDatahubUserAccessKeys = {}
    for accessKey in allAccessKeys:
        accessKeyId = str(accessKey["accessKeyId"])
        actorCrn = str(accessKey["actorCrn"])
        creationDate = str(accessKey["creationDate"])
        machineUserName = actorCrn.rsplit(':', 1)[1].split("/", 1)[0]
        if machineUserName.startswith("datahub-fluent-databus-uploader"):
            fluentDatahubUserAccessKeys = append_access_key(fluentDatahubUserAccessKeys, machineUserName, accessKeyId, creationDate)
        elif machineUserName.startswith("datalake-fluent-databus-uploader"):
            fluentDatalakeUserAccessKeys = append_access_key(fluentDatalakeUserAccessKeys, machineUserName, accessKeyId, creationDate)
        elif machineUserName.startswith("datahub-wa-publisher"):
            waDatahubUserAccessKeys = append_access_key(waDatahubUserAccessKeys, machineUserName, accessKeyId, creationDate)
        elif machineUserName.startswith("freeipa-fluent-databus-uploader"):
            fluentFreeIpaUserAccessKeys = append_access_key(fluentFreeIpaUserAccessKeys, machineUserName, accessKeyId, creationDate)

    orphan_access_keys = []
    if mode == "all" or mode == "freeipa":
        dpProfile = "--profile %s" % args.dp_profile if args.dp_profile else ""
        allEnvs = os.path.join(resp_location, "all-environments.txt")
        os.system("%s environments list-environments | jq -r '.[][].environmentName' > %s" % (cdp_command, allEnvs))
        allFreeIpaIdsFile = os.path.join(resp_location, "all-freeipa-ids.txt")
        if args.use_dp:  # always true here (checked at the top); kept defensively
            os.system("dp freeipa list %s | jq -r '.[].CRN' | tr -d '\"' | cut -d: -f7 > %s" % (dpProfile, allFreeIpaIdsFile))
        freeipaFluentUsers = os.path.join(resp_location, "freeipa-fluent-users.txt")
        os.system("cat %s | grep 'freeipa-fluent-databus-uploader' | sort > %s" % (allMachineUsers, freeipaFluentUsers))
        expectedFreeipaFluentUsers = os.path.join(resp_location, "whitelist-freeipa-fluent-users.txt")
        os.system("cat %s | sed -e 's/^/freeipa-fluent-databus-uploader-/' | sort > %s" % (allFreeIpaIdsFile, expectedFreeipaFluentUsers))
        whitelisted_freeipa_fluent_users = _read_user_list(expectedFreeipaFluentUsers)
        all_freeipa_fluent_users = _read_user_list(freeipaFluentUsers)
        blacklisted_freeipa_fluent_users = [u for u in all_freeipa_fluent_users if u not in whitelisted_freeipa_fluent_users]
        _write_json(os.path.join(resp_location, "delete-freeipa-fluent-users.json"),
                    _users_with_keys(blacklisted_freeipa_fluent_users, fluentFreeIpaUserAccessKeys))
        orphan_access_keys.extend(_orphaned_keys(fluentFreeIpaUserAccessKeys, all_freeipa_fluent_users))
    if mode == "all" or mode == "datahub":
        datahubFluentUsers = os.path.join(resp_location, "datahub-fluent-users.txt")
        os.system("cat %s | grep 'datahub-fluent-databus-uploader' | sort > %s" % (allMachineUsers, datahubFluentUsers))
        datahubWaUsers = os.path.join(resp_location, "datahub-wa-users.txt")
        os.system("cat %s | grep 'datahub-wa-publisher' | sort > %s" % (allMachineUsers, datahubWaUsers))
        all_datahub_ids = os.path.join(resp_location, "all-datahub-ids.txt")
        os.system("%s datahub list-clusters | jq -r '.[][].crn' | tr -d '\"' | cut -d: -f7 > %s" % (cdp_command, all_datahub_ids))
        expectedDatahubFluentUsers = os.path.join(resp_location, "whitelist-datahub-fluent-users.txt")
        os.system("cat %s | sed -e 's/^/datahub-fluent-databus-uploader-/' | sort > %s" % (all_datahub_ids, expectedDatahubFluentUsers))
        expectedDatahubWaUsers = os.path.join(resp_location, "whitelist-datahub-wa-users.txt")
        os.system("cat %s | sed -e 's/^/datahub-wa-publisher-/' | sort > %s" % (all_datahub_ids, expectedDatahubWaUsers))
        whitelisted_datahub_fluent_users = _read_user_list(expectedDatahubFluentUsers)
        whitelisted_datahub_wa_users = _read_user_list(expectedDatahubWaUsers)
        all_datahub_fluent_users = _read_user_list(datahubFluentUsers)
        all_datahub_wa_users = _read_user_list(datahubWaUsers)
        blacklisted_datahub_fluent_users = [u for u in all_datahub_fluent_users if u not in whitelisted_datahub_fluent_users]
        blacklisted_datahub_wa_users = [u for u in all_datahub_wa_users if u not in whitelisted_datahub_wa_users]
        _write_json(os.path.join(resp_location, "delete-datahub-fluent-users.json"),
                    _users_with_keys(blacklisted_datahub_fluent_users, fluentDatahubUserAccessKeys))
        _write_json(os.path.join(resp_location, "delete-datahub-wa-users.json"),
                    _users_with_keys(blacklisted_datahub_wa_users, waDatahubUserAccessKeys))
        orphan_access_keys.extend(_orphaned_keys(fluentDatahubUserAccessKeys, all_datahub_fluent_users))
        # Bug fix: the original tested `keyForAccKey in all_datahub_wa_users`
        # here (every parallel loop uses `not in`), which flagged keys of
        # still-existing WA users as orphans and missed the real orphans.
        orphan_access_keys.extend(_orphaned_keys(waDatahubUserAccessKeys, all_datahub_wa_users))
    if mode == "all" or mode == "datalake":
        datalakeFluentUsers = os.path.join(resp_location, "datalake-fluent-users.txt")
        os.system("cat %s | grep 'datalake-fluent-databus-uploader' | sort > %s" % (allMachineUsers, datalakeFluentUsers))
        all_datalake_ids = os.path.join(resp_location, "all-datalake-ids.txt")
        os.system("%s datalake list-datalakes | jq -r '.[][].crn' | tr -d '\"' | cut -d: -f7 > %s" % (cdp_command, all_datalake_ids))
        expectedDatalakeFluentUsers = os.path.join(resp_location, "whitelist-datalake-fluent-users.txt")
        os.system("cat %s | sed -e 's/^/datalake-fluent-databus-uploader-/' | sort > %s" % (all_datalake_ids, expectedDatalakeFluentUsers))
        whitelisted_datalake_fluent_users = _read_user_list(expectedDatalakeFluentUsers)
        all_datalake_fluent_users = _read_user_list(datalakeFluentUsers)
        blacklisted_datalake_fluent_users = [u for u in all_datalake_fluent_users if u not in whitelisted_datalake_fluent_users]
        _write_json(os.path.join(resp_location, "delete-datalake-fluent-users.json"),
                    _users_with_keys(blacklisted_datalake_fluent_users, fluentDatalakeUserAccessKeys))
        orphan_access_keys.extend(_orphaned_keys(fluentDatalakeUserAccessKeys, all_datalake_fluent_users))

    # Summary + sanity warnings: an empty whitelist usually means a failed
    # cli call rather than a genuinely empty environment.
    warnings = 0
    print("--------")
    print("MODE: %s" % mode)
    print("--------")
    if mode == "all" or mode == "freeipa":
        print("freeipa-fluent-databus-uploader-<clusterid>:")
        print("--------")
        print("All Fluent FreeIPA user size: %s" % str(len(all_freeipa_fluent_users)))
        print("White-listed (possible) Fluent FreeIPA user size: %s" % str(len(whitelisted_freeipa_fluent_users)))
        print("Black-listed Fluent FreeIPA user size: %s" % str(len(blacklisted_freeipa_fluent_users)))
        print("--------")
        if not whitelisted_freeipa_fluent_users:
            warnings = warnings + 1
    if mode == "all" or mode == "datahub":
        print("datahub-fluent-databus-uploader-<clusterid>:")
        print("--------")
        print("All Fluent DataHub user size: %s" % str(len(all_datahub_fluent_users)))
        print("White-listed (possible) Fluent DataHub user size: %s" % str(len(whitelisted_datahub_fluent_users)))
        print("Black-listed Fluent DataHub user size: %s" % str(len(blacklisted_datahub_fluent_users)))
        print("--------")
        print("datahub-wa-publisher-<clusterid>:")
        print("--------")
        print("All WA DataHub user size: %s" % str(len(all_datahub_wa_users)))
        print("White-listed (possible) WA DataHub user size: %s" % str(len(whitelisted_datahub_wa_users)))
        print("Black-listed WA DataHub user size: %s" % str(len(blacklisted_datahub_wa_users)))
        print("--------")
        # elif preserved from the original: at most one warning is counted
        # for the whole datahub section.
        if not whitelisted_datahub_wa_users:
            warnings = warnings + 1
        elif not whitelisted_datahub_fluent_users:
            warnings = warnings + 1
    if mode == "all" or mode == "datalake":
        print("datalake-fluent-databus-uploader-<clusterid>:")
        print("--------")
        print("All Fluent DataLake user size: %s" % str(len(all_datalake_fluent_users)))
        print("White-listed (possible) Fluent DataLake user size: %s" % str(len(whitelisted_datalake_fluent_users)))
        print("Black-listed Fluent DataLake user size: %s" % str(len(blacklisted_datalake_fluent_users)))
        print("--------")
        if not whitelisted_datalake_fluent_users:
            warnings = warnings + 1
    print("Orphan access keys: %s" % str(len(orphan_access_keys)))
    _write_json(os.path.join(resp_location, "delete-orphan-access-keys.json"), orphan_access_keys)
    if warnings > 0:
        print("!!!! WARNING: There are whitelists with 0 numbers, make sure that is expected and there is no error logs above with 4XX or 5XX cdp cli responses!!!!!")
def create_cdp_command(args):
    """Return the base cdp cli invocation, honoring an optional --profile."""
    if args.profile:
        return "cdp --profile %s" % args.profile
    return "cdp"
def append_access_key(accessKeyMap, machineUserName, accessKeyId, creationDate):
    """Record an access key under *machineUserName* in *accessKeyMap*.

    Appends {"accessKeyId": ..., "date": ...} to the user's key list,
    creating the list on first sight of the user. Mutates and returns
    *accessKeyMap* (keyed by machine user name).

    Bug fix: the original checked `accessKeyId not in accessKeyMap`, which is
    always true because the map is keyed by machine user name — so every call
    replaced the user's list and only the last access key per user survived.
    """
    if machineUserName not in accessKeyMap:
        accessKeyMap[machineUserName] = list()
    accessKeyMap[machineUserName].append({
        "accessKeyId": accessKeyId,
        "date": creationDate,
    })
    return accessKeyMap
def main(args):
    """Parse the CLI arguments and dispatch to the requested action.

    Exits with status 1 when --action is neither 'dump' nor 'cleanup'.
    """
    parsed = parse_args(args)
    handlers = {"dump": dump, "cleanup": cleanup}
    handler = handlers.get(parsed.action)
    if handler is None:
        print("Action %s is not supported" % parsed.action)
        sys.exit(1)
    handler(parsed)
# Script entry point: forward the CLI arguments (minus the program name) to main().
if __name__ == "__main__":
    main(sys.argv[1:])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment