Skip to content

Instantly share code, notes, and snippets.

@kurohai
Forked from holly/zfs_host_backup.py
Created August 4, 2020 14:46
Show Gist options
  • Save kurohai/6220ae9d7f560254b9de90e274b72d03 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python3
# vim:fileencoding=utf-8
import importlib.machinery
import importlib.util
import io
import logging
import logging.handlers
import os
import re
import shlex
import sys
import threading
import time
from argparse import ArgumentParser, FileType
from concurrent.futures import ThreadPoolExecutor, as_completed
from datetime import datetime
from subprocess import Popen, PIPE
__author__ = 'holly'
__version__ = '1.0'
# CLI description and default locations of the two user-editable config modules.
DESCRIPTION = 'zfs backup cli'
CONFIG_DIR = '/etc/zfs-host-backup'
# rsync_config.py: per-site rsync/ssh settings (see the sample below)
RSYNC_CONFIG = os.path.join(CONFIG_DIR, 'rsync_config.py')
# rsync_target.py: list of hosts/directories to back up (see the sample below)
RSYNC_TARGET = os.path.join(CONFIG_DIR, 'rsync_target.py')
###########################################
#
# * sample rsync_config.py
#
# ```
# remote_rsync_path = 'sudo ionice -c3 nice -n 19 /usr/bin/rsync'
# ssh_user = 'backup'
# ssh_privkey = '/root/.ssh/backup_id_rsa'
# ```
#
# * sample rsync_target.py
#
# ```
# target_hosts = [
# {'src_host': '172.16.0.4', 'target_dirs': ['/etc', '/opt', '/home']},
# {'src_host': '172.16.0.5', 'target_dirs': ['/etc', '/opt', '/home']},
# {'src_host': '172.16.0.6', 'target_dirs': ['/var'], 'rsync_config': {"ssh_port": "9022"}},
# ]
#
# ```
#
###########################################
# Log line format used by the rotating file handler in Log.
LOG_FORMAT = "%(asctime)s %(name)s %(levelname)s: %(message)s"
# Default number of parallel backup worker threads.
FORK = 1
# Default number of ZFS snapshots to keep per host.
SNAPSHOT_ROTATE = 3
class Log:
    """Small wrapper that configures and exposes a stdlib logger.

    Always logs to stderr via a StreamHandler; when *log_file* is given,
    additionally writes to a daily-rotating file keeping 30 days of history.
    Level: CRITICAL when quiet, DEBUG when debug, otherwise INFO.
    """

    def __init__(self, log_file=None, quiet=False, debug=False):
        self._logger = logging.getLogger(__name__)
        self._logger.addHandler(logging.StreamHandler())
        if log_file:
            # Rotate once per day, keep 30 backups.
            handler = logging.handlers.TimedRotatingFileHandler(
                log_file, when="D", backupCount=30)
            handler.formatter = logging.Formatter(fmt=LOG_FORMAT)
            self._logger.addHandler(handler)
        if quiet:
            level = logging.CRITICAL
        elif debug:
            level = logging.DEBUG
        else:
            level = logging.INFO
        self._logger.setLevel(level)

    def shutdown(self):
        """Flush and close all handlers registered with the logging module."""
        logging.shutdown()

    @property
    def logger(self):
        """The underlying logging.Logger instance."""
        return self._logger
class Rsync:
    """Build and execute the rsync / zfs commands used to back up one host.

    The class attribute ``config`` holds the default settings; each instance
    overlays values loaded from a user-supplied config module on top of
    these defaults (see :meth:`parse_config`).
    """

    config = {}
    config['zfs_pool'] = 'tank'
    config['zfs_name'] = '{0}/data'.format(config['zfs_pool'])
    config['local_backup_dir'] = '/data'
    config['local_rsync_path'] = '/usr/bin/rsync'
    config['local_config_dir'] = os.path.join(config['local_backup_dir'], ".config")
    config['remote_rsync_path'] = 'ionice -c3 nice -n 19 rsync'
    config['ssh_opts'] = ['ssh', '-2', '-x', '-T', '-o StrictHostKeyChecking=no', '-o UserKnownHostsFile=/dev/null', '-o Compression=no']
    config['ssh_privkey'] = '/root/.ssh/id_rsa'
    config['ssh_user'] = 'root'
    config['ssh_port'] = 22
    config['ssh_connection_timeout'] = 30
    config['ssh_cipher'] = 'aes256-ctr'
    config['rsync_bwlimit'] = 0
    config['rsync_opts'] = ['-axSHAX', '--numeric-ids', '--delete', '--timeout=0']
    config['rsync_other_opts'] = ['--append', '--partial']
    config['rsync_excludes'] = ['*.swp', '*.tmp', '*~', '.make.state', '.nse_depinfo', '#*', '.#*', ',*', '_$*', '*$', '*.old', '*.bak', '*.BAK', '*.orig', '*.rej', '.del-*', '*.olb', '*.obj', '*.Z', '*.elc', '*.ln', 'core']
    # "{0}" placeholders in the next two paths are filled with the host name.
    config['rsync_exclude_from'] = os.path.join(config['local_config_dir'], "{0}", "exclude_from")
    config['rsync_sp_opts_file'] = os.path.join(config['local_config_dir'], "{0}", "sp_opts")

    def __init__(self, src_host, rsync_config):
        self.src_host = src_host
        self.date_string = datetime.now().strftime("%Y%m%d_%H%M%S")
        # Instance attribute shadows the class-level defaults dict with the
        # effective (merged) config object.
        self.config = self.parse_config(rsync_config)

    def parse_config(self, rsync_config):
        """Load *rsync_config* as a python module and fill in defaults.

        Returns an object whose attributes are the effective settings; any
        key absent from the user config falls back to the class defaults.
        """
        if os.path.exists(rsync_config):
            # Spec-based loading replaces SourceFileLoader.load_module(),
            # which was deprecated and removed in Python 3.12.
            spec = importlib.util.spec_from_file_location('config', rsync_config)
            config = importlib.util.module_from_spec(spec)
            spec.loader.exec_module(config)
        else:
            config = type('config', (object,), {})()
        for key, default in self.__class__.config.items():
            if not hasattr(config, key):
                setattr(config, key, default)
        return config

    def zfs_snapshot_list(self):
        """Return this host's ZFS snapshot names, in `zfs list` order."""
        snapshot = "{0}/{1}".format(self.config.zfs_name, self.src_host)
        # Snapshots are named <dataset>@YYYYMMDD_HHMMSS (see make_backup_snapshot).
        pattern = re.compile(r"^({0}@[0-9]+_[0-9]+)\s+.*$".format(snapshot))
        outs = self.execute_from_string("zfs list -t snapshot")
        snapshots = []
        for line in outs[0].split("\n"):
            m = pattern.match(line)
            if m:
                snapshots.append(m.group(1))
        return snapshots

    def make_backup_host_dir(self):
        """Ensure the per-host ZFS dataset exists; return its directory path."""
        backup_host_dir = os.path.join(self.config.local_backup_dir, self.src_host)
        if not os.path.exists(backup_host_dir):
            self.execute_from_string("zfs create {0}/{1}".format(self.config.zfs_name, self.src_host))
        return backup_host_dir

    def make_rsync_commands(self, src_dir, dry_run=False):
        """Assemble the rsync argument list for backing up *src_dir*.

        When *dry_run* is true, "--dry-run" is appended and the destination
        directory is not created.
        """
        config = self.config
        make_dir = not dry_run
        src = self.make_src_dir(src_dir)
        dst = self.make_dst_dir(src_dir, make_dir=make_dir)
        exclude_from = config.rsync_exclude_from.format(self.src_host)
        sp_opts_file = config.rsync_sp_opts_file.format(self.src_host)
        cmds = [config.local_rsync_path]
        if os.path.exists(sp_opts_file):
            # Per-host special options replace the default bandwidth limit;
            # backslash-newline continuations are joined before splitting.
            with open(sp_opts_file) as f:
                data = f.read().strip()
            data = re.sub(r"\\\n", " ", data)
            cmds.extend(shlex.split(data))
        else:
            cmds.extend(["--bwlimit={0}".format(config.rsync_bwlimit)])
        cmds.extend(config.rsync_opts)
        cmds.extend(config.rsync_other_opts)
        # shallow copy so per-call additions do not leak into the config
        ssh_opts = list(config.ssh_opts)
        ssh_opts.append("-o ConnectTimeout={0}".format(config.ssh_connection_timeout))
        ssh_opts.append("-c {0}".format(config.ssh_cipher))
        ssh_opts.append("-p {0}".format(config.ssh_port))
        ssh_opts.append("-i {0}".format(config.ssh_privkey))
        ssh_opts.append("-l {0}".format(config.ssh_user))
        # NOTE(review): the embedded single quotes are shell syntax; they are
        # stripped again only when the joined string is re-split via
        # execute_from_string()/shlex.split — confirm execute_rsync() callers.
        cmds.append("-e '{0}'".format(" ".join(ssh_opts)))
        cmds.append("--rsync-path='{0}'".format(config.remote_rsync_path))
        if os.path.exists(exclude_from):
            cmds.append("--exclude-from='{0}'".format(exclude_from))
        for exclude in config.rsync_excludes:
            cmds.append("--exclude='{0}'".format(exclude))
        if dry_run:
            cmds.append("--dry-run")
        cmds.append(src)
        cmds.append(dst)
        return cmds

    def make_rsync_command_string(self, src_dir, dry_run=False):
        """Return the rsync command for *src_dir* as one space-joined string."""
        return " ".join(self.make_rsync_commands(src_dir, dry_run=dry_run))

    def make_src_dir(self, src_dir):
        """Return the rsync source spec "host:dir/" (trailing slash ensured)."""
        if not re.search(r"^(.*)/+$", src_dir):
            src_dir = src_dir + "/"
        return "{0}:{1}".format(self.src_host, src_dir)

    def make_dst_dir(self, src_dir, make_dir=False):
        """Map *src_dir* under the per-host backup dir; optionally create it."""
        m = re.search(r"^(/+)(.*)$", src_dir)
        if m:
            # strip leading slashes so os.path.join does not discard the base
            src_dir = m.group(2)
        dst_dir = os.path.join(self.make_backup_host_dir(), src_dir)
        if make_dir:
            os.makedirs(dst_dir, exist_ok=True)
        return dst_dir

    def make_backup_snapshot(self, finished):
        """Snapshot the host dataset, named after the timestamp in *finished*."""
        # context manager replaces an open/close pair that leaked the handle
        # if read() raised
        with open(finished, "r") as f:
            date_string = f.read().strip()
        snapshot = "{0}/{1}@{2}".format(self.config.zfs_name, self.src_host, date_string)
        self.execute_from_string("zfs snapshot {0}".format(snapshot))

    def delete_backup_snapshot(self, snapshot):
        """Destroy the given ZFS snapshot."""
        self.execute_from_string("zfs destroy {0}".format(snapshot))

    def execute_rsync(self, src_dir, dry_run=False):
        """Run rsync for *src_dir*; returns (stdout, stderr, returncode)."""
        return self._execute_command(self.make_rsync_commands(src_dir, dry_run=dry_run))

    def execute_from_string(self, command):
        """Shell-split *command* and run it; returns (stdout, stderr, returncode)."""
        return self._execute_command(shlex.split(command))

    def _execute_command(self, cmds):
        """Run *cmds* without a shell, capturing stripped text output."""
        proc = Popen(cmds, universal_newlines=True, stdout=PIPE, stderr=PIPE)
        outs, errs = proc.communicate()
        return outs.strip(), errs.strip(), proc.returncode
# ---- command-line parsing and global setup (runs at import time) ----
parser = ArgumentParser(description=DESCRIPTION)
parser.add_argument('--version', action='version', version='%(prog)s ' + __version__)
parser.add_argument('--rsync-config', action='store', default=RSYNC_CONFIG, help='config file (Default: {0})'.format(RSYNC_CONFIG))
parser.add_argument('--rsync-target', action='store', default=RSYNC_TARGET, help='rsync target hosts file (Default: {0})'.format(RSYNC_TARGET))
parser.add_argument('--fork', '-f', action='store', type=int, default=FORK, help='backup fork processes (Default: {0})'.format(FORK))
# help typo fixed: "rotage" -> "rotate"
parser.add_argument('--snapshot-rotate', '-s', action='store', type=int, default=SNAPSHOT_ROTATE, help='backup snapshot rotate age (Default: {0})'.format(SNAPSHOT_ROTATE))
parser.add_argument('--dry-run', '-n', action='store_true', default=False, help='backup rsync dry-run mode (Default: False)')
parser.add_argument('--log-file', '-l', action='store', help='output log file(Default: STDOUT)')
parser.add_argument('--quiet', '-q', action='store_true', default=False, help='quiet mode')
parser.add_argument('--verbose', action='store_true', help='output verbose message')
args = parser.parse_args()

log = Log(log_file=args.log_file, quiet=args.quiet, debug=args.verbose)

# Load the target-host list as a python module. Spec-based loading replaces
# SourceFileLoader.load_module(), deprecated and removed in Python 3.12.
_target_spec = importlib.util.spec_from_file_location('rsync_target', args.rsync_target)
rsync_target = importlib.util.module_from_spec(_target_spec)
_target_spec.loader.exec_module(rsync_target)
def execute_command(rsync, command_string):
    """Run *command_string* via *rsync*, logging the outcome.

    Raises:
        Exception: with the command's stderr when it exits non-zero.
    """
    # str.split() is robust against leading whitespace, where
    # re.split("\\s+", ...) would have produced an empty first token.
    command = command_string.split()[0]
    log.logger.info("target:{0} command:{1}".format(rsync.src_host, command_string))
    outs, errs, returncode = rsync.execute_from_string(command_string)
    if returncode == 0:
        log.logger.info("target:{0}: {1} success returncode:{2}".format(rsync.src_host, command, returncode))
    else:
        log.logger.error("target:{0} {1} failure returncode:{2}".format(rsync.src_host, command, returncode))
        raise Exception(errs)
def execute_rsync(target):
    """Back up one target host: rotate snapshots, then rsync each directory.

    *target* is a dict with 'src_host', 'target_dirs' and an optional
    'rsync_config' override dict. Reads the module globals ``args`` and
    ``log``; intended to run inside a worker thread (see main()).
    """
    src_host = target["src_host"]
    target_dirs = target["target_dirs"]
    rsync = Rsync(src_host, args.rsync_config)
    thread_id = threading.get_ident()
    # time.time() is portable; strftime('%s') is a non-standard glibc
    # extension and undefined on other platforms.
    start_time = int(time.time())
    log.logger.info(">> {0} backup start".format(src_host))
    log.logger.debug("thread_id {0}: start thread.".format(thread_id))
    backup_host_dir = rsync.make_backup_host_dir()
    finished = os.path.join(backup_host_dir, ".backup_finished")
    if os.path.exists(finished):
        # A previous run completed: snapshot it, then prune old snapshots.
        log.logger.info(">>> target:{0} make zfs snapshot".format(src_host))
        rsync.make_backup_snapshot(finished)
        snapshots = rsync.zfs_snapshot_list()
        while len(snapshots) > args.snapshot_rotate:
            snapshot = snapshots.pop(0)
            rsync.delete_backup_snapshot(snapshot)
            log.logger.info(">>> target:{0} old snapshot:{1} deleted".format(src_host, snapshot))
    if "rsync_config" in target:
        # apply per-host overrides on top of the shared config
        for key, value in target["rsync_config"].items():
            setattr(rsync.config, key, value)
    for target_dir in target_dirs:
        command_string = rsync.make_rsync_command_string(target_dir, dry_run=args.dry_run)
        start_rsync_time = int(time.time())
        log.logger.info(">>>> target:{0}:{1} rsync start. command:{2}".format(src_host, target_dir, command_string))
        outs, errs, returncode = rsync.execute_from_string(command_string)
        end_rsync_time = int(time.time())
        # rsync exit code 24 ("source files vanished") is treated as success
        if returncode == 0 or returncode == 24:
            log.logger.info(">>>> target:{0}:{1} rsync is finished successfully. returncode:{2} ({3} sec)".format(src_host, target_dir, returncode, (end_rsync_time - start_rsync_time)))
        else:
            log.logger.error(">>>> target:{0}:{1} rsync is terminated abnormally. returncode:{2} ({3} sec)".format(src_host, target_dir, returncode, (end_rsync_time - start_rsync_time)))
            log.logger.error(errs)
    # Record completion so the next run snapshots this state.
    with open(finished, "w") as f:
        print(datetime.now().strftime("%Y%m%d_%H%M%S"), file=f)
    end_time = int(time.time())
    log.logger.info(">> {0} backup is finished ({1} sec)".format(src_host, (end_time - start_time)))
    log.logger.debug("thread_id {0}: break thread.".format(thread_id))
def main():
    """Run the backup for every configured target host in a thread pool."""
    # time.time() is portable, unlike the glibc-only strftime('%s').
    start_time = int(time.time())
    log.logger.info("===== start zfs host backup =====")
    with ThreadPoolExecutor(max_workers=args.fork) as executor:
        future_to_targets = {executor.submit(execute_rsync, target): target
                             for target in rsync_target.target_hosts}
        for future in as_completed(future_to_targets):
            target = future_to_targets[future]
            try:
                # result() re-raises any exception from the worker thread
                future.result()
            except Exception as exc:
                log.logger.critical('%s generated an exception: %s' % (target, exc))
    end_time = int(time.time())
    log.logger.info("===== end zfs host backup ({0} sec) =====".format(end_time - start_time))
    sys.exit(0)

if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment