Created
March 24, 2023 21:11
-
-
Save philpennock/4dbeb8e053c77e6e3a6507975c0ff4ff to your computer and use it in GitHub Desktop.
Find VCS repos under the cwd and report their paths
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
""" | |
find_to_repo: find all dirs under the reference point which are repos | |
Stop at repos, don't descend into the repos. | |
Handling submodules etc is explicitly out of scope. | |
Use --help to see help. | |
Assumes under current directory if no directories given. | |
""" | |
from __future__ import print_function | |
__author__ = 'phil@pennock-tech.com (Phil Pennock)' | |
import argparse | |
import os | |
import stat | |
import sys | |
# Sub-directory names which must all be present for a directory to be
# treated as a bare git repo; hard-coded because it is internal logic.
GIT_BARE_NEEDALL_DIRS = frozenset(('info', 'objects', 'refs'))

# Flag-file names; these are also used by some update tooling.
SKIP_ALL_FILENAME = '.skip-updates'  # "prune here"
SKIP_CHILDREN_FILENAME = '.skip-children'  # selectively skip children
REPO_RECURSE_FLAGFILES = frozenset((SKIP_ALL_FILENAME, '.update-children'))
class Error(Exception):
    """Root of the exception hierarchy raised by find_to_repo."""
class Exit(Error):
    """An error which should end the program cleanly, with no stack trace."""
class Walker(object):
    """Find repositories under directory trees and print their paths.

    The walk stops at each repository it finds and does not descend into it.
    Results are printed to stdout as they are discovered.
    """

    def __init__(self, options):
        """Capture the search configuration from parsed options.

        Args:
            options: object providing the attributes repo_dirs, bundle_exts,
                dir_exts, only_repo_types, exclude_paths, meta_file,
                exists_file, verbose, warn_no_flagfiles, obey_skips and
                symlinks_fd.
        """
        self.repo_dirs = set(options.repo_dirs)
        self.bundle_exts = set(options.bundle_exts)
        self.dir_exts = set(options.dir_exts)
        self.only_show = set(options.only_repo_types)
        self.use_only_show = bool(self.only_show)
        self.exclude_paths = set(options.exclude_paths)
        self.required_meta_file = options.meta_file
        self.required_exists_file = options.exists_file
        self.verbose = options.verbose
        self.symlinks_file = None
        self.warn_no_flagfiles = options.warn_no_flagfiles
        self.obey_skips = options.obey_skips
        if options.symlinks_fd is not None and options.symlinks_fd >= 0:
            # Line-buffered so each symlink record is written out promptly.
            self.symlinks_file = os.fdopen(options.symlinks_fd, mode='wt', buffering=1, encoding='UTF-8')

    def _report(self, label, path):
        """Print one result: 'label<TAB>path' when verbose, else just path."""
        if self.verbose:
            print('{0}\t{1}'.format(label, path))
        else:
            print(path)

    def find_under(self, top):
        """Walk the tree rooted at top, printing each repository found.

        For our purposes, a repo is one of:
          1. A file whose extension is in bundle_exts
          2. A directory with a flag-directory (eg .git/) within it
          3. A directory named a particular way (bare repos, eg foo.git)
          4. Fallback for bare git repos not named that way

        Repositories go to stdout, flag-file warnings to stderr, and symlinks
        seen outside repos to the symlinks file, if one was configured.

        Args:
            top: directory at which to start walking.
        """
        for root, dirs, files in os.walk(top, topdown=True):
            # With topdown=True, editing dirs in place prunes the walk.
            if root in self.exclude_paths:
                del dirs[:]
                continue

            is_repo_thisdir = False

            # 1. Bundle files.
            for bundle in files:
                ext = os.path.splitext(bundle)[1]
                if ext not in self.bundle_exts:
                    continue
                is_repo_thisdir = True
                if self.use_only_show and ext not in self.only_show:
                    continue
                # File, no --meta-file support
                if self.required_meta_file or self.required_exists_file:
                    continue
                self._report(ext, os.path.join(root, bundle))

            # Snapshot taken before any pruning; the bare-repo fallback below
            # deliberately tests against this original listing.
            dirs_set = set(dirs)

            # 2. A checkout dir which contains a meta dir inside it, eg a .git/ sub-dir
            found_meta = self.repo_dirs & dirs_set
            if found_meta:
                is_repo_thisdir = True
                if self.use_only_show:
                    wanted = self.only_show & dirs_set
                else:
                    # Only real meta-dirs are candidates for the --meta-file
                    # check, never arbitrary sibling directories.
                    wanted = found_meta
                if wanted:
                    meta_ok = (
                        not self.required_meta_file
                        or os.path.exists(os.path.join(root, sorted(wanted)[0], self.required_meta_file)))
                    exists_ok = (
                        not self.required_exists_file
                        or os.path.exists(os.path.join(root, self.required_exists_file)))
                    if meta_ok and exists_ok:
                        printable = root
                        if self.required_exists_file:
                            printable = os.path.join(root, self.required_exists_file)
                        # Sorted so that the verbose label is deterministic.
                        self._report(','.join(sorted(found_meta)), printable)
                del dirs[:]

            # 3. Dirs which are a "git bare repo" or moral equivalent, hinted at by the filename extension
            byname = [d for d in dirs if os.path.splitext(d)[1] in self.dir_exts]
            if byname:
                for d in byname:
                    full_repo, ext = os.path.join(root, d), os.path.splitext(d)[1]
                    if self.use_only_show and ext not in self.only_show:
                        continue
                    if self.required_meta_file and not os.path.exists(os.path.join(full_repo, self.required_meta_file)):
                        continue
                    if self.required_exists_file:
                        # this can't exist inside a bare repo
                        continue
                    self._report(ext, full_repo)
                # Never descend into these, whether or not they were printed.
                byname_set = set(byname)
                dirs[:] = [d for d in dirs if d not in byname_set]

            # 4. A git bare repo, not named to have .git extension
            if GIT_BARE_NEEDALL_DIRS <= dirs_set:
                is_repo_thisdir = True
                if self.use_only_show and '.git' not in self.only_show:
                    pass
                elif self.required_meta_file and not os.path.exists(os.path.join(root, self.required_meta_file)):
                    pass
                elif self.required_exists_file:
                    # this can't exist inside a bare repo
                    pass
                else:
                    self._report('.git', root)
                del dirs[:]

            if self.warn_no_flagfiles and not is_repo_thisdir:
                if not set(files).intersection(REPO_RECURSE_FLAGFILES):
                    print('# WARNING: not a repo, no flag-files: {}'.format(root), file=sys.stderr)
                # Ideally, we'd detect SKIP_CHILDREN_FILENAME and in sub-dirs under that, suppress this warning,
                # by mutating a context only passed to child dirs.
                # But the API doesn't offer us that context.  So instead I added --obey-skips: a crude bodge.

            if self.obey_skips and SKIP_ALL_FILENAME in files:
                del dirs[:]

            if self.obey_skips and SKIP_CHILDREN_FILENAME in files:
                # One child name per line; blank lines and '#' comments are ignored.
                skip = set()
                with open(os.path.join(root, SKIP_CHILDREN_FILENAME)) as skip_file:
                    for line in skip_file:
                        entry = line.rstrip()
                        if entry and not entry.startswith('#'):
                            skip.add(entry)
                dirs[:] = [d for d in dirs if d not in skip]

            if self.symlinks_file is not None and not is_repo_thisdir:
                # It's already in dirs, so there's already been a stat ...
                # I need to decide when to switch to that newer walk API which caches this
                for e in dirs:
                    fn = os.path.join(root, e)
                    st = os.lstat(fn)
                    if stat.S_ISLNK(st.st_mode):
                        print('{0} -> {1}'.format(fn, os.readlink(fn)), file=self.symlinks_file)
def _main(args, argv0):
    """Parse the command line, then report repos under each top directory.

    Args:
        args: command-line arguments, without the program name.
        argv0: program name; accepted but not used within this function.

    Returns:
        0 once every top-level directory has been walked.

    Raises:
        Exit: if a requested repo type is not one of the known types.
    """
    default_repo_dirs = ['.git', '.hg', 'CVS', '.bzr', '.svn', '.sl']
    default_bundle_exts = ['.fossil']
    default_dir_exts = ['.git']
    known_types = frozenset(default_repo_dirs + default_bundle_exts + default_dir_exts)

    parser = argparse.ArgumentParser()
    add = parser.add_argument
    add('-x', '--exclude',
        action='append', dest='exclude_paths', default=[],
        help='skip anything under one of these prefices')
    add('-o', '--only-repo-type',
        action='append', dest='only_repo_types', metavar='TYPE+',
        choices=known_types, default=[],
        help='only print for these repo dirs (available: %(choices)s) (default: %(default)s)')
    add('-r', '--repo-dir',
        action='append', dest='repo_dirs', default=default_repo_dirs,
        help='Add to list of repo dirs (default %(default)s)')
    add('-f', '--bundle-ext',
        action='append', dest='bundle_exts', default=default_bundle_exts,
        help='Add to list of repo file extensions (default %(default)s)')
    add('-d', '--dir-ext',
        action='append', dest='dir_exts', default=default_dir_exts,
        help='Add to list of repo dirname extensions (default %(default)s)')
    add('-v', '--verbose',
        action='store_true', dest='verbose', default=False,
        help='Be more verbose')
    add('--symlinks-fd',
        type=int, default=None, metavar='FD',
        help='FD to write encountered symlinks to')
    add('--warn-no-flagfiles',
        action='store_true', default=False,
        help='Outside repos, warn if no control flag-files are seen (probably want --obey-skips too)')
    add('--obey-skips',
        action='store_true', default=False,
        help='Obey .skip-children files')
    add('-F', '--meta-file',
        type=str, default=None, metavar='FN',
        help='filename relative to repo meta-dir to require to exist, to print')
    add('-e', '--exists-file',
        type=str, default=None, metavar='FN',
        help='filename relative to repo working tree to require to exist, to print, including file')
    add('top_dirs',
        type=str, nargs='*', metavar='DIR',
        help='top level directories')

    options = parser.parse_args(args=args)

    # With no directories given, search under the current directory.
    options.top_dirs = options.top_dirs or ['.']

    unrecognised = set(options.only_repo_types).difference(known_types)
    if unrecognised:
        raise Exit('unknown repo types to limit to: {}'.format(' '.join(sorted(unrecognised))))

    walker = Walker(options)
    for top_dir in options.top_dirs:
        walker.find_under(top_dir)
    return 0
if __name__ == '__main__':
    # Last '/'-separated component of the invoked path, used to prefix errors.
    argv0 = sys.argv[0].split('/')[-1]
    try:
        rv = _main(sys.argv[1:], argv0=argv0)
    except Exit as err:
        # Clean exit: report each message on stderr, without a stack trace.
        for message in err.args:
            print('{}: {}'.format(argv0, message), file=sys.stderr)
        rv = 1
    sys.exit(rv)
# vim: set ft=python sw=4 expandtab : |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment