Skip to content

Instantly share code, notes, and snippets.

Last active September 16, 2024 10:55
Show Gist options
  • Save tigerhawkvok/adb7905a2423312f237da1953d6a8eeb to your computer and use it in GitHub Desktop.
Save tigerhawkvok/adb7905a2423312f237da1953d6a8eeb to your computer and use it in GitHub Desktop.
"""
Remove LFS from a directory.

Remove LFS-tracked files and change them to normal git files.

Python 3.8+, native code.
For Python <= 3.7, move the Literal and Final imports from "typing" to the third-party module "typing_extensions".

@author Philip Kahn
"""
import subprocess
import os
import re
import datetime as dt
from typing import Optional, Union, overload, Literal, Final
from pathlib import Path
from hashlib import sha256
import shutil
# Scratch file that collects the commands of a run. The suffix follows the
# platform's usual script type: PowerShell (.ps1) on Windows ("nt"), POSIX
# shell (.sh) everywhere else.
DRY_RUN_WRITE_PATH:Final[Path] = Path("unLFS_dryRun").with_suffix(".sh" if os.name != "nt" else ".ps1")
# Start from a clean slate so a leftover file from a previous (possibly
# aborted) run is not appended to.
DRY_RUN_WRITE_PATH.unlink(missing_ok= True)
@overload
def __shellRun(*args, dryRun:Literal[False]= False, **kwargs) -> subprocess.CompletedProcess: ...
@overload
def __shellRun(*args, dryRun:Literal[True], **kwargs) -> str: ...
@overload
def __shellRun(*args, dryRun:bool= False, **kwargs) -> Union[subprocess.CompletedProcess, str]: ...
def __shellRun(*args, dryRun:bool= False, **kwargs):
    """
    Run a shell command, or record it to the dry-run script instead.

    The positional arguments are stringified and joined with single spaces
    into one command line. Callers are expected to have already quoted any
    argument that needs quoting for the shell.

    Parameters
    ----------
    *args
        The pieces of the command line; each is passed through `str()`.
    dryRun : bool, optional (default: False)
        If `True`, the command line is appended to `DRY_RUN_WRITE_PATH`
        (followed by a comment listing `kwargs`, if any) and nothing is
        executed. If `False`, the command line is executed.
    **kwargs
        Extra keyword arguments forwarded to `subprocess.run`.

    Returns
    -------
    str
        The recorded line (without the trailing newline) when `dryRun` is `True`.
    subprocess.CompletedProcess
        The finished process, with captured `stdout`/`stderr` as bytes, when
        `dryRun` is `False`.

    Raises
    ------
    subprocess.CalledProcessError
        If the executed command exits with a non-zero status.
    """
    cmd = " ".join(str(x) for x in args)
    if dryRun:
        if kwargs:
            cmd += f" # subprocess sent kwargs {kwargs}"
        with DRY_RUN_WRITE_PATH.open("a", encoding= "utf-8") as f:
            f.write(cmd + "\n")
        return cmd
    # Pass ONE string to the shell. With `shell=True`, a list on POSIX runs
    # only its first item as the command (the rest become arguments of the
    # shell itself), so `["git", "status", ...]` would execute a bare `git`
    # and fail with exit status 1. Executing the joined string also means a
    # real run executes exactly the line a dry run would have recorded.
    return subprocess.run(cmd, check= True, shell= True, capture_output= True, **kwargs)
def _lfsTrackedPaths(rawOutput:bytes) -> list:
    """Extract the file paths from the raw stdout of `git lfs ls-files`."""
    trackedPaths = []
    for line in rawOutput.decode("utf-8").splitlines():
        # Each line is "<oid> <marker> <path>"; split at most twice so that
        # a path containing spaces stays in one piece.
        pieces = line.split(" ", 2)
        if len(pieces) == 3 and pieces[2]:
            trackedPaths.append(pieces[2])
    return trackedPaths


def unLFS(dryRun:bool= True, path:Optional[Union[str, Path]]= None, push:bool= True, pushAllBranches:bool= False, pushRemote:str= "origin") -> Optional[str]:
    """
    Untrack all LFS files in a git repo, and remove LFS from the repo.

    This will also commit the changes and push them to the remote, as well
    as remove the .git/lfs folder, and remove the LFS filter from .gitattributes

    By default, `dryRun` is enabled which merely dumps the commands to a file,
    and does not actually run them. Set `dryRun` to `False` to actually run
    the commands. Even in a dry run the read-only queries (`git status`,
    `git lfs ls-files`, `git branch --show-current`) are really executed.

    WARNING: a real run is destructive. Its cleanup step runs
    `git stash clear`, expires the reflog and garbage-collects the repo.

    Parameters
    ----------
    dryRun : bool
        If `True`, the commands will not be run, but instead dumped to a file. If `False`, the commands will be run.
    path : Optional[Union[str, Path]], optional (default: None)
        The path to the git repo. If None, the current working directory is used.
        The process working directory is switched to this path while the
        function runs and restored afterwards.
    push : bool, optional (default: True)
        If `True`, the changes will be pushed to the remote. If `False`, the changes will not be pushed to the remote.
    pushAllBranches : bool, optional (default: False)
        If `True`, all branches will be pushed to the remote. If `False`, only the current branch will be pushed to the remote.
    pushRemote : str, optional (default: "origin")
        The remote to push to. This is only used if `push` is `True`

    Returns
    -------
    Optional[str]
        The run ID (first 8 hex digits of the SHA-256 of the comma-joined
        list of LFS files), or `None` if no file is tracked by LFS.

    Raises
    ------
    FileNotFoundError
        If the repo has no .gitattributes file.
    RuntimeError
        If there are uncommitted changes to tracked files, if the LFS file
        list cannot be obtained, or (real run only) if files are still
        LFS-tracked afterwards.
    subprocess.CalledProcessError
        If any other executed command fails.
    """
    # Make the path absolute BEFORE changing directory, so that a relative
    # `path` keeps pointing at the same place.
    path = Path.cwd() if path is None else Path(path).absolute()
    gitAttributes = path / ".gitattributes"
    if not gitAttributes.is_file():
        raise FileNotFoundError("No .gitattributes file found")
    # Every git command below acts on the process working directory, so
    # switch to the repo for the duration of the run.
    previousCwd = Path.cwd()
    os.chdir(path)
    try:
        with DRY_RUN_WRITE_PATH.open("a", encoding= "utf-8") as f:
            f.write(f"\n## Dry Run Started At {dt.datetime.now().isoformat()} ##")
            f.write(f"\n## Working directory: {path.absolute().as_posix()} ##\n\n")
        # Check for uncommitted files
        # Porcelain lines are "XY <path>": two status characters and a space.
        # Untracked entries ("??") are ignored on purpose.
        statusOut = __shellRun("git", "status", "--porcelain").stdout.decode("utf-8")
        status = [line[3:] for line in statusOut.splitlines() if line.strip() and not re.search(r"^\?\?", line)]
        if len(status) > 0:
            print("\nUncommitted files found:")
            print("\t", status, end= "\n\n")
            # Do not suggest stashing: the cleanup further down clears all stashes.
            raise RuntimeError("Uncommitted files found, please commit them first (stashing is not safe: the cleanup step runs 'git stash clear')")
        # Get file lists
        try:
            fileListOut = __shellRun("git", "lfs", "ls-files")
        except subprocess.CalledProcessError as e:
            raise RuntimeError("Problem getting untracked file list") from e
        if dryRun:
            __shellRun("git", "lfs", "ls-files", dryRun= True)
        fileList = _lfsTrackedPaths(fileListOut.stdout)
        if len(fileList) == 0:
            print("No files to untrack")
            DRY_RUN_WRITE_PATH.unlink(missing_ok= True)
            return None
        # Get a stable run ID based on the files we're untracking
        runID = sha256(",".join(fileList).encode("utf-8")).hexdigest()[:8]
        # Make sure the real file contents are present locally.
        # NOTE(review): confirm the installed git-lfs accepts `--all` for
        # `pull`; the documented spelling may be `git lfs fetch --all`.
        __shellRun("git", "lfs", "pull", "--all", dryRun= dryRun)
        MERGE_BRANCH_TARGET:Final[str] = __shellRun("git", "branch", "--show-current").stdout.decode("utf-8").strip()
        NEW_BRANCH_NAME:Final[str] = f"unLFS_{runID}"
        if dryRun:
            __shellRun("git", "branch", "--show-current", dryRun= True)
        # Do the work on a dedicated branch, merged back further down.
        __shellRun("git", "checkout", "-b", NEW_BRANCH_NAME, dryRun= dryRun)
        for file in fileList:
            __shellRun("git", "lfs", "untrack", f"\"{file}\"", dryRun= dryRun)
            __shellRun("git", "rm", "--cached", f"\"{file}\"", dryRun= dryRun)
        # Remove patterns from .gitattributes
        gaTextLines = gitAttributes.read_text("utf-8").splitlines()
        gaLFS = [line for line in gaTextLines if re.search(r"filter\s*=\s*lfs", line, re.IGNORECASE)]
        gaNonLFS = [line for line in gaTextLines if not re.search(r"filter\s*=\s*lfs", line, re.IGNORECASE)]
        for lfsLine in gaLFS:
            # The pattern is the first whitespace-delimited field. Quote it
            # so the shell does not glob-expand e.g. `*.png` into file names.
            pattern = lfsLine.split(None, 1)[0]
            __shellRun("git", "lfs", "untrack", f"\"{pattern}\"", dryRun= dryRun)
        if not dryRun:
            gitAttributes.write_text("\n".join(gaNonLFS), "utf-8")
        else:
            with DRY_RUN_WRITE_PATH.open("a", encoding= "utf-8") as f:
                f.write("\n## New .gitAttributes ##\n# " + "\n# ".join(gaNonLFS) + "\n")
        # This should handle most of it and convert pointers
        __shellRun("git", "add", "--renormalize", ".", dryRun= dryRun)
        for file in fileList:
            # In case we were manually tracking a file
            # that is otherwise ignored
            __shellRun("git", "add", "-f", f"\"{file}\"", dryRun= dryRun)
        # Double quotes are understood by POSIX shells, cmd.exe and PowerShell alike.
        __shellRun("git", "commit", "-m", f"\"Untrack LFS files ({runID})\"", dryRun= dryRun)
        # Merge
        __shellRun("git", "checkout", MERGE_BRANCH_TARGET, dryRun= dryRun)
        __shellRun("git", "merge", NEW_BRANCH_NAME, dryRun= dryRun)
        # Push
        pushCommand = ["git", "push", pushRemote]
        if pushAllBranches:
            pushCommand.append("--all")
        if push:
            __shellRun(*pushCommand, dryRun= dryRun)
        # Verify
        fileListRemaining = __shellRun("git", "lfs", "ls-files")
        if dryRun:
            __shellRun("git", "lfs", "ls-files", dryRun= True)
        fileList = _lfsTrackedPaths(fileListRemaining.stdout)
        if len(fileList) > 0:
            if dryRun:
                # Nothing was changed, so files are expected to still be listed.
                print("Would verify that no files remained tracked, but dry run is enabled")
            else:
                print("Failed to untrack files:")
                print("\t", fileList, end= "\n\n")
                raise RuntimeError("Failed to untrack files")
        # Do a cleanup before we nuke directories
        __shellRun("git", "lfs", "prune", "--verify-remote", "--verbose", "--recent", dryRun= dryRun)
        __shellRun("git stash clear && git prune --expire=now && git reflog expire --expire-unreachable=now --rewrite --all && git repack -a -d && git prune-packed && git reflog expire --all --expire=now && git gc --prune=now --aggressive", dryRun= dryRun)
        # Remove locally
        __shellRun("git", "lfs", "uninstall", "--local", dryRun= dryRun)
        lfsDirPath = Path(path, ".git", "lfs")
        if dryRun:
            print("Would remove .git/lfs, but dry run is enabled")
            with DRY_RUN_WRITE_PATH.open("a", encoding= "utf-8") as f:
                f.write(f"\n## Remove .git/lfs ##\n# {lfsDirPath.absolute().as_posix()}\n")
            # Keep a per-run copy of the script; the scratch file itself is deleted below.
            persistPath = DRY_RUN_WRITE_PATH.with_name(DRY_RUN_WRITE_PATH.with_suffix("").name + f"_{runID}{DRY_RUN_WRITE_PATH.suffix}")
            shutil.copy(DRY_RUN_WRITE_PATH, persistPath)
            print(f"Dry-run commands written to {persistPath.absolute().as_posix()}")
        elif lfsDirPath.is_dir():
            shutil.rmtree(lfsDirPath)
        DRY_RUN_WRITE_PATH.unlink(missing_ok= True)
        return runID
    finally:
        # Leave the caller's working directory as we found it.
        os.chdir(previousCwd)
if __name__ == "__main__":
    # Running the file as a script only performs the dry run: the commands
    # are written to a script file for review instead of being executed.
    unLFS(dryRun= True)
Copy link

pkeir commented Sep 16, 2024

Thanks. Alas, for me running this ended with a CalledProcessError. Note that git status --porcelain runs correctly (error code 0):

$ python3
Traceback (most recent call last):
  File "/home/XXXXXXX/repos/myrepo/", line 170, in <module>
    unLFS(dryRun= True)
  File "/home/XXXXXXX/repos/myrepo/", line 78, in unLFS
    status = str(__shellRun("git", "status", "--porcelain").stdout.decode("utf-8")).strip().splitlines()
  File "/home/XXXXXXX/repos/myrepo/", line 40, in __shellRun
    return subprocess.run([str(x) for x in args], check= True, shell= True, capture_output= True, **kwargs)
  File "/usr/lib/python3.12/subprocess.py", line 571, in run
    raise CalledProcessError(retcode, process.args,
subprocess.CalledProcessError: Command '['git', 'status', '--porcelain']' returned non-zero exit status 1.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment