Skip to content

Instantly share code, notes, and snippets.

@maxrothman
Created December 31, 2019 00:44
Show Gist options
  • Save maxrothman/d27bbc36f7c150924de6c6e54965de4d to your computer and use it in GitHub Desktop.
Save maxrothman/d27bbc36f7c150924de6c6e54965de4d to your computer and use it in GitHub Desktop.
Git diff + blame
#!/usr/bin/env python3
"""
Imagine this: you do a bunch of work and end up with a bunch of commits on a branch, but now you
want to reorganize them into logical changes. A lot of those commits touched the same lines though,
so it's hard to know how to squash them together. In this situation, it'd be helpful to look at the
whole diff of the branch against its root, with changes labeled by the commit that introduces them,
sort of like a hybrid between git blame and git diff. This is that!
Usage: git diff --color=always ROOT | git-diff-blame.py | less
Future work:
- Indicate if a line was changed by multiple commits on the branch
- Annotate deletions
- More advanced UI:
- a la tig: view commit, view line at previous commit (until you hit diff start commitish)
- Jump to source/destination of moved/copied line
Prior art:
- https://github.com/eantoranz/difflame
"""
# TODO:
# - make the state machine less hacky. There are a few escape hatches which were expedient but unclean
# - improve performance by talking to the git API directly rather than through a subprocess
from sys import stdin
from enum import Enum, auto
import re
import subprocess
# Matches terminal escape sequences of the form ESC...m so they can be stripped before parsing.
fmt_pattern = re.compile(r'\x1b[^m]*m')
# Matches a "diff --git a/<path> b/<path>" header line; group 1 is the b/ path.
start_pattern = re.compile(r'diff --git a/\S+ b/(\S+)')
# Matches a hunk header ("@@ <first range> <sign><start>[,<count>]"); group 1 is the start
# line of the second range and group 2 is its count, which is absent when not written.
range_pattern = re.compile(r'@@ \S+ .(\d+)(?:,(\d+))?')
def run(cmd):
    """Execute *cmd* through the shell and return the finished process.

    Both stdout and stderr are captured and decoded as text; they are
    available as ``.stdout`` / ``.stderr`` on the returned
    ``subprocess.CompletedProcess``. A non-zero exit status does not raise.
    """
    completed = subprocess.run(
        cmd,
        shell=True,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        universal_newlines=True,
    )
    return completed
class States(Enum):
    """Parser states used while walking the diff in ``main``."""

    # No "diff --git" header has been seen yet.
    FIND_FILENAME = 1
    # A file header has been seen; waiting for the first hunk header.
    FIND_DIFF = 2
    # Inside a hunk; lines are diff content.
    IN_DIFF = 3
def main():
    """Annotate a unified diff read from stdin with blame information.

    The diff (optionally colorized) is read line by line. File header lines
    are echoed unchanged. Inside a hunk, every line is passed through
    ``format_line``: added lines are labeled with the commit that
    ``get_blames`` attributes them to, all other lines get a blank label.

    Input that precedes the first ``diff --git`` header is not printed.
    """
    filename = None
    state = States.FIND_FILENAME
    # Iterator of commit ids for the lines of the current hunk's new-file range.
    blames = None

    for raw_line in stdin:
        # Parse a color-free copy, but always print the original line.
        line = fmt_pattern.sub('', raw_line)
        start_match = start_pattern.match(line)
        range_match = range_pattern.match(line)

        if start_match:
            print(raw_line, end='')
            filename = start_match.group(1)
            state = States.FIND_DIFF
        elif range_match:
            # In unified diff an omitted count means the range covers exactly
            # one line, so the default must be 1 (not 0).
            line_range = [int(value) for value in range_match.groups('1')]
            blames = get_blames(line_range, filename)
            state = States.IN_DIFF
            # NOTE(review): the hunk header itself is not echoed to the output;
            # confirm that this is intentional.
        elif state == States.FIND_DIFF:
            print(raw_line, end='')
        elif state == States.IN_DIFF:
            commit = ''
            if line.startswith(('-', '\\')):
                # Removed lines and "\ No newline at end of file" markers do
                # not exist in the new file, so they must not consume a blame
                # entry.
                pass
            else:
                is_addition = line.startswith('+')
                try:
                    blamed = next(blames)
                except StopIteration:
                    # Blame output ran out before the hunk did.
                    print('error1' if is_addition else 'error2')
                    print(raw_line)
                    break
                # Context lines advance the iterator but stay unlabeled.
                if is_addition:
                    commit = blamed
            print(format_line(commit, raw_line), end='')
def get_blames(line_range, filename):
    """Return an iterator over the commit ids blamed for a range of lines.

    Args:
        line_range: ``[start, count]`` - first line number and number of
            lines to blame.
        filename: path handed to ``git blame``.

    Returns:
        An iterator yielding, for each line of ``git blame`` output, its
        first whitespace-separated token with any leading ``^`` removed.
        The iterator is empty when ``count`` is not positive or when git
        produced no output (for example because the command failed).
    """
    # Local import keeps this block self-contained; the command is executed
    # through a shell, so the path has to be quoted.
    import shlex

    start = line_range[0]
    count = line_range[1]
    if count <= 0:
        # Nothing to blame (e.g. a pure-deletion hunk); skip the subprocess.
        return iter(())

    # "--" stops git from interpreting the path as a revision or an option.
    cmd = f'git blame -L {start},+{count} -- {shlex.quote(str(filename))}'
    res = run(cmd)
    # git blame prefixes boundary commits with "^"; strip it so the id can be
    # passed to other git commands without being read as an exclusion.
    return iter([entry.split()[0].lstrip('^') for entry in res.stdout.splitlines()])
# Commit id -> label text, so `git log` runs once per commit instead of once
# per added line.
_summary_cache = {}


def _commit_summary(commit):
    """Return up to 18 characters of the first message line of *commit*."""
    if commit not in _summary_cache:
        message = run(f'git log --format=%B -n 1 {commit}').stdout
        lines = message.splitlines()
        # Only the first line is used: slicing the raw message could keep an
        # embedded newline when the subject is short, which would split the
        # annotated diff line in two.
        first_line = lines[0] if lines else ''
        _summary_cache[commit] = first_line[:18].strip()
    return _summary_cache[commit]


def format_line(commit, line):
    """Prefix *line* with a fixed-width label describing *commit*.

    Args:
        commit: commit id to look up, or an empty string for no label.
        line: the diff line to annotate; returned unchanged after the label.

    Returns:
        The label padded to 18 characters, one space, then *line*.
    """
    msg = _commit_summary(commit) if commit else ''
    return f"{msg:18} " + line
# Run the annotator only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment