|
#!/usr/bin/env python |
|
|
|
""" |
|
Take a diff created by diff.py and colorize it and add insert/delete markers. |
|
|
|
Requirements: click (only used for its terminal color printing feature, could easily be replaced with something else) |
|
|
|
Conceptually, this script implements a state machine: |
|
|
|
┏━━━━━┓ |
|
┃START┃ |
|
┗━━┯━━┛ |
|
↓ |
|
╭───────────────────┲━━━━━━┱────────────────────╮ |
|
↓ ╭──────→┃COMMON┃←────────╮ ↓ |
|
╭────────╮ │ ╭────→┗┯━━━━┯┛←──────╮ │ ╭────────╮ |
|
│START │ │ │ ↓ ↓ │ │ │START │ |
|
│ADDITION│ │ │ ╭──────╮ ╭───────╮ │ │ │DELETION│ |
|
╰───┬────╯ │ │ │NORMAL│ │NEWLINE│ │ │ ╰───┬────╯ |
|
│ │ │ │CHAR │ ╰────┬──╯ │ │ │ |
|
↓ │ │ ╰─┬────╯ ↓ │ │ ↓ |
|
┏━━━━━━━━┓ │ │ ↓ print,─╯ │ ┏━━━━━━━━┓ |
|
┃COLLECT ┃ │ ╰──store clear │ ┃COLLECT ┃ |
|
╭────→┃ADDITION┃ │ both stored │ ┃DELETION┃←────╮ |
|
│ ┗┯━━━━━━┯┛ │ │ ┗┯━━━━━━┯┛ │ |
|
│ ↓ ↓ │ │ ↓ ↓ │ |
|
│ ╭──────╮ ╭────────╮ │ │ ╭────────╮ ╭──────╮ │ |
|
│ │NORMAL│ │END │ │ │ │END │ │NORMAL│ │ |
|
│ │CHAR │ │ADDITION│ │ │ │DELETION│ │CHAR │ │ |
|
│ ╰──┬───╯ ╰────┬───╯ │ │ ╰─┬──────╯ ╰──┬───╯ │ |
|
│ ↓ ╰─────╯ ╰───╯ ↓ │ |
|
╰──store store───╯ |
|
addition deletion |
|
|
|
|
|
KEY |
|
--- |
|
┏━━━━━┓ |
|
┃STATE┃ |
|
┗━━━━━┛ |
|
╭─────────╮ |
|
│CONDITION│ |
|
╰─────────╯ |
|
action |
|
|
|
|
|
We go through the input character-by-character, storing text in two buffers: one for addition lines |
|
and one for deletion lines. When we encounter an addition or deletion region, we store it in the
|
appropriate buffer, marking it as a diff region. When we encounter a common character, we store it |
|
in both buffers. When we encounter a newline, we first print the deletion buffer with red foreground |
|
and the diff regions with red background, then do the same with the addition buffer except with |
|
green. |
|
|
|
The algorithm used here is slightly different from the one described above. First, rather than going
|
through the input character-by-character, we iterate through matches of a regex that matches some |
|
arbitrary text followed by a state transition indicator. This pushes the tight loop into the C |
|
implementation of the regex engine, improving performance. That change necessitates some minor |
|
modifications to the state machine, but the overall idea is similar. Second, there's a bit of a hack |
|
to support insertion and deletion markers. The intended effect is described in the README; to add
|
them we add the marker character before deletions and additions that don't directly follow |
|
deletions. This works because what it means to be an "edit" is to be an addition directly following |
|
a deletion. There are some slightly hairy edge cases to handle, but they're pretty well explained in the code.
|
""" |
|
|
|
from sys import stdout, stdin |
|
from enum import Enum |
|
import re |
|
import click |
|
|
|
|
|
def bail(state, boundary, text):
    """Abort parsing because ``boundary`` is not valid in ``state``.

    Args:
        state: The parser state in which the boundary was encountered.
        boundary: The boundary token that was matched.
        text: The text that immediately preceded the boundary.

    Raises:
        ValueError: Always. ``ValueError`` is a subclass of ``Exception``, so
            callers that catch ``Exception`` keep working.
    """
    # repr-quote the token and the text so that empty strings and whitespace
    # remain visible in the message.
    raise ValueError(
        f"Unexpected boundary in {state} state: {boundary!r}. After {text!r}"
    )
|
|
|
class Change:
    """A piece of buffered text that is part of a diff region.

    Attributes:
        s: The text carried by this change.
    """

    def __init__(self, s):
        self.s = s

    def __repr__(self):
        # Use repr() of the payload so that empty strings, whitespace and
        # newlines are visible when debugging.
        return f"{type(self).__name__}({self.s!r})"
|
|
|
class Addition(Change):
    """Inserted text, or an insertion marker, stored in an output buffer."""
|
|
|
class Deletion(Change):
    """Deleted text, or a deletion marker, stored in an output buffer."""
|
|
|
|
|
def format_sep(buf, color):
    """Render one output buffer as a single ANSI-styled string.

    Args:
        buf: Iterable of buffer items. Plain ``str`` items are common text;
            ``Addition`` and ``Deletion`` items are diff regions or markers.
        color: Foreground color applied to the plain ``str`` items.

    Returns:
        The concatenation of every styled item, in buffer order.

    Raises:
        Exception: If an item is neither ``str``, ``Addition`` nor ``Deletion``.
    """

    def render(bit):
        if isinstance(bit, str):
            return click.style(bit, fg=color)

        if isinstance(bit, Addition):
            tint = 'green'
        elif isinstance(bit, Deletion):
            tint = 'red'
        else:
            raise Exception(f"Unexpected item in buffer: {bit}")

        # Hack: a change whose text is exactly the arrow is treated as a
        # marker and drawn white-on-color; this should be factored into Change.
        if bit.s == '↓':
            return click.style(bit.s, fg='white', bg=tint)
        # Real changed text is drawn with foreground/background swapped.
        return click.style(bit.s, fg=tint, reverse=True)

    return ''.join(render(bit) for bit in buf)
|
|
|
|
|
class State(Enum):
    """Where the parser currently is: common text, an insertion, or a deletion."""

    common = 1
    insert = 2
    delete = 3


# Lazily matches a run of text (which never crosses a newline) followed by one
# boundary token: "{+" / "+}" open and close an insertion, "[-" / "-]" open and
# close a deletion, and "\n" ends the line.
boundary_re = re.compile(
    r'(.*?)(\{\+|\+\}|\[-|-]|\n)',
    flags=re.MULTILINE,
)
|
|
|
def main(whole_text):
    """Colorize a word-level diff and write the result to stdout.

    The input is scanned with ``boundary_re``; every match is a run of text
    followed by a boundary (``{+``, ``+}``, ``[-``, ``-]`` or a newline).
    Lines without changes are written through unchanged. Lines with changes
    are collected into two buffers and written as two lines once the closing
    newline is reached: first the deletion view (red), then the addition view
    (green). A ``↓`` marker is inserted into the opposite view at the position
    of a deletion, and of an insertion that does not directly follow a
    deletion.

    Args:
        whole_text: The complete diff text to process.

    Raises:
        Exception: If a boundary is not valid in the current state (raised
            through ``bail``), or if the state is not a known ``State``.
    """
    state = State.common
    sub_buf, add_buf = [], []
    # Offset just past the last boundary seen. It is tracked explicitly rather
    # than read from the loop variable after the loop, so that input without
    # any boundary at all (for example an empty string) does not crash.
    consumed = 0

    for match in boundary_re.finditer(whole_text):
        text, boundary = match.groups()
        consumed = match.end()

        if state == State.common:
            if boundary == '\n':
                if sub_buf or add_buf:
                    # The line contained changes: print separated output.
                    sub_buf.append(text + boundary)
                    add_buf.append(text + boundary)

                    stdout.write(format_sep(sub_buf, 'red'))
                    stdout.write(format_sep(add_buf, 'green'))

                    sub_buf, add_buf = [], []
                else:
                    # We have just a plain line of common text.
                    stdout.write(text + boundary)

            elif boundary in ('{+', '[-'):
                state = State.insert if boundary == '{+' else State.delete
                if text:
                    # Common text before the region belongs to both views.
                    sub_buf.append(text)
                    add_buf.append(text)

            else:
                bail(state, boundary, text)

        elif state == State.delete:
            if boundary == '-]':
                # Add deletion marker for easier comparison.
                add_buf.append(Deletion('↓'))
                sub_buf.append(Deletion(text))
                state = State.common
            elif boundary == '\n':
                # The deletion continues on the next line.
                sub_buf.append(Deletion(text + boundary))
            else:
                bail(state, boundary, text)

        elif state == State.insert:
            if boundary == '+}':
                # An addition directly following a deletion is an edit, so
                # remove the deletion marker added by the delete branch.
                # NOTE(review): for an insertion spanning several lines the
                # last add_buf item is already an Addition at this point, so
                # the marker is left in place; confirm whether that is wanted.
                if (sub_buf and add_buf
                        and isinstance(sub_buf[-1], Deletion)
                        and isinstance(add_buf[-1], Deletion)):
                    del add_buf[-1]

                # Add addition marker for easier comparison, but not when the
                # change was an edit. There is no need to do this in the
                # delete branch because in edits, additions always follow
                # deletions.
                # NOTE(review): because of the ``sub_buf and`` guard, an
                # insertion at the very start of a line gets no marker;
                # confirm whether that is intended.
                if sub_buf and not isinstance(sub_buf[-1], Deletion):
                    sub_buf.append(Addition('↓'))

                add_buf.append(Addition(text))
                state = State.common
            elif boundary == '\n':
                # The insertion continues on the next line.
                add_buf.append(Addition(text + boundary))
            else:
                bail(state, boundary, text)

        else:
            raise Exception(f"Unexpected state {state}")

    # Whatever follows the last boundary, e.g. a final line that has no
    # trailing newline. It never contains a newline itself.
    tail = whole_text[consumed:]

    if not (sub_buf or add_buf):
        # Nothing is pending, so the rest is written through unchanged.
        stdout.write(tail)
        return

    # The final line contained changes but was never terminated by a newline.
    # Flush the buffers instead of silently dropping their content.
    if tail:
        if state == State.insert:
            add_buf.append(Addition(tail))
        elif state == State.delete:
            sub_buf.append(Deletion(tail))
        else:
            sub_buf.append(tail)
            add_buf.append(tail)

    stdout.write(format_sep(sub_buf, 'red'))
    if sub_buf and add_buf:
        last = sub_buf[-1]
        last_text = last if isinstance(last, str) else last.s
        # Keep the two views on separate lines even without a final newline.
        if not last_text.endswith('\n'):
            stdout.write('\n')
    stdout.write(format_sep(add_buf, 'green'))
|
|
|
|
|
|
|
|
|
if __name__ == '__main__':
    # Read the whole diff from standard input, then colorize it to stdout.
    diff_text = stdin.read()
    main(diff_text)