Created
April 17, 2018 01:38
-
-
Save jchaffin/e1f7ef8f27e389a30adf1a270743c0d0 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python3 | |
""" | |
Copyright 2017 Jacob Chaffin | |
This program is free software: you can redistribute it and/or modify | |
it under the terms of the GNU General Public License as published by | |
the Free Software Foundation; either version 3 of the License, or | |
(at your option) any later version. | |
This program is distributed in the hope that it will be useful, | |
but WITHOUT ANY WARRANTY; without even the implied warranty of | |
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
GNU General Public License for more details. | |
Please see <http://www.gnu.org/licenses/> for a copy of the license. | |
""" | |
import locale
import random
import string
import sys
from argparse import ArgumentParser, FileType
from collections import Counter
def lines(filecontents):
    """Return the lines of *filecontents* without their line terminators.

    Only the trailing newline is removed. Leading and trailing blanks are
    part of a line's content for comparison purposes, so they are kept.

    Args:
        filecontents: an open text file, or any iterable yielding lines.

    Returns:
        A list of strings, one per input line, in input order.
    """
    return [line.rstrip('\n') for line in filecontents]
def _merge_sorted(first, second):
    """Yield ``(column, line)`` pairs by merging two sorted lists of lines.

    Column 1 is a line found only in *first*, column 2 a line found only in
    *second*, and column 3 a line found in both. Every input line is emitted
    exactly once, so repeated lines pair up one-to-one.
    """
    i = j = 0
    while i < len(first) and j < len(second):
        if first[i] == second[j]:
            yield 3, first[i]
            i += 1
            j += 1
        elif first[i] < second[j]:
            yield 1, first[i]
            i += 1
        else:
            yield 2, second[j]
            j += 1
    # Whatever is left over on either side has no partner in the other file.
    for line in first[i:]:
        yield 1, line
    for line in second[j:]:
        yield 2, line


def _pair_in_file_order(first, second):
    """Yield ``(column, line)`` pairs for inputs that are not sorted.

    Lines of *first* are reported in their original order, as column 3 when
    an unused copy exists in *second* and as column 1 otherwise. The lines of
    *second* that were never matched follow, in their original order, as
    column 2.
    """
    available = Counter(second)
    for line in first:
        if available[line] > 0:
            available[line] -= 1
            yield 3, line
        else:
            yield 1, line
    # A match consumes the earliest copies in `second`; later copies remain.
    consumed = Counter(second) - available
    for line in second:
        if consumed[line] > 0:
            consumed[line] -= 1
        else:
            yield 2, line


def _format_rows(rows, suppressed):
    """Yield printable text for each row whose column is not suppressed.

    *suppressed* is a 3-tuple of booleans for columns 1, 2 and 3. A row is
    indented by one tab per visible column to its left, so suppressing a
    column shifts the later columns left.
    """
    for column, line in rows:
        if suppressed[column - 1]:
            continue
        indent = suppressed[:column - 1].count(False)
        yield '\t' * indent + line + '\n'


def main():
    """Compare FILE1 and FILE2 line by line and print a three-column report.

    Column 1 lists lines unique to FILE1, column 2 lines unique to FILE2 and
    column 3 lines common to both. The options -1, -2 and -3 suppress the
    matching column.

    Without -u both inputs are sorted and then merged, so the report is in
    sorted order. With -u nothing is sorted: FILE1's lines are reported in
    file order, followed by the unmatched lines of FILE2.

    Arguments are read from ``sys.argv`` and the report goes to standard
    output.
    """
    description_msg = "compare two sorted files line by line"
    parser = ArgumentParser(description=description_msg)

    # Positional Arguments
    parser.add_argument('FILE1', type=FileType('r'))
    parser.add_argument('FILE2', type=FileType('r'))

    # Optional Arguments
    parser.add_argument(
        "-1",
        help="suppress column 1 (lines unique to FILE1)",
        dest="one",
        action="store_true"
    )
    parser.add_argument(
        "-2",
        help="suppress column 2 (lines unique to FILE2)",
        dest="two",
        action="store_true"
    )
    parser.add_argument(
        "-3",
        help="suppress column 3 (lines that appear in both files)",
        dest="three",
        action="store_true"
    )
    # NOTE(review): despite the help text, -u selects the mode that does NOT
    # sort; it compares the inputs in file order. The help string is left
    # untouched here; consider rewording it.
    parser.add_argument(
        "-u",
        help="sort inputs",
        dest="sort",
        action="store_true"
    )
    args = parser.parse_args()

    # argparse opened both files; close them as soon as they are read.
    with args.FILE1, args.FILE2:
        file1 = lines(args.FILE1)
        file2 = lines(args.FILE2)

    suppressed = (args.one, args.two, args.three)
    if all(suppressed):
        # Every column is hidden, so there is nothing to print.
        return

    if args.sort:
        rows = _pair_in_file_order(file1, file2)
    else:
        rows = _merge_sorted(sorted(file1), sorted(file2))
    sys.stdout.writelines(_format_rows(rows, suppressed))
# Run the command-line tool only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment