Created
June 12, 2014 20:10
-
-
Save kennyyu/5e59519b3972823e20bf to your computer and use it in GitHub Desktop.
Script to count the number of lines written by an author in a Git repository.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import argparse | |
import json | |
import os | |
import subprocess | |
class Chdir:
    """
    Context manager that switches the working directory to `newpath`
    for the duration of a `with` block and restores the previous
    directory on exit.

    Pattern taken from here:
    http://stackoverflow.com/questions/431684/how-do-i-cd-in-python
    """

    def __init__(self, newpath):
        # Directory to switch into when the context is entered.
        self.newpath = newpath

    def __enter__(self):
        # Remember where we were so __exit__ can go back there.
        self.oldpath = os.getcwd()
        os.chdir(self.newpath)
        # Return the manager so `with Chdir(p) as cd` binds something useful
        # instead of None.
        return self

    def __exit__(self, *err):
        # Always restore the previous directory; returning None lets any
        # exception raised inside the block propagate.
        os.chdir(self.oldpath)
def command(s):
    """
    Runs the shell command `s` and returns its stdout as a list of
    strings, one per output line, with surrounding whitespace stripped.

    Anything the command writes to stderr is discarded. The exit status
    is not checked, so a failing command simply yields whatever it
    printed to stdout (possibly nothing).
    """
    # Open the null device in a `with` block so the handle is closed once
    # the child has finished instead of being leaked.
    with open(os.devnull, "w") as devnull:
        proc = subprocess.Popen(s,
                                stdout=subprocess.PIPE,
                                stderr=devnull,
                                shell=True,
                                # text mode: str lines on Python 3 too, so
                                # callers can split/compare against str
                                universal_newlines=True)
        try:
            lines = proc.stdout.readlines()
        finally:
            # Close the pipe and reap the child so no zombie process or
            # open descriptor is left behind.
            proc.stdout.close()
            proc.wait()
    # strip trailing newline characters
    return [line.strip() for line in lines]
def file_type(fname):
    """
    Given a file name (optionally with leading "/"-separated directories),
    returns the file extension, or empty string if there is none.

    Special cases:
      * editor temp files (name ends with "~" or starts with "#")
        return "<TEMP>"
      * names containing "makefile" / "readme" (case-insensitive)
        return "Makefile" / "README"

    Only the final path component is inspected, so dots or keywords in
    directory names do not influence the result. A leading-dot name such
    as ".DS_Store" yields "DS_Store".
    """
    # Classify the file itself, not the directories that contain it.
    name = fname.rsplit("/", 1)[-1]
    if not name:
        # Empty string or a path ending in "/": nothing to classify.
        return ""

    # handle special cases of tempfiles
    if name.endswith("~") or name.startswith("#"):
        return "<TEMP>"

    # handle special cases of Makefile and READMEs
    lowered = name.lower()
    if "makefile" in lowered:
        return "Makefile"
    if "readme" in lowered:
        return "README"

    # If there is no dot at all, there was no extension.
    _, dot, extension = name.rpartition(".")
    if not dot:
        return ""
    return extension
def commits_by_author(author):
    """
    Returns the list of (abbreviated) commit hashes by the given author,
    as printed in the first column of `git log --oneline`.

    The author pattern is shell-quoted before being placed on the command
    line, so names containing quotes, `$`, backticks or spaces are passed
    to git literally.
    """
    # shlex.quote on Python 3; pipes.quote is the Python 2 spelling.
    try:
        from shlex import quote
    except ImportError:
        from pipes import quote

    lines = command("git log --author=%s --oneline" % quote(author))
    # Each line is "<hash> <subject>"; keep only the hash. Blank lines
    # carry no hash and are skipped.
    return [line.split(" ", 1)[0] for line in lines if line]
def commit_stats(commit):
    """
    Returns a mapping of filename -> (additions, deletions) for one
    commit, parsed from `git show <commit> --oneline --numstat`.

    A "-" count (which git prints instead of a number) is treated as 0.
    File names are kept exactly as git prints them, including any rename
    notation.

    Raises ValueError if a non-empty stat line does not consist of three
    tab-separated fields or its counts are not integers.
    """
    lines = command("git show %s --oneline --numstat" % commit)
    assert len(lines) > 0, "no output from git show for %r" % (commit,)

    # the first line is just a repeat of the hash, so skip it
    lines = lines[1:]

    # stats are of the form:
    # additions<TAB>deletions<TAB>filename
    stats = {}
    for line in lines:
        line = line.strip()
        if line == "":
            continue
        # Split on the tab separators only (at most twice) so file names
        # containing spaces, or rename notation such as "a => b", stay in
        # one piece instead of breaking the three-way unpack.
        fields = line.split("\t", 2)
        if len(fields) != 3:
            raise ValueError("unexpected numstat line: %r" % (line,))
        addition, deletion, fname = fields
        if addition == "-":
            addition = "0"
        if deletion == "-":
            deletion = "0"
        stats[fname] = (int(addition), int(deletion))
    return stats
def author_stats(author):
    """
    Returns a mapping of filename -> (total +, total -), accumulated
    over every commit returned by commits_by_author(author).
    """
    totals = {}
    for commit in commits_by_author(author):
        for fname, (adds, dels) in commit_stats(commit).items():
            # Start from (0, 0) the first time a file is seen.
            prev_adds, prev_dels = totals.get(fname, (0, 0))
            totals[fname] = (prev_adds + adds, prev_dels + dels)
    return totals
def author_stats_filetype(author, exclude=frozenset()):
    """
    Returns a mapping of filetype -> (total +, total -) for the given
    author, where the file type of each file is decided by file_type().

    `exclude` is any iterable of file types to leave out of the result;
    by default nothing is excluded.
    """
    # Build the lookup set once: callers may pass a list, and membership
    # is tested for every file.
    excluded = frozenset(exclude)

    tstats = {}
    for fname, (adds, dels) in author_stats(author).items():
        ftype = file_type(fname)
        if ftype in excluded:
            continue
        totadds, totdels = tstats.get(ftype, (0, 0))
        tstats[ftype] = (totadds + adds, totdels + dels)
    return tstats
def total_lines(stats):
    """
    Counts the total number of +'s and -'s.

    `stats` maps any key to an (additions, deletions) pair; the result is
    the pair (sum of additions, sum of deletions), or (0, 0) when `stats`
    is empty.
    """
    totadds, totdels = 0, 0
    for adds, dels in stats.values():
        totadds += adds
        totdels += dels
    return totadds, totdels
# File types (as returned by file_type) that are left out of the counts
# unless --exclude is given on the command line.
EXCLUDE = [
    "jpg", "pdf", "dat", "data", "csv", "xlsx", "tgz", "so", "png", "swp", "a",
    "expected", "out", "cproject", "project", "asm", "fish", "scish", "cish",
    "mlish", "<TEMP>", "gexf", "gephi", "ipynb", "cls", "JPG", "PNG", "nb", "xls",
    "classpath", "DS_Store", "class", "script", "names", "jar",
    "mat", "ppt", "pptx", "tif", "zip", "bmp", "eps", "crt", "csr", "key", "orig",
]

# Command-line interface. The text is passed as `description`: the first
# positional argument of ArgumentParser is `prog`, which would replace the
# program name shown in the usage line.
parser = argparse.ArgumentParser(description="line count")
parser.add_argument("path", type=str, help="path to git repo")
parser.add_argument("author", type=str, help="author to count lines")
parser.add_argument("--out", type=str, help="outfile for results",
                    default=None, dest="out")
# --exclude takes a comma-separated list and replaces the default list.
parser.add_argument("--exclude", type=lambda s: s.split(","),
                    help="exclude file types",
                    default=EXCLUDE, dest="exclude")
if __name__ == "__main__":
    # Script entry point: collect per-file-type line counts for one author
    # in one repository and emit them as JSON (stdout, plus --out if given).
    args = vars(parser.parse_args())
    path = args["path"]
    author = args["author"]
    exclude = args["exclude"]
    out = args["out"]

    meta = {}
    meta["path"] = path
    meta["author"] = author
    meta["exclude"] = exclude

    # The git commands operate on the current directory, so run them from
    # inside the repository.
    # NOTE(review): the original indentation was lost; this assumes only
    # the git work ran inside the `with`, so a relative --out path resolves
    # against the caller's working directory -- confirm.
    with Chdir(path):
        stats = author_stats_filetype(author, exclude=exclude)

    adds, dels = total_lines(stats)
    meta["types"] = stats
    meta["total"] = (adds, dels)

    s = json.dumps(meta, indent=2, sort_keys=True)
    if out is not None:
        # json.dumps returns text, so open in text mode; "wb" rejects str
        # on Python 3.
        with open(out, "w") as outfile:
            outfile.write(s)
    # Parenthesised single-argument form works on Python 2 and Python 3.
    print(s)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment