Based on a talk by James Powell - https://www.youtube.com/watch?v=mr2SE_drU5o
- cloc
- find -iname '*.<type>' | xargs cat | sed -e 's/^[ \t]*//' | sort | uniq -c | sort -nr
- Python:
from collections import Counter
from itertools import chain
from subprocess import check_output

# Replace <type> with the extension to survey (e.g. py). The argv list is
# handed to find without a shell, so the glob reaches find unexpanded; the
# explicit '.' start path is needed by find implementations other than GNU's.
files = check_output(['find', '.', '-iname', '*.<type>']).decode().splitlines()


def _words(path):
    """Return the whitespace-separated words of *path*, closing the file."""
    # errors='replace' keeps one undecodable byte from aborting the survey.
    with open(path, errors='replace') as fh:
        return fh.read().split()


# Most frequent words across all matched files, as (word, count) pairs.
Counter(chain.from_iterable(_words(f) for f in files)).most_common()
- Python, extends point 3:
from keyword import iskeyword
from tokenize import tokenize  # the stdlib module and function are both `tokenize`


def _long_identifiers(path):
    """Yield the alphanumeric, non-keyword tokens of *path* longer than 5 chars."""
    # tokenize() takes a readline callable that returns bytes; the with-block
    # closes the handle once the file has been tokenized.
    with open(path, 'rb') as fh:
        for t in tokenize(fh.readline):
            if (len(t.string) > 5 and t.string.strip()
                    and t.string.isalnum() and not iskeyword(t.string)):
                yield t.string


# `Counter`, `chain` and `files` are the names set up by the previous snippet.
Counter(
    chain.from_iterable(_long_identifiers(f) for f in files)
).most_common()
- To check for nesting we analyze imports. I made a quick script that pulls out all imports and counts the user (non-standard-library) ones. A large number of imports signifies a high amount of nesting:
#!/usr/bin/env python3
import distutils.sysconfig as sysconfig
import os
import sys
import subprocess
def build_std_lib_stoplist():
    """Build the substring patterns used to recognise standard-library imports.

    Module names are gathered from the interpreter's built-in module list and
    from a walk of the standard-library directory: packages (directories with
    an ``__init__.py``), plain ``.py`` modules, and extension modules in
    ``lib-dynload``.

    Returns:
        list[str]: each name twice, first as ``' name '`` and then as
        ``' name,'``, so it can be found inside an import line with ``in``.
    """
    # Local import: distutils, which the module-level `sysconfig` alias points
    # at, was removed in Python 3.12; the stdlib sysconfig module reports the
    # same directory.
    import sysconfig as _sysconfig

    std_lib = _sysconfig.get_path('stdlib')
    # Modules compiled into the interpreter (e.g. sys) have no file on disk,
    # so the directory walk alone would miss them.
    std_lib_stoplist = list(sys.builtin_module_names)
    for top, dirs, files in os.walk(std_lib):
        prefix = top[len(std_lib) + 1:]
        if not prefix:
            # Third-party code is not stdlib; pruning in place stops os.walk
            # from descending into it at all.
            dirs[:] = [d for d in dirs if not d.startswith('site-packages')]
        package = prefix.replace(os.path.sep, '.')
        for nm in files:
            if nm == '__init__.py':
                # An empty name would become the pattern '  ' and match any
                # line containing a double space.
                if package:
                    std_lib_stoplist.append(package)
            elif nm.endswith('.py'):
                std_lib_stoplist.append(
                    os.path.join(prefix, nm)[:-3].replace(os.path.sep, '.'))
            elif nm.endswith('.so') and top.endswith('lib-dynload'):
                # '_json.cpython-311-x86_64-linux-gnu.so' -> '_json'
                std_lib_stoplist.append(nm.split('.', 1)[0])
    return [*[f' {s} ' for s in std_lib_stoplist],
            *[f' {s},' for s in std_lib_stoplist]]
def stoplist_match(_stoplist, value):
    """Tell whether *value* is free of every stoplist fragment.

    Despite the name, the result is True when NOTHING in the stoplist occurs
    in *value* (the line should be kept) and False as soon as any fragment is
    found. An empty stoplist therefore yields True.

    Args:
        _stoplist: iterable of substrings to look for.
        value: the string to inspect.

    Returns:
        bool: True if no fragment is a substring of *value*.
    """
    # Every fragment has to be checked, not just the first one.
    return not any(fragment in value for fragment in _stoplist)
def get_most_used():
    """Count how often each distinct line occurs in the ``*.py`` files below the cwd.

    Runs a shell pipeline (find, xargs, cat, sed, sort, uniq) that strips
    leading blanks and tabs from every line and tallies identical lines. The
    first five raw pipeline rows are printed as a debugging aid.

    Returns:
        list[tuple[int, str]]: ``(count, line)`` pairs in the order the
        pipeline emits them (highest count first). Rows that do not start
        with a number are dropped.
    """
    # -print0 / -0 keep file names containing spaces or quotes in one piece;
    # the explicit '.' start path is needed by find implementations other
    # than GNU's.
    _mostused_raw = subprocess.check_output(
        "find . -iname '*.py' -print0 | xargs -0 cat | sed -e 's/^[ \t]*//' | sort | uniq -c | sort -nr | sed -e 's/^[ \t]*//'",
        shell=True).decode(errors='replace').split('\n')
    print(f'raw : {_mostused_raw[:5]}')
    most_used = []
    for m in _mostused_raw:
        # uniq -c rows look like "<count> <line>"; split at the first space only.
        count, _, line = m.partition(' ')
        if count.isdigit():
            most_used.append((int(count), line))
    return most_used
if __name__ == '__main__':
    # Keep only the (count, line) pairs whose line mentions "import".
    imports = [entry for entry in get_most_used() if 'import' in entry[1]]
    stoplist = build_std_lib_stoplist()
    # Filter against stoplist; the appended space lets a module name at the
    # very end of a line still hit a ' name ' pattern.
    user_imports = [entry for entry in imports
                    if stoplist_match(stoplist, entry[1] + ' ')]
    total_imports = 0
    for count, line in user_imports:
        # The last space-separated word of the line is reported as the import.
        print(f'{line.split(" ")[-1]} imported {count} times')
        total_imports += count
    print(f'Total user imports : {total_imports}')
Use this together with cloc to figure out imports per LoC. My code is typically around 0.05
imports per LoC
- fs: strace -e trace=open,stat,read,write python ...
- patch os.environ:
import os
from sys import stderr
class debug(dict):
    """dict subclass that writes every looked-up key to stderr.

    Wrapping ``os.environ`` in it shows which environment variables the
    program reads, via ``[]`` or via ``.get()``.
    """

    def get(self, item, default=None):
        """Log *item*, then behave like ``dict.get`` (optional *default* included)."""
        stderr.write(f'{item}\n')
        return super().get(item, default)

    def __getitem__(self, item):
        """Log *item*, then behave like ``dict.__getitem__`` (KeyError if absent)."""
        stderr.write(f'{item}\n')
        return super().__getitem__(item)
    # ...


# NOTE: this swaps in a plain dict copy of the environment, so later writes
# no longer go through the original os.environ object's own methods.
os.environ = debug(os.environ)
- Python linetracer:
from collections import Counter
# (filename, line number) -> number of trace events seen for that line.
hist = Counter()


def trace(frame, *_):
    """Trace hook that counts one hit for the line *frame* is currently on.

    Args:
        frame: the frame object handed over by the interpreter.
        *_: the event name and argument, unused here.

    Returns:
        The function itself, so that it stays installed as the local trace
        function and keeps receiving per-line events for the frame.
    """
    hist[frame.f_code.co_filename, frame.f_lineno] += 1
    # Returning None here would switch tracing off for the new scope, leaving
    # only 'call' events to be counted.
    return trace
from sys import settrace
# Install `trace` as the trace hook via sys.settrace.
settrace(trace)
# ... (run the code under investigation here)
# Show the ten keys of `hist` with the highest counts.
hist.most_common(10)
- Using networkx:
from networkx import DiGraph
# Sketch of an import hook; the method body is elided in these notes.
# find_spec(fullname, path, target=None) has the signature of a meta-path
# finder's find_spec. Presumably an instance is meant to be put on
# sys.meta_path and to record import relationships in a DiGraph - TODO confirm
# against the talk.
class GraphHook:
def find_spec(self, fullname, path, target=None):
# ...
- objgraph
import objgraph
# `x` is a placeholder for the object whose references you want to inspect;
# the call is asked to write its output to output.png.
objgraph.show_refs([x], filename='output.png')
- The above gives some overview. Now you would jump into a debugger:
- breakpoint()
- from pdb import post_mortem, set_trace
- from inspect import currentframe, getouterframes
- How to make it easier to pull apart your code?
- Test code
- Sample code
- Sample data
- Runnable container
- Importable module
- A good structure, flat, hub and spoke