Skip to content

Instantly share code, notes, and snippets.

@glenn-jocher
Created October 8, 2023 14:07
Show Gist options
  • Save glenn-jocher/8cee2e8c48884f0c22867b8c5206b55f to your computer and use it in GitHub Desktop.
Save glenn-jocher/8cee2e8c48884f0c22867b8c5206b55f to your computer and use it in GitHub Desktop.
"""
This script recursively scans Python files within a specified directory to identify functions and classes
that lack docstrings.
Features:
- Can navigate deeply nested directory structures to analyze all `.py` files.
- Allows for specific directories (like `venv` or `runs`) to be excluded from the scan.
- For each missing docstring, the script outputs the file path and the specific function or class declaration.
- Provides a summary count of total functions/classes analyzed, how many have docstrings, and how many are missing them.
- Offers an overview of the number of missing docstrings categorized by the top-level directory.
Usage:
- Define the starting directory and any directories to exclude, then run the script.
- Review the printed output for details on missing docstrings and the overall summary.
Note: This tool is beneficial for maintaining code documentation standards across projects and can
aid in identifying areas where documentation might be improved.
"""
import ast
from collections import defaultdict
from pathlib import Path
def _is_excluded(relative_path: Path, exclude_dirs) -> bool:
    """Return True if any exclude pattern occurs as a substring of ``relative_path``."""
    path_text = str(relative_path)
    return any(pattern in path_text for pattern in exclude_dirs)


def _iter_definitions(source: str):
    """
    Yield ``(kind, declaration, has_docstring)`` for every function and class in ``source``.

    ``kind`` is ``"function"`` (for both ``def`` and ``async def``) or ``"class"``; ``declaration`` is the
    stripped first line of the definition. Results are yielded in source order.

    Raises:
        SyntaxError, ValueError: Propagated from ``ast.parse`` when ``source`` cannot be parsed.
    """
    tree = ast.parse(source)
    # Split on '\n' only so indices line up with the line numbers reported by ``ast``.
    lines = source.split("\n")
    definitions = [
        node for node in ast.walk(tree)
        if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef, ast.ClassDef))
    ]
    # ``ast.walk`` is breadth-first; sort so output follows the order of the file.
    definitions.sort(key=lambda node: (node.lineno, node.col_offset))
    for node in definitions:
        kind = "class" if isinstance(node, ast.ClassDef) else "function"
        declaration = lines[node.lineno - 1].strip()
        has_docstring = ast.get_docstring(node, clean=False) is not None
        yield kind, declaration, has_docstring


def find_missing_docstrings_in_directory(directory_path: Path, exclude_dirs: list = None) -> None:
    """
    Recursively scan all Python files in a directory for functions and classes without docstrings.

    Each file is parsed with ``ast``, so a definition counts as documented only when its body really
    starts with a string literal (any quoting style). Files that cannot be parsed are reported and skipped.
    Findings and a summary are printed to stdout; nothing is returned.

    Args:
        directory_path (Path): Directory to start the scan from.
        exclude_dirs (list): Patterns to exclude. A file is skipped when any pattern is a substring of its
            path relative to ``directory_path``. Defaults to no exclusions.
    """
    directory_path = Path(directory_path)
    exclude_dirs = exclude_dirs or ()  # avoid a shared mutable default

    totals = {"function": 0, "class": 0}
    missing = {"function": 0, "class": 0}
    missing_by_directory = defaultdict(int)

    # Sorted so the report is deterministic regardless of filesystem ordering.
    for py_file in sorted(directory_path.rglob('*.py')):
        # Match patterns against the path *inside* the scan root, so that a parent directory of the
        # root that happens to contain a pattern does not exclude every file.
        relative_path = py_file.relative_to(directory_path)
        if not py_file.is_file() or _is_excluded(relative_path, exclude_dirs):
            continue

        with py_file.open(encoding='utf-8', errors='replace') as f:
            source = f.read()

        try:
            definitions = list(_iter_definitions(source))
        except (SyntaxError, ValueError) as err:
            print(f"Skipping {py_file}: could not parse ({err})")
            continue

        # Top-level directory of the file; files directly in the scan root are grouped under ".".
        top_level = relative_path.parts[0] if len(relative_path.parts) > 1 else "."

        for kind, declaration, has_docstring in definitions:
            totals[kind] += 1
            if has_docstring:
                continue
            missing[kind] += 1
            missing_by_directory[top_level] += 1
            print(f"In {py_file}, missing docstring for {kind}: {declaration}")

    total_functions, missing_function_docstrings = totals["function"], missing["function"]
    total_classes, missing_class_docstrings = totals["class"], missing["class"]

    print("\nSummary:")
    print(
        f"Total functions: {total_functions}, With docstrings: {total_functions - missing_function_docstrings}, Missing docstrings: {missing_function_docstrings}")
    print(
        f"Total classes: {total_classes}, With docstrings: {total_classes - missing_class_docstrings}, Missing docstrings: {missing_class_docstrings}")
    print("\nMissing docstrings by directory:")
    for dir_name, count in missing_by_directory.items():
        print(f"{dir_name}: {count} missing docstrings")
if __name__ == "__main__":
    # Patterns handed to the scanner as its ``exclude_dirs`` argument.
    exclude_patterns = ['venv', 'runs']
    # Scan root: the resolved current working directory. Change this to your desired starting directory if needed.
    dir_path = Path('.').resolve()
    find_missing_docstrings_in_directory(dir_path, exclude_patterns)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment