Skip to content

Instantly share code, notes, and snippets.

@rsheeter
Last active May 29, 2024 18:02
Show Gist options
  • Save rsheeter/14bebfdba0e79421e0fbc70aae6b7c02 to your computer and use it in GitHub Desktop.
Save rsheeter/14bebfdba0e79421e0fbc70aae6b7c02 to your computer and use it in GitHub Desktop.
"""
Result 5/29/2024:
Num tags 9125
Mean tags 5.086399108138239
Median tags 5.0
Stdev tags 2.474364733745681
Max tags 27
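Usage:
1. Clone https://github.com/google/fonts
2. Save this script as stats.py in the tags/all directory of the clone
3. Run it from that directory (families.csv is read from the current working directory):
   python3 stats.py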
"""
import collections
import csv
from pathlib import Path
from statistics import mean, median, stdev
def main(csv_path="families.csv"):
    """Print per-family row counts from a CSV file, then summary statistics.

    Reads *csv_path* with ``csv.DictReader`` and counts how many rows carry
    each value of the ``Family`` column. One ``<family> <count>`` line is
    printed per family, ordered by ascending count with ties broken by
    family name. Summary lines follow: the total number of rows
    ("Num tags"), then the mean, median, sample standard deviation and
    maximum of the per-family counts.

    Args:
        csv_path: Path of the CSV file to read. Defaults to
            ``families.csv``, resolved against the current working
            directory.

    Raises:
        OSError: If *csv_path* cannot be opened.
        KeyError: If the rows have no ``Family`` column.
    """
    # newline="" is what the csv module asks for so that quoted fields with
    # embedded newlines parse correctly; the explicit encoding keeps
    # non-ASCII family names from depending on the platform locale.
    with open(csv_path, encoding="utf-8", newline="") as f:
        records = list(csv.DictReader(f))

    count_by_family = collections.Counter(r["Family"] for r in records)

    # Ascending by count, then by family name so ties print in a stable order.
    by_count = sorted(
        count_by_family.items(),
        key=lambda item: (item[1], item[0]),
    )
    for family, tag_count in by_count:
        print(family, tag_count)

    print("Num tags", len(records))
    if not count_by_family:
        # mean(), median() and max() all raise on empty data, so there is
        # nothing further to report for a header-only file.
        return

    counts = sorted(count_by_family.values())
    # Label kept byte-for-byte; its trailing space yields a double space.
    print("Mean tags ", mean(counts))
    print("Median tags", median(counts))
    # The sample standard deviation is undefined for fewer than two values.
    print("Stdev tags", stdev(counts) if len(counts) > 1 else "n/a")
    print("Max tags", max(counts))
# Run the report only when this file is executed as a script, so importing
# the module does not trigger file reads or printing.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment