Last active
October 27, 2021 17:39
-
-
Save vicenterocha/d90fab37114bfa0dcc4320a310df5b3f to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""Trim a two-column CSV so that each group keeps at most a fixed number of rows.

Each input row is ``index,value``; rows are grouped by ``value`` (the second
column).  The first ``MAX_PER_GROUP`` rows of every group are kept, in input
order, and written back out as ``index,value`` lines.
"""
import csv
from collections import defaultdict

INPUT_PATH = '/Users/vicente.rocha/Downloads/david.txt'
OUTPUT_PATH = '/Users/vicente.rocha/Downloads/david_fix.txt'
MAX_PER_GROUP = 20


def cap_rows_per_group(rows, limit=MAX_PER_GROUP):
    """Return the first *limit* ``(index, value)`` pairs of every group.

    Args:
        rows: Iterable of sequences whose first two items are the row index
            and the group value (e.g. the rows produced by ``csv.reader``).
        limit: Maximum number of rows to keep for each distinct group value.

    Returns:
        A list of ``(index, value)`` tuples in their original order.

    Raises:
        IndexError: If a non-empty row has fewer than two columns.
    """
    seen_per_group = defaultdict(int)
    kept = []
    for row in rows:
        # csv.reader yields an empty list for a blank line; there is nothing
        # to group there, so skip it instead of failing on row[0].
        if not row:
            continue
        index, value = row[0], row[1]
        seen_per_group[value] += 1
        if seen_per_group[value] <= limit:
            kept.append((index, value))
    return kept


def find_incomplete_groups(lines, limit=MAX_PER_GROUP):
    """Return ``{group value: row count}`` for groups whose size is not *limit*.

    Args:
        lines: Iterable of ``(index, value)`` pairs.
        limit: The row count every group is expected to have.

    Returns:
        A dict holding only the groups whose number of rows differs from
        *limit*; empty when every group has exactly *limit* rows.
    """
    sizes = defaultdict(int)
    for _, value in lines:
        # Count per group value; counting per (index, value) pair would make
        # every distinct row look like its own group of size 1.
        sizes[value] += 1
    return {value: size for value, size in sizes.items() if size != limit}


def main(input_path=INPUT_PATH, output_path=OUTPUT_PATH, limit=MAX_PER_GROUP):
    """Read *input_path*, cap each group at *limit* rows, write *output_path*.

    Groups that end up with a row count other than *limit* are reported on
    standard output before the result is written.
    """
    # newline='' lets the csv module do its own newline handling.
    with open(input_path, newline='') as f:
        lines = cap_rows_per_group(csv.reader(f, delimiter=','), limit)

    # verify
    for val, size in find_incomplete_groups(lines, limit).items():
        print(f"{val} has size {size}")

    # write to disk
    with open(output_path, 'w') as f:
        f.writelines(f"{index},{value}\n" for index, value in lines)


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment