Skip to content

Instantly share code, notes, and snippets.

@fernandomv3
Last active April 19, 2016 13:27
Show Gist options
  • Save fernandomv3/a3d06dfbef1599dcde2c961b1904c0fd to your computer and use it in GitHub Desktop.
Save fernandomv3/a3d06dfbef1599dcde2c961b1904c0fd to your computer and use it in GitHub Desktop.
Split a CSV file into N files with nearly equal numbers of lines
#!/usr/bin/env python
from __future__ import print_function, division
import sys
import math
def main(argv):
    """Split a CSV file into N files with nearly equal numbers of data rows.

    The first line of the input is treated as a header and is copied to the
    top of every output file. The remaining lines are distributed in order,
    in contiguous chunks of ``ceil(data_rows / N)`` lines, so trailing output
    files may hold fewer rows (or only the header) when the rows do not
    divide evenly.

    Output files are named ``NNN-<basename>`` (``NNN`` is the zero-padded,
    zero-based chunk index) and are created in the same directory as the
    input file.

    The split is line-based: a quoted CSV field that contains an embedded
    newline is not kept together.

    Args:
        argv: Command-line style argument list: ``[program, filename, N]``.

    Raises:
        SystemExit: If too few arguments are given or N is not a positive
            integer (a message is printed first).
        IOError/OSError: If the input cannot be read or an output file
            cannot be created.
    """
    # Local import so this function stays self-contained with respect to the
    # module-level imports.
    import os

    if len(argv) < 3:
        print("Usage: split_csv.py filename N")
        print("filename: File to be split")
        print("N: number of files to be split to")
        sys.exit(1)

    source_path = argv[1]
    try:
        num_files = int(argv[2])
    except ValueError:
        num_files = 0
    if num_files < 1:
        print("N must be a positive integer, got: {0!r}".format(argv[2]))
        sys.exit(1)

    # Prefix only the file name, not the whole path, so inputs given with a
    # directory component produce valid output paths next to the input.
    directory, base_name = os.path.split(source_path)

    with open(source_path, 'r') as source:
        num_lines = sum(1 for _ in source)
        source.seek(0)
        header = source.readline()

        # The header is not a data row; counting it would skew chunk sizes.
        data_lines = max(num_lines - 1, 0)
        # int(): math.ceil returns a float on Python 2, which cannot be used
        # to index a list. max(1, ...): avoid a zero divisor for empty input.
        lines_per_file = max(1, int(math.ceil(data_lines / float(num_files))))

        outputs = []
        try:
            for n in range(num_files):
                new_name = os.path.join(
                    directory, "{0:03d}-{1}".format(n, base_name))
                out = open(new_name, 'w')
                outputs.append(out)
                out.write(header)
            for i, line in enumerate(source):
                outputs[i // lines_per_file].write(line)
        finally:
            # Close every output that was opened, even if the split failed.
            for out in outputs:
                out.close()
# Script entry point: run the splitter with the process's command-line
# arguments only when this file is executed directly, not when imported.
if __name__ == "__main__":
    main(sys.argv)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment