BHEADRICK · September 3, 2021 12:57
diff --git a/splitcsv.py b/splitcsv.py
 import os

 import pandas as pd

 # Split a large CSV file into numbered part files, each holding at most
 # `rowsize` data rows plus a copy of the original header row.

 # CSV file name to be read in.
 in_csv = 'filename.csv'

 # Maximum number of data rows written to each part file;
 # change this according to your needs.
 rowsize = 20000

 # Part files are named "<input name without extension>-part-<n>.csv".
 # splitext() removes only the final extension, so names containing spaces
 # or extra dots are kept intact.
 out_file_prefix = os.path.splitext(in_csv)[0]

 part = 0
 # Read the input once, `rowsize` rows at a time, instead of re-parsing the
 # file from the top for every part. Chunking by parsed rows (not physical
 # lines) also keeps quoted fields that contain newlines in one piece.
 with pd.read_csv(in_csv, chunksize=rowsize) as reader:
     for df in reader:
         # Guard against an empty chunk (e.g. a header-only input) so that
         # no part file containing only a header is produced.
         if df.empty:
             continue
         part += 1

         # Indexed output name: filename-part-1.csv, filename-part-2.csv, ...
         out_csv = out_file_prefix + '-part-' + str(part) + '.csv'

         # Default write mode (overwrite): rerunning the script regenerates
         # each part file rather than appending a duplicate header and rows.
         df.to_csv(out_csv, index=False)
	import os

	import pandas as pd

	# Split a large CSV file into numbered part files, each holding at most
	# `rowsize` data rows plus a copy of the original header row.

	# CSV file name to be read in.
	in_csv = 'filename.csv'

	# Maximum number of data rows written to each part file;
	# change this according to your needs.
	rowsize = 20000

	# Part files are named "<input name without extension>-part-<n>.csv".
	# splitext() removes only the final extension, so names containing spaces
	# or extra dots are kept intact.
	out_file_prefix = os.path.splitext(in_csv)[0]

	part = 0
	# Read the input once, `rowsize` rows at a time, instead of re-parsing the
	# file from the top for every part. Chunking by parsed rows (not physical
	# lines) also keeps quoted fields that contain newlines in one piece.
	with pd.read_csv(in_csv, chunksize=rowsize) as reader:
	    for df in reader:
	        # Guard against an empty chunk (e.g. a header-only input) so that
	        # no part file containing only a header is produced.
	        if df.empty:
	            continue
	        part += 1

	        # Indexed output name: filename-part-1.csv, filename-part-2.csv, ...
	        out_csv = out_file_prefix + '-part-' + str(part) + '.csv'

	        # Default write mode (overwrite): rerunning the script regenerates
	        # each part file rather than appending a duplicate header and rows.
	        df.to_csv(out_csv, index=False)