Created
March 10, 2024 03:18
-
-
Save z-a-f/5ecfd9322b052e837a048025adaea996 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Split selected columns of one CSV file into several smaller CSV files.
#
# Each entry of `subfile_columns` describes one output file: the list holds
# the 0-based column indices copied (in that order) from every input row.
import csv
import os
from contextlib import ExitStack

# Original CSV file
original_csv_file = 'filename.csv'

# Columns that need to be separated
# For this example, we have 3 subfiles, with these columns
# File 1: 0, 2, 4
# File 2: 1, 3, 5
# File 3: 0, 5
subfile_columns = [
    [0, 2, 4],
    [1, 3, 5],
    [0, 5],
]
num_files = len(subfile_columns)


def split_csv_columns(csv_path, column_groups):
    """Write one subfile per column group, streaming the input row by row.

    Output files are named after the input with the group index inserted
    before a ``.csv`` extension (``filename.csv`` -> ``filename0.csv``,
    ``filename1.csv``, ...).

    Args:
        csv_path: Path of the CSV file to read.
        column_groups: Sequence of sequences of 0-based column indices; one
            output file is produced per inner sequence.

    Returns:
        The list of output file paths, in the same order as `column_groups`.

    Raises:
        OSError: If the input or any output file cannot be opened.
        IndexError: If a non-empty row has fewer columns than a group asks for.
    """
    root, _ = os.path.splitext(csv_path)
    out_paths = [root + str(idx) + '.csv' for idx in range(len(column_groups))]

    # ExitStack closes every file that was opened, even if a later open() or a
    # write fails part-way through.
    with ExitStack() as stack:
        # Open the input first so a missing input does not leave empty
        # subfiles behind.  newline='' is what the csv module expects.
        source = stack.enter_context(open(csv_path, 'r', newline=''))

        # Append mode is kept on purpose: running again adds rows to existing
        # subfiles instead of overwriting them.
        writers = [
            csv.writer(
                stack.enter_context(open(path, 'a', newline='')),
                lineterminator='\n',
            )
            for path in out_paths
        ]

        # csv.reader strips the line terminator (a plain split(',') leaves it
        # on the last field, doubling the newline) and understands quoted
        # fields that contain commas.
        for row in csv.reader(source):
            if not row:
                # Blank line in the input: nothing to copy.
                continue
            for writer, columns in zip(writers, column_groups):
                writer.writerow([row[col] for col in columns])

    return out_paths


if __name__ == '__main__':
    split_csv_columns(original_csv_file, subfile_columns)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment