Created
February 17, 2022 07:37
-
-
Save alex-d-boyd/583bd1d07d7f92af4a61d1c968bdcb7e to your computer and use it in GitHub Desktop.
Cutting Fields From A CSV File <Programming Praxis>
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#! /usr/bin/env python3
# Cutting Fields From A CSV File
# Programming Praxis 2021-12-07
# https://programmingpraxis.com/2021/12/07/cutting-fields-from-a-csv-file/
import argparse | |
import csv | |
import sys | |
from pathlib import Path | |
def parse_args(argv=None):
    """Parse the command line of the CSV field cutter.

    Args:
        argv: Optional list of argument strings to parse. When ``None``
            (the default) argparse reads ``sys.argv[1:]``.

    Returns:
        argparse.Namespace with ``input_file`` (Path), ``output_file``
        (Path or None), ``fields`` (list of str or None) and ``encoding``
        (str, ``'utf-8'`` unless overridden).
    """
    # The default prefix character ('-') is used on purpose: every option
    # below starts with '-'. Declaring '/' or '@' as prefix characters would
    # make argparse treat values such as '/tmp/in.csv' or '@name' as unknown
    # options instead of as the input path or a field name.
    parser = argparse.ArgumentParser(
        description='cut fields from a csv file by name')
    parser.add_argument('input_file', type=Path,
                        help='input CSV file to be processed',
                        metavar='INFILE.CSV')
    parser.add_argument('-o', '--output-file', metavar='OUTFILE.CSV',
                        default=None, type=Path,
                        help='output CSV file to be processed - stdout used if not specified')
    parser.add_argument('-f', '--fields', nargs='+', metavar='FIELD',
                        default=None, help='fields to cut from input file')
    parser.add_argument('--encoding', default='utf-8',
                        help='encoding to read/write files, default UTF-8')
    return parser.parse_args(argv)
def read_csv_data(path, encoding):
    """Load every row of the CSV file at *path* into a list.

    Args:
        path: ``pathlib.Path`` of the file to read.
        encoding: Text encoding used to decode the file.

    Returns:
        A list with one entry per CSV row, each entry a list of strings.

    A zero-byte file ends the program through ``sys.exit`` with a message.
    """
    file_is_empty = path.stat().st_size == 0
    if file_is_empty:
        sys.exit(f'{path} contains no data')

    # newline='' leaves line-ending handling to the csv module.
    with path.open('r', encoding=encoding, newline='') as handle:
        rows = list(csv.reader(handle))
    return rows
def output_csv_data(data, path, encoding):
    """Write *data* as CSV to *path*, or to standard output.

    Args:
        data: Iterable of rows to write, each row a sequence of values.
        path: ``pathlib.Path`` of the destination file, or ``None`` to
            write to ``sys.stdout``.
        encoding: Text encoding used when writing to *path*.
    """
    if path is None:
        # No output file requested: emit the rows on stdout.
        # NOTE(review): `encoding` is not applied on this branch, and stdout
        # is not opened with newline='' - confirm that is acceptable on
        # platforms that translate line endings.
        csv.writer(sys.stdout).writerows(data)
        return

    with path.open('w', encoding=encoding, newline='') as handle:
        csv.writer(handle).writerows(data)
def cut_fields(data, fields):
    """Select the named columns from parsed CSV rows, in the order requested.

    Args:
        data: List of rows, each a list of strings; the first row is used
            as the header that maps names to column positions.
        fields: Column names to keep, or ``None`` to keep every column.

    Returns:
        ``data`` itself when ``fields`` is ``None``. Otherwise a new list of
        rows (header row included) holding only the requested columns, in
        the order given by ``fields``. A row that is shorter than the header
        yields ``''`` for each column it lacks.

    Raises:
        KeyError: If a requested field does not appear in the header row.
    """
    if fields is None:
        return data
    if not data:
        # No header row to resolve names against and no rows to cut.
        return []

    # If a header name is repeated, the last occurrence wins.
    column_of = {name: pos for pos, name in enumerate(data[0])}
    wanted = [column_of[field] for field in fields]

    # Ragged rows are tolerated: a missing cell becomes an empty string
    # rather than aborting the whole run with an IndexError.
    return [
        [row[pos] if pos < len(row) else '' for pos in wanted]
        for row in data
    ]
def main():
    """Run the tool: parse arguments, read the CSV, cut fields, write output.

    Problems the user can fix (an unreadable input file, a field name that
    is not in the header) end the program through ``sys.exit`` with a
    one-line message, the same way ``read_csv_data`` reports an empty input
    file, instead of surfacing as a traceback.
    """
    args = parse_args()

    try:
        data = read_csv_data(args.input_file, args.encoding)
    except OSError as exc:
        # Covers a missing file, a directory, permission problems, etc.
        sys.exit(f'cannot read {args.input_file}: {exc.strerror or exc}')

    try:
        output = cut_fields(data, args.fields)
    except KeyError as exc:
        # str() of a KeyError is the quoted missing key.
        sys.exit(f'{args.input_file} has no field named {exc}')

    output_csv_data(output, args.output_file, args.encoding)


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment