Last active
September 21, 2021 16:38
-
-
Save Mondonno/e0b7ffb2976fa9c427ef8efb9d13249b to your computer and use it in GitHub Desktop.
Analyzer and deleter for CSV files. Deletes rows that do not pass certain conditions and generates a new CSV file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Copyrighted by Mondonno
"""Filter the lines of a delimiter-separated text file.

The script asks for a file name, reads ``IN_DIR + name + CSV_FILE_EXTENSION``,
drops every line that ``should_delete`` rejects and writes the remaining lines
to ``OUT_DIR + OUT_FILE_NAME + CSV_FILE_EXTENSION``.

Naming note: the constants keep their historical names.  ``COL_DELIMETER``
separates the *lines* of the file and ``ROW_DELIMETER`` separates the *cells*
inside one line.
"""

OUT_DIR = "./"  # folder the output file is written to
IN_DIR = "./in/"  # folder the input file is read from
CSV_FILE_EXTENSION = ".csv"  # extension appended to the input and output names
OUT_FILE_NAME = "out"  # base name of the generated output file
COL_DELIMETER = "\n"  # separator between the lines of the file (new line)
ROW_DELIMETER = ";"  # separator between the cells of one line
ELIMINATE_PROVIDING_EMPTY = True  # NOTE: deprecated field, not read by this script
EMPTY_LIMIT = 2  # limit of empty elements; not read by this script (the rule below hard-codes 1 and 2)
START_COL = 0  # index of the first line that is processed and written | 0 = all
MAX_ELEMENTS = 3  # number of trailing cells inspected per line | 0 = all

#####################


def should_delete(line, max_elements=MAX_ELEMENTS, delimiter=ROW_DELIMETER):
    """Return True when *line* has to be left out of the output.

    The line is split on *delimiter*.  When *max_elements* is positive and the
    line has more cells than that, only the last *max_elements* cells are
    inspected; otherwise every cell is inspected.

    Two counters are computed over the inspected cells:

    * duplicated - cells whose value also occurs at another position of the
      line (the search covers the whole line, not only the inspected cells);
    * empty - cells equal to the empty string.

    The line is deleted when every inspected cell is duplicated, or when
    ``duplicated + empty`` equals the total number of cells of the line and
    ``empty`` is 1 or 2.

    Args:
        line: One line of the input file, without its line separator.
        max_elements: Number of trailing cells to inspect; 0 or less inspects
            all cells.
        delimiter: Separator between the cells of the line.

    Returns:
        True if the line must be deleted, False if it must be kept.
    """
    cells = line.split(delimiter)
    total = len(cells)

    # Leading cells that are not inspected at all.
    skipped = max(total - max_elements, 0) if max_elements > 0 else 0
    inspected = cells[skipped:]

    # A value that occurs more than once in the line has a twin at another index.
    duplicated = sum(1 for cell in inspected if cells.count(cell) > 1)
    empty = inspected.count("")

    if duplicated >= total - skipped:
        return True
    return duplicated + empty == total and empty in (1, 2)


def main():
    """Prompt for a file name, filter that file and write the result."""
    file_name = input("Provide CSV file name\n")
    # The context manager closes the input file as soon as it has been read.
    with open(IN_DIR + file_name + CSV_FILE_EXTENSION) as in_file:
        lines = in_file.read().split(COL_DELIMETER)

    print("\nProcessing...\n")

    kept = []
    deleted = 0
    for index in range(START_COL, len(lines)):
        line = lines[index]
        if should_delete(line):
            print("Error! Limit reached")
            deleted += 1
        else:
            kept.append(line)

    print("\nWriting to output file...")
    # Each kept line is followed by the line separator; surrounding whitespace
    # of the whole text is stripped before writing.
    content = "".join(line + COL_DELIMETER for line in kept).strip()
    # The context manager flushes and closes the output file before "Done!".
    with open(OUT_DIR + OUT_FILE_NAME + CSV_FILE_EXTENSION, "w") as out_file:
        out_file.write(content)

    print("Done! Deleted cols: " + str(deleted))


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment