Skip to content

Instantly share code, notes, and snippets.

@Yutsa
Created February 2, 2018 17:01
Show Gist options
  • Save Yutsa/7ed3c8c5c45ae75053b0f3b37941212b to your computer and use it in GitHub Desktop.
Save Yutsa/7ed3c8c5c45ae75053b0f3b37941212b to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
import os
import argparse
import chardet
import codecs
from threading import Thread
# Command-line interface: both options are mandatory.
parser = argparse.ArgumentParser(
    description='Reencode every file in this directory with the right extension.',
)
# -i: root directory whose files will be examined (stored as args.dirPath).
parser.add_argument(
    "-i",
    dest="dirPath",
    help='The path of the directory to process.',
    required=True,
)
# -e: extension, given without the leading dot (stored as args.extension).
parser.add_argument(
    "-e",
    dest="extension",
    help='The file extension of the files to process',
    required=True,
)
def reencodeFile(file, oldCodec, newCodec):
    """Rewrite *file* in place, transcoding its text from oldCodec to newCodec.

    The converted text is written to a uniquely named temporary file created
    in the same directory as *file*; only once the whole conversion has
    succeeded does that temporary file take the place of the original. If
    anything fails (for example a decode error), the original file is left
    untouched and the temporary file is removed.

    Args:
        file: Path of the file to convert.
        oldCodec: Name of the codec the file is currently encoded with.
        newCodec: Name of the codec to write the file with.

    Raises:
        UnicodeError: If the contents cannot be decoded with oldCodec or
            encoded with newCodec.
        LookupError: If either codec name is unknown.
        OSError: If the file cannot be read, written or replaced.
    """
    # Local imports keep this function self-contained.
    import shutil
    import tempfile

    # Create the scratch file next to the target (not in the current working
    # directory) so the final rename never crosses a filesystem boundary and
    # cannot clobber an unrelated file that happens to be called "temp".
    targetDir = os.path.dirname(os.path.abspath(file))
    handle, tempPath = tempfile.mkstemp(prefix=".reencode-", dir=targetDir)
    os.close(handle)
    try:
        with codecs.open(file, "r", oldCodec) as sourceFile, \
                codecs.open(tempPath, "w", newCodec) as targetFile:
            # Copy in chunks so large files are never held fully in memory.
            while True:
                contents = sourceFile.read(65000)
                if not contents:
                    break
                targetFile.write(contents)
        # mkstemp creates the file with restrictive permissions; carry over
        # the permission bits of the file being replaced.
        shutil.copymode(file, tempPath)
        replace = getattr(os, "replace", None)
        if replace is not None:
            # Overwrites the destination in one step (atomic on POSIX).
            replace(tempPath, file)
        else:
            # Interpreters without os.replace (Python 2).
            os.remove(file)
            os.rename(tempPath, file)
    finally:
        # After a successful replace the temp path no longer exists; on any
        # failure this removes the partially written scratch file.
        if os.path.exists(tempPath):
            os.remove(tempPath)
def getCodec(file):
    """Return the encoding name chardet detects for *file*.

    The file is read as raw bytes: chardet has to inspect the undecoded
    content, and reading in text mode would first decode it with the
    platform's default encoding (or fail while trying).

    Args:
        file: Path of the file to inspect.

    Returns:
        The "encoding" entry of chardet.detect()'s result. Per chardet's
        documentation this may be None when no encoding could be
        determined -- callers should be prepared for that.
    """
    with open(file, 'rb') as current_file:
        oldCodec = chardet.detect(current_file.read())["encoding"]
    return oldCodec
def convertDirectory(fullPath, extension=None):
    """Reencode to UTF-8 every file under *fullPath* with a given extension.

    os.walk already descends into every subdirectory, so the tree is walked
    exactly once; no explicit recursion is needed (recursing per
    subdirectory would re-process each nested subtree repeatedly).

    Args:
        fullPath: Root directory to process.
        extension: File extension to match, without the leading dot. When
            omitted, the value of the command-line option stored in the
            module-level ``args`` is used.
    """
    if extension is None:
        extension = args.extension
    wantedSuffix = "." + extension

    for root, _subdirs, files in os.walk(fullPath):
        for name in files:
            path = os.path.join(root, name)
            if os.path.splitext(path)[1] != wantedSuffix:
                continue
            oldCodec = getCodec(path)
            if oldCodec is None:
                # Detection gave no answer; there is nothing safe to decode
                # the file with, so leave it alone.
                print("Skipping {}: could not detect its encoding".format(path))
                continue
            # Compare case-insensitively so a detector reporting "UTF-8"
            # does not trigger a pointless rewrite.
            if oldCodec.lower() != "utf-8":
                print("Reencoding {} from {} to UTF-8".format(path, oldCodec))
                reencodeFile(path, oldCodec, "utf-8")
if __name__ == "__main__":
    # Script entry point. `args` is intentionally a module-level name:
    # convertDirectory() reads args.extension from it.
    args = parser.parse_args()
    # Resolve the directory to an absolute path before walking it.
    convertDirectory(os.path.abspath(args.dirPath))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment