Skip to content

Instantly share code, notes, and snippets.

@polymorphm
Last active August 29, 2015 14:19
Show Gist options
  • Save polymorphm/b364b8a0d1d27b84d754 to your computer and use it in GitHub Desktop.
Save polymorphm/b364b8a0d1d27b84d754 to your computer and use it in GitHub Desktop.
Find certain words in text files and replace them with other words.
#!/usr/bin/env python3
# -*- mode: python; coding: utf-8 -*-
# Interpreter sanity check: on Python 2 ``str`` and ``bytes`` are the same
# type, so this assertion fails there and stops the script immediately.
assert str is not bytes
import sys
import os, os.path
import csv
import itertools
import random
def replace_middle_iter_create():
    """Yield an endless stream of placeholder strings.

    Each placeholder has the form ``**d*d*d*d*d*d*d*d*d*d*N**`` where every
    ``d`` is a random decimal digit and ``N`` is the zero-based position of
    the placeholder in the stream, so no two placeholders from the same
    generator are equal.
    """
    counter = 0
    while True:
        # Ten random digits, drawn one at a time, separated by asterisks.
        digits = [str(random.randrange(10)) for _ in range(10)]
        noise = '*'.join(digits)
        yield '**{}*{}**'.format(noise, counter)
        counter += 1
def out_name_iter_create():
    """Yield the output file names ``out-0.txt``, ``out-1.txt``, ... forever."""
    index = 0
    while True:
        yield 'out-{}.txt'.format(index)
        index += 1
def main():
    """Write word-replaced copies of the ``.txt`` files found under a directory.

    Command line: ``<source_dir> <replace_csv> <out_dir>``

    * ``source_dir`` is walked recursively; only files whose names end in
      ``.txt`` are processed.
    * ``replace_csv`` is a UTF-8 CSV file. Every row with exactly two fields
      is a ``(search, replacement)`` pair; rows of any other length, and rows
      whose search field is empty, are ignored.
    * ``out_dir`` is created by this function and receives one
      ``out-N.txt`` file for each source file whose text was changed.
      Unchanged files produce no output.

    Replacement runs in two passes per file: the first occurrence of each
    search string is swapped for a unique placeholder, and only afterwards
    are the placeholders swapped for the replacement text. Because no
    replacement text is present during the first pass, it can never be
    matched by another pair's search string.

    Exits with status 2 (after printing a usage line to stderr) when the
    number of command-line arguments is wrong.

    Raises:
        FileExistsError: if ``out_dir`` already exists (from ``os.mkdir``).
        OSError: if the CSV file or a source file cannot be read, or an
            output file cannot be written.
    """
    if len(sys.argv) != 4:
        # The bare ``exit`` name is injected by the ``site`` module for
        # interactive use and is missing under ``python -S``; ``sys.exit``
        # is always available.
        print('usage: <source_dir> <replace_csv> <out_dir>', file=sys.stderr)
        sys.exit(2)

    source_dir, replace_csv_path, out_dir = sys.argv[1:]

    replace_middle_iter = replace_middle_iter_create()
    out_name_iter = out_name_iter_create()

    # Collect the inputs before out_dir is created, so freshly written
    # output files are never picked up as sources even when out_dir lies
    # inside source_dir.
    source_path_list = []
    for root, _dirs, file_names in os.walk(source_dir):
        for source_name in file_names:
            if source_name.endswith('.txt'):
                source_path_list.append(os.path.join(root, source_name))

    # Each entry is (search text, unique placeholder, replacement text).
    replace_list = []
    # newline='' lets the csv module do its own newline handling, which is
    # required for quoted fields that contain line breaks.
    with open(
            replace_csv_path,
            encoding='utf-8',
            errors='replace',
            newline='',
            ) as replace_csv_fd:
        for replace_row in csv.reader(replace_csv_fd):
            if len(replace_row) != 2:
                continue
            replace_source, replace_target = replace_row
            if not replace_source:
                # An empty search string matches at position 0 of every
                # text and would prepend the replacement to every file.
                continue
            replace_list.append((
                replace_source,
                next(replace_middle_iter),
                replace_target,
            ))

    os.mkdir(out_dir)

    for source_path in source_path_list:
        with open(source_path, encoding='utf-8', errors='replace') as source_fd:
            orig_data = source_fd.read()

        data = orig_data

        # Pass 1: only the first occurrence of each search string is
        # replaced (count=1); str.replace is a no-op when it is absent.
        for replace_source, replace_middle, _target in replace_list:
            data = data.replace(replace_source, replace_middle, 1)

        # Pass 2: turn every placeholder into its final replacement text.
        for _source, replace_middle, replace_target in replace_list:
            data = data.replace(replace_middle, replace_target)

        if orig_data == data:
            continue

        out_path = os.path.join(out_dir, next(out_name_iter))
        with open(out_path, mode='w', encoding='utf-8', newline='\n') as out_fd:
            out_fd.write('{}\n'.format(data))
# Run the tool only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment