Last active
December 19, 2019 13:17
-
-
Save devarshi16/3f7e9d441b8724254a9d7a1b5f52b70d to your computer and use it in GitHub Desktop.
Divides the images in the given folders proportionately into batch folders of a given size.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os
import random
import shutil
# Number of images placed in each output batch folder.
TASK_SIZE = 25

# Source folders (relative to the current working directory) to draw from.
dir_names = [
    "cheque",
    "nach",
    "pan_and_aadhaar",
]

# File-name endings (compared case-insensitively) that mark a file as an image.
IMAGE_EXTENSIONS = ('.png', '.jpg', '.jpeg')

# Map each source folder to the list of image file names it contains.
# Everything below is derived from these lists, so the counts stay correct
# even when a folder holds files other than images.
files_dict = {}
for d in dir_names:
    if not os.path.isdir(d):
        # A missing folder is reported and treated as empty instead of
        # aborting the whole run.
        print("Warning: source folder not found, treating as empty:", d)
        files_dict[d] = []
        continue
    # Sorted so that repeated runs hand out files in a reproducible order.
    files_dict[d] = sorted(
        name for name in os.listdir(d)
        if name.lower().endswith(IMAGE_EXTENSIONS)
    )

# Total number of images across all source folders.
total_images = sum(len(files_dict[d]) for d in dir_names)

# Fraction of all images that lives in each folder; 0.0 for every folder
# when there are no images at all (avoids dividing by zero).
weightage = {
    d: (len(files_dict[d]) / total_images if total_images else 0.0)
    for d in dir_names
}
print(weightage)

# Sanity check: the weights should add up to (approximately) 1.
sum_weights = sum(weightage.values())
print(sum_weights)

# Report how many images were found per folder.
for a in files_dict:
    print(a, len(files_dict[a]))
def copy_file(source, dest):
    """Copy an image and its same-named ``.json`` sidecar into ``dest``.

    Args:
        source: Path of the image file to copy.
        dest: Destination passed to ``shutil.copy`` (normally an existing
            directory).

    The sidecar path is ``source`` with its extension replaced by ``.json``.
    Copying is best-effort: a file that cannot be copied (for example a
    missing sidecar) is reported on stdout and the function carries on.
    Paths are passed straight to ``shutil.copy``, so names containing spaces
    or shell metacharacters are handled correctly.
    """
    sidecar = os.path.splitext(source)[0] + '.json'
    for path in (source, sidecar):
        # Keep the familiar "cp <src> <dst>" progress line.
        print(' '.join(['cp', path, dest]))
        try:
            shutil.copy(path, dest)
        except OSError as err:
            print("Could not copy {}: {}".format(path, err))
# Running count of images that have been assigned to a batch folder.
k = 0
# Index of the next 'internal<N>' batch folder to create.
dir_count = 0

# Build batches until every image list has been drained. Looping on the
# lists themselves (rather than on a separately computed total) guarantees
# that the last image is never left behind.
while any(files_dict[d] for d in dir_names):
    task_names = []

    # First pass: give each source folder its proportional share of the
    # batch (rounded down), or whatever it has left if that is less.
    for d in dir_names:
        for _ in range(int(TASK_SIZE * weightage[d])):
            if not files_dict[d]:
                break
            task_names.append([d, files_dict[d].pop()])
            k += 1
    print("Original len(task_names): ", str(len(task_names)))

    # Second pass: rounding down usually leaves the batch short, so top it
    # up with images from randomly chosen folders that still have some.
    while len(task_names) < TASK_SIZE:
        remaining = [d for d in dir_names if files_dict[d]]
        if not remaining:
            break
        d_name = random.choice(remaining)
        task_names.append([d_name, files_dict[d_name].pop()])
        k += 1

    # Nothing could be assigned (e.g. a non-positive TASK_SIZE): stop rather
    # than spin forever or create an empty folder.
    if not task_names:
        break

    # Create the batch folder only once there is something to put in it.
    folder_name = 'internal' + str(dir_count)
    os.makedirs(folder_name, exist_ok=True)
    for tsk in task_names:
        # tsk is [source_folder, file_name].
        copy_file(os.path.join(*tsk), folder_name)
    dir_count += 1
# Archive every batch directory in the current working directory whose name
# contains 'internal' as '<name>.zip', with the directory itself as the
# top-level entry in the archive. Non-directories are skipped so that zip
# files left by an earlier run are not archived again.
for f in os.listdir():
    if 'internal' in f and os.path.isdir(f):
        shutil.make_archive(f, 'zip', root_dir='.', base_dir=f)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment