Created
June 20, 2023 23:16
-
-
Save VirtuosoChris/db4e6267551b1711fcc084a19a9e63ea to your computer and use it in GitHub Desktop.
groupimages.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Written by ChatGPT.
import argparse | |
import os | |
from skimage import img_as_float | |
from skimage.metrics import structural_similarity as ssim | |
from PIL import Image | |
import numpy as np | |
import shutil | |
def main():
    """Read the source and output directories from the command line and group the images."""
    cli = argparse.ArgumentParser(description='Group similar images in a directory.')
    # Both positionals are plain strings; register them from one table.
    positional_args = (
        ('directory', 'directory to process'),
        ('output', 'output directory'),
    )
    for arg_name, arg_help in positional_args:
        cli.add_argument(arg_name, type=str, help=arg_help)
    options = cli.parse_args()
    group_similar_images(options.directory, options.output)
def resize_image(image, size=(64,64)):
    """Return the result of ``image.resize(size)``.

    ``size`` defaults to a 64x64 thumbnail, which keeps the later
    image-to-image comparison fast.
    """
    thumbnail = image.resize(size)
    return thumbnail
def group_similar_images(directory, output_dir, threshold=0.4):
    """Copy the images in *directory* into similarity groups under *output_dir*.

    Every image is converted to grayscale and shrunk with ``resize_image``;
    the thumbnails are then compared with SSIM. Grouping is greedy: the first
    pending image becomes the anchor, and every other pending image whose
    SSIM score against the anchor exceeds *threshold* joins its group.
    Groups of two or more files are copied into ``group_<n>`` sub-directories
    and images without a match are copied into ``unmatched``. Source files
    are copied, never moved.

    Args:
        directory: Folder whose direct entries are scanned for image files.
        output_dir: Folder that receives the sub-directories; they are
            created on demand.
        threshold: SSIM score a candidate must exceed to join the anchor's
            group. Defaults to 0.4.
    """
    image_types = (".png", ".jpg", ".jpeg", ".bmp", ".tiff")

    pending = []
    # Sorted so group numbering is reproducible; os.listdir order is arbitrary.
    for filename in sorted(os.listdir(directory)):
        # Lower-case first so names such as "PHOTO.JPG" are recognised too.
        if not filename.lower().endswith(image_types):
            continue
        file_path = os.path.join(directory, filename)
        try:
            # The context manager closes the file handle once the pixels are read.
            with Image.open(file_path) as image:
                grayscale = image.convert('L')
                thumbnail = np.array(resize_image(grayscale))
        except OSError as exc:
            # One unreadable file should not abort the whole run.
            print(f"Skipping unreadable image {filename}: {exc}")
            continue
        # Only the thumbnail is kept; the full-size pixels are never compared.
        pending.append((filename, thumbnail))

    total_images = len(pending)
    idx = 0
    while pending:
        (anchor_name, anchor), *candidates = pending
        similar_images = [anchor_name]
        remaining = []
        for candidate_name, candidate in candidates:
            if anchor.shape == candidate.shape and ssim(anchor, candidate) > threshold:
                similar_images.append(candidate_name)
            else:
                remaining.append((candidate_name, candidate))
        pending = remaining

        if len(similar_images) > 1:
            target_dir = os.path.join(output_dir, f'group_{idx}')
            idx += 1
        else:
            # Unmatched images are copied to a separate directory.
            target_dir = os.path.join(output_dir, 'unmatched')
        os.makedirs(target_dir, exist_ok=True)
        for image_name in similar_images:
            shutil.copy(os.path.join(directory, image_name), os.path.join(target_dir, image_name))

        # Report images handled so far; the group counter alone never
        # reaches the image total, so it cannot express progress.
        processed = total_images - len(pending)
        print(f"Processing: {processed}/{total_images} images. {(processed / total_images) * 100:.2f}% done.")
# Run the command-line entry point only when executed as a script.
if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment