Skip to content

Instantly share code, notes, and snippets.

@VirtuosoChris
Created June 20, 2023 23:16
Show Gist options
  • Save VirtuosoChris/db4e6267551b1711fcc084a19a9e63ea to your computer and use it in GitHub Desktop.
Save VirtuosoChris/db4e6267551b1711fcc084a19a9e63ea to your computer and use it in GitHub Desktop.
groupimages.py
# Written by ChatGPT.
import argparse
import os
from skimage import img_as_float
from skimage.metrics import structural_similarity as ssim
from PIL import Image
import numpy as np
import shutil
def main():
    """Parse the command line and group the images in the given directory.

    Two positional arguments are expected: the directory to scan and the
    directory that receives the grouped copies. If the input directory does
    not exist, the program exits through ``parser.error`` (usage message on
    stderr, exit status 2) instead of failing later with a traceback.
    """
    parser = argparse.ArgumentParser(description='Group similar images in a directory.')
    parser.add_argument('directory', type=str, help='directory to process')
    parser.add_argument('output', type=str, help='output directory')
    args = parser.parse_args()

    # Validate up front so a typo in the path yields a usage error rather
    # than a FileNotFoundError raised from os.listdir deep in the grouping.
    if not os.path.isdir(args.directory):
        parser.error(f"input directory does not exist: {args.directory}")

    group_similar_images(args.directory, args.output)
def resize_image(image, size=(64,64)):
    """Return *image* resized to *size* by calling its ``resize`` method.

    Args:
        image: Object exposing ``resize(size)``; the caller in this file
            passes a grayscale PIL image.
        size: Target size passed straight through to ``image.resize``.
            Defaults to ``(64, 64)``, small enough to keep the pairwise
            comparisons cheap.

    Returns:
        Whatever ``image.resize(size)`` returns.
    """
    return image.resize(size)
def group_similar_images(directory, output_dir, threshold=0.4):
    """Copy the images in *directory* into similarity groups under *output_dir*.

    Every image file in *directory* is converted to grayscale and shrunk with
    ``resize_image``. The first remaining image is then compared against all
    other remaining images with ``ssim``; those scoring above *threshold*
    join its group. Groups of two or more are copied to
    ``output_dir/group_<n>``; an image with no match is copied to
    ``output_dir/unmatched``. Source files are copied, never moved.

    Args:
        directory: Directory whose direct entries are scanned (no recursion).
        output_dir: Directory that receives the group sub-directories; it is
            created on demand.
        threshold: Similarity score a candidate must exceed to join a group.

    Returns:
        None. Progress is printed after each group or unmatched image.
    """
    image_types = (".png", ".jpg", ".jpeg", ".bmp", ".tiff")

    # Only the small thumbnails are kept in memory; the full-size pixel data
    # is never needed for the comparison.
    thumbnails = {}
    # Sorted so that the greedy grouping below is reproducible between runs.
    for filename in sorted(os.listdir(directory)):
        # Compare on the lower-cased name so "PHOTO.JPG" is not skipped.
        if not filename.lower().endswith(image_types):
            continue
        file_path = os.path.join(directory, filename)
        if not os.path.isfile(file_path):
            continue
        try:
            # The context manager closes the file handle once the pixel data
            # has been converted into an independent grayscale image.
            with Image.open(file_path) as image:
                grayscale = image.convert('L')
        except OSError as exc:
            print(f"Skipping unreadable image {filename}: {exc}")
            continue
        thumbnails[filename] = np.array(resize_image(grayscale))

    total_images = len(thumbnails)
    idx = 0
    while thumbnails:
        # Take the oldest remaining image as the reference for this round.
        anchor_name, anchor = next(iter(thumbnails.items()))
        del thumbnails[anchor_name]

        similar_images = [anchor_name]
        for candidate_name, candidate in thumbnails.items():
            # ssim requires both arrays to have the same shape.
            if candidate.shape != anchor.shape:
                continue
            if ssim(anchor, candidate) > threshold:
                similar_images.append(candidate_name)

        if len(similar_images) > 1:
            target_dir = os.path.join(output_dir, f'group_{idx}')
            idx += 1
        else:
            # Unmatched images are copied to a separate directory
            target_dir = os.path.join(output_dir, 'unmatched')

        os.makedirs(target_dir, exist_ok=True)
        for image_name in similar_images:
            shutil.copy(os.path.join(directory, image_name), os.path.join(target_dir, image_name))
        for image_name in similar_images[1:]:
            del thumbnails[image_name]

        # Report progress in images handled so the percentage reaches 100.
        processed = total_images - len(thumbnails)
        print(f"Processing: {processed}/{total_images} images, {idx} groups. {processed / total_images * 100:.2f}% done.")
# Run the command-line entry point only when executed as a script, so that
# importing this module has no side effects.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment