Last active
October 14, 2020 10:15
-
-
Save sharkdeng/cf59df2377b1f54599bc6eb7cc226367 to your computer and use it in GitHub Desktop.
using imagehash to detect duplicates
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Detect duplicate photos by comparing perceptual hashes.
#
# Each image is hashed with several imagehash algorithms. The resulting bit
# arrays are concatenated into one fingerprint per image, and the similarity
# of two images is the fraction of fingerprint bits on which they agree.

# 1 get hash function
import cv2
import imagehash
import numpy as np
from PIL import Image

funcs = [
    imagehash.average_hash,
    imagehash.phash,
    imagehash.dhash,
    imagehash.whash,
]


def _load_image(path):
    """Read the image at *path* with OpenCV and return it as an RGB PIL image.

    Raises:
        FileNotFoundError: if OpenCV could not read the file.
    """
    bgr = cv2.imread(path)
    if bgr is None:
        # cv2.imread reports failure by returning None instead of raising.
        raise FileNotFoundError(f"could not read image: {path}")
    # OpenCV decodes to BGR channel order; PIL expects RGB.
    return Image.fromarray(cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB))


def _fingerprint(img):
    """Return the bits of every hash in ``funcs`` for *img* as one flat array."""
    return np.concatenate([np.asarray(f(img).hash).ravel() for f in funcs])


img1 = _load_image('1.jpg')  # same
img2 = _load_image('2.jpg')  # same
img3 = _load_image('3.jpg')  # different

# One row per image. The original code reshaped each row to 256 entries
# (four hashes at imagehash's default size); the length is now derived from
# the hashes themselves instead of being hard-coded.
hashes = np.array([_fingerprint(img) for img in (img1, img2, img3)]).astype(int)

# 2 get similarity matrix
# sim[i, j] is the fraction of fingerprint bits on which image i and image j
# agree (1.0 means identical fingerprints).
sim = (hashes[:, None, :] == hashes[None, :, :]).mean(axis=2)

# Zero the diagonal so each image's trivial match with itself is ignored.
sim2 = sim.copy()
np.fill_diagonal(sim2, 0)
print(sim2)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment