-
Notifications
You must be signed in to change notification settings - Fork 0
/
duplicate.py
31 lines (25 loc) · 1003 Bytes
/
duplicate.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
from PIL import Image
import imagehash
import os
import numpy as np
def find_and_duplicates(images, similarity=70, hash_size=8):
    """Map each image to the later images that look like near-duplicates.

    Every image is reduced to an average hash (``imagehash.average_hash``)
    and two images are treated as duplicates when the number of differing
    hash cells does not exceed a limit derived from ``similarity``.

    Args:
        images: Sequence of image paths (anything ``PIL.Image.open``
            accepts). Each entry is only compared with entries that come
            after it, so a pair is reported once, under the earlier path.
        similarity: Minimum similarity, in percent, for two images to count
            as duplicates. Defaults to 70.
        hash_size: Edge length passed to ``average_hash``; the hash has
            ``hash_size ** 2`` cells. Defaults to 8.

    Returns:
        A dict mapping a path to the list of later paths that matched it.
        Paths without any match are omitted. The dict is also printed.
    """
    images = list(images)

    # Largest number of differing hash cells still considered a match.
    threshold = 1 - similarity / 100
    diff_limit = int(threshold * (hash_size**2))

    # Hash every image exactly once up front instead of re-opening and
    # re-hashing each file for every pair it takes part in.
    hashes = []
    for path in images:
        with Image.open(path) as img:
            hashes.append(imagehash.average_hash(img, hash_size).hash)

    dup_map = {}
    for i, path in enumerate(images):
        # Collect matches in a local list; the previous code assigned the
        # result of list.append() (always None) back into the dict, which
        # discarded the matches as soon as a second duplicate was found.
        matches = [
            images[j]
            for j in range(i + 1, len(images))
            if np.count_nonzero(hashes[i] != hashes[j]) <= diff_limit
        ]
        if matches:
            dup_map[path] = matches

    print(dup_map)
    return dup_map