开源地址:
https://github.com/idealo/imagededup
安装库
pip install imagededup
示例代码
from imagededup.methods import PHash
from imagededup.utils import plot_duplicates

# Directory holding the images to de-duplicate. Defined once so that hashing
# and plotting always refer to the same set of files.
image_dir = '/tmp/close_eyes_jt/jingtiao_eyes_img'

phasher = PHash()
# Compute a perceptual hash for every image found in image_dir.
encodings = phasher.encode_images(image_dir=image_dir)
# Map each encoded image to the list of images detected as its duplicates.
d_1 = phasher.find_duplicates(encoding_map=encodings)

# Show one image together with its duplicates. `filename` is a sample name
# and presumably has to be one of the keys of d_1 -- adjust to a real file.
plot_duplicates(image_dir=image_dir,
                duplicate_map=d_1,
                filename='ukbench00120.jpg')

repeat_img = []  # images reported as a duplicate of some kept image
is_img = []  # images to keep (one representative per duplicate group)
_repeat_seen = set()  # same content as repeat_img, for O(1) membership tests
for k, v in d_1.items():
    # Keep k when it has no duplicates at all, or when no earlier image has
    # already claimed it as a duplicate.
    if not v or k not in _repeat_seen:
        is_img.append(k)
    # Everything k points at is a duplicate (no-op when v is empty).
    repeat_img.extend(v)
    _repeat_seen.update(v)
print(len(is_img))
单张图片调用方法
from imagededup.methods import PHash
def compare_image_similarity(photo_id, photo_path, encoding_map: dict):
    """
    Check whether an image is a near-duplicate of any image seen so far.

    The image is hashed with PHash, added to ``encoding_map`` under
    ``photo_id`` and compared against every other entry of the map. If a
    close match is found the entry is removed again, so the map only ever
    accumulates images that were accepted.

    :param photo_id: key identifying the image inside ``encoding_map``
    :param photo_path: path of the image file to hash
    :param encoding_map: hashes of previously accepted images, keyed by id;
        pass an empty dict on the first call and reuse it afterwards.
        The dict is modified in place.
    :return: ``(is_unique, encoding)`` -- ``is_unique`` is False when some
        other image in the map is closer than the duplicate threshold,
        True otherwise (including when an error occurred, in which case the
        image is let through on a best-effort basis). ``encoding`` is the
        computed hash, or ``""`` if hashing never completed.
    """
    # Imported here so the error handler below cannot fail with NameError
    # when the module does not import traceback at file level.
    import traceback

    encoding = ""
    try:
        phasher = PHash()
        # Perceptual hash of this single image.
        encoding = phasher.encode_image(photo_path)
        if encoding is None:
            # NOTE(review): encode_image appears to yield None for images it
            # cannot load -- confirm. Storing None would break every later
            # comparison made with this map, so skip it.
            return True, encoding
        encoding_map[photo_id] = encoding
        # With scores=True each match is a (name, score) tuple, e.g.
        # {'001.jpg': [('002.jpg', 0)], '003.jpg': []}. The score is a
        # distance: smaller means more similar. Matches are searched up to
        # distance 10, but only those below 5 count as duplicates.
        duplicates = phasher.find_duplicates(
            encoding_map=encoding_map,
            scores=True,
            max_distance_threshold=10,
        )
        matches = duplicates.get(photo_id) or []
        if any(score < 5 for _, score in matches):
            # Too close to an existing image: reject it and forget its hash.
            encoding_map.pop(photo_id)
            return False, encoding
        return True, encoding
    except Exception as e:
        # Best effort: report the problem and treat the image as unique.
        print(e)
        traceback.print_exc()
        return True, encoding
if __name__ == "__main__":
    # Single-image usage: begin with an empty hash map; the same dict would
    # be passed again for each further image checked in a loop.
    seen_encodings = {}
    compare_image_similarity(
        photo_id="11111.jpg",
        photo_path="/tmp/11111.jpg",
        encoding_map=seen_encodings,
    )