开源地址:
https://github.com/idealo/imagededup
安装库
- pip install imagededup
示例代码
from imagededup.methods import PHash
from imagededup.utils import plot_duplicates

# Directory holding the images to de-duplicate. Defined once so that hashing
# and plotting are guaranteed to look at the same files.
image_dir = '/tmp/close_eyes_jt/jingtiao_eyes_img'

phasher = PHash()

# Compute a perceptual hash for every image found in the directory.
encodings = phasher.encode_images(image_dir=image_dir)

# Look for duplicates among the encoded images; the result maps each
# filename to the list of filenames detected as its duplicates.
d_1 = phasher.find_duplicates(encoding_map=encodings)

# Plot one image next to its detected duplicates. The directory must be the
# one that was encoded above, otherwise the filenames in d_1 cannot be found.
# NOTE(review): 'ukbench00120.jpg' looks like a sample name -- replace it with
# a filename that actually exists in image_dir.
plot_duplicates(image_dir=image_dir,
                duplicate_map=d_1,
                filename='ukbench00120.jpg')
-
-
-
repeat_img = []  # filenames that were reported as a duplicate of a kept image
is_img = []  # filenames kept as the unique representatives

# Companion set mirroring repeat_img so the membership test below is O(1)
# instead of a scan over an ever-growing list.
seen_repeats = set()

for name, dups in d_1.items():
    # Keep an image when it has no duplicates at all, or when it has not
    # already been listed as a duplicate of an image kept earlier.
    if not dups or name not in seen_repeats:
        is_img.append(name)
    # Every duplicate of this image is recorded, whether or not the image
    # itself was kept, so later iterations skip them.
    repeat_img.extend(dups)
    seen_repeats.update(dups)

print(len(is_img))
单张图片调用方法
- from imagededup.methods import PHash
def compare_image_similarity(photo_id, photo_path, encoding_map: dict):
    """Check whether a photo is a near-duplicate of photos seen so far.

    The photo is hashed with ``PHash`` and compared against every hash
    already stored in ``encoding_map``. A photo judged unique stays in the
    map so later calls compare against it; a photo judged to be a duplicate
    is removed from the map again.

    :param photo_id: key under which the photo's hash is stored in
        ``encoding_map``.
    :param photo_path: path of the image file handed to
        ``PHash.encode_image``.
    :param encoding_map: mutable mapping of photo id -> hash, updated in
        place. Pass an empty dict ``{}`` on the first call and reuse the
        same dict afterwards.
    :return: ``(is_unique, encoding)``. ``is_unique`` is ``False`` only when
        an existing photo scores below 5 against this one; it is ``True``
        otherwise, including when any step fails (best-effort: errors are
        printed and the photo is treated as unique). ``encoding`` is the
        computed hash, or ``""`` when hashing never completed.
    """
    # Local import: the error handler below needs it and the surrounding
    # snippet does not import it.
    import traceback

    encoding = ""
    try:
        phasher = PHash()
        # Perceptual hash of the single image.
        encoding = phasher.encode_image(photo_path)
        if not encoding:
            # NOTE(review): presumably encode_image yields None for an image
            # it cannot load -- confirm. Storing such a value would break
            # every later comparison against this map, so skip it.
            return True, encoding
        encoding_map[photo_id] = encoding

        # With scores=True each match is a (photo_id, score) pair; per the
        # imagededup docs the score is the Hamming distance between hashes,
        # so a smaller score means more similar. Matches up to distance 10
        # are requested, but only those below 5 count as duplicates.
        duplicates = phasher.find_duplicates(
            encoding_map=encoding_map, scores=True, max_distance_threshold=10
        )

        # Shape example: {'001.jpg': [('002.jpg', 0)], '003.jpg': []}
        matches = duplicates.get(photo_id) or []
        if any(score < 5 for _other_id, score in matches):
            # Duplicate: do not keep it as a reference for later photos.
            encoding_map.pop(photo_id)
            return False, encoding
        return True, encoding
    except Exception as e:
        print(e)
        # print_exc() writes the traceback itself and returns None, so its
        # result must not be passed to print().
        traceback.print_exc()
        return True, encoding
-
if __name__ == "__main__":
    # Single-image similarity check, starting from an empty hash map.
    compare_image_similarity(
        photo_id="11111.jpg",
        photo_path="/tmp/11111.jpg",
        encoding_map={},
    )