# 淘宝亿级图片素材调度与处理的工程实践
import cv2
import numpy as np
from tfsClient import tfsClient
from PIL import Image
from io import BytesIO
# 感知hash算法
def pic_p_hash(img, hash_size=32):
    """Compute a DCT-based perceptual hash of an image as a hex string.

    The image is resized to ``hash_size`` x ``hash_size``, converted to
    grayscale and transformed with a 2-D DCT. Each DCT coefficient is then
    compared with the mean of all coefficients, which yields
    ``hash_size * hash_size`` bits in row-major order (the first coefficient
    becomes the most significant bit).

    Args:
        img: Image array accepted by ``cv2.resize`` and by ``cv2.cvtColor``
            with ``cv2.COLOR_BGR2GRAY`` (presumably a BGR image as returned
            by ``cv2.imread`` -- confirm against callers).
        hash_size: Side length the image is resized to before the DCT.

    Returns:
        Lowercase hexadecimal string without the ``0x`` prefix. It is always
        ``ceil(hash_size * hash_size / 4)`` characters long, so hashes
        produced with the same ``hash_size`` can be compared position by
        position.
    """
    resized = cv2.resize(img, (hash_size, hash_size))
    gray = cv2.cvtColor(resized, cv2.COLOR_BGR2GRAY)
    dct = cv2.dct(np.float32(gray))

    # NOTE(review): the previous comment spoke of keeping only the top-left
    # 8x8 block, but the slice it described covered the whole
    # hash_size x hash_size matrix. All coefficients are still used here so
    # that already computed hash values do not change.
    bits = (dct > np.mean(dct)).astype(int).flatten()
    bit_string = ''.join(str(bit) for bit in bits)

    # Pad to the full width of the bit string. Padding to hash_size // 4
    # digits was too short, so a hash with leading zero bits (e.g. an
    # all-black image) came out shorter than the others.
    hex_width = (bits.size + 3) // 4
    return hex(int(bit_string, 2))[2:].zfill(hex_width)
#均值哈希算法
def pic_avg_hash(img):
    """Compute the 64-bit average hash of an image.

    The image is resized to 8x8 with bicubic interpolation and converted to
    grayscale; every pixel is then compared with the mean of the 64 pixels.

    Args:
        img: Image array accepted by ``cv2.resize`` and by ``cv2.cvtColor``
            with ``cv2.COLOR_BGR2GRAY`` (presumably a BGR image as returned
            by ``cv2.imread`` -- confirm against callers).

    Returns:
        A 64-character string of ``'0'``/``'1'`` in row-major order; a
        character is ``'1'`` when the pixel is strictly greater than the
        mean gray level.
    """
    small = cv2.resize(img, (8, 8), interpolation=cv2.INTER_CUBIC)
    gray = cv2.cvtColor(small, cv2.COLOR_BGR2GRAY)

    # Average with a float64 accumulator. Adding uint8 pixels one at a time
    # to a Python int keeps the uint8 dtype on NumPy >= 2, so the running
    # sum wrapped around at 256 and the mean was wrong.
    avg = gray.mean(dtype=np.float64)

    above_avg = (gray > avg).flatten()
    return ''.join('1' if flag else '0' for flag in above_avg)
#差值感知算法
def pic_dif_hash(img):
    """Compute the 64-bit difference hash of an image.

    The image is resized to 9 columns by 8 rows with bicubic interpolation
    and converted to grayscale. Each of the first 8 pixels in a row is then
    compared with its right-hand neighbour.

    Args:
        img: Image array accepted by ``cv2.resize`` and by ``cv2.cvtColor``
            with ``cv2.COLOR_BGR2GRAY`` (presumably a BGR image as returned
            by ``cv2.imread`` -- confirm against callers).

    Returns:
        A 64-character string of ``'0'``/``'1'`` in row-major order; a
        character is ``'1'`` when a pixel is strictly greater than the pixel
        to its right.
    """
    shrunk = cv2.resize(img, (9, 8), interpolation=cv2.INTER_CUBIC)
    gray = cv2.cvtColor(shrunk, cv2.COLOR_BGR2GRAY)

    bits = []
    for row in gray:
        # Pair every pixel with its right neighbour: 9 columns -> 8 pairs.
        for left, right in zip(row[:-1], row[1:]):
            bits.append('1' if left > right else '0')
    return ''.join(bits)
#Hash值对比
def hash_cmp(hash1, hash2):
    """Count the positions at which two equal-length hashes differ.

    The comparison is element by element, so for hexadecimal strings each
    differing character counts once, however many bits differ inside it.

    Args:
        hash1: First hash, e.g. a string of bits or hex digits.
        hash2: Second hash, expected to have the same length as ``hash1``.

    Returns:
        The number of differing positions (0 means identical, larger means
        less alike), or -1 when the two hashes have different lengths.
    """
    # A length mismatch is reported as -1 rather than raised.
    if len(hash1) != len(hash2):
        return -1
    return sum(left != right for left, right in zip(hash1, hash2))
if __name__ == '__main__':
    # Demo: hash five sample images and print how far images 2-5 are from
    # the first one.
    image_paths = [
        '/Users/bixi/Documents/整理归档/产业中心/图库/图库治理/测试图片/a.jpeg',
        '/Users/bixi/Documents/整理归档/产业中心/图库/图库治理/测试图片/b.jpeg',
        '/Users/bixi/Documents/整理归档/产业中心/图库/图库治理/测试图片/c.jpeg',
        '/Users/bixi/Documents/整理归档/产业中心/图库/图库治理/测试图片/d.png',
        '/Users/bixi/Documents/整理归档/产业中心/图库/图库治理/测试图片/e.jpeg',
    ]

    images = []
    for path in image_paths:
        image = cv2.imread(path)
        # cv2.imread returns None instead of raising when the file is
        # missing or cannot be decoded; stop here with a readable message.
        if image is None:
            raise SystemExit(f'cannot read image: {path}')
        images.append(image)

    hashes = [pic_p_hash(image, 32) for image in images]
    print(hashes[4])

    # pic_p_hash returns a hex string, so each printed value is the number
    # of differing hex digits (or -1 if the lengths differ).
    reference = hashes[0]
    for other in hashes[1:]:
        print(hash_cmp(reference, other))