# Notebook-style exploration of colour descriptors for icon images.
#
# Icons are read from the HDF5 file "1k.h5", colour-quantized with k-means,
# summarized as (palette colour, pixel fraction) lists, and compared with a
# soft palette-overlap score.  Bare expressions such as `apixels[:10]` are
# notebook display cells; they have no visible effect when run as a script.

from pylab import *
import tables
from collections import Counter
from scipy.cluster.vq import kmeans, vq


def crop_black(image):
    """Crop `image` to the bounding box of its non-black pixels.

    `image` is a 2-D array, or a 3-D array whose last axis is summed to
    decide which pixels are non-black.  A row or column counts as non-black
    when its sum is greater than zero.

    Returns a slice of `image` covering the first through the last non-black
    row and column, both inclusive.  An image with no non-black pixels is
    returned unchanged, because there is no bounding box to crop to.
    """
    gray = image
    if gray.ndim == 3:
        gray = sum(image, axis=2)
    rows = flatnonzero(sum(gray, axis=1) > 0)
    cols = flatnonzero(sum(gray, axis=0) > 0)
    if rows.size == 0 or cols.size == 0:
        return image
    # Slice ends are exclusive, so step one past the last non-black index;
    # otherwise the final non-black row and column would be cut off.
    y0, y1 = rows[0], rows[-1] + 1
    x0, x1 = cols[0], cols[-1] + 1
    if image.ndim == 3:
        return image[y0:y1, x0:x1, :]
    return image[y0:y1, x0:x1]


def color_descriptor(image, k=8):
    """Summarize the colours of `image` as a weighted k-means palette.

    The image is cropped with `crop_black`, divided by 255 when its maximum
    exceeds 1, and reshaped to rows of three channel values.  Rows whose
    channel sum is at most 0.01 are discarded before clustering.

    Returns a list of `(center, fraction)` pairs, most frequent first, where
    `fraction` is the cluster's pixel count divided by the pixel count of the
    whole cropped image (discarded pixels included, so the fractions may sum
    to less than 1).  Returns an empty list when no pixels pass the
    threshold, since k-means cannot be run on an empty sample.
    """
    image = crop_black(image)
    total = 1.0 * prod(image.shape[:2])
    if amax(image) > 1:
        image = image / 255.0
    pixels = image.reshape(-1, 3)
    pixels = pixels[sum(pixels, axis=1) > 0.01]
    if len(pixels) == 0:
        return []
    centers, _ = kmeans(pixels, k)
    palettized, _ = vq(pixels, centers)
    counts = Counter(palettized)
    return [(centers[c], n / total) for c, n in counts.most_common(k)]


def cdmatch(desc1, desc2, delta=0.25):
    """Return a soft overlap score between two colour descriptors.

    Each descriptor is a sequence of `(center, weight)` pairs as produced by
    `color_descriptor`.  Every pair of centers contributes
    `max(1 - dist**2 / delta**2, 0) * min(weight1, weight2)`, so centers
    farther apart than `delta` contribute nothing.
    """
    delta2 = delta ** 2
    total = 0.0
    for v, p in desc1:
        for w, q in desc2:
            d = maximum(1 - norm(v - w) ** 2 / delta2, 0)
            total += d * minimum(p, q)
    return total


def cddissim(desc1, desc2, delta=0.25):
    """Return a dissimilarity between two colour descriptors.

    The cross-match score is divided by the larger of the two self-match
    scores and subtracted from 1.  The divisor is floored at 1e-6 so that
    descriptors with a zero self-match do not cause a division by zero.
    """
    score = cdmatch(desc1, desc2, delta)
    score1 = cdmatch(desc1, desc1, delta)
    score2 = cdmatch(desc2, desc2, delta)
    return 1 - score / max(score1, score2, 1e-6)


# --- Quantize a single icon ------------------------------------------------

# NOTE(review): newer PyTables releases spell this `tables.open_file`; the
# original call is kept -- confirm against the installed PyTables version.
# The handle is left open because later cells keep reading `hdf.root.icons`.
hdf = tables.openFile("1k.h5", "r")
image = crop_black(hdf.root.icons[60] / 255.0)
imshow(image)

pixels = image.reshape(-1, 3)

centers, _ = kmeans(pixels, 8)
palettized, _ = vq(pixels, centers)
# NOTE(review): `cm.spectral` appears to have been renamed `nipy_spectral`
# in newer matplotlib releases -- confirm against the installed version.
imshow(palettized.reshape(image.shape[:2]), cmap=cm.spectral)

quant = centers[palettized].reshape(image.shape)
subplot(121); imshow(quant)
subplot(122); imshow(image)

# Same palette, but with the pixel labels sorted, so each colour is laid out
# as one contiguous run whose length is its pixel count.
quant = centers[sort(palettized)].reshape(image.shape)
subplot(121); imshow(quant)
subplot(122); imshow(image)

counts = Counter(palettized)
[(centers[c], n) for c, n in counts.most_common(100)]

# --- Descriptors for every icon --------------------------------------------

color_descriptor(hdf.root.icons[1])

descriptors = [color_descriptor(hdf.root.icons[i])
               for i in range(len(hdf.root.icons))]

print(cdmatch(descriptors[1], descriptors[1]))
print(cdmatch(descriptors[1], descriptors[2]))

# --- Nearest neighbours of a few query icons among the first 100 ------------

scores = [cddissim(descriptors[5], descriptors[i]) for i in range(100)]
print(scores[:8])
argsort(scores)

scores = [cddissim(descriptors[5], descriptors[i]) for i in range(100)]
for i, j in enumerate(argsort(scores)[:6]):
    subplot(1, 6, i + 1)
    axis("off")
    imshow(hdf.root.icons[j])
    title("%d %.2f" % (j, scores[j]))

subplot(121); hist(scores)
subplot(122); plot(sorted(scores)[:30])

scores = [cddissim(descriptors[22], descriptors[i]) for i in range(100)]
for i, j in enumerate(argsort(scores)[:6]):
    subplot(1, 6, i + 1)
    axis("off")
    imshow(hdf.root.icons[j])
    title("%d %.2f" % (j, scores[j]))

scores = [cddissim(descriptors[60], descriptors[i]) for i in range(100)]
for i, j in enumerate(argsort(scores)[:6]):
    subplot(1, 6, i + 1)
    axis("off")
    imshow(hdf.root.icons[j])
    title("%d %.2f" % (j, scores[j]))

subplot(121); hist(scores)
subplot(122); plot(sorted(scores)[:30])

# --- Position-augmented clustering -----------------------------------------

# Append scaled row/column coordinates to the three colour channels so that
# k-means groups pixels by colour and position together.
h, w, _ = image.shape
scale = 3.0
rs, cs = mgrid[:h, :w] * scale / max(h, w)
augmented = array(list(transpose(image, [2, 0, 1])) + [rs, cs])
augmented = transpose(augmented, [1, 2, 0])
apixels = augmented.reshape(-1, 5)
apixels[:10]

acenters, _ = kmeans(apixels, 64)
apalettized, _ = vq(apixels, acenters)
imshow(apalettized.reshape(image.shape[:2]))

aquant = acenters[apalettized].reshape(h, w, 5)
# Only the first three components of each center are colour channels.
subplot(121); imshow(aquant[:, :, :3])
subplot(122); imshow(image)

counts = Counter(apalettized)
[(acenters[c], n) for c, n in counts.most_common(100)]