# import the OpenCV computer vision library
# we'll use imread, imdecode, resize, matchTemplate, minMaxLoc, and rectangle
import cv2

# import the standard numerical and plotting packages
import numpy as np
import matplotlib.pyplot as plt

# we'll use urllib to fetch images from the web
import urllib


def url_to_array(url):
    # download a URL and return its raw bytes as a NumPy uint8 array
    request = urllib.urlopen(url)
    arr = np.asarray(bytearray(request.read()), dtype=np.uint8)
    return arr


# cv2.imdecode: load an image from a NumPy array of encoded bytes
# http://docs.opencv.org/modules/highgui/doc/reading_and_writing_images_and_video.html?highlight=imdecode#cv2.imdecode
# for local files, there's cv2.imread
pyladies_url = 'http://www.pyladies.com/assets/images/pyladies_logo.png'
pyladies_arr = url_to_array(pyladies_url)
pyladies_img = cv2.imdecode(pyladies_arr, cv2.CV_LOAD_IMAGE_COLOR)

# images are just NumPy arrays!
print "height = %d, width = %d, n_colors = %d" % pyladies_img.shape
print "pixel (300,300) is %s (BGR)" % pyladies_img[300,300]
plt.imshow(pyladies_img)

# OpenCV's default channel order is (blue, green, red);
# reorder it to (red, green, blue) so pyplot.imshow displays the colors correctly
# (there's also cv2.imshow, which takes BGR images, but it doesn't work inline in IPython :( )
pyladies_rgb = pyladies_img[:,:,[2,1,0]]
print "shape is the same? %s" % (pyladies_rgb.shape == pyladies_img.shape)
print "now pixel (300,300) is %s (RGB)" % pyladies_rgb[300,300]
plt.imshow(pyladies_rgb)

# cv2.resize: resize an image to a new (width, height)
# http://docs.opencv.org/modules/imgproc/doc/geometric_transformations.html?highlight=resize#cv2.resize
tall_ladies = cv2.resize(pyladies_img,               # image
                         (pyladies_img.shape[1],     # new width (unchanged)
                          pyladies_img.shape[0]*2))  # new height (doubled)
plt.imshow(tall_ladies)
tall_ladies.shape

# fetch the Waldo template and the scene to search
waldo_url = 'http://farm4.staticflickr.com/3790/9137762420_b851165bf4_o.png'
waldo_arr = url_to_array(waldo_url)
waldo_img = cv2.imdecode(waldo_arr, cv2.CV_LOAD_IMAGE_COLOR)

scene_url = 'http://farm6.staticflickr.com/5494/9137763452_8f51208ab2_o.png'
scene_arr = url_to_array(scene_url)
scene_img = cv2.imdecode(scene_arr, cv2.CV_LOAD_IMAGE_COLOR)

# cv2.matchTemplate: slide a template image across a scene image and get a match score at each position
# http://docs.opencv.org/modules/imgproc/doc/object_detection.html?highlight=matchtemplate#cv2.matchTemplate
scores = cv2.matchTemplate(scene_img,                   # scene image
                           waldo_img,                   # template image
                           method=cv2.TM_CCORR_NORMED)  # see the docs for the available methods
plt.imshow(scores)

# cv2.minMaxLoc: get the min, max, argmin, and argmax of a scores array
# http://docs.opencv.org/modules/core/doc/operations_on_arrays.html?highlight=minmaxloc#cv2.minMaxLoc
min_score, max_score, (min_x, min_y), (max_x, max_y) = cv2.minMaxLoc(scores)
corner_topL = (max_x, max_y)
corner_botR = (corner_topL[0] + waldo_img.shape[1], corner_topL[1] + waldo_img.shape[0])
print corner_topL, corner_botR

# cv2.rectangle: draw a rectangle on an image (in place)
# http://docs.opencv.org/modules/core/doc/drawing_functions.html?highlight=rectangle#cv2.rectangle
scene_img_highlighted = scene_img[:,:,[2,1,0]].copy()  # RGB copy so we don't draw on the original
cv2.rectangle(scene_img_highlighted,  # image to draw the rectangle on
              corner_topL,            # upper-left corner of the rectangle
              corner_botR,            # lower-right corner of the rectangle
              (0, 255, 0),            # (r,g,b) tuple for the rectangle color
              10)                     # stroke thickness in pixels
plt.imshow(scene_img_highlighted)
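
# A quick sanity check on the score-map geometry (a minimal sketch using only the arrays
# already defined above): matchTemplate produces one score per valid placement of the
# template, and the peak can also be located with plain NumPy.  Note that
# np.unravel_index returns (row, col), i.e. (y, x), while cv2.minMaxLoc returns (x, y).
assert scores.shape == (scene_img.shape[0] - waldo_img.shape[0] + 1,
                        scene_img.shape[1] - waldo_img.shape[1] + 1)
best_y, best_x = np.unravel_index(np.argmax(scores), scores.shape)
# (best_x, best_y) should agree with (max_x, max_y) from cv2.minMaxLoc, barring exact ties
print (best_x, best_y) == (max_x, max_y)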

def best_match(template_img, scene_img, minsize, maxsize):
    """
    Get the best match for a template image within a scene image,
    rescaling the template width between minsize and maxsize
    while maintaining the aspect ratio.

    Returns two 2-tuples of ints:
        corner is the (x,y) position of the upper-left corner of the template in the scene
        wh is the (width, height) of the template at the best match
    """
    # widths is all the widths to try
    widths = np.arange(minsize, maxsize, dtype=int)
    # aspect_ratio is height/width of the template image
    aspect_ratio = template_img.shape[0] / float(template_img.shape[1])
    # heights is all the heights to try
    heights = np.asarray(aspect_ratio * widths, dtype=int)

    # best_scores will store the best score for each width
    best_scores = np.zeros(len(widths))
    # best_positions will store the best (x,y) position of the template for each width
    best_positions = np.zeros([len(widths), 2], dtype=int)

    # scan the widths
    for isize in range(widths.size):
        # log
        print "resizing to width = %d" % widths[isize]
        # resize
        resized_template_img = cv2.resize(template_img, (widths[isize], heights[isize]))
        # match
        scores = cv2.matchTemplate(scene_img, resized_template_img, method=cv2.TM_CCORR_NORMED)
        # get the best score and position
        min_score, max_score, (min_x, min_y), (max_x, max_y) = cv2.minMaxLoc(scores)
        # store the best score and position
        best_scores[isize] = max_score
        best_positions[isize] = [max_x, max_y]

    # choose the best overall match
    best_isize = np.argmax(best_scores)
    best_width = widths[best_isize]
    best_position = best_positions[best_isize]

    # plot score vs. template width, with an arrow at the best width
    plt.plot(widths, best_scores)
    plt.arrow(widths[best_isize], 0, 0, 1, color='r')
    plt.xlabel('template width')
    plt.ylabel('score')

    # return the best corner position and the (width, height) that produced it
    return tuple(best_positions[best_isize]), (widths[best_isize], heights[best_isize])


def imshow_highlighted(img, corner, wh, rgb=(0,255,0), stroke=5):
    """
    Show an image with a highlighted rectangle.

    corner is an (x_upperleft, y_upperleft) tuple of ints,
    wh is a (width, height) tuple of ints,
    rgb is an optional (r,g,b) tuple (default green),
    stroke is an optional stroke thickness in pixels (default 5).
    """
    # copy the image (and flip BGR to RGB) so we don't modify the original
    img_highlighted = img[:,:,[2,1,0]].copy()
    # add a rectangle
    cv2.rectangle(img_highlighted, corner, (corner[0]+wh[0], corner[1]+wh[1]), rgb, stroke)
    # show
    plt.imshow(img_highlighted)


corner, wh = best_match(waldo_img, scene_img, 20, 60)
imshow_highlighted(scene_img, corner, wh)
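
# A small follow-up sketch, reusing only the corner and wh values returned above:
# since the scene is just a NumPy array, the matched region can be cropped out with
# plain slicing (rows are y, columns are x) and shown on its own, flipped to RGB for pyplot.
x, y = corner
w, h = wh
waldo_crop = scene_img[y:y+h, x:x+w, [2,1,0]]
plt.figure()
plt.imshow(waldo_crop)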