Where's Waldo? with Python and OpenCV

OpenCV is a C++ library for computer vision. It can do image segmentation, feature recognition (e.g., face detection), video processing, and much more. I use it to correct for drift in atomic force microscopy data; here we'll use it to play Where's Waldo (or Wally for the Brits).

OpenCV comes with a Python wrapper, cv2, which stores images as NumPy arrays. This makes it easy to switch between treating images as images and treating them as arrays.

In [1]:
# import OpenCV computer vision library
# we'll use imdecode, resize, matchTemplate, minMaxLoc, and rectangle (imread is the local-file counterpart of imdecode)
import cv2

# import the standard numerical and plotting packages
import numpy as np
import matplotlib.pyplot as plt

# we'll use urllib to get images
import urllib
def url_to_array(url):
    """ Download the resource at url and return its raw bytes
        as a 1-D NumPy array of dtype uint8.
        The bytes are not decoded here; pass the array to cv2.imdecode
        to turn it into an image.
        Errors raised by urlopen (e.g. unreachable URL) propagate to the caller.
    """
    # urlopen lives in urllib on Python 2 and in urllib.request on Python 3;
    # importing locally keeps this function usable under either interpreter
    try:
        from urllib.request import urlopen
    except ImportError:
        from urllib import urlopen
    from contextlib import closing

    # closing() guarantees the connection is released even if read() fails
    # (Python 2 responses are not context managers themselves)
    with closing(urlopen(url)) as response:
        payload = response.read()

    # bytearray gives NumPy a mutable buffer, so the returned array is writable
    return np.asarray(bytearray(payload), dtype=np.uint8)

Practice with PyLadies

In [2]:
# cv2.imdecode: load an image from a NumPy array
# http://docs.opencv.org/modules/highgui/doc/reading_and_writing_images_and_video.html?highlight=imdecode#cv2.imdecode
# for local files, there's cv2.imread
# cv2.IMREAD_COLOR requests a 3-channel color image; it replaces the legacy
# cv2.CV_LOAD_IMAGE_COLOR name, which OpenCV 3 and later no longer provide
pyladies_url = 'http://www.pyladies.com/assets/images/pyladies_logo.png'
pyladies_arr = url_to_array(pyladies_url)
pyladies_img = cv2.imdecode(pyladies_arr, cv2.IMREAD_COLOR)
In [3]:
# images are just NumPy arrays!
# shape is (height, width, n_colors); indexing [row, col] gives that pixel's color channels
# print(...) with a single pre-formatted string prints the same on Python 2 and Python 3
print("height = %d, width = %d, n_colors = %d" % pyladies_img.shape)
print("pixel (300,300) is %s (BGR)" % pyladies_img[300,300])
# pyplot.imshow is handed the BGR array unchanged here; the next cell reorders the channels
plt.imshow(pyladies_img)
height = 638, width = 1499, n_colors = 3
pixel (300,300) is [ 57   2 237] (BGR)

Out[3]:
<matplotlib.image.AxesImage at 0x2ed71d0>
In [4]:
# default is (blue, green, red)
# let's change it to (red, green, blue) for pyplot.imshow compatibility
# there's also cv2.imshow, which takes BGR images, but doesn't work in ipython inline :(
# indexing the last axis with [2,1,0] reverses the channel order and leaves the shape unchanged
pyladies_rgb = pyladies_img[:,:,[2,1,0]]
# print(...) with a single pre-formatted string prints the same on Python 2 and Python 3
print("shape is the same? %s" % (pyladies_rgb.shape == pyladies_img.shape))
print("now pixel (300,300) is %s (RGB)" % pyladies_rgb[300,300])
plt.imshow(pyladies_rgb)
shape is the same? True
now pixel (300,300) is [237   2  57] (RGB)

Out[4]:
<matplotlib.image.AxesImage at 0x3409d90>
In [5]:
# cv2.resize: rescale an image to a requested size
# http://docs.opencv.org/modules/imgproc/doc/geometric_transformations.html?highlight=resize#cv2.resize
# the array shape is (height, width, colors), but the target size is given as (width, height):
# keep the width and double the height
tall_ladies = cv2.resize(pyladies_img, (pyladies_img.shape[1], 2 * pyladies_img.shape[0]))
plt.imshow(tall_ladies)
tall_ladies.shape
Out[5]:
(1276, 1499, 3)

Where's Waldo?

In [6]:
# get the images: the Waldo template and the scene to search in
# cv2.IMREAD_COLOR requests a 3-channel color image; it replaces the legacy
# cv2.CV_LOAD_IMAGE_COLOR name, which OpenCV 3 and later no longer provide
waldo_url = 'http://farm4.staticflickr.com/3790/9137762420_b851165bf4_o.png'
waldo_arr = url_to_array(waldo_url)
waldo_img = cv2.imdecode(waldo_arr, cv2.IMREAD_COLOR)
scene_url = 'http://farm6.staticflickr.com/5494/9137763452_8f51208ab2_o.png'
scene_arr = url_to_array(scene_url)
scene_img = cv2.imdecode(scene_arr, cv2.IMREAD_COLOR)
In [7]:
# cv2.matchTemplate: slide a template image across a scene image, scoring the match at each position
# http://docs.opencv.org/modules/imgproc/doc/object_detection.html?highlight=matchtemplate#cv2.matchTemplate
# arguments are (scene, template); TM_CCORR_NORMED is one of several scoring methods (see docs)
scores = cv2.matchTemplate(scene_img, waldo_img, method=cv2.TM_CCORR_NORMED)
# the result is itself a 2-D array, so it can be shown as an image
plt.imshow(scores)
Out[7]:
<matplotlib.image.AxesImage at 0x3423210>
In [8]:
# cv2.minMaxLoc: get the min, max, argmin, and argmax of a scores array
# http://docs.opencv.org/modules/core/doc/operations_on_arrays.html?highlight=minmaxloc#cv2.minMaxLoc
# locations come back as (x, y) pairs
min_score, max_score, (min_x, min_y), (max_x, max_y) = cv2.minMaxLoc(scores)
# the highest score marks the template's upper-left corner in the scene
corner_topL = (max_x, max_y)
# shape is (height, width, colors), so add shape[1] to x and shape[0] to y
corner_botR = (corner_topL[0]+waldo_img.shape[1], corner_topL[1]+waldo_img.shape[0])
# print(...) with a single pre-formatted string prints the same on Python 2 and Python 3
print("%s %s" % (corner_topL, corner_botR))
(979, 842) (1013, 894)

In [9]:
# cv2.rectangle: draw a rectangle on an image (the image is modified in place)
# http://docs.opencv.org/modules/core/doc/drawing_functions.html?highlight=rectangle#cv2.rectangle
# work on an RGB copy so that scene_img itself is left untouched
scene_img_highlighted = scene_img[:, :, [2, 1, 0]].copy()
# arguments: image, upper left corner, lower right corner, rgb color tuple, stroke thickness in pixels
cv2.rectangle(scene_img_highlighted, corner_topL, corner_botR, (0, 255, 0), 10)
plt.imshow(scene_img_highlighted)
Out[9]:
<matplotlib.image.AxesImage at 0x368a690>

For your convenience: reusable functions

In [10]:
def best_match(template_img, scene_img, minsize, maxsize):
    """ Get the best match for a template image within a scene image,
        rescaling the template to each width in minsize, minsize+1, ..., maxsize-1
        (maxsize itself is excluded) while maintaining the aspect ratio.
        Each width is logged as it is tried, and the best score per width
        is plotted with pyplot, with a red arrow at the winning width.
        Returns two 2-tuples of ints:
            corner is the (x,y) position of the upper-left corner of the template in the scene
            wh is (width, height) of the best-scoring resized template
        Raises ValueError if there are no widths to try (minsize >= maxsize).
    """
    # widths is all the widths to try (np.arange excludes maxsize)
    widths = np.arange(minsize, maxsize, dtype=int)
    if widths.size == 0:
        # fail early with a clear message instead of an obscure argmax error later
        raise ValueError("no template widths to try: minsize (%s) must be less than maxsize (%s)"
                         % (minsize, maxsize))
    # aspect_ratio is height/width of the template image
    aspect_ratio = template_img.shape[0] / float(template_img.shape[1])
    # heights is all the heights to try (truncated to whole pixels)
    heights = np.asarray(aspect_ratio*widths, dtype=int)

    # best_scores will store the best score for each width
    best_scores = np.zeros(len(widths))
    # best_positions will store the best (x,y) positions of the template for each width
    best_positions = np.zeros([len(widths), 2], dtype=int)

    # scan widths
    for isize, (width, height) in enumerate(zip(widths, heights)):
        # hand cv2 plain Python ints rather than NumPy scalars
        width, height = int(width), int(height)

        # log (single-argument form prints the same on Python 2 and Python 3)
        print("resizing to width = %d" % width)

        # resize
        resized_template_img = cv2.resize(template_img, (width, height))

        # match
        scores = cv2.matchTemplate(scene_img, resized_template_img, method=cv2.TM_CCORR_NORMED)

        # get best score and position; the minimum is not needed
        _, max_score, _, (max_x, max_y) = cv2.minMaxLoc(scores)

        # store best score and position
        best_scores[isize] = max_score
        best_positions[isize] = [max_x, max_y]

    # choose best overall match (on ties, argmax picks the smallest width)
    best_isize = int(np.argmax(best_scores))

    # plot scores
    plt.plot(widths, best_scores)
    plt.arrow(widths[best_isize], 0, 0, 1, color='r')
    plt.xlabel('template width')
    plt.ylabel('score')

    # return plain Python ints, as the docstring promises
    corner = tuple(int(coord) for coord in best_positions[best_isize])
    wh = (int(widths[best_isize]), int(heights[best_isize]))
    return corner, wh

def imshow_highlighted(img, corner, wh, rgb=(0,255,0), stroke=5):
    """ Display an image with a rectangle drawn on top of it.
        img is indexed as [row, column, channel]; channels 2,1,0 are reordered
        to come first, second, third before display (i.e. BGR becomes RGB),
        corner is the rectangle's upper-left (x, y) as a tuple of ints,
        wh is the rectangle's (width, height) as a tuple of ints,
        rgb is the rectangle color as an (r,g,b) tuple (optional, default green),
        stroke is the rectangle line thickness in pixels (optional, default 5).
        The input image is not modified.
    """
    # the rectangle is drawn on a channel-swapped copy, never on img itself
    swapped_channels = [2, 1, 0]
    canvas = img[:, :, swapped_channels].copy()

    # the lower-right corner is the upper-left corner offset by (width, height)
    x_left, y_top = corner[0], corner[1]
    lower_right = (x_left + wh[0], y_top + wh[1])
    cv2.rectangle(canvas, corner, lower_right, rgb, stroke)

    # hand the annotated copy to pyplot
    plt.imshow(canvas)
In [11]:
# try template widths 20 through 59 (best_match builds its widths with np.arange, which excludes the upper bound)
corner, wh = best_match(waldo_img, scene_img, 20, 60)
resizing to width = 20
resizing to width = 21
resizing to width = 22
resizing to width = 23
resizing to width = 24
resizing to width = 25
resizing to width = 26
resizing to width = 27
resizing to width = 28
resizing to width = 29
resizing to width = 30
resizing to width = 31
resizing to width = 32
resizing to width = 33
resizing to width = 34
resizing to width = 35
resizing to width = 36
resizing to width = 37
resizing to width = 38
resizing to width = 39
resizing to width = 40
resizing to width = 41
resizing to width = 42
resizing to width = 43
resizing to width = 44
resizing to width = 45
resizing to width = 46
resizing to width = 47
resizing to width = 48
resizing to width = 49
resizing to width = 50
resizing to width = 51
resizing to width = 52
resizing to width = 53
resizing to width = 54
resizing to width = 55
resizing to width = 56
resizing to width = 57
resizing to width = 58
resizing to width = 59

In [12]:
# draw the best-match rectangle on the scene, using the default color and stroke
imshow_highlighted(scene_img, corner, wh)