OpenCV is a C++ library for computer vision. It can do image segmentation, feature recognition (e.g., face detection), video processing, and much more. I use it to correct for drift in atomic force microscopy data; here we'll use it to play Where's Waldo (or Wally for the Brits).
OpenCV comes with a Python wrapper, cv2, which stores images as NumPy arrays. This makes it easy to switch between treating images as images and treating them as arrays.
# import OpenCV computer vision library
# we'll use imread, imdecode, resize, matchTemplate, minMaxLoc, and rectangle
import cv2
# import the standard numerical and plotting packages
import numpy as np
import matplotlib.pyplot as plt
# we'll use urllib to fetch images
import urllib
def url_to_array(url):
    """Fetch the resource at `url` and return its raw bytes as a NumPy array.

    url is a string naming the resource to open.
    Returns a 1-D np.uint8 array holding the undecoded bytes that were read
    (the form cv2.imdecode is given elsewhere in this file).
    Any error raised by urlopen propagates to the caller.
    """
    # urlopen lives in urllib.request on Python 3 and in urllib on Python 2
    try:
        from urllib.request import urlopen
    except ImportError:
        from urllib import urlopen
    from contextlib import closing
    # closing() guarantees the connection is released even if read() fails
    with closing(urlopen(url)) as response:
        raw = response.read()
    return np.asarray(bytearray(raw), dtype=np.uint8)
# cv2.imdecode: load an image from a NumPy array
# http://docs.opencv.org/modules/highgui/doc/reading_and_writing_images_and_video.html?highlight=imdecode#cv2.imdecode
# for local files, there's cv2.imread
pyladies_url = 'http://www.pyladies.com/assets/images/pyladies_logo.png'
pyladies_arr = url_to_array(pyladies_url)
# cv2.IMREAD_COLOR is the current name for the old cv2.CV_LOAD_IMAGE_COLOR flag,
# which was removed in OpenCV 3
pyladies_img = cv2.imdecode(pyladies_arr, cv2.IMREAD_COLOR)
# imdecode signals failure by returning None instead of raising
if pyladies_img is None:
    raise ValueError("could not decode an image from %s" % pyladies_url)
# images are just NumPy arrays!
# (single-argument print(...) behaves the same on Python 2 and Python 3)
print("height = %d, width = %d, n_colors = %d" % pyladies_img.shape)
print("pixel (300,300) is %s (BGR)" % pyladies_img[300,300])
# shown without channel reordering, so the colors are displayed swapped here
plt.imshow(pyladies_img)
height = 638, width = 1499, n_colors = 3 pixel (300,300) is [ 57 2 237] (BGR)
<matplotlib.image.AxesImage at 0x2ed71d0>
# default is (blue, green, red)
# let's change it to (red, green, blue) for pyplot.imshow compatibility
# there's also cv2.imshow, which takes BGR images, but doesn't work in ipython inline :(
# indexing the last axis with [2,1,0] reverses the channel order
pyladies_rgb = pyladies_img[:,:,[2,1,0]]
# (single-argument print(...) behaves the same on Python 2 and Python 3)
print("shape is the same? %s" % (pyladies_rgb.shape == pyladies_img.shape))
print("now pixel (300,300) is %s (RGB)" % pyladies_rgb[300,300])
plt.imshow(pyladies_rgb)
shape is the same? True now pixel (300,300) is [237 2 57] (RGB)
<matplotlib.image.AxesImage at 0x3409d90>
# cv2.resize: rescale an image to an explicit target size
# http://docs.opencv.org/modules/imgproc/doc/geometric_transformations.html?highlight=resize#cv2.resize
# mind the order: the array shape is (height, width, n_colors),
# but the target size is given as (width, height)
tall_ladies = cv2.resize(
    pyladies_img,
    dsize=(pyladies_img.shape[1], pyladies_img.shape[0]*2),  # same width, double height
)
plt.imshow(tall_ladies)
tall_ladies.shape
(1276, 1499, 3)
# get the images: the template to search for (waldo) and the image to search in (scene)
waldo_url = 'http://farm4.staticflickr.com/3790/9137762420_b851165bf4_o.png'
waldo_arr = url_to_array(waldo_url)
# cv2.IMREAD_COLOR is the current name for the old cv2.CV_LOAD_IMAGE_COLOR flag,
# which was removed in OpenCV 3
waldo_img = cv2.imdecode(waldo_arr, cv2.IMREAD_COLOR)
# imdecode signals failure by returning None instead of raising
if waldo_img is None:
    raise ValueError("could not decode an image from %s" % waldo_url)
scene_url = 'http://farm6.staticflickr.com/5494/9137763452_8f51208ab2_o.png'
scene_arr = url_to_array(scene_url)
scene_img = cv2.imdecode(scene_arr, cv2.IMREAD_COLOR)
if scene_img is None:
    raise ValueError("could not decode an image from %s" % scene_url)
# cv2.matchTemplate: slide the template over the scene and score the match at every offset
# http://docs.opencv.org/modules/imgproc/doc/object_detection.html?highlight=matchtemplate#cv2.matchTemplate
# arguments: scene image, template image, comparison method (see docs for the alternatives)
scores = cv2.matchTemplate(scene_img, waldo_img, cv2.TM_CCORR_NORMED)
# the score map is itself a 2-D array, so it can be displayed like an image
plt.imshow(scores)
<matplotlib.image.AxesImage at 0x3423210>
# cv2.minMaxLoc: get the min, max, argmin, and argmax of a scores array
# http://docs.opencv.org/modules/core/doc/operations_on_arrays.html?highlight=minmaxloc#cv2.minMaxLoc
min_score, max_score, (min_x, min_y), (max_x, max_y) = cv2.minMaxLoc(scores)
# the highest-scoring position is taken as the template's upper-left corner in the scene
corner_topL = (max_x, max_y)
# shape is (height, width, ...), so width (shape[1]) is added to x and height (shape[0]) to y
corner_botR = (corner_topL[0]+waldo_img.shape[1], corner_topL[1]+waldo_img.shape[0])
# formatted explicitly so the output is identical on Python 2 and Python 3
print("%s %s" % (corner_topL, corner_botR))
(979, 842) (1013, 894)
# cv2.rectangle: draw a rectangle onto an image, modifying that image in place
# http://docs.opencv.org/modules/core/doc/drawing_functions.html?highlight=rectangle#cv2.rectangle
# work on a channel-reversed copy so scene_img itself is left untouched
scene_img_highlighted = scene_img[:, :, [2, 1, 0]].copy()
# arguments: image, upper-left corner, lower-right corner, color, stroke thickness in pixels;
# the copy is already in RGB order, so (0,255,0) is green
cv2.rectangle(scene_img_highlighted, corner_topL, corner_botR, (0,255,0), thickness=10)
plt.imshow(scene_img_highlighted)
<matplotlib.image.AxesImage at 0x368a690>
def best_match(template_img, scene_img, minsize, maxsize):
    """ Get the best match for a template image within a scene image,
    rescaling the template to every integer width from minsize up to,
    but not including, maxsize while maintaining the aspect ratio.

    Each rescaled template is matched with cv2.TM_CCORR_NORMED and the
    width whose best score is highest wins. As a side effect, one progress
    line is printed per width and the best score per width is plotted with
    pyplot.

    Returns two 2-tuples of ints:
        corner is the (x,y) position of the upper-left corner of the template in the scene
        wh is (width, height) of the best-scoring rescaled template

    Raises ValueError if minsize and maxsize leave no widths to try.
    """
    # widths is all the widths to try (maxsize itself is excluded)
    widths = np.arange(minsize, maxsize, dtype=int)
    # fail early with a clear message instead of an opaque argmax error later
    if widths.size == 0:
        raise ValueError("no widths to try between minsize=%s and maxsize=%s"
                         % (minsize, maxsize))
    # aspect_ratio is height/width of the template image
    aspect_ratio = template_img.shape[0] / float(template_img.shape[1])
    # heights is all the heights to try (truncated to whole pixels)
    heights = np.asarray(aspect_ratio*widths, dtype=int)
    # best_scores will store the best score for each width
    best_scores = np.zeros(len(widths))
    # best_positions will store the best (x,y) positions of the template for each width
    best_positions = np.zeros([len(widths), 2], dtype=int)
    # scan widths
    for isize, (width, height) in enumerate(zip(widths, heights)):
        # log (single-argument print(...) behaves the same on Python 2 and 3)
        print("resizing to width = %d" % width)
        # resize; plain ints keep the size tuple free of NumPy scalar types
        resized_template_img = cv2.resize(template_img, (int(width), int(height)))
        # match
        scores = cv2.matchTemplate(scene_img, resized_template_img, method=cv2.TM_CCORR_NORMED)
        # get best score and position; the minimum and its location are not needed
        _, max_score, _, max_loc = cv2.minMaxLoc(scores)
        # store best score and position
        best_scores[isize] = max_score
        best_positions[isize] = max_loc
    # choose best overall match
    best_isize = int(np.argmax(best_scores))
    # plot scores, with a red arrow marking the winning width
    plt.plot(widths, best_scores)
    plt.arrow(widths[best_isize], 0, 0, 1, color='r')
    plt.xlabel('template width')
    plt.ylabel('score')
    # return plain Python ints, as the docstring promises
    corner = tuple(int(v) for v in best_positions[best_isize])
    wh = (int(widths[best_isize]), int(heights[best_isize]))
    return corner, wh
def imshow_highlighted(img, corner, wh, rgb=(0,255,0), stroke=5):
    """ Display an image with a rectangle outlined on top of it.

    The rectangle is drawn on a channel-reversed copy of img, so img itself
    is never modified; the copy is then shown with pyplot.
    (img is presumably in OpenCV's BGR order, which the reversal turns
    into RGB -- confirm against the caller.)

    corner is a (x_upperleft, y_upperleft) tuple of ints,
    wh is a (width, height) tuple of ints,
    rgb is an optional (r,g,b) tuple (default green),
    stroke is an optional number of pixels for rectangle stroke (default 5).
    """
    # reverse the last axis into a fresh array to draw on
    canvas = img[:, :, [2, 1, 0]].copy()
    # the lower-right corner is the upper-left corner shifted by (width, height)
    opposite_corner = (corner[0] + wh[0], corner[1] + wh[1])
    cv2.rectangle(canvas, corner, opposite_corner, rgb, stroke)
    plt.imshow(canvas)
# try every template width from 20 up to (but not including) 60 pixels
corner, wh = best_match(waldo_img, scene_img, minsize=20, maxsize=60)
resizing to width = 20 resizing to width = 21 resizing to width = 22 resizing to width = 23 resizing to width = 24 resizing to width = 25 resizing to width = 26 resizing to width = 27 resizing to width = 28 resizing to width = 29 resizing to width = 30 resizing to width = 31 resizing to width = 32 resizing to width = 33 resizing to width = 34 resizing to width = 35 resizing to width = 36 resizing to width = 37 resizing to width = 38 resizing to width = 39 resizing to width = 40 resizing to width = 41 resizing to width = 42 resizing to width = 43 resizing to width = 44 resizing to width = 45 resizing to width = 46 resizing to width = 47 resizing to width = 48 resizing to width = 49 resizing to width = 50 resizing to width = 51 resizing to width = 52 resizing to width = 53 resizing to width = 54 resizing to width = 55 resizing to width = 56 resizing to width = 57 resizing to width = 58 resizing to width = 59
# outline the best match found above on the scene
imshow_highlighted(scene_img, corner=corner, wh=wh)