# Find the dominant colors of an image by clustering its pixel colors with
# k-means.  The red/green/blue channels are treated as points in a
# 3-dimensional space; the cluster centers are the dominant colors.
#
# The code runs unchanged on Python 2.6+ and Python 3: it only uses
# single-argument print(...) calls, io.BytesIO, and a guarded urlopen import.

import random
from collections import namedtuple
from contextlib import closing
from io import BytesIO

try:
    from urllib2 import urlopen  # Python 2
except ImportError:
    from urllib.request import urlopen  # Python 3

IMAGE_URL = 'http://media.charlesleifer.com/blog/photos/thumbnails/akira_650x650.jpg'

# NOTE(review): the markup of the original swatch strings was lost (only
# whitespace survived between the quotes), so this template is a
# reconstruction -- confirm the intended size/styling.
SWATCH_TEMPLATE = (
    '<div style="background-color: %s; height: 20px; width: 200px">'
    '&nbsp;</div>'
)

# these classes represent the data we extract -- namedtuples are used as
# they have lower memory overhead than full classes
#   Point.coords  -- sequence of channel values
#   Point.n       -- number of dimensions in coords
#   Point.ct      -- weight: how many pixels have this color
Point = namedtuple('Point', ('coords', 'n', 'ct'))
Cluster = namedtuple('Cluster', ('points', 'center', 'n'))


def get_points(img):
    """Return one weighted Point per distinct color in *img*.

    *img* is a PIL image.  It is converted to RGB when it is in another
    mode, so grayscale/palette/CMYK images also yield 3-channel points
    (the caller's image object is not modified; ``convert`` returns a copy).
    Each Point's ``ct`` is the number of pixels having that color.
    """
    if img.mode != 'RGB':
        img = img.convert('RGB')
    w, h = img.size
    # w * h is the largest possible number of distinct colors, so
    # getcolors() never gives up and returns None
    return [
        Point(color, len(color), count)
        for count, color in img.getcolors(w * h)
    ]


def point_distance(p1, p2):
    """Return the *squared* Euclidean distance between two Points.

    Only the first ``p1.n`` coordinates are compared.  The square root is
    skipped because the value is only used for comparisons.
    """
    return sum((p1.coords[i] - p2.coords[i]) ** 2 for i in range(p1.n))


def calculate_center(points, n):
    """Return the weighted mean of *points* as a Point with weight 1.

    Each point contributes its coordinates multiplied by its ``ct``.

    Raises:
        ValueError: if *points* is empty or its total weight is zero
            (the mean is undefined).
    """
    vals = [0.0] * n
    plen = 0
    for p in points:
        plen += p.ct
        for i in range(n):
            vals[i] += p.coords[i] * p.ct
    if plen == 0:
        raise ValueError('cannot compute the center of an empty cluster')
    return Point([v / plen for v in vals], n, 1)


def kmeans(points, k, min_diff):
    """Partition *points* into *k* clusters and return the Cluster list.

    Initial centers are *k* points picked with ``random.sample`` (which
    raises ValueError when ``k > len(points)``).  Iteration stops once the
    largest center movement -- measured as a squared distance, see
    ``point_distance`` -- is below *min_diff*, or once no center moved at
    all (which also guarantees termination for ``min_diff <= 0``).
    """
    clusters = [Cluster([p], p, p.n) for p in random.sample(points, k)]

    while True:
        plists = [[] for _ in range(k)]

        for p in points:
            # min() returns the first index on ties, like a strict '<' scan
            idx = min(
                range(k),
                key=lambda i: point_distance(p, clusters[i].center),
            )
            plists[idx].append(p)

        diff = 0
        for i in range(k):
            old = clusters[i]
            if plists[i]:
                center = calculate_center(plists[i], old.n)
            else:
                # a cluster that attracted no points keeps its old center
                # instead of dividing by zero
                center = old.center
            new = Cluster(plists[i], center, old.n)
            clusters[i] = new
            diff = max(diff, point_distance(old.center, new.center))

        # diff == 0 means the next pass would be identical to this one
        if diff < min_diff or diff == 0:
            break

    return clusters


def rtoh(rgb):
    """Convert a sequence of integer channel values to a '#rrggbb' code."""
    return '#%s' % ''.join('%02x' % p for p in rgb)


def main():
    """Download the sample image and print/display its 3 dominant colors."""
    # third-party imports live here so the clustering helpers above can be
    # imported without Pillow or IPython installed
    from PIL import Image

    # let's start by pulling down the image data; closing() releases the
    # response even if read() fails
    with closing(urlopen(IMAGE_URL)) as fh:
        img_data = fh.read()

    # what does img_data look like? here are the first 10 bytes
    print(img_data[:10])

    # let's load up this image data
    img = Image.open(BytesIO(img_data))

    # what is img? it should be a jpeg image file
    print(img)

    # shrink the image in place so there are fewer pixels to cluster
    img.thumbnail((200, 200))

    img_points = get_points(img)

    print('Calculating clusters -- this may take a few seconds')
    # 3 clusters (3 dominant colors), stopping when centers move < 1 unit
    clusters = kmeans(img_points, 3, 1)
    rgbs = [[int(v) for v in c.center.coords] for c in clusters]
    print('Done')
    print(rgbs)

    color_codes = [rtoh(rgb) for rgb in rgbs]
    print(color_codes)

    # now, let's display those colors using HTML; display() is needed
    # because a bare HTML(...) expression only renders as the last
    # expression of a notebook cell
    from IPython.display import HTML, display
    for code in color_codes:
        display(HTML(SWATCH_TEMPLATE % code))


if __name__ == '__main__':
    main()