import numpy as np
from matplotlib import pyplot as plt
from scipy import misc
from scipy import ndimage
import pickle
from sklearn import datasets
%matplotlib inline
# Load the source picture and convert it into a smooth signed field:
# pure black -> -1, pure white -> +1, (almost) everything else -> 0.
# scipy.misc.imread was removed in SciPy 1.2, so the file is read with
# matplotlib instead, which also returns an integer array for JPEG input.
img = plt.imread('./yingyang.jpg')
if img.ndim == 3:
    # Average the colour channels into one intensity value per pixel.
    img = np.mean(img, axis=2)
else:
    # Single-channel image: there is no channel axis to average over.
    img = img.astype(float)

# The three replacements are order-dependent: the last one must not see the
# +1 written by the second one as "greater than 1".
img = np.where(img == 0, -1, img)    # pure black
img = np.where(img == 255, 1, img)   # pure white
img = np.where(img > 1, 0, img)      # every remaining value above 1

# Blur so values fade towards 0 near the black/white boundaries.
img = ndimage.gaussian_filter(img, sigma=2)
plt.imshow(img, cmap='gray')
<matplotlib.image.AxesImage at 0x7f4ce1fe9f10>
# Draw uniformly distributed sample locations in pixel coordinates.
# Column 0 spans the first image axis and column 1 the second, so the points
# can index `img` safely even when the image is not square (scaling both
# columns by img.shape[0] would run past the narrower axis).
max_points = 1000
points = np.random.rand(max_points, 2) * np.asarray(img.shape[:2])
plt.scatter(points[:, 0], points[:, 1])
<matplotlib.collections.PathCollection at 0x7f4ce1c7d2d0>
# Read the blurred image value under every sample point in one vectorised
# lookup (truncating the float coordinates to integer pixel indices).
rows = points[:, 0].astype(int)
cols = points[:, 1].astype(int)
label = img[rows, cols]

# Discard points whose value is too close to 0, i.e. points that do not
# clearly belong to either class.
epsilon = 10**-1
keep = np.abs(label) > epsilon
points = points[keep, :]

# Collapse the remaining values to the two class labels +1 / -1.
label = np.where(label[keep] > 0, 1, -1)

# Rescale pixel coordinates by the size of the first image axis.
points /= img.shape[0]

# Show the two classes: -1 in red, +1 in green.
is_negative = label == -1
plt.scatter(points[is_negative, 0], points[is_negative, 1], color='red')
plt.scatter(points[~is_negative, 0], points[~is_negative, 1], color='green')
<matplotlib.collections.PathCollection at 0x7f4ce1baf890>
# Bundle the samples into a scikit-learn style Bunch, write it to disk and
# read it back.
# `sklearn.datasets.base` is a private module path that newer scikit-learn
# releases no longer provide; the public home of Bunch is `sklearn.utils`.
try:
    from sklearn.utils import Bunch
except ImportError:
    # Older scikit-learn: fall back to the location used originally.
    Bunch = datasets.base.Bunch

ds = Bunch(data=points, target=label)

# Context managers close (and therefore flush) the file before it is
# reopened for reading.
with open('yingyang.dat', 'wb') as fh:
    pickle.dump(ds, fh)

# NOTE: pickle.load must only be used on trusted files such as the one
# written just above.
with open('yingyang.dat', 'rb') as fh:
    dataset = pickle.load(fh)