import os
from sklearn.feature_extraction.image import extract_patches_2d
from scipy.misc import imread
import pyprind
def get_fnames(basepath, category):
    """Return the paths of all entries in ``basepath/category``, sorted.

    Parameters
    ----------
    basepath : str
        Directory that contains one sub-directory per category.
    category : str
        Name of the sub-directory to list.

    Returns
    -------
    list of str
        ``os.path.join(basepath, category, name)`` for every directory
        entry, in sorted order.
    """
    folder = os.path.join(basepath, category)
    # os.listdir returns entries in arbitrary order; sorting makes any later
    # positional slicing of the result reproducible across runs and machines.
    return [os.path.join(folder, fname) for fname in sorted(os.listdir(folder))]
def extract_patches(fnames, patch_size=(20, 20), max_patches=50):
    """Sample flattened patches from every image listed in ``fnames``.

    Parameters
    ----------
    fnames : sequence of str
        Paths of the image files to read.
    patch_size : tuple of int, optional
        Patch dimensions passed to ``extract_patches_2d``.
    max_patches : int, optional
        Per-image limit on the number of patches, passed to
        ``extract_patches_2d``.

    Returns
    -------
    list
        The raveled (1-D) patches of all images, in file order.
    """
    patches = []
    progbar = pyprind.ProgBar(len(fnames))
    for fname in fnames:
        # flatten=1 asks imread for a single gray-scale layer.
        img = imread(fname, flatten=1)
        # Forward the caller's max_patches (it used to be hard-coded to 50,
        # which silently ignored the argument).
        patches.extend(
            patch.ravel()
            for patch in extract_patches_2d(img, patch_size,
                                            max_patches=max_patches)
        )
        progbar.update()
    return patches
# Root folder; get_fnames joins it with the category name.
basepath = '/Users/pierrelucbacon/Downloads/birdmachine/'

# List the files of each category, then sample patches from them
# (birds first, machines second, as before).
bird_fnames, machine_fnames = (
    get_fnames(basepath, label) for label in ('bird', 'machine')
)
bird_patches, machine_patches = (
    extract_patches(paths) for paths in (bird_fnames, machine_fnames)
)
0% 100% [##############################] Total time elapsed: 71.565 sec 0% 100% [##############################] Total time elapsed: 71.872 sec
from sklearn.cluster import MiniBatchKMeans
def get_atoms(patches, n_atoms=200):
    """Cluster ``patches`` with mini-batch k-means and return the centroids.

    Parameters
    ----------
    patches : sequence of 1-D arrays
        Samples to cluster.
    n_atoms : int, optional
        Number of clusters, i.e. number of atoms returned.

    Returns
    -------
    The ``cluster_centers_`` attribute of the fitted ``MiniBatchKMeans``.
    """
    # fit() returns the estimator itself, so the calls can be chained.
    return MiniBatchKMeans(n_clusters=n_atoms).fit(patches).cluster_centers_
# Learn one set of atoms per category (birds first, then machines).
bird_atoms, machine_atoms = (
    get_atoms(class_patches)
    for class_patches in (bird_patches, machine_patches)
)
def plot_atoms(atoms, patch_size=(20, 20), n_cols=20):
    """Draw every atom as a small gray-scale tile in a single figure.

    Parameters
    ----------
    atoms : sequence of 1-D arrays
        Each entry is reshaped to ``patch_size`` before being displayed.
    patch_size : tuple of int, optional
        Shape each atom is reshaped to.
    n_cols : int, optional
        Number of tiles per row. The number of rows is derived from
        ``len(atoms)``; 200 atoms with the default give a 10 x 20 grid.
    """
    # Ceiling division: enough rows to hold all atoms. A fixed 10 x 20 grid
    # would raise as soon as more than 200 atoms are passed.
    n_rows = -(-len(atoms) // n_cols)
    plt.figure(figsize=(6, 4))
    for i, patch in enumerate(atoms):
        plt.subplot(n_rows, n_cols, i + 1)
        plt.imshow(patch.reshape(*patch_size), cmap=plt.cm.gray,
                   interpolation='nearest')
        # Hide the axis ticks of each tile.
        plt.xticks(())
        plt.yticks(())
# Show the k-means atoms learned for each category, one figure per call.
plot_atoms(bird_atoms)
plot_atoms(machine_atoms)
from sklearn.decomposition import RandomizedPCA
# Fit a 200-component randomized PCA on the bird patches and plot its components.
# fit() returns the estimator, so construction and fitting are chained.
pca = RandomizedPCA(n_components=200).fit(bird_patches)
plot_atoms(pca.components_)

# Same for the machine patches; `pca` ends up bound to this second model.
pca = RandomizedPCA(n_components=200).fit(machine_patches)
plot_atoms(pca.components_)
For each image, extract patches and find the closest atom for every patch.
from sklearn.neighbors import NearestNeighbors
# Stack both categories' atoms: bird atoms first, machine atoms after them.
atoms = np.vstack([bird_atoms, machine_atoms])

# Nearest-neighbour index over the stacked atoms; fit() returns the estimator.
nn = NearestNeighbors(n_neighbors=4).fit(atoms)
NearestNeighbors(algorithm='auto', leaf_size=30, metric='minkowski', metric_params=None, n_neighbors=4, p=2, radius=1.0)
def bow_transform(fnames, patch_size=(20, 20), max_patches=50):
    """Encode each image by the indices of the atoms closest to its patches.

    Relies on the module-level ``nn`` nearest-neighbour index.

    Parameters
    ----------
    fnames : sequence of str
        Paths of the image files to encode.
    patch_size : tuple of int, optional
        Patch dimensions passed to ``extract_patches_2d``.
    max_patches : int, optional
        Per-image limit on the number of patches, passed to
        ``extract_patches_2d``.

    Returns
    -------
    numpy.ndarray
        One row per image; each row holds, for every extracted patch, the
        index of its single nearest atom as reported by ``nn.kneighbors``.
    """
    progbar = pyprind.ProgBar(len(fnames))
    bow = []
    for fname in fnames:
        # flatten=1 asks imread for a single gray-scale layer.
        img = imread(fname, flatten=1)
        # Forward the caller's max_patches (it used to be hard-coded to 50,
        # which silently ignored the argument).
        patches = [
            patch.ravel()
            for patch in extract_patches_2d(img, patch_size,
                                            max_patches=max_patches)
        ]
        # Only the closest atom is needed, whatever n_neighbors `nn` was built with.
        _, ind = nn.kneighbors(patches, n_neighbors=1)
        bow.append(ind.ravel())
        progbar.update()
    # NOTE(review): rows are raw per-patch atom indices, not per-atom counts;
    # confirm this is the representation the downstream classifier expects.
    return np.vstack(bow)
# Encode every file from position 100 onwards, for each category in turn.
bird_bow, machine_bow = (
    bow_transform(paths[100:]) for paths in (bird_fnames, machine_fnames)
)
0% 100% [##############################] Total time elapsed: 78.684 sec 0% 100% [##############################] Total time elapsed: 81.395 sec
# Design matrix: bird rows first, machine rows after them.
X = np.vstack([bird_bow, machine_bow])

# Matching labels: 0.0 for each bird row, 1.0 for each machine row.
y = np.concatenate([
    np.zeros(bird_bow.shape[0]),
    np.ones(machine_bow.shape[0]),
])
from sklearn.svm import SVC
from sklearn.grid_search import GridSearchCV
from sklearn.metrics import classification_report
from sklearn.cross_validation import train_test_split, StratifiedKFold
# Hold out half of the samples for testing; random_state fixes the split.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.5, random_state=0
)

# Two grids: RBF kernels over gamma and C, linear kernels over C only.
tuned_parameters = [
    {'kernel': ['rbf'], 'gamma': [1e-3, 1e-4], 'C': [1, 10, 100, 1000]},
    {'kernel': ['linear'], 'C': [1, 10, 100, 1000]},
]

# Cross-validate the grid search on 5 stratified folds of the training labels.
skf = StratifiedKFold(y_train, 5)
# fit() returns the fitted search object, so `clf` is bound to the same estimator.
clf = GridSearchCV(
    SVC(C=1), tuned_parameters, cv=skf, n_jobs=-1
).fit(X_train, y_train)