"""Bird-vs-machine image classification from small image patches.

The script:

1. lists the image files under the ``bird`` and ``machine`` sub-directories
   of ``basepath``;
2. extracts flattened patches from every image;
3. learns a set of "atoms" per category with mini-batch k-means (and, for
   visual comparison only, fits a randomized PCA on the same patches);
4. encodes each image as the indices of the atoms nearest to its patches;
5. grid-searches an SVM over those encodings.

NOTE(review): ``plt`` is used by ``plot_atoms`` but is not imported anywhere
in this file; presumably it is supplied by the interactive session (e.g.
``%pylab``) -- confirm, or add ``import matplotlib.pyplot as plt``.

NOTE(review): ``scipy.misc.imread``, ``sklearn.grid_search``,
``sklearn.cross_validation`` and ``RandomizedPCA`` are the names the original
script uses; they are kept as-is so the script keeps targeting the same
library versions.
"""
import os

import numpy as np
import pyprind
from scipy.misc import imread
from sklearn.cluster import MiniBatchKMeans
from sklearn.cross_validation import train_test_split, StratifiedKFold
from sklearn.decomposition import RandomizedPCA
from sklearn.feature_extraction.image import extract_patches_2d
from sklearn.grid_search import GridSearchCV
from sklearn.metrics import classification_report
from sklearn.neighbors import NearestNeighbors
from sklearn.svm import SVC


def get_fnames(basepath, category):
    """Return the path of every directory entry in ``basepath/category``.

    Entries are returned in ``os.listdir`` order and are not filtered, so any
    non-image file in the directory is included as well.
    """
    category_dir = os.path.join(basepath, category)
    return [os.path.join(category_dir, fname)
            for fname in os.listdir(category_dir)]


def extract_patches(fnames, patch_size=(20, 20), max_patches=50):
    """Collect flattened patches from every image in ``fnames``.

    Each file is read with ``imread(fname, flatten=1)`` and passed to
    ``extract_patches_2d``; every resulting patch is flattened with
    ``ravel()`` and appended to one list shared by all images.

    Parameters
    ----------
    fnames : sequence of str
        Paths of the images to read.
    patch_size : tuple of int
        Patch dimensions forwarded to ``extract_patches_2d``.
    max_patches : int
        Per-image patch limit forwarded to ``extract_patches_2d``.

    Returns
    -------
    list
        One flattened patch per list element.
    """
    patches = []
    progbar = pyprind.ProgBar(len(fnames))
    for fname in fnames:
        img = imread(fname, flatten=1)
        # Forward the caller's max_patches; it used to be hard-coded to 50,
        # which made the parameter a no-op.
        patches.extend(
            patch.ravel()
            for patch in extract_patches_2d(img, patch_size,
                                            max_patches=max_patches))
        progbar.update()
    return patches


def get_atoms(patches, n_atoms=200):
    """Fit ``MiniBatchKMeans`` on ``patches`` and return its cluster centers.

    ``n_atoms`` is the number of clusters, hence the number of rows returned.
    """
    kmeans = MiniBatchKMeans(n_clusters=n_atoms)
    kmeans.fit(patches)
    return kmeans.cluster_centers_


def plot_atoms(atoms, patch_size=(20, 20)):
    """Draw every atom as a grayscale image in a 10 x 20 grid of subplots.

    Each atom is reshaped to ``patch_size`` before being shown.

    NOTE(review): the grid is fixed at 10 x 20, so this presumably only works
    for up to 200 atoms -- confirm before calling with more.
    """
    plt.figure(figsize=(6, 4))
    for i, patch in enumerate(atoms):
        plt.subplot(10, 20, i + 1)
        plt.imshow(patch.reshape(*patch_size), cmap=plt.cm.gray,
                   interpolation='nearest')
        # Hide the axis ticks so only the patch itself is visible.
        plt.xticks(())
        plt.yticks(())


def bow_transform(fnames, patch_size=(20, 20), max_patches=50):
    """Encode each image as the indices of the atoms nearest to its patches.

    For every file, patches are extracted exactly as in ``extract_patches``
    and each patch is mapped to the index of its single nearest neighbour in
    the module-level ``nn`` index (which must already be fitted).

    Parameters
    ----------
    fnames : sequence of str
        Paths of the images to encode.
    patch_size : tuple of int
        Patch dimensions forwarded to ``extract_patches_2d``.
    max_patches : int
        Per-image patch limit forwarded to ``extract_patches_2d``.

    Returns
    -------
    numpy.ndarray
        ``np.vstack`` of one row of atom indices per image.

    NOTE(review): rows hold raw atom indices in patch order rather than a
    histogram of atom counts -- confirm this is the intended encoding.
    """
    progbar = pyprind.ProgBar(len(fnames))
    bow = []
    for fname in fnames:
        img = imread(fname, flatten=1)
        # Forward the caller's max_patches; it used to be hard-coded to 50.
        patches = [patch.ravel()
                   for patch in extract_patches_2d(img, patch_size,
                                                   max_patches=max_patches)]
        # Only the neighbour indices are needed; distances are discarded.
        _, ind = nn.kneighbors(patches, n_neighbors=1)
        bow.append(ind.ravel())
        progbar.update()
    return np.vstack(bow)


# --- Load file lists and extract training patches ---------------------------
basepath = '/Users/pierrelucbacon/Downloads/birdmachine/'
bird_fnames = get_fnames(basepath, 'bird')
machine_fnames = get_fnames(basepath, 'machine')

bird_patches = extract_patches(bird_fnames)
machine_patches = extract_patches(machine_fnames)

# --- Learn and display one k-means dictionary per category ------------------
bird_atoms = get_atoms(bird_patches)
machine_atoms = get_atoms(machine_patches)

plot_atoms(bird_atoms)
plot_atoms(machine_atoms)

# --- PCA components of the same patches, displayed the same way -------------
pca = RandomizedPCA(n_components=200)
pca.fit(bird_patches)
plot_atoms(pca.components_)

pca = RandomizedPCA(n_components=200)
pca.fit(machine_patches)
plot_atoms(pca.components_)

# --- Nearest-atom index over the combined dictionary ------------------------
atoms = np.vstack((bird_atoms, machine_atoms))
nn = NearestNeighbors(n_neighbors=4)
nn.fit(atoms)

# --- Encode images (everything after the first 100 files per category) ------
bird_bow = bow_transform(bird_fnames[100:])
machine_bow = bow_transform(machine_fnames[100:])

# Label 0 = bird, label 1 = machine.
X = np.vstack((bird_bow, machine_bow))
y = np.concatenate((np.zeros(bird_bow.shape[0]),
                    np.ones(machine_bow.shape[0])))

# --- Grid-search an SVM on a 50/50 train/test split -------------------------
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.5, random_state=0)

tuned_parameters = [
    {'kernel': ['rbf'], 'gamma': [1e-3, 1e-4], 'C': [1, 10, 100, 1000]},
    {'kernel': ['linear'], 'C': [1, 10, 100, 1000]},
]

skf = StratifiedKFold(y_train, 5)
clf = GridSearchCV(SVC(C=1), tuned_parameters, cv=skf, n_jobs=-1)
clf.fit(X_train, y_train)