#import correct libraries import cv2 import numpy as np import matplotlib.pyplot as plt import os #Resize all images to 500x500 and convert from BGR to RGB color model size = (500,500) def img_to_numpy(filename): img = cv2.imread(filename) #convert to RGB img = img[:,:,[2,1,0]] #change size img = cv2.resize(img,size) return img #Create list of all images in directory img_dir = "/Users/danielforsyth/Desktop/Dev/Python/Image Classification/paintings/" images = [img_dir+ f for f in os.listdir(img_dir)] images.remove('/Users/danielforsyth/Desktop/Dev/Python/Image Classification/paintings/.DS_Store') labels = [1 if "magritte" in f.split('/')[-1] else 0 for f in images] data = [] for image in images: img = img_to_numpy(image) data.append(img) #flatten array def flatten(image): img = image.flatten() return img #create list of flattened array of all images flat_list = [] for i in data: img = flatten(i) flat_list.append(img) from sklearn.cross_validation import train_test_split X_train, X_test, y_train, y_test = train_test_split(flat_list, labels, test_size=0.3) print X_train.shape, X_test.shape from sklearn import linear_model from sklearn import metrics clf = linear_model.SGDClassifier() clf.fit(X_train, y_train) y_pred = clf.predict(X_test) y_pred y_test # print metrics.accuracy_score(y_test,y_pred) from sklearn import svm svc = svm.SVC(kernel='linear') svc.fit(X_train, y_train) y_pred = svc.predict(X_test) print metrics.accuracy_score(y_test,y_pred) from sklearn.svm import LinearSVC clf = LinearSVC() clf.fit(X_train, y_train) y_pred = clf.predict(X_test) print "classification accuracy:", metrics.accuracy_score(y_test, y_pred) from sklearn.naive_bayes import MultinomialNB clf = MultinomialNB() clf.fit(X_train, y_train) y_pred = clf.predict(X_test) print "classification accuracy:", metrics.accuracy_score(y_test, y_pred) from sklearn.naive_bayes import GaussianNB clf = GaussianNB() clf.fit(X_train, y_train) y_pred = clf.predict(X_test) print "classification accuracy:", metrics.accuracy_score(y_test, y_pred) from sklearn.ensemble import RandomForestClassifier rf = RandomForestClassifier(n_estimators=100, min_samples_split=2) rf.fit(X_train, y_train) y_pred = rf.predict_proba(X_test) predicted_probs = rf.predict_proba(flat_list) predicted_probs = ["%f" % x[1] for x in predicted_probs]