# Import the required libraries
import cv2
import numpy as np
import matplotlib.pyplot as plt
import os
# Resize all images to 500x500 and convert from BGR to RGB color model
size = (500, 500)


def img_to_numpy(filename):
    """Load an image file as an RGB array resized to the module-level ``size``.

    Args:
        filename: Path of the image file to read with OpenCV.

    Returns:
        The array produced by ``cv2.resize`` after the channel axis has been
        reordered from OpenCV's BGR layout to RGB.

    Raises:
        IOError: If OpenCV cannot read or decode ``filename``.
    """
    img = cv2.imread(filename)
    if img is None:
        # cv2.imread reports failure by returning None instead of raising;
        # fail here with a clear message rather than with an opaque
        # TypeError on the indexing below.
        raise IOError("could not read image: %s" % filename)
    # Reverse the channel axis: BGR -> RGB
    img = img[:, :, [2, 1, 0]]
    # Bring every image to the same dimensions
    return cv2.resize(img, size)
# Create list of all images in directory
img_dir = "/Users/danielforsyth/Desktop/Dev/Python/Image Classification/paintings/"
# Skip the .DS_Store entry while listing instead of calling
# images.remove(...) afterwards: list.remove raises ValueError when the
# entry does not exist, which broke the script on any directory without it.
images = [os.path.join(img_dir, f)
          for f in os.listdir(img_dir)
          if f != '.DS_Store']

# Label 1 when the file name contains "magritte", otherwise 0
labels = [1 if "magritte" in os.path.basename(f) else 0 for f in images]

# Load every image as a resized RGB array (same order as `images`/`labels`)
data = [img_to_numpy(image) for image in images]
# Collapse an image array into a single dimension
def flatten(image):
    """Return ``image.flatten()``.

    For a NumPy array this is a one-dimensional copy of the array's
    elements; the input array is left untouched.
    """
    return image.flatten()
# Build the list of flattened arrays, one entry per loaded image
flat_list = [flatten(picture) for picture in data]
# sklearn.cross_validation was removed in scikit-learn 0.20; the function now
# lives in sklearn.model_selection. Fall back to the old module so that old
# installations keep working.
try:
    from sklearn.model_selection import train_test_split
except ImportError:
    from sklearn.cross_validation import train_test_split

# Hold out 30% of the samples for testing. The inputs are converted to arrays
# first: train_test_split returns the same container type it was given, and
# plain lists have no .shape attribute for the print below.
# NOTE: no random_state is passed, so the split differs from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    np.asarray(flat_list), np.asarray(labels), test_size=0.3)
print("%s %s" % (X_train.shape, X_test.shape))
# Output recorded in the original source:
# (84, 750000) (36, 750000)
from sklearn import linear_model
from sklearn import metrics

# Linear classifier with default settings, fitted on the training split
clf = linear_model.SGDClassifier()
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)
# A bare expression is a no-op when this runs as a script, so print the
# predicted and the true labels explicitly.
print(y_pred)
print(y_test)
# print(metrics.accuracy_score(y_test, y_pred))
# Output recorded in the original source (kept as a comment; as a bare line
# it raised NameError because `array` is not defined here):
# array([1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1,
#        1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 1])
from sklearn import svm

# Support vector classifier with a linear kernel
svc = svm.SVC(kernel='linear')
svc.fit(X_train, y_train)
y_pred = svc.predict(X_test)
# Single-argument print(...) behaves the same under Python 2 and Python 3
print(metrics.accuracy_score(y_test, y_pred))
# Output recorded in the original source:
# 0.75
from sklearn.svm import LinearSVC

# Linear SVM (LinearSVC) with default settings
clf = LinearSVC()
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)
# Formatting into one string keeps the printed text identical under
# Python 2 and Python 3.
print("classification accuracy: %s" % metrics.accuracy_score(y_test, y_pred))
# Output recorded in the original source (as a bare line it was a
# SyntaxError):
# classification accuracy: 0.75
from sklearn.naive_bayes import MultinomialNB

# Multinomial naive Bayes with default settings
clf = MultinomialNB()
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)
# Formatting into one string keeps the printed text identical under
# Python 2 and Python 3.
print("classification accuracy: %s" % metrics.accuracy_score(y_test, y_pred))
# Output recorded in the original source (as a bare line it was a
# SyntaxError):
# classification accuracy: 0.805555555556
from sklearn.naive_bayes import GaussianNB

# Gaussian naive Bayes with default settings
clf = GaussianNB()
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)
# Formatting into one string keeps the printed text identical under
# Python 2 and Python 3.
print("classification accuracy: %s" % metrics.accuracy_score(y_test, y_pred))
# Output recorded in the original source (as a bare line it was a
# SyntaxError):
# classification accuracy: 0.722222222222
from sklearn.ensemble import RandomForestClassifier

# Random forest with 100 trees, fitted on the training split
rf = RandomForestClassifier(min_samples_split=2, n_estimators=100)
rf.fit(X_train, y_train)

# NOTE: despite its name, y_pred holds class probabilities here, not labels
y_pred = rf.predict_proba(X_test)

# Probabilities for every image in flat_list (training and test samples
# alike), keeping only column 1 formatted as a string. Column 1 is presumably
# the probability of label 1 ("magritte") -- verify against rf.classes_.
predicted_probs = rf.predict_proba(flat_list)
predicted_probs = ["%f" % prob for prob in predicted_probs[:, 1]]