import numpy as np from sklearn import cross_validation from sklearn import svm from matplotlib import pyplot as plt from sklearn import preprocessing from sklearn import metrics %matplotlib inline import pandas as pd import matplotlib from minepy import MINE import copy from mpl_toolkits.mplot3d import Axes3D mydir = "" filename = mydir+"dataset_har.csv" #filename = 'test.csv' conv = lambda valstr: float(valstr.replace(',','.')) c = {3:conv, 4:conv, 5:conv} col = [6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17] age = np.genfromtxt(filename,delimiter=";", skip_header=1 ,usecols=(2),dtype=int) weight = np.genfromtxt(filename,delimiter=";", skip_header=1 ,usecols=(4),dtype=int) data_height_bmi= np.genfromtxt(filename,delimiter=";", skip_header=1 ,usecols=(3,5),dtype=None, converters = c) data= np.genfromtxt(filename,delimiter=";", skip_header=1 ,usecols=col,dtype=int) data=data*1.0 target = np.genfromtxt(filename,delimiter=";", skip_header=1 ,usecols=18,dtype=str) rawdata = copy.copy(data) data = preprocessing.scale(data) def digitize(starget): """ Convert string output labels to floats. Machine learning classifer only accepts float as the output value. """ stringlabels= np.unique(starget) lenlabels = len(stringlabels) dindex = 0 mydict = {} for dlabel in stringlabels: mydict[dlabel] = dindex dindex = dindex + 1 print mydict dtarget = copy.copy(starget) for i in stringlabels: myindex = np.where(starget == i)[0] dtarget[myindex]=mydict.get(i) return dtarget dtarget = digitize(target) stand_index= np.where(target=='standing') walk_index = np.where(target == 'walking') sitdown_index = np.where(target == 'sittingdown') standup_index = np.where(target == 'standingup') sit_index = np.where(target == 'sitting') height = data_height_bmi[:,0] BMI = data_height_bmi[:,1] x1 = data[:,0] y1 = data[:,1] z1 = data[:,2] x2 = data[:,3] y2 = data[:,4] z2 = data[:,5] x3 = data[:,6] y3 = data[:,7] z3 = data[:,8] x4 = data[:,9] y4 = data[:,10] z4 = data[:,11] x1r = rawdata[:,0] y1r = rawdata[:,1] z1r = rawdata[:,2] x2r = rawdata[:,3] y2r = rawdata[:,4] z2r = rawdata[:,5] x3r = rawdata[:,6] y3r = rawdata[:,7] z3r = rawdata[:,8] x4r = rawdata[:,9] y4r = rawdata[:,10] z4r = rawdata[:,11] position = (dtarget.astype(np.float)) plt.subplot(2,2,1) plt.scatter(x1r,y1r,c=position, cmap=plt.cm.Paired, alpha=0.9) plt.xlabel('x1') plt.ylabel('y1') plt.subplot(2,2,2) plt.scatter(x4r,y4r,c=position, cmap=plt.cm.Paired,alpha=0.9) plt.xlabel('x4') plt.ylabel('y4') plt.subplot(2,2,3) plt.scatter(x2r,y2r,c=position, cmap=plt.cm.Paired,alpha=0.9) plt.xlabel('x2') plt.ylabel('y2') plt.subplot(2,2,4) plt.scatter(x3r,y3r,c=position, cmap=plt.cm.Paired,alpha=0.9) plt.xlabel('x3') plt.ylabel('y3') plt.subplots_adjust(hspace=0.35,wspace=0.45) vec1 = rawdata[:,0:3] vec1_stand = vec1[stand_index,:][0] vec1_walk = vec1[walk_index,][0] vec1_sitdown = vec1[sitdown_index,][0] vec1_standup = vec1[standup_index,:][0] vec1_sit = vec1[sit_index,:][0] vec2 = rawdata[:,3:6] print np.squeeze(vec1_stand).shape print vec1_stand.shape fig = plt.figure() ax = Axes3D(fig, elev=-150, azim=110) ax.scatter(vec1_stand[:,0],vec1_stand[:,1],vec1_stand[:,2], marker='x',color='b',label='standing') ax.scatter(vec1_sit[:,0],vec1_sit[:,1],vec1_sit[:,2], marker='o',color='r',label='sitting',alpha=0.5) #ax.scatter(vec1_walk[:,0],vec1_walk[:,1],vec1_walk[:,2], marker='s',color='m',label='walking') ax.scatter(vec1_standup[:,0],vec1_standup[:,1],vec1_standup[:,2], marker='>',color='g',label='stand up',alpha=0.9) ax.set_title("Sitting, Standing, Walking") ax.set_xlabel("x-axis") #ax.w_xaxis.set_ticklabels([]) ax.set_ylabel("y-axis") #ax.w_yaxis.set_ticklabels([]) ax.set_zlabel("z-axis") #ax.w_zaxis.set_ticklabels([]) plt.legend(loc='upper left', numpoints=1, ncol=3, fontsize=8, bbox_to_anchor=(0, 0)) colors = ['b','r','g'] markers = ['x','o','>'] scatter1_proxy = matplotlib.lines.Line2D([0],[0], linestyle="none", c=colors[0], marker = markers[0]) scatter2_proxy = matplotlib.lines.Line2D([0],[0], linestyle="none", c=colors[1], marker = markers[1]) scatter3_proxy = matplotlib.lines.Line2D([0],[0], linestyle="none", c=colors[2], marker = markers[2]) ax.legend([scatter1_proxy, scatter2_proxy,scatter3_proxy], ['standing', 'sitting','standup'], numpoints = 1) plt.show() position = (dtarget.astype(np.float)) plt.subplot(2,2,1) plt.scatter(vec1[:,0],vec1[:,1],c=position, cmap=plt.cm.Paired, alpha=0.5) plt.xlabel('x1') plt.ylabel('y1') plt.subplot(2,2,2) plt.scatter(vec1_stand[:,0],vec1_stand[:,1],alpha=0.5) plt.xlabel('x1_stand') plt.ylabel('y1_stand') plt.subplot(2,2,3) plt.scatter(vec1_sit[:,0],vec1_sit[:,1],alpha=0.5) plt.xlabel('x1_sit') plt.ylabel('y1_sot') plt.subplot(2,2,4) plt.scatter(vec1_walk[:,0],vec1_walk[:,1],alpha=0.5) plt.xlabel('x1_walk') plt.ylabel('y1_walk') plt.subplots_adjust(hspace=0.35,wspace=0.45) X_train, X_test, y_train, y_test = cross_validation.train_test_split(data, dtarget, test_size=0.4, random_state=0) clf = svm.SVC(kernel='rbf',C=10) clf.fit(X_train, y_train) prediction = (clf.predict(X_test)) cvscore = cross_validation.cross_val_score(clf, X_test, y_test, scoring='accuracy',cv=5) print "Accuracy is: ", np.mean(cvscore) my_accuracy = metrics.accuracy_score(y_test,prediction) print my_accuracy from sklearn.metrics import confusion_matrix cm=confusion_matrix(y_test,prediction) print cm target_label = ('sitting','sittingdown','standing','standingup','walking') # {'standing': 2, 'walking': 4, 'sittingdown': 1, 'standingup': 3, 'sitting': 0} def plot_confusion_matrix(cm, title='Confusion matrix', cmap=plt.cm.Blues): plt.imshow(cm, interpolation='nearest', cmap=cmap) plt.title(title) plt.colorbar() tick_marks = np.arange(len(target_label)) plt.xticks(tick_marks, target, rotation=45) plt.yticks(tick_marks, target) #plt.tight_layout() plt.ylabel('True label') plt.xlabel('Predicted label') # Compute confusion matrix cm = confusion_matrix(y_test, prediction) np.set_printoptions(precision=2) print('Confusion matrix, without normalization') print(cm) plt.figure() plot_confusion_matrix(cm) # Normalize the confusion matrix by row (i.e by the number of samples # in each class) cm_normalized = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis] print('Normalized confusion matrix') print(cm_normalized) plt.figure() plot_confusion_matrix(cm_normalized, title='Normalized confusion matrix') from sklearn import ensemble rfc = ensemble.RandomForestClassifier(n_estimators=100) rfc.fit(X_train,y_train) rfc_prediction = rfc.predict(X_test) rfc_cm = confusion_matrix(y_test,rfc_prediction) print target_label print rfc_cm rf_accuracy = metrics.accuracy_score(y_test,rfc_prediction) print rf_accuracy