Copyright (C) 2017 Leen De Baets (leen.debaets@ugent.be)
This file is part of PLAID code repo.
This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with this program. If not, see http://www.gnu.org/licenses/
This code is based on the code of Jingkun Gao that analyses the PLAID data. The original code can be found in the notebooks on plaidplug.com or http://nbviewer.jupyter.org/github/jingkungao/PLAID/blob/master/ParseData.ipynb
%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import matplotlib.font_manager as fm
from matplotlib import rc
rc('font', family='Times New Roman')
import subprocess
import numpy as np
from random import shuffle
from datetime import datetime
# functions to read data and meta data
def read_data_given_id(path, ids, progress=True, last_offset=0):
    '''read data given a list of ids and CSV paths'''
    start = datetime.now()
    n = len(ids)
    if n == 0:
        return {}
    data = {}
    for (i, ist_id) in enumerate(ids, start=1):
        if progress and np.mod(i, np.ceil(n / 10)) == 0:
            # use floating-point division so the percentage is not truncated to 0
            print('%d/%d (%2.0f%s) have been read...\t time consumed: %ds'
                  % (i, n, 100.0 * i / n, '%', (datetime.now() - start).seconds))
        if last_offset == 0:
            data[ist_id] = np.genfromtxt(path + str(ist_id) + '.csv', delimiter=',',
                                         names='current,voltage', dtype=(float, float))
        else:
            # read only the last `last_offset` rows of the CSV via tail
            p = subprocess.Popen(['tail', '-' + str(int(last_offset)), path + str(ist_id) + '.csv'],
                                 stdout=subprocess.PIPE)
            data[ist_id] = np.genfromtxt(p.stdout, delimiter=',', names='current,voltage',
                                         dtype=(float, float))
    print('%d/%d (%2.0f%s) have been read (Done!)\t time consumed: %ds'
          % (n, n, 100, '%', (datetime.now() - start).seconds))
    return data
def clean_meta(ist):
    '''remove empty fields from the meta data of an instance'''
    clean_ist = ist.copy()
    for k, v in ist.items():
        if len(v) == 0:
            del clean_ist[k]
    return clean_ist
def parse_meta(meta):
    '''parse meta data for easy access: map instance id -> cleaned meta dict'''
    M = {}
    for m in meta:
        for app in m:
            M[int(app['id'])] = clean_meta(app['meta'])
    return M
# read meta
Data_path = '/home/guest/Dataset/PLAID/'
csv_path = Data_path + 'CSV/'
import json
with open(Data_path + 'meta1.json') as data_file:
    meta1 = json.load(data_file)
with open(Data_path + 'meta2.json') as data_file:
    meta2 = json.load(data_file)
Meta = parse_meta([meta1, meta2])  # consider PLAID 1 and 2
#Meta = parse_meta([meta1])
#Meta = parse_meta([meta2])
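As a quick sanity check, the illustrative snippet below inspects one parsed instance; the keys 'type', 'location' and 'header'/'collection_time' are exactly the ones used in the cells that follow.
# illustrative only: look at the first parsed instance
example_id = sorted(Meta.keys())[0]
print(example_id, Meta[example_id]['type'])           # appliance type string
print(Meta[example_id]['location'])                   # household identifier
print(Meta[example_id]['header']['collection_time'])  # measurement date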
# read data
# appliance types of all instances
Types = [x['type'] for x in Meta.values()]
# unique appliance types
Unq_type = list(set(Types))
Unq_type.sort()
IDs_for_read_data = list(Meta.keys())
# households of appliances
Locs = [x['header']['collection_time']+'_'+x['location'] for x in Meta.values()]
# unique households
Unq_loc = list(set(Locs))
Unq_loc.sort()
print('Number of households: %d\nNumber of total measurements: %d' % (len(Unq_loc), len(Locs)))
Number of households: 64
Number of total measurements: 1793
npts = 10000  # read only the last 10000 samples of every measurement (1/3 s at 30 kHz)
Data = read_data_given_id(csv_path,IDs_for_read_data,progress=True, last_offset=npts)
179/1793 ( 0%) have been read...	 time consumed: 5s
358/1793 ( 0%) have been read...	 time consumed: 10s
537/1793 ( 0%) have been read...	 time consumed: 15s
716/1793 ( 0%) have been read...	 time consumed: 20s
895/1793 ( 0%) have been read...	 time consumed: 24s
1074/1793 ( 0%) have been read...	 time consumed: 29s
1253/1793 ( 0%) have been read...	 time consumed: 34s
1432/1793 ( 0%) have been read...	 time consumed: 39s
1611/1793 ( 0%) have been read...	 time consumed: 45s
1790/1793 ( 0%) have been read...	 time consumed: 50s
1793/1793 (100%) have been read(Done!) 	 time consumed: 50s
type_Ids = {}
loc_Ids = {}
Mapping = {}
n = len(Data)
type_label = np.zeros(n, dtype='int')  # 0-based appliance-type label per instance
loc_label = np.zeros(n, dtype='int')   # 1-based household label per instance
for (ii, t) in enumerate(Unq_type):
    type_Ids[t] = [i for i, j in enumerate(Types) if j == t]
    type_label[type_Ids[t]] = ii
    Mapping[ii] = t
for (ii, t) in enumerate(Unq_loc):
    loc_Ids[t] = [i for i, j in enumerate(Locs) if j == t]
    loc_label[loc_Ids[t]] = ii + 1
print('number of different types: %d' % len(Unq_type))
print('number of different households: %d' % len(Unq_loc))
number of different types: 11
number of different households: 64
f1 = one representative period of the voltage and current, assuming the appliance is in steady state
fs = 30000  # sampling frequency (Hz)
f0 = 60     # mains frequency (Hz)
NS = int(fs / f0)    # number of samples per period (500)
NP = int(npts / NS)  # number of periods contained in npts samples (20)
# calculate the representative one period of steady state
# (mean of the aggregated signals over one cycle)
n = len(Data)
rep_I = np.empty([n, NS])
rep_V = np.empty([n, NS])
for i in range(n):
    ind = list(Data)[i]
    tempI = np.sum(np.reshape(Data[ind]['current'], [NP, NS]), 0) / NP
    tempV = np.sum(np.reshape(Data[ind]['voltage'], [NP, NS]), 0) / NP
    # align the current so every representative period starts near 0 at a rising zero crossing
    ix = np.argsort(np.abs(tempI))
    j = 0
    while True:
        if ix[j] < NS - 1 and tempI[ix[j] + 1] > tempI[ix[j]]:
            real_ix = ix[j]
            break
        else:
            j += 1
    rep_I[i, :] = np.hstack([tempI[real_ix:], tempI[:real_ix]])
    rep_V[i, :] = np.hstack([tempV[real_ix:], tempV[:real_ix]])
    rep_I[i, :] = rep_I[i, :] / max(rep_I[i, :])
    rep_V[i, :] = rep_V[i, :] / max(rep_V[i, :])
f1 = np.hstack([rep_I, rep_V])
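As an optional visual check, the sketch below plots the normalized representative cycle of the first instance; the figure layout is an illustrative choice, not part of the original pipeline.
fig, ax = plt.subplots(1, 2, figsize=(8, 3))
ax[0].plot(rep_V[0, :])
ax[0].set_title('representative voltage (one period)')
ax[1].plot(rep_I[0, :])
ax[1].set_title('representative current (one period)')
plt.show()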
f2 = a width-by-width binary image of the VI trajectory, describing the steady-state behaviour (width = 16 below)
from scipy import signal
def center(X, w):
    '''clip X to the symmetric range [-dist, dist] and return the bin size for a w-bin grid'''
    minX = np.amin(X)
    maxX = np.amax(X)
    dist = max(abs(minX), maxX)
    X[X < -dist] = -dist
    X[X > dist] = dist
    d = (maxX - minX) / (w - 1)
    return (X, d)
def get_img_from_VI(V, I, width, hard_threshold=False, para=.5):
    '''Get an image from the VI trajectory.
    hard threshold: set para as the threshold to cut off (5-10);
    soft threshold: set para to .1-.5 to shrink the intensity.'''
    d = V.shape[0]
    # interpolate (closing the loop) if the number of points is less than 2*width
    if d < 2 * width:
        newI = np.hstack([I, I[0]])
        newV = np.hstack([V, V[0]])
        oldt = np.linspace(0, d, d + 1)
        newt = np.linspace(0, d, 2 * width)
        I = np.interp(newt, oldt, newI)
        V = np.interp(newt, oldt, newV)
    # center the current and voltage, get the grid resolution of the mesh for the given width
    (I, d_c) = center(I, width)
    (V, d_v) = center(V, width)
    # find the cells the trajectory passes through in the current-voltage plane
    ind_c = np.ceil((I - np.amin(I)) / d_c)
    ind_v = np.ceil((V - np.amin(V)) / d_v)
    ind_c[ind_c == width] = width - 1
    ind_v[ind_v == width] = width - 1
    Img = np.zeros((width, width))
    for i in range(len(I)):
        Img[int(ind_c[i]), int(width - ind_v[i] - 1)] += 1
    if hard_threshold:
        Img[Img < para] = 0
        Img[Img != 0] = 1
        return Img
    else:
        return (Img / np.max(Img)) ** para
n = len(Data)
width = 16
Imgs = np.empty((n, width, width), dtype=np.float64)
for i in range(n):
    Imgs[i, :, :] = get_img_from_VI(rep_V[i, :], rep_I[i, :], width, hard_threshold=True, para=1)
f2 = np.reshape(Imgs, (n, width * width))
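Another optional, purely illustrative check: render one of the binary VI images.
plt.imshow(Imgs[0], cmap=cm.gray_r, interpolation='nearest')
plt.title('VI trajectory image of instance 0')
plt.show()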
f3 = the representative cycle of current and voltage (steady state), downsampled by dividing the cycle into bins and summing the samples in each bin
num = 20  # number of bins
def get_BinF(X, num):
    '''X should be an ndarray of shape (N, P); the output has shape (N, num)'''
    (N, P) = X.shape
    newP = int(np.floor(P / num) * num)
    newX = np.reshape(X[:, :newP], [N, num, newP // num])
    BinF = np.sum(newX, 2)
    return BinF
BinF_I = get_BinF(rep_I, num)
BinF_V = get_BinF(rep_V, num)
f3 = np.hstack([BinF_I, BinF_V])
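A brief illustrative check of the binning: with NS = 500 samples per period and num = 20, each bin sums floor(500/20) = 25 consecutive samples.
# a constant signal of ones maps to bins of 25
ones_check = get_BinF(np.ones((1, NS)), num)
print(ones_check)   # [[25. 25. ... 25.]]
print(f3.shape)     # (n, 40): 20 current bins followed by 20 voltage bins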
f4 = one representative period of the current, assuming the appliance is in steady state
f4 = rep_I
f5 = real and reactive power
n = len(Data)
f5 = np.empty([n, 2])
f5[:, 0] = np.mean(rep_I * rep_V, 1)  # real power of the normalized representative cycle
for i in range(n):
    # instance ids run from 1 to n; the voltage is shifted by a quarter period (NS//4 samples)
    tempI = Data[i + 1]['current'][NS + 1:]
    tempV = Data[i + 1]['voltage'][NS - NS // 4 + 1:-(NS // 4)]
    f5[i, 1] = np.sum(tempI * tempV) / (npts - NS)  # reactive power
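The quarter-period shift is what turns the power average into reactive power. A minimal sanity check on synthetic sinusoids (not PLAID data): with v(t) = sin(wt) and i(t) = sin(wt - phi), averaging i*v gives P = 0.5*cos(phi), and averaging i(t)*v(t - T/4) gives Q = 0.5*sin(phi).
phi = np.pi / 3
t = np.arange(10 * NS) / float(fs)  # ten full 60 Hz periods
v_s = np.sin(2 * np.pi * 60 * t)
i_s = np.sin(2 * np.pi * 60 * t - phi)
v_shift = np.roll(v_s, NS // 4)     # v(t - T/4); the wrap-around is harmless for a periodic signal
print(np.mean(i_s * v_s), 0.5 * np.cos(phi))       # both ~0.25
print(np.mean(i_s * v_shift), 0.5 * np.sin(phi))   # both ~0.433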
f6 = the harmonics of the instantaneous power
order = 21
p = (order - 1) // 2 + 1  # number of odd harmonics to extract (1st, 3rd, ..., 21st)
harmonics = np.linspace(1, order, num=p)
fs = 30000
npts = 10000
f0 = 120  # fundamental of the power signal: twice the 60 Hz line frequency
f6 = np.empty([n, p])
for i in range(n):
    temp_P = Data[i + 1]['current'] * Data[i + 1]['voltage']  # instantaneous power
    y = np.abs(np.fft.fft(temp_P))
    # fs/npts = 3 Hz per FFT bin, so the k-th harmonic of 120 Hz sits near bin 40*k
    h = 40 * harmonics + 1
    h = h.astype(int)
    f6[i, :] = y[h]
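The bin arithmetic can be verified directly; this snippet is only illustrative.
freqs = np.fft.fftfreq(npts, d=1.0 / fs)
print(freqs[1])       # 3.0 Hz per bin (fs/npts)
print(freqs[40])      # 120.0 Hz, the power fundamental
print(freqs[40 * 3])  # 360.0 Hz, its third harmonic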
import numpy as np
import matplotlib.pyplot as plt
from itertools import cycle
from sklearn import svm, datasets
from sklearn.metrics import roc_curve, auc, f1_score, accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import label_binarize
from sklearn.multiclass import OneVsRestClassifier
from scipy import interp
from sklearn.base import clone
from sklearn import preprocessing
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis as QDA
from sklearn import tree
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from imblearn.over_sampling import RandomOverSampler, SMOTE
from imblearn.under_sampling import RandomUnderSampler
from blagging import BlaggingClassifier  # local module
knn = KNeighborsClassifier(n_neighbors=1)
gnb = GaussianNB()
logistic = LogisticRegression(C=1e5)
lda = LDA(solver='lsqr', shrinkage='auto')
qda = QDA()
dTree = tree.DecisionTreeClassifier(max_depth=10)
rForest = RandomForestClassifier(max_depth=10,n_estimators=20)
svm = SVC(kernel='linear', probability=True)  # note: shadows the sklearn svm module imported above
adaBoost = AdaBoostClassifier()
# choose classifier
n = len(Unq_loc)
# train the classifiers with 11 outputs and store the results
y_proba_concat = {}
y_test_concat = {}
y_pred_concat = {}
for cname in ['knn', 'gnb', 'logistic', 'lda', 'qda', 'dTree', 'rForest', 'svm', 'adaBoost']:
    y_proba_concat[cname] = dict()
    y_test_concat[cname] = dict()
    y_pred_concat[cname] = dict()
    for k in ['normal', 'over', 'under', 'smote', 'blagging', 'weighted']:
        y_proba_concat[cname][k] = np.empty((0, 11))
        y_test_concat[cname][k] = np.empty((0))
        y_pred_concat[cname][k] = np.empty((0))
amount_houses_test = 1
for i in range(len(Unq_loc)):
    print(i)
    # leave-one-house-out: house labels are 1-based, so house i+1 is held out
    ix_train = [j for j in range(len(loc_label)) if loc_label[j] not in range(i+1, i+amount_houses_test+1)]
    X_train = f2[ix_train]
    y_train = type_label[ix_train]
    ix_test = [j for j in range(len(loc_label)) if loc_label[j] in range(i+1, i+amount_houses_test+1) and type_label[j] in y_train]
    X_test = f2[ix_test]
    y_test = type_label[ix_test]
    # oversample the train set, ALWAYS leave the test set as normal!
    # (fit_sample / ratio='all' is the older imbalanced-learn API; newer releases use fit_resample / sampling_strategy)
    ros = RandomOverSampler(ratio='all')
    X_over, y_over = ros.fit_sample(X_train, y_train)
    # undersample the train set
    ros = RandomUnderSampler(ratio='all')
    X_under, y_under = ros.fit_sample(X_train, y_train)
    # synthesize examples
    ros = SMOTE(ratio='all')
    X_smote, y_smote = ros.fit_sample(X_train, y_train)
    # uncomment this if the feature is binary
    #id2 = np.where(X_smote > 0)
    #id3 = np.where(X_smote < 0)
    #X_smote[id2] = 1
    #X_smote[id3] = 0
    for c, cname in zip([knn, gnb, logistic, lda, qda, dTree, rForest, svm, adaBoost],
                        ['knn', 'gnb', 'logistic', 'lda', 'qda', 'dTree', 'rForest', 'svm', 'adaBoost']):
        # normal
        print(cname)
        classifier = clone(c)
        classifier.fit(X_train, y_train)
        y_proba = classifier.predict_proba(X_test)
        y_pred = classifier.predict(X_test)
        y_proba_concat[cname]['normal'] = np.concatenate((y_proba_concat[cname]['normal'], y_proba))
        y_test_concat[cname]['normal'] = np.concatenate((y_test_concat[cname]['normal'], y_test))
        y_pred_concat[cname]['normal'] = np.concatenate((y_pred_concat[cname]['normal'], y_pred))
        # oversample
        classifier = clone(c)
        classifier.fit(X_over, y_over)
        y_proba = classifier.predict_proba(X_test)
        y_pred = classifier.predict(X_test)
        y_proba_concat[cname]['over'] = np.concatenate((y_proba_concat[cname]['over'], y_proba))
        y_test_concat[cname]['over'] = np.concatenate((y_test_concat[cname]['over'], y_test))
        y_pred_concat[cname]['over'] = np.concatenate((y_pred_concat[cname]['over'], y_pred))
        # undersample
        classifier = clone(c)
        classifier.fit(X_under, y_under)
        y_proba = classifier.predict_proba(X_test)
        y_pred = classifier.predict(X_test)
        y_proba_concat[cname]['under'] = np.concatenate((y_proba_concat[cname]['under'], y_proba))
        y_test_concat[cname]['under'] = np.concatenate((y_test_concat[cname]['under'], y_test))
        y_pred_concat[cname]['under'] = np.concatenate((y_pred_concat[cname]['under'], y_pred))
        # smote
        classifier = clone(c)
        classifier.fit(X_smote, y_smote)
        y_proba = classifier.predict_proba(X_test)
        y_pred = classifier.predict(X_test)
        y_proba_concat[cname]['smote'] = np.concatenate((y_proba_concat[cname]['smote'], y_proba))
        y_test_concat[cname]['smote'] = np.concatenate((y_test_concat[cname]['smote'], y_test))
        y_pred_concat[cname]['smote'] = np.concatenate((y_pred_concat[cname]['smote'], y_pred))
        # blagging
        classifier = clone(c)
        bc = OneVsRestClassifier(BlaggingClassifier(classifier))
        bc.fit(X_train, y_train)
        y_proba = bc.predict_proba(X_test)
        y_pred = bc.predict(X_test)
        y_proba_concat[cname]['blagging'] = np.concatenate((y_proba_concat[cname]['blagging'], y_proba))
        y_test_concat[cname]['blagging'] = np.concatenate((y_test_concat[cname]['blagging'], y_test))
        y_pred_concat[cname]['blagging'] = np.concatenate((y_pred_concat[cname]['blagging'], y_pred))
        # weighted (only for classifiers that support class_weight)
        if cname in ['logistic', 'dTree', 'rForest', 'svm']:
            classifier = clone(c)
            classifier.set_params(class_weight='balanced')
            classifier.fit(X_train, y_train)
            y_proba = classifier.predict_proba(X_test)
            y_pred = classifier.predict(X_test)
            y_proba_concat[cname]['weighted'] = np.concatenate((y_proba_concat[cname]['weighted'], y_proba))
            y_test_concat[cname]['weighted'] = np.concatenate((y_test_concat[cname]['weighted'], y_test))
            y_pred_concat[cname]['weighted'] = np.concatenate((y_pred_concat[cname]['weighted'], y_pred))
c = np.unique(y_test_concat['logistic']['normal'])
for cname in ['knn', 'gnb', 'logistic', 'lda', 'qda', 'dTree', 'rForest', 'svm', 'adaBoost']:
    for k in ['normal', 'over', 'under', 'smote', 'blagging', 'weighted']:
        if k == 'weighted' and cname not in ['logistic', 'dTree', 'rForest', 'svm']:
            continue
        y_test_concat[cname][k] = label_binarize(y_test_concat[cname][k], classes=c)
        y_pred_concat[cname][k] = label_binarize(y_pred_concat[cname][k], classes=c)
[Output: per-house progress (house index followed by each classifier name), interleaved with sklearn warnings: "The priors do not sum to 1. Renormalizing" (discriminant_analysis.py), "Variables are collinear" (discriminant_analysis.py), "invalid value encountered in divide" in the OneVsRestClassifier probability normalization (multiclass.py), and "overflow encountered in exp" (linear_model/base.py).]
fpr = dict()
tpr = dict()
thresholds = dict()
roc_auc = dict()
accuracy = dict()
f_measure = dict()
for cname in ['knn', 'gnb', 'logistic', 'lda', 'qda', 'dTree', 'rForest', 'svm', 'adaBoost']:
    fpr[cname] = dict()
    tpr[cname] = dict()
    thresholds[cname] = dict()
    roc_auc[cname] = dict()
    accuracy[cname] = dict()
    f_measure[cname] = dict()
    for k in ['normal', 'over', 'under', 'smote', 'blagging', 'weighted']:
        if k == 'weighted' and cname not in ['logistic', 'dTree', 'rForest', 'svm']:
            continue
        # Compute ROC curve and ROC area for each class
        fpr[cname][k] = dict()
        tpr[cname][k] = dict()
        thresholds[cname][k] = dict()
        roc_auc[cname][k] = dict()
        accuracy[cname][k] = dict()
        f_measure[cname][k] = dict()
        for i in range(11):
            fpr[cname][k][Mapping[i]], tpr[cname][k][Mapping[i]], thresholds[cname][k][Mapping[i]] = \
                roc_curve(y_test_concat[cname][k][:, i], np.nan_to_num(y_proba_concat[cname][k][:, i]))
            roc_auc[cname][k][Mapping[i]] = auc(fpr[cname][k][Mapping[i]], tpr[cname][k][Mapping[i]])
            accuracy[cname][k][Mapping[i]] = accuracy_score(y_test_concat[cname][k][:, i], y_pred_concat[cname][k][:, i])
            f_measure[cname][k][Mapping[i]] = f1_score(y_test_concat[cname][k][:, i], y_pred_concat[cname][k][:, i])
        # Compute micro-average ROC curve and ROC area
        fpr[cname][k]["micro"], tpr[cname][k]["micro"], thresholds[cname][k]["micro"] = \
            roc_curve(y_test_concat[cname][k].ravel(), np.nan_to_num(y_proba_concat[cname][k].ravel()))
        roc_auc[cname][k]["micro"] = auc(fpr[cname][k]["micro"], tpr[cname][k]["micro"])
        accuracy[cname][k]["micro"] = accuracy_score(y_test_concat[cname][k].ravel(), y_pred_concat[cname][k].ravel())
        f_measure[cname][k]["micro"] = f1_score(y_test_concat[cname][k].ravel(), y_pred_concat[cname][k].ravel())
        # First aggregate all false positive rates
        all_fpr = np.unique(np.concatenate([fpr[cname][k][Mapping[i]] for i in range(11)]))
        # Then interpolate all ROC curves at these points
        mean_tpr = np.zeros_like(all_fpr)
        mean_acc = 0
        mean_f = 0
        for i in range(11):
            mean_tpr += interp(all_fpr, fpr[cname][k][Mapping[i]], tpr[cname][k][Mapping[i]])
            mean_acc += accuracy[cname][k][Mapping[i]]
            mean_f += f_measure[cname][k][Mapping[i]]
        # Finally average and compute the macro AUC
        mean_tpr /= 11
        fpr[cname][k]["macro"] = all_fpr
        tpr[cname][k]["macro"] = mean_tpr
        roc_auc[cname][k]["macro"] = auc(fpr[cname][k]["macro"], tpr[cname][k]["macro"])
        accuracy[cname][k]["macro"] = mean_acc / 11
        f_measure[cname][k]["macro"] = mean_f / 11
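Why report both micro and macro averages? A toy illustration (assumed labels, not PLAID results): under class imbalance, micro-averaging weights every sample equally while macro-averaging weights every class equally, so the two can diverge.
y_true_toy = np.array([0, 0, 0, 0, 0, 0, 0, 0, 1, 1])
y_pred_toy = np.array([0, 0, 0, 0, 0, 0, 0, 0, 1, 0])
print(f1_score(y_true_toy, y_pred_toy, average='micro'))  # 0.9
print(f1_score(y_true_toy, y_pred_toy, average='macro'))  # ~0.80, dragged down by the rare class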
from sklearn.metrics import confusion_matrix
i_values = list(Mapping.values())
i_values.extend(['macro', 'micro'])
for i in i_values:
    to_print = ""
    for cname in ['knn', 'gnb', 'logistic', 'lda', 'qda', 'dTree', 'rForest', 'svm', 'adaBoost']:
        to_print += str(cname) + " \t "
        for k in ['normal', 'over', 'under', 'smote', 'blagging', 'weighted']:
            if k == 'weighted' and cname not in ['logistic', 'dTree', 'rForest', 'svm']:
                continue
            # difference in AUC (percentage points) w.r.t. the 'normal' strategy
            to_print += "{0:.2f}".format(roc_auc[cname][k][i] * 100 - roc_auc[cname]['normal'][i] * 100) + "$ \t $"
        to_print = to_print[:-3]
        to_print += "\\\\ \n"
    print("{} ({})".format(i, sum(np.array(Types) == i)))
    print("AUC")
    print("# h \tnormal\tover\tunder\tsmote\tblagging\tweighted")
    print(to_print)
# choose classifier
n = len(Unq_loc)
# second experiment: train with at most one instance of each appliance type per training house
y_proba_concat = {}
y_test_concat = {}
y_pred_concat = {}
for cname in ['knn', 'gnb', 'logistic', 'lda', 'qda', 'dTree', 'rForest', 'svm', 'adaBoost']:
    y_proba_concat[cname] = dict()
    y_test_concat[cname] = dict()
    y_pred_concat[cname] = dict()
    for k in ['normal', 'over', 'under', 'smote', 'blagging', 'weighted']:
        y_proba_concat[cname][k] = np.empty((0, 11))
        y_test_concat[cname][k] = np.empty((0))
        y_pred_concat[cname][k] = np.empty((0))
amount_houses_test = 1
for i in range(len(Unq_loc)):
    print(i)
    ix_train = []
    collected = {}
    for j in range(len(loc_label)):
        if loc_label[j] not in range(i+1, i+amount_houses_test+1):
            # j belongs to a training house
            if loc_label[j] not in collected:
                collected[loc_label[j]] = []
            # keep j only if this appliance type has not been collected yet for this house
            if type_label[j] not in collected[loc_label[j]]:
                collected[loc_label[j]].append(type_label[j])
                ix_train.append(j)
    X_train = f2[ix_train]
    y_train = type_label[ix_train]
    ix_test = [j for j in range(len(loc_label)) if loc_label[j] in range(i+1, i+amount_houses_test+1) and type_label[j] in y_train]
    X_test = f2[ix_test]
    y_test = type_label[ix_test]
    # oversample the train set, ALWAYS leave the test set as normal!
    ros = RandomOverSampler(ratio='all')
    X_over, y_over = ros.fit_sample(X_train, y_train)
    # undersample the train set
    ros = RandomUnderSampler(ratio='all')
    X_under, y_under = ros.fit_sample(X_train, y_train)
    # synthesize examples
    ros = SMOTE(ratio='all')
    X_smote, y_smote = ros.fit_sample(X_train, y_train)
    # uncomment this if the feature is binary
    #id2 = np.where(X_smote > 0)
    #id3 = np.where(X_smote < 0)
    #X_smote[id2] = 1
    #X_smote[id3] = 0
    for c, cname in zip([knn, gnb, logistic, lda, qda, dTree, rForest, svm, adaBoost],
                        ['knn', 'gnb', 'logistic', 'lda', 'qda', 'dTree', 'rForest', 'svm', 'adaBoost']):
        # normal
        print(cname)
        classifier = clone(c)
        classifier.fit(X_train, y_train)
        y_proba = classifier.predict_proba(X_test)
        y_pred = classifier.predict(X_test)
        y_proba_concat[cname]['normal'] = np.concatenate((y_proba_concat[cname]['normal'], y_proba))
        y_test_concat[cname]['normal'] = np.concatenate((y_test_concat[cname]['normal'], y_test))
        y_pred_concat[cname]['normal'] = np.concatenate((y_pred_concat[cname]['normal'], y_pred))
        # oversample
        classifier = clone(c)
        classifier.fit(X_over, y_over)
        y_proba = classifier.predict_proba(X_test)
        y_pred = classifier.predict(X_test)
        y_proba_concat[cname]['over'] = np.concatenate((y_proba_concat[cname]['over'], y_proba))
        y_test_concat[cname]['over'] = np.concatenate((y_test_concat[cname]['over'], y_test))
        y_pred_concat[cname]['over'] = np.concatenate((y_pred_concat[cname]['over'], y_pred))
        # undersample
        classifier = clone(c)
        classifier.fit(X_under, y_under)
        y_proba = classifier.predict_proba(X_test)
        y_pred = classifier.predict(X_test)
        y_proba_concat[cname]['under'] = np.concatenate((y_proba_concat[cname]['under'], y_proba))
        y_test_concat[cname]['under'] = np.concatenate((y_test_concat[cname]['under'], y_test))
        y_pred_concat[cname]['under'] = np.concatenate((y_pred_concat[cname]['under'], y_pred))
        # smote
        classifier = clone(c)
        classifier.fit(X_smote, y_smote)
        y_proba = classifier.predict_proba(X_test)
        y_pred = classifier.predict(X_test)
        y_proba_concat[cname]['smote'] = np.concatenate((y_proba_concat[cname]['smote'], y_proba))
        y_test_concat[cname]['smote'] = np.concatenate((y_test_concat[cname]['smote'], y_test))
        y_pred_concat[cname]['smote'] = np.concatenate((y_pred_concat[cname]['smote'], y_pred))
        # blagging
        classifier = clone(c)
        bc = OneVsRestClassifier(BlaggingClassifier(classifier))
        bc.fit(X_train, y_train)
        y_proba = bc.predict_proba(X_test)
        y_pred = bc.predict(X_test)
        y_proba_concat[cname]['blagging'] = np.concatenate((y_proba_concat[cname]['blagging'], y_proba))
        y_test_concat[cname]['blagging'] = np.concatenate((y_test_concat[cname]['blagging'], y_test))
        y_pred_concat[cname]['blagging'] = np.concatenate((y_pred_concat[cname]['blagging'], y_pred))
        # weighted (only for classifiers that support class_weight)
        if cname in ['logistic', 'dTree', 'rForest', 'svm']:
            classifier = clone(c)
            classifier.set_params(class_weight='balanced')
            classifier.fit(X_train, y_train)
            y_proba = classifier.predict_proba(X_test)
            y_pred = classifier.predict(X_test)
            y_proba_concat[cname]['weighted'] = np.concatenate((y_proba_concat[cname]['weighted'], y_proba))
            y_test_concat[cname]['weighted'] = np.concatenate((y_test_concat[cname]['weighted'], y_test))
            y_pred_concat[cname]['weighted'] = np.concatenate((y_pred_concat[cname]['weighted'], y_pred))
c = np.unique(y_test_concat['logistic']['normal'])
for cname in ['knn', 'gnb', 'logistic', 'lda', 'qda', 'dTree', 'rForest', 'svm', 'adaBoost']:
    for k in ['normal', 'over', 'under', 'smote', 'blagging', 'weighted']:
        if k == 'weighted' and cname not in ['logistic', 'dTree', 'rForest', 'svm']:
            continue
        y_test_concat[cname][k] = label_binarize(y_test_concat[cname][k], classes=c)
        y_pred_concat[cname][k] = label_binarize(y_pred_concat[cname][k], classes=c)
[Output: per-house progress for all 64 held-out houses (house index followed by each classifier name), interleaved with sklearn RuntimeWarnings from the discriminant analysis routines: "divide by zero encountered in power", "invalid value encountered in multiply", "divide by zero encountered in log", "overflow encountered in exp", and "overflow encountered in square".]
fpr = dict()
tpr = dict()
thresholds = dict()
roc_auc = dict()
accuracy = dict()
f_measure = dict()
for cname in ['knn', 'gnb', 'logistic', 'lda', 'qda', 'dTree', 'rForest', 'svm', 'adaBoost']:
    fpr[cname] = dict()
    tpr[cname] = dict()
    thresholds[cname] = dict()
    roc_auc[cname] = dict()
    accuracy[cname] = dict()
    f_measure[cname] = dict()
    for k in ['normal', 'over', 'under', 'smote', 'blagging', 'weighted']:
        if k == 'weighted' and cname not in ['logistic', 'dTree', 'rForest', 'svm']:
            continue
        # Compute ROC curve and ROC area for each class
        fpr[cname][k] = dict()
        tpr[cname][k] = dict()
        thresholds[cname][k] = dict()
        roc_auc[cname][k] = dict()
        accuracy[cname][k] = dict()
        f_measure[cname][k] = dict()
        for i in range(11):
            fpr[cname][k][Mapping[i]], tpr[cname][k][Mapping[i]], thresholds[cname][k][Mapping[i]] = \
                roc_curve(y_test_concat[cname][k][:, i], np.nan_to_num(y_proba_concat[cname][k][:, i]))
            roc_auc[cname][k][Mapping[i]] = auc(fpr[cname][k][Mapping[i]], tpr[cname][k][Mapping[i]])
            accuracy[cname][k][Mapping[i]] = accuracy_score(y_test_concat[cname][k][:, i], y_pred_concat[cname][k][:, i])
            f_measure[cname][k][Mapping[i]] = f1_score(y_test_concat[cname][k][:, i], y_pred_concat[cname][k][:, i])
        # Compute micro-average ROC curve and ROC area
        fpr[cname][k]["micro"], tpr[cname][k]["micro"], thresholds[cname][k]["micro"] = \
            roc_curve(y_test_concat[cname][k].ravel(), np.nan_to_num(y_proba_concat[cname][k].ravel()))
        roc_auc[cname][k]["micro"] = auc(fpr[cname][k]["micro"], tpr[cname][k]["micro"])
        accuracy[cname][k]["micro"] = accuracy_score(y_test_concat[cname][k].ravel(), y_pred_concat[cname][k].ravel())
        f_measure[cname][k]["micro"] = f1_score(y_test_concat[cname][k].ravel(), y_pred_concat[cname][k].ravel())
        # First aggregate all false positive rates
        all_fpr = np.unique(np.concatenate([fpr[cname][k][Mapping[i]] for i in range(11)]))
        # Then interpolate all ROC curves at these points
        mean_tpr = np.zeros_like(all_fpr)
        mean_acc = 0
        mean_f = 0
        for i in range(11):
            mean_tpr += interp(all_fpr, fpr[cname][k][Mapping[i]], tpr[cname][k][Mapping[i]])
            mean_acc += accuracy[cname][k][Mapping[i]]
            mean_f += f_measure[cname][k][Mapping[i]]
        # Finally average and compute the macro AUC
        mean_tpr /= 11
        fpr[cname][k]["macro"] = all_fpr
        tpr[cname][k]["macro"] = mean_tpr
        roc_auc[cname][k]["macro"] = auc(fpr[cname][k]["macro"], tpr[cname][k]["macro"])
        accuracy[cname][k]["macro"] = mean_acc / 11
        f_measure[cname][k]["macro"] = mean_f / 11
[Output: sklearn UndefinedMetricWarning: "F-score is ill-defined and being set to 0.0 due to no predicted samples."]
from sklearn.metrics import confusion_matrix
i_values = list(Mapping.values())
i_values.extend(['macro', 'micro'])
for i in i_values:
    to_print = ""
    for cname in ['knn', 'gnb', 'logistic', 'lda', 'qda', 'dTree', 'rForest', 'svm', 'adaBoost']:
        to_print += str(cname) + " & $ "
        for k in ['normal', 'over', 'under', 'smote', 'blagging', 'weighted']:
            if k == 'weighted' and cname not in ['logistic', 'dTree', 'rForest', 'svm']:
                continue
            if k == 'normal':
                # absolute AUC for the baseline ...
                to_print += "{0:.2f}".format(roc_auc[cname][k][i] * 100) + "$ & $"
            else:
                # ... and the difference (percentage points) w.r.t. the baseline for the other strategies
                to_print += "{0:.2f}".format(roc_auc[cname][k][i] * 100 - roc_auc[cname]['normal'][i] * 100) + "$ & $"
        to_print = to_print[:-3]
        to_print += "\\\\ \n"
    print("{} ({})".format(i, sum(np.array(Types) == i)))
    print("AUC")
    print("# h \tnormal\tover\tunder\tsmote\tblagging\tweighted")
    print(to_print)
Air Conditioner (208)
AUC
# h	normal	over	under	smote	blagging	weighted
knn & $ 52.30$ & $-0.03$ & $0.33$ & $0.27$ & $15.47$ \\
gnb & $ 78.14$ & $-0.01$ & $-11.18$ & $-0.05$ & $0.24$ \\
logistic & $ 65.62$ & $0.84$ & $-4.53$ & $1.88$ & $-1.36$ & $0.33$ \\
lda & $ 60.23$ & $-5.81$ & $-1.95$ & $-5.92$ & $6.20$ \\
qda & $ 51.70$ & $-6.77$ & $-1.97$ & $-6.76$ & $-1.70$ \\
dTree & $ 46.82$ & $0.09$ & $5.71$ & $7.54$ & $25.19$ & $3.51$ \\
rForest & $ 73.59$ & $-2.16$ & $-6.50$ & $-2.74$ & $-1.65$ & $-1.35$ \\
svm & $ 78.98$ & $-2.99$ & $-4.19$ & $-2.61$ & $-10.43$ & $-2.36$ \\
adaBoost & $ 60.25$ & $1.83$ & $2.85$ & $2.16$ & $10.89$ \\
Compact Fluorescent Lamp (220)
AUC
# h	normal	over	under	smote	blagging	weighted
knn & $ 96.75$ & $0.00$ & $-2.56$ & $0.03$ & $0.86$ \\
gnb & $ 97.46$ & $0.00$ & $-4.48$ & $0.00$ & $2.25$ \\
logistic & $ 99.05$ & $0.18$ & $-0.19$ & $0.25$ & $0.40$ & $-0.03$ \\
lda & $ 81.72$ & $-5.51$ & $2.16$ & $-5.76$ & $17.93$ \\
qda & $ 49.94$ & $16.08$ & $1.27$ & $15.73$ & $0.06$ \\
dTree & $ 94.61$ & $1.34$ & $-6.30$ & $2.14$ & $4.94$ & $-0.10$ \\
rForest & $ 97.65$ & $0.39$ & $0.95$ & $0.36$ & $2.11$ & $0.19$ \\
svm & $ 99.69$ & $-0.09$ & $-0.01$ & $-0.15$ & $-0.12$ & $-0.22$ \\
adaBoost & $ 94.21$ & $2.75$ & $-4.19$ & $1.02$ & $5.60$ \\
Fan (210)
AUC
# h	normal	over	under	smote	blagging	weighted
knn & $ 69.86$ & $-0.03$ & $-1.76$ & $-0.83$ & $11.97$ \\
gnb & $ 68.15$ & $-0.16$ & $-8.98$ & $0.50$ & $9.06$ \\
logistic & $ 76.92$ & $0.02$ & $-5.24$ & $-0.37$ & $-2.07$ & $-0.06$ \\
lda & $ 55.94$ & $0.24$ & $8.99$ & $-1.68$ & $20.73$ \\
qda & $ 51.98$ & $-1.25$ & $1.14$ & $-2.17$ & $-1.98$ \\
dTree & $ 63.38$ & $6.95$ & $0.38$ & $6.40$ & $16.52$ & $5.34$ \\
rForest & $ 82.44$ & $0.95$ & $-5.91$ & $0.68$ & $-0.51$ & $2.71$ \\
svm & $ 84.17$ & $-1.21$ & $-5.59$ & $-2.64$ & $-8.68$ & $-0.47$ \\
adaBoost & $ 66.85$ & $1.66$ & $1.53$ & $-7.65$ & $11.99$ \\
Fridge (90)
AUC
# h	normal	over	under	smote	blagging	weighted
knn & $ 71.13$ & $0.03$ & $-6.84$ & $1.40$ & $11.72$ \\
gnb & $ 78.14$ & $1.02$ & $-8.79$ & $0.05$ & $0.42$ \\
logistic & $ 76.35$ & $0.23$ & $-3.48$ & $0.89$ & $7.50$ & $0.05$ \\
lda & $ 51.41$ & $1.54$ & $10.46$ & $-0.69$ & $35.38$ \\
qda & $ 52.50$ & $-0.78$ & $-3.14$ & $-0.99$ & $-2.50$ \\
dTree & $ 59.38$ & $8.55$ & $-2.12$ & $6.53$ & $23.66$ & $5.91$ \\
rForest & $ 88.23$ & $-0.72$ & $-11.69$ & $-0.73$ & $-2.49$ & $-0.57$ \\
svm & $ 86.20$ & $-2.15$ & $-9.91$ & $-1.45$ & $-2.43$ & $0.75$ \\
adaBoost & $ 61.11$ & $-4.32$ & $5.28$ & $6.42$ & $22.05$ \\
Hairdryer (248)
AUC
# h	normal	over	under	smote	blagging	weighted
knn & $ 78.55$ & $14.33$ & $-6.75$ & $15.78$ & $17.11$ \\
gnb & $ 62.33$ & $-0.06$ & $6.16$ & $-0.07$ & $25.05$ \\
logistic & $ 93.67$ & $0.31$ & $-3.54$ & $-0.99$ & $-0.07$ & $0.63$ \\
lda & $ 77.93$ & $-3.10$ & $-0.65$ & $-2.38$ & $15.37$ \\
qda & $ 52.45$ & $25.21$ & $-2.45$ & $27.35$ & $-2.45$ \\
dTree & $ 90.43$ & $0.43$ & $-7.22$ & $1.13$ & $3.42$ & $-1.37$ \\
rForest & $ 96.53$ & $0.23$ & $-2.85$ & $-0.05$ & $-2.33$ & $-0.02$ \\
svm & $ 94.37$ & $0.89$ & $-1.89$ & $0.10$ & $-2.17$ & $-0.61$ \\
adaBoost & $ 62.69$ & $5.79$ & $11.82$ & $3.94$ & $31.64$ \\
Heater (85)
AUC
# h	normal	over	under	smote	blagging	weighted
knn & $ 83.05$ & $-18.43$ & $-19.91$ & $-12.60$ & $9.22$ \\
gnb & $ 86.26$ & $0.00$ & $-0.20$ & $0.00$ & $2.03$ \\
logistic & $ 92.45$ & $2.43$ & $2.67$ & $1.86$ & $2.48$ & $1.03$ \\
lda & $ 70.40$ & $1.12$ & $0.56$ & $2.85$ & $23.30$ \\
qda & $ 50.00$ & $29.88$ & $4.49$ & $29.88$ & $0.00$ \\
dTree & $ 87.66$ & $-0.97$ & $-1.00$ & $-2.61$ & $5.44$ & $-4.29$ \\
rForest & $ 92.03$ & $2.07$ & $2.87$ & $0.29$ & $0.54$ & $1.83$ \\
svm & $ 93.81$ & $0.26$ & $-1.48$ & $-2.18$ & $0.33$ & $-2.58$ \\
adaBoost & $ 88.62$ & $0.84$ & $-17.00$ & $2.35$ & $5.49$ \\
Incandescent Light Bulb (148)
AUC
# h	normal	over	under	smote	blagging	weighted
knn & $ 81.25$ & $0.03$ & $-3.56$ & $-1.60$ & $13.25$ \\
gnb & $ 91.01$ & $-0.00$ & $-5.64$ & $0.00$ & $2.09$ \\
logistic & $ 89.00$ & $-0.02$ & $3.27$ & $-0.30$ & $7.17$ & $0.01$ \\
lda & $ 50.66$ & $-6.88$ & $3.60$ & $-7.04$ & $44.19$ \\
qda & $ 48.04$ & $7.09$ & $0.71$ & $6.08$ & $1.96$ \\
dTree & $ 86.66$ & $0.04$ & $-9.60$ & $-4.91$ & $8.73$ & $-1.39$ \\
rForest & $ 96.24$ & $0.30$ & $-0.70$ & $0.34$ & $0.56$ & $-0.52$ \\
svm & $ 97.51$ & $-0.33$ & $-0.01$ & $-0.13$ & $0.44$ & $0.12$ \\
adaBoost & $ 82.99$ & $0.69$ & $-3.11$ & $3.92$ & $12.46$ \\
Laptop (207)
AUC
# h	normal	over	under	smote	blagging	weighted
knn & $ 94.98$ & $-0.72$ & $-8.19$ & $-1.08$ & $2.21$ \\
gnb & $ 91.39$ & $-0.00$ & $-7.28$ & $0.03$ & $2.82$ \\
logistic & $ 95.22$ & $-0.16$ & $-4.39$ & $-0.71$ & $1.72$ & $0.02$ \\
lda & $ 22.35$ & $5.19$ & $4.59$ & $5.77$ & $74.34$ \\
qda & $ 53.38$ & $13.83$ & $2.11$ & $12.61$ & $-3.38$ \\
dTree & $ 89.54$ & $0.39$ & $-7.81$ & $-1.05$ & $7.67$ & $1.06$ \\
rForest & $ 96.95$ & $1.36$ & $-2.93$ & $-0.58$ & $-0.27$ & $0.35$ \\
svm & $ 98.73$ & $-0.21$ & $-6.10$ & $-0.27$ & $-2.77$ & $-0.14$ \\
adaBoost & $ 79.43$ & $2.71$ & $1.57$ & $-1.20$ & $16.48$ \\
Microwave (229)
AUC
# h	normal	over	under	smote	blagging	weighted
knn & $ 94.50$ & $0.00$ & $-3.37$ & $-0.37$ & $3.32$ \\
gnb & $ 94.08$ & $0.01$ & $-17.16$ & $0.01$ & $-0.98$ \\
logistic & $ 97.77$ & $0.07$ & $-2.01$ & $0.02$ & $-0.33$ & $-0.02$ \\
lda & $ 36.58$ & $2.69$ & $-6.11$ & $3.36$ & $60.72$ \\
qda & $ 49.99$ & $5.01$ & $3.15$ & $6.17$ & $0.01$ \\
dTree & $ 83.29$ & $0.90$ & $-12.14$ & $5.19$ & $12.17$ & $-3.53$ \\
rForest & $ 98.56$ & $-0.21$ & $-3.11$ & $0.35$ & $-0.69$ & $0.16$ \\
svm & $ 99.31$ & $-0.06$ & $-4.76$ & $-0.05$ & $-0.94$ & $-0.13$ \\
adaBoost & $ 68.47$ & $12.69$ & $-1.84$ & $4.88$ & $27.14$ \\
Vacuum (73)
AUC
# h	normal	over	under	smote	blagging	weighted
knn & $ 99.33$ & $0.00$ & $-0.52$ & $-0.09$ & $-0.62$ \\
gnb & $ 89.73$ & $0.00$ & $-1.37$ & $0.00$ & $1.30$ \\
logistic & $ 99.88$ & $-0.02$ & $-0.42$ & $0.08$ & $0.02$ & $-0.00$ \\
lda & $ 83.80$ & $-1.34$ & $-8.96$ & $-0.98$ & $16.12$ \\
qda & $ 54.11$ & $6.85$ & $7.31$ & $5.48$ & $-4.11$ \\
dTree & $ 95.28$ & $-1.10$ & $2.41$ & $-0.68$ & $4.56$ & $-0.51$ \\
rForest & $ 99.99$ & $-0.01$ & $-0.03$ & $-0.02$ & $-0.01$ & $-0.01$ \\
svm & $ 99.98$ & $0.00$ & $-0.04$ & $0.00$ & $-0.00$ & $0.00$ \\
adaBoost & $ 83.12$ & $6.39$ & $-1.16$ & $7.50$ & $16.65$ \\
Washing Machine (75)
AUC
# h	normal	over	under	smote	blagging	weighted
knn & $ 79.88$ & $-0.12$ & $0.34$ & $2.17$ & $9.09$ \\
gnb & $ 86.72$ & $-0.05$ & $-1.77$ & $-0.07$ & $0.21$ \\
logistic & $ 90.40$ & $-0.10$ & $-0.04$ & $0.89$ & $-2.54$ & $-0.10$ \\
lda & $ 63.35$ & $-6.49$ & $-3.83$ & $-5.30$ & $26.53$ \\
qda & $ 49.39$ & $0.99$ & $5.35$ & $3.65$ & $0.61$ \\
dTree & $ 78.86$ & $-10.38$ & $-8.16$ & $-8.76$ & $8.61$ & $-9.34$ \\
rForest & $ 89.92$ & $0.93$ & $2.67$ & $1.84$ & $0.76$ & $2.05$ \\
svm & $ 91.88$ & $0.61$ & $0.18$ & $0.99$ & $-1.11$ & $1.06$ \\
adaBoost & $ 75.83$ & $-1.46$ & $-3.49$ & $-5.72$ & $14.44$ \\
macro (0)
AUC
# h	normal	over	under	smote	blagging	weighted
knn & $ 81.96$ & $-0.45$ & $-4.80$ & $0.28$ & $8.51$ \\
gnb & $ 83.95$ & $0.07$ & $-5.52$ & $0.03$ & $4.05$ \\
logistic & $ 88.76$ & $0.34$ & $-1.63$ & $0.32$ & $1.17$ & $0.17$ \\
lda & $ 59.49$ & $-1.67$ & $0.81$ & $-1.62$ & $30.99$ \\
qda & $ 51.23$ & $8.74$ & $1.63$ & $8.82$ & $-1.23$ \\
dTree & $ 79.63$ & $0.56$ & $-4.17$ & $0.99$ & $10.99$ & $-0.43$ \\
rForest & $ 92.02$ & $0.29$ & $-2.48$ & $-0.02$ & $-0.36$ & $0.44$ \\
svm & $ 93.16$ & $-0.48$ & $-3.07$ & $-0.76$ & $-2.54$ & $-0.42$ \\
adaBoost & $ 74.88$ & $2.69$ & $-0.71$ & $1.60$ & $15.89$ \\
micro (0)
AUC
# h	normal	over	under	smote	blagging	weighted
knn & $ 81.47$ & $1.29$ & $-4.36$ & $1.60$ & $10.85$ \\
gnb & $ 84.31$ & $0.03$ & $-6.94$ & $0.03$ & $4.85$ \\
logistic & $ 88.40$ & $0.02$ & $-2.33$ & $-0.16$ & $4.17$ & $0.09$ \\
lda & $ 54.24$ & $-0.69$ & $0.48$ & $-0.65$ & $38.81$ \\
qda & $ 51.31$ & $10.19$ & $0.74$ & $10.17$ & $-1.31$ \\
dTree & $ 80.36$ & $0.94$ & $-5.35$ & $2.03$ & $11.48$ & $0.06$ \\
rForest & $ 93.99$ & $0.04$ & $-3.66$ & $-0.29$ & $-0.68$ & $-0.15$ \\
svm & $ 95.57$ & $-0.75$ & $-2.69$ & $-1.09$ & $-1.74$ & $-0.18$ \\
adaBoost & $ 78.85$ & $0.88$ & $-4.09$ & $-1.09$ & $13.87$ \\