import scipy.stats
import matplotlib.pyplot as plt
import numpy as np
%matplotlib inline
# Draw two synthetic training samples of 1000 points each from normal
# distributions that share a standard deviation of 0.5 but have opposite
# means: 'nat' is centred on -0.5 and 'droog' on +0.5.
# NOTE(review): the names look like Dutch for "wet" / "dry" -- confirm.
nat = scipy.stats.norm(loc=-0.5, scale=0.5).rvs(size=1000)
droog = scipy.stats.norm(loc=0.5, scale=0.5).rvs(size=1000)
# Overlay the histograms of the two training samples in a single figure.
# The samples overlap, and histogram bars are opaque by default, so the
# second histogram would hide the first wherever the two coincide; drawing
# both semi-transparent keeps each distribution visible.
_ = plt.hist(nat, alpha=0.5, label='nat')
_ = plt.hist(droog, alpha=0.5, label='droog')
plt.legend()
# Out: <matplotlib.legend.Legend at 0x111c58750>
# Draw 1000 unlabelled points from a standard normal distribution (mean 0,
# standard deviation 1) and show how they are distributed.
test = scipy.stats.norm(loc=0, scale=1).rvs(size=1000)
_ = plt.hist(test)
# Estimate the mean (loc) and standard deviation (scale) of each training
# sample by fitting a normal distribution to it.
loc_nat, scale_nat = scipy.stats.norm.fit(nat)
loc_droog, scale_droog = scipy.stats.norm.fit(droog)


def z_nat(value):
    """Return the z-score of *value* under the normal fitted to ``nat``.

    *value* may be a scalar or a NumPy array; arrays are handled
    element-wise.
    """
    return (value - loc_nat) / scale_nat


def z_droog(value):
    """Return the z-score of *value* under the normal fitted to ``droog``.

    *value* may be a scalar or a NumPy array; arrays are handled
    element-wise.
    """
    return (value - loc_droog) / scale_droog


# A test point is labelled 'nat' when it lies fewer standard deviations from
# the fitted 'nat' mean than from the fitted 'droog' mean; every other point
# (ties included) is labelled 'droog'.
isnat = np.abs(z_nat(test)) < np.abs(z_droog(test))
# Plot the classified test points: one histogram per predicted label, with
# 'nat' drawn first and 'droog' second.
for _mask, _label in ((isnat, 'nat'), (~isnat, 'droog')):
    _ = plt.hist(test[_mask], label=_label)
plt.legend()
# Out: <matplotlib.legend.Legend at 0x111561a50>
# NearestNeighbors is a class inside the sklearn.neighbors package, not a
# submodule, so "import sklearn.neighbors.NearestNeighbors" raises
# ModuleNotFoundError.  Import the package (which binds the name "sklearn",
# as the original statement intended) and pull the class from it.
import sklearn.neighbors
from sklearn.neighbors import NearestNeighbors