# Modified Stylesheet for notebook. from IPython.core.display import HTML def css_styling(): styles = open("custom.css", "r").read() return HTML(styles) css_styling() import matplotlib.pyplot as plt import numpy as np import sys; sys.path.append('../src/') from change_detector import ChangeDetector, OnlineSimulator Welford's Method to compute running mean and running variance/std Welford 1962, see also Knuth, AOCP Vol. 2 page 232 http://www.johndcook.com/standard_deviation.html def welford(vals): #via https://gist.github.com/alexalemi/2151722 m = vals[0] s = 0 for k in range(1, len(vals)): oldm = m x = vals[k] newm = oldm + (x - oldm) / (k + 1) s = s + (x - newm) * (x - oldm) m = newm return s / (k+1) #Testing seq = np.random.randn(100) print welford(seq) print np.var(seq) class Welford_Detector(ChangeDetector): """ Testing Welford's algorithm implemented like our other change detector """ def __init__(self): super(Welford_Detector, self).__init__() #Interim and calculated values self.k = 0 #k is the signal_size #For Welford's self.mean_ = 0.0 self.s = 0.0 # var = s / (k + 1) def update_residuals(self, new_signal_value): #Update residuals #Welford's. oldm = self.mean_ x = new_signal_value newm = oldm + (x - oldm) / (self.k + 1) self.s = self.s + (x - newm) * (x - oldm) self.mean_ = newm self.var_ = self.s / (self.k+1) self.k += 1 # Initialize detector welford = Welford_Detector() # Create a random signal signal = np.random.randint(0,100,size=300) # Run simulation and fetch residual history sim = OnlineSimulator(welford, signal) sim.run(plot=False) residuals = sim.residuals_history # Comparisons of Welford's Method with numpy builtin stats print "{:<40} : {:>}".format( "Variance via Welford's algorithm", residuals['var_'][-1] #(the last value is the variance after the entire signal has been processed.) ) print "{:<40} : {:>}".format( "Variance via Numpy", np.var(signal), ) print "{:<40} : {:>}".format( "Mean via Welford's algorithm", residuals['mean_'][-1] #(the last value is the variance after the entire signal has been processed.) ) print "{:<40} : {:>}".format( "Mean via Numpy", np.mean(signal), ) print welford.__dict__ In this way we can collect decent statistics on a stream of data without having to store the entire history. In this way we can collect decent statistics on a stream of data without having to store the entire history.