# Modified stylesheet for the notebook.
from IPython.core.display import HTML


def css_styling():
    """Load the notebook's custom CSS and return it wrapped as HTML."""
    # Context manager ensures the file handle is closed promptly
    # (the original leaked the handle via open(...).read()).
    with open("custom.css", "r") as f:
        styles = f.read()
    return HTML(styles)


css_styling()

import matplotlib.pyplot as plt
import numpy as np
from collections import defaultdict

np.random.seed(seed=111111)
np.set_printoptions(precision=3, suppress=True)

import sys
sys.path.append('../src/')
from change_detector import ChangeDetector, OnlineSimulator

# Create a seasonal signal:
# a metric that rises from 0 to 5 each calendar month.
sig = np.linspace(0, 5, num=30)
# range() replaces Python 2's xrange(), which is a NameError on Python 3.
sig = np.concatenate([sig for _ in range(12)])

# Add a jump
jump_size = 5
sig[250:] = sig[250:] + jump_size

# Noise
noise = np.random.normal(size=sig.shape, scale=jump_size * 0.02)

plt.figure(figsize=(15, 5))
plt.plot(sig + noise, 'b.', linestyle='')
plt.plot(sig + noise, 'b-', alpha=0.15)
plt.ylim(0, 15)
plt.xlim(0, 365)
plt.title("Imaginary Seasonal signal")

# The builtin reload() was removed in Python 3; use importlib.reload().
import importlib
import change_detector
importlib.reload(change_detector)
from change_detector import ChangeDetector, OnlineSimulator
from collections import deque


class ZScoreDetector(ChangeDetector):
    """A change detection algorithm based on z-scores.

    1. Welford's algorithm collects the mean and variance of the full
       stream incrementally.
    2. In parallel, a window of recent observations is kept (all values
       stored) and local statistics are computed on it directly.
    3. A z-score compares the sample (window) distribution against the
       population (global) distribution; the stopping rule fires on the
       magnitude of that z-score.
    """

    def __init__(self, window_size=100, threshold=0.05):
        """
        Parameters
        ----------
        window_size : int
            Number of recent observations kept for the local statistics.
        threshold : float
            Stopping rule triggers once |z-score| exceeds this value.
        """
        super(ZScoreDetector, self).__init__()

        # hyper-parameters
        self.threshold = threshold
        self.window_size = window_size

        # Interim and calculated values
        self.k = 0         # number of samples seen so far (global)
        self.g_mean = 0.0  # global (stream) mean
        self.s = 0.0       # Welford accumulator; variance = s / (k + 1)

        # Sliding window of the most recent observations; the deque
        # automatically drops the oldest value once maxlen is reached.
        self.window = deque(maxlen=window_size)

        # ... and, finally, our residuals
        self.z_score_ = np.nan
        self.z_array = []

    def update_residuals(self, new_signal_value):
        """Fold one new observation into the global/window statistics
        and recompute the z-score residual."""
        self._update_base_residuals(new_signal_value)
        x = new_signal_value

        # Add new value to the local window (deque drops the oldest
        # value automatically to maintain the window size).
        self.window.append(x)

        # --- Global statistics via Welford's method ---
        oldm = self.g_mean
        newm = oldm + (x - oldm) / (self.k + 1)
        s = self.s + (x - newm) * (x - oldm)
        g_mean = newm                        # global mean
        g_std = np.sqrt(s / (self.k + 1))    # global (population) std

        # --- Local statistics on the stored window ---
        w_mean = np.mean(self.window)        # window mean

        # Standard error of the window mean under the global std.
        # NOTE(review): on the very first sample g_std is 0, so the
        # z-score is 0/0 -> nan; nan compares False in the stopping
        # rule, so no spurious trigger occurs.
        SE = g_std / np.sqrt(self.window_size)

        # Z-score (residual).
        # (The original also computed std_diff, mean_diff and w_std,
        # none of which were used; they are removed here — mean_diff
        # even risked a divide-by-zero when g_mean == 0.)
        self.z_score_ = (w_mean - g_mean) / SE
        self.z_array.append(self.z_score_)

        # Store attributes
        self.g_mean = g_mean
        self.s = s

        # Update k (size of the global sample count). This must be done
        # at the end: the Welford update above assumes k is the count
        # BEFORE this observation.
        self.k += 1

    def check_stopping_rules(self, new_signal_value):
        """Trigger the detector once |z-score| exceeds the threshold."""
        if np.abs(self.z_score_) > self.threshold:
            self.rules_triggered = True
# Our first, most basic, signal: a large step down at index 100.
basic_sig = np.ones(150)
basic_sig[:100] *= 50
basic_sig[100:] *= 4

# On Signal 1
detector = ZScoreDetector(window_size=10, threshold=10.0)
OnlineSimulator(detector, basic_sig).run(
    signal_name='Original first signal, sig1'
)

plt.figure(figsize=(15, 5))
plt.plot(sig + noise, 'b.', linestyle='')
plt.plot(sig + noise, 'b-', alpha=0.15)
plt.ylim(0, 15)
plt.xlim(0, 365)
plt.title("Imaginary Seasonal signal")

detector = ZScoreDetector(window_size=30, threshold=10.0)
OnlineSimulator(detector, sig).run(signal_name="Imaginary Seasonal Signal")

# Second seasonal signal: 8 repeats of a 20-sample ramp, with a jump.
jump_size = 30
sig2 = np.linspace(40, 50, num=20)
# range() replaces Python 2's xrange(), which is a NameError on Python 3.
sig2 = np.concatenate([sig2 for _ in range(8)])

# Add the change we want to detect.
sig2[120:] = sig2[120:] + jump_size
noise = np.random.normal(size=sig2.shape, scale=jump_size * 0.05)
sig2 = sig2 + noise

detector = ZScoreDetector(window_size=20, threshold=10.0)
OnlineSimulator(detector, sig2).run(
    signal_name="Second Imaginary Seasonal Signal"
)

jump_size = 20
signal_size = 300

# Create a signal with a small jump (change) at the midpoint.
sig3 = np.ones(signal_size)
sig3[:150] = 30
sig3[150:] = 30 + jump_size

# Add noise
noise = np.random.normal(scale=0.10 * jump_size, size=signal_size)
sig3 = sig3 + noise

# Add outliers: a few values pinned to 0 and a few to 90.
outlier_idx = np.random.choice(signal_size, size=3)
sig3[outlier_idx] = 0
outlier_idx = np.random.choice(signal_size, size=2)
sig3[outlier_idx] = 90

# What does this look like?
plt.figure(figsize=(15, 5))
plt.plot(sig3, 'b.')
plt.plot(sig3, '-', alpha=0.15)
plt.ylim(0, 100)

# On Signal 3
detector = ZScoreDetector(window_size=40, threshold=8.0)
OnlineSimulator(detector, sig3).run(
    signal_name="Signal with noise and outlier"
)

# Note [Aman Ahuja]:
# So far this demonstrates the need for windows, but it still has a
# dependency on hyperparameters:
# - How would we know what to pick for the z-score threshold?
# - We also now have a new hyperparameter: the window size.
This discussion leads to the Page-Hinkley stopping rule. The likelihood ratio — the ratio of the Gaussian probability densities — can be used to detect the time at which the change occurred, provided the magnitude of the jump (the change in mean) is known a priori. Otherwise, the maximum-likelihood estimate of the point (time) of change can be used, which requires a threshold value lambda for the size of the change in mean. This leads to the Page-Hinkley stopping rule (E. S. Page, 1954) and the cumulative sum algorithm (CUSUM). There is a recursive method to compute the stopping rule.