%matplotlib inline  

import numpy
import ujson as json
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import IPython
import scipy.stats
import statsmodels.stats.proportion as proportion
import statsmodels.stats.weightstats as weightstats

from moztelemetry.spark import get_pings
from __future__ import division

IPython.core.pylabtools.figsize(18, 7)

sc.defaultParallelism

pings = get_pings(sc, "Firefox", "beta", "35.0", "*", ("20141229", "20150104"))

%%capture
pings.count()

experiment_name = "flash-protectedmode-beta35@experiments.mozilla.org"

def fast_filter(raw):
    return "activeExperiment" in raw

def parse(raw):
    return json.loads(raw)

def is_experiment(ping):
    info = ping["info"]
    return info["activeExperiment"] == experiment_name and info["OS"] == "WINNT" \
        and (info["version"] == "6.1" or info["version"] == "6.0")

def extract_count(container, histogram_name, kind):
    histograms = container.get(histogram_name, {})
    histogram = histograms.get(kind, None)
    return histogram["sum"] if histogram else 0

def is_control(ping):
    return ping["info"]["activeExperimentBranch"] == "control"

def extract(ping):
    keyed = ping.get("keyedHistograms", {})
    
    clientID = ping.get("clientID", None)
    uptime = ping["simpleMeasurements"]["uptime"]
    control = is_control(ping)
    
    aborts = extract_count(keyed, "SUBPROCESS_ABNORMAL_ABORT", "plugin")
    crashes = extract_count(keyed, "SUBPROCESS_CRASHES_WITH_DUMP", "plugin")
    hangs = extract_count(keyed, "SUBPROCESS_CRASHES_WITH_DUMP", "pluginhang")
    
    lag = ping["histograms"].get("EVENTLOOP_UI_LAG_EXP_MS", None)
    lag = lag[:-5] if lag else lag # only take buckets
    
    return (clientID, aborts, crashes, hangs, control, lag, uptime)

data = pings.filter(fast_filter).map(parse).filter(is_experiment).map(extract)

%%capture
collected_data = data.collect()

len(collected_data)

frame = pd.DataFrame(collected_data, columns=["clientID", "aborts", "crashes", "hangs", "control", "lag", "uptime"])

frame = frame[frame["uptime"] > 0] # Ignore submissions with negative or zero uptime (see Bug 1106048)
frame = frame.groupby("clientID").last().reset_index() # To avoid bias, use only the last submission for each Client ID

frame["lag"] = frame["lag"].map(np.array)
frame["lag"] = frame["lag"] / frame["uptime"] # Normalize lag histograms by uptime
frame["ping"] = 1

len(frame)

frame["aborts"] = frame["aborts"] - frame["crashes"] - frame["hangs"]

binary_frame = frame.copy()
binary_frame["aborts"] = binary_frame["aborts"].map(bool)
binary_frame["crashes"] = binary_frame["crashes"].map(bool)
binary_frame["hangs"] = binary_frame["hangs"].map(bool)

frame.to_json("1111791.json")

def error(row):
    count = row[True]
    nobs = row.sum()
    return map(lambda x: float("{:.3f}".format(x)), proportion.proportion_confint(count, nobs, method="wilson"))

def compare_proportions(metric):
    agg = pd.pivot_table(binary_frame, index=["control", metric], values="ping", aggfunc=np.sum)
  
    default = pd.DataFrame({False: {False: 0, True: 0}, True: {False: 0, True: 0}})
    contingency_table = agg.unstack().combine_first(default)
    
    control = contingency_table.ix[True]
    experiment = contingency_table.ix[False]

    p1 = control[True]/control.sum()
    p2 = experiment[True]/experiment.sum()
    pvalue = scipy.stats.fisher_exact(contingency_table)[1]
   
    print "Contingency table:\n"
    print contingency_table
    print "\nThe estimated proportion of {} in the experiment branch is {:.3f} and its CI is {}".format(metric, p2, error(experiment))
    print "The estimated proportion of {} in the control branch is {:.3f} and its CI is {}".format(metric, p1, error(control))
    print "The probability that the ratios are different purely by chance is {:.3f}".format(pvalue)

compare_proportions("aborts")

def compare_means(metric):
    control_mask = frame["control"] == True

    experiment = weightstats.DescrStatsW(frame[-control_mask][metric])
    control = weightstats.DescrStatsW(frame[control_mask][metric])
    
    mean_diff = experiment.mean - control.mean
    comparator = weightstats.CompareMeans(experiment, control)
    
    ci_diff = comparator.tconfint_diff(usevar="unequal")[1] - mean_diff
    ci_control = control.tconfint_mean()[1] - control.mean
    ci_experiment = experiment.tconfint_mean()[1] - experiment.mean

    print "The mean number of {} of the experiment branch is {:.3f} +- {:.3f}".format(metric, experiment.mean, ci_experiment)
    print "The mean number of {} of the control branch is {:.3f} +- {:.3f}".format(metric, control.mean, ci_control)
    print "The difference between the means is {:.3f} +- {:.3f}".format(mean_diff, ci_diff)
    print "The probability to see this difference purely by chance is {:.3f}".format(comparator.ttest_ind(usevar="unequal")[1])

compare_means("aborts")

def hist_failures(metric):
    aborts = frame.groupby(["control", metric])["clientID"].count().unstack().fillna(0)
    aborts = (100 * aborts.T / aborts.sum(axis=1)).ix[1:] # remove 0 as its high frequency hides the ones of the other values
    aborts.plot(kind="bar")
    plt.title("Histogram of submissions for {}".format(metric))
    plt.ylabel("submissions %")
    
hist_failures("aborts")

compare_proportions("crashes")

compare_means("crashes")

hist_failures("crashes")

compare_proportions("hangs")

compare_means("hangs")

hist_failures("hangs")

control_mask = frame["control"]
control = frame[control_mask]["lag"]
experiment = frame[-control_mask]["lag"]

# Chi-Squared Histogram distance http://www.cs.huji.ac.il/~ofirpele/publications/ECCV2010.pdf
def chi2_distance(histA, histB, eps = 1e-10):
    d = 0.5 * np.sum([((a - b) ** 2) / (a + b + eps)
        for (a, b) in zip(histA, histB)])

    return d

def mc_permutation_test(xs, ys, num):
    n, k = len(xs), 0
    h1 = xs.sum()
    h2 = ys.sum()
        
    diff = chi2_distance(h1, h2)
    zs = pd.concat([xs, ys])
    zs.index = np.arange(0, len(zs))

    for j in range(num):
        zs = zs.reindex(np.random.permutation(zs.index))    
        h1 = zs[:n].sum()
        h2 = zs[n:].sum()        
        k += diff < chi2_distance(h1, h2)

    return k / num

print "{:.3f}".format(mc_permutation_test(control, experiment, 1000))

labels = ["0", "50", "74", "110", "163", "242", "359", "532", "789", "1.17k", 
          "1.74k", "2.57k", "3.81k", "5.65k", "8.38k", "12.42k", "18.42k", "27.3k", "40.47k", "60k"]

experiment_total = pd.Series(experiment.sum(), index=labels)
control_total = pd.Series(control.sum(), index=labels)
normalizer = control_total.sum()

control_total = 100*control_total/normalizer
experiment_total = 100*experiment_total/normalizer # Normalize both histogram relative to control to keep the proportions

experiment_total.plot(kind="bar", alpha=0.5, color="green", label="experiment")
control_total.plot(kind="bar", alpha=0.5, color="blue", label="control")
plt.xlabel("Time it takes for the message before a UI message (ms)")
plt.ylabel("Lag frequency %")
plt.legend()