e10s analysis¶

In [1]:

import ujson as json
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import plotly.plotly as py
import IPython

from moztelemetry.spark import get_pings, get_one_ping_per_client, get_pings_properties
from moztelemetry.histogram import Histogram
from montecarlino import grouped_permutation_test
from __future__ import division

from moztelemetry import histogram_tools

%pylab inline
IPython.core.pylabtools.figsize(18, 7)

Populating the interactive namespace from numpy and matplotlib

In [2]:

# Display the default parallelism reported by `sc`.
# NOTE(review): `sc` is never defined in this notebook — presumably the
# SparkContext injected by the notebook environment; confirm.
sc.defaultParallelism

Out[2]:

In [155]:

def chi2_distance(xs, ys, eps = 1e-10, normalize = True):
    """Chi-squared distance between the column-wise totals of two samples.

    Both ``xs`` and ``ys`` are reduced with ``.sum(axis=0)``. When
    ``normalize`` is true, each total is then divided by its own sum so the
    two are compared as proportions. The distance is
    ``0.5 * sum((a - b)**2 / (a + b + eps))`` over the position-wise pairs of
    the two totals; ``eps`` keeps the denominator non-zero for bins that are
    empty on both sides.
    """
    totals_x = xs.sum(axis=0)
    totals_y = ys.sum(axis=0)

    if normalize:
        totals_x = totals_x / totals_x.sum()
        totals_y = totals_y / totals_y.sum()

    # Bins are paired by position (zip), not by index label.
    terms = []
    for p, q in zip(totals_x, totals_y):
        terms.append(((p - q) ** 2) / (p + q + eps))

    return 0.5 * np.sum(terms)

def median_diff(xs, ys):
    """Return the median of ``xs`` minus the median of ``ys``."""
    median_x = np.median(xs)
    median_y = np.median(ys)
    return median_x - median_y

def compare_histogram(histogram, e10s, none10s):
    """Plot and test one histogram for the e10s and non-e10s groups.

    Runs ``grouped_permutation_test`` with ``chi2_distance`` over the two
    groups, draws each group's summed histogram as a percentage bar chart on
    twin y-axes, and prints the resulting p-value.

    Parameters
    ----------
    histogram : str
        Label used for the x-axis and in the printed message.
    e10s, none10s :
        Per-ping histograms of each group. They must support ``.sum()``, and
        that sum must support a pandas-style ``.plot(kind="bar")``
        (presumably a Series of per-ping histogram Series — confirm against
        ``get_pings_properties``).
    """
    pvalue = grouped_permutation_test(chi2_distance, [e10s, none10s], num_samples=100)

    eTotal = e10s.sum()
    nTotal = none10s.sum()

    # Express each group's bins as a percentage of that group's total.
    eTotal = 100*eTotal/eTotal.sum()
    nTotal = 100*nTotal/nTotal.sum()

    fig = plt.figure()
    fig.subplots_adjust(hspace=0.3)

    ax = fig.add_subplot(1, 1, 1)
    ax2 = ax.twinx()
    width = 0.4

    # position=0 / position=1 place the two bar series side by side.
    eTotal.plot(kind="bar", alpha=0.5, color="green", label="e10s", ax=ax, width=width, position=0)
    nTotal.plot(kind="bar", alpha=0.5, color="blue", label="non e10s", ax=ax2, width=width, position=1, grid=False)

    # Put both axes on one common scale that fits BOTH series. Taking the
    # limits from the e10s axis alone would clip non-e10s bars that are
    # taller than every e10s bar.
    lower = min(ax.get_ylim()[0], ax2.get_ylim()[0])
    upper = max(ax.get_ylim()[1], ax2.get_ylim()[1])
    ax.set_ylim(lower, upper)
    ax2.set_ylim(lower, upper)

    # Merge the legend handles of both axes into a single legend.
    ax.legend(ax.get_legend_handles_labels()[0] + ax2.get_legend_handles_labels()[0], ["e10s", "non e10s"])

    plt.xlabel(histogram)
    plt.ylabel("Frequency %")
    plt.show()

    print("The probability that the distributions for {} are differing by chance is {:.2f}.".format(histogram, pvalue))
    
def normalize_uptime_hour(frame):
    """Scale every column by ``60 / uptime`` and drop the ``uptime`` column.

    Rows whose ``uptime`` is not strictly positive are discarded first. The
    "per hour" label presumes ``uptime`` is expressed in minutes — confirm
    against the ping schema. The frame passed in is left untouched; a new
    frame is returned.
    """
    positive = frame[frame["uptime"] > 0]
    uptime = positive["uptime"]

    def per_uptime(column):
        return column/uptime

    hourly = 60 * positive.apply(per_uptime) # Metric per hour
    return hourly.drop('uptime', axis=1)
    
def compare_count_histograms(pings, *histograms_names):
    """Compare uptime-normalised count metrics between e10s and non-e10s pings.

    Fetches the requested properties plus ``simpleMeasurements.uptime`` and
    ``e10s`` from ``pings``, splits the rows on the ``e10s`` flag, normalises
    each half with ``normalize_uptime_hour`` and hands every remaining column
    (except ``e10s``) to ``compare_scalars`` with NaNs dropped.
    """
    wanted = histograms_names + ("simpleMeasurements.uptime", "e10s")
    frame = pd.DataFrame(get_pings_properties(pings, wanted).collect())

    with_e10s = normalize_uptime_hour(frame[frame["e10s"] == True])
    without_e10s = normalize_uptime_hour(frame[frame["e10s"] == False])

    # The "e10s" flag column is the grouping key, not a metric.
    metrics = [name for name in with_e10s.columns if name != "e10s"]
    for name in metrics:
        compare_scalars(name + " per hour", with_e10s[name].dropna(), without_e10s[name].dropna())
    
def compare_histograms(pings, *histogram_names):
    """Run ``compare_histogram`` for each requested histogram.

    Fetches the named properties plus ``e10s`` from ``pings``, splits the
    rows on the ``e10s`` flag, and compares each remaining column between the
    two groups after dropping missing values.
    """
    wanted = histogram_names + ("e10s",)
    frame = pd.DataFrame(get_pings_properties(pings, wanted).collect())

    with_e10s = frame[frame["e10s"] == True]
    without_e10s = frame[frame["e10s"] == False]

    # The "e10s" flag column is the grouping key, not a histogram.
    names = [name for name in with_e10s.columns if name != "e10s"]
    for name in names:
        compare_histogram(name, with_e10s[name].dropna(), without_e10s[name].dropna())
                    
def compare_scalars(metric, *groups):
    """Print the median difference between two groups and its p-value.

    The first line reports ``median_diff(*groups)`` followed by the medians
    of ``groups[0]`` and ``groups[1]``. The second line reports the result of
    ``grouped_permutation_test`` run with ``median_diff`` and 10000 samples.
    """
    summary = "Median difference in {} is {:.2f}, ({:.2f}, {:.2f})."
    print(summary.format(metric, median_diff(*groups), np.median(groups[0]), np.median(groups[1])))

    # Computed after the first line is printed, as the test is the slow part.
    pvalue = grouped_permutation_test(median_diff, groups, num_samples=10000)
    print("The probablity of this effect being purely by chance is {:.2f}.".format(pvalue))

Get e10s and non-e10s partitions¶

In [22]:

# Fetch the pings to analyse.
# NOTE(review): the positional arguments are presumably application, channel,
# version, build-id range, submission date and sampling fraction — confirm
# against the moztelemetry `get_pings` signature.
pings = get_pings(sc, "Firefox", "nightly", "*", ("20150309000000", "20150315999999"), "20150316", 1)

In [23]:

# Show how many pings were fetched.
pings.count()

Out[23]:

In [117]:

# Going by the helper's name, keep a single ping per client so that each
# client contributes once to the comparisons — confirm against moztelemetry.
subset = get_one_ping_per_client(pings)

In [118]:

def add_e10s_discriminator(ping):
    """Tag ``ping`` in place with a boolean ``"e10s"`` key and return it.

    The flag is true exactly when ``ping`` has a truthy (non-empty)
    ``"childPayloads"`` entry.
    """
    child_payloads = ping.get("childPayloads", {})
    ping["e10s"] = bool(child_payloads)
    return ping

# Tag every ping in `subset` with the boolean "e10s" key.
# NOTE(review): this rebinds `subset`, so re-running the cell maps the
# already-mapped collection again.
subset = subset.map(add_e10s_discriminator)

In [146]:

def add_gecko_activity(ping):
    """Annotate ``ping`` with the rate of long Gecko-thread activity buckets.

    For each thread named ``"Gecko"`` in ``ping["threadHangStats"]``, sums
    the counts of the ``activity`` histogram buckets whose key is greater
    than 100 and stores that sum divided by ``simpleMeasurements.uptime``
    under ``ping["gecko_hangs_per_minute"]``. The key name presumes uptime is
    in minutes and bucket keys are milliseconds — confirm against the ping
    schema.

    The ping is returned unchanged when it has no thread hang stats, no
    ``simpleMeasurements`` section, or a missing or non-positive uptime.

    Returns the same ``ping`` object, mutated in place.
    """
    threads = ping.get("threadHangStats", {})
    # Read uptime defensively: a ping without "simpleMeasurements" is skipped
    # rather than raising KeyError, which would fail the whole map.
    uptime = (ping.get("simpleMeasurements") or {}).get("uptime", -1)

    if not threads or uptime <= 0:
        return ping

    for thread in threads:
        if thread["name"] != "Gecko":
            continue

        activity = thread["activity"]["values"]
        # Bucket keys arrive as strings; convert them so they compare
        # numerically. Lists (not iterators/views) keep this portable.
        buckets = [int(key) for key in activity.keys()]
        histogram = pd.Series(list(activity.values()), index=buckets)
        over100 = histogram[histogram.index > 100].sum()
        # Relies on true division (`from __future__ import division` in the
        # imports cell) so integer counts are not floored.
        ping["gecko_hangs_per_minute"] = over100/uptime

    return ping

# Add "gecko_hangs_per_minute" to the pings for which it can be computed.
# NOTE(review): this rebinds `subset`, so re-running the cell maps the
# already-mapped collection again.
subset = subset.map(add_gecko_activity)

1. Generic stuff¶

In [147]:

# Collect the scalar measurements used by the comparisons below into one
# pandas DataFrame. The later cells index it by the last path component
# (e.g. "firstPaint"), so the "simpleMeasurements." prefix is evidently
# stripped from the column names.
simple = pd.DataFrame(get_pings_properties(subset, 
                                           ["simpleMeasurements.firstPaint",
                                            "simpleMeasurements.sessionRestored", 
                                            "simpleMeasurements.sessionRestoreRestoring",
                                            "simpleMeasurements.shutdownDuration",
                                            "gecko_hangs_per_minute",
                                            "e10s"]).collect())

# Split the rows on the "e10s" flag: e* = e10s pings, n* = non-e10s pings.
eSimple = simple[simple["e10s"] == True]
nSimple = simple[simple["e10s"] == False]

a) Startup time evaluation:¶

In [157]:

# Compare "firstPaint" (reported as startup time) between the two groups,
# ignoring pings where it is missing.
compare_scalars("startup time", eSimple["firstPaint"].dropna(), nSimple["firstPaint"].dropna())

Median difference in startup time is 74.00, (3570.00, 3496.00).
The probablity of this effect being purely by chance is 0.24.

b) Session restore time evaluation:¶

In [158]:

# Restore time = sessionRestored - sessionRestoreRestoring. Rows where either
# measurement is missing yield NaN and are removed by dropna().
eRestoreTime = (eSimple["sessionRestored"] - eSimple["sessionRestoreRestoring"]).dropna()
nRestoreTime = (nSimple["sessionRestored"] - nSimple["sessionRestoreRestoring"]).dropna()
compare_scalars("restore time", eRestoreTime, nRestoreTime)

Median difference in restore time is -28.00, (118.00, 146.00).
The probablity of this effect being purely by chance is 0.00.

c) Shutdown time evaluation:¶

In [159]:

# Drop missing values first, as the startup and restore-time comparisons do:
# np.median does not skip NaN, so pings without a shutdownDuration would
# otherwise distort the medians and the permutation test.
compare_scalars("shutdown duration", eSimple["shutdownDuration"].dropna(), nSimple["shutdownDuration"].dropna())

Median difference in shutdown duration is -106.00, (2240.00, 2346.00).
The probablity of this effect being purely by chance is 0.00.

d) Animation smoothness:¶

In [33]:

# Compare the tab-animation frame-interval histograms between e10s and
# non-e10s pings (one plot and p-value per histogram).
compare_histograms(subset,
                   "histograms.FX_TAB_ANIM_ANY_FRAME_INTERVAL_MS",
                   "histograms.FX_TAB_ANIM_OPEN_FRAME_INTERVAL_MS",
                   "histograms.FX_TAB_ANIM_OPEN_PREVIEW_FRAME_INTERVAL_MS")

The probability that the distributions for FX_TAB_ANIM_ANY_FRAME_INTERVAL_MS are differing by chance is 0.00.

The probability that the distributions for FX_TAB_ANIM_OPEN_FRAME_INTERVAL_MS are differing by chance is 0.02.

The probability that the distributions for FX_TAB_ANIM_OPEN_PREVIEW_FRAME_INTERVAL_MS are differing by chance is 0.00.

2. Responsiveness¶

a) Event processing¶

In [34]:

# Compare the refresh-driver frame delay and event-loop UI lag histograms
# between e10s and non-e10s pings.
compare_histograms(subset, "histograms.FX_REFRESH_DRIVER_FRAME_DELAY_MS", "histograms.EVENTLOOP_UI_LAG_EXP_MS")

The probability that the distributions for EVENTLOOP_UI_LAG_EXP_MS are differing by chance is 0.00.

The probability that the distributions for FX_REFRESH_DRIVER_FRAME_DELAY_MS are differing by chance is 0.13.

In [156]:

# "gecko_hangs_per_minute" is only set for pings that have a Gecko thread and
# a positive uptime, so the column contains NaN for the rest. Drop those
# first: np.median does not skip NaN.
compare_scalars("hangs over 100ms per minute", eSimple["gecko_hangs_per_minute"].dropna(), nSimple["gecko_hangs_per_minute"].dropna())

Median difference in hangs over 100ms per minute is -4.44, (2.71, 7.15).
The probablity of this effect being purely by chance is 0.00.

b) Plugins¶

In [35]:

# Compare the uptime-normalised count of abnormal plugin subprocess aborts
# between e10s and non-e10s pings.
compare_count_histograms(subset, "keyedHistograms.SUBPROCESS_ABNORMAL_ABORT.plugin")

Median difference in SUBPROCESS_ABNORMAL_ABORT.plugin per hour is 0.01.
The probablity of this effect being purely by chance is 0.67.

c) Page load¶

In [36]:

# Compare the page-load time histogram between e10s and non-e10s pings.
compare_histograms(subset, "histograms.FX_PAGE_LOAD_MS")

The probability that the distributions for FX_PAGE_LOAD_MS are differing by chance is 0.00.

d) Slow scripts¶

In [37]:

# Compare the uptime-normalised slow-script notice count between e10s and
# non-e10s pings.
compare_count_histograms(subset, "histograms.SLOW_SCRIPT_NOTICE_COUNT")

Median difference in SLOW_SCRIPT_NOTICE_COUNT per hour is -0.04.
The probablity of this effect being purely by chance is 0.35.

e) Window open time¶

In [38]:

# Compare the new-window open time histogram between e10s and non-e10s pings.
compare_histograms(subset, "histograms.FX_NEW_WINDOW_MS")

The probability that the distributions for FX_NEW_WINDOW_MS are differing by chance is 0.00.

f) Tab switch time¶

In [39]:

# Compare the total tab-switch time histogram between e10s and non-e10s pings.
compare_histograms(subset, "histograms.FX_TAB_SWITCH_TOTAL_MS")

The probability that the distributions for FX_TAB_SWITCH_TOTAL_MS are differing by chance is 0.96.

g) Garbage collection¶

In [40]:

# Compare the garbage-collection timing histograms between e10s and non-e10s
# pings (one plot and p-value per histogram).
compare_histograms(subset, 
                   "histograms.GC_MS", 
                   "histograms.GC_MAX_PAUSE_MS", 
                   "histograms.GC_MARK_MS", 
                   "histograms.GC_SWEEP_MS", 
                   "histograms.GC_MARK_ROOTS_MS", 
                   "histograms.GC_MARK_GRAY_MS", 
                   "histograms.GC_SLICE_MS", 
                   "histograms.GC_SCC_SWEEP_TOTAL_MS", 
                   "histograms.GC_SCC_SWEEP_MAX_PAUSE_MS")

The probability that the distributions for GC_MARK_GRAY_MS are differing by chance is 0.00.

The probability that the distributions for GC_MARK_MS are differing by chance is 0.00.

The probability that the distributions for GC_MARK_ROOTS_MS are differing by chance is 0.00.

The probability that the distributions for GC_MAX_PAUSE_MS are differing by chance is 0.00.

The probability that the distributions for GC_MS are differing by chance is 0.00.

The probability that the distributions for GC_SCC_SWEEP_MAX_PAUSE_MS are differing by chance is 0.00.

The probability that the distributions for GC_SCC_SWEEP_TOTAL_MS are differing by chance is 0.00.

The probability that the distributions for GC_SLICE_MS are differing by chance is 0.00.

The probability that the distributions for GC_SWEEP_MS are differing by chance is 0.00.

h) Cycle Collector¶

In [41]:

# Compare the cycle-collector histograms between e10s and non-e10s pings
# (one plot and p-value per histogram).
compare_histograms(subset,
                   "histograms.CYCLE_COLLECTOR",
                   "histograms.CYCLE_COLLECTOR_WORKER",
                   "histograms.CYCLE_COLLECTOR_FULL",
                   "histograms.CYCLE_COLLECTOR_MAX_PAUSE",
                   "histograms.CYCLE_COLLECTOR_TIME_BETWEEN")

The probability that the distributions for CYCLE_COLLECTOR are differing by chance is 0.00.

The probability that the distributions for CYCLE_COLLECTOR_FULL are differing by chance is 0.00.

The probability that the distributions for CYCLE_COLLECTOR_MAX_PAUSE are differing by chance is 0.00.

The probability that the distributions for CYCLE_COLLECTOR_TIME_BETWEEN are differing by chance is 0.00.

The probability that the distributions for CYCLE_COLLECTOR_WORKER are differing by chance is 0.00.