import simplejson as json import pandas as pd import numpy as np from moztelemetry.spark import get_pings from __future__ import division sc.defaultParallelism %%capture pings = get_pings(sc, "Firefox", "nightly", "37.0a1", "*", ("20141208", "20141214")) %%capture def extract(ping): ping = json.loads(ping) uptime = ping["simpleMeasurements"]["uptime"] clientid = True if "clientID" in ping else False return (clientid, uptime) client_uptime = pings.map(extract) %%capture df = pd.DataFrame(client_uptime.collect(), columns=["has Client ID", "uptime"]) percentage = lambda xs: 100*len(xs)/df.shape[0] percentile75 = lambda xs: np.percentile(xs, 75) percentile95 = lambda xs: np.percentile(xs, 95) df = df[df["uptime"] >= 0] table = pd.pivot_table(df, index="has Client ID", values="uptime", aggfunc=[np.mean, np.median, percentile75, percentile95, len, percentage]) table.columns = ["mean", "50%", "75%", "95%", "# pings", "proportion"] table