import simplejson as json
import pandas as pd
import numpy as np
from moztelemetry.spark import get_pings
from __future__ import division
# Display the default parallelism reported by `sc` (presumably the SparkContext
# injected by the notebook environment — confirm); the value is only shown, not stored.
sc.defaultParallelism
16
%%capture
# Fetch the telemetry pings to analyse. NOTE(review): the positional arguments
# look like application, channel, version, build-id wildcard and an inclusive
# submission-date range (YYYYMMDD) — confirm against the moztelemetry docs.
pings = get_pings(sc, "Firefox", "nightly", "37.0a1", "*", ("20141208", "20141214"))
%%capture
def extract(ping):
    """Parse one raw ping and return ``(has_client_id, uptime)``.

    Args:
        ping: A JSON-encoded ping document.

    Returns:
        A 2-tuple: a bool telling whether the top-level ``"clientID"`` key is
        present, and the value stored at ``simpleMeasurements.uptime``.

    Raises:
        KeyError: If ``"simpleMeasurements"`` or its ``"uptime"`` entry is
            missing from the ping.
    """
    payload = json.loads(ping)
    uptime = payload["simpleMeasurements"]["uptime"]
    # A membership test already yields a bool; only the key's presence
    # matters here, not its value.
    has_client_id = "clientID" in payload
    return (has_client_id, uptime)
# Turn every raw ping into a (has_client_id, uptime) pair via `extract`.
# NOTE(review): `pings.map` is presumably a lazy Spark RDD transformation, so
# no work happens until the result is collected — confirm.
client_uptime = pings.map(extract)
%%capture
# Bring the (has_client_id, uptime) pairs to the driver as a DataFrame.
df = pd.DataFrame(client_uptime.collect(), columns=["has Client ID", "uptime"])

# Drop pings that report a negative uptime before aggregating.
df = df[df["uptime"] >= 0]

# Denominator for the per-group share: the number of pings left after the
# filter above. Captured explicitly so `percentage` does not depend on which
# object the global name `df` happens to point at when it is called.
total_pings = df.shape[0]


def percentage(xs):
    """Return the share of all retained pings that fall into group *xs*, in percent."""
    # Float literal keeps this a true division regardless of the division mode.
    return 100.0 * len(xs) / total_pings


def percentile75(xs):
    """Return the 75th percentile of *xs*."""
    return np.percentile(xs, 75)


def percentile95(xs):
    """Return the 95th percentile of *xs*."""
    return np.percentile(xs, 95)


# One row per "has Client ID" value, one column per aggregation of uptime.
table = pd.pivot_table(
    df,
    index="has Client ID",
    values="uptime",
    aggfunc=[np.mean, np.median, percentile75, percentile95, len, percentage],
)
# Replace the generated column labels with short display names, in the same
# order as the aggregation functions above.
table.columns = ["mean", "50%", "75%", "95%", "# pings", "proportion"]
table
| has Client ID | mean | 50% | 75% | 95% | # pings | proportion |
|---|---|---|---|---|---|---|
| False | 412.120798 | 1 | 1 | 122 | 201452 | 21.260482 |
| True | 1200.948107 | 26 | 110 | 771 | 746090 | 78.739518 |