import binascii
import pandas as pd
from operator import attrgetter, itemgetter
from moztelemetry import get_pings, get_pings_properties, get_one_ping_per_client, get_clients_history
from collections import defaultdict
from __future__ import division
%pylab inline
Populating the interactive namespace from numpy and matplotlib
# Display the default parallelism of ``sc``.
# NOTE(review): ``sc`` is not defined in this file; presumably it is the
# SparkContext injected by the notebook environment -- confirm.
sc.defaultParallelism
80
Get all main and crash pings for a range of recent build-ids:
# Pair of build ids handed to get_pings as ``build_id``
# (presumably the lower and upper bound of a range -- confirm against
# the moztelemetry documentation).
build_ids = ("20150710000000", "20150717999999")

# "main" pings of Firefox nightly for those builds.
pings = get_pings(
    sc, app="Firefox", channel="nightly", build_id=build_ids,
    doc_type="main", schema="v4",
)

# "crash" pings selected with exactly the same filters.
crashes = get_pings(
    sc, app="Firefox", channel="nightly", build_id=build_ids,
    doc_type="crash", schema="v4",
)
Take a subset of nightly clients:
def sample(ping):
    """Tell whether *ping* belongs to the sampled subset of clients.

    The CRC32 of the ping's ``clientId`` is reduced modulo 100 and the ping
    is kept when the result is below 10.  A ping without a (truthy)
    ``clientId`` is rejected: the falsy client id itself is returned, so the
    result is only meant to be used in a boolean context (e.g. ``filter``).
    """
    client_id = ping.get("clientId", None)
    if not client_id:
        # Mirror ``client_id and ...``: hand back the falsy value unchanged.
        return client_id
    bucket = binascii.crc32(client_id) % 100
    return bucket < 10
# Apply the same client sampling to both ping types so they stay comparable.
sampled_pings, sampled_crashes = pings.filter(sample), crashes.filter(sample)

# Driver-side lookup table: clientId -> iterable of that client's crash "meta"
# sections.
crashes_by_client = (
    sampled_crashes
    .map(lambda crash: (crash["clientId"], crash["meta"]))
    .groupByKey()
    .collectAsMap()
)
Get a subset of fields:
# Reduce every sampled main ping to the properties this analysis reads.
subset = get_pings_properties(
    sampled_pings,
    [
        # Client identity and ping metadata.
        "clientId",
        "meta/documentId",
        "meta/submissionDate",
        "meta/creationTimestamp",
        # Environment.
        "environment/system/os/name",
        # Session / subsession bookkeeping.
        "payload/info/reason",
        "payload/info/sessionId",
        "payload/info/subsessionId",
        "payload/info/previousSessionId",
        "payload/info/previousSubsessionId",
        "payload/info/subsessionCounter",
        "payload/info/profileSubsessionCounter",
        # Simple measurements.
        "payload/simpleMeasurements/firstPaint",
        "payload/simpleMeasurements/savedPings",
        "payload/simpleMeasurements/uptime",
        # Histograms.
        "payload/histograms/STARTUP_CRASH_DETECTED",
    ],
)
Group fragments by client and dedupe by documentId:
def dedupe_and_sort(group):
    """Return one client's fragments, de-duplicated and ordered.

    *group* is a ``(key, fragments)`` pair; the key is ignored.  Only the
    first fragment seen for each ``meta/documentId`` is kept, and the
    survivors are returned as a list sorted (stably) by
    ``payload/info/profileSubsessionCounter``.
    """
    _, fragments = group
    known_documents = set()
    unique = []
    for fragment in fragments:
        document_id = fragment["meta/documentId"]
        if document_id not in known_documents:
            known_documents.add(document_id)
            unique.append(fragment)
    return sorted(unique, key=itemgetter("payload/info/profileSubsessionCounter"))
# Bring one de-duplicated, counter-ordered fragment list per client back to
# the driver.
grouped = (
    subset
    .groupBy(lambda fragment: fragment["clientId"])
    .map(dedupe_and_sort)
    .collect()
)
< Digression> What's the percentage of clients that have at least one pair of fragments with different documentIds but the same profileSubsessionCounter?
def duplicate_pssc(grouped):
    """Find clients that reuse a profileSubsessionCounter value.

    *grouped* is a sequence of per-client fragment lists.  A client counts
    as a duplicate when at least two of its fragments share the same
    ``payload/info/profileSubsessionCounter``.

    Prints the percentage of such clients (``0.0`` for an empty input rather
    than failing with a ZeroDivisionError) and returns the set of their
    ``clientId`` values, taken from the first fragment of each history.
    """
    dupes = 0
    dupe_clients = set()
    for history in grouped:
        counts = defaultdict(int)
        for fragment in history:
            counts[fragment["payload/info/profileSubsessionCounter"]] += 1
        # One repeated counter value is enough to flag the whole client.
        if any(count > 1 for count in counts.values()):
            dupes += 1
            dupe_clients.add(history[0]["clientId"])
    percentage = 100.0 * dupes / len(grouped) if grouped else 0.0
    print(percentage)
    return dupe_clients
# Print the share of clients with a repeated profileSubsessionCounter and keep
# their client ids.
dupe_clients = duplicate_pssc(grouped)
3.08981408746
< /Digression> Let's remove the clients with a duplicated profileSubsessionCounter to be safe.
# Keep only the histories of clients that were not flagged in dupe_clients.
dd_grouped = [
    history for history in grouped
    if history[0]["clientId"] not in dupe_clients
]
Given the set of chain breaks, how many of them are due to missing starting/ending fragments?
class AdjacentBreaks:
    """Tally chain breaks between fragments of two adjacent sessions.

    A ``(prev, curr)`` pair is handled when ``prev``'s sessionId equals
    ``curr``'s previousSessionId.  Pairs where ``prev`` closed its session
    (reason "aborted-session" or "shutdown") and ``curr`` is the first
    subsession of the next one are skipped here; see the IncrementError
    class.
    """

    # Reasons treated as "the previous session has no fragment missing at
    # its end".
    _SESSION_END_REASONS = ("aborted-session", "shutdown")

    def __init__(self):
        self.missing_total = 0   # breaks counted by this class
        self.missing_start = 0   # only starting fragments missing
        self.missing_end = 0     # only ending fragments missing
        self.missing_both = 0    # starting and ending fragments missing
        self.crashed_prev = 0    # counted breaks with a crash in between
        self.reason = defaultdict(int)  # "prev reason -> curr reason" -> count

    @staticmethod
    def _percent(part, whole):
        """Return ``part`` as a percentage of ``whole``; 0.0 if ``whole`` is 0."""
        return 100.0 * part / whole if whole else 0.0

    def process(self, prev, curr):
        """Account for the break between fragments *prev* and *curr*.

        Does nothing unless the two fragments belong to adjacent sessions.
        """
        if prev["payload/info/sessionId"] != curr["payload/info/previousSessionId"]:
            return

        prev_reason = prev["payload/info/reason"]
        session_closed = prev_reason in self._SESSION_END_REASONS
        first_subsession = curr["payload/info/subsessionCounter"] == 1

        # Ignore fake missing fragments? See IncrementError class
        if session_closed and first_subsession:
            return

        self.missing_total += 1
        self.reason["{} -> {}".format(prev_reason, curr["payload/info/reason"])] += 1

        # Are there missing starting fragments?
        missing_start = not first_subsession
        # Are there missing ending fragments?
        missing_end = not session_closed

        if missing_start and missing_end:
            self.missing_both += 1
        elif missing_start:
            self.missing_start += 1
        elif missing_end:
            self.missing_end += 1

        # has_crash_ping is a module-level helper; it is only consulted when
        # the startup-crash histogram value is falsy.
        self.crashed_prev += curr["payload/histograms/STARTUP_CRASH_DETECTED"] or has_crash_ping(prev, curr)

    def stats(self, total):
        """Print the collected statistics.

        *total* is the denominator of the per-edge percentages.  The crash
        percentage is relative to the breaks counted by this class.  A zero
        denominator yields 0.00% instead of raising ZeroDivisionError.
        """
        percent = self._percent
        print("ADJACENT SESSIONS STATS")
        print("{:5.2f}% of edges have fragments missing".format(percent(self.missing_total, total)))
        print("{:5.2f}% of edges are missing one or more starting fragments".format(percent(self.missing_start, total)))
        print("{:5.2f}% of edges are missing one or more ending fragments".format(percent(self.missing_end, total)))
        print("{:5.2f}% of edges are missing both starting and ending fragments".format(percent(self.missing_both, total)))
        print("{:5.2f}% of edges have a crash in-between".format(percent(self.crashed_prev, self.missing_total)))
        print("")
        print("Reason distribution:")
        print(dict(self.reason))
        print("")
class WithinBreaks:
    """Tally chain breaks between two fragments of the same session.

    A ``(prev, curr)`` pair is counted when both fragments carry the same
    ``payload/info/sessionId``.
    """

    def __init__(self):
        self.missing_total = 0   # breaks counted by this class
        self.crashed_prev = 0    # counted breaks with a crash in between
        self.reason = defaultdict(int)  # "prev reason -> curr reason" -> count

    @staticmethod
    def _percent(part, whole):
        """Return ``part`` as a percentage of ``whole``; 0.0 if ``whole`` is 0."""
        return 100.0 * part / whole if whole else 0.0

    def process(self, prev, curr):
        """Account for the break between *prev* and *curr* if they share a session."""
        if prev["payload/info/sessionId"] != curr["payload/info/sessionId"]:
            return
        self.missing_total += 1
        self.reason["{} -> {}".format(prev["payload/info/reason"], curr["payload/info/reason"])] += 1
        # has_crash_ping is a module-level helper; it is only consulted when
        # the startup-crash histogram value is falsy.
        self.crashed_prev += curr["payload/histograms/STARTUP_CRASH_DETECTED"] or has_crash_ping(prev, curr)

    def stats(self, total):
        """Print the collected statistics.

        *total* is the denominator of the missing-fragment percentage; the
        crash percentage is relative to the breaks counted by this class.
        A zero denominator yields 0.00% instead of raising
        ZeroDivisionError (within-session breaks may well be absent).
        """
        print("WITHIN SESSIONS STATS")
        print("{:5.2f}% of edges have fragments missing".format(self._percent(self.missing_total, total)))
        print("{:5.2f}% of edges have a crash in-between".format(self._percent(self.crashed_prev, self.missing_total)))
        print("")
        print("Reason distribution:")
        print(dict(self.reason))
        print("")
class NonAdjacentBreaks:
    """Tally chain breaks between fragments of non-adjacent sessions.

    A ``(prev, curr)`` pair is counted when ``prev``'s sessionId matches
    neither ``curr``'s sessionId nor ``curr``'s previousSessionId.
    """

    def __init__(self):
        self.missing_total = 0   # breaks counted by this class
        self.reason = defaultdict(int)      # "prev reason -> curr reason" -> count
        self.difference = defaultdict(int)  # profileSubsessionCounter gap -> count
        self.crashed_prev = 0    # counted breaks with a crash in between

    @staticmethod
    def _percent(part, whole):
        """Return ``part`` as a percentage of ``whole``; 0.0 if ``whole`` is 0."""
        return 100.0 * part / whole if whole else 0.0

    def process(self, prev, curr):
        """Account for the break between *prev* and *curr* if their sessions are unrelated."""
        prev_session = prev["payload/info/sessionId"]
        if prev_session == curr["payload/info/sessionId"]:
            return
        if prev_session == curr["payload/info/previousSessionId"]:
            return
        self.missing_total += 1
        self.reason["{} -> {}".format(prev["payload/info/reason"], curr["payload/info/reason"])] += 1
        gap = curr["payload/info/profileSubsessionCounter"] - prev["payload/info/profileSubsessionCounter"]
        self.difference[gap] += 1
        # has_crash_ping is a module-level helper; it is only consulted when
        # the startup-crash histogram value is falsy.
        self.crashed_prev += curr["payload/histograms/STARTUP_CRASH_DETECTED"] or has_crash_ping(prev, curr)

    def stats(self, total):
        """Print the collected statistics.

        *total* is the denominator of the missing-fragment percentage; the
        crash percentage is relative to the breaks counted by this class.
        A zero denominator yields 0.00% instead of raising
        ZeroDivisionError.  The counter-gap distribution is printed ordered
        by gap size.
        """
        print("NON-ADJACENT SESSIONS STATS")
        print("{:5.2f}% of edges have fragments missing".format(self._percent(self.missing_total, total)))
        print("{:5.2f}% of edges have a crash in-between".format(self._percent(self.crashed_prev, self.missing_total)))
        print("")
        print("Reason distribution:")
        print(dict(self.reason))
        print("")
        print("Difference distribution:")
        # sort_index() returns a new Series; the result must be kept, otherwise
        # the order depends on how pandas builds a Series from a dict.
        dist = pd.Series(self.difference).sort_index()
        print(dist)
        print("")
class IncrementError:
    """Tally breaks that look like profileSubsessionCounter increment errors.

    A ``(prev, curr)`` pair is counted when the sessions are adjacent
    (``prev``'s sessionId equals ``curr``'s previousSessionId), ``prev``
    closed its session (reason "aborted-session" or "shutdown") and ``curr``
    is the first subsession of the next one -- i.e. the session data shows
    no gap although the profile-wide counter does.
    """

    def __init__(self):
        self.errors_total = 0    # pairs matching the pattern above
        self.reason = defaultdict(int)  # "prev reason -> curr reason" -> count

    @staticmethod
    def _percent(part, whole):
        """Return ``part`` as a percentage of ``whole``; 0.0 if ``whole`` is 0."""
        return 100.0 * part / whole if whole else 0.0

    def process(self, prev, curr):
        """Count the pair *prev*/*curr* if it matches the increment-error pattern."""
        adjacent = prev["payload/info/sessionId"] == curr["payload/info/previousSessionId"]
        if not adjacent:
            return
        if prev["payload/info/reason"] not in ("aborted-session", "shutdown"):
            return
        if curr["payload/info/subsessionCounter"] != 1:
            return
        self.errors_total += 1
        self.reason["{} -> {}".format(prev["payload/info/reason"], curr["payload/info/reason"])] += 1

    def stats(self, total):
        """Print the collected statistics.

        *total* is the denominator of the percentage; when it is zero the
        percentage is reported as 0.00% instead of raising
        ZeroDivisionError.
        """
        print("PROFILESUBSESSIONCOUNTER INCREMENT ERRORS")
        print("{:5.2f}% of edges have a mismatching profileSubsessionCounter".format(self._percent(self.errors_total, total)))
        print("")
        print("Reason distribution:")
        print(dict(self.reason))
        print("")
def has_crash_ping(prev, curr):
    """Tell whether the client sent a crash ping between two fragments.

    Looks up the crash entries recorded in the module-level
    ``crashes_by_client`` mapping for ``prev``'s clientId and returns True
    when at least one of them has a ``creationTimestamp`` within the closed
    interval spanned by the ``meta/creationTimestamp`` of *prev* and *curr*.
    Returns False when the client has no recorded crashes.
    """
    client_crashes = crashes_by_client.get(prev["clientId"], None)
    if not client_crashes:
        return False
    return any(
        crash["creationTimestamp"] >= prev["meta/creationTimestamp"]
        and crash["creationTimestamp"] <= curr["meta/creationTimestamp"]
        for crash in client_crashes
    )
def missing(grouped):
    """Print statistics about missing fragments in the clients' session chains.

    *grouped* is a sequence of non-empty per-client fragment lists
    (expected to be ordered by ``payload/info/profileSubsessionCounter``).
    Every pair of consecutive fragments forms an edge; an edge is broken
    when the counter does not grow by exactly one.  Broken edges are handed
    to the AdjacentBreaks, WithinBreaks, NonAdjacentBreaks and
    IncrementError collectors, whose reports are printed after the general
    statistics.

    Percentages whose denominator is zero (no clients, no broken or no
    intact chains, no edges) are reported as 0.00% instead of aborting the
    report with a ZeroDivisionError.  Returns None.
    """
    def percent(part, whole):
        # Percentage that tolerates an empty population.
        return 100.0 * part / whole if whole else 0.0

    broken_clients = set()
    seen_clients = set()
    num_broken_chains = 0
    num_crashed = 0
    total_edges = 0

    adjacent_breaks = AdjacentBreaks()
    within_breaks = WithinBreaks()
    non_adjacent_breaks = NonAdjacentBreaks()
    increment_errors = IncrementError()
    collectors = (adjacent_breaks, within_breaks, non_adjacent_breaks, increment_errors)

    for history in grouped:
        seen_clients.add(history[0]["clientId"])

        # Walk the chain edge by edge: (fragment i-1, fragment i).
        for prev_fragment, curr_fragment in zip(history, history[1:]):
            num_crashed += curr_fragment["payload/histograms/STARTUP_CRASH_DETECTED"] or has_crash_ping(prev_fragment, curr_fragment)
            total_edges += 1

            prev_pss_counter = prev_fragment["payload/info/profileSubsessionCounter"]
            current_pss_counter = curr_fragment["payload/info/profileSubsessionCounter"]

            # Is a fragment missing?
            if prev_pss_counter + 1 == current_pss_counter:
                continue

            broken_clients.add(curr_fragment["clientId"])
            num_broken_chains += 1
            for collector in collectors:
                collector.process(prev_fragment, curr_fragment)

    correct_clients = seen_clients.difference(broken_clients)
    crashed_broken = broken_clients.intersection(crashes_by_client.keys())
    crashed_correct = correct_clients.intersection(crashes_by_client.keys())

    print("GENERAL STATS")
    print("{:5.2f}% clients have a broken session chain".format(percent(len(broken_clients), len(grouped))))
    print("{:5.2f}% of clients with a missing fragment experienced at least one crash".format(percent(len(crashed_broken), len(broken_clients))))
    print("{:5.2f}% of clients without a missing fragment experienced at least one crash".format(percent(len(crashed_correct), len(correct_clients))))
    print("{:5.2f}% of edges have a crash in-between\n".format(percent(num_crashed, total_edges)))

    # All break reports are relative to the number of broken edges.
    increment_errors.stats(num_broken_chains)
    adjacent_breaks.stats(num_broken_chains)
    within_breaks.stats(num_broken_chains)
    non_adjacent_breaks.stats(num_broken_chains)
# Report on the histories that remain in dd_grouped, i.e. without the clients
# listed in dupe_clients.
missing(dd_grouped)
GENERAL STATS 4.51% clients have a broken session chain 23.65% of clients with a missing fragment experienced at least one crash 10.00% of clients without a missing fragment experienced at least one crash 2.44% of edges have a crash in-between PROFILESUBSESSIONCOUNTER INCREMENT ERRORS 3.72% of edges have a mismatching profileSubsessionCounter Reason distribution: {'aborted-session -> environment-change': 4, 'aborted-session -> shutdown': 10, 'aborted-session -> aborted-session': 6, 'aborted-session -> daily': 1} ADJACENT SESSIONS STATS 19.82% of edges have fragments missing 15.93% of edges are missing one or more starting fragments 3.89% of edges are missing one or more ending fragments 0.00% of edges are missing both starting and ending fragments 3.57% of edges have a crash in-between Reason distribution: {'shutdown -> shutdown': 31, 'environment-change -> shutdown': 5, 'daily -> shutdown': 10, 'environment-change -> environment-change': 1, 'daily -> daily': 5, 'environment-change -> daily': 1, 'shutdown -> environment-change': 2, 'aborted-session -> shutdown': 2, 'aborted-session -> aborted-session': 13, 'shutdown -> aborted-session': 42} WITHIN SESSIONS STATS 1.95% of edges have fragments missing 0.00% of edges have a crash in-between Reason distribution: {'daily -> daily': 1, 'daily -> aborted-session': 7, 'environment-change -> aborted-session': 3} NON-ADJACENT SESSIONS STATS 74.51% of edges have fragments missing 10.21% of edges have a crash in-between Reason distribution: {'shutdown -> shutdown': 334, 'aborted-session -> environment-change': 1, 'environment-change -> shutdown': 1, 'daily -> shutdown': 1, 'environment-change -> environment-change': 2, 'shutdown -> daily': 22, 'environment-change -> daily': 1, 'aborted-session -> daily': 2, 'shutdown -> environment-change': 12, 'aborted-session -> shutdown': 20, 'aborted-session -> aborted-session': 5, 'shutdown -> aborted-session': 20} Difference distribution: 2 210 3 55 4 28 5 16 6 8 7 5 8 8 9 7 10 4 11 3 12 
1 13 1 14 4 15 5 16 2 ... 146 1 170 1 171 1 178 1 192 1 200 1 240 1 259 1 268 1 276 1 286 1 342 1 410 1 424 1 428 1 Length: 69, dtype: int64