import ujson as json
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import plotly.plotly as py
import networkx as nx
import collections
import datetime
from moztelemetry import get_pings, get_pings_properties, get_one_ping_per_client
# %pylab inline
Populating the interactive namespace from numpy and matplotlib
WARNING: pylab import has clobbered these variables: ['info'] `%matplotlib` prevents importing * from pylab and numpy
# Display the default parallelism reported by `sc` (presumably the SparkContext
# provided by the notebook environment -- it is not created in this file).
sc.defaultParallelism
64
# Load all (fraction=1) v4-schema pings sent by nightly-channel Firefox with a
# submission date between 20150520 and 20150530.
pings = get_pings(
    sc,
    app="Firefox",
    channel="nightly",
    submission_date=("20150520", "20150530"),
    fraction=1,
    schema="v4"
)

# Grab a single ping and list its top-level sections.
p = pings.first()
p.keys()
# Other ways to poke at the sample ping:
#   p["payload"]["info"]
#   p.get("payload",{}).get("info",{}).get("subsessionId",False)
#   {k:p[k] for k in p.keys() if k!="main"}
[u'clientId', u'id', u'environment', u'application', u'version', 'meta', u'creationDate', u'type', u'payload']
#note that we have to filter out the 'meta' entry, b/c this can contain things like intake timestamps,
#which should be expected to change if the same ping is sent twice
def _keyByPingId(ping):
    """Return (ping id, [ping without its 'meta' entry]) for one raw ping.

    Pings that have no "id" key are all keyed under the string "MISSING".
    """
    withoutMeta = dict((k, ping[k]) for k in ping.keys() if k != "meta")
    return (ping.get("id", "MISSING"), [withoutMeta])


# Collect, per ping id, the list of every submission that carried that id.
pingsByPingId = pings.map(_keyByPingId).reduceByKey(lambda l1, l2: l1 + l2)
pingsByPingId.cache()
Get the "payload/info" section of recent "main" pings from builds newer than 20150507000000 whose reason is not idle-daily, and group them by clientId.
# Keep only the handful of fields the rest of the analysis reads.
info = get_pings_properties(
    pings,
    ["id", "clientId", "type", "payload/info", "environment/build/buildId"]
)


def _isRecentMainSubsession(p):
    """True for "main" pings, not sent for reason idle-daily, from builds after 20150507000000.

    A ping whose info section has no "reason" is treated as idle-daily and
    therefore rejected.
    """
    notIdleDaily = p["payload/info"].get("reason", "idle-daily") != "idle-daily"
    return (notIdleDaily and
            (p["type"] == "main") and
            (p["environment/build/buildId"] > "20150507000000"))


def _infoByClient(p):
    """Return (clientId, [info section]) so the lists can be concatenated per client."""
    return (p.get("clientId", "noId"), [p["payload/info"]])


subsess = info.filter(_isRecentMainSubsession)
# subsess.cache()
# numPings = subsess.count()
# numPings

# One record per client: (clientId, [info section of each of its subsession pings]).
clients = subsess.map(_infoByClient).reduceByKey(lambda l1, l2: l1 + l2)
PythonRDD[11] at RDD at PythonRDD.scala:43
def dropDupeSubsessions(subsessList):
    """Return the entries of subsessList with duplicate subsessionIds removed.

    The first entry seen for each 'subsessionId' is kept and later entries
    with the same id are dropped; the relative order of the kept entries is
    unchanged.

    Args:
        subsessList: iterable of dict-like subsession "info" sections, each
            of which must have a 'subsessionId' key whose value is hashable
            (the ids are also used as graph node keys elsewhere in this file).

    Returns:
        A new list holding the de-duplicated entries (the same objects, not
        copies).

    Raises:
        KeyError: if an entry has no 'subsessionId' key.
    """
    # A set gives O(1) membership tests; the original scanned a list on every
    # iteration, which is quadratic in the number of subsessions per client.
    seenIds = set()
    deduped = []
    for s in subsessList:
        subsessId = s['subsessionId']
        if subsessId in seenIds:
            continue
        seenIds.add(subsessId)
        deduped.append(s)
    return deduped
def _dedupeClient(id_sesslist):
    """Return (clientId, subsession list with duplicate subsessionIds dropped)."""
    clientId, sessList = id_sesslist
    return (clientId, dropDupeSubsessions(sessList))


cleanedClients = clients.map(_dedupeClient)
# cleanedClients.cache()
# numClients = cleanedClients.count()
# numClients
# cc = cleanedClients.first()
Once we've grouped by clientId, we can use each subsession's subsessionId and previousSubsessionId pointer to construct a graph of the client's subsessions, and then look at the degree of each node in the graph to see whether the graph looks the way we expect. Ideally, we should have a graph that has zero nodes with degree greater than 1 (no subsession should be pointed at by two or more subsessions) and only one node with degree zero (the first subsession in the chain won't point to any prior subsession). Additionally, any given value of 'profileSubsessionCounter' should appear only once.
def sessGraphFromList(sessInfoList):
    """Build a directed graph linking each subsession to its predecessor.

    Every entry that has a 'subsessionId' becomes a node carrying a
    'subSessNum' attribute (its 'profileSubsessionCounter', or "NA" when that
    key is absent). When the entry has a truthy 'previousSubsessionId', an
    edge is added from the subsession to that previous id; the previous id
    becomes a node even if no entry for it was submitted, in which case it
    has no attributes.

    Args:
        sessInfoList: iterable of subsession "info" dicts.

    Returns:
        The resulting networkx DiGraph.
    """
    graph = nx.DiGraph()
    for info in sessInfoList:
        try:
            currentId = info['subsessionId']
        except KeyError:
            # Without an id the entry cannot be placed in the graph.
            continue
        graph.add_node(currentId,
                       subSessNum=info.get("profileSubsessionCounter", "NA"))
        previousId = info.get('previousSubsessionId')
        if not previousId:
            continue
        # add_edge creates the predecessor node if it is not present yet.
        graph.add_edge(currentId, previousId)
    return graph
def degreeDistrib(g):
    """Count how many nodes of g have each out-degree.

    Args:
        g: a directed graph whose out_degree() returns either a
            {node: degree} dict (networkx 1.x) or an iterable of
            (node, degree) pairs (the degree view of networkx 2.x).

    Returns:
        collections.Counter mapping out-degree -> number of nodes with it.

    NOTE(review): the graphs in this file have edges running from a
    subsession to its *previous* subsession, so out-degree counts how many
    predecessors a node points to. The surrounding text talks about
    subsessions being "pointed at" by several others, which would be
    in-degree -- confirm which one is intended.
    """
    # dict(...) normalises both return shapes, so .values() is always valid.
    return collections.Counter(dict(g.out_degree()).values())
def subsessSummary(sessInfoList):
    """Summarise one client's subsession history.

    Args:
        sessInfoList: list of subsession "info" dicts for a single client.

    Returns:
        dict with the keys
          "info": the entries sorted by 'profileSubsessionCounter',
          "graph": the graph built by sessGraphFromList,
          "degDist": the out-degree distribution of that graph,
          "subSessCounterMult": Counter of how often each
              'profileSubsessionCounter' value occurs.

    Raises:
        KeyError: if any entry lacks 'profileSubsessionCounter'.
    """
    def counterOf(entry):
        return entry["profileSubsessionCounter"]

    graph = sessGraphFromList(sessInfoList)
    degDist = degreeDistrib(graph)
    counterMult = collections.Counter(counterOf(entry) for entry in sessInfoList)
    orderedInfo = sorted(sessInfoList, key=counterOf)
    return {"info": orderedInfo,
            "graph": graph,
            "degDist": degDist,
            "subSessCounterMult": counterMult}
# cc
(u'fe976713-23da-425e-a3b7-f2eda532f22c', [{u'addons': u'%7B972ce4c6-7e08-4474-a285-3208198ce6fd%7D:41.0a1', u'asyncPluginInit': False, u'flashVersion': u'17.0.0.169', u'previousSubsessionId': u'5e62b0fe-2b6e-41f0-9caa-cd7fe00b7905', u'profileSubsessionCounter': 26, u'reason': u'daily', u'revision': u'https://hg.mozilla.org/mozilla-central/rev/b9424d63fe35', u'sessionId': u'751eae58-97e0-48b6-927e-630e19ea6067', u'sessionStartDate': u'2015-05-29T00:00:00.0+02:00', u'subsessionCounter': 1, u'subsessionId': u'b9ec15b9-053f-4a6a-a358-5c7371620558', u'subsessionLength': 11785, u'subsessionStartDate': u'2015-05-29T00:00:00.0+02:00', u'timezoneOffset': 120}, {u'addons': u'%7B972ce4c6-7e08-4474-a285-3208198ce6fd%7D:41.0a1', u'asyncPluginInit': False, u'flashVersion': u'17.0.0.169', u'previousBuildId': u'20150520030205', u'previousSubsessionId': u'99c616b4-6965-44d4-8a7e-d3fe44317371', u'profileSubsessionCounter': 25, u'reason': u'shutdown', u'revision': u'https://hg.mozilla.org/mozilla-central/rev/b9424d63fe35', u'sessionId': u'8f60bec5-1115-491d-b285-34d076c0285f', u'sessionStartDate': u'2015-05-29T00:00:00.0+02:00', u'subsessionCounter': 1, u'subsessionId': u'5e62b0fe-2b6e-41f0-9caa-cd7fe00b7905', u'subsessionLength': 3910, u'subsessionStartDate': u'2015-05-29T00:00:00.0+02:00', u'timezoneOffset': 120}, {u'addons': u'%7B972ce4c6-7e08-4474-a285-3208198ce6fd%7D:41.0a1', u'asyncPluginInit': False, u'flashVersion': u'17.0.0.169', u'previousSubsessionId': u'cc99753e-311e-483e-8465-8c52ff11ebc8', u'profileSubsessionCounter': 20, u'reason': u'shutdown', u'revision': u'https://hg.mozilla.org/mozilla-central/rev/62d9b117c688', u'sessionId': u'66e93273-7b07-44c8-85e3-fa7c36ceaa9c', u'sessionStartDate': u'2015-05-18T00:00:00.0+02:00', u'subsessionCounter': 2, u'subsessionId': u'fdad78f2-6fe9-43f3-9e47-bb147afebd99', u'subsessionLength': 33290, u'subsessionStartDate': u'2015-05-18T00:00:00.0+02:00', u'timezoneOffset': 120}, {u'addons': 
u'%7B972ce4c6-7e08-4474-a285-3208198ce6fd%7D:41.0a1', u'asyncPluginInit': False, u'flashVersion': u'17.0.0.169', u'previousSubsessionId': u'fdad78f2-6fe9-43f3-9e47-bb147afebd99', u'profileSubsessionCounter': 21, u'reason': u'shutdown', u'revision': u'https://hg.mozilla.org/mozilla-central/rev/62d9b117c688', u'sessionId': u'15c3b65e-00f2-44c9-8946-79187522eb5f', u'sessionStartDate': u'2015-05-19T00:00:00.0+02:00', u'subsessionCounter': 1, u'subsessionId': u'1e994161-c014-4ae0-b410-57e1cda231cc', u'subsessionLength': 844, u'subsessionStartDate': u'2015-05-19T00:00:00.0+02:00', u'timezoneOffset': 120}, {u'addons': u'%7B972ce4c6-7e08-4474-a285-3208198ce6fd%7D:41.0a1', u'asyncPluginInit': False, u'flashVersion': u'17.0.0.169', u'previousBuildId': u'20150513030209', u'previousSubsessionId': u'1e994161-c014-4ae0-b410-57e1cda231cc', u'profileSubsessionCounter': 22, u'reason': u'shutdown', u'revision': u'https://hg.mozilla.org/mozilla-central/rev/2f6ea66057fe', u'sessionId': u'77d37251-6b7d-4418-ae33-2222c0008cbf', u'sessionStartDate': u'2015-05-20T00:00:00.0+02:00', u'subsessionCounter': 1, u'subsessionId': u'3a09aae1-c443-4194-adb4-bf7cc44e58c5', u'subsessionLength': 33, u'subsessionStartDate': u'2015-05-20T00:00:00.0+02:00', u'timezoneOffset': 120}, {u'addons': u'%7B972ce4c6-7e08-4474-a285-3208198ce6fd%7D:41.0a1', u'asyncPluginInit': False, u'flashVersion': u'17.0.0.169', u'previousBuildId': u'20150519030202', u'previousSubsessionId': u'b4245d65-ca11-4843-a037-db1e3e0e8021', u'profileSubsessionCounter': 24, u'reason': u'shutdown', u'revision': u'https://hg.mozilla.org/mozilla-central/rev/ac277e615f8f', u'sessionId': u'5dbb8ee9-2a6e-4c63-805f-7a6181dae5fd', u'sessionStartDate': u'2015-05-21T00:00:00.0+02:00', u'subsessionCounter': 1, u'subsessionId': u'99c616b4-6965-44d4-8a7e-d3fe44317371', u'subsessionLength': 501, u'subsessionStartDate': u'2015-05-21T00:00:00.0+02:00', u'timezoneOffset': 120}, {u'addons': u'%7B972ce4c6-7e08-4474-a285-3208198ce6fd%7D:41.0a1', 
u'asyncPluginInit': False, u'flashVersion': u'17.0.0.169', u'previousBuildId': u'20150517030204', u'previousSubsessionId': u'3a09aae1-c443-4194-adb4-bf7cc44e58c5', u'profileSubsessionCounter': 23, u'reason': u'shutdown', u'revision': u'https://hg.mozilla.org/mozilla-central/rev/4fb7ff694bf5', u'sessionId': u'e49e3cbf-5184-46bf-ba7b-e8fa7935e2fc', u'sessionStartDate': u'2015-05-20T00:00:00.0+02:00', u'subsessionCounter': 1, u'subsessionId': u'b4245d65-ca11-4843-a037-db1e3e0e8021', u'subsessionLength': 19947, u'subsessionStartDate': u'2015-05-20T00:00:00.0+02:00', u'timezoneOffset': 120}])
def _summariseClient(c):
    """Return (clientId, subsessSummary of that client's subsession list)."""
    clientId, sessList = c[0], c[1]
    return (clientId, subsessSummary(sessList))


subsessSummaries = cleanedClients.map(_summariseClient)
# subsessSummaries.cache()
# sss = subsessSummaries.take(1)
--------------------------------------------------------------------------- Py4JJavaError Traceback (most recent call last) <ipython-input-38-35e49ca3c093> in <module>() ----> 1 sss = subsessSummaries.take(1) /home/hadoop/spark/python/pyspark/rdd.py in take(self, num) 1119 1120 p = range(partsScanned, min(partsScanned + numPartsToTry, totalParts)) -> 1121 res = self.context.runJob(self, takeUpToNumLeft, p, True) 1122 1123 items += res /home/hadoop/spark/python/pyspark/context.py in runJob(self, rdd, partitionFunc, partitions, allowLocal) 825 # SparkContext#runJob. 826 mappedRDD = rdd.mapPartitions(partitionFunc) --> 827 it = self._jvm.PythonRDD.runJob(self._jsc.sc(), mappedRDD._jrdd, javaPartitions, allowLocal) 828 return list(mappedRDD._collect_iterator_through_file(it)) 829 /home/hadoop/anaconda/lib/python2.7/site-packages/py4j/java_gateway.pyc in __call__(self, *args) 536 answer = self.gateway_client.send_command(command) 537 return_value = get_return_value(answer, self.gateway_client, --> 538 self.target_id, self.name) 539 540 for temp_arg in temp_args: /home/hadoop/anaconda/lib/python2.7/site-packages/py4j/protocol.pyc in get_return_value(answer, gateway_client, target_id, name) 298 raise Py4JJavaError( 299 'An error occurred while calling {0}{1}{2}.\n'. --> 300 format(target_id, '.', name), value) 301 else: 302 raise Py4JError( Py4JJavaError: An error occurred while calling z:org.apache.spark.api.python.PythonRDD.runJob. 
: org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 10.3 failed 4 times, most recent failure: Lost task 0.3 in stage 10.3 (TID 223299, ip-10-228-135-214.us-west-2.compute.internal): org.apache.spark.api.python.PythonException: Traceback (most recent call last): File "/mnt/var/lib/hadoop/tmp/nm-local-dir/usercache/hadoop/filecache/10/spark-assembly-1.2.1-hadoop2.4.0.jar/pyspark/worker.py", line 107, in main process() File "/mnt/var/lib/hadoop/tmp/nm-local-dir/usercache/hadoop/filecache/10/spark-assembly-1.2.1-hadoop2.4.0.jar/pyspark/worker.py", line 98, in process serializer.dump_stream(func(split_index, iterator), outfile) File "/mnt/var/lib/hadoop/tmp/nm-local-dir/usercache/hadoop/filecache/10/spark-assembly-1.2.1-hadoop2.4.0.jar/pyspark/serializers.py", line 231, in dump_stream vs = list(itertools.islice(iterator, batch)) File "/home/hadoop/spark/python/pyspark/rdd.py", line 1117, in takeUpToNumLeft yield next(iterator) File "<ipython-input-35-8cf34cf6d752>", line 1, in <lambda> File "<ipython-input-34-97af35f3d138>", line 29, in subsessSummary File "<ipython-input-34-97af35f3d138>", line 11, in sessGraphFromList TypeError: list indices must be integers, not str at org.apache.spark.api.python.PythonRDD$$anon$1.read(PythonRDD.scala:137) at org.apache.spark.api.python.PythonRDD$$anon$1.<init>(PythonRDD.scala:174) at org.apache.spark.api.python.PythonRDD.compute(PythonRDD.scala:96) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:280) at org.apache.spark.rdd.RDD.iterator(RDD.scala:247) at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:61) at org.apache.spark.scheduler.Task.run(Task.scala:56) at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:200) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615) at java.lang.Thread.run(Thread.java:745) Driver stacktrace: at 
org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1214) at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1203) at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1202) at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59) at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:47) at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1202) at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:696) at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:696) at scala.Option.foreach(Option.scala:236) at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:696) at org.apache.spark.scheduler.DAGSchedulerEventProcessActor$$anonfun$receive$2.applyOrElse(DAGScheduler.scala:1420) at akka.actor.Actor$class.aroundReceive(Actor.scala:465) at org.apache.spark.scheduler.DAGSchedulerEventProcessActor.aroundReceive(DAGScheduler.scala:1375) at akka.actor.ActorCell.receiveMessage(ActorCell.scala:516) at akka.actor.ActorCell.invoke(ActorCell.scala:487) at akka.dispatch.Mailbox.processMailbox(Mailbox.scala:238) at akka.dispatch.Mailbox.run(Mailbox.scala:220) at akka.dispatch.ForkJoinExecutorConfigurator$AkkaForkJoinTask.exec(AbstractDispatcher.scala:393) at scala.concurrent.forkjoin.ForkJoinTask.doExec(ForkJoinTask.java:260) at scala.concurrent.forkjoin.ForkJoinPool$WorkQueue.runTask(ForkJoinPool.java:1339) at scala.concurrent.forkjoin.ForkJoinPool.runWorker(ForkJoinPool.java:1979) at scala.concurrent.forkjoin.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:107)
def _isBadSummary(id_summary):
    """True when a (clientId, summary) pair shows an unexpected history.

    A summary is flagged when any of the following holds:
      * some node has an out-degree above 1,
      * some out-degree other than 1 is shared by more than one node,
      * some profileSubsessionCounter value occurs more than once.
    """
    degDist = id_summary[1]["degDist"]
    if any(k > 1 for k in degDist.keys()):
        return True
    if any(v > 1 for k, v in degDist.iteritems() if k != 1):
        return True
    return any(v > 1 for v in id_summary[1]["subSessCounterMult"].values())


badGraphSummaries = subsessSummaries.filter(_isBadSummary)
badGraphSummaries.cache()
numBadGraphs = badGraphSummaries.count()
numBadGraphs
32547
# numClients is otherwise only assigned in a commented-out line further up,
# so count the de-duplicated clients here before using it as the denominator.
numClients = cleanedClients.count()
# Fraction of clients whose subsession history was flagged as bad.
numBadGraphs/float(numClients)
0.33243450283438025
# Pull a sample of up to 100 flagged (clientId, summary) pairs to the driver
# for plotting.
bg = badGraphSummaries.take(100)
def sessGraphFromList2(sessInfoList,byDate=True):
    """Build a subsession graph whose nodes carry plotting coordinates.

    Each entry with a 'subsessionId' becomes a node with the attributes
    'subSessNum' (its 'profileSubsessionCounter', -1 if absent), 'x' and 'y'.
    'y' starts at the entry's 'subsessionCounter' (-1 if absent) and is bumped
    by one until the (x, y) position is not already taken by an earlier node.
    An edge subsession -> previous subsession is added only when the previous
    id belongs to an entry of sessInfoList, so links to subsessions that were
    never submitted are left out.

    Args:
        sessInfoList: iterable of subsession "info" dicts for one client.
        byDate: when True, 'x' is the proleptic Gregorian ordinal of the
            first ten characters of 'sessionStartDate' (default
            "2015-01-01"); when False, 'x' is the profileSubsessionCounter.

    Returns:
        The resulting networkx DiGraph.

    Raises:
        ValueError: if byDate is True and a 'sessionStartDate' does not start
            with a YYYY-MM-DD date.
    """
    # Nothing at file level imports the stdlib `random` module (the notebook
    # relied on `%pylab` injecting a `random` name), so import it locally.
    import random

    g = nx.DiGraph()
    # Ids that were actually submitted. Entries without an id are skipped here
    # just as they are skipped in the loop below, instead of raising KeyError.
    subSessIds = set(s['subsessionId'] for s in sessInfoList
                     if 'subsessionId' in s)
    nodePositions = []
    for s in sessInfoList:
        # each s is a separate subsession submission "info" section
        try:
            subsessId = s['subsessionId']
        except KeyError:
            continue
        N = s.get("profileSubsessionCounter",-1)
        subSessCounter = s.get("subsessionCounter",-1)
        if byDate:
            dateStr = s.get("sessionStartDate","2015-01-01")[0:10]
            dateNum = datetime.datetime.strptime(dateStr,"%Y-%m-%d").toordinal()
            thisPosition = [dateNum,subSessCounter]
        else:
            thisPosition = [N,subSessCounter]
        # if a node has already been added with this (x,y) position,
        # bump up the y position
        while thisPosition in nodePositions:
            thisPosition[1]+=1
        g.add_node(subsessId,subSessNum=N,x=thisPosition[0], y=thisPosition[1])
        nodePositions.append(thisPosition)
        prevSubSess = s.get('previousSubsessionId',None)
        if prevSubSess in subSessIds:
            # The predecessor may not have been visited yet; add_edge creates
            # a bare node for it, and its attributes are filled in when its
            # own entry is processed.
            g.add_edge(subsessId,prevSubSess)
    # Add placement details for any node that ended up without attributes,
    # positioning it slightly below one of the nodes that point at it.
    # dict(...) and next(iter(...)) work whether networkx returns lists
    # (1.x) or views/iterators (2.x).
    nodeAttrs = dict(g.nodes(data=True))
    for nodeId,nodeData in nodeAttrs.items():
        if nodeData:
            continue
        pred = next(iter(g.predecessors(nodeId)))
        predData = nodeAttrs[pred]
        # horizontal jitter: -random/3 + 1/6 lies in (-1/6, 1/6]
        nodeData["x"] = predData["x"]-random.random()/3 +.1/.6
        nodeData["y"] = predData["y"]-1-random.random()
    return g
def plotSessGraph(g):
    """Draw g using the 'x'/'y' attributes stored on its nodes.

    Every node with attributes is labelled, just below its position, with
    its 'subSessNum' attribute ("NA" if absent). The figure is shown
    immediately via plt.show().

    Args:
        g: networkx graph whose nodes all have 'x' and 'y' attributes.

    Raises:
        KeyError: if a node lacks 'x' or 'y'.
    """
    nodesWithData = list(g.nodes(data=True))
    pos = dict((n, [d['x'], d["y"]]) for n, d in nodesWithData)
    fig, ax = plt.subplots(1, figsize=(18,3), dpi=100)
    nx.draw(g, pos=pos, ax=ax, node_size=300, node_color="w")
    for _, attrs in nodesWithData:
        if not attrs:
            continue
        plt.text(attrs["x"], attrs["y"]-.1,
                 s=attrs.get("subSessNum","NA"),
                 horizontalalignment='center')
    plt.show()
def plotSessGraphAndInfo(i_x):
i,x = i_x
sessInfoList = x[1]["info"]
g = sessGraphFromList2(sessInfoList)
dg = degreeDistrib(g)
print "############################################"
print "session graph index:", i
if any([k>1 for k in dg.keys()]) \
or any([v>1 for k,v in dg.iteritems() if k!=1]):
print "bad degree distribution: ",dg
subSessCounterMult = collections.Counter(map(lambda x:x["profileSubsessionCounter"], sessInfoList))
if any([v>1 for v in subSessCounterMult.values()]):
print "repeated profileSubsessionCounter(s): ", {c:m for c,m in subSessCounterMult.iteritems() if m>1}
# print
# print collections.Counter(map(lambda x:x["profileSubsessionCounter"], sessInfoList))
print "subsessions by date"
plotSessGraph(g)
print "subsessions by profileSubsessionCounter (x-axis)"
g = sessGraphFromList2(sessInfoList,byDate=False)
plotSessGraph(g)
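In what follows, two plots are drawn to depict the subsession history of each client in a sample. For each client, we draw: (1) the subsessions laid out by session start date, and (2) the subsessions laid out by profileSubsessionCounter on the x-axis.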
# Worked example: print the diagnostics and draw both plots for the first
# sampled client.
i=0
plotSessGraphAndInfo((i,bg[i]))
# bg[i]
############################################ session graph index: 0 bad degree distribution: Counter({1: 36, 0: 2}) subsessions by date
subsessions by profileSubsessionCounter (x-axis)
The following clients have had their profileSubsessionCounter reset at some point. Some of these also have missing subsessions.
resetClients = []
for i,clientData in enumerate(bg):
clientDates = [ss.get('sessionStartDate',0) for ss in clientData[1]["info"]]
if any([ (ss.get('profileSubsessionCounter',0)==1
and any(ss.get('sessionStartDate',None)>d for d in clientDates))
for ss in clientData[1]["info"]]):
print i
plotSessGraphAndInfo((i,clientData))
resetClients.append(i)
0 ############################################ session graph index: 0 bad degree distribution: Counter({1: 36, 0: 2}) subsessions by date
subsessions by profileSubsessionCounter (x-axis)
1 ############################################ session graph index: 1 bad degree distribution: Counter({1: 11, 0: 2}) subsessions by date
subsessions by profileSubsessionCounter (x-axis)
2 ############################################ session graph index: 2 bad degree distribution: Counter({1: 36, 0: 2}) subsessions by date
subsessions by profileSubsessionCounter (x-axis)
4 ############################################ session graph index: 4 bad degree distribution: Counter({1: 12, 0: 2}) repeated profileSubsessionCounter(s): {1: 2, 2: 2, 3: 2, 4: 2, 5: 2} subsessions by date
subsessions by profileSubsessionCounter (x-axis)
6 ############################################ session graph index: 6 bad degree distribution: Counter({1: 14, 0: 2}) subsessions by date
subsessions by profileSubsessionCounter (x-axis)
7 ############################################ session graph index: 7 bad degree distribution: Counter({1: 34, 0: 3}) subsessions by date
subsessions by profileSubsessionCounter (x-axis)
8 ############################################ session graph index: 8 bad degree distribution: Counter({0: 2}) subsessions by date
subsessions by profileSubsessionCounter (x-axis)
9 ############################################ session graph index: 9 bad degree distribution: Counter({1: 44, 0: 2}) subsessions by date
subsessions by profileSubsessionCounter (x-axis)
16 ############################################ session graph index: 16 bad degree distribution: Counter({1: 71, 0: 3}) subsessions by date
subsessions by profileSubsessionCounter (x-axis)
17 ############################################ session graph index: 17 bad degree distribution: Counter({1: 5, 0: 2}) subsessions by date
subsessions by profileSubsessionCounter (x-axis)
18 ############################################ session graph index: 18 bad degree distribution: Counter({1: 66, 0: 3}) repeated profileSubsessionCounter(s): {1: 2} subsessions by date
subsessions by profileSubsessionCounter (x-axis)
19 ############################################ session graph index: 19 bad degree distribution: Counter({1: 18, 0: 2}) subsessions by date
subsessions by profileSubsessionCounter (x-axis)
21 ############################################ session graph index: 21 bad degree distribution: Counter({1: 23, 0: 2}) subsessions by date
subsessions by profileSubsessionCounter (x-axis)
22 ############################################ session graph index: 22 bad degree distribution: Counter({1: 23, 0: 2}) subsessions by date
subsessions by profileSubsessionCounter (x-axis)
24 ############################################ session graph index: 24 bad degree distribution: Counter({1: 87, 0: 2}) subsessions by date
subsessions by profileSubsessionCounter (x-axis)
25 ############################################ session graph index: 25 bad degree distribution: Counter({1: 51, 0: 3}) subsessions by date
subsessions by profileSubsessionCounter (x-axis)
26 ############################################ session graph index: 26 bad degree distribution: Counter({1: 112, 0: 3}) subsessions by date
subsessions by profileSubsessionCounter (x-axis)
27 ############################################ session graph index: 27 bad degree distribution: Counter({1: 40, 0: 2}) subsessions by date
subsessions by profileSubsessionCounter (x-axis)
30 ############################################ session graph index: 30 bad degree distribution: Counter({1: 67, 0: 2}) subsessions by date
subsessions by profileSubsessionCounter (x-axis)
31 ############################################ session graph index: 31 bad degree distribution: Counter({1: 8, 0: 2}) subsessions by date
subsessions by profileSubsessionCounter (x-axis)
32 ############################################ session graph index: 32 bad degree distribution: Counter({1: 14, 0: 2}) repeated profileSubsessionCounter(s): {1: 2, 2: 2, 3: 2, 4: 2, 5: 2} subsessions by date
subsessions by profileSubsessionCounter (x-axis)
33 ############################################ session graph index: 33 bad degree distribution: Counter({1: 26, 0: 2}) subsessions by date
subsessions by profileSubsessionCounter (x-axis)
34 ############################################ session graph index: 34 bad degree distribution: Counter({1: 23, 0: 2}) subsessions by date
subsessions by profileSubsessionCounter (x-axis)
35 ############################################ session graph index: 35 bad degree distribution: Counter({1: 71, 0: 3}) subsessions by date
subsessions by profileSubsessionCounter (x-axis)
36 ############################################ session graph index: 36 bad degree distribution: Counter({1: 29, 0: 2}) subsessions by date
subsessions by profileSubsessionCounter (x-axis)
37 ############################################ session graph index: 37 bad degree distribution: Counter({1: 38, 0: 2}) subsessions by date
subsessions by profileSubsessionCounter (x-axis)
38 ############################################ session graph index: 38 bad degree distribution: Counter({1: 3, 0: 2}) repeated profileSubsessionCounter(s): {1: 2, 2: 2} subsessions by date
subsessions by profileSubsessionCounter (x-axis)
41 ############################################ session graph index: 41 bad degree distribution: Counter({1: 64, 0: 2}) subsessions by date
subsessions by profileSubsessionCounter (x-axis)
42 ############################################ session graph index: 42 bad degree distribution: Counter({1: 29, 0: 2}) subsessions by date
subsessions by profileSubsessionCounter (x-axis)
44 ############################################ session graph index: 44 bad degree distribution: Counter({1: 8, 0: 2}) repeated profileSubsessionCounter(s): {1: 2} subsessions by date
subsessions by profileSubsessionCounter (x-axis)
45 ############################################ session graph index: 45 bad degree distribution: Counter({1: 7, 0: 2}) subsessions by date
subsessions by profileSubsessionCounter (x-axis)
46 ############################################ session graph index: 46 bad degree distribution: Counter({1: 18, 0: 2}) subsessions by date
subsessions by profileSubsessionCounter (x-axis)
47 ############################################ session graph index: 47 bad degree distribution: Counter({1: 28, 0: 2}) repeated profileSubsessionCounter(s): {1: 2, 2: 2, 3: 2} subsessions by date
subsessions by profileSubsessionCounter (x-axis)
48 ############################################ session graph index: 48 bad degree distribution: Counter({1: 33, 0: 2}) subsessions by date
subsessions by profileSubsessionCounter (x-axis)
49 ############################################ session graph index: 49 bad degree distribution: Counter({1: 18, 0: 4}) subsessions by date
subsessions by profileSubsessionCounter (x-axis)
51 ############################################ session graph index: 51 bad degree distribution: Counter({1: 65, 0: 2}) subsessions by date
subsessions by profileSubsessionCounter (x-axis)
52 ############################################ session graph index: 52 bad degree distribution: Counter({1: 28, 0: 2}) subsessions by date
subsessions by profileSubsessionCounter (x-axis)
54 ############################################ session graph index: 54 bad degree distribution: Counter({1: 34, 0: 2}) subsessions by date
subsessions by profileSubsessionCounter (x-axis)
55 ############################################ session graph index: 55 bad degree distribution: Counter({1: 109, 0: 3}) subsessions by date
subsessions by profileSubsessionCounter (x-axis)
56 ############################################ session graph index: 56 bad degree distribution: Counter({1: 86, 0: 2}) subsessions by date
subsessions by profileSubsessionCounter (x-axis)
57 ############################################ session graph index: 57 bad degree distribution: Counter({0: 2, 1: 2}) repeated profileSubsessionCounter(s): {1: 2, 2: 2} subsessions by date
subsessions by profileSubsessionCounter (x-axis)
59 ############################################ session graph index: 59 bad degree distribution: Counter({1: 120, 0: 2}) subsessions by date
subsessions by profileSubsessionCounter (x-axis)
60 ############################################ session graph index: 60 bad degree distribution: Counter({1: 45, 0: 2}) subsessions by date
subsessions by profileSubsessionCounter (x-axis)
62 ############################################ session graph index: 62 bad degree distribution: Counter({1: 11, 0: 2}) subsessions by date
subsessions by profileSubsessionCounter (x-axis)
63 ############################################ session graph index: 63 bad degree distribution: Counter({1: 96, 0: 3}) subsessions by date
subsessions by profileSubsessionCounter (x-axis)
64 ############################################ session graph index: 64 bad degree distribution: Counter({1: 10, 0: 2}) subsessions by date
subsessions by profileSubsessionCounter (x-axis)
65 ############################################ session graph index: 65 bad degree distribution: Counter({1: 76, 0: 3}) subsessions by date
subsessions by profileSubsessionCounter (x-axis)
66 ############################################ session graph index: 66 bad degree distribution: Counter({1: 22, 0: 2}) repeated profileSubsessionCounter(s): {1: 2} subsessions by date
subsessions by profileSubsessionCounter (x-axis)
67 ############################################ session graph index: 67 bad degree distribution: Counter({1: 103, 0: 3}) subsessions by date
subsessions by profileSubsessionCounter (x-axis)
68 ############################################ session graph index: 68 bad degree distribution: Counter({1: 6, 0: 2}) repeated profileSubsessionCounter(s): {1: 2, 2: 2, 3: 2} subsessions by date
subsessions by profileSubsessionCounter (x-axis)
70 ############################################ session graph index: 70 bad degree distribution: Counter({1: 24, 0: 2}) subsessions by date
subsessions by profileSubsessionCounter (x-axis)
71 ############################################ session graph index: 71 bad degree distribution: Counter({1: 15, 0: 4}) repeated profileSubsessionCounter(s): {1: 3, 2: 3, 3: 3, 4: 2} subsessions by date
subsessions by profileSubsessionCounter (x-axis)
72 ############################################ session graph index: 72 bad degree distribution: Counter({1: 10, 0: 2}) subsessions by date
subsessions by profileSubsessionCounter (x-axis)
74 ############################################ session graph index: 74 bad degree distribution: Counter({1: 30, 0: 2}) subsessions by date
subsessions by profileSubsessionCounter (x-axis)
75 ############################################ session graph index: 75 bad degree distribution: Counter({1: 114, 0: 2}) subsessions by date
subsessions by profileSubsessionCounter (x-axis)
77 ############################################ session graph index: 77 bad degree distribution: Counter({1: 10, 0: 2}) subsessions by date
subsessions by profileSubsessionCounter (x-axis)
78 ############################################ session graph index: 78 bad degree distribution: Counter({1: 93, 0: 2}) repeated profileSubsessionCounter(s): {1: 2, 2: 2, 3: 2, 4: 2, 5: 2, 6: 2, 7: 2, 8: 2, 9: 2, 10: 2, 11: 2, 12: 2, 13: 2, 14: 2, 15: 2, 16: 2, 17: 2, 18: 2, 19: 2, 20: 2, 21: 2, 22: 2, 23: 2, 24: 2, 25: 2, 26: 2, 27: 2, 28: 2, 29: 2, 30: 2, 31: 2, 32: 2, 33: 2, 34: 2, 35: 2, 36: 2, 37: 2, 38: 2, 39: 2, 40: 2, 41: 2, 42: 2, 43: 2, 44: 2, 45: 2, 46: 2} subsessions by date
subsessions by profileSubsessionCounter (x-axis)
80 ############################################ session graph index: 80 bad degree distribution: Counter({1: 4, 0: 2}) subsessions by date
subsessions by profileSubsessionCounter (x-axis)
81 ############################################ session graph index: 81 bad degree distribution: Counter({1: 31, 0: 2}) subsessions by date
subsessions by profileSubsessionCounter (x-axis)
82 ############################################ session graph index: 82 bad degree distribution: Counter({1: 95, 0: 2}) subsessions by date
subsessions by profileSubsessionCounter (x-axis)
83 ############################################ session graph index: 83 bad degree distribution: Counter({1: 15, 0: 2}) subsessions by date
subsessions by profileSubsessionCounter (x-axis)
84 ############################################ session graph index: 84 bad degree distribution: Counter({0: 2}) repeated profileSubsessionCounter(s): {1: 2} subsessions by date
subsessions by profileSubsessionCounter (x-axis)
85 ############################################ session graph index: 85 bad degree distribution: Counter({1: 87, 0: 6}) repeated profileSubsessionCounter(s): {1: 3, 2: 2, 3: 2, 4: 2, 5: 2, 6: 2, 7: 2, 8: 2, 9: 2, 10: 3, 11: 3, 12: 2, 13: 2, 14: 2, 15: 2, 16: 2, 17: 2, 18: 2, 19: 2, 20: 2, 21: 2, 22: 2, 23: 2, 24: 2, 25: 2, 26: 2, 27: 2, 28: 2, 29: 2, 30: 2, 31: 2, 33: 2, 34: 2, 36: 2} subsessions by date
subsessions by profileSubsessionCounter (x-axis)
88 ############################################ session graph index: 88 bad degree distribution: Counter({1: 30, 0: 2}) subsessions by date
subsessions by profileSubsessionCounter (x-axis)
89 ############################################ session graph index: 89 bad degree distribution: Counter({1: 25, 0: 2}) subsessions by date
subsessions by profileSubsessionCounter (x-axis)
92 ############################################ session graph index: 92 bad degree distribution: Counter({1: 81, 0: 2}) subsessions by date
subsessions by profileSubsessionCounter (x-axis)
95 ############################################ session graph index: 95 bad degree distribution: Counter({0: 8, 1: 4}) repeated profileSubsessionCounter(s): {1: 3} subsessions by date
subsessions by profileSubsessionCounter (x-axis)
96 ############################################ session graph index: 96 bad degree distribution: Counter({1: 72, 0: 2}) subsessions by date
subsessions by profileSubsessionCounter (x-axis)
97 ############################################ session graph index: 97 bad degree distribution: Counter({1: 29, 0: 2}) subsessions by date
subsessions by profileSubsessionCounter (x-axis)
98 ############################################ session graph index: 98 bad degree distribution: Counter({1: 40, 0: 2}) subsessions by date
subsessions by profileSubsessionCounter (x-axis)
99 ############################################ session graph index: 99 bad degree distribution: Counter({1: 27, 0: 3}) subsessions by date
subsessions by profileSubsessionCounter (x-axis)
Most of the other anomalies showing up here are simple gaps in the subsession history, but there are a couple of more interesting examples, so please feel free to scrutinize them. Number 73 is particularly interesting: it may be a copied profile or something similar, or it may be another bug.
# Plot the session graph of every client in bg whose index is not listed
# in resetClients, printing the index first so each plot can be identified.
for i, clientData in enumerate(bg):
    if i in resetClients:
        continue
    print(i)
    plotSessGraphAndInfo((i, clientData))
3 ############################################ session graph index: 3 bad degree distribution: Counter({1: 3, 0: 2}) subsessions by date
subsessions by profileSubsessionCounter (x-axis)
5 ############################################ session graph index: 5 bad degree distribution: Counter({1: 26, 0: 3}) subsessions by date
subsessions by profileSubsessionCounter (x-axis)
10 ############################################ session graph index: 10 bad degree distribution: Counter({1: 31, 0: 2}) subsessions by date
subsessions by profileSubsessionCounter (x-axis)
11 ############################################ session graph index: 11 bad degree distribution: Counter({1: 8, 0: 2}) subsessions by date
subsessions by profileSubsessionCounter (x-axis)
12 ############################################ session graph index: 12 bad degree distribution: Counter({1: 3, 0: 2}) subsessions by date
subsessions by profileSubsessionCounter (x-axis)
13 ############################################ session graph index: 13 bad degree distribution: Counter({1: 40, 0: 2}) subsessions by date
subsessions by profileSubsessionCounter (x-axis)
14 ############################################ session graph index: 14 bad degree distribution: Counter({1: 14, 0: 2}) subsessions by date
subsessions by profileSubsessionCounter (x-axis)
15 ############################################ session graph index: 15 bad degree distribution: Counter({1: 31, 0: 3}) subsessions by date
subsessions by profileSubsessionCounter (x-axis)
20 ############################################ session graph index: 20 bad degree distribution: Counter({1: 6, 0: 2}) subsessions by date
subsessions by profileSubsessionCounter (x-axis)
23 ############################################ session graph index: 23 bad degree distribution: Counter({1: 45, 0: 2}) repeated profileSubsessionCounter(s): {112: 2, 113: 2, 114: 2} subsessions by date
subsessions by profileSubsessionCounter (x-axis)
28 ############################################ session graph index: 28 bad degree distribution: Counter({1: 14, 0: 2}) subsessions by date
subsessions by profileSubsessionCounter (x-axis)
29 ############################################ session graph index: 29 bad degree distribution: Counter({1: 73, 0: 2}) subsessions by date
subsessions by profileSubsessionCounter (x-axis)
39 ############################################ session graph index: 39 bad degree distribution: Counter({1: 23, 0: 2}) subsessions by date
subsessions by profileSubsessionCounter (x-axis)
40 ############################################ session graph index: 40 bad degree distribution: Counter({1: 5, 0: 2}) subsessions by date
subsessions by profileSubsessionCounter (x-axis)
43 ############################################ session graph index: 43 bad degree distribution: Counter({1: 32, 0: 2}) subsessions by date
subsessions by profileSubsessionCounter (x-axis)
50 ############################################ session graph index: 50 bad degree distribution: Counter({1: 45, 0: 5}) subsessions by date
subsessions by profileSubsessionCounter (x-axis)
53 ############################################ session graph index: 53 bad degree distribution: Counter({0: 2}) subsessions by date
subsessions by profileSubsessionCounter (x-axis)
58 ############################################ session graph index: 58 bad degree distribution: Counter({1: 15, 0: 2}) subsessions by date
subsessions by profileSubsessionCounter (x-axis)
61 ############################################ session graph index: 61 bad degree distribution: Counter({1: 68, 0: 2}) subsessions by date
subsessions by profileSubsessionCounter (x-axis)
69 ############################################ session graph index: 69 bad degree distribution: Counter({1: 7, 0: 2}) subsessions by date
subsessions by profileSubsessionCounter (x-axis)
73 ############################################ session graph index: 73 bad degree distribution: Counter({1: 39, 0: 2}) subsessions by date
subsessions by profileSubsessionCounter (x-axis)
76 ############################################ session graph index: 76 bad degree distribution: Counter({1: 45, 0: 2}) subsessions by date
subsessions by profileSubsessionCounter (x-axis)
79 ############################################ session graph index: 79 bad degree distribution: Counter({1: 34, 0: 2}) subsessions by date
subsessions by profileSubsessionCounter (x-axis)
86 ############################################ session graph index: 86 bad degree distribution: Counter({1: 18, 0: 2}) subsessions by date
subsessions by profileSubsessionCounter (x-axis)
87 ############################################ session graph index: 87 bad degree distribution: Counter({1: 45, 0: 2}) subsessions by date
subsessions by profileSubsessionCounter (x-axis)
90 ############################################ session graph index: 90 bad degree distribution: Counter({1: 3, 0: 2}) subsessions by date
subsessions by profileSubsessionCounter (x-axis)
91 ############################################ session graph index: 91 bad degree distribution: Counter({1: 5, 0: 2}) subsessions by date
subsessions by profileSubsessionCounter (x-axis)
93 ############################################ session graph index: 93 bad degree distribution: Counter({1: 9, 0: 2}) subsessions by date
subsessions by profileSubsessionCounter (x-axis)
94 ############################################ session graph index: 94 bad degree distribution: Counter({1: 64, 0: 2}) subsessions by date
subsessions by profileSubsessionCounter (x-axis)
# Index (into bg) of the client to inspect in detail.
i = 4
# Pass i itself as the label so the printed index always matches the data
# being plotted (the label used to be a hard-coded 5 while the data was bg[4]).
plotSessGraphAndInfo((i, bg[i]))
# Keys extracted from each entry of a client's "info" list for manual inspection.
# Keys the list deliberately leaves out (previously kept as commented-out
# entries): reason, revision, sessionId, subsessionCounter, subsessionLength,
# subsessionStartDate.
keysToGet = ["previousSubsessionId", u'profileSubsessionCounter',
             u'sessionStartDate', u'subsessionId']
def getKeysFromDict(d, keys, default="MISSING"):
    """Return a new dict restricted to the requested keys of ``d``.

    Args:
        d: mapping to read from; only its ``get`` method is used.
        keys: iterable of keys to extract.
        default: value stored for any key that ``d`` does not contain.
            Defaults to the string "MISSING".

    Returns:
        A dict mapping every key in ``keys`` to ``d``'s value for that key,
        or to ``default`` when the key is absent.
    """
    return {k: d.get(k, default) for k in keys}
# Reduce every "info" record of client i to just the keysToGet fields.
# NOTE(review): presumably left as a bare expression so the notebook displays
# the resulting list as the cell's output.
[getKeysFromDict(d,keysToGet) for d in bg[i][1]["info"]]
# Alternative kept for reference: the full, unfiltered records.
# bg[i][1]["info"]
5 bad degree distribution: Counter({1: 12, 0: 2}) repeated profileSubsessionCounter(s): {1: 2, 2: 2, 3: 2, 4: 2, 5: 2}
[{'previousSubsessionId': 'MISSING', u'profileSubsessionCounter': 1, u'sessionStartDate': u'2015-05-24T00:00:00.0+02:00', u'subsessionId': u'e3c6a031-a564-4148-833b-a0946b7e8dfe'}, {'previousSubsessionId': 'MISSING', u'profileSubsessionCounter': 1, u'sessionStartDate': u'2015-05-29T00:00:00.0+02:00', u'subsessionId': u'8d8b9500-1d2f-4ac1-80b6-1c6bd9d708ed'}, {'previousSubsessionId': u'e3c6a031-a564-4148-833b-a0946b7e8dfe', u'profileSubsessionCounter': 2, u'sessionStartDate': u'2015-05-24T00:00:00.0+02:00', u'subsessionId': u'dfbba49e-dda7-4194-be93-dd992c26395d'}, {'previousSubsessionId': u'8d8b9500-1d2f-4ac1-80b6-1c6bd9d708ed', u'profileSubsessionCounter': 2, u'sessionStartDate': u'2015-05-29T00:00:00.0+02:00', u'subsessionId': u'8a2c99a3-a6e4-4407-8a4b-3e0ad21cc73a'}, {'previousSubsessionId': u'dfbba49e-dda7-4194-be93-dd992c26395d', u'profileSubsessionCounter': 3, u'sessionStartDate': u'2015-05-25T00:00:00.0+02:00', u'subsessionId': u'837a6ae2-741c-49c9-93b7-bf5b5d800076'}, {'previousSubsessionId': u'8a2c99a3-a6e4-4407-8a4b-3e0ad21cc73a', u'profileSubsessionCounter': 3, u'sessionStartDate': u'2015-05-29T00:00:00.0+02:00', u'subsessionId': u'd762d328-a239-4c5c-aa91-bf60b9b8efdd'}, {'previousSubsessionId': u'd762d328-a239-4c5c-aa91-bf60b9b8efdd', u'profileSubsessionCounter': 4, u'sessionStartDate': u'2015-05-29T00:00:00.0+02:00', u'subsessionId': u'e40b7a12-ebe5-4ec4-af1b-4245f4cb1738'}, {'previousSubsessionId': u'837a6ae2-741c-49c9-93b7-bf5b5d800076', u'profileSubsessionCounter': 4, u'sessionStartDate': u'2015-05-25T00:00:00.0+02:00', u'subsessionId': u'f2cb54f6-7843-40e2-8528-fa70249709c8'}, {'previousSubsessionId': u'e40b7a12-ebe5-4ec4-af1b-4245f4cb1738', u'profileSubsessionCounter': 5, u'sessionStartDate': u'2015-05-30T00:00:00.0+02:00', u'subsessionId': u'b7b08e22-7347-4a52-95f8-ac154dda8966'}, {'previousSubsessionId': u'f2cb54f6-7843-40e2-8528-fa70249709c8', u'profileSubsessionCounter': 5, u'sessionStartDate': u'2015-05-26T00:00:00.0+02:00', u'subsessionId': 
u'6eb624c4-d26a-4db5-8df7-7d8fbcdaf222'}, {'previousSubsessionId': u'6eb624c4-d26a-4db5-8df7-7d8fbcdaf222', u'profileSubsessionCounter': 6, u'sessionStartDate': u'2015-05-26T00:00:00.0+02:00', u'subsessionId': u'3591a016-cafc-4374-862b-066fbffa11c3'}, {'previousSubsessionId': u'3591a016-cafc-4374-862b-066fbffa11c3', u'profileSubsessionCounter': 7, u'sessionStartDate': u'2015-05-27T00:00:00.0+02:00', u'subsessionId': u'dbecc8e0-9bef-4873-9c48-ce141d64bd20'}, {'previousSubsessionId': u'dbecc8e0-9bef-4873-9c48-ce141d64bd20', u'profileSubsessionCounter': 8, u'sessionStartDate': u'2015-05-28T00:00:00.0+02:00', u'subsessionId': u'adbb5249-67de-42f9-91d2-c14e4d65b6c1'}, {'previousSubsessionId': u'adbb5249-67de-42f9-91d2-c14e4d65b6c1', u'profileSubsessionCounter': 9, u'sessionStartDate': u'2015-05-29T00:00:00.0+02:00', u'subsessionId': u'bf230e47-6d3c-4f54-8393-8ccf382c6a7a'}]
# Proleptic Gregorian ordinal of 2015-05-24; the date is fixed, so it is
# built directly instead of being parsed from a string.
datetime.date(2015, 5, 24).toordinal()
735742