import ujson as json
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import plotly.plotly as py
import networkx as nx
import collections
from IPython.display import display,HTML
from moztelemetry import get_pings, get_pings_properties, get_one_ping_per_client
%pylab inline
Populating the interactive namespace from numpy and matplotlib
# Open an S3 connection through boto; it is used further down to list the
# bucket that holds the merged per-client data.
import boto
conn = boto.connect_s3()
# NOTE(review): `sc` is never defined in this file -- presumably the
# SparkContext injected by the notebook environment; confirm.
numCores = sc.defaultParallelism
numCores
64
# Bucket and key prefix under which the merged per-client data parts live.
outBucketName = "net-mozaws-prod-us-west-2-pipeline-analysis"
pathToOutput = "bcolloran/mergedDataPerClient/nightly/2015-07-09/8937clients/"
#list the files in the bucket
outBucket = conn.get_bucket(outBucketName) # Substitute in your bucket name
bl = outBucket.list(prefix=pathToOutput)
# Total size of every key under the prefix, reported in GB (10**9 bytes).
mergedDataSize = sum(key.size for key in bl)
print "mergedDataPerClient_repart data size:", mergedDataSize/(1.0*(10**9)),"GB"
# Show the last five keys of the listing as a sanity check.
list(bl)[-5:]
mergedDataPerClient_repart data size: 3.320365309 GB
[<Key: net-mozaws-prod-us-west-2-pipeline-analysis,bcolloran/mergedDataPerClient/nightly/2015-07-09/8937clients/part-00195>, <Key: net-mozaws-prod-us-west-2-pipeline-analysis,bcolloran/mergedDataPerClient/nightly/2015-07-09/8937clients/part-00196>, <Key: net-mozaws-prod-us-west-2-pipeline-analysis,bcolloran/mergedDataPerClient/nightly/2015-07-09/8937clients/part-00197>, <Key: net-mozaws-prod-us-west-2-pipeline-analysis,bcolloran/mergedDataPerClient/nightly/2015-07-09/8937clients/part-00198>, <Key: net-mozaws-prod-us-west-2-pipeline-analysis,bcolloran/mergedDataPerClient/nightly/2015-07-09/8937clients/part-00199>]
#load the data--
# for a tiny sample, you can load one part: "part-00000"
# or you can do more--
# ten parts: part-0000*
# or 10% of parts: part-*0
# or all parts: part-*
# Here the ten-part glob ("part-0000*") is used.
pathToMergedData = "s3n://"+outBucketName+"/"+pathToOutput+"part-0000*"
print pathToMergedData
# Values are JSON strings at this point; they are parsed with json.loads below.
mergedData = sc.sequenceFile(pathToMergedData)
s3n://net-mozaws-prod-us-west-2-pipeline-analysis/bcolloran/mergedDataPerClient/nightly/2015-07-09/8937clients/part-0000*
# Number of records in the loaded sample.
mergedData.count()
443
# Peek at a single record: keep the key as-is and parse the JSON value.
mergeSample = mergedData.first()
oneClient = (mergeSample[0],json.loads(mergeSample[1]))
# Top-level keys of the parsed record, then the keys of its first v4 entry.
print oneClient[1].keys()
oneClient[1]["v4"][0].keys()
[u'v2', u'v4', u'clientId']
[u'payload/info', u'payload/simpleMeasurements/totalTime', u'payload/simpleMeasurements/sessionRestored', u'type', u'payload/simpleMeasurements/main', u'payload/simpleMeasurements/firstPaint', u'clientId', u'environment', u'application', u'payload/histograms/PLACES_PAGES_COUNT', u'version', u'payload/simpleMeasurements/activeTicks', u'meta/appUpdateChannel', u'creationDate', u'payload/histograms/PLACES_BOOKMARKS_COUNT', u'id', u'payload/keyedHistograms/SEARCH_COUNTS']
# Print one day of this client's v2 data, then only that day's
# 'org.mozilla.appSessions.previous' entry ({} if the day has none).
# Indexing .values() directly relies on Python 2 returning a list.
print oneClient[1]["v2"]["data"]["days"].values()[0]
print oneClient[1]["v2"]["data"]["days"].values()[0].get('org.mozilla.appSessions.previous',{})
# [d.get('org.mozilla.appSessions.previous',{}) for d in oneClient[1]["v2"]["data"]["days"].values()]
{u'org.mozilla.sync.sync': {u'activeProtocol': u'1.5', u'_v': 1, u'enabled': 0, u'preferredProtocol': u'1.5'}, u'org.mozilla.appSessions.previous': {u'cleanActiveTicks': [16, 329, 164], u'_v': 3, u'firstPaint': [527, 573, 1471], u'cleanTotalTime': [109, 11007, 40213], u'main': [55, 50, 159], u'sessionRestored': [715, 812, 1662]}, u'org.mozilla.appInfo.update': {u'_v': 1, u'enabled': 1, u'autoDownload': 1}, u'org.mozilla.appInfo.appinfo': {u'isBlocklistEnabled': 1, u'_v': 2, u'isDefaultBrowser': 0, u'isTelemetryEnabled': 0}, u'org.mozilla.places.places': {u'_v': 1, u'bookmarks': 22, u'pages': 34}, u'org.mozilla.appInfo.versions': {u'_v': 2, u'platformBuildID': [u'20150502030208'], u'appBuildID': [u'20150502030208']}, u'org.mozilla.searches.engines': {u'default': u'google', u'_v': 1}, u'org.mozilla.addons.counts': {u'_v': 2, u'theme': 2, u'extension': 2, u'plugin': 2}, u'org.mozilla.passwordmgr.passwordmgr': {u'numSavedPasswords': 0, u'_v': 2, u'enabled': 1, u'numTotalLoginsEncountered': 4}} {u'cleanActiveTicks': [16, 329, 164], u'_v': 3, u'firstPaint': [527, 573, 1471], u'cleanTotalTime': [109, 11007, 40213], u'main': [55, 50, 159], u'sessionRestored': [715, 812, 1662]}
"Session signatures" are tuples of (main, firstPaint, sessionRestored). Supporting data includes the date and a few other items, including the values of activeTicks and totalTime.
Important note: in v2, we distinguish between "clean" and "aborted" sessions; on any day that has both a clean and an aborted session, it is not possible to know which values of activeTicks and totalTime should be matched with which session. Therefore, we drop all v2 session signatures that are recorded on a day with both clean and aborted sessions.
Also, the version of getV4SessionSignatures in this notebook is different than the one used in e.g.
because in this case we don't want just the first subsession of each session
# Parse the first ten records and list the fields of the first v4 entry
# of the first one.
mf10 = mergedData.mapValues(json.loads).take(10)
mf10[0][1]["v4"][0].keys()
[u'payload/info', u'payload/simpleMeasurements/totalTime', u'payload/simpleMeasurements/sessionRestored', u'payload/histograms/PLACES_BOOKMARKS_COUNT', u'payload/simpleMeasurements/main', u'payload/simpleMeasurements/firstPaint', u'clientId', u'environment', u'application', u'version', u'payload/keyedHistograms/SEARCH_COUNTS', u'payload/simpleMeasurements/activeTicks', u'meta/appUpdateChannel', u'creationDate', u'type', u'id', u'payload/histograms/PLACES_PAGES_COUNT']
Per bug https://bugzilla.mozilla.org/show_bug.cgi?id=1187054 , we need to look at the payload/info/subsessionLength field.
def getV4SessionSignatures(v4Subsessions):
    """Return one signature record per v4 subsession.

    Every element of ``v4Subsessions`` is converted (no filtering on
    ``subsessionCounter`` is done here), in input order.

    Args:
        v4Subsessions: iterable of flattened v4 ping dicts. Each must carry
            the 'payload/info', 'environment' and
            'payload/simpleMeasurements/*' keys read below.

    Returns:
        A list of dicts with keys "date", "ssCounter", "profSsCounter",
        "buildId", "sig", "activeTicks", "totalTime" and "subsessionLength".
        "sig" is the tuple (main, firstPaint, sessionRestored) and "date" is
        the first ten characters of the session start date.

    Raises:
        KeyError: if a ping lacks one of the expected keys.
    """
    records = []
    for ping in v4Subsessions:
        records.append({
            # Keep only the leading ten characters of the start date.
            "date": ping['payload/info']['sessionStartDate'][0:10],
            "ssCounter": ping['payload/info']['subsessionCounter'],
            "profSsCounter": ping['payload/info']['profileSubsessionCounter'],
            "buildId": ping['environment']['build']['buildId'],
            "sig": (ping['payload/simpleMeasurements/main'],
                    ping['payload/simpleMeasurements/firstPaint'],
                    ping['payload/simpleMeasurements/sessionRestored']),
            "activeTicks": ping['payload/simpleMeasurements/activeTicks'],
            "totalTime": ping['payload/simpleMeasurements/totalTime'],
            "subsessionLength": ping['payload/info']['subsessionLength'],
        })
    return records
def getV2SessionSignatures(v2):
    """Extract session signatures from a client's v2 payload.

    Only days with a non-empty 'org.mozilla.appSessions.previous' entry are
    considered. A day that has BOTH 'cleanActiveTicks' and
    'abortedActiveTicks' is skipped, because its activeTicks/totalTime
    values cannot be matched to individual sessions.

    Args:
        v2: parsed v2 payload; ``v2['data']['days']`` must map a date
            string to that day's measurements dict.

    Returns:
        A list of dicts with keys "date", "sig", "activeTicks" and
        "totalTime", where "sig" is (main, firstPaint, sessionRestored).
        If any day's session data is malformed, the string "ERROR" is
        returned instead of a list (legacy sentinel kept for callers); a
        single bad day therefore discards all of the client's signatures.

    Raises:
        KeyError: if ``v2`` has no ``['data']['days']`` (this lookup is
            outside the guarded section, as before).
    """
    sessionKey = 'org.mozilla.appSessions.previous'
    # get the data.days entries with session starts
    sessStartDays = [(date, day[sessionKey])
                     for date, day in v2['data']['days'].items()
                     if day.get(sessionKey)]
    # for each date with a session start, get the session signatures from that date
    sessStarts = []
    try:
        for date, data in sessStartDays:
            hasClean = 'cleanActiveTicks' in data
            hasAborted = 'abortedActiveTicks' in data
            if hasClean and hasAborted:
                # Ambiguous day: cannot pair ticks/time with sessions.
                continue
            if hasClean:
                activeTickKey = 'cleanActiveTicks'
                totalTimeKey = 'cleanTotalTime'
            else:
                activeTickKey = 'abortedActiveTicks'
                totalTimeKey = 'abortedTotalTime'
            for i in range(len(data['main'])):
                sessStarts.append(
                    {"date": date,
                     "sig": (data['main'][i],
                             data['firstPaint'][i],
                             data['sessionRestored'][i]),
                     "activeTicks": data[activeTickKey][i],
                     "totalTime": data[totalTimeKey][i]})
    except Exception:
        # Best-effort: malformed per-day data yields the sentinel rather than
        # failing the whole job. `Exception` (not a bare except) lets
        # KeyboardInterrupt/SystemExit propagate.
        return "ERROR"
    return sessStarts
# For every record: parse the JSON value, then replace it with the lists
# of v4 and v2 session signatures extracted from it.
sessionSigs = mergedData.mapValues(json.loads)\
.mapValues(lambda x:{"v4sigs":getV4SessionSignatures(x["v4"]),
"v2sigs":getV2SessionSignatures(x["v2"])} )
sessionSigs.count()
443
# Small local sample (first ten records) used for the per-client tables below.
ssf = sessionSigs.take(10)
def isIsoDateStr(x):
    """Return True if ``x`` starts with a ``YYYY-MM-DD`` shaped date.

    Only the shape of the first ten characters is checked (four digits,
    '-', two digits, '-', two digits); calendar validity is not verified
    and any trailing characters are ignored.

    Truncated values such as "2015-07-" or "2015-07-1" are rejected; an
    empty or short day slice used to be accepted by mistake.

    Args:
        x: candidate value; non-sliceable values (None, NaN, numbers, ...)
            yield False rather than raising.

    Returns:
        bool.
    """
    digits = set("0123456789")
    try:
        head = x[0:10]
        # A full date needs all ten characters; shorter input is not a date.
        if len(head) != 10:
            return False
        if head[4] != "-" or head[7] != "-":
            return False
        return all(ch in digits
                   for ch in head[0:4] + head[5:7] + head[8:10])
    except (TypeError, IndexError, KeyError):
        # Not a sliceable sequence of characters.
        return False
# Quick sanity check: a well-formed date, a five-digit year, a non-digit year.
print isIsoDateStr("0000-00-00"),isIsoDateStr("01000-00-00"),isIsoDateStr("w000-00-00")
def sessSortKey(dfRow):
    """Return the (date, profSsCounter) sort key for one merged row.

    The v2 date is used when it looks like an ISO date string; otherwise
    the v4 date is used.

    Args:
        dfRow: mapping/row exposing "date_v2", "date_v4" and "profSsCounter".

    Returns:
        A 2-tuple ``(date, profSsCounter)``.
    """
    dateField = "date_v2" if isIsoDateStr(dfRow["date_v2"]) else "date_v4"
    return dfRow[dateField], dfRow["profSsCounter"]
True False False
def sumTimeAndTicksPerSessSig(v4DataFrame):
    """Collapse v4 subsession rows into one row per session signature.

    Args:
        v4DataFrame: DataFrame with columns "sig", "activeTicks",
            "totalTime", "date", "profSsCounter", "ssCounter", "buildId"
            and "subsessionLength".

    Returns:
        A DataFrame with one row per distinct "sig" holding: summed
        "activeTicks"; the maximum "totalTime" (also exposed as
        "totalTimeMax"); the minimum "date", "profSsCounter", "ssCounter"
        and "buildId"; "numSubsessions" (rows in the group); and
        "summedTime" (sum of "subsessionLength"). "sig" is a regular column.
    """
    bySig = v4DataFrame.groupby("sig")
    reducers = {
        "activeTicks": np.sum,
        "totalTime": max,
        "date": min,
        "profSsCounter": min,
        "ssCounter": min,
        "buildId": min,
    }
    perSession = bySig.agg(reducers)
    # Extra per-group columns that are not simple one-to-one reductions.
    perSession["numSubsessions"] = bySig["ssCounter"].agg(len)
    perSession["summedTime"] = bySig["subsessionLength"].agg(sum)
    perSession["totalTimeMax"] = bySig["totalTime"].agg(max)
    perSession.reset_index(inplace=True)
    return perSession
# """display info about v4 subsessions"""
# for i in range(len(ssf)):
# ssgs = ssf[i]
# dfv2 = pd.DataFrame(ssgs[1]["v2sigs"])
# dfv4 = pd.DataFrame(ssgs[1]["v4sigs"])
# # print dfv4.sort("profSsCounter")
# print dfv4.keys()
# dfv4Grouped = dfv4.groupby("sig")
# print dfv4Grouped
# for name, group in dfv4Grouped:
# if len(group)>1:
# group = group.sort("profSsCounter")
# print "sessionSig:", name
# display(group[['activeTicks','totalTime', 'profSsCounter', 'ssCounter','buildId']])
"""
In this case, since we are only interested in session signatures that appear
in both v2 and v4, we do an inner join.
Ultimately, we'll be most interested in sessions with numSubsessions > 1,
but we'll leave all sessions in for now, b/c we should check that even
simple sessions composed of just one subsession have matching values
for activeTicks and totalTime
"""
def mergedFrameFromSigLists(v2List,v4List):
    """Inner-join a client's v2 and v4 session signatures on "sig".

    The v4 subsessions are first collapsed to one row per signature with
    sumTimeAndTicksPerSessSig. Columns present on both sides get the
    "_v2"/"_v4" suffixes. Matching rows are ordered by sessSortKey and
    re-indexed from 1.

    Args:
        v2List: list of v2 signature dicts, or the "ERROR" sentinel that
            getV2SessionSignatures returns for malformed data.
        v4List: list of v4 signature dicts.

    Returns:
        A DataFrame restricted to ``outCols``. It is empty (but still has
        those columns) when either input is empty, when v2List is the
        error sentinel, when no signature matches, or when building the
        frame fails.
    """
    outCols = ["sig","date_v2","date_v4",
               "totalTime_v2","totalTime_v4","totalTimeMax","summedTime",
               "activeTicks_v2","activeTicks_v4",
               "ssCounter","profSsCounter","buildId","numSubsessions"]
    # Treat the "ERROR" sentinel explicitly instead of relying on
    # pd.DataFrame("ERROR") raising.
    if isinstance(v2List, str) or not (v2List and v4List):
        return pd.DataFrame(columns=outCols)
    try:
        dfv2 = pd.DataFrame(v2List)
        dfv4 = sumTimeAndTicksPerSessSig(pd.DataFrame(v4List))
        df = pd.merge(dfv2, dfv4, how="inner", on="sig",
                      suffixes=("_v2","_v4"))
        if len(df) > 0:
            df["sortKey"] = df.apply(sessSortKey, axis=1)
            # DataFrame.sort was removed from pandas; prefer sort_values and
            # fall back only on old versions. Previously the AttributeError
            # was swallowed below and every client came back empty.
            if hasattr(df, "sort_values"):
                df = df.sort_values("sortKey")
            else:
                df.sort("sortKey", inplace=True)
            df.index = range(1, len(df) + 1)
            return df[outCols]
    except Exception:
        # Best-effort: one malformed client must not fail the Spark job.
        return pd.DataFrame(columns=outCols)
    return pd.DataFrame(columns=outCols)
# Columns shown in the per-client comparison tables.
colsToShow = [#"sig",
    "numSubsessions",
    "totalTime_v2","totalTimeMax","summedTime",
    "deltaV2V4","deltaV2V4Sum",
    "deltaV2V4Pct","deltaV2V4SumPct"
    # "activeTicks_v2","activeTicks_v4",
    # "totalTimeDiff","activeTicksDiff",
]
# For each sampled client, compare v2 totalTime with the v4 per-session
# maximum (totalTimeMax) and with the summed subsession lengths
# (summedTime), as absolute and relative differences.
for clientId, sigs in ssf:
    mffsl = mergedFrameFromSigLists(sigs["v2sigs"], sigs["v4sigs"])
    deltaMax = mffsl["totalTime_v2"] - mffsl["totalTimeMax"]
    deltaSum = mffsl["totalTime_v2"] - mffsl["summedTime"]
    mffsl["deltaV2V4"] = deltaMax
    mffsl["deltaV2V4Sum"] = deltaSum
    mffsl["deltaV2V4Pct"] = 1.0*deltaMax/mffsl["totalTime_v2"]
    mffsl["deltaV2V4SumPct"] = 1.0*deltaSum/mffsl["totalTime_v2"]
    # mffsl["activeTicksDiff"] = mffsl["activeTicks_v2"] - mffsl["activeTicks_v4"]
    # DataFrame.sort was removed from pandas; use sort_values when available.
    if hasattr(mffsl, "sort_values"):
        mffsl.sort_values("numSubsessions", inplace=True)
    else:
        mffsl.sort("numSubsessions", inplace=True)
    if len(mffsl) > 0:
        display(mffsl[colsToShow])
        print(list(mffsl["deltaV2V4Pct"]))
numSubsessions | totalTime_v2 | totalTimeMax | summedTime | deltaV2V4 | deltaV2V4Sum | deltaV2V4Pct | deltaV2V4SumPct | |
---|---|---|---|---|---|---|---|---|
1 | 1 | 3837 | 3838 | 3836 | -1 | 1 | -0.000261 | 0.000261 |
2 | 1 | 8 | 8 | 7 | 0 | 1 | 0.000000 | 0.125000 |
3 | 1 | 20 | 20 | 19 | 0 | 1 | 0.000000 | 0.050000 |
[-0.00026062027625749283, 0.0, 0.0]
numSubsessions | totalTime_v2 | totalTimeMax | summedTime | deltaV2V4 | deltaV2V4Sum | deltaV2V4Pct | deltaV2V4SumPct | |
---|---|---|---|---|---|---|---|---|
1 | 1 | 11156 | 11157 | 11070 | -1 | 86 | -0.000090 | 0.007709 |
18 | 1 | 93 | 94 | 80 | -1 | 13 | -0.010753 | 0.139785 |
17 | 1 | 6593 | 6594 | 6592 | -1 | 1 | -0.000152 | 0.000152 |
16 | 1 | 17260 | 17261 | 17259 | -1 | 1 | -0.000058 | 0.000058 |
15 | 1 | 68 | 69 | 67 | -1 | 1 | -0.014706 | 0.014706 |
14 | 1 | 2647 | 2647 | 2637 | 0 | 10 | 0.000000 | 0.003778 |
13 | 1 | 25115 | 25116 | 25078 | -1 | 37 | -0.000040 | 0.001473 |
12 | 1 | 28441 | 28442 | 28440 | -1 | 1 | -0.000035 | 0.000035 |
11 | 1 | 353 | 354 | 341 | -1 | 12 | -0.002833 | 0.033994 |
10 | 1 | 25751 | 25752 | 25751 | -1 | 0 | -0.000039 | 0.000000 |
9 | 1 | 472 | 475 | 444 | -3 | 28 | -0.006356 | 0.059322 |
8 | 1 | 24037 | 24038 | 24037 | -1 | 0 | -0.000042 | 0.000000 |
7 | 1 | 592 | 593 | 588 | -1 | 4 | -0.001689 | 0.006757 |
6 | 1 | 16259 | 16260 | 16254 | -1 | 5 | -0.000062 | 0.000308 |
5 | 1 | 20038 | 20038 | 20036 | 0 | 2 | 0.000000 | 0.000100 |
4 | 1 | 5767 | 5768 | 5760 | -1 | 7 | -0.000173 | 0.001214 |
3 | 1 | 25671 | 25672 | 25671 | -1 | 0 | -0.000039 | 0.000000 |
2 | 1 | 45 | 46 | 29 | -1 | 16 | -0.022222 | 0.355556 |
19 | 1 | 23856 | 23857 | 23855 | -1 | 1 | -0.000042 | 0.000042 |
20 | 1 | 388 | 389 | 375 | -1 | 13 | -0.002577 | 0.033505 |
[-8.9637863033345292e-05, -0.010752688172043012, -0.00015167602002123465, -5.7937427578215528e-05, -0.014705882352941176, 0.0, -3.9816842524387814e-05, -3.5160507717731446e-05, -0.0028328611898016999, -3.8833443361422857e-05, -0.0063559322033898309, -4.1602529433789573e-05, -0.0016891891891891893, -6.1504397564425851e-05, 0.0, -0.00017340038148083925, -3.8954462233648865e-05, -0.022222222222222223, -4.1918175720992621e-05, -0.0025773195876288659]
numSubsessions | totalTime_v2 | totalTimeMax | summedTime | deltaV2V4 | deltaV2V4Sum | deltaV2V4Pct | deltaV2V4SumPct | |
---|---|---|---|---|---|---|---|---|
1 | 1 | 231 | 231 | 230 | 0 | 1 | 0.000000 | 0.004329 |
23 | 1 | 4510 | 4511 | 4509 | -1 | 1 | -0.000222 | 0.000222 |
22 | 1 | 923 | 924 | 923 | -1 | 0 | -0.001083 | 0.000000 |
21 | 1 | 789 | 790 | 789 | -1 | 0 | -0.001267 | 0.000000 |
20 | 1 | 634 | 635 | 634 | -1 | 0 | -0.001577 | 0.000000 |
19 | 1 | 15 | 16 | 13 | -1 | 2 | -0.066667 | 0.133333 |
18 | 1 | 3459 | 3460 | 3458 | -1 | 1 | -0.000289 | 0.000289 |
17 | 1 | 167 | 167 | 166 | 0 | 1 | 0.000000 | 0.005988 |
16 | 1 | 1002 | 1002 | 1001 | 0 | 1 | 0.000000 | 0.000998 |
15 | 1 | 2453 | 2454 | 2452 | -1 | 1 | -0.000408 | 0.000408 |
14 | 1 | 488 | 489 | 488 | -1 | 0 | -0.002049 | 0.000000 |
24 | 1 | 62 | 62 | 61 | 0 | 1 | 0.000000 | 0.016129 |
13 | 1 | 102 | 102 | 102 | 0 | 0 | 0.000000 | 0.000000 |
11 | 1 | 5764 | 5765 | 5764 | -1 | 0 | -0.000173 | 0.000000 |
10 | 1 | 36 | 37 | 36 | -1 | 0 | -0.027778 | 0.000000 |
9 | 1 | 11170 | 11171 | 11169 | -1 | 1 | -0.000090 | 0.000090 |
8 | 1 | 996 | 997 | 996 | -1 | 0 | -0.001004 | 0.000000 |
7 | 1 | 54 | 55 | 54 | -1 | 0 | -0.018519 | 0.000000 |
6 | 1 | 1107 | 1108 | 1107 | -1 | 0 | -0.000903 | 0.000000 |
5 | 1 | 1062 | 1062 | 1061 | 0 | 1 | 0.000000 | 0.000942 |
4 | 1 | 891 | 891 | 887 | 0 | 4 | 0.000000 | 0.004489 |
3 | 1 | 161 | 161 | 160 | 0 | 1 | 0.000000 | 0.006211 |
2 | 1 | 21 | 21 | 19 | 0 | 2 | 0.000000 | 0.095238 |
12 | 1 | 2315 | 2315 | 2315 | 0 | 0 | 0.000000 | 0.000000 |
25 | 1 | 447 | 448 | 447 | -1 | 0 | -0.002237 | 0.000000 |
[0.0, -0.00022172949002217295, -0.0010834236186348862, -0.0012674271229404308, -0.0015772870662460567, -0.066666666666666666, -0.00028910089621277829, 0.0, 0.0, -0.00040766408479412964, -0.0020491803278688526, 0.0, 0.0, -0.00017349063150589867, -0.027777777777777776, -8.952551477170994e-05, -0.001004016064257028, -0.018518518518518517, -0.00090334236675700087, 0.0, 0.0, 0.0, 0.0, 0.0, -0.0022371364653243847]
numSubsessions | totalTime_v2 | totalTimeMax | summedTime | deltaV2V4 | deltaV2V4Sum | deltaV2V4Pct | deltaV2V4SumPct | |
---|---|---|---|---|---|---|---|---|
1 | 1 | 62 | 63 | 60 | -1 | 2 | -0.016129 | 0.032258 |
72 | 1 | 100 | 100 | 96 | 0 | 4 | 0.000000 | 0.040000 |
70 | 1 | 9971 | 9973 | 9969 | -2 | 2 | -0.000201 | 0.000201 |
69 | 1 | 37 | 37 | 35 | 0 | 2 | 0.000000 | 0.054054 |
68 | 1 | 439 | 440 | 433 | -1 | 6 | -0.002278 | 0.013667 |
66 | 1 | 3567 | 3568 | 3566 | -1 | 1 | -0.000280 | 0.000280 |
65 | 1 | 19 | 20 | 17 | -1 | 2 | -0.052632 | 0.105263 |
64 | 1 | 210 | 210 | 204 | 0 | 6 | 0.000000 | 0.028571 |
61 | 1 | 84 | 84 | 78 | 0 | 6 | 0.000000 | 0.071429 |
74 | 1 | 301 | 301 | 295 | 0 | 6 | 0.000000 | 0.019934 |
59 | 1 | 18648 | 18648 | 18647 | 0 | 1 | 0.000000 | 0.000054 |
56 | 1 | 13 | 13 | 11 | 0 | 2 | 0.000000 | 0.153846 |
55 | 1 | 654 | 656 | 653 | -2 | 1 | -0.003058 | 0.001529 |
107 | 1 | 214 | 215 | 209 | -1 | 5 | -0.004673 | 0.023364 |
53 | 1 | 804 | 805 | 794 | -1 | 10 | -0.001244 | 0.012438 |
51 | 1 | 22 | 22 | 17 | 0 | 5 | 0.000000 | 0.227273 |
50 | 1 | 196 | 196 | 191 | 0 | 5 | 0.000000 | 0.025510 |
49 | 1 | 92 | 92 | 86 | 0 | 6 | 0.000000 | 0.065217 |
47 | 1 | 9309 | 9310 | 9304 | -1 | 5 | -0.000107 | 0.000537 |
58 | 1 | 138 | 138 | 135 | 0 | 3 | 0.000000 | 0.021739 |
45 | 1 | 1290 | 1290 | 1289 | 0 | 1 | 0.000000 | 0.000775 |
75 | 1 | 17 | 17 | 14 | 0 | 3 | 0.000000 | 0.176471 |
78 | 1 | 250 | 250 | 244 | 0 | 6 | 0.000000 | 0.024000 |
106 | 1 | 1818 | 1819 | 1813 | -1 | 5 | -0.000550 | 0.002750 |
104 | 1 | 128 | 129 | 123 | -1 | 5 | -0.007812 | 0.039062 |
103 | 1 | 1449 | 1450 | 1448 | -1 | 1 | -0.000690 | 0.000690 |
102 | 1 | 22 | 22 | 18 | 0 | 4 | 0.000000 | 0.181818 |
98 | 1 | 22 | 22 | 16 | 0 | 6 | 0.000000 | 0.272727 |
96 | 1 | 112 | 113 | 111 | -1 | 1 | -0.008929 | 0.008929 |
95 | 1 | 112 | 112 | 111 | 0 | 1 | 0.000000 | 0.008929 |
... | ... | ... | ... | ... | ... | ... | ... | ... |
105 | 2 | 31407 | 31409 | 31405 | -2 | 2 | -0.000064 | 0.000064 |
11 | 2 | 96 | 97 | 94 | -1 | 2 | -0.010417 | 0.020833 |
83 | 2 | 262 | 262 | 259 | 0 | 3 | 0.000000 | 0.011450 |
42 | 2 | 16732 | 16734 | 16732 | -2 | 0 | -0.000120 | 0.000000 |
67 | 2 | 27457 | 27460 | 27453 | -3 | 4 | -0.000109 | 0.000146 |
71 | 2 | 63394 | 63396 | 63388 | -2 | 6 | -0.000032 | 0.000095 |
57 | 2 | 140 | 140 | 138 | 0 | 2 | 0.000000 | 0.014286 |
17 | 2 | 1688 | 1689 | 1686 | -1 | 2 | -0.000592 | 0.001185 |
77 | 2 | 27320 | 27321 | 27318 | -1 | 2 | -0.000037 | 0.000073 |
31 | 2 | 37722 | 37724 | 37721 | -2 | 1 | -0.000053 | 0.000027 |
92 | 3 | 11992 | 11993 | 11990 | -1 | 2 | -0.000083 | 0.000167 |
37 | 3 | 21589 | 21590 | 21584 | -1 | 5 | -0.000046 | 0.000232 |
34 | 3 | 3473 | 3474 | 3472 | -1 | 1 | -0.000288 | 0.000288 |
19 | 3 | 5933 | 5935 | 5926 | -2 | 7 | -0.000337 | 0.001180 |
2 | 3 | 6596 | 6597 | 6594 | -1 | 2 | -0.000152 | 0.000303 |
60 | 3 | 15296 | 15297 | 15294 | -1 | 2 | -0.000065 | 0.000131 |
101 | 4 | 29425 | 29427 | 29420 | -2 | 5 | -0.000068 | 0.000170 |
46 | 4 | 30920 | 30922 | 30918 | -2 | 2 | -0.000065 | 0.000065 |
9 | 4 | 8008 | 8009 | 7999 | -1 | 9 | -0.000125 | 0.001124 |
48 | 4 | 29144 | 29147 | 29140 | -3 | 4 | -0.000103 | 0.000137 |
80 | 4 | 25304 | 25305 | 25298 | -1 | 6 | -0.000040 | 0.000237 |
3 | 4 | 6372 | 6373 | 6366 | -1 | 6 | -0.000157 | 0.000942 |
38 | 5 | 3242 | 3243 | 3236 | -1 | 6 | -0.000308 | 0.001851 |
23 | 5 | 45000 | 45002 | 44996 | -2 | 4 | -0.000044 | 0.000089 |
90 | 5 | 21249 | 21250 | 21244 | -1 | 5 | -0.000047 | 0.000235 |
52 | 5 | 18866 | 18867 | 18863 | -1 | 3 | -0.000053 | 0.000159 |
27 | 5 | 44085 | 44088 | 44083 | -3 | 2 | -0.000068 | 0.000045 |
12 | 6 | 55154 | 55156 | 55153 | -2 | 1 | -0.000036 | 0.000018 |
8 | 6 | 19197 | 19199 | 19191 | -2 | 6 | -0.000104 | 0.000313 |
73 | 8 | 52368 | 52372 | 52367 | -4 | 1 | -0.000076 | 0.000019 |
108 rows × 8 columns
[-0.016129032258064516, 0.0, -0.00020058168689198675, 0.0, -0.0022779043280182231, -0.00028034763106251753, -0.052631578947368418, 0.0, 0.0, 0.0, 0.0, 0.0, -0.0030581039755351682, -0.0046728971962616819, -0.0012437810945273632, 0.0, 0.0, 0.0, -0.0001074229240519927, 0.0, 0.0, 0.0, 0.0, -0.00055005500550055003, -0.0078125, -0.00069013112491373362, 0.0, 0.0, -0.0089285714285714281, 0.0, -0.083333333333333329, 0.0, -0.0086956521739130436, -0.0097087378640776691, -0.037037037037037035, -0.0013440860215053765, -0.00059916117435590175, -0.00031279324366593683, -0.013888888888888888, -0.0046728971962616819, -0.040000000000000001, -0.050000000000000003, -0.030303030303030304, 0.0, 0.0, 0.0, 0.0, -0.010869565217391304, 0.0, -0.028571428571428571, -0.0049261083743842365, -0.00040112314480545525, -0.00030284675953967292, -0.0035211267605633804, -0.029411764705882353, -0.0064516129032258064, -0.0068965517241379309, -0.00024752475247524753, -0.071428571428571425, -0.00063694267515923564, 0.0, -0.016393442622950821, -0.041666666666666664, -6.2453160129902576e-05, -0.0065789473684210523, -0.0097087378640776691, -0.055555555555555552, -0.041666666666666664, -0.0025510204081632651, -0.00028968713789107763, -0.00013836042891732966, -0.001488095238095238, 0.0, -2.5485823510672188e-05, -6.6024032747920237e-05, 0.0, -8.009611533840609e-05, -0.00011346873936230568, -6.3680071321679885e-05, -0.010416666666666666, 0.0, -0.00011953143676786995, -0.00010926175474378118, -3.1548727008865195e-05, 0.0, -0.00059241706161137445, -3.6603221083455344e-05, -5.3019458141137794e-05, -8.3388925950633749e-05, -4.6319885126684886e-05, -0.00028793550244745177, -0.00033709758975223328, -0.00015160703456640388, -6.5376569037656898e-05, -6.7969413763806284e-05, -6.4683053040103495e-05, -0.00012487512487512488, -0.00010293713972001099, -3.951944356623459e-05, -0.00015693659761456373, -0.00030845157310302283, -4.4444444444444447e-05, -4.706103816650195e-05, -5.300540655146825e-05, -6.8050357264375636e-05, 
-3.6262102476701596e-05, -0.00010418294525186227, -7.6382523678582338e-05]
numSubsessions | totalTime_v2 | totalTimeMax | summedTime | deltaV2V4 | deltaV2V4Sum | deltaV2V4Pct | deltaV2V4SumPct | |
---|---|---|---|---|---|---|---|---|
1 | 1 | 16063 | 16064 | 16060 | -1 | 3 | -0.000062 | 0.000187 |
2 | 1 | 6340 | 6341 | 6339 | -1 | 1 | -0.000158 | 0.000158 |
3 | 1 | 34717 | 34718 | 34716 | -1 | 1 | -0.000029 | 0.000029 |
4 | 1 | 23505 | 23506 | 23504 | -1 | 1 | -0.000043 | 0.000043 |
5 | 1 | 15047 | 15048 | 15046 | -1 | 1 | -0.000066 | 0.000066 |
6 | 1 | 684 | 685 | 683 | -1 | 1 | -0.001462 | 0.001462 |
7 | 1 | 5012 | 5013 | 5008 | -1 | 4 | -0.000200 | 0.000798 |
8 | 1 | 2794 | 2795 | 2792 | -1 | 2 | -0.000358 | 0.000716 |
9 | 1 | 38412 | 38415 | 38401 | -3 | 11 | -0.000078 | 0.000286 |
10 | 1 | 2263 | 2265 | 2262 | -2 | 1 | -0.000884 | 0.000442 |
11 | 1 | 18163 | 18164 | 18162 | -1 | 1 | -0.000055 | 0.000055 |
12 | 1 | 29438 | 29439 | 29437 | -1 | 1 | -0.000034 | 0.000034 |
13 | 1 | 349 | 350 | 344 | -1 | 5 | -0.002865 | 0.014327 |
14 | 1 | 4481 | 4481 | 4479 | 0 | 2 | 0.000000 | 0.000446 |
15 | 1 | 14138 | 14141 | 14137 | -3 | 1 | -0.000212 | 0.000071 |
[-6.2254871443690474e-05, -0.00015772870662460569, -2.8804332171558601e-05, -4.2544139544777708e-05, -6.6458430251877454e-05, -0.0014619883040935672, -0.00019952114924181964, -0.00035790980672870435, -7.8100593564511088e-05, -0.00088378258948298722, -5.5056983978417663e-05, -3.3969699028466607e-05, -0.0028653295128939827, 0.0, -0.0002121940868581129]
numSubsessions | totalTime_v2 | totalTimeMax | summedTime | deltaV2V4 | deltaV2V4Sum | deltaV2V4Pct | deltaV2V4SumPct | |
---|---|---|---|---|---|---|---|---|
1 | 1 | 10 | 11 | 9 | -1 | 1 | -0.100000 | 0.100000 |
26 | 1 | 220 | 220 | 219 | 0 | 1 | 0.000000 | 0.004545 |
27 | 1 | 9227 | 9227 | 9226 | 0 | 1 | 0.000000 | 0.000108 |
28 | 1 | 2157 | 2158 | 2157 | -1 | 0 | -0.000464 | 0.000000 |
29 | 1 | 2610 | 2611 | 2609 | -1 | 1 | -0.000383 | 0.000383 |
30 | 1 | 459 | 459 | 458 | 0 | 1 | 0.000000 | 0.002179 |
31 | 1 | 30 | 31 | 27 | -1 | 3 | -0.033333 | 0.100000 |
33 | 1 | 2310 | 2310 | 2308 | 0 | 2 | 0.000000 | 0.000866 |
34 | 1 | 6616 | 6617 | 6616 | -1 | 0 | -0.000151 | 0.000000 |
35 | 1 | 1930 | 1931 | 1924 | -1 | 6 | -0.000518 | 0.003109 |
36 | 1 | 40 | 41 | 33 | -1 | 7 | -0.025000 | 0.175000 |
37 | 1 | 149 | 150 | 145 | -1 | 4 | -0.006711 | 0.026846 |
38 | 1 | 400 | 401 | 400 | -1 | 0 | -0.002500 | 0.000000 |
39 | 1 | 9483 | 9483 | 9483 | 0 | 0 | 0.000000 | 0.000000 |
40 | 1 | 26222 | 26223 | 26222 | -1 | 0 | -0.000038 | 0.000000 |
41 | 1 | 118 | 119 | 98 | -1 | 20 | -0.008475 | 0.169492 |
43 | 1 | 8581 | 8582 | 8577 | -1 | 4 | -0.000117 | 0.000466 |
44 | 1 | 36691 | 36691 | 36690 | 0 | 1 | 0.000000 | 0.000027 |
45 | 1 | 4473 | 4474 | 4473 | -1 | 0 | -0.000224 | 0.000000 |
46 | 1 | 33143 | 33144 | 33141 | -1 | 2 | -0.000030 | 0.000060 |
25 | 1 | 908 | 909 | 907 | -1 | 1 | -0.001101 | 0.001101 |
47 | 1 | 20447 | 20447 | 20441 | 0 | 6 | 0.000000 | 0.000293 |
24 | 1 | 707 | 707 | 705 | 0 | 2 | 0.000000 | 0.002829 |
22 | 1 | 981 | 982 | 981 | -1 | 0 | -0.001019 | 0.000000 |
2 | 1 | 10776 | 10776 | 10775 | 0 | 1 | 0.000000 | 0.000093 |
3 | 1 | 7 | 8 | 7 | -1 | 0 | -0.142857 | 0.000000 |
4 | 1 | 7 | 8 | 7 | -1 | 0 | -0.142857 | 0.000000 |
5 | 1 | 9 | 10 | 9 | -1 | 0 | -0.111111 | 0.000000 |
6 | 1 | 6 | 7 | 6 | -1 | 0 | -0.166667 | 0.000000 |
7 | 1 | 817 | 817 | 816 | 0 | 1 | 0.000000 | 0.001224 |
8 | 1 | 134 | 135 | 134 | -1 | 0 | -0.007463 | 0.000000 |
9 | 1 | 16453 | 16454 | 16453 | -1 | 0 | -0.000061 | 0.000000 |
10 | 1 | 55 | 56 | 50 | -1 | 5 | -0.018182 | 0.090909 |
23 | 1 | 60 | 61 | 58 | -1 | 2 | -0.016667 | 0.033333 |
11 | 1 | 3856 | 3857 | 3856 | -1 | 0 | -0.000259 | 0.000000 |
13 | 1 | 115 | 116 | 107 | -1 | 8 | -0.008696 | 0.069565 |
14 | 1 | 160 | 162 | 160 | -2 | 0 | -0.012500 | 0.000000 |
15 | 1 | 726 | 727 | 724 | -1 | 2 | -0.001377 | 0.002755 |
16 | 1 | 28 | 29 | 28 | -1 | 0 | -0.035714 | 0.000000 |
17 | 1 | 30547 | 30548 | 30547 | -1 | 0 | -0.000033 | 0.000000 |
18 | 1 | 1610 | 1611 | 1608 | -1 | 2 | -0.000621 | 0.001242 |
19 | 1 | 14 | 14 | 13 | 0 | 1 | 0.000000 | 0.071429 |
20 | 1 | 882 | 882 | 882 | 0 | 0 | 0.000000 | 0.000000 |
21 | 1 | 315 | 315 | 314 | 0 | 1 | 0.000000 | 0.003175 |
12 | 1 | 25536 | 25537 | 25535 | -1 | 1 | -0.000039 | 0.000039 |
48 | 1 | 3077 | 3078 | 3074 | -1 | 3 | -0.000325 | 0.000975 |
32 | 3 | 38700 | 38701 | 38700 | -1 | 0 | -0.000026 | 0.000000 |
42 | 3 | 133504 | 133506 | 133503 | -2 | 1 | -0.000015 | 0.000007 |
[-0.10000000000000001, 0.0, 0.0, -0.00046360686138154843, -0.00038314176245210729, 0.0, -0.033333333333333333, 0.0, -0.00015114873035066505, -0.00051813471502590671, -0.025000000000000001, -0.0067114093959731542, -0.0025000000000000001, 0.0, -3.813591640607124e-05, -0.0084745762711864406, -0.0001165365342034728, 0.0, -0.000223563603845294, -3.0172283740156291e-05, -0.0011013215859030838, 0.0, 0.0, -0.0010193679918450561, 0.0, -0.14285714285714285, -0.14285714285714285, -0.1111111111111111, -0.16666666666666666, 0.0, -0.007462686567164179, -6.0779189205616e-05, -0.018181818181818181, -0.016666666666666666, -0.00025933609958506224, -0.0086956521739130436, -0.012500000000000001, -0.0013774104683195593, -0.035714285714285712, -3.2736438930173173e-05, -0.00062111801242236027, 0.0, 0.0, 0.0, -3.9160401002506263e-05, -0.00032499187520311994, -2.5839793281653746e-05, -1.4980824544582933e-05]
Since totalTime is a running total of the time recorded so far during a client session, the maximum value across v4 subsession pings belonging to a client session should match the value recorded in v2.
Also, scale up to the rest of the sample.
#load the data--
# for a tiny sample, you can load one part: "part-00000"
# or you can do more--
# ten parts: part-0000*
# or 10% of parts: part-*0
# or all parts: part-*
# This time the "part-*" glob is used, i.e. every part under the prefix.
pathToMergedData_all = "s3n://"+outBucketName+"/"+pathToOutput+"part-*"
print pathToMergedData_all
mergedData_all = sc.sequenceFile(pathToMergedData_all)
s3n://net-mozaws-prod-us-west-2-pipeline-analysis/bcolloran/mergedDataPerClient/nightly/2015-07-09/8937clients/part-*
# Same signature extraction as for the small sample, applied to all parts.
sessionSigs_all = mergedData_all.mapValues(json.loads)\
.mapValues(lambda x:{"v4sigs":getV4SessionSignatures(x["v4"]),
"v2sigs":getV2SessionSignatures(x["v2"])} )
def getPctDiffsV2VsMaxTotalTime(df):
    """Return the per-row relative gap between v2 and v4 totalTime.

    For each row the value is
    ``(totalTime_v2 - totalTimeMax) / totalTime_v2`` (a fraction, not
    multiplied by 100).

    Args:
        df: merged frame with "totalTime_v2" and "totalTimeMax" columns.

    Returns:
        A list with one value per row; ``[]`` when the frame has no rows.
    """
    if len(df) == 0:
        return []
    v2Total = df["totalTime_v2"]
    relativeGap = 1.0 * (v2Total - df["totalTimeMax"]) / v2Total
    return list(relativeGap)
# One merged v2/v4 frame per client.
mergedFrames_all = sessionSigs_all.mapValues(lambda x: mergedFrameFromSigLists(x["v2sigs"], x["v4sigs"]) )
# Flatten the per-client lists of relative differences into a single
# collection of values, dropping the client keys.
pctDiffsV2VsMaxTotalTime = mergedFrames_all \
.flatMapValues(getPctDiffsV2VsMaxTotalTime)\
.values()
# 201 bucket edges spaced by sinh: dense near zero, sparse in the tails.
sinhBins = [np.sinh((i-100)/40.0) for i in range(201)]
histOfDeltaV2V4Pct = pctDiffsV2VsMaxTotalTime\
.histogram( sinhBins )
# histOfDeltaV2V4Pct
# Element 0 is used as the bucket edges and element 1 as the counts.
binEdges = histOfDeltaV2V4Pct[0]
binVals = histOfDeltaV2V4Pct[1]
# Build a step outline: each bucket contributes its (left, right) edges to
# plotX and its count twice to plotY.
plotX = reduce(lambda x,y:x+y,[binEdges[i:i+2] for i in range(len(binEdges)-1)] )
plotY = reduce(lambda x,y:x+y, map(lambda x: [x,x],binVals))
ax = plt.subplot(1,1,1)
# ax.plot(binEdges[:-1],binVals)
ax.plot(plotX,plotY)
# Log counts; symlog x because the differences can be negative or zero.
plt.yscale('log')
plt.xscale('symlog')
# Empirical CDF of the relative v2-vs-v4 totalTime differences, built from
# the histogram computed above.
binEdges = histOfDeltaV2V4Pct[0]
binVals = histOfDeltaV2V4Pct[1]
# The cumulative count through bucket i covers every value below that
# bucket's RIGHT edge, so plot against binEdges[1:]. Using the left edges
# (binEdges[:-1]) shifts the curve one bucket to the left.
# NOTE(review): values outside the bucket range are presumably not counted
# by RDD.histogram, so the proportions are over in-range values -- confirm.
plotX = binEdges[1:]
plotY = (1.0*np.cumsum(binVals))/sum(binVals)
ax = plt.subplot(1,1,1)
# ax.plot(binEdges[:-1],binVals)
ax.plot(plotX,plotY)
# plt.yscale('log')
plt.xscale('symlog')
plt.xlabel("pct difference of v4 from v2")
plt.ylabel("cumulative proportion")
<matplotlib.text.Text at 0x7f82621097d0>
So the above plot shows the ECDF across the population of sessions [1] of the percentage difference between the value for totalTime recorded in v2 vs v4, more specifically:
(totalTime_v2 - totalTime_v4)/totalTime_v2
Where totalTime_v4 is the maximum value of totalTime recorded in any of the sessions with this session signature.
This looks mostly OK. In the vast majority of cases the values are off by only a few ms (maybe about 90%), and the percentage difference is minuscule in most cases. Maybe about 1% of sessions have recorded values that differ by more than 25% between v2 and v4.
The notably weird thing is that, to the extent the numbers are off, they skew negative (hardly any of the differences are positive). That means that totalTime_v2 < totalTime_v4. This strikes me as kind of strange -- if anything, I'd have expected to see a lot of cases of totalTime_v2 > totalTime_v4 (because if the last session is missing, then the v4 data sent back to our servers will not have any records of the last bits of the session). This may indicate problems with totalTime values in v4 that are too big.
Things to look into next:
[1] Remember that we have dropped sessions from some days. See the comment above. Also, we are only considering sessions that have matching signatures in the v2 and v4 data.
Do the discrepancies seen above amount to a substantial difference when aggregated?
# Inspect the first (clientId, merged frame) pair.
mfa = mergedFrames_all.first()
mfa
(u'5eb4f5f0-5264-4860-b10d-0a5a76602a46', Empty DataFrame Columns: [sig, date_v2, date_v4, totalTime_v2, totalTime_v4, totalTimeMax, summedTime, activeTicks_v2, activeTicks_v4, ssCounter, profSsCounter, buildId, numSubsessions] Index: [])
def getTotalTimeSums(df):
    """Sum the v2 and v4-max totalTime columns of one merged frame.

    Args:
        df: merged frame with "totalTime_v2" and "totalTimeMax" columns.

    Returns:
        ``{"totalTime_v2": <sum>, "totalTimeMax": <sum>}``; both sums are 0
        when the frame has no rows.
    """
    if len(df) == 0:
        return {"totalTime_v2": 0, "totalTimeMax": 0}
    v2Sum = sum(df["totalTime_v2"])
    v4MaxSum = sum(df["totalTimeMax"])
    return {"totalTime_v2": v2Sum, "totalTimeMax": v4MaxSum}
def totalTimeAdder(tt1,tt2):
    """Add two totalTime summaries field by field.

    Args:
        tt1, tt2: dicts with "totalTime_v2" and "totalTimeMax" entries.

    Returns:
        A new dict with the same two keys holding the pairwise sums.
    """
    return {field: tt1[field] + tt2[field]
            for field in ("totalTime_v2", "totalTimeMax")}
# Sum totalTime_v2 and totalTimeMax over every client's merged frame.
totalTimesAggregated = mergedFrames_all.values()\
.map(getTotalTimeSums) \
.reduce(lambda x,y:totalTimeAdder(x,y))
print totalTimesAggregated
# Relative difference (v2 - v4max)/v2, then the plain ratio v4max/v2.
print 1.0*(totalTimesAggregated["totalTime_v2"] - totalTimesAggregated["totalTimeMax"])/totalTimesAggregated["totalTime_v2"]
print 1.0*(totalTimesAggregated["totalTimeMax"])/totalTimesAggregated["totalTime_v2"]
{'totalTime_v2': 565012069, 'totalTimeMax': 1792076708} -2.17174943036 3.17174943036
Wow, so this is waaaay off. The sum of the per-session max of totalTime is more than 3x the value from v2. That's kind of crazy.
It seems like this might come from the tail of the distribution above -- the cases where totalTime_v2 << totalTime_v4. Let's see if we can look at a few examples like that.
def totalTimeMismatch(df):
    """Tell whether the summed "totalTimeMax" exceeds three times the summed "totalTime_v2"."""
    v2Total = sum(df["totalTime_v2"])
    maxTotal = sum(df["totalTimeMax"])
    return 3 * v2Total < maxTotal
# Keep only the records whose frame (element [1] of each record) satisfies
# totalTimeMismatch, then pull a sample of ten to inspect.
totalTimeMismatchFrames = mergedFrames_all.filter(lambda id_df: totalTimeMismatch(id_df[1]))
ttmf = totalTimeMismatchFrames.take(10)
ttmf
# Columns selected when a frame is displayed; the commented-out names are
# left out of the rendered table.
colsToShow = [#"sig",
"numSubsessions",
"totalTime_v2","totalTimeMax","summedTime",
"deltaV2V4","deltaV2V4Sum",
"deltaV2V4Pct","deltaV2V4SumPct"
# "activeTicks_v2","activeTicks_v4",
# "totalTimeDiff","activeTicksDiff",
]
for i in range(len(ttmf)):
mffsl = ttmf[i][1]
mffsl["deltaV2V4"] = mffsl["totalTime_v2"] - mffsl["totalTimeMax"]
mffsl["deltaV2V4Sum"] = mffsl["totalTime_v2"] - mffsl["summedTime"]
mffsl["deltaV2V4Pct"] = 1.0*(mffsl["totalTime_v2"] - mffsl["totalTimeMax"])/mffsl["totalTime_v2"]
mffsl["deltaV2V4SumPct"] = 1.0*(mffsl["totalTime_v2"] - mffsl["summedTime"])/mffsl["totalTime_v2"]
# mffsl["activeTicksDiff"] = mffsl["activeTicks_v2"] - mffsl["activeTicks_v4"]
mffsl.sort("numSubsessions",inplace=True)
if len(mffsl)>0:
display(mffsl[colsToShow])
print sum(mffsl["totalTime_v2"]), sum(mffsl["totalTimeMax"])
numSubsessions | totalTime_v2 | totalTimeMax | summedTime | deltaV2V4 | deltaV2V4Sum | deltaV2V4Pct | deltaV2V4SumPct | |
---|---|---|---|---|---|---|---|---|
1 | 1 | 683 | 684 | 666 | -1 | 17 | -0.001464 | 0.024890 |
2 | 1 | 934 | 17152 | 8 | -16218 | 926 | -17.364026 | 0.991435 |
3 | 1 | 1707 | 6519 | 6323 | -4812 | -4616 | -2.818981 | -2.704159 |
3324 24355
numSubsessions | totalTime_v2 | totalTimeMax | summedTime | deltaV2V4 | deltaV2V4Sum | deltaV2V4Pct | deltaV2V4SumPct | |
---|---|---|---|---|---|---|---|---|
1 | 1 | 6748 | 6748 | 6743 | 0 | 5 | 0.000000 | 0.000741 |
2 | 1 | 557 | 557 | 542 | 0 | 15 | 0.000000 | 0.026930 |
3 | 1 | 905 | 906 | 905 | -1 | 0 | -0.001105 | 0.000000 |
4 | 1 | 92 | 93 | 92 | -1 | 0 | -0.010870 | 0.000000 |
5 | 1 | 1777 | 1778 | 1777 | -1 | 0 | -0.000563 | 0.000000 |
7 | 1 | 16953 | 16954 | 16951 | -1 | 2 | -0.000059 | 0.000118 |
8 | 1 | -2330663 | -2330661 | -2330662 | -2 | -1 | 0.000001 | 0.000000 |
9 | 1 | 34 | 11435 | 11433 | -11401 | -11399 | -335.323529 | -335.264706 |
6 | 2 | 2277 | 2278 | 2271 | -1 | 6 | -0.000439 | 0.002635 |
-2301320 -2289912
numSubsessions | totalTime_v2 | totalTimeMax | summedTime | deltaV2V4 | deltaV2V4Sum | deltaV2V4Pct | deltaV2V4SumPct | |
---|---|---|---|---|---|---|---|---|
1 | 1 | 1740 | 1741 | 1740 | -1 | 0 | -5.747126e-04 | 0.000000e+00 |
6 | 1 | 256 | 256 | 255 | 0 | 1 | 0.000000e+00 | 3.906250e-03 |
7 | 1 | 626 | 626 | 625 | 0 | 1 | 0.000000e+00 | 1.597444e-03 |
17 | 1 | 2833 | 2833 | 2832 | 0 | 1 | 0.000000e+00 | 3.529827e-04 |
5 | 1 | 4306 | 4307 | 4306 | -1 | 0 | -2.322341e-04 | 0.000000e+00 |
10 | 1 | -46387243 | -46387242 | -46387244 | -1 | 1 | 2.155765e-08 | -2.155765e-08 |
3 | 1 | 3334 | 3335 | 3334 | -1 | 0 | -2.999400e-04 | 0.000000e+00 |
2 | 1 | 5290 | 5291 | 5290 | -1 | 0 | -1.890359e-04 | 0.000000e+00 |
15 | 1 | 73 | 74 | 73 | -1 | 0 | -1.369863e-02 | 0.000000e+00 |
16 | 1 | 7447 | 7448 | 7446 | -1 | 1 | -1.342823e-04 | 1.342823e-04 |
41 | 1 | 7546 | 7547 | 7546 | -1 | 0 | -1.325205e-04 | 0.000000e+00 |
19 | 1 | 4958 | 4958 | 4956 | 0 | 2 | 0.000000e+00 | 4.033885e-04 |
20 | 1 | 2649 | 2649 | 2648 | 0 | 1 | 0.000000e+00 | 3.775009e-04 |
12 | 1 | 458 | 459 | 457 | -1 | 1 | -2.183406e-03 | 2.183406e-03 |
22 | 1 | 7902 | 7903 | 7901 | -1 | 1 | -1.265502e-04 | 1.265502e-04 |
40 | 1 | 10275 | 10275 | 10274 | 0 | 1 | 0.000000e+00 | 9.732360e-05 |
38 | 1 | 23719 | 23719 | 23718 | 0 | 1 | 0.000000e+00 | 4.216029e-05 |
37 | 1 | 3592 | 3592 | 3590 | 0 | 2 | 0.000000e+00 | 5.567929e-04 |
35 | 1 | 116 | 117 | 116 | -1 | 0 | -8.620690e-03 | 0.000000e+00 |
31 | 1 | 957 | 957 | 956 | 0 | 1 | 0.000000e+00 | 1.044932e-03 |
30 | 1 | 3745 | 3745 | 3744 | 0 | 1 | 0.000000e+00 | 2.670227e-04 |
34 | 1 | 2416 | 2417 | 2415 | -1 | 1 | -4.139073e-04 | 4.139073e-04 |
28 | 1 | 1840 | 1841 | 1840 | -1 | 0 | -5.434783e-04 | 0.000000e+00 |
27 | 1 | 42983 | 42983 | 42982 | 0 | 1 | 0.000000e+00 | 2.326501e-05 |
25 | 1 | 10489 | 10489 | 10488 | 0 | 1 | 0.000000e+00 | 9.533797e-05 |
24 | 1 | 22257 | 22257 | 22256 | 0 | 1 | 0.000000e+00 | 4.492969e-05 |
23 | 1 | 267 | 268 | 266 | -1 | 1 | -3.745318e-03 | 3.745318e-03 |
26 | 2 | 13079 | 13079 | 13077 | 0 | 2 | 0.000000e+00 | 1.529169e-04 |
14 | 2 | 11054 | 11055 | 11053 | -1 | 1 | -9.046499e-05 | 9.046499e-05 |
13 | 2 | 13817 | 13818 | 13816 | -1 | 1 | -7.237461e-05 | 7.237461e-05 |
11 | 2 | 9577 | 9578 | 9574 | -1 | 3 | -1.044168e-04 | 3.132505e-04 |
32 | 2 | 17783 | 17783 | 17782 | 0 | 1 | 0.000000e+00 | 5.623348e-05 |
9 | 2 | 8966 | 8967 | 8965 | -1 | 1 | -1.115325e-04 | 1.115325e-04 |
18 | 2 | 4886 | 4887 | 4886 | -1 | 0 | -2.046664e-04 | 0.000000e+00 |
29 | 2 | 38867 | 38868 | 38866 | -1 | 1 | -2.572877e-05 | 2.572877e-05 |
33 | 2 | 17672 | 17673 | 17671 | -1 | 1 | -5.658669e-05 | 5.658669e-05 |
36 | 2 | 22103 | 22103 | 22102 | 0 | 1 | 0.000000e+00 | 4.524273e-05 |
39 | 2 | 32125 | 32126 | 32123 | -1 | 2 | -3.112840e-05 | 6.225681e-05 |
21 | 2 | 23824 | 23824 | 23822 | 0 | 2 | 0.000000e+00 | 8.394896e-05 |
8 | 2 | 23512 | 23513 | 23511 | -1 | 1 | -4.253147e-05 | 4.253147e-05 |
4 | 3 | 6211 | 6212 | 6209 | -1 | 2 | -1.610047e-04 | 3.220093e-04 |
-45971693 -45971669
numSubsessions | totalTime_v2 | totalTimeMax | summedTime | deltaV2V4 | deltaV2V4Sum | deltaV2V4Pct | deltaV2V4SumPct | |
---|---|---|---|---|---|---|---|---|
1 | 2 | 13626 | 73022 | 73018 | -59396 | -59392 | -4.35902 | -4.358726 |
13626 73022
numSubsessions | totalTime_v2 | totalTimeMax | summedTime | deltaV2V4 | deltaV2V4Sum | deltaV2V4Pct | deltaV2V4SumPct | |
---|---|---|---|---|---|---|---|---|
1 | 2 | 1141 | 46653 | 46648 | -45512 | -45507 | -39.887818 | -39.883436 |
2 | 2 | 801 | 803 | 792 | -2 | 9 | -0.002497 | 0.011236 |
3 | 2 | 5467 | 5467 | 5459 | 0 | 8 | 0.000000 | 0.001463 |
4 | 2 | 446 | 446 | 438 | 0 | 8 | 0.000000 | 0.017937 |
7855 53369
numSubsessions | totalTime_v2 | totalTimeMax | summedTime | deltaV2V4 | deltaV2V4Sum | deltaV2V4Pct | deltaV2V4SumPct | |
---|---|---|---|---|---|---|---|---|
1 | 2 | 39 | 2903 | 2895 | -2864 | -2856 | -73.435897 | -73.230769 |
39 2903
numSubsessions | totalTime_v2 | totalTimeMax | summedTime | deltaV2V4 | deltaV2V4Sum | deltaV2V4Pct | deltaV2V4SumPct | |
---|---|---|---|---|---|---|---|---|
2 | 1 | 279 | 279 | 278 | 0 | 1 | 0.000000 | 0.003584 |
5 | 1 | 37 | 961 | 960 | -924 | -923 | -24.972973 | -24.945946 |
7 | 1 | 403 | 403 | 402 | 0 | 1 | 0.000000 | 0.002481 |
1 | 2 | 1051 | 1052 | 1049 | -1 | 2 | -0.000951 | 0.001903 |
3 | 2 | 41 | 74534 | 74532 | -74493 | -74491 | -1816.902439 | -1816.853659 |
4 | 2 | 35 | 39461 | 39820 | -39426 | -39785 | -1126.457143 | -1136.714286 |
6 | 2 | 519 | 40551 | 40549 | -40032 | -40030 | -77.132948 | -77.129094 |
2365 157241
numSubsessions | totalTime_v2 | totalTimeMax | summedTime | deltaV2V4 | deltaV2V4Sum | deltaV2V4Pct | deltaV2V4SumPct | |
---|---|---|---|---|---|---|---|---|
1 | 1 | 85 | 28567 | 28558 | -28482 | -28473 | -335.082353 | -334.976471 |
2 | 1 | 83 | 84 | 81 | -1 | 2 | -0.012048 | 0.024096 |
168 28651
numSubsessions | totalTime_v2 | totalTimeMax | summedTime | deltaV2V4 | deltaV2V4Sum | deltaV2V4Pct | deltaV2V4SumPct | |
---|---|---|---|---|---|---|---|---|
1 | 1 | 84 | 9371 | 9352 | -9287 | -9268 | -110.559524 | -110.333333 |
2 | 1 | 67 | 2768 | 2761 | -2701 | -2694 | -40.313433 | -40.208955 |
4 | 1 | 42 | 3669 | 3661 | -3627 | -3619 | -86.357143 | -86.166667 |
5 | 1 | 59 | 2484 | 2460 | -2425 | -2401 | -41.101695 | -40.694915 |
7 | 1 | 45 | 15428 | 15408 | -15383 | -15363 | -341.844444 | -341.400000 |
8 | 1 | 28 | 36851 | 36843 | -36823 | -36815 | -1315.107143 | -1314.821429 |
10 | 1 | 80 | 81 | 76 | -1 | 4 | -0.012500 | 0.050000 |
11 | 1 | 180 | 180 | 179 | 0 | 1 | 0.000000 | 0.005556 |
12 | 1 | 24774 | 24778 | 24720 | -4 | 54 | -0.000161 | 0.002180 |
9 | 2 | 41 | 52860 | 52843 | -52819 | -52802 | -1288.268293 | -1287.853659 |
13 | 2 | 13 | 166280 | 166276 | -166267 | -166263 | -12789.769231 | -12789.461538 |
15 | 2 | 51 | 42592 | 42558 | -42541 | -42507 | -834.137255 | -833.470588 |
3 | 3 | 202 | 83833 | 83800 | -83631 | -83598 | -414.014851 | -413.851485 |
14 | 3 | 212 | 124372 | 124315 | -124160 | -124103 | -585.660377 | -585.391509 |
16 | 4 | 97 | 72811 | 72768 | -72714 | -72671 | -749.628866 | -749.185567 |
6 | 5 | 383 | 322300 | 257309 | -321917 | -256926 | -840.514360 | -670.825065 |
26358 960658
numSubsessions | totalTime_v2 | totalTimeMax | summedTime | deltaV2V4 | deltaV2V4Sum | deltaV2V4Pct | deltaV2V4SumPct | |
---|---|---|---|---|---|---|---|---|
1 | 1 | 72 | 72 | 68 | 0 | 4 | 0.000000 | 0.055556 |
3 | 1 | 289 | 290 | 285 | -1 | 4 | -0.003460 | 0.013841 |
4 | 1 | 51 | 52 | 51 | -1 | 0 | -0.019608 | 0.000000 |
5 | 1 | 2180 | 2180 | 2180 | 0 | 0 | 0.000000 | 0.000000 |
6 | 1 | 824 | 825 | 813 | -1 | 11 | -0.001214 | 0.013350 |
7 | 1 | 422 | 422 | 417 | 0 | 5 | 0.000000 | 0.011848 |
8 | 1 | 299 | 299 | 285 | 0 | 14 | 0.000000 | 0.046823 |
9 | 1 | 82 | 83 | 80 | -1 | 2 | -0.012195 | 0.024390 |
10 | 1 | 561 | 562 | 556 | -1 | 5 | -0.001783 | 0.008913 |
11 | 1 | 8525 | 8526 | 8524 | -1 | 1 | -0.000117 | 0.000117 |
12 | 1 | 302 | 303 | 294 | -1 | 8 | -0.003311 | 0.026490 |
2 | 1 | 828 | 828 | 822 | 0 | 6 | 0.000000 | 0.007246 |
13 | 1 | 520 | 520 | 516 | 0 | 4 | 0.000000 | 0.007692 |
16 | 1 | 491 | 491 | 481 | 0 | 10 | 0.000000 | 0.020367 |
17 | 1 | 69 | 70 | 67 | -1 | 2 | -0.014493 | 0.028986 |
18 | 1 | 199 | 199 | 198 | 0 | 1 | 0.000000 | 0.005025 |
19 | 1 | 94 | 94 | 91 | 0 | 3 | 0.000000 | 0.031915 |
20 | 1 | 241 | 242 | 241 | -1 | 0 | -0.004149 | 0.000000 |
21 | 1 | 899 | 899 | 899 | 0 | 0 | 0.000000 | 0.000000 |
22 | 1 | -85359 | -85358 | -85370 | -1 | 11 | 0.000012 | -0.000129 |
23 | 1 | 650 | 650 | 635 | 0 | 15 | 0.000000 | 0.023077 |
24 | 1 | 133 | 134 | 130 | -1 | 3 | -0.007519 | 0.022556 |
51 | 1 | 553 | 554 | 549 | -1 | 4 | -0.001808 | 0.007233 |
14 | 1 | 99 | 100 | 99 | -1 | 0 | -0.010101 | 0.000000 |
25 | 1 | 23 | 23 | 20 | 0 | 3 | 0.000000 | 0.130435 |
26 | 1 | 457 | 457 | 456 | 0 | 1 | 0.000000 | 0.002188 |
27 | 1 | 292 | 293 | 288 | -1 | 4 | -0.003425 | 0.013699 |
29 | 1 | 143 | 144 | 143 | -1 | 0 | -0.006993 | 0.000000 |
30 | 1 | 1106 | 1106 | 1093 | 0 | 13 | 0.000000 | 0.011754 |
31 | 1 | 801 | 802 | 799 | -1 | 2 | -0.001248 | 0.002497 |
32 | 1 | 192 | 193 | 192 | -1 | 0 | -0.005208 | 0.000000 |
33 | 1 | 6603 | 6604 | 6603 | -1 | 0 | -0.000151 | 0.000000 |
34 | 1 | 972 | 972 | 949 | 0 | 23 | 0.000000 | 0.023663 |
35 | 1 | 122 | 123 | 122 | -1 | 0 | -0.008197 | 0.000000 |
36 | 1 | 213 | 213 | 213 | 0 | 0 | 0.000000 | 0.000000 |
37 | 1 | 11 | 12 | 10 | -1 | 1 | -0.090909 | 0.090909 |
38 | 1 | 36 | 37 | 36 | -1 | 0 | -0.027778 | 0.000000 |
28 | 1 | 29 | 30 | 29 | -1 | 0 | -0.034483 | 0.000000 |
39 | 1 | 81 | 82 | 81 | -1 | 0 | -0.012346 | 0.000000 |
41 | 1 | 1884 | 1885 | 1879 | -1 | 5 | -0.000531 | 0.002654 |
42 | 1 | 551 | 552 | 551 | -1 | 0 | -0.001815 | 0.000000 |
43 | 1 | 426 | 427 | 410 | -1 | 16 | -0.002347 | 0.037559 |
44 | 1 | 68 | 68 | 62 | 0 | 6 | 0.000000 | 0.088235 |
45 | 1 | 556 | 556 | 552 | 0 | 4 | 0.000000 | 0.007194 |
46 | 1 | 398 | 398 | 395 | 0 | 3 | 0.000000 | 0.007538 |
47 | 1 | 483 | 484 | 470 | -1 | 13 | -0.002070 | 0.026915 |
48 | 1 | 148 | 149 | 147 | -1 | 1 | -0.006757 | 0.006757 |
49 | 1 | 279 | 280 | 279 | -1 | 0 | -0.003584 | 0.000000 |
50 | 1 | 8399 | 8399 | 8397 | 0 | 2 | 0.000000 | 0.000238 |
40 | 1 | 619 | 620 | 611 | -1 | 8 | -0.001616 | 0.012924 |
52 | 1 | 872 | 873 | 869 | -1 | 3 | -0.001147 | 0.003440 |
15 | 2 | 10888 | 10889 | 10869 | -1 | 19 | -0.000092 | 0.001745 |
-30324 -30292
Ok, so we see that one thing that's happening is that we have a good number of sessions with negative values (of quite extreme absolute value). So let's ignore those for now (though we may have to consider them later).
# mffsl[(mffsl["totalTime_v2"]>0) & (mffsl["totalTimeMax"]>0)]
def getTotalTimeSums_dropNegs(df):
    """Sum "totalTime_v2" and "totalTimeMax" over rows where both are > 0.

    Rows in which either column is zero or negative are excluded before
    summing. Returns a dict keyed by the two column names, with zeros when
    no row survives the filter.
    """
    v2Positive = df["totalTime_v2"] > 0
    maxPositive = df["totalTimeMax"] > 0
    kept = df[v2Positive & maxPositive]
    if len(kept) == 0:
        return {"totalTime_v2": 0, "totalTimeMax": 0}
    return {
        "totalTime_v2": sum(kept["totalTime_v2"]),
        "totalTimeMax": sum(kept["totalTimeMax"]),
    }
totalTimesAggregated = mergedFrames_all.values()\
.map(getTotalTimeSums_dropNegs) \
.reduce(lambda x,y:totalTimeAdder(x,y))
print totalTimesAggregated
print 1.0*(totalTimesAggregated["totalTime_v2"] - totalTimesAggregated["totalTimeMax"])/totalTimesAggregated["totalTime_v2"]
print 1.0*(totalTimesAggregated["totalTimeMax"])/totalTimesAggregated["totalTime_v2"]
{'totalTime_v2': 1737606841, 'totalTimeMax': 1841719129} -0.0599170569219 1.05991705692
So the aggregate discrepancy still exists, but it is much more muted. This kind of indicates that there might be a number of v2 sessions where the negative values are much worse in v2 than v4. So let's investigate the negative values a bit.
def getNegRowFrame(df):
    """Return the rows of df where "totalTime_v2" or "totalTimeMax" is below zero."""
    hasNegative = (df["totalTime_v2"] < 0) | (df["totalTimeMax"] < 0)
    return df[hasNegative]
# Reduce every client's frame to its negative-valued rows and drop the clients
# left with an empty frame; then sample twenty of the survivors.
negFrames = (
    mergedFrames_all
    .mapValues(getNegRowFrame)
    .filter(lambda id_df: len(id_df[1]) > 0)
)
nf = negFrames.take(20)
def showFramesWithDeltas(frameList, colsToShow):
    """Add delta columns to each frame in frameList and display the non-empty ones.

    frameList holds indexable records whose element [1] is a DataFrame. Each
    frame is modified in place: four delta columns are added (totalTime_v2
    minus totalTimeMax / summedTime, as absolute values and as fractions of
    totalTime_v2) and the rows are sorted by "numSubsessions". Only the
    columns named in colsToShow are rendered. Returns None.
    """
    for entry in frameList:
        frame = entry[1]
        frame["deltaV2V4"] = frame["totalTime_v2"] - frame["totalTimeMax"]
        frame["deltaV2V4Sum"] = frame["totalTime_v2"] - frame["summedTime"]
        frame["deltaV2V4Pct"] = 1.0 * frame["deltaV2V4"] / frame["totalTime_v2"]
        frame["deltaV2V4SumPct"] = 1.0 * frame["deltaV2V4Sum"] / frame["totalTime_v2"]
        frame.sort("numSubsessions", inplace=True)
        if len(frame) == 0:
            continue
        display(frame[colsToShow])
# Echo the sampled (clientId, frame) records.
nf
[(u'81c46da0-ae07-45da-aaa5-855eeead5f2b', sig date_v2 date_v4 totalTime_v2 totalTime_v4 \ 19 (5657, 7800, 8976) 2015-06-29 2015-06-30 -22067 -22066 totalTimeMax summedTime activeTicks_v2 activeTicks_v4 ssCounter \ 19 -22066 -22073 2047 2047 1 profSsCounter buildId numSubsessions 19 26 20150625030202 1 ), (u'0dc992ec-2524-4f76-bdf7-2d86d7323455', sig date_v2 date_v4 totalTime_v2 totalTime_v4 \ 8 (115, 3975, 4351) 2015-06-27 2015-06-27 -2330663 -2330661 totalTimeMax summedTime activeTicks_v2 activeTicks_v4 ssCounter \ 8 -2330661 -2330662 381 381 1 profSsCounter buildId numSubsessions 8 17 20150626030206 1 ), (u'77663edb-5149-49e4-88d0-3e639668294c', sig date_v2 date_v4 totalTime_v2 totalTime_v4 \ 10 (171, 3541, 3733) 2015-06-22 2015-06-22 -46387243 -46387242 totalTimeMax summedTime activeTicks_v2 activeTicks_v4 ssCounter \ 10 -46387242 -46387244 1072 1072 1 profSsCounter buildId numSubsessions 10 122 20150621030204 1 ), (u'c391fb04-eac1-471d-88c1-9e1c08ff0494', sig date_v2 date_v4 totalTime_v2 totalTime_v4 \ 13 (40, 1772, 2049) 2015-06-30 2015-06-30 -5042 -5042 totalTimeMax summedTime activeTicks_v2 activeTicks_v4 ssCounter \ 13 -5042 -5042 93 93 1 profSsCounter buildId numSubsessions 13 58 20150629134017 1 ), (u'21d4e26a-fc7e-4b82-8129-7d2969c7b6f7', sig date_v2 date_v4 totalTime_v2 totalTime_v4 \ 22 (11299, 21384, 22051) 2015-06-22 2015-06-22 -85359 -85358 totalTimeMax summedTime activeTicks_v2 activeTicks_v4 ssCounter \ 22 -85358 -85370 199 199 1 profSsCounter buildId numSubsessions 22 54 20150619030204 1 ), (u'30e3ed42-e92b-4658-8508-357dafcdfadd', sig date_v2 date_v4 totalTime_v2 totalTime_v4 \ 69 (3884, 41471, 42939) 2015-06-22 2015-06-22 -3060 -3057 97 (3439, 30598, 31024) 2015-06-28 2015-06-28 -1567 -1564 98 (1812, 11143, 17417) 2015-06-28 2015-06-28 -3029 -3028 totalTimeMax summedTime activeTicks_v2 activeTicks_v4 ssCounter \ 69 -3057 -3068 53 53 1 97 -1564 -1592 240 240 1 98 -3028 -3033 36 36 1 profSsCounter buildId numSubsessions 69 169 20150621030204 1 
97 218 20150627030211 1 98 219 20150627030211 1 ), (u'5215db1f-b62f-47fa-a89b-926df4342a15', sig date_v2 date_v4 totalTime_v2 totalTime_v4 \ 15 (8844, 20424, 21849) 2015-06-23 2015-06-22 -4047 -4046 totalTimeMax summedTime activeTicks_v2 activeTicks_v4 ssCounter \ 15 -4046 -4059 856 856 1 profSsCounter buildId numSubsessions 15 23 20150619030204 1 ), (u'b28ca254-97c1-47d3-863b-6f273e46d139', sig date_v2 date_v4 totalTime_v2 totalTime_v4 \ 18 (469, 32011, 32537) 2015-06-23 2015-06-22 -86323 -86323 totalTimeMax summedTime activeTicks_v2 activeTicks_v4 ssCounter \ 18 -86323 -86341 49 49 1 profSsCounter buildId numSubsessions 18 25 20150621030204 1 ), (u'7e4f6179-d3a6-4a5e-836f-4c1a9a65a24a', sig date_v2 date_v4 totalTime_v2 totalTime_v4 \ 21 (1217, 21410, 22956) 2015-06-23 2015-06-22 -6841 -6840 33 (2184, 20423, 21600) 2015-06-30 2015-06-29 -4761 -4760 totalTimeMax summedTime activeTicks_v2 activeTicks_v4 ssCounter \ 21 -6840 -6844 54 54 1 33 -4760 -4765 49 49 1 profSsCounter buildId numSubsessions 21 22 20150620030209 1 33 35 20150626030206 1 ), (u'9ef5ad28-6e06-4739-9ae3-8b15fe80dbee', sig date_v2 date_v4 totalTime_v2 totalTime_v4 \ 43 (847, 1988, 2188) 2015-06-26 2015-06-26 -879 -878 totalTimeMax summedTime activeTicks_v2 activeTicks_v4 ssCounter \ 43 -878 -880 151 151 1 profSsCounter buildId numSubsessions 43 125 20150624080416 1 ), (u'c0cb606e-afd2-49b4-a98e-1d6a2cdfa194', sig date_v2 date_v4 totalTime_v2 totalTime_v4 \ 3 (977, 2198, 2600) 2015-06-19 2015-06-18 -5618 -5617 totalTimeMax summedTime activeTicks_v2 activeTicks_v4 ssCounter \ 3 -5617 -5619 36 36 1 profSsCounter buildId numSubsessions 3 3 20150617030205 1 ), (u'2bf5ea86-2df7-4615-9e15-e466a1cffa8a', sig date_v2 date_v4 totalTime_v2 totalTime_v4 \ 30 (1470, 2975, 3313) 2015-07-05 2015-07-05 -29246 -29245 totalTimeMax summedTime activeTicks_v2 activeTicks_v4 ssCounter \ 30 -29245 -29248 1648 1648 1 profSsCounter buildId numSubsessions 30 43 20150629134017 1 ), (u'b17d5be1-27c1-4792-b65f-5598906f8e34', 
sig date_v2 date_v4 totalTime_v2 totalTime_v4 \ 55 (12399, 22034, 22352) 2015-07-01 2015-07-01 -32587 -32586 totalTimeMax summedTime activeTicks_v2 activeTicks_v4 ssCounter \ 55 -32586 -32602 565 565 1 profSsCounter buildId numSubsessions 55 117 20150701030207 1 ), (u'e08d3352-88a6-459b-ac0b-4be505e63dea', sig date_v2 date_v4 totalTime_v2 totalTime_v4 \ 5 (4946, 6813, 6998) 2015-06-15 2015-06-16 -86369 -86368 14 (1093, 3126, 3510) 2015-06-16 2015-06-16 -80053 -80052 19 (811, 2405, 2646) 2015-06-18 2015-06-19 -165857 -165856 25 (1545, 3647, 4098) 2015-06-18 2015-06-19 -2570958 270915 29 (3586, 6826, 7135) 2015-06-26 2015-06-26 -78947 264391 totalTimeMax summedTime activeTicks_v2 activeTicks_v4 ssCounter \ 5 -86368 -86374 6 6 1 14 -80052 -80054 382 382 1 19 -165856 -165858 1001 1001 1 25 270915 270912 3719 2210 1 29 264391 -78951 2 2 1 profSsCounter buildId numSubsessions 5 22 20150614030204 1 14 29 20150615030204 1 19 35 20150616030201 1 25 47 20150618030206 1 29 54 20150624080416 2 ), (u'72403d1f-a549-4629-b4a0-ddf8303b95ac', sig date_v2 date_v4 totalTime_v2 totalTime_v4 \ 4 (3495, 9773, 10434) 2015-06-14 2015-06-14 -6858 67 totalTimeMax summedTime activeTicks_v2 activeTicks_v4 ssCounter \ 4 67 -6863 4 4 1 profSsCounter buildId numSubsessions 4 47 20150611030208 2 ), (u'6c4b459e-0d08-4dc4-9a01-05fec445f350', sig date_v2 date_v4 totalTime_v2 totalTime_v4 \ 1 (265, 1300, 22731) 2015-06-21 2015-06-21 -6143 -6142 totalTimeMax summedTime activeTicks_v2 activeTicks_v4 ssCounter \ 1 -6142 -6151 128 128 1 profSsCounter buildId numSubsessions 1 1 20150621030204 1 ), (u'e5f648d1-d5ee-497c-9b27-ad65d276c67e', sig date_v2 date_v4 totalTime_v2 totalTime_v4 \ 64 (591, 1650, 1983) 2015-06-30 2015-06-30 -40428 -40427 totalTimeMax summedTime activeTicks_v2 activeTicks_v4 ssCounter \ 64 -40427 -40429 479 479 1 profSsCounter buildId numSubsessions 64 138 20150620030209 1 ), (u'1bf5827b-9668-4c1e-ab36-0ff088c12fb8', sig date_v2 date_v4 totalTime_v2 totalTime_v4 \ 4 (25, 562, 689) 
2015-06-29 2015-06-29 -420 -419 totalTimeMax summedTime activeTicks_v2 activeTicks_v4 ssCounter \ 4 -419 -420 1869 1869 1 profSsCounter buildId numSubsessions 4 90 20150628030215 1 ), (u'4d807e7a-5ea6-4433-855d-7649dd0def80', sig date_v2 date_v4 totalTime_v2 totalTime_v4 \ 4 (8003, 10035, 10253) 2015-06-20 2015-06-19 -57854 14427 19 (843, 3968, 4153) 2015-06-30 2015-06-30 -84394 -84393 26 (967, 5688, 5904) 2015-07-03 2015-07-02 -57882 -57881 totalTimeMax summedTime activeTicks_v2 activeTicks_v4 ssCounter \ 4 14427 -57859 2001 2001 1 19 -84393 -84396 390 390 1 26 -57881 -57883 3039 3039 1 profSsCounter buildId numSubsessions 4 9 20150617030205 2 19 34 20150628030215 1 26 45 20150630030204 1 ), (u'436ab36a-6029-4236-a390-02432f794e79', sig date_v2 date_v4 totalTime_v2 totalTime_v4 \ 6 (20080, 32607, 39057) 2015-06-15 2015-06-15 -47647 -47646 totalTimeMax summedTime activeTicks_v2 activeTicks_v4 ssCounter \ 6 -47646 -47671 5940 5940 1 profSsCounter buildId numSubsessions 6 64 20150612030205 1 )]
# Columns selected when a frame is displayed; the commented-out names are
# left out of the rendered table.
colsToShow = [#"sig",
"numSubsessions",
"totalTime_v2","totalTimeMax","summedTime",
"deltaV2V4","deltaV2V4Sum",
"deltaV2V4Pct","deltaV2V4SumPct"
# "activeTicks_v2","activeTicks_v4",
# "totalTimeDiff","activeTicksDiff",
]
# Render each sampled frame in nf with its delta columns.
showFramesWithDeltas(nf,colsToShow)
# for clientId,df in nf:
# display(df)
numSubsessions | totalTime_v2 | totalTimeMax | summedTime | deltaV2V4 | deltaV2V4Sum | deltaV2V4Pct | deltaV2V4SumPct | |
---|---|---|---|---|---|---|---|---|
19 | 1 | -22067 | -22066 | -22073 | -1 | 6 | 0.000045 | -0.000272 |
numSubsessions | totalTime_v2 | totalTimeMax | summedTime | deltaV2V4 | deltaV2V4Sum | deltaV2V4Pct | deltaV2V4SumPct | |
---|---|---|---|---|---|---|---|---|
8 | 1 | -2330663 | -2330661 | -2330662 | -2 | -1 | 0.000001 | 0 |
numSubsessions | totalTime_v2 | totalTimeMax | summedTime | deltaV2V4 | deltaV2V4Sum | deltaV2V4Pct | deltaV2V4SumPct | |
---|---|---|---|---|---|---|---|---|
10 | 1 | -46387243 | -46387242 | -46387244 | -1 | 1 | 2.155765e-08 | -2.155765e-08 |
numSubsessions | totalTime_v2 | totalTimeMax | summedTime | deltaV2V4 | deltaV2V4Sum | deltaV2V4Pct | deltaV2V4SumPct | |
---|---|---|---|---|---|---|---|---|
13 | 1 | -5042 | -5042 | -5042 | 0 | 0 | -0 | -0 |
numSubsessions | totalTime_v2 | totalTimeMax | summedTime | deltaV2V4 | deltaV2V4Sum | deltaV2V4Pct | deltaV2V4SumPct | |
---|---|---|---|---|---|---|---|---|
22 | 1 | -85359 | -85358 | -85370 | -1 | 11 | 0.000012 | -0.000129 |
numSubsessions | totalTime_v2 | totalTimeMax | summedTime | deltaV2V4 | deltaV2V4Sum | deltaV2V4Pct | deltaV2V4SumPct | |
---|---|---|---|---|---|---|---|---|
69 | 1 | -3060 | -3057 | -3068 | -3 | 8 | 0.000980 | -0.002614 |
97 | 1 | -1567 | -1564 | -1592 | -3 | 25 | 0.001914 | -0.015954 |
98 | 1 | -3029 | -3028 | -3033 | -1 | 4 | 0.000330 | -0.001321 |
numSubsessions | totalTime_v2 | totalTimeMax | summedTime | deltaV2V4 | deltaV2V4Sum | deltaV2V4Pct | deltaV2V4SumPct | |
---|---|---|---|---|---|---|---|---|
15 | 1 | -4047 | -4046 | -4059 | -1 | 12 | 0.000247 | -0.002965 |
numSubsessions | totalTime_v2 | totalTimeMax | summedTime | deltaV2V4 | deltaV2V4Sum | deltaV2V4Pct | deltaV2V4SumPct | |
---|---|---|---|---|---|---|---|---|
18 | 1 | -86323 | -86323 | -86341 | 0 | 18 | -0 | -0.000209 |
numSubsessions | totalTime_v2 | totalTimeMax | summedTime | deltaV2V4 | deltaV2V4Sum | deltaV2V4Pct | deltaV2V4SumPct | |
---|---|---|---|---|---|---|---|---|
21 | 1 | -6841 | -6840 | -6844 | -1 | 3 | 0.000146 | -0.000439 |
33 | 1 | -4761 | -4760 | -4765 | -1 | 4 | 0.000210 | -0.000840 |
numSubsessions | totalTime_v2 | totalTimeMax | summedTime | deltaV2V4 | deltaV2V4Sum | deltaV2V4Pct | deltaV2V4SumPct | |
---|---|---|---|---|---|---|---|---|
43 | 1 | -879 | -878 | -880 | -1 | 1 | 0.001138 | -0.001138 |
numSubsessions | totalTime_v2 | totalTimeMax | summedTime | deltaV2V4 | deltaV2V4Sum | deltaV2V4Pct | deltaV2V4SumPct | |
---|---|---|---|---|---|---|---|---|
3 | 1 | -5618 | -5617 | -5619 | -1 | 1 | 0.000178 | -0.000178 |
numSubsessions | totalTime_v2 | totalTimeMax | summedTime | deltaV2V4 | deltaV2V4Sum | deltaV2V4Pct | deltaV2V4SumPct | |
---|---|---|---|---|---|---|---|---|
30 | 1 | -29246 | -29245 | -29248 | -1 | 2 | 0.000034 | -0.000068 |
numSubsessions | totalTime_v2 | totalTimeMax | summedTime | deltaV2V4 | deltaV2V4Sum | deltaV2V4Pct | deltaV2V4SumPct | |
---|---|---|---|---|---|---|---|---|
55 | 1 | -32587 | -32586 | -32602 | -1 | 15 | 0.000031 | -0.00046 |
numSubsessions | totalTime_v2 | totalTimeMax | summedTime | deltaV2V4 | deltaV2V4Sum | deltaV2V4Pct | deltaV2V4SumPct | |
---|---|---|---|---|---|---|---|---|
5 | 1 | -86369 | -86368 | -86374 | -1 | 5 | 0.000012 | -0.000058 |
14 | 1 | -80053 | -80052 | -80054 | -1 | 1 | 0.000012 | -0.000012 |
19 | 1 | -165857 | -165856 | -165858 | -1 | 1 | 0.000006 | -0.000006 |
25 | 1 | -2570958 | 270915 | 270912 | -2841873 | -2841870 | 1.105375 | 1.105374 |
29 | 2 | -78947 | 264391 | -78951 | -343338 | 4 | 4.348968 | -0.000051 |
numSubsessions | totalTime_v2 | totalTimeMax | summedTime | deltaV2V4 | deltaV2V4Sum | deltaV2V4Pct | deltaV2V4SumPct | |
---|---|---|---|---|---|---|---|---|
4 | 2 | -6858 | 67 | -6863 | -6925 | 5 | 1.00977 | -0.000729 |
numSubsessions | totalTime_v2 | totalTimeMax | summedTime | deltaV2V4 | deltaV2V4Sum | deltaV2V4Pct | deltaV2V4SumPct | |
---|---|---|---|---|---|---|---|---|
1 | 1 | -6143 | -6142 | -6151 | -1 | 8 | 0.000163 | -0.001302 |
numSubsessions | totalTime_v2 | totalTimeMax | summedTime | deltaV2V4 | deltaV2V4Sum | deltaV2V4Pct | deltaV2V4SumPct | |
---|---|---|---|---|---|---|---|---|
64 | 1 | -40428 | -40427 | -40429 | -1 | 1 | 0.000025 | -0.000025 |
numSubsessions | totalTime_v2 | totalTimeMax | summedTime | deltaV2V4 | deltaV2V4Sum | deltaV2V4Pct | deltaV2V4SumPct | |
---|---|---|---|---|---|---|---|---|
4 | 1 | -420 | -419 | -420 | -1 | 0 | 0.002381 | -0 |
numSubsessions | totalTime_v2 | totalTimeMax | summedTime | deltaV2V4 | deltaV2V4Sum | deltaV2V4Pct | deltaV2V4SumPct | |
---|---|---|---|---|---|---|---|---|
19 | 1 | -84394 | -84393 | -84396 | -1 | 2 | 0.000012 | -0.000024 |
26 | 1 | -57882 | -57881 | -57883 | -1 | 1 | 0.000017 | -0.000017 |
4 | 2 | -57854 | 14427 | -57859 | -72281 | 5 | 1.249369 | -0.000086 |
numSubsessions | totalTime_v2 | totalTimeMax | summedTime | deltaV2V4 | deltaV2V4Sum | deltaV2V4Pct | deltaV2V4SumPct | |
---|---|---|---|---|---|---|---|---|
6 | 1 | -47647 | -47646 | -47671 | -1 | 24 | 0.000021 | -0.000504 |
So this is kind of disturbing -- in some cases above, the value of totalTime_v2 is quite far from totalTimeMax but summedTime is close to totalTime_v2 (remember that totalTimeMax is the maximum value recorded for "totalTime" over all the subsessions in a session, whereas summedTime is the sum of all the values of "info/subsessionLength" for all subsessions in a session).
This kind of suggests that in the case of totalTimeMax, at the end of one of the subsessions the time counter/timer was positive and that something happened in a subsequent subsession that set the timer negative. But it kind of looks like there may only be negative values in the last subsession of a session. Maybe something is happening on shutdown?
We need to go back to the subsession data for this (rather than the frames of data aggregated into full sessions).
For subsessions with subsessionLength < 0, we'll start by just looking at how far each such subsession is from the end of the series of subsessions in its session. Concretely, we'll look at the distribution of
(number of subsessions in session) - (subsessionCounter)
def getSubsessSigsForSessionsWithANegSubsessLength(sigList):
    """Group the subsessions of every session that has a negative subsessionLength.

    sigList is a re-iterable collection (it is walked twice) of dicts carrying
    "sig" and "subsessionLength". Returns a dict mapping each "sig" that has at
    least one entry with subsessionLength < 0 to the list of ALL entries with
    that sig, in their original order. Sessions with no negative entry are
    omitted, so the result is {} when nothing is negative.
    """
    flaggedSigs = {entry["sig"] for entry in sigList if entry["subsessionLength"] < 0}
    grouped = dict((sig, []) for sig in flaggedSigs)
    for entry in sigList:
        bucket = grouped.get(entry["sig"])
        if bucket is not None:
            bucket.append(entry)
    return grouped
# Total number of records in sessionSigs_all.
sessionSigs_all.count()
8937
# For every client, take the "v4sigs" list, group the subsessions of each
# session that contains a negative subsessionLength, and keep only the clients
# for which that grouping is non-empty.
# The chain is parenthesised rather than backslash-continued: the original's
# trailing backslash after .filter(...) spliced the following count() line onto
# this expression, which is a syntax error when run as a script.
negSubsess = (
    sessionSigs_all
    .mapValues(lambda x: x["v4sigs"])
    .mapValues(getSubsessSigsForSessionsWithANegSubsessLength)
    .filter(lambda id_dict: id_dict[1] != {})
)
# Number of clients with at least one negative subsessionLength.
negSubsess.count()
80
So there are only 80 clients with a negative value for any subsessionLength. That's good. Out of 8937 clients that's less than 1%.
# Inspect ten of the filtered records.
negSubsess.take(10)
[(u'81c46da0-ae07-45da-aaa5-855eeead5f2b', {(5657, 7800, 8976): [{'activeTicks': 2047, 'buildId': u'20150625030202', 'date': u'2015-06-30', 'profSsCounter': 26, 'sig': (5657, 7800, 8976), 'ssCounter': 1, 'subsessionLength': -22073, 'totalTime': -22066}]}), (u'0dc992ec-2524-4f76-bdf7-2d86d7323455', {(115, 3975, 4351): [{'activeTicks': 381, 'buildId': u'20150626030206', 'date': u'2015-06-27', 'profSsCounter': 17, 'sig': (115, 3975, 4351), 'ssCounter': 1, 'subsessionLength': -2330662, 'totalTime': -2330661}], (1690, 10727, 11139): [{'activeTicks': 3986, 'buildId': u'20150701030207', 'date': u'2015-07-02', 'profSsCounter': 30, 'sig': (1690, 10727, 11139), 'ssCounter': 1, 'subsessionLength': -146828, 'totalTime': -146824}]}), (u'77663edb-5149-49e4-88d0-3e639668294c', {(171, 3541, 3733): [{'activeTicks': 1072, 'buildId': u'20150621030204', 'date': u'2015-06-22', 'profSsCounter': 122, 'sig': (171, 3541, 3733), 'ssCounter': 1, 'subsessionLength': -46387244, 'totalTime': -46387242}]}), (u'86d35445-12aa-4709-9475-2531d4fa5705', {(818, 2619, 3013): [{'activeTicks': 161, 'buildId': u'20150615030204', 'date': u'2015-06-16', 'profSsCounter': 95, 'sig': (818, 2619, 3013), 'ssCounter': 1, 'subsessionLength': -2628, 'totalTime': -2626}]}), (u'04ece761-ed72-457a-8699-ed6f2741e275', {(2403, 12438, 13018): [{'activeTicks': 62, 'buildId': u'20150613030206', 'date': u'2015-06-14', 'profSsCounter': 17, 'sig': (2403, 12438, 13018), 'ssCounter': 1, 'subsessionLength': -3063, 'totalTime': -3057}]}), (u'c391fb04-eac1-471d-88c1-9e1c08ff0494', {(40, 1772, 2049): [{'activeTicks': 93, 'buildId': u'20150629134017', 'date': u'2015-06-30', 'profSsCounter': 58, 'sig': (40, 1772, 2049), 'ssCounter': 1, 'subsessionLength': -5042, 'totalTime': -5042}]}), (u'21d4e26a-fc7e-4b82-8129-7d2969c7b6f7', {(11299, 21384, 22051): [{'activeTicks': 199, 'buildId': u'20150619030204', 'date': u'2015-06-22', 'profSsCounter': 54, 'sig': (11299, 21384, 22051), 'ssCounter': 1, 'subsessionLength': -85370, 'totalTime': 
-85358}]}), (u'008ed914-7c3c-4059-b2df-83116cda8e80', {(1244, 2813, 3232): [{'activeTicks': 379, 'buildId': u'20150611030208', 'date': u'2015-06-13', 'profSsCounter': 28, 'sig': (1244, 2813, 3232), 'ssCounter': 1, 'subsessionLength': 2046, 'totalTime': 2049}, {'activeTicks': 710, 'buildId': u'20150611030208', 'date': u'2015-06-13', 'profSsCounter': 29, 'sig': (1244, 2813, 3232), 'ssCounter': 2, 'subsessionLength': -37532, 'totalTime': -35483}]}), (u'72e09b36-0d91-4bc5-b3b4-e697a7e523f2', {(172, 1839, 2282): [{'activeTicks': 292, 'buildId': u'20150612030205', 'date': u'2015-06-13', 'profSsCounter': 8, 'sig': (172, 1839, 2282), 'ssCounter': 1, 'subsessionLength': -11261, 'totalTime': -11260}], (1687, 6669, 7477): [{'activeTicks': 128, 'buildId': u'20150613030206', 'date': u'2015-06-16', 'profSsCounter': 14, 'sig': (1687, 6669, 7477), 'ssCounter': 1, 'subsessionLength': -516031, 'totalTime': -516027}]}), (u'30e3ed42-e92b-4658-8508-357dafcdfadd', {(1812, 11143, 17417): [{'activeTicks': 36, 'buildId': u'20150627030211', 'date': u'2015-06-28', 'profSsCounter': 219, 'sig': (1812, 11143, 17417), 'ssCounter': 1, 'subsessionLength': -3033, 'totalTime': -3028}], (3439, 30598, 31024): [{'activeTicks': 240, 'buildId': u'20150627030211', 'date': u'2015-06-28', 'profSsCounter': 218, 'sig': (3439, 30598, 31024), 'ssCounter': 1, 'subsessionLength': -1592, 'totalTime': -1564}], (3884, 41471, 42939): [{'activeTicks': 53, 'buildId': u'20150621030204', 'date': u'2015-06-22', 'profSsCounter': 169, 'sig': (3884, 41471, 42939), 'ssCounter': 1, 'subsessionLength': -3068, 'totalTime': -3057}]})]
def subsessPosOfNegs(negSubsessDict):
    """List the position of every negative-length subsession.

    negSubsessDict maps a session signature to its list of subsession dicts.
    For each subsession whose "subsessionLength" is below zero, the result
    holds one tuple (number of subsessions listed for that session, that
    subsession's "ssCounter").
    """
    positions = []
    for subsessions in negSubsessDict.values():
        sessionSize = len(subsessions)
        positions.extend(
            (sessionSize, ss["ssCounter"])
            for ss in subsessions
            if ss["subsessionLength"] < 0
        )
    return positions
# Count the individual subsessions with a negative subsessionLength
# (subsessPosOfNegs emits one tuple per such subsession).
negSubsess.values().flatMap(subsessPosOfNegs).count()
137
But there are 137 subsessions with negative subsessionLength measurements. That means that the clients that are affected have almost two negative measurements on average.
# Collect the (subsessions in session, ssCounter) tuples and tally how often
# each combination occurs.
negSubsessSeqPos = negSubsess.values().flatMap(subsessPosOfNegs).collect()
collections.Counter( negSubsessSeqPos)
Counter({(1, 1): 108, (2, 2): 18, (3, 3): 7, (5, 5): 1, (3, 1): 1, (7, 7): 1, (2, 1): 1})
Ok, so although the sample is small, it does appear to be the case that negative measurements are more likely to occur at the end of a session. In this data there are 19 sessions that have a negative measurement and two subsessions -- from the counter above, {(2, 2): 18, (2, 1): 1} -- so only one of these 19 negative subsessionLength measurements occurred in the first of the two subsessions. It would be virtually impossible to see this outcome if the negative measurements were distributed at random across subsessions.
So, in answer to "Are all negative values of subsessionLength in the final subsession?" -- no, but most are.
These should track each other quite closely. If they don't, then there may be problems with correctly assigning time to days, environments, etc.