%matplotlib inline
import matplotlib as mp
import matplotlib.pyplot as plt
import json
# import datetime
import os
# import pandas as pd
# import numpy as np
# from IPython.display import HTML
# from IPython.html.widgets import interact, interactive, fixed
# from IPython.html import widgets
# from IPython.display import clear_output, display
import networkx as nx
# nan = np.nan
# Build one undirected graph per sampled client: nodes are subsessionIds and
# an edge joins a subsession to the previousSubsessionId it reports.
# NOTE(review): nx.Graph is undirected, so the direction of each
# "previous" pointer is not retained -- confirm that is intended.
wd = "/data/mozilla/fhrV4Testing/"
sampleRoot = os.path.join(wd, "v4Sample2/bug1149666.20150413")

# Keep the on-disk directory names next to the derived ids. Rebuilding the
# directory name from the id ("-" back to "_") is not reversible when the
# real name already contains a "-", and would point at a missing path.
sampleDirs = os.listdir(sampleRoot)
v4Ids = [d.replace("_", "-") for d in sampleDirs]

sampleSize = 50
sessGraphs = {}
for dirName, c in zip(sampleDirs[:sampleSize], v4Ids[:sampleSize]):
    clientDir = os.path.join(sampleRoot, dirName)
    # Only the first directory entry is read; os.listdir order is arbitrary.
    fileName = os.listdir(clientDir)[0]
    v4Path = os.path.join(clientDir, fileName)
    sessGraphs[c] = nx.Graph()
    with open(v4Path, "r") as inFileV4:
        for line in inFileV4:
            # each line is a separate subsession submission
            try:
                # Split on the first tab only so a tab inside the payload
                # does not make an otherwise valid record unparseable.
                _, submission = line.split("\t", 1)
                submission = json.loads(submission)
            except ValueError:
                # Raised both by the unpack (no tab on the line) and by
                # json.loads (malformed payload).
                print("couldn't load json")
                continue
            try:
                info = submission[1]['payload']['info']
                subsessId = info['subsessionId']
            except (KeyError, IndexError, TypeError):
                # Record does not have the expected nesting or lacks the id.
                print("missing subsesionId")
                continue
            sessGraphs[c].add_node(subsessId)
            prevSubSess = info.get('previousSubsessionId', None)
            if prevSubSess:
                # add_edge creates the previous-subsession node if needed.
                sessGraphs[c].add_edge(subsessId, prevSubSess)
# Report the size of every client's subsession graph and draw it.
# Note: `c` is a graph here (a value of sessGraphs), not a client id.
for c in sessGraphs.values():
    # Single-argument print with %-formatting emits the same text on
    # Python 2 and Python 3.
    print("%d distinct subsessionIds" % c.number_of_nodes())
    print("%d distinct pointers from one session to a previousSubsessionId"
          % c.number_of_edges())
    nx.draw(c)
    # NOTE(review): show() is called per graph so each one is rendered as its
    # own figure rather than all graphs being drawn onto the same axes.
    plt.show()
14 distinct subsessionIds 9 distinct pointers from one session to a previousSubsessionId
15 distinct subsessionIds 0 distinct pointers from one session to a previousSubsessionId
18 distinct subsessionIds 17 distinct pointers from one session to a previousSubsessionId
20 distinct subsessionIds 19 distinct pointers from one session to a previousSubsessionId
5 distinct subsessionIds 0 distinct pointers from one session to a previousSubsessionId
4 distinct subsessionIds 3 distinct pointers from one session to a previousSubsessionId
116 distinct subsessionIds 114 distinct pointers from one session to a previousSubsessionId
28 distinct subsessionIds 25 distinct pointers from one session to a previousSubsessionId
17 distinct subsessionIds 16 distinct pointers from one session to a previousSubsessionId
13 distinct subsessionIds 6 distinct pointers from one session to a previousSubsessionId
54 distinct subsessionIds 31 distinct pointers from one session to a previousSubsessionId
11 distinct subsessionIds 2 distinct pointers from one session to a previousSubsessionId
103 distinct subsessionIds 100 distinct pointers from one session to a previousSubsessionId
4 distinct subsessionIds 0 distinct pointers from one session to a previousSubsessionId
11 distinct subsessionIds 9 distinct pointers from one session to a previousSubsessionId
21 distinct subsessionIds 20 distinct pointers from one session to a previousSubsessionId
2 distinct subsessionIds 1 distinct pointers from one session to a previousSubsessionId
62 distinct subsessionIds 51 distinct pointers from one session to a previousSubsessionId
1 distinct subsessionIds 0 distinct pointers from one session to a previousSubsessionId
4 distinct subsessionIds 0 distinct pointers from one session to a previousSubsessionId
28 distinct subsessionIds 25 distinct pointers from one session to a previousSubsessionId
4 distinct subsessionIds 1 distinct pointers from one session to a previousSubsessionId
28 distinct subsessionIds 27 distinct pointers from one session to a previousSubsessionId
3 distinct subsessionIds 1 distinct pointers from one session to a previousSubsessionId
45 distinct subsessionIds 43 distinct pointers from one session to a previousSubsessionId
6 distinct subsessionIds 5 distinct pointers from one session to a previousSubsessionId
3 distinct subsessionIds 2 distinct pointers from one session to a previousSubsessionId
15 distinct subsessionIds 14 distinct pointers from one session to a previousSubsessionId
3 distinct subsessionIds 2 distinct pointers from one session to a previousSubsessionId
6 distinct subsessionIds 1 distinct pointers from one session to a previousSubsessionId
35 distinct subsessionIds 20 distinct pointers from one session to a previousSubsessionId
21 distinct subsessionIds 7 distinct pointers from one session to a previousSubsessionId
1 distinct subsessionIds 0 distinct pointers from one session to a previousSubsessionId
12 distinct subsessionIds 0 distinct pointers from one session to a previousSubsessionId
44 distinct subsessionIds 27 distinct pointers from one session to a previousSubsessionId
3 distinct subsessionIds 0 distinct pointers from one session to a previousSubsessionId
24 distinct subsessionIds 23 distinct pointers from one session to a previousSubsessionId
2 distinct subsessionIds 1 distinct pointers from one session to a previousSubsessionId
3 distinct subsessionIds 0 distinct pointers from one session to a previousSubsessionId
9 distinct subsessionIds 5 distinct pointers from one session to a previousSubsessionId
90 distinct subsessionIds 52 distinct pointers from one session to a previousSubsessionId
18 distinct subsessionIds 0 distinct pointers from one session to a previousSubsessionId
185 distinct subsessionIds 171 distinct pointers from one session to a previousSubsessionId
17 distinct subsessionIds 16 distinct pointers from one session to a previousSubsessionId
42 distinct subsessionIds 35 distinct pointers from one session to a previousSubsessionId
2 distinct subsessionIds 0 distinct pointers from one session to a previousSubsessionId
20 distinct subsessionIds 18 distinct pointers from one session to a previousSubsessionId
270 distinct subsessionIds 269 distinct pointers from one session to a previousSubsessionId
3 distinct subsessionIds 2 distinct pointers from one session to a previousSubsessionId
71 distinct subsessionIds 22 distinct pointers from one session to a previousSubsessionId