# Parse one entry of the awkward historical arXiv access-log format
# with a single regular expression.
# http://docs.python.org/2/library/re.html
import re

log_entry='proxy4.utsa.edu.au 151.217.6.9 - -|- [11/Apr/2013:23:57:14 -0400] [Mozilla/5.0 (Windows NT 6.1; rv:19.0) Gecko/20100101 Firefox/19.0|-=151.217.60.103|0|http://arxiv.org/|proxy4.utas.edu.au.1364191674910933] "GET /find/all/1/all:+2013arXiv13011419D/0/1/0/all/0/1 HTTP/1.0" 200 10737'

# Each parenthesized group captures one field of the entry, in order.
matched = re.match(r"(\S+) (\S+) (\S+) (\S+?)\|(\S+) \[(.*?)\] \[(.*)\|(.*?)=(.*?)\|(\d+)\|(.*)\|(.*?)\] \"(.*)\" (\d+) (\S+)",log_entry)

# Field names, one per capture group above.
keys=['host','ip','logname','tapiruid','tapirsid','datetime','ua','xfrom','xfor','delay','referer','cookie','request','status','bytes']

# Pair each field name with its captured text.
entry = {name: value for name, value in zip(keys, matched.groups())}
# Show every parsed field as "key: value" (Python 2 print statement).
for k in keys: print k+':',entry[k]
# (cell output) host: proxy4.utsa.edu.au ip: 151.217.6.9 logname: - tapiruid: - tapirsid: - datetime: 11/Apr/2013:23:57:14 -0400 ua: Mozilla/5.0 (Windows NT 6.1; rv:19.0) Gecko/20100101 Firefox/19.0 xfrom: - xfor: 151.217.60.103 delay: 0 referer: http://arxiv.org/ cookie: proxy4.utas.edu.au.1364191674910933 request: GET /find/all/1/all:+2013arXiv13011419D/0/1/0/all/0/1 HTTP/1.0 status: 200 bytes: 10737
#next need to parse the datetime
import time
#http://docs.python.org/2/library/time.html
def timestr_utc(time_string):
    """Convert an Apache-style timestamp ('11/Apr/2013:23:57:14 -0400')
    to integer Unix seconds; on a malformed string, print a warning and
    return None.

    NOTE(review): only the -0400/-0500 (US Eastern) offsets are handled,
    by rewriting them to the EDT/EST names so strptime's %Z accepts them;
    any other offset falls into the ValueError branch.  time.mktime
    interprets the struct_time in the *local* timezone, so despite the
    name this assumes a US/Eastern host -- confirm before reusing.  The
    DST-ambiguity demo further down depends on exactly this behavior.
    """
    #wants timezone names instead of time offset:
    ts=time_string.replace('-0400','EDT').replace('-0500','EST')
    try:
        return int(time.mktime(time.strptime(ts,'%d/%b/%Y:%H:%M:%S %Z')))
    except ValueError:
        print "bad time",time_string
        return(None)
#current time: round-trip now() through the string format and back
print 'current time is',time.time()
#format the current local time the same way the log entries are formatted
time_string=time.strftime('%d/%b/%Y:%H:%M:%S %Z')
print 'current time string is',time_string
utc_time = timestr_utc(time_string)
print 'and converts back to',utc_time,'seconds'
#365.25 days/year roughly accounts for leap years
print "in years that's roughly",utc_time/(60*60*24*365.25),'years'
# (cell output) current time is 1366481747.77 current time string is 20/Apr/2013:14:15:47 EDT and converts back to 1366481747 seconds in years that's roughly 43.3011935952 years
#recall that on 31 Dec 1969 at 7pm eastern time the ball at Times Square descended
#with great fanfare and announced to the world "0 Unix time"
#(the Unix epoch, 1 Jan 1970 00:00 UTC, is 31 Dec 1969 19:00 EST)
#check one second before, at, and after the epoch:
for ts in ["31/Dec/1969:18:59:59 -0500","31/Dec/1969:19:00:00 -0500","31/Dec/1969:19:00:01 -0500"]:
    print ts,'converts to',timestr_utc(ts)
# (cell output) 31/Dec/1969:18:59:59 -0500 converts to -1 31/Dec/1969:19:00:00 -0500 converts to 0 31/Dec/1969:19:00:01 -0500 converts to 1
#fetch the xkcd "Bug" comic and display it inline in the notebook
#http://imgs.xkcd.com/comics/bug.png
from urllib2 import urlopen
#read() returns the raw PNG bytes
bug = urlopen('http://imgs.xkcd.com/comics/bug.png').read()
from IPython.display import Image
#Image renders the bytes as the cell's output
Image(bug)
#now check what happens when the clocks were turned back at 2a.m., and 1:30a.m. occurred twice:
time_string1 = "04/Nov/2012:01:30:13 -0400"
time_string2 = "04/Nov/2012:01:30:13 -0500"
utc1=timestr_utc(time_string1)
utc2=timestr_utc(time_string2)
print time_string1,'converts to',utc1
print time_string2,'converts to',utc2
print utc1,'-',utc2,'=',utc2-utc1,'seconds difference, and convert back to:'
#then see how they're translated back
print time.strftime('%d/%b/%Y:%H:%M:%S %Z',time.localtime(utc1))
print time.strftime('%d/%b/%Y:%H:%M:%S %Z',time.localtime(utc2))
# (cell output) 04/Nov/2012:01:30:13 -0400 converts to 1352007013 04/Nov/2012:01:30:13 -0500 converts to 1352010613 1352007013 - 1352010613 = 3600 seconds difference, and convert back to: 04/Nov/2012:01:30:13 EDT 04/Nov/2012:01:30:13 EST
#some examples from http://networkx.github.io/documentation/latest/examples/
import networkx as nx
G=nx.Graph()
#nodes can be any hashable object, not just integers
G.add_node("spam")
#adding an edge implicitly creates nodes 1 and 2
G.add_edge(1,2)
print 'nodes:',G.nodes()
print 'edges:',G.edges()
# (cell output) nodes: [1, 2, 'spam'] edges: [(1, 2)]
#http://networkx.github.io/documentation/latest/examples/drawing/house_with_colors.html
G=nx.house_graph()
# explicitly set positions: nodes 0-3 form the square, node 4 is the roof peak
pos={0:(0,0),
     1:(1,0),
     2:(0,1),
     3:(1,1),
     4:(0.5,2.0)}
# draw the peak large, the four base nodes larger and blue
nx.draw_networkx_nodes(G,pos,node_size=2000,nodelist=[4])
nx.draw_networkx_nodes(G,pos,node_size=3000,nodelist=[0,1,2,3],node_color='b')
nx.draw_networkx_edges(G,pos,alpha=0.5,width=6)
#axis() comes from the %pylab namespace (matplotlib)
axis('off')
#trailing None suppresses the cell's Out[] value
None
#http://networkx.github.io/documentation/latest/examples/drawing/ego_graph.html
###just draw friends network of highest degree node in preferential attachment network
# Create a BA (Barabasi-Albert) preferential-attachment graph: n nodes, m edges per new node
n=1000
m=2
from operator import itemgetter
G=nx.generators.barabasi_albert_graph(n,m)
# find node with largest degree: max() is O(n) -- no need to sort the
# whole degree list just to take the last element
node_and_degree=G.degree()
(largest_hub,degree)=max(node_and_degree.items(),key=itemgetter(1))
# Create ego graph of main hub (the hub plus all its neighbors)
hub_ego=nx.ego_graph(G,largest_hub)
# Draw graph
figure(figsize=(4,4))
pos=nx.spring_layout(hub_ego)
nx.draw(hub_ego,pos,node_color='b',node_size=50,with_labels=False)
# Draw ego as large and red
nx.draw_networkx_nodes(hub_ego,pos,nodelist=[largest_hub],node_size=300,node_color='r')
# (cell output) <matplotlib.collections.PathCollection at 0x10824da10>
#http://networkx.github.io/documentation/latest/examples/drawing/random_geometric_graph.html
#random geometric graph: 200 nodes uniform in the unit square, edges between pairs closer than 0.125
G=nx.random_geometric_graph(200,0.125)
# position is stored as node attribute data for random_geometric_graph
pos=nx.get_node_attributes(G,'pos')
# find node nearest the center (0.5,0.5): min() over squared distance
# replaces the manual dmin/ncenter loop (whose dmin=1 seed only worked
# because the squared distance is at most 0.5 inside the unit square)
ncenter=min(pos,key=lambda node:(pos[node][0]-0.5)**2+(pos[node][1]-0.5)**2)
# color by shortest-path length from the node near the center
p=nx.single_source_shortest_path_length(G,ncenter)
figure(figsize=(8,8))
nx.draw_networkx_edges(G,pos,nodelist=[ncenter],alpha=0.4)
# p.keys() and p.values() align (same dict, not mutated in between)
nx.draw_networkx_nodes(G,pos,nodelist=p.keys(),
                       node_size=80,
                       node_color=p.values(),
                       cmap=plt.cm.Reds_r)
xlim(-0.05,1.05)
ylim(-0.05,1.05)
axis('off')
#trailing None suppresses the cell's Out[] value
None
#http://networkx.github.io/documentation/latest/examples/graph/degree_sequence.html
#Random graph from given degree sequence.
from collections import Counter
z=[5,3,3,3,3,2,2,2,1,1,1]
#single-argument print(...) works identically in Python 2 and 3,
#making this cell's print style internally consistent
print(nx.is_valid_degree_sequence(z))
print("Configuration model")
G=nx.configuration_model(z) # configuration model (multigraph realizing z)
degree_sequence=list(nx.degree(G).values()) # degree sequence
print("Degree sequence %s" % degree_sequence)
print("Degree histogram")
# Counter replaces the manual dict-building loop
hist=Counter(degree_sequence)
print("degree #nodes")
# sorted() makes the histogram print in deterministic degree order
for d in sorted(hist):
    print('%d %d' % (d,hist[d]))
# (cell output) True Configuration model Degree sequence [5, 3, 3, 3, 3, 2, 2, 2, 1, 1, 1] Degree histogram degree #nodes 1 3 2 3 3 4 5 1
#http://networkx.github.io/documentation/latest/examples/graph/erdos_renyi.html
#Create a G(n,m) random graph with n nodes and m edges and report some properties.
import sys  # bug fix: sys.stdout is used below but sys was never imported in this file
n=10 # 10 nodes
m=20 # 20 edges
G=nx.gnm_random_graph(n,m)
# some properties
print("node degree clustering")
for v in nx.nodes(G):
    print('%s %d %f' % (v,nx.degree(G,v),nx.clustering(G,v)))
# print the adjacency list to terminal
nx.write_adjlist(G,sys.stdout)
# (cell output) node degree clustering 0 2 0.000000 1 2 0.000000 2 4 0.500000 3 5 0.500000 4 5 0.500000 5 5 0.200000 6 4 0.666667 7 5 0.300000 8 5 0.200000 9 3 0.000000 # gnm_random_graph(10,20) 0 8 5 1 9 7 2 8 4 6 7 3 8 4 5 6 7 4 8 5 6 5 9 7 6 7 7 8 9 9
#http://networkx.github.io/documentation/latest/examples/graph/karate_club.html
#Zachary's Karate Club graph
#Data file from:
#http://vlado.fmf.uni-lj.si/pub/networks/data/Ucinet/UciData.htm
G=nx.karate_club_graph()
figure(figsize=(8,8))
nx.draw_networkx(G)
axis('off')
# list every member's degree (number of club ties)
print("Node Degree")
for member in G:
    print('%s %s' % (member,G.degree(member)))
# (cell output) Node Degree 0 16 1 9 2 10 3 6 4 3 5 4 6 4 7 4 8 5 9 2 10 3 11 1 12 2 13 5 14 2 15 2 16 2 17 2 18 2 19 3 20 2 21 2 22 2 23 5 24 3 25 3 26 2 27 4 28 3 29 4 30 4 31 6 32 12 33 17
#http://networkx.github.io/documentation/latest/examples/drawing/giant_component.html
#illustrates sudden appearance of giant connected component in a binomial random graph.
import math  # bug fix: math.log is used below but math was never imported in this file
layout=nx.graphviz_layout  # NOTE: requires pygraphviz (or pydot) to be installed
n=150 # 150 nodes
# p value at which giant component (of size log(n) nodes) is expected
p_giant=1.0/(n-1)
# p value at which graph is expected to become completely connected
p_conn=math.log(n)/float(n)
# the following range of p values should be close to the threshold
pvals=[0.003, 0.006, 0.008, 0.015]
figure(figsize=(8,8))
region=220 # for pylab 2x2 subplot layout
subplots_adjust(left=0,right=1,bottom=0,top=0.95,wspace=0.01,hspace=0.01)
for p in pvals:
    G=nx.binomial_graph(n,p)
    pos=layout(G)
    region+=1
    subplot(region)
    title("p = %6.3f"%(p))
    nx.draw(G,pos, with_labels=False, node_size=10)
    # identify largest connected component (list is sorted largest-first)
    Gcc=nx.connected_component_subgraphs(G)
    G0=Gcc[0]
    nx.draw_networkx_edges(G0,pos, with_labels=False,
                           edge_color='r', width=6.0)
    # show other connected components
    for Gi in Gcc[1:]:
        if len(Gi)>1: nx.draw_networkx_edges(Gi,pos,with_labels=False,
                                             edge_color='r',alpha=0.3,width=5.0)
#http://networkx.github.io/documentation/latest/examples/graph/napoleon_russian_campaign.html
#Minard's data from Napoleon's 1812-1813 Russian Campaign.
#http://www.math.yorku.ca/SCS/Gallery/minard/minard.txt
import string
def minard_graph():
    """Build the graphs behind Minard's map of Napoleon's 1812-1813 Russian campaign.

    Returns (g, c):
      g -- list of three nx.Graph objects, one per army group (data1-data3),
           each carrying two extra attributes set below:
             G.pos: node -> (longitude, latitude)
             G.pop: node -> troop count at that point
           consecutive route points are joined by an edge labeled with an
           {A-or-R: group-number} attribute dict (A=advance, R=retreat).
      c -- dict mapping city name -> (longitude, latitude).

    Data from http://www.math.yorku.ca/SCS/Gallery/minard/minard.txt
    Each data row is: longitude,latitude,troops,direction(A/R),group.
    """
    data1="""\
24.0,54.9,340000,A,1
24.5,55.0,340000,A,1
25.5,54.5,340000,A,1
26.0,54.7,320000,A,1
27.0,54.8,300000,A,1
28.0,54.9,280000,A,1
28.5,55.0,240000,A,1
29.0,55.1,210000,A,1
30.0,55.2,180000,A,1
30.3,55.3,175000,A,1
32.0,54.8,145000,A,1
33.2,54.9,140000,A,1
34.4,55.5,127100,A,1
35.5,55.4,100000,A,1
36.0,55.5,100000,A,1
37.6,55.8,100000,A,1
37.7,55.7,100000,R,1
37.5,55.7,98000,R,1
37.0,55.0,97000,R,1
36.8,55.0,96000,R,1
35.4,55.3,87000,R,1
34.3,55.2,55000,R,1
33.3,54.8,37000,R,1
32.0,54.6,24000,R,1
30.4,54.4,20000,R,1
29.2,54.3,20000,R,1
28.5,54.2,20000,R,1
28.3,54.3,20000,R,1
27.5,54.5,20000,R,1
26.8,54.3,12000,R,1
26.4,54.4,14000,R,1
25.0,54.4,8000,R,1
24.4,54.4,4000,R,1
24.2,54.4,4000,R,1
24.1,54.4,4000,R,1"""
    data2="""\
24.0,55.1,60000,A,2
24.5,55.2,60000,A,2
25.5,54.7,60000,A,2
26.6,55.7,40000,A,2
27.4,55.6,33000,A,2
28.7,55.5,33000,R,2
29.2,54.2,30000,R,2
28.5,54.1,30000,R,2
28.3,54.2,28000,R,2"""
    data3="""\
24.0,55.2,22000,A,3
24.5,55.3,22000,A,3
24.6,55.8,6000,A,3
24.6,55.8,6000,R,3
24.2,54.4,6000,R,3
24.1,54.4,6000,R,3"""
    # city rows are: longitude,latitude,name
    cities="""\
24.0,55.0,Kowno
25.3,54.7,Wilna
26.4,54.4,Smorgoni
26.8,54.3,Moiodexno
27.7,55.2,Gloubokoe
27.6,53.9,Minsk
28.5,54.3,Studienska
28.7,55.5,Polotzk
29.2,54.4,Bobr
30.2,55.3,Witebsk
30.4,54.5,Orscha
30.4,53.9,Mohilow
32.0,54.8,Smolensk
33.2,54.9,Dorogobouge
34.3,55.2,Wixma
34.4,55.5,Chjat
36.0,55.5,Mojaisk
37.6,55.8,Moscou
36.6,55.3,Tarantino
36.5,55.0,Malo-Jarosewii"""
    # parse city coordinates into a name -> (lon, lat) dict
    c={}
    for line in cities.split('\n'):
        x,y,name=line.split(',')
        c[name]=(float(x),float(y))
    # build one path graph per army group, chaining consecutive route points
    g=[]
    for data in [data1,data2,data3]:
        G=nx.Graph()
        i=0              # sequential node id along the route
        G.pos={} # location
        G.pop={} # size
        last=None        # previous node, to chain the path
        for line in data.split('\n'):
            x,y,p,r,n=line.split(',')
            G.pos[i]=(float(x),float(y))
            G.pop[i]=int(p)
            if last is None:
                # first point of the route: nothing to connect yet
                last=i
            else:
                # edge attribute dict {direction: group} (networkx 1.x positional form)
                G.add_edge(i,last,{r:int(n)})
                last=i
            i=i+1
        g.append(G)
    return g,c
# Draw the three Minard army-group graphs, sized by troop count, then label the cities.
(g,city)=minard_graph()
figure(1,figsize=(11,5))
colors=['b','g','r']
for G in g:
    c=colors.pop(0)  # one color per army group
    # scale node area down from troop counts; G.pop is the troop-count dict
    sizes=[int(G.pop[n]/300.0) for n in G]
    nx.draw_networkx_edges(G,G.pos,edge_color=c,width=4,alpha=0.5)
    nx.draw_networkx_nodes(G,G.pos,node_size=sizes,node_color=c,alpha=0.5)
    # small black dot at every route point
    nx.draw_networkx_nodes(G,G.pos,node_size=5,node_color='k')
# annotate each city slightly above its coordinates
for name,(x,y) in city.items():
    text(x,y+0.1,name)