import time import numpy as np import matplotlib.pyplot as plt import pandas as pd import scipy as sp import scipy.sparse.linalg as linalg import scipy.cluster.hierarchy as hr from scipy.spatial.distance import pdist, squareform import sklearn.datasets as datasets import sklearn.metrics as metrics import sklearn.utils as utils import sklearn.linear_model as linear_model import sklearn.svm as svm import sklearn.cross_validation as cross_validation import sklearn.cluster as cluster from sklearn.ensemble import AdaBoostClassifier from sklearn.neighbors import KNeighborsClassifier from sklearn.decomposition import TruncatedSVD from sklearn.preprocessing import StandardScaler import statsmodels.api as sm from patsy import dmatrices import networkx as nx import seaborn as sns %matplotlib inline g = nx.Graph() g.add_node(1) g.add_nodes_from([2,3]) g.add_node('ET') print g.nodes() g.remove_node(1) print g.nodes() g.add_edge(1,2) g.add_edge(3,'ET') g.add_edges_from([(2,3), (1,3)]) print g.edges() print g.nodes() g.remove_edge(1,2) print g.edges() print g.nodes() g.neighbors(1) g.degree(1) g.add_node(1, time='5pm') g.node[1]['time'] g.node[1] # Python dictionary g.add_edge(1, 2, weight=4.0 ) g[1][2]['weight'] = 5.0 # edge already added g[1][2] for node in g: print 'nodeid: ', node, '\t degree:', g.degree(node) print g.edges(data=True) G = nx.DiGraph() G.add_node(1) G.add_nodes_from([2,3]) G.add_nodes_from(range(100,110)) H=nx.Graph() H.add_path([0,1,2,3,4,5,6,7,8,9]) G.add_nodes_from(H) print G.nodes() G.add_edge(1, 2) G.add_edges_from([(1,2),(1,3)]) G.add_edges_from(H.edges()) print G.edges() G = nx.DiGraph(day="Friday") print G.graph G.add_node(1, time='5pm') G.add_nodes_from([3], time='2pm') print G.node[1] G.node[1]['room'] = 714 del G.node[1]['room'] # remove attribute print G.nodes(data=True) G.add_edge(1, 2, weight=4.7 ) G.add_edges_from([(3,4),(4,5)], color='red') G.add_edges_from([(1,2,{'color':'blue'}), (2,3,{'weight':8})]) G[1][2]['weight'] = 4.7 G.edge[1][2]['weight'] = 4 print G.edges(data=True) 1 in G # check if node in graph [n for n in G if n<3] # iterate through nodes len(G) # number of nodes in graph print G[1] # adjacency dict keyed by neighbor to edge attributes ... # Note: you should not change this dict manually! for n,nbrsdict in G.adjacency_iter(): for nbr,eattr in nbrsdict.items(): if 'weight' in eattr: print (n,nbr,eattr['weight']) [ (u,v,edata['weight']) for u,v,edata in G.edges(data=True) if 'weight' in edata ] Ggml = nx.read_gml('polblogs.gml') print len(Ggml.nodes()) print len(Ggml.edges()) with sns.axes_style('white'): fig = plt.subplots(1, figsize=(12,8)) nx.draw_networkx(Ggml, edge_color='#a4a4a4', node_size=50, with_labels=False, arrows=False) plt.axis('off') with open('football.txt', 'r') as f: football = nx.read_edgelist(f, comments='#', nodetype=int, data=False) print len(football.nodes()) print len(football.edges()) with sns.axes_style('white'): fig = plt.subplots(1, figsize=(12,8)) nx.draw_networkx(football, edge_color='#a4a4a4', node_size=50, with_labels=False) plt.axis('off') kn=nx.karate_club_graph() num_nodes = kn.number_of_nodes() print 'number of nodes: ' + str(num_nodes) num_edges = kn.number_of_edges() print 'number of edges: ' + str(num_edges) with sns.axes_style('white'): fig = plt.subplots(1, figsize=(12,8)) nx.draw_networkx(kn, edge_color='#a4a4a4', with_labels=True, font_color='#cacaca') plt.axis('off') fl = nx.florentine_families_graph() num_nodes = fl.number_of_nodes() print 'number of nodes: ' + str(num_nodes) num_edges = fl.number_of_edges() print 'number of edges: ' + str(num_edges) with sns.axes_style('white'): fig = plt.subplots(1, figsize=(12,8)) nx.draw_networkx(fl, edge_color='#a4a4a4', node_size=0, with_labels=True) plt.axis('off') er=nx.erdos_renyi_graph(1000,0.15) print type(er) print "Number of nodes in the random graph: ", er.number_of_nodes() print "Number of edges in the random graph: ", er.number_of_edges() with sns.axes_style('white'): fig = plt.subplots(1, figsize=(12,8)) nx.draw_networkx(er, node_size=15, edge_color='#a4a4a4', with_labels=False, alpha=.4, linewidths=0) plt.axis('off') degree_sequence=sorted(nx.degree(er).values(),reverse=True) dmax=max(degree_sequence) print dmax h,bins,patches = plt.hist(degree_sequence,bins=dmax) hmax=max(h) plt.axis([1,dmax,1,hmax]) # set ranges #x=compress(h,bins) # remove bins with zero entries #y=compress(h,h) # remove corresponding entries x=bins.compress(h) y=h.compress(h) plt.plot(x,y,'bo') plt.title("Degree distribution") plt.xlabel("degree") plt.ylabel("number of nodes") plt.show() cc= nx.connected_components(er) print type(cc) print [len(s) for s in cc] def print_cc_sizes(g): cc = nx.connected_components(g) print [len(s) for s in cc] ccall = nx.clustering(er) clustering_coefficient = nx.average_clustering(er) print clustering_coefficient h,bins,patches = plt.hist(list(nx.clustering(er).values())) plt.title('clustering coefficients') print(nx.triangles(er,0)) #print(nx.triangles(er)) h,bins, patches = plt.hist(list(nx.triangles(er).values())) plt.title('Triangles') print(nx.diameter(er)) print(nx.average_shortest_path_length(er)) ws=nx.watts_strogatz_graph(500,5,0.1) print_cc_sizes(ws) degree_sequence=sorted(nx.degree(ws).values(),reverse=True) dmax=max(degree_sequence) print dmax h,bins,patches = plt.hist(degree_sequence,bins=dmax) hmax=max(h) plt.axis([1,dmax,1,hmax]) # set ranges #x=compress(h,bins) # remove bins with zero entries #y=compress(h,h) # remove corresponding entries x=bins.compress(h) y=h.compress(h) plt.plot(x,y,'bo') plt.title("Degree distribution") plt.xlabel("degree") plt.ylabel("number of nodes") plt.show() h,bins,patches = plt.hist(list(nx.clustering(ws).values())) plt.title('clustering coefficients') print 'Diameter:', (nx.diameter(ws)) print 'Average shortest path length:', (nx.average_shortest_path_length(ws)) print 'Average clustering coefficient:', (nx.average_clustering(ws)) r = range(4,7) d = np.zeros(len(r)) cc = np.zeros(len(r)) pl = np.zeros(len(r)) index = 0 for i in r: ws=nx.watts_strogatz_graph(500,i,0.1) d[index] = nx.diameter(ws) cc[index] = nx.average_clustering(ws) pl[index] = nx.average_shortest_path_length(ws) index=+1 plt.plot(r,d,'r') plt.plot(r,cc,'b') #plt.plot(r,pl,'g'); ba=nx.barabasi_albert_graph(500,5) print_cc_sizes(ba) degree_sequence=sorted(nx.degree(ba).values(),reverse=True) dmax=max(degree_sequence) print dmax h,bins,patches = plt.hist(degree_sequence,bins=dmax) hmax=max(h) plt.axis([1,dmax,1,hmax]) # set ranges #x=compress(h,bins) # remove bins with zero entries #y=compress(h,h) # remove corresponding entries x=bins.compress(h) y=h.compress(h) plt.loglog(x,y,'bo') plt.title("Degree distribution") plt.xlabel("degree") plt.ylabel("number of nodes") plt.show() # Code for setting the style of the notebook from IPython.core.display import HTML def css_styling(): styles = open("../theme/custom.css", "r").read() return HTML(styles) css_styling()