import pandas as pd import networkx as nx df = pd.read_excel("exposures/WEBSTATS_CIBL_1405690253612.xlsx", "Sheet1", skiprows=15) import re df = df[[col for col in df.columns.values if not re.match('\d\d\d\d-Q\d\.\d', col)]] df.head() def clean_foreign_claims(foreign_claims): foreign_claims.loc[:,'Counterparty location'] = foreign_claims.loc[:,'Counterparty location'].fillna(method='ffill') foreign_claim_cp_blacklist = ['3P:All countries'] foreign_claim_rpt_blacklist = [u'5A:All reporting countries', u'5E:European reporting countries', u'5G:Non-European countries'] df1 = foreign_claims.ix[~foreign_claims.loc[:, 'Counterparty location'].isin(foreign_claim_cp_blacklist)] df2 = df1.ix[~df1.loc[:, 'Reporting country'].isin(foreign_claim_rpt_blacklist)] return df2 foreign_claims = clean_foreign_claims(df) foreign_claims.head() g = nx.DiGraph() for i, r in foreign_claims.iterrows(): g.add_edge(r['Reporting country'], r['Counterparty location'], attr_dict={'weight': float(r['2013-Q4'])}) %matplotlib inline nx.draw_graphviz(g) #, pos=nx.spring_layout(g) ) # nx.draw_circular(g) nx.write_gexf(g, "2013q4.gexf")