"""Small-multiples plot of GDP per capita over time, one subplot per country.

Downloads the Gapminder five-year dataset, builds a square grid of
subplots with Plotly (legacy ``plotly.plotly`` / ``graph_objs`` API),
draws one filled GDP-per-capita time series per country, and uploads,
embeds and saves the resulting figure.

This is a notebook export: bare expressions such as ``df.head()`` only
display something when run as the last statement of a notebook cell.
"""
from contextlib import closing
import urllib2

import numpy as np
import pandas as pd

import plotly.plotly as py  # signing in with your credentials file
import plotly.tools as tls
from plotly.graph_objs import Figure, Data, Layout
from plotly.graph_objs import Scatter
from plotly.graph_objs import Marker, Font
from plotly.graph_objs import XAxis, YAxis, Annotation, Annotations

# The dataset's url. Thanks Jennifer Bryan!
url_csv = ('http://www.stat.ubc.ca/~jenny/notOcto/STAT545A/examples/'
           'gapminder/data/gapminderDataFiveYear.txt')

# Download the file and load it into a dataframe; `closing` releases the
# HTTP connection as soon as pandas has finished reading it.
with closing(urllib2.urlopen(url_csv)) as file_csv:
    df = pd.read_csv(file_csv, sep='\t')

df.head()  # print the first 5 lines of the dataframe

# Start year, end year and number of years in the dataset
df['year'].min(), df['year'].max(), len(df['year'].unique())

countries = df['country'].unique()            # list of countries
N_country = len(countries)                    # number of countries
N_rowcol = int(np.ceil(np.sqrt(N_country)))   # size of the square subplot grid

N_country, N_rowcol  # print to screen

# Generate Figure object with N_rowcol x N_rowcol axes
# (12 rows x 12 columns = 144 axes for this dataset)
fig = tls.get_subplots(
    rows=N_rowcol,            # number of rows
    columns=N_rowcol,         # number of columns
    horizontal_spacing=0.02,  # horiz. spacing (norm. coord)
    vertical_spacing=0.02,    # vert. spacing (norm. coord)
    print_grid=True)          # print axis grid ids to screen


def get_splts(N_rowcol, N_country):
    """Return the subplot ids of a square grid, in display order.

    Ids are laid out as 1..N_rowcol on the bottom row, increasing upwards
    (the layout the row flip below assumes). Countries are assigned
    left-to-right, top-to-bottom, so the unused subplots are the last
    ones in that order.

    Parameters
    ----------
    N_rowcol : int
        Number of rows (and columns) of the grid.
    N_country : int
        Number of subplots that will hold data.

    Returns
    -------
    splts : list of int
        Ids of the used subplots, top-left first.
    splts_empty : list of int
        Ids of the unused subplots.
    splts_left : numpy.ndarray
        Ids of the subplots in the left-most column.
    splts_bottom : numpy.ndarray
        Ids of the subplots in the bottom row.
    """
    N_splt = N_rowcol**2                              # number of subplots
    tmp1d = np.arange(1, N_splt + 1)                  # => [1,2,..,N_splt]
    tmp2d = np.resize(tmp1d, (N_rowcol, N_rowcol))    # => [[1,..,N_rowcol],..,[..,N_splt]]
    tmp2d_flip = tmp2d[::-1, :]                       # => [[..,N_splt],..,[1,..,N_rowcol]]
    splts_left = tmp2d_flip[:, 0]      # indices of the left-hand side subplots
    splts_bottom = tmp2d_flip[-1, :]   # indices of the bottom subplots
    in_order = tmp2d_flip.flatten().tolist()  # => [..,N_splt,..,1,2,..,N_rowcol]
    # Slice rather than remove ids computed from the bottom row only: up to
    # 2*N_rowcol - 2 subplots can be empty, which may span more than one row.
    splts = in_order[:N_country]
    splts_empty = in_order[N_country:]
    return splts, splts_empty, splts_left, splts_bottom


# Get lists of subplot indices
splts, splts_empty, splts_left, splts_bottom = get_splts(N_rowcol, N_country)

splts  # print list


def make_Scatter_gdp(splt, x, y, color, country, text):
    """Build the Scatter trace of one country, bound to subplot `splt`.

    Parameters
    ----------
    splt : int
        Subplot id; selects the 'x<id>' / 'y<id>' axes.
    x, y : sequence
        Coordinates of the data points.
    color : str
        Marker, line and fill color.
    country : str
        Trace name (shown on hover).
    text : list of str
        Hover text, one entry per data point.
    """
    return Scatter(
        x=x,                          # x coordinates
        y=y,                          # y coordinates
        name=country,                 # label name (on hover)
        text=text,                    # hover text
        mode='lines+markers',         # show marker pts and line between them
        fill='tozeroy',               # fill area down to y=0
        marker=Marker(color=color),   # marker, line and fill color
        xaxis='x{}'.format(splt),     # bind coordinate to given x-axis
        yaxis='y{}'.format(splt))     # bind coordinate to given y-axis


# Colors corresponding to continents
colors = dict(
    Asia='#1f77b4',
    Europe='#ff7f0e',
    Africa='#2ca02c',
    Americas='#d62728',
    Oceania='#9467bd')


def make_text(X):
    """Return the hover text for one dataframe row `X`.

    `X` must provide the 'continent', 'year', 'gdpPercap', 'lifeExp' and
    'pop' fields. Fields are separated by '<br>', which Plotly renders as
    a line break in hover labels; without a separator they run together.
    """
    return ('Continent: %s'
            '<br>Year: %s'
            '<br>GDP per capita: %s $'
            '<br>Life Expectancy: %s years'
            '<br>Population: %s million'
            % (X['continent'], X['year'], X['gdpPercap'],
               X['lifeExp'], X['pop'] / 1e6))


# GDP range in the dataset, used to choose the y-axis range below
df['gdpPercap'].min(), df['gdpPercap'].max()

# For all x axes
axis_style_x = dict(
    range=[1950, 2010])  # set x-axis range

# For all y axes
axis_style_y = dict(
    type='log',                               # N.B. log y-axis
    range=[np.log10(90), np.log10(5e5)])      # N.B. range w.r.t. log scale

# For all axes
axis_style_all = dict(
    ticks='outside',        # tick placement (NOTE(review): comment used to say "no ticks")
    showline=True,          # show axis bounding line
    showgrid=False,         # remove grid
    zeroline=False,         # no thick line at x=y=0
    showticklabels=False)   # remove tick labels

# For y axes on the left hand side of the subplot grid
axis_style_left = dict(
    showticklabels=True,    # N.B. add back tick labels (overwrite axis_style_all)
    title='GDP per cap.')   # title of the y axes

# For x axes on the bottom of the subplot grid
axis_style_bottom = dict(
    showticklabels=True,    # N.B. add back tick labels (overwrite axis_style_all)
    title='year')           # title of the x axes


def make_splt_anno(splt_in, country):
    """Build the annotation that labels subplot `splt_in` with `country`.

    Names longer than 14 characters are cut to 14 characters plus '.'.
    The annotation is positioned in the data coordinates of the subplot's
    own axes ('x<id>' / 'y<id>').
    """
    if len(country) > 14:
        country = country[0:14] + '.'   # truncate country's name if too long
    return Annotation(
        x=1955,                       # x position
        y=np.log10(2.5e5),            # y position (log axis => log10 of value)
        text=country,                 # text
        align='center',               # align text in the center
        font=Font(size=14),           # font size
        showarrow=False,              # no arrow
        # Use the argument, not a same-named global, to pick the axes.
        xref='x{}'.format(splt_in),   # position in relation to the x
        yref='y{}'.format(splt_in))   # and y axes


width = 2000    # plot's width
height = 1800   # and height in pixels

title = "GDP per Capita from 1952 to 2007 in USD of the year 2000 [GapMinder]"

fig['layout'].update(
    title=title,                  # plot's title
    font=Font(
        family='Georgia, serif',  # global font,
        color='#635F5D'),         # same as in 3.1
    titlefont=Font(size=30),      # increase title font size
    showlegend=False,             # remove legend
    autosize=False,               # turn off autosize
    width=width,                  # plot's width
    height=height)                # plot's height

fig['layout']['annotations'] = Annotations([])  # init. 'annotations' key

# Group dataframe by country in alphabetical order and pair each group
# with its subplot id (splts holds exactly one id per country).
for splt, (country, X) in zip(splts, df.groupby('country')):
    x = X['year'].values                        # get years
    y = X['gdpPercap'].values                   # get GDP values
    color = colors[X['continent'].values[0]]    # get fill color
    text = X.apply(make_text, axis=1).tolist()  # get hover text

    # Append data object
    fig['data'] += [make_Scatter_gdp(splt, x, y, color, country, text)]

    # Make shortcut to xaxis of splt id, update its style
    xaxis_splt = fig['layout']['xaxis{}'.format(splt)]
    xaxis_splt.update(axis_style_x)
    xaxis_splt.update(axis_style_all)
    if splt in splts_bottom:
        xaxis_splt.update(axis_style_bottom)

    # Make shortcut to yaxis of splt id, update its style
    yaxis_splt = fig['layout']['yaxis{}'.format(splt)]
    yaxis_splt.update(axis_style_y)
    yaxis_splt.update(axis_style_all)
    if splt in splts_left:
        yaxis_splt.update(axis_style_left)

    # Append annotations object, label each subplot
    fig['layout']['annotations'] += [make_splt_anno(splt, country)]

for splt in splts_empty:  # loop through list of empty subplot ids

    # Make shortcut to xaxis of splt id, update its style
    xaxis_splt = fig['layout']['xaxis{}'.format(splt)]
    xaxis_splt.update(axis_style_x)
    xaxis_splt.update(axis_style_all)
    if splt in splts_bottom:  # for this dataset every empty subplot is on the bottom row
        xaxis_splt.update(axis_style_bottom)

    # Make shortcut to yaxis of splt id, update its style
    yaxis_splt = fig['layout']['yaxis{}'.format(splt)]
    yaxis_splt.update(axis_style_y)
    yaxis_splt.update(axis_style_all)

py.plot(fig, filename='small-multiple_gdp-time', auto_open=False)

tls.embed('etpinard', '311', width=width, height=height)

py.image.save_as(fig, 'small-multiple_gdp-time')

from IPython.display import Image
Image('small-multiple_gdp-time.png')

# CSS styling within IPython notebook
from IPython.core.display import HTML


def css_styling():
    """Download the user guide's custom stylesheet and wrap it in `HTML`."""
    url = 'https://raw.githubusercontent.com/plotly/python-user-guide/master/custom.css'
    # `closing` releases the connection once the stylesheet has been read.
    with closing(urllib2.urlopen(url)) as response:
        styles = response.read()
    return HTML(styles)


css_styling()