import plotly.plotly as py # signing in with your credentials file
import plotly.tools as tls
from plotly.graph_objs import Figure, Data, Layout
from plotly.graph_objs import Scatter
from plotly.graph_objs import Marker, Font
from plotly.graph_objs import XAxis, YAxis, Annotation, Annotations
import numpy as np
import pandas as pd
import urllib2
# The dataset's URL. Thanks Jennifer Bryan!
url_csv = 'http://www.stat.ubc.ca/~jenny/notOcto/STAT545A/examples/\
gapminder/data/gapminderDataFiveYear.txt'
file_csv = urllib2.urlopen(url_csv)  # open the remote tab-separated file
try:
    df = pd.read_csv(file_csv, sep='\t')  # parse the whole file into a dataframe
finally:
    # read_csv has consumed the stream (or failed); either way release the
    # connection instead of leaving it open for the rest of the session.
    file_csv.close()
df.head()  # show the first 5 rows of the dataframe
# Start year, end year and number of distinct years in the dataset
df['year'].min(), df['year'].max(), len(df['year'].unique())
countries = df['country'].unique()  # array of distinct country names
N_country = len(countries)  # number of countries
# Side length of the smallest square subplot grid that fits every country
N_rowcol = int(np.ceil(np.sqrt(N_country)))
N_country, N_rowcol  # show both values
# Generate a Figure object with an N_rowcol x N_rowcol grid of subplots
# (144 axes, i.e. 12 rows x 12 columns, according to the original notes).
# NOTE(review): get_splts() treats axes 1..N_rowcol as the bottom row, so the
# axis numbering produced here presumably starts at the bottom-left -- confirm
# before replacing this call with another subplot helper.
fig = tls.get_subplots(
rows= N_rowcol, # number of rows
columns= N_rowcol, # number of columns
horizontal_spacing= 0.02, # horizontal gap between subplots (normalized coordinates)
vertical_spacing= 0.02, # vertical gap between subplots (normalized coordinates)
print_grid=True) # print axis grid ids to screen
# Function to make list of subplot indices
def get_splts(N_rowcol, N_country):
    """Return the subplot (axis) indices of a square grid in reading order.

    The grid holds N_rowcol x N_rowcol subplots numbered 1..N_rowcol**2.
    This function treats indices 1..N_rowcol as the bottom row, with the
    numbers increasing left to right and then upwards.  Data is laid out in
    reading order (top row first, left to right); the subplots left over at
    the end of that order are reported as empty.

    Args:
        N_rowcol: number of rows (and of columns) in the square grid.
        N_country: number of subplots that will receive data.

    Returns:
        A tuple (splts, splts_empty, splts_left, splts_bottom) where
        splts is the list of indices that receive data (reading order),
        splts_empty is the list of unused indices (reading order),
        splts_left is the array of left-hand column indices (top to bottom),
        splts_bottom is the array of bottom-row indices (left to right).
    """
    N_splt = N_rowcol**2                                   # number of subplots
    grid = np.arange(1, N_splt + 1).reshape(N_rowcol, N_rowcol)
    grid_top_down = grid[::-1, :]        # flip rows so the top row comes first
    splts_left = grid_top_down[:, 0]     # indices of the left-hand side subplots
    splts_bottom = grid_top_down[-1, :]  # indices of the bottom subplots
    in_order = grid_top_down.flatten().tolist()            # reading order
    # Split the reading-order list after the last used subplot.  The trailing
    # entries are the empty subplots; when there are more of them than fit in
    # the bottom row they spill into the row above.  (Computing them with a
    # bottom-row-only range produced indices <= 0 in that case.)
    n_used = max(N_country, 0)
    splts = in_order[:n_used]
    splts_empty = in_order[n_used:]
    return splts, splts_empty, splts_left, splts_bottom
# Get lists of subplot indices:
#   splts        -- indices that receive a country, in plotting order
#   splts_empty  -- indices left without data
#   splts_left   -- indices of the left-hand column
#   splts_bottom -- indices of the bottom row
splts, splts_empty, splts_left, splts_bottom = get_splts(N_rowcol, N_country)
splts # bare expression: shows the list when run as a notebook cell
# Function to make Scatter graph object
def make_Scatter_gdp(splt, x, y, color, country, text):
    """Build the Scatter trace for one country, bound to subplot `splt`.

    Args:
        splt: subplot index; the trace is attached to axes 'x<splt>'/'y<splt>'.
        x: x coordinates.
        y: y coordinates.
        color: colour passed to the trace's Marker.
        country: trace name (label shown on hover).
        text: hover text, one entry per data point.

    Returns:
        A plotly Scatter object drawn as lines plus markers, with the area
        filled down to y=0.
    """
    # Axis bindings that attach the trace to the requested subplot
    axis_ids = dict(
        xaxis='x{}'.format(splt),
        yaxis='y{}'.format(splt))
    trace_marker = Marker(color=color)  # marker, line and fill color
    return Scatter(
        x=x,
        y=y,
        name=country,
        text=text,
        mode='lines+markers',  # show marker points and the line between them
        fill='tozeroy',        # fill area down to y=0
        marker=trace_marker,
        **axis_ids)
# Colors corresponding to continents (continent name -> hex color)
colors = {
    'Asia': '#1f77b4',
    'Europe': '#ff7f0e',
    'Africa': '#2ca02c',
    'Americas': '#d62728',
    'Oceania': '#9467bd',
}
# Function to make hover text (called once per data point)
def make_text(X):
    """Return the hover text for one row of the dataset.

    Args:
        X: mapping-like row providing 'continent', 'year', 'gdpPercap',
            'lifeExp' and 'pop' (population is divided by 1e6 for display).

    Returns:
        A string with one field per line; lines are separated by '<br>',
        the line-break markup used in Plotly hover text.
    """
    # Join the fields with '<br>' so each one gets its own line on hover;
    # plain backslash line continuations would glue them together
    # ("Continent: AsiaYear: 1952...").
    template = '<br>'.join([
        'Continent: %s',
        'Year: %s',
        'GDP per capita: %s $',
        'Life Expectancy: %s years',
        'Population: %s million'])
    return template % (
        X['continent'], X['year'], X['gdpPercap'], X['lifeExp'], X['pop']/1e6)
# Smallest and largest GDP per capita in the dataset (bare expression, shown
# as notebook output); presumably used to choose the log-scale y-axis range.
df['gdpPercap'].min(), df['gdpPercap'].max()
# Style shared by every x axis
axis_style_x = {
    'range': [1950, 2010],  # x-axis range
}
# Style shared by every y axis
axis_style_y = {
    'type': 'log',  # N.B. log y-axis
    # N.B. on a log axis the range is given as log10 of the data limits
    'range': [np.log10(90), np.log10(5e5)],
}
# Style shared by all axes (x and y)
axis_style_all = {
    'ticks': 'outside',       # tick marks drawn outside the axis line
    'showline': True,         # show axis bounding line
    'showgrid': False,        # remove grid
    'zeroline': False,        # no thick line at x=y=0
    'showticklabels': False,  # remove tick labels
}
# Extra style for y axes on the left-hand side of the subplot grid
axis_style_left = {
    'showticklabels': True,   # N.B. add back tick labels (overrides axis_style_all)
    'title': 'GDP per cap.',  # title of the y axes
}
# Extra style for x axes on the bottom of the subplot grid
axis_style_bottom = {
    'showticklabels': True,   # N.B. add back tick labels (overrides axis_style_all)
    'title': 'year',          # title of the x axes
}
# Function to make the annotation labelling a subplot with its country name
def make_splt_anno(splt_in, country):
    """Build the text annotation that labels subplot `splt_in`.

    Args:
        splt_in: subplot index; the annotation is positioned relative to
            axes 'x<splt_in>' and 'y<splt_in>'.
        country: label text.  Names longer than 14 characters are cut to
            their first 14 characters followed by '.'.

    Returns:
        A plotly Annotation placed at x=1955, y=log10(2.5e5), without arrow.
    """
    if len(country) > 14:
        country = country[0:14] + '.'  # truncate country's name if too long
    return Annotation(
        x=1955,                 # x position
        y=np.log10(2.5e5),      # y position (log10 of the data value)
        text=country,           # text
        align='center',         # align text in the center
        font=Font(size=14),     # font size
        showarrow=False,        # no arrow
        # Use the argument, not a module-level `splt`, so the function works
        # for whichever subplot the caller asks for.
        xref='x{}'.format(splt_in),   # position in relation to the x
        yref='y{}'.format(splt_in))   # and y axes
# Overall figure size, reused when embedding the plot
width = 2000 # plot's width in pixels
height = 1800 # and height in pixels
title = "GDP per Capita from 1952 to 2007 in USD of the year 2000 [GapMinder]"
# Apply the figure-wide layout settings on top of the subplot layout
fig['layout'].update(
title= title, # plot's title
font= Font(
family='Georgia, serif', # global font,
color='#635F5D'), # same as in 3.1 (presumably an earlier section of the guide -- confirm)
titlefont= Font(size=30), # increase title font size
showlegend=False, # remove legend
autosize=False, # turn off autosize
width= width, # plot's width
height= height) # plot's height
fig['layout']['annotations'] = Annotations([]) # init. 'annotations' key with an empty list
i = 0  # position of the current country within `splts`
# One pass per country; groupby hands the groups over in alphabetical order
for country, X in df.groupby('country'):
    splt = splts[i]                             # N.B. axes id for this country
    x = X['year'].values                        # years
    y = X['gdpPercap'].values                   # GDP values
    color = colors[X['continent'].values[0]]    # fill color of the continent
    text = X.apply(make_text, axis=1).tolist()  # hover text, one per point

    # Add this country's trace to the figure
    fig['data'] += [make_Scatter_gdp(splt, x, y, color, country, text)]

    # Style the x axis of this subplot; bottom-row axes get labels back
    xaxis_splt = fig['layout']['xaxis{}'.format(splt)]
    for style in (axis_style_x, axis_style_all):
        xaxis_splt.update(style)
    if splt in splts_bottom:
        xaxis_splt.update(axis_style_bottom)

    # Style the y axis of this subplot; left-column axes get labels back
    yaxis_splt = fig['layout']['yaxis{}'.format(splt)]
    for style in (axis_style_y, axis_style_all):
        yaxis_splt.update(style)
    if splt in splts_left:
        yaxis_splt.update(axis_style_left)

    # Label the subplot with the country's name
    fig['layout']['annotations'] += [make_splt_anno(splt, country)]

    i += 1  # move on to the next subplot slot
# Style the axes of the subplots that hold no data
for splt in splts_empty:
    # Make shortcut to xaxis of splt id, update its style
    xaxis_splt = fig['layout']['xaxis{}'.format(splt)]
    xaxis_splt.update(axis_style_x)
    xaxis_splt.update(axis_style_all)
    # Same bottom-row check as for the populated subplots, so tick labels and
    # the axis title are only added where the subplot really is on the bottom row
    if splt in splts_bottom:
        xaxis_splt.update(axis_style_bottom)
    # Make shortcut to yaxis of splt id, update its style
    yaxis_splt = fig['layout']['yaxis{}'.format(splt)]
    yaxis_splt.update(axis_style_y)
    yaxis_splt.update(axis_style_all)
# Send the figure to Plotly under the given filename; auto_open=False is passed
# so that no browser tab is requested
py.plot(fig, filename='small-multiple_gdp-time', auto_open=False)
# Embed a hosted plot in the notebook at the figure's pixel size.
# NOTE(review): the owner/id pair ('etpinard', '311') is hard-coded rather than
# taken from the py.plot call above -- confirm it refers to this figure.
tls.embed('etpinard','311',
width=width, height=height)
# Save a static image of the figure and display it.
# NOTE(review): the name passed to save_as has no extension while Image reads
# the '.png' file -- presumably save_as appends '.png'; verify.
py.image.save_as(fig, 'small-multiple_gdp-time')
from IPython.display import Image
Image('small-multiple_gdp-time.png')
# CSS styling within IPython notebook
from IPython.core.display import HTML
import urllib2
def css_styling():
    """Download the user guide's custom stylesheet and wrap it for display.

    Returns:
        An IPython HTML object built from the downloaded text.
    """
    url = 'https://raw.githubusercontent.com/plotly/python-user-guide/master/custom.css'
    response = urllib2.urlopen(url)
    try:
        styles = response.read()
    finally:
        # Close explicitly (urllib2 responses cannot be used in a `with`
        # statement) so the connection is released even if read() fails.
        response.close()
    return HTML(styles)
css_styling()