Goals
%pylab --no-import-all inline
Populating the interactive namespace from numpy and matplotlib
import numpy as np
import pandas as pd
from pandas import DataFrame, Series
# plot different types of graphs to show distribution of populations
# Chap 8 of PfDA
# http://my.safaribooksonline.com/book/programming/python/9781449323592/8dot-plotting-and-visualization/id2802076
# hello world of mpl: plot the integers 0..9 (y values) against their index
plt.plot(np.arange(10))
[<matplotlib.lines.Line2D at 0x10e6f6e50>]
# start a new figure; the subplots below are added to it one by one
fig = plt.figure()
<matplotlib.figure.Figure at 0x10c67a410>
# carve the figure into a 2x2 grid and create the first three cells of it
ax1, ax2, ax3 = (fig.add_subplot(2, 2, cell) for cell in (1, 2, 3))
# bare expression: as the last line of a notebook cell this shows the figure
fig
PfDA:
When you issue a plotting command like plt.plot([1.5, 3.5, -2, 1.6]), matplotlib draws on the last figure and subplot used (creating one if necessary), thus hiding the figure and subplot creation.
# no figure or subplot is named here: pyplot draws on the most recent one
plt.plot([1.5, 3.5, -2, 1.6])
[<matplotlib.lines.Line2D at 0x107323ed0>]
import math

# Format strings to compare; None means "let matplotlib pick its default".
options = [None, 'k--', 'ro', 'g+']
fig = plt.figure()

# let's plot subplot 2 columns wide
# try different options
# math.ceil returns a float on Python 2, and the subplot grid dimensions
# must be integers, so convert explicitly.
num_row = int(math.ceil(len(options) / 2.0))

for (i, option) in enumerate(options):
    # subplot positions are 1-based, hence i + 1
    ax = fig.add_subplot(num_row, 2, i + 1)
    # pass the format string only when there is one
    style_args = () if option is None else (option,)
    ax.plot([1.5, 3.5, -2, 1.6], *style_args)

fig.show()
from numpy.random import randn

# Three panels of a 2x2 grid, each showing a different plot type.
fig = plt.figure()
ax1, ax2, ax3 = (fig.add_subplot(2, 2, cell) for cell in (1, 2, 3))

# top-left: translucent black histogram of 100 standard-normal draws
ax1.hist(randn(100), color='k', alpha=0.3, bins=20)

# top-right: the line y = x over 0..29 with Gaussian noise (scale 3) added
x_values = np.arange(30)
ax2.scatter(x_values, x_values + 3 * randn(30))

# bottom-left: cumulative sum of 50 draws, drawn as a dashed black line
ax3.plot(randn(50).cumsum(), 'k--')

fig.show()
#http://matplotlib.org/examples/api/barchart_demo.html
import numpy as np
import matplotlib.pyplot as plt

# --- data: mean score and its standard deviation per group, by gender ---
N = 5
menMeans = (20, 35, 30, 35, 27)
menStd = (2, 3, 4, 1, 2)
womenMeans = (25, 32, 34, 20, 25)
womenStd = (3, 5, 2, 3, 3)

# --- layout ---
ind = np.arange(N)  # the x locations for the groups
width = 0.35        # the width of the bars

# --- bars: the women's series is shifted right by one bar width so the two
# series sit side by side; the standard deviations become error bars ---
fig, ax = plt.subplots()
rects1 = ax.bar(ind, menMeans, width, yerr=menStd, color='r')
rects2 = ax.bar(ind + width, womenMeans, width, yerr=womenStd, color='y')

# --- text: axis label, title, tick positions/labels and legend ---
ax.set_ylabel('Scores')
ax.set_title('Scores by group and gender')
ax.set_xticks(ind + width)
ax.set_xticklabels(('G1', 'G2', 'G3', 'G4', 'G5'))
ax.legend((rects1[0], rects2[0]), ('Men', 'Women'))
def autolabel(rects):
    """Write each bar's height, truncated to an integer, just above the bar.

    rects: iterable of bar patches (e.g. what ``ax.bar`` returned).
    The labels are drawn on the module-level ``ax``.
    """
    for bar in rects:
        bar_top = bar.get_height()
        label_x = bar.get_x() + bar.get_width() / 2.  # horizontal centre of the bar
        label_y = 1.05 * bar_top                      # 5% above the bar's height
        ax.text(label_x, label_y, '%d' % int(bar_top),
                ha='center', va='bottom')
# label both series, then render the finished chart
for bar_group in (rects1, rects2):
    autolabel(bar_group)
plt.show()
going back to our calculation from Day_01_B_World_Population.ipynb
# https://gist.github.com/rdhyee/8511607/raw/f16257434352916574473e63612fcea55a0c1b1c/population_of_countries.json
# scraping of https://en.wikipedia.org/w/index.php?title=List_of_countries_by_population_(United_Nations)&oldid=590438477

# read population in
import json
import requests

pop_json_url = "https://gist.github.com/rdhyee/8511607/raw/f16257434352916574473e63612fcea55a0c1b1c/population_of_countries.json"

# Fail fast instead of hanging or parsing an error page: bound the wait and
# raise on any HTTP error status before decoding the body as JSON.
pop_response = requests.get(pop_json_url, timeout=30)
pop_response.raise_for_status()

# list of [rank, country name, population] rows
pop_list = pop_response.json()
pop_list
[[1, u'China', 1385566537], [2, u'India', 1252139596], [3, u'United States', 320050716], [4, u'Indonesia', 249865631], [5, u'Brazil', 200361925], [6, u'Pakistan', 182142594], [7, u'Nigeria', 173615345], [8, u'Bangladesh', 156594962], [9, u'Russia', 142833689], [10, u'Japan', 127143577], [11, u'Mexico', 122332399], [12, u'Philippines', 98393574], [13, u'Ethiopia', 94100756], [14, u'Vietnam', 91679733], [15, u'Germany', 82726626], [16, u'Egypt', 82056378], [17, u'Iran', 77447168], [18, u'Turkey', 74932641], [19, u'Congo, Democratic Republic of the', 67513677], [20, u'Thailand', 67010502], [21, u'France', 64291280], [22, u'United Kingdom', 63136265], [23, u'Italy', 60990277], [24, u'Myanmar', 53259018], [25, u'South Africa', 52776130], [26, u'Korea, South', 49262698], [27, u'Tanzania', 49253126], [28, u'Colombia', 48321405], [29, u'Spain', 46926963], [30, u'Ukraine', 45238805], [31, u'Kenya', 44353691], [32, u'Argentina', 41446246], [33, u'Algeria', 39208194], [34, u'Poland', 38216635], [35, u'Sudan', 37964306], [36, u'Uganda', 37578876], [37, u'Canada', 35181704], [38, u'Iraq', 33765232], [39, u'Morocco', 33008150], [40, u'Afghanistan', 30551674], [41, u'Venezuela', 30405207], [42, u'Peru', 30375603], [43, u'Malaysia', 29716965], [44, u'Uzbekistan', 28934102], [45, u'Saudi Arabia', 28828870], [46, u'Nepal', 27797457], [47, u'Ghana', 25904598], [48, u'Mozambique', 25833752], [49, u'Korea, North', 24895480], [50, u'Yemen', 24407381], [51, u'Australia', 23342553], [52, u'Taiwan', 23329772], [53, u'Madagascar', 22924851], [54, u'Cameroon', 22253959], [55, u'Syria', 21898061], [56, u'Romania', 21698585], [57, u'Angola', 21471618], [58, u'Sri Lanka', 21273228], [59, u"C\xf4te d'Ivoire", 20316086], [60, u'Niger', 17831270], [61, u'Chile', 17619708], [62, u'Burkina Faso', 16934839], [63, u'Netherlands', 16759229], [64, u'Kazakhstan', 16440586], [65, u'Malawi', 16362567], [66, u'Ecuador', 15737878], [67, u'Guatemala', 15468203], [68, u'Mali', 15301650], [69, u'Cambodia', 
15135169], [70, u'Zambia', 14538640], [71, u'Zimbabwe', 14149648], [72, u'Senegal', 14133280], [73, u'Chad', 12825314], [74, u'Rwanda', 11776522], [75, u'Guinea', 11745189], [76, u'South Sudan', 11296173], [77, u'Cuba', 11265629], [78, u'Greece', 11127990], [79, u'Belgium', 11104476], [80, u'Tunisia', 10996515], [81, u'Czech Republic', 10702197], [82, u'Bolivia', 10671200], [83, u'Portugal', 10608156], [84, u'Somalia', 10495583], [85, u'Dominican Republic', 10403761], [86, u'Benin', 10323474], [87, u'Haiti', 10317461], [88, u'Burundi', 10162532], [89, u'Hungary', 9954941], [90, u'Sweden', 9571105], [91, u'Serbia; Kosovo', 9510506], [92, u'Azerbaijan', 9413420], [93, u'Belarus', 9356678], [94, u'United Arab Emirates', 9346129], [95, u'Austria', 8495145], [96, u'Tajikistan', 8207834], [97, u'Honduras', 8097688], [98, u'Switzerland', 8077833], [99, u'Israel', 7733144], [100, u'Papua New Guinea', 7321262], [101, u'Jordan', 7273799], [102, u'Bulgaria', 7222943], [None, u'Hong Kong', 7203836], [103, u'Togo', 6816982], [104, u'Paraguay', 6802295], [105, u'Laos', 6769727], [106, u'El Salvador', 6340454], [107, u'Eritrea', 6333135], [108, u'Libya', 6201521], [109, u'Sierra Leone', 6092075], [110, u'Nicaragua', 6080478], [111, u'Denmark', 5619096], [112, u'Kyrgyzstan', 5547548], [113, u'Slovakia', 5450223], [114, u'Finland', 5426323], [115, u'Singapore', 5411737], [116, u'Turkmenistan', 5240072], [117, u'Norway', 5042671], [118, u'Costa Rica', 4872166], [119, u'Lebanon', 4821971], [120, u'Ireland', 4627173], [121, u'Central African Republic', 4616417], [122, u'New Zealand', 4505761], [123, u'Congo, Republic of the', 4447632], [124, u'Georgia', 4340895], [125, u'Palestine', 4326295], [126, u'Liberia', 4294077], [127, u'Croatia', 4289714], [128, u'Mauritania', 3889880], [129, u'Panama', 3864170], [130, u'Bosnia and Herzegovina', 3829307], [None, u'Puerto Rico', 3688318], [131, u'Oman', 3632444], [132, u'Moldova', 3487204], [133, u'Uruguay', 3407062], [134, u'Kuwait', 3368572], 
[135, u'Albania', 3173271], [136, u'Lithuania', 3016933], [137, u'Armenia', 2976566], [138, u'Mongolia', 2839073], [139, u'Jamaica', 2783888], [140, u'Namibia', 2303315], [141, u'Qatar', 2168673], [142, u'Macedonia', 2107158], [143, u'Lesotho', 2074465], [144, u'Slovenia', 2071997], [145, u'Latvia', 2050317], [146, u'Botswana', 2021144], [147, u'Gambia', 1849285], [148, u'Guinea-Bissau', 1704255], [149, u'Gabon', 1671711], [150, u'Trinidad and Tobago', 1341151], [151, u'Bahrain', 1332171], [152, u'Estonia', 1287251], [153, u'Swaziland', 1249514], [154, u'Mauritius', 1244403], [155, u'Cyprus', 1141166], [156, u'Timor-Leste', 1132879], [157, u'Fiji', 881065], [None, u'R\xe9union', 875375], [158, u'Djibouti', 872932], [159, u'Guyana', 799613], [160, u'Equatorial Guinea', 757014], [161, u'Bhutan', 753947], [162, u'Comoros', 734917], [163, u'Montenegro', 621383], [None, u'Western Sahara', 567315], [None, u'Macau', 566375], [164, u'Solomon Islands', 561231], [165, u'Suriname', 539276], [166, u'Luxembourg', 530380], [167, u'Cape Verde', 498897], [None, u'Guadeloupe', 465800], [168, u'Malta', 429004], [169, u'Brunei', 417784], [None, u'Martinique', 403682], [170, u'Bahamas', 377374], [171, u'Maldives', 345023], [172, u'Belize', 331900], [173, u'Iceland', 329535], [174, u'Barbados', 284644], [None, u'French Polynesia', 276831], [None, u'New Caledonia', 256496], [175, u'Vanuatu', 252763], [None, u'French Guiana', 249227], [None, u'Mayotte', 222152], [176, u'S\xe3o Tom\xe9 and Pr\xedncipe', 192993], [177, u'Samoa', 190372], [178, u'Saint Lucia', 182273], [None, u'Guam', 165124], [None, u'Guernsey; Jersey', 162018], [None, u'Cura\xe7ao', 158760], [179, u'Saint Vincent and the Grenadines', 109373], [None, u'Virgin Islands, United States', 106627], [180, u'Grenada', 105897], [181, u'Tonga', 105323], [182, u'Micronesia, Federated States of', 103549], [None, u'Aruba', 102911], [183, u'Kiribati', 102351], [184, u'Seychelles', 92838], [185, u'Antigua and Barbuda', 89985], [None, 
u'Isle of Man', 85888], [186, u'Andorra', 79218], [187, u'Dominica', 72003], [None, u'Bermuda', 65341], [None, u'Cayman Islands', 58435], [None, u'Greenland', 56987], [None, u'American Samoa', 55165], [188, u'Saint Kitts and Nevis', 54191], [None, u'Northern Mariana Islands', 53855], [189, u'Marshall Islands', 52634], [None, u'Faroe Islands', 49469], [None, u'Sint Maarten', 45233], [190, u'Monaco', 37831], [191, u'Liechtenstein', 36925], [None, u'Turks and Caicos Islands', 33098], [192, u'San Marino', 31448], [None, u'Gibraltar', 29310], [None, u'Virgin Islands, British', 28341], [193, u'Palau', 20918], [None, u'Cook Islands', 20629], [None, u'Caribbean Netherlands', 19130], [None, u'Anguilla', 14300], [None, u'Wallis and Futuna', 13272], [194, u'Nauru', 10051], [195, u'Tuvalu', 9876], [None, u'Saint Pierre and Miquelon', 6043], [None, u'Montserrat', 5091], [None, u'Saint Helena, Ascension and Tristan da Cunha', 4129], [None, u'Falkland Islands', 3044], [None, u'Niue', 1344], [None, u'Tokelau', 1195], [196, u'Vatican City', 799]]
# Split the [rank, name, population] rows into parallel per-country lists.
country_num = range(len(pop_list))  # 0-based position of each row
country_names = []
country_pops = []
for record in pop_list:
    country_names.append(record[1])
    country_pops.append(int(record[2]))

# populations in list order, plotted against row position
plt.plot(country_pops)
[<matplotlib.lines.Line2D at 0x10e9d9850>]
from itertools import islice
try:
    from itertools import izip  # Python 2
except ImportError:
    izip = zip  # Python 3 has no izip; the built-in zip is already lazy

# Keep every 10th (position, name) pair, so that only a readable subset of
# country names ends up as tick labels on the x-axis.
sampled_country_tuples = list(islice(izip(country_num, country_names), 0, None, 10))
sampled_i = [s[0] for s in sampled_country_tuples]
sampled_countries = [s[1] for s in sampled_country_tuples]
# bar charts
# can find barh: http://matplotlib.org/examples/lines_bars_and_markers/barh_demo.html
# http://matplotlib.org/api/pyplot_api.html#matplotlib.pyplot.bar

# One bar per country at x = 0, 1, 2, ...  Positions and heights are passed
# positionally because the keyword for the first argument differs between
# matplotlib releases (``left`` in older ones, ``x`` in newer ones).
# ``log`` is a boolean switch for a logarithmic y-axis, so pass True: a
# string such as 'x' is merely truthy and does not select an axis.
plt.bar(range(len(country_pops)), country_pops, width=2, log=True)
#plt.xticks(range(len(country_pops)), country_names, rotation='vertical')
# label only the sampled countries; labelling every bar would be unreadable
plt.xticks(sampled_i, sampled_countries, rotation='vertical')
plt.ylabel('Population')
plt.show()
# Same data as a DataFrame: each [rank, name, population] row of pop_list
# becomes one row, so column 2 holds the populations.
df = pd.DataFrame(data=pop_list)
plt.plot(df[2])
[<matplotlib.lines.Line2D at 0x10ef14b10>]
# running total of the population column (column 2), in row order
plt.plot(df[2].cumsum())
[<matplotlib.lines.Line2D at 0x10ea9e110>]