Since I only know a few things about matplotlib, I will start easy and try to reproduce a trend chart of terms used in Google searches. The chart I will use is the comparison of the search terms 'cat', 'dog' and 'fish' from 2012.
# Today's goal is to reproduce this (without the headlines, I'll leave that for later):
from IPython.core.display import HTML
# Embed the Google Trends chart that this notebook tries to reproduce.
# The query parameters are written out literally so the URL is readable.
trends_src = (
    "//www.google.com/trends/fetchComponent"
    "?hl=en-US"
    "&q=cat,+dog,+fish"
    "&date=1/2012+12m"
    "&cmpt=q"
    "&content=1"
    "&cid=TIMESERIES_GRAPH_AVERAGES_CHART"
    "&export=5"
    "&w=500"
    "&h=330"
)
HTML(
    '<iframe width="500" height="330" src="' + trends_src + '"'
    ' style="border: none;"></iframe>'
)
# Ready, set, go!
I will start by browsing the matplotlib gallery to find something I can start from. I don't know the minimum preamble to start a figure, so I think this is a good way to get started. From the Gallery, I found a very simple figure, so I will start from that one.
# imports, from the docs: "The pyplot interface is generally preferred for
# non-interactive plotting (i.e., scripting). The pylab interface is convenient
# for interactive calculations and plotting"
import matplotlib.pyplot as plt
%matplotlib inline
# Seems that need some space
# Canvas: the author's inch/dpi approximation of the size of the original chart.
fig = plt.figure(figsize=(5.21, 2.6), dpi=120)

# Layout: a 1x3 grid. The "Average" bars take the first column and the
# time-series plot spans the remaining two.
# Subplot layout reference: http://matplotlib.org/users/gridspec.html
bars = plt.subplot2grid((1, 3), (0, 0), colspan=1)
# axisbelow=True keeps ticks and gridlines underneath the plotted data.
plot = plt.subplot2grid((1, 3), (0, 1), colspan=2, axisbelow=True)

# Bars panel: no ticks at all, just an x-axis caption and a fixed 0-100 range.
bars.set_yticks([])
bars.set_xlabel("Average")
bars.set_xticks([])
bars.set_ylim((0, 100))

# Time-series panel. Tick positions are fixed *before* the labels are
# assigned, so each label is bound to a known location.
# Positions are week indices, approximating each month as four weeks.
plot.set_xticks([4*4, 4*7, 4*10])
plot.set_xticklabels(["Apr 2012", "Jul 2012", "Oct 2012"])
plot.set_yticklabels([])
plot.set_ylim((0, 120))

# Only the bottom spine stays visible on both panels.
for ax in (bars, plot):
    for side in ('top', 'left', 'right'):
        ax.spines[side].set_visible(False)
# I don't know yet how many point I will have, so I will set the xticks later.
# and the same for the bars in the other subplot.
On the Google Trends page they give the option to download the data as a CSV file, which I modified to keep only the data that matters for this chart. The file now looks like this:
Week,cat,dog,fish
2012-01-01 - 2012-01-07,56,92,59
2012-01-08 - 2012-01-14,59,91,56
2012-01-15 - 2012-01-21,55,91,56
2012-01-22 - 2012-01-28,56,89,54
2012-01-29 - 2012-02-04,58,89,55
2012-02-05 - 2012-02-11,60,94,54
2012-02-12 - 2012-02-18,56,98,54
2012-02-19 - 2012-02-25,58,91,58
2012-02-26 - 2012-03-03,59,85,54
...
Since I need each of these series individually, this seems like a good moment to try pandas (the Python Data Analysis Library) to import and handle this CSV file.
import pandas as pd
# Load the trimmed Google Trends export; na_values tells pandas that a lone
# space in the data stands for a null value.
report = pd.read_csv("0_data.csv", na_values=" ")
# Each CSV column ('cat', 'dog', 'fish') can now be looked up by name.
report["fish"].tolist()
[59, 56, 56, 54, 55, 54, 54, 58, 54, 56, 54, 55, 56, 59, 54, 55, 56, 57, 56, 57, 56, 56, 56, 55, 56, 57, 56, 59, 57, 56, 55, 55, 55, 56, 54, 57, 53, 52, 52, 52, 51, 50, 51, 47, 46, 48, 48, 49, 49, 48, 48, 54, 58]
# plot each column with their color.
# Draw one line per search term, each in its own colour, labelled for the legend.
for term, line_color in (('cat', 'blue'), ('dog', 'red'), ('fish', 'orange')):
    plot.plot(report[term].tolist(), color=line_color, label=term)
[<matplotlib.lines.Line2D at 0x109e47a90>]
# and the same for the bars in the other subplot.
# Average panel: one bar per search term at x = 1, 2, 3, with height equal to
# the mean of that term's column. The x-range is wider than the bars so there
# is empty space on either side.
bars.set_xlim((0, 5))
# All three bars share one width so they render at the same size.
bar_width = 0.92
for slot, (term, bar_color) in enumerate(
        (('cat', 'blue'), ('dog', 'red'), ('fish', 'orange')), start=1):
    bars.bar(slot, report[term].mean(), color=bar_color, edgecolor='none',
             width=bar_width, label=term)
<Container object of 1 artists>
# The legend
# Frameless legend in the upper-left corner (loc=2), laid out as a single row
# of three entries. mode=None keeps the default, non-expanded layout.
plot.legend(frameon=False, loc=2, fontsize='xx-small', ncol=3, mode=None)
<matplotlib.legend.Legend at 0x10a71c110>
# Almost there...
# A few adjustments to fonts and colors
# Hide the tick marks on the time-series panel (labels and grid remain).
plot.xaxis.set_ticks_position('none')
plot.yaxis.set_ticks_position('none')

# Small gray text for the month labels and for the "Average" caption.
# setp accepts a whole list of artists, so no explicit loop is needed.
plt.setp(plot.get_xmajorticklabels(), color='gray', fontsize='xx-small')
plt.setp(bars.xaxis.label, color='gray', fontsize='xx-small')

# Light horizontal guide lines behind the series.
plot.grid(which='major', axis='y', linewidth=.8, linestyle='-', color='lightgray')

# Gray baseline on both panels.
plot.spines['bottom'].set_color('gray')
bars.spines['bottom'].set_color('gray')

# Negative spacing pulls the two panels together.
plt.subplots_adjust(wspace=-0.1)
<matplotlib.figure.Figure at 0x109d96c90>
The `setp` function is very useful for setting properties on elements.
# The entire code
# imports, from the docs: "The pyplot interface is generally preferred for
# non-interactive plotting (i.e., scripting). The pylab interface is convenient
# for interactive calculations and plotting"
import matplotlib.pyplot as plt
# Seems that need some space
# Canvas: the author's inch/dpi approximation of the size of the original chart.
fig = plt.figure(figsize=(5.21, 2.6), dpi=120)

# Layout: a 1x3 grid. The "Average" bars take the first column and the
# time-series plot spans the remaining two.
# Subplot layout reference: http://matplotlib.org/users/gridspec.html
bars = plt.subplot2grid((1, 3), (0, 0), colspan=1)
# axisbelow=True keeps ticks and gridlines underneath the plotted data.
plot = plt.subplot2grid((1, 3), (0, 1), colspan=2, axisbelow=True)

# Bars panel: no ticks at all, just an x-axis caption and a fixed 0-100 range.
bars.set_yticks([])
bars.set_xlabel("Average")
bars.set_xticks([])
bars.set_ylim((0, 100))

# Time-series panel. Tick positions are fixed *before* the labels are
# assigned, so each label is bound to a known location.
# Positions are week indices, approximating each month as four weeks.
plot.set_xticks([4*4, 4*7, 4*10])
plot.set_xticklabels(["Apr 2012", "Jul 2012", "Oct 2012"])
plot.set_yticklabels([])
plot.set_ylim((0, 120))

# Only the bottom spine stays visible on both panels.
for ax in (bars, plot):
    for side in ('top', 'left', 'right'):
        ax.spines[side].set_visible(False)
# I don't know yet how many point I will have, so I will set the xticks later.
# and the same for the bars in the other subplot.
import pandas as pd
# Load the trimmed Google Trends export; a lone space is treated as a null value.
report = pd.read_csv('0_data.csv', na_values=' ')

# Search terms (CSV column names) paired with the colour used for each one.
series_colors = (('cat', 'blue'), ('dog', 'red'), ('fish', 'orange'))

# Time series: one line per search term.
for term, color in series_colors:
    plot.plot(report[term].tolist(), color=color, label=term)

# Average panel: one bar per search term at x = 1, 2, 3, with height equal to
# the mean of that term's column. All bars share one width so they render at
# the same size.
bars.set_xlim((0, 5))
for slot, (term, color) in enumerate(series_colors, start=1):
    bars.bar(slot, report[term].mean(), color=color, edgecolor='none',
             width=0.92, label=term)

# Frameless legend in the upper-left corner (loc=2), laid out as a single row
# of three entries. mode=None keeps the default, non-expanded layout.
plot.legend(frameon=False, loc=2, fontsize='xx-small', ncol=3, mode=None)

# Hide the tick marks on the time-series panel (labels and grid remain).
plot.xaxis.set_ticks_position('none')
plot.yaxis.set_ticks_position('none')

# Small gray text for the month labels and for the "Average" caption.
# setp accepts a whole list of artists, so no explicit loop is needed.
plt.setp(plot.get_xmajorticklabels(), color='gray', fontsize='xx-small')
plt.setp(bars.xaxis.label, color='gray', fontsize='xx-small')

# Light horizontal guide lines behind the series.
plot.grid(which='major', axis='y', linewidth=.8, linestyle='-', color='lightgray')

# Gray baseline on both panels.
plot.spines['bottom'].set_color('gray')
bars.spines['bottom'].set_color('gray')

# Negative spacing pulls the two panels together.
plt.subplots_adjust(wspace=-0.1)
Sergiobuj