from IPython.display import Image, HTML
%load_ext load_style
%load_style talk.css
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
HTML('<iframe src=http://pandas.pydata.org/index.html width=900 height=350></iframe>')
import pandas as pd
# Set the display width pandas uses for its text representations to 80 characters.
# The fully-qualified "display.width" key is used because the old short
# "line_width" key is deprecated and no longer resolves in recent pandas releases.
pd.set_option("display.width", 80)
# toggle the two lines below if you don't want DataFrames displayed as HTML tables
#pd.set_option("display.notebook_repr_html", False)
pd.set_option("display.notebook_repr_html", True)
pandas is a Python package providing fast, flexible, and expressive data structures designed to work with relational or labeled data. It is a fundamental high-level building block for doing practical, real world data analysis in Python.
pandas is well suited for:
Key features:
Pandas's data structures and functionalities will be familiar to R users; there's a section on the Pandas website where Wes McKinney gives translations of common idioms / operations between R and Pandas.
HTML('<iframe src=http://pandas.pydata.org/pandas-docs/stable/comparison_with_r.html#compare-with-r width=900 height=350></iframe>')
A Series is a single vector of data values (think a NumPy array with shape (N,) or (N,1)) with an index that labels each element in the vector.
a = pd.Series(np.random.normal(0,1,(10,)))
a
0 0.515109 1 -0.481021 2 -0.859469 3 -0.818741 4 0.163780 5 -0.639470 6 0.762767 7 2.322911 8 1.129654 9 -1.012590 dtype: float64
b = a
b = b.mean()
b
0.10829310928343608
a
0 0.515109 1 -0.481021 2 -0.859469 3 -0.818741 4 0.163780 5 -0.639470 6 0.762767 7 2.322911 8 1.129654 9 -1.012590 dtype: float64
a.index
Int64Index([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], dtype='int64')
a.values
array([ 0.51510939, -0.48102095, -0.85946922, -0.81874054, 0.16377954, -0.63946957, 0.76276735, 2.32291094, 1.12965396, -1.01258981])
a = pd.Series(np.random.normal(0,1,(10,)), index=np.arange(1,11))
a.index
Int64Index([1, 2, 3, 4, 5, 6, 7, 8, 9, 10], dtype='int64')
a = pd.Series(np.random.normal(0,1,5), index=['a','b','c','d','e'], name='my series')
a
a 0.489256 b -1.675855 c 1.156451 d 1.851782 e 0.864339 Name: my series, dtype: float64
Pandas objects expose some powerful, high level plotting functions (built on top of Matplotlib)
plot = a.plot(kind='bar', rot=0, color='.8', title=a.name, hatch='///', edgecolor='k')
# Bar chart of the Series `a`, this time drawn directly with the Matplotlib API.
f, ax = plt.subplots()
# one bar per element of the Series, centred on the integer positions 0 .. len(a)-1
bars = ax.bar(np.arange(len(a)), a.values, color='w', edgecolor='k', align='center', hatch='///')
# put a tick under each bar and leave half a unit of space on either side
ax.set_xticks(np.arange(len(a)))
ax.set_xlim(-0.5, len(a)-0.5)
# label the ticks with the Series index and title the plot with the Series name
ax.set_xticklabels(a.index, fontsize=16)
ax.set_title(a.name, fontsize=16)
<matplotlib.text.Text at 0x109d67c90>
Selecting from a Series is easy, using the corresponding index key (like a dict)
a['c']
1.1564513737482971
slices are permitted
a['a':'c'] ### Note the difference with standard Python / Numpy positional, integer indexing
a 0.489256 b -1.675855 c 1.156451 Name: my series, dtype: float64
a['c':]
c 1.156451 d 1.851782 e 0.864339 Name: my series, dtype: float64
deleting an element
a.drop('d')
a 0.489256 b -1.675855 c 1.156451 e 0.864339 Name: my series, dtype: float64
Adding an element is (to my knowledge) not straightforward
# `Series.append` is deprecated (and removed in recent pandas releases);
# `pd.concat` returns a new Series with the extra element and leaves `a` untouched.
pd.concat([a, pd.Series({'f':5})])
a 0.489256 b -1.675855 c 1.156451 d 1.851782 e 0.864339 f 5.000000 dtype: float64
Mathematical operations involving two series will perform operations by aligning indices.
Indices that do not match are given the value NaN (not a number), and values are computed for all unique pairs of repeated indices.
# Two float Series holding 1.0, 2.0, 3.0 whose labels only partly overlap ('b' and 'c').
s1 = pd.Series(np.arange(1.0, 4.0), index=list('abc'))
s2 = pd.Series(np.arange(1.0, 4.0), index=list('bcd'))
# The addition is aligned on the index labels, not on position.
s3 = s1.add(s2)
s3
a NaN b 3 c 5 d NaN dtype: float64
NaNs are ignored in all operations
s3.mean()
4.0
You can drop them from the Series
s4 = s3.dropna()
s4
b 3 c 5 dtype: float64
Or use the `fillna` method to replace them with a value
s3.fillna(-999)
a -999 b 3 c 5 d -999 dtype: float64
s3.fillna(s3.mean())
a 4 b 3 c 5 d 4 dtype: float64
Series can have indexes representing dates / times
a
a 0.489256 b -1.675855 c 1.156451 d 1.851782 e 0.864339 Name: my series, dtype: float64
# replace the string labels with one timestamp per element, starting on 2014-01-01
a.index = pd.date_range(start='2014-1-1', periods=len(a)) # the default frequency ('freq') is daily
a.head()
2014-01-01 0.489256 2014-01-02 -1.675855 2014-01-03 1.156451 2014-01-04 1.851782 2014-01-05 0.864339 Freq: D, Name: my series, dtype: float64
a.index
<class 'pandas.tseries.index.DatetimeIndex'> [2014-01-01, ..., 2014-01-05] Length: 5, Freq: D, Timezone: None
### a datetime index in Pandas has its own type
a.index
<class 'pandas.tseries.index.DatetimeIndex'> [2014-01-01, ..., 2014-01-05] Length: 5, Freq: D, Timezone: None
### but you can convert it to a numpy array of python datetime objects if you want
py_datetimes = a.index.to_pydatetime()
py_datetimes
array([datetime.datetime(2014, 1, 1, 0, 0), datetime.datetime(2014, 1, 2, 0, 0), datetime.datetime(2014, 1, 3, 0, 0), datetime.datetime(2014, 1, 4, 0, 0), datetime.datetime(2014, 1, 5, 0, 0)], dtype=object)
And a number of useful methods for the manipulation of time series are exposed
### resample the daily time-series to a 5 minute frequency, using the forward filling method
### (the `fill_method` keyword of `resample` is deprecated; chaining `.ffill()` is the supported spelling)
a.resample('5min').ffill()
2014-01-01 00:00:00 0.489256 2014-01-01 00:05:00 0.489256 2014-01-01 00:10:00 0.489256 2014-01-01 00:15:00 0.489256 2014-01-01 00:20:00 0.489256 2014-01-01 00:25:00 0.489256 2014-01-01 00:30:00 0.489256 2014-01-01 00:35:00 0.489256 2014-01-01 00:40:00 0.489256 2014-01-01 00:45:00 0.489256 2014-01-01 00:50:00 0.489256 2014-01-01 00:55:00 0.489256 2014-01-01 01:00:00 0.489256 2014-01-01 01:05:00 0.489256 2014-01-01 01:10:00 0.489256 ... 2014-01-04 22:50:00 1.851782 2014-01-04 22:55:00 1.851782 2014-01-04 23:00:00 1.851782 2014-01-04 23:05:00 1.851782 2014-01-04 23:10:00 1.851782 2014-01-04 23:15:00 1.851782 2014-01-04 23:20:00 1.851782 2014-01-04 23:25:00 1.851782 2014-01-04 23:30:00 1.851782 2014-01-04 23:35:00 1.851782 2014-01-04 23:40:00 1.851782 2014-01-04 23:45:00 1.851782 2014-01-04 23:50:00 1.851782 2014-01-04 23:55:00 1.851782 2014-01-05 00:00:00 0.864339 Freq: 5T, Name: my series, Length: 1153
a
2014-01-01 0.489256 2014-01-02 -1.675855 2014-01-03 1.156451 2014-01-04 1.851782 2014-01-05 0.864339 Freq: D, Name: my series, dtype: float64
### the ```shift``` method makes it easy e.g. to compare series with lead / lags
a.shift(periods=-1)
2014-01-01 -1.675855 2014-01-02 1.156451 2014-01-03 1.851782 2014-01-04 0.864339 2014-01-05 NaN Freq: D, Name: my series, dtype: float64
### and the ```truncate``` method allows easy selection of time-slices
a.truncate(after='2014-1-2')
2014-01-01 0.489256 2014-01-02 -1.675855 Freq: D, Name: my series, dtype: float64
DataFrames are IMHO one of the most powerful data structures in the Python / data analysis world.
They can be viewed as a collection of Series. They feature two indexes, respectively for the rows and the columns, and can contain heterogeneous data types (although the type must be consistent within each column).
Note that a DataFrame index, either along the rows or the columns (or both!), can contain more than one level; these are called hierarchical indexes and allow the representation of complex data organisation.
If the index along the rows of a DataFrame is of datetime type, all the methods exposed for the Series (re-sampling, shifting, truncating, etc) are available for the DataFrame.
import string # part of the standard library
# `string.lowercase` exists on Python 2 only and depends on the locale;
# `string.ascii_lowercase` is available on Python 2 and 3 and is always 'a'..'z'.
idx = list(string.ascii_lowercase[:10])
print(idx)
['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j']
df = pd.DataFrame(np.arange(100).reshape(10,10),columns=idx,index=np.arange(1,11))
df
a | b | c | d | e | f | g | h | i | j | |
---|---|---|---|---|---|---|---|---|---|---|
1 | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 |
2 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 |
3 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 |
4 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 |
5 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 |
6 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 |
7 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 |
8 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 |
9 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 |
10 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 |
10 rows × 10 columns
### here I am creating a DataFrame from a dictionary: each key becomes a column name
df = pd.DataFrame({'A' : np.random.random(5), 'B' : np.random.random(5), 'C': np.random.random(5)})
# call print as a function (as already done for `idx`) so the cell runs on Python 2 and 3
print(df)
A B C 0 0.576212 0.497153 0.155229 1 0.338768 0.019293 0.991341 2 0.277941 0.445321 0.593195 3 0.178222 0.786506 0.855407 4 0.695456 0.457891 0.296605 [5 rows x 3 columns]
df
A | B | C | |
---|---|---|---|
0 | 0.576212 | 0.497153 | 0.155229 |
1 | 0.338768 | 0.019293 | 0.991341 |
2 | 0.277941 | 0.445321 | 0.593195 |
3 | 0.178222 | 0.786506 | 0.855407 |
4 | 0.695456 | 0.457891 | 0.296605 |
5 rows × 3 columns
df['A']
0 0.576212 1 0.338768 2 0.277941 3 0.178222 4 0.695456 Name: A, dtype: float64
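To access a particular row instead of a column, you use the ix method (note: `.ix` is deprecated in later pandas releases, where the label-based `.loc` and the position-based `.iloc` indexers replace it)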
df.ix[3]
A 0.178222 B 0.786506 C 0.855407 Name: 3, dtype: float64
And of course you can combine row (with ix) and column indexing, using the same convention for slices as we saw for the Series
df.ix[3]['A':'B']
A 0.178222 B 0.786506 Name: 3, dtype: float64
df.ix[3][['A','C']]
A 0.178222 C 0.855407 Name: 3, dtype: float64
Adding a column is easy
df
A | B | C | |
---|---|---|---|
0 | 0.576212 | 0.497153 | 0.155229 |
1 | 0.338768 | 0.019293 | 0.991341 |
2 | 0.277941 | 0.445321 | 0.593195 |
3 | 0.178222 | 0.786506 | 0.855407 |
4 | 0.695456 | 0.457891 | 0.296605 |
5 rows × 3 columns
df['D'] = np.random.random(5)
df
A | B | C | D | |
---|---|---|---|---|
0 | 0.576212 | 0.497153 | 0.155229 | 0.286158 |
1 | 0.338768 | 0.019293 | 0.991341 | 0.196536 |
2 | 0.277941 | 0.445321 | 0.593195 | 0.382180 |
3 | 0.178222 | 0.786506 | 0.855407 | 0.954023 |
4 | 0.695456 | 0.457891 | 0.296605 | 0.993321 |
5 rows × 4 columns
The following works because Pandas understands that a single value must be repeated over the row length
df['E'] = 2.5
df
A | B | C | D | E | |
---|---|---|---|---|---|
0 | 0.576212 | 0.497153 | 0.155229 | 0.286158 | 2.5 |
1 | 0.338768 | 0.019293 | 0.991341 | 0.196536 | 2.5 |
2 | 0.277941 | 0.445321 | 0.593195 | 0.382180 | 2.5 |
3 | 0.178222 | 0.786506 | 0.855407 | 0.954023 | 2.5 |
4 | 0.695456 | 0.457891 | 0.296605 | 0.993321 | 2.5 |
5 rows × 5 columns
The following doesn't work because there's no way to tell where to insert the missing value (1st or last index ?)
df['F'] = np.random.random(4)
--------------------------------------------------------------------------- ValueError Traceback (most recent call last) <ipython-input-69-3ee1a35781af> in <module>() ----> 1 df['F'] = np.random.random(4) /Users/nicolasfauchereau/anaconda/lib/python2.7/site-packages/pandas/core/frame.pyc in __setitem__(self, key, value) 1885 else: 1886 # set column -> 1887 self._set_item(key, value) 1888 1889 def _setitem_slice(self, key, value): /Users/nicolasfauchereau/anaconda/lib/python2.7/site-packages/pandas/core/frame.pyc in _set_item(self, key, value) 1965 is_existing = key in self.columns 1966 self._ensure_valid_index(value) -> 1967 value = self._sanitize_column(key, value) 1968 NDFrame._set_item(self, key, value) 1969 /Users/nicolasfauchereau/anaconda/lib/python2.7/site-packages/pandas/core/frame.pyc in _sanitize_column(self, key, value) 2015 elif isinstance(value, Index) or _is_sequence(value): 2016 if len(value) != len(self.index): -> 2017 raise ValueError('Length of values does not match length of ' 2018 'index') 2019 ValueError: Length of values does not match length of index
Unless we make a Series out of it, with an index matching at least partly the DataFrame (row) index
df['F'] = pd.Series(np.random.random(4)) # the new Series is labelled 0..3, so row 4 has no matching label and is filled with NaN
df
A | B | C | D | E | F | |
---|---|---|---|---|---|---|
0 | 0.576212 | 0.497153 | 0.155229 | 0.286158 | 2.5 | 0.145339 |
1 | 0.338768 | 0.019293 | 0.991341 | 0.196536 | 2.5 | 0.492748 |
2 | 0.277941 | 0.445321 | 0.593195 | 0.382180 | 2.5 | 0.468712 |
3 | 0.178222 | 0.786506 | 0.855407 | 0.954023 | 2.5 | 0.943265 |
4 | 0.695456 | 0.457891 | 0.296605 | 0.993321 | 2.5 | NaN |
5 rows × 6 columns
df.apply(np.sqrt) # or np.sqrt(df)
A | B | C | D | E | F | |
---|---|---|---|---|---|---|
0 | 0.759086 | 0.705091 | 0.393991 | 0.534937 | 1.581139 | 0.381233 |
1 | 0.582037 | 0.138901 | 0.995661 | 0.443324 | 1.581139 | 0.701960 |
2 | 0.527201 | 0.667324 | 0.770191 | 0.618207 | 1.581139 | 0.684625 |
3 | 0.422164 | 0.886852 | 0.924882 | 0.976741 | 1.581139 | 0.971218 |
4 | 0.833940 | 0.676677 | 0.544614 | 0.996655 | 1.581139 | NaN |
5 rows × 6 columns
df.describe()
A | B | C | D | E | F | |
---|---|---|---|---|---|---|
count | 5.000000 | 5.000000 | 5.000000 | 5.000000 | 5.0 | 4.000000 |
mean | 0.413320 | 0.441233 | 0.578355 | 0.562444 | 2.5 | 0.512516 |
std | 0.215227 | 0.274166 | 0.355652 | 0.381349 | 0.0 | 0.327960 |
min | 0.178222 | 0.019293 | 0.155229 | 0.196536 | 2.5 | 0.145339 |
25% | 0.277941 | 0.445321 | 0.296605 | 0.286158 | 2.5 | 0.387869 |
50% | 0.338768 | 0.457891 | 0.593195 | 0.382180 | 2.5 | 0.480730 |
75% | 0.576212 | 0.497153 | 0.855407 | 0.954023 | 2.5 | 0.605377 |
max | 0.695456 | 0.786506 | 0.991341 | 0.993321 | 2.5 | 0.943265 |
8 rows × 6 columns
df.plot(ylim=[0,3]);
df[(df['A'] >= 0.5) & (df['B'] <= 0.5)]
A | B | C | D | E | F | |
---|---|---|---|---|---|---|
0 | 0.576212 | 0.497153 | 0.155229 | 0.286158 | 2.5 | 0.145339 |
4 | 0.695456 | 0.457891 | 0.296605 | 0.993321 | 2.5 | NaN |
2 rows × 6 columns
df.plot(figsize=(10,8), subplots=True, sharex=True, kind='bar', rot=0);
Pandas has very powerful IO methods, allowing you to load csv, excel, and tab-delimited files very easily. Pandas DataFrames can also be saved to csv and excel files.
pd.read_
SOI = pd.read_csv('./data/NIWA_SOI.csv')
SOI.head()
Unnamed: 0 | Jan | Feb | Mar | Apr | May | Jun | Jul | Aug | Sep | Oct | Nov | Dec | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1876 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
1 | 1877 | -1.044600 | -0.834198 | -0.759131 | -1.103454 | 0.349381 | -1.901213 | -1.002934 | -1.044881 | -1.839545 | -1.745311 | -1.453885 | -1.541774 |
2 | 1878 | -0.940401 | -2.339884 | -1.981073 | -1.009742 | 0.189664 | -0.448601 | 1.568691 | 1.216173 | 1.734780 | 1.002227 | 1.460825 | 1.744423 |
3 | 1879 | 1.404088 | 1.302904 | 1.257072 | 1.426758 | 0.189664 | 1.602146 | 2.147306 | 2.243925 | 1.858033 | 1.439335 | 0.905642 | -0.774995 |
4 | 1880 | 1.195689 | 0.622917 | 1.379266 | 0.583354 | 1.227823 | 0.833116 | 0.154297 | 1.353207 | 0.748760 | 0.377786 | 0.628051 | -0.391605 |
5 rows × 13 columns
SOI.tail()
Unnamed: 0 | Jan | Feb | Mar | Apr | May | Jun | Jul | Aug | Sep | Oct | Nov | Dec | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
134 | 2010 | -1.096700 | -1.659897 | -1.431199 | 1.707893 | 0.988248 | 0.064086 | 2.018725 | 1.832825 | 2.474296 | 1.751556 | 1.599621 | 2.730281 |
135 | 2011 | 2.185585 | 2.128602 | 2.173528 | 2.832432 | 0.189664 | -0.106810 | 1.054366 | 0.051388 | 1.118517 | 0.627563 | 1.322029 | 2.292122 |
136 | 2012 | 1.039390 | 0.088641 | 0.096228 | -0.822319 | -0.289487 | -1.217631 | -0.167156 | -0.702297 | 0.194123 | 0.128010 | 0.281061 | -0.829765 |
137 | 2013 | -0.106804 | -0.542775 | 1.012684 | 0.021085 | 0.828531 | 1.345802 | 0.797204 | -0.222680 | 0.317375 | -0.309098 | 0.836244 | -0.117755 |
138 | 2014 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
5 rows × 13 columns
SOI = pd.read_csv('./data/NIWA_SOI.csv', index_col=0)
SOI.head()
Jan | Feb | Mar | Apr | May | Jun | Jul | Aug | Sep | Oct | Nov | Dec | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
1876 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
1877 | -1.044600 | -0.834198 | -0.759131 | -1.103454 | 0.349381 | -1.901213 | -1.002934 | -1.044881 | -1.839545 | -1.745311 | -1.453885 | -1.541774 |
1878 | -0.940401 | -2.339884 | -1.981073 | -1.009742 | 0.189664 | -0.448601 | 1.568691 | 1.216173 | 1.734780 | 1.002227 | 1.460825 | 1.744423 |
1879 | 1.404088 | 1.302904 | 1.257072 | 1.426758 | 0.189664 | 1.602146 | 2.147306 | 2.243925 | 1.858033 | 1.439335 | 0.905642 | -0.774995 |
1880 | 1.195689 | 0.622917 | 1.379266 | 0.583354 | 1.227823 | 0.833116 | 0.154297 | 1.353207 | 0.748760 | 0.377786 | 0.628051 | -0.391605 |
5 rows × 12 columns
SOI.index
Float64Index([1876.0, 1877.0, 1878.0, 1879.0, 1880.0, 1881.0, 1882.0, 1883.0, 1884.0, 1885.0, 1886.0, 1887.0, 1888.0, 1889.0, 1890.0, 1891.0, 1892.0, 1893.0, 1894.0, 1895.0, 1896.0, 1897.0, 1898.0, 1899.0, 1900.0, 1901.0, 1902.0, 1903.0, 1904.0, 1905.0, 1906.0, 1907.0, 1908.0, 1909.0, 1910.0, 1911.0, 1912.0, 1913.0, 1914.0, 1915.0, 1916.0, 1917.0, 1918.0, 1919.0, 1920.0, 1921.0, 1922.0, 1923.0, 1924.0, 1925.0, 1926.0, 1927.0, 1928.0, 1929.0, 1930.0, 1931.0, 1932.0, 1933.0, 1934.0, 1935.0, 1936.0, 1937.0, 1938.0, 1939.0, 1940.0, 1941.0, 1942.0, 1943.0, 1944.0, 1945.0, 1946.0, 1947.0, 1948.0, 1949.0, 1950.0, 1951.0, 1952.0, 1953.0, 1954.0, 1955.0, 1956.0, 1957.0, 1958.0, 1959.0, 1960.0, 1961.0, 1962.0, 1963.0, 1964.0, 1965.0, 1966.0, 1967.0, 1968.0, 1969.0, 1970.0, 1971.0, 1972.0, 1973.0, 1974.0, 1975.0, ...], dtype='object')
SOI = SOI.dropna()
SOI.head()
Jan | Feb | Mar | Apr | May | Jun | Jul | Aug | Sep | Oct | Nov | Dec | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
1877 | -1.044600 | -0.834198 | -0.759131 | -1.103454 | 0.349381 | -1.901213 | -1.002934 | -1.044881 | -1.839545 | -1.745311 | -1.453885 | -1.541774 |
1878 | -0.940401 | -2.339884 | -1.981073 | -1.009742 | 0.189664 | -0.448601 | 1.568691 | 1.216173 | 1.734780 | 1.002227 | 1.460825 | 1.744423 |
1879 | 1.404088 | 1.302904 | 1.257072 | 1.426758 | 0.189664 | 1.602146 | 2.147306 | 2.243925 | 1.858033 | 1.439335 | 0.905642 | -0.774995 |
1880 | 1.195689 | 0.622917 | 1.379266 | 0.583354 | 1.227823 | 0.833116 | 0.154297 | 1.353207 | 0.748760 | 0.377786 | 0.628051 | -0.391605 |
1881 | -0.784101 | -0.737057 | -0.025966 | 0.021085 | -0.449204 | -0.619496 | -0.552899 | -1.387465 | -1.469787 | -2.557084 | 0.628051 | 0.868104 |
5 rows × 12 columns
SOI.index
Float64Index([1877.0, 1878.0, 1879.0, 1880.0, 1881.0, 1882.0, 1883.0, 1884.0, 1885.0, 1886.0, 1887.0, 1888.0, 1889.0, 1890.0, 1891.0, 1892.0, 1893.0, 1894.0, 1895.0, 1896.0, 1897.0, 1898.0, 1899.0, 1900.0, 1901.0, 1902.0, 1903.0, 1904.0, 1905.0, 1906.0, 1907.0, 1908.0, 1909.0, 1910.0, 1911.0, 1912.0, 1913.0, 1914.0, 1915.0, 1916.0, 1917.0, 1918.0, 1919.0, 1920.0, 1921.0, 1922.0, 1923.0, 1924.0, 1925.0, 1926.0, 1927.0, 1928.0, 1929.0, 1930.0, 1931.0, 1932.0, 1933.0, 1934.0, 1935.0, 1936.0, 1937.0, 1938.0, 1939.0, 1940.0, 1941.0, 1942.0, 1943.0, 1944.0, 1945.0, 1946.0, 1947.0, 1948.0, 1949.0, 1950.0, 1951.0, 1952.0, 1953.0, 1954.0, 1955.0, 1956.0, 1957.0, 1958.0, 1959.0, 1960.0, 1961.0, 1962.0, 1963.0, 1964.0, 1965.0, 1966.0, 1967.0, 1968.0, 1969.0, 1970.0, 1971.0, 1972.0, 1973.0, 1974.0, 1975.0, 1976.0, ...], dtype='object')
# Cast the float year labels (1877.0, ...) to integers. `Index.astype` does the
# conversion directly and avoids `np.int`, an alias of the builtin `int` that
# has been removed from recent NumPy releases.
SOI.index = SOI.index.astype(int)
SOI.index
Int64Index([1877, 1878, 1879, 1880, 1881, 1882, 1883, 1884, 1885, 1886, 1887, 1888, 1889, 1890, 1891, 1892, 1893, 1894, 1895, 1896, 1897, 1898, 1899, 1900, 1901, 1902, 1903, 1904, 1905, 1906, 1907, 1908, 1909, 1910, 1911, 1912, 1913, 1914, 1915, 1916, 1917, 1918, 1919, 1920, 1921, 1922, 1923, 1924, 1925, 1926, 1927, 1928, 1929, 1930, 1931, 1932, 1933, 1934, 1935, 1936, 1937, 1938, 1939, 1940, 1941, 1942, 1943, 1944, 1945, 1946, 1947, 1948, 1949, 1950, 1951, 1952, 1953, 1954, 1955, 1956, 1957, 1958, 1959, 1960, 1961, 1962, 1963, 1964, 1965, 1966, 1967, 1968, 1969, 1970, 1971, 1972, 1973, 1974, 1975, 1976, ...], dtype='int64')
SOIs = SOI.stack()
SOIs.head()
1877 Jan -1.044600 Feb -0.834198 Mar -0.759131 Apr -1.103454 May 0.349381 dtype: float64
SOIs.index
MultiIndex(levels=[[1877, 1878, 1879, 1880, 1881, 1882, 1883, 1884, 1885, 1886, 1887, 1888, 1889, 1890, 1891, 1892, 1893, 1894, 1895, 1896, 1897, 1898, 1899, 1900, 1901, 1902, 1903, 1904, 1905, 1906, 1907, 1908, 1909, 1910, 1911, 1912, 1913, 1914, 1915, 1916, 1917, 1918, 1919, 1920, 1921, 1922, 1923, 1924, 1925, 1926, 1927, 1928, 1929, 1930, 1931, 1932, 1933, 1934, 1935, 1936, 1937, 1938, 1939, 1940, 1941, 1942, 1943, 1944, 1945, 1946, 1947, 1948, 1949, 1950, 1951, 1952, 1953, 1954, 1955, 1956, 1957, 1958, 1959, 1960, 1961, 1962, 1963, 1964, 1965, 1966, 1967, 1968, 1969, 1970, 1971, 1972, 1973, 1974, 1975, 1976, ...], [u'Jan', u'Feb', u'Mar', u'Apr', u'May', u'Jun', u'Jul', u'Aug', u'Sep', u'Oct', u'Nov', u'Dec']], labels=[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, ...], [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 0, 1, 2, 3, ...]])
from dateutil import parser

# Turn every (year, month) entry of the stacked index into a datetime pinned
# to the first day of that month, by parsing strings such as "1877-Jan-1".
dateindex = []
for year, month in SOIs.index:
    stamp = "-".join([str(year), str(month), str(1)])
    dateindex.append(parser.parse(stamp))
dateindex
[datetime.datetime(1877, 1, 1, 0, 0), datetime.datetime(1877, 2, 1, 0, 0), datetime.datetime(1877, 3, 1, 0, 0), datetime.datetime(1877, 4, 1, 0, 0), datetime.datetime(1877, 5, 1, 0, 0), datetime.datetime(1877, 6, 1, 0, 0), datetime.datetime(1877, 7, 1, 0, 0), datetime.datetime(1877, 8, 1, 0, 0), datetime.datetime(1877, 9, 1, 0, 0), datetime.datetime(1877, 10, 1, 0, 0), datetime.datetime(1877, 11, 1, 0, 0), datetime.datetime(1877, 12, 1, 0, 0), datetime.datetime(1878, 1, 1, 0, 0), datetime.datetime(1878, 2, 1, 0, 0), datetime.datetime(1878, 3, 1, 0, 0), datetime.datetime(1878, 4, 1, 0, 0), datetime.datetime(1878, 5, 1, 0, 0), datetime.datetime(1878, 6, 1, 0, 0), datetime.datetime(1878, 7, 1, 0, 0), datetime.datetime(1878, 8, 1, 0, 0), datetime.datetime(1878, 9, 1, 0, 0), datetime.datetime(1878, 10, 1, 0, 0), datetime.datetime(1878, 11, 1, 0, 0), datetime.datetime(1878, 12, 1, 0, 0), datetime.datetime(1879, 1, 1, 0, 0), datetime.datetime(1879, 2, 1, 0, 0), datetime.datetime(1879, 3, 1, 0, 0), datetime.datetime(1879, 4, 1, 0, 0), datetime.datetime(1879, 5, 1, 0, 0), datetime.datetime(1879, 6, 1, 0, 0), datetime.datetime(1879, 7, 1, 0, 0), datetime.datetime(1879, 8, 1, 0, 0), datetime.datetime(1879, 9, 1, 0, 0), datetime.datetime(1879, 10, 1, 0, 0), datetime.datetime(1879, 11, 1, 0, 0), datetime.datetime(1879, 12, 1, 0, 0), datetime.datetime(1880, 1, 1, 0, 0), datetime.datetime(1880, 2, 1, 0, 0), datetime.datetime(1880, 3, 1, 0, 0), datetime.datetime(1880, 4, 1, 0, 0), datetime.datetime(1880, 5, 1, 0, 0), datetime.datetime(1880, 6, 1, 0, 0), datetime.datetime(1880, 7, 1, 0, 0), datetime.datetime(1880, 8, 1, 0, 0), datetime.datetime(1880, 9, 1, 0, 0), datetime.datetime(1880, 10, 1, 0, 0), datetime.datetime(1880, 11, 1, 0, 0), datetime.datetime(1880, 12, 1, 0, 0), datetime.datetime(1881, 1, 1, 0, 0), datetime.datetime(1881, 2, 1, 0, 0), datetime.datetime(1881, 3, 1, 0, 0), datetime.datetime(1881, 4, 1, 0, 0), datetime.datetime(1881, 5, 1, 0, 0), datetime.datetime(1881, 
6, 1, 0, 0), datetime.datetime(1881, 7, 1, 0, 0), datetime.datetime(1881, 8, 1, 0, 0), datetime.datetime(1881, 9, 1, 0, 0), datetime.datetime(1881, 10, 1, 0, 0), datetime.datetime(1881, 11, 1, 0, 0), datetime.datetime(1881, 12, 1, 0, 0), datetime.datetime(1882, 1, 1, 0, 0), datetime.datetime(1882, 2, 1, 0, 0), datetime.datetime(1882, 3, 1, 0, 0), datetime.datetime(1882, 4, 1, 0, 0), datetime.datetime(1882, 5, 1, 0, 0), datetime.datetime(1882, 6, 1, 0, 0), datetime.datetime(1882, 7, 1, 0, 0), datetime.datetime(1882, 8, 1, 0, 0), datetime.datetime(1882, 9, 1, 0, 0), datetime.datetime(1882, 10, 1, 0, 0), datetime.datetime(1882, 11, 1, 0, 0), datetime.datetime(1882, 12, 1, 0, 0), datetime.datetime(1883, 1, 1, 0, 0), datetime.datetime(1883, 2, 1, 0, 0), datetime.datetime(1883, 3, 1, 0, 0), datetime.datetime(1883, 4, 1, 0, 0), datetime.datetime(1883, 5, 1, 0, 0), datetime.datetime(1883, 6, 1, 0, 0), datetime.datetime(1883, 7, 1, 0, 0), datetime.datetime(1883, 8, 1, 0, 0), datetime.datetime(1883, 9, 1, 0, 0), datetime.datetime(1883, 10, 1, 0, 0), datetime.datetime(1883, 11, 1, 0, 0), datetime.datetime(1883, 12, 1, 0, 0), datetime.datetime(1884, 1, 1, 0, 0), datetime.datetime(1884, 2, 1, 0, 0), datetime.datetime(1884, 3, 1, 0, 0), datetime.datetime(1884, 4, 1, 0, 0), datetime.datetime(1884, 5, 1, 0, 0), datetime.datetime(1884, 6, 1, 0, 0), datetime.datetime(1884, 7, 1, 0, 0), datetime.datetime(1884, 8, 1, 0, 0), datetime.datetime(1884, 9, 1, 0, 0), datetime.datetime(1884, 10, 1, 0, 0), datetime.datetime(1884, 11, 1, 0, 0), datetime.datetime(1884, 12, 1, 0, 0), datetime.datetime(1885, 1, 1, 0, 0), datetime.datetime(1885, 2, 1, 0, 0), datetime.datetime(1885, 3, 1, 0, 0), datetime.datetime(1885, 4, 1, 0, 0), datetime.datetime(1885, 5, 1, 0, 0), datetime.datetime(1885, 6, 1, 0, 0), datetime.datetime(1885, 7, 1, 0, 0), datetime.datetime(1885, 8, 1, 0, 0), datetime.datetime(1885, 9, 1, 0, 0), datetime.datetime(1885, 10, 1, 0, 0), datetime.datetime(1885, 11, 1, 0, 0), 
datetime.datetime(1885, 12, 1, 0, 0), datetime.datetime(1886, 1, 1, 0, 0), datetime.datetime(1886, 2, 1, 0, 0), datetime.datetime(1886, 3, 1, 0, 0), datetime.datetime(1886, 4, 1, 0, 0), datetime.datetime(1886, 5, 1, 0, 0), datetime.datetime(1886, 6, 1, 0, 0), datetime.datetime(1886, 7, 1, 0, 0), datetime.datetime(1886, 8, 1, 0, 0), datetime.datetime(1886, 9, 1, 0, 0), datetime.datetime(1886, 10, 1, 0, 0), datetime.datetime(1886, 11, 1, 0, 0), datetime.datetime(1886, 12, 1, 0, 0), datetime.datetime(1887, 1, 1, 0, 0), datetime.datetime(1887, 2, 1, 0, 0), datetime.datetime(1887, 3, 1, 0, 0), datetime.datetime(1887, 4, 1, 0, 0), datetime.datetime(1887, 5, 1, 0, 0), datetime.datetime(1887, 6, 1, 0, 0), datetime.datetime(1887, 7, 1, 0, 0), datetime.datetime(1887, 8, 1, 0, 0), datetime.datetime(1887, 9, 1, 0, 0), datetime.datetime(1887, 10, 1, 0, 0), datetime.datetime(1887, 11, 1, 0, 0), datetime.datetime(1887, 12, 1, 0, 0), datetime.datetime(1888, 1, 1, 0, 0), datetime.datetime(1888, 2, 1, 0, 0), datetime.datetime(1888, 3, 1, 0, 0), datetime.datetime(1888, 4, 1, 0, 0), datetime.datetime(1888, 5, 1, 0, 0), datetime.datetime(1888, 6, 1, 0, 0), datetime.datetime(1888, 7, 1, 0, 0), datetime.datetime(1888, 8, 1, 0, 0), datetime.datetime(1888, 9, 1, 0, 0), datetime.datetime(1888, 10, 1, 0, 0), datetime.datetime(1888, 11, 1, 0, 0), datetime.datetime(1888, 12, 1, 0, 0), datetime.datetime(1889, 1, 1, 0, 0), datetime.datetime(1889, 2, 1, 0, 0), datetime.datetime(1889, 3, 1, 0, 0), datetime.datetime(1889, 4, 1, 0, 0), datetime.datetime(1889, 5, 1, 0, 0), datetime.datetime(1889, 6, 1, 0, 0), datetime.datetime(1889, 7, 1, 0, 0), datetime.datetime(1889, 8, 1, 0, 0), datetime.datetime(1889, 9, 1, 0, 0), datetime.datetime(1889, 10, 1, 0, 0), datetime.datetime(1889, 11, 1, 0, 0), datetime.datetime(1889, 12, 1, 0, 0), datetime.datetime(1890, 1, 1, 0, 0), datetime.datetime(1890, 2, 1, 0, 0), datetime.datetime(1890, 3, 1, 0, 0), datetime.datetime(1890, 4, 1, 0, 0), datetime.datetime(1890, 
5, 1, 0, 0), datetime.datetime(1890, 6, 1, 0, 0), datetime.datetime(1890, 7, 1, 0, 0), datetime.datetime(1890, 8, 1, 0, 0), datetime.datetime(1890, 9, 1, 0, 0), datetime.datetime(1890, 10, 1, 0, 0), datetime.datetime(1890, 11, 1, 0, 0), datetime.datetime(1890, 12, 1, 0, 0), datetime.datetime(1891, 1, 1, 0, 0), datetime.datetime(1891, 2, 1, 0, 0), datetime.datetime(1891, 3, 1, 0, 0), datetime.datetime(1891, 4, 1, 0, 0), datetime.datetime(1891, 5, 1, 0, 0), datetime.datetime(1891, 6, 1, 0, 0), datetime.datetime(1891, 7, 1, 0, 0), datetime.datetime(1891, 8, 1, 0, 0), datetime.datetime(1891, 9, 1, 0, 0), datetime.datetime(1891, 10, 1, 0, 0), datetime.datetime(1891, 11, 1, 0, 0), datetime.datetime(1891, 12, 1, 0, 0), datetime.datetime(1892, 1, 1, 0, 0), datetime.datetime(1892, 2, 1, 0, 0), datetime.datetime(1892, 3, 1, 0, 0), datetime.datetime(1892, 4, 1, 0, 0), datetime.datetime(1892, 5, 1, 0, 0), datetime.datetime(1892, 6, 1, 0, 0), datetime.datetime(1892, 7, 1, 0, 0), datetime.datetime(1892, 8, 1, 0, 0), datetime.datetime(1892, 9, 1, 0, 0), datetime.datetime(1892, 10, 1, 0, 0), datetime.datetime(1892, 11, 1, 0, 0), datetime.datetime(1892, 12, 1, 0, 0), datetime.datetime(1893, 1, 1, 0, 0), datetime.datetime(1893, 2, 1, 0, 0), datetime.datetime(1893, 3, 1, 0, 0), datetime.datetime(1893, 4, 1, 0, 0), datetime.datetime(1893, 5, 1, 0, 0), datetime.datetime(1893, 6, 1, 0, 0), datetime.datetime(1893, 7, 1, 0, 0), datetime.datetime(1893, 8, 1, 0, 0), datetime.datetime(1893, 9, 1, 0, 0), datetime.datetime(1893, 10, 1, 0, 0), datetime.datetime(1893, 11, 1, 0, 0), datetime.datetime(1893, 12, 1, 0, 0), datetime.datetime(1894, 1, 1, 0, 0), datetime.datetime(1894, 2, 1, 0, 0), datetime.datetime(1894, 3, 1, 0, 0), datetime.datetime(1894, 4, 1, 0, 0), datetime.datetime(1894, 5, 1, 0, 0), datetime.datetime(1894, 6, 1, 0, 0), datetime.datetime(1894, 7, 1, 0, 0), datetime.datetime(1894, 8, 1, 0, 0), datetime.datetime(1894, 9, 1, 0, 0), datetime.datetime(1894, 10, 1, 0, 0), 
datetime.datetime(1894, 11, 1, 0, 0), datetime.datetime(1894, 12, 1, 0, 0), datetime.datetime(1895, 1, 1, 0, 0), datetime.datetime(1895, 2, 1, 0, 0), datetime.datetime(1895, 3, 1, 0, 0), datetime.datetime(1895, 4, 1, 0, 0), datetime.datetime(1895, 5, 1, 0, 0), datetime.datetime(1895, 6, 1, 0, 0), datetime.datetime(1895, 7, 1, 0, 0), datetime.datetime(1895, 8, 1, 0, 0), datetime.datetime(1895, 9, 1, 0, 0), datetime.datetime(1895, 10, 1, 0, 0), datetime.datetime(1895, 11, 1, 0, 0), datetime.datetime(1895, 12, 1, 0, 0), datetime.datetime(1896, 1, 1, 0, 0), datetime.datetime(1896, 2, 1, 0, 0), datetime.datetime(1896, 3, 1, 0, 0), datetime.datetime(1896, 4, 1, 0, 0), datetime.datetime(1896, 5, 1, 0, 0), datetime.datetime(1896, 6, 1, 0, 0), datetime.datetime(1896, 7, 1, 0, 0), datetime.datetime(1896, 8, 1, 0, 0), datetime.datetime(1896, 9, 1, 0, 0), datetime.datetime(1896, 10, 1, 0, 0), datetime.datetime(1896, 11, 1, 0, 0), datetime.datetime(1896, 12, 1, 0, 0), datetime.datetime(1897, 1, 1, 0, 0), datetime.datetime(1897, 2, 1, 0, 0), datetime.datetime(1897, 3, 1, 0, 0), datetime.datetime(1897, 4, 1, 0, 0), datetime.datetime(1897, 5, 1, 0, 0), datetime.datetime(1897, 6, 1, 0, 0), datetime.datetime(1897, 7, 1, 0, 0), datetime.datetime(1897, 8, 1, 0, 0), datetime.datetime(1897, 9, 1, 0, 0), datetime.datetime(1897, 10, 1, 0, 0), datetime.datetime(1897, 11, 1, 0, 0), datetime.datetime(1897, 12, 1, 0, 0), datetime.datetime(1898, 1, 1, 0, 0), datetime.datetime(1898, 2, 1, 0, 0), datetime.datetime(1898, 3, 1, 0, 0), datetime.datetime(1898, 4, 1, 0, 0), datetime.datetime(1898, 5, 1, 0, 0), datetime.datetime(1898, 6, 1, 0, 0), datetime.datetime(1898, 7, 1, 0, 0), datetime.datetime(1898, 8, 1, 0, 0), datetime.datetime(1898, 9, 1, 0, 0), datetime.datetime(1898, 10, 1, 0, 0), datetime.datetime(1898, 11, 1, 0, 0), datetime.datetime(1898, 12, 1, 0, 0), datetime.datetime(1899, 1, 1, 0, 0), datetime.datetime(1899, 2, 1, 0, 0), datetime.datetime(1899, 3, 1, 0, 0), datetime.datetime(1899, 
4, 1, 0, 0), datetime.datetime(1899, 5, 1, 0, 0), datetime.datetime(1899, 6, 1, 0, 0), datetime.datetime(1899, 7, 1, 0, 0), datetime.datetime(1899, 8, 1, 0, 0), datetime.datetime(1899, 9, 1, 0, 0), datetime.datetime(1899, 10, 1, 0, 0), datetime.datetime(1899, 11, 1, 0, 0), datetime.datetime(1899, 12, 1, 0, 0), datetime.datetime(1900, 1, 1, 0, 0), datetime.datetime(1900, 2, 1, 0, 0), datetime.datetime(1900, 3, 1, 0, 0), datetime.datetime(1900, 4, 1, 0, 0), datetime.datetime(1900, 5, 1, 0, 0), datetime.datetime(1900, 6, 1, 0, 0), datetime.datetime(1900, 7, 1, 0, 0), datetime.datetime(1900, 8, 1, 0, 0), datetime.datetime(1900, 9, 1, 0, 0), datetime.datetime(1900, 10, 1, 0, 0), datetime.datetime(1900, 11, 1, 0, 0), datetime.datetime(1900, 12, 1, 0, 0), datetime.datetime(1901, 1, 1, 0, 0), datetime.datetime(1901, 2, 1, 0, 0), datetime.datetime(1901, 3, 1, 0, 0), datetime.datetime(1901, 4, 1, 0, 0), datetime.datetime(1901, 5, 1, 0, 0), datetime.datetime(1901, 6, 1, 0, 0), datetime.datetime(1901, 7, 1, 0, 0), datetime.datetime(1901, 8, 1, 0, 0), datetime.datetime(1901, 9, 1, 0, 0), datetime.datetime(1901, 10, 1, 0, 0), datetime.datetime(1901, 11, 1, 0, 0), datetime.datetime(1901, 12, 1, 0, 0), datetime.datetime(1902, 1, 1, 0, 0), datetime.datetime(1902, 2, 1, 0, 0), datetime.datetime(1902, 3, 1, 0, 0), datetime.datetime(1902, 4, 1, 0, 0), datetime.datetime(1902, 5, 1, 0, 0), datetime.datetime(1902, 6, 1, 0, 0), datetime.datetime(1902, 7, 1, 0, 0), datetime.datetime(1902, 8, 1, 0, 0), datetime.datetime(1902, 9, 1, 0, 0), datetime.datetime(1902, 10, 1, 0, 0), datetime.datetime(1902, 11, 1, 0, 0), datetime.datetime(1902, 12, 1, 0, 0), datetime.datetime(1903, 1, 1, 0, 0), datetime.datetime(1903, 2, 1, 0, 0), datetime.datetime(1903, 3, 1, 0, 0), datetime.datetime(1903, 4, 1, 0, 0), datetime.datetime(1903, 5, 1, 0, 0), datetime.datetime(1903, 6, 1, 0, 0), datetime.datetime(1903, 7, 1, 0, 0), datetime.datetime(1903, 8, 1, 0, 0), datetime.datetime(1903, 9, 1, 0, 0), 
datetime.datetime(1903, 10, 1, 0, 0), datetime.datetime(1903, 11, 1, 0, 0), datetime.datetime(1903, 12, 1, 0, 0), datetime.datetime(1904, 1, 1, 0, 0), datetime.datetime(1904, 2, 1, 0, 0), datetime.datetime(1904, 3, 1, 0, 0), datetime.datetime(1904, 4, 1, 0, 0), datetime.datetime(1904, 5, 1, 0, 0), datetime.datetime(1904, 6, 1, 0, 0), datetime.datetime(1904, 7, 1, 0, 0), datetime.datetime(1904, 8, 1, 0, 0), datetime.datetime(1904, 9, 1, 0, 0), datetime.datetime(1904, 10, 1, 0, 0), datetime.datetime(1904, 11, 1, 0, 0), datetime.datetime(1904, 12, 1, 0, 0), datetime.datetime(1905, 1, 1, 0, 0), datetime.datetime(1905, 2, 1, 0, 0), datetime.datetime(1905, 3, 1, 0, 0), datetime.datetime(1905, 4, 1, 0, 0), datetime.datetime(1905, 5, 1, 0, 0), datetime.datetime(1905, 6, 1, 0, 0), datetime.datetime(1905, 7, 1, 0, 0), datetime.datetime(1905, 8, 1, 0, 0), datetime.datetime(1905, 9, 1, 0, 0), datetime.datetime(1905, 10, 1, 0, 0), datetime.datetime(1905, 11, 1, 0, 0), datetime.datetime(1905, 12, 1, 0, 0), datetime.datetime(1906, 1, 1, 0, 0), datetime.datetime(1906, 2, 1, 0, 0), datetime.datetime(1906, 3, 1, 0, 0), datetime.datetime(1906, 4, 1, 0, 0), datetime.datetime(1906, 5, 1, 0, 0), datetime.datetime(1906, 6, 1, 0, 0), datetime.datetime(1906, 7, 1, 0, 0), datetime.datetime(1906, 8, 1, 0, 0), datetime.datetime(1906, 9, 1, 0, 0), datetime.datetime(1906, 10, 1, 0, 0), datetime.datetime(1906, 11, 1, 0, 0), datetime.datetime(1906, 12, 1, 0, 0), datetime.datetime(1907, 1, 1, 0, 0), datetime.datetime(1907, 2, 1, 0, 0), datetime.datetime(1907, 3, 1, 0, 0), datetime.datetime(1907, 4, 1, 0, 0), datetime.datetime(1907, 5, 1, 0, 0), datetime.datetime(1907, 6, 1, 0, 0), datetime.datetime(1907, 7, 1, 0, 0), datetime.datetime(1907, 8, 1, 0, 0), datetime.datetime(1907, 9, 1, 0, 0), datetime.datetime(1907, 10, 1, 0, 0), datetime.datetime(1907, 11, 1, 0, 0), datetime.datetime(1907, 12, 1, 0, 0), datetime.datetime(1908, 1, 1, 0, 0), datetime.datetime(1908, 2, 1, 0, 0), datetime.datetime(1908, 
3, 1, 0, 0), datetime.datetime(1908, 4, 1, 0, 0), datetime.datetime(1908, 5, 1, 0, 0), datetime.datetime(1908, 6, 1, 0, 0), datetime.datetime(1908, 7, 1, 0, 0), datetime.datetime(1908, 8, 1, 0, 0), datetime.datetime(1908, 9, 1, 0, 0), datetime.datetime(1908, 10, 1, 0, 0), datetime.datetime(1908, 11, 1, 0, 0), datetime.datetime(1908, 12, 1, 0, 0), datetime.datetime(1909, 1, 1, 0, 0), datetime.datetime(1909, 2, 1, 0, 0), datetime.datetime(1909, 3, 1, 0, 0), datetime.datetime(1909, 4, 1, 0, 0), datetime.datetime(1909, 5, 1, 0, 0), datetime.datetime(1909, 6, 1, 0, 0), datetime.datetime(1909, 7, 1, 0, 0), datetime.datetime(1909, 8, 1, 0, 0), datetime.datetime(1909, 9, 1, 0, 0), datetime.datetime(1909, 10, 1, 0, 0), datetime.datetime(1909, 11, 1, 0, 0), datetime.datetime(1909, 12, 1, 0, 0), datetime.datetime(1910, 1, 1, 0, 0), datetime.datetime(1910, 2, 1, 0, 0), datetime.datetime(1910, 3, 1, 0, 0), datetime.datetime(1910, 4, 1, 0, 0), datetime.datetime(1910, 5, 1, 0, 0), datetime.datetime(1910, 6, 1, 0, 0), datetime.datetime(1910, 7, 1, 0, 0), datetime.datetime(1910, 8, 1, 0, 0), datetime.datetime(1910, 9, 1, 0, 0), datetime.datetime(1910, 10, 1, 0, 0), datetime.datetime(1910, 11, 1, 0, 0), datetime.datetime(1910, 12, 1, 0, 0), datetime.datetime(1911, 1, 1, 0, 0), datetime.datetime(1911, 2, 1, 0, 0), datetime.datetime(1911, 3, 1, 0, 0), datetime.datetime(1911, 4, 1, 0, 0), datetime.datetime(1911, 5, 1, 0, 0), datetime.datetime(1911, 6, 1, 0, 0), datetime.datetime(1911, 7, 1, 0, 0), datetime.datetime(1911, 8, 1, 0, 0), datetime.datetime(1911, 9, 1, 0, 0), datetime.datetime(1911, 10, 1, 0, 0), datetime.datetime(1911, 11, 1, 0, 0), datetime.datetime(1911, 12, 1, 0, 0), datetime.datetime(1912, 1, 1, 0, 0), datetime.datetime(1912, 2, 1, 0, 0), datetime.datetime(1912, 3, 1, 0, 0), datetime.datetime(1912, 4, 1, 0, 0), datetime.datetime(1912, 5, 1, 0, 0), datetime.datetime(1912, 6, 1, 0, 0), datetime.datetime(1912, 7, 1, 0, 0), datetime.datetime(1912, 8, 1, 0, 0), 
datetime.datetime(1912, 9, 1, 0, 0), datetime.datetime(1912, 10, 1, 0, 0), datetime.datetime(1912, 11, 1, 0, 0), datetime.datetime(1912, 12, 1, 0, 0), datetime.datetime(1913, 1, 1, 0, 0), datetime.datetime(1913, 2, 1, 0, 0), datetime.datetime(1913, 3, 1, 0, 0), datetime.datetime(1913, 4, 1, 0, 0), datetime.datetime(1913, 5, 1, 0, 0), datetime.datetime(1913, 6, 1, 0, 0), datetime.datetime(1913, 7, 1, 0, 0), datetime.datetime(1913, 8, 1, 0, 0), datetime.datetime(1913, 9, 1, 0, 0), datetime.datetime(1913, 10, 1, 0, 0), datetime.datetime(1913, 11, 1, 0, 0), datetime.datetime(1913, 12, 1, 0, 0), datetime.datetime(1914, 1, 1, 0, 0), datetime.datetime(1914, 2, 1, 0, 0), datetime.datetime(1914, 3, 1, 0, 0), datetime.datetime(1914, 4, 1, 0, 0), datetime.datetime(1914, 5, 1, 0, 0), datetime.datetime(1914, 6, 1, 0, 0), datetime.datetime(1914, 7, 1, 0, 0), datetime.datetime(1914, 8, 1, 0, 0), datetime.datetime(1914, 9, 1, 0, 0), datetime.datetime(1914, 10, 1, 0, 0), datetime.datetime(1914, 11, 1, 0, 0), datetime.datetime(1914, 12, 1, 0, 0), datetime.datetime(1915, 1, 1, 0, 0), datetime.datetime(1915, 2, 1, 0, 0), datetime.datetime(1915, 3, 1, 0, 0), datetime.datetime(1915, 4, 1, 0, 0), datetime.datetime(1915, 5, 1, 0, 0), datetime.datetime(1915, 6, 1, 0, 0), datetime.datetime(1915, 7, 1, 0, 0), datetime.datetime(1915, 8, 1, 0, 0), datetime.datetime(1915, 9, 1, 0, 0), datetime.datetime(1915, 10, 1, 0, 0), datetime.datetime(1915, 11, 1, 0, 0), datetime.datetime(1915, 12, 1, 0, 0), datetime.datetime(1916, 1, 1, 0, 0), datetime.datetime(1916, 2, 1, 0, 0), datetime.datetime(1916, 3, 1, 0, 0), datetime.datetime(1916, 4, 1, 0, 0), datetime.datetime(1916, 5, 1, 0, 0), datetime.datetime(1916, 6, 1, 0, 0), datetime.datetime(1916, 7, 1, 0, 0), datetime.datetime(1916, 8, 1, 0, 0), datetime.datetime(1916, 9, 1, 0, 0), datetime.datetime(1916, 10, 1, 0, 0), datetime.datetime(1916, 11, 1, 0, 0), datetime.datetime(1916, 12, 1, 0, 0), datetime.datetime(1917, 1, 1, 0, 0), datetime.datetime(1917, 
2, 1, 0, 0), datetime.datetime(1917, 3, 1, 0, 0), datetime.datetime(1917, 4, 1, 0, 0), datetime.datetime(1917, 5, 1, 0, 0), datetime.datetime(1917, 6, 1, 0, 0), datetime.datetime(1917, 7, 1, 0, 0), datetime.datetime(1917, 8, 1, 0, 0), datetime.datetime(1917, 9, 1, 0, 0), datetime.datetime(1917, 10, 1, 0, 0), datetime.datetime(1917, 11, 1, 0, 0), datetime.datetime(1917, 12, 1, 0, 0), datetime.datetime(1918, 1, 1, 0, 0), datetime.datetime(1918, 2, 1, 0, 0), datetime.datetime(1918, 3, 1, 0, 0), datetime.datetime(1918, 4, 1, 0, 0), datetime.datetime(1918, 5, 1, 0, 0), datetime.datetime(1918, 6, 1, 0, 0), datetime.datetime(1918, 7, 1, 0, 0), datetime.datetime(1918, 8, 1, 0, 0), datetime.datetime(1918, 9, 1, 0, 0), datetime.datetime(1918, 10, 1, 0, 0), datetime.datetime(1918, 11, 1, 0, 0), datetime.datetime(1918, 12, 1, 0, 0), datetime.datetime(1919, 1, 1, 0, 0), datetime.datetime(1919, 2, 1, 0, 0), datetime.datetime(1919, 3, 1, 0, 0), datetime.datetime(1919, 4, 1, 0, 0), datetime.datetime(1919, 5, 1, 0, 0), datetime.datetime(1919, 6, 1, 0, 0), datetime.datetime(1919, 7, 1, 0, 0), datetime.datetime(1919, 8, 1, 0, 0), datetime.datetime(1919, 9, 1, 0, 0), datetime.datetime(1919, 10, 1, 0, 0), datetime.datetime(1919, 11, 1, 0, 0), datetime.datetime(1919, 12, 1, 0, 0), datetime.datetime(1920, 1, 1, 0, 0), datetime.datetime(1920, 2, 1, 0, 0), datetime.datetime(1920, 3, 1, 0, 0), datetime.datetime(1920, 4, 1, 0, 0), datetime.datetime(1920, 5, 1, 0, 0), datetime.datetime(1920, 6, 1, 0, 0), datetime.datetime(1920, 7, 1, 0, 0), datetime.datetime(1920, 8, 1, 0, 0), datetime.datetime(1920, 9, 1, 0, 0), datetime.datetime(1920, 10, 1, 0, 0), datetime.datetime(1920, 11, 1, 0, 0), datetime.datetime(1920, 12, 1, 0, 0), datetime.datetime(1921, 1, 1, 0, 0), datetime.datetime(1921, 2, 1, 0, 0), datetime.datetime(1921, 3, 1, 0, 0), datetime.datetime(1921, 4, 1, 0, 0), datetime.datetime(1921, 5, 1, 0, 0), datetime.datetime(1921, 6, 1, 0, 0), datetime.datetime(1921, 7, 1, 0, 0), 
datetime.datetime(1921, 8, 1, 0, 0), datetime.datetime(1921, 9, 1, 0, 0), datetime.datetime(1921, 10, 1, 0, 0), datetime.datetime(1921, 11, 1, 0, 0), datetime.datetime(1921, 12, 1, 0, 0), datetime.datetime(1922, 1, 1, 0, 0), datetime.datetime(1922, 2, 1, 0, 0), datetime.datetime(1922, 3, 1, 0, 0), datetime.datetime(1922, 4, 1, 0, 0), datetime.datetime(1922, 5, 1, 0, 0), datetime.datetime(1922, 6, 1, 0, 0), datetime.datetime(1922, 7, 1, 0, 0), datetime.datetime(1922, 8, 1, 0, 0), datetime.datetime(1922, 9, 1, 0, 0), datetime.datetime(1922, 10, 1, 0, 0), datetime.datetime(1922, 11, 1, 0, 0), datetime.datetime(1922, 12, 1, 0, 0), datetime.datetime(1923, 1, 1, 0, 0), datetime.datetime(1923, 2, 1, 0, 0), datetime.datetime(1923, 3, 1, 0, 0), datetime.datetime(1923, 4, 1, 0, 0), datetime.datetime(1923, 5, 1, 0, 0), datetime.datetime(1923, 6, 1, 0, 0), datetime.datetime(1923, 7, 1, 0, 0), datetime.datetime(1923, 8, 1, 0, 0), datetime.datetime(1923, 9, 1, 0, 0), datetime.datetime(1923, 10, 1, 0, 0), datetime.datetime(1923, 11, 1, 0, 0), datetime.datetime(1923, 12, 1, 0, 0), datetime.datetime(1924, 1, 1, 0, 0), datetime.datetime(1924, 2, 1, 0, 0), datetime.datetime(1924, 3, 1, 0, 0), datetime.datetime(1924, 4, 1, 0, 0), datetime.datetime(1924, 5, 1, 0, 0), datetime.datetime(1924, 6, 1, 0, 0), datetime.datetime(1924, 7, 1, 0, 0), datetime.datetime(1924, 8, 1, 0, 0), datetime.datetime(1924, 9, 1, 0, 0), datetime.datetime(1924, 10, 1, 0, 0), datetime.datetime(1924, 11, 1, 0, 0), datetime.datetime(1924, 12, 1, 0, 0), datetime.datetime(1925, 1, 1, 0, 0), datetime.datetime(1925, 2, 1, 0, 0), datetime.datetime(1925, 3, 1, 0, 0), datetime.datetime(1925, 4, 1, 0, 0), datetime.datetime(1925, 5, 1, 0, 0), datetime.datetime(1925, 6, 1, 0, 0), datetime.datetime(1925, 7, 1, 0, 0), datetime.datetime(1925, 8, 1, 0, 0), datetime.datetime(1925, 9, 1, 0, 0), datetime.datetime(1925, 10, 1, 0, 0), datetime.datetime(1925, 11, 1, 0, 0), datetime.datetime(1925, 12, 1, 0, 0), datetime.datetime(1926, 
1, 1, 0, 0), datetime.datetime(1926, 2, 1, 0, 0), datetime.datetime(1926, 3, 1, 0, 0), datetime.datetime(1926, 4, 1, 0, 0), datetime.datetime(1926, 5, 1, 0, 0), datetime.datetime(1926, 6, 1, 0, 0), datetime.datetime(1926, 7, 1, 0, 0), datetime.datetime(1926, 8, 1, 0, 0), datetime.datetime(1926, 9, 1, 0, 0), datetime.datetime(1926, 10, 1, 0, 0), datetime.datetime(1926, 11, 1, 0, 0), datetime.datetime(1926, 12, 1, 0, 0), datetime.datetime(1927, 1, 1, 0, 0), datetime.datetime(1927, 2, 1, 0, 0), datetime.datetime(1927, 3, 1, 0, 0), datetime.datetime(1927, 4, 1, 0, 0), datetime.datetime(1927, 5, 1, 0, 0), datetime.datetime(1927, 6, 1, 0, 0), datetime.datetime(1927, 7, 1, 0, 0), datetime.datetime(1927, 8, 1, 0, 0), datetime.datetime(1927, 9, 1, 0, 0), datetime.datetime(1927, 10, 1, 0, 0), datetime.datetime(1927, 11, 1, 0, 0), datetime.datetime(1927, 12, 1, 0, 0), datetime.datetime(1928, 1, 1, 0, 0), datetime.datetime(1928, 2, 1, 0, 0), datetime.datetime(1928, 3, 1, 0, 0), datetime.datetime(1928, 4, 1, 0, 0), datetime.datetime(1928, 5, 1, 0, 0), datetime.datetime(1928, 6, 1, 0, 0), datetime.datetime(1928, 7, 1, 0, 0), datetime.datetime(1928, 8, 1, 0, 0), datetime.datetime(1928, 9, 1, 0, 0), datetime.datetime(1928, 10, 1, 0, 0), datetime.datetime(1928, 11, 1, 0, 0), datetime.datetime(1928, 12, 1, 0, 0), datetime.datetime(1929, 1, 1, 0, 0), datetime.datetime(1929, 2, 1, 0, 0), datetime.datetime(1929, 3, 1, 0, 0), datetime.datetime(1929, 4, 1, 0, 0), datetime.datetime(1929, 5, 1, 0, 0), datetime.datetime(1929, 6, 1, 0, 0), datetime.datetime(1929, 7, 1, 0, 0), datetime.datetime(1929, 8, 1, 0, 0), datetime.datetime(1929, 9, 1, 0, 0), datetime.datetime(1929, 10, 1, 0, 0), datetime.datetime(1929, 11, 1, 0, 0), datetime.datetime(1929, 12, 1, 0, 0), datetime.datetime(1930, 1, 1, 0, 0), datetime.datetime(1930, 2, 1, 0, 0), datetime.datetime(1930, 3, 1, 0, 0), datetime.datetime(1930, 4, 1, 0, 0), datetime.datetime(1930, 5, 1, 0, 0), datetime.datetime(1930, 6, 1, 0, 0), 
datetime.datetime(1930, 7, 1, 0, 0), datetime.datetime(1930, 8, 1, 0, 0), datetime.datetime(1930, 9, 1, 0, 0), datetime.datetime(1930, 10, 1, 0, 0), datetime.datetime(1930, 11, 1, 0, 0), datetime.datetime(1930, 12, 1, 0, 0), datetime.datetime(1931, 1, 1, 0, 0), datetime.datetime(1931, 2, 1, 0, 0), datetime.datetime(1931, 3, 1, 0, 0), datetime.datetime(1931, 4, 1, 0, 0), datetime.datetime(1931, 5, 1, 0, 0), datetime.datetime(1931, 6, 1, 0, 0), datetime.datetime(1931, 7, 1, 0, 0), datetime.datetime(1931, 8, 1, 0, 0), datetime.datetime(1931, 9, 1, 0, 0), datetime.datetime(1931, 10, 1, 0, 0), datetime.datetime(1931, 11, 1, 0, 0), datetime.datetime(1931, 12, 1, 0, 0), datetime.datetime(1932, 1, 1, 0, 0), datetime.datetime(1932, 2, 1, 0, 0), datetime.datetime(1932, 3, 1, 0, 0), datetime.datetime(1932, 4, 1, 0, 0), datetime.datetime(1932, 5, 1, 0, 0), datetime.datetime(1932, 6, 1, 0, 0), datetime.datetime(1932, 7, 1, 0, 0), datetime.datetime(1932, 8, 1, 0, 0), datetime.datetime(1932, 9, 1, 0, 0), datetime.datetime(1932, 10, 1, 0, 0), datetime.datetime(1932, 11, 1, 0, 0), datetime.datetime(1932, 12, 1, 0, 0), datetime.datetime(1933, 1, 1, 0, 0), datetime.datetime(1933, 2, 1, 0, 0), datetime.datetime(1933, 3, 1, 0, 0), datetime.datetime(1933, 4, 1, 0, 0), datetime.datetime(1933, 5, 1, 0, 0), datetime.datetime(1933, 6, 1, 0, 0), datetime.datetime(1933, 7, 1, 0, 0), datetime.datetime(1933, 8, 1, 0, 0), datetime.datetime(1933, 9, 1, 0, 0), datetime.datetime(1933, 10, 1, 0, 0), datetime.datetime(1933, 11, 1, 0, 0), datetime.datetime(1933, 12, 1, 0, 0), datetime.datetime(1934, 1, 1, 0, 0), datetime.datetime(1934, 2, 1, 0, 0), datetime.datetime(1934, 3, 1, 0, 0), datetime.datetime(1934, 4, 1, 0, 0), datetime.datetime(1934, 5, 1, 0, 0), datetime.datetime(1934, 6, 1, 0, 0), datetime.datetime(1934, 7, 1, 0, 0), datetime.datetime(1934, 8, 1, 0, 0), datetime.datetime(1934, 9, 1, 0, 0), datetime.datetime(1934, 10, 1, 0, 0), datetime.datetime(1934, 11, 1, 0, 0), datetime.datetime(1934, 
12, 1, 0, 0), datetime.datetime(1935, 1, 1, 0, 0), datetime.datetime(1935, 2, 1, 0, 0), datetime.datetime(1935, 3, 1, 0, 0), datetime.datetime(1935, 4, 1, 0, 0), datetime.datetime(1935, 5, 1, 0, 0), datetime.datetime(1935, 6, 1, 0, 0), datetime.datetime(1935, 7, 1, 0, 0), datetime.datetime(1935, 8, 1, 0, 0), datetime.datetime(1935, 9, 1, 0, 0), datetime.datetime(1935, 10, 1, 0, 0), datetime.datetime(1935, 11, 1, 0, 0), datetime.datetime(1935, 12, 1, 0, 0), datetime.datetime(1936, 1, 1, 0, 0), datetime.datetime(1936, 2, 1, 0, 0), datetime.datetime(1936, 3, 1, 0, 0), datetime.datetime(1936, 4, 1, 0, 0), datetime.datetime(1936, 5, 1, 0, 0), datetime.datetime(1936, 6, 1, 0, 0), datetime.datetime(1936, 7, 1, 0, 0), datetime.datetime(1936, 8, 1, 0, 0), datetime.datetime(1936, 9, 1, 0, 0), datetime.datetime(1936, 10, 1, 0, 0), datetime.datetime(1936, 11, 1, 0, 0), datetime.datetime(1936, 12, 1, 0, 0), datetime.datetime(1937, 1, 1, 0, 0), datetime.datetime(1937, 2, 1, 0, 0), datetime.datetime(1937, 3, 1, 0, 0), datetime.datetime(1937, 4, 1, 0, 0), datetime.datetime(1937, 5, 1, 0, 0), datetime.datetime(1937, 6, 1, 0, 0), datetime.datetime(1937, 7, 1, 0, 0), datetime.datetime(1937, 8, 1, 0, 0), datetime.datetime(1937, 9, 1, 0, 0), datetime.datetime(1937, 10, 1, 0, 0), datetime.datetime(1937, 11, 1, 0, 0), datetime.datetime(1937, 12, 1, 0, 0), datetime.datetime(1938, 1, 1, 0, 0), datetime.datetime(1938, 2, 1, 0, 0), datetime.datetime(1938, 3, 1, 0, 0), datetime.datetime(1938, 4, 1, 0, 0), datetime.datetime(1938, 5, 1, 0, 0), datetime.datetime(1938, 6, 1, 0, 0), datetime.datetime(1938, 7, 1, 0, 0), datetime.datetime(1938, 8, 1, 0, 0), datetime.datetime(1938, 9, 1, 0, 0), datetime.datetime(1938, 10, 1, 0, 0), datetime.datetime(1938, 11, 1, 0, 0), datetime.datetime(1938, 12, 1, 0, 0), datetime.datetime(1939, 1, 1, 0, 0), datetime.datetime(1939, 2, 1, 0, 0), datetime.datetime(1939, 3, 1, 0, 0), datetime.datetime(1939, 4, 1, 0, 0), datetime.datetime(1939, 5, 1, 0, 0), 
datetime.datetime(1939, 6, 1, 0, 0), datetime.datetime(1939, 7, 1, 0, 0), datetime.datetime(1939, 8, 1, 0, 0), datetime.datetime(1939, 9, 1, 0, 0), datetime.datetime(1939, 10, 1, 0, 0), datetime.datetime(1939, 11, 1, 0, 0), datetime.datetime(1939, 12, 1, 0, 0), datetime.datetime(1940, 1, 1, 0, 0), datetime.datetime(1940, 2, 1, 0, 0), datetime.datetime(1940, 3, 1, 0, 0), datetime.datetime(1940, 4, 1, 0, 0), datetime.datetime(1940, 5, 1, 0, 0), datetime.datetime(1940, 6, 1, 0, 0), datetime.datetime(1940, 7, 1, 0, 0), datetime.datetime(1940, 8, 1, 0, 0), datetime.datetime(1940, 9, 1, 0, 0), datetime.datetime(1940, 10, 1, 0, 0), datetime.datetime(1940, 11, 1, 0, 0), datetime.datetime(1940, 12, 1, 0, 0), datetime.datetime(1941, 1, 1, 0, 0), datetime.datetime(1941, 2, 1, 0, 0), datetime.datetime(1941, 3, 1, 0, 0), datetime.datetime(1941, 4, 1, 0, 0), datetime.datetime(1941, 5, 1, 0, 0), datetime.datetime(1941, 6, 1, 0, 0), datetime.datetime(1941, 7, 1, 0, 0), datetime.datetime(1941, 8, 1, 0, 0), datetime.datetime(1941, 9, 1, 0, 0), datetime.datetime(1941, 10, 1, 0, 0), datetime.datetime(1941, 11, 1, 0, 0), datetime.datetime(1941, 12, 1, 0, 0), datetime.datetime(1942, 1, 1, 0, 0), datetime.datetime(1942, 2, 1, 0, 0), datetime.datetime(1942, 3, 1, 0, 0), datetime.datetime(1942, 4, 1, 0, 0), datetime.datetime(1942, 5, 1, 0, 0), datetime.datetime(1942, 6, 1, 0, 0), datetime.datetime(1942, 7, 1, 0, 0), datetime.datetime(1942, 8, 1, 0, 0), datetime.datetime(1942, 9, 1, 0, 0), datetime.datetime(1942, 10, 1, 0, 0), datetime.datetime(1942, 11, 1, 0, 0), datetime.datetime(1942, 12, 1, 0, 0), datetime.datetime(1943, 1, 1, 0, 0), datetime.datetime(1943, 2, 1, 0, 0), datetime.datetime(1943, 3, 1, 0, 0), datetime.datetime(1943, 4, 1, 0, 0), datetime.datetime(1943, 5, 1, 0, 0), datetime.datetime(1943, 6, 1, 0, 0), datetime.datetime(1943, 7, 1, 0, 0), datetime.datetime(1943, 8, 1, 0, 0), datetime.datetime(1943, 9, 1, 0, 0), datetime.datetime(1943, 10, 1, 0, 0), datetime.datetime(1943, 
11, 1, 0, 0), datetime.datetime(1943, 12, 1, 0, 0), datetime.datetime(1944, 1, 1, 0, 0), datetime.datetime(1944, 2, 1, 0, 0), datetime.datetime(1944, 3, 1, 0, 0), datetime.datetime(1944, 4, 1, 0, 0), datetime.datetime(1944, 5, 1, 0, 0), datetime.datetime(1944, 6, 1, 0, 0), datetime.datetime(1944, 7, 1, 0, 0), datetime.datetime(1944, 8, 1, 0, 0), datetime.datetime(1944, 9, 1, 0, 0), datetime.datetime(1944, 10, 1, 0, 0), datetime.datetime(1944, 11, 1, 0, 0), datetime.datetime(1944, 12, 1, 0, 0), datetime.datetime(1945, 1, 1, 0, 0), datetime.datetime(1945, 2, 1, 0, 0), datetime.datetime(1945, 3, 1, 0, 0), datetime.datetime(1945, 4, 1, 0, 0), datetime.datetime(1945, 5, 1, 0, 0), datetime.datetime(1945, 6, 1, 0, 0), datetime.datetime(1945, 7, 1, 0, 0), datetime.datetime(1945, 8, 1, 0, 0), datetime.datetime(1945, 9, 1, 0, 0), datetime.datetime(1945, 10, 1, 0, 0), datetime.datetime(1945, 11, 1, 0, 0), datetime.datetime(1945, 12, 1, 0, 0), datetime.datetime(1946, 1, 1, 0, 0), datetime.datetime(1946, 2, 1, 0, 0), datetime.datetime(1946, 3, 1, 0, 0), datetime.datetime(1946, 4, 1, 0, 0), datetime.datetime(1946, 5, 1, 0, 0), datetime.datetime(1946, 6, 1, 0, 0), datetime.datetime(1946, 7, 1, 0, 0), datetime.datetime(1946, 8, 1, 0, 0), datetime.datetime(1946, 9, 1, 0, 0), datetime.datetime(1946, 10, 1, 0, 0), datetime.datetime(1946, 11, 1, 0, 0), datetime.datetime(1946, 12, 1, 0, 0), datetime.datetime(1947, 1, 1, 0, 0), datetime.datetime(1947, 2, 1, 0, 0), datetime.datetime(1947, 3, 1, 0, 0), datetime.datetime(1947, 4, 1, 0, 0), datetime.datetime(1947, 5, 1, 0, 0), datetime.datetime(1947, 6, 1, 0, 0), datetime.datetime(1947, 7, 1, 0, 0), datetime.datetime(1947, 8, 1, 0, 0), datetime.datetime(1947, 9, 1, 0, 0), datetime.datetime(1947, 10, 1, 0, 0), datetime.datetime(1947, 11, 1, 0, 0), datetime.datetime(1947, 12, 1, 0, 0), datetime.datetime(1948, 1, 1, 0, 0), datetime.datetime(1948, 2, 1, 0, 0), datetime.datetime(1948, 3, 1, 0, 0), datetime.datetime(1948, 4, 1, 0, 0), 
datetime.datetime(1948, 5, 1, 0, 0), datetime.datetime(1948, 6, 1, 0, 0), datetime.datetime(1948, 7, 1, 0, 0), datetime.datetime(1948, 8, 1, 0, 0), datetime.datetime(1948, 9, 1, 0, 0), datetime.datetime(1948, 10, 1, 0, 0), datetime.datetime(1948, 11, 1, 0, 0), datetime.datetime(1948, 12, 1, 0, 0), datetime.datetime(1949, 1, 1, 0, 0), datetime.datetime(1949, 2, 1, 0, 0), datetime.datetime(1949, 3, 1, 0, 0), datetime.datetime(1949, 4, 1, 0, 0), datetime.datetime(1949, 5, 1, 0, 0), datetime.datetime(1949, 6, 1, 0, 0), datetime.datetime(1949, 7, 1, 0, 0), datetime.datetime(1949, 8, 1, 0, 0), datetime.datetime(1949, 9, 1, 0, 0), datetime.datetime(1949, 10, 1, 0, 0), datetime.datetime(1949, 11, 1, 0, 0), datetime.datetime(1949, 12, 1, 0, 0), datetime.datetime(1950, 1, 1, 0, 0), datetime.datetime(1950, 2, 1, 0, 0), datetime.datetime(1950, 3, 1, 0, 0), datetime.datetime(1950, 4, 1, 0, 0), datetime.datetime(1950, 5, 1, 0, 0), datetime.datetime(1950, 6, 1, 0, 0), datetime.datetime(1950, 7, 1, 0, 0), datetime.datetime(1950, 8, 1, 0, 0), datetime.datetime(1950, 9, 1, 0, 0), datetime.datetime(1950, 10, 1, 0, 0), datetime.datetime(1950, 11, 1, 0, 0), datetime.datetime(1950, 12, 1, 0, 0), datetime.datetime(1951, 1, 1, 0, 0), datetime.datetime(1951, 2, 1, 0, 0), datetime.datetime(1951, 3, 1, 0, 0), datetime.datetime(1951, 4, 1, 0, 0), datetime.datetime(1951, 5, 1, 0, 0), datetime.datetime(1951, 6, 1, 0, 0), datetime.datetime(1951, 7, 1, 0, 0), datetime.datetime(1951, 8, 1, 0, 0), datetime.datetime(1951, 9, 1, 0, 0), datetime.datetime(1951, 10, 1, 0, 0), datetime.datetime(1951, 11, 1, 0, 0), datetime.datetime(1951, 12, 1, 0, 0), datetime.datetime(1952, 1, 1, 0, 0), datetime.datetime(1952, 2, 1, 0, 0), datetime.datetime(1952, 3, 1, 0, 0), datetime.datetime(1952, 4, 1, 0, 0), datetime.datetime(1952, 5, 1, 0, 0), datetime.datetime(1952, 6, 1, 0, 0), datetime.datetime(1952, 7, 1, 0, 0), datetime.datetime(1952, 8, 1, 0, 0), datetime.datetime(1952, 9, 1, 0, 0), datetime.datetime(1952, 
10, 1, 0, 0), datetime.datetime(1952, 11, 1, 0, 0), datetime.datetime(1952, 12, 1, 0, 0), datetime.datetime(1953, 1, 1, 0, 0), datetime.datetime(1953, 2, 1, 0, 0), datetime.datetime(1953, 3, 1, 0, 0), datetime.datetime(1953, 4, 1, 0, 0), datetime.datetime(1953, 5, 1, 0, 0), datetime.datetime(1953, 6, 1, 0, 0), datetime.datetime(1953, 7, 1, 0, 0), datetime.datetime(1953, 8, 1, 0, 0), datetime.datetime(1953, 9, 1, 0, 0), datetime.datetime(1953, 10, 1, 0, 0), datetime.datetime(1953, 11, 1, 0, 0), datetime.datetime(1953, 12, 1, 0, 0), datetime.datetime(1954, 1, 1, 0, 0), datetime.datetime(1954, 2, 1, 0, 0), datetime.datetime(1954, 3, 1, 0, 0), datetime.datetime(1954, 4, 1, 0, 0), datetime.datetime(1954, 5, 1, 0, 0), datetime.datetime(1954, 6, 1, 0, 0), datetime.datetime(1954, 7, 1, 0, 0), datetime.datetime(1954, 8, 1, 0, 0), datetime.datetime(1954, 9, 1, 0, 0), datetime.datetime(1954, 10, 1, 0, 0), datetime.datetime(1954, 11, 1, 0, 0), datetime.datetime(1954, 12, 1, 0, 0), datetime.datetime(1955, 1, 1, 0, 0), datetime.datetime(1955, 2, 1, 0, 0), datetime.datetime(1955, 3, 1, 0, 0), datetime.datetime(1955, 4, 1, 0, 0), datetime.datetime(1955, 5, 1, 0, 0), datetime.datetime(1955, 6, 1, 0, 0), datetime.datetime(1955, 7, 1, 0, 0), datetime.datetime(1955, 8, 1, 0, 0), datetime.datetime(1955, 9, 1, 0, 0), datetime.datetime(1955, 10, 1, 0, 0), datetime.datetime(1955, 11, 1, 0, 0), datetime.datetime(1955, 12, 1, 0, 0), datetime.datetime(1956, 1, 1, 0, 0), datetime.datetime(1956, 2, 1, 0, 0), datetime.datetime(1956, 3, 1, 0, 0), datetime.datetime(1956, 4, 1, 0, 0), datetime.datetime(1956, 5, 1, 0, 0), datetime.datetime(1956, 6, 1, 0, 0), datetime.datetime(1956, 7, 1, 0, 0), datetime.datetime(1956, 8, 1, 0, 0), datetime.datetime(1956, 9, 1, 0, 0), datetime.datetime(1956, 10, 1, 0, 0), datetime.datetime(1956, 11, 1, 0, 0), datetime.datetime(1956, 12, 1, 0, 0), datetime.datetime(1957, 1, 1, 0, 0), datetime.datetime(1957, 2, 1, 0, 0), datetime.datetime(1957, 3, 1, 0, 0), 
datetime.datetime(1957, 4, 1, 0, 0), datetime.datetime(1957, 5, 1, 0, 0), datetime.datetime(1957, 6, 1, 0, 0), datetime.datetime(1957, 7, 1, 0, 0), datetime.datetime(1957, 8, 1, 0, 0), datetime.datetime(1957, 9, 1, 0, 0), datetime.datetime(1957, 10, 1, 0, 0), datetime.datetime(1957, 11, 1, 0, 0), datetime.datetime(1957, 12, 1, 0, 0), datetime.datetime(1958, 1, 1, 0, 0), datetime.datetime(1958, 2, 1, 0, 0), datetime.datetime(1958, 3, 1, 0, 0), datetime.datetime(1958, 4, 1, 0, 0), datetime.datetime(1958, 5, 1, 0, 0), datetime.datetime(1958, 6, 1, 0, 0), datetime.datetime(1958, 7, 1, 0, 0), datetime.datetime(1958, 8, 1, 0, 0), datetime.datetime(1958, 9, 1, 0, 0), datetime.datetime(1958, 10, 1, 0, 0), datetime.datetime(1958, 11, 1, 0, 0), datetime.datetime(1958, 12, 1, 0, 0), datetime.datetime(1959, 1, 1, 0, 0), datetime.datetime(1959, 2, 1, 0, 0), datetime.datetime(1959, 3, 1, 0, 0), datetime.datetime(1959, 4, 1, 0, 0), datetime.datetime(1959, 5, 1, 0, 0), datetime.datetime(1959, 6, 1, 0, 0), datetime.datetime(1959, 7, 1, 0, 0), datetime.datetime(1959, 8, 1, 0, 0), datetime.datetime(1959, 9, 1, 0, 0), datetime.datetime(1959, 10, 1, 0, 0), datetime.datetime(1959, 11, 1, 0, 0), datetime.datetime(1959, 12, 1, 0, 0), datetime.datetime(1960, 1, 1, 0, 0), datetime.datetime(1960, 2, 1, 0, 0), datetime.datetime(1960, 3, 1, 0, 0), datetime.datetime(1960, 4, 1, 0, 0), ...]
SOIs.index=dateindex
SOIs.head()
1877-01-01 -1.044600 1877-02-01 -0.834198 1877-03-01 -0.759131 1877-04-01 -1.103454 1877-05-01 0.349381 dtype: float64
SOIs.plot(figsize=(14,8))
<matplotlib.axes.AxesSubplot at 0x10a0f6f10>
The groupby method is a very powerful method of pandas DataFrames; in a nutshell, it allows you to
split your data according to unique values of a variable (or unique combinations of N variables)
apply some operation to the groups thus defined, either an aggregation or transformation method
combine the results into a DataFrame
This process is illustrated below, where the operation applied is the calculation of the mean of each group's values
A very nice explanation of the groupby method, with examples, is available from Pandas's documentation at:
http://pandas.pydata.org/pandas-docs/stable/groupby.html
and a short tutorial on Wes McKinney's blog here
Image(filename='images/split-apply-combine.png', width=800)
# Location of the monthly NINO index table on the NOAA CPC FTP server.
url = "ftp://ftp.cpc.ncep.noaa.gov/wd52dg/data/indices/ersst3b.nino.mth.81-10.ascii"
# Alternative (left disabled): fetch a local copy into ./data with a shell escape.
#!wget -P ./data ftp://ftp.cpc.ncep.noaa.gov/wd52dg/data/indices/ersst3b.nino.mth.81-10.ascii
# The separator is a regular expression (one or more whitespace characters);
# a raw string keeps '\s' from being read as a (invalid) string escape.
data = pd.read_table(url, sep=r'\s+')
data.tail()
YR | MON | NINO1+2 | ANOM | NINO3 | ANOM.1 | NINO4 | ANOM.2 | NINO3.4 | ANOM.3 | |
---|---|---|---|---|---|---|---|---|---|---|
767 | 2013 | 12 | 22.61 | -0.40 | 25.01 | -0.30 | 28.28 | -0.26 | 26.20 | -0.49 |
768 | 2014 | 1 | 24.47 | -0.22 | 25.30 | -0.47 | 27.91 | -0.42 | 26.03 | -0.64 |
769 | 2014 | 2 | 25.29 | -0.82 | 25.85 | -0.65 | 28.04 | -0.13 | 26.05 | -0.77 |
770 | 2014 | 3 | 25.97 | -0.54 | 27.04 | -0.20 | 28.26 | -0.03 | 26.84 | -0.49 |
771 | 2014 | 4 | 25.17 | -0.44 | 27.53 | -0.02 | 28.76 | 0.21 | 27.62 | -0.17 |
5 rows × 10 columns
I only keep the raw - monthly - values of NINO 3.4
nino = data[['YR','MON','NINO3.4']]
nino.head()
YR | MON | NINO3.4 | |
---|---|---|---|
0 | 1950 | 1 | 24.83 |
1 | 1950 | 2 | 25.20 |
2 | 1950 | 3 | 26.03 |
3 | 1950 | 4 | 26.36 |
4 | 1950 | 5 | 26.19 |
5 rows × 3 columns
Now I want to calculate a climatology (over the whole period available)
I first group by the UNIQUE values of the month variable (MON); I should get 12 groups
# Split the rows of `nino` by the value of the MON column.
groups = nino.groupby('MON')
# Iterating over a GroupBy yields (key, sub-DataFrame) pairs: show the key
# and the first rows of each group.
# print(...) with a single argument behaves the same on Python 2 and 3.
for month, group in groups:
    print(month)
    print(group.head())
1 YR MON NINO3.4 0 1950 1 24.83 12 1951 1 25.46 24 1952 1 26.85 36 1953 1 26.85 48 1954 1 27.03 [5 rows x 3 columns] 2 YR MON NINO3.4 1 1950 2 25.20 13 1951 2 25.78 25 1952 2 26.79 37 1953 2 27.19 49 1954 2 27.22 [5 rows x 3 columns] 3 YR MON NINO3.4 2 1950 3 26.03 14 1951 3 26.72 26 1952 3 27.32 38 1953 3 27.68 50 1954 3 27.21 [5 rows x 3 columns] 4 YR MON NINO3.4 3 1950 4 26.36 15 1951 4 27.24 27 1952 4 27.88 39 1953 4 28.19 51 1954 4 26.87 [5 rows x 3 columns] 5 YR MON NINO3.4 4 1950 5 26.19 16 1951 5 27.68 28 1952 5 27.99 40 1953 5 28.29 52 1954 5 27.07 [5 rows x 3 columns] 6 YR MON NINO3.4 5 1950 6 26.52 17 1951 6 27.46 29 1952 6 27.33 41 1953 6 28.02 53 1954 6 26.93 [5 rows x 3 columns] 7 YR MON NINO3.4 6 1950 7 26.42 18 1951 7 27.72 30 1952 7 26.72 42 1953 7 27.52 54 1954 7 26.37 [5 rows x 3 columns] 8 YR MON NINO3.4 7 1950 8 25.98 19 1951 8 27.36 31 1952 8 26.46 43 1953 8 27.16 55 1954 8 25.73 [5 rows x 3 columns] 9 YR MON NINO3.4 8 1950 9 25.78 20 1951 9 27.51 32 1952 9 26.54 44 1953 9 27.13 56 1954 9 25.38 [5 rows x 3 columns] 10 YR MON NINO3.4 9 1950 10 25.96 21 1951 10 27.43 33 1952 10 26.54 45 1953 10 27.02 57 1954 10 25.51 [5 rows x 3 columns] 11 YR MON NINO3.4 10 1950 11 25.64 22 1951 11 27.48 34 1952 11 26.36 46 1953 11 26.96 58 1954 11 25.67 [5 rows x 3 columns] 12 YR MON NINO3.4 11 1950 12 25.50 23 1951 12 27.12 35 1952 12 26.53 47 1953 12 26.99 59 1954 12 25.37 [5 rows x 3 columns]
climatology = groups.mean()
Same as
climatology = groups.aggregate(np.mean)
climatology['NINO3.4'].head(12)
MON 1 26.484615 2 26.676923 3 27.184154 4 27.622923 5 27.733281 6 27.540469 7 27.149844 8 26.764531 9 26.667344 10 26.632656 11 26.593750 12 26.529062 Name: NINO3.4, dtype: float64
climatology['NINO3.4'].plot(kind='bar',ylim=[26,28], rot=0)
<matplotlib.axes.AxesSubplot at 0x10a7bca10>
Now suppose we want to apply a function that doesn't aggregate the values in the groups (as sum or mean would) but rather transforms each of those values ...
An example would be calculating the standardized anomalies per month (to each value subtract the mean of the corresponding month, then divide by the standard-deviation)
def zscore(x):
    """Return the standardized anomalies (z-scores) of x.

    The mean of x is subtracted from every value and the result is divided
    by the standard deviation of x. x only needs to provide ``.mean()`` and
    ``.std()`` and support arithmetic with their results (e.g. a pandas
    Series or DataFrame, as handed over by ``groupby(...).transform``).
    """
    return (x - x.mean()) / x.std()
# transform() applies zscore to each month group separately and, unlike an
# aggregation, hands back one value per original row.
transformed = groups.transform(zscore)
# Plot the standardized NINO3.4 values.
transformed['NINO3.4'].plot()
<matplotlib.axes.AxesSubplot at 0x10a11d350>
data['ANOM.3'].plot()
<matplotlib.axes.AxesSubplot at 0x10a167a90>
This is some real data provided by Dan Smale (Lauder). It is a summary file of daily QA/QC (data quality) diagnostics of raw GPS data collected at NIWA Lauder. The GPS data can be used to infer the amount of total column water vapour in the atmosphere (above Lauder).
Summary of data /data/gruan/gnss_processing/data/netr9_laud/rinex_211/LAUD0010.13S:SUM 13 1 1 00:00 13 1 1 23:59 24.00 30 25298 23597 93 0.57 0.43 11799
# The file has no header row, so columns get integer labels.
# Raw string: '\s' must reach the regex engine verbatim rather than be read
# as a (invalid) string escape.
data = pd.read_table('./data/teqc_SUM.dat', sep=r'\s+', header=None) ## the '\s+' is a regular expression
data.head()
0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | /data/gruan/gnss_processing/data/netr9_laud/ri... | 13 | 1 | 1 | 00:00 | 13 | 1 | 1 | 23:59 | 24 | 30 | 25298 | 23597 | 93 | 0.57 | 0.43 | 11799 |
1 | /data/gruan/gnss_processing/data/netr9_laud/ri... | 13 | 1 | 2 | 00:00 | 13 | 1 | 2 | 23:59 | 24 | 30 | 24530 | 22652 | 92 | 0.58 | 0.42 | 22652 |
2 | /data/gruan/gnss_processing/data/netr9_laud/ri... | 13 | 1 | 3 | 00:00 | 13 | 1 | 3 | 23:59 | 24 | 30 | 25296 | 23235 | 92 | 0.61 | 0.43 | 5809 |
3 | /data/gruan/gnss_processing/data/netr9_laud/ri... | 13 | 1 | 4 | 00:00 | 13 | 1 | 4 | 23:59 | 24 | 30 | 25300 | 23939 | 95 | 0.59 | 0.45 | 7980 |
4 | /data/gruan/gnss_processing/data/netr9_laud/ri... | 13 | 1 | 5 | 00:00 | 13 | 1 | 5 | 23:59 | 24 | 30 | 25301 | 24581 | 97 | 0.64 | 0.43 | 12291 |
5 rows × 17 columns
# Read the same file again, this time naming the 17 columns explicitly.
# The call's own parentheses continue the line, so no backslash is needed;
# the separator regex is a raw string so '\s' is not read as a string escape.
data = pd.read_table('./data/teqc_SUM.dat', header=None, sep=r'\s+',
                     names=['path','year1','m1','d1','t1','year2','m2','d2','t2','nh','nd','c1','c2','c3','c4','c5','c6'])
data.head()
path | year1 | m1 | d1 | t1 | year2 | m2 | d2 | t2 | nh | nd | c1 | c2 | c3 | c4 | c5 | c6 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | /data/gruan/gnss_processing/data/netr9_laud/ri... | 13 | 1 | 1 | 00:00 | 13 | 1 | 1 | 23:59 | 24 | 30 | 25298 | 23597 | 93 | 0.57 | 0.43 | 11799 |
1 | /data/gruan/gnss_processing/data/netr9_laud/ri... | 13 | 1 | 2 | 00:00 | 13 | 1 | 2 | 23:59 | 24 | 30 | 24530 | 22652 | 92 | 0.58 | 0.42 | 22652 |
2 | /data/gruan/gnss_processing/data/netr9_laud/ri... | 13 | 1 | 3 | 00:00 | 13 | 1 | 3 | 23:59 | 24 | 30 | 25296 | 23235 | 92 | 0.61 | 0.43 | 5809 |
3 | /data/gruan/gnss_processing/data/netr9_laud/ri... | 13 | 1 | 4 | 00:00 | 13 | 1 | 4 | 23:59 | 24 | 30 | 25300 | 23939 | 95 | 0.59 | 0.45 | 7980 |
4 | /data/gruan/gnss_processing/data/netr9_laud/ri... | 13 | 1 | 5 | 00:00 | 13 | 1 | 5 | 23:59 | 24 | 30 | 25301 | 24581 | 97 | 0.64 | 0.43 | 12291 |
5 rows × 17 columns
We want the index (row wise) to properly represent datetime info
from datetime import datetime
# Build one datetime per row from the year1 / m1 / d1 columns and use the
# result as the row index. year1 holds two-digit years (e.g. 13), hence +2000.
data.index = [datetime(y,m,d) for y,m,d in zip(data.year1+2000,data.m1,data.d1)] # note that we are using list comprehension
data.head()
path | year1 | m1 | d1 | t1 | year2 | m2 | d2 | t2 | nh | nd | c1 | c2 | c3 | c4 | c5 | c6 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
2013-01-01 | /data/gruan/gnss_processing/data/netr9_laud/ri... | 13 | 1 | 1 | 00:00 | 13 | 1 | 1 | 23:59 | 24 | 30 | 25298 | 23597 | 93 | 0.57 | 0.43 | 11799 |
2013-01-02 | /data/gruan/gnss_processing/data/netr9_laud/ri... | 13 | 1 | 2 | 00:00 | 13 | 1 | 2 | 23:59 | 24 | 30 | 24530 | 22652 | 92 | 0.58 | 0.42 | 22652 |
2013-01-03 | /data/gruan/gnss_processing/data/netr9_laud/ri... | 13 | 1 | 3 | 00:00 | 13 | 1 | 3 | 23:59 | 24 | 30 | 25296 | 23235 | 92 | 0.61 | 0.43 | 5809 |
2013-01-04 | /data/gruan/gnss_processing/data/netr9_laud/ri... | 13 | 1 | 4 | 00:00 | 13 | 1 | 4 | 23:59 | 24 | 30 | 25300 | 23939 | 95 | 0.59 | 0.45 | 7980 |
2013-01-05 | /data/gruan/gnss_processing/data/netr9_laud/ri... | 13 | 1 | 5 | 00:00 | 13 | 1 | 5 | 23:59 | 24 | 30 | 25301 | 24581 | 97 | 0.64 | 0.43 | 12291 |
5 rows × 17 columns
data.pop('path') # we pop the 'path' variable; be careful, it operates in place (the removed column is what gets returned)
2013-01-01 /data/gruan/gnss_processing/data/netr9_laud/ri... 2013-01-02 /data/gruan/gnss_processing/data/netr9_laud/ri... 2013-01-03 /data/gruan/gnss_processing/data/netr9_laud/ri... 2013-01-04 /data/gruan/gnss_processing/data/netr9_laud/ri... 2013-01-05 /data/gruan/gnss_processing/data/netr9_laud/ri... 2013-01-06 /data/gruan/gnss_processing/data/netr9_laud/ri... 2013-01-07 /data/gruan/gnss_processing/data/netr9_laud/ri... 2013-01-08 /data/gruan/gnss_processing/data/netr9_laud/ri... 2013-01-09 /data/gruan/gnss_processing/data/netr9_laud/ri... 2013-01-10 /data/gruan/gnss_processing/data/netr9_laud/ri... 2013-01-11 /data/gruan/gnss_processing/data/netr9_laud/ri... 2013-01-12 /data/gruan/gnss_processing/data/netr9_laud/ri... 2013-01-13 /data/gruan/gnss_processing/data/netr9_laud/ri... 2013-01-14 /data/gruan/gnss_processing/data/netr9_laud/ri... 2013-01-15 /data/gruan/gnss_processing/data/netr9_laud/ri... ... 2012-12-17 /data/gruan/gnss_processing/data/netr9_laud/ri... 2012-12-18 /data/gruan/gnss_processing/data/netr9_laud/ri... 2012-12-19 /data/gruan/gnss_processing/data/netr9_laud/ri... 2012-12-20 /data/gruan/gnss_processing/data/netr9_laud/ri... 2012-12-21 /data/gruan/gnss_processing/data/netr9_laud/ri... 2012-12-22 /data/gruan/gnss_processing/data/netr9_laud/ri... 2012-12-23 /data/gruan/gnss_processing/data/netr9_laud/ri... 2012-12-24 /data/gruan/gnss_processing/data/netr9_laud/ri... 2012-12-25 /data/gruan/gnss_processing/data/netr9_laud/ri... 2012-12-26 /data/gruan/gnss_processing/data/netr9_laud/ri... 2012-12-27 /data/gruan/gnss_processing/data/netr9_laud/ri... 2012-12-28 /data/gruan/gnss_processing/data/netr9_laud/ri... 2012-12-29 /data/gruan/gnss_processing/data/netr9_laud/ri... 2012-12-30 /data/gruan/gnss_processing/data/netr9_laud/ri... 2012-12-31 /data/gruan/gnss_processing/data/netr9_laud/ri... Name: path, Length: 551
data.head()
year1 | m1 | d1 | t1 | year2 | m2 | d2 | t2 | nh | nd | c1 | c2 | c3 | c4 | c5 | c6 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
2013-01-01 | 13 | 1 | 1 | 00:00 | 13 | 1 | 1 | 23:59 | 24 | 30 | 25298 | 23597 | 93 | 0.57 | 0.43 | 11799 |
2013-01-02 | 13 | 1 | 2 | 00:00 | 13 | 1 | 2 | 23:59 | 24 | 30 | 24530 | 22652 | 92 | 0.58 | 0.42 | 22652 |
2013-01-03 | 13 | 1 | 3 | 00:00 | 13 | 1 | 3 | 23:59 | 24 | 30 | 25296 | 23235 | 92 | 0.61 | 0.43 | 5809 |
2013-01-04 | 13 | 1 | 4 | 00:00 | 13 | 1 | 4 | 23:59 | 24 | 30 | 25300 | 23939 | 95 | 0.59 | 0.45 | 7980 |
2013-01-05 | 13 | 1 | 5 | 00:00 | 13 | 1 | 5 | 23:59 | 24 | 30 | 25301 | 24581 | 97 | 0.64 | 0.43 | 12291 |
5 rows × 16 columns
Sort the dataframe according to the index values (rowwise)
Note that for plotting purposes, you don't even have to do that, pandas + matplotlib understand that the dates need to be in chronological order
# sort_index() orders the rows by their index labels (here the dates) and
# returns a new DataFrame, leaving `data` untouched. It is the equivalent of
# the argument-less DataFrame.sort(), which current pandas no longer provides.
data2 = data.sort_index()
data2.head()
year1 | m1 | d1 | t1 | year2 | m2 | d2 | t2 | nh | nd | c1 | c2 | c3 | c4 | c5 | c6 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
2012-04-28 | 12 | 4 | 28 | 00:00 | 12 | 4 | 28 | 23:59 | 24 | 30 | 25462 | 25461 | 100 | 0.55 | 0.42 | 25461 |
2012-05-02 | 12 | 5 | 2 | 01:00 | 12 | 5 | 2 | 23:59 | 23 | 30 | 24480 | 24480 | 100 | 0.56 | 0.42 | 24480 |
2012-05-03 | 12 | 5 | 3 | 00:00 | 12 | 5 | 3 | 23:59 | 24 | 30 | 25455 | 25455 | 100 | 0.55 | 0.43 | 12728 |
2012-05-04 | 12 | 5 | 4 | 00:00 | 12 | 5 | 4 | 23:59 | 24 | 30 | 25459 | 25458 | 100 | 0.55 | 0.42 | 25458 |
2012-05-05 | 12 | 5 | 5 | 00:00 | 12 | 5 | 5 | 23:59 | 24 | 30 | 25451 | 25450 | 100 | 0.54 | 0.42 | 12725 |
5 rows × 16 columns
data['c3'].plot(rot=90)
<matplotlib.axes.AxesSubplot at 0x10a0b9dd0>
plt.plot(data2.c3.values)
[<matplotlib.lines.Line2D at 0x10c0e24d0>]
data.columns
Index([u'year1', u'm1', u'd1', u't1', u'year2', u'm2', u'd2', u't2', u'nh', u'nd', u'c1', u'c2', u'c3', u'c4', u'c5', u'c6'], dtype='object')
# Keep only the last six columns (c1 ... c6).
data2 = data[data.columns[-6:]]
# same as data2 = data[['c1','c2','c3','c4','c5','c6']] or data2 = data.ix[:,10::]
# (positional form where .ix is not available: data2 = data.iloc[:, 10:])
data2.head()
c1 | c2 | c3 | c4 | c5 | c6 | |
---|---|---|---|---|---|---|
2013-01-01 | 25298 | 23597 | 93 | 0.57 | 0.43 | 11799 |
2013-01-02 | 24530 | 22652 | 92 | 0.58 | 0.42 | 22652 |
2013-01-03 | 25296 | 23235 | 92 | 0.61 | 0.43 | 5809 |
2013-01-04 | 25300 | 23939 | 95 | 0.59 | 0.45 | 7980 |
2013-01-05 | 25301 | 24581 | 97 | 0.64 | 0.43 | 12291 |
5 rows × 6 columns
data2.plot(subplots=True, sharex=True, color='b', title='teqc_SUM', figsize=(14,13), rot=90)
plt.savefig('teqc_SUM.pdf')
# Bar chart of the mean of c1 for each value of the month column m1.
f, ax = plt.subplots(1,1, figsize=(7,7))
# Pick 'c1' before aggregating: only that column is averaged, so the
# non-numeric columns (t1, t2) are never handed to mean().
data.groupby(data['m1'])['c1'].mean().plot(kind='bar', ylim=[24000,26000], color='steelblue',alpha=.7, ax=ax)
# One initial per month; the trailing ';' suppresses the notebook echo.
ax.set_xticklabels(list('JFMAMJJASOND'), rotation=None, fontsize=15);
# Same chart, grouping on the month taken from the datetime index instead
# of the m1 column.
f, ax = plt.subplots(1,1, figsize=(7,7))
# Pick 'c1' before aggregating so the non-numeric columns (t1, t2) are never
# handed to mean(); the open parenthesis continues the line by itself.
data.groupby(data.index.month)['c1'].mean().plot(kind='bar',
    ylim=[24000,26000], color='steelblue',alpha=.7, ax=ax)
ax.set_xticklabels(list('JFMAMJJASOND'), rotation=None, fontsize=13);
# The loop variable is deliberately not called `f`: on Python 2 a list
# comprehension variable leaks into the enclosing scope and would overwrite
# the figure handle `f` created above.
[label.set_fontsize(13) for label in ax.yaxis.get_ticklabels()] # list comprehension again
[None, None, None, None, None]