from pandas import Series, DataFrame
import pandas as pd
import numpy as np
from datetime import datetime
# Render DataFrames as plain text rather than HTML in the notebook.
# pd.set_printoptions is deprecated (it emitted a FutureWarning asking for
# set_option instead), so the option API is used directly.
pd.set_option('display.notebook_repr_html', False)
from pandas import *
import matplotlib as mpl
import numpy as np
import pandas as pd
from pandas import Series, DataFrame
# Plot a random walk: the cumulative sum of 1000 standard-normal draws.
# NOTE(review): `plot` is not imported in the visible source -- presumably
# supplied by a pylab session; confirm.
plot(np.random.randn(1000).cumsum())
[<matplotlib.lines.Line2D at 0x10c4941d0>]
# Elements 10 through 39 of the integer array 0..49 (the slice end is exclusive).
np.arange(50)[10:40]
array([10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39])
# Failed / successful login counts for nine sample hosts, keyed by IP address.
index = ["{0}.{0}.{0}.{0}".format(octet) for octet in range(1, 10)]
login_failed = [10, 5, 8, 9, 3, 2, 1, 8, 5]
login_success = [47, 15, 40, 20, 30, 12, 18, 8, 5]

# Both Series share the same IP-address index.
s = Series(data=login_failed, index=index)
s.index
ss = Series(data=login_success, index=index)

# Passing a list of labels selects several entries at once.
s[["2.2.2.2", "5.5.5.5"]]
2.2.2.2 5 5.5.5.5 3
# Horizontal bar chart of the failed-login counts, one bar per IP.
s.plot(kind="barh")
<matplotlib.axes.AxesSubplot at 0x10c690ad0>
# Horizontal bar chart of the successful-login counts, one bar per IP.
ss.plot(kind="barh")
<matplotlib.axes.AxesSubplot at 0x10c755b50>
# Six daily timestamps, 2012-07-16 through 2012-07-21 (both endpoints included).
dates = pd.date_range('2012-07-16', '2012-07-21')
# One reading per day, indexed by date.
atemps = Series([21,23,24,19,18,26], index = dates)
atemps.plot(kind="barh")
<matplotlib.axes.AxesSubplot at 0x10f6bc990>
# A second set of readings on the same dates.
btemps = Series([10,3,2,1,8,6], index = dates)
# Combine both Series into one frame with a column per city; rows align on
# the shared date index.
temps = pd.DataFrame({'Vancouver': atemps, 'Toronto': btemps})
temps
Toronto Vancouver 2012-07-16 10 21 2012-07-17 3 23 2012-07-18 2 24 2012-07-19 1 19 2012-07-20 8 18 2012-07-21 6 26
# Line plot of the frame: one line per column.
plot(temps)
[<matplotlib.lines.Line2D at 0x10ff89710>, <matplotlib.lines.Line2D at 0x10ff89990>]
# Element-wise difference between the two columns, aligned on date.
temps['Vancouver'] - temps['Toronto']
2012-07-16 11 2012-07-17 20 2012-07-18 22 2012-07-19 18 2012-07-20 10 2012-07-21 20 Freq: D
# Keep the per-day difference as its own Series and show it as a vertical bar chart.
temp_diff = temps['Vancouver'] - temps['Toronto']
temp_diff.plot(kind="bar")
<matplotlib.axes.AxesSubplot at 0x11031bb10>
# Assigning to a new column label adds that column to the existing frame.
temps['Difference'] = temps['Vancouver'] - temps['Toronto']
temps
Toronto Vancouver Difference 2012-07-16 10 21 11 2012-07-17 3 23 20 2012-07-18 2 24 22 2012-07-19 1 19 18 2012-07-20 8 18 10 2012-07-21 6 26 20
# NOTE(review): the original note-to-self read "add freq D in the bottom of the
# DataFrame" -- presumably about the "Freq: D" footer that the Series output
# shows but the DataFrame output does not; confirm intent.
# Getting rid of a column is easy too: `del` removes it from the frame in place.
del temps['Difference']
temps
Toronto Vancouver 2012-07-16 10 21 2012-07-17 3 23 2012-07-18 2 24 2012-07-19 1 19 2012-07-20 8 18 2012-07-21 6 26
# Per-column summary statistics: count, mean, std, min, quartiles and max.
temps.describe()
Toronto Vancouver count 6.000000 6.000000 mean 5.000000 21.833333 std 3.577709 3.060501 min 1.000000 18.000000 25% 2.250000 19.500000 50% 4.500000 22.000000 75% 7.500000 23.750000 max 10.000000 26.000000
# Load the latency samples, parsing the 'date' column and using it as the index.
ts = pd.read_csv('/Users/antigen/Downloads/latency.csv', parse_dates=['date'], index_col='date') # index is Timestamp
# Count the samples that fall into each 30-second bucket; `how` also accepts
# other aggregations such as mean, median, max and min.
a = ts.resample('30s', how='count')
# The original cell also called plot(b), but no `b` is defined anywhere in the
# visible source, so that call would raise NameError; only `a` is plotted.
plot(a)
[<matplotlib.lines.Line2D at 0x10cd4d210>]
d = ts.resample('M') # one bucket per calendar month; no `how` is given, so the library's default aggregation applies
e = ts.resample('s', how='count') # number of samples in each one-second bucket
f = ts.resample('s', how=lambda x: x.mean()) # a callable can be passed as the aggregation
g = ts.resample('t', how=['median','mean','count']) # per-minute buckets, several aggregations at once
plot(e)
[<matplotlib.lines.Line2D at 0x10ef0ea90>]
# Drop rows containing missing values before plotting the per-minute aggregates.
plot(g.dropna())
[<matplotlib.lines.Line2D at 0x10ec3ad10>, <matplotlib.lines.Line2D at 0x10ec3af90>, <matplotlib.lines.Line2D at 0x10ec6c1d0>]
# Same CSV, but without index_col: the parsed dates stay in a regular column
# and the frame keeps the default integer index.
df2 = pd.read_csv('/Users/antigen/Downloads/latency.csv', parse_dates=['date']) # index is zero based
df2
date value 0 2013-01-01 00:01:00 156 1 2013-01-01 00:01:00 109 2 2013-01-01 00:01:00 93 3 2013-01-01 00:01:00 93 4 2013-01-01 00:02:00 109 5 2013-01-01 00:02:00 78 6 2013-01-01 00:05:00 109 7 2013-01-01 00:05:00 124 8 2013-01-01 00:06:00 156 9 2013-01-01 00:06:00 124 10 2013-01-01 00:06:00 93 11 2013-01-01 00:06:00 109 12 2013-01-01 00:09:00 109 13 2013-01-01 00:09:00 93 14 2013-01-01 00:10:00 109 15 2013-01-01 00:10:00 109 16 2013-01-01 00:11:00 124 17 2013-01-01 00:11:00 93 18 2013-01-01 00:13:00 109 19 2013-01-01 00:13:00 140 20 2013-01-01 00:14:00 109 21 2013-01-01 00:14:00 124 22 2013-01-01 00:16:00 140 23 2013-01-01 00:16:00 93 24 2013-01-01 00:17:00 93 25 2013-01-01 00:17:00 78 26 2013-01-01 00:18:00 93 27 2013-01-01 00:18:00 93 28 2013-01-01 00:21:00 171
# Plot the raw latency values. The frame loaded from the CSV is named df2;
# no `df` is defined in the visible source, so the original name would raise NameError.
plot(df2['value'])
[<matplotlib.lines.Line2D at 0x110a13fd0>]
# Six consecutive days as plain ISO-formatted strings (not yet timestamps).
dates2 = ['2013-01-01','2013-01-02','2013-01-03','2013-01-04','2013-01-05','2013-01-06']
dates2
['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04', '2013-01-05', '2013-01-06']
data = [1,3,5,3,2,7]
# Convert the date strings to timestamps so the Series gets a datetime index.
ts = Series(data, index=pd.to_datetime(dates2))
ts.plot()
<matplotlib.axes.AxesSubplot at 0x110b3e690>
# 100 consecutive daily timestamps starting 2013-02-06.
rng = pd.date_range("2013-02-06", periods=100)
# Absolute values of standard-normal draws, so every sample is non-negative.
# np.random is named explicitly: a bare `random` is not imported in the visible
# source, and the stdlib module of that name has no randn().
data = np.abs(np.random.randn(100))
ts = pd.Series(data, index=rng)
ts
2013-02-06 2.177601 2013-02-07 0.131016 2013-02-08 0.186129 2013-02-09 2.214074 2013-02-10 1.313707 2013-02-11 0.288971 2013-02-12 0.429556 2013-02-13 1.361498 2013-02-14 0.597708 2013-02-15 0.717795 2013-02-16 1.609128 2013-02-17 0.973900 2013-02-18 0.429234 2013-02-19 1.155196 2013-02-20 0.575780 2013-02-21 0.447055 2013-02-22 1.556562 2013-02-23 1.426248 2013-02-24 0.759532 2013-02-25 0.328270 2013-02-26 1.445686 2013-02-27 0.592981 2013-02-28 1.388797 2013-03-01 0.487079 2013-03-02 0.128901 2013-03-03 0.976608 2013-03-04 0.290113 2013-03-05 0.186305 2013-03-06 0.921998 2013-03-07 1.033010 2013-03-08 1.302089 2013-03-09 0.831252 2013-03-10 0.830676 2013-03-11 2.037284 2013-03-12 0.587511 2013-03-13 1.006569 2013-03-14 1.155288 2013-03-15 0.348637 2013-03-16 0.394078 2013-03-17 1.210451 2013-03-18 1.737161 2013-03-19 0.730112 2013-03-20 0.153140 2013-03-21 0.324377 2013-03-22 0.605676 2013-03-23 1.396863 2013-03-24 1.196339 2013-03-25 0.110275 2013-03-26 0.088492 2013-03-27 0.547314 2013-03-28 1.507934 2013-03-29 1.560234 2013-03-30 0.057482 2013-03-31 0.385469 2013-04-01 0.760922 2013-04-02 0.501677 2013-04-03 0.085251 2013-04-04 0.169331 2013-04-05 0.331173 2013-04-06 0.163402 2013-04-07 1.851305 2013-04-08 1.138525 2013-04-09 1.409413 2013-04-10 0.251428 2013-04-11 1.239500 2013-04-12 0.125696 2013-04-13 0.358392 2013-04-14 0.316140 2013-04-15 0.417716 2013-04-16 1.681600 2013-04-17 0.474041 2013-04-18 0.708479 2013-04-19 0.148397 2013-04-20 1.145004 2013-04-21 0.467433 2013-04-22 0.728944 2013-04-23 0.344365 2013-04-24 2.348498 2013-04-25 0.524482 2013-04-26 0.443507 2013-04-27 0.319204 2013-04-28 0.955623 2013-04-29 0.912225 2013-04-30 0.175102 2013-05-01 0.934319 2013-05-02 0.839550 2013-05-03 0.559449 2013-05-04 1.105563 2013-05-05 0.650831 2013-05-06 0.319773 2013-05-07 2.117838 2013-05-08 0.636953 2013-05-09 1.123323 2013-05-10 0.616250 2013-05-11 2.424736 2013-05-12 0.473804 2013-05-13 0.481757 2013-05-14 1.046076 2013-05-15 1.612891 2013-05-16 0.532724 
Freq: D, Length: 100
# Line plot of the daily series.
ts.plot()
<matplotlib.axes.AxesSubplot at 0x110dac190>
ts[datetime(2013,3,1)] # look up the single value for a specific date using a datetime object
0.48707884254205852
ts['2013-03-01'] # the same lookup using a date string; this returns a single value, not a slice
0.48707884254205852
# Slicing with date strings; both endpoints are included in the result.
ts['2013-03-01':'2013-04-01']
2013-03-01 0.487079 2013-03-02 0.128901 2013-03-03 0.976608 2013-03-04 0.290113 2013-03-05 0.186305 2013-03-06 0.921998 2013-03-07 1.033010 2013-03-08 1.302089 2013-03-09 0.831252 2013-03-10 0.830676 2013-03-11 2.037284 2013-03-12 0.587511 2013-03-13 1.006569 2013-03-14 1.155288 2013-03-15 0.348637 2013-03-16 0.394078 2013-03-17 1.210451 2013-03-18 1.737161 2013-03-19 0.730112 2013-03-20 0.153140 2013-03-21 0.324377 2013-03-22 0.605676 2013-03-23 1.396863 2013-03-24 1.196339 2013-03-25 0.110275 2013-03-26 0.088492 2013-03-27 0.547314 2013-03-28 1.507934 2013-03-29 1.560234 2013-03-30 0.057482 2013-03-31 0.385469 2013-04-01 0.760922 Freq: D
# Plot only the 2013-03-01 .. 2013-04-01 slice of the series.
plot(ts['2013-03-01':'2013-04-01'])
[<matplotlib.lines.Line2D at 0x111700790>]
plot(ts['2013-03']) # a partial date string (year-month) selects that whole month; unlike the slice above, 2013-04-01 is not included
[<matplotlib.lines.Line2D at 0x1088a4a50>]
# 100 x 4 frame of standard-normal values: rows indexed by rng, one column per IP.
# NOTE(review): the recorded output for this cell shows consecutive integers,
# so it appears to come from an earlier version of this cell; confirm.
ip_df = pd.DataFrame(np.random.randn(400).reshape((100,4)), index=rng, columns=('1.1.1.1','2.2.2.2','3.3.3.3','4.4.4.4'))
# A partial date string selects every row in March 2013.
ip_df.ix['2013-03']
1.1.1.1 2.2.2.2 3.3.3.3 4.4.4.4 2013-03-01 92 93 94 95 2013-03-02 96 97 98 99 2013-03-03 100 101 102 103 2013-03-04 104 105 106 107 2013-03-05 108 109 110 111 2013-03-06 112 113 114 115 2013-03-07 116 117 118 119 2013-03-08 120 121 122 123 2013-03-09 124 125 126 127 2013-03-10 128 129 130 131 2013-03-11 132 133 134 135 2013-03-12 136 137 138 139 2013-03-13 140 141 142 143 2013-03-14 144 145 146 147 2013-03-15 148 149 150 151 2013-03-16 152 153 154 155 2013-03-17 156 157 158 159 2013-03-18 160 161 162 163 2013-03-19 164 165 166 167 2013-03-20 168 169 170 171 2013-03-21 172 173 174 175 2013-03-22 176 177 178 179 2013-03-23 180 181 182 183 2013-03-24 184 185 186 187 2013-03-25 188 189 190 191 2013-03-26 192 193 194 195 2013-03-27 196 197 198 199 2013-03-28 200 201 202 203 2013-03-29 204 205 206 207 2013-03-30 208 209 210 211 2013-03-31 212 213 214 215
# Open/high/low/close of the '1.1.1.1' column per daily bucket. In the recorded
# output all four columns are equal on every row, i.e. each bucket holds a
# single value.
resampled = ip_df['1.1.1.1'].resample('D', how="ohlc")
resampled
open high low close 2013-02-06 0.667463 0.667463 0.667463 0.667463 2013-02-07 -1.287710 -1.287710 -1.287710 -1.287710 2013-02-08 -0.582973 -0.582973 -0.582973 -0.582973 2013-02-09 0.331141 0.331141 0.331141 0.331141 2013-02-10 0.311155 0.311155 0.311155 0.311155 2013-02-11 -0.330716 -0.330716 -0.330716 -0.330716 2013-02-12 0.057521 0.057521 0.057521 0.057521 2013-02-13 1.712147 1.712147 1.712147 1.712147 2013-02-14 0.820147 0.820147 0.820147 0.820147 2013-02-15 1.096700 1.096700 1.096700 1.096700 2013-02-16 -0.138053 -0.138053 -0.138053 -0.138053 2013-02-17 -1.601882 -1.601882 -1.601882 -1.601882 2013-02-18 0.249855 0.249855 0.249855 0.249855 2013-02-19 -0.097528 -0.097528 -0.097528 -0.097528 2013-02-20 -0.033422 -0.033422 -0.033422 -0.033422 2013-02-21 1.019544 1.019544 1.019544 1.019544 2013-02-22 -1.643013 -1.643013 -1.643013 -1.643013 2013-02-23 -0.582085 -0.582085 -0.582085 -0.582085 2013-02-24 1.436340 1.436340 1.436340 1.436340 2013-02-25 -1.191150 -1.191150 -1.191150 -1.191150 2013-02-26 0.567902 0.567902 0.567902 0.567902 2013-02-27 0.333226 0.333226 0.333226 0.333226 2013-02-28 1.579566 1.579566 1.579566 1.579566 2013-03-01 0.966033 0.966033 0.966033 0.966033 2013-03-02 -0.687408 -0.687408 -0.687408 -0.687408 2013-03-03 -0.973395 -0.973395 -0.973395 -0.973395 2013-03-04 -0.293088 -0.293088 -0.293088 -0.293088 2013-03-05 -1.024993 -1.024993 -1.024993 -1.024993 2013-03-06 0.920623 0.920623 0.920623 0.920623 2013-03-07 -0.234771 -0.234771 -0.234771 -0.234771 2013-03-08 -0.482392 -0.482392 -0.482392 -0.482392 2013-03-09 0.593006 0.593006 0.593006 0.593006 2013-03-10 0.079329 0.079329 0.079329 0.079329 2013-03-11 0.479830 0.479830 0.479830 0.479830 2013-03-12 -0.154861 -0.154861 -0.154861 -0.154861 2013-03-13 -0.679170 -0.679170 -0.679170 -0.679170 2013-03-14 1.008153 1.008153 1.008153 1.008153 2013-03-15 -1.753305 -1.753305 -1.753305 -1.753305 2013-03-16 -0.726288 -0.726288 -0.726288 -0.726288 2013-03-17 1.534673 1.534673 1.534673 1.534673 2013-03-18 
-0.930242 -0.930242 -0.930242 -0.930242 2013-03-19 2.126452 2.126452 2.126452 2.126452 2013-03-20 -0.033397 -0.033397 -0.033397 -0.033397 2013-03-21 0.947184 0.947184 0.947184 0.947184 2013-03-22 0.031262 0.031262 0.031262 0.031262 2013-03-23 -0.318363 -0.318363 -0.318363 -0.318363 2013-03-24 -0.869514 -0.869514 -0.869514 -0.869514 2013-03-25 -0.628911 -0.628911 -0.628911 -0.628911 2013-03-26 -0.609140 -0.609140 -0.609140 -0.609140 2013-03-27 0.749727 0.749727 0.749727 0.749727 2013-03-28 -0.206117 -0.206117 -0.206117 -0.206117 2013-03-29 1.183828 1.183828 1.183828 1.183828 2013-03-30 -2.006837 -2.006837 -2.006837 -2.006837 2013-03-31 0.094606 0.094606 0.094606 0.094606 2013-04-01 -0.631297 -0.631297 -0.631297 -0.631297 2013-04-02 1.498079 1.498079 1.498079 1.498079 2013-04-03 -0.732888 -0.732888 -0.732888 -0.732888 2013-04-04 0.307917 0.307917 0.307917 0.307917 2013-04-05 0.789150 0.789150 0.789150 0.789150 2013-04-06 1.739575 1.739575 1.739575 1.739575 2013-04-07 0.181541 0.181541 0.181541 0.181541 2013-04-08 0.944273 0.944273 0.944273 0.944273 2013-04-09 -1.581479 -1.581479 -1.581479 -1.581479 2013-04-10 -0.494982 -0.494982 -0.494982 -0.494982 2013-04-11 0.007488 0.007488 0.007488 0.007488 2013-04-12 0.176036 0.176036 0.176036 0.176036 2013-04-13 -0.231764 -0.231764 -0.231764 -0.231764 2013-04-14 -0.595858 -0.595858 -0.595858 -0.595858 2013-04-15 -1.327879 -1.327879 -1.327879 -1.327879 2013-04-16 1.318089 1.318089 1.318089 1.318089 2013-04-17 1.403062 1.403062 1.403062 1.403062 2013-04-18 0.305167 0.305167 0.305167 0.305167 2013-04-19 -0.690680 -0.690680 -0.690680 -0.690680 2013-04-20 0.121424 0.121424 0.121424 0.121424 2013-04-21 0.770222 0.770222 0.770222 0.770222 2013-04-22 0.850089 0.850089 0.850089 0.850089 2013-04-23 0.575472 0.575472 0.575472 0.575472 2013-04-24 -0.202110 -0.202110 -0.202110 -0.202110 2013-04-25 -1.743006 -1.743006 -1.743006 -1.743006 2013-04-26 0.253060 0.253060 0.253060 0.253060 2013-04-27 1.422381 1.422381 1.422381 1.422381 2013-04-28 
-0.082828 -0.082828 -0.082828 -0.082828 2013-04-29 -0.385594 -0.385594 -0.385594 -0.385594 2013-04-30 -0.246531 -0.246531 -0.246531 -0.246531 2013-05-01 0.927497 0.927497 0.927497 0.927497 2013-05-02 -0.361332 -0.361332 -0.361332 -0.361332 2013-05-03 -0.133065 -0.133065 -0.133065 -0.133065 2013-05-04 -2.733855 -2.733855 -2.733855 -2.733855 2013-05-05 0.520610 0.520610 0.520610 0.520610 2013-05-06 -0.818820 -0.818820 -0.818820 -0.818820 2013-05-07 -0.764690 -0.764690 -0.764690 -0.764690 2013-05-08 -0.059040 -0.059040 -0.059040 -0.059040 2013-05-09 1.397286 1.397286 1.397286 1.397286 2013-05-10 -0.649536 -0.649536 -0.649536 -0.649536 2013-05-11 -0.161345 -0.161345 -0.161345 -0.161345 2013-05-12 0.911908 0.911908 0.911908 0.911908 2013-05-13 -0.454775 -0.454775 -0.454775 -0.454775 2013-05-14 0.346036 0.346036 0.346036 0.346036 2013-05-15 1.010684 1.010684 1.010684 1.010684 2013-05-16 -1.548136 -1.548136 -1.548136 -1.548136
# One line per OHLC column.
plot(resampled)
[<matplotlib.lines.Line2D at 0x1087d78d0>, <matplotlib.lines.Line2D at 0x1087d7b50>, <matplotlib.lines.Line2D at 0x1087d7d50>, <matplotlib.lines.Line2D at 0x1087d7ed0>]
# Plot all four IP columns for March 2013.
plot(ip_df.ix['2013-03'])
[<matplotlib.lines.Line2D at 0x10f042a50>, <matplotlib.lines.Line2D at 0x10f042cd0>, <matplotlib.lines.Line2D at 0x10f042ed0>, <matplotlib.lines.Line2D at 0x10f046090>]
ip_df.ix['2013-03-31'].plot(kind="bar") # bar chart of the four IP values for a single day
<matplotlib.axes.AxesSubplot at 0x10d540e10>