Go to the hydropy website for more information on the package, and find the code on GitHub.
#Loading the hydropy package
import hydropy as hp
We have a DataFrame with river discharge at different locations in the Maarkebeek basin (Belgium):
HTML('<iframe src=http://biomath.ugent.be/~stvhoey/maarkebeek_data/ width=700 height=350></iframe>')
Data downloaded from http://www.waterinfo.be/, made available by the Flemish Environmental Agency (VMM).
flowdata.head()
L06_347 | LS06_347 | LS06_348 | LS06_34C | LS06_34D | LS06_34E | LS06_34G | |
---|---|---|---|---|---|---|---|
Time | |||||||
2008-01-01 00:15:00 | 0.229 | 0.229 | 0.021 | 0.122 | 0.014 | 0.030 | NaN |
2008-01-01 00:30:00 | 0.229 | 0.229 | 0.021 | 0.122 | 0.014 | 0.029 | NaN |
2008-01-01 00:45:00 | 0.229 | 0.229 | 0.021 | 0.122 | 0.014 | 0.029 | NaN |
2008-01-01 01:00:00 | 0.229 | 0.229 | 0.021 | 0.122 | 0.015 | 0.029 | NaN |
2008-01-01 01:15:00 | 0.229 | 0.229 | 0.021 | 0.122 | 0.015 | 0.029 | NaN |
flowdata.tail()
L06_347 | LS06_347 | LS06_348 | LS06_34C | LS06_34D | LS06_34E | LS06_34G | |
---|---|---|---|---|---|---|---|
Time | |||||||
2013-01-01 23:00:00 | 0.883 | 0.883 | 0.075 | NaN | 0.119 | 0.034 | NaN |
2013-01-01 23:15:00 | 0.875 | 0.875 | 0.075 | NaN | 0.118 | 0.034 | NaN |
2013-01-01 23:30:00 | 0.872 | 0.872 | 0.074 | NaN | 0.119 | 0.034 | NaN |
2013-01-01 23:45:00 | 0.873 | 0.873 | 0.075 | NaN | 0.116 | 0.035 | NaN |
2013-01-02 00:00:00 | 0.860 | 0.860 | 0.075 | NaN | 0.114 | 0.036 | NaN |
print(len(flowdata), 'records', 'from', flowdata.index[0], 'till', flowdata.index[-1])
175488 records from 2008-01-01 00:15:00 till 2013-01-02 00:00:00
Converting the dataframe to a hydropy time series datatype provides extra functionality:
myflowserie = hp.HydroAnalysis(flowdata)
myflowserie.get_year('2009').get_season('summer').plot(figsize=(12,6))
<matplotlib.axes._subplots.AxesSubplot at 0x1c113d75be0>
myflowserie.get_year('2011').get_month("Jun").get_recess().plot(figsize=(12,6))
<matplotlib.axes._subplots.AxesSubplot at 0x1c11441aa20>
fig, ax = plt.subplots(figsize=(13, 6))
myflowserie['LS06_347'].get_year('2010').get_month("Jul").get_highpeaks(150, above_percentile=0.9).plot(style='o', ax=ax)
myflowserie['LS06_347'].get_year('2010').get_month("Jul").plot(ax=ax)
<matplotlib.axes._subplots.AxesSubplot at 0x1c114b7d710>
raindata.columns
Index(['P05_019', 'P05_038', 'P05_039', 'P06_014', 'P06_040', 'P07_006', 'P07_021', 'P07_022'], dtype='object')
storms = myflowserie.derive_storms(raindata['P05_019'], 'LS06_347',
number_of_storms=3, drywindow=50,
makeplot=True)
C:\Users\Marty\Google Drive\PyDev\src\hydropy\hydropy\storm.py:62: FutureWarning: sort is deprecated, use sort_values(inplace=True) for INPLACE sorting temp.sort(ascending=False) C:\Users\Marty\Google Drive\PyDev\src\hydropy\hydropy\storm.py:87: FutureWarning: pd.rolling_sum is deprecated for Series and will be removed in a future version, replace with Series.rolling(center=False,window=50).sum() temp1 = pd.rolling_sum(temp1, window=drywindow, center=False)
storms = myflowserie.derive_storms(raindata['P06_014'], 'LS06_347',
number_of_storms=3, drywindow=96,
makeplot=True)
C:\Users\Marty\Google Drive\PyDev\src\hydropy\hydropy\storm.py:62: FutureWarning: sort is deprecated, use sort_values(inplace=True) for INPLACE sorting temp.sort(ascending=False) C:\Users\Marty\Google Drive\PyDev\src\hydropy\hydropy\storm.py:87: FutureWarning: pd.rolling_sum is deprecated for Series and will be removed in a future version, replace with Series.rolling(center=False,window=96).sum() temp1 = pd.rolling_sum(temp1, window=drywindow, center=False)
myflowserie.data.groupby('season').mean()
L06_347 | LS06_347 | LS06_348 | LS06_34C | LS06_34D | LS06_34E | LS06_34G | |
---|---|---|---|---|---|---|---|
season | |||||||
Autumn | 0.238284 | 0.246378 | 0.022630 | 0.182139 | 0.026797 | 0.026666 | 0.017467 |
Spring | 0.225966 | 0.239476 | 0.023696 | 0.144322 | 0.026558 | 0.029317 | 0.055004 |
Summer | 0.097924 | 0.094836 | 0.012179 | 0.048987 | 0.012076 | 0.023207 | 0.136304 |
Winter | 0.474153 | 0.480252 | 0.044169 | 0.219861 | 0.055192 | 0.034974 | 0.034519 |
Fork the GitHub repo
Get the code on your computer
git clone https://github.com/yourname/hydropy
Run the python setup script (install as development package):
python setup.py develop
Improve implementation, add functionalities,...
# Data inspection
myflowserie.summary() #head(), tail(),
L06_347 | LS06_347 | LS06_348 | LS06_34C | LS06_34D | LS06_34E | LS06_34G | |
---|---|---|---|---|---|---|---|
count | 175465.000000 | 172153.000000 | 163312.000000 | 104486.000000 | 174054.000000 | 171812.000000 | 93985.000000 |
mean | 0.258319 | 0.264058 | 0.025398 | 0.148447 | 0.030074 | 0.028579 | 0.065588 |
std | 0.730894 | 0.732851 | 0.047489 | 0.444993 | 0.079334 | 0.027651 | 0.156632 |
min | -0.020000 | -0.019000 | 0.001000 | -0.010000 | 0.000000 | 0.002000 | 0.000000 |
25% | 0.059000 | 0.061000 | 0.009000 | 0.030000 | 0.008000 | 0.017000 | 0.000000 |
50% | 0.107000 | 0.116000 | 0.013000 | 0.065000 | 0.014000 | 0.025000 | 0.000000 |
75% | 0.240000 | 0.244000 | 0.025000 | 0.137000 | 0.028000 | 0.033000 | 0.001000 |
max | 25.900000 | 25.900000 | 1.400000 | 13.800000 | 3.720000 | 1.650000 | 0.691000 |
# Resampling frequencies
temp1 = myflowserie.frequency_resample('7D', 'mean') # 7 day means
temp1.head()
C:\Users\Marty\Google Drive\PyDev\src\hydropy\hydropy\flowanalysis.py:219: FutureWarning: how in .resample() is deprecated the new syntax is .resample(...).mean() return self.__class__(self.data.resample(*args, **kwargs),
L06_347 | LS06_347 | LS06_348 | LS06_34C | LS06_34D | LS06_34E | LS06_34G | season | |
---|---|---|---|---|---|---|---|---|
Time | ||||||||
2008-01-01 00:15:00 | 0.407740 | 0.407740 | 0.045302 | 0.218046 | 0.045960 | 0.039046 | 0.000877 | Winter |
2008-01-08 00:15:00 | 0.463927 | 0.463927 | 0.046653 | 0.247088 | 0.052054 | 0.043190 | 0.000057 | Winter |
2008-01-15 00:15:00 | 0.473644 | 0.473644 | 0.046685 | 0.252646 | 0.058049 | 0.041683 | 0.000124 | Winter |
2008-01-22 00:15:00 | 0.267629 | 0.267629 | 0.026110 | 0.142237 | 0.023388 | 0.032665 | 0.000004 | Winter |
2008-01-29 00:15:00 | 0.278516 | 0.278516 | 0.027170 | 0.148635 | 0.024737 | 0.035676 | 0.000009 | Winter |
temp2 = myflowserie.frequency_resample("M", "max") # Monthly maxima
temp2.head()
C:\Users\Marty\Google Drive\PyDev\src\hydropy\hydropy\flowanalysis.py:219: FutureWarning: how in .resample() is deprecated the new syntax is .resample(...).max() return self.__class__(self.data.resample(*args, **kwargs),
L06_347 | LS06_347 | LS06_348 | LS06_34C | LS06_34D | LS06_34E | LS06_34G | season | |
---|---|---|---|---|---|---|---|---|
Time | ||||||||
2008-01-31 | 1.730 | 1.730 | 0.312 | 0.924 | 0.452 | 0.164 | 0.006 | Winter |
2008-02-29 | 0.747 | 0.747 | 0.274 | 0.398 | 0.218 | 0.168 | 0.691 | Winter |
2008-03-31 | 7.430 | 7.430 | 0.879 | 3.960 | 1.260 | 0.829 | 0.018 | Spring |
2008-04-30 | 1.010 | 1.010 | 0.160 | 0.541 | 0.216 | 0.133 | 0.001 | Spring |
2008-05-31 | 0.427 | 0.427 | 0.075 | 0.228 | 0.069 | 0.106 | 0.691 | Spring |
temp3 = myflowserie.frequency_resample("A", 'sum') # Yearly sums
temp3.head(6)
C:\Users\Marty\Google Drive\PyDev\src\hydropy\hydropy\flowanalysis.py:219: FutureWarning: how in .resample() is deprecated the new syntax is .resample(...).sum() return self.__class__(self.data.resample(*args, **kwargs),
L06_347 | LS06_347 | LS06_348 | LS06_34C | LS06_34D | LS06_34E | LS06_34G | season | |
---|---|---|---|---|---|---|---|---|
Time | ||||||||
2008-12-31 | 9053.371 | 9051.862 | 997.975 | 4826.749 | 939.998 | 1167.250 | 926.223 | Winter |
2009-12-31 | 7683.888 | 7672.038 | 859.617 | 4092.716 | 1036.323 | 1053.482 | 319.755 | Winter |
2010-12-31 | 11933.623 | 12356.573 | 906.725 | 6588.184 | 1294.821 | 1212.729 | 4918.342 | Winter |
2011-12-31 | 8458.819 | 8299.722 | 598.294 | NaN | 826.962 | 710.203 | NaN | Winter |
2012-12-31 | 8029.035 | 7910.918 | 771.414 | 3.002 | 1111.521 | 760.598 | NaN | Winter |
2013-12-31 | 167.232 | 167.232 | 13.726 | NaN | 24.894 | 5.903 | NaN | None |
#slicing of the dataframes
myflowserie['L06_347']['2009'].plot()
<matplotlib.axes._subplots.AxesSubplot at 0x1c118e982b0>
# get_month, get_year, get_season, get_date_range
myflowserie.get_date_range("01/01/2010","03/05/2010").plot(figsize=(13, 6))
<matplotlib.axes._subplots.AxesSubplot at 0x1c11859fbe0>
# or combine different statements:
myflowserie.get_year('2010').get_month(6).plot(figsize=(13, 6))
<matplotlib.axes._subplots.AxesSubplot at 0x1c114649e10>
For the seasons, two conventions are available: meteorological (seasons start on the first of the month) or astronomical (seasons start on the 21st of the month).
myflowserie.current_season_dates()
{'Autumn': '0901', 'Spring': '0301', 'Summer': '0601', 'Winter': '1201'}
myflowserie.info_season_dates('north', 'astro')
{'Autumn': '0921', 'Spring': '0321', 'Summer': '0621', 'Winter': '1221'}
# Peaks (high or low)
myflowserie['LS06_348'].get_year('2012').get_highpeaks(60, above_percentile=0.8).data.dropna().head()
LS06_348 | season | |
---|---|---|
Time | ||
2012-01-01 23:15:00 | 0.199 | Winter |
2012-01-03 17:15:00 | 0.351 | Winter |
2012-01-04 23:45:00 | 0.415 | Winter |
2012-01-07 07:45:00 | 0.070 | Winter |
2012-01-09 10:45:00 | 0.031 | Winter |
# Recessions and climbing periods get_recess, get_climbing
myflowserie.get_year("2012").get_month("april").get_climbing().plot(figsize=(13, 6))
<matplotlib.axes._subplots.AxesSubplot at 0x1c114d00898>
# above/below certain percentile values
myflowserie["LS06_347"].get_above_percentile(0.6).get_year('2011').get_season('summer').plot()
--------------------------------------------------------------------------- TypeError Traceback (most recent call last) C:\Users\Marty\Anaconda3\lib\site-packages\pandas\core\internals.py in eval(self, func, other, raise_on_error, try_cast, mgr) 1119 try: -> 1120 result = get_result(other) 1121 C:\Users\Marty\Anaconda3\lib\site-packages\pandas\core\internals.py in get_result(other) 1090 else: -> 1091 result = func(values, other) 1092 TypeError: unorderable types: str() > float() During handling of the above exception, another exception occurred: TypeError Traceback (most recent call last) <ipython-input-29-b371ac93b86e> in <module>() 1 # above/below certain percentile values ----> 2 myflowserie["LS06_347"].get_above_percentile(0.6).get_year('2011').get_season('summer').plot() C:\Users\Marty\Google Drive\PyDev\src\hydropy\hydropy\flowanalysis.py in get_above_percentile(self, percentile) 465 """ 466 percentilevalue = self.quantile(percentile) --> 467 df = self.data[self.data > percentilevalue].copy() 468 df = self._pass_freq(df) 469 return self.__class__(df, datacols=self._data_cols) C:\Users\Marty\Anaconda3\lib\site-packages\pandas\core\ops.py in f(self, other) 1169 return self._compare_frame(other, func, str_rep) 1170 elif isinstance(other, ABCSeries): -> 1171 return self._combine_series_infer(other, func) 1172 else: 1173 C:\Users\Marty\Anaconda3\lib\site-packages\pandas\core\frame.py in _combine_series_infer(self, other, func, level, fill_value) 3512 3513 return self._combine_match_columns(other, func, level=level, -> 3514 fill_value=fill_value) 3515 3516 def _combine_match_index(self, other, func, level=None, fill_value=None): C:\Users\Marty\Anaconda3\lib\site-packages\pandas\core\frame.py in _combine_match_columns(self, other, func, level, fill_value) 3532 3533 new_data = left._data.eval(func=func, other=right, -> 3534 axes=[left.columns, self.index]) 3535 return self._constructor(new_data) 3536 C:\Users\Marty\Anaconda3\lib\site-packages\pandas\core\internals.py 
in eval(self, **kwargs) 2847 2848 def eval(self, **kwargs): -> 2849 return self.apply('eval', **kwargs) 2850 2851 def setitem(self, **kwargs): C:\Users\Marty\Anaconda3\lib\site-packages\pandas\core\internals.py in apply(self, f, axes, filter, do_integrity_check, consolidate, **kwargs) 2830 2831 kwargs['mgr'] = self -> 2832 applied = getattr(b, f)(**kwargs) 2833 result_blocks = _extend_blocks(applied, result_blocks) 2834 C:\Users\Marty\Anaconda3\lib\site-packages\pandas\core\internals.py in eval(self, func, other, raise_on_error, try_cast, mgr) 1125 raise 1126 except Exception as detail: -> 1127 result = handle_error() 1128 1129 # technically a broadcast error in numpy can 'work' by returning a C:\Users\Marty\Anaconda3\lib\site-packages\pandas\core\internals.py in handle_error() 1109 if raise_on_error: 1110 raise TypeError('Could not operate %s with block values %s' % -> 1111 (repr(other), str(detail))) 1112 else: 1113 # return the values TypeError: Could not operate array([ nan]) with block values unorderable types: str() > float()
Furthermore: