# NumPy walkthrough using the Battery Park tide data set.
#
# This script is a notebook export.  Bare expressions such as ``data`` or
# ``pred[pred > 5]`` are kept on purpose: inside IPython/Jupyter they display
# their value, while in a plain ``python`` run they are evaluated and
# discarded.  Written for Python 3 and current NumPy releases.

from itertools import islice

import matplotlib.pyplot as plt
import numpy as np

# Single source of truth for the input file, which is read several times below.
CSV_PATH = 'BatteryParkTideData.csv'

# ---------------------------------------------------------------------------
# Peek at the raw file (pure-Python equivalent of ``head -n 5``).
# ---------------------------------------------------------------------------
with open(CSV_PATH) as csv_file:
    for line in islice(csv_file, 5):
        # Lines already end with a newline, so do not add another one.
        print(line, end='')

# ---------------------------------------------------------------------------
# Load as a plain 2-D float array; the header row is skipped and 'NA'
# entries are treated as missing.
# ---------------------------------------------------------------------------
data = np.genfromtxt(CSV_PATH, delimiter=',', skip_header=1,
                     missing_values='NA')
data

print('Shape: ', data.shape)
print('Size: ', data.size)
print('Number of dimensions: ', data.ndim)
print('Data type: ', data.dtype)

data[0]       # first row
data[0, 1]    # first row, second column
data[:, 1]    # every row of the second column

# ---------------------------------------------------------------------------
# Load again, this time taking field names from the header row, which gives
# a structured array addressable by column name.
# ---------------------------------------------------------------------------
data = np.genfromtxt(CSV_PATH, delimiter=',', names=True,
                     missing_values='NA')
data
data[0]
data['Pred6']

# ---------------------------------------------------------------------------
# Load once more with unpack=True so each of the four columns lands in its
# own 1-D array.
# ---------------------------------------------------------------------------
time, pred, backup, accoustic = np.genfromtxt(
    CSV_PATH, delimiter=',', skip_header=1, missing_values='NA', unpack=True)
pred

# ---------------------------------------------------------------------------
# Other ways of creating arrays.
# ---------------------------------------------------------------------------
np.array([2.3, 42, 5.6])
np.ones(10)
np.zeros((2, 2))
np.arange(10, 20, 1.6)
np.linspace(10, 20, 16)
np.random.random((2, 2))
np.random.standard_normal((2, 2))

# ---------------------------------------------------------------------------
# Summary statistics.
# ---------------------------------------------------------------------------
print(pred.min())
print(pred.max())
print(pred.mean())
print(pred.std())
print(np.median(pred))
# peak-to-peak; the function form is used because the ndarray.ptp() method
# no longer exists in NumPy 2.x
print(np.ptp(pred))

# Evaluated before the non-finite samples are filtered out further down.
backup.max()

# ---------------------------------------------------------------------------
# Fancy and boolean indexing.
# ---------------------------------------------------------------------------
pred[[100, 5, 1, 5, 100]]
pred[pred > 5]
pred[(pred > 5) | (pred < 0.5)]
np.arange(10) > 5
np.where(np.arange(10) > 5)

# ---------------------------------------------------------------------------
# Working with NaN values.
# ---------------------------------------------------------------------------
a = np.array([1, 2, np.nan, 4, 5, np.nan])
np.isnan(a)
a[np.isnan(a)]
a[~np.isnan(a)]
a[np.isfinite(a)]

# are there any nan values?
print('time:', np.isnan(time).any())
print('backup:', np.isnan(backup).any())

# are all of the values finite?
print('pred:', np.isfinite(pred).all())
print('accoustic:', np.isfinite(accoustic).all())

# Keep only the samples where both measured series are finite, applying the
# same mask to all four arrays so they stay aligned.
not_nan = np.isfinite(backup) & np.isfinite(accoustic)
time = time[not_nan]
pred = pred[not_nan]
backup = backup[not_nan]
accoustic = accoustic[not_nan]

# Number of samples dropped by the mask.
not_nan.size - time.size

print(time[:5])
print(pred[:5])
print(accoustic[:5])
print(backup[:5])

# Difference between the two maxima (not necessarily at the same sample) ...
backup.max() - pred.max()

# ... versus the difference at the sample where ``backup`` peaks.
m = backup.argmax()
print(m)
backup[m] - pred[m]

# ---------------------------------------------------------------------------
# Plotting.
# ---------------------------------------------------------------------------
# Inline SVG figures are an IPython/Jupyter feature, so only request them
# when an interactive shell is present.  ``%matplotlib`` is used instead of
# ``%pylab``; nothing below relies on names injected by pylab.
try:
    _ipython = get_ipython()  # only defined inside IPython/Jupyter
except NameError:
    _ipython = None

if _ipython is not None:
    _ipython.run_line_magic('matplotlib', 'inline')
    _ipython.run_line_magic('config', "InlineBackend.figure_format = 'svg'")

# All three series, unlabelled.
fig, ax = plt.subplots()
ax.plot(time, pred)
ax.plot(time, accoustic)
ax.plot(time, backup)
ax.set_ylabel('Feet above MLLW')
ax.set_xlabel('Hours Since First Measurement')

# Same plot with a legend.
fig, ax = plt.subplots()
ax.plot(time, pred, label='Predicted')
ax.plot(time, accoustic, label='Accoustic')
ax.plot(time, backup, label='Backup')
ax.set_ylabel('Feet above MLLW')
ax.set_xlabel('Hours Since First Measurement')
ax.legend(loc='upper right')

# Backup series minus the prediction, plotted alongside both inputs.
obs_minus_pred = backup - pred

fig, ax = plt.subplots()
ax.plot(time, pred, label='Predicted')
ax.plot(time, backup, label='Backup')
ax.plot(time, obs_minus_pred, label='Difference')
ax.set_ylabel('Feet above MLLW')
ax.set_xlabel('Hours Since First Measurement')
ax.legend(loc='upper right')

# ---------------------------------------------------------------------------
# Element-wise arithmetic.
# ---------------------------------------------------------------------------
a = np.arange(5, dtype=float)  # float to avoid integer surprises
print(a)
a + 5
a * 5

b = np.arange(10, 20, 2, dtype=float)
print(b)
b - a
a / b

# Time of the highest backup reading, and of the largest excess over the
# prediction.
time[backup.argmax()]
time[obs_minus_pred.argmax()]

# Outside IPython nothing renders the figures automatically.
if _ipython is None:
    plt.show()