PyData Berlin 2014
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from IPython.html.widgets import interact
import matplotlib.mlab as mlab
import seaborn as sb
#sb.set() # Reset Style
sb.set(style="white", palette="muted")
sb.set_context("talk")
%matplotlib inline
Read some measurements in, which are collected with Tinkerforge IMU and GPS sensors, which layed flat on the ground, without moving in any direction (static measurement).
df = pd.read_csv('2014-06-27-001-Data.csv')
# Convert Epoch Millis to Timestamp
# http://pandas.pydata.org/pandas-docs/dev/timeseries.html#epoch-timestamps
df.index = pd.to_datetime(df['millis'], unit='ms')
accelerations = df[['ax','ay']].dropna()
accelerations.plot();
import scipy.stats as stats
from IPython.html.widgets import interact
dists = ['Normal', 'Rayleigh', 'Weibull']
Thanks Rob Story for the code in your PyData Silicon Valley 2014 talk!
@interact
def plot_sb_dist(column=accelerations.columns.tolist(), dist=dists):
plt.figure(figsize=(10, 4))
dist_map = {
'Rayleigh': stats.rayleigh,
'Weibull': stats.exponweib,
'Normal': stats.norm,
}
sb.distplot(accelerations[column], fit=dist_map[dist], label='# of Sensor Readings')
sb.plt.legend()
plt.savefig("ax_dist.png", dpi=72, bbox_inches='tight')
accelerations.describe()
ax | ay | |
---|---|---|
count | 1000.000000 | 1000.000000 |
mean | -0.300999 | -1.135121 |
std | 0.046919 | 0.042984 |
min | -0.470600 | -1.264900 |
25% | -0.333400 | -1.166900 |
50% | -0.303900 | -1.137500 |
75% | -0.274500 | -1.108000 |
max | -0.176500 | -0.990400 |
One can use mean
and std
to describe, what's going on.
The variance is $\mathrm{std}^2$
GPS measurements are in decimal degrees DD.dddddd
gps = pd.DataFrame()
Convert decimal degress approximately to meters
arcLon = 2*np.pi*6378.0/360.0
arcLat = arcLon * np.cos(df.longitude*np.pi/180.0)
gps['LonM'] = df.longitude * arcLon * 1000.0
gps['LatM'] = df.latitude * arcLat * 1000.0
sb.jointplot(gps.LonM, gps.LatM, kind='kde', \
xlim=(gps.LonM.min()-1, gps.LonM.max()+1), \
ylim=(gps.LatM.min()-1, gps.LatM.max()+1), \
size=10, space=1)
plt.savefig('gps_dist.png', dpi=150)
@interact
def plot_sb_dist(column=gps.columns.tolist(), dist=dists):
plt.figure(figsize=(10, 6))
dist_map = {
'Rayleigh': stats.rayleigh,
'Weibull': stats.exponweib,
'Normal': stats.norm,
}
sb.distplot(gps[column], fit=dist_map[dist])
gps.describe()
LonM | LatM | |
---|---|---|
count | 1000.000000 | 1000.000000 |
mean | 1535402.773968 | 5517728.990404 |
std | 0.273083 | 1.430401 |
min | 1535402.321909 | 5517726.748935 |
25% | 1535402.544544 | 5517727.816466 |
50% | 1535402.655861 | 5517728.968462 |
75% | 1535402.989812 | 5517730.181281 |
max | 1535403.435080 | 5517731.309635 |
One can use mean
and std
to describe, what's going on.
$f(x, \mu, \sigma) = \large \frac{1}{\sigma\sqrt{2\pi}} \ e^{\Large -\frac{(x-\mu)^2}{2\sigma^2} } \\$ with $\mu$ as mean and $\sigma^2$ as variance ($\sigma$ is the standard deviation).
def pltnormpdf(mean, variance):
plt.figure(figsize=(8,5))
plt.plot(x,mlab.normpdf(x, mean, variance))
plt.ylim(0,1)
x = np.linspace(-10,10,500)
interact(pltnormpdf, mean=(-5,5,0.5), variance=(0.1,10,0.1));
With the help of statistical parameters, it is possible to describe a sensor reading and it's noise. A lot of sensors can be described be the assumption:
[real value
] + [white
, gaussian
noise]
so one can use the Kalman filter! Even it is not a perfect normal distribution, the Kalman Filter is the best estimator of the real value
. The Kalman Filter does not work, if the sensor reading is multimodal
(two or more peaks).