Example : samples from a Weibull distribution (fake wind measurements, see http://wind-data.ch/tools/weibull.php?lng=en for example)
Other useful ressource on failure analysis with Weibull : https://www.wakari.io/nb/pybokeh/Weibull_Analysis
Distribution info : http://en.wikipedia.org/wiki/Weibull_distribution
$F(x) = 1 - e^{-(x/\lambda)^k}, \quad x \geq 0$
Pierre Haessig — April 2014
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats
w_shape = 2.
w_scale = 6
#weib = stats.exponweib(a=w_shape, c=1, scale=w_scale) # doesn't work !
weib = stats.weibull_min(w_shape, scale=w_scale)
vel = np.linspace(0, w_scale*3, 500)
plt.plot(vel, weib.pdf(vel))
plt.title('Weibull pdf, scale={:.2f}, shape={:.2f}'.format(w_scale, w_shape))
plt.xlabel('wind speed (m/s)')
weib.mean()
5.3173615527165481
N_samp = 200
vel_samp = weib.rvs(N_samp)
vel_samp.mean()
5.1082282289751495
h_opts = dict(normed=True, alpha=1, histtype='stepfilled', color='#BBDDFF')
plt.hist(vel_samp, bins = 15, cumulative=False, **h_opts)
plt.plot(vel, weib.pdf(vel), 'r')
plt.title('Weibull pdf, scale={:.2f}, shape={:.2f}'.format(w_scale, w_shape))
plt.xlabel('wind speed (m/s)');
plt.hist(vel_samp, bins = 100, cumulative=True, **h_opts)
plt.plot(vel, weib.cdf(vel), 'r')
plt.title('Weibull cdf, scale={:.2f}, shape={:.2f}'.format(w_scale, w_shape))
plt.xlabel('wind speed (m/s)');
Logit transform (inverse sigmoid)
maps $[0,1] \mapsto [-\infty, +\infty]$
$$f(p) = log(\frac{p}{1-p}) = log(p) - log(1-p) $$def logit(p):
return np.log10(p/(1-p))
logit(np.array([0.01, 0.1, 0.5, 0.9, 0.99]))
array([-1.99563519, -0.95424251, 0. , 0.95424251, 1.99563519])
hist, bin_edges= np.histogram(vel_samp, bins=100, density=True)
cumhist = hist.cumsum()*(bin_edges[1]-bin_edges[0])
plt.plot(bin_edges[:-1], logit(cumhist))
plt.plot(vel, logit(weib.cdf(vel)), 'r')
plt.title('Weibull logit cdf, scale={:.2f}, shape={:.2f}'.format(w_scale, w_shape))
plt.ylim(-4,4);
plt.xlabel('wind speed (m/s)');
plt.ylabel('log-odds')
<matplotlib.text.Text at 0x96f07d0>
from matplotlib import scale as mscale
from matplotlib import transforms as mtransforms
from matplotlib import ticker as mticker
from numpy import ma
class ProbaFormatter(Formatter):
'''Probability formatter (using Math text)'''
def __call__(self, x, pos=None):
s = ''
if 0.01 <= x <= 0.99:
if x in [.01, 0.1, 0.5, 0.9, 0.99]:
s = '{:.2f}'.format(x)
elif x < 0.01:
if mticker.is_decade(x):
s = '$10^{%.0f}$' % np.log10(x)
elif x > 0.99:
if mticker.is_decade(1-x):
s = '$1-10^{%.0f}$' % np.log10(1-x)
return s
def format_data_short(self, value):
'return a short formatted string representation of a number'
return '%-12g' % value
class LogitScale(mscale.ScaleBase):
"""
Scales data in range 0,1 to -infty, +infty
"""
# The scale class must have a member ``name`` that defines the
# string used to select the scale.
name = 'logit'
def __init__(self, axis, **kwargs):
"""
Any keyword arguments passed to ``set_xscale`` and
``set_yscale`` will be passed along to the scale's
constructor.
thresh: The degree above which to crop the data.
"""
mscale.ScaleBase.__init__(self)
p_min = kwargs.pop("p_min", 1e-2)
if not (0 <p_min < 0.5):
raise ValueError("p_min must be between 0 and 0.5 excluded")
p_max = kwargs.pop("p_max", 1-p_min)
if not (0.5 < p_max < 1):
raise ValueError("p_max must be between 0.5 and 1 excluded")
self.p_min = p_min
self.p_max = p_max
def get_transform(self):
"""
Override this method to return a new instance that does the
actual transformation of the data.
"""
return self.LogitTransform(self.p_min, self.p_max)
def set_default_locators_and_formatters(self, axis):
expon_major = np.arange(1, 18)
# ..., 0.01, 0.1, 0.5, 0.9, 0.99, ...
axis.set_major_locator(mticker.FixedLocator(
list(1/10.**(expon)) + \
[0.5] + \
list(1-1/10.**(expon))
))
minor_ticks = [0.2,0.3,0.4,0.6,0.7, 0.8]
for i in range(2,17):
minor_ticks.extend(1/10.**i * np.arange(2,10))
minor_ticks.extend(1-1/10.**i * np.arange(2,10))
axis.set_minor_locator(mticker.FixedLocator(minor_ticks))
axis.set_major_formatter(ProbaFormatter())
axis.set_minor_formatter(ProbaFormatter())
def limit_range_for_scale(self, vmin, vmax, minpos):
return max(vmin, self.p_min), min(vmax, self.p_max)
class LogitTransform(mtransforms.Transform):
input_dims = 1
output_dims = 1
is_separable = True
def __init__(self, p_min, p_max):
mtransforms.Transform.__init__(self)
self.p_min = p_min
self.p_max = p_max
def transform_non_affine(self, a):
"""logit transform (base 10)"""
p_over = a > self.p_max
p_under = a < self.p_min
# saturate extreme values:
a_sat = np.where(p_over, self.p_max, a)
a_sat = np.where(p_under, self.p_min, a_sat)
return np.log10(a_sat / (1-a_sat))
def inverted(self):
return LogitScale.InvertedLogitTransform(self.p_min, self.p_max)
class InvertedLogitTransform(mtransforms.Transform):
input_dims = 1
output_dims = 1
is_separable = True
def __init__(self, p_min, p_max):
mtransforms.Transform.__init__(self)
self.p_min = p_min
self.p_max = p_max
def transform_non_affine(self, a):
"""sigmoid transform (base 10)"""
return 1/(1+10**(-a))
def inverted(self):
return LogitScale.LogitTransform(self.p_min, self.p_max)
mscale.register_scale(LogitScale)
%matplotlib inline
fig = plt.figure(figsize=(10,8))
ax = plt.subplot(111)
plt.plot(vel, weib.cdf(vel), 'r')
plt.hist(vel_samp, bins = 100, cumulative=True, **h_opts)
ax.set_yscale('logit', p_min=1e-4)
ax.set_ylim(1e-4, 0.9999);
plt.title('Weibull cdf, scale={:.2f}, shape={:.2f}'.format(w_scale, w_shape))
plt.xlabel('wind speed (m/s)');
n_loc = 5.3
n_scale = 2
norm = stats.norm(loc=n_loc, scale=n_scale)
vel_samp2 = norm.rvs(N_samp)
vel_samp2[vel_samp2<0] = 0
vel_samp2.mean()
5.0765252605208939
plt.hist(vel_samp2, bins = 15, cumulative=False, **h_opts)
plt.plot(vel, norm.pdf(vel), 'r')
plt.title('Normal cdf, loc={:.2f}, scale={:.2f}'.format(n_loc, n_scale))
plt.xlabel('wind speed (m/s)');
fig = plt.figure(figsize=(10,8))
ax = plt.subplot(111)
plt.plot(vel, norm.cdf(vel), 'r')
plt.plot(vel, weib.cdf(vel), color='orange')
plt.hist(vel_samp2, bins = 200, cumulative=True, **h_opts)
ax.set_yscale('logit', p_min=1e-4)
plt.title('Normal cdf, loc={:.2f}, scale={:.2f}'.format(n_loc, n_scale))
plt.xlabel('wind speed (m/s)');