Here we compare significance and excess sensitivity and limits for an on-off measurement computed with the Rolke and Li & Ma methods.
Given n_on, n_off and alpha, compute significance(n_on, n_off, alpha).
Given n_off, alpha and significance, compute sensitivity(n_off, alpha, significance).
Given n_on, n_off, alpha and significance, compute excess_limits(n_on, n_off, alpha, significance).
Note that significance and p-value (= confidence level) are equivalent via the normal distribution:
p_value = scipy.stats.norm.sf(significance)
significance = scipy.stats.norm.isf(p_value)
confidence_level = p_value
import numpy as np
from numpy import log, sqrt, abs
Implements the formulae on page 6 of the Rolke 2005 paper: http://adsabs.harvard.edu/abs/2005NIMPA.551..493R
def compute_mu_best(x, y, tau):
    """Unconditional ML estimate of the signal: on counts minus scaled off counts."""
    background = y / tau
    return x - background
def compute_b_best(y, tau):
    """Unconditional ML estimate of the background in the on region."""
    b_best = y / tau
    return b_best
def compute_b_hat(x, y, tau, mu):
    """Profile (conditional) ML estimate of the background for fixed signal mu.

    Positive root of the quadratic profile-likelihood condition
    (Rolke 2005, page 6).
    """
    k = 1 + tau
    linear = x + y - k * mu
    discriminant = linear ** 2 + 4 * k * y * mu
    return (linear + sqrt(discriminant)) / (2 * k)
def compute_L(x, y, tau, mu, b):
    """Likelihood: product of the Poisson terms for the on and off counts.

    x ~ Pois(mu + b), y ~ Pois(tau * b).
    """
    from scipy.stats import poisson
    return poisson.pmf(x, mu + b) * poisson.pmf(y, tau * b)
def compute_lambda(x, y, tau, mu):
    """Likelihood ratio: L at the profiled background (fixed mu) over
    L at the unconditional maximum."""
    numerator = compute_L(x, y, tau, mu, compute_b_hat(x, y, tau, mu))
    denominator = compute_L(x, y, tau,
                            compute_mu_best(x, y, tau),
                            compute_b_best(y, tau))
    return numerator / denominator
def compute_TS(x, y, tau, mu):
    """Test statistic TS = -2 log(lambda) for the hypothesized signal mu."""
    return -2 * log(compute_lambda(x, y, tau, mu))
# Reproduce Figure 1 in the Rolke paper
# Example counts, on/off exposure ratio and confidence level.
x, y, tau, cl = 8, 15, 5.0, 0.95
# Scan the signal parameter and evaluate the test statistic at each value.
mu = np.arange(0, 14, 0.1)
TS = compute_TS(x, y, tau, mu)
# NOTE(review): `plot` is not defined/imported in this file -- presumably
# notebook code relying on `%pylab` / matplotlib; confirm before running as a script.
plot(mu, TS);
#@np.vectorize
def loglike_lima(on, off, alpha, signal, do_switch=True):
    """Poisson log-likelihood for an on/off measurement, with the
    background profiled out for the given signal (variant of the
    likelihood in the Li & Ma paper).

    Taken from Utilities::Statistics::LiMa_LogLikelihood
    from the HESS software.

    @param on: number of counts in the on region
    @param off: number of counts in the off region
    @param alpha: on / off exposure ratio
    @param signal: hypothesized signal (excess) counts in the on region
    @param do_switch: for negative signal, swap the roles of the on and
        off regions (i.e. model a source in the off region)
    @return: log-likelihood l, up to terms that do not depend on the
        parameters; larger values mean a better fit, as required by the
        root finding in excess_error
    """
    # @todo Explain what the model is for negative signal
    # Is there a source in the off region?
    if do_switch:
        # For negative signal, swap on/off counts, invert alpha and flip
        # the signal sign so the same formulae apply.
        positive = signal > 0
        n1 = np.where(positive, on, off)
        n2 = np.where(positive, off, on)
        a = np.where(positive, alpha, 1.0 / alpha)
        signal = np.where(positive, signal, -a * signal)
    else:
        n1 = on
        n2 = off
        a = alpha

    # Estimate the background in the on region for the given signal.
    # @note Some of the other functions compute background
    # in the off region as the parameter.
    def case1(n1, a, signal):
        """Background estimate when off == 0; corresponds to Li & Ma formula (7)."""
        return n1 / (a + 1) - signal / a

    def case2(n1, n2, a, signal):
        """Background estimate for the general case: positive root of the
        quadratic condition from maximizing the likelihood over background."""
        t = (1 + a) * signal - a * (n1 + n2)
        delta = t * t + 4 * a * (1 + a) * signal * n2
        return (sqrt(delta) - t) / (2 + 2 * a)

    # NOTE(review): the test uses the un-switched `off`, not n2 --
    # confirm this is intended when do_switch has swapped the regions.
    background = np.where(off == 0,
                          case1(n1, a, signal),
                          case2(n1, n2, a, signal))
    # Clip unphysical negative background estimates to zero.
    # (BUGFIX: the original computed this np.where but discarded the result.)
    background = np.where(background < 0, 0, background)

    # Poisson log-likelihood, dropping the log-factorial terms (they do
    # not depend on signal or background):
    # n1 ~ Pois(signal + background)
    # n2 ~ Pois(background / a)
    mu1 = signal + background
    mu2 = background / a
    l1 = np.where(n1 > 0, n1 * log(mu1), 0) - mu1
    l2 = np.where(n2 > 0, n2 * log(mu2), 0) - mu2
    l = l1 + l2

    # BUGFIX: the original returned the constant -1 (likely a typo for
    # a variable), which makes the profile likelihood in excess_error
    # degenerate. Return the log-likelihood itself: excess_error's
    # f(signal) = L0 - L(signal) - dL only changes sign if the returned
    # value is maximal at the best-fit signal.
    return l
@np.vectorize
def excess_error(on, off, alpha, nsig=1,
                 up=True, loglike=loglike_lima):
    """Statistical uncertainty on the signal using the profile
    likelihood method on a variant of the likelihood function
    given in the Li & Ma paper.

    @param on: number of counts in on region
    @param off: number of counts in off region
    @param alpha: on / off exposure ratio
    @param nsig: number of standard deviations of the error
    @param up: find the upper (True) or lower (False) error
    @param loglike: log-likelihood function (larger = better fit)
    @return: signed difference between the limit and the best-fit excess

    The profile likelihood method finds the signal at which the
    log-likelihood has dropped by nsig ** 2 / 2 from its maximum
    L0 = L(signal0), attained at the best-fit excess
    signal0 = on - alpha * off.
    """
    from scipy.optimize import brentq
    # Optimal signal and likelihood
    signal0 = on - off * alpha
    L0 = loglike(on, off, alpha, signal0)
    # BUGFIX: by Wilks' theorem, an nsig-sigma interval corresponds to a
    # log-likelihood drop of nsig ** 2 / 2. The original used nsig / 2,
    # which agrees only for nsig = 1 (this was flagged by a @todo).
    dL = 0.5 * nsig ** 2
    # We want to use the brentq method to find a root of
    #     f(signal) = L0 - L(signal) - dL
    # The brentq method needs a start interval (signal_low, signal_up)
    # containing the solution. We use signal_low = signal0 and find a
    # signal_up such that f(signal_up) is positive.
    # The details of the following algorithm are not important,
    # we start with an interval of 1 and increase by factors of 2.
    signal = signal0 + 1 if up else signal0 - 1
    for _ in range(100):  # Avoid infinite loop (should never occur)
        L = loglike(on, off, alpha, signal)
        if (L0 - L < dL) & np.isfinite(L):
            # Still inside the interval: double the distance from signal0.
            signal = signal0 + 2 * (signal - signal0)
        else:
            break
    # Now use brentq to find the signal where the likelihood drop equals dL.
    def f(signal):
        L = loglike(on, off, alpha, signal)
        return L0 - L - dL
    signal_low = signal0 if up else signal
    signal_up = signal if up else signal0
    # @note The HESS software uses bisect
    # (i.e. results will be different because we use brentq)
    # with hardcoded xtol = 1e-4 and maxiter = 100
    signal = brentq(f, signal_low, signal_up,
                    xtol=1e-4, maxiter=100)
    # Return difference to excess
    return signal - signal0
def compute_significance_lima(n_on, n_off, alpha):
    """Li & Ma significance, evaluated with their closed-form formula (17)."""
    total = n_on + n_off
    ratio = (alpha + 1) / total
    term_on = n_on * log(n_on * ratio / alpha)
    term_off = n_off * log(n_off * ratio)
    # A negative excess yields a negative significance.
    sign = np.where(n_on > alpha * n_off, 1, -1)
    return sign * sqrt(abs(2 * (term_on + term_off)))
def compute_significance_rolke(n_on, n_off, alpha):
    """Significance from the Rolke test statistic evaluated at mu = 0."""
    # Map gamma-ray astronomy symbols onto the Rolke paper's notation.
    x = n_on
    y = n_off
    tau = 1.0 / alpha
    return sqrt(compute_TS(x, y, tau, 0.0))
# Compare significance for one example -> Perfect agreement
n_on, n_off, alpha = 10, 10, 0.1
significance_lima = compute_significance_lima(n_on, n_off, alpha)
significance_rolke = compute_significance_rolke(n_on, n_off, alpha)
# Python 2 print statements (notebook cell).
print significance_lima
print significance_rolke
# Output: 4.70512718528 4.70512718528
def compute_sensitivity_lima(n_off, alpha, significance, guess=1e-3):
    """Compute sensitivity by numerically inverting the Li & Ma
    significance formula: find the n_on that gives the requested
    significance and return the corresponding excess n_on - alpha * n_off.

    @note: in weird cases (e.g. on=0.1, off=0.1, alpha=0.001)
    fsolve does not find a solution.
    @todo: Is it possible to find a better starting point or
    implementation to make this robust and fast?
    values < guess are often not found using this cost function.
    """
    # It would be better to find a range that always contains the solution
    # and to then call brentq, which will be a bit slower, but always converge.
    from scipy.optimize import fsolve

    def cost(n_on):
        # Negative n_on is unphysical: return a huge value to push
        # the solver back into the allowed region.
        if n_on < 0:
            return 1e100
        # Li & Ma significance (same formula as compute_significance_lima).
        scale = (alpha + 1) / (n_on + n_off)
        term_on = n_on * log(n_on * scale / alpha)
        term_off = n_off * log(n_off * scale)
        sign = np.where(n_on - alpha * n_off > 0, 1, -1)
        return sign * sqrt(abs(2 * (term_on + term_off))) - significance

    root = fsolve(cost, guess)[0]
    return root - alpha * n_off
# TODO: x must be int; results don't match
def compute_sensitivity_rolke(n_off, alpha, significance):
    """Sensitivity from ROOT's TRolke calculator (upper edge of the
    average limit for background-only observations)."""
    import ROOT
    calculator = ROOT.TRolke()
    calculator.SetCLSigmas(significance)
    # TRolke wants integer counts; the expected on counts for
    # background only are alpha * n_off. Efficiency is fixed to 1.
    n_on_expected = int(alpha * n_off)
    calculator.SetPoissonBkgKnownEff(n_on_expected, int(n_off), 1.0 / alpha, 1)
    low, high = ROOT.Double(0), ROOT.Double(0)
    calculator.GetSensitivity(low, high)
    return high
# Compute sensitivity for one example
n_off, alpha, significance = 10, 0.1, 3.
sensitivity_lima = compute_sensitivity_lima(n_off, alpha, significance)
sensitivity_rolke = compute_sensitivity_rolke(n_off, alpha, significance)
# Python 2 print statements (notebook cell).
# NOTE(review): the two methods disagree here -- see the TODO above
# compute_sensitivity_rolke.
print sensitivity_lima
print sensitivity_rolke
# Output: 4.81849708379 6.5305711766
def compute_limits_rolke(n_on, n_off, alpha, significance):
    """Confidence limits on the excess from ROOT's TRolke calculator."""
    import ROOT
    calculator = ROOT.TRolke()
    calculator.SetCLSigmas(significance)
    # TRolke wants integer counts; efficiency is fixed to 1.
    calculator.SetPoissonBkgKnownEff(int(n_on), int(n_off), 1.0 / alpha, 1)
    low, high = ROOT.Double(0), ROOT.Double(0)
    calculator.GetLimits(low, high)
    return low, high
def compute_limits_lima(n_on, n_off, alpha, significance):
    # TODO: Doesn't work yet
    # NOTE(review): this early return makes the two lines below unreachable;
    # the function currently always returns (None, None).
    return None, None
    # Unreachable sketch of the intended implementation. Note that
    # excess_error returns (limit - excess), i.e. signed offsets,
    # not the limits themselves.
    down = excess_error(n_on, n_off, alpha, significance, False, loglike_lima)
    up = excess_error(n_on, n_off, alpha, significance, True, loglike_lima)
    return down, up
# Look at three examples: Gaussian limit, small positive excess, small negative excess
# Each tuple is (n_on, n_off, alpha, significance).
examples = [(100, 0, 1, 1), # should roughly give excess 100 +- 10
            (100, 0, 1, 2), # should roughly give excess 100 +- 20
            (10, 10, 0.1, 1), # excess = 9
            (10, 90, 0.1, 1), # excess = 1
            (10, 110, 0.1, 1) # excess = -1
            ]
# NOTE(review): compute_limits_lima always returns (None, None) -- see its TODO.
for n_on, n_off, alpha, significance in examples:
    limits_lima = compute_limits_lima(n_on, n_off, alpha, significance)
    limits_rolke = compute_limits_rolke(n_on, n_off, alpha, significance)
    # Python 2 print statement (notebook cell).
    print n_on, n_off, alpha, limits_lima, limits_rolke
# Output: 100 0 1 (None, None) (90.33017981414673, 110.33660955973012) 100 0 1 (None, None) (81.31051153599459, 121.35596019830203) 10 10 0.1 (None, None) (6.141612917334355, 12.516769078804394) 10 90 0.1 (None, None) (0.0, 4.617504851826792) 10 110 0.1 (None, None) (0.0, 2.6423695593692913)