using Distributions
using GSL
using Gadfly
function prob(µ, N)
if N <= 0
return 0
end
if µ == 0
return 1
elseif µ < 0
return 0
end
return sf_gamma_inc_Q(0.5*N, 0.5*µ)
end
prob (generic function with 1 method)
In HEP, we usually quote the 90% CL on branching ratios. The probability to observe N events given there are µ true events is 1 - prob(2µ, 2N). To find the number that corresponds to an upper limit of 90%, one has to find the highest number of events, such that 1 - prob > 0.9
µ = 2.3
N = 0
# we'll increase N until we overshoot
while 1-prob(2*µ,2*N)>0.9
N += 1
end
# since we overshot, N-1 is the right answer
N -= 1
println("N is $N. The confidence level to find exactly $N event, given $µ true events is $(1-prob(2*µ,2*N))")
N is 0. The confidence level to find exactly 0 event, given 2.3 true events is 1
So we have over-coverage (due to the fact that N is integer)
Conversely, the number of true events that results in a given confidence limit for a given number of observed events is given by the following functions, copied from ROOT
# Computes quantiles for standard normal distribution N(0, 1)
# at probability p
# ALGORITHM AS241 APPL. STATIST. (1988) VOL. 37, NO. 3, 477-484.
function normQuantile(p)
if (p <= 0) || (p >= 1)
println("TMath::NormQuantile", "probability outside (0, 1)")
return 0
end
a0 = 3.3871328727963666080e0
a1 = 1.3314166789178437745e+2
a2 = 1.9715909503065514427e+3
a3 = 1.3731693765509461125e+4
a4 = 4.5921953931549871457e+4
a5 = 6.7265770927008700853e+4
a6 = 3.3430575583588128105e+4
a7 = 2.5090809287301226727e+3
b1 = 4.2313330701600911252e+1
b2 = 6.8718700749205790830e+2
b3 = 5.3941960214247511077e+3
b4 = 2.1213794301586595867e+4
b5 = 3.9307895800092710610e+4
b6 = 2.8729085735721942674e+4
b7 = 5.2264952788528545610e+3
c0 = 1.42343711074968357734e0
c1 = 4.63033784615654529590e0
c2 = 5.76949722146069140550e0
c3 = 3.64784832476320460504e0
c4 = 1.27045825245236838258e0
c5 = 2.41780725177450611770e-1
c6 = 2.27238449892691845833e-2
c7 = 7.74545014278341407640e-4
d1 = 2.05319162663775882187e0
d2 = 1.67638483018380384940e0
d3 = 6.89767334985100004550e-1
d4 = 1.48103976427480074590e-1
d5 = 1.51986665636164571966e-2
d6 = 5.47593808499534494600e-4
d7 = 1.05075007164441684324e-9
e0 = 6.65790464350110377720e0
e1 = 5.46378491116411436990e0
e2 = 1.78482653991729133580e0
e3 = 2.96560571828504891230e-1
e4 = 2.65321895265761230930e-2
e5 = 1.24266094738807843860e-3
e6 = 2.71155556874348757815e-5
e7 = 2.01033439929228813265e-7
f1 = 5.99832206555887937690e-1
f2 = 1.36929880922735805310e-1
f3 = 1.48753612908506148525e-2
f4 = 7.86869131145613259100e-4
f5 = 1.84631831751005468180e-5
f6 = 1.42151175831644588870e-7
f7 = 2.04426310338993978564e-15
split1 = 0.425
split2 = 5.
konst1 = 0.180625
konst2 = 1.6
# q::Float64
# r::Float64
# quantile::Float64
q = p-0.5
if abs(q) < split1
r = konst1 - q*q
quantile = q* (((((((a7 * r + a6) * r + a5) * r + a4) * r + a3)
* r + a2) * r + a1) * r + a0) /
(((((((b7 * r + b6) * r + b5) * r + b4) * r + b3)
* r + b2) * r + b1) * r + 1.)
else
if q < 0
r = p
else
r = 1-p
end
# error case
if r <= 0
quantile = 0
else
r = sqrt(-log(r))
if r<=split2
r=r-konst2
quantile=(((((((c7 * r + c6) * r + c5) * r + c4) * r + c3)
* r + c2) * r + c1) * r + c0) /
(((((((d7 * r + d6) * r + d5) * r + d4) * r + d3)
* r + d2) * r + d1) * r + 1)
else
r = r - split2
quantile = (((((((e7 * r + e6) * r + e5) * r + e4) * r + e3)
* r + e2) * r + e1) * r + e0) /
(((((((f7 * r + f6) * r + f5) * r + f4) * r + f3)
* r + f2) * r + f1) * r + 1)
end
if q < 0
quantile =- quantile
end
end
end
return quantile
end
# Evaluate the quantiles of the chi-squared probability distribution function.
# Algorithm AS 91 Appl. Statist. (1975) Vol.24, P.35
# Incorporates the suggested changes in AS R85 (vol.40(1), pp.233-5, 1991)
# Parameters:
# p - the probability value, at which the quantile is computed
# ndf - number of degrees of freedom
function chisquareQuantile(p, ndf)
if ndf <= 0
return 0
end
c = Float64[0.01, 0.222222, 0.32, 0.4, 1.24, 2.2,
4.67, 6.66, 6.73, 13.32, 60.0, 70.0,
84.0, 105.0, 120.0, 127.0, 140.0, 175.0,
210.0, 252.0, 264.0, 294.0, 346.0, 420.0,
462.0, 606.0, 672.0, 707.0, 735.0, 889.0,
932.0, 966.0, 1141.0, 1182.0, 1278.0, 1740.0,
2520.0, 5040.0]
e = 5e-7
aa = 0.6931471806
maxit = 20
g = lgamma(0.5*ndf)
xx = 0.5 * ndf
cp = xx - 1
if ndf >= log(p)*(-c[5])
# starting approximation for ndf less than or equal to 0.32
if (ndf > c[3])
x = normQuantile(p)
# starting approximation using Wilson and Hilferty estimate
p1=c[2]/ndf
ch = ndf*(x*sqrt(p1) + 1 - p1)^3
if ch > c[6]*ndf + 6
ch = -2 * (log(1-p) - cp * log(0.5 * ch) + g)
end
else
ch = c[4]
a = log(1-p)
while true
q = ch
p1 = 1 + ch * (c[7]+ch)
p2 = ch * (c[9] + ch * (c[8] + ch))
t = -0.5 + (c[7] + 2 * ch) / p1 - (c[9] + ch * (c[10] + 3 * ch)) / p2
ch = ch - (1 - exp(a + g + 0.5 * ch + cp * aa) *p2 / p1) / t
if abs(q/ch - 1) <= c[1]
break
end
end
end
else
ch = (p * xx * exp(g + xx * aa))^(1./xx)
if ch < e
return ch
end
end
# call to algorithm AS 239 and calculation of seven term Taylor series
for i=1:maxit
q = ch
p1 = 0.5 * ch
p2 = p - sf_gamma_inc_P(xx, p1)
t = p2 * exp(xx * aa + g + p1 - cp * log(ch))
b = t / ch
a = 0.5 * t - b * cp
s1 = (c[19] + a * (c[17] + a * (c[14] + a * (c[13] + a * (c[12] +c[11] * a))))) / c[24]
s2 = (c[24] + a * (c[29] + a * (c[32] + a * (c[33] + c[35] * a)))) / c[37]
s3 = (c[19] + a * (c[25] + a * (c[28] + c[31] * a))) / c[37]
s4 = (c[20] + a * (c[27] + c[34] * a) + cp * (c[22] + a * (c[30] + c[36] * a))) / c[38]
s5 = (c[13] + c[21] * a + cp * (c[18] + c[26] * a)) / c[37]
s6 = (c[15] + cp * (c[23] + c[16] * cp)) / c[38]
ch = ch + t * (1 + 0.5 * t * s1 - b * cp * (s1 - b * (s2 - b * (s3 - b * (s4 - b * (s5 - b * s6))))))
if abs(q / ch - 1) > e
break
end
end
return ch
end
chisquareQuantile (generic function with 1 method)
Note: To reproduce the numbers from exercises in Stefan Schmitt's lecture from LimitStatSchool2013, one needs to enter Nobs + 1
The true number of events µ = s + b. A simple estimate for the limit can be obtained from the following function. A more accurate limit can be obtained using Bayesian methods with likelihoods as input.
# Maximum number of µ with N_obs = 0 at 90% CL
chisquareQuantile(0.9, 2)/2
2.3025850929949723
# Signal Efficiency * tag efficiency
efficiency = 0.003
# expected number of background events
n_bg = linspace(0, 2.3, 200)
# assuming 0 observed events, µ at 90% CL is at most 2.3
# number of charged B mesons: 772e6
n_B_mesons = 772e6
# published limit for hadronic tag in B -> µν: 2.7E-6
branching_fraction = map(x -> (2.3-x) / n_B_mesons / efficiency, n_bg)
plot(x=n_bg, y=branching_fraction)
We can see that this construction gives us a better estimate for higher background. This is not entirely helpful. Instead, use code from Stefan Schmitt's lecture.
# calculate the expected signal limit, given the confidence level cl
# and the background b. Note: does not work for very large b
function getExpectedLimit(cl, b)
n0 = b
n1 = n0 + 1
p0 = 1.0
if b > 0.0
p0 = exp(n0*log(b) - b - lgamma(n0+1.))
end
p1 = p0 * b / n1
sum = 0.0
# sum up poisson terms from n0 to infinity
# (stop as the contribution vanishes)
while (p0 > 0.) && (n0 >= 0)
muLimit = 0.5 * chisquareQuantile(cl, 2*(n0+1))
signal = muLimit - b
sum += p0*signal
p0 *= n0 / b
n0 -= 1
end
# sum up poisson terms from n1 down to zero
# (stop earlier if teh contribution vanishes)x
while p1 > 0.0
muLimit = 0.5 * chisquareQuantile(cl, 2*(n1+1))
signal = muLimit - b
sum += p1*signal
n1 += 1
p1 *= b/n1
end
return sum
end
getExpectedLimit (generic function with 1 method)
# published limit for hadronic tag in B -> µν: 2.7E-6
branching_fraction2 = map(x -> getExpectedLimit(0.9, x) / n_B_mesons / efficiency, n_bg)
plot(x=n_bg, y=branching_fraction2)