%matplotlib inline
import random

random.seed(34557)
results = []
for n in xrange(30):
    results.append(random.choice(["T", "H"]))
print "".join(results)

import numpy as np
import matplotlib.pyplot as plt

np.random.seed(6345345)
# Make 200 random numbers and use half as X coordinate
# and the other half as Y coordinate for 100 points
X = np.random.uniform(size=(100*2))
plt.scatter(X[:100], X[100:])

%load_ext cythonmagic

%%cython -l gsl
# Include the right header file and declare the function
cdef extern from "gsl/gsl_sf_bessel.h":
    double gsl_sf_bessel_J0(double x)

# Small python wrapper to give it a nicer name
def bessel_J0(double x):
    return gsl_sf_bessel_J0(x)

bessel_J0(2.21)

%%cython -l gsl
from cpython.mem cimport PyMem_Malloc, PyMem_Free

# Declare the few types and functions we need
cdef extern from "gsl/gsl_qrng.h":
    ctypedef struct gsl_qrng
    ctypedef struct gsl_qrng_type
    gsl_qrng_type* gsl_qrng_sobol
    gsl_qrng* gsl_qrng_alloc(gsl_qrng_type* T, unsigned int d)
    void gsl_qrng_free(gsl_qrng* q)
    int gsl_qrng_get(const gsl_qrng* q, double x[])

# This is the wrapper class
cdef class Sobol:
    cdef gsl_qrng* _q
    cdef unsigned _D
    cdef double* _v

    def __cinit__(self, D=1):
        """Create a `D` dimensional Sobol sequence."""
        self._D = D

        # gsl_qrng_get() returns the next
        # value in one of its arguments
        self._v = <double*> PyMem_Malloc(D * sizeof(double))
        if not self._v:
            raise MemoryError()

        # Actually allocate the QRNG generator
        self._q = gsl_qrng_alloc(gsl_qrng_sobol, D)
        if self._q is NULL:
            raise MemoryError()

    def get(self, int N=1):
        """The next `N` points in the sequence."""
        points = []
        for n in xrange(N):
            points.append(self.__next__())
        return points

    def __next__(self):
        """Iterate over the sequence."""
        gsl_qrng_get(self._q, self._v)
        return [self._v[i] for i in xrange(self._D)]

    def __iter__(self):
        return self

    # Make sure we free all the memory we allocated
    def __dealloc__(self):
        if self._q is not NULL:
            gsl_qrng_free(self._q)
        PyMem_Free(self._v)

s = Sobol()
for n, x in enumerate(s):
    print "Sobol point", n, "is", x
    if n > 10:
        break

s2 = Sobol(2)
sobol_X, sobol_Y = zip(*s2.get(100))
# Shift the whole sequence by a random offset, wrapping around at one
sobol_X2 = (np.array(sobol_X) + np.random.uniform()) % 1
sobol_Y2 = (np.array(sobol_Y) + np.random.uniform()) % 1
X = np.random.uniform(size=(100*2))

f, (ax1, ax2, ax3) = plt.subplots(ncols=3, figsize=(12, 4))
ax1.scatter(X[:100], X[100:])
ax2.scatter(sobol_X, sobol_Y, color="red")
ax3.scatter(sobol_X2, sobol_Y2, color="green")
ax1.set_title("Random")
ax2.set_title("Sobol")
ax3.set_title("Sobol again")

import scipy.stats as stats

_norm_pdf = stats.norm.pdf
def normal(x):
    """Gaussian pdf with mean 1 and width 0.2."""
    return _norm_pdf(x, 1, 0.2)

def draw_gaus(samples=[]):
    """Draw the pdf and a rug plot for each set of sample points."""
    X = np.linspace(0, 2, 200)
    Y = [normal(x) for x in X]

    f, ax = plt.subplots(figsize=(4, 4))
    ax.plot(X, Y)
    ax.set_ylim([0., 2.1])
    ax.set_xlabel("X")
    ax.set_ylabel("Y")

    rug_y = 0.1
    for sample_points in samples:
        ax.plot(sample_points, [rug_y]*len(sample_points), '|', ms=20)
        rug_y += 0.2

    return f, ax

# Sample 100 points from a Sobol and a uniform sequence
# over the range 0 to 2.
s = Sobol()
sobol_samples = np.asarray(s.get(100)) * 2
random_samples = np.random.uniform(size=100) * 2
draw_gaus([random_samples, sobol_samples])
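# Quick cross-check of the reference value before doing any Monte Carlo:
# the pdf has mean 1 and width 0.2, so essentially all of its probability
# mass lies inside [0, 2] and the area there should be very close to one.
# This is only an illustrative sanity check using scipy's adaptive
# quadrature; it is not needed for the estimates below.
from scipy.integrate import quad

reference_area, quad_error = quad(normal, 0, 2)
print "Reference area: %.6f (quadrature error estimate %.2g)" % (reference_area, quad_error)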
def MC_integral(samples):
    """Monte Carlo estimate of the area under normal() between 0 and 2."""
    L = 2.
    dx = L / len(samples)
    values = []
    for point in samples:
        values.append(normal(point))
    # Standard Monte Carlo uncertainty: L * std(f) / sqrt(N)
    uncertainty = (L / np.sqrt(len(samples))) * np.std(values, ddof=1)
    return sum(values) * dx, uncertainty

print "Area: %.4f +- %.4f" % (MC_integral(np.random.uniform(size=100)*2))

for N in (100, 200, 400, 800, 1600):
    area, uncertainty = MC_integral(np.random.uniform(size=N)*2)
    print "%d samples, Area: %.4f +- %.4f" % (N, area, uncertainty)

def test_integration(sampler, integrator, repeats=2000):
    """Repeat the integration many times and histogram the results."""
    Is = []
    for n in xrange(repeats):
        points = sampler()
        Is.append(integrator(points))

    integrals, errors = zip(*Is)
    # We know that the area is one, so by subtracting
    # the true value and dividing by the calculated
    # uncertainty we should end up with a gaussian
    # distribution centered on zero and width one if we
    # did everything right
    bias = integrals - np.ones_like(integrals)
    pulls = bias / errors

    f, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(8, 8))
    ax1.hist(integrals, bins=20, range=(0.7, 1.3))
    ax1.set_title("Area")
    ax2.hist(errors, bins=20, range=(np.min(errors)*0.8, np.max(errors)*1.2))
    ax2.set_title("Calculated Uncertainty")
    ax3.hist(pulls, range=(-4, 4), bins=20)
    ax3.set_title("Pull")
    ax4.hist(bias, bins=20)
    ax4.set_title("Bias")

    print "Bias: %.4f, average uncertainty: %.4f" % (np.mean(bias), np.mean(errors))
    # I hope I got the uncertainty on the width of the pull distribution right
    # Error on the error on the error on the error ...
    print "Width of pull distribution: %.4f +- %.5f" % (np.std(pulls),
                                                        np.std(pulls)/np.sqrt(2.*(len(pulls)-1)))
    return np.mean(errors)

test_integration(lambda: np.random.uniform(size=100)*2, MC_integral, repeats=4*500)

s = Sobol()
sobol_points = s.get(100)
# Two approaches to re-randomise the Sobol sequence that should
# allow for uncertainty estimates but do not. Try them yourself!
#sampler = lambda: ((np.asarray(sobol_points).ravel() + np.random.uniform())%1)*2
#sampler = lambda: ((np.asarray(s.get(100)).ravel() + np.random.uniform())%1)*2
sampler = lambda: np.asarray(s.get(100)).ravel()*2
test_integration(sampler, MC_integral, repeats=4*500)

test_integration(lambda: np.random.uniform(size=100)*2, MC_integral, repeats=2*500)

# How well we can estimate the width of the pull distribution depends on
# the number of repeats, not the number of samples
test_integration(lambda: np.random.uniform(size=200)*2, MC_integral, repeats=2*500)
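# Illustrative follow-up: scan the number of samples and compare how the
# error on the area shrinks for pseudo-random and Sobol sampling. The
# helper below is only a sketch that reuses Sobol(), MC_integral() and
# normal() from above; for the pseudo-random sampler the error is taken
# as the spread of repeated estimates, for the deterministic Sobol
# sequence as the absolute deviation of a single estimate from one.
def scan_sample_sizes(Ns=(100, 200, 400, 800, 1600), repeats=200):
    random_err, sobol_err = [], []
    for N in Ns:
        # Spread of repeated pseudo-random estimates
        estimates = [MC_integral(np.random.uniform(size=N)*2)[0]
                     for r in xrange(repeats)]
        random_err.append(np.std(estimates, ddof=1))
        # A Sobol sequence is deterministic, so use the bias of one estimate
        s = Sobol()
        sobol_area = MC_integral(np.asarray(s.get(N)).ravel()*2)[0]
        sobol_err.append(abs(sobol_area - 1.))

    f, ax = plt.subplots(figsize=(4, 4))
    ax.loglog(Ns, random_err, "o-", label="pseudo-random")
    ax.loglog(Ns, sobol_err, "s-", label="Sobol")
    ax.set_xlabel("Number of samples")
    ax.set_ylabel("Error on the area")
    ax.legend(loc="best")
    return f, ax

scan_sample_sizes()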