# Our normal warm up

```python
from __future__ import print_function, division
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
import scipy.stats as sst

np.random.seed(42)  # so we always get the same random numbers
N = 100
z_values = np.random.normal(size=N)
normal_distribution = sst.norm(loc=0., scale=1.)  # loc is the mean, scale is the standard deviation
# The normal CDF gives a p value for each z score
p_values = normal_distribution.cdf(z_values)
p_values = np.sort(p_values)
i = np.arange(1, N + 1)  # the 1-based i index of the p values, as in p(i)
plt.plot(i, p_values, '.')
plt.xlabel('$i$')
plt.ylabel('p value')
```

Plot the sorted p values together with the line $q i / N$:

```python
q = 0.05
plt.plot(i, p_values, 'b.', label='$p(i)$')
plt.plot(i, q * i / N, 'r', label='$q i / N$')
plt.xlabel('$i$')
plt.ylabel('$p$')
plt.legend()
```

Now mix some true signal in with the noise:

```python
N_signal = 20
N_noise = N - N_signal
noise_z_values = np.random.normal(size=N_noise)
# Add some signal with very low z scores / p values
signal_z_values = np.random.normal(loc=-2.5, size=N_signal)
mixed_z_values = np.sort(np.concatenate((noise_z_values, signal_z_values)))
mixed_p_values = normal_distribution.cdf(mixed_z_values)
plt.plot(i, mixed_p_values, 'b.', label='$p(i)$')
plt.plot(i, q * i / N, 'r', label='$q i / N$')
plt.xlabel('$i$')
plt.ylabel('$p$')
plt.legend()
```

Zoom in on the first 30 p values:

```python
first_i = i[:30]
plt.plot(first_i, mixed_p_values[:30], 'b.', label='$p(i)$')
plt.plot(first_i, q * first_i / N, 'r', label='$q i / N$')
plt.xlabel('$i$')
plt.ylabel('$p$')
plt.legend()
```

```python
below = mixed_p_values < (q * i / N)  # True where p(i) < q * i / N
```
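The `below` array sets up the Benjamini-Hochberg step: find the largest $i$ at which $p(i)$ falls below the $q i / N$ line, and reject the null for every test up to and including that rank. The section stops before that step; the following is a minimal sketch of how it could continue, using only the arrays defined above (the names `max_below_idx`, `bh_p_threshold`, and `n_detected` are illustrative, not from the source).

```python
# Sketch (assumption, not from the original section): apply the
# Benjamini-Hochberg rule using the `below` array computed above.
if below.any():
    # 0-based index of the largest i with p(i) < q * i / N
    max_below_idx = np.max(np.nonzero(below)[0])
    # p value at that rank; every p(1) ... p(i) up to this rank is declared
    # significant, even if some of them individually sit above the line
    bh_p_threshold = mixed_p_values[max_below_idx]
    n_detected = max_below_idx + 1
    print('Largest i with p(i) < q i / N:', max_below_idx + 1)
    print('p value threshold:', bh_p_threshold)
    print('Number of tests declared significant:', n_detected)
else:
    print('No p values fall below the q i / N line')
```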