s=np.random.normal(size=(50000,100)) n = np.array(range(2,100)) # Note that to estimate the population SD from a sample, use ddof=1 rather than the default ddof=0. # With ddof=0, the SD bias is 4x higher. sd_est = np.array([s[:, 0:ii].std(axis=1, ddof=1).mean() for ii in n]) plot(n, sd_est) from scipy.special import gamma bias = sd_est * (1. - sqrt(2. / (n-1)) * (gamma(n / 2.) / gamma((n-1) / 2.))) plot(n,bias,'r-', n,sd_est/(4*n),'b-') xlabel('Sample size') ylabel('Standard deviation estimate bias') legend(('Full estimate','SD/4n approximation')) plot([0,100],[1,1],'k--', n,sd_est,'b-', n,(sd_est+sd_est/(4*n)),'c-', n,(sd_est+bias),'m-') xlabel('Sample size') ylabel('Standard deviation estimate') legend(('True population SD', 'Uncorrected estimate', 'Corrected (SD/4n approx.)', 'Corrected (full model)'),loc='lower right')