import pandas as pd import numpy as np import patsy as pt import statsmodels.api as sm from statsmodels.sandbox.stats.multicomp import multipletests np.random.seed(1010093) pValues = [] for i in xrange(1000): x = np.random.normal(size=20) y = np.random.normal(size=20) y, x = pt.dmatrices('y ~ x') pValues.append(sm.OLS(y, x).fit().pvalues[1]) pValues = np.array(pValues) sum(pValues < .05) _, p_adjust, _, _ = multipletests(pValues, method='bonferroni') sum(p_adjust < .05) _, p_adjust, _, _ = multipletests(pValues, method='fdr_bh') sum(p_adjust < .05) np.random.seed(1010093) pValues = [] for i in xrange(1000): x = np.random.normal(size=20) # first 500 beta = 0, last 500 beta = 2 if i < 500: y = np.random.normal(size=20) else: y = np.random.normal(loc=2*x, size=20) y, x = pt.dmatrices('y ~ x') pValues.append(sm.OLS(y, x).fit().pvalues[1]) pValues = np.array(pValues) trueStatus = np.concatenate([np.repeat('zero', 500), np.repeat('not zero', 500)]) pd.crosstab(pValues < .05, trueStatus) _, p_adjust, _, _ = multipletests(pValues, method='bonferroni') pd.crosstab(p_adjust < .05, trueStatus) plot(pValues, p_adjust, 'ok') xlim(-.05, 1.05) ylim(-.05, 1.05) xlabel('pValues') ylabel('p_adjust'); _, p_adjust, _, _ = multipletests(pValues, method='fdr_bh') pd.crosstab(p_adjust < .05, trueStatus) plot(pValues, p_adjust, 'ok') xlim(-.05, 1.05) ylim(-.05, 1.05) xlabel('pValues') ylabel('p_adjust');