import numpy as np               # These imports are assumed throughout the notebook
import matplotlib.pyplot as plt

a = 2
b = '3'
a, b, type(a), type(b)

a + b  # This raises a TypeError: an int and a str cannot be added

b = int(b)
print b
a + b

# Integer division
a / b

for i in np.arange(6):
    print 'row: %s, column: %s' % (i / 3, i % 3)

b = 5.
a / b

l = [a, b]
l

d = {'a': a, 'b': b}
print d
print d['a']

list_subjects = [1, 2, 3, 4, 5]
list_rts = [514, 550, 831, 600, 200]
correct = [.8, .7, .65, .4, .9]

for i in range(len(list_subjects)):
    print list_subjects[i], list_rts[i], correct[i]

print zip(list_subjects, list_rts)
print zip(list_subjects, list_rts, correct)

for subj, rt, c in zip(list_subjects, list_rts, correct):
    print subj, rt, c

X = [[1, 0, 0], [0, 1, 0], [0, 1, 1]]
X = np.array(X)
print X
print X.shape
plt.imshow(X.T, interpolation='nearest')

Y = np.array([[0.3, 0.4, 0.7]]).T
print Y

X = np.array([[1, 0], [0, 1], [1, 1]])
print X

# Ordinary least squares: beta = (X'X)^-1 X'Y
beta = np.linalg.pinv(X.T.dot(X)).dot(X.T).dot(Y)
beta

X = np.array([np.random.random(15) < .5, np.random.random(15) < .5]).T.astype(int)
X

Y = np.random.randn(15, 2)
Y[:, 0] += 1
Y[:, 1] += 3
Y
Y = np.sum(X * Y, 1)

beta = np.linalg.pinv(X.T.dot(X)).dot(X.T).dot(Y)
beta

params = []
for i in np.arange(1000):
    Y = np.random.randn(15, 2)
    Y[:, 0] += 1
    Y[:, 1] += 3
    X = np.array([np.random.random(15) < .5, np.random.random(15) < .1]).T.astype(int)
    Y = np.sum(X * Y, 1)
    beta = np.linalg.pinv(X.T.dot(X)).dot(X.T).dot(Y)
    params.append(beta)

# Cast the list of numpy arrays to a 2D numpy array
params = np.array(params)

plt.hist(params, bins=15, label=['a', 'b'])
plt.legend()
plt.axvline(1, color='k', lw=2)
plt.axvline(3, color='k', lw=2)

from scipy import optimize

def obj_func(param1, param2):
    # Sum of squared errors of the linear model with these two parameters
    params = np.array([param1, param2])
    return np.sum((Y - X.dot(params))**2)

# Recall that param1 should be 1 and param2 should be 3
print obj_func(3, 3)
print obj_func(3, 10)
print obj_func(1, 3)

# Create simulated data
X = np.array([np.random.random(15) < .5, np.random.random(15) < .5]).T.astype(int)
Y = np.random.randn(15, 2)
Y[:, 0] += 5
Y[:, 1] -= 2
Y = np.sum(X * Y, 1)

# Find the OLS solution
beta = np.linalg.pinv(X.T.dot(X)).dot(X.T).dot(Y)
print beta

# Vectorize is a way of applying a function (in this case obj_func)
# to every element of a matrix
obj_func_vec = np.vectorize(obj_func)

# Set up the grid
x_lim = (-25, 25)
y_lim = (-25, 25)

# Very IMPORTANT function. Can you see what it does?
x, y = np.meshgrid(np.arange(*x_lim), np.arange(*y_lim))
print x
print y
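To answer that question, here is a minimal sketch on a tiny grid (the array sizes are just for demonstration): meshgrid turns two 1D coordinate vectors into two 2D arrays that together enumerate every (x, y)-combination on the grid.

# A minimal illustration of np.meshgrid on a 3-by-2 grid
xs, ys = np.meshgrid(np.arange(3), np.arange(2))
print xs  # [[0 1 2]
          #  [0 1 2]] -> the x-coordinate of every grid point
print ys  # [[0 0 0]
          #  [1 1 1]] -> the y-coordinate of every grid point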
# Plot the result of the grid search
plt.figure(figsize=(12, 9))
plt.imshow(obj_func_vec(x, y), extent=(x_lim + y_lim), cmap=plt.cm.jet_r, origin='lower')
plt.grid(color='k')
plt.colorbar()
plt.xlabel(r'$\beta_1$')

# Put a point on the OLS solution
plt.scatter(beta[0], beta[1], color='white')

import scipy as sp
from scipy import optimize

result = sp.optimize.minimize(lambda x: obj_func(*x), [0, 0], method='Nelder-Mead')
result

print 'param 1: %.2f' % result.x[0]    # %.2f: format result.x[0] as a string with two decimals
print 'param 2: %.2f' % result.x[1]
print 'in %d iterations' % result.nit  # %d: format result.nit as an integer

import statsmodels.api as sm
model = sm.OLS(Y, X)
fit = model.fit()
fit.summary()

f = open('behavior_jasper.csv')
for line in f.readlines()[:10]:  # Show the first 10 lines of the file
    print line

'Ik,ben,Gilles'.split(',')  # Dutch for 'I,am,Gilles'

f = open('/home/gdholla1/notebooks_demo/behavior_jasper.csv')
d = {}
keys = f.readline()
for line in f.readlines():
    line = line.split(',')
    #print line[6]
print keys

import pandas

# Read in a csv file
data = pandas.read_csv('/home/gdholla1/notebooks_demo/behavior_jasper.csv')
data

data.subj.unique()

### Groupby()
data.groupby('subj').mean()['RT']
data.groupby('subj').mean()['RT'].hist()
data.groupby(['drug', 'correct']).mean().RT

# The standard error: the standard deviation of the dataframe divided
# by the square root of the number of elements in the dataframe
se = lambda x: np.std(x) / np.sqrt(len(x))
data.groupby(['drug', 'correct']).RT.agg({'m': np.mean, 'se': se})

data.groupby('subj')
for g, d in data.groupby('subj'):
    print g, d.RT.mean()

plt.figure(figsize=(16, 20))
for g, d in data.groupby('subj'):  # Iterate over all groupings; g contains the group label, d the dataframe
    plt.subplot(5, 4, g)           # Make a grid of 5 rows and 4 columns and plot in subplot number g
    plt.hist(d.RT)                 # Plot a histogram of the RTs
    plt.xlim(0, 2000)              # Limit the x-axis to 0-2000 milliseconds

plt.figure(figsize=(20, 35))
for g, d in data.groupby('subj'):     # First group on subject...
    plt.subplot(7, 3, g)
    for cue, d2 in d.groupby('cue'):  # ...then on cue
        # All these histograms end up in the same subplot;
        # the alpha-parameter makes them slightly transparent
        plt.hist(d2.RT, label=str(cue), bins=np.arange(0, 2000, 50), alpha=0.6)
    plt.xlim(0, 2000)
    plt.title('Subject %s' % g)
    plt.legend()
plt.savefig('RT_histograms.pdf')

# If we want to do traditional psychology-like ANOVAs, we often want to average
# over all subject-condition combinations. Here we create a new dataframe 'd'
# that contains such a dataset.
d = data.groupby(['subj', 'cue', 'day'], as_index=False).mean()
d[['subj', 'RT', 'correct', 'day', 'cue']].head(25)  # Show only a limited number of columns,
                                                     # namely subj, RT, correct, day and cue

# Now we can compute statistics on these averaged-over data
d.groupby(['cue', 'drug']).agg([np.mean, lambda x: x.std() / np.sqrt(len(x))]).RT

import statsmodels.formula.api as smf
model = smf.ols('RT ~ subj + C(cue)*C(drug)', d[d.cue != 'null'])
r = model.fit()
r.summary()

from statsmodels.stats.anova import anova_lm
anova_lm(r)
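Under the hood the formula interface uses the patsy library: C() marks a column as categorical, and '*' expands into main effects plus their interaction. A minimal sketch of the design matrix this builds, on a toy dataframe rather than the real data (assuming patsy, a statsmodels dependency, is installed):

import patsy
toy = pandas.DataFrame({'cue': ['a', 'b', 'a', 'b'],
                        'drug': [0, 1, 1, 0]})  # Toy data, not the experiment's
# Columns: intercept, cue main effect, drug main effect, cue-by-drug interaction
print patsy.dmatrix('C(cue) * C(drug)', toy)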
### Integration with R
%load_ext rmagic
d = data[data.cue != 'null']

%%R -i d -o model
library(lmerTest)
model = lmer('RT ~ cue*drug + (1|subj) + (1|day)', d)
print(summary(model))
print('*************')
print(anova(model))
library(languageR)
plotLMER.fnc(model)

import hddm
hddm.HDDM?

# Set up some extra variables
data['rt'] = data.RT / 1000.     # Convert the RTs from milliseconds to seconds
data['response'] = data.correct  # We model responses as correct vs incorrect
data['subj_idx'] = data['subj']  # For hierarchical modeling the subjects have to be
                                 # indicated by a subj_idx-column
d = data[(data.resp != 0) & (data.resp != 99)].copy()  # Remove null-events and misses

model = hddm.HDDM(d[['rt', 'subj_idx', 'response', 'cue', 'day']],
                  depends_on={'a': 'cue', 'v': 'day'},
                  include=['p_outlier'])
model.sample(1000, burn=20)
model.print_stats()
model.plot_posteriors()

import nibabel as nb
image = nb.load('/home/gdholla1/ERC_data/QSM_data/Young/AD2T_QSM.nii')
print image
print image.shape  # Shape of the image
nb.orientations.aff2axcodes(image.get_affine())

data = image.get_data()
type(data), data.shape

plt.figure(figsize=(14, 10))
# .T transposes the slice, so that the rows of the matrix correspond to
# the y-axis and the columns to the x-axis; origin='lower' makes sure both axes go 'up'
plt.imshow(data[:, 27, 100:325].T, cmap=plt.cm.gray, origin='lower')

plt.figure(figsize=(24, 18))
for i, idx in enumerate(np.arange(30, 47, 2)):
    plt.subplot(3, 3, i + 1)
    plt.imshow(data[:, idx, 100:325].T, origin='lower', cmap=plt.cm.copper)
    plt.title('y = %s' % idx)
plt.savefig('ad2t.pdf')

mask_image_l = nb.load('/home/gdholla1/ERC_data/STN_masks/QSM/Young/AD2T_QSM_L_GH.nii.gz')
mask_l = mask_image_l.get_data()
mask_image_r = nb.load('/home/gdholla1/ERC_data/STN_masks/QSM/Young/AD2T_QSM_R_GH.nii.gz')
mask_r = mask_image_r.get_data()

plt.figure(figsize=(24, 30))
mask_l = np.ma.masked_array(mask_l, mask=mask_l == 0)  # Create masked arrays of the masks, in which
mask_r = np.ma.masked_array(mask_r, mask=mask_r == 0)  # every voxel where the mask is zero is masked

# enumerate returns tuples of (index_of_element_in_list, element);
# linspace returns an array of 6 evenly spaced values between 17 and 30
# (cast to int so they can be used as indices)
for i, idx in enumerate(np.linspace(17, 30, 6).astype(int)):
    plt.subplot(3, 2, i + 1)
    plt.imshow(data[:, idx, 100:325].T, origin='lower', cmap=plt.cm.gray)
    masked_data_l = np.ma.masked_array(data, mask=mask_l == 0)
    masked_data_r = np.ma.masked_array(data, mask=mask_r == 0)
    plt.imshow(masked_data_l[:, idx, 100:325].T, origin='lower', cmap=plt.cm.hot)  # Overlay the masked data
    plt.imshow(masked_data_r[:, idx, 100:325].T, origin='lower', cmap=plt.cm.hot)
    plt.title('y = %s' % idx)

plt.hist(data[mask_l != 0], bins=np.arange(-.1, .3, .01), alpha=0.6)  # Select all values within data where mask_l
                                                                      # is not equal to 0 and make a histogram
plt.hist(data[mask_r != 0], bins=np.arange(-.1, .3, .01), alpha=0.6)  # Same for mask_r, within the same plot

import glob
fns = glob.glob('/home/gdholla1/ERC_data/QSM_data/Young/*')
fns

import re
reg = re.compile('.*/([A-Z0-9]{4})_QSM.nii')  # Because ([A-Z0-9]{4}) is between parentheses,
                                              # it becomes a group that can be extracted
print reg                         # reg is a compiled regular expression (pattern object), which can
print reg.match(fns[0]).group(1)  # be applied to strings using its .match()-method
for fn in fns:
    print fn, reg.match(fn).group(1)

subj_idxs = []
for fn in fns:
    subj_idxs.append(reg.match(fn).group(1))
print subj_idxs
print [reg.match(fn).group(1) for fn in fns]  # The same thing, written as a list comprehension

import scipy.ndimage
com = np.array(sp.ndimage.center_of_mass(mask_l)).astype(int)  # Voxel coordinates of the mask's center of mass
offset = 15
data[:, com[1]-offset:com[1]+offset, :].squeeze().shape
plt.imshow(data[:, com[1], :].squeeze().T, origin='lower')

!fslreorient2std /home/gdholla1/ERC_data/QSM_data/Young/AD2T_QSM.nii /home/gdholla1/notebooks_demo/data/AD2T_QSM.nii
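The ! prefix only works inside IPython. In a plain Python script the same FSL command could be run with the subprocess module, for example (a sketch, using the same paths as above):

import subprocess
# Call fslreorient2std exactly like the shell does; check_call raises
# an exception if the command exits with a non-zero status
subprocess.check_call(['fslreorient2std',
                       '/home/gdholla1/ERC_data/QSM_data/Young/AD2T_QSM.nii',
                       '/home/gdholla1/notebooks_demo/data/AD2T_QSM.nii'])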
import nipype
from nipype.interfaces import fsl

reorienter = fsl.Reorient2Std()
reorienter.inputs.in_file = '/home/gdholla1/ERC_data/QSM_data/Young/AD2T_QSM.nii'
r = reorienter.run()
print r.outputs, reorienter.cmdline  # r contains a 'result'-object; cmdline shows the bash
                                     # command that has been executed

import nipype.pipeline as pe
import nipype.interfaces.io as nio

# Set up the workflow-object, which has a name and a base_dir where all intermediate files are stored
workflow = pe.Workflow(name='reorient_QSM', base_dir='/home/gdholla1/workflow_folders/')

# Set up a datagrabber: an interface that grabs data and feeds it to the analysis-nodes
# further downstream in the workflow. Its input is a subject_id; it outputs a QSM-file.
dg = pe.Node(nio.DataGrabber(infields=['subject_id'], outfields=['QSM']), name='datagrabber')

# Base directory to search in
dg.inputs.base_directory = '/home/gdholla1/ERC_data/QSM_data/Young/'
# Standard glob-template: just everything
dg.inputs.template = '*'
# Sort the output
dg.inputs.sort_filelist = True
# Specific template for the QSM-file: we are looking for a file that looks like
# %s_QSM.nii, where %s is filled in with the subject_id
dg.inputs.field_template = {'QSM': '%s_QSM.nii'}
dg.inputs.template_args = {'QSM': [['subject_id']]}

# We use the list of subject indices as input; by setting them as
# 'iterables' we set up N parallel workflows, where N is the number of subjects
dg.iterables = [('subject_id', subj_idxs)]

# We need only one line to set up a standard reorienter
reorient = pe.Node(fsl.Reorient2Std(), name='reorient')

# Now we can connect the datagrabber to the reorienter
workflow.connect(dg, 'QSM', reorient, 'in_file')

# DataSink is an interface that writes results away to some directory
ds = pe.Node(nio.DataSink(), name='datasink')
ds.inputs.base_directory = '/home/gdholla1/notebooks_demo/data'
workflow.connect(reorient, 'out_file', ds, 'reoriented_qsm')

# Write a picture of the workflow
workflow.write_graph()

# Use IPython to get the picture-filename and show it in the notebook
import os
from IPython.display import Image
Image(os.path.join(workflow.base_dir, workflow.name, 'graph.dot.png'))

# Run the workflow
workflow.run()

workflow = pe.Workflow(name='reorient_QSM', base_dir='/home/gdholla1/workflow_folders/')

dg = pe.Node(nio.DataGrabber(infields=['subject_id', 'rater', 'hemisphere'],
                             outfields=['QSM', 'mask']),
             name='datagrabber')
dg.inputs.template = '*'
dg.inputs.base_directory = '/home/gdholla1/ERC_data/'
dg.inputs.field_template = {'QSM': 'QSM_data/Young/%s_QSM.nii',
                            'mask': 'STN_masks/QSM/Young/%s_QSM_%s_%s.nii.gz'}
dg.inputs.template_args = {'QSM': [['subject_id']],
                           'mask': [['subject_id', 'hemisphere', 'rater']]}
dg.inputs.subject_id = 'AD2T'
dg.iterables = [('subject_id', subj_idxs),
                ('rater', ['GH', 'AA', 'CMA']),
                ('hemisphere', ['L', 'R'])]
dg.inputs.sort_filelist = True

reorient = pe.Node(fsl.Reorient2Std(), name='reorient')
workflow.connect(dg, 'QSM', reorient, 'in_file')

reorient_mask = pe.Node(fsl.Reorient2Std(), name='reorient_mask')
workflow.connect(dg, 'mask', reorient_mask, 'in_file')

ds = pe.Node(nio.DataSink(), name='datasink')
ds.inputs.base_directory = '/home/gdholla1/notebooks_demo/data'
ds.inputs.regexp_substitutions = [('reoriented_qsm/_hemisphere_([A-Z])_rater_([A-Z]{2,3})_subject_id_([A-Z0-9]{4})/(.*).nii.gz',
                                   'reoriented_qsm/\\3/\\4.nii.gz'),
                                  ('reoriented_qsm_masks/_hemisphere_([A-Z])_rater_([A-Z]{2,3})_subject_id_([A-Z0-9]{4})/(.*).nii.gz',
                                   'reoriented_qsm_masks/\\3/\\4_\\1_\\2.nii.gz')]
workflow.connect(reorient, 'out_file', ds, 'reoriented_qsm')
workflow.connect(reorient_mask, 'out_file', ds, 'reoriented_qsm_masks')  # The reoriented masks come from the reorient_mask node
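The regexp_substitutions above are easiest to understand by applying one by hand with re.sub. The path below illustrates what the DataSink receives from an iterable-expanded node (it is illustrative, not an actual output of this run):

import re
path = 'reoriented_qsm_masks/_hemisphere_L_rater_GH_subject_id_AD2T/AD2T_QSM_reoriented.nii.gz'
pattern = 'reoriented_qsm_masks/_hemisphere_([A-Z])_rater_([A-Z]{2,3})_subject_id_([A-Z0-9]{4})/(.*).nii.gz'
# The groups capture hemisphere, rater and subject_id; the replacement
# reshuffles them into a flat, readable filename
print re.sub(pattern, 'reoriented_qsm_masks/\\3/\\4_\\1_\\2.nii.gz', path)
# -> reoriented_qsm_masks/AD2T/AD2T_QSM_reoriented_L_GH.nii.gz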
workflow.write_graph()
import os
Image(os.path.join(workflow.base_dir, workflow.name, 'graph.dot.png'))

workflow.run(plugin='MultiProc', plugin_args={'n_procs': 8})

def get_iron_content(data_fn, mask_fn):
    # The imports happen inside the function: a nipype Function node only ships
    # the function's source code, so everything it needs must be imported here
    import nibabel as nb
    import os
    import numpy as np

    data = nb.load(data_fn).get_data()
    mask = nb.load(mask_fn).get_data()

    qsm_value = np.mean(data[mask != 0])  # Mean QSM value within the mask

    f = open(os.path.join(os.getcwd(), 'mean_qsm_value.txt'), 'w')
    f.write(str(qsm_value))
    f.close()

    return os.path.join(os.getcwd(), 'mean_qsm_value.txt')

fn = get_iron_content('/home/gdholla1/notebooks_demo/data/reoriented_qsm/AD2T/AD2T_QSM_reoriented.nii.gz',
                      '/home/gdholla1/notebooks_demo/data/reoriented_qsm_masks/AD2T/AD2T_QSM_reoriented_L_AA.nii.gz')
print fn
print open(fn).read()

import nipype.interfaces.utility as util  # Submodule required for Function nodes

iron_node = pe.Node(util.Function(function=get_iron_content,
                                  input_names=['data_fn', 'mask_fn'],
                                  output_names=['iron_content']),
                    name='iron')
iron_node.inputs.data_fn = '/home/gdholla1/notebooks_demo/data/reoriented_qsm/AD2T/AD2T_QSM_reoriented.nii.gz'
iron_node.inputs.mask_fn = '/home/gdholla1/notebooks_demo/data/reoriented_qsm_masks/AD2T/AD2T_QSM_reoriented_L_AA.nii.gz'
r = iron_node.run()
print r.outputs
print open(r.outputs.iron_content).read()

workflow = pe.Workflow(name='reorient_QSM', base_dir='/home/gdholla1/workflow_folders/')

dg = pe.Node(nio.DataGrabber(infields=['subject_id', 'rater', 'hemisphere'],
                             outfields=['QSM', 'mask']),
             name='datagrabber')
dg.inputs.template = '*'
dg.inputs.base_directory = '/home/gdholla1/ERC_data/'
dg.inputs.field_template = {'QSM': 'QSM_data/Young/%s_QSM.nii',
                            'mask': 'STN_masks/QSM/Young/%s_QSM_%s_%s.nii.gz'}
dg.inputs.template_args = {'QSM': [['subject_id']],
                           'mask': [['subject_id', 'hemisphere', 'rater']]}
dg.iterables = [('subject_id', subj_idxs),
                ('rater', ['GH', 'AA', 'CMA']),
                ('hemisphere', ['L', 'R'])]
dg.inputs.sort_filelist = True

reorient = pe.Node(fsl.Reorient2Std(), name='reorient')
workflow.connect(dg, 'QSM', reorient, 'in_file')

reorient_mask = pe.Node(fsl.Reorient2Std(), name='reorient_mask')
workflow.connect(dg, 'mask', reorient_mask, 'in_file')

iron_node = pe.Node(util.Function(function=get_iron_content,
                                  input_names=['data_fn', 'mask_fn'],
                                  output_names=['iron_content']),
                    name='iron')
workflow.connect(reorient, 'out_file', iron_node, 'data_fn')
workflow.connect(reorient_mask, 'out_file', iron_node, 'mask_fn')

ds = pe.Node(nio.DataSink(), name='datasink')
ds.inputs.base_directory = '/home/gdholla1/notebooks_demo/data'
ds.inputs.regexp_substitutions = [('reoriented_qsm/_hemisphere_([A-Z])_rater_([A-Z]{2,3})_subject_id_([A-Z0-9]{4})/(.*).nii.gz',
                                   'reoriented_qsm/\\3/\\4.nii.gz'),
                                  ('reoriented_qsm_masks/_hemisphere_([A-Z])_rater_([A-Z]{2,3})_subject_id_([A-Z0-9]{4})/(.*).nii.gz',
                                   'reoriented_qsm_masks/\\3/\\4_\\1_\\2.nii.gz')]
workflow.connect(reorient, 'out_file', ds, 'reoriented_qsm')
workflow.connect(reorient_mask, 'out_file', ds, 'reoriented_qsm_masks')
workflow.connect(iron_node, 'iron_content', ds, 'iron_values')

workflow.write_graph()
import os
Image(os.path.join(workflow.base_dir, workflow.name, 'graph.dot.png'))
workflow.run(plugin='MultiProc', plugin_args={'n_procs': 8})

## Analyze the output of the NiPype-workflow
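The first cell below relies on the fact that zip(*l) undoes zip(l); a minimal illustration:

pairs = zip([1, 2, 3], ['a', 'b', 'c'])  # [(1, 'a'), (2, 'b'), (3, 'c')]
numbers, letters = zip(*pairs)  # The *-operator unpacks the list into separate arguments,
print numbers                   # so zip re-pairs all first and all second elements:
print letters                   # (1, 2, 3) and ('a', 'b', 'c')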
# EXTRACT INFO FROM FILENAMES
import glob
fns = glob.glob('/home/gdholla1/notebooks_demo/data/iron_values/_hemisphere_*_rater_*_subject_id_*/mean_qsm_value.txt')
reg = re.compile('.*/_hemisphere_([A-Z])_rater_([A-Z]+)_subject_id_([A-Z0-9]+)/mean_qsm_value.txt')

# zip(*l) is the inverse of zip(l)
hemispheres, raters, subject_ids = zip(*[reg.match(fn).groups() for fn in fns])
print hemispheres
print raters
print subject_ids

iron_values = [float(open(fn).read()) for fn in fns]
print iron_values

# Set up a dataframe
qsm_data = pandas.DataFrame({'hemisphere': hemispheres,
                             'rater': raters,
                             'subject_id': subject_ids,
                             'qsm_values': iron_values})

import scipy as sp
from scipy import stats
qsm_data.groupby(['hemisphere']).agg([np.mean, sp.stats.sem])  # stats.sem computes the standard error of the mean

model = smf.ols(formula='qsm_values ~ subject_id*rater + hemisphere', data=qsm_data)
f = model.fit()
f.summary()

anova_lm(f)

import nipype
from nipype.workflows.fmri.fsl import create_featreg_preproc

workflow = create_featreg_preproc()
workflow.write_graph()

from IPython.display import Image
import os
Image('graph.dot.png')

workflow.inputs.inputspec
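Printing workflow.inputs.inputspec shows which inputs of the prebuilt workflow are still open. Following the example in the nipype documentation for create_featreg_preproc, filling them in looks roughly like this (the functional filename and TR are hypothetical):

workflow.inputs.inputspec.func = ['run1.nii']          # Hypothetical functional run
workflow.inputs.inputspec.fwhm = 5                     # Smoothing kernel width in mm
workflow.inputs.inputspec.highpass = 128. / (2 * 2.5)  # High-pass cutoff in volumes, assuming a TR of 2.5 s
workflow.base_dir = '/home/gdholla1/workflow_folders/'
workflow.run()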