import numpy as np               # These imports are assumed throughout the notebook
import matplotlib.pyplot as plt

a = 2
b = '3'
a, b, type(a), type(b)

a + b  # This raises a TypeError: an int and a str cannot be added

b = int(b)
print b
a + b

# Integer division
a / b

for i in np.arange(6):
    print 'row: %s, column: %s' % (i / 3, i % 3)

b = 5.
a / b

l = [a, b]
l

d = {'a': a, 'b': b}
print d
print d['a']

list_subjects = [1, 2, 3, 4, 5]
list_rts = [514, 550, 831, 600, 200]
correct = [.8, .7, .65, .4, .9]

for i in range(len(list_subjects)):
    print list_subjects[i], list_rts[i], correct[i]

print zip(list_subjects, list_rts)
print zip(list_subjects, list_rts, correct)

for subj, rt, c in zip(list_subjects, list_rts, correct):
    print subj, rt, c

X = [[1, 0, 0], [0, 1, 0], [0, 1, 1]]
X = np.array(X)
print X
print X.shape
plt.imshow(X.T, interpolation='nearest')

Y = np.array([[0.3, 0.4, 0.7]]).T
print Y

X = np.array([[1, 0], [0, 1], [1, 1]])
print X

# Ordinary least squares: beta = (X'X)^-1 X'Y
beta = np.linalg.pinv(X.T.dot(X)).dot(X.T).dot(Y)
beta

X = np.array([np.random.random(15) < .5, np.random.random(15) < .5]).T.astype(int)
X

Y = np.random.randn(15, 2)
Y[:, 0] += 1
Y[:, 1] += 3
Y
Y = np.sum(X * Y, 1)

beta = np.linalg.pinv(X.T.dot(X)).dot(X.T).dot(Y)
beta

params = []
for i in np.arange(1000):
    Y = np.random.randn(15, 2)
    Y[:, 0] += 1
    Y[:, 1] += 3
    X = np.array([np.random.random(15) < .5, np.random.random(15) < .1]).T.astype(int)
    Y = np.sum(X * Y, 1)
    beta = np.linalg.pinv(X.T.dot(X)).dot(X.T).dot(Y)
    params.append(beta)

# Cast the list of numpy arrays to a 2D numpy array
params = np.array(params)

plt.hist(params, bins=15, label=['a', 'b'])
plt.legend()
plt.axvline(1, color='k', lw=2)
plt.axvline(3, color='k', lw=2)

from scipy import optimize

def obj_func(param1, param2):
    # Sum of squared errors of the linear model with these two parameters
    params = np.array([param1, param2])
    return np.sum((Y - X.dot(params))**2)

# Recall that param1 should be 1 and param2 should be 3
print obj_func(3, 3)
print obj_func(3, 10)
print obj_func(1, 3)

# Create simulated data
X = np.array([np.random.random(15) < .5, np.random.random(15) < .5]).T.astype(int)
Y = np.random.randn(15, 2)
Y[:, 0] += 5
Y[:, 1] -= 2
Y = np.sum(X * Y, 1)

# Find the OLS solution
beta = np.linalg.pinv(X.T.dot(X)).dot(X.T).dot(Y)
print beta

# Vectorize is a way of applying a function (in this case obj_func)
# to every element of a matrix
obj_func_vec = np.vectorize(obj_func)

# Set up the grid
x_lim = (-25, 25)
y_lim = (-25, 25)

# Very IMPORTANT function. Can you see what it does?
x, y = np.meshgrid(np.arange(*x_lim), np.arange(*y_lim))
print x
print y
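To answer that question, here is a minimal sketch on a tiny grid (the array sizes are just for demonstration): meshgrid turns two 1D coordinate vectors into two 2D arrays that together enumerate every (x, y)-combination on the grid.

# A minimal illustration of np.meshgrid on a 3-by-2 grid
xs, ys = np.meshgrid(np.arange(3), np.arange(2))
print xs  # [[0 1 2]
          #  [0 1 2]] -> the x-coordinate of every grid point
print ys  # [[0 0 0]
          #  [1 1 1]] -> the y-coordinate of every grid point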
# Plot the result of the grid search
plt.figure(figsize=(12, 9))
plt.imshow(obj_func_vec(x, y), extent=(x_lim + y_lim), cmap=plt.cm.jet_r, origin='lower')
plt.grid(color='k')
plt.colorbar()
plt.xlabel(r'$\beta_1$')

# Put a point on the OLS solution
plt.scatter(beta[0], beta[1], color='white')

import scipy as sp
from scipy import optimize

result = sp.optimize.minimize(lambda x: obj_func(*x), [0, 0], method='Nelder-Mead')
result

print 'param 1: %.2f' % result.x[0]    # %.2f: format result.x[0] as a string with two decimals
print 'param 2: %.2f' % result.x[1]
print 'in %d iterations' % result.nit  # %d: format result.nit as an integer

import statsmodels.api as sm
model = sm.OLS(Y, X)
fit = model.fit()
fit.summary()

f = open('behavior_jasper.csv')
for line in f.readlines()[:10]:  # Show the first 10 lines of the file
    print line

'Ik,ben,Gilles'.split(',')  # Dutch for 'I,am,Gilles'

f = open('/home/gdholla1/notebooks_demo/behavior_jasper.csv')
d = {}
keys = f.readline()
for line in f.readlines():
    line = line.split(',')
    #print line[6]
print keys

import pandas

# Read in a csv file
data = pandas.read_csv('/home/gdholla1/notebooks_demo/behavior_jasper.csv')
data

data.subj.unique()

### Groupby()
data.groupby('subj').mean()['RT']
data.groupby('subj').mean()['RT'].hist()
data.groupby(['drug', 'correct']).mean().RT

# The standard error: the standard deviation of the dataframe divided
# by the square root of the number of elements in the dataframe
se = lambda x: np.std(x) / np.sqrt(len(x))
data.groupby(['drug', 'correct']).RT.agg({'m': np.mean, 'se': se})

data.groupby('subj')
for g, d in data.groupby('subj'):
    print g, d.RT.mean()

plt.figure(figsize=(16, 20))
for g, d in data.groupby('subj'):  # Iterate over all groupings; g contains the group label, d the dataframe
    plt.subplot(5, 4, g)           # Make a grid of 5 rows and 4 columns and plot in subplot number g
    plt.hist(d.RT)                 # Plot a histogram of the RTs
    plt.xlim(0, 2000)              # Limit the x-axis to 0-2000 milliseconds

plt.figure(figsize=(20, 35))
for g, d in data.groupby('subj'):     # First group on subject...
    plt.subplot(7, 3, g)
    for cue, d2 in d.groupby('cue'):  # ...then on cue
        # All these histograms end up in the same subplot;
        # the alpha-parameter makes them slightly transparent
        plt.hist(d2.RT, label=str(cue), bins=np.arange(0, 2000, 50), alpha=0.6)
    plt.xlim(0, 2000)
    plt.title('Subject %s' % g)
    plt.legend()
plt.savefig('RT_histograms.pdf')

# If we want to do traditional psychology-like ANOVAs, we often want to average
# over all subject-condition combinations. Here we create a new dataframe 'd'
# that contains such a dataset.
d = data.groupby(['subj', 'cue', 'day'], as_index=False).mean()
d[['subj', 'RT', 'correct', 'day', 'cue']].head(25)  # Show only a limited number of columns,
                                                     # namely subj, RT, correct, day and cue

# Now we can compute statistics on these averaged-over data
d.groupby(['cue', 'drug']).agg([np.mean, lambda x: x.std() / np.sqrt(len(x))]).RT

import statsmodels.formula.api as smf
model = smf.ols('RT ~ subj + C(cue)*C(drug)', d[d.cue != 'null'])
r = model.fit()
r.summary()

from statsmodels.stats.anova import anova_lm
anova_lm(r)
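Under the hood the formula interface uses the patsy library: C() marks a column as categorical, and '*' expands into main effects plus their interaction. A minimal sketch of the design matrix this builds, on a toy dataframe rather than the real data (assuming patsy, a statsmodels dependency, is installed):

import patsy
toy = pandas.DataFrame({'cue': ['a', 'b', 'a', 'b'],
                        'drug': [0, 1, 1, 0]})  # Toy data, not the experiment's
# Columns: intercept, cue main effect, drug main effect, cue-by-drug interaction
print patsy.dmatrix('C(cue) * C(drug)', toy)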
### Integration with R
%load_ext rmagic
d = data[data.cue != 'null']

%%R -i d -o model
library(lmerTest)
model = lmer('RT ~ cue*drug + (1|subj) + (1|day)', d)
print(summary(model))
print('*************')
print(anova(model))
library(languageR)
plotLMER.fnc(model)

import hddm
hddm.HDDM?

# Set up some extra variables
data['rt'] = data.RT / 1000.     # Convert the RTs from milliseconds to seconds
data['response'] = data.correct  # We model responses as correct vs incorrect
data['subj_idx'] = data['subj']  # For hierarchical modeling the subjects have to be
                                 # indicated by a subj_idx-column
d = data[(data.resp != 0) & (data.resp != 99)].copy()  # Remove null-events and misses

model = hddm.HDDM(d[['rt', 'subj_idx', 'response', 'cue', 'day']],
                  depends_on={'a': 'cue', 'v': 'day'},
                  include=['p_outlier'])
model.sample(1000, burn=20)
model.print_stats()
model.plot_posteriors()

import nibabel as nb
image = nb.load('/home/gdholla1/ERC_data/QSM_data/Young/AD2T_QSM.nii')
print image
print image.shape  # Shape of the image
nb.orientations.aff2axcodes(image.get_affine())

data = image.get_data()
type(data), data.shape

plt.figure(figsize=(14, 10))
# .T transposes the slice, so that the rows of the matrix correspond to
# the y-axis and the columns to the x-axis; origin='lower' makes sure both axes go 'up'
plt.imshow(data[:, 27, 100:325].T, cmap=plt.cm.gray, origin='lower')

plt.figure(figsize=(24, 18))
for i, idx in enumerate(np.arange(30, 47, 2)):
    plt.subplot(3, 3, i + 1)
    plt.imshow(data[:, idx, 100:325].T, origin='lower', cmap=plt.cm.copper)
    plt.title('y = %s' % idx)
plt.savefig('ad2t.pdf')

mask_image_l = nb.load('/home/gdholla1/ERC_data/STN_masks/QSM/Young/AD2T_QSM_L_GH.nii.gz')
mask_l = mask_image_l.get_data()
mask_image_r = nb.load('/home/gdholla1/ERC_data/STN_masks/QSM/Young/AD2T_QSM_R_GH.nii.gz')
mask_r = mask_image_r.get_data()

plt.figure(figsize=(24, 30))
mask_l = np.ma.masked_array(mask_l, mask=mask_l == 0)  # Create masked arrays of the masks, in which
mask_r = np.ma.masked_array(mask_r, mask=mask_r == 0)  # every voxel where the mask is zero is masked

# enumerate returns tuples of (index_of_element_in_list, element);
# linspace returns an array of 6 evenly spaced values between 17 and 30
# (cast to int so they can be used as indices)
for i, idx in enumerate(np.linspace(17, 30, 6).astype(int)):
    plt.subplot(3, 2, i + 1)
    plt.imshow(data[:, idx, 100:325].T, origin='lower', cmap=plt.cm.gray)
    masked_data_l = np.ma.masked_array(data, mask=mask_l == 0)
    masked_data_r = np.ma.masked_array(data, mask=mask_r == 0)
    plt.imshow(masked_data_l[:, idx, 100:325].T, origin='lower', cmap=plt.cm.hot)  # Overlay the masked data
    plt.imshow(masked_data_r[:, idx, 100:325].T, origin='lower', cmap=plt.cm.hot)
    plt.title('y = %s' % idx)

plt.hist(data[mask_l != 0], bins=np.arange(-.1, .3, .01), alpha=0.6)  # Select all values within data where mask_l
                                                                      # is not equal to 0 and make a histogram
plt.hist(data[mask_r != 0], bins=np.arange(-.1, .3, .01), alpha=0.6)  # Same for mask_r, within the same plot

import glob
fns = glob.glob('/home/gdholla1/ERC_data/QSM_data/Young/*')
fns

import re
reg = re.compile('.*/([A-Z0-9]{4})_QSM.nii')  # Because ([A-Z0-9]{4}) is between parentheses,
                                              # it becomes a group that can be extracted
print reg                         # reg is a compiled regular expression (pattern object), which can
print reg.match(fns[0]).group(1)  # be applied to strings using its .match()-method
for fn in fns:
    print fn, reg.match(fn).group(1)

subj_idxs = []
for fn in fns:
    subj_idxs.append(reg.match(fn).group(1))
print subj_idxs
print [reg.match(fn).group(1) for fn in fns]  # The same thing, written as a list comprehension

import scipy.ndimage
com = np.array(sp.ndimage.center_of_mass(mask_l)).astype(int)  # Voxel coordinates of the mask's center of mass
offset = 15
data[:, com[1]-offset:com[1]+offset, :].squeeze().shape
plt.imshow(data[:, com[1], :].squeeze().T, origin='lower')

!fslreorient2std /home/gdholla1/ERC_data/QSM_data/Young/AD2T_QSM.nii /home/gdholla1/notebooks_demo/data/AD2T_QSM.nii
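The ! prefix only works inside IPython. In a plain Python script the same FSL command could be run with the subprocess module, for example (a sketch, using the same paths as above):

import subprocess
# Call fslreorient2std exactly like the shell does; check_call raises
# an exception if the command exits with a non-zero status
subprocess.check_call(['fslreorient2std',
                       '/home/gdholla1/ERC_data/QSM_data/Young/AD2T_QSM.nii',
                       '/home/gdholla1/notebooks_demo/data/AD2T_QSM.nii'])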
import nipype
from nipype.interfaces import fsl

reorienter = fsl.Reorient2Std()
reorienter.inputs.in_file = '/home/gdholla1/ERC_data/QSM_data/Young/AD2T_QSM.nii'
r = reorienter.run()
print r.outputs, reorienter.cmdline  # r contains a 'result'-object; cmdline shows the bash
                                     # command that has been executed

import nipype.pipeline as pe
import nipype.interfaces.io as nio

# Set up the workflow-object, which has a name and a base_dir where all intermediate files are stored
workflow = pe.Workflow(name='reorient_QSM', base_dir='/home/gdholla1/workflow_folders/')

# Set up a datagrabber: an interface that grabs data and feeds it to the analysis-nodes
# further downstream in the workflow. Its input is a subject_id; it outputs a QSM-file.
dg = pe.Node(nio.DataGrabber(infields=['subject_id'], outfields=['QSM']), name='datagrabber')

# Base directory to search in
dg.inputs.base_directory = '/home/gdholla1/ERC_data/QSM_data/Young/'
# Standard glob-template: just everything
dg.inputs.template = '*'
# Sort the output
dg.inputs.sort_filelist = True
# Specific template for the QSM-file: we are looking for a file that looks like
# %s_QSM.nii, where %s is filled in with the subject_id
dg.inputs.field_template = {'QSM': '%s_QSM.nii'}
dg.inputs.template_args = {'QSM': [['subject_id']]}

# We use the list of subject indices as input; by setting them as
# 'iterables' we set up N parallel workflows, where N is the number of subjects
dg.iterables = [('subject_id', subj_idxs)]

# We need only one line to set up a standard reorienter
reorient = pe.Node(fsl.Reorient2Std(), name='reorient')

# Now we can connect the datagrabber to the reorienter
workflow.connect(dg, 'QSM', reorient, 'in_file')

# DataSink is an interface that writes results away to some directory
ds = pe.Node(nio.DataSink(), name='datasink')
ds.inputs.base_directory = '/home/gdholla1/notebooks_demo/data'
workflow.connect(reorient, 'out_file', ds, 'reoriented_qsm')

# Write a picture of the workflow
workflow.write_graph()

# Use IPython to get the picture-filename and show it in the notebook
import os
from IPython.display import Image
Image(os.path.join(workflow.base_dir, workflow.name, 'graph.dot.png'))

# Run the workflow
workflow.run()

workflow = pe.Workflow(name='reorient_QSM', base_dir='/home/gdholla1/workflow_folders/')

dg = pe.Node(nio.DataGrabber(infields=['subject_id', 'rater', 'hemisphere'],
                             outfields=['QSM', 'mask']),
             name='datagrabber')
dg.inputs.template = '*'
dg.inputs.base_directory = '/home/gdholla1/ERC_data/'
dg.inputs.field_template = {'QSM': 'QSM_data/Young/%s_QSM.nii',
                            'mask': 'STN_masks/QSM/Young/%s_QSM_%s_%s.nii.gz'}
dg.inputs.template_args = {'QSM': [['subject_id']],
                           'mask': [['subject_id', 'hemisphere', 'rater']]}
dg.inputs.subject_id = 'AD2T'
dg.iterables = [('subject_id', subj_idxs),
                ('rater', ['GH', 'AA', 'CMA']),
                ('hemisphere', ['L', 'R'])]
dg.inputs.sort_filelist = True

reorient = pe.Node(fsl.Reorient2Std(), name='reorient')
workflow.connect(dg, 'QSM', reorient, 'in_file')

reorient_mask = pe.Node(fsl.Reorient2Std(), name='reorient_mask')
workflow.connect(dg, 'mask', reorient_mask, 'in_file')

ds = pe.Node(nio.DataSink(), name='datasink')
ds.inputs.base_directory = '/home/gdholla1/notebooks_demo/data'
ds.inputs.regexp_substitutions = [('reoriented_qsm/_hemisphere_([A-Z])_rater_([A-Z]{2,3})_subject_id_([A-Z0-9]{4})/(.*).nii.gz',
                                   'reoriented_qsm/\\3/\\4.nii.gz'),
                                  ('reoriented_qsm_masks/_hemisphere_([A-Z])_rater_([A-Z]{2,3})_subject_id_([A-Z0-9]{4})/(.*).nii.gz',
                                   'reoriented_qsm_masks/\\3/\\4_\\1_\\2.nii.gz')]
workflow.connect(reorient, 'out_file', ds, 'reoriented_qsm')
workflow.connect(reorient_mask, 'out_file', ds, 'reoriented_qsm_masks')  # The reoriented masks come from the reorient_mask node
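The regexp_substitutions above are easiest to understand by applying one by hand with re.sub. The path below illustrates what the DataSink receives from an iterable-expanded node (it is illustrative, not an actual output of this run):

import re
path = 'reoriented_qsm_masks/_hemisphere_L_rater_GH_subject_id_AD2T/AD2T_QSM_reoriented.nii.gz'
pattern = 'reoriented_qsm_masks/_hemisphere_([A-Z])_rater_([A-Z]{2,3})_subject_id_([A-Z0-9]{4})/(.*).nii.gz'
# The groups capture hemisphere, rater and subject_id; the replacement
# reshuffles them into a flat, readable filename
print re.sub(pattern, 'reoriented_qsm_masks/\\3/\\4_\\1_\\2.nii.gz', path)
# -> reoriented_qsm_masks/AD2T/AD2T_QSM_reoriented_L_GH.nii.gz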
workflow.write_graph()
import os
Image(os.path.join(workflow.base_dir, workflow.name, 'graph.dot.png'))

workflow.run(plugin='MultiProc', plugin_args={'n_procs': 8})

def get_iron_content(data_fn, mask_fn):
    # The imports happen inside the function: a nipype Function node only ships
    # the function's source code, so everything it needs must be imported here
    import nibabel as nb
    import os
    import numpy as np

    data = nb.load(data_fn).get_data()
    mask = nb.load(mask_fn).get_data()

    qsm_value = np.mean(data[mask != 0])  # Mean QSM value within the mask

    f = open(os.path.join(os.getcwd(), 'mean_qsm_value.txt'), 'w')
    f.write(str(qsm_value))
    f.close()

    return os.path.join(os.getcwd(), 'mean_qsm_value.txt')

fn = get_iron_content('/home/gdholla1/notebooks_demo/data/reoriented_qsm/AD2T/AD2T_QSM_reoriented.nii.gz',
                      '/home/gdholla1/notebooks_demo/data/reoriented_qsm_masks/AD2T/AD2T_QSM_reoriented_L_AA.nii.gz')
print fn
print open(fn).read()

import nipype.interfaces.utility as util  # Submodule required for Function nodes

iron_node = pe.Node(util.Function(function=get_iron_content,
                                  input_names=['data_fn', 'mask_fn'],
                                  output_names=['iron_content']),
                    name='iron')
iron_node.inputs.data_fn = '/home/gdholla1/notebooks_demo/data/reoriented_qsm/AD2T/AD2T_QSM_reoriented.nii.gz'
iron_node.inputs.mask_fn = '/home/gdholla1/notebooks_demo/data/reoriented_qsm_masks/AD2T/AD2T_QSM_reoriented_L_AA.nii.gz'
r = iron_node.run()
print r.outputs
print open(r.outputs.iron_content).read()

workflow = pe.Workflow(name='reorient_QSM', base_dir='/home/gdholla1/workflow_folders/')

dg = pe.Node(nio.DataGrabber(infields=['subject_id', 'rater', 'hemisphere'],
                             outfields=['QSM', 'mask']),
             name='datagrabber')
dg.inputs.template = '*'
dg.inputs.base_directory = '/home/gdholla1/ERC_data/'
dg.inputs.field_template = {'QSM': 'QSM_data/Young/%s_QSM.nii',
                            'mask': 'STN_masks/QSM/Young/%s_QSM_%s_%s.nii.gz'}
dg.inputs.template_args = {'QSM': [['subject_id']],
                           'mask': [['subject_id', 'hemisphere', 'rater']]}
dg.iterables = [('subject_id', subj_idxs),
                ('rater', ['GH', 'AA', 'CMA']),
                ('hemisphere', ['L', 'R'])]
dg.inputs.sort_filelist = True

reorient = pe.Node(fsl.Reorient2Std(), name='reorient')
workflow.connect(dg, 'QSM', reorient, 'in_file')

reorient_mask = pe.Node(fsl.Reorient2Std(), name='reorient_mask')
workflow.connect(dg, 'mask', reorient_mask, 'in_file')

iron_node = pe.Node(util.Function(function=get_iron_content,
                                  input_names=['data_fn', 'mask_fn'],
                                  output_names=['iron_content']),
                    name='iron')
workflow.connect(reorient, 'out_file', iron_node, 'data_fn')
workflow.connect(reorient_mask, 'out_file', iron_node, 'mask_fn')

ds = pe.Node(nio.DataSink(), name='datasink')
ds.inputs.base_directory = '/home/gdholla1/notebooks_demo/data'
ds.inputs.regexp_substitutions = [('reoriented_qsm/_hemisphere_([A-Z])_rater_([A-Z]{2,3})_subject_id_([A-Z0-9]{4})/(.*).nii.gz',
                                   'reoriented_qsm/\\3/\\4.nii.gz'),
                                  ('reoriented_qsm_masks/_hemisphere_([A-Z])_rater_([A-Z]{2,3})_subject_id_([A-Z0-9]{4})/(.*).nii.gz',
                                   'reoriented_qsm_masks/\\3/\\4_\\1_\\2.nii.gz')]
workflow.connect(reorient, 'out_file', ds, 'reoriented_qsm')
workflow.connect(reorient_mask, 'out_file', ds, 'reoriented_qsm_masks')
workflow.connect(iron_node, 'iron_content', ds, 'iron_values')

workflow.write_graph()
import os
Image(os.path.join(workflow.base_dir, workflow.name, 'graph.dot.png'))
workflow.run(plugin='MultiProc', plugin_args={'n_procs': 8})

## Analyze the output of the NiPype-workflow
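The first cell below relies on the fact that zip(*l) undoes zip(l); a minimal illustration:

pairs = zip([1, 2, 3], ['a', 'b', 'c'])  # [(1, 'a'), (2, 'b'), (3, 'c')]
numbers, letters = zip(*pairs)  # The *-operator unpacks the list into separate arguments,
print numbers                   # so zip re-pairs all first and all second elements:
print letters                   # (1, 2, 3) and ('a', 'b', 'c')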
# EXTRACT INFO FROM FILENAMES
import glob
fns = glob.glob('/home/gdholla1/notebooks_demo/data/iron_values/_hemisphere_*_rater_*_subject_id_*/mean_qsm_value.txt')
reg = re.compile('.*/_hemisphere_([A-Z])_rater_([A-Z]+)_subject_id_([A-Z0-9]+)/mean_qsm_value.txt')

# zip(*l) is the inverse of zip(l)
hemispheres, raters, subject_ids = zip(*[reg.match(fn).groups() for fn in fns])
print hemispheres
print raters
print subject_ids

iron_values = [float(open(fn).read()) for fn in fns]
print iron_values

# Set up a dataframe
qsm_data = pandas.DataFrame({'hemisphere': hemispheres,
                             'rater': raters,
                             'subject_id': subject_ids,
                             'qsm_values': iron_values})

import scipy as sp
from scipy import stats
qsm_data.groupby(['hemisphere']).agg([np.mean, sp.stats.sem])  # stats.sem computes the standard error of the mean

model = smf.ols(formula='qsm_values ~ subject_id*rater + hemisphere', data=qsm_data)
f = model.fit()
f.summary()

anova_lm(f)

import nipype
from nipype.workflows.fmri.fsl import create_featreg_preproc

workflow = create_featreg_preproc()
workflow.write_graph()

from IPython.display import Image
import os
Image('graph.dot.png')

workflow.inputs.inputspec
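Printing workflow.inputs.inputspec shows which inputs of the prebuilt workflow are still open. Following the example in the nipype documentation for create_featreg_preproc, filling them in looks roughly like this (the functional filename and TR are hypothetical):

workflow.inputs.inputspec.func = ['run1.nii']          # Hypothetical functional run
workflow.inputs.inputspec.fwhm = 5                     # Smoothing kernel width in mm
workflow.inputs.inputspec.highpass = 128. / (2 * 2.5)  # High-pass cutoff in volumes, assuming a TR of 2.5 s
workflow.base_dir = '/home/gdholla1/workflow_folders/'
workflow.run()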