The cells in this notebook perform two steps related to processing the log files written by Dharsh's PsychToolBox code into a format we directly use in the analysis notebook.
First, read each functional run's .mat
file and extract the relevant information. Combine the runs and write a master .csv
file for each subject that we will use for all subsequent processing (i.e. the .mat
files are never touched again). This file will be read directly by the analysis notebook and used for behavioral and fMRI analyses.
Additionally, we will extract more specific information related to "decoding problems" that will be used, behind the scenes, by the lyman
code in the analysis notebook that builds datasets for decoding analyses. This information is saved in event files that contain at least the following fields:
run
condition
onset
Further fields can be used for confound regression -- in the code below, we save the rt
(reaction time) field for this purpose.
import os
import os.path as op
import numpy as np
import pandas as pd
from scipy.io import loadmat
import lyman
# Subject list and project directories are resolved by the lyman helpers
# from the active project configuration.
subjects = lyman.determine_subjects()
project = lyman.gather_project_info()
data_dir = project["data_dir"]
anal_dir = project["analysis_dir"]
# Filename templates; the %s / %d slots are filled with subject and run
# identifiers by the functions below.
behav_temp = op.join(data_dir, "%s/behav/behav_data.csv")
event_temp = op.join(data_dir, "%s/design/%s.csv")
artifact_temp = op.join(anal_dir, "dksort/%s/preproc/run_%d/artifacts.csv")
# Task rules: which stimulus dimension is attended, and the yes/no decision rule.
dim_rules = ["shape", "color", "pattern"]
dec_rules = ["same", "different"]
# Columns written to every lyman event ("design") file.
sched_columns = ["run", "condition", "onset", "rt"]
# Ensure each subject has a design/ directory to receive the event files.
for subj in subjects:
    design_dir = op.join(data_dir, subj, "design")
    if not op.exists(design_dir):
        os.mkdir(design_dir)
def dimension_rules(subj):
    """Write the lyman event file for decoding the attended dimension.

    Keeps only clean (artifact-free), correct trials; condition is the
    dimension rule and onset is the stimulus time.
    """
    trials = pd.read_csv(behav_temp % subj)
    events = pd.DataFrame(columns=sched_columns, index=trials.index)
    usable = trials[trials["clean"] & trials["correct"]]
    events.update(usable)
    events["condition"] = usable.dim_rule
    events["onset"] = usable.stim_time
    events.dropna().to_csv(event_temp % (subj, "dimension"), index=False)
def parse_ptb_files(subj):
    """Parse a subject's Psychtoolbox .mat logs into one behavioral csv.

    Reads the four run files, builds trialwise vectors for each field,
    flags trials whose acquisition volumes overlap fMRI artifacts, and
    writes the master behav_data.csv used by all downstream processing.
    """
    data = dict(run=[], rt=[], congruent=[],
                dim_rule=[], dec_rule=[],
                dim_shift=[], dec_shift=[],
                dim_stay=[], dec_stay=[],
                dim_shift_lag=[], dec_shift_lag=[],
                attend_match=[], distract_match=[],
                shape1=[], shape2=[],
                color1=[], color2=[],
                pattern1=[], pattern2=[],
                answer=[], made_resp=[], correct=[],
                stim_time=[], clean=[], block_pos=[])

    for r in range(1, 5):

        # Load the Psychtoolbox log for this run
        mat_file = op.join(data_dir, subj,
                           "behav/%s_run%d.mat" % (subj, r))
        mat = loadmat(mat_file, squeeze_me=True)
        events = mat["theData"]
        n_trials = len(events["RT"][()])

        # Basic response information for each trial
        data["rt"].append(events["RT"][()])
        data["answer"].append(events["keypress"][()])
        data["made_resp"].append(events["keypress"][()] > 0)
        # Onsets shifted back by 12 s -- presumably lead-in time that is
        # discarded from the timeseries; TODO confirm against acquisition
        data["stim_time"].append(events["stim1_onset"][()] - 12)
        data["correct"].append(events["score"][()] > 0)
        # Position of each trial within its 3-trial block.
        # NOTE: floor division (//) so the tile count is an int under Python 3
        data["block_pos"].append(np.tile(range(3), n_trials // 3))
        data["run"].append(np.ones(n_trials, int) * r)

        # Mark a trial unclean when its stimulus volume falls inside a
        # window around any artifact volume (1 back through 4 forward)
        stim_vols = np.round(data["stim_time"][-1] / 2)
        artifacts = pd.read_csv(artifact_temp % (subj, r)).max(axis=1).values
        art_vols = np.unique(np.argwhere(artifacts)[:, None] + np.arange(-1, 5))
        clean = np.logical_not(np.in1d(stim_vols, art_vols))
        data["clean"].append(clean)

        # Trial schedule; columns 2: hold 1-based match codes for the
        # attended and distractor dimensions
        sched = mat["sess_trials"]
        matches = sched[:n_trials, 2:] - 1
        data["congruent"].append(~np.logical_xor(*matches.T))
        data["attend_match"].append(matches[:, 0] == 0)
        data["distract_match"].append(matches[:, 1] == 0)

        # Derive shift/stay/lag variables for both rule types
        for idx, rule in enumerate(["dim", "dec"]):
            rule_key = sched[:n_trials, idx] - 1
            # A "shift" trial has a different rule than the preceding one;
            # the first trial of a run always counts as a shift
            rule_shift = np.ones(len(rule_key), bool)
            rule_shift[1:] = rule_key[1:] != rule_key[:-1]
            data["%s_rule" % rule].append(rule_key)
            data["%s_shift" % rule].append(rule_shift)
            # A "stay" trial starts a new block without a rule change
            rule_stay = np.logical_and(data["block_pos"][-1] == 0,
                                       np.logical_not(rule_shift))
            data["%s_stay" % rule].append(rule_stay)
            # Trials elapsed since the most recent shift (0 on a shift)
            lag = []
            for i, shift_i in enumerate(rule_shift):
                if shift_i:
                    lag.append(0)
                else:
                    lag.append(i - np.argwhere(rule_shift[:i]).max())
            data["%s_shift_lag" % rule].append(lag)

        # Identities of the two stimuli shown on each trial, per dimension
        stims = mat["trial"][r - 1]["stim"]
        for j, dim in enumerate(dim_rules):
            for t in [1, 2]:
                data["%s%d" % (dim, t)].append(stims[j][:72, t - 1].astype(int))

    # Concatenate runs into single per-field vectors.
    # (.items() rather than .iteritems() keeps this Python 2/3 compatible)
    data = {k: np.concatenate(v) for k, v in data.items()}
    df = pd.DataFrame(data)
    df["subj"] = subj

    # Map integer codes onto meaningful labels
    df["dim_rule"] = df.dim_rule.map(dict(enumerate(dim_rules)))
    df["dec_rule"] = df.dec_rule.map(dict(enumerate(dec_rules)))
    df["answer"] = df.answer.map({1: "no", 2: "yes"})

    # Convenience columns: do the two stimuli match on each dimension?
    for dim in dim_rules:
        df["%s_match" % dim] = df["%s1" % dim] == df["%s2" % dim]

    # Write the master csv to the same template path the event builders read
    df.to_csv(behav_temp % subj, index_label="trial")
def decision_rules(subj):
    """Write the lyman event file for decoding the decision rule.

    Keeps only clean (artifact-free), correct trials; condition is the
    decision rule and onset is the stimulus time.
    """
    trials = pd.read_csv(behav_temp % subj)
    events = pd.DataFrame(columns=sched_columns, index=trials.index)
    usable = trials[trials["clean"] & trials["correct"]]
    events.update(usable)
    events["condition"] = usable.dec_rule
    events["onset"] = usable.stim_time
    events.dropna().to_csv(event_temp % (subj, "decision"), index=False)
def dimension_cues(subj):
    """Write the lyman event file time-locked 12 s before the stimulus.

    Uses only the first trial of each block (block_pos == 0), restricted
    to clean, correct trials -- presumably the moment the dimension cue
    appears, given the onset offset.
    """
    trials = pd.read_csv(behav_temp % subj)
    events = pd.DataFrame(columns=sched_columns, index=trials.index)
    usable = trials[trials["clean"] & trials["correct"]]
    usable = usable[usable.block_pos == 0]
    events.update(usable)
    events["condition"] = usable.dim_rule
    events["onset"] = usable.stim_time - 12
    events.dropna().to_csv(event_temp % (subj, "dimension_cue"), index=False)
# Process every subject: build the master behavioral csv first, then
# derive each event file from it.
for subj in subjects:
    parse_ptb_files(subj)
    for build_events in (dimension_rules, decision_rules, dimension_cues):
        build_events(subj)