"""Gait-dataset walkthrough: per-window ARMA features classified with an SVM.

The work is fanned out over IPython.parallel engines, first on a local
``ipcluster`` and then on a StarCluster-managed cluster.

This is a notebook transcript restored to one statement per line.  Terminal
steps and captured console output are kept verbatim as comments.  Notebook
cell magics (``%%bash``, ``%%px``, ``!cmd``) are written as the equivalent
``get_ipython()`` calls, so the script has to be run inside IPython.

NOTE(review): ``IPython.parallel``, ``sklearn.cross_validation`` and
``sm.tsa.ARMA`` look like legacy APIs -- confirm the pinned library versions
before re-running.
"""
import os

import numpy as np
import pandas as pd
import statsmodels.api as sm
from IPython import get_ipython
from IPython import parallel
from sklearn import cross_validation

# ---------------------------------------------------------------------------
# Terminal: environment setup
# ---------------------------------------------------------------------------
# NOTE(review): the file names after `chmod +x` and `./` are missing in the
# source transcript.
# $ chmod +x
# $ ./
# $ conda update conda
# $ conda update ipython

# Handle used to run the notebook's cell magics; None outside IPython.
ipython = get_ipython()

# %%bash cell: download and unpack the dataset into the working directory.
ipython.run_cell_magic('bash', '', """\
curl -O http://www.cs.mcgill.ca/~jfrank8/data/gait-dataset.tar.gz
tar xzf gait-dataset.tar.gz
""")

dta_path = 'gait-dataset'


def files_byday(dta_path, day):
    """Return paths of the ``.csv`` files in *dta_path* recorded on *day*.

    A file is selected when its name ends with ``.csv`` and contains the
    substring ``'day' + str(day)``.  This is a substring test, so ``day=1``
    also selects a name containing ``'day10'``.
    """
    tag = 'day' + str(day)
    return [os.path.join(dta_path, name)
            for name in os.listdir(dta_path)
            if name.endswith('.csv') and tag in name]


day1 = files_byday(dta_path, 1)

# Quick look at the first recording: acceleration magnitude over time.
dta = pd.read_csv(day1[0], header=0, sep="\t",
                  usecols=['timestamp', 'accel_mag'])
# Same millisecond-timestamp conversion as arma_featurizer uses.
dta.index = pd.to_datetime(dta['timestamp'], unit='ms')
del dta['timestamp']
dta.plot(figsize=(12, 8))


def arma_featurizer(filename, nwin=10, nparam=2):
    """Turn one recording into a matrix of per-window ARMA parameters.

    The tab-separated file is read (columns ``timestamp`` and ``accel_mag``),
    indexed by its timestamps interpreted as milliseconds, and split into
    *nwin* consecutive chunks with ``np.array_split``.  An ``ARMA(nparam, 0)``
    model is fitted to each chunk and its fitted parameters become one row.

    Parameters
    ----------
    filename : path of the recording to read.
    nwin : number of windows the series is split into.  The original code
        declared a default of 500 but unconditionally overwrote the argument
        with 10; the parameter is now honoured and defaults to 10, so calls
        that omit it give the same result as before.
    nparam : autoregressive order passed to ``sm.tsa.ARMA``.

    Returns
    -------
    ``numpy.ndarray`` of shape ``(nwin, nparam + 1)``; row ``i`` holds
    ``model.params`` for window ``i`` (presumably the constant term plus the
    *nparam* AR coefficients -- confirm against statsmodels).
    """
    # Imported locally because this function is handed to all_engines.map();
    # presumably the engines do not share this script's module-level imports.
    import numpy as np
    import pandas as pd
    import statsmodels.api as sm

    dta = pd.read_csv(filename, header=0, sep="\t",
                      usecols=['timestamp', 'accel_mag'])
    dta.index = pd.to_datetime(dta['timestamp'], unit='ms')
    del dta['timestamp']

    X = np.zeros((nwin, nparam + 1))
    for i, subseries in enumerate(np.array_split(dta, nwin)):
        model = sm.tsa.ARMA(subseries, (nparam, 0)).fit()
        X[i] = model.params
    return X


# ---------------------------------------------------------------------------
# Local run: two engines on this machine
# ---------------------------------------------------------------------------
ipython.system('ipcluster start -n=2 --daemon')

rc = parallel.Client()
rc.block = True          # map() below waits for and returns the results
all_engines = rc[:]

# Featurize the first two day-1 recordings, one per map task.
Y = all_engines.map(arma_featurizer, day1[:2])


def fit_svm(fold):
    """Fit an SVM on one cross-validation fold and return its test accuracy.

    *fold* is a ``(train, test)`` pair of index arrays.  Instances and labels
    are read from ``instances.npy`` and ``labels.npy`` in the current working
    directory, memory-mapped read-only.  A default ``svm.SVC`` is fitted on
    the training rows and scored on the test rows with ``accuracy_score``.
    """
    # Local imports for the same reason as in arma_featurizer.
    import numpy as np
    from sklearn import svm
    from sklearn.metrics import accuracy_score

    X = np.load('instances.npy', mmap_mode='r')
    y = np.load('labels.npy', mmap_mode='r')
    train, test = fold

    clf = svm.SVC()
    clf.fit(X[train], y[train])
    ypred = clf.predict(X[test])
    return accuracy_score(y[test], ypred)


# Stack the per-file feature matrices; every window of file k gets label k.
np.save('instances', np.vstack(Y))
labels = np.repeat(range(len(Y)), len(Y[0]))
np.save('labels', labels)

accuracy_scores = all_engines.map(
    fit_svm, cross_validation.StratifiedKFold(labels, n_folds=2))
print(accuracy_scores)

ipython.system('ipcluster stop')

# ---------------------------------------------------------------------------
# Terminal: StarCluster installation and configuration
# ---------------------------------------------------------------------------
# $ pip install starcluster
# $ starcluster help
# StarCluster - (http://web.mit.edu/starcluster)
# Software Tools for Academics and Researchers (STAR)
# Please submit bug reports to starcluster@mit.edu
# cli.py:87 - ERROR - config file /home/user/.starcluster/config does not exist
# Options:
# --------
# [1] Show the StarCluster config template
# [2] Write config template to /home/user/.starcluster/config
# [q] Quit
# Please enter your selection:
#
# Config fragment:
# [plugin ipcluster]
# setup_class = starcluster.plugins.ipcluster.IPCluster
# PLUGINS = ipcluster
# NODE_INSTANCE_TYPE = m1.small
# [key rllab]
# KEY_LOCATION=~/.ssh/rllab.rsa
# KEYNAME = rllab
#
# NOTE(review): the key is created as `mykey` while the config above names
# `rllab` -- confirm which name is intended.
# $ starcluster createkey mykey -o ~/.ssh/rllab.rsa
# $ starcluster spothistory m1.small
# StarCluster - (http://star.mit.edu/cluster) (v. 0.95.2)
# Software Tools for Academics and Researchers (STAR)
# Please submit bug reports to starcluster@mit.edu
# >>> Fetching spot history for m1.small (VPC)
# >>> Current price: $0.0071
# >>> Max price: $0.5000
# >>> Average price: $0.0459
# $ starcluster start -s 2 -b 0.05 rllab
# $ starcluster sshmaster rllab -u sgeadmin

# ---------------------------------------------------------------------------
# Remote run: engines on the StarCluster cluster
# ---------------------------------------------------------------------------
rc = parallel.Client('/Users/pierrelucbacon/.starcluster/ipcluster/SecurityGroup:@sc-rllab-us-east-1.json', sshkey='/Users/pierrelucbacon/.ssh/rllab.rsa')
rc.block = True
all_engines = rc[:]

# %%px cell: show each engine's eth0 address.
ipython.run_cell_magic('px', '', "!ifconfig eth0 | grep 'inet addr'")

# %%px cell wrapping a %%bash cell: fetch the dataset on the targeted engine.
# NOTE(review): --targets=10 on a cluster started with `-s 2` -- confirm.
ipython.run_cell_magic('px', '--targets=10', """\
%%bash
curl -O http://www.cs.mcgill.ca/~jfrank8/data/gait-dataset.tar.gz
tar xzf gait-dataset.tar.gz
""")

# Featurize every day-1 recording on the cluster.
Y = all_engines.map(arma_featurizer, day1)
np.save('instances', np.vstack(Y))
labels = np.repeat(range(len(Y)), len(Y[0]))
np.save('labels', labels)

# %%bash cell: copy the arrays to the cluster, where fit_svm loads them.
ipython.run_cell_magic('bash', '', """\
starcluster put rllab -u sgeadmin instances.npy /home/sgeadmin
starcluster put rllab -u sgeadmin labels.npy /home/sgeadmin
""")

accuracy_scores = all_engines.map(
    fit_svm, cross_validation.StratifiedKFold(labels, n_folds=4))
print(accuracy_scores)