In previous examples, we only used scikit-learn algorithms. In this example, we will learn how to use another Python machine learning library. You need to install Parsimony to run this example: https://github.com/neurospin/pylearn-parsimony
For Pipeline, r2_score and StratifiedKFold we rely on scikit-learn objects.
from sklearn.cross_validation import StratifiedKFold
from sklearn.pipeline import Pipeline
from sklearn.metrics import r2_score
from mempamal.configuration import JSONify_estimator, JSONify_cv, build_dataset
from mempamal.workflow import create_wf, save_wf
from mempamal.datasets import iris
# The iris dataset as usual, but here used for a linear regression
# example (why not! :p). X holds the features, y the targets.
X, y = iris.get_data()
To ensure that parsimony.estimators.ElasticNet
is compliant with the scikit-learn interface, we create a wrapper which inherits from sklearn.base.BaseEstimator
. Notice that this wrapper must be accessible in your PYTHONPATH
for the future tasks. In the MEmPaMaL examples we provide the ElasticNet wrapper for the sake of the example.
import inspect
# Import the scikit-learn-compatible wrapper shipped with MEmPaMaL and
# print the source of its module so the reader can see how it is written.
from mempamal.examples.elasticnet_parsimony import EnetWrap
print(inspect.getsource(inspect.getmodule(EnetWrap)))
from parsimony.estimators import ElasticNet
from sklearn.base import BaseEstimator


class EnetWrap(BaseEstimator, ElasticNet):
    """Scikit-learn-compatible wrapper around parsimony's ElasticNet.

    Inheriting from ``BaseEstimator`` provides ``get_params`` /
    ``set_params`` so the estimator can be used inside a ``Pipeline``
    and cloned during cross-validation.
    """

    def __init__(self, l=0., alpha=1., algorithm_params=None,
                 penalty_start=0, mean=True):
        # The original signature used a mutable default ({}) for
        # algorithm_params, which is shared across all instances and
        # across sklearn clones; use None as a sentinel and build a
        # fresh dict per instance instead. Passing {} explicitly
        # behaves exactly as before.
        super(EnetWrap, self).__init__(
            l=l, alpha=alpha,
            algorithm_params={} if algorithm_params is None else algorithm_params,
            penalty_start=penalty_start, mean=mean)
The estimator is simply the EnetWrap (wrapped in a one-step pipeline) and we create a multi-parameter grid (enet__l and enet__alpha).
# A one-step pipeline whose only stage is the parsimony ElasticNet wrapper.
est = Pipeline([("enet", EnetWrap())])

# Candidate values: 8 regularization strengths (alpha) crossed with
# 9 l1/l2 trade-off ratios (l) -> 72 parameter sets, alpha-major order.
alphas = [1e-4, 1e-3, 1e-2, 0.1, 1., 10., 100., 1e3]
ls = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
grid = [{"enet__l": ratio, "enet__alpha": strength}
        for strength in alphas
        for ratio in ls]
print("The grid contains {} sets of parameters:".format(len(grid)))
grid
The grid contains 72 sets of parameters:
[{'enet__alpha': 0.0001, 'enet__l': 0.1}, {'enet__alpha': 0.0001, 'enet__l': 0.2}, {'enet__alpha': 0.0001, 'enet__l': 0.3}, {'enet__alpha': 0.0001, 'enet__l': 0.4}, {'enet__alpha': 0.0001, 'enet__l': 0.5}, {'enet__alpha': 0.0001, 'enet__l': 0.6}, {'enet__alpha': 0.0001, 'enet__l': 0.7}, {'enet__alpha': 0.0001, 'enet__l': 0.8}, {'enet__alpha': 0.0001, 'enet__l': 0.9}, {'enet__alpha': 0.001, 'enet__l': 0.1}, {'enet__alpha': 0.001, 'enet__l': 0.2}, {'enet__alpha': 0.001, 'enet__l': 0.3}, {'enet__alpha': 0.001, 'enet__l': 0.4}, {'enet__alpha': 0.001, 'enet__l': 0.5}, {'enet__alpha': 0.001, 'enet__l': 0.6}, {'enet__alpha': 0.001, 'enet__l': 0.7}, {'enet__alpha': 0.001, 'enet__l': 0.8}, {'enet__alpha': 0.001, 'enet__l': 0.9}, {'enet__alpha': 0.01, 'enet__l': 0.1}, {'enet__alpha': 0.01, 'enet__l': 0.2}, {'enet__alpha': 0.01, 'enet__l': 0.3}, {'enet__alpha': 0.01, 'enet__l': 0.4}, {'enet__alpha': 0.01, 'enet__l': 0.5}, {'enet__alpha': 0.01, 'enet__l': 0.6}, {'enet__alpha': 0.01, 'enet__l': 0.7}, {'enet__alpha': 0.01, 'enet__l': 0.8}, {'enet__alpha': 0.01, 'enet__l': 0.9}, {'enet__alpha': 0.1, 'enet__l': 0.1}, {'enet__alpha': 0.1, 'enet__l': 0.2}, {'enet__alpha': 0.1, 'enet__l': 0.3}, {'enet__alpha': 0.1, 'enet__l': 0.4}, {'enet__alpha': 0.1, 'enet__l': 0.5}, {'enet__alpha': 0.1, 'enet__l': 0.6}, {'enet__alpha': 0.1, 'enet__l': 0.7}, {'enet__alpha': 0.1, 'enet__l': 0.8}, {'enet__alpha': 0.1, 'enet__l': 0.9}, {'enet__alpha': 1.0, 'enet__l': 0.1}, {'enet__alpha': 1.0, 'enet__l': 0.2}, {'enet__alpha': 1.0, 'enet__l': 0.3}, {'enet__alpha': 1.0, 'enet__l': 0.4}, {'enet__alpha': 1.0, 'enet__l': 0.5}, {'enet__alpha': 1.0, 'enet__l': 0.6}, {'enet__alpha': 1.0, 'enet__l': 0.7}, {'enet__alpha': 1.0, 'enet__l': 0.8}, {'enet__alpha': 1.0, 'enet__l': 0.9}, {'enet__alpha': 10.0, 'enet__l': 0.1}, {'enet__alpha': 10.0, 'enet__l': 0.2}, {'enet__alpha': 10.0, 'enet__l': 0.3}, {'enet__alpha': 10.0, 'enet__l': 0.4}, {'enet__alpha': 10.0, 'enet__l': 0.5}, {'enet__alpha': 10.0, 'enet__l': 0.6}, 
{'enet__alpha': 10.0, 'enet__l': 0.7}, {'enet__alpha': 10.0, 'enet__l': 0.8}, {'enet__alpha': 10.0, 'enet__l': 0.9}, {'enet__alpha': 100.0, 'enet__l': 0.1}, {'enet__alpha': 100.0, 'enet__l': 0.2}, {'enet__alpha': 100.0, 'enet__l': 0.3}, {'enet__alpha': 100.0, 'enet__l': 0.4}, {'enet__alpha': 100.0, 'enet__l': 0.5}, {'enet__alpha': 100.0, 'enet__l': 0.6}, {'enet__alpha': 100.0, 'enet__l': 0.7}, {'enet__alpha': 100.0, 'enet__l': 0.8}, {'enet__alpha': 100.0, 'enet__l': 0.9}, {'enet__alpha': 1000.0, 'enet__l': 0.1}, {'enet__alpha': 1000.0, 'enet__l': 0.2}, {'enet__alpha': 1000.0, 'enet__l': 0.3}, {'enet__alpha': 1000.0, 'enet__l': 0.4}, {'enet__alpha': 1000.0, 'enet__l': 0.5}, {'enet__alpha': 1000.0, 'enet__l': 0.6}, {'enet__alpha': 1000.0, 'enet__l': 0.7}, {'enet__alpha': 1000.0, 'enet__l': 0.8}, {'enet__alpha': 1000.0, 'enet__l': 0.9}]
We jsonify the estimator and the cross-validation configuration.
We build the dataset in the current directory.
It creates a dataset.joblib file.
Then we create the workflow in our internal format (create_wf
).
With verbose=True
, it prints the commands on stdout
.
And finally, we output the workflow (save_wf
) in the soma-workflow format
and write it to workflow.json
(need soma-workflow).
method_conf = JSONify_estimator(est, out="./est.json")
cv_conf = JSONify_cv(StratifiedKFold, cv_kwargs={"n_folds": 5},
score_func=r2_score,
stratified=True,
inner_cv=StratifiedKFold,
inner_cv_kwargs={"n_folds": 5},
inner_score_func=r2_score,
out="./cv.json")
dataset = build_dataset(X, y, method_conf, cv_conf, grid=grid, outputdir=".")
wfi = create_wf(dataset['folds'], cv_conf, method_conf, ".",
verbose=True)
wf = save_wf(wfi, "./workflow.json", mode="soma-workflow")
python mempamal/scripts/mapper.py ./cv.json ./est.json ./dataset.joblib ./map_res_0_0.pkl 0 --inner 0 python mempamal/scripts/mapper.py ./cv.json ./est.json ./dataset.joblib ./map_res_0_1.pkl 0 --inner 1 python mempamal/scripts/mapper.py ./cv.json ./est.json ./dataset.joblib ./map_res_0_2.pkl 0 --inner 2 python mempamal/scripts/mapper.py ./cv.json ./est.json ./dataset.joblib ./map_res_0_3.pkl 0 --inner 3 python mempamal/scripts/mapper.py ./cv.json ./est.json ./dataset.joblib ./map_res_0_4.pkl 0 --inner 4 python mempamal/scripts/inner_reducer.py ./cv.json ./est.json ./dataset.joblib ./red_res_0.pkl ./map_res_0_{inner}.pkl 0 python mempamal/scripts/mapper.py ./cv.json ./est.json ./dataset.joblib ./map_res_1_0.pkl 1 --inner 0 python mempamal/scripts/mapper.py ./cv.json ./est.json ./dataset.joblib ./map_res_1_1.pkl 1 --inner 1 python mempamal/scripts/mapper.py ./cv.json ./est.json ./dataset.joblib ./map_res_1_2.pkl 1 --inner 2 python mempamal/scripts/mapper.py ./cv.json ./est.json ./dataset.joblib ./map_res_1_3.pkl 1 --inner 3 python mempamal/scripts/mapper.py ./cv.json ./est.json ./dataset.joblib ./map_res_1_4.pkl 1 --inner 4 python mempamal/scripts/inner_reducer.py ./cv.json ./est.json ./dataset.joblib ./red_res_1.pkl ./map_res_1_{inner}.pkl 1 python mempamal/scripts/mapper.py ./cv.json ./est.json ./dataset.joblib ./map_res_2_0.pkl 2 --inner 0 python mempamal/scripts/mapper.py ./cv.json ./est.json ./dataset.joblib ./map_res_2_1.pkl 2 --inner 1 python mempamal/scripts/mapper.py ./cv.json ./est.json ./dataset.joblib ./map_res_2_2.pkl 2 --inner 2 python mempamal/scripts/mapper.py ./cv.json ./est.json ./dataset.joblib ./map_res_2_3.pkl 2 --inner 3 python mempamal/scripts/mapper.py ./cv.json ./est.json ./dataset.joblib ./map_res_2_4.pkl 2 --inner 4 python mempamal/scripts/inner_reducer.py ./cv.json ./est.json ./dataset.joblib ./red_res_2.pkl ./map_res_2_{inner}.pkl 2 python mempamal/scripts/mapper.py ./cv.json ./est.json ./dataset.joblib ./map_res_3_0.pkl 3 --inner 0 
python mempamal/scripts/mapper.py ./cv.json ./est.json ./dataset.joblib ./map_res_3_1.pkl 3 --inner 1 python mempamal/scripts/mapper.py ./cv.json ./est.json ./dataset.joblib ./map_res_3_2.pkl 3 --inner 2 python mempamal/scripts/mapper.py ./cv.json ./est.json ./dataset.joblib ./map_res_3_3.pkl 3 --inner 3 python mempamal/scripts/mapper.py ./cv.json ./est.json ./dataset.joblib ./map_res_3_4.pkl 3 --inner 4 python mempamal/scripts/inner_reducer.py ./cv.json ./est.json ./dataset.joblib ./red_res_3.pkl ./map_res_3_{inner}.pkl 3 python mempamal/scripts/mapper.py ./cv.json ./est.json ./dataset.joblib ./map_res_4_0.pkl 4 --inner 0 python mempamal/scripts/mapper.py ./cv.json ./est.json ./dataset.joblib ./map_res_4_1.pkl 4 --inner 1 python mempamal/scripts/mapper.py ./cv.json ./est.json ./dataset.joblib ./map_res_4_2.pkl 4 --inner 2 python mempamal/scripts/mapper.py ./cv.json ./est.json ./dataset.joblib ./map_res_4_3.pkl 4 --inner 3 python mempamal/scripts/mapper.py ./cv.json ./est.json ./dataset.joblib ./map_res_4_4.pkl 4 --inner 4 python mempamal/scripts/inner_reducer.py ./cv.json ./est.json ./dataset.joblib ./red_res_4.pkl ./map_res_4_{inner}.pkl 4 python mempamal/scripts/outer_reducer.py ./final_res.pkl ./red_res_{outer}.pkl
Now, we create a WorkflowController
and we submit the workflow.
We wait for workflow completion, then we read the final results.
from soma_workflow.client import WorkflowController
import time
import json
import sklearn.externals.joblib as joblib
# Submit the generated workflow to soma-workflow.
controller = WorkflowController()
wf_id = controller.submit_workflow(workflow=wf, name="third example")
# Poll every 2 seconds until the whole workflow has finished.
while controller.workflow_status(wf_id) != 'workflow_done':
    time.sleep(2)
# The outer reducer wrote the aggregated scores to ./final_res.pkl;
# load and display them.
print(joblib.load('./final_res.pkl'))
light mode {'std': 0.020656789215473488, 'raw': array([ 0.90915875, 0.95577807, 0.93911917, 0.93473956, 0.89903827]), 'median': 0.93473955712380996, 'mean': 0.9275667625341375}