from mempamal.datasets import iris
X, y = iris.get_data()
from sklearn.linear_model.logistic import LogisticRegression
from sklearn.preprocessing.data import StandardScaler
from sklearn.cross_validation import StratifiedKFold
from sklearn.pipeline import Pipeline
from sklearn.metrics import f1_score
The pipeline will contains:
The goodness of fit is estimated with:
The model selection is performed with:
s1 = StandardScaler(with_mean=True, with_std=True)
s2 = LogisticRegression()
p = [("scaler", s1), ("logit", s2)]
est = Pipeline(p)
from mempamal.configuration import JSONify_estimator, JSONify_cv, build_dataset
from mempamal.examples.parameters_grid import make_log_grid
from mempamal.workflow import create_wf, save_wf
We jsonify the estimator and the cross-validation configuration:
method_conf = JSONify_estimator(est, out="./est.json")
cv_conf = JSONify_cv(StratifiedKFold, cv_kwargs={"n_folds": 5},
score_func=f1_score,
inner_cv=StratifiedKFold,
inner_cv_kwargs={"n_folds": 5},
inner_score_func=f1_score,
stratified=True,
out="./cv.json")
We build the dataset in the current directory.
It's create a dataset.joblib
file.
Then we create the workflow in our internal format (create_wf
).
With verbose=True
, it prints the commands on stdout
.
And finally, we output the workflow (save_wf
) in the soma-workflow format
and write it to workflow.json
(need soma-workflow).
grid = make_log_grid(X, y)
dataset = build_dataset(X, y, method_conf, cv_conf, ".", grid=grid)
wfi = create_wf(dataset['folds'], cv_conf, method_conf, ".",
verbose=True)
wf = save_wf(wfi, "./workflow.json", mode="soma-workflow")
python mempamal/scripts/mapper.py ./cv.json ./est.json ./dataset.joblib ./map_res_0_0.pkl 0 --inner 0 python mempamal/scripts/mapper.py ./cv.json ./est.json ./dataset.joblib ./map_res_0_1.pkl 0 --inner 1 python mempamal/scripts/mapper.py ./cv.json ./est.json ./dataset.joblib ./map_res_0_2.pkl 0 --inner 2 python mempamal/scripts/mapper.py ./cv.json ./est.json ./dataset.joblib ./map_res_0_3.pkl 0 --inner 3 python mempamal/scripts/mapper.py ./cv.json ./est.json ./dataset.joblib ./map_res_0_4.pkl 0 --inner 4 python mempamal/scripts/inner_reducer.py ./cv.json ./est.json ./dataset.joblib ./red_res_0.pkl ./map_res_0_{inner}.pkl 0 python mempamal/scripts/mapper.py ./cv.json ./est.json ./dataset.joblib ./map_res_1_0.pkl 1 --inner 0 python mempamal/scripts/mapper.py ./cv.json ./est.json ./dataset.joblib ./map_res_1_1.pkl 1 --inner 1 python mempamal/scripts/mapper.py ./cv.json ./est.json ./dataset.joblib ./map_res_1_2.pkl 1 --inner 2 python mempamal/scripts/mapper.py ./cv.json ./est.json ./dataset.joblib ./map_res_1_3.pkl 1 --inner 3 python mempamal/scripts/mapper.py ./cv.json ./est.json ./dataset.joblib ./map_res_1_4.pkl 1 --inner 4 python mempamal/scripts/inner_reducer.py ./cv.json ./est.json ./dataset.joblib ./red_res_1.pkl ./map_res_1_{inner}.pkl 1 python mempamal/scripts/mapper.py ./cv.json ./est.json ./dataset.joblib ./map_res_2_0.pkl 2 --inner 0 python mempamal/scripts/mapper.py ./cv.json ./est.json ./dataset.joblib ./map_res_2_1.pkl 2 --inner 1 python mempamal/scripts/mapper.py ./cv.json ./est.json ./dataset.joblib ./map_res_2_2.pkl 2 --inner 2 python mempamal/scripts/mapper.py ./cv.json ./est.json ./dataset.joblib ./map_res_2_3.pkl 2 --inner 3 python mempamal/scripts/mapper.py ./cv.json ./est.json ./dataset.joblib ./map_res_2_4.pkl 2 --inner 4 python mempamal/scripts/inner_reducer.py ./cv.json ./est.json ./dataset.joblib ./red_res_2.pkl ./map_res_2_{inner}.pkl 2 python mempamal/scripts/mapper.py ./cv.json ./est.json ./dataset.joblib ./map_res_3_0.pkl 3 --inner 0 python mempamal/scripts/mapper.py ./cv.json ./est.json ./dataset.joblib ./map_res_3_1.pkl 3 --inner 1 python mempamal/scripts/mapper.py ./cv.json ./est.json ./dataset.joblib ./map_res_3_2.pkl 3 --inner 2 python mempamal/scripts/mapper.py ./cv.json ./est.json ./dataset.joblib ./map_res_3_3.pkl 3 --inner 3 python mempamal/scripts/mapper.py ./cv.json ./est.json ./dataset.joblib ./map_res_3_4.pkl 3 --inner 4 python mempamal/scripts/inner_reducer.py ./cv.json ./est.json ./dataset.joblib ./red_res_3.pkl ./map_res_3_{inner}.pkl 3 python mempamal/scripts/mapper.py ./cv.json ./est.json ./dataset.joblib ./map_res_4_0.pkl 4 --inner 0 python mempamal/scripts/mapper.py ./cv.json ./est.json ./dataset.joblib ./map_res_4_1.pkl 4 --inner 1 python mempamal/scripts/mapper.py ./cv.json ./est.json ./dataset.joblib ./map_res_4_2.pkl 4 --inner 2 python mempamal/scripts/mapper.py ./cv.json ./est.json ./dataset.joblib ./map_res_4_3.pkl 4 --inner 3 python mempamal/scripts/mapper.py ./cv.json ./est.json ./dataset.joblib ./map_res_4_4.pkl 4 --inner 4 python mempamal/scripts/inner_reducer.py ./cv.json ./est.json ./dataset.joblib ./red_res_4.pkl ./map_res_4_{inner}.pkl 4 python mempamal/scripts/outer_reducer.py ./final_res.pkl ./red_res_{outer}.pkl
Now, we create a WorkflowController
and we submit the workflow.
We wait for workflow completion then we read the final results.
from soma_workflow.client import WorkflowController
import time
import json
import sklearn.externals.joblib as joblib
controller = WorkflowController()
wf_id = controller.submit_workflow(workflow=wf, name="second example")
while controller.workflow_status(wf_id) != 'workflow_done':
time.sleep(2)
print(joblib.load('./final_res.pkl'))
light mode {'std': 0.025080367485459092, 'raw': array([ 0.93333333, 1. , 0.96658312, 0.96658312, 0.93265993]), 'median': 0.96658312447786132, 'mean': 0.9598319029897977}