%pylab inline import pylab as pl import numpy as np # Some nice default configuration for plots pl.rcParams['figure.figsize'] = 10, 7.5 pl.rcParams['axes.grid'] = True pl.gray() from IPython.parallel import Client client = Client() len(client) %px print("Hello from the cluster engines!") def where_am_i(): import os import socket return "In process with pid {0} on host: '{1}'".format( os.getpid(), socket.gethostname()) where_am_i() direct_view = client.direct_view() where_am_i_direct_results = direct_view.apply(where_am_i) where_am_i_direct_results where_am_i_direct_results.get() where_am_i_direct_results.get_dict() lb_view = client.load_balanced_view() where_am_i_lb_result = lb_view.apply(where_am_i) where_am_i_lb_result where_am_i_lb_result.get() import mmap_utils, model_selection _ = reload(mmap_utils), reload(model_selection) from sklearn.datasets import load_digits from sklearn.preprocessing import MinMaxScaler digits = load_digits() X = MinMaxScaler().fit_transform(digits.data) y = digits.target digits_cv_split_filenames = mmap_utils.persist_cv_splits('digits_10', X, y, 10) digits_cv_split_filenames mmap_utils.warm_mmap_on_cv_splits(client, digits_cv_split_filenames) from sklearn.svm import LinearSVC from collections import OrderedDict import numpy as np linear_svc_params = OrderedDict(( ('C', np.logspace(-2, 2, 5)), )) linear_svc = LinearSVC() linear_svc_search = model_selection.RandomizedGridSeach(lb_view) linear_svc_search.launch_for_splits(linear_svc, linear_svc_params, digits_cv_split_filenames) linear_svc_search linear_svc_search.boxplot_parameters(display_train=False) x = np.linspace(0, int(1e3), 100) pl.plot(x, x ** 3 / 1e9) pl.xlabel("Number of training samples") pl.ylabel("Estimated Convergence Time of SMO (in seconds)") 1e6 ** 3 / 1e9 / 60 / 60 / 24 / 365 from sklearn.kernel_approximation import Nystroem from sklearn.pipeline import Pipeline nystroem_pipeline = Pipeline([ ('nystroem', Nystroem()), ('clf', LinearSVC()), ]) nystroem_pipeline_params = OrderedDict(( ('nystroem__n_components', [50, 100, 200]), ('nystroem__gamma', np.logspace(-2, 2, 5)), ('clf__C', np.logspace(-2, 2, 5)), )) nystroem_search = model_selection.RandomizedGridSeach(lb_view) nystroem_search.launch_for_splits(nystroem_pipeline, nystroem_pipeline_params, digits_cv_split_filenames) nystroem_search nystroem_search.boxplot_parameters() client.abort()