import sklearn.model_selection
import sklearn.datasets
import sklearn.metrics
import autosklearn.classification
def main():
    """Sequential example: fit all models first, then build the ensemble.

    Runs auto-sklearn on the digits dataset with ensemble construction
    disabled during the search (ensemble_size=0), then builds a 50-member
    ensemble afterwards with fit_ensemble().
    """
    X, y = sklearn.datasets.load_digits(return_X_y=True)
    X_train, X_test, y_train, y_test = \
        sklearn.model_selection.train_test_split(X, y, random_state=1)
    automl = autosklearn.classification.AutoSklearnClassifier(
        time_left_for_this_task=120, per_run_time_limit=30,
        # Fixed folder-name typo ('autoslearn' -> 'autosklearn') so the tmp
        # folder matches the naming of the output folder below.
        tmp_folder='/tmp/autosklearn_sequential_example_tmp',
        output_folder='/tmp/autosklearn_sequential_example_out',
        # Do not construct ensembles in parallel to avoid using more than one
        # core at a time. The ensemble will be constructed after auto-sklearn
        # finished fitting all machine learning models.
        ensemble_size=0, delete_tmp_folder_after_terminate=False)
    automl.fit(X_train, y_train, dataset_name='digits')
    # This call to fit_ensemble uses all models trained in the previous call
    # to fit to build an ensemble which can be used with automl.predict()
    automl.fit_ensemble(y_train, ensemble_size=50)
    print(automl.show_models())
    predictions = automl.predict(X_test)
    print(automl.sprint_statistics())
    print("Accuracy score", sklearn.metrics.accuracy_score(y_test, predictions))


if __name__ == '__main__':
    main()
[(0.980000, MyDummyClassifier(configuration=1, init_params=None, random_state=None)), (0.020000, SimpleClassificationPipeline({'balancing:strategy': 'none', 'classifier:__choice__': 'liblinear_svc', 'imputation:strategy': 'median', 'one_hot_encoding:use_minimum_fraction': 'False', 'preprocessor:__choice__': 'polynomial', 'rescaling:__choice__': 'minmax', 'classifier:liblinear_svc:C': 2.4244459875201874, 'classifier:liblinear_svc:dual': 'False', 'classifier:liblinear_svc:fit_intercept': 'True', 'classifier:liblinear_svc:intercept_scaling': 1, 'classifier:liblinear_svc:loss': 'squared_hinge', 'classifier:liblinear_svc:multi_class': 'ovr', 'classifier:liblinear_svc:penalty': 'l2', 'classifier:liblinear_svc:tol': 0.0014473335587607684, 'preprocessor:polynomial:degree': 3, 'preprocessor:polynomial:include_bias': 'False', 'preprocessor:polynomial:interaction_only': 'True'}, dataset_properties={ 'task': 2, 'sparse': False, 'multilabel': False, 'multiclass': True, 'target_type': 'classification', 'signed': False})), ] auto-sklearn results: Dataset name: digits Metric: accuracy Best validation score: 0.988764 Number of target algorithm runs: 24 Number of successful target algorithm runs: 0 Number of crashed target algorithm runs: 0 Number of target algorithms that exceeded the memory limit: 0 Number of target algorithms that exceeded the time limit: 0 Accuracy score 0.986666666667
# Import openml, printing installation instructions before re-raising
# if the package is missing.
try:
    import openml
except ImportError:
    banner = "#" * 80
    print(banner + """
To run this example you need to install openml-python:
pip install git+https://github.com/renatopp/liac-arff
pip install requests xmltodict
pip install git+https://github.com/openml/openml-python@develop --no-deps\n""" + banner)
    raise
def main():
    """Fit auto-sklearn on the OpenML adult task.

    Loads task 2117 from openml.org (see https://www.openml.org/t/2117),
    splits it with the task's own train/test indices, derives the feature
    types from the OpenML categorical indicator, and reports test accuracy.
    """
    openml.config.apikey = '610344db6388d9ba34f6db45a3cf71de'
    task = openml.tasks.get_task(2117)
    train_indices, test_indices = task.get_train_test_split_indices()
    X, y = task.get_X_and_y()
    X_train, y_train = X[train_indices], y[train_indices]
    X_test, y_test = X[test_indices], y[test_indices]
    dataset = task.get_dataset()
    _, _, categorical_indicator = dataset.get_data(
        target=task.target_name, return_categorical_indicator=True)
    # Create feature type list from openml.org indicator and run autosklearn
    feat_type = ['Categorical' if is_categorical else 'Numerical'
                 for is_categorical in categorical_indicator]
    cls = autosklearn.classification.AutoSklearnClassifier(
        time_left_for_this_task=120, per_run_time_limit=30)
    cls.fit(X_train, y_train, feat_type=feat_type)
    predictions = cls.predict(X_test)
    print("Accuracy score", sklearn.metrics.accuracy_score(y_test, predictions))


if __name__ == "__main__":
    main()
################################################################################ To run this example you need to install openml-python: pip install git+https://github.com/renatopp/liac-arff pip install requests xmltodict pip install git+https://github.com/openml/openml-python@develop --no-deps ################################################################################
--------------------------------------------------------------------------- ModuleNotFoundError Traceback (most recent call last) <ipython-input-3-184bc93afb14> in <module>() 1 try: ----> 2 import openml 3 except ImportError: 4 print("#"*80 + """ 5 To run this example you need to install openml-python: ModuleNotFoundError: No module named 'openml'
#pip install git+https://github.com/renatopp/liac-arff
#pip install requests xmltodict
# pip install git+https://github.com/openml/openml-python@develop --no-deps
# Guarded import: tell the user how to install openml-python, then
# propagate the original ImportError.
try:
    import openml
except ImportError:
    hashes = "#" * 80
    instructions = """
To run this example you need to install openml-python:
pip install git+https://github.com/renatopp/liac-arff
pip install requests xmltodict
pip install git+https://github.com/openml/openml-python@develop --no-deps\n"""
    print(hashes + instructions + hashes)
    raise
def main():
    """Run auto-sklearn on the adult dataset fetched from openml.org.

    Uses OpenML task 2117 (https://www.openml.org/t/2117): the task supplies
    the train/test split and the categorical indicator used to build the
    auto-sklearn ``feat_type`` list.
    """
    openml.config.apikey = '610344db6388d9ba34f6db45a3cf71de'
    task = openml.tasks.get_task(2117)
    train_indices, test_indices = task.get_train_test_split_indices()
    X, y = task.get_X_and_y()
    X_train = X[train_indices]
    X_test = X[test_indices]
    y_train = y[train_indices]
    y_test = y[test_indices]
    dataset = task.get_dataset()
    _, _, categorical_indicator = dataset.get_data(
        target=task.target_name, return_categorical_indicator=True)
    # Map the OpenML per-column indicator onto auto-sklearn feature types.
    feat_type = []
    for indicator in categorical_indicator:
        feat_type.append('Categorical' if indicator else 'Numerical')
    cls = autosklearn.classification.AutoSklearnClassifier(
        time_left_for_this_task=120,
        per_run_time_limit=30)
    cls.fit(X_train, y_train, feat_type=feat_type)
    predictions = cls.predict(X_test)
    print("Accuracy score", sklearn.metrics.accuracy_score(y_test, predictions))


if __name__ == "__main__":
    main()
[WARNING] [2017-08-05 17:10:17,667:smac.intensification.intensification.Intensifier] Challenger was the same as the current incumbent; Skipping challenger [WARNING] [2017-08-05 17:10:17,667:smac.intensification.intensification.Intensifier] Challenger was the same as the current incumbent; Skipping challenger [WARNING] [2017-08-05 17:10:17,676:smac.intensification.intensification.Intensifier] Challenger was the same as the current incumbent; Skipping challenger [WARNING] [2017-08-05 17:10:17,676:smac.intensification.intensification.Intensifier] Challenger was the same as the current incumbent; Skipping challenger Accuracy score 0.851523236334
def main():
    """Holdout example: fit on the digits training split, score the holdout.

    Keeps the evaluator output enabled (disable_evaluator_output=False) so
    per-model predictions are written to the tmp folder.
    """
    X, y = sklearn.datasets.load_digits(return_X_y=True)
    X_train, X_test, y_train, y_test = \
        sklearn.model_selection.train_test_split(X, y, random_state=1)
    automl = autosklearn.classification.AutoSklearnClassifier(
        time_left_for_this_task=120, per_run_time_limit=30,
        # Fixed folder-name typo ('autoslearn' -> 'autosklearn') so the tmp
        # folder matches the naming of the output folder below.
        tmp_folder='/tmp/autosklearn_holdout_example_tmp',
        output_folder='/tmp/autosklearn_holdout_example_out',
        disable_evaluator_output=False)
    automl.fit(X_train, y_train, dataset_name='digits')
    # Print the final ensemble constructed by auto-sklearn.
    print(automl.show_models())
    predictions = automl.predict(X_test)
    # Print statistics about the auto-sklearn run such as number of
    # iterations, number of models failed with a time out.
    print(automl.sprint_statistics())
    print("Accuracy score", sklearn.metrics.accuracy_score(y_test, predictions))


if __name__ == '__main__':
    main()
You are already timing task: index_run5
[(0.960000, MyDummyClassifier(configuration=1, init_params=None, random_state=None)), (0.020000, SimpleClassificationPipeline({'balancing:strategy': 'weighting', 'classifier:__choice__': 'gradient_boosting', 'imputation:strategy': 'mean', 'one_hot_encoding:use_minimum_fraction': 'True', 'preprocessor:__choice__': 'no_preprocessing', 'rescaling:__choice__': 'standardize', 'classifier:gradient_boosting:learning_rate': 0.03627152792976942, 'classifier:gradient_boosting:loss': 'deviance', 'classifier:gradient_boosting:max_depth': 10, 'classifier:gradient_boosting:max_features': 4.211238636565405, 'classifier:gradient_boosting:max_leaf_nodes': 'None', 'classifier:gradient_boosting:min_samples_leaf': 15, 'classifier:gradient_boosting:min_samples_split': 16, 'classifier:gradient_boosting:min_weight_fraction_leaf': 0.0, 'classifier:gradient_boosting:n_estimators': 340, 'classifier:gradient_boosting:subsample': 0.6289005711340923, 'one_hot_encoding:minimum_fraction': 0.0002148748655476835}, dataset_properties={ 'task': 2, 'sparse': False, 'multilabel': False, 'multiclass': True, 'target_type': 'classification', 'signed': False})), (0.020000, SimpleClassificationPipeline({'balancing:strategy': 'weighting', 'classifier:__choice__': 'k_nearest_neighbors', 'imputation:strategy': 'mean', 'one_hot_encoding:use_minimum_fraction': 'True', 'preprocessor:__choice__': 'no_preprocessing', 'rescaling:__choice__': 'none', 'classifier:k_nearest_neighbors:n_neighbors': 2, 'classifier:k_nearest_neighbors:p': 1, 'classifier:k_nearest_neighbors:weights': 'distance', 'one_hot_encoding:minimum_fraction': 0.3530578080502024}, dataset_properties={ 'task': 2, 'sparse': False, 'multilabel': False, 'multiclass': True, 'target_type': 'classification', 'signed': False})), ] auto-sklearn results: Dataset name: digits Metric: accuracy Best validation score: 0.982022 Number of target algorithm runs: 23 Number of successful target algorithm runs: 0 Number of crashed target algorithm runs: 0 Number of 
target algorithms that exceeded the memory limit: 0 Number of target algorithms that exceeded the time limit: 0 Accuracy score 0.991111111111
import numpy as np
import sklearn.model_selection
import sklearn.datasets
import sklearn.metrics
import autosklearn.classification
import autosklearn.metrics
def accuracy(solution, prediction):
    """Return the fraction of predictions that exactly match the solution."""
    matches = solution == prediction
    return np.mean(matches)
def accuracy_wk(solution, prediction, dummy):
    """Accuracy metric that also accepts an extra (unused) argument.

    Demonstrates forwarding additional keyword arguments through
    ``make_scorer``; ``dummy`` must be None.
    """
    assert dummy is None
    correct = solution == prediction
    return np.mean(correct)
def main():
    """Compare three ways of supplying a metric to auto-sklearn.

    On the breast-cancer dataset: (1) the predefined accuracy metric,
    (2) a custom scorer built from ``accuracy``, and (3) a custom scorer
    built from ``accuracy_wk`` that forwards an extra keyword argument.
    """
    X, y = sklearn.datasets.load_breast_cancer(return_X_y=True)
    X_train, X_test, y_train, y_test = \
        sklearn.model_selection.train_test_split(X, y, random_state=1)

    # Print a list of available metrics
    print("Available CLASSIFICATION metrics autosklearn.metrics.*:")
    print("\t*" + "\n\t*".join(autosklearn.metrics.CLASSIFICATION_METRICS))
    print("Available REGRESSION autosklearn.metrics.*:")
    print("\t*" + "\n\t*".join(autosklearn.metrics.REGRESSION_METRICS))

    # First example: Use predefined accuracy metric
    print("#" * 80)
    print("Use predefined accuracy metric")
    cls = autosklearn.classification.AutoSklearnClassifier(
        time_left_for_this_task=60, per_run_time_limit=30, seed=1)
    cls.fit(X_train, y_train, metric=autosklearn.metrics.accuracy)
    predictions = cls.predict(X_test)
    # NOTE(review): cls._automl._automl._metric reaches into private API;
    # works with this auto-sklearn version but may break on upgrade.
    print("Accuracy score {:g} using {:s}".
          format(sklearn.metrics.accuracy_score(y_test, predictions),
                 cls._automl._automl._metric.name))

    # Second example: Use own accuracy metric
    print("#" * 80)
    # Fixed duplicated word in the message ("accuracy accuracy").
    print("Use self defined accuracy metric")
    accuracy_scorer = autosklearn.metrics.make_scorer(name="accu",
                                                      score_func=accuracy,
                                                      greater_is_better=True,
                                                      needs_proba=False,
                                                      needs_threshold=False)
    cls = autosklearn.classification.AutoSklearnClassifier(
        time_left_for_this_task=60, per_run_time_limit=30, seed=1)
    cls.fit(X_train, y_train, metric=accuracy_scorer)
    predictions = cls.predict(X_test)
    print("Accuracy score {:g} using {:s}".
          format(sklearn.metrics.accuracy_score(y_test, predictions),
                 cls._automl._automl._metric.name))

    # Third example: Use own accuracy metric with additional argument
    print("#" * 80)
    print("Use self defined accuracy with additional argument")
    accuracy_scorer = autosklearn.metrics.make_scorer(name="accu_add",
                                                      score_func=accuracy_wk,
                                                      greater_is_better=True,
                                                      needs_proba=False,
                                                      needs_threshold=False,
                                                      dummy=None)
    cls = autosklearn.classification.AutoSklearnClassifier(
        time_left_for_this_task=60, per_run_time_limit=30, seed=1)
    cls.fit(X_train, y_train, metric=accuracy_scorer)
    predictions = cls.predict(X_test)
    print("Accuracy score {:g} using {:s}".
          format(sklearn.metrics.accuracy_score(y_test, predictions),
                 cls._automl._automl._metric.name))


if __name__ == "__main__":
    main()
Available CLASSIFICATION metrics autosklearn.metrics.*: *accuracy *balanced_accuracy *roc_auc *average_precision *log_loss *pac_score *precision *precision_macro *precision_micro *precision_samples *precision_weighted *recall *recall_macro *recall_micro *recall_samples *recall_weighted *f1 *f1_macro *f1_micro *f1_samples *f1_weighted Available REGRESSION autosklearn.metrics.*: *r2 *mean_squared_error *mean_absolute_error *median_absolute_error ################################################################################ Use predefined accuracy metric [WARNING] [2017-08-05 17:16:44,323:smac.intensification.intensification.Intensifier] Challenger was the same as the current incumbent; Skipping challenger [WARNING] [2017-08-05 17:16:44,323:smac.intensification.intensification.Intensifier] Challenger was the same as the current incumbent; Skipping challenger
You are already timing task: index_run2 You are already timing task: index_run3 You are already timing task: index_run3 You are already timing task: index_run3 You are already timing task: index_run3 You are already timing task: index_run3 You are already timing task: index_run3
Accuracy score 0.965035 using accuracy ################################################################################ Use self defined accuracy accuracy metric [WARNING] [2017-08-05 17:17:37,674:AutoMLSMBO(1)::d6d58dae5b02e07797da6d4d126ac9b6] Could not find meta-data directory /Users/tarrysingh/anaconda/lib/python3.6/site-packages/autosklearn/metalearning/files/accu_binary.classification_dense
You are already timing task: index_run2 You are already timing task: index_run2 You are already timing task: index_run2 You are already timing task: index_run2 You are already timing task: index_run2 You are already timing task: index_run2 You are already timing task: index_run2
Accuracy score 0.951049 using accu ################################################################################ Use self defined accuracy with additional argument [WARNING] [2017-08-05 17:18:35,228:AutoMLSMBO(1)::d6d58dae5b02e07797da6d4d126ac9b6] Could not find meta-data directory /Users/tarrysingh/anaconda/lib/python3.6/site-packages/autosklearn/metalearning/files/accu_add_binary.classification_dense
You are already timing task: index_run2 You are already timing task: index_run2 You are already timing task: index_run2 You are already timing task: index_run2 You are already timing task: index_run2 You are already timing task: index_run2 You are already timing task: index_run2
Accuracy score 0.951049 using accu_add
import multiprocessing
import shutil
from autosklearn.metrics import accuracy
from autosklearn.classification import AutoSklearnClassifier
from autosklearn.constants import *
tmp_folder = '/tmp/autosklearn_parallel_example_tmp'
output_folder = '/tmp/autosklearn_parallel_example_out'

# Remove leftovers from a previous run; a missing folder is fine.
# Loop variable renamed: the original `dir` shadowed the builtin, and the
# bound exception `e` was never used.
for folder in (tmp_folder, output_folder):
    try:
        shutil.rmtree(folder)
    except OSError:
        pass
def spawn_classifier(seed, dataset_name):
    """Spawn a subprocess.

    auto-sklearn does not take care of spawning worker processes. This
    function, which is called several times in the main block is a new
    process which runs one instance of auto-sklearn.
    """
    # Use the initial configurations from meta-learning only in one out of
    # the four processes spawned. This prevents auto-sklearn from evaluating
    # the same configurations in four processes.
    initial_configurations_via_metalearning = 25 if seed == 0 else 0

    # Arguments which are different to other runs of auto-sklearn:
    # 1. all classifiers write to the same output directory
    # 2. shared_mode is set to True, this enables sharing of data between
    #    models.
    # 3. all instances of the AutoSklearnClassifier must have a different seed!
    automl = AutoSklearnClassifier(
        # sec., how long should this seed fit process run
        time_left_for_this_task=60,
        # sec., each model may only take this long before it's killed
        per_run_time_limit=15,
        # MB, memory limit imposed on each call to a ML algorithm
        ml_memory_limit=1024,
        # tmp folder will be shared between seeds
        shared_mode=True,
        tmp_folder=tmp_folder,
        output_folder=output_folder,
        delete_tmp_folder_after_terminate=False,
        # ensembles will be built when all optimization runs are finished
        ensemble_size=0,
        initial_configurations_via_metalearning=initial_configurations_via_metalearning,
        seed=seed)
    automl.fit(X_train, y_train, dataset_name=dataset_name)
if __name__ == '__main__':
    X, y = sklearn.datasets.load_digits(return_X_y=True)
    X_train, X_test, y_train, y_test = \
        sklearn.model_selection.train_test_split(X, y, random_state=1)

    # Fan out the optimization over four worker processes (set this at
    # roughly half of your cores) and wait for all of them to finish.
    workers = []
    for seed in range(4):
        proc = multiprocessing.Process(target=spawn_classifier,
                                       args=(seed, 'digits'))
        proc.start()
        workers.append(proc)
    for proc in workers:
        proc.join()

    print('Starting to build an ensemble!')
    automl = AutoSklearnClassifier(time_left_for_this_task=15,
                                   per_run_time_limit=15,
                                   ml_memory_limit=1024,
                                   shared_mode=True,
                                   ensemble_size=50,
                                   ensemble_nbest=200,
                                   tmp_folder=tmp_folder,
                                   output_folder=output_folder,
                                   initial_configurations_via_metalearning=0,
                                   seed=1)
    # Both the ensemble_size and ensemble_nbest parameters can be changed now if
    # necessary
    automl.fit_ensemble(y_train,
                        task=MULTICLASS_CLASSIFICATION,
                        metric=accuracy,
                        precision='32',
                        dataset_name='digits',
                        ensemble_size=20,
                        ensemble_nbest=50)
    predictions = automl.predict(X_test)
    print(automl.show_models())
    print("Accuracy score", sklearn.metrics.accuracy_score(y_test, predictions))
Starting to build an ensemble! [(0.900000, MyDummyClassifier(configuration=1, init_params=None, random_state=None)), (0.050000, SimpleClassificationPipeline({'balancing:strategy': 'weighting', 'classifier:__choice__': 'k_nearest_neighbors', 'imputation:strategy': 'mean', 'one_hot_encoding:use_minimum_fraction': 'True', 'preprocessor:__choice__': 'no_preprocessing', 'rescaling:__choice__': 'none', 'classifier:k_nearest_neighbors:n_neighbors': 2, 'classifier:k_nearest_neighbors:p': 1, 'classifier:k_nearest_neighbors:weights': 'distance', 'one_hot_encoding:minimum_fraction': 0.3530578080502024}, dataset_properties={ 'task': 2, 'sparse': False, 'multilabel': False, 'multiclass': True, 'target_type': 'classification', 'signed': False})), (0.050000, SimpleClassificationPipeline({'balancing:strategy': 'weighting', 'classifier:__choice__': 'passive_aggressive', 'imputation:strategy': 'mean', 'one_hot_encoding:use_minimum_fraction': 'False', 'preprocessor:__choice__': 'random_trees_embedding', 'rescaling:__choice__': 'normalize', 'classifier:passive_aggressive:C': 0.10437045130506178, 'classifier:passive_aggressive:fit_intercept': 'True', 'classifier:passive_aggressive:loss': 'hinge', 'classifier:passive_aggressive:n_iter': 25, 'preprocessor:random_trees_embedding:max_depth': 7, 'preprocessor:random_trees_embedding:max_leaf_nodes': 'None', 'preprocessor:random_trees_embedding:min_samples_leaf': 2, 'preprocessor:random_trees_embedding:min_samples_split': 6, 'preprocessor:random_trees_embedding:min_weight_fraction_leaf': 1.0, 'preprocessor:random_trees_embedding:n_estimators': 64}, dataset_properties={ 'task': 2, 'sparse': False, 'multilabel': False, 'multiclass': True, 'target_type': 'classification', 'signed': False})), ] Accuracy score 0.991111111111
import autosklearn.regression
def main():
    """Fit an auto-sklearn regressor on the Boston housing data.

    Declares column 3 as categorical via ``feat_type`` and reports the R2
    score on the held-out split.
    """
    X, y = sklearn.datasets.load_boston(return_X_y=True)
    feature_types = (['numerical'] * 3) + ['categorical'] + (['numerical'] * 9)
    X_train, X_test, y_train, y_test = \
        sklearn.model_selection.train_test_split(X, y, random_state=1)
    automl = autosklearn.regression.AutoSklearnRegressor(
        time_left_for_this_task=120, per_run_time_limit=30,
        # Fixed folder-name typo ('autoslearn' -> 'autosklearn') so the tmp
        # folder matches the naming of the output folder below.
        tmp_folder='/tmp/autosklearn_regression_example_tmp',
        output_folder='/tmp/autosklearn_regression_example_out')
    automl.fit(X_train, y_train, dataset_name='boston',
               feat_type=feature_types)
    print(automl.show_models())
    predictions = automl.predict(X_test)
    print("R2 score:", sklearn.metrics.r2_score(y_test, predictions))


if __name__ == '__main__':
    main()
[WARNING] [2017-08-05 17:21:45,349:AutoMLSMBO(1)::boston] Could not find meta-data directory /Users/tarrysingh/anaconda/lib/python3.6/site-packages/autosklearn/metalearning/files/r2_regression_dense
You are already timing task: index_run2 You are already timing task: index_run2 You are already timing task: index_run2 You are already timing task: index_run3 You are already timing task: index_run3 You are already timing task: index_run3 You are already timing task: index_run3 You are already timing task: index_run3 You are already timing task: index_run4 You are already timing task: index_run5 You are already timing task: index_run5 You are already timing task: index_run5 You are already timing task: index_run5 You are already timing task: index_run6 You are already timing task: index_run6 You are already timing task: index_run6 You are already timing task: index_run7 You are already timing task: index_run7
[(0.660000, SimpleRegressionPipeline({'imputation:strategy': 'median', 'one_hot_encoding:use_minimum_fraction': 'True', 'preprocessor:__choice__': 'feature_agglomeration', 'regressor:__choice__': 'random_forest', 'rescaling:__choice__': 'standardize', 'one_hot_encoding:minimum_fraction': 0.010836306032657955, 'preprocessor:feature_agglomeration:affinity': 'euclidean', 'preprocessor:feature_agglomeration:linkage': 'ward', 'preprocessor:feature_agglomeration:n_clusters': 25, 'preprocessor:feature_agglomeration:pooling_func': 'mean', 'regressor:random_forest:bootstrap': 'False', 'regressor:random_forest:criterion': 'mse', 'regressor:random_forest:max_depth': 'None', 'regressor:random_forest:max_features': 4.418965161789183, 'regressor:random_forest:max_leaf_nodes': 'None', 'regressor:random_forest:min_samples_leaf': 2, 'regressor:random_forest:min_samples_split': 14, 'regressor:random_forest:min_weight_fraction_leaf': 0.0, 'regressor:random_forest:n_estimators': 100}, dataset_properties={ 'task': 4, 'sparse': False, 'multilabel': False, 'multiclass': False, 'target_type': 'regression', 'signed': False})), (0.200000, SimpleRegressionPipeline({'imputation:strategy': 'most_frequent', 'one_hot_encoding:use_minimum_fraction': 'True', 'preprocessor:__choice__': 'random_trees_embedding', 'regressor:__choice__': 'ridge_regression', 'rescaling:__choice__': 'standardize', 'one_hot_encoding:minimum_fraction': 0.00017301224964059824, 'preprocessor:random_trees_embedding:max_depth': 6, 'preprocessor:random_trees_embedding:max_leaf_nodes': 'None', 'preprocessor:random_trees_embedding:min_samples_leaf': 4, 'preprocessor:random_trees_embedding:min_samples_split': 17, 'preprocessor:random_trees_embedding:min_weight_fraction_leaf': 1.0, 'preprocessor:random_trees_embedding:n_estimators': 56, 'regressor:ridge_regression:alpha': 4.110147069014959, 'regressor:ridge_regression:fit_intercept': 'True', 'regressor:ridge_regression:tol': 0.003182277286417395}, dataset_properties={ 'task': 4, 
'sparse': False, 'multilabel': False, 'multiclass': False, 'target_type': 'regression', 'signed': False})), (0.140000, SimpleRegressionPipeline({'imputation:strategy': 'most_frequent', 'one_hot_encoding:use_minimum_fraction': 'False', 'preprocessor:__choice__': 'random_trees_embedding', 'regressor:__choice__': 'ridge_regression', 'rescaling:__choice__': 'standardize', 'preprocessor:random_trees_embedding:max_depth': 6, 'preprocessor:random_trees_embedding:max_leaf_nodes': 'None', 'preprocessor:random_trees_embedding:min_samples_leaf': 18, 'preprocessor:random_trees_embedding:min_samples_split': 20, 'preprocessor:random_trees_embedding:min_weight_fraction_leaf': 1.0, 'preprocessor:random_trees_embedding:n_estimators': 86, 'regressor:ridge_regression:alpha': 4.152939579336265e-05, 'regressor:ridge_regression:fit_intercept': 'True', 'regressor:ridge_regression:tol': 0.00038470779387244015}, dataset_properties={ 'task': 4, 'sparse': False, 'multilabel': False, 'multiclass': False, 'target_type': 'regression', 'signed': False})), ] R2 score: 0.86442782432
def main():
    """Sequential example (repeat cell): fit models, then build the ensemble.

    Identical workflow to the earlier sequential example: the search runs
    with ensemble_size=0 and fit_ensemble() assembles 50 members afterwards.
    """
    X, y = sklearn.datasets.load_digits(return_X_y=True)
    X_train, X_test, y_train, y_test = \
        sklearn.model_selection.train_test_split(X, y, random_state=1)
    automl = autosklearn.classification.AutoSklearnClassifier(
        time_left_for_this_task=120, per_run_time_limit=30,
        # Fixed folder-name typo ('autoslearn' -> 'autosklearn') so the tmp
        # folder matches the naming of the output folder below.
        tmp_folder='/tmp/autosklearn_sequential_example_tmp',
        output_folder='/tmp/autosklearn_sequential_example_out',
        # Do not construct ensembles in parallel to avoid using more than one
        # core at a time. The ensemble will be constructed after auto-sklearn
        # finished fitting all machine learning models.
        ensemble_size=0, delete_tmp_folder_after_terminate=False)
    automl.fit(X_train, y_train, dataset_name='digits')
    # This call to fit_ensemble uses all models trained in the previous call
    # to fit to build an ensemble which can be used with automl.predict()
    automl.fit_ensemble(y_train, ensemble_size=50)
    print(automl.show_models())
    predictions = automl.predict(X_test)
    print(automl.sprint_statistics())
    print("Accuracy score", sklearn.metrics.accuracy_score(y_test, predictions))


if __name__ == '__main__':
    main()
[(0.960000, MyDummyClassifier(configuration=1, init_params=None, random_state=None)), (0.020000, SimpleClassificationPipeline({'balancing:strategy': 'weighting', 'classifier:__choice__': 'gradient_boosting', 'imputation:strategy': 'mean', 'one_hot_encoding:use_minimum_fraction': 'True', 'preprocessor:__choice__': 'no_preprocessing', 'rescaling:__choice__': 'standardize', 'classifier:gradient_boosting:learning_rate': 0.03627152792976942, 'classifier:gradient_boosting:loss': 'deviance', 'classifier:gradient_boosting:max_depth': 10, 'classifier:gradient_boosting:max_features': 4.211238636565405, 'classifier:gradient_boosting:max_leaf_nodes': 'None', 'classifier:gradient_boosting:min_samples_leaf': 15, 'classifier:gradient_boosting:min_samples_split': 16, 'classifier:gradient_boosting:min_weight_fraction_leaf': 0.0, 'classifier:gradient_boosting:n_estimators': 340, 'classifier:gradient_boosting:subsample': 0.6289005711340923, 'one_hot_encoding:minimum_fraction': 0.0002148748655476835}, dataset_properties={ 'task': 2, 'sparse': False, 'multilabel': False, 'multiclass': True, 'target_type': 'classification', 'signed': False})), (0.020000, SimpleClassificationPipeline({'balancing:strategy': 'weighting', 'classifier:__choice__': 'k_nearest_neighbors', 'imputation:strategy': 'mean', 'one_hot_encoding:use_minimum_fraction': 'True', 'preprocessor:__choice__': 'no_preprocessing', 'rescaling:__choice__': 'none', 'classifier:k_nearest_neighbors:n_neighbors': 2, 'classifier:k_nearest_neighbors:p': 1, 'classifier:k_nearest_neighbors:weights': 'distance', 'one_hot_encoding:minimum_fraction': 0.3530578080502024}, dataset_properties={ 'task': 2, 'sparse': False, 'multilabel': False, 'multiclass': True, 'target_type': 'classification', 'signed': False})), ] auto-sklearn results: Dataset name: digits Metric: accuracy Best validation score: 0.982022 Number of target algorithm runs: 23 Number of successful target algorithm runs: 0 Number of crashed target algorithm runs: 0 Number of 
target algorithms that exceeded the memory limit: 0 Number of target algorithms that exceeded the time limit: 0 Accuracy score 0.991111111111