%matplotlib inline
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
# Load the grid search results table from its JSON dump.
results = pd.read_json('letor_gridresults.json')
# Use the same 50 bin edges for both histograms so they overlay directly.
bins = np.linspace(0.2, 0.8, 50)
results.train_score.hist(bins=bins, alpha=0.5, label='train')
results.validation_score.hist(bins=bins, alpha=0.5, label='validation')
plt.legend(loc='best')
# Bind the return value to `_` so the notebook does not echo the Text object.
_ = plt.title("Distribution of NDCG@10")
# Scatter the validation score against the training score, one point per row.
plt.scatter(x=results['train_score'], y=results['validation_score'])
plt.xlabel('Training score')
# Bind the return value to `_` so the notebook does not echo the Text object.
_ = plt.ylabel('Validation score')
# Scatter the validation score against the training time, one point per row.
plt.scatter(x=results['training_time'], y=results['validation_score'])
plt.xlabel('Training time')
# Bind the return value to `_` so the notebook does not echo the Text object.
_ = plt.ylabel('Validation score')
# Display the column labels of the results table.
results.columns
Index([u'data_load_time', u'learning_rate', u'loss', u'max_depth', u'max_features', u'model_filename', u'model_save_time', u'model_size_bytes', u'prediction_time', u'subsample', u'train_score', u'training_time', u'validation_score'], dtype='object')
# Keep the 50 rows with the highest validation score, best first.
# DataFrame.sort() was deprecated in pandas 0.17 and removed in 0.20;
# sort_values() is its replacement for sorting by column.
top_models = results.sort_values('validation_score', ascending=False)[:50]
# Show the ten best rows.
top_models.head(10)
 | data_load_time | learning_rate | loss | max_depth | max_features | model_filename | model_save_time | model_size_bytes | prediction_time | subsample | train_score | training_time | validation_score |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
237 | 0.046811 | 0.05 | ls | 5 | 100 | /scratch/ogrisel/grid_jobs/8e17bbd1ff21e1fcd06... | 0.051859 | 519341 | 0.594188 | 0.8 | 0.586927 | 195.013033 | 0.526397 |
29 | 0.066905 | 0.05 | huber | 5 | 100 | /scratch/ogrisel/grid_jobs/aa876001591bea153e4... | 0.016508 | 517042 | 0.486399 | 0.5 | 0.569554 | 103.655520 | 0.525084 |
66 | 0.064961 | 0.05 | ls | 5 | 50 | /scratch/ogrisel/grid_jobs/22f2dfd319c2382811a... | 0.014300 | 519384 | 0.336526 | 1.0 | 0.584439 | 76.862586 | 0.524646 |
394 | 0.054464 | 0.05 | ls | 5 | 50 | /scratch/ogrisel/grid_jobs/4a66992f6fd01847259... | 0.024513 | 512288 | 0.375913 | 0.5 | 0.577110 | 49.941632 | 0.524414 |
294 | 0.069219 | 0.10 | ls | 5 | 20 | /scratch/ogrisel/grid_jobs/d67d59a5b3f7099c863... | 0.024924 | 509243 | 0.405121 | 1.0 | 0.604082 | 44.688680 | 0.523742 |
379 | 0.070266 | 0.05 | ls | 5 | 50 | /scratch/ogrisel/grid_jobs/d510a2d1b4ff500c0b1... | 0.025867 | 519145 | 0.428217 | 0.8 | 0.575713 | 99.483197 | 0.523702 |
138 | 0.046001 | 0.05 | ls | 5 | 100 | /scratch/ogrisel/grid_jobs/4fa6ae1a7fe80334d60... | 0.015496 | 517583 | 0.338197 | 1.0 | 0.581882 | 213.198879 | 0.523508 |
387 | 0.047395 | 0.05 | ls | 5 | 20 | /scratch/ogrisel/grid_jobs/14aef0de77bdf33f9f5... | 0.023361 | 518827 | 0.716907 | 1.0 | 0.580399 | 48.495409 | 0.523483 |
426 | 0.060194 | 0.05 | huber | 5 | 100 | /scratch/ogrisel/grid_jobs/95b8e270ad6ff25f058... | 0.024757 | 518748 | 0.362902 | 0.8 | 0.574295 | 179.680194 | 0.523251 |
307 | 0.054457 | 0.05 | huber | 5 | 50 | /scratch/ogrisel/grid_jobs/1d55f52a03a113daa14... | 0.026856 | 523716 | 0.479956 | 0.8 | 0.571901 | 70.466220 | 0.523189 |
10 rows × 13 columns
# Hyper-parameter columns used to group the top models.
parameters = [
    'learning_rate',
    'loss',
    'max_depth',
    'max_features',
    'subsample',
]
# One box plot per hyper-parameter: validation score of the top models,
# grouped by the values of that hyper-parameter.
for parameter in parameters:
    top_models.boxplot(column='validation_score', by=parameter)
# Hyper-parameter columns used to group the top models.
parameters = [
    'learning_rate',
    'loss',
    'max_depth',
    'max_features',
    'subsample',
]
# One box plot per hyper-parameter: training time of the top models,
# grouped by the values of that hyper-parameter.
for parameter in parameters:
    top_models.boxplot(column='training_time', by=parameter)