Import all necessary modules from Shogun
from modshogun import RealFeatures, RegressionLabels, GaussianKernel, Math
from modshogun import GaussianLikelihood, ZeroMean, ExactInferenceMethod, GaussianProcessRegression
Generate some data, a 1d noisy sine wave, evaluated at random points
# Sample n input locations uniformly in [0, x_range) and evaluate a noisy
# sine wave at them (amplitude 1, frequency 1, Gaussian observation noise).
n=15
x_range=4*pi
y_noise_variance=0.1
y_amplitude=1
y_frequency=1

# Training inputs as a (1 x n) matrix, targets with additive noise.
X=x_range*random.rand(1,n)
Y=y_amplitude*sin(y_frequency*X)+sqrt(y_noise_variance)*randn(n)

# Dense grid and the noise-free generating function for plotting.
X_test=linspace(0,x_range, 200)
Y_true=sin(X_test)

# Show the ground truth curve and the noisy samples.
plot(X_test,Y_true, 'b-')
plot(X,Y, 'ro')
_=legend(['data generating model', 'noisy observations'])
Convert data into Shogun representation, and print the dimensions to be sure the data was passed in correctly
# Wrap the data in Shogun types: labels take a flat 1d vector, features take
# a (num_features x num_vectors) matrix — hence the single-row reshape.
labels=RegressionLabels(Y.ravel())
feats_train=RealFeatures(X)
feats_test=RealFeatures(reshape(X_test, (1, len(X_test))))

# Sanity-check dimensions for both feature containers (train, then test).
for feats in (feats_train, feats_test):
    print(feats.get_num_vectors())
    print(feats.get_num_features())
15 1 200 1
Specify a Shogun GP (exact GP-regression) with fixed hyper-parameters and pass it the data
# Fixed hyper-parameters for the GP: Gaussian kernel width and the
# observation-noise parameter of the Gaussian likelihood.
kernel_sigma = 1
gp_obs_noise = 0.5

# Model ingredients: likelihood, covariance function, and prior mean.
lik = GaussianLikelihood(gp_obs_noise)
kernel = GaussianKernel(10, kernel_sigma)
mean = ZeroMean()

# With a Gaussian likelihood the posterior is available in closed form,
# so exact inference can be used.
inf = ExactInferenceMethod(kernel, feats_train, mean, labels, lik)
gp = GaussianProcessRegression(inf)
Train GP and plot its predictions
# Train the GP, i.e. compute the posterior for the fixed hyper-parameters
# (result assigned to _ to suppress notebook output).
_=gp.train()
Perform inference and plot predictions on full range
# Apply the trained GP to the dense test grid and pull out the posterior
# mean predictions as a plain array.
predictions = gp.apply(feats_test)
Y_test = predictions.get_labels()

# Plot order matters here: it determines the legend entry order below.
plot(X_test, Y_true, 'b')
plot(X_test, Y_test, 'r-')
plot(X, Y, 'ro')
legend_entries = ['data generating model', 'mean predictions', 'noisy observations']
_ = legend(legend_entries)
So far so good. The nice thing is that we have a full distribution over the predictions, not just point estimates
# Query the predictive mean and marginal variance at every test point.
# NOTE(review): this rebinds the name "mean" (previously the ZeroMean
# object); the model objects are re-created later before they are reused.
mean = gp.get_mean_vector(feats_test)
variance = gp.get_variance_vector(feats_test)

# Shade the 95% confidence band: mean +/- 1.96 standard deviations.
plot(X_test, Y_true, 'b')
plot(X_test, Y_test, 'r-')
plot(X, Y, 'ro')
error = 1.96*sqrt(variance)
lower = mean - error
upper = mean + error
fill_between(X_test, lower, upper, color='grey')
ylim([-y_amplitude, y_amplitude+1])
_ = legend(['data generating model', 'mean predictions', 'noisy observations'])
We can visualise it in an even fancier way
from scipy.stats import norm

# Predictive mean and marginal variance at every test point.
means = gp.get_mean_vector(feats_test)
variances = gp.get_variance_vector(feats_test)

# Grid of y-values at which the predictive density is evaluated.
y_values = linspace(-y_amplitude - 2*y_noise_variance, y_amplitude + 2*y_noise_variance)

# D[j, i] = predictive density of y_values[j] at test point X_test[i].
D = zeros((len(y_values), len(X_test)))
for i in range(shape(D)[1]):
    # norm.pdf's third argument ("scale") is the standard deviation, so
    # pass sqrt of the predictive variance. (The original passed the
    # variance directly, mis-scaling the density, and also evaluated
    # norm.pdf an extra time per iteration with the result discarded.)
    D[:, i] = norm.pdf(y_values, means[i], sqrt(variances[i]))

# Density heat map with the truth, mean prediction, and data on top.
pcolor(X_test, y_values, D)
plot(X_test, Y_true, 'b')
plot(X_test, Y_test, 'r-')
_ = plot(X, Y, 'ro')
Now let's learn the best model parameters with Maximum Likelihood II
from shogun.ModelSelection import GradientModelSelection, ModelSelectionParameters, R_LINEAR, R_EXP
from shogun.Regression import GradientCriterion, GradientEvaluation
# Re-create the model objects with the same fixed initial hyper-parameters
# as above, so the gradient search starts from a known configuration.
kernel=GaussianKernel(10, kernel_sigma)
mean=ZeroMean()
lik=GaussianLikelihood(gp_obs_noise)
inf=ExactInferenceMethod(kernel, feats_train, mean, labels, lik)
gp = GaussianProcessRegression(inf)
# Build the model-selection parameter tree, mirroring the object graph:
# root -> inference method -> {likelihood -> sigma, scale, kernel -> width}.
root = ModelSelectionParameters()

inference_node = ModelSelectionParameters("inference_method", inf)
root.append_child(inference_node)

# Likelihood noise "sigma", searched over an R_EXP grid on [-1, 1]
# (exponentially spaced values — see Shogun model-selection docs).
likelihood_node = ModelSelectionParameters("likelihood_model", lik)
inference_node.append_child(likelihood_node)
sigma_node = ModelSelectionParameters("sigma")
likelihood_node.append_child(sigma_node)
sigma_node.build_values(-1.0, 1.0, R_EXP)

# Kernel "scale" of the inference method, same grid.
scale_node = ModelSelectionParameters("scale")
inference_node.append_child(scale_node)
scale_node.build_values(-1.0, 1.0, R_EXP)

# Gaussian kernel "width", same grid.
kernel_node = ModelSelectionParameters("kernel", kernel)
inference_node.append_child(kernel_node)
width_node = ModelSelectionParameters("width")
kernel_node.append_child(width_node)
width_node.build_values(-1.0, 1.0, R_EXP)
# Gradient criterion: model selection follows the derivatives of the
# (marginal likelihood) objective supplied by the inference method.
crit = GradientCriterion()
grad = GradientEvaluation(gp, feats_train, labels, crit)
grad.set_function(inf)

# Show which parameters are available for model selection, and print the
# parameter tree that was just built.
gp.print_modsel_params()
root.print_tree()
# Gradient-based search over the parameter tree (gradient descent on the
# marginal likelihood).
grad_search = GradientModelSelection(root, grad)

# Autolocking off to avoid warnings during repeated evaluations.
grad.set_autolock(False)

# Run the search (the True flag presumably enables state printing —
# confirm against Shogun docs) and write the best combination back into
# the GP's underlying objects.
best_combination = grad_search.select_model(True)
best_combination.apply_to_machine(gp)

# Re-train and predict with the learned hyper-parameters.
gp.train()
predictions = gp.apply(feats_test)
Y_test = predictions.get_labels()
# Visualise the predictive distribution under the learned hyper-parameters.
means = gp.get_mean_vector(feats_test)
variances = gp.get_variance_vector(feats_test)
y_values = linspace(-y_amplitude - 2*y_noise_variance, y_amplitude + 2*y_noise_variance)

# D[j, i] = predictive density of y_values[j] at test point X_test[i].
D = zeros((len(y_values), len(X_test)))
for i in range(shape(D)[1]):
    # norm.pdf's "scale" argument is the standard deviation, so pass
    # sqrt of the predictive variance. (The original passed the variance
    # directly, mis-scaling the density, and also evaluated norm.pdf an
    # extra time per iteration with the result discarded.)
    D[:, i] = norm.pdf(y_values, means[i], sqrt(variances[i]))

# Density heat map with the truth, mean prediction, and data on top.
pcolor(X_test, y_values, D)
plot(X_test, Y_true, 'b')
plot(X_test, Y_test, 'r-')
_ = plot(X, Y, 'ro')
# print best parameters
print "kernel width", kernel.get_width()
print "kernel scalling", inf.get_scale()
print "noise level", lik.get_sigma()
kernel width 3.6679925725 kernel scaling 0.785651526209 noise level 0.5