#!/usr/bin/env python
# coding: utf-8

# Parametric non-parametric inference
# ===================
# 
# Suppose you have a physical model of an output variable, which takes the form of a parametric model. You now want to model the random effects of the data by a non-parametric (better: infinite parametric) model, such as a Gaussian process as described in [BayesianLinearRegression](../background/BayesianLinearRegression.ipynb). We can do inference in both worlds, the parametric and the infinite parametric one, by extending the features to a mix between the two:
# 
# \begin{align}
# p(\mathbf{y}|\boldsymbol{\Phi}, \alpha, \sigma) &= \int p(\mathbf{y}|\boldsymbol{\Phi}, \mathbf{w}, \sigma)p(\mathbf{w}|\alpha) \,\mathrm{d}\mathbf{w}\\
# &= \langle\mathcal{N}(\mathbf{y}|\boldsymbol{\Phi}\mathbf{w}, \sigma^2\mathbf{I})\rangle_{\mathcal{N}(\mathbf{0}, \alpha\mathbf{I})}\\
# &= \mathcal{N}(\mathbf{y}|\mathbf{0}, \alpha\boldsymbol{\Phi}\boldsymbol{\Phi}^\top + \sigma^2\mathbf{I})
# \end{align}
# 
# Thus, we can maximize this marginal likelihood w.r.t. the hyperparameters $\alpha, \sigma$ by log transforming and maximizing:
# 
# \begin{align}
# \hat\alpha, \hat\sigma = \mathop{\arg\max}_{\alpha, \sigma}\log p(\mathbf{y}|\boldsymbol{\Phi}, \alpha, \sigma)
# \end{align}
# 
# So we will define a mixed inference model, combining parametric and non-parametric parts. One part is described by a parametric feature space mapping $\boldsymbol{\Phi}\mathbf{w}$, the other part is a non-parametric function $\mathbf{f}_\text{n}$. For this we define the underlying function $\mathbf{f}$ as
# 
# $$
# \begin{align}
# p(\mathbf{f}) &= p\left(
# \underbrace{
# \begin{bmatrix}
# \delta(t-T) & \boldsymbol{\Phi}
# \end{bmatrix}
# }_{=:\mathbf{A}}
# \left.
# \begin{bmatrix}
# \mathbf{f}_{\text{n}}\\
# \mathbf{w}
# \end{bmatrix}
# \right|
# \mathbf{0},
# \mathbf{A}
# \underbrace{
# \begin{bmatrix}
# \mathbf{K}_{\mathbf{f}} & \\
#  & \mathbf{K}_{\mathbf{w}}
# \end{bmatrix}
# }_{=:\boldsymbol{\Sigma}}
# \mathbf{A}^\top
# \right)\enspace,
# \end{align}
# $$
# 
# where $\mathbf{K}_{\mathbf{f}}$ is the covariance describing the non-parametric part $\mathbf{f}_\text{n}\sim\mathcal{N}(\mathbf{0}, \mathbf{K}_\mathbf{f})$ and $\mathbf{K}_{\mathbf{w}}$ is the covariance of the prior over the weights $\mathbf{w}\sim\mathcal{N}(\mathbf{w}|\mathbf{0}, \mathbf{K}_{\mathbf{w}})$.
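# Reading $\delta(t-T)$ as the identity matrix at the training inputs, $\mathbf{A}\boldsymbol{\Sigma}\mathbf{A}^\top$ collapses to the sum of the two covariances, $\mathbf{K}_\mathbf{f} + \boldsymbol{\Phi}\mathbf{K}_\mathbf{w}\boldsymbol{\Phi}^\top$. The following cell is a minimal numpy sketch checking this identity; all names in it (`Phi_demo`, `K_f_demo`, ...) are illustrative only and not part of the model code below.

# In[ ]:

import numpy as np

N, Q = 5, 2                                     # inputs and parametric features
t = np.linspace(0, 1, N)[:, None]
Phi_demo = np.hstack([np.ones((N, 1)), t])      # bias + slope features
K_f_demo = np.exp(-.5 * (t - t.T)**2 / .1**2)   # non-parametric covariance K_f
K_w_demo = 3. * np.eye(Q)                       # prior covariance K_w of the weights

# A stacks the identity (evaluating f_n at the inputs) next to Phi:
A_demo = np.hstack([np.eye(N), Phi_demo])
Sigma_demo = np.block([[K_f_demo, np.zeros((N, Q))],
                       [np.zeros((Q, N)), K_w_demo]])
assert np.allclose(A_demo.dot(Sigma_demo).dot(A_demo.T),
                   K_f_demo + Phi_demo.dot(K_w_demo).dot(Phi_demo.T))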
# Thus we can now predict the different parts, and even the parameters $\mathbf{w}$ themselves, using (Note: if someone is willing to write down the proper derivation of this, contributions are very welcome. Thanks to Philipp Hennig for his ideas on this.)
# 
# $$
# \begin{align}
# p(\mathbf{f}|\mathbf{y}) &=
# \mathcal{N}(\mathbf{f} |
# \boldsymbol{\Sigma}\mathbf{A}^\top
# \underbrace{
# (\mathbf{A}\boldsymbol{\Sigma}\mathbf{A}^\top + \sigma^2\mathbf{I})^{-1}}_{=:\mathbf{K}^{-1}}\mathbf{y},
# \boldsymbol{\Sigma}-\boldsymbol{\Sigma}\mathbf{A}^\top\mathbf{K}^{-1}\mathbf{A}\boldsymbol{\Sigma})
# \\
# p(\mathbf{w}|\mathbf{y}) &= \mathcal{N}(\mathbf{w} | \mathbf{K}_\mathbf{w}\boldsymbol{\Phi}^\top\mathbf{K}^{-1}\mathbf{y},
# \mathbf{K}_{\mathbf{w}}-\mathbf{K}_{\mathbf{w}}\boldsymbol{\Phi}^\top\mathbf{K}^{-1}\boldsymbol{\Phi}\mathbf{K}_{\mathbf{w}})
# \\
# p(\mathbf{f}_\text{n}|\mathbf{y}) &= \mathcal{N}(\mathbf{f}_\text{n}| \mathbf{K}_\mathbf{f}\mathbf{K}^{-1}\mathbf{y},
# \mathbf{K}_{\mathbf{f}}-\mathbf{K}_{\mathbf{f}}\mathbf{K}^{-1}\mathbf{K}_{\mathbf{f}})
# \end{align}
# $$

# In[1]:

import GPy, numpy as np, pandas as pd
from GPy.kern import LinearSlopeBasisFuncKernel, DomainKernel, ChangePointBasisFuncKernel
get_ipython().run_line_magic('matplotlib', 'inline')
from matplotlib import pyplot as plt

# We will create some data with a non-linear function, strongly driven by piecewise linear trends:

# In[2]:

np.random.seed(12345)

# In[3]:

x = np.random.uniform(0, 10, 40)[:, None]
x.sort(0)
starts, stops = np.arange(0, 10, 3), np.arange(1, 11, 3)
k_lin = LinearSlopeBasisFuncKernel(1, starts, stops, variance=1., ARD=1)
Phi = k_lin.phi(x)
_ = plt.plot(x, Phi)

# We will sample the weights as $w_i\sim\mathcal{N}(0, \alpha_i^2)$ with scales $\alpha_i\sim\text{Gamma}(3, 1)$, and assume a Matern32 structure in the non-parametric part. Additionally, we add a semi-parametric part, which is a periodic effect only active between $x\in[1,5]$ (the range of the DomainKernel below):

# In[4]:

k = GPy.kern.Matern32(1, .3)
Kf = k.K(x)

k_per = GPy.kern.PeriodicMatern32(1, variance=100, period=1)
k_per.period.fix()
k_dom = DomainKernel(1, 1., 5.)
k_perdom = k_per * k_dom
Kpd = k_perdom.K(x)

# In[5]:

np.random.seed(1234)
alpha = np.random.gamma(3, 1, Phi.shape[1])
w = np.random.normal(0, alpha)[:, None]

f_SE = np.random.multivariate_normal(np.zeros(x.shape[0]), Kf)[:, None]
f_perdom = np.random.multivariate_normal(np.zeros(x.shape[0]), Kpd)[:, None]
f_w = Phi.dot(w)

f = f_SE + f_w + f_perdom
y = f + np.random.normal(0, .1, f.shape)

# In[6]:

plt.plot(x, f_w)
_ = plt.plot(x, y)

# Make sure the function is strongly driven by the linear trend; otherwise the parametric and the non-parametric parts can be hard to distinguish (identifiability issues).
# 
# With this data, we can fit a model using the basis functions as the parametric part. If you want to implement your own basis function kernel, see GPy.kern._src.basis_funcs.BasisFuncKernel and implement the necessary parts. Usually it is enough to implement the phi(X) method, returning the higher dimensional mapping of the inputs X.
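# As a concrete illustration, below is a minimal sketch of such a custom kernel with a single quadratic basis function. The class name QuadraticBasisFuncKernel is purely hypothetical; also note that, depending on your GPy version, the import path may be GPy.kern.src instead of GPy.kern._src, and the hook to override may be the underscored _phi(X) rather than phi(X):

# In[ ]:

from GPy.kern._src.basis_funcs import BasisFuncKernel

class QuadraticBasisFuncKernel(BasisFuncKernel):
    """Hypothetical example: one quadratic basis function, phi(x) = x**2."""
    def _phi(self, X):
        # Return the (n_samples, n_basis_functions) feature mapping of X:
        return X**2

k_quad = QuadraticBasisFuncKernel(1, variance=1., name='quadratic')
print(k_quad.phi(x[:3]))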
# In[7]:

k = (GPy.kern.Bias(1)                  # constant offset
     + GPy.kern.Matern52(1)            # non-parametric part
     + LinearSlopeBasisFuncKernel(1, ARD=1, start=starts, stop=stops,
                                  variance=.1, name='linear_slopes')
     + k_perdom.copy()                 # periodic effect on its domain
    )
k.randomize()
m = GPy.models.GPRegression(x, y, k)

# In[8]:

m.checkgrad()

# In[9]:

m.optimize()

# In[10]:

m.plot()

# The posterior of each additive component can be predicted separately by handing the corresponding kernel part to the prediction:

# In[11]:

x_pred = np.linspace(0, 10, 500)[:, None]
pred_SE, var_SE = m._raw_predict(x_pred, kern=m.kern.Mat52)
pred_per, var_per = m._raw_predict(x_pred, kern=m.kern.mul)
pred_bias, var_bias = m._raw_predict(x_pred, kern=m.kern.bias)
pred_lin, var_lin = m._raw_predict(x_pred, kern=m.kern.linear_slopes)

# In[12]:

m.plot_f(resolution=500, predict_kw=dict(kern=m.kern.Mat52), plot_data=False)
plt.plot(x, f_SE)

# In[13]:

m.plot_f(resolution=500, predict_kw=dict(kern=m.kern.mul), plot_data=False)
plt.plot(x, f_perdom)

# In[14]:

m.plot_f(resolution=500, predict_kw=dict(kern=m.kern.linear_slopes), plot_data=False)
plt.plot(x, f_w)

# The posterior over the weights $\mathbf{w}$ of the parametric part can be read off the basis function kernel directly:

# In[15]:

w_pred, w_var = m.kern.linear_slopes.posterior_inf()

# In[16]:

df = pd.DataFrame(w, columns=['truth'], index=np.arange(Phi.shape[1]))
df['mean'] = w_pred
df['std'] = np.sqrt(w_var.diagonal())

# In[17]:

np.round(df, 2)
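# To see how well the weights were recovered, here is a short illustrative sketch (matplotlib only, not part of the original analysis) plotting the posterior means with two-standard-deviation error bars against the true weights:

# In[ ]:

plt.errorbar(df.index, df['mean'], yerr=2 * df['std'],
             fmt='o', label='posterior of w')
plt.plot(df.index, df['truth'], 'x', label='truth')
plt.xlabel('weight index')
_ = plt.legend()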