#!/usr/bin/env python
# coding: utf-8

# # Fault detection
#
# We'll consider the problem of identifying faults that have occurred in a system, based on sensor measurements of system performance.
#
# # Topic references
#
# - [Samar, Sikandar, Dimitry Gorinevsky, and Stephen Boyd. "Likelihood Bounds for Constrained Estimation with Uncertainty." Proceedings of the 44th IEEE Conference on Decision and Control and the European Control Conference (CDC-ECC '05). IEEE, 2005.](http://web.stanford.edu/~boyd/papers/pdf/map_bounds.pdf)
#
# # Problem statement
#
# Each of $n$ possible faults occurs independently with probability $p$.
# The vector $x \in \lbrace 0,1 \rbrace^{n}$ encodes the fault occurrences, with $x_i = 1$ indicating that fault $i$ has occurred.
# System performance is measured by $m$ sensors. The sensor output is
# \begin{equation}
# y = Ax + v = \sum_{i=1}^n a_i x_i + v,
# \end{equation}
# where $A \in \mathbf{R}^{m \times n}$ is the sensing matrix whose column $a_i$ is the **fault signature** of fault $i$,
# and $v \in \mathbf{R}^m$ is a noise vector whose entries $v_j$ are Gaussian with mean 0 and variance $\sigma^2$.
#
# The objective is to guess $x$ (which faults have occurred) given $y$ (the sensor measurements).
#
# We are interested in the setting where $n > m$, that is, where there are more possible faults than measurements.
# In this setting we can expect good recovery when the vector $x$ is sparse, which is the subject of compressed sensing.
#
# # Solution approach
#
# To identify the faults, one reasonable approach is to choose $x \in \lbrace 0,1 \rbrace^{n}$ to minimize the negative log-likelihood function
#
# \begin{equation}
# \ell(x) = \frac{1}{2 \sigma^2} \|Ax-y\|_2^2 + \log(1/p-1)\mathbf{1}^T x + c.
# \end{equation}
#
# However, because of the constraint that $x$ be Boolean, this problem is nonconvex and NP-hard.
#
# To make the problem tractable, we relax the Boolean constraints to interval constraints $x_i \in [0,1]$.
# The resulting optimization problem
#
# \begin{array}{ll}
# \mbox{minimize} & \|Ax-y\|_2^2 + 2 \sigma^2 \log(1/p-1)\mathbf{1}^T x\\
# \mbox{subject to} & 0 \leq x_i \leq 1, \quad i=1, \ldots, n
# \end{array}
#
# is convex. We'll refer to its solution as the **relaxed ML** estimate.
#
# Rounding each entry of the relaxed ML estimate to the nearer of 0 and 1 gives a Boolean estimate of the fault occurrences.
#
# # Example
#
# We'll generate an example with $n = 2000$ possible faults, $m = 200$ measurements, and fault probability $p = 0.01$.
# We'll choose $\sigma^2$ so that the signal-to-noise ratio is 5. That is,
# \begin{equation}
# \sqrt{\frac{\mathbf{E}\|Ax \|^2_2}{\mathbf{E} \|v\|_2^2}} = 5.
# \end{equation}
# Since the entries of $A$ are i.i.d. standard normal and the faults occur independently with probability $p$, we have $\mathbf{E}\|Ax\|_2^2 = mnp$ and $\mathbf{E}\|v\|_2^2 = m\sigma^2$, which gives $\sigma = \sqrt{np}/5$.

# In[1]:


import numpy as np
import matplotlib.pyplot as plt
np.random.seed(1)

n = 2000   # number of possible faults
m = 200    # number of sensor measurements
p = 0.01   # probability that any given fault occurs
snr = 5

sigma = np.sqrt(p*n/(snr**2))   # noise standard deviation for the target SNR
A = np.random.randn(m, n)
x_true = (np.random.rand(n) <= p).astype(int)
v = sigma*np.random.randn(m)

y = A.dot(x_true) + v


# Below, we plot $x$, $Ax$, and the noise $v$.

# In[2]:


plt.plot(range(n), x_true)


# In[3]:


plt.plot(range(m), A.dot(x_true), range(m), v)
plt.legend(('Ax', 'v'))
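
# As a quick sanity check (a small added cell, not part of the original example), we can compare the empirical SNR of this particular draw against the target value of 5; rough agreement is all we expect from a single random sample.

# In[4]:


# Empirical SNR for this realization of A, x_true, and v.
snr_emp = np.linalg.norm(A.dot(x_true))/np.linalg.norm(v)
print("target SNR: {}, empirical SNR: {:.2f}".format(snr, snr_emp))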
# # Recovery
#
# We solve the relaxed maximum likelihood problem with CVXPY and then round the result to get a Boolean solution.

# In[5]:


get_ipython().run_cell_magic('time', '', 'import cvxpy as cp\nx = cp.Variable(shape=n)\ntau = 2*cp.log(1/p - 1)*sigma**2\nobj = cp.Minimize(cp.sum_squares(A @ x - y) + tau*cp.sum(x))\nconst = [0 <= x, x <= 1]\ncp.Problem(obj, const).solve(verbose=True)\nprint("final objective value: {}".format(obj.value))\n\n# relaxed ML estimate\nx_rml = np.array(x.value).flatten()\n\n# rounded solution\nx_rnd = (x_rml >= .5).astype(int)\n')


# # Evaluation
#
# We define a function for computing the estimation errors and a function for plotting $x$, the relaxed ML estimate, and the rounded solution.

# In[6]:


import matplotlib

def errors(x_true, x, threshold=.5):
    '''Return estimation errors.

    Return the true number of faults, the number of false positives,
    and the number of false negatives.
    '''
    k = sum(x_true)
    false_pos = sum(np.logical_and(x_true < threshold, x >= threshold))
    false_neg = sum(np.logical_and(x_true >= threshold, x < threshold))
    return (k, false_pos, false_neg)

def plotXs(x_true, x_rml, x_rnd, filename=None):
    '''Plot true, relaxed ML, and rounded solutions.'''
    matplotlib.rcParams.update({'font.size': 14})
    xs = [x_true, x_rml, x_rnd]
    titles = ['x_true', 'x_rml', 'x_rnd']

    n = len(x_true)

    fig, ax = plt.subplots(1, 3, sharex=True, sharey=True, figsize=(12, 3))
    for i, x in enumerate(xs):
        ax[i].plot(range(n), x)
        ax[i].set_title(titles[i])
        ax[i].set_ylim([0, 1])

    if filename:
        fig.savefig(filename, bbox_inches='tight')

    return errors(x_true, x_rml, .5)


# We see that out of 20 actual faults, the rounded solution gives perfect recovery, with 0 false positives and 0 false negatives.

# In[7]:


plotXs(x_true, x_rml, x_rnd, 'fault.pdf')
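
# Finally, here is an added sketch (not in the original notebook) that reports the errors for the rounded solution directly and checks how good the rounding is. Since the Boolean vectors are a subset of $[0,1]^n$, the optimal value of the relaxed problem is a lower bound on the objective over Boolean $x$; a small gap between the rounded solution's objective and the relaxed optimum therefore certifies that `x_rnd` is near-optimal.

# In[8]:


# Scalar value of the regularization weight used in the solve above.
tau_val = 2*np.log(1/p - 1)*sigma**2

def objective(x):
    '''Scaled negative log-likelihood, 2*sigma^2*(l(x) - c).'''
    return np.sum((A.dot(x) - y)**2) + tau_val*np.sum(x)

k, fp, fn = errors(x_true, x_rnd)
print("true faults: {}, false positives: {}, false negatives: {}".format(k, fp, fn))
print("relaxed optimum (lower bound): {:.1f}".format(objective(x_rml)))
print("rounded Boolean objective:     {:.1f}".format(objective(x_rnd)))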