#!/usr/bin/env python
# coding: utf-8

# In[1]:

import numpy
import scipy
import matplotlib.pyplot as plt
import sklearn
import IPython.display
import librosa
import urllib.request  # Python 3: urlretrieve lives in urllib.request, not urllib

get_ipython().run_line_magic('matplotlib', 'inline')
plt.rcParams['figure.figsize'] = (14, 4)


# [&larr; Back to Index](index.html)

# # Nonnegative Matrix Factorization

# [Nonnegative matrix factorization](https://en.wikipedia.org/wiki/Non-negative_matrix_factorization) (NMF) is an algorithm that factorizes a nonnegative matrix, $X$, into a product of two nonnegative matrices, $W$ and $H$. It is an unsupervised iterative algorithm that minimizes a distance between $X$ and the product $WH$:
#
# $$ \min_{W, H} d(X, WH) $$

# If $X$ has dimensions $M$ by $N$, then $W$ will have dimensions $M$ by $R$, and $H$ will have dimensions $R$ by $N$, where the inner dimension $R$ is the *rank* or *number of components* of the decomposition.

# When applied to a musical signal, we find that NMF can decompose the signal into separate note events. Therefore, NMF is quite useful and popular for tasks such as transcription and source separation.
#
# The input, $X$, is often a magnitude spectrogram. In such a case, we find that the columns of $W$ represent spectra of note events, and the rows of $H$ represent temporal envelopes of the same note events.
# Download an audio file:

# In[2]:

import urllib.request  # local import so this cell runs under Python 3 even if the header cell is unchanged
import librosa.display  # modern librosa requires the display submodule to be imported explicitly

filename = 'conga_groove.wav'
urllib.request.urlretrieve('http://audio.musicinformationretrieval.com/' + filename,
                           filename=filename)


# Let's load a signal:

# In[3]:

x, fs = librosa.load(filename)
print(fs)


# Play the audio signal:

# In[4]:

x[0] = 1  # hack to prevent automatic gain scaling
IPython.display.Audio(x, rate=fs)


# Compute the spectrogram:

# In[5]:

S = librosa.stft(x)
print(S.shape)


# Display the spectrogram:

# In[6]:

# librosa.logamplitude was removed; amplitude_to_db is its replacement and
# expects a magnitude (nonnegative) input, so take the absolute value of S.
logX = librosa.amplitude_to_db(numpy.abs(S))
librosa.display.specshow(logX, sr=fs, x_axis='time', y_axis='log')


# ## `librosa.decompose.decompose`

# We will use [`librosa.decompose.decompose`](http://bmcfee.github.io/librosa/generated/librosa.decompose.decompose.html) to perform our factorization. `librosa` uses [`sklearn.decomposition.NMF`](http://scikit-learn.org/stable/modules/generated/sklearn.decomposition.NMF.html#sklearn.decomposition.NMF) by default as its factorization method.

# In[7]:

# NMF operates on a nonnegative matrix, so factorize the magnitude spectrogram.
X = numpy.absolute(S)
n_components = 6
W, H = librosa.decompose.decompose(X, n_components=n_components, sort=True)
print(W.shape)
print(H.shape)


# Let's display the spectra:

# In[8]:

# Number of subplot rows must be an int; numpy.ceil returns a float.
n_rows = int(numpy.ceil(n_components / 2.0))
logW = numpy.log10(W)
for n in range(n_components):
    plt.subplot(n_rows, 2, n + 1)
    plt.plot(logW[:, n])
    plt.ylim(-2, logW.max())
    plt.xlim(0, W.shape[0])
    plt.ylabel('Component %d' % n)


# Let's display the temporal activations:

# In[9]:

for n in range(n_components):
    plt.subplot(n_rows, 2, n + 1)
    plt.plot(H[n])
    plt.ylim(0, H.max())
    plt.xlim(0, H.shape[1])
    plt.ylabel('Component %d' % n)


# Finally, re-create the individual components, and listen to them. To do this, we will reconstruct the magnitude spectrogram from the NMF outputs and use the phase spectrogram from the original signal.
# In[10]:

# Reconstruct each NMF component: the rank-1 magnitude spectrogram
# outer(W[:, n], H[n]) is combined with the ORIGINAL phase of S, inverted
# with an ISTFT, and summed into the full reconstruction.
# NOTE: scipy.zeros/scipy.outer were removed from scipy's top-level
# namespace; use the numpy equivalents.
reconstructed_signal = numpy.zeros(len(x))
for n in range(n_components):
    Y = numpy.outer(W[:, n], H[n]) * numpy.exp(1j * numpy.angle(S))
    y = librosa.istft(Y)
    # istft output may be slightly shorter than x due to framing.
    reconstructed_signal[:len(y)] += y
    IPython.display.display(IPython.display.Audio(y, rate=fs))


# Listen to the reconstructed full mix:

# In[11]:

IPython.display.Audio(reconstructed_signal, rate=fs)


# Listen to the residual:

# In[12]:

residual = x - reconstructed_signal
residual[0] = 1  # hack to prevent automatic gain scaling
IPython.display.Audio(residual, rate=fs)


# [&larr; Back to Index](index.html)