#!/usr/bin/env python
# coding: utf-8

# In[1]:


# Shared imports for every cell below (one module per line, PEP 8 style).
import numpy
import scipy
import matplotlib.pyplot as plt
import pandas
import librosa


# # Using Audio in IPython

# ## Audio Libraries

# We will mainly use three libraries for audio acquisition and playback:
#
# ### 1. IPython.display.Audio
# ### 2. librosa
# ### 3. essentia.standard

# Introduced in IPython 2.0, [`IPython.display.Audio`](http://ipython.org/ipython-doc/stable/api/generated/IPython.display.html#IPython.display.Audio) lets you play audio directly in an IPython notebook.

# [`librosa`](http://bmcfee.github.io/librosa/) is a Python package for music and audio processing by [Brian McFee](http://cosmal.ucsd.edu/~bmcfee/). A large portion was ported from [Dan Ellis's Matlab audio processing examples](http://www.ee.columbia.edu/%7Edpwe/resources/matlab/).
#
# - [Documentation Home](http://bmcfee.github.io/librosa/)
# - [Demo: Getting Started](http://nbviewer.ipython.org/github/bmcfee/librosa/blob/master/examples/LibROSA%20demo.ipynb)
# - [librosa on Github](https://github.com/bmcfee/librosa/)

# [Essentia](http://essentia.upf.edu) is an open-source library for audio analysis and music information retrieval
# from the [Music Technology Group at Universitat Pompeu Fabra](http://mtg.upf.edu/home).
# Although Essentia is written in C++, we will use the Python bindings for Essentia.
#
# - [Documentation Home](http://essentia.upf.edu/documentation/)
# - [Python Tutorial](http://essentia.upf.edu/documentation/python_tutorial.html)
# - [Essentia on GitHub](https://github.com/MTG/essentia)

# ## Retrieving Audio

# To download a file onto your local machine (or Vagrant box) in Python, you can use `urlretrieve`
# (`urllib.request.urlretrieve` on Python 3, `urllib.urlretrieve` on Python 2):

# In[2]:


import urllib

# `urlretrieve` moved into `urllib.request` in Python 3. Try the new location
# first and fall back to the Python 2 one so this cell runs on either version.
try:
    from urllib.request import urlretrieve
except ImportError:
    from urllib import urlretrieve

urlretrieve(
    'http://audio.musicinformationretrieval.com/simpleLoop.wav',
    filename='simpleLoop.wav'
)


# To check that the file downloaded successfully, list the files in the working directory:

# In[3]:


# IPython-only line magic; this cell requires an IPython/Jupyter session.
get_ipython().run_line_magic('ls', '*.wav')


# Visit https://ccrma.stanford.edu/workshops/mir2014/audio/ for more audio files.

# If you only want to listen to, and not manipulate, a remote audio file, use `IPython.display.Audio` instead. (See [Playing Audio](#Playing-Audio).)

# ## Reading Audio

# ### `librosa.load`

# In[4]:


# `librosa.load` returns the samples and the sampling rate they were loaded at.
x, fs = librosa.load('simpleLoop.wav')
# Parenthesised form prints identically on Python 2 and is required on Python 3.
print(x.shape)
print(fs)


# In[5]:


plt.plot(x)


# ### `essentia.standard.MonoLoader`

# [`MonoLoader`](http://essentia.upf.edu/documentation/reference/std_MonoLoader.html) reads (and downmixes, if necessary) an audio file into a single channel (as will often be the case during this workshop). `MonoLoader` also resamples the audio to a sampling frequency of your choice (default = 44100 Hz):

# In[6]:


from essentia.standard import MonoLoader
audio = MonoLoader(filename='simpleLoop.wav')()
audio.shape


# In[7]:


N = len(audio)
# 44100.0 is MonoLoader's default output rate (see the text above). Do not
# substitute the `fs` returned by `librosa.load`, which may be a different rate.
t = numpy.arange(0, N)/44100.0
plt.plot(t, audio)
plt.xlabel('Time (seconds)')


# For more control over the audio acquisition process, you may want to use [`AudioLoader`](http://essentia.upf.edu/documentation/reference/std_AudioLoader.html) instead.
# ## Playing Audio

# ### `IPython.display.Audio`

# Using [`IPython.display.Audio`](http://ipython.org/ipython-doc/2/api/generated/IPython.lib.display.html#IPython.lib.display.Audio), you can play a local audio file or a remote audio file:

# In[8]:


from IPython.display import Audio

# load a remote WAV file
Audio('https://ccrma.stanford.edu/workshops/mir2014/audio/CongaGroove-mono.wav')


# In[9]:


# load a local WAV file
Audio('simpleLoop.wav')


# `Audio` can also accept a NumPy array:

# In[10]:


fs = 44100  # sampling frequency
T = 1.5  # seconds
t = numpy.linspace(0, T, int(T*fs), endpoint=False)  # time variable
x = numpy.sin(2*numpy.pi*440*t)  # pure sine wave at 440 Hz

# load a NumPy array
Audio(x, rate=fs)


# ### SoX

# To play or record audio from the command line, we recommend SoX (included in the `stanford-mir` Vagrant box).

#     $ rec test.wav
#
#     $ play test.wav

# ## Visualizing Audio

# `plot` is the simplest way to plot time-domain signals:

# In[11]:


T = 0.001  # seconds
fs = 44100  # sampling frequency
t = numpy.linspace(0, T, int(T*fs), endpoint=False)  # time variable
x = numpy.sin(2*numpy.pi*3000*t)

# Plot a sine wave
plt.plot(t, x)
plt.xlabel('Time (seconds)')


# `specgram` is a Matplotlib tool for computing and displaying spectrograms.

# In[12]:


# NOTE: `x` from the previous cell has only int(T*fs) = 44 samples, far fewer
# than NFFT=1024, so expect very few (likely one) time frames in this plot.
S, freqs, bins, im = plt.specgram(x, NFFT=1024, Fs=fs, noverlap=512)

# Plot a spectrogram
plt.xlabel('Time')
plt.ylabel('Frequency')


# ## Writing Audio

# ### `librosa.output.write_wav`

# `librosa.output.write_wav` also saves a NumPy array to a WAV file. This is a bit easier to use.

# In[13]:


# One second of low-amplitude Gaussian noise at 44.1 kHz. `scipy.randn` was
# only a re-export of `numpy.random.randn` and is absent from current SciPy
# releases, so call NumPy directly; the values drawn are equivalent.
noise = 0.1*numpy.random.randn(44100)

# Write an array to a wav file
# NOTE: the `librosa.output` module was removed in librosa 0.8; on newer
# librosa versions use `soundfile.write('noise2.wav', noise, 44100)` instead.
librosa.output.write_wav('noise2.wav', noise, 44100)
# IPython-only line magic; this cell requires an IPython/Jupyter session.
get_ipython().run_line_magic('ls', '*.wav')