In [100]:

#Let's start importing some fun things!
from os import listdir, chdir
from essentia.standard import AudioLoader

In [2]:

cd ~/Desktop/Music/

/Users/adj/Desktop/Music

In [3]:

base_dir = '/Users/adj/Desktop/Music/'
music_list = listdir(base_dir)

In [4]:

#Let's make a getRMS function that takes a numpy array as an argument and returns its root mean square.
def getRMS(x):
    return sqrt(mean(x**2))

In [5]:

#It will also be useful to make a function to downmix from stereo to mono as well as one to normalize.
def downmix(x):
    if x.ndim == 1:
        return x     #x is already mono
    else:
        return sum(x.astype(float), axis=1)/x.ndim

In [6]:

music_list[0]

Out[6]:

'.DS_Store'

In [7]:

#That's not an audio file! Let's get rid of it.
music_list = music_list[1:]
music_list[0]

Out[7]:

'01 Analogique A Et B (3 Excerpts) [1958].m4a'

In [8]:

#That's more like it. Now let's write our normalization function.
def normalize(x):
    abs_max = max(abs(x.min().astype(float)), abs(x.max().astype(float)))
    return x.astype(float) / abs_max

In [12]:

test = music_list[0]
loader = AudioLoader(filename = test)
audio, sr, nchnls = loader()

In [13]:

audio.shape

Out[13]:

(2928576, 2)

In [14]:

audio_mono = downmix(audio)
audio_mono.shape

Out[14]:

(2928576,)

In [15]:

plot(audio_mono)

Out[15]:

[<matplotlib.lines.Line2D at 0x1112aa650>]

In [16]:

#Doesn't look normalized.
audio_mono_normalized = normalize(audio_mono)
plot(audio_mono_normalized)

Out[16]:

[<matplotlib.lines.Line2D at 0x11124ac50>]

In [17]:

#Much better. Let's see what the RMS is.
rms = getRMS(audio_mono_normalized)
print(rms)

0.0998588627015

In [18]:

x = np.array([1, -2, 3, -4])
rms = getRMS(x)
rms == sqrt(0.25 * (1 + 4 + 9 + 16))

Out[18]:

True

In [19]:

#So getRMS is definitely working. Let's implement the windowed version (using the code from class).
def windowed_rms(input_sig, win_sizes = [512, 1024, 2048, 4096], hop=None):
    rms_windows = []
    for win_size in win_sizes:
        if not hop:
            hop = win_size/2 
        window_start = arange(0, len(input_sig) - win_size, hop)
        rms = []
        for start in window_start:
            w = input_sig[start: start+win_size].astype(float)
            rms_inst = getRMS(w)
            rms.append(rms_inst)
        rms_windows.append(rms)
    return rms_windows, win_sizes

In [22]:

analogique = audio_mono_normalized

In [23]:

w_rms, win_sizes = windowed_rms(analogique)

for rms_plot in w_rms:
    plot(linspace(0, len(analogique), len(rms_plot)), rms_plot)

plot(linspace(0, len(analogique), len(w_rms[-1])), w_rms[-1], lw=1, color='k')

Out[23]:

[<matplotlib.lines.Line2D at 0x111291a50>]

In [24]:

#We can see that several periods of absolute silence greatly affected the overall RMS, which was quite low as a result (~0.1)
#Let's try a few other files.
whiskey = music_list[9]
whiskey

Out[24]:

'01 Whiskey & Me.m4a'

In [25]:

loader = AudioLoader(filename = whiskey)
audio, sr, nchnls = loader()

In [26]:

whiskey = downmix(audio)
whiskey = normalize(whiskey)

In [27]:

plot(whiskey)

Out[27]:

[<matplotlib.lines.Line2D at 0x110476b50>]

In [28]:

getRMS(whiskey)

Out[28]:

0.12969081352157372

In [29]:

w_rms, win_sizes = windowed_rms(whiskey)

for rms_plot in w_rms:
    plot(linspace(0, len(whiskey), len(rms_plot)), rms_plot)

plot(linspace(0, len(whiskey), len(w_rms[-1])), w_rms[-1], lw=1, color='k')

Out[29]:

[<matplotlib.lines.Line2D at 0x10fdf7790>]

In [30]:

#Here we can see that the overall RMS was more congruent with the windowed versions, although of course the windowed version gives much more information.

In [31]:

#For instance, we can see that the song started off with less energy and then increased. 
intermezzo = music_list[4]
intermezzo

Out[31]:

'01 Intermezzo in A Major.m4a'

In [32]:

loader = AudioLoader(filename = intermezzo)
audio, sr, nchnls = loader()

In [33]:

intermezzo = downmix(audio)
intermezzo = normalize(intermezzo)
plot(intermezzo)

Out[33]:

[<matplotlib.lines.Line2D at 0x1112ef8d0>]

In [34]:

#Compared to the country music track, this classical track has a lot more dynamic variance.
getRMS(intermezzo)

Out[34]:

0.067854790375200466

In [35]:

w_rms, win_sizes = windowed_rms(intermezzo)

for rms_plot in w_rms:
    plot(linspace(0, len(intermezzo), len(rms_plot)), rms_plot)

plot(linspace(0, len(intermezzo), len(w_rms[-1])), w_rms[-1], lw=0.75, color='k')

Out[35]:

[<matplotlib.lines.Line2D at 0x111503f90>]

In [43]:

#We can see very clearly from this the climax just before 1.0e7 samples. How many seconds is that?
seconds = 0.95 * (10.0 ** 7) / sr
minutes = seconds / 60.0
print(minutes)

3.5903250189

In [45]:

#So there is a climax in volume (at least by RMS) around three-and-a-half minutes into the piece.

In [46]:

music_list[-1]

Out[46]:

'night_bird.mp3'

In [47]:

night_bird = music_list[-1]

In [48]:

loader = AudioLoader(filename = night_bird)
audio, sr, nchnls = loader()

In [49]:

night_bird = downmix(audio)
night_bird = normalize(night_bird)
plot(night_bird)

Out[49]:

[<matplotlib.lines.Line2D at 0x1100150d0>]

In [53]:

getRMS(night_bird)

Out[53]:

0.088171645605949964

In [54]:

#This number may also be skewed low due to the prolonged quiet parts of the track.
#(The recording is of a Cassin's Kingbird near my apartment.)

In [55]:

w_rms, win_sizes = windowed_rms(night_bird)

for rms_plot in w_rms:
    plot(linspace(0, len(night_bird), len(rms_plot)), rms_plot)

plot(linspace(0, len(night_bird), len(w_rms[-1])), w_rms[-1], lw=0.75, color='k')

Out[55]:

[<matplotlib.lines.Line2D at 0x11001d990>]

In [56]:

#Since this is a shorter file, we don't see as much distinction between the different window sizes here.

In [57]:

music_list[-20]

Out[57]:

'52 Constant-Q Filters On Grains, Tsound.m4a'

In [58]:

grains = music_list[-20]

In [59]:

loader = AudioLoader(filename = grains)
audio, sr, nchnls = loader()

In [60]:

grains = downmix(audio)
grains = normalize(grains)
plot(grains)

Out[60]:

[<matplotlib.lines.Line2D at 0x10fecd190>]

In [61]:

getRMS(grains)

Out[61]:

0.075266849366140434

In [64]:

w_rms, win_sizes = windowed_rms(grains)

for rms_plot in w_rms:
    plot(linspace(0, len(grains), len(rms_plot)), rms_plot)

plot(linspace(0, len(grains), len(w_rms[-1])), w_rms[-1], lw=0.1, color='k')

Out[64]:

[<matplotlib.lines.Line2D at 0x10ff558d0>]

In [65]:

#This track is an example of granular synthesis so I expected the RMS to be fairly jumpy, as we see from the windowed analysis.

Extracting RMS from the entire audio collection

In [66]:

total_length = len(music_list)
histogram_array = zeros(total_length)
counter = 0
for file in music_list:
    loader = AudioLoader(filename = file)
    audio, sr, nchnls = loader()
    audio = downmix(audio)
    audio = normalize(audio)
    histogram_array[counter] = getRMS(audio)
    counter += 1

In [67]:

 bar(arange(total_length), histogram_array, 0.05)

Out[67]:

<Container object of 194 artists>

In [68]:

#We can see that, with a few notable exceptions, most tracks are around 0.1 in total RMS.

In [69]:

max(histogram_array)

Out[69]:

0.57736147018902684

In [70]:

min(histogram_array)

Out[70]:

0.0093082953702758649

In [71]:

argmax(histogram_array), argmin(histogram_array)

Out[71]:

(191, 136)

In [72]:

music_list[191], music_list[136]

Out[72]:

('correlateWB.wav', '18 Broadband Cloud Of Short Grains.m4a')

In [73]:

#correlateWB is a few seconds of white noise at close to unity gain, while the 'short grain' track is full of super short grains separated by silence, so this result makes sense.

Grouping by album

In [74]:

cd ../Music_by_album/

/Users/adj/Desktop/Music_by_album

In [75]:

root_dir = '/Users/adj/Desktop/Music_by_album/'
album_list = listdir(root_dir)
    

In [76]:

album_list

Out[76]:

['.DS_Store',
 'Curtis Roads_ Microsounds',
 'Familiar Listening 1',
 'Familiar Listening 2',
 'Familiar Listening 3',
 'How the stars were made',
 'Unfamiliar Listening 1',
 'Unfamiliar Listening 2',
 'Unfamiliar Listening 3',
 'Unfamiliar Listening 4',
 'Unknown Album',
 'Whiskey & Me']

In [78]:

album_list = album_list[1:]

In [102]:

histogram_list = []
for name in album_list:
    chdir(root_dir + name)
    counter = 0
    songs_in_album = listdir(root_dir + name)
    songs_in_album = songs_in_album[1:]     #the first entry is always '.DS_Store' which is not an audio file
    total_length = len(songs_in_album)
    histogram_array = zeros(total_length)
    for file in songs_in_album:
        loader = AudioLoader(filename = file)
        audio, sr, nchnls = loader()
        audio = downmix(audio)
        audio = normalize(audio)
        histogram_array[counter] = getRMS(audio)
        counter += 1
    histogram_list.append(histogram_array)

In [106]:

bar(arange(len(histogram_list[0])), histogram_list[0], 0.05)     #Familiar Listening 1 (Renaissance/Baroque)

Out[106]:

<Container object of 13 artists>

In [107]:

bar(arange(len(histogram_list[1])), histogram_list[1], 0.05)     #Familiar Listening 2 (Baroque/Classical)

Out[107]:

<Container object of 10 artists>

In [108]:

bar(arange(len(histogram_list[2])), histogram_list[2], 0.05)     #Familiar Listening 3 (Romantic/20th Century)

Out[108]:

<Container object of 15 artists>

In [109]:

bar(arange(len(histogram_list[3])), histogram_list[3], 0.05)     #How the Stars were Made (Percussion quartet)

Out[109]:

<Container object of 7 artists>

In [110]:

bar(arange(len(histogram_list[4])), histogram_list[4], 0.05)     #Unfamiliar Listening 1 (similar to familiar listening albums)

Out[110]:

<Container object of 17 artists>

In [111]:

bar(arange(len(histogram_list[5])), histogram_list[5], 0.05)     #Unfamiliar Listening 2

Out[111]:

<Container object of 21 artists>

In [112]:

bar(arange(len(histogram_list[6])), histogram_list[6], 0.05)     #Unfamiliar Listening 3

Out[112]:

<Container object of 16 artists>

In [113]:

bar(arange(len(histogram_list[7])), histogram_list[7], 0.05)     #Unfamiliar Listening 4

Out[113]:

<Container object of 14 artists>

In [115]:

bar(arange(len(histogram_list[8])), histogram_list[8], 0.05)     #Unknown album (a few random tracks I've recorded)

Out[115]:

<Container object of 3 artists>

In [116]:

bar(arange(len(histogram_list[9])), histogram_list[9], 0.05)     #Whiskey & Me (country)

Out[116]:

<Container object of 10 artists>

In [117]:

#For some reason, Microsounds got skipped. For completeness, let's get it too.

In [132]:

counter = 0
chdir('/Users/adj/Desktop/Music_by_album/Curtis Roads_ Microsounds')
songs_in_album = listdir(root_dir + 'Curtis Roads_ Microsounds')
songs_in_album = songs_in_album[1:] 
total_length = len(songs_in_album)
histogram_array = zeros(total_length)
for file in songs_in_album:
    loader = AudioLoader(filename = file)
    audio, sr, nchnls = loader()
    audio = downmix(audio)
    audio = normalize(audio)
    histogram_array[counter] = getRMS(audio)
    counter += 1

In [133]:

bar(arange(len(songs_in_album)), histogram_array, 0.05)     #Microsound (granular synthesis)

Out[133]:

<Container object of 68 artists>

Conclusions

It's hard to draw too many meaningful results from just the bare RMS data itself---the windowed RMS is more descriptive, although some care must be made to choose an appropriate window to produce smooth-ish results. Some differences in genre did appear---in particular, the country album was significantly higher in RMS than most of the classical albums.

In [ ]: