# Let's start importing some fun things!
from os import listdir, chdir
from os.path import expanduser

import numpy as np
from essentia.standard import AudioLoader

# The notebook used the IPython magic `cd ~/Desktop/Music/` here. chdir() is
# the plain-Python equivalent; the loaders below are handed the bare file
# names returned by listdir(), so the working directory matters.
chdir(expanduser('~/Desktop/Music/'))
base_dir = '/Users/adj/Desktop/Music/'
music_list = listdir(base_dir)

# NOTE(review): plot() is called below without being imported anywhere in
# this file; presumably the session was started with `%pylab` -- confirm.


# Let's make a getRMS function that takes a numpy array as an argument and
# returns its root mean square.
def getRMS(x):
    """Return the root mean square of ``x``.

    Args:
        x: Array-like of samples; every element contributes to the mean.

    Returns:
        ``sqrt(mean(x ** 2))`` computed in float64.
    """
    # Square in float64: squaring in an integer dtype (e.g. 16-bit PCM)
    # would overflow before the mean is taken.
    samples = np.asarray(x, dtype=float)
    return np.sqrt(np.mean(samples ** 2))


# It will also be useful to make a function to downmix from stereo to mono as
# well as one to normalize.
def downmix(x):
    """Collapse a multi-channel signal to mono by averaging over axis 1.

    Args:
        x: numpy array. A 1-D array is treated as already mono; otherwise
            axis 1 is averaged away (assumes channels live on axis 1).

    Returns:
        ``x`` itself when it is 1-D, otherwise a float array of the
        per-sample channel average.
    """
    if x.ndim == 1:
        return x  # x is already mono
    # Use the mean over axis 1 rather than sum / x.ndim: ndim is the number
    # of array dimensions (2 for any 2-D input), which equals the channel
    # count only for stereo.
    return x.astype(float).mean(axis=1)


music_list[0]
# That's not an audio file! Let's get rid of it.
music_list = music_list[1:]
music_list[0]
# That's more like it. Now let's write our normalization function.


def normalize(x):
    """Scale ``x`` so that its largest absolute sample is 1.0.

    Args:
        x: numpy array of samples.

    Returns:
        A float copy of ``x`` divided by its peak absolute value. Empty and
        all-zero inputs are returned as float arrays without scaling.
    """
    scaled = x.astype(float)
    if scaled.size == 0:
        return scaled  # nothing to scale; min()/max() would raise on empty
    abs_max = np.abs(scaled).max()
    if abs_max == 0:
        return scaled  # pure silence: avoid dividing by zero (NaN output)
    return scaled / abs_max


test = music_list[0]
loader = AudioLoader(filename=test)
audio, sr, nchnls = loader()
audio.shape
audio_mono = downmix(audio)
audio_mono.shape
plot(audio_mono)
# Doesn't look normalized.
audio_mono_normalized = normalize(audio_mono)
plot(audio_mono_normalized)
# Much better. Let's see what the RMS is.
rms = getRMS(audio_mono_normalized)
print(rms)
x = np.array([1, -2, 3, -4])
rms = getRMS(x)
rms == np.sqrt(0.25 * (1 + 4 + 9 + 16))
# So getRMS is definitely working. Let's implement the windowed version
# (using the code from class).
def windowed_rms(input_sig, win_sizes=None, hop=None):
    """Compute the RMS of ``input_sig`` over sliding windows of several sizes.

    Args:
        input_sig: numpy array of samples, windowed along its first axis.
            Each window is cast to float before its RMS is taken.
        win_sizes: Iterable of window lengths in samples. Defaults to
            ``[512, 1024, 2048, 4096]``.
        hop: Step between consecutive window starts, in samples. When falsy
            (``None`` or ``0``) each window size uses half its own length.

    Returns:
        A tuple ``(rms_windows, win_sizes)`` where ``rms_windows[i]`` is the
        list of RMS values obtained with ``win_sizes[i]``.
    """
    if win_sizes is None:
        # Build a fresh list per call: it is handed back to the caller, so a
        # shared mutable default could be modified from outside.
        win_sizes = [512, 1024, 2048, 4096]

    rms_windows = []
    for win_size in win_sizes:
        # Resolve the hop separately for every window size. Assigning the
        # default back to `hop` would freeze it at half of the FIRST window
        # for all later sizes. Floor division keeps it usable as a slice
        # index, and max(1, ...) avoids a zero step for a 1-sample window.
        win_hop = int(hop) if hop else max(1, win_size // 2)
        # "+ 1" keeps the final full window when it ends exactly at the end
        # of the signal (e.g. len(input_sig) == win_size).
        starts = range(0, len(input_sig) - win_size + 1, win_hop)
        rms_windows.append(
            [getRMS(input_sig[start:start + win_size].astype(float))
             for start in starts]
        )
    return rms_windows, win_sizes


analogique = audio_mono_normalized
w_rms, win_sizes = windowed_rms(analogique)
for rms_plot in w_rms:
    plot(linspace(0, len(analogique), len(rms_plot)), rms_plot)
# Redraw the last (largest-window) curve in black on top of the others.
plot(linspace(0, len(analogique), len(w_rms[-1])), w_rms[-1], lw=1, color='k')
# We can see that several periods of absolute silence greatly affected the
# overall RMS, which was quite low as a result (~0.1).
# Let's try a few other files.

whiskey = music_list[9]
whiskey
loader = AudioLoader(filename=whiskey)
audio, sr, nchnls = loader()
whiskey = downmix(audio)
whiskey = normalize(whiskey)
plot(whiskey)
getRMS(whiskey)
w_rms, win_sizes = windowed_rms(whiskey)
for rms_plot in w_rms:
    plot(linspace(0, len(whiskey), len(rms_plot)), rms_plot)
plot(linspace(0, len(whiskey), len(w_rms[-1])), w_rms[-1], lw=1, color='k')
# Here we can see that the overall RMS was more congruent with the windowed
# versions, although of course the windowed version gives much more
# information.
# For instance, we can see that the song started off with less energy and
# then increased.

intermezzo = music_list[4]
intermezzo
loader = AudioLoader(filename=intermezzo)
audio, sr, nchnls = loader()
intermezzo = downmix(audio)
intermezzo = normalize(intermezzo)
plot(intermezzo)
# Compared to the country music track, this classical track has a lot more
# dynamic variance.
getRMS(intermezzo)
w_rms, win_sizes = windowed_rms(intermezzo)
for rms_plot in w_rms:
    plot(linspace(0, len(intermezzo), len(rms_plot)), rms_plot)
plot(linspace(0, len(intermezzo), len(w_rms[-1])), w_rms[-1], lw=0.75, color='k')
# We can see very clearly from this the climax just before 1.0e7 samples.
# How many seconds is that?
seconds = 0.95 * (10.0 ** 7) / sr
minutes = seconds / 60.0
print(minutes)
# So there is a climax in volume (at least by RMS) around three-and-a-half
# minutes into the piece.

music_list[-1]
night_bird = music_list[-1]
loader = AudioLoader(filename=night_bird)
audio, sr, nchnls = loader()
night_bird = downmix(audio)
night_bird = normalize(night_bird)
plot(night_bird)
getRMS(night_bird)
# This number may also be skewed low due to the prolonged quiet parts of the
# track.
# (The recording is of a Cassin's Kingbird near my apartment.)
w_rms, win_sizes = windowed_rms(night_bird)
for rms_plot in w_rms:
    plot(linspace(0, len(night_bird), len(rms_plot)), rms_plot)
# Redraw the last (largest-window) curve in black on top of the others.
plot(linspace(0, len(night_bird), len(w_rms[-1])), w_rms[-1], lw=0.75, color='k')
# Since this is a shorter file, we don't see as much distinction between the
# different window sizes here.

music_list[-20]
grains = music_list[-20]
loader = AudioLoader(filename=grains)
audio, sr, nchnls = loader()
grains = downmix(audio)
grains = normalize(grains)
plot(grains)
getRMS(grains)
w_rms, win_sizes = windowed_rms(grains)
for rms_plot in w_rms:
    plot(linspace(0, len(grains), len(rms_plot)), rms_plot)
plot(linspace(0, len(grains), len(w_rms[-1])), w_rms[-1], lw=0.1, color='k')
# This track is an example of granular synthesis so I expected the RMS to be
# fairly jumpy, as we see from the windowed analysis.

# Overall RMS of every file in music_list, one bar per file.
total_length = len(music_list)
histogram_array = zeros(total_length)
for index, track in enumerate(music_list):
    loader = AudioLoader(filename=track)
    audio, sr, nchnls = loader()
    audio = downmix(audio)
    audio = normalize(audio)
    histogram_array[index] = getRMS(audio)
bar(arange(total_length), histogram_array, 0.05)
# We can see that, with a few notable exceptions, most tracks are around 0.1
# in total RMS.
max(histogram_array)
min(histogram_array)
argmax(histogram_array), argmin(histogram_array)
music_list[191], music_list[136]
# correlateWB is a few seconds of white noise at close to unity gain, while
# the 'short grain' track is full of super short grains separated by silence,
# so this result makes sense.

# The notebook used the IPython magic `cd ../Music_by_album/` here; chdir()
# is the plain-Python equivalent.
chdir('../Music_by_album/')
root_dir = '/Users/adj/Desktop/Music_by_album/'
album_list = listdir(root_dir)
album_list
# NOTE(review): this drops whatever entry listdir() happened to return first.
# It is kept as-is because the hard-coded histogram_list[i] labels below
# depend on the resulting order; presumably this positional drop is what
# removed the Microsounds album -- confirm.
album_list = album_list[1:]

histogram_list = []
for name in album_list:
    chdir(root_dir + name)
    # Filter the Finder metadata file by name instead of assuming it is
    # always the first entry (listdir() order is not guaranteed).
    songs_in_album = [song for song in listdir(root_dir + name)
                      if song != '.DS_Store']
    total_length = len(songs_in_album)
    histogram_array = zeros(total_length)
    for counter, song in enumerate(songs_in_album):
        loader = AudioLoader(filename=song)
        audio, sr, nchnls = loader()
        audio = downmix(audio)
        audio = normalize(audio)
        histogram_array[counter] = getRMS(audio)
    histogram_list.append(histogram_array)

bar(arange(len(histogram_list[0])), histogram_list[0], 0.05)  # Familiar Listening 1 (Renaissance/Baroque)
bar(arange(len(histogram_list[1])), histogram_list[1], 0.05)  # Familiar Listening 2 (Baroque/Classical)
bar(arange(len(histogram_list[2])), histogram_list[2], 0.05)  # Familiar Listening 3 (Romantic/20th Century)
bar(arange(len(histogram_list[3])), histogram_list[3], 0.05)  # How the Stars were Made (Percussion quartet)
bar(arange(len(histogram_list[4])), histogram_list[4], 0.05)  # Unfamiliar Listening 1 (similar to familiar listening albums)
bar(arange(len(histogram_list[5])), histogram_list[5], 0.05)  # Unfamiliar Listening 2
bar(arange(len(histogram_list[6])), histogram_list[6], 0.05)  # Unfamiliar Listening 3
bar(arange(len(histogram_list[7])), histogram_list[7], 0.05)  # Unfamiliar Listening 4
bar(arange(len(histogram_list[8])), histogram_list[8], 0.05)  # Unknown album (a few random tracks I've recorded)
bar(arange(len(histogram_list[9])), histogram_list[9], 0.05)  # Whiskey & Me (country)
# For some reason,
# Microsounds got skipped. For completeness, let's get it too.
chdir('/Users/adj/Desktop/Music_by_album/Curtis Roads_ Microsounds')
# Filter the Finder metadata file by name instead of assuming it is always
# the first entry (listdir() order is not guaranteed).
songs_in_album = [song for song in listdir(root_dir + 'Curtis Roads_ Microsounds')
                  if song != '.DS_Store']
total_length = len(songs_in_album)
histogram_array = zeros(total_length)
for counter, song in enumerate(songs_in_album):
    loader = AudioLoader(filename=song)
    audio, sr, nchnls = loader()
    audio = downmix(audio)
    audio = normalize(audio)
    histogram_array[counter] = getRMS(audio)
bar(arange(len(songs_in_album)), histogram_array, 0.05)  # Microsound (granular synthesis)