import csv
import string
from collections import defaultdict
from operator import itemgetter

# Now that we've gathered the data using the 'rms_and_metadata' script,
# let's read the CSV and make some plots.
#
# Let's see which genres, on average, have the highest and lowest RMS:

# Column layout of 'rms_and_metadata.txt' as used by this script.
RMS_COLUMN = 0
GENRE_COLUMN = 2


def read_rows(path):
    """Return every row of the CSV file at *path* as a list of strings.

    The dialect (delimiter, quoting) is sniffed from the first 1024
    characters. The file is opened in text mode with ``newline=''`` because
    that is what the Python 3 ``csv`` module requires; the original binary
    mode ('rb') only works on Python 2.
    """
    with open(path, newline='') as csvfile:
        dialect = csv.Sniffer().sniff(csvfile.read(1024))
        csvfile.seek(0)  # rewind so the sniffed sample is parsed as well
        return list(csv.reader(csvfile, dialect))


def group_rms_by_genre(rows):
    """Group the RMS values of *rows* by genre.

    Each row supplies its RMS in column 0 and its genre in column 2.
    Empty rows (which ``csv.reader`` yields for blank lines) are skipped
    instead of raising ``IndexError``.

    Returns a dict mapping genre -> list of RMS values as floats.
    """
    grouped = defaultdict(list)
    for row in rows:
        if not row:
            continue
        grouped[row[GENRE_COLUMN]].append(float(row[RMS_COLUMN]))
    return dict(grouped)


def average_rms_by_genre(rms_by_genre):
    """Return ``(genre, mean RMS)`` pairs sorted from loudest to quietest."""
    averages = [
        (genre, sum(values) / len(values))
        for genre, values in rms_by_genre.items()
        if values
    ]
    averages.sort(key=itemgetter(1), reverse=True)
    return averages


def highest_and_lowest(ranked, count=10):
    """Split a descending ranking into its first and last *count* entries.

    The original script sliced ``[-11:-1]`` for the lowest entries, which
    silently left out the very last (i.e. the quietest) genre; the last
    *count* entries are returned here instead.
    """
    return ranked[:count], ranked[max(0, len(ranked) - count):]


def plot_genre_bars(figure_number, ranked, title):
    """Draw one bar chart of ``(genre, mean RMS)`` pairs on its own figure."""
    # Imported here so the pure helpers above work without matplotlib.
    from matplotlib import pyplot

    positions = range(len(ranked))
    pyplot.figure(figure_number)
    pyplot.bar(positions, [mean for _, mean in ranked], align='center')
    pyplot.xticks(positions, [genre for genre, _ in ranked],
                  rotation='vertical')
    pyplot.title(title)
    # Same y-limit on both charts so they can be compared directly.
    pyplot.ylim(ymax=15000)


def plot_genre_averages(path='rms_and_metadata.txt'):
    """Plot the ten loudest and ten quietest genres by average RMS."""
    from matplotlib import pyplot, rcParams

    rcParams['figure.figsize'] = (20, 5)

    ranked = average_rms_by_genre(group_rms_by_genre(read_rows(path)))
    highest, lowest = highest_and_lowest(ranked, 10)

    plot_genre_bars(1, highest, 'Highest average RMS, by genre')
    plot_genre_bars(2, lowest, 'Lowest average RMS, by genre')
    pyplot.show()


if __name__ == '__main__':
    plot_genre_averages()

# Now we'll examine 5 individual songs and analyze them with windowed RMS,
# starting with a window of 500ms. I chose Bob Dylan's "The Times They Are
# A-Changin'", Parliament's "P-Funk", a song I wrote and recorded, "Falling
# Elevator Blues" (I wanted to see how my mixing compared to these other
# tracks), James Blake's "The Wilhelm Scream", and MGMT's "Kids". These songs
# were chosen to try to cover a range of genres and time periods.
import os
from os.path import join, getsize

MEDIA_DIR = '/Users/ogc/Documents/ucsb/mat/240e/media'
AUDIO_EXTENSIONS = ('.wav', '.mp3', '.m4a')


def split_song_name(name):
    """Split a file name into ``(songname, filetype)``.

    Only the last dot separates the extension, so names that contain
    additional dots are handled. The original ``name.split('.')`` raised
    ``ValueError`` for such names, and the song was then silently skipped.
    """
    songname, extension = os.path.splitext(name)
    return songname, extension[1:]


def find_audio_files(media_dir):
    """Yield ``(songname, filetype, path)`` for each audio file under *media_dir*.

    Files are matched by the extensions in ``AUDIO_EXTENSIONS``.
    """
    for root, dirs, files in os.walk(media_dir):
        if 'CVS' in dirs:
            dirs.remove('CVS')  # don't visit CVS directories
        for name in files:
            if name.endswith(AUDIO_EXTENSIONS):
                songname, filetype = split_song_name(name)
                yield songname, filetype, join(root, name)


def window_bounds(duration_ms, window_size, hop_factor=1):
    """Return the ``(begin, end)`` millisecond bounds of every full window.

    Windows are *window_size* ms long and start every
    ``window_size // hop_factor`` ms. A window that ends exactly at
    *duration_ms* is included; the original ``end < duration_ms`` test
    dropped it. A trailing partial window is never produced.

    Raises:
        ValueError: if *window_size* or *hop_factor* is not positive.
    """
    if window_size <= 0 or hop_factor <= 0:
        raise ValueError('window_size and hop_factor must be positive')
    # Integer hop so slice positions stay whole milliseconds; at least 1 so
    # a hop_factor larger than window_size cannot stall the iteration.
    hop_size = max(1, window_size // hop_factor)
    return [
        (begin, begin + window_size)
        for begin in range(0, duration_ms - window_size + 1, hop_size)
    ]


def windowed_rms(audio, window_size, hop_factor=1):
    """Return the RMS of each window of *audio*.

    *audio* is the object returned by ``AudioSegment.from_file``; this
    function relies on ``len(audio)`` being its duration in milliseconds,
    on slicing by milliseconds, and on the ``rms`` attribute of a slice.
    """
    bounds = window_bounds(len(audio), window_size, hop_factor)
    return [audio[begin:end].rms for begin, end in bounds]


def plot_windowed_rms(window_size, media_dir=MEDIA_DIR):
    """Plot the windowed RMS of every song under *media_dir* on one figure.

    Files that cannot be loaded are reported and skipped rather than
    silently ignored.
    """
    # Imported here so the pure helpers above can be used without the
    # plotting/audio stack installed.
    from matplotlib import pyplot
    from pydub import AudioSegment

    for songname, filetype, path in find_audio_files(media_dir):
        try:
            audio = AudioSegment.from_file(path, filetype)
        except Exception as error:  # best effort: one bad file must not abort the plot
            print('Skipping {0}: {1}'.format(path, error))
            continue
        pyplot.plot(windowed_rms(audio, window_size), label=songname)

    pyplot.ylim(ymax=25000)
    # All titles now carry the unit; originally only the 500 ms plot did.
    pyplot.title('RMS - Window Size: {0} ms'.format(window_size))
    pyplot.legend()
    pyplot.show()


def plot_song_comparisons(media_dir=MEDIA_DIR):
    """Plot the songs with 500 ms, 100 ms and 10 ms RMS windows in turn."""
    from matplotlib import rcParams

    rcParams['figure.figsize'] = (20, 10)

    plot_windowed_rms(500, media_dir)

    # Now with a window of 100ms:
    plot_windowed_rms(100, media_dir)

    # 10ms:
    plot_windowed_rms(10, media_dir)


if __name__ == '__main__':
    plot_song_comparisons()

# These plots seem to indicate that taking the RMS over a very short time
# period makes the measurement more susceptible to being skewed by brief
# spikes in amplitude. On the other hand, taking the RMS over too long a time
# period results in less information with respect to changes in time. Despite
# the differences in the plots due to differing window sizes, certain
# properties of the recordings are fairly easy to discern. As I expected from
# a pop anthem, the MGMT track was generally very 'loud', but did exhibit
# quite a bit of dynamic range. The James Blake tune, very much an electronic
# composition, surprised me with its vast dynamic range, though I expected it
# to have a relatively high RMS in general. These properties correspond to
# 'good' modern mixing, high dynamic range and 'punchy'. I expected the
# earlier recordings, the Bob Dylan and Parliament tracks, to be quite a bit
# different from the James Blake and MGMT songs due to earlier mixing styles
# and technologies. Indeed, based on the RMS these recordings appear to be
# much quieter in general and have less dynamic range.
# It appears that my mixing style on "Falling Elevator Blues" resulted in RMS
# values much more closely resembling the older tracks, which is also
# something I expected to observe because I haven't quite figured out how to
# make my tracks sound 'loud' the way most modern popular music does. Partly
# due to the increased data point (and therefore line) density in the plots
# with the shorter window sizes, but also due to the fact that RMS becomes
# less meaningful when the window size is too large or too small, I think the
# longer window sizes above give a better visual comparison of the songs
# overall. However, the shorter window sizes do give a better indication of
# the dynamic range.