# Muhammad Hafiz Wan Rosli # Media Arts & Technology # Music Information Retrieval # MAT240E: Homework05 - Audio Feature Extraction III #Analysis of pitch and tempo detection using essentia's built in algorithms #http://essentia.upf.edu/ #Experiments on different methods of segmenting signals based on sudden tempo changes for auto segmentation #Using essentia's BeatTrackerMultiFeature(), RhythmExtractor2013() & OnsetRate() #Plots of detected tatums and onsets #More explorations with texture windows and euclidean distances are done in Homework06 from essentia import * from essentia.standard import * from scipy.signal import decimate import os %pylab inline import matplotlib as mpl mpl.rcParams['figure.figsize'] = (16,4) loader = essentia.standard.MonoLoader(filename = './Music/iTunes/iTunes Media/Music/Ravi Shankar/Master Of Sitar/01 Raag Alahya Bilawal (Early Morning Raag).mp3') audio = loader() sampleRate = 44100 audioSnippet = audio[0*44100:60*44100] sample_dur_secs = len(audioSnippet)/ float(sampleRate) win_size=1024 hop = 512 window_start = arange(0, len(audioSnippet), hop) w = Windowing(type = 'hann') pitchconfs = [] f0= [] spectrum = Spectrum() pitchyin = PitchYinFFT() for frame in FrameGenerator(audioSnippet, frameSize = 1024, hopSize = 512): pitch, pconfidence = pitchyin(spectrum(w(frame))) f0.append(pitch) pitchconfs.append(pconfidence) plot(linspace(0, sample_dur_secs, len(f0)), f0) tonicICM = TonicIndianArtMusic() tonic = tonicICM(audioSnippet) tonic beatTrack = BeatTrackerMultiFeature() rhythmExtractor = RhythmExtractor2013() #for frame in FrameGenerator(audio[0: 10*44100], frameSize = 1024, hopSize = 512): bpm, ticks, confidence, estimates, bpmintervals = rhythmExtractor(audioSnippet) #ticks, tConfidence = beatTrack(audioSnippet) #ticks.append(tick) #tickConfs.append(tConfidence) plot(linspace(0, sample_dur_secs, len(estimates)), estimates) twinx() plot(linspace(0, sample_dur_secs, len(bpmintervals)), bpmintervals, color = 'r') confidence 
# Overlay the detected beat positions (ticks, in seconds) on the waveform.
plot(linspace(0, sample_dur_secs, len(audioSnippet)), audioSnippet)
for tick in ticks:
    axvline(tick, linewidth=1, color='r')

# Inter-tick intervals: a large jump suggests a sudden tempo change, used
# here as a crude segmentation cue.
# BUG FIX: the original loop iterated over the tick *values* (floats in
# seconds) and used them as array indices (`ticks[i]` with float i), which
# fails on a float index and is logically wrong; iterate over indices instead.
distanceBetweenTicks = []
for i in range(1, len(ticks)):
    distanceBetweenTicks.append(abs(ticks[i] - ticks[i - 1]))

plot(linspace(0, sample_dur_secs, len(audioSnippet)), audioSnippet)
plot(linspace(0, sample_dur_secs, len(distanceBetweenTicks)), distanceBetweenTicks, color='g')
# Mark the largest inter-tick gap as a candidate segmentation point, mapped
# from tick-index space back onto the time axis.
axvline((argmax(distanceBetweenTicks) / float(len(distanceBetweenTicks))) * sample_dur_secs,
        linewidth=3, color='k', alpha=0.5)

# Onset detection, for comparison against the beat tracker's ticks.
onsetDetector = OnsetRate()
onsets, onsetRate = onsetDetector(audioSnippet)

plot(linspace(0, sample_dur_secs, len(audioSnippet)), audioSnippet, color='b')
for onset in onsets:
    axvline(onset, linewidth=1, color='g')
for tick in ticks:
    axvline(tick, linewidth=1, color='r')


def onsetVStick(input_sig):
    """Plot input_sig with detected onsets (red) and beat ticks (white) overlaid.

    input_sig: mono audio samples; assumed 44.1 kHz — TODO confirm for all inputs.
    """
    sampleRate = 44100
    sample_dur_secs = len(input_sig) / float(sampleRate)

    rhythmExtractor = RhythmExtractor2013()
    onsetDetector = OnsetRate()

    bpm, ticks, confidence, estimates, bpmintervals = rhythmExtractor(input_sig)
    onsets, onsetRate = onsetDetector(input_sig)

    plot(linspace(0, sample_dur_secs, len(input_sig)), input_sig, color='k')
    # BUG FIX: matplotlib's alpha must be a number; the original passed the
    # strings '0.7' / '0.9', which raises a TypeError.
    for onset in onsets:
        axvline(onset, linewidth=1, color='r', alpha=0.7)
    for tick in ticks:
        axvline(tick, linewidth=1, color='w', alpha=0.9)


# Load every track of the album into memory for comparison.
rShankar = []
for root, dirs, files in os.walk('./Music/iTunes/iTunes Media/Music/Ravi Shankar/Master Of Sitar'):
    for name in files:
        loader = essentia.standard.MonoLoader(filename=os.path.join(root, name))
        rShankar.append(loader())

# Detected onsets vs detected ticks on 30-second windows of different tracks.
onsetVStick(rShankar[0][0:30 * sampleRate])
onsetVStick(rShankar[1][30 * sampleRate:60 * sampleRate])
# Compare onsets vs ticks on the next consecutive 30-second windows,
# one window per remaining track (track 2 -> 60-90 s, track 3 -> 90-120 s).
for idx in (2, 3):
    start = idx * 30 * sampleRate
    onsetVStick(rShankar[idx][start:start + 30 * sampleRate])