#Muhammad Hafiz Wan Rosli
#MAT240E-Final I [Time on Pitch & HMM]
#Raag Analysis
#Source: Master of Sitar- 01 Raag Alahya Bilawal (Early Morning Raag).mp3
#Ground Truth:
#Jhala: 00:00 - 00:42
#Gat: 00:42 - 18:46 (1080+46 = 1126 secs)
#Jhala: 18:46 - end (check spectral difference 1 minute before & after)
#Gat-Jhala transition: 18:45 - 18:46
#Code written in python : https://www.python.org/
#Interactive computing ipython : http://ipython.org/
#Audio Analysis & MIR Library : http://essentia.upf.edu/
# --- Environment setup (IPython notebook transcript) -------------------------
%pylab inline
# NOTE(review): `plt` here is the top-level matplotlib package, NOT
# matplotlib.pyplot. rcParams exists on both, so the line below works, but the
# alias is misleading — confirm before routing pyplot-only calls through `plt`.
import matplotlib as plt
from matplotlib.colors import LogNorm
plt.rcParams['figure.figsize'] = (16,4)
# (notebook output of %pylab follows)
Populating the interactive namespace from numpy and matplotlib
# Essentia audio analysis / MIR library (http://essentia.upf.edu/)
from essentia import *
from essentia.standard import *
# --- Load the recording and estimate its tonic -------------------------------
#loader = MonoLoader(filename='/Users/muhammadhafiz/Documents/work/ComputationalEthno/samples/RaagAlahyaBilawal0_3minutes.mp3')
loader = MonoLoader(filename='/Users/muhammadhafiz/Music/iTunes/iTunes Media/Music/Ravi Shankar/Master Of Sitar/01 Raag Alahya Bilawal (Early Morning Raag).mp3')
audio = loader()
sampleRate = 44100
# Analyse only the opening 60 s — ground truth places the jhala/gat boundary
# at 42 s, so one minute covers the transition of interest.
# (Fix: use the sampleRate variable instead of re-hard-coding 44100.)
audioSnippet = audio[:60*sampleRate] #45
sample_dur_secs = len(audioSnippet)/ float(sampleRate)
win_size=1024
hop = 512
window_start = arange(0, len(audioSnippet), hop)
w = Windowing(type = 'hann')
# Tonic estimator specialised for Indian art music; run on the full recording
# for a stable estimate.
tonicNICM=TonicIndianArtMusic()
tonic = tonicNICM(audio)
#tonic = tonicNICM(audio[:60*44100])
tonic
# notebook output: 142.05638122558594  (Hz)
# --- Frame-wise pitch tracking (YIN on the FFT spectrum) ----------------------
pitchesConfidence = []  # per-frame confidence of the pitch estimate
pitches= []             # per-frame detected pitch in Hz
spectrum = Spectrum()
pitchyin = PitchYinFFT()
# Fix: use the win_size/hop variables defined above instead of re-hard-coding
# 1024/512, so window and hop stay in sync everywhere.
for frame in FrameGenerator(audioSnippet, frameSize = win_size, hopSize = hop):
    detectedPitch, pConfidence = pitchyin(spectrum(w(frame)))
    pitches.append(detectedPitch)
    pitchesConfidence.append(pConfidence)
max(pitches)
# notebook output: 4410.0
# Plot the tracked pitch contour over time, with the tonic as a horizontal
# guide and the 42 s ground-truth section boundary as a vertical guide.
# Pitch confidence is drawn in red on a twinned axis.
groundTruthAlap=42
plot(linspace(0, sample_dur_secs, len(pitches)), pitches)
hlines(tonic, 0, sample_dur_secs, color = 'g')
twinx()
vlines(groundTruthAlap, 0, max(pitches), color = 'g')
twinx()
plot(linspace(0, sample_dur_secs, len(pitchesConfidence)), pitchesConfidence, color='r')#pitch confidence plot
[<matplotlib.lines.Line2D at 0x1005aaad0>]
# --- Time-on-pitch histogram --------------------------------------------------
# Count, for each integer Hz bin, how many frames the tracked pitch spent there.
timeOnPitch = []  # NOTE(review): never used below — kept for compatibility
pc_count = zeros(int(max(pitches))+ 1) #one bin per Hz up to the maximum frequency
trackedPitch = pitches#[:1024]
for freq in trackedPitch:
    #print int(freq),
    # Fix: truncate to int before indexing — indexing a numpy array with a
    # float (the original pc_count[freq]) is an error in modern numpy.
    pc_count[int(freq)] +=1
argmax(pc_count), max(pc_count) #Hz, time
# notebook output: (284, 1269.0)
pc_count[284]
# notebook output: 1269.0
# Keep only the Hz bins occupied for more than a fraction of the busiest bin.
minFreqTimeThreshold = 0.01 * max(pc_count) #Time - 1% of max time (code uses 0.01, not the 5% the original comment claimed)
peaks = argwhere(pc_count > minFreqTimeThreshold)  # 2-D (n,1) array of bin indices
#peaks, pc_count[peaks], int(tonic)
peaks.T
array([[ 0, 141, 142, 143, 179, 190, 213, 214, 215, 216, 226, 227, 238, 239, 268, 270, 272, 275, 277, 279, 280, 282, 284, 286, 288, 317, 319, 321, 324, 352, 355, 358, 361, 364, 367, 373, 376, 380, 383, 424, 428, 432, 436, 441, 445, 479, 484, 490, 537, 544, 551, 565, 572, 580, 621, 630, 1160, 1191, 2756, 2940, 3150]])
# Visualise: tonic (blue vline), threshold (red hline), histogram bars, and the
# surviving peaks as a red stem plot.
vlines(tonic, 0, max(pc_count), color='b', alpha = 0.5, lw= 3)#, linestyles = 'dashed')
hlines(minFreqTimeThreshold, 0, (int(max(pitches))+ 1), color='r')
bar(arange(int(max(pitches))+ 1), pc_count, 0.35)
xlim(100,800)
ylim(0,400)
stem(peaks, pc_count[peaks], linefmt='r--', markerfmt='r.')
<Container object of 3 artists>
TODO — ideas for improving peak selection:
- fit a probability curve over neighboring peaks to merge near-duplicate bins
- use local maxima of the histogram instead of a single flat threshold
- build a GUI to move the threshold interactively and inspect the peaks found
from scipy.signal import argrelextrema
# NOTE(review): argrelextrema is applied to `peaks` (a sorted array of bin
# *indices*), not to the histogram values pc_count[peaks]. A monotone sequence
# has no interior extrema, which is why `minima` prints empty below —
# presumably pc_count was intended; verify before relying on `maxima`.
maxima = argrelextrema(peaks, np.greater)
minima = argrelextrema(peaks, np.less)
print(minima)
(array([], dtype=float64), array([], dtype=float64))
# The two-element unpack only works because peaks is 2-D (argwhere output), so
# argrelextrema returns one coordinate array per axis; a1/a2 are unused below.
[a1, a2] = maxima
# Express every surviving histogram peak as a frequency ratio to the
# integer-truncated tonic; ratios near small rationals hint at svara positions.
tonicHz = float(int(tonic))
ratioToTonic = [peak / tonicHz for peak in peaks]
#peaks
peaks.T, ratioToTonic
(array([[ 0, 141, 142, 143, 179, 190, 213, 214, 215, 216, 226, 227, 238, 239, 268, 270, 272, 275, 277, 279, 280, 282, 284, 286, 288, 317, 319, 321, 324, 352, 355, 358, 361, 364, 367, 373, 376, 380, 383, 424, 428, 432, 436, 441, 445, 479, 484, 490, 537, 544, 551, 565, 572, 580, 621, 630, 1160, 1191, 2756, 2940, 3150]]), [array([ 0.]), array([ 0.99295775]), array([ 1.]), array([ 1.00704225]), array([ 1.26056338]), array([ 1.33802817]), array([ 1.5]), array([ 1.50704225]), array([ 1.51408451]), array([ 1.52112676]), array([ 1.5915493]), array([ 1.59859155]), array([ 1.67605634]), array([ 1.68309859]), array([ 1.88732394]), array([ 1.90140845]), array([ 1.91549296]), array([ 1.93661972]), array([ 1.95070423]), array([ 1.96478873]), array([ 1.97183099]), array([ 1.98591549]), array([ 2.]), array([ 2.01408451]), array([ 2.02816901]), array([ 2.23239437]), array([ 2.24647887]), array([ 2.26056338]), array([ 2.28169014]), array([ 2.47887324]), array([ 2.5]), array([ 2.52112676]), array([ 2.54225352]), array([ 2.56338028]), array([ 2.58450704]), array([ 2.62676056]), array([ 2.64788732]), array([ 2.67605634]), array([ 2.6971831]), array([ 2.98591549]), array([ 3.01408451]), array([ 3.04225352]), array([ 3.07042254]), array([ 3.1056338]), array([ 3.13380282]), array([ 3.37323944]), array([ 3.4084507]), array([ 3.45070423]), array([ 3.78169014]), array([ 3.83098592]), array([ 3.88028169]), array([ 3.97887324]), array([ 4.02816901]), array([ 4.08450704]), array([ 4.37323944]), array([ 4.43661972]), array([ 8.16901408]), array([ 8.38732394]), array([ 19.4084507]), array([ 20.70422535]), array([ 22.18309859])])
def midi2Hz(midinote, tuning=440.0):
    """Return the frequency in Hz of a (possibly fractional) MIDI note.

    Equal temperament: each semitone is a factor of 2**(1/12) relative to
    the reference pitch `tuning` at MIDI note 69 (A4).
    """
    semitones_from_a4 = midinote - 69
    return tuning * 2.0 ** (semitones_from_a4 / 12.0)
def hz2Midi(frequency, tuning=440.0):
    """Return the (fractional) MIDI note number for a frequency in Hz.

    Inverse of midi2Hz: note 69 (A4) corresponds to `tuning`, with 12
    notes per octave.
    """
    octaves_above_a4 = log2(frequency / tuning)
    return 69 + 12 * octaves_above_a4
# Abandoned exploration cell: the loop body was entirely commented out, so the
# notebook raised IndentationError (original traceback preserved below).
# Fix: keep it as a syntactically valid no-op stub.
for pitch in pitches[-100:]:
    for peak in peaks[1:]:
        if int(pitch) == peak: #round to nearest int = 1 hz apart
            #print argwhere(int(pitches) == peak)
            ###print pitch, pitches.index(pitch)
            #print pitches, argwhere(pitches)
            #plot(pitch.index(pitches), pitches)
            pass
# notebook output:
#   File "<ipython-input-26-cc732dd2a100>", line 7
#     #plot(pitch.index(pitches), pitches) ^ IndentationError: expected an indented block
# --- Map tracked pitch frames onto the distinct svara peaks -------------------
distinctSvara = []      # rounded frequencies (Hz) that coincide with a histogram peak
distinctSvaraMidi = []  # the same events as integer MIDI note numbers
distinctSvaraPos = []   # frame indices where those events occur on the pitch track
# peaks[1:-5] drops the 0 Hz bin and the five high outliers (1160..3150 Hz);
# the original comment said "0th and n-1th" but the slice removes the last 5.
# Hoist the peak values into a set: O(1) membership instead of an inner loop.
peakValues = set(int(p) for p in peaks[1:-5].flatten())
for i, pitch in enumerate(pitches):
    if int(pitch) in peakValues:  # round to nearest int = 1 Hz apart
        svaraInMidi = hz2Midi(pitch)  # MIDI note of the found frequency
        distinctSvara.append(int(pitch))
        distinctSvaraPos.append(i)
        distinctSvaraMidi.append(int(svaraInMidi))
#plot(i, pitches)
plot(distinctSvaraPos, distinctSvara, color='b', alpha= 0.5)
#twinx()
#plot(distinctSvaraPos, distinctSvaraMidi, color='r', alpha= 0.5)
#ylim(0,500)
#xlim(200, 500)
# notebook output: [<matplotlib.lines.Line2D at 0x129fa5750>]
# Overlay: full pitch contour, frames matched to svara peaks (red dots), the
# tonic (horizontal green), and the 42 s boundary converted to frame units
# (vertical green). Confidence is drawn in black on a twinned axis.
#plot(linspace(0, sample_dur_secs, len(pitch)), pitch)
#plot(linspace(0, sample_dur_secs, len(distinctSvaraPos)), distinctSvara, 'rx')
plot(pitches)
plot(distinctSvaraPos, distinctSvara, 'r.')
hlines(tonic, 0, len(pitches), color = 'g', lw=3, alpha=0.75)
# frame index = seconds * sampleRate / hop  (44100/512 frames per second)
vlines(groundTruthAlap*44100/hop, 0, max(pitches), color = 'g', lw=3, alpha= 0.75)
ylim(0, peaks[-4]+100)  # clip the y-axis just above the highest musical peak
twinx()
plot(pitchesConfidence, 'k', alpha = 0.5)
#xlim(800, 1000)
[<matplotlib.lines.Line2D at 0x12adc4910>]
# Collapse sustained notes: keep only the points where the MIDI svara changes
# from one matched frame-event to the next.
changeInDistinctSvara = []
for i, svara in enumerate(distinctSvaraMidi):
    #print svara, i
    # Fix: the original compared element 0 against element -1 (Python
    # wrap-around), silently dropping the first note whenever it happened to
    # equal the last one. Always keep the first element.
    if i == 0 or svara != distinctSvaraMidi[i - 1]:
        changeInDistinctSvara.append(svara)
# print 'true'
# else:
# print 'false'
#print(changeInDistinctSvara)
#plot(changeInDistinctSvara)
#plot(distinctSvaraPos, distinctSvara)
# Distinct MIDI note numbers encountered (set -> list; order is arbitrary).
uniqueMidi = list(set(distinctSvaraMidi))
uniqueMidi
[65, 66, 68, 69, 70, 72, 73, 74, 75, 49, 53, 54, 56, 57, 58, 60, 61, 63]
# Fold each unique MIDI note into a single octave (pitch class 0-11).
singleOctave = [mod(note, 12) for note in uniqueMidi]
singleOctave
# notebook output: [5, 6, 8, 9, 10, 0, 1, 2, 3, 1, 5, 6, 8, 9, 10, 0, 1, 3]
# Same single-octave folding, applied to the note-change sequence.
singleOctave_changes = [mod(note, 12) for note in changeInDistinctSvara]
def transitionMatrix_counter(markovChain, transitionMatrix):
    """Accumulate first-order transition counts from a state sequence.

    markovChain      : sequence of integer states (here: pitch classes 0-11).
    transitionMatrix : 2-D numpy array indexed [old_state, new_state];
                       incremented in place, one count per consecutive pair.
                       An empty or single-element chain leaves it unchanged.
    """
    # Fix: range() instead of Python-2-only xrange() — the file already uses
    # Python-3-style print(), so keep everything runnable on Python 3.
    for i in range(1, len(markovChain)):
        old_state = markovChain[i - 1]
        new_state = markovChain[i]
        transitionMatrix[old_state, new_state] += 1
# --- Build and row-normalize the pitch-class transition matrix ----------------
transmat = np.zeros((12, 12))#12 by 12 for the chromatic scale
transitionMatrix_counter(singleOctave_changes, transmat)
# Fix: range()/print() instead of Python-2 xrange and the print statement
# (the file already uses print(minima) above).
for i in range(0, 12):
    #print transmat[i]
    transmat[i] = (transmat[i]/ sum(transmat[i]))  # counts -> probabilities
    # a row with no outgoing transitions divides 0/0 -> NaN; zero those out
    for j in range(0, 12):
        if np.isnan(transmat[i][j]):
            transmat[i][j] = 0
            #print transmat[i][j]
    print(transmat[i])
# notebook output: 12 normalized rows, e.g.
# [ 0.04651163 0.6744186 0.04651163 0. 0. 0.02325581 0.06976744 0. 0.04651163 0.04651163 0.04651163 0. ]
# --- Visualize the transition matrix ------------------------------------------
plt.rcParams['figure.figsize'] = (8,8)
imshow(transmat, interpolation='none', cmap=cm.gray)
show()
# Redraw with each cell annotated by its rounded transition probability.
imshow(transmat, interpolation='none', cmap=cm.gray)
for rownum, row in enumerate(transmat):
    for colnum, val in enumerate(row):
        # white text on dark cells, black on bright ones, for readability
        text(colnum, rownum, round(transmat[rownum][colnum], 2), fontsize=10, color='white' if val < transmat.max()/2 else 'black', ha='center', va='center' )
#colorbar()
# Hindustani svara labels for the 12 pitch classes.
# NOTE(review): index 0 is labeled 'Ni' and index 1 'Sa' — the mapping from
# pitch class to svara depends on the detected tonic; confirm the offset.
svaraNames=['Ni', 'Sa', 're', 'Re', 'ga', 'Ga', 'Ma', 'MA', 'Pa', 'dha', 'Dha', 'ni']
xticks(arange(12), svaraNames, fontsize=14)
yticks(arange(12), svaraNames, fontsize=14);
show()