#Muhammad Hafiz Wan Rosli
#MAT240E-Final I [Time on Pitch & HMM]
#Raag Analysis
#Source: Master of Sitar- 01 Raag Alahya Bilawal (Early Morning Raag).mp3
#Ground Truth:
#Jhala: 00:00 - 00:42
#Gat: 00:42 - 18:46 (1080+46 = 1126 secs)
#Jhala: 18:46 - end (check spectral difference 1 minute before & after)
#Gat-Jhala transition: 18:45 - 18:46
#Code written in python : https://www.python.org/
#Interactive computing ipython : http://ipython.org/
#Audio Analysis & MIR Library : http://essentia.upf.edu/
# --- Environment setup (IPython notebook transcript) -------------------------
%pylab inline
# NOTE(review): `plt` here is the top-level matplotlib package, NOT
# matplotlib.pyplot. rcParams exists on both, so the line below works, but the
# alias is misleading — confirm before routing pyplot-only calls through `plt`.
import matplotlib as plt
from matplotlib.colors import LogNorm
plt.rcParams['figure.figsize'] = (16,4)
# (notebook output of %pylab follows)
Populating the interactive namespace from numpy and matplotlib
# Essentia audio analysis / MIR library (http://essentia.upf.edu/)
from essentia import *
from essentia.standard import *
# --- Load the recording and estimate its tonic -------------------------------
#loader = MonoLoader(filename='/Users/muhammadhafiz/Documents/work/ComputationalEthno/samples/RaagAlahyaBilawal0_3minutes.mp3')
loader = MonoLoader(filename='/Users/muhammadhafiz/Music/iTunes/iTunes Media/Music/Ravi Shankar/Master Of Sitar/01 Raag Alahya Bilawal (Early Morning Raag).mp3')
audio = loader()
sampleRate = 44100
# Analyse only the opening 60 s — ground truth places the jhala/gat boundary
# at 42 s, so one minute covers the transition of interest.
# (Fix: use the sampleRate variable instead of re-hard-coding 44100.)
audioSnippet = audio[:60*sampleRate] #45
sample_dur_secs = len(audioSnippet)/ float(sampleRate)
win_size=1024
hop = 512
window_start = arange(0, len(audioSnippet), hop)
w = Windowing(type = 'hann')
# Tonic estimator specialised for Indian art music; run on the full recording
# for a stable estimate.
tonicNICM=TonicIndianArtMusic()
tonic = tonicNICM(audio)
#tonic = tonicNICM(audio[:60*44100])
tonic
# notebook output: 142.05638122558594  (Hz)
# --- Frame-wise pitch tracking (YIN on the FFT spectrum) ----------------------
pitchesConfidence = []  # per-frame confidence of the pitch estimate
pitches= []             # per-frame detected pitch in Hz
spectrum = Spectrum()
pitchyin = PitchYinFFT()
# Fix: use the win_size/hop variables defined above instead of re-hard-coding
# 1024/512, so window and hop stay in sync everywhere.
for frame in FrameGenerator(audioSnippet, frameSize = win_size, hopSize = hop):
    detectedPitch, pConfidence = pitchyin(spectrum(w(frame)))
    pitches.append(detectedPitch)
    pitchesConfidence.append(pConfidence)
max(pitches)
# notebook output: 4410.0
# Plot the tracked pitch contour over time, with the tonic as a horizontal
# guide and the 42 s ground-truth section boundary as a vertical guide.
# Pitch confidence is drawn in red on a twinned axis.
groundTruthAlap=42
plot(linspace(0, sample_dur_secs, len(pitches)), pitches)
hlines(tonic, 0, sample_dur_secs, color = 'g')
twinx()
vlines(groundTruthAlap, 0, max(pitches), color = 'g')
twinx()
plot(linspace(0, sample_dur_secs, len(pitchesConfidence)), pitchesConfidence, color='r')#pitch confidence plot
[<matplotlib.lines.Line2D at 0x1005aaad0>]
# --- Time-on-pitch histogram --------------------------------------------------
# Count, for each integer Hz bin, how many frames the tracked pitch spent there.
timeOnPitch = []  # NOTE(review): never used below — kept for compatibility
pc_count = zeros(int(max(pitches))+ 1) #one bin per Hz up to the maximum frequency
trackedPitch = pitches#[:1024]
for freq in trackedPitch:
    #print int(freq),
    # Fix: truncate to int before indexing — indexing a numpy array with a
    # float (the original pc_count[freq]) is an error in modern numpy.
    pc_count[int(freq)] +=1
argmax(pc_count), max(pc_count) #Hz, time
# notebook output: (284, 1269.0)
pc_count[284]
# notebook output: 1269.0
# Keep only the Hz bins occupied for more than a fraction of the busiest bin.
minFreqTimeThreshold = 0.01 * max(pc_count) #Time - 1% of max time (code uses 0.01, not the 5% the original comment claimed)
peaks = argwhere(pc_count > minFreqTimeThreshold)  # 2-D (n,1) array of bin indices
#peaks, pc_count[peaks], int(tonic)
peaks.T
array([[ 0, 141, 142, 143, 179, 190, 213, 214, 215, 216, 226, 227, 238, 239, 268, 270, 272, 275, 277, 279, 280, 282, 284, 286, 288, 317, 319, 321, 324, 352, 355, 358, 361, 364, 367, 373, 376, 380, 383, 424, 428, 432, 436, 441, 445, 479, 484, 490, 537, 544, 551, 565, 572, 580, 621, 630, 1160, 1191, 2756, 2940, 3150]])
# Visualise: tonic (blue vline), threshold (red hline), histogram bars, and the
# surviving peaks as a red stem plot.
vlines(tonic, 0, max(pc_count), color='b', alpha = 0.5, lw= 3)#, linestyles = 'dashed')
hlines(minFreqTimeThreshold, 0, (int(max(pitches))+ 1), color='r')
bar(arange(int(max(pitches))+ 1), pc_count, 0.35)
xlim(100,800)
ylim(0,400)
stem(peaks, pc_count[peaks], linefmt='r--', markerfmt='r.')
<Container object of 3 artists>
TODO — ideas for improving peak selection:
- fit a probability curve over neighboring peaks to merge near-duplicate bins
- use local maxima of the histogram instead of a single flat threshold
- build a GUI to move the threshold interactively and inspect the peaks found
from scipy.signal import argrelextrema
# NOTE(review): argrelextrema is applied to `peaks` (a sorted array of bin
# *indices*), not to the histogram values pc_count[peaks]. A monotone sequence
# has no interior extrema, which is why `minima` prints empty below —
# presumably pc_count was intended; verify before relying on `maxima`.
maxima = argrelextrema(peaks, np.greater)
minima = argrelextrema(peaks, np.less)
print(minima)
(array([], dtype=float64), array([], dtype=float64))
# The two-element unpack only works because peaks is 2-D (argwhere output), so
# argrelextrema returns one coordinate array per axis; a1/a2 are unused below.
[a1, a2] = maxima
# Express every surviving histogram peak as a frequency ratio to the
# integer-truncated tonic; ratios near small rationals hint at svara positions.
tonicHz = float(int(tonic))
ratioToTonic = [peak / tonicHz for peak in peaks]
#peaks
peaks.T, ratioToTonic
(array([[ 0, 141, 142, 143, 179, 190, 213, 214, 215, 216, 226, 227, 238, 239, 268, 270, 272, 275, 277, 279, 280, 282, 284, 286, 288, 317, 319, 321, 324, 352, 355, 358, 361, 364, 367, 373, 376, 380, 383, 424, 428, 432, 436, 441, 445, 479, 484, 490, 537, 544, 551, 565, 572, 580, 621, 630, 1160, 1191, 2756, 2940, 3150]]), [array([ 0.]), array([ 0.99295775]), array([ 1.]), array([ 1.00704225]), array([ 1.26056338]), array([ 1.33802817]), array([ 1.5]), array([ 1.50704225]), array([ 1.51408451]), array([ 1.52112676]), array([ 1.5915493]), array([ 1.59859155]), array([ 1.67605634]), array([ 1.68309859]), array([ 1.88732394]), array([ 1.90140845]), array([ 1.91549296]), array([ 1.93661972]), array([ 1.95070423]), array([ 1.96478873]), array([ 1.97183099]), array([ 1.98591549]), array([ 2.]), array([ 2.01408451]), array([ 2.02816901]), array([ 2.23239437]), array([ 2.24647887]), array([ 2.26056338]), array([ 2.28169014]), array([ 2.47887324]), array([ 2.5]), array([ 2.52112676]), array([ 2.54225352]), array([ 2.56338028]), array([ 2.58450704]), array([ 2.62676056]), array([ 2.64788732]), array([ 2.67605634]), array([ 2.6971831]), array([ 2.98591549]), array([ 3.01408451]), array([ 3.04225352]), array([ 3.07042254]), array([ 3.1056338]), array([ 3.13380282]), array([ 3.37323944]), array([ 3.4084507]), array([ 3.45070423]), array([ 3.78169014]), array([ 3.83098592]), array([ 3.88028169]), array([ 3.97887324]), array([ 4.02816901]), array([ 4.08450704]), array([ 4.37323944]), array([ 4.43661972]), array([ 8.16901408]), array([ 8.38732394]), array([ 19.4084507]), array([ 20.70422535]), array([ 22.18309859])])
def midi2Hz(midinote, tuning=440.0):
    """Return the frequency in Hz of a (possibly fractional) MIDI note.

    Equal temperament: each semitone is a factor of 2**(1/12) relative to
    the reference pitch `tuning` at MIDI note 69 (A4).
    """
    semitones_from_a4 = midinote - 69
    return tuning * 2.0 ** (semitones_from_a4 / 12.0)
def hz2Midi(frequency, tuning=440.0):
    """Return the (fractional) MIDI note number for a frequency in Hz.

    Inverse of midi2Hz: note 69 (A4) corresponds to `tuning`, with 12
    notes per octave.
    """
    octaves_above_a4 = log2(frequency / tuning)
    return 69 + 12 * octaves_above_a4
# Abandoned exploration cell: the loop body was entirely commented out, so the
# notebook raised IndentationError (original traceback preserved below).
# Fix: keep it as a syntactically valid no-op stub.
for pitch in pitches[-100:]:
    for peak in peaks[1:]:
        if int(pitch) == peak: #round to nearest int = 1 hz apart
            #print argwhere(int(pitches) == peak)
            ###print pitch, pitches.index(pitch)
            #print pitches, argwhere(pitches)
            #plot(pitch.index(pitches), pitches)
            pass
# notebook output:
#   File "<ipython-input-26-cc732dd2a100>", line 7
#     #plot(pitch.index(pitches), pitches) ^ IndentationError: expected an indented block
# --- Map tracked pitch frames onto the distinct svara peaks -------------------
distinctSvara = []      # rounded frequencies (Hz) that coincide with a histogram peak
distinctSvaraMidi = []  # the same events as integer MIDI note numbers
distinctSvaraPos = []   # frame indices where those events occur on the pitch track
# peaks[1:-5] drops the 0 Hz bin and the five high outliers (1160..3150 Hz);
# the original comment said "0th and n-1th" but the slice removes the last 5.
# Hoist the peak values into a set: O(1) membership instead of an inner loop.
peakValues = set(int(p) for p in peaks[1:-5].flatten())
for i, pitch in enumerate(pitches):
    if int(pitch) in peakValues:  # round to nearest int = 1 Hz apart
        svaraInMidi = hz2Midi(pitch)  # MIDI note of the found frequency
        distinctSvara.append(int(pitch))
        distinctSvaraPos.append(i)
        distinctSvaraMidi.append(int(svaraInMidi))
#plot(i, pitches)
plot(distinctSvaraPos, distinctSvara, color='b', alpha= 0.5)
#twinx()
#plot(distinctSvaraPos, distinctSvaraMidi, color='r', alpha= 0.5)
#ylim(0,500)
#xlim(200, 500)
# notebook output: [<matplotlib.lines.Line2D at 0x129fa5750>]
# Overlay: full pitch contour, frames matched to svara peaks (red dots), the
# tonic (horizontal green), and the 42 s boundary converted to frame units
# (vertical green). Confidence is drawn in black on a twinned axis.
#plot(linspace(0, sample_dur_secs, len(pitch)), pitch)
#plot(linspace(0, sample_dur_secs, len(distinctSvaraPos)), distinctSvara, 'rx')
plot(pitches)
plot(distinctSvaraPos, distinctSvara, 'r.')
hlines(tonic, 0, len(pitches), color = 'g', lw=3, alpha=0.75)
# frame index = seconds * sampleRate / hop  (44100/512 frames per second)
vlines(groundTruthAlap*44100/hop, 0, max(pitches), color = 'g', lw=3, alpha= 0.75)
ylim(0, peaks[-4]+100)  # clip the y-axis just above the highest musical peak
twinx()
plot(pitchesConfidence, 'k', alpha = 0.5)
#xlim(800, 1000)
[<matplotlib.lines.Line2D at 0x12adc4910>]
# Collapse sustained notes: keep only the points where the MIDI svara changes
# from one matched frame-event to the next.
changeInDistinctSvara = []
for i, svara in enumerate(distinctSvaraMidi):
    #print svara, i
    # Fix: the original compared element 0 against element -1 (Python
    # wrap-around), silently dropping the first note whenever it happened to
    # equal the last one. Always keep the first element.
    if i == 0 or svara != distinctSvaraMidi[i - 1]:
        changeInDistinctSvara.append(svara)
# print 'true'
# else:
# print 'false'
#print(changeInDistinctSvara)
#plot(changeInDistinctSvara)
#plot(distinctSvaraPos, distinctSvara)
# Distinct MIDI note numbers encountered (set -> list; order is arbitrary).
uniqueMidi = list(set(distinctSvaraMidi))
uniqueMidi
[65, 66, 68, 69, 70, 72, 73, 74, 75, 49, 53, 54, 56, 57, 58, 60, 61, 63]
# Fold each unique MIDI note into a single octave (pitch class 0-11).
singleOctave = [mod(note, 12) for note in uniqueMidi]
singleOctave
# notebook output: [5, 6, 8, 9, 10, 0, 1, 2, 3, 1, 5, 6, 8, 9, 10, 0, 1, 3]
# Same single-octave folding, applied to the note-change sequence.
singleOctave_changes = [mod(note, 12) for note in changeInDistinctSvara]
def transitionMatrix_counter(markovChain, transitionMatrix):
    """Accumulate first-order transition counts from a state sequence.

    markovChain      : sequence of integer states (here: pitch classes 0-11).
    transitionMatrix : 2-D numpy array indexed [old_state, new_state];
                       incremented in place, one count per consecutive pair.
                       An empty or single-element chain leaves it unchanged.
    """
    # Fix: range() instead of Python-2-only xrange() — the file already uses
    # Python-3-style print(), so keep everything runnable on Python 3.
    for i in range(1, len(markovChain)):
        old_state = markovChain[i - 1]
        new_state = markovChain[i]
        transitionMatrix[old_state, new_state] += 1
# --- Build and row-normalize the pitch-class transition matrix ----------------
transmat = np.zeros((12, 12))#12 by 12 for the chromatic scale
transitionMatrix_counter(singleOctave_changes, transmat)
# Fix: range()/print() instead of Python-2 xrange and the print statement
# (the file already uses print(minima) above).
for i in range(0, 12):
    #print transmat[i]
    transmat[i] = (transmat[i]/ sum(transmat[i]))  # counts -> probabilities
    # a row with no outgoing transitions divides 0/0 -> NaN; zero those out
    for j in range(0, 12):
        if np.isnan(transmat[i][j]):
            transmat[i][j] = 0
            #print transmat[i][j]
    print(transmat[i])
# notebook output: 12 normalized rows, e.g.
# [ 0.04651163 0.6744186 0.04651163 0. 0. 0.02325581 0.06976744 0. 0.04651163 0.04651163 0.04651163 0. ]
# --- Visualize the transition matrix ------------------------------------------
plt.rcParams['figure.figsize'] = (8,8)
imshow(transmat, interpolation='none', cmap=cm.gray)
show()
# Redraw with each cell annotated by its rounded transition probability.
imshow(transmat, interpolation='none', cmap=cm.gray)
for rownum, row in enumerate(transmat):
    for colnum, val in enumerate(row):
        # white text on dark cells, black on bright ones, for readability
        text(colnum, rownum, round(transmat[rownum][colnum], 2), fontsize=10, color='white' if val < transmat.max()/2 else 'black', ha='center', va='center' )
#colorbar()
# Hindustani svara labels for the 12 pitch classes.
# NOTE(review): index 0 is labeled 'Ni' and index 1 'Sa' — the mapping from
# pitch class to svara depends on the detected tonic; confirm the offset.
svaraNames=['Ni', 'Sa', 're', 'Re', 'ga', 'Ga', 'Ma', 'MA', 'Pa', 'dha', 'Dha', 'ni']
xticks(arange(12), svaraNames, fontsize=14)
yticks(arange(12), svaraNames, fontsize=14);
show()