from os import listdir, chdir
from os.path import expanduser

import numpy as np
from essentia.standard import AudioLoader

# Work from inside the music folder: `listdir` returns bare file names, and
# those names are later handed to AudioLoader unchanged, so they only resolve
# relative to this directory.  (Plain-Python replacement for the IPython
# `cd ~/Desktop/Music/` automagic, which is a syntax error outside IPython.)
chdir(expanduser('~/Desktop/Music/'))

base_dir = '/Users/adj/Desktop/Music/'
# Directory entries of the music folder; tracks are later picked by position.
music_list = listdir(base_dir)

def getRMS(x):
    """Return the root-mean-square value of the samples in *x*.

    Args:
        x: Array-like of numeric samples (any dtype, any shape); the RMS is
            taken over all elements.

    Returns:
        The RMS as a NumPy float.  An empty input yields NaN.
    """
    # Promote to float before squaring: squaring a fixed-width integer array
    # (e.g. int16 PCM) wraps around silently and corrupts the result.
    samples = np.asarray(x, dtype=float)
    return np.sqrt(np.mean(samples ** 2))

def downmix(x):
    """Mix a multi-channel signal down to mono by averaging its channels.

    Args:
        x: NumPy array, either 1-D (already mono) or with the channels laid
            out along axis 1 (frames x channels).

    Returns:
        ``x`` itself (not a copy) when it is 1-D; otherwise a float array
        holding the per-frame mean over axis 1.
    """
    if x.ndim == 1:
        return x     # x is already mono
    # Average over the channel axis.  Dividing the channel sum by ``x.ndim``
    # is only right for stereo (2 dimensions == 2 channels by coincidence);
    # the mean divides by the actual number of channels.
    return x.astype(float).mean(axis=1)

# Drop the first directory entry before tracks are picked by index.
# NOTE(review): presumably this skips a non-audio entry at the head of the
# listing -- confirm; os.listdir documents its result order as arbitrary.
music_list = music_list[1:]

def normalize(x):
    """Scale *x* so that its largest absolute sample becomes 1.0.

    Args:
        x: Array-like of numeric samples.

    Returns:
        A new float array with the same shape as ``x``.  Empty input and
        all-zero (silent) input are returned as float arrays unchanged in
        value instead of raising or producing NaNs.
    """
    # Always work on a float copy so the caller's data is never modified and
    # taking abs() of the most negative integer sample cannot overflow.
    samples = np.array(x, dtype=float)
    if samples.size == 0:
        return samples
    peak = np.abs(samples).max()
    if peak == 0:
        # Silent signal: there is nothing to scale, and 0/0 would give NaN.
        return samples
    return samples / peak

def windowed_rms(input_sig, win_sizes=None, hop=None):
    """Compute frame-wise RMS of a signal for one or more window sizes.

    Args:
        input_sig: Array-like of samples.
        win_sizes: Iterable of window lengths in samples.  Defaults to
            ``[512, 1024, 2048, 4096]``.  Non-integer lengths are truncated
            to whole samples.
        hop: Step between window starts in samples.  When omitted (or
            falsy), half of *each* window length is used.

    Returns:
        ``(rms_windows, win_sizes)`` where ``rms_windows[i]`` is the list of
        RMS values for ``win_sizes[i]`` and ``win_sizes`` is the list that
        was used (the caller's object, or a fresh default list).

    Note:
        Window starts are ``arange(0, len(input_sig) - win, hop)``, so a
        signal that is not longer than the window yields no frames.
    """
    # Build the default per call: a list literal in the signature would be
    # shared between calls, and it is handed back to the caller.
    if win_sizes is None:
        win_sizes = [512, 1024, 2048, 4096]

    signal = np.asarray(input_sig, dtype=float)
    rms_windows = []
    for win_size in win_sizes:
        # Slice bounds must be integers; callers pass e.g. ``sr * 0.1``.
        win = int(win_size)
        # Derive the default hop from *this* window length; do not carry the
        # value computed for the first window over to the later ones.
        step = int(hop) if hop else max(1, win // 2)
        starts = np.arange(0, len(signal) - win, step)
        # Same formula as getRMS: sqrt(mean(frame ** 2)).
        rms_windows.append(
            [np.sqrt(np.mean(signal[s:s + win] ** 2)) for s in starts]
        )
    return rms_windows, win_sizes

# Pick the five tracks to analyse by their position in the directory listing.
# NOTE(review): these positions only make sense for the original folder
# contents and listing order -- confirm before reusing on another machine.
# (The bare-name lines that followed each assignment only echoed the value in
# an interactive session; they are no-ops in a script and have been dropped.)
polonaise = music_list[16]      # solo piano ('Military Polonaise')
july = music_list[12]           # the country music track
campanella = music_list[22]     # solo piano ('La Campanella')
seashore = music_list[21]       # the percussion quartet
filtering = music_list[-10]     # filtered human speech ('Sonogram filtering')

# Load each selected file, mix it down to mono, peak-normalize it and plot the
# waveform.  The load order (and therefore the order inside `music`) is
# polonaise, campanella, july, seashore, filtering.
# `sr` and `nchnls` are left holding the values of the last file loaded; the
# analysis below uses that single `sr` for every track.
# NOTE(review): this assumes all five files share one sample rate -- confirm.
tracks = []
for filename in (polonaise, campanella, july, seashore, filtering):
    loader = AudioLoader(filename = filename)
    # Only the first three outputs (audio, sample rate, channel count) are
    # needed; slicing keeps the unpack valid on Essentia versions whose
    # AudioLoader returns additional outputs after them.
    song, sr, nchnls = loader()[:3]
    mono = normalize(downmix(song))
    plot(mono)
    tracks.append(mono)

# Rebind the names from file names to the processed signals.
polonaise, campanella, july, seashore, filtering = tracks

music = [polonaise, campanella, july, seashore, filtering]

# Temporal centroid of each track: the RMS-weighted mean time, computed from
# the windowed RMS envelope and marked on the waveform as a vertical line.
L = 0.1                         # analysis window length in seconds
win_len = int(round(sr * L))    # same length in whole samples (used to slice)

for song in music:

    w_rms, win_lens = windowed_rms(song, win_sizes=[win_len])

    sample_dur_secs = len(song)/float(sr)
    # One time stamp per RMS frame, spread evenly over the track duration.
    time = linspace(0, sample_dur_secs, len(w_rms[0]))

    tc = sum(w_rms[0]*time)/sum(w_rms[0])
    # Single pre-formatted string: prints identically on Python 2 and 3.
    print("%s Temporal Centroid" % tc)
    plot(linspace(0, sample_dur_secs, len(song)), song, alpha=0.5)
    # The tracks were peak-normalized to [-1, 1]; span exactly that range so
    # the marker does not rescale the axis and flatten the waveform.
    vlines(tc, -1, 1, lw=4)
    grid()
    show()

#We can see that, despite the diversity of the genres (a couple of classical solo piano pieces, a 21st-century percussion quartet,
#a country song, and a sample of human speech from 'Microsound'), most of the temporal centroids are very close to the center of the track.
#Probably the most interesting result was for the second one, 'La Campanella', which has such a dramatically loud and flamboyant
#ending and such a quiet and timid beginning that its centroid is skewed toward the end a bit more than usual.

def windowed_zcr(sig_in, winsize, hop, sr = 1.0):
    """Compute the zero-crossing rate of a signal over sliding windows.

    Args:
        sig_in: Array-like of samples.
        winsize: Window length in samples; non-integer values are truncated.
        hop: Step between window starts in samples; non-integer values are
            truncated.
        sr: Sample rate used to scale the result (default 1.0).

    Returns:
        ``(times, zcr)``: ``times`` holds the centre of each window divided
        by ``sr``; ``zcr`` holds, per window, the number of sign changes
        between adjacent samples scaled by ``sr / winsize``.  A pair of
        samples counts as a crossing only when their product is negative, so
        samples that are exactly zero do not register one.
    """
    # Slice bounds must be integers; callers pass e.g. ``0.1 * sr``.
    winsize = int(winsize)
    hop = int(hop)

    signal = np.asarray(sig_in, dtype=float)
    win_start = np.arange(0, len(signal) - winsize, hop)
    zcr = np.zeros(len(win_start))
    for i, start in enumerate(win_start):
        frame = signal[start: start + winsize]
        crossings = np.count_nonzero(frame[:-1] * frame[1:] < 0)
        zcr[i] = (sr/float(winsize)) * crossings

    # True division keeps the window centre exact for odd window lengths.
    times = win_start + winsize/2.0
    return times/float(sr), zcr

# Zero-crossing rate of every track, using 100 ms windows with a 50 ms hop.
# The sizes are converted to whole sample counts once, up front, because they
# are used as slice bounds inside windowed_zcr.
zcr_win = int(round(0.1 * sr))
zcr_hop = int(round(0.05 * sr))

for song in music:
    times, zcr = windowed_zcr(song, zcr_win, zcr_hop, sr)
    plot(times, zcr)
    show()

#These are all quite distinct! The first two, both being solo piano classical pieces, seem to bear the closest resemblance, although
#we see that the second piece ('La Campanella') encompasses a broader range, up to almost 6000. The country song goes all the way up
#to 18000, which isn't so easy for me to understand---perhaps there is some noise that includes high frequencies. It also seems
#to be the most time-varying. We see that the percussion piece overall has some lower values, perhaps because many of the sounds
#are inharmonic, complex tones rather than pitches. I'm not sure how to interpret the last track ('Sonogram filtering'), which is
#actually human speech being modified.


# Per-frame spectral centroid and "spread" for the first two tracks (the two
# solo piano pieces), each drawn as spread on the left axis and centroid on a
# twin right axis.
# NOTE: `spread` here is the plain variance of each frame's magnitude values
# (var(spec)); it is not weighted by frequency or centred on the centroid.
for track in (music[0], music[1]):
    Pxx, freqs, times, im = specgram(track)

    # Square root of the values returned by specgram; one column per frame.
    X = sqrt(Pxx)

    # Magnitude-weighted mean frequency of every frame.
    centroid = [sum(spec*freqs)/sum(spec) for spec in X.T]

    # Variance of the magnitudes of every frame.
    spread = [var(spec) for spec in X.T]

    p1 = plot(spread)
    twinx()
    p2 = plot(centroid, 'g')

    # plot() returns lists of lines, so p1 + p2 is the combined handle list.
    legend((p1 + p2), ('spread', 'centroid'), loc='best')
    grid()

#Even though these two (the 'Military Polonaise' and 'La Campanella') are very similar in genre, both being
#solo piano Romantic pieces, the spread/centroid seems quite different! In the second, 'La Campanella',
#for the most part the spread value is under the centroid value, whereas in the Polonaise, it is above
#equally often. 'La Campanella' is full of some very high-pitched (and probably bright in timbre) ostinato
#notes, representing the bells, whereas the Polonaise overall is a bit darker and in a lower register,
#so perhaps this is the reason.

def _spectral_entropy(magnitudes):
    """Return the Shannon entropy, in bits, of every column of *magnitudes*.

    Each column is normalized to sum to 1 and treated as a distribution over
    frequency bins.  Bins with zero weight contribute nothing (the limit of
    p*log(p) as p -> 0) instead of turning the whole frame into NaN.
    """
    entropies = []
    for spec in magnitudes.T:
        bands = spec / np.sum(spec)
        occupied = bands[bands > 0]
        entropies.append(-np.sum(occupied * np.log2(occupied)))
    return entropies


Pxx, freqs, times, im = specgram(music[-1]);     #the filtered human speech

Pxx2, freqs2, times2, im2 = specgram(music[-2]);     #the percussion quartet

# Both tracks go through the same helper so the two curves share one unit
# (bits); mixing natural log for one and log2 for the other would scale them
# differently and make the comparison meaningless.
X = sqrt(Pxx)
entropy = _spectral_entropy(X)

plot(entropy)

figure()
X = sqrt(Pxx2)
entropy = _spectral_entropy(X)

plot(entropy)

#I would have expected a much more stark difference! Not really sure how to interpret the results...

def _spectral_rolloff(magnitudes, cutoff):
    """Return the roll-off position of every column of *magnitudes*.

    For each column this is the index of the first bin at which the running
    sum reaches ``cutoff`` times the column total, divided by the number of
    bins (so the values lie in [0, 1)).
    """
    positions = []
    for spec in magnitudes.T:
        running = np.cumsum(spec)
        # Compare against the last running-sum value so that, for
        # cutoff <= 1, at least the final bin always qualifies.
        reached = np.flatnonzero(running >= cutoff * running[-1])
        positions.append(reached[0] / float(len(spec)))
    return positions


Pxx, freqs, times, im = specgram(music[2], NFFT=2048, noverlap=1024);     #the country music track

Pxx2, freqs2, times2, im2 = specgram(music[-1], NFFT=2048, noverlap=1024);     #the human speech again

cutoff=0.85

# Each track gets its own freshly built list, so the second plot shows only
# the second track rather than both tracks back to back.
X = sqrt(Pxx)
rolloff = _spectral_rolloff(X, cutoff)

plot(rolloff)

figure()

X = sqrt(Pxx2)
rolloff = _spectral_rolloff(X, cutoff)

plot(rolloff)


#I had expected maybe the results to be somewhat similar since both tracks have vocals, so at least this isn't
#too surprising.