This notebook visualizes and clusters notes in the original MIDI file (oscar2notes.txt). Later, it goes on to other things with CFG techniques. Note that you do NOT modify the original notes in this notebook (although you might take a subset of those notes).

Basically, this is an example of what you can do with the notes (using the originals as the example).

In [99]:

%matplotlib inline

from collections import Counter, defaultdict
from sklearn.cluster import KMeans
from sklearn.cluster import AffinityPropagation
from itertools import izip
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import sys, copy, random

In [100]:

# Import the original notes and order them by onset time ("Offset").
NOTES_PATH = 'oscar2notes.txt'

# The first two lines of the file are a header: the metronome mark, then the
# time signature (presumably written like "4 / 4"). Text mode keeps the
# string handling below identical on Python 2 and Python 3.
with open(NOTES_PATH, 'r') as f:
    metmark = float(f.readline())
    # Treat "/" as a separator so both "4 / 4" and "4/4" split into two fields.
    tsig_num, tsig_den = f.readline().replace('/', ' ').split()

# Every note row is loaded here; slice the result (e.g. [:200]) to work on a subset.
# sort_values replaces DataFrame.sort, which current pandas releases no longer provide.
oscar2 = pd.read_csv(NOTES_PATH, skiprows=2).sort_values("Offset")
oscar2.index = range(1, len(oscar2) + 1)  # 1-based index in onset order
oscar2 = oscar2[oscar2.Octave >= 4]  # threshold >= octave 4 for melodies

print("Metrics:")
# One pre-formatted string prints the same under the py2 statement and the py3 function.
# The note count is shorter than the file if the octave cutoff (or a subset slice) applies.
print("%s %s %s %s" % (metmark, tsig_num, tsig_den, len(oscar2)))
oscar2 = oscar2.drop("Octave", axis=1).rename(columns={"Note/Rest": "Note"})
oscar2.head(20)

Metrics:
176.0 4 4 1078

Out[100]:

	Note	Len	Offset
2	D	0.750000	12.666667
3	E	0.666667	14.000000
4	C#	0.875000	14.000000
5	A	0.250000	15.000000
6	F	3.125000	16.000000
7	D	0.250000	16.000000
8	A	3.125000	16.000000
9	F	1.333333	16.000000
10	D	3.000000	16.375000
11	F	1.750000	17.625000
12	G	0.666667	20.625000
13	B-	0.250000	20.666667
14	E-	0.625000	22.000000
15	A	0.125000	22.000000
17	G	0.375000	22.000000
18	B-	0.875000	23.875000
19	F	1.250000	23.875000
20	B-	1.250000	25.500000
21	D	0.750000	28.625000
22	B	1.375000	28.625000

20 rows × 3 columns

In [160]:

# Plot note duration ("Len") over onset time ("Offset").
def plotTiming(data, labels=None, clustercenters=None, title='Original Notes'):
    """Draw a duration-over-offset chart of `data` on the current matplotlib figure.

    Parameters
    ----------
    data : DataFrame (or mapping of equal-length sequences)
        Must provide 'Offset', 'Len' and 'Note' columns.
    labels : sequence, optional
        One cluster label per row of `data`, matched by position. When given,
        each cluster is drawn as 'x' markers in its own random colour and the
        note-name annotations are skipped.
    clustercenters : iterable of (offset, length) pairs, optional
        Drawn as black dots on top of the notes.
    title : str, optional
        Heading shown above the chart.

    Raises
    ------
    ValueError
        If `data` is empty, or `labels` does not have one entry per row.
    """
    numberofitems = len(data)
    if numberofitems == 0:
        raise ValueError("plotTiming needs at least one note to plot")
    # Ticks, annotations and the raised title are only readable for small plots.
    sparse = numberofitems <= 100

    # Plain arrays so boolean masks below are positional, whatever the index is.
    dx = np.asarray(data['Offset'])
    dy = np.asarray(data['Len'])
    dn = np.asarray(data['Note'])

    # Dashed magenta line joining the notes in row order.
    plt.plot(dx, dy, 'm.--', linewidth=1.5)

    if labels is None:
        plt.plot(dx, dy, 'ko')
    else:
        labels = np.asarray(labels)
        if len(labels) != numberofitems:
            raise ValueError("labels must have one entry per row of data "
                             "(got %d labels for %d rows)"
                             % (len(labels), numberofitems))
        # One colour and one plot call per cluster rather than per note.
        for label in np.unique(labels):
            color = '#%02x%02x%02x' % tuple(random.randint(0, 255)
                                            for _ in range(3))
            member = labels == label
            plt.plot(dx[member], dy[member], 'x', ms=15, mew=1.5, color=color)

    # Cluster centers are always black, so they need no per-cluster colour
    # (and therefore work even when no labels were passed).
    if clustercenters is not None:
        for center in clustercenters:
            plt.plot(center[0], center[1], 'ko', mew=0, ms=7.5)

    fig = plt.gcf()
    ax = plt.gca()

    if sparse:
        # A tick at every whole offset, labelled on the top edge as well.
        plt.xticks(np.arange(0, int(dx.max()) + 1))
        ax.xaxis.set_tick_params(labeltop=True)
        # Annotate with note names only for unclustered plots
        # (otherwise it gets too messy).
        if labels is None:
            for note, offset, length in zip(dn, dx, dy):
                plt.annotate(note, xy=(offset, length), color='g')
        # Title is placed as text above the y-limit, presumably so it clears
        # the top tick labels.
        plt.text(dx.max() / 2.0, dy.max() + 1.5, title, fontsize=20,
                 horizontalalignment='center')
    else:
        plt.title(title, fontsize=20, horizontalalignment='center')

    # Limits, axis labels, grid (horizontal lines only) and figure size.
    plt.ylim([0, dy.max() + 0.25])
    plt.xlim([dx.min() - 1, dx.max() + 1])
    plt.ylabel('Duration', fontsize=16)
    plt.xlabel('Offset', fontsize=16)
    plt.grid()
    ax.xaxis.grid(False)
    fig.set_size_inches(18, 6)

plotTiming(oscar2)

In [161]:

# Pull the timing columns out as plain NumPy arrays for scikit-learn.
# Go through `.values` first: Series.reshape is not available in current
# pandas, while ndarray.reshape works with every version.
notesX = oscar2["Offset"].values.reshape(-1, 1)      # onset times, shape (n, 1)
notesY = oscar2["Len"].values.reshape(-1, 1)         # durations, shape (n, 1)
notesXY = np.concatenate((notesX, notesY), axis=1)   # (offset, duration) rows, shape (n, 2)
notenames = np.array(list(oscar2["Note"]))           # note names, same row order

In [162]:

# Cluster the notes in (offset, duration) space with k-means.
# Cluster count follows the sqrt(n / 2) rule of thumb; keep it at least 1 so
# a very small selection of notes can still be fitted.
n_clusters = max(1, int(np.sqrt(len(notesX) // 2)))
# Fixed seed: without it every run numbers (and may shape) the clusters
# differently, so any later cell that picks a cluster by id is not reproducible.
KMEANS_SEED = 0
km = KMeans(n_clusters=n_clusters, random_state=KMEANS_SEED)
km.fit(notesXY)
plotTiming(oscar2, labels=km.labels_, clustercenters=km.cluster_centers_)

In [163]:

## Getting the notes
# print "Label, note's x-coord, note's y-coord, notename "
# for label, note, notename in izip(km.labels_, notesXY, notenames):
#     print label, note[0], note[1], notename

In [164]:

# Next goal: for each cluster, find a chord that would go well to be played at its start.
# Gather, per note: its cluster label, its name and its (offset, duration) coordinates.
noteinfo = pd.DataFrame(
    {
        "Labels": km.labels_,
        "Note": notenames,
        "X-Coords": notesXY[:, 0],
        "Y-Coords": notesXY[:, 1],
    },
    columns=["Labels", "Note", "X-Coords", "Y-Coords"],  # fix the column order explicitly
)
print(noteinfo.shape)
noteinfo.head()

(1078, 4)

Out[164]:

	Labels	Note	X-Coords	Y-Coords
0	5	D	12.666667	0.750000
1	5	E	14.000000	0.666667
2	5	C#	14.000000	0.875000
3	5	A	15.000000	0.250000
4	5	F	16.000000	3.125000

5 rows × 4 columns

In [156]:

# Inspect every note assigned to cluster id 14.
# NOTE(review): the recorded outputs show the same opening notes under different
# cluster ids in different runs, so confirm that 14 is the cluster you mean.
label14 = noteinfo.loc[noteinfo["Labels"] == 14]
print(label14.shape)
label14

(30, 4)

Out[156]:

	Labels	Note	X-Coords	Y-Coords
0	14	D	12.666667	0.750000
1	14	E	14.000000	0.666667
2	14	C#	14.000000	0.875000
3	14	A	15.000000	0.250000
4	14	F	16.000000	3.125000
5	14	D	16.000000	0.250000
6	14	A	16.000000	3.125000
7	14	F	16.000000	1.333333
8	14	D	16.375000	3.000000
9	14	F	17.625000	1.750000
10	14	G	20.625000	0.666667
11	14	B-	20.666667	0.250000
12	14	E-	22.000000	0.625000
13	14	A	22.000000	0.125000
14	14	G	22.000000	0.375000
15	14	B-	23.875000	0.875000
16	14	F	23.875000	1.250000
17	14	B-	25.500000	1.250000
18	14	D	28.625000	0.750000
19	14	B	28.625000	1.375000
20	14	G#	28.625000	0.250000
21	14	D	28.625000	1.250000
22	14	B-	28.666667	0.125000
23	14	G#	29.250000	0.375000
24	14	C	30.000000	0.666667
25	14	D	30.000000	0.250000
26	14	B-	30.875000	0.250000
27	14	E	31.500000	3.375000
28	14	C	31.500000	3.625000
29	14	C	31.625000	3.500000

30 rows × 4 columns

In [ ]: