This notebook visualizes and clusters notes in the original MIDI file (oscar2notes.txt). Later, it goes on to other things with CFG techniques. Note that you do NOT modify the original notes in this notebook (although you might take a subset of those notes).

Basically, this is an example of what you can do with the note data (using the original notes as the example).

Dependencies:

N/A

In [8]:

%matplotlib inline

from collections import Counter, defaultdict
from sklearn.cluster import KMeans
from sklearn.cluster import AffinityPropagation
from itertools import izip
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import sys, copy, random

In [9]:

# Import the original notes, order them by offset, and keep the melody register.
NOTES_PATH = 'oscar2notes.txt'

# The first two lines of the file are read as a header: a float (stored as
# `metmark`) and a "numerator / denominator" pair (stored, as strings, in
# `tsig_num` / `tsig_den`). The CSV table starts after them, hence skiprows=2.
# NOTE(review): presumably the metronome mark and time signature -- confirm.
with open(NOTES_PATH, 'r') as f:
    metmark = float(f.readline())
    # Split on the slash itself so "4/4" parses as well as "4 / 4".
    tsig_num, tsig_den = f.readline().replace('/', ' ').split()

# All notes are loaded; slice the frame here (e.g. [:200]) to work on a subset.
oscar2 = pd.read_csv(NOTES_PATH, skiprows=2)

# DataFrame.sort was renamed sort_values in later pandas; use whichever exists.
sort_by = getattr(oscar2, 'sort_values', None) or oscar2.sort
oscar2 = sort_by("Offset")

oscar2 = oscar2[oscar2.Octave >= 4]  # threshold >= octave 4 for melodies

print("Metrics:")
# len is shorter after the octave cutoff (and after any subset slice above).
print("%s %s %s %s" % (metmark, tsig_num, tsig_den, len(oscar2)))

oscar2 = oscar2.rename(columns={"Note/Rest": "Note"}).reset_index(drop=True)
# Note name + octave, e.g. "C#5". Columns are addressed by name so the result
# does not depend on the column order of the CSV.
oscar2["FullNote"] = ["%s%s" % (name, octave)
                      for name, octave in zip(oscar2["Note"], oscar2["Octave"])]
oscar2.head(20)

Metrics:
176.0 4 4 1078

Out[9]:

	Note	Octave	Len	Offset	FullNote
0	D	5	0.750000	12.666667	D5
1	E	4	0.666667	14.000000	E4
2	C#	5	0.875000	14.000000	C#5
3	A	5	0.250000	15.000000	A5
4	F	4	3.125000	16.000000	F4
5	D	5	0.250000	16.000000	D5
6	A	4	3.125000	16.000000	A4
7	F	5	1.333333	16.000000	F5
8	D	5	3.000000	16.375000	D5
9	F	5	1.750000	17.625000	F5
10	G	4	0.666667	20.625000	G4
11	B-	4	0.250000	20.666667	B-4
12	E-	4	0.625000	22.000000	E-4
13	A	4	0.125000	22.000000	A4
14	G	4	0.375000	22.000000	G4
15	B-	5	0.875000	23.875000	B-5
16	F	4	1.250000	23.875000	F4
17	B-	5	1.250000	25.500000	B-5
18	D	6	0.750000	28.625000	D6
19	B	5	1.375000	28.625000	B5

20 rows × 5 columns

In [10]:

# Plot the length over offset, optionally coloured by cluster.
def plotTiming(data, labels=None, clustercenters=None):
    """Draw each note's duration ('Len') against its onset ('Offset').

    Draws on the current pyplot figure and returns nothing.

    data           -- table with 'Offset', 'Len' and 'Note' columns (e.g. a
                      DataFrame); len(data) must be the number of notes.
    labels         -- optional sequence with one cluster id per note. When
                      given, notes are drawn as 'x' markers in a random colour
                      per cluster instead of black dots.
    clustercenters -- optional sequence of (offset, length) pairs, each drawn
                      as a black dot.

    Raises ValueError if `labels` does not have exactly one entry per note.
    """
    numberofitems = len(data)
    # Ticks, note-name annotations and the top axis labels are only drawn for
    # small inputs (otherwise it gets too messy).
    detailed = numberofitems <= 100

    if labels is not None and len(labels) != numberofitems:
        raise ValueError("labels has %d entries but data has %d notes"
                         % (len(labels), numberofitems))

    # One random colour per distinct cluster id.
    clusterCodes = dict()
    if labels is not None:
        for label in sorted(set(labels)):
            rgb = tuple(random.randint(0, 255) for _ in range(3))
            clusterCodes[label] = '#%02x%02x%02x' % rgb

    # Initialize the graph
    dx = data['Offset']
    dy = data['Len']
    dn = data['Note']
    plt.plot(dx, dy, 'm.--', linewidth=1.5)
    if labels is None:
        plt.plot(dx, dy, 'ko')
    else:
        # One draw call per cluster rather than one per note.
        xs = np.asarray(dx)
        ys = np.asarray(dy)
        labelarr = np.asarray(labels)
        for label in sorted(clusterCodes):
            member = labelarr == label
            plt.plot(xs[member], ys[member], 'x', ms=15, mew=1.5,
                     color=clusterCodes[label])

    # Plot the cluster centers (always black) if available. No colour lookup
    # is needed, so this also works when no labels were passed.
    if clustercenters is not None:
        for center in clustercenters:
            plt.plot(center[0], center[1], 'ko', mew=0, ms=7.5)

    # Plot the ticks if under certain # of points
    if detailed:
        plt.xticks(range(0, int(max(dx)) + 1))

    # Annotate with note data only if under certain # of points
    if detailed and labels is None:
        for note, offset, length in zip(dn, dx, dy):
            plt.annotate(note, xy=(offset, length), color='g')

    # Set xticks on top
    fig = plt.gcf()
    ax = plt.gca()
    if detailed:
        ax.xaxis.set_tick_params(labeltop=True)

    # Enter title
    if detailed:
        plt.text(max(dx) / 2, max(dy) + 1.5, 'Original Notes', fontsize=20, horizontalalignment='center')
    else:
        plt.title('Oscar\'s Playing', fontsize=20, horizontalalignment='center')

    # Set fig limits, size, and other display things
    plt.ylim([0, max(dy) + 0.25])
    plt.xlim([min(dx) - 1, max(dx) + 1])
    plt.ylabel('Duration', fontsize=16)
    plt.xlabel('Offset', fontsize=16)
    plt.grid()
    fig.set_size_inches(18, 6)
    ax.xaxis.grid(False)
    
# Draw the timing plot for the notes in `oscar2`, without cluster labels or centers.
plotTiming(oscar2)

Next, let's cluster these notes with KMeans. Each cluster will have a different color, and the centroid for each one will be a black circle.

In [11]:

# Offsets and lengths as (n, 1) column vectors, joined into an (n, 2) matrix
# with one (offset, length) row per note.
# Reshape the underlying arrays: Series.reshape is deprecated in later pandas.
notesX = oscar2["Offset"].values.reshape(-1, 1)
notesY = oscar2["Len"].values.reshape(-1, 1)
notesXY = np.concatenate((notesX, notesY), axis=1)
notenames = np.array(list(oscar2["Note"]))

In [7]:

# Cluster the (offset, length) points. The cluster count is sqrt(n // 2);
# floor division keeps it identical on Python 2 and Python 3.
n_clusters = int(np.sqrt(len(notesX) // 2))
# Fixed seed so cluster ids (e.g. the label-14 example further down) are the
# same on every run.
km = KMeans(n_clusters=n_clusters, random_state=0)
km.fit(notesXY)
plotTiming(oscar2, labels=km.labels_, clustercenters=km.cluster_centers_)

In [40]:

# Convert a music21-style note name such as C5, D#7 or B-4 into a semitone
# index. (Don't convert it for mingus; stick to music21 note style.)
def quantify(note):
    """Return the semitone index of `note`, counted upward from C1 == 0.

    The last character is the octave digit; every preceding character is a
    pitch letter (C, D, E, F, G, A, B), a sharp ('#', +1) or a flat ('-', -1).
    Characters outside that set contribute nothing.

    Note this is 12 * (octave - 1) + pitch class, so C1 -> 0 and C5 -> 48,
    while A0 -> -3 and C8 -> 84: it is not the 0..87 piano-key numbering.
    """
    semitones = {'C': 0, 'D': 2, 'E': 4, 'F': 5, 'G': 7, 'A': 9, 'B': 11}
    accidentals = {'-': -1, '#': 1}

    name, octave = note[:-1], int(note[-1]) - 1
    within_octave = sum(semitones.get(ch, 0) + accidentals.get(ch, 0) for ch in name)
    return within_octave + 12 * octave

# Plot the notes as played by MIDI: keyboard position across, offset upward.
def plotMIDI(data, quantizednotes):
    """Draw every note at (quantized pitch, offset) on the current figure.

    data           -- table with 'Offset' and 'Note' columns (e.g. a DataFrame).
    quantizednotes -- one numeric pitch per row of `data`, in the same order
                      (for example the output of quantify on each note name).

    Prints the two lengths, then raises ValueError if they differ. Returns
    nothing; the caller is expected to show or save the figure.
    """
    # Initialize the graph
    dy = data['Offset']
    dn = data['Note']
    dx = quantizednotes
    print("%d %d" % (len(dx), len(dy)))
    if len(dx) != len(dy):
        raise ValueError("quantizednotes has %d entries but data has %d notes"
                         % (len(dx), len(dy)))
    plt.plot(dx, dy, 'm.--', linewidth=1.5)
    # All note markers in one call instead of one call per note.
    plt.plot(dx, dy, 'ko')

    # Plot the ticks
    plt.xticks(range(0, 87))
    plt.yticks(range(0, int(max(dy)+1)))

    # Annotate with note data
    for note, nx, ny in zip(dn, dx, dy):
        plt.annotate(note, xy=(nx, ny), color='g')

    # Set fig limits, size, and other display things
    fig = plt.gcf()
    ax = plt.gca()
    max_xax = 18
    max_yax = 12
    fig.set_size_inches(max_xax, max_yax)
    plt.ylim([-1, max(dy) + 1])
    plt.title('Oscar\'s Playing', fontsize=20, horizontalalignment='center')
    plt.ylabel('Offset', fontsize=16)
    plt.xlabel('Keyboard', fontsize=16)
    plt.grid()
    ax.yaxis.grid(False)
    
# Turn each full note name into its semitone index, then draw and show the plot.
quantizednotes = list(map(quantify, oscar2['FullNote']))
plotMIDI(oscar2, quantizednotes)
plt.show()

1078 1078

Now that you've visualized Oscar's playing, you'll want to generate and align chords with it next.

In [7]:

## Getting the notes
# print "Label, note's x-coord, note's y-coord, notename "
# for label, note, notename in izip(km.labels_, notesXY, notenames):
#     print label, note[0], note[1], notename

In [8]:

# Next goal: for each cluster, find chords that go well with it.
# Gather, per note, its cluster label, its name and its (offset, length) coordinates.
noteinfo = pd.DataFrame(
    {
        "Labels": km.labels_,
        "Note": notenames,
        "X-Coords": notesXY[:, 0],
        "Y-Coords": notesXY[:, 1],
    },
    columns=["Labels", "Note", "X-Coords", "Y-Coords"],
)
print(noteinfo.shape)
noteinfo.head()

(1078, 4)

Out[8]:

	Labels	Note	X-Coords	Y-Coords
0	10	D	12.666667	0.750000
1	10	E	14.000000	0.666667
2	10	C#	14.000000	0.875000
3	10	A	15.000000	0.250000
4	10	F	16.000000	3.125000

5 rows × 4 columns

In [9]:

# Example: pull out the notes of a single cluster (label 14).
in_cluster14 = noteinfo["Labels"] == 14
label14 = noteinfo[in_cluster14]
print(label14.shape)
label14.head()

(50, 4)

Out[9]:

	Labels	Note	X-Coords	Y-Coords
627	14	G	335.125	0.625000
628	14	E	335.125	0.375000
629	14	F	335.750	1.250000
630	14	D	335.750	1.125000
631	14	G#	340.000	0.333333

5 rows × 4 columns

In [9]:

# To do the next part, you'll need to go elsewhere to extract the chords first. Remember -- you've already
# generated the n-grams decently, with random offsets chosen by frequency. You can go back and fix the offsets later
# (e.g. if you wanted to generate based on cluster, draw offsets uniformly within a cluster instead of adding a
# random offset after each note; or you could even build an n-gram model for the offsets).