This notebook visualizes and clusters notes in the original MIDI file (oscar2notes.txt). Later, it goes on to other things with CFG techniques. Note that you do NOT modify the original notes in this notebook (although you might take a subset of those notes).
Basically, this is an example of what you can do with the note data (using the original notes as the example).
Dependencies:
N/A
%matplotlib inline
from collections import Counter, defaultdict
from sklearn.cluster import KMeans
from sklearn.cluster import AffinityPropagation
from itertools import izip
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import sys, copy, random
# Load the note table and order it by onset. The first two lines of the
# file are header values (parsed separately below), hence skiprows=2.
# Every row is kept; slice the frame here to work on a subset of the notes.
oscar2 = pd.read_csv('oscar2notes.txt', skiprows=2).sort("Offset")
oscar2.index = xrange(1, len(oscar2) + 1)

# Keep only notes in octave 4 and above (the threshold used for melodies).
oscar2 = oscar2[oscar2.Octave >= 4]

# Re-read the two skipped header lines: the first is parsed as a float
# (metmark); the second is split into two tokens after dropping ' /'
# (presumably a time signature written like "4 / 4").
with open('oscar2notes.txt', 'rb') as f:
    metmark = float(f.readline())
    tsig_num, tsig_den = f.readline().replace(' /', '').split()

print("Metrics:")
# The note count is smaller when the octave cutoff (or any row slice on the
# read_csv result) removes rows.
print("%s %s %s %s" % (metmark, tsig_num, tsig_den, len(oscar2)))

# Normalize the column name and renumber rows from 0 so that the new
# FullNote series (default 0..n-1 index) lines up with the frame.
oscar2 = oscar2.rename(columns={"Note/Rest": "Note"}).reset_index(drop=True)

# FullNote joins the first two columns of each row (note name and octave),
# e.g. "D" and 5 become "D5".
oscar2["FullNote"] = pd.Series(
    ["%s%s" % (row[0], row[1]) for row in oscar2.itertuples(index=False)])
oscar2.head(20)
Metrics: 176.0 4 4 1078
Note | Octave | Len | Offset | FullNote | |
---|---|---|---|---|---|
0 | D | 5 | 0.750000 | 12.666667 | D5 |
1 | E | 4 | 0.666667 | 14.000000 | E4 |
2 | C# | 5 | 0.875000 | 14.000000 | C#5 |
3 | A | 5 | 0.250000 | 15.000000 | A5 |
4 | F | 4 | 3.125000 | 16.000000 | F4 |
5 | D | 5 | 0.250000 | 16.000000 | D5 |
6 | A | 4 | 3.125000 | 16.000000 | A4 |
7 | F | 5 | 1.333333 | 16.000000 | F5 |
8 | D | 5 | 3.000000 | 16.375000 | D5 |
9 | F | 5 | 1.750000 | 17.625000 | F5 |
10 | G | 4 | 0.666667 | 20.625000 | G4 |
11 | B- | 4 | 0.250000 | 20.666667 | B-4 |
12 | E- | 4 | 0.625000 | 22.000000 | E-4 |
13 | A | 4 | 0.125000 | 22.000000 | A4 |
14 | G | 4 | 0.375000 | 22.000000 | G4 |
15 | B- | 5 | 0.875000 | 23.875000 | B-5 |
16 | F | 4 | 1.250000 | 23.875000 | F4 |
17 | B- | 5 | 1.250000 | 25.500000 | B-5 |
18 | D | 6 | 0.750000 | 28.625000 | D6 |
19 | B | 5 | 1.375000 | 28.625000 | B5 |
20 rows × 5 columns
# Plot note length over offset, optionally coloured by cluster.
def plotTiming(data, labels=None, clustercenters=None):
    """Draw each note's duration against its offset on the current figure.

    Parameters:
        data: column container (e.g. a pandas DataFrame) providing
            'Offset', 'Len' and 'Note' columns of equal length.
        labels: optional sequence of cluster labels, one per note and
            indexable by position. When given, each note is drawn as an
            'x' in a random colour picked once per distinct label;
            otherwise notes are drawn as black dots.
        clustercenters: optional iterable of (offset, length) pairs, each
            drawn as a black circle. May be passed without labels.

    Returns:
        None. The plot is drawn through matplotlib's pyplot state.
    """
    numberofitems = len(data)
    # Tick marks, annotations and the in-plot title are only readable when
    # there are few points.
    sparse = numberofitems <= 100

    # One random hex colour per distinct cluster label.
    clusterCodes = dict()
    if labels is not None:
        for label in set(labels):
            clusterCodes[label] = '#%02x%02x%02x' % (
                random.randint(0, 255),
                random.randint(0, 255),
                random.randint(0, 255))

    dx = data['Offset']
    dy = data['Len']
    dn = data['Note']

    # Dashed magenta line joining the notes in row order.
    plt.plot(dx, dy, 'm.--', linewidth=1.5)
    for ix, (x, y) in enumerate(zip(dx, dy)):
        if labels is not None:
            plt.plot(x, y, 'x', ms=15, mew=1.5,
                     color=clusterCodes[labels[ix]])
        else:
            plt.plot(x, y, 'ko')

    # Centroids are always drawn in black, so no per-cluster colour lookup
    # is needed here; this also lets centers be plotted without labels.
    if clustercenters is not None:
        for center in clustercenters:
            plt.plot(center[0], center[1], 'ko', mew=0, ms=7.5)

    if sparse:
        # One tick per whole offset unit.
        plt.xticks(range(0, int(max(dx)) + 1))
        # Note-name annotations are skipped when clusters are coloured.
        if labels is None:
            for note, offset, length in zip(dn, dx, dy):
                plt.annotate(note, xy=(offset, length), color='g')

    fig = plt.gcf()
    ax = plt.gca()
    if sparse:
        # Repeat the x tick labels along the top edge. A boolean is used
        # because string flags such as 'on' are deprecated in newer
        # matplotlib releases.
        ax.xaxis.set_tick_params(labeltop=True)
        # The title is placed as text above the data so it clears the top
        # tick labels.
        plt.text(max(dx) / 2, max(dy) + 1.5, 'Original Notes', fontsize=20,
                 horizontalalignment='center')
    else:
        plt.title('Oscar\'s Playing', fontsize=20,
                  horizontalalignment='center')

    # Figure limits, size and other display settings.
    plt.ylim([0, max(dy) + 0.25])
    plt.xlim([min(dx) - 1, max(dx) + 1])
    plt.ylabel('Duration', fontsize=16)
    plt.xlabel('Offset', fontsize=16)
    plt.grid()
    fig.set_size_inches(18, 6)
    # Keep only the horizontal grid lines.
    ax.xaxis.grid(False)
# Plot the notes in oscar2 with no cluster labels or centers supplied.
plotTiming(oscar2)
Next, let's cluster these notes with KMeans. Each cluster will have a different color, and the centroid for each one will be a black circle.
# Build one column vector per feature, then join them side by side into a
# two-column [offset, length] matrix for clustering.
notesX = oscar2["Offset"].reshape(-1, 1)
notesY = oscar2["Len"].reshape(-1, 1)
notesXY = np.hstack((notesX, notesY))
notenames = np.array(list(oscar2["Note"]))

# Number of clusters: sqrt of half the note count (integer division),
# truncated to an int.
km = KMeans(n_clusters=int(np.sqrt(len(notesX) // 2)))
km.fit(notesXY)

# Re-draw the timing plot with per-cluster colours and the fitted centroids.
plotTiming(oscar2, labels=km.labels_, clustercenters=km.cluster_centers_)
# Convert a note name such as C5 or D#7 into an integer keyboard position.
# Notes stay in music21 style ('#' for sharp, '-' for flat); nothing is
# converted for mingus.
def quantify(note):
    """Return the semitone number of a note name whose last character is
    its octave digit.

    The result is 12 * (octave - 1) plus the semitone offset of the letter
    (C=0, D=2, E=4, F=5, G=7, A=9, B=11), plus 1 for every '#' and minus 1
    for every '-' before the octave digit. So "C1" maps to 0 and "C5" to
    48. Characters that are neither a known letter nor an accidental
    contribute nothing.

    NOTE(review): the intended range is described as 0..87, but with this
    formula "A0" evaluates to -3 -- confirm which numbering callers expect.
    """
    letter_steps = {'C': 0, 'D': 2, 'E': 4, 'F': 5, 'G': 7, 'A': 9, 'B': 11}
    accidental_steps = {'-': -1, '#': 1}

    # Everything before the final character is the pitch spelling.
    spelling, octave_digit = note[:-1], note[-1]
    position = 12 * (int(octave_digit) - 1)
    for symbol in spelling:
        position += letter_steps.get(symbol, 0)
        position += accidental_steps.get(symbol, 0)
    return position
# Plot the notes as played by MIDI: keyboard position across, offset upward.
def plotMIDI(data, quantizednotes):
    """Draw every note at (keyboard position, offset) on the current figure.

    Parameters:
        data: column container (e.g. a pandas DataFrame) providing 'Offset'
            and 'Note' columns.
        quantizednotes: sequence of integer keyboard positions, one per row
            of data and in the same order.

    Returns:
        None. The two input lengths are printed, and the plot is drawn
        through matplotlib's pyplot state.
    """
    dy = data['Offset']
    dn = data['Note']
    dx = quantizednotes
    # Sanity check: both lengths should match.
    print("%d %d" % (len(dx), len(dy)))

    # Dashed magenta line joining the notes in row order, with a black dot
    # on each note.
    plt.plot(dx, dy, 'm.--', linewidth=1.5)
    for x, y in zip(dx, dy):
        plt.plot(x, y, 'ko')

    # One x tick per key, 0 through 87 inclusive, and one y tick per whole
    # offset unit.
    plt.xticks(range(0, 88))
    plt.yticks(range(0, int(max(dy) + 1)))

    # Label every point with its note name.
    for note, nx, ny in zip(dn, dx, dy):
        plt.annotate(note, xy=(nx, ny), color='g')

    fig = plt.gcf()
    ax = plt.gca()

    # Figure limits, size and other display settings.
    max_xax = 18
    max_yax = 12
    fig.set_size_inches(max_xax, max_yax)
    plt.ylim([-1, max(dy) + 1])
    plt.title('Oscar\'s Playing', fontsize=20, horizontalalignment='center')
    plt.ylabel('Offset', fontsize=16)
    plt.xlabel('Keyboard', fontsize=16)
    plt.grid()
    # Keep only the vertical (per-key) grid lines.
    ax.yaxis.grid(False)
# Turn every full note name (note plus octave) into its keyboard number,
# then draw and display the keyboard-vs-offset figure.
quantizednotes = list(map(quantify, oscar2['FullNote']))
plotMIDI(oscar2, quantizednotes)
plt.show()
1078 1078
Now that you've visualized Oscar's playing, you'll want to generate and align chords with it next.
## Getting the notes
# Collect, per note, its cluster label, note name and (x, y) coordinates
# in one table. Next goal: for each cluster, find chords that go well with it.
#
# Optional debug dump of the same data:
# print "Label, note's x-coord, note's y-coord, notename "
# for label, note, notename in izip(km.labels_, notesXY, notenames):
#     print label, note[0], note[1], notename
noteinfo = pd.DataFrame(
    {
        "Labels": km.labels_,
        "Note": notenames,
        "X-Coords": notesXY[:, 0],
        "Y-Coords": notesXY[:, 1],
    },
    columns=["Labels", "Note", "X-Coords", "Y-Coords"])
print(noteinfo.shape)
noteinfo.head()
(1078, 4)
Labels | Note | X-Coords | Y-Coords | |
---|---|---|---|---|
0 | 10 | D | 12.666667 | 0.750000 |
1 | 10 | E | 14.000000 | 0.666667 |
2 | 10 | C# | 14.000000 | 0.875000 |
3 | 10 | A | 15.000000 | 0.250000 |
4 | 10 | F | 16.000000 | 3.125000 |
5 rows × 4 columns
# Example: select the rows belonging to a single cluster (label 14).
label14 = noteinfo[noteinfo["Labels"] == 14]
print(label14.shape)
label14.head()
(50, 4)
Labels | Note | X-Coords | Y-Coords | |
---|---|---|---|---|
627 | 14 | G | 335.125 | 0.625000 |
628 | 14 | E | 335.125 | 0.375000 |
629 | 14 | F | 335.750 | 1.250000 |
630 | 14 | D | 335.750 | 1.125000 |
631 | 14 | G# | 340.000 | 0.333333 |
5 rows × 4 columns
# To do the next part, you'll need to go elsewhere to extract the chords first. Remember -- you've already
# generated the n-grams decently, with random offsets chosen by frequency. You can go back and fix the offsets later
# (e.g., if you wanted to generate based on cluster, you could draw offsets uniformly within each cluster instead of
# adding a random offset after each note; you could even build an n-gram model for the offsets).