This notebook visualizes and clusters notes in the original MIDI file (oscar2notes.txt). Later, it goes on to other things with CFG techniques. Note that you do NOT modify the original notes in this notebook (although you might take a subset of those notes).
Basically, this is an example of what you can do with the note data (using the originals as the example).
%matplotlib inline
from collections import Counter, defaultdict
from sklearn.cluster import KMeans
from sklearn.cluster import AffinityPropagation
from itertools import izip
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import sys, copy, random
# Load the note table from oscar2notes.txt and order it by the Offset column.
# read_csv skips the two header lines, which are parsed separately below.
# The [:] slice keeps every row; tighten it (e.g. [:200]) to work on a subset.
oscar2 = pd.read_csv('oscar2notes.txt', skiprows=2)[:].sort("Offset")
oscar2.index = range(1, len(oscar2) + 1)  # renumber rows from 1 in sorted order

# Melody threshold: keep only the notes in octave 4 or higher.
oscar2 = oscar2[oscar2["Octave"] >= 4]

# Header line 1 is a single float (metmark). Header line 2 yields two fields
# once its ' /' separator is stripped -- presumably the time signature.
with open('oscar2notes.txt', 'rb') as f:
    metmark = float(f.readline())
    tsig_num, tsig_den = f.readline().replace(' /', '').split()

# len(oscar2) shrinks with the octave cutoff and with any row slice above.
print("Metrics:")
print("%s %s %s %s" % (metmark, tsig_num, tsig_den, len(oscar2)))

# The octave column is no longer needed; expose "Note/Rest" as "Note".
del oscar2["Octave"]
oscar2 = oscar2.rename(columns={"Note/Rest": "Note"})
oscar2.head(20)
Metrics: 176.0 4 4 1078
| | Note | Len | Offset |
---|---|---|---|
2 | D | 0.750000 | 12.666667 |
3 | E | 0.666667 | 14.000000 |
4 | C# | 0.875000 | 14.000000 |
5 | A | 0.250000 | 15.000000 |
6 | F | 3.125000 | 16.000000 |
7 | D | 0.250000 | 16.000000 |
8 | A | 3.125000 | 16.000000 |
9 | F | 1.333333 | 16.000000 |
10 | D | 3.000000 | 16.375000 |
11 | F | 1.750000 | 17.625000 |
12 | G | 0.666667 | 20.625000 |
13 | B- | 0.250000 | 20.666667 |
14 | E- | 0.625000 | 22.000000 |
15 | A | 0.125000 | 22.000000 |
17 | G | 0.375000 | 22.000000 |
18 | B- | 0.875000 | 23.875000 |
19 | F | 1.250000 | 23.875000 |
20 | B- | 1.250000 | 25.500000 |
21 | D | 0.750000 | 28.625000 |
22 | B | 1.375000 | 28.625000 |
20 rows × 3 columns
# Plot the length over offset.
def plotTiming(data, labels=None, clustercenters=None, title='Original Notes'):
    """Plot note duration against offset on the current pyplot figure.

    Parameters
    ----------
    data : table indexable by column name (e.g. a pandas DataFrame)
        Must provide 'Offset' and 'Len'. 'Note' is read only when the points
        are annotated (at most 100 rows and no ``labels``).
    labels : sequence, optional
        One cluster label per row of ``data``, in row order. When given, each
        point is drawn as an 'x' in a random colour shared by its cluster;
        otherwise points are drawn as black circles.
    clustercenters : sequence of (x, y) pairs, optional
        Cluster centres, drawn as small black dots. May be passed with or
        without ``labels``.
    title : str, optional
        Heading of the plot. Defaults to 'Original Notes'.

    Returns
    -------
    None
        The function only draws; it resizes the current figure to 18 x 6 in.
    """
    dx = data['Offset']
    dy = data['Len']
    point_count = len(data)
    has_points = point_count > 0
    # Integer ticks, per-note annotations and the raised title are only
    # legible on small plots.
    sparse = 0 < point_count <= 100

    # Dashed magenta line joining the notes in row order.
    plt.plot(dx, dy, 'm.--', linewidth=1.5)

    if labels is None:
        plt.plot(dx, dy, 'ko')
    else:
        xs = np.asarray(dx)
        ys = np.asarray(dy)
        # Labels beyond the last row are ignored.
        label_array = np.asarray(labels)[:point_count]
        # One plot call and one random colour per distinct cluster, instead of
        # one call (and one colour re-roll) per point.
        for label in np.unique(label_array):
            rgb = tuple(random.randint(0, 255) for _ in range(3))
            members = label_array == label
            plt.plot(xs[members], ys[members], 'x', ms=15, mew=1.5,
                     color='#%02x%02x%02x' % rgb)

    # Cluster centres are always black, so they need no per-cluster colour
    # (and therefore no labels).
    if clustercenters is not None:
        for center in clustercenters:
            plt.plot(center[0], center[1], 'ko', mew=0, ms=7.5)

    ax = plt.gca()
    if sparse:
        x_max = max(dx)
        plt.xticks(range(0, int(x_max) + 1))
        # Repeat the tick labels along the top edge; a boolean is accepted by
        # both old and new matplotlib, unlike the string 'on'.
        ax.xaxis.set_tick_params(labeltop=True)
        if labels is None:
            # Otherwise, it gets too messy!
            for note, offset, length in zip(data['Note'], dx, dy):
                plt.annotate(note, xy=(offset, length), color='g')
        # The top tick labels occupy the usual title position, so place the
        # heading as text above the data instead.
        plt.text(x_max / 2.0, max(dy) + 1.5, title, fontsize=20,
                 horizontalalignment='center')
    else:
        plt.title(title, fontsize=20, horizontalalignment='center')

    # Axis limits need at least one point; an empty table keeps the defaults.
    if has_points:
        plt.ylim([0, max(dy) + 0.25])
        plt.xlim([min(dx) - 1, max(dx) + 1])
    plt.ylabel('Duration', fontsize=16)
    plt.xlabel('Offset', fontsize=16)
    plt.grid()
    ax.xaxis.grid(False)  # keep horizontal grid lines only
    plt.gcf().set_size_inches(18, 6)
# Baseline figure: every loaded note, with no cluster labels or centers.
plotTiming(oscar2)
# Cluster the notes in (offset, duration) space with k-means.
# Series.reshape is gone from later pandas releases, so reshape the underlying
# NumPy arrays instead; the result is the same (n, 1) column.
notesX = oscar2["Offset"].values.reshape(-1, 1)
notesY = oscar2["Len"].values.reshape(-1, 1)
notesXY = np.concatenate((notesX, notesY), axis=1)  # columns: offset, duration
notenames = np.array(list(oscar2["Note"]))

# Cluster count is sqrt(n / 2). Floor division gives the same value on
# Python 2 and 3, and max() keeps it from reaching 0 for fewer than two notes.
n_clusters = max(1, int(np.sqrt(len(notesX) // 2)))
# A fixed seed keeps the cluster label ids stable between runs; later cells
# select a cluster by its numeric label.
km = KMeans(n_clusters=n_clusters, random_state=0)
km.fit(notesXY)
plotTiming(oscar2, labels=km.labels_, clustercenters=km.cluster_centers_)
## Getting the notes
# print "Label, note's x-coord, note's y-coord, notename "
# for label, note, notename in izip(km.labels_, notesXY, notenames):
# print label, note[0], note[1], notename
# Next goal: for each cluster, find a chord that would go well to be played at its start.
# Gather each note's cluster label, name and (offset, duration) coordinates
# into a single table, one row per clustered note.
noteinfo = pd.DataFrame(
    {
        "Labels": km.labels_,
        "Note": notenames,
        "X-Coords": notesXY[:, 0],
        "Y-Coords": notesXY[:, 1],
    },
    columns=["Labels", "Note", "X-Coords", "Y-Coords"],
)
print(noteinfo.shape)
noteinfo.head()
(1078, 4)
| | Labels | Note | X-Coords | Y-Coords |
---|---|---|---|---|
0 | 5 | D | 12.666667 | 0.750000 |
1 | 5 | E | 14.000000 | 0.666667 |
2 | 5 | C# | 14.000000 | 0.875000 |
3 | 5 | A | 15.000000 | 0.250000 |
4 | 5 | F | 16.000000 | 3.125000 |
5 rows × 4 columns
# Pull out the notes that k-means assigned to cluster 14 and show them.
label14 = noteinfo.loc[noteinfo["Labels"] == 14]
print(label14.shape)
label14
(30, 4)
| | Labels | Note | X-Coords | Y-Coords |
---|---|---|---|---|
0 | 14 | D | 12.666667 | 0.750000 |
1 | 14 | E | 14.000000 | 0.666667 |
2 | 14 | C# | 14.000000 | 0.875000 |
3 | 14 | A | 15.000000 | 0.250000 |
4 | 14 | F | 16.000000 | 3.125000 |
5 | 14 | D | 16.000000 | 0.250000 |
6 | 14 | A | 16.000000 | 3.125000 |
7 | 14 | F | 16.000000 | 1.333333 |
8 | 14 | D | 16.375000 | 3.000000 |
9 | 14 | F | 17.625000 | 1.750000 |
10 | 14 | G | 20.625000 | 0.666667 |
11 | 14 | B- | 20.666667 | 0.250000 |
12 | 14 | E- | 22.000000 | 0.625000 |
13 | 14 | A | 22.000000 | 0.125000 |
14 | 14 | G | 22.000000 | 0.375000 |
15 | 14 | B- | 23.875000 | 0.875000 |
16 | 14 | F | 23.875000 | 1.250000 |
17 | 14 | B- | 25.500000 | 1.250000 |
18 | 14 | D | 28.625000 | 0.750000 |
19 | 14 | B | 28.625000 | 1.375000 |
20 | 14 | G# | 28.625000 | 0.250000 |
21 | 14 | D | 28.625000 | 1.250000 |
22 | 14 | B- | 28.666667 | 0.125000 |
23 | 14 | G# | 29.250000 | 0.375000 |
24 | 14 | C | 30.000000 | 0.666667 |
25 | 14 | D | 30.000000 | 0.250000 |
26 | 14 | B- | 30.875000 | 0.250000 |
27 | 14 | E | 31.500000 | 3.375000 |
28 | 14 | C | 31.500000 | 3.625000 |
29 | 14 | C | 31.625000 | 3.500000 |
30 rows × 4 columns