This notebook aggregates everything you've done so far: generating the notes with the trigram model, extracting and generating relevant chords based on clustering, and playing everything back.
The next step after this is to delve more deeply into the n-gram model by supporting it with more advanced classification and clustering techniques. You'll use deep belief networks for the subcluster classification.
This notebook assumes that oscar2chords.txt and oscar2notes.txt already exist. In addition, some of the functionality (e.g. plotting) has to be left out because of memory issues.
Step 1: import everything needed for this assignment.
%matplotlib inline
from collections import Counter, defaultdict
from sklearn.cluster import KMeans, Ward, AffinityPropagation
from itertools import izip, izip_longest, groupby
from mingus.midi import fluidsynth
from mingus.containers import NoteContainer
from mingus.containers.Bar import Bar
import mingus.core.notes as notes
import mingus.core.value as value
import music21
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import sys, re, itertools, random, copy
# Extra module search paths (one Windows-style, one Linux-style). These run
# after the imports above, so they can only affect modules imported later.
sys.path.append('C:/Python27/Lib/site-packages')
sys.path.append('/usr/local/lib/python2.7/dist-packages/fluidsynth/')
# Start the mingus FluidSynth backend with the FluidR3 General MIDI soundfont.
# NOTE(review): the second argument is presumably the audio driver name and the
# call presumably returns True on success (the cell output below) -- confirm
# against the mingus documentation.
fluidsynth.init('/usr/share/sounds/sf2/FluidR3_GM.sf2',"alsa")
True
Here, we read in Oscar Peterson's playing from the preprocessed files "oscar2chords.txt" and "oscar2notes.txt" representing the chord and note information in the original MIDI file, respectively. These files were created with the script "oscar.py."
""" Scripts for this part. """
""" 1. Get generated notes based on the trigram model. """
def chunks(iterable, n):
    """Yield every run of n consecutive items of a sequence as a tuple.

    The windows overlap (stride 1), so a sequence of length m yields
    m - n + 1 tuples. Tuples are used so the results can serve as dict keys.

    iterable -- a sequence supporting len() and slicing (e.g. a list).
    n        -- window size; expected to be >= 1.

    Nothing is yielded when the sequence is shorter than n, so callers never
    receive a truncated window.
    """
    # range() is empty when len(iterable) < n, which covers both the empty
    # sequence and an oversized window.
    for start in range(len(iterable) - n + 1):
        yield tuple(iterable[start:start + n])
def condProbTables(ngramfreqs, nngramfreqs):
    """Build the conditional probability tables of an n-gram model.

    ngramfreqs  -- dict mapping each n-gram (a tuple) to its count.
    nngramfreqs -- dict mapping each (n-1)-gram (a tuple) to its count; it
                   must hold a non-zero count for the prefix of every n-gram,
                   otherwise the division below fails.

    Returns a pair (nprobs, prevnngramnexts):
      nprobs          -- defaultdict mapping (item, prefix) to
                         count(prefix + item) / count(prefix).
      prevnngramnexts -- defaultdict mapping each prefix to a list of
                         (item, probability) pairs, one per observed n-gram.
    """
    nprobs = defaultdict(int)
    prevnngramnexts = defaultdict(list)
    for ngram, freq in ngramfreqs.items():
        prevnngram = ngram[:-1]
        currchar = ngram[-1]
        # Compute the probability once and share it between both tables.
        prob = float(freq) / nngramfreqs[prevnngram]
        nprobs[(currchar, prevnngram)] = prob
        # defaultdict(list) creates the list on first use, so no membership
        # test is needed before appending.
        prevnngramnexts[prevnngram].append((currchar, prob))
    return nprobs, prevnngramnexts
def yieldNext(prevnnexts, *args):
    """Sample the next note of an n-gram model given the preceding notes.

    prevnnexts -- mapping from a tuple of previous notes to a list of
                  (next note, probability) pairs, as built by condProbTables.
    *args      -- the previous notes, oldest first. Their number must match
                  the key length of prevnnexts (two notes for a trigram model).

    Returns one next note drawn at random with np.random.choice, weighted by
    the stored probabilities.

    Raises IndexError when there is nothing to draw from: either the context
    has no recorded continuation, or every continuation was filtered out by
    the repeated-note rule below.
    """
    lookup = tuple(args)
    candidates = prevnnexts[lookup]
    # Keep notes and probabilities in separate containers so the
    # probabilities stay numeric instead of being coerced to strings by a
    # mixed-type array.
    nextnotes = [note for note, _ in candidates]
    probabilities = [float(prob) for _, prob in candidates]

    # If all previous notes are the same note, forbid playing it yet again
    # (no runs of three or more identical notes for a trigram model).
    if len(set(args)) == 1:
        allowed = [ix for ix, note in enumerate(nextnotes) if note not in args]
        nextnotes = [nextnotes[ix] for ix in allowed]
        probabilities = [probabilities[ix] for ix in allowed]
    # Also to consider: remove notes in nextnotes if jump from octave 4 to 6 etc.

    if not nextnotes:
        raise IndexError("no next note available after %r" % (lookup,))

    # Always renormalise: dividing by a sum of exactly 1.0 changes nothing,
    # and it avoids an exact floating-point comparison against 1.0.
    weights = np.array(probabilities, dtype=float)
    return np.random.choice(nextnotes, p=weights / weights.sum())
def genTrigrams(prevbigramnexts, k=100):
    """Lazily generate k notes from the trigram model.

    prevbigramnexts -- table mapping a pair of previous notes to the possible
                       next notes, in the form yieldNext expects.
    k               -- how many notes to generate (default 100).

    Generation is seeded with two "start" placeholder notes; every new note
    is drawn by yieldNext from the two notes that precede it.
    """
    older, newer = "start", "start"
    for _ in range(k):
        upcoming = yieldNext(prevbigramnexts, older, newer)
        # Slide the two-note context forward before handing the note out.
        older, newer = newer, upcoming
        yield upcoming
def genQuadgrams(prevtrigramnexts, k=100):
    """Lazily generate k notes from the quadgram (4-gram) model.

    prevtrigramnexts -- table mapping a triple of previous notes to the
                        possible next notes, in the form yieldNext expects.
    k                -- how many notes to generate (default 100).

    Generation is seeded with three "start" placeholder notes; every new note
    is drawn by yieldNext from the three notes that precede it.
    """
    context = ("start",) * 3
    for _ in range(k):
        upcoming = yieldNext(prevtrigramnexts, *context)
        # Drop the oldest note and append the new one.
        context = context[1:] + (upcoming,)
        yield upcoming
""" 2. Generate the offsets using simple frequency probabilities. """
def grouper(n, iterable, fillvalue=None):
    """Yield each pair of neighbouring items of a sequence.

    For [a, b, c] this yields (a, b) and then (b, c). The sequence must
    support len() and indexing.

    n and fillvalue are accepted but not used: pairs are produced whatever
    their values are.
    """
    for left in range(len(iterable) - 1):
        yield (iterable[left], iterable[left + 1])
def normList(L, normalizeTo=1):
    """Scale the values of L so that they sum to normalizeTo.

    L           -- a re-iterable collection of numbers (or numeric strings);
                   it is traversed twice.
    normalizeTo -- the total the returned values should add up to (default 1).

    Returns a new list of floats; an empty input gives an empty list.
    """
    total = 0
    for entry in L:
        total = total + float(entry)
    denominator = total * 1.0
    return [float(entry) / denominator * normalizeTo for entry in L]
def my_round(x, n=4):
    """Round x to the nearest 1/n of a unit (quarters by default)."""
    # Count how many n-ths fit into x, then convert back to units.
    steps = round(x * n)
    return steps / n
""" 3. Pruning.
For one, go through and make sure you don't get random tiny clusters
of notes + awkward octave jumps. If you have time later, do this dynamically
in generating the n-gram models above.
Assume Oscar doesn't play any repeated notes at his
ridiculously fast tempo (since consequence of n-gram model anyway). """
def quantify(note):
    """Convert a note name such as "C5", "D#7" or "B-4" into a semitone index.

    The last character is read as the octave digit; the characters before it
    are the note letter plus any accidentals ('#' raises by one semitone,
    '-' lowers by one). "C1" maps to 0 and each octave adds 12.

    NOTE(review): the original comment promised a piano key number between 0
    and 87 inclusive, but the function does not clamp its result -- confirm
    the inputs stay in that range.
    """
    semitone_of = {
        'C': 0, 'D': 2, 'E': 4, 'F': 5, 'G': 7, 'A': 9, 'B': 11,
        '-': -1, '#': 1,
    }
    name_part, octave_digit = note[:-1], note[-1]
    # Characters that are neither a note letter nor an accidental count as 0.
    within_octave = sum(semitone_of.get(symbol, 0) for symbol in name_part)
    return within_octave + 12 * (int(octave_digit) - 1)
def findJumps(generated):
    """Find awkward octave jumps, e.g. the ones in c6 b4 g4 e4 f6.

    generated -- list of note names whose last character is the octave digit.

    Returns the list of indices i for which generated[i] and generated[i+1]
    lie more than one octave apart (compared by octave digit only). Every
    adjacent pair is examined, including the final one; lists with fewer
    than two notes give an empty result.

    Only the argument is inspected -- no module-level state is read.
    """
    octaves = [int(note[-1]) for note in generated]
    return [ix for ix, (here, after) in enumerate(zip(octaves, octaves[1:]))
            if abs(here - after) > 1]
def smoothen(original):
    """Soften jumps of more than one octave in a list of generated notes.

    original -- list of note names whose last character is the octave digit.

    Works on a deep copy and returns it; the input is left untouched. The
    jump positions come from a single findJumps call made up front. For each
    reported position the higher of the two notes is moved down by exactly
    one octave, e.g. c4 g4 c6 becomes c4 g4 c5. A wider jump is therefore
    narrowed by one octave rather than removed.
    """
    smoothed = copy.deepcopy(original)
    final_ix = len(smoothed) - 1
    for jump_ix in findJumps(smoothed):
        if jump_ix == final_ix:
            # No successor to compare against; stop processing.
            break
        first, second = smoothed[jump_ix], smoothed[jump_ix + 1]
        first_oct, second_oct = float(first[-1]), float(second[-1])
        if first_oct > second_oct:
            target_ix, target = jump_ix, first
        elif first_oct < second_oct:
            target_ix, target = jump_ix + 1, second
        else:
            continue
        # Rewrite the higher note one octave lower, keeping its name part.
        smoothed[target_ix] = "%s%s" % (target[:-1], int(target[-1]) - 1)
    return smoothed
def rmDuplicates(original):
    """Collapse runs of identical consecutive notes into a single note.

    For example, c4 g5 g5 g5 e5 becomes c4 g5 e5. Notes that repeat later
    but are not adjacent are kept. The input list is not modified; a new
    list built from a deep copy is returned.
    """
    notes_copy = copy.deepcopy(original)
    final_ix = len(notes_copy) - 1
    collapsed = []
    for pos, note in enumerate(notes_copy):
        # Keep a note only if it is the last one of its run.
        if pos == final_ix or note != notes_copy[pos + 1]:
            collapsed.append(note)
    return collapsed
def rmSingles(original):
    """Remove isolated notes that sit in a different octave than both neighbours.

    A note is dropped when the notes before and after it share an octave
    digit and its own octave digit differs, e.g. c6 g4 c6 becomes c6 c6.
    Something like c6 g5 c4 is left alone because the neighbours disagree.
    The first and last notes are never removed.

    After a removal the scan still advances by one position, so the note
    that slid into the vacated slot is not itself re-examined. Run
    rmDuplicates again afterwards to merge any repeats this creates.

    The input list is not modified; a pruned deep copy is returned.
    """
    notes_copy = copy.deepcopy(original)
    pos = 1
    while pos < len(notes_copy) - 1:
        before_oct = notes_copy[pos - 1][-1]
        here_oct = notes_copy[pos][-1]
        after_oct = notes_copy[pos + 1][-1]
        if before_oct == after_oct and float(before_oct) != float(here_oct):
            notes_copy.pop(pos)
        pos += 1
    return notes_copy
def rmHalfNotes(original):
    """Drop notes that are followed by a note exactly one semitone away.

    Experimental use only. Each note is compared with its successor using
    quantify; when the two are one semitone apart, the first of the pair is
    removed. All comparisons are made on the unpruned sequence. The input is
    not modified; a new list is returned.
    """
    notes_copy = copy.deepcopy(original)
    semitone_starts = set()
    for pos in range(len(notes_copy) - 1):
        gap = quantify(notes_copy[pos]) - quantify(notes_copy[pos + 1])
        if abs(gap) == 1:
            semitone_starts.add(pos)
    return [note for pos, note in enumerate(notes_copy)
            if pos not in semitone_starts]
# Load Oscar's notes. The first two lines of oscar2notes.txt are not part of
# the table (they are read separately below), so the CSV parser skips them.
# Rows are ordered by their "Offset" column.
oscar2 = pd.read_csv('oscar2notes.txt', skiprows=2)[:].sort("Offset")
# Number the rows 1..N in offset order before filtering.
oscar2.index = xrange(1, len(oscar2) + 1)
# Keep only octave 4 and above: this is the threshold used for melodies.
oscar2 = oscar2[oscar2["Octave"] >= 4]

# Read the two leading lines: a single number (metronome mark) and then the
# time signature, whose " /" separator is stripped before splitting.
# NOTE(review): tsig_num and tsig_den stay strings here -- confirm that the
# later cells expect that.
with open('oscar2notes.txt', 'rb') as f:
    metmark = float(f.readline())
    tsig_num, tsig_den = f.readline().replace(' /', '').split()

oscar2.head()
Note/Rest | Octave | Len | Offset | |
---|---|---|---|---|
2 | D | 5 | 0.750000 | 12.666667 |
3 | E | 4 | 0.666667 | 14.000000 |
4 | C# | 5 | 0.875000 | 14.000000 |
5 | A | 5 | 0.250000 | 15.000000 |
6 | F | 4 | 3.125000 | 16.000000 |
5 rows × 4 columns
Generate the notes with the n-gram model, and write them to disk. Note that we insert a few "start" dummy notes at the beginning so that the n-gram model has a context from which to generate the first real notes.
""" The script to generate the notes."""
# Build the training sequence: one "<name><octave>" string per row of oscar2
# (iterrows() yields (index, row) pairs), preceded by three "start" dummies
# that give the first real notes a context.
possiblenotes = ["start"] * 3 + [
    "%s%s" % (row["Note/Rest"], row["Octave"]) for _, row in oscar2.iterrows()
]

# Count how often every bigram, trigram and quadgram occurs.
bigramfreqs = defaultdict(int)
trigramfreqs = defaultdict(int)
quadgramfreqs = defaultdict(int)
for size, table in ((2, bigramfreqs), (3, trigramfreqs), (4, quadgramfreqs)):
    for gram in chunks(possiblenotes, size):
        table[gram] += 1

# Encode ngram probabilities. The trigram model is the one in use; the
# quadgram alternative is kept for reference.
triprobs, prevbigramnexts = condProbTables(trigramfreqs, bigramfreqs)
# quadprobs, prevtrigramnexts = condProbTables(quadgramfreqs, trigramfreqs)
""" The offsets. """
# Histogram of the gaps between consecutive note offsets, rounded by
# my_round (nearest quarter with its default n=4).
offsets = defaultdict(int)
genTuples = grouper(2, [float(i) for i in oscar2["Offset"]])
for j in genTuples:
    toCompare = j
    diff = my_round(float(toCompare[1]) - float(toCompare[0]))
    if diff > 4: continue # can't have gaps > 4
    offsets[diff] += 1

# Keep a gap only if it is strictly positive and is not both rare (seen fewer
# than 5 times) and short (< 2). The filtered lists are built in one pass
# instead of deleting from them while iterating, which would skip the entry
# following each deletion. Values and counts stay index-aligned because
# np.random.choice() needs them as two parallel sequences.
offset_kept = [(gap, count) for gap, count in offsets.items()
               if gap > 0 and not (count < 5 and gap < 2)]
offset_poss = [gap for gap, count in offset_kept]    # possible offsets
offset_probs = normList([count for gap, count in offset_kept])  # their probabilities
# Note generation can hit a dead end (a context with no usable continuation),
# which surfaces as an IndexError. Simply retry until one complete run of
# `numberofngrams` notes succeeds. Every attempt is guarded, and the number of
# attempts is bounded so a model that always dead-ends fails loudly instead of
# looping forever.
numberofngrams = 200 # Do 300-400 notes for the presentation. 1350 ~= # of oscar's original notes
numberGenerated = 0
attemptsLeft = 100
while numberGenerated != numberofngrams: # remove while if decide to rm. duplicates
    if attemptsLeft == 0:
        raise RuntimeError("could not generate %s notes without hitting a dead end" % numberofngrams)
    attemptsLeft -= 1
    try:
        # gennotes = list(note for note in genQuadgrams(prevtrigramnexts, numberofngrams) if note != "start")
        gennotes = list(note for note in genTrigrams(prevbigramnexts, numberofngrams) if note != "start")
    except IndexError:
        continue
    # One randomly drawn offset per generated note.
    genoffsets = list(np.random.choice(offset_poss, p=offset_probs) for _ in gennotes)
    numberGenerated = len(gennotes)
# Prune. Experiment with which passes to use, to see how close the result is
# to Oscar's style. Demonstrate pruning in your presentation!
# The passes run in order, each on the output of the previous one.
for prune in (smoothen, rmDuplicates, rmSingles, rmDuplicates):
    gennotes = prune(gennotes)

# Report how many notes survived pruning.
print("# of notes generated after pruning: %s" % len(gennotes))

# One "note,offset" line per surviving note. zip() stops at the shorter
# sequence, so the offsets left over after pruning are simply not written.
with open("oscar2ngrams.txt", 'wb') as f:
    f.writelines("%s,%s\n" % pair for pair in zip(gennotes, genoffsets))
# of notes generated after pruning: 170