from collections import defaultdict
from sklearn.neural_network import BernoulliRBM
import pandas as pd
import numpy as np
import scipy.sparse
import random, cPickle

# Extract chords into unique ids, e.g. 0, 1, 2, 3, 4.
allchords = defaultdict()  # behaves like an ordinary dict: chordID -> list of notes
with open("oscar2chords_extract.txt", 'rb') as f:
    for ix, line in enumerate(f):
        items = line.split()
        allchords[ix] = items
# ensure no duplicate chords (compare the chords themselves, not just the IDs)
assert len(allchords) == len(set(tuple(chord) for chord in allchords.values()))

# Read in Oscar's data.
notedata = pd.read_csv(open("oscar2notes.txt", 'rb'), skiprows=2)
allnotes = []
for note, octave in zip(notedata["Note/Rest"], notedata["Octave"]):
    allnotes.append("%s%s" % (note, octave))
print "Number of notes (# of samples for RBM): ", len(notedata)
notedata.head()

# Given a music21 note, such as C5 or D#7, convert it into a key number on the
# keyboard between 0 and 87 inclusive. Don't convert it for mingus; try to use
# music21 note style as much as possible throughout.
def quantify(note):
    notevals = {
        'C' : 0,
        'D' : 2,
        'E' : 4,
        'F' : 5,
        'G' : 7,
        'A' : 9,
        'B' : 11
    }
    quantized = 0
    octave = int(note[-1]) - 1
    for i in note[:-1]:
        if i in notevals:
            quantized += notevals[i]
        if i == '-':   # flat
            quantized -= 1
        if i == '#':   # sharp
            quantized += 1
    quantized += 12 * octave
    return quantized

# Create bitwise note vectors for use with the Restricted Boltzmann Machine.
vectors = np.zeros((1, 88))
for ix, note in enumerate(allnotes):
    vect = np.zeros((1, 88))
    vect[0, quantify(note)] = 1
    if ix == 0:
        vectors = vect
    else:
        vectors = np.vstack((vectors, vect))
print vectors.shape

# Convert a mingus note (e.g. 'Ab-3') back to a music21 note (e.g. 'A-3'). WORKS
def unmingify(note):
    return note.replace('-', '').replace('b', '-')

# Given a list of mingus notes (i.e. a chord), say ['A-2', 'A-3', 'E-3'],
# return a bitwise note vector with possible notes to go along with it.
# Takes a chord (i.e. a list of notes) -- not a freqdict.
# Idea: what if we just generate a notewise vector with the exact same pitches?
# Independence assumption?
def genChordNotes(chord):
    chord = [unmingify(note) for note in chord]  # really important to unmingify notes
    notevect = np.zeros((1, 88))
    # populate with the initial pitches
    for note in chord:
        notevect[0, quantify(note)] = 1
    # randomly add other octaves of the initial pitches
    otheroctaves = range(3, 6)
    for note in chord:
        notebase = note[:-1]
        for octv in otheroctaves:
            put = bool(random.getrandbits(1))  # coin flip per extra octave
            if put:
                translated = "%s%s" % (notebase, octv)
                notevect[0, quantify(translated)] = 1
    # TODO: add other relevant notes if time; might have enough chords already.
    return notevect

# Create the initial training arrays: one bitwise vector per chord ID.
# (Relies on the dict yielding chordID 0 first, which holds for 0..N-1 integer keys in CPython 2.)
x_train = np.zeros((1, 88))
for chordID, chord in allchords.items():
    if chordID == 0:
        x_train = genChordNotes(chord)
    else:
        x_train = np.vstack((x_train, genChordNotes(chord)))
y_train = allchords.keys()
print "Before adding random data: ", x_train.shape, len(y_train)

# Augment with more randomized vectors per chord (genChordNotes is stochastic).
for chordID, chord in allchords.items():
    for j in xrange(5):
        x_train = np.vstack((x_train, genChordNotes(chord)))
        y_train.append(chordID)
y_train = np.array(y_train).reshape(-1, )
print "After adding random data: ", x_train.shape, y_train.shape

from sklearn.ensemble import RandomForestClassifier
rf = RandomForestClassifier()
rf.fit(x_train, y_train)
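# Optional sanity check (illustrative sketch, not part of the original notebook):
# re-vectorize a known chord and see whether the fitted forest recovers its ID.
# genChordNotes is stochastic, so agreement is expected but not guaranteed.
checkID, checkchord = 0, allchords[0]
print "Chord %d predicted as: %d" % (checkID, rf.predict(genChordNotes(checkchord))[0])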
# save the classifier to disk for use with 6b. The N-Gram Pipeline, Part II.
with open('part7clf.pkl', 'wb') as fid:
    cPickle.dump(rf, fid)

# save the defaultdict (intID : chord) to disk for use with 6b. The N-Gram Pipeline, Part II.
with open('part7cdict.pkl', 'wb') as fid:
    cPickle.dump(allchords, fid)
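# Hedged sketch of the downstream consumer ("6b. The N-Gram Pipeline, Part II"):
# reload the two pickles and map a bitwise note vector back to a chord. Only the
# filenames come from the dumps above; the C-E-G query vector is illustrative.
with open('part7clf.pkl', 'rb') as fid:
    loadedclf = cPickle.load(fid)
with open('part7cdict.pkl', 'rb') as fid:
    loadedchords = cPickle.load(fid)
query = np.zeros((1, 88))
for qnote in ["C4", "E4", "G4"]:
    query[0, quantify(qnote)] = 1
predictedID = loadedclf.predict(query)[0]
print "Chord predicted for C-E-G query: ", loadedchords[predictedID]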