from collections import defaultdict
import random

try:
    import cPickle  # Python 2
except ImportError:  # Python 3 has no cPickle module; pickle is the replacement
    import pickle as cPickle

import numpy as np
import pandas as pd
import scipy.sparse

# Extract chords into unique ids, e.g. 1, 2, 3, 4, 5
# allchords maps line index -> list of whitespace-separated tokens on that line.
# It is created without a default factory, so it behaves like a plain dict
# (missing keys still raise KeyError); the type is kept because the object is
# pickled further down for use by another script.
allchords = defaultdict()
# Text mode so the chord tokens are ``str`` on Python 3 as well as Python 2.
with open("oscar2chords_extract.txt", 'r') as f:
    for ix, line in enumerate(f):
        allchords[ix] = line.split()

# NOTE(review): set(allchords) is the set of dict *keys*, so this compares the
# key count with itself and can never fail -- it does not detect repeated
# chords. A real check would compare against
# len(set(tuple(c) for c in allchords.values())); left unchanged here because
# enabling it could abort runs on existing data. TODO confirm intent.
assert len(allchords) == len(set(allchords))  # ensure no duplicate chords

# Read in Oscar's data.
vectors = []  # placeholder; rebuilt below as a NumPy array of note vectors
# Pass the path (not an open file object) so pandas opens and closes the file.
notedata = pd.read_csv("oscar2notes.txt", skiprows=2)
# One "<note><octave>" string per row, e.g. "G-" and 3 -> "G-3".
allnotes = ["%s%s" % (note, octave)
            for note, octave in zip(notedata["Note/Rest"], notedata["Octave"])]
print("Number of notes (# of samples for RBM):  %d" % len(notedata))

# In case you have to convert a sharp pitch (e.g. F#) into the flat spelling
# used as keys of the scale table below.
_ENHARMONIC_FLATS = {
    "C#": "D-", "D#": "E-", "E#": "F", "F#": "G-",
    "G#": "A-", "A#": "B-", "B#": "C",
}

# Pitch classes of the hard-coded scale for each root pitch. genAltered()
# repeats each 5-pitch pattern over octaves 3 to 6.
_SCALE_PITCHES = {
    "C":  ("C", "E-", "F", "G", "B-"),
    "D-": ("D-", "E", "G-", "A-", "B"),
    "D":  ("C", "D", "F", "G", "A"),
    "E-": ("D-", "E-", "G-", "A-", "B-"),
    "E":  ("D", "E", "G", "A", "B"),
    "F":  ("C", "E-", "F", "A-", "B-"),
    "G-": ("D-", "E", "G-", "A", "B"),
    "G":  ("C", "D", "F", "G", "B-"),
    "A-": ("D-", "E-", "G-", "A-", "B"),
    "A":  ("C", "D", "E", "G", "A"),
    "B-": ("D-", "E-", "F", "A-", "B-"),
    "B":  ("D", "E", "G-", "A", "B"),
}


def genAltered(note='C3'):
    """Return the altered scale for the pitch of ``note``, octaves 3 to 6.

    ``note`` is a music21-style note name such as ``'G-3'`` or ``'F#4'``.
    Its digits are discarded, so only the pitch (``'G-'``, ``'F#'``) selects
    the scale; sharp pitches are first respelled as flats (``'F#'`` ->
    ``'G-'``).

    Returns a new list of 20 music21-style note names: the scale's five
    pitches in octave 3, then octave 4, 5 and 6.

    Raises KeyError if the pitch is not one of the twelve known roots (or
    one of the seven known sharp spellings).
    """
    # Rm. octave info, e.g. G-5 --> G-, G5 --> G
    pitch = ''.join(ch for ch in note if not ch.isdigit())
    if '#' in pitch:
        pitch = _ENHARMONIC_FLATS[pitch]
    return ["%s%d" % (name, octave)
            for octave in range(3, 7)
            for name in _SCALE_PITCHES[pitch]]


# Given a MUSIC21 note, such as C5 or D#7, convert it
# into a note on the keyboard between 0 and 87 inclusive.
# Don't convert it for mingus; try to use music21 note style
# as much as possible for all this stuff.
_NOTE_VALUES = {'C': 0, 'D': 2, 'E': 4, 'F': 5, 'G': 7, 'A': 9, 'B': 11}


def quantify(note):
    """Convert a music21-style note name into an integer key index.

    The last character of ``note`` is read as a single-digit octave; every
    preceding character contributes to the pitch: a letter C..B adds its
    semitone offset, ``'-'`` (flat) subtracts one and ``'#'`` (sharp) adds
    one. The result is ``12 * (octave - 1) + offset``, e.g.
    ``quantify('C1') == 0`` and ``quantify('E-3') == 27``.

    No range check is performed: the value is only within 0..87 if the
    input note is. Octave 0 yields a negative number, which NumPy would
    treat as an index from the end of the vector.

    Raises ValueError if the last character is not a digit.
    """
    octave = int(note[-1]) - 1
    quantized = 12 * octave
    for symbol in note[:-1]:
        if symbol in _NOTE_VALUES:
            quantized += _NOTE_VALUES[symbol]
        elif symbol == '-':
            quantized -= 1
        elif symbol == '#':
            quantized += 1
    return quantized


# Create bitwise note vectors for use with Restricted Boltzmann Machine.
# One row per note with a single 1 at the note's key index. The array is
# allocated once instead of being grown with np.vstack on every iteration;
# with no notes at all this gives shape (0, 88).
vectors = np.zeros((len(allnotes), 88))
for ix, note in enumerate(allnotes):
    vectors[ix, quantify(note)] = 1
print(vectors.shape)

# Hard-code altered scales right below for genChordNotes().


# Convert mingus note back to music21 note. WORKS
def unmingify(note):
    """Return ``note`` with every '-' removed and then every 'b' turned into '-'.

    E.g. ``'Ab-3'`` -> ``'A-3'`` and ``'A-2'`` -> ``'A2'``.
    """
    return note.replace('-', '').replace('b', '-')


# Idea: what if just generate notewise vector with exact same pitches?
# Independence assumption?
def genChordNotes(chord):
    """Build a randomized (1, 88) note vector for a chord.

    ``chord`` is a list of mingus-style note names, e.g.
    ``['A-2', 'A-3', 'E-3']``. The returned array has a 1 at:

    * the key index of every chord note (always);
    * the same pitch moved to octaves 3, 4 and 5, each included only when
      a random bit comes up 1;
    * notes that occur in the altered scales (``genAltered``) of more than
      two chord notes, again each gated by a random bit.

    Because of the random bits, repeated calls with the same chord
    generally return different vectors.
    """
    chord = [unmingify(note) for note in chord]  # really important to unmingify notes.
    notevect = np.zeros((1, 88))

    # populate with initial pitches
    for note in chord:
        notevect[0, quantify(note)] = 1

    # add initial pitches transposed to other octaves
    otheroctaves = range(3, 6)
    for note in chord:
        notebase = note[:-1]
        for octv in otheroctaves:
            if random.getrandbits(1):  # randomize other pitches
                notevect[0, quantify("%s%s" % (notebase, octv))] = 1

    # Add altered scale that contains most of notes from chord notes,
    # e.g. if chord = [e5, g5, b5] then want altered scale with as many of
    # those notes as possible. This lets you expand past simply the notes
    # already in that chord. Encode the notes of the altered scale into the
    # bitwise vector as with the initial pitches.
    # Maybe it works better w/o the altered scales; or maybe instead with
    # pentatonics? try that.
    altfreqs = defaultdict(int)
    for note in chord:
        for scale_note in genAltered(note):
            altfreqs[scale_note] += 1
    # get notes that overlap > 2 times
    topnotes = [k for k, v in altfreqs.items() if v > 2]
    for note in topnotes:  # flip bits randomly from this list
        if random.getrandbits(1):
            notevect[0, quantify(note)] = 1

    return notevect


# Create initial arrays (1-40, one for each thing).
# Rows and labels are collected in lists, in the same iteration order, and
# stacked once. This keeps xdata and ydata aligned no matter which chord id
# the dict yields first, and avoids re-copying the array on every append.
rows = []
ydata = []
for chordID, chord in allchords.items():
    rows.append(genChordNotes(chord))
    ydata.append(chordID)
xdata = np.vstack(rows) if rows else np.zeros((0, 88))
print("Before adding random data:  %s %s" % (xdata.shape, len(ydata)))

# create more randomized data: 50 extra random vectors per chord
samples_per_chord = 50
for chordID, chord in allchords.items():
    for _ in range(samples_per_chord):
        rows.append(genChordNotes(chord))
        ydata.append(chordID)
if rows:
    xdata = np.vstack(rows)
ydata = np.array(ydata).reshape(-1, )
# make sure you have the right # of chords. check with # of items in
# "oscarchords" back in (5).
print("After adding random data:  %s %s" % (xdata.shape, ydata.shape))

from sklearn.svm import SVC
from sklearn import metrics
try:
    from sklearn.model_selection import GridSearchCV, train_test_split
except ImportError:
    # Older scikit-learn releases that predate sklearn.model_selection.
    from sklearn.grid_search import GridSearchCV
    from sklearn.cross_validation import train_test_split

# Create train, test sets
xtrain, xtest, ytrain, ytest = train_test_split(
    xdata, ydata, test_size=0.2, random_state=50)

# Use gridsearch to build the classifier. Change verbose GridSearchCV param
# to True if want progress on the processing.
grid_search = GridSearchCV(
    estimator=SVC(),
    param_grid={'kernel': ('linear', 'rbf'),
                'C': np.linspace(0.1, 5.1, 10)},
    n_jobs=-2)

# Train the classifier
grid_search.fit(xtrain, ytrain)

# Evaluate the classifier's effectiveness.
print "\nPredictions for sample of n=10: " print "Real values: ", ytest[:20] # verifies you get the class labels, not the problem earlier (only 1-2 of labels) print "Predicted: ", grid_search.predict(xtest[:20]) print metrics.classification_report(ytest, grid_search.predict(xtest)) print "Best parameters: ", grid_search.best_params_ # save the classifier to disk for use with 6b. The N-Gram Pipeline, Part II. with open('part7clf.pkl', 'wb') as fid: cPickle.dump(grid_search, fid) # save the defaultdict (intID : chord) to disk for use with 6b. The N-Gram Pipeline, Part II. with open('part7cdict.pkl', 'wb') as fid: cPickle.dump(allchords, fid)