from collections import defaultdict
from sklearn.neural_network import BernoulliRBM
import pandas as pd
import numpy as np
import scipy.sparse
import random, cPickle

# Extract chords into unique ids, e.g. 0, 1, 2, 3, 4.
allchords = defaultdict()  # behaves like an ordinary dict: chordID -> list of notes
with open("oscar2chords_extract.txt", 'rb') as f:
    for ix, line in enumerate(f):
        items = line.split()
        allchords[ix] = items
# ensure no duplicate chords (compare the chords themselves, not just the IDs)
assert len(allchords) == len(set(tuple(chord) for chord in allchords.values()))

# Read in Oscar's data.
notedata = pd.read_csv(open("oscar2notes.txt", 'rb'), skiprows=2)
allnotes = []
for note, octave in zip(notedata["Note/Rest"], notedata["Octave"]):
    allnotes.append("%s%s" % (note, octave))
print "Number of notes (# of samples for RBM): ", len(notedata)
notedata.head()

# Given a music21 note, such as C5 or D#7, convert it into a key number on the
# keyboard between 0 and 87 inclusive. Don't convert it for mingus; try to use
# music21 note style as much as possible throughout.
def quantify(note):
    notevals = {
        'C' : 0,
        'D' : 2,
        'E' : 4,
        'F' : 5,
        'G' : 7,
        'A' : 9,
        'B' : 11
    }
    quantized = 0
    octave = int(note[-1]) - 1
    for i in note[:-1]:
        if i in notevals:
            quantized += notevals[i]
        if i == '-':   # flat
            quantized -= 1
        if i == '#':   # sharp
            quantized += 1
    quantized += 12 * octave
    return quantized

# Create bitwise note vectors for use with the Restricted Boltzmann Machine.
vectors = np.zeros((1, 88))
for ix, note in enumerate(allnotes):
    vect = np.zeros((1, 88))
    vect[0, quantify(note)] = 1
    if ix == 0:
        vectors = vect
    else:
        vectors = np.vstack((vectors, vect))
print vectors.shape

# Convert a mingus note (e.g. 'Ab-3') back to a music21 note (e.g. 'A-3'). WORKS
def unmingify(note):
    return note.replace('-', '').replace('b', '-')

# Given a list of mingus notes (i.e. a chord), say ['A-2', 'A-3', 'E-3'],
# return a bitwise note vector with possible notes to go along with it.
# Takes a chord (i.e. a list of notes) -- not a freqdict.
# Idea: what if we just generate a notewise vector with the exact same pitches?
# Independence assumption?
def genChordNotes(chord):
    chord = [unmingify(note) for note in chord]  # really important to unmingify notes
    notevect = np.zeros((1, 88))
    # populate with the initial pitches
    for note in chord:
        notevect[0, quantify(note)] = 1
    # randomly add other octaves of the initial pitches
    otheroctaves = range(3, 6)
    for note in chord:
        notebase = note[:-1]
        for octv in otheroctaves:
            put = bool(random.getrandbits(1))  # coin flip per extra octave
            if put:
                translated = "%s%s" % (notebase, octv)
                notevect[0, quantify(translated)] = 1
    # TODO: add other relevant notes if time; might have enough chords already.
    return notevect

# Create the initial training arrays: one bitwise vector per chord ID.
# (Relies on the dict yielding chordID 0 first, which holds for 0..N-1 integer keys in CPython 2.)
x_train = np.zeros((1, 88))
for chordID, chord in allchords.items():
    if chordID == 0:
        x_train = genChordNotes(chord)
    else:
        x_train = np.vstack((x_train, genChordNotes(chord)))
y_train = allchords.keys()
print "Before adding random data: ", x_train.shape, len(y_train)

# Augment with more randomized vectors per chord (genChordNotes is stochastic).
for chordID, chord in allchords.items():
    for j in xrange(5):
        x_train = np.vstack((x_train, genChordNotes(chord)))
        y_train.append(chordID)
y_train = np.array(y_train).reshape(-1, )
print "After adding random data: ", x_train.shape, y_train.shape

from sklearn.ensemble import RandomForestClassifier
rf = RandomForestClassifier()
rf.fit(x_train, y_train)
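# Optional sanity check (illustrative sketch, not part of the original notebook):
# re-vectorize a known chord and see whether the fitted forest recovers its ID.
# genChordNotes is stochastic, so agreement is expected but not guaranteed.
checkID, checkchord = 0, allchords[0]
print "Chord %d predicted as: %d" % (checkID, rf.predict(genChordNotes(checkchord))[0])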
# save the classifier to disk for use with 6b. The N-Gram Pipeline, Part II.
with open('part7clf.pkl', 'wb') as fid:
    cPickle.dump(rf, fid)

# save the defaultdict (intID : chord) to disk for use with 6b. The N-Gram Pipeline, Part II.
with open('part7cdict.pkl', 'wb') as fid:
    cPickle.dump(allchords, fid)
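# Hedged sketch of the downstream consumer ("6b. The N-Gram Pipeline, Part II"):
# reload the two pickles and map a bitwise note vector back to a chord. Only the
# filenames come from the dumps above; the C-E-G query vector is illustrative.
with open('part7clf.pkl', 'rb') as fid:
    loadedclf = cPickle.load(fid)
with open('part7cdict.pkl', 'rb') as fid:
    loadedchords = cPickle.load(fid)
query = np.zeros((1, 88))
for qnote in ["C4", "E4", "G4"]:
    query[0, quantify(qnote)] = 1
predictedID = loadedclf.predict(query)[0]
print "Chord predicted for C-E-G query: ", loadedchords[predictedID]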