from collections import defaultdict
import pandas as pd
import numpy as np
import scipy.sparse
import random, cPickle

# Extract chords into unique ids, e.g. 0, 1, 2, 3, 4
# Each line of oscar2chords_extract.txt becomes one entry: the key is the
# zero-based line index, the value is the list of whitespace-separated tokens
# on that line.
allchords = defaultdict() # no default_factory given, so it behaves like a plain dict
with open("oscar2chords_extract.txt", 'rb') as f:
    for ix, line in enumerate(f):
        items = line.split()
        allchords[ix] = items
# Ensure no duplicate chords. The ids are dict keys and therefore always
# unique, so the check has to compare the chord contents themselves.
assert len(allchords) == len(set(tuple(chord) for chord in allchords.values())), \
    "duplicate chords found in oscar2chords_extract.txt"

# Read in Oscar's data.
vectors = []
notedata = pd.read_csv(open("oscar2notes.txt", 'rb'), skiprows=2)
allnotes = []
for note, octave in zip(notedata["Note/Rest"], notedata["Octave"]):
    allnotes.append("%s%s" % (note, octave))

print "Number of notes (# of samples for RBM): ", len(notedata)
notedata.head()

# Sharp (and the two flat) spellings that have no entry of their own in
# _ALTERED_PITCHES, mapped to the enharmonic spelling that does.
_ENHARMONIC = {
    "C#" : "D-", "D#" : "E-", "E#" : "F", "F#" : "G-",
    "G#" : "A-", "A#" : "B-", "B#" : "C",
    "C-" : "B", "F-" : "E",
}

# The five pitch names of the hard-coded "altered" scale for each root pitch
# (music21 spelling, '-' means flat). genAltered() repeats them per octave.
_ALTERED_PITCHES = {
    "C"  : ("C", "E-", "F", "G", "B-"),
    "D-" : ("D-", "E", "G-", "A-", "B"),
    "D"  : ("C", "D", "F", "G", "A"),
    "E-" : ("D-", "E-", "G-", "A-", "B-"),
    "E"  : ("D", "E", "G", "A", "B"),
    "F"  : ("C", "E-", "F", "A-", "B-"),
    "G-" : ("D-", "E", "G-", "A", "B"),
    "G"  : ("C", "D", "F", "G", "B-"),
    "A-" : ("D-", "E-", "G-", "A-", "B"),
    "A"  : ("C", "D", "E", "G", "A"),
    "B-" : ("D-", "E-", "F", "A-", "B-"),
    "B"  : ("D", "E", "G-", "A", "B"),
}

# Generates the altered scale from octaves 3 to 6 for the pitch (e.g. G-)
# of a given note (e.g. G-3) in music21 style.
#
# note: note name such as 'G-3' or 'F#5'; every digit is ignored, so only the
#       pitch part selects the scale. Sharps (and C-/F-) are first converted
#       to their enharmonic spelling.
# Returns a new list of 20 note-name strings: the five scale pitches in
# octave 3, then octave 4, 5 and 6.
# Raises KeyError when the pitch is not one of the twelve known roots.
def genAltered(note='C3'):
    # Rm. octaveinfo, eg. G-5 --> G-, G5->G
    pitch = ''.join(ch for ch in note if not ch.isdigit())
    pitch = _ENHARMONIC.get(pitch, pitch)
    scale = _ALTERED_PITCHES[pitch]
    # Built fresh on every call so callers may mutate the result freely.
    return ["%s%d" % (name, octave) for octave in range(3, 7) for name in scale]

# Given a MUSIC21 note name such as C5 or D#7, convert it into an integer
# semitone index for the 88-wide note vectors.
# The last character is read as a single-digit octave; the characters before
# it contribute the letter's semitone offset, -1 per '-' (flat) and +1 per '#'
# (sharp). Any other character is ignored.
# C1 maps to 0 and each octave adds 12. There is no range check, so names
# below C1 give negative values and high notes can exceed 87.
# Don't convert it for mingus; try to use music21 note style
# as much as possible for all this stuff.
def quantify(note):
    semitone_steps = {
        'C': 0, 'D': 2, 'E': 4, 'F': 5, 'G': 7, 'A': 9, 'B': 11,
        '-': -1,
        '#': 1,
    }
    octaves_above_first = int(note[-1]) - 1
    within_octave = sum(semitone_steps.get(symbol, 0) for symbol in note[:-1])
    return within_octave + 12 * octaves_above_first

# Create bitwise note vectors for use with Restricted Boltzmann Machine.
vectors = np.zeros((1, 88))
for ix, note in enumerate(allnotes):
    vect = np.zeros((1, 88))
    vect[0, quantify(note)] = 1
    if ix == 0:
        vectors = vect
    else:
        vectors = np.vstack((vectors, vect))
print vectors.shape

""" Hard-code altered scales right below for genChordNotes(). """

# Convert mingus note back to music21 note:
# every '-' is removed first, then every lowercase 'b' becomes '-'
# (e.g. 'Bb-3' -> 'B-3', 'C-5' -> 'C5').
def unmingify(note):
    without_dashes = ''.join(note.split('-'))
    return '-'.join(without_dashes.split('b'))
    
# Given a list of mingus notes (i.e. a chord), say ['A-2', 'A-3', 'E-3'],
# return a 1x88 bitwise note vector of notes that may go along with it.
# Three groups of bits are set:
#   1. every chord note itself (always);
#   2. each chord pitch moved to octaves 3, 4 and 5 (each on a coin flip);
#   3. each note that occurs in the genAltered() scale of more than two
#      chord notes (each on a coin flip).
# Because of the coin flips, repeated calls with the same chord generally
# return different vectors.
def genChordNotes(chord):
    # really important to unmingify notes: quantify() and genAltered()
    # expect music21 spelling.
    m21notes = [unmingify(name) for name in chord]
    bits = np.zeros((1, 88))

    # 1. the pitches of the chord itself
    for name in m21notes:
        bits[0, quantify(name)] = 1

    # 2. the same pitches in other octaves, chosen at random
    for name in m21notes:
        stem = name[:-1]  # note name without its octave digit
        for octv in (3, 4, 5):
            if random.getrandbits(1):
                bits[0, quantify("%s%s" % (stem, octv))] = 1

    # 3. scale notes shared by the scales of several chord notes. Count how
    # many chord notes have each scale note in their scale, then randomly
    # switch on those counted more than twice. This lets the vector expand
    # past simply the notes already in the chord.
    scalehits = defaultdict(int)
    for name in m21notes:
        for scalenote in genAltered(name):
            scalehits[scalenote] += 1
    for scalenote, hits in scalehits.items():
        # the coin is only flipped for notes that pass the overlap threshold
        if hits > 2 and random.getrandbits(1):
            bits[0, quantify(scalenote)] = 1

    return bits

# Create initial arrays (1-40, one for each thing)
xdata = np.zeros((1, 88))
for chordID, chord in allchords.items():
    if chordID == 0:
        xdata = genChordNotes(chord)
    else:
        xdata = np.vstack((xdata, genChordNotes(chord)))
ydata = allchords.keys()

print "Before adding random data: ", xdata.shape, len(ydata)

# create more randomized data
for chordID, chord in allchords.items():
    for j in xrange(50): 
        xdata = np.vstack((xdata, genChordNotes(chord)))
        ydata.append(chordID)
ydata = np.array(ydata).reshape(-1, )

print "After adding random data: ", xdata.shape, ydata.shape
# make sure you have the right # of chords. check with # of items in "oscarchords" back in (5).

from sklearn.svm import SVC
from sklearn.grid_search import GridSearchCV
from sklearn.cross_validation import train_test_split
from sklearn import metrics

# Create train, test sets
xtrain, xtest, ytrain, ytest = train_test_split(xdata, ydata, test_size=0.2, random_state=50)

# Use gridsearch to build the classifier. Change verbose GridSearchCV param to True if want progress on the processing.
grid_search = GridSearchCV(estimator=SVC(), param_grid={'kernel' : ('linear', 'rbf'), 'C' : np.linspace(0.1, 5.1, 10)}, n_jobs=-2)

# Train the classifier
grid_search.fit(xtrain, ytrain)

# Evaluate the classifier's effectiveness.
print "\nPredictions for sample of n=10: "
print "Real values: ", ytest[:20] # verifies you get the class labels, not the problem earlier (only 1-2 of labels)
print "Predicted:   ", grid_search.predict(xtest[:20])
print metrics.classification_report(ytest, grid_search.predict(xtest))
print "Best parameters: ", grid_search.best_params_

# save the classifier to disk for use with 6b. The N-Gram Pipeline, Part II.
with open('part7clf.pkl', 'wb') as fid:
    cPickle.dump(grid_search, fid)

# save the defaultdict (intID : chord) to disk for use with 6b. The N-Gram Pipeline, Part II.
with open('part7cdict.pkl', 'wb') as fid:
    cPickle.dump(allchords, fid)