import numpy as np

from scipy import io
from sklearn.metrics import roc_auc_score, average_precision_score

import pmf

# Each list holds the whitespace-stripped lines of its file, in file order.
# The files are opened in binary mode, exactly as before.
with open('train_tracks.txt', 'rb') as f:
    train_tracks = [line.strip() for line in f]

with open('test_tracks.txt', 'rb') as f:
    test_tracks = [line.strip() for line in f]

# presumably the tag vocabulary, one tag per line -- TODO confirm
with open('voc.txt', 'rb') as f:
    tags = [line.strip() for line in f]

# compute evaluation metrics
def construct_pred_mask(tags_predicted, predictat):
    """Flag the `predictat` highest-scoring columns of every row.

    Parameters
    ----------
    tags_predicted : ndarray, shape (n_samples, n_tags)
        Real-valued scores; a larger score ranks higher.
    predictat : int
        Number of top-ranked columns to flag in each row.  Values larger
        than n_tags simply flag every column.

    Returns
    -------
    tags_predicted_binary : ndarray of bool, same shape as `tags_predicted`
        True at the positions of each row's `predictat` largest scores,
        False elsewhere.
    """
    n_samples = tags_predicted.shape[0]
    # Column indices of each row's scores in descending order, truncated to
    # the first `predictat` entries.
    rankings = np.argsort(-tags_predicted, axis=1)[:, :predictat]
    tags_predicted_binary = np.zeros_like(tags_predicted, dtype=bool)
    # Broadcasting a column vector of row indices against `rankings` sets
    # all selected (row, column) pairs in one assignment, with no Python loop.
    row_idx = np.arange(n_samples)[:, np.newaxis]
    tags_predicted_binary[row_idx, rankings] = True
    return tags_predicted_binary

def per_tag_prec_recall(tags_predicted_binary, tags_true_binary):
    """Compute precision and recall separately for every tag (column).

    Parameters
    ----------
    tags_predicted_binary : ndarray, shape (n_samples, n_tags)
        Predicted tag indicators.
    tags_true_binary : ndarray, shape (n_samples, n_tags)
        Ground-truth tag indicators.

    Returns
    -------
    prec : ndarray, shape (n_tags,)
        Per-tag precision.  A tiny epsilon in the denominator makes a tag
        that was never predicted come out as 0 instead of dividing by zero.
    recall : ndarray
        Per-tag recall, restricted to the tags that have at least one true
        positive example (so it can be shorter than `prec`).
    """
    # number of samples per tag where prediction and ground truth agree on 1
    hits_per_tag = np.logical_and(tags_predicted_binary,
                                  tags_true_binary).sum(axis=0)
    predicted_per_tag = tags_predicted_binary.sum(axis=0)
    true_per_tag = tags_true_binary.sum(axis=0).astype(float)

    prec = hits_per_tag / (predicted_per_tag + np.spacing(1))

    # recall is undefined for tags with no positives, so those are dropped
    has_positives = true_per_tag > 0
    recall = hits_per_tag[has_positives] / true_per_tag[has_positives]
    return prec, recall


def aroc_ap(tags_true_binary, tags_predicted):
    """Compute per-tag ROC AUC and average precision.

    Parameters
    ----------
    tags_true_binary : ndarray, shape (n_samples, n_tags)
        Ground-truth tag indicators.
    tags_predicted : ndarray, shape (n_samples, n_tags)
        Real-valued tag scores.

    Returns
    -------
    auc : list of float
        ROC AUC of every tag that could be evaluated.
    aprec : list of float
        Average precision of the same tags, in the same order.

    Notes
    -----
    Tags whose ground truth contains a single class (no positives, or only
    positives) are skipped: ROC AUC is undefined for them and
    ``roc_auc_score`` raises ``ValueError`` in that situation.
    """
    n_tags = tags_true_binary.shape[1]

    auc = list()
    aprec = list()
    for i in range(n_tags):
        y_true = tags_true_binary[:, i]
        n_pos = np.count_nonzero(y_true)
        # need at least one positive AND one negative example for this tag
        if n_pos == 0 or n_pos == y_true.size:
            continue
        y_score = tags_predicted[:, i]
        auc.append(roc_auc_score(y_true, y_score))
        aprec.append(average_precision_score(y_true, y_score))
    return auc, aprec


def print_out_metrics(tags_true_binary, tags_predicted, predictat):
    """Print precision, recall, F-score, AROC and AP for a set of predictions.

    Each metric is computed per tag and reported as its mean over tags; the
    number in parentheses is the standard deviation over tags divided by the
    square root of the number of tags that entered the mean.

    Parameters
    ----------
    tags_true_binary : ndarray, shape (n_samples, n_tags)
        Ground-truth tag indicators.
    tags_predicted : ndarray, shape (n_samples, n_tags)
        Real-valued tag scores.
    predictat : int
        Number of top-scoring tags per sample treated as predicted when
        computing precision and recall.

    Returns
    -------
    None
        The metrics are written to standard output only.
    """
    tags_predicted_binary = construct_pred_mask(tags_predicted, predictat)
    prec, recall = per_tag_prec_recall(tags_predicted_binary, tags_true_binary)
    mprec, mrecall = np.mean(prec), np.mean(recall)

    # np.sqrt is used because a bare `sqrt` is never imported in this module.
    # Each print takes one pre-formatted string, so the output is the same
    # whether `print` is a statement (Python 2) or a function (Python 3).
    print('Precision = %.3f (%.3f)' % (mprec, np.std(prec) / np.sqrt(prec.size)))
    print('Recall = %.3f (%.3f)' % (mrecall, np.std(recall) / np.sqrt(recall.size)))
    print('F-score = %.3f' % (2 * mprec * mrecall / (mprec + mrecall)))

    auc, aprec = aroc_ap(tags_true_binary, tags_predicted)
    print('AROC = %.3f (%.3f)' % (np.mean(auc), np.std(auc) / np.sqrt(len(auc))))
    print('AP = %.3f (%.3f)' % (np.mean(aprec), np.std(aprec) / np.sqrt(len(aprec))))

# codebook size (data is held in memory, so it should not be too large)
K = 512

# load the pre-saved data
data_mat = io.loadmat('data_K%d.mat' % K)
X, X_test, y_test = data_mat['X'], data_mat['X_test'], data_mat['y_test']

# Set every positive entry in columns K onward of X to 1.
# presumably columns [0, K) are codebook counts and columns [K, D) are tag
# annotations, since D = K + len(tags) below -- TODO confirm
tmp = X[:, K:]
tmp[tmp > 0] = 1
X[:, K:] = tmp

# Histogram (50 bins) of the number of positive y_test entries per row.
# NOTE(review): `hist` is not imported anywhere in this file; presumably it
# comes from an interactive pylab namespace -- confirm before running this
# as a plain script.
hist(np.sum( (y_test > 0), axis=1), bins=50)
pass

# total number of columns expected in X: K plus one per entry of `tags`
D = K + len(tags)

# take a look at one example: bar chart of the first row of X
# NOTE(review): `bar` is likewise not imported in this file.
bar(np.arange(D), X[0])

# ---- Experiment 1: batch PoissonMF fitted on X ----
n_components = 100
coder = pmf.PoissonMF(n_components=n_components, random_state=98765, verbose=True)

coder.fit(X)

# A second PoissonMF with the same settings.  It is not fitted; instead its
# components are set from the fitted coder, keeping only the first K columns
# of gamma_b.
tagger = pmf.PoissonMF(n_components=n_components, random_state=98765, verbose=True)

# NOTE(review): rho_b is passed whole here, while the online experiments
# further down this file pass rho_b[:, :K].  Presumably the batch model's
# rho_b has a shape that does not need slicing -- confirm against pmf.
tagger.set_components(coder.gamma_b[:, :K], coder.rho_b)

Et = tagger.transform(X_test)

# normalize every row of Et to sum to 1
Et /= Et.sum(axis=1, keepdims=True)

# tag scores: Et times the columns of coder.Eb from K onward
tags_predicted = Et.dot(coder.Eb[:, K:])
print tags_predicted.min(), tags_predicted.max()

# Subtract div_factor times each column's mean (taken over rows) from the
# scores.  presumably this penalizes tags that score high for every test
# example -- TODO confirm the intent.
div_factor = 3
tags_predicted = tags_predicted - div_factor * np.mean(tags_predicted, axis=0)

# evaluate with the top `predictat` scores per row counted as predictions;
# ground truth is wherever y_test is positive
predictat = 20
tags_true_binary = (y_test > 0)

print_out_metrics(tags_true_binary, tags_predicted, predictat)

# ---- Experiment 2: online PoissonMF fitted on the same X ----
# One pass over the data in batches of 500.
n_components = 100
online_coder = pmf.OnlinePoissonMF(n_components=n_components, batch_size=500, n_pass=1, 
                                   random_state=98765, verbose=True)

# est_total is set to the number of lines read from train_tracks.txt
online_coder.fit(X, est_total=len(train_tracks))

# NOTE(review): `plot` is not imported anywhere in this file; presumably it
# comes from an interactive pylab namespace -- confirm before running this
# as a plain script.
plot(online_coder.bound)
pass

# Unfitted PoissonMF whose components are set from the online model, keeping
# only the first K columns of both gamma_b and rho_b.
tagger = pmf.PoissonMF(n_components=n_components, random_state=98765, verbose=True)

tagger.set_components(online_coder.gamma_b[:, :K], online_coder.rho_b[:, :K])

Et = tagger.transform(X_test)

# normalize every row of Et to sum to 1
Et /= Et.sum(axis=1, keepdims=True)

# tag scores: Et times the columns of online_coder.Eb from K onward
tags_predicted = Et.dot(online_coder.Eb[:, K:])
# NOTE(review): n_samples / n_tags are not used in the code visible here.
n_samples, n_tags = tags_predicted.shape

print tags_predicted.min(), tags_predicted.max()

# subtract div_factor times each column's mean (taken over rows)
div_factor = 3
tags_predicted = tags_predicted - div_factor * np.mean(tags_predicted, axis=0)

# evaluate with the top `predictat` scores per row counted as predictions;
# ground truth is wherever y_test is positive
predictat = 20
tags_true_binary = (y_test > 0)

print_out_metrics(tags_true_binary, tags_predicted, predictat)

# ---- Experiment 3: online PoissonMF on a training set loaded from separate files ----
data_mat = io.loadmat('X_train_K%d.mat' % K)
tag_mat = io.loadmat('y_train.mat')

# Rebuild X by placing y_train's columns to the right of the loaded 'X'
# matrix.  Unlike the first X, positive entries of this tag part are not
# clipped to 1 in the code visible here.
X = np.hstack((data_mat['X'], tag_mat['y_train']))

# one pass over the data in batches of `batch_size`
n_components = 100
batch_size = 1000
online_coder_full = pmf.OnlinePoissonMF(n_components=n_components, batch_size=batch_size, n_pass=1, 
                                        random_state=98765, verbose=True)

# no est_total is passed here, unlike the previous online fit
online_coder_full.fit(X)

# the last batch is not full
# NOTE(review): `plot` is not imported anywhere in this file; presumably it
# comes from an interactive pylab namespace -- confirm.
plot(online_coder_full.bound[:-1])
pass

# Unfitted PoissonMF whose components are set from the online model, keeping
# only the first K columns of both gamma_b and rho_b.
tagger = pmf.PoissonMF(n_components=n_components, random_state=98765, verbose=True)

tagger.set_components(online_coder_full.gamma_b[:, :K], online_coder_full.rho_b[:, :K])

Et = tagger.transform(X_test)

# normalize every row of Et to sum to 1, then score tags with the columns of
# online_coder_full.Eb from K onward
Et /= Et.sum(axis=1, keepdims=True)
tags_predicted = Et.dot(online_coder_full.Eb[:, K:])
print tags_predicted.min(), tags_predicted.max()

# subtract div_factor times each column's mean (taken over rows)
div_factor = 3
tags_predicted = tags_predicted - div_factor * np.mean(tags_predicted, axis=0)

# evaluate with the top `predictat` scores per row counted as predictions;
# ground truth is wherever y_test is positive
predictat = 20
tags_true_binary = (y_test > 0)

print_out_metrics(tags_true_binary, tags_predicted, predictat)