# Download the software archive (the current `master` branch of sods/ods).
# NOTE(review): this fetches an unpinned branch snapshot that is later put on
# sys.path; consider pinning to a tagged release or commit.
import urllib

# `urlretrieve` lives in `urllib` on Python 2 and `urllib.request` on Python 3;
# resolve whichever is available so the download works on both.
try:
    from urllib.request import urlretrieve
except ImportError:
    from urllib import urlretrieve

urlretrieve('https://github.com/sods/ods/archive/master.zip', 'master.zip')

# Unzip the software into the current directory.
import zipfile

# The context manager closes the archive handle even if extraction fails, and
# avoids binding a module-level name that shadows the builtin `zip`.
with zipfile.ZipFile('./master.zip', 'r') as archive:
    archive.extractall('.')

# Add the extracted module location to the python path.
import sys
sys.path.append("./ods-master/")

import pods

# Fetch the movie body count dataset through pods.
# NOTE: `movies` is rebound a few lines below to the class ratings CSV, so
# this first assignment is only live until then.
d = pods.datasets.movie_body_count()
movies = d['Y']

import pandas as pd
import os

# Download the class ratings spreadsheet into pods' data directory and load it,
# using its 'index' column as the row index.
pods.util.download_url('https://www.dropbox.com/s/s6gqvp9b383b59y/movies.csv?dl=0&raw=1', store_directory = 'class_movie', save_name='movies.csv')
movies = pd.read_csv(os.path.join(pods.datasets.data_path, 'class_movie', 'movies.csv')).set_index('index')


# Every column that is not among the first nine is treated as a user's rating
# column (presumably the first nine hold film metadata -- confirm against the
# CSV). Sorting turns the unordered set difference into a reproducible order,
# so the rows of the melted table and of any per-user matrix are stable across
# runs.
user_names = sorted(set(movies.columns) - set(movies.columns[:9]))

# Reshape to long format: one row per (film, user) pair with the rating in
# the 'rating' column.
Y = pd.melt(movies.reset_index(), id_vars=['Film', 'index'],
            var_name='user', value_name='rating',
            value_vars=user_names)
# Drop every row that contains a missing value.
Y = Y.dropna(axis=0)

# Question 4 Code Answer

import numpy as np

# Dimension of our map of the 'library' (number of latent coordinates
# given to every user and every film).
q = 2
# Step size for the gradient updates.
learn_rate = 0.01

# Start both factor matrices from small Gaussian noise: one row per user in U,
# one row per film in V, q columns each.
U = pd.DataFrame(
    0.001 * np.random.normal(size=(len(user_names), q)),
    index=user_names,
)
V = pd.DataFrame(
    0.001 * np.random.normal(size=(len(movies.index), q)),
    index=movies.index,
)

# Centre the ratings by subtracting their overall mean.
Y['rating'] -= Y['rating'].mean()

def objective_gradient(Y, U, V):
    """Return the squared-error objective and its gradients w.r.t. U and V.

    For each row of ``Y`` the prediction is the inner product of the user's
    row of ``U`` and the film's row of ``V``; the objective is the sum over
    rows of ``(prediction - rating) ** 2``.

    Parameters
    ----------
    Y : DataFrame with columns 'user', 'index' (the film label) and 'rating'.
    U : DataFrame of user factors, indexed by the labels found in Y['user'].
    V : DataFrame of film factors, indexed by the labels found in Y['index'].

    Returns
    -------
    obj : float
        Sum of squared prediction errors.
    gU, gV : DataFrame
        Gradients of ``obj``, with the same shape and row index as ``U`` and
        ``V`` respectively (columns are the default 0..q-1).

    Raises
    ------
    KeyError
        If ``Y`` refers to a user or film that is absent from ``U`` / ``V``.
    """
    # Translate labels to row positions once, instead of a label lookup per
    # rating inside a Python-level loop.
    user_pos = U.index.get_indexer(Y['user'])
    film_pos = V.index.get_indexer(Y['index'])
    # get_indexer marks unknown labels with -1, which would otherwise silently
    # address the last row.
    if (user_pos < 0).any():
        raise KeyError("Y['user'] contains labels that are not in U.index")
    if (film_pos < 0).any():
        raise KeyError("Y['index'] contains labels that are not in V.index")

    U_rows = np.asarray(U, dtype=float)[user_pos]  # one row per rating
    V_rows = np.asarray(V, dtype=float)[film_pos]  # one row per rating
    ratings = np.asarray(Y['rating'], dtype=float)

    diff = np.sum(U_rows * V_rows, axis=1) - ratings  # vTu - y
    obj = np.dot(diff, diff)

    gU_values = np.zeros(U.shape)
    gV_values = np.zeros(V.shape)
    scaled = 2 * diff[:, np.newaxis]
    # np.add.at accumulates correctly when the same user or film occurs in
    # several ratings (plain fancy-index `+=` would keep only one update).
    np.add.at(gU_values, user_pos, scaled * V_rows)
    np.add.at(gV_values, film_pos, scaled * U_rows)

    gU = pd.DataFrame(gU_values, index=U.index)
    gV = pd.DataFrame(gV_values, index=V.index)
    return obj, gU, gV

import sys

# Plain batch gradient descent: recompute the objective and both gradients
# on the full ratings table, report progress, then step U and V downhill.
iterations = 100
for i in range(iterations):
    obj, gU, gV = objective_gradient(Y, U, V)
    # Single-argument print call: valid on Python 2 and 3, and emits the same
    # text the original Python 2 print statement produced.
    print("Iteration %s  Objective function:  %s" % (i, obj))
    U -= learn_rate*gU
    V -= learn_rate*gV

    
%matplotlib inline
import pylab as plt
fig, ax = plt.subplots(figsize=(8,8))
ax.plot(U[0], U[1], 'rx')
for index in U.index:
    ax.text(U[0][index], U[1][index], index)
fig, ax = plt.subplots(figsize=(8,8))
ax.plot(V[0], V[1], 'rx')
for index in V.index:
    # display the movie if it was rated.
    if np.sum(Y['index']==index)>0:
        ax.text(V[0][index], V[1][index], movies['Film'][index])
   

# Question 6 Code Answer

import pods

# Fetch the MovieLens 100k dataset through pods and keep its 'Y' entry.
# NOTE: this rebinds both `d` and `Y`, replacing the class-movie data
# used in the earlier questions.
d = pods.datasets.movielens100k()
Y = d["Y"]

# Code for question 7 here.