# View assignment here
from __future__ import division
import numpy as np
import pdb
# Toy collaborative-filtering demo.
#
# A small user x movie preference matrix P is factorized as P ~= X.Y with
# alternating least squares, first treating every entry (including the
# zeros) as an observation, then weighting each entry by C so that only the
# movies a user actually rated contribute to the fit.  Finally, each user is
# recommended the not-yet-rated movie with the highest predicted score.

# MOVIES: Legally Blond; Matrix; Bourne Identity; You’ve Got Mail;
# The Devil Wears Prada; The Dark Knight; The Lord of the Rings.
movie_titles = ['Legally Blond', 'Matrix', 'Bourne Identity',
                'You’ve Got Mail', 'The Devil Wears Prada',
                'The Dark Knight', 'The Lord of the Rings']

# One row per user, one column per movie (same order as movie_titles):
# 1 = liked, -1 = disliked, 0 = not rated.
P = [[ 0, 0,-1, 0,-1, 1, 1],  # User 1
     [-1, 1, 1,-1, 0, 1, 1],  # User 2
     [ 0, 1, 1, 0, 0,-1, 1],  # User 3
     [-1, 1, 1, 0, 0, 1, 1],  # User 4
     [ 0, 1, 1, 0, 0, 1, 1],  # User 5
     [ 1,-1, 1, 1, 1,-1, 0],  # User 6
     [-1, 1,-1, 0,-1, 0, 1],  # User 7
     [ 0,-1, 0, 1, 1,-1,-1],  # User 8
     [ 0, 0,-1, 1, 1, 0,-1]]  # User 9
P = np.array(P)
C = np.abs(P)  # Weights: will be 0 only when P[i,j] == 0 (movie not rated).
print('Raw Preference Matrix:')
print(P)

# Parameters
reg = 0.1      # regularization parameter
f = 2          # number of factors
n_iters = 100  # number of alternating sweeps for each method
m, n = P.shape
# The ridge term never changes, so build it once instead of on every solve.
ridge = reg * np.eye(f)

# Random Initialization
# X is (m x f)
# Y is (f x n)
# Entries are drawn uniformly from (-1, 1] and then shrunk by a factor of 10.
X = 1 - 2*np.random.rand(m, f)
Y = 1 - 2*np.random.rand(f, n)
X *= 0.1
Y *= 0.1

# Alternating Ridge Regression
# `range` (not `xrange`) so the script runs on Python 3 as well as Python 2.
for _ in range(n_iters):
    # Least-squares keeping Y fixed: solve (Y.Y' + reg.I) X' = Y.P'
    X = np.linalg.solve(np.dot(Y, Y.T) + ridge, np.dot(Y, P.T)).T
    # Least-squares keeping X fixed: solve (X'.X + reg.I) Y = X'.P
    Y = np.linalg.solve(np.dot(X.T, X) + ridge, np.dot(X.T, P))

print('Alternating Ridge Regression:')
print(np.dot(X, Y))
print('Error for movies that users actually rated: %.2f'%np.sum((C*(P - np.dot(X,Y)))**2))

# Re-initialize
X = 1 - 2*np.random.rand(m, f)
Y = 1 - 2*np.random.rand(f, n)
X *= 0.1
Y *= 0.1

# Alternating Weighted Ridge Regression
for _ in range(n_iters):
    # Each user u has a different set of weights Cu, so every row of X
    # needs its own small (f x f) solve.
    for u, Cu in enumerate(C):
        # Y * Cu scales column j of Y by Cu[j]; this equals Y.diag(Cu)
        # without materializing the (n x n) diagonal matrix.
        YCu = Y * Cu
        X[u] = np.linalg.solve(np.dot(YCu, Y.T) + ridge, np.dot(YCu, P[u]))
    # Likewise each movie i has its own weights Ci over the users.
    for i, Ci in enumerate(C.T):
        # X.T * Ci equals X'.diag(Ci).
        XtCi = X.T * Ci
        Y[:, i] = np.linalg.solve(np.dot(XtCi, X) + ridge,
                                  np.dot(XtCi, P[:, i]))

print('Alternating Weighted Ridge Regression:')
print(np.dot(X, Y))
print('Error for movies that users actually rated: %.2f'%np.sum((C*(P - np.dot(X,Y)))**2))

not_C = abs(1 - C)  # 1 for movies not rated, 0 for rated ones
P_hat = np.dot(X, Y)
# Recommend only among unrated movies: rated entries are masked with -inf so
# they can never win the argmax, however large the predicted scores are.
top_movie_id = np.argmax(np.where(not_C == 1, P_hat, -np.inf), axis=1)
for u, tm_id in enumerate(top_movie_id):
    print('User %d liked %s'%(u+1, ', '.join([movie_titles[i] for i,p in enumerate(P[u]) if p == 1])))
    print('User %d disliked %s'%(u+1, ', '.join([movie_titles[i] for i,p in enumerate(P[u]) if p == -1])))
    print('For user %d the top movie is movie n.%d (%s) - predicted vote %.2f\n'% \
          (u+1, tm_id+1, movie_titles[tm_id], P_hat[u,tm_id]))