# Collaborative-filtering lab script: fetch the `pods` helper library and the
# class movie ratings, then fit a q-dimensional user/film embedding by
# gradient descent on the summed squared prediction error and plot the result.
import os
import sys
import urllib
import zipfile

try:  # Python 3 moved urlretrieve into urllib.request
    from urllib.request import urlretrieve
except ImportError:  # Python 2
    from urllib import urlretrieve

import numpy as np
import pandas as pd

# download the software
urlretrieve('https://github.com/sods/ods/archive/master.zip', 'master.zip')

# unzip the software (the context manager closes the archive handle)
with zipfile.ZipFile('./master.zip', 'r') as archive:
    archive.extractall('.')

# add the module location to the python path.
sys.path.append("./ods-master/")
import pods  # noqa: E402 -- only importable once the path above is set

d = pods.datasets.movie_body_count()
movies = d['Y']

pods.util.download_url(
    'https://www.dropbox.com/s/s6gqvp9b383b59y/movies.csv?dl=0&raw=1',
    store_directory='class_movie',
    save_name='movies.csv')
movies = pd.read_csv(
    os.path.join(pods.datasets.data_path, 'class_movie', 'movies.csv')
).set_index('index')

# Columns other than the first nine are used as the per-user rating columns.
# NOTE: this goes through a set, so the order of user_names is not guaranteed.
user_names = list(set(movies.columns) - set(movies.columns[:9]))

# Reshape to one row per (film, user) pair and drop rows with missing values.
Y = pd.melt(movies.reset_index(),
            id_vars=['Film', 'index'],
            var_name='user',
            value_name='rating',
            value_vars=user_names)
Y = Y.dropna(axis=0)

# Question 4 Code Answer
q = 2  # the dimension of our map of the 'library'
learn_rate = 0.01
# Small random initial positions: one q-dimensional row per user / per film.
U = pd.DataFrame(np.random.normal(size=(len(user_names), q)) * 0.001,
                 index=user_names)
V = pd.DataFrame(np.random.normal(size=(len(movies.index), q)) * 0.001,
                 index=movies.index)
# Centre the ratings so that the inner products model deviations from the mean.
Y['rating'] -= Y['rating'].mean()


def objective_gradient(Y, U, V):
    """Return the squared-error objective and its gradients w.r.t. U and V.

    Parameters
    ----------
    Y : DataFrame read row by row through its 'index' (film label), 'user'
        and 'rating' columns.
    U : DataFrame of user vectors, looked up by user label with ``.loc``.
    V : DataFrame of film vectors, looked up by film label with ``.loc``.

    Returns
    -------
    obj : sum over the rows of Y of (u . v - rating) ** 2.
    gU : DataFrame shaped and indexed like U holding the gradient for U.
    gV : DataFrame shaped and indexed like V holding the gradient for V.
    """
    gU = pd.DataFrame(np.zeros((U.shape)), index=U.index)
    gV = pd.DataFrame(np.zeros((V.shape)), index=V.index)
    obj = 0.
    for ind, series in Y.iterrows():
        film = series['index']
        user = series['user']
        rating = series['rating']
        # Look each vector up once; both are reused for the gradients below.
        u = U.loc[user]
        v = V.loc[film]
        prediction = np.dot(u, v)  # vTu
        diff = prediction - rating  # vTu - y
        obj += diff * diff
        gU.loc[user] += 2 * diff * v
        gV.loc[film] += 2 * diff * u
    return obj, gU, gV


iterations = 100
for i in range(iterations):
    obj, gU, gV = objective_gradient(Y, U, V)
    # Single pre-formatted string so the output is the same on Python 2 and 3.
    print("Iteration %s  Objective function:  %s" % (i, obj))
    U -= learn_rate * gU
    V -= learn_rate * gV

# Equivalent of the notebook magic `%matplotlib inline`, written as plain
# Python so the file still parses outside IPython.
try:
    get_ipython().run_line_magic('matplotlib', 'inline')
except NameError:
    pass  # not running under IPython: keep the default matplotlib backend

import pylab as plt  # noqa: E402 -- imported after the backend selection

# Map of the users in the first two latent dimensions.
fig, ax = plt.subplots(figsize=(8, 8))
ax.plot(U[0], U[1], 'rx')
for index in U.index:
    ax.text(U[0][index], U[1][index], index)

# Map of the films in the first two latent dimensions.
fig, ax = plt.subplots(figsize=(8, 8))
ax.plot(V[0], V[1], 'rx')
rated_films = set(Y['index'])  # built once instead of rescanning Y per film
for index in V.index:
    # display the movie if it was rated.
    if index in rated_films:
        ax.text(V[0][index], V[1][index], movies['Film'][index])

# Question 6 Code Answer
d = pods.datasets.movielens100k()
Y = d['Y']

# Code for question 7 here.