# Notebook setup: plotting defaults, raw-data download, and the initial
# DataFrame loads used by the rest of the analysis.
#
# NOTE: the `get_ipython()` calls below are the plain-Python spelling of the
# IPython `%magic` and `!shell` lines, so this code must run under
# IPython/Jupyter (exactly like the original magics).

# Render our plots inline
get_ipython().run_line_magic('matplotlib', 'inline')

import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

# Make the graphs a bit prettier.
# The old `pd.set_option('display.mpl_style', 'default')` option was deprecated
# and then removed from pandas, so it raises OptionError on current versions;
# matplotlib's own style sheets are the supported replacement.
plt.style.use('ggplot')
plt.rcParams['figure.figsize'] = (15, 5)

# Get our data -- temporary home.
# Requires `wget`, `gunzip` and `tar` on the PATH of the notebook host.
get_ipython().system('wget http://www.columbia.edu/~mj340/ml-100k.tar.gz')
get_ipython().system('wget http://www.columbia.edu/~mj340/HMXPC13_DI_v2_5-14-14.csv.gz')
get_ipython().system('gunzip HMXPC13_DI_v2_5-14-14.csv.gz')
get_ipython().system('tar -zxvf ml-100k.tar.gz')
# Check contents of directory!

# Consumer Price Index values keyed by year (as strings).
# Source: http://www.bls.gov/cpi/home.htm
CPI = {
    "2010": 218.056,
    "2011": 224.939,
    "2012": 229.594,
    "2013": 232.957,
}

# Film table from the ml-100k archive: pipe-separated, no header row, so the
# column names are supplied here. Column names are kept exactly as-is
# (including the space in "movie id") because later cells may index by them.
films = pd.read_csv(
    './ml-100k/u.item',
    sep="|",
    names=[
        "movie id", "movie_title", "release_date", "video_release_date",
        "IMDb_URL", "unknown", "Action", "Adventure", "Animation",
        "Children's", "Comedy", "Crime", "Documentary", "Drama", "Fantasy",
        "Film-Noir", "Horror", "Musical", "Mystery", "Romance", "Sci-Fi",
        "Thriller", "War", "Western",
    ],
    # u.item contains non-UTF-8 (ISO-8859-1) characters in some titles;
    # without an explicit encoding read_csv fails on Python 3.
    encoding="latin-1",
)

# User table from the same archive, indexed by user id.
users = pd.read_csv(
    './ml-100k/u.user',
    sep="|",
    names=["user_id", "age", "gender", "occupation", "zip_code"],
    index_col="user_id",
)