import numpy as np

# Seed the global NumPy random generator so the random arrays created later
# are reproducible from run to run.
np.random.seed(2)

# Display settings, applied in one call: 4 digits of precision, summarise
# arrays with more than 5 elements, and suppress scientific notation for
# small values.
np.set_printoptions(precision=4, threshold=5, suppress=True)

import numpy as np

# --- creating arrays ---------------------------------------------------------

# build an array from a Python list using the array function
arr = np.array([0, 9, 5, 4, 3])
arr

# constant-filled arrays of a given length
np.zeros(4)

np.ones(4)

# np.empty only allocates memory: the contents are arbitrary
np.empty(4)

# the integers 0..3, the array counterpart of range(4)
np.arange(4)

# five draws from the standard normal distribution
arr = np.random.randn(5)
arr

# element type and dimensions of the array
arr.dtype

arr.shape

# you can be explicit about the data type that you want
np.empty(4, dtype=np.int32)

# fixed-width byte strings; np.bytes_ is used because the np.string_ alias
# was removed in NumPy 2.0
np.array(['numpy', 'pandas', 'pytables'], dtype=np.bytes_)

float_arr = np.array([4.4, 5.52425, -0.1234, 98.1], dtype=np.float64)
# casting to an integer type truncates the decimal part (returns a new array,
# float_arr itself is unchanged)
float_arr.astype(np.int32)

# --- indexing and slicing a 1-D array ----------------------------------------
arr = np.array([0, 9, 1, 4, 64])
arr[3]      # a single element, by position

arr[1:3]    # half-open slice: positions 1 and 2

arr[:2]     # the first two elements

# set the last two elements to 55 (the scalar is broadcast over the slice)
arr[-2:] = 55
arr

# --- indexing a 2-D array ----------------------------------------------------
arr_2d = np.array([
    [5, 3, 4],
    [0, 1, 2],
    [1, 1, 10],
    [0, 0, 0.1],
])
arr_2d

arr_2d[0]       # the first row

arr_2d[:, 0]    # the first column

arr_2d[:2]      # the first two rows

# --- slices are views, not copies --------------------------------------------
arr = np.array([0, 3, 1, 4, 64])
arr

subarr = arr[2:4]
subarr[1] = 99  # writes through to arr[3]
arr

# --- boolean indexing --------------------------------------------------------
arr = np.array([10, 20])
idx = np.array([True, False])
arr[idx]        # keeps the elements where idx is True


# NOTE: despite its name, arr_2d is rebound here to a 1-D array of 5 samples.
arr_2d = np.random.randn(5)
arr_2d

# an element-wise comparison yields a boolean mask of the same shape
arr_2d < 0

# indexing with the mask keeps only the elements where it is True
arr_2d[arr_2d < 0]

# conditions are combined element-wise with &; each comparison needs its own
# parentheses because & binds tighter than < and >
arr_2d[(arr_2d < 0) & (arr_2d > -0.5)]

# assigning through a mask modifies the array in place: clip negatives to 0
arr_2d[arr_2d < 0] = 0
arr_2d

# a 6x3 array holding 0..17, filled row by row
arr = np.arange(18).reshape((6, 3))
arr

# fancy selection of rows in a particular order (repeats are allowed)
arr[[0, 4, 4]]

# a single index list always selects whole rows
arr[[5, 3, 1]]

# paired row and column lists index into individual elements and flatten the
# result: here (5, 2), (3, 1) and (1, 0)
arr[[5, 3, 1], [2, 1, 0]]

# select a submatrix: np.ix_ combines every listed row with every listed column
arr[np.ix_([5, 3, 1], [2, 1])]

# arithmetic between equal-shape arrays is applied element by element
arr = np.array([0, 9, 1.02, 4, 32])
arr - arr

arr * arr


# arithmetic with a scalar applies the scalar to every element
arr = np.array([0, 9, 1.02, 4, 64])
arr * 5

arr + 10

# element-wise square root, written as a power
arr ** 0.5

# --- broadcasting: centering the rows and columns of a 4x2 array -------------
arr = np.random.randn(4, 2)
arr

# the mean over axis 0 gives one value per column, shape (2,)
mean_row = np.mean(arr, axis=0)
mean_row

# shapes (4, 2) and (2,) broadcast: mean_row is subtracted from every row
centered_rows = arr - mean_row
centered_rows

# every column mean is now 0, up to floating-point error
np.mean(centered_rows, axis=0)

# the mean over axis 1 gives one value per row, shape (4,)
mean_col = np.mean(arr, axis=1)
mean_col

# shapes (4, 2) and (4,) do NOT broadcast, so the naive subtraction raises
# ValueError. The error is caught and reported so that the demonstration does
# not abort the rest of the script.
try:
    arr - mean_col
except ValueError as err:
    print("cannot broadcast: %s" % err)

# make the 1-D array a column vector
mean_col.reshape((4, 1))

# shapes (4, 2) and (4, 1) broadcast: each row has its own mean subtracted
centered_cols = arr - mean_col.reshape((4, 1))
centered_cols

# every row mean is now 0, up to floating-point error
centered_cols.mean(axis=1)

# NaN never compares equal to anything, not even to itself
np.nan != np.nan

# ...so an equality test against np.nan is False for every element
np.array([10, 5, 4, np.nan, 1, np.nan]) == np.nan

# np.isnan is the way to locate the NaN entries
np.isnan(np.array([10, 5, 4, np.nan, 1, np.nan]))

import pandas as pd

# pd.set_printoptions was removed from pandas; the same display settings
# (3 digits of precision, HTML repr in notebooks) are set via the options API.
pd.set_option('display.precision', 3)
pd.set_option('display.notebook_repr_html', True)

import pandas as pd

# a Series built from an ndarray alone gets a default integer index
values = np.array([2.0, 1.0, 5.0, 0.97, 3.0, 10.0, 0.0599, 8.0])
ser = pd.Series(values)
# print(...) with a single argument behaves the same on Python 2 and 3
print(ser)

# the same values, this time with an explicit label for each element
labels = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H']
ser = pd.Series(data=values, index=labels)
print(ser)


# a Series built from a dict uses the keys as index labels
movie_rating = {
    'age': 1,
    'gender': 'F',
    'genres': 'Drama',
    'movie_id': 1193,
    'occupation': 10,
    'rating': 5,
    'timestamp': 978300760,
    'title': "One Flew Over the Cuckoo's Nest (1975)",
    'user_id': 1,
    'zip': '48067'
    }
ser = pd.Series(movie_rating)
# print(...) with a single argument behaves the same on Python 2 and 3
print(ser)


# the labels and the underlying values
ser.index

ser.values

# access by position: .iloc is explicit, whereas ser[0] on a label-indexed
# Series is ambiguous between "label 0" and "position 0"
ser.iloc[0]

# access by label
ser['gender']

# scalar access by label; .at replaces the removed Series.get_value
ser.at['gender']

ser_1 = pd.Series(data=[1, 3, 4], index=['A', 'B', 'C'])
ser_2 = pd.Series(data=[5, 5, 5], index=['A', 'G', 'C'])
# addition aligns on labels, not positions: labels present in only one
# operand ('B' and 'G') produce NaN in the result
print(ser_1 + ser_2)


# build from a dict of equal-length lists or ndarrays; the keys become the
# column names
pd.DataFrame({
    'col_1': [0.12, 7, 45, 10],
    'col_2': [0.9, 9, 34, 11],
})

# `columns` selects and orders the columns; a name that is missing from the
# dict ('col_3') gives a column of missing values
pd.DataFrame(
    {'col_1': [0.12, 7, 45, 10], 'col_2': [0.9, 9, 34, 11]},
    columns=['col_1', 'col_2', 'col_3'],
)

# `index` supplies the row labels
pd.DataFrame(
    {'col_1': [0.12, 7, 45, 10], 'col_2': [0.9, 9, 34, 11]},
    index=['obs1', 'obs2', 'obs3', 'obs4'],
    columns=['col_1', 'col_2', 'col_3'],
)


# one rating event, described field by field
movie_rating = dict(
    gender='F',
    genres='Drama',
    movie_id=1193,
    rating=5,
    timestamp=978300760,
    user_id=1,
)
# two identical Series become the two columns of a DataFrame; the Series
# labels become the row index
ser_1, ser_2 = pd.Series(movie_rating), pd.Series(movie_rating)
df = pd.DataFrame({'r_1': ser_1, 'r_2': ser_2})
# both axes can carry a name of their own
df.columns.name = 'rating_events'
df.index.name = 'rating_data'
df

# transpose: rating events as rows, fields as columns
df = df.transpose()
df

# the axis labels and the underlying values
df.columns

df.index

df.values

# start again from the untransposed frame; drop returns a new frame and
# leaves df untouched
df = pd.DataFrame({'r_1': ser_1, 'r_2': ser_2})
df.drop('genres', axis=0)   # drop a row

df.drop('r_1', axis=1)      # drop a column

# careful with the order here: a plain list is assigned by position, so it
# has to follow the order of df.index
df['r_3'] = ['F', 'Drama', 1193, 5, 978300760, 1]
df

df = pd.DataFrame(data={'col_1': [0.12, 7, 45, 10], 'col_2': [0.9, 9, 34, 11]},
                  columns=['col_1', 'col_2', 'col_3'],
                  index=['obs1', 'obs2', 'obs3', 'obs4'])
# select one column, by key or as an attribute
df['col_1']

df.col_1

# select several columns, in the order given
df[['col_2', 'col_1']]

# Row selection. The .ix indexer was removed from pandas; .loc selects by
# label and .iloc by position, which also removes .ix's label/position
# ambiguity.
df.loc['obs3']

df.iloc[0]

df.iloc[:2]

# first two rows (by position) of columns chosen by label: translate the
# positions to row labels first, then select everything by label
df.loc[df.index[:2], 'col_2']

df.loc[df.index[:2], ['col_1', 'col_2']]

import pandas as pd

def _read_movielens(path, columns):
    """Read one header-less, '::'-delimited data file into a DataFrame.

    Parameters
    ----------
    path : str
        Location of the ``.dat`` file.
    columns : list of str
        Column names to assign, since the file has no header row.

    Returns
    -------
    pandas.DataFrame
    """
    # A separator longer than one character is interpreted as a regular
    # expression, which only the Python parser engine supports. Naming the
    # engine explicitly avoids the ParserWarning emitted on the fallback.
    return pd.read_table(path, sep='::', header=None, names=columns,
                         engine='python')


unames = ['user_id', 'gender', 'age', 'occupation', 'zip']
users = _read_movielens('../data/ml-1m/users.dat', unames)

rnames = ['user_id', 'movie_id', 'rating', 'timestamp']
ratings = _read_movielens('../data/ml-1m/ratings.dat', rnames)

mnames = ['movie_id', 'title', 'genres']
movies = _read_movielens('../data/ml-1m/movies.dat', mnames)

# show how one of them looks
ratings.head(5)