import numpy as np

# NumPy basics walkthrough: array creation, dtypes, indexing, views,
# boolean / fancy indexing, element-wise arithmetic, broadcasting and NaN.
#
# Bare expressions (e.g. a lone ``arr``) are kept on purpose: they display
# their value when run interactively and are no-ops when run as a script.

# Print options: 4 digits of precision, summarize arrays with more than
# 5 elements, and suppress scientific notation for small floats.
np.set_printoptions(precision=4, threshold=5, suppress=True)

# Seed the global random generator so the random examples are reproducible.
np.random.seed(2)

# --- Array creation --------------------------------------------------------
# Build an array using the array function.
arr = np.array([0, 9, 5, 4, 3])
arr

np.zeros(4)
np.ones(4)
np.empty(4)
np.arange(4)

arr = np.random.randn(5)
arr
arr.dtype
arr.shape

# --- Data types ------------------------------------------------------------
# You can be explicit about the data type that you want.
np.empty(4, dtype=np.int32)
# np.bytes_ is the fixed-width byte-string type; the old alias np.string_
# was removed in NumPy 2.0.
np.array(['numpy', 'pandas', 'pytables'], dtype=np.bytes_)

float_arr = np.array([4.4, 5.52425, -0.1234, 98.1], dtype=np.float64)
# Casting to an integer type truncates the decimal part.
float_arr.astype(np.int32)

# --- Basic indexing and slicing -------------------------------------------
arr = np.array([0, 9, 1, 4, 64])
arr[3]
arr[1:3]
arr[:2]

# Set the last two elements to 55 (the scalar is broadcast over the slice).
arr[-2:] = 55
arr

arr_2d = np.array([[5, 3, 4], [0, 1, 2], [1, 1, 10], [0, 0, 0.1]])
arr_2d
# Get the first row.
arr_2d[0]
# Get the first column.
arr_2d[:, 0]
# Get the first two rows.
arr_2d[:2]

# --- Slices are views ------------------------------------------------------
arr = np.array([0, 3, 1, 4, 64])
arr
subarr = arr[2:4]
# Writing through the slice also changes ``arr`` (arr[3] becomes 99).
subarr[1] = 99
arr

# --- Boolean indexing ------------------------------------------------------
arr = np.array([10, 20])
idx = np.array([True, False])
arr[idx]

# NOTE: despite its name, ``arr_2d`` is a 1-D array of 5 samples from here on.
arr_2d = np.random.randn(5)
arr_2d
arr_2d < 0
arr_2d[arr_2d < 0]
# Combine masks with ``&``; the parentheses are required.
arr_2d[(arr_2d > -0.5) & (arr_2d < 0)]
# Clip the negative entries to zero in place.
arr_2d[arr_2d < 0] = 0
arr_2d

# --- Fancy indexing --------------------------------------------------------
arr = np.arange(18).reshape(6, 3)
arr
# Fancy selection of rows in a particular order (rows may repeat).
arr[[0, 4, 4]]
arr[[5, 3, 1]]
# Index into individual elements and flatten: picks (5, 2), (3, 1), (1, 0).
arr[[5, 3, 1], [2, 1, 0]]
# Select a submatrix: rows 5, 3, 1 crossed with columns 2, 1.
arr[np.ix_([5, 3, 1], [2, 1])]

# --- Element-wise arithmetic ----------------------------------------------
arr = np.array([0, 9, 1.02, 4, 32])
arr - arr
arr * arr

arr = np.array([0, 9, 1.02, 4, 64])
5 * arr
10 + arr
arr ** 0.5

# --- Broadcasting ----------------------------------------------------------
arr = np.random.randn(4, 2)
arr

# Mean over axis 0 has shape (2,), which broadcasts against (4, 2) directly.
mean_row = np.mean(arr, axis=0)
mean_row
centered_rows = arr - mean_row
centered_rows
np.mean(centered_rows, axis=0)

# Mean over axis 1 has shape (4,), which does NOT broadcast against (4, 2).
mean_col = np.mean(arr, axis=1)
mean_col
try:
    centered_cols = arr - mean_col
except ValueError as err:
    # Expected failure, shown for illustration; previously this aborted
    # the whole script.
    print("broadcasting (4, 2) with (4,) fails: %s" % err)

# Make the 1-D array a column vector so it broadcasts across the columns.
mean_col.reshape((4, 1))
centered_cols = arr - mean_col.reshape((4, 1))
centered_cols
centered_cols.mean(axis=1)

# --- NaN comparisons -------------------------------------------------------
# NaN never compares equal to anything, including itself ...
np.nan != np.nan
# ... so an ``== np.nan`` test is False everywhere.
np.array([10, 5, 4, np.nan, 1, np.nan]) == np.nan
import pandas as pd

# pandas basics walkthrough: Series construction and access, index
# alignment, DataFrame construction, transposition, dropping and selection.
#
# Bare expressions are kept on purpose: they display their value when run
# interactively and are no-ops when run as a script.

# Use np.isnan to locate NaN entries (an ``==`` comparison cannot find them).
np.isnan(np.array([10, 5, 4, np.nan, 1, np.nan]))

# Display options. ``pd.set_printoptions`` no longer exists; the same
# settings are exposed through the options API.
pd.set_option('display.precision', 3)
pd.set_option('display.notebook_repr_html', True)

# --- Series ----------------------------------------------------------------
# From an ndarray: a default integer index is generated.
values = np.array([2.0, 1.0, 5.0, 0.97, 3.0, 10.0, 0.0599, 8.0])
ser = pd.Series(values)
print(ser)

# From an ndarray with explicit labels.
values = np.array([2.0, 1.0, 5.0, 0.97, 3.0, 10.0, 0.0599, 8.0])
labels = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H']
ser = pd.Series(data=values, index=labels)
print(ser)

# From a dict: the keys become the index.
movie_rating = {
    'age': 1,
    'gender': 'F',
    'genres': 'Drama',
    'movie_id': 1193,
    'occupation': 10,
    'rating': 5,
    'timestamp': 978300760,
    'title': "One Flew Over the Cuckoo's Nest (1975)",
    'user_id': 1,
    'zip': '48067',
}
ser = pd.Series(movie_rating)
print(ser)

ser.index
ser.values

# Positional access uses .iloc; plain ``ser[0]`` on a string-labelled Series
# relies on a deprecated positional fallback.
ser.iloc[0]
# Label access.
ser['gender']
# Scalar label access (replaces the removed ``Series.get_value``).
ser.at['gender']

# Arithmetic aligns on the index: labels present in only one operand
# ('B' and 'G' here) yield NaN.
ser_1 = pd.Series(data=[1, 3, 4], index=['A', 'B', 'C'])
ser_2 = pd.Series(data=[5, 5, 5], index=['A', 'G', 'C'])
print(ser_1 + ser_2)

# --- DataFrame construction ------------------------------------------------
# Build from a dict of equal-length lists or ndarrays.
pd.DataFrame({'col_1': [0.12, 7, 45, 10], 'col_2': [0.9, 9, 34, 11]})

# Naming a column that is not in the data ('col_3') creates it filled
# with NaN.
pd.DataFrame(data={'col_1': [0.12, 7, 45, 10], 'col_2': [0.9, 9, 34, 11]},
             columns=['col_1', 'col_2', 'col_3'])

# Same, with explicit row labels.
pd.DataFrame(data={'col_1': [0.12, 7, 45, 10], 'col_2': [0.9, 9, 34, 11]},
             columns=['col_1', 'col_2', 'col_3'],
             index=['obs1', 'obs2', 'obs3', 'obs4'])

# Build from a dict of Series: one column per Series.
movie_rating = {
    'gender': 'F',
    'genres': 'Drama',
    'movie_id': 1193,
    'rating': 5,
    'timestamp': 978300760,
    'user_id': 1,
}
ser_1 = pd.Series(movie_rating)
ser_2 = pd.Series(movie_rating)
df = pd.DataFrame({'r_1': ser_1, 'r_2': ser_2})
df.columns.name = 'rating_events'
df.index.name = 'rating_data'
df

# Transpose: rating events become rows.
df = df.T
df

df.columns
df.index
df.values

# --- Dropping and adding ---------------------------------------------------
df = pd.DataFrame({'r_1': ser_1, 'r_2': ser_2})
# drop() returns a new frame; ``df`` itself is left unchanged here.
df.drop('genres', axis=0)
df.drop('r_1', axis=1)

# Careful with the order here: a plain list is assigned by position, so it
# must follow the order of the row index.
df['r_3'] = ['F', 'Drama', 1193, 5, 978300760, 1]
df

# --- Selection -------------------------------------------------------------
df = pd.DataFrame(data={'col_1': [0.12, 7, 45, 10], 'col_2': [0.9, 9, 34, 11]},
                  columns=['col_1', 'col_2', 'col_3'],
                  index=['obs1', 'obs2', 'obs3', 'obs4'])

# Column selection: by key, by attribute, and several columns at once.
df['col_1']
df.col_1
df[['col_2', 'col_1']]

# Row selection. ``.ix`` was removed from pandas; use .loc for labels and
# .iloc for positions.
df.loc['obs3']
df.iloc[0]
import pandas as pd

# Finish the DataFrame selection examples, then load the three '::'-separated
# tables (users, ratings, movies) from ../data/ml-1m/.
#
# Bare expressions are kept on purpose: they display their value when run
# interactively and are no-ops when run as a script.

# ``.ix`` was removed from pandas. With a string row index, ``df.ix[:2]``
# sliced by position, so the equivalents below use .iloc for the rows and
# .loc when column labels are involved.
df.iloc[:2]
df.loc[df.index[:2], 'col_2']
df.loc[df.index[:2], ['col_1', 'col_2']]

# A separator longer than one character is interpreted as a regular
# expression, which only the Python parsing engine supports; request it
# explicitly rather than relying on the fallback warning.
# NOTE(review): if decoding fails under Python 3, these files may need an
# explicit ``encoding=`` argument - confirm against the actual data.
unames = ['user_id', 'gender', 'age', 'occupation', 'zip']
users = pd.read_table('../data/ml-1m/users.dat',
                      sep='::', header=None, names=unames, engine='python')

rnames = ['user_id', 'movie_id', 'rating', 'timestamp']
ratings = pd.read_table('../data/ml-1m/ratings.dat',
                        sep='::', header=None, names=rnames, engine='python')

mnames = ['movie_id', 'title', 'genres']
movies = pd.read_table('../data/ml-1m/movies.dat',
                       sep='::', header=None, names=mnames, engine='python')

# Show how one of them looks.
ratings.head(5)