"""Load three example datasets and prepare the car data for learning.

The script, in order:

1. loads the bundled iris dataset and prints its first ten rows;
2. reads the glass features/targets from CSV with numpy and prints the
   first five entries of each;
3. reads the car records and labels from CSV, one-hot encodes the records
   with ``DictVectorizer``, integer-encodes the labels with
   ``LabelEncoder``, and splits both into training and test sets.

All results are left in module-level names (``iris``, ``glass_data``,
``glass_target``, ``car_data``, ``car_target``, ``vec``, ``le``, ``target``
and the four ``*_train`` / ``*_test`` arrays).
"""
import csv

import numpy as np
from sklearn import datasets
from sklearn import preprocessing
from sklearn.feature_extraction import DictVectorizer

try:
    from sklearn.model_selection import train_test_split
except ImportError:
    # Older scikit-learn releases only provide the original module path.
    from sklearn.cross_validation import train_test_split

# Single-argument ``print(...)`` calls are used throughout: they emit the
# same text as the original Python 2 print statements and also parse on
# Python 3.

# --- Iris -------------------------------------------------------------------
iris = datasets.load_iris()
print(iris.data[0:10])

# --- Glass ------------------------------------------------------------------
glass_data = np.loadtxt('../data/glass_data.csv', delimiter=',')
glass_target = np.loadtxt('../data/glass_target.csv')
print('%s %s' % (glass_data[0:5], glass_target[0:5]))

# --- Car: read raw records and labels ---------------------------------------
# ``with`` guarantees both file handles are closed once the rows are read.
# NOTE(review): the 'rU' mode is kept from the original; the 'U' flag is
# rejected by Python 3.11+ - confirm the target interpreter.
with open('../data/cardata.csv', 'rU') as car_data_file:
    car_data = list(csv.DictReader(car_data_file))
with open('../data/cartarget.csv', 'rU') as car_target_file:
    car_target = list(csv.reader(car_target_file))

# Bare expression: only displays the record in an interactive session.
car_data[10]

# --- Car: one-hot encode the categorical feature dicts ----------------------
vec = DictVectorizer()
car_data = vec.fit_transform(car_data).toarray()
print('Vectorized: %s' % (car_data[10],))
# A one-row 2-D slice is passed because newer scikit-learn rejects 1-D
# input; the result is still a one-element list of dicts.
print('Unvectorized: %s' % (vec.inverse_transform(car_data[10:11]),))

# --- Car: integer-encode the class labels -----------------------------------
le = preprocessing.LabelEncoder()
le.fit(["unacc", "acc", "good", "vgood"])
# Only the first CSV row is used as the label sequence.
# Assumes cartarget.csv stores every label on that single row - TODO confirm.
target = le.transform(car_target[0])
print('Transformed: %s' % (target[10],))
# A one-element slice is decoded and then unwrapped, so the printed value is
# the single label while the call stays valid for encoders that require 1-D
# input.
print('Inverse transformed: %s' % (le.inverse_transform(target[10:11])[0],))

# --- Car: train/test split --------------------------------------------------
# No random_state is passed, so the split differs from run to run.
car_data_train, car_data_test, target_train, target_test = train_test_split(
    car_data, target)
print('Training set: %s' % len(car_data_train))
print('Test set: %s' % len(car_data_test))