import pandas as pd
import numpy as np
import pylab as pl

from sklearn.datasets import load_iris

# Load the iris dataset and wrap its measurements in a DataFrame whose
# columns are the dataset's feature names; the class labels are attached
# as an extra 'species' column.
iris = load_iris()
df = pd.DataFrame(data=iris.data, columns=iris.feature_names)
df['species'] = iris.target

from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier

# Standalone estimator instances; the loop below does not use them, it
# fits the fresh instances stored in `clfs`.
svm_clf = SVC()
neighbors_clf = KNeighborsClassifier()

# (label, estimator) pairs to fit and compare on the iris data.
clfs = [
    ("svc", SVC()),
    ("KNN", KNeighborsClassifier()),
]
for name, clf in clfs:
    clf.fit(df[iris.feature_names], df.species)
    # Predict on the same DataFrame that was used for fitting, so the input
    # type and column names seen at predict time match those seen at fit
    # time. The values are identical to iris.data, which df was built from.
    # The single-argument print(...) form emits the same text under both
    # Python 2 and Python 3.
    print("%s %s" % (name, clf.predict(df[iris.feature_names])))
    print("*" * 80)

from sklearn.ensemble import RandomForestClassifier
# Fit a random forest on the iris features (fit returns the estimator
# itself, so construction and fitting can be chained).
clf = RandomForestClassifier().fit(df[iris.feature_names], df['species'])
# In-sample predictions for every row.
clf.predict(df[iris.feature_names])
# Cross-tabulate the actual species (rows) against the predictions (columns).
pd.crosstab(df['species'], clf.predict(df[iris.feature_names]))

from sklearn import tree

# Fit a decision tree on the iris features.
# max_features="sqrt" is the explicit spelling of what "auto" meant for a
# decision-tree classifier (sqrt(n_features)); "auto" is no longer accepted
# by recent scikit-learn releases.
# min_samples_leaf=10 requires at least 10 training samples in every leaf.
clf = tree.DecisionTreeClassifier(max_features="sqrt",
                                  min_samples_leaf=10)
clf.fit(df[iris.feature_names], df.species)

from sklearn.externals.six import StringIO
# Write the fitted tree in Graphviz DOT format to iris.dot.
# The return value of export_graphviz is deliberately not assigned back to
# `f`: rebinding the `with` target inside its own block only obscures which
# object is the open file handle.
with open("iris.dot", 'w') as f:
    tree.export_graphviz(clf, out_file=f)

# You will need to install Graphviz
# (http://www.graphviz.org/Download.php) and pydot (pip install pydot).
! dot -Tpng iris.dot -o iris.png

from IPython.core.display import Image
# Display the local PNG file iris.png (in a notebook, a bare expression at
# the end of a cell is rendered as that cell's output).
Image("iris.png")

# Display a remote image referenced by URL, with width=700.
Image(url="http://1.bp.blogspot.com/-ME24ePzpzIM/UQLWTwurfXI/AAAAAAAAANw/W3EETIroA80/s1600/drop_shadows_background.png",
      width=700)

from sklearn.datasets import load_boston
# Load the Boston housing dataset; the code below reads its .data,
# .feature_names and .target attributes.
# NOTE(review): load_boston appears to be deprecated/removed in recent
# scikit-learn releases -- confirm against the installed version.
boston = load_boston()

import re


def camel_to_snake(column_name):
    """
    Return the snake_case form of a camelCase (or PascalCase) string.

    An underscore is inserted at each word boundary and the result is
    lower-cased; a run of capitals is kept together as one word.

    Example:
        camel_to_snake("javaLovesCamelCase")
        > java_loves_camel_case
    See Also:
        http://stackoverflow.com/questions/1175208/elegant-python-function-to-convert-camelcase-to-camel-case
    """
    # Pass 1: split before a capitalised word (a capital followed by one or
    # more lowercase letters), e.g. "HTTPResponse" -> "HTTP_Response".
    capitalised_word = re.compile('(.)([A-Z][a-z]+)')
    # Pass 2: split wherever a lowercase letter or digit is directly
    # followed by a capital, e.g. "getHTTP" -> "get_HTTP".
    lower_then_upper = re.compile('([a-z0-9])([A-Z])')

    after_first_pass = capitalised_word.sub(r'\1_\2', column_name)
    after_second_pass = lower_then_upper.sub(r'\1_\2', after_first_pass)
    return after_second_pass.lower()

# Build a DataFrame of the Boston features with snake_case column names.
df = pd.DataFrame(boston.data)
# Take exactly as many names as there are data columns. This equals the
# former `[:-1]` when feature_names carries one trailing extra entry
# (presumably the target's name -- TODO confirm), and still works when
# feature_names lists only the feature columns, where `[:-1]` would drop a
# real feature and make the assignment fail with a length mismatch.
df.columns = [camel_to_snake(col)
              for col in boston.feature_names[:df.shape[1]]]
# add in prices
df['price'] = boston.target
# Sanity check on the expected row count; the single-argument print(...)
# form emits the same text under both Python 2 and Python 3.
print(len(df) == 506)
df.head()

from sklearn.linear_model import LinearRegression

# Fit an ordinary least-squares model of price on three predictor columns
# (fit returns the estimator itself, so construction and fitting can be
# chained).
features = ['age', 'lstat', 'tax']
lm = LinearRegression().fit(df[features], df['price'])

# Scatter the actual prices (x axis) against the model's predictions (y axis).
pl.scatter(x=df['price'], y=lm.predict(df[features]))
# Overlay the y = x diagonal: points on this line are predicted exactly.
straight_line = np.arange(60)
pl.plot(straight_line, straight_line)
pl.title("Fitted Values")