"""Explore the iris data set with PCA and kernel PCA.

The script reads ``iris.csv`` from the working directory, fits a PCA and a
KernelPCA on the first four columns, draws a scree plot of the PCA variance
ratios, and scatter-plots the first two score columns of each model coloured
by the ``Species`` column.
"""
# %pylab inline  # IPython-only magic; uncomment when running in a notebook.
# Nothing below relies on the names that magic injects into the namespace.

import matplotlib.pylab as plt
import pandas as pd
from ggplot import aes, geom_point, ggplot
from sklearn.decomposition import KernelPCA, PCA


def _score_frame(scores, species):
    """Return the first two score columns labelled with the species.

    Args:
        scores: 2-D array of projected samples with at least two columns.
        species: pandas Series of labels, one per row of ``scores``.

    Returns:
        DataFrame with columns ``var1``, ``var2`` and ``species``.
    """
    # Reuse the label index so rows line up exactly as pd.concat(axis=1) would.
    frame = pd.DataFrame(
        scores[:, :2], columns=['var1', 'var2'], index=species.index
    )
    frame['species'] = species
    return frame


def _species_scatter(frame):
    """Build a ggplot scatter of ``var1`` against ``var2`` coloured by species."""
    return ggplot(frame, aes('var1', 'var2', color='species')) + geom_point()


df = pd.read_csv('iris.csv')
print(df.head())

# Positional slice of the first four columns (`.ix` no longer exists in pandas).
# NOTE(review): assumes these four columns are the numeric measurements - confirm
# against the actual layout of iris.csv.
X = df.iloc[:, :4]

# --- Linear PCA ---------------------------------------------------------
pca = PCA(n_components='mle')  # let PCA choose the number of components
pca.fit(X)
print(pca.components_)          # principal-component loadings
print(pca.components_[0])       # loadings of the first principal component
print(pca.explained_variance_)  # variance contribution of each component

# Scree plot: variance ratios rescaled so that the first component equals 1.
variance = pca.explained_variance_ratio_
readable_variance = variance / variance[0]
component = pca.n_components_
plt.plot(range(component), readable_variance)
plt.show()

# Principal-component scores of every sample, reduced to the first two axes.
score = _score_frame(pca.transform(X), df['Species'])
print(score.head())
# NOTE(review): this relies on the ggplot object drawing itself when its repr
# is evaluated, as a notebook does for a cell's last expression - confirm with
# the installed ggplot version.
print(_species_scatter(score))

print(X.corr())

# --- Kernel PCA ---------------------------------------------------------
# KernelPCA has no 'mle' option: n_components must be an int or None. Only the
# first two score columns are used below, so request exactly two components.
kpca = KernelPCA(n_components=2)
kpca.fit(X)

score = _score_frame(kpca.transform(X), df['Species'])
print(score.head())
print(_species_scatter(score))