"""Multiple correspondence analysis (MCA) walkthrough on the burgundies data.

The script reads ``../data/burgundies.csv``, fits two ``mca.MCA`` models (one
with the library defaults, one with ``benzecri=False``), assembles summary
tables of eigenvalues, factor scores, squared cosines and contributions, and
draws two factor-score scatter plots.

It was exported from a notebook: bare expressions such as ``table1`` are cell
outputs and have no effect when the file is run as a plain script.
"""
import matplotlib.pyplot as plt
import mca
import numpy as np
import pandas as pd

# Enable inline figures when running under IPython/Jupyter.  Plain Python has
# no ``get_ipython`` builtin, in which case matplotlib's default backend is
# kept instead of failing on the ``%matplotlib`` magic syntax.
try:
    get_ipython().run_line_magic('matplotlib', 'inline')
except NameError:
    pass

np.set_printoptions(formatter={'float': '{: 0.4f}'.format})
pd.set_option('display.precision', 5)
pd.set_option('display.max_columns', 25)

# --- Load the data ----------------------------------------------------------
data = pd.read_csv('../data/burgundies.csv',
                   skiprows=1, index_col=0, header=0)
X = data.drop('oak_type', axis=1)
j_sup = data.oak_type  # supplementary column
# Supplementary row; laid out one line per expert block (7 + 9 + 6 values,
# matching ``src_index`` below).
i_sup = np.array([0, 1, 0, 1, 0, .5, .5,
                  1, 0, 1, 0, 0, 1, 0, .5, .5,
                  1, 0, .5, .5, 0, 1])
ncols = 10

X.shape, j_sup.shape, i_sup.shape

# --- Table 1: raw indicator data with a three-level column header ----------
src_index = ['Expert 1'] * 7 + ['Expert 2'] * 9 + ['Expert 3'] * 6
var_index = (['fruity'] * 2 + ['woody'] * 3 + ['coffee'] * 2
             + ['fruity'] * 2 + ['roasted'] * 2 + ['vanillin'] * 3
             + ['woody'] * 2
             + ['fruity'] * 2 + ['butter'] * 2 + ['woody'] * 2)
yn = ['y', 'n']
rg = ['1', '2', '3']
val_index = yn + rg + yn * 3 + rg + yn * 4
col_index = pd.MultiIndex.from_arrays(
    [src_index, var_index, val_index],
    names=['source', 'variable', 'value'],
)

table1 = pd.DataFrame(data=X.values, index=X.index, columns=col_index)
table1.loc['W?'] = i_sup
table1['', 'Oak Type', ''] = j_sup
table1

# --- Fit the models ---------------------------------------------------------
# ``mca_ben`` uses the library defaults; ``mca_ind`` switches the Benzecri
# correction off explicitly.
mca_ben = mca.MCA(X, ncols=ncols)
mca_ind = mca.MCA(X, ncols=ncols, benzecri=False)
print(mca.MCA.__doc__)

# --- Table 2: eigenvalues and explained variance ---------------------------
data = {
    'Iλ': pd.Series(mca_ind.L),
    'τI': mca_ind.expl_var(greenacre=False, N=4),
    'Zλ': pd.Series(mca_ben.L),
    'τZ': mca_ben.expl_var(greenacre=False, N=4),
    'cλ': pd.Series(mca_ben.L),
    'τc': mca_ind.expl_var(greenacre=True, N=4),
}
# 'Indicator Matrix', 'Benzecri Correction', 'Greenacre Correction'
columns = ['Iλ', 'τI', 'Zλ', 'τZ', 'cλ', 'τc']
table2 = pd.DataFrame(data=data, columns=columns).fillna(0)
table2.index += 1  # factors are numbered from 1
table2.loc['Σ'] = table2.sum()
table2.index.name = 'Factor'
table2

mca_ind.inertia, mca_ind.L.sum(), mca_ben.inertia, mca_ben.L.sum()

data = np.array([mca_ben.L[:2],
                 mca_ben.expl_var(greenacre=True, N=2) * 100]).T
df = pd.DataFrame(data=data, columns=['cλ', '%c'], index=range(1, 3))
df

# --- Tables 3 and 4: row and column statistics for the first two factors ---
fs, cos, cont = 'Factor score', 'Squared cosines', 'Contributions x 1000'
# Both tables share the same (statistic, factor) row index.
stat_index = pd.MultiIndex.from_product([[fs, cos, cont], range(1, 3)])

table3 = pd.DataFrame(columns=X.index, index=stat_index)
table3.loc[fs, :] = mca_ben.fs_r(N=2).T
table3.loc[cos, :] = mca_ben.cos_r(N=2).T
table3.loc[cont, :] = mca_ben.cont_r(N=2).T * 1000
table3.loc[fs, 'W?'] = mca_ben.fs_r_sup(pd.DataFrame([i_sup]), N=2)[0]
np.round(table3.astype(float), 2)

table4 = pd.DataFrame(columns=col_index, index=stat_index)
table4.loc[fs, :] = mca_ben.fs_c(N=2).T
table4.loc[cos, :] = mca_ben.cos_c(N=2).T
table4.loc[cont, :] = mca_ben.cont_c(N=2).T * 1000
fs_c_sup = mca_ben.fs_c_sup(mca.dummy(pd.DataFrame(j_sup)), N=2)
table4.loc[fs, ('Oak', '', 1)] = fs_c_sup[0]
table4.loc[fs, ('Oak', '', 2)] = fs_c_sup[1]
np.round(table4.astype(float), 2)

# --- Plot 1: row factor scores ---------------------------------------------
points = table3.loc[fs].values
labels = table3.columns.values

plt.figure()
plt.margins(0.1)
plt.axhline(0, color='gray')
plt.axvline(0, color='gray')
plt.xlabel('Factor 1')
plt.ylabel('Factor 2')
plt.scatter(*points, s=120, marker='o', c='r', alpha=.5, linewidths=0)
for label, x, y in zip(labels, *points):
    plt.annotate(label, xy=(x, y), xytext=(x + .03, y + .03))
plt.show()

# --- Plot 2: column factor scores, grouped by source -----------------------
col_scores = table4.T[fs]
# Small uniform jitter in [-0.025, 0.025) added to every score before
# plotting.  The generator is not seeded, so the jitter differs between runs.
noise = 0.05 * (np.random.rand(*col_scores.shape) - 0.5)
# Group on the scalar level name: a single-element list makes pandas >= 2.0
# yield 1-tuples as group keys, which would corrupt the legend labels.
fs_by_source = col_scores.add(noise).groupby(level='source')

fig, ax = plt.subplots()
ax.margins(0.1)
ax.axhline(0, color='gray')
ax.axvline(0, color='gray')
ax.set_xlabel('Factor 1')
ax.set_ylabel('Factor 2')
markers = '^', 's', 'o', 'o'
colors = 'r', 'g', 'b', 'y'
for (label, points), marker, color in zip(fs_by_source, markers, colors):
    ax.plot(*points.T.values, marker=marker, color=color, label=label,
            linestyle='', alpha=.5, mew=0, ms=12)
ax.legend(numpoints=1, loc=4)
plt.show()