from yahmm import * import seaborn as sns, numpy as np from matplotlib.patches import Rectangle from __future__ import division, print_function # %install_ext https://raw.githubusercontent.com/rasbt/python_reference/master/ipython_magic/watermark.py %load_ext watermark %watermark -t -d -z -p yahmm,seaborn,numpy,matplotlib aas = list('IFLVMWAYGCTSPHNQDEKR') tm_freq = np.array([1826,1370,2562,1751,616,414,1657,615,1243, 289,755,806,423,121,250,141,104,110,78,83])/15214 other_freq = np.array([2187,1854,4156,2935,1201,819,3382,1616, 3352,960,2852,3410,2640,1085,2279,2054, 2551,2983,2651,2933]) / 47900 # Add emission frequencies to states _m = State(DiscreteDistribution(dict(zip(aas, tm_freq))), name='M') _x = State(DiscreteDistribution(dict(zip(aas, other_freq))), name='X') model = Model('TransmembraneDomainPrediction', start=_x) # Transition frequencies from Table 2 model.add_transition(_m, _x, 0.046) model.add_transition(_m, _m, 0.954) model.add_transition(_x, _x, 0.982) model.add_transition(_x, _m, 0.015) model.bake(verbose=True) ada1d_seq = list('MTFRDLLSVSFEGPRPDSSAGGSSAGGGGGSAGGAAPSEGPAVGGVPGGAGGGGGVVGAG\ SGEDNRSSAGEPGSAGAGGDVNGTAAVGGLVVSAQGVGVGVFLAAFILMAVAGNLLVILS\ VACNRHLQTVTNYFIVNLAVADLLLSATVLPFSATMEVLGFWAFGRAFCDVWAAVDVLCC\ TASILSLCTISVDRYVGVRHSLKYPAIMTERKAAAILALLWVVALVVSVGPLLGWKEPVP\ PDERFCGITEEAGYAVFSSVCSFYLPMAVIVVMYCRVYVVARSTTRSLEAGVKRERGKAS\ EVVLRIHCRGAATGADGAHGMRSAKGHTFRSSLSVRLLKFSREKKAAKTLAIVVGVFVLC\ WFPFFFVLPLGSLFPQLKPSEGVFKVIFWLGYFNSCVNPLIYPCSSREFKRAFLRLLRCQ\ CRRRRRRRPLWRVYGHHWRASTSGLRQDCAPSSGDAPPGAPLALTALPDPDPEPPGTPEM\ QAPVASRRKPPSAFREWRLLGPFRRPTTQLRAKVSSLSHKIRAGGAQRAEAACAQRSEVE\ AVSLGVPHEVAEGATCQAYELADYSNLRETDI') actual_transmems = [(96,121),(134,159),(170,192),(214,238), (252,275),(349,373),(381,405)] viterbi_path = model.viterbi(ada1d_seq)[1] viterbi_pred = np.array([state[1].name for state in viterbi_path[1:]]) viterbi_pred_mem = viterbi_pred == 'M' starts = np.nonzero(viterbi_pred_mem & ~np.roll(viterbi_pred_mem, 1))[0] ends = np.nonzero(viterbi_pred_mem & ~np.roll(viterbi_pred_mem, -1))[0] transmems = zip(starts, ends) trans, ems = model.forward_backward(ada1d_seq) ems = np.exp(ems) probs = ems/np.sum(ems, axis=1)[:, np.newaxis] %matplotlib inline colors = sns.color_palette('deep', n_colors=6, desat=0.5) sns.set_context(rc={"figure.figsize": (14, 6)}) sns.plt.axhline(y=1.1, c=colors[0], alpha=0.7) sns.plt.axhline(y=0.5, c=colors[1], alpha=0.7) sns.plt.xlim([1, len(ada1d_seq)+1]) sns.plt.ylim([0,1.2]) sns.plt.ylabel(r'posterior probs, $\gamma_k$') sns.plt.xlabel(r'$k$') axis = sns.plt.gca() # Plot viterbi predicted TMDs for start, end in transmems: axis.add_patch(Rectangle((start+1, 1.075), end-start+1, 0.05, facecolor=colors[0], alpha=0.7)) axis.text(480,1.13, 'viterbi') # Plot actual TMDs for start, end in actual_transmems: axis.add_patch(Rectangle((start+1, .475), end-start+1, 0.05, facecolor=colors[1], alpha=0.7)) axis.text(480, .53, 'actual') # Get indices of states indices = { state.name: i for i, state in enumerate( model.states ) } sns.plt.plot(range(1, len(ada1d_seq)+1), probs[:, indices['M']], c=colors[2], alpha=0.7) sns.plt.gcf().savefig('posteriors.png')