#!/usr/bin/env python
# coding: utf-8

# # Tutorial Session 2
# 
# Note: This is the **instructor copy**, for the tutorial use the **participant copy** (which is based on this copy with most of the code removed).

# ## Package imports 

# In[3]:


import numpy as np
import matplotlib.pyplot as plt
get_ipython().run_line_magic('matplotlib', 'inline')

from IPython.core.display import Image


# In[4]:


import MDAnalysis as mda
print(mda.__version__)


# ## Data files
# Defined `datapath` to point to the directory where you [downloaded the tutorial trajectories](http://becksteinlab.github.io/MDAnalysis-workshop/datadownload.html) to.

# In[5]:


import os.path
datapath = os.path.expanduser("~/Workshops/CECAM/tutorial/sandbox")


# In[6]:


datapath


# ## AdK test trajectories
# Look again at AdK trajectories...

# In[15]:


eqtop = "../../sandbox/equilibrium/adk4AKE.psf"
eqtrj= "../../sandbox/equilibrium/1ake_007-nowater-core-dt240ps.dcd"

top = "../../sandbox/dims/adk4ake.psf"
trj = "../../sandbox/dims/dims_co_001.dcd"

#coDIMS = [os.path.join(datadir, "dims", "dims_co_{0:03n}.dcd".format(i)) for i in range(1,4)] 
#ocDIMS = [os.path.join(datadir, "dims", "dims_oc_{0:03n}.dcd".format(i)) for i in range(1,4)] 


# In[16]:


u = mda.Universe(top, trj)


# In[17]:


u


# In[18]:


eq = mda.Universe(eqtop, eqtrj)
eq


# ## Trajectories

# The `Universe.trajectory` object is the entry point to all trajectory functionality:

# In[19]:


u.trajectory


# Trajectories have a length in frames and in **ps** (the base MDAnalysis time unit):

# In[20]:


u.trajectory.n_frames


# In[21]:


len(u.trajectory)


# In[22]:


u.trajectory.totaltime


# ### `Timestep` 

# They also contain a `Timestep` object, which holds the **current**
# * frame and time
# * unitcell dimensions (if available) as `[A, B, C, alpha, beta, gamma]`
# * raw positions (and velocities and forces, if available)
# * additional data, if defined by the trajectory format

# In[23]:


ts = u.trajectory.ts
ts


# In[24]:


ts.frame


# In[25]:


ts.time


# In[26]:


ts.dimensions


# In[27]:


eq.trajectory.ts.dimensions


# In[28]:


ts.positions


# How many coordinates?

# In[29]:


ts.positions.shape


# In[30]:


ts.velocities


# In[31]:


ts.forces


# What is the time between frames?

# In[32]:


ts.dt


# ### Moving forward... or anywhere
# `Universe.trajectory` behaves a bit like a list of Timesteps:
# * index --> `Timestep`: go to frame
# * iterate --> `Timestep`: go to each frame in sequence
# * slice --> new trajectory iterator

# #### Trajectory indexing
# 
# Trajectory indexing and slicing uses **0-based indices** (as in standard Python) and MDAnalysis also numbers frames starting with 0. Thus the "tenth frame" in a trajectory has ``ts.frame == 9``.
# 
# Let's move to an arbitrary frame in the trajectory:

# In[33]:


u.trajectory[42]


# In[34]:


u.trajectory.ts.frame


# In[35]:


ts.frame


# In[22]:


u.trajectory[-23]


# In[23]:


u.trajectory.ts


# Note that you *cannot* "remember" a `Timestep`: The following will *not keep a copy of the timeset at frame 20*:

# In[24]:


ts20 = u.trajectory[20]


# In[25]:


ts20.frame


# In[26]:


u.trajectory[42]


# In[27]:


ts20


# If you *really* want to, you can make a copy (but this is not used very much):

# In[28]:


ts42copy = u.trajectory.ts.copy()


# In[29]:


ts42copy.frame


# In[30]:


u.trajectory[2]


# In[31]:


ts42copy.frame


# #### Iteration and Slicing
# The **most common usage pattern** in MDAnalysis: Access each frame in a trajectory:

# In[32]:


results = []
for ts in u.trajectory:
    results.append(ts.frame)
results = np.array(results)
results


# ... or arbitrary slices:

# In[33]:


results = np.fromiter((ts.frame for ts in u.trajectory[10::20]), dtype=np.int64)
results


# ... and for numpy afficionados, "fancy indexing" also works:

# In[34]:


results = np.fromiter((ts.frame for ts in u.trajectory[[10, 20, 10, 0]]), dtype=np.int64)
results


# *Note*: It does not make sense to create a list of timesteps like
# ```python
# timesteps = [ts for ts in u.trajectory]
# ```
# or
# ```python
# timesteps = list(u.trajectory)
# ```
# because each list item will refer to the same currently active timestep (the first one because at the end of iteration the trajectory rewinds).

# #### Challenge: Reverse the trajectory 

# Produce an array from `ts.frame` that is reversed, ie `[101, 100, ..., 1, 0]`

# In[38]:


results = np.array([ts.frame for ts in reversed(u.trajectory)])
results


# In[39]:


np.array([ts.frame for ts in u.trajectory[::-1]])


# Note: slicing and indexing works for many major trajectory formats (dcd, xtc, trr, Amber netcdf) but not for all of them (although work is in progress...)

# ## Analyzing trajectories with `Atomgroup`s
# Using the raw `Timestep` positions is cumbersome. Moving the time step automatically updates all positions that `Atomgroup.positions` sees at this instance:

# In[44]:


u.trajectory[0]


# In[45]:


protein = u.select_atoms("protein")
protein


# Calculate the radius of gyration
# \begin{equation*}
#     R_{\text{gyr}} = \sqrt{\frac{1}{M}\sum_{i=1}^{N}m_{i}(\textbf{r}_{i} - \textbf{R})^{2}}
# \end{equation*}
#  with `AtomGroup.radius_of_gyration()`:

# In[46]:


protein.radius_of_gyration()


# What's the current frame?

# In[47]:


u.trajectory.ts.frame


# Move to the last one:

# In[50]:


u.trajectory[-1]


# In[51]:


protein.radius_of_gyration()


# Remember: **All coordinate-dependent properties of an `AtomGroup` change when the trajectory index is changed.** Think of a pointer moving along the trajectory file. `AtomGroup` fetches coordinates from the frame that the pointer points to.

# #### Example: radius of gyration time series
# Collect `(time, radius_of_gyration)` for the protein for frames and plot $R_{\mathrm{gyr}}(t)$, with time in ps.

# In[52]:


protein = u.select_atoms("protein")
results = []
for ts in u.trajectory[:500]:
    results.append((ts.time, protein.radius_of_gyration()))
time, rgyr = np.transpose(results)


# In[54]:


ax = plt.subplot(111)
ax.plot(time, rgyr)
ax.set_xlabel("time (ps)")
ax.set_ylabel(r"$R_{\mathrm{gyr}}$ ($\AA$)")


# #### Challenge: Plot $R_\mathrm{gyr}$ for equilibrium MD simulation 

# In[201]:


peq = eq.select_atoms("protein")
eqrgyr = np.array([(ts.time, peq.radius_of_gyration()) for ts in eq.trajectory]).transpose()


# In[202]:


plt.plot(eqrgyr[0], eqrgyr[1])


# #### CHALLENGE: Collective variable analysis of AdK
# 
# 1. Generate timeseries for domain angles, i.e. $t$, $\theta_\mathrm{NMP}(t)$, $\theta_\mathrm{LID}(t)$, using the previously defined functions `theta_NMP()` and `theta_LID()`.
# 2. Plot (a) the time series, (b) the angles against each other.
# 3. Bonus: 
#    - is the radius of gyration correlated with the angles?
#    - for one of the equilibrium trajectories?

# In[55]:


import numpy as np
from numpy.linalg import norm

def theta_NMP(ag):
    """Calculate the NMP-CORE angle for E. coli AdK in degrees"""
    C = ag.select_atoms("resid 115:125 and backbone").center_of_geometry()
    B = ag.select_atoms("resid 90:100 and backbone").center_of_geometry()
    A = ag.select_atoms("resid 35:55 and backbone").center_of_geometry()
    BA = A - B
    BC = C - B
    theta = np.arccos(np.dot(BA, BC)/(norm(BA)*norm(BC)))
    return np.rad2deg(theta)

def theta_LID(ag):
    """Calculate the LID-CORE angle for E. coli AdK in degrees"""
    C = ag.select_atoms("resid 179:185 and backbone").center_of_geometry()
    B = ag.select_atoms("resid 115:125 and backbone").center_of_geometry()
    A = ag.select_atoms("resid 125:153 and backbone").center_of_geometry()
    BA = A - B
    BC = C - B
    theta = np.arccos(np.dot(BA, BC)/(norm(BA)*norm(BC)))
    return np.rad2deg(theta)


# In[57]:


data = np.array([[ts.time, theta_NMP(u), theta_LID(u)] for ts in u.trajectory])


# In[58]:


data[:3]


# In[59]:


time, aNMP, aLID = data.transpose()


# In[61]:


ax1 = plt.subplot(121)
ax1.plot(time, aNMP, label="NMP")
ax1.plot(time, aLID, label="LID")
ax1.set_xlabel("time (ps)")
ax1.set_ylabel("angle (degree)")
ax1.legend(loc="best")
ax2 = plt.subplot(122)
ax2.plot(aNMP, aLID)
ax2.set_xlabel(r"$\theta_\mathrm{NMP}$")
ax2.set_ylabel(r"$\theta_\mathrm{LID}$")
ax2.set_aspect(1)
plt.tight_layout()


# In[70]:


ax = plt.subplot(111)
ax.plot(rgyr-rgyr[0], aNMP-aNMP[0], label="NMP")
ax.plot(rgyr-rgyr[0], aLID-aLID[0], label="LID")
ax.set_xlabel(r"$\Delta R_\mathrm{gyr}$ ($\AA$)")
ax.set_ylabel(r"$\Delta\theta$ (degrees)")
ax.legend(loc="best")


# Bonus: note, only use first 200 frames of the equilibrium trajectory.

# In[179]:


eqdata = np.array([[ts.time, theta_NMP(eq), theta_LID(eq)] for ts in eq.trajectory[:200]]).transpose()


# In[185]:


ax1 = plt.subplot(121)
ax1.plot(eqdata[0]/1000, eqdata[1], label="NMP")
ax1.plot(eqdata[0]/1000, eqdata[2], label="LID")
ax1.set_xlabel("time (ns)")
ax1.set_ylabel("angle (degrees)")
ax2 = plt.subplot(122)
ax2.plot(eqdata[1], eqdata[2])
ax2.plot(aNMP, aLID, color="red")
ax2.set_xlabel(r"$\theta_\mathrm{NMP}$")
ax2.set_ylabel(r"$\theta_\mathrm{LID}$")
ax2.set_aspect(1)
plt.tight_layout()


# ### Digression: using frame indices to filter a trajectory

# If you have a quantity that is ceap to compute, you can use it to filter a trajectory:

# In[213]:


peq = eq.select_atoms("protein")
closed = [ts.frame for ts in eq.trajectory[:100] if peq.radius_of_gyration() < 19]


# In[207]:


u.trajectory[closed]


# In[212]:


plt.plot(eqdata[0, closed], eqdata[1, closed], '+')
plt.plot(eqdata[0, closed], eqdata[2, closed], '+')


# ## Trajectory manipulation
# 
# One can easily modify the coordinates of the current timestep: Either directly in `Timestep.positions` or via `AtomGroup` methods such as `translate` or `rotate`.
# 
# In particular, we can optimally superimpose RMSD fitting.
# 

# In[126]:


import MDAnalysis.analysis.rms as rms


# We want to calculate the RMSD for an optimal superposition of the C-alpha atoms. As a reference, we take the coordinates of the initial frame of the trajectory:

# In[131]:


ca = u.select_atoms("name CA")

u.trajectory[0]
ref = ca.positions.copy()
ref_com = ca.center_of_mass()


# Remove translations by moving to the center of mass of the reference:

# In[139]:


u.trajectory[-1]
ca.translate(-ca.center_of_mass() + ref_com)


# Then calculate the RMSD after optimum RMSD superposition with `rmsd()`:

# In[142]:


rms.rmsd(ca.positions, ref)


# Actually, `rmsd()` has the *center* keyword that does a superposition for us:

# In[147]:


u.trajectory[20]
rms.rmsd(ca.positions, ref, center=True)


# In[165]:


u.trajectory[-2]
rms.rmsd(ca.positions, ref, center=True)


# You can manually superimpose using translation and the optimal rotation matrix (from `rms.rotation_matrix()`) but for many use cases see the contents of the [MDAnalysis.analysis.align](http://pythonhosted.org/MDAnalysis/documentation_pages/analysis/align.html) module.

# In[166]:


# maybe??? alignto()


# #### Challenge: RMSD time series
# Calculate $\mathrm{RMSD}(t)$ (relative to the initial frame) and plot it.
# 
# * use the "manual" approach (instead of `MDAnalysis.rms.RMSD` ;-)
# * Bonus: compare the DIMS trajectories to the equilibrium trajectories (hint: plot as fraction of total time)

# In[161]:


data = np.array([[ts.time, rms.rmsd(ca.positions, ref, center=True)] 
                 for ts in u.trajectory]).transpose()


# In[162]:


data.shape


# In[163]:


ax1 = plt.subplot(111)
ax1.plot(data[0], data[1], label="DIMS")
ax1.set_xlabel("time (ps)")
ax1.set_ylabel(r"RMSD (\AA)")
ax1.legend(loc="best")


# ## Writing trajectories
# Writing out coordinates (David introduced the selection writer).

# ### Trajectory Writer
# Basic pattern:
# 1. get a trajectory writer
#    - use `with` and context manager
#    - provide `n_atoms` (typically from the `AtomGroup` that you are going to write)
#    - can use `format` keyword argument if extension is not recognized
# 2. iterate through a trajectory
# 3. `write()` each time step (or `AtomGroup`)
# 4. (close trajectory --- done by context manager)

# Converting trajectory formats:

# In[81]:


xtc = "new.xtc"
with mda.Writer(xtc, n_atoms=u.atoms.n_atoms) as W:
    for ts in u.trajectory:
        W.write(ts)


# In[83]:


get_ipython().system('ls -l *.xtc')


# In[84]:


uxtc = mda.Universe(psf, xtc)
uxtc


# In[85]:


import os
os.unlink(xtc)


# # MDSynthesis appetizer 

# Working with many trajectories is tedious....

# In[ ]: