#!/usr/bin/env python # coding: utf-8 # # Tutorial Session 2 # # Note: This is the **instructor copy**, for the tutorial use the **participant copy** (which is based on this copy with most of the code removed). # ## Package imports # In[3]: import numpy as np import matplotlib.pyplot as plt get_ipython().run_line_magic('matplotlib', 'inline') from IPython.core.display import Image # In[4]: import MDAnalysis as mda print(mda.__version__) # ## Data files # Defined `datapath` to point to the directory where you [downloaded the tutorial trajectories](http://becksteinlab.github.io/MDAnalysis-workshop/datadownload.html) to. # In[5]: import os.path datapath = os.path.expanduser("~/Workshops/CECAM/tutorial/sandbox") # In[6]: datapath # ## AdK test trajectories # Look again at AdK trajectories... # In[15]: eqtop = "../../sandbox/equilibrium/adk4AKE.psf" eqtrj= "../../sandbox/equilibrium/1ake_007-nowater-core-dt240ps.dcd" top = "../../sandbox/dims/adk4ake.psf" trj = "../../sandbox/dims/dims_co_001.dcd" #coDIMS = [os.path.join(datadir, "dims", "dims_co_{0:03n}.dcd".format(i)) for i in range(1,4)] #ocDIMS = [os.path.join(datadir, "dims", "dims_oc_{0:03n}.dcd".format(i)) for i in range(1,4)] # In[16]: u = mda.Universe(top, trj) # In[17]: u # In[18]: eq = mda.Universe(eqtop, eqtrj) eq # ## Trajectories # The `Universe.trajectory` object is the entry point to all trajectory functionality: # In[19]: u.trajectory # Trajectories have a length in frames and in **ps** (the base MDAnalysis time unit): # In[20]: u.trajectory.n_frames # In[21]: len(u.trajectory) # In[22]: u.trajectory.totaltime # ### `Timestep` # They also contain a `Timestep` object, which holds the **current** # * frame and time # * unitcell dimensions (if available) as `[A, B, C, alpha, beta, gamma]` # * raw positions (and velocities and forces, if available) # * additional data, if defined by the trajectory format # In[23]: ts = u.trajectory.ts ts # In[24]: ts.frame # In[25]: ts.time # In[26]: ts.dimensions # In[27]: eq.trajectory.ts.dimensions # In[28]: ts.positions # How many coordinates? # In[29]: ts.positions.shape # In[30]: ts.velocities # In[31]: ts.forces # What is the time between frames? # In[32]: ts.dt # ### Moving forward... or anywhere # `Universe.trajectory` behaves a bit like a list of Timesteps: # * index --> `Timestep`: go to frame # * iterate --> `Timestep`: go to each frame in sequence # * slice --> new trajectory iterator # #### Trajectory indexing # # Trajectory indexing and slicing uses **0-based indices** (as in standard Python) and MDAnalysis also numbers frames starting with 0. Thus the "tenth frame" in a trajectory has ``ts.frame == 9``. # # Let's move to an arbitrary frame in the trajectory: # In[33]: u.trajectory[42] # In[34]: u.trajectory.ts.frame # In[35]: ts.frame # In[22]: u.trajectory[-23] # In[23]: u.trajectory.ts # Note that you *cannot* "remember" a `Timestep`: The following will *not keep a copy of the timeset at frame 20*: # In[24]: ts20 = u.trajectory[20] # In[25]: ts20.frame # In[26]: u.trajectory[42] # In[27]: ts20 # If you *really* want to, you can make a copy (but this is not used very much): # In[28]: ts42copy = u.trajectory.ts.copy() # In[29]: ts42copy.frame # In[30]: u.trajectory[2] # In[31]: ts42copy.frame # #### Iteration and Slicing # The **most common usage pattern** in MDAnalysis: Access each frame in a trajectory: # In[32]: results = [] for ts in u.trajectory: results.append(ts.frame) results = np.array(results) results # ... or arbitrary slices: # In[33]: results = np.fromiter((ts.frame for ts in u.trajectory[10::20]), dtype=np.int64) results # ... and for numpy afficionados, "fancy indexing" also works: # In[34]: results = np.fromiter((ts.frame for ts in u.trajectory[[10, 20, 10, 0]]), dtype=np.int64) results # *Note*: It does not make sense to create a list of timesteps like # ```python # timesteps = [ts for ts in u.trajectory] # ``` # or # ```python # timesteps = list(u.trajectory) # ``` # because each list item will refer to the same currently active timestep (the first one because at the end of iteration the trajectory rewinds). # #### Challenge: Reverse the trajectory # Produce an array from `ts.frame` that is reversed, ie `[101, 100, ..., 1, 0]` # In[38]: results = np.array([ts.frame for ts in reversed(u.trajectory)]) results # In[39]: np.array([ts.frame for ts in u.trajectory[::-1]]) # Note: slicing and indexing works for many major trajectory formats (dcd, xtc, trr, Amber netcdf) but not for all of them (although work is in progress...) # ## Analyzing trajectories with `Atomgroup`s # Using the raw `Timestep` positions is cumbersome. Moving the time step automatically updates all positions that `Atomgroup.positions` sees at this instance: # In[44]: u.trajectory[0] # In[45]: protein = u.select_atoms("protein") protein # Calculate the radius of gyration # \begin{equation*} # R_{\text{gyr}} = \sqrt{\frac{1}{M}\sum_{i=1}^{N}m_{i}(\textbf{r}_{i} - \textbf{R})^{2}} # \end{equation*} # with `AtomGroup.radius_of_gyration()`: # In[46]: protein.radius_of_gyration() # What's the current frame? # In[47]: u.trajectory.ts.frame # Move to the last one: # In[50]: u.trajectory[-1] # In[51]: protein.radius_of_gyration() # Remember: **All coordinate-dependent properties of an `AtomGroup` change when the trajectory index is changed.** Think of a pointer moving along the trajectory file. `AtomGroup` fetches coordinates from the frame that the pointer points to. # #### Example: radius of gyration time series # Collect `(time, radius_of_gyration)` for the protein for frames and plot $R_{\mathrm{gyr}}(t)$, with time in ps. # In[52]: protein = u.select_atoms("protein") results = [] for ts in u.trajectory[:500]: results.append((ts.time, protein.radius_of_gyration())) time, rgyr = np.transpose(results) # In[54]: ax = plt.subplot(111) ax.plot(time, rgyr) ax.set_xlabel("time (ps)") ax.set_ylabel(r"$R_{\mathrm{gyr}}$ ($\AA$)") # #### Challenge: Plot $R_\mathrm{gyr}$ for equilibrium MD simulation # In[201]: peq = eq.select_atoms("protein") eqrgyr = np.array([(ts.time, peq.radius_of_gyration()) for ts in eq.trajectory]).transpose() # In[202]: plt.plot(eqrgyr[0], eqrgyr[1]) # #### CHALLENGE: Collective variable analysis of AdK # # 1. Generate timeseries for domain angles, i.e. $t$, $\theta_\mathrm{NMP}(t)$, $\theta_\mathrm{LID}(t)$, using the previously defined functions `theta_NMP()` and `theta_LID()`. # 2. Plot (a) the time series, (b) the angles against each other. # 3. Bonus: # - is the radius of gyration correlated with the angles? # - for one of the equilibrium trajectories? # In[55]: import numpy as np from numpy.linalg import norm def theta_NMP(ag): """Calculate the NMP-CORE angle for E. coli AdK in degrees""" C = ag.select_atoms("resid 115:125 and backbone").center_of_geometry() B = ag.select_atoms("resid 90:100 and backbone").center_of_geometry() A = ag.select_atoms("resid 35:55 and backbone").center_of_geometry() BA = A - B BC = C - B theta = np.arccos(np.dot(BA, BC)/(norm(BA)*norm(BC))) return np.rad2deg(theta) def theta_LID(ag): """Calculate the LID-CORE angle for E. coli AdK in degrees""" C = ag.select_atoms("resid 179:185 and backbone").center_of_geometry() B = ag.select_atoms("resid 115:125 and backbone").center_of_geometry() A = ag.select_atoms("resid 125:153 and backbone").center_of_geometry() BA = A - B BC = C - B theta = np.arccos(np.dot(BA, BC)/(norm(BA)*norm(BC))) return np.rad2deg(theta) # In[57]: data = np.array([[ts.time, theta_NMP(u), theta_LID(u)] for ts in u.trajectory]) # In[58]: data[:3] # In[59]: time, aNMP, aLID = data.transpose() # In[61]: ax1 = plt.subplot(121) ax1.plot(time, aNMP, label="NMP") ax1.plot(time, aLID, label="LID") ax1.set_xlabel("time (ps)") ax1.set_ylabel("angle (degree)") ax1.legend(loc="best") ax2 = plt.subplot(122) ax2.plot(aNMP, aLID) ax2.set_xlabel(r"$\theta_\mathrm{NMP}$") ax2.set_ylabel(r"$\theta_\mathrm{LID}$") ax2.set_aspect(1) plt.tight_layout() # In[70]: ax = plt.subplot(111) ax.plot(rgyr-rgyr[0], aNMP-aNMP[0], label="NMP") ax.plot(rgyr-rgyr[0], aLID-aLID[0], label="LID") ax.set_xlabel(r"$\Delta R_\mathrm{gyr}$ ($\AA$)") ax.set_ylabel(r"$\Delta\theta$ (degrees)") ax.legend(loc="best") # Bonus: note, only use first 200 frames of the equilibrium trajectory. # In[179]: eqdata = np.array([[ts.time, theta_NMP(eq), theta_LID(eq)] for ts in eq.trajectory[:200]]).transpose() # In[185]: ax1 = plt.subplot(121) ax1.plot(eqdata[0]/1000, eqdata[1], label="NMP") ax1.plot(eqdata[0]/1000, eqdata[2], label="LID") ax1.set_xlabel("time (ns)") ax1.set_ylabel("angle (degrees)") ax2 = plt.subplot(122) ax2.plot(eqdata[1], eqdata[2]) ax2.plot(aNMP, aLID, color="red") ax2.set_xlabel(r"$\theta_\mathrm{NMP}$") ax2.set_ylabel(r"$\theta_\mathrm{LID}$") ax2.set_aspect(1) plt.tight_layout() # ### Digression: using frame indices to filter a trajectory # If you have a quantity that is ceap to compute, you can use it to filter a trajectory: # In[213]: peq = eq.select_atoms("protein") closed = [ts.frame for ts in eq.trajectory[:100] if peq.radius_of_gyration() < 19] # In[207]: u.trajectory[closed] # In[212]: plt.plot(eqdata[0, closed], eqdata[1, closed], '+') plt.plot(eqdata[0, closed], eqdata[2, closed], '+') # ## Trajectory manipulation # # One can easily modify the coordinates of the current timestep: Either directly in `Timestep.positions` or via `AtomGroup` methods such as `translate` or `rotate`. # # In particular, we can optimally superimpose RMSD fitting. # # In[126]: import MDAnalysis.analysis.rms as rms # We want to calculate the RMSD for an optimal superposition of the C-alpha atoms. As a reference, we take the coordinates of the initial frame of the trajectory: # In[131]: ca = u.select_atoms("name CA") u.trajectory[0] ref = ca.positions.copy() ref_com = ca.center_of_mass() # Remove translations by moving to the center of mass of the reference: # In[139]: u.trajectory[-1] ca.translate(-ca.center_of_mass() + ref_com) # Then calculate the RMSD after optimum RMSD superposition with `rmsd()`: # In[142]: rms.rmsd(ca.positions, ref) # Actually, `rmsd()` has the *center* keyword that does a superposition for us: # In[147]: u.trajectory[20] rms.rmsd(ca.positions, ref, center=True) # In[165]: u.trajectory[-2] rms.rmsd(ca.positions, ref, center=True) # You can manually superimpose using translation and the optimal rotation matrix (from `rms.rotation_matrix()`) but for many use cases see the contents of the [MDAnalysis.analysis.align](http://pythonhosted.org/MDAnalysis/documentation_pages/analysis/align.html) module. # In[166]: # maybe??? alignto() # #### Challenge: RMSD time series # Calculate $\mathrm{RMSD}(t)$ (relative to the initial frame) and plot it. # # * use the "manual" approach (instead of `MDAnalysis.rms.RMSD` ;-) # * Bonus: compare the DIMS trajectories to the equilibrium trajectories (hint: plot as fraction of total time) # In[161]: data = np.array([[ts.time, rms.rmsd(ca.positions, ref, center=True)] for ts in u.trajectory]).transpose() # In[162]: data.shape # In[163]: ax1 = plt.subplot(111) ax1.plot(data[0], data[1], label="DIMS") ax1.set_xlabel("time (ps)") ax1.set_ylabel(r"RMSD (\AA)") ax1.legend(loc="best") # ## Writing trajectories # Writing out coordinates (David introduced the selection writer). # ### Trajectory Writer # Basic pattern: # 1. get a trajectory writer # - use `with` and context manager # - provide `n_atoms` (typically from the `AtomGroup` that you are going to write) # - can use `format` keyword argument if extension is not recognized # 2. iterate through a trajectory # 3. `write()` each time step (or `AtomGroup`) # 4. (close trajectory --- done by context manager) # Converting trajectory formats: # In[81]: xtc = "new.xtc" with mda.Writer(xtc, n_atoms=u.atoms.n_atoms) as W: for ts in u.trajectory: W.write(ts) # In[83]: get_ipython().system('ls -l *.xtc') # In[84]: uxtc = mda.Universe(psf, xtc) uxtc # In[85]: import os os.unlink(xtc) # # MDSynthesis appetizer # Working with many trajectories is tedious.... # In[ ]: