#!/usr/bin/env python
# coding: utf-8

# # Creating and importing molecular systems in OpenMM

# ## Preliminaries
# 
# First, we import OpenMM. It's recommended that you always import this way:

# In[1]:


from simtk import openmm, unit
from simtk.openmm import app
import numpy as np


# 
# ## The OpenMM `System` object
# 
# The OpenMM [`System`](http://docs.openmm.org/7.1.0/api-python/library.html#system) object is a container that completely specifies how to compute the forces and energies for all particles in the molecular system. 
# 
# Because it is part of the C++ OpenMM API, `System` (and the related `Force` classes) are not true Python objects but instead SWIG-wrapped C++ objects, so you will find their naming and accessor conventions conform to C++ (rather than Pythonic) standards.
# 
# There are many ways to create or import OpenMM `System` objects:
# * We can assemble all the particles and define their interactions ourselves programmatically using the OpenMM C++ API
# * We could load in a serialized version of the `System` object via the built-in XML serializer
# * We can use the various importers in the [`simtk.openmm.app`](http://docs.openmm.org/7.1.0/api-python/app.html) layer to read in a system definition in AMBER, CHARMM, or gromacs format
# * We can create our own `System` object from a PDB file using the OpenMM [`ForceField`](http://docs.openmm.org/7.1.0/userguide/application.html#force-fields) facility
# * We can use a large variety of pre-built test systems provided by the [`openmmtools.testsystems`](http://openmmtools.readthedocs.io) module for testing code on a battery of different system types with different kinds of forces
# 
# Remember that if you want more information on `openmm.System`, you can use `help(openmm.System)`:

# ## Creating a periodic Lennard-Jones system
# 
# We'll start by building a simple periodic Lennard-Jones system using the Python wrappers for the main OpenMM C++ API.

# In[2]:


# Define the parameters of the Lennard-Jones periodic system
nparticles = 512
reduced_density = 0.05
mass = 39.9 * unit.amu
charge = 0.0 * unit.elementary_charge
sigma = 3.4 * unit.angstroms
epsilon = 0.238 * unit.kilocalories_per_mole

# Create a system and add particles to it
system = openmm.System()
for index in range(nparticles):
    # Particles are added one at a time
    # Their indices in the System will correspond with their indices in the Force objects we will add later
    system.addParticle(mass)
    
# Set the periodic box vectors
number_density = reduced_density / sigma**3
volume = nparticles * (number_density ** -1)
box_edge = volume ** (1. / 3.)
box_vectors = np.diag([box_edge/unit.angstrom for i in range(3)]) * unit.angstroms
system.setDefaultPeriodicBoxVectors(*box_vectors)

# Add Lennard-Jones interactions using a NonbondedForce
force = openmm.NonbondedForce()
force.setNonbondedMethod(openmm.NonbondedForce.CutoffPeriodic)
for index in range(nparticles): # all particles must have parameters assigned for the NonbondedForce
    # Particles are assigned properties in the same order as they appear in the System object
    force.addParticle(charge, sigma, epsilon)
force.setCutoffDistance(3.0 * sigma) # set cutoff (truncation) distance at 3*sigma
force.setUseSwitchingFunction(True) # use a smooth switching function to avoid force discontinuities at cutoff
force.setSwitchingDistance(2.5 * sigma) # turn on switch at 2.5*sigma
force.setUseDispersionCorrection(True) # use long-range isotropic dispersion correction
force_index = system.addForce(force) # system takes ownership of the NonbondedForce object


# ### The OpenMM Python API uses units throughout
# 
# Note the use of unit-bearing quantities via [`simtk.unit`](http://docs.openmm.org/7.1.0/userguide/library.html#units-and-dimensional-analysis). OpenMM's Python API uses a powerful units system that prevents many common kinds of mistakes with unit conversion, such as those that caused the loss of the [Mars Polar Lander](http://www.cnn.com/TECH/space/9909/30/mars.metric.02/). We'll examine the units system in more detail later---for now, we simply recognize that best practices dictate we feed OpenMM unit-bearing quantities (though this is not strictly necessary), and that it will return unit-bearing quantities.

# In[3]:


# construct a Quantity using the multiply operator
bond_length = 1.53 * unit.nanometer
# equivalently using the explicit Quantity constructor
bond_length = unit.Quantity(1.53, unit.nanometer)
# or more verbosely
bond_length = unit.Quantity(value=1.53, unit=unit.nanometer)
# print a unit in human-readable form
print(bond_length)
# or display it in machine-readable form
repr(bond_length)


# ## Accesssors for System attributes and Forces
# 
# The [`System`](http://docs.openmm.org/7.1.0/api-python/generated/simtk.openmm.openmm.System.html#simtk.openmm.openmm.System) object has a number of useful accessors for examining the contents of the system and 

# In[4]:


# Get the number of particles in the System
print('The system has %d particles' % system.getNumParticles())
# Print a few particle masses
for index in range(5):
    print('Particle %5d has mass %12.3f amu' % (index, system.getParticleMass(index) / unit.amu))
# Print number of constraints
print('The system has %d constraints' % system.getNumConstraints())
# Get the number of forces and iterate through them
print('There are %d forces' % system.getNumForces())
for (index, force) in enumerate(system.getForces()):
    print('Force %5d : %s' % (index, force.__class__.__name__))


# ### Define positions for the Lennard-Jones particles
# 
# To compute energies or forces, we will need to define some positions for the system before we can compute energies or forces.
# We'll generate some randomly, but as we will see later, there are many more ways to load molecular positions.

# In[5]:


positions = box_edge * np.random.rand(nparticles,3) 
print(positions)


# ## Computing energies and forces
# 
# To compute energies and forces efficiently, OpenMM requires you create a [`Context`](http://docs.openmm.org/7.1.0/api-python/generated/simtk.openmm.openmm.Context.html#simtk.openmm.openmm.Context) that handles the computation on a particular piece of hardware. This could be a GPU or CPU using a specific set of fast computation kernels.
# 
# OpenMM forces us to explicitly move data (positions, velocities, forces) into and out of the `Context` through specific calls so that we remember that each of these operations carries overhead of moving data across the bus to the GPU, for example. To achieve maximum speed, OpenMM can perform a number of operations---such as integrating many molecular dynamics steps---fully on the GPU without slowing things down by moving data back and forth over the bus. We'll see examples of this soon.
# 
# First, we just want to compute energies and forces and then minimize the energy of our system. To do that, we first need to create an [`Integrator`](http://docs.openmm.org/7.1.0/userguide/application.html#integrators) that will be bound to the [`Context`](http://docs.openmm.org/7.1.0/api-python/generated/simtk.openmm.openmm.Context.html#simtk.openmm.openmm.Context):

# In[6]:


# Create an integrator
timestep = 1.0 * unit.femtoseconds
integrator = openmm.VerletIntegrator(timestep)
# Create a Context using the default platform (the fastest abailable one is picked automatically)
# NOTE: The integrator is bound irrevocably to the context, so we can't reuse it
context = openmm.Context(system, integrator)


# We will look at [`Integrator`](http://docs.openmm.org/7.1.0/userguide/application.html#integrators) objects more in depth in [Module 02 - Integrators and Sampling](https://github.com/choderalab/openmm-tutorials/blob/master/02%20-%20Integrators%20and%20sampling.ipynb)
# 
# We can also optionally specify which platform we want. OpenMM comes with the following [`Platforms:`](http://docs.openmm.org/7.1.0/userguide/application.html#platforms)
# * [`Reference`](http://docs.openmm.org/7.1.0/userguide/library.html#platforms) - A slow double-precision single-threaded CPU-only implementation intended for comparing fast implementations with a "verifiably correct" one
# * [`CPU`](http://docs.openmm.org/7.1.0/userguide/library.html#cpu-platform) - A fast multithreaded mixed-precision CPU-only implementation
# * [`OpenCL`](http://docs.openmm.org/7.1.0/userguide/library.html#opencl-platform) - A highly portable OpenCL implementation that can be used with GPU or CPU OpenCL drivers; supports multiple precision models (single, mixed, double)
# * [`CUDA`](http://docs.openmm.org/7.1.0/userguide/library.html#cuda-platform) - The fastest implementation that uses [CUDA](https://developer.nvidia.com/cuda-downloads) for NVIDIA GPUs; supports multiple precision models (single, mixed, double)

# In[7]:


# We have to create a new integrator for every Context since it takes ownership of the integrator we pass it
integrator = openmm.VerletIntegrator(timestep)
# Create a Context using the multithreaded mixed-precision CPU platform
platform = openmm.Platform.getPlatformByName('CPU')
context = openmm.Context(system, integrator, platform)


# The `System` object can be used by multiple `Contexts` because the `System` is copied and converted to the efficient `Platform` specific kernels inside the `Context` object. Changing parameters or adding new `Force`'s on the Python `System` itself will not update the representation inside the `Context`.

# In[8]:


from copy import deepcopy
# Create a new integrator we can throw away
throwaway_integrator = openmm.VerletIntegrator(timestep)
# Copy the System object for this example
copy_system = deepcopy(system)
throwaway_context = openmm.Context(copy_system, throwaway_integrator)
# Change something in the Python System, e.g. double the box size
copy_system.setDefaultPeriodicBoxVectors(*box_vectors*2)
# Observe how the box size in the Context and Python System are different
print([vector for i, vector in enumerate(copy_system.getDefaultPeriodicBoxVectors())])
print([vector for i, vector in enumerate(throwaway_context.getState().getPeriodicBoxVectors())])
# Cleanup
del throwaway_integrator, throwaway_context


# There are ways to update paramters in an existing `Context`, but that topic will be covered in later tutorials.
# 
# Once we have a [`Context`](http://docs.openmm.org/7.1.0/api-python/generated/simtk.openmm.openmm.Context.html#simtk.openmm.openmm.Context), we need to tell OpenMM we want to retrieve the energy and forces for a specific set of particle positions. This is done by retrieving a [`State`](http://docs.openmm.org/7.1.0/api-python/generated/simtk.openmm.openmm.State.html#simtk.openmm.openmm.State) object from the [`Context`](http://docs.openmm.org/7.1.0/api-python/generated/simtk.openmm.openmm.Context.html#simtk.openmm.openmm.Context) that contains *only* the information we want to retrieve so as to minimize the amount of data that needs to be sent over the bus from a GPU:

# In[9]:


# Set the positions
context.setPositions(positions)
# Retrieve the energy and forces
state = context.getState(getEnergy=True, getForces=True)
potential_energy = state.getPotentialEnergy()
print('Potential energy: %s' % potential_energy)
forces = state.getForces()
print('Forces: %s' % forces[0:5])


# Note that the forces are returned as a `list` of tuples with units attached. If we want a numpy array instead, we could convert it, but it's much easier just use the `asNumpy=True` argument to [`state.getForces`](http://docs.openmm.org/7.1.0/api-python/generated/simtk.openmm.openmm.State.html#simtk.openmm.openmm.State.getForces)

# In[10]:


# Get forces as numpy array
forces = state.getForces(asNumpy=True)
print('Forces: %s' % forces)


# ## Minimizing the potential energy
# 
# You'll notice the energy and force magnitude is pretty large:

# In[11]:


# Compute the energy
state = context.getState(getEnergy=True, getForces=True)
potential_energy = state.getPotentialEnergy()
print('Potential energy: %s' % potential_energy)
# Compute the force magnitude
forces = state.getForces(asNumpy=True)
force_magnitude = (forces**2).sum().sqrt()
print('Force magnitude: %s' % force_magnitude)


# If we tried to do dynamics at this point, the system will likely explode!
# 
# OpenMM provides a [`LocalEnergyMinimizer`](http://docs.openmm.org/7.1.0/api-python/generated/simtk.openmm.openmm.LocalEnergyMinimizer.html#simtk.openmm.openmm.LocalEnergyMinimizer) that can be used to bring the force magnitude to be low enough to be able to run a simulation that won't explode.

# In[12]:


# Minimize the potential energy
openmm.LocalEnergyMinimizer.minimize(context)

# Compute the energy and force magnitude after minimization
state = context.getState(getEnergy=True, getForces=True)
potential_energy = state.getPotentialEnergy()
print('Potential energy: %s' % potential_energy)
forces = state.getForces(asNumpy=True)
force_magnitude = (forces**2).sum().sqrt()
print('Force magnitude: %s' % force_magnitude)


# ## Serializing the System object to disk or for transport over the network
# 
# You can serialize and restore `System` objects as a string (which can be written to disk) via the [`XmlSerializer`](http://docs.openmm.org/7.1.0/api-python/generated/simtk.openmm.openmm.XmlSerializer.html#simtk.openmm.openmm.XmlSerializer) class:

# In[13]:


# Serialize to a string
system_xml = openmm.XmlSerializer.serialize(system)
# Restore from a string
restored_system = openmm.XmlSerializer.deserialize(system_xml)
assert(system.getNumParticles() == restored_system.getNumParticles())


# In[14]:


# Peek at the first 15 lines of the XML file
system_xml.split('\n')[:15]


# ## Load a molecular system defined by AMBER, CHARMM, or gromacs
# 
# The OpenMM [`app` layer](http://docs.openmm.org/7.1.0/userguide/application.html#using-amber-files) provides convenience classes that help you load in systems defined in AMBER, CHARMM, or gromacs. 

# ### Loading an AMBER system
# 
# We can use [`AmberPrmtopFile`](http://docs.openmm.org/7.1.0/api-python/generated/simtk.openmm.app.amberprmtopfile.AmberPrmtopFile.html#simtk.openmm.app.amberprmtopfile.AmberPrmtopFile) to load an Amber `prmtop` file and [`InpcrdFile`](http://docs.openmm.org/7.1.0/api-python/generated/simtk.openmm.app.amberinpcrdfile.AmberInpcrdFile.html#simtk.openmm.app.amberinpcrdfile.AmberInpcrdFile) to load an `inpcrd` file:

# In[15]:


# Load in an AMBER system
from simtk.openmm import app
prmtop = app.AmberPrmtopFile('resources/alanine-dipeptide-implicit.prmtop')
inpcrd = app.AmberInpcrdFile('resources/alanine-dipeptide-implicit.inpcrd')
system = prmtop.createSystem(nonbondedMethod=app.NoCutoff, implicitSolvent=app.OBC2, constraints=app.HBonds)
positions = inpcrd.getPositions(asNumpy=True)

# Compute the potential energy
def compute_potential(system, positions):
    """Print the potential energy given a System and positions."""
    integrator = openmm.VerletIntegrator(1.0 * unit.femtoseconds)
    context = openmm.Context(system, integrator)
    context.setPositions(positions)
    print('Potential energy: %s' % context.getState(getEnergy=True).getPotentialEnergy())
    # Clean up
    del context, integrator
    
compute_potential(system, positions)


# ## Loading a CHARMM system
# 
# We can also [load CHARMM files](http://docs.openmm.org/7.1.0/userguide/application.html#using-charmm-files):

# In[16]:


# Load in a CHARMM system
from simtk.openmm import app
psf = app.CharmmPsfFile('resources/ala_ala_ala.psf')
pdbfile = app.PDBFile('resources/ala_ala_ala.pdb')
params = app.CharmmParameterSet('resources/charmm22.rtf', 'resources/charmm22.par')
system = psf.createSystem(params, nonbondedMethod=app.NoCutoff, constraints=app.HBonds)
positions = pdbfile.getPositions(asNumpy=True)
compute_potential(system, positions)


# ## Loading a gromacs system
# 
# We can also [load gromacs files](http://docs.openmm.org/7.1.0/userguide/application.html#using-gromacs-files), though you'll want to have the gromacs parameter files installed first.
# 
# You can install a version of gromacs via `conda` with:
# ```
# conda install --yes -c bioconda gromacs
# ```

# In[17]:


# Load in a gromacs system
from simtk.openmm import app
gro = app.GromacsGroFile('resources/input.gro')
# Make sure you change this to your appropriate gromacs include directory!
gromacs_include_filepath = '/Users/choderaj/miniconda3/share/gromacs/top/'
top = app.GromacsTopFile('resources/input.top', periodicBoxVectors=gro.getPeriodicBoxVectors(), includeDir=gromacs_include_filepath)
system = top.createSystem(nonbondedMethod=app.PME, nonbondedCutoff=1*unit.nanometer, constraints=app.HBonds)
positions = gro.getPositions(asNumpy=True)
compute_potential(system, positions)


# ## Creating a `System` using the OpenMM `ForceField` class
# 
# OpenMM also provides a number of [force fields](http://docs.openmm.org/7.1.0/userguide/application.html#force-fields) that have been converted into a [universal XML force field description format](http://docs.openmm.org/7.1.0/userguide/application.html#writing-the-xml-file) that can also be [extended in a modular manner](http://docs.openmm.org/7.1.0/userguide/application.html#extending-forcefield). Several [common AMBER protein force fields are provided](http://docs.openmm.org/7.1.0/userguide/application.html#force-fields) that work well for protein systems in which all atoms are already present.

# In[18]:


# Create an OpenMM system using the ForceField class
pdb = app.PDBFile('resources/input.pdb')
forcefield = app.ForceField('amber99sb.xml', 'tip3p.xml')
system = forcefield.createSystem(pdb.topology, nonbondedMethod=app.PME, nonbondedCutoff=1*unit.nanometer, constraints=app.HBonds)
positions = pdb.positions
compute_potential(system, positions)


# ## Visualizing the system and using `Topology` objects

# While the `System` objects don't contain any information on the *identities* of the particles or how these are organized into residues or molecules, we can extract a [`Topology`](http://docs.openmm.org/7.1.0/api-python/generated/simtk.openmm.app.topology.Topology.html#simtk.openmm.app.topology.Topology) object that provides this information from a PDB file:

# In[19]:


# Extract the topology from a PDB file
pdbfile = app.PDBFile('resources/alanine-dipeptide-implicit.pdb')
positions = pdbfile.getPositions()
topology = pdbfile.getTopology()


# OpenMM provides a way to write a frame to a PDB file should you want to visualize your initial configuration:

# In[20]:


# Write out a PDB file
with open('output.pdb', 'w') as outfile:
    app.PDBFile.writeFile(topology, positions, outfile)


# If working in a Jupyter notebook, we can use [`mdtraj`](http://mdtraj.org) and [`nglview`](https://github.com/arose/nglview) to visualize the system:

# In[21]:


# Create an MDTraj Trajectory object
import mdtraj
mdtraj_topology = mdtraj.Topology.from_openmm(topology)
traj = mdtraj.Trajectory(positions/unit.nanometers, mdtraj_topology)

# View it in nglview
import nglview
view = nglview.show_mdtraj(traj)
view.add_ball_and_stick('all')
view.center_view(zoom=True)
view


# ## Prebuilt systems with `openmmtools.testsystems`

# The [`openmmtools.testsystems`](http://openmmtools.readthedocs.io/en/latest/testsystems.html) module provides a number 
# of pre-built test systems that are very useful for testing your code or benchmarking your algorithms. The full list of test systems can be found [here](http://openmmtools.readthedocs.io/en/latest/testsystems.html), but we'll highlight a few below.

# In[22]:


from openmmtools import testsystems

t = testsystems.HarmonicOscillator()
system, positions, topology = t.system, t.positions, t.topology

print(positions)
help(t)


# In[23]:


# Create a Lennard-Jones cluster with a harmonic spherical restraint:
t = testsystems.LennardJonesCluster()

def visualize(t):
    import mdtraj
    mdtraj_topology = mdtraj.Topology.from_openmm(t.topology)
    traj = mdtraj.Trajectory([t.positions/unit.nanometers], mdtraj_topology)

    # View it in nglview
    import nglview
    view = nglview.show_mdtraj(traj)
    if t.system.getNumParticles() < 2000:
        view.add_ball_and_stick('all')
    view.center_view(zoom=True)
    return view

visualize(t)


# In[24]:


t = testsystems.LennardJonesFluid(reduced_density=0.90)
visualize(t)


# In[25]:


t = testsystems.WaterBox()
visualize(t)


# In[26]:


t = testsystems.AlanineDipeptideVacuum()
visualize(t)


# In[27]:


# The Joint AMBER-CHARMM (JAC) DHFR in explicit solvent benchmark system
t = testsystems.DHFRExplicit()
visualize(t)


# There are lots and lots of test systems defined! You can find the complete list and descriptions [here](http://openmmtools.readthedocs.io/en/latest/testsystems.html).
# Try some!

# In[28]:


# Get all subclasses of type `TestSystem`
from openmmtools.tests.test_testsystems import get_all_subclasses
for t in get_all_subclasses(testsystems.TestSystem):
    print(t.__name__)


# # Wrap-up
# 
# We've reviewed a myriad of ways that you can create OpenMM [`System`](http://docs.openmm.org/7.1.0/api-python/generated/simtk.openmm.openmm.System.html#simtk.openmm.openmm.System) objects. 
# 
# To recap, there are many ways to create or import OpenMM `System` objects:
# * We can assemble all the particles and define their interactions ourselves programmatically using the OpenMM C++ API
# * We could load in a serialized version of the `System` object via the built-in XML serializer
# * We can use the various importers in the [`simtk.openmm.app`](http://docs.openmm.org/7.1.0/api-python/app.html) layer to read in a system definition in AMBER, CHARMM, or gromacs format
# * We can create our own `System` object from a PDB file using the OpenMM [`ForceField`](http://docs.openmm.org/7.1.0/userguide/application.html#force-fields) facility
# * We can use a large variety of pre-built test systems provided by the [`openmmtools.testsystems`](http://openmmtools.readthedocs.io) module for testing code on a battery of different system types with different kinds of forces

# In[ ]: