Improve LAMMPS DUMP parser (#3844)

* Add check in LAMMPS Dump Reader for velocities and forces * Translate LAMMPS coordinates to range from zero to box length * Add unwrap feature to LAMMPSDump * Added LAMMPS test files reference in datafile.py * Moved LAMMPSDUMP vel and force check with ATOM card parsing Co-authored-by: Hugo MacDermott-Opeskin <hugomacdermott@gmail.com>
MDAnalysis · Oct 26, 2022 · 5b2f447 · 5b2f447
1 parent 998a020
commit 5b2f447
Show file tree

Hide file tree

Showing 6 changed files with 289 additions and 69 deletions.
diff --git a/package/CHANGELOG b/package/CHANGELOG
@@ -19,6 +19,7 @@ The rules for this file:
  * 2.4.0
 
 Fixes
+  * LAMMPSDump Reader translates the box to the origin (#3741)
   * hbond analysis: access hbonds results through new results member in count_by_ids() and count_by_type()
   * Added isolayer selection method (Issue #3845)
   * Auxiliary; determination of representative frames: Removed undefined
@@ -30,6 +31,9 @@ Fixes
     (e.g. bonds, angles) (PR #3779).
 
 Enhancements
+  * LAMMPSDump Reader optionally unwraps trajectories with image flags upon 
+    loading (#3843
+  * LAMMPSDump Reader now imports velocities and forces (#3843)
   * Minimum NumPy version for py3.11 is now set to 1.23.2.
   * Added decorator to check for an empty atom group, applied in topological 
     attributes(Issue #3837)

diff --git a/package/MDAnalysis/coordinates/LAMMPS.py b/package/MDAnalysis/coordinates/LAMMPS.py
@@ -452,25 +452,51 @@ def write(self, selection, frame=None):
 
 
 class DumpReader(base.ReaderBase):
-    """Reads the default `LAMMPS dump format`_
-
-    Supports coordinates in the LAMMPS "unscaled" (x,y,z), "scaled" (xs,ys,zs),
-    "unwrapped" (xu,yu,zu) and "scaled_unwrapped" (xsu,ysu,zsu) coordinate
-    conventions (see https://docs.lammps.org/dump.html for more details).
-    If `lammps_coordinate_convention='auto'` (default),
-    one will be guessed. Guessing checks whether the coordinates fit each convention in the order "unscaled",
-    "scaled", "unwrapped", "scaled_unwrapped" and whichever set of coordinates
-    is detected first will be used. If coordinates are given in the scaled
-    coordinate convention (xs,ys,zs) or scaled unwrapped coordinate convention
-    (xsu,ysu,zsu) they will automatically be converted from their
-    scaled/fractional representation to their real values.
-
-    Supports both orthogonal and triclinic simulation box dimensions (for more
-    details see https://docs.lammps.org/Howto_triclinic.html). In either case,
-    MDAnalysis will always use ``(*A*, *B*, *C*, *alpha*, *beta*, *gamma*)``
-    to represent the unit cell. Lengths *A*, *B*, *C* are in the MDAnalysis
-    length unit (Å), and angles are in degrees.
-
+    """Reads the default `LAMMPS dump format 
+    <https://docs.lammps.org/dump.html>`__
+
+    Supports coordinates in various LAMMPS coordinate conventions and both 
+    orthogonal and triclinic simulation box dimensions (for more details see 
+    `documentation <https://docs.lammps.org/Howto_triclinic.html>`__). In 
+    either case, MDAnalysis will always use ``(*A*, *B*, *C*, *alpha*, *beta*,
+    *gamma*)`` to represent the unit cell. Lengths *A*, *B*, *C* are in the 
+    MDAnalysis length unit (Å), and angles are in degrees.
+
+    Parameters
+    ----------
+    filename : str
+        Filename of LAMMPS dump file
+    lammps_coordinate_convention : str (optional) default="auto"
+        Convention used in coordinates, can be one of the following according
+        to the `LAMMPS documentation <https://docs.lammps.org/dump.html>`__:
+
+         - "auto" - Detect coordinate type from file column header. If auto 
+           detection is used, the guessing checks whether the coordinates
+           fit each convention in the order "unscaled", "scaled", "unwrapped", 
+           "scaled_unwrapped" and whichever set of coordinates is detected 
+           first will be used.
+         - "scaled" - Coordinates wrapped in box and scaled by box length (see
+            note below), i.e., xs, ys, zs
+         - "scaled_unwrapped" - Coordinates unwrapped and scaled by box length,
+           (see note below) i.e., xsu, ysu, zsu
+         - "unscaled" - Coordinates wrapped in box, i.e., x, y, z
+         - "unwrapped" - Coordinates unwrapped, i.e., xu, yu, zu
+
+        If coordinates are given in the scaled coordinate convention (xs,ys,zs)
+        or scaled unwrapped coordinate convention (xsu,ysu,zsu) they will 
+        automatically be converted from their scaled/fractional representation
+        to their real values.
+    unwrap_images : bool (optional) default=False
+        If `True` and the dump file contains image flags, the coordinates 
+        will be unwrapped. See `read_data 
+        <https://docs.lammps.org/read_data.html>`__  in the lammps 
+        documentation for more information.
+    **kwargs
+       Other keyword arguments used in :class:`~MDAnalysis.coordinates.base.ReaderBase`
+
+    .. versionchanged:: 2.4.0
+       Now imports velocities and forces, translates the box to the origin,
+       and optionally unwraps trajectories with image flags upon loading.
     .. versionchanged:: 2.2.0
        Triclinic simulation boxes are supported.
        (Issue `#3383 <https://github.com/MDAnalysis/mdanalysis/issues/3383>`__)
@@ -489,7 +515,9 @@ class DumpReader(base.ReaderBase):
     }
 
     @store_init_arguments
-    def __init__(self, filename, lammps_coordinate_convention="auto",
+    def __init__(self, filename, 
+                 lammps_coordinate_convention="auto",
+                 unwrap_images=False,
                  **kwargs):
         super(DumpReader, self).__init__(filename, **kwargs)
 
@@ -503,6 +531,8 @@ def __init__(self, filename, lammps_coordinate_convention="auto",
                              " is not a valid option. "
                              f"Please choose one of {option_string}")
 
+        self._unwrap = unwrap_images
+
         self._cache = {}
 
         self._reopen()
@@ -606,9 +636,27 @@ def _read_next_timestep(self):
         convention_to_col_ix = {}
         for cv_name, cv_col_names in self._coordtype_column_names.items():
             try:
-                convention_to_col_ix[cv_name] = [attr_to_col_ix[x] for x in cv_col_names]
+                convention_to_col_ix[cv_name] = [attr_to_col_ix[x] 
+                    for x in cv_col_names]
             except KeyError:
                 pass
+
+        if self._unwrap:
+            try:
+                image_cols = [attr_to_col_ix[x] for x in ["ix", "iy", "iz"]]
+            except:
+                raise ValueError("Trajectory must have image flag in order "
+                                 "to unwrap.")
+
+        self._has_vels = all(x in attr_to_col_ix for x in ["vx", "vy", "vz"])
+        if self._has_vels:
+            ts.has_velocities = True
+            vel_cols = [attr_to_col_ix[x] for x in ["vx", "vy", "vz"]]
+        self._has_forces = all(x in attr_to_col_ix for x in ["fx", "fy", "fz"])
+        if self._has_forces:
+            ts.has_forces = True
+            force_cols = [attr_to_col_ix[x] for x in ["fx", "fy", "fz"]]
+
         # this should only trigger on first read of "ATOM" card, after which it
         # is fixed to the guessed value. Auto proceeds unscaled -> scaled
         # -> unwrapped -> scaled_unwrapped
@@ -620,22 +668,46 @@ def _read_next_timestep(self):
             except IndexError:
                 raise ValueError("No coordinate information detected")
         elif not self.lammps_coordinate_convention in convention_to_col_ix:
-            raise ValueError(f"No coordinates following convention {self.lammps_coordinate_convention} found in timestep")
+            raise ValueError(f"No coordinates following convention "
+                             "{self.lammps_coordinate_convention} found in "
+                             "timestep")
 
         coord_cols = convention_to_col_ix[self.lammps_coordinate_convention]
+        if self._unwrap:
+            coord_cols.extend(image_cols)
 
         ids = "id" in attr_to_col_ix
         for i in range(self.n_atoms):
             fields = f.readline().split()
             if ids:
                 indices[i] = fields[attr_to_col_ix["id"]]
-            ts.positions[i] = [fields[dim] for dim in coord_cols]
+            coords = np.array([fields[dim] for dim in coord_cols], 
+                              dtype=np.float32)
+
+            if self._unwrap:
+                images = coords[3:]
+                coords = coords[:3]
+                coords += images * ts.dimensions[:3]
+            else:
+                coords = coords[:3]
+            ts.positions[i] = coords
+
+            if self._has_vels:
+                ts.velocities[i] = [fields[dim] for dim in vel_cols]
+            if self._has_forces:
+                ts.forces[i] = [fields[dim] for dim in force_cols]
 
         order = np.argsort(indices)
         ts.positions = ts.positions[order]
+        if self._has_vels:
+            ts.velocities = ts.velocities[order]
+        if self._has_forces:
+            ts.forces = ts.forces[order]
         if (self.lammps_coordinate_convention.startswith("scaled")):
             # if coordinates are given in scaled format, undo that
             ts.positions = distances.transform_StoR(ts.positions,
                                                     ts.dimensions)
+        # Transform to origin after transformation of scaled variables
+        ts.positions -= np.array([xlo, ylo, zlo])[None,:]
 
         return ts