From 6fdf06dc1c0aa61d7b5db455fbe48f47e1d3399a Mon Sep 17 00:00:00 2001 From: Max Linke Date: Fri, 2 Feb 2018 09:29:03 +0100 Subject: [PATCH 1/6] fix header length information in dcd A DCD has two byte positions to store the length of the remark section. First a it reads number of bytes in the remark plus 4. Afterwards it has the number of 80 character sections. These two numbers have to match. So far we wrote first that we use 164 chars and three 80 character blocks, 164 != 80*3 + 4 Now we corrected that and state correctly we write 244 bytes in the remark section. We never noticed that because our reader ignores the information about the byte length on looks only at the number of blocks. As a side note here. DCD remarks always have have to be a multiple of 80 in length. --- .../MDAnalysis/lib/formats/include/readdcd.h | 4 ++-- .../MDAnalysisTests/formats/test_libdcd.py | 17 +++++++++++++++++ 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/package/MDAnalysis/lib/formats/include/readdcd.h b/package/MDAnalysis/lib/formats/include/readdcd.h index 78544b0ca7f..153ac4723a5 100644 --- a/package/MDAnalysis/lib/formats/include/readdcd.h +++ b/package/MDAnalysis/lib/formats/include/readdcd.h @@ -735,13 +735,13 @@ int write_dcdheader(fio_fd fd, const char *remarks, int N, fio_write_int32(fd, 0); } fio_write_int32(fd, 84); - fio_write_int32(fd, 164); + fio_write_int32(fd, 244); fio_write_int32(fd, 3); /* the number of 80 character title strings */ strncpy(title_string, remarks, 240); WRITE(fd, title_string, 240); - fio_write_int32(fd, 164); + fio_write_int32(fd, 244); fio_write_int32(fd, 4); fio_write_int32(fd, N); fio_write_int32(fd, 4); diff --git a/testsuite/MDAnalysisTests/formats/test_libdcd.py b/testsuite/MDAnalysisTests/formats/test_libdcd.py index 537ca36a03b..3b13e2ffa6a 100644 --- a/testsuite/MDAnalysisTests/formats/test_libdcd.py +++ b/testsuite/MDAnalysisTests/formats/test_libdcd.py @@ -20,6 +20,7 @@ from collections import namedtuple import os import string +import struct import hypothesis.strategies as strategies from hypothesis import example, given @@ -242,6 +243,22 @@ def test_write_header(tmpdir): assert header['nsavc'] == 10 assert np.allclose(header['delta'], .02) + # we also check the bytes written directly. + with open(testfile, 'rb') as fh: + header_bytes = fh.read() + # check for magic number + assert struct.unpack('i', header_bytes[:4])[0] == 84 + # magic number should be written again before remark section + assert struct.unpack('i', header_bytes[88:92])[0] == 84 + # length of remark section. We hard code this to 244 right now + assert struct.unpack('i', header_bytes[92:96])[0] == 244 + # say we have 3 block of length 80 + assert struct.unpack('i', header_bytes[96:100])[0] == 3 + # after the remark section the length should be reported again + assert struct.unpack('i', header_bytes[340:344])[0] == 244 + # this is a magic number as far as I see + assert struct.unpack('i', header_bytes[344:348])[0] == 4 + def test_write_no_header(tmpdir): fname = str(tmpdir.join('test.dcd')) From fd68f60a4910675cf28462d8e0bd0d0e2dae9771 Mon Sep 17 00:00:00 2001 From: Max Linke Date: Fri, 2 Feb 2018 14:17:18 +0100 Subject: [PATCH 2/6] DCDwriter now has a option to set istart Programs who use DCD have different conventions for istart, the starting frame of the trajectory. Because of this we now allow it to be set by the user. --- package/MDAnalysis/coordinates/DCD.py | 5 ++++- testsuite/MDAnalysisTests/coordinates/test_dcd.py | 10 ++++++++++ 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/package/MDAnalysis/coordinates/DCD.py b/package/MDAnalysis/coordinates/DCD.py index 8ba7f577350..0945148e73d 100644 --- a/package/MDAnalysis/coordinates/DCD.py +++ b/package/MDAnalysis/coordinates/DCD.py @@ -358,6 +358,7 @@ def __init__(self, dt=1, remarks='', nsavc=1, + istart=1, **kwargs): """Parameters ---------- @@ -380,6 +381,8 @@ def __init__(self, how many MD steps is a frame saved to the DCD). By default, this number is just set to one and this should be sufficient for almost all cases but if required, nsavc can be changed. + istart : int (optional) + starting frame number. CHARMM defaults to 1. **kwargs : dict General writer arguments @@ -400,7 +403,7 @@ def __init__(self, nsavc=nsavc, delta=delta, is_periodic=1, - istart=0) + istart=istart) def write_next_timestep(self, ts): """Write timestep object into trajectory. diff --git a/testsuite/MDAnalysisTests/coordinates/test_dcd.py b/testsuite/MDAnalysisTests/coordinates/test_dcd.py index ddda9bf5b24..593993b6df4 100644 --- a/testsuite/MDAnalysisTests/coordinates/test_dcd.py +++ b/testsuite/MDAnalysisTests/coordinates/test_dcd.py @@ -284,6 +284,16 @@ def test_writer_trajectory_no_natoms(tmpdir, universe_dcd): universe_dcd.trajectory.Writer("foo.dcd") +@pytest.mark.parametrize('istart', (0, 1, 2, 3)) +def test_write_istart(universe_dcd, tmpdir, istart): + u = universe_dcd + outfile = str(tmpdir.join('test.dcd')) + with mda.Writer(outfile, u.atoms.n_atoms, istart=istart) as w: + w.write(u.atoms) + u = mda.Universe(PSF, outfile) + assert u.trajectory._file.header['istart'] == istart + + class RefCHARMMtriclinicDCD(object): topology = PSF_TRICLINIC trajectory = DCD_TRICLINIC From adaf9d977ce8bc16f6d248f3fe5c3f68ad4e1460 Mon Sep 17 00:00:00 2001 From: Max Linke Date: Sat, 3 Feb 2018 09:22:00 +0100 Subject: [PATCH 3/6] use istart information in dcd to set correct time --- package/MDAnalysis/coordinates/DCD.py | 4 +-- .../MDAnalysisTests/analysis/test_rms.py | 14 ++++---- .../MDAnalysisTests/coordinates/test_dcd.py | 33 +++++++++++-------- .../coordinates/test_memory.py | 2 ++ .../MDAnalysisTests/core/test_universe.py | 9 +++-- 5 files changed, 34 insertions(+), 28 deletions(-) diff --git a/package/MDAnalysis/coordinates/DCD.py b/package/MDAnalysis/coordinates/DCD.py index 0945148e73d..276a326dd73 100644 --- a/package/MDAnalysis/coordinates/DCD.py +++ b/package/MDAnalysis/coordinates/DCD.py @@ -216,7 +216,7 @@ def Writer(self, filename, n_atoms=None, **kwargs): def _frame_to_ts(self, frame, ts): """convert a dcd-frame to a :class:`TimeStep`""" ts.frame = self._frame - ts.time = ts.frame * self.ts.dt + ts.time = (ts.frame + self._file.header['istart']) * self.ts.dt ts.data['step'] = self._file.tell() # The original unitcell is read as ``[A, gamma, B, beta, alpha, C]`` @@ -358,7 +358,7 @@ def __init__(self, dt=1, remarks='', nsavc=1, - istart=1, + istart=0, **kwargs): """Parameters ---------- diff --git a/testsuite/MDAnalysisTests/analysis/test_rms.py b/testsuite/MDAnalysisTests/analysis/test_rms.py index 405150fe233..bd0b0e2d7db 100644 --- a/testsuite/MDAnalysisTests/analysis/test_rms.py +++ b/testsuite/MDAnalysisTests/analysis/test_rms.py @@ -165,21 +165,21 @@ def outfile(self, tmpdir): @pytest.fixture() def correct_values(self): - return [[0, 0, 0], [49, 49, 4.68953]] + return [[0, 1000, 0], [49, 1049, 4.68953]] @pytest.fixture() def correct_values_mass(self): - return [[0, 0, 0], [49, 49, 4.74920]] + return [[0, 1000, 0], [49, 1049, 4.74920]] @pytest.fixture() def correct_values_group(self): - return [[0, 0, 0, 0, 0], - [49, 49, 4.7857, 4.7048, 4.6924]] + return [[0, 1000, 0, 0, 0], + [49, 1049, 4.7857, 4.7048, 4.6924]] @pytest.fixture() def correct_values_backbone_group(self): - return [[0, 0, 0, 0, 0], - [49, 49, 4.6997, 1.9154, 2.7139]] + return [[0, 1000, 0, 0, 0], + [49, 1049, 4.6997, 1.9154, 2.7139]] def test_progress_meter(self, capsys, universe): @@ -217,7 +217,7 @@ def test_rmsd_atomgroup_selections(self, universe): def test_rmsd_single_frame(self, universe): RMSD = MDAnalysis.analysis.rms.RMSD(universe, select='name CA', start=5, stop=6).run() - single_frame = [[5, 5, 0.91544906]] + single_frame = [[5, 1005, 0.91544906]] assert_almost_equal(RMSD.rmsd, single_frame, 4, err_msg="error: rmsd profile should match" + "test values") diff --git a/testsuite/MDAnalysisTests/coordinates/test_dcd.py b/testsuite/MDAnalysisTests/coordinates/test_dcd.py index 593993b6df4..1b29b2724ce 100644 --- a/testsuite/MDAnalysisTests/coordinates/test_dcd.py +++ b/testsuite/MDAnalysisTests/coordinates/test_dcd.py @@ -80,6 +80,20 @@ def test_with_statement(self): np.arange(0, N), err_msg="with_statement: DCDReader does not read all frames") + def test_set_time(self): + u = mda.Universe(PSF, DCD) + assert_almost_equal(u.trajectory.time, 1000, decimal=4) + + +@pytest.mark.parametrize('istart', (0, 1, 2, 3)) +def test_write_istart(universe_dcd, tmpdir, istart): + u = universe_dcd + outfile = str(tmpdir.join('test.dcd')) + with mda.Writer(outfile, u.atoms.n_atoms, istart=istart) as w: + w.write(u.atoms) + u = mda.Universe(PSF, outfile) + assert u.trajectory._file.header['istart'] == istart + class TestDCDWriter(BaseWriterTest): @staticmethod @@ -107,6 +121,7 @@ def test_write_random_unitcell(tmpdir): decimal=5) + ################ # Legacy tests # ################ @@ -213,7 +228,7 @@ def test_reader_set_dt(): dt = 100 frame = 3 u = mda.Universe(PSF, DCD, dt=dt) - assert_almost_equal(u.trajectory[frame].time, frame*dt, + assert_almost_equal(u.trajectory[frame].time, (frame + 1000)*dt, err_msg="setting time step dt={0} failed: " "actually used dt={1}".format( dt, u.trajectory._ts_kwargs['dt'])) @@ -221,14 +236,14 @@ def test_reader_set_dt(): err_msg="trajectory.dt does not match set dt") -@pytest.mark.parametrize("ext, decimal", (("dcd", 5), +@pytest.mark.parametrize("ext, decimal", (("dcd", 4), ("xtc", 3))) def test_writer_dt(tmpdir, ext, decimal): dt = 5.0 # set time step to 5 ps universe_dcd = mda.Universe(PSF, DCD, dt=dt) t = universe_dcd.trajectory outfile = str(tmpdir.join("test.{}".format(ext))) - with mda.Writer(outfile, n_atoms=t.n_atoms, dt=dt) as W: + with mda.Writer(outfile, n_atoms=t.n_atoms, dt=dt, istart=t._file.header['istart']) as W: for ts in universe_dcd.trajectory: W.write(universe_dcd.atoms) @@ -237,7 +252,7 @@ def test_writer_dt(tmpdir, ext, decimal): (uw.trajectory.n_frames - 1) * dt, decimal) times = np.array([uw.trajectory.time for ts in uw.trajectory]) frames = np.array([ts.frame for ts in uw.trajectory]) - assert_array_almost_equal(times, frames * dt, decimal) + assert_array_almost_equal(times, (frames + 1000) * dt, decimal) @pytest.mark.parametrize("ext, decimal", (("dcd", 5), @@ -284,16 +299,6 @@ def test_writer_trajectory_no_natoms(tmpdir, universe_dcd): universe_dcd.trajectory.Writer("foo.dcd") -@pytest.mark.parametrize('istart', (0, 1, 2, 3)) -def test_write_istart(universe_dcd, tmpdir, istart): - u = universe_dcd - outfile = str(tmpdir.join('test.dcd')) - with mda.Writer(outfile, u.atoms.n_atoms, istart=istart) as w: - w.write(u.atoms) - u = mda.Universe(PSF, outfile) - assert u.trajectory._file.header['istart'] == istart - - class RefCHARMMtriclinicDCD(object): topology = PSF_TRICLINIC trajectory = DCD_TRICLINIC diff --git a/testsuite/MDAnalysisTests/coordinates/test_memory.py b/testsuite/MDAnalysisTests/coordinates/test_memory.py index 9e95e0cd396..49da54ae12c 100644 --- a/testsuite/MDAnalysisTests/coordinates/test_memory.py +++ b/testsuite/MDAnalysisTests/coordinates/test_memory.py @@ -75,6 +75,8 @@ def reader(self, trajectory): def iter_ts(self, i): ts = self.universe.trajectory[i] + # correct time because memory reader doesn't read the correct time + ts.time = ts.frame * self.dt return ts diff --git a/testsuite/MDAnalysisTests/core/test_universe.py b/testsuite/MDAnalysisTests/core/test_universe.py index 64e978a0adf..58dbbba9591 100644 --- a/testsuite/MDAnalysisTests/core/test_universe.py +++ b/testsuite/MDAnalysisTests/core/test_universe.py @@ -468,12 +468,11 @@ def test_slicing_step_with_start_stop(self): def test_slicing_step_dt(self): universe = MDAnalysis.Universe(PDB_small, DCD) - times = [ts.time for ts in universe.trajectory] + dt = universe.trajectory.dt universe.transfer_to_memory(step=2) - times2 = [ts.time for ts in universe.trajectory] - assert_almost_equal(times[::2], times2, - err_msg="Unexpected in-memory timestep: " - + "dt not updated with step information") + assert_almost_equal(dt * 2, universe.trajectory.dt, + err_msg="Unexpected in-memory timestep: " + + "dt not updated with step information") def test_slicing_negative_start(self): universe = MDAnalysis.Universe(PDB_small, DCD) From b307e733e51f1519c779916f60617bca2cc8dab7 Mon Sep 17 00:00:00 2001 From: Max Linke Date: Sat, 3 Feb 2018 09:27:15 +0100 Subject: [PATCH 4/6] update changelog --- package/CHANGELOG | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/package/CHANGELOG b/package/CHANGELOG index f07162e8525..1dde3dc1b35 100644 --- a/package/CHANGELOG +++ b/package/CHANGELOG @@ -13,7 +13,7 @@ The rules for this file: * release numbers follow "Semantic Versioning" http://semver.org ------------------------------------------------------------------------------ -mm/dd/18 richardjgowers, palnabarun, bieniekmateusz +mm/dd/18 richardjgowers, palnabarun, bieniekmateusz, kain88-de * 0.17.1 @@ -26,6 +26,8 @@ Fixes (Issue #1759) * AtomGroup.dimensions now strictly returns a copy (Issue #1582) * lib.distances.transform_StoR now checks input type (Issue #1699) + * libdcd now writes correct length of remark section (Issue #1701) + * DCDReader now reports the correct time based on istart information (PR #1767) 01/24/18 richardjgowers, rathann, orbeckst, tylerjereddy, mtiberti, kain88-de, From 75b69bc839de086a385ef306e88280b73011bc90 Mon Sep 17 00:00:00 2001 From: Richard Gowers Date: Sun, 4 Feb 2018 20:39:43 +0000 Subject: [PATCH 5/6] Update test_dcd.py --- testsuite/MDAnalysisTests/coordinates/test_dcd.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/testsuite/MDAnalysisTests/coordinates/test_dcd.py b/testsuite/MDAnalysisTests/coordinates/test_dcd.py index 1b29b2724ce..87b0c7c66d7 100644 --- a/testsuite/MDAnalysisTests/coordinates/test_dcd.py +++ b/testsuite/MDAnalysisTests/coordinates/test_dcd.py @@ -82,7 +82,7 @@ def test_with_statement(self): def test_set_time(self): u = mda.Universe(PSF, DCD) - assert_almost_equal(u.trajectory.time, 1000, decimal=4) + assert_almost_equal(u.trajectory.time, 1000, decimal=3) @pytest.mark.parametrize('istart', (0, 1, 2, 3)) From 2c55dcd07a1c614ddeb8b7d77462e74ca69380e3 Mon Sep 17 00:00:00 2001 From: Max Linke Date: Mon, 5 Feb 2018 14:55:08 +0100 Subject: [PATCH 6/6] proper null termination of remark string this might cause issues in other c libraries otherwise. document and fix null termination --- package/MDAnalysis/lib/formats/include/readdcd.h | 3 +++ package/MDAnalysis/lib/formats/libdcd.pyx | 4 +++- testsuite/MDAnalysisTests/formats/test_libdcd.py | 6 ++++-- 3 files changed, 10 insertions(+), 3 deletions(-) diff --git a/package/MDAnalysis/lib/formats/include/readdcd.h b/package/MDAnalysis/lib/formats/include/readdcd.h index 153ac4723a5..17be8901936 100644 --- a/package/MDAnalysis/lib/formats/include/readdcd.h +++ b/package/MDAnalysis/lib/formats/include/readdcd.h @@ -739,6 +739,9 @@ int write_dcdheader(fio_fd fd, const char *remarks, int N, fio_write_int32(fd, 3); /* the number of 80 character title strings */ strncpy(title_string, remarks, 240); + // Enforce null-termination for long remark strings. + // Not a problem for MDAnalysis but maybe for other readers. + title_string[239] = '\0'; WRITE(fd, title_string, 240); fio_write_int32(fd, 244); diff --git a/package/MDAnalysis/lib/formats/libdcd.pyx b/package/MDAnalysis/lib/formats/libdcd.pyx index 932d03ffdc1..be8469e3bb8 100644 --- a/package/MDAnalysis/lib/formats/libdcd.pyx +++ b/package/MDAnalysis/lib/formats/libdcd.pyx @@ -468,7 +468,8 @@ cdef class DCDFile: Parameters ---------- remarks : str - remarks of DCD file. Shouldn't be more then 240 characters (ASCII) + remarks of DCD file. Writes up to 239 characters (ASCII). The + character 240 will be the null terminator natoms : int number of atoms to write istart : int @@ -479,6 +480,7 @@ cdef class DCDFile: integrator time step. The time for 1 frame is nsavc * delta is_periodic : bool write unitcell information. Also pretends that file was written by CHARMM 24 + """ if not self.is_open: raise IOError("No file open") diff --git a/testsuite/MDAnalysisTests/formats/test_libdcd.py b/testsuite/MDAnalysisTests/formats/test_libdcd.py index 3b13e2ffa6a..852c9701032 100644 --- a/testsuite/MDAnalysisTests/formats/test_libdcd.py +++ b/testsuite/MDAnalysisTests/formats/test_libdcd.py @@ -318,7 +318,7 @@ def write_dcd(in_name, out_name, remarks='testing', header=None): @given(remarks=strategies.text( alphabet=string.printable, min_size=0, - max_size=240)) # handle the printable ASCII strings + max_size=239)) # handle the printable ASCII strings @example(remarks='') def test_written_remarks_property(remarks, tmpdir, dcd): # property based testing for writing of a wide range of string @@ -327,7 +327,7 @@ def test_written_remarks_property(remarks, tmpdir, dcd): header = dcd.header header['remarks'] = remarks write_dcd(DCD, testfile, header=header) - expected_remarks = remarks[:240] + expected_remarks = remarks with DCDFile(testfile) as f: assert f.header['remarks'] == expected_remarks @@ -340,6 +340,8 @@ def written_dcd(tmpdir_factory): testfile = str(testfile) write_dcd(DCD, testfile) Result = namedtuple("Result", "testfile, header, orgfile") + # throw away last char we didn't save due to null termination + header['remarks'] = header['remarks'][:-1] return Result(testfile, header, DCD)