add hashes and rearrange mult handling (#343)

* add hashes and rearrange mult handling * more frag mult testing * foramt * add some float charges tests
MolSSI · Sep 18, 2024 · 936407e · 936407e
1 parent b687499
commit 936407e
Show file tree

Hide file tree

Showing 6 changed files with 322 additions and 113 deletions.
diff --git a/qcelemental/models/molecule.py b/qcelemental/models/molecule.py
@@ -407,14 +407,28 @@ def _populate_real(cls, v, values, **kwargs):
             v = np.array([True for _ in range(n)])
         return v
 
-    @validator("fragment_charges_", "fragment_multiplicities_")
+    @validator("fragment_charges_")
     def _must_be_n_frag(cls, v, values, **kwargs):
         if "fragments_" in values and values["fragments_"] is not None:
             n = len(values["fragments_"])
             if len(v) != n:
-                raise ValueError(
-                    "Fragment Charges and Fragment Multiplicities must be same number of entries as Fragments"
-                )
+                raise ValueError("Fragment Charges must be same number of entries as Fragments")
+        return v
+
+    @validator("fragment_multiplicities_")
+    def _must_be_n_frag_mult(cls, v, values, **kwargs):
+        if "fragments_" in values and values["fragments_"] is not None:
+            n = len(values["fragments_"])
+            if len(v) != n:
+                raise ValueError("Fragment Multiplicities must be same number of entries as Fragments")
+        if any([m < 1.0 for m in v]):
+            raise ValueError(f"Fragment Multiplicity must be positive: {v}")
+        return v
+
+    @validator("molecular_multiplicity")
+    def _int_if_possible(cls, v, values, **kwargs):
+        if v < 1.0:
+            raise ValueError("Molecular Multiplicity must be positive")
         return v
 
     @property

diff --git a/qcelemental/molparse/chgmult.py b/qcelemental/molparse/chgmult.py
@@ -300,6 +300,19 @@ def validate_and_fill_chgmult(
     log_brief = verbose >= 2  # TODO: Move back to 1
     text = []
 
+    def int_if_possible(val):
+        if isinstance(val, float) and val.is_integer():
+            return int(val)
+        else:
+            return val
+
+    molecular_multiplicity = int_if_possible(molecular_multiplicity)
+    fragment_multiplicities = [int_if_possible(m) for m in fragment_multiplicities]
+    if (molecular_multiplicity and molecular_multiplicity < 1.0) or any(m < 1.0 for m in fragment_multiplicities if m):
+        raise ValidationError(
+            f"validate_and_fill_chgmult(): Multiplicity must be positive. m: {molecular_multiplicity}, fm: {fragment_multiplicities}"
+        )
+
     felez = np.split(zeff, fragment_separators)
     nfr = len(felez)
     if log_full:

diff --git a/qcelemental/molparse/from_arrays.py b/qcelemental/molparse/from_arrays.py
@@ -753,12 +753,15 @@ def validate_and_fill_fragments(nat, fragment_separators=None, fragment_charges=
 
         if fragment_multiplicities is None:
             frm = [None] * nfr
-        elif all(f is None or (isinstance(f, (int, np.integer)) and f >= 1) for f in fragment_multiplicities):
-            frm = fragment_multiplicities
         else:
-            raise ValidationError(
-                """fragment_multiplicities not among None or positive integer: {}""".format(fragment_multiplicities)
-            )
+            # positive-ness checks and integer-if-possible casting now deferred to validate_and_fill_chgmult()
+            #   to match molecular_multiplicities handling
+            try:
+                frm = [(f if f is None else float(f)) for f in fragment_multiplicities]
+            except TypeError:
+                raise ValidationError(
+                    """fragment_multiplicities not among None or float: {}""".format(fragment_charges)
+                )
 
     if not (len(frc) == len(frm) == len(frs) + 1):
         raise ValidationError(

diff --git a/qcelemental/tests/test_molecule.py b/qcelemental/tests/test_molecule.py
@@ -367,16 +367,20 @@ def test_water_orient():
 
     # Make sure the fragments match
     assert frag_0.get_hash() == frag_1.get_hash()
+    assert frag_0.get_hash() == "d0b499739f763e8d3a5556b4ddaeded6a148e4d5"
 
     # Make sure the complexes match
     frag_0_1 = mol.get_fragment(0, 1, orient=True, group_fragments=True)
     frag_1_0 = mol.get_fragment(1, 0, orient=True, group_fragments=True)
     assert frag_0_1.get_hash() == frag_1_0.get_hash()
+    assert frag_0_1.get_hash() == "bd23a8a5e48a3a6a32011559fdddc958bb70343b"
 
     # Fragments not reordered, should be different molecules.
     frag_0_1 = mol.get_fragment(0, 1, orient=True, group_fragments=False)
     frag_1_0 = mol.get_fragment(1, 0, orient=True, group_fragments=False)
     assert frag_0_1.get_hash() != frag_1_0.get_hash()
+    assert frag_0_1.get_hash() == "bd23a8a5e48a3a6a32011559fdddc958bb70343b"
+    assert frag_1_0.get_hash() == "9ed8bdc4ae559c20816d65225fdb1ae3c29d149f"
 
     # These are identical molecules, but should be different with ghost
     mol = Molecule.from_data(
@@ -399,6 +403,7 @@ def test_water_orient():
     # Make sure the fragments match
     assert frag_0.molecular_multiplicity == 1
     assert frag_0.get_hash() == frag_1.get_hash()
+    assert frag_0.get_hash() == "77b272802d61b578b1c65bb87747a89e53e015a7"
 
     # Make sure the complexes match
     frag_0_1 = mol.get_fragment(0, 1, orient=True)
@@ -407,6 +412,8 @@ def test_water_orient():
     # Ghost fragments should prevent overlap
     assert frag_0_1.molecular_multiplicity == 1
     assert frag_0_1.get_hash() != frag_1_0.get_hash()
+    assert frag_0_1.get_hash() == "4a4cd4d0ab0eef8fed2221fb692c3b1fbf4834de"
+    assert frag_1_0.get_hash() == "4cc0b30f9f50dd85f4f2036a683865bf17ded803"
 
 
 def test_molecule_errors_extra():
@@ -522,10 +529,12 @@ def test_get_fragment(group_fragments, orient):
         assert dimers[0].get_hash() != dimers[3].get_hash()  # atoms out of order
         assert dimers[1].get_hash() != dimers[4].get_hash()  # atoms out of order
         assert dimers[2].get_hash() == dimers[5].get_hash()
+        assert dimers[5].get_hash() == "f1d6551f95ce9467dbcce7c48e11bb98d0f1fb98"
     elif not group_fragments and not orient:
         assert dimers[0].get_hash() == dimers[3].get_hash()
         assert dimers[1].get_hash() == dimers[4].get_hash()
         assert dimers[2].get_hash() == dimers[5].get_hash()
+        assert dimers[5].get_hash() == "1bd9100e99748a0c34b01cef558ea5cf4ae6ab85"
     else:
         assert 0
 
@@ -541,11 +550,13 @@ def test_get_fragment(group_fragments, orient):
         assert ghdimers[0].get_hash() != ghdimers[3].get_hash()  # diff atoms ghosted
         assert ghdimers[1].get_hash() != ghdimers[4].get_hash()  # diff atoms ghosted
         assert ghdimers[2].get_hash() == ghdimers[5].get_hash()
+        assert ghdimers[5].get_hash() == "bd23a8a5e48a3a6a32011559fdddc958bb70343b"
     elif not group_fragments and not orient:
         assert ghdimers[0].get_hash() != ghdimers[3].get_hash()  # diff atoms ghosted
         assert ghdimers[1].get_hash() != ghdimers[4].get_hash()  # diff atoms ghosted
         assert ghdimers[2].get_hash() != ghdimers[5].get_hash()  # real pattern different
         assert not np.allclose(ghdimers[2].real, ghdimers[5].real)
+        assert ghdimers[5].get_hash() == "9d1fd57e90735a47af4156e1d72b7e8e78fb44eb"
     else:
         assert 0
 
@@ -564,6 +575,7 @@ def test_molecule_repeated_hashing():
     )
 
     h1 = mol.get_hash()
+    assert h1 == "7e604937e8a0c8e4c6426906e25b3002f785b1fc"
     assert mol.get_molecular_formula() == "H2O2"
 
     mol2 = Molecule(orient=False, **mol.dict())
@@ -779,3 +791,125 @@ def test_extras():
 
     mol = qcel.models.Molecule(symbols=["He"], geometry=[0, 0, 0], extras={"foo": "bar"})
     assert mol.extras["foo"] == "bar"
+
+
+_ref_mol_multiplicity_hash = {
+    "singlet": "b3855c64",
+    "triplet": "7caca87a",
+    "disinglet": "83a85546",
+    "ditriplet": "71d6ba82",
+}
+
+
+@pytest.mark.parametrize(
+    "mult_in,mult_store,validate,exp_hash",
+    [
+        pytest.param(3, 3, False, "triplet"),
+        pytest.param(3, 3, True, "triplet"),
+        # 3.1 -> 3 (validate=False) below documents the present bad behavior where a float mult
+        #   simply gets cast to int with no error. This will change soon. The validate=True throws a
+        #   nonspecific error that at least mentions type.
+        pytest.param(3.1, 3, False, "triplet"),
+        pytest.param(3.0, 3, False, "triplet"),
+        pytest.param(3.0, 3, True, "triplet"),
+        pytest.param(1, 1, False, "singlet"),
+        pytest.param(1, 1, True, "singlet"),
+        pytest.param(None, 1, False, "singlet"),
+        pytest.param(None, 1, True, "singlet"),
+        # fmt: off
+        pytest.param(3., 3, False, "triplet"),
+        pytest.param(3., 3, True, "triplet"),
+        # fmt: on
+    ],
+)
+def test_mol_multiplicity_types(mult_in, mult_store, validate, exp_hash):
+    # validate=False passes through pydantic validators. =True passes through molparse.
+
+    mol_args = {"symbols": ["He"], "geometry": [0, 0, 0], "validate": validate}
+    if mult_in is not None:
+        mol_args["molecular_multiplicity"] = mult_in
+
+    mol = qcel.models.Molecule(**mol_args)
+
+    assert mult_store == mol.molecular_multiplicity
+    assert type(mult_store) is type(mol.molecular_multiplicity)
+    assert mol.get_hash()[:8] == _ref_mol_multiplicity_hash[exp_hash]
+
+
+@pytest.mark.parametrize(
+    "mult_in,validate,error",
+    [
+        pytest.param(-3, False, "Multiplicity must be positive"),
+        pytest.param(-3, True, "Multiplicity must be positive"),
+    ],
+)
+def test_mol_multiplicity_types_errors(mult_in, validate, error):
+    mol_args = {"symbols": ["He"], "geometry": [0, 0, 0], "validate": validate}
+    if mult_in is not None:
+        mol_args["molecular_multiplicity"] = mult_in
+
+    with pytest.raises((ValueError, qcel.ValidationError)) as e:
+        qcel.models.Molecule(**mol_args)
+
+    assert error in str(e.value)
+
+
+@pytest.mark.parametrize(
+    "mol_mult_in,mult_in,mult_store,validate,exp_hash",
+    [
+        pytest.param(5, [3, 3], [3, 3], False, "ditriplet"),
+        pytest.param(5, [3, 3], [3, 3], True, "ditriplet"),
+        # 3.1 -> 3 (validate=False) below documents the present bad behavior where a float mult
+        #   simply gets cast to int with no error. This will change soon. The validate=True throws a
+        #   irreconcilable error.
+        pytest.param(5, [3.1, 3.4], [3, 3], False, "ditriplet"),
+        # fmt: off
+        pytest.param(5, [3.0, 3.], [3, 3], False, "ditriplet"),
+        pytest.param(5, [3.0, 3.], [3, 3], True, "ditriplet"),
+        # fmt: on
+        pytest.param(1, [1, 1], [1, 1], False, "disinglet"),
+        pytest.param(1, [1, 1], [1, 1], True, "disinglet"),
+        # None in frag_mult not allowed for validate=False
+        pytest.param(1, [None, None], [1, 1], True, "disinglet"),
+    ],
+)
+def test_frag_multiplicity_types(mol_mult_in, mult_in, mult_store, validate, exp_hash):
+    # validate=False passes through pydantic validators. =True passes through molparse.
+
+    mol_args = {
+        "symbols": ["He", "Ne"],
+        "geometry": [0, 0, 0, 2, 0, 0],
+        "fragments": [[0], [1]],
+        "validate": validate,
+        # below three passed in so hashes match btwn validate=T/F. otherwise, validate=False never
+        #   populates these fields
+        "molecular_charge": 0,
+        "fragment_charges": [0, 0],
+        "molecular_multiplicity": mol_mult_in,
+    }
+    if mult_in is not None:
+        mol_args["fragment_multiplicities"] = mult_in
+
+    mol = qcel.models.Molecule(**mol_args)
+
+    assert mult_store == mol.fragment_multiplicities
+    assert type(mult_store) is type(mol.fragment_multiplicities)
+    assert mol.get_hash()[:8] == _ref_mol_multiplicity_hash[exp_hash]
+
+
+@pytest.mark.parametrize(
+    "mult_in,validate,error",
+    [
+        pytest.param([-3, 1], False, "Multiplicity must be positive"),
+        pytest.param([-3, 1], True, "Multiplicity must be positive"),
+    ],
+)
+def test_frag_multiplicity_types_errors(mult_in, validate, error):
+    mol_args = {"symbols": ["He", "Ne"], "geometry": [0, 0, 0, 2, 0, 0], "fragments": [[0], [1]], "validate": validate}
+    if mult_in is not None:
+        mol_args["fragment_multiplicities"] = mult_in
+
+    with pytest.raises((ValueError, qcel.ValidationError)) as e:
+        qcel.models.Molecule(**mol_args)
+
+    assert error in str(e.value)
diff --git a/qcelemental/tests/test_molparse_from_string.py b/qcelemental/tests/test_molparse_from_string.py
@@ -1666,7 +1666,8 @@ def test_badmult_error():
             variables=[("bond", "3")],
         )
 
-    assert "fragment_multiplicities not among None or positive integer" in str(e.value)
+    # formerly: assert "fragment_multiplicities not among None or positive integer" in str(e.value)
+    assert "Multiplicity must be positive" in str(e.value)
 
 
 def test_badchg_error():