CederGroupHub · lbluque · Mar 30, 2022 · Mar 22, 2022 · Mar 23, 2022 · Mar 26, 2022
diff --git a/smol/cofe/space/domain.py b/smol/cofe/space/domain.py
@@ -129,9 +129,10 @@ def __init__(self, composition):
             composition (Composition):
                 Composition object that specifies the site space.
         """
-        if composition.num_atoms <= 0 or composition.num_atoms > 1:
+        # num_atoms = 0 identified as vacancy-only site-space.
+        if composition.num_atoms < 0 or composition.num_atoms > 1:
             raise ValueError(
-                "Number of atoms in Composition must be in (0, 1]"
+                "Number of atoms in Composition must be in [0, 1]"
                 f" got {composition.num_atoms}!"
             )
 

diff --git a/smol/moca/ensemble/base.py b/smol/moca/ensemble/base.py
@@ -29,29 +29,23 @@ class Ensemble(ABC):
 
     valid_mcmc_steps = None  # add this in derived classes
 
-    def __init__(self, processor, sublattices=None, inactive_sublattices=None):
+    def __init__(self, processor, sublattices=None):
         """Initialize class instance.
 
         Args:
             processor (Processor):
                 A processor that can compute the change in a property given
                 a set of flips.
             sublattices (list of Sublattice): optional
-                list of Lattice objects representing sites in the processor
-                supercell with same site spaces.
-            inactive_sublattices (list of InactiveSublattice): optional
-                list of Lattice objects representing sites in the processor
+                list of Sublattice objects representing sites in the processor
                 supercell with same site spaces.
         """
         if sublattices is None:
             sublattices = processor.get_sublattices()
-        if inactive_sublattices is None:
-            inactive_sublattices = processor.get_inactive_sublattices()
         self.num_energy_coefs = len(processor.coefs)
         self.thermo_boundaries = {}  # not pretty way to save general info
         self._processor = processor
         self._sublattices = sublattices
-        self._inact_sublattices = inactive_sublattices
 
     @classmethod
     def from_cluster_expansion(cls, cluster_expansion, supercell_matrix, **kwargs):
@@ -121,13 +115,13 @@ def processor(self):
     #  all sites are included.
     @property
     def sublattices(self):
-        """Get list of sublattices included in ensemble."""
+        """Get list of sub-lattices included in ensemble."""
         return self._sublattices
 
     @property
-    def inactive_sublattices(self):
-        """Get list of sublattices included in ensemble."""
-        return self._inact_sublattices
+    def active_sublattices(self):
+        """Get list of active sub-lattices."""
+        return [s for s in self.sublattices if s.is_active]
 
     @property
     def restricted_sites(self):
@@ -137,6 +131,44 @@ def restricted_sites(self):
             sites += sublattice.restricted_sites
         return sites
 
+    @property
+    def species(self):
+        """Species on active sublattices.
+
+        These are minimal species required in setting chemical potentials.
+        """
+        return list(
+            {sp for sublatt in self.active_sublattices for sp in sublatt.site_space}
+        )
+
+    def split_sublattice_by_species(self, sublattice_id, occu, species_in_partitions):
+        """Split a sub-lattice in system by its occupied species.
+
+        An example use case might be simulating topotactic Li extraction
+        and insertion, where we want to consider Li/Vac, TM and O as
+        different sub-lattices that can not be mixed by swapping.
+
+        Args:
+            sublattice_id (int):
+                The index of sub-lattice to split in self.sublattices.
+            occu (np.ndarray[int]):
+                An occupancy array to reference with.
+            species_in_partitions (List[List[int|Species|Vacancy|Element|str]]):
+                Each sub-list contains a few species or encodings of species in
+                the site space to be grouped as a new sub-lattice, namely,
+                sites with occu[sites] == specie in the sub-list, will be
+                used to initialize a new sub-lattice.
+                Sub-lists will be pre-sorted to ascending order.
+        """
+        splits = self.sublattices[sublattice_id].split_by_species(
+            occu, species_in_partitions
+        )
+        self._sublattices = (
+            self._sublattices[:sublattice_id]
+            + splits
+            + self._sublattices[sublattice_id + 1 :]
+        )
+
     @property
     @abstractmethod
     def natural_parameters(self):

diff --git a/smol/moca/ensemble/semigrand.py b/smol/moca/ensemble/semigrand.py
@@ -38,13 +38,7 @@ class SemiGrandEnsemble(Ensemble, MSONable):
 
     valid_mcmc_steps = ("flip",)
 
-    def __init__(
-        self,
-        processor,
-        chemical_potentials,
-        sublattices=None,
-        inactive_sublattices=None,
-    ):
+    def __init__(self, processor, chemical_potentials, sublattices=None):
         """Initialize MuSemiGrandEnsemble.
 
         Args:
@@ -57,25 +51,15 @@ def __init__(
                 List of Sublattice objects representing sites in the processor
                 supercell with same site spaces.
         """
-        super().__init__(
-            processor,
-            sublattices=sublattices,
-            inactive_sublattices=inactive_sublattices,
-        )
+        super().__init__(processor, sublattices=sublattices)
         self._params = np.append(self.processor.coefs, -1.0)
         # check that species are valid
         chemical_potentials = {
             get_species(k): v for k, v in chemical_potentials.items()
         }
-        species = {sp for sps in processor.unique_site_spaces for sp in sps}
-        for spec in chemical_potentials.keys():
-            if spec not in species:
-                raise ValueError(
-                    f"Species {spec} in provided chemical "
-                    "potentials is not an allowed species in the "
-                    f"system: {species}"
-                )
-        for spec in species:
+        # Excessive species not appeared on active sub-lattices
+        # will be dropped.
+        for spec in self.species:
             if spec not in chemical_potentials.keys():
                 raise ValueError(
                     f"Species {spec} was not assigned a chemical "
@@ -86,10 +70,10 @@ def __init__(
         self._dfeatures = np.empty(len(processor.coefs) + 1)
         self._features = np.empty(len(processor.coefs) + 1)
 
-        self._mus = chemical_potentials
-        self._mu_table = self._build_mu_table(chemical_potentials)
+        self._mus = {k: v for k, v in chemical_potentials.items() if k in self.species}
+        self._mu_table = self._build_mu_table(self._mus)
         self.thermo_boundaries = {
-            "chemical-potentials": {str(k): v for k, v in chemical_potentials.items()}
+            "chemical-potentials": {str(k): v for k, v in self._mus.items()}
         }
 
     @property
@@ -116,15 +100,18 @@ def chemical_potentials(self, value):
                     "you are using has only string keys or only Species "
                     "objects as keys."
                 )
-        value = {get_species(k): v for k, v in value.items()}
-        if set(value.keys()) != set(self._mus.keys()):
+        value = {get_species(k): v for k, v in value.items() if k in self.species}
+        if set(value.keys()) != set(self.species):
             raise ValueError(
                 "Chemical potentials given are missing species. "
                 "Values must be given for each of the following:"
-                f" {self._mus.keys()}"
+                f" {self.species}"
             )
         self._mus = value
         self._mu_table = self._build_mu_table(value)
+        self.thermo_boundaries = {
+            "chemical-potentials": {str(k): v for k, v in self._mus.items()}
+        }
 
     def compute_feature_vector(self, occupancy):
         """Compute the feature vector for a given occupancy.
@@ -170,6 +157,34 @@ def compute_chemical_work(self, occupancy):
             self._mu_table[site][species] for site, species in enumerate(occupancy)
         )
 
+    def split_sublattice_by_species(self, sublattice_id, occu, species_in_partitions):
+        """Split a sub-lattice in system by its occupied species.
+
+        An example use case might be simulating topotactic Li extraction
+        and insertion, where we want to consider Li/Vac, TM and O as
+        different sub-lattices that can not be mixed by swapping.
+
+        In the grand canonical ensemble, the mu table will also be updated
+        after split.
+
+        Args:
+            sublattice_id (int):
+                The index of sub-lattice to split in self.sublattices.
+            occu (np.ndarray[int]):
+                An occupancy array to reference with.
+            species_in_partitions (List[List[int|Species|Vacancy|Element|str]]):
+                Each sub-list contains a few species or encodings of species in
+                the site space to be grouped as a new sub-lattice, namely,
+                sites with occu[sites] == specie in the sub-list, will be
+                used to initialize a new sub-lattice.
+                Sub-lists will be pre-sorted to ascending order.
+        """
+        super().split_sublattice_by_species(sublattice_id, occu, species_in_partitions)
+        # Species in active sub-lattices may change after split.
+        # Need to reset and rebuild mu table.
+        new_chemical_potentials = {spec: self._mus[spec] for spec in self.species}
+        self.chemical_potentials = new_chemical_potentials
+
     def _build_mu_table(self, chemical_potentials):
         """Build an array for chemical potentials for all sites in system.
 
@@ -180,13 +195,15 @@ def _build_mu_table(self, chemical_potentials):
         be given values of 0. Also rows representing sites with not partial
         occupancy will have all 0 values and should never be used.
         """
-        num_cols = max(
-            len(site_space) for site_space in self.processor.unique_site_spaces
-        )
+        # Mu table should be built with ensemble, rather than processor data.
+        # Otherwise you may get wrong species encoding if the sub-lattices are
+        # split.
+        num_cols = max(max(sl.encoding) for sl in self.sublattices) + 1
+        # Sublattice can only be initialized as default, or splitted from default.
         table = np.zeros((self.num_sites, num_cols))
-        for sublatt in self.sublattices:
+        for sublatt in self.active_sublattices:
             ordered_pots = [chemical_potentials[sp] for sp in sublatt.site_space]
-            table[sublatt.sites, : len(ordered_pots)] = ordered_pots
+            table[sublatt.sites[:, None], sublatt.encoding] = ordered_pots
         return table
 
     def as_dict(self):
@@ -220,8 +237,13 @@ def from_dict(cls, d):
             else:
                 spec = Element(spec["element"])
             chemical_potentials[spec] = chem_pot
+
+        sublatts = d.get("sublattices")  # keep backwards compatibility
+        if sublatts is not None:
+            sublatts = [Sublattice.from_dict(sl_d) for sl_d in sublatts]
+
         return cls(
             Processor.from_dict(d["processor"]),
             chemical_potentials=chemical_potentials,
-            sublattices=[Sublattice.from_dict(s) for s in d["sublattices"]],
+            sublattices=sublatts,
         )
diff --git a/smol/moca/processor/base.py b/smol/moca/processor/base.py
@@ -18,7 +18,7 @@
 from pymatgen.core import PeriodicSite, Structure
 
 from smol.cofe.space import Vacancy, get_allowed_species, get_site_spaces
-from smol.moca.sublattice import InactiveSublattice, Sublattice
+from smol.moca.sublattice import Sublattice
 from smol.utils import get_subclasses
 
 
@@ -61,11 +61,11 @@ def __init__(self, cluster_subspace, supercell_matrix, coefficients):
             self.coefs = self.coefs[np.newaxis]
 
         # this can be used (maybe should) to check if a flip is valid
-        active_site_spaces = set(get_site_spaces(self._subspace.expansion_structure))
-        self.unique_site_spaces = tuple(active_site_spaces)
-        # and keep a record of sites with no DOFs
-        all_site_spaces = set(get_site_spaces(self._subspace.structure))
-        self.inactive_site_spaces = tuple(all_site_spaces - active_site_spaces)
+        site_spaces = set(get_site_spaces(self.structure))
+        self.unique_site_spaces = tuple(site_spaces)
+        self.active_site_spaces = tuple(
+            space for space in self.unique_site_spaces if len(space) > 1
+        )
 
         self.allowed_species = get_allowed_species(self.structure)
         self.size = self._subspace.num_prims_from_matrix(supercell_matrix)
@@ -206,6 +206,11 @@ def decode_occupancy(self, encoded_occupancy):
     def get_sublattices(self):
         """Get a list of sublattices from a processor.
 
+        Initialized as the default encoding, but encoding can be changed
+        in Ensemble (for example, when a sub-lattice is split by occupancy,
+        usually seen in a de-lithiation MC). Therefore, these sub-lattices,
+        and self.unique_site_spaces are not always consistent with the
+        sub-lattices in the ensemble. Use them carefully!
         Returns:
             list of Sublattice
         """
@@ -223,26 +228,6 @@ def get_sublattices(self):
             for site_space in self.unique_site_spaces
         ]
 
-    def get_inactive_sublattices(self):
-        """Get a list of inactive sublattices from a processor.
-
-        Returns:
-            list of InactiveSublattice
-        """
-        return [
-            InactiveSublattice(
-                site_space,
-                np.array(
-                    [
-                        i
-                        for i, spec in enumerate(self.allowed_species)
-                        if spec == list(site_space.keys())
-                    ]
-                ),
-            )
-            for site_space in self.inactive_site_spaces
-        ]
-
     def compute_average_drift(self, iterations=1000):
         """Compute average forward and reverse drift for the given property.