Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Sublattice rework #179

Merged
merged 11 commits into from
Mar 30, 2022
5 changes: 3 additions & 2 deletions smol/cofe/space/domain.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,9 +129,10 @@ def __init__(self, composition):
composition (Composition):
Composition object that specifies the site space.
"""
if composition.num_atoms <= 0 or composition.num_atoms > 1:
# num_atoms = 0 identified as vacancy-only site-space.
if composition.num_atoms < 0 or composition.num_atoms > 1:
raise ValueError(
"Number of atoms in Composition must be in (0, 1]"
"Number of atoms in Composition must be in [0, 1]"
f" got {composition.num_atoms}!"
)

Expand Down
56 changes: 44 additions & 12 deletions smol/moca/ensemble/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,29 +29,23 @@ class Ensemble(ABC):

valid_mcmc_steps = None # add this in derived classes

def __init__(self, processor, sublattices=None, inactive_sublattices=None):
def __init__(self, processor, sublattices=None):
"""Initialize class instance.

Args:
processor (Processor):
A processor that can compute the change in a property given
a set of flips.
sublattices (list of Sublattice): optional
list of Lattice objects representing sites in the processor
supercell with same site spaces.
inactive_sublattices (list of InactiveSublattice): optional
list of Lattice objects representing sites in the processor
list of Sublattice objects representing sites in the processor
supercell with same site spaces.
"""
if sublattices is None:
sublattices = processor.get_sublattices()
if inactive_sublattices is None:
inactive_sublattices = processor.get_inactive_sublattices()
self.num_energy_coefs = len(processor.coefs)
self.thermo_boundaries = {} # not pretty way to save general info
self._processor = processor
self._sublattices = sublattices
self._inact_sublattices = inactive_sublattices

@classmethod
def from_cluster_expansion(cls, cluster_expansion, supercell_matrix, **kwargs):
Expand Down Expand Up @@ -121,13 +115,13 @@ def processor(self):
# all sites are included.
@property
def sublattices(self):
"""Get list of sublattices included in ensemble."""
"""Get list of sub-lattices included in ensemble."""
return self._sublattices

@property
def inactive_sublattices(self):
"""Get list of sublattices included in ensemble."""
return self._inact_sublattices
def active_sublattices(self):
"""Get list of active sub-lattices."""
return [s for s in self.sublattices if s.is_active]

@property
def restricted_sites(self):
Expand All @@ -137,6 +131,44 @@ def restricted_sites(self):
sites += sublattice.restricted_sites
return sites

@property
def species(self):
"""Species on active sublattices.

These are minimal species required in setting chemical potentials.
"""
return list(
{sp for sublatt in self.active_sublattices for sp in sublatt.site_space}
)

def split_sublattice_by_species(self, sublattice_id, occu, species_in_partitions):
"""Split a sub-lattice in system by its occupied species.

An example use case might be simulating topotactic Li extraction
and insertion, where we want to consider Li/Vac, TM and O as
different sub-lattices that can not be mixed by swapping.

Args:
sublattice_id (int):
The index of sub-lattice to split in self.sublattices.
occu (np.ndarray[int]):
An occupancy array to reference with.
species_in_partitions (List[List[int|Species|Vacancy|Element|str]]):
Each sub-list contains a few species or encodings of species in
the site space to be grouped as a new sub-lattice, namely,
sites with occu[sites] == specie in the sub-list, will be
used to initialize a new sub-lattice.
Sub-lists will be pre-sorted to ascending order.
"""
splits = self.sublattices[sublattice_id].split_by_species(
occu, species_in_partitions
)
self._sublattices = (
self._sublattices[:sublattice_id]
+ splits
+ self._sublattices[sublattice_id + 1 :]
)

@property
@abstractmethod
def natural_parameters(self):
Expand Down
88 changes: 55 additions & 33 deletions smol/moca/ensemble/semigrand.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,13 +38,7 @@ class SemiGrandEnsemble(Ensemble, MSONable):

valid_mcmc_steps = ("flip",)

def __init__(
self,
processor,
chemical_potentials,
sublattices=None,
inactive_sublattices=None,
):
def __init__(self, processor, chemical_potentials, sublattices=None):
"""Initialize MuSemiGrandEnsemble.

Args:
Expand All @@ -57,25 +51,15 @@ def __init__(
List of Sublattice objects representing sites in the processor
supercell with same site spaces.
"""
super().__init__(
processor,
sublattices=sublattices,
inactive_sublattices=inactive_sublattices,
)
super().__init__(processor, sublattices=sublattices)
self._params = np.append(self.processor.coefs, -1.0)
# check that species are valid
chemical_potentials = {
get_species(k): v for k, v in chemical_potentials.items()
}
species = {sp for sps in processor.unique_site_spaces for sp in sps}
for spec in chemical_potentials.keys():
if spec not in species:
raise ValueError(
f"Species {spec} in provided chemical "
"potentials is not an allowed species in the "
f"system: {species}"
)
for spec in species:
# Excessive species not appeared on active sub-lattices
# will be dropped.
for spec in self.species:
if spec not in chemical_potentials.keys():
raise ValueError(
f"Species {spec} was not assigned a chemical "
Expand All @@ -86,10 +70,10 @@ def __init__(
self._dfeatures = np.empty(len(processor.coefs) + 1)
self._features = np.empty(len(processor.coefs) + 1)

self._mus = chemical_potentials
self._mu_table = self._build_mu_table(chemical_potentials)
self._mus = {k: v for k, v in chemical_potentials.items() if k in self.species}
self._mu_table = self._build_mu_table(self._mus)
self.thermo_boundaries = {
"chemical-potentials": {str(k): v for k, v in chemical_potentials.items()}
"chemical-potentials": {str(k): v for k, v in self._mus.items()}
}

@property
Expand All @@ -116,15 +100,18 @@ def chemical_potentials(self, value):
"you are using has only string keys or only Species "
"objects as keys."
)
value = {get_species(k): v for k, v in value.items()}
if set(value.keys()) != set(self._mus.keys()):
value = {get_species(k): v for k, v in value.items() if k in self.species}
if set(value.keys()) != set(self.species):
raise ValueError(
"Chemical potentials given are missing species. "
"Values must be given for each of the following:"
f" {self._mus.keys()}"
f" {self.species}"
)
self._mus = value
self._mu_table = self._build_mu_table(value)
self.thermo_boundaries = {
"chemical-potentials": {str(k): v for k, v in self._mus.items()}
}

def compute_feature_vector(self, occupancy):
"""Compute the feature vector for a given occupancy.
Expand Down Expand Up @@ -170,6 +157,34 @@ def compute_chemical_work(self, occupancy):
self._mu_table[site][species] for site, species in enumerate(occupancy)
)

def split_sublattice_by_species(self, sublattice_id, occu, species_in_partitions):
"""Split a sub-lattice in system by its occupied species.

An example use case might be simulating topotactic Li extraction
and insertion, where we want to consider Li/Vac, TM and O as
different sub-lattices that can not be mixed by swapping.

In the grand canonical ensemble, the mu table will also be updated
after split.

Args:
sublattice_id (int):
The index of sub-lattice to split in self.sublattices.
occu (np.ndarray[int]):
An occupancy array to reference with.
species_in_partitions (List[List[int|Species|Vacancy|Element|str]]):
Each sub-list contains a few species or encodings of species in
the site space to be grouped as a new sub-lattice, namely,
sites with occu[sites] == specie in the sub-list, will be
used to initialize a new sub-lattice.
Sub-lists will be pre-sorted to ascending order.
"""
super().split_sublattice_by_species(sublattice_id, occu, species_in_partitions)
# Species in active sub-lattices may change after split.
# Need to reset and rebuild mu table.
new_chemical_potentials = {spec: self._mus[spec] for spec in self.species}
self.chemical_potentials = new_chemical_potentials

def _build_mu_table(self, chemical_potentials):
"""Build an array for chemical potentials for all sites in system.

Expand All @@ -180,13 +195,15 @@ def _build_mu_table(self, chemical_potentials):
be given values of 0. Also rows representing sites with not partial
occupancy will have all 0 values and should never be used.
"""
num_cols = max(
len(site_space) for site_space in self.processor.unique_site_spaces
)
# Mu table should be built with ensemble, rather than processor data.
# Otherwise you may get wrong species encoding if the sub-lattices are
# split.
num_cols = max(max(sl.encoding) for sl in self.sublattices) + 1
# Sublattice can only be initialized as default, or splitted from default.
table = np.zeros((self.num_sites, num_cols))
for sublatt in self.sublattices:
for sublatt in self.active_sublattices:
ordered_pots = [chemical_potentials[sp] for sp in sublatt.site_space]
table[sublatt.sites, : len(ordered_pots)] = ordered_pots
table[sublatt.sites[:, None], sublatt.encoding] = ordered_pots
return table

def as_dict(self):
Expand Down Expand Up @@ -220,8 +237,13 @@ def from_dict(cls, d):
else:
spec = Element(spec["element"])
chemical_potentials[spec] = chem_pot

sublatts = d.get("sublattices") # keep backwards compatibility
if sublatts is not None:
sublatts = [Sublattice.from_dict(sl_d) for sl_d in sublatts]

return cls(
Processor.from_dict(d["processor"]),
chemical_potentials=chemical_potentials,
sublattices=[Sublattice.from_dict(s) for s in d["sublattices"]],
sublattices=sublatts,
)
37 changes: 11 additions & 26 deletions smol/moca/processor/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
from pymatgen.core import PeriodicSite, Structure

from smol.cofe.space import Vacancy, get_allowed_species, get_site_spaces
from smol.moca.sublattice import InactiveSublattice, Sublattice
from smol.moca.sublattice import Sublattice
from smol.utils import get_subclasses


Expand Down Expand Up @@ -61,11 +61,11 @@ def __init__(self, cluster_subspace, supercell_matrix, coefficients):
self.coefs = self.coefs[np.newaxis]

# this can be used (maybe should) to check if a flip is valid
active_site_spaces = set(get_site_spaces(self._subspace.expansion_structure))
self.unique_site_spaces = tuple(active_site_spaces)
# and keep a record of sites with no DOFs
all_site_spaces = set(get_site_spaces(self._subspace.structure))
self.inactive_site_spaces = tuple(all_site_spaces - active_site_spaces)
site_spaces = set(get_site_spaces(self.structure))
self.unique_site_spaces = tuple(site_spaces)
self.active_site_spaces = tuple(
space for space in self.unique_site_spaces if len(space) > 1
)

self.allowed_species = get_allowed_species(self.structure)
self.size = self._subspace.num_prims_from_matrix(supercell_matrix)
Expand Down Expand Up @@ -206,6 +206,11 @@ def decode_occupancy(self, encoded_occupancy):
def get_sublattices(self):
"""Get a list of sublattices from a processor.

Initialized as the default encoding, but encoding can be changed
in Ensemble (for example, when a sub-lattice is split by occupancy,
usually seen in a de-lithiation MC). Therefore, these sub-lattices,
and self.unique_site_spaces are not always consistent with the
sub-lattices in the ensemble. Use them carefully!
Returns:
list of Sublattice
"""
Expand All @@ -223,26 +228,6 @@ def get_sublattices(self):
for site_space in self.unique_site_spaces
]

def get_inactive_sublattices(self):
"""Get a list of inactive sublattices from a processor.

Returns:
list of InactiveSublattice
"""
return [
InactiveSublattice(
site_space,
np.array(
[
i
for i, spec in enumerate(self.allowed_species)
if spec == list(site_space.keys())
]
),
)
for site_space in self.inactive_site_spaces
]

def compute_average_drift(self, iterations=1000):
"""Compute average forward and reverse drift for the given property.

Expand Down
Loading