diff --git a/invokeai/app/invocations/flux_text_encoder.py b/invokeai/app/invocations/flux_text_encoder.py index 0e7ebd6d69b..a19dda30b8a 100644 --- a/invokeai/app/invocations/flux_text_encoder.py +++ b/invokeai/app/invocations/flux_text_encoder.py @@ -40,7 +40,10 @@ class FluxTextEncoderInvocation(BaseInvocation): @torch.no_grad() def invoke(self, context: InvocationContext) -> FluxConditioningOutput: - t5_embeddings, clip_embeddings = self._encode_prompt(context) + # Note: The T5 and CLIP encoding are done in separate functions to ensure that all model references are locally + # scoped. This ensures that the T5 model can be freed and gc'd before loading the CLIP model (if necessary). + t5_embeddings = self._t5_encode(context) + clip_embeddings = self._clip_encode(context) conditioning_data = ConditioningFieldData( conditionings=[FLUXConditioningInfo(clip_embeds=clip_embeddings, t5_embeds=t5_embeddings)] ) @@ -48,12 +51,7 @@ def invoke(self, context: InvocationContext) -> FluxConditioningOutput: conditioning_name = context.conditioning.save(conditioning_data) return FluxConditioningOutput.build(conditioning_name) - def _encode_prompt(self, context: InvocationContext) -> tuple[torch.Tensor, torch.Tensor]: - # Load CLIP. - clip_tokenizer_info = context.models.load(self.clip.tokenizer) - clip_text_encoder_info = context.models.load(self.clip.text_encoder) - - # Load T5. + def _t5_encode(self, context: InvocationContext) -> torch.Tensor: t5_tokenizer_info = context.models.load(self.t5_encoder.tokenizer) t5_text_encoder_info = context.models.load(self.t5_encoder.text_encoder) @@ -70,6 +68,15 @@ def _encode_prompt(self, context: InvocationContext) -> tuple[torch.Tensor, torc prompt_embeds = t5_encoder(prompt) + assert isinstance(prompt_embeds, torch.Tensor) + return prompt_embeds + + def _clip_encode(self, context: InvocationContext) -> torch.Tensor: + clip_tokenizer_info = context.models.load(self.clip.tokenizer) + clip_text_encoder_info = context.models.load(self.clip.text_encoder) + + prompt = [self.prompt] + with ( clip_text_encoder_info as clip_text_encoder, clip_tokenizer_info as clip_tokenizer, @@ -81,6 +88,5 @@ def _encode_prompt(self, context: InvocationContext) -> tuple[torch.Tensor, torc pooled_prompt_embeds = clip_encoder(prompt) - assert isinstance(prompt_embeds, torch.Tensor) assert isinstance(pooled_prompt_embeds, torch.Tensor) - return prompt_embeds, pooled_prompt_embeds + return pooled_prompt_embeds diff --git a/invokeai/app/invocations/flux_text_to_image.py b/invokeai/app/invocations/flux_text_to_image.py index b6ff06c67bf..248122d8cdf 100644 --- a/invokeai/app/invocations/flux_text_to_image.py +++ b/invokeai/app/invocations/flux_text_to_image.py @@ -58,13 +58,7 @@ class FluxTextToImageInvocation(BaseInvocation, WithMetadata, WithBoard): @torch.no_grad() def invoke(self, context: InvocationContext) -> ImageOutput: - # Load the conditioning data. 
- cond_data = context.conditioning.load(self.positive_text_conditioning.conditioning_name) - assert len(cond_data.conditionings) == 1 - flux_conditioning = cond_data.conditionings[0] - assert isinstance(flux_conditioning, FLUXConditioningInfo) - - latents = self._run_diffusion(context, flux_conditioning.clip_embeds, flux_conditioning.t5_embeds) + latents = self._run_diffusion(context) image = self._run_vae_decoding(context, latents) image_dto = context.images.save(image=image) return ImageOutput.build(image_dto) @@ -72,12 +66,20 @@ def invoke(self, context: InvocationContext) -> ImageOutput: def _run_diffusion( self, context: InvocationContext, - clip_embeddings: torch.Tensor, - t5_embeddings: torch.Tensor, ): - transformer_info = context.models.load(self.transformer.transformer) inference_dtype = torch.bfloat16 + # Load the conditioning data. + cond_data = context.conditioning.load(self.positive_text_conditioning.conditioning_name) + assert len(cond_data.conditionings) == 1 + flux_conditioning = cond_data.conditionings[0] + assert isinstance(flux_conditioning, FLUXConditioningInfo) + flux_conditioning = flux_conditioning.to(dtype=inference_dtype) + t5_embeddings = flux_conditioning.t5_embeds + clip_embeddings = flux_conditioning.clip_embeds + + transformer_info = context.models.load(self.transformer.transformer) + # Prepare input noise. x = get_noise( num_samples=1, @@ -88,24 +90,19 @@ def _run_diffusion( seed=self.seed, ) - img, img_ids = prepare_latent_img_patches(x) + x, img_ids = prepare_latent_img_patches(x) is_schnell = "schnell" in transformer_info.config.config_path timesteps = get_schedule( num_steps=self.num_steps, - image_seq_len=img.shape[1], + image_seq_len=x.shape[1], shift=not is_schnell, ) bs, t5_seq_len, _ = t5_embeddings.shape txt_ids = torch.zeros(bs, t5_seq_len, 3, dtype=inference_dtype, device=TorchDevice.choose_torch_device()) - # HACK(ryand): Manually empty the cache. Currently we don't check the size of the model before loading it from - # disk. Since the transformer model is large (24GB), there's a good chance that it will OOM on 32GB RAM systems - # if the cache is not empty. - context.models._services.model_manager.load.ram_cache.make_room(24 * 2**30) - with transformer_info as transformer: assert isinstance(transformer, Flux) @@ -140,7 +137,7 @@ def step_callback() -> None: x = denoise( model=transformer, - img=img, + img=x, img_ids=img_ids, txt=t5_embeddings, txt_ids=txt_ids, diff --git a/invokeai/backend/flux/sampling.py b/invokeai/backend/flux/sampling.py index 19de48ae81a..7a35b0aedfb 100644 --- a/invokeai/backend/flux/sampling.py +++ b/invokeai/backend/flux/sampling.py @@ -111,16 +111,7 @@ def denoise( step_callback: Callable[[], None], guidance: float = 4.0, ): - dtype = model.txt_in.bias.dtype - - # TODO(ryand): This shouldn't be necessary if we manage the dtypes properly in the caller. - img = img.to(dtype=dtype) - img_ids = img_ids.to(dtype=dtype) - txt = txt.to(dtype=dtype) - txt_ids = txt_ids.to(dtype=dtype) - vec = vec.to(dtype=dtype) - - # this is ignored for schnell + # guidance_vec is ignored for schnell. guidance_vec = torch.full((img.shape[0],), guidance, device=img.device, dtype=img.dtype) for t_curr, t_prev in tqdm(list(zip(timesteps[:-1], timesteps[1:], strict=True))): t_vec = torch.full((img.shape[0],), t_curr, dtype=img.dtype, device=img.device) @@ -168,9 +159,9 @@ def prepare_latent_img_patches(latent_img: torch.Tensor) -> tuple[torch.Tensor, img = repeat(img, "1 ... -> bs ...", bs=bs) # Generate patch position ids. 
- img_ids = torch.zeros(h // 2, w // 2, 3, device=img.device) - img_ids[..., 1] = img_ids[..., 1] + torch.arange(h // 2, device=img.device)[:, None] - img_ids[..., 2] = img_ids[..., 2] + torch.arange(w // 2, device=img.device)[None, :] + img_ids = torch.zeros(h // 2, w // 2, 3, device=img.device, dtype=img.dtype) + img_ids[..., 1] = img_ids[..., 1] + torch.arange(h // 2, device=img.device, dtype=img.dtype)[:, None] + img_ids[..., 2] = img_ids[..., 2] + torch.arange(w // 2, device=img.device, dtype=img.dtype)[None, :] img_ids = repeat(img_ids, "h w c -> b (h w) c", b=bs) return img, img_ids diff --git a/invokeai/backend/model_manager/load/load_default.py b/invokeai/backend/model_manager/load/load_default.py index ce9811534e8..d4e88857fa8 100644 --- a/invokeai/backend/model_manager/load/load_default.py +++ b/invokeai/backend/model_manager/load/load_default.py @@ -72,6 +72,7 @@ def _load_and_cache(self, config: AnyModelConfig, submodel_type: Optional[SubMod pass config.path = str(self._get_model_path(config)) + self._ram_cache.make_room(self.get_size_fs(config, Path(config.path), submodel_type)) loaded_model = self._load_model(config, submodel_type) self._ram_cache.put( diff --git a/invokeai/backend/model_manager/load/model_cache/model_cache_base.py b/invokeai/backend/model_manager/load/model_cache/model_cache_base.py index 012fd42d556..97fd401da03 100644 --- a/invokeai/backend/model_manager/load/model_cache/model_cache_base.py +++ b/invokeai/backend/model_manager/load/model_cache/model_cache_base.py @@ -193,15 +193,6 @@ def get( """ pass - @abstractmethod - def exists( - self, - key: str, - submodel_type: Optional[SubModelType] = None, - ) -> bool: - """Return true if the model identified by key and submodel_type is in the cache.""" - pass - @abstractmethod def cache_size(self) -> int: """Get the total size of the models currently cached.""" diff --git a/invokeai/backend/model_manager/load/model_cache/model_cache_default.py b/invokeai/backend/model_manager/load/model_cache/model_cache_default.py index 482585e8e74..4b0ebbd40e5 100644 --- a/invokeai/backend/model_manager/load/model_cache/model_cache_default.py +++ b/invokeai/backend/model_manager/load/model_cache/model_cache_default.py @@ -1,22 +1,6 @@ # Copyright (c) 2024 Lincoln D. Stein and the InvokeAI Development team # TODO: Add Stalker's proper name to copyright -""" -Manage a RAM cache of diffusion/transformer models for fast switching. -They are moved between GPU VRAM and CPU RAM as necessary. If the cache -grows larger than a preset maximum, then the least recently used -model will be cleared and (re)loaded from disk when next needed. - -The cache returns context manager generators designed to load the -model into the GPU within the context, and unload outside the -context. Use like this: - - cache = ModelCache(max_cache_size=7.5) - with cache.get_model('runwayml/stable-diffusion-1-5') as SD1, - cache.get_model('stabilityai/stable-diffusion-2') as SD2: - do_something_in_GPU(SD1,SD2) - - -""" +""" """ import gc import math @@ -40,45 +24,64 @@ from invokeai.backend.util.devices import TorchDevice from invokeai.backend.util.logging import InvokeAILogger -# Maximum size of the cache, in gigs -# Default is roughly enough to hold three fp16 diffusers models in RAM simultaneously -DEFAULT_MAX_CACHE_SIZE = 6.0 - -# amount of GPU memory to hold in reserve for use by generations (GB) -DEFAULT_MAX_VRAM_CACHE_SIZE = 2.75 - -# actual size of a gig -GIG = 1073741824 +# Size of a GB in bytes. +GB = 2**30 # Size of a MB in bytes. 
MB = 2**20 class ModelCache(ModelCacheBase[AnyModel]): - """Implementation of ModelCacheBase.""" + """A cache for managing models in memory. + + The cache is based on two levels of model storage: + - execution_device: The device where most models are executed (typically "cuda", "mps", or "cpu"). + - storage_device: The device where models are offloaded when not in active use (typically "cpu"). + + The model cache is based on the following assumptions: + - storage_device_mem_size > execution_device_mem_size + - disk_to_storage_device_transfer_time >> storage_device_to_execution_device_transfer_time + + A copy of all models in the cache is always kept on the storage_device. A subset of the models also have a copy on + the execution_device. + + Models are moved between the storage_device and the execution_device as necessary. Cache size limits are enforced + on both the storage_device and the execution_device. The execution_device cache uses a smallest-first offload + policy. The storage_device cache uses a least-recently-used (LRU) offload policy. + + Note: Neither of these offload policies has really been compared against alternatives. It's likely that different + policies would be better, although the optimal policies are likely heavily dependent on usage patterns and HW + configuration. + + The cache returns context manager generators designed to load the model into the execution device (often GPU) within + the context, and unload outside the context. + + Example usage: + ``` + cache = ModelCache(max_cache_size=7.5, max_vram_cache_size=6.0) + with cache.get_model('runwayml/stable-diffusion-1-5') as SD1: + do_something_on_gpu(SD1) + ``` + """ def __init__( self, - max_cache_size: float = DEFAULT_MAX_CACHE_SIZE, - max_vram_cache_size: float = DEFAULT_MAX_VRAM_CACHE_SIZE, + max_cache_size: float, + max_vram_cache_size: float, execution_device: torch.device = torch.device("cuda"), storage_device: torch.device = torch.device("cpu"), - precision: torch.dtype = torch.float16, - sequential_offload: bool = False, lazy_offloading: bool = True, - sha_chunksize: int = 16777216, log_memory_usage: bool = False, logger: Optional[Logger] = None, ): """ Initialize the model RAM cache. - :param max_cache_size: Maximum size of the RAM cache [6.0 GB] + :param max_cache_size: Maximum size of the storage_device cache in GBs. + :param max_vram_cache_size: Maximum size of the execution_device cache in GBs. :param execution_device: Torch device to load active model into [torch.device('cuda')] :param storage_device: Torch device to save inactive model in [torch.device('cpu')] - :param precision: Precision for loaded models [torch.float16] - :param lazy_offloading: Keep model in VRAM until another model needs to be loaded - :param sequential_offload: Conserve VRAM by loading and unloading each stage of the pipeline sequentially + :param lazy_offloading: Keep model in VRAM until another model needs to be loaded. :param log_memory_usage: If True, a memory snapshot will be captured before and after every model cache operation, and the result will be logged (at debug level). 
There is a time cost to capturing the memory snapshots, so it is recommended to disable this feature unless you are actively inspecting the model cache's @@ -86,7 +89,6 @@ def __init__( """ # allow lazy offloading only when vram cache enabled self._lazy_offloading = lazy_offloading and max_vram_cache_size > 0 - self._precision: torch.dtype = precision self._max_cache_size: float = max_cache_size self._max_vram_cache_size: float = max_vram_cache_size self._execution_device: torch.device = execution_device @@ -145,15 +147,6 @@ def cache_size(self) -> int: total += cache_record.size return total - def exists( - self, - key: str, - submodel_type: Optional[SubModelType] = None, - ) -> bool: - """Return true if the model identified by key and submodel_type is in the cache.""" - key = self._make_cache_key(key, submodel_type) - return key in self._cached_models - def put( self, key: str, @@ -203,7 +196,7 @@ def get( # more stats if self.stats: stats_name = stats_name or key - self.stats.cache_size = int(self._max_cache_size * GIG) + self.stats.cache_size = int(self._max_cache_size * GB) self.stats.high_watermark = max(self.stats.high_watermark, self.cache_size()) self.stats.in_cache = len(self._cached_models) self.stats.loaded_model_sizes[stats_name] = max( @@ -231,10 +224,13 @@ def _make_cache_key(self, model_key: str, submodel_type: Optional[SubModelType] return model_key def offload_unlocked_models(self, size_required: int) -> None: - """Move any unused models from VRAM.""" - reserved = self._max_vram_cache_size * GIG + """Offload models from the execution_device to make room for size_required. + + :param size_required: The amount of space to clear in the execution_device cache, in bytes. + """ + reserved = self._max_vram_cache_size * GB vram_in_use = torch.cuda.memory_allocated() + size_required - self.logger.debug(f"{(vram_in_use/GIG):.2f}GB VRAM needed for models; max allowed={(reserved/GIG):.2f}GB") + self.logger.debug(f"{(vram_in_use/GB):.2f}GB VRAM needed for models; max allowed={(reserved/GB):.2f}GB") for _, cache_entry in sorted(self._cached_models.items(), key=lambda x: x[1].size): if vram_in_use <= reserved: break @@ -245,7 +241,7 @@ def offload_unlocked_models(self, size_required: int) -> None: cache_entry.loaded = False vram_in_use = torch.cuda.memory_allocated() + size_required self.logger.debug( - f"Removing {cache_entry.key} from VRAM to free {(cache_entry.size/GIG):.2f}GB; vram free = {(torch.cuda.memory_allocated()/GIG):.2f}GB" + f"Removing {cache_entry.key} from VRAM to free {(cache_entry.size/GB):.2f}GB; vram free = {(torch.cuda.memory_allocated()/GB):.2f}GB" ) TorchDevice.empty_cache() @@ -303,7 +299,7 @@ def move_model_to_device(self, cache_entry: CacheRecord[AnyModel], target_device self.logger.debug( f"Moved model '{cache_entry.key}' from {source_device} to" f" {target_device} in {(end_model_to_time-start_model_to_time):.2f}s." - f"Estimated model size: {(cache_entry.size/GIG):.3f} GB." + f"Estimated model size: {(cache_entry.size/GB):.3f} GB." f"{get_pretty_snapshot_diff(snapshot_before, snapshot_after)}" ) @@ -326,14 +322,14 @@ def move_model_to_device(self, cache_entry: CacheRecord[AnyModel], target_device f"Moving model '{cache_entry.key}' from {source_device} to" f" {target_device} caused an unexpected change in VRAM usage. The model's" " estimated size may be incorrect. 
Estimated model size:" - f" {(cache_entry.size/GIG):.3f} GB.\n" + f" {(cache_entry.size/GB):.3f} GB.\n" f"{get_pretty_snapshot_diff(snapshot_before, snapshot_after)}" ) def print_cuda_stats(self) -> None: """Log CUDA diagnostics.""" - vram = "%4.2fG" % (torch.cuda.memory_allocated() / GIG) - ram = "%4.2fG" % (self.cache_size() / GIG) + vram = "%4.2fG" % (torch.cuda.memory_allocated() / GB) + ram = "%4.2fG" % (self.cache_size() / GB) in_ram_models = 0 in_vram_models = 0 @@ -353,17 +349,20 @@ def print_cuda_stats(self) -> None: ) def make_room(self, size: int) -> None: - """Make enough room in the cache to accommodate a new model of indicated size.""" - # calculate how much memory this model will require - # multiplier = 2 if self.precision==torch.float32 else 1 + """Make enough room in the cache to accommodate a new model of indicated size. + + Note: This function deletes all of the cache's internal references to a model in order to free it. If there are + external references to the model, there's nothing that the cache can do about it, and those models will not be + garbage-collected. + """ bytes_needed = size - maximum_size = self.max_cache_size * GIG # stored in GB, convert to bytes + maximum_size = self.max_cache_size * GB # stored in GB, convert to bytes current_size = self.cache_size() if current_size + bytes_needed > maximum_size: self.logger.debug( - f"Max cache size exceeded: {(current_size/GIG):.2f}/{self.max_cache_size:.2f} GB, need an additional" - f" {(bytes_needed/GIG):.2f} GB" + f"Max cache size exceeded: {(current_size/GB):.2f}/{self.max_cache_size:.2f} GB, need an additional" + f" {(bytes_needed/GB):.2f} GB" ) self.logger.debug(f"Before making_room: cached_models={len(self._cached_models)}") @@ -380,7 +379,7 @@ def make_room(self, size: int) -> None: if not cache_entry.locked: self.logger.debug( - f"Removing {model_key} from RAM cache to free at least {(size/GIG):.2f} GB (-{(cache_entry.size/GIG):.2f} GB)" + f"Removing {model_key} from RAM cache to free at least {(size/GB):.2f} GB (-{(cache_entry.size/GB):.2f} GB)" ) current_size -= cache_entry.size models_cleared += 1 diff --git a/invokeai/backend/quantization/bnb_llm_int8.py b/invokeai/backend/quantization/bnb_llm_int8.py index b92717cbc57..02f94936e96 100644 --- a/invokeai/backend/quantization/bnb_llm_int8.py +++ b/invokeai/backend/quantization/bnb_llm_int8.py @@ -54,8 +54,10 @@ def _load_from_state_dict( # See `bnb.nn.Linear8bitLt._save_to_state_dict()` for the serialization logic of SCB and weight_format. scb = state_dict.pop(prefix + "SCB", None) - # weight_format is unused, but we pop it so we can validate that there are no unexpected keys. - _weight_format = state_dict.pop(prefix + "weight_format", None) + + # Currently, we only support weight_format=0. + weight_format = state_dict.pop(prefix + "weight_format", None) + assert weight_format == 0 # TODO(ryand): Technically, we should be using `strict`, `missing_keys`, `unexpected_keys`, and `error_msgs` # rather than raising an exception to correctly implement this API. @@ -89,6 +91,14 @@ def _load_from_state_dict( ) self.bias = bias if bias is None else torch.nn.Parameter(bias) + # Reset the state. The persisted fields are based on the initialization behaviour in + # `bnb.nn.Linear8bitLt.__init__()`. 
+ new_state = bnb.MatmulLtState() + new_state.threshold = self.state.threshold + new_state.has_fp16_weights = False + new_state.use_pool = self.state.use_pool + self.state = new_state + def _convert_linear_layers_to_llm_8bit( module: torch.nn.Module, ignore_modules: set[str], outlier_threshold: float, prefix: str = "" diff --git a/invokeai/backend/stable_diffusion/diffusion/conditioning_data.py b/invokeai/backend/stable_diffusion/diffusion/conditioning_data.py index c5fda909c72..b7e9038cf7e 100644 --- a/invokeai/backend/stable_diffusion/diffusion/conditioning_data.py +++ b/invokeai/backend/stable_diffusion/diffusion/conditioning_data.py @@ -43,6 +43,11 @@ class FLUXConditioningInfo: clip_embeds: torch.Tensor t5_embeds: torch.Tensor + def to(self, device: torch.device | None = None, dtype: torch.dtype | None = None): + self.clip_embeds = self.clip_embeds.to(device=device, dtype=dtype) + self.t5_embeds = self.t5_embeds.to(device=device, dtype=dtype) + return self + @dataclass class ConditioningFieldData: diff --git a/invokeai/backend/util/__init__.py b/invokeai/backend/util/__init__.py index 101215640ab..f24b6db3e12 100644 --- a/invokeai/backend/util/__init__.py +++ b/invokeai/backend/util/__init__.py @@ -3,10 +3,9 @@ """ from invokeai.backend.util.logging import InvokeAILogger -from invokeai.backend.util.util import GIG, Chdir, directory_size +from invokeai.backend.util.util import Chdir, directory_size __all__ = [ - "GIG", "directory_size", "Chdir", "InvokeAILogger", diff --git a/invokeai/backend/util/util.py b/invokeai/backend/util/util.py index b3466ddba92..cc654e4d39b 100644 --- a/invokeai/backend/util/util.py +++ b/invokeai/backend/util/util.py @@ -7,9 +7,6 @@ from PIL import Image -# actual size of a gig -GIG = 1073741824 - def slugify(value: str, allow_unicode: bool = False) -> str: """
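Note (not part of the diff): the new `FLUXConditioningInfo.to()` helper mirrors `torch.Tensor.to()` — it casts both embedding tensors in place and returns `self`, which is what lets `_run_diffusion()` drop the manual dtype casts that `denoise()` used to perform. A minimal sketch, with placeholder tensor shapes:

```python
import torch

from invokeai.backend.stable_diffusion.diffusion.conditioning_data import FLUXConditioningInfo

# Placeholder tensors standing in for real CLIP / T5 encoder outputs; shapes are illustrative only.
clip_embeds = torch.zeros(1, 768)
t5_embeds = torch.zeros(1, 512, 4096)

cond = FLUXConditioningInfo(clip_embeds=clip_embeds, t5_embeds=t5_embeds)
# to() mutates both fields and returns self, so it can be used fluently,
# e.g. flux_conditioning.to(dtype=inference_dtype) in _run_diffusion().
cond = cond.to(dtype=torch.bfloat16)
assert cond.clip_embeds.dtype == torch.bfloat16
assert cond.t5_embeds.dtype == torch.bfloat16
```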
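Note (not part of the diff): with `DEFAULT_MAX_CACHE_SIZE` and `DEFAULT_MAX_VRAM_CACHE_SIZE` removed, `max_cache_size` and `max_vram_cache_size` become required arguments, so any code constructing `ModelCache` directly must now pass both limits. A minimal construction sketch with illustrative values (both limits are expressed in GB and converted internally via `GB = 2**30`):

```python
from invokeai.backend.model_manager.load.model_cache.model_cache_default import ModelCache

# Illustrative limits: ~7.5 GB of RAM for offloaded models, ~6 GB of VRAM for active ones.
cache = ModelCache(max_cache_size=7.5, max_vram_cache_size=6.0)
```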