Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
57 changes: 54 additions & 3 deletions invokeai/app/services/model_install/model_install_default.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
"""Model installation class."""

import gc
import locale
import os
import re
import sys
import threading
import time
from copy import deepcopy
Expand Down Expand Up @@ -187,6 +189,22 @@ def register_path(
config.source_type = ModelSourceType.Path
return self._register(model_path, config)

# TODO: Replace this with a proper fix for underlying problem of Windows holding open
# the file when it needs to be moved.
@staticmethod
def _move_with_retries(src: Path, dst: Path, attempts: int = 5, delay: float = 0.5) -> None:
"""Workaround for Windows file-handle issues when moving files."""
for tries_left in range(attempts, 0, -1):
try:
move(src, dst)
return
except PermissionError:
gc.collect()
if tries_left == 1:
raise
time.sleep(delay)
delay *= 2 # Exponential backoff

def install_path(
self,
model_path: Union[Path, str],
Expand All @@ -205,7 +223,7 @@ def install_path(
dest_dir.mkdir(parents=True)
dest_path = dest_dir / model_path.name if model_path.is_file() else dest_dir
if model_path.is_file():
move(model_path, dest_path)
self._move_with_retries(model_path, dest_path) # Windows workaround TODO: fix root cause
elif model_path.is_dir():
# Move the contents of the directory, not the directory itself
for item in model_path.iterdir():
Expand Down Expand Up @@ -500,6 +518,39 @@ def _start_installer_thread(self) -> None:
self._install_thread.start()
self._running = True

@staticmethod
def _safe_rmtree(path: Path, logger: Any) -> None:
"""Remove a directory tree with retry logic for Windows file locking issues.

On Windows, memory-mapped files may not be immediately released even after
the file handle is closed. This function retries the removal with garbage
collection to help release any lingering references.
"""
max_retries = 3
retry_delay = 0.5 # seconds

for attempt in range(max_retries):
try:
# Force garbage collection to release any lingering file references
gc.collect()
rmtree(path)
return
except PermissionError as e:
if attempt < max_retries - 1 and sys.platform == "win32":
logger.warning(
f"Failed to remove {path} (attempt {attempt + 1}/{max_retries}): {e}. "
f"Retrying in {retry_delay}s..."
)
time.sleep(retry_delay)
retry_delay *= 2 # Exponential backoff
else:
logger.error(f"Failed to remove temporary directory {path}: {e}")
# On final failure, don't raise - the temp dir will be cleaned up on next startup
return
except Exception as e:
logger.error(f"Unexpected error removing {path}: {e}")
return

def _install_next_item(self) -> None:
self._logger.debug(f"Installer thread {threading.get_ident()} starting")
while True:
Expand Down Expand Up @@ -529,7 +580,7 @@ def _install_next_item(self) -> None:
finally:
# if this is an install of a remote file, then clean up the temporary directory
if job._install_tmpdir is not None:
rmtree(job._install_tmpdir)
self._safe_rmtree(job._install_tmpdir, self._logger)
self._install_completed_event.set()
self._install_queue.task_done()
self._logger.info(f"Installer thread {threading.get_ident()} exiting")
Expand Down Expand Up @@ -574,7 +625,7 @@ def _remove_dangling_install_dirs(self) -> None:
path = self._app_config.models_path
for tmpdir in path.glob(f"{TMPDIR_PREFIX}*"):
self._logger.info(f"Removing dangling temporary directory {tmpdir}")
rmtree(tmpdir)
self._safe_rmtree(tmpdir, self._logger)

def _scan_for_missing_models(self) -> list[AnyModelConfig]:
"""Scan the models directory for missing models and return a list of them."""
Expand Down
6 changes: 5 additions & 1 deletion invokeai/backend/quantization/gguf/loaders.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,11 @@ def gguf_sd_loader(path: Path, compute_dtype: torch.dtype) -> dict[str, GGMLTens
with WrappedGGUFReader(path) as reader:
sd: dict[str, GGMLTensor] = {}
for tensor in reader.tensors:
torch_tensor = torch.from_numpy(tensor.data)
# Use .copy() to create a true copy of the data, not a view.
# This is critical on Windows where the memory-mapped file cannot be deleted
# while tensors still hold references to the mapped memory.
torch_tensor = torch.from_numpy(tensor.data.copy())

shape = torch.Size(tuple(int(v) for v in reversed(tensor.shape)))
if tensor.tensor_type in TORCH_COMPATIBLE_QTYPES:
torch_tensor = torch_tensor.view(*shape)
Expand Down