diff --git a/docs/features/CONFIGURATION.md b/docs/features/CONFIGURATION.md index 09e6143e954..6920d3d97f2 100644 --- a/docs/features/CONFIGURATION.md +++ b/docs/features/CONFIGURATION.md @@ -175,22 +175,27 @@ These configuration settings allow you to enable and disable various InvokeAI fe | `internet_available` | `true` | When a resource is not available locally, try to fetch it via the internet | | `log_tokenization` | `false` | Before each text2image generation, print a color-coded representation of the prompt to the console; this can help understand why a prompt is not working as expected | | `patchmatch` | `true` | Activate the "patchmatch" algorithm for improved inpainting | -| `restore` | `true` | Activate the facial restoration features (DEPRECATED; restoration features will be removed in 3.0.0) | -### Memory/Performance +### Generation These options tune InvokeAI's memory and performance characteristics. -| Setting | Default Value | Description | -|----------|----------------|--------------| -| `always_use_cpu` | `false` | Use the CPU to generate images, even if a GPU is available | -| `free_gpu_mem` | `false` | Aggressively free up GPU memory after each operation; this will allow you to run in low-VRAM environments with some performance penalties | -| `max_cache_size` | `6` | Amount of CPU RAM (in GB) to reserve for caching models in memory; more cache allows you to keep models in memory and switch among them quickly | -| `max_vram_cache_size` | `2.75` | Amount of GPU VRAM (in GB) to reserve for caching models in VRAM; more cache speeds up generation but reduces the size of the images that can be generated. This can be set to zero to maximize the amount of memory available for generation. | -| `precision` | `auto` | Floating point precision. One of `auto`, `float16` or `float32`. `float16` will consume half the memory of `float32` but produce slightly lower-quality images. 
The `auto` setting will guess the proper precision based on your video card and operating system | -| `sequential_guidance` | `false` | Calculate guidance in serial rather than in parallel, lowering memory requirements at the cost of some performance loss | -| `xformers_enabled` | `true` | If the x-formers memory-efficient attention module is installed, activate it for better memory usage and generation speed| -| `tiled_decode` | `false` | If true, then during the VAE decoding phase the image will be decoded a section at a time, reducing memory consumption at the cost of a performance hit | +| Setting | Default Value | Description | +|-----------------------|---------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `sequential_guidance` | `false` | Calculate guidance in serial rather than in parallel, lowering memory requirements at the cost of some performance loss | +| `attention_type` | `auto` | Select the type of attention to use. One of `auto`,`normal`,`xformers`,`sliced`, or `torch-sdp` | +| `attention_slice_size` | `auto` | When "sliced" attention is selected, set the slice size. One of `auto`, `balanced`, `max` or the integers 1-8| +| `force_tiled_decode` | `false` | Force the VAE step to decode in tiles, reducing memory consumption at the cost of performance | + +### Device + +These options configure the generation execution device. + +| Setting | Default Value | Description | +|-----------------------|---------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `device` | `auto` | Preferred execution device. 
One of `auto`, `cpu`, `cuda`, `cuda:1`, `mps`. `auto` will choose the device depending on the hardware platform and the installed torch capabilities. | +| `precision` | `auto` | Floating point precision. One of `auto`, `float16` or `float32`. `float16` will consume half the memory of `float32` but produce slightly lower-quality images. The `auto` setting will guess the proper precision based on your video card and operating system | + ### Paths diff --git a/invokeai/app/services/config/__init__.py b/invokeai/app/services/config/__init__.py new file mode 100644 index 00000000000..6a42f9e08c0 --- /dev/null +++ b/invokeai/app/services/config/__init__.py @@ -0,0 +1,8 @@ +""" +Init file for InvokeAI configure package +""" + +from .invokeai_config import ( # noqa F401 + InvokeAIAppConfig, + get_invokeai_config, +) diff --git a/invokeai/app/services/config/base.py b/invokeai/app/services/config/base.py new file mode 100644 index 00000000000..b83621c708b --- /dev/null +++ b/invokeai/app/services/config/base.py @@ -0,0 +1,239 @@ +# Copyright (c) 2023 Lincoln Stein (https://github.com/lstein) and the InvokeAI Development Team + +""" +Base class for the InvokeAI configuration system. +It defines a type of pydantic BaseSettings object that +is able to read and write from an omegaconf-based config file, +with overriding of settings from environment variables and/or +the command line. +""" + +from __future__ import annotations +import argparse +import os +import pydoc +import sys +from argparse import ArgumentParser +from omegaconf import OmegaConf, DictConfig, ListConfig +from pathlib import Path +from pydantic import BaseSettings +from typing import ClassVar, Dict, List, Literal, Union, get_origin, get_type_hints, get_args + + +class PagingArgumentParser(argparse.ArgumentParser): + """ + A custom ArgumentParser that uses pydoc to page its output. + It also supports reading defaults from an init file. 
+ """ + + def print_help(self, file=None): + text = self.format_help() + pydoc.pager(text) + + +class InvokeAISettings(BaseSettings): + """ + Runtime configuration settings in which default values are + read from an omegaconf .yaml file. + """ + + initconf: ClassVar[DictConfig] = None + argparse_groups: ClassVar[Dict] = {} + + def parse_args(self, argv: list = sys.argv[1:]): + parser = self.get_parser() + opt = parser.parse_args(argv) + for name in self.__fields__: + if name not in self._excluded(): + value = getattr(opt, name) + if isinstance(value, ListConfig): + value = list(value) + elif isinstance(value, DictConfig): + value = dict(value) + setattr(self, name, value) + + def to_yaml(self) -> str: + """ + Return a YAML string representing our settings. This can be used + as the contents of `invokeai.yaml` to restore settings later. + """ + cls = self.__class__ + type = get_args(get_type_hints(cls)["type"])[0] + field_dict = dict({type: dict()}) + for name, field in self.__fields__.items(): + if name in cls._excluded_from_yaml(): + continue + category = field.field_info.extra.get("category") or "Uncategorized" + value = getattr(self, name) + if category not in field_dict[type]: + field_dict[type][category] = dict() + # keep paths as strings to make it easier to read + field_dict[type][category][name] = str(value) if isinstance(value, Path) else value + conf = OmegaConf.create(field_dict) + return OmegaConf.to_yaml(conf) + + @classmethod + def add_parser_arguments(cls, parser): + if "type" in get_type_hints(cls): + settings_stanza = get_args(get_type_hints(cls)["type"])[0] + else: + settings_stanza = "Uncategorized" + + env_prefix = cls.Config.env_prefix if hasattr(cls.Config, "env_prefix") else settings_stanza.upper() + + initconf = ( + cls.initconf.get(settings_stanza) + if cls.initconf and settings_stanza in cls.initconf + else OmegaConf.create() + ) + + # create an upcase version of the environment in + # order to achieve case-insensitive environment + # 
variables (the way Windows does) + upcase_environ = dict() + for key, value in os.environ.items(): + upcase_environ[key.upper()] = value + + fields = cls.__fields__ + cls.argparse_groups = {} + + for name, field in fields.items(): + if name not in cls._excluded(): + current_default = field.default + + category = field.field_info.extra.get("category", "Uncategorized") + env_name = env_prefix + "_" + name + if category in initconf and name in initconf.get(category): + field.default = initconf.get(category).get(name) + if env_name.upper() in upcase_environ: + field.default = upcase_environ[env_name.upper()] + cls.add_field_argument(parser, name, field) + + field.default = current_default + + @classmethod + def cmd_name(self, command_field: str = "type") -> str: + hints = get_type_hints(self) + if command_field in hints: + return get_args(hints[command_field])[0] + else: + return "Uncategorized" + + @classmethod + def get_parser(cls) -> ArgumentParser: + parser = PagingArgumentParser( + prog=cls.cmd_name(), + description=cls.__doc__, + ) + cls.add_parser_arguments(parser) + return parser + + @classmethod + def add_subparser(cls, parser: argparse.ArgumentParser): + parser.add_parser(cls.cmd_name(), help=cls.__doc__) + + @classmethod + def _excluded(self) -> List[str]: + # internal fields that shouldn't be exposed as command line options + return ["type", "initconf"] + + @classmethod + def _excluded_from_yaml(self) -> List[str]: + # combination of deprecated parameters and internal ones that shouldn't be exposed as invokeai.yaml options + return [ + "type", + "initconf", + "version", + "from_file", + "model", + "root", + "max_cache_size", + "max_vram_cache_size", + "always_use_cpu", + "free_gpu_mem", + "xformers_enabled", + "tiled_decode", + ] + + class Config: + env_file_encoding = "utf-8" + arbitrary_types_allowed = True + case_sensitive = True + + @classmethod + def add_field_argument(cls, command_parser, name: str, field, default_override=None): + field_type = 
get_type_hints(cls).get(name) + default = ( + default_override + if default_override is not None + else field.default + if field.default_factory is None + else field.default_factory() + ) + if category := field.field_info.extra.get("category"): + if category not in cls.argparse_groups: + cls.argparse_groups[category] = command_parser.add_argument_group(category) + argparse_group = cls.argparse_groups[category] + else: + argparse_group = command_parser + + if get_origin(field_type) == Literal: + allowed_values = get_args(field.type_) + allowed_types = set() + for val in allowed_values: + allowed_types.add(type(val)) + allowed_types_list = list(allowed_types) + field_type = allowed_types_list[0] if len(allowed_types) == 1 else int_or_float_or_str + + argparse_group.add_argument( + f"--{name}", + dest=name, + type=field_type, + default=default, + choices=allowed_values, + help=field.field_info.description, + ) + + elif get_origin(field_type) == Union: + argparse_group.add_argument( + f"--{name}", + dest=name, + type=int_or_float_or_str, + default=default, + help=field.field_info.description, + ) + + elif get_origin(field_type) == list: + argparse_group.add_argument( + f"--{name}", + dest=name, + nargs="*", + type=field.type_, + default=default, + action=argparse.BooleanOptionalAction if field.type_ == bool else "store", + help=field.field_info.description, + ) + else: + argparse_group.add_argument( + f"--{name}", + dest=name, + type=field.type_, + default=default, + action=argparse.BooleanOptionalAction if field.type_ == bool else "store", + help=field.field_info.description, + ) + + +def int_or_float_or_str(value: str) -> Union[int, float, str]: + """ + Workaround for argparse type checking. 
+ """ + try: + return int(value) + except Exception as e: # noqa F841 + pass + try: + return float(value) + except Exception as e: # noqa F841 + pass + return str(value) diff --git a/invokeai/app/services/config.py b/invokeai/app/services/config/invokeai_config.py similarity index 63% rename from invokeai/app/services/config.py rename to invokeai/app/services/config/invokeai_config.py index a9e5bbee98d..728fe188b5c 100644 --- a/invokeai/app/services/config.py +++ b/invokeai/app/services/config/invokeai_config.py @@ -10,37 +10,49 @@ [file: invokeai.yaml] InvokeAI: - Paths: - root: /home/lstein/invokeai-main - conf_path: configs/models.yaml - legacy_conf_dir: configs/stable-diffusion - outdir: outputs - autoimport_dir: null - Models: - model: stable-diffusion-1.5 - embeddings: true - Memory/Performance: - xformers_enabled: false - sequential_guidance: false - precision: float16 - max_cache_size: 6 - max_vram_cache_size: 0.5 - always_use_cpu: false - free_gpu_mem: false - Features: - esrgan: true - patchmatch: true - internet_available: true - log_tokenization: false Web Server: host: 127.0.0.1 - port: 8081 + port: 9090 allow_origins: [] allow_credentials: true allow_methods: - '*' allow_headers: - '*' + Features: + esrgan: true + internet_available: true + log_tokenization: false + patchmatch: true + ignore_missing_core_models: false + Paths: + autoimport_dir: autoimport + lora_dir: null + embedding_dir: null + controlnet_dir: null + conf_path: configs/models.yaml + models_dir: models + legacy_conf_dir: configs/stable-diffusion + db_dir: databases + outdir: /home/lstein/invokeai-main/outputs + use_memory_db: false + Logging: + log_handlers: + - console + log_format: plain + log_level: info + Model Cache: + ram: 13.5 + vram: 0.25 + lazy_offload: true + Device: + device: auto + precision: auto + Generation: + sequential_guidance: false + attention_type: xformers + attention_slice_size: auto + force_tiled_decode: false The default name of the configuration file is 
`invokeai.yaml`, located in INVOKEAI_ROOT. You can replace supersede this by providing any @@ -54,24 +66,23 @@ at initialization time. You may pass a list of strings in the optional `argv` argument to use instead of the system argv: - conf.parse_args(argv=['--xformers_enabled']) + conf.parse_args(argv=['--log_tokenization']) It is also possible to set a value at initialization time. However, if you call parse_args() it may be overwritten. - conf = InvokeAIAppConfig(xformers_enabled=True) - conf.parse_args(argv=['--no-xformers']) - conf.xformers_enabled + conf = InvokeAIAppConfig(log_tokenization=True) + conf.parse_args(argv=['--no-log_tokenization']) + conf.log_tokenization # False - To avoid this, use `get_config()` to retrieve the application-wide configuration object. This will retain any properties set at object creation time: - conf = InvokeAIAppConfig.get_config(xformers_enabled=True) - conf.parse_args(argv=['--no-xformers']) - conf.xformers_enabled + conf = InvokeAIAppConfig.get_config(log_tokenization=True) + conf.parse_args(argv=['--no-log_tokenization']) + conf.log_tokenization # True Any setting can be overwritten by setting an environment variable of @@ -93,7 +104,7 @@ # get global configuration and print its cache size conf = InvokeAIAppConfig.get_config() conf.parse_args() - print(conf.max_cache_size) + print(conf.ram_cache_size) Typical usage in a backend module: @@ -101,8 +112,7 @@ # get global configuration and print its cache size value conf = InvokeAIAppConfig.get_config() - print(conf.max_cache_size) - + print(conf.ram_cache_size) Computed properties: @@ -159,15 +169,13 @@ class InvokeBatch(InvokeAISettings): """ from __future__ import annotations -import argparse -import pydoc import os -import sys -from argparse import ArgumentParser -from omegaconf import OmegaConf, DictConfig, ListConfig +from omegaconf import OmegaConf, DictConfig from pathlib import Path -from pydantic import BaseSettings, Field, parse_obj_as -from typing import ClassVar, 
Dict, List, Literal, Union, get_origin, get_type_hints, get_args +from pydantic import Field, parse_obj_as +from typing import ClassVar, Dict, List, Literal, Union, Optional, get_type_hints + +from .base import InvokeAISettings INIT_FILE = Path("invokeai.yaml") DB_FILE = Path("invokeai.db") @@ -175,195 +183,6 @@ class InvokeBatch(InvokeAISettings): DEFAULT_MAX_VRAM = 0.5 -class InvokeAISettings(BaseSettings): - """ - Runtime configuration settings in which default values are - read from an omegaconf .yaml file. - """ - - initconf: ClassVar[DictConfig] = None - argparse_groups: ClassVar[Dict] = {} - - def parse_args(self, argv: list = sys.argv[1:]): - parser = self.get_parser() - opt = parser.parse_args(argv) - for name in self.__fields__: - if name not in self._excluded(): - value = getattr(opt, name) - if isinstance(value, ListConfig): - value = list(value) - elif isinstance(value, DictConfig): - value = dict(value) - setattr(self, name, value) - - def to_yaml(self) -> str: - """ - Return a YAML string representing our settings. This can be used - as the contents of `invokeai.yaml` to restore settings later. 
- """ - cls = self.__class__ - type = get_args(get_type_hints(cls)["type"])[0] - field_dict = dict({type: dict()}) - for name, field in self.__fields__.items(): - if name in cls._excluded_from_yaml(): - continue - category = field.field_info.extra.get("category") or "Uncategorized" - value = getattr(self, name) - if category not in field_dict[type]: - field_dict[type][category] = dict() - # keep paths as strings to make it easier to read - field_dict[type][category][name] = str(value) if isinstance(value, Path) else value - conf = OmegaConf.create(field_dict) - return OmegaConf.to_yaml(conf) - - @classmethod - def add_parser_arguments(cls, parser): - if "type" in get_type_hints(cls): - settings_stanza = get_args(get_type_hints(cls)["type"])[0] - else: - settings_stanza = "Uncategorized" - - env_prefix = cls.Config.env_prefix if hasattr(cls.Config, "env_prefix") else settings_stanza.upper() - - initconf = ( - cls.initconf.get(settings_stanza) - if cls.initconf and settings_stanza in cls.initconf - else OmegaConf.create() - ) - - # create an upcase version of the environment in - # order to achieve case-insensitive environment - # variables (the way Windows does) - upcase_environ = dict() - for key, value in os.environ.items(): - upcase_environ[key.upper()] = value - - fields = cls.__fields__ - cls.argparse_groups = {} - - for name, field in fields.items(): - if name not in cls._excluded(): - current_default = field.default - - category = field.field_info.extra.get("category", "Uncategorized") - env_name = env_prefix + "_" + name - if category in initconf and name in initconf.get(category): - field.default = initconf.get(category).get(name) - if env_name.upper() in upcase_environ: - field.default = upcase_environ[env_name.upper()] - cls.add_field_argument(parser, name, field) - - field.default = current_default - - @classmethod - def cmd_name(self, command_field: str = "type") -> str: - hints = get_type_hints(self) - if command_field in hints: - return 
get_args(hints[command_field])[0] - else: - return "Uncategorized" - - @classmethod - def get_parser(cls) -> ArgumentParser: - parser = PagingArgumentParser( - prog=cls.cmd_name(), - description=cls.__doc__, - ) - cls.add_parser_arguments(parser) - return parser - - @classmethod - def add_subparser(cls, parser: argparse.ArgumentParser): - parser.add_parser(cls.cmd_name(), help=cls.__doc__) - - @classmethod - def _excluded(self) -> List[str]: - # internal fields that shouldn't be exposed as command line options - return ["type", "initconf"] - - @classmethod - def _excluded_from_yaml(self) -> List[str]: - # combination of deprecated parameters and internal ones that shouldn't be exposed as invokeai.yaml options - return [ - "type", - "initconf", - "version", - "from_file", - "model", - "root", - ] - - class Config: - env_file_encoding = "utf-8" - arbitrary_types_allowed = True - case_sensitive = True - - @classmethod - def add_field_argument(cls, command_parser, name: str, field, default_override=None): - field_type = get_type_hints(cls).get(name) - default = ( - default_override - if default_override is not None - else field.default - if field.default_factory is None - else field.default_factory() - ) - if category := field.field_info.extra.get("category"): - if category not in cls.argparse_groups: - cls.argparse_groups[category] = command_parser.add_argument_group(category) - argparse_group = cls.argparse_groups[category] - else: - argparse_group = command_parser - - if get_origin(field_type) == Literal: - allowed_values = get_args(field.type_) - allowed_types = set() - for val in allowed_values: - allowed_types.add(type(val)) - allowed_types_list = list(allowed_types) - field_type = allowed_types_list[0] if len(allowed_types) == 1 else Union[allowed_types_list] # type: ignore - - argparse_group.add_argument( - f"--{name}", - dest=name, - type=field_type, - default=default, - choices=allowed_values, - help=field.field_info.description, - ) - - elif 
get_origin(field_type) == list: - argparse_group.add_argument( - f"--{name}", - dest=name, - nargs="*", - type=field.type_, - default=default, - action=argparse.BooleanOptionalAction if field.type_ == bool else "store", - help=field.field_info.description, - ) - else: - argparse_group.add_argument( - f"--{name}", - dest=name, - type=field.type_, - default=default, - action=argparse.BooleanOptionalAction if field.type_ == bool else "store", - help=field.field_info.description, - ) - - -def _find_root() -> Path: - venv = Path(os.environ.get("VIRTUAL_ENV") or ".") - if os.environ.get("INVOKEAI_ROOT"): - root = Path(os.environ["INVOKEAI_ROOT"]) - elif any([(venv.parent / x).exists() for x in [INIT_FILE, LEGACY_INIT_FILE]]): - root = (venv.parent).resolve() - else: - root = Path("~/invokeai").expanduser().resolve() - return root - - class InvokeAIAppConfig(InvokeAISettings): """ Generate images using Stable Diffusion. Use "invokeai" to launch @@ -378,6 +197,8 @@ class InvokeAIAppConfig(InvokeAISettings): # fmt: off type: Literal["InvokeAI"] = "InvokeAI" + + # WEB host : str = Field(default="127.0.0.1", description="IP address to bind to", category='Web Server') port : int = Field(default=9090, description="Port to bind to", category='Web Server') allow_origins : List[str] = Field(default=[], description="Allowed CORS origins", category='Web Server') @@ -385,20 +206,14 @@ class InvokeAIAppConfig(InvokeAISettings): allow_methods : List[str] = Field(default=["*"], description="Methods allowed for CORS", category='Web Server') allow_headers : List[str] = Field(default=["*"], description="Headers allowed for CORS", category='Web Server') + # FEATURES esrgan : bool = Field(default=True, description="Enable/disable upscaling code", category='Features') internet_available : bool = Field(default=True, description="If true, attempt to download models on the fly; otherwise only use local models", category='Features') log_tokenization : bool = Field(default=False, 
description="Enable logging of parsed prompt tokens.", category='Features') patchmatch : bool = Field(default=True, description="Enable/disable patchmatch inpaint code", category='Features') + ignore_missing_core_models : bool = Field(default=False, description='Ignore missing models in models/core/convert', category='Features') - always_use_cpu : bool = Field(default=False, description="If true, use the CPU for rendering even if a GPU is available.", category='Memory/Performance') - free_gpu_mem : bool = Field(default=False, description="If true, purge model from GPU after each generation.", category='Memory/Performance') - max_cache_size : float = Field(default=6.0, gt=0, description="Maximum memory amount used by model cache for rapid switching", category='Memory/Performance') - max_vram_cache_size : float = Field(default=2.75, ge=0, description="Amount of VRAM reserved for model storage", category='Memory/Performance') - precision : Literal['auto', 'float16', 'float32', 'autocast'] = Field(default='auto', description='Floating point precision', category='Memory/Performance') - sequential_guidance : bool = Field(default=False, description="Whether to calculate guidance in serial instead of in parallel, lowering memory requirements", category='Memory/Performance') - xformers_enabled : bool = Field(default=True, description="Enable/disable memory-efficient attention", category='Memory/Performance') - tiled_decode : bool = Field(default=False, description="Whether to enable tiled VAE decode (reduces memory consumption with some performance penalty)", category='Memory/Performance') - + # PATHS root : Path = Field(default=None, description='InvokeAI runtime root directory', category='Paths') autoimport_dir : Path = Field(default='autoimport', description='Path to a directory of models files to be imported on startup.', category='Paths') lora_dir : Path = Field(default=None, description='Path to a directory of LoRA/LyCORIS models to be imported on startup.', 
category='Paths') @@ -409,16 +224,41 @@ class InvokeAIAppConfig(InvokeAISettings): legacy_conf_dir : Path = Field(default='configs/stable-diffusion', description='Path to directory of legacy checkpoint config files', category='Paths') db_dir : Path = Field(default='databases', description='Path to InvokeAI databases directory', category='Paths') outdir : Path = Field(default='outputs', description='Default folder for output images', category='Paths') - from_file : Path = Field(default=None, description='Take command input from the indicated file (command-line client only)', category='Paths') use_memory_db : bool = Field(default=False, description='Use in-memory database for storing image metadata', category='Paths') - ignore_missing_core_models : bool = Field(default=False, description='Ignore missing models in models/core/convert', category='Features') + from_file : Path = Field(default=None, description='Take command input from the indicated file (command-line client only)', category='Paths') + # LOGGING log_handlers : List[str] = Field(default=["console"], description='Log handler. Valid options are "console", "file=", "syslog=path|address:host:port", "http="', category="Logging") # note - would be better to read the log_format values from logging.py, but this creates circular dependencies issues log_format : Literal['plain', 'color', 'syslog', 'legacy'] = Field(default="color", description='Log format. 
Use "plain" for text-only, "color" for colorized output, "legacy" for 2.3-style logging and "syslog" for syslog-style', category="Logging") log_level : Literal["debug", "info", "warning", "error", "critical"] = Field(default="info", description="Emit logging messages at this level or higher", category="Logging") version : bool = Field(default=False, description="Show InvokeAI version and exit", category="Other") + + # CACHE + ram : Union[float, Literal["auto"]] = Field(default=6.0, gt=0, description="Maximum memory amount used by model cache for rapid switching (floating point number or 'auto')", category="Model Cache", ) + vram : Union[float, Literal["auto"]] = Field(default=0.25, ge=0, description="Amount of VRAM reserved for model storage (floating point number or 'auto')", category="Model Cache", ) + lazy_offload : bool = Field(default=True, description="Keep models in VRAM until their space is needed", category="Model Cache", ) + + # DEVICE + device : Literal[tuple(["auto", "cpu", "cuda", "cuda:1", "mps"])] = Field(default="auto", description="Generation device", category="Device", ) + precision: Literal[tuple(["auto", "float16", "float32", "autocast"])] = Field(default="auto", description="Floating point precision", category="Device", ) + + # GENERATION + sequential_guidance : bool = Field(default=False, description="Whether to calculate guidance in serial instead of in parallel, lowering memory requirements", category="Generation", ) + attention_type : Literal[tuple(["auto", "normal", "xformers", "sliced", "torch-sdp"])] = Field(default="auto", description="Attention type", category="Generation", ) + attention_slice_size: Literal[tuple(["auto", "balanced", "max", 1, 2, 3, 4, 5, 6, 7, 8])] = Field(default="auto", description='Slice size, valid when attention_type=="sliced"', category="Generation", ) + force_tiled_decode: bool = Field(default=False, description="Whether to enable tiled VAE decode (reduces memory consumption with some performance penalty)", 
category="Generation",) + + # DEPRECATED FIELDS - STILL HERE IN ORDER TO OBTAIN VALUES FROM PRE-3.1 CONFIG FILES + always_use_cpu : bool = Field(default=False, description="If true, use the CPU for rendering even if a GPU is available.", category='Memory/Performance') + free_gpu_mem : Optional[bool] = Field(default=None, description="If true, purge model from GPU after each generation.", category='Memory/Performance') + max_cache_size : Optional[float] = Field(default=None, gt=0, description="Maximum memory amount used by model cache for rapid switching", category='Memory/Performance') + max_vram_cache_size : Optional[float] = Field(default=None, ge=0, description="Amount of VRAM reserved for model storage", category='Memory/Performance') + xformers_enabled : bool = Field(default=True, description="Enable/disable memory-efficient attention", category='Memory/Performance') + tiled_decode : bool = Field(default=False, description="Whether to enable tiled VAE decode (reduces memory consumption with some performance penalty)", category='Memory/Performance') + + # See InvokeAIAppConfig subclass below for CACHE and DEVICE categories # fmt: on class Config: @@ -541,11 +381,6 @@ def full_precision(self) -> bool: """Return true if precision set to float32""" return self.precision == "float32" - @property - def disable_xformers(self) -> bool: - """Return true if xformers_enabled is false""" - return not self.xformers_enabled - @property def try_patchmatch(self) -> bool: """Return true if patchmatch true""" @@ -561,6 +396,27 @@ def invisible_watermark(self) -> bool: """invisible watermark node is always active and disabled from Web UIe""" return True + @property + def ram_cache_size(self) -> float: + return self.max_cache_size or self.ram + + @property + def vram_cache_size(self) -> float: + return self.max_vram_cache_size or self.vram + + @property + def use_cpu(self) -> bool: + return self.always_use_cpu or self.device == "cpu" + + @property + def disable_xformers(self) -> 
bool: + """ + Return true if xformers_enabled is false (reversed logic) + and attention type is not set to xformers. + """ + disabled_in_config = not self.xformers_enabled + return disabled_in_config and self.attention_type != "xformers" + @staticmethod def find_root() -> Path: """ @@ -570,19 +426,19 @@ def find_root() -> Path: return _find_root() -class PagingArgumentParser(argparse.ArgumentParser): - """ - A custom ArgumentParser that uses pydoc to page its output. - It also supports reading defaults from an init file. - """ - - def print_help(self, file=None): - text = self.format_help() - pydoc.pager(text) - - def get_invokeai_config(**kwargs) -> InvokeAIAppConfig: """ Legacy function which returns InvokeAIAppConfig.get_config() """ return InvokeAIAppConfig.get_config(**kwargs) + + +def _find_root() -> Path: + venv = Path(os.environ.get("VIRTUAL_ENV") or ".") + if os.environ.get("INVOKEAI_ROOT"): + root = Path(os.environ["INVOKEAI_ROOT"]) + elif any([(venv.parent / x).exists() for x in [INIT_FILE, LEGACY_INIT_FILE]]): + root = (venv.parent).resolve() + else: + root = Path("~/invokeai").expanduser().resolve() + return root diff --git a/invokeai/app/services/model_manager_service.py b/invokeai/app/services/model_manager_service.py index 675bc71257d..11ebab79385 100644 --- a/invokeai/app/services/model_manager_service.py +++ b/invokeai/app/services/model_manager_service.py @@ -330,8 +330,8 @@ def __init__( # configuration value. If present, then the # cache size is set to 2.5 GB times # the number of max_loaded_models. 
Otherwise - # use new `max_cache_size` config setting - max_cache_size = config.max_cache_size if hasattr(config, "max_cache_size") else config.max_loaded_models * 2.5 + # use new `ram_cache_size` config setting + max_cache_size = config.ram_cache_size logger.debug(f"Maximum RAM cache size: {max_cache_size} GiB") diff --git a/invokeai/backend/install/invokeai_configure.py b/invokeai/backend/install/invokeai_configure.py index d7ecb41e9bc..7925066562b 100755 --- a/invokeai/backend/install/invokeai_configure.py +++ b/invokeai/backend/install/invokeai_configure.py @@ -21,6 +21,7 @@ from enum import Enum from pathlib import Path from shutil import get_terminal_size +from typing import get_type_hints, get_args, Any from urllib import request import npyscreen @@ -50,6 +51,7 @@ # TO DO - Move all the frontend code into invokeai.frontend.install from invokeai.frontend.install.widgets import ( SingleSelectColumns, + MultiSelectColumns, CenteredButtonPress, FileBox, set_min_terminal_size, @@ -71,6 +73,10 @@ transformers.logging.set_verbosity_error() +def get_literal_fields(field) -> list[Any]: + return get_args(get_type_hints(InvokeAIAppConfig).get(field)) + + # --------------------------globals----------------------- config = InvokeAIAppConfig.get_config() @@ -80,7 +86,11 @@ Default_config_file = config.model_conf_path SD_Configs = config.legacy_conf_path -PRECISION_CHOICES = ["auto", "float16", "float32"] +PRECISION_CHOICES = get_literal_fields("precision") +DEVICE_CHOICES = get_literal_fields("device") +ATTENTION_CHOICES = get_literal_fields("attention_type") +ATTENTION_SLICE_CHOICES = get_literal_fields("attention_slice_size") +GENERATION_OPT_CHOICES = ["sequential_guidance", "force_tiled_decode", "lazy_offload"] GB = 1073741824 # GB in bytes HAS_CUDA = torch.cuda.is_available() _, MAX_VRAM = torch.cuda.mem_get_info() if HAS_CUDA else (0, 0) @@ -311,6 +321,7 @@ def create(self): Use ctrl-N and ctrl-P to move to the <N>ext and <P>revious fields. Use cursor arrows to make a checkbox selection, and space to toggle. """ + self.nextrely -= 1 for i in textwrap.wrap(label, width=window_width - 6): self.add_widget_intelligent( npyscreen.FixedText, @@ -337,76 +348,129 @@ def create(self): use_two_lines=False, scroll_exit=True, ) + + # old settings for defaults + precision = old_opts.precision or ("float32" if program_opts.full_precision else "auto") + device = old_opts.device + attention_type = old_opts.attention_type + attention_slice_size = old_opts.attention_slice_size + self.nextrely += 1 self.add_widget_intelligent( npyscreen.TitleFixedText, - name="GPU Management", + name="Image Generation Options:", + editable=False, + color="CONTROL", + scroll_exit=True, + ) + self.nextrely -= 2 + self.generation_options = self.add_widget_intelligent( + MultiSelectColumns, + columns=3, + values=GENERATION_OPT_CHOICES, + value=[GENERATION_OPT_CHOICES.index(x) for x in GENERATION_OPT_CHOICES if getattr(old_opts, x)], + relx=30, + max_height=2, + max_width=80, + scroll_exit=True, + ) + + self.add_widget_intelligent( + npyscreen.TitleFixedText, + name="Floating Point Precision:", begin_entry_at=0, editable=False, color="CONTROL", scroll_exit=True, ) - self.nextrely -= 1 - self.free_gpu_mem = self.add_widget_intelligent( - npyscreen.Checkbox, - name="Free GPU memory after each generation", - value=old_opts.free_gpu_mem, - max_width=45, - relx=5, + self.nextrely -= 2 + self.precision = self.add_widget_intelligent( + SingleSelectColumns, + columns=len(PRECISION_CHOICES), + name="Precision", + values=PRECISION_CHOICES, + value=PRECISION_CHOICES.index(precision), + begin_entry_at=3, + max_height=2, + relx=30, + max_width=56, scroll_exit=True, ) - self.nextrely -= 1 - self.xformers_enabled = self.add_widget_intelligent( - npyscreen.Checkbox, - name="Enable xformers support", - value=old_opts.xformers_enabled, - max_width=30, - relx=50, + self.add_widget_intelligent( + npyscreen.TitleFixedText, + name="Generation
Device:", + begin_entry_at=0, + editable=False, + color="CONTROL", scroll_exit=True, ) - self.nextrely -= 1 - self.always_use_cpu = self.add_widget_intelligent( - npyscreen.Checkbox, - name="Force CPU to be used on GPU systems", - value=old_opts.always_use_cpu, - relx=80, + self.nextrely -= 2 + self.device = self.add_widget_intelligent( + SingleSelectColumns, + columns=len(DEVICE_CHOICES), + values=DEVICE_CHOICES, + value=DEVICE_CHOICES.index(device), + begin_entry_at=3, + relx=30, + max_height=2, + max_width=60, scroll_exit=True, ) - precision = old_opts.precision or ("float32" if program_opts.full_precision else "auto") - self.nextrely += 1 self.add_widget_intelligent( npyscreen.TitleFixedText, - name="Floating Point Precision", + name="Attention Type:", begin_entry_at=0, editable=False, color="CONTROL", scroll_exit=True, ) - self.nextrely -= 1 - self.precision = self.add_widget_intelligent( + self.nextrely -= 2 + self.attention_type = self.add_widget_intelligent( SingleSelectColumns, - columns=3, - name="Precision", - values=PRECISION_CHOICES, - value=PRECISION_CHOICES.index(precision), + columns=len(ATTENTION_CHOICES), + values=ATTENTION_CHOICES, + value=ATTENTION_CHOICES.index(attention_type), begin_entry_at=3, max_height=2, + relx=30, max_width=80, scroll_exit=True, ) - self.nextrely += 1 + self.attention_type.on_changed = self.show_hide_slice_sizes + self.attention_slice_label = self.add_widget_intelligent( + npyscreen.TitleFixedText, + name="Attention Slice Size:", + relx=5, + editable=False, + hidden=attention_type != "sliced", + color="CONTROL", + scroll_exit=True, + ) + self.nextrely -= 2 + self.attention_slice_size = self.add_widget_intelligent( + SingleSelectColumns, + columns=len(ATTENTION_SLICE_CHOICES), + values=ATTENTION_SLICE_CHOICES, + value=ATTENTION_SLICE_CHOICES.index(attention_slice_size), + relx=30, + hidden=attention_type != "sliced", + max_height=2, + max_width=110, + scroll_exit=True, + ) + self.add_widget_intelligent( 
npyscreen.TitleFixedText, - name="RAM cache size (GB). Make this at least large enough to hold a single full model.", + name="Model RAM cache size (GB). Make this at least large enough to hold a single full model.", begin_entry_at=0, editable=False, color="CONTROL", scroll_exit=True, ) self.nextrely -= 1 - self.max_cache_size = self.add_widget_intelligent( + self.ram = self.add_widget_intelligent( npyscreen.Slider, - value=clip(old_opts.max_cache_size, range=(3.0, MAX_RAM), step=0.5), + value=clip(old_opts.ram_cache_size, range=(3.0, MAX_RAM), step=0.5), out_of=round(MAX_RAM), lowest=0.0, step=0.5, @@ -417,16 +481,16 @@ def create(self): self.nextrely += 1 self.add_widget_intelligent( npyscreen.TitleFixedText, - name="VRAM cache size (GB). Reserving a small amount of VRAM will modestly speed up the start of image generation.", + name="Model VRAM cache size (GB). Reserving a small amount of VRAM will modestly speed up the start of image generation.", begin_entry_at=0, editable=False, color="CONTROL", scroll_exit=True, ) self.nextrely -= 1 - self.max_vram_cache_size = self.add_widget_intelligent( + self.vram = self.add_widget_intelligent( npyscreen.Slider, - value=clip(old_opts.max_vram_cache_size, range=(0, MAX_VRAM), step=0.25), + value=clip(old_opts.vram_cache_size, range=(0, MAX_VRAM), step=0.25), out_of=round(MAX_VRAM * 2) / 2, lowest=0.0, relx=8, @@ -434,7 +498,7 @@ def create(self): scroll_exit=True, ) else: - self.max_vram_cache_size = DummyWidgetValue.zero + self.vram_cache_size = DummyWidgetValue.zero self.nextrely += 1 self.outdir = self.add_widget_intelligent( FileBox, @@ -490,6 +554,11 @@ def create(self): when_pressed_function=self.on_ok, ) + def show_hide_slice_sizes(self, value): + show = ATTENTION_CHOICES[value[0]] == "sliced" + self.attention_slice_label.hidden = not show + self.attention_slice_size.hidden = not show + def on_ok(self): options = self.marshall_arguments() if self.validate_field_values(options): @@ -523,12 +592,9 @@ def 
marshall_arguments(self): new_opts = Namespace() for attr in [ + "ram", + "vram", "outdir", - "free_gpu_mem", - "max_cache_size", - "max_vram_cache_size", - "xformers_enabled", - "always_use_cpu", ]: setattr(new_opts, attr, getattr(self, attr).value) @@ -541,6 +607,12 @@ def marshall_arguments(self): new_opts.hf_token = self.hf_token.value new_opts.license_acceptance = self.license_acceptance.value new_opts.precision = PRECISION_CHOICES[self.precision.value[0]] + new_opts.device = DEVICE_CHOICES[self.device.value[0]] + new_opts.attention_type = ATTENTION_CHOICES[self.attention_type.value[0]] + new_opts.attention_slice_size = ATTENTION_SLICE_CHOICES[self.attention_slice_size.value[0]] + generation_options = [GENERATION_OPT_CHOICES[x] for x in self.generation_options.value] + for v in GENERATION_OPT_CHOICES: + setattr(new_opts, v, v in generation_options) return new_opts diff --git a/invokeai/backend/model_management/model_manager.py b/invokeai/backend/model_management/model_manager.py index 1b10554e693..d87bc03fb7d 100644 --- a/invokeai/backend/model_management/model_manager.py +++ b/invokeai/backend/model_management/model_manager.py @@ -341,7 +341,8 @@ def __init__( self.logger = logger self.cache = ModelCache( max_cache_size=max_cache_size, - max_vram_cache_size=self.app_config.max_vram_cache_size, + max_vram_cache_size=self.app_config.vram_cache_size, + lazy_offloading=self.app_config.lazy_offload, execution_device=device_type, precision=precision, sequential_offload=sequential_offload, diff --git a/invokeai/backend/stable_diffusion/diffusers_pipeline.py b/invokeai/backend/stable_diffusion/diffusers_pipeline.py index 0180830b768..63b0c78b51d 100644 --- a/invokeai/backend/stable_diffusion/diffusers_pipeline.py +++ b/invokeai/backend/stable_diffusion/diffusers_pipeline.py @@ -33,7 +33,7 @@ PostprocessingSettings, BasicConditioningInfo, ) -from ..util import normalize_device +from ..util import normalize_device, auto_detect_slice_size @dataclass @@ -291,6 +291,24 @@ 
def _adjust_memory_efficient_attention(self, latents: torch.Tensor): if xformers is available, use it, otherwise use sliced attention. """ config = InvokeAIAppConfig.get_config() + if config.attention_type == "xformers": + self.enable_xformers_memory_efficient_attention() + return + elif config.attention_type == "sliced": + slice_size = config.attention_slice_size + if slice_size == "auto": + slice_size = auto_detect_slice_size(latents) + elif slice_size == "balanced": + slice_size = "auto" + self.enable_attention_slicing(slice_size=slice_size) + return + elif config.attention_type == "normal": + self.disable_attention_slicing() + return + elif config.attention_type == "torch-sdp": + raise Exception("torch-sdp attention slicing not yet implemented") + + # the remainder of this code is called when attention_type=='auto' if self.unet.device.type == "cuda": if is_xformers_available() and not config.disable_xformers: self.enable_xformers_memory_efficient_attention() diff --git a/invokeai/backend/util/__init__.py b/invokeai/backend/util/__init__.py index 30bb0efc157..b4e1c6e3a3f 100644 --- a/invokeai/backend/util/__init__.py +++ b/invokeai/backend/util/__init__.py @@ -11,4 +11,11 @@ torch_dtype, ) from .log import write_log # noqa: F401 -from .util import ask_user, download_with_resume, instantiate_from_config, url_attachment_name, Chdir # noqa: F401 +from .util import ( # noqa: F401 + ask_user, + download_with_resume, + instantiate_from_config, + url_attachment_name, + Chdir, +) +from .attention import auto_detect_slice_size # noqa: F401 diff --git a/invokeai/backend/util/attention.py b/invokeai/backend/util/attention.py new file mode 100644 index 00000000000..a8214643944 --- /dev/null +++ b/invokeai/backend/util/attention.py @@ -0,0 +1,32 @@ +# Copyright (c) 2023 Lincoln Stein and the InvokeAI Team +""" +Utility routine used for autodetection of optimal slice size +for attention mechanism.
+""" +import torch +import psutil + + +def auto_detect_slice_size(latents: torch.Tensor) -> str: + bytes_per_element_needed_for_baddbmm_duplication = latents.element_size() + 4 + max_size_required_for_baddbmm = ( + 16 + * latents.size(dim=2) + * latents.size(dim=3) + * latents.size(dim=2) + * latents.size(dim=3) + * bytes_per_element_needed_for_baddbmm_duplication + ) + if latents.device.type in {"cpu", "mps"}: + mem_free = psutil.virtual_memory().free + elif latents.device.type == "cuda": + mem_free, _ = torch.cuda.mem_get_info(latents.device) + else: + raise ValueError(f"unrecognized device {latents.device}") + + if max_size_required_for_baddbmm > (mem_free * 3.0 / 4.0): + return "max" + elif torch.backends.mps.is_available(): + return "max" + else: + return "balanced" diff --git a/invokeai/backend/util/devices.py b/invokeai/backend/util/devices.py index 1827f295e4b..bdaf3244f33 100644 --- a/invokeai/backend/util/devices.py +++ b/invokeai/backend/util/devices.py @@ -17,13 +17,17 @@ def choose_torch_device() -> torch.device: """Convenience routine for guessing which GPU device to run model on""" - if config.always_use_cpu: + if config.use_cpu: # legacy setting - force CPU return CPU_DEVICE - if torch.cuda.is_available(): - return torch.device("cuda") - if hasattr(torch.backends, "mps") and torch.backends.mps.is_available(): - return torch.device("mps") - return CPU_DEVICE + elif config.device == "auto": + if torch.cuda.is_available(): + return torch.device("cuda") + if hasattr(torch.backends, "mps") and torch.backends.mps.is_available(): + return torch.device("mps") + else: + return CPU_DEVICE + else: + return torch.device(config.device) def choose_precision(device: torch.device) -> str: diff --git a/invokeai/frontend/install/widgets.py b/invokeai/frontend/install/widgets.py index 79b62809903..f7d1d044c8d 100644 --- a/invokeai/frontend/install/widgets.py +++ b/invokeai/frontend/install/widgets.py @@ -17,8 +17,8 @@ from curses import BUTTON2_CLICKED, 
BUTTON3_CLICKED # minimum size for UIs -MIN_COLS = 130 -MIN_LINES = 38 +MIN_COLS = 150 +MIN_LINES = 40 class WindowTooSmallException(Exception): @@ -277,6 +277,9 @@ def when_cursor_moved(self): def h_cursor_line_right(self, ch): self.h_exit_down("bye bye") + def h_cursor_line_left(self, ch): + self.h_exit_up("bye bye") + class TextBoxInner(npyscreen.MultiLineEdit): def __init__(self, *args, **kwargs): @@ -324,55 +327,6 @@ def handle_mouse_event(self, mouse_event): if bstate & (BUTTON2_CLICKED | BUTTON3_CLICKED): self.h_paste() - # def update(self, clear=True): - # if clear: - # self.clear() - - # HEIGHT = self.height - # WIDTH = self.width - # # draw box. - # self.parent.curses_pad.hline(self.rely, self.relx, curses.ACS_HLINE, WIDTH) - # self.parent.curses_pad.hline( - # self.rely + HEIGHT, self.relx, curses.ACS_HLINE, WIDTH - # ) - # self.parent.curses_pad.vline( - # self.rely, self.relx, curses.ACS_VLINE, self.height - # ) - # self.parent.curses_pad.vline( - # self.rely, self.relx + WIDTH, curses.ACS_VLINE, HEIGHT - # ) - - # # draw corners - # self.parent.curses_pad.addch( - # self.rely, - # self.relx, - # curses.ACS_ULCORNER, - # ) - # self.parent.curses_pad.addch( - # self.rely, - # self.relx + WIDTH, - # curses.ACS_URCORNER, - # ) - # self.parent.curses_pad.addch( - # self.rely + HEIGHT, - # self.relx, - # curses.ACS_LLCORNER, - # ) - # self.parent.curses_pad.addch( - # self.rely + HEIGHT, - # self.relx + WIDTH, - # curses.ACS_LRCORNER, - # ) - - # # fool our superclass into thinking drawing area is smaller - this is really hacky but it seems to work - # (relx, rely, height, width) = (self.relx, self.rely, self.height, self.width) - # self.relx += 1 - # self.rely += 1 - # self.height -= 1 - # self.width -= 1 - # super().update(clear=False) - # (self.relx, self.rely, self.height, self.width) = (relx, rely, height, width) - class TextBox(npyscreen.BoxTitle): _contained_widget = TextBoxInner diff --git a/tests/test_config.py b/tests/test_config.py index 
0a8d44ad34c..88da7a02ab5 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -5,6 +5,8 @@ from omegaconf import OmegaConf from pathlib import Path +from invokeai.app.services.config import InvokeAIAppConfig + @pytest.fixture def patch_rootdir(tmp_path: Path, monkeypatch: Any) -> None: @@ -34,6 +36,21 @@ def patch_rootdir(tmp_path: Path, monkeypatch: Any) -> None: """ ) +init3 = OmegaConf.create( + """ +InvokeAI: + Generation: + sequential_guidance: true + attention_type: xformers + attention_slice_size: 7 + forced_tiled_decode: True + Device: + device: cpu + Model Cache: + ram: 1.25 +""" +) + def test_use_init(patch_rootdir): # note that we explicitly set omegaconf dict and argv here @@ -56,9 +73,18 @@ def test_use_init(patch_rootdir): assert not hasattr(conf2, "invalid_attribute") -def test_argv_override(patch_rootdir): - from invokeai.app.services.config import InvokeAIAppConfig +def test_legacy(): + conf = InvokeAIAppConfig.get_config() + assert conf + conf.parse_args(conf=init3, argv=[]) + assert conf.xformers_enabled + assert conf.device == "cpu" + assert conf.use_cpu + assert conf.ram == 1.25 + assert conf.ram_cache_size == 1.25 + +def test_argv_override(): conf = InvokeAIAppConfig.get_config() conf.parse_args(conf=init1, argv=["--always_use_cpu", "--max_cache=10"]) assert conf.always_use_cpu