| 
4 | 4 | Whenever you add an architecture to this page, please also update  | 
5 | 5 | `tests/models/registry.py` with example HuggingFace models for it.  | 
6 | 6 | """  | 
 | 7 | +import hashlib  | 
7 | 8 | import importlib  | 
 | 9 | +import json  | 
8 | 10 | import os  | 
9 | 11 | import pickle  | 
10 | 12 | import subprocess  | 
11 | 13 | import sys  | 
12 | 14 | import tempfile  | 
13 | 15 | from abc import ABC, abstractmethod  | 
14 | 16 | from collections.abc import Set  | 
15 |  | -from dataclasses import dataclass, field  | 
 | 17 | +from dataclasses import asdict, dataclass, field  | 
16 | 18 | from functools import lru_cache  | 
 | 19 | +from pathlib import Path  | 
17 | 20 | from typing import Callable, Optional, TypeVar, Union  | 
18 | 21 | 
 
  | 
19 | 22 | import torch.nn as nn  | 
20 | 23 | import transformers  | 
21 | 24 | 
 
  | 
 | 25 | +from vllm import envs  | 
22 | 26 | from vllm.config import (ModelConfig, iter_architecture_defaults,  | 
23 | 27 |                          try_match_architecture_defaults)  | 
24 | 28 | from vllm.logger import init_logger  | 
 | 29 | +from vllm.logging_utils import logtime  | 
25 | 30 | from vllm.transformers_utils.dynamic_module import (  | 
26 | 31 |     try_get_class_from_dynamic_module)  | 
27 | 32 | 
 
  | 
@@ -421,10 +426,91 @@ class _LazyRegisteredModel(_BaseRegisteredModel):  | 
421 | 426 |     module_name: str  | 
422 | 427 |     class_name: str  | 
423 | 428 | 
 
  | 
424 |  | -    # Performed in another process to avoid initializing CUDA  | 
 | 429 | +    @staticmethod  | 
 | 430 | +    def _get_cache_dir() -> Path:  | 
 | 431 | +        return Path(envs.VLLM_CACHE_ROOT) / "modelinfos"  | 
 | 432 | + | 
 | 433 | +    def _get_cache_filename(self) -> str:  | 
 | 434 | +        cls_name = f"{self.module_name}-{self.class_name}".replace(".", "-")  | 
 | 435 | +        return f"{cls_name}.json"  | 
 | 436 | + | 
 | 437 | +    def _load_modelinfo_from_cache(self,  | 
 | 438 | +                                   module_hash: str) -> _ModelInfo | None:  | 
 | 439 | +        try:  | 
 | 440 | +            try:  | 
 | 441 | +                modelinfo_path = self._get_cache_dir(  | 
 | 442 | +                ) / self._get_cache_filename()  | 
 | 443 | +                with open(modelinfo_path, encoding="utf-8") as file:  | 
 | 444 | +                    mi_dict = json.load(file)  | 
 | 445 | +            except FileNotFoundError:  | 
 | 446 | +                logger.debug(("Cached model info file "  | 
 | 447 | +                              "for class %s.%s not found"), self.module_name,  | 
 | 448 | +                             self.class_name)  | 
 | 449 | +                return None  | 
 | 450 | + | 
 | 451 | +            if mi_dict["hash"] != module_hash:  | 
 | 452 | +                logger.debug(("Cached model info file "  | 
 | 453 | +                              "for class %s.%s is stale"), self.module_name,  | 
 | 454 | +                             self.class_name)  | 
 | 455 | +                return None  | 
 | 456 | + | 
 | 457 | +            # file not changed, use cached _ModelInfo properties  | 
 | 458 | +            return _ModelInfo(**mi_dict["modelinfo"])  | 
 | 459 | +        except Exception:  | 
 | 460 | +            logger.exception(("Cached model info "  | 
 | 461 | +                              "for class %s.%s error. "), self.module_name,  | 
 | 462 | +                             self.class_name)  | 
 | 463 | +            return None  | 
 | 464 | + | 
 | 465 | +    def _save_modelinfo_to_cache(self, mi: _ModelInfo,  | 
 | 466 | +                                 module_hash: str) -> None:  | 
 | 467 | +        """save dictionary json file to cache"""  | 
 | 468 | +        from vllm.model_executor.model_loader.weight_utils import atomic_writer  | 
 | 469 | +        try:  | 
 | 470 | +            modelinfo_dict = {  | 
 | 471 | +                "hash": module_hash,  | 
 | 472 | +                "modelinfo": asdict(mi),  | 
 | 473 | +            }  | 
 | 474 | +            cache_dir = self._get_cache_dir()  | 
 | 475 | +            cache_dir.mkdir(parents=True, exist_ok=True)  | 
 | 476 | +            modelinfo_path = cache_dir / self._get_cache_filename()  | 
 | 477 | +            with atomic_writer(modelinfo_path, encoding='utf-8') as f:  | 
 | 478 | +                json.dump(modelinfo_dict, f, indent=2)  | 
 | 479 | +        except Exception:  | 
 | 480 | +            logger.exception("Error saving model info cache.")  | 
 | 481 | + | 
 | 482 | +    @logtime(logger=logger, msg="Registry inspect model class")  | 
425 | 483 |     def inspect_model_cls(self) -> _ModelInfo:  | 
426 |  | -        return _run_in_subprocess(  | 
 | 484 | +        model_path = Path(  | 
 | 485 | +            __file__).parent / f"{self.module_name.split('.')[-1]}.py"  | 
 | 486 | + | 
 | 487 | +        assert model_path.exists(), \  | 
 | 488 | +            f"Model {self.module_name} expected to be on path {model_path}"  | 
 | 489 | +        with open(model_path, "rb") as f:  | 
 | 490 | +            module_hash = hashlib.md5(f.read()).hexdigest()  | 
 | 491 | + | 
 | 492 | +        mi = self._load_modelinfo_from_cache(module_hash)  | 
 | 493 | +        if mi is not None:  | 
 | 494 | +            logger.debug(("Loaded model info "  | 
 | 495 | +                          "for class %s.%s from cache"), self.module_name,  | 
 | 496 | +                         self.class_name)  | 
 | 497 | +            return mi  | 
 | 498 | +        else:  | 
 | 499 | +            logger.debug(("Cache model info "  | 
 | 500 | +                          "for class %s.%s miss. "  | 
 | 501 | +                          "Loading model instead."), self.module_name,  | 
 | 502 | +                         self.class_name)  | 
 | 503 | + | 
 | 504 | +        # Performed in another process to avoid initializing CUDA  | 
 | 505 | +        mi = _run_in_subprocess(  | 
427 | 506 |             lambda: _ModelInfo.from_model_cls(self.load_model_cls()))  | 
 | 507 | +        logger.debug("Loaded model info for class %s.%s", self.module_name,  | 
 | 508 | +                     self.class_name)  | 
 | 509 | + | 
 | 510 | +        # save cache file  | 
 | 511 | +        self._save_modelinfo_to_cache(mi, module_hash)  | 
 | 512 | + | 
 | 513 | +        return mi  | 
428 | 514 | 
 
  | 
429 | 515 |     def load_model_cls(self) -> type[nn.Module]:  | 
430 | 516 |         mod = importlib.import_module(self.module_name)  | 
 | 
0 commit comments