From 30ec5eb22d4a6f0a2fde36f178acf9d444039bd3 Mon Sep 17 00:00:00 2001 From: bukejiyu <395822456@qq.com> Date: Fri, 26 Sep 2025 09:02:41 +0000 Subject: [PATCH 1/4] fix st_proj --- .../model_executor/layers/activation.py | 2 +- .../model_executor/layers/embeddings.py | 9 ++--- .../model_loader/default_loader_v1.py | 1 + fastdeploy/model_executor/models/adapters.py | 38 +++++++++---------- fastdeploy/model_executor/utils.py | 4 +- 5 files changed, 25 insertions(+), 29 deletions(-) diff --git a/fastdeploy/model_executor/layers/activation.py b/fastdeploy/model_executor/layers/activation.py index 79fd3b24f6..b2b6fe3a3f 100644 --- a/fastdeploy/model_executor/layers/activation.py +++ b/fastdeploy/model_executor/layers/activation.py @@ -164,7 +164,7 @@ def get_act_fn(act_fn_name: str) -> nn.Layer: """Get an activation function by name.""" act_fn_name = act_fn_name.lower() - if act_fn_name.startswith("paddle.nn.Layer"): + if act_fn_name.startswith(("paddle.nn.Layer", "torch.nn.modules")): activation_name = act_fn_name.split(".")[-1] if activation_name == "identity": return nn.Identity() diff --git a/fastdeploy/model_executor/layers/embeddings.py b/fastdeploy/model_executor/layers/embeddings.py index 6df196f654..dae25fa081 100644 --- a/fastdeploy/model_executor/layers/embeddings.py +++ b/fastdeploy/model_executor/layers/embeddings.py @@ -163,10 +163,8 @@ def __init__( initializer=nn.initializer.Normal(mean=0.0, std=self.initializer_range), ), ) - if self.world_size > 1: - set_weight_attrs(self.embeddings.weight, {"output_dim": False}) - if num_embeddings % self.world_size != 0: - set_weight_attrs(self.embeddings.weight, {"weight_loader", self.weight_loader}) + set_weight_attrs(self.embeddings.weight, {"output_dim": False}) + set_weight_attrs(self.embeddings.weight, {"weight_loader": self.weight_loader}) else: # column cut embedding self.embeddings = nn.Embedding( @@ -176,8 +174,7 @@ def __init__( self.embeddings.weight.is_distributed = True self.embeddings.weight.split_axis = 1 - if self.world_size > 1: - set_weight_attrs(self.embeddings.weight, {"output_dim": True}) + set_weight_attrs(self.embeddings.weight, {"output_dim": True}) self.prefix = prefix self.dropout = nn.Dropout(self.hidden_dropout_prob) diff --git a/fastdeploy/model_executor/model_loader/default_loader_v1.py b/fastdeploy/model_executor/model_loader/default_loader_v1.py index 09b85cbe7e..87ad1f20bd 100644 --- a/fastdeploy/model_executor/model_loader/default_loader_v1.py +++ b/fastdeploy/model_executor/model_loader/default_loader_v1.py @@ -72,6 +72,7 @@ def load_model(self, fd_config: FDConfig) -> nn.Layer: with context: model_cls = ModelRegistry.get_class(architectures) convert_type = fd_config.model_config.convert_type + # print("model_cls.name:",model_cls.__name__) if convert_type == "none": pass elif convert_type == "embed": diff --git a/fastdeploy/model_executor/models/adapters.py b/fastdeploy/model_executor/models/adapters.py index 1f2590acdd..bb88bdb800 100644 --- a/fastdeploy/model_executor/models/adapters.py +++ b/fastdeploy/model_executor/models/adapters.py @@ -14,6 +14,7 @@ # limitations under the License. 
""" +import os from collections.abc import Iterable from typing import Optional, TypeVar @@ -40,36 +41,30 @@ def _load_dense_weights(linear: nn.Linear, folder: str, model_config: "ModelConf from fastdeploy.model_executor.utils import default_weight_loader filename = "model.safetensors" - file_path = f"{folder}/{filename}" if folder else filename + file_path = f"{model_config.model}/{folder}/{filename}" if folder else filename try: - file_bytes = get_hf_file_to_dict(file_path, model_config.model, model_config.revision) - if not file_bytes: + print(file_path) + if not os.path.exists(file_path): return False state_dict = {} - if filename.endswith(".safetensors"): - import io + # only safetensor now + from safetensors.numpy import load_file - from safetensors.numpy import load as load_safetensors - - numpy_tensors = load_safetensors(io.BytesIO(file_bytes)) - for key, numpy_array in numpy_tensors.items(): - state_dict[key] = paddle.to_tensor(numpy_array) - else: - import io - - state_dict = paddle.load(io.BytesIO(file_bytes)) + numpy_tensors = load_file(file_path) + for key, numpy_array in numpy_tensors.items(): + state_dict[key] = paddle.to_tensor(numpy_array) weight_keys = ["weight", "linear.weight", "dense.weight"] for weight_key in weight_keys: if weight_key in state_dict: - weight_loader = getattr(linear.weight, "weight_loader", default_weight_loader) + weight_loader = getattr(linear.weight, "weight_loader", default_weight_loader()) weight_loader(linear.weight, state_dict[weight_key].astype(paddle.float32)) bias_key = weight_key.replace("weight", "bias") if linear.bias is not None and bias_key in state_dict: - bias_loader = getattr(linear.bias, "weight_loader", default_weight_loader) + bias_loader = getattr(linear.bias, "weight_loader", default_weight_loader()) bias_loader(linear.bias, state_dict[bias_key].astype(paddle.float32)) return True except Exception as e: @@ -80,6 +75,7 @@ def _load_dense_weights(linear: nn.Linear, folder: str, model_config: "ModelConf def _load_st_projector(model_config: "ModelConfig") -> Optional[nn.Layer]: try: + print("Loading ST Projector...") modules = get_hf_file_to_dict("modules.json", model_config.model, model_config.revision) if not modules: return None @@ -98,13 +94,15 @@ def _load_st_projector(model_config: "ModelConfig") -> Optional[nn.Layer]: layer_config = get_hf_file_to_dict(config_path, model_config.model, model_config.revision) if not layer_config: continue + bias_attr = paddle.ParamAttr(name="linear_bias", initializer=paddle.nn.initializer.Constant(0)) linear = nn.Linear( - layer_config.get("in_features", 768), - layer_config.get("out_features", 768), - bias=layer_config.get("bias", True), + layer_config.get("in_features", 768), layer_config.get("out_features", 768), bias_attr=bias_attr ) + if linear.weight._is_initialized: + linear.weight.initialize() + if linear.bias._is_initialized: + linear.bias.initialize() linear = linear.astype(paddle.float32) - if not _load_dense_weights(linear, folder, model_config): continue diff --git a/fastdeploy/model_executor/utils.py b/fastdeploy/model_executor/utils.py index 754725691e..0e2f747ed5 100644 --- a/fastdeploy/model_executor/utils.py +++ b/fastdeploy/model_executor/utils.py @@ -153,7 +153,7 @@ def free_tensor(tensor): del tensor -def default_weight_loader(fd_config: FDConfig) -> None: +def default_weight_loader(fd_config: FDConfig = None) -> None: """Default weight loader""" def fn(param, loaded_weight, shard_id: Optional[Union[int, str]] = None): @@ -165,7 +165,7 @@ def fn(param, loaded_weight, 
shard_id: Optional[Union[int, str]] = None): loaded_weight = get_tensor(loaded_weight) loaded_weight = loaded_weight.transpose([1, 0]) # Tensor parallelism splits the weight along the output_dim - if output_dim is not None and fd_config.parallel_config.tensor_parallel_size > 1: + if output_dim is not None and fd_config is not None and fd_config.parallel_config.tensor_parallel_size > 1: dim = -1 if output_dim else 0 if isinstance(loaded_weight, paddle.Tensor): size = loaded_weight.shape[dim] From 0c94ea6b5f9f4e08559032b166b2ff5146d5e1bb Mon Sep 17 00:00:00 2001 From: bukejiyu <395822456@qq.com> Date: Fri, 26 Sep 2025 09:05:30 +0000 Subject: [PATCH 2/4] update --- fastdeploy/model_executor/model_loader/default_loader_v1.py | 1 - 1 file changed, 1 deletion(-) diff --git a/fastdeploy/model_executor/model_loader/default_loader_v1.py b/fastdeploy/model_executor/model_loader/default_loader_v1.py index 87ad1f20bd..09b85cbe7e 100644 --- a/fastdeploy/model_executor/model_loader/default_loader_v1.py +++ b/fastdeploy/model_executor/model_loader/default_loader_v1.py @@ -72,7 +72,6 @@ def load_model(self, fd_config: FDConfig) -> nn.Layer: with context: model_cls = ModelRegistry.get_class(architectures) convert_type = fd_config.model_config.convert_type - # print("model_cls.name:",model_cls.__name__) if convert_type == "none": pass elif convert_type == "embed": From caab14f48c28819c757f9a8c2c1d172a764672e0 Mon Sep 17 00:00:00 2001 From: bukejiyu <395822456@qq.com> Date: Fri, 26 Sep 2025 09:18:08 +0000 Subject: [PATCH 3/4] update --- fastdeploy/model_executor/models/adapters.py | 1 - 1 file changed, 1 deletion(-) diff --git a/fastdeploy/model_executor/models/adapters.py b/fastdeploy/model_executor/models/adapters.py index bb88bdb800..4ad9d5e32f 100644 --- a/fastdeploy/model_executor/models/adapters.py +++ b/fastdeploy/model_executor/models/adapters.py @@ -75,7 +75,6 @@ def _load_dense_weights(linear: nn.Linear, folder: str, model_config: "ModelConf def _load_st_projector(model_config: "ModelConfig") -> Optional[nn.Layer]: try: - print("Loading ST Projector...") modules = get_hf_file_to_dict("modules.json", model_config.model, model_config.revision) if not modules: return None From 4ec014835bf4191ba7869226dc591b6f44d9434d Mon Sep 17 00:00:00 2001 From: bukejiyu <395822456@qq.com> Date: Fri, 26 Sep 2025 10:37:45 +0000 Subject: [PATCH 4/4] commit --- fastdeploy/model_executor/models/adapters.py | 1 - 1 file changed, 1 deletion(-) diff --git a/fastdeploy/model_executor/models/adapters.py b/fastdeploy/model_executor/models/adapters.py index 4ad9d5e32f..fe282a67dc 100644 --- a/fastdeploy/model_executor/models/adapters.py +++ b/fastdeploy/model_executor/models/adapters.py @@ -44,7 +44,6 @@ def _load_dense_weights(linear: nn.Linear, folder: str, model_config: "ModelConf file_path = f"{model_config.model}/{folder}/{filename}" if folder else filename try: - print(file_path) if not os.path.exists(file_path): return False
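
Net effect of the series on the loader API: `default_weight_loader` in
fastdeploy/model_executor/utils.py now accepts `fd_config=None`, so call
sites that have no config (such as the Sentence-Transformers projector
loader in adapters.py) can use `default_weight_loader()` as a zero-argument
fallback, and the tensor-parallel slicing branch is skipped whenever no
config is supplied. The snippet below is a minimal, self-contained sketch
of that fallback pattern under an assumed working paddle install — not the
FastDeploy implementation; the tensor-parallel slicing is elided and the
768x768 shapes and the `loader` name are illustrative only.

    import paddle

    def default_weight_loader(fd_config=None):
        """Return a loader closure; fd_config may be None for config-less callers."""
        def fn(param, loaded_weight, shard_id=None):
            output_dim = getattr(param, "output_dim", None)
            # Mirrors the patched guard: only shard along output_dim when a
            # config exists and tensor_parallel_size > 1 (slicing elided here).
            if output_dim is not None and fd_config is not None:
                pass
            # Copy the (possibly numpy) weight into the parameter in-place.
            param.set_value(paddle.to_tensor(loaded_weight).astype(param.dtype))
        return fn

    # Fallback pattern from adapters.py: prefer a parameter's own weight_loader
    # attribute, otherwise fall back to the default loader built with no config.
    linear = paddle.nn.Linear(768, 768)
    loader = getattr(linear.weight, "weight_loader", default_weight_loader())
    loader(linear.weight, paddle.randn([768, 768]).numpy())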