Skip to content

Commit

Permalink
[Mergekit]update & add LoRA merge (#9811)
Browse files: browse the repository at this point in the history
* add

* fix bug

* fix

* add

* add lora merge

* add

* add

* add

* add

* add

* add
  • Loading branch information
lugimzzz authored Feb 11, 2025
1 parent 58fc49f commit 765ab8d
Show file tree
Hide file tree
Showing 7 changed files with 595 additions and 304 deletions.
71 changes: 42 additions & 29 deletions paddlenlp/mergekit/merge_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,7 @@
from dataclasses import asdict, dataclass, field
from typing import List, Optional

import paddle

from paddlenlp.utils.env import MERGE_CONFIG_NAME
from paddlenlp.utils.log import logger


@dataclass
Expand All @@ -30,7 +27,6 @@ class MergeConfig:
"""

# Common parameters
device: str = field(default="cpu", metadata={"help": "Device to use for the merge.ex cpu、 gpu、low_gpu_mem"})
tensor_type: str = field(
default="np", metadata={"help": "Tensor type to use for the merge. Choose np(CPU Only) or pd (CPU/GPU)"}
)
Expand All @@ -39,14 +35,20 @@ class MergeConfig:
merge_method: str = field(default="linear", metadata={"help": "The merge strategy."})
merge_type: str = field(default="linear", metadata={"help": "The type of merge process."})
sparsify_type: str = field(default=None, metadata={"help": "The type of sparsify process."})
split_pieces: int = field(default=8, metadata={"help": "Split large tensor to multi-piece"})
max_tensor_mem: float = field(default=0.5, metadata={"help": "Split tensor if exceed setting max_tensor_mem."})

# Model parameters
model_path_list: Optional[List[str]] = field(default=None, metadata={"help": "Merge model name or path list"})
model_path_str: Optional[str] = field(
default=None, metadata={"help": "Merge model name or path string.(split by ',')"}
)
base_model_path: str = field(default=None, metadata={"help": "Base model name or path."})
output_path: str = field(default=None, metadata={"help": "Base model name or path."})
output_path: str = field(default=None, metadata={"help": "Output model name or path."})
lora_model_path: str = field(default=None, metadata={"help": "LoRA model name or path."})
copy_file_list: Optional[List[str]] = field(
default=None, metadata={"help": "Copy file list from base model path or first model path."}
)
# Merge parameters
weight_list: Optional[List[float]] = field(
default=None, metadata={"help": "Relative (or absolute if normalize=False) weighting of a given tensor"}
Expand Down Expand Up @@ -75,32 +77,43 @@ def config_check(self):
os.makedirs(self.output_path, exist_ok=True)
if self.tensor_type not in ["np", "pd"]:
raise ValueError(f"Unsupported tensor type: {self.tensor_type}. Support 'np' and 'pd' only.")
if self.device == "gpu" and self.tensor_type == "np":
logger.warning("np only support cpu device, but got gpu. Setting `device` to `cpu`.")
self.device = "cpu"

elif self.merge_method not in ["linear", "ties", "slerp", "della_linear", "della", "dare_linear", "dare_ties"]:
raise ValueError(
f"Unsupported merge strategy: {self.merge_method}. Please choose one from ['linear', 'slerp']."
)
if self.model_path_str is not None:
self.model_path_list = self.model_path_str.split(",")
if self.model_path_list is not None:
if not isinstance(self.model_path_list, list) or len(self.model_path_list) < 2:
raise ValueError(f"Please specify the model_path_list at least two. But got {self.model_path_list}")
if self.weight_list is None:
self.weight_list = [1.0] * len(self.model_path_list)
self.normalize = True
if len(self.model_path_list) != len(self.weight_list):
raise ValueError("The length of model_path_list and weight_list must be the same.")
if self.reserve_p < 0 or self.reserve_p > 1:
raise ValueError("reserve_p must be between 0 and 1.")
if "della" in self.merge_method or self.sparsify_type == "magprune":
if self.reserve_p <= self.epsilon / 2 or self.reserve_p >= (1 - self.epsilon):
if self.lora_model_path is not None:
if self.base_model_path is None:
raise ValueError("Please specify the base_model_path when using LoRA merge.")
self.tensor_type = "pd"

if self.lora_model_path is None:
if self.merge_method not in [
"linear",
"ties",
"slerp",
"della_linear",
"della",
"dare_linear",
"dare_ties",
]:
raise ValueError(
f"Error: reserve_p +- epsilon/2 must be in the range (0, 1). reserve_p + epsilon/2 = {self.reserve_p + self.epsilon / 2 }, reserve_p - epsilon/2 = {self.reserve_p - self.epsilon / 2 }"
f"Unsupported merge strategy: {self.merge_method}. Please choose one from ['linear', 'slerp', 'ties', 'della_linear', 'della', ']."
)
paddle.set_device(self.device)
if self.model_path_str is not None:
self.model_path_list = self.model_path_str.split(",")
if self.model_path_list is not None:
if not isinstance(self.model_path_list, list) or len(self.model_path_list) < 2:
raise ValueError(
f"Please specify the model_path_list at least two. But got {self.model_path_list}"
)
if self.weight_list is None:
self.weight_list = [1.0] * len(self.model_path_list)
self.normalize = True
if len(self.model_path_list) != len(self.weight_list):
raise ValueError("The length of model_path_list and weight_list must be the same.")
if self.reserve_p < 0 or self.reserve_p > 1:
raise ValueError("reserve_p must be between 0 and 1.")
if "della" in self.merge_method or self.sparsify_type == "magprune":
if self.reserve_p <= self.epsilon / 2 or self.reserve_p >= (1 - self.epsilon):
raise ValueError(
f"Error: reserve_p +- epsilon/2 must be in the range (0, 1). reserve_p + epsilon/2 = {self.reserve_p + self.epsilon / 2 }, reserve_p - epsilon/2 = {self.reserve_p - self.epsilon / 2 }"
)

@property
def __dict__(self):
Expand Down
49 changes: 27 additions & 22 deletions paddlenlp/mergekit/merge_method.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,11 +48,10 @@ def linear(self, tensor_list):
tensor_output = sum(weight * tensor for weight, tensor in zip(weight_list, tensor_list))
return tensor_output
elif self.merge_config.tensor_type == "pd":
stacked_tensors = paddle.stack(tensor_list, axis=0)
weights = paddle.to_tensor(weight_list, dtype=stacked_tensors.dtype)
weights = weights.reshape([-1] + [1] * (len(stacked_tensors.shape) - 1))
weighted_sum = paddle.sum(stacked_tensors * weights, axis=0)
return weighted_sum
tensor_output = paddle.zeros_like(tensor_list[0])
for i, tensor in enumerate(tensor_list):
tensor_output += tensor * weight_list[i]
return tensor_output
else:
raise ValueError(f"Unkonwn tensor type {self.merge_config.tensor_type}")

Expand Down Expand Up @@ -155,28 +154,34 @@ def ties(self, tensor_list):

elif self.merge_config.tensor_type == "pd":
mask_dtype = tensor_list[0].dtype
weight_list = self.merge_config.weight_list
stacked_tensors = paddle.stack(tensor_list, axis=0)
weights = paddle.to_tensor(weight_list, dtype=stacked_tensors.dtype)
weights = weights.reshape([-1] + [1] * (len(stacked_tensors.shape) - 1))
weighted_tensors = stacked_tensors * weights

# Elect majority sign
if self.merge_config.ties_elect_type == "sum":
majority_sign = (paddle.sum(weighted_tensors, axis=0) >= 0).astype(mask_dtype) * 2 - 1
elif self.merge_config.ties_elect_type == "count":
stacked_signs = paddle.sign(stacked_tensors).astype(mask_dtype)
majority_sign = (paddle.sum(stacked_signs, axis=0) >= 0).astype(mask_dtype) * 2 - 1
else:
raise NotImplementedError(f"ties_elect_type: {self.merge_config.ties_elect_type} is unknown.")
majority_sign = paddle.zeros_like(tensor_list[0])
for i, tensor in enumerate(tensor_list):
if self.merge_config.ties_elect_type == "sum":
majority_sign += tensor * self.merge_config.weight_list[i]
elif self.merge_config.ties_elect_type == "count":
majority_sign += tensor.sign()
else:
raise NotImplementedError(f"ties_elect_type: {self.merge_config.ties_elect_type} is unknown.")
majority_sign = (majority_sign >= 0).astype(mask_dtype) * 2 - 1

# Merge
stacked_masks = (paddle.sign(weighted_tensors) == majority_sign).astype(mask_dtype)
masked_tensors = stacked_masks * weighted_tensors
merge_tensor = paddle.sum(masked_tensors, axis=0)
merge_tensor = paddle.zeros_like(tensor_list[0])
if self.merge_config.normalize:
divisor = paddle.zeros_like(tensor_list[0])
for i, tensor in enumerate(tensor_list):
if self.merge_config.normalize:
mask = (tensor.sign() == majority_sign).astype(mask_dtype) * self.merge_config.weight_list[i]
divisor += mask
merge_tensor += mask * tensor
else:
merge_tensor += (
(tensor.sign() == majority_sign).astype(mask_dtype) * tensor * self.merge_config.weight_list[i]
)

# Normalize
if self.merge_config.normalize:
weight_masks = stacked_masks * weights
divisor = paddle.sum(weight_masks, axis=0)
divisor = paddle.where(paddle.abs(divisor) < 1e-8, paddle.ones_like(divisor), divisor)
merge_tensor /= divisor

Expand Down
Loading

0 comments on commit 765ab8d

Please sign in to comment.