From ba2d21157524a23060c3bd57bb051440906df2c5 Mon Sep 17 00:00:00 2001
From: Pherenice1125
Date: Tue, 30 Jul 2024 05:27:56 +0000
Subject: [PATCH 1/2] add phi-3 unit test

---
 tests/test_phi3.py | 64 ++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 64 insertions(+)
 create mode 100644 tests/test_phi3.py

diff --git a/tests/test_phi3.py b/tests/test_phi3.py
new file mode 100644
index 0000000..76c0d96
--- /dev/null
+++ b/tests/test_phi3.py
@@ -0,0 +1,64 @@
+import unittest
+
+import torch
+
+from mixlora.model import LoraLinear, MixLoraConfig, MixLoraSparseMoe
+
+
+class DummyPhi3MLP(torch.nn.Module):
+    def __init__(self, hidden_size: int, intermediate_size: int):
+        super().__init__()
+        self.gate_up_proj = torch.nn.Linear(hidden_size, 2*intermediate_size, bias=False)
+        self.down_proj = torch.nn.Linear(intermediate_size, hidden_size, bias=False)
+        self.act_fn = torch.nn.SiLU()
+
+
+config = MixLoraConfig.from_config(
+    {
+        "bias": "none",
+        "peft_type": "MIXLORA",
+        "r": 8,
+        "lora_alpha": 16,
+        "lora_dropout": 0.05,
+        "target_modules": [
+            "qkv_proj",
+            "o_proj",
+            "gate_up_proj",
+            "down_proj"
+        ],
+        "routing_strategy": "mixtral",
+        "num_experts": 8,
+        "act_fn": "silu",
+        "top_k": 2,
+        "base_model_name_or_path": "DUMMY",
+        "task_type": "CAUSAL_LM",
+    }
+)
+
+config.model_type_ = "phi3"
+
+hidden_size = 8
+intermediate_size = hidden_size * 2
+dummy_mlp = DummyPhi3MLP(hidden_size, intermediate_size)
+moe_layer = MixLoraSparseMoe(dummy_mlp, config)
+gate_layer = torch.nn.Linear(hidden_size, config.num_experts_, bias=False)
+moe_layer.gate_ = gate_layer.weight
+mlp_projections = ["gate_up_proj", "down_proj"]
+for proj_name in mlp_projections:
+    base_layer: torch.nn.Linear = getattr(dummy_mlp, proj_name)
+    torch.nn.init.zeros_(base_layer.weight)
+    for expert_idx in range(config.num_experts_):
+        moe_layer.experts_[f"experts.{expert_idx}.{proj_name}"] = LoraLinear(
+            base_layer, config
+        )
+
+
+class Phi3TestCase(unittest.TestCase):
+    def test_forward(self):
+        input = torch.zeros((1, 8, hidden_size))
+        output: torch.Tensor = moe_layer(input)
+        self.assertEqual(output.shape, (1, 8, hidden_size))
+
+
+if __name__ == "__main__":
+    unittest.main()

From 06a188baf3398d31a3d05d3846113cbca3da9577 Mon Sep 17 00:00:00 2001
From: Pherenice1125
Date: Tue, 30 Jul 2024 05:34:40 +0000
Subject: [PATCH 2/2] phi3-unit-test

---
 tests/test_phi3.py | 11 ++++-------
 1 file changed, 4 insertions(+), 7 deletions(-)

diff --git a/tests/test_phi3.py b/tests/test_phi3.py
index 76c0d96..40a1da9 100644
--- a/tests/test_phi3.py
+++ b/tests/test_phi3.py
@@ -8,7 +8,9 @@
 class DummyPhi3MLP(torch.nn.Module):
     def __init__(self, hidden_size: int, intermediate_size: int):
         super().__init__()
-        self.gate_up_proj = torch.nn.Linear(hidden_size, 2*intermediate_size, bias=False)
+        self.gate_up_proj = torch.nn.Linear(
+            hidden_size, 2 * intermediate_size, bias=False
+        )
         self.down_proj = torch.nn.Linear(intermediate_size, hidden_size, bias=False)
         self.act_fn = torch.nn.SiLU()
 
@@ -20,12 +22,7 @@ def __init__(self, hidden_size: int, intermediate_size: int):
         "r": 8,
         "lora_alpha": 16,
         "lora_dropout": 0.05,
-        "target_modules": [
-            "qkv_proj",
-            "o_proj",
-            "gate_up_proj",
-            "down_proj"
-        ],
+        "target_modules": ["qkv_proj", "o_proj", "gate_up_proj", "down_proj"],
         "routing_strategy": "mixtral",
         "num_experts": 8,
         "act_fn": "silu",
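
A minimal way to exercise the new test locally (a sketch, not part of the patch; it assumes the repository root is the working directory so that `mixlora` and `tests/` resolve, and that `torch` is installed):

    # run_phi3_test.py -- hypothetical helper script, not included in this patch.
    # Discovers tests/test_phi3.py and runs it with the standard-library runner.
    import unittest

    suite = unittest.defaultTestLoader.discover("tests", pattern="test_phi3.py")
    unittest.TextTestRunner(verbosity=2).run(suite)

Equivalently, `python -m unittest discover -s tests -p "test_phi3.py" -v` from the repository root should pick up the same test.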