Fix static EPLB log2phy condition and improve unit test #1186
Requirements diff (first file):

```
@@ -1,2 +1,2 @@
pytest-asyncio
pytest-mock
```
Requirements diff (second file), adding `pytest-mock`:

```diff
@@ -4,6 +4,7 @@ modelscope
 openai
 pytest >= 6.0
 pytest-asyncio
+pytest-mock
 lm-eval
 ray
 types-jsonschema
```

> **Review comment** (on the `pytest-mock` line): Let's add …
>
> **Reply:** we could do this later in your e2e test pr
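For context, `pytest-mock` provides the `mocker` fixture that the new unit test uses to stub out NPU-only calls. A minimal sketch of how that fixture works; `get_device_name` below is a hypothetical stand-in, not part of this PR:

```python
# Minimal pytest-mock sketch; `get_device_name` is a hypothetical stand-in
# for an NPU-only call such as torch.npu.current_device.
def get_device_name():
    raise RuntimeError("would touch real NPU hardware")


def test_get_device_name_is_mocked(mocker):
    # mocker.patch swaps the module attribute for this test only and
    # restores the original automatically when the test finishes.
    mocker.patch(f"{__name__}.get_device_name", return_value="npu:0")
    assert get_device_name() == "npu:0"
```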
New unit test file for `ExpertLoadBalancer` (`@@ -0,0 +1,146 @@`):

> **Review comment** (on the `patch_utils` import): for ut, I don't think use …

```python
# The fused MoE ops test will hit the infer_schema error, so we need to add
# the patch here to make the test pass.
import vllm_ascend.patch.worker.patch_common.patch_utils  # type: ignore[import]  # isort: skip  # noqa

import json
from typing import List, TypedDict

import pytest
import torch

from vllm_ascend.ops.expert_load_balancer import ExpertLoadBalancer


class Device(TypedDict):
    device_id: int
    device_expert: List[int]


class Layer(TypedDict):
    layer_id: int
    device_count: int
    device_list: List[Device]


class MockData(TypedDict):
    moe_layer_count: int
    layer_list: List[Layer]


MOCK_DATA: MockData = {
    "moe_layer_count": 1,
    "layer_list": [{
        "layer_id": 0,
        "device_count": 2,
        "device_list": [{
            "device_id": 0,
            "device_expert": [7, 2, 0, 3, 5]
        }, {
            "device_id": 1,
            "device_expert": [6, 1, 4, 7, 2]
        }]
    }]
}


@pytest.fixture
def mock_expert_load_balancer(tmp_path):
    json_file = tmp_path / "expert_map.json"
    with open(json_file, 'w') as f:
        json.dump(MOCK_DATA, f)

    return ExpertLoadBalancer(str(json_file), global_expert_num=8)


def test_init(mock_expert_load_balancer):
    assert isinstance(mock_expert_load_balancer.expert_map_tensor,
                      torch.Tensor)
    assert mock_expert_load_balancer.layers_num == MOCK_DATA["moe_layer_count"]
    assert mock_expert_load_balancer.ranks_num == MOCK_DATA["layer_list"][0][
        "device_count"]


def test_generate_index_dicts(mock_expert_load_balancer):
    tensor_2d = torch.tensor([[7, 2, 0, 3, 5], [6, 1, 4, 7, 2]])
    result = mock_expert_load_balancer.generate_index_dicts(tensor_2d)
    expected_result = [{
        7: 0, 2: 1, 0: 2, 3: 3, 5: 4
    }, {
        6: 5, 1: 6, 4: 7, 7: 8, 2: 9
    }]
    assert result == expected_result


def test_generate_expert_placement_map(mock_expert_load_balancer):
    expert_placement_map = \
        mock_expert_load_balancer.generate_expert_placement_map()
    assert expert_placement_map.shape == (mock_expert_load_balancer.layers_num,
                                          mock_expert_load_balancer.ranks_num,
                                          8)
    assert torch.all(expert_placement_map >= -1)


def test_generate_log2phy_expert_map(mock_expert_load_balancer):
    layer_id = 0
    log2phy_map = mock_expert_load_balancer.generate_log2phy_expert_map(
        layer_id)
    assert log2phy_map.shape == (mock_expert_load_balancer.ranks_num, 8)
    assert torch.all(log2phy_map >= -1)


def test_get_rank_placement_map(mock_expert_load_balancer, mocker):
    mocker.patch("torch_npu.npu._lazy_init")
    mocker.patch('torch.npu.current_device', return_value='cpu')
    layer_id = 0
    rank_id = 0
    rank_local_expert_num, rank_expert_map = \
        mock_expert_load_balancer.get_rank_placement_map(layer_id, rank_id)
    assert rank_local_expert_num == 5
    expected_tensor = torch.tensor([2, -1, 1, 3, -1, 4, -1, 0],
                                   dtype=torch.int32).to(
                                       rank_expert_map.device)
    assert rank_expert_map.equal(expected_tensor)

    rank_id = 1
    rank_local_expert_num, rank_expert_map = \
        mock_expert_load_balancer.get_rank_placement_map(layer_id, rank_id)
    expected_tensor = torch.tensor([-1, 1, 4, -1, 2, -1, 0, 3],
                                   dtype=torch.int32).to(
                                       rank_expert_map.device)
    assert rank_expert_map.equal(expected_tensor)


def test_get_rank_log2phy_map(mock_expert_load_balancer):
    layer_id = 0
    rank_id = 0
    log2phy_map = mock_expert_load_balancer.get_rank_log2phy_map(
        layer_id, rank_id)
    expected_tensor = torch.tensor([2, 6, 1, 3, 7, 4, 5, 0],
                                   dtype=torch.int32).to(log2phy_map.device)
    assert log2phy_map.equal(expected_tensor)

    rank_id = 1
    log2phy_map = mock_expert_load_balancer.get_rank_log2phy_map(
        layer_id, rank_id)
    expected_tensor = torch.tensor([2, 6, 9, 3, 7, 4, 5, 8],
                                   dtype=torch.int32).to(log2phy_map.device)
    assert log2phy_map.equal(expected_tensor)


def test_get_global_redundant_expert_num(mock_expert_load_balancer):
    redundant_expert_num = \
        mock_expert_load_balancer.get_global_redundant_expert_num()
    expected_redundant_expert_num = \
        len(MOCK_DATA["layer_list"][0]["device_list"][0]["device_expert"]) * \
        MOCK_DATA["layer_list"][0]["device_count"] - 8
    assert redundant_expert_num == expected_redundant_expert_num
```
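To make the expected tensors in `test_get_rank_log2phy_map` easy to verify by hand, here is a standalone sketch that re-derives them from `MOCK_DATA`. It assumes, as `test_generate_index_dicts` indicates, that physical expert ids are assigned row-major across devices, and that a rank prefers its own replica when a logical expert is duplicated; the helper name is ours, not the PR's:

```python
# Hypothetical re-derivation of the expected log2phy tensors from MOCK_DATA.
def log2phy_for_rank(device_experts, rank_id, global_expert_num=8):
    slots = len(device_experts[0])  # physical slots per device
    log2phy = [-1] * global_expert_num
    for d, experts in enumerate(device_experts):
        for j, expert in enumerate(experts):
            phys = d * slots + j  # row-major global physical id
            # Keep the first copy seen unless this rank hosts its own replica.
            if log2phy[expert] == -1 or d == rank_id:
                log2phy[expert] = phys
    return log2phy


device_experts = [[7, 2, 0, 3, 5], [6, 1, 4, 7, 2]]
assert log2phy_for_rank(device_experts, 0) == [2, 6, 1, 3, 7, 4, 5, 0]
assert log2phy_for_rank(device_experts, 1) == [2, 6, 9, 3, 7, 4, 5, 8]
```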
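The final assertion reduces to simple arithmetic: total physical slots minus the number of logical experts. A quick check against the mock data:

```python
# Worked check of the redundancy count encoded in the last test.
slots_per_device = 5   # len(device_expert) on each device in MOCK_DATA
device_count = 2
global_expert_num = 8
redundant = slots_per_device * device_count - global_expert_num
assert redundant == 2  # experts 2 and 7 each have a second physical copy
```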