|
88 | 88 | # |
89 | 89 | # * Worker Patch: |
90 | 90 | # =============== |
| 91 | +# ** File: worker/patch_common/patch_utils.py ** |
| 92 | +# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ |
| 93 | +# 1. `vllm.utils.direct_register_custom_op` |
| 94 | +# Why: |
| 95 | +# direct_register_custom_op requires pytorch version >= 2.7.0, |
| 96 | +# but vllm-ascend only supports pytorch version 2.5.1 |
| 97 | +# How: |
| 98 | +# Convert annotation type to typing type for 2.5.1 backward compatibility |
| 99 | +# Related PR (if no, explain why): |
| 100 | +# No related PR, it's the change in vllm-ascend. |
| 101 | +# Future Plan: |
| 102 | +# Update pytorch and torch-npu to 2.7.0 in the future. |
| 103 | +# ** File: worker/patch_common/patch_cache_engine.py ** |
| 104 | +# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ |
| 105 | +# 1. `vllm.worker.cache_engine.CacheEngine._allocate_kv_cache` |
| 106 | +# Why: |
| 107 | +# Add graph_mode optimization for kv cache allocation. |
| 108 | +# How: |
| 109 | +# If graph_mode is enabled, add layer_kv_cache_nope and layer_kv_cache_pe to the kv_cache. |
| 110 | +# Related PR (if no, explain why): |
| 111 | +# Need a PR to vllm to fix the issue. |
| 112 | +# Future Plan: |
| 113 | +# Revert it when the related PR is merged in vllm. |
91 | 114 | # ** File: worker/patch_common/patch_metrics.py ** |
92 | 115 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ |
93 | | -# 1. `vllm.spec_decode.metrics.AsyncMetricsCollector.maybe_collect_rejsample_metrics` |
| 116 | +# 1. `vllm.spec_decode.metrics.AsyncMetricsCollector._copy_rejsample_metrics_async` |
94 | 117 | # Why: |
95 | 118 | # There is hard-coded CUDA logic (current_platform.is_cuda_alike()) in |
96 | | -# `AsyncMetricsCollector.maybe_collect_rejsample_metrics` |
| 119 | +# `AsyncMetricsCollector._copy_rejsample_metrics_async` |
97 | 120 | # How: |
98 | 121 | # Change to use `current_platform.Event` to determine whether to return None |
99 | | -# Related PR (if no, explain why): 1. refused by vllm. 2. vllm doesn't support 3. prepare to submit.... |
100 | | -# https://github.com/vllm-project/vllm/pull/14411 |
| 122 | +# Related PR (if no, explain why): |
| 123 | +# Need a PR to vllm to fix the issue. |
101 | 124 | # Future Plan: |
102 | 125 | # Revert it when the related PR is merged in vllm. |
103 | 126 | # |
|
110 | 133 | # However float32 is not supported in cann rope op, thus we keep this patch |
111 | 134 | # How: |
112 | 135 | # Removed the dtype convert operations in forward |
113 | | -# Related PR (if no, explain why): 1. refused by vllm. 2. vllm doesn't support 3. prepare to submit.... |
| 136 | +# Related PR (if no, explain why): |
114 | 137 | # NO, only for npu due to rope op. |
115 | 138 | # Future Plan: |
116 | 139 | # Keep this patch in vllm-ascend. |
|
126 | 149 | # - support attention metadata register to the set supported spec decode |
127 | 150 | # - offer a api in platform to determine whether spec decode is supported, |
128 | 151 | # and deprecate is_cuda_alike in it. |
129 | | -# Related PR (if no, explain why): 1. refused by vllm. 2. vllm doesn't support 3. prepare to submit.... |
| 152 | +# Related PR (if no, explain why): |
130 | 153 | # - https://github.com/vllm-project/vllm/pull/15195 |
131 | 154 | # - https://github.com/vllm-project/vllm-ascend/pull/395 |
132 | 155 | # Future Plan: |
|
138 | 161 | # vLLM `Remove Sampler from Model Code` so vllm-ascend needs adapt to this change. |
139 | 162 | # How: |
140 | 163 | # Use vLLM 0.8.4 method to patch it. |
141 | | -# Related PR (if no, explain why): 1. refused by vllm. 2. vllm doesn't support 3. prepare to submit.... |
| 164 | +# Related PR (if no, explain why): |
142 | 165 | # - https://github.com/vllm-project/vllm/pull/15195 |
143 | 166 | # - https://github.com/vllm-project/vllm-ascend/pull/395 |
144 | 167 | # Future Plan: |
|
153 | 176 | # `FlashAttentionMetadata` |
154 | 177 | # How: |
155 | 178 | # ditto |
156 | | -# Related PR (if no, explain why): 1. refused by vllm. 2. vllm doesn't support 3. prepare to submit.... |
| 179 | +# Related PR (if no, explain why): |
157 | 180 | # - https://github.com/vllm-project/vllm/pull/15195 |
158 | 181 | # - https://github.com/vllm-project/vllm-ascend/pull/395 |
159 | 182 | # Future Plan: |
|
0 commit comments