|
27 | 27 | # begin-env-vars-definition |
28 | 28 |
|
29 | 29 | env_variables: Dict[str, Callable[[], Any]] = { |
30 | | - # max compile thread num |
| 30 | + # max compile thread number for package building. Usually, it is set to |
| 31 | + # the number of CPU cores. If not set, the default value is None, which |
| 32 | + # means all CPU cores will be used. |
31 | 33 | "MAX_JOBS": |
32 | 34 | lambda: os.getenv("MAX_JOBS", None), |
| 35 | + # The build type of the package. It can be one of the following values: |
| 36 | + # Release, Debug, RelWithDebInfo. If not set, the default value is Release. |
33 | 37 | "CMAKE_BUILD_TYPE": |
34 | 38 | lambda: os.getenv("CMAKE_BUILD_TYPE"), |
| 39 | + # Whether to compile custom kernels. If not set, the default value is True. |
| 40 | + # If set to False, the custom kernels will not be compiled. Please note that |
| 41 | + # the sleep mode feature will be disabled as well if custom kernels are not |
| 42 | + # compiled. |
35 | 43 | "COMPILE_CUSTOM_KERNELS": |
36 | 44 | lambda: bool(int(os.getenv("COMPILE_CUSTOM_KERNELS", "1"))), |
| 45 | + # The CXX compiler used for compiling the package. If not set, the default |
| 46 | + # value is None, which means the system default CXX compiler will be used. |
| 47 | + "CXX_COMPILER": |
| 48 | + lambda: os.getenv("CXX_COMPILER", None), |
| 49 | + # The C compiler used for compiling the package. If not set, the default |
| 50 | + # value is None, which means the system default C compiler will be used. |
| 51 | + "C_COMPILER": |
| 52 | + lambda: os.getenv("C_COMPILER", None), |
| 53 | + # Whether to enable MC2 for DeepSeek. If not set, the default value is False. |
| 54 | + # MC2 is a fusion operator provided by Ascend to speed up computing and communication. |
| 55 | + # Find more detail here: https://www.hiascend.com/document/detail/zh/canncommercial/81RC1/developmentguide/opdevg/ascendcbestP/atlas_ascendc_best_practices_10_0043.html |
37 | 56 | "VLLM_ENABLE_MC2": |
38 | 57 | lambda: bool(int(os.getenv("VLLM_ENABLE_MC2", '0'))), |
| 58 | + # Whether to enable the topk optimization. It's disabled by default because support is still experimental. |
| 59 | + # We'll make it enabled by default in the future. |
39 | 60 | "VLLM_ASCEND_ENABLE_TOPK_OPTIMZE": |
40 | 61 | lambda: bool(int(os.getenv("VLLM_ASCEND_ENABLE_TOPK_OPTIMZE", '0'))), |
| 62 | + # Whether to use LCCL communication. If not set, the default value is False. |
41 | 63 | "USING_LCCL_COM": |
42 | 64 | lambda: bool(int(os.getenv("USING_LCCL_COM", '0'))), |
| 65 | + # The version of the Ascend chip. If not set, the default value is |
| 66 | + # ASCEND910B1. It's used for package building. Please make sure that the |
| 67 | + # version is correct. |
43 | 68 | "SOC_VERSION": |
44 | 69 | lambda: os.getenv("SOC_VERSION", "ASCEND910B1"), |
45 | 70 | # If set, vllm-ascend will print verbose logs during compilation |
46 | 71 | "VERBOSE": |
47 | 72 | lambda: bool(int(os.getenv('VERBOSE', '0'))), |
| 73 | + # The home path for CANN toolkit. If not set, the default value is |
| 74 | + # /usr/local/Ascend/ascend-toolkit/latest |
48 | 75 | "ASCEND_HOME_PATH": |
49 | 76 | lambda: os.getenv("ASCEND_HOME_PATH", None), |
50 | | - "LD_LIBRARY_PATH": |
51 | | - lambda: os.getenv("LD_LIBRARY_PATH", None), |
52 | | - # Used for disaggregated prefilling |
| 77 | + # The path to the HCCN tool, which is called in the disaggregated prefilling |
| 78 | + # case. |
53 | 79 | "HCCN_PATH": |
54 | 80 | lambda: os.getenv("HCCN_PATH", "/usr/local/Ascend/driver/tools/hccn_tool"), |
| 81 | + # The path for HCCL library, it's used by pyhccl communicator backend. If |
| 82 | + # not set, the default value is libhccl.so. |
55 | 83 | "HCCL_SO_PATH": |
| 84 | + # The prefill device id for disaggregated prefilling case. |
56 | 85 | lambda: os.environ.get("HCCL_SO_PATH", None), |
57 | 86 | "PROMPT_DEVICE_ID": |
58 | 87 | lambda: os.getenv("PROMPT_DEVICE_ID", None), |
| 88 | + # The decode device id for disaggregated prefilling case. |
59 | 89 | "DECODE_DEVICE_ID": |
60 | 90 | lambda: os.getenv("DECODE_DEVICE_ID", None), |
| 91 | + # The port number for llmdatadist communication. If not set, the default |
| 92 | + # value is 26000. |
61 | 93 | "LLMDATADIST_COMM_PORT": |
62 | 94 | lambda: os.getenv("LLMDATADIST_COMM_PORT", "26000"), |
| 95 | + # The wait time for llmdatadist sync cache. If not set, the default value is |
| 96 | + # 5000ms. |
63 | 97 | "LLMDATADIST_SYNC_CACHE_WAIT_TIME": |
64 | 98 | lambda: os.getenv("LLMDATADIST_SYNC_CACHE_WAIT_TIME", "5000"), |
65 | | - "CXX_COMPILER": |
66 | | - lambda: os.getenv("CXX_COMPILER", None), |
67 | | - "C_COMPILER": |
68 | | - lambda: os.getenv("C_COMPILER", None), |
| 99 | + # The version of vllm that is installed. This value is used for developers who |
| 100 | + # installed vllm from source locally. In this case, the version of vllm is |
| 101 | + # usually changed. For example, if the version of vllm is "0.9.0", but when |
| 102 | + # it's installed from source, the version of vllm is usually set to "0.9.1". |
| 103 | + # In this case, developers need to set this value to "0.9.0" to make sure |
| 104 | + # that the correct package is installed. |
69 | 105 | "VLLM_VERSION": |
70 | 106 | lambda: os.getenv("VLLM_VERSION", None), |
| 107 | + # Whether to enable tracing of recompiles from pytorch. |
71 | 108 | "VLLM_ASCEND_TRACE_RECOMPILES": |
72 | 109 | lambda: bool(int(os.getenv("VLLM_ASCEND_TRACE_RECOMPILES", '0'))), |
73 | 110 | "VLLM_ASCEND_ENABLE_DBO": |
|
0 commit comments