@@ -144,27 +144,32 @@ else()
144144endif ()
145145
146146
147- # 
148- # For cuda we want to be able to control which architectures we compile for on  
149- # a per-file basis in order to cut down on compile time. So here we extract 
150- # the set of architectures we want to compile for and remove the from the  
151- # CMAKE_CUDA_FLAGS so that they are not applied globally. 
152- # 
153147if (VLLM_GPU_LANG STREQUAL  "CUDA" )
148+   # 
149+   # For cuda we want to be able to control which architectures we compile for on  
150+   # a per-file basis in order to cut down on compile time. So here we extract 
151+   # the set of architectures we want to compile for and remove them from the
152+   # CMAKE_CUDA_FLAGS so that they are not applied globally. 
153+   # 
154154  clear_cuda_arches(CUDA_ARCH_FLAGS)
155155  extract_unique_cuda_archs_ascending(CUDA_ARCHS "${CUDA_ARCH_FLAGS} " )
156156  message (STATUS  "CUDA target architectures: ${CUDA_ARCHS} " )
157+   # Filter the target architectures by the supported archs
158+   # since for some files we will build for all CUDA_ARCHS. 
159+   cuda_archs_loose_intersection(CUDA_ARCHS 
160+     "${CUDA_SUPPORTED_ARCHS} "  "${CUDA_ARCHS} " )
161+   message (STATUS  "CUDA supported target architectures: ${CUDA_ARCHS} " )
162+ else ()
163+   # 
164+   # For other GPU targets override the GPU architectures detected by cmake/torch 
165+   # and filter them by the supported versions for the current language. 
166+   # The final set of arches is stored in `VLLM_GPU_ARCHES`. 
167+   # 
168+   override_gpu_arches(VLLM_GPU_ARCHES
169+     ${VLLM_GPU_LANG} 
170+     "${${VLLM_GPU_LANG}_SUPPORTED_ARCHS}")
157171endif ()
158172
159- # 
160- # Override the GPU architectures detected by cmake/torch and filter them by 
161- # the supported versions for the current language. 
162- # The final set of arches is stored in `VLLM_GPU_ARCHES`. 
163- # 
164- override_gpu_arches(VLLM_GPU_ARCHES
165-   ${VLLM_GPU_LANG} 
166-   "${${VLLM_GPU_LANG} _SUPPORTED_ARCHS}" )
167- 
168173# 
169174# Query torch for additional GPU compilation flags for the given 
170175# `VLLM_GPU_LANG`. 
0 commit comments