Skip to content

Commit

Permalink
Only emit PTX code for the latest CUDA compute architecture
Browse files (browse the repository at this point in the history)
Signed-off-by: Felix Abecassis <fabecassis@nvidia.com>
  • Loading branch information
flx42 committed Dec 15, 2017
1 parent c41b43b commit ccdcdf7
Showing 1 changed file with 7 additions and 3 deletions.
Original file line number | Diff line number | Diff line change
Expand Up @@ -195,11 +195,15 @@ def InvokeNvcc(argv, log=False):
out = ' -o ' + out_file[0]

supported_cuda_compute_capabilities = [ %{cuda_compute_capabilities} ]
sorted_capabilities = sorted(supported_cuda_compute_capabilities, key=lambda x: map(int, x.split('.')))
nvccopts = '-D_FORCE_INLINES '
for capability in supported_cuda_compute_capabilities:
for capability in sorted_capabilities:
capability = capability.replace('.', '')
nvccopts += r'-gencode=arch=compute_%s,\"code=sm_%s,compute_%s\" ' % (
capability, capability, capability)
nvccopts += r'-gencode=arch=compute_%s,code=sm_%s ' % (
capability, capability)
last_capability = sorted_capabilities[-1].replace('.', '')
nvccopts += r'-gencode=arch=compute_%s,code=compute_%s ' % (
last_capability, last_capability)
nvccopts += ' ' + nvcc_compiler_options
nvccopts += undefines
nvccopts += defines
Expand Down

0 comments on commit ccdcdf7

Please sign in to comment.