Skip to content

error: reference to __host__ function 'parallel_for<thrust::cuda_cub::for_each_f...' in __host__ __device__ function #753

@HangJie720

Description

@HangJie720

I try to use clang10 with cuda10.2 to compile the following code:

#include <thrust/binary_search.h>
#include <thrust/device_vector.h>
#include <thrust/execution_policy.h>
#include <thrust/functional.h>

#define CUDA_1D_KERNEL_LOOP(i, n)                                 \
  for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < (n); \
       i += blockDim.x * gridDim.x)

__global__ void PieceWiseLinearTransformGeneralKernel(
    const int N,
    const int M,
    const int num_grp,
    const int num_fnc_per_grp,
    const float* bounds,
    const float* slopes,
    const float* intercepts,
    const float* X,
    float* Y) {
  CUDA_1D_KERNEL_LOOP(i, N * M) {
    int col = i % M;
    const float* bounds_group = bounds + (col * (num_fnc_per_grp + 1));
    const float* slopes_group = slopes + (col * num_fnc_per_grp);
    const float* intercepts_group = intercepts + (col * num_fnc_per_grp);

    if (X[i] <= bounds_group[0]) {
      Y[i] = slopes_group[0] * bounds_group[0] + intercepts_group[0];
    } else if (X[i] >= bounds_group[num_fnc_per_grp]) {
      Y[i] = slopes_group[num_fnc_per_grp - 1] * bounds_group[num_fnc_per_grp] +
          intercepts_group[num_fnc_per_grp - 1];
    } else {
      auto low_bound = thrust::lower_bound(
          thrust::device,
          bounds_group,
          bounds_group + num_fnc_per_grp + 1,
          X[i]);
      int bounds_idx = low_bound - bounds_group - 1;
      Y[i] = slopes_group[bounds_idx] * X[i] + intercepts_group[bounds_idx];
    }
  }
}

int main(int argc, char* argv[]){
    return 0;
}

reference to __host__function in __host____device__function error occurs as follow.

root@1615c67ed2eb:/home/jhang# clang++ -std=c++11 -lnvToolsExt -I/usr/local/cuda/include 
-L/usr/local/cuda/lib64 -lcudart -lcublas -lcudnn -lgomp -lcurand 
--cuda-path=/usr/local/cuda --cuda-gpu-arch=sm_61 -x cuda -o test test.cu
clang: warning: Unknown CUDA version 10.2. Assuming the latest supported version 10.1 [-Wunknown-cuda-version]
In file included from test.cu:1:
In file included from /usr/local/cuda/include/thrust/binary_search.h:1901:
In file included from /usr/local/cuda/include/thrust/detail/binary_search.inl:26:
In file included from /usr/local/cuda/include/thrust/system/detail/generic/binary_search.h:173:
In file included from /usr/local/cuda/include/thrust/system/detail/generic/binary_search.inl:31:
In file included from /usr/local/cuda/include/thrust/for_each.h:279:
In file included from /usr/local/cuda/include/thrust/detail/for_each.inl:27:
In file included from /usr/local/cuda/include/thrust/system/detail/adl/for_each.h:42:
In file included from /usr/local/cuda/include/thrust/system/cuda/detail/for_each.h:35:
/usr/local/cuda/include/thrust/system/cuda/detail/parallel_for.h:164:43: error: reference to __host__ function 'parallel_for<thrust::cuda_cub::for_each_f<thrust::zip_iterator<thrust::tuple<thrust::detail::normal_iterator<thrust::pointer<float, thrust::cuda_cub::par_t, thrust::use_default, thrust::use_default> >, thrust::detail::normal_iterator<thrust::pointer<long, thrust::cuda_cub::par_t, thrust::use_default, thrust::use_default> >, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type> >, thrust::detail::wrapped_function<thrust::system::detail::generic::detail::binary_search_functor<const float *, thrust::system::detail::generic::detail::binary_search_less, thrust::system::detail::generic::detail::lbf>, void> >, long>' in __host__ __device__ function
    cudaError_t  status = __parallel_for::parallel_for(count, f, stream);
                                          ^
/usr/local/cuda/include/thrust/system/cuda/detail/for_each.h:79:15: note: in instantiation of function template specialization 'thrust::cuda_cub::parallel_for<thrust::cuda_cub::par_t, thrust::cuda_cub::for_each_f<thrust::zip_iterator<thrust::tuple<thrust::detail::normal_iterator<thrust::pointer<float, thrust::cuda_cub::par_t, thrust::use_default, thrust::use_default> >, thrust::detail::normal_iterator<thrust::pointer<long, thrust::cuda_cub::par_t, thrust::use_default, thrust::use_default> >, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type> >, thrust::detail::wrapped_function<thrust::system::detail::generic::detail::binary_search_functor<const float *, thrust::system::detail::generic::detail::binary_search_less, thrust::system::detail::generic::detail::lbf>, void> >, long>' requested here
    cuda_cub::parallel_for(policy,
              ^
/usr/local/cuda/include/thrust/system/cuda/detail/for_each.h:103:22: note: in instantiation of function template specialization 'thrust::cuda_cub::for_each_n<thrust::cuda_cub::par_t, thrust::zip_iterator<thrust::tuple<thrust::detail::normal_iterator<thrust::pointer<float, thrust::cuda_cub::par_t, thrust::use_default, thrust::use_default> >, thrust::detail::normal_iterator<thrust::pointer<long, thrust::cuda_cub::par_t, thrust::use_default, thrust::use_default> >, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type> >, long, thrust::system::detail::generic::detail::binary_search_functor<const float *, thrust::system::detail::generic::detail::binary_search_less, thrust::system::detail::generic::detail::lbf> >' requested here
    return cuda_cub::for_each_n(policy, first,  count, op);
                     ^
/usr/local/cuda/include/thrust/detail/for_each.inl:44:10: note: in instantiation of function template specialization 'thrust::cuda_cub::for_each<thrust::cuda_cub::par_t, thrust::zip_iterator<thrust::tuple<thrust::detail::normal_iterator<thrust::pointer<float, thrust::cuda_cub::par_t, thrust::use_default, thrust::use_default> >, thrust::detail::normal_iterator<thrust::pointer<long, thrust::cuda_cub::par_t, thrust::use_default, thrust::use_default> >, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type> >, thrust::system::detail::generic::detail::binary_search_functor<const float *, thrust::system::detail::generic::detail::binary_search_less, thrust::system::detail::generic::detail::lbf> >' requested here
  return for_each(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, f);
         ^
/usr/local/cuda/include/thrust/system/detail/generic/binary_search.inl:132:11: note: in instantiation of function template specialization 'thrust::for_each<thrust::cuda_cub::par_t, thrust::zip_iterator<thrust::tuple<thrust::detail::normal_iterator<thrust::pointer<float, thrust::cuda_cub::par_t, thrust::use_default, thrust::use_default> >, thrust::detail::normal_iterator<thrust::pointer<long, thrust::cuda_cub::par_t, thrust::use_default, thrust::use_default> >, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type> >, thrust::system::detail::generic::detail::binary_search_functor<const float *, thrust::system::detail::generic::detail::binary_search_less, thrust::system::detail::generic::detail::lbf> >' requested here
  thrust::for_each(exec,
          ^
/usr/local/cuda/include/thrust/system/detail/generic/binary_search.inl:162:44: note: in instantiation of function template specialization 'thrust::system::detail::generic::detail::binary_search<thrust::cuda_cub::par_t, const float *, thrust::detail::normal_iterator<thrust::pointer<float, thrust::cuda_cub::par_t, thrust::use_default, thrust::use_default> >, thrust::detail::normal_iterator<thrust::pointer<long, thrust::cuda_cub::par_t, thrust::use_default, thrust::use_default> >, thrust::system::detail::generic::detail::binary_search_less, thrust::system::detail::generic::detail::lbf>' requested here
  thrust::system::detail::generic::detail::binary_search(exec, begin, end, d_value.begin(), d_value.end(), d_output.begin(), comp, func);
                                           ^
/usr/local/cuda/include/thrust/system/detail/generic/binary_search.inl:213:26: note: in instantiation of function template specialization 'thrust::system::detail::generic::detail::binary_search<long, thrust::cuda_cub::par_t, const float *, float, thrust::system::detail::generic::detail::binary_search_less, thrust::system::detail::generic::detail::lbf>' requested here
  return begin + detail::binary_search<difference_type>(exec, begin, end, value, comp, detail::lbf());
                         ^
/usr/local/cuda/include/thrust/detail/binary_search.inl:56:12: note: in instantiation of function template specialization 'thrust::system::detail::generic::lower_bound<thrust::cuda_cub::par_t, const float *, float, thrust::system::detail::generic::detail::binary_search_less>' requested here
    return lower_bound(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, value, comp);
           ^
/usr/local/cuda/include/thrust/system/detail/generic/binary_search.inl:200:18: note: in instantiation of function template specialization 'thrust::lower_bound<thrust::cuda_cub::par_t, const float *, float, thrust::system::detail::generic::detail::binary_search_less>' requested here
  return thrust::lower_bound(exec, begin, end, value, detail::binary_search_less());
                 ^
/usr/local/cuda/include/thrust/detail/binary_search.inl:42:12: note: in instantiation of function template specialization 'thrust::system::detail::generic::lower_bound<thrust::cuda_cub::par_t, const float *, float>' requested here
    return lower_bound(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, value);
           ^
test.cu:33:32: note: in instantiation of function template specialization 'thrust::lower_bound<thrust::cuda_cub::par_t, const float *, float>' requested here
      auto low_bound = thrust::lower_bound(
                               ^
/usr/local/cuda/include/thrust/system/cuda/detail/for_each.h:79:15: note: called by 'for_each_n<thrust::cuda_cub::par_t, thrust::zip_iterator<thrust::tuple<thrust::detail::normal_iterator<thrust::pointer<float, thrust::cuda_cub::par_t, thrust::use_default, thrust::use_default> >, thrust::detail::normal_iterator<thrust::pointer<long, thrust::cuda_cub::par_t, thrust::use_default, thrust::use_default> >, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type> >, long, thrust::system::detail::generic::detail::binary_search_functor<const float *, thrust::system::detail::generic::detail::binary_search_less, thrust::system::detail::generic::detail::lbf> >'
    cuda_cub::parallel_for(policy,
              ^
/usr/local/cuda/include/thrust/system/cuda/detail/for_each.h:103:22: note: called by 'for_each<thrust::cuda_cub::par_t, thrust::zip_iterator<thrust::tuple<thrust::detail::normal_iterator<thrust::pointer<float, thrust::cuda_cub::par_t, thrust::use_default, thrust::use_default> >, thrust::detail::normal_iterator<thrust::pointer<long, thrust::cuda_cub::par_t, thrust::use_default, thrust::use_default> >, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type> >, thrust::system::detail::generic::detail::binary_search_functor<const float *, thrust::system::detail::generic::detail::binary_search_less, thrust::system::detail::generic::detail::lbf> >'
    return cuda_cub::for_each_n(policy, first,  count, op);
                     ^
/usr/local/cuda/include/thrust/detail/for_each.inl:44:10: note: called by 'for_each<thrust::cuda_cub::par_t, thrust::zip_iterator<thrust::tuple<thrust::detail::normal_iterator<thrust::pointer<float, thrust::cuda_cub::par_t, thrust::use_default, thrust::use_default> >, thrust::detail::normal_iterator<thrust::pointer<long, thrust::cuda_cub::par_t, thrust::use_default, thrust::use_default> >, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type> >, thrust::system::detail::generic::detail::binary_search_functor<const float *, thrust::system::detail::generic::detail::binary_search_less, thrust::system::detail::generic::detail::lbf> >'
  return for_each(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, f);
         ^
/usr/local/cuda/include/thrust/system/detail/generic/binary_search.inl:132:11: note: called by 'binary_search<thrust::cuda_cub::par_t, const float *, thrust::detail::normal_iterator<thrust::pointer<float, thrust::cuda_cub::par_t, thrust::use_default, thrust::use_default> >, thrust::detail::normal_iterator<thrust::pointer<long, thrust::cuda_cub::par_t, thrust::use_default, thrust::use_default> >, thrust::system::detail::generic::detail::binary_search_less, thrust::system::detail::generic::detail::lbf>'
  thrust::for_each(exec,
          ^
/usr/local/cuda/include/thrust/system/detail/generic/binary_search.inl:162:44: note: called by 'binary_search<long, thrust::cuda_cub::par_t, const float *, float, thrust::system::detail::generic::detail::binary_search_less, thrust::system::detail::generic::detail::lbf>'
  thrust::system::detail::generic::detail::binary_search(exec, begin, end, d_value.begin(), d_value.end(), d_output.begin(), comp, func);
                                           ^
/usr/local/cuda/include/thrust/system/detail/generic/binary_search.inl:213:26: note: called by 'lower_bound<thrust::cuda_cub::par_t, const float *, float, thrust::system::detail::generic::detail::binary_search_less>'
  return begin + detail::binary_search<difference_type>(exec, begin, end, value, comp, detail::lbf());
                         ^
/usr/local/cuda/include/thrust/detail/binary_search.inl:56:12: note: called by 'lower_bound<thrust::cuda_cub::par_t, const float *, float, thrust::system::detail::generic::detail::binary_search_less>'
    return lower_bound(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, value, comp);
           ^
/usr/local/cuda/include/thrust/system/detail/generic/binary_search.inl:200:18: note: called by 'lower_bound<thrust::cuda_cub::par_t, const float *, float>'
  return thrust::lower_bound(exec, begin, end, value, detail::binary_search_less());
                 ^
/usr/local/cuda/include/thrust/detail/binary_search.inl:42:12: note: called by 'lower_bound<thrust::cuda_cub::par_t, const float *, float>'
    return lower_bound(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, value);
           ^
test.cu:33:32: note: called by 'PieceWiseLinearTransformGeneralKernel'
      auto low_bound = thrust::lower_bound(
                               ^
/usr/local/cuda/include/thrust/system/cuda/detail/parallel_for.h:127:3: note: 'parallel_for<thrust::cuda_cub::for_each_f<thrust::zip_iterator<thrust::tuple<thrust::detail::normal_iterator<thrust::pointer<float, thrust::cuda_cub::par_t, thrust::use_default, thrust::use_default> >, thrust::detail::normal_iterator<thrust::pointer<long, thrust::cuda_cub::par_t, thrust::use_default, thrust::use_default> >, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type> >, thrust::detail::wrapped_function<thrust::system::detail::generic::detail::binary_search_functor<const float *, thrust::system::detail::generic::detail::binary_search_less, thrust::system::detail::generic::detail::lbf>, void> >, long>' declared here
  parallel_for(Size         num_items,
  ^
1 error generated when compiling for sm_61.

while nvcc is ok, for calling __host__function in __host____device__function is not allowed for clang?

Metadata

Metadata

Assignees

No one assigned

    Labels

    thrustFor all items related to Thrust.

    Type

    No type

    Projects

    Status

    Todo

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions