Generator Printing Patch #370

Merged: 5 commits, Mar 28, 2023
10 changes: 5 additions & 5 deletions bench/01_radar/SingleChanSimplePipeline.cu
@@ -20,7 +20,7 @@ void simple_radar_pipeline_pulse_compression(nvbench::state &state, nvbench::typ
state.exec( nvbench::exec_tag::timer,
[&numPulses, &numChannels, &numSamples, &waveformLength](nvbench::launch &launch, auto &timer) {
auto radar = RadarPipeline(numPulses, numSamples, waveformLength, numChannels, launch.get_stream());
-radar.GetInputView()->PrefetchDevice(launch.get_stream());
+radar.GetInputView().PrefetchDevice(launch.get_stream());

timer.start();
radar.PulseCompression();
@@ -46,7 +46,7 @@ void simple_radar_pipeline_three_pulse_canceller(nvbench::state &state, nvbench:
state.exec( nvbench::exec_tag::timer,
[&numPulses, &numChannels, &numSamples, &waveformLength](nvbench::launch &launch, auto &timer) {
auto radar = RadarPipeline(numPulses, numSamples, waveformLength, numChannels, launch.get_stream());
-radar.GetInputView()->PrefetchDevice(launch.get_stream());
+radar.GetInputView().PrefetchDevice(launch.get_stream());

timer.start();
radar.ThreePulseCanceller();
@@ -72,7 +72,7 @@ void simple_radar_pipeline_doppler(nvbench::state &state, nvbench::type_list<Val
state.exec( nvbench::exec_tag::timer,
[&numPulses, &numChannels, &numSamples, &waveformLength](nvbench::launch &launch, auto &timer) {
auto radar = RadarPipeline(numPulses, numSamples, waveformLength, numChannels, launch.get_stream());
-radar.GetInputView()->PrefetchDevice(launch.get_stream());
+radar.GetInputView().PrefetchDevice(launch.get_stream());

timer.start();
radar.DopplerProcessing();
@@ -99,7 +99,7 @@ void simple_radar_pipeline_cfar(nvbench::state &state, nvbench::type_list<ValueT
state.exec( nvbench::exec_tag::timer,
[&numPulses, &numChannels, &numSamples, &waveformLength](nvbench::launch &launch, auto &timer) {
auto radar = RadarPipeline(numPulses, numSamples, waveformLength, numChannels, launch.get_stream());
-radar.GetInputView()->PrefetchDevice(launch.get_stream());
+radar.GetInputView().PrefetchDevice(launch.get_stream());

timer.start();
radar.CFARDetections();
@@ -125,7 +125,7 @@ void simple_radar_pipeline_end_to_end(nvbench::state &state, nvbench::type_list<
state.exec( nvbench::exec_tag::timer,
[&numPulses, &numChannels, &numSamples, &waveformLength](nvbench::launch &launch, auto &timer) {
auto radar = RadarPipeline(numPulses, numSamples, waveformLength, numChannels, launch.get_stream());
-radar.GetInputView()->PrefetchDevice(launch.get_stream());
+radar.GetInputView().PrefetchDevice(launch.get_stream());

timer.start();
radar.PulseCompression();
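Every hunk in this benchmark makes the same small change: members of the object returned by GetInputView() are now reached with `.` instead of `->`, which suggests the accessor returns the tensor view itself (or a reference to it) rather than a pointer after this patch. A minimal before/after sketch, assuming a RadarPipeline instance named radar and a CUDA stream named stream as in the files below:

// Assumed old behavior: GetInputView() handed back a pointer to the input view.
//   radar.GetInputView()->PrefetchDevice(stream);
// Assumed new behavior: GetInputView() returns the view object directly, so the
// call chains with '.' and no raw pointer is exposed to the caller.
auto input = radar.GetInputView();   // tensor view, not a pointer (assumption)
input.PrefetchDevice(stream);        // prefetch the backing memory to the device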
2 changes: 1 addition & 1 deletion docs_input/notebooks/exercises/example4_pc.cu
@@ -56,7 +56,7 @@ int main([[maybe_unused]] int argc, [[maybe_unused]] char **argv)

radar.PulseCompression();

-auto rv = radar.GetInputView()->Slice<1>({0, 0, 0}, {matxSliceDim, matxSliceDim, 16});
+auto rv = radar.GetInputView().Slice<1>({0, 0, 0}, {matxSliceDim, matxSliceDim, 16});
rv.print();
cudaStreamDestroy(stream);

2 changes: 1 addition & 1 deletion docs_input/notebooks/exercises/example4_tpc.cu
@@ -53,7 +53,7 @@ int main([[maybe_unused]] int argc, [[maybe_unused]] char **argv)
radar.ThreePulseCanceller();

printf("x input:\n");
-radar.GetInputView()->Slice<1>({0, 0, 0}, {matxSliceDim, matxSliceDim, 16}).Print();
+radar.GetInputView().Slice<1>({0, 0, 0}, {matxSliceDim, matxSliceDim, 16}).Print();
printf("Convolution output:\n");
radar.GetTPCView()->Slice<1>({0,0,0}, {matxSliceDim, matxSliceDim, 10}).Print();
cudaStreamDestroy(stream);
2 changes: 1 addition & 1 deletion examples/simple_radar_pipeline.cu
@@ -63,7 +63,7 @@ int main([[maybe_unused]] int argc, [[maybe_unused]] char **argv)
printf("Initializing data structures...\n");
auto radar =
RadarPipeline(numPulses, numSamples, waveformLength, numChannels, stream);
-radar.GetInputView()->PrefetchDevice(stream);
+radar.GetInputView().PrefetchDevice(stream);
MATX_NVTX_END_RANGE(1)

MATX_NVTX_START_RANGE("Pipeline Test", matx_nvxtLogLevels::MATX_NVTX_LOG_USER, 2)
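For reference, the NVTX macros visible around this hunk open and close a profiling range keyed by the same numeric id; the sketch below uses only the macros that appear in this file, and the id-based pairing is inferred from the surrounding code rather than taken from documentation:

// Open a user-level NVTX range named "Pipeline Test" with id 2, run one pipeline
// stage, then close range 2 so a profiler (e.g. Nsight Systems) can attribute the
// elapsed time to that range. The pairing by id is an inference from the code above.
MATX_NVTX_START_RANGE("Pipeline Test", matx_nvxtLogLevels::MATX_NVTX_LOG_USER, 2)
radar.PulseCompression();
MATX_NVTX_END_RANGE(2)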
105 changes: 54 additions & 51 deletions include/matx/core/tensor_utils.h
@@ -670,61 +670,11 @@ namespace detail {

static constexpr bool PRINT_ON_DEVICE = false; ///< print() uses printf on device

/**
* @brief Print a tensor's values to stdout
*
* This is a wrapper utility function to print the type, size and stride of tensor,
* see PrintData for details of internal tensor printing options
*
* @tparam Args Integral argument types
* @param op input Operator
* @param dims Number of values to print for each dimension
*/
template <typename Op, typename... Args,
std::enable_if_t<((std::is_integral_v<Args>)&&...) &&
(Op::Rank() == 0 || sizeof...(Args) > 0),
bool> = true>
void print(const Op &op, Args... dims)
{
MATX_NVTX_START("", matx::MATX_NVTX_LOG_API)

// print tensor size info first
std::string type = (is_tensor_view_v<Op>) ? "Tensor" : "Operator";

printf("%s{%s} Rank: %d, Sizes:[", type.c_str(), detail::GetTensorType<typename Op::scalar_type>().c_str(), op.Rank());

for (index_t dimIdx = 0; dimIdx < (op.Rank() ); dimIdx++ )
{
printf("%" INDEX_T_FMT, op.Size(static_cast<int>(dimIdx)) );
if( dimIdx < (op.Rank() - 1) )
printf(", ");
}

if constexpr (is_tensor_view_v<Op>)
{
printf("], Strides:[");
if constexpr (Op::Rank() > 0)
{
for (index_t dimIdx = 0; dimIdx < (op.Rank() ); dimIdx++ )
{
printf("%" INDEX_T_FMT, op.Stride(static_cast<int>(dimIdx)) );
if( dimIdx < (op.Rank() - 1) )
{
printf(",");
}
}
}
}

printf("]\n");
PrintData(op, dims...);

}

/**
* @brief Print a tensor's values to stdout
*
 * This is the internal `PrintData()` that takes integral values for each index, and prints as many values
 * in each dimension as the arguments specify. For example:
*
* `print(a, 2, 3, 2);`
@@ -774,6 +724,59 @@ void PrintData(const Op &op, Args... dims) {
#endif
}


/**
 * @brief Print a tensor's values to stdout
*
* This is a wrapper utility function to print the type, size and stride of tensor,
* see PrintData for details of internal tensor printing options
*
* @tparam Args Integral argument types
* @param op input Operator
* @param dims Number of values to print for each dimension
*/
template <typename Op, typename... Args,
std::enable_if_t<((std::is_integral_v<Args>)&&...) &&
(Op::Rank() == 0 || sizeof...(Args) > 0),
bool> = true>
void print(const Op &op, Args... dims)
{
MATX_NVTX_START("", matx::MATX_NVTX_LOG_API)

// print tensor size info first
std::string type = (is_tensor_view_v<Op>) ? "Tensor" : "Operator";

printf("%s{%s} Rank: %d, Sizes:[", type.c_str(), detail::GetTensorType<typename Op::scalar_type>().c_str(), op.Rank());

for (index_t dimIdx = 0; dimIdx < (op.Rank() ); dimIdx++ )
{
printf("%" INDEX_T_FMT, op.Size(static_cast<int>(dimIdx)) );
if( dimIdx < (op.Rank() - 1) )
printf(", ");
}

if constexpr (is_tensor_view_v<Op>)
{
printf("], Strides:[");
if constexpr (Op::Rank() > 0)
{
for (index_t dimIdx = 0; dimIdx < (op.Rank() ); dimIdx++ )
{
printf("%" INDEX_T_FMT, op.Stride(static_cast<int>(dimIdx)) );
if( dimIdx < (op.Rank() - 1) )
{
printf(",");
}
}
}
}

printf("]\n");
PrintData(op, dims...);

}


/**
 * @brief Print all of a tensor's values to stdout
*
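The net effect of this hunk is a reorder rather than new logic: the free print() wrapper now sits below PrintData(), presumably so the element-printing helper is declared before print() forwards to it at the end. Based only on the docstrings above, a usage sketch (the tensor a is assumed to already exist and is not part of this patch):

// Assuming 'a' is a rank-3 MatX tensor view, this first prints the type/rank/
// size/stride header produced by the printf calls in print(), then hands off to
// PrintData() to print the first 2 x 3 x 2 element values, as the docstring states.
print(a, 2, 3, 2);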
5 changes: 3 additions & 2 deletions include/matx/operators/clone.h
@@ -119,9 +119,10 @@ namespace matx
auto __MATX_INLINE__ clone(Op t, const std::array<index_t, Rank> &shape)
{
if constexpr (is_tensor_view_v<Op>) {
-return t.template Clone<Rank>(shape);
+return t.template Clone<static_cast<int>(Rank)>(shape);
} else {
-return detail::CloneOp<Rank, Op>(t, shape);
+return detail::CloneOp<static_cast<int>(Rank), Op>(t, shape);

}
};

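The only change here is the explicit static_cast<int>(Rank): Rank is presumably deduced as an unsigned size type from the std::array<index_t, Rank> shape parameter, while the tensor's Clone<>() and detail::CloneOp<> expect an int rank, so the cast avoids a non-type template argument mismatch. A usage sketch of the wrapper under common MatX conventions (the tensor t1 and the matxKeepDim sentinel are drawn from general MatX usage, not from this diff):

// Clone a 1-D tensor view 't1' of length 16 into a logical 4 x 16 operator:
// the new leading dimension repeats the data and matxKeepDim keeps the original
// extent, so no data is copied.
auto t2 = clone<2>(t1, {4, matxKeepDim});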