basic benchmarks

DiamonDinoia · Jul 3, 2024 · 45333fa · 45333fa
1 parent 5dde122
commit 45333fa
Show file tree

Hide file tree

Showing 12 changed files with 156 additions and 62 deletions.
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -1,4 +1,4 @@
-cmake_minimum_required(VERSION 3.19)
+cmake_minimum_required(VERSION 3.23)
 
 project(finufft VERSION 2.2.0 LANGUAGES C CXX)
 
@@ -23,7 +23,7 @@ if (CMAKE_CXX_COMPILER_ID IN_LIST GNU_LIKE_FRONTENDS)
 endif ()
 set(FINUFFT_FFTW_SUFFIX "OpenMP" CACHE STRING "Suffix for FFTW libraries (e.g. OpenMP, Threads etc.)")
 set(FINUFFT_FFTW_LIBRARIES "DEFAULT" CACHE STRING "Specify a custom FFTW library")
-
+set(FINUFFT_CUDA_ARCHITECTURES "all-major" CACHE STRING "CUDA architectures to build for (e.g. 60;70;75;)")
 # All options go here
 # sphinx tag (don't remove): @cmake_opts_start
 option(FINUFFT_BUILD_EXAMPLES "Whether to build the FINUFFT examples" OFF)
@@ -219,30 +219,29 @@ if (FINUFFT_USE_CUDA)
     if (NOT DEFINED CMAKE_CUDA_ARCHITECTURES)
         message("FINUFFT WARNING: No CUDA architecture supplied via '-DCMAKE_CUDA_ARCHITECTURES=...', defaulting to '60;70;75;'")
         message("See: https://developer.nvidia.com/cuda-gpus for more details on what architecture to supply.")
-        set(CMAKE_CUDA_ARCHITECTURES "60;70;75" CACHE STRING "" FORCE)
     endif ()
     enable_language(CUDA)
     find_package(CUDAToolkit REQUIRED)
     add_subdirectory(src/cuda)
-    if (BUILD_TESTING AND FINUFFT_BUILD_TESTS)
+    if (BUILD_TESTING OR FINUFFT_BUILD_TESTS)
         add_subdirectory(perftest/cuda)
+        add_subdirectory(test/cuda)
     endif ()
-
     list(APPEND INSTALL_TARGETS cufinufft cufinufft_static)
 endif ()
 
 # Add tests defined in their own directory
-if (BUILD_TESTING AND FINUFFT_BUILD_TESTS AND FINUFFT_USE_CPU)
+if (FINUFFT_USE_CPU AND (BUILD_TESTING OR FINUFFT_BUILD_TESTS))
     add_subdirectory(test)
     add_subdirectory(perftest)
 endif ()
 
-if (BUILD_TESTING AND FINUFFT_BUILD_TESTS AND FINUFFT_USE_CUDA)
-    add_subdirectory(test/cuda)
+if (FINUFFT_BUILD_EXAMPLES AND FINUFFT_USE_CPU)
+    add_subdirectory(examples)
 endif ()
 
-if (FINUFFT_BUILD_EXAMPLES)
-    add_subdirectory(examples)
+if (FINUFFT_BUILD_EXAMPLES AND FINUFFT_USE_GPU)
+    add_subdirectory(examples/cuda)
 endif ()
 
 if (FINUFFT_BUILD_FORTRAN)

diff --git a/include/cufinufft/common.h b/include/cufinufft/common.h
@@ -32,6 +32,10 @@ template<typename T>
 void onedim_fseries_kernel_compute(CUFINUFFT_BIGINT nf, T *f, std::complex<double> *a,
                                    T *fwkerhalf, finufft_spread_opts opts);
 
+template<typename T>
+std::size_t shared_memory_required(int dim, int ns, int bin_size_x, int bin_size_y,
+                                   int bin_size_z);
+
 } // namespace common
 } // namespace cufinufft
 #endif
diff --git a/include/cufinufft/impl.h b/include/cufinufft/impl.h
@@ -53,6 +53,7 @@ static void cufinufft_setup_binsize(int type, int dim, cufinufft_opts *opts) {
   } break;
   case 3: {
     switch (opts->gpu_method) {
+    case 0:
     case 1:
     case 2: {
       opts->gpu_binsizex = (opts->gpu_binsizex < 0) ? 16 : opts->gpu_binsizex;
@@ -109,17 +110,16 @@ int cufinufft_makeplan_impl(int type, int dim, int *nmodes, int iflag, int ntran
   }
 
   // Mult-GPU support: set the CUDA Device ID:
-  const int device_id = opts == NULL ? 0 : opts->gpu_device_id;
+  const int device_id = opts == nullptr ? 0 : opts->gpu_device_id;
   cufinufft::utils::WithCudaDevice device_swapper(device_id);
 
   /* allocate the plan structure, assign address to user pointer. */
-  cufinufft_plan_t<T> *d_plan = new cufinufft_plan_t<T>;
-  *d_plan_ptr                 = d_plan;
+  auto *d_plan = new cufinufft_plan_t<T>;
+  *d_plan_ptr  = d_plan;
   // Zero out your struct, (sets all pointers to NULL)
   memset(d_plan, 0, sizeof(*d_plan));
-
   /* If a user has not supplied their own options, assign defaults for them. */
-  if (opts == NULL) {     // use default opts
+  if (opts == nullptr) {  // use default opts
     cufinufft_default_opts(&(d_plan->opts));
   } else {                // or read from what's passed in
     d_plan->opts = *opts; // keep a deep copy; changing *opts now has no effect
@@ -138,26 +138,9 @@ int cufinufft_makeplan_impl(int type, int dim, int *nmodes, int iflag, int ntran
   }
 
   auto &stream = d_plan->stream = (cudaStream_t)d_plan->opts.gpu_stream;
-
-  /* Automatically set GPU method. */
-  if (d_plan->opts.gpu_method == 0) {
-    /* For type 1, we default to method 2 (SM) since this is generally faster.
-     * However, in the special case of _double precision_ in _three dimensions_
-     * with more than _three digits of precision_, there is note enough shared
-     * memory for this to work. As a result, we will default to method 1 (GM) in
-     * this special case.
-     *
-     * For type 2, we always default to method 1 (GM). */
-    if (type == 1 && (sizeof(T) == 4 || dim < 3 || tol >= 1e-3))
-      d_plan->opts.gpu_method = 2;
-    else if (type == 1 && tol < 1e-3)
-      d_plan->opts.gpu_method = 1;
-    else if (type == 2)
-      d_plan->opts.gpu_method = 1;
-  }
-
-  /* Setup Spreader */
   using namespace cufinufft::common;
+  /* Setup Spreader */
+
   // can return FINUFFT_WARN_EPS_TOO_SMALL=1, which is OK
   if ((ier = setup_spreader_for_nufft(d_plan->spopts, tol, d_plan->opts)) > 1) {
     delete *d_plan_ptr;
@@ -180,6 +163,41 @@ int cufinufft_makeplan_impl(int type, int dim, int *nmodes, int iflag, int ntran
   if (dim > 2)
     set_nf_type12(d_plan->mu, d_plan->opts, d_plan->spopts, &nf3,
                   d_plan->opts.gpu_obinsizez);
+
+  // dynamically request the maximum amount of shared memory available
+  // for the spreader
+
+  /* Automatically set GPU method. */
+  if (d_plan->opts.gpu_method == 0) {
+    /* For type 1, we default to method 2 (SM) since this is generally faster.
+     * However, in the special case of _double precision_ in _three dimensions_
+     * with more than _three digits of precision_, there is note enough shared
+     * memory for this to work. As a result, we will default to method 1 (GM) in
+     * this special case.
+     *
+     * For type 2, we always default to method 1 (GM). */
+
+    // query the device for the amount of shared memory available
+    int shared_mem_per_block{};
+    cudaDeviceGetAttribute(&shared_mem_per_block, cudaDevAttrMaxSharedMemoryPerBlockOptin,
+                           device_id);
+    RETURN_IF_CUDA_ERROR
+    // compute the amount of shared memory required for the method
+    const auto shared_mem_required =
+        shared_memory_required<T>(dim, d_plan->spopts.nspread, d_plan->opts.gpu_binsizex,
+                                  d_plan->opts.gpu_binsizey, d_plan->opts.gpu_binsizez);
+    printf("Shared memory available: %d KB, required: %d KB\n", shared_mem_per_block,
+           shared_mem_required);
+    if ((shared_mem_required > shared_mem_per_block)) {
+      d_plan->opts.gpu_method = 1;
+      printf("choosing method 1\n");
+    } else {
+      d_plan->opts.gpu_method = 2;
+      printf("choosing method 2\n");
+    }
+    printf("using method %d\n", d_plan->opts.gpu_method);
+  }
+
   int fftsign = (iflag >= 0) ? 1 : -1;
 
   d_plan->nf1      = nf1;

diff --git a/perftest/cuda/CMakeLists.txt b/perftest/cuda/CMakeLists.txt
@@ -1,3 +1,4 @@
 add_executable(cuperftest cuperftest.cu)
 target_include_directories(cuperftest PUBLIC ${CUFINUFFT_INCLUDE_DIRS})
 target_link_libraries(cuperftest PUBLIC cufinufft)
+#file(COPY ${CMAKE_CURRENT_SOURCE_DIR}/bench.sh DESTINATION ${CMAKE_CURRENT_BINARY_DIR})
diff --git a/perftest/cuda/bench.sh b/perftest/cuda/bench.sh
@@ -0,0 +1,13 @@
+./cuperftest --prec d --n_runs 5 --N1 1e2 --N2 1e2 --M 2e6 --method 0 --tol 1e-4
+./cuperftest --prec d --n_runs 5 --N1 1e1 --N2 1e1 --N3 1e1 --M 2e6 --method 0 --tol 1e-4
+./cuperftest --prec d --n_runs 5 --N1 1e2 --N2 1e2 --N3 1e1 --M 2e6 --method 0 --tol 1e-4
+./cuperftest --prec d --n_runs 5 --N1 1e1 --N2 1e2 --N3 1e3 --M 2e6 --method 0 --tol 1e-4
+./cuperftest --prec d --n_runs 5 --N1 1e2 --N2 1e2 --N3 1e3 --M 2e6 --method 0 --tol 1e-4
+#./cuperftest --prec d --n_runs 5 --N1 1e5 --N2 1e5 --N3 1e5 --M 2e6 --method 0 --tol 1e-10
+#./cuperftest --prec d --n_runs 5 --N1 1e4 --N2 1e4 --N3 1e4 --M 2e6 --method 0 --tol 1e-10
+#./cuperftest --prec d --n_runs 5 --N1 1e5 --N2 1e5 --N3 1e5 --M 2e6 --method 0 --tol 1e-10
+#./cuperftest --prec d --n_runs 5 --N1 1e6 --N2 1e6 --M 2e6 --method 0 --tol 1e-10
+#./cuperftest --prec d --n_runs 5 --N1 1e8 --N2 1e6 --M 2e6 --method 0 --tol 1e-10
+#./cuperftest --prec d --n_runs 5 --N1 1e6 --N2 1e6 --M 2e6 --method 0 --tol 1e-10
+#./cuperftest --prec d --n_runs 5 --N1 1e7 --N2 1e7 --M 2e6 --method 0 --tol 1e-10
+#./cuperftest --prec d --n_runs 5 --N1 1e8 --N2 1e8 --M 2e6 --method 0 --tol 1e-10
diff --git a/perftest/cuda/cuperftest.cu b/perftest/cuda/cuperftest.cu
@@ -275,24 +275,29 @@ template<typename T> void run_test(test_options_t &test_opts) {
   }
 
   const int64_t nupts_tot = M * test_opts.n_runs * ntransf;
-
-  printf("event,count,tot(ms),mean(ms),std(ms),nupts/s,ns/nupt\n");
-  printf("host_to_device,%d,%f,%f,%f,0.0,0.0\n", h2d_timer.count(), h2d_timer.tot(),
-         h2d_timer.mean(), h2d_timer.std());
-  printf("makeplan,%d,%f,%f,%f,0.0,0.0\n", makeplan_timer.count(), makeplan_timer.tot(),
-         makeplan_timer.mean(), makeplan_timer.std());
-  printf("setpts,%d,%f,%f,%f,%g,%f\n", test_opts.n_runs, setpts_timer.tot(),
-         setpts_timer.mean(), setpts_timer.std(), nupts_tot * 1000 / setpts_timer.tot(),
-         setpts_timer.tot() * 1E6 / nupts_tot);
-  printf("execute,%d,%f,%f,%f,%g,%f\n", test_opts.n_runs, execute_timer.tot(),
-         execute_timer.mean(), execute_timer.std(),
-         nupts_tot * 1000 / execute_timer.tot(), execute_timer.tot() * 1E6 / nupts_tot);
-  printf("device_to_host,%d,%f,%f,%f,0.0,0.0\n", d2h_timer.count(), d2h_timer.tot(),
-         d2h_timer.mean(), d2h_timer.std());
-  printf("amortized,%d,%f,%f,%f,%g,%f\n", 1, amortized_timer.tot(),
-         amortized_timer.mean(), amortized_timer.std(),
-         nupts_tot * 1000 / amortized_timer.tot(),
-         amortized_timer.tot() * 1E6 / nupts_tot);
+  //
+  //  printf("event,count,tot(ms),mean(ms),std(ms),nupts/s,ns/nupt\n");
+  //  printf("host_to_device,%d,%f,%f,%f,0.0,0.0\n", h2d_timer.count(), h2d_timer.tot(),
+  //         h2d_timer.mean(), h2d_timer.std());
+  //  printf("makeplan,%d,%f,%f,%f,0.0,0.0\n", makeplan_timer.count(),
+  //  makeplan_timer.tot(),
+  //         makeplan_timer.mean(), makeplan_timer.std());
+  //  printf("setpts,%d,%f,%f,%f,%g,%f\n", test_opts.n_runs, setpts_timer.tot(),
+  //         setpts_timer.mean(), setpts_timer.std(), nupts_tot * 1000 /
+  //         setpts_timer.tot(), setpts_timer.tot() * 1E6 / nupts_tot);
+  //  printf("execute,%d,%f,%f,%f,%g,%f\n", test_opts.n_runs, execute_timer.tot(),
+  //         execute_timer.mean(), execute_timer.std(),
+  //         nupts_tot * 1000 / execute_timer.tot(), execute_timer.tot() * 1E6 /
+  //         nupts_tot);
+  //  printf("device_to_host,%d,%f,%f,%f,0.0,0.0\n", d2h_timer.count(), d2h_timer.tot(),
+  //         d2h_timer.mean(), d2h_timer.std());
+  //  printf("amortized,%d,%f,%f,%f,%g,%f\n", 1, amortized_timer.tot(),
+  //         amortized_timer.mean(), amortized_timer.std(),
+  //         nupts_tot * 1000 / amortized_timer.tot(),
+  //         amortized_timer.tot() * 1E6 / nupts_tot);
+  // print numpts / s
+  printf("setpts pts/s: %g\n", float(nupts_tot) * 1000 / setpts_timer.tot());
+  printf("execute pts/s: %g\n", float(nupts_tot) * 1000 / execute_timer.tot());
 }
 
 int main(int argc, char *argv[]) {

diff --git a/src/cuda/1d/cufinufft1d.cu b/src/cuda/1d/cufinufft1d.cu
@@ -5,11 +5,10 @@
 #include <iostream>
 #include <type_traits>
 
-#include <assert.h>
+#include <cassert>
 #include <cufft.h>
 
 #include <cufinufft/cudeconvolve.h>
-#include <cufinufft/memtransfer.h>
 #include <cufinufft/spreadinterp.h>
 #include <cufinufft/types.h>
 

diff --git a/src/cuda/3d/spread3d_wrapper.cu b/src/cuda/3d/spread3d_wrapper.cu
@@ -536,21 +536,27 @@ int cuspread3d_subprob(int nf1, int nf2, int nf3, int M, cufinufft_plan_t<T> *d_
   size_t sharedplanorysize = (bin_size_x + 2 * ceil(ns / 2.0)) *
                              (bin_size_y + 2 * ceil(ns / 2.0)) *
                              (bin_size_z + 2 * ceil(ns / 2.0)) * sizeof(cuda_complex<T>);
-  if (sharedplanorysize > 49152) {
-    std::cerr << "[cuspread3d_subprob] error: not enough shared memory ("
-              << sharedplanorysize << ")" << std::endl;
-    return FINUFFT_ERR_INSUFFICIENT_SHMEM;
-  }
+  //  if (sharedplanorysize > 49152) {
+  //    std::cerr << "[cuspread3d_subprob] error: not enough shared memory ("
+  //              << sharedplanorysize << ")" << std::endl;
+  //    return FINUFFT_ERR_INSUFFICIENT_SHMEM;
+  //  }
 
   for (int t = 0; t < blksize; t++) {
     if (d_plan->opts.gpu_kerevalmeth) {
+      cudaFuncSetAttribute(spread_3d_subprob<T, 1>,
+                           cudaFuncAttributeMaxDynamicSharedMemorySize,
+                           sharedplanorysize);
       spread_3d_subprob<T, 1><<<totalnumsubprob, 256, sharedplanorysize, stream>>>(
           d_kx, d_ky, d_kz, d_c + t * M, d_fw + t * nf1 * nf2 * nf3, M, ns, nf1, nf2, nf3,
           sigma, es_c, es_beta, d_binstartpts, d_binsize, bin_size_x, bin_size_y,
           bin_size_z, d_subprob_to_bin, d_subprobstartpts, d_numsubprob, maxsubprobsize,
           numbins[0], numbins[1], numbins[2], d_idxnupts);
       RETURN_IF_CUDA_ERROR
     } else {
+      cudaFuncSetAttribute(spread_3d_subprob<T, 0>,
+                           cudaFuncAttributeMaxDynamicSharedMemorySize,
+                           sharedplanorysize);
       spread_3d_subprob<T, 0><<<totalnumsubprob, 256, sharedplanorysize, stream>>>(
           d_kx, d_ky, d_kz, d_c + t * M, d_fw + t * nf1 * nf2 * nf3, M, ns, nf1, nf2, nf3,
           sigma, es_c, es_beta, d_binstartpts, d_binsize, bin_size_x, bin_size_y,

diff --git a/src/cuda/CMakeLists.txt b/src/cuda/CMakeLists.txt
@@ -24,34 +24,47 @@ set(CUFINUFFT_INCLUDE_DIRS ${CUFINUFFT_INCLUDE_DIRS} PARENT_SCOPE)
 
 add_library(cufinufft_common_objects OBJECT ${PRECISION_INDEPENDENT_SRC})
 target_include_directories(cufinufft_common_objects PUBLIC ${CUFINUFFT_INCLUDE_DIRS})
-set_property(TARGET cufinufft_common_objects PROPERTY POSITION_INDEPENDENT_CODE ON)
+set_target_properties(
+        cufinufft_common_objects PROPERTIES
+        POSITION_INDEPENDENT_CODE ON
+        CUDA_ARCHITECTURES ${FINUFFT_CUDA_ARCHITECTURES}
+)
 
 add_library(cufinufft_objects OBJECT ${PRECISION_DEPENDENT_SRC})
 target_include_directories(cufinufft_objects PUBLIC ${CUFINUFFT_INCLUDE_DIRS})
 set_property(TARGET cufinufft_objects PROPERTY POSITION_INDEPENDENT_CODE ON)
+set_target_properties(
+        cufinufft_objects PROPERTIES
+        POSITION_INDEPENDENT_CODE ON
+        CUDA_ARCHITECTURES ${FINUFFT_CUDA_ARCHITECTURES}
+)
 
 add_library(cufinufft SHARED
   $<TARGET_OBJECTS:cufinufft_common_objects>
   $<TARGET_OBJECTS:cufinufft_objects>
 )
+target_include_directories(cufinufft PUBLIC ${CUFINUFFT_INCLUDE_DIRS})
 target_link_libraries(cufinufft CUDA::cudart CUDA::cufft CUDA::nvToolsExt)
 set_target_properties(
   cufinufft PROPERTIES
   LIBRARY_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}"
+        CUDA_ARCHITECTURES ${FINUFFT_CUDA_ARCHITECTURES}
 )
 
 add_library(cufinufft_static STATIC
   $<TARGET_OBJECTS:cufinufft_common_objects>
   $<TARGET_OBJECTS:cufinufft_objects>
 )
+target_include_directories(cufinufft_static PUBLIC ${CUFINUFFT_INCLUDE_DIRS})
 if(WIN32)
   target_link_libraries(cufinufft_static PUBLIC CUDA::cudart CUDA::cufft CUDA::nvToolsExt)
 else()
   target_link_libraries(cufinufft_static PUBLIC CUDA::cudart_static CUDA::cufft_static CUDA::nvToolsExt)
 endif()
 set_target_properties(
   cufinufft_static PROPERTIES
-  ARCHIVE_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}"
+        CUDA_ARCHITECTURES ${FINUFFT_CUDA_ARCHITECTURES}
+        ARCHIVE_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}"
 )
 
 file(GLOB CUFINUFFT_PUBLIC_HEADERS "${CMAKE_SOURCE_DIR}/include/cufinufft*.h")

diff --git a/src/cuda/common.cu b/src/cuda/common.cu
@@ -199,6 +199,28 @@ void onedim_fseries_kernel_compute(CUFINUFFT_BIGINT nf, T *f, std::complex<doubl
   }
 }
 
+template<typename T>
+std::size_t shared_memory_required(int dim, int ns, int bin_size_x, int bin_size_y,
+                                   int bin_size_z) {
+  printf("dim, ns, bin_size_x, bin_size_y, bin_size_z: %d %d %d %d %d\n", dim, ns,
+         bin_size_x, bin_size_y, bin_size_z);
+  int adjusted_ns = bin_size_x + ((ns + 1) / 2) * 2;
+
+  if (dim == 1) {
+    return adjusted_ns * sizeof(cuda_complex<T>);
+  }
+
+  adjusted_ns *= (bin_size_y + ((ns + 1) / 2) * 2);
+
+  if (dim == 2) {
+    return adjusted_ns * sizeof(cuda_complex<T>);
+  }
+
+  adjusted_ns *= (bin_size_z + ((ns + 1) / 2) * 2);
+
+  return adjusted_ns * sizeof(cuda_complex<T>);
+}
+
 template void onedim_fseries_kernel_compute(CUFINUFFT_BIGINT nf, float *f,
                                             std::complex<double> *a, float *fwkerhalf,
                                             finufft_spread_opts opts);
@@ -227,5 +249,11 @@ template void onedim_fseries_kernel(CUFINUFFT_BIGINT nf, float *fwkerhalf,
                                     finufft_spread_opts opts);
 template void onedim_fseries_kernel(CUFINUFFT_BIGINT nf, double *fwkerhalf,
                                     finufft_spread_opts opts);
+
+template std::size_t shared_memory_required<float>(int dim, int ns, int bin_size_x,
+                                                   int bin_size_y, int bin_size_z);
+template std::size_t shared_memory_required<double>(int dim, int ns, int bin_size_x,
+                                                    int bin_size_y, int bin_size_z);
+
 } // namespace common
 } // namespace cufinufft
diff --git a/src/cuda/spreadinterp.cpp b/src/cuda/spreadinterp.cpp
@@ -69,7 +69,7 @@ int setup_spreader(finufft_spread_opts &opts, T eps, T upsampfac, int kerevalmet
     ier = FINUFFT_WARN_EPS_TOO_SMALL;
   }
   opts.nspread      = ns;
-  opts.ES_halfwidth = (T)ns / 2; // constants to help ker eval (except Horner)
+  opts.ES_halfwidth = T(ns * .5); // constants to help ker eval (except Horner)
   opts.ES_c         = 4.0 / (T)(ns * ns);
 
   T betaoverns = 2.30;            // gives decent betas for default sigma=2.0

diff --git a/test/cuda/CMakeLists.txt b/test/cuda/CMakeLists.txt
@@ -7,6 +7,14 @@ foreach(srcfile ${test_src})
   add_executable(${executable} ${srcfile})
   target_include_directories(${executable} PUBLIC ${CUFINUFFT_INCLUDE_DIRS})
   target_link_libraries(${executable} PUBLIC cufinufft m)
+  set_target_properties(${executable} PROPERTIES
+          LINKER_LANGUAGE CUDA
+          CUDA_ARCHITECTURES ${FINUFFT_CUDA_ARCHITECTURES}
+  )
+  message(STATUS "Adding test ${executable}"
+          " with CUDA_ARCHITECTURES=${FINUFFT_CUDA_ARCHITECTURES}"
+          " and INCLUDE=${CUFINUFFT_INCLUDE_DIRS}"
+  )
 endforeach()
 
 function(add_tests PREC REQ_TOL CHECK_TOL)