Fixed mod function specialization #281 Fixed bitscan detection

g-truc · Nov 29, 2014 · 7e81213 · 7e81213
1 parent 9b250cc
commit 7e81213
Show file tree

Hide file tree

Showing 8 changed files with 225 additions and 36 deletions.
diff --git a/glm/detail/func_common.inl b/glm/detail/func_common.inl
@@ -162,6 +162,16 @@ namespace detail
  return (x >> Shift) | y;
  }
  };
+
+ template <typename T, precision P, template <class, precision> class vecType, typename genType, bool isFloat = true>
+ struct compute_mod // Primary template (isFloat defaults to true): computes mod as a - b * floor(a / b).
+ {
+ GLM_FUNC_QUALIFIER static vecType<T, P> call(vecType<T, P> const & a, genType const & b) // genType is the divisor type chosen by the caller (the mod overloads pass T or vecType<T, P>)
+ {
+ GLM_STATIC_ASSERT(std::numeric_limits<T>::is_iec559, "'mod' only accept floating-point inputs. Include <glm/gtc/integer.hpp> for integer inputs."); // rejects any T that is not an IEC 559 floating-point type
+ return a - b * floor(a / b);
+ }
+ };
 }//namespace detail
 
  // abs
@@ -334,15 +344,13 @@ namespace detail
  template <typename T, precision P, template <typename, precision> class vecType>
  GLM_FUNC_QUALIFIER vecType<T, P> mod(vecType<T, P> const & x, T y)
  {
- GLM_STATIC_ASSERT(std::numeric_limits<T>::is_iec559, "'mod' only accept floating-point inputs");
- return x - y * floor(x / y);
+ return detail::compute_mod<T, P, vecType, T, std::numeric_limits<T>::is_iec559>::call(x, y);
  }
 
  template <typename T, precision P, template <typename, precision> class vecType>
  GLM_FUNC_QUALIFIER vecType<T, P> mod(vecType<T, P> const & x, vecType<T, P> const & y)
  {
- GLM_STATIC_ASSERT(std::numeric_limits<T>::is_iec559, "'mod' only accept floating-point inputs");
- return x - y * floor(x / y);
+ return detail::compute_mod<T, P, vecType, vecType<T, P>, std::numeric_limits<T>::is_iec559>::call(x, y);
  }
 
  // modf

diff --git a/glm/detail/func_integer.inl b/glm/detail/func_integer.inl
@@ -104,7 +104,7 @@ namespace detail
  }
  };
 
-# if(GLM_ARCH != GLM_ARCH_PURE) && ((GLM_COMPILER & GLM_COMPILER_VC) || ((GLM_COMPILER & (GLM_COMPILER_LLVM | GLM_COMPILER_INTEL)) && (GLM_PLATFORM & GLM_PLATFORM_WINDOWS)))
+# if GLM_HAS_BITSCAN_WINDOWS
  template <typename genIUType>
  struct compute_findLSB<genIUType, 32>
  {
@@ -126,7 +126,7 @@ namespace detail
  return IsNotNull ? int(Result) : -1;
  }
  };
-# endif//GLM_ARCH != GLM_ARCH_PURE
+# endif//GLM_HAS_BITSCAN_WINDOWS
 
  template <typename T, glm::precision P, template <class, glm::precision> class vecType, bool EXEC = true>
  struct compute_findMSB_step_vec
@@ -162,7 +162,7 @@ namespace detail
  }
  };
 
-# if(GLM_ARCH != GLM_ARCH_PURE) && ((GLM_COMPILER & GLM_COMPILER_VC) || ((GLM_COMPILER & (GLM_COMPILER_LLVM | GLM_COMPILER_INTEL)) && (GLM_PLATFORM & GLM_PLATFORM_WINDOWS)))
+# if GLM_HAS_BITSCAN_WINDOWS
  template <typename genIUType>
  GLM_FUNC_QUALIFIER int compute_findMSB_32(genIUType Value)
  {
@@ -196,7 +196,7 @@ namespace detail
  return detail::functor1<int, T, P, vecType>::call(compute_findMSB_64, x);
  }
  };
-# endif//GLM_ARCH != GLM_ARCH_PURE
+# endif//GLM_HAS_BITSCAN_WINDOWS
 }//namespace detail
 
  // uaddCarry

diff --git a/glm/detail/setup.hpp b/glm/detail/setup.hpp
@@ -525,14 +525,19 @@
  (GLM_LANG & GLM_LANG_CXX11_FLAG) || \
  ((GLM_LANG & GLM_LANG_CXX0X_FLAG) && (GLM_COMPILER & GLM_COMPILER_GCC) && (GLM_COMPILER >= GLM_COMPILER_GCC49)))
 
-#define GLM_HAS_TRIVIAL_QUERIES (\
+#define GLM_HAS_TRIVIAL_QUERIES ( \
  ((GLM_LANG & GLM_LANG_CXX11_FLAG) && !(GLM_COMPILER & GLM_COMPILER_GCC)) || \
  ((GLM_COMPILER & GLM_COMPILER_VC) && (GLM_COMPILER >= GLM_COMPILER_VC2013)))
 
-#define GLM_HAS_MAKE_SIGNED (\
+#define GLM_HAS_MAKE_SIGNED ( \
  (GLM_LANG & GLM_LANG_CXX11_FLAG) || \
  ((GLM_COMPILER & GLM_COMPILER_VC) && (GLM_COMPILER >= GLM_COMPILER_VC2013)))
 
+#define GLM_HAS_BITSCAN_WINDOWS ( \
+ (GLM_ARCH != GLM_ARCH_PURE) && \
+ (GLM_PLATFORM & GLM_PLATFORM_WINDOWS) && \
+ (GLM_COMPILER & (GLM_COMPILER_VC | GLM_COMPILER_LLVM | GLM_COMPILER_INTEL))) // non-zero only for non-pure builds on Windows with the VC, LLVM or Intel compiler; used to gate code paths that call _BitScanReverse
+
 // OpenMP
 #ifdef _OPENMP 
 # if GLM_COMPILER & GLM_COMPILER_GCC

diff --git a/glm/gtc/integer.hpp b/glm/gtc/integer.hpp
@@ -45,6 +45,7 @@
 // Dependencies
 #include "../detail/setup.hpp"
 #include "../detail/precision.hpp"
+#include "../detail/func_common.hpp"
 #include "../detail/func_integer.hpp"
 #include "../detail/func_exponential.hpp"
 #include <limits>
@@ -58,11 +59,46 @@ namespace glm
  /// @addtogroup gtc_integer
  /// @{
 
- /// Returns the log2 of x. Can be reliably using to compute mipmap count from the texture size.
- /// From GLM_GTC_integer extension.
+ /// Returns the log2 of x for integer values. Can be reliably used to compute the mipmap count from the texture size.
+ /// @see gtc_integer
  template <typename genIUType>
  GLM_FUNC_DECL genIUType log2(genIUType x);
 
+ /// Modulus. Returns x % y
+ /// for each component in x using the integer value y.
+ ///
+ /// @tparam genIUType Integer scalar or vector types.
+ ///
+ /// @see gtc_integer
+ /// @see <a href="http://www.opengl.org/sdk/docs/manglsl/xhtml/mod.xml">GLSL mod man page</a>
+ /// @see <a href="http://www.opengl.org/registry/doc/GLSLangSpec.4.20.8.pdf">GLSL 4.20.8 specification, section 8.3 Common Functions</a>
+ template <typename genIUType>
+ GLM_FUNC_DECL genIUType mod(genIUType x, genIUType y);
+
+ /// Modulus. Returns x % y
+ /// for each component in x using the integer scalar value y.
+ ///
+ /// @tparam T Integer scalar types.
+ /// @tparam vecType vector types.
+ ///
+ /// @see gtc_integer
+ /// @see <a href="http://www.opengl.org/sdk/docs/manglsl/xhtml/mod.xml">GLSL mod man page</a>
+ /// @see <a href="http://www.opengl.org/registry/doc/GLSLangSpec.4.20.8.pdf">GLSL 4.20.8 specification, section 8.3 Common Functions</a>
+ template <typename T, precision P, template <typename, precision> class vecType>
+ GLM_FUNC_DECL vecType<T, P> mod(vecType<T, P> const & x, T y);
+
+ /// Modulus. Returns x % y
+ /// for each component in x using the integer vector y.
+ ///
+ /// @tparam T Integer scalar types.
+ /// @tparam vecType vector types.
+ ///
+ /// @see gtc_integer
+ /// @see <a href="http://www.opengl.org/sdk/docs/manglsl/xhtml/mod.xml">GLSL mod man page</a>
+ /// @see <a href="http://www.opengl.org/registry/doc/GLSLangSpec.4.20.8.pdf">GLSL 4.20.8 specification, section 8.3 Common Functions</a>
+ template <typename T, precision P, template <typename, precision> class vecType>
+ GLM_FUNC_DECL vecType<T, P> mod(vecType<T, P> const & x, vecType<T, P> const & y);
+
  /// @}
 } //namespace glm
 

diff --git a/glm/gtc/integer.inl b/glm/gtc/integer.inl
@@ -44,24 +44,31 @@ namespace detail
  }
  };
 
-# if(GLM_ARCH != GLM_ARCH_PURE) && (GLM_COMPILER & (GLM_COMPILER_VC | GLM_COMPILER_APPLE_CLANG | GLM_COMPILER_LLVM))
-
- template <precision P>
- struct compute_log2<int, P, tvec4, false>
- {
- GLM_FUNC_QUALIFIER static tvec4<int, P> call(tvec4<int, P> const & vec)
+# if GLM_HAS_BITSCAN_WINDOWS
+ template <precision P>
+ struct compute_log2<int, P, tvec4, false>
  {
- tvec4<int, P> Result(glm::uninitialize);
+ GLM_FUNC_QUALIFIER static tvec4<int, P> call(tvec4<int, P> const & vec)
+ {
+ tvec4<int, P> Result(glm::uninitialize);
+
+ _BitScanReverse(reinterpret_cast<unsigned long*>(&Result.x), vec.x);
+ _BitScanReverse(reinterpret_cast<unsigned long*>(&Result.y), vec.y);
+ _BitScanReverse(reinterpret_cast<unsigned long*>(&Result.z), vec.z);
+ _BitScanReverse(reinterpret_cast<unsigned long*>(&Result.w), vec.w);
 
- _BitScanReverse(reinterpret_cast<unsigned long*>(&Result.x), vec.x);
- _BitScanReverse(reinterpret_cast<unsigned long*>(&Result.y), vec.y);
-  _BitScanReverse(reinterpret_cast<unsigned long*>(&Result.z), vec.z);
- _BitScanReverse(reinterpret_cast<unsigned long*>(&Result.w), vec.w);
+  return Result;
+ }
+ };
+# endif//GLM_HAS_BITSCAN_WINDOWS
 
- return Result;
+ template <typename T, precision P, template <class, precision> class vecType, typename genType>
+ struct compute_mod<T, P, vecType, genType, false> // Partial specialization for a false last argument (the non-floating-point case): mod is computed with operator%.
+ {
+ GLM_FUNC_QUALIFIER static vecType<T, P> call(vecType<T, P> const & a, genType const & b)
+ {
+ return a % b; // relies on operator% being available for vecType<T, P> with a genType right-hand side
  }
  };
-
-# endif//GLM_ARCH != GLM_ARCH_PURE
 }//namespace detail
 }//namespace glm
diff --git a/test/core/core_func_common.cpp b/test/core/core_func_common.cpp
@@ -162,6 +162,40 @@ namespace modf_
  }
 }//namespace modf
 
+namespace mod_
+{
+ int test() // Returns the number of failed glm::mod checks; 0 means every case passed.
+ {
+ int Error(0);
+
+ { // Case 1: scalar mod scalar, mod(3, 2) must be within 0.00001 of 1
+ float A(3.0);
+ float B(2.0f);
+ float C = glm::mod(A, B);
+
+ Error += glm::abs(C - 1.0f) < 0.00001f ? 0 : 1;
+ }
+
+ { // Case 2: vec4 mod scalar, every component must be within 0.00001 of 1
+ glm::vec4 A(3.0);
+ float B(2.0f);
+ glm::vec4 C = glm::mod(A, B);
+
+ Error += glm::all(glm::epsilonEqual(C, glm::vec4(1.0f), 0.00001f)) ? 0 : 1;
+ }
+
+ { // Case 3: vec4 mod vec4, every component must be within 0.00001 of 1
+ glm::vec4 A(3.0);
+ glm::vec4 B(2.0f);
+ glm::vec4 C = glm::mod(A, B);
+
+ Error += glm::all(glm::epsilonEqual(C, glm::vec4(1.0f), 0.00001f)) ? 0 : 1;
+ }
+
+ return Error;
+ }
+}//namespace mod_
+
 namespace floatBitsToInt
 {
  int test()
@@ -1109,6 +1143,7 @@ int main()
 
  Error += sign::test();
  Error += floor_::test();
+ Error += mod_::test();
  Error += modf_::test();
  Error += floatBitsToInt::test();
  Error += floatBitsToUint::test();

diff --git a/test/core/core_func_integer.cpp b/test/core/core_func_integer.cpp
@@ -578,6 +578,7 @@ namespace findMSB
  genType Return;
  };
 
+# if GLM_HAS_BITSCAN_WINDOWS
  template <typename genIUType>
  GLM_FUNC_QUALIFIER int findMSB_intrinsic(genIUType Value)
  {
@@ -590,6 +591,20 @@ namespace findMSB
  _BitScanReverse(&Result, Value);
  return int(Result);
  }
+# endif//GLM_HAS_BITSCAN_WINDOWS
+
+# if GLM_ARCH & GLM_ARCH_AVX
+ // Returns the bit index of the most significant set bit of Value, or -1 when Value is 0.
+ // Value is handed to a 32-bit intrinsic, so it is treated as a 32-bit quantity.
+ template <typename genIUType>
+ GLM_FUNC_QUALIFIER int findMSB_avx(genIUType Value)
+ {
+ GLM_STATIC_ASSERT(std::numeric_limits<genIUType>::is_integer, "'findMSB' only accept integer values");
+
+ if(Value == 0)
+ return -1;
+
+ // _tzcnt_u32 counts trailing zeros, which is the index of the LOWEST set bit.
+ // The highest set bit of a non-zero 32-bit value is 31 minus its leading-zero count.
+ // NOTE(review): _lzcnt_u32 needs LZCNT hardware support, which the surrounding
+ // GLM_ARCH_AVX guard may not imply on its own -- confirm the guard is sufficient.
+ return 31 - int(_lzcnt_u32(static_cast<unsigned int>(Value)));
+ }
+# endif
 
  template <typename genIUType>
  GLM_FUNC_QUALIFIER int findMSB_095(genIUType Value)
@@ -698,7 +713,7 @@ namespace findMSB
  };
 
  int Error(0);
- std::size_t const Count(1000000);
+ std::size_t const Count(10000000);
 
  std::clock_t Timestamps0 = std::clock();
 
@@ -738,12 +753,14 @@ namespace findMSB
 
  std::clock_t Timestamps4 = std::clock();
 
- for(std::size_t k = 0; k < Count; ++k)
- for(std::size_t i = 0; i < sizeof(Data) / sizeof(type<int>); ++i)
- {
- int Result = findMSB_intrinsic(Data[i].Value);
- Error += Data[i].Return == Result ? 0 : 1;
- }
+# if GLM_HAS_BITSCAN_WINDOWS
+ for(std::size_t k = 0; k < Count; ++k)
+ for(std::size_t i = 0; i < sizeof(Data) / sizeof(type<int>); ++i)
+ {
+ int Result = findMSB_intrinsic(Data[i].Value);
+ Error += Data[i].Return == Result ? 0 : 1;
+ }
+# endif//GLM_HAS_BITSCAN_WINDOWS
 
  std::clock_t Timestamps5 = std::clock();
 
@@ -756,13 +773,31 @@ namespace findMSB
 
  std::clock_t Timestamps6 = std::clock();
 
+# if GLM_ARCH & GLM_ARCH_AVX
+ for(std::size_t k = 0; k < Count; ++k)
+ for(std::size_t i = 0; i < sizeof(Data) / sizeof(type<int>); ++i)
+ {
+ int Result = findMSB_avx(Data[i].Value);
+ Error += Data[i].Return == Result ? 0 : 1;
+ }
+# endif
+
+ std::clock_t Timestamps7 = std::clock();
+
  std::printf("glm::findMSB: %d clocks\n", static_cast<unsigned int>(Timestamps1 - Timestamps0));
  std::printf("findMSB - nlz1: %d clocks\n", static_cast<unsigned int>(Timestamps2 - Timestamps1));
  std::printf("findMSB - nlz2: %d clocks\n", static_cast<unsigned int>(Timestamps3 - Timestamps2));
  std::printf("findMSB - 0.9.5: %d clocks\n", static_cast<unsigned int>(Timestamps4 - Timestamps3));
- std::printf("findMSB - intrinsics: %d clocks\n", static_cast<unsigned int>(Timestamps5 - Timestamps4));
+
+# if GLM_HAS_BITSCAN_WINDOWS
+ std::printf("findMSB - intrinsics: %d clocks\n", static_cast<unsigned int>(Timestamps5 - Timestamps4));
+# endif//GLM_HAS_BITSCAN_WINDOWS
  std::printf("findMSB - pop: %d clocks\n", static_cast<unsigned int>(Timestamps6 - Timestamps5));
 
+# if GLM_ARCH & GLM_ARCH_AVX
+ std::printf("findMSB - avx tzcnt: %d clocks\n", static_cast<unsigned int>(Timestamps7 - Timestamps6));
+# endif
+
  return Error;
  }
 
@@ -888,6 +923,8 @@ namespace findMSB
  for(std::size_t i = 0; i < sizeof(Data) / sizeof(type<int>); ++i)
  {
  int Result0 = findMSB_intrinsic(Data[i].Value);
+ //unsigned int A = _lzcnt_u32(Data[i].Value);
+ //unsigned int B = _tzcnt_u32(Data[i].Value);
  Error += Data[i].Return == Result0 ? 0 : 1;
  }
 
@@ -1527,6 +1564,8 @@ int main()
  Error += ::bitfieldInsert::test();
  Error += ::bitfieldExtract::test();
 
+ Error += ::findMSB::perf();
+
 # ifdef GLM_TEST_ENABLE_PERF
  Error += ::bitCount::perf();
  Error += ::bitfieldReverse::perf();