Adding my weighted graph coarsening code into kokkos-kernels (#1043)

* adding my wgted graph coarsening code into kokkos-kernels * graph coarsening unit tests * updating coarse builder with recent changes * updating with recent changes * fixing some details * more tests; small fixes * fix some problems * fix some things; unit tests work now * change contact * fixed for pthreads and when openmp/serial are not enabled * enable spgemm dedupe type in unit test * clang format * clang format again * converting all member functions to static; using a handle instead of member variable parameters * clang-format * requested changes * exclude new coarsening files from compilation for cuda builds without lambdas enabled * update files after rebase * clang format * fix g++ compiler warnings for serial build * clang format * 'correct' variable was not needed and has been eliminated * fix compiler warning for SKX gcc-7 build * fix randomly failing coarsening test for small random graphs
kokkos · Aug 23, 2022 · 5930a2b · 5930a2b
1 parent 1d37bad
commit 5930a2b
Show file tree

Hide file tree

Showing 5 changed files with 3,705 additions and 2 deletions.
diff --git a/src/common/KokkosKernels_HashmapAccumulator.hpp b/src/common/KokkosKernels_HashmapAccumulator.hpp
@@ -46,8 +46,6 @@
 #include <Kokkos_Atomic.hpp>
 #include <atomic>
 
-//#define HASHMAPACCUMULATOR_ASSERT_ENABLED
-
 namespace KokkosKernels {
 
 namespace Experimental {
@@ -442,6 +440,77 @@ struct HashmapAccumulator {
       keys[my_write_index]   = key;
       values[my_write_index] = value;
 
+#if defined(KOKKOS_ARCH_VOLTA) || defined(KOKKOS_ARCH_TURING75) || \
+    defined(KOKKOS_ARCH_AMPERE)
+      // this is an issue on VOLTA because warps do not go in SIMD fashion
+      // anymore. while some thread might insert my_write_index into linked
+      // list, another thread in the warp might be reading keys in above loop.
+      // before inserting the new value in linked list -- which is done with
+      // atomic exchange below, we make sure that the linked list is complete by
+      // assigning the hash_next to current head. the head might be different
+      // when we do the atomic exchange. this would cause temporarily skipping a
+      // key in the linkedlist until hash_nexts is updated second time as below.
+      // but this is okay for spgemm, because no two keys will be inserted into
+      // hashmap at the same time, as rows have unique columns.
+
+      // Neither the compiler nor the execution unit can re-order the line
+      // directly below with the next line performing the atomic_exchange as the
+      // atomic exchange writes to hash_begins[hash] and this line reads from
+      // hash_begins[hash].
+      // This line is needed such that threads of execution can still access the
+      // old linked list, after hash_begins+hash has been atomically overwritten
+      // with my_write_index but before hash_nexts[my_write_index] is
+      // overwritten with hashbeginning. If this line was not here, threads may
+      // not be able to access the dangling linked list since
+      // hash_nexts[my_write_index] would still be -1.
+      hash_nexts[my_write_index] = hash_begins[hash];
+#endif
+
+      hashbeginning =
+          Kokkos::atomic_exchange(hash_begins + hash, my_write_index);
+      if (hashbeginning == -1) {
+        used_hashes[Kokkos::atomic_fetch_add(used_hash_size, size_type(1))] =
+            hash;
+      }
+      hash_nexts[my_write_index] = hashbeginning;
+      return __insert_success;
+    }
+  }
+
+  // Same as vector_atomic_insert_into_hash_mergeAdd_TrackHashes, except that
+  // an existing value is updated with an atomic add. This is necessary when
+  // duplicate keys may be inserted simultaneously.
+  KOKKOS_INLINE_FUNCTION
+  int vector_atomic_insert_into_hash_mergeAtomicAdd_TrackHashes(
+      const key_type key, const value_type value,
+      volatile size_type *used_size_, size_type *used_hash_size,
+      size_type *used_hashes) {
+    size_type hash, i, my_write_index, hashbeginning;
+
+    if (key == -1) return __insert_success;
+
+    hash = __compute_hash(key, __hashOpRHS);
+    if (hash != -1) {
+      i = hash_begins[hash];
+
+      for (; i != -1; i = hash_nexts[i]) {
+        if (keys[i] == key) {
+          Kokkos::atomic_add(values + i, value);
+          return __insert_success;
+        }
+      }
+    } else {
+      return __insert_success;
+    }
+
+    my_write_index = Kokkos::atomic_fetch_add(used_size_, size_type(1));
+
+    if (my_write_index >= __max_value_size) {
+      return __insert_full;
+    } else {
+      keys[my_write_index]   = key;
+      values[my_write_index] = value;
+
 #if defined(KOKKOS_ARCH_VOLTA) || defined(KOKKOS_ARCH_TURING75) || \
     defined(KOKKOS_ARCH_AMPERE)
       // this is an issue on VOLTA and up because warps do not go in SIMD
@@ -480,6 +549,48 @@ struct HashmapAccumulator {
     }
   }
 
+  KOKKOS_INLINE_FUNCTION
+  int vector_atomic_insert_into_hash_mergeAdd_TrackHashes_no_list(
+      const key_type key, const value_type value, size_type *used_hash_size,
+      size_type *used_hashes) {
+    size_type hash;
+
+    if (key == -1) return __insert_success;  // -1 is the empty-slot marker
+
+    hash = __compute_hash(key, __hashOpRHS);
+    if (hash != -1) {
+      // Probe until `key` is found or an empty slot (-1) is claimed for it.
+      // The loop has no other exit, so a free slot must stay reachable; if
+      // capacity is a constant multiple of used hashes, expect O(1) probes.
+      int depth = 0;
+      // Rehash hash + key + depth for the next probe: key is added so distinct
+      // keys do not follow the same path, depth is added to prevent cycles.
+      for (;; hash = __compute_hash(hash + key + depth++, __hashOpRHS)) {
+        if (keys[hash] == key) {
+          Kokkos::atomic_add(values + hash, value);
+          return __insert_success;
+        } else if (keys[hash] == -1) {
+          if (Kokkos::atomic_compare_exchange_strong<key_type>(keys + hash, -1,
+                                                               key)) {
+            // CAS won: we claimed a fresh slot, so record its hash.
+            used_hashes[Kokkos::atomic_fetch_add(used_hash_size,
+                                                 size_type(1))] = hash;
+            Kokkos::atomic_add(values + hash, value);
+            return __insert_success;
+          }
+          // CAS lost: that is fine if the winning thread stored our key;
+          // otherwise fall through and keep probing.
+          if (keys[hash] == key) {
+            Kokkos::atomic_add(values + hash, value);
+            return __insert_success;
+          }
+        }
+      }
+    } else {
+      return __insert_success;  // hash == -1: nothing is inserted
+    }
+  }
+
   // NOTE: this is an exact copy of vector_atomic_insert_into_hash_mergeAdd from
   // https://github.com/kokkos/kokkos-kernels/blob/750fe24508a69ed4dba92bb4a9e17a6094b1a083/src/common/KokkosKernels_HashmapAccumulator.hpp#L442-L502
   template <typename team_member_t>