
Commit

Indexing refactor stage 2: Remove reference tensor in predicate indexing logic (csarofeen#1784)

Co-authored-by: Christian Sarofeen <csarofeen@nvidia.com>
shmsong and csarofeen authored Jul 2, 2022
1 parent f008140 commit 8d384da
Showing 5 changed files with 287 additions and 65 deletions.
89 changes: 31 additions & 58 deletions torch/csrc/jit/codegen/cuda/index_compute.cpp
@@ -2813,7 +2813,6 @@ auto getPredicateReferenceIndexing(
 std::pair<Val*, Val*> getStartAndStopOffsets(
     IterDomain* consumer_id,
     TensorView* consumer_tv,
-    const ReferenceTensor& reference,
     const std::unordered_map<IterDomain*, Val*>& consumer_start_index_map,
     const std::unordered_map<IterDomain*, Val*>& consumer_stop_index_map,
     bool padding_predicate,
@@ -2979,12 +2978,12 @@ std::pair<Val*, Val*> hoistPredicates(
     Val* start_index,
     Val* stop_index,
     const std::vector<kir::ForLoop*>& loops,
+    std::vector<IterDomain*> loop_domains,
+    const std::unordered_map<IterDomain*, Val*> start_initial_loop_index_map,
+    const std::unordered_map<IterDomain*, Val*> stop_initial_loop_index_map,
     kir::ForLoop* unswitch_or_vec_loop,
     IterDomain* predicated_consumer_id,
-    TensorView* predicated_consumer_tv,
-    TensorDomain* ref_td,
-    const std::unordered_map<IterDomain*, Val*>& ref_start_index_map,
-    const std::unordered_map<IterDomain*, Val*>& ref_stop_index_map) {
+    TensorView* predicated_consumer_tv) {
   const std::pair<Val*, Val*> same_indices{start_index, stop_index};

   if (isDisabled(DisableOption::IndexHoist)) {
@@ -3004,8 +3003,8 @@ std::pair<Val*, Val*> hoistPredicates(
     GpuLower::current()->commonIndexMap().insert(
         predicated_consumer_id,
         predicated_consumer_tv->domain(),
-        ref_td,
-        ref_stop_index_map,
+        loop_domains,
+        stop_initial_loop_index_map,
         loops,
         stop_index);
   }
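
As background for the insert() calls in this function: the common index map acts as a cache keyed by the predicated iteration domain and the loop structure it sits under, so an equivalent hoisted index is generated once and then reused. A minimal sketch of that idea, using hypothetical stand-in types rather than the real CommonIndexMap API:

// Hedged sketch only: simplified stand-ins, not the nvfuser classes.
#include <map>
#include <string>
#include <utility>
#include <vector>

struct Val {};                             // stands in for nvfuser's Val
using IterDomain = std::string;            // stands in for IterDomain*
using LoopNest = std::vector<std::string>; // stands in for the loop vector

struct CommonIndexCache {
  // Key: which domain is predicated, and under which loop nest.
  std::map<std::pair<IterDomain, LoopNest>, Val*> cache;

  // Mirrors the insert() pattern above: stores the freshly computed
  // index on first sight, and hands back the cached one afterwards.
  Val* insertOrReuse(const IterDomain& id, const LoopNest& loops, Val* index) {
    auto [it, inserted] = cache.emplace(std::make_pair(id, loops), index);
    return it->second; // the previously cached Val* if one was present
  }
};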
@@ -3021,8 +3020,8 @@ std::pair<Val*, Val*> hoistPredicates(
     GpuLower::current()->commonIndexMap().insert(
         predicated_consumer_id,
         predicated_consumer_tv->domain(),
-        ref_td,
-        ref_start_index_map,
+        loop_domains,
+        start_initial_loop_index_map,
         loops,
         start_index);
   }
@@ -3033,12 +3032,11 @@ std::pair<Val*, Val*> hoistPredicates(
 } // namespace

 // Returns predicates and the concrete (by loop map) root domains they cover
-std::pair<std::vector<RootPredicateInfo>, ReferenceTensor> Index::
-    getReferenceRootPredicates(
-        TensorView* consumer_tv,
-        const std::vector<kir::ForLoop*>& loops,
-        kir::ForLoop* unswitch_or_vec_loop,
-        bool shift_padding) {
+std::vector<RootPredicateInfo> Index::getReferenceRootPredicates(
+    TensorView* consumer_tv,
+    const std::vector<kir::ForLoop*>& loops,
+    kir::ForLoop* unswitch_or_vec_loop,
+    bool shift_padding) {
   FUSER_PERF_SCOPE("GpuLower::Lower::Index::getReferenceRootPredicates");

   const auto gpu_lower = GpuLower::current();
@@ -3047,22 +3045,9 @@ std::pair<std::vector<RootPredicateInfo>, ReferenceTensor> Index::

   // Nothing needs to be done when padding is not required.
   if (shift_padding && !needsPadding(consumer_tv)) {
-    return {{RootPredicateInfo::getFalseInfo()}, ReferenceTensor{}};
+    return {RootPredicateInfo::getFalseInfo()};
   }

-  // Get a reference tensor replayed as existing loop structure
-  ReferenceTensor reference =
-      IndexReferenceReplay::getReference(loops, consumer_tv);
-
-  // Generate halo information for reference.
-  updateHaloInfoForReference(reference, consumer_tv);
-
-  const auto ref_2_consumer = indexMapReferenceTo(
-      consumer_tv, gpu_lower->caMap(), reference.concrete_to_id);
-
-  const auto reference_halo_extent_map =
-      getReferenceHaloExtentMap(reference, ref_2_consumer);
-
   auto db_axis = gpu_lower->doubleBufferInfo().getDoubleBufferAxis(consumer_tv);

   // Indexing is done without considering contig merging. Actual
Expand All @@ -3073,38 +3058,27 @@ std::pair<std::vector<RootPredicateInfo>, ReferenceTensor> Index::
       std::vector<bool>(consumer_tv->getMaybeRFactorDomain().size(), false),
       {});

+  // Generate start and stop indexing from idgraph.
+  //
   // Both start and stop positions may need to be predicated. Indexing
   // differs when generating predicates for unswitch.
   // NOTE: If we could find-and-replace KIR nodes, we could just
   // generate one index map, clone it and replace the loop-to-index
   // mappings of unswitched loops for the start predicate.
-  auto ref_stop_indexing = getPredicateReferenceIndexing(
-      loops, reference, unswitch_or_vec_loop, db_axis, false);
-  const auto consumer_stop_indexing = ref_stop_indexing.updateIndexCompute(
-      consumer_tv->domain(),
-      ref_2_consumer,
-      contig_finder,
-      reference_halo_extent_map);
+
+  auto stop_indexing_from_idgraph = getPredicateIndexingFromIdGraph(
+      loops, consumer_tv, unswitch_or_vec_loop, db_axis, false);
+  const auto consumer_stop_indexing = stop_indexing_from_idgraph.index;
   const auto& consumer_stop_index_map = consumer_stop_indexing.indexMap();

   // If not unswitch, share the same indexing map as the stop index
   // map
-  const auto& ref_start_indexing = is_unswitch
-      ? getPredicateReferenceIndexing(
-            loops, reference, unswitch_or_vec_loop, db_axis, true)
-      : ref_stop_indexing;
-
-  std::unordered_map<IterDomain*, Val*> consumer_start_index_map;
-  if (is_unswitch) {
-    const auto consumer_start_indexing = ref_start_indexing.updateIndexCompute(
-        consumer_tv->domain(),
-        ref_2_consumer,
-        contig_finder,
-        reference_halo_extent_map);
-    consumer_start_index_map = consumer_start_indexing.indexMap();
-  } else {
-    consumer_start_index_map = consumer_stop_index_map;
-  }
+  const auto start_indexing_from_idgraph = is_unswitch
+      ? getPredicateIndexingFromIdGraph(
+            loops, consumer_tv, unswitch_or_vec_loop, db_axis, true)
+      : stop_indexing_from_idgraph;
+  const auto consumer_start_indexing = start_indexing_from_idgraph.index;
+  const auto& consumer_start_index_map = consumer_start_indexing.indexMap();

   // Get the contiguous ids we need to generate predicates for
   auto contig_id_infos =
@@ -3161,7 +3135,6 @@ std::pair<std::vector<RootPredicateInfo>, ReferenceTensor> Index::
     std::tie(info.start_offset_, info.stop_offset_) = getStartAndStopOffsets(
         contig_id,
         consumer_tv,
-        reference,
         consumer_start_index_map,
         consumer_stop_index_map,
         shift_padding,
@@ -3175,12 +3148,12 @@ std::pair<std::vector<RootPredicateInfo>, ReferenceTensor> Index::
         start_index,
         stop_index,
         loops,
+        stop_indexing_from_idgraph.resolved_loop_domains,
+        start_indexing_from_idgraph.initial_concrete_index_map,
+        stop_indexing_from_idgraph.initial_concrete_index_map,
         unswitch_or_vec_loop,
         contig_id,
-        consumer_tv,
-        reference.domain,
-        ref_start_indexing.indexMap(),
-        ref_stop_indexing.indexMap());
+        consumer_tv);

     // Build predicates for start positions as:
     //   start_index + start_offset >= 0
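
The start predicate form is spelled out in the comment above; the stop side is truncated in this view, but a minimal sketch of the intended bounds check looks like the following (the `index < extent` stop form is an assumption, as is the helper itself, which uses plain integers instead of nvfuser Vals):

// Hedged sketch: hypothetical helper, not the actual codegen.
#include <cstdint>

bool inBounds(
    int64_t start_index,
    int64_t start_offset,
    int64_t stop_index,
    int64_t stop_offset,
    int64_t extent) {
  // Start predicate, as stated in the comment above:
  const bool start_ok = start_index + start_offset >= 0;
  // Stop predicate (assumed symmetric form, not shown in this hunk):
  const bool stop_ok = stop_index + stop_offset < extent;
  return start_ok && stop_ok;
}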
Expand Down Expand Up @@ -3217,7 +3190,7 @@ std::pair<std::vector<RootPredicateInfo>, ReferenceTensor> Index::
     pred_info_vec.emplace_back(info);
   }

-  return {pred_info_vec, reference};
+  return pred_info_vec;
 }

 bool Index::protectWithMagicZero(
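
For callers, the visible effect of this change is the simpler return type: there is no longer a ReferenceTensor to unpack. An illustrative call-site sketch, with surrounding variable names hypothetical:

// Before: auto [pred_info_vec, reference] =
//     Index::getReferenceRootPredicates(tv, loops, unswitch_loop, false);
// After: the vector comes back directly.
std::vector<RootPredicateInfo> pred_info_vec =
    Index::getReferenceRootPredicates(
        consumer_tv, for_loops, /*unswitch_or_vec_loop=*/nullptr,
        /*shift_padding=*/false);
for (const auto& info : pred_info_vec) {
  // Each RootPredicateInfo carries the start/stop offsets set above.
}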
3 changes: 1 addition & 2 deletions torch/csrc/jit/codegen/cuda/index_compute.h
@@ -345,8 +345,7 @@ class Index {
   //! this is not a bool value as if we have an unswitch loop with a vectorized
   //! loop inside, we only want to base the "unswitch" like predicate on the
   //! vectorized loop.
-  static std::pair<std::vector<RootPredicateInfo>, ReferenceTensor>
-  getReferenceRootPredicates(
+  static std::vector<RootPredicateInfo> getReferenceRootPredicates(
       TensorView* consumer_tv,
       const std::vector<kir::ForLoop*>& loops,
       kir::ForLoop* unswitch_or_vec_loop,