diff --git a/slothy/core/config.py b/slothy/core/config.py
index b7d98abb..ad3f387e 100644
--- a/slothy/core/config.py
+++ b/slothy/core/config.py
@@ -619,6 +619,18 @@ def split_heuristic_repeat(self):
                             "Shouldn't read config.split_heuristic_repeat otherwise.")
         return self._split_heuristic_repeat
 
+    @property
+    def split_heuristic_preprocess_naive_interleaving_strategy(self):
+        """Strategy for naive interleaving preprocessing step
+
+        Supported values (default: "depth"):
+          - "depth": Always pick the instruction with the lowest possible
+                     depth in the DFG first.
+          - "alternate": Alternate between instructions tagged "interleaving_class=0"
+                         and "interleaving_class=1", following their overall ratio.
+        """
+        return self._split_heuristic_preprocess_naive_interleaving_strategy
+
     def copy(self):
         """Make a deep copy of the configuration"""
         # Temporarily unset references to Arch and Target for deepcopy
@@ -1108,6 +1120,7 @@ def __init__(self, Arch, Target):
         self._split_heuristic_repeat = 1
         self._split_heuristic_preprocess_naive_interleaving = False
         self._split_heuristic_preprocess_naive_interleaving_by_latency = False
+        self._split_heuristic_preprocess_naive_interleaving_strategy = "depth"
         self._split_heuristic_estimate_performance = True
 
         self._compiler_binary = "gcc"
@@ -1303,6 +1316,13 @@ def split_heuristic_preprocess_naive_interleaving(self, val):
     @split_heuristic_preprocess_naive_interleaving_by_latency.setter
     def split_heuristic_preprocess_naive_interleaving_by_latency(self, val):
         self._split_heuristic_preprocess_naive_interleaving_by_latency = val
+    @split_heuristic_preprocess_naive_interleaving_strategy.setter
+    def split_heuristic_preprocess_naive_interleaving_strategy(self, val):
+        # Reject unknown strategies here instead of relying on an assert in the heuristic
+        if val not in ("depth", "alternate"):
+            raise ValueError("split_heuristic_preprocess_naive_interleaving_strategy "
+                             f"must be 'depth' or 'alternate', got {val!r}")
+        self._split_heuristic_preprocess_naive_interleaving_strategy = val
     @split_heuristic_estimate_performance.setter
     def split_heuristic_estimate_performance(self, val):
         self._split_heuristic_estimate_performance = val
diff --git a/slothy/core/heuristics.py b/slothy/core/heuristics.py
index c291d9c9..900b78e4 100644
--- a/slothy/core/heuristics.py
+++ b/slothy/core/heuristics.py
@@ -417,9 +417,7 @@ def _naive_reordering(body, logger, conf, use_latency_depth=False):
         dfg = DFG(body, logger.getChild("dfg"), DFGConfig(conf.copy()), parsing_cb=True)
         insts = [dfg.nodes[i] for i in range(l)]
 
-        if use_latency_depth is False:
-            depths = [dfg.nodes_by_id[i].depth for i in range(l) ]
-        else:
+        if use_latency_depth is not False:
             # Calculate latency-depth of instruction nodes
             nodes_by_depth = dfg.nodes.copy()
             nodes_by_depth.sort(key=lambda t: t.depth)
@@ -434,7 +432,16 @@ def get_latency(tp,t):
                 t.latency_depth = max(map(lambda tp, t=t: tp.src.latency_depth +
                                           get_latency(tp, t), srcs),
                                       default=0)
-            depths = [dfg.nodes_by_id[i].latency_depth for i in range(l) ]
+
+        def get_depth(t):
+            """Depth used to rank node `t`, scaled by its optional source tag."""
+            # Plain DFG depth unless latency-depth ranking was requested
+            base = t.depth if use_latency_depth is False else t.latency_depth
+            # The "naive_interleaving_scale" source tag (default 1.0) scales the depth
+            factor = float(t.inst.source_line.tags.get("naive_interleaving_scale", 1.0))
+            return int(base * factor)
+
+        depths = [get_depth(dfg.nodes_by_id[i]) for i in range(l) ]
 
         inputs = dfg.inputs.copy()
         outputs = conf.outputs.copy()
@@ -449,6 +456,17 @@ def get_outputs(inst):
         joint_prev_inputs = {}
         joint_prev_outputs = {}
 
+        strategy = conf.split_heuristic_preprocess_naive_interleaving_strategy
+
+        def get_interleaving_class(j):  # "interleaving_class" tag of insts[j]; 0 if untagged
+            return int(insts[j].inst.source_line.tags.get("interleaving_class", 0))
+
+        if strategy == "alternate":
+            # Overall class-0 : class-1 instruction-count ratio (counts clamped to >= 1)
+            sz_0 = max(sum(1 for j in range(l) if get_interleaving_class(j) == 0), 1)
+            sz_1 = max(sum(1 for j in range(l) if get_interleaving_class(j) == 1), 1)
+            target_ratio = sz_0 / sz_1
+
         for i in range(l):
             cur_joint_prev_inputs = set()
             cur_joint_prev_outputs = set()
@@ -477,50 +495,31 @@ def could_come_next(j):
 
             def pick_candidate(candidate_idxs):
 
-                strategy = "minimal_depth"
-
-                if strategy == "minimal_depth":
+                if strategy == "depth":
                     candidate_depths = list(map(lambda j: depths[j], candidate_idxs))
                     logger.debug("Candidate %s: %s", depth_str, candidate_depths)
                     choice_idx = candidate_idxs[candidate_depths.index(min(candidate_depths))]
 
                 else:
-                    assert strategy == "alternate_functional_units"
-                    def flatten_units(units):
-                        res = []
-                        for u in units:
-                            if isinstance(u,list):
-                                res += u
-                            else:
-                                res.append(u)
-                        return res
-                    def units_disjoint(a,b):
-                        if a is None or b is None:
-                            return True
-                        a = flatten_units(a)
-                        b = flatten_units(b)
-                        return len([x for x in a if x in b]) == 0
-                    def units_different(a,b):
-                        return a != b
-
-                    disjoint_unit_idxs = [ i for i in candidate_idxs
-                        if units_disjoint(conf.target.get_units(insts[i].inst), last_unit) ]
-                    other_unit_idxs = [ i for i in candidate_idxs
-                        if units_different(conf.target.get_units(insts[i].inst), last_unit) ]
-
-                    if len(disjoint_unit_idxs) > 0:
-                        choice_idx = random.choice(disjoint_unit_idxs)
-                        last_unit = conf.target.get_units(insts[choice_idx].inst)
-                    elif len(other_unit_idxs) > 0:
-                        choice_idx = random.choice(other_unit_idxs)
-                        last_unit = conf.target.get_units(insts[choice_idx].inst)
+                    assert strategy == "alternate"
+
+                    # Class counts over insts[0..i), each clamped to >= 1 (no division by zero)
+                    seen = [get_interleaving_class(j) for j in range(i)]
+                    current_ratio = max(seen.count(0), 1) / max(seen.count(1), 1)
+
+                    # First candidate of each class, or None if that class has none
+                    c0 = next((j for j in candidate_idxs
+                               if get_interleaving_class(j) == 0), None)
+                    c1 = next((j for j in candidate_idxs
+                               if get_interleaving_class(j) == 1), None)
+
+                    # Class 0 is ahead of the target ratio: prefer a class-1 candidate
+                    if current_ratio > target_ratio and c1 is not None:
+                        choice_idx = c1
+                    elif c0 is not None:
+                        choice_idx = c0
                     else:
-                        candidate_depths = list(map(lambda j: depths[j], candidate_idxs))
-                        logger.debug(f"Candidate {depth_str}s: {candidate_depths}")
-                        min_depth = min(candidate_depths)
-                        refined_candidates = [ candidate_idxs[i]
-                            for i,d in enumerate(candidate_depths) if d == min_depth ]
-                        choice_idx = random.choice(refined_candidates)
+                        choice_idx = candidate_idxs[0]
 
                 return choice_idx
 
@@ -786,7 +785,7 @@ def not_empty(x):
         res.output_renamings = { s:s for s in outputs }
         res.valid = True
         res.selfcheck(log.getChild("split_heuristic_full"))
-        
+
         # Estimate performance of final code
         if conf.split_heuristic_estimate_performance:
             conf2 = conf.copy()