diff --git a/libs/libarchfpga/src/arch_check.cpp b/libs/libarchfpga/src/arch_check.cpp
index c8fb00299c4..5360d6e4c02 100644
--- a/libs/libarchfpga/src/arch_check.cpp
+++ b/libs/libarchfpga/src/arch_check.cpp
@@ -32,7 +32,7 @@ bool check_model_clocks(t_model* model, const char* file, uint32_t line) {
 bool check_model_combinational_sinks(const t_model* model, const char* file, uint32_t line) {
     //Outputs should have no combinational sinks
     for (t_model_ports* port = model->outputs; port != nullptr; port = port->next) {
-        if (port->combinational_sink_ports.size() != 0) {
+        if (!port->combinational_sink_ports.empty()) {
             archfpga_throw(file, line,
                            "Model '%s' output port '%s' can not have combinational sink ports",
                            model->name, port->name);
@@ -114,9 +114,9 @@ void check_port_direct_mappings(t_physical_tile_type_ptr physical_tile, t_sub_ti
     }
 
     for (auto pin_map : pin_direct_map) {
-        auto block_port = get_port_by_pin(logical_block, pin_map.first.pin);
+        const t_port* block_port = logical_block->get_port_by_pin(pin_map.first.pin);
 
-        auto sub_tile_port = get_port_by_pin(sub_tile, pin_map.second.pin);
+        const t_physical_tile_port* sub_tile_port = sub_tile->get_port_by_pin(pin_map.second.pin);
 
         VTR_ASSERT(block_port != nullptr);
         VTR_ASSERT(sub_tile_port != nullptr);
diff --git a/libs/libarchfpga/src/arch_util.h b/libs/libarchfpga/src/arch_util.h
index c39cf77b94f..fb251bffe10 100644
--- a/libs/libarchfpga/src/arch_util.h
+++ b/libs/libarchfpga/src/arch_util.h
@@ -23,8 +23,8 @@ class InstPort {
 
     InstPort() = default;
     InstPort(const std::string& str);
-    std::string instance_name() const { return instance_.name; }
-    std::string port_name() const { return port_.name; }
+    const std::string& instance_name() const { return instance_.name; }
+    const std::string& port_name() const { return port_.name; }
 
     int instance_low_index() const { return instance_.low_idx; }
     int instance_high_index() const { return instance_.high_idx; }
@@ -40,7 +40,7 @@ class InstPort {
 
   private:
     struct name_index {
-        std::string name = "";
+        std::string name;
         int low_idx = UNSPECIFIED;
         int high_idx = UNSPECIFIED;
     };
diff --git a/libs/libarchfpga/src/physical_types.cpp b/libs/libarchfpga/src/physical_types.cpp
index 3bdabaee2a7..bdacf50931d 100644
--- a/libs/libarchfpga/src/physical_types.cpp
+++ b/libs/libarchfpga/src/physical_types.cpp
@@ -136,6 +136,56 @@ bool t_physical_tile_type::is_empty() const {
     return name == std::string(EMPTY_BLOCK_NAME);
 }
 
+int t_physical_tile_type::find_pin(std::string_view port_name, int pin_index_in_port) const {
+    int ipin = OPEN;           // result; stays OPEN when no port is named port_name
+    int port_base_ipin = 0;    // pins of the ports preceding the match inside its sub tile
+    int num_port_pins = OPEN;  // num_pins of the matched port; OPEN until a match is found
+    int pin_offset = 0;        // sum of num_phy_pins over the sub tiles preceding the match
+    // Walk the sub tiles in order and stop at the first port whose name equals port_name.
+    bool port_found = false;
+    for (const t_sub_tile& sub_tile : sub_tiles) {
+        for (const t_physical_tile_port& port : sub_tile.ports) {
+            if (port_name == port.name) {
+                port_found = true;
+                num_port_pins = port.num_pins;
+                break;
+            }
+            // Not the requested port: skip over its pins.
+            port_base_ipin += port.num_pins;
+        }
+
+        if (port_found) {
+            break;
+        }
+        // Port is not in this sub tile: restart the per-sub-tile base and skip the sub tile's pins.
+        port_base_ipin = 0;
+        pin_offset += sub_tile.num_phy_pins;
+    }
+
+    if (num_port_pins != OPEN) {
+        VTR_ASSERT(pin_index_in_port < num_port_pins);  // NOTE(review): only the upper bound is checked; a negative index is not rejected here
+
+        ipin = port_base_ipin + pin_index_in_port + pin_offset;
+    }
+
+    return ipin;
+}
+
+int t_physical_tile_type::find_pin_class(std::string_view port_name, int pin_index_in_port, e_pin_type pin_type) const {
+    int iclass = OPEN;  // returned unchanged when find_pin() does not find the pin
+    // Resolve the (port name, index in port) pair to the pin index used to index pin_class.
+    int ipin = find_pin(port_name, pin_index_in_port);
+    // Only look up a class for a pin that was actually found.
+    if (ipin != OPEN) {
+        iclass = pin_class[ipin];
+        // A pin that has a class is asserted to have a class of the requested pin_type.
+        if (iclass != OPEN) {
+            VTR_ASSERT(class_inf[iclass].type == pin_type);
+        }
+    }
+    return iclass;
+}
+
 /*
  * t_logical_block_type
  */
@@ -144,6 +194,28 @@ bool t_logical_block_type::is_empty() const {
     return name == std::string(EMPTY_BLOCK_NAME);
 }
 
+const t_port* t_logical_block_type::get_port(std::string_view port_name) const {
+    for (int i = 0; i < pb_type->num_ports; i++) {  // linear scan over the ports of pb_type
+        const t_port& port = pb_type->ports[i];  // bind by reference: the port is only inspected, so no per-iteration t_port copy
+        if (port_name == port.name) {
+            return &pb_type->ports[port.index];
+        }
+    }
+    // No port of pb_type has the requested name.
+    return nullptr;
+}
+
+const t_port* t_logical_block_type::get_port_by_pin(int pin) const {
+    for (int i = 0; i < pb_type->num_ports; i++) {  // linear scan over the ports of pb_type
+        const t_port& port = pb_type->ports[i];  // matches pins in [absolute_first_pin_index, absolute_first_pin_index + num_pins)
+        if (pin >= port.absolute_first_pin_index && pin < port.absolute_first_pin_index + port.num_pins) {
+            return &pb_type->ports[port.index];
+        }
+    }
+    // No port's pin range contains pin.
+    return nullptr;
+}
+
 /**
  * t_pb_graph_node
  */
@@ -220,7 +292,7 @@ std::string t_pb_graph_pin::to_string(const bool full_description) const {
     return pin_string;
 }
 
-/**
+/*
  * t_pb_graph_edge
  */
 
@@ -253,3 +325,39 @@ bool t_pb_graph_edge::belongs_to_pattern(int pattern_index) const {
     // return false otherwise
     return false;
 }
+
+/*
+ * t_sub_tile
+ */
+
+int t_sub_tile::total_num_internal_pins() const {
+    int num_pins = 0;
+    // Sum the size of pin_logical_num_to_pb_pin_mapping over every equivalent site of this sub tile.
+    for (t_logical_block_type_ptr eq_site : equivalent_sites) {
+        num_pins += (int)eq_site->pin_logical_num_to_pb_pin_mapping.size();
+    }
+    // Scale the summed count by capacity.total().
+    num_pins *= capacity.total();
+    // The result is the total across the whole capacity, not a per-instance count.
+    return num_pins;
+}
+
+const t_physical_tile_port* t_sub_tile::get_port(std::string_view port_name) {  // NOTE(review): not const-qualified, unlike get_port_by_pin, so it cannot be called on a const t_sub_tile
+    for (const t_physical_tile_port& port : ports) {  // linear scan over this sub tile's ports
+        if (port_name == port.name) {
+            return &ports[port.index];  // looked up again through port.index rather than returning &port
+        }
+    }
+    // No port of this sub tile has the requested name.
+    return nullptr;
+}
+
+const t_physical_tile_port* t_sub_tile::get_port_by_pin(int pin) const {
+    for (const t_physical_tile_port& port : ports) {  // matches pins in [absolute_first_pin_index, absolute_first_pin_index + num_pins)
+        if (pin >= port.absolute_first_pin_index && pin < port.absolute_first_pin_index + port.num_pins) {
+            return &ports[port.index];  // looked up again through port.index rather than returning &port
+        }
+    }
+    // No port's pin range contains pin.
+    return nullptr;
+}
\ No newline at end of file
diff --git a/libs/libarchfpga/src/physical_types.h b/libs/libarchfpga/src/physical_types.h
index 4d415697554..c11f1c451ee 100644
--- a/libs/libarchfpga/src/physical_types.h
+++ b/libs/libarchfpga/src/physical_types.h
@@ -24,8 +24,7 @@
  * Authors: Jason Luu and Kenneth Kent
  */
 
-#ifndef PHYSICAL_TYPES_H
-#define PHYSICAL_TYPES_H
+#pragma once
 
 #include <functional>
 #include <utility>
@@ -704,11 +703,7 @@ struct t_physical_tile_type {
      * tile_block_pin_directs_map[logical block index][logical block pin] -> physical tile pin */
     std::unordered_map<int, std::unordered_map<int, vtr::bimap<t_logical_pin, t_physical_pin>>> tile_block_pin_directs_map;
 
-    /* Returns the indices of pins that contain a clock for this physical logic block */
-    std::vector<int> get_clock_pins_indices() const;
 
-    // Returns the sub tile location of the physical tile given an input pin
-    int get_sub_tile_loc_from_pin(int pin_num) const;
 
     // TODO: Remove is_input_type / is_output_type as part of
     // https://github.com/verilog-to-routing/vtr-verilog-to-routing/issues/1193
@@ -719,8 +714,21 @@ struct t_physical_tile_type {
     // Does this t_physical_tile_type contain an outpad?
     bool is_output_type = false;
 
-    // Is this t_physical_tile_type an empty type?
+  public:   // Function members
+    ///@brief Returns the indices of pins that contain a clock for this physical logic block
+    std::vector<int> get_clock_pins_indices() const;
+
+    ///@brief Returns the sub tile location of the physical tile given an input pin
+    int get_sub_tile_loc_from_pin(int pin_num) const;
+
+    ///@brief Is this t_physical_tile_type an empty type?
     bool is_empty() const;
+
+    ///@brief Returns the pin index within this physical tile type (port offset inside its sub tile plus the pins of all preceding sub tiles) for the given port name and index in the port, or OPEN if no port has that name
+    int find_pin(std::string_view port_name, int pin_index_in_port) const;
+
+    ///@brief Returns the pin class associated with the specified pin_index_in_port within the port port_name on this physical tile type
+    int find_pin_class(std::string_view port_name, int pin_index_in_port, e_pin_type pin_type) const;
 };
 
 /* Holds the capacity range of a certain sub_tile block within the parent physical tile type.
@@ -796,6 +804,19 @@ struct t_sub_tile {
     int num_phy_pins = 0;
 
     int index = -1;
+
+  public:
+    int total_num_internal_pins() const;
+
+    /**
+     * @brief Returns the physical tile port given the port name and the corresponding sub tile
+     */
+    const t_physical_tile_port* get_port(std::string_view port_name);
+
+    /**
+     * @brief Returns the physical tile port given the pin index and the corresponding sub tile
+     */
+    const t_physical_tile_port* get_port_by_pin(int pin) const;
 };
 
 /** A logical pin defines the pin index of a logical block type (i.e. a top level PB type)
@@ -950,6 +971,17 @@ struct t_logical_block_type {
 
     // Is this t_logical_block_type empty?
     bool is_empty() const;
+
+  public:
+    /**
+     * @brief Returns the logical block port given the port name and the corresponding logical block type
+     */
+    const t_port* get_port(std::string_view port_name) const;
+
+    /**
+     * @brief Returns the logical block port given the pin index and the corresponding logical block type
+     */
+    const t_port* get_port_by_pin(int pin) const;
 };
 
 /*************************************************************************************************
@@ -2124,5 +2156,3 @@ struct t_arch {
     /// Stores NoC-related architectural information when there is an embedded NoC
     t_noc_inf* noc = nullptr;
 };
-
-#endif
diff --git a/libs/libarchfpga/src/physical_types_util.cpp b/libs/libarchfpga/src/physical_types_util.cpp
index 2256f81d66c..2ecc7fbd41c 100644
--- a/libs/libarchfpga/src/physical_types_util.cpp
+++ b/libs/libarchfpga/src/physical_types_util.cpp
@@ -154,7 +154,7 @@ static std::tuple<int, int, int, int, int> get_pin_index_for_inst(t_physical_til
         pin_inst_num = (pin_physical_num - pin_offset) % pins_per_inst;
     } else {
         int pin_offset = get_sub_tile_inst_physical_pin_num_offset(type, sub_tile, sub_tile_cap);
-        int pins_per_inst = get_total_num_sub_tile_internal_pins(sub_tile) / sub_tile->capacity.total();
+        int pins_per_inst = sub_tile->total_num_internal_pins() / sub_tile->capacity.total();
         pin_inst_num = (pin_physical_num - pin_offset) % pins_per_inst;
     }
 
@@ -225,7 +225,7 @@ static int get_sub_tile_physical_pin_num_offset(t_physical_tile_type_ptr physica
         if (&tmp_sub_tile == curr_sub_tile)
             break;
         else
-            offset += get_total_num_sub_tile_internal_pins(&tmp_sub_tile);
+            offset += tmp_sub_tile.total_num_internal_pins();
     }
 
     return offset;
@@ -235,7 +235,7 @@ static int get_sub_tile_inst_physical_pin_num_offset(t_physical_tile_type_ptr ph
                                                      const t_sub_tile* curr_sub_tile,
                                                      const int curr_relative_cap) {
     int offset = get_sub_tile_physical_pin_num_offset(physical_tile, curr_sub_tile);
-    int sub_tile_inst_num_pins = get_total_num_sub_tile_internal_pins(curr_sub_tile) / curr_sub_tile->capacity.total();
+    int sub_tile_inst_num_pins = curr_sub_tile->total_num_internal_pins() / curr_sub_tile->capacity.total();
 
     offset += (curr_relative_cap * sub_tile_inst_num_pins);
 
@@ -563,57 +563,6 @@ int get_max_num_pins(t_logical_block_type_ptr logical_block) {
     return max_num_pins;
 }
 
-//Returns the pin class associated with the specified pin_index_in_port within the port port_name on type
-int find_pin_class(t_physical_tile_type_ptr type, std::string port_name, int pin_index_in_port, e_pin_type pin_type) {
-    int iclass = OPEN;
-
-    int ipin = find_pin(type, port_name, pin_index_in_port);
-
-    if (ipin != OPEN) {
-        iclass = type->pin_class[ipin];
-
-        if (iclass != OPEN) {
-            VTR_ASSERT(type->class_inf[iclass].type == pin_type);
-        }
-    }
-    return iclass;
-}
-
-int find_pin(t_physical_tile_type_ptr type, std::string port_name, int pin_index_in_port) {
-    int ipin = OPEN;
-    int port_base_ipin = 0;
-    int num_pins = OPEN;
-    int pin_offset = 0;
-
-    bool port_found = false;
-    for (const auto& sub_tile : type->sub_tiles) {
-        for (const auto& port : sub_tile.ports) {
-            if (0 == strcmp(port.name, port_name.c_str())) {
-                port_found = true;
-                num_pins = port.num_pins;
-                break;
-            }
-
-            port_base_ipin += port.num_pins;
-        }
-
-        if (port_found) {
-            break;
-        }
-
-        port_base_ipin = 0;
-        pin_offset += sub_tile.num_phy_pins;
-    }
-
-    if (num_pins != OPEN) {
-        VTR_ASSERT(pin_index_in_port < num_pins);
-
-        ipin = port_base_ipin + pin_index_in_port + pin_offset;
-    }
-
-    return ipin;
-}
-
 std::pair<int, int> get_capacity_location_from_physical_pin(t_physical_tile_type_ptr physical_tile, int pin) {
     int pins_to_remove = 0;
     for (const auto& sub_tile : physical_tile->sub_tiles) {
@@ -638,7 +587,7 @@ std::pair<int, int> get_capacity_location_from_physical_pin(t_physical_tile_type
 
 int get_physical_pin_from_capacity_location(t_physical_tile_type_ptr physical_tile, int relative_pin, int capacity_location) {
     int pins_to_add = 0;
-    for (auto sub_tile : physical_tile->sub_tiles) {
+    for (const t_sub_tile& sub_tile : physical_tile->sub_tiles) {
         auto capacity = sub_tile.capacity;
         int rel_capacity = capacity_location - capacity.low;
         int num_inst_pins = sub_tile.num_phy_pins / capacity.total();
@@ -841,52 +790,6 @@ std::vector<std::string> block_type_class_index_to_pin_names(t_physical_tile_typ
     return pin_names;
 }
 
-const t_physical_tile_port* get_port_by_name(t_sub_tile* sub_tile, const char* port_name) {
-    for (auto port : sub_tile->ports) {
-        if (0 == strcmp(port.name, port_name)) {
-            return &sub_tile->ports[port.index];
-        }
-    }
-
-    return nullptr;
-}
-
-const t_port* get_port_by_name(t_logical_block_type_ptr type, const char* port_name) {
-    auto pb_type = type->pb_type;
-
-    for (int i = 0; i < pb_type->num_ports; i++) {
-        auto port = pb_type->ports[i];
-        if (0 == strcmp(port.name, port_name)) {
-            return &pb_type->ports[port.index];
-        }
-    }
-
-    return nullptr;
-}
-
-const t_physical_tile_port* get_port_by_pin(const t_sub_tile* sub_tile, int pin) {
-    for (auto port : sub_tile->ports) {
-        if (pin >= port.absolute_first_pin_index && pin < port.absolute_first_pin_index + port.num_pins) {
-            return &sub_tile->ports[port.index];
-        }
-    }
-
-    return nullptr;
-}
-
-const t_port* get_port_by_pin(t_logical_block_type_ptr type, int pin) {
-    auto pb_type = type->pb_type;
-
-    for (int i = 0; i < pb_type->num_ports; i++) {
-        auto port = pb_type->ports[i];
-        if (pin >= port.absolute_first_pin_index && pin < port.absolute_first_pin_index + port.num_pins) {
-            return &pb_type->ports[port.index];
-        }
-    }
-
-    return nullptr;
-}
-
 /* Access information related to pin classes */
 
 /** get information given class physical num **/
@@ -1009,7 +912,7 @@ std::tuple<const t_sub_tile*, int> get_sub_tile_from_pin_physical_num(t_physical
     int pin_offset = total_pin_counts;
 
     for (auto& sub_tile : physical_tile->sub_tiles) {
-        int sub_tile_num_pins = pin_on_tile ? sub_tile.num_phy_pins : get_total_num_sub_tile_internal_pins(&sub_tile);
+        int sub_tile_num_pins = pin_on_tile ? sub_tile.num_phy_pins : sub_tile.total_num_internal_pins();
         total_pin_counts += sub_tile_num_pins;
 
         if (physical_num < total_pin_counts) {
@@ -1347,15 +1250,6 @@ const t_pb_graph_node* get_pb_graph_node_from_pin_physical_num(t_physical_tile_t
     return pb_graph_pin->parent_node;
 }
 
-int get_total_num_sub_tile_internal_pins(const t_sub_tile* sub_tile) {
-    int num_pins = 0;
-    for (auto eq_site : sub_tile->equivalent_sites) {
-        num_pins += (int)eq_site->pin_logical_num_to_pb_pin_mapping.size();
-    }
-    num_pins *= sub_tile->capacity.total();
-    return num_pins;
-}
-
 int get_tile_pin_max_ptc(t_physical_tile_type_ptr tile, bool is_flat) {
     if (is_flat) {
         return tile->num_pins + (int)tile->pin_num_to_pb_pin.size();
@@ -1538,4 +1432,3 @@ std::map<int, int> get_sink_choking_points(t_physical_tile_type_ptr physical_til
 
     return choking_point;
 }
-/* */
diff --git a/libs/libarchfpga/src/physical_types_util.h b/libs/libarchfpga/src/physical_types_util.h
index aa7b2617834..a081683faeb 100644
--- a/libs/libarchfpga/src/physical_types_util.h
+++ b/libs/libarchfpga/src/physical_types_util.h
@@ -1,5 +1,5 @@
-#ifndef PHYSICAL_TYPES_UTIL_H
-#define PHYSICAL_TYPES_UTIL_H
+
+#pragma once
 
 #include "physical_types.h"
 
@@ -13,11 +13,11 @@
  *  functions in this file are the following:                       *
  *    - physical_tile_type: identifies a placeable tile within      *
  *                          the device grid.                        *
- *    - logical_block_tpye: identifies a clustered block type       *
+ *    - logical_block_type: identifies a clustered block type       *
  *                          within the clb_netlist                  *
  *                                                                  *
  *  All the following utilities are intended to ease the            *
- *  developement to access the above mentioned classes and perform  *
+ *  development to access the above mentioned classes and perform   *
  *  some required operations with their data.                       *
  *                                                                  *
  *  Please classify such functions in this file                     *
@@ -107,7 +107,7 @@
  *
  * For instance, the following information are required:
  *   - mapping between logical and sub tile pins.
- *   - mapping between sub tile pins and absoulte physical pin
+ *   - mapping between sub tile pins and absolute physical pin
  *   - capacity instance of the sub tile
  *
  * With all the above information we can calculate correctly the connection between the CLK (logical pin)
@@ -152,12 +152,12 @@ int get_physical_pin_from_capacity_location(t_physical_tile_type_ptr physical_ti
  *
  * Take the above CLOCK TILE example:
  *   - given the CLOCK TILE and the index corresponding to the CLK_1 pin, we want the relative pin
- *     of one of its sub tiles at a particualr capacity location (i.e. sub tile instance).
+ *     of one of its sub tiles at a particular capacity location (i.e. sub tile instance).
  *
  * std::tie(absolute_capacity, relative_pin) = get_capacity_location_from_physical_pin(clock_tile, 3)
  *
  * The value returned is (1, 0), where:
- *   - 1 corresponds to the capacity location (sub tile instance) where the absoulte physical pin index (CLK_1) is connected
+ *   - 1 corresponds to the capacity location (sub tile instance) where the absolute physical pin index (CLK_1) is connected
  *   - 0 corresponds to the relative pin index within the BUFGCTRL sub tile
  */
 std::pair<int, int> get_capacity_location_from_physical_pin(t_physical_tile_type_ptr physical_tile, int pin);
@@ -173,11 +173,6 @@ std::vector<std::string> block_type_class_index_to_pin_names(t_physical_tile_typ
 ///@brief Returns the physical tile type matching a given physical tile type name, or nullptr (if not found)
 t_physical_tile_type_ptr find_tile_type_by_name(const std::string& name, const std::vector<t_physical_tile_type>& types);
 
-int find_pin_class(t_physical_tile_type_ptr type, std::string port_name, int pin_index_in_port, e_pin_type pin_type);
-
-///@brief Returns the relative pin index within a sub tile that corresponds to the pin within the given port and its index in the port
-int find_pin(t_physical_tile_type_ptr type, std::string port_name, int pin_index_in_port);
-
 ///@brief Returns the maximum number of pins within a logical block
 int get_max_num_pins(t_logical_block_type_ptr logical_block);
 
@@ -217,7 +212,7 @@ int get_logical_block_physical_sub_tile_index(t_physical_tile_type_ptr physical_
                                               t_logical_block_type_ptr logical_block);
 /**
  * @brief Returns the physical pin index (within 'physical_tile') corresponding to the
- * logical index ('pin' of the first instance of 'logical_block' within the physcial tile.
+ * logical index ('pin') of the first instance of 'logical_block' within the physical tile.
  *
  * This function is called before/during placement, when a sub tile index was not yet assigned.
  *
@@ -228,7 +223,7 @@ int get_physical_pin(t_physical_tile_type_ptr physical_tile,
                      int pin);
 /**
  * @brief Returns the physical pin index (within 'physical_tile') corresponding to the
- * logical index ('pin' of the first instance of 'logical_block' within the physcial tile.
+ * logical index ('pin') of the first instance of 'logical_block' within the physical tile.
  * This function considers if a given offset is in the range of sub tile capacity
  *
  *   (First pin index at current sub-tile)                                     (The wanted pin index)
@@ -286,26 +281,6 @@ int get_sub_tile_physical_pin(int sub_tile_index,
  */
 t_physical_tile_port find_tile_port_by_name(t_physical_tile_type_ptr type, std::string_view port_name);
 
-/**
- * @brief Returns the physical tile port given the port name and the corresponding sub tile
- */
-const t_physical_tile_port* get_port_by_name(t_sub_tile* sub_tile, const char* port_name);
-
-/**
- * @brief Returns the logical block port given the port name and the corresponding logical block type
- */
-const t_port* get_port_by_name(t_logical_block_type_ptr type, const char* port_name);
-
-/**
- * @brief Returns the physical tile port given the pin name and the corresponding sub tile
- */
-const t_physical_tile_port* get_port_by_pin(const t_sub_tile* sub_tile, int pin);
-
-/**
- * @brief Returns the logical block port given the pin name and the corresponding logical block type
- */
-const t_port* get_port_by_pin(t_logical_block_type_ptr type, int pin);
-
 /************************************ Access to intra-block resources ************************************/
 
 /* Access information related to pin classes */
@@ -336,12 +311,6 @@ inline bool is_class_on_tile(t_physical_tile_type_ptr physical_tile, int class_p
 
 /**
  * @brief Classes are indexed in a way that the number of classes on the same pb_graph_node is continuous
- * @param physical_tile
- * @param sub_tile
- * @param logical_block
- * @param sub_tile_relative_cap
- * @param pb_graph_node
- * @return
  */
 t_class_range get_pb_graph_node_class_physical_range(t_physical_tile_type_ptr physical_tile,
                                                      const t_sub_tile* sub_tile,
@@ -358,15 +327,11 @@ std::vector<int> get_tile_root_classes(t_physical_tile_type_ptr physical_type);
 
 /**
  * Get the number of all classes, on the tile and inside the cluster.
- * @param physical_type
- * @return
  */
 t_class_range get_flat_tile_primitive_classes(t_physical_tile_type_ptr physical_type);
 /** **/
 int get_tile_class_max_ptc(t_physical_tile_type_ptr tile, bool is_flat);
 
-/*  */
-
 /* Access information related to pins */
 
 /** get information given pin physical number **/
@@ -434,8 +399,6 @@ int get_edge_sw_arch_idx(t_physical_tile_type_ptr physical_tile,
 const t_pb_graph_node* get_pb_graph_node_from_pin_physical_num(t_physical_tile_type_ptr physical_type,
                                                                int pin_physical_num);
 
-int get_total_num_sub_tile_internal_pins(const t_sub_tile* sub_tile);
-
 int get_tile_pin_max_ptc(t_physical_tile_type_ptr tile, bool is_flat);
 
 int get_tile_num_internal_pin(t_physical_tile_type_ptr tile);
@@ -459,11 +422,6 @@ float get_pin_primitive_comb_delay(t_physical_tile_type_ptr physical_type,
 
 /**
  * @brief This function is used during reachability analysis to check whether two classes should be put in the same group
- * @param physical_tile
- * @param first_class_ptc_num
- * @param second_class_ptc_num
- * @param is_flat
- * @return
  */
 bool classes_in_same_block(t_physical_tile_type_ptr physical_tile,
                            int first_class_ptc_num,
@@ -473,15 +431,8 @@ bool classes_in_same_block(t_physical_tile_type_ptr physical_tile,
 /**
  * @brief Given the sink group, identify the pins which can reach both sink_ptc_num and at least one of the sinks,
  * in the grp.
- * @param physical_tile
- * @param sink_ptc_num
- * @param grp
  * @return Key is the pin number and value is the number of sinks, including sink_ptc_num, in the grp reachable by the pin
  */
 std::map<int, int> get_sink_choking_points(t_physical_tile_type_ptr physical_tile,
                                            int sink_ptc_num,
                                            const std::vector<int>& grp);
-
-/* */
-
-#endif
diff --git a/libs/libarchfpga/src/read_xml_arch_file.cpp b/libs/libarchfpga/src/read_xml_arch_file.cpp
index 3950eb1b15b..46cde415630 100644
--- a/libs/libarchfpga/src/read_xml_arch_file.cpp
+++ b/libs/libarchfpga/src/read_xml_arch_file.cpp
@@ -774,7 +774,7 @@ static std::pair<int, int> ProcessPinString(pugi::xml_node Locations,
                        "No port name is present: %s\n", pin_loc_string);
     }
 
-    auto port = get_port_by_name(type, token.data);
+    auto port = type->get_port(token.data);
     if (port == nullptr) {
         archfpga_throw(loc_data.filename_c_str(), loc_data.line(Locations),
                        "Port %s for %s could not be found: %s\n",
diff --git a/utils/route_diag/src/main.cpp b/utils/route_diag/src/main.cpp
index debd89c8bd6..61b4bb644a3 100644
--- a/utils/route_diag/src/main.cpp
+++ b/utils/route_diag/src/main.cpp
@@ -9,13 +9,10 @@
 // Tool can either perform one route between a source (--source_rr_node) and
 // a sink (--sink_rr_node), or profile a source to all tiles (set
 // --source_rr_node and "--profile_source true").
-#include <cstdio>
-#include <cstring>
-#include <ctime>
+
 #include <fstream>
 
 #include "vtr_error.h"
-#include "vtr_memory.h"
 #include "vtr_log.h"
 #include "vtr_time.h"
 
@@ -28,16 +25,13 @@
 #include "globals.h"
 
 #include "net_delay.h"
-#include "RoutingDelayCalculator.h"
 #include "place_and_route.h"
 #include "router_delay_profiling.h"
 #include "route_tree.h"
 #include "route_common.h"
 #include "route_net.h"
-#include "route_export.h"
 #include "rr_graph.h"
-#include "rr_graph2.h"
-#include "timing_place_lookup.h"
+#include "compute_delta_delays_utils.h"
 
 struct t_route_util_options {
     /* Router diag tool Options */
@@ -238,36 +232,6 @@ static void profile_source(const Netlist<>& net_list,
     VTR_LOG("\n");
 }
 
-static t_chan_width setup_chan_width(t_router_opts router_opts,
-        t_chan_width_dist chan_width_dist) {
-    /*we give plenty of tracks, this increases routability for the */
-    /*lookup table generation */
-
-    t_graph_type graph_directionality;
-    int width_fac;
-
-    if (router_opts.fixed_channel_width == NO_FIXED_CHANNEL_WIDTH) {
-        auto& device_ctx = g_vpr_ctx.device();
-
-        auto type = find_most_common_tile_type(device_ctx.grid);
-
-        width_fac = 4 * type->num_pins;
-        /*this is 2x the value that binary search starts */
-        /*this should be enough to allow most pins to   */
-        /*connect to tracks in the architecture */
-    } else {
-        width_fac = router_opts.fixed_channel_width;
-    }
-
-    if (router_opts.route_type == GLOBAL) {
-        graph_directionality = GRAPH_BIDIR;
-    } else {
-        graph_directionality = GRAPH_UNIDIR;
-    }
-
-    return init_chan(width_fac, chan_width_dist, graph_directionality);
-}
-
 t_route_util_options read_route_util_options(int argc, const char** argv) {
     //Explicitly initialize for zero initialization
     t_route_util_options args = t_route_util_options();
@@ -323,17 +287,15 @@ int main(int argc, const char **argv) {
         const Netlist<>& net_list = is_flat ? (const Netlist<>&)g_vpr_ctx.atom().nlist :
                                             (const Netlist<>&)g_vpr_ctx.clustering().clb_nlist;
 
-        t_chan_width chan_width = setup_chan_width(
-                vpr_setup.RouterOpts,
-                Arch.Chans);
+        t_chan_width chan_width = setup_chan_width(vpr_setup.RouterOpts,
+                                                   Arch.Chans);
 
-        alloc_routing_structs(
-            chan_width,
-            vpr_setup.RouterOpts,
-            &vpr_setup.RoutingArch,
-            vpr_setup.Segments,
-            Arch.directs,
-            is_flat);
+        alloc_routing_structs(chan_width,
+                              vpr_setup.RouterOpts,
+                              &vpr_setup.RoutingArch,
+                              vpr_setup.Segments,
+                              Arch.directs,
+                              is_flat);
 
         if(route_options.profile_source) {
             profile_source(net_list,
diff --git a/vpr/src/base/place_and_route.cpp b/vpr/src/base/place_and_route.cpp
index ba7e20ccd80..7074d34662a 100644
--- a/vpr/src/base/place_and_route.cpp
+++ b/vpr/src/base/place_and_route.cpp
@@ -1,14 +1,9 @@
-#include <sys/types.h>
 
 #include <cstdio>
-#include <ctime>
-#include <climits>
 #include <cstdlib>
 #include <cmath>
 #include <algorithm>
 
-#include "vtr_util.h"
-#include "vtr_memory.h"
 #include "vtr_assert.h"
 #include "vtr_log.h"
 
@@ -16,7 +11,6 @@
 #include "vpr_utils.h"
 #include "vpr_error.h"
 #include "globals.h"
-#include "atom_netlist.h"
 #include "place_and_route.h"
 #include "place.h"
 #include "read_place.h"
@@ -24,21 +18,11 @@
 #include "route.h"
 #include "route_export.h"
 #include "draw.h"
-#include "stats.h"
-#include "check_route.h"
 #include "rr_graph.h"
-#include "net_delay.h"
-#include "timing_place.h"
 #include "read_xml_arch_file.h"
-#include "echo_files.h"
 #include "route_common.h"
-#include "place_macro.h"
-#include "power.h"
-#include "place_util.h"
 
 #include "RoutingDelayCalculator.h"
-#include "timing_info.h"
-#include "tatum/echo_writer.hpp"
 
 /******************* Subroutines local to this module ************************/
 
@@ -415,6 +399,36 @@ int binary_search_place_and_route(const Netlist<>& placement_net_list,
     return (final);
 }
 
+t_chan_width setup_chan_width(const t_router_opts& router_opts,
+                              t_chan_width_dist chan_width_dist) {
+    // Picks a channel width factor and graph directionality from router_opts, then builds the widths with init_chan().
+    // When no width is fixed, plenty of tracks are given; this increases routability for the lookup table generation.
+    // NOTE(review): chan_width_dist is taken by value but only forwarded to init_chan(), which takes a const reference.
+    t_graph_type graph_directionality;
+    int width_fac;
+
+    if (router_opts.fixed_channel_width == NO_FIXED_CHANNEL_WIDTH) {
+        auto& device_ctx = g_vpr_ctx.device();
+        // No fixed width requested: derive one from the most common tile type in the device grid.
+        auto type = find_most_common_tile_type(device_ctx.grid);
+
+        width_fac = 4 * type->num_pins;
+        // This is 2x the value that the binary search starts at;
+        // it should be enough to allow most pins to connect to
+        // tracks in the architecture.
+    } else {
+        width_fac = router_opts.fixed_channel_width;
+    }
+    // A GLOBAL route type selects a bidirectional graph; any other route type selects a unidirectional one.
+    if (router_opts.route_type == GLOBAL) {
+        graph_directionality = GRAPH_BIDIR;
+    } else {
+        graph_directionality = GRAPH_UNIDIR;
+    }
+
+    return init_chan(width_fac, chan_width_dist, graph_directionality);
+}
+
 /**
  * @brief Assigns widths to channels (in tracks).
  *
diff --git a/vpr/src/base/place_and_route.h b/vpr/src/base/place_and_route.h
index 6f191c0ff9e..538996548f2 100644
--- a/vpr/src/base/place_and_route.h
+++ b/vpr/src/base/place_and_route.h
@@ -2,11 +2,9 @@
 #define VPR_PLACE_AND_ROUTE_H
 
 #define INFINITE -1
-#define NOT_FOUND 0
 
 #define WNEED 1
 #define WL 2
-#define PROC_TIME 3
 
 #include "vpr_types.h"
 #include "timing_info.h"
@@ -18,7 +16,6 @@ struct t_fmap_cell {
     int fc;         ///<at this fc
     int wneed;      ///<need wneed to route
     int wirelength; ///<corresponding wirelength of successful routing at wneed
-    int proc_time;
     t_fmap_cell* next;
 };
 
@@ -39,6 +36,9 @@ int binary_search_place_and_route(const Netlist<>& placement_net_list,
                                   const std::shared_ptr<RoutingDelayCalculator>& delay_calc,
                                   bool is_flat);
 
+t_chan_width setup_chan_width(const t_router_opts& router_opts,
+                              t_chan_width_dist chan_width_dist);
+
 t_chan_width init_chan(int cfactor,
                        const t_chan_width_dist& chan_width_dist,
                        t_graph_type graph_directionality);
diff --git a/vpr/src/base/read_options.cpp b/vpr/src/base/read_options.cpp
index 78124dd85c3..145601ac66f 100644
--- a/vpr/src/base/read_options.cpp
+++ b/vpr/src/base/read_options.cpp
@@ -2295,7 +2295,7 @@ argparse::ArgumentParser create_arg_parser(const std::string& prog_name, t_optio
         .show_in(argparse::ShowIn::HELP_ONLY);
 
     place_timing_grp.add_argument(args.post_place_timing_report_file, "--post_place_timing_report")
-        .help("Name of the post-placement timing report file (not generated if unspecfied)")
+        .help("Name of the post-placement timing report file (not generated if unspecified)")
         .default_value("")
         .show_in(argparse::ShowIn::HELP_ONLY);
 
diff --git a/vpr/src/base/read_route.cpp b/vpr/src/base/read_route.cpp
index d2d3bc14d54..6ac9d099c4b 100644
--- a/vpr/src/base/read_route.cpp
+++ b/vpr/src/base/read_route.cpp
@@ -39,12 +39,12 @@
 #include "vpr_utils.h"
 #include "vpr_error.h"
 #include "place_and_route.h"
-#include "timing_place.h"
 #include "route_export.h"
 #include "echo_files.h"
 #include "route_common.h"
 #include "route_tree.h"
 #include "read_route.h"
+#include "d_ary_heap.h"
 
 #include "old_traceback.h"
 
@@ -212,7 +212,6 @@ static void process_nets(const Netlist<>& net_list, std::ifstream& fp, ClusterNe
         process_nodes(net_list, fp, inet, filename, lineno);
     }
     input_tokens.clear();
-    return;
 }
 
 static void process_nodes(const Netlist<>& net_list, std::ifstream& fp, ClusterNetId inet, const char* filename, int& lineno) {
diff --git a/vpr/src/noc/noc_routing_algorithm_creator.h b/vpr/src/noc/noc_routing_algorithm_creator.h
index 8cb9b777949..4c33d13f590 100644
--- a/vpr/src/noc/noc_routing_algorithm_creator.h
+++ b/vpr/src/noc/noc_routing_algorithm_creator.h
@@ -8,9 +8,10 @@
  * 
  * Overview
  * ========
- * There are a number of different available NoC routing algorithms. This class is a factory object for the NocRouting abstract class. This class constructs 
- * the appropriate routing algorithm based on the user specification in the
- * command line. The user identifies a 
+ * There are a number of different available NoC routing algorithms.
+ * This class is a factory object for the NocRouting abstract class.
+ * This class constructs the appropriate routing algorithm based on
+ * the user specification in the command line. The user identifies a
  * specific routing algorithm in the command line by providing a string
  * (which is the name of routing algorithm).
  * Then the corresponding routing algorithm is created here based on the 
diff --git a/vpr/src/place/analytic_placer.h b/vpr/src/place/analytic_placer.h
index b73b3486f57..b279b82e058 100644
--- a/vpr/src/place/analytic_placer.h
+++ b/vpr/src/place/analytic_placer.h
@@ -83,7 +83,6 @@
  */
 
 #    include "vpr_context.h"
-#    include "timing_place.h"
 #    include "PlacementDelayCalculator.h"
 
 /*
diff --git a/vpr/src/place/annealer.cpp b/vpr/src/place/annealer.cpp
index b18f60b27bd..e6e0ffc85dd 100644
--- a/vpr/src/place/annealer.cpp
+++ b/vpr/src/place/annealer.cpp
@@ -16,6 +16,8 @@
 #include "read_place.h"
 #include "placer_breakpoint.h"
 #include "RL_agent_util.h"
+#include "PlacerSetupSlacks.h"
+#include "PlacerCriticalities.h"
 
 /**************************************************************************/
 /*************** Static Function Declarations *****************************/
@@ -488,7 +490,7 @@ e_move_result PlacementAnnealer::try_swap_(MoveGenerator& move_generator,
             criticalities_->disable_update();
             setup_slacks_->enable_update();
             update_timing_classes(crit_params, timing_info_, criticalities_,
-                                  setup_slacks_, pin_timing_invalidator_, placer_state_);
+                                  setup_slacks_, pin_timing_invalidator_);
 
             /* Get the setup slack analysis cost */
             //TODO: calculate a weighted average of the slack cost and wiring cost
@@ -592,7 +594,7 @@ e_move_result PlacementAnnealer::try_swap_(MoveGenerator& move_generator,
 
                 // Revert the timing update
                 update_timing_classes(crit_params, timing_info_, criticalities_,
-                                      setup_slacks_, pin_timing_invalidator_, placer_state_);
+                                      setup_slacks_, pin_timing_invalidator_);
 
                 VTR_ASSERT_SAFE_MSG(
                     verify_connection_setup_slacks(setup_slacks_, placer_state_),
diff --git a/vpr/src/place/annealer.h b/vpr/src/place/annealer.h
index fd9b0dbd928..f788aea666d 100644
--- a/vpr/src/place/annealer.h
+++ b/vpr/src/place/annealer.h
@@ -17,6 +17,7 @@ enum class e_agent_state;
 
 class NocCostHandler;
 class NetPinTimingInvalidator;
+class PlacerSetupSlacks;
 
 /**
  * These variables keep track of the number of swaps
diff --git a/vpr/src/place/move_generators/centroid_move_generator.cpp b/vpr/src/place/move_generators/centroid_move_generator.cpp
index 45ba9121719..767fbf2ce7e 100644
--- a/vpr/src/place/move_generators/centroid_move_generator.cpp
+++ b/vpr/src/place/move_generators/centroid_move_generator.cpp
@@ -44,6 +44,7 @@ e_create_move CentroidMoveGenerator::propose_move(t_pl_blocks_to_be_moved& block
     ClusterBlockId b_from = propose_block_to_move(placer_opts,
                                                   proposed_action.logical_blk_type_index,
                                                   /*highly_crit_block=*/false,
+                                                  /*placer_criticalities=*/nullptr,
                                                   /*net_from=*/nullptr,
                                                   /*pin_from=*/nullptr,
                                                   placer_state,
diff --git a/vpr/src/place/move_generators/critical_uniform_move_generator.cpp b/vpr/src/place/move_generators/critical_uniform_move_generator.cpp
index 7a1d39ed308..7d36889c2f6 100644
--- a/vpr/src/place/move_generators/critical_uniform_move_generator.cpp
+++ b/vpr/src/place/move_generators/critical_uniform_move_generator.cpp
@@ -1,4 +1,6 @@
+
 #include "critical_uniform_move_generator.h"
+
 #include "globals.h"
 #include "place_constraints.h"
 #include "placer_state.h"
@@ -13,8 +15,8 @@ e_create_move CriticalUniformMoveGenerator::propose_move(t_pl_blocks_to_be_moved
                                                          t_propose_action& proposed_action,
                                                          float rlim,
                                                          const t_placer_opts& placer_opts,
-                                                         const PlacerCriticalities* /*criticalities*/) {
-    auto& cluster_ctx = g_vpr_ctx.clustering();
+                                                         const PlacerCriticalities* criticalities) {
+    const auto& cluster_ctx = g_vpr_ctx.clustering();
     const auto& placer_state = placer_state_.get();
     const auto& block_locs = placer_state.block_locs();
     const auto& blk_loc_registry = placer_state.blk_loc_registry();
@@ -25,6 +27,7 @@ e_create_move CriticalUniformMoveGenerator::propose_move(t_pl_blocks_to_be_moved
     ClusterBlockId b_from = propose_block_to_move(placer_opts,
                                                   proposed_action.logical_blk_type_index,
                                                   /*highly_crit_block=*/true,
+                                                  criticalities,
                                                   &net_from,
                                                   &pin_from,
                                                   placer_state,
diff --git a/vpr/src/place/move_generators/critical_uniform_move_generator.h b/vpr/src/place/move_generators/critical_uniform_move_generator.h
index dd4e5391474..68358552668 100644
--- a/vpr/src/place/move_generators/critical_uniform_move_generator.h
+++ b/vpr/src/place/move_generators/critical_uniform_move_generator.h
@@ -1,7 +1,6 @@
 #ifndef VPR_CRITICAL_UNIFORM_MOVE_GEN_H
 #define VPR_CRITICAL_UNIFORM_MOVE_GEN_H
 #include "move_generator.h"
-#include "timing_place.h"
 
 /**
  * @file 
diff --git a/vpr/src/place/move_generators/feasible_region_move_generator.cpp b/vpr/src/place/move_generators/feasible_region_move_generator.cpp
index 75210dafd43..1c719a7b0ff 100644
--- a/vpr/src/place/move_generators/feasible_region_move_generator.cpp
+++ b/vpr/src/place/move_generators/feasible_region_move_generator.cpp
@@ -30,6 +30,7 @@ e_create_move FeasibleRegionMoveGenerator::propose_move(t_pl_blocks_to_be_moved&
     ClusterBlockId b_from = propose_block_to_move(placer_opts,
                                                   proposed_action.logical_blk_type_index,
                                                   /*highly_crit_block=*/true,
+                                                  criticalities,
                                                   &net_from,
                                                   &pin_from,
                                                   placer_state,
diff --git a/vpr/src/place/move_generators/feasible_region_move_generator.h b/vpr/src/place/move_generators/feasible_region_move_generator.h
index 702f8bdd26c..75304a60fd6 100644
--- a/vpr/src/place/move_generators/feasible_region_move_generator.h
+++ b/vpr/src/place/move_generators/feasible_region_move_generator.h
@@ -1,10 +1,9 @@
 #ifndef VPR_FEASIBLE_REGION_MOVE_GEN_H
 #define VPR_FEASIBLE_REGION_MOVE_GEN_H
 #include "move_generator.h"
-#include "timing_place.h"
 
 /**
- * @brief Feasible Reion (FR) move genrator
+ * @brief Feasible Region (FR) move generator
  *
  * This move was originally defined by Chen et al . in "Simultaneous timing-driven placement and duplication", FPGA 2005
  *
diff --git a/vpr/src/place/move_generators/median_move_generator.cpp b/vpr/src/place/move_generators/median_move_generator.cpp
index 2e982ac6425..99c1b892e17 100644
--- a/vpr/src/place/move_generators/median_move_generator.cpp
+++ b/vpr/src/place/move_generators/median_move_generator.cpp
@@ -28,6 +28,7 @@ e_create_move MedianMoveGenerator::propose_move(t_pl_blocks_to_be_moved& blocks_
     ClusterBlockId b_from = propose_block_to_move(placer_opts,
                                                   proposed_action.logical_blk_type_index,
                                                   /*highly_crit_block=*/false,
+                                                  /*placer_criticalities=*/nullptr,
                                                   /*net_from=*/nullptr,
                                                   /*pin_from=*/nullptr,
                                                   placer_state,
diff --git a/vpr/src/place/move_generators/move_generator.h b/vpr/src/place/move_generators/move_generator.h
index e39493e16c6..5ca0b4ce1f5 100644
--- a/vpr/src/place/move_generators/move_generator.h
+++ b/vpr/src/place/move_generators/move_generator.h
@@ -3,7 +3,7 @@
 
 #include "vpr_types.h"
 #include "move_utils.h"
-#include "timing_place.h"
+#include "PlacerCriticalities.h"
 
 #include <limits>
 
diff --git a/vpr/src/place/move_generators/uniform_move_generator.cpp b/vpr/src/place/move_generators/uniform_move_generator.cpp
index 6c6e283ba94..7190918aba3 100644
--- a/vpr/src/place/move_generators/uniform_move_generator.cpp
+++ b/vpr/src/place/move_generators/uniform_move_generator.cpp
@@ -24,6 +24,7 @@ e_create_move UniformMoveGenerator::propose_move(t_pl_blocks_to_be_moved& blocks
     ClusterBlockId b_from = propose_block_to_move(placer_opts,
                                                   proposed_action.logical_blk_type_index,
                                                   /*highly_crit_block=*/false,
+                                                  /*placer_criticalities=*/nullptr,
                                                   /*net_from=*/nullptr,
                                                   /*pin_from=*/nullptr,
                                                   placer_state,
diff --git a/vpr/src/place/move_generators/weighted_median_move_generator.cpp b/vpr/src/place/move_generators/weighted_median_move_generator.cpp
index b391509f5c3..de949d37a75 100644
--- a/vpr/src/place/move_generators/weighted_median_move_generator.cpp
+++ b/vpr/src/place/move_generators/weighted_median_move_generator.cpp
@@ -30,6 +30,7 @@ e_create_move WeightedMedianMoveGenerator::propose_move(t_pl_blocks_to_be_moved&
     ClusterBlockId b_from = propose_block_to_move(placer_opts,
                                                   proposed_action.logical_blk_type_index,
                                                   /*highly_crit_block=*/false,
+                                                  /*placer_criticalities=*/nullptr,
                                                   /*net_from=*/nullptr,
                                                   /*pin_from=*/nullptr,
                                                   placer_state,
diff --git a/vpr/src/place/move_generators/weighted_median_move_generator.h b/vpr/src/place/move_generators/weighted_median_move_generator.h
index a6041f13e87..7da4be46bf6 100644
--- a/vpr/src/place/move_generators/weighted_median_move_generator.h
+++ b/vpr/src/place/move_generators/weighted_median_move_generator.h
@@ -2,7 +2,6 @@
 #define VPR_WEIGHTED_MEDIAN_MOVE_GEN_H
 
 #include "move_generator.h"
-#include "timing_place.h"
 
 /**
  * @brief The weighted median move generator
diff --git a/vpr/src/place/move_utils.cpp b/vpr/src/place/move_utils.cpp
index b5efb699fc7..d44c3611eca 100644
--- a/vpr/src/place/move_utils.cpp
+++ b/vpr/src/place/move_utils.cpp
@@ -547,30 +547,24 @@ void enable_placer_debug(const t_placer_opts& placer_opts,
 ClusterBlockId propose_block_to_move(const t_placer_opts& placer_opts,
                                      int& logical_blk_type_index,
                                      bool highly_crit_block,
+                                     const PlacerCriticalities* placer_criticalities,
                                      ClusterNetId* net_from,
                                      int* pin_from,
                                      const PlacerState& placer_state,
                                      vtr::RngContainer& rng) {
+    const auto& cluster_ctx = g_vpr_ctx.clustering();
+
     ClusterBlockId b_from = ClusterBlockId::INVALID();
-    auto& cluster_ctx = g_vpr_ctx.clustering();
 
-    if (logical_blk_type_index == -1) { //If the block type is unspecified, choose any random block to be swapped with another random block
-        if (highly_crit_block) {
-            b_from = pick_from_highly_critical_block(*net_from, *pin_from, placer_state, rng);
-        } else {
-            b_from = pick_from_block(rng);
-        }
+    if (highly_crit_block) {
+        b_from = pick_from_highly_critical_block(*net_from, *pin_from, logical_blk_type_index, placer_state, *placer_criticalities, rng);
+    } else {
+        b_from = pick_from_block(logical_blk_type_index, rng);
+    }
 
-        //if a movable block found, set the block type
-        if (b_from) {
-            logical_blk_type_index = cluster_ctx.clb_nlist.block_type(b_from)->index;
-        }
-    } else { //If the block type is specified, choose a random block with blk_type to be swapped with another random block
-        if (highly_crit_block) {
-            b_from = pick_from_highly_critical_block(*net_from, *pin_from, logical_blk_type_index, placer_state, rng);
-        } else {
-            b_from = pick_from_block(logical_blk_type_index, rng);
-        }
+    //if a movable block found, set the block type
+    if (b_from) {
+        logical_blk_type_index = cluster_ctx.clb_nlist.block_type(b_from)->index;
     }
 
     if constexpr (VTR_ENABLE_DEBUG_LOGGING_CONST_EXPR) {
@@ -589,99 +583,50 @@ const std::vector<ClusterBlockId>& movable_blocks_per_type(const t_logical_block
     return place_ctx.movable_blocks_per_type[blk_type.index];
 }
 
-ClusterBlockId pick_from_block(vtr::RngContainer& rng) {
-    auto& place_ctx = g_vpr_ctx.placement();
-
-    // get the number of movable clustered blocks
-    const size_t n_movable_blocks = place_ctx.movable_blocks.size();
-
-    if (n_movable_blocks > 0) {
-        //Pick a movable block at random and return it
-        auto b_from = ClusterBlockId(rng.irand((int)n_movable_blocks - 1));
-        return b_from;
-    } else {
-        //No movable blocks found
-        return ClusterBlockId::INVALID();
-    }
-}
-
 ClusterBlockId pick_from_block(const int logical_blk_type_index, vtr::RngContainer& rng) {
-    auto& place_ctx = g_vpr_ctx.placement();
-
-    const auto& movable_blocks_of_type = place_ctx.movable_blocks_per_type[logical_blk_type_index];
-
-    if (movable_blocks_of_type.empty()) {
-        return ClusterBlockId::INVALID();
-    }
-
-    auto b_from = ClusterBlockId(movable_blocks_of_type[rng.irand((int)movable_blocks_of_type.size() - 1)]);
-
-    return b_from;
-}
-
-//Pick a random highly critical block to be swapped with another random block.
-//If none is found return ClusterBlockId::INVALID()
-ClusterBlockId pick_from_highly_critical_block(ClusterNetId& net_from,
-                                               int& pin_from,
-                                               const PlacerState& placer_state,
-                                               vtr::RngContainer& rng) {
-    auto& cluster_ctx = g_vpr_ctx.clustering();
-    auto& place_move_ctx = placer_state.move();
-    auto& block_locs = placer_state.block_locs();
+    const auto& place_ctx = g_vpr_ctx.placement();
 
-    //Initialize critical net and pin to be invalid
-    net_from = ClusterNetId::INVALID();
-    pin_from = -1;
+    // if logical block type is specified, pick the 'from' block from blocks of that type;
+    // otherwise, select it randomly from all blocks
+    const auto& movable_blocks = (logical_blk_type_index < 0 )? place_ctx.movable_blocks : place_ctx.movable_blocks_per_type[logical_blk_type_index];
 
-    //check if any critical block is available
-    if (place_move_ctx.highly_crit_pins.empty()) {
+    if (movable_blocks.empty()) {
         return ClusterBlockId::INVALID();
     }
 
-    //pick a random highly critical pin and find the nets driver block
-    std::pair<ClusterNetId, int> crit_pin = place_move_ctx.highly_crit_pins[rng.irand(place_move_ctx.highly_crit_pins.size() - 1)];
-    ClusterBlockId b_from = cluster_ctx.clb_nlist.net_driver_block(crit_pin.first);
+    ClusterBlockId b_from = movable_blocks[rng.irand((int)movable_blocks.size() - 1)];
 
-    if (block_locs[b_from].is_fixed) {
-        return ClusterBlockId::INVALID(); //Block is fixed, cannot move
-    }
-
-    net_from = crit_pin.first;
-    pin_from = crit_pin.second;
     return b_from;
-
-    //Unreachable statement
-    return ClusterBlockId::INVALID();
 }
 
-//Pick a random highly critical block with a specified block type to be swapped with another random block.
-//If none is found return ClusterBlockId::INVALID()
 ClusterBlockId pick_from_highly_critical_block(ClusterNetId& net_from,
                                                int& pin_from,
                                                const int logical_blk_type_index,
                                                const PlacerState& placer_state,
+                                               const PlacerCriticalities& placer_criticalities,
                                                vtr::RngContainer& rng) {
-    auto& cluster_ctx = g_vpr_ctx.clustering();
-    auto& place_move_ctx = placer_state.move();
-    auto& block_locs = placer_state.block_locs();
+    const auto& cluster_ctx = g_vpr_ctx.clustering();
+    const auto& block_locs = placer_state.block_locs();
 
     //Initialize critical net and pin to be invalid
     net_from = ClusterNetId::INVALID();
     pin_from = -1;
 
+    const auto& highly_crit_pins = placer_criticalities.get_highly_critical_pins();
+
     //check if any critical block is available
-    if (place_move_ctx.highly_crit_pins.empty()) {
+    if (highly_crit_pins.empty()) {
         return ClusterBlockId::INVALID();
     }
 
     //pick a random highly critical pin and find the nets driver block
-    std::pair<ClusterNetId, int> crit_pin = place_move_ctx.highly_crit_pins[rng.irand(place_move_ctx.highly_crit_pins.size() - 1)];
+    std::pair<ClusterNetId, int> crit_pin = highly_crit_pins[rng.irand(highly_crit_pins.size() - 1)];
     ClusterBlockId b_from = cluster_ctx.clb_nlist.net_driver_block(crit_pin.first);
 
     //Check if picked block type matches with the blk_type specified, and it is not fixed
     //blk_type from propose move doesn't account for the EMPTY type
     auto b_from_type = cluster_ctx.clb_nlist.block_type(b_from);
-    if (b_from_type->index == logical_blk_type_index) {
+    if (b_from_type->index == logical_blk_type_index || logical_blk_type_index < 0) {
         if (block_locs[b_from].is_fixed) {
             return ClusterBlockId::INVALID(); //Block is fixed, cannot move
         }
@@ -692,7 +637,6 @@ ClusterBlockId pick_from_highly_critical_block(ClusterNetId& net_from,
     }
 
     //No critical block with 'blk_type' found
-    //Unreachable statement
     return ClusterBlockId::INVALID();
 }
 
@@ -707,7 +651,7 @@ bool find_to_loc_uniform(t_logical_block_type_ptr type,
     //
     //Note that the range limit (rlim) is applied in a logical sense (i.e. 'compressed' grid space consisting
     //of the same block types, and not the physical grid space). This means, for example, that columns of 'rare'
-    //blocks (e.g. DSPs/RAMs) which are physically far appart but logically adjacent will be swappable even
+    //blocks (e.g. DSPs/RAMs) which are physically far apart but logically adjacent will be swappable even
     //at an rlim fo 1.
     //
     //This ensures that such blocks don't get locked down too early during placement (as would be the
diff --git a/vpr/src/place/move_utils.h b/vpr/src/place/move_utils.h
index de3d771e7ae..ea9a90cc18d 100644
--- a/vpr/src/place/move_utils.h
+++ b/vpr/src/place/move_utils.h
@@ -7,6 +7,7 @@
 
 class PlacerState;
 class BlkLocRegistry;
+class PlacerCriticalities;
 namespace vtr {
 class RngContainer;
 }
@@ -171,6 +172,7 @@ bool is_legal_swap_to_location(ClusterBlockId blk,
 ClusterBlockId propose_block_to_move(const t_placer_opts& placer_opts,
                                      int& logical_blk_type_index,
                                      bool highly_crit_block,
+                                     const PlacerCriticalities* placer_criticalities,
                                      ClusterNetId* net_from,
                                      int* pin_from,
                                      const PlacerState& placer_state,
@@ -183,43 +185,32 @@ ClusterBlockId propose_block_to_move(const t_placer_opts& placer_opts,
  */
 const std::vector<ClusterBlockId>& movable_blocks_per_type(const t_logical_block_type& blk_type);
 
-/**
- * @brief Select a random block to be swapped with another block
- * 
- * @return BlockId of the selected block, ClusterBlockId::INVALID() if no block with specified block type found
- */
-ClusterBlockId pick_from_block(vtr::RngContainer& rng);
 
 /**
  * @brief Find a block with a specific block type to be swapped with another block
  *
- *  @param logical_blk_type_index: the agent type of the moving block.
+ * @param logical_blk_type_index The logical type of the moving block. If a negative value is passed,
+ * the block is selected randomly from all movable blocks and not from a specific type.
+ * @param rng A random number generator used to select a random block.
  * 
  * @return BlockId of the selected block, ClusterBlockId::INVALID() if no block with specified block type found
  */
 ClusterBlockId pick_from_block(int logical_blk_type_index, vtr::RngContainer& rng);
 
 /**
- * @brief Select a random highly critical block to be swapped with another block
- * 
- * @return BlockId of the selected block, ClusterBlockId::INVALID() if no block with specified block type found
- */
-ClusterBlockId pick_from_highly_critical_block(ClusterNetId& net_from,
-                                               int& pin_from,
-                                               const PlacerState& placer_state,
-                                               vtr::RngContainer& rng);
-
-/**
- * @brief Find a block with a specific block type to be swapped with another block
+ * @brief Find a highly critical block with a specific block type to be swapped with another block.
  *
- *  @param logical_blk_type_index: the agent type of the moving block.
+ * @param logical_blk_type_index The logical type of the moving block. If a negative value is passed,
+ * a highly critical block of any type is accepted instead of only blocks of a specific type.
+ * @param rng A random number generator used to select a random highly critical block.
  * 
- * @return BlockId of the selected block, ClusterBlockId::INVALID() if no block with specified block type found
+ * @return BlockId of the selected block, ClusterBlockId::INVALID() if no block with specified block type found.
  */
 ClusterBlockId pick_from_highly_critical_block(ClusterNetId& net_from,
                                                int& pin_from,
                                                int logical_blk_type_index,
                                                const PlacerState& placer_state,
+                                               const PlacerCriticalities& placer_criticalities,
                                                vtr::RngContainer& rng);
 
 bool find_to_loc_uniform(t_logical_block_type_ptr type,
diff --git a/vpr/src/place/net_cost_handler.cpp b/vpr/src/place/net_cost_handler.cpp
index ac049995347..e2a8e902e31 100644
--- a/vpr/src/place/net_cost_handler.cpp
+++ b/vpr/src/place/net_cost_handler.cpp
@@ -34,6 +34,7 @@
 #include "vtr_math.h"
 #include "vtr_ndmatrix.h"
 #include "vtr_ndoffsetmatrix.h"
+#include "PlacerCriticalities.h"
 
 #include <array>
 
diff --git a/vpr/src/place/net_cost_handler.h b/vpr/src/place/net_cost_handler.h
index 2b8e59af88f..9fad2757681 100644
--- a/vpr/src/place/net_cost_handler.h
+++ b/vpr/src/place/net_cost_handler.h
@@ -7,7 +7,6 @@
 #pragma once
 
 #include "place_delay_model.h"
-#include "timing_place.h"
 #include "move_transactions.h"
 #include "place_util.h"
 #include "vtr_ndoffsetmatrix.h"
@@ -15,6 +14,7 @@
 #include <functional>
 
 class PlacerState;
+class PlacerCriticalities;
 
 /**
  * @brief The method used to calculate placement cost
diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp
index 3506d00b801..69e4e1895a0 100644
--- a/vpr/src/place/place.cpp
+++ b/vpr/src/place/place.cpp
@@ -13,7 +13,7 @@
 #include "read_xml_arch_file.h"
 #include "echo_files.h"
 #include "histogram.h"
-#include "place_delay_model.h"
+#include "PlacementDelayModelCreator.h"
 #include "move_utils.h"
 #include "buttons.h"
 
@@ -65,14 +65,14 @@ void try_place(const Netlist<>& net_list,
 
     if (placer_opts.place_algorithm.is_timing_driven()) {
         /*do this before the initial placement to avoid messing up the initial placement */
-        place_delay_model = alloc_lookups_and_delay_model(net_list,
-                                                          chan_width_dist,
-                                                          placer_opts,
-                                                          router_opts,
-                                                          det_routing_arch,
-                                                          segment_inf,
-                                                          directs,
-                                                          is_flat);
+        place_delay_model = PlacementDelayModelCreator::create_delay_model(placer_opts,
+                                                                           router_opts,
+                                                                           net_list,
+                                                                           det_routing_arch,
+                                                                           segment_inf,
+                                                                           chan_width_dist,
+                                                                           directs,
+                                                                           is_flat);
 
         if (isEchoFileEnabled(E_ECHO_PLACEMENT_DELTA_DELAY_MODEL)) {
             place_delay_model->dump_echo(getEchoFileName(E_ECHO_PLACEMENT_DELTA_DELAY_MODEL));
diff --git a/vpr/src/place/place_checkpoint.cpp b/vpr/src/place/place_checkpoint.cpp
index 60b009d85ae..a6e2858e577 100644
--- a/vpr/src/place/place_checkpoint.cpp
+++ b/vpr/src/place/place_checkpoint.cpp
@@ -1,7 +1,11 @@
+
 #include "place_checkpoint.h"
+
 #include "noc_place_utils.h"
 #include "placer_state.h"
 #include "grid_block.h"
+#include "PlacerCriticalities.h"
+#include "PlacerSetupSlacks.h"
 
 float t_placement_checkpoint::get_cp_cpd() const { return cpd_; }
 
diff --git a/vpr/src/place/place_delay_model.h b/vpr/src/place/place_delay_model.h
deleted file mode 100644
index 0aa01385e6e..00000000000
--- a/vpr/src/place/place_delay_model.h
+++ /dev/null
@@ -1,260 +0,0 @@
-/**
- * @file place_delay_model.h
- * @brief This file contains all the class and function declarations related to
- *        the placer delay model. For implementations, see place_delay_model.cpp.
- */
-
-#pragma once
-#include "vtr_ndmatrix.h"
-#include "vtr_flat_map.h"
-#include "vpr_types.h"
-#include "router_delay_profiling.h"
-
-#ifndef __has_attribute
-#    define __has_attribute(x) 0 // Compatibility with non-clang compilers.
-#endif
-
-#if defined(COMPILER_GCC) && defined(NDEBUG)
-#    define ALWAYS_INLINE inline __attribute__((__always_inline__))
-#elif defined(COMPILER_MSVC) && defined(NDEBUG)
-#    define ALWAYS_INLINE __forceinline
-#elif __has_attribute(always_inline)
-#    define ALWAYS_INLINE __attribute__((always_inline)) // clang
-#else
-#    define ALWAYS_INLINE inline
-#endif
-
-///@brief Forward declarations.
-class PlaceDelayModel;
-class PlacerState;
-
-///@brief Initialize the placer delay model.
-std::unique_ptr<PlaceDelayModel> alloc_lookups_and_delay_model(const Netlist<>& net_list,
-                                                               t_chan_width_dist chan_width_dist,
-                                                               const t_placer_opts& place_opts,
-                                                               const t_router_opts& router_opts,
-                                                               t_det_routing_arch* det_routing_arch,
-                                                               std::vector<t_segment_inf>& segment_inf,
-                                                               const std::vector<t_direct_inf>& directs,
-                                                               bool is_flat);
-
-///@brief Returns the delay of one point to point connection.
-float comp_td_single_connection_delay(const PlaceDelayModel* delay_model,
-                                      const vtr::vector_map<ClusterBlockId, t_block_loc>& block_locs,
-                                      ClusterNetId net_id,
-                                      int ipin);
-
-///@brief Recompute all point to point delays, updating `connection_delay` matrix.
-void comp_td_connection_delays(const PlaceDelayModel* delay_model,
-                               PlacerState& placer_state);
-
-///@brief Abstract interface to a placement delay model.
-class PlaceDelayModel {
-  public:
-    virtual ~PlaceDelayModel() = default;
-
-    ///@brief Computes place delay model.
-    virtual void compute(
-        RouterDelayProfiler& route_profiler,
-        const t_placer_opts& placer_opts,
-        const t_router_opts& router_opts,
-        int longest_length)
-        = 0;
-
-    /**
-     * @brief Returns the delay estimate between the specified block pins.
-     *
-     * Either compute or read methods must be invoked before invoking delay.
-     */
-    virtual float delay(const t_physical_tile_loc& from_loc, int from_pin, const t_physical_tile_loc& to_loc, int to_pin) const = 0;
-
-    ///@brief Dumps the delay model to an echo file.
-    virtual void dump_echo(std::string filename) const = 0;
-
-    /**
-     * @brief Write place delay model to specified file.
-     *
-     * May be unimplemented, in which case method should throw an exception.
-     */
-    virtual void write(const std::string& file) const = 0;
-
-    /**
-     * @brief Read place delay model from specified file.
-     *
-     * May be unimplemented, in which case method should throw an exception.
-     */
-    virtual void read(const std::string& file) = 0;
-};
-
-///@brief A simple delay model based on the distance (delta) between block locations.
-class DeltaDelayModel : public PlaceDelayModel {
-  public:
-    DeltaDelayModel(float min_cross_layer_delay,
-                    bool is_flat)
-        : cross_layer_delay_(min_cross_layer_delay)
-        , is_flat_(is_flat) {}
-    DeltaDelayModel(float min_cross_layer_delay,
-                    vtr::NdMatrix<float, 4> delta_delays,
-                    bool is_flat)
-        : delays_(std::move(delta_delays))
-        , cross_layer_delay_(min_cross_layer_delay)
-        , is_flat_(is_flat) {}
-
-    void compute(
-        RouterDelayProfiler& router,
-        const t_placer_opts& placer_opts,
-        const t_router_opts& router_opts,
-        int longest_length) override;
-    float delay(const t_physical_tile_loc& from_loc, int /*from_pin*/, const t_physical_tile_loc& to_loc, int /*to_pin*/) const override;
-    void dump_echo(std::string filepath) const override;
-
-    void read(const std::string& file) override;
-    void write(const std::string& file) const override;
-    const vtr::NdMatrix<float, 4>& delays() const {
-        return delays_;
-    }
-
-  private:
-    vtr::NdMatrix<float, 4> delays_; // [0..num_layers-1][0..max_dx][0..max_dy]
-    float cross_layer_delay_;
-    /**
-     * @brief Indicates whether the router is a two-stage or run-flat
-     */
-    bool is_flat_;
-};
-
-class OverrideDelayModel : public PlaceDelayModel {
-  public:
-    OverrideDelayModel(float min_cross_layer_delay,
-                       bool is_flat)
-        : cross_layer_delay_(min_cross_layer_delay)
-        , is_flat_(is_flat) {}
-    void compute(
-        RouterDelayProfiler& route_profiler,
-        const t_placer_opts& placer_opts,
-        const t_router_opts& router_opts,
-        int longest_length) override;
-    // returns delay from the specified (x,y) to the specified (x,y) with both endpoints on layer_num and the
-    // specified from and to pins
-    float delay(const t_physical_tile_loc& from_loc, int from_pin, const t_physical_tile_loc& to_loc, int to_pin) const override;
-    void dump_echo(std::string filepath) const override;
-
-    void read(const std::string& file) override;
-    void write(const std::string& file) const override;
-
-  public: //Mutators
-    void set_base_delay_model(std::unique_ptr<DeltaDelayModel> base_delay_model);
-    const DeltaDelayModel* base_delay_model() const;
-    float get_delay_override(int from_type, int from_class, int to_type, int to_class, int delta_x, int delta_y) const;
-    void set_delay_override(int from_type, int from_class, int to_type, int to_class, int delta_x, int delta_y, float delay);
-
-  private:
-    std::unique_ptr<DeltaDelayModel> base_delay_model_;
-    /**
-     * @brief Minimum delay of cross-layer connections
-     */
-    float cross_layer_delay_;
-    /**
-     * @brief Indicates whether the router is a two-stage or run-flat
-     */
-    bool is_flat_;
-
-    void compute_override_delay_model(RouterDelayProfiler& router,
-                                      const t_router_opts& router_opts);
-
-    /**
-     * @brief Structure that allows delays to be queried from the delay model.
-     *
-     * Delay is calculated given the origin physical tile, the origin
-     * pin, the destination physical tile, and the destination pin.
-     * This structure encapsulates all these information.
-     *
-     *   @param from_type, to_type
-     *              Physical tile index (for easy array access)
-     *   @param from_class, to_class
-     *              The class that the pins belongs to.
-     *   @param to_x, to_y
-     *              The horizontal and vertical displacement
-     *              between two physical tiles.
-     */
-    struct t_override {
-        short from_type;
-        short to_type;
-        short from_class;
-        short to_class;
-        short delta_x;
-        short delta_y;
-
-        /**
-         * @brief Comparison operator designed for performance.
-         *
-         * Operator< is important since t_override serves as the key into the
-         * map structure delay_overrides_. A default comparison operator would
-         * not be inlined by the compiler.
-         *
-         * A combination of ALWAYS_INLINE attribute and std::lexicographical_compare
-         * is required for operator< to be inlined by compiler. Proper inlining of
-         * the function reduces place time by around 5%.
-         *
-         * For more information: https://github.com/verilog-to-routing/vtr-verilog-to-routing/issues/1225
-         */
-        friend ALWAYS_INLINE bool operator<(const t_override& lhs, const t_override& rhs) {
-            const short* left = reinterpret_cast<const short*>(&lhs);
-            const short* right = reinterpret_cast<const short*>(&rhs);
-            constexpr size_t NUM_T_OVERRIDE_MEMBERS = sizeof(t_override) / sizeof(short);
-            return std::lexicographical_compare(left, left + NUM_T_OVERRIDE_MEMBERS, right, right + NUM_T_OVERRIDE_MEMBERS);
-        }
-    };
-
-    /**
-     * @brief Map data structure that returns delay values according to
-     *        specific delay model queries.
-     *
-     * Delay model queries are provided by the t_override structure, which
-     * encapsulates the information regarding the origin and the destination.
-     */
-    vtr::flat_map2<t_override, float> delay_overrides_;
-
-    /**
-     * operator< treats memory layout of t_override as an array of short.
-     * This requires all members of t_override are shorts and there is no
-     * padding between members of t_override.
-     */
-    static_assert(sizeof(t_override) == sizeof(t_override::from_type) + sizeof(t_override::to_type) + sizeof(t_override::from_class) + sizeof(t_override::to_class) + sizeof(t_override::delta_x) + sizeof(t_override::delta_y), "Expect t_override to have a memory layout equivalent to an array of short (no padding)");
-    static_assert(sizeof(t_override::from_type) == sizeof(short), "Expect all t_override data members to be shorts");
-    static_assert(sizeof(t_override::to_type) == sizeof(short), "Expect all t_override data members to be shorts");
-    static_assert(sizeof(t_override::from_class) == sizeof(short), "Expect all t_override data members to be shorts");
-    static_assert(sizeof(t_override::to_class) == sizeof(short), "Expect all t_override data members to be shorts");
-    static_assert(sizeof(t_override::delta_x) == sizeof(short), "Expect all t_override data members to be shorts");
-    static_assert(sizeof(t_override::delta_y) == sizeof(short), "Expect all t_override data members to be shorts");
-};
-
-///@brief A simple delay model based on the information stored in router lookahead
-///  This is in contrast to other placement delay models that get the cost of getting from one location to another by running the router
-class SimpleDelayModel : public PlaceDelayModel {
-  public:
-    SimpleDelayModel() {}
-
-    void compute(
-        RouterDelayProfiler& router,
-        const t_placer_opts& placer_opts,
-        const t_router_opts& router_opts,
-        int longest_length) override;
-    float delay(const t_physical_tile_loc& from_loc, int /*from_pin*/, const t_physical_tile_loc& to_loc, int /*to_pin*/) const override;
-    void dump_echo(std::string /*filepath*/) const override {}
-
-    void read(const std::string& /*file*/) override {}
-    void write(const std::string& /*file*/) const override {}
-
-  private:
-    /**
-     * @brief The matrix to store the minimum delay between different points on different layers.
-     *
-     *The matrix used to store delay information is a 5D matrix. This data structure stores the minimum delay for each tile type on each layer to other layers
-     *for each dx and dy. We decided to separate the delay for each physical type on each die to accommodate cases where the connectivity of a physical type differs
-     *on each layer. Additionally, instead of using d_layer, we distinguish between the destination layer to handle scenarios where connectivity between layers
-     *is not uniform. For example, if the number of inter-layer connections between layer 1 and 2 differs from the number of connections between layer 0 and 1.
-     *One might argue that this variability could also occur for dx and dy. However, we are operating under the assumption that the FPGA fabric architecture is regular.
-     */
-    vtr::NdMatrix<float, 5> delays_; // [0..num_physical_type-1][0..num_layers-1][0..num_layers-1][0..max_dx][0..max_dy]
-};
diff --git a/vpr/src/place/placer.h b/vpr/src/place/placer.h
index 99c00d7e8e5..3fb89fb20f3 100644
--- a/vpr/src/place/placer.h
+++ b/vpr/src/place/placer.h
@@ -20,13 +20,15 @@
 #include <memory>
 #include <optional>
 
-#include "timing_place.h"
 #include "place_checkpoint.h"
 #include "PlacementDelayCalculator.h"
 #include "placer_state.h"
 #include "noc_place_utils.h"
 #include "net_cost_handler.h"
 #include "placement_log_printer.h"
+#include "PlacerSetupSlacks.h"
+#include "PlacerCriticalities.h"
+#include "NetPinTimingInvalidator.h"
 
 class PlacementAnnealer;
 namespace vtr{
diff --git a/vpr/src/place/placer_state.h b/vpr/src/place/placer_state.h
index 8f3b966a56d..a6896a359e8 100644
--- a/vpr/src/place/placer_state.h
+++ b/vpr/src/place/placer_state.h
@@ -12,7 +12,7 @@
 #include "vpr_context.h"
 #include "vpr_net_pins_matrix.h"
 #include "vpr_types.h"
-#include "timing_place.h"
+#include "PlacerTimingCosts.h"
 
 /**
  * @brief State relating to the timing driven data.
@@ -145,9 +145,6 @@ struct PlacerMoveContext : public Context {
     std::vector<int> X_coord;
     std::vector<int> Y_coord;
     std::vector<int> layer_coord;
-
-    // Container to save the highly critical pins (higher than a timing criticality limit set by commandline option)
-    std::vector<std::pair<ClusterNetId, int>> highly_crit_pins;
 };
 
 
diff --git a/vpr/src/place/timing/PlacerCriticalities.cpp b/vpr/src/place/timing/PlacerCriticalities.cpp
new file mode 100644
index 00000000000..1f2e4f518e9
--- /dev/null
+++ b/vpr/src/place/timing/PlacerCriticalities.cpp
@@ -0,0 +1,127 @@
+
+#include "PlacerCriticalities.h"
+
+#include "timing_info.h"
+#include "timing_util.h"
+
+PlacerCriticalities::PlacerCriticalities(const ClusteredNetlist& clb_nlist,
+                                         const ClusteredPinAtomPinsLookup& netlist_pin_lookup,
+                                         std::shared_ptr<const SetupTimingInfo> timing_info)
+    : clb_nlist_(clb_nlist) // kept as a const reference member, so the netlist must outlive this object
+    , pin_lookup_(netlist_pin_lookup) // kept as a const reference member as well
+    , timing_info_(std::move(timing_info)) // taken by value, so move instead of copying the shared_ptr again
+    , timing_place_crit_(make_net_pins_matrix(clb_nlist_, std::numeric_limits<float>::quiet_NaN())) { // presumably NaN-fills every entry; confirm in make_net_pins_matrix
+}
+
+/**
+ * @brief Updates the criticalities in the timing_place_crit_ data structure.
+ *
+ * If the criticalities are not updated immediately after each time we call
+ * timing_info->update(), then timing_info->pins_with_modified_setup_criticality()
+ * cannot accurately account for all the pins that need to be updated. In this case,
+ * `recompute_required` would be true, and we update all criticalities from scratch.
+ *
+ * If the criticality exponent has changed, we also need to update from scratch.
+ */
+void PlacerCriticalities::update_criticalities(const PlaceCritParams& crit_params) {
+    // If update is not enabled, exit the routine.
+    if (!update_enabled) {
+        // re-computation is required on the next iteration
+        recompute_required = true;
+        return;
+    }
+
+    // Determine what pins need updating (the first enabled call always recomputes: recompute_required starts true and last_crit_exponent_ starts as NaN)
+    if (!recompute_required && crit_params.crit_exponent == last_crit_exponent_) {
+        incr_update_criticalities();
+    } else {
+        recompute_criticalities();
+
+        // Record new criticality exponent
+        last_crit_exponent_ = crit_params.crit_exponent;
+    }
+
+    /* Performs a 1-to-1 mapping from criticality to timing_place_crit_.
+     * For every pin on every net (or, equivalently, for every tedge ending
+     * in that pin), timing_place_crit_ = criticality^(criticality exponent) */
+
+    // Update the affected pins
+    for (ClusterPinId clb_pin : cluster_pins_with_modified_criticality_) {
+        ClusterNetId clb_net = clb_nlist_.pin_net(clb_pin);
+        int pin_index_in_net = clb_nlist_.pin_net_index(clb_pin);
+
+        float clb_pin_crit = calculate_clb_net_pin_criticality(*timing_info_, pin_lookup_, ParentPinId(size_t(clb_pin)), /*is_flat=*/false);
+        float new_crit = pow(clb_pin_crit, crit_params.crit_exponent);
+
+        /* Update the highly critical pins container
+         * On the first enabled update only the new criticality is compared against the limit; afterwards:
+         * If the old criticality < limit and the new criticality > limit --> add this pin to the highly critical pins
+         * If the old criticality > limit and the new criticality < limit --> remove this pin from the highly critical pins
+         */
+        if (!first_time_update_criticality) {
+            if (new_crit > crit_params.crit_limit && timing_place_crit_[clb_net][pin_index_in_net] < crit_params.crit_limit) {
+                highly_crit_pins.emplace_back(clb_net, pin_index_in_net);
+            } else if (new_crit < crit_params.crit_limit && timing_place_crit_[clb_net][pin_index_in_net] > crit_params.crit_limit) {
+                highly_crit_pins.erase(std::remove(highly_crit_pins.begin(), highly_crit_pins.end(), std::make_pair(clb_net, pin_index_in_net)),
+                                       highly_crit_pins.end());
+            }
+        } else {
+            if (new_crit > crit_params.crit_limit) {
+                highly_crit_pins.emplace_back(clb_net, pin_index_in_net);
+            }
+        }
+
+        /* The placer likes a great deal of contrast between criticalities.
+         * Since path criticality varies much more than timing, we "sharpen" timing
+         * criticality by taking it to some power, crit_exponent (between 1 and 8 by default). */
+        timing_place_crit_[clb_net][pin_index_in_net] = new_crit;
+    }
+
+    /* Criticalities updated. In sync with timing info.
+     * Can be incrementally updated on the next iteration */
+    recompute_required = false;
+
+    first_time_update_criticality = false;
+}
+
+void PlacerCriticalities::set_recompute_required() {
+    recompute_required = true; // makes the next enabled update_criticalities() call take the recompute_criticalities() path
+}
+
+void PlacerCriticalities::incr_update_criticalities() {
+    cluster_pins_with_modified_criticality_.clear(); // discard the set left by the previous update
+
+    for (AtomPinId atom_pin : timing_info_->pins_with_modified_setup_criticality()) {
+        ClusterPinId clb_pin = pin_lookup_.connected_clb_pin(atom_pin);
+
+        /* Some atom pins correspond to connections which are completely
+         * contained within a cluster, and hence have no corresponding
+         * clustered pin. */
+        if (!clb_pin) continue; // no valid cluster pin for this atom pin, nothing to record
+
+        cluster_pins_with_modified_criticality_.insert(clb_pin);
+    }
+}
+
+void PlacerCriticalities::recompute_criticalities() {
+    cluster_pins_with_modified_criticality_.clear();
+
+    // Non-incremental: mark every sink pin of every net as modified; this function itself writes no criticality values
+    for (ClusterNetId net_id : clb_nlist_.nets()) {
+        for (ClusterPinId pin_id : clb_nlist_.net_sinks(net_id)) {
+            cluster_pins_with_modified_criticality_.insert(pin_id);
+        }
+    }
+}
+
+///@brief Override the criticality of a particular connection (ipin is the sink's index within net_id, so ipin > 0).
+void PlacerCriticalities::set_criticality(ClusterNetId net_id, int ipin, float crit_val) {
+    VTR_ASSERT_SAFE_MSG(ipin > 0, "The pin should not be a driver pin (ipin != 0)");
+    VTR_ASSERT_SAFE_MSG(ipin < int(clb_nlist_.net_pins(net_id).size()), "The pin index in net should be smaller than fanout");
+
+    timing_place_crit_[net_id][ipin] = crit_val; // direct overwrite; highly_crit_pins and the modified-pin set are not touched
+}
+
+PlacerCriticalities::pin_range PlacerCriticalities::pins_with_modified_criticality() const {
+    return vtr::make_range(cluster_pins_with_modified_criticality_); // range over the set filled by incr_update_criticalities() / recompute_criticalities()
+}
diff --git a/vpr/src/place/timing/PlacerCriticalities.h b/vpr/src/place/timing/PlacerCriticalities.h
new file mode 100644
index 00000000000..161423dba6a
--- /dev/null
+++ b/vpr/src/place/timing/PlacerCriticalities.h
@@ -0,0 +1,199 @@
+
+#pragma once
+
+#include "vtr_vec_id_set.h"
+#include "timing_info_fwd.h"
+#include "clustered_netlist_utils.h"
+#include "place_delay_model.h"
+#include "vpr_net_pins_matrix.h"
+
+/**
+ * @brief Saves the placement criticality parameters
+ *
+ * crit_exponent: The exponent each pin criticality is raised to in order to sharpen it
+ * crit_limit:    The threshold above which a pin is recorded as highly critical
+ */
+struct PlaceCritParams {
+    float crit_exponent;
+    float crit_limit;
+};
+
+/**
+ * @brief PlacerCriticalities returns the clustered netlist connection criticalities
+ *        used by the placer ('sharpened' by a criticality exponent).
+ *
+ * Usage
+ * =====
+ * This class also serves to map atom netlist level criticalities (i.e. on AtomPinIds)
+ * to the clustered netlist (i.e. ClusterPinIds) used during placement.
+ *
+ * Criticalities are updated by update_criticalities(), given that `update_enabled` is
+ * set to true. It will update criticalities based on the atom netlist connection
+ * criticalities provided by the passed in SetupTimingInfo.
+ *
+ * This process can be done incrementally, based on the modified connections/AtomPinIds
+ * returned by SetupTimingInfo. However, the set returned only reflects the connections
+ * changed by the last call to the timing info update.
+ *
+ * Therefore, if SetupTimingInfo is updated twice in succession without criticalities
+ * getting updated (update_enabled = false), the returned set cannot account for all
+ * the connections that have been modified. In this case, we flag `recompute_required`
+ * as true, and we recompute the criticalities for every connection to ensure that
+ * they are all up to date. Hence, each time update_setup_slacks_and_criticalities()
+ * is called, we assign `recompute_required` the opposite value of `update_enabled`.
+ *
+ * This class also maps/transforms the modified atom connections/pins returned by the
+ * timing info into modified clustered netlist connections/pins after calling
+ * update_criticalities(). The interface then enables users to iterate over this range
+ * via pins_with_modified_criticality(). This is useful for incrementally re-calculating
+ * the timing costs.
+ *
+ * The criticalities of individual connections can then be queried by calling the
+ * criticality() member function.
+ *
+ * Implementation
+ * ==============
+ * To support incremental re-calculation, the class saves the last criticality exponent
+ * passed to PlacerCriticalities::update_criticalities(). If the next update uses the same
+ * exponent, criticalities can be incrementally updated. Otherwise, they must be re-calculated
+ * from scratch, since a change in exponent changes *all* criticalities.
+ *
+ * Calculating criticalities:
+ * All the raw setup slack values across a single clock domain are gathered
+ * and rated from the best to the worst in terms of criticalities. In order
+ * to calculate criticalities, all the slack values need to be non-negative.
+ * Hence, if the worst slack is negative, all the slack values are shifted
+ * by the value of the worst slack so that the value is at least 0. If the
+ * worst slack is positive, then no shift happens.
+ *
+ * The best (shifted) slack (the most positive one) will have a criticality of 0.
+ * The worst (shifted) slack value will have a criticality of 1.
+ *
+ * Criticalities are used to calculate timing costs for each connection.
+ * The formula is cost = delay * criticality.
+ *
+ * For a more detailed description on how criticalities are calculated, see
+ * calc_relaxed_criticality() in `timing_util.cpp`.
+ */
+class PlacerCriticalities {
+  public: //Types
+    typedef vtr::vec_id_set<ClusterPinId>::iterator pin_iterator;
+    typedef vtr::vec_id_set<ClusterNetId>::iterator net_iterator;
+
+    typedef vtr::Range<pin_iterator> pin_range;
+    typedef vtr::Range<net_iterator> net_range;
+
+  public: //Lifetime
+
+    ///@brief Allocates space for the timing_place_crit_ data structure.
+    PlacerCriticalities(const ClusteredNetlist& clb_nlist,
+                        const ClusteredPinAtomPinsLookup& netlist_pin_lookup,
+                        std::shared_ptr<const SetupTimingInfo> timing_info);
+
+    PlacerCriticalities(const PlacerCriticalities&) = delete;
+    PlacerCriticalities& operator=(const PlacerCriticalities&) = delete;
+
+  public: //Accessors
+    ///@brief Returns the criticality of the specified connection.
+    float criticality(ClusterNetId net, int ipin) const { return timing_place_crit_[net][ipin]; }
+
+    /**
+     * @brief Returns the range of clustered netlist pins (i.e. ClusterPinIds) which
+     *        were modified by the last call to PlacerCriticalities::update_criticalities().
+     */
+    pin_range pins_with_modified_criticality() const;
+
+    /// @brief Returns a constant reference to highly critical pins
+    const std::vector<std::pair<ClusterNetId, int>>& get_highly_critical_pins() const { return highly_crit_pins; }
+
+  public: //Modifiers
+    /**
+     * @brief Updates criticalities based on the atom netlist criticalities
+     *        provided by timing_info and the provided criticality_exponent.
+     *
+     * Should consistently call this method after the most recent timing analysis to
+     * keep the criticalities stored in this class in sync with the timing analyzer.
+     * If out of sync, then the criticalities cannot be incrementally updated
+     * during the next timing analysis iteration.
+     */
+    void update_criticalities(const PlaceCritParams& crit_params);
+
+    ///@brief Enable the recompute_required flag to enforce from scratch update.
+    void set_recompute_required();
+
+    /**
+     * @brief Collect all the sink pins in the netlist and prepare them for update.
+     *
+     * For the incremental version, see PlacerCriticalities::incr_update_criticalities().
+     */
+    void recompute_criticalities();
+
+    ///@brief Override the criticality of a particular connection.
+    void set_criticality(ClusterNetId net, int ipin, float crit_val);
+
+    ///@brief Set `update_enabled` to true.
+    void enable_update() { update_enabled = true; }
+
+    ///@brief Set `update_enabled` to false.
+    void disable_update() { update_enabled = false; }
+
+  private: //Data
+    ///@brief The clb netlist in the placement context.
+    const ClusteredNetlist& clb_nlist_;
+
+    ///@brief The lookup table that maps atom pins to clb pins.
+    const ClusteredPinAtomPinsLookup& pin_lookup_;
+
+    ///@brief A pointer to the setup timing analyzer
+    std::shared_ptr<const SetupTimingInfo> timing_info_;
+
+    /**
+     * @brief The matrix that stores criticality value for each connection.
+     *
+     * Index range: [0..cluster_ctx.clb_nlist.nets().size()-1][1..num_pins-1]
+     */
+    ClbNetPinsMatrix<float> timing_place_crit_;
+
+    /**
+     * The criticality exponent when update_criticalities() was last called
+     * (used to detect if incremental update can be used).
+     */
+    float last_crit_exponent_ = std::numeric_limits<float>::quiet_NaN();
+
+    ///@brief Set of pins with criticalities modified by last call to update_criticalities().
+    vtr::vec_id_set<ClusterPinId> cluster_pins_with_modified_criticality_;
+
+    /**
+     * @brief Collect the cluster pins which need to be updated based on the latest timing
+     *        analysis so that incremental updates to criticalities can be performed.
+     *
+     * Note we use the set of pins reported by the *timing_info* as having modified
+     * criticality, rather than those marked as modified by the timing analyzer.
+     *
+     * Since timing_info uses shifted/relaxed criticality (which depends on max required
+     * time and worst case slacks), additional nodes may be modified when updating the
+     * atom pin criticalities.
+     */
+    void incr_update_criticalities();
+
+    ///@brief Flag that turns on/off the update_criticalities() routine.
+    bool update_enabled = true;
+
+    /**
+     * @brief Flag that checks if criticalities need to be recomputed for all connections.
+     *
+     * Used by the method update_criticalities(). The incremental update is not possible
+     * if this method wasn't called after the previous timing info update.
+     */
+    bool recompute_required = true;
+
+    /**
+     * @brief True until the first enabled call to update_criticalities() has completed.
+     *
+     * Used when incrementally updating the highly critical pins container.
+     */
+    bool first_time_update_criticality = true;
+
+    /// @brief Saves the highly critical pins (higher than a timing criticality limit set by commandline option)
+    std::vector<std::pair<ClusterNetId, int>> highly_crit_pins;
+};
diff --git a/vpr/src/place/timing/PlacerSetupSlacks.cpp b/vpr/src/place/timing/PlacerSetupSlacks.cpp
new file mode 100644
index 00000000000..3a097a582ff
--- /dev/null
+++ b/vpr/src/place/timing/PlacerSetupSlacks.cpp
@@ -0,0 +1,92 @@
+
+#include "PlacerSetupSlacks.h"
+
+#include "timing_util.h"
+#include "timing_info.h"
+
+
+PlacerSetupSlacks::PlacerSetupSlacks(const ClusteredNetlist& clb_nlist,
+                                     const ClusteredPinAtomPinsLookup& netlist_pin_lookup,
+                                     std::shared_ptr<const SetupTimingInfo> timing_info)
+    : clb_nlist_(clb_nlist)
+    , pin_lookup_(netlist_pin_lookup)
+    , timing_info_(std::move(timing_info)) // taken by value, so move instead of copying the shared_ptr again
+    , timing_place_setup_slacks_(make_net_pins_matrix(clb_nlist_, std::numeric_limits<float>::quiet_NaN())) { // presumably NaN-fills every entry; confirm in make_net_pins_matrix
+}
+
+/**
+ * @brief Updates the setup slacks in the timing_place_setup_slacks_ data structure.
+ *
+ * If the setup slacks are not updated immediately after each time we call
+ * timing_info->update(), then timing_info->pins_with_modified_setup_slack()
+ * cannot accurately account for all the pins that need to be updated.
+ *
+ * In this case, `recompute_required` would be true, and we update all setup slacks
+ * from scratch.
+ */
+void PlacerSetupSlacks::update_setup_slacks() {
+    // If update is not enabled, exit the routine.
+    if (!update_enabled) {
+        // re-computation is required on the next iteration
+        recompute_required = true;
+        return;
+    }
+
+    // Determine what pins need updating
+    if (!recompute_required) {
+        incr_update_setup_slacks();
+    } else {
+        recompute_setup_slacks();
+    }
+
+    // Refresh the stored setup slack of every pin collected above
+    for (ClusterPinId clb_pin : cluster_pins_with_modified_setup_slack_) {
+        ClusterNetId clb_net = clb_nlist_.pin_net(clb_pin);
+        int pin_index_in_net = clb_nlist_.pin_net_index(clb_pin);
+
+        float clb_pin_setup_slack = calculate_clb_net_pin_setup_slack(*timing_info_, pin_lookup_, clb_pin);
+
+        timing_place_setup_slacks_[clb_net][pin_index_in_net] = clb_pin_setup_slack;
+    }
+
+    /* Setup slacks updated. In sync with timing info.
+     * Can be incrementally updated on the next iteration. */
+    recompute_required = false;
+}
+
+void PlacerSetupSlacks::incr_update_setup_slacks() {
+    cluster_pins_with_modified_setup_slack_.clear(); // discard the set left by the previous update
+
+    for (AtomPinId atom_pin : timing_info_->pins_with_modified_setup_slack()) {
+        ClusterPinId clb_pin = pin_lookup_.connected_clb_pin(atom_pin);
+
+        //Some atom pins correspond to connections which are completely
+        //contained within a cluster, and hence have no corresponding
+        //clustered pin.
+        if (!clb_pin) continue; // no valid cluster pin for this atom pin, nothing to record
+
+        cluster_pins_with_modified_setup_slack_.insert(clb_pin);
+    }
+}
+
+void PlacerSetupSlacks::recompute_setup_slacks() {
+    cluster_pins_with_modified_setup_slack_.clear();
+
+    // Non-incremental: mark every sink pin of every net as modified; this function itself writes no slack values
+    for (ClusterNetId net_id : clb_nlist_.nets()) {
+        for (ClusterPinId pin_id : clb_nlist_.net_sinks(net_id)) {
+            cluster_pins_with_modified_setup_slack_.insert(pin_id);
+        }
+    }
+}
+
+void PlacerSetupSlacks::set_setup_slack(ClusterNetId net_id, int ipin, float slack_val) {
+    VTR_ASSERT_SAFE_MSG(ipin > 0, "The pin should not be a driver pin (ipin != 0)");
+    VTR_ASSERT_SAFE_MSG(ipin < int(clb_nlist_.net_pins(net_id).size()), "The pin index in net should be smaller than fanout");
+
+    timing_place_setup_slacks_[net_id][ipin] = slack_val; // direct overwrite; the modified-pin set is not touched
+}
+
+PlacerSetupSlacks::pin_range PlacerSetupSlacks::pins_with_modified_setup_slack() const {
+    return vtr::make_range(cluster_pins_with_modified_setup_slack_); // range over the set filled by incr_update_setup_slacks() / recompute_setup_slacks()
+}
diff --git a/vpr/src/place/timing/PlacerSetupSlacks.h b/vpr/src/place/timing/PlacerSetupSlacks.h
new file mode 100644
index 00000000000..7ffc450e94b
--- /dev/null
+++ b/vpr/src/place/timing/PlacerSetupSlacks.h
@@ -0,0 +1,120 @@
+
+#pragma once
+
+#include "vtr_vec_id_set.h"
+#include "timing_info_fwd.h"
+#include "clustered_netlist_utils.h"
+#include "place_delay_model.h"
+#include "vpr_net_pins_matrix.h"
+
+/**
+ * @brief PlacerSetupSlacks stores the RAW setup slack of every clustered netlist connection.
+ *
+ * Usage
+ * =====
+ * The methods and members of this class parallel those of PlacerCriticalities; the only
+ * difference is that it handles the RAW setup slacks reported by SetupTimingInfo instead
+ * of criticalities. See the documentation of PlacerCriticalities for more details.
+ *
+ * Unlike criticalities, RAW setup slacks are not confined to the range [0, 1]:
+ * they can take any positive or negative value.
+ *
+ * The class also allows iterating over the clustered netlist pins whose setup slack was
+ * modified by the last call to update_setup_slacks(). That facility is mainly used for
+ * incrementally committing the setup slack values into the `connection_setup_slack`
+ * structure used by many placer routines.
+ */
+class PlacerSetupSlacks {
+  public: // Types
+    using pin_iterator = vtr::vec_id_set<ClusterPinId>::iterator;
+    using net_iterator = vtr::vec_id_set<ClusterNetId>::iterator;
+
+    using pin_range = vtr::Range<pin_iterator>;
+    using net_range = vtr::Range<net_iterator>;
+
+  public: // Lifetime
+    ///@brief Constructor; allocates the storage of the timing_place_setup_slacks_ data structure.
+    PlacerSetupSlacks(const ClusteredNetlist& clb_nlist,
+                      const ClusteredPinAtomPinsLookup& netlist_pin_lookup,
+                      std::shared_ptr<const SetupTimingInfo> timing_info);
+
+    PlacerSetupSlacks(const PlacerSetupSlacks& other) = delete;
+    PlacerSetupSlacks& operator=(const PlacerSetupSlacks& other) = delete;
+
+  public: // Accessors
+    ///@brief Returns the stored setup slack of pin `ipin` of net `net`.
+    float setup_slack(ClusterNetId net, int ipin) const { return timing_place_setup_slacks_[net][ipin]; }
+
+    /**
+     * @brief Returns the range of ClusterPinIds whose setup slacks were modified
+     *        by the last call to PlacerSetupSlacks::update_setup_slacks().
+     */
+    pin_range pins_with_modified_setup_slack() const;
+
+  public: // Modifiers
+    /**
+     * @brief Refreshes the stored setup slacks from the atom netlist setup slacks
+     *        provided by timing_info_.
+     *
+     * Call this method after every timing analysis so that the setup slacks stored
+     * in this class stay in sync with the timing analyzer. If they fall out of sync,
+     * the setup slacks cannot be incrementally updated during the next timing
+     * analysis iteration.
+     */
+    void update_setup_slacks();
+
+    ///@brief Sets the recompute_required flag to force a from-scratch update.
+    void set_recompute_required() { recompute_required = true; }
+
+    ///@brief Overrides the stored setup slack of a particular connection.
+    void set_setup_slack(ClusterNetId net, int ipin, float slack_val);
+
+    ///@brief Sets `update_enabled` to true.
+    void enable_update() { update_enabled = true; }
+
+    ///@brief Sets `update_enabled` to false.
+    void disable_update() { update_enabled = false; }
+
+  private: // Data
+    const ClusteredNetlist& clb_nlist_;
+    const ClusteredPinAtomPinsLookup& pin_lookup_;
+    std::shared_ptr<const SetupTimingInfo> timing_info_;
+
+    /**
+     * @brief Matrix holding the raw setup slack value of each connection.
+     *
+     * Index range: [0..cluster_ctx.clb_nlist.nets().size()-1][1..num_pins-1]
+     */
+    ClbNetPinsMatrix<float> timing_place_setup_slacks_;
+
+    ///@brief Pins whose raw setup slack was modified by the last call to update_setup_slacks().
+    vtr::vec_id_set<ClusterPinId> cluster_pins_with_modified_setup_slack_;
+
+    /**
+     * @brief Collects the cluster pins that need updating according to the latest timing
+     *        analysis, enabling an incremental update of the setup slacks.
+     *
+     * Note that the pins are those which *timing_info* reports as having modified
+     * setup slacks, not those marked as modified by the timing analyzer.
+     */
+    void incr_update_setup_slacks();
+
+    /**
+     * @brief Collects all the sink pins in the netlist so that every one gets updated.
+     *
+     * The incremental counterpart is PlacerSetupSlacks::incr_update_setup_slacks().
+     */
+    void recompute_setup_slacks();
+
+    ///@brief Flag that enables/disables the update_setup_slacks() routine.
+    bool update_enabled = true;
+
+    /**
+     * @brief Flag indicating that setup slacks must be recomputed for all connections.
+     *
+     * Used by the method update_setup_slacks(). An incremental update is not possible
+     * if that method was not called after the previous timing info update.
+     */
+    bool recompute_required = true;
+};
+
diff --git a/vpr/src/place/timing/PlacerTimingCosts.cpp b/vpr/src/place/timing/PlacerTimingCosts.cpp
new file mode 100644
index 00000000000..d8ad6afafab
--- /dev/null
+++ b/vpr/src/place/timing/PlacerTimingCosts.cpp
@@ -0,0 +1,126 @@
+
+#include "PlacerTimingCosts.h"
+
+PlacerTimingCosts::PlacerTimingCosts(const ClusteredNetlist& nlist) {
+    auto nets = nlist.nets();
+
+    net_start_indices_.resize(nets.size());
+
+    // Walk through the netlist to count the connections (sink pins of non-ignored nets).
+    size_t iconn = 0;
+    for (ClusterNetId net : nets) {
+        // The placer always skips 'ignored' nets, so they don't affect timing
+        // costs; skip them here too and mark them as having no storage (OPEN)
+        if (nlist.net_is_ignored(net)) {
+            net_start_indices_[net] = OPEN;
+            continue;
+        }
+
+        // Save the starting index of the current net's connections, offset by -1 because
+        // sinks are indexed from [1..num_net_pins-1] (net drivers have no timing cost).
+        // NOTE: iconn is unsigned; for iconn == 0 this relies on wrap-around plus the conversion to int to yield -1.
+        net_start_indices_[net] = iconn - 1;
+
+        // Reserve space for all this net's connections
+        iconn += nlist.net_sinks(net).size();
+    }
+
+    const size_t num_connections = iconn;
+
+    // Find the first tree level with at least one leaf per connection (level 0 is the one just below the root)
+    size_t ilevel = 0;
+    while (num_nodes_in_level(ilevel) < num_connections) {
+        ++ilevel;
+    }
+    num_levels_ = ilevel + 1;
+
+    size_t num_leaves = num_nodes_in_level(ilevel);
+    size_t num_nodes_in_previous_level = num_nodes_in_level(ilevel - 1);
+
+    VTR_ASSERT_MSG(num_leaves >= num_connections, "Need at least as many leaves as connections");
+    VTR_ASSERT_MSG(num_connections == 0 || num_nodes_in_previous_level < num_connections,
+                   "Level before should have fewer nodes than connections (to ensure using the smallest binary tree)");
+
+    // We don't need to store all possible leaves if we have fewer connections (i.e. bottom-right of tree is empty)
+    size_t last_level_unused_nodes = num_nodes_in_level(ilevel) - num_connections;
+    size_t num_nodes = num_nodes_up_to_level(ilevel) - last_level_unused_nodes;
+
+    // Allocate connection costs and intermediate node values, all initially invalid (NaN)
+    connection_costs_ = std::vector<double>(num_nodes, std::numeric_limits<double>::quiet_NaN());
+
+    // The net start indices we calculated earlier didn't account for intermediate binary tree nodes.
+    // Shift the start indices so they point after the intermediate nodes (ignored nets keep OPEN).
+    size_t num_intermediate_nodes = num_nodes_up_to_level(ilevel - 1);
+    for (ClusterNetId net : nets) {
+        if (nlist.net_is_ignored(net)) continue;
+
+        net_start_indices_[net] = net_start_indices_[net] + num_intermediate_nodes;
+    }
+}
+
+///@brief Returns the sum of the subtree rooted at inode, re-summing only invalidated (NaN) nodes.
+double PlacerTimingCosts::total_cost_recurr(size_t inode) {
+    // Prune out-of-tree. Written as >= so that empty storage (e.g. a default-constructed
+    // object) cannot underflow size() - 1 and then index past the end.
+    if (inode >= connection_costs_.size()) {
+        return 0.;
+    }
+
+    // Valid pre-calculated intermediate result or valid leaf
+    if (!std::isnan(connection_costs_[inode])) {
+        return connection_costs_[inode];
+    }
+
+    // Recompute from the two children (out-of-tree children contribute 0)
+    // and cache the result at this node
+    double node_cost = total_cost_recurr(left_child(inode))
+                       + total_cost_recurr(right_child(inode));
+    connection_costs_[inode] = node_cost;
+    return node_cost;
+}
+
+///@brief Re-sums the subtree rooted at inode from its leaves, ignoring cached partial sums.
+double PlacerTimingCosts::total_cost_from_scratch(size_t inode) const {
+    // Prune out-of-tree (>= cannot underflow like size() - 1 does on empty storage)
+    if (inode >= connection_costs_.size()) {
+        return 0.;
+    }
+    // Leaves hold the connection costs themselves; without this case the total is always 0
+    if (inode >= num_nodes_up_to_level(num_levels_ - 2)) {
+        return connection_costs_[inode];
+    }
+    return total_cost_from_scratch(left_child(inode)) + total_cost_from_scratch(right_child(inode));
+}
+
+/**
+ * @brief Marks the cached partial sums above a modified connection cost as invalid (NaN),
+ *        so that total_cost_recurr() re-sums them on its next call.
+ *
+ * invalidated_cost is expected to point at a leaf element of connection_costs_.
+ */
+void PlacerTimingCosts::invalidate(const double* invalidated_cost) {
+    // Sanity checks: the pointer must address an element of connection_costs_
+    VTR_ASSERT_SAFE_MSG(invalidated_cost >= &connection_costs_[0],
+                        "Connection cost pointer should be after start of internal storage");
+    VTR_ASSERT_SAFE_MSG(invalidated_cost <= &connection_costs_[connection_costs_.size() - 1],
+                        "Connection cost pointer should be before end of internal storage");
+
+    // Recover the element's index from its address
+    size_t icost = invalidated_cost - &connection_costs_[0];
+
+    // ...which must lie past all intermediate nodes, i.e. be a leaf
+    VTR_ASSERT_SAFE(icost >= num_nodes_up_to_level(num_levels_ - 2));
+
+    // Climb from the leaf's parent towards the root, overwriting each cached
+    // partial sum with NaN. The walk ends at the root, or earlier at the first
+    // ancestor that is already NaN.
+    const double invalid = std::numeric_limits<double>::quiet_NaN();
+    for (size_t inode = parent(icost); !std::isnan(connection_costs_[inode]); inode = parent(inode)) {
+        connection_costs_[inode] = invalid;
+        if (inode == 0) {
+            break; // Reached the root, which has no parent
+        }
+    }
+
+    VTR_ASSERT_SAFE_MSG(std::isnan(connection_costs_[0]), "Invalidating any connection should have invalidated the root");
+}
\ No newline at end of file
diff --git a/vpr/src/place/timing/PlacerTimingCosts.h b/vpr/src/place/timing/PlacerTimingCosts.h
new file mode 100644
index 00000000000..5e1415581c3
--- /dev/null
+++ b/vpr/src/place/timing/PlacerTimingCosts.h
@@ -0,0 +1,242 @@
+
+#pragma once
+#include "vtr_vec_id_set.h"
+#include "timing_info_fwd.h"
+#include "clustered_netlist_utils.h"
+#include "place_delay_model.h"
+#include "vpr_net_pins_matrix.h"
+
+/**
+ * @brief PlacerTimingCosts mimics a 2D array of connection timing costs running from:
+ *        [0..cluster_ctx.clb_nlist.nets().size()-1][1..num_pins-1].
+ *
+ * It can be used similar to:
+ *
+ *      PlacerTimingCosts connection_timing_costs(cluster_ctx.clb_nlist); //Construct
+ *
+ *      //...
+ *
+ *      //Modify a connection cost
+ *      connection_timing_costs[net_id][ipin] = new_cost;
+ *
+ *      //Potentially other modifications...
+ *
+ *      //Calculate the updated timing cost, of all connections,
+ *      //incrementally based on modifications
+ *      double total_timing_cost = connection_timing_costs.total_cost();
+ *
+ * However behind the scenes PlacerTimingCosts tracks when connection costs are modified,
+ * and efficiently re-calculates the total timing cost incrementally based on the connections
+ * which have had their cost modified.
+ *
+ * Implementation
+ * ==============
+ * Internally, PlacerTimingCosts stores all connection costs in a flat array in the last part
+ * of connection_costs_.  To mimic 2d-array like access PlacerTimingCosts also uses two proxy
+ * classes which allow indexing in the net and pin dimensions (NetProxy and ConnectionProxy
+ * respectively).
+ *
+ * The first part of connection_costs_ stores intermediate sums of the connection costs for
+ * efficient incremental re-calculation. More concretely, connection_costs_ stores a binary
+ * tree, where leaves correspond to individual connection costs and intermediate nodes the
+ * partial sums of the connection costs. (The binary tree is stored implicitly in the
+ * connection_costs_ vector, using Eytzinger's/BFS layout.) By summing the entire binary
+ * tree we calculate the total timing cost over all connections.
+ *
+ * Using a binary tree allows us to efficiently re-calculate the timing costs when only a subset
+ * of connections are changed. This is done by 'invalidating' intermediate nodes (from leaves up
+ * to the root) which have descendants (leaves) with modified connection costs. When the
+ * total_cost() method is called, it recursively walks the binary tree to re-calculate the cost.
+ * Only invalidated nodes are traversed, with valid nodes just returning their previously
+ * calculated (and unchanged) value.
+ *
+ * For a circuit with 'K' connections, of which 'k' have changed (typically k << K), this can
+ * be done in O(k log K) time.
+ *
+ * It is important to note that due to limited floating point precision, floating point
+ * arithmetic has an order dependence (due to round-off). Using a binary tree to total
+ * the timing connection costs allows us to incrementally update the total timing cost while
+ * maintaining the *same order of operations* as if it was re-computed from scratch. This
+ * ensures we *always* get consistent results regardless of what/when connections are changed.
+ *
+ * Proxy Classes
+ * =============
+ * NetProxy is returned by PlacerTimingCosts's operator[], and stores a pointer to the start of
+ * internal storage of that net's connection costs.
+ *
+ * ConnectionProxy is returned by NetProxy's operator[], and holds a reference to a particular
+ * element of the internal storage pertaining to a specific connection's cost. ConnectionProxy
+ * supports assignment, allowing clients to modify the connection cost. It also detects if the
+ * assigned value differs from the previous value and if so, calls PlacerTimingCosts's
+ * invalidate() method on that connection cost.
+ *
+ * PlacerTimingCosts's invalidate() method marks the cost element's ancestors as invalid (NaN)
+ * so they will be re-calculated by PlacerTimingCosts' total_cost() method.
+ */
+class PlacerTimingCosts {
+  public:
+    PlacerTimingCosts() = default;
+
+    PlacerTimingCosts(const ClusteredNetlist& nlist);
+
+    /**
+     * @brief Proxy class representing a connection cost.
+     *
+     * Supports modification of connection cost while detecting
+     * changes and reporting them up to PlacerTimingCosts.
+     */
+    class ConnectionProxy {
+      public:
+        ConnectionProxy(PlacerTimingCosts* timing_costs, double& connection_cost)
+            : timing_costs_(timing_costs)
+            , connection_cost_(connection_cost) {}
+
+        ///@brief Allow clients to modify the connection cost via assignment.
+        ConnectionProxy& operator=(double new_cost) {
+            if (new_cost != connection_cost_) {
+                //If connection cost changed, update it, and mark it
+                //as invalidated
+                connection_cost_ = new_cost;
+                timing_costs_->invalidate(&connection_cost_);
+            }
+            return *this;
+        }
+
+        /**
+         * @brief Support getting the current connection cost as a double.
+         *
+         * Useful for client code operating on the cost values (e.g. difference between costs).
+         */
+        operator double() const {
+            return connection_cost_;
+        }
+
+      private:
+        PlacerTimingCosts* timing_costs_;
+        double& connection_cost_;
+    };
+
+    /**
+     * @brief Proxy class representing the connection costs of a net.
+     *
+     * Supports indexing by pin index to retrieve the ConnectionProxy for that pin/connection.
+     */
+    class NetProxy {
+      public:
+        NetProxy(PlacerTimingCosts* timing_costs, double* net_sink_costs)
+            : timing_costs_(timing_costs)
+            , net_sink_costs_(net_sink_costs) {}
+
+        ///@brief Indexes into the specific net pin/connection.
+        ConnectionProxy operator[](size_t ipin) {
+            return ConnectionProxy(timing_costs_, net_sink_costs_[ipin]);
+        }
+
+        const ConnectionProxy operator[](size_t ipin) const {
+            return ConnectionProxy(timing_costs_, net_sink_costs_[ipin]);
+        }
+
+      private:
+        PlacerTimingCosts* timing_costs_;
+        double* net_sink_costs_;
+    };
+
+    ///@brief Indexes into the specific net.
+    NetProxy operator[](ClusterNetId net_id) {
+        VTR_ASSERT_SAFE(net_start_indices_[net_id] >= 0);
+
+        double* net_connection_costs = &connection_costs_[net_start_indices_[net_id]];
+        return NetProxy(this, net_connection_costs);
+    }
+
+    NetProxy operator[](ClusterNetId net_id) const {
+        VTR_ASSERT_SAFE(net_start_indices_[net_id] >= 0);
+
+        const double* net_connection_costs = &connection_costs_[net_start_indices_[net_id]];
+        return NetProxy(const_cast<PlacerTimingCosts*>(this), const_cast<double*>(net_connection_costs));
+    }
+
+    void clear() {
+        connection_costs_.clear();
+        net_start_indices_.clear();
+    }
+
+    void swap(PlacerTimingCosts& other) {
+        std::swap(connection_costs_, other.connection_costs_);
+        std::swap(net_start_indices_, other.net_start_indices_);
+        std::swap(num_levels_, other.num_levels_);
+    }
+
+    /**
+     * @brief Calculates the total cost of all connections efficiently
+     *        in the face of modified connection costs.
+     */
+    double total_cost() {
+        double cost = total_cost_recurr(0); //Root (kept in double: the sum is not narrowed to float)
+
+        VTR_ASSERT_DEBUG_MSG(cost == total_cost_from_scratch(0),
+                             "Expected incremental and from-scratch costs to be consistent");
+
+        return cost;
+    }
+
+  private:
+    ///@brief Recursively calculate and update the timing cost rooted at inode.
+    double total_cost_recurr(size_t inode);
+    ///@brief Recomputes the cost rooted at inode from scratch; used by total_cost() as a debug consistency check.
+    double total_cost_from_scratch(size_t inode) const;
+
+    ///@brief Friend-ed so it can call invalidate().
+    friend ConnectionProxy;
+
+    void invalidate(const double* invalidated_cost);
+
+    static size_t left_child(size_t i) {
+        return 2 * i + 1;
+    }
+
+    static size_t right_child(size_t i) {
+        return 2 * i + 2;
+    }
+
+    static size_t parent(size_t i) {
+        return (i - 1) / 2;
+    }
+
+    /**
+     * @brief Returns the number of nodes in ilevel'th level, where level 0 is
+     *        the level directly below the root (2 nodes).
+     * If ilevel is negative, return 0, since the root shouldn't
+     * be counted as a leaf node candidate.
+     */
+    static size_t num_nodes_in_level(int ilevel) {
+        return ilevel < 0 ? 0 : (static_cast<size_t>(2) << ilevel);
+    }
+
+    ///@brief Returns the total number of nodes from the root down to level ilevel (inclusive); 1 for ilevel == -1.
+    static size_t num_nodes_up_to_level(int ilevel) {
+        return (static_cast<size_t>(2) << (ilevel + 1)) - 1;
+    }
+
+  private:
+    /**
+     * @brief Vector storing the implicit binary tree of connection costs.
+     *
+     * The actual connections are stored at the end of the vector
+     * (last level of the binary tree). The earlier portions of
+     * the tree are the intermediate nodes.
+     *
+     * The methods left_child()/right_child()/parent() can be used
+     * to traverse the tree by indices into this vector.
+     */
+    std::vector<double> connection_costs_;
+
+    /**
+     * @brief Vector storing the indices of the first connection
+     *        for each net in the netlist, used for indexing by net.
+     */
+    vtr::vector<ClusterNetId, int> net_start_indices_;
+
+    ///@brief Number of levels in the binary tree, not counting the root (see num_nodes_in_level()).
+    size_t num_levels_ = 0;
+};
diff --git a/vpr/src/place/timing/delay_model/PlacementDelayModelCreator.cpp b/vpr/src/place/timing/delay_model/PlacementDelayModelCreator.cpp
new file mode 100644
index 00000000000..3482cd091e0
--- /dev/null
+++ b/vpr/src/place/timing/delay_model/PlacementDelayModelCreator.cpp
@@ -0,0 +1,80 @@
+
+
+#include "PlacementDelayModelCreator.h"
+
+#include "place_delay_model.h"
+#include "simple_delay_model.h"
+#include "delta_delay_model.h"
+#include "override_delay_model.h"
+
+#include "vtr_time.h"
+#include "physical_types.h"
+#include "place_and_route.h"
+
+/// Returns the largest `length` among the given segment types (0 if there are none).
+static int get_longest_segment_length(const std::vector<t_segment_inf>& segment_inf) {
+    int longest = 0;
+
+    for (const t_segment_inf& segment : segment_inf) {
+        if (longest < segment.length) {
+            longest = segment.length;
+        }
+    }
+    return longest;
+}
+
+std::unique_ptr<PlaceDelayModel>
+PlacementDelayModelCreator::create_delay_model(const t_placer_opts& placer_opts,
+                                               const t_router_opts& router_opts,
+                                               const Netlist<>& net_list,
+                                               t_det_routing_arch* det_routing_arch,
+                                               std::vector<t_segment_inf>& segment_inf,
+                                               t_chan_width_dist chan_width_dist,
+                                               const std::vector<t_direct_inf>& directs,
+                                               bool is_flat) {
+    vtr::ScopedStartFinishTimer timer("Computing placement delta delay look-up");
+
+    // Allocate the routing structures for the channel widths derived from the router options and width distribution
+    t_chan_width channel_widths = setup_chan_width(router_opts, chan_width_dist);
+    alloc_routing_structs(channel_widths, router_opts, det_routing_arch, segment_inf, directs, is_flat);
+
+    const RouterLookahead* lookahead = get_cached_router_lookahead(
+        *det_routing_arch, router_opts.lookahead_type, router_opts.write_router_lookahead,
+        router_opts.read_router_lookahead, segment_inf, is_flat);
+    RouterDelayProfiler route_profiler(net_list, lookahead, is_flat);
+
+    int longest_length = get_longest_segment_length(segment_inf);
+    float min_cross_layer_delay = get_min_cross_layer_delay();
+
+    // Instantiate the delay model implementation selected by the placer options
+    std::unique_ptr<PlaceDelayModel> place_delay_model;
+    switch (placer_opts.delay_model_type) {
+        case PlaceDelayModelType::SIMPLE:
+            place_delay_model = std::make_unique<SimpleDelayModel>();
+            break;
+        case PlaceDelayModelType::DELTA:
+            place_delay_model = std::make_unique<DeltaDelayModel>(min_cross_layer_delay, is_flat);
+            break;
+        case PlaceDelayModelType::DELTA_OVERRIDE:
+            place_delay_model = std::make_unique<OverrideDelayModel>(min_cross_layer_delay, is_flat);
+            break;
+        default:
+            VTR_ASSERT_MSG(false, "Invalid placer delay model");
+    }
+
+    // Load the model from the given look-up file if one was specified, otherwise compute it
+    if (!placer_opts.read_placement_delay_lookup.empty()) {
+        place_delay_model->read(placer_opts.read_placement_delay_lookup);
+    } else {
+        place_delay_model->compute(route_profiler, placer_opts, router_opts, longest_length);
+    }
+
+    if (!placer_opts.write_placement_delay_lookup.empty()) {
+        place_delay_model->write(placer_opts.write_placement_delay_lookup);
+    }
+
+    // free all data structures that are no longer needed
+    free_routing_structs();
+
+    return place_delay_model;
+}
\ No newline at end of file
diff --git a/vpr/src/place/timing/delay_model/PlacementDelayModelCreator.h b/vpr/src/place/timing/delay_model/PlacementDelayModelCreator.h
new file mode 100644
index 00000000000..c92b67d4854
--- /dev/null
+++ b/vpr/src/place/timing/delay_model/PlacementDelayModelCreator.h
@@ -0,0 +1,30 @@
+
+#pragma once
+
+#include <memory>
+#include <vector>
+
+#include "netlist.h"
+
+class PlaceDelayModel;
+struct t_placer_opts;
+struct t_router_opts;
+struct t_det_routing_arch;
+struct t_segment_inf;
+struct t_chan_width_dist;
+struct t_direct_inf;
+
+class PlacementDelayModelCreator {
+  public:
+    // Deleted constructor: the class cannot be instantiated; it only provides the static factory below
+    PlacementDelayModelCreator() = delete;
+
+    static std::unique_ptr<PlaceDelayModel> create_delay_model(const t_placer_opts& placer_opts,
+                                                               const t_router_opts& router_opts,
+                                                               const Netlist<>& net_list,
+                                                               t_det_routing_arch* det_routing_arch,
+                                                               std::vector<t_segment_inf>& segment_inf,
+                                                               t_chan_width_dist chan_width_dist,
+                                                               const std::vector<t_direct_inf>& directs,
+                                                               bool is_flat);
+};
diff --git a/vpr/src/place/timing/delay_model/compute_delta_delays_utils.cpp b/vpr/src/place/timing/delay_model/compute_delta_delays_utils.cpp
new file mode 100644
index 00000000000..725159406c0
--- /dev/null
+++ b/vpr/src/place/timing/delay_model/compute_delta_delays_utils.cpp
@@ -0,0 +1,968 @@
+
+#include "compute_delta_delays_utils.h"
+
+#include "vtr_time.h"
+#include "vtr_math.h"
+#include "physical_types.h"
+#include "globals.h"
+#include "router_delay_profiling.h"
+
+/// Indicates the delta delay value has not been calculated
+static constexpr float UNINITIALIZED_DELTA = -1;
+/// Marks a delta whose source or sink tile is EMPTY, or whose source tile type is not allowed for sampling
+static constexpr float EMPTY_DELTA = -2;
+/// Marks a delta for which no valid delay exists (e.g. no route was found); stored as +infinity
+static constexpr float IMPOSSIBLE_DELTA = std::numeric_limits<float>::infinity();
+
+static vtr::NdMatrix<float, 4> compute_delta_delays(RouterDelayProfiler& route_profiler, // handed to the sampling routines
+                                                    const t_placer_opts& placer_opts,    // sampling method, allowed tiles, reducer
+                                                    const t_router_opts& router_opts,    // forwarded to the sampling routines
+                                                    bool measure_directconnect,          // forwarded to the sampling routines
+                                                    size_t longest_length,               // source offset from the device edge
+                                                    bool is_flat);                       // forwarded to the sampling routines
+/// Replaces every EMPTY_DELTA entry with the result of find_neighboring_average() (max distance 2).
+static void fix_empty_coordinates(vtr::NdMatrix<float, 4>& delta_delays);
+/// Replaces every IMPOSSIBLE_DELTA entry with the result of find_neighboring_average() (max distance 5).
+static void fill_impossible_coordinates(vtr::NdMatrix<float, 4>& delta_delays);
+/// Reports an error for any negative delta delay over the device grid; returns true otherwise.
+static bool verify_delta_delays(const vtr::NdMatrix<float, 4>& delta_delays);
+/// Samples delays from one source tile to each sink tile in [start_x, end_x] x [start_y, end_y], one route per pair.
+static void generic_compute_matrix_iterative_astar(RouterDelayProfiler& route_profiler,
+                                                   vtr::Matrix<std::vector<float>>& matrix,
+                                                   int from_layer_num,
+                                                   int to_layer_num,
+                                                   int source_x,
+                                                   int source_y,
+                                                   int start_x,
+                                                   int start_y,
+                                                   int end_x,
+                                                   int end_y,
+                                                   const t_router_opts& router_opts,
+                                                   bool measure_directconnect,
+                                                   const std::set<std::string>& allowed_types,
+                                                   bool /*is_flat*/);
+/// Samples the same rectangle of sinks, but reads sink delays from one path-delay table computed per driver class.
+static void generic_compute_matrix_dijkstra_expansion(RouterDelayProfiler& route_profiler,
+                                                      vtr::Matrix<std::vector<float>>& matrix,
+                                                      int from_layer_num,
+                                                      int to_layer_num,
+                                                      int source_x,
+                                                      int source_y,
+                                                      int start_x,
+                                                      int start_y,
+                                                      int end_x,
+                                                      int end_y,
+                                                      const t_router_opts& router_opts,
+                                                      bool measure_directconnect,
+                                                      const std::set<std::string>& allowed_types,
+                                                      bool is_flat);
+
+/**
+ * @brief Routes between a source and sink location to calculate the delay.
+ *
+ * This function computes the delay of a routed connection between a source and sink node
+ * specified by their coordinates and layers. It iterates over the best driver and sink pin
+ * classes to find a valid routing path and calculates the delay if a path exists.
+ *
+ * @param route_profiler Reference to the `RouterDelayProfiler` responsible for calculating routing delays.
+ * @param source_x The x-coordinate of the source location.
+ * @param source_y The y-coordinate of the source location.
+ * @param source_layer The layer index of the source node.
+ * @param sink_x The x-coordinate of the sink location.
+ * @param sink_y The y-coordinate of the sink location.
+ * @param sink_layer The layer index of the sink node.
+ * @param router_opts Routing options used for delay calculation.
+ * @param measure_directconnect If `true`, includes direct connect delays; otherwise, skips direct connections.
+ *
+ * @return The calculated routing delay. If routing fails, it returns `IMPOSSIBLE_DELTA`.
+ */
+static float route_connection_delay(RouterDelayProfiler& route_profiler,
+                                    int source_x,
+                                    int source_y,
+                                    int source_layer,
+                                    int sink_x,
+                                    int sink_y,
+                                    int sink_layer,
+                                    const t_router_opts& router_opts,
+                                    bool measure_directconnect);
+
+/**
+ * @brief Computes a reduced value from a vector of delay values using the specified reduction method.
+ *
+ * @param delays A reference to a vector of delay values. This vector may be modified
+ *               (e.g., sorted) depending on the reducer used.
+ * @param reducer The reduction method to be applied.
+ *
+ * @return The reduced delay value. If the input vector is empty, the function
+ *         returns `IMPOSSIBLE_DELTA`.
+ *
+ * @throws VPR_FATAL_ERROR if the reducer is unrecognized.
+ */
+static float delay_reduce(std::vector<float>& delays, e_reducer reducer);
+
+/**
+ * @brief Adds a delay value to a 2D matrix of delay vectors.
+ *
+ * Updates the delay vector at position (`delta_x`, `delta_y`) in the matrix.
+ * If the element contains only `EMPTY_DELTA`, it is replaced with the new delay;
+ * otherwise, the delay is appended to the vector.
+ *
+ * @param matrix A 2D matrix of delay vectors.
+ * @param delta_x The x-index in the matrix.
+ * @param delta_y The y-index in the matrix.
+ * @param delay The delay value to add.
+ */
+static void add_delay_to_matrix(vtr::Matrix<std::vector<float>>& matrix,
+                                int delta_x,
+                                int delta_y,
+                                float delay);
+
+/**
+ * @brief Computes the average delay for a routing span.
+ *
+ * This function calculates the average placement delay for a routing span starting from a
+ * given layer and spanning a region defined by delta x and delta y. It iteratively searches
+ * for valid delay values within an expanding neighborhood (starting from a distance of 1)
+ * around the specified delta offsets and layer, until valid values are found or
+ * the maximum search distance (`max_distance`) is reached.
+ *
+ * @param matrix A 4D matrix of delay values indexed by `[from_layer][to_layer][delta_x][delta_y]`.
+ * @param from_layer The starting layer index of the routing span.
+ * @param to_tile_loc A structure holding the delta offsets (`x` and `y`) and the target layer index (`layer_num`).
+ * @param max_distance The maximum neighborhood distance to search for valid delay values.
+ *
+ * @return The average of valid delay values within the search range. If no valid delays
+ *         are found up to the maximum distance, the function returns `IMPOSSIBLE_DELTA`.
+ *
+ * @note The function performs a Manhattan-distance-based neighborhood search around the target location.
+ */
+static float find_neighboring_average(vtr::NdMatrix<float, 4>& matrix,
+                                      int from_layer,
+                                      t_physical_tile_loc to_tile_loc,
+                                      int max_distance);
+
+/***************************************************************************************/
+
+static vtr::NdMatrix<float, 4> compute_delta_delays(RouterDelayProfiler& route_profiler,
+                                                    const t_placer_opts& placer_opts,
+                                                    const t_router_opts& router_opts,
+                                                    bool measure_directconnect,
+                                                    size_t longest_length,
+                                                    bool is_flat) {
+    // Samples delays from several source tiles for every (from_layer, to_layer) pair, then reduces the
+    // samples gathered for each (delta_x, delta_y) to one value using placer_opts.delay_model_reducer.
+    const auto& device_ctx = g_vpr_ctx.device();
+    const auto& grid = device_ctx.grid;
+
+    const size_t num_layers = grid.get_num_layers();
+    const size_t device_width = grid.width();
+    const size_t device_height = grid.height();
+
+    /* To avoid edge effects we place the source at least 'longest_length' away
+     * from the device edge and route from there for all possible delta values < dimension
+     */
+
+    //   +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+    //   +                 |                       |               +
+    //   +        A        |           B           |       C       +
+    //   +                 |                       |               +
+    //   +-----------------\-----------------------.---------------+
+    //   +                 |                       |               +
+    //   +                 |                       |               +
+    //   +                 |                       |               +
+    //   +                 |                       |               +
+    //   +        D        |           E           |       F       +
+    //   +                 |                       |               +
+    //   +                 |                       |               +
+    //   +                 |                       |               +
+    //   +                 |                       |               +
+    //   +-----------------*-----------------------/---------------+
+    //   +                 |                       |               +
+    //   +        G        |           H           |       I       +
+    //   +                 |                       |               +
+    //   +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+    //
+    //   * = (low_x, low_y)
+    //   . = (high_x, high_y)
+    //   / = (high_x, low_y)
+    //   \ = (low_x, high_y)
+    //   + = device edge
+    const size_t mid_x = vtr::nint(device_width / 2);
+    const size_t mid_y = vtr::nint(device_height / 2);
+    const size_t low_x = std::min(longest_length, mid_x);
+    const size_t low_y = std::min(longest_length, mid_y);
+    const size_t high_x = (longest_length <= device_width)  ? std::max(device_width - longest_length, mid_x) : mid_x;
+    const size_t high_y = (longest_length <= device_height) ? std::max(device_height - longest_length, mid_y) : mid_y;
+
+    vtr::NdMatrix<float, 4> delta_delays({num_layers, num_layers, device_width, device_height}); // [from_layer][to_layer][dx][dy]
+
+    std::set<std::string> allowed_types;
+    if (!placer_opts.allowed_tiles_for_delay_model.empty()) {
+        std::vector<std::string> allowed_types_vector = vtr::split(placer_opts.allowed_tiles_for_delay_model, ",");
+        allowed_types = std::set(allowed_types_vector.begin(), allowed_types_vector.end());
+    }
+
+    for (int from_layer_num = 0; from_layer_num < (int)num_layers; from_layer_num++) {
+        for (int to_layer_num = 0; to_layer_num < (int)num_layers; to_layer_num++) {
+            vtr::NdMatrix<std::vector<float>, 2> sampled_delta_delays({device_width, device_height});
+
+            // Find the lowest y location on the left edge with a non-empty block
+            int y = 0;
+            int x = 0;
+            t_physical_tile_type_ptr src_type = nullptr;
+            for (x = 0; x < (int)device_width; ++x) {
+                for (y = 0; y < (int)device_height; ++y) {
+                    t_physical_tile_type_ptr type = grid.get_physical_type({x, y, from_layer_num});
+
+                    if (type != device_ctx.EMPTY_PHYSICAL_TILE_TYPE) {
+                        // check if the tile type is among the allowed types
+                        if (!allowed_types.empty() && allowed_types.find(type->name) == allowed_types.end()) {
+                            continue;
+                        }
+                        src_type = type;
+                        break;
+                    }
+                }
+                if (src_type != nullptr) {
+                    break;
+                }
+            }
+            VTR_ASSERT(src_type != nullptr);
+
+            auto generic_compute_matrix = (placer_opts.place_delta_delay_matrix_calculation_method == e_place_delta_delay_algorithm::ASTAR_ROUTE) ? generic_compute_matrix_iterative_astar : generic_compute_matrix_dijkstra_expansion;
+
+#ifdef VERBOSE
+            VTR_LOG("Computing from lower left edge (%d,%d):\n", x, y);
+#endif
+            generic_compute_matrix(route_profiler, sampled_delta_delays,
+                                   from_layer_num, to_layer_num,
+                                   x, y,
+                                   x, y,
+                                   device_width - 1, device_height - 1,
+                                   router_opts,
+                                   measure_directconnect, allowed_types,
+                                   is_flat);
+
+            // Find the lowest x location on the bottom edge with a non-empty block
+            src_type = nullptr;
+            for (y = 0; y < (int)device_height; ++y) {
+                for (x = 0; x < (int)device_width; ++x) {
+                    t_physical_tile_type_ptr type = grid.get_physical_type({x, y, from_layer_num});
+
+                    if (type != device_ctx.EMPTY_PHYSICAL_TILE_TYPE) {
+                        // check if the tile type is among the allowed types
+                        if (!allowed_types.empty() && allowed_types.find(type->name) == allowed_types.end()) {
+                            continue;
+                        }
+                        src_type = type;
+                        break;
+                    }
+                }
+                if (src_type) {
+                    break;
+                }
+            }
+            VTR_ASSERT(src_type != nullptr);
+#ifdef VERBOSE
+            VTR_LOG("Computing from left bottom edge (%d,%d):\n", x, y);
+#endif
+            generic_compute_matrix(route_profiler, sampled_delta_delays,
+                                   from_layer_num, to_layer_num,
+                                   x, y,
+                                   x, y,
+                                   device_width - 1, device_height - 1,
+                                   router_opts,
+                                   measure_directconnect, allowed_types,
+                                   is_flat);
+
+            //Since the other delta delay values may have suffered from edge effects,
+            //we recalculate deltas within regions B, C, E, F
+#ifdef VERBOSE
+            VTR_LOG("Computing from low/low:\n");
+#endif
+            generic_compute_matrix(route_profiler, sampled_delta_delays,
+                                   from_layer_num, to_layer_num,
+                                   low_x, low_y,
+                                   low_x, low_y,
+                                   device_width - 1, device_height - 1,
+                                   router_opts,
+                                   measure_directconnect, allowed_types,
+                                   is_flat);
+
+            //Since the other delta delay values may have suffered from edge effects,
+            //we recalculate deltas within regions D, E, G, H
+#ifdef VERBOSE
+            VTR_LOG("Computing from high/high:\n");
+#endif
+            generic_compute_matrix(route_profiler, sampled_delta_delays,
+                                   from_layer_num, to_layer_num,
+                                   high_x, high_y,
+                                   0, 0,
+                                   high_x, high_y,
+                                   router_opts,
+                                   measure_directconnect, allowed_types,
+                                   is_flat);
+
+            //Since the other delta delay values may have suffered from edge effects,
+            //we recalculate deltas within regions A, B, D, E
+#ifdef VERBOSE
+            VTR_LOG("Computing from high/low:\n");
+#endif
+            generic_compute_matrix(route_profiler, sampled_delta_delays,
+                                   from_layer_num, to_layer_num,
+                                   high_x, low_y,
+                                   0, low_y,
+                                   high_x, device_height - 1,
+                                   router_opts,
+                                   measure_directconnect, allowed_types,
+                                   is_flat);
+
+            //Since the other delta delay values may have suffered from edge effects,
+            //we recalculate deltas within regions E, F, H, I
+#ifdef VERBOSE
+            VTR_LOG("Computing from low/high:\n");
+#endif
+            generic_compute_matrix(route_profiler, sampled_delta_delays,
+                                   from_layer_num, to_layer_num,
+                                   low_x, high_y,
+                                   low_x, 0,
+                                   device_width - 1, high_y,
+                                   router_opts,
+                                   measure_directconnect, allowed_types,
+                                   is_flat);
+            for (size_t dx = 0; dx < sampled_delta_delays.dim_size(0); ++dx) {
+                for (size_t dy = 0; dy < sampled_delta_delays.dim_size(1); ++dy) {
+                    delta_delays[from_layer_num][to_layer_num][dx][dy] = delay_reduce(sampled_delta_delays[dx][dy], placer_opts.delay_model_reducer);
+                }
+            }
+        }
+    }
+
+    return delta_delays;
+}
+
+static void fix_empty_coordinates(vtr::NdMatrix<float, 4>& delta_delays) {
+    // Patch every EMPTY_DELTA entry with the average of nearby entries.
+    //
+    // A delta can be left empty when the sampled locations happened to have no
+    // connection at that offset. Denser sampling would likely have produced a
+    // value, so a small neighbourhood average is used to fill the hole.
+    constexpr int kSearchRadius = 2;
+    for (int src_layer = 0; src_layer < static_cast<int>(delta_delays.dim_size(0)); ++src_layer) {
+        for (int dst_layer = 0; dst_layer < static_cast<int>(delta_delays.dim_size(1)); ++dst_layer) {
+            for (int dx = 0; dx < static_cast<int>(delta_delays.dim_size(2)); ++dx) {
+                for (int dy = 0; dy < static_cast<int>(delta_delays.dim_size(3)); ++dy) {
+                    const bool is_hole = (delta_delays[src_layer][dst_layer][dx][dy] == EMPTY_DELTA);
+                    if (!is_hole) {
+                        continue;
+                    }
+
+                    const float patched = find_neighboring_average(delta_delays, src_layer,
+                                                                   {dx, dy, dst_layer}, kSearchRadius);
+                    delta_delays[src_layer][dst_layer][dx][dy] = patched;
+                }
+            }
+        }
+    }
+}
+
+static void fill_impossible_coordinates(vtr::NdMatrix<float, 4>& delta_delays) {
+    // Patch every IMPOSSIBLE_DELTA entry with the average of nearby entries.
+    //
+    // A delta is impossible when an IPIN could not be reached from the sampling
+    // OPIN, e.g. because a sampled pin is specialized. Left in the delay matrix,
+    // such a value would produce invalid slacks.
+    //
+    // The search distance is 5 (larger than for empty deltas): a poor prediction
+    // is preferable to an invalid value that triggers a slack assertion.
+    constexpr int kSearchRadius = 5;
+    for (int src_layer = 0; src_layer < static_cast<int>(delta_delays.dim_size(0)); ++src_layer) {
+        for (int dst_layer = 0; dst_layer < static_cast<int>(delta_delays.dim_size(1)); ++dst_layer) {
+            for (int dx = 0; dx < static_cast<int>(delta_delays.dim_size(2)); ++dx) {
+                for (int dy = 0; dy < static_cast<int>(delta_delays.dim_size(3)); ++dy) {
+                    const bool is_unreachable = (delta_delays[src_layer][dst_layer][dx][dy] == IMPOSSIBLE_DELTA);
+                    if (!is_unreachable) {
+                        continue;
+                    }
+                    const float patched = find_neighboring_average(delta_delays, src_layer,
+                                                                   {dx, dy, dst_layer}, kSearchRadius);
+                    delta_delays[src_layer][dst_layer][dx][dy] = patched;
+                }
+            }
+        }
+    }
+}
+
+/// Checks that no delta delay over the device grid is negative.
+/// A negative entry is reported through VPR_ERROR (error type VPR_ERROR_PLACE);
+/// returns true when every entry passes.
+static bool verify_delta_delays(const vtr::NdMatrix<float, 4>& delta_delays) {
+    const auto& device_ctx = g_vpr_ctx.device();
+    const auto& grid = device_ctx.grid;
+    for (int from_layer_num = 0; from_layer_num < grid.get_num_layers(); ++from_layer_num) {
+        for (int to_layer_num = 0; to_layer_num < grid.get_num_layers(); ++to_layer_num) {
+            for (size_t x = 0; x < grid.width(); ++x) {
+                for (size_t y = 0; y < grid.height(); ++y) {
+                    float delta_delay = delta_delays[from_layer_num][to_layer_num][x][y];
+                    if (delta_delay < 0.) {
+                        VPR_ERROR(VPR_ERROR_PLACE,
+                                  "Found invalid negative delay %g for delta [%d,%d,%zu,%zu]",
+                                  delta_delay, from_layer_num, to_layer_num, x, y); // x, y are size_t, hence %zu
+                    }
+                }
+            }
+        }
+    }
+    return true;
+}
+
+/**
+ * @brief Samples delays from one source tile to each sink tile in the rectangle
+ *        [start_x, end_x] x [start_y, end_y], routing every pair separately.
+ *
+ * Results are collected in `matrix` at (|sink_x - source_x|, |sink_y - source_y|).
+ * If the source or sink tile is EMPTY, or `allowed_types` is non-empty and lacks
+ * the source tile type, a still-unsampled entry is only marked with EMPTY_DELTA.
+ */
+static void generic_compute_matrix_iterative_astar(RouterDelayProfiler& route_profiler,
+                                                   vtr::Matrix<std::vector<float>>& matrix,
+                                                   int from_layer_num,
+                                                   int to_layer_num,
+                                                   int source_x,
+                                                   int source_y,
+                                                   int start_x,
+                                                   int start_y,
+                                                   int end_x,
+                                                   int end_y,
+                                                   const t_router_opts& router_opts,
+                                                   bool measure_directconnect,
+                                                   const std::set<std::string>& allowed_types,
+                                                   bool /*is_flat*/) {
+    const auto& device_ctx = g_vpr_ctx.device();
+    const auto& grid = device_ctx.grid;
+
+    // The source tile is identical for every sink, so classify it once up front.
+    t_physical_tile_type_ptr src_type = grid.get_physical_type({source_x, source_y, from_layer_num});
+    const bool is_allowed_type = allowed_types.empty() || allowed_types.find(src_type->name) != allowed_types.end();
+    const bool src_unusable = (src_type == device_ctx.EMPTY_PHYSICAL_TILE_TYPE) || !is_allowed_type;
+
+    for (int sink_x = start_x; sink_x <= end_x; sink_x++) {
+        for (int sink_y = start_y; sink_y <= end_y; sink_y++) {
+            const int delta_x = abs(sink_x - source_x);
+            const int delta_y = abs(sink_y - source_y);
+            std::vector<float>& samples = matrix[delta_x][delta_y];
+            t_physical_tile_type_ptr sink_type = grid.get_physical_type({sink_x, sink_y, to_layer_num});
+            if (src_unusable || sink_type == device_ctx.EMPTY_PHYSICAL_TILE_TYPE) {
+                if (samples.empty()) {
+                    // Only set empty target if we don't already have a valid delta delay
+                    samples.push_back(EMPTY_DELTA);
+#ifdef VERBOSE
+                    VTR_LOG("Computed delay: %12s delta: %d,%d (src: %d,%d sink: %d,%d)\n",
+                            "EMPTY",
+                            delta_x, delta_y,
+                            source_x, source_y,
+                            sink_x, sink_y);
+#endif
+                }
+                continue;
+            }
+
+            // Valid start/end: route this pair and record the measured delay
+            float delay = route_connection_delay(route_profiler,
+                                                 source_x, source_y, from_layer_num,
+                                                 sink_x, sink_y, to_layer_num,
+                                                 router_opts, measure_directconnect);
+#ifdef VERBOSE
+            VTR_LOG("Computed delay: %12g delta: %d,%d (src: %d,%d sink: %d,%d)\n",
+                    delay,
+                    delta_x, delta_y,
+                    source_x, source_y,
+                    sink_x, sink_y);
+#endif
+            if (samples.size() == 1 && samples[0] == EMPTY_DELTA) {
+                samples[0] = delay; // Overwrite empty delta
+            } else {
+                samples.push_back(delay); // Collect delta
+            }
+        }
+    }
+}
+
+static void generic_compute_matrix_dijkstra_expansion(RouterDelayProfiler& /*route_profiler*/,
+                                                      vtr::Matrix<std::vector<float>>& matrix,
+                                                      int from_layer_num,
+                                                      int to_layer_num,
+                                                      int source_x,
+                                                      int source_y,
+                                                      int start_x,
+                                                      int start_y,
+                                                      int end_x,
+                                                      int end_y,
+                                                      const t_router_opts& router_opts,
+                                                      bool measure_directconnect,
+                                                      const std::set<std::string>& allowed_types,
+                                                      bool is_flat) {
+    const auto& device_ctx = g_vpr_ctx.device();
+    // Fills 'matrix' with delays from the source tile to the sinks in [start_x, end_x] x [start_y, end_y].
+    t_physical_tile_type_ptr src_type = device_ctx.grid.get_physical_type({source_x, source_y, from_layer_num});
+    bool is_allowed_type = allowed_types.empty() || allowed_types.find(src_type->name) != allowed_types.end();
+    if (src_type == device_ctx.EMPTY_PHYSICAL_TILE_TYPE || !is_allowed_type) { // unusable source: mark unsampled deltas, then return
+        for (int sink_x = start_x; sink_x <= end_x; sink_x++) {
+            for (int sink_y = start_y; sink_y <= end_y; sink_y++) {
+                int delta_x = abs(sink_x - source_x);
+                int delta_y = abs(sink_y - source_y);
+
+                if (matrix[delta_x][delta_y].empty()) {
+                    //Only set empty target if we don't already have a valid delta delay
+                    matrix[delta_x][delta_y].push_back(EMPTY_DELTA);
+#ifdef VERBOSE
+                    VTR_LOG("Computed delay: %12s delta: %d,%d (src: %d,%d sink: %d,%d)\n",
+                            "EMPTY",
+                            delta_x, delta_y,
+                            source_x, source_y,
+                            sink_x, sink_y);
+#endif
+                }
+            }
+        }
+
+        return;
+    }
+    // Deltas already handled during this call; the ones left false get IMPOSSIBLE_DELTA in the final pass
+    vtr::Matrix<bool> found_matrix({matrix.dim_size(0), matrix.dim_size(1)}, false);
+    // Try the candidate driver pin classes of the source tile one after another
+    auto best_driver_ptcs = get_best_classes(DRIVER, device_ctx.grid.get_physical_type({source_x, source_y, from_layer_num}));
+    for (int driver_ptc : best_driver_ptcs) {
+        VTR_ASSERT(driver_ptc != OPEN);
+        RRNodeId source_rr_node = device_ctx.rr_graph.node_lookup().find_node(from_layer_num, source_x, source_y, SOURCE, driver_ptc);
+
+        VTR_ASSERT(source_rr_node != RRNodeId::INVALID());
+        auto delays = calculate_all_path_delays_from_rr_node(source_rr_node, router_opts, is_flat);
+        // 'delays' is indexed by sink RR node below; a NaN entry is treated as "sink not found"
+        bool path_to_all_sinks = true;
+        for (int sink_x = start_x; sink_x <= end_x; sink_x++) {
+            for (int sink_y = start_y; sink_y <= end_y; sink_y++) {
+                int delta_x = abs(sink_x - source_x);
+                int delta_y = abs(sink_y - source_y);
+
+                if (found_matrix[delta_x][delta_y]) { // this delta was already handled during this call
+                    continue;
+                }
+
+                t_physical_tile_type_ptr sink_type = device_ctx.grid.get_physical_type({sink_x, sink_y, to_layer_num});
+                if (sink_type == device_ctx.EMPTY_PHYSICAL_TILE_TYPE) {
+                    if (matrix[delta_x][delta_y].empty()) {
+                        // Only set empty target if we don't already have a valid delta delay
+                        matrix[delta_x][delta_y].push_back(EMPTY_DELTA);
+#ifdef VERBOSE
+                        VTR_LOG("Computed delay: %12s delta: %d,%d (src: %d,%d sink: %d,%d)\n",
+                                "EMPTY",
+                                delta_x, delta_y,
+                                source_x, source_y,
+                                sink_x, sink_y);
+#endif
+                        found_matrix[delta_x][delta_y] = true; // NOTE(review): not set when the entry already held samples, so the final pass may append IMPOSSIBLE_DELTA - confirm intended
+                    }
+                } else {
+                    bool found_a_sink = false;
+                    auto best_sink_ptcs = get_best_classes(RECEIVER, device_ctx.grid.get_physical_type({sink_x, sink_y, to_layer_num}));
+                    for (int sink_ptc : best_sink_ptcs) {
+                        VTR_ASSERT(sink_ptc != OPEN);
+                        RRNodeId sink_rr_node = device_ctx.rr_graph.node_lookup().find_node(to_layer_num, sink_x, sink_y, SINK, sink_ptc);
+
+                        if (sink_rr_node == RRNodeId::INVALID())
+                            continue;
+
+                        if (!measure_directconnect && directconnect_exists(source_rr_node, sink_rr_node)) {
+                            // Skip if we shouldn't measure direct connects and a direct connect exists
+                            continue;
+                        }
+
+                        if (std::isnan(delays[sink_rr_node])) {
+                            // This sink was not found
+                            continue;
+                        }
+
+#ifdef VERBOSE
+                        VTR_LOG("Computed delay: %12g delta: %d,%d (src: %d,%d sink: %d,%d)\n",
+                                delays[size_t(sink_rr_node)],
+                                delta_x, delta_y,
+                                source_x, source_y,
+                                sink_x, sink_y);
+#endif
+                        found_matrix[delta_x][delta_y] = true;
+
+                        add_delay_to_matrix(matrix, delta_x, delta_y, delays[sink_rr_node]);
+
+                        found_a_sink = true;
+                        break; // stop at the first sink class that yields a delay
+                    }
+
+                    if (!found_a_sink) {
+                        path_to_all_sinks = false;
+                    }
+                }
+            }
+        }
+
+        if (path_to_all_sinks) { // further driver classes are only tried while some sink is still unreached
+            break;
+        }
+    }
+    // Final pass: every delta in range that was never marked as found gets IMPOSSIBLE_DELTA plus a warning
+    for (int sink_x = start_x; sink_x <= end_x; sink_x++) {
+        for (int sink_y = start_y; sink_y <= end_y; sink_y++) {
+            int delta_x = abs(sink_x - source_x);
+            int delta_y = abs(sink_y - source_y);
+            if (!found_matrix[delta_x][delta_y]) {
+                add_delay_to_matrix(matrix, delta_x, delta_y, IMPOSSIBLE_DELTA);
+                VTR_LOG_WARN("Unable to route between blocks at (%d,%d,%d) and (%d,%d,%d) to characterize delay (setting to %g)\n",
+                             source_x,
+                             source_y,
+                             from_layer_num,
+                             sink_x,
+                             sink_y,
+                             to_layer_num,
+                             IMPOSSIBLE_DELTA);
+            }
+        }
+    }
+}
+
+/**
+ * @brief Routes from a block at the source location to a block at the sink location
+ *        and returns the delay reported by the router profiler.
+ *
+ * Candidate SOURCE and SINK classes are obtained from get_best_classes(). Driver classes
+ * are tried in order and, for each of them, every sink class; the search stops at the
+ * first pair that the profiler routes successfully.
+ *
+ * @param route_profiler Profiler used to route a single source->sink connection.
+ * @param source_x,source_y,source_layer Location of the driving block.
+ * @param sink_x,sink_y,sink_layer Location of the receiving block.
+ * @param router_opts Router options forwarded to the profiler.
+ * @param measure_directconnect When false, pairs for which directconnect_exists() is true are skipped.
+ *
+ * @return The delay of the first routed pair. If nothing was routed a warning is logged and
+ *         the delay variable (initialised to IMPOSSIBLE_DELTA) is returned as-is.
+ */
+static float route_connection_delay(RouterDelayProfiler& route_profiler,
+                                    int source_x,
+                                    int source_y,
+                                    int source_layer,
+                                    int sink_x,
+                                    int sink_y,
+                                    int sink_layer,
+                                    const t_router_opts& router_opts,
+                                    bool measure_directconnect) {
+    // Start from a recognisable value for debug purposes
+    float net_delay_value = IMPOSSIBLE_DELTA;
+
+    const auto& device_ctx = g_vpr_ctx.device();
+    const auto& rr_lookup = device_ctx.rr_graph.node_lookup();
+
+    // Pin classes worth profiling at each end of the connection
+    const auto driver_classes = get_best_classes(DRIVER, device_ctx.grid.get_physical_type({source_x, source_y, source_layer}));
+    const auto sink_classes = get_best_classes(RECEIVER, device_ctx.grid.get_physical_type({sink_x, sink_y, sink_layer}));
+
+    // Walks the driver/sink class pairs and reports whether one of them could be routed
+    auto route_first_viable_pair = [&]() -> bool {
+        for (int driver_ptc : driver_classes) {
+            VTR_ASSERT(driver_ptc != OPEN);
+            const RRNodeId source_rr_node = rr_lookup.find_node(source_layer, source_x, source_y, SOURCE, driver_ptc);
+
+            VTR_ASSERT(source_rr_node != RRNodeId::INVALID());
+
+            for (int sink_ptc : sink_classes) {
+                VTR_ASSERT(sink_ptc != OPEN);
+                const RRNodeId sink_rr_node = rr_lookup.find_node(sink_layer, sink_x, sink_y, SINK, sink_ptc);
+
+                // No such SINK at this location: try the next class
+                if (sink_rr_node == RRNodeId::INVALID()) {
+                    continue;
+                }
+
+                // Direct connects are only measured on request
+                if (!measure_directconnect && directconnect_exists(source_rr_node, sink_rr_node)) {
+                    continue;
+                }
+
+                if (route_profiler.calculate_delay(source_rr_node, sink_rr_node, router_opts, &net_delay_value)) {
+                    return true;
+                }
+            }
+        }
+        return false;
+    };
+
+    const bool successfully_routed = route_first_viable_pair();
+
+    if (!successfully_routed) {
+        VTR_LOG_WARN("Unable to route between blocks at (%d,%d,%d) and (%d,%d,%d) to characterize delay (setting to %g)\n",
+                     source_x, source_y, source_layer, sink_x, sink_y, sink_layer, net_delay_value);
+    }
+
+    return net_delay_value;
+}
+
+/**
+ * @brief Collapses a list of sampled delays into a single value.
+ *
+ * @param delays  Samples to reduce. The MEDIAN reducer sorts this vector in place.
+ * @param reducer Reduction to apply (MIN, MAX, MEDIAN, ARITHMEAN or GEOMEAN).
+ *
+ * @return IMPOSSIBLE_DELTA for an empty list, the sole element for a one-element list,
+ *         otherwise the reduced value. Any other reducer raises a fatal VPR error.
+ */
+static float delay_reduce(std::vector<float>& delays, e_reducer reducer) {
+    // Degenerate inputs need no reduction
+    if (delays.empty()) {
+        return IMPOSSIBLE_DELTA;
+    }
+    if (delays.size() == 1) {
+        return delays[0];
+    }
+
+    VTR_ASSERT(delays.size() > 1);
+
+    float reduced;
+
+    switch (reducer) {
+        case e_reducer::MIN:
+            reduced = *std::min_element(delays.begin(), delays.end());
+            break;
+        case e_reducer::MAX:
+            reduced = *std::max_element(delays.begin(), delays.end());
+            break;
+        case e_reducer::MEDIAN:
+            // Samples are sorted before the median helper is called
+            std::stable_sort(delays.begin(), delays.end());
+            reduced = vtr::median(delays.begin(), delays.end());
+            break;
+        case e_reducer::ARITHMEAN:
+            reduced = vtr::arithmean(delays.begin(), delays.end());
+            break;
+        case e_reducer::GEOMEAN:
+            reduced = vtr::geomean(delays.begin(), delays.end());
+            break;
+        default:
+            VPR_FATAL_ERROR(VPR_ERROR_PLACE, "Unrecognized delta delay reducer");
+    }
+
+    return reduced;
+}
+
+/**
+ * @brief Records one delay sample for the (delta_x, delta_y) cell of the sample matrix.
+ *
+ * A cell holding only the EMPTY_DELTA placeholder has that placeholder replaced;
+ * any other cell gets the sample appended.
+ */
+static void add_delay_to_matrix(vtr::Matrix<std::vector<float>>& matrix,
+                                int delta_x,
+                                int delta_y,
+                                float delay) {
+    std::vector<float>& samples = matrix[delta_x][delta_y];
+
+    const bool holds_placeholder_only = (samples.size() == 1 && samples[0] == EMPTY_DELTA);
+
+    if (holds_placeholder_only) {
+        // First real sample takes the placeholder's slot
+        samples[0] = delay;
+        return;
+    }
+
+    // Accumulate alongside the samples already collected
+    samples.push_back(delay);
+}
+
+/**
+ * @brief Averages the usable delay entries surrounding a location of the delay matrix.
+ *
+ * For radius = 1..max_distance, every in-bounds entry of matrix[from_layer][to_layer]
+ * within that Manhattan distance of (to_tile_loc.x, to_tile_loc.y), excluding the location
+ * itself, is examined. Entries equal to EMPTY_DELTA or IMPOSSIBLE_DELTA are ignored.
+ * The mean for the first radius that yields at least one usable entry is returned.
+ *
+ * @return The neighbourhood mean, or IMPOSSIBLE_DELTA if no radius yields a usable entry.
+ */
+static float find_neighboring_average(vtr::NdMatrix<float, 4>& matrix,
+                                      int from_layer,
+                                      t_physical_tile_loc to_tile_loc,
+                                      int max_distance) {
+    const int x = to_tile_loc.x;
+    const int y = to_tile_loc.y;
+    const int to_layer = to_tile_loc.layer_num;
+
+    const int endx = matrix.end_index(2);
+    const int endy = matrix.end_index(3);
+
+    float sum = 0.f;
+    int num_samples = 0;
+
+    for (int radius = 1; radius <= max_distance; ++radius) {
+        for (int nx = x - radius; nx <= x + radius; nx++) {
+            if (nx < 0 || nx >= endx) {
+                continue;
+            }
+
+            // Remaining Manhattan budget for the y direction in this column
+            const int y_reach = radius - abs(nx - x);
+
+            for (int ny = y - y_reach; ny <= y + y_reach; ny++) {
+                if (ny < 0 || ny >= endy) {
+                    continue;
+                }
+
+                // The centre is the entry being estimated, never a sample
+                if (nx == x && ny == y) {
+                    continue;
+                }
+
+                const float sample = matrix[from_layer][to_layer][nx][ny];
+                if (sample == EMPTY_DELTA || sample == IMPOSSIBLE_DELTA) {
+                    continue;
+                }
+
+                sum += sample;
+                num_samples++;
+            }
+        }
+
+        // Stop growing the neighbourhood as soon as it contains usable data
+        if (num_samples != 0) {
+            return sum / (float)num_samples;
+        }
+    }
+
+    return IMPOSSIBLE_DELTA;
+}
+
+/***************************************************************************************/
+
+/**
+ * @brief Computes the delta-delay matrix and post-processes it into a usable lookup.
+ *
+ * The raw matrix comes from compute_delta_delays(). Entries still equal to
+ * UNINITIALIZED_DELTA become IMPOSSIBLE_DELTA, after which fix_empty_coordinates(),
+ * fill_impossible_coordinates() and verify_delta_delays() are applied in that order.
+ * The whole call is timed under the label "Computing delta delays".
+ */
+vtr::NdMatrix<float, 4> compute_delta_delay_model(RouterDelayProfiler& route_profiler,
+                                                  const t_placer_opts& placer_opts,
+                                                  const t_router_opts& router_opts,
+                                                  bool measure_directconnect,
+                                                  int longest_length,
+                                                  bool is_flat) {
+    vtr::ScopedStartFinishTimer timer("Computing delta delays");
+
+    vtr::NdMatrix<float, 4> delay_matrix = compute_delta_delays(route_profiler,
+                                                                placer_opts,
+                                                                router_opts,
+                                                                measure_directconnect,
+                                                                longest_length,
+                                                                is_flat);
+
+    // Entries that were never written are treated as unreachable
+    for (size_t idx = 0, count = delay_matrix.size(); idx < count; ++idx) {
+        float& entry = delay_matrix.get(idx);
+        if (entry == UNINITIALIZED_DELTA) {
+            entry = IMPOSSIBLE_DELTA;
+        }
+    }
+
+    fix_empty_coordinates(delay_matrix);
+    fill_impossible_coordinates(delay_matrix);
+    verify_delta_delays(delay_matrix);
+
+    return delay_matrix;
+}
+
+/**
+ * @brief Finds a SOURCE/SINK RR node pair appropriate for measuring the delay of a direct specification.
+ *
+ * The grid is scanned (layer, then x, then y) for the first location holding a `from_type`
+ * tile such that the location shifted by the direct's x/y offsets lies on the grid, holds a
+ * `to_type` tile, both pins have RR nodes at their locations, and some sub-tile index shifted
+ * by the direct's sub-tile offset is within the capacity of `to_type`.
+ *
+ * @param direct          Direct connection specification (offsets and pin sides are read).
+ * @param from_type       Tile type driving the direct; must not be null.
+ * @param from_pin        Physical pin index of the driving pin.
+ * @param from_pin_class  Class of the driving pin, used to look up the SOURCE node.
+ * @param to_type         Tile type receiving the direct; must not be null.
+ * @param to_pin          Physical pin index of the receiving pin.
+ * @param to_pin_class    Class of the receiving pin, used to look up the SINK node.
+ * @param out_src_node    Set to the SOURCE RR node on success; untouched otherwise.
+ * @param out_sink_node   Set to the SINK RR node on success; untouched otherwise.
+ *
+ * @return true if an instance of the direct was found on the grid, false otherwise.
+ */
+bool find_direct_connect_sample_locations(const t_direct_inf* direct,
+                                          t_physical_tile_type_ptr from_type,
+                                          int from_pin,
+                                          int from_pin_class,
+                                          t_physical_tile_type_ptr to_type,
+                                          int to_pin,
+                                          int to_pin_class,
+                                          RRNodeId& out_src_node,
+                                          RRNodeId& out_sink_node) {
+    VTR_ASSERT(from_type != nullptr);
+    VTR_ASSERT(to_type != nullptr);
+
+    const auto& device_ctx = g_vpr_ctx.device();
+    const auto& grid = device_ctx.grid;
+    const auto& node_lookup = device_ctx.rr_graph.node_lookup();
+
+    const int grid_width = static_cast<int>(grid.width());
+    const int grid_height = static_cast<int>(grid.height());
+
+    // Reports whether `pin` has an RR node of kind `rr_type` at (layer, x, y).
+    // With multi-width/height blocks a pin may not exist at every location of the block.
+    // A specific side restricts the lookup to that side; NUM_2D_SIDES means any side.
+    auto pin_exists_at = [&node_lookup](int layer, int x, int y, auto rr_type, int pin, auto side) -> bool {
+        if (side != NUM_2D_SIDES) {
+            return node_lookup.find_node(layer, x, y, rr_type, pin, side).is_valid();
+        }
+        return !node_lookup.find_nodes_at_all_sides(layer, x, y, rr_type, pin).empty();
+    };
+
+    //Search the grid for an instance of from/to blocks which satisfy this direct connect offsets,
+    //and which has the appropriate pins
+    int from_x = -1;
+    int from_y = -1;
+    int from_sub_tile = -1;
+    int to_x = 0;
+    int to_y = 0;
+    int to_sub_tile = 0;
+    bool found = false;
+    int found_layer_num = -1;
+    //TODO: Function *FOR NOW* assumes that from/to blocks are at same die and have a same layer nums
+    for (int layer_num = 0; layer_num < grid.get_num_layers() && !found; ++layer_num) {
+        for (int x = 0; x < grid_width && !found; ++x) {
+            const int candidate_to_x = x + direct->x_offset;
+            if (candidate_to_x < 0 || candidate_to_x >= grid_width) continue;
+
+            for (int y = 0; y < grid_height && !found; ++y) {
+                const int candidate_to_y = y + direct->y_offset;
+                if (candidate_to_y < 0 || candidate_to_y >= grid_height) continue;
+
+                // Cheap tile-type checks first, RR node lookups only for plausible locations
+                if (grid.get_physical_type({x, y, layer_num}) != from_type) continue;
+                if (grid.get_physical_type({candidate_to_x, candidate_to_y, layer_num}) != to_type) continue;
+
+                //Check that the from pin exists at the from location
+                if (!pin_exists_at(layer_num, x, y, OPIN, from_pin, direct->from_side)) continue;
+
+                //Check that the to pin exists at the to location
+                if (!pin_exists_at(layer_num, candidate_to_x, candidate_to_y, IPIN, to_pin, direct->to_side)) continue;
+
+                // Take the first sub-tile whose offset counterpart fits in the destination tile
+                for (int sub_tile_num = 0; sub_tile_num < from_type->capacity; ++sub_tile_num) {
+                    const int candidate_to_sub_tile = sub_tile_num + direct->sub_tile_offset;
+
+                    if (candidate_to_sub_tile < 0 || candidate_to_sub_tile >= to_type->capacity) continue;
+
+                    found = true;
+                    found_layer_num = layer_num;
+                    from_x = x;
+                    from_y = y;
+                    from_sub_tile = sub_tile_num;
+                    to_x = candidate_to_x;
+                    to_y = candidate_to_y;
+                    to_sub_tile = candidate_to_sub_tile;
+
+                    break;
+                }
+            }
+        }
+    }
+
+    if (!found) {
+        return false;
+    }
+
+    //Now have a legal instance of this direct connect
+    VTR_ASSERT(grid.get_physical_type({from_x, from_y, found_layer_num}) == from_type);
+    VTR_ASSERT(from_sub_tile < from_type->capacity);
+
+    VTR_ASSERT(grid.get_physical_type({to_x, to_y, found_layer_num}) == to_type);
+    VTR_ASSERT(to_sub_tile < to_type->capacity);
+
+    VTR_ASSERT(from_x + direct->x_offset == to_x);
+    VTR_ASSERT(from_y + direct->y_offset == to_y);
+    VTR_ASSERT(from_sub_tile + direct->sub_tile_offset == to_sub_tile);
+
+    // Find a source/sink RR node associated with the pins of the direct
+    const RRNodeId src_rr_candidate = node_lookup.find_node(found_layer_num, from_x, from_y, SOURCE, from_pin_class);
+    VTR_ASSERT(src_rr_candidate.is_valid());
+    out_src_node = src_rr_candidate;
+
+    const RRNodeId sink_rr_candidate = node_lookup.find_node(found_layer_num, to_x, to_y, SINK, to_pin_class);
+    VTR_ASSERT(sink_rr_candidate.is_valid());
+    out_sink_node = sink_rr_candidate;
+
+    return true;
+}
+
+// Returns the indices of the pin classes of `type` that match `pintype` and contain at least
+// one non-ignored pin with a non-zero Fc, ordered from the largest class to the smallest.
+std::vector<int> get_best_classes(enum e_pin_type pintype, t_physical_tile_type_ptr type) {
+    // Pins named by at least one Fc specification with a non-zero value.
+    //
+    // Non-zero Fc pins are tracked (rather than zero ones) because an Fc override may
+    // cover only a subset of wire types; this records every pin that can potentially
+    // connect to global routing.
+    std::unordered_set<int> routable_pins;
+    for (const t_fc_specification& spec : type->fc_specs) {
+        if (spec.fc_value != 0) {
+            routable_pins.insert(spec.pins.begin(), spec.pins.end());
+        }
+    }
+
+    // A class is usable for delay profiling when any of its pins is not ignored
+    // and has a non-zero Fc to some general routing
+    auto reaches_general_routing = [&](int iclass) -> bool {
+        const auto& class_info = type->class_inf[iclass];
+        for (int ipin = 0; ipin < class_info.num_pins; ++ipin) {
+            const int pin = class_info.pinlist[ipin];
+            if (!type->is_ignored_pin[pin] && routable_pins.count(pin)) {
+                return true;
+            }
+        }
+        return false;
+    };
+
+    // Gather the candidate classes in ascending class-index order
+    std::vector<int> candidates;
+    const int num_classes = (int)type->class_inf.size();
+    for (int iclass = 0; iclass < num_classes; ++iclass) {
+        if (type->class_inf[iclass].type != pintype) continue;
+        if (!reaches_general_routing(iclass)) continue;
+
+        candidates.push_back(iclass);
+    }
+
+    // Largest pin class first; the stable sort keeps index order among equally sized classes
+    std::stable_sort(candidates.begin(), candidates.end(), [&](int lhs, int rhs) {
+        return type->class_inf[lhs].num_pins > type->class_inf[rhs].num_pins;
+    });
+
+    return candidates;
+}
\ No newline at end of file
diff --git a/vpr/src/place/timing/delay_model/compute_delta_delays_utils.h b/vpr/src/place/timing/delay_model/compute_delta_delays_utils.h
new file mode 100644
index 00000000000..71ac632b149
--- /dev/null
+++ b/vpr/src/place/timing/delay_model/compute_delta_delays_utils.h
@@ -0,0 +1,56 @@
+
+#pragma once
+
+#include "vtr_ndmatrix.h"
+#include "physical_types.h"
+#include "rr_graph_fwd.h"
+
+struct t_placer_opts;
+struct t_router_opts;
+class RouterDelayProfiler;
+
+/**
+ * @brief Computes the 4-D delta-delay matrix used by the placement delay models.
+ *
+ * @param route_profiler        Router profiler used to measure connection delays.
+ * @param placer_opts           Placer options forwarded to the delay computation.
+ * @param router_opts           Router options forwarded to the delay computation.
+ * @param measure_directconnect Whether connections for which a direct connect exists are measured.
+ * @param longest_length        Forwarded to the delay computation.
+ *                              NOTE(review): presumably the longest routing segment length -- confirm.
+ * @param is_flat               Forwarded to the delay computation (flat vs. two-stage routing).
+ *
+ * @return The delay matrix; DeltaDelayModel indexes it as [from_layer][to_layer][delta_x][delta_y].
+ */
+vtr::NdMatrix<float, 4> compute_delta_delay_model(RouterDelayProfiler& route_profiler,
+                                                  const t_placer_opts& placer_opts,
+                                                  const t_router_opts& router_opts,
+                                                  bool measure_directconnect,
+                                                  int longest_length,
+                                                  bool is_flat);
+
+/**
+ * @brief Finds a SOURCE/SINK RR node pair suitable for measuring the delay of a direct connection.
+ *
+ * Searches the device grid for an instance of `from_type` and, at the direct's x/y offset,
+ * an instance of `to_type` where both pins exist and the sub-tile offset is legal.
+ *
+ * @param direct          Direct connection specification (offsets and pin sides).
+ * @param from_type       Tile type driving the direct; must not be null.
+ * @param from_pin        Physical pin index of the driving pin.
+ * @param from_pin_class  Class of the driving pin (used for the SOURCE node lookup).
+ * @param to_type         Tile type receiving the direct; must not be null.
+ * @param to_pin          Physical pin index of the receiving pin.
+ * @param to_pin_class    Class of the receiving pin (used for the SINK node lookup).
+ * @param out_src_node    Receives the SOURCE RR node when the function returns true.
+ * @param out_sink_node   Receives the SINK RR node when the function returns true.
+ *
+ * @return true if a sample location was found; false otherwise (outputs are left untouched).
+ */
+bool find_direct_connect_sample_locations(const t_direct_inf* direct,
+                                          t_physical_tile_type_ptr from_type,
+                                          int from_pin,
+                                          int from_pin_class,
+                                          t_physical_tile_type_ptr to_type,
+                                          int to_pin,
+                                          int to_pin_class,
+                                          RRNodeId& out_src_node,
+                                          RRNodeId& out_sink_node);
+
+/**
+ * @brief Identifies the best pin classes for delay calculation based on pin count and connectivity.
+ *
+ * This function selects pin classes of a specified type (`pintype`) from a physical tile type (`type`)
+ * that are suitable for delay calculations. It prioritizes pin classes with the largest number of pins
+ * that connect to general routing, ensuring commonly used pins are chosen for delay profiling.
+ *
+ * @param pintype The type of pins to filter.
+ * @param type Pointer to the physical tile type containing pin and class information.
+ *
+ * @return A vector of indices representing the selected pin classes. The classes are sorted
+ *         in descending order based on the number of pins they contain.
+ *
+ * @details
+ * - A pin class is eligible if its type matches `pintype` and it contains at least one pin
+ *   that is not ignored and connects to general routing (non-zero Fc).
+ * - Non-zero Fc pins are determined by inspecting the tile's `fc_specs`.
+ * - Classes are sorted so that the class with the largest number of pins appears first.
+ *   If multiple classes have the same pin count, they keep ascending class-index order
+ *   (their position in `type->class_inf`).
+ *
+ * @note
+ * - Pins explicitly marked as ignored in `type->is_ignored_pin` are excluded.
+ * - The function ensures stability in sorting, preserving the input order for classes
+ *   with the same number of pins.
+ */
+
+std::vector<int> get_best_classes(enum e_pin_type pintype, t_physical_tile_type_ptr type);
\ No newline at end of file
diff --git a/vpr/src/place/timing/delay_model/delta_delay_model.cpp b/vpr/src/place/timing/delay_model/delta_delay_model.cpp
new file mode 100644
index 00000000000..f4e202e7106
--- /dev/null
+++ b/vpr/src/place/timing/delay_model/delta_delay_model.cpp
@@ -0,0 +1,48 @@
+
+#include "delta_delay_model.h"
+
+#include "compute_delta_delays_utils.h"
+
+// Fills delays_ with a freshly computed delta-delay matrix.
+void DeltaDelayModel::compute(RouterDelayProfiler& route_profiler,
+                              const t_placer_opts& placer_opts,
+                              const t_router_opts& router_opts,
+                              int longest_length) {
+    // This model asks for direct connections to be measured as well
+    constexpr bool measure_directconnect = true;
+
+    delays_ = compute_delta_delay_model(route_profiler,
+                                        placer_opts,
+                                        router_opts,
+                                        measure_directconnect,
+                                        longest_length,
+                                        is_flat_);
+}
+
+// Looks up the delay for the layer pair and the absolute x/y distance between the two
+// locations; the pin arguments are not used by this model.
+float DeltaDelayModel::delay(const t_physical_tile_loc& from_loc, int /*from_pin*/,
+                             const t_physical_tile_loc& to_loc, int /*to_pin*/) const {
+    const int from_layer = from_loc.layer_num;
+    const int to_layer = to_loc.layer_num;
+
+    const int dx = std::abs(from_loc.x - to_loc.x);
+    const int dy = std::abs(from_loc.y - to_loc.y);
+
+    return delays_[from_layer][to_layer][dx][dy];
+}
+
+// Writes the delay matrix to `filepath` as text: for every (from_layer, to_layer) pair a
+// header row of dx values followed by one row per dy holding the delays for each dx.
+void DeltaDelayModel::dump_echo(std::string filepath) const {
+    FILE* echo_file = vtr::fopen(filepath.c_str(), "w");
+
+    const size_t num_from_layers = delays_.dim_size(0);
+    const size_t num_to_layers = delays_.dim_size(1);
+    const size_t num_dx = delays_.dim_size(2);
+    const size_t num_dy = delays_.dim_size(3);
+
+    fprintf(echo_file, "         ");
+    for (size_t from_layer = 0; from_layer < num_from_layers; ++from_layer) {
+        for (size_t to_layer = 0; to_layer < num_to_layers; ++to_layer) {
+            // NOTE(review): only the from-layer index is printed for each layer pair
+            fprintf(echo_file, " %9zu", from_layer);
+            fprintf(echo_file, "\n");
+
+            // Column header: one entry per dx
+            for (size_t dx = 0; dx < num_dx; ++dx) {
+                fprintf(echo_file, " %9zu", dx);
+            }
+            fprintf(echo_file, "\n");
+
+            // One row per dy, labelled with dy
+            for (size_t dy = 0; dy < num_dy; ++dy) {
+                fprintf(echo_file, "%9zu", dy);
+                for (size_t dx = 0; dx < num_dx; ++dx) {
+                    fprintf(echo_file, " %9.2e", delays_[from_layer][to_layer][dx][dy]);
+                }
+                fprintf(echo_file, "\n");
+            }
+        }
+    }
+
+    vtr::fclose(echo_file);
+}
+
diff --git a/vpr/src/place/timing/delay_model/delta_delay_model.h b/vpr/src/place/timing/delay_model/delta_delay_model.h
new file mode 100644
index 00000000000..c3ae0d83cf7
--- /dev/null
+++ b/vpr/src/place/timing/delay_model/delta_delay_model.h
@@ -0,0 +1,47 @@
+
+#pragma once
+
+#include "place_delay_model.h"
+
+/**
+ * @class DeltaDelayModel
+ *
+ * @brief A simple delay model based on the distance (delta) between block locations.
+ */
+class DeltaDelayModel : public PlaceDelayModel {
+  public:
+    /// Creates a model with an empty delay matrix; compute() or read() is expected to fill it.
+    DeltaDelayModel(float min_cross_layer_delay,
+                    bool is_flat)
+        : cross_layer_delay_(min_cross_layer_delay)
+        , is_flat_(is_flat) {}
+
+    /// Creates a model from an already computed delay matrix (taken by value and moved in).
+    DeltaDelayModel(float min_cross_layer_delay,
+                    vtr::NdMatrix<float, 4> delta_delays,
+                    bool is_flat)
+        : delays_(std::move(delta_delays))
+        , cross_layer_delay_(min_cross_layer_delay)
+        , is_flat_(is_flat) {}
+
+    /// Computes the delay matrix with the given router profiler and stores it in this model.
+    void compute(RouterDelayProfiler& router,
+                 const t_placer_opts& placer_opts,
+                 const t_router_opts& router_opts,
+                 int longest_length) override;
+
+    /// Returns the delay for the layer pair and absolute x/y distance of the two locations; pins are ignored.
+    float delay(const t_physical_tile_loc& from_loc, int /*from_pin*/, const t_physical_tile_loc& to_loc, int /*to_pin*/) const override;
+
+    /// Writes the delay matrix to a text file at `filepath`.
+    void dump_echo(std::string filepath) const override;
+
+    void read(const std::string& file) override;
+    void write(const std::string& file) const override;
+
+    /// Read-only access to the underlying delay matrix.
+    const vtr::NdMatrix<float, 4>& delays() const {
+        return delays_;
+    }
+
+  private:
+    vtr::NdMatrix<float, 4> delays_; // [from_layer][to_layer][delta_x][delta_y]
+    float cross_layer_delay_;
+
+    /// Indicates whether the router is a two-stage or run-flat
+    bool is_flat_;
+};
\ No newline at end of file
diff --git a/vpr/src/place/place_delay_model.cpp b/vpr/src/place/timing/delay_model/override_delay_model.cpp
similarity index 58%
rename from vpr/src/place/place_delay_model.cpp
rename to vpr/src/place/timing/delay_model/override_delay_model.cpp
index 4f626a5817f..d496a43b5e7 100644
--- a/vpr/src/place/place_delay_model.cpp
+++ b/vpr/src/place/timing/delay_model/override_delay_model.cpp
@@ -1,21 +1,7 @@
-/**
- * @file place_delay_model.cpp
- * @brief This file implements all the class methods and individual
- *        routines related to the placer delay model.
- */
 
-#include <queue>
-#include "place_delay_model.h"
-#include "globals.h"
-#include "router_lookahead_map.h"
-#include "rr_graph2.h"
+#include "override_delay_model.h"
 
-#include "timing_place_lookup.h"
-#include "placer_state.h"
-
-#include "vtr_log.h"
-#include "vtr_math.h"
-#include "vpr_error.h"
+#include "compute_delta_delays_utils.h"
 
 #ifdef VTR_ENABLE_CAPNPROTO
 #    include "capnp/serialize.h"
@@ -23,48 +9,109 @@
 #    include "ndmatrix_serdes.h"
 #    include "mmap_file.h"
 #    include "serdes_utils.h"
-#endif /* VTR_ENABLE_CAPNPROTO */
-
-///@brief DeltaDelayModel methods.
-float DeltaDelayModel::delay(const t_physical_tile_loc& from_loc, int /*from_pin*/, const t_physical_tile_loc& to_loc, int /*to_pin*/) const {
-    int delta_x = std::abs(from_loc.x - to_loc.x);
-    int delta_y = std::abs(from_loc.y - to_loc.y);
-
-    return delays_[from_loc.layer_num][to_loc.layer_num][delta_x][delta_y];
+#endif  // VTR_ENABLE_CAPNPROTO
+
+/**
+ * @brief Builds the base delta-delay model (direct connects excluded from the measurement)
+ *        and then adds the per-direct delay overrides on top of it.
+ *
+ * @param route_profiler Router profiler used to measure delays.
+ * @param placer_opts    Placer options forwarded to the delta-delay computation.
+ * @param router_opts    Router options forwarded to both computation steps.
+ * @param longest_length Forwarded to the delta-delay computation.
+ */
+void OverrideDelayModel::compute(RouterDelayProfiler& route_profiler,
+                                 const t_placer_opts& placer_opts,
+                                 const t_router_opts& router_opts,
+                                 int longest_length) {
+    auto delays = compute_delta_delay_model(route_profiler,
+                                            placer_opts,
+                                            router_opts,
+                                            /*measure_directconnect=*/false,
+                                            longest_length,
+                                            is_flat_);
+
+    // DeltaDelayModel takes the matrix by value, so hand over the local instead of
+    // deep-copying the whole 4-D matrix; `delays` is not used after this point.
+    // NOTE(review): is_flat is hard-coded to false here rather than forwarding is_flat_ -- confirm this is intended.
+    base_delay_model_ = std::make_unique<DeltaDelayModel>(cross_layer_delay_, std::move(delays), false);
+
+    compute_override_delay_model_(route_profiler, router_opts);
 }
 
-void DeltaDelayModel::dump_echo(std::string filepath) const {
-    FILE* f = vtr::fopen(filepath.c_str(), "w");
-    fprintf(f, "         ");
-    for (size_t from_layer_num = 0; from_layer_num < delays_.dim_size(0); ++from_layer_num) {
-        for (size_t to_layer_num = 0; to_layer_num < delays_.dim_size(1); ++to_layer_num) {
-            fprintf(f, " %9zu", from_layer_num);
-            fprintf(f, "\n");
-            for (size_t dx = 0; dx < delays_.dim_size(2); ++dx) {
-                fprintf(f, " %9zu", dx);
+/**
+ * @brief Measures the delay of every direct connection in the architecture and records it
+ *        as an override in this delay model.
+ *
+ * For each direct, every bit of the from/to port range is examined: a sample SOURCE/SINK
+ * pair is located on the grid, routed with the profiler, and the resulting delay stored with
+ * set_delay_override(). Bits with no instance on the grid or no routing path are counted and
+ * reported with a warning per direct.
+ *
+ * @param route_profiler Router profiler used to measure each direct connection.
+ * @param router_opts    Router options; a modified copy is used for the measurements.
+ */
+void OverrideDelayModel::compute_override_delay_model_(RouterDelayProfiler& route_profiler,
+                                                       const t_router_opts& router_opts) {
+    const auto& device_ctx = g_vpr_ctx.device();
+    // Work on a private copy of the options with the A* factor and offset zeroed.
+    // NOTE(review): presumably so the measured path is not influenced by the lookahead heuristic -- confirm.
+    t_router_opts router_opts2 = router_opts;
+    router_opts2.astar_fac = 0.f;
+    router_opts2.astar_offset = 0.f;
+
+    // Look at all the direct connections that exist, and add overrides to delay model
+    for (int idirect = 0; idirect < (int)device_ctx.arch->directs.size(); ++idirect) {
+        const t_direct_inf* direct = &device_ctx.arch->directs[idirect];
+
+        InstPort from_port = parse_inst_port(direct->from_pin);
+        InstPort to_port = parse_inst_port(direct->to_pin);
+
+        t_physical_tile_type_ptr from_type = find_tile_type_by_name(from_port.instance_name(), device_ctx.physical_tile_types);
+        t_physical_tile_type_ptr to_type = find_tile_type_by_name(to_port.instance_name(), device_ctx.physical_tile_types);
+
+        int num_conns = from_port.port_high_index() - from_port.port_low_index() + 1;
+        VTR_ASSERT_MSG(num_conns == to_port.port_high_index() - to_port.port_low_index() + 1, "Directs must have the same size to/from");
+
+        //We now walk through all the connections associated with the current direct specification, measure
+        //their delay and specify that value as an override in the delay model.
+        //
+        //Note that we need to check every connection in the direct to cover the case where the pins are not
+        //equivalent.
+        //
+        //However, if the from/to ports are equivalent we could end up sampling the same RR SOURCE/SINK
+        //paths multiple times (wasting CPU time) -- we avoid this by recording the sampled paths in
+        //sampled_rr_pairs and skipping them if they occur multiple times.
+        int missing_instances = 0;
+        int missing_paths = 0;
+        std::set<std::pair<RRNodeId, RRNodeId>> sampled_rr_pairs;
+        for (int iconn = 0; iconn < num_conns; ++iconn) {
+            //Find the associated pins
+            int from_pin = from_type->find_pin(from_port.port_name(), from_port.port_low_index() + iconn);
+            int to_pin = to_type->find_pin(to_port.port_name(), to_port.port_low_index() + iconn);
+
+            VTR_ASSERT(from_pin != OPEN);
+            VTR_ASSERT(to_pin != OPEN);
+
+            int from_pin_class = from_type->find_pin_class(from_port.port_name(), from_port.port_low_index() + iconn, DRIVER);
+            VTR_ASSERT(from_pin_class != OPEN);
+
+            int to_pin_class = to_type->find_pin_class(to_port.port_name(), to_port.port_low_index() + iconn, RECEIVER);
+            VTR_ASSERT(to_pin_class != OPEN);
+
+            bool found_sample_points;
+            RRNodeId src_rr, sink_rr;
+            found_sample_points = find_direct_connect_sample_locations(direct, from_type, from_pin, from_pin_class, to_type, to_pin, to_pin_class, src_rr, sink_rr);
+
+            if (!found_sample_points) {
+                ++missing_instances;
+                continue;
             }
-            fprintf(f, "\n");
-            for (size_t dy = 0; dy < delays_.dim_size(3); ++dy) {
-                fprintf(f, "%9zu", dy);
-                for (size_t dx = 0; dx < delays_.dim_size(2); ++dx) {
-                    fprintf(f, " %9.2e", delays_[from_layer_num][to_layer_num][dx][dy]);
-                }
-                fprintf(f, "\n");
+
+            //If some of the source/sink ports are logically equivalent we may have already
+            //sampled the associated source/sink pair and don't need to do so again
+            if (sampled_rr_pairs.count({src_rr, sink_rr})) continue;
+
+            // Initialised to NaN so an unset measurement is distinguishable from a real delay
+            float direct_connect_delay = std::numeric_limits<float>::quiet_NaN();
+            bool found_routing_path = route_profiler.calculate_delay(src_rr, sink_rr, router_opts2, &direct_connect_delay);
+
+            if (found_routing_path) {
+                set_delay_override(from_type->index, from_pin_class, to_type->index, to_pin_class, direct->x_offset, direct->y_offset, direct_connect_delay);
+            } else {
+                ++missing_paths;
             }
+
+            //Record that we've sampled this pair of source and sink nodes
+            sampled_rr_pairs.insert({src_rr, sink_rr});
         }
+
+        VTR_LOGV_WARN(missing_instances > 0, "Found no delta delay for %d bits of inter-block direct connect '%s' (no instances of this direct found)\n", missing_instances, direct->name.c_str());
+        VTR_LOGV_WARN(missing_paths > 0, "Found no delta delay for %d bits of inter-block direct connect '%s' (no routing path found)\n", missing_paths, direct->name.c_str());
     }
-    vtr::fclose(f);
 }
 
 const DeltaDelayModel* OverrideDelayModel::base_delay_model() const {
     return base_delay_model_.get();
 }
 
-///@brief OverrideDelayModel methods.
 float OverrideDelayModel::delay(const t_physical_tile_loc& from_loc, int from_pin, const t_physical_tile_loc& to_loc, int to_pin) const {
-    //First check to if there is an override delay value
-    auto& device_ctx = g_vpr_ctx.device();
-    auto& grid = device_ctx.grid;
+    // First check to if there is an override delay value
+    const auto& device_ctx = g_vpr_ctx.device();
+    const auto& grid = device_ctx.grid;
 
     t_physical_tile_type_ptr from_type_ptr = grid.get_physical_type(from_loc);
     t_physical_tile_type_ptr to_type_ptr = grid.get_physical_type(to_loc);
@@ -152,14 +199,6 @@ void OverrideDelayModel::set_base_delay_model(std::unique_ptr<DeltaDelayModel> b
     base_delay_model_ = std::move(base_delay_model_obj);
 }
 
-float SimpleDelayModel::delay(const t_physical_tile_loc& from_loc, int /*from_pin*/, const t_physical_tile_loc& to_loc, int /*to_pin*/) const {
-    int delta_x = std::abs(from_loc.x - to_loc.x);
-    int delta_y = std::abs(from_loc.y - to_loc.y);
-
-    int from_tile_idx = g_vpr_ctx.device().grid.get_physical_type(from_loc)->index;
-    return delays_[from_tile_idx][from_loc.layer_num][to_loc.layer_num][delta_x][delta_y];
-}
-
 /**
  * When writing capnp targetted serialization, always allow compilation when
  * VTR_ENABLE_CAPNPROTO=OFF. Generally this means throwing an exception instead.
@@ -315,89 +354,4 @@ void OverrideDelayModel::write(const std::string& file) const {
     writeMessageToFile(file, &builder);
 }
 
-#endif
-
-///@brief Initialize the placer delay model.
-std::unique_ptr<PlaceDelayModel> alloc_lookups_and_delay_model(const Netlist<>& net_list,
-                                                               t_chan_width_dist chan_width_dist,
-                                                               const t_placer_opts& placer_opts,
-                                                               const t_router_opts& router_opts,
-                                                               t_det_routing_arch* det_routing_arch,
-                                                               std::vector<t_segment_inf>& segment_inf,
-                                                               const std::vector<t_direct_inf>& directs,
-                                                               bool is_flat) {
-    return compute_place_delay_model(placer_opts,
-                                     router_opts,
-                                     net_list,
-                                     det_routing_arch,
-                                     segment_inf,
-                                     chan_width_dist,
-                                     directs,
-                                     is_flat);
-}
-
-/**
- * @brief Returns the delay of one point to point connection.
- *
- * Only estimate delay for signals routed through the inter-block routing network.
- * TODO: Do how should we compute the delay for globals. "Global signals are assumed to have zero delay."
- */
-float comp_td_single_connection_delay(const PlaceDelayModel* delay_model,
-                                      const vtr::vector_map<ClusterBlockId, t_block_loc>& block_locs,
-                                      ClusterNetId net_id,
-                                      int ipin) {
-    auto& cluster_ctx = g_vpr_ctx.clustering();
-
-    float delay_source_to_sink = 0.;
-
-    if (!cluster_ctx.clb_nlist.net_is_ignored(net_id)) {
-        ClusterPinId source_pin = cluster_ctx.clb_nlist.net_driver(net_id);
-        ClusterPinId sink_pin = cluster_ctx.clb_nlist.net_pin(net_id, ipin);
-
-        ClusterBlockId source_block = cluster_ctx.clb_nlist.pin_block(source_pin);
-        ClusterBlockId sink_block = cluster_ctx.clb_nlist.pin_block(sink_pin);
-
-        int source_block_ipin = cluster_ctx.clb_nlist.pin_logical_index(source_pin);
-        int sink_block_ipin = cluster_ctx.clb_nlist.pin_logical_index(sink_pin);
-
-        t_pl_loc source_block_loc = block_locs[source_block].loc;
-        t_pl_loc sink_block_loc = block_locs[sink_block].loc;
-
-        /**
-         * This heuristic only considers delta_x and delta_y, a much better
-         * heuristic would be to to create a more comprehensive lookup table.
-         *
-         * In particular this approach does not accurately capture the effect
-         * of fast carry-chain connections.
-         */
-        delay_source_to_sink = delay_model->delay({source_block_loc.x, source_block_loc.y, source_block_loc.layer}, source_block_ipin,
-                                                  {sink_block_loc.x, sink_block_loc.y, sink_block_loc.layer}, sink_block_ipin);
-        if (delay_source_to_sink < 0) {
-            VPR_ERROR(VPR_ERROR_PLACE,
-                      "in comp_td_single_connection_delay: Bad delay_source_to_sink value %g from %s (at %d,%d,%d) to %s (at %d,%d,%d)\n"
-                      "in comp_td_single_connection_delay: Delay is less than 0\n",
-                      block_type_pin_index_to_name(physical_tile_type(source_block_loc), source_block_ipin, false).c_str(),
-                      source_block_loc.x, source_block_loc.y, source_block_loc.layer,
-                      block_type_pin_index_to_name(physical_tile_type(sink_block_loc), sink_block_ipin, false).c_str(),
-                      sink_block_loc.x, sink_block_loc.y, sink_block_loc.layer,
-                      delay_source_to_sink);
-        }
-    }
-
-    return (delay_source_to_sink);
-}
-
-///@brief Recompute all point to point delays, updating `connection_delay` matrix.
-void comp_td_connection_delays(const PlaceDelayModel* delay_model,
-                               PlacerState& placer_state) {
-    const auto& cluster_ctx = g_vpr_ctx.clustering();
-    auto& p_timing_ctx = placer_state.mutable_timing();
-    auto& block_locs = placer_state.block_locs();
-    auto& connection_delay = p_timing_ctx.connection_delay;
-
-    for (ClusterNetId net_id : cluster_ctx.clb_nlist.nets()) {
-        for (size_t ipin = 1; ipin < cluster_ctx.clb_nlist.net_pins(net_id).size(); ++ipin) {
-            connection_delay[net_id][ipin] = comp_td_single_connection_delay(delay_model, block_locs, net_id, ipin);
-        }
-    }
-}
+#endif
\ No newline at end of file
diff --git a/vpr/src/place/timing/delay_model/override_delay_model.h b/vpr/src/place/timing/delay_model/override_delay_model.h
new file mode 100644
index 00000000000..5965261c272
--- /dev/null
+++ b/vpr/src/place/timing/delay_model/override_delay_model.h
@@ -0,0 +1,112 @@
+
+#pragma once
+
+#include "place_delay_model.h"
+#include "delta_delay_model.h"
+
+class OverrideDelayModel : public PlaceDelayModel {
+  public:
+    OverrideDelayModel(float min_cross_layer_delay,
+                       bool is_flat)
+        : cross_layer_delay_(min_cross_layer_delay)
+        , is_flat_(is_flat) {}
+
+    void compute(RouterDelayProfiler& route_profiler,
+                 const t_placer_opts& placer_opts,
+                 const t_router_opts& router_opts,
+                 int longest_length) override;
+
+    /**
+     * @brief returns delay from the specified (x,y) to the specified (x,y) with both endpoints on layer_num and the
+     * specified from and to pins
+     */
+    float delay(const t_physical_tile_loc& from_loc, int from_pin, const t_physical_tile_loc& to_loc, int to_pin) const override;
+
+    void dump_echo(std::string filepath) const override;
+
+    void read(const std::string& file) override;
+    void write(const std::string& file) const override;
+
+  public: //Mutators
+    void set_base_delay_model(std::unique_ptr<DeltaDelayModel> base_delay_model);
+    const DeltaDelayModel* base_delay_model() const;
+    float get_delay_override(int from_type, int from_class, int to_type, int to_class, int delta_x, int delta_y) const;
+    void set_delay_override(int from_type, int from_class, int to_type, int to_class, int delta_x, int delta_y, float delay);
+
+  private:
+    std::unique_ptr<DeltaDelayModel> base_delay_model_;
+    /// Minimum delay of cross-layer connections
+    float cross_layer_delay_;
+
+    /// Indicates whether the router is a two-stage or run-flat
+    bool is_flat_;
+
+    void compute_override_delay_model_(RouterDelayProfiler& router,
+                                       const t_router_opts& router_opts);
+
+    /**
+     * @brief Structure that allows delays to be queried from the delay model.
+     *
+     * Delay is calculated given the origin physical tile, the origin
+     * pin, the destination physical tile, and the destination pin.
+     * This structure encapsulates all these information.
+     *
+     *   @param from_type, to_type
+     *              Physical tile index (for easy array access)
+     *   @param from_class, to_class
+     *              The class that the pin belongs to.
+     *   @param delta_x, delta_y
+     *              The horizontal and vertical displacement
+     *              between two physical tiles.
+     */
+    struct t_override {
+        short from_type;
+        short to_type;
+        short from_class;
+        short to_class;
+        short delta_x;
+        short delta_y;
+
+        /**
+         * @brief Comparison operator designed for performance.
+         *
+         * Operator< is important since t_override serves as the key into the
+         * map structure delay_overrides_. A default comparison operator would
+         * not be inlined by the compiler.
+         *
+         * A combination of ALWAYS_INLINE attribute and std::lexicographical_compare
+         * is required for operator< to be inlined by compiler. Proper inlining of
+         * the function reduces place time by around 5%.
+         *
+         * For more information: https://github.com/verilog-to-routing/vtr-verilog-to-routing/issues/1225
+         */
+        friend ALWAYS_INLINE bool operator<(const t_override& lhs, const t_override& rhs) {
+            const short* left = reinterpret_cast<const short*>(&lhs);
+            const short* right = reinterpret_cast<const short*>(&rhs);
+            constexpr size_t NUM_T_OVERRIDE_MEMBERS = sizeof(t_override) / sizeof(short);
+            return std::lexicographical_compare(left, left + NUM_T_OVERRIDE_MEMBERS, right, right + NUM_T_OVERRIDE_MEMBERS);
+        }
+    };
+
+    /**
+     * @brief Map data structure that returns delay values according to
+     *        specific delay model queries.
+     *
+     * Delay model queries are provided by the t_override structure, which
+     * encapsulates the information regarding the origin and the destination.
+     */
+    vtr::flat_map2<t_override, float> delay_overrides_;
+
+    /**
+     * operator< treats memory layout of t_override as an array of short.
+     * This requires all members of t_override are shorts and there is no
+     * padding between members of t_override.
+     */
+    static_assert(sizeof(t_override) == sizeof(t_override::from_type) + sizeof(t_override::to_type) + sizeof(t_override::from_class) + sizeof(t_override::to_class) + sizeof(t_override::delta_x) + sizeof(t_override::delta_y), "Expect t_override to have a memory layout equivalent to an array of short (no padding)");
+    static_assert(sizeof(t_override::from_type) == sizeof(short), "Expect all t_override data members to be shorts");
+    static_assert(sizeof(t_override::to_type) == sizeof(short), "Expect all t_override data members to be shorts");
+    static_assert(sizeof(t_override::from_class) == sizeof(short), "Expect all t_override data members to be shorts");
+    static_assert(sizeof(t_override::to_class) == sizeof(short), "Expect all t_override data members to be shorts");
+    static_assert(sizeof(t_override::delta_x) == sizeof(short), "Expect all t_override data members to be shorts");
+    static_assert(sizeof(t_override::delta_y) == sizeof(short), "Expect all t_override data members to be shorts");
+};
\ No newline at end of file
diff --git a/vpr/src/place/timing/delay_model/place_delay_model.cpp b/vpr/src/place/timing/delay_model/place_delay_model.cpp
new file mode 100644
index 00000000000..04267e0e5f1
--- /dev/null
+++ b/vpr/src/place/timing/delay_model/place_delay_model.cpp
@@ -0,0 +1,78 @@
+/**
+ * @file place_delay_model.cpp
+ * @brief This file implements all the class methods and individual
+ *        routines related to the placer delay model.
+ */
+
+#include "place_delay_model.h"
+
+#include "globals.h"
+#include "router_lookahead_map.h"
+#include "placer_state.h"
+#include "vpr_error.h"
+
+/**
+ * @brief Returns the delay of one point to point connection.
+ *
+ * Only estimate delay for signals routed through the inter-block routing network.
+ * TODO: How should we compute the delay for globals? "Global signals are assumed to have zero delay."
+ */
+float comp_td_single_connection_delay(const PlaceDelayModel* delay_model,
+                                      const vtr::vector_map<ClusterBlockId, t_block_loc>& block_locs,
+                                      ClusterNetId net_id,
+                                      int ipin) {
+    const auto& cluster_ctx = g_vpr_ctx.clustering();
+
+    float delay_source_to_sink = 0.;
+
+    if (!cluster_ctx.clb_nlist.net_is_ignored(net_id)) {
+        ClusterPinId source_pin = cluster_ctx.clb_nlist.net_driver(net_id);
+        ClusterPinId sink_pin = cluster_ctx.clb_nlist.net_pin(net_id, ipin);
+
+        ClusterBlockId source_block = cluster_ctx.clb_nlist.pin_block(source_pin);
+        ClusterBlockId sink_block = cluster_ctx.clb_nlist.pin_block(sink_pin);
+
+        int source_block_ipin = cluster_ctx.clb_nlist.pin_logical_index(source_pin);
+        int sink_block_ipin = cluster_ctx.clb_nlist.pin_logical_index(sink_pin);
+
+        t_pl_loc source_block_loc = block_locs[source_block].loc;
+        t_pl_loc sink_block_loc = block_locs[sink_block].loc;
+
+        /**
+         * This heuristic only considers delta_x and delta_y, a much better
+         * heuristic would be to create a more comprehensive lookup table.
+         *
+         * In particular this approach does not accurately capture the effect
+         * of fast carry-chain connections.
+         */
+        delay_source_to_sink = delay_model->delay({source_block_loc.x, source_block_loc.y, source_block_loc.layer}, source_block_ipin,
+                                                  {sink_block_loc.x, sink_block_loc.y, sink_block_loc.layer}, sink_block_ipin);
+        if (delay_source_to_sink < 0) {
+            VPR_ERROR(VPR_ERROR_PLACE,
+                      "in comp_td_single_connection_delay: Bad delay_source_to_sink value %g from %s (at %d,%d,%d) to %s (at %d,%d,%d)\n"
+                      "in comp_td_single_connection_delay: Delay is less than 0\n",
+                      block_type_pin_index_to_name(physical_tile_type(source_block_loc), source_block_ipin, false).c_str(),
+                      source_block_loc.x, source_block_loc.y, source_block_loc.layer,
+                      block_type_pin_index_to_name(physical_tile_type(sink_block_loc), sink_block_ipin, false).c_str(),
+                      sink_block_loc.x, sink_block_loc.y, sink_block_loc.layer,
+                      delay_source_to_sink);
+        }
+    }
+
+    return (delay_source_to_sink);
+}
+
+///@brief Recompute all point to point delays, updating `connection_delay` matrix.
+void comp_td_connection_delays(const PlaceDelayModel* delay_model,
+                               PlacerState& placer_state) {
+    const auto& cluster_ctx = g_vpr_ctx.clustering();
+    auto& p_timing_ctx = placer_state.mutable_timing();
+    auto& block_locs = placer_state.block_locs();
+    auto& connection_delay = p_timing_ctx.connection_delay;
+
+    for (ClusterNetId net_id : cluster_ctx.clb_nlist.nets()) {
+        for (size_t ipin = 1; ipin < cluster_ctx.clb_nlist.net_pins(net_id).size(); ++ipin) {
+            connection_delay[net_id][ipin] = comp_td_single_connection_delay(delay_model, block_locs, net_id, ipin);
+        }
+    }
+}
diff --git a/vpr/src/place/timing/delay_model/place_delay_model.h b/vpr/src/place/timing/delay_model/place_delay_model.h
new file mode 100644
index 00000000000..27c89591071
--- /dev/null
+++ b/vpr/src/place/timing/delay_model/place_delay_model.h
@@ -0,0 +1,80 @@
+/**
+ * @file place_delay_model.h
+ * @brief This file contains all the class and function declarations related to
+ *        the placer delay model. For implementations, see place_delay_model.cpp.
+ */
+
+#pragma once
+
+#include "vtr_ndmatrix.h"
+#include "vtr_flat_map.h"
+#include "vpr_types.h"
+#include "router_delay_profiling.h"
+
+#ifndef __has_attribute
+#    define __has_attribute(x) 0 // Compatibility with non-clang compilers.
+#endif
+
+#if defined(COMPILER_GCC) && defined(NDEBUG)
+#    define ALWAYS_INLINE inline __attribute__((__always_inline__))
+#elif defined(COMPILER_MSVC) && defined(NDEBUG)
+#    define ALWAYS_INLINE __forceinline
+#elif __has_attribute(always_inline)
+#    define ALWAYS_INLINE __attribute__((always_inline)) // clang
+#else
+#    define ALWAYS_INLINE inline
+#endif
+
+///@brief Forward declarations.
+class PlaceDelayModel;
+class PlacerState;
+
+///@brief Returns the delay of one point to point connection.
+float comp_td_single_connection_delay(const PlaceDelayModel* delay_model,
+                                      const vtr::vector_map<ClusterBlockId, t_block_loc>& block_locs,
+                                      ClusterNetId net_id,
+                                      int ipin);
+
+///@brief Recompute all point to point delays, updating `connection_delay` matrix.
+void comp_td_connection_delays(const PlaceDelayModel* delay_model,
+                               PlacerState& placer_state);
+
+///@brief Abstract interface to a placement delay model.
+class PlaceDelayModel {
+  public:
+    virtual ~PlaceDelayModel() = default;
+
+    ///@brief Computes place delay model.
+    virtual void compute(RouterDelayProfiler& route_profiler,
+                         const t_placer_opts& placer_opts,
+                         const t_router_opts& router_opts,
+                         int longest_length)
+        = 0;
+
+    /**
+     * @brief Returns the delay estimate between the specified block pins.
+     *
+     * Either compute or read methods must be invoked before invoking delay.
+     */
+    virtual float delay(const t_physical_tile_loc& from_loc, int from_pin, const t_physical_tile_loc& to_loc, int to_pin) const = 0;
+
+    ///@brief Dumps the delay model to an echo file.
+    virtual void dump_echo(std::string filename) const = 0;
+
+    /**
+     * @brief Write place delay model to specified file.
+     *
+     * May be unimplemented, in which case method should throw an exception.
+     */
+    virtual void write(const std::string& file) const = 0;
+
+    /**
+     * @brief Read place delay model from specified file.
+     *
+     * May be unimplemented, in which case method should throw an exception.
+     */
+    virtual void read(const std::string& file) = 0;
+};
+
+
+
diff --git a/vpr/src/place/timing/delay_model/simple_delay_model.cpp b/vpr/src/place/timing/delay_model/simple_delay_model.cpp
new file mode 100644
index 00000000000..0031d9eb1fe
--- /dev/null
+++ b/vpr/src/place/timing/delay_model/simple_delay_model.cpp
@@ -0,0 +1,45 @@
+
+#include "simple_delay_model.h"
+
+
+void SimpleDelayModel::compute(RouterDelayProfiler& route_profiler,
+                               const t_placer_opts& /*placer_opts*/,
+                               const t_router_opts& /*router_opts*/,
+                               int /*longest_length*/) {
+    const auto& grid = g_vpr_ctx.device().grid;
+    const size_t num_physical_tile_types = g_vpr_ctx.device().physical_tile_types.size();
+    const size_t num_layers = grid.get_num_layers();
+
+    // Initializing the delay matrix to [num_physical_types][num_layers][num_layers][width][height]
+    // The second index is the layer that the source location is on and the third index is the sink layer
+    delays_ = vtr::NdMatrix<float, 5>({num_physical_tile_types,
+                                       num_layers,
+                                       num_layers,
+                                       grid.width(),
+                                       grid.height()});
+
+    for (size_t physical_tile_type_idx = 0; physical_tile_type_idx < num_physical_tile_types; ++physical_tile_type_idx) {
+        for (size_t from_layer = 0; from_layer < num_layers; ++from_layer) {
+            for (size_t to_layer = 0; to_layer < num_layers; ++to_layer) {
+                for (size_t dx = 0; dx < grid.width(); ++dx) {
+                    for (size_t dy = 0; dy < grid.height(); ++dy) {
+                        float min_delay = route_profiler.get_min_delay(physical_tile_type_idx,
+                                                                       from_layer,
+                                                                       to_layer,
+                                                                       dx,
+                                                                       dy);
+                        delays_[physical_tile_type_idx][from_layer][to_layer][dx][dy] = min_delay;
+                    }
+                }
+            }
+        }
+    }
+}
+
+float SimpleDelayModel::delay(const t_physical_tile_loc& from_loc, int /*from_pin*/, const t_physical_tile_loc& to_loc, int /*to_pin*/) const {
+    int delta_x = std::abs(from_loc.x - to_loc.x);
+    int delta_y = std::abs(from_loc.y - to_loc.y);
+
+    int from_tile_idx = g_vpr_ctx.device().grid.get_physical_type(from_loc)->index;
+    return delays_[from_tile_idx][from_loc.layer_num][to_loc.layer_num][delta_x][delta_y];
+}
\ No newline at end of file
diff --git a/vpr/src/place/timing/delay_model/simple_delay_model.h b/vpr/src/place/timing/delay_model/simple_delay_model.h
new file mode 100644
index 00000000000..f5a856688cd
--- /dev/null
+++ b/vpr/src/place/timing/delay_model/simple_delay_model.h
@@ -0,0 +1,39 @@
+
+#pragma once
+
+#include "place_delay_model.h"
+
+/**
+ * @class SimpleDelayModel
+ * @brief A simple delay model based on the information stored in router lookahead
+ * This is in contrast to other placement delay models that get the cost of getting from one location to another by running the router
+ */
+class SimpleDelayModel : public PlaceDelayModel {
+  public:
+    SimpleDelayModel() {}
+
+    /// @brief Use the information in the router lookahead to fill the delay matrix instead of running the router
+    void compute(RouterDelayProfiler& router,
+                 const t_placer_opts& placer_opts,
+                 const t_router_opts& router_opts,
+                 int longest_length) override;
+
+    float delay(const t_physical_tile_loc& from_loc, int /*from_pin*/, const t_physical_tile_loc& to_loc, int /*to_pin*/) const override;
+
+    void dump_echo(std::string /*filepath*/) const override {}
+
+    void read(const std::string& /*file*/) override {}
+    void write(const std::string& /*file*/) const override {}
+
+  private:
+    /**
+     * @brief The matrix to store the minimum delay between different points on different layers.
+     *
+     *The matrix used to store delay information is a 5D matrix. This data structure stores the minimum delay for each tile type on each layer to other layers
+     *for each dx and dy. We decided to separate the delay for each physical type on each die to accommodate cases where the connectivity of a physical type differs
+     *on each layer. Additionally, instead of using d_layer, we distinguish between the destination layer to handle scenarios where connectivity between layers
+     *is not uniform. For example, if the number of inter-layer connections between layer 1 and 2 differs from the number of connections between layer 0 and 1.
+     *One might argue that this variability could also occur for dx and dy. However, we are operating under the assumption that the FPGA fabric architecture is regular.
+     */
+    vtr::NdMatrix<float, 5> delays_; // [0..num_physical_type-1][0..num_layers-1][0..num_layers-1][0..max_dx][0..max_dy]
+};
\ No newline at end of file
diff --git a/vpr/src/place/place_timing_update.cpp b/vpr/src/place/timing/place_timing_update.cpp
similarity index 98%
rename from vpr/src/place/place_timing_update.cpp
rename to vpr/src/place/timing/place_timing_update.cpp
index c9c53b88f90..246db01f97d 100644
--- a/vpr/src/place/place_timing_update.cpp
+++ b/vpr/src/place/timing/place_timing_update.cpp
@@ -3,10 +3,15 @@
  * @brief Defines the routines declared in place_timing_update.h.
  */
 
-#include "vtr_time.h"
-
 #include "place_timing_update.h"
+
+#include "NetPinTimingInvalidator.h"
+#include "PlacerCriticalities.h"
+#include "PlacerSetupSlacks.h"
 #include "placer_state.h"
+#include "place_util.h"
+#include "vtr_time.h"
+
 
 /* Routines local to place_timing_update.cpp */
 static double comp_td_connection_cost(const PlaceDelayModel* delay_model,
@@ -94,8 +99,7 @@ void perform_full_timing_update(const PlaceCritParams& crit_params,
                           timing_info,
                           criticalities,
                           setup_slacks,
-                          pin_timing_invalidator,
-                          placer_state);
+                          pin_timing_invalidator);
 
     /* Update the timing cost with new connection criticalities. */
     update_timing_cost(delay_model,
@@ -136,13 +140,12 @@ void update_timing_classes(const PlaceCritParams& crit_params,
                            SetupTimingInfo* timing_info,
                            PlacerCriticalities* criticalities,
                            PlacerSetupSlacks* setup_slacks,
-                           NetPinTimingInvalidator* pin_timing_invalidator,
-                           PlacerState& placer_state) {
+                           NetPinTimingInvalidator* pin_timing_invalidator) {
     /* Run STA to update slacks and adjusted/relaxed criticalities. */
     timing_info->update();
 
     /* Update the placer's criticalities (e.g. sharpen with crit_exponent). */
-    criticalities->update_criticalities(crit_params, placer_state);
+    criticalities->update_criticalities(crit_params);
 
     /* Update the placer's raw setup slacks. */
     setup_slacks->update_setup_slacks();
diff --git a/vpr/src/place/place_timing_update.h b/vpr/src/place/timing/place_timing_update.h
similarity index 93%
rename from vpr/src/place/place_timing_update.h
rename to vpr/src/place/timing/place_timing_update.h
index 7944c4a7552..8e7a0dc1f46 100644
--- a/vpr/src/place/place_timing_update.h
+++ b/vpr/src/place/timing/place_timing_update.h
@@ -4,10 +4,15 @@
  */
 
 #pragma once
-#include "timing_place.h"
-#include "place_util.h"
 
-#include "NetPinTimingInvalidator.h"
+class PlacerState;
+class PlaceCritParams;
+class PlacerCriticalities;
+class PlacerSetupSlacks;
+class NetPinTimingInvalidator;
+class PlaceDelayModel;
+class SetupTimingInfo;
+struct t_placer_costs;
 
 ///@brief Initialize the timing information and structures in the placer.
 void initialize_timing_info(const PlaceCritParams& crit_params,
@@ -34,8 +39,7 @@ void update_timing_classes(const PlaceCritParams& crit_params,
                            SetupTimingInfo* timing_info,
                            PlacerCriticalities* criticalities,
                            PlacerSetupSlacks* setup_slacks,
-                           NetPinTimingInvalidator* pin_timing_invalidator,
-                           PlacerState& placer_state);
+                           NetPinTimingInvalidator* pin_timing_invalidator);
 
 ///@brief Updates the timing driven (td) costs.
 void update_timing_cost(const PlaceDelayModel* delay_model,
diff --git a/vpr/src/place/timing_place.cpp b/vpr/src/place/timing_place.cpp
deleted file mode 100644
index badd9d1fb61..00000000000
--- a/vpr/src/place/timing_place.cpp
+++ /dev/null
@@ -1,270 +0,0 @@
-/**
- * @file timing_place.cpp
- * @brief Stores the method definitions of classes defined in timing_place.h.
- */
-
-#include <cmath>
-
-#include "vtr_util.h"
-
-#include "vpr_types.h"
-#include "vpr_utils.h"
-#include "net_delay.h"
-#include "timing_place.h"
-#include "placer_state.h"
-
-#include "timing_info.h"
-
-///@brief Stores the netlist/pin-lookup references and the timing info, and allocates timing_place_crit_ (built with a quiet-NaN argument).
-PlacerCriticalities::PlacerCriticalities(const ClusteredNetlist& clb_nlist,
-                                         const ClusteredPinAtomPinsLookup& netlist_pin_lookup,
-                                         std::shared_ptr<const SetupTimingInfo> timing_info)
-    : clb_nlist_(clb_nlist)
-    , pin_lookup_(netlist_pin_lookup)
-    , timing_info_(std::move(timing_info))
-    , timing_place_crit_(make_net_pins_matrix(clb_nlist_, std::numeric_limits<float>::quiet_NaN())) {
-}
-
-/**
- * @brief Updates the criticalities in the timing_place_crit_ data structure.
- *
- * If the criticalities are not updated immediately after each time we call
- * timing_info->update(), then timing_info->pins_with_modified_setup_criticality()
- * cannot accurately account for all the pins that need to be updated. In this case,
- * `recompute_required` would be true, and we update all criticalities from scratch.
- *
- * If the criticality exponent has changed, we also need to update from scratch.
- */
-void PlacerCriticalities::update_criticalities(const PlaceCritParams& crit_params,
-                                               PlacerState& placer_state) {
-    /* If update is not enabled, exit the routine. */
-    if (!update_enabled) {
-        /* re-computation is required on the next iteration */
-        recompute_required = true;
-        return;
-    }
-
-    /* Determine which pins need updating (an exponent that differs from the last recorded one forces the from-scratch path) */
-    if (!recompute_required && crit_params.crit_exponent == last_crit_exponent_) {
-        incr_update_criticalities();
-    } else {
-        recompute_criticalities();
-
-        /* Record new criticality exponent */
-        last_crit_exponent_ = crit_params.crit_exponent;
-    }
-
-    auto& place_move_ctx = placer_state.mutable_move();
-
-    /* Performs a 1-to-1 mapping from criticality to timing_place_crit_.
-     * For every pin on every net (or, equivalently, for every tedge ending
-     * in that pin), timing_place_crit_ = criticality^(criticality exponent) */
-
-    /* Update the affected pins */
-    for (ClusterPinId clb_pin : cluster_pins_with_modified_criticality_) {
-        ClusterNetId clb_net = clb_nlist_.pin_net(clb_pin);
-        int pin_index_in_net = clb_nlist_.pin_net_index(clb_pin);
-        // Routing for placement is not flat (at least for the time being)
-        float clb_pin_crit = calculate_clb_net_pin_criticality(*timing_info_, pin_lookup_, ParentPinId(size_t(clb_pin)), /*is_flat=*/false);
-
-        float new_crit = pow(clb_pin_crit, crit_params.crit_exponent);
-        /*
-         * Update the highly critical pins container
-         * (comparisons are strict: if the old or the new criticality equals the limit, the container is left untouched)
-         * If the old criticality < limit and the new criticality > limit --> add this pin to the highly critical pins
-         * If the old criticality > limit and the new criticality < limit --> remove this pin from the highly critical pins
-         */
-        if (!first_time_update_criticality) {
-            if (new_crit > crit_params.crit_limit && timing_place_crit_[clb_net][pin_index_in_net] < crit_params.crit_limit) {
-                place_move_ctx.highly_crit_pins.emplace_back(clb_net, pin_index_in_net);
-            } else if (new_crit < crit_params.crit_limit && timing_place_crit_[clb_net][pin_index_in_net] > crit_params.crit_limit) {
-                place_move_ctx.highly_crit_pins.erase(std::remove(place_move_ctx.highly_crit_pins.begin(), place_move_ctx.highly_crit_pins.end(), std::make_pair(clb_net, pin_index_in_net)),
-                                                      place_move_ctx.highly_crit_pins.end());
-            }
-        } else { // first enabled update: the stored value is not consulted, only the new one
-            if (new_crit > crit_params.crit_limit) {
-                place_move_ctx.highly_crit_pins.emplace_back(clb_net, pin_index_in_net);
-            }
-        }
-
-        /* The placer likes a great deal of contrast between criticalities.
-         * Since path criticality varies much more than timing, we "sharpen" timing
-         * criticality by taking it to some power, crit_exponent (between 1 and 8 by default). */
-        timing_place_crit_[clb_net][pin_index_in_net] = new_crit;
-    }
-
-    /* Criticalities updated. In sync with timing info.   */
-    /* Can be incrementally updated on the next iteration */
-    recompute_required = false;
-
-    first_time_update_criticality = false;
-}
-
-void PlacerCriticalities::set_recompute_required() {
-    recompute_required = true; // makes the next enabled update_criticalities() call take the from-scratch path
-}
-
-/**
- * @brief Collect the cluster pins which need to be updated based on the latest timing
- *        analysis so that incremental updates to criticalities can be performed.
- *
- * Note we use the set of pins reported by the *timing_info* as having modified
- * criticality, rather than those marked as modified by the timing analyzer.
- *
- * Since timing_info uses shifted/relaxed criticality (which depends on max required
- * time and worst case slacks), additional nodes may be modified when updating the
- * atom pin criticalities.
- */
-
-void PlacerCriticalities::incr_update_criticalities() {
-    cluster_pins_with_modified_criticality_.clear(); // the set is rebuilt on every call, never accumulated
-
-    for (AtomPinId atom_pin : timing_info_->pins_with_modified_setup_criticality()) {
-        ClusterPinId clb_pin = pin_lookup_.connected_clb_pin(atom_pin);
-
-        //Some atom pins correspond to connections which are completely
-        //contained within a cluster, and hence have no corresponding
-        //clustered pin.
-        if (!clb_pin) continue;
-
-        cluster_pins_with_modified_criticality_.insert(clb_pin);
-    }
-}
-
-/**
- * @brief Collect all the sink pins in the netlist and mark them for update.
- *
- * For the incremental version, see PlacerCriticalities::incr_update_criticalities().
- */
-void PlacerCriticalities::recompute_criticalities() {
-    cluster_pins_with_modified_criticality_.clear();
-
-    /* Non-incremental: every sink pin of every net is queued for update */
-    for (ClusterNetId net_id : clb_nlist_.nets()) {
-        for (ClusterPinId pin_id : clb_nlist_.net_sinks(net_id)) {
-            cluster_pins_with_modified_criticality_.insert(pin_id);
-        }
-    }
-}
-
-///@brief Override the criticality of a particular sink connection (asserts 0 < ipin < number of pins on the net).
-void PlacerCriticalities::set_criticality(ClusterNetId net_id, int ipin, float crit_val) {
-    VTR_ASSERT_SAFE_MSG(ipin > 0, "The pin should not be a driver pin (ipin != 0)");
-    VTR_ASSERT_SAFE_MSG(ipin < int(clb_nlist_.net_pins(net_id).size()), "The pin index in net should be smaller than fanout");
-
-    timing_place_crit_[net_id][ipin] = crit_val;
-}
-
-/**
- * @brief Returns the range of clustered netlist pins (i.e. ClusterPinIds) collected by the most recent
- *        incremental or from-scratch pass of PlacerCriticalities::update_criticalities() (all sinks if from scratch).
- */
-PlacerCriticalities::pin_range PlacerCriticalities::pins_with_modified_criticality() const {
-    return vtr::make_range(cluster_pins_with_modified_criticality_);
-}
-
-/**************************************/
-
-///@brief Stores the netlist/pin-lookup references and the timing info, and allocates timing_place_setup_slacks_ (built with a quiet-NaN argument).
-PlacerSetupSlacks::PlacerSetupSlacks(const ClusteredNetlist& clb_nlist,
-                                     const ClusteredPinAtomPinsLookup& netlist_pin_lookup,
-                                     std::shared_ptr<const SetupTimingInfo> timing_info)
-    : clb_nlist_(clb_nlist)
-    , pin_lookup_(netlist_pin_lookup)
-    , timing_info_(std::move(timing_info))
-    , timing_place_setup_slacks_(make_net_pins_matrix(clb_nlist_, std::numeric_limits<float>::quiet_NaN())) {
-}
-
-/**
- * @brief Updates the setup slacks in the timing_place_setup_slacks_ data structure.
- *
- * If the setup slacks are not updated immediately after each time we call
- * timing_info->update(), then timing_info->pins_with_modified_setup_slack()
- * cannot accurately account for all the pins that need to be updated.
- *
- * In this case, `recompute_required` would be true, and we update all setup slacks
- * from scratch.
- */
-void PlacerSetupSlacks::update_setup_slacks() {
-    /* If update is not enabled, exit the routine. */
-    if (!update_enabled) {
-        /* re-computation is required on the next iteration */
-        recompute_required = true;
-        return;
-    }
-
-    /* Determine which pins need updating: only the reported ones, or every sink pin */
-    if (!recompute_required) {
-        incr_update_setup_slacks();
-    } else {
-        recompute_setup_slacks();
-    }
-
-    /* Update the affected pins */
-    for (ClusterPinId clb_pin : cluster_pins_with_modified_setup_slack_) {
-        ClusterNetId clb_net = clb_nlist_.pin_net(clb_pin);
-        int pin_index_in_net = clb_nlist_.pin_net_index(clb_pin);
-
-        float clb_pin_setup_slack = calculate_clb_net_pin_setup_slack(*timing_info_, pin_lookup_, clb_pin);
-
-        timing_place_setup_slacks_[clb_net][pin_index_in_net] = clb_pin_setup_slack;
-    }
-
-    /* Setup slacks updated. In sync with timing info.     */
-    /* Can be incrementally updated on the next iteration. */
-    recompute_required = false;
-}
-
-/**
- * @brief Collect the cluster pins which need to be updated based on the latest timing
- *        analysis so that incremental updates to setup slacks can be performed.
- *
- * Note we use the set of pins reported by the *timing_info* as having modified
- * setup slacks, rather than those marked as modified by the timing analyzer.
- */
-void PlacerSetupSlacks::incr_update_setup_slacks() {
-    cluster_pins_with_modified_setup_slack_.clear(); // the set is rebuilt on every call, never accumulated
-
-    for (AtomPinId atom_pin : timing_info_->pins_with_modified_setup_slack()) {
-        ClusterPinId clb_pin = pin_lookup_.connected_clb_pin(atom_pin);
-
-        //Some atom pins correspond to connections which are completely
-        //contained within a cluster, and hence have no corresponding
-        //clustered pin.
-        if (!clb_pin) continue;
-
-        cluster_pins_with_modified_setup_slack_.insert(clb_pin);
-    }
-}
-
-/**
- * @brief Collect all the sink pins in the netlist and mark them for update.
- *
- * For the incremental version, see PlacerSetupSlacks::incr_update_setup_slacks().
- */
-void PlacerSetupSlacks::recompute_setup_slacks() {
-    cluster_pins_with_modified_setup_slack_.clear();
-
-    /* Non-incremental: every sink pin of every net is queued for update */
-    for (ClusterNetId net_id : clb_nlist_.nets()) {
-        for (ClusterPinId pin_id : clb_nlist_.net_sinks(net_id)) {
-            cluster_pins_with_modified_setup_slack_.insert(pin_id);
-        }
-    }
-}
-
-///@brief Override the setup slack of a particular sink connection (asserts 0 < ipin < number of pins on the net).
-void PlacerSetupSlacks::set_setup_slack(ClusterNetId net_id, int ipin, float slack_val) {
-    VTR_ASSERT_SAFE_MSG(ipin > 0, "The pin should not be a driver pin (ipin != 0)");
-    VTR_ASSERT_SAFE_MSG(ipin < int(clb_nlist_.net_pins(net_id).size()), "The pin index in net should be smaller than fanout");
-
-    timing_place_setup_slacks_[net_id][ipin] = slack_val;
-}
-
-/**
- * @brief Returns the range of clustered netlist pins (i.e. ClusterPinIds) collected by the most recent
- *        incremental or from-scratch pass of PlacerSetupSlacks::update_setup_slacks() (all sinks if from scratch).
- */
-PlacerSetupSlacks::pin_range PlacerSetupSlacks::pins_with_modified_setup_slack() const {
-    return vtr::make_range(cluster_pins_with_modified_setup_slack_);
-}
diff --git a/vpr/src/place/timing_place.h b/vpr/src/place/timing_place.h
deleted file mode 100644
index 71e144334ad..00000000000
--- a/vpr/src/place/timing_place.h
+++ /dev/null
@@ -1,647 +0,0 @@
-/**
- * @file timing_place.h
- * @brief Interface used by the VPR placer to query information
- *        from the Tatum timing analyzer.
- *
- *   @class PlacerSetupSlacks
- *              Queries connection **RAW** setup slacks, which can
- *              range from negative to positive values. Also maps
- *              atom pin setup slacks to clb pin setup slacks.
- *   @class PlacerCriticalities
- *              Queries connection criticalities, which are calculated
- *              based on the raw setup slacks and range from 0 to 1.
- *              Also maps atom pin crit. to clb pin crit.
- *   @class PlacerTimingCosts
- *              Hierarchical structure used by update_td_costs() to
- *              maintain the order of addition operation of float values
- *              (to avoid round-offs) while doing incremental updates.
- *
- * Calculating criticalities:
- *      All the raw setup slack values across a single clock domain are gathered
- *      and rated from the best to the worst in terms of criticalities. In order
- *      to calculate criticalities, all the slack values need to be non-negative.
- *      Hence, if the worst slack is negative, all the slack values are shifted
- *      by the value of the worst slack so that the value is at least 0. If the
- *      worst slack is positive, then no shift happens.
- *
- *      The best (shifted) slack (the most positive one) will have a criticality of 0.
- *      The worst (shifted) slack value will have a criticality of 1.
- *
- *      Criticalities are used to calculate timing costs for each connection.
- *      The formula is cost = delay * criticality.
- *
- *      For a more detailed description on how criticalities are calculated, see
- *      calc_relaxed_criticality() in `timing_util.cpp`.
- */
-
-#pragma once
-#include "vtr_vec_id_set.h"
-#include "timing_info_fwd.h"
-#include "clustered_netlist_utils.h"
-#include "place_delay_model.h"
-#include "vpr_net_pins_matrix.h"
-
-/**
- * @brief Saves the placement criticality parameters
- *
- * crit_exponent: The exponent each criticality is raised to in order to sharpen it
- * crit_limit:    The threshold a sharpened criticality must exceed for its pin to count as highly critical
- */
-struct PlaceCritParams {
-    float crit_exponent;
-    float crit_limit;
-};
-
-/**
- * @brief PlacerCriticalities returns the clustered netlist connection criticalities
- *        used by the placer ('sharpened' by a criticality exponent).
- *
- * Usage
- * =====
- * This class also serves to map atom netlist level criticalites (i.e. on AtomPinIds)
- * to the clustered netlist (i.e. ClusterPinIds) used during placement.
- *
- * Criticalities are updated by update_criticalities(), given that `update_enabled` is
- * set to true. It will update criticalities based on the atom netlist connection
- * criticalities provided by the passed in SetupTimingInfo.
- *
- * This process can be done incrementally, based on the modified connections/AtomPinIds
- * returned by SetupTimingInfo. However, the set returned only reflects the connections
- * changed by the last call to the timing info update.
- *
- * Therefore, if SetupTimingInfo is updated twice in succession without criticalities
- * getting updated (update_enabled = false), the returned set cannot account for all
- * the connections that have been modified. In this case, we flag `recompute_required`
- * as true, and we recompute the criticalities for every connection to ensure that
- * they are all up to date. Hence, each time update_setup_slacks_and_criticalities()
- * is called, we assign `recompute_required` the opposite value of `update_enabled`.
- *
- * This class also maps/transforms the modified atom connections/pins returned by the
- * timing info into modified clustered netlist connections/pins after calling
- * update_criticalities(). The interface then enables users to iterate over this range
- * via pins_with_modified_criticality(). This is useful for incrementally re-calculating
- * the timing costs.
- *
- * The criticalities of individual connections can then be queried by calling the
- * criticality() member function.
- *
- * Implementation
- * ==============
- * To support incremental re-calculation, the class saves the last criticality exponent
- * passed to PlacerCriticalities::update_criticalities(). If the next update uses the same
- * exponent, criticalities can be incrementally updated. Otherwise, they must be re-calculated
- * from scratch, since a change in exponent changes *all* criticalities.
- */
-class PlacerCriticalities {
-  public: //Types
-    typedef vtr::vec_id_set<ClusterPinId>::iterator pin_iterator;
-    typedef vtr::vec_id_set<ClusterNetId>::iterator net_iterator;
-
-    typedef vtr::Range<pin_iterator> pin_range;
-    typedef vtr::Range<net_iterator> net_range;
-
-  public: //Lifetime
-    PlacerCriticalities(const ClusteredNetlist& clb_nlist,
-                        const ClusteredPinAtomPinsLookup& netlist_pin_lookup,
-                        std::shared_ptr<const SetupTimingInfo> timing_info);
-    PlacerCriticalities(const PlacerCriticalities&) = delete;
-    PlacerCriticalities& operator=(const PlacerCriticalities&) = delete;
-
-  public: //Accessors
-    ///@brief Returns the criticality of the specified connection.
-    float criticality(ClusterNetId net, int ipin) const { return timing_place_crit_[net][ipin]; }
-
-    /**
-     * @brief Returns the range of clustered netlist pins (i.e. ClusterPinIds) which
-     *        were modified by the last call to PlacerCriticalities::update_criticalities().
-     */
-    pin_range pins_with_modified_criticality() const;
-
-  public: //Modifiers
-    /**
-     * @brief Updates criticalities based on the atom netlist criticalities
-     *        provided by timing_info and the provided criticality exponent.
-     *
-     * Should consistently call this method after the most recent timing analysis to
-     * keep the criticalities stored in this class in sync with the timing analyzer.
-     * If out of sync, then the criticalities cannot be incrementally updated
-     * during the next timing analysis iteration.
-     */
-    void update_criticalities(const PlaceCritParams& crit_params,
-                              PlacerState& placer_state);
-
-    ///@brief Enable the recompute_required flag to enforce from scratch update.
-    void set_recompute_required();
-
-    ///@brief From scratch update. See timing_place.cpp for more.
-    void recompute_criticalities();
-
-    ///@brief Override the criticality of a particular connection.
-    void set_criticality(ClusterNetId net, int ipin, float crit_val);
-
-    ///@brief Set `update_enabled` to true.
-    void enable_update() { update_enabled = true; }
-
-    ///@brief Set `update_enabled` to false.
-    void disable_update() { update_enabled = false; }
-
-  private: //Data
-    ///@brief The clb netlist in the placement context.
-    const ClusteredNetlist& clb_nlist_;
-
-    ///@brief The lookup table that maps atom pins to clb pins.
-    const ClusteredPinAtomPinsLookup& pin_lookup_;
-
-    ///@brief A pointer to the setup timing analyzer
-    std::shared_ptr<const SetupTimingInfo> timing_info_;
-
-    /**
-     * @brief The matrix that stores criticality value for each connection.
-     *
-     * Index range: [0..cluster_ctx.clb_nlist.nets().size()-1][1..num_pins-1]
-     */
-    ClbNetPinsMatrix<float> timing_place_crit_;
-
-    /**
-     * The criticality exponent when update_criticalities() was last called
-     * (used to detect if incremental update can be used).
-     */
-    float last_crit_exponent_ = std::numeric_limits<float>::quiet_NaN();
-
-    ///@brief Set of pins with criticalities modified by last call to update_criticalities().
-    vtr::vec_id_set<ClusterPinId> cluster_pins_with_modified_criticality_;
-
-    ///@brief Incremental update. See timing_place.cpp for more.
-    void incr_update_criticalities();
-
-    ///@brief Flag that turns on/off the update_criticalities() routine.
-    bool update_enabled = true;
-
-    /**
-     * @brief Flag that checks if criticalities need to be recomputed for all connections.
-     *
-     * Used by the method update_criticalities(). The incremental update is not possible
-     * if this method wasn't called after the previous timing info update.
-     */
-    bool recompute_required = true;
-
-    /**
-     * @brief True until update_criticalities() has completed an enabled update for the first time.
-     *
-     * While true, highly critical pins are added based on the new criticality alone, without comparing to the stored value.
-     */
-    bool first_time_update_criticality = true;
-};
-
-/**
- * @brief PlacerSetupSlacks returns the RAW setup slacks of clustered netlist connection.
- *
- * Usage
- * =====
- * This class mirrors PlacerCriticalities by both its methods and its members. The only
- * difference is that this class deals with RAW setup slacks returned by SetupTimingInfo
- * rather than criticalities. See the documentation on PlacerCriticalities for more.
- *
- * RAW setup slacks are unlike criticalities. Their values are not confined between
- * 0 and 1. Their values can be either positive or negative.
- *
- * This class also provides iterating over the clustered netlist connections/pins that
- * have modified setup slacks by the last call to update_setup_slacks(). However, this
- * utility is mainly used for incrementally committing the setup slack values into the
- * structure `connection_setup_slack` used by many placer routines.
- */
-class PlacerSetupSlacks {
-  public: //Types
-    typedef vtr::vec_id_set<ClusterPinId>::iterator pin_iterator;
-    typedef vtr::vec_id_set<ClusterNetId>::iterator net_iterator;
-
-    typedef vtr::Range<pin_iterator> pin_range;
-    typedef vtr::Range<net_iterator> net_range;
-
-  public: //Lifetime
-    PlacerSetupSlacks(const ClusteredNetlist& clb_nlist,
-                      const ClusteredPinAtomPinsLookup& netlist_pin_lookup,
-                      std::shared_ptr<const SetupTimingInfo> timing_info);
-    PlacerSetupSlacks(const PlacerSetupSlacks& clb_nlist) = delete;
-    PlacerSetupSlacks& operator=(const PlacerSetupSlacks& clb_nlist) = delete;
-
-  public: //Accessors
-    ///@brief Returns the setup slack of the specified connection.
-    float setup_slack(ClusterNetId net, int ipin) const { return timing_place_setup_slacks_[net][ipin]; }
-
-    /**
-     * @brief Returns the range of clustered netlist pins (i.e. ClusterPinIds)
-     *        which were modified by the last call to PlacerSetupSlacks::update_setup_slacks().
-     */
-    pin_range pins_with_modified_setup_slack() const;
-
-  public: //Modifiers
-    /**
-     * @brief Updates setup slacks based on the atom netlist setup slacks provided
-     *        by timing_info_.
-     *
-     * Should consistently call this method after the most recent timing analysis to
-     * keep the setup slacks stored in this class in sync with the timing analyzer.
-     * If out of sync, then the setup slacks cannot be incrementally updated
-     * during the next timing analysis iteration.
-     */
-    void update_setup_slacks();
-
-    ///@brief Enable the recompute_required flag to enforce from scratch update.
-    void set_recompute_required() { recompute_required = true; }
-
-    ///@brief Override the setup slack of a particular connection.
-    void set_setup_slack(ClusterNetId net, int ipin, float slack_val);
-
-    ///@brief Set `update_enabled` to true.
-    void enable_update() { update_enabled = true; }
-
-    ///@brief Set `update_enabled` to false.
-    void disable_update() { update_enabled = false; }
-
-  private: //Data
-    const ClusteredNetlist& clb_nlist_;
-    const ClusteredPinAtomPinsLookup& pin_lookup_;
-    std::shared_ptr<const SetupTimingInfo> timing_info_;
-
-    /**
-     * @brief The matrix that stores raw setup slack values for each connection.
-     *
-     * Index range: [0..cluster_ctx.clb_nlist.nets().size()-1][1..num_pins-1]
-     */
-    ClbNetPinsMatrix<float> timing_place_setup_slacks_;
-
-    ///@brief Set of pins with raw setup slacks modified by last call to update_setup_slacks()
-    vtr::vec_id_set<ClusterPinId> cluster_pins_with_modified_setup_slack_;
-
-    ///@brief Incremental update. See timing_place.cpp for more.
-    void incr_update_setup_slacks();
-
-    ///@brief From scratch update. See timing_place.cpp for more.
-    void recompute_setup_slacks();
-
-    ///@brief Flag that turns on/off the update_setup_slacks() routine.
-    bool update_enabled = true;
-
-    /**
-     * @brief Flag that checks if setup slacks need to be recomputed for all connections.
-     *
-     * Used by the method update_setup_slacks(). The incremental update is not possible
-     * if this method wasn't called after the previous timing info update.
-     */
-    bool recompute_required = true;
-};
-
-/**
- * @brief PlacerTimingCosts mimics a 2D array of connection timing costs running from:
- *        [0..cluster_ctx.clb_nlist.nets().size()-1][1..num_pins-1].
- *
- * It can be used similar to:
- *
- *      PlacerTimingCosts connection_timing_costs(cluster_ctx.clb_nlist); //Construct
- *
- *      //...
- *
- *      //Modify a connection cost
- *      connection_timing_costs[net_id][ipin] = new_cost;
- *
- *      //Potentially other modifications...
- *
- *      //Calculate the updated timing cost, of all connections,
- *      //incrementally based on modifications
- *      float total_timing_cost = connection_timing_costs.total_cost();
- *
- * However behind the scenes PlacerTimingCosts tracks when connection costs are modified,
- * and efficiently re-calculates the total timing cost incrementally based on the connections
- * which have had their cost modified.
- *
- * Implementation
- * ==============
- * Internally, PlacerTimingCosts stores all connection costs in a flat array in the last part
- * of connection_costs_.  To mimic 2d-array like access PlacerTimingCosts also uses two proxy
- * classes which allow indexing in the net and pin dimensions (NetProxy and ConnectionProxy
- * respectively).
- *
- * The first part of connection_costs_ stores intermediate sums of the connection costs for
- * efficient incremental re-calculation. More concretely, connection_costs_ stores a binary
- * tree, where leaves correspond to individual connection costs and intermediate nodes the
- * partial sums of the connection costs. (The binary tree is stored implicitly in the
- * connection_costs_  vector, using Eytzinger's/BFS layout.) By summing the entire binary
- * tree we calculate the total timing cost over all connections.
- *
- * Using a binary tree allows us to efficiently re-calculate the timing costs when only a subset
- * of connections are changed. This is done by 'invalidating' intermediate nodes (from leaves up
- * to the root) which have descendants (leaves) with modified connection costs. When the
- * total_cost() method is called, it recursively walks the binary tree to re-calculate the cost.
- * Only invalidated nodes are traversed, with valid nodes just returning their previously
- * calculated (and unchanged) value.
- *
- * For a circuit with 'K' connections, of which 'k' have changed (typically k << K), this can
- * be done in O(k log K) time.
- *
- * It is important to note that due to limited floating point precision, floating point
- * arithmetic has an order dependence (due to round-off). Using a binary tree to total
- * the timing connection costs allows us to incrementally update the total timing cost while
- * maintaining the *same order of operations* as if it was re-computed from scratch. This
- * ensures we *always* get consistent results regardless of what/when connections are changed.
- *
- * Proxy Classes
- * =============
- * NetProxy is returned by PlacerTimingCosts's operator[], and stores a pointer to the start of
- * internal storage of that net's connection costs.
- *
- * ConnectionProxy is returned by NetProxy's operator[], and holds a reference to a particular
- * element of the internal storage pertaining to a specific connection's cost. ConnectionProxy
- * supports assignment, allowing clients to modify the connection cost. It also detects if the
- * assigned value differs from the previous value and if so, calls PlacerTimingCosts's
- * invalidate() method on that connection cost.
- *
- * PlacerTimingCosts's invalidate() method marks the cost element's ancestors as invalid (NaN)
- * so they will be re-calculated by PlacerTimingCosts' total_cost() method.
- */
-class PlacerTimingCosts {
-  public:
-    PlacerTimingCosts() = default;
-
-    PlacerTimingCosts(const ClusteredNetlist& nlist) {
-        auto nets = nlist.nets();
-
-        net_start_indicies_.resize(nets.size());
-
-        //Walk through the netlist to determine how many connections there are.
-        size_t iconn = 0;
-        for (ClusterNetId net : nets) {
-            //The placer always skips 'ignored' nets, so they don't affect timing
-            //costs, so we also skip them here
-            if (nlist.net_is_ignored(net)) {
-                net_start_indicies_[net] = OPEN;
-                continue;
-            }
-
-            //Save the startind index of the current net's connections.
-            // We use a -1 offset, since sinks indexed from [1..num_net_pins-1]
-            // (there is no timing cost associated with net drivers)
-            net_start_indicies_[net] = iconn - 1;
-
-            //Reserve space for all this net's connections
-            iconn += nlist.net_sinks(net).size();
-        }
-
-        size_t num_connections = iconn;
-
-        //Determine how many binary tree levels we need to have a leaf
-        //for each connection cost
-        size_t ilevel = 0;
-        while (num_nodes_in_level(ilevel) < num_connections) {
-            ++ilevel;
-        }
-        num_levels_ = ilevel + 1;
-
-        size_t num_leaves = num_nodes_in_level(ilevel);
-        size_t num_level_before_leaves = num_nodes_in_level(ilevel - 1);
-
-        VTR_ASSERT_MSG(num_leaves >= num_connections, "Need at least as many leaves as connections");
-        VTR_ASSERT_MSG(
-            num_connections == 0 || num_level_before_leaves < num_connections,
-            "Level before should have fewer nodes than connections (to ensure using the smallest binary tree)");
-
-        //We don't need to store all possible leaves if we have fewer connections
-        //(i.e. bottom-right of tree is empty)
-        size_t last_level_unused_nodes = num_nodes_in_level(ilevel) - num_connections;
-        size_t num_nodes = num_nodes_up_to_level(ilevel) - last_level_unused_nodes;
-
-        //Reserve space for connection costs and intermediate node values
-        connection_costs_ = std::vector<double>(num_nodes, std::numeric_limits<double>::quiet_NaN());
-
-        //The net start indicies we calculated earlier didn't account for intermediate binary tree nodes
-        //Shift the start indicies after the intermediate nodes
-        size_t num_intermediate_nodes = num_nodes_up_to_level(ilevel - 1);
-        for (ClusterNetId net : nets) {
-            if (nlist.net_is_ignored(net)) continue;
-
-            net_start_indicies_[net] = net_start_indicies_[net] + num_intermediate_nodes;
-        }
-    }
-
-    /**
-     * @brief Proxy class representing a connection cost.
-     *
-     * Supports modification of connection cost while detecting
-     * changes and reporting them up to PlacerTimingCosts.
-     */
-    class ConnectionProxy {
-      public:
-        ConnectionProxy(PlacerTimingCosts* timing_costs, double& connection_cost)
-            : timing_costs_(timing_costs)
-            , connection_cost_(connection_cost) {}
-
-        ///@brief Allow clients to modify the connection cost via assignment.
-        ConnectionProxy& operator=(double new_cost) {
-            if (new_cost != connection_cost_) {
-                //If connection cost changed, update it, and mark it
-                //as invalidated
-                connection_cost_ = new_cost;
-                timing_costs_->invalidate(&connection_cost_);
-            }
-            return *this;
-        }
-
-        /**
-         * @brief Support getting the current connection cost as a double.
-         *
-         * Useful for client code operating on the cost values (e.g. difference between costs).
-         */
-        operator double() const {
-            return connection_cost_;
-        }
-
-      private:
-        PlacerTimingCosts* timing_costs_;
-        double& connection_cost_;
-    };
-
-    /**
-     * @brief Proxy class representing the connection costs of a net.
-     *
-     * Supports indexing by pin index to retrieve the ConnectionProxy for that pin/connection.
-     */
-    class NetProxy {
-      public:
-        NetProxy(PlacerTimingCosts* timing_costs, double* net_sink_costs)
-            : timing_costs_(timing_costs)
-            , net_sink_costs_(net_sink_costs) {}
-
-        ///@brief Indexes into the specific net pin/connection.
-        ConnectionProxy operator[](size_t ipin) {
-            return ConnectionProxy(timing_costs_, net_sink_costs_[ipin]);
-        }
-
-        const ConnectionProxy operator[](size_t ipin) const {
-            return ConnectionProxy(timing_costs_, net_sink_costs_[ipin]);
-        }
-
-      private:
-        PlacerTimingCosts* timing_costs_;
-        double* net_sink_costs_;
-    };
-
-    ///@brief Indexes into the specific net.
-    NetProxy operator[](ClusterNetId net_id) {
-        VTR_ASSERT_SAFE(net_start_indicies_[net_id] >= 0);
-
-        double* net_connection_costs = &connection_costs_[net_start_indicies_[net_id]];
-        return NetProxy(this, net_connection_costs);
-    }
-
-    NetProxy operator[](ClusterNetId net_id) const {
-        VTR_ASSERT_SAFE(net_start_indicies_[net_id] >= 0);
-
-        const double* net_connection_costs = &connection_costs_[net_start_indicies_[net_id]];
-        return NetProxy(const_cast<PlacerTimingCosts*>(this), const_cast<double*>(net_connection_costs));
-    }
-
-    void clear() {
-        connection_costs_.clear();
-        net_start_indicies_.clear();
-    }
-
-    void swap(PlacerTimingCosts& other) {
-        std::swap(connection_costs_, other.connection_costs_);
-        std::swap(net_start_indicies_, other.net_start_indicies_);
-        std::swap(num_levels_, other.num_levels_);
-    }
-
-    /**
-     * @brief Calculates the total cost of all connections efficiently
-     *        in the face of modified connection costs.
-     */
-    double total_cost() {
-        float cost = total_cost_recurr(0); //Root
-
-        VTR_ASSERT_DEBUG_MSG(cost == total_cost_from_scratch(0),
-                             "Expected incremental and from-scratch costs to be consistent");
-
-        return cost;
-    }
-
-  private:
-    ///@brief Recursively calculate and update the timing cost rooted at inode.
-    double total_cost_recurr(size_t inode) {
-        //Prune out-of-tree
-        if (inode > connection_costs_.size() - 1) {
-            return 0.;
-        }
-
-        //Valid pre-calculated intermediate result or valid leaf
-        if (!std::isnan(connection_costs_[inode])) {
-            return connection_costs_[inode];
-        }
-
-        //Recompute recursively
-        double node_cost = total_cost_recurr(left_child(inode))
-                           + total_cost_recurr(right_child(inode));
-
-        //Save intermedate cost at this node
-        connection_costs_[inode] = node_cost;
-
-        return node_cost;
-    }
-
-    double total_cost_from_scratch(size_t inode) const {
-        //Prune out-of-tree
-        if (inode > connection_costs_.size() - 1) {
-            return 0.;
-        }
-
-        //Recompute recursively
-        double node_cost = total_cost_from_scratch(left_child(inode))
-                           + total_cost_from_scratch(right_child(inode));
-
-        return node_cost;
-    }
-
-    ///@brief Friend-ed so it can call invalidate().
-    friend ConnectionProxy;
-
-    void invalidate(double* invalidated_cost) {
-        //Check pointer within range of internal storage
-        VTR_ASSERT_SAFE_MSG(
-            invalidated_cost >= &connection_costs_[0],
-            "Connection cost pointer should be after start of internal storage");
-
-        VTR_ASSERT_SAFE_MSG(
-            invalidated_cost <= &connection_costs_[connection_costs_.size() - 1],
-            "Connection cost pointer should be before end of internal storage");
-
-        size_t icost = invalidated_cost - &connection_costs_[0];
-
-        VTR_ASSERT_SAFE(icost >= num_nodes_up_to_level(num_levels_ - 2));
-
-        //Invalidate parent intermediate costs up to root or first
-        //already-invalidated parent
-        size_t iparent = parent(icost);
-
-        while (!std::isnan(connection_costs_[iparent])) {
-            //Invalidate
-            connection_costs_[iparent] = std::numeric_limits<double>::quiet_NaN();
-
-            if (iparent == 0) {
-                break; //At root
-            } else {
-                //Next parent
-                iparent = parent(iparent);
-            }
-        }
-
-        VTR_ASSERT_SAFE_MSG(std::isnan(connection_costs_[0]), "Invalidating any connection should have invalidated the root");
-    }
-
-    size_t left_child(size_t i) const {
-        return 2 * i + 1;
-    }
-
-    size_t right_child(size_t i) const {
-        return 2 * i + 2;
-    }
-
-    size_t parent(size_t i) const {
-        return (i - 1) / 2;
-    }
-
-    /**
-     * @brief Returns the number of nodes in ilevel'th level.
-     *
-     * If ilevel is negative, return 0, since the root shouldn't
-     * be counted as a leaf node candidate.
-     */
-    size_t num_nodes_in_level(int ilevel) const {
-        return ilevel < 0 ? 0 : (2 << (ilevel));
-    }
-
-    ///@brief Returns the total number of nodes in levels [0..ilevel] (inclusive).
-    size_t num_nodes_up_to_level(int ilevel) const {
-        return (2 << (ilevel + 1)) - 1;
-    }
-
-  private:
-    /**
-     * @brief Vector storing the implicit binary tree of connection costs.
-     *
-     * The actual connections are stored at the end of the vector
-     * (last level of the binary tree). The earlier portions of
-     * the tree are the intermediate nodes.
-     *
-     * The methods left_child()/right_child()/parent() can be used
-     * to traverse the tree by indicies into this vector.
-     */
-    std::vector<double> connection_costs_;
-
-    /**
-     * @brief Vector storing the indicies of the first connection
-     *        for each net in the netlist, used for indexing by net.
-     */
-    vtr::vector<ClusterNetId, int> net_start_indicies_;
-
-    ///@brief Number of levels in the binary tree.
-    size_t num_levels_ = 0;
-};
diff --git a/vpr/src/place/timing_place_lookup.cpp b/vpr/src/place/timing_place_lookup.cpp
deleted file mode 100644
index 86dc396e2b8..00000000000
--- a/vpr/src/place/timing_place_lookup.cpp
+++ /dev/null
@@ -1,1319 +0,0 @@
-
-#include <cmath>
-#include <limits>
-
-#include "rr_graph_fwd.h"
-#include "vtr_assert.h"
-#include "vtr_ndmatrix.h"
-#include "vtr_log.h"
-#include "vtr_util.h"
-#include "vtr_math.h"
-#include "vtr_memory.h"
-#include "vtr_time.h"
-#include "vtr_geometry.h"
-
-#include "arch_util.h"
-#include "vpr_types.h"
-#include "globals.h"
-#include "place_and_route.h"
-#include "route_net.h"
-#include "timing_place_lookup.h"
-#include "read_xml_arch_file.h"
-#include "atom_netlist.h"
-
-// all functions in profiling:: namespace, which are only activated if PROFILE is defined
-#include "route_profiling.h"
-#include "router_delay_profiling.h"
-#include "place_delay_model.h"
-
-/*To compute delay between blocks we calculate the delay between */
-/*different nodes in the FPGA.  From this procedure we generate
- * a lookup table which tells us the delay between different locations in*/
-/*the FPGA */
-
-/*the delta arrays are used to contain the best case routing delay */
-/*between different locations on the FPGA. */
-
-//#define VERBOSE
-
-constexpr float UNINITIALIZED_DELTA = -1;                                  //Indicates the delta delay value has not been calculated
-constexpr float EMPTY_DELTA = -2;                                          //Indicates delta delay from/to an EMPTY block
-constexpr float IMPOSSIBLE_DELTA = std::numeric_limits<float>::infinity(); //Indicates there is no valid delta delay
-
-struct t_profile_loc {
-    t_profile_loc(int x, int y, std::vector<vtr::Point<int>> delta_values)
-        : root(x, y)
-        , deltas(delta_values) {}
-
-    vtr::Point<int> root;
-    std::vector<vtr::Point<int>> deltas;
-};
-
-struct t_profile_info {
-    std::vector<t_profile_loc> locations;
-
-    int max_delta_x;
-    int max_delta_y;
-};
-
-/*** Function Prototypes *****/
-static t_chan_width setup_chan_width(const t_router_opts& router_opts,
-                                     t_chan_width_dist chan_width_dist);
-
-static float route_connection_delay(
-    RouterDelayProfiler& route_profiler,
-    int from_layer_num,
-    int to_layer_num,
-    int source_x_loc,
-    int source_y_loc,
-    int sink_x_loc,
-    int sink_y_loc,
-    const t_router_opts& router_opts,
-    bool measure_directconnect);
-
-// Prototype for computing delta delay matrix.
-typedef std::function<void(
-    RouterDelayProfiler&,
-    vtr::Matrix<std::vector<float>>&,
-    int,
-    int,
-    int,
-    int,
-    int,
-    int,
-    int,
-    int,
-    const t_router_opts&,
-    bool,
-    const std::set<std::string>&,
-    bool)>
-    t_compute_delta_delay_matrix;
-
-static void generic_compute_matrix_iterative_astar(
-    RouterDelayProfiler& route_profiler,
-    vtr::Matrix<std::vector<float>>& matrix,
-    int from_layer_num,
-    int to_layer_num,
-    int source_x,
-    int source_y,
-    int start_x,
-    int start_y,
-    int end_x,
-    int end_y,
-    const t_router_opts& router_opts,
-    bool measure_directconnect,
-    const std::set<std::string>& allowed_types,
-    bool /***/);
-
-static void generic_compute_matrix_dijkstra_expansion(
-    RouterDelayProfiler& route_profiler,
-    vtr::Matrix<std::vector<float>>& matrix,
-    int from_layer_num,
-    int to_layer_num,
-    int source_x,
-    int source_y,
-    int start_x,
-    int start_y,
-    int end_x,
-    int end_y,
-    const t_router_opts& router_opts,
-    bool measure_directconnect,
-    const std::set<std::string>& allowed_types,
-    bool is_flat);
-
-static vtr::NdMatrix<float, 4> compute_delta_delays(
-    RouterDelayProfiler& route_profiler,
-    const t_placer_opts& palcer_opts,
-    const t_router_opts& router_opts,
-    bool measure_directconnect,
-    size_t longest_length,
-    bool is_flat);
-
-float delay_reduce(std::vector<float>& delays, e_reducer reducer);
-
-static vtr::NdMatrix<float, 4> compute_delta_delay_model(
-    RouterDelayProfiler& route_profiler,
-    const t_placer_opts& placer_opts,
-    const t_router_opts& router_opts,
-    bool measure_directconnect,
-    int longest_length,
-    bool is_flat);
-
-/**
- * @brief Use the information in the router lookahead to fill the delay matrix instead of running the router
- * @param route_profiler
- * @return The delay matrix that contain the minimum cost between two locations
- */
-static vtr::NdMatrix<float, 5> compute_simple_delay_model(RouterDelayProfiler& route_profiler);
-
-static bool find_direct_connect_sample_locations(const t_direct_inf* direct,
-                                                 t_physical_tile_type_ptr from_type,
-                                                 int from_pin,
-                                                 int from_pin_class,
-                                                 t_physical_tile_type_ptr to_type,
-                                                 int to_pin,
-                                                 int to_pin_class,
-                                                 RRNodeId& out_src_node,
-                                                 RRNodeId& out_sink_node);
-
-static bool verify_delta_delays(const vtr::NdMatrix<float, 4>& delta_delays);
-
-static int get_longest_segment_length(std::vector<t_segment_inf>& segment_inf);
-
-static void fix_empty_coordinates(vtr::NdMatrix<float, 4>& delta_delays);
-static void fix_uninitialized_coordinates(vtr::NdMatrix<float, 4>& delta_delays);
-
-static float find_neighboring_average(vtr::NdMatrix<float, 4>& matrix,
-                                      int from_layer,
-                                      t_physical_tile_loc to_tile_loc,
-                                      int max_distance);
-
-/******* Globally Accessible Functions **********/
-
-std::unique_ptr<PlaceDelayModel> compute_place_delay_model(const t_placer_opts& placer_opts,
-                                                           const t_router_opts& router_opts,
-                                                           const Netlist<>& net_list,
-                                                           t_det_routing_arch* det_routing_arch,
-                                                           std::vector<t_segment_inf>& segment_inf,
-                                                           t_chan_width_dist chan_width_dist,
-                                                           const std::vector<t_direct_inf>& directs,
-                                                           bool is_flat) {
-    vtr::ScopedStartFinishTimer timer("Computing placement delta delay look-up");
-
-    t_chan_width chan_width = setup_chan_width(router_opts, chan_width_dist);
-
-    alloc_routing_structs(chan_width, router_opts, det_routing_arch, segment_inf, directs, is_flat);
-
-    const RouterLookahead* router_lookahead = get_cached_router_lookahead(*det_routing_arch,
-                                                                          router_opts.lookahead_type,
-                                                                          router_opts.write_router_lookahead,
-                                                                          router_opts.read_router_lookahead,
-                                                                          segment_inf,
-                                                                          is_flat);
-
-    RouterDelayProfiler route_profiler(net_list, router_lookahead, is_flat);
-
-    int longest_length = get_longest_segment_length(segment_inf);
-
-    /*now setup and compute the actual arrays */
-    std::unique_ptr<PlaceDelayModel> place_delay_model;
-    float min_cross_layer_delay = get_min_cross_layer_delay();
-
-    if (placer_opts.delay_model_type == PlaceDelayModelType::SIMPLE) {
-        place_delay_model = std::make_unique<SimpleDelayModel>();
-    } else if (placer_opts.delay_model_type == PlaceDelayModelType::DELTA) {
-        place_delay_model = std::make_unique<DeltaDelayModel>(min_cross_layer_delay, is_flat);
-    } else if (placer_opts.delay_model_type == PlaceDelayModelType::DELTA_OVERRIDE) {
-        place_delay_model = std::make_unique<OverrideDelayModel>(min_cross_layer_delay, is_flat);
-    } else {
-        VTR_ASSERT_MSG(false, "Invalid placer delay model");
-    }
-
-    if (placer_opts.read_placement_delay_lookup.empty()) {
-        place_delay_model->compute(route_profiler, placer_opts, router_opts, longest_length);
-    } else {
-        place_delay_model->read(placer_opts.read_placement_delay_lookup);
-    }
-
-    if (!placer_opts.write_placement_delay_lookup.empty()) {
-        place_delay_model->write(placer_opts.write_placement_delay_lookup);
-    }
-
-    /*free all data structures that are no longer needed */
-    free_routing_structs();
-
-    return place_delay_model;
-}
-
-void DeltaDelayModel::compute(
-    RouterDelayProfiler& route_profiler,
-    const t_placer_opts& placer_opts,
-    const t_router_opts& router_opts,
-    int longest_length) {
-    delays_ = compute_delta_delay_model(
-        route_profiler,
-        placer_opts, router_opts, /*measure_directconnect=*/true,
-        longest_length,
-        is_flat_);
-}
-
-void OverrideDelayModel::compute(
-    RouterDelayProfiler& route_profiler,
-    const t_placer_opts& placer_opts,
-    const t_router_opts& router_opts,
-    int longest_length) {
-    auto delays = compute_delta_delay_model(
-        route_profiler,
-        placer_opts, router_opts, /*measure_directconnect=*/false,
-        longest_length,
-        is_flat_);
-
-    base_delay_model_ = std::make_unique<DeltaDelayModel>(cross_layer_delay_, delays, false);
-
-    compute_override_delay_model(route_profiler, router_opts);
-}
-
-void SimpleDelayModel::compute(
-    RouterDelayProfiler& router,
-    const t_placer_opts& /*placer_opts*/,
-    const t_router_opts& /*router_opts*/,
-    int /*longest_length*/) {
-    delays_ = compute_simple_delay_model(router);
-}
-
-/******* File Accessible Functions **********/
-
-std::vector<int> get_best_classes(enum e_pin_type pintype, t_physical_tile_type_ptr type) {
-    /*
-     * This function tries to identify the best pin classes to hook up
-     * for delay calculation.  The assumption is that we should pick
-     * the pin class with the largest number of pins. This makes
-     * sense, since it ensures we pick commonly used pins, and
-     * removes order dependence on how the pins are specified
-     * in the architecture (except in the case were the two largest pin classes
-     * of a particular pintype have the same number of pins, in which case the
-     * first pin class is used).
-     */
-
-    std::vector<int> best_classes;
-
-    //Record any non-zero Fc pins
-    //
-    //Note that we track non-zero Fc pins, since certain Fc overides
-    //may apply to only a subset of wire types. This ensures we record
-    //which pins can potentially connect to global routing.
-    std::unordered_set<int> non_zero_fc_pins;
-    for (const t_fc_specification& fc_spec : type->fc_specs) {
-        if (fc_spec.fc_value == 0) continue;
-
-        non_zero_fc_pins.insert(fc_spec.pins.begin(), fc_spec.pins.end());
-    }
-
-    //Collect all classes of matching type which connect to general routing
-    for (int i = 0; i < (int)type->class_inf.size(); i++) {
-        if (type->class_inf[i].type == pintype) {
-            //Check whether all pins in this class are ignored or have zero fc
-            bool any_pins_connect_to_general_routing = false;
-            for (int ipin = 0; ipin < type->class_inf[i].num_pins; ++ipin) {
-                int pin = type->class_inf[i].pinlist[ipin];
-                //If the pin isn't ignored, and has a non-zero Fc to some general
-                //routing the class is suitable for delay profiling
-                if (!type->is_ignored_pin[pin] && non_zero_fc_pins.count(pin)) {
-                    any_pins_connect_to_general_routing = true;
-                    break;
-                }
-            }
-
-            if (!any_pins_connect_to_general_routing) continue; //Skip if doesn't connect to general routing
-
-            //Record candidate class
-            best_classes.push_back(i);
-        }
-    }
-
-    //Sort classe so largest pin class is first
-    auto cmp_class = [&](int lhs, int rhs) {
-        return type->class_inf[lhs].num_pins > type->class_inf[rhs].num_pins;
-    };
-
-    std::stable_sort(best_classes.begin(), best_classes.end(), cmp_class);
-
-    return best_classes;
-}
-
-static int get_longest_segment_length(std::vector<t_segment_inf>& segment_inf) {
-    int length = 0;
-
-    for (const t_segment_inf &seg_info : segment_inf) {
-        if (seg_info.length > length) {
-            length = seg_info.length;
-        }
-    }
-
-    return length;
-}
-
-static t_chan_width setup_chan_width(const t_router_opts& router_opts,
-                                     t_chan_width_dist chan_width_dist) {
-    /*we give plenty of tracks, this increases routability for the */
-    /*lookup table generation */
-
-    t_graph_type graph_directionality;
-    int width_fac;
-
-    if (router_opts.fixed_channel_width == NO_FIXED_CHANNEL_WIDTH) {
-        auto& device_ctx = g_vpr_ctx.device();
-
-        auto type = find_most_common_tile_type(device_ctx.grid);
-
-        width_fac = 4 * type->num_pins;
-        /*this is 2x the value that binary search starts */
-        /*this should be enough to allow most pins to   */
-        /*connect to tracks in the architecture */
-    } else {
-        width_fac = router_opts.fixed_channel_width;
-    }
-
-    if (router_opts.route_type == GLOBAL) {
-        graph_directionality = GRAPH_BIDIR;
-    } else {
-        graph_directionality = GRAPH_UNIDIR;
-    }
-
-    return init_chan(width_fac, chan_width_dist, graph_directionality);
-}
-
-static float route_connection_delay(
-    RouterDelayProfiler& route_profiler,
-    int from_layer_num,
-    int to_layer_num,
-    int source_x,
-    int source_y,
-    int sink_x,
-    int sink_y,
-    const t_router_opts& router_opts,
-    bool measure_directconnect) {
-    //Routes between the source and sink locations and calculates the delay
-
-    float net_delay_value = IMPOSSIBLE_DELTA; /*set to known value for debug purposes */
-
-    auto& device_ctx = g_vpr_ctx.device();
-
-    bool successfully_routed = false;
-
-    //Get the rr nodes to route between
-    auto best_driver_ptcs = get_best_classes(DRIVER, device_ctx.grid.get_physical_type({source_x, source_y, from_layer_num}));
-    auto best_sink_ptcs = get_best_classes(RECEIVER, device_ctx.grid.get_physical_type({sink_x, sink_y, to_layer_num}));
-
-    for (int driver_ptc : best_driver_ptcs) {
-        VTR_ASSERT(driver_ptc != OPEN);
-        RRNodeId source_rr_node = device_ctx.rr_graph.node_lookup().find_node(from_layer_num, source_x, source_y, SOURCE, driver_ptc);
-
-        VTR_ASSERT(source_rr_node != RRNodeId::INVALID());
-
-        for (int sink_ptc : best_sink_ptcs) {
-            VTR_ASSERT(sink_ptc != OPEN);
-            RRNodeId sink_rr_node = device_ctx.rr_graph.node_lookup().find_node(to_layer_num, sink_x, sink_y, SINK, sink_ptc);
-
-            if (sink_rr_node == RRNodeId::INVALID())
-                continue;
-
-            if (!measure_directconnect && directconnect_exists(source_rr_node, sink_rr_node)) {
-                //Skip if we shouldn't measure direct connects and a direct connect exists
-                continue;
-            }
-
-            {
-                successfully_routed = route_profiler.calculate_delay(
-                    source_rr_node, sink_rr_node,
-                    router_opts,
-                    &net_delay_value);
-            }
-
-            if (successfully_routed) break;
-        }
-        if (successfully_routed) break;
-    }
-
-    if (!successfully_routed) {
-        VTR_LOG_WARN("Unable to route between blocks at (%d,%d,%d) and (%d,%d,%d) to characterize delay (setting to %g)\n",
-                     source_x, source_y, from_layer_num, sink_x, sink_y, to_layer_num, net_delay_value);
-    }
-
-    return (net_delay_value);
-}
-
-static void add_delay_to_matrix(
-    vtr::Matrix<std::vector<float>>* matrix,
-    int delta_x,
-    int delta_y,
-    float delay) {
-    if ((*matrix)[delta_x][delta_y].size() == 1 && (*matrix)[delta_x][delta_y][0] == EMPTY_DELTA) {
-        //Overwrite empty delta
-        (*matrix)[delta_x][delta_y][0] = delay;
-    } else {
-        //Collect delta
-        (*matrix)[delta_x][delta_y].push_back(delay);
-    }
-}
-
-static void generic_compute_matrix_dijkstra_expansion(
-    RouterDelayProfiler& /*route_profiler*/,
-    vtr::Matrix<std::vector<float>>& matrix,
-    int from_layer_num,
-    int to_layer_num,
-    int source_x,
-    int source_y,
-    int start_x,
-    int start_y,
-    int end_x,
-    int end_y,
-    const t_router_opts& router_opts,
-    bool measure_directconnect,
-    const std::set<std::string>& allowed_types,
-    bool is_flat) {
-    auto& device_ctx = g_vpr_ctx.device();
-
-    t_physical_tile_type_ptr src_type = device_ctx.grid.get_physical_type({source_x, source_y, from_layer_num});
-    bool is_allowed_type = allowed_types.empty() || allowed_types.find(src_type->name) != allowed_types.end();
-    if (src_type == device_ctx.EMPTY_PHYSICAL_TILE_TYPE || !is_allowed_type) {
-        for (int sink_x = start_x; sink_x <= end_x; sink_x++) {
-            for (int sink_y = start_y; sink_y <= end_y; sink_y++) {
-                int delta_x = abs(sink_x - source_x);
-                int delta_y = abs(sink_y - source_y);
-
-                if (matrix[delta_x][delta_y].empty()) {
-                    //Only set empty target if we don't already have a valid delta delay
-                    matrix[delta_x][delta_y].push_back(EMPTY_DELTA);
-#ifdef VERBOSE
-                    VTR_LOG("Computed delay: %12s delta: %d,%d (src: %d,%d sink: %d,%d)\n",
-                            "EMPTY",
-                            delta_x, delta_y,
-                            source_x, source_y,
-                            sink_x, sink_y);
-#endif
-                }
-            }
-        }
-
-        return;
-    }
-
-    vtr::Matrix<bool> found_matrix({matrix.dim_size(0), matrix.dim_size(1)}, false);
-
-    auto best_driver_ptcs = get_best_classes(DRIVER, device_ctx.grid.get_physical_type({source_x, source_y, from_layer_num}));
-    for (int driver_ptc : best_driver_ptcs) {
-        VTR_ASSERT(driver_ptc != OPEN);
-        RRNodeId source_rr_node = device_ctx.rr_graph.node_lookup().find_node(from_layer_num, source_x, source_y, SOURCE, driver_ptc);
-
-        VTR_ASSERT(source_rr_node != RRNodeId::INVALID());
-        auto delays = calculate_all_path_delays_from_rr_node(source_rr_node, router_opts, is_flat);
-
-        bool path_to_all_sinks = true;
-        for (int sink_x = start_x; sink_x <= end_x; sink_x++) {
-            for (int sink_y = start_y; sink_y <= end_y; sink_y++) {
-                int delta_x = abs(sink_x - source_x);
-                int delta_y = abs(sink_y - source_y);
-
-                if (found_matrix[delta_x][delta_y]) {
-                    continue;
-                }
-
-                t_physical_tile_type_ptr sink_type = device_ctx.grid.get_physical_type({sink_x, sink_y, to_layer_num});
-                if (sink_type == device_ctx.EMPTY_PHYSICAL_TILE_TYPE) {
-                    if (matrix[delta_x][delta_y].empty()) {
-                        //Only set empty target if we don't already have a valid delta delay
-                        matrix[delta_x][delta_y].push_back(EMPTY_DELTA);
-#ifdef VERBOSE
-                        VTR_LOG("Computed delay: %12s delta: %d,%d (src: %d,%d sink: %d,%d)\n",
-                                "EMPTY",
-                                delta_x, delta_y,
-                                source_x, source_y,
-                                sink_x, sink_y);
-#endif
-                        found_matrix[delta_x][delta_y] = true;
-                    }
-                } else {
-                    bool found_a_sink = false;
-                    auto best_sink_ptcs = get_best_classes(RECEIVER, device_ctx.grid.get_physical_type({sink_x, sink_y, to_layer_num}));
-                    for (int sink_ptc : best_sink_ptcs) {
-                        VTR_ASSERT(sink_ptc != OPEN);
-                        RRNodeId sink_rr_node = device_ctx.rr_graph.node_lookup().find_node(to_layer_num, sink_x, sink_y, SINK, sink_ptc);
-
-                        if (sink_rr_node == RRNodeId::INVALID())
-                            continue;
-
-                        if (!measure_directconnect && directconnect_exists(source_rr_node, sink_rr_node)) {
-                            //Skip if we shouldn't measure direct connects and a direct connect exists
-                            continue;
-                        }
-
-                        if (std::isnan(delays[sink_rr_node])) {
-                            // This sink was not found
-                            continue;
-                        }
-
-#ifdef VERBOSE
-                        VTR_LOG("Computed delay: %12g delta: %d,%d (src: %d,%d sink: %d,%d)\n",
-                                delays[size_t(sink_rr_node)],
-                                delta_x, delta_y,
-                                source_x, source_y,
-                                sink_x, sink_y);
-#endif
-                        found_matrix[delta_x][delta_y] = true;
-
-                        add_delay_to_matrix(&matrix, delta_x, delta_y, delays[sink_rr_node]);
-
-                        found_a_sink = true;
-                        break;
-                    }
-
-                    if (!found_a_sink) {
-                        path_to_all_sinks = false;
-                    }
-                }
-            }
-        }
-
-        if (path_to_all_sinks) {
-            break;
-        }
-    }
-
-    for (int sink_x = start_x; sink_x <= end_x; sink_x++) {
-        for (int sink_y = start_y; sink_y <= end_y; sink_y++) {
-            int delta_x = abs(sink_x - source_x);
-            int delta_y = abs(sink_y - source_y);
-            if (!found_matrix[delta_x][delta_y]) {
-                add_delay_to_matrix(&matrix, delta_x, delta_y, IMPOSSIBLE_DELTA);
-                VTR_LOG_WARN("Unable to route between blocks at (%d,%d,%d) and (%d,%d,%d) to characterize delay (setting to %g)\n",
-                             source_x,
-                             source_y,
-                             from_layer_num,
-                             sink_x, 
-                             sink_y,
-                             to_layer_num,
-                             IMPOSSIBLE_DELTA);
-            }
-        }
-    }
-}
-
-static void generic_compute_matrix_iterative_astar(
-    RouterDelayProfiler& route_profiler,
-    vtr::Matrix<std::vector<float>>& matrix,
-    int from_layer_num,
-    int to_layer_num,
-    int source_x,
-    int source_y,
-    int start_x,
-    int start_y,
-    int end_x,
-    int end_y,
-    const t_router_opts& router_opts,
-    bool measure_directconnect,
-    const std::set<std::string>& allowed_types,
-    bool /***/) {
-    //vtr::ScopedStartFinishTimer t(vtr::string_fmt("Profiling from (%d,%d)", source_x, source_y));
-
-    int delta_x, delta_y;
-    int sink_x, sink_y;
-
-    auto& device_ctx = g_vpr_ctx.device();
-
-    for (sink_x = start_x; sink_x <= end_x; sink_x++) {
-        for (sink_y = start_y; sink_y <= end_y; sink_y++) {
-            delta_x = abs(sink_x - source_x);
-            delta_y = abs(sink_y - source_y);
-
-            t_physical_tile_type_ptr src_type = device_ctx.grid.get_physical_type({source_x, source_y, from_layer_num});
-            t_physical_tile_type_ptr sink_type = device_ctx.grid.get_physical_type({sink_x, sink_y, to_layer_num});
-
-            bool src_or_target_empty = (src_type == device_ctx.EMPTY_PHYSICAL_TILE_TYPE
-                                        || sink_type == device_ctx.EMPTY_PHYSICAL_TILE_TYPE);
-
-            bool is_allowed_type = allowed_types.empty() || allowed_types.find(src_type->name) != allowed_types.end();
-
-            if (src_or_target_empty || !is_allowed_type) {
-                if (matrix[delta_x][delta_y].empty()) {
-                    //Only set empty target if we don't already have a valid delta delay
-                    matrix[delta_x][delta_y].push_back(EMPTY_DELTA);
-#ifdef VERBOSE
-                    VTR_LOG("Computed delay: %12s delta: %d,%d (src: %d,%d sink: %d,%d)\n",
-                            "EMPTY",
-                            delta_x, delta_y,
-                            source_x, source_y,
-                            sink_x, sink_y);
-#endif
-                }
-            } else {
-                //Valid start/end
-
-                float delay = route_connection_delay(route_profiler,
-                                                     from_layer_num,
-                                                     to_layer_num,
-                                                     source_x,
-                                                     source_y,
-                                                     sink_x,
-                                                     sink_y,
-                                                     router_opts,
-                                                     measure_directconnect);
-
-#ifdef VERBOSE
-                VTR_LOG("Computed delay: %12g delta: %d,%d (src: %d,%d sink: %d,%d)\n",
-                        delay,
-                        delta_x, delta_y,
-                        source_x, source_y,
-                        sink_x, sink_y);
-#endif
-                if (matrix[delta_x][delta_y].size() == 1 && matrix[delta_x][delta_y][0] == EMPTY_DELTA) {
-                    //Overwrite empty delta
-                    matrix[delta_x][delta_y][0] = delay;
-                } else {
-                    //Collect delta
-                    matrix[delta_x][delta_y].push_back(delay);
-                }
-            }
-        }
-    }
-}
-
-static vtr::NdMatrix<float, 4> compute_delta_delays(
-    RouterDelayProfiler& route_profiler,
-    const t_placer_opts& placer_opts,
-    const t_router_opts& router_opts,
-    bool measure_directconnect,
-    size_t longest_length,
-    bool is_flat) {
-    //To avoid edge effects we place the source at least 'longest_length' away
-    //from the device edge
-    //and route from there for all possible delta values < dimension
-
-    auto& device_ctx = g_vpr_ctx.device();
-    auto& grid = device_ctx.grid;
-
-    vtr::NdMatrix<float, 4> delta_delays({static_cast<unsigned long>(grid.get_num_layers()), static_cast<unsigned long>(grid.get_num_layers()), grid.width(), grid.height()});
-
-    for (int from_layer_num = 0; from_layer_num < grid.get_num_layers(); from_layer_num++) {
-        for (int to_layer_num = 0; to_layer_num < grid.get_num_layers(); to_layer_num++) {
-            vtr::NdMatrix<std::vector<float>, 2> sampled_delta_delays({grid.width(), grid.height()});
-
-            size_t mid_x = vtr::nint(grid.width() / 2);
-            size_t mid_y = vtr::nint(grid.height() / 2);
-
-            size_t low_x = std::min(longest_length, mid_x);
-            size_t low_y = std::min(longest_length, mid_y);
-            size_t high_x = mid_x;
-            size_t high_y = mid_y;
-            if (longest_length <= grid.width()) {
-                high_x = std::max(grid.width() - longest_length, mid_x);
-            }
-            if (longest_length <= grid.height()) {
-                high_y = std::max(grid.height() - longest_length, mid_y);
-            }
-
-            std::set<std::string> allowed_types;
-            if (!placer_opts.allowed_tiles_for_delay_model.empty()) {
-                auto allowed_types_vector = vtr::split(placer_opts.allowed_tiles_for_delay_model, ",");
-                for (const auto& type : allowed_types_vector) {
-                    allowed_types.insert(type);
-                }
-            }
-
-            //   +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
-            //   +                 |                       |               +
-            //   +        A        |           B           |       C       +
-            //   +                 |                       |               +
-            //   +-----------------\-----------------------.---------------+
-            //   +                 |                       |               +
-            //   +                 |                       |               +
-            //   +                 |                       |               +
-            //   +                 |                       |               +
-            //   +        D        |           E           |       F       +
-            //   +                 |                       |               +
-            //   +                 |                       |               +
-            //   +                 |                       |               +
-            //   +                 |                       |               +
-            //   +-----------------*-----------------------/---------------+
-            //   +                 |                       |               +
-            //   +        G        |           H           |       I       +
-            //   +                 |                       |               +
-            //   +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
-            //
-            //   * = (low_x, low_y)
-            //   . = (high_x, high_y)
-            //   / = (high_x, low_y)
-            //   \ = (low_x, high_y)
-            //   + = device edge
-
-            //Find the lowest y location on the left edge with a non-empty block
-            int y = 0;
-            int x = 0;
-            t_physical_tile_type_ptr src_type = nullptr;
-            for (x = 0; x < (int)grid.width(); ++x) {
-                for (y = 0; y < (int)grid.height(); ++y) {
-                    auto type = grid.get_physical_type({x, y, from_layer_num});
-
-                    if (type != device_ctx.EMPTY_PHYSICAL_TILE_TYPE) {
-                        if (!allowed_types.empty() && allowed_types.find(type->name) == allowed_types.end()) {
-                            continue;
-                        }
-                        src_type = type;
-                        break;
-                    }
-                }
-                if (src_type) {
-                    break;
-                }
-            }
-            VTR_ASSERT(src_type != nullptr);
-
-            t_compute_delta_delay_matrix generic_compute_matrix;
-            switch (placer_opts.place_delta_delay_matrix_calculation_method) {
-                case e_place_delta_delay_algorithm::ASTAR_ROUTE:
-                    generic_compute_matrix = generic_compute_matrix_iterative_astar;
-                    break;
-                case e_place_delta_delay_algorithm::DIJKSTRA_EXPANSION:
-                    generic_compute_matrix = generic_compute_matrix_dijkstra_expansion;
-                    break;
-                default:
-                    VPR_FATAL_ERROR(VPR_ERROR_PLACE, "Unknown place_delta_delay_matrix_calculation_method %d", placer_opts.place_delta_delay_matrix_calculation_method);
-            }
-
-#ifdef VERBOSE
-            VTR_LOG("Computing from lower left edge (%d,%d):\n", x, y);
-#endif
-            generic_compute_matrix(route_profiler, sampled_delta_delays,
-                                   from_layer_num, to_layer_num,
-                                   x, y,
-                                   x, y,
-                                   grid.width() - 1, grid.height() - 1,
-                                   router_opts,
-                                   measure_directconnect, allowed_types,
-                                   is_flat);
-
-            //Find the lowest x location on the bottom edge with a non-empty block
-            src_type = nullptr;
-            for (y = 0; y < (int)grid.height(); ++y) {
-                for (x = 0; x < (int)grid.width(); ++x) {
-                    auto type = grid.get_physical_type({x, y, from_layer_num});
-
-                    if (type != device_ctx.EMPTY_PHYSICAL_TILE_TYPE) {
-                        if (!allowed_types.empty() && allowed_types.find(type->name) == allowed_types.end()) {
-                            continue;
-                        }
-                        src_type = type;
-                        break;
-                    }
-                }
-                if (src_type) {
-                    break;
-                }
-            }
-            VTR_ASSERT(src_type != nullptr);
-#ifdef VERBOSE
-            VTR_LOG("Computing from left bottom edge (%d,%d):\n", x, y);
-#endif
-            generic_compute_matrix(route_profiler, sampled_delta_delays,
-                                   from_layer_num, to_layer_num,
-                                   x, y,
-                                   x, y,
-                                   grid.width() - 1, grid.height() - 1,
-                                   router_opts,
-                                   measure_directconnect, allowed_types,
-                                   is_flat);
-
-            //Since the other delta delay values may have suffered from edge effects,
-            //we recalculate deltas within regions B, C, E, F
-#ifdef VERBOSE
-            VTR_LOG("Computing from low/low:\n");
-#endif
-            generic_compute_matrix(route_profiler, sampled_delta_delays,
-                                   from_layer_num, to_layer_num,
-                                   low_x, low_y,
-                                   low_x, low_y,
-                                   grid.width() - 1, grid.height() - 1,
-                                   router_opts,
-                                   measure_directconnect, allowed_types,
-                                   is_flat);
-
-            //Since the other delta delay values may have suffered from edge effects,
-            //we recalculate deltas within regions D, E, G, H
-#ifdef VERBOSE
-            VTR_LOG("Computing from high/high:\n");
-#endif
-            generic_compute_matrix(route_profiler, sampled_delta_delays,
-                                   from_layer_num, to_layer_num,
-                                   high_x, high_y,
-                                   0, 0,
-                                   high_x, high_y,
-                                   router_opts,
-                                   measure_directconnect, allowed_types,
-                                   is_flat);
-
-            //Since the other delta delay values may have suffered from edge effects,
-            //we recalculate deltas within regions A, B, D, E
-#ifdef VERBOSE
-            VTR_LOG("Computing from high/low:\n");
-#endif
-            generic_compute_matrix(route_profiler, sampled_delta_delays,
-                                   from_layer_num, to_layer_num,
-                                   high_x, low_y,
-                                   0, low_y,
-                                   high_x, grid.height() - 1,
-                                   router_opts,
-                                   measure_directconnect, allowed_types,
-                                   is_flat);
-
-            //Since the other delta delay values may have suffered from edge effects,
-            //we recalculate deltas within regions E, F, H, I
-#ifdef VERBOSE
-            VTR_LOG("Computing from low/high:\n");
-#endif
-            generic_compute_matrix(route_profiler, sampled_delta_delays,
-                                   from_layer_num, to_layer_num,
-                                   low_x, high_y,
-                                   low_x, 0,
-                                   grid.width() - 1, high_y,
-                                   router_opts,
-                                   measure_directconnect, allowed_types,
-                                   is_flat);
-            for (size_t dx = 0; dx < sampled_delta_delays.dim_size(0); ++dx) {
-                for (size_t dy = 0; dy < sampled_delta_delays.dim_size(1); ++dy) {
-                    delta_delays[from_layer_num][to_layer_num][dx][dy] = delay_reduce(sampled_delta_delays[dx][dy], placer_opts.delay_model_reducer);
-                }
-            }
-        }
-    }
-
-    return delta_delays;
-}
-
-float delay_reduce(std::vector<float>& delays, e_reducer reducer) {
-    if (delays.empty()) {
-        return IMPOSSIBLE_DELTA;
-    } else if (delays.size() == 1) {
-        return delays[0];
-    }
-
-    VTR_ASSERT(delays.size() > 1);
-
-    float delay;
-
-    if (reducer == e_reducer::MIN) {
-        auto itr = std::min_element(delays.begin(), delays.end());
-        delay = *itr;
-    } else if (reducer == e_reducer::MAX) {
-        auto itr = std::max_element(delays.begin(), delays.end());
-        delay = *itr;
-    } else if (reducer == e_reducer::MEDIAN) {
-        std::stable_sort(delays.begin(), delays.end());
-        delay = vtr::median(delays.begin(), delays.end());
-    } else if (reducer == e_reducer::ARITHMEAN) {
-        delay = vtr::arithmean(delays.begin(), delays.end());
-    } else if (reducer == e_reducer::GEOMEAN) {
-        delay = vtr::geomean(delays.begin(), delays.end());
-    } else {
-        VPR_FATAL_ERROR(VPR_ERROR_PLACE, "Unrecognized delta delay reducer");
-    }
-
-    return delay;
-}
-
-/* We return the average placement estimated delay for a routing spanning (x,y).
- * We start with an averaging distance of 1 (i.e. from (x-1,y-1) to (x+1,y+1))
- * and look for legal delay values to average; if some are found we return the
- * average and if none are found we increase the distance to average over.
- *
- * If no legal values are found to average over with a range of max_distance,
- * we return IMPOSSIBLE_DELTA.
- */
-static float find_neighboring_average(
-    vtr::NdMatrix<float, 4>& matrix,
-    int from_layer,
-    t_physical_tile_loc to_tile_loc,
-    int max_distance) {
-    float sum = 0;
-    int counter = 0;
-    int endx = matrix.end_index(2);
-    int endy = matrix.end_index(3);
-
-    int x = to_tile_loc.x;
-    int y = to_tile_loc.y;
-    int to_layer = to_tile_loc.layer_num;
-
-    for (int distance = 1; distance <= max_distance; ++distance) {
-        for (int delx = x - distance; delx <= x + distance; delx++) {
-            for (int dely = y - distance; dely <= y + distance; dely++) {
-                // Check distance constraint
-                if (abs(delx - x) + abs(dely - y) > distance) {
-                    continue;
-                }
-
-                //check out of bounds
-                if (delx < 0 || dely < 0 || delx >= endx || dely >= endy || (delx == x && dely == y)) {
-                    continue;
-                }
-
-                if (matrix[from_layer][to_layer][delx][dely] == EMPTY_DELTA || matrix[from_layer][to_layer][delx][dely] == IMPOSSIBLE_DELTA) {
-                    continue;
-                }
-                counter++;
-                sum += matrix[from_layer][to_layer][delx][dely];
-            }
-        }
-        if (counter != 0) {
-            return sum / (float)counter;
-        }
-    }
-
-    return IMPOSSIBLE_DELTA;
-}
-
-static void fix_empty_coordinates(vtr::NdMatrix<float, 4>& delta_delays) {
-    // Set any empty delta's to the average of it's neighbours
-    //
-    // Empty coordinates may occur if the sampling location happens to not have
-    // a connection at that location.  However a more through sampling likely
-    // would return a result, so we fill in the empty holes with a small
-    // neighbour average.
-    constexpr int kMaxAverageDistance = 2;
-    for (int from_layer = 0; from_layer < (int)delta_delays.dim_size(0); ++from_layer) {
-        for (int to_layer = 0; to_layer < (int)delta_delays.dim_size(1); ++to_layer) {
-            for (int delta_x = 0; delta_x < (int)delta_delays.dim_size(2); ++delta_x) {
-                for (int delta_y = 0; delta_y < (int)delta_delays.dim_size(3); ++delta_y) {
-                    if (delta_delays[from_layer][to_layer][delta_x][delta_y] == EMPTY_DELTA) {
-                        delta_delays[from_layer][to_layer][delta_x][delta_y] =
-                            find_neighboring_average(delta_delays,
-                                                     from_layer,
-                                                     {delta_x, delta_y, to_layer},
-                                                     kMaxAverageDistance);
-                    }
-                }
-            }
-        }
-    }
-}
-
-static void fix_uninitialized_coordinates(vtr::NdMatrix<float, 4>& delta_delays) {
-    // Set any empty delta's to the average of it's neighbours
-
-    for (size_t from_layer_num = 0; from_layer_num < delta_delays.dim_size(0); ++from_layer_num) {
-        for (size_t to_layer_num = 0; to_layer_num < delta_delays.dim_size(1); ++to_layer_num) {
-            for (size_t delta_x = 0; delta_x < delta_delays.dim_size(2); ++delta_x) {
-                for (size_t delta_y = 0; delta_y < delta_delays.dim_size(3); ++delta_y) {
-                    if (delta_delays[from_layer_num][to_layer_num][delta_x][delta_y] == UNINITIALIZED_DELTA) {
-                        delta_delays[from_layer_num][to_layer_num][delta_x][delta_y] = IMPOSSIBLE_DELTA;
-                    }
-                }
-            }
-        }
-    }
-}
-
-static void fill_impossible_coordinates(vtr::NdMatrix<float, 4>& delta_delays) {
-    // Set any impossible delta's to the average of its neighbours
-    //
-    // Impossible coordinates may occur if an IPIN cannot be reached from the
-    // sampling OPIN.  This might occur if the IPIN or OPIN used for sampling
-    // is specialized, and therefore cannot be reached via the by the pins
-    // sampled.  Leaving this value in the delay matrix will result in invalid
-    // slacks if the delay matrix uses this value.
-    //
-    // A max average distance of 5 is used to provide increased effort in
-    // filling these gaps.  It is more important to have a poor predication,
-    // than an invalid value and causing a slack assertion.
-    constexpr int kMaxAverageDistance = 5;
-    for (int from_layer_num = 0; from_layer_num < (int)delta_delays.dim_size(0); ++from_layer_num) {
-        for (int to_layer_num = 0; to_layer_num < (int)delta_delays.dim_size(1); ++to_layer_num) {
-            for (int delta_x = 0; delta_x < (int)delta_delays.dim_size(2); ++delta_x) {
-                for (int delta_y = 0; delta_y < (int)delta_delays.dim_size(3); ++delta_y) {
-                    if (delta_delays[from_layer_num][to_layer_num][delta_x][delta_y] == IMPOSSIBLE_DELTA) {
-                        delta_delays[from_layer_num][to_layer_num][delta_x][delta_y] = find_neighboring_average(
-                            delta_delays, from_layer_num, {delta_x, delta_y, to_layer_num}, kMaxAverageDistance);
-                    }
-                }
-            }
-        }
-    }
-}
-
-static vtr::NdMatrix<float, 4> compute_delta_delay_model(
-    RouterDelayProfiler& route_profiler,
-    const t_placer_opts& placer_opts,
-    const t_router_opts& router_opts,
-    bool measure_directconnect,
-    int longest_length,
-    bool is_flat) {
-    vtr::ScopedStartFinishTimer timer("Computing delta delays");
-    vtr::NdMatrix<float, 4> delta_delays = compute_delta_delays(route_profiler,
-                                                                placer_opts,
-                                                                router_opts,
-                                                                measure_directconnect,
-                                                                longest_length,
-                                                                is_flat);
-
-    fix_uninitialized_coordinates(delta_delays);
-
-    fix_empty_coordinates(delta_delays);
-
-    fill_impossible_coordinates(delta_delays);
-
-    verify_delta_delays(delta_delays);
-
-    return delta_delays;
-}
-
-static vtr::NdMatrix<float, 5> compute_simple_delay_model(RouterDelayProfiler& route_profiler) {
-    const auto& grid = g_vpr_ctx.device().grid;
-    int num_physical_tile_types = static_cast<int>(g_vpr_ctx.device().physical_tile_types.size());
-    // Initializing the delay matrix to [num_physical_types][num_layers][num_layers][width][height]
-    // The second index related to the layer that the source location is on and the third index is for the sink layer
-    vtr::NdMatrix<float, 5> delta_delays({static_cast<unsigned long>(num_physical_tile_types),
-                                          static_cast<unsigned long>(grid.get_num_layers()),
-                                          static_cast<unsigned long>(grid.get_num_layers()),
-                                          grid.width(),
-                                          grid.height()});
-
-    for (int physical_tile_type_idx = 0; physical_tile_type_idx < num_physical_tile_types; ++physical_tile_type_idx) {
-        for (int from_layer = 0; from_layer < grid.get_num_layers(); ++from_layer) {
-            for (int to_layer = 0; to_layer < grid.get_num_layers(); ++to_layer) {
-                for (int dx = 0; dx < static_cast<int>(grid.width()); ++dx) {
-                    for (int dy = 0; dy < static_cast<int>(grid.height()); ++dy) {
-                        float min_delay = route_profiler.get_min_delay(physical_tile_type_idx,
-                                                                       from_layer,
-                                                                       to_layer,
-                                                                       dx,
-                                                                       dy);
-                        delta_delays[physical_tile_type_idx][from_layer][to_layer][dx][dy] = min_delay;
-                    }
-                }
-            }
-        }
-    }
-
-    return delta_delays;
-}
-
-//Finds a src_rr and sink_rr appropriate for measuring the delay of the current direct specification
-static bool find_direct_connect_sample_locations(const t_direct_inf* direct,
-                                                 t_physical_tile_type_ptr from_type,
-                                                 int from_pin,
-                                                 int from_pin_class,
-                                                 t_physical_tile_type_ptr to_type,
-                                                 int to_pin,
-                                                 int to_pin_class,
-                                                 RRNodeId& out_src_node,
-                                                 RRNodeId& out_sink_node) {
-    VTR_ASSERT(from_type != nullptr);
-    VTR_ASSERT(to_type != nullptr);
-
-    auto& device_ctx = g_vpr_ctx.device();
-    auto& grid = device_ctx.grid;
-    const auto& node_lookup = device_ctx.rr_graph.node_lookup();
-
-    //Search the grid for an instance of from/to blocks which satisfy this direct connect offsets,
-    //and which has the appropriate pins
-    int from_x = -1;
-    int from_y = -1;
-    int from_sub_tile = -1;
-    int to_x = 0, to_y = 0, to_sub_tile = 0;
-    bool found = false;
-    int found_layer_num = -1;
-    //TODO: Function *FOR NOW* assumes that from/to blocks are at same die and have a same layer nums
-    for (int layer_num = 0; layer_num < grid.get_num_layers() && !found; ++layer_num) {
-        for (int x = 0; x < (int)grid.width() && !found; ++x) {
-            to_x = x + direct->x_offset;
-            if (to_x < 0 || to_x >= (int)grid.width()) continue;
-
-            for (int y = 0; y < (int)grid.height() && !found; ++y) {
-                if (grid.get_physical_type({x, y, layer_num}) != from_type) continue;
-
-                //Check that the from pin exists at this from location
-                //(with multi-width/height blocks pins may not exist at all locations)
-                bool from_pin_found = false;
-                if (direct->from_side != NUM_2D_SIDES) {
-                    RRNodeId from_pin_rr = node_lookup.find_node(layer_num, x, y, OPIN, from_pin, direct->from_side);
-                    from_pin_found = from_pin_rr.is_valid();
-                } else {
-                    from_pin_found = !(node_lookup.find_nodes_at_all_sides(layer_num, x, y, OPIN, from_pin).empty());
-                }
-                if (!from_pin_found) continue;
-
-                to_y = y + direct->y_offset;
-
-                if (to_y < 0 || to_y >= (int)grid.height()) continue;
-                if (grid.get_physical_type({to_x, to_y, layer_num}) != to_type) continue;
-
-                //Check that the from pin exists at this from location
-                //(with multi-width/height blocks pins may not exist at all locations)
-                bool to_pin_found = false;
-                if (direct->to_side != NUM_2D_SIDES) {
-                    RRNodeId to_pin_rr = node_lookup.find_node(layer_num, to_x, to_y, IPIN, to_pin, direct->to_side);
-                    to_pin_found = (to_pin_rr != RRNodeId::INVALID());
-                } else {
-                    to_pin_found = !(node_lookup.find_nodes_at_all_sides(layer_num, to_x, to_y, IPIN, to_pin).empty());
-                }
-                if (!to_pin_found) continue;
-
-                for (int sub_tile_num = 0; sub_tile_num < from_type->capacity; ++sub_tile_num) {
-                    to_sub_tile = sub_tile_num + direct->sub_tile_offset;
-
-                    if (to_sub_tile < 0 || to_sub_tile >= to_type->capacity) continue;
-
-                    found = true;
-                    found_layer_num = layer_num;
-                    from_x = x;
-                    from_y = y;
-                    from_sub_tile = sub_tile_num;
-
-                    break;
-                }
-            }
-        }
-    }
-
-    if (!found) {
-        return false;
-    }
-
-    //Now have a legal instance of this direct connect
-    VTR_ASSERT(grid.get_physical_type({from_x, from_y, found_layer_num}) == from_type);
-    VTR_ASSERT(from_sub_tile < from_type->capacity);
-
-    VTR_ASSERT(grid.get_physical_type({to_x, to_y, found_layer_num}) == to_type);
-    VTR_ASSERT(to_sub_tile < to_type->capacity);
-
-    VTR_ASSERT(from_x + direct->x_offset == to_x);
-    VTR_ASSERT(from_y + direct->y_offset == to_y);
-    VTR_ASSERT(from_sub_tile + direct->sub_tile_offset == to_sub_tile);
-
-    //
-    //Find a source/sink RR node associated with the pins of the direct
-    //
-
-    {
-        RRNodeId src_rr_candidate = node_lookup.find_node(found_layer_num, from_x, from_y, SOURCE, from_pin_class);
-        VTR_ASSERT(src_rr_candidate);
-        out_src_node = src_rr_candidate;
-    }
-
-    {
-        RRNodeId sink_rr_candidate = node_lookup.find_node(found_layer_num, to_x, to_y, SINK, to_pin_class);
-        VTR_ASSERT(sink_rr_candidate);
-        out_sink_node = sink_rr_candidate;
-    }
-
-    return true;
-}
-
-static bool verify_delta_delays(const vtr::NdMatrix<float, 4>& delta_delays) {
-    auto& device_ctx = g_vpr_ctx.device();
-    auto& grid = device_ctx.grid;
-
-    for (int from_layer_num = 0; from_layer_num < grid.get_num_layers(); ++from_layer_num) {
-        for (int to_layer_num = 0; to_layer_num < grid.get_num_layers(); ++to_layer_num) {
-            for (size_t x = 0; x < grid.width(); ++x) {
-                for (size_t y = 0; y < grid.height(); ++y) {
-                    float delta_delay = delta_delays[from_layer_num][to_layer_num][x][y];
-
-                    if (delta_delay < 0.) {
-                        VPR_ERROR(VPR_ERROR_PLACE,
-                                  "Found invaild negative delay %g for delta [%d,%d,%d,%d]",
-                                  delta_delay, from_layer_num, to_layer_num, x, y);
-                    }
-                }
-            }
-        }
-    }
-
-    return true;
-}
-
-void OverrideDelayModel::compute_override_delay_model(
-    RouterDelayProfiler& route_profiler,
-    const t_router_opts& router_opts) {
-    t_router_opts router_opts2 = router_opts;
-    router_opts2.astar_fac = 0.f;
-    router_opts2.astar_offset = 0.f;
-
-    //Look at all the direct connections that exist, and add overrides to delay model
-    auto& device_ctx = g_vpr_ctx.device();
-    for (int idirect = 0; idirect < (int)device_ctx.arch->directs.size(); ++idirect) {
-        const t_direct_inf* direct = &device_ctx.arch->directs[idirect];
-
-        InstPort from_port = parse_inst_port(direct->from_pin);
-        InstPort to_port = parse_inst_port(direct->to_pin);
-
-        t_physical_tile_type_ptr from_type = find_tile_type_by_name(from_port.instance_name(), device_ctx.physical_tile_types);
-        t_physical_tile_type_ptr to_type = find_tile_type_by_name(to_port.instance_name(), device_ctx.physical_tile_types);
-
-        int num_conns = from_port.port_high_index() - from_port.port_low_index() + 1;
-        VTR_ASSERT_MSG(num_conns == to_port.port_high_index() - to_port.port_low_index() + 1, "Directs must have the same size to/from");
-
-        //We now walk through all the connections associated with the current direct specification, measure
-        //their delay and specify that value as an override in the delay model.
-        //
-        //Note that we need to check every connection in the direct to cover the case where the pins are not
-        //equivalent.
-        //
-        //However, if the from/to ports are equivalent we could end up sampling the same RR SOURCE/SINK
-        //paths multiple times (wasting CPU time) -- we avoid this by recording the sampled paths in
-        //sampled_rr_pairs and skipping them if they occur multiple times.
-        int missing_instances = 0;
-        int missing_paths = 0;
-        std::set<std::pair<RRNodeId, RRNodeId>> sampled_rr_pairs;
-        for (int iconn = 0; iconn < num_conns; ++iconn) {
-            //Find the associated pins
-            int from_pin = find_pin(from_type, from_port.port_name(), from_port.port_low_index() + iconn);
-            int to_pin = find_pin(to_type, to_port.port_name(), to_port.port_low_index() + iconn);
-
-            VTR_ASSERT(from_pin != OPEN);
-            VTR_ASSERT(to_pin != OPEN);
-
-            int from_pin_class = find_pin_class(from_type, from_port.port_name(), from_port.port_low_index() + iconn, DRIVER);
-            VTR_ASSERT(from_pin_class != OPEN);
-
-            int to_pin_class = find_pin_class(to_type, to_port.port_name(), to_port.port_low_index() + iconn, RECEIVER);
-            VTR_ASSERT(to_pin_class != OPEN);
-
-            bool found_sample_points;
-            RRNodeId src_rr, sink_rr;
-            found_sample_points = find_direct_connect_sample_locations(direct, from_type, from_pin, from_pin_class, to_type, to_pin, to_pin_class, src_rr, sink_rr);
-
-            if (!found_sample_points) {
-                ++missing_instances;
-                continue;
-            }
-
-            //If some of the source/sink ports are logically equivalent we may have already
-            //sampled the associated source/sink pair and don't need to do so again
-            if (sampled_rr_pairs.count({src_rr, sink_rr})) continue;
-
-            float direct_connect_delay = std::numeric_limits<float>::quiet_NaN();
-            bool found_routing_path = route_profiler.calculate_delay(src_rr, sink_rr, router_opts2, &direct_connect_delay);
-
-            if (found_routing_path) {
-                set_delay_override(from_type->index, from_pin_class, to_type->index, to_pin_class, direct->x_offset, direct->y_offset, direct_connect_delay);
-            } else {
-                ++missing_paths;
-            }
-
-            //Record that we've sampled this pair of source and sink nodes
-            sampled_rr_pairs.insert({src_rr, sink_rr});
-        }
-
-        VTR_LOGV_WARN(missing_instances > 0, "Found no delta delay for %d bits of inter-block direct connect '%s' (no instances of this direct found)\n", missing_instances, direct->name.c_str());
-        VTR_LOGV_WARN(missing_paths > 0, "Found no delta delay for %d bits of inter-block direct connect '%s' (no routing path found)\n", missing_paths, direct->name.c_str());
-    }
-}
-
-bool directconnect_exists(RRNodeId src_rr_node, RRNodeId sink_rr_node) {
-    //Returns true if there is a directconnect between the two RR nodes
-    //
-    //This is checked by looking for a SOURCE -> OPIN -> IPIN -> SINK path
-    //which starts at src_rr_node and ends at sink_rr_node
-    auto& device_ctx = g_vpr_ctx.device();
-    const auto& rr_graph = device_ctx.rr_graph;
-
-    VTR_ASSERT(rr_graph.node_type(src_rr_node) == SOURCE && rr_graph.node_type(sink_rr_node) == SINK);
-
-    //TODO: This is a constant depth search, but still may be too slow
-    for (t_edge_size i_src_edge = 0; i_src_edge < rr_graph.num_edges(src_rr_node); ++i_src_edge) {
-        RRNodeId opin_rr_node = rr_graph.edge_sink_node(src_rr_node, i_src_edge);
-
-        if (rr_graph.node_type(opin_rr_node) != OPIN) continue;
-
-        for (t_edge_size i_opin_edge = 0; i_opin_edge < rr_graph.num_edges(opin_rr_node); ++i_opin_edge) {
-            RRNodeId ipin_rr_node = rr_graph.edge_sink_node(opin_rr_node, i_opin_edge);
-            if (rr_graph.node_type(ipin_rr_node) != IPIN) continue;
-
-            for (t_edge_size i_ipin_edge = 0; i_ipin_edge < rr_graph.num_edges(ipin_rr_node); ++i_ipin_edge) {
-                if (sink_rr_node == rr_graph.edge_sink_node(ipin_rr_node, i_ipin_edge)) {
-                    return true;
-                }
-            }
-        }
-    }
-    return false;
-}
diff --git a/vpr/src/place/timing_place_lookup.h b/vpr/src/place/timing_place_lookup.h
deleted file mode 100644
index fba3f470483..00000000000
--- a/vpr/src/place/timing_place_lookup.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef TIMING_PLACE_LOOKUP_H
-#define TIMING_PLACE_LOOKUP_H
-#include "place_delay_model.h"
-
-std::unique_ptr<PlaceDelayModel> compute_place_delay_model(const t_placer_opts& placer_opts,
-                                                           const t_router_opts& router_opts,
-                                                           const Netlist<>& net_list,
-                                                           t_det_routing_arch* det_routing_arch,
-                                                           std::vector<t_segment_inf>& segment_inf,
-                                                           t_chan_width_dist chan_width_dist,
-                                                           const std::vector<t_direct_inf>& directs,
-                                                           bool is_flat);
-
-std::vector<int> get_best_classes(enum e_pin_type pintype, t_physical_tile_type_ptr type);
-
-bool directconnect_exists(RRNodeId src_rr_node, RRNodeId sink_rr_node);
-
-#endif
diff --git a/vpr/src/route/router_delay_profiling.cpp b/vpr/src/route/router_delay_profiling.cpp
index 5feb0e9b2f6..f9c4c1d74a8 100644
--- a/vpr/src/route/router_delay_profiling.cpp
+++ b/vpr/src/route/router_delay_profiling.cpp
@@ -6,7 +6,6 @@
 #include "route_tree.h"
 #include "rr_graph.h"
 #include "vtr_time.h"
-#include "draw.h"
 
 RouterDelayProfiler::RouterDelayProfiler(const Netlist<>& net_list,
                                          const RouterLookahead* lookahead,
diff --git a/vpr/src/util/vpr_utils.cpp b/vpr/src/util/vpr_utils.cpp
index c2aa98286c0..430b386562f 100644
--- a/vpr/src/util/vpr_utils.cpp
+++ b/vpr/src/util/vpr_utils.cpp
@@ -708,7 +708,7 @@ InstPort parse_inst_port(const std::string& str) {
         VPR_FATAL_ERROR(VPR_ERROR_ARCH, "Failed to find block type named %s", inst_port.instance_name().c_str());
     }
 
-    int num_pins = find_tile_port_by_name(blk_type, inst_port.port_name().c_str()).num_pins;
+    int num_pins = find_tile_port_by_name(blk_type, inst_port.port_name()).num_pins;
 
     if (num_pins == OPEN) {
         VPR_FATAL_ERROR(VPR_ERROR_ARCH, "Failed to find port %s on block type %s", inst_port.port_name().c_str(), inst_port.instance_name().c_str());
@@ -1857,6 +1857,33 @@ bool node_in_same_physical_tile(RRNodeId node_first, RRNodeId node_second) {
     }
 }
 
+bool directconnect_exists(RRNodeId src_rr_node, RRNodeId sink_rr_node) {
+    const auto& device_ctx = g_vpr_ctx.device();
+    const auto& rr_graph = device_ctx.rr_graph;
+    // Precondition: the search starts at a SOURCE node and targets a SINK node.
+    VTR_ASSERT(rr_graph.node_type(src_rr_node) == SOURCE && rr_graph.node_type(sink_rr_node) == SINK);
+
+    // A direct connection is the fixed path `SOURCE -> OPIN -> IPIN -> SINK`; each nested loop below follows one hop.
+    // TODO: This is a constant depth search, but still may be too slow
+    for (t_edge_size i_src_edge = 0; i_src_edge < rr_graph.num_edges(src_rr_node); ++i_src_edge) {
+        RRNodeId opin_rr_node = rr_graph.edge_sink_node(src_rr_node, i_src_edge);
+        // First hop: only edges that land on an OPIN can start a direct connection.
+        if (rr_graph.node_type(opin_rr_node) != OPIN) continue;
+
+        for (t_edge_size i_opin_edge = 0; i_opin_edge < rr_graph.num_edges(opin_rr_node); ++i_opin_edge) {
+            RRNodeId ipin_rr_node = rr_graph.edge_sink_node(opin_rr_node, i_opin_edge);
+            if (rr_graph.node_type(ipin_rr_node) != IPIN) continue; // second hop must land on an IPIN
+            // Final hop: succeed as soon as one of the IPIN's edges ends at sink_rr_node.
+            for (t_edge_size i_ipin_edge = 0; i_ipin_edge < rr_graph.num_edges(ipin_rr_node); ++i_ipin_edge) {
+                if (sink_rr_node == rr_graph.edge_sink_node(ipin_rr_node, i_ipin_edge)) {
+                    return true;
+                }
+            }
+        }
+    }
+    return false; // no SOURCE -> OPIN -> IPIN path ends at sink_rr_node
+}
+
 std::vector<int> get_cluster_netlist_intra_tile_classes_at_loc(int layer,
                                                                int i,
                                                                int j,
diff --git a/vpr/src/util/vpr_utils.h b/vpr/src/util/vpr_utils.h
index 8869cc55ddd..abaafadbfe7 100644
--- a/vpr/src/util/vpr_utils.h
+++ b/vpr/src/util/vpr_utils.h
@@ -264,9 +264,28 @@ RRNodeId get_class_rr_node_id(const RRSpatialLookup& rr_spatial_lookup,
                               const int j,
                               int class_physical_num);
 
-// Check whether the given nodes are in the same cluster
+/// @brief Check whether the given nodes are in the same cluster
 bool node_in_same_physical_tile(RRNodeId node_first, RRNodeId node_second);
 
+/**
+ * @brief Checks if a direct connection exists between two RR nodes.
+ *
+ * A direct connection is defined as a specific path: `SOURCE -> OPIN -> IPIN -> SINK`,
+ * i.e. exactly three edges leading from @p src_rr_node to @p sink_rr_node.
+ *
+ * @param src_rr_node The source RR node (must be of type `SOURCE`).
+ * @param sink_rr_node The sink RR node (must be of type `SINK`).
+ *
+ * @return `true` if a direct connection exists between the source and sink nodes;
+ *         otherwise, `false`.
+ *
+ * @details
+ * - The function performs a depth-limited search starting from the source node,
+ *   traversing through OPIN, IPIN, and finally checking if the path reaches the sink node.
+ * - Both node types are checked with `VTR_ASSERT`; the RR graph is read from `g_vpr_ctx.device()`.
+ */
+bool directconnect_exists(RRNodeId src_rr_node, RRNodeId sink_rr_node);
+
 std::vector<int> get_cluster_netlist_intra_tile_classes_at_loc(int layer,
                                                                int i,
                                                                int j,
diff --git a/vpr/test/test_connection_router.cpp b/vpr/test/test_connection_router.cpp
index a106ad80a80..2b584daedc3 100644
--- a/vpr/test/test_connection_router.cpp
+++ b/vpr/test/test_connection_router.cpp
@@ -8,7 +8,6 @@
 #include "globals.h"
 #include "net_delay.h"
 #include "place_and_route.h"
-#include "timing_place_lookup.h"
 
 static constexpr const char kArchFile[] = "../../vtr_flow/arch/timing/k6_frac_N10_mem32K_40nm.xml";
 static constexpr int kMaxHops = 10;
@@ -188,8 +187,7 @@ TEST_CASE("connection_router", "[vpr]") {
 
     // Clean up
     free_routing_structs();
-    vpr_free_all(arch,
-                 vpr_setup);
+    vpr_free_all(arch, vpr_setup);
 }
 
 } // namespace
diff --git a/vpr/test/test_post_verilog.cpp b/vpr/test/test_post_verilog.cpp
index a8344fa79d4..ca1a250b7d2 100644
--- a/vpr/test/test_post_verilog.cpp
+++ b/vpr/test/test_post_verilog.cpp
@@ -1,7 +1,7 @@
 #include "catch2/catch_test_macros.hpp"
 
 #include "vpr_api.h"
-#include "timing_place_lookup.h"
+#include "router_delay_profiling.h"
 
 #include <fstream>
 #include <memory>