rename directories to yxi/axi-calyx

calyxir · Dec 21, 2023 · 45b67b3 · 45b67b3
1 parent b1a9411
commit 45b67b3
Show file tree

Hide file tree

Showing 3 changed files with 584 additions and 0 deletions.
diff --git a/yxi/axi-calyx/axi-reads-calyx.futil b/yxi/axi-calyx/axi-reads-calyx.futil
@@ -0,0 +1,373 @@
+// ###
+// This file contains the components needed to perform read transacitons via AXI.
+// Current goal is to create a cocotb testbench that tests correctness of this.
+// See https://github.com/cucapra/calyx/issues/1733 for more information.
+//
+// This wrapper assumes it is part of a dot product computation with vectors of
+// length 16
+// It assumes a bus data width of 32
+// This is largely a work in progress and as of Nov 20 2023 is not intended to
+// actually be used for anything
+// ###
+
+import "primitives/core.futil";
+import "primitives/compile.futil";
+import "primitives/math.futil";
+import "primitives/memories.futil";
+
+
+//this goes m->s unlike read channel
+component m_arread_channel(
+  ARESET: 1,
+  ARREADY: 1
+) -> (
+  ARVALID: 1,
+  // This needs to be 64, see link below `m_axi` section.
+  ARADDR: 64,
+  // 2^ARSIZE is bytes used in transfer. For memory-mapped AXI (which is what we
+  // are doing I believe), should match width of data bus (to shell?, so 32 wide? This
+  // is 3'b010)
+  // see https://docs.xilinx.com/r/en-US/ug1393-vitis-application-acceleration/Kernel-Interface-Requirements
+  // for restrictions
+  ARSIZE: 3,
+  // in AXI4 this is 8 bits, 1-256 transfers in requested transaction.
+  ARLEN : 8, 
+  // 00 for fixed, 01 for incrementing, 2 for wrap,
+  // needs to be incr for RTL kernels (can't use wrapped of fixed
+  ARBURST : 2,
+  // required by spec. We hardwire this to priviliged access, non secure, data access.
+  ARPROT : 3) {
+  cells{
+      is_arvalid = std_reg(1);
+
+      // gets set high with ARVALID and remains high
+      arvalid_was_high = std_reg(1);
+      // TODO(nathanielnrn): should arguably eventually live in `s_axi_control`
+      // but for now will live here.
+      ref base_addr = std_reg(64);
+
+      // number of trasfers in a transaction. This is sent to subordinate
+      txn_len = std_reg(8);
+
+      // number of txns we want to occur before entire m_arread_channel is done
+      // this is internal to the channel (unlike txn_len)
+      txn_n = std_const(32,1);
+      txn_count = std_reg(32);
+      perform_reads = std_neq(32);
+      txn_adder = std_add(32);
+
+      //"block_transfer" register. need to put into a reg to avoid combinational loops
+      bt_reg = std_reg(1);
+
+
+  }
+
+  wires{
+
+      ARVALID = is_arvalid.out;
+
+      group deassert_val {
+          is_arvalid.in = 1'b0;
+          is_arvalid.write_en = 1'b1;
+          deassert_val[done] = is_arvalid.done;
+      }
+
+      group reset_bt {
+          bt_reg.in = 1'b0;
+          bt_reg.write_en = 1'b1;
+          reset_bt[done] = bt_reg.done;
+      }
+
+      // this asserts valid and defines all inputs correctly
+      // because valid should not be deasserted until handshake occurs
+      // this all needs to be one group
+      // this contains blocking logic previously in its own group
+      group do_ar_transfer {
+          //assert ARVALID
+          is_arvalid.in = !(is_arvalid.out & ARREADY) & !arvalid_was_high.out ? 1'b1;
+
+          // TODO(nathanielnrn): in theory should be able to get rid of arvalid_was_high
+          // but for now we will be explicit and reduce this in generation maybe. Not sure
+          // it even matters.
+          // This makes ARVALID go low after a single cycle. Without it it stays high for 2.
+          is_arvalid.in = is_arvalid.out & ARREADY & arvalid_was_high.out ? 1'b0;
+          is_arvalid.write_en = 1'b1;
+
+          arvalid_was_high.in = !(is_arvalid.out & ARREADY) & !arvalid_was_high.out ? 1'b1;
+          arvalid_was_high.write_en = !(is_arvalid.out & ARREADY) & !arvalid_was_high.out ? 1'b1;
+
+
+        // drive output signals for transfer  
+          ARADDR = base_addr.out;
+          // see link above, needs to match data width to host.
+          // In this case 2^2 = 4 bytes = 32 bits = width of our data_bus.
+          ARSIZE = 3'b010; 
+          // For now this can be taken from .yxi, as size of mem, because we are assuming
+          // data_bus width that matches size of memory cells
+          // If we want to use bigger mems need to be able to update base addr
+          ARLEN = txn_len.out;
+          ARBURST = 2'b01; //incr
+          // privileged, non-secure, instruction access
+          ARPROT = 3'b110;
+
+
+          //done when one cycle after handshake (handshake happens for a single cycle)
+          bt_reg.in = ARREADY & is_arvalid.out ? 1'b1;
+          bt_reg.in = !(ARREADY & is_arvalid.out) ? 1'b0;
+          bt_reg.write_en = 1'b1;
+          do_ar_transfer[done] = bt_reg.out;
+      }
+
+
+      //txn bookkeeping.
+      //We are done performing reads when txn_count == txn_n
+      group txn_count_init {
+          txn_count.in = 32'b0;
+          txn_count.write_en = 1'b1;
+          txn_count_init[done] = txn_count.done;
+
+      }
+
+      group txn_len_init {
+          //TODO(nathanielnrn): 15 is good for word wide data bus. We'd
+          //expect 16 transfers. Number of transfers that occur is ARLEN + 1
+          txn_len.in = 8'd15;
+          txn_len.write_en = 1'b1;
+          txn_len_init[done] = txn_len.done;
+      }
+
+      group txn_incr {
+          txn_adder.left = txn_count.out;
+          txn_adder.right = 32'b1;
+          txn_count.in = txn_adder.out;
+          txn_count.write_en = 1'b1;
+          txn_incr[done] = txn_count.done;
+
+      }
+
+      comb group check_reads_done {
+          perform_reads.left = txn_count.out;
+          perform_reads.right = txn_n.out;
+      }
+  }
+
+  control{
+      //XXX(nathanielnrn): What is best way to offer more flexiblity beyond just a counter?
+      seq{
+          txn_count_init;
+          txn_len_init;
+          while perform_reads.out with check_reads_done{
+              seq{
+                  reset_bt;
+                  do_ar_transfer;
+                  deassert_val;
+                  txn_incr;
+              }
+          }
+      }
+  }
+}
+
+
+
+
+component m_read_channel(
+  ARESET : 1,
+  RVALID : 1,
+  RLAST  : 1,
+  RDATA  : 32, 
+  RRESP :  2,  // Note: This is generated in subordinate! had this backwards in earlier version
+) -> (
+  // NOTE: In general, according to ZipCPU we want xREADY signals to be registered
+  // because (IIRC) it helps avoid combinational loops between READY and VALID.
+  RREADY : 1,
+) {
+  cells {
+      // 16 is due to dot-product vector length assumption
+      // For this manual implementation we are just writing into this data based
+      // on the data we read from cocotb
+      ref data_received = seq_mem_d1(32, 16, 64);
+      is_rdy = std_reg(1);
+      ref curr_addr = std_reg(64); 
+
+      // registered because RLAST is high with last transfer, not after
+      // before this was registered we were terminating immediately with
+      // last transfer and not servicing it
+      n_RLAST = std_reg(1);
+
+      // TODO: get this width from yxi
+      read_data_reg = std_reg(32);
+
+      //address of seq_d1_mem we are writing to
+      curr_addr_adder = std_add(64);
+
+      // block_transfer reg to avoid combinational loops
+      bt_reg = std_reg(1);
+
+  }
+  wires{
+
+      RREADY = is_rdy.out;
+      data_received.read_en = 1'b0;
+
+      group init_n_RLAST {
+          n_RLAST.in = 1'b1;
+          n_RLAST.write_en = 1'b1;
+          init_n_RLAST[done] = n_RLAST.done;
+      }
+
+      // Used to block any servicing until handshake occurs. 
+      group reset_bt {
+          bt_reg.in = 1'b0;
+          bt_reg.write_en = 1'b1;
+          reset_bt[done] = bt_reg.done;
+      }
+
+      // NOTE: xVALID signals must be high until xREADY is high as well, so this works
+      // because if xREADY is high (is_rdy.out) then RVALID being high makes 1 flip
+      // and group will be done by bt_reg.out
+      group block_transfer {
+        // set RREADY high
+        // TODO (nathanielnrn): technically we can make RREADY depend on on RVALID (but not vice versa).
+        // Could we simplify this we just making things ready when we are in
+        // block_transfer && RVALID?
+
+        //NOTE: is_rdy.in = 1'b1; does not work, it leaves RREADY high for 2 cycles
+        // this both asserts and deasserts one cycle later
+        // TODO(nathanielnrn): Spec recommends defaulting xREADY high as it
+        // can get rid of extra cycle. Maybe doing so here would be useful?
+        // as opposed to waiting for RVALID
+        is_rdy.in = !(RVALID & is_rdy.out) ? 1'b1;
+        is_rdy.in = RVALID & is_rdy.out ? 1'b0;
+        is_rdy.write_en = 1'b1;
+
+
+        //store the data we want to write
+        read_data_reg.in = RDATA;
+        read_data_reg.write_en = is_rdy.out;
+
+        //update n_RLAST reg
+        n_RLAST.in = RLAST ? 1'b0;
+        n_RLAST.in = !RLAST ? 1'b1;
+        n_RLAST.write_en = 1'b1;
+
+
+        // we are done after handshake
+        bt_reg.in = is_rdy.out & RVALID ? 1'b1;
+        bt_reg.in = !(is_rdy.out & RVALID) ? 1'b0;
+        bt_reg.write_en = 1'b1;
+        block_transfer[done] = bt_reg.out;
+      }
+
+      group receive_r_transfer{
+          // keep RREADY low;
+          is_rdy.in = 1'b0;
+          is_rdy.write_en = 1'b1;
+
+          //write the data we received during transfer to seq_d1_mem
+          data_received.addr0 = curr_addr.out;
+          data_received.write_en = 1'b1;
+          data_received.write_data = read_data_reg.out;
+          receive_r_transfer[done] = data_received.write_done;
+
+      }
+
+      group incr_curr_addr{
+          curr_addr_adder.left = 64'd1 ;
+          curr_addr_adder.right = curr_addr.out;
+          curr_addr.in = curr_addr_adder.out;
+          curr_addr.write_en = 1'b1;
+          incr_curr_addr[done] = curr_addr.done;
+      }
+  }
+  control{
+      init_n_RLAST;
+      while n_RLAST.out{
+          seq{
+              reset_bt;
+              block_transfer;
+              receive_r_transfer;
+              incr_curr_addr;
+          }
+      }
+  }
+}
+
+//TODO(nathanielnrn): this is axi_wrapper, prefer to use @toplevel attribute but its not working
+// See individual channel components for explanations of signals
+component main(
+    m_ARESET : 1,
+    m_ARREADY : 1,
+
+    m_RVALID : 1,
+    m_RLAST : 1,
+    m_RDATA : 32,
+    m_RRESP : 2,
+    //NOTE: Only used for cocotb compatability, doesn't do anything within the wrapper itself currently.
+    m_RID : 1,
+) -> (
+    m_ARVALID : 1,
+    m_ARADDR: 64,
+    m_ARSIZE: 3,
+    m_ARLEN : 8, 
+    m_ARBURST : 2,
+
+    m_RREADY : 1,
+    //NOTE: Only used for cocotb compatability, doesn't do anything within the wrapper itself currently.
+    m_ARID : 1
+) {
+    cells{
+        vec1_data = seq_mem_d1(32,16,64);
+        output_data = seq_mem_d1(32,1,0);
+
+        curr_addr = std_reg(64);
+        base_addr = std_reg(64);
+
+        read_channel = m_read_channel();
+        arread_channel = m_arread_channel();
+
+    }
+
+    wires{
+
+        m_ARID = 1'b0;
+
+        group set_curr_to_base_addr{
+            curr_addr.in = base_addr.out;
+            curr_addr.write_en = 1'b1;
+            set_curr_to_base_addr[done] = curr_addr.done;
+        }
+    }
+    control{
+        seq{
+            invoke arread_channel[base_addr = base_addr]
+            (
+            ARESET = m_ARESET,
+            ARREADY = m_ARREADY
+            )
+            (
+            ARVALID = m_ARVALID,
+            ARADDR = m_ARADDR,
+            ARSIZE = m_ARSIZE,
+            ARLEN = m_ARLEN,
+            ARBURST = m_ARBURST
+            );
+
+            set_curr_to_base_addr;
+
+            invoke read_channel[data_received = vec1_data, curr_addr = curr_addr]
+            (
+            ARESET = m_ARESET,
+            RVALID = m_RVALID,
+            RLAST = m_RLAST,
+            RDATA = m_RDATA,
+            RRESP = m_RRESP
+            )
+            (
+            RREADY = m_RREADY
+            );
+        }
+    }
+
+
+}
diff --git a/yxi/axi-calyx/cocotb/Makefile b/yxi/axi-calyx/cocotb/Makefile
@@ -0,0 +1,23 @@
+# Makefile
+
+# defaults
+SIM ?= icarus
+TOPLEVEL_LANG ?= verilog
+
+
+#Needed to extract desired test from runt invocation
+
+VERILOG_SOURCES += $(PWD)/../outputs/axi-reads.v
+
+#Defines build directory, if left to default only a single computation is run
+SIM_BUILD=sim_build/axi-reads
+
+# TOPLEVEL is the name of the toplevel module in your Verilog or VHDL file
+TOPLEVEL = main
+
+# MODULE is the basename of the Python test file
+MODULE = axi-read-tests
+
+
+# include cocotb's make rules to take care of the simulator setup
+include $(shell cocotb-config --makefiles)/Makefile.sim