Skip to content

Commit

Permalink
rename directories to yxi/axi-calyx
Browse files Browse the repository at this point in the history
  • Loading branch information
nathanielnrn committed Dec 21, 2023
1 parent b1a9411 commit 45b67b3
Show file tree
Hide file tree
Showing 3 changed files with 584 additions and 0 deletions.
373 changes: 373 additions & 0 deletions yxi/axi-calyx/axi-reads-calyx.futil
Original file line number Diff line number Diff line change
@@ -0,0 +1,373 @@
// ###
// This file contains the components needed to perform read transacitons via AXI.
// Current goal is to create a cocotb testbench that tests correctness of this.
// See https://github.com/cucapra/calyx/issues/1733 for more information.
//
// This wrapper assumes it is part of a dot product computation with vectors of
// length 16
// It assumes a bus data width of 32
// This is largely a work in progress and as of Nov 20 2023 is not intended to
// actually be used for anything
// ###

import "primitives/core.futil";
import "primitives/compile.futil";
import "primitives/math.futil";
import "primitives/memories.futil";


//this goes m->s unlike read channel
component m_arread_channel(
ARESET: 1,
ARREADY: 1
) -> (
ARVALID: 1,
// This needs to be 64, see link below `m_axi` section.
ARADDR: 64,
// 2^ARSIZE is bytes used in transfer. For memory-mapped AXI (which is what we
// are doing I believe), should match width of data bus (to shell?, so 32 wide? This
// is 3'b010)
// see https://docs.xilinx.com/r/en-US/ug1393-vitis-application-acceleration/Kernel-Interface-Requirements
// for restrictions
ARSIZE: 3,
// in AXI4 this is 8 bits, 1-256 transfers in requested transaction.
ARLEN : 8,
// 00 for fixed, 01 for incrementing, 2 for wrap,
// needs to be incr for RTL kernels (can't use wrapped of fixed
ARBURST : 2,
// required by spec. We hardwire this to priviliged access, non secure, data access.
ARPROT : 3) {
cells{
is_arvalid = std_reg(1);

// gets set high with ARVALID and remains high
arvalid_was_high = std_reg(1);
// TODO(nathanielnrn): should arguably eventually live in `s_axi_control`
// but for now will live here.
ref base_addr = std_reg(64);

// number of trasfers in a transaction. This is sent to subordinate
txn_len = std_reg(8);

// number of txns we want to occur before entire m_arread_channel is done
// this is internal to the channel (unlike txn_len)
txn_n = std_const(32,1);
txn_count = std_reg(32);
perform_reads = std_neq(32);
txn_adder = std_add(32);

//"block_transfer" register. need to put into a reg to avoid combinational loops
bt_reg = std_reg(1);


}

wires{

ARVALID = is_arvalid.out;

group deassert_val {
is_arvalid.in = 1'b0;
is_arvalid.write_en = 1'b1;
deassert_val[done] = is_arvalid.done;
}

group reset_bt {
bt_reg.in = 1'b0;
bt_reg.write_en = 1'b1;
reset_bt[done] = bt_reg.done;
}

// this asserts valid and defines all inputs correctly
// because valid should not be deasserted until handshake occurs
// this all needs to be one group
// this contains blocking logic previously in its own group
group do_ar_transfer {
//assert ARVALID
is_arvalid.in = !(is_arvalid.out & ARREADY) & !arvalid_was_high.out ? 1'b1;

// TODO(nathanielnrn): in theory should be able to get rid of arvalid_was_high
// but for now we will be explicit and reduce this in generation maybe. Not sure
// it even matters.
// This makes ARVALID go low after a single cycle. Without it it stays high for 2.
is_arvalid.in = is_arvalid.out & ARREADY & arvalid_was_high.out ? 1'b0;
is_arvalid.write_en = 1'b1;

arvalid_was_high.in = !(is_arvalid.out & ARREADY) & !arvalid_was_high.out ? 1'b1;
arvalid_was_high.write_en = !(is_arvalid.out & ARREADY) & !arvalid_was_high.out ? 1'b1;


// drive output signals for transfer
ARADDR = base_addr.out;
// see link above, needs to match data width to host.
// In this case 2^2 = 4 bytes = 32 bits = width of our data_bus.
ARSIZE = 3'b010;
// For now this can be taken from .yxi, as size of mem, because we are assuming
// data_bus width that matches size of memory cells
// If we want to use bigger mems need to be able to update base addr
ARLEN = txn_len.out;
ARBURST = 2'b01; //incr
// privileged, non-secure, instruction access
ARPROT = 3'b110;


//done when one cycle after handshake (handshake happens for a single cycle)
bt_reg.in = ARREADY & is_arvalid.out ? 1'b1;
bt_reg.in = !(ARREADY & is_arvalid.out) ? 1'b0;
bt_reg.write_en = 1'b1;
do_ar_transfer[done] = bt_reg.out;
}


//txn bookkeeping.
//We are done performing reads when txn_count == txn_n
group txn_count_init {
txn_count.in = 32'b0;
txn_count.write_en = 1'b1;
txn_count_init[done] = txn_count.done;

}

group txn_len_init {
//TODO(nathanielnrn): 15 is good for word wide data bus. We'd
//expect 16 transfers. Number of transfers that occur is ARLEN + 1
txn_len.in = 8'd15;
txn_len.write_en = 1'b1;
txn_len_init[done] = txn_len.done;
}

group txn_incr {
txn_adder.left = txn_count.out;
txn_adder.right = 32'b1;
txn_count.in = txn_adder.out;
txn_count.write_en = 1'b1;
txn_incr[done] = txn_count.done;

}

comb group check_reads_done {
perform_reads.left = txn_count.out;
perform_reads.right = txn_n.out;
}
}

control{
//XXX(nathanielnrn): What is best way to offer more flexiblity beyond just a counter?
seq{
txn_count_init;
txn_len_init;
while perform_reads.out with check_reads_done{
seq{
reset_bt;
do_ar_transfer;
deassert_val;
txn_incr;
}
}
}
}
}




component m_read_channel(
ARESET : 1,
RVALID : 1,
RLAST : 1,
RDATA : 32,
RRESP : 2, // Note: This is generated in subordinate! had this backwards in earlier version
) -> (
// NOTE: In general, according to ZipCPU we want xREADY signals to be registered
// because (IIRC) it helps avoid combinational loops between READY and VALID.
RREADY : 1,
) {
cells {
// 16 is due to dot-product vector length assumption
// For this manual implementation we are just writing into this data based
// on the data we read from cocotb
ref data_received = seq_mem_d1(32, 16, 64);
is_rdy = std_reg(1);
ref curr_addr = std_reg(64);

// registered because RLAST is high with last transfer, not after
// before this was registered we were terminating immediately with
// last transfer and not servicing it
n_RLAST = std_reg(1);

// TODO: get this width from yxi
read_data_reg = std_reg(32);

//address of seq_d1_mem we are writing to
curr_addr_adder = std_add(64);

// block_transfer reg to avoid combinational loops
bt_reg = std_reg(1);

}
wires{

RREADY = is_rdy.out;
data_received.read_en = 1'b0;

group init_n_RLAST {
n_RLAST.in = 1'b1;
n_RLAST.write_en = 1'b1;
init_n_RLAST[done] = n_RLAST.done;
}

// Used to block any servicing until handshake occurs.
group reset_bt {
bt_reg.in = 1'b0;
bt_reg.write_en = 1'b1;
reset_bt[done] = bt_reg.done;
}

// NOTE: xVALID signals must be high until xREADY is high as well, so this works
// because if xREADY is high (is_rdy.out) then RVALID being high makes 1 flip
// and group will be done by bt_reg.out
group block_transfer {
// set RREADY high
// TODO (nathanielnrn): technically we can make RREADY depend on on RVALID (but not vice versa).
// Could we simplify this we just making things ready when we are in
// block_transfer && RVALID?

//NOTE: is_rdy.in = 1'b1; does not work, it leaves RREADY high for 2 cycles
// this both asserts and deasserts one cycle later
// TODO(nathanielnrn): Spec recommends defaulting xREADY high as it
// can get rid of extra cycle. Maybe doing so here would be useful?
// as opposed to waiting for RVALID
is_rdy.in = !(RVALID & is_rdy.out) ? 1'b1;
is_rdy.in = RVALID & is_rdy.out ? 1'b0;
is_rdy.write_en = 1'b1;


//store the data we want to write
read_data_reg.in = RDATA;
read_data_reg.write_en = is_rdy.out;

//update n_RLAST reg
n_RLAST.in = RLAST ? 1'b0;
n_RLAST.in = !RLAST ? 1'b1;
n_RLAST.write_en = 1'b1;


// we are done after handshake
bt_reg.in = is_rdy.out & RVALID ? 1'b1;
bt_reg.in = !(is_rdy.out & RVALID) ? 1'b0;
bt_reg.write_en = 1'b1;
block_transfer[done] = bt_reg.out;
}

group receive_r_transfer{
// keep RREADY low;
is_rdy.in = 1'b0;
is_rdy.write_en = 1'b1;

//write the data we received during transfer to seq_d1_mem
data_received.addr0 = curr_addr.out;
data_received.write_en = 1'b1;
data_received.write_data = read_data_reg.out;
receive_r_transfer[done] = data_received.write_done;

}

group incr_curr_addr{
curr_addr_adder.left = 64'd1 ;
curr_addr_adder.right = curr_addr.out;
curr_addr.in = curr_addr_adder.out;
curr_addr.write_en = 1'b1;
incr_curr_addr[done] = curr_addr.done;
}
}
control{
init_n_RLAST;
while n_RLAST.out{
seq{
reset_bt;
block_transfer;
receive_r_transfer;
incr_curr_addr;
}
}
}
}

//TODO(nathanielnrn): this is axi_wrapper, prefer to use @toplevel attribute but its not working
// See individual channel components for explanations of signals
component main(
m_ARESET : 1,
m_ARREADY : 1,

m_RVALID : 1,
m_RLAST : 1,
m_RDATA : 32,
m_RRESP : 2,
//NOTE: Only used for cocotb compatability, doesn't do anything within the wrapper itself currently.
m_RID : 1,
) -> (
m_ARVALID : 1,
m_ARADDR: 64,
m_ARSIZE: 3,
m_ARLEN : 8,
m_ARBURST : 2,

m_RREADY : 1,
//NOTE: Only used for cocotb compatability, doesn't do anything within the wrapper itself currently.
m_ARID : 1
) {
cells{
vec1_data = seq_mem_d1(32,16,64);
output_data = seq_mem_d1(32,1,0);

curr_addr = std_reg(64);
base_addr = std_reg(64);

read_channel = m_read_channel();
arread_channel = m_arread_channel();

}

wires{

m_ARID = 1'b0;

group set_curr_to_base_addr{
curr_addr.in = base_addr.out;
curr_addr.write_en = 1'b1;
set_curr_to_base_addr[done] = curr_addr.done;
}
}
control{
seq{
invoke arread_channel[base_addr = base_addr]
(
ARESET = m_ARESET,
ARREADY = m_ARREADY
)
(
ARVALID = m_ARVALID,
ARADDR = m_ARADDR,
ARSIZE = m_ARSIZE,
ARLEN = m_ARLEN,
ARBURST = m_ARBURST
);

set_curr_to_base_addr;

invoke read_channel[data_received = vec1_data, curr_addr = curr_addr]
(
ARESET = m_ARESET,
RVALID = m_RVALID,
RLAST = m_RLAST,
RDATA = m_RDATA,
RRESP = m_RRESP
)
(
RREADY = m_RREADY
);
}
}


}
23 changes: 23 additions & 0 deletions yxi/axi-calyx/cocotb/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Makefile

# defaults
SIM ?= icarus
TOPLEVEL_LANG ?= verilog


#Needed to extract desired test from runt invocation

VERILOG_SOURCES += $(PWD)/../outputs/axi-reads.v

#Defines build directory, if left to default only a single computation is run
SIM_BUILD=sim_build/axi-reads

# TOPLEVEL is the name of the toplevel module in your Verilog or VHDL file
TOPLEVEL = main

# MODULE is the basename of the Python test file
MODULE = axi-read-tests


# include cocotb's make rules to take care of the simulator setup
include $(shell cocotb-config --makefiles)/Makefile.sim
Loading

0 comments on commit 45b67b3

Please sign in to comment.