Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Working Calyx Implementation of AXI Read channels #1820

Merged
merged 29 commits into from
Jan 6, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
369f443
init commit of hardcoded axi wrapper for a 'main' kernel
nathanielnrn Oct 23, 2023
22b77fd
add axi-reads-calix
nathanielnrn Nov 20, 2023
f9a35d5
hook up inputs to channels in the wrapper. tbd if this works
nathanielnrn Nov 23, 2023
c37b748
Working calyx verison of AR and R
nathanielnrn Nov 27, 2023
af62349
Track output of compiled calyx read channel
nathanielnrn Nov 27, 2023
3b8c435
update gitignore to get rid of sim_build and other cocotb artifacts
nathanielnrn Dec 3, 2023
556ffc7
Working make files for running cocotb tests
nathanielnrn Dec 3, 2023
aba6ac9
Add xID signals for cocotb compatability
nathanielnrn Dec 3, 2023
662ec67
Fix prefix issue on cocotb axi test bench
nathanielnrn Dec 3, 2023
345f2db
commit to repro 'make WAVES=1' cocotb error from axi-reads-calyx.futil
nathanielnrn Dec 6, 2023
c303644
axi-reads patch
nathanielnrn Dec 13, 2023
1f98f41
sync debug
rachitnigam Dec 13, 2023
85e889e
Add txn_len initialization to 16 in calyx program
nathanielnrn Dec 18, 2023
82f860d
AXI Read fixed to get to read channel start
nathanielnrn Dec 19, 2023
a33f4d8
Add integer byte conversion for tests on Calyx AXI testharness
nathanielnrn Dec 19, 2023
3aadee2
WIP get reads to work. Add incr_curr_addr group
nathanielnrn Dec 19, 2023
2cc2b17
remove .fst from tracking
nathanielnrn Dec 20, 2023
66c6f58
Add more data to testbench to make waveform viewing easier
nathanielnrn Dec 20, 2023
f6d6608
Reads seem to be terminating correctly at RLAST
nathanielnrn Dec 20, 2023
5b9fa21
AR transfers seem to work, valid is high for 1 cycle
nathanielnrn Dec 20, 2023
a907fc4
Unreduced axi-reads-calyx.futil
nathanielnrn Dec 21, 2023
ece6bdd
Cocotb testbench now passes
nathanielnrn Dec 21, 2023
f7cecf3
Formatted and passing axi-read-tests
nathanielnrn Dec 21, 2023
581f8cf
Reduce and comment axi-reads-calyx.futil
nathanielnrn Dec 21, 2023
5d87b2a
remove axi-reads.v from being tracked
nathanielnrn Dec 21, 2023
af0f8af
add a todo
nathanielnrn Dec 21, 2023
acd0bb4
add required ARPROT signal. This is hardcoded to be priviliged
nathanielnrn Dec 21, 2023
9787aba
rename directories to yxi/axi-calyx
nathanielnrn Dec 21, 2023
f78063f
remove a guard in favor of 1'b1 to simplify reading of source code
nathanielnrn Dec 22, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,10 @@ __pycache__
!.vscode/launch.json
!.vscode/tasks.json

# cocotb artifacts
tests/xilinx/cocotb/**/hdl
sim_build/
results.xml


!cider-dap/calyxDebug/package.json
374 changes: 374 additions & 0 deletions yxi/axi-calyx/axi-reads-calyx.futil
Original file line number Diff line number Diff line change
@@ -0,0 +1,374 @@
// ###
// This file contains the components needed to perform read transactions via AXI.
// Current goal is to create a cocotb testbench that tests correctness of this.
// See https://github.com/cucapra/calyx/issues/1733 for more information.
//
// This wrapper assumes it is part of a dot product computation with vectors of
// length 16
// It assumes a bus data width of 32
// This is largely a work in progress and as of Nov 20 2023 is not intended to
// actually be used for anything
// ###

import "primitives/core.futil";
import "primitives/compile.futil";
import "primitives/math.futil";
import "primitives/memories.futil";


//this goes m->s unlike read channel
// AR (read-address) channel of an AXI manager. Signals here flow m->s.
//
// Inputs:
//   ARESET  - declared for interface completeness; no assignment in this
//             component reads it.
//   ARREADY - subordinate is ready to accept an address. The handshake is the
//             cycle in which ARVALID and ARREADY are both high.
// Outputs:
//   ARVALID, ARADDR, ARSIZE, ARLEN, ARBURST, ARPROT - see per-port notes below.
// Ref cells:
//   base_addr - 64-bit register supplied by the invoker; its value is driven
//               onto ARADDR.
//
// Control: zero `txn_count`, set `txn_len` to 15, then issue address transfers
// until `txn_count == txn_n` (`txn_n` is the constant 1, so one transfer).
component m_arread_channel(
  ARESET: 1,
  ARREADY: 1
) -> (
  ARVALID: 1,
  // This needs to be 64, see link below `m_axi` section.
  ARADDR: 64,
  // 2^ARSIZE is bytes used in transfer. For memory-mapped AXI (which is what we
  // are doing I believe), should match width of data bus (to shell?, so 32 wide? This
  // is 3'b010)
  // see https://docs.xilinx.com/r/en-US/ug1393-vitis-application-acceleration/Kernel-Interface-Requirements
  // for restrictions
  ARSIZE: 3,
  // in AXI4 this is 8 bits, 1-256 transfers in requested transaction.
  ARLEN : 8,
  // 00 for fixed, 01 for incrementing, 10 for wrap,
  // needs to be incr for RTL kernels (can't use wrapped or fixed)
  ARBURST : 2,
  // required by spec. Hardwired to 3'b110 in `do_ar_transfer`; see the note
  // there about what that encoding means.
  ARPROT : 3) {
  cells{
    // Registered ARVALID; drives the ARVALID output continuously.
    is_arvalid = std_reg(1);

    // gets set high with ARVALID and remains high
    // NOTE(review): nothing in this component ever writes 0 to this register,
    // so a second loop iteration (txn_n > 1) could not re-assert ARVALID.
    arvalid_was_high = std_reg(1);
    // TODO(nathanielnrn): should arguably eventually live in `s_axi_control`
    // but for now will live here.
    ref base_addr = std_reg(64);

    // number of transfers in a transaction. This is sent to subordinate
    txn_len = std_reg(8);

    // number of txns we want to occur before entire m_arread_channel is done
    // this is internal to the channel (unlike txn_len)
    txn_n = std_const(32,1);
    txn_count = std_reg(32);
    perform_reads = std_neq(32);
    txn_adder = std_add(32);

    //"block_transfer" register. need to put into a reg to avoid combinational loops
    bt_reg = std_reg(1);
  }

  wires{

    ARVALID = is_arvalid.out;

    // Force ARVALID low after a transfer.
    group deassert_val {
      is_arvalid.in = 1'b0;
      is_arvalid.write_en = 1'b1;
      deassert_val[done] = is_arvalid.done;
    }

    // Clear bt_reg so the next `do_ar_transfer` does not see a stale done.
    group reset_bt {
      bt_reg.in = 1'b0;
      bt_reg.write_en = 1'b1;
      reset_bt[done] = bt_reg.done;
    }

    // this asserts valid and defines all inputs correctly
    // because valid should not be deasserted until handshake occurs
    // this all needs to be one group
    // this contains blocking logic previously in its own group
    group do_ar_transfer {
      //assert ARVALID (only the first time, i.e. before arvalid_was_high is set)
      is_arvalid.in = !(is_arvalid.out & ARREADY) & !arvalid_was_high.out ? 1'b1;

      // TODO(nathanielnrn): in theory should be able to get rid of arvalid_was_high
      // but for now we will be explicit and reduce this in generation maybe. Not sure
      // it even matters.
      // This makes ARVALID go low after a single cycle. Without it it stays high for 2.
      is_arvalid.in = is_arvalid.out & ARREADY & arvalid_was_high.out ? 1'b0;

      // Only write is_arvalid when one of the two assignments above is active.
      // If ARVALID is already high and ARREADY is still low, neither guard
      // fires; an unconditional write_en would then latch an undriven value and
      // could drop ARVALID before the handshake. Gating write_en makes the
      // register hold its value until ARREADY arrives.
      is_arvalid.write_en =
        (!(is_arvalid.out & ARREADY) & !arvalid_was_high.out)
        | (is_arvalid.out & ARREADY & arvalid_was_high.out) ? 1'b1;


      arvalid_was_high.in = 1'b1;
      arvalid_was_high.write_en = !(is_arvalid.out & ARREADY) & !arvalid_was_high.out ? 1'b1;


      // drive output signals for transfer
      ARADDR = base_addr.out;
      // see link above, needs to match data width to host.
      // In this case 2^2 = 4 bytes = 32 bits = width of our data_bus.
      ARSIZE = 3'b010;
      // For now this can be taken from .yxi, as size of mem, because we are assuming
      // data_bus width that matches size of memory cells
      // If we want to use bigger mems need to be able to update base addr
      ARLEN = txn_len.out;
      ARBURST = 2'b01; //incr
      // NOTE(review): per the AXI AxPROT encoding ([0] privileged, [1] non-secure,
      // [2] instruction), 3'b110 reads as unprivileged, non-secure, instruction
      // access. Earlier comments claimed "privileged" and disagreed on data vs
      // instruction; confirm the intended value against the spec.
      ARPROT = 3'b110;


      //done when one cycle after handshake (handshake happens for a single cycle)
      bt_reg.in = ARREADY & is_arvalid.out ? 1'b1;
      bt_reg.in = !(ARREADY & is_arvalid.out) ? 1'b0;
      bt_reg.write_en = 1'b1;
      do_ar_transfer[done] = bt_reg.out;
    }


    //txn bookkeeping.
    //We are done performing reads when txn_count == txn_n
    group txn_count_init {
      txn_count.in = 32'b0;
      txn_count.write_en = 1'b1;
      txn_count_init[done] = txn_count.done;
    }

    group txn_len_init {
      //TODO(nathanielnrn): 15 is good for word wide data bus. We'd
      //expect 16 transfers. Number of transfers that occur is ARLEN + 1
      txn_len.in = 8'd15;
      txn_len.write_en = 1'b1;
      txn_len_init[done] = txn_len.done;
    }

    // txn_count <- txn_count + 1
    group txn_incr {
      txn_adder.left = txn_count.out;
      txn_adder.right = 32'b1;
      txn_count.in = txn_adder.out;
      txn_count.write_en = 1'b1;
      txn_incr[done] = txn_count.done;
    }

    // perform_reads.out is high while txn_count != txn_n
    comb group check_reads_done {
      perform_reads.left = txn_count.out;
      perform_reads.right = txn_n.out;
    }
  }

  control{
    //XXX(nathanielnrn): What is best way to offer more flexibility beyond just a counter?
    seq{
      txn_count_init;
      txn_len_init;
      while perform_reads.out with check_reads_done{
        seq{
          reset_bt;
          do_ar_transfer;
          deassert_val;
          txn_incr;
        }
      }
    }
  }
}




// R (read-data) channel of an AXI manager.
//
// Each loop iteration waits for one RVALID/RREADY handshake, latches RDATA,
// writes the latched word into `data_received` at index `curr_addr`, and then
// increments `curr_addr`. The loop exits after the transfer that carried RLAST
// has been stored.
//
// Inputs:
//   ARESET, RRESP - declared, but no assignment in this component reads them.
//   RVALID, RLAST, RDATA - driven by the subordinate.
// Outputs:
//   RREADY - registered (is_rdy); see NOTE on the port below.
// Ref cells:
//   data_received - memory written with each received word.
//   curr_addr     - write index into data_received; updated in place.
component m_read_channel(
ARESET : 1,
RVALID : 1,
RLAST : 1,
RDATA : 32,
RRESP : 2, // Note: This is generated in subordinate! had this backwards in earlier version
) -> (
// NOTE: In general, according to ZipCPU we want xREADY signals to be registered
// because (IIRC) it helps avoid combinational loops between READY and VALID.
RREADY : 1,
) {
cells {
// 16 is due to dot-product vector length assumption
// For this manual implementation we are just writing into this data based
// on the data we read from cocotb
ref data_received = seq_mem_d1(32, 16, 64);
// Registered RREADY.
is_rdy = std_reg(1);
ref curr_addr = std_reg(64);

// registered because RLAST is high with last transfer, not after
// before this was registered we were terminating immediately with
// last transfer and not servicing it
// Holds the negation of RLAST ("not last"); used as the while-loop condition.
n_RLAST = std_reg(1);

// TODO: get this width from yxi
// Holds RDATA captured while RREADY is high, until it is written to memory.
read_data_reg = std_reg(32);

// adder used to increment curr_addr, the seq_mem_d1 address we are writing to
curr_addr_adder = std_add(64);

// block_transfer reg to avoid combinational loops
bt_reg = std_reg(1);

}
wires{

RREADY = is_rdy.out;
// This component only writes data_received; the read enable is tied low.
data_received.read_en = 1'b0;

// Start with "not last" = 1 so the while loop is entered.
group init_n_RLAST {
n_RLAST.in = 1'b1;
n_RLAST.write_en = 1'b1;
init_n_RLAST[done] = n_RLAST.done;
}

// Used to block any servicing until handshake occurs.
group reset_bt {
bt_reg.in = 1'b0;
bt_reg.write_en = 1'b1;
reset_bt[done] = bt_reg.done;
}

// NOTE: xVALID signals must be high until xREADY is high as well, so this works
// because if xREADY is high (is_rdy.out) then RVALID being high makes 1 flip
// and group will be done by bt_reg.out
group block_transfer {
// set RREADY high
// TODO (nathanielnrn): technically we can make RREADY depend on RVALID (but not vice versa).
// Could we simplify this by just making things ready when we are in
// block_transfer && RVALID?

//NOTE: is_rdy.in = 1'b1; does not work, it leaves RREADY high for 2 cycles
// this both asserts and deasserts one cycle later
// TODO(nathanielnrn): Spec recommends defaulting xREADY high as it
// can get rid of extra cycle. Maybe doing so here would be useful?
// as opposed to waiting for RVALID
// The two guards are complementary, so is_rdy.in is always driven here.
is_rdy.in = !(RVALID & is_rdy.out) ? 1'b1;
is_rdy.in = RVALID & is_rdy.out ? 1'b0;
is_rdy.write_en = 1'b1;


//store the data we want to write
// Captured on every cycle RREADY is high; the handshake cycle is the last
// such cycle because is_rdy is cleared by the handshake.
read_data_reg.in = RDATA;
read_data_reg.write_en = is_rdy.out;

//update n_RLAST reg
// Written every cycle the group is active with the negation of RLAST.
n_RLAST.in = RLAST ? 1'b0;
n_RLAST.in = !RLAST ? 1'b1;
n_RLAST.write_en = 1'b1;


// we are done after handshake
// bt_reg goes high the cycle after RREADY & RVALID, which ends the group.
bt_reg.in = is_rdy.out & RVALID ? 1'b1;
bt_reg.in = !(is_rdy.out & RVALID) ? 1'b0;
bt_reg.write_en = 1'b1;
block_transfer[done] = bt_reg.out;
}

group receive_r_transfer{
// keep RREADY low;
is_rdy.in = 1'b0;
is_rdy.write_en = 1'b1;

//write the data we received during transfer to seq_d1_mem
data_received.addr0 = curr_addr.out;
data_received.write_en = 1'b1;
data_received.write_data = read_data_reg.out;
receive_r_transfer[done] = data_received.write_done;

}

// curr_addr <- curr_addr + 1
group incr_curr_addr{
curr_addr_adder.left = 64'd1 ;
curr_addr_adder.right = curr_addr.out;
curr_addr.in = curr_addr_adder.out;
curr_addr.write_en = 1'b1;
incr_curr_addr[done] = curr_addr.done;
}
}
control{
init_n_RLAST;
// n_RLAST is updated inside block_transfer, so the iteration that sees RLAST
// still stores its word and bumps curr_addr before the loop condition is
// re-evaluated.
while n_RLAST.out{
seq{
reset_bt;
block_transfer;
receive_r_transfer;
incr_curr_addr;
}
}
}
}

//TODO(nathanielnrn): this is axi_wrapper, prefer to use @toplevel attribute but it's not working
// See individual channel components for explanations of signals
// Top-level AXI read wrapper: runs the AR channel once, copies base_addr into
// curr_addr, then runs the R channel, which stores received words into
// vec1_data.
//
// Ports prefixed `m_` are forwarded to/from the matching ports of the channel
// components via the `invoke` statements in the control block.
component main(
m_ARESET : 1,
m_ARREADY : 1,

m_RVALID : 1,
m_RLAST : 1,
m_RDATA : 32,
m_RRESP : 2,
//NOTE: Only used for cocotb compatibility, doesn't do anything within the wrapper itself currently.
m_RID : 1,
) -> (
m_ARVALID : 1,
m_ARADDR: 64,
m_ARSIZE: 3,
m_ARLEN : 8,
m_ARBURST : 2,

m_RREADY : 1,
//NOTE: Only used for cocotb compatibility, doesn't do anything within the wrapper itself currently.
m_ARID : 1
) {
cells{
// Destination memory for the words received on the R channel.
vec1_data = seq_mem_d1(32,16,64);
// Not referenced by any assignment or control statement in this component.
output_data = seq_mem_d1(32,1,0);

// Write index handed to read_channel; initialised from base_addr.
curr_addr = std_reg(64);
// Address handed to arread_channel. Nothing in this component writes it.
// NOTE(review): presumably relies on the register's reset value — confirm.
base_addr = std_reg(64);

read_channel = m_read_channel();
arread_channel = m_arread_channel();

}

wires{

// Tied low; exists only for testbench compatibility (see port note above).
m_ARID = 1'b0;

group set_curr_to_base_addr{
curr_addr.in = base_addr.out;
curr_addr.write_en = 1'b1;
set_curr_to_base_addr[done] = curr_addr.done;
}
}
control{
seq{
// Address phase. The channel's ARPROT output is not bound here and main
// declares no corresponding output port.
invoke arread_channel[base_addr = base_addr]
(
ARESET = m_ARESET,
ARREADY = m_ARREADY
)
(
ARVALID = m_ARVALID,
ARADDR = m_ARADDR,
ARSIZE = m_ARSIZE,
ARLEN = m_ARLEN,
ARBURST = m_ARBURST
);

set_curr_to_base_addr;

// Data phase: received words are written into vec1_data starting at curr_addr.
invoke read_channel[data_received = vec1_data, curr_addr = curr_addr]
(
ARESET = m_ARESET,
RVALID = m_RVALID,
RLAST = m_RLAST,
RDATA = m_RDATA,
RRESP = m_RRESP
)
(
RREADY = m_RREADY
);
}
}


}
Loading
Loading