diff --git a/source_code/cache_generic/cache_control_downstream.sv b/source_code/cache_generic/cache_control_downstream.sv
new file mode 100644
index 000000000..97cc0a374
--- /dev/null
+++ b/source_code/cache_generic/cache_control_downstream.sv
@@ -0,0 +1,124 @@
+// Module: cache_control_downstream
+// Description:
+//   Drives the downstream bus (mem_gen_bus_if, the side closer to main memory) from the
+//   current cache state and the request seen on proc_gen_bus_if (upstream).
+//   Also holds read_addr, the registered address presented to memory in FETCH and WB.
+`include "cache_generic_struct.vh"
+`include "generic_bus_if.vh"
+`include "cache_coherence_if.vh"
+
+module cache_control_downstream (
+    input logic CLK,
+    input logic nRST,
+    input logic hit,
+    input logic flush,
+    input logic reserve,
+    input logic addr_is_reserved,
+    input logic pass_through,
+    input logic sc_valid_block,
+    input cache_set_t sramRead,
+    input cache_fsm_t state_in,
+    input logic [N_FRAME_BITS-1:0] ridx,
+    input decoded_cache_addr_t decoded_addr,
+    input flush_idx_t flush_idx,
+
+    generic_bus_if.cpu mem_gen_bus_if, // bus to downstream memory, to main memory
+    generic_bus_if.generic_bus proc_gen_bus_if, // bus to upstream memory, to CPU
+    cache_coherence_if.cache ccif
+);
+    import rv32i_types_pkg::*; // word_t
+
+    // Address register for memory transactions (was used below without a declaration).
+    word_t read_addr, next_read_addr;
+
+    always_ff @(posedge CLK, negedge nRST) begin
+        if (!nRST) begin
+            read_addr <= 0;
+        end else begin
+            read_addr <= next_read_addr; // cache address to provide to memory
+        end
+    end
+
+    always_comb begin
+        // Defaults: no downstream request; read_addr tracks the block-aligned CPU address.
+        mem_gen_bus_if.ren = 0;
+        mem_gen_bus_if.wen = 0;
+        mem_gen_bus_if.addr = 0;
+        mem_gen_bus_if.wdata = 0;
+        mem_gen_bus_if.byte_en = '1; // set this to all 1s for evictions
+        next_read_addr = proc_gen_bus_if.addr & ~{CLEAR_LENGTH{1'b1}};
+
+        casez (state_in)
+            IDLE: begin end // nothing to do downstream
+            HIT: begin
+                // cache hit on a processor read -> nothing to do
+                if (proc_gen_bus_if.ren && hit && !flush) begin end
+
+                // cache hit on a processor write -> nothing to do
+                else if (proc_gen_bus_if.wen && hit && (!reserve || (reserve && addr_is_reserved)) && !flush) begin end
+
+                // passthrough (uncached access): relay the CPU request unchanged to the downstream bus
+                else if (pass_through) begin
+                    mem_gen_bus_if.wen = proc_gen_bus_if.wen;
+                    mem_gen_bus_if.ren = proc_gen_bus_if.ren;
+                    mem_gen_bus_if.addr = proc_gen_bus_if.addr;
+                    mem_gen_bus_if.byte_en = proc_gen_bus_if.byte_en;
+                    if (proc_gen_bus_if.wen) begin
+                        // forward only the byte lanes selected by byte_en, zero the others
+                        casez (proc_gen_bus_if.byte_en)
+                            4'b0001: mem_gen_bus_if.wdata = {24'd0, proc_gen_bus_if.wdata[7:0]};
+                            4'b0010: mem_gen_bus_if.wdata = {16'd0, proc_gen_bus_if.wdata[15:8], 8'd0};
+                            4'b0100: mem_gen_bus_if.wdata = {8'd0, proc_gen_bus_if.wdata[23:16], 16'd0};
+                            4'b1000: mem_gen_bus_if.wdata = {proc_gen_bus_if.wdata[31:24], 24'd0};
+                            4'b0011: mem_gen_bus_if.wdata = {16'd0, proc_gen_bus_if.wdata[15:0]};
+                            4'b1100: mem_gen_bus_if.wdata = {proc_gen_bus_if.wdata[31:16], 16'd0};
+                            default: mem_gen_bus_if.wdata = proc_gen_bus_if.wdata;
+                        endcase
+                    end
+                end
+
+                // cache miss of sc -> nothing to do (NOTE(review): sc_valid_block semantics come from the hit logic - confirm)
+                else if (proc_gen_bus_if.wen && reserve && !sc_valid_block && ~pass_through) begin end
+
+                // cache miss on a clean block -> nothing to do (state machine will proceed to fetch)
+                else if ((proc_gen_bus_if.ren || proc_gen_bus_if.wen) && ~hit && ~sramRead.frames[ridx].tag.dirty && ~pass_through) begin end
+
+                // cache miss on a dirty block -> latch the victim's address so that WB presents it
+                // from its first cycle (otherwise read_addr would still hold the CPU address)
+                else if ((proc_gen_bus_if.ren || proc_gen_bus_if.wen) && ~hit && sramRead.frames[ridx].tag.dirty && ~pass_through) begin
+                    next_read_addr = {sramRead.frames[ridx].tag.tag_bits, decoded_addr.idx.idx_bits, N_BLOCK_BITS'('0), 2'b00};
+                end
+            end
+            FETCH: begin
+                mem_gen_bus_if.wen = proc_gen_bus_if.wen;
+                mem_gen_bus_if.ren = proc_gen_bus_if.ren || !ccif.abort_bus;
+                mem_gen_bus_if.addr = read_addr;
+                // the data returned on the downstream bus is written into the cache sram;
+                // byte_en handling for the fill is done in the sram control module
+            end
+            WB: begin
+                // write the victim frame back; keep read_addr on the victim's block address
+                mem_gen_bus_if.wen = 1'b1;
+                mem_gen_bus_if.addr = read_addr;
+                mem_gen_bus_if.wdata = sramRead.frames[ridx].data;
+                next_read_addr = {sramRead.frames[ridx].tag.tag_bits, decoded_addr.idx.idx_bits, N_BLOCK_BITS'('0), 2'b00};
+            end
+            FLUSH_CACHE: begin
+                // flush to memory if valid & dirty
+                if (sramRead.frames[flush_idx.frame_num].tag.valid && sramRead.frames[flush_idx.frame_num].tag.dirty) begin
+                    mem_gen_bus_if.wen = 1'b1;
+                    mem_gen_bus_if.addr = {sramRead.frames[flush_idx.frame_num].tag.tag_bits, flush_idx.set_num, {N_BLOCK_BITS{1'b0}}, 2'b00};
+                    mem_gen_bus_if.wdata = sramRead.frames[flush_idx.frame_num].data;
+                end
+            end
+            SNOOP: begin end // nothing to do
+            CANCEL_REQ: begin
+                mem_gen_bus_if.wen = 0;
+                mem_gen_bus_if.ren = 1;
+                mem_gen_bus_if.addr = decoded_addr;
+                mem_gen_bus_if.byte_en = 0;
+            end
+        endcase
+    end
+
+endmodule
\ No newline at end of file
diff --git 
a/source_code/cache_generic/cache_control_snoop.sv b/source_code/cache_generic/cache_control_snoop.sv new file mode 100644 index 000000000..f80338c54 --- /dev/null +++ b/source_code/cache_generic/cache_control_snoop.sv @@ -0,0 +1,66 @@ +// Module: cache_control_snoop +// Description: +// control the snoopy bus output base on the read/write hit/miss state transition +// to implement cache coherency among multiple caches of the multicore processor +`include "cache_generic_struct.vh" +`include "generic_bus_if.vh" +`include "cache_coherence_if.vh" + +module cache_control_snoop ( + input CLK, + input nRst, + input cache_set sramRead, + input logic sramWEN, + input logic [N_SET_BITS-1:0] sramSEL, + output logic [N_SET_BITS-1:0] sramSNOOPSEL, + input decoded_cache_addr_t snoop_decoded_addr, + output cache_tag_t sramTags [ASSOC-1:0], + output cache_tag_t sramTagsMask [ASSOC-1:0], + input cache_set_t sramMask, + input cache_set_t sramWrite, + input flush_idx_t flush_idx, + input logic [N_FRAME_BITS-1:0] hit_idx, + cache_coherence_if.cache ccif +); + always_ff @(negedge nRst, posedge CLK) begin + if(!nRst) begin + ccif.abort_bus <= 0; + end else begin + ccif.abort_bus <= !proc_gen_bus_if.ren && !proc_gen_bus_if.wen; + end + end + + always_comb begin + ccif.dWEN = 1'b0; + ccif.requested_data = {BLOCK_SIZE{32'hBAD1BAD1}}; + casez(state_in) + IDLE: begin end // nothing to do + HIT: begin end // nothing to do + FETCH: begin end // nothing to do + WB: begin + ccif.dWEN = 1'b1; + end + FLUSH_CACHE: begin + // flush to memory if valid & dirty + if (sramRead.frames[flush_idx.frame_num].tag.valid && sramRead.frames[flush_idx.frame_num].tag.dirty) begin + ccif.dWEN = 1'b1; + end + end + SNOOP: begin + ccif.requested_data = sramRead.frames[hit_idx].data; + end + endcase + + // Same as sramSEL except try to lookup the snoop addr when there's + // a request + sramSNOOPSEL = sramWEN ? sramSEL + : ccif.snoop_req ? 
snoop_decoded_addr.idx.idx_bits + : sramSEL; + ccif.snoop_busy = sramWEN || !ccif.snoop_req; + + for (int i = 0; i < ASSOC; i++) begin + sramTags[i] = sramWrite.frames[i].tag; + sramTagsMask[i] = sramMask.frames[i].tag; + end + end +endmodule \ No newline at end of file diff --git a/source_code/cache_generic/cache_control_sram.sv b/source_code/cache_generic/cache_control_sram.sv new file mode 100644 index 000000000..4a51ab4e0 --- /dev/null +++ b/source_code/cache_generic/cache_control_sram.sv @@ -0,0 +1,273 @@ +// Module: cache_control_sram +// Description: +// control the output signals to the actual sram, module that implements this cache +// The sram that we instantiate consist of the **CPU_SRAM (data + overhead) and **BUS_SRAM (overhead) +// The main signals are sramWE, sramRead, sramWrite, sramMask +// +// -----------Notice------------ +// **CPU_SRAM and BUS_SRAM naming seems to be misleading implying it connected to CPU and BUS to somewhere. Idk why either +`include "cache_generic_struct.vh" +`include "generic_bus_if.vh" +`include "cache_coherence_if.vh" + +module cache_control_sram( + input logic CLK, + input logic nRST, + input logic hit, + input logic [N_FRAME_BITS-1:0] hit_idx, + // input word_t [BLOCK_SIZE-1:0] hit_data, + input logic flush, + input logic reserve, + input logic addr_is_reserved, + input cache_fsm_t state_in, + input flush_idx_t flush_idx, + input decoded_cache_addr_t snoop_decoded_addr, + output decoded_cache_addr_t decoded_addr, + + input cache_set_t sramRead, + output logic sramWEN, + output cache_set_t sramWrite, + output cache_set_t sramMask, + + output logic enable_flush_count, + output logic enable_flush_count_nowb, + output logic idle_done, + output logic clear_done, + output logic flush_done, + output logic clear_flush_count, + + output logic [N_FRAME_BITS-1:0] ridx, + generic_bus_if.cpu mem_gen_bus_if, // bus to downstream memory, to main memory + generic_bus_if.generic_bus proc_gen_bus_if, // bus to upstream memory, to CPU + 
cache_coherence_if.cache ccif +); + decoded_cache_addr_t decoded_req_addr, next_decoded_req_addr; + + // replacement policy LRU logic + logic [N_SETS-1:0] last_used; + logic [N_SETS-1:0] next_last_used; + + always @(posedge CLK, negedge nRST) begin + if(!nRST) begin + decoded_req_addr <= 0; + last_used <= 0; + end else begin + decoded_req_addr <= next_decoded_req_addr; // cache address requested by core + last_used <= next_last_used; // MRU index + end + end + + // decoded address conversion + assign decoded_addr = state == SNOOP ? snoop_decoded_addr : decoded_cache_addr_t'(proc_gen_bus_if.addr); + + always_comb begin + sramWEN = 0; // default sramWEN zero, only allow to write (one) when receiving instruction + sramWrite = 0; // default sramWrite zero, only modify the write data (data) when receiving instruction + sramMask = '1; // default sramMask 'one, masking all the bit, only modify some bits to zero when receiving instruction + enable_flush_count = 0; + enable_flush_count_nowb = 0; + idle_done = 0; + clear_done = 0; + flush_done = 0; + clear_flush_count = 0; + next_decoded_req_addr = decoded_req_addr; + next_last_used = last_used; + + // associativity, using NRU + if (ASSOC == 1 || (last_used[decoded_addr.idx.idx_bits] == (ASSOC - 1))) begin + ridx = 0; + end else begin + ridx = last_used[decoded_addr.idx.idx_bits] + 1; + end + + casez(state_in) + IDLE: begin + // Case: At the beginning of all operation + // Do: clear out caches with flushing, but no write back require (basically set all frames in cache to zeros) + sramWEN = 1; // enable to write to sram + sramWrite.frames[flush_idx.frame_num] = '0; // sram write set the data wVal in cache to zeros + sramMask.frames[flush_idx.frame_num] = '0; // sram mask set to zero so it doesn't mask out any byte + enable_flush_count_nowb = 1; // remain high until all frames are flushed + + // Case: Done with flushing at the beginning + // Do: set the flush completion flag high, proceeding the cache state machine to HIT + if 
(flush_idx.finish) begin + clear_flush_count = 1; // clear_flush_count clears the flush_idx (counter) to zero + idle_done = 1; // idle_done will proceed cache state machine to HIT state + flush_done = 1; // output signal for cache ***HACK: Remove if this causes bugs, used for testbench + end + end + HIT: begin + // Case: cache hit on a processor read + // Do: update last used index, the cache hit return data is a combinational hit_data which is dealt with in upstream control + if(proc_gen_bus_if.ren && hit && !flush) begin + next_last_used[decoded_addr.idx.idx_bits] = hit_idx; + end + + // Case: cache hit on a processor write + // Do: write the data to the cache sram + else if(proc_gen_bus_if.wen && hit && (!reserve || (reserve && addr_is_reserved)) && !flush) begin + sramWEN = 1; // enable the sram write signal + + // set the sram Mask based on the byte_en signal + casez (proc_gen_bus_if.byte_en) + 4'b0001: sramMask.frames[hit_idx].data[decoded_addr.idx.block_bits] = 32'hFFFFFF00; + 4'b0010: sramMask.frames[hit_idx].data[decoded_addr.idx.block_bits] = 32'hFFFF00FF; + 4'b0100: sramMask.frames[hit_idx].data[decoded_addr.idx.block_bits] = 32'hFF00FFFF; + 4'b1000: sramMask.frames[hit_idx].data[decoded_addr.idx.block_bits] = 32'h00FFFFFF; + 4'b0011: sramMask.frames[hit_idx].data[decoded_addr.idx.block_bits] = 32'hFFFF0000; + 4'b1100: sramMask.frames[hit_idx].data[decoded_addr.idx.block_bits] = 32'h0000FFFF; + default: sramMask.frames[hit_idx].data[decoded_addr.idx.block_bits] = 32'h0; + endcase + // set the cache tag within the cache sram to be dirty + sramWrite.frames[hit_idx].data[decoded_addr.idx.block_bits] = proc_gen_bus_if.wdata; + sramWrite.frames[hit_idx].tag.dirty = 1; // the data dirty since it is not written-back to main memory yet + sramWrite.frames[hit_idx].tag.exclusive = 0; // Set exclusive bit in tag to 0, E -> M case + sramMask.frames[hit_idx].tag.dirty = 0; // set the dirty bit mask to zero, allowing the status update in sram + 
sramMask.frames[hit_idx].tag.exclusive = 0; // set the exclusive bit mask to zero, allowing the status update in sram + next_last_used[decoded_addr.idx.idx_bits] = hit_idx; //*********** + end + + // Case: passthrough + // Do: nothing + else if(pass_through) begin end + + // Case: Cache miss of sc + // Do: nothing + else if (proc_gen_bus_if.wen && reserve && !sc_valid_block && ~pass_through) begin end + + // Case: cache miss on a clean block + // Do: nothing + else if((proc_gen_bus_if.ren || proc_gen_bus_if.wen) && ~hit && ~sramRead.frames[ridx].tag.dirty && ~pass_through) begin + next_decoded_req_addr = decoded_addr; + end + + // Case: cache miss on a dirty block + // DO: nothing + else if((proc_gen_bus_if.ren || proc_gen_bus_if.wen) && ~hit && sramRead.frames[ridx].tag.dirty && ~pass_through) begin + next_decoded_req_addr = decoded_addr; + next_read_addr = {sramRead.frames[ridx].tag, decoded_addr.idx.idx_bits, N_BLOCK_BITS'('0), 2'b00}; + end + end + FETCH: begin + // Set the cache valid status to zero, until the cache is done fetching + sramWrite.frames[ridx].tag.valid = 0; // set the cache valid bit to zero + sramMask.frames[ridx].tag.valid = 0; // set the mask at valid bit zero, allowing the sram valid bit to be updated + + // Fill in data + if(~mem_gen_bus_if.busy) begin // can fill the data when the main memory is not busy + // Fill in the data from the main memory to cache + sramWEN = 1'b1; // enable writing to cache sram + sramWrite.frames[ridx].data = mem_gen_bus_if.rdata; // set the data to what returns from main memory bus + sramWrite.frames[ridx].tag.valid = 1'b1; // set the valid bit value, meaning data is fetched, not the preexisting data + sramWrite.frames[ridx].tag.tag_bits = decoded_req_addr.idx.tag_bits; // set the tage bit value to match with the main memory address + sramMask.frames[ridx].data = 1'b0; // unmask to allow updating data value in sram (** shouldn't this be the size of data) + sramMask.frames[ridx].tag.valid = 1'b0; // unmask to 
allow updating valid bit value in sram + sramMask.frames[ridx].tag.tag_bits = 1'b0; // unmask to allow updating tag bit value in sram + + // These below parts, I don't know yet what they do + sramWrite.frames[ridx].tag.exclusive = (ccif.state_transfer == EXCLUSIVE); + sramWrite.frames[ridx].tag.dirty = (ccif.state_transfer == MODIFIED); + sramMask.frames[ridx].tag.exclusive = 0; + sramMask.frames[ridx].tag.dirty = 0; + + // Fill in the data from the CPU write request to cache + /* + *** shouldn't there be a way to make the process a state-machine specific, eg. after fetching, + go back to the hit state and then write on write hit, so that we don't write duplicate module + like the below line is similar to write on write hit in hit state above ???? + */ + + // Fetched to write, the CPU will also send a write-enable signal + if (proc_gen_bus_if.wen) begin + casez (proc_gen_bus_if.byte_en) // based on the byte_en, select + 4'b0001: sramWrite.frames[ridx].data[decoded_addr.idx.block_bits][7:0] = proc_gen_bus_if.wdata[7:0]; + 4'b0010: sramWrite.frames[ridx].data[decoded_addr.idx.block_bits][15:8] = proc_gen_bus_if.wdata[15:8]; + 4'b0100: sramWrite.frames[ridx].data[decoded_addr.idx.block_bits][23:16] = proc_gen_bus_if.wdata[23:16]; + 4'b1000: sramWrite.frames[ridx].data[decoded_addr.idx.block_bits][31:24] = proc_gen_bus_if.wdata[31:24]; + 4'b0011: sramWrite.frames[ridx].data[decoded_addr.idx.block_bits][15:0] = proc_gen_bus_if.wdata[15:0]; + 4'b1100: sramWrite.frames[ridx].data[decoded_addr.idx.block_bits][31:16] = proc_gen_bus_if.wdata[31:16]; + default: sramWrite.frames[ridx].data[decoded_addr.idx.block_bits] = proc_gen_bus_if.wdata; + endcase + end + end + end + WB: begin + // increment eviction word counter + if(!mem_gen_bus_if.busy) begin + // invalidate when eviction is complete + sramWEN = 1; + sramWrite.frames[ridx].tag.dirty = 0; + sramWrite.frames[ridx].tag.valid = 0; + sramMask.frames[ridx].tag.dirty = 0; + sramMask.frames[ridx].tag.valid = 0; + end + end + 
FLUSH_CACHE: begin + // flush to memory if valid & dirty + if (sramRead.frames[flush_idx.frame_num].tag.valid && sramRead.frames[flush_idx.frame_num].tag.dirty) begin + if (~mem_gen_bus_if.busy) begin + enable_flush_count = 1; + // clears entry when flushed + sramWEN = 1; + sramWrite.frames[flush_idx.frame_num] = 0; + sramMask.frames[flush_idx.frame_num] = 0; + end + end + // else clears entry, moves to next frame + else begin + sramWEN = 1; + sramWrite.frames[flush_idx.frame_num] = 0; + sramMask.frames[flush_idx.frame_num] = 0; + enable_flush_count_nowb = 1; + end + // flag the completion of flush + if (flush_idx.finish) begin + clear_flush_count = 1; + flush_done = 1; + end + end + SNOOP: begin + if (!mem_gen_bus_if.busy) begin + sramWEN = 1; + case(ccif.state_transfer) + INVALID: begin + sramWrite.frames[hit_idx].tag.dirty = 0; + sramWrite.frames[hit_idx].tag.valid = 0; + sramWrite.frames[hit_idx].tag.exclusive = 0; + sramMask.frames[hit_idx].tag.dirty = 0; + sramMask.frames[hit_idx].tag.valid = 0; + sramMask.frames[hit_idx].tag.exclusive = 0; + end + SHARED: begin + sramWrite.frames[hit_idx].tag.dirty = 0; + sramWrite.frames[hit_idx].tag.valid = 1; + sramWrite.frames[hit_idx].tag.exclusive = 0; + sramMask.frames[hit_idx].tag.dirty = 0; + sramMask.frames[hit_idx].tag.valid = 0; + sramMask.frames[hit_idx].tag.exclusive = 0; + end + EXCLUSIVE: begin + sramWrite.frames[hit_idx].tag.dirty = 0; + sramWrite.frames[hit_idx].tag.valid = 1; + sramWrite.frames[hit_idx].tag.exclusive = 1; + sramMask.frames[hit_idx].tag.dirty = 0; + sramMask.frames[hit_idx].tag.valid = 0; + sramMask.frames[hit_idx].tag.exclusive = 0; + end + MODIFIED: begin + sramWrite.frames[hit_idx].tag.dirty = 1; + sramWrite.frames[hit_idx].tag.valid = 1; + sramWrite.frames[hit_idx].tag.exclusive = 0; + sramMask.frames[hit_idx].tag.dirty = 0; + sramMask.frames[hit_idx].tag.valid = 0; + sramMask.frames[hit_idx].tag.exclusive = 0; + end + endcase + end + end + CANCEL_REQ: begin end// nothing to do with 
sram cache + endcase + + end +endmodule \ No newline at end of file diff --git a/source_code/cache_generic/cache_control_upstream.sv b/source_code/cache_generic/cache_control_upstream.sv new file mode 100644 index 000000000..afc76cc8b --- /dev/null +++ b/source_code/cache_generic/cache_control_upstream.sv @@ -0,0 +1,83 @@ +// Module: cache_control_upstream +// Description: +// control the output signals to the upstream memory (closer to CPU) +// setting the ouputs in prog_gen_bus_if (upstream) from the data sent back from mem_gen_bus_if (downstream) +`include "cache_generic_struct.vh" +`include "generic_bus_if.vh" +`include "cache_coherence_if.vh" + +module cache_control_upstream ( + input logic CLK, + input logic nRST, + input logic hit, + input logic flush, + input logic reserve, + input logic addr_is_reserved, + input logic pass_through, + input logic sc_valid_block, + input decoded_cache_addr_t decoded_addr, + input cache_fsm_t state_in, + + generic_bus_if.cpu mem_gen_bus_if, // bus to downstream memory, to main memory + generic_bus_if.generic_bus proc_gen_bus_if // bus to upstream memory, to CPU +); + always_ff @(posedge CLK, negedge nRST) begin + if (!nRST) begin + + end else begin + + end + end + + always_comb begin + proc_gen_bus_if.busy = 1; + proc_gen_bus_if.rdata = 0; // TODO: Can this be optimized? 
+ casez(state_in) + IDLE: begin end // nothing to do + HIT: begin + // Case: cache hit on a processor read + // Do: send the data upstream directly from cache + if(proc_gen_bus_if.ren && hit && !flush) begin + proc_gen_bus_if.busy = 0; + proc_gen_bus_if.rdata = hit_data[decoded_addr.idx.block_bits]; + + // Delay so we can set the reservation set + // if (reserve && !addr_is_reserved) begin + // proc_gen_bus_if.busy = 1; + // end + end + // Case: cache hit on a processor write + // Do: send the busy signal upstream, waiting for writing the data to cache done + else if(proc_gen_bus_if.wen && hit && (!reserve || (reserve && addr_is_reserved)) && !flush) begin + proc_gen_bus_if.busy = 0; + proc_gen_bus_if.rdata = 0; + end + + // passthrough + else if(pass_through) begin + proc_gen_bus_if.busy = mem_gen_bus_if.busy; + proc_gen_bus_if.rdata = mem_gen_bus_if.rdata; + end + + // Cache miss of sc + else if (proc_gen_bus_if.wen && reserve && !sc_valid_block && ~pass_through) begin + proc_gen_bus_if.busy = 0; + proc_gen_bus_if.rdata = 32'b1; + end + + // cache miss on a clean block -> nothing to do (state machine will proceed to fetch) + else if((proc_gen_bus_if.ren || proc_gen_bus_if.wen) && ~hit && ~sramRead.frames[ridx].tag.dirty && ~pass_through) begin end + + // cache miss on a dirty block -> nothing to do (state machine will proceed to write back before fetching new data) + else if((proc_gen_bus_if.ren || proc_gen_bus_if.wen) && ~hit && sramRead.frames[ridx].tag.dirty && ~pass_through) begin end + end + FETCH: begin end // nothing to do -> once the data is fetched into cache, the state machine proceed to HIT returning data upstream + WB: begin end // nothing to do upstream + FLUSH_CACHE: begin end // nothing to do upstream + SNOOP: begin end // nothing to do upstream + CANCEL_REQ: begin + proc_gen_bus_if.busy = 1; + end + endcase + end +endmodule \ No newline at end of file diff --git a/source_code/cache_generic/cache_flush_counter_handler.sv 
b/source_code/cache_generic/cache_flush_counter_handler.sv new file mode 100644 index 000000000..3620dd0f3 --- /dev/null +++ b/source_code/cache_generic/cache_flush_counter_handler.sv @@ -0,0 +1,73 @@ +// Module: cache_flush_counter_handler +// Description: +// Handle the cache flush counter including +// - taking flush signal +// - keep track of the flush, next_flush counter +// - updating the flush_clear, flush_done logic +`include "cache_generic_struct.vh" +`include "generic_bus_if.vh" +`include "cache_coherence_if.vh" + +module cache_flush_counter_handler ( + input logic CLK, + input logic nRST, + input logic flush, + input logic flush_done, + input logic clear_flush_count, + input logic enable_flush_count, + input logic enable_flush_count_nowb, + output logic flush_req, + output flush_idx_t flush_idx_out +); + flush_idx_t flush_idx, next_flush_idx; + logic nflush_req; + assign flush_idx_out = flush_idx; + + always_ff @ (posedge CLK, negedge nRST) begin + if(~nRST) begin + flush_idx <= 0; + flush_req <= 0; + end else begin + flush_idx <= next_flush_idx; // index for flushing the cache entries + flush_req <= nflush_req; // flush requested by core + end + end + // counters + always_comb begin + next_flush_idx = flush_idx; + + // flush counter logic + if (clear_flush_count) + next_flush_idx = 0; + else if (enable_flush_count_nowb && BLOCK_SIZE != 1) + next_flush_idx = flush_idx + 1; + else if (enable_flush_count || enable_flush_count_nowb) + next_flush_idx = flush_idx + 1; + + // correction for non-powers of 2 + if (next_flush_idx.set_num == N_SETS) begin + next_flush_idx.finish = 1; + next_flush_idx.set_num = 0; + next_flush_idx.frame_num = 0; + end + else if (next_flush_idx.frame_num == ASSOC) begin + next_flush_idx.set_num = flush_idx.set_num + 1; + next_flush_idx.frame_num = 0; + end + + // FOR ASSOC == 1 FINISH FLAG + if (next_flush_idx.set_num == 0 && flush_idx.set_num == N_SETS - 1) begin + next_flush_idx.finish = 1; + next_flush_idx.set_num = 0; + 
next_flush_idx.frame_num = 0; + end + + // flush saver + nflush_req = flush_req; + if (flush) + nflush_req = 1; + if (flush_done) + nflush_req = 0; + end + +endmodule \ No newline at end of file diff --git a/source_code/cache_generic/cache_generic.core b/source_code/cache_generic/cache_generic.core new file mode 100644 index 000000000..7b5a27a12 --- /dev/null +++ b/source_code/cache_generic/cache_generic.core @@ -0,0 +1,45 @@ +CAPI=2: +name: socet:riscv:cache_generic:0.1.0 +description: cache_generic + +filesets: + rtl_cache_generic: + depend: + - "socet:riscv:packages" + - "socet:riscv:riscv_include" + - "socet:riscv:ram" + files: + - cache_control_downstream.sv + - cache_control_snoop.sv + - cache_control_sram.sv + - cache_control_upstream.sv + - cache_flush_counter_handler.sv + - cache_generic.sv + - cache_hit_logic.sv + - cache_sm.sv + - sram.sv + file_type: systemVerilogSource + tb: + files: + - tb_cache_generic/tb_cache_generic.sv + file_type: systemVerilogSource + +targets: + default: &default + filesets: + - rtl_cache_generic + - tb + toplevel: tb_cache_generic + + sim: + <<: *default + description: Simulate w/SV TB + default_tool: verilator + toplevel: tb_cache_generic + tools: + verilator: + verilator_options: + - --binary + - --coverage + - --trace-fst + - --trace-structs diff --git a/source_code/cache_generic/cache_generic.sv b/source_code/cache_generic/cache_generic.sv new file mode 100644 index 000000000..e5dc4d8d0 --- /dev/null +++ b/source_code/cache_generic/cache_generic.sv @@ -0,0 +1,291 @@ +/* +* Filename: cache_generic.sv +* +* Created by: Puthimet Kitjaruwankul +* Email: pkitjaru@purdue.edu +* Date Created: 11/17/2024 +* Description: Generic cache module: +* - Cache Size +* - Non-Cacheable start address +* - Block Size | max 8 +* - ASSOC | either 1 or 2 +* The cache uses SRAM to store the data +*/ +`include "cache_generic_struct.vh" +`include "generic_bus_if.vh" +`include "cache_coherence_if.vh" + +module cache_generic #( + parameter CACHE_SIZE 
= 1024, // cache size in bits ****(or bytes) + parameter BLOCK_SIZE = 2, // number of words in data in the cache frame + parameter ASSOC = 1, // associativity (ways): a number of sets of caches + parameter NONCACHE_START_ADDR = 32'hF000_0000 // **** sh/sb still have issues when uncached; not sure whats up with that still tbh +) +( + input logic CLK, // Clock signal + input logic nRST, // Negedge reset signal + input logic clear, // + input logic flush, // + input logic reserve, // + input logic exclusive, // + output logic clear_done, // + output logic flush_done, // + generic_bus_if.cpu mem_gen_bus_if, // bus to downstream memory, to main memory + generic_bus_if.generic_bus proc_gen_bus_if, // bus to upstream memory, to CPU + cache_coherence_if.cache ccif // Coherency interface, connected to coherency unit +); + // -------------------------------Importing relevent package------------------------------------------ + import rv32i_types_pkg::*; // From the rv32i_types_pkg, use: + // paramater int WORD_SIZE = 32 -> number of bits in one word + // parameter RAM_ADDR_SIZE = 32 -> number of bits in ram index + + // --------------------------------Declaring the signals--------------------------------------------- + // counter signals + flush_idx_t flush_idx; // asynchronous flush index, since we need to flush different index at time + logic clear_flush_count; // clear the flush count ** + logic enable_flush_count; // enable the flush count ** + logic enable_flush_count_nowb; // enable the flush count with no write-back** + + // States + cache_fsm_t cache_state; // cache state machine + + // sram signals + cache_set_t sramWrite; // data writing to the sram + cache_set_t sramRead; // data reading from the sram memory + cache_set_t sramMask; // data masking for data write based on the byte_enable + // (eg. 
byte_en = 4'b0001 -> sramMask = 32'hFFFF_FF00) + cache_tag_t sramTags [ASSOC-1:0]; + cache_tag_t sramTagsMask [ASSOC-1:0]; + logic sramWEN; // no need for REN + logic [N_SET_BITS-1:0] sramSEL, sramSNOOPSEL; + cache_tag_t read_tag_bits [ASSOC-1:0]; //Tag coming from bus + + // lru: logic that least recent used replacement policy for cache full hitting same cache index + logic [N_FRAME_BITS-1:0] ridx; + + // address + word_t read_addr, next_read_addr; + decoded_cache_addr_t decoded_addr, snoop_decoded_addr; + // decoded_cache_addr_t decoded_snoop_addr; + + // Cache Hit + logic hit, pass_through; + word_t [BLOCK_SIZE-1:0] hit_data; + logic [N_FRAME_BITS-1:0] hit_idx; + + // flush reg + logic flush_req; + logic idle_done; + + // Reservation tracking reserve + reservation_set_t reservation_set, next_reservation_set; + logic addr_is_reserved; + + // Snooping signals + logic[N_TAG_BITS-1:0] bus_frame_tag; //Tag from bus to compare + + logic coherence_hit, sc_valid_block; + + assign snoop_decoded_addr = decoded_cache_addr_t'(ccif.addr); + assign bus_frame_tag = snoop_decoded_addr.idx.tag_bits; + + // --------------------------------Cache Module instantiation----------------------------------------- + // cache finite state machine + cache_sm cache_sm0 ( + .CLK(CLK), + .nRST(nRST), + .idle_done(idle_done), + .reserve(reserve), + .sc_valid_block(sc_valid_block), + .pass_through(pass_through), + .hit(hit), + .sramRead(sramRead), //** sramRead is actually cache_set_t + .flush(flush), + .flush_req(flush_req), + .flush_done(flush_done), + .mem_gen_bus_if(mem_gen_bus_if), + .proc_gen_bus_if(proc_gen_bus_if), + .ccif(ccif), + .state_out(cache_state) + ); + + // cache flush handler handles the flush counter signal + cache_flush_counter_handler flush_handler0 ( + .CLK(CLK), + .nRST(nRST), + .flush(flush), + .flush_done(flush_done), + .clear_flush_count(clear_flush_count), + .enable_flush_count(enable_flush_count), + .enable_flush_count_nowb(enable_flush_count_nowb), + 
.flush_req(flush_req), + .flush_idx_out(flush_idx) + ); + + // cache hit logic: + cache_hit_logic cache_hit0 ( + .sramRead(sramRead), + .decoded_addr(decoded_addr), + .addr_is_reserved(addr_is_reserved), + .state_in(state), + .read_tag_bits(read_tag_bits), + .bus_frame_tag(bus_frame_tag), + .hit(hit), + .hit_idx(hit_idx), + .hit_data(hit_data), + .pass_through(pass_through), + .coherence_hit(coherence_hit), + .sc_valid_block(sc_valid_block), + .proc_gen_bus_if(proc_gen_bus_if), + .ccif(ccif) + ); + + // cache control downstream: manage output signal downstream (to main memory) + cache_control_downstream cctr_downstream0 ( + .CLK(CLK), + .nRST(nRST), + .hit(hit), + .flush(flush), + .reserve(reserve), + .addr_is_reserved(addr_is_reserved), + .pass_through(pass_through), + .sc_valid_block(sc_valid_block), + .sramRead(sramRead), + .state_in(state), + .ridx(ridx), + .decoded_addr(decoded_addr), + .flush_idx(flush_idx), + .mem_gen_bus_if(mem_gen_bus_if), // bus to downstream memory, to main memory + .proc_gen_bus_if(proc_gen_bus_if), // bus to upstream memory, to CPU + .ccif(ccif) + ); + + // cache control upstream: manage output signal upstream (to CPU) + cache_control_upstream cctr_upstream0 ( + .CLK(CLK), + .nRST(nRST), + .hit(hit), + .flush(flush), + .reserve(reserve), + .addr_is_reserved(addr_is_reserved), + .pass_through(pass_through), + .sc_valid_block(sc_valid_block), + .decoded_addr(decoded_addr), + .state_in(state), + .mem_gen_bus_if(mem_gen_bus_if), // bus to downstream memory, to main memory + .proc_gen_bus_if(proc_gen_bus_if) // bus to upstream memory, to CPU + ); + + // cache control sram: manage output signal to sram module (to cache) + cache_control_sram cctr_sram0 ( + .CLK(CLK), + .nRST(nRST), + .hit(hit), + .hit_idx(hit_idx), + .flush(flush), + .reserve(reserve), + .addr_is_reserved(addr_is_reserved), + .state_in(state_in), + .flush_idx(flush_idx), + .snoop_decoded_addr(snoop_decoded_addr), + .decoded_addr(decoded_addr), + .sramRead(sramRead), + 
.sramWEN(sramWEN), + .sramWrite(sramWrite), + .sramMask(sramMask), + .enable_flush_count(enable_flush_count), + .enable_flush_count_nowb(enable_flush_count_nowb), + .idle_done(idle_done), + .clear_done(clear_done), + .flush_done(flush_done), + .clear_flush_count(clear_flush_count), + .ridx(ridx), + .mem_gen_bus_if(mem_gen_bus_if), // bus to downstream memory, to main memory + .proc_gen_bus_if(proc_gen_bus_if), // bus to upstream memory, to CPU + .ccif(ccif) + ); + + cache_control_snoop cctr_snoop0 ( + .CLK(CLK), + .nRst(nRst), + .sramRead(sramRead), + .sramWEN(sramWEN), + .sramSEL(sramSEL), + .sramSNOOPSEL(sramSNOOPSEL), + .snoop_decoded_addr(snoop_decoded_addr), + .sramTags(sramTags), + .sramTagsMask(sramTagsMask), + .sramMask(sramMask), + .sramWrite(sramWrite), + .flush_idx(flush_idx), + .hit_idx(hit_idx), + .ccif(ccif) + ); + + // --------------------------------SRAM Module instantiation------------------------------------- + // sram instance + assign sramSEL = (state == FLUSH_CACHE || state == IDLE) ? flush_idx.set_num + : (state == SNOOP) ? snoop_decoded_addr.idx.idx_bits + : decoded_addr.idx.idx_bits; + + // Note: sram module write data 'wVal' to the sramMemory at 'SEL' index when 'WEN' is high, + // the sramMemory writing is delayed by a clock cycle. + // sram module read data to 'rVal' from the sramMemory of 'SEL' index when 'REN' is high, + // the rVal reading is purely combinational. + + // SRAM storing cache overall (data + overhead): ***not sure why it was initially named CPU_SRAM ? + sram #( + .SRAM_WR_SIZE(SRAM_W), + .SRAM_HEIGHT(N_SETS) + ) CPU_SRAM ( + .CLK(CLK), + .nRST(nRST), + .wVal(sramWrite), + .rVal(sramRead), + .REN(1'b1), + .WEN(sramWEN), + .SEL(sramSEL), + .wMask(sramMask) + ); + + // SRAM storing cache overhead (tag, cache status, etc.): ***not sure why it was initially named BUS_SRAM ? + // *** also not sure why we have this sram storing overhead when we have the one that stores both data and overhead already ? 
+  sram #(
+    .SRAM_WR_SIZE(SRAM_TAG_W),
+    .SRAM_HEIGHT(N_SETS)
+  ) BUS_SRAM (
+    .CLK(CLK),
+    .nRST(nRST),
+    .wVal(sramTags),
+    .rVal(read_tag_bits),
+    .REN(1'b1),
+    .WEN(sramWEN),
+    .SEL(sramSNOOPSEL),
+    .wMask(sramTagsMask)
+  );
+
+//************************************************************************************************
+
+  // flip flops
+  always_ff @ (posedge CLK, negedge nRST) begin
+    if(~nRST) begin
+      reservation_set <= 0;
+    end
+    else begin
+      reservation_set <= next_reservation_set;
+    end
+  end
+
+  // // Reservation tracking logic
+  // // TODO: Remove exclusive signal
+  always_comb begin
+    next_reservation_set = reservation_set;
+    if (proc_gen_bus_if.ren && reserve && hit) begin
+      next_reservation_set.idx = decoded_addr.idx;
+      next_reservation_set.reserved = 1'b1;
+    end else if (((proc_gen_bus_if.ren || proc_gen_bus_if.wen) && !proc_gen_bus_if.busy) || clear || flush) begin
+      next_reservation_set.reserved = 1'b0;
+    end
+    addr_is_reserved = reservation_set.idx == decoded_addr.idx && reservation_set.reserved;
+  end
+endmodule
diff --git a/source_code/cache_generic/cache_hit_logic.sv b/source_code/cache_generic/cache_hit_logic.sv
new file mode 100644
index 000000000..5c3d3d0bf
--- /dev/null
+++ b/source_code/cache_generic/cache_hit_logic.sv
@@ -0,0 +1,69 @@
+// Module: cache_hit_logic
+// Description:
+//   combinational tag compare. The first always_comb looks up the processor address in
+//   sramRead and drives hit/hit_idx/hit_data/pass_through/coherence_hit/sc_valid_block;
+//   the second looks up bus_frame_tag in read_tag_bits and drives the ccif snoop outputs.
+`include "cache_generic_struct.vh"
+`include "generic_bus_if.vh"
+`include "cache_coherence_if.vh"
+
+module cache_hit_logic (
+  input cache_set sramRead,                     // set read from the data+tag sram
+  input decoded_cache_addr_t decoded_addr,      // processor address split into fields
+  input logic addr_is_reserved,                 // forwarded to sc_valid_block on a valid tag match
+  input cache_fsm_t state_in,                   // current cache FSM state
+  input cache_tag_t read_tag_bits [ASSOC-1:0],  // one tag entry per way; indexed per way in the snoop lookup
+  input logic[N_TAG_BITS-1:0] bus_frame_tag,    // tag compared against read_tag_bits
+  output logic hit,
+  output logic [N_FRAME_BITS-1:0] hit_idx,
+  output word_t [BLOCK_SIZE-1:0] hit_data,
+  output logic pass_through,
+  output logic coherence_hit,
+  output logic sc_valid_block,
+  generic_bus_if.generic_bus proc_gen_bus_if,
+  cache_coherence_if.cache ccif
+);
+  // Hit logic with pass through
+  // CPU and bus sram have different always_comb blocks to prevent false
+  // circular logic
+  always_comb begin
+    hit = 0;
+    hit_idx = 0;
+    hit_data = 0;
+    pass_through = proc_gen_bus_if.addr >= NONCACHE_START_ADDR; // addresses at or above this bypass the lookup
+    coherence_hit = 0;
+    sc_valid_block = 0;
+
+    if (!pass_through) begin
+      for(int i = 0; i < ASSOC; i++) begin
+        if(sramRead.frames[i].tag.tag_bits == decoded_addr.idx.tag_bits && sramRead.frames[i].tag.valid) begin
+          sc_valid_block = addr_is_reserved;
+          coherence_hit = sramRead.frames[i].tag.dirty || sramRead.frames[i].tag.exclusive;
+          // In HIT: a read hits on any valid match, a write only on a dirty/exclusive frame. In SNOOP: any valid match hits.
+          if((state_in == HIT && (proc_gen_bus_if.ren || (proc_gen_bus_if.wen && coherence_hit))) || state_in == SNOOP) begin
+            hit = 1'b1;
+            hit_data = sramRead.frames[i].data;
+            hit_idx = i;
+          end
+        end
+      end
+    end
+  end
+
+  // Snoop-side lookup: report the state bits of the way whose valid tag equals bus_frame_tag
+  always_comb begin
+    ccif.snoop_hit = 0;
+    ccif.valid = 0;
+    ccif.dirty = 0;
+    ccif.exclusive = 0;
+
+    for(int i = 0; i < ASSOC; i++) begin
+      if (read_tag_bits[i].tag_bits == bus_frame_tag && read_tag_bits[i].valid) begin
+        ccif.snoop_hit = 1'b1;
+        ccif.valid = read_tag_bits[i].valid;
+        ccif.dirty = read_tag_bits[i].dirty;
+        ccif.exclusive = read_tag_bits[i].exclusive;
+      end
+    end
+  end
+endmodule
\ No newline at end of file
diff --git a/source_code/cache_generic/cache_sm.sv b/source_code/cache_generic/cache_sm.sv
new file mode 100644
index 000000000..e4ea39c4c
--- /dev/null
+++ b/source_code/cache_generic/cache_sm.sv
@@ -0,0 +1,100 @@
+// Module: cache_sm
+// Description:
+// cache state machine; controls the state of the cache, which is one of
+// IDLE:
+// HIT:
+// FETCH:
+// WB:
+// SNOOP:
+// FLUSH_CACHE:
+// CANCEL_REQ:
+`include "cache_generic_struct.vh"
+`include "generic_bus_if.vh"
+`include "cache_coherence_if.vh"
+
+module cache_sm (
+  input logic CLK,
+  input logic nRST,
+  input logic idle_done,
+  input logic reserve,
+  input logic sc_valid_block,
+  input logic pass_through,
+  input logic hit,
+  input cache_set sramRead, // struct type: the next-state logic reads sramRead.frames[...].tag.dirty
+  input logic flush,
+  input logic flush_req,
+  input logic flush_done, //***
+
+
generic_bus_if.cpu mem_gen_bus_if,
+  generic_bus_if.generic_bus proc_gen_bus_if,
+  cache_coherence_if.cache ccif,
+
+  output cache_fsm_t state_out // registered FSM state (assigned from state below)
+);
+
+  cache_fsm_t state, next_state;
+  assign state_out = state;
+
+  // cache state machine: sequential update, asynchronous active-low reset to IDLE
+  always_ff @ (posedge CLK, negedge nRST) begin
+    if(~nRST) begin
+      state <= IDLE;
+    end else begin
+      state <= next_state;
+    end
+  end
+
+  // next state logic. NOTE(review): ridx is read in HIT but is neither a port nor a local of this module - confirm it should be an input.
+  always_comb begin
+    next_state = state; // default: hold the current state
+    casez(state)
+      IDLE: begin
+        if (idle_done) // Used when flushing
+          next_state = HIT;
+      end
+      HIT: begin
+        if (ccif.snoop_hit && !ccif.snoop_busy)
+          next_state = SNOOP;
+        else if (proc_gen_bus_if.wen && reserve && !sc_valid_block && ~pass_through) // Don't transition on a failed sc
+          next_state = state;
+        else if ((proc_gen_bus_if.ren || proc_gen_bus_if.wen) && ~hit && sramRead.frames[ridx].tag.dirty && ~pass_through)
+          next_state = WB;
+        else if ((proc_gen_bus_if.ren || proc_gen_bus_if.wen) && ~hit && ~sramRead.frames[ridx].tag.dirty && ~pass_through)
+          next_state = FETCH;
+        if (flush || flush_req) // separate if after the chain above, so a flush overrides whatever it selected
+          next_state = FLUSH_CACHE;
+      end
+      FETCH: begin
+        if (!mem_gen_bus_if.busy || mem_gen_bus_if.error)
+          next_state = HIT;
+        else if (ccif.snoop_hit && !ccif.snoop_busy)
+          next_state = SNOOP;
+        else if (!ccif.abort_bus && !proc_gen_bus_if.ren && !proc_gen_bus_if.wen)
+          next_state = CANCEL_REQ;
+      end
+      WB: begin
+        if (!mem_gen_bus_if.busy)
+          next_state = HIT;
+        else if (ccif.snoop_hit && !ccif.snoop_busy)
+          next_state = SNOOP;
+      end
+      SNOOP: begin
+        next_state = ccif.snoop_req ? SNOOP :
+                     flush_req ?
FLUSH_CACHE : HIT;
+      end
+      FLUSH_CACHE: begin
+        if (flush_done)
+          next_state = HIT;
+        else if (ccif.snoop_hit && !ccif.snoop_busy)
+          next_state = SNOOP;
+      end
+      CANCEL_REQ: begin
+        if (!mem_gen_bus_if.busy) begin
+          next_state = HIT;
+        end else if (ccif.snoop_hit && !ccif.snoop_busy) begin
+          next_state = SNOOP;
+        end
+      end
+    endcase
+  end
+endmodule
\ No newline at end of file
diff --git a/source_code/cache_generic/sram.sv b/source_code/cache_generic/sram.sv
new file mode 100644
index 000000000..94bdee167
--- /dev/null
+++ b/source_code/cache_generic/sram.sv
@@ -0,0 +1,36 @@
+// Module: sram
+// Description:
+//   SRAM_HEIGHT entries of SRAM_WR_SIZE bits, selected by SEL.
+//   Read is combinational: rVal is sramMemory[SEL] while REN is high.
+//   Write is registered: while WEN is high the entry at SEL is updated at the next posedge CLK.
+//   wMask is a keep-mask: a 1 bit keeps the stored bit, a 0 bit takes the bit from wVal.
+//   nRST is not used; the array contents are not reset.
+module sram #(
+  parameter SRAM_WR_SIZE = 128,
+  parameter SRAM_HEIGHT = 128
+)
+(
+  input logic CLK, nRST,
+  input logic [SRAM_WR_SIZE-1:0] wVal,
+  output logic [SRAM_WR_SIZE-1:0] rVal,
+  input logic REN, WEN,
+  input logic [$clog2(SRAM_HEIGHT)-1:0] SEL,
+  input logic [SRAM_WR_SIZE-1:0] wMask
+);
+  typedef logic [SRAM_WR_SIZE-1:0] sram_entry_size_t;
+  sram_entry_size_t [SRAM_HEIGHT-1:0] sramMemory;
+  sram_entry_size_t [SRAM_HEIGHT-1:0] n_sramMemory;
+
+  always_ff @(posedge CLK) begin
+    sramMemory <= n_sramMemory;
+  end
+
+  always_comb begin
+    n_sramMemory = sramMemory;
+    rVal = SRAM_WR_SIZE'(32'hBAD0BAD0); // placeholder while REN is low, sized explicitly to the port width
+    if (WEN)
+      n_sramMemory[SEL] = (wVal & ~wMask) | (wMask & sramMemory[SEL]);
+    if (REN)
+      rVal = sramMemory[SEL];
+  end
+endmodule
\ No newline at end of file
diff --git a/source_code/cache_generic/tb_cache_generic/tb_cache_generic.sv b/source_code/cache_generic/tb_cache_generic/tb_cache_generic.sv
new file mode 100644
index 000000000..c2f65deab
--- /dev/null
+++ b/source_code/cache_generic/tb_cache_generic/tb_cache_generic.sv
@@ -0,0 +1,39 @@
+`include "cache_generic_struct.sv" // NOTE(review): sibling modules include cache_generic_struct.vh - confirm this file name
+`include "generic_bus_if.vh"
+`include "cache_coherence_if.vh"
+
+module tb_cache_generic ();
+  import rv32i_types_pkg::*;
+
+  logic tb_CLK;
+  logic tb_nRST;
+  logic tb_clear;
+  logic tb_flush;
+  logic tb_reserve;
+  logic tb_exclusive;
+  logic tb_clear_done;
+  logic tb_flush_done;
+  generic_bus_if tb_mem_gen_bus_if(); // interface instance; the modport is chosen by the DUT port it connects to
+
generic_bus_if tb_proc_gen_bus_if(); // interface instance; the modport is chosen by the DUT port it connects to
+  cache_coherence_if tb_ccif();        // interface instance; the modport is chosen by the DUT port it connects to
+
+  cache_generic #(
+    .CACHE_SIZE(),
+    .BLOCK_SIZE(),
+    .ASSOC(),
+    .NONCACHE_START_ADDR()
+  ) DUT_cache_generic (
+    .CLK(tb_CLK), // Clock signal
+    .nRST(tb_nRST), // Negedge reset signal
+    .clear(tb_clear), //
+    .flush(tb_flush), //
+    .reserve(tb_reserve), //
+    .exclusive(tb_exclusive), //
+    .clear_done(tb_clear_done), //
+    .flush_done(tb_flush_done), //
+    .mem_gen_bus_if(tb_mem_gen_bus_if), // bus to downstream memory, to main memory
+    .proc_gen_bus_if(tb_proc_gen_bus_if), // bus to upstream memory, to CPU
+    .ccif(tb_ccif) // Coherency interface, connected to coherency unit
+  );
+
+endmodule
\ No newline at end of file
diff --git a/source_code/caches/tb/tb_caches.sv b/source_code/caches/tb/tb_caches.sv
new file mode 100644
index 000000000..3e5c7d54f
--- /dev/null
+++ b/source_code/caches/tb/tb_caches.sv
@@ -0,0 +1,515 @@
+/*
+* Copyright 2016 Purdue University
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*
+*
+* Filename: tb_caches.sv
+*
+* Created by: John Skubic
+* Email: jjs.skubic@gmail.com
+* Date Created: 05/26/2017
+* Description: Blackbox testbench for the caches. This should be used to test any
+* newly developed cache for correctness. 
Test cases include: +* - replacement up to 8 ways and 8 word blocks +* - cache line clear +* - cache flush +* - constrained random testing of read and write xactions +*/ + +`include "generic_bus_if.vh" + +module tb_caches (); + + import rv32i_types_pkg::*; + + parameter NUM_TESTS = 1000; + parameter NUM_ADDRS = 20; + parameter PERIOD = 20; + parameter DELAY = 5; + parameter CACHE_SELECT = "direct_mapped_tpf";// "pass_through"; + + parameter SEED = 11; + parameter VERBOSE = 0; + + parameter CACHE_CONTROL = 1'b1; + parameter TB_CONTROL = 1'b0; + + parameter DATA_1 = 32'h12ab_89ef; + /* TAG_BIT needed because memory doesn't use full 32 bit addr space*/ + parameter TAG_BIT = 14; + + // -- TB Variables -- // + + logic CLK, nRST; + integer seed; + + logic [RAM_ADDR_SIZE-1:0] tb_addr; + word_t tb_wdata; + logic [3:0] tb_byte_sel; + logic tb_xaction_type; + word_t tb_DUT_rdata; + word_t tb_gold_rdata; + integer i,j, error_cnt; + logic mem_ctrl; + logic [RAM_ADDR_SIZE-1:0] tb_addr_array [NUM_ADDRS]; + + // -- DUT -- // + + generic_bus_if DUT_bus_if(); + generic_bus_if tb_bus_if(); + generic_bus_if DUT_ram_if(); + generic_bus_if cache_2_ram_if(); + logic DUT_flush, DUT_clear; + + generate + if (CACHE_SELECT == "pass_through") begin + pass_through_cache DUT ( + .CLK(CLK), + .nRST(nRST), + .proc_gen_bus_if(DUT_bus_if), + .mem_gen_bus_if(cache_2_ram_if) + ); + end else if (CACHE_SELECT == "direct_mapped_tpf") begin + direct_mapped_tpf_cache DUT ( + .CLK(CLK), + .nRST(nRST), + .proc_gen_bus_if(DUT_bus_if), + .mem_gen_bus_if(cache_2_ram_if), + .clear(DUT_clear), + .flush(DUT_flush), + .clear_done(), + .flush_done() + ); + end + endgenerate + + // multiplexor for testbench cache bypass to memory + assign DUT_ram_if.addr = (mem_ctrl == CACHE_CONTROL) ? cache_2_ram_if.addr : + tb_bus_if.addr; + assign DUT_ram_if.wdata = (mem_ctrl == CACHE_CONTROL) ? cache_2_ram_if.wdata : + tb_bus_if.wdata; + assign DUT_ram_if.ren = (mem_ctrl == CACHE_CONTROL) ? 
cache_2_ram_if.ren :
+                          tb_bus_if.ren;
+  assign DUT_ram_if.wen = (mem_ctrl == CACHE_CONTROL) ? cache_2_ram_if.wen :
+                          tb_bus_if.wen;
+  assign DUT_ram_if.byte_en = (mem_ctrl == CACHE_CONTROL) ? cache_2_ram_if.byte_en :
+                              tb_bus_if.byte_en;
+  assign cache_2_ram_if.rdata = DUT_ram_if.rdata;
+  assign tb_bus_if.rdata = DUT_ram_if.rdata;
+  assign cache_2_ram_if.busy = !(mem_ctrl == CACHE_CONTROL) || DUT_ram_if.busy; // the side that does not own the RAM always sees busy
+  assign tb_bus_if.busy = !(mem_ctrl == TB_CONTROL) || DUT_ram_if.busy;
+
+  ram_wrapper DUT_ram (
+    .CLK(CLK),
+    .nRST(nRST),
+    .gen_bus_if(DUT_ram_if)
+  );
+
+  // -- Gold Model -- //
+
+  generic_bus_if gold_bus_if();
+
+  ram_wrapper gold_ram (
+    .CLK(CLK),
+    .nRST(nRST),
+    .gen_bus_if(gold_bus_if)
+  );
+
+  // -- Clock Generation -- //
+
+  initial begin : CLK_INIT
+    CLK = 1'b0;
+  end : CLK_INIT
+
+  always begin : CLK_GEN
+    #(PERIOD/2) CLK = ~CLK;
+  end : CLK_GEN
+
+
+  // -- Testing -- //
+
+  initial begin : MAIN
+
+    //-- Initial reset --//
+    nRST = 0;
+    DUT_flush = 0;
+    DUT_clear = 0;
+    set_mem_ctrl(CACHE_CONTROL);
+    set_ren(1'b0);
+    set_wen(1'b0);
+    set_addr('0);
+    set_wdata('0);
+    set_byte_en('0);
+
+    // -- Setup Seed for randomized testing -- //
+    error_cnt = 0;
+    seed = SEED;
+    void'($urandom(seed)); // seeds the generator; $urandom is a function, so its return value is discarded explicitly
+
+    #(DELAY);
+    @(posedge CLK);
+    nRST = 1;
+    @(posedge CLK);
+
+    // -- Basic Testing -- //
+
+    $info("---------- Beginning Basic Test Cases ---------");
+
+    // Write a word to memory and perform a read
+
+    tb_addr = 0;
+    tb_wdata = DATA_1;
+
+    write_mem(tb_addr, tb_wdata, 4'hf);
+    read_cache_check(tb_addr);
+
+    // write word to cache
+
+    tb_addr = tb_addr + 4;
+    write_cache(tb_addr, tb_wdata, 4'hf);
+    read_cache_check(tb_addr);
+
+    // write halfwords to cache
+    tb_addr = tb_addr + 4;
+    write_cache(tb_addr, tb_wdata, 4'h3);
+    read_cache_check(tb_addr);
+
+    tb_addr = tb_addr + 4;
+    write_cache(tb_addr, tb_wdata, 4'hc);
+    read_cache_check(tb_addr);
+
+    // write quarterwords to cache
+
+    tb_addr = tb_addr + 4;
+    write_cache(tb_addr, tb_wdata, 4'h1);
+    read_cache_check(tb_addr);
+
+    tb_addr = tb_addr + 4;
+    write_cache(tb_addr, tb_wdata, 4'h2);
+    read_cache_check(tb_addr);
+
+    tb_addr = tb_addr + 4;
+    write_cache(tb_addr, tb_wdata, 4'h4);
+    read_cache_check(tb_addr);
+
+    tb_addr = tb_addr + 4;
+    write_cache(tb_addr, tb_wdata, 4'h8);
+    read_cache_check(tb_addr);
+
+    // -- Testing Replacement -- //
+
+    $info("---------- Beginning Replacement Testing----------");
+
+    // Write to different address to force replacements
+    tb_addr = 0;
+    for (i = 0; i < 9; i++) begin // iterate through all the ways
+      tb_addr[TAG_BIT-1 -: 4] = 4'(i); // set bits in the tag
+      for (j = 0; j < 8; j++) begin // iterate through blocks and write to each word
+        tb_addr[4:2] = 3'(j);
+        tb_wdata = $urandom;
+        write_cache(tb_addr, tb_wdata, 4'hf);
+      end
+    end
+    // Read from the previously written addresses
+    tb_addr = 0;
+    for (i = 0; i < 9; i++) begin // iterate through all the ways
+      tb_addr[TAG_BIT-1 -: 4] = 4'(i); // set bits in the tag
+      for (j = 0; j < 8; j++) begin // iterate through blocks and read back each word
+        tb_addr[4:2] = 3'(j);
+        read_cache_check(tb_addr);
+      end
+    end
+
+    // -- Random Testing -- //
+
+    $info("---------- Beginning Random Testing of %0d Xactions %0d Unique Addrs ----------", NUM_TESTS, NUM_ADDRS);
+
+    // Generate the addresses and fill mem with random values
+    for (i = 0; i < NUM_ADDRS; i++) begin
+      j = $urandom;
+      tb_addr_array[i] = j & 32'hffff_fffc;
+      tb_wdata = $urandom;
+      write_mem(tb_addr_array[i] , tb_wdata, 4'hf);
+    end
+
+    for (i = 0; i < NUM_TESTS; i++) begin
+      tb_xaction_type = 1'($urandom%2);
+      j = $urandom%NUM_ADDRS;
+      tb_addr = tb_addr_array[j];
+      tb_wdata = $urandom;
+      case ($urandom%7) // same seven byte-enable patterns as the basic tests: f, 1, 2, 3, 4, 8, c
+        0 : tb_byte_sel = 4'hf;
+        1 : tb_byte_sel = 4'h1;
+        2 : tb_byte_sel = 4'h2;
+        3 : tb_byte_sel = 4'h3;
+        4 : tb_byte_sel = 4'h4;
+        5 : tb_byte_sel = 4'h8; // was a second 4'h3, which left the top byte lane untested
+        6 : tb_byte_sel = 4'hc;
+        default : tb_byte_sel = 4'hf;
+      endcase
+
+      if (tb_xaction_type == 0) begin // write
+        if(VERBOSE) begin
+          $info("\nXaction %0d -- Write 
-- Addr: %0h Wdata: %0h Byte_en: %h", + i, tb_addr, tb_wdata, tb_byte_sel); + end + write_cache(tb_addr, tb_wdata, tb_byte_sel); + end else begin // read + if(VERBOSE) begin + $info("\nXaction %0d -- Read -- Addr: %0h", i, tb_addr); + end + read_cache_check(tb_addr); + end + end + + // -- Cache Clear -- // + + $info("---------- Beginning Cache Clear Testing ----------"); + + tb_addr = 0; + tb_wdata = $urandom; + read_cache_check(tb_addr); + clear_line(tb_addr); + write_mem(tb_addr, tb_wdata, 4'hf); + read_cache_check(tb_addr); + + // -- Cache Flush -- // + + $info("---------- Beginning Cache Flush Testing ----------"); + + // fill cache contents + tb_addr = 0; + for (i = 0; i < 9; i++) begin // iterate through all the ways + tb_addr[TAG_BIT-1 -: 4] = 4'(i); // set bits in the tag + for (j = 0; j < 8; j++) begin // iterate through blocks and write to each word + tb_addr[4:2] = 3'(j); + read_cache_check(tb_addr); + end + end + + // flush cache + flush_cache(); + + // Read to dummy addr to ensure flushing is completed + tb_addr = '1; + read_cache_check(tb_addr); + + // write directly to mem + tb_addr = 0; + for (i = 0; i < 9; i++) begin // iterate through all the ways + tb_addr[TAG_BIT-1 -: 4] = 4'(i); // set bits in the tag + for (j = 0; j < 8; j++) begin // iterate through blocks and write to each word + tb_addr[4:2] = 3'(j); + tb_wdata = $urandom; + write_mem(tb_addr, tb_wdata, 4'hf); + end + end + + // re-read memory to ensure up to date data is received + tb_addr = 0; + for (i = 0; i < 9; i++) begin // iterate through all the ways + tb_addr[TAG_BIT-1 -: 4] = 4'(i); // set bits in the tag + for (j = 0; j < 8; j++) begin // iterate through blocks and write to each word + tb_addr[4:2] = 3'(j); + read_cache_check(tb_addr); + end + end + + + $info("\n---------- Testing Completed Successfully---------\n", error_cnt); + + $finish; + end : MAIN + + + + // --- Helper Tasks and Functions --- // + + // read_cache + // Reads a value from memory through the cache interface + 
task read_cache; + input [RAM_ADDR_SIZE-1:0] read_addr; + output word_t DUT_rdata; + output word_t gold_rdata; + + set_mem_ctrl(CACHE_CONTROL); + set_ren(1'b1); + set_wen(1'b0); + set_addr(read_addr); + set_byte_en(4'b1111); + + @(posedge CLK); + + while (caches_busy()) + @(posedge CLK); + + DUT_rdata = DUT_bus_if.rdata; + gold_rdata = gold_bus_if.rdata; + endtask + + // read_cache_check + // Reads a value from memory and reports an error if there is a mismatch. + task read_cache_check; + input [RAM_ADDR_SIZE-1:0] read_addr; + + word_t DUT_rdata; + word_t gold_rdata; + + read_cache(read_addr, DUT_rdata, gold_rdata); + + if (DUT_rdata !== gold_rdata) begin + $info("\nData Mismatch \nAddr: 0x%0h\nExpected: 0x%0h\nReceived: 0x%0h\n", + read_addr, gold_rdata, DUT_rdata); + error_cnt = error_cnt + 1; + #(DELAY); + $finish; + end + + endtask + + // write_cache + // Writes a value to memory through the cache interface + task write_cache; + input [RAM_ADDR_SIZE-1:0] write_addr; + input word_t write_data; + input logic [3:0] write_byte_en; + + set_mem_ctrl(CACHE_CONTROL); + set_ren(1'b0); + set_wen(1'b1); + set_addr(write_addr); + set_wdata(write_data); + set_byte_en(write_byte_en); + + @(posedge CLK); + + while (caches_busy()) + @(posedge CLK); + + endtask + + // write_mem + // Bypasses the caches layer and directly modifies values in memory + // This is useful to test clearing and flushing functionality + task write_mem; + input logic [RAM_ADDR_SIZE-1:0] write_addr; + input word_t write_data; + input logic [3:0] write_byte_en; + + set_mem_ctrl(TB_CONTROL); + set_ren(1'b0); + set_wen(1'b1); + set_addr(write_addr); + set_wdata(write_data); + set_byte_en(write_byte_en); + + @(posedge CLK); + + while (mem_busy()) + @(posedge CLK); + + endtask + + // clear_line + // Sends the request to clear a cache line to the cache + task clear_line; + input logic [RAM_ADDR_SIZE-1:0] clear_addr; + + DUT_clear = 1'b1; + set_addr(clear_addr); + @(posedge CLK); + DUT_clear = 1'b0; + endtask + 
+ // flush + // Sends the request to flush the entire contents of the cache + task flush_cache; + DUT_flush = 1'b1; + @(posedge CLK); + DUT_flush = 1'b0; + endtask + + // caches_busy + // blocks execution until the DUT and gold model are no longer busy + function caches_busy; + caches_busy = (DUT_bus_if.busy || gold_bus_if.busy); + endfunction + + // mem_busy + // blocks execution until the TB memory bypass and gold model are no longer busy + function mem_busy; + mem_busy = (tb_bus_if.busy || gold_bus_if.busy); + endfunction + + // set_addr + // Sets the address to the DUT and gold model + task set_addr; + input logic [RAM_ADDR_SIZE-1:0] new_addr; + + DUT_bus_if.addr = new_addr; + gold_bus_if.addr = new_addr; + tb_bus_if.addr = new_addr; + endtask + + // set_wdata + // sets the write data to the DUT and gold model + task set_wdata; + input word_t new_wdata; + + DUT_bus_if.wdata = new_wdata; + gold_bus_if.wdata = new_wdata; + tb_bus_if.wdata = new_wdata; + endtask + + // set_wen + // sets the write enable to the DUT and gold model + task set_wen; + input logic new_wen; + + DUT_bus_if.wen = new_wen; + gold_bus_if.wen = new_wen; + tb_bus_if.wen = new_wen; + endtask + + // set_ren + // sets the read enable to the DUT and gold model + task set_ren; + input logic new_ren; + + DUT_bus_if.ren = new_ren; + gold_bus_if.ren = new_ren; + tb_bus_if.ren = new_ren; + endtask + + // set_byte_en + // sets the byte enable to the DUT and gold model + task set_byte_en; + input logic [3:0] new_byte_en; + + DUT_bus_if.byte_en = new_byte_en; + gold_bus_if.byte_en = new_byte_en; + tb_bus_if.byte_en = new_byte_en; + endtask + + // set_mem_ctrl + // Sets the memory control. A value of 1 indicates the cache + // has access to memory. A value of 0 indicates the tb has + // access to memory. 
+ task set_mem_ctrl; + input logic new_mem_ctrl; + + mem_ctrl = new_mem_ctrl; + endtask + +endmodule diff --git a/source_code/caches/tb/tb_l1_caches.sv b/source_code/caches/tb/tb_l1_caches.sv new file mode 100644 index 000000000..7dc21d801 --- /dev/null +++ b/source_code/caches/tb/tb_l1_caches.sv @@ -0,0 +1,515 @@ +/* +* Copyright 2016 Purdue University +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +* +* +* Filename: tb_caches.sv +* +* Created by: John Skubic +* Email: jjs.skubic@gmail.com +* Date Created: 05/26/2017 +* Description: Blackbox testbench for the caches. This should be used to test any +* newly developed cache for correctness. 
Test cases include: +* - replacement up to 8 ways and 8 word blocks +* - cache line clear +* - cache flush +* - constrained random testing of read and write xactions +*/ + +`include "generic_bus_if.vh" + +module tb_l1_caches (); + + import rv32i_types_pkg::*; + + parameter NUM_TESTS = 1000; + parameter NUM_ADDRS = 20; + parameter PERIOD = 20; + parameter DELAY = 5; + parameter CACHE_SELECT = "direct_mapped_tpf";// "pass_through"; + + parameter SEED = 11; + parameter VERBOSE = 0; + + parameter CACHE_CONTROL = 1'b1; + parameter TB_CONTROL = 1'b0; + + parameter DATA_1 = 32'h12ab_89ef; + /* TAG_BIT needed because memory doesn't use full 32 bit addr space*/ + parameter TAG_BIT = 14; + + // -- TB Variables -- // + + logic CLK, nRST; + integer seed; + + logic [RAM_ADDR_SIZE-1:0] tb_addr; + word_t tb_wdata; + logic [3:0] tb_byte_sel; + logic tb_xaction_type; + word_t tb_DUT_rdata; + word_t tb_gold_rdata; + integer i,j, error_cnt; + logic mem_ctrl; + logic [RAM_ADDR_SIZE-1:0] tb_addr_array [NUM_ADDRS]; + + // -- DUT -- // + + generic_bus_if DUT_bus_if(); + generic_bus_if tb_bus_if(); + generic_bus_if DUT_ram_if(); + generic_bus_if cache_2_ram_if(); + cache_coherence_if d_cache_coherency_if(); + + logic DUT_flush, DUT_clear; + logic tb_reserve, tb_exclusive; + l1_cache #( + .CACHE_SIZE(1024), + .BLOCK_SIZE(2), + .ASSOC(1), + .NONCACHE_START_ADDR(32'hF000_0000) + ) cache_generic + ( + .CLK(CLK), + .nRST(nRST), + .clear(DUT_clear), + .flush(DUT_flush), + .reserve(tb_reserve), + .exclusive(tb_exclusive), + .clear_done(), + .flush_done(), + .mem_gen_bus_if(cache_2_ram_if), + .proc_gen_bus_if(DUT_bus_if), + .ccif(d_cache_coherency_if) //Coherency interface, connected to coherency unit + ); + + // multiplexor for testbench cache bypass to memory + assign DUT_ram_if.addr = (mem_ctrl == CACHE_CONTROL) ? cache_2_ram_if.addr : + tb_bus_if.addr; + assign DUT_ram_if.wdata = (mem_ctrl == CACHE_CONTROL) ? 
cache_2_ram_if.wdata : + tb_bus_if.wdata; + assign DUT_ram_if.ren = (mem_ctrl == CACHE_CONTROL) ? cache_2_ram_if.ren : + tb_bus_if.ren; + assign DUT_ram_if.wen = (mem_ctrl == CACHE_CONTROL) ? cache_2_ram_if.wen : + tb_bus_if.wen; + assign DUT_ram_if.byte_en = (mem_ctrl == CACHE_CONTROL) ? cache_2_ram_if.byte_en : + tb_bus_if.byte_en; + assign cache_2_ram_if.rdata = DUT_ram_if.rdata; + assign tb_bus_if.rdata = DUT_ram_if.rdata; + assign cache_2_ram_if.busy = !(mem_ctrl == CACHE_CONTROL) || DUT_ram_if.busy; + assign tb_bus_if.busy = !(mem_ctrl == TB_CONTROL) || DUT_ram_if.busy; + + ram_wrapper DUT_ram ( + .CLK(CLK), + .nRST(nRST), + .gen_bus_if(DUT_ram_if) + ); + + // -- Gold Model -- // + + generic_bus_if gold_bus_if(); + + ram_wrapper gold_ram ( + .CLK(CLK), + .nRST(nRST), + .gen_bus_if(gold_bus_if) + ); + + // -- Clock Generation -- // + + initial begin : CLK_INIT + CLK = 1'b0; + end : CLK_INIT + + always begin : CLK_GEN + #(PERIOD/2) CLK = ~CLK; + end : CLK_GEN + + + // -- Testing -- // + + initial begin : MAIN + + //-- Initial reset --// + nRST = 0; + DUT_flush = 0; + DUT_clear = 0; + set_mem_ctrl(CACHE_CONTROL); + set_ren(1'b0); + set_wen(1'b0); + set_addr('0); + set_wdata('0); + set_byte_en('0); + + // -- Setup Seed for randomized testing -- // + error_cnt = 0; + seed = SEED; + $urandom(seed); + + #(DELAY); + @(posedge CLK); + nRST = 1; + @(posedge CLK); + + // -- Basic Testing -- // + + $info("---------- Beginning Basic Test Cases ---------"); + + // Write a word to memory and perform a read + + tb_addr = 0; + tb_wdata = DATA_1; + + write_mem(tb_addr, tb_wdata, 4'hf); + read_cache_check(tb_addr); + + // write word to cache + + tb_addr = tb_addr + 4; + write_cache(tb_addr, tb_wdata, 4'hf); + read_cache_check(tb_addr); + + // write halfwords to cache + tb_addr = tb_addr + 4; + write_cache(tb_addr, tb_wdata, 4'h3); + read_cache_check(tb_addr); + + tb_addr = tb_addr + 4; + write_cache(tb_addr, tb_wdata, 4'hc); + read_cache_check(tb_addr); + + // write 
quarterwords to cache + + tb_addr = tb_addr + 4; + write_cache(tb_addr, tb_wdata, 4'h1); + read_cache_check(tb_addr); + + tb_addr = tb_addr + 4; + write_cache(tb_addr, tb_wdata, 4'h2); + read_cache_check(tb_addr); + + tb_addr = tb_addr + 4; + write_cache(tb_addr, tb_wdata, 4'h4); + read_cache_check(tb_addr); + + tb_addr = tb_addr + 4; + write_cache(tb_addr, tb_wdata, 4'h8); + read_cache_check(tb_addr); + + // -- Testing Replacement -- // + + $info("---------- Beginning Replacement Testing----------"); + + // Write to different address to force replacements + tb_addr = 0; + for (i = 0; i < 9; i++) begin // iterate through all the ways + tb_addr[TAG_BIT-1 -: 4] = 4'(i); // set bits in the tag + for (j = 0; j < 8; j++) begin // iterate through blocks and write to each word + tb_addr[4:2] = 3'(j); + tb_wdata = $urandom; + write_cache(tb_addr, tb_wdata, 4'hf); + end + end + // Read from the previously written addresses + tb_addr = 0; + for (i = 0; i < 9; i++) begin // iterate through all the ways + tb_addr[TAG_BIT-1 -: 4] = 4'(i); // set bits in the tag + for (j = 0; j < 8; j++) begin // iterate through blocks and write to each word + tb_addr[4:2] = 3'(j); + read_cache_check(tb_addr); + end + end + + // -- Random Testing -- // + + $info("---------- Beginning Random Testing of %0d Xactions %0d Unique Addrs ----------", NUM_TESTS, NUM_ADDRS); + + // Generate the addresses and fill mem with random values + for (i = 0; i < NUM_ADDRS; i++) begin + j = $urandom; + tb_addr_array[i] = j & 32'hffff_fffc; + tb_wdata = $urandom; + write_mem(tb_addr_array[i] , tb_wdata, 4'hf); + end + + for (i = 0; i < NUM_TESTS; i++) begin + tb_xaction_type = 1'($urandom%2); + j = $urandom%NUM_ADDRS; + tb_addr = tb_addr_array[j]; + tb_wdata = $urandom; + case ($urandom%7) + 0 : tb_byte_sel = 4'hf; + 1 : tb_byte_sel = 4'h1; + 2 : tb_byte_sel = 4'h2; + 3 : tb_byte_sel = 4'h3; + 4 : tb_byte_sel = 4'h4; + 5 : tb_byte_sel = 4'h3; + 6 : tb_byte_sel = 4'hc; + default : tb_byte_sel = 4'hf; + endcase + + 
if (tb_xaction_type == 0) begin // write + if(VERBOSE) begin + $info("\nXaction %0d -- Write -- Addr: %0h Wdata: %0h Byte_en: %h", + i, tb_addr, tb_wdata, tb_byte_sel); + end + write_cache(tb_addr, tb_wdata, tb_byte_sel); + end else begin // read + if(VERBOSE) begin + $info("\nXaction %0d -- Read -- Addr: %0h", i, tb_addr); + end + read_cache_check(tb_addr); + end + end + + // -- Cache Clear -- // + + $info("---------- Beginning Cache Clear Testing ----------"); + + tb_addr = 0; + tb_wdata = $urandom; + read_cache_check(tb_addr); + clear_line(tb_addr); + write_mem(tb_addr, tb_wdata, 4'hf); + read_cache_check(tb_addr); + + // -- Cache Flush -- // + + $info("---------- Beginning Cache Flush Testing ----------"); + + // fill cache contents + tb_addr = 0; + for (i = 0; i < 9; i++) begin // iterate through all the ways + tb_addr[TAG_BIT-1 -: 4] = 4'(i); // set bits in the tag + for (j = 0; j < 8; j++) begin // iterate through blocks and write to each word + tb_addr[4:2] = 3'(j); + read_cache_check(tb_addr); + end + end + + // flush cache + flush_cache(); + + // Read to dummy addr to ensure flushing is completed + tb_addr = '1; + read_cache_check(tb_addr); + + // write directly to mem + tb_addr = 0; + for (i = 0; i < 9; i++) begin // iterate through all the ways + tb_addr[TAG_BIT-1 -: 4] = 4'(i); // set bits in the tag + for (j = 0; j < 8; j++) begin // iterate through blocks and write to each word + tb_addr[4:2] = 3'(j); + tb_wdata = $urandom; + write_mem(tb_addr, tb_wdata, 4'hf); + end + end + + // re-read memory to ensure up to date data is received + tb_addr = 0; + for (i = 0; i < 9; i++) begin // iterate through all the ways + tb_addr[TAG_BIT-1 -: 4] = 4'(i); // set bits in the tag + for (j = 0; j < 8; j++) begin // iterate through blocks and write to each word + tb_addr[4:2] = 3'(j); + read_cache_check(tb_addr); + end + end + + + $info("\n---------- Testing Completed Successfully---------\n", error_cnt); + + $finish; + end : MAIN + + + + // --- Helper Tasks and 
+  // Functions --- //
+
+  // read_cache
+  // Reads a value from memory through the cache interface
+  // Selects CACHE_CONTROL, drives a full-word read request (ren=1, wen=0,
+  // byte_en=4'b1111) at read_addr through the set_* helpers, waits one rising
+  // CLK edge and then for as long as caches_busy() is true, and finally samples
+  // rdata from both DUT_bus_if and gold_bus_if. The request signals are left
+  // driven when the task returns.
+  task read_cache;
+    input [RAM_ADDR_SIZE-1:0] read_addr; // address to read
+    output word_t DUT_rdata;             // rdata sampled from DUT_bus_if
+    output word_t gold_rdata;            // rdata sampled from gold_bus_if
+
+    set_mem_ctrl(CACHE_CONTROL);
+    set_ren(1'b1);
+    set_wen(1'b0);
+    set_addr(read_addr);
+    set_byte_en(4'b1111);
+
+    // always wait at least one clock edge before polling the busy flags
+    @(posedge CLK);
+
+    while (caches_busy())
+      @(posedge CLK);
+
+    DUT_rdata = DUT_bus_if.rdata;
+    gold_rdata = gold_bus_if.rdata;
+  endtask
+
+  // read_cache_check
+  // Reads a value from memory and reports an error if there is a mismatch.
+  // Performs read_cache(read_addr) and compares the two results with !==, so
+  // X/Z bits take part in the comparison. On a mismatch it prints the address
+  // and both values, increments error_cnt, waits DELAY and calls $finish, i.e.
+  // the simulation ends at the first mismatch.
+  task read_cache_check;
+    input [RAM_ADDR_SIZE-1:0] read_addr; // address to read and compare
+
+    word_t DUT_rdata;
+    word_t gold_rdata;
+
+    read_cache(read_addr, DUT_rdata, gold_rdata);
+
+    if (DUT_rdata !== gold_rdata) begin
+      $info("\nData Mismatch \nAddr: 0x%0h\nExpected: 0x%0h\nReceived: 0x%0h\n",
+        read_addr, gold_rdata, DUT_rdata);
+      error_cnt = error_cnt + 1;
+      #(DELAY);
+      $finish;
+    end
+
+  endtask
+
+  // write_cache
+  // Writes a value to memory through the cache interface
+  // Selects CACHE_CONTROL, drives a write request (ren=0, wen=1) with the given
+  // address, data and byte enables, waits one rising CLK edge and then for as
+  // long as caches_busy() is true.
+  task write_cache;
+    input [RAM_ADDR_SIZE-1:0] write_addr; // address to write
+    input word_t write_data;              // data word to write
+    input logic [3:0] write_byte_en;      // byte enables for the write
+
+    set_mem_ctrl(CACHE_CONTROL);
+    set_ren(1'b0);
+    set_wen(1'b1);
+    set_addr(write_addr);
+    set_wdata(write_data);
+    set_byte_en(write_byte_en);
+
+    @(posedge CLK);
+
+    while (caches_busy())
+      @(posedge CLK);
+
+  endtask
+
+  // write_mem
+  // Bypasses the caches layer and directly modifies values in memory
+  // This is useful to test clearing and flushing functionality
+  // Same request sequence as write_cache, but memory control is set to
+  // TB_CONTROL and completion is polled with mem_busy() instead of
+  // caches_busy().
+  task write_mem;
+    input logic [RAM_ADDR_SIZE-1:0] write_addr; // address to write
+    input word_t write_data;                    // data word to write
+    input logic [3:0] write_byte_en;            // byte enables for the write
+
+    set_mem_ctrl(TB_CONTROL);
+    set_ren(1'b0);
+    set_wen(1'b1);
+    set_addr(write_addr);
+    set_wdata(write_data);
+    set_byte_en(write_byte_en);
+
+    @(posedge CLK);
+
+    while (mem_busy())
+      @(posedge CLK);
+
+  endtask
+
+  // clear_line
+  // Sends the request to clear a cache line to the cache
+  // Raises DUT_clear with clear_addr driven on the address lines, waits for one
+  // rising CLK edge, then lowers DUT_clear again. It does not wait on any busy
+  // flag.
+  task clear_line;
+    input logic [RAM_ADDR_SIZE-1:0] clear_addr; // address of the line to clear
+
+    DUT_clear = 1'b1;
+    set_addr(clear_addr);
+    @(posedge CLK);
+    DUT_clear = 1'b0;
+  endtask
+
+  // flush_cache
+  // Sends the request to flush the entire contents of the cache
+  // Raises DUT_flush, waits for one rising CLK edge, then lowers it again. It
+  // does not wait for the flush to complete.
+  task flush_cache;
+    DUT_flush = 1'b1;
+    @(posedge CLK);
+    DUT_flush = 1'b0;
+  endtask
+
+  // caches_busy
+  // Returns 1 while the DUT or the gold model reports busy. The function itself
+  // does not block; callers poll it once per clock edge.
+  function caches_busy;
+    caches_busy = (DUT_bus_if.busy || gold_bus_if.busy);
+  endfunction
+
+  // mem_busy
+  // Returns 1 while the TB memory bypass or the gold model reports busy. The
+  // function itself does not block; callers poll it once per clock edge.
+  function mem_busy;
+    mem_busy = (tb_bus_if.busy || gold_bus_if.busy);
+  endfunction
+
+  // set_addr
+  // Sets the address on the DUT, gold model and TB bypass bus interfaces
+  task set_addr;
+    input logic [RAM_ADDR_SIZE-1:0] new_addr; // address to drive
+
+    DUT_bus_if.addr = new_addr;
+    gold_bus_if.addr = new_addr;
+    tb_bus_if.addr = new_addr;
+  endtask
+
+  // set_wdata
+  // Sets the write data on the DUT, gold model and TB bypass bus interfaces
+  task set_wdata;
+    input word_t new_wdata; // write data to drive
+
+    DUT_bus_if.wdata = new_wdata;
+    gold_bus_if.wdata = new_wdata;
+    tb_bus_if.wdata = new_wdata;
+  endtask
+
+  // set_wen
+  // Sets the write enable on the DUT, gold model and TB bypass bus interfaces
+  task set_wen;
+    input logic new_wen; // write enable value to drive
+
+    DUT_bus_if.wen = new_wen;
+    gold_bus_if.wen = new_wen;
+    tb_bus_if.wen = new_wen;
+  endtask
+
+  // set_ren
+  // Sets the read enable on the DUT, gold model and TB bypass bus interfaces
+  task set_ren;
+    input logic new_ren; // read enable value to drive
+
+    DUT_bus_if.ren = new_ren;
+    gold_bus_if.ren = new_ren;
+    tb_bus_if.ren = new_ren;
+  endtask
+
+  // set_byte_en
+  // Sets the byte enables on the DUT, gold model and TB bypass bus interfaces
+  task set_byte_en;
+    input logic [3:0] new_byte_en; // byte enable mask to drive
+
+    DUT_bus_if.byte_en = new_byte_en;
+    gold_bus_if.byte_en = new_byte_en;
+    tb_bus_if.byte_en = new_byte_en;
+  endtask
+
+  // set_mem_ctrl
+  // Sets the memory control. A value of 1 indicates the cache
+  // has access to memory. A value of 0 indicates the tb has
+  // access to memory.
+  task set_mem_ctrl;
+    input logic new_mem_ctrl; // callers in this testbench pass CACHE_CONTROL or TB_CONTROL
+
+    mem_ctrl = new_mem_ctrl;
+  endtask
+
+endmodule
diff --git a/source_code/include/cache_generic_struct.vh b/source_code/include/cache_generic_struct.vh
new file mode 100644
index 000000000..1fb57b878
--- /dev/null
+++ b/source_code/include/cache_generic_struct.vh
@@ -0,0 +1,89 @@
+`ifndef CACHE_GENERIC_STRUCT_VH
+`define CACHE_GENERIC_STRUCT_VH
+
+// Shared size parameters and packed types for the generic cache.
+// CACHE_SIZE, WORD_SIZE, BLOCK_SIZE, ASSOC and word_t are not defined in this
+// header; they must already be visible wherever it is included.
+
+// local parameters
+parameter N_TOTAL_BYTES = CACHE_SIZE / 8; // cache size in bytes (the /8 implies CACHE_SIZE is given in bits)
+parameter N_TOTAL_WORDS = N_TOTAL_BYTES / (WORD_SIZE / 8); // number of words in cache
+parameter N_TOTAL_FRAMES = N_TOTAL_WORDS / BLOCK_SIZE; // number of frames in cache (BLOCK_SIZE words per frame)
+parameter N_SETS = N_TOTAL_FRAMES / ASSOC; // number of sets (each set holds ASSOC frames)
+// The "+ (X == 1)" terms keep each index at least 1 bit wide when $clog2 returns 0.
+parameter N_FRAME_BITS = $clog2(ASSOC) + (ASSOC == 1); // bits needed to select one frame (way) within a set
+parameter N_SET_BITS = $clog2(N_SETS) + (N_SETS == 1); // bits needed to select one set
+parameter N_BLOCK_BITS = $clog2(BLOCK_SIZE) + (BLOCK_SIZE == 1); // bits needed to select one word within a frame
+// NOTE(review): the original code subtracted a further 2 here, presumably for the
+// two byte-offset bits (byte_bits in decoded_cache_addr_t). Without it,
+// decoded_cache_addr_t is WORD_SIZE + 2 bits wide -- confirm this is intended.
+parameter N_TAG_BITS = WORD_SIZE - N_SET_BITS - N_BLOCK_BITS; // number of tag bits stored per frame
+parameter FRAME_SIZE = WORD_SIZE * BLOCK_SIZE + N_TAG_BITS + 3; // bits per frame: data words + tag bits + 3 status bits
+parameter SRAM_W = FRAME_SIZE * ASSOC; // bits in one set (all ASSOC frames, overhead included)
+parameter SRAM_TAG_W = (N_TAG_BITS + 3) * ASSOC; // overhead-only bits of one set (all ASSOC frames)
+parameter CLEAR_LENGTH = $clog2(BLOCK_SIZE) + 2; // word-index bits of a block plus 2 (presumably the byte-offset bits)
+parameter NUM_STATE = 7; // number of cache states
+parameter N_STATE_BITS = $clog2(NUM_STATE); // number of bits for cache state logic
+// Note: "overhead" means the tag bits plus the 3 status bits (exclusive, valid, dirty)
+
+// struct declaration
+// cache tag: overhead of the cache (3 status bits + tag)
+typedef struct packed {
+    logic exclusive;
+    logic valid;
+    logic dirty;
+    logic [N_TAG_BITS-1:0] tag_bits;
+} cache_tag_t;
+
+// cache frame: overhead of the cache + the BLOCK_SIZE data words
+typedef struct packed{
+    cache_tag_t tag;
+    word_t [BLOCK_SIZE - 1:0] data;
+} cache_frame_t;
+
+// cache set: the ASSOC frames that share one set index
+typedef struct packed {
+    cache_frame_t [ASSOC - 1:0] frames;
+} cache_set_t;
+
+// decode the main memory address into tag_bits, index_bits, and block_bits
+typedef struct packed {
+    logic [N_TAG_BITS-1:0] tag_bits;
+    logic [N_SET_BITS-1:0] idx_bits;
+    logic [N_BLOCK_BITS-1:0] block_bits;
+} decoded_cache_idx_t;
+
+// flush counter type
+typedef struct packed {
+    logic finish; // status whether the flush is done (...)
+    logic [N_SET_BITS-1:0] set_num; // N_SET_BITS wide: selects which set to flush
+    logic [N_FRAME_BITS-1:0] frame_num; // N_FRAME_BITS wide: selects the frame (way) within that set
+} flush_idx_t;
+
+// cache address type: decoded index fields followed by the two lowest (byte) bits
+typedef struct packed {
+    decoded_cache_idx_t idx;
+    logic [1:0] byte_bits;
+} decoded_cache_addr_t;
+
+// cache state machine
+// The base type is sized with N_STATE_BITS so a state value occupies only
+// $clog2(NUM_STATE) bits; NUM_STATE must match the number of labels below.
+typedef enum logic [N_STATE_BITS - 1:0] {
+    IDLE, // *** ...
+    HIT, // check the tag and cache status with the request and it hit
+    FETCH, // load the data from the main memory to cache
+    WB, // write-back the data to cache without writing to the main memory (set the dirty bit to one)
+    FLUSH_CACHE, // write the cached data back to main memory (when cache is full or replacement policy)
+    SNOOP, // *** cache coherency protocol
+    CANCEL_REQ // *** ...
+} cache_fsm_t;
+
+// reservation set: a decoded cache index plus a flag marking it as reserved
+// NOTE(review): presumably backs load-reserved / store-conditional -- confirm.
+typedef struct packed {
+    decoded_cache_idx_t idx;
+    logic reserved;
+} reservation_set_t;
+
+`endif