diff --git a/core/branch_unit.sv b/core/branch_unit.sv index 717bfdf3e4..c702567725 100644 --- a/core/branch_unit.sv +++ b/core/branch_unit.sv @@ -31,6 +31,8 @@ module branch_unit #( input fu_data_t fu_data_i, // Instruction PC - ISSUE_STAGE input logic [CVA6Cfg.VLEN-1:0] pc_i, + // is zcmt instruction + input logic is_zcmt_i, // Instruction is compressed - ISSUE_STAGE input logic is_compressed_instr_i, // Branch unit instruction is valid - ISSUE_STAGE @@ -46,18 +48,10 @@ module branch_unit #( // Branch is resolved, new entries can be accepted by scoreboard - ID_STAGE output logic resolve_branch_o, // Branch exception out - TO_BE_COMPLETED - output exception_t branch_exception_o, - //zcmt - input logic is_zcmt_i + output exception_t branch_exception_o ); logic [CVA6Cfg.VLEN-1:0] target_address; logic [CVA6Cfg.VLEN-1:0] next_pc; - logic is_zcmt_q; - - always_ff @(posedge clk_i or negedge rst_ni) begin - if (~rst_ni) is_zcmt_q <= '0; - else is_zcmt_q <= is_zcmt_i; - end // here we handle the various possibilities of mis-predicts always_comb begin : mispredict_handler @@ -83,20 +77,18 @@ module branch_unit #( // we need to put the branch target address into rd, this is the result of this unit branch_result_o = next_pc; resolved_branch_o.pc = pc_i; - // There are only two sources of mispredicts: + // There are only three sources of mispredicts: // 1. Branches // 2. Jumps to register addresses + // 3. Zcmt instructions if (branch_valid_i) begin - if (is_zcmt_q) begin + // write target address which goes to PC Gen or select target address if zcmt + resolved_branch_o.target_address = (branch_comp_res_i) | is_zcmt_i ? target_address : next_pc; + resolved_branch_o.is_taken = is_zcmt_i ? 
1'b1 : branch_comp_res_i; + if (is_zcmt_i) begin // Unconditional jump handling - resolved_branch_o.is_taken = 1'b1; resolved_branch_o.is_mispredict = 1'b1; // miss prediction for ZCMT - resolved_branch_o.target_address = target_address; // Use calculated address directly resolved_branch_o.cf_type = ariane_pkg::Jump; - end else begin - // write target address which goes to PC Gen - resolved_branch_o.target_address = (branch_comp_res_i) ? target_address : next_pc; - resolved_branch_o.is_taken = branch_comp_res_i; end // check the outcome of the branch speculation if (ariane_pkg::op_is_branch(fu_data_i.operation)) begin diff --git a/core/cache_subsystem/wt_dcache.sv b/core/cache_subsystem/wt_dcache.sv index a829fc48bd..5b49d957fb 100644 --- a/core/cache_subsystem/wt_dcache.sv +++ b/core/cache_subsystem/wt_dcache.sv @@ -188,10 +188,10 @@ module wt_dcache // read controllers (LD unit and PTW/MMU) /////////////////////////////////////////////////////// - // 0 is used by MMU, 1 by READ access requests + // 0 is used by MMU or implicit read by zcmt, 1 by READ access requests for (genvar k = 0; k < NumPorts - 1; k++) begin : gen_rd_ports // set these to high prio ports - if ((k == 0 && CVA6Cfg.MmuPresent) || (k == 1) || (k == 2 && CVA6Cfg.EnableAccelerator)) begin + if ((k == 0 && (CVA6Cfg.MmuPresent || CVA6Cfg.RVZCMT )) || (k == 1) || (k == 2 && CVA6Cfg.EnableAccelerator)) begin assign rd_prio[k] = 1'b1; wt_dcache_ctrl #( .CVA6Cfg(CVA6Cfg), diff --git a/core/compressed_decoder.sv b/core/compressed_decoder.sv index 5278f40a21..d8e395cb48 100644 --- a/core/compressed_decoder.sv +++ b/core/compressed_decoder.sv @@ -876,7 +876,6 @@ module compressed_decoder #( instr_o = instr_i; end else if (instr_i[12:10] == 3'b000) begin //jt/jalt instruction is_zcmt_instr_o = 1; - instr_o = instr_i; end else begin illegal_instr_o = 1'b1; end diff --git a/core/csr_regfile.sv b/core/csr_regfile.sv index cfe5f35b13..3b58ad2995 100644 --- a/core/csr_regfile.sv +++ b/core/csr_regfile.sv @@ 
-18,6 +18,7 @@ module csr_regfile #( parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty, parameter type exception_t = logic, + parameter type jvt_t = logic, parameter type irq_ctrl_t = logic, parameter type scoreboard_entry_t = logic, parameter type rvfi_probes_csr_t = logic, @@ -169,8 +170,7 @@ module csr_regfile // RVFI output rvfi_probes_csr_t rvfi_csr_o, //jvt output - output logic [CVA6Cfg.XLEN-1:6] jvt_base_o, - output logic [5:0] jvt_mode_o + output jvt_t jvt_o ); localparam logic [63:0] SMODE_STATUS_READ_MASK = ariane_pkg::smode_status_read_mask(CVA6Cfg); @@ -355,7 +355,11 @@ module csr_regfile end end riscv::CSR_JVT: begin - csr_rdata = {jvt_q.base, jvt_q.mode}; + if (CVA6Cfg.RVZCMT) begin + csr_rdata = {jvt_q.base, jvt_q.mode}; + end else begin + read_access_exception = 1'b1; + end end // non-standard extension riscv::CSR_FTRAN: begin @@ -1068,8 +1072,12 @@ module csr_regfile if (CVA6Cfg.DebugEn) dscratch1_d = csr_wdata; else update_access_exception = 1'b1; riscv::CSR_JVT: begin - jvt_d.base = csr_wdata[CVA6Cfg.XLEN-1:6]; - jvt_d.mode = 6'b000000; + if (CVA6Cfg.RVZCMT) begin + jvt_d.base = csr_wdata[CVA6Cfg.XLEN-1:6]; + jvt_d.mode = 6'b000000; + end else begin + update_access_exception = 1'b1; + end end // trigger module CSRs riscv::CSR_TSELECT: update_access_exception = 1'b1; // not implemented @@ -2464,8 +2472,8 @@ module csr_regfile assign frm_o = fcsr_q.frm; assign fprec_o = fcsr_q.fprec; //JVT outputs - assign jvt_base_o = jvt_q.base; - assign jvt_mode_o = jvt_q.mode; + assign jvt_o.base = jvt_q.base; + assign jvt_o.mode = jvt_q.mode; // MMU outputs assign satp_ppn_o = CVA6Cfg.RVS ? satp_q.ppn : '0; assign vsatp_ppn_o = CVA6Cfg.RVH ? vsatp_q.ppn : '0; @@ -2738,7 +2746,7 @@ module csr_regfile // RVFI //------------- assign rvfi_csr_o.fcsr_q = CVA6Cfg.FpPresent ? fcsr_q : '0; - assign rvfi_csr_o.jvt_q = jvt_q; + assign rvfi_csr_o.jvt_q = CVA6Cfg.RVZCMT ? jvt_q : '0; assign rvfi_csr_o.dcsr_q = CVA6Cfg.DebugEn ? 
dcsr_q : '0; assign rvfi_csr_o.dpc_q = CVA6Cfg.DebugEn ? dpc_q : '0; assign rvfi_csr_o.dscratch0_q = CVA6Cfg.DebugEn ? dscratch0_q : '0; diff --git a/core/cva6.sv b/core/cva6.sv index c5997bd561..160b23f55a 100644 --- a/core/cva6.sv +++ b/core/cva6.sv @@ -86,6 +86,11 @@ module cva6 branchpredict_sbe_t branch_predict; // this field contains branch prediction information regarding the forward branch path exception_t ex; // this field contains exceptions which might have happened earlier, e.g.: fetch exceptions }, + //JVT struct{base,mode} + localparam type jvt_t = struct packed { + logic [CVA6Cfg.XLEN-7:0] base; + logic [5:0] mode; + }, // ID/EX/WB Stage localparam type scoreboard_entry_t = struct packed { @@ -113,6 +118,7 @@ module cva6 logic is_last_macro_instr; // is last decoded 32bit instruction of macro definition logic is_double_rd_macro_instr; // is double move decoded 32bit instruction of macro definition logic vfp; // is this a vector floating-point instruction? + logic is_zcmt; //is a zcmt instruction }, localparam type writeback_t = struct packed { logic valid; // wb data is valid @@ -415,6 +421,7 @@ module cva6 fu_data_t [CVA6Cfg.NrIssuePorts-1:0] fu_data_id_ex; logic [CVA6Cfg.VLEN-1:0] pc_id_ex; + logic zcmt_id_ex; logic is_compressed_instr_id_ex; logic [CVA6Cfg.NrIssuePorts-1:0][31:0] tinst_ex; // fixed latency units @@ -564,9 +571,7 @@ module cva6 logic [CVA6Cfg.NrPMPEntries-1:0][CVA6Cfg.PLEN-3:0] pmpaddr; logic [31:0] mcountinhibit_csr_perf; //jvt - logic [CVA6Cfg.XLEN-1:6] jvt_base; - logic [5:0] jvt_mode; - logic is_zcmt_id_is, is_zcmt_is_ex; + jvt_t jvt; // ---------------------------- // Performance Counters <-> * // ---------------------------- @@ -681,6 +686,7 @@ module cva6 .dcache_req_o_t(dcache_req_o_t), .exception_t(exception_t), .fetch_entry_t(fetch_entry_t), + .jvt_t(jvt_t), .irq_ctrl_t(irq_ctrl_t), .scoreboard_entry_t(scoreboard_entry_t), .interrupts_t(interrupts_t), @@ -725,9 +731,7 @@ module cva6 .compressed_resp_i (x_compressed_resp), 
.compressed_valid_o(x_compressed_valid), .compressed_req_o (x_compressed_req), - .jvt_base_i (jvt_base), - .jvt_mode_i (jvt_mode), - .is_zcmt_o (is_zcmt_id_is), + .jvt_i (jvt), // DCACHE interfaces .dcache_req_ports_i(dcache_req_ports_cache_id), .dcache_req_ports_o(dcache_req_ports_id_cache) @@ -826,13 +830,12 @@ module cva6 .decoded_instr_valid_i (issue_entry_valid_id_issue), .is_ctrl_flow_i (is_ctrl_fow_id_issue), .decoded_instr_ack_o (issue_instr_issue_id), - .is_zcmt_i (is_zcmt_id_is), - .is_zcmt_o (is_zcmt_is_ex), // Functional Units .rs1_forwarding_o (rs1_forwarding_id_ex), .rs2_forwarding_o (rs2_forwarding_id_ex), .fu_data_o (fu_data_id_ex), .pc_o (pc_id_ex), + .is_zcmt_o (zcmt_id_ex), .is_compressed_instr_o (is_compressed_instr_id_ex), .tinst_o (tinst_ex), // fixed latency unit ready @@ -924,6 +927,7 @@ module cva6 .rs2_forwarding_i(rs2_forwarding_id_ex), .fu_data_i(fu_data_id_ex), .pc_i(pc_id_ex), + .is_zcmt_i(zcmt_id_ex), .is_compressed_instr_i(is_compressed_instr_id_ex), .tinst_i(tinst_ex), // fixed latency units @@ -1034,8 +1038,7 @@ module cva6 .pmpaddr_i (pmpaddr), //RVFI .rvfi_lsu_ctrl_o (rvfi_lsu_ctrl), - .rvfi_mem_paddr_o (rvfi_mem_paddr), - .is_zcmt_i (is_zcmt_is_ex) + .rvfi_mem_paddr_o (rvfi_mem_paddr) ); // --------- @@ -1095,6 +1098,7 @@ module cva6 csr_regfile #( .CVA6Cfg (CVA6Cfg), .exception_t (exception_t), + .jvt_t (jvt_t), .irq_ctrl_t (irq_ctrl_t), .scoreboard_entry_t(scoreboard_entry_t), .rvfi_probes_csr_t (rvfi_probes_csr_t), @@ -1171,8 +1175,7 @@ module cva6 .pmpcfg_o (pmpcfg), .pmpaddr_o (pmpaddr), .mcountinhibit_o (mcountinhibit_csr_perf), - .jvt_base_o (jvt_base), - .jvt_mode_o (jvt_mode), + .jvt_o (jvt), //RVFI .rvfi_csr_o (rvfi_csr) ); @@ -1277,8 +1280,7 @@ module cva6 dcache_req_o_t [NumPorts-1:0] dcache_req_from_cache; // D$ request - // D$ request - if (CVA6Cfg.RVZCMT) begin + if (CVA6Cfg.RVZCMT & ~(CVA6Cfg.MmuPresent)) begin // Cache port 0 is ultilize in implicit read access in ZCMT extension. 
Therefore, MMU should be turned off. assign dcache_req_to_cache[0] = dcache_req_ports_id_cache; end else begin assign dcache_req_to_cache[0] = dcache_req_ports_ex_cache[0]; @@ -1289,7 +1291,7 @@ module cva6 dcache_req_ports_acc_cache[1]; // D$ response - if (CVA6Cfg.RVZCMT) begin + if (CVA6Cfg.RVZCMT & ~(CVA6Cfg.MmuPresent)) begin // Cache port 0 is utilized for the implicit read access of the ZCMT extension. Therefore, MMU should be turned off. assign dcache_req_ports_cache_id = dcache_req_from_cache[0]; end else begin assign dcache_req_ports_cache_ex[0] = dcache_req_from_cache[0]; diff --git a/core/cva6_rvfi.sv b/core/cva6_rvfi.sv index 944f5cbbd6..02bd564f76 100644 --- a/core/cva6_rvfi.sv +++ b/core/cva6_rvfi.sv @@ -344,7 +344,7 @@ module cva6_rvfi `CONNECT_RVFI_FULL(CVA6Cfg.FpPresent, fflags, csr.fcsr_q.fflags) `CONNECT_RVFI_FULL(CVA6Cfg.FpPresent, frm, csr.fcsr_q.frm) `CONNECT_RVFI_FULL(CVA6Cfg.FpPresent, fcsr, { csr.fcsr_q.frm `COMMA csr.fcsr_q.fflags}) - `CONNECT_RVFI_FULL(1, jvt, { csr.jvt_q.base `COMMA csr.jvt_q.mode}) + `CONNECT_RVFI_FULL(CVA6Cfg.RVZCMT, jvt, { csr.jvt_q.base `COMMA csr.jvt_q.mode}) `CONNECT_RVFI_FULL(CVA6Cfg.FpPresent, ftran, csr.fcsr_q.fprec) `CONNECT_RVFI_SAME(CVA6Cfg.FpPresent, dcsr) diff --git a/core/decoder.sv b/core/decoder.sv index 55b4de761a..015d95c81e 100644 --- a/core/decoder.sv +++ b/core/decoder.sv @@ -87,8 +87,7 @@ module decoder // Is a control flow instruction - ISSUE_STAGE output logic is_control_flow_instr_o, //zcmt instruction - input logic is_zcmt_i, - output logic is_zcmt_o + input logic is_zcmt_i ); logic illegal_instr; logic illegal_instr_bm; @@ -181,10 +180,10 @@ module decoder instruction_o.use_zimm = 1'b0; instruction_o.bp = branch_predict_i; instruction_o.vfp = 1'b0; + instruction_o.is_zcmt = is_zcmt_i; ecall = 1'b0; ebreak = 1'b0; check_fprm = 1'b0; - is_zcmt_o = 1'b0; if (~ex_i.valid) begin case (instr.rtype.opcode) @@ -1408,7 +1407,6 @@ module decoder imm_select = JIMM; instruction_o.rd = instr.utype.rd;
is_control_flow_instr_o = 1'b1; - is_zcmt_o = is_zcmt_i; end riscv::OpcodeAuipc: begin diff --git a/core/ex_stage.sv b/core/ex_stage.sv index 4d7338586a..6c3da27a06 100644 --- a/core/ex_stage.sv +++ b/core/ex_stage.sv @@ -47,6 +47,8 @@ module ex_stage input fu_data_t [CVA6Cfg.NrIssuePorts-1:0] fu_data_i, // PC of the current instruction - ISSUE_STAGE input logic [CVA6Cfg.VLEN-1:0] pc_i, + // is_zcmt instruction + input logic is_zcmt_i, // Report whether instruction is compressed - ISSUE_STAGE input logic is_compressed_instr_i, // Report instruction encoding - ISSUE_STAGE @@ -228,9 +230,7 @@ module ex_stage // Information dedicated to RVFI - RVFI output lsu_ctrl_t rvfi_lsu_ctrl_o, // Information dedicated to RVFI - RVFI - output [CVA6Cfg.PLEN-1:0] rvfi_mem_paddr_o, - //zcmt instruction - input logic is_zcmt_i + output [CVA6Cfg.PLEN-1:0] rvfi_mem_paddr_o ); // ------------------------- @@ -322,6 +322,7 @@ module ex_stage .debug_mode_i, .fu_data_i (one_cycle_data), .pc_i, + .is_zcmt_i, .is_compressed_instr_i, .branch_valid_i (|branch_valid_i), .branch_comp_res_i (alu_branch_res), @@ -329,8 +330,7 @@ module ex_stage .branch_predict_i, .resolved_branch_o, .resolve_branch_o, - .branch_exception_o(flu_exception_o), - .is_zcmt_i (is_zcmt_i) + .branch_exception_o(flu_exception_o) ); // 3. 
CSR (sequential) diff --git a/core/id_stage.sv b/core/id_stage.sv index b318856e98..3196ef76e7 100644 --- a/core/id_stage.sv +++ b/core/id_stage.sv @@ -20,6 +20,7 @@ module id_stage #( parameter type dcache_req_o_t = logic, parameter type exception_t = logic, parameter type fetch_entry_t = logic, + parameter type jvt_t = logic, parameter type irq_ctrl_t = logic, parameter type scoreboard_entry_t = logic, parameter type interrupts_t = logic, @@ -85,10 +86,8 @@ module id_stage #( // CVXIF Compressed interface input logic [CVA6Cfg.XLEN-1:0] hart_id_i, input logic compressed_ready_i, - //JVT base - input logic [CVA6Cfg.XLEN-1:6] jvt_base_i, - input logic [5:0] jvt_mode_i, - output logic is_zcmt_o, + //JVT + input jvt_t jvt_i, input x_compressed_resp_t compressed_resp_i, output logic compressed_valid_o, output x_compressed_req_t compressed_req_o, @@ -112,26 +111,24 @@ module id_stage #( logic [CVA6Cfg.NrIssuePorts-1:0] is_illegal; logic [CVA6Cfg.NrIssuePorts-1:0] is_illegal_cmp; - logic [CVA6Cfg.NrIssuePorts-1:0] is_illegal_cvxif; - logic [CVA6Cfg.NrIssuePorts-1:0][31:0] instruction; - logic [CVA6Cfg.NrIssuePorts-1:0][31:0] compressed_instr; - logic [CVA6Cfg.NrIssuePorts-1:0][31:0] instruction_cvxif; - logic [CVA6Cfg.NrIssuePorts-1:0] is_compressed; - logic [CVA6Cfg.NrIssuePorts-1:0] is_compressed_cmp; - logic [CVA6Cfg.NrIssuePorts-1:0] is_compressed_cvxif; + logic [CVA6Cfg.NrIssuePorts-1:0] is_illegal_cvxif, is_illegal_cvxif_zcmp, is_illegal_cvxif_zcmt; + logic [CVA6Cfg.NrIssuePorts-1:0][31:0] instruction; + logic [CVA6Cfg.NrIssuePorts-1:0][31:0] compressed_instr; + logic [CVA6Cfg.NrIssuePorts-1:0][31:0] + instruction_cvxif, instruction_cvxif_zcmp, instruction_cvxif_zcmt; + logic [CVA6Cfg.NrIssuePorts-1:0] is_compressed; + logic [CVA6Cfg.NrIssuePorts-1:0] is_compressed_cmp; + logic [CVA6Cfg.NrIssuePorts-1:0] + is_compressed_cvxif, is_compressed_cvxif_zcmp, is_compressed_cvxif_zcmt; - logic [CVA6Cfg.NrIssuePorts-1:0] is_macro_instr_i; - logic stall_instr_fetch; - logic 
stall_macro_deco; - logic is_last_macro_instr_o; - logic is_double_rd_macro_instr_o; - logic [CVA6Cfg.NrIssuePorts-1:0] is_zcmt_instr_i; - branchpredict_sbe_t branch_predict; - logic is_zcmt; - logic is_zcmt_q, is_zcmt_n, is_zcmt_o2; - - assign is_zcmt_n = is_zcmt_o2; - assign is_zcmt_o = is_zcmt_q; + logic [CVA6Cfg.NrIssuePorts-1:0] is_macro_instr_i; + logic stall_instr_fetch; + logic stall_macro_deco, stall_macro_deco_zcmp, stall_macro_deco_zcmt; + logic is_last_macro_instr_o; + logic is_double_rd_macro_instr_o; + logic [CVA6Cfg.NrIssuePorts-1:0] is_zcmt_instr_i; + branchpredict_sbe_t branch_predict; + logic is_zcmt; if (CVA6Cfg.RVC) begin // --------------------------------------------------------- @@ -149,7 +146,7 @@ module id_stage #( .is_zcmt_instr_o (is_zcmt_instr_i[i]) ); end - if (CVA6Cfg.RVZCMP) begin + if (CVA6Cfg.RVZCMP || (CVA6Cfg.RVZCMT & ~CVA6Cfg.MmuPresent)) begin //MMU should be off when using ZCMT //sequencial decoder macro_decoder #( .CVA6Cfg(CVA6Cfg) @@ -158,47 +155,21 @@ module id_stage #( .is_macro_instr_i (is_macro_instr_i[0]), .clk_i (clk_i), .rst_ni (rst_ni), - .instr_o (instruction_cvxif[0]), + .instr_o (instruction_cvxif_zcmp[0]), .illegal_instr_i (is_illegal[0]), .is_compressed_i (is_compressed[0]), .issue_ack_i (issue_instr_ack_i[0]), - .illegal_instr_o (is_illegal_cvxif[0]), - .is_compressed_o (is_compressed_cvxif[0]), - .fetch_stall_o (stall_macro_deco), + .illegal_instr_o (is_illegal_cvxif_zcmp[0]), + .is_compressed_o (is_compressed_cvxif_zcmp[0]), + .fetch_stall_o (stall_macro_deco_zcmp), .is_last_macro_instr_o (is_last_macro_instr_o), .is_double_rd_macro_instr_o(is_double_rd_macro_instr_o) ); - if (CVA6Cfg.SuperscalarEn) begin - assign instruction_cvxif[CVA6Cfg.NrIssuePorts-1] = '0; - assign is_illegal_cvxif[CVA6Cfg.NrIssuePorts-1] = '0; - assign is_compressed_cvxif[CVA6Cfg.NrIssuePorts-1] = '0; - end - cvxif_compressed_if_driver #( - .CVA6Cfg(CVA6Cfg), - .x_compressed_req_t(x_compressed_req_t), - 
.x_compressed_resp_t(x_compressed_resp_t) - ) i_cvxif_compressed_if_driver_i ( - .clk_i (clk_i), - .rst_ni (rst_ni), - .hart_id_i (hart_id_i), - .is_compressed_i (is_compressed_cvxif), - .is_illegal_i (is_illegal_cvxif), - .instruction_i (instruction_cvxif), - .is_compressed_o (is_compressed_cmp), - .is_illegal_o (is_illegal_cmp), - .instruction_o (instruction), - .stall_i (stall_macro_deco), - .stall_o (stall_instr_fetch), - .compressed_ready_i(compressed_ready_i), - .compressed_resp_i (compressed_resp_i), - .compressed_valid_o(compressed_valid_o), - .compressed_req_o (compressed_req_o) - ); - end else if (CVA6Cfg.RVZCMT) begin zcmt_decoder #( .CVA6Cfg(CVA6Cfg), .dcache_req_i_t(dcache_req_i_t), .dcache_req_o_t(dcache_req_o_t), + .jvt_t(jvt_t), .branchpredict_sbe_t(branchpredict_sbe_t) ) zcmt_decoder_i ( .instr_i (compressed_instr[0]), @@ -206,19 +177,21 @@ module id_stage #( .is_zcmt_instr_i(is_zcmt_instr_i[0]), .clk_i (clk_i), .rst_ni (rst_ni), - .instr_o (instruction_cvxif[0]), + .instr_o (instruction_cvxif_zcmt[0]), .illegal_instr_i(is_illegal[0]), .is_compressed_i(is_compressed[0]), - .issue_ack_i (issue_instr_ack_i[0]), - .illegal_instr_o(is_illegal_cvxif[0]), - .is_compressed_o(is_compressed_cvxif[0]), - .fetch_stall_o (stall_macro_deco), - .jvt_base_i (jvt_base_i), - .jvt_mode_i (jvt_mode_i), - .is_zcmt_o (is_zcmt), + .illegal_instr_o(is_illegal_cvxif_zcmt[0]), + .is_compressed_o(is_compressed_cvxif_zcmt[0]), + .fetch_stall_o (stall_macro_deco_zcmt), + .jvt_i (jvt_i), .req_port_i (dcache_req_ports_i), .req_port_o (dcache_req_ports_o) ); + + assign instruction_cvxif[0] = is_zcmt_instr_i[0] ? instruction_cvxif_zcmt[0] : instruction_cvxif_zcmp[0]; + assign is_illegal_cvxif[0] = is_zcmt_instr_i[0] ? is_illegal_cvxif_zcmt[0] : is_illegal_cvxif_zcmp[0]; + assign is_compressed_cvxif[0] = is_zcmt_instr_i[0] ? is_compressed_cvxif_zcmt[0] : is_compressed_cvxif_zcmp[0]; + assign stall_macro_deco = is_zcmt_instr_i[0] ? 
stall_macro_deco_zcmt : stall_macro_deco_zcmp; if (CVA6Cfg.SuperscalarEn) begin assign instruction_cvxif[CVA6Cfg.NrIssuePorts-1] = '0; assign is_illegal_cvxif[CVA6Cfg.NrIssuePorts-1] = '0; @@ -307,8 +280,7 @@ module id_stage #( .pc_i (fetch_entry_i[i].address), .is_compressed_i (is_compressed_cmp[i]), .is_macro_instr_i (is_macro_instr_i[i]), - .is_zcmt_i (is_zcmt), - .is_zcmt_o (is_zcmt_o2), + .is_zcmt_i (is_zcmt_instr_i[i]), .is_last_macro_instr_i (is_last_macro_instr_o), .is_double_rd_macro_instr_i(is_double_rd_macro_instr_o), .is_illegal_i (is_illegal_cmp[i]), @@ -414,11 +386,9 @@ module id_stage #( // ------------------------- always_ff @(posedge clk_i or negedge rst_ni) begin if (~rst_ni) begin - issue_q <= '0; - is_zcmt_q <= '0; + issue_q <= '0; end else begin - issue_q <= issue_n; - is_zcmt_q <= is_zcmt_n; + issue_q <= issue_n; end end endmodule diff --git a/core/include/cv32a60x_config_pkg.sv b/core/include/cv32a60x_config_pkg.sv index 65c04bf5e5..7cf0af9b43 100644 --- a/core/include/cv32a60x_config_pkg.sv +++ b/core/include/cv32a60x_config_pkg.sv @@ -44,7 +44,7 @@ package cva6_config_pkg; RVH: bit'(0), RVZCMT: bit'(1), RVZCB: bit'(1), - RVZCMP: bit'(0), + RVZCMP: bit'(1), XFVec: bit'(0), CvxifEn: bit'(1), RVZiCond: bit'(0), diff --git a/core/include/riscv_pkg.sv b/core/include/riscv_pkg.sv index 762d422e4f..5e29330277 100644 --- a/core/include/riscv_pkg.sv +++ b/core/include/riscv_pkg.sv @@ -727,8 +727,6 @@ package riscv; localparam logic [63:0] SSTATUS_UPIE = 'h00000010; localparam logic [63:0] SSTATUS_UXL = 64'h0000000300000000; // CSR Bit Implementation Masks - // A mask bit of '1' means a flipflop is implemented. 
- parameter CSR_JVT_MASK = 32'hFFFFFFC0; function automatic logic [63:0] sstatus_sd(logic IS_XLEN64); return {IS_XLEN64, 31'h00000000, ~IS_XLEN64, 31'h00000000}; @@ -866,12 +864,11 @@ package riscv; } dcsr_t; //jvt struct + parameter JVT_ADDR_WIDTH = XLEN - 6; typedef struct packed { - logic [XLEN-1:6] base; + logic [JVT_ADDR_WIDTH-1:0] base; logic [5:0] mode; } jvt_t; - parameter JVT_ADDR_WIDTH = XLEN - 6; - parameter JVT_RESET_VAL = 32'd0; //32 bit // Instruction Generation *incomplete* function automatic logic [31:0] jal(logic [4:0] rd, logic [20:0] imm); // OpCode Jal diff --git a/core/issue_read_operands.sv b/core/issue_read_operands.sv index b11b2a86ba..1c1a904719 100644 --- a/core/issue_read_operands.sv +++ b/core/issue_read_operands.sv @@ -56,6 +56,8 @@ module issue_read_operands output logic [CVA6Cfg.NrIssuePorts-1:0][CVA6Cfg.XLEN-1:0] rs2_forwarding_o, // Program Counter - EX_STAGE output logic [CVA6Cfg.VLEN-1:0] pc_o, + //is zcmt + output logic is_zcmt_o, // Is compressed instruction - EX_STAGE output logic is_compressed_instr_o, // Fixed Latency Unit is ready - EX_STAGE @@ -119,11 +121,8 @@ module issue_read_operands input logic [CVA6Cfg.NrCommitPorts-1:0] we_gpr_i, // FPR write enable - COMMIT_STAGE input logic [CVA6Cfg.NrCommitPorts-1:0] we_fpr_i, - // Issue stall - PERF_COUNTERS - output logic stall_issue_o, - input logic is_zcmt_i, - output logic is_zcmt_o + output logic stall_issue_o ); localparam OPERANDS_PER_INSTR = CVA6Cfg.NrRgprPorts / CVA6Cfg.NrIssuePorts; @@ -192,9 +191,6 @@ module issue_read_operands // forwarding signals logic [CVA6Cfg.NrIssuePorts-1:0] forward_rs1, forward_rs2, forward_rs3; - logic is_zcmt_n, is_zcmt_q; - assign is_zcmt_n = is_zcmt_i; - assign is_zcmt_o = is_zcmt_q; // original instruction riscv::instruction_t orig_instr; @@ -1106,13 +1102,13 @@ module issue_read_operands tinst_q <= '0; end pc_o <= '0; + is_zcmt_o <= '0; is_compressed_instr_o <= 1'b0; branch_predict_o <= {cf_t'(0), {CVA6Cfg.VLEN{1'b0}}}; 
x_transaction_rejected_o <= 1'b0; - is_zcmt_q <= 1'b0; end else begin fu_data_q <= fu_data_n; - is_zcmt_q <= is_zcmt_n; + is_zcmt_o <= issue_instr_i[0].is_zcmt; if (CVA6Cfg.RVH) begin tinst_q <= tinst_n; end diff --git a/core/issue_stage.sv b/core/issue_stage.sv index f111805433..55a8ef5622 100644 --- a/core/issue_stage.sv +++ b/core/issue_stage.sv @@ -60,6 +60,8 @@ module issue_stage output fu_data_t [CVA6Cfg.NrIssuePorts-1:0] fu_data_o, // Program Counter - EX_STAGE output logic [CVA6Cfg.VLEN-1:0] pc_o, + //is zcmt instruction + output logic is_zcmt_o, // Is compressed instruction - EX_STAGE output logic is_compressed_instr_o, // Transformed trap instruction - EX_STAGE @@ -157,10 +159,7 @@ module issue_stage // Information dedicated to RVFI - RVFI output logic [CVA6Cfg.NrIssuePorts-1:0][CVA6Cfg.TRANS_ID_BITS-1:0] rvfi_issue_pointer_o, // Information dedicated to RVFI - RVFI - output logic [CVA6Cfg.NrCommitPorts-1:0][CVA6Cfg.TRANS_ID_BITS-1:0] rvfi_commit_pointer_o, - //zcmt instruction - input logic is_zcmt_i, - output logic is_zcmt_o + output logic [CVA6Cfg.NrCommitPorts-1:0][CVA6Cfg.TRANS_ID_BITS-1:0] rvfi_commit_pointer_o ); // --------------------------------------------------- // Scoreboard (SB) <-> Issue and Read Operands (IRO) @@ -189,7 +188,6 @@ module issue_stage assign issue_instr_o = issue_instr_sb_iro[0]; assign issue_instr_hs_o = issue_instr_valid_sb_iro[0] & issue_ack_iro_sb[0]; - assign is_zcmt_o = is_zcmt_i; logic x_transaction_accepted_iro_sb, x_issue_writeback_iro_sb; logic [CVA6Cfg.TRANS_ID_BITS-1:0] x_id_iro_sb; @@ -267,6 +265,7 @@ module issue_stage .rs1_forwarding_o (rs1_forwarding_xlen), .rs2_forwarding_o (rs2_forwarding_xlen), .pc_o, + .is_zcmt_o (is_zcmt_o), .is_compressed_instr_o, .flu_ready_i (flu_ready_i), .alu_valid_o (alu_valid_o), @@ -303,9 +302,7 @@ module issue_stage .wdata_i, .we_gpr_i, .we_fpr_i, - .stall_issue_o, - .is_zcmt_i (is_zcmt_i), - .is_zcmt_o (is_zcmt_o) + .stall_issue_o ); endmodule diff --git a/core/zcmt_decoder.sv 
b/core/zcmt_decoder.sv index 68ea0ffbe8..9cc8e81911 100644 --- a/core/zcmt_decoder.sv +++ b/core/zcmt_decoder.sv @@ -1,204 +1,107 @@ // Author: Farhan Ali Shah, 10xEngineers // Date: 15.11.2024 -// Description: ZCMT Extension - +// Description: ZCMT extension in the CVA6 core targeting the 32-bit embedded-class platforms (CV32A60x). +// ZCMT is a code-size reduction feature that utilizes compressed table jump instructions (cm.jt and cm.jalt) to +// reduce code size for embedded systems +// module zcmt_decoder #( parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty, parameter type dcache_req_i_t = logic, parameter type dcache_req_o_t = logic, + parameter type jvt_t = logic, parameter type branchpredict_sbe_t = logic ) ( input logic clk_i, // Clock - input logic rst_ni, // Synchronous reset - input logic [31:0] instr_i, // instruction + input logic rst_ni, // Asynchronous reset, active low + input logic [31:0] instr_i, // instruction input logic [CVA6Cfg.VLEN-1:0] pc_i, // PC input logic is_zcmt_instr_i, // Intruction is of macro extension input logic illegal_instr_i, // From compressed decoder input logic is_compressed_i, // is compressed instruction - input logic issue_ack_i, // Check if the intruction is acknowledged - input logic [CVA6Cfg.XLEN-1:6] jvt_base_i, // JVT CSR base - input logic [5:0] jvt_mode_i, // JVT CSR mode + input jvt_t jvt_i, // JVT CSR value (base and mode fields) input dcache_req_o_t req_port_i, // Data cache request ouput - CACHE output logic [31:0] instr_o, // Instruction out output logic illegal_instr_o, // Illegel instruction output logic is_compressed_o, // is compressed instruction output logic fetch_stall_o, // Wait while address fetched from table - output logic is_zcmt_o, // is zcmt instruction output dcache_req_i_t req_port_o // Data cache request input - CACHE ); // FSM States - enum logic [1:0] { - IDLE, // Wait for ZCMT instruction - REQ_SENT, // Request sent to fetch the entry from jump table - TABLE_FETCH, // Check the valid data from jump table and record - JUMP
// Calculate the offset for jump and create jal instruction + enum logic { + IDLE, // On a ZCMT instruction, send the request that fetches the entry from the jump table + TABLE_JUMP // Wait for valid data from the jump table, then calculate the jump offset and create the jal instruction } state_d, state_q; - - //zcmt instruction type - enum logic [1:0] { - NOT_ZCMT, // 00: Not a ZCMT instrcution - JT, // 01: cm.jt instruction - JALT // 10: cm.jalt instruction - } zcmt_instr_type; - // Temporary registers - logic [31:0] instr_o_reg; logic [7:0] index; //index of instruction - logic [CVA6Cfg.XLEN-1:0] table_address; //Physical address: jvt + (index <<2) - logic [CVA6Cfg.XLEN+1:0] table_a; //Virtual address: {00,Physical address} - logic [31:0] jvt_table_add; - logic [CVA6Cfg.XLEN-1:0] data_rdata_d, data_rdata_q; //data received from instruction memory - logic [20:0] jump_add; //jump address immidiate - - assign instr_o = instr_o_reg; //instruction output assigned + //Physical address: jvt + (index <<2) + logic [CVA6Cfg.XLEN+1:0] table_address; //Virtual address: {00,Physical address} + logic [20:0] jump_addr; //jump address immediate (jal offset). NOTE(review): instr_o, table_address, jump_addr and req_port_o.address_index/address_tag are not assigned on every path of the always_comb below; confirm that no latches are inferred always_comb begin - state_d = state_q; - data_rdata_d = data_rdata_q; - illegal_instr_o = 1'b0; - is_compressed_o = is_zcmt_instr_i ? 1'b1 : is_compressed_i; - illegal_instr_o = 1'b0; - is_zcmt_o = 1'b0; - fetch_stall_o = is_zcmt_instr_i ? 1'b1 : 0; - - if (is_zcmt_instr_i) begin - unique case (instr_i[12:10]) - //zcmt instruction - 3'b000: begin - if (instr_i[9:2] < 32) begin //JT instruction - zcmt_instr_type = JT; - index = instr_i[9:2]; - end else if (instr_i[9:2] >= 32 & instr_i[9:2] <= 32) begin //JALT instruction - zcmt_instr_type = JALT; - index = instr_i[9:2]; - end else begin - zcmt_instr_type = NOT_ZCMT; //NOT ZCMT instruction - illegal_instr_o = 1'b1; - instr_o_reg = instr_i; - end - end - end - end - end - end - end - end - end - end - end - end - default: begin - illegal_instr_o = 1'b1; - instr_o_reg = instr_i; - zcmt_instr_type = NOT_ZCMT; - end - endcase - end else begin - illegal_instr_o = illegal_instr_i; - instr_o_reg = instr_i; - end + state_d = state_q; + illegal_instr_o = 1'b0; + is_compressed_o = is_zcmt_instr_i ? 1'b1 : is_compressed_i; + fetch_stall_o = is_zcmt_instr_i ?
1'b1 : 0; - - if (is_zcmt_instr_i) begin - unique case (instr_i[12:10]) - //zcmt instruction - 3'b000: begin - if (instr_i[9:2] < 32) begin //JT instruction - zcmt_instr_type = JT; - index = instr_i[9:2]; - end else if (instr_i[9:2] >= 32 & instr_i[9:2] <= 32) begin //JALT instruction - zcmt_instr_type = JALT; - index = instr_i[9:2]; - end else begin - zcmt_instr_type = NOT_ZCMT; //NOT ZCMT instruction - illegal_instr_o = 1'b1; - instr_o_reg = instr_i; - end - end - end - end - end - end - end - end - end - end - end - end - default: begin - illegal_instr_o = 1'b1; - instr_o_reg = instr_i; - zcmt_instr_type = NOT_ZCMT; - end - endcase - end else begin - illegal_instr_o = illegal_instr_i; - instr_o_reg = instr_i; - end + state_d = state_q; + illegal_instr_o = 1'b0; + is_compressed_o = is_zcmt_instr_i ? 1'b1 : is_compressed_i; + fetch_stall_o = is_zcmt_instr_i ? 1'b1 : 0; + + //cache request port + req_port_o.data_wdata = 1'b0; + req_port_o.data_wuser = '0; + req_port_o.data_req = 1'b0; + req_port_o.data_we = 1'b0; + req_port_o.data_be = 1'b0; + req_port_o.data_size = 2'b10; + req_port_o.data_id = 1; + req_port_o.kill_req = 0; + req_port_o.tag_valid = 1; unique case (state_q) IDLE: begin if (is_zcmt_instr_i) begin - state_d = REQ_SENT; + if (CVA6Cfg.XLEN == 32) begin //It is only target for 32 bit targets in cva6 with No MMU + table_address = {2'b00, ({jvt_i.base, jvt_i.mode} + (instr_i[9:2] << 2))}; + req_port_o.address_index = table_address[9:0]; + req_port_o.address_tag = table_address[33:10]; + state_d = TABLE_JUMP; + req_port_o.data_req = 1'b1; + end else illegal_instr_o = 1'b1; + //Condition may be extented for 64 bits embedded targets with No MMU end else begin - state_d = IDLE; + illegal_instr_o = illegal_instr_i; + instr_o = instr_i; + state_d = IDLE; end end - REQ_SENT: begin - state_d = TABLE_FETCH; - case (zcmt_instr_type) - JT: begin - if (CVA6Cfg.XLEN == 32) begin - jvt_table_add = {jvt_base_i[31:6], 6'b000000}; - table_address = jvt_table_add + (index 
<< 2); - table_a = {2'b00, table_address[CVA6Cfg.XLEN-1:0]}; - req_port_o.address_index = table_a[9:0]; - req_port_o.address_tag = table_a[33:10]; - req_port_o.data_wdata = 1'b0; - req_port_o.data_wuser = '0; - req_port_o.data_req = 1'b1; - req_port_o.data_we = 1'b0; - req_port_o.data_be = 1'b0; - req_port_o.data_size = 2'b10; - req_port_o.data_id = 1; - req_port_o.kill_req = 0; - req_port_o.tag_valid = 1; - - end else if (CVA6Cfg.XLEN == 64) begin - jvt_table_add = {jvt_base_i[31:6], 6'b000000}; - table_address = jvt_table_add + (index << 3); - table_a = {2'b00, table_address[CVA6Cfg.XLEN-1:0]}; - // will will completed in future( for 64 bit embedded core) - illegal_instr_o = 1'b1; - end else begin - illegal_instr_o = 1'b1; - instr_o_reg = instr_i; - end - end - JALT: begin - if (CVA6Cfg.XLEN == 32) begin - jvt_table_add = {jvt_base_i[31:6], 6'b000000}; - table_address = jvt_table_add + (index << 2); - table_a = {2'b00, table_address[CVA6Cfg.XLEN-1:0]}; - req_port_o.address_index = table_a[9:0]; - req_port_o.address_tag = table_a[33:10]; - req_port_o.data_wdata = 1'b0; - req_port_o.data_wuser = '0; - req_port_o.data_req = 1'b1; - req_port_o.data_we = 1'b0; - req_port_o.data_be = 1'b0; - req_port_o.data_size = 2'b10; - req_port_o.data_id = 1; - req_port_o.kill_req = 0; - req_port_o.tag_valid = 1; - - end else if (CVA6Cfg.XLEN == 64) begin - jvt_table_add = {jvt_base_i[31:6], 6'b000000}; - table_address = jvt_table_add + (index << 3); - table_a = {2'b00, table_address[CVA6Cfg.XLEN-1:0]}; - // will will completed in future( for 64 bit embedded core) - illegal_instr_o = 1'b1; - end else begin - illegal_instr_o = 1'b1; - instr_o_reg = instr_i; - end - end - default: state_d = IDLE; - endcase - end - TABLE_FETCH: begin - if (req_port_i.data_rid & req_port_i.data_rvalid) begin - data_rdata_d = req_port_i.data_rdata; - state_d = JUMP; - end else begin - state_d = TABLE_FETCH; - end - end - end - JUMP: begin - if (issue_ack_i) begin - - jump_add = 
$unsigned($signed(data_rdata_q) - $signed(pc_i)); - - if (zcmt_instr_type == JT) begin - instr_o_reg = { - jump_add[20], jump_add[10:1], jump_add[11], jump_add[19:12], 5'h0, riscv::OpcodeJal - }; //- jal pc_offset, x0 - end else if (zcmt_instr_type == JALT) begin - instr_o_reg = { - jump_add[20], jump_add[10:1], jump_add[11], jump_add[19:12], 5'h1, riscv::OpcodeJal + TABLE_JUMP: begin + if (req_port_i.data_rvalid) begin + jump_addr = $unsigned($signed(req_port_i.data_rdata) - $signed(pc_i)); + if (instr_i[9:2] < 32) begin //- jal pc_offset, x0 for no return stack + instr_o = { + jump_addr[20], + jump_addr[10:1], + jump_addr[11], + jump_addr[19:12], + 5'h0, + riscv::OpcodeJal + }; + end else if ((instr_i[9:2] >= 32) & (instr_i[9:2] <= 255)) begin //- jal pc_offset, x1 for return stack + instr_o = { + jump_addr[20], + jump_addr[10:1], + jump_addr[11], + jump_addr[19:12], + 5'h1, + riscv::OpcodeJal }; + end else begin + illegal_instr_o = 1'b1; + instr_o = instr_i; end - - is_zcmt_o = 1'b1; - state_d = IDLE; + state_d = IDLE; end else begin - state_d = JUMP; + state_d = TABLE_JUMP; end end default: begin @@ -209,17 +112,10 @@ module zcmt_decoder #( always_ff @(posedge clk_i or negedge rst_ni) begin if (~rst_ni) begin - state_q <= IDLE; - data_rdata_q <= '0; + state_q <= IDLE; end else begin - state_q <= state_d; - data_rdata_q <= data_rdata_d; - - end - end - end - end + state_q <= state_d; end end endmodule diff --git a/verif/tests/custom/zcmt/cm_jalt_ret.S b/verif/tests/custom/zcmt/cm_jalt_ret.S index f6586d60a0..82eaf36cc4 100644 --- a/verif/tests/custom/zcmt/cm_jalt_ret.S +++ b/verif/tests/custom/zcmt/cm_jalt_ret.S @@ -43,7 +43,7 @@ __jvt_base$: # Target Addresses (Where cm.jalt will jump) target0: - li x5, 99 + li x5, 9 j write_tohost target1: li x2, 99