Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Some Core RTL bug fixes and CVFPU 0.8.1 vendorization #860

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions bhv/cv32e40p_tb_wrapper.sv
Original file line number Diff line number Diff line change
Expand Up @@ -210,7 +210,7 @@ module cv32e40p_tb_wrapper
.apu_en_i (cv32e40p_top_i.apu_req),
.apu_singlecycle_i(cv32e40p_top_i.core_i.ex_stage_i.apu_singlecycle),
.apu_multicycle_i (cv32e40p_top_i.core_i.ex_stage_i.apu_multicycle),
.apu_rvalid_i (cv32e40p_top_i.apu_rvalid)
.apu_rvalid_i (cv32e40p_top_i.core_i.ex_stage_i.apu_valid)
);
`endif

Expand Down Expand Up @@ -344,7 +344,7 @@ module cv32e40p_tb_wrapper
// APU
.apu_req_i (cv32e40p_top_i.core_i.apu_req_o),
.apu_gnt_i (cv32e40p_top_i.core_i.apu_gnt_i),
.apu_rvalid_i(cv32e40p_top_i.core_i.apu_rvalid_i),
.apu_rvalid_i(cv32e40p_top_i.core_i.ex_stage_i.apu_valid),

// Controller FSM probes
.ctrl_fsm_cs_i(cv32e40p_top_i.core_i.id_stage_i.controller_i.ctrl_fsm_cs),
Expand Down
5 changes: 5 additions & 0 deletions rtl/cv32e40p_apu_disp.sv
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ module cv32e40p_apu_disp (
input logic [2:0][5:0] read_regs_i,
input logic [2:0] read_regs_valid_i,
output logic read_dep_o,
output logic read_dep_for_jalr_o,

input logic [1:0][5:0] write_regs_i,
input logic [1:0] write_regs_valid_i,
Expand Down Expand Up @@ -189,6 +190,10 @@ module cv32e40p_apu_disp (
assign read_dep_o = (read_dep_req | read_dep_inflight | read_dep_waiting) & is_decoding_i;
assign write_dep_o = (write_dep_req | write_dep_inflight | write_dep_waiting) & is_decoding_i;

assign read_dep_for_jalr_o = is_decoding_i & ((|read_deps_req & enable_i) |
(|read_deps_inflight & valid_inflight) |
(|read_deps_waiting & valid_waiting));

//
// Stall signals
//
Expand Down
9 changes: 7 additions & 2 deletions rtl/cv32e40p_controller.sv
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,8 @@
module cv32e40p_controller import cv32e40p_pkg::*;
#(
parameter COREV_CLUSTER = 0,
parameter COREV_PULP = 1
parameter COREV_PULP = 0,
parameter FPU = 0
)
(
input logic clk, // Gated clock
Expand Down Expand Up @@ -104,6 +105,7 @@ module cv32e40p_controller import cv32e40p_pkg::*;
// APU dependency checks
input logic apu_en_i,
input logic apu_read_dep_i,
input logic apu_read_dep_for_jalr_i,
input logic apu_write_dep_i,

output logic apu_stall_o,
Expand Down Expand Up @@ -1338,7 +1340,10 @@ endgenerate
if ((ctrl_transfer_insn_in_dec_i == BRANCH_JALR) &&
(((regfile_we_wb_i == 1'b1) && (reg_d_wb_is_reg_a_i == 1'b1)) ||
((regfile_we_ex_i == 1'b1) && (reg_d_ex_is_reg_a_i == 1'b1)) ||
((regfile_alu_we_fw_i == 1'b1) && (reg_d_alu_is_reg_a_i == 1'b1))) )
((regfile_alu_we_fw_i == 1'b1) && (reg_d_alu_is_reg_a_i == 1'b1)) ||
(FPU && (apu_read_dep_for_jalr_i == 1'b1))
)
)
begin
jr_stall_o = 1'b1;
deassert_we_o = 1'b1;
Expand Down
40 changes: 22 additions & 18 deletions rtl/cv32e40p_core.sv
Original file line number Diff line number Diff line change
Expand Up @@ -213,6 +213,7 @@ module cv32e40p_core
logic [ 2:0][ 5:0] apu_read_regs;
logic [ 2:0] apu_read_regs_valid;
logic apu_read_dep;
logic apu_read_dep_for_jalr;
logic [ 1:0][ 5:0] apu_write_regs;
logic [ 1:0] apu_write_regs_valid;
logic apu_write_dep;
Expand Down Expand Up @@ -361,7 +362,6 @@ module cv32e40p_core

// APU master signals
assign apu_flags_o = apu_flags_ex;
assign fflags_csr = apu_flags_i;

//////////////////////////////////////////////////////////////////////////////////////////////
// ____ _ _ __ __ _ //
Expand Down Expand Up @@ -621,14 +621,15 @@ module cv32e40p_core
.apu_flags_ex_o (apu_flags_ex),
.apu_waddr_ex_o (apu_waddr_ex),

.apu_read_regs_o (apu_read_regs),
.apu_read_regs_valid_o (apu_read_regs_valid),
.apu_read_dep_i (apu_read_dep),
.apu_write_regs_o (apu_write_regs),
.apu_write_regs_valid_o(apu_write_regs_valid),
.apu_write_dep_i (apu_write_dep),
.apu_perf_dep_o (perf_apu_dep),
.apu_busy_i (apu_busy),
.apu_read_regs_o (apu_read_regs),
.apu_read_regs_valid_o (apu_read_regs_valid),
.apu_read_dep_i (apu_read_dep),
.apu_read_dep_for_jalr_i(apu_read_dep_for_jalr),
.apu_write_regs_o (apu_write_regs),
.apu_write_regs_valid_o (apu_write_regs_valid),
.apu_write_dep_i (apu_write_dep),
.apu_perf_dep_o (perf_apu_dep),
.apu_busy_i (apu_busy),

// CSR ID/EX
.csr_access_ex_o (csr_access_ex),
Expand Down Expand Up @@ -779,23 +780,27 @@ module cv32e40p_core

.mult_multicycle_o(mult_multicycle), // to ID/EX pipe registers

.data_misaligned_ex_i(data_misaligned_ex), // from ID/EX pipeline
.data_misaligned_i (data_misaligned),

// FPU
.fpu_fflags_we_o(fflags_we),
.fpu_fflags_o (fflags_csr),

// APU
.apu_en_i (apu_en_ex),
.apu_op_i (apu_op_ex),
.apu_lat_i (apu_lat_ex),
.apu_operands_i(apu_operands_ex),
.apu_waddr_i (apu_waddr_ex),
.apu_flags_i (apu_flags_ex),

.apu_read_regs_i (apu_read_regs),
.apu_read_regs_valid_i (apu_read_regs_valid),
.apu_read_dep_o (apu_read_dep),
.apu_write_regs_i (apu_write_regs),
.apu_write_regs_valid_i(apu_write_regs_valid),
.apu_write_dep_o (apu_write_dep),
.apu_read_regs_i (apu_read_regs),
.apu_read_regs_valid_i (apu_read_regs_valid),
.apu_read_dep_o (apu_read_dep),
.apu_read_dep_for_jalr_o(apu_read_dep_for_jalr),
.apu_write_regs_i (apu_write_regs),
.apu_write_regs_valid_i (apu_write_regs_valid),
.apu_write_dep_o (apu_write_dep),

.apu_perf_type_o(perf_apu_type),
.apu_perf_cont_o(perf_apu_cont),
Expand All @@ -813,6 +818,7 @@ module cv32e40p_core
// response channel
.apu_rvalid_i (apu_rvalid_i),
.apu_result_i (apu_result_i),
.apu_flags_i (apu_flags_i),

.lsu_en_i (data_req_ex),
.lsu_rdata_i(lsu_rdata),
Expand Down Expand Up @@ -901,8 +907,6 @@ module cv32e40p_core
.data_misaligned_ex_i(data_misaligned_ex), // from ID/EX pipeline
.data_misaligned_o (data_misaligned),

.apu_busy_i(apu_busy),

.p_elw_start_o (p_elw_start),
.p_elw_finish_o(p_elw_finish),

Expand Down
152 changes: 92 additions & 60 deletions rtl/cv32e40p_ex_stage.sv
Original file line number Diff line number Diff line change
Expand Up @@ -76,20 +76,25 @@ module cv32e40p_ex_stage

output logic mult_multicycle_o,

input logic data_misaligned_ex_i,
input logic data_misaligned_i,

// FPU signals
output logic fpu_fflags_we_o,
output logic [APU_NUSFLAGS_CPU-1:0] fpu_fflags_o,

// APU signals
input logic apu_en_i,
input logic [ APU_WOP_CPU-1:0] apu_op_i,
input logic [ 1:0] apu_lat_i,
input logic [ APU_NARGS_CPU-1:0][31:0] apu_operands_i,
input logic [ 5:0] apu_waddr_i,
input logic [APU_NDSFLAGS_CPU-1:0] apu_flags_i,
input logic [APU_NUSFLAGS_CPU-1:0] apu_flags_i,

input logic [2:0][5:0] apu_read_regs_i,
input logic [2:0] apu_read_regs_valid_i,
output logic apu_read_dep_o,
output logic apu_read_dep_for_jalr_o,
input logic [1:0][5:0] apu_write_regs_i,
input logic [1:0] apu_write_regs_valid_i,
output logic apu_write_dep_o,
Expand Down Expand Up @@ -143,7 +148,7 @@ module cv32e40p_ex_stage
output logic branch_decision_o,

// Stall Control
input logic is_decoding_i, // Used to mask data dependencies inside the APU dispatcher when the instruction is not valid
input logic is_decoding_i, // Used to mask data dependencies inside the APU dispatcher when the instruction is not valid
input logic lsu_ready_ex_i, // EX part of LSU is done
input logic lsu_err_i,

Expand All @@ -152,29 +157,34 @@ module cv32e40p_ex_stage
input logic wb_ready_i // WB stage ready for new data
);

logic [31:0] alu_result;
logic [31:0] mult_result;
logic alu_cmp_result;
logic [ 31:0] alu_result;
logic [ 31:0] mult_result;
logic alu_cmp_result;

logic regfile_we_lsu;
logic [ 5:0] regfile_waddr_lsu;
logic regfile_we_lsu;
logic [ 5:0] regfile_waddr_lsu;

logic wb_contention;
logic wb_contention_lsu;
logic wb_contention;
logic wb_contention_lsu;

logic alu_ready;
logic mult_ready;
logic alu_ready;
logic mulh_active;
logic mult_ready;

// APU signals
logic apu_valid;
logic [ 5:0] apu_waddr;
logic [31:0] apu_result;
logic apu_stall;
logic apu_active;
logic apu_singlecycle;
logic apu_multicycle;
logic apu_req;
logic apu_gnt;
logic apu_valid;
logic [ 5:0] apu_waddr;
logic [ 31:0] apu_result;
logic apu_stall;
logic apu_active;
logic apu_singlecycle;
logic apu_multicycle;
logic apu_req;
logic apu_gnt;

logic apu_rvalid_q;
logic [ 31:0] apu_result_q;
logic [APU_NUSFLAGS_CPU-1:0] apu_flags_q;

// ALU write port mux
always_comb begin
Expand Down Expand Up @@ -295,9 +305,10 @@ module cv32e40p_ex_stage

.result_o(mult_result),

.multicycle_o(mult_multicycle_o),
.ready_o (mult_ready),
.ex_ready_i (ex_ready_o)
.multicycle_o (mult_multicycle_o),
.mulh_active_o(mulh_active),
.ready_o (mult_ready),
.ex_ready_i (ex_ready_o)
);

generate
Expand Down Expand Up @@ -326,13 +337,14 @@ module cv32e40p_ex_stage
.active_o(apu_active),
.stall_o (apu_stall),

.is_decoding_i (is_decoding_i),
.read_regs_i (apu_read_regs_i),
.read_regs_valid_i (apu_read_regs_valid_i),
.read_dep_o (apu_read_dep_o),
.write_regs_i (apu_write_regs_i),
.write_regs_valid_i(apu_write_regs_valid_i),
.write_dep_o (apu_write_dep_o),
.is_decoding_i (is_decoding_i),
.read_regs_i (apu_read_regs_i),
.read_regs_valid_i (apu_read_regs_valid_i),
.read_dep_o (apu_read_dep_o),
.read_dep_for_jalr_o(apu_read_dep_for_jalr_o),
.write_regs_i (apu_write_regs_i),
.write_regs_valid_i (apu_write_regs_valid_i),
.write_dep_o (apu_write_dep_o),

.perf_type_o(apu_perf_type_o),
.perf_cont_o(apu_perf_cont_o),
Expand All @@ -345,40 +357,60 @@ module cv32e40p_ex_stage
.apu_rvalid_i(apu_valid)
);

assign apu_perf_wb_o = wb_contention | wb_contention_lsu;
assign apu_ready_wb_o = ~(apu_active | apu_en_i | apu_stall) | apu_valid;
assign apu_perf_wb_o = wb_contention | wb_contention_lsu;
assign apu_ready_wb_o = ~(apu_active | apu_en_i | apu_stall) | apu_valid;

///////////////////////////////////////
// APU result memorization Register //
///////////////////////////////////////
// Captures an APU response (valid, result, flags) into the *_q registers when it
// arrives during a multicycle APU operation while the hold condition is true.
// Hold condition (identical expression in both branches below):
//   data_misaligned_i || data_misaligned_ex_i || regfile_alu_we_i ||
//   (mulh_active && mult_operator_i == MUL_H)
// NOTE(review): presumably these are the cases where the APU result cannot be
// written back in the same cycle - confirm against the write-port muxing.
always_ff @(posedge clk, negedge rst_n) begin : APU_Result_Memorization
if (~rst_n) begin
// Asynchronous active-low reset: no response is held.
apu_rvalid_q <= 1'b0;
apu_result_q <= 'b0;
apu_flags_q <= 'b0;
end else begin
// Capture: a response arrives for a multicycle op while the hold condition is true.
if (apu_rvalid_i && apu_multicycle && (data_misaligned_i || data_misaligned_ex_i || regfile_alu_we_i || (mulh_active && (mult_operator_i == MUL_H)))) begin
apu_rvalid_q <= 1'b1;
apu_result_q <= apu_result_i;
apu_flags_q <= apu_flags_i;
// Release: a response is held and the hold condition is no longer true.
// Only the valid bit is cleared; apu_result_q and apu_flags_q keep their values.
end else if (apu_rvalid_q && !(data_misaligned_i || data_misaligned_ex_i || regfile_alu_we_i || (mulh_active && (mult_operator_i == MUL_H)))) begin
apu_rvalid_q <= 1'b0;
end
end
end

assign apu_req_o = apu_req;
assign apu_gnt = apu_gnt_i;
assign apu_valid = apu_rvalid_i;
assign apu_operands_o = apu_operands_i;
assign apu_op_o = apu_op_i;
assign apu_result = apu_result_i;
assign apu_req_o = apu_req;
assign apu_gnt = apu_gnt_i;
assign apu_valid = (apu_multicycle && (data_misaligned_i || data_misaligned_ex_i || regfile_alu_we_i || (mulh_active && (mult_operator_i == MUL_H)))) ? 1'b0 : (apu_rvalid_i || apu_rvalid_q);
assign apu_operands_o = apu_operands_i;
assign apu_op_o = apu_op_i;
assign apu_result = apu_rvalid_q ? apu_result_q : apu_result_i;
assign fpu_fflags_we_o = apu_valid;
assign fpu_fflags_o = apu_rvalid_q ? apu_flags_q : apu_flags_i;
end else begin : gen_no_apu
// Default assignments for the case when no FPU/APU is attached.
assign apu_req_o = '0;
assign apu_operands_o[0] = '0;
assign apu_operands_o[1] = '0;
assign apu_operands_o[2] = '0;
assign apu_op_o = '0;
assign apu_req = 1'b0;
assign apu_gnt = 1'b0;
assign apu_result = 32'b0;
assign apu_valid = 1'b0;
assign apu_waddr = 6'b0;
assign apu_stall = 1'b0;
assign apu_active = 1'b0;
assign apu_ready_wb_o = 1'b1;
assign apu_perf_wb_o = 1'b0;
assign apu_perf_cont_o = 1'b0;
assign apu_perf_type_o = 1'b0;
assign apu_singlecycle = 1'b0;
assign apu_multicycle = 1'b0;
assign apu_read_dep_o = 1'b0;
assign apu_write_dep_o = 1'b0;
assign fpu_fflags_we_o = 1'b0;

assign apu_req_o = '0;
assign apu_operands_o[0] = '0;
assign apu_operands_o[1] = '0;
assign apu_operands_o[2] = '0;
assign apu_op_o = '0;
assign apu_req = 1'b0;
assign apu_gnt = 1'b0;
assign apu_result = 32'b0;
assign apu_valid = 1'b0;
assign apu_waddr = 6'b0;
assign apu_stall = 1'b0;
assign apu_active = 1'b0;
assign apu_ready_wb_o = 1'b1;
assign apu_perf_wb_o = 1'b0;
assign apu_perf_cont_o = 1'b0;
assign apu_perf_type_o = 1'b0;
assign apu_singlecycle = 1'b0;
assign apu_multicycle = 1'b0;
assign apu_read_dep_o = 1'b0;
assign apu_read_dep_for_jalr_o = 1'b0;
assign apu_write_dep_o = 1'b0;
assign fpu_fflags_o = '0;
end
endgenerate

Expand Down
Loading
Loading