From 95fbc1e2b4708064e145f75b3372b4c132e1d0c6 Mon Sep 17 00:00:00 2001 From: Matteo Perotti Date: Sat, 26 Oct 2024 11:48:35 +0200 Subject: [PATCH] [hardware] Add support for segment mem ops --- Bender.yml | 2 + hardware/include/ara_pkg.sv | 6 + hardware/scripts/wave_ara.tcl | 1 + hardware/src/ara.sv | 6 + hardware/src/ara_dispatcher.sv | 2314 +++++++++++++++-------------- hardware/src/ara_soc.sv | 3 + hardware/src/ara_system.sv | 3 + hardware/src/segment_sequencer.sv | 188 +++ 8 files changed, 1412 insertions(+), 1111 deletions(-) create mode 100644 hardware/src/segment_sequencer.sv diff --git a/Bender.yml b/Bender.yml index 58a20d33e..b5ed84733 100644 --- a/Bender.yml +++ b/Bender.yml @@ -27,6 +27,8 @@ sources: - hardware/include/ara_pkg.sv # Sources + # Level 0 + - hardware/src/segment_sequencer.sv # Level 1 - hardware/src/ctrl_registers.sv - hardware/src/cva6_accel_first_pass_decoder.sv diff --git a/hardware/include/ara_pkg.sv b/hardware/include/ara_pkg.sv index 13b5b0424..6fa695a7f 100644 --- a/hardware/include/ara_pkg.sv +++ b/hardware/include/ara_pkg.sv @@ -32,6 +32,12 @@ package ara_pkg; FixedPointEnable = 1'b1 } fixpt_support_e; + // Support for segment memory operations + typedef enum logic { + SegSupportDisable = 1'b0, + SegSupportEnable = 1'b1 + } seg_support_e; + // FP support outside of the FPU (external) // vfrec7, vfrsqrt7, round-toward-odd typedef enum logic { diff --git a/hardware/scripts/wave_ara.tcl b/hardware/scripts/wave_ara.tcl index b66587474..f32432801 100644 --- a/hardware/scripts/wave_ara.tcl +++ b/hardware/scripts/wave_ara.tcl @@ -7,6 +7,7 @@ add wave -noupdate -group Ara -group core /ara_tb/dut/i_ara_soc/i_system/i_ara/* add wave -noupdate -group Ara -group dispatcher /ara_tb/dut/i_ara_soc/i_system/i_ara/i_dispatcher/* +add wave -noupdate -group Ara -group dispatcher -group segment_sequencer /ara_tb/dut/i_ara_soc/i_system/i_ara/i_dispatcher/i_segment_sequencer/* add wave -noupdate -group Ara -group sequencer 
/ara_tb/dut/i_ara_soc/i_system/i_ara/i_sequencer/* # Add waves from all the lanes diff --git a/hardware/src/ara.sv b/hardware/src/ara.sv index 0005a3542..509c714ba 100644 --- a/hardware/src/ara.sv +++ b/hardware/src/ara.sv @@ -16,6 +16,8 @@ module ara import ara_pkg::*; #( parameter fpext_support_e FPExtSupport = FPExtSupportEnable, // Support for fixed-point data types parameter fixpt_support_e FixPtSupport = FixedPointEnable, + // Support for segment memory operations + parameter seg_support_e SegSupport = SegSupportEnable, // AXI Interface parameter int unsigned AxiDataWidth = 0, parameter int unsigned AxiAddrWidth = 0, @@ -123,6 +125,9 @@ module ara import ara_pkg::*; #( // Effective length multiplier rvv_pkg::vlmul_e emul; + // Number of segments in segment mem op + logic [2:0] nf; + // Rounding-Mode for FP operations fpnew_pkg::roundmode_e fp_rm; // Widen FP immediate (re-encoding) @@ -175,6 +180,7 @@ module ara import ara_pkg::*; #( ara_dispatcher #( .NrLanes (NrLanes ), .VLEN (VLEN ), + .SegSupport(SegSupport), .ara_req_t (ara_req_t ), .ara_resp_t(ara_resp_t) ) i_dispatcher ( diff --git a/hardware/src/ara_dispatcher.sv b/hardware/src/ara_dispatcher.sv index 6063cdfe9..b6768aeab 100644 --- a/hardware/src/ara_dispatcher.sv +++ b/hardware/src/ara_dispatcher.sv @@ -19,6 +19,8 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( parameter fpext_support_e FPExtSupport = FPExtSupportEnable, // Support for fixed-point data types parameter fixpt_support_e FixPtSupport = FixedPointEnable, + // Support for segment memory operations + parameter seg_support_e SegSupport = SegSupportEnable, // Dependent parameters: DO NOT CHANGE localparam type vlen_t = logic[$clog2(VLEN+1)-1:0], localparam int unsigned VLENB = VLEN / 8 @@ -113,7 +115,7 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( // Backend interface // ///////////////////////// - ara_req_t ara_req_d; + ara_req_t ara_req, ara_req_d; logic ara_req_valid_d; always_ff @(posedge clk_i or 
negedge rst_ni) begin @@ -227,11 +229,11 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( logic null_vslideup; // Pipeline the VLSU's load and store complete signals, for timing reasons - logic load_complete_q; - logic store_complete_q; + logic load_complete, load_complete_q; + logic store_complete, store_complete_q; logic illegal_insn_load, illegal_insn_store; - `FF(load_complete_q, load_complete_i || illegal_insn_load, 1'b0) - `FF(store_complete_q, store_complete_i || illegal_insn_store, 1'b0) + `FF(load_complete_q, load_complete || illegal_insn_load, 1'b0) + `FF(store_complete_q, store_complete || illegal_insn_store, 1'b0) // NP2 Slide support logic is_stride_np2; @@ -241,17 +243,49 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( popcount #( .INPUT_WIDTH (idx_width(VLENB << 3)) ) i_np2_stride ( - .data_i (ara_req_d.stride[idx_width(VLENB << 3)-1:0]), + .data_i (ara_req.stride[idx_width(VLENB << 3)-1:0] ), .popcount_o(sldu_popc ) ); assign is_stride_np2 = sldu_popc > 1; + // Segment-memory instruction sequencer + // Decompose the segment memory operations into non-segment memory operations + // This is a low-impact and low-performance implementation + logic is_segment_mem_op; + logic illegal_insn; + // Is the seg sequencer operating? 
+ logic segment_micro_op_on; + + // The handshake signals are just passed-through if the insn is non-segment + ara_resp_t ara_resp; + segment_sequencer #( + .SegSupport(SegSupport) + ) i_segment_sequencer ( + .clk_i(clk_i), + .rst_ni(rst_ni), + .ara_idle_i(ara_idle_i), + .is_segment_mem_op_i(is_segment_mem_op), + .illegal_insn_i(illegal_insn), + .is_vload_i(is_vload), + .segment_micro_op_on_o(segment_micro_op_on), + .load_complete_i(load_complete_i), + .load_complete_o(load_complete), + .store_complete_i(store_complete_i), + .store_complete_o(store_complete), + .ara_req_i(ara_req), + .ara_req_o(ara_req_d), + .ara_req_ready_i(ara_req_ready_i), + .ara_resp_i(ara_resp_i), + .ara_resp_o(ara_resp), + .ara_resp_valid_i(ara_resp_valid_i), + .ara_resp_valid_o(ara_resp_valid) + ); + /////////////// // Decoder // /////////////// - logic illegal_insn; elen_t vfmvfs_result; always_comb begin: p_decoder @@ -296,6 +330,8 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( is_decoding = 1'b0; in_lane_op = 1'b0; + is_segment_mem_op = 1'b0; + acc_resp_o = '{ trans_id : acc_req_i.trans_id, load_complete : load_zero_vl | load_complete_q, @@ -310,7 +346,7 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( // fflags for (int lane = 0; lane < NrLanes; lane++) acc_resp_o.fflags |= fflags_ex_i[lane]; - ara_req_d = '{ + ara_req = '{ vl : csr_vl_q, vstart : csr_vstart_q, vtype : csr_vtype_q, @@ -355,33 +391,33 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( rs_mask_request_d = 1'b0; // Every single reshuffle request refers to LMUL == 1 - ara_req_d.emul = LMUL_1; + ara_req.emul = LMUL_1; // vstart is always 0 for a reshuffle - ara_req_d.vstart = '0; + ara_req.vstart = '0; // These generate a reshuffle request to Ara's backend // When LMUL > 1, not all the regs that compose a large // register should always be reshuffled ara_req_valid_d = ~rs_mask_request_q; - ara_req_d.use_scalar_op = 1'b1; - ara_req_d.vs2 = vs_buffer_q; - ara_req_d.eew_vs2 = 
eew_old_buffer_q; - ara_req_d.use_vs2 = 1'b1; - ara_req_d.vd = vs_buffer_q; - ara_req_d.use_vd = 1'b1; - ara_req_d.op = ara_pkg::VSLIDEDOWN; - ara_req_d.stride = '0; - ara_req_d.use_scalar_op = 1'b0; + ara_req.use_scalar_op = 1'b1; + ara_req.vs2 = vs_buffer_q; + ara_req.eew_vs2 = eew_old_buffer_q; + ara_req.use_vs2 = 1'b1; + ara_req.vd = vs_buffer_q; + ara_req.use_vd = 1'b1; + ara_req.op = ara_pkg::VSLIDEDOWN; + ara_req.stride = '0; + ara_req.use_scalar_op = 1'b0; // Unmasked: reshuffle everything - ara_req_d.vm = 1'b1; + ara_req.vm = 1'b1; // Shuffle the whole reg (vl refers to current vsew) - ara_req_d.vtype.vsew = eew_new_buffer_q; + ara_req.vtype.vsew = eew_new_buffer_q; // Always reshuffle one vreg at a time - ara_req_d.vl = VLENB >> ara_req_d.vtype.vsew; + ara_req.vl = VLENB >> ara_req.vtype.vsew; // Vl refers to current system vsew but operand requesters // will fetch from a register with a different eew - ara_req_d.scale_vl = 1'b1; + ara_req.scale_vl = 1'b1; // Backend ready - Decide what to do next if (ara_req_ready_i) begin @@ -557,28 +593,28 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( OPIVV: begin: opivv // These generate a request to Ara's backend - ara_req_d.vs1 = insn.varith_type.rs1; - ara_req_d.use_vs1 = 1'b1; - ara_req_d.vs2 = insn.varith_type.rs2; - ara_req_d.use_vs2 = 1'b1; - ara_req_d.vd = insn.varith_type.rd; - ara_req_d.use_vd = 1'b1; - ara_req_d.vm = insn.varith_type.vm; + ara_req.vs1 = insn.varith_type.rs1; + ara_req.use_vs1 = 1'b1; + ara_req.vs2 = insn.varith_type.rs2; + ara_req.use_vs2 = 1'b1; + ara_req.vd = insn.varith_type.rd; + ara_req.use_vd = 1'b1; + ara_req.vm = insn.varith_type.vm; ara_req_valid_d = 1'b1; // Decode based on the func6 field unique case (insn.varith_type.func6) - 6'b000000: ara_req_d.op = ara_pkg::VADD; - 6'b000010: ara_req_d.op = ara_pkg::VSUB; - 6'b000100: ara_req_d.op = ara_pkg::VMINU; - 6'b000101: ara_req_d.op = ara_pkg::VMIN; - 6'b000110: ara_req_d.op = ara_pkg::VMAXU; - 6'b000111: 
ara_req_d.op = ara_pkg::VMAX; - 6'b001001: ara_req_d.op = ara_pkg::VAND; - 6'b001010: ara_req_d.op = ara_pkg::VOR; - 6'b001011: ara_req_d.op = ara_pkg::VXOR; + 6'b000000: ara_req.op = ara_pkg::VADD; + 6'b000010: ara_req.op = ara_pkg::VSUB; + 6'b000100: ara_req.op = ara_pkg::VMINU; + 6'b000101: ara_req.op = ara_pkg::VMIN; + 6'b000110: ara_req.op = ara_pkg::VMAXU; + 6'b000111: ara_req.op = ara_pkg::VMAX; + 6'b001001: ara_req.op = ara_pkg::VAND; + 6'b001010: ara_req.op = ara_pkg::VOR; + 6'b001011: ara_req.op = ara_pkg::VXOR; 6'b010000: begin - ara_req_d.op = ara_pkg::VADC; + ara_req.op = ara_pkg::VADC; // Encoding corresponding to unmasked operations are reserved if (insn.varith_type.vm) illegal_insn = 1'b1; @@ -587,11 +623,11 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( if (insn.varith_type.rd == 5'b0) illegal_insn = 1'b1; end 6'b010001: begin - ara_req_d.op = ara_pkg::VMADC; - ara_req_d.use_vd_op = 1'b1; + ara_req.op = ara_pkg::VMADC; + ara_req.use_vd_op = 1'b1; // Check whether we can access vs1 and vs2 - unique case (ara_req_d.emul) + unique case (ara_req.emul) LMUL_2: if (((insn.varith_type.rs1 & 5'b00001) == (insn.varith_type.rd & 5'b00001)) || ((insn.varith_type.rs2 & 5'b00001) == (insn.varith_type.rd & 5'b00001))) @@ -610,18 +646,18 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( endcase end 6'b010010: begin - ara_req_d.op = ara_pkg::VSBC; + ara_req.op = ara_pkg::VSBC; // Encoding corresponding to unmasked operations are reserved if (insn.varith_type.vm) illegal_insn = 1'b1; // An illegal instruction is raised if the destination vector is v0 if (insn.varith_type.rd == 5'b0) illegal_insn = 1'b1; end 6'b010011: begin - ara_req_d.op = ara_pkg::VMSBC; - ara_req_d.use_vd_op = 1'b1; + ara_req.op = ara_pkg::VMSBC; + ara_req.use_vd_op = 1'b1; // Check whether we can access vs1 and vs2 - unique case (ara_req_d.emul) + unique case (ara_req.emul) LMUL_2: if (((insn.varith_type.rs1 & 5'b00001) == (insn.varith_type.rd & 5'b00001)) || 
((insn.varith_type.rs2 & 5'b00001) == ( insn.varith_type.rd & 5'b00001))) @@ -640,61 +676,61 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( endcase end 6'b011000: begin - ara_req_d.op = ara_pkg::VMSEQ; - ara_req_d.use_vd_op = 1'b1; + ara_req.op = ara_pkg::VMSEQ; + ara_req.use_vd_op = 1'b1; end 6'b011001: begin - ara_req_d.op = ara_pkg::VMSNE; - ara_req_d.use_vd_op = 1'b1; + ara_req.op = ara_pkg::VMSNE; + ara_req.use_vd_op = 1'b1; end 6'b011010: begin - ara_req_d.op = ara_pkg::VMSLTU; - ara_req_d.use_vd_op = 1'b1; + ara_req.op = ara_pkg::VMSLTU; + ara_req.use_vd_op = 1'b1; end 6'b011011: begin - ara_req_d.op = ara_pkg::VMSLT; - ara_req_d.use_vd_op = 1'b1; + ara_req.op = ara_pkg::VMSLT; + ara_req.use_vd_op = 1'b1; end 6'b011100: begin - ara_req_d.op = ara_pkg::VMSLEU; - ara_req_d.use_vd_op = 1'b1; + ara_req.op = ara_pkg::VMSLEU; + ara_req.use_vd_op = 1'b1; end 6'b011101: begin - ara_req_d.op = ara_pkg::VMSLE; - ara_req_d.use_vd_op = 1'b1; + ara_req.op = ara_pkg::VMSLE; + ara_req.use_vd_op = 1'b1; end 6'b010111: begin - ara_req_d.op = ara_pkg::VMERGE; - ara_req_d.use_vs2 = !insn.varith_type.vm; // vmv.v.v does not use vs2 + ara_req.op = ara_pkg::VMERGE; + ara_req.use_vs2 = !insn.varith_type.vm; // vmv.v.v does not use vs2 // With a normal vmv.v.v, copy input eew to output // to avoid unnecessary reshuffles if (insn.varith_type.vm) begin - ara_req_d.eew_vs1 = eew_q[ara_req_d.vs1]; - ara_req_d.vtype.vsew = eew_q[ara_req_d.vs1]; - ara_req_d.vl = (csr_vl_q << csr_vtype_q.vsew[1:0]) >> ara_req_d.eew_vs1[1:0]; - end - end - 6'b100000: ara_req_d.op = ara_pkg::VSADDU; - 6'b100001: ara_req_d.op = ara_pkg::VSADD; - 6'b100010: ara_req_d.op = ara_pkg::VSSUBU; - 6'b100011: ara_req_d.op = ara_pkg::VSSUB; - 6'b100101: ara_req_d.op = ara_pkg::VSLL; - 6'b100111: ara_req_d.op = ara_pkg::VSMUL; - 6'b101000: ara_req_d.op = ara_pkg::VSRL; - 6'b101010: ara_req_d.op = ara_pkg::VSSRL; - 6'b101011: ara_req_d.op = ara_pkg::VSSRA; - 6'b101001: ara_req_d.op = ara_pkg::VSRA; + 
ara_req.eew_vs1 = eew_q[ara_req.vs1]; + ara_req.vtype.vsew = eew_q[ara_req.vs1]; + ara_req.vl = (csr_vl_q << csr_vtype_q.vsew[1:0]) >> ara_req.eew_vs1[1:0]; + end + end + 6'b100000: ara_req.op = ara_pkg::VSADDU; + 6'b100001: ara_req.op = ara_pkg::VSADD; + 6'b100010: ara_req.op = ara_pkg::VSSUBU; + 6'b100011: ara_req.op = ara_pkg::VSSUB; + 6'b100101: ara_req.op = ara_pkg::VSLL; + 6'b100111: ara_req.op = ara_pkg::VSMUL; + 6'b101000: ara_req.op = ara_pkg::VSRL; + 6'b101010: ara_req.op = ara_pkg::VSSRL; + 6'b101011: ara_req.op = ara_pkg::VSSRA; + 6'b101001: ara_req.op = ara_pkg::VSRA; 6'b101100: begin - ara_req_d.op = ara_pkg::VNSRL; - ara_req_d.conversion_vs1 = OpQueueConversionZExt2; - ara_req_d.eew_vs2 = csr_vtype_q.vsew.next(); + ara_req.op = ara_pkg::VNSRL; + ara_req.conversion_vs1 = OpQueueConversionZExt2; + ara_req.eew_vs2 = csr_vtype_q.vsew.next(); lmul_vs2 = next_lmul(csr_vtype_q.vlmul); // Check whether the EEW is not too wide. if (int'(csr_vtype_q.vsew) > int'(EW32)) illegal_insn = 1'b1; // Check whether we can access vs2 - unique case (ara_req_d.emul.next()) + unique case (ara_req.emul.next()) LMUL_2: if ((insn.varith_type.rs2 & 5'b00001) != 5'b00000) illegal_insn = 1'b1; LMUL_4: if ((insn.varith_type.rs2 & 5'b00011) != 5'b00000) illegal_insn = 1'b1; LMUL_8: if ((insn.varith_type.rs2 & 5'b00111) != 5'b00000) illegal_insn = 1'b1; @@ -703,16 +739,16 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( endcase end 6'b101101: begin - ara_req_d.op = ara_pkg::VNSRA; - ara_req_d.conversion_vs1 = OpQueueConversionZExt2; - ara_req_d.eew_vs2 = csr_vtype_q.vsew.next(); + ara_req.op = ara_pkg::VNSRA; + ara_req.conversion_vs1 = OpQueueConversionZExt2; + ara_req.eew_vs2 = csr_vtype_q.vsew.next(); lmul_vs2 = next_lmul(csr_vtype_q.vlmul); // Check whether the EEW is not too wide. 
if (int'(csr_vtype_q.vsew) > int'(EW32)) illegal_insn = 1'b1; // Check whether we can access vs2 - unique case (ara_req_d.emul.next()) + unique case (ara_req.emul.next()) LMUL_2: if ((insn.varith_type.rs2 & 5'b00001) != 5'b00000) illegal_insn = 1'b1; LMUL_4: if ((insn.varith_type.rs2 & 5'b00011) != 5'b00000) illegal_insn = 1'b1; LMUL_8: if ((insn.varith_type.rs2 & 5'b00111) != 5'b00000) illegal_insn = 1'b1; @@ -721,39 +757,39 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( endcase end 6'b101110: begin - ara_req_d.op = ara_pkg::VNCLIPU; - ara_req_d.eew_vs2 = csr_vtype_q.vsew.next(); + ara_req.op = ara_pkg::VNCLIPU; + ara_req.eew_vs2 = csr_vtype_q.vsew.next(); end 6'b101111: begin - ara_req_d.op = ara_pkg::VNCLIP; - ara_req_d.eew_vs2 = csr_vtype_q.vsew.next(); + ara_req.op = ara_pkg::VNCLIP; + ara_req.eew_vs2 = csr_vtype_q.vsew.next(); end // Reductions encode in cvt_resize the neutral value bits // CVT_WIDE is 2'b00 (hack to save wires) 6'b110000: begin - ara_req_d.op = ara_pkg::VWREDSUMU; - ara_req_d.emul = next_lmul(csr_vtype_q.vlmul); - ara_req_d.eew_vs1 = csr_vtype_q.vsew.next(); - ara_req_d.vtype.vsew = csr_vtype_q.vsew.next(); - ara_req_d.conversion_vs1 = OpQueueReductionZExt; - ara_req_d.conversion_vs2 = OpQueueConversionZExt2; - ara_req_d.cvt_resize = CVT_WIDE; + ara_req.op = ara_pkg::VWREDSUMU; + ara_req.emul = next_lmul(csr_vtype_q.vlmul); + ara_req.eew_vs1 = csr_vtype_q.vsew.next(); + ara_req.vtype.vsew = csr_vtype_q.vsew.next(); + ara_req.conversion_vs1 = OpQueueReductionZExt; + ara_req.conversion_vs2 = OpQueueConversionZExt2; + ara_req.cvt_resize = CVT_WIDE; end 6'b110001: begin - ara_req_d.op = ara_pkg::VWREDSUM; - ara_req_d.emul = next_lmul(csr_vtype_q.vlmul); - ara_req_d.eew_vs1 = csr_vtype_q.vsew.next(); - ara_req_d.vtype.vsew = csr_vtype_q.vsew.next(); - ara_req_d.conversion_vs1 = OpQueueReductionZExt; - ara_req_d.conversion_vs2 = OpQueueConversionSExt2; - ara_req_d.cvt_resize = CVT_WIDE; + ara_req.op = ara_pkg::VWREDSUM; + 
ara_req.emul = next_lmul(csr_vtype_q.vlmul); + ara_req.eew_vs1 = csr_vtype_q.vsew.next(); + ara_req.vtype.vsew = csr_vtype_q.vsew.next(); + ara_req.conversion_vs1 = OpQueueReductionZExt; + ara_req.conversion_vs2 = OpQueueConversionSExt2; + ara_req.cvt_resize = CVT_WIDE; end default: illegal_insn = 1'b1; endcase // Instructions with an integer LMUL have extra constraints on the registers they can // access. - unique case (ara_req_d.emul) + unique case (ara_req.emul) LMUL_2: if ((insn.varith_type.rs1 & 5'b00001) != 5'b00000 || (insn.varith_type.rs2 & 5'b00001) != 5'b00000 || (insn.varith_type.rd & 5'b00001) != 5'b00000) illegal_insn = 1'b1; @@ -772,53 +808,53 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( OPIVX: begin: opivx // These generate a request to Ara's backend - ara_req_d.scalar_op = acc_req_i.rs1; - ara_req_d.use_scalar_op = 1'b1; - ara_req_d.vs2 = insn.varith_type.rs2; - ara_req_d.use_vs2 = 1'b1; - ara_req_d.vd = insn.varith_type.rd; - ara_req_d.use_vd = 1'b1; - ara_req_d.vm = insn.varith_type.vm; - ara_req_d.is_stride_np2 = is_stride_np2; + ara_req.scalar_op = acc_req_i.rs1; + ara_req.use_scalar_op = 1'b1; + ara_req.vs2 = insn.varith_type.rs2; + ara_req.use_vs2 = 1'b1; + ara_req.vd = insn.varith_type.rd; + ara_req.use_vd = 1'b1; + ara_req.vm = insn.varith_type.vm; + ara_req.is_stride_np2 = is_stride_np2; ara_req_valid_d = 1'b1; // Decode based on the func6 field unique case (insn.varith_type.func6) - 6'b000000: ara_req_d.op = ara_pkg::VADD; - 6'b000010: ara_req_d.op = ara_pkg::VSUB; - 6'b000011: ara_req_d.op = ara_pkg::VRSUB; - 6'b000100: ara_req_d.op = ara_pkg::VMINU; - 6'b000101: ara_req_d.op = ara_pkg::VMIN; - 6'b000110: ara_req_d.op = ara_pkg::VMAXU; - 6'b000111: ara_req_d.op = ara_pkg::VMAX; - 6'b001001: ara_req_d.op = ara_pkg::VAND; - 6'b001010: ara_req_d.op = ara_pkg::VOR; - 6'b001011: ara_req_d.op = ara_pkg::VXOR; + 6'b000000: ara_req.op = ara_pkg::VADD; + 6'b000010: ara_req.op = ara_pkg::VSUB; + 6'b000011: ara_req.op = 
ara_pkg::VRSUB; + 6'b000100: ara_req.op = ara_pkg::VMINU; + 6'b000101: ara_req.op = ara_pkg::VMIN; + 6'b000110: ara_req.op = ara_pkg::VMAXU; + 6'b000111: ara_req.op = ara_pkg::VMAX; + 6'b001001: ara_req.op = ara_pkg::VAND; + 6'b001010: ara_req.op = ara_pkg::VOR; + 6'b001011: ara_req.op = ara_pkg::VXOR; 6'b001110: begin - ara_req_d.op = ara_pkg::VSLIDEUP; - ara_req_d.stride = acc_req_i.rs1; - ara_req_d.eew_vs2 = csr_vtype_q.vsew; + ara_req.op = ara_pkg::VSLIDEUP; + ara_req.stride = acc_req_i.rs1; + ara_req.eew_vs2 = csr_vtype_q.vsew; // Encode vslideup/vslide1up on the use_scalar_op field - ara_req_d.use_scalar_op = 1'b0; + ara_req.use_scalar_op = 1'b0; // Vl refers to current system vsew, but operand requesters // will fetch bytes from a vreg with a different eew // i.e., request will need reshuffling - ara_req_d.scale_vl = 1'b1; + ara_req.scale_vl = 1'b1; // If stride > vl, the vslideup has no effects - if (|ara_req_d.stride[$bits(ara_req_d.stride)-1:$bits(csr_vl_q)] || - (vlen_t'(ara_req_d.stride) >= csr_vl_q)) null_vslideup = 1'b1; + if (|ara_req.stride[$bits(ara_req.stride)-1:$bits(csr_vl_q)] || + (vlen_t'(ara_req.stride) >= csr_vl_q)) null_vslideup = 1'b1; end 6'b001111: begin - ara_req_d.op = ara_pkg::VSLIDEDOWN; - ara_req_d.stride = acc_req_i.rs1; - ara_req_d.eew_vs2 = csr_vtype_q.vsew; + ara_req.op = ara_pkg::VSLIDEDOWN; + ara_req.stride = acc_req_i.rs1; + ara_req.eew_vs2 = csr_vtype_q.vsew; // Encode vslidedown/vslide1down on the use_scalar_op field - ara_req_d.use_scalar_op = 1'b0; + ara_req.use_scalar_op = 1'b0; // Request will need reshuffling - ara_req_d.scale_vl = 1'b1; + ara_req.scale_vl = 1'b1; end 6'b010000: begin - ara_req_d.op = ara_pkg::VADC; + ara_req.op = ara_pkg::VADC; // Encoding corresponding to unmasked operations are reserved if (insn.varith_type.vm) illegal_insn = 1'b1; @@ -827,11 +863,11 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( if (insn.varith_type.rd == 5'b0) illegal_insn = 1'b1; end 6'b010001: begin - 
ara_req_d.op = ara_pkg::VMADC; - ara_req_d.use_vd_op = 1'b1; + ara_req.op = ara_pkg::VMADC; + ara_req.use_vd_op = 1'b1; // Check whether we can access vs1 and vs2 - unique case (ara_req_d.emul) + unique case (ara_req.emul) LMUL_2: if ((insn.varith_type.rs2 & 5'b00001) == (insn.varith_type.rd & 5'b00001)) illegal_insn = 1'b1; @@ -845,7 +881,7 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( endcase end 6'b010010: begin - ara_req_d.op = ara_pkg::VSBC; + ara_req.op = ara_pkg::VSBC; // Encoding corresponding to unmasked operations are reserved if (insn.varith_type.vm) illegal_insn = 1'b1; @@ -854,11 +890,11 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( if (insn.varith_type.rd == 5'b0) illegal_insn = 1'b1; end 6'b010011: begin - ara_req_d.op = ara_pkg::VMSBC; - ara_req_d.use_vd_op = 1'b1; + ara_req.op = ara_pkg::VMSBC; + ara_req.use_vd_op = 1'b1; // Check whether we can access vs1 and vs2 - unique case (ara_req_d.emul) + unique case (ara_req.emul) LMUL_2: if ((insn.varith_type.rs2 & 5'b00001) == (insn.varith_type.rd & 5'b00001)) illegal_insn = 1'b1; @@ -872,62 +908,62 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( endcase end 6'b011000: begin - ara_req_d.op = ara_pkg::VMSEQ; - ara_req_d.use_vd_op = 1'b1; + ara_req.op = ara_pkg::VMSEQ; + ara_req.use_vd_op = 1'b1; end 6'b011001: begin - ara_req_d.op = ara_pkg::VMSNE; - ara_req_d.use_vd_op = 1'b1; + ara_req.op = ara_pkg::VMSNE; + ara_req.use_vd_op = 1'b1; end 6'b011010: begin - ara_req_d.op = ara_pkg::VMSLTU; - ara_req_d.use_vd_op = 1'b1; + ara_req.op = ara_pkg::VMSLTU; + ara_req.use_vd_op = 1'b1; end 6'b011011: begin - ara_req_d.op = ara_pkg::VMSLT; - ara_req_d.use_vd_op = 1'b1; + ara_req.op = ara_pkg::VMSLT; + ara_req.use_vd_op = 1'b1; end 6'b011100: begin - ara_req_d.op = ara_pkg::VMSLEU; - ara_req_d.use_vd_op = 1'b1; + ara_req.op = ara_pkg::VMSLEU; + ara_req.use_vd_op = 1'b1; end 6'b011101: begin - ara_req_d.op = ara_pkg::VMSLE; - ara_req_d.use_vd_op = 1'b1; + 
ara_req.op = ara_pkg::VMSLE; + ara_req.use_vd_op = 1'b1; end 6'b011110: begin - ara_req_d.op = ara_pkg::VMSGTU; - ara_req_d.use_vd_op = 1'b1; + ara_req.op = ara_pkg::VMSGTU; + ara_req.use_vd_op = 1'b1; end 6'b011111: begin - ara_req_d.op = ara_pkg::VMSGT; - ara_req_d.use_vd_op = 1'b1; + ara_req.op = ara_pkg::VMSGT; + ara_req.use_vd_op = 1'b1; end 6'b010111: begin - ara_req_d.op = ara_pkg::VMERGE; - ara_req_d.use_vs2 = !insn.varith_type.vm; // vmv.v.x does not use vs2 - end - 6'b100000: ara_req_d.op = ara_pkg::VSADDU; - 6'b100001: ara_req_d.op = ara_pkg::VSADD; - 6'b100010: ara_req_d.op = ara_pkg::VSSUBU; - 6'b100011: ara_req_d.op = ara_pkg::VSSUB; - 6'b100101: ara_req_d.op = ara_pkg::VSLL; - 6'b100111: ara_req_d.op = ara_pkg::VSMUL; - 6'b101000: ara_req_d.op = ara_pkg::VSRL; - 6'b101010: ara_req_d.op = ara_pkg::VSSRL; - 6'b101011: ara_req_d.op = ara_pkg::VSSRA; - 6'b101001: ara_req_d.op = ara_pkg::VSRA; + ara_req.op = ara_pkg::VMERGE; + ara_req.use_vs2 = !insn.varith_type.vm; // vmv.v.x does not use vs2 + end + 6'b100000: ara_req.op = ara_pkg::VSADDU; + 6'b100001: ara_req.op = ara_pkg::VSADD; + 6'b100010: ara_req.op = ara_pkg::VSSUBU; + 6'b100011: ara_req.op = ara_pkg::VSSUB; + 6'b100101: ara_req.op = ara_pkg::VSLL; + 6'b100111: ara_req.op = ara_pkg::VSMUL; + 6'b101000: ara_req.op = ara_pkg::VSRL; + 6'b101010: ara_req.op = ara_pkg::VSSRL; + 6'b101011: ara_req.op = ara_pkg::VSSRA; + 6'b101001: ara_req.op = ara_pkg::VSRA; 6'b101100: begin - ara_req_d.op = ara_pkg::VNSRL; - ara_req_d.conversion_vs1 = OpQueueConversionZExt2; - ara_req_d.eew_vs2 = csr_vtype_q.vsew.next(); + ara_req.op = ara_pkg::VNSRL; + ara_req.conversion_vs1 = OpQueueConversionZExt2; + ara_req.eew_vs2 = csr_vtype_q.vsew.next(); lmul_vs2 = next_lmul(csr_vtype_q.vlmul); // Check whether the EEW is not too wide. 
if (int'(csr_vtype_q.vsew) > int'(EW32)) illegal_insn = 1'b1; // Check whether we can access vs2 - unique case (ara_req_d.emul.next()) + unique case (ara_req.emul.next()) LMUL_2: if ((insn.varith_type.rs2 & 5'b00001) != 5'b00000) illegal_insn = 1'b1; LMUL_4: if ((insn.varith_type.rs2 & 5'b00011) != 5'b00000) illegal_insn = 1'b1; LMUL_8: if ((insn.varith_type.rs2 & 5'b00111) != 5'b00000) illegal_insn = 1'b1; @@ -936,16 +972,16 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( endcase end 6'b101101: begin - ara_req_d.op = ara_pkg::VNSRA; - ara_req_d.conversion_vs1 = OpQueueConversionZExt2; - ara_req_d.eew_vs2 = csr_vtype_q.vsew.next(); + ara_req.op = ara_pkg::VNSRA; + ara_req.conversion_vs1 = OpQueueConversionZExt2; + ara_req.eew_vs2 = csr_vtype_q.vsew.next(); lmul_vs2 = next_lmul(csr_vtype_q.vlmul); // Check whether the EEW is not too wide. if (int'(csr_vtype_q.vsew) > int'(EW32)) illegal_insn = 1'b1; // Check whether we can access vs2 - unique case (ara_req_d.emul.next()) + unique case (ara_req.emul.next()) LMUL_2: if ((insn.varith_type.rs2 & 5'b00001) != 5'b00000) illegal_insn = 1'b1; LMUL_4: if ((insn.varith_type.rs2 & 5'b00011) != 5'b00000) illegal_insn = 1'b1; LMUL_8: if ((insn.varith_type.rs2 & 5'b00111) != 5'b00000) illegal_insn = 1'b1; @@ -954,19 +990,19 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( endcase end 6'b101110: begin - ara_req_d.op = ara_pkg::VNCLIPU; - ara_req_d.eew_vs2 = csr_vtype_q.vsew.next(); + ara_req.op = ara_pkg::VNCLIPU; + ara_req.eew_vs2 = csr_vtype_q.vsew.next(); end 6'b101111: begin - ara_req_d.op = ara_pkg::VNCLIP; - ara_req_d.eew_vs2 = csr_vtype_q.vsew.next(); + ara_req.op = ara_pkg::VNCLIP; + ara_req.eew_vs2 = csr_vtype_q.vsew.next(); end default: illegal_insn = 1'b1; endcase // Instructions with an integer LMUL have extra constraints on the registers they can // access. 
- unique case (ara_req_d.emul) + unique case (ara_req.emul) LMUL_2: if ((insn.varith_type.rs2 & 5'b00001) != 5'b00000 || (insn.varith_type.rd & 5'b00001) != 5'b00000) illegal_insn = 1'b1; LMUL_4: if ((insn.varith_type.rs2 & 5'b00011) != 5'b00000 || @@ -985,46 +1021,46 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( // Sign-extend this by default. // Instructions that need the immediate to be zero-extended // (vrgather, shifts, clips, slides) should do overwrite this. - ara_req_d.scalar_op = {{ELEN{insn.varith_type.rs1[19]}}, insn.varith_type.rs1}; - ara_req_d.use_scalar_op = 1'b1; - ara_req_d.vs2 = insn.varith_type.rs2; - ara_req_d.use_vs2 = 1'b1; - ara_req_d.vd = insn.varith_type.rd; - ara_req_d.use_vd = 1'b1; - ara_req_d.vm = insn.varith_type.vm; - ara_req_d.is_stride_np2 = is_stride_np2; + ara_req.scalar_op = {{ELEN{insn.varith_type.rs1[19]}}, insn.varith_type.rs1}; + ara_req.use_scalar_op = 1'b1; + ara_req.vs2 = insn.varith_type.rs2; + ara_req.use_vs2 = 1'b1; + ara_req.vd = insn.varith_type.rd; + ara_req.use_vd = 1'b1; + ara_req.vm = insn.varith_type.vm; + ara_req.is_stride_np2 = is_stride_np2; ara_req_valid_d = 1'b1; // Decode based on the func6 field unique case (insn.varith_type.func6) - 6'b000000: ara_req_d.op = ara_pkg::VADD; - 6'b000011: ara_req_d.op = ara_pkg::VRSUB; - 6'b001001: ara_req_d.op = ara_pkg::VAND; - 6'b001010: ara_req_d.op = ara_pkg::VOR; - 6'b001011: ara_req_d.op = ara_pkg::VXOR; + 6'b000000: ara_req.op = ara_pkg::VADD; + 6'b000011: ara_req.op = ara_pkg::VRSUB; + 6'b001001: ara_req.op = ara_pkg::VAND; + 6'b001010: ara_req.op = ara_pkg::VOR; + 6'b001011: ara_req.op = ara_pkg::VXOR; 6'b001110: begin - ara_req_d.op = ara_pkg::VSLIDEUP; - ara_req_d.stride = {{ELEN{insn.varith_type.rs1[19]}}, insn.varith_type.rs1}; - ara_req_d.eew_vs2 = csr_vtype_q.vsew; + ara_req.op = ara_pkg::VSLIDEUP; + ara_req.stride = {{ELEN{insn.varith_type.rs1[19]}}, insn.varith_type.rs1}; + ara_req.eew_vs2 = csr_vtype_q.vsew; // Encode vslideup/vslide1up 
on the use_scalar_op field - ara_req_d.use_scalar_op = 1'b0; + ara_req.use_scalar_op = 1'b0; // Request will need reshuffling - ara_req_d.scale_vl = 1'b1; + ara_req.scale_vl = 1'b1; // If stride > vl, the vslideup has no effects - if (|ara_req_d.stride[$bits(ara_req_d.stride)-1:$bits(csr_vl_q)] || - (vlen_t'(ara_req_d.stride) >= csr_vl_q)) null_vslideup = 1'b1; + if (|ara_req.stride[$bits(ara_req.stride)-1:$bits(csr_vl_q)] || + (vlen_t'(ara_req.stride) >= csr_vl_q)) null_vslideup = 1'b1; end 6'b001111: begin - ara_req_d.op = ara_pkg::VSLIDEDOWN; - ara_req_d.stride = {{ELEN{insn.varith_type.rs1[19]}}, insn.varith_type.rs1}; - ara_req_d.eew_vs2 = csr_vtype_q.vsew; + ara_req.op = ara_pkg::VSLIDEDOWN; + ara_req.stride = {{ELEN{insn.varith_type.rs1[19]}}, insn.varith_type.rs1}; + ara_req.eew_vs2 = csr_vtype_q.vsew; // Encode vslidedown/vslide1down on the use_scalar_op field - ara_req_d.use_scalar_op = 1'b0; + ara_req.use_scalar_op = 1'b0; // Request will need reshuffling - ara_req_d.scale_vl = 1'b1; + ara_req.scale_vl = 1'b1; end 6'b010000: begin - ara_req_d.op = ara_pkg::VADC; + ara_req.op = ara_pkg::VADC; // Encoding corresponding to unmasked operations are reserved if (insn.varith_type.vm) illegal_insn = 1'b1; @@ -1033,11 +1069,11 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( if (insn.varith_type.rd == 5'b0) illegal_insn = 1'b1; end 6'b010001: begin - ara_req_d.op = ara_pkg::VMADC; - ara_req_d.use_vd_op = 1'b1; + ara_req.op = ara_pkg::VMADC; + ara_req.use_vd_op = 1'b1; // Check whether we can access vs1 and vs2 - unique case (ara_req_d.emul) + unique case (ara_req.emul) LMUL_2: if ((insn.varith_type.rs2 & 5'b00001) == (insn.varith_type.rd & 5'b00001)) illegal_insn = 1'b1; @@ -1051,36 +1087,36 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( endcase end 6'b011000: begin - ara_req_d.op = ara_pkg::VMSEQ; - ara_req_d.use_vd_op = 1'b1; + ara_req.op = ara_pkg::VMSEQ; + ara_req.use_vd_op = 1'b1; end 6'b011001: begin - ara_req_d.op = 
ara_pkg::VMSNE; - ara_req_d.use_vd_op = 1'b1; + ara_req.op = ara_pkg::VMSNE; + ara_req.use_vd_op = 1'b1; end 6'b011100: begin - ara_req_d.op = ara_pkg::VMSLEU; - ara_req_d.use_vd_op = 1'b1; + ara_req.op = ara_pkg::VMSLEU; + ara_req.use_vd_op = 1'b1; end 6'b011101: begin - ara_req_d.op = ara_pkg::VMSLE; - ara_req_d.use_vd_op = 1'b1; + ara_req.op = ara_pkg::VMSLE; + ara_req.use_vd_op = 1'b1; end 6'b011110: begin - ara_req_d.op = ara_pkg::VMSGTU; - ara_req_d.use_vd_op = 1'b1; + ara_req.op = ara_pkg::VMSGTU; + ara_req.use_vd_op = 1'b1; end 6'b011111: begin - ara_req_d.op = ara_pkg::VMSGT; - ara_req_d.use_vd_op = 1'b1; + ara_req.op = ara_pkg::VMSGT; + ara_req.use_vd_op = 1'b1; end 6'b010111: begin - ara_req_d.op = ara_pkg::VMERGE; - ara_req_d.use_vs2 = !insn.varith_type.vm; // vmv.v.i does not use vs2 + ara_req.op = ara_pkg::VMERGE; + ara_req.use_vs2 = !insn.varith_type.vm; // vmv.v.i does not use vs2 end - 6'b100000: ara_req_d.op = ara_pkg::VSADDU; - 6'b100001: ara_req_d.op = ara_pkg::VSADD; - 6'b100101: ara_req_d.op = ara_pkg::VSLL; + 6'b100000: ara_req.op = ara_pkg::VSADDU; + 6'b100001: ara_req.op = ara_pkg::VSADD; + 6'b100101: ara_req.op = ara_pkg::VSLL; 6'b100111: begin // vmvr.v automatic int unsigned vlmax; // Execute also if vl == 0 @@ -1092,19 +1128,19 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( unique case (insn.varith_type.rs1[17:15]) 3'd0 : begin vlmax <<= 0; - ara_req_d.emul = LMUL_1; + ara_req.emul = LMUL_1; end 3'd1 : begin vlmax <<= 1; - ara_req_d.emul = LMUL_2; + ara_req.emul = LMUL_2; end 3'd3 : begin vlmax <<= 2; - ara_req_d.emul = LMUL_4; + ara_req.emul = LMUL_4; end 3'd7 : begin vlmax <<= 3; - ara_req_d.emul = LMUL_8; + ara_req.emul = LMUL_8; end default: begin // Trigger an error for the reserved simm values @@ -1113,31 +1149,31 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( endcase // From here on, the only difference with a vmv.v.v is that the vector reg index // is in rs2. 
For the rest,, pretend to be a vmv.v.v - ara_req_d.op = ara_pkg::VMERGE; - ara_req_d.use_scalar_op = 1'b0; - ara_req_d.use_vs1 = 1'b1; - ara_req_d.use_vs2 = 1'b0; - ara_req_d.vs1 = insn.varith_type.rs2; - ara_req_d.eew_vs1 = eew_q[insn.varith_type.rs2]; + ara_req.op = ara_pkg::VMERGE; + ara_req.use_scalar_op = 1'b0; + ara_req.use_vs1 = 1'b1; + ara_req.use_vs2 = 1'b0; + ara_req.vs1 = insn.varith_type.rs2; + ara_req.eew_vs1 = eew_q[insn.varith_type.rs2]; // Copy the encoding information to the new register - ara_req_d.vtype.vsew = eew_q[insn.varith_type.rs2]; - ara_req_d.vl = vlmax; // whole register move + ara_req.vtype.vsew = eew_q[insn.varith_type.rs2]; + ara_req.vl = vlmax; // whole register move end - 6'b101000: ara_req_d.op = ara_pkg::VSRL; - 6'b101001: ara_req_d.op = ara_pkg::VSRA; - 6'b101010: ara_req_d.op = ara_pkg::VSSRL; - 6'b101011: ara_req_d.op = ara_pkg::VSSRA; + 6'b101000: ara_req.op = ara_pkg::VSRL; + 6'b101001: ara_req.op = ara_pkg::VSRA; + 6'b101010: ara_req.op = ara_pkg::VSSRL; + 6'b101011: ara_req.op = ara_pkg::VSSRA; 6'b101100: begin - ara_req_d.op = ara_pkg::VNSRL; - ara_req_d.conversion_vs1 = OpQueueConversionZExt2; - ara_req_d.eew_vs2 = csr_vtype_q.vsew.next(); + ara_req.op = ara_pkg::VNSRL; + ara_req.conversion_vs1 = OpQueueConversionZExt2; + ara_req.eew_vs2 = csr_vtype_q.vsew.next(); lmul_vs2 = next_lmul(csr_vtype_q.vlmul); // Check whether the EEW is not too wide. 
if (int'(csr_vtype_q.vsew) > int'(EW32)) illegal_insn = 1'b1; // Check whether we can access vs2 - unique case (ara_req_d.emul.next()) + unique case (ara_req.emul.next()) LMUL_2: if ((insn.varith_type.rs2 & 5'b00001) != 5'b00000) illegal_insn = 1'b1; LMUL_4: if ((insn.varith_type.rs2 & 5'b00011) != 5'b00000) illegal_insn = 1'b1; LMUL_8: if ((insn.varith_type.rs2 & 5'b00111) != 5'b00000) illegal_insn = 1'b1; @@ -1146,16 +1182,16 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( endcase end 6'b101101: begin - ara_req_d.op = ara_pkg::VNSRA; - ara_req_d.conversion_vs1 = OpQueueConversionZExt2; - ara_req_d.eew_vs2 = csr_vtype_q.vsew.next(); + ara_req.op = ara_pkg::VNSRA; + ara_req.conversion_vs1 = OpQueueConversionZExt2; + ara_req.eew_vs2 = csr_vtype_q.vsew.next(); lmul_vs2 = next_lmul(csr_vtype_q.vlmul); // Check whether the EEW is not too wide. if (int'(csr_vtype_q.vsew) > int'(EW32)) illegal_insn = 1'b1; // Check whether we can access vs2 - unique case (ara_req_d.emul.next()) + unique case (ara_req.emul.next()) LMUL_2: if ((insn.varith_type.rs2 & 5'b00001) != 5'b00000) illegal_insn = 1'b1; LMUL_4: if ((insn.varith_type.rs2 & 5'b00011) != 5'b00000) illegal_insn = 1'b1; LMUL_8: if ((insn.varith_type.rs2 & 5'b00111) != 5'b00000) illegal_insn = 1'b1; @@ -1164,19 +1200,19 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( endcase end 6'b101110: begin - ara_req_d.op = ara_pkg::VNCLIPU; - ara_req_d.eew_vs2 = csr_vtype_q.vsew.next(); + ara_req.op = ara_pkg::VNCLIPU; + ara_req.eew_vs2 = csr_vtype_q.vsew.next(); end 6'b101111: begin - ara_req_d.op = ara_pkg::VNCLIP; - ara_req_d.eew_vs2 = csr_vtype_q.vsew.next(); + ara_req.op = ara_pkg::VNCLIP; + ara_req.eew_vs2 = csr_vtype_q.vsew.next(); end default: illegal_insn = 1'b1; endcase // Instructions with an integer LMUL have extra constraints on the registers they can // access. 
- unique case (ara_req_d.emul) + unique case (ara_req.emul) LMUL_2: if ((insn.varith_type.rs2 & 5'b00001) != 5'b00000 || (insn.varith_type.rd & 5'b00001) != 5'b00000) illegal_insn = 1'b1; LMUL_4: if ((insn.varith_type.rs2 & 5'b00011) != 5'b00000 || @@ -1192,61 +1228,61 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( OPMVV: begin: opmvv // These generate a request to Ara's backend - ara_req_d.vs1 = insn.varith_type.rs1; - ara_req_d.use_vs1 = 1'b1; - ara_req_d.vs2 = insn.varith_type.rs2; - ara_req_d.use_vs2 = 1'b1; - ara_req_d.vd = insn.varith_type.rd; - ara_req_d.use_vd = 1'b1; - ara_req_d.vm = insn.varith_type.vm; + ara_req.vs1 = insn.varith_type.rs1; + ara_req.use_vs1 = 1'b1; + ara_req.vs2 = insn.varith_type.rs2; + ara_req.use_vs2 = 1'b1; + ara_req.vd = insn.varith_type.rd; + ara_req.use_vd = 1'b1; + ara_req.vm = insn.varith_type.vm; ara_req_valid_d = 1'b1; // Assume an effective EMUL = LMUL1 by default (for the mask operations) - ara_req_d.emul = LMUL_1; + ara_req.emul = LMUL_1; // Decode based on the func6 field unique case (insn.varith_type.func6) // Encode, for each reduction, the bits of the neutral // value of each operation 6'b000000: begin - ara_req_d.op = ara_pkg::VREDSUM; - ara_req_d.conversion_vs1 = OpQueueReductionZExt; - ara_req_d.cvt_resize = resize_e'(2'b00); + ara_req.op = ara_pkg::VREDSUM; + ara_req.conversion_vs1 = OpQueueReductionZExt; + ara_req.cvt_resize = resize_e'(2'b00); end 6'b000001: begin - ara_req_d.op = ara_pkg::VREDAND; - ara_req_d.conversion_vs1 = OpQueueReductionZExt; - ara_req_d.cvt_resize = resize_e'(2'b11); + ara_req.op = ara_pkg::VREDAND; + ara_req.conversion_vs1 = OpQueueReductionZExt; + ara_req.cvt_resize = resize_e'(2'b11); end 6'b000010: begin - ara_req_d.op = ara_pkg::VREDOR; - ara_req_d.conversion_vs1 = OpQueueReductionZExt; - ara_req_d.cvt_resize = resize_e'(2'b00); + ara_req.op = ara_pkg::VREDOR; + ara_req.conversion_vs1 = OpQueueReductionZExt; + ara_req.cvt_resize = resize_e'(2'b00); end 6'b000011: 
begin - ara_req_d.op = ara_pkg::VREDXOR; - ara_req_d.conversion_vs1 = OpQueueReductionZExt; - ara_req_d.cvt_resize = resize_e'(2'b00); + ara_req.op = ara_pkg::VREDXOR; + ara_req.conversion_vs1 = OpQueueReductionZExt; + ara_req.cvt_resize = resize_e'(2'b00); end 6'b000100: begin - ara_req_d.op = ara_pkg::VREDMINU; - ara_req_d.conversion_vs1 = OpQueueReductionZExt; - ara_req_d.cvt_resize = resize_e'(2'b11); + ara_req.op = ara_pkg::VREDMINU; + ara_req.conversion_vs1 = OpQueueReductionZExt; + ara_req.cvt_resize = resize_e'(2'b11); end 6'b000101: begin - ara_req_d.op = ara_pkg::VREDMIN; - ara_req_d.conversion_vs1 = OpQueueReductionZExt; - ara_req_d.cvt_resize = resize_e'(2'b01); + ara_req.op = ara_pkg::VREDMIN; + ara_req.conversion_vs1 = OpQueueReductionZExt; + ara_req.cvt_resize = resize_e'(2'b01); end 6'b000110: begin - ara_req_d.op = ara_pkg::VREDMAXU; - ara_req_d.conversion_vs1 = OpQueueReductionZExt; - ara_req_d.cvt_resize = resize_e'(2'b00); + ara_req.op = ara_pkg::VREDMAXU; + ara_req.conversion_vs1 = OpQueueReductionZExt; + ara_req.cvt_resize = resize_e'(2'b00); end 6'b000111: begin - ara_req_d.op = ara_pkg::VREDMAX; - ara_req_d.conversion_vs1 = OpQueueReductionZExt; - ara_req_d.cvt_resize = resize_e'(2'b10); + ara_req.op = ara_pkg::VREDMAX; + ara_req.conversion_vs1 = OpQueueReductionZExt; + ara_req.cvt_resize = resize_e'(2'b10); end 6'b010000: begin // VWXUNARY0 // vmv.x.s @@ -1256,144 +1292,144 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( case (insn.varith_type.rs1) 5'b00000: begin - ara_req_d.op = ara_pkg::VMVXS; - ara_req_d.vl = 1; + ara_req.op = ara_pkg::VMVXS; + ara_req.vl = 1; end 5'b10000: begin - ara_req_d.op = ara_pkg::VCPOP; - ara_req_d.use_vs1 = 1'b0; + ara_req.op = ara_pkg::VCPOP; + ara_req.use_vs1 = 1'b0; end 5'b10001: begin - ara_req_d.op = ara_pkg::VFIRST; - ara_req_d.use_vs1 = 1'b0; + ara_req.op = ara_pkg::VFIRST; + ara_req.use_vs1 = 1'b0; end default :; endcase - ara_req_d.use_vd = 1'b0; - ara_req_d.vstart = '0; + 
ara_req.use_vd = 1'b0; + ara_req.vstart = '0; skip_lmul_checks = 1'b1; ignore_zero_vl_check = 1'b1; // Sign extend operands unique case (csr_vtype_q.vsew) EW8: begin - ara_req_d.conversion_vs2 = OpQueueConversionSExt8; + ara_req.conversion_vs2 = OpQueueConversionSExt8; end EW16: begin - ara_req_d.conversion_vs2 = OpQueueConversionSExt4; + ara_req.conversion_vs2 = OpQueueConversionSExt4; end EW32: begin - ara_req_d.conversion_vs2 = OpQueueConversionSExt2; + ara_req.conversion_vs2 = OpQueueConversionSExt2; end default:; endcase // Wait until the back-end answers to acknowledge those instructions - if ( ara_resp_valid_i ) begin + if ( ara_resp_valid ) begin acc_resp_o.req_ready = 1'b1; acc_resp_o.resp_valid = 1'b1; - acc_resp_o.result = ara_resp_i.resp; - acc_resp_o.exception = ara_resp_i.exception; + acc_resp_o.result = ara_resp.resp; + acc_resp_o.exception = ara_resp.exception; ara_req_valid_d = 1'b0; end end 6'b010100: begin - ara_req_d.use_vd_op = 1'b1; - ara_req_d.use_vs1 = 1'b0; + ara_req.use_vd_op = 1'b1; + ara_req.use_vs1 = 1'b0; case (insn.varith_type.rs1) - 5'b00001: ara_req_d.op = ara_pkg::VMSBF; - 5'b00010: ara_req_d.op = ara_pkg::VMSOF; - 5'b00011: ara_req_d.op = ara_pkg::VMSIF; - 5'b10000: ara_req_d.op = ara_pkg::VIOTA; - 5'b10001: ara_req_d.op = ara_pkg::VID; + 5'b00001: ara_req.op = ara_pkg::VMSBF; + 5'b00010: ara_req.op = ara_pkg::VMSOF; + 5'b00011: ara_req.op = ara_pkg::VMSIF; + 5'b10000: ara_req.op = ara_pkg::VIOTA; + 5'b10001: ara_req.op = ara_pkg::VID; endcase end - 6'b001000: ara_req_d.op = ara_pkg::VAADDU; - 6'b001001: ara_req_d.op = ara_pkg::VAADD; - 6'b001010: ara_req_d.op = ara_pkg::VASUBU; - 6'b001011: ara_req_d.op = ara_pkg::VASUB; + 6'b001000: ara_req.op = ara_pkg::VAADDU; + 6'b001001: ara_req.op = ara_pkg::VAADD; + 6'b001010: ara_req.op = ara_pkg::VASUBU; + 6'b001011: ara_req.op = ara_pkg::VASUB; 6'b011000: begin - ara_req_d.op = ara_pkg::VMANDNOT; + ara_req.op = ara_pkg::VMANDNOT; // Prefer mask operation on EW8 encoding // In mask 
operations, vs1, vs2, vd should // have the same encoding. - ara_req_d.eew_vs1 = EW8; - ara_req_d.eew_vs2 = EW8; - ara_req_d.eew_vd_op = EW8; - ara_req_d.vtype.vsew = EW8; - ara_req_d.use_vd_op = 1'b1; + ara_req.eew_vs1 = EW8; + ara_req.eew_vs2 = EW8; + ara_req.eew_vd_op = EW8; + ara_req.vtype.vsew = EW8; + ara_req.use_vd_op = 1'b1; end 6'b011001: begin - ara_req_d.op = ara_pkg::VMAND; - ara_req_d.eew_vs1 = EW8; - ara_req_d.eew_vs2 = EW8; - ara_req_d.eew_vd_op = EW8; - ara_req_d.vtype.vsew = EW8; - ara_req_d.use_vd_op = 1'b1; + ara_req.op = ara_pkg::VMAND; + ara_req.eew_vs1 = EW8; + ara_req.eew_vs2 = EW8; + ara_req.eew_vd_op = EW8; + ara_req.vtype.vsew = EW8; + ara_req.use_vd_op = 1'b1; end 6'b011010: begin - ara_req_d.op = ara_pkg::VMOR; - ara_req_d.eew_vs1 = EW8; - ara_req_d.eew_vs2 = EW8; - ara_req_d.eew_vd_op = EW8; - ara_req_d.vtype.vsew = EW8; - ara_req_d.use_vd_op = 1'b1; + ara_req.op = ara_pkg::VMOR; + ara_req.eew_vs1 = EW8; + ara_req.eew_vs2 = EW8; + ara_req.eew_vd_op = EW8; + ara_req.vtype.vsew = EW8; + ara_req.use_vd_op = 1'b1; end 6'b011011: begin - ara_req_d.op = ara_pkg::VMXOR; - ara_req_d.eew_vs1 = EW8; - ara_req_d.eew_vs2 = EW8; - ara_req_d.eew_vd_op = EW8; - ara_req_d.vtype.vsew = EW8; - ara_req_d.use_vd_op = 1'b1; + ara_req.op = ara_pkg::VMXOR; + ara_req.eew_vs1 = EW8; + ara_req.eew_vs2 = EW8; + ara_req.eew_vd_op = EW8; + ara_req.vtype.vsew = EW8; + ara_req.use_vd_op = 1'b1; end 6'b011100: begin - ara_req_d.op = ara_pkg::VMORNOT; - ara_req_d.eew_vs1 = EW8; - ara_req_d.eew_vs2 = EW8; - ara_req_d.eew_vd_op = EW8; - ara_req_d.vtype.vsew = EW8; - ara_req_d.use_vd_op = 1'b1; + ara_req.op = ara_pkg::VMORNOT; + ara_req.eew_vs1 = EW8; + ara_req.eew_vs2 = EW8; + ara_req.eew_vd_op = EW8; + ara_req.vtype.vsew = EW8; + ara_req.use_vd_op = 1'b1; end 6'b011101: begin - ara_req_d.op = ara_pkg::VMNAND; - ara_req_d.eew_vs1 = EW8; - ara_req_d.eew_vs2 = EW8; - ara_req_d.eew_vd_op = EW8; - ara_req_d.vtype.vsew = EW8; - ara_req_d.use_vd_op = 1'b1; + ara_req.op = 
ara_pkg::VMNAND; + ara_req.eew_vs1 = EW8; + ara_req.eew_vs2 = EW8; + ara_req.eew_vd_op = EW8; + ara_req.vtype.vsew = EW8; + ara_req.use_vd_op = 1'b1; end 6'b011110: begin - ara_req_d.op = ara_pkg::VMNOR; - ara_req_d.eew_vs1 = EW8; - ara_req_d.eew_vs2 = EW8; - ara_req_d.eew_vd_op = EW8; - ara_req_d.vtype.vsew = EW8; - ara_req_d.use_vd_op = 1'b1; + ara_req.op = ara_pkg::VMNOR; + ara_req.eew_vs1 = EW8; + ara_req.eew_vs2 = EW8; + ara_req.eew_vd_op = EW8; + ara_req.vtype.vsew = EW8; + ara_req.use_vd_op = 1'b1; end 6'b011111: begin - ara_req_d.op = ara_pkg::VMXNOR; - ara_req_d.eew_vs1 = EW8; - ara_req_d.eew_vs2 = EW8; - ara_req_d.eew_vd_op = EW8; - ara_req_d.vtype.vsew = EW8; - ara_req_d.use_vd_op = 1'b1; + ara_req.op = ara_pkg::VMXNOR; + ara_req.eew_vs1 = EW8; + ara_req.eew_vs2 = EW8; + ara_req.eew_vd_op = EW8; + ara_req.vtype.vsew = EW8; + ara_req.use_vd_op = 1'b1; end 6'b010010: begin // VXUNARY0 // These instructions do not use vs1 - ara_req_d.use_vs1 = 1'b0; + ara_req.use_vs1 = 1'b0; skip_vs1_lmul_checks = 1'b1; // They are always encoded as ADDs with zero. 
- ara_req_d.op = ara_pkg::VADD; - ara_req_d.use_scalar_op = 1'b1; - ara_req_d.scalar_op = '0; + ara_req.op = ara_pkg::VADD; + ara_req.use_scalar_op = 1'b1; + ara_req.scalar_op = '0; case (insn.varith_type.rs1) 5'b00010: begin // VZEXT.VF8 - ara_req_d.conversion_vs2 = OpQueueConversionZExt8; - ara_req_d.eew_vs2 = eew_q[insn.varith_type.rs2]; - ara_req_d.cvt_resize = CVT_WIDE; + ara_req.conversion_vs2 = OpQueueConversionZExt8; + ara_req.eew_vs2 = eew_q[insn.varith_type.rs2]; + ara_req.cvt_resize = CVT_WIDE; // Invalid conversion if (int'(csr_vtype_q.vsew) < int'(EW64) || @@ -1401,9 +1437,9 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( illegal_insn = 1'b1; end 5'b00011: begin // VSEXT.VF8 - ara_req_d.conversion_vs2 = OpQueueConversionSExt8; - ara_req_d.eew_vs2 = eew_q[insn.varith_type.rs2]; - ara_req_d.cvt_resize = CVT_WIDE; + ara_req.conversion_vs2 = OpQueueConversionSExt8; + ara_req.eew_vs2 = eew_q[insn.varith_type.rs2]; + ara_req.cvt_resize = CVT_WIDE; // Invalid conversion if (int'(csr_vtype_q.vsew) < int'(EW64) || @@ -1411,36 +1447,36 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( illegal_insn = 1'b1; end 5'b00100: begin // VZEXT.VF4 - ara_req_d.conversion_vs2 = OpQueueConversionZExt4; - ara_req_d.eew_vs2 = prev_prev_ew(csr_vtype_q.vsew); - ara_req_d.cvt_resize = CVT_WIDE; + ara_req.conversion_vs2 = OpQueueConversionZExt4; + ara_req.eew_vs2 = prev_prev_ew(csr_vtype_q.vsew); + ara_req.cvt_resize = CVT_WIDE; // Invalid conversion if (int'(csr_vtype_q.vsew) < int'(EW32) || int'(csr_vtype_q.vlmul) inside {LMUL_1_4, LMUL_1_8}) illegal_insn = 1'b1; end 5'b00101: begin // VSEXT.VF4 - ara_req_d.conversion_vs2 = OpQueueConversionSExt4; - ara_req_d.eew_vs2 = prev_prev_ew(csr_vtype_q.vsew); - ara_req_d.cvt_resize = CVT_WIDE; + ara_req.conversion_vs2 = OpQueueConversionSExt4; + ara_req.eew_vs2 = prev_prev_ew(csr_vtype_q.vsew); + ara_req.cvt_resize = CVT_WIDE; // Invalid conversion if (int'(csr_vtype_q.vsew) < int'(EW32) || 
int'(csr_vtype_q.vlmul) inside {LMUL_1_4, LMUL_1_8}) illegal_insn = 1'b1; end 5'b00110: begin // VZEXT.VF2 - ara_req_d.conversion_vs2 = OpQueueConversionZExt2; - ara_req_d.eew_vs2 = csr_vtype_q.vsew.prev(); - ara_req_d.cvt_resize = CVT_WIDE; + ara_req.conversion_vs2 = OpQueueConversionZExt2; + ara_req.eew_vs2 = csr_vtype_q.vsew.prev(); + ara_req.cvt_resize = CVT_WIDE; // Invalid conversion if (int'(csr_vtype_q.vsew) < int'(EW16) || int'(csr_vtype_q.vlmul) inside {LMUL_1_8}) illegal_insn = 1'b1; end 5'b00111: begin // VSEXT.VF2 - ara_req_d.conversion_vs2 = OpQueueConversionSExt2; - ara_req_d.eew_vs2 = csr_vtype_q.vsew.prev(); - ara_req_d.cvt_resize = CVT_WIDE; + ara_req.conversion_vs2 = OpQueueConversionSExt2; + ara_req.eew_vs2 = csr_vtype_q.vsew.prev(); + ara_req.cvt_resize = CVT_WIDE; // Invalid conversion if (int'(csr_vtype_q.vsew) < int'(EW16) || int'(csr_vtype_q.vlmul) inside {LMUL_1_8}) @@ -1450,158 +1486,158 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( endcase end // Divide instructions - 6'b100000: ara_req_d.op = ara_pkg::VDIVU; - 6'b100001: ara_req_d.op = ara_pkg::VDIV; - 6'b100010: ara_req_d.op = ara_pkg::VREMU; - 6'b100011: ara_req_d.op = ara_pkg::VREM; + 6'b100000: ara_req.op = ara_pkg::VDIVU; + 6'b100001: ara_req.op = ara_pkg::VDIV; + 6'b100010: ara_req.op = ara_pkg::VREMU; + 6'b100011: ara_req.op = ara_pkg::VREM; // Multiply instructions - 6'b100100: ara_req_d.op = ara_pkg::VMULHU; - 6'b100101: ara_req_d.op = ara_pkg::VMUL; - 6'b100110: ara_req_d.op = ara_pkg::VMULHSU; - 6'b100111: ara_req_d.op = ara_pkg::VMULH; + 6'b100100: ara_req.op = ara_pkg::VMULHU; + 6'b100101: ara_req.op = ara_pkg::VMUL; + 6'b100110: ara_req.op = ara_pkg::VMULHSU; + 6'b100111: ara_req.op = ara_pkg::VMULH; // Multiply-Add instructions // vd is also used as a source operand 6'b101001: begin - ara_req_d.op = ara_pkg::VMADD; - ara_req_d.use_vd_op = 1'b1; + ara_req.op = ara_pkg::VMADD; + ara_req.use_vd_op = 1'b1; // Swap "vs2" and "vd" since "vs2" is the addend 
and "vd" is the multiplicand - ara_req_d.swap_vs2_vd_op = 1'b1; + ara_req.swap_vs2_vd_op = 1'b1; end 6'b101011: begin - ara_req_d.op = ara_pkg::VNMSUB; - ara_req_d.use_vd_op = 1'b1; - ara_req_d.swap_vs2_vd_op = 1'b1; + ara_req.op = ara_pkg::VNMSUB; + ara_req.use_vd_op = 1'b1; + ara_req.swap_vs2_vd_op = 1'b1; end 6'b101101: begin - ara_req_d.op = ara_pkg::VMACC; - ara_req_d.use_vd_op = 1'b1; + ara_req.op = ara_pkg::VMACC; + ara_req.use_vd_op = 1'b1; end 6'b101111: begin - ara_req_d.op = ara_pkg::VNMSAC; - ara_req_d.use_vd_op = 1'b1; + ara_req.op = ara_pkg::VNMSAC; + ara_req.use_vd_op = 1'b1; end // Widening instructions 6'b110000: begin // VWADDU - ara_req_d.op = ara_pkg::VADD; - ara_req_d.emul = next_lmul(csr_vtype_q.vlmul); - ara_req_d.vtype.vsew = csr_vtype_q.vsew.next(); - ara_req_d.conversion_vs1 = OpQueueConversionZExt2; - ara_req_d.conversion_vs2 = OpQueueConversionZExt2; - ara_req_d.cvt_resize = CVT_WIDE; + ara_req.op = ara_pkg::VADD; + ara_req.emul = next_lmul(csr_vtype_q.vlmul); + ara_req.vtype.vsew = csr_vtype_q.vsew.next(); + ara_req.conversion_vs1 = OpQueueConversionZExt2; + ara_req.conversion_vs2 = OpQueueConversionZExt2; + ara_req.cvt_resize = CVT_WIDE; end 6'b110001: begin // VWADD - ara_req_d.op = ara_pkg::VADD; - ara_req_d.emul = next_lmul(csr_vtype_q.vlmul); - ara_req_d.vtype.vsew = csr_vtype_q.vsew.next(); - ara_req_d.conversion_vs1 = OpQueueConversionSExt2; - ara_req_d.conversion_vs2 = OpQueueConversionSExt2; - ara_req_d.cvt_resize = CVT_WIDE; + ara_req.op = ara_pkg::VADD; + ara_req.emul = next_lmul(csr_vtype_q.vlmul); + ara_req.vtype.vsew = csr_vtype_q.vsew.next(); + ara_req.conversion_vs1 = OpQueueConversionSExt2; + ara_req.conversion_vs2 = OpQueueConversionSExt2; + ara_req.cvt_resize = CVT_WIDE; end 6'b110010: begin // VWSUBU - ara_req_d.op = ara_pkg::VSUB; - ara_req_d.emul = next_lmul(csr_vtype_q.vlmul); - ara_req_d.vtype.vsew = csr_vtype_q.vsew.next(); - ara_req_d.conversion_vs1 = OpQueueConversionZExt2; - ara_req_d.conversion_vs2 = 
OpQueueConversionZExt2; - ara_req_d.cvt_resize = CVT_WIDE; + ara_req.op = ara_pkg::VSUB; + ara_req.emul = next_lmul(csr_vtype_q.vlmul); + ara_req.vtype.vsew = csr_vtype_q.vsew.next(); + ara_req.conversion_vs1 = OpQueueConversionZExt2; + ara_req.conversion_vs2 = OpQueueConversionZExt2; + ara_req.cvt_resize = CVT_WIDE; end 6'b110011: begin // VWSUB - ara_req_d.op = ara_pkg::VSUB; - ara_req_d.emul = next_lmul(csr_vtype_q.vlmul); - ara_req_d.vtype.vsew = csr_vtype_q.vsew.next(); - ara_req_d.conversion_vs1 = OpQueueConversionSExt2; - ara_req_d.conversion_vs2 = OpQueueConversionSExt2; - ara_req_d.cvt_resize = CVT_WIDE; + ara_req.op = ara_pkg::VSUB; + ara_req.emul = next_lmul(csr_vtype_q.vlmul); + ara_req.vtype.vsew = csr_vtype_q.vsew.next(); + ara_req.conversion_vs1 = OpQueueConversionSExt2; + ara_req.conversion_vs2 = OpQueueConversionSExt2; + ara_req.cvt_resize = CVT_WIDE; end 6'b110100: begin // VWADDU.W - ara_req_d.op = ara_pkg::VADD; - ara_req_d.emul = next_lmul(csr_vtype_q.vlmul); + ara_req.op = ara_pkg::VADD; + ara_req.emul = next_lmul(csr_vtype_q.vlmul); lmul_vs2 = next_lmul(csr_vtype_q.vlmul); - ara_req_d.vtype.vsew = csr_vtype_q.vsew.next(); - ara_req_d.conversion_vs1 = OpQueueConversionZExt2; - ara_req_d.eew_vs2 = csr_vtype_q.vsew.next(); - ara_req_d.cvt_resize = CVT_WIDE; + ara_req.vtype.vsew = csr_vtype_q.vsew.next(); + ara_req.conversion_vs1 = OpQueueConversionZExt2; + ara_req.eew_vs2 = csr_vtype_q.vsew.next(); + ara_req.cvt_resize = CVT_WIDE; end 6'b110101: begin // VWADD.W - ara_req_d.op = ara_pkg::VADD; - ara_req_d.emul = next_lmul(csr_vtype_q.vlmul); + ara_req.op = ara_pkg::VADD; + ara_req.emul = next_lmul(csr_vtype_q.vlmul); lmul_vs2 = next_lmul(csr_vtype_q.vlmul); - ara_req_d.vtype.vsew = csr_vtype_q.vsew.next(); - ara_req_d.conversion_vs1 = OpQueueConversionSExt2; - ara_req_d.eew_vs2 = csr_vtype_q.vsew.next(); - ara_req_d.cvt_resize = CVT_WIDE; + ara_req.vtype.vsew = csr_vtype_q.vsew.next(); + ara_req.conversion_vs1 = OpQueueConversionSExt2; + 
ara_req.eew_vs2 = csr_vtype_q.vsew.next(); + ara_req.cvt_resize = CVT_WIDE; end 6'b110110: begin // VWSUBU.W - ara_req_d.op = ara_pkg::VSUB; - ara_req_d.emul = next_lmul(csr_vtype_q.vlmul); + ara_req.op = ara_pkg::VSUB; + ara_req.emul = next_lmul(csr_vtype_q.vlmul); lmul_vs2 = next_lmul(csr_vtype_q.vlmul); - ara_req_d.vtype.vsew = csr_vtype_q.vsew.next(); - ara_req_d.conversion_vs1 = OpQueueConversionZExt2; - ara_req_d.eew_vs2 = csr_vtype_q.vsew.next(); - ara_req_d.cvt_resize = CVT_WIDE; + ara_req.vtype.vsew = csr_vtype_q.vsew.next(); + ara_req.conversion_vs1 = OpQueueConversionZExt2; + ara_req.eew_vs2 = csr_vtype_q.vsew.next(); + ara_req.cvt_resize = CVT_WIDE; end 6'b110111: begin // VWSUB.W - ara_req_d.op = ara_pkg::VSUB; - ara_req_d.emul = next_lmul(csr_vtype_q.vlmul); + ara_req.op = ara_pkg::VSUB; + ara_req.emul = next_lmul(csr_vtype_q.vlmul); lmul_vs2 = next_lmul(csr_vtype_q.vlmul); - ara_req_d.vtype.vsew = csr_vtype_q.vsew.next(); - ara_req_d.conversion_vs1 = OpQueueConversionSExt2; - ara_req_d.eew_vs2 = csr_vtype_q.vsew.next(); - ara_req_d.cvt_resize = CVT_WIDE; + ara_req.vtype.vsew = csr_vtype_q.vsew.next(); + ara_req.conversion_vs1 = OpQueueConversionSExt2; + ara_req.eew_vs2 = csr_vtype_q.vsew.next(); + ara_req.cvt_resize = CVT_WIDE; end 6'b111000: begin // VWMULU - ara_req_d.op = ara_pkg::VMUL; - ara_req_d.emul = next_lmul(csr_vtype_q.vlmul); - ara_req_d.vtype.vsew = csr_vtype_q.vsew.next(); - ara_req_d.conversion_vs1 = OpQueueConversionZExt2; - ara_req_d.conversion_vs2 = OpQueueConversionZExt2; - ara_req_d.cvt_resize = CVT_WIDE; + ara_req.op = ara_pkg::VMUL; + ara_req.emul = next_lmul(csr_vtype_q.vlmul); + ara_req.vtype.vsew = csr_vtype_q.vsew.next(); + ara_req.conversion_vs1 = OpQueueConversionZExt2; + ara_req.conversion_vs2 = OpQueueConversionZExt2; + ara_req.cvt_resize = CVT_WIDE; end 6'b111010: begin // VWMULSU - ara_req_d.op = ara_pkg::VMUL; - ara_req_d.emul = next_lmul(csr_vtype_q.vlmul); - ara_req_d.vtype.vsew = csr_vtype_q.vsew.next(); - 
ara_req_d.conversion_vs1 = OpQueueConversionZExt2; - ara_req_d.conversion_vs2 = OpQueueConversionSExt2; - ara_req_d.cvt_resize = CVT_WIDE; + ara_req.op = ara_pkg::VMUL; + ara_req.emul = next_lmul(csr_vtype_q.vlmul); + ara_req.vtype.vsew = csr_vtype_q.vsew.next(); + ara_req.conversion_vs1 = OpQueueConversionZExt2; + ara_req.conversion_vs2 = OpQueueConversionSExt2; + ara_req.cvt_resize = CVT_WIDE; end 6'b111011: begin // VWMUL - ara_req_d.op = ara_pkg::VMUL; - ara_req_d.emul = next_lmul(csr_vtype_q.vlmul); - ara_req_d.vtype.vsew = csr_vtype_q.vsew.next(); - ara_req_d.conversion_vs1 = OpQueueConversionSExt2; - ara_req_d.conversion_vs2 = OpQueueConversionSExt2; - ara_req_d.cvt_resize = CVT_WIDE; + ara_req.op = ara_pkg::VMUL; + ara_req.emul = next_lmul(csr_vtype_q.vlmul); + ara_req.vtype.vsew = csr_vtype_q.vsew.next(); + ara_req.conversion_vs1 = OpQueueConversionSExt2; + ara_req.conversion_vs2 = OpQueueConversionSExt2; + ara_req.cvt_resize = CVT_WIDE; end 6'b111100: begin // VWMACCU - ara_req_d.op = ara_pkg::VMACC; - ara_req_d.use_vd_op = 1'b1; - ara_req_d.emul = next_lmul(csr_vtype_q.vlmul); - ara_req_d.vtype.vsew = csr_vtype_q.vsew.next(); - ara_req_d.conversion_vs1 = OpQueueConversionZExt2; - ara_req_d.conversion_vs2 = OpQueueConversionZExt2; - ara_req_d.eew_vd_op = csr_vtype_q.vsew.next(); - ara_req_d.cvt_resize = CVT_WIDE; + ara_req.op = ara_pkg::VMACC; + ara_req.use_vd_op = 1'b1; + ara_req.emul = next_lmul(csr_vtype_q.vlmul); + ara_req.vtype.vsew = csr_vtype_q.vsew.next(); + ara_req.conversion_vs1 = OpQueueConversionZExt2; + ara_req.conversion_vs2 = OpQueueConversionZExt2; + ara_req.eew_vd_op = csr_vtype_q.vsew.next(); + ara_req.cvt_resize = CVT_WIDE; end 6'b111101: begin // VWMACC - ara_req_d.op = ara_pkg::VMACC; - ara_req_d.use_vd_op = 1'b1; - ara_req_d.emul = next_lmul(csr_vtype_q.vlmul); - ara_req_d.vtype.vsew = csr_vtype_q.vsew.next(); - ara_req_d.conversion_vs1 = OpQueueConversionSExt2; - ara_req_d.conversion_vs2 = OpQueueConversionSExt2; - 
ara_req_d.eew_vd_op = csr_vtype_q.vsew.next(); - ara_req_d.cvt_resize = CVT_WIDE; + ara_req.op = ara_pkg::VMACC; + ara_req.use_vd_op = 1'b1; + ara_req.emul = next_lmul(csr_vtype_q.vlmul); + ara_req.vtype.vsew = csr_vtype_q.vsew.next(); + ara_req.conversion_vs1 = OpQueueConversionSExt2; + ara_req.conversion_vs2 = OpQueueConversionSExt2; + ara_req.eew_vd_op = csr_vtype_q.vsew.next(); + ara_req.cvt_resize = CVT_WIDE; end 6'b111111: begin // VWMACCSU - ara_req_d.op = ara_pkg::VMACC; - ara_req_d.use_vd_op = 1'b1; - ara_req_d.emul = next_lmul(csr_vtype_q.vlmul); - ara_req_d.vtype.vsew = csr_vtype_q.vsew.next(); - ara_req_d.conversion_vs1 = OpQueueConversionSExt2; - ara_req_d.conversion_vs2 = OpQueueConversionZExt2; - ara_req_d.eew_vd_op = csr_vtype_q.vsew.next(); - ara_req_d.cvt_resize = CVT_WIDE; + ara_req.op = ara_pkg::VMACC; + ara_req.use_vd_op = 1'b1; + ara_req.emul = next_lmul(csr_vtype_q.vlmul); + ara_req.vtype.vsew = csr_vtype_q.vsew.next(); + ara_req.conversion_vs1 = OpQueueConversionSExt2; + ara_req.conversion_vs2 = OpQueueConversionZExt2; + ara_req.eew_vd_op = csr_vtype_q.vsew.next(); + ara_req.cvt_resize = CVT_WIDE; end default: illegal_insn = 1'b1; endcase @@ -1610,7 +1646,7 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( // access. These constraints can be different for the two source operands and the // destination register. if (!skip_lmul_checks) begin - unique case (ara_req_d.emul) + unique case (ara_req.emul) LMUL_2: if ((insn.varith_type.rd & 5'b00001) != 5'b00000) illegal_insn = 1'b1; LMUL_4: if ((insn.varith_type.rd & 5'b00011) != 5'b00000) illegal_insn = 1'b1; LMUL_8: if ((insn.varith_type.rd & 5'b00111) != 5'b00000) illegal_insn = 1'b1; @@ -1631,7 +1667,7 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( end // Ara cannot support instructions who operates on more than 64 bits. 
- if (int'(ara_req_d.vtype.vsew) > int'(EW64)) illegal_insn = 1'b1; + if (int'(ara_req.vtype.vsew) > int'(EW64)) illegal_insn = 1'b1; // Instruction is invalid if the vtype is invalid if (csr_vtype_q.vill) illegal_insn = 1'b1; @@ -1639,211 +1675,211 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( OPMVX: begin: opmvx // These generate a request to Ara's backend - ara_req_d.scalar_op = acc_req_i.rs1; - ara_req_d.use_scalar_op = 1'b1; - ara_req_d.vs2 = insn.varith_type.rs2; - ara_req_d.use_vs2 = 1'b1; - ara_req_d.vd = insn.varith_type.rd; - ara_req_d.use_vd = 1'b1; - ara_req_d.vm = insn.varith_type.vm; - ara_req_d.is_stride_np2 = is_stride_np2; + ara_req.scalar_op = acc_req_i.rs1; + ara_req.use_scalar_op = 1'b1; + ara_req.vs2 = insn.varith_type.rs2; + ara_req.use_vs2 = 1'b1; + ara_req.vd = insn.varith_type.rd; + ara_req.use_vd = 1'b1; + ara_req.vm = insn.varith_type.vm; + ara_req.is_stride_np2 = is_stride_np2; ara_req_valid_d = 1'b1; // Decode based on the func6 field unique case (insn.varith_type.func6) - 6'b001000: ara_req_d.op = ara_pkg::VAADDU; - 6'b001001: ara_req_d.op = ara_pkg::VAADD; - 6'b001010: ara_req_d.op = ara_pkg::VASUBU; - 6'b001011: ara_req_d.op = ara_pkg::VASUB; + 6'b001000: ara_req.op = ara_pkg::VAADDU; + 6'b001001: ara_req.op = ara_pkg::VAADD; + 6'b001010: ara_req.op = ara_pkg::VASUBU; + 6'b001011: ara_req.op = ara_pkg::VASUB; // Slides 6'b001110: begin // vslide1up - ara_req_d.op = ara_pkg::VSLIDEUP; - ara_req_d.stride = 1; - ara_req_d.eew_vs2 = csr_vtype_q.vsew; + ara_req.op = ara_pkg::VSLIDEUP; + ara_req.stride = 1; + ara_req.eew_vs2 = csr_vtype_q.vsew; // Request will need reshuffling - ara_req_d.scale_vl = 1'b1; + ara_req.scale_vl = 1'b1; // If stride > vl, the vslideup has no effects - if (|ara_req_d.stride[$bits(ara_req_d.stride)-1:$bits(csr_vl_q)] || - (vlen_t'(ara_req_d.stride) >= csr_vl_q)) null_vslideup = 1'b1; + if (|ara_req.stride[$bits(ara_req.stride)-1:$bits(csr_vl_q)] || + (vlen_t'(ara_req.stride) >= csr_vl_q)) 
null_vslideup = 1'b1; end 6'b001111: begin // vslide1down - ara_req_d.op = ara_pkg::VSLIDEDOWN; - ara_req_d.stride = 1; - ara_req_d.eew_vs2 = csr_vtype_q.vsew; + ara_req.op = ara_pkg::VSLIDEDOWN; + ara_req.stride = 1; + ara_req.eew_vs2 = csr_vtype_q.vsew; // Request will need reshuffling - ara_req_d.scale_vl = 1'b1; + ara_req.scale_vl = 1'b1; end 6'b010000: begin // VRXUNARY0 // vmv.s.x - ara_req_d.op = ara_pkg::VMVSX; - ara_req_d.use_vs2 = 1'b0; - ara_req_d.vl = |csr_vl_q ? 1 : '0; + ara_req.op = ara_pkg::VMVSX; + ara_req.use_vs2 = 1'b0; + ara_req.vl = |csr_vl_q ? 1 : '0; // This instruction ignores LMUL checks skip_lmul_checks = 1'b1; end // Divide instructions - 6'b100000: ara_req_d.op = ara_pkg::VDIVU; - 6'b100001: ara_req_d.op = ara_pkg::VDIV; - 6'b100010: ara_req_d.op = ara_pkg::VREMU; - 6'b100011: ara_req_d.op = ara_pkg::VREM; + 6'b100000: ara_req.op = ara_pkg::VDIVU; + 6'b100001: ara_req.op = ara_pkg::VDIV; + 6'b100010: ara_req.op = ara_pkg::VREMU; + 6'b100011: ara_req.op = ara_pkg::VREM; // Multiply instructions - 6'b100100: ara_req_d.op = ara_pkg::VMULHU; - 6'b100101: ara_req_d.op = ara_pkg::VMUL; - 6'b100110: ara_req_d.op = ara_pkg::VMULHSU; - 6'b100111: ara_req_d.op = ara_pkg::VMULH; + 6'b100100: ara_req.op = ara_pkg::VMULHU; + 6'b100101: ara_req.op = ara_pkg::VMUL; + 6'b100110: ara_req.op = ara_pkg::VMULHSU; + 6'b100111: ara_req.op = ara_pkg::VMULH; // Multiply-Add instructions // vd is also used as a source operand 6'b101001: begin - ara_req_d.op = ara_pkg::VMADD; - ara_req_d.use_vd_op = 1'b1; + ara_req.op = ara_pkg::VMADD; + ara_req.use_vd_op = 1'b1; // Swap "vs2" and "vd" since "vs2" is the addend and "vd" is the multiplicand - ara_req_d.swap_vs2_vd_op = 1'b1; + ara_req.swap_vs2_vd_op = 1'b1; end 6'b101011: begin - ara_req_d.op = ara_pkg::VNMSUB; - ara_req_d.use_vd_op = 1'b1; - ara_req_d.swap_vs2_vd_op = 1'b1; + ara_req.op = ara_pkg::VNMSUB; + ara_req.use_vd_op = 1'b1; + ara_req.swap_vs2_vd_op = 1'b1; end 6'b101101: begin - ara_req_d.op = 
ara_pkg::VMACC; - ara_req_d.use_vd_op = 1'b1; + ara_req.op = ara_pkg::VMACC; + ara_req.use_vd_op = 1'b1; end 6'b101111: begin - ara_req_d.op = ara_pkg::VNMSAC; - ara_req_d.use_vd_op = 1'b1; + ara_req.op = ara_pkg::VNMSAC; + ara_req.use_vd_op = 1'b1; end // Widening instructions 6'b110000: begin // VWADDU - ara_req_d.op = ara_pkg::VADD; - ara_req_d.emul = next_lmul(csr_vtype_q.vlmul); - ara_req_d.vtype.vsew = csr_vtype_q.vsew.next(); - ara_req_d.conversion_vs1 = OpQueueConversionZExt2; - ara_req_d.conversion_vs2 = OpQueueConversionZExt2; - ara_req_d.cvt_resize = CVT_WIDE; + ara_req.op = ara_pkg::VADD; + ara_req.emul = next_lmul(csr_vtype_q.vlmul); + ara_req.vtype.vsew = csr_vtype_q.vsew.next(); + ara_req.conversion_vs1 = OpQueueConversionZExt2; + ara_req.conversion_vs2 = OpQueueConversionZExt2; + ara_req.cvt_resize = CVT_WIDE; end 6'b110001: begin // VWADD - ara_req_d.op = ara_pkg::VADD; - ara_req_d.emul = next_lmul(csr_vtype_q.vlmul); - ara_req_d.vtype.vsew = csr_vtype_q.vsew.next(); - ara_req_d.conversion_vs1 = OpQueueConversionSExt2; - ara_req_d.conversion_vs2 = OpQueueConversionSExt2; - ara_req_d.cvt_resize = CVT_WIDE; + ara_req.op = ara_pkg::VADD; + ara_req.emul = next_lmul(csr_vtype_q.vlmul); + ara_req.vtype.vsew = csr_vtype_q.vsew.next(); + ara_req.conversion_vs1 = OpQueueConversionSExt2; + ara_req.conversion_vs2 = OpQueueConversionSExt2; + ara_req.cvt_resize = CVT_WIDE; end 6'b110010: begin // VWSUBU - ara_req_d.op = ara_pkg::VSUB; - ara_req_d.emul = next_lmul(csr_vtype_q.vlmul); - ara_req_d.vtype.vsew = csr_vtype_q.vsew.next(); - ara_req_d.conversion_vs1 = OpQueueConversionZExt2; - ara_req_d.conversion_vs2 = OpQueueConversionZExt2; - ara_req_d.cvt_resize = CVT_WIDE; + ara_req.op = ara_pkg::VSUB; + ara_req.emul = next_lmul(csr_vtype_q.vlmul); + ara_req.vtype.vsew = csr_vtype_q.vsew.next(); + ara_req.conversion_vs1 = OpQueueConversionZExt2; + ara_req.conversion_vs2 = OpQueueConversionZExt2; + ara_req.cvt_resize = CVT_WIDE; end 6'b110011: begin // VWSUB - 
ara_req_d.op = ara_pkg::VSUB; - ara_req_d.emul = next_lmul(csr_vtype_q.vlmul); - ara_req_d.vtype.vsew = csr_vtype_q.vsew.next(); - ara_req_d.conversion_vs1 = OpQueueConversionSExt2; - ara_req_d.conversion_vs2 = OpQueueConversionSExt2; - ara_req_d.cvt_resize = CVT_WIDE; + ara_req.op = ara_pkg::VSUB; + ara_req.emul = next_lmul(csr_vtype_q.vlmul); + ara_req.vtype.vsew = csr_vtype_q.vsew.next(); + ara_req.conversion_vs1 = OpQueueConversionSExt2; + ara_req.conversion_vs2 = OpQueueConversionSExt2; + ara_req.cvt_resize = CVT_WIDE; end 6'b110100: begin // VWADDU.W - ara_req_d.op = ara_pkg::VADD; - ara_req_d.emul = next_lmul(csr_vtype_q.vlmul); + ara_req.op = ara_pkg::VADD; + ara_req.emul = next_lmul(csr_vtype_q.vlmul); lmul_vs2 = next_lmul(csr_vtype_q.vlmul); - ara_req_d.vtype.vsew = csr_vtype_q.vsew.next(); - ara_req_d.conversion_vs1 = OpQueueConversionZExt2; - ara_req_d.eew_vs2 = csr_vtype_q.vsew.next(); - ara_req_d.cvt_resize = CVT_WIDE; + ara_req.vtype.vsew = csr_vtype_q.vsew.next(); + ara_req.conversion_vs1 = OpQueueConversionZExt2; + ara_req.eew_vs2 = csr_vtype_q.vsew.next(); + ara_req.cvt_resize = CVT_WIDE; end 6'b110101: begin // VWADD.W - ara_req_d.op = ara_pkg::VADD; - ara_req_d.emul = next_lmul(csr_vtype_q.vlmul); + ara_req.op = ara_pkg::VADD; + ara_req.emul = next_lmul(csr_vtype_q.vlmul); lmul_vs2 = next_lmul(csr_vtype_q.vlmul); - ara_req_d.vtype.vsew = csr_vtype_q.vsew.next(); - ara_req_d.conversion_vs1 = OpQueueConversionSExt2; - ara_req_d.eew_vs2 = csr_vtype_q.vsew.next(); - ara_req_d.cvt_resize = CVT_WIDE; + ara_req.vtype.vsew = csr_vtype_q.vsew.next(); + ara_req.conversion_vs1 = OpQueueConversionSExt2; + ara_req.eew_vs2 = csr_vtype_q.vsew.next(); + ara_req.cvt_resize = CVT_WIDE; end 6'b110110: begin // VWSUBU.W - ara_req_d.op = ara_pkg::VSUB; - ara_req_d.emul = next_lmul(csr_vtype_q.vlmul); + ara_req.op = ara_pkg::VSUB; + ara_req.emul = next_lmul(csr_vtype_q.vlmul); lmul_vs2 = next_lmul(csr_vtype_q.vlmul); - ara_req_d.vtype.vsew = csr_vtype_q.vsew.next(); 
- ara_req_d.conversion_vs1 = OpQueueConversionZExt2; - ara_req_d.eew_vs2 = csr_vtype_q.vsew.next(); - ara_req_d.cvt_resize = CVT_WIDE; + ara_req.vtype.vsew = csr_vtype_q.vsew.next(); + ara_req.conversion_vs1 = OpQueueConversionZExt2; + ara_req.eew_vs2 = csr_vtype_q.vsew.next(); + ara_req.cvt_resize = CVT_WIDE; end 6'b110111: begin // VWSUB.W - ara_req_d.op = ara_pkg::VSUB; - ara_req_d.emul = next_lmul(csr_vtype_q.vlmul); + ara_req.op = ara_pkg::VSUB; + ara_req.emul = next_lmul(csr_vtype_q.vlmul); lmul_vs2 = next_lmul(csr_vtype_q.vlmul); - ara_req_d.vtype.vsew = csr_vtype_q.vsew.next(); - ara_req_d.conversion_vs1 = OpQueueConversionSExt2; - ara_req_d.eew_vs2 = csr_vtype_q.vsew.next(); - ara_req_d.cvt_resize = CVT_WIDE; + ara_req.vtype.vsew = csr_vtype_q.vsew.next(); + ara_req.conversion_vs1 = OpQueueConversionSExt2; + ara_req.eew_vs2 = csr_vtype_q.vsew.next(); + ara_req.cvt_resize = CVT_WIDE; end 6'b111000: begin // VWMULU - ara_req_d.op = ara_pkg::VMUL; - ara_req_d.emul = next_lmul(csr_vtype_q.vlmul); - ara_req_d.vtype.vsew = csr_vtype_q.vsew.next(); - ara_req_d.conversion_vs1 = OpQueueConversionZExt2; - ara_req_d.conversion_vs2 = OpQueueConversionZExt2; - ara_req_d.cvt_resize = CVT_WIDE; + ara_req.op = ara_pkg::VMUL; + ara_req.emul = next_lmul(csr_vtype_q.vlmul); + ara_req.vtype.vsew = csr_vtype_q.vsew.next(); + ara_req.conversion_vs1 = OpQueueConversionZExt2; + ara_req.conversion_vs2 = OpQueueConversionZExt2; + ara_req.cvt_resize = CVT_WIDE; end 6'b111010: begin // VWMULSU - ara_req_d.op = ara_pkg::VMUL; - ara_req_d.emul = next_lmul(csr_vtype_q.vlmul); - ara_req_d.vtype.vsew = csr_vtype_q.vsew.next(); - ara_req_d.conversion_vs1 = OpQueueConversionZExt2; - ara_req_d.conversion_vs2 = OpQueueConversionSExt2; - ara_req_d.cvt_resize = CVT_WIDE; + ara_req.op = ara_pkg::VMUL; + ara_req.emul = next_lmul(csr_vtype_q.vlmul); + ara_req.vtype.vsew = csr_vtype_q.vsew.next(); + ara_req.conversion_vs1 = OpQueueConversionZExt2; + ara_req.conversion_vs2 = OpQueueConversionSExt2; 
+ ara_req.cvt_resize = CVT_WIDE; end 6'b111011: begin // VWMUL - ara_req_d.op = ara_pkg::VMUL; - ara_req_d.emul = next_lmul(csr_vtype_q.vlmul); - ara_req_d.vtype.vsew = csr_vtype_q.vsew.next(); - ara_req_d.conversion_vs1 = OpQueueConversionSExt2; - ara_req_d.conversion_vs2 = OpQueueConversionSExt2; - ara_req_d.cvt_resize = CVT_WIDE; + ara_req.op = ara_pkg::VMUL; + ara_req.emul = next_lmul(csr_vtype_q.vlmul); + ara_req.vtype.vsew = csr_vtype_q.vsew.next(); + ara_req.conversion_vs1 = OpQueueConversionSExt2; + ara_req.conversion_vs2 = OpQueueConversionSExt2; + ara_req.cvt_resize = CVT_WIDE; end 6'b111100: begin // VWMACCU - ara_req_d.op = ara_pkg::VMACC; - ara_req_d.use_vd_op = 1'b1; - ara_req_d.emul = next_lmul(csr_vtype_q.vlmul); - ara_req_d.vtype.vsew = csr_vtype_q.vsew.next(); - ara_req_d.conversion_vs1 = OpQueueConversionZExt2; - ara_req_d.conversion_vs2 = OpQueueConversionZExt2; - ara_req_d.eew_vd_op = csr_vtype_q.vsew.next(); - ara_req_d.cvt_resize = CVT_WIDE; + ara_req.op = ara_pkg::VMACC; + ara_req.use_vd_op = 1'b1; + ara_req.emul = next_lmul(csr_vtype_q.vlmul); + ara_req.vtype.vsew = csr_vtype_q.vsew.next(); + ara_req.conversion_vs1 = OpQueueConversionZExt2; + ara_req.conversion_vs2 = OpQueueConversionZExt2; + ara_req.eew_vd_op = csr_vtype_q.vsew.next(); + ara_req.cvt_resize = CVT_WIDE; end 6'b111101: begin // VWMACC - ara_req_d.op = ara_pkg::VMACC; - ara_req_d.use_vd_op = 1'b1; - ara_req_d.emul = next_lmul(csr_vtype_q.vlmul); - ara_req_d.vtype.vsew = csr_vtype_q.vsew.next(); - ara_req_d.conversion_vs1 = OpQueueConversionSExt2; - ara_req_d.conversion_vs2 = OpQueueConversionSExt2; - ara_req_d.eew_vd_op = csr_vtype_q.vsew.next(); - ara_req_d.cvt_resize = CVT_WIDE; + ara_req.op = ara_pkg::VMACC; + ara_req.use_vd_op = 1'b1; + ara_req.emul = next_lmul(csr_vtype_q.vlmul); + ara_req.vtype.vsew = csr_vtype_q.vsew.next(); + ara_req.conversion_vs1 = OpQueueConversionSExt2; + ara_req.conversion_vs2 = OpQueueConversionSExt2; + ara_req.eew_vd_op = 
csr_vtype_q.vsew.next(); + ara_req.cvt_resize = CVT_WIDE; end 6'b111110: begin // VWMACCUS - ara_req_d.op = ara_pkg::VMACC; - ara_req_d.use_vd_op = 1'b1; - ara_req_d.emul = next_lmul(csr_vtype_q.vlmul); - ara_req_d.vtype.vsew = csr_vtype_q.vsew.next(); - ara_req_d.conversion_vs1 = OpQueueConversionZExt2; - ara_req_d.conversion_vs2 = OpQueueConversionSExt2; - ara_req_d.eew_vd_op = csr_vtype_q.vsew.next(); - ara_req_d.cvt_resize = CVT_WIDE; + ara_req.op = ara_pkg::VMACC; + ara_req.use_vd_op = 1'b1; + ara_req.emul = next_lmul(csr_vtype_q.vlmul); + ara_req.vtype.vsew = csr_vtype_q.vsew.next(); + ara_req.conversion_vs1 = OpQueueConversionZExt2; + ara_req.conversion_vs2 = OpQueueConversionSExt2; + ara_req.eew_vd_op = csr_vtype_q.vsew.next(); + ara_req.cvt_resize = CVT_WIDE; end 6'b111111: begin // VWMACCSU - ara_req_d.op = ara_pkg::VMACC; - ara_req_d.use_vd_op = 1'b1; - ara_req_d.emul = next_lmul(csr_vtype_q.vlmul); - ara_req_d.vtype.vsew = csr_vtype_q.vsew.next(); - ara_req_d.conversion_vs1 = OpQueueConversionSExt2; - ara_req_d.conversion_vs2 = OpQueueConversionZExt2; - ara_req_d.eew_vd_op = csr_vtype_q.vsew.next(); - ara_req_d.cvt_resize = CVT_WIDE; + ara_req.op = ara_pkg::VMACC; + ara_req.use_vd_op = 1'b1; + ara_req.emul = next_lmul(csr_vtype_q.vlmul); + ara_req.vtype.vsew = csr_vtype_q.vsew.next(); + ara_req.conversion_vs1 = OpQueueConversionSExt2; + ara_req.conversion_vs2 = OpQueueConversionZExt2; + ara_req.eew_vd_op = csr_vtype_q.vsew.next(); + ara_req.cvt_resize = CVT_WIDE; end default: illegal_insn = 1'b1; endcase @@ -1852,7 +1888,7 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( // access. The constraints can be different for the two source operands and the // destination register. 
if (!skip_lmul_checks) begin - unique case (ara_req_d.emul) + unique case (ara_req.emul) LMUL_2: if ((insn.varith_type.rd & 5'b00001) != 5'b00000) illegal_insn = 1'b1; LMUL_4: if ((insn.varith_type.rd & 5'b00011) != 5'b00000) illegal_insn = 1'b1; LMUL_8: if ((insn.varith_type.rd & 5'b00111) != 5'b00000) illegal_insn = 1'b1; @@ -1867,7 +1903,7 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( end // Ara cannot support instructions who operates on more than 64 bits. - if (int'(ara_req_d.vtype.vsew) > int'(EW64)) illegal_insn = 1'b1; + if (int'(ara_req.vtype.vsew) > int'(EW64)) illegal_insn = 1'b1; // Instruction is invalid if the vtype is invalid if (csr_vtype_q.vill) illegal_insn = 1'b1; @@ -1876,76 +1912,76 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( OPFVV: begin: opfvv if (FPUSupport != FPUSupportNone) begin // These generate a request to Ara's backend - ara_req_d.vs1 = insn.varith_type.rs1; - ara_req_d.use_vs1 = 1'b1; - ara_req_d.vs2 = insn.varith_type.rs2; - ara_req_d.use_vs2 = 1'b1; - ara_req_d.vd = insn.varith_type.rd; - ara_req_d.use_vd = 1'b1; - ara_req_d.vm = insn.varith_type.vm; - ara_req_d.fp_rm = acc_req_i.frm; + ara_req.vs1 = insn.varith_type.rs1; + ara_req.use_vs1 = 1'b1; + ara_req.vs2 = insn.varith_type.rs2; + ara_req.use_vs2 = 1'b1; + ara_req.vd = insn.varith_type.rd; + ara_req.use_vd = 1'b1; + ara_req.vm = insn.varith_type.vm; + ara_req.fp_rm = acc_req_i.frm; ara_req_valid_d = 1'b1; // Decode based on the func6 field unique case (insn.varith_type.func6) // VFP Addition 6'b000000: begin - ara_req_d.op = ara_pkg::VFADD; + ara_req.op = ara_pkg::VFADD; // When performing a floating-point add/sub, fpnew adds the second and the third // operand. Send the first operand (vs2) to the third result queue. 
- ara_req_d.swap_vs2_vd_op = 1'b1; + ara_req.swap_vs2_vd_op = 1'b1; end 6'b000001: begin - ara_req_d.op = ara_pkg::VFREDUSUM; - ara_req_d.conversion_vs1 = OpQueueReductionZExt; - ara_req_d.swap_vs2_vd_op = 1'b1; - ara_req_d.cvt_resize = resize_e'(2'b00); + ara_req.op = ara_pkg::VFREDUSUM; + ara_req.conversion_vs1 = OpQueueReductionZExt; + ara_req.swap_vs2_vd_op = 1'b1; + ara_req.cvt_resize = resize_e'(2'b00); end 6'b000010: begin - ara_req_d.op = ara_pkg::VFSUB; - ara_req_d.swap_vs2_vd_op = 1'b1; + ara_req.op = ara_pkg::VFSUB; + ara_req.swap_vs2_vd_op = 1'b1; end 6'b000011: begin - ara_req_d.op = ara_pkg::VFREDOSUM; - ara_req_d.conversion_vs1 = OpQueueReductionZExt; - ara_req_d.swap_vs2_vd_op = 1'b1; - ara_req_d.cvt_resize = resize_e'(2'b00); + ara_req.op = ara_pkg::VFREDOSUM; + ara_req.conversion_vs1 = OpQueueReductionZExt; + ara_req.swap_vs2_vd_op = 1'b1; + ara_req.cvt_resize = resize_e'(2'b00); end - 6'b000100: ara_req_d.op = ara_pkg::VFMIN; + 6'b000100: ara_req.op = ara_pkg::VFMIN; 6'b000101: begin - ara_req_d.op = ara_pkg::VFREDMIN; - ara_req_d.conversion_vs1 = OpQueueReductionZExt; - ara_req_d.cvt_resize = resize_e'(2'b01); + ara_req.op = ara_pkg::VFREDMIN; + ara_req.conversion_vs1 = OpQueueReductionZExt; + ara_req.cvt_resize = resize_e'(2'b01); end - 6'b000110: ara_req_d.op = ara_pkg::VFMAX; + 6'b000110: ara_req.op = ara_pkg::VFMAX; 6'b000111: begin - ara_req_d.op = ara_pkg::VFREDMAX; - ara_req_d.conversion_vs1 = OpQueueReductionZExt; - ara_req_d.cvt_resize = resize_e'(2'b10); + ara_req.op = ara_pkg::VFREDMAX; + ara_req.conversion_vs1 = OpQueueReductionZExt; + ara_req.cvt_resize = resize_e'(2'b10); end - 6'b001000: ara_req_d.op = ara_pkg::VFSGNJ; - 6'b001001: ara_req_d.op = ara_pkg::VFSGNJN; - 6'b001010: ara_req_d.op = ara_pkg::VFSGNJX; + 6'b001000: ara_req.op = ara_pkg::VFSGNJ; + 6'b001001: ara_req.op = ara_pkg::VFSGNJN; + 6'b001010: ara_req.op = ara_pkg::VFSGNJX; 6'b010000: begin // VWFUNARY0 // vmv.f.s // Stall the interface until we get the result 
acc_resp_o.req_ready = 1'b0; acc_resp_o.resp_valid = 1'b0; - ara_req_d.op = ara_pkg::VFMVFS; - ara_req_d.use_vd = 1'b0; - ara_req_d.vl = 1; - ara_req_d.vstart = '0; + ara_req.op = ara_pkg::VFMVFS; + ara_req.use_vd = 1'b0; + ara_req.vl = 1; + ara_req.vstart = '0; skip_lmul_checks = 1'b1; ignore_zero_vl_check = 1'b1; // Zero-extend operands unique case (csr_vtype_q.vsew) EW16: begin - ara_req_d.conversion_vs2 = OpQueueConversionZExt4; + ara_req.conversion_vs2 = OpQueueConversionZExt4; end EW32: begin - ara_req_d.conversion_vs2 = OpQueueConversionZExt2; + ara_req.conversion_vs2 = OpQueueConversionZExt2; end default:; endcase @@ -1954,128 +1990,128 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( unique case (csr_vtype_q.vsew) EW16: begin vfmvfs_result[63:16] = '1; - vfmvfs_result[15:0] = ara_resp_i.resp[15:0]; + vfmvfs_result[15:0] = ara_resp.resp[15:0]; end EW32: begin vfmvfs_result[63:32] = '1; - vfmvfs_result[31:0] = ara_resp_i.resp[31:0]; + vfmvfs_result[31:0] = ara_resp.resp[31:0]; end - default: vfmvfs_result = ara_resp_i.resp; + default: vfmvfs_result = ara_resp.resp; endcase // Wait until the back-end answers to acknowledge those instructions - if (ara_resp_valid_i) begin + if (ara_resp_valid) begin acc_resp_o.req_ready = 1'b1; acc_resp_o.resp_valid = 1'b1; acc_resp_o.result = vfmvfs_result; - acc_resp_o.exception = ara_resp_i.exception; + acc_resp_o.exception = ara_resp.exception; ara_req_valid_d = 1'b0; end end - 6'b011000: ara_req_d.op = ara_pkg::VMFEQ; - 6'b011001: ara_req_d.op = ara_pkg::VMFLE; - 6'b011011: ara_req_d.op = ara_pkg::VMFLT; - 6'b011100: ara_req_d.op = ara_pkg::VMFNE; + 6'b011000: ara_req.op = ara_pkg::VMFEQ; + 6'b011001: ara_req.op = ara_pkg::VMFLE; + 6'b011011: ara_req.op = ara_pkg::VMFLT; + 6'b011100: ara_req.op = ara_pkg::VMFNE; 6'b010010: begin // VFUNARY0 // These instructions do not use vs1 - ara_req_d.use_vs1 = 1'b0; + ara_req.use_vs1 = 1'b0; skip_vs1_lmul_checks = 1'b1; case (insn.varith_type.rs1) - 5'b00000: 
ara_req_d.op = VFCVTXUF; - 5'b00001: ara_req_d.op = VFCVTXF; - 5'b00010: ara_req_d.op = VFCVTFXU; - 5'b00011: ara_req_d.op = VFCVTFX; - 5'b00110: ara_req_d.op = VFCVTRTZXUF; - 5'b00111: ara_req_d.op = VFCVTRTZXF; + 5'b00000: ara_req.op = VFCVTXUF; + 5'b00001: ara_req.op = VFCVTXF; + 5'b00010: ara_req.op = VFCVTFXU; + 5'b00011: ara_req.op = VFCVTFX; + 5'b00110: ara_req.op = VFCVTRTZXUF; + 5'b00111: ara_req.op = VFCVTRTZXF; 5'b01000: begin // Widening VFCVTXUF - ara_req_d.op = VFCVTXUF; - ara_req_d.cvt_resize = CVT_WIDE; - ara_req_d.emul = next_lmul(csr_vtype_q.vlmul); - ara_req_d.vtype.vsew = csr_vtype_q.vsew.next(); - ara_req_d.conversion_vs2 = OpQueueAdjustFPCvt; + ara_req.op = VFCVTXUF; + ara_req.cvt_resize = CVT_WIDE; + ara_req.emul = next_lmul(csr_vtype_q.vlmul); + ara_req.vtype.vsew = csr_vtype_q.vsew.next(); + ara_req.conversion_vs2 = OpQueueAdjustFPCvt; end 5'b01001: begin // Widening VFCVTXF - ara_req_d.op = VFCVTXF; - ara_req_d.cvt_resize = CVT_WIDE; - ara_req_d.emul = next_lmul(csr_vtype_q.vlmul); - ara_req_d.vtype.vsew = csr_vtype_q.vsew.next(); - ara_req_d.conversion_vs2 = OpQueueAdjustFPCvt; + ara_req.op = VFCVTXF; + ara_req.cvt_resize = CVT_WIDE; + ara_req.emul = next_lmul(csr_vtype_q.vlmul); + ara_req.vtype.vsew = csr_vtype_q.vsew.next(); + ara_req.conversion_vs2 = OpQueueAdjustFPCvt; end 5'b01010: begin // Widening VFCVTFXU - ara_req_d.op = VFCVTFXU; - ara_req_d.cvt_resize = CVT_WIDE; - ara_req_d.emul = next_lmul(csr_vtype_q.vlmul); - ara_req_d.vtype.vsew = csr_vtype_q.vsew.next(); - ara_req_d.conversion_vs2 = OpQueueAdjustFPCvt; + ara_req.op = VFCVTFXU; + ara_req.cvt_resize = CVT_WIDE; + ara_req.emul = next_lmul(csr_vtype_q.vlmul); + ara_req.vtype.vsew = csr_vtype_q.vsew.next(); + ara_req.conversion_vs2 = OpQueueAdjustFPCvt; end 5'b01011: begin // Widening VFCVTFX - ara_req_d.op = VFCVTFX; - ara_req_d.cvt_resize = CVT_WIDE; - ara_req_d.emul = next_lmul(csr_vtype_q.vlmul); - ara_req_d.vtype.vsew = csr_vtype_q.vsew.next(); - ara_req_d.conversion_vs2 
= OpQueueAdjustFPCvt; + ara_req.op = VFCVTFX; + ara_req.cvt_resize = CVT_WIDE; + ara_req.emul = next_lmul(csr_vtype_q.vlmul); + ara_req.vtype.vsew = csr_vtype_q.vsew.next(); + ara_req.conversion_vs2 = OpQueueAdjustFPCvt; end 5'b01100: begin // Widening VFCVTFF - ara_req_d.op = VFCVTFF; - ara_req_d.cvt_resize = CVT_WIDE; - ara_req_d.emul = next_lmul(csr_vtype_q.vlmul); - ara_req_d.vtype.vsew = csr_vtype_q.vsew.next(); - ara_req_d.conversion_vs2 = OpQueueAdjustFPCvt; + ara_req.op = VFCVTFF; + ara_req.cvt_resize = CVT_WIDE; + ara_req.emul = next_lmul(csr_vtype_q.vlmul); + ara_req.vtype.vsew = csr_vtype_q.vsew.next(); + ara_req.conversion_vs2 = OpQueueAdjustFPCvt; end 5'b01110: begin // Widening VFCVTRTZXUF - ara_req_d.op = VFCVTRTZXUF; - ara_req_d.cvt_resize = CVT_WIDE; - ara_req_d.emul = next_lmul(csr_vtype_q.vlmul); - ara_req_d.vtype.vsew = csr_vtype_q.vsew.next(); - ara_req_d.conversion_vs2 = OpQueueAdjustFPCvt; + ara_req.op = VFCVTRTZXUF; + ara_req.cvt_resize = CVT_WIDE; + ara_req.emul = next_lmul(csr_vtype_q.vlmul); + ara_req.vtype.vsew = csr_vtype_q.vsew.next(); + ara_req.conversion_vs2 = OpQueueAdjustFPCvt; end 5'b01111: begin // Widening VFCVTRTZXF - ara_req_d.op = VFCVTRTZXF; - ara_req_d.cvt_resize = CVT_WIDE; - ara_req_d.emul = next_lmul(csr_vtype_q.vlmul); - ara_req_d.vtype.vsew = csr_vtype_q.vsew.next(); - ara_req_d.conversion_vs2 = OpQueueAdjustFPCvt; + ara_req.op = VFCVTRTZXF; + ara_req.cvt_resize = CVT_WIDE; + ara_req.emul = next_lmul(csr_vtype_q.vlmul); + ara_req.vtype.vsew = csr_vtype_q.vsew.next(); + ara_req.conversion_vs2 = OpQueueAdjustFPCvt; end 5'b10000: begin // Narrowing VFCVTXUF - ara_req_d.op = VFCVTXUF; - ara_req_d.cvt_resize = CVT_NARROW; - ara_req_d.eew_vs2 = csr_vtype_q.vsew.next(); + ara_req.op = VFCVTXUF; + ara_req.cvt_resize = CVT_NARROW; + ara_req.eew_vs2 = csr_vtype_q.vsew.next(); end 5'b10001: begin // Narrowing VFCVTXF - ara_req_d.op = VFCVTXF; - ara_req_d.cvt_resize = CVT_NARROW; - ara_req_d.eew_vs2 = csr_vtype_q.vsew.next(); + 
ara_req.op = VFCVTXF; + ara_req.cvt_resize = CVT_NARROW; + ara_req.eew_vs2 = csr_vtype_q.vsew.next(); end 5'b10010: begin // Narrowing VFCVTFXU - ara_req_d.op = VFCVTFXU; - ara_req_d.cvt_resize = CVT_NARROW; - ara_req_d.eew_vs2 = csr_vtype_q.vsew.next(); + ara_req.op = VFCVTFXU; + ara_req.cvt_resize = CVT_NARROW; + ara_req.eew_vs2 = csr_vtype_q.vsew.next(); end 5'b10011: begin // Narrowing VFCVTFX - ara_req_d.op = VFCVTFX; - ara_req_d.cvt_resize = CVT_NARROW; - ara_req_d.eew_vs2 = csr_vtype_q.vsew.next(); + ara_req.op = VFCVTFX; + ara_req.cvt_resize = CVT_NARROW; + ara_req.eew_vs2 = csr_vtype_q.vsew.next(); end 5'b10100: begin // Narrowing VFCVTFF - ara_req_d.op = VFCVTFF; - ara_req_d.cvt_resize = CVT_NARROW; - ara_req_d.eew_vs2 = csr_vtype_q.vsew.next(); + ara_req.op = VFCVTFF; + ara_req.cvt_resize = CVT_NARROW; + ara_req.eew_vs2 = csr_vtype_q.vsew.next(); end 5'b10101: begin // Narrowing VFNCVTRODFF - ara_req_d.op = VFNCVTRODFF; - ara_req_d.cvt_resize = CVT_NARROW; - ara_req_d.eew_vs2 = csr_vtype_q.vsew.next(); + ara_req.op = VFNCVTRODFF; + ara_req.cvt_resize = CVT_NARROW; + ara_req.eew_vs2 = csr_vtype_q.vsew.next(); end 5'b10110: begin // Narrowing VFCVTRTZXUF - ara_req_d.op = VFCVTRTZXUF; - ara_req_d.cvt_resize = CVT_NARROW; - ara_req_d.eew_vs2 = csr_vtype_q.vsew.next(); + ara_req.op = VFCVTRTZXUF; + ara_req.cvt_resize = CVT_NARROW; + ara_req.eew_vs2 = csr_vtype_q.vsew.next(); end 5'b10111: begin // Narrowing VFCVTRTZXF - ara_req_d.op = VFCVTRTZXF; - ara_req_d.cvt_resize = CVT_NARROW; - ara_req_d.eew_vs2 = csr_vtype_q.vsew.next(); + ara_req.op = VFCVTRTZXF; + ara_req.cvt_resize = CVT_NARROW; + ara_req.eew_vs2 = csr_vtype_q.vsew.next(); end default: begin // Trigger an error @@ -2085,156 +2121,156 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( end 6'b010011: begin // VFUNARY1 // These instructions do not use vs1 - ara_req_d.use_vs1 = 1'b0; + ara_req.use_vs1 = 1'b0; skip_vs1_lmul_checks = 1'b1; unique case (insn.varith_type.rs1) - 5'b00000: 
ara_req_d.op = ara_pkg::VFSQRT; - 5'b00100: ara_req_d.op = ara_pkg::VFRSQRT7; - 5'b00101: ara_req_d.op = ara_pkg::VFREC7; - 5'b10000: ara_req_d.op = ara_pkg::VFCLASS; + 5'b00000: ara_req.op = ara_pkg::VFSQRT; + 5'b00100: ara_req.op = ara_pkg::VFRSQRT7; + 5'b00101: ara_req.op = ara_pkg::VFREC7; + 5'b10000: ara_req.op = ara_pkg::VFCLASS; default : illegal_insn = 1'b1; endcase end - 6'b100000: ara_req_d.op = ara_pkg::VFDIV; - 6'b100100: ara_req_d.op = ara_pkg::VFMUL; + 6'b100000: ara_req.op = ara_pkg::VFDIV; + 6'b100100: ara_req.op = ara_pkg::VFMUL; 6'b101000: begin - ara_req_d.op = ara_pkg::VFMADD; - ara_req_d.use_vd_op = 1'b1; + ara_req.op = ara_pkg::VFMADD; + ara_req.use_vd_op = 1'b1; // Swap "vs2" and "vd" since "vs2" is the addend and "vd" is the multiplicand - ara_req_d.swap_vs2_vd_op = 1'b1; + ara_req.swap_vs2_vd_op = 1'b1; end 6'b101001: begin - ara_req_d.op = ara_pkg::VFNMADD; - ara_req_d.use_vd_op = 1'b1; + ara_req.op = ara_pkg::VFNMADD; + ara_req.use_vd_op = 1'b1; // Swap "vs2" and "vd" since "vs2" is the addend and "vd" is the multiplicand - ara_req_d.swap_vs2_vd_op = 1'b1; + ara_req.swap_vs2_vd_op = 1'b1; end 6'b101010: begin - ara_req_d.op = ara_pkg::VFMSUB; - ara_req_d.use_vd_op = 1'b1; + ara_req.op = ara_pkg::VFMSUB; + ara_req.use_vd_op = 1'b1; // Swap "vs2" and "vd" since "vs2" is the addend and "vd" is the multiplicand - ara_req_d.swap_vs2_vd_op = 1'b1; + ara_req.swap_vs2_vd_op = 1'b1; end 6'b101011: begin - ara_req_d.op = ara_pkg::VFNMSUB; - ara_req_d.use_vd_op = 1'b1; + ara_req.op = ara_pkg::VFNMSUB; + ara_req.use_vd_op = 1'b1; // Swap "vs2" and "vd" since "vs2" is the addend and "vd" is the multiplicand - ara_req_d.swap_vs2_vd_op = 1'b1; + ara_req.swap_vs2_vd_op = 1'b1; end 6'b101100: begin - ara_req_d.op = ara_pkg::VFMACC; - ara_req_d.use_vd_op = 1'b1; + ara_req.op = ara_pkg::VFMACC; + ara_req.use_vd_op = 1'b1; end 6'b101101: begin - ara_req_d.op = ara_pkg::VFNMACC; - ara_req_d.use_vd_op = 1'b1; + ara_req.op = ara_pkg::VFNMACC; + 
ara_req.use_vd_op = 1'b1; end 6'b101110: begin - ara_req_d.op = ara_pkg::VFMSAC; - ara_req_d.use_vd_op = 1'b1; + ara_req.op = ara_pkg::VFMSAC; + ara_req.use_vd_op = 1'b1; end 6'b101111: begin - ara_req_d.op = ara_pkg::VFNMSAC; - ara_req_d.use_vd_op = 1'b1; + ara_req.op = ara_pkg::VFNMSAC; + ara_req.use_vd_op = 1'b1; end 6'b110000: begin // VFWADD - ara_req_d.op = ara_pkg::VFADD; - ara_req_d.swap_vs2_vd_op = 1'b1; - ara_req_d.emul = next_lmul(csr_vtype_q.vlmul); - ara_req_d.vtype.vsew = csr_vtype_q.vsew.next(); - ara_req_d.conversion_vs1 = OpQueueConversionWideFP2; - ara_req_d.conversion_vs2 = OpQueueConversionWideFP2; + ara_req.op = ara_pkg::VFADD; + ara_req.swap_vs2_vd_op = 1'b1; + ara_req.emul = next_lmul(csr_vtype_q.vlmul); + ara_req.vtype.vsew = csr_vtype_q.vsew.next(); + ara_req.conversion_vs1 = OpQueueConversionWideFP2; + ara_req.conversion_vs2 = OpQueueConversionWideFP2; end 6'b110001: begin // VFWREDUSUM - ara_req_d.op = ara_pkg::VFWREDUSUM; - ara_req_d.swap_vs2_vd_op = 1'b1; - ara_req_d.emul = next_lmul(csr_vtype_q.vlmul); - ara_req_d.vtype.vsew = csr_vtype_q.vsew.next(); - ara_req_d.conversion_vs1 = OpQueueReductionZExt; - ara_req_d.conversion_vs2 = OpQueueConversionWideFP2; - ara_req_d.eew_vs1 = csr_vtype_q.vsew.next(); - ara_req_d.cvt_resize = resize_e'(2'b00); + ara_req.op = ara_pkg::VFWREDUSUM; + ara_req.swap_vs2_vd_op = 1'b1; + ara_req.emul = next_lmul(csr_vtype_q.vlmul); + ara_req.vtype.vsew = csr_vtype_q.vsew.next(); + ara_req.conversion_vs1 = OpQueueReductionZExt; + ara_req.conversion_vs2 = OpQueueConversionWideFP2; + ara_req.eew_vs1 = csr_vtype_q.vsew.next(); + ara_req.cvt_resize = resize_e'(2'b00); end 6'b110010: begin // VFWSUB - ara_req_d.op = ara_pkg::VFSUB; - ara_req_d.swap_vs2_vd_op = 1'b1; - ara_req_d.emul = next_lmul(csr_vtype_q.vlmul); - ara_req_d.vtype.vsew = csr_vtype_q.vsew.next(); - ara_req_d.conversion_vs1 = OpQueueConversionWideFP2; - ara_req_d.conversion_vs2 = OpQueueConversionWideFP2; + ara_req.op = ara_pkg::VFSUB; + 
ara_req.swap_vs2_vd_op = 1'b1; + ara_req.emul = next_lmul(csr_vtype_q.vlmul); + ara_req.vtype.vsew = csr_vtype_q.vsew.next(); + ara_req.conversion_vs1 = OpQueueConversionWideFP2; + ara_req.conversion_vs2 = OpQueueConversionWideFP2; end 6'b110011: begin // VFWREDOSUM - ara_req_d.op = ara_pkg::VFWREDOSUM; - ara_req_d.swap_vs2_vd_op = 1'b1; - ara_req_d.emul = next_lmul(csr_vtype_q.vlmul); - ara_req_d.vtype.vsew = csr_vtype_q.vsew.next(); - ara_req_d.conversion_vs1 = OpQueueReductionZExt; - ara_req_d.conversion_vs2 = OpQueueConversionWideFP2; - ara_req_d.eew_vs1 = csr_vtype_q.vsew.next(); - ara_req_d.cvt_resize = resize_e'(2'b00); + ara_req.op = ara_pkg::VFWREDOSUM; + ara_req.swap_vs2_vd_op = 1'b1; + ara_req.emul = next_lmul(csr_vtype_q.vlmul); + ara_req.vtype.vsew = csr_vtype_q.vsew.next(); + ara_req.conversion_vs1 = OpQueueReductionZExt; + ara_req.conversion_vs2 = OpQueueConversionWideFP2; + ara_req.eew_vs1 = csr_vtype_q.vsew.next(); + ara_req.cvt_resize = resize_e'(2'b00); end 6'b110100: begin // VFWADD.W - ara_req_d.op = ara_pkg::VFADD; - ara_req_d.swap_vs2_vd_op = 1'b1; - ara_req_d.emul = next_lmul(csr_vtype_q.vlmul); + ara_req.op = ara_pkg::VFADD; + ara_req.swap_vs2_vd_op = 1'b1; + ara_req.emul = next_lmul(csr_vtype_q.vlmul); lmul_vs2 = next_lmul(csr_vtype_q.vlmul); - ara_req_d.vtype.vsew = csr_vtype_q.vsew.next(); - ara_req_d.eew_vs2 = csr_vtype_q.vsew.next(); - ara_req_d.conversion_vs1 = OpQueueConversionWideFP2; + ara_req.vtype.vsew = csr_vtype_q.vsew.next(); + ara_req.eew_vs2 = csr_vtype_q.vsew.next(); + ara_req.conversion_vs1 = OpQueueConversionWideFP2; end 6'b110110: begin // VFWSUB.W - ara_req_d.op = ara_pkg::VFSUB; - ara_req_d.swap_vs2_vd_op = 1'b1; - ara_req_d.emul = next_lmul(csr_vtype_q.vlmul); + ara_req.op = ara_pkg::VFSUB; + ara_req.swap_vs2_vd_op = 1'b1; + ara_req.emul = next_lmul(csr_vtype_q.vlmul); lmul_vs2 = next_lmul(csr_vtype_q.vlmul); - ara_req_d.vtype.vsew = csr_vtype_q.vsew.next(); - ara_req_d.eew_vs2 = csr_vtype_q.vsew.next(); - 
ara_req_d.conversion_vs1 = OpQueueConversionWideFP2; + ara_req.vtype.vsew = csr_vtype_q.vsew.next(); + ara_req.eew_vs2 = csr_vtype_q.vsew.next(); + ara_req.conversion_vs1 = OpQueueConversionWideFP2; end 6'b111000: begin // VFWMUL - ara_req_d.op = ara_pkg::VFMUL; - ara_req_d.emul = next_lmul(csr_vtype_q.vlmul); - ara_req_d.vtype.vsew = csr_vtype_q.vsew.next(); - ara_req_d.conversion_vs1 = OpQueueConversionWideFP2; - ara_req_d.conversion_vs2 = OpQueueConversionWideFP2; + ara_req.op = ara_pkg::VFMUL; + ara_req.emul = next_lmul(csr_vtype_q.vlmul); + ara_req.vtype.vsew = csr_vtype_q.vsew.next(); + ara_req.conversion_vs1 = OpQueueConversionWideFP2; + ara_req.conversion_vs2 = OpQueueConversionWideFP2; end 6'b111100: begin // VFWMACC - ara_req_d.op = ara_pkg::VFMACC; - ara_req_d.use_vd_op = 1'b1; - ara_req_d.emul = next_lmul(csr_vtype_q.vlmul); - ara_req_d.vtype.vsew = csr_vtype_q.vsew.next(); - ara_req_d.conversion_vs1 = OpQueueConversionWideFP2; - ara_req_d.conversion_vs2 = OpQueueConversionWideFP2; - ara_req_d.eew_vd_op = csr_vtype_q.vsew.next(); + ara_req.op = ara_pkg::VFMACC; + ara_req.use_vd_op = 1'b1; + ara_req.emul = next_lmul(csr_vtype_q.vlmul); + ara_req.vtype.vsew = csr_vtype_q.vsew.next(); + ara_req.conversion_vs1 = OpQueueConversionWideFP2; + ara_req.conversion_vs2 = OpQueueConversionWideFP2; + ara_req.eew_vd_op = csr_vtype_q.vsew.next(); end 6'b111101: begin // VFWNMACC - ara_req_d.op = ara_pkg::VFNMACC; - ara_req_d.use_vd_op = 1'b1; - ara_req_d.emul = next_lmul(csr_vtype_q.vlmul); - ara_req_d.vtype.vsew = csr_vtype_q.vsew.next(); - ara_req_d.conversion_vs1 = OpQueueConversionWideFP2; - ara_req_d.conversion_vs2 = OpQueueConversionWideFP2; - ara_req_d.eew_vd_op = csr_vtype_q.vsew.next(); + ara_req.op = ara_pkg::VFNMACC; + ara_req.use_vd_op = 1'b1; + ara_req.emul = next_lmul(csr_vtype_q.vlmul); + ara_req.vtype.vsew = csr_vtype_q.vsew.next(); + ara_req.conversion_vs1 = OpQueueConversionWideFP2; + ara_req.conversion_vs2 = OpQueueConversionWideFP2; + 
ara_req.eew_vd_op = csr_vtype_q.vsew.next(); end 6'b111110: begin // VFWMSAC - ara_req_d.op = ara_pkg::VFMSAC; - ara_req_d.use_vd_op = 1'b1; - ara_req_d.emul = next_lmul(csr_vtype_q.vlmul); - ara_req_d.vtype.vsew = csr_vtype_q.vsew.next(); - ara_req_d.conversion_vs1 = OpQueueConversionWideFP2; - ara_req_d.conversion_vs2 = OpQueueConversionWideFP2; - ara_req_d.eew_vd_op = csr_vtype_q.vsew.next(); + ara_req.op = ara_pkg::VFMSAC; + ara_req.use_vd_op = 1'b1; + ara_req.emul = next_lmul(csr_vtype_q.vlmul); + ara_req.vtype.vsew = csr_vtype_q.vsew.next(); + ara_req.conversion_vs1 = OpQueueConversionWideFP2; + ara_req.conversion_vs2 = OpQueueConversionWideFP2; + ara_req.eew_vd_op = csr_vtype_q.vsew.next(); end 6'b111111: begin // VFWNMSAC - ara_req_d.op = ara_pkg::VFNMSAC; - ara_req_d.use_vd_op = 1'b1; - ara_req_d.emul = next_lmul(csr_vtype_q.vlmul); - ara_req_d.vtype.vsew = csr_vtype_q.vsew.next(); - ara_req_d.conversion_vs1 = OpQueueConversionWideFP2; - ara_req_d.conversion_vs2 = OpQueueConversionWideFP2; - ara_req_d.eew_vd_op = csr_vtype_q.vsew.next(); + ara_req.op = ara_pkg::VFNMSAC; + ara_req.use_vd_op = 1'b1; + ara_req.emul = next_lmul(csr_vtype_q.vlmul); + ara_req.vtype.vsew = csr_vtype_q.vsew.next(); + ara_req.conversion_vs1 = OpQueueConversionWideFP2; + ara_req.conversion_vs2 = OpQueueConversionWideFP2; + ara_req.eew_vd_op = csr_vtype_q.vsew.next(); end default: illegal_insn = 1'b1; endcase @@ -2243,7 +2279,7 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( // can access. The constraints can be different for the two source operands and the // destination register. 
if (!skip_lmul_checks) begin - unique case (ara_req_d.emul) + unique case (ara_req.emul) LMUL_2 : if ((insn.varith_type.rd & 5'b00001) != 5'b00000) illegal_insn = 1'b1; LMUL_4 : if ((insn.varith_type.rd & 5'b00011) != 5'b00000) illegal_insn = 1'b1; LMUL_8 : if ((insn.varith_type.rd & 5'b00111) != 5'b00000) illegal_insn = 1'b1; @@ -2271,20 +2307,20 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( // Ara can support 16-bit float, 32-bit float, 64-bit float. // Ara cannot support instructions who operates on more than 64 bits. unique case (FPUSupport) - FPUSupportHalfSingleDouble: if (int'(ara_req_d.vtype.vsew) < int'(EW16) || - int'(ara_req_d.vtype.vsew) > int'(EW64) || int'(ara_req_d.eew_vs2) > int'(EW64)) + FPUSupportHalfSingleDouble: if (int'(ara_req.vtype.vsew) < int'(EW16) || + int'(ara_req.vtype.vsew) > int'(EW64) || int'(ara_req.eew_vs2) > int'(EW64)) illegal_insn = 1'b1; - FPUSupportHalfSingle: if (int'(ara_req_d.vtype.vsew) < int'(EW16) || - int'(ara_req_d.vtype.vsew) > int'(EW32) || int'(ara_req_d.eew_vs2) > int'(EW32)) + FPUSupportHalfSingle: if (int'(ara_req.vtype.vsew) < int'(EW16) || + int'(ara_req.vtype.vsew) > int'(EW32) || int'(ara_req.eew_vs2) > int'(EW32)) illegal_insn = 1'b1; - FPUSupportSingleDouble: if (int'(ara_req_d.vtype.vsew) < int'(EW32) || - int'(ara_req_d.vtype.vsew) > int'(EW64) || int'(ara_req_d.eew_vs2) > int'(EW64)) + FPUSupportSingleDouble: if (int'(ara_req.vtype.vsew) < int'(EW32) || + int'(ara_req.vtype.vsew) > int'(EW64) || int'(ara_req.eew_vs2) > int'(EW64)) illegal_insn = 1'b1; - FPUSupportHalf: if (int'(ara_req_d.vtype.vsew) != int'(EW16) || int'(ara_req_d.eew_vs2) > int'(EW16)) + FPUSupportHalf: if (int'(ara_req.vtype.vsew) != int'(EW16) || int'(ara_req.eew_vs2) > int'(EW16)) illegal_insn = 1'b1; - FPUSupportSingle: if (int'(ara_req_d.vtype.vsew) != int'(EW32) || int'(ara_req_d.eew_vs2) > int'(EW32)) + FPUSupportSingle: if (int'(ara_req.vtype.vsew) != int'(EW32) || int'(ara_req.eew_vs2) > int'(EW32)) 
illegal_insn = 1'b1; - FPUSupportDouble: if (int'(ara_req_d.vtype.vsew) != int'(EW64) || int'(ara_req_d.eew_vs2) > int'(EW64)) + FPUSupportDouble: if (int'(ara_req.vtype.vsew) != int'(EW64) || int'(ara_req.eew_vs2) > int'(EW64)) illegal_insn = 1'b1; default: illegal_insn = 1'b1; // Unsupported configuration endcase @@ -2297,205 +2333,205 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( OPFVF: begin: opfvf if (FPUSupport != FPUSupportNone) begin // These generate a request to Ara's backend - ara_req_d.scalar_op = acc_req_i.rs1; - ara_req_d.use_scalar_op = 1'b1; - ara_req_d.vs2 = insn.varith_type.rs2; - ara_req_d.use_vs2 = 1'b1; - ara_req_d.vd = insn.varith_type.rd; - ara_req_d.use_vd = 1'b1; - ara_req_d.vm = insn.varith_type.vm; - ara_req_d.is_stride_np2 = is_stride_np2; - ara_req_d.fp_rm = acc_req_i.frm; + ara_req.scalar_op = acc_req_i.rs1; + ara_req.use_scalar_op = 1'b1; + ara_req.vs2 = insn.varith_type.rs2; + ara_req.use_vs2 = 1'b1; + ara_req.vd = insn.varith_type.rd; + ara_req.use_vd = 1'b1; + ara_req.vm = insn.varith_type.vm; + ara_req.is_stride_np2 = is_stride_np2; + ara_req.fp_rm = acc_req_i.frm; ara_req_valid_d = 1'b1; // Decode based on the func6 field unique case (insn.varith_type.func6) 6'b000000: begin - ara_req_d.op = ara_pkg::VFADD; + ara_req.op = ara_pkg::VFADD; // When performing a floating-point add/sub, fpnew adds the second and the third // operand // So, send the first operand (vs2) to the third result queue - ara_req_d.swap_vs2_vd_op = 1'b1; + ara_req.swap_vs2_vd_op = 1'b1; end 6'b000010: begin - ara_req_d.op = ara_pkg::VFSUB; - ara_req_d.swap_vs2_vd_op = 1'b1; - end - 6'b000100: ara_req_d.op = ara_pkg::VFMIN; - 6'b000110: ara_req_d.op = ara_pkg::VFMAX; - 6'b001000: ara_req_d.op = ara_pkg::VFSGNJ; - 6'b001001: ara_req_d.op = ara_pkg::VFSGNJN; - 6'b001010: ara_req_d.op = ara_pkg::VFSGNJX; + ara_req.op = ara_pkg::VFSUB; + ara_req.swap_vs2_vd_op = 1'b1; + end + 6'b000100: ara_req.op = ara_pkg::VFMIN; + 6'b000110: ara_req.op = 
ara_pkg::VFMAX; + 6'b001000: ara_req.op = ara_pkg::VFSGNJ; + 6'b001001: ara_req.op = ara_pkg::VFSGNJN; + 6'b001010: ara_req.op = ara_pkg::VFSGNJX; 6'b001110: begin // vfslide1up - ara_req_d.op = ara_pkg::VSLIDEUP; - ara_req_d.stride = 1; - ara_req_d.eew_vs2 = csr_vtype_q.vsew; + ara_req.op = ara_pkg::VSLIDEUP; + ara_req.stride = 1; + ara_req.eew_vs2 = csr_vtype_q.vsew; // Request will need reshuffling - ara_req_d.scale_vl = 1'b1; + ara_req.scale_vl = 1'b1; // If stride > vl, the vslideup has no effects - if (|ara_req_d.stride[$bits(ara_req_d.stride)-1:$bits(csr_vl_q)] || - (vlen_t'(ara_req_d.stride) >= csr_vl_q)) null_vslideup = 1'b1; + if (|ara_req.stride[$bits(ara_req.stride)-1:$bits(csr_vl_q)] || + (vlen_t'(ara_req.stride) >= csr_vl_q)) null_vslideup = 1'b1; end 6'b001111: begin // vfslide1down - ara_req_d.op = ara_pkg::VSLIDEDOWN; - ara_req_d.stride = 1; - ara_req_d.eew_vs2 = csr_vtype_q.vsew; + ara_req.op = ara_pkg::VSLIDEDOWN; + ara_req.stride = 1; + ara_req.eew_vs2 = csr_vtype_q.vsew; // Request will need reshuffling - ara_req_d.scale_vl = 1'b1; + ara_req.scale_vl = 1'b1; end 6'b010000: begin // VRFUNARY0 // vmv.s.f - ara_req_d.op = ara_pkg::VFMVSF; - ara_req_d.use_vs2 = 1'b0; - ara_req_d.vl = |csr_vl_q ? 1 : '0; + ara_req.op = ara_pkg::VFMVSF; + ara_req.use_vs2 = 1'b0; + ara_req.vl = |csr_vl_q ? 
1 : '0; // This instruction ignores LMUL checks skip_lmul_checks = 1'b1; end - 6'b010111: ara_req_d.op = ara_pkg::VMERGE; - 6'b011000: ara_req_d.op = ara_pkg::VMFEQ; - 6'b011001: ara_req_d.op = ara_pkg::VMFLE; - 6'b011011: ara_req_d.op = ara_pkg::VMFLT; - 6'b011100: ara_req_d.op = ara_pkg::VMFNE; - 6'b011101: ara_req_d.op = ara_pkg::VMFGT; - 6'b011111: ara_req_d.op = ara_pkg::VMFGE; - 6'b100100: ara_req_d.op = ara_pkg::VFMUL; - 6'b100000: ara_req_d.op = ara_pkg::VFDIV; - 6'b100001: ara_req_d.op = ara_pkg::VFRDIV; + 6'b010111: ara_req.op = ara_pkg::VMERGE; + 6'b011000: ara_req.op = ara_pkg::VMFEQ; + 6'b011001: ara_req.op = ara_pkg::VMFLE; + 6'b011011: ara_req.op = ara_pkg::VMFLT; + 6'b011100: ara_req.op = ara_pkg::VMFNE; + 6'b011101: ara_req.op = ara_pkg::VMFGT; + 6'b011111: ara_req.op = ara_pkg::VMFGE; + 6'b100100: ara_req.op = ara_pkg::VFMUL; + 6'b100000: ara_req.op = ara_pkg::VFDIV; + 6'b100001: ara_req.op = ara_pkg::VFRDIV; 6'b100111: begin - ara_req_d.op = ara_pkg::VFRSUB; - ara_req_d.swap_vs2_vd_op = 1'b1; + ara_req.op = ara_pkg::VFRSUB; + ara_req.swap_vs2_vd_op = 1'b1; end 6'b101000: begin - ara_req_d.op = ara_pkg::VFMADD; - ara_req_d.use_vd_op = 1'b1; + ara_req.op = ara_pkg::VFMADD; + ara_req.use_vd_op = 1'b1; // Swap "vs2" and "vd" since "vs2" is the addend and "vd" is the multiplicand - ara_req_d.swap_vs2_vd_op = 1'b1; + ara_req.swap_vs2_vd_op = 1'b1; end 6'b101001: begin - ara_req_d.op = ara_pkg::VFNMADD; - ara_req_d.use_vd_op = 1'b1; + ara_req.op = ara_pkg::VFNMADD; + ara_req.use_vd_op = 1'b1; // Swap "vs2" and "vd" since "vs2" is the addend and "vd" is the multiplicand - ara_req_d.swap_vs2_vd_op = 1'b1; + ara_req.swap_vs2_vd_op = 1'b1; end 6'b101010: begin - ara_req_d.op = ara_pkg::VFMSUB; - ara_req_d.use_vd_op = 1'b1; + ara_req.op = ara_pkg::VFMSUB; + ara_req.use_vd_op = 1'b1; // Swap "vs2" and "vd" since "vs2" is the addend and "vd" is the multiplicand - ara_req_d.swap_vs2_vd_op = 1'b1; + ara_req.swap_vs2_vd_op = 1'b1; end 6'b101011: begin - 
ara_req_d.op = ara_pkg::VFNMSUB; - ara_req_d.use_vd_op = 1'b1; + ara_req.op = ara_pkg::VFNMSUB; + ara_req.use_vd_op = 1'b1; // Swap "vs2" and "vd" since "vs2" is the addend and "vd" is the multiplicand - ara_req_d.swap_vs2_vd_op = 1'b1; + ara_req.swap_vs2_vd_op = 1'b1; end 6'b101100: begin - ara_req_d.op = ara_pkg::VFMACC; - ara_req_d.use_vd_op = 1'b1; + ara_req.op = ara_pkg::VFMACC; + ara_req.use_vd_op = 1'b1; end 6'b101101: begin - ara_req_d.op = ara_pkg::VFNMACC; - ara_req_d.use_vd_op = 1'b1; + ara_req.op = ara_pkg::VFNMACC; + ara_req.use_vd_op = 1'b1; end 6'b101110: begin - ara_req_d.op = ara_pkg::VFMSAC; - ara_req_d.use_vd_op = 1'b1; + ara_req.op = ara_pkg::VFMSAC; + ara_req.use_vd_op = 1'b1; end 6'b101111: begin - ara_req_d.op = ara_pkg::VFNMSAC; - ara_req_d.use_vd_op = 1'b1; + ara_req.op = ara_pkg::VFNMSAC; + ara_req.use_vd_op = 1'b1; end 6'b110000: begin // VFWADD - ara_req_d.op = ara_pkg::VFADD; - ara_req_d.swap_vs2_vd_op = 1'b1; - ara_req_d.emul = next_lmul(csr_vtype_q.vlmul); - ara_req_d.vtype.vsew = csr_vtype_q.vsew.next(); - ara_req_d.conversion_vs2 = OpQueueConversionWideFP2; - ara_req_d.wide_fp_imm = 1'b1; + ara_req.op = ara_pkg::VFADD; + ara_req.swap_vs2_vd_op = 1'b1; + ara_req.emul = next_lmul(csr_vtype_q.vlmul); + ara_req.vtype.vsew = csr_vtype_q.vsew.next(); + ara_req.conversion_vs2 = OpQueueConversionWideFP2; + ara_req.wide_fp_imm = 1'b1; end 6'b110010: begin // VFWSUB - ara_req_d.op = ara_pkg::VFSUB; - ara_req_d.swap_vs2_vd_op = 1'b1; - ara_req_d.emul = next_lmul(csr_vtype_q.vlmul); - ara_req_d.vtype.vsew = csr_vtype_q.vsew.next(); - ara_req_d.conversion_vs2 = OpQueueConversionWideFP2; - ara_req_d.wide_fp_imm = 1'b1; + ara_req.op = ara_pkg::VFSUB; + ara_req.swap_vs2_vd_op = 1'b1; + ara_req.emul = next_lmul(csr_vtype_q.vlmul); + ara_req.vtype.vsew = csr_vtype_q.vsew.next(); + ara_req.conversion_vs2 = OpQueueConversionWideFP2; + ara_req.wide_fp_imm = 1'b1; end 6'b110100: begin // VFWADD.W - ara_req_d.op = ara_pkg::VFADD; - 
ara_req_d.swap_vs2_vd_op = 1'b1; - ara_req_d.emul = next_lmul(csr_vtype_q.vlmul); + ara_req.op = ara_pkg::VFADD; + ara_req.swap_vs2_vd_op = 1'b1; + ara_req.emul = next_lmul(csr_vtype_q.vlmul); lmul_vs2 = next_lmul(csr_vtype_q.vlmul); - ara_req_d.vtype.vsew = csr_vtype_q.vsew.next(); - ara_req_d.eew_vs2 = csr_vtype_q.vsew.next(); - ara_req_d.wide_fp_imm = 1'b1; + ara_req.vtype.vsew = csr_vtype_q.vsew.next(); + ara_req.eew_vs2 = csr_vtype_q.vsew.next(); + ara_req.wide_fp_imm = 1'b1; end 6'b110110: begin // VFWSUB.W - ara_req_d.op = ara_pkg::VFSUB; - ara_req_d.swap_vs2_vd_op = 1'b1; - ara_req_d.emul = next_lmul(csr_vtype_q.vlmul); + ara_req.op = ara_pkg::VFSUB; + ara_req.swap_vs2_vd_op = 1'b1; + ara_req.emul = next_lmul(csr_vtype_q.vlmul); lmul_vs2 = next_lmul(csr_vtype_q.vlmul); - ara_req_d.vtype.vsew = csr_vtype_q.vsew.next(); - ara_req_d.eew_vs2 = csr_vtype_q.vsew.next(); - ara_req_d.wide_fp_imm = 1'b1; + ara_req.vtype.vsew = csr_vtype_q.vsew.next(); + ara_req.eew_vs2 = csr_vtype_q.vsew.next(); + ara_req.wide_fp_imm = 1'b1; end 6'b111000: begin // VFWMUL - ara_req_d.op = ara_pkg::VFMUL; - ara_req_d.emul = next_lmul(csr_vtype_q.vlmul); - ara_req_d.vtype.vsew = csr_vtype_q.vsew.next(); - ara_req_d.conversion_vs2 = OpQueueConversionWideFP2; - ara_req_d.wide_fp_imm = 1'b1; + ara_req.op = ara_pkg::VFMUL; + ara_req.emul = next_lmul(csr_vtype_q.vlmul); + ara_req.vtype.vsew = csr_vtype_q.vsew.next(); + ara_req.conversion_vs2 = OpQueueConversionWideFP2; + ara_req.wide_fp_imm = 1'b1; end 6'b111100: begin // VFWMACC - ara_req_d.op = ara_pkg::VFMACC; - ara_req_d.use_vd_op = 1'b1; - ara_req_d.emul = next_lmul(csr_vtype_q.vlmul); - ara_req_d.vtype.vsew = csr_vtype_q.vsew.next(); - ara_req_d.conversion_vs2 = OpQueueConversionWideFP2; - ara_req_d.wide_fp_imm = 1'b1; - ara_req_d.eew_vd_op = csr_vtype_q.vsew.next(); + ara_req.op = ara_pkg::VFMACC; + ara_req.use_vd_op = 1'b1; + ara_req.emul = next_lmul(csr_vtype_q.vlmul); + ara_req.vtype.vsew = csr_vtype_q.vsew.next(); + 
ara_req.conversion_vs2 = OpQueueConversionWideFP2; + ara_req.wide_fp_imm = 1'b1; + ara_req.eew_vd_op = csr_vtype_q.vsew.next(); end 6'b111101: begin // VFWNMACC - ara_req_d.op = ara_pkg::VFNMACC; - ara_req_d.use_vd_op = 1'b1; - ara_req_d.emul = next_lmul(csr_vtype_q.vlmul); - ara_req_d.vtype.vsew = csr_vtype_q.vsew.next(); - ara_req_d.conversion_vs2 = OpQueueConversionWideFP2; - ara_req_d.wide_fp_imm = 1'b1; - ara_req_d.eew_vd_op = csr_vtype_q.vsew.next(); + ara_req.op = ara_pkg::VFNMACC; + ara_req.use_vd_op = 1'b1; + ara_req.emul = next_lmul(csr_vtype_q.vlmul); + ara_req.vtype.vsew = csr_vtype_q.vsew.next(); + ara_req.conversion_vs2 = OpQueueConversionWideFP2; + ara_req.wide_fp_imm = 1'b1; + ara_req.eew_vd_op = csr_vtype_q.vsew.next(); end 6'b111110: begin // VFWMSAC - ara_req_d.op = ara_pkg::VFMSAC; - ara_req_d.use_vd_op = 1'b1; - ara_req_d.emul = next_lmul(csr_vtype_q.vlmul); - ara_req_d.vtype.vsew = csr_vtype_q.vsew.next(); - ara_req_d.conversion_vs2 = OpQueueConversionWideFP2; - ara_req_d.wide_fp_imm = 1'b1; - ara_req_d.eew_vd_op = csr_vtype_q.vsew.next(); + ara_req.op = ara_pkg::VFMSAC; + ara_req.use_vd_op = 1'b1; + ara_req.emul = next_lmul(csr_vtype_q.vlmul); + ara_req.vtype.vsew = csr_vtype_q.vsew.next(); + ara_req.conversion_vs2 = OpQueueConversionWideFP2; + ara_req.wide_fp_imm = 1'b1; + ara_req.eew_vd_op = csr_vtype_q.vsew.next(); end 6'b111111: begin // VFWNMSAC - ara_req_d.op = ara_pkg::VFNMSAC; - ara_req_d.use_vd_op = 1'b1; - ara_req_d.emul = next_lmul(csr_vtype_q.vlmul); - ara_req_d.vtype.vsew = csr_vtype_q.vsew.next(); - ara_req_d.conversion_vs2 = OpQueueConversionWideFP2; - ara_req_d.wide_fp_imm = 1'b1; - ara_req_d.eew_vd_op = csr_vtype_q.vsew.next(); + ara_req.op = ara_pkg::VFNMSAC; + ara_req.use_vd_op = 1'b1; + ara_req.emul = next_lmul(csr_vtype_q.vlmul); + ara_req.vtype.vsew = csr_vtype_q.vsew.next(); + ara_req.conversion_vs2 = OpQueueConversionWideFP2; + ara_req.wide_fp_imm = 1'b1; + ara_req.eew_vd_op = csr_vtype_q.vsew.next(); end default: 
illegal_insn = 1'b1; endcase // Check if the FP scalar operand is NaN-boxed. If not, replace it with a NaN. case (csr_vtype_q.vsew) - EW16: if (~(&acc_req_i.rs1[63:16])) ara_req_d.scalar_op = 64'h0000000000007e00; - EW32: if (~(&acc_req_i.rs1[63:32])) ara_req_d.scalar_op = 64'h000000007fc00000; + EW16: if (~(&acc_req_i.rs1[63:16])) ara_req.scalar_op = 64'h0000000000007e00; + EW32: if (~(&acc_req_i.rs1[63:32])) ara_req.scalar_op = 64'h000000007fc00000; endcase // Instructions with an integer LMUL have extra constraints on the registers they // can access. The constraints can be different for the two source operands and the // destination register. if (!skip_lmul_checks) begin - unique case (ara_req_d.emul) + unique case (ara_req.emul) LMUL_2 : if ((insn.varith_type.rd & 5'b00001) != 5'b00000) illegal_insn = 1'b1; LMUL_4 : if ((insn.varith_type.rd & 5'b00011) != 5'b00000) illegal_insn = 1'b1; LMUL_8 : if ((insn.varith_type.rd & 5'b00111) != 5'b00000) illegal_insn = 1'b1; @@ -2514,16 +2550,16 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( // Ara can support 16-bit float, 32-bit float, 64-bit float. // Ara cannot support instructions who operates on more than 64 bits. 
unique case (FPUSupport) - FPUSupportHalfSingleDouble: if (int'(ara_req_d.vtype.vsew) < int'(EW16) || - int'(ara_req_d.vtype.vsew) > int'(EW64)) illegal_insn = 1'b1; - FPUSupportHalfSingle: if (int'(ara_req_d.vtype.vsew) < int'(EW16) || - int'(ara_req_d.vtype.vsew) > int'(EW32)) illegal_insn = 1'b1; - FPUSupportSingleDouble: if (int'(ara_req_d.vtype.vsew) < int'(EW32) || - int'(ara_req_d.vtype.vsew) > int'(EW64)) illegal_insn = 1'b1; - FPUSupportHalf: if (int'(ara_req_d.vtype.vsew) != int'(EW16)) illegal_insn = 1'b1; - FPUSupportSingle: if (int'(ara_req_d.vtype.vsew) != int'(EW32)) + FPUSupportHalfSingleDouble: if (int'(ara_req.vtype.vsew) < int'(EW16) || + int'(ara_req.vtype.vsew) > int'(EW64)) illegal_insn = 1'b1; + FPUSupportHalfSingle: if (int'(ara_req.vtype.vsew) < int'(EW16) || + int'(ara_req.vtype.vsew) > int'(EW32)) illegal_insn = 1'b1; + FPUSupportSingleDouble: if (int'(ara_req.vtype.vsew) < int'(EW32) || + int'(ara_req.vtype.vsew) > int'(EW64)) illegal_insn = 1'b1; + FPUSupportHalf: if (int'(ara_req.vtype.vsew) != int'(EW16)) illegal_insn = 1'b1; + FPUSupportSingle: if (int'(ara_req.vtype.vsew) != int'(EW32)) illegal_insn = 1'b1; - FPUSupportDouble: if (int'(ara_req_d.vtype.vsew) != int'(EW64)) + FPUSupportDouble: if (int'(ara_req.vtype.vsew) != int'(EW64)) illegal_insn = 1'b1; default: illegal_insn = 1'b1; // Unsupported configuration endcase @@ -2550,10 +2586,11 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( acc_resp_o.req_ready = 1'b0; // These generate a request to Ara's backend - ara_req_d.vd = insn.vmem_type.rd; - ara_req_d.use_vd = 1'b1; - ara_req_d.vm = insn.vmem_type.vm; - ara_req_d.scalar_op = acc_req_i.rs1; + ara_req.vd = insn.vmem_type.rd; + ara_req.use_vd = 1'b1; + ara_req.vm = insn.vmem_type.vm; + ara_req.scalar_op = acc_req_i.rs1; + ara_req.nf = insn.vmem_type.nf; ara_req_valid_d = 1'b1; // Decode the element width @@ -2561,34 +2598,34 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( unique case 
({insn.vmem_type.mew, insn.vmem_type.width}) 4'b0000: begin if (insn.vmem_type.mop != 2'b01 && insn.vmem_type.mop != 2'b11) begin - ara_req_d.vtype.vsew = EW8; + ara_req.vtype.vsew = EW8; end else begin - ara_req_d.vtype.vsew = csr_vtype_q.vsew; - ara_req_d.eew_vs2 = EW8; + ara_req.vtype.vsew = csr_vtype_q.vsew; + ara_req.eew_vs2 = EW8; end end 4'b0101: begin if (insn.vmem_type.mop != 2'b01 && insn.vmem_type.mop != 2'b11) begin - ara_req_d.vtype.vsew = EW16; + ara_req.vtype.vsew = EW16; end else begin - ara_req_d.vtype.vsew = csr_vtype_q.vsew; - ara_req_d.eew_vs2 = EW16; + ara_req.vtype.vsew = csr_vtype_q.vsew; + ara_req.eew_vs2 = EW16; end end 4'b0110: begin if (insn.vmem_type.mop != 2'b01 && insn.vmem_type.mop != 2'b11) begin - ara_req_d.vtype.vsew = EW32; + ara_req.vtype.vsew = EW32; end else begin - ara_req_d.vtype.vsew = csr_vtype_q.vsew; - ara_req_d.eew_vs2 = EW32; + ara_req.vtype.vsew = csr_vtype_q.vsew; + ara_req.eew_vs2 = EW32; end end 4'b0111: begin if (insn.vmem_type.mop != 2'b01 && insn.vmem_type.mop != 2'b11) begin - ara_req_d.vtype.vsew = EW64; + ara_req.vtype.vsew = EW64; end else begin - ara_req_d.vtype.vsew = csr_vtype_q.vsew; - ara_req_d.eew_vs2 = EW64; + ara_req.vtype.vsew = csr_vtype_q.vsew; + ara_req.eew_vs2 = EW64; end end default: begin // Invalid. Element is too wide, or encoding is non-existant. 
@@ -2602,7 +2639,7 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( // Decode the addressing mode unique case (insn.vmem_type.mop) 2'b00: begin - ara_req_d.op = VLE; + ara_req.op = VLE; // Decode the lumop field case (insn.vmem_type.rs2) @@ -2610,8 +2647,8 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( 5'b01000:; // Unit-strided, whole registers 5'b01011: begin // Unit-strided, mask load, EEW=1 // We operate ceil(vl/8) bytes - ara_req_d.vl = (csr_vl_q >> 3) + |csr_vl_q[2:0]; - ara_req_d.vtype.vsew = EW8; + ara_req.vl = (csr_vl_q >> 3) + |csr_vl_q[2:0]; + ara_req.vtype.vsew = EW8; end 5'b10000: begin // Unit-strided, fault-only first // TODO: Not implemented @@ -2623,36 +2660,36 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( endcase end 2'b10: begin - ara_req_d.op = VLSE; - ara_req_d.stride = acc_req_i.rs2; + ara_req.op = VLSE; + ara_req.stride = acc_req_i.rs2; end 2'b01, // Indexed-unordered 2'b11: begin // Indexed-ordered - ara_req_d.op = VLXE; + ara_req.op = VLXE; // These also read vs2 - ara_req_d.vs2 = insn.vmem_type.rs2; - ara_req_d.use_vs2 = 1'b1; + ara_req.vs2 = insn.vmem_type.rs2; + ara_req.use_vs2 = 1'b1; end default:; endcase // For memory operations: EMUL = LMUL * (EEW / SEW) // EEW is encoded in the instruction - ara_req_d.emul = vlmul_e'(csr_vtype_q.vlmul + (ara_req_d.vtype.vsew - csr_vtype_q.vsew)); + ara_req.emul = vlmul_e'(csr_vtype_q.vlmul + (ara_req.vtype.vsew - csr_vtype_q.vsew)); // Exception if EMUL > 8 or < 1/8 - unique case ({csr_vtype_q.vlmul[2], ara_req_d.emul[2]}) + unique case ({csr_vtype_q.vlmul[2], ara_req.emul[2]}) // The new emul is lower than the previous lmul 2'b01: begin // But the new eew is greater than vsew - if (signed'(ara_req_d.vtype.vsew - csr_vtype_q.vsew) > 0) begin + if (signed'(ara_req.vtype.vsew - csr_vtype_q.vsew) > 0) begin illegal_insn_load = 1'b1; end end // The new emul is greater than the previous lmul 2'b10: begin // But the new eew is lower than vsew - if 
(signed'(ara_req_d.vtype.vsew - csr_vtype_q.vsew) < 0) begin + if (signed'(ara_req.vtype.vsew - csr_vtype_q.vsew) < 0) begin illegal_insn_load = 1'b1; end end @@ -2661,7 +2698,7 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( // Instructions with an integer LMUL have extra constraints on the registers they can // access. - unique case (ara_req_d.emul) + unique case (ara_req.emul) LMUL_2: if ((insn.varith_type.rd & 5'b00001) != 5'b00000) begin illegal_insn_load = 1'b1; end @@ -2677,8 +2714,35 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( default:; endcase + // Check for segment loads + if (ara_req.nf != 3'b000) begin + // This is a segment load instruction + is_segment_mem_op = 1'b1; + // Wait for idle not to mess with load/store_complete_i + // since the segment sequencer filters these signals + if (!segment_micro_op_on) state_d = WAIT_IDLE; + // Check that EMUL * NFIELDS <= 8 + if (!ara_req.emul[2]) begin + // emul >= 1 + if ((ara_req.nf << ara_req.emul[1:0]) > 8) + illegal_insn = 1'b1; + end else begin + // emul < 1 + if ((ara_req.nf >> ara_req.emul[1:0]) > 8) + illegal_insn = 1'b1; + end + // Check if we will not access vector regs past 31 + if (!ara_req.emul[2]) begin + if ((ara_req.vd + (ara_req.nf << ara_req.emul[1:0])) > 5'b11111) + illegal_insn = 1'b1; + end else begin + if ((ara_req.vd + ara_req.nf) > 5'b11111) + illegal_insn = 1'b1; + end + end + // Vector whole register loads overwrite all the other decoding information. - if (ara_req_d.op == VLE && insn.vmem_type.rs2 == 5'b01000) begin + if (ara_req.op == VLE && insn.vmem_type.rs2 == 5'b01000) begin // Execute also if vl == 0 ignore_zero_vl_check = 1'b1; // The LMUL value is kept in the instruction itself @@ -2686,23 +2750,23 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( ara_req_valid_d = 1'b1; // Maximum vector length. VLMAX = nf * VLEN / EW8. 
- ara_req_d.vtype.vsew = EW8; + ara_req.vtype.vsew = EW8; unique case (insn.vmem_type.nf) 3'd0: begin - ara_req_d.vl = VLENB << 0; - ara_req_d.emul = LMUL_1; + ara_req.vl = VLENB << 0; + ara_req.emul = LMUL_1; end 3'd1: begin - ara_req_d.vl = VLENB << 1; - ara_req_d.emul = LMUL_2; + ara_req.vl = VLENB << 1; + ara_req.emul = LMUL_2; end 3'd3: begin - ara_req_d.vl = VLENB << 2; - ara_req_d.emul = LMUL_4; + ara_req.vl = VLENB << 2; + ara_req.emul = LMUL_4; end 3'd7: begin - ara_req_d.vl = VLENB << 3; - ara_req_d.emul = LMUL_8; + ara_req.vl = VLENB << 3; + ara_req.emul = LMUL_8; end default: begin // Trigger an error for the reserved simm values @@ -2712,14 +2776,14 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( end // Wait until the back-end answers to acknowledge those instructions - if ( ara_resp_valid_i ) begin + if ( ara_resp_valid ) begin acc_resp_o.req_ready = 1'b1; acc_resp_o.resp_valid = 1'b1; - acc_resp_o.exception = ara_resp_i.exception; + acc_resp_o.exception = ara_resp.exception; ara_req_valid_d = 1'b0; // In case of exception, modify vstart - if ( ara_resp_i.exception.valid ) begin - csr_vstart_d = ara_resp_i.exception_vstart; + if ( ara_resp.exception.valid ) begin + csr_vstart_d = ara_resp.exception_vstart; end end end @@ -2729,8 +2793,8 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( ///////////////////// // Vector stores encode: - // - The target EEW in ara_req_d.vtype.vsew - // - The EEW of the source register in ara_req_d.eew_vs1 + // - The target EEW in ara_req.vtype.vsew + // - The EEW of the source register in ara_req.eew_vs1 // The current vector length refers to the target EEW! // Vector stores never re-shuffle the source register! 
@@ -2748,14 +2812,15 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( // Ara does not reshuffle source vregs upon vector stores, // thus the operand requesters will fetch Bytes referring // to the encoding of the source register - ara_req_d.scale_vl = 1'b1; + ara_req.scale_vl = 1'b1; // These generate a request to Ara's backend - ara_req_d.vs1 = insn.vmem_type.rd; // vs3 is encoded in the same position as rd - ara_req_d.use_vs1 = 1'b1; - ara_req_d.old_eew_vs1 = eew_q[insn.vmem_type.rd]; // This is the old vs1 EEW; - ara_req_d.vm = insn.vmem_type.vm; - ara_req_d.scalar_op = acc_req_i.rs1; + ara_req.vs1 = insn.vmem_type.rd; // vs3 is encoded in the same position as rd + ara_req.use_vs1 = 1'b1; + ara_req.old_eew_vs1 = eew_q[insn.vmem_type.rd]; // This is the old vs1 EEW; + ara_req.vm = insn.vmem_type.vm; + ara_req.scalar_op = acc_req_i.rs1; + ara_req.nf = insn.vmem_type.nf; ara_req_valid_d = 1'b1; // Decode the element width @@ -2763,34 +2828,34 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( unique case ({insn.vmem_type.mew, insn.vmem_type.width}) 4'b0000: begin if (insn.vmem_type.mop != 2'b01 && insn.vmem_type.mop != 2'b11) begin - ara_req_d.vtype.vsew = EW8; // ara_req_d.vtype.vsew is the target EEW! + ara_req.vtype.vsew = EW8; // ara_req.vtype.vsew is the target EEW! 
end else begin - ara_req_d.vtype.vsew = csr_vtype_q.vsew; - ara_req_d.eew_vs2 = EW8; + ara_req.vtype.vsew = csr_vtype_q.vsew; + ara_req.eew_vs2 = EW8; end end 4'b0101: begin if (insn.vmem_type.mop != 2'b01 && insn.vmem_type.mop != 2'b11) begin - ara_req_d.vtype.vsew = EW16; + ara_req.vtype.vsew = EW16; end else begin - ara_req_d.vtype.vsew = csr_vtype_q.vsew; - ara_req_d.eew_vs2 = EW16; + ara_req.vtype.vsew = csr_vtype_q.vsew; + ara_req.eew_vs2 = EW16; end end 4'b0110: begin if (insn.vmem_type.mop != 2'b01 && insn.vmem_type.mop != 2'b11) begin - ara_req_d.vtype.vsew = EW32; + ara_req.vtype.vsew = EW32; end else begin - ara_req_d.vtype.vsew = csr_vtype_q.vsew; - ara_req_d.eew_vs2 = EW32; + ara_req.vtype.vsew = csr_vtype_q.vsew; + ara_req.eew_vs2 = EW32; end end 4'b0111: begin if (insn.vmem_type.mop != 2'b01 && insn.vmem_type.mop != 2'b11) begin - ara_req_d.vtype.vsew = EW64; + ara_req.vtype.vsew = EW64; end else begin - ara_req_d.vtype.vsew = csr_vtype_q.vsew; - ara_req_d.eew_vs2 = EW64; + ara_req.vtype.vsew = csr_vtype_q.vsew; + ara_req.eew_vs2 = EW64; end end default: begin // Invalid. Element is too wide, or encoding is non-existant. 
@@ -2801,7 +2866,7 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( // Decode the addressing mode unique case (insn.vmem_type.mop) 2'b00: begin - ara_req_d.op = VSE; + ara_req.op = VSE; // Decode the sumop field unique case (insn.vmem_type.rs2) @@ -2809,8 +2874,8 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( 5'b01000:; // Unit-strided, whole registers 5'b01011: begin // Unit-strided, mask store, EEW=1 // We operate ceil(vl/8) bytes - ara_req_d.vl = (csr_vl_q >> 3) + |csr_vl_q[2:0]; - ara_req_d.vtype.vsew = EW8; + ara_req.vl = (csr_vl_q >> 3) + |csr_vl_q[2:0]; + ara_req.vtype.vsew = EW8; end default: begin // Reserved illegal_insn_store = 1'b1; @@ -2818,36 +2883,36 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( endcase end 2'b10: begin - ara_req_d.op = VSSE; - ara_req_d.stride = acc_req_i.rs2; + ara_req.op = VSSE; + ara_req.stride = acc_req_i.rs2; end 2'b01, // Indexed-unordered 2'b11: begin // Indexed-orderd - ara_req_d.op = VSXE; + ara_req.op = VSXE; // These also read vs2 - ara_req_d.vs2 = insn.vmem_type.rs2; - ara_req_d.use_vs2 = 1'b1; + ara_req.vs2 = insn.vmem_type.rs2; + ara_req.use_vs2 = 1'b1; end default:; endcase // For memory operations: EMUL = LMUL * (EEW / SEW) // EEW is encoded in the instruction - ara_req_d.emul = vlmul_e'(csr_vtype_q.vlmul + (ara_req_d.vtype.vsew - csr_vtype_q.vsew)); + ara_req.emul = vlmul_e'(csr_vtype_q.vlmul + (ara_req.vtype.vsew - csr_vtype_q.vsew)); // Exception if EMUL > 8 or < 1/8 - unique case ({csr_vtype_q.vlmul[2], ara_req_d.emul[2]}) + unique case ({csr_vtype_q.vlmul[2], ara_req.emul[2]}) // The new emul is lower than the previous lmul 2'b01: begin // But the new eew is greater than vsew - if (signed'(ara_req_d.vtype.vsew - csr_vtype_q.vsew) > 0) begin + if (signed'(ara_req.vtype.vsew - csr_vtype_q.vsew) > 0) begin illegal_insn_store = 1'b1; end end // The new emul is greater than the previous lmul 2'b10: begin // But the new eew is lower than vsew - if 
(signed'(ara_req_d.vtype.vsew - csr_vtype_q.vsew) < 0) begin + if (signed'(ara_req.vtype.vsew - csr_vtype_q.vsew) < 0) begin illegal_insn_store = 1'b1; end end @@ -2856,7 +2921,7 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( // Instructions with an integer LMUL have extra constraints on the registers they can // access. - unique case (ara_req_d.emul) + unique case (ara_req.emul) LMUL_2: if ((insn.varith_type.rd & 5'b00001) != 5'b00000) begin illegal_insn_store = 1'b1; end @@ -2872,31 +2937,58 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( default:; endcase + // Check for segment stores + if (ara_req.nf != 3'b000) begin + // This is a segment store instruction + is_segment_mem_op = 1'b1; + // Wait for idle not to mess with load/store_complete_i + // since the segment sequencer filters these signals + if (!segment_micro_op_on) state_d = WAIT_IDLE; + // Check that EMUL * NFIELDS <= 8 + if (!ara_req.emul[2]) begin + // emul >= 1 + if ((ara_req.nf << ara_req.emul[1:0]) > 8) + illegal_insn = 1'b1; + end else begin + // emul < 1 + if ((ara_req.nf >> ara_req.emul[1:0]) > 8) + illegal_insn = 1'b1; + end + // Check if we will not access vector regs past 31 + if (!ara_req.emul[2]) begin + if ((ara_req.vd + (ara_req.nf << ara_req.emul[1:0])) > 5'b11111) + illegal_insn = 1'b1; + end else begin + if ((ara_req.vd + ara_req.nf) > 5'b11111) + illegal_insn = 1'b1; + end + end + // Vector whole register stores are encoded as stores of length VLENB, length // multiplier LMUL_1 and element width EW8. They overwrite all this decoding. - if (ara_req_d.op == VSE && insn.vmem_type.rs2 == 5'b01000) begin + if (ara_req.op == VSE && insn.vmem_type.rs2 == 5'b01000) begin // Execute also if vl == 0 ignore_zero_vl_check = 1'b1; illegal_insn_store = 1'b0; // Maximum vector length. VLMAX = nf * VLEN / EW8. 
- ara_req_d.vtype.vsew = EW8; + ara_req.vtype.vsew = EW8; unique case (insn.vmem_type.nf) 3'd0: begin - ara_req_d.vl = VLENB << 0; - ara_req_d.emul = LMUL_1; + ara_req.vl = VLENB << 0; + ara_req.emul = LMUL_1; end 3'd1: begin - ara_req_d.vl = VLENB << 1; - ara_req_d.emul = LMUL_2; + ara_req.vl = VLENB << 1; + ara_req.emul = LMUL_2; end 3'd3: begin - ara_req_d.vl = VLENB << 2; - ara_req_d.emul = LMUL_4; + ara_req.vl = VLENB << 2; + ara_req.emul = LMUL_4; end 3'd7: begin - ara_req_d.vl = VLENB << 3; - ara_req_d.emul = LMUL_8; + ara_req.vl = VLENB << 3; + ara_req.emul = LMUL_8; end default: begin // Trigger an error for the reserved simm values @@ -2910,19 +3002,19 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( end // Wait until the back-end answers to acknowledge those instructions - if ( ara_resp_valid_i ) begin + if ( ara_resp_valid ) begin acc_resp_o.req_ready = 1'b1; acc_resp_o.resp_valid = 1'b1; - acc_resp_o.exception = ara_resp_i.exception; + acc_resp_o.exception = ara_resp.exception; ara_req_valid_d = 1'b0; // In case of exception, modify vstart and wait until the previous // operations are over - if ( ara_resp_i.exception.valid ) begin - csr_vstart_d = ara_resp_i.exception_vstart; + if ( ara_resp.exception.valid ) begin + csr_vstart_d = ara_resp.exception_vstart; state_d = WAIT_IDLE; end end - ara_req_d.eew_vs1 = ara_req_d.vtype.vsew; // This is the new vs1 EEW + ara_req.eew_vs1 = ara_req.vtype.vsew; // This is the new vs1 EEW end //////////////////////////// @@ -3170,11 +3262,11 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( // Check that we have fixed-point support if requested // vxsat and vxrm are always accessible anyway - if (ara_req_valid_d && (ara_req_d.op inside {[VSADDU:VNCLIPU], VSMUL}) && (FixPtSupport == FixedPointDisable)) + if (ara_req_valid_d && (ara_req.op inside {[VSADDU:VNCLIPU], VSMUL}) && (FixPtSupport == FixedPointDisable)) illegal_insn = 1'b1; // Check that we have we have vfrec7, vfrsqrt7 - if 
(ara_req_valid_d && (ara_req_d.op inside {VFREC7, VFRSQRT7}) && (FPExtSupport == FPExtSupportDisable)) + if (ara_req_valid_d && (ara_req.op inside {VFREC7, VFRSQRT7}) && (FPExtSupport == FPExtSupportDisable)) illegal_insn = 1'b1; // Raise an illegal instruction exception @@ -3193,15 +3285,15 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( automatic rvv_instruction_t insn = rvv_instruction_t'(acc_req_i.insn.instr); // Is the instruction an in-lane one and could it be subject to reshuffling? - in_lane_op = ara_req_d.op inside {[VADD:VMERGE]} || ara_req_d.op inside {[VREDSUM:VMSBC]} || - ara_req_d.op inside {[VMANDNOT:VMXNOR]} || ara_req_d.op inside {VSLIDEUP, VSLIDEDOWN}; + in_lane_op = ara_req.op inside {[VADD:VMERGE]} || ara_req.op inside {[VREDSUM:VMSBC]} || + ara_req.op inside {[VMANDNOT:VMXNOR]} || ara_req.op inside {VSLIDEUP, VSLIDEDOWN}; // Annotate which registers need a reshuffle -> |vs1|vs2|vd| // Optimization: reshuffle vs1 and vs2 only if the operation is strictly in-lane // Optimization: reshuffle vd only if we are not overwriting the whole vector register! 
// During a vstore, if vstart > 0, reshuffle immediately not to complicate operand fetch stage - reshuffle_req_d = {ara_req_d.use_vs1 && (ara_req_d.eew_vs1 != eew_q[ara_req_d.vs1]) && eew_valid_q[ara_req_d.vs1] && (in_lane_op || (is_vstore && (csr_vstart_q != '0))), - ara_req_d.use_vs2 && (ara_req_d.eew_vs2 != eew_q[ara_req_d.vs2]) && eew_valid_q[ara_req_d.vs2] && in_lane_op, - ara_req_d.use_vd && (ara_req_d.vtype.vsew != eew_q[ara_req_d.vd ]) && eew_valid_q[ara_req_d.vd ] && csr_vl_q != ((VLENB << ara_req_d.emul[1:0]) >> ara_req_d.vtype.vsew)}; + reshuffle_req_d = {ara_req.use_vs1 && (ara_req.eew_vs1 != eew_q[ara_req.vs1]) && eew_valid_q[ara_req.vs1] && (in_lane_op || (is_vstore && (csr_vstart_q != '0))), + ara_req.use_vs2 && (ara_req.eew_vs2 != eew_q[ara_req.vs2]) && eew_valid_q[ara_req.vs2] && in_lane_op, + ara_req.use_vd && (ara_req.vtype.vsew != eew_q[ara_req.vd ]) && eew_valid_q[ara_req.vd ] && csr_vl_q != ((VLENB << ara_req.emul[1:0]) >> ara_req.vtype.vsew)}; // Mask out requests if they refer to the same register! reshuffle_req_d &= { (insn.varith_type.rs1 != insn.varith_type.rs2) && (insn.varith_type.rs1 != insn.varith_type.rd), @@ -3213,17 +3305,17 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( unique casez (reshuffle_req_d) 3'b??1: begin eew_old_buffer_d = eew_q[insn.vmem_type.rd]; - eew_new_buffer_d = ara_req_d.vtype.vsew; + eew_new_buffer_d = ara_req.vtype.vsew; vs_buffer_d = insn.varith_type.rd; end 3'b?10: begin eew_old_buffer_d = eew_q[insn.vmem_type.rs2]; - eew_new_buffer_d = ara_req_d.eew_vs2; + eew_new_buffer_d = ara_req.eew_vs2; vs_buffer_d = insn.varith_type.rs2; end 3'b100: begin eew_old_buffer_d = is_vstore ? eew_q[insn.vmem_type.rd] : eew_q[insn.vmem_type.rs1]; - eew_new_buffer_d = ara_req_d.eew_vs1; + eew_new_buffer_d = ara_req.eew_vs1; vs_buffer_d = is_vstore ? 
insn.vmem_type.rd : insn.varith_type.rs1; end default:; @@ -3241,7 +3333,7 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( ara_req_valid_d = 1'b0; // Initialize the reshuffle counter limit to handle LMUL > 1 - unique case (ara_req_d.emul) + unique case (ara_req.emul) LMUL_2: rs_lmul_cnt_limit_d = 1; LMUL_4: rs_lmul_cnt_limit_d = 3; LMUL_8: rs_lmul_cnt_limit_d = 7; @@ -3249,9 +3341,9 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( endcase // Save info for next reshuffles - reshuffle_eew_vs1_d = ara_req_d.eew_vs1; - reshuffle_eew_vs2_d = ara_req_d.eew_vs2; - reshuffle_eew_vd_d = ara_req_d.vtype.vsew; + reshuffle_eew_vs1_d = ara_req.eew_vs1; + reshuffle_eew_vs2_d = ara_req.eew_vs2; + reshuffle_eew_vd_d = ara_req.vtype.vsew; // Reshuffle state_d = RESHUFFLE; @@ -3259,36 +3351,36 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( end // Update the EEW - if (ara_req_valid_d && ara_req_d.use_vd && ara_req_ready_i) begin - unique case (ara_req_d.emul) + if (ara_req_valid_d && ara_req.use_vd && ara_req_ready_i) begin + unique case (ara_req.emul) LMUL_1: begin for (int i = 0; i < 1; i++) begin - eew_d[ara_req_d.vd + i] = ara_req_d.vtype.vsew; - eew_valid_d[ara_req_d.vd + i] = 1'b1; + eew_d[ara_req.vd + i] = ara_req.vtype.vsew; + eew_valid_d[ara_req.vd + i] = 1'b1; end end LMUL_2: begin for (int i = 0; i < 2; i++) begin - eew_d[ara_req_d.vd + i] = ara_req_d.vtype.vsew; - eew_valid_d[ara_req_d.vd + i] = 1'b1; + eew_d[ara_req.vd + i] = ara_req.vtype.vsew; + eew_valid_d[ara_req.vd + i] = 1'b1; end end LMUL_4: begin for (int i = 0; i < 4; i++) begin - eew_d[ara_req_d.vd + i] = ara_req_d.vtype.vsew; - eew_valid_d[ara_req_d.vd + i] = 1'b1; + eew_d[ara_req.vd + i] = ara_req.vtype.vsew; + eew_valid_d[ara_req.vd + i] = 1'b1; end end LMUL_8: begin for (int i = 0; i < 8; i++) begin - eew_d[ara_req_d.vd + i] = ara_req_d.vtype.vsew; - eew_valid_d[ara_req_d.vd + i] = 1'b1; + eew_d[ara_req.vd + i] = ara_req.vtype.vsew; + 
eew_valid_d[ara_req.vd + i] = 1'b1; end end default: begin // EMUL < 1 for (int i = 0; i < 1; i++) begin - eew_d[ara_req_d.vd + i] = ara_req_d.vtype.vsew; - eew_valid_d[ara_req_d.vd + i] = 1'b1; + eew_d[ara_req.vd + i] = ara_req.vtype.vsew; + eew_valid_d[ara_req.vd + i] = 1'b1; end end endcase @@ -3324,7 +3416,7 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( acc_resp_o.store_complete = store_zero_vl | store_complete_q; // The token must change at every new instruction - ara_req_d.token = (ara_req_valid_o && ara_req_ready_i) ? ~ara_req_o.token : ara_req_o.token; + ara_req.token = (ara_req_valid_o && ara_req_ready_i) ? ~ara_req_o.token : ara_req_o.token; end: p_decoder endmodule : ara_dispatcher diff --git a/hardware/src/ara_soc.sv b/hardware/src/ara_soc.sv index c49104b80..762012a95 100644 --- a/hardware/src/ara_soc.sv +++ b/hardware/src/ara_soc.sv @@ -16,6 +16,8 @@ module ara_soc import axi_pkg::*; import ara_pkg::*; #( parameter fpext_support_e FPExtSupport = FPExtSupportEnable, // Support for fixed-point data types parameter fixpt_support_e FixPtSupport = FixedPointEnable, + // Support for segment memory operations + parameter seg_support_e SegSupport = SegSupportEnable, // AXI Interface parameter int unsigned AxiDataWidth = 32*NrLanes, parameter int unsigned AxiAddrWidth = 64, @@ -517,6 +519,7 @@ module ara_soc import axi_pkg::*; import ara_pkg::*; #( .FPUSupport (FPUSupport ), .FPExtSupport (FPExtSupport ), .FixPtSupport (FixPtSupport ), + .SegSupport (SegSupport ), .CVA6Cfg (CVA6AraConfig ), .AxiAddrWidth (AxiAddrWidth ), .AxiIdWidth (AxiCoreIdWidth ), diff --git a/hardware/src/ara_system.sv b/hardware/src/ara_system.sv index df32066bb..4fa64f6c1 100644 --- a/hardware/src/ara_system.sv +++ b/hardware/src/ara_system.sv @@ -16,6 +16,8 @@ module ara_system import axi_pkg::*; import ara_pkg::*; #( parameter fpext_support_e FPExtSupport = FPExtSupportEnable, // Support for fixed-point data types parameter fixpt_support_e FixPtSupport = 
FixedPointEnable, + // Support for segment memory operations + parameter seg_support_e SegSupport = SegSupportEnable, // Ariane configuration parameter config_pkg::cva6_cfg_t CVA6Cfg = cva6_config_pkg::cva6_cfg, // AXI Interface @@ -213,6 +215,7 @@ module ara_system import axi_pkg::*; import ara_pkg::*; #( .FPUSupport (FPUSupport ), .FPExtSupport(FPExtSupport ), .FixPtSupport(FixPtSupport ), + .SegSupport (SegSupport ), .AxiDataWidth(AxiWideDataWidth), .AxiAddrWidth(AxiAddrWidth ), .axi_ar_t (ara_axi_ar_t ),
diff --git a/hardware/src/segment_sequencer.sv b/hardware/src/segment_sequencer.sv
new file mode 100644
index 000000000..6551d624e
--- /dev/null
+++ b/hardware/src/segment_sequencer.sv
@@ -0,0 +1,207 @@
+// Copyright 2024 ETH Zurich and University of Bologna.
+// Solderpad Hardware License, Version 0.51, see LICENSE for details.
+// SPDX-License-Identifier: SHL-0.51
+//
+// Author: Matteo Perotti
+// Description: break down segmented memory operations into scalar
+// memory operations. This is extremely bad in terms of IPC, but
+// it has low-impact on the physical implementation.
+//
+// Operation:
+// - IDLE: requests and responses pass through. A legal segment memory
+//   operation initializes the counters, is forwarded with vl forced to 1,
+//   and starts the sequencing.
+// - SEGMENT_MICRO_OPS: every request is rewritten into a micro operation
+//   with vl = 1, vstart taken from the segment counter, and vs1/vd offset
+//   by the field counter. ara_resp_valid_o is kept low, except for
+//   exceptions, which are forwarded and abort the sequence.
+// - SEGMENT_MICRO_OPS_END: the last response is held until ara_idle_i is
+//   high, then released together with load_complete_o/store_complete_o.
+
+module segment_sequencer import ara_pkg::*; import rvv_pkg::*; #(
+    // NOTE(review): not read inside this module - confirm how the parent uses it
+    parameter bit SegSupport = 1'b0
+  ) (
+    // Clock and reset
+    input  logic      clk_i,
+    input  logic      rst_ni,
+    input  logic      ara_idle_i,
+    // Enable the segment sequencer?
+    input  logic      is_segment_mem_op_i,
+    input  logic      illegal_insn_i,
+    input  logic      is_vload_i,
+    output logic      segment_micro_op_on_o,
+    // NOTE(review): load_complete_i and store_complete_i are not read inside this module
+    input  logic      load_complete_i,
+    output logic      load_complete_o,
+    input  logic      store_complete_i,
+    output logic      store_complete_o,
+    // Ara frontend - backend info and handshakes
+    input  ara_req_t  ara_req_i,
+    output ara_req_t  ara_req_o,
+    input  logic      ara_req_ready_i,
+    input  ara_resp_t ara_resp_i,
+    output ara_resp_t ara_resp_o,
+    input  logic      ara_resp_valid_i,
+    output logic      ara_resp_valid_o
+  );
+
+  // Last back-end response, held until the back-end is idle
+  logic      ara_resp_valid_d, ara_resp_valid_q;
+  ara_resp_t ara_resp_d, ara_resp_q;
+  // Was the sequenced operation a load? Selects load/store_complete at the end
+  logic      is_vload_d, is_vload_q;
+  // One bit wider than vstart, so that the increment cannot wrap
+  logic [$bits(ara_req_i.vstart):0] next_vstart_cnt;
+
+  typedef enum logic [1:0] {
+    IDLE,
+    SEGMENT_MICRO_OPS,
+    SEGMENT_MICRO_OPS_END
+  } state_e;
+  state_e state_d, state_q;
+
+  // Track the elements within each segment
+  logic new_seg_mem_op;
+  logic segment_cnt_en, segment_cnt_clear;
+  logic [$bits(ara_req_i.nf)-1:0] segment_cnt_q;
+
+  counter #(
+    .WIDTH($bits(ara_req_i.nf)),
+    .STICKY_OVERFLOW(1'b0)
+  ) i_segment_cnt (
+    .clk_i,
+    .rst_ni,
+    .clear_i(segment_cnt_clear),
+    .en_i(segment_cnt_en),
+    .load_i(1'b0),
+    .down_i(1'b0),
+    .d_i('0),
+    .q_o(segment_cnt_q),
+    .overflow_o( /* Unused */ )
+  );
+  // Restart from the first field on a new operation, or after the field with index nf
+  assign segment_cnt_clear = new_seg_mem_op | (segment_cnt_en & (segment_cnt_q == ara_req_i.nf));
+
+  // Track the number of segments
+  logic vstart_cnt_en;
+  logic [$bits(ara_req_i.vstart)-1:0] vstart_cnt_q;
+
+  counter #(
+    .WIDTH($bits(ara_req_i.vstart)),
+    .STICKY_OVERFLOW(1'b0)
+  ) i_vstart_cnt (
+    .clk_i,
+    .rst_ni,
+    // Never cleared: the counter is re-loaded at every new operation
+    .clear_i(1'b0),
+    .en_i(vstart_cnt_en),
+    .load_i(new_seg_mem_op),
+    .down_i(1'b0),
+    .d_i(ara_req_i.vstart),
+    .q_o(vstart_cnt_q),
+    .overflow_o( /* Unused */ )
+  );
+  // Move to the next segment when all the fields of the current one have been processed
+  assign vstart_cnt_en = segment_cnt_en & (segment_cnt_q == ara_req_i.nf);
+
+  // Next vstart count
+  assign next_vstart_cnt = vstart_cnt_q + 1;
+
+  // Signal if the micro op seq is on
+  assign segment_micro_op_on_o = state_q != IDLE;
+
+  always_comb begin
+    state_d = state_q;
+
+    // Pass through
+    ara_req_o        = ara_req_i;
+    ara_resp_o       = ara_resp_i;
+    ara_resp_valid_o = ara_resp_valid_i;
+    // Block load/store_complete
+    load_complete_o  = 1'b0;
+    store_complete_o = 1'b0;
+
+    ara_resp_d       = ara_resp_q;
+    ara_resp_valid_d = ara_resp_valid_q;
+    is_vload_d       = is_vload_q;
+
+    // Don't count up by default
+    new_seg_mem_op = 1'b0;
+    segment_cnt_en = 1'b0;
+
+    // Low-perf FSM
+    unique case (state_q)
+      IDLE: begin
+        // Send a first micro operation upon valid segment mem op
+        if (is_segment_mem_op_i && !illegal_insn_i) begin
+          // If we are here, the backend is able to accept the request
+          // Set-up sequencing
+          new_seg_mem_op = 1'b1;
+          // Set up the first micro operation
+          ara_req_o.vl = 1;
+          // Start sequencing
+          state_d = SEGMENT_MICRO_OPS;
+        end
+      end
+      SEGMENT_MICRO_OPS: begin
+        // Manipulate the memory micro request in advance
+        ara_req_o.vl     = 1;
+        ara_req_o.vstart = vstart_cnt_q;
+        ara_req_o.vs1    = ara_req_i.vs1 + segment_cnt_q;
+        ara_req_o.vd     = ara_req_i.vd + segment_cnt_q;
+        ara_resp_valid_o = 1'b0;
+
+        // Wait for an answer from Ara's backend
+        if (ara_resp_valid_i) begin
+          // Pass to the next field if the previous micro op finished
+          segment_cnt_en = 1'b1;
+          // If exception, forward it and stop the execution
+          if (ara_resp_i.exception.valid) begin
+            ara_resp_valid_o = ara_resp_valid_i;
+            // Leave the sequencing state, otherwise the following requests
+            // would be rewritten and their responses hidden
+            state_d = IDLE;
+          // If no exception, continue with the micro ops
+          end else begin
+            // If over - stop in the next cycle
+            if (segment_cnt_clear && (next_vstart_cnt == ara_req_i.vl)) begin
+              // Sample the last answer
+              ara_resp_d       = ara_resp_i;
+              ara_resp_valid_d = ara_resp_valid_i;
+              is_vload_d       = is_vload_i;
+              state_d          = SEGMENT_MICRO_OPS_END;
+            end
+          end
+        end
+      end
+      SEGMENT_MICRO_OPS_END: begin
+        ara_resp_valid_o = 1'b0;
+        // Wait for idle to give the final load/store_complete
+        if (ara_idle_i) begin
+          ara_resp_o       = ara_resp_q;
+          ara_resp_valid_o = ara_resp_valid_q;
+          load_complete_o  = is_vload_q;
+          store_complete_o = ~is_vload_q;
+          state_d          = IDLE;
+        end
+      end
+      default:;
+    endcase
+  end
+
+  always_ff @(posedge clk_i or negedge rst_ni) begin
+    if (!rst_ni) begin
+      state_q          <= IDLE;
+      is_vload_q       <= 1'b0;
+      ara_resp_q       <= '0;
+      ara_resp_valid_q <= '0;
+    end else begin
+      state_q          <= state_d;
+      is_vload_q       <= is_vload_d;
+      ara_resp_q       <= ara_resp_d;
+      ara_resp_valid_q <= ara_resp_valid_d;
+    end
+  end
+
+endmodule