diff --git a/Bender.yml b/Bender.yml index 051313258..8f9dbdff4 100644 --- a/Bender.yml +++ b/Bender.yml @@ -27,6 +27,8 @@ sources: - hardware/include/ara_pkg.sv # Sources + # Level 0 + - hardware/src/segment_sequencer.sv # Level 1 - hardware/src/axi_to_mem.sv - hardware/src/ctrl_registers.sv diff --git a/hardware/include/ara_pkg.sv b/hardware/include/ara_pkg.sv index 014b00473..649eaf8ef 100644 --- a/hardware/include/ara_pkg.sv +++ b/hardware/include/ara_pkg.sv @@ -38,6 +38,12 @@ package ara_pkg; FixedPointEnable = 1'b1 } fixpt_support_e; + // Support for segment memory operations + typedef enum logic { + SegSupportDisable = 1'b0, + SegSupportEnable = 1'b1 + } seg_support_e; + // FP support outside of the FPU (external) // vfrec7, vfrsqrt7, round-toward-odd typedef enum logic { @@ -302,6 +308,9 @@ package ara_pkg; // Effective length multiplier rvv_pkg::vlmul_e emul; + // Number of segments in segment mem op + logic [2:0] nf; + // Rounding-Mode for FP operations fpnew_pkg::roundmode_e fp_rm; // Widen FP immediate (re-encoding) diff --git a/hardware/scripts/wave_ara.tcl b/hardware/scripts/wave_ara.tcl index b66587474..f32432801 100644 --- a/hardware/scripts/wave_ara.tcl +++ b/hardware/scripts/wave_ara.tcl @@ -7,6 +7,7 @@ add wave -noupdate -group Ara -group core /ara_tb/dut/i_ara_soc/i_system/i_ara/* add wave -noupdate -group Ara -group dispatcher /ara_tb/dut/i_ara_soc/i_system/i_ara/i_dispatcher/* +add wave -noupdate -group Ara -group dispatcher -group segment_sequencer /ara_tb/dut/i_ara_soc/i_system/i_ara/i_dispatcher/i_segment_sequencer/* add wave -noupdate -group Ara -group sequencer /ara_tb/dut/i_ara_soc/i_system/i_ara/i_sequencer/* # Add waves from all the lanes diff --git a/hardware/src/ara.sv b/hardware/src/ara.sv index c6976be6f..2f8698853 100644 --- a/hardware/src/ara.sv +++ b/hardware/src/ara.sv @@ -15,6 +15,8 @@ module ara import ara_pkg::*; #( parameter fpext_support_e FPExtSupport = FPExtSupportEnable, // Support for fixed-point data types parameter 
fixpt_support_e FixPtSupport = FixedPointEnable, + // Support for segment memory operations + parameter seg_support_e SegSupport = SegSupportEnable, // AXI Interface parameter int unsigned AxiDataWidth = 0, parameter int unsigned AxiAddrWidth = 0, @@ -89,7 +91,8 @@ module ara import ara_pkg::*; #( vxrm_t [NrLanes-1:0] alu_vxrm; ara_dispatcher #( - .NrLanes(NrLanes) + .NrLanes(NrLanes), + .SegSupport(SegSupport) ) i_dispatcher ( .clk_i (clk_i ), .rst_ni (rst_ni ), diff --git a/hardware/src/ara_dispatcher.sv b/hardware/src/ara_dispatcher.sv index b48f33c66..cb9002763 100644 --- a/hardware/src/ara_dispatcher.sv +++ b/hardware/src/ara_dispatcher.sv @@ -14,6 +14,8 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( parameter fpu_support_e FPUSupport = FPUSupportHalfSingleDouble, // External support for vfrec7, vfrsqrt7 parameter fpext_support_e FPExtSupport = FPExtSupportEnable, + // Support for segment memory operations + parameter seg_support_e SegSupport = SegSupportEnable, // Support for fixed-point data types parameter fixpt_support_e FixPtSupport = FixedPointEnable ) ( @@ -111,7 +113,7 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( // Backend interface // ///////////////////////// - ara_req_t ara_req_d; + ara_req_t ara_req, ara_req_d; logic ara_req_valid_d; always_ff @(posedge clk_i or negedge rst_ni) begin @@ -213,10 +215,10 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( logic null_vslideup; // Pipeline the VLSU's load and store complete signals, for timing reasons - logic load_complete_q; - logic store_complete_q; - `FF(load_complete_q, load_complete_i, 1'b0) - `FF(store_complete_q, store_complete_i, 1'b0) + logic load_complete, load_complete_q; + logic store_complete, store_complete_q; + `FF(load_complete_q, load_complete, 1'b0) + `FF(store_complete_q, store_complete, 1'b0) // NP2 Slide support logic is_stride_np2; @@ -226,18 +228,49 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( popcount #( 
.INPUT_WIDTH (idx_width(VLENB << 3)) ) i_np2_stride ( - .data_i (ara_req_d.stride[idx_width(VLENB << 3)-1:0]), + .data_i (ara_req.stride[idx_width(VLENB << 3)-1:0]), .popcount_o(sldu_popc ) ); assign is_stride_np2 = sldu_popc > 1; + // Segment-memory instruction sequencer + // Decompose the segment memory operations into non-segment memory operations + // This is a low-impact and low-performance implementation + logic is_segment_mem_op; + logic illegal_insn; + // Is the seg sequencer operating? + logic segment_micro_op_on; + + // The handshake signals are just passed through if the insn is non-segment + ara_resp_t ara_resp; + segment_sequencer #( + .SegSupport(SegSupport) + ) i_segment_sequencer ( + .clk_i(clk_i), + .rst_ni(rst_ni), + .ara_idle_i(ara_idle_i), + .is_segment_mem_op_i(is_segment_mem_op), + .illegal_insn_i(illegal_insn), + .is_vload_i(is_vload), + .segment_micro_op_on_o(segment_micro_op_on), + .load_complete_i(load_complete_i), + .load_complete_o(load_complete), + .store_complete_i(store_complete_i), + .store_complete_o(store_complete), + .ara_req_i(ara_req), + .ara_req_o(ara_req_d), + .ara_req_ready_i(ara_req_ready_i), + .ara_resp_i(ara_resp_i), + .ara_resp_o(ara_resp), + .ara_resp_valid_i(ara_resp_valid_i), + .ara_resp_valid_o(ara_resp_valid) + ); + /////////////// // Decoder // /////////////// - logic illegal_insn; - always_comb begin: p_decoder // Default values vstart_d = vstart_q; @@ -275,6 +308,8 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( is_decoding = 1'b0; in_lane_op = 1'b0; + is_segment_mem_op = 1'b0; + acc_req_ready_o = 1'b0; acc_resp_valid_o = 1'b0; acc_resp_o = '{ @@ -289,7 +324,7 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( // fflags for (int lane = 0; lane < NrLanes; lane++) acc_resp_o.fflags |= fflags_ex_i[lane]; - ara_req_d = '{ + ara_req = '{ vl : vl_q, vstart : vstart_q, vtype : vtype_q, @@ -337,24 +372,24 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( // When LMUL > 1,
not all the regs that compose a large // register should always be reshuffled ara_req_valid_d = ~rs_mask_request_q; - ara_req_d.use_scalar_op = 1'b1; - ara_req_d.vs2 = vs_buffer_q; - ara_req_d.eew_vs2 = eew_old_buffer_q; - ara_req_d.use_vs2 = 1'b1; - ara_req_d.vd = vs_buffer_q; - ara_req_d.use_vd = 1'b1; - ara_req_d.op = ara_pkg::VSLIDEDOWN; - ara_req_d.stride = '0; - ara_req_d.use_scalar_op = 1'b0; + ara_req.use_scalar_op = 1'b1; + ara_req.vs2 = vs_buffer_q; + ara_req.eew_vs2 = eew_old_buffer_q; + ara_req.use_vs2 = 1'b1; + ara_req.vd = vs_buffer_q; + ara_req.use_vd = 1'b1; + ara_req.op = ara_pkg::VSLIDEDOWN; + ara_req.stride = '0; + ara_req.use_scalar_op = 1'b0; // Unmasked: reshuffle everything - ara_req_d.vm = 1'b1; + ara_req.vm = 1'b1; // Shuffle the whole reg (vl refers to current vsew) - ara_req_d.vtype.vsew = eew_new_buffer_q; + ara_req.vtype.vsew = eew_new_buffer_q; // Always reshuffle one vreg at a time - ara_req_d.vl = VLENB >> ara_req_d.vtype.vsew; + ara_req.vl = VLENB >> ara_req.vtype.vsew; // Vl refers to current system vsew but operand requesters // will fetch from a register with a different eew - ara_req_d.scale_vl = 1'b1; + ara_req.scale_vl = 1'b1; // Backend ready - Decide what to do next if (ara_req_ready_i) begin @@ -375,17 +410,17 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( unique casez (reshuffle_req_d) 3'b??1: begin eew_old_buffer_d = eew_q[insn.vmem_type.rd]; - eew_new_buffer_d = ara_req_d.vtype.vsew; + eew_new_buffer_d = ara_req.vtype.vsew; vs_buffer_d = insn.varith_type.rd; end 3'b?10: begin eew_old_buffer_d = eew_q[insn.vmem_type.rs2]; - eew_new_buffer_d = ara_req_d.eew_vs2; + eew_new_buffer_d = ara_req.eew_vs2; vs_buffer_d = insn.varith_type.rs2; end 3'b100: begin eew_old_buffer_d = eew_q[insn.vmem_type.rs1]; - eew_new_buffer_d = ara_req_d.eew_vs1; + eew_new_buffer_d = ara_req.eew_vs1; vs_buffer_d = insn.varith_type.rs1; end default:; @@ -404,17 +439,17 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; 
#( 3'b??1: begin vs_buffer_d = vs_buffer_q + 1; eew_old_buffer_d = eew_q[vs_buffer_d]; - eew_new_buffer_d = ara_req_d.vtype.vsew; + eew_new_buffer_d = ara_req.vtype.vsew; end 3'b?10: begin vs_buffer_d = vs_buffer_q + 1; eew_old_buffer_d = eew_q[vs_buffer_d]; - eew_new_buffer_d = ara_req_d.eew_vs2; + eew_new_buffer_d = ara_req.eew_vs2; end 3'b100: begin vs_buffer_d = vs_buffer_q + 1; eew_old_buffer_d = eew_q[vs_buffer_d]; - eew_new_buffer_d = ara_req_d.eew_vs1; + eew_new_buffer_d = ara_req.eew_vs1; end default:; endcase @@ -521,28 +556,28 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( OPIVV: begin: opivv // These generate a request to Ara's backend - ara_req_d.vs1 = insn.varith_type.rs1; - ara_req_d.use_vs1 = 1'b1; - ara_req_d.vs2 = insn.varith_type.rs2; - ara_req_d.use_vs2 = 1'b1; - ara_req_d.vd = insn.varith_type.rd; - ara_req_d.use_vd = 1'b1; - ara_req_d.vm = insn.varith_type.vm; + ara_req.vs1 = insn.varith_type.rs1; + ara_req.use_vs1 = 1'b1; + ara_req.vs2 = insn.varith_type.rs2; + ara_req.use_vs2 = 1'b1; + ara_req.vd = insn.varith_type.rd; + ara_req.use_vd = 1'b1; + ara_req.vm = insn.varith_type.vm; ara_req_valid_d = 1'b1; // Decode based on the func6 field unique case (insn.varith_type.func6) - 6'b000000: ara_req_d.op = ara_pkg::VADD; - 6'b000010: ara_req_d.op = ara_pkg::VSUB; - 6'b000100: ara_req_d.op = ara_pkg::VMINU; - 6'b000101: ara_req_d.op = ara_pkg::VMIN; - 6'b000110: ara_req_d.op = ara_pkg::VMAXU; - 6'b000111: ara_req_d.op = ara_pkg::VMAX; - 6'b001001: ara_req_d.op = ara_pkg::VAND; - 6'b001010: ara_req_d.op = ara_pkg::VOR; - 6'b001011: ara_req_d.op = ara_pkg::VXOR; + 6'b000000: ara_req.op = ara_pkg::VADD; + 6'b000010: ara_req.op = ara_pkg::VSUB; + 6'b000100: ara_req.op = ara_pkg::VMINU; + 6'b000101: ara_req.op = ara_pkg::VMIN; + 6'b000110: ara_req.op = ara_pkg::VMAXU; + 6'b000111: ara_req.op = ara_pkg::VMAX; + 6'b001001: ara_req.op = ara_pkg::VAND; + 6'b001010: ara_req.op = ara_pkg::VOR; + 6'b001011: ara_req.op = ara_pkg::VXOR; 
6'b010000: begin - ara_req_d.op = ara_pkg::VADC; + ara_req.op = ara_pkg::VADC; // Encoding corresponding to unmasked operations are reserved if (insn.varith_type.vm) illegal_insn = 1'b1; @@ -551,11 +586,11 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( if (insn.varith_type.rd == 5'b0) illegal_insn = 1'b1; end 6'b010001: begin - ara_req_d.op = ara_pkg::VMADC; - ara_req_d.use_vd_op = 1'b1; + ara_req.op = ara_pkg::VMADC; + ara_req.use_vd_op = 1'b1; // Check whether we can access vs1 and vs2 - unique case (ara_req_d.emul) + unique case (ara_req.emul) LMUL_2: if (((insn.varith_type.rs1 & 5'b00001) == (insn.varith_type.rd & 5'b00001)) || ((insn.varith_type.rs2 & 5'b00001) == (insn.varith_type.rd & 5'b00001))) @@ -574,18 +609,18 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( endcase end 6'b010010: begin - ara_req_d.op = ara_pkg::VSBC; + ara_req.op = ara_pkg::VSBC; // Encoding corresponding to unmasked operations are reserved if (insn.varith_type.vm) illegal_insn = 1'b1; // An illegal instruction is raised if the destination vector is v0 if (insn.varith_type.rd == 5'b0) illegal_insn = 1'b1; end 6'b010011: begin - ara_req_d.op = ara_pkg::VMSBC; - ara_req_d.use_vd_op = 1'b1; + ara_req.op = ara_pkg::VMSBC; + ara_req.use_vd_op = 1'b1; // Check whether we can access vs1 and vs2 - unique case (ara_req_d.emul) + unique case (ara_req.emul) LMUL_2: if (((insn.varith_type.rs1 & 5'b00001) == (insn.varith_type.rd & 5'b00001)) || ((insn.varith_type.rs2 & 5'b00001) == ( insn.varith_type.rd & 5'b00001))) @@ -604,61 +639,61 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( endcase end 6'b011000: begin - ara_req_d.op = ara_pkg::VMSEQ; - ara_req_d.use_vd_op = 1'b1; + ara_req.op = ara_pkg::VMSEQ; + ara_req.use_vd_op = 1'b1; end 6'b011001: begin - ara_req_d.op = ara_pkg::VMSNE; - ara_req_d.use_vd_op = 1'b1; + ara_req.op = ara_pkg::VMSNE; + ara_req.use_vd_op = 1'b1; end 6'b011010: begin - ara_req_d.op = ara_pkg::VMSLTU; - 
ara_req_d.use_vd_op = 1'b1; + ara_req.op = ara_pkg::VMSLTU; + ara_req.use_vd_op = 1'b1; end 6'b011011: begin - ara_req_d.op = ara_pkg::VMSLT; - ara_req_d.use_vd_op = 1'b1; + ara_req.op = ara_pkg::VMSLT; + ara_req.use_vd_op = 1'b1; end 6'b011100: begin - ara_req_d.op = ara_pkg::VMSLEU; - ara_req_d.use_vd_op = 1'b1; + ara_req.op = ara_pkg::VMSLEU; + ara_req.use_vd_op = 1'b1; end 6'b011101: begin - ara_req_d.op = ara_pkg::VMSLE; - ara_req_d.use_vd_op = 1'b1; + ara_req.op = ara_pkg::VMSLE; + ara_req.use_vd_op = 1'b1; end 6'b010111: begin - ara_req_d.op = ara_pkg::VMERGE; - ara_req_d.use_vs2 = !insn.varith_type.vm; // vmv.v.v does not use vs2 + ara_req.op = ara_pkg::VMERGE; + ara_req.use_vs2 = !insn.varith_type.vm; // vmv.v.v does not use vs2 // With a normal vmv.v.v, copy input eew to output // to avoid unnecessary reshuffles if (insn.varith_type.vm) begin - ara_req_d.eew_vs1 = eew_q[ara_req_d.vs1]; - ara_req_d.vtype.vsew = eew_q[ara_req_d.vs1]; - ara_req_d.vl = (vl_q << vtype_q.vsew[1:0]) >> ara_req_d.eew_vs1[1:0]; + ara_req.eew_vs1 = eew_q[ara_req.vs1]; + ara_req.vtype.vsew = eew_q[ara_req.vs1]; + ara_req.vl = (vl_q << vtype_q.vsew[1:0]) >> ara_req.eew_vs1[1:0]; end end - 6'b100000: ara_req_d.op = ara_pkg::VSADDU; - 6'b100001: ara_req_d.op = ara_pkg::VSADD; - 6'b100010: ara_req_d.op = ara_pkg::VSSUBU; - 6'b100011: ara_req_d.op = ara_pkg::VSSUB; - 6'b100101: ara_req_d.op = ara_pkg::VSLL; - 6'b100111: ara_req_d.op = ara_pkg::VSMUL; - 6'b101000: ara_req_d.op = ara_pkg::VSRL; - 6'b101010: ara_req_d.op = ara_pkg::VSSRL; - 6'b101011: ara_req_d.op = ara_pkg::VSSRA; - 6'b101001: ara_req_d.op = ara_pkg::VSRA; + 6'b100000: ara_req.op = ara_pkg::VSADDU; + 6'b100001: ara_req.op = ara_pkg::VSADD; + 6'b100010: ara_req.op = ara_pkg::VSSUBU; + 6'b100011: ara_req.op = ara_pkg::VSSUB; + 6'b100101: ara_req.op = ara_pkg::VSLL; + 6'b100111: ara_req.op = ara_pkg::VSMUL; + 6'b101000: ara_req.op = ara_pkg::VSRL; + 6'b101010: ara_req.op = ara_pkg::VSSRL; + 6'b101011: ara_req.op = 
ara_pkg::VSSRA; + 6'b101001: ara_req.op = ara_pkg::VSRA; 6'b101100: begin - ara_req_d.op = ara_pkg::VNSRL; - ara_req_d.conversion_vs1 = OpQueueConversionZExt2; - ara_req_d.eew_vs2 = vtype_q.vsew.next(); + ara_req.op = ara_pkg::VNSRL; + ara_req.conversion_vs1 = OpQueueConversionZExt2; + ara_req.eew_vs2 = vtype_q.vsew.next(); lmul_vs2 = next_lmul(vtype_q.vlmul); // Check whether the EEW is not too wide. if (int'(vtype_q.vsew) > int'(EW32)) illegal_insn = 1'b1; // Check whether we can access vs2 - unique case (ara_req_d.emul.next()) + unique case (ara_req.emul.next()) LMUL_2: if ((insn.varith_type.rs2 & 5'b00001) != 5'b00000) illegal_insn = 1'b1; LMUL_4: if ((insn.varith_type.rs2 & 5'b00011) != 5'b00000) illegal_insn = 1'b1; LMUL_8: if ((insn.varith_type.rs2 & 5'b00111) != 5'b00000) illegal_insn = 1'b1; @@ -667,16 +702,16 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( endcase end 6'b101101: begin - ara_req_d.op = ara_pkg::VNSRA; - ara_req_d.conversion_vs1 = OpQueueConversionZExt2; - ara_req_d.eew_vs2 = vtype_q.vsew.next(); + ara_req.op = ara_pkg::VNSRA; + ara_req.conversion_vs1 = OpQueueConversionZExt2; + ara_req.eew_vs2 = vtype_q.vsew.next(); lmul_vs2 = next_lmul(vtype_q.vlmul); // Check whether the EEW is not too wide. 
if (int'(vtype_q.vsew) > int'(EW32)) illegal_insn = 1'b1; // Check whether we can access vs2 - unique case (ara_req_d.emul.next()) + unique case (ara_req.emul.next()) LMUL_2: if ((insn.varith_type.rs2 & 5'b00001) != 5'b00000) illegal_insn = 1'b1; LMUL_4: if ((insn.varith_type.rs2 & 5'b00011) != 5'b00000) illegal_insn = 1'b1; LMUL_8: if ((insn.varith_type.rs2 & 5'b00111) != 5'b00000) illegal_insn = 1'b1; @@ -685,39 +720,39 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( endcase end 6'b101110: begin - ara_req_d.op = ara_pkg::VNCLIPU; - ara_req_d.eew_vs2 = vtype_q.vsew.next(); + ara_req.op = ara_pkg::VNCLIPU; + ara_req.eew_vs2 = vtype_q.vsew.next(); end 6'b101111: begin - ara_req_d.op = ara_pkg::VNCLIP; - ara_req_d.eew_vs2 = vtype_q.vsew.next(); + ara_req.op = ara_pkg::VNCLIP; + ara_req.eew_vs2 = vtype_q.vsew.next(); end // Reductions encode in cvt_resize the neutral value bits // CVT_WIDE is 2'b00 (hack to save wires) 6'b110000: begin - ara_req_d.op = ara_pkg::VWREDSUMU; - ara_req_d.emul = next_lmul(vtype_q.vlmul); - ara_req_d.eew_vs1 = vtype_q.vsew.next(); - ara_req_d.vtype.vsew = vtype_q.vsew.next(); - ara_req_d.conversion_vs1 = OpQueueReductionZExt; - ara_req_d.conversion_vs2 = OpQueueConversionZExt2; - ara_req_d.cvt_resize = CVT_WIDE; + ara_req.op = ara_pkg::VWREDSUMU; + ara_req.emul = next_lmul(vtype_q.vlmul); + ara_req.eew_vs1 = vtype_q.vsew.next(); + ara_req.vtype.vsew = vtype_q.vsew.next(); + ara_req.conversion_vs1 = OpQueueReductionZExt; + ara_req.conversion_vs2 = OpQueueConversionZExt2; + ara_req.cvt_resize = CVT_WIDE; end 6'b110001: begin - ara_req_d.op = ara_pkg::VWREDSUM; - ara_req_d.emul = next_lmul(vtype_q.vlmul); - ara_req_d.eew_vs1 = vtype_q.vsew.next(); - ara_req_d.vtype.vsew = vtype_q.vsew.next(); - ara_req_d.conversion_vs1 = OpQueueReductionZExt; - ara_req_d.conversion_vs2 = OpQueueConversionSExt2; - ara_req_d.cvt_resize = CVT_WIDE; + ara_req.op = ara_pkg::VWREDSUM; + ara_req.emul = next_lmul(vtype_q.vlmul); + ara_req.eew_vs1 = 
vtype_q.vsew.next(); + ara_req.vtype.vsew = vtype_q.vsew.next(); + ara_req.conversion_vs1 = OpQueueReductionZExt; + ara_req.conversion_vs2 = OpQueueConversionSExt2; + ara_req.cvt_resize = CVT_WIDE; end default: illegal_insn = 1'b1; endcase // Instructions with an integer LMUL have extra constraints on the registers they can // access. - unique case (ara_req_d.emul) + unique case (ara_req.emul) LMUL_2: if ((insn.varith_type.rs1 & 5'b00001) != 5'b00000 || (insn.varith_type.rs2 & 5'b00001) != 5'b00000 || (insn.varith_type.rd & 5'b00001) != 5'b00000) illegal_insn = 1'b1; @@ -736,53 +771,53 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( OPIVX: begin: opivx // These generate a request to Ara's backend - ara_req_d.scalar_op = acc_req_i.rs1; - ara_req_d.use_scalar_op = 1'b1; - ara_req_d.vs2 = insn.varith_type.rs2; - ara_req_d.use_vs2 = 1'b1; - ara_req_d.vd = insn.varith_type.rd; - ara_req_d.use_vd = 1'b1; - ara_req_d.vm = insn.varith_type.vm; - ara_req_d.is_stride_np2 = is_stride_np2; + ara_req.scalar_op = acc_req_i.rs1; + ara_req.use_scalar_op = 1'b1; + ara_req.vs2 = insn.varith_type.rs2; + ara_req.use_vs2 = 1'b1; + ara_req.vd = insn.varith_type.rd; + ara_req.use_vd = 1'b1; + ara_req.vm = insn.varith_type.vm; + ara_req.is_stride_np2 = is_stride_np2; ara_req_valid_d = 1'b1; // Decode based on the func6 field unique case (insn.varith_type.func6) - 6'b000000: ara_req_d.op = ara_pkg::VADD; - 6'b000010: ara_req_d.op = ara_pkg::VSUB; - 6'b000011: ara_req_d.op = ara_pkg::VRSUB; - 6'b000100: ara_req_d.op = ara_pkg::VMINU; - 6'b000101: ara_req_d.op = ara_pkg::VMIN; - 6'b000110: ara_req_d.op = ara_pkg::VMAXU; - 6'b000111: ara_req_d.op = ara_pkg::VMAX; - 6'b001001: ara_req_d.op = ara_pkg::VAND; - 6'b001010: ara_req_d.op = ara_pkg::VOR; - 6'b001011: ara_req_d.op = ara_pkg::VXOR; + 6'b000000: ara_req.op = ara_pkg::VADD; + 6'b000010: ara_req.op = ara_pkg::VSUB; + 6'b000011: ara_req.op = ara_pkg::VRSUB; + 6'b000100: ara_req.op = ara_pkg::VMINU; + 6'b000101: ara_req.op 
= ara_pkg::VMIN; + 6'b000110: ara_req.op = ara_pkg::VMAXU; + 6'b000111: ara_req.op = ara_pkg::VMAX; + 6'b001001: ara_req.op = ara_pkg::VAND; + 6'b001010: ara_req.op = ara_pkg::VOR; + 6'b001011: ara_req.op = ara_pkg::VXOR; 6'b001110: begin - ara_req_d.op = ara_pkg::VSLIDEUP; - ara_req_d.stride = acc_req_i.rs1; - ara_req_d.eew_vs2 = vtype_q.vsew; + ara_req.op = ara_pkg::VSLIDEUP; + ara_req.stride = acc_req_i.rs1; + ara_req.eew_vs2 = vtype_q.vsew; // Encode vslideup/vslide1up on the use_scalar_op field - ara_req_d.use_scalar_op = 1'b0; + ara_req.use_scalar_op = 1'b0; // Vl refers to current system vsew, but operand requesters // will fetch bytes from a vreg with a different eew // i.e., request will need reshuffling - ara_req_d.scale_vl = 1'b1; + ara_req.scale_vl = 1'b1; // If stride > vl, the vslideup has no effects - if (|ara_req_d.stride[$bits(ara_req_d.stride)-1:$bits(vl_q)] || - (vlen_t'(ara_req_d.stride) >= vl_q)) null_vslideup = 1'b1; + if (|ara_req.stride[$bits(ara_req.stride)-1:$bits(vl_q)] || + (vlen_t'(ara_req.stride) >= vl_q)) null_vslideup = 1'b1; end 6'b001111: begin - ara_req_d.op = ara_pkg::VSLIDEDOWN; - ara_req_d.stride = acc_req_i.rs1; - ara_req_d.eew_vs2 = vtype_q.vsew; + ara_req.op = ara_pkg::VSLIDEDOWN; + ara_req.stride = acc_req_i.rs1; + ara_req.eew_vs2 = vtype_q.vsew; // Encode vslidedown/vslide1down on the use_scalar_op field - ara_req_d.use_scalar_op = 1'b0; + ara_req.use_scalar_op = 1'b0; // Request will need reshuffling - ara_req_d.scale_vl = 1'b1; + ara_req.scale_vl = 1'b1; end 6'b010000: begin - ara_req_d.op = ara_pkg::VADC; + ara_req.op = ara_pkg::VADC; // Encoding corresponding to unmasked operations are reserved if (insn.varith_type.vm) illegal_insn = 1'b1; @@ -791,11 +826,11 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( if (insn.varith_type.rd == 5'b0) illegal_insn = 1'b1; end 6'b010001: begin - ara_req_d.op = ara_pkg::VMADC; - ara_req_d.use_vd_op = 1'b1; + ara_req.op = ara_pkg::VMADC; + ara_req.use_vd_op = 1'b1; // 
Check whether we can access vs1 and vs2 - unique case (ara_req_d.emul) + unique case (ara_req.emul) LMUL_2: if ((insn.varith_type.rs2 & 5'b00001) == (insn.varith_type.rd & 5'b00001)) illegal_insn = 1'b1; @@ -809,7 +844,7 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( endcase end 6'b010010: begin - ara_req_d.op = ara_pkg::VSBC; + ara_req.op = ara_pkg::VSBC; // Encoding corresponding to unmasked operations are reserved if (insn.varith_type.vm) illegal_insn = 1'b1; @@ -818,11 +853,11 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( if (insn.varith_type.rd == 5'b0) illegal_insn = 1'b1; end 6'b010011: begin - ara_req_d.op = ara_pkg::VMSBC; - ara_req_d.use_vd_op = 1'b1; + ara_req.op = ara_pkg::VMSBC; + ara_req.use_vd_op = 1'b1; // Check whether we can access vs1 and vs2 - unique case (ara_req_d.emul) + unique case (ara_req.emul) LMUL_2: if ((insn.varith_type.rs2 & 5'b00001) == (insn.varith_type.rd & 5'b00001)) illegal_insn = 1'b1; @@ -836,62 +871,62 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( endcase end 6'b011000: begin - ara_req_d.op = ara_pkg::VMSEQ; - ara_req_d.use_vd_op = 1'b1; + ara_req.op = ara_pkg::VMSEQ; + ara_req.use_vd_op = 1'b1; end 6'b011001: begin - ara_req_d.op = ara_pkg::VMSNE; - ara_req_d.use_vd_op = 1'b1; + ara_req.op = ara_pkg::VMSNE; + ara_req.use_vd_op = 1'b1; end 6'b011010: begin - ara_req_d.op = ara_pkg::VMSLTU; - ara_req_d.use_vd_op = 1'b1; + ara_req.op = ara_pkg::VMSLTU; + ara_req.use_vd_op = 1'b1; end 6'b011011: begin - ara_req_d.op = ara_pkg::VMSLT; - ara_req_d.use_vd_op = 1'b1; + ara_req.op = ara_pkg::VMSLT; + ara_req.use_vd_op = 1'b1; end 6'b011100: begin - ara_req_d.op = ara_pkg::VMSLEU; - ara_req_d.use_vd_op = 1'b1; + ara_req.op = ara_pkg::VMSLEU; + ara_req.use_vd_op = 1'b1; end 6'b011101: begin - ara_req_d.op = ara_pkg::VMSLE; - ara_req_d.use_vd_op = 1'b1; + ara_req.op = ara_pkg::VMSLE; + ara_req.use_vd_op = 1'b1; end 6'b011110: begin - ara_req_d.op = ara_pkg::VMSGTU; - 
ara_req_d.use_vd_op = 1'b1; + ara_req.op = ara_pkg::VMSGTU; + ara_req.use_vd_op = 1'b1; end 6'b011111: begin - ara_req_d.op = ara_pkg::VMSGT; - ara_req_d.use_vd_op = 1'b1; + ara_req.op = ara_pkg::VMSGT; + ara_req.use_vd_op = 1'b1; end 6'b010111: begin - ara_req_d.op = ara_pkg::VMERGE; - ara_req_d.use_vs2 = !insn.varith_type.vm; // vmv.v.x does not use vs2 - end - 6'b100000: ara_req_d.op = ara_pkg::VSADDU; - 6'b100001: ara_req_d.op = ara_pkg::VSADD; - 6'b100010: ara_req_d.op = ara_pkg::VSSUBU; - 6'b100011: ara_req_d.op = ara_pkg::VSSUB; - 6'b100101: ara_req_d.op = ara_pkg::VSLL; - 6'b100111: ara_req_d.op = ara_pkg::VSMUL; - 6'b101000: ara_req_d.op = ara_pkg::VSRL; - 6'b101010: ara_req_d.op = ara_pkg::VSSRL; - 6'b101011: ara_req_d.op = ara_pkg::VSSRA; - 6'b101001: ara_req_d.op = ara_pkg::VSRA; + ara_req.op = ara_pkg::VMERGE; + ara_req.use_vs2 = !insn.varith_type.vm; // vmv.v.x does not use vs2 + end + 6'b100000: ara_req.op = ara_pkg::VSADDU; + 6'b100001: ara_req.op = ara_pkg::VSADD; + 6'b100010: ara_req.op = ara_pkg::VSSUBU; + 6'b100011: ara_req.op = ara_pkg::VSSUB; + 6'b100101: ara_req.op = ara_pkg::VSLL; + 6'b100111: ara_req.op = ara_pkg::VSMUL; + 6'b101000: ara_req.op = ara_pkg::VSRL; + 6'b101010: ara_req.op = ara_pkg::VSSRL; + 6'b101011: ara_req.op = ara_pkg::VSSRA; + 6'b101001: ara_req.op = ara_pkg::VSRA; 6'b101100: begin - ara_req_d.op = ara_pkg::VNSRL; - ara_req_d.conversion_vs1 = OpQueueConversionZExt2; - ara_req_d.eew_vs2 = vtype_q.vsew.next(); + ara_req.op = ara_pkg::VNSRL; + ara_req.conversion_vs1 = OpQueueConversionZExt2; + ara_req.eew_vs2 = vtype_q.vsew.next(); lmul_vs2 = next_lmul(vtype_q.vlmul); // Check whether the EEW is not too wide. 
if (int'(vtype_q.vsew) > int'(EW32)) illegal_insn = 1'b1; // Check whether we can access vs2 - unique case (ara_req_d.emul.next()) + unique case (ara_req.emul.next()) LMUL_2: if ((insn.varith_type.rs2 & 5'b00001) != 5'b00000) illegal_insn = 1'b1; LMUL_4: if ((insn.varith_type.rs2 & 5'b00011) != 5'b00000) illegal_insn = 1'b1; LMUL_8: if ((insn.varith_type.rs2 & 5'b00111) != 5'b00000) illegal_insn = 1'b1; @@ -900,16 +935,16 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( endcase end 6'b101101: begin - ara_req_d.op = ara_pkg::VNSRA; - ara_req_d.conversion_vs1 = OpQueueConversionZExt2; - ara_req_d.eew_vs2 = vtype_q.vsew.next(); + ara_req.op = ara_pkg::VNSRA; + ara_req.conversion_vs1 = OpQueueConversionZExt2; + ara_req.eew_vs2 = vtype_q.vsew.next(); lmul_vs2 = next_lmul(vtype_q.vlmul); // Check whether the EEW is not too wide. if (int'(vtype_q.vsew) > int'(EW32)) illegal_insn = 1'b1; // Check whether we can access vs2 - unique case (ara_req_d.emul.next()) + unique case (ara_req.emul.next()) LMUL_2: if ((insn.varith_type.rs2 & 5'b00001) != 5'b00000) illegal_insn = 1'b1; LMUL_4: if ((insn.varith_type.rs2 & 5'b00011) != 5'b00000) illegal_insn = 1'b1; LMUL_8: if ((insn.varith_type.rs2 & 5'b00111) != 5'b00000) illegal_insn = 1'b1; @@ -918,19 +953,19 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( endcase end 6'b101110: begin - ara_req_d.op = ara_pkg::VNCLIPU; - ara_req_d.eew_vs2 = vtype_q.vsew.next(); + ara_req.op = ara_pkg::VNCLIPU; + ara_req.eew_vs2 = vtype_q.vsew.next(); end 6'b101111: begin - ara_req_d.op = ara_pkg::VNCLIP; - ara_req_d.eew_vs2 = vtype_q.vsew.next(); + ara_req.op = ara_pkg::VNCLIP; + ara_req.eew_vs2 = vtype_q.vsew.next(); end default: illegal_insn = 1'b1; endcase // Instructions with an integer LMUL have extra constraints on the registers they can // access. 
- unique case (ara_req_d.emul) + unique case (ara_req.emul) LMUL_2: if ((insn.varith_type.rs2 & 5'b00001) != 5'b00000 || (insn.varith_type.rd & 5'b00001) != 5'b00000) illegal_insn = 1'b1; LMUL_4: if ((insn.varith_type.rs2 & 5'b00011) != 5'b00000 || @@ -949,46 +984,46 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( // Sign-extend this by default. // Instructions that need the immediate to be zero-extended // (vrgather, shifts, clips, slides) should do overwrite this. - ara_req_d.scalar_op = {{ELEN{insn.varith_type.rs1[19]}}, insn.varith_type.rs1}; - ara_req_d.use_scalar_op = 1'b1; - ara_req_d.vs2 = insn.varith_type.rs2; - ara_req_d.use_vs2 = 1'b1; - ara_req_d.vd = insn.varith_type.rd; - ara_req_d.use_vd = 1'b1; - ara_req_d.vm = insn.varith_type.vm; - ara_req_d.is_stride_np2 = is_stride_np2; + ara_req.scalar_op = {{ELEN{insn.varith_type.rs1[19]}}, insn.varith_type.rs1}; + ara_req.use_scalar_op = 1'b1; + ara_req.vs2 = insn.varith_type.rs2; + ara_req.use_vs2 = 1'b1; + ara_req.vd = insn.varith_type.rd; + ara_req.use_vd = 1'b1; + ara_req.vm = insn.varith_type.vm; + ara_req.is_stride_np2 = is_stride_np2; ara_req_valid_d = 1'b1; // Decode based on the func6 field unique case (insn.varith_type.func6) - 6'b000000: ara_req_d.op = ara_pkg::VADD; - 6'b000011: ara_req_d.op = ara_pkg::VRSUB; - 6'b001001: ara_req_d.op = ara_pkg::VAND; - 6'b001010: ara_req_d.op = ara_pkg::VOR; - 6'b001011: ara_req_d.op = ara_pkg::VXOR; + 6'b000000: ara_req.op = ara_pkg::VADD; + 6'b000011: ara_req.op = ara_pkg::VRSUB; + 6'b001001: ara_req.op = ara_pkg::VAND; + 6'b001010: ara_req.op = ara_pkg::VOR; + 6'b001011: ara_req.op = ara_pkg::VXOR; 6'b001110: begin - ara_req_d.op = ara_pkg::VSLIDEUP; - ara_req_d.stride = {{ELEN{insn.varith_type.rs1[19]}}, insn.varith_type.rs1}; - ara_req_d.eew_vs2 = vtype_q.vsew; + ara_req.op = ara_pkg::VSLIDEUP; + ara_req.stride = {{ELEN{insn.varith_type.rs1[19]}}, insn.varith_type.rs1}; + ara_req.eew_vs2 = vtype_q.vsew; // Encode vslideup/vslide1up on the 
use_scalar_op field - ara_req_d.use_scalar_op = 1'b0; + ara_req.use_scalar_op = 1'b0; // Request will need reshuffling - ara_req_d.scale_vl = 1'b1; + ara_req.scale_vl = 1'b1; // If stride > vl, the vslideup has no effects - if (|ara_req_d.stride[$bits(ara_req_d.stride)-1:$bits(vl_q)] || - (vlen_t'(ara_req_d.stride) >= vl_q)) null_vslideup = 1'b1; + if (|ara_req.stride[$bits(ara_req.stride)-1:$bits(vl_q)] || + (vlen_t'(ara_req.stride) >= vl_q)) null_vslideup = 1'b1; end 6'b001111: begin - ara_req_d.op = ara_pkg::VSLIDEDOWN; - ara_req_d.stride = {{ELEN{insn.varith_type.rs1[19]}}, insn.varith_type.rs1}; - ara_req_d.eew_vs2 = vtype_q.vsew; + ara_req.op = ara_pkg::VSLIDEDOWN; + ara_req.stride = {{ELEN{insn.varith_type.rs1[19]}}, insn.varith_type.rs1}; + ara_req.eew_vs2 = vtype_q.vsew; // Encode vslidedown/vslide1down on the use_scalar_op field - ara_req_d.use_scalar_op = 1'b0; + ara_req.use_scalar_op = 1'b0; // Request will need reshuffling - ara_req_d.scale_vl = 1'b1; + ara_req.scale_vl = 1'b1; end 6'b010000: begin - ara_req_d.op = ara_pkg::VADC; + ara_req.op = ara_pkg::VADC; // Encoding corresponding to unmasked operations are reserved if (insn.varith_type.vm) illegal_insn = 1'b1; @@ -997,11 +1032,11 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( if (insn.varith_type.rd == 5'b0) illegal_insn = 1'b1; end 6'b010001: begin - ara_req_d.op = ara_pkg::VMADC; - ara_req_d.use_vd_op = 1'b1; + ara_req.op = ara_pkg::VMADC; + ara_req.use_vd_op = 1'b1; // Check whether we can access vs1 and vs2 - unique case (ara_req_d.emul) + unique case (ara_req.emul) LMUL_2: if ((insn.varith_type.rs2 & 5'b00001) == (insn.varith_type.rd & 5'b00001)) illegal_insn = 1'b1; @@ -1015,36 +1050,36 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( endcase end 6'b011000: begin - ara_req_d.op = ara_pkg::VMSEQ; - ara_req_d.use_vd_op = 1'b1; + ara_req.op = ara_pkg::VMSEQ; + ara_req.use_vd_op = 1'b1; end 6'b011001: begin - ara_req_d.op = ara_pkg::VMSNE; - 
ara_req_d.use_vd_op = 1'b1; + ara_req.op = ara_pkg::VMSNE; + ara_req.use_vd_op = 1'b1; end 6'b011100: begin - ara_req_d.op = ara_pkg::VMSLEU; - ara_req_d.use_vd_op = 1'b1; + ara_req.op = ara_pkg::VMSLEU; + ara_req.use_vd_op = 1'b1; end 6'b011101: begin - ara_req_d.op = ara_pkg::VMSLE; - ara_req_d.use_vd_op = 1'b1; + ara_req.op = ara_pkg::VMSLE; + ara_req.use_vd_op = 1'b1; end 6'b011110: begin - ara_req_d.op = ara_pkg::VMSGTU; - ara_req_d.use_vd_op = 1'b1; + ara_req.op = ara_pkg::VMSGTU; + ara_req.use_vd_op = 1'b1; end 6'b011111: begin - ara_req_d.op = ara_pkg::VMSGT; - ara_req_d.use_vd_op = 1'b1; + ara_req.op = ara_pkg::VMSGT; + ara_req.use_vd_op = 1'b1; end 6'b010111: begin - ara_req_d.op = ara_pkg::VMERGE; - ara_req_d.use_vs2 = !insn.varith_type.vm; // vmv.v.i does not use vs2 + ara_req.op = ara_pkg::VMERGE; + ara_req.use_vs2 = !insn.varith_type.vm; // vmv.v.i does not use vs2 end - 6'b100000: ara_req_d.op = ara_pkg::VSADDU; - 6'b100001: ara_req_d.op = ara_pkg::VSADD; - 6'b100101: ara_req_d.op = ara_pkg::VSLL; + 6'b100000: ara_req.op = ara_pkg::VSADDU; + 6'b100001: ara_req.op = ara_pkg::VSADD; + 6'b100101: ara_req.op = ara_pkg::VSLL; 6'b100111: begin // vmvr.v automatic int unsigned vlmax; // Execute also if vl == 0 @@ -1056,19 +1091,19 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( unique case (insn.varith_type.rs1[17:15]) 3'd0 : begin vlmax <<= 0; - ara_req_d.emul = LMUL_1; + ara_req.emul = LMUL_1; end 3'd1 : begin vlmax <<= 1; - ara_req_d.emul = LMUL_2; + ara_req.emul = LMUL_2; end 3'd3 : begin vlmax <<= 2; - ara_req_d.emul = LMUL_4; + ara_req.emul = LMUL_4; end 3'd7 : begin vlmax <<= 3; - ara_req_d.emul = LMUL_8; + ara_req.emul = LMUL_8; end default: begin // Trigger an error for the reserved simm values @@ -1077,31 +1112,31 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( endcase // From here on, the only difference with a vmv.v.v is that the vector reg index // is in rs2. 
For the rest,, pretend to be a vmv.v.v - ara_req_d.op = ara_pkg::VMERGE; - ara_req_d.use_scalar_op = 1'b0; - ara_req_d.use_vs1 = 1'b1; - ara_req_d.use_vs2 = 1'b0; - ara_req_d.vs1 = insn.varith_type.rs2; - ara_req_d.eew_vs1 = eew_q[insn.varith_type.rs2]; + ara_req.op = ara_pkg::VMERGE; + ara_req.use_scalar_op = 1'b0; + ara_req.use_vs1 = 1'b1; + ara_req.use_vs2 = 1'b0; + ara_req.vs1 = insn.varith_type.rs2; + ara_req.eew_vs1 = eew_q[insn.varith_type.rs2]; // Copy the encoding information to the new register - ara_req_d.vtype.vsew = eew_q[insn.varith_type.rs2]; - ara_req_d.vl = vlmax; // whole register move + ara_req.vtype.vsew = eew_q[insn.varith_type.rs2]; + ara_req.vl = vlmax; // whole register move end - 6'b101000: ara_req_d.op = ara_pkg::VSRL; - 6'b101001: ara_req_d.op = ara_pkg::VSRA; - 6'b101010: ara_req_d.op = ara_pkg::VSSRL; - 6'b101011: ara_req_d.op = ara_pkg::VSSRA; + 6'b101000: ara_req.op = ara_pkg::VSRL; + 6'b101001: ara_req.op = ara_pkg::VSRA; + 6'b101010: ara_req.op = ara_pkg::VSSRL; + 6'b101011: ara_req.op = ara_pkg::VSSRA; 6'b101100: begin - ara_req_d.op = ara_pkg::VNSRL; - ara_req_d.conversion_vs1 = OpQueueConversionZExt2; - ara_req_d.eew_vs2 = vtype_q.vsew.next(); + ara_req.op = ara_pkg::VNSRL; + ara_req.conversion_vs1 = OpQueueConversionZExt2; + ara_req.eew_vs2 = vtype_q.vsew.next(); lmul_vs2 = next_lmul(vtype_q.vlmul); // Check whether the EEW is not too wide. 
if (int'(vtype_q.vsew) > int'(EW32)) illegal_insn = 1'b1; // Check whether we can access vs2 - unique case (ara_req_d.emul.next()) + unique case (ara_req.emul.next()) LMUL_2: if ((insn.varith_type.rs2 & 5'b00001) != 5'b00000) illegal_insn = 1'b1; LMUL_4: if ((insn.varith_type.rs2 & 5'b00011) != 5'b00000) illegal_insn = 1'b1; LMUL_8: if ((insn.varith_type.rs2 & 5'b00111) != 5'b00000) illegal_insn = 1'b1; @@ -1110,16 +1145,16 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( endcase end 6'b101101: begin - ara_req_d.op = ara_pkg::VNSRA; - ara_req_d.conversion_vs1 = OpQueueConversionZExt2; - ara_req_d.eew_vs2 = vtype_q.vsew.next(); + ara_req.op = ara_pkg::VNSRA; + ara_req.conversion_vs1 = OpQueueConversionZExt2; + ara_req.eew_vs2 = vtype_q.vsew.next(); lmul_vs2 = next_lmul(vtype_q.vlmul); // Check whether the EEW is not too wide. if (int'(vtype_q.vsew) > int'(EW32)) illegal_insn = 1'b1; // Check whether we can access vs2 - unique case (ara_req_d.emul.next()) + unique case (ara_req.emul.next()) LMUL_2: if ((insn.varith_type.rs2 & 5'b00001) != 5'b00000) illegal_insn = 1'b1; LMUL_4: if ((insn.varith_type.rs2 & 5'b00011) != 5'b00000) illegal_insn = 1'b1; LMUL_8: if ((insn.varith_type.rs2 & 5'b00111) != 5'b00000) illegal_insn = 1'b1; @@ -1128,19 +1163,19 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( endcase end 6'b101110: begin - ara_req_d.op = ara_pkg::VNCLIPU; - ara_req_d.eew_vs2 = vtype_q.vsew.next(); + ara_req.op = ara_pkg::VNCLIPU; + ara_req.eew_vs2 = vtype_q.vsew.next(); end 6'b101111: begin - ara_req_d.op = ara_pkg::VNCLIP; - ara_req_d.eew_vs2 = vtype_q.vsew.next(); + ara_req.op = ara_pkg::VNCLIP; + ara_req.eew_vs2 = vtype_q.vsew.next(); end default: illegal_insn = 1'b1; endcase // Instructions with an integer LMUL have extra constraints on the registers they can // access. 
- unique case (ara_req_d.emul) + unique case (ara_req.emul) LMUL_2: if ((insn.varith_type.rs2 & 5'b00001) != 5'b00000 || (insn.varith_type.rd & 5'b00001) != 5'b00000) illegal_insn = 1'b1; LMUL_4: if ((insn.varith_type.rs2 & 5'b00011) != 5'b00000 || @@ -1156,61 +1191,61 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( OPMVV: begin: opmvv // These generate a request to Ara's backend - ara_req_d.vs1 = insn.varith_type.rs1; - ara_req_d.use_vs1 = 1'b1; - ara_req_d.vs2 = insn.varith_type.rs2; - ara_req_d.use_vs2 = 1'b1; - ara_req_d.vd = insn.varith_type.rd; - ara_req_d.use_vd = 1'b1; - ara_req_d.vm = insn.varith_type.vm; + ara_req.vs1 = insn.varith_type.rs1; + ara_req.use_vs1 = 1'b1; + ara_req.vs2 = insn.varith_type.rs2; + ara_req.use_vs2 = 1'b1; + ara_req.vd = insn.varith_type.rd; + ara_req.use_vd = 1'b1; + ara_req.vm = insn.varith_type.vm; ara_req_valid_d = 1'b1; // Assume an effective EMUL = LMUL1 by default (for the mask operations) - ara_req_d.emul = LMUL_1; + ara_req.emul = LMUL_1; // Decode based on the func6 field unique case (insn.varith_type.func6) // Encode, for each reduction, the bits of the neutral // value of each operation 6'b000000: begin - ara_req_d.op = ara_pkg::VREDSUM; - ara_req_d.conversion_vs1 = OpQueueReductionZExt; - ara_req_d.cvt_resize = resize_e'(2'b00); + ara_req.op = ara_pkg::VREDSUM; + ara_req.conversion_vs1 = OpQueueReductionZExt; + ara_req.cvt_resize = resize_e'(2'b00); end 6'b000001: begin - ara_req_d.op = ara_pkg::VREDAND; - ara_req_d.conversion_vs1 = OpQueueReductionZExt; - ara_req_d.cvt_resize = resize_e'(2'b11); + ara_req.op = ara_pkg::VREDAND; + ara_req.conversion_vs1 = OpQueueReductionZExt; + ara_req.cvt_resize = resize_e'(2'b11); end 6'b000010: begin - ara_req_d.op = ara_pkg::VREDOR; - ara_req_d.conversion_vs1 = OpQueueReductionZExt; - ara_req_d.cvt_resize = resize_e'(2'b00); + ara_req.op = ara_pkg::VREDOR; + ara_req.conversion_vs1 = OpQueueReductionZExt; + ara_req.cvt_resize = resize_e'(2'b00); end 6'b000011: 
begin - ara_req_d.op = ara_pkg::VREDXOR; - ara_req_d.conversion_vs1 = OpQueueReductionZExt; - ara_req_d.cvt_resize = resize_e'(2'b00); + ara_req.op = ara_pkg::VREDXOR; + ara_req.conversion_vs1 = OpQueueReductionZExt; + ara_req.cvt_resize = resize_e'(2'b00); end 6'b000100: begin - ara_req_d.op = ara_pkg::VREDMINU; - ara_req_d.conversion_vs1 = OpQueueReductionZExt; - ara_req_d.cvt_resize = resize_e'(2'b11); + ara_req.op = ara_pkg::VREDMINU; + ara_req.conversion_vs1 = OpQueueReductionZExt; + ara_req.cvt_resize = resize_e'(2'b11); end 6'b000101: begin - ara_req_d.op = ara_pkg::VREDMIN; - ara_req_d.conversion_vs1 = OpQueueReductionZExt; - ara_req_d.cvt_resize = resize_e'(2'b01); + ara_req.op = ara_pkg::VREDMIN; + ara_req.conversion_vs1 = OpQueueReductionZExt; + ara_req.cvt_resize = resize_e'(2'b01); end 6'b000110: begin - ara_req_d.op = ara_pkg::VREDMAXU; - ara_req_d.conversion_vs1 = OpQueueReductionZExt; - ara_req_d.cvt_resize = resize_e'(2'b00); + ara_req.op = ara_pkg::VREDMAXU; + ara_req.conversion_vs1 = OpQueueReductionZExt; + ara_req.cvt_resize = resize_e'(2'b00); end 6'b000111: begin - ara_req_d.op = ara_pkg::VREDMAX; - ara_req_d.conversion_vs1 = OpQueueReductionZExt; - ara_req_d.cvt_resize = resize_e'(2'b10); + ara_req.op = ara_pkg::VREDMAX; + ara_req.conversion_vs1 = OpQueueReductionZExt; + ara_req.cvt_resize = resize_e'(2'b10); end 6'b010000: begin // VWXUNARY0 // vmv.x.s @@ -1220,144 +1255,144 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( case (insn.varith_type.rs1) 5'b00000: begin - ara_req_d.op = ara_pkg::VMVXS; - ara_req_d.vl = 1; + ara_req.op = ara_pkg::VMVXS; + ara_req.vl = 1; end 5'b10000: begin - ara_req_d.op = ara_pkg::VCPOP; - ara_req_d.use_vs1 = 1'b0; + ara_req.op = ara_pkg::VCPOP; + ara_req.use_vs1 = 1'b0; end 5'b10001: begin - ara_req_d.op = ara_pkg::VFIRST; - ara_req_d.use_vs1 = 1'b0; + ara_req.op = ara_pkg::VFIRST; + ara_req.use_vs1 = 1'b0; end default :; endcase - ara_req_d.use_vd = 1'b0; - ara_req_d.vstart = '0; + 
ara_req.use_vd = 1'b0; + ara_req.vstart = '0; skip_lmul_checks = 1'b1; ignore_zero_vl_check = 1'b1; // Sign extend operands unique case (vtype_q.vsew) EW8: begin - ara_req_d.conversion_vs2 = OpQueueConversionSExt8; + ara_req.conversion_vs2 = OpQueueConversionSExt8; end EW16: begin - ara_req_d.conversion_vs2 = OpQueueConversionSExt4; + ara_req.conversion_vs2 = OpQueueConversionSExt4; end EW32: begin - ara_req_d.conversion_vs2 = OpQueueConversionSExt2; + ara_req.conversion_vs2 = OpQueueConversionSExt2; end default:; endcase // Wait until the back-end answers to acknowledge those instructions - if (ara_resp_valid_i) begin + if (ara_resp_valid) begin acc_req_ready_o = 1'b1; - acc_resp_o.result = ara_resp_i.resp; - acc_resp_o.error = ara_resp_i.error; + acc_resp_o.result = ara_resp.resp; + acc_resp_o.error = ara_resp.error; acc_resp_valid_o = 1'b1; ara_req_valid_d = 1'b0; end end 6'b010100: begin - ara_req_d.use_vd_op = 1'b1; - ara_req_d.use_vs1 = 1'b0; + ara_req.use_vd_op = 1'b1; + ara_req.use_vs1 = 1'b0; case (insn.varith_type.rs1) - 5'b00001: ara_req_d.op = ara_pkg::VMSBF; - 5'b00010: ara_req_d.op = ara_pkg::VMSOF; - 5'b00011: ara_req_d.op = ara_pkg::VMSIF; - 5'b10000: ara_req_d.op = ara_pkg::VIOTA; - 5'b10001: ara_req_d.op = ara_pkg::VID; + 5'b00001: ara_req.op = ara_pkg::VMSBF; + 5'b00010: ara_req.op = ara_pkg::VMSOF; + 5'b00011: ara_req.op = ara_pkg::VMSIF; + 5'b10000: ara_req.op = ara_pkg::VIOTA; + 5'b10001: ara_req.op = ara_pkg::VID; endcase end - 6'b001000: ara_req_d.op = ara_pkg::VAADDU; - 6'b001001: ara_req_d.op = ara_pkg::VAADD; - 6'b001010: ara_req_d.op = ara_pkg::VASUBU; - 6'b001011: ara_req_d.op = ara_pkg::VASUB; + 6'b001000: ara_req.op = ara_pkg::VAADDU; + 6'b001001: ara_req.op = ara_pkg::VAADD; + 6'b001010: ara_req.op = ara_pkg::VASUBU; + 6'b001011: ara_req.op = ara_pkg::VASUB; 6'b011000: begin - ara_req_d.op = ara_pkg::VMANDNOT; + ara_req.op = ara_pkg::VMANDNOT; // Prefer mask operation on EW8 encoding // In mask operations, vs1, vs2, vd should // have 
the same encoding. - ara_req_d.eew_vs1 = EW8; - ara_req_d.eew_vs2 = EW8; - ara_req_d.eew_vd_op = EW8; - ara_req_d.vtype.vsew = EW8; - ara_req_d.use_vd_op = 1'b1; + ara_req.eew_vs1 = EW8; + ara_req.eew_vs2 = EW8; + ara_req.eew_vd_op = EW8; + ara_req.vtype.vsew = EW8; + ara_req.use_vd_op = 1'b1; end 6'b011001: begin - ara_req_d.op = ara_pkg::VMAND; - ara_req_d.eew_vs1 = EW8; - ara_req_d.eew_vs2 = EW8; - ara_req_d.eew_vd_op = EW8; - ara_req_d.vtype.vsew = EW8; - ara_req_d.use_vd_op = 1'b1; + ara_req.op = ara_pkg::VMAND; + ara_req.eew_vs1 = EW8; + ara_req.eew_vs2 = EW8; + ara_req.eew_vd_op = EW8; + ara_req.vtype.vsew = EW8; + ara_req.use_vd_op = 1'b1; end 6'b011010: begin - ara_req_d.op = ara_pkg::VMOR; - ara_req_d.eew_vs1 = EW8; - ara_req_d.eew_vs2 = EW8; - ara_req_d.eew_vd_op = EW8; - ara_req_d.vtype.vsew = EW8; - ara_req_d.use_vd_op = 1'b1; + ara_req.op = ara_pkg::VMOR; + ara_req.eew_vs1 = EW8; + ara_req.eew_vs2 = EW8; + ara_req.eew_vd_op = EW8; + ara_req.vtype.vsew = EW8; + ara_req.use_vd_op = 1'b1; end 6'b011011: begin - ara_req_d.op = ara_pkg::VMXOR; - ara_req_d.eew_vs1 = EW8; - ara_req_d.eew_vs2 = EW8; - ara_req_d.eew_vd_op = EW8; - ara_req_d.vtype.vsew = EW8; - ara_req_d.use_vd_op = 1'b1; + ara_req.op = ara_pkg::VMXOR; + ara_req.eew_vs1 = EW8; + ara_req.eew_vs2 = EW8; + ara_req.eew_vd_op = EW8; + ara_req.vtype.vsew = EW8; + ara_req.use_vd_op = 1'b1; end 6'b011100: begin - ara_req_d.op = ara_pkg::VMORNOT; - ara_req_d.eew_vs1 = EW8; - ara_req_d.eew_vs2 = EW8; - ara_req_d.eew_vd_op = EW8; - ara_req_d.vtype.vsew = EW8; - ara_req_d.use_vd_op = 1'b1; + ara_req.op = ara_pkg::VMORNOT; + ara_req.eew_vs1 = EW8; + ara_req.eew_vs2 = EW8; + ara_req.eew_vd_op = EW8; + ara_req.vtype.vsew = EW8; + ara_req.use_vd_op = 1'b1; end 6'b011101: begin - ara_req_d.op = ara_pkg::VMNAND; - ara_req_d.eew_vs1 = EW8; - ara_req_d.eew_vs2 = EW8; - ara_req_d.eew_vd_op = EW8; - ara_req_d.vtype.vsew = EW8; - ara_req_d.use_vd_op = 1'b1; + ara_req.op = ara_pkg::VMNAND; + ara_req.eew_vs1 = EW8; + 
ara_req.eew_vs2 = EW8; + ara_req.eew_vd_op = EW8; + ara_req.vtype.vsew = EW8; + ara_req.use_vd_op = 1'b1; end 6'b011110: begin - ara_req_d.op = ara_pkg::VMNOR; - ara_req_d.eew_vs1 = EW8; - ara_req_d.eew_vs2 = EW8; - ara_req_d.eew_vd_op = EW8; - ara_req_d.vtype.vsew = EW8; - ara_req_d.use_vd_op = 1'b1; + ara_req.op = ara_pkg::VMNOR; + ara_req.eew_vs1 = EW8; + ara_req.eew_vs2 = EW8; + ara_req.eew_vd_op = EW8; + ara_req.vtype.vsew = EW8; + ara_req.use_vd_op = 1'b1; end 6'b011111: begin - ara_req_d.op = ara_pkg::VMXNOR; - ara_req_d.eew_vs1 = EW8; - ara_req_d.eew_vs2 = EW8; - ara_req_d.eew_vd_op = EW8; - ara_req_d.vtype.vsew = EW8; - ara_req_d.use_vd_op = 1'b1; + ara_req.op = ara_pkg::VMXNOR; + ara_req.eew_vs1 = EW8; + ara_req.eew_vs2 = EW8; + ara_req.eew_vd_op = EW8; + ara_req.vtype.vsew = EW8; + ara_req.use_vd_op = 1'b1; end 6'b010010: begin // VXUNARY0 // These instructions do not use vs1 - ara_req_d.use_vs1 = 1'b0; + ara_req.use_vs1 = 1'b0; skip_vs1_lmul_checks = 1'b1; // They are always encoded as ADDs with zero. 
- ara_req_d.op = ara_pkg::VADD; - ara_req_d.use_scalar_op = 1'b1; - ara_req_d.scalar_op = '0; + ara_req.op = ara_pkg::VADD; + ara_req.use_scalar_op = 1'b1; + ara_req.scalar_op = '0; case (insn.varith_type.rs1) 5'b00010: begin // VZEXT.VF8 - ara_req_d.conversion_vs2 = OpQueueConversionZExt8; - ara_req_d.eew_vs2 = eew_q[insn.varith_type.rs2]; - ara_req_d.cvt_resize = CVT_WIDE; + ara_req.conversion_vs2 = OpQueueConversionZExt8; + ara_req.eew_vs2 = eew_q[insn.varith_type.rs2]; + ara_req.cvt_resize = CVT_WIDE; // Invalid conversion if (int'(vtype_q.vsew) < int'(EW64) || @@ -1365,9 +1400,9 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( illegal_insn = 1'b1; end 5'b00011: begin // VSEXT.VF8 - ara_req_d.conversion_vs2 = OpQueueConversionSExt8; - ara_req_d.eew_vs2 = eew_q[insn.varith_type.rs2]; - ara_req_d.cvt_resize = CVT_WIDE; + ara_req.conversion_vs2 = OpQueueConversionSExt8; + ara_req.eew_vs2 = eew_q[insn.varith_type.rs2]; + ara_req.cvt_resize = CVT_WIDE; // Invalid conversion if (int'(vtype_q.vsew) < int'(EW64) || @@ -1375,36 +1410,36 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( illegal_insn = 1'b1; end 5'b00100: begin // VZEXT.VF4 - ara_req_d.conversion_vs2 = OpQueueConversionZExt4; - ara_req_d.eew_vs2 = prev_prev_ew(vtype_q.vsew); - ara_req_d.cvt_resize = CVT_WIDE; + ara_req.conversion_vs2 = OpQueueConversionZExt4; + ara_req.eew_vs2 = prev_prev_ew(vtype_q.vsew); + ara_req.cvt_resize = CVT_WIDE; // Invalid conversion if (int'(vtype_q.vsew) < int'(EW32) || int'(vtype_q.vlmul) inside {LMUL_1_4, LMUL_1_8}) illegal_insn = 1'b1; end 5'b00101: begin // VSEXT.VF4 - ara_req_d.conversion_vs2 = OpQueueConversionSExt4; - ara_req_d.eew_vs2 = prev_prev_ew(vtype_q.vsew); - ara_req_d.cvt_resize = CVT_WIDE; + ara_req.conversion_vs2 = OpQueueConversionSExt4; + ara_req.eew_vs2 = prev_prev_ew(vtype_q.vsew); + ara_req.cvt_resize = CVT_WIDE; // Invalid conversion if (int'(vtype_q.vsew) < int'(EW32) || int'(vtype_q.vlmul) inside {LMUL_1_4, LMUL_1_8}) 
illegal_insn = 1'b1; end 5'b00110: begin // VZEXT.VF2 - ara_req_d.conversion_vs2 = OpQueueConversionZExt2; - ara_req_d.eew_vs2 = vtype_q.vsew.prev(); - ara_req_d.cvt_resize = CVT_WIDE; + ara_req.conversion_vs2 = OpQueueConversionZExt2; + ara_req.eew_vs2 = vtype_q.vsew.prev(); + ara_req.cvt_resize = CVT_WIDE; // Invalid conversion if (int'(vtype_q.vsew) < int'(EW16) || int'(vtype_q.vlmul) inside {LMUL_1_8}) illegal_insn = 1'b1; end 5'b00111: begin // VSEXT.VF2 - ara_req_d.conversion_vs2 = OpQueueConversionSExt2; - ara_req_d.eew_vs2 = vtype_q.vsew.prev(); - ara_req_d.cvt_resize = CVT_WIDE; + ara_req.conversion_vs2 = OpQueueConversionSExt2; + ara_req.eew_vs2 = vtype_q.vsew.prev(); + ara_req.cvt_resize = CVT_WIDE; // Invalid conversion if (int'(vtype_q.vsew) < int'(EW16) || int'(vtype_q.vlmul) inside {LMUL_1_8}) @@ -1414,158 +1449,158 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( endcase end // Divide instructions - 6'b100000: ara_req_d.op = ara_pkg::VDIVU; - 6'b100001: ara_req_d.op = ara_pkg::VDIV; - 6'b100010: ara_req_d.op = ara_pkg::VREMU; - 6'b100011: ara_req_d.op = ara_pkg::VREM; + 6'b100000: ara_req.op = ara_pkg::VDIVU; + 6'b100001: ara_req.op = ara_pkg::VDIV; + 6'b100010: ara_req.op = ara_pkg::VREMU; + 6'b100011: ara_req.op = ara_pkg::VREM; // Multiply instructions - 6'b100100: ara_req_d.op = ara_pkg::VMULHU; - 6'b100101: ara_req_d.op = ara_pkg::VMUL; - 6'b100110: ara_req_d.op = ara_pkg::VMULHSU; - 6'b100111: ara_req_d.op = ara_pkg::VMULH; + 6'b100100: ara_req.op = ara_pkg::VMULHU; + 6'b100101: ara_req.op = ara_pkg::VMUL; + 6'b100110: ara_req.op = ara_pkg::VMULHSU; + 6'b100111: ara_req.op = ara_pkg::VMULH; // Multiply-Add instructions // vd is also used as a source operand 6'b101001: begin - ara_req_d.op = ara_pkg::VMADD; - ara_req_d.use_vd_op = 1'b1; + ara_req.op = ara_pkg::VMADD; + ara_req.use_vd_op = 1'b1; // Swap "vs2" and "vd" since "vs2" is the addend and "vd" is the multiplicand - ara_req_d.swap_vs2_vd_op = 1'b1; + 
ara_req.swap_vs2_vd_op = 1'b1; end 6'b101011: begin - ara_req_d.op = ara_pkg::VNMSUB; - ara_req_d.use_vd_op = 1'b1; - ara_req_d.swap_vs2_vd_op = 1'b1; + ara_req.op = ara_pkg::VNMSUB; + ara_req.use_vd_op = 1'b1; + ara_req.swap_vs2_vd_op = 1'b1; end 6'b101101: begin - ara_req_d.op = ara_pkg::VMACC; - ara_req_d.use_vd_op = 1'b1; + ara_req.op = ara_pkg::VMACC; + ara_req.use_vd_op = 1'b1; end 6'b101111: begin - ara_req_d.op = ara_pkg::VNMSAC; - ara_req_d.use_vd_op = 1'b1; + ara_req.op = ara_pkg::VNMSAC; + ara_req.use_vd_op = 1'b1; end // Widening instructions 6'b110000: begin // VWADDU - ara_req_d.op = ara_pkg::VADD; - ara_req_d.emul = next_lmul(vtype_q.vlmul); - ara_req_d.vtype.vsew = vtype_q.vsew.next(); - ara_req_d.conversion_vs1 = OpQueueConversionZExt2; - ara_req_d.conversion_vs2 = OpQueueConversionZExt2; - ara_req_d.cvt_resize = CVT_WIDE; + ara_req.op = ara_pkg::VADD; + ara_req.emul = next_lmul(vtype_q.vlmul); + ara_req.vtype.vsew = vtype_q.vsew.next(); + ara_req.conversion_vs1 = OpQueueConversionZExt2; + ara_req.conversion_vs2 = OpQueueConversionZExt2; + ara_req.cvt_resize = CVT_WIDE; end 6'b110001: begin // VWADD - ara_req_d.op = ara_pkg::VADD; - ara_req_d.emul = next_lmul(vtype_q.vlmul); - ara_req_d.vtype.vsew = vtype_q.vsew.next(); - ara_req_d.conversion_vs1 = OpQueueConversionSExt2; - ara_req_d.conversion_vs2 = OpQueueConversionSExt2; - ara_req_d.cvt_resize = CVT_WIDE; + ara_req.op = ara_pkg::VADD; + ara_req.emul = next_lmul(vtype_q.vlmul); + ara_req.vtype.vsew = vtype_q.vsew.next(); + ara_req.conversion_vs1 = OpQueueConversionSExt2; + ara_req.conversion_vs2 = OpQueueConversionSExt2; + ara_req.cvt_resize = CVT_WIDE; end 6'b110010: begin // VWSUBU - ara_req_d.op = ara_pkg::VSUB; - ara_req_d.emul = next_lmul(vtype_q.vlmul); - ara_req_d.vtype.vsew = vtype_q.vsew.next(); - ara_req_d.conversion_vs1 = OpQueueConversionZExt2; - ara_req_d.conversion_vs2 = OpQueueConversionZExt2; - ara_req_d.cvt_resize = CVT_WIDE; + ara_req.op = ara_pkg::VSUB; + ara_req.emul = 
next_lmul(vtype_q.vlmul); + ara_req.vtype.vsew = vtype_q.vsew.next(); + ara_req.conversion_vs1 = OpQueueConversionZExt2; + ara_req.conversion_vs2 = OpQueueConversionZExt2; + ara_req.cvt_resize = CVT_WIDE; end 6'b110011: begin // VWSUB - ara_req_d.op = ara_pkg::VSUB; - ara_req_d.emul = next_lmul(vtype_q.vlmul); - ara_req_d.vtype.vsew = vtype_q.vsew.next(); - ara_req_d.conversion_vs1 = OpQueueConversionSExt2; - ara_req_d.conversion_vs2 = OpQueueConversionSExt2; - ara_req_d.cvt_resize = CVT_WIDE; + ara_req.op = ara_pkg::VSUB; + ara_req.emul = next_lmul(vtype_q.vlmul); + ara_req.vtype.vsew = vtype_q.vsew.next(); + ara_req.conversion_vs1 = OpQueueConversionSExt2; + ara_req.conversion_vs2 = OpQueueConversionSExt2; + ara_req.cvt_resize = CVT_WIDE; end 6'b110100: begin // VWADDU.W - ara_req_d.op = ara_pkg::VADD; - ara_req_d.emul = next_lmul(vtype_q.vlmul); + ara_req.op = ara_pkg::VADD; + ara_req.emul = next_lmul(vtype_q.vlmul); lmul_vs2 = next_lmul(vtype_q.vlmul); - ara_req_d.vtype.vsew = vtype_q.vsew.next(); - ara_req_d.conversion_vs1 = OpQueueConversionZExt2; - ara_req_d.eew_vs2 = vtype_q.vsew.next(); - ara_req_d.cvt_resize = CVT_WIDE; + ara_req.vtype.vsew = vtype_q.vsew.next(); + ara_req.conversion_vs1 = OpQueueConversionZExt2; + ara_req.eew_vs2 = vtype_q.vsew.next(); + ara_req.cvt_resize = CVT_WIDE; end 6'b110101: begin // VWADD.W - ara_req_d.op = ara_pkg::VADD; - ara_req_d.emul = next_lmul(vtype_q.vlmul); + ara_req.op = ara_pkg::VADD; + ara_req.emul = next_lmul(vtype_q.vlmul); lmul_vs2 = next_lmul(vtype_q.vlmul); - ara_req_d.vtype.vsew = vtype_q.vsew.next(); - ara_req_d.conversion_vs1 = OpQueueConversionSExt2; - ara_req_d.eew_vs2 = vtype_q.vsew.next(); - ara_req_d.cvt_resize = CVT_WIDE; + ara_req.vtype.vsew = vtype_q.vsew.next(); + ara_req.conversion_vs1 = OpQueueConversionSExt2; + ara_req.eew_vs2 = vtype_q.vsew.next(); + ara_req.cvt_resize = CVT_WIDE; end 6'b110110: begin // VWSUBU.W - ara_req_d.op = ara_pkg::VSUB; - ara_req_d.emul = next_lmul(vtype_q.vlmul); + 
ara_req.op = ara_pkg::VSUB; + ara_req.emul = next_lmul(vtype_q.vlmul); lmul_vs2 = next_lmul(vtype_q.vlmul); - ara_req_d.vtype.vsew = vtype_q.vsew.next(); - ara_req_d.conversion_vs1 = OpQueueConversionZExt2; - ara_req_d.eew_vs2 = vtype_q.vsew.next(); - ara_req_d.cvt_resize = CVT_WIDE; + ara_req.vtype.vsew = vtype_q.vsew.next(); + ara_req.conversion_vs1 = OpQueueConversionZExt2; + ara_req.eew_vs2 = vtype_q.vsew.next(); + ara_req.cvt_resize = CVT_WIDE; end 6'b110111: begin // VWSUB.W - ara_req_d.op = ara_pkg::VSUB; - ara_req_d.emul = next_lmul(vtype_q.vlmul); + ara_req.op = ara_pkg::VSUB; + ara_req.emul = next_lmul(vtype_q.vlmul); lmul_vs2 = next_lmul(vtype_q.vlmul); - ara_req_d.vtype.vsew = vtype_q.vsew.next(); - ara_req_d.conversion_vs1 = OpQueueConversionSExt2; - ara_req_d.eew_vs2 = vtype_q.vsew.next(); - ara_req_d.cvt_resize = CVT_WIDE; + ara_req.vtype.vsew = vtype_q.vsew.next(); + ara_req.conversion_vs1 = OpQueueConversionSExt2; + ara_req.eew_vs2 = vtype_q.vsew.next(); + ara_req.cvt_resize = CVT_WIDE; end 6'b111000: begin // VWMULU - ara_req_d.op = ara_pkg::VMUL; - ara_req_d.emul = next_lmul(vtype_q.vlmul); - ara_req_d.vtype.vsew = vtype_q.vsew.next(); - ara_req_d.conversion_vs1 = OpQueueConversionZExt2; - ara_req_d.conversion_vs2 = OpQueueConversionZExt2; - ara_req_d.cvt_resize = CVT_WIDE; + ara_req.op = ara_pkg::VMUL; + ara_req.emul = next_lmul(vtype_q.vlmul); + ara_req.vtype.vsew = vtype_q.vsew.next(); + ara_req.conversion_vs1 = OpQueueConversionZExt2; + ara_req.conversion_vs2 = OpQueueConversionZExt2; + ara_req.cvt_resize = CVT_WIDE; end 6'b111010: begin // VWMULSU - ara_req_d.op = ara_pkg::VMUL; - ara_req_d.emul = next_lmul(vtype_q.vlmul); - ara_req_d.vtype.vsew = vtype_q.vsew.next(); - ara_req_d.conversion_vs1 = OpQueueConversionZExt2; - ara_req_d.conversion_vs2 = OpQueueConversionSExt2; - ara_req_d.cvt_resize = CVT_WIDE; + ara_req.op = ara_pkg::VMUL; + ara_req.emul = next_lmul(vtype_q.vlmul); + ara_req.vtype.vsew = vtype_q.vsew.next(); + 
ara_req.conversion_vs1 = OpQueueConversionZExt2; + ara_req.conversion_vs2 = OpQueueConversionSExt2; + ara_req.cvt_resize = CVT_WIDE; end 6'b111011: begin // VWMUL - ara_req_d.op = ara_pkg::VMUL; - ara_req_d.emul = next_lmul(vtype_q.vlmul); - ara_req_d.vtype.vsew = vtype_q.vsew.next(); - ara_req_d.conversion_vs1 = OpQueueConversionSExt2; - ara_req_d.conversion_vs2 = OpQueueConversionSExt2; - ara_req_d.cvt_resize = CVT_WIDE; + ara_req.op = ara_pkg::VMUL; + ara_req.emul = next_lmul(vtype_q.vlmul); + ara_req.vtype.vsew = vtype_q.vsew.next(); + ara_req.conversion_vs1 = OpQueueConversionSExt2; + ara_req.conversion_vs2 = OpQueueConversionSExt2; + ara_req.cvt_resize = CVT_WIDE; end 6'b111100: begin // VWMACCU - ara_req_d.op = ara_pkg::VMACC; - ara_req_d.use_vd_op = 1'b1; - ara_req_d.emul = next_lmul(vtype_q.vlmul); - ara_req_d.vtype.vsew = vtype_q.vsew.next(); - ara_req_d.conversion_vs1 = OpQueueConversionZExt2; - ara_req_d.conversion_vs2 = OpQueueConversionZExt2; - ara_req_d.eew_vd_op = vtype_q.vsew.next(); - ara_req_d.cvt_resize = CVT_WIDE; + ara_req.op = ara_pkg::VMACC; + ara_req.use_vd_op = 1'b1; + ara_req.emul = next_lmul(vtype_q.vlmul); + ara_req.vtype.vsew = vtype_q.vsew.next(); + ara_req.conversion_vs1 = OpQueueConversionZExt2; + ara_req.conversion_vs2 = OpQueueConversionZExt2; + ara_req.eew_vd_op = vtype_q.vsew.next(); + ara_req.cvt_resize = CVT_WIDE; end 6'b111101: begin // VWMACC - ara_req_d.op = ara_pkg::VMACC; - ara_req_d.use_vd_op = 1'b1; - ara_req_d.emul = next_lmul(vtype_q.vlmul); - ara_req_d.vtype.vsew = vtype_q.vsew.next(); - ara_req_d.conversion_vs1 = OpQueueConversionSExt2; - ara_req_d.conversion_vs2 = OpQueueConversionSExt2; - ara_req_d.eew_vd_op = vtype_q.vsew.next(); - ara_req_d.cvt_resize = CVT_WIDE; + ara_req.op = ara_pkg::VMACC; + ara_req.use_vd_op = 1'b1; + ara_req.emul = next_lmul(vtype_q.vlmul); + ara_req.vtype.vsew = vtype_q.vsew.next(); + ara_req.conversion_vs1 = OpQueueConversionSExt2; + ara_req.conversion_vs2 = OpQueueConversionSExt2; + 
ara_req.eew_vd_op = vtype_q.vsew.next(); + ara_req.cvt_resize = CVT_WIDE; end 6'b111111: begin // VWMACCSU - ara_req_d.op = ara_pkg::VMACC; - ara_req_d.use_vd_op = 1'b1; - ara_req_d.emul = next_lmul(vtype_q.vlmul); - ara_req_d.vtype.vsew = vtype_q.vsew.next(); - ara_req_d.conversion_vs1 = OpQueueConversionSExt2; - ara_req_d.conversion_vs2 = OpQueueConversionZExt2; - ara_req_d.eew_vd_op = vtype_q.vsew.next(); - ara_req_d.cvt_resize = CVT_WIDE; + ara_req.op = ara_pkg::VMACC; + ara_req.use_vd_op = 1'b1; + ara_req.emul = next_lmul(vtype_q.vlmul); + ara_req.vtype.vsew = vtype_q.vsew.next(); + ara_req.conversion_vs1 = OpQueueConversionSExt2; + ara_req.conversion_vs2 = OpQueueConversionZExt2; + ara_req.eew_vd_op = vtype_q.vsew.next(); + ara_req.cvt_resize = CVT_WIDE; end default: illegal_insn = 1'b1; endcase @@ -1574,7 +1609,7 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( // access. These constraints can be different for the two source operands and the // destination register. if (!skip_lmul_checks) begin - unique case (ara_req_d.emul) + unique case (ara_req.emul) LMUL_2: if ((insn.varith_type.rd & 5'b00001) != 5'b00000) illegal_insn = 1'b1; LMUL_4: if ((insn.varith_type.rd & 5'b00011) != 5'b00000) illegal_insn = 1'b1; LMUL_8: if ((insn.varith_type.rd & 5'b00111) != 5'b00000) illegal_insn = 1'b1; @@ -1595,7 +1630,7 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( end // Ara cannot support instructions who operates on more than 64 bits. 
- if (int'(ara_req_d.vtype.vsew) > int'(EW64)) illegal_insn = 1'b1; + if (int'(ara_req.vtype.vsew) > int'(EW64)) illegal_insn = 1'b1; // Instruction is invalid if the vtype is invalid if (vtype_q.vill) illegal_insn = 1'b1; @@ -1603,211 +1638,211 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( OPMVX: begin: opmvx // These generate a request to Ara's backend - ara_req_d.scalar_op = acc_req_i.rs1; - ara_req_d.use_scalar_op = 1'b1; - ara_req_d.vs2 = insn.varith_type.rs2; - ara_req_d.use_vs2 = 1'b1; - ara_req_d.vd = insn.varith_type.rd; - ara_req_d.use_vd = 1'b1; - ara_req_d.vm = insn.varith_type.vm; - ara_req_d.is_stride_np2 = is_stride_np2; + ara_req.scalar_op = acc_req_i.rs1; + ara_req.use_scalar_op = 1'b1; + ara_req.vs2 = insn.varith_type.rs2; + ara_req.use_vs2 = 1'b1; + ara_req.vd = insn.varith_type.rd; + ara_req.use_vd = 1'b1; + ara_req.vm = insn.varith_type.vm; + ara_req.is_stride_np2 = is_stride_np2; ara_req_valid_d = 1'b1; // Decode based on the func6 field unique case (insn.varith_type.func6) - 6'b001000: ara_req_d.op = ara_pkg::VAADDU; - 6'b001001: ara_req_d.op = ara_pkg::VAADD; - 6'b001010: ara_req_d.op = ara_pkg::VASUBU; - 6'b001011: ara_req_d.op = ara_pkg::VASUB; + 6'b001000: ara_req.op = ara_pkg::VAADDU; + 6'b001001: ara_req.op = ara_pkg::VAADD; + 6'b001010: ara_req.op = ara_pkg::VASUBU; + 6'b001011: ara_req.op = ara_pkg::VASUB; // Slides 6'b001110: begin // vslide1up - ara_req_d.op = ara_pkg::VSLIDEUP; - ara_req_d.stride = 1; - ara_req_d.eew_vs2 = vtype_q.vsew; + ara_req.op = ara_pkg::VSLIDEUP; + ara_req.stride = 1; + ara_req.eew_vs2 = vtype_q.vsew; // Request will need reshuffling - ara_req_d.scale_vl = 1'b1; + ara_req.scale_vl = 1'b1; // If stride > vl, the vslideup has no effects - if (|ara_req_d.stride[$bits(ara_req_d.stride)-1:$bits(vl_q)] || - (vlen_t'(ara_req_d.stride) >= vl_q)) null_vslideup = 1'b1; + if (|ara_req.stride[$bits(ara_req.stride)-1:$bits(vl_q)] || + (vlen_t'(ara_req.stride) >= vl_q)) null_vslideup = 1'b1; end 
6'b001111: begin // vslide1down - ara_req_d.op = ara_pkg::VSLIDEDOWN; - ara_req_d.stride = 1; - ara_req_d.eew_vs2 = vtype_q.vsew; + ara_req.op = ara_pkg::VSLIDEDOWN; + ara_req.stride = 1; + ara_req.eew_vs2 = vtype_q.vsew; // Request will need reshuffling - ara_req_d.scale_vl = 1'b1; + ara_req.scale_vl = 1'b1; end 6'b010000: begin // VRXUNARY0 // vmv.s.x - ara_req_d.op = ara_pkg::VMVSX; - ara_req_d.use_vs2 = 1'b0; - ara_req_d.vl = |vl_q ? 1 : '0; + ara_req.op = ara_pkg::VMVSX; + ara_req.use_vs2 = 1'b0; + ara_req.vl = |vl_q ? 1 : '0; // This instruction ignores LMUL checks skip_lmul_checks = 1'b1; end // Divide instructions - 6'b100000: ara_req_d.op = ara_pkg::VDIVU; - 6'b100001: ara_req_d.op = ara_pkg::VDIV; - 6'b100010: ara_req_d.op = ara_pkg::VREMU; - 6'b100011: ara_req_d.op = ara_pkg::VREM; + 6'b100000: ara_req.op = ara_pkg::VDIVU; + 6'b100001: ara_req.op = ara_pkg::VDIV; + 6'b100010: ara_req.op = ara_pkg::VREMU; + 6'b100011: ara_req.op = ara_pkg::VREM; // Multiply instructions - 6'b100100: ara_req_d.op = ara_pkg::VMULHU; - 6'b100101: ara_req_d.op = ara_pkg::VMUL; - 6'b100110: ara_req_d.op = ara_pkg::VMULHSU; - 6'b100111: ara_req_d.op = ara_pkg::VMULH; + 6'b100100: ara_req.op = ara_pkg::VMULHU; + 6'b100101: ara_req.op = ara_pkg::VMUL; + 6'b100110: ara_req.op = ara_pkg::VMULHSU; + 6'b100111: ara_req.op = ara_pkg::VMULH; // Multiply-Add instructions // vd is also used as a source operand 6'b101001: begin - ara_req_d.op = ara_pkg::VMADD; - ara_req_d.use_vd_op = 1'b1; + ara_req.op = ara_pkg::VMADD; + ara_req.use_vd_op = 1'b1; // Swap "vs2" and "vd" since "vs2" is the addend and "vd" is the multiplicand - ara_req_d.swap_vs2_vd_op = 1'b1; + ara_req.swap_vs2_vd_op = 1'b1; end 6'b101011: begin - ara_req_d.op = ara_pkg::VNMSUB; - ara_req_d.use_vd_op = 1'b1; - ara_req_d.swap_vs2_vd_op = 1'b1; + ara_req.op = ara_pkg::VNMSUB; + ara_req.use_vd_op = 1'b1; + ara_req.swap_vs2_vd_op = 1'b1; end 6'b101101: begin - ara_req_d.op = ara_pkg::VMACC; - ara_req_d.use_vd_op = 1'b1; + 
ara_req.op = ara_pkg::VMACC; + ara_req.use_vd_op = 1'b1; end 6'b101111: begin - ara_req_d.op = ara_pkg::VNMSAC; - ara_req_d.use_vd_op = 1'b1; + ara_req.op = ara_pkg::VNMSAC; + ara_req.use_vd_op = 1'b1; end // Widening instructions 6'b110000: begin // VWADDU - ara_req_d.op = ara_pkg::VADD; - ara_req_d.emul = next_lmul(vtype_q.vlmul); - ara_req_d.vtype.vsew = vtype_q.vsew.next(); - ara_req_d.conversion_vs1 = OpQueueConversionZExt2; - ara_req_d.conversion_vs2 = OpQueueConversionZExt2; - ara_req_d.cvt_resize = CVT_WIDE; + ara_req.op = ara_pkg::VADD; + ara_req.emul = next_lmul(vtype_q.vlmul); + ara_req.vtype.vsew = vtype_q.vsew.next(); + ara_req.conversion_vs1 = OpQueueConversionZExt2; + ara_req.conversion_vs2 = OpQueueConversionZExt2; + ara_req.cvt_resize = CVT_WIDE; end 6'b110001: begin // VWADD - ara_req_d.op = ara_pkg::VADD; - ara_req_d.emul = next_lmul(vtype_q.vlmul); - ara_req_d.vtype.vsew = vtype_q.vsew.next(); - ara_req_d.conversion_vs1 = OpQueueConversionSExt2; - ara_req_d.conversion_vs2 = OpQueueConversionSExt2; - ara_req_d.cvt_resize = CVT_WIDE; + ara_req.op = ara_pkg::VADD; + ara_req.emul = next_lmul(vtype_q.vlmul); + ara_req.vtype.vsew = vtype_q.vsew.next(); + ara_req.conversion_vs1 = OpQueueConversionSExt2; + ara_req.conversion_vs2 = OpQueueConversionSExt2; + ara_req.cvt_resize = CVT_WIDE; end 6'b110010: begin // VWSUBU - ara_req_d.op = ara_pkg::VSUB; - ara_req_d.emul = next_lmul(vtype_q.vlmul); - ara_req_d.vtype.vsew = vtype_q.vsew.next(); - ara_req_d.conversion_vs1 = OpQueueConversionZExt2; - ara_req_d.conversion_vs2 = OpQueueConversionZExt2; - ara_req_d.cvt_resize = CVT_WIDE; + ara_req.op = ara_pkg::VSUB; + ara_req.emul = next_lmul(vtype_q.vlmul); + ara_req.vtype.vsew = vtype_q.vsew.next(); + ara_req.conversion_vs1 = OpQueueConversionZExt2; + ara_req.conversion_vs2 = OpQueueConversionZExt2; + ara_req.cvt_resize = CVT_WIDE; end 6'b110011: begin // VWSUB - ara_req_d.op = ara_pkg::VSUB; - ara_req_d.emul = next_lmul(vtype_q.vlmul); - ara_req_d.vtype.vsew = 
vtype_q.vsew.next(); - ara_req_d.conversion_vs1 = OpQueueConversionSExt2; - ara_req_d.conversion_vs2 = OpQueueConversionSExt2; - ara_req_d.cvt_resize = CVT_WIDE; + ara_req.op = ara_pkg::VSUB; + ara_req.emul = next_lmul(vtype_q.vlmul); + ara_req.vtype.vsew = vtype_q.vsew.next(); + ara_req.conversion_vs1 = OpQueueConversionSExt2; + ara_req.conversion_vs2 = OpQueueConversionSExt2; + ara_req.cvt_resize = CVT_WIDE; end 6'b110100: begin // VWADDU.W - ara_req_d.op = ara_pkg::VADD; - ara_req_d.emul = next_lmul(vtype_q.vlmul); + ara_req.op = ara_pkg::VADD; + ara_req.emul = next_lmul(vtype_q.vlmul); lmul_vs2 = next_lmul(vtype_q.vlmul); - ara_req_d.vtype.vsew = vtype_q.vsew.next(); - ara_req_d.conversion_vs1 = OpQueueConversionZExt2; - ara_req_d.eew_vs2 = vtype_q.vsew.next(); - ara_req_d.cvt_resize = CVT_WIDE; + ara_req.vtype.vsew = vtype_q.vsew.next(); + ara_req.conversion_vs1 = OpQueueConversionZExt2; + ara_req.eew_vs2 = vtype_q.vsew.next(); + ara_req.cvt_resize = CVT_WIDE; end 6'b110101: begin // VWADD.W - ara_req_d.op = ara_pkg::VADD; - ara_req_d.emul = next_lmul(vtype_q.vlmul); + ara_req.op = ara_pkg::VADD; + ara_req.emul = next_lmul(vtype_q.vlmul); lmul_vs2 = next_lmul(vtype_q.vlmul); - ara_req_d.vtype.vsew = vtype_q.vsew.next(); - ara_req_d.conversion_vs1 = OpQueueConversionSExt2; - ara_req_d.eew_vs2 = vtype_q.vsew.next(); - ara_req_d.cvt_resize = CVT_WIDE; + ara_req.vtype.vsew = vtype_q.vsew.next(); + ara_req.conversion_vs1 = OpQueueConversionSExt2; + ara_req.eew_vs2 = vtype_q.vsew.next(); + ara_req.cvt_resize = CVT_WIDE; end 6'b110110: begin // VWSUBU.W - ara_req_d.op = ara_pkg::VSUB; - ara_req_d.emul = next_lmul(vtype_q.vlmul); + ara_req.op = ara_pkg::VSUB; + ara_req.emul = next_lmul(vtype_q.vlmul); lmul_vs2 = next_lmul(vtype_q.vlmul); - ara_req_d.vtype.vsew = vtype_q.vsew.next(); - ara_req_d.conversion_vs1 = OpQueueConversionZExt2; - ara_req_d.eew_vs2 = vtype_q.vsew.next(); - ara_req_d.cvt_resize = CVT_WIDE; + ara_req.vtype.vsew = vtype_q.vsew.next(); + 
ara_req.conversion_vs1 = OpQueueConversionZExt2; + ara_req.eew_vs2 = vtype_q.vsew.next(); + ara_req.cvt_resize = CVT_WIDE; end 6'b110111: begin // VWSUB.W - ara_req_d.op = ara_pkg::VSUB; - ara_req_d.emul = next_lmul(vtype_q.vlmul); + ara_req.op = ara_pkg::VSUB; + ara_req.emul = next_lmul(vtype_q.vlmul); lmul_vs2 = next_lmul(vtype_q.vlmul); - ara_req_d.vtype.vsew = vtype_q.vsew.next(); - ara_req_d.conversion_vs1 = OpQueueConversionSExt2; - ara_req_d.eew_vs2 = vtype_q.vsew.next(); - ara_req_d.cvt_resize = CVT_WIDE; + ara_req.vtype.vsew = vtype_q.vsew.next(); + ara_req.conversion_vs1 = OpQueueConversionSExt2; + ara_req.eew_vs2 = vtype_q.vsew.next(); + ara_req.cvt_resize = CVT_WIDE; end 6'b111000: begin // VWMULU - ara_req_d.op = ara_pkg::VMUL; - ara_req_d.emul = next_lmul(vtype_q.vlmul); - ara_req_d.vtype.vsew = vtype_q.vsew.next(); - ara_req_d.conversion_vs1 = OpQueueConversionZExt2; - ara_req_d.conversion_vs2 = OpQueueConversionZExt2; - ara_req_d.cvt_resize = CVT_WIDE; + ara_req.op = ara_pkg::VMUL; + ara_req.emul = next_lmul(vtype_q.vlmul); + ara_req.vtype.vsew = vtype_q.vsew.next(); + ara_req.conversion_vs1 = OpQueueConversionZExt2; + ara_req.conversion_vs2 = OpQueueConversionZExt2; + ara_req.cvt_resize = CVT_WIDE; end 6'b111010: begin // VWMULSU - ara_req_d.op = ara_pkg::VMUL; - ara_req_d.emul = next_lmul(vtype_q.vlmul); - ara_req_d.vtype.vsew = vtype_q.vsew.next(); - ara_req_d.conversion_vs1 = OpQueueConversionZExt2; - ara_req_d.conversion_vs2 = OpQueueConversionSExt2; - ara_req_d.cvt_resize = CVT_WIDE; + ara_req.op = ara_pkg::VMUL; + ara_req.emul = next_lmul(vtype_q.vlmul); + ara_req.vtype.vsew = vtype_q.vsew.next(); + ara_req.conversion_vs1 = OpQueueConversionZExt2; + ara_req.conversion_vs2 = OpQueueConversionSExt2; + ara_req.cvt_resize = CVT_WIDE; end 6'b111011: begin // VWMUL - ara_req_d.op = ara_pkg::VMUL; - ara_req_d.emul = next_lmul(vtype_q.vlmul); - ara_req_d.vtype.vsew = vtype_q.vsew.next(); - ara_req_d.conversion_vs1 = OpQueueConversionSExt2; - 
ara_req_d.conversion_vs2 = OpQueueConversionSExt2; - ara_req_d.cvt_resize = CVT_WIDE; + ara_req.op = ara_pkg::VMUL; + ara_req.emul = next_lmul(vtype_q.vlmul); + ara_req.vtype.vsew = vtype_q.vsew.next(); + ara_req.conversion_vs1 = OpQueueConversionSExt2; + ara_req.conversion_vs2 = OpQueueConversionSExt2; + ara_req.cvt_resize = CVT_WIDE; end 6'b111100: begin // VWMACCU - ara_req_d.op = ara_pkg::VMACC; - ara_req_d.use_vd_op = 1'b1; - ara_req_d.emul = next_lmul(vtype_q.vlmul); - ara_req_d.vtype.vsew = vtype_q.vsew.next(); - ara_req_d.conversion_vs1 = OpQueueConversionZExt2; - ara_req_d.conversion_vs2 = OpQueueConversionZExt2; - ara_req_d.eew_vd_op = vtype_q.vsew.next(); - ara_req_d.cvt_resize = CVT_WIDE; + ara_req.op = ara_pkg::VMACC; + ara_req.use_vd_op = 1'b1; + ara_req.emul = next_lmul(vtype_q.vlmul); + ara_req.vtype.vsew = vtype_q.vsew.next(); + ara_req.conversion_vs1 = OpQueueConversionZExt2; + ara_req.conversion_vs2 = OpQueueConversionZExt2; + ara_req.eew_vd_op = vtype_q.vsew.next(); + ara_req.cvt_resize = CVT_WIDE; end 6'b111101: begin // VWMACC - ara_req_d.op = ara_pkg::VMACC; - ara_req_d.use_vd_op = 1'b1; - ara_req_d.emul = next_lmul(vtype_q.vlmul); - ara_req_d.vtype.vsew = vtype_q.vsew.next(); - ara_req_d.conversion_vs1 = OpQueueConversionSExt2; - ara_req_d.conversion_vs2 = OpQueueConversionSExt2; - ara_req_d.eew_vd_op = vtype_q.vsew.next(); - ara_req_d.cvt_resize = CVT_WIDE; + ara_req.op = ara_pkg::VMACC; + ara_req.use_vd_op = 1'b1; + ara_req.emul = next_lmul(vtype_q.vlmul); + ara_req.vtype.vsew = vtype_q.vsew.next(); + ara_req.conversion_vs1 = OpQueueConversionSExt2; + ara_req.conversion_vs2 = OpQueueConversionSExt2; + ara_req.eew_vd_op = vtype_q.vsew.next(); + ara_req.cvt_resize = CVT_WIDE; end 6'b111110: begin // VWMACCUS - ara_req_d.op = ara_pkg::VMACC; - ara_req_d.use_vd_op = 1'b1; - ara_req_d.emul = next_lmul(vtype_q.vlmul); - ara_req_d.vtype.vsew = vtype_q.vsew.next(); - ara_req_d.conversion_vs1 = OpQueueConversionZExt2; - ara_req_d.conversion_vs2 = 
OpQueueConversionSExt2; - ara_req_d.eew_vd_op = vtype_q.vsew.next(); - ara_req_d.cvt_resize = CVT_WIDE; + ara_req.op = ara_pkg::VMACC; + ara_req.use_vd_op = 1'b1; + ara_req.emul = next_lmul(vtype_q.vlmul); + ara_req.vtype.vsew = vtype_q.vsew.next(); + ara_req.conversion_vs1 = OpQueueConversionZExt2; + ara_req.conversion_vs2 = OpQueueConversionSExt2; + ara_req.eew_vd_op = vtype_q.vsew.next(); + ara_req.cvt_resize = CVT_WIDE; end 6'b111111: begin // VWMACCSU - ara_req_d.op = ara_pkg::VMACC; - ara_req_d.use_vd_op = 1'b1; - ara_req_d.emul = next_lmul(vtype_q.vlmul); - ara_req_d.vtype.vsew = vtype_q.vsew.next(); - ara_req_d.conversion_vs1 = OpQueueConversionSExt2; - ara_req_d.conversion_vs2 = OpQueueConversionZExt2; - ara_req_d.eew_vd_op = vtype_q.vsew.next(); - ara_req_d.cvt_resize = CVT_WIDE; + ara_req.op = ara_pkg::VMACC; + ara_req.use_vd_op = 1'b1; + ara_req.emul = next_lmul(vtype_q.vlmul); + ara_req.vtype.vsew = vtype_q.vsew.next(); + ara_req.conversion_vs1 = OpQueueConversionSExt2; + ara_req.conversion_vs2 = OpQueueConversionZExt2; + ara_req.eew_vd_op = vtype_q.vsew.next(); + ara_req.cvt_resize = CVT_WIDE; end default: illegal_insn = 1'b1; endcase @@ -1816,7 +1851,7 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( // access. The constraints can be different for the two source operands and the // destination register. if (!skip_lmul_checks) begin - unique case (ara_req_d.emul) + unique case (ara_req.emul) LMUL_2: if ((insn.varith_type.rd & 5'b00001) != 5'b00000) illegal_insn = 1'b1; LMUL_4: if ((insn.varith_type.rd & 5'b00011) != 5'b00000) illegal_insn = 1'b1; LMUL_8: if ((insn.varith_type.rd & 5'b00111) != 5'b00000) illegal_insn = 1'b1; @@ -1831,7 +1866,7 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( end // Ara cannot support instructions who operates on more than 64 bits. 
- if (int'(ara_req_d.vtype.vsew) > int'(EW64)) illegal_insn = 1'b1; + if (int'(ara_req.vtype.vsew) > int'(EW64)) illegal_insn = 1'b1; // Instruction is invalid if the vtype is invalid if (vtype_q.vill) illegal_insn = 1'b1; @@ -1840,193 +1875,193 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( OPFVV: begin: opfvv if (FPUSupport != FPUSupportNone) begin // These generate a request to Ara's backend - ara_req_d.vs1 = insn.varith_type.rs1; - ara_req_d.use_vs1 = 1'b1; - ara_req_d.vs2 = insn.varith_type.rs2; - ara_req_d.use_vs2 = 1'b1; - ara_req_d.vd = insn.varith_type.rd; - ara_req_d.use_vd = 1'b1; - ara_req_d.vm = insn.varith_type.vm; - ara_req_d.fp_rm = acc_req_i.frm; + ara_req.vs1 = insn.varith_type.rs1; + ara_req.use_vs1 = 1'b1; + ara_req.vs2 = insn.varith_type.rs2; + ara_req.use_vs2 = 1'b1; + ara_req.vd = insn.varith_type.rd; + ara_req.use_vd = 1'b1; + ara_req.vm = insn.varith_type.vm; + ara_req.fp_rm = acc_req_i.frm; ara_req_valid_d = 1'b1; // Decode based on the func6 field unique case (insn.varith_type.func6) // VFP Addition 6'b000000: begin - ara_req_d.op = ara_pkg::VFADD; + ara_req.op = ara_pkg::VFADD; // When performing a floating-point add/sub, fpnew adds the second and the third // operand. Send the first operand (vs2) to the third result queue. 
- ara_req_d.swap_vs2_vd_op = 1'b1; + ara_req.swap_vs2_vd_op = 1'b1; end 6'b000001: begin - ara_req_d.op = ara_pkg::VFREDUSUM; - ara_req_d.conversion_vs1 = OpQueueReductionZExt; - ara_req_d.swap_vs2_vd_op = 1'b1; - ara_req_d.cvt_resize = resize_e'(2'b00); + ara_req.op = ara_pkg::VFREDUSUM; + ara_req.conversion_vs1 = OpQueueReductionZExt; + ara_req.swap_vs2_vd_op = 1'b1; + ara_req.cvt_resize = resize_e'(2'b00); end 6'b000010: begin - ara_req_d.op = ara_pkg::VFSUB; - ara_req_d.swap_vs2_vd_op = 1'b1; + ara_req.op = ara_pkg::VFSUB; + ara_req.swap_vs2_vd_op = 1'b1; end 6'b000011: begin - ara_req_d.op = ara_pkg::VFREDOSUM; - ara_req_d.conversion_vs1 = OpQueueReductionZExt; - ara_req_d.swap_vs2_vd_op = 1'b1; - ara_req_d.cvt_resize = resize_e'(2'b00); + ara_req.op = ara_pkg::VFREDOSUM; + ara_req.conversion_vs1 = OpQueueReductionZExt; + ara_req.swap_vs2_vd_op = 1'b1; + ara_req.cvt_resize = resize_e'(2'b00); end - 6'b000100: ara_req_d.op = ara_pkg::VFMIN; + 6'b000100: ara_req.op = ara_pkg::VFMIN; 6'b000101: begin - ara_req_d.op = ara_pkg::VFREDMIN; - ara_req_d.conversion_vs1 = OpQueueReductionZExt; - ara_req_d.cvt_resize = resize_e'(2'b01); + ara_req.op = ara_pkg::VFREDMIN; + ara_req.conversion_vs1 = OpQueueReductionZExt; + ara_req.cvt_resize = resize_e'(2'b01); end - 6'b000110: ara_req_d.op = ara_pkg::VFMAX; + 6'b000110: ara_req.op = ara_pkg::VFMAX; 6'b000111: begin - ara_req_d.op = ara_pkg::VFREDMAX; - ara_req_d.conversion_vs1 = OpQueueReductionZExt; - ara_req_d.cvt_resize = resize_e'(2'b10); + ara_req.op = ara_pkg::VFREDMAX; + ara_req.conversion_vs1 = OpQueueReductionZExt; + ara_req.cvt_resize = resize_e'(2'b10); end - 6'b001000: ara_req_d.op = ara_pkg::VFSGNJ; - 6'b001001: ara_req_d.op = ara_pkg::VFSGNJN; - 6'b001010: ara_req_d.op = ara_pkg::VFSGNJX; + 6'b001000: ara_req.op = ara_pkg::VFSGNJ; + 6'b001001: ara_req.op = ara_pkg::VFSGNJN; + 6'b001010: ara_req.op = ara_pkg::VFSGNJX; 6'b010000: begin // VWFUNARY0 // vmv.f.s // Stall the interface until we get the result 
acc_req_ready_o = 1'b0; acc_resp_valid_o = 1'b0; - ara_req_d.op = ara_pkg::VFMVFS; - ara_req_d.use_vd = 1'b0; - ara_req_d.vl = 1; - ara_req_d.vstart = '0; + ara_req.op = ara_pkg::VFMVFS; + ara_req.use_vd = 1'b0; + ara_req.vl = 1; + ara_req.vstart = '0; skip_lmul_checks = 1'b1; ignore_zero_vl_check = 1'b1; // Zero-extend operands unique case (vtype_q.vsew) EW16: begin - ara_req_d.conversion_vs2 = OpQueueConversionZExt4; + ara_req.conversion_vs2 = OpQueueConversionZExt4; end EW32: begin - ara_req_d.conversion_vs2 = OpQueueConversionZExt2; + ara_req.conversion_vs2 = OpQueueConversionZExt2; end default:; endcase // Wait until the back-end answers to acknowledge those instructions - if (ara_resp_valid_i) begin + if (ara_resp_valid) begin acc_req_ready_o = 1'b1; - acc_resp_o.result = ara_resp_i.resp; - acc_resp_o.error = ara_resp_i.error; + acc_resp_o.result = ara_resp.resp; + acc_resp_o.error = ara_resp.error; acc_resp_valid_o = 1'b1; ara_req_valid_d = 1'b0; end end - 6'b011000: ara_req_d.op = ara_pkg::VMFEQ; - 6'b011001: ara_req_d.op = ara_pkg::VMFLE; - 6'b011011: ara_req_d.op = ara_pkg::VMFLT; - 6'b011100: ara_req_d.op = ara_pkg::VMFNE; + 6'b011000: ara_req.op = ara_pkg::VMFEQ; + 6'b011001: ara_req.op = ara_pkg::VMFLE; + 6'b011011: ara_req.op = ara_pkg::VMFLT; + 6'b011100: ara_req.op = ara_pkg::VMFNE; 6'b010010: begin // VFUNARY0 // These instructions do not use vs1 - ara_req_d.use_vs1 = 1'b0; + ara_req.use_vs1 = 1'b0; skip_vs1_lmul_checks = 1'b1; case (insn.varith_type.rs1) - 5'b00000: ara_req_d.op = VFCVTXUF; - 5'b00001: ara_req_d.op = VFCVTXF; - 5'b00010: ara_req_d.op = VFCVTFXU; - 5'b00011: ara_req_d.op = VFCVTFX; - 5'b00110: ara_req_d.op = VFCVTRTZXUF; - 5'b00111: ara_req_d.op = VFCVTRTZXF; + 5'b00000: ara_req.op = VFCVTXUF; + 5'b00001: ara_req.op = VFCVTXF; + 5'b00010: ara_req.op = VFCVTFXU; + 5'b00011: ara_req.op = VFCVTFX; + 5'b00110: ara_req.op = VFCVTRTZXUF; + 5'b00111: ara_req.op = VFCVTRTZXF; 5'b01000: begin // Widening VFCVTXUF - ara_req_d.op = VFCVTXUF; 
- ara_req_d.cvt_resize = CVT_WIDE; - ara_req_d.emul = next_lmul(vtype_q.vlmul); - ara_req_d.vtype.vsew = vtype_q.vsew.next(); - ara_req_d.conversion_vs2 = OpQueueAdjustFPCvt; + ara_req.op = VFCVTXUF; + ara_req.cvt_resize = CVT_WIDE; + ara_req.emul = next_lmul(vtype_q.vlmul); + ara_req.vtype.vsew = vtype_q.vsew.next(); + ara_req.conversion_vs2 = OpQueueAdjustFPCvt; end 5'b01001: begin // Widening VFCVTXF - ara_req_d.op = VFCVTXF; - ara_req_d.cvt_resize = CVT_WIDE; - ara_req_d.emul = next_lmul(vtype_q.vlmul); - ara_req_d.vtype.vsew = vtype_q.vsew.next(); - ara_req_d.conversion_vs2 = OpQueueAdjustFPCvt; + ara_req.op = VFCVTXF; + ara_req.cvt_resize = CVT_WIDE; + ara_req.emul = next_lmul(vtype_q.vlmul); + ara_req.vtype.vsew = vtype_q.vsew.next(); + ara_req.conversion_vs2 = OpQueueAdjustFPCvt; end 5'b01010: begin // Widening VFCVTFXU - ara_req_d.op = VFCVTFXU; - ara_req_d.cvt_resize = CVT_WIDE; - ara_req_d.emul = next_lmul(vtype_q.vlmul); - ara_req_d.vtype.vsew = vtype_q.vsew.next(); - ara_req_d.conversion_vs2 = OpQueueAdjustFPCvt; + ara_req.op = VFCVTFXU; + ara_req.cvt_resize = CVT_WIDE; + ara_req.emul = next_lmul(vtype_q.vlmul); + ara_req.vtype.vsew = vtype_q.vsew.next(); + ara_req.conversion_vs2 = OpQueueAdjustFPCvt; end 5'b01011: begin // Widening VFCVTFX - ara_req_d.op = VFCVTFX; - ara_req_d.cvt_resize = CVT_WIDE; - ara_req_d.emul = next_lmul(vtype_q.vlmul); - ara_req_d.vtype.vsew = vtype_q.vsew.next(); - ara_req_d.conversion_vs2 = OpQueueAdjustFPCvt; + ara_req.op = VFCVTFX; + ara_req.cvt_resize = CVT_WIDE; + ara_req.emul = next_lmul(vtype_q.vlmul); + ara_req.vtype.vsew = vtype_q.vsew.next(); + ara_req.conversion_vs2 = OpQueueAdjustFPCvt; end 5'b01100: begin // Widening VFCVTFF - ara_req_d.op = VFCVTFF; - ara_req_d.cvt_resize = CVT_WIDE; - ara_req_d.emul = next_lmul(vtype_q.vlmul); - ara_req_d.vtype.vsew = vtype_q.vsew.next(); - ara_req_d.conversion_vs2 = OpQueueAdjustFPCvt; + ara_req.op = VFCVTFF; + ara_req.cvt_resize = CVT_WIDE; + ara_req.emul = 
next_lmul(vtype_q.vlmul); + ara_req.vtype.vsew = vtype_q.vsew.next(); + ara_req.conversion_vs2 = OpQueueAdjustFPCvt; end 5'b01110: begin // Widening VFCVTRTZXUF - ara_req_d.op = VFCVTRTZXUF; - ara_req_d.cvt_resize = CVT_WIDE; - ara_req_d.emul = next_lmul(vtype_q.vlmul); - ara_req_d.vtype.vsew = vtype_q.vsew.next(); - ara_req_d.conversion_vs2 = OpQueueAdjustFPCvt; + ara_req.op = VFCVTRTZXUF; + ara_req.cvt_resize = CVT_WIDE; + ara_req.emul = next_lmul(vtype_q.vlmul); + ara_req.vtype.vsew = vtype_q.vsew.next(); + ara_req.conversion_vs2 = OpQueueAdjustFPCvt; end 5'b01111: begin // Widening VFCVTRTZXF - ara_req_d.op = VFCVTRTZXF; - ara_req_d.cvt_resize = CVT_WIDE; - ara_req_d.emul = next_lmul(vtype_q.vlmul); - ara_req_d.vtype.vsew = vtype_q.vsew.next(); - ara_req_d.conversion_vs2 = OpQueueAdjustFPCvt; + ara_req.op = VFCVTRTZXF; + ara_req.cvt_resize = CVT_WIDE; + ara_req.emul = next_lmul(vtype_q.vlmul); + ara_req.vtype.vsew = vtype_q.vsew.next(); + ara_req.conversion_vs2 = OpQueueAdjustFPCvt; end 5'b10000: begin // Narrowing VFCVTXUF - ara_req_d.op = VFCVTXUF; - ara_req_d.cvt_resize = CVT_NARROW; - ara_req_d.eew_vs2 = vtype_q.vsew.next(); + ara_req.op = VFCVTXUF; + ara_req.cvt_resize = CVT_NARROW; + ara_req.eew_vs2 = vtype_q.vsew.next(); end 5'b10001: begin // Narrowing VFCVTXF - ara_req_d.op = VFCVTXF; - ara_req_d.cvt_resize = CVT_NARROW; - ara_req_d.eew_vs2 = vtype_q.vsew.next(); + ara_req.op = VFCVTXF; + ara_req.cvt_resize = CVT_NARROW; + ara_req.eew_vs2 = vtype_q.vsew.next(); end 5'b10010: begin // Narrowing VFCVTFXU - ara_req_d.op = VFCVTFXU; - ara_req_d.cvt_resize = CVT_NARROW; - ara_req_d.eew_vs2 = vtype_q.vsew.next(); + ara_req.op = VFCVTFXU; + ara_req.cvt_resize = CVT_NARROW; + ara_req.eew_vs2 = vtype_q.vsew.next(); end 5'b10011: begin // Narrowing VFCVTFX - ara_req_d.op = VFCVTFX; - ara_req_d.cvt_resize = CVT_NARROW; - ara_req_d.eew_vs2 = vtype_q.vsew.next(); + ara_req.op = VFCVTFX; + ara_req.cvt_resize = CVT_NARROW; + ara_req.eew_vs2 = vtype_q.vsew.next(); end 
5'b10100: begin // Narrowing VFCVTFF - ara_req_d.op = VFCVTFF; - ara_req_d.cvt_resize = CVT_NARROW; - ara_req_d.eew_vs2 = vtype_q.vsew.next(); + ara_req.op = VFCVTFF; + ara_req.cvt_resize = CVT_NARROW; + ara_req.eew_vs2 = vtype_q.vsew.next(); end 5'b10101: begin // Narrowing VFNCVTRODFF - ara_req_d.op = VFNCVTRODFF; - ara_req_d.cvt_resize = CVT_NARROW; - ara_req_d.eew_vs2 = vtype_q.vsew.next(); + ara_req.op = VFNCVTRODFF; + ara_req.cvt_resize = CVT_NARROW; + ara_req.eew_vs2 = vtype_q.vsew.next(); end 5'b10110: begin // Narrowing VFCVTRTZXUF - ara_req_d.op = VFCVTRTZXUF; - ara_req_d.cvt_resize = CVT_NARROW; - ara_req_d.eew_vs2 = vtype_q.vsew.next(); + ara_req.op = VFCVTRTZXUF; + ara_req.cvt_resize = CVT_NARROW; + ara_req.eew_vs2 = vtype_q.vsew.next(); end 5'b10111: begin // Narrowing VFCVTRTZXF - ara_req_d.op = VFCVTRTZXF; - ara_req_d.cvt_resize = CVT_NARROW; - ara_req_d.eew_vs2 = vtype_q.vsew.next(); + ara_req.op = VFCVTRTZXF; + ara_req.cvt_resize = CVT_NARROW; + ara_req.eew_vs2 = vtype_q.vsew.next(); end default: begin // Trigger an error @@ -2037,156 +2072,156 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( end 6'b010011: begin // VFUNARY1 // These instructions do not use vs1 - ara_req_d.use_vs1 = 1'b0; + ara_req.use_vs1 = 1'b0; skip_vs1_lmul_checks = 1'b1; unique case (insn.varith_type.rs1) - 5'b00000: ara_req_d.op = ara_pkg::VFSQRT; - 5'b00100: ara_req_d.op = ara_pkg::VFRSQRT7; - 5'b00101: ara_req_d.op = ara_pkg::VFREC7; - 5'b10000: ara_req_d.op = ara_pkg::VFCLASS; + 5'b00000: ara_req.op = ara_pkg::VFSQRT; + 5'b00100: ara_req.op = ara_pkg::VFRSQRT7; + 5'b00101: ara_req.op = ara_pkg::VFREC7; + 5'b10000: ara_req.op = ara_pkg::VFCLASS; default : illegal_insn = 1'b1; endcase end - 6'b100000: ara_req_d.op = ara_pkg::VFDIV; - 6'b100100: ara_req_d.op = ara_pkg::VFMUL; + 6'b100000: ara_req.op = ara_pkg::VFDIV; + 6'b100100: ara_req.op = ara_pkg::VFMUL; 6'b101000: begin - ara_req_d.op = ara_pkg::VFMADD; - ara_req_d.use_vd_op = 1'b1; + ara_req.op = 
ara_pkg::VFMADD; + ara_req.use_vd_op = 1'b1; // Swap "vs2" and "vd" since "vs2" is the addend and "vd" is the multiplicand - ara_req_d.swap_vs2_vd_op = 1'b1; + ara_req.swap_vs2_vd_op = 1'b1; end 6'b101001: begin - ara_req_d.op = ara_pkg::VFNMADD; - ara_req_d.use_vd_op = 1'b1; + ara_req.op = ara_pkg::VFNMADD; + ara_req.use_vd_op = 1'b1; // Swap "vs2" and "vd" since "vs2" is the addend and "vd" is the multiplicand - ara_req_d.swap_vs2_vd_op = 1'b1; + ara_req.swap_vs2_vd_op = 1'b1; end 6'b101010: begin - ara_req_d.op = ara_pkg::VFMSUB; - ara_req_d.use_vd_op = 1'b1; + ara_req.op = ara_pkg::VFMSUB; + ara_req.use_vd_op = 1'b1; // Swap "vs2" and "vd" since "vs2" is the addend and "vd" is the multiplicand - ara_req_d.swap_vs2_vd_op = 1'b1; + ara_req.swap_vs2_vd_op = 1'b1; end 6'b101011: begin - ara_req_d.op = ara_pkg::VFNMSUB; - ara_req_d.use_vd_op = 1'b1; + ara_req.op = ara_pkg::VFNMSUB; + ara_req.use_vd_op = 1'b1; // Swap "vs2" and "vd" since "vs2" is the addend and "vd" is the multiplicand - ara_req_d.swap_vs2_vd_op = 1'b1; + ara_req.swap_vs2_vd_op = 1'b1; end 6'b101100: begin - ara_req_d.op = ara_pkg::VFMACC; - ara_req_d.use_vd_op = 1'b1; + ara_req.op = ara_pkg::VFMACC; + ara_req.use_vd_op = 1'b1; end 6'b101101: begin - ara_req_d.op = ara_pkg::VFNMACC; - ara_req_d.use_vd_op = 1'b1; + ara_req.op = ara_pkg::VFNMACC; + ara_req.use_vd_op = 1'b1; end 6'b101110: begin - ara_req_d.op = ara_pkg::VFMSAC; - ara_req_d.use_vd_op = 1'b1; + ara_req.op = ara_pkg::VFMSAC; + ara_req.use_vd_op = 1'b1; end 6'b101111: begin - ara_req_d.op = ara_pkg::VFNMSAC; - ara_req_d.use_vd_op = 1'b1; + ara_req.op = ara_pkg::VFNMSAC; + ara_req.use_vd_op = 1'b1; end 6'b110000: begin // VFWADD - ara_req_d.op = ara_pkg::VFADD; - ara_req_d.swap_vs2_vd_op = 1'b1; - ara_req_d.emul = next_lmul(vtype_q.vlmul); - ara_req_d.vtype.vsew = vtype_q.vsew.next(); - ara_req_d.conversion_vs1 = OpQueueConversionWideFP2; - ara_req_d.conversion_vs2 = OpQueueConversionWideFP2; + ara_req.op = ara_pkg::VFADD; + 
ara_req.swap_vs2_vd_op = 1'b1; + ara_req.emul = next_lmul(vtype_q.vlmul); + ara_req.vtype.vsew = vtype_q.vsew.next(); + ara_req.conversion_vs1 = OpQueueConversionWideFP2; + ara_req.conversion_vs2 = OpQueueConversionWideFP2; end 6'b110001: begin // VFWREDUSUM - ara_req_d.op = ara_pkg::VFWREDUSUM; - ara_req_d.swap_vs2_vd_op = 1'b1; - ara_req_d.emul = next_lmul(vtype_q.vlmul); - ara_req_d.vtype.vsew = vtype_q.vsew.next(); - ara_req_d.conversion_vs1 = OpQueueReductionZExt; - ara_req_d.conversion_vs2 = OpQueueConversionWideFP2; - ara_req_d.eew_vs1 = vtype_q.vsew.next(); - ara_req_d.cvt_resize = resize_e'(2'b00); + ara_req.op = ara_pkg::VFWREDUSUM; + ara_req.swap_vs2_vd_op = 1'b1; + ara_req.emul = next_lmul(vtype_q.vlmul); + ara_req.vtype.vsew = vtype_q.vsew.next(); + ara_req.conversion_vs1 = OpQueueReductionZExt; + ara_req.conversion_vs2 = OpQueueConversionWideFP2; + ara_req.eew_vs1 = vtype_q.vsew.next(); + ara_req.cvt_resize = resize_e'(2'b00); end 6'b110010: begin // VFWSUB - ara_req_d.op = ara_pkg::VFSUB; - ara_req_d.swap_vs2_vd_op = 1'b1; - ara_req_d.emul = next_lmul(vtype_q.vlmul); - ara_req_d.vtype.vsew = vtype_q.vsew.next(); - ara_req_d.conversion_vs1 = OpQueueConversionWideFP2; - ara_req_d.conversion_vs2 = OpQueueConversionWideFP2; + ara_req.op = ara_pkg::VFSUB; + ara_req.swap_vs2_vd_op = 1'b1; + ara_req.emul = next_lmul(vtype_q.vlmul); + ara_req.vtype.vsew = vtype_q.vsew.next(); + ara_req.conversion_vs1 = OpQueueConversionWideFP2; + ara_req.conversion_vs2 = OpQueueConversionWideFP2; end 6'b110011: begin // VFWREDOSUM - ara_req_d.op = ara_pkg::VFWREDOSUM; - ara_req_d.swap_vs2_vd_op = 1'b1; - ara_req_d.emul = next_lmul(vtype_q.vlmul); - ara_req_d.vtype.vsew = vtype_q.vsew.next(); - ara_req_d.conversion_vs1 = OpQueueReductionZExt; - ara_req_d.conversion_vs2 = OpQueueConversionWideFP2; - ara_req_d.eew_vs1 = vtype_q.vsew.next(); - ara_req_d.cvt_resize = resize_e'(2'b00); + ara_req.op = ara_pkg::VFWREDOSUM; + ara_req.swap_vs2_vd_op = 1'b1; + ara_req.emul = 
next_lmul(vtype_q.vlmul); + ara_req.vtype.vsew = vtype_q.vsew.next(); + ara_req.conversion_vs1 = OpQueueReductionZExt; + ara_req.conversion_vs2 = OpQueueConversionWideFP2; + ara_req.eew_vs1 = vtype_q.vsew.next(); + ara_req.cvt_resize = resize_e'(2'b00); end 6'b110100: begin // VFWADD.W - ara_req_d.op = ara_pkg::VFADD; - ara_req_d.swap_vs2_vd_op = 1'b1; - ara_req_d.emul = next_lmul(vtype_q.vlmul); + ara_req.op = ara_pkg::VFADD; + ara_req.swap_vs2_vd_op = 1'b1; + ara_req.emul = next_lmul(vtype_q.vlmul); lmul_vs2 = next_lmul(vtype_q.vlmul); - ara_req_d.vtype.vsew = vtype_q.vsew.next(); - ara_req_d.eew_vs2 = vtype_q.vsew.next(); - ara_req_d.conversion_vs1 = OpQueueConversionWideFP2; + ara_req.vtype.vsew = vtype_q.vsew.next(); + ara_req.eew_vs2 = vtype_q.vsew.next(); + ara_req.conversion_vs1 = OpQueueConversionWideFP2; end 6'b110110: begin // VFWSUB.W - ara_req_d.op = ara_pkg::VFSUB; - ara_req_d.swap_vs2_vd_op = 1'b1; - ara_req_d.emul = next_lmul(vtype_q.vlmul); + ara_req.op = ara_pkg::VFSUB; + ara_req.swap_vs2_vd_op = 1'b1; + ara_req.emul = next_lmul(vtype_q.vlmul); lmul_vs2 = next_lmul(vtype_q.vlmul); - ara_req_d.vtype.vsew = vtype_q.vsew.next(); - ara_req_d.eew_vs2 = vtype_q.vsew.next(); - ara_req_d.conversion_vs1 = OpQueueConversionWideFP2; + ara_req.vtype.vsew = vtype_q.vsew.next(); + ara_req.eew_vs2 = vtype_q.vsew.next(); + ara_req.conversion_vs1 = OpQueueConversionWideFP2; end 6'b111000: begin // VFWMUL - ara_req_d.op = ara_pkg::VFMUL; - ara_req_d.emul = next_lmul(vtype_q.vlmul); - ara_req_d.vtype.vsew = vtype_q.vsew.next(); - ara_req_d.conversion_vs1 = OpQueueConversionWideFP2; - ara_req_d.conversion_vs2 = OpQueueConversionWideFP2; + ara_req.op = ara_pkg::VFMUL; + ara_req.emul = next_lmul(vtype_q.vlmul); + ara_req.vtype.vsew = vtype_q.vsew.next(); + ara_req.conversion_vs1 = OpQueueConversionWideFP2; + ara_req.conversion_vs2 = OpQueueConversionWideFP2; end 6'b111100: begin // VFWMACC - ara_req_d.op = ara_pkg::VFMACC; - ara_req_d.use_vd_op = 1'b1; - ara_req_d.emul 
= next_lmul(vtype_q.vlmul); - ara_req_d.vtype.vsew = vtype_q.vsew.next(); - ara_req_d.conversion_vs1 = OpQueueConversionWideFP2; - ara_req_d.conversion_vs2 = OpQueueConversionWideFP2; - ara_req_d.eew_vd_op = vtype_q.vsew.next(); + ara_req.op = ara_pkg::VFMACC; + ara_req.use_vd_op = 1'b1; + ara_req.emul = next_lmul(vtype_q.vlmul); + ara_req.vtype.vsew = vtype_q.vsew.next(); + ara_req.conversion_vs1 = OpQueueConversionWideFP2; + ara_req.conversion_vs2 = OpQueueConversionWideFP2; + ara_req.eew_vd_op = vtype_q.vsew.next(); end 6'b111101: begin // VFWNMACC - ara_req_d.op = ara_pkg::VFNMACC; - ara_req_d.use_vd_op = 1'b1; - ara_req_d.emul = next_lmul(vtype_q.vlmul); - ara_req_d.vtype.vsew = vtype_q.vsew.next(); - ara_req_d.conversion_vs1 = OpQueueConversionWideFP2; - ara_req_d.conversion_vs2 = OpQueueConversionWideFP2; - ara_req_d.eew_vd_op = vtype_q.vsew.next(); + ara_req.op = ara_pkg::VFNMACC; + ara_req.use_vd_op = 1'b1; + ara_req.emul = next_lmul(vtype_q.vlmul); + ara_req.vtype.vsew = vtype_q.vsew.next(); + ara_req.conversion_vs1 = OpQueueConversionWideFP2; + ara_req.conversion_vs2 = OpQueueConversionWideFP2; + ara_req.eew_vd_op = vtype_q.vsew.next(); end 6'b111110: begin // VFWMSAC - ara_req_d.op = ara_pkg::VFMSAC; - ara_req_d.use_vd_op = 1'b1; - ara_req_d.emul = next_lmul(vtype_q.vlmul); - ara_req_d.vtype.vsew = vtype_q.vsew.next(); - ara_req_d.conversion_vs1 = OpQueueConversionWideFP2; - ara_req_d.conversion_vs2 = OpQueueConversionWideFP2; - ara_req_d.eew_vd_op = vtype_q.vsew.next(); + ara_req.op = ara_pkg::VFMSAC; + ara_req.use_vd_op = 1'b1; + ara_req.emul = next_lmul(vtype_q.vlmul); + ara_req.vtype.vsew = vtype_q.vsew.next(); + ara_req.conversion_vs1 = OpQueueConversionWideFP2; + ara_req.conversion_vs2 = OpQueueConversionWideFP2; + ara_req.eew_vd_op = vtype_q.vsew.next(); end 6'b111111: begin // VFWNMSAC - ara_req_d.op = ara_pkg::VFNMSAC; - ara_req_d.use_vd_op = 1'b1; - ara_req_d.emul = next_lmul(vtype_q.vlmul); - ara_req_d.vtype.vsew = vtype_q.vsew.next(); - 
ara_req_d.conversion_vs1 = OpQueueConversionWideFP2; - ara_req_d.conversion_vs2 = OpQueueConversionWideFP2; - ara_req_d.eew_vd_op = vtype_q.vsew.next(); + ara_req.op = ara_pkg::VFNMSAC; + ara_req.use_vd_op = 1'b1; + ara_req.emul = next_lmul(vtype_q.vlmul); + ara_req.vtype.vsew = vtype_q.vsew.next(); + ara_req.conversion_vs1 = OpQueueConversionWideFP2; + ara_req.conversion_vs2 = OpQueueConversionWideFP2; + ara_req.eew_vd_op = vtype_q.vsew.next(); end default: illegal_insn = 1'b1; endcase @@ -2195,7 +2230,7 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( // can access. The constraints can be different for the two source operands and the // destination register. if (!skip_lmul_checks) begin - unique case (ara_req_d.emul) + unique case (ara_req.emul) LMUL_2 : if ((insn.varith_type.rd & 5'b00001) != 5'b00000) illegal_insn = 1'b1; LMUL_4 : if ((insn.varith_type.rd & 5'b00011) != 5'b00000) illegal_insn = 1'b1; LMUL_8 : if ((insn.varith_type.rd & 5'b00111) != 5'b00000) illegal_insn = 1'b1; @@ -2223,20 +2258,20 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( // Ara can support 16-bit float, 32-bit float, 64-bit float. // Ara cannot support instructions who operates on more than 64 bits. 
unique case (FPUSupport) - FPUSupportHalfSingleDouble: if (int'(ara_req_d.vtype.vsew) < int'(EW16) || - int'(ara_req_d.vtype.vsew) > int'(EW64) || int'(ara_req_d.eew_vs2) > int'(EW64)) + FPUSupportHalfSingleDouble: if (int'(ara_req.vtype.vsew) < int'(EW16) || + int'(ara_req.vtype.vsew) > int'(EW64) || int'(ara_req.eew_vs2) > int'(EW64)) illegal_insn = 1'b1; - FPUSupportHalfSingle: if (int'(ara_req_d.vtype.vsew) < int'(EW16) || - int'(ara_req_d.vtype.vsew) > int'(EW32) || int'(ara_req_d.eew_vs2) > int'(EW32)) + FPUSupportHalfSingle: if (int'(ara_req.vtype.vsew) < int'(EW16) || + int'(ara_req.vtype.vsew) > int'(EW32) || int'(ara_req.eew_vs2) > int'(EW32)) illegal_insn = 1'b1; - FPUSupportSingleDouble: if (int'(ara_req_d.vtype.vsew) < int'(EW32) || - int'(ara_req_d.vtype.vsew) > int'(EW64) || int'(ara_req_d.eew_vs2) > int'(EW64)) + FPUSupportSingleDouble: if (int'(ara_req.vtype.vsew) < int'(EW32) || + int'(ara_req.vtype.vsew) > int'(EW64) || int'(ara_req.eew_vs2) > int'(EW64)) illegal_insn = 1'b1; - FPUSupportHalf: if (int'(ara_req_d.vtype.vsew) != int'(EW16) || int'(ara_req_d.eew_vs2) > int'(EW16)) + FPUSupportHalf: if (int'(ara_req.vtype.vsew) != int'(EW16) || int'(ara_req.eew_vs2) > int'(EW16)) illegal_insn = 1'b1; - FPUSupportSingle: if (int'(ara_req_d.vtype.vsew) != int'(EW32) || int'(ara_req_d.eew_vs2) > int'(EW32)) + FPUSupportSingle: if (int'(ara_req.vtype.vsew) != int'(EW32) || int'(ara_req.eew_vs2) > int'(EW32)) illegal_insn = 1'b1; - FPUSupportDouble: if (int'(ara_req_d.vtype.vsew) != int'(EW64) || int'(ara_req_d.eew_vs2) > int'(EW64)) + FPUSupportDouble: if (int'(ara_req.vtype.vsew) != int'(EW64) || int'(ara_req.eew_vs2) > int'(EW64)) illegal_insn = 1'b1; default: illegal_insn = 1'b1; // Unsupported configuration endcase @@ -2249,205 +2284,205 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( OPFVF: begin: opfvf if (FPUSupport != FPUSupportNone) begin // These generate a request to Ara's backend - ara_req_d.scalar_op = acc_req_i.rs1; - 
ara_req_d.use_scalar_op = 1'b1; - ara_req_d.vs2 = insn.varith_type.rs2; - ara_req_d.use_vs2 = 1'b1; - ara_req_d.vd = insn.varith_type.rd; - ara_req_d.use_vd = 1'b1; - ara_req_d.vm = insn.varith_type.vm; - ara_req_d.is_stride_np2 = is_stride_np2; - ara_req_d.fp_rm = acc_req_i.frm; + ara_req.scalar_op = acc_req_i.rs1; + ara_req.use_scalar_op = 1'b1; + ara_req.vs2 = insn.varith_type.rs2; + ara_req.use_vs2 = 1'b1; + ara_req.vd = insn.varith_type.rd; + ara_req.use_vd = 1'b1; + ara_req.vm = insn.varith_type.vm; + ara_req.is_stride_np2 = is_stride_np2; + ara_req.fp_rm = acc_req_i.frm; ara_req_valid_d = 1'b1; // Decode based on the func6 field unique case (insn.varith_type.func6) 6'b000000: begin - ara_req_d.op = ara_pkg::VFADD; + ara_req.op = ara_pkg::VFADD; // When performing a floating-point add/sub, fpnew adds the second and the third // operand // So, send the first operand (vs2) to the third result queue - ara_req_d.swap_vs2_vd_op = 1'b1; + ara_req.swap_vs2_vd_op = 1'b1; end 6'b000010: begin - ara_req_d.op = ara_pkg::VFSUB; - ara_req_d.swap_vs2_vd_op = 1'b1; + ara_req.op = ara_pkg::VFSUB; + ara_req.swap_vs2_vd_op = 1'b1; end - 6'b000100: ara_req_d.op = ara_pkg::VFMIN; - 6'b000110: ara_req_d.op = ara_pkg::VFMAX; - 6'b001000: ara_req_d.op = ara_pkg::VFSGNJ; - 6'b001001: ara_req_d.op = ara_pkg::VFSGNJN; - 6'b001010: ara_req_d.op = ara_pkg::VFSGNJX; + 6'b000100: ara_req.op = ara_pkg::VFMIN; + 6'b000110: ara_req.op = ara_pkg::VFMAX; + 6'b001000: ara_req.op = ara_pkg::VFSGNJ; + 6'b001001: ara_req.op = ara_pkg::VFSGNJN; + 6'b001010: ara_req.op = ara_pkg::VFSGNJX; 6'b001110: begin // vfslide1up - ara_req_d.op = ara_pkg::VSLIDEUP; - ara_req_d.stride = 1; - ara_req_d.eew_vs2 = vtype_q.vsew; + ara_req.op = ara_pkg::VSLIDEUP; + ara_req.stride = 1; + ara_req.eew_vs2 = vtype_q.vsew; // Request will need reshuffling - ara_req_d.scale_vl = 1'b1; + ara_req.scale_vl = 1'b1; // If stride > vl, the vslideup has no effects - if (|ara_req_d.stride[$bits(ara_req_d.stride)-1:$bits(vl_q)] || 
- (vlen_t'(ara_req_d.stride) >= vl_q)) null_vslideup = 1'b1; + if (|ara_req.stride[$bits(ara_req.stride)-1:$bits(vl_q)] || + (vlen_t'(ara_req.stride) >= vl_q)) null_vslideup = 1'b1; end 6'b001111: begin // vfslide1down - ara_req_d.op = ara_pkg::VSLIDEDOWN; - ara_req_d.stride = 1; - ara_req_d.eew_vs2 = vtype_q.vsew; + ara_req.op = ara_pkg::VSLIDEDOWN; + ara_req.stride = 1; + ara_req.eew_vs2 = vtype_q.vsew; // Request will need reshuffling - ara_req_d.scale_vl = 1'b1; + ara_req.scale_vl = 1'b1; end 6'b010000: begin // VRFUNARY0 // vmv.s.f - ara_req_d.op = ara_pkg::VFMVSF; - ara_req_d.use_vs2 = 1'b0; - ara_req_d.vl = |vl_q ? 1 : '0; + ara_req.op = ara_pkg::VFMVSF; + ara_req.use_vs2 = 1'b0; + ara_req.vl = |vl_q ? 1 : '0; // This instruction ignores LMUL checks skip_lmul_checks = 1'b1; end - 6'b010111: ara_req_d.op = ara_pkg::VMERGE; - 6'b011000: ara_req_d.op = ara_pkg::VMFEQ; - 6'b011001: ara_req_d.op = ara_pkg::VMFLE; - 6'b011011: ara_req_d.op = ara_pkg::VMFLT; - 6'b011100: ara_req_d.op = ara_pkg::VMFNE; - 6'b011101: ara_req_d.op = ara_pkg::VMFGT; - 6'b011111: ara_req_d.op = ara_pkg::VMFGE; - 6'b100100: ara_req_d.op = ara_pkg::VFMUL; - 6'b100000: ara_req_d.op = ara_pkg::VFDIV; - 6'b100001: ara_req_d.op = ara_pkg::VFRDIV; + 6'b010111: ara_req.op = ara_pkg::VMERGE; + 6'b011000: ara_req.op = ara_pkg::VMFEQ; + 6'b011001: ara_req.op = ara_pkg::VMFLE; + 6'b011011: ara_req.op = ara_pkg::VMFLT; + 6'b011100: ara_req.op = ara_pkg::VMFNE; + 6'b011101: ara_req.op = ara_pkg::VMFGT; + 6'b011111: ara_req.op = ara_pkg::VMFGE; + 6'b100100: ara_req.op = ara_pkg::VFMUL; + 6'b100000: ara_req.op = ara_pkg::VFDIV; + 6'b100001: ara_req.op = ara_pkg::VFRDIV; 6'b100111: begin - ara_req_d.op = ara_pkg::VFRSUB; - ara_req_d.swap_vs2_vd_op = 1'b1; + ara_req.op = ara_pkg::VFRSUB; + ara_req.swap_vs2_vd_op = 1'b1; end 6'b101000: begin - ara_req_d.op = ara_pkg::VFMADD; - ara_req_d.use_vd_op = 1'b1; + ara_req.op = ara_pkg::VFMADD; + ara_req.use_vd_op = 1'b1; // Swap "vs2" and "vd" since "vs2" is the 
addend and "vd" is the multiplicand - ara_req_d.swap_vs2_vd_op = 1'b1; + ara_req.swap_vs2_vd_op = 1'b1; end 6'b101001: begin - ara_req_d.op = ara_pkg::VFNMADD; - ara_req_d.use_vd_op = 1'b1; + ara_req.op = ara_pkg::VFNMADD; + ara_req.use_vd_op = 1'b1; // Swap "vs2" and "vd" since "vs2" is the addend and "vd" is the multiplicand - ara_req_d.swap_vs2_vd_op = 1'b1; + ara_req.swap_vs2_vd_op = 1'b1; end 6'b101010: begin - ara_req_d.op = ara_pkg::VFMSUB; - ara_req_d.use_vd_op = 1'b1; + ara_req.op = ara_pkg::VFMSUB; + ara_req.use_vd_op = 1'b1; // Swap "vs2" and "vd" since "vs2" is the addend and "vd" is the multiplicand - ara_req_d.swap_vs2_vd_op = 1'b1; + ara_req.swap_vs2_vd_op = 1'b1; end 6'b101011: begin - ara_req_d.op = ara_pkg::VFNMSUB; - ara_req_d.use_vd_op = 1'b1; + ara_req.op = ara_pkg::VFNMSUB; + ara_req.use_vd_op = 1'b1; // Swap "vs2" and "vd" since "vs2" is the addend and "vd" is the multiplicand - ara_req_d.swap_vs2_vd_op = 1'b1; + ara_req.swap_vs2_vd_op = 1'b1; end 6'b101100: begin - ara_req_d.op = ara_pkg::VFMACC; - ara_req_d.use_vd_op = 1'b1; + ara_req.op = ara_pkg::VFMACC; + ara_req.use_vd_op = 1'b1; end 6'b101101: begin - ara_req_d.op = ara_pkg::VFNMACC; - ara_req_d.use_vd_op = 1'b1; + ara_req.op = ara_pkg::VFNMACC; + ara_req.use_vd_op = 1'b1; end 6'b101110: begin - ara_req_d.op = ara_pkg::VFMSAC; - ara_req_d.use_vd_op = 1'b1; + ara_req.op = ara_pkg::VFMSAC; + ara_req.use_vd_op = 1'b1; end 6'b101111: begin - ara_req_d.op = ara_pkg::VFNMSAC; - ara_req_d.use_vd_op = 1'b1; + ara_req.op = ara_pkg::VFNMSAC; + ara_req.use_vd_op = 1'b1; end 6'b110000: begin // VFWADD - ara_req_d.op = ara_pkg::VFADD; - ara_req_d.swap_vs2_vd_op = 1'b1; - ara_req_d.emul = next_lmul(vtype_q.vlmul); - ara_req_d.vtype.vsew = vtype_q.vsew.next(); - ara_req_d.conversion_vs2 = OpQueueConversionWideFP2; - ara_req_d.wide_fp_imm = 1'b1; + ara_req.op = ara_pkg::VFADD; + ara_req.swap_vs2_vd_op = 1'b1; + ara_req.emul = next_lmul(vtype_q.vlmul); + ara_req.vtype.vsew = vtype_q.vsew.next(); + 
ara_req.conversion_vs2 = OpQueueConversionWideFP2; + ara_req.wide_fp_imm = 1'b1; end 6'b110010: begin // VFWSUB - ara_req_d.op = ara_pkg::VFSUB; - ara_req_d.swap_vs2_vd_op = 1'b1; - ara_req_d.emul = next_lmul(vtype_q.vlmul); - ara_req_d.vtype.vsew = vtype_q.vsew.next(); - ara_req_d.conversion_vs2 = OpQueueConversionWideFP2; - ara_req_d.wide_fp_imm = 1'b1; + ara_req.op = ara_pkg::VFSUB; + ara_req.swap_vs2_vd_op = 1'b1; + ara_req.emul = next_lmul(vtype_q.vlmul); + ara_req.vtype.vsew = vtype_q.vsew.next(); + ara_req.conversion_vs2 = OpQueueConversionWideFP2; + ara_req.wide_fp_imm = 1'b1; end 6'b110100: begin // VFWADD.W - ara_req_d.op = ara_pkg::VFADD; - ara_req_d.swap_vs2_vd_op = 1'b1; - ara_req_d.emul = next_lmul(vtype_q.vlmul); + ara_req.op = ara_pkg::VFADD; + ara_req.swap_vs2_vd_op = 1'b1; + ara_req.emul = next_lmul(vtype_q.vlmul); lmul_vs2 = next_lmul(vtype_q.vlmul); - ara_req_d.vtype.vsew = vtype_q.vsew.next(); - ara_req_d.eew_vs2 = vtype_q.vsew.next(); - ara_req_d.wide_fp_imm = 1'b1; + ara_req.vtype.vsew = vtype_q.vsew.next(); + ara_req.eew_vs2 = vtype_q.vsew.next(); + ara_req.wide_fp_imm = 1'b1; end 6'b110110: begin // VFWSUB.W - ara_req_d.op = ara_pkg::VFSUB; - ara_req_d.swap_vs2_vd_op = 1'b1; - ara_req_d.emul = next_lmul(vtype_q.vlmul); + ara_req.op = ara_pkg::VFSUB; + ara_req.swap_vs2_vd_op = 1'b1; + ara_req.emul = next_lmul(vtype_q.vlmul); lmul_vs2 = next_lmul(vtype_q.vlmul); - ara_req_d.vtype.vsew = vtype_q.vsew.next(); - ara_req_d.eew_vs2 = vtype_q.vsew.next(); - ara_req_d.wide_fp_imm = 1'b1; + ara_req.vtype.vsew = vtype_q.vsew.next(); + ara_req.eew_vs2 = vtype_q.vsew.next(); + ara_req.wide_fp_imm = 1'b1; end 6'b111000: begin // VFWMUL - ara_req_d.op = ara_pkg::VFMUL; - ara_req_d.emul = next_lmul(vtype_q.vlmul); - ara_req_d.vtype.vsew = vtype_q.vsew.next(); - ara_req_d.conversion_vs2 = OpQueueConversionWideFP2; - ara_req_d.wide_fp_imm = 1'b1; + ara_req.op = ara_pkg::VFMUL; + ara_req.emul = next_lmul(vtype_q.vlmul); + ara_req.vtype.vsew = 
vtype_q.vsew.next(); + ara_req.conversion_vs2 = OpQueueConversionWideFP2; + ara_req.wide_fp_imm = 1'b1; end 6'b111100: begin // VFWMACC - ara_req_d.op = ara_pkg::VFMACC; - ara_req_d.use_vd_op = 1'b1; - ara_req_d.emul = next_lmul(vtype_q.vlmul); - ara_req_d.vtype.vsew = vtype_q.vsew.next(); - ara_req_d.conversion_vs2 = OpQueueConversionWideFP2; - ara_req_d.wide_fp_imm = 1'b1; - ara_req_d.eew_vd_op = vtype_q.vsew.next(); + ara_req.op = ara_pkg::VFMACC; + ara_req.use_vd_op = 1'b1; + ara_req.emul = next_lmul(vtype_q.vlmul); + ara_req.vtype.vsew = vtype_q.vsew.next(); + ara_req.conversion_vs2 = OpQueueConversionWideFP2; + ara_req.wide_fp_imm = 1'b1; + ara_req.eew_vd_op = vtype_q.vsew.next(); end 6'b111101: begin // VFWNMACC - ara_req_d.op = ara_pkg::VFNMACC; - ara_req_d.use_vd_op = 1'b1; - ara_req_d.emul = next_lmul(vtype_q.vlmul); - ara_req_d.vtype.vsew = vtype_q.vsew.next(); - ara_req_d.conversion_vs2 = OpQueueConversionWideFP2; - ara_req_d.wide_fp_imm = 1'b1; - ara_req_d.eew_vd_op = vtype_q.vsew.next(); + ara_req.op = ara_pkg::VFNMACC; + ara_req.use_vd_op = 1'b1; + ara_req.emul = next_lmul(vtype_q.vlmul); + ara_req.vtype.vsew = vtype_q.vsew.next(); + ara_req.conversion_vs2 = OpQueueConversionWideFP2; + ara_req.wide_fp_imm = 1'b1; + ara_req.eew_vd_op = vtype_q.vsew.next(); end 6'b111110: begin // VFWMSAC - ara_req_d.op = ara_pkg::VFMSAC; - ara_req_d.use_vd_op = 1'b1; - ara_req_d.emul = next_lmul(vtype_q.vlmul); - ara_req_d.vtype.vsew = vtype_q.vsew.next(); - ara_req_d.conversion_vs2 = OpQueueConversionWideFP2; - ara_req_d.wide_fp_imm = 1'b1; - ara_req_d.eew_vd_op = vtype_q.vsew.next(); + ara_req.op = ara_pkg::VFMSAC; + ara_req.use_vd_op = 1'b1; + ara_req.emul = next_lmul(vtype_q.vlmul); + ara_req.vtype.vsew = vtype_q.vsew.next(); + ara_req.conversion_vs2 = OpQueueConversionWideFP2; + ara_req.wide_fp_imm = 1'b1; + ara_req.eew_vd_op = vtype_q.vsew.next(); end 6'b111111: begin // VFWNMSAC - ara_req_d.op = ara_pkg::VFNMSAC; - ara_req_d.use_vd_op = 1'b1; - ara_req_d.emul = 
next_lmul(vtype_q.vlmul); - ara_req_d.vtype.vsew = vtype_q.vsew.next(); - ara_req_d.conversion_vs2 = OpQueueConversionWideFP2; - ara_req_d.wide_fp_imm = 1'b1; - ara_req_d.eew_vd_op = vtype_q.vsew.next(); + ara_req.op = ara_pkg::VFNMSAC; + ara_req.use_vd_op = 1'b1; + ara_req.emul = next_lmul(vtype_q.vlmul); + ara_req.vtype.vsew = vtype_q.vsew.next(); + ara_req.conversion_vs2 = OpQueueConversionWideFP2; + ara_req.wide_fp_imm = 1'b1; + ara_req.eew_vd_op = vtype_q.vsew.next(); end default: illegal_insn = 1'b1; endcase // Check if the FP scalar operand is NaN-boxed. If not, replace it with a NaN. case (vtype_q.vsew) - EW16: if (~(&acc_req_i.rs1[63:16])) ara_req_d.scalar_op = 64'h0000000000007e00; - EW32: if (~(&acc_req_i.rs1[63:32])) ara_req_d.scalar_op = 64'h000000007fc00000; + EW16: if (~(&acc_req_i.rs1[63:16])) ara_req.scalar_op = 64'h0000000000007e00; + EW32: if (~(&acc_req_i.rs1[63:32])) ara_req.scalar_op = 64'h000000007fc00000; endcase // Instructions with an integer LMUL have extra constraints on the registers they // can access. The constraints can be different for the two source operands and the // destination register. if (!skip_lmul_checks) begin - unique case (ara_req_d.emul) + unique case (ara_req.emul) LMUL_2 : if ((insn.varith_type.rd & 5'b00001) != 5'b00000) illegal_insn = 1'b1; LMUL_4 : if ((insn.varith_type.rd & 5'b00011) != 5'b00000) illegal_insn = 1'b1; LMUL_8 : if ((insn.varith_type.rd & 5'b00111) != 5'b00000) illegal_insn = 1'b1; @@ -2466,16 +2501,16 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( // Ara can support 16-bit float, 32-bit float, 64-bit float. // Ara cannot support instructions who operates on more than 64 bits. 
unique case (FPUSupport) - FPUSupportHalfSingleDouble: if (int'(ara_req_d.vtype.vsew) < int'(EW16) || - int'(ara_req_d.vtype.vsew) > int'(EW64)) illegal_insn = 1'b1; - FPUSupportHalfSingle: if (int'(ara_req_d.vtype.vsew) < int'(EW16) || - int'(ara_req_d.vtype.vsew) > int'(EW32)) illegal_insn = 1'b1; - FPUSupportSingleDouble: if (int'(ara_req_d.vtype.vsew) < int'(EW32) || - int'(ara_req_d.vtype.vsew) > int'(EW64)) illegal_insn = 1'b1; - FPUSupportHalf: if (int'(ara_req_d.vtype.vsew) != int'(EW16)) illegal_insn = 1'b1; - FPUSupportSingle: if (int'(ara_req_d.vtype.vsew) != int'(EW32)) + FPUSupportHalfSingleDouble: if (int'(ara_req.vtype.vsew) < int'(EW16) || + int'(ara_req.vtype.vsew) > int'(EW64)) illegal_insn = 1'b1; + FPUSupportHalfSingle: if (int'(ara_req.vtype.vsew) < int'(EW16) || + int'(ara_req.vtype.vsew) > int'(EW32)) illegal_insn = 1'b1; + FPUSupportSingleDouble: if (int'(ara_req.vtype.vsew) < int'(EW32) || + int'(ara_req.vtype.vsew) > int'(EW64)) illegal_insn = 1'b1; + FPUSupportHalf: if (int'(ara_req.vtype.vsew) != int'(EW16)) illegal_insn = 1'b1; + FPUSupportSingle: if (int'(ara_req.vtype.vsew) != int'(EW32)) illegal_insn = 1'b1; - FPUSupportDouble: if (int'(ara_req_d.vtype.vsew) != int'(EW64)) + FPUSupportDouble: if (int'(ara_req.vtype.vsew) != int'(EW64)) illegal_insn = 1'b1; default: illegal_insn = 1'b1; // Unsupported configuration endcase @@ -2502,10 +2537,11 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( acc_req_ready_o = 1'b0; // These generate a request to Ara's backend - ara_req_d.vd = insn.vmem_type.rd; - ara_req_d.use_vd = 1'b1; - ara_req_d.vm = insn.vmem_type.vm; - ara_req_d.scalar_op = acc_req_i.rs1; + ara_req.vd = insn.vmem_type.rd; + ara_req.use_vd = 1'b1; + ara_req.vm = insn.vmem_type.vm; + ara_req.scalar_op = acc_req_i.rs1; + ara_req.nf = insn.vmem_type.nf; ara_req_valid_d = 1'b1; // Decode the element width @@ -2513,34 +2549,34 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( unique case 
({insn.vmem_type.mew, insn.vmem_type.width}) 4'b0000: begin if (insn.vmem_type.mop != 2'b01 && insn.vmem_type.mop != 2'b11) begin - ara_req_d.vtype.vsew = EW8; + ara_req.vtype.vsew = EW8; end else begin - ara_req_d.vtype.vsew = vtype_q.vsew; - ara_req_d.eew_vs2 = EW8; + ara_req.vtype.vsew = vtype_q.vsew; + ara_req.eew_vs2 = EW8; end end 4'b0101: begin if (insn.vmem_type.mop != 2'b01 && insn.vmem_type.mop != 2'b11) begin - ara_req_d.vtype.vsew = EW16; + ara_req.vtype.vsew = EW16; end else begin - ara_req_d.vtype.vsew = vtype_q.vsew; - ara_req_d.eew_vs2 = EW16; + ara_req.vtype.vsew = vtype_q.vsew; + ara_req.eew_vs2 = EW16; end end 4'b0110: begin if (insn.vmem_type.mop != 2'b01 && insn.vmem_type.mop != 2'b11) begin - ara_req_d.vtype.vsew = EW32; + ara_req.vtype.vsew = EW32; end else begin - ara_req_d.vtype.vsew = vtype_q.vsew; - ara_req_d.eew_vs2 = EW32; + ara_req.vtype.vsew = vtype_q.vsew; + ara_req.eew_vs2 = EW32; end end 4'b0111: begin if (insn.vmem_type.mop != 2'b01 && insn.vmem_type.mop != 2'b11) begin - ara_req_d.vtype.vsew = EW64; + ara_req.vtype.vsew = EW64; end else begin - ara_req_d.vtype.vsew = vtype_q.vsew; - ara_req_d.eew_vs2 = EW64; + ara_req.vtype.vsew = vtype_q.vsew; + ara_req.eew_vs2 = EW64; end end default: begin // Invalid. Element is too wide, or encoding is non-existant. 
@@ -2554,7 +2590,7 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( // Decode the addressing mode unique case (insn.vmem_type.mop) 2'b00: begin - ara_req_d.op = VLE; + ara_req.op = VLE; // Decode the lumop field case (insn.vmem_type.rs2) @@ -2562,8 +2598,8 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( 5'b01000:; // Unit-strided, whole registers 5'b01011: begin // Unit-strided, mask load, EEW=1 // We operate ceil(vl/8) bytes - ara_req_d.vl = (vl_q >> 3) + |vl_q[2:0]; - ara_req_d.vtype.vsew = EW8; + ara_req.vl = (vl_q >> 3) + |vl_q[2:0]; + ara_req.vtype.vsew = EW8; end 5'b10000: begin // Unit-strided, fault-only first // TODO: Not implemented @@ -2579,29 +2615,29 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( endcase end 2'b10: begin - ara_req_d.op = VLSE; - ara_req_d.stride = acc_req_i.rs2; + ara_req.op = VLSE; + ara_req.stride = acc_req_i.rs2; end 2'b01, // Indexed-unordered 2'b11: begin // Indexed-ordered - ara_req_d.op = VLXE; + ara_req.op = VLXE; // These also read vs2 - ara_req_d.vs2 = insn.vmem_type.rs2; - ara_req_d.use_vs2 = 1'b1; + ara_req.vs2 = insn.vmem_type.rs2; + ara_req.use_vs2 = 1'b1; end default:; endcase // For memory operations: EMUL = LMUL * (EEW / SEW) // EEW is encoded in the instruction - ara_req_d.emul = vlmul_e'(vtype_q.vlmul + (ara_req_d.vtype.vsew - vtype_q.vsew)); + ara_req.emul = vlmul_e'(vtype_q.vlmul + (ara_req.vtype.vsew - vtype_q.vsew)); // Exception if EMUL > 8 or < 1/8 - unique case ({vtype_q.vlmul[2], ara_req_d.emul[2]}) + unique case ({vtype_q.vlmul[2], ara_req.emul[2]}) // The new emul is lower than the previous lmul 2'b01: begin // But the new eew is greater than vsew - if (signed'(ara_req_d.vtype.vsew - vtype_q.vsew) > 0) begin + if (signed'(ara_req.vtype.vsew - vtype_q.vsew) > 0) begin illegal_insn = 1'b1; acc_resp_valid_o = 1'b1; end @@ -2609,7 +2645,7 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( // The new emul is greater than the previous lmul 
2'b10: begin // But the new eew is lower than vsew - if (signed'(ara_req_d.vtype.vsew - vtype_q.vsew) < 0) begin + if (signed'(ara_req.vtype.vsew - vtype_q.vsew) < 0) begin illegal_insn = 1'b1; acc_resp_valid_o = 1'b1; end @@ -2619,7 +2655,7 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( // Instructions with an integer LMUL have extra constraints on the registers they can // access. - unique case (ara_req_d.emul) + unique case (ara_req.emul) LMUL_2: if ((insn.varith_type.rd & 5'b00001) != 5'b00000) begin illegal_insn = 1'b1; acc_resp_valid_o = 1'b1; @@ -2639,8 +2675,35 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( default:; endcase + // Check for segment loads (whole-register loads reuse nf as a register count and are excluded) + if (ara_req.nf != 3'b000 && !(ara_req.op == VLE && insn.vmem_type.rs2 == 5'b01000)) begin + // This is a segment load instruction, with NFIELDS = nf + 1 + is_segment_mem_op = 1'b1; + // Wait for idle not to mess with load/store_complete_i + // since the segment sequencer filters these signals + if (!segment_micro_op_on) state_d = WAIT_IDLE; + // Check that EMUL * NFIELDS <= 8 + if (!ara_req.emul[2]) begin + // emul >= 1: the unsized literals force a 32-bit evaluation + if (((ara_req.nf + 1) << ara_req.emul[1:0]) > 8) + illegal_insn = 1'b1; + end else begin + // emul < 1 + if ((ara_req.nf >> ara_req.emul[1:0]) > 8) + illegal_insn = 1'b1; + end + // Check if we will not access vector regs past 31 (evaluated on 32 bits to avoid a 5-bit wrap-around) + if (!ara_req.emul[2]) begin + if ((ara_req.vd + ((ara_req.nf + 1) << ara_req.emul[1:0])) > 32) + illegal_insn = 1'b1; + end else begin + if ((ara_req.vd + ara_req.nf) > 31) + illegal_insn = 1'b1; + end + end + // Vector whole register loads overwrite all the other decoding information. - if (ara_req_d.op == VLE && insn.vmem_type.rs2 == 5'b01000) begin + if (ara_req.op == VLE && insn.vmem_type.rs2 == 5'b01000) begin // Execute also if vl == 0 ignore_zero_vl_check = 1'b1; // The LMUL value is kept in the instruction itself @@ -2650,23 +2713,23 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( ara_req_valid_d = 1'b1; // Maximum vector length. VLMAX = nf * VLEN / EW8.
- ara_req_d.vtype.vsew = EW8; + ara_req.vtype.vsew = EW8; unique case (insn.vmem_type.nf) 3'd0: begin - ara_req_d.vl = VLENB << 0; - ara_req_d.emul = LMUL_1; + ara_req.vl = VLENB << 0; + ara_req.emul = LMUL_1; end 3'd1: begin - ara_req_d.vl = VLENB << 1; - ara_req_d.emul = LMUL_2; + ara_req.vl = VLENB << 1; + ara_req.emul = LMUL_2; end 3'd3: begin - ara_req_d.vl = VLENB << 2; - ara_req_d.emul = LMUL_4; + ara_req.vl = VLENB << 2; + ara_req.emul = LMUL_4; end 3'd7: begin - ara_req_d.vl = VLENB << 3; - ara_req_d.emul = LMUL_8; + ara_req.vl = VLENB << 3; + ara_req.emul = LMUL_8; end default: begin // Trigger an error for the reserved simm values @@ -2676,14 +2739,14 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( end // Wait until the back-end answers to acknowledge those instructions - if (ara_resp_valid_i) begin + if (ara_resp_valid) begin acc_req_ready_o = 1'b1; - acc_resp_o.error = ara_resp_i.error; + acc_resp_o.error = ara_resp.error; acc_resp_valid_o = 1'b1; ara_req_valid_d = 1'b0; // In case of error, modify vstart - if (ara_resp_i.error) - vstart_d = ara_resp_i.error_vl; + if (ara_resp.error) + vstart_d = ara_resp.error_vl; end end @@ -2692,8 +2755,8 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( ///////////////////// // Vector stores encode: - // - The target EEW in ara_req_d.vtype.vsew - // - The EEW of the source register in ara_req_d.eew_vs1 + // - The target EEW in ara_req.vtype.vsew + // - The EEW of the source register in ara_req.eew_vs1 // The current vector length refers to the target EEW! // Vector stores never re-shuffle the source register! 
@@ -2711,14 +2774,15 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( // Ara does not reshuffle source vregs upon vector stores, // thus the operand requesters will fetch Bytes referring // to the encoding of the source register - ara_req_d.scale_vl = 1'b1; + ara_req.scale_vl = 1'b1; // These generate a request to Ara's backend - ara_req_d.vs1 = insn.vmem_type.rd; // vs3 is encoded in the same position as rd - ara_req_d.use_vs1 = 1'b1; - ara_req_d.eew_vs1 = eew_q[insn.vmem_type.rd]; // This is the vs1 EEW - ara_req_d.vm = insn.vmem_type.vm; - ara_req_d.scalar_op = acc_req_i.rs1; + ara_req.vs1 = insn.vmem_type.rd; // vs3 is encoded in the same position as rd + ara_req.use_vs1 = 1'b1; + ara_req.eew_vs1 = eew_q[insn.vmem_type.rd]; // This is the vs1 EEW + ara_req.vm = insn.vmem_type.vm; + ara_req.nf = insn.vmem_type.nf; + ara_req.scalar_op = acc_req_i.rs1; ara_req_valid_d = 1'b1; // Decode the element width @@ -2726,34 +2790,34 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( unique case ({insn.vmem_type.mew, insn.vmem_type.width}) 4'b0000: begin if (insn.vmem_type.mop != 2'b01 && insn.vmem_type.mop != 2'b11) begin - ara_req_d.vtype.vsew = EW8; // ara_req_d.vtype.vsew is the target EEW! + ara_req.vtype.vsew = EW8; // ara_req.vtype.vsew is the target EEW! 
end else begin - ara_req_d.vtype.vsew = vtype_q.vsew; - ara_req_d.eew_vs2 = EW8; + ara_req.vtype.vsew = vtype_q.vsew; + ara_req.eew_vs2 = EW8; end end 4'b0101: begin if (insn.vmem_type.mop != 2'b01 && insn.vmem_type.mop != 2'b11) begin - ara_req_d.vtype.vsew = EW16; + ara_req.vtype.vsew = EW16; end else begin - ara_req_d.vtype.vsew = vtype_q.vsew; - ara_req_d.eew_vs2 = EW16; + ara_req.vtype.vsew = vtype_q.vsew; + ara_req.eew_vs2 = EW16; end end 4'b0110: begin if (insn.vmem_type.mop != 2'b01 && insn.vmem_type.mop != 2'b11) begin - ara_req_d.vtype.vsew = EW32; + ara_req.vtype.vsew = EW32; end else begin - ara_req_d.vtype.vsew = vtype_q.vsew; - ara_req_d.eew_vs2 = EW32; + ara_req.vtype.vsew = vtype_q.vsew; + ara_req.eew_vs2 = EW32; end end 4'b0111: begin if (insn.vmem_type.mop != 2'b01 && insn.vmem_type.mop != 2'b11) begin - ara_req_d.vtype.vsew = EW64; + ara_req.vtype.vsew = EW64; end else begin - ara_req_d.vtype.vsew = vtype_q.vsew; - ara_req_d.eew_vs2 = EW64; + ara_req.vtype.vsew = vtype_q.vsew; + ara_req.eew_vs2 = EW64; end end default: begin // Invalid. Element is too wide, or encoding is non-existant. 
@@ -2767,7 +2831,7 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( // Decode the addressing mode unique case (insn.vmem_type.mop) 2'b00: begin - ara_req_d.op = VSE; + ara_req.op = VSE; // Decode the sumop field unique case (insn.vmem_type.rs2) @@ -2775,8 +2839,8 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( 5'b01000:; // Unit-strided, whole registers 5'b01011: begin // Unit-strided, mask store, EEW=1 // We operate ceil(vl/8) bytes - ara_req_d.vl = (vl_q >> 3) + |vl_q[2:0]; - ara_req_d.vtype.vsew = EW8; + ara_req.vl = (vl_q >> 3) + |vl_q[2:0]; + ara_req.vtype.vsew = EW8; end default: begin // Reserved illegal_insn = 1'b1; @@ -2786,29 +2850,29 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( endcase end 2'b10: begin - ara_req_d.op = VSSE; - ara_req_d.stride = acc_req_i.rs2; + ara_req.op = VSSE; + ara_req.stride = acc_req_i.rs2; end 2'b01, // Indexed-unordered 2'b11: begin // Indexed-orderd - ara_req_d.op = VSXE; + ara_req.op = VSXE; // These also read vs2 - ara_req_d.vs2 = insn.vmem_type.rs2; - ara_req_d.use_vs2 = 1'b1; + ara_req.vs2 = insn.vmem_type.rs2; + ara_req.use_vs2 = 1'b1; end default:; endcase // For memory operations: EMUL = LMUL * (EEW / SEW) // EEW is encoded in the instruction - ara_req_d.emul = vlmul_e'(vtype_q.vlmul + (ara_req_d.vtype.vsew - vtype_q.vsew)); + ara_req.emul = vlmul_e'(vtype_q.vlmul + (ara_req.vtype.vsew - vtype_q.vsew)); // Exception if EMUL > 8 or < 1/8 - unique case ({vtype_q.vlmul[2], ara_req_d.emul[2]}) + unique case ({vtype_q.vlmul[2], ara_req.emul[2]}) // The new emul is lower than the previous lmul 2'b01: begin // But the new eew is greater than vsew - if (signed'(ara_req_d.vtype.vsew - vtype_q.vsew) > 0) begin + if (signed'(ara_req.vtype.vsew - vtype_q.vsew) > 0) begin illegal_insn = 1'b1; acc_resp_valid_o = 1'b1; end @@ -2816,7 +2880,7 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( // The new emul is greater than the previous lmul 2'b10: begin // But the 
new eew is lower than vsew - if (signed'(ara_req_d.vtype.vsew - vtype_q.vsew) < 0) begin + if (signed'(ara_req.vtype.vsew - vtype_q.vsew) < 0) begin illegal_insn = 1'b1; acc_resp_valid_o = 1'b1; end @@ -2826,7 +2890,7 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( // Instructions with an integer LMUL have extra constraints on the registers they can // access. - unique case (ara_req_d.emul) + unique case (ara_req.emul) LMUL_2: if ((insn.varith_type.rd & 5'b00001) != 5'b00000) begin illegal_insn = 1'b1; acc_resp_valid_o = 1'b1; @@ -2846,30 +2910,57 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( default:; endcase + // Check for segment stores (whole-register stores reuse nf as a register count and are excluded) + if (ara_req.nf != 3'b000 && !(ara_req.op == VSE && insn.vmem_type.rs2 == 5'b01000)) begin + // This is a segment store instruction, with NFIELDS = nf + 1 + is_segment_mem_op = 1'b1; + // Wait for idle not to mess with load/store_complete_i + // since the segment sequencer filters these signals + if (!segment_micro_op_on) state_d = WAIT_IDLE; + // Check that EMUL * NFIELDS <= 8 + if (!ara_req.emul[2]) begin + // emul >= 1: the unsized literals force a 32-bit evaluation + if (((ara_req.nf + 1) << ara_req.emul[1:0]) > 8) + illegal_insn = 1'b1; + end else begin + // emul < 1 + if ((ara_req.nf >> ara_req.emul[1:0]) > 8) + illegal_insn = 1'b1; + end + // Check if we will not access vector regs past 31 (stores read vs3, held in vs1; 32-bit evaluation avoids a 5-bit wrap-around) + if (!ara_req.emul[2]) begin + if ((ara_req.vs1 + ((ara_req.nf + 1) << ara_req.emul[1:0])) > 32) + illegal_insn = 1'b1; + end else begin + if ((ara_req.vs1 + ara_req.nf) > 31) + illegal_insn = 1'b1; + end + end + // Vector whole register stores are encoded as stores of length VLENB, length // multiplier LMUL_1 and element width EW8. They overwrite all this decoding. - if (ara_req_d.op == VSE && insn.vmem_type.rs2 == 5'b01000) begin + if (ara_req.op == VSE && insn.vmem_type.rs2 == 5'b01000) begin // Execute also if vl == 0 ignore_zero_vl_check = 1'b1; // Maximum vector length. VLMAX = nf * VLEN / EW8.
- ara_req_d.vtype.vsew = EW8; + ara_req.vtype.vsew = EW8; unique case (insn.vmem_type.nf) 3'd0: begin - ara_req_d.vl = VLENB << 0; - ara_req_d.emul = LMUL_1; + ara_req.vl = VLENB << 0; + ara_req.emul = LMUL_1; end 3'd1: begin - ara_req_d.vl = VLENB << 1; - ara_req_d.emul = LMUL_2; + ara_req.vl = VLENB << 1; + ara_req.emul = LMUL_2; end 3'd3: begin - ara_req_d.vl = VLENB << 2; - ara_req_d.emul = LMUL_4; + ara_req.vl = VLENB << 2; + ara_req.emul = LMUL_4; end 3'd7: begin - ara_req_d.vl = VLENB << 3; - ara_req_d.emul = LMUL_8; + ara_req.vl = VLENB << 3; + ara_req.emul = LMUL_8; end default: begin // Trigger an error for the reserved simm values @@ -2884,14 +2975,14 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( end // Wait until the back-end answers to acknowledge those instructions - if (ara_resp_valid_i) begin + if (ara_resp_valid) begin acc_req_ready_o = 1'b1; - acc_resp_o.error = ara_resp_i.error; + acc_resp_o.error = ara_resp.error; acc_resp_valid_o = 1'b1; ara_req_valid_d = 1'b0; // If there is an error, change vstart - if (ara_resp_i.error) - vstart_d = ara_resp_i.error_vl; + if (ara_resp.error) + vstart_d = ara_resp.error_vl; end end @@ -3084,11 +3175,11 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( // Check that we have fixed-point support if requested // vxsat and vxrm are always accessible anyway - if (ara_req_valid_d && (ara_req_d.op inside {[VSADDU:VNCLIPU], VSMUL}) && (FixPtSupport == FixedPointDisable)) + if (ara_req_valid_d && (ara_req.op inside {[VSADDU:VNCLIPU], VSMUL}) && (FixPtSupport == FixedPointDisable)) illegal_insn = 1'b1; // Check that we have we have vfrec7, vfrsqrt7 - if (ara_req_valid_d && (ara_req_d.op inside {VFREC7, VFRSQRT7}) && (FPExtSupport == FPExtSupportDisable)) + if (ara_req_valid_d && (ara_req.op inside {VFREC7, VFRSQRT7}) && (FPExtSupport == FPExtSupportDisable)) illegal_insn = 1'b1; // Check if we need to reshuffle our vector registers involved in the operation @@ -3097,31 +3188,31 @@ 
module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( automatic rvv_instruction_t insn = rvv_instruction_t'(acc_req_i.insn.instr); // Is the instruction an in-lane one and could it be subject to reshuffling? - in_lane_op = ara_req_d.op inside {[VADD:VMERGE]} || ara_req_d.op inside {[VREDSUM:VMSBC]} || - ara_req_d.op inside {[VMANDNOT:VMXNOR]} || ara_req_d.op inside {VSLIDEUP, VSLIDEDOWN}; + in_lane_op = ara_req.op inside {[VADD:VMERGE]} || ara_req.op inside {[VREDSUM:VMSBC]} || + ara_req.op inside {[VMANDNOT:VMXNOR]} || ara_req.op inside {VSLIDEUP, VSLIDEDOWN}; // Annotate which registers need a reshuffle -> |vs1|vs2|vd| // Optimization: reshuffle vs1 and vs2 only if the operation is strictly in-lane // Optimization: reshuffle vd only if we are not overwriting the whole vector register! - reshuffle_req_d = {ara_req_d.use_vs1 && (ara_req_d.eew_vs1 != eew_q[ara_req_d.vs1]) && eew_valid_q[ara_req_d.vs1] && in_lane_op, - ara_req_d.use_vs2 && (ara_req_d.eew_vs2 != eew_q[ara_req_d.vs2]) && eew_valid_q[ara_req_d.vs2] && in_lane_op, - ara_req_d.use_vd && (ara_req_d.vtype.vsew != eew_q[ara_req_d.vd ]) && eew_valid_q[ara_req_d.vd ] && vl_q != (VLENB >> ara_req_d.vtype.vsew)}; + reshuffle_req_d = {ara_req.use_vs1 && (ara_req.eew_vs1 != eew_q[ara_req.vs1]) && eew_valid_q[ara_req.vs1] && in_lane_op, + ara_req.use_vs2 && (ara_req.eew_vs2 != eew_q[ara_req.vs2]) && eew_valid_q[ara_req.vs2] && in_lane_op, + ara_req.use_vd && (ara_req.vtype.vsew != eew_q[ara_req.vd ]) && eew_valid_q[ara_req.vd ] && vl_q != (VLENB >> ara_req.vtype.vsew)}; // Prepare the information to reshuffle the vector registers during the next cycles // Reshuffle in the following order: vd, v2, v1. The order is arbitrary. 
unique casez (reshuffle_req_d) 3'b??1: begin eew_old_buffer_d = eew_q[insn.vmem_type.rd]; - eew_new_buffer_d = ara_req_d.vtype.vsew; + eew_new_buffer_d = ara_req.vtype.vsew; vs_buffer_d = insn.varith_type.rd; end 3'b?10: begin eew_old_buffer_d = eew_q[insn.vmem_type.rs2]; - eew_new_buffer_d = ara_req_d.eew_vs2; + eew_new_buffer_d = ara_req.eew_vs2; vs_buffer_d = insn.varith_type.rs2; end 3'b100: begin eew_old_buffer_d = eew_q[insn.vmem_type.rs1]; - eew_new_buffer_d = ara_req_d.eew_vs1; + eew_new_buffer_d = ara_req.eew_vs1; vs_buffer_d = insn.varith_type.rs1; end default:; @@ -3139,7 +3230,7 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( ara_req_valid_d = 1'b0; // Initialize the reshuffle counter limit to handle LMUL > 1 - unique case (ara_req_d.emul) + unique case (ara_req.emul) LMUL_2: rs_lmul_cnt_limit_d = 1; LMUL_4: rs_lmul_cnt_limit_d = 3; LMUL_8: rs_lmul_cnt_limit_d = 7; @@ -3159,35 +3250,35 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( // Update the EEW if (ara_req_valid_d && ara_req_d.use_vd && ara_req_ready_i) begin - unique case (ara_req_d.emul) + unique case (ara_req.emul) LMUL_1: begin for (int i = 0; i < 1; i++) begin - eew_d[ara_req_d.vd + i] = ara_req_d.vtype.vsew; - eew_valid_d[ara_req_d.vd + i] = 1'b1; + eew_d[ara_req.vd + i] = ara_req.vtype.vsew; + eew_valid_d[ara_req.vd + i] = 1'b1; end end LMUL_2: begin for (int i = 0; i < 2; i++) begin - eew_d[ara_req_d.vd + i] = ara_req_d.vtype.vsew; - eew_valid_d[ara_req_d.vd + i] = 1'b1; + eew_d[ara_req.vd + i] = ara_req.vtype.vsew; + eew_valid_d[ara_req.vd + i] = 1'b1; end end LMUL_4: begin for (int i = 0; i < 4; i++) begin - eew_d[ara_req_d.vd + i] = ara_req_d.vtype.vsew; - eew_valid_d[ara_req_d.vd + i] = 1'b1; + eew_d[ara_req.vd + i] = ara_req.vtype.vsew; + eew_valid_d[ara_req.vd + i] = 1'b1; end end LMUL_8: begin for (int i = 0; i < 8; i++) begin - eew_d[ara_req_d.vd + i] = ara_req_d.vtype.vsew; - eew_valid_d[ara_req_d.vd + i] = 1'b1; + eew_d[ara_req.vd + i] = 
ara_req.vtype.vsew; + eew_valid_d[ara_req.vd + i] = 1'b1; end end default: begin // EMUL < 1 for (int i = 0; i < 1; i++) begin - eew_d[ara_req_d.vd + i] = ara_req_d.vtype.vsew; - eew_valid_d[ara_req_d.vd + i] = 1'b1; + eew_d[ara_req.vd + i] = ara_req.vtype.vsew; + eew_valid_d[ara_req.vd + i] = 1'b1; end end endcase @@ -3212,7 +3303,7 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( acc_resp_o.store_complete = store_zero_vl | store_complete_q; // The token must change at every new instruction - ara_req_d.token = (ara_req_valid_o && ara_req_ready_i) ? ~ara_req_o.token : ara_req_o.token; + ara_req.token = (ara_req_valid_o && ara_req_ready_i) ? ~ara_req_o.token : ara_req_o.token; end: p_decoder endmodule : ara_dispatcher diff --git a/hardware/src/ara_soc.sv b/hardware/src/ara_soc.sv index f65d37160..4e8347410 100644 --- a/hardware/src/ara_soc.sv +++ b/hardware/src/ara_soc.sv @@ -13,6 +13,8 @@ module ara_soc import axi_pkg::*; import ara_pkg::*; #( parameter fpu_support_e FPUSupport = FPUSupportHalfSingleDouble, // External support for vfrec7, vfrsqrt7 parameter fpext_support_e FPExtSupport = FPExtSupportEnable, + // Support for segment memory operations + parameter seg_support_e SegSupport = SegSupportEnable, // Support for fixed-point data types parameter fixpt_support_e FixPtSupport = FixedPointEnable, // AXI Interface @@ -469,6 +471,7 @@ module ara_soc import axi_pkg::*; import ara_pkg::*; #( .NrLanes (NrLanes ), .FPUSupport (FPUSupport ), .FPExtSupport (FPExtSupport ), + .SegSupport (SegSupport ), .FixPtSupport (FixPtSupport ), .ArianeCfg (ArianeAraConfig ), .AxiAddrWidth (AxiAddrWidth ), diff --git a/hardware/src/ara_system.sv b/hardware/src/ara_system.sv index f8c32d44e..bca901970 100644 --- a/hardware/src/ara_system.sv +++ b/hardware/src/ara_system.sv @@ -13,6 +13,8 @@ module ara_system import axi_pkg::*; import ara_pkg::*; #( parameter fpu_support_e FPUSupport = FPUSupportHalfSingleDouble, // External support for vfrec7, vfrsqrt7 parameter 
fpext_support_e FPExtSupport = FPExtSupportEnable, + // Support for segment memory operations + parameter seg_support_e SegSupport = SegSupportEnable, // Support for fixed-point data types parameter fixpt_support_e FixPtSupport = FixedPointEnable, // Ariane configuration @@ -194,6 +196,7 @@ module ara_system import axi_pkg::*; import ara_pkg::*; #( .NrLanes (NrLanes ), .FPUSupport (FPUSupport ), .FPExtSupport(FPExtSupport ), + .SegSupport (SegSupport ), .FixPtSupport(FixPtSupport ), .AxiDataWidth(AxiWideDataWidth), .AxiAddrWidth(AxiAddrWidth ),