From 6417d0da2fee00c59460432e75169ba7b1b53055 Mon Sep 17 00:00:00 2001 From: qinjun-li Date: Wed, 11 Dec 2024 18:09:37 +0800 Subject: [PATCH] [rtl] duplicate v0 in lsu. --- t1/src/Bundles.scala | 6 ++-- t1/src/Lane.scala | 64 +++++++++++++++++++------------------- t1/src/T1.scala | 11 +++---- t1/src/lsu/LSU.scala | 34 ++++++++++++++------ t1/src/mask/MaskUnit.scala | 13 ++------ 5 files changed, 67 insertions(+), 61 deletions(-) diff --git a/t1/src/Bundles.scala b/t1/src/Bundles.scala index 5dd255384..d59a97d75 100644 --- a/t1/src/Bundles.scala +++ b/t1/src/Bundles.scala @@ -360,9 +360,9 @@ class LaneResponseFeedback(param: LaneParameter) extends Bundle { val complete: Bool = Bool() } -class V0Update(param: LaneParameter) extends Bundle { - val data: UInt = UInt(param.datapathWidth.W) - val offset: UInt = UInt(param.vrfOffsetBits.W) +class V0Update(datapathWidth: Int, vrfOffsetBits: Int) extends Bundle { + val data: UInt = UInt(datapathWidth.W) + val offset: UInt = UInt(vrfOffsetBits.W) // mask/ld类型的有可能不会写完整的32bit val mask: UInt = UInt(4.W) } diff --git a/t1/src/Lane.scala b/t1/src/Lane.scala index 0f241c091..5d5e5f08f 100644 --- a/t1/src/Lane.scala +++ b/t1/src/Lane.scala @@ -276,7 +276,7 @@ class Lane(val parameter: LaneParameter) extends Module with SerializableModule[ /** V0 update in the lane should also update [[T1.v0]] */ @public - val v0Update: ValidIO[V0Update] = IO(Valid(new V0Update(parameter))) + val v0Update: ValidIO[V0Update] = IO(Valid(new V0Update(parameter.datapathWidth, parameter.vrfOffsetBits))) /** input of mask data */ @public @@ -371,33 +371,33 @@ class Lane(val parameter: LaneParameter) extends Module with SerializableModule[ // todo: mv to bundle.scala class MaskControl(parameter: LaneParameter) extends Bundle { - val index: UInt = UInt(parameter.instructionIndexBits.W) - val sew: UInt = UInt(2.W) - val maskData: UInt = UInt(parameter.datapathWidth.W) - val group: UInt = UInt(parameter.maskGroupSizeBits.W) - val dataValid: Bool = Bool() + 
val index: UInt = UInt(parameter.instructionIndexBits.W) + val sew: UInt = UInt(2.W) + val maskData: UInt = UInt(parameter.datapathWidth.W) + val group: UInt = UInt(parameter.maskGroupSizeBits.W) + val dataValid: Bool = Bool() val waiteResponse: Bool = Bool() - val controlValid: Bool = Bool() + val controlValid: Bool = Bool() } val maskControlRelease: Vec[ValidIO[UInt]] = Wire(Vec(parameter.chainingSize, Valid(UInt(parameter.instructionIndexBits.W)))) - val maskControlEnq: UInt = Wire(UInt(parameter.chainingSize.W)) - val maskControlDataDeq: UInt = Wire(UInt(parameter.chainingSize.W)) - val maskControlReq: Vec[Bool] = Wire(Vec(parameter.chainingSize, Bool())) - val maskControlReqSelect: UInt = ffo(maskControlReq.asUInt) + val maskControlEnq: UInt = Wire(UInt(parameter.chainingSize.W)) + val maskControlDataDeq: UInt = Wire(UInt(parameter.chainingSize.W)) + val maskControlReq: Vec[Bool] = Wire(Vec(parameter.chainingSize, Bool())) + val maskControlReqSelect: UInt = ffo(maskControlReq.asUInt) // mask request & response handle - val maskControlVec: Seq[MaskControl] = Seq.tabulate(parameter.chainingSize) { index => + val maskControlVec: Seq[MaskControl] = Seq.tabulate(parameter.chainingSize) { index => val state = RegInit(0.U.asTypeOf(new MaskControl(parameter))) val releaseHit: Bool = maskControlRelease.map(r => r.valid && (r.bits === state.index)).reduce(_ || _) val responseFire = Pipe(maskControlReqSelect(index), 0.U.asTypeOf(new EmptyBundle), parameter.maskRequestLatency).valid when(maskControlEnq(index)) { - state := 0.U.asTypeOf(state) - state.index := laneRequest.bits.instructionIndex - state.sew := laneRequest.bits.csrInterface.vSew + state := 0.U.asTypeOf(state) + state.index := laneRequest.bits.instructionIndex + state.sew := laneRequest.bits.csrInterface.vSew state.controlValid := true.B } @@ -410,13 +410,13 @@ class Lane(val parameter: LaneParameter) extends Module with SerializableModule[ maskControlReq(index) := state.controlValid && !state.dataValid && 
!state.waiteResponse when(maskControlReqSelect(index)) { state.waiteResponse := true.B - state.group := state.group + 1.U + state.group := state.group + 1.U } when(responseFire) { - state.dataValid := true.B + state.dataValid := true.B state.waiteResponse := false.B - state.maskData := maskInput + state.maskData := maskInput } when(maskControlDataDeq(index)) { @@ -425,8 +425,8 @@ class Lane(val parameter: LaneParameter) extends Module with SerializableModule[ state } - val maskControlFree: Seq[Bool] = maskControlVec.map(s => !s.controlValid && !s.waiteResponse) - val freeSelect: UInt = ffo(VecInit(maskControlFree).asUInt) + val maskControlFree: Seq[Bool] = maskControlVec.map(s => !s.controlValid && !s.waiteResponse) + val freeSelect: UInt = ffo(VecInit(maskControlFree).asUInt) maskControlEnq := maskAnd(laneRequest.fire && laneRequest.bits.mask, freeSelect) /** for each slot, assert when it is asking [[T1]] to change mask */ @@ -439,7 +439,7 @@ class Lane(val parameter: LaneParameter) extends Module with SerializableModule[ /** which slot wins the arbitration for requesting mask. */ val maskRequestFireOH: Vec[Bool] = Wire(Vec(parameter.chainingSize, Bool())) - val maskDataVec: Vec[UInt] = Wire(Vec(parameter.chainingSize, UInt(parameter.maskGroupWidth.W))) + val maskDataVec: Vec[UInt] = Wire(Vec(parameter.chainingSize, UInt(parameter.maskGroupWidth.W))) /** FSM control for each slot. 
if index == 0, * - slot can support write v0 in mask type, see [[Decoder.maskDestination]] [[Decoder.maskSource]] @@ -663,14 +663,14 @@ class Lane(val parameter: LaneParameter) extends Module with SerializableModule[ } maskControlRelease(index).valid := false.B - maskControlRelease(index).bits := record.laneRequest.instructionIndex + maskControlRelease(index).bits := record.laneRequest.instructionIndex // update lane state when(stage0.enqueue.fire) { maskGroupCountVec(index) := stage0.updateLaneState.maskGroupCount // todo: handle all elements in first group are masked maskIndexVec(index) := stage0.updateLaneState.maskIndex when(stage0.updateLaneState.outOfExecutionRange) { - slotOccupied(index) := false.B + slotOccupied(index) := false.B maskControlRelease(index).valid := true.B } } @@ -962,19 +962,19 @@ class Lane(val parameter: LaneParameter) extends Module with SerializableModule[ } { - maskSelect := Mux1H(maskControlReqSelect, maskControlVec.map(_.group)) - maskSelectSew := Mux1H(maskControlReqSelect, maskControlVec.map(_.sew)) + maskSelect := Mux1H(maskControlReqSelect, maskControlVec.map(_.group)) + maskSelectSew := Mux1H(maskControlReqSelect, maskControlVec.map(_.sew)) maskControlDataDeq := slotMaskRequestVec.zipWithIndex.map { case (req, index) => - val slotIndex = slotControl(index).laneRequest.instructionIndex - val hitMaskControl = VecInit(maskControlVec.map(_.index === slotIndex)).asUInt - val dataValid = Mux1H(hitMaskControl, maskControlVec.map(_.dataValid)) - val data = Mux1H(hitMaskControl, maskControlVec.map(_.maskData)) - val group = Mux1H(hitMaskControl, maskControlVec.map(_.group)) - val sameGroup = group === req.bits + val slotIndex = slotControl(index).laneRequest.instructionIndex + val hitMaskControl = VecInit(maskControlVec.map(_.index === slotIndex)).asUInt + val dataValid = Mux1H(hitMaskControl, maskControlVec.map(_.dataValid)) + val data = Mux1H(hitMaskControl, maskControlVec.map(_.maskData)) + val group = Mux1H(hitMaskControl, 
maskControlVec.map(_.group)) + val sameGroup = group === req.bits dontTouch(sameGroup) val maskRequestFire = req.valid && dataValid maskRequestFireOH(index) := maskRequestFire - maskDataVec(index) := data + maskDataVec(index) := data maskAnd(maskRequestFire, hitMaskControl).asUInt }.reduce(_ | _) } diff --git a/t1/src/T1.scala b/t1/src/T1.scala index 0418d056a..0d71bf6a5 100644 --- a/t1/src/T1.scala +++ b/t1/src/T1.scala @@ -777,6 +777,7 @@ class T1(val parameter: T1Parameter) maskUnit.io.laneMaskSelect(index) := Pipe(true.B, lane.maskSelect, parameter.maskRequestLatency).bits maskUnit.io.laneMaskSewSelect(index) := Pipe(true.B, lane.maskSelectSew, parameter.maskRequestLatency).bits maskUnit.io.v0UpdateVec(index) <> lane.v0Update + lsu.v0UpdateVec(index) <> lane.v0Update lane.lsuLastReport := lsu.lastReport | maskUnit.io.lastReport @@ -811,12 +812,10 @@ class T1(val parameter: T1Parameter) lsu.request.bits.instructionInformation.isStore := isStoreType lsu.request.bits.instructionInformation.maskedLoadStore := maskType - maskUnit.io.lsuMaskSelect := lsu.maskSelect - lsu.maskInput := maskUnit.io.lsuMaskInput - lsu.csrInterface := requestRegCSR - lsu.csrInterface.vl := evlForLsu - lsu.writeReadyForLsu := VecInit(laneVec.map(_.writeReadyForLsu)).asUInt.andR - lsu.vrfReadyToStore := VecInit(laneVec.map(_.vrfReadyToStore)).asUInt.andR + lsu.csrInterface := requestRegCSR + lsu.csrInterface.vl := evlForLsu + lsu.writeReadyForLsu := VecInit(laneVec.map(_.writeReadyForLsu)).asUInt.andR + lsu.vrfReadyToStore := VecInit(laneVec.map(_.vrfReadyToStore)).asUInt.andR // connect mask unit maskUnit.io.instReq.valid := requestRegDequeue.fire && requestReg.bits.decodeResult(Decoder.maskUnit) diff --git a/t1/src/lsu/LSU.scala b/t1/src/lsu/LSU.scala index b7bd81202..cd29f90be 100644 --- a/t1/src/lsu/LSU.scala +++ b/t1/src/lsu/LSU.scala @@ -118,15 +118,10 @@ class LSU(param: LSUParameter) extends Module { @public val request: DecoupledIO[LSURequest] = IO(Flipped(Decoupled(new 
LSURequest(param.datapathWidth)))) - /** mask from [[V]] TODO: since mask is one-cycle information for a mask group, we should latch it in the LSU, and - * reduce the IO width. this needs PnR information. - */ - @public - val maskInput: Vec[UInt] = IO(Input(Vec(param.lsuMSHRSize, UInt(param.maskGroupWidth.W)))) - - /** the address of the mask group in the [[V]]. */ @public - val maskSelect: Vec[UInt] = IO(Output(Vec(param.lsuMSHRSize, UInt(param.maskGroupSizeBits.W)))) + val v0UpdateVec: Vec[ValidIO[V0Update]] = IO( + Flipped(Vec(param.laneNumber, Valid(new V0Update(param.datapathWidth, param.vrfOffsetBits)))) + ) @public val axi4Port: AXI4RWIrrevocable = IO(new AXI4RWIrrevocable(param.axi4BundleParameter)) @@ -197,6 +192,25 @@ class LSU(param: LSUParameter) extends Module { val storeUnit: StoreUnit = Module(new StoreUnit(param.mshrParam)) val otherUnit: SimpleAccessUnit = Module(new SimpleAccessUnit(param.mshrParam)) + /** duplicate v0 in lsu */ + val v0: Vec[UInt] = RegInit( + VecInit(Seq.fill(param.vLen / param.datapathWidth)(0.U(param.datapathWidth.W))) + ) + + // write v0(mask) + v0.zipWithIndex.foreach { case (data, index) => + // which lane this v0 element belongs to + val laneIndex: Int = index % param.laneNumber + // pick that lane's v0 write port + val v0Write = v0UpdateVec(laneIndex) + // offset + val offset: Int = index / param.laneNumber + val maskExt = FillInterleaved(8, v0Write.bits.mask) + when(v0Write.valid && v0Write.bits.offset === offset.U) { + data := (data & (~maskExt).asUInt) | (maskExt & v0Write.bits.data) + } + } + val unitVec = Seq(loadUnit, storeUnit, otherUnit) /** Always merge into cache line */ @@ -222,8 +236,8 @@ class LSU(param: LSUParameter) extends Module { mshr.lsuRequest.valid := reqEnq(index) mshr.lsuRequest.bits := request.bits - maskSelect(index) := Mux(mshr.maskSelect.valid, mshr.maskSelect.bits, 0.U) - mshr.maskInput := maskInput(index) + val maskSelect = Mux(mshr.maskSelect.valid, mshr.maskSelect.bits, 0.U) + mshr.maskInput := cutUInt(v0.asUInt, 
param.maskGroupWidth)(maskSelect) // broadcast CSR mshr.csrInterface := csrInterface diff --git a/t1/src/mask/MaskUnit.scala b/t1/src/mask/MaskUnit.scala index ce1ab81ae..76cd556d2 100644 --- a/t1/src/mask/MaskUnit.scala +++ b/t1/src/mask/MaskUnit.scala @@ -70,12 +70,12 @@ class MaskUnitInterface(parameter: T1Parameter) extends Bundle { val readResult: Vec[ValidIO[UInt]] = Flipped(Vec(parameter.laneNumber, Valid(UInt(parameter.datapathWidth.W)))) val writeRD: ValidIO[UInt] = Valid(UInt(parameter.datapathWidth.W)) val lastReport: UInt = Output(UInt((2 * parameter.chainingSize).W)) - val lsuMaskInput: Vec[UInt] = Output(Vec(parameter.lsuMSHRSize, UInt(parameter.maskGroupWidth.W))) - val lsuMaskSelect: Vec[UInt] = Input(Vec(parameter.lsuMSHRSize, UInt(parameter.lsuParameters.maskGroupSizeBits.W))) val laneMaskInput: Vec[UInt] = Output(Vec(parameter.laneNumber, UInt(parameter.datapathWidth.W))) val laneMaskSelect: Vec[UInt] = Input(Vec(parameter.laneNumber, UInt(parameter.laneParam.maskGroupSizeBits.W))) val laneMaskSewSelect: Vec[UInt] = Input(Vec(parameter.laneNumber, UInt(2.W))) - val v0UpdateVec: Vec[ValidIO[V0Update]] = Flipped(Vec(parameter.laneNumber, Valid(new V0Update(parameter.laneParam)))) + val v0UpdateVec: Vec[ValidIO[V0Update]] = Flipped( + Vec(parameter.laneNumber, Valid(new V0Update(parameter.laneParam.datapathWidth, parameter.laneParam.vrfOffsetBits))) + ) val writeRDData: UInt = Output(UInt(parameter.xLen.W)) val gatherData: DecoupledIO[UInt] = Decoupled(UInt(parameter.xLen.W)) val gatherRead: Bool = Input(Bool()) @@ -116,8 +116,6 @@ class MaskUnit(val parameter: T1Parameter) val readResult = io.readResult val writeRD = io.writeRD val lastReport = io.lastReport - val lsuMaskInput = io.lsuMaskInput - val lsuMaskSelect = io.lsuMaskSelect val laneMaskInput = io.laneMaskInput val laneMaskSelect = io.laneMaskSelect val laneMaskSewSelect = io.laneMaskSewSelect @@ -168,11 +166,6 @@ class MaskUnit(val parameter: T1Parameter) input := cutUInt(v0SelectBySew, 
parameter.datapathWidth)(laneMaskSelect(index)) } - // lsu - lsuMaskInput.zip(lsuMaskSelect).foreach { case (data, index) => - data := cutUInt(v0.asUInt, parameter.maskGroupWidth)(index) - } - val maskedWrite: BitLevelMaskWrite = Module(new BitLevelMaskWrite(parameter)) def gatherIndex(elementIndex: UInt, vlmul: UInt, sew: UInt): (UInt, UInt, UInt, UInt, Bool) = {