diff --git a/t1/src/Bundles.scala b/t1/src/Bundles.scala index 492fc195d..5dd255384 100644 --- a/t1/src/Bundles.scala +++ b/t1/src/Bundles.scala @@ -792,6 +792,5 @@ class MaskUnitReadVs1(parameter: T1Parameter) extends Bundle { } class LaneTokenBundle extends Bundle { - val maskResponseRelease: Bool = Output(Bool()) - val maskRequestRelease: Bool = Input(Bool()) + val maskRequestRelease: Bool = Input(Bool()) } diff --git a/t1/src/Lane.scala b/t1/src/Lane.scala index 5864f41e6..d6e68acf6 100644 --- a/t1/src/Lane.scala +++ b/t1/src/Lane.scala @@ -349,9 +349,6 @@ class Lane(val parameter: LaneParameter) extends Module with SerializableModule[ val maskIndexVec: Vec[UInt] = RegInit(VecInit(Seq.fill(parameter.chainingSize)(0.U(log2Ceil(parameter.maskGroupWidth).W)))) - /** the find first one index register in this lane. */ - val ffoIndexReg: UInt = RegInit(0.U(log2Ceil(parameter.vLen / 8).W)) - /** result of reduce instruction. */ val reduceResult: UInt = RegInit(0.U(parameter.datapathWidth.W)) @@ -359,7 +356,7 @@ class Lane(val parameter: LaneParameter) extends Module with SerializableModule[ */ val vrfWriteArbiter: Vec[DecoupledIO[VRFWriteRequest]] = Wire( Vec( - parameter.chainingSize + 2, + parameter.chainingSize + 1, Decoupled( new VRFWriteRequest( parameter.vrfParam.regNumBits, @@ -371,30 +368,15 @@ class Lane(val parameter: LaneParameter) extends Module with SerializableModule[ ) ) - val lsuWriteQueue: QueueIO[VRFWriteRequest] = Queue.io(vrfWriteType, 1, flow = true) - // connect lsuWriteQueue.enq - lsuWriteQueue.enq.valid := vrfWriteChannel.valid && !writeFromMask - lsuWriteQueue.enq.bits := vrfWriteChannel.bits - vrfWriteChannel.ready := writeFromMask || lsuWriteQueue.enq.ready - - val maskWriteQueue: QueueIO[VRFWriteRequest] = Queue.io(vrfWriteType, parameter.maskUnitVefWriteQueueSize) - // connect maskWriteQueue.enq - maskWriteQueue.enq.valid := vrfWriteChannel.valid && writeFromMask - maskWriteQueue.enq.bits := vrfWriteChannel.bits - - vrfWriteArbiter(parameter.chainingSize).valid := lsuWriteQueue.deq.valid - vrfWriteArbiter(parameter.chainingSize).bits := lsuWriteQueue.deq.bits - lsuWriteQueue.deq.ready := vrfWriteArbiter(parameter.chainingSize).ready + vrfWriteArbiter(parameter.chainingSize).valid := vrfWriteChannel.valid + vrfWriteArbiter(parameter.chainingSize).bits := vrfWriteChannel.bits + vrfWriteChannel.ready := vrfWriteArbiter(parameter.chainingSize).ready - vrfWriteArbiter(parameter.chainingSize + 1).valid := maskWriteQueue.deq.valid - vrfWriteArbiter(parameter.chainingSize + 1).bits := maskWriteQueue.deq.bits - maskWriteQueue.deq.ready := vrfWriteArbiter(parameter.chainingSize + 1).ready - - val allVrfWriteAfterCheck: Seq[VRFWriteRequest] = Seq.tabulate(parameter.chainingSize + 4) { i => + val allVrfWriteAfterCheck: Seq[VRFWriteRequest] = Seq.tabulate(parameter.chainingSize + 3) { i => RegInit(0.U.asTypeOf(vrfWriteArbiter.head.bits)) } - val afterCheckValid: Seq[Bool] = Seq.tabulate(parameter.chainingSize + 4) { _ => RegInit(false.B) } - val afterCheckDequeueReady: Vec[Bool] = Wire(Vec(parameter.chainingSize + 4, Bool())) + val afterCheckValid: Seq[Bool] = Seq.tabulate(parameter.chainingSize + 3) { _ => RegInit(false.B) } + val afterCheckDequeueReady: Vec[Bool] = Wire(Vec(parameter.chainingSize + 3, Bool())) val afterCheckDequeueFire: Seq[Bool] = afterCheckValid.zip(afterCheckDequeueReady).map { case (v, r) => v && r } /** for each slot, assert when it is asking [[T1]] to change mask */ @@ -594,7 +576,6 @@ class Lane(val parameter: LaneParameter) extends Module with SerializableModule[ maskUnitRequest <> mask.maskReq maskRequestToLSU <> mask.maskRequestToLSU tokenIO <> mask.tokenIO - tokenIO.maskResponseRelease := maskWriteQueue.deq.fire mask.dequeue }.getOrElse(stage3EnqWire) stage3.enqueue <> stage3EnqSelect @@ -849,7 +830,7 @@ class Lane(val parameter: LaneParameter) extends Module with SerializableModule[ // It’s been a long time since I selected it. Need pipe val queueBeforeMaskWrite: QueueIO[VRFWriteRequest] = Queue.io(chiselTypeOf(maskedWriteUnit.enqueue.bits), entries = 1, pipe = true) - val writeSelect: UInt = Wire(UInt((parameter.chainingSize + 4).W)) + val writeSelect: UInt = Wire(UInt((parameter.chainingSize + 3).W)) val writeCavitation: UInt = VecInit(allVrfWriteAfterCheck.map(_.mask === 0.U)).asUInt // 处理 rf @@ -1156,8 +1137,8 @@ class Lane(val parameter: LaneParameter) extends Module with SerializableModule[ writeReadyForLsu := vrf.writeReadyForLsu vrfReadyToStore := vrf.vrfReadyToStore tokenManager.crossWriteReports.zipWithIndex.foreach { case (rpt, rptIndex) => - rpt.valid := afterCheckDequeueFire(parameter.chainingSize + 2 + rptIndex) - rpt.bits := allVrfWriteAfterCheck(parameter.chainingSize + 2 + rptIndex).instructionIndex + rpt.valid := afterCheckDequeueFire(parameter.chainingSize + 1 + rptIndex) + rpt.bits := allVrfWriteAfterCheck(parameter.chainingSize + 1 + rptIndex).instructionIndex } // todo: add mask unit write token tokenManager.responseReport.valid := maskUnitRequest.valid @@ -1193,13 +1174,9 @@ class Lane(val parameter: LaneParameter) extends Module with SerializableModule[ tokenManager.topWriteEnq.valid := vrfWriteChannel.fire tokenManager.topWriteEnq.bits := vrfWriteChannel.bits.instructionIndex - tokenManager.fromMask := writeFromMask - - tokenManager.lsuWriteDeq.valid := afterCheckDequeueFire(parameter.chainingSize) - tokenManager.lsuWriteDeq.bits := allVrfWriteAfterCheck(parameter.chainingSize).instructionIndex - tokenManager.maskWriteDeq.valid := afterCheckDequeueFire(parameter.chainingSize + 1) - tokenManager.maskWriteDeq.bits := allVrfWriteAfterCheck(parameter.chainingSize + 1).instructionIndex + tokenManager.topWriteDeq.valid := afterCheckDequeueFire(parameter.chainingSize) + tokenManager.topWriteDeq.bits := allVrfWriteAfterCheck(parameter.chainingSize).instructionIndex tokenManager.maskUnitLastReport := lsuLastReport diff --git a/t1/src/T1.scala b/t1/src/T1.scala index dfbfc1db5..dc510ffea 100644 --- a/t1/src/T1.scala +++ b/t1/src/T1.scala @@ -656,20 +656,6 @@ class T1(val parameter: T1Parameter) val completeIndexInstruction: Bool = ohCheck(lsu.lastReport, slots.last.record.instructionIndex, parameter.chainingSize) && !slots.last.state.idle - val vrfWrite: Vec[DecoupledIO[VRFWriteRequest]] = Wire( - Vec( - parameter.laneNumber, - Decoupled( - new VRFWriteRequest( - parameter.vrfParam.regNumBits, - parameter.vrfParam.vrfOffsetBits, - parameter.instructionIndexBits, - parameter.datapathWidth - ) - ) - ) - ) - val freeOR: Bool = VecInit(slots.map(_.state.idle)).asUInt.orR /** slot is ready to accept new instructions. */ @@ -766,13 +752,15 @@ class T1(val parameter: T1Parameter) Some(Seq(maskUnit.io.readResult(index), lsu.vrfReadResults(index))) ) - val maskTryToWrite = maskUnit.io.exeResp(index) - // lsu & mask unit write lane - // Mask write has absolute priority because it has a token - lane.vrfWriteChannel.valid := vrfWrite(index).valid || maskTryToWrite.valid - lane.vrfWriteChannel.bits := Mux(maskTryToWrite.valid, maskTryToWrite.bits, vrfWrite(index).bits) - vrfWrite(index).ready := lane.vrfWriteChannel.ready && !maskTryToWrite.valid - lane.writeFromMask := maskTryToWrite.valid + connectVrfAccess( + Seq(parameter.maskUnitReadShifterSize(index), parameter.lsuReadShifterSize(index)), + Seq(parameter.maskUnitReadTokenSize(index), parameter.lsuReadTokenSize(index)) + )( + VecInit(Seq(maskUnit.io.exeResp(index), lsu.vrfWritePort(index))), + lane.vrfWriteChannel, + 0 + ) + lane.writeFromMask := maskUnit.exeResp(index).fire lsu.offsetReadResult(index).valid := lane.maskUnitRequest.valid && lane.maskRequestToLSU lsu.offsetReadResult(index).bits := lane.maskUnitRequest.bits.source2 @@ -850,7 +838,6 @@ class T1(val parameter: T1Parameter) } maskUnit.io.tokenIO.zip(laneVec).zipWithIndex.foreach { case ((token, lane), index) => - token.maskResponseRelease := lane.tokenIO.maskResponseRelease lane.tokenIO.maskRequestRelease := token.maskRequestRelease || lsu.tokenIO.offsetGroupRelease(index) } @@ -887,8 +874,6 @@ class T1(val parameter: T1Parameter) io.highBandwidthLoadStorePort <> lsu.axi4Port io.indexedLoadStorePort <> lsu.simpleAccessPorts - // 暂时直接连lsu的写,后续需要处理scheduler的写 - vrfWrite.zip(lsu.vrfWritePort).foreach { case (sink, source) => sink <> source } /** Slot has free entries. */ val free = VecInit(slots.map(_.state.idle)).asUInt @@ -974,7 +959,7 @@ class T1(val parameter: T1Parameter) probeWire.requestRegReady := requestRegDequeue.ready // maskUnitWrite maskUnitWriteReady probeWire.writeQueueEnqVec.zip(maskUnit.io.exeResp).foreach { case (probe, write) => - probe.valid := write.valid && write.bits.mask.orR + probe.valid := write.fire && write.bits.mask.orR probe.bits := write.bits.instructionIndex } probeWire.instructionValid := maskAnd( diff --git a/t1/src/laneStage/MaskExchangeUnit.scala b/t1/src/laneStage/MaskExchangeUnit.scala index cb8257f83..49faff45f 100644 --- a/t1/src/laneStage/MaskExchangeUnit.scala +++ b/t1/src/laneStage/MaskExchangeUnit.scala @@ -54,8 +54,7 @@ class MaskExchangeUnit(parameter: LaneParameter) extends Module { val maskRequestEnqReady: Bool = !enqIsMaskRequest || maskRequestAllow - dequeue.valid := enqueue.valid && enqSendToDeq - dequeue.bits := enqueue.bits - enqueue.ready := Mux(enqSendToDeq, dequeue.ready, maskRequestEnqReady) - tokenIO.maskResponseRelease := DontCare + dequeue.valid := enqueue.valid && enqSendToDeq + dequeue.bits := enqueue.bits + enqueue.ready := Mux(enqSendToDeq, dequeue.ready, maskRequestEnqReady) } diff --git a/t1/src/laneStage/SlotTokenManager.scala b/t1/src/laneStage/SlotTokenManager.scala index 22eef760b..dadea0fd5 100644 --- a/t1/src/laneStage/SlotTokenManager.scala +++ b/t1/src/laneStage/SlotTokenManager.scala @@ -94,13 +94,7 @@ class SlotTokenManager(parameter: LaneParameter) extends Module { val topWriteEnq: ValidIO[UInt] = IO(Flipped(Valid(UInt(parameter.instructionIndexBits.W)))) @public - val fromMask: Bool = IO(Input(Bool())) - - @public - val lsuWriteDeq: ValidIO[UInt] = IO(Flipped(Valid(UInt(parameter.instructionIndexBits.W)))) - - @public - val maskWriteDeq: ValidIO[UInt] = IO(Flipped(Valid(UInt(parameter.instructionIndexBits.W)))) + val topWriteDeq: ValidIO[UInt] = IO(Flipped(Valid(UInt(parameter.instructionIndexBits.W)))) @public val instructionValid: UInt = IO(Output(UInt((2 * parameter.chainingSize).W))) @@ -212,27 +206,16 @@ class SlotTokenManager(parameter: LaneParameter) extends Module { val instructionInWritePipe: UInt = tokenUpdate(writePipeToken, writePipeEnq, writePipeDeq) // lsu & mask write token - val lsuWriteToken: Seq[UInt] = Seq.tabulate(2 * parameter.chainingSize)(_ => RegInit(0.U(tokenWith.W))) - val maskWriteToken: Seq[UInt] = Seq.tabulate(2 * parameter.chainingSize)(_ => RegInit(0.U(tokenWith.W))) + val topWriteToken: Seq[UInt] = Seq.tabulate(2 * parameter.chainingSize)(_ => RegInit(0.U(tokenWith.W))) val topWriteDoEnq: UInt = maskAnd(topWriteEnq.valid, indexToOH(topWriteEnq.bits, parameter.chainingSize)).asUInt - val lsuWriteDoEnq: UInt = - maskAnd(topWriteEnq.valid && !fromMask, indexToOH(topWriteEnq.bits, parameter.chainingSize)).asUInt - - val maskWriteDoEnq: UInt = - maskAnd(topWriteEnq.valid && fromMask, indexToOH(topWriteEnq.bits, parameter.chainingSize)).asUInt - - val lsuWriteDoDeq: UInt = - maskAnd(lsuWriteDeq.valid, indexToOH(lsuWriteDeq.bits, parameter.chainingSize)).asUInt - - val maskWriteDoDeq: UInt = - maskAnd(maskWriteDeq.valid, indexToOH(maskWriteDeq.bits, parameter.chainingSize)).asUInt + val topWriteDoDeq: UInt = + maskAnd(topWriteDeq.valid, indexToOH(topWriteDeq.bits, parameter.chainingSize)).asUInt - val lsuInTopWrite = tokenUpdate(lsuWriteToken, lsuWriteDoEnq, lsuWriteDoDeq) - val maskInTopWrite = tokenUpdate(maskWriteToken, maskWriteDoEnq, maskWriteDoDeq) + val topWrite: UInt = tokenUpdate(topWriteToken, topWriteDoEnq, topWriteDoDeq) - dataInWritePipe := instructionInWritePipe | lsuInTopWrite | maskInTopWrite + dataInWritePipe := instructionInWritePipe | topWrite instructionValid := dataInWritePipe | instructionInSlot } diff --git a/t1/src/mask/MaskUnit.scala b/t1/src/mask/MaskUnit.scala index ca0c1b36e..ce1ab81ae 100644 --- a/t1/src/mask/MaskUnit.scala +++ b/t1/src/mask/MaskUnit.scala @@ -39,15 +39,15 @@ import org.chipsalliance.t1.rtl.decoder.Decoder // 11 11 1 -> maskdestination class MaskUnitInterface(parameter: T1Parameter) extends Bundle { - val clock: Clock = Input(Clock()) - val reset: Reset = Input(Reset()) - val instReq: ValidIO[MaskUnitInstReq] = Flipped(Valid(new MaskUnitInstReq(parameter))) - val exeReq: Vec[ValidIO[MaskUnitExeReq]] = Flipped( + val clock: Clock = Input(Clock()) + val reset: Reset = Input(Reset()) + val instReq: ValidIO[MaskUnitInstReq] = Flipped(Valid(new MaskUnitInstReq(parameter))) + val exeReq: Vec[ValidIO[MaskUnitExeReq]] = Flipped( Vec(parameter.laneNumber, Valid(new MaskUnitExeReq(parameter.laneParam))) ) - val exeResp: Vec[ValidIO[VRFWriteRequest]] = Vec( + val exeResp: Vec[DecoupledIO[VRFWriteRequest]] = Vec( parameter.laneNumber, - Valid( + Decoupled( new VRFWriteRequest( parameter.vrfParam.regNumBits, parameter.laneParam.vrfOffsetBits, @@ -56,8 +56,8 @@ class MaskUnitInterface(parameter: T1Parameter) extends Bundle { ) ) ) - val tokenIO: Vec[LaneTokenBundle] = Flipped(Vec(parameter.laneNumber, new LaneTokenBundle)) - val readChannel: Vec[DecoupledIO[VRFReadRequest]] = Vec( + val tokenIO: Vec[LaneTokenBundle] = Flipped(Vec(parameter.laneNumber, new LaneTokenBundle)) + val readChannel: Vec[DecoupledIO[VRFReadRequest]] = Vec( parameter.laneNumber, Decoupled( new VRFReadRequest( @@ -67,19 +67,19 @@ class MaskUnitInterface(parameter: T1Parameter) extends Bundle { ) ) ) - val readResult: Vec[ValidIO[UInt]] = Flipped(Vec(parameter.laneNumber, Valid(UInt(parameter.datapathWidth.W)))) - val writeRD: ValidIO[UInt] = Valid(UInt(parameter.datapathWidth.W)) - val lastReport: UInt = Output(UInt((2 * parameter.chainingSize).W)) - val lsuMaskInput: Vec[UInt] = Output(Vec(parameter.lsuMSHRSize, UInt(parameter.maskGroupWidth.W))) - val lsuMaskSelect: Vec[UInt] = Input(Vec(parameter.lsuMSHRSize, UInt(parameter.lsuParameters.maskGroupSizeBits.W))) - val laneMaskInput: Vec[UInt] = Output(Vec(parameter.laneNumber, UInt(parameter.datapathWidth.W))) - val laneMaskSelect: Vec[UInt] = Input(Vec(parameter.laneNumber, UInt(parameter.laneParam.maskGroupSizeBits.W))) - val laneMaskSewSelect: Vec[UInt] = Input(Vec(parameter.laneNumber, UInt(2.W))) - val v0UpdateVec: Vec[ValidIO[V0Update]] = Flipped(Vec(parameter.laneNumber, Valid(new V0Update(parameter.laneParam)))) - val writeRDData: UInt = Output(UInt(parameter.xLen.W)) - val gatherData: DecoupledIO[UInt] = Decoupled(UInt(parameter.xLen.W)) - val gatherRead: Bool = Input(Bool()) - val om: Property[ClassType] = Output(Property[AnyClassType]()) + val readResult: Vec[ValidIO[UInt]] = Flipped(Vec(parameter.laneNumber, Valid(UInt(parameter.datapathWidth.W)))) + val writeRD: ValidIO[UInt] = Valid(UInt(parameter.datapathWidth.W)) + val lastReport: UInt = Output(UInt((2 * parameter.chainingSize).W)) + val lsuMaskInput: Vec[UInt] = Output(Vec(parameter.lsuMSHRSize, UInt(parameter.maskGroupWidth.W))) + val lsuMaskSelect: Vec[UInt] = Input(Vec(parameter.lsuMSHRSize, UInt(parameter.lsuParameters.maskGroupSizeBits.W))) + val laneMaskInput: Vec[UInt] = Output(Vec(parameter.laneNumber, UInt(parameter.datapathWidth.W))) + val laneMaskSelect: Vec[UInt] = Input(Vec(parameter.laneNumber, UInt(parameter.laneParam.maskGroupSizeBits.W))) + val laneMaskSewSelect: Vec[UInt] = Input(Vec(parameter.laneNumber, UInt(2.W))) + val v0UpdateVec: Vec[ValidIO[V0Update]] = Flipped(Vec(parameter.laneNumber, Valid(new V0Update(parameter.laneParam)))) + val writeRDData: UInt = Output(UInt(parameter.xLen.W)) + val gatherData: DecoupledIO[UInt] = Decoupled(UInt(parameter.xLen.W)) + val gatherRead: Bool = Input(Bool()) + val om: Property[ClassType] = Output(Property[AnyClassType]()) } @instantiable @@ -1091,17 +1091,10 @@ class MaskUnit(val parameter: T1Parameter) } queue.enq.bits.index := instReg.instructionIndex - // write token - val tokenCounter = RegInit(0.U(log2Ceil(parameter.maskUnitVefWriteQueueSize + 1).W)) - val tokenAllow: Bool = queue.deq.fire - val counterChange: UInt = Mux(tokenAllow, 1.U, -1.S(tokenCounter.getWidth.W).asUInt) - when(tokenAllow ^ tokenIO(index).maskResponseRelease) { - tokenCounter := tokenCounter + counterChange - } // write vrf val writePort = exeResp(index) - queue.deq.ready := !tokenCounter.asBools.last - writePort.valid := tokenAllow + queue.deq.ready := writePort.ready + writePort.valid := queue.deq.valid writePort.bits.last := DontCare writePort.bits.instructionIndex := instReg.instructionIndex writePort.bits.data := Mux(queue.deq.bits.ffoByOther, queue.deq.bits.pipeData, queue.deq.bits.writeData.data) diff --git a/t1/src/package.scala b/t1/src/package.scala index 5eca78e39..412d62ddf 100644 --- a/t1/src/package.scala +++ b/t1/src/package.scala @@ -277,7 +277,7 @@ package object rtl { def connectVrfAccess[T <: Data]( latencyVec: Seq[Int], tokenSizeVec: Seq[Int], - vrfReadLatency: Option[Int] + vrfReadLatency: Option[Int] = None )(sourceVec: Vec[DecoupledIO[T]], sink: DecoupledIO[T], arb: Int, diff --git a/t1/src/vrf/VRF.scala b/t1/src/vrf/VRF.scala index 721c888f4..6e094a47b 100644 --- a/t1/src/vrf/VRF.scala +++ b/t1/src/vrf/VRF.scala @@ -199,7 +199,7 @@ class VRF(val parameter: VRFParam) extends Module with SerializableModule[VRFPar @public val writeCheck: Vec[LSUWriteCheck] = IO( Vec( - parameter.chainingSize + 4, + parameter.chainingSize + 3, Input( new LSUWriteCheck( parameter.regNumBits, @@ -211,7 +211,7 @@ class VRF(val parameter: VRFParam) extends Module with SerializableModule[VRFPar ) @public - val writeAllow: Vec[Bool] = IO(Vec(parameter.chainingSize + 4, Output(Bool()))) + val writeAllow: Vec[Bool] = IO(Vec(parameter.chainingSize + 3, Output(Bool()))) /** when instruction is fired, record it in the VRF for chaining. */ @public