diff --git a/t1/src/Lane.scala b/t1/src/Lane.scala index abecc3220..934e9af27 100644 --- a/t1/src/Lane.scala +++ b/t1/src/Lane.scala @@ -315,9 +315,6 @@ class Lane(val parameter: LaneParameter) extends Module with SerializableModule[ @public val laneProbe = IO(Output(Probe(new LaneProbe(parameter), layers.Verification))) - @public - val vrfAllocateIssue: Bool = IO(Output(Bool())) - // TODO: remove dontTouch(writeBusPort) val csrInterface: CSRInterface = laneRequest.bits.csrInterface @@ -544,8 +541,10 @@ class Lane(val parameter: LaneParameter) extends Module with SerializableModule[ // Overflow occurs val vxsatEnq: Vec[UInt] = Wire(Vec(parameter.chainingSize, UInt((2 * parameter.chainingSize).W))) + + val instructionFinishInSlot: UInt = Wire(UInt((2 * parameter.chainingSize).W)) // vxsatEnq and instructionFinished cannot happen at the same time - vxsatResult := (vxsatEnq.reduce(_ | _) | vxsatResult) & (~instructionFinished).asUInt + vxsatResult := (vxsatEnq.reduce(_ | _) | vxsatResult) & (~instructionFinishInSlot).asUInt /** assert when a instruction will not use mask unit */ val instructionUnrelatedMaskUnitVec: Vec[UInt] = Wire(Vec(parameter.chainingSize, UInt(parameter.chainingSize.W))) @@ -1087,7 +1086,7 @@ class Lane(val parameter: LaneParameter) extends Module with SerializableModule[ // enqueue from lane request if (slotIndex == parameter.chainingSize - 1) { enqueueValid := laneRequest.valid - enqueueReady := slotShiftValid(slotIndex) && vrf.instructionWriteReport.ready + enqueueReady := slotShiftValid(slotIndex) when(enqueueFire) { slotControl(slotIndex) := entranceControl maskGroupCountVec(slotIndex) := 0.U(parameter.maskGroupSizeBits.W) @@ -1117,7 +1116,7 @@ class Lane(val parameter: LaneParameter) extends Module with SerializableModule[ // handshake // @todo @Clo91eaf lane can take request from Sequencer - laneRequest.ready := slotFree && vrf.instructionWriteReport.ready + laneRequest.ready := slotFree val instructionFinishAndNotReportByTop: Bool = entranceControl.instructionFinished && !laneRequest.bits.decodeResult(Decoder.readOnly) && (writeCount === 0.U) @@ -1149,7 +1148,6 @@ class Lane(val parameter: LaneParameter) extends Module with SerializableModule[ vrf.instructionWriteReport.bits.state.wLaneLastReport := !laneRequest.valid vrf.instructionWriteReport.bits.state.wTopLastReport := !laneRequest.bits.decodeResult(Decoder.maskUnit) vrf.instructionWriteReport.bits.state.wLaneClear := false.B - vrfAllocateIssue := vrf.vrfAllocateIssue val elementSizeForOneRegister: Int = parameter.vLen / parameter.datapathWidth / parameter.laneNumber val nrMask: UInt = VecInit(Seq.tabulate(8) { i => @@ -1183,12 +1181,14 @@ class Lane(val parameter: LaneParameter) extends Module with SerializableModule[ vrf.instructionWriteReport.bits.elementMask := selectMask + instructionFinishInSlot := (~instructionValid).asUInt & instructionValidNext + // clear record by instructionFinished - vrf.instructionLastReport := instructionFinished + vrf.instructionLastReport := instructionFinishInSlot vrf.lsuLastReport := lsuLastReport vrf.loadDataInLSUWriteQueue := loadDataInLSUWriteQueue vrf.dataInLane := instructionValid - instructionFinished := (~instructionValid).asUInt & instructionValidNext + instructionFinished := vrf.vrfSlotRelease writeReadyForLsu := vrf.writeReadyForLsu vrfReadyToStore := vrf.vrfReadyToStore tokenManager.crossWriteReports.zipWithIndex.foreach { case (rpt, rptIndex) => diff --git a/t1/src/T1.scala b/t1/src/T1.scala index 4c395f147..6813c9b05 100644 --- a/t1/src/T1.scala +++ b/t1/src/T1.scala @@ -664,6 +664,13 @@ class T1(val parameter: T1Parameter) /** slot is ready to accept new instructions. */ val slotReady: Bool = Mux(specialInstruction, slots.map(_.state.idle).last, freeOR) + val olderCheck: Bool = slots.map { re => + // The same lsb will make it difficult to distinguish between the new and the old + val notSameLSB: Bool = re.record.instructionIndex(parameter.instructionIndexBits - 2, 0) =/= + requestReg.bits.instructionIndex(parameter.instructionIndexBits - 2, 0) + re.state.idle || (instIndexL(re.record.instructionIndex, requestReg.bits.instructionIndex) && notSameLSB) + }.reduce(_ && _) + val source1Select: UInt = Mux( decodeResult(Decoder.gather), @@ -699,7 +706,7 @@ class T1(val parameter: T1Parameter) ) laneRequestSourceWire.foreach { request => - request.valid := requestRegDequeue.fire && !noOffsetReadLoadStore && !maskUnitInstruction + request.valid := requestRegDequeue.fire // hard wire request.bits.instructionIndex := requestReg.bits.instructionIndex request.bits.decodeResult := decodeResult @@ -717,7 +724,7 @@ class T1(val parameter: T1Parameter) // and broadcast to all lanes. request.bits.readFromScalar := source1Select - request.bits.issueInst := requestRegDequeue.fire + request.bits.issueInst := !noOffsetReadLoadStore && !maskUnitInstruction request.bits.loadStore := isLoadStoreType // let record in VRF to know there is a store instruction. request.bits.store := isStoreType @@ -738,10 +745,12 @@ class T1(val parameter: T1Parameter) */ val laneVec: Seq[Instance[Lane]] = Seq.tabulate(parameter.laneNumber) { index => val lane: Instance[Lane] = Instantiate(new Lane(parameter.laneParam)) - lane.laneRequest.valid := laneRequestSinkWire(index).valid && lane.vrfAllocateIssue + lane.laneRequest.valid := laneRequestSinkWire(index).valid && laneRequestSinkWire(index).bits.issueInst lane.laneRequest.bits := laneRequestSinkWire(index).bits - laneRequestSinkWire(index).ready := lane.laneRequest.ready && lane.vrfAllocateIssue - lane.laneIndex := index.U + lane.laneRequest.bits.issueInst := laneRequestSinkWire(index).fire + laneRequestSinkWire(index).ready := !laneRequestSinkWire(index).bits.issueInst || lane.laneRequest.ready + + lane.laneIndex := index.U connectVrfAccess( Seq(parameter.maskUnitReadShifterSize(index), parameter.lsuReadShifterSize(index)), @@ -905,7 +914,7 @@ class T1(val parameter: T1Parameter) // we detect the hazard and decide should we issue this slide or // issue the instruction after the slide which already in the slot. requestRegDequeue.ready := executionReady && slotReady && (!gatherNeedRead || maskUnit.io.gatherData.valid) && - tokenManager.issueAllow && instructionIndexFree + tokenManager.issueAllow && instructionIndexFree && olderCheck instructionToSlotOH := Mux(requestRegDequeue.fire, slotToEnqueue, 0.U) diff --git a/t1/src/vrf/VRF.scala b/t1/src/vrf/VRF.scala index 6e094a47b..0292d8409 100644 --- a/t1/src/vrf/VRF.scala +++ b/t1/src/vrf/VRF.scala @@ -15,6 +15,7 @@ import chisel3.properties.{AnyClassType, Class, ClassType, Path, Property} import org.chipsalliance.stdlib.GeneralOM import org.chipsalliance.t1.rtl.{ ffo, + indexToOH, instIndexL, instIndexLE, ohCheck, @@ -215,7 +216,7 @@ class VRF(val parameter: VRFParam) extends Module with SerializableModule[VRFPar /** when instruction is fired, record it in the VRF for chaining. */ @public - val instructionWriteReport: DecoupledIO[VRFWriteReport] = IO(Flipped(Decoupled(new VRFWriteReport(parameter)))) + val instructionWriteReport: ValidIO[VRFWriteReport] = IO(Flipped(Valid(new VRFWriteReport(parameter)))) /** similar to [[flush]]. */ @public @@ -224,6 +225,9 @@ class VRF(val parameter: VRFParam) extends Module with SerializableModule[VRFPar @public val lsuLastReport: UInt = IO(Input(UInt((2 * parameter.chainingSize).W))) + @public + val vrfSlotRelease: UInt = IO(Output(UInt((2 * parameter.chainingSize).W))) + @public val dataInLane: UInt = IO(Input(UInt((2 * parameter.chainingSize).W))) @@ -232,9 +236,6 @@ class VRF(val parameter: VRFParam) extends Module with SerializableModule[VRFPar @public val vrfReadyToStore: Bool = IO(Output(Bool())) - @public - val vrfAllocateIssue: Bool = IO(Output(Bool())) - /** we can only chain LSU instructions, after [[LSU.writeQueueVec]] is cleared. */ @public val loadDataInLSUWriteQueue: UInt = IO(Input(UInt((2 * parameter.chainingSize).W))) @@ -275,6 +276,13 @@ class VRF(val parameter: VRFParam) extends Module with SerializableModule[VRFPar val chainingRecordCopy: Vec[ValidIO[VRFWriteReport]] = RegInit( VecInit(Seq.fill(parameter.chainingSize + 1)(0.U.asTypeOf(Valid(new VRFWriteReport(parameter))))) ) + val recordRelease: Vec[UInt] = WireDefault( + VecInit( + Seq.fill(parameter.chainingSize + 1)( + 0.U.asTypeOf(UInt((parameter.chainingSize * 2).W)) + ) + ) + ) val recordValidVec: Seq[Bool] = chainingRecord.map(r => !r.bits.elementMask.andR && r.valid) // first read @@ -500,23 +508,12 @@ class VRF(val parameter: VRFParam) extends Module with SerializableModule[VRFPar // @todo @Clo91eaf VRF ready signal for performance. val freeRecord: UInt = VecInit(chainingRecord.map(!_.valid)).asUInt val recordFFO: UInt = ffo(freeRecord) - val recordEnq: UInt = Wire(UInt((parameter.chainingSize + 1).W)) - val olderCheck = chainingRecord.map { re => - // The same lsb will make it difficult to distinguish between the new and the old - val notSameLSB: Bool = re.bits.instIndex(parameter.instructionIndexBits - 2, 0) =/= - instructionWriteReport.bits.instIndex(parameter.instructionIndexBits - 2, 0) - !re.valid || (instIndexL(re.bits.instIndex, instructionWriteReport.bits.instIndex) && notSameLSB) - }.reduce(_ && _) - // handle VRF hazard - // @todo @Clo91eaf VRF ready signal for performance. - instructionWriteReport.ready := freeRecord.orR && olderCheck - recordEnq := Mux( + val recordEnq: UInt = Mux( // 纯粹的lsu指令的记录不需要ready instructionWriteReport.valid, recordFFO, 0.U((parameter.chainingSize + 1).W) ) - vrfAllocateIssue := freeRecord.orR && olderCheck val writePort: Seq[ValidIO[VRFWriteRequest]] = Seq(writePipe) val loadUnitReadPorts: Seq[DecoupledIO[VRFReadRequest]] = Seq(readRequests.last) @@ -570,6 +567,9 @@ class VRF(val parameter: VRFParam) extends Module with SerializableModule[VRFPar when(stateClear) { record.valid := false.B + when(record.valid) { + recordRelease(i) := indexToOH(record.bits.instIndex, parameter.chainingSize) + } } when(recordEnq(i)) { @@ -617,6 +617,7 @@ class VRF(val parameter: VRFParam) extends Module with SerializableModule[VRFPar } writeReadyForLsu := !hazardVec.map(_.map(_._1).reduce(_ || _)).reduce(_ || _) vrfReadyToStore := !hazardVec.map(_.map(_._2).reduce(_ || _)).reduce(_ || _) + vrfSlotRelease := recordRelease.reduce(_ | _) writeCheck.zip(writeAllow).foreach { case (check, allow) => allow := chainingRecordCopy