Skip to content

Commit

Permalink
[rtl] connect vrf write with shifter.
Browse files Browse the repository at this point in the history
  • Loading branch information
qinjun-li authored and Avimitin committed Dec 9, 2024
1 parent 06a07e8 commit 6e16894
Show file tree
Hide file tree
Showing 8 changed files with 39 additions and 103 deletions.
3 changes: 1 addition & 2 deletions t1/src/Bundles.scala
Original file line number Diff line number Diff line change
Expand Up @@ -792,6 +792,5 @@ class MaskUnitReadVs1(parameter: T1Parameter) extends Bundle {
}

class LaneTokenBundle extends Bundle {
val maskResponseRelease: Bool = Output(Bool())
val maskRequestRelease: Bool = Input(Bool())
val maskRequestRelease: Bool = Input(Bool())
}
47 changes: 12 additions & 35 deletions t1/src/Lane.scala
Original file line number Diff line number Diff line change
Expand Up @@ -350,17 +350,14 @@ class Lane(val parameter: LaneParameter) extends Module with SerializableModule[
val maskIndexVec: Vec[UInt] =
RegInit(VecInit(Seq.fill(parameter.chainingSize)(0.U(log2Ceil(parameter.maskGroupWidth).W))))

/** the find first one index register in this lane. */
val ffoIndexReg: UInt = RegInit(0.U(log2Ceil(parameter.vLen / 8).W))

/** result of reduce instruction. */
val reduceResult: UInt = RegInit(0.U(parameter.datapathWidth.W))

/** arbiter for VRF write 1 for [[vrfWriteChannel]]
*/
val vrfWriteArbiter: Vec[DecoupledIO[VRFWriteRequest]] = Wire(
Vec(
parameter.chainingSize + 2,
parameter.chainingSize + 1,
Decoupled(
new VRFWriteRequest(
parameter.vrfParam.regNumBits,
Expand All @@ -372,30 +369,15 @@ class Lane(val parameter: LaneParameter) extends Module with SerializableModule[
)
)

val lsuWriteQueue: QueueIO[VRFWriteRequest] = Queue.io(vrfWriteType, 1, flow = true)
// connect lsuWriteQueue.enq
lsuWriteQueue.enq.valid := vrfWriteChannel.valid && !writeFromMask
lsuWriteQueue.enq.bits := vrfWriteChannel.bits
vrfWriteChannel.ready := writeFromMask || lsuWriteQueue.enq.ready

val maskWriteQueue: QueueIO[VRFWriteRequest] = Queue.io(vrfWriteType, parameter.maskUnitVefWriteQueueSize)
// connect maskWriteQueue.enq
maskWriteQueue.enq.valid := vrfWriteChannel.valid && writeFromMask
maskWriteQueue.enq.bits := vrfWriteChannel.bits

vrfWriteArbiter(parameter.chainingSize).valid := lsuWriteQueue.deq.valid
vrfWriteArbiter(parameter.chainingSize).bits := lsuWriteQueue.deq.bits
lsuWriteQueue.deq.ready := vrfWriteArbiter(parameter.chainingSize).ready
vrfWriteArbiter(parameter.chainingSize).valid := vrfWriteChannel.valid
vrfWriteArbiter(parameter.chainingSize).bits := vrfWriteChannel.bits
vrfWriteChannel.ready := vrfWriteArbiter(parameter.chainingSize).ready

vrfWriteArbiter(parameter.chainingSize + 1).valid := maskWriteQueue.deq.valid
vrfWriteArbiter(parameter.chainingSize + 1).bits := maskWriteQueue.deq.bits
maskWriteQueue.deq.ready := vrfWriteArbiter(parameter.chainingSize + 1).ready

val allVrfWriteAfterCheck: Seq[VRFWriteRequest] = Seq.tabulate(parameter.chainingSize + 4) { i =>
val allVrfWriteAfterCheck: Seq[VRFWriteRequest] = Seq.tabulate(parameter.chainingSize + 3) { i =>
RegInit(0.U.asTypeOf(vrfWriteArbiter.head.bits))
}
val afterCheckValid: Seq[Bool] = Seq.tabulate(parameter.chainingSize + 4) { _ => RegInit(false.B) }
val afterCheckDequeueReady: Vec[Bool] = Wire(Vec(parameter.chainingSize + 4, Bool()))
val afterCheckValid: Seq[Bool] = Seq.tabulate(parameter.chainingSize + 3) { _ => RegInit(false.B) }
val afterCheckDequeueReady: Vec[Bool] = Wire(Vec(parameter.chainingSize + 3, Bool()))
val afterCheckDequeueFire: Seq[Bool] = afterCheckValid.zip(afterCheckDequeueReady).map { case (v, r) => v && r }

/** for each slot, assert when it is asking [[T1]] to change mask */
Expand Down Expand Up @@ -595,7 +577,6 @@ class Lane(val parameter: LaneParameter) extends Module with SerializableModule[
maskUnitRequest <> mask.maskReq
maskRequestToLSU <> mask.maskRequestToLSU
tokenIO <> mask.tokenIO
tokenIO.maskResponseRelease := maskWriteQueue.deq.fire
mask.dequeue
}.getOrElse(stage3EnqWire)
stage3.enqueue <> stage3EnqSelect
Expand Down Expand Up @@ -850,7 +831,7 @@ class Lane(val parameter: LaneParameter) extends Module with SerializableModule[
// The write-select logic leading here is already a long path (presumably timing-critical — confirm), so a pipe queue is needed.
val queueBeforeMaskWrite: QueueIO[VRFWriteRequest] =
Queue.io(chiselTypeOf(maskedWriteUnit.enqueue.bits), entries = 1, pipe = true)
val writeSelect: UInt = Wire(UInt((parameter.chainingSize + 4).W))
val writeSelect: UInt = Wire(UInt((parameter.chainingSize + 3).W))
val writeCavitation: UInt = VecInit(allVrfWriteAfterCheck.map(_.mask === 0.U)).asUInt

// handle rf
Expand Down Expand Up @@ -1157,8 +1138,8 @@ class Lane(val parameter: LaneParameter) extends Module with SerializableModule[
writeReadyForLsu := vrf.writeReadyForLsu
vrfReadyToStore := vrf.vrfReadyToStore
tokenManager.crossWriteReports.zipWithIndex.foreach { case (rpt, rptIndex) =>
rpt.valid := afterCheckDequeueFire(parameter.chainingSize + 2 + rptIndex)
rpt.bits := allVrfWriteAfterCheck(parameter.chainingSize + 2 + rptIndex).instructionIndex
rpt.valid := afterCheckDequeueFire(parameter.chainingSize + 1 + rptIndex)
rpt.bits := allVrfWriteAfterCheck(parameter.chainingSize + 1 + rptIndex).instructionIndex
}
// todo: add mask unit write token
tokenManager.responseReport.valid := maskUnitRequest.valid
Expand Down Expand Up @@ -1194,13 +1175,9 @@ class Lane(val parameter: LaneParameter) extends Module with SerializableModule[

tokenManager.topWriteEnq.valid := vrfWriteChannel.fire
tokenManager.topWriteEnq.bits := vrfWriteChannel.bits.instructionIndex
tokenManager.fromMask := writeFromMask

tokenManager.lsuWriteDeq.valid := afterCheckDequeueFire(parameter.chainingSize)
tokenManager.lsuWriteDeq.bits := allVrfWriteAfterCheck(parameter.chainingSize).instructionIndex

tokenManager.maskWriteDeq.valid := afterCheckDequeueFire(parameter.chainingSize + 1)
tokenManager.maskWriteDeq.bits := allVrfWriteAfterCheck(parameter.chainingSize + 1).instructionIndex
tokenManager.topWriteDeq.valid := afterCheckDequeueFire(parameter.chainingSize)
tokenManager.topWriteDeq.bits := allVrfWriteAfterCheck(parameter.chainingSize).instructionIndex

tokenManager.maskUnitLastReport := lsuLastReport

Expand Down
35 changes: 10 additions & 25 deletions t1/src/T1.scala
Original file line number Diff line number Diff line change
Expand Up @@ -666,20 +666,6 @@ class T1(val parameter: T1Parameter)
val completeIndexInstruction: Bool =
ohCheck(lsu.lastReport, slots.last.record.instructionIndex, parameter.chainingSize) && !slots.last.state.idle

val vrfWrite: Vec[DecoupledIO[VRFWriteRequest]] = Wire(
Vec(
parameter.laneNumber,
Decoupled(
new VRFWriteRequest(
parameter.vrfParam.regNumBits,
parameter.vrfParam.vrfOffsetBits,
parameter.instructionIndexBits,
parameter.datapathWidth
)
)
)
)

val freeOR: Bool = VecInit(slots.map(_.state.idle)).asUInt.orR

/** slot is ready to accept new instructions. */
Expand Down Expand Up @@ -776,13 +762,15 @@ class T1(val parameter: T1Parameter)
Some(Seq(maskUnit.readResult(index), lsu.vrfReadResults(index)))
)

val maskTryToWrite = maskUnit.exeResp(index)
// lsu & mask unit write lane
// Mask write has absolute priority because it has a token
lane.vrfWriteChannel.valid := vrfWrite(index).valid || maskTryToWrite.valid
lane.vrfWriteChannel.bits := Mux(maskTryToWrite.valid, maskTryToWrite.bits, vrfWrite(index).bits)
vrfWrite(index).ready := lane.vrfWriteChannel.ready && !maskTryToWrite.valid
lane.writeFromMask := maskTryToWrite.valid
connectVrfAccess(
Seq(parameter.maskUnitReadShifterSize(index), parameter.lsuReadShifterSize(index)),
Seq(parameter.maskUnitReadTokenSize(index), parameter.lsuReadTokenSize(index))
)(
VecInit(Seq(maskUnit.exeResp(index), lsu.vrfWritePort(index))),
lane.vrfWriteChannel,
0
)
lane.writeFromMask := maskUnit.exeResp(index).fire

lsu.offsetReadResult(index).valid := lane.maskUnitRequest.valid && lane.maskRequestToLSU
lsu.offsetReadResult(index).bits := lane.maskUnitRequest.bits.source2
Expand Down Expand Up @@ -860,7 +848,6 @@ class T1(val parameter: T1Parameter)
}

maskUnit.tokenIO.zip(laneVec).zipWithIndex.foreach { case ((token, lane), index) =>
token.maskResponseRelease := lane.tokenIO.maskResponseRelease
lane.tokenIO.maskRequestRelease := token.maskRequestRelease || lsu.tokenIO.offsetGroupRelease(index)
}

Expand Down Expand Up @@ -897,8 +884,6 @@ class T1(val parameter: T1Parameter)

io.highBandwidthLoadStorePort <> lsu.axi4Port
io.indexedLoadStorePort <> lsu.simpleAccessPorts
// For now, connect the LSU write directly; the scheduler write still needs to be handled later
vrfWrite.zip(lsu.vrfWritePort).foreach { case (sink, source) => sink <> source }

/** Slot has free entries. */
val free = VecInit(slots.map(_.state.idle)).asUInt
Expand Down Expand Up @@ -984,7 +969,7 @@ class T1(val parameter: T1Parameter)
probeWire.requestRegReady := requestRegDequeue.ready
// maskUnitWrite maskUnitWriteReady
probeWire.writeQueueEnqVec.zip(maskUnit.exeResp).foreach { case (probe, write) =>
probe.valid := write.valid && write.bits.mask.orR
probe.valid := write.fire && write.bits.mask.orR
probe.bits := write.bits.instructionIndex
}
probeWire.instructionValid := maskAnd(
Expand Down
7 changes: 3 additions & 4 deletions t1/src/laneStage/MaskExchangeUnit.scala
Original file line number Diff line number Diff line change
Expand Up @@ -54,8 +54,7 @@ class MaskExchangeUnit(parameter: LaneParameter) extends Module {

val maskRequestEnqReady: Bool = !enqIsMaskRequest || maskRequestAllow

dequeue.valid := enqueue.valid && enqSendToDeq
dequeue.bits := enqueue.bits
enqueue.ready := Mux(enqSendToDeq, dequeue.ready, maskRequestEnqReady)
tokenIO.maskResponseRelease := DontCare
dequeue.valid := enqueue.valid && enqSendToDeq
dequeue.bits := enqueue.bits
enqueue.ready := Mux(enqSendToDeq, dequeue.ready, maskRequestEnqReady)
}
29 changes: 6 additions & 23 deletions t1/src/laneStage/SlotTokenManager.scala
Original file line number Diff line number Diff line change
Expand Up @@ -94,13 +94,7 @@ class SlotTokenManager(parameter: LaneParameter) extends Module {
val topWriteEnq: ValidIO[UInt] = IO(Flipped(Valid(UInt(parameter.instructionIndexBits.W))))

@public
val fromMask: Bool = IO(Input(Bool()))

@public
val lsuWriteDeq: ValidIO[UInt] = IO(Flipped(Valid(UInt(parameter.instructionIndexBits.W))))

@public
val maskWriteDeq: ValidIO[UInt] = IO(Flipped(Valid(UInt(parameter.instructionIndexBits.W))))
val topWriteDeq: ValidIO[UInt] = IO(Flipped(Valid(UInt(parameter.instructionIndexBits.W))))

@public
val instructionValid: UInt = IO(Output(UInt((2 * parameter.chainingSize).W)))
Expand Down Expand Up @@ -212,27 +206,16 @@ class SlotTokenManager(parameter: LaneParameter) extends Module {
val instructionInWritePipe: UInt = tokenUpdate(writePipeToken, writePipeEnq, writePipeDeq)

// lsu & mask write token
val lsuWriteToken: Seq[UInt] = Seq.tabulate(2 * parameter.chainingSize)(_ => RegInit(0.U(tokenWith.W)))
val maskWriteToken: Seq[UInt] = Seq.tabulate(2 * parameter.chainingSize)(_ => RegInit(0.U(tokenWith.W)))
val topWriteToken: Seq[UInt] = Seq.tabulate(2 * parameter.chainingSize)(_ => RegInit(0.U(tokenWith.W)))

val topWriteDoEnq: UInt =
maskAnd(topWriteEnq.valid, indexToOH(topWriteEnq.bits, parameter.chainingSize)).asUInt

val lsuWriteDoEnq: UInt =
maskAnd(topWriteEnq.valid && !fromMask, indexToOH(topWriteEnq.bits, parameter.chainingSize)).asUInt

val maskWriteDoEnq: UInt =
maskAnd(topWriteEnq.valid && fromMask, indexToOH(topWriteEnq.bits, parameter.chainingSize)).asUInt

val lsuWriteDoDeq: UInt =
maskAnd(lsuWriteDeq.valid, indexToOH(lsuWriteDeq.bits, parameter.chainingSize)).asUInt

val maskWriteDoDeq: UInt =
maskAnd(maskWriteDeq.valid, indexToOH(maskWriteDeq.bits, parameter.chainingSize)).asUInt
val topWriteDoDeq: UInt =
maskAnd(topWriteDeq.valid, indexToOH(topWriteDeq.bits, parameter.chainingSize)).asUInt

val lsuInTopWrite = tokenUpdate(lsuWriteToken, lsuWriteDoEnq, lsuWriteDoDeq)
val maskInTopWrite = tokenUpdate(maskWriteToken, maskWriteDoEnq, maskWriteDoDeq)
val topWrite: UInt = tokenUpdate(topWriteToken, topWriteDoEnq, topWriteDoDeq)

dataInWritePipe := instructionInWritePipe | lsuInTopWrite | maskInTopWrite
dataInWritePipe := instructionInWritePipe | topWrite
instructionValid := dataInWritePipe | instructionInSlot
}
15 changes: 4 additions & 11 deletions t1/src/mask/MaskUnit.scala
Original file line number Diff line number Diff line change
Expand Up @@ -50,9 +50,9 @@ class MaskUnit(parameter: T1Parameter) extends Module {
}

@public
val exeResp: Seq[ValidIO[VRFWriteRequest]] = Seq.tabulate(parameter.laneNumber) { _ =>
val exeResp: Seq[DecoupledIO[VRFWriteRequest]] = Seq.tabulate(parameter.laneNumber) { _ =>
IO(
Valid(
Decoupled(
new VRFWriteRequest(
parameter.vrfParam.regNumBits,
parameter.laneParam.vrfOffsetBits,
Expand Down Expand Up @@ -1078,17 +1078,10 @@ class MaskUnit(parameter: T1Parameter) extends Module {
}
queue.enq.bits.index := instReg.instructionIndex

// write token
val tokenCounter = RegInit(0.U(log2Ceil(parameter.maskUnitVefWriteQueueSize + 1).W))
val tokenAllow: Bool = queue.deq.fire
val counterChange: UInt = Mux(tokenAllow, 1.U, -1.S(tokenCounter.getWidth.W).asUInt)
when(tokenAllow ^ tokenIO(index).maskResponseRelease) {
tokenCounter := tokenCounter + counterChange
}
// write vrf
val writePort = exeResp(index)
queue.deq.ready := !tokenCounter.asBools.last
writePort.valid := tokenAllow
queue.deq.ready := writePort.ready
writePort.valid := queue.deq.valid
writePort.bits.last := DontCare
writePort.bits.instructionIndex := instReg.instructionIndex
writePort.bits.data := Mux(queue.deq.bits.ffoByOther, queue.deq.bits.pipeData, queue.deq.bits.writeData.data)
Expand Down
2 changes: 1 addition & 1 deletion t1/src/package.scala
Original file line number Diff line number Diff line change
Expand Up @@ -277,7 +277,7 @@ package object rtl {
def connectVrfAccess[T <: Data](
latencyVec: Seq[Int],
tokenSizeVec: Seq[Int],
vrfReadLatency: Option[Int]
vrfReadLatency: Option[Int] = None
)(sourceVec: Vec[DecoupledIO[T]],
sink: DecoupledIO[T],
arb: Int,
Expand Down
4 changes: 2 additions & 2 deletions t1/src/vrf/VRF.scala
Original file line number Diff line number Diff line change
Expand Up @@ -202,7 +202,7 @@ class VRF(val parameter: VRFParam) extends Module with SerializableModule[VRFPar
@public
val writeCheck: Vec[LSUWriteCheck] = IO(
Vec(
parameter.chainingSize + 4,
parameter.chainingSize + 3,
Input(
new LSUWriteCheck(
parameter.regNumBits,
Expand All @@ -214,7 +214,7 @@ class VRF(val parameter: VRFParam) extends Module with SerializableModule[VRFPar
)

@public
val writeAllow: Vec[Bool] = IO(Vec(parameter.chainingSize + 4, Output(Bool())))
val writeAllow: Vec[Bool] = IO(Vec(parameter.chainingSize + 3, Output(Bool())))

/** when instruction is fired, record it in the VRF for chaining. */
@public
Expand Down

0 comments on commit 6e16894

Please sign in to comment.