Skip to content

Commit

Permalink
[rtl] connect vrf read with shifter.
Browse files Browse the repository at this point in the history
  • Loading branch information
qinjun-li authored and Avimitin committed Dec 7, 2024
1 parent 3d704ce commit 19e9a71
Show file tree
Hide file tree
Showing 6 changed files with 85 additions and 38 deletions.
38 changes: 16 additions & 22 deletions t1/src/T1.scala
Original file line number Diff line number Diff line change
Expand Up @@ -261,6 +261,12 @@ case class T1Parameter(
val laneRequestTokenSize: Int = 4
val laneRequestShifterSize: Seq[Int] = Seq.tabulate(laneNumber)(_ => 1)

val maskUnitReadTokenSize: Seq[Int] = Seq.tabulate(laneNumber)(_ => 4)
val maskUnitReadShifterSize: Seq[Int] = Seq.tabulate(laneNumber)(_ => 1)

val lsuReadTokenSize: Seq[Int] = Seq.tabulate(laneNumber)(_ => 4)
val lsuReadShifterSize: Seq[Int] = Seq.tabulate(laneNumber)(_ => 1)

val decoderParam: DecoderParam = DecoderParam(fpuEnable, zvbbEnable, allInstructions)

/** Parameter for AXI4. */
Expand Down Expand Up @@ -756,29 +762,17 @@ class T1(val parameter: T1Parameter)
laneRequestSinkWire(index).ready := lane.laneRequest.ready && lane.vrfAllocateIssue
lane.laneIndex := index.U

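// Giving the LSU priority can deadlock:
// vmadc, v1, v2, 1 (vl=17) -> has to read before it writes
// vse32.v v1, (a0) -> depends on the previous instruction, but issues its read first

// Giving the mask unit priority can deadlock as well:
// vse32.v v19, (a0)
// vfslide1down.vf v19, v10, x1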
val maskUnitFirst = RegInit(false.B)
val tryToRead = lsu.vrfReadDataPorts(index).valid || maskUnit.readChannel(index).valid
when(tryToRead && !lane.vrfReadAddressChannel.fire) {
maskUnitFirst := !maskUnitFirst
}
lane.vrfReadAddressChannel.valid := Mux(
maskUnitFirst,
maskUnit.readChannel(index).valid,
lsu.vrfReadDataPorts(index).valid
connectVrfAccess(
Seq(parameter.maskUnitReadShifterSize(index), parameter.lsuReadShifterSize(index)),
Seq(parameter.maskUnitReadTokenSize(index), parameter.lsuReadTokenSize(index)),
Some(parameter.vrfReadLatency)
)(
VecInit(Seq(maskUnit.readChannel(index), lsu.vrfReadDataPorts(index))),
lane.vrfReadAddressChannel,
0,
Some(lane.vrfReadDataChannel),
Some(Seq(maskUnit.readResult(index), lsu.vrfReadResults(index)))
)
lane.vrfReadAddressChannel.bits :=
Mux(maskUnitFirst, maskUnit.readChannel(index).bits, lsu.vrfReadDataPorts(index).bits)
lsu.vrfReadDataPorts(index).ready := lane.vrfReadAddressChannel.ready && !maskUnitFirst
maskUnit.readChannel(index).ready := lane.vrfReadAddressChannel.ready && maskUnitFirst
maskUnit.readResult(index) := lane.vrfReadDataChannel
lsu.vrfReadResults(index) := lane.vrfReadDataChannel

val maskTryToWrite = maskUnit.exeResp(index)
// lsu & mask unit write lane
Expand Down
5 changes: 3 additions & 2 deletions t1/src/lsu/LSU.scala
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,7 @@ class LSU(param: LSUParameter) extends Module {
/** Hard-wired from Top. TODO: merge to [[vrfReadDataPorts]]
*/
@public
val vrfReadResults: Vec[UInt] = IO(Input(Vec(param.laneNumber, UInt(param.datapathWidth.W))))
val vrfReadResults: Vec[ValidIO[UInt]] = IO(Vec(param.laneNumber, Flipped(Valid(UInt(param.datapathWidth.W)))))

/** write channel to [[V]], which will redirect it to [[Lane.vrf]]. */
@public
Expand Down Expand Up @@ -250,7 +250,8 @@ class LSU(param: LSUParameter) extends Module {
otherUnit.vrfReadDataPorts.ready := (otherTryReadVrf & VecInit(vrfReadDataPorts.map(_.ready)).asUInt).orR
val pipeOtherRead: ValidIO[UInt] =
Pipe(otherUnit.vrfReadDataPorts.fire, otherUnit.status.targetLane, param.vrfReadLatency)
otherUnit.vrfReadResults.bits := Mux1H(pipeOtherRead.bits, vrfReadResults)
// todo: read data reorder
otherUnit.vrfReadResults.bits := Mux1H(pipeOtherRead.bits, vrfReadResults.map(_.bits))
otherUnit.vrfReadResults.valid := pipeOtherRead.valid

// write vrf
Expand Down
12 changes: 5 additions & 7 deletions t1/src/lsu/StoreUnit.scala
Original file line number Diff line number Diff line change
Expand Up @@ -39,9 +39,9 @@ class StoreUnit(param: MSHRParam) extends StrideBase(param) with LSUPublic {
/** Hard-wired from Top. See [[LSU.vrfReadResults]]
*/
@public
val vrfReadResults: Vec[UInt] = IO(Input(Vec(param.laneNumber, UInt(param.datapathWidth.W))))
val vrfReadResults: Vec[ValidIO[UInt]] = IO(Input(Vec(param.laneNumber, Valid(UInt(param.datapathWidth.W)))))
@public
val vrfReadyToStore: Bool = IO(Input(Bool()))
val vrfReadyToStore: Bool = IO(Input(Bool()))
@public
val storeResponse = IO(Input(Bool()))

Expand Down Expand Up @@ -73,6 +73,7 @@ class StoreUnit(param: MSHRParam) extends StrideBase(param) with LSUPublic {
val readCount: UInt = RegInit(0.U(dataGroupBits.W))
val stageValid = RegInit(false.B)
// queue for read latency
// todo: param.vrfReadLatency => param.vrfReadLatency + shifterLatency
val queue: QueueIO[UInt] =
Queue.io(UInt(param.datapathWidth.W), param.vrfReadLatency, flow = true)

Expand Down Expand Up @@ -114,12 +115,9 @@ class StoreUnit(param: MSHRParam) extends StrideBase(param) with LSUPublic {
readPort.bits.offset := readCount
readPort.bits.instructionIndex := lsuRequestReg.instructionIndex

// pipe read fire
val readResultFire = Pipe(readPort.fire, 0.U.asTypeOf(new EmptyBundle), param.vrfReadLatency).valid

// latency queue enq
queue.enq.valid := readResultFire
queue.enq.bits := vrfReadResults(laneIndex)
queue.enq.valid := vrfReadResults(laneIndex).valid
queue.enq.bits := vrfReadResults(laneIndex).bits
AssertProperty(BoolSequence(!queue.enq.valid || queue.enq.ready))
vrfReadQueueVec(laneIndex).enq <> queue.deq
stageValid || RegNext(readPort.fire)
Expand Down
8 changes: 4 additions & 4 deletions t1/src/mask/BitLevelMaskWrite.scala
Original file line number Diff line number Diff line change
Expand Up @@ -43,16 +43,16 @@ class BitLevelMaskWrite(parameter: T1Parameter) extends Module {
)
}

val readResult: Seq[UInt] = Seq.tabulate(parameter.laneNumber) { _ =>
IO(Input(UInt(parameter.datapathWidth.W)))
val readResult: Seq[ValidIO[UInt]] = Seq.tabulate(parameter.laneNumber) { _ =>
IO(Flipped(Valid(UInt(parameter.datapathWidth.W))))
}

val stageClear: Bool = IO(Output(Bool()))

val stageClearVec: Seq[Bool] = in.zipWithIndex.map { case (req, index) =>
val reqQueue: QueueIO[BitLevelWriteRequest] = Queue.io(chiselTypeOf(req.bits), 4)
val readPort = readChannel(index)
val readData = readResult(index)
val readData = readResult(index).bits
val res = out(index)

val WaitReadQueue: QueueIO[BitLevelWriteRequest] = Queue.io(chiselTypeOf(req.bits), readVRFLatency)
Expand All @@ -68,7 +68,7 @@ class BitLevelMaskWrite(parameter: T1Parameter) extends Module {
readPort.bits.vs := vd + (reqQueue.deq.bits.groupCounter >> readPort.bits.offset.getWidth).asUInt
readPort.bits.offset := changeUIntSize(reqQueue.deq.bits.groupCounter, readPort.bits.offset.getWidth)

val readValidPipe = Pipe(readPort.fire, false.B, readVRFLatency).valid
val readValidPipe = Pipe(readPort.fire, false.B, readVRFLatency).valid && readResult(index).valid
val readResultValid = !needWAR || readValidPipe

val WARData = (WaitReadQueue.deq.bits.data & WaitReadQueue.deq.bits.bitMask) |
Expand Down
6 changes: 3 additions & 3 deletions t1/src/mask/MaskUnit.scala
Original file line number Diff line number Diff line change
Expand Up @@ -82,8 +82,8 @@ class MaskUnit(parameter: T1Parameter) extends Module {
}

@public
val readResult: Seq[UInt] = Seq.tabulate(parameter.laneNumber) { _ =>
IO(Input(UInt(parameter.datapathWidth.W)))
val readResult: Seq[ValidIO[UInt]] = Seq.tabulate(parameter.laneNumber) { _ =>
IO(Flipped(Valid(UInt(parameter.datapathWidth.W))))
}

@public
Expand Down Expand Up @@ -789,7 +789,7 @@ class MaskUnit(parameter: T1Parameter) extends Module {
val dataOffset: UInt = Mux1H(readResultSelect, pipeDataOffset)
readTokenRelease(index) := readDataQueue.deq.fire
readDataQueue.enq.valid := readResultSelect.orR
readDataQueue.enq.bits := Mux1H(readResultSelect, readResult) >> (dataOffset ## 0.U(3.W))
readDataQueue.enq.bits := Mux1H(readResultSelect, readResult.map(_.bits)) >> (dataOffset ## 0.U(3.W))
readDataQueue.deq
}

Expand Down
54 changes: 54 additions & 0 deletions t1/src/package.scala
Original file line number Diff line number Diff line change
Expand Up @@ -251,6 +251,60 @@ package object rtl {
sink <> queue.deq
}

/** Two-way arbiter between a mask-unit read request and an LSU read request.
  *
  * Exactly one requester is selected at any time by the `maskUnitFirst` register. Only the
  * selected requester is forwarded to the returned port and only it can observe `ready`.
  * Whenever at least one requester is valid but the returned port does not fire in that
  * cycle, the selection flips, so neither requester can hold the port while stalled.
  *
  * @param source
  *   exactly two requesters: element 0 is treated as the mask unit, element 1 as the LSU
  * @return
  *   a freshly created decoupled wire carrying the selected request; the caller is
  *   responsible for driving its `ready`
  */
def maskUnitReadArbitrate[T <: Data](source: Vec[DecoupledIO[T]]): DecoupledIO[T] = {
  require(source.size == 2)
  val maskPort = source(0)
  val lsuPort  = source(1)

  val sinkWire: DecoupledIO[T] = Wire(Decoupled(chiselTypeOf(maskPort.bits)))
  // Selection flag: true -> mask unit owns the port, false (reset value) -> LSU owns it.
  val maskUnitFirst = RegInit(false.B)

  // Forward valid/bits of whichever requester is currently selected.
  sinkWire.valid := Mux(maskUnitFirst, maskPort.valid, lsuPort.valid)
  sinkWire.bits  := Mux(maskUnitFirst, maskPort.bits, lsuPort.bits)

  // Back-pressure: the requester that is not selected never sees ready.
  maskPort.ready := maskUnitFirst && sinkWire.ready
  lsuPort.ready  := !maskUnitFirst && sinkWire.ready

  // Someone wants the port but nothing was transferred this cycle: hand the port over.
  val tryToRead = lsuPort.valid || maskPort.valid
  when(!sinkWire.fire && tryToRead) {
    maskUnitFirst := !maskUnitFirst
  }

  sinkWire
}

/** Connect several VRF access requesters to one `sink` port, with a shifter on every
  * request path and, optionally, on the matching read-data return path.
  *
  * Request path: every element of `sourceVec` is passed through
  * `connectDecoupledWithShifter(latencyVec(i), tokenSizeVec(i))` into a private wire; the
  * wires are then arbitrated onto `sink`.
  *
  * Data path (only when `dataToSource` is given): for every requester, the fire of its
  * post-shifter request is delayed by `vrfReadLatency` cycles to form the valid of the read
  * data, the data itself is taken from `dataAck`, and the pair is sent back to the requester
  * through `connectWithShifter(latencyVec(i))`.
  *
  * @param latencyVec
  *   per-requester shifter size, used for both the request and the data path
  * @param tokenSizeVec
  *   per-requester token size handed to `connectDecoupledWithShifter`
  * @param vrfReadLatency
  *   read latency of the sink in cycles; must be defined when `dataToSource` is defined
  * @param sourceVec
  *   the requesters
  * @param sink
  *   the shared port the requesters are arbitrated onto
  * @param arb
  *   arbitration scheme selector; only `0` ([[maskUnitReadArbitrate]], which requires exactly
  *   two requesters) is implemented here. NOTE(review): for any other value `sink` is left
  *   unconnected by this function - confirm whether that is intended.
  * @param dataAck
  *   read data coming back from the sink; must be defined when `dataToSource` is defined
  * @param dataToSource
  *   per-requester read-result ports, in the same order as `sourceVec`
  */
def connectVrfAccess[T <: Data](
  latencyVec:     Seq[Int],
  tokenSizeVec:   Seq[Int],
  vrfReadLatency: Option[Int]
)(sourceVec:      Vec[DecoupledIO[T]],
  sink:           DecoupledIO[T],
  arb:            Int,
  dataAck:        Option[UInt] = None,
  dataToSource:   Option[Seq[ValidIO[UInt]]] = None
): Unit = {
  // One post-shifter request wire per requester.
  val sinkVec: Vec[DecoupledIO[T]] = VecInit(sourceVec.zipWithIndex.map { case (source, index) =>
    val sinkWire: DecoupledIO[T] = Wire(Decoupled(chiselTypeOf(source.bits)))
    connectDecoupledWithShifter(latencyVec(index), tokenSizeVec(index))(source, sinkWire)
    sinkWire
  })
  if (arb == 0) {
    sink <> maskUnitReadArbitrate(sinkVec)
  }
  dataToSource.foreach { sourceDataVec =>
    require(dataAck.isDefined, "dataAck must be provided when dataToSource is defined")
    require(vrfReadLatency.isDefined, "vrfReadLatency must be provided when dataToSource is defined")
    sourceDataVec.zipWithIndex.foreach { case (sourceData, index) =>
      val sinkRequest      = sinkVec(index)
      // The data for a request shows up vrfReadLatency cycles after the request fired.
      val accessDataValid  = Pipe(sinkRequest.fire, 0.U.asTypeOf(new EmptyBundle), vrfReadLatency.get).valid
      val accessDataSource = Wire(Valid(chiselTypeOf(dataAck.get)))
      accessDataSource.valid := accessDataValid
      // Return the actual read data; the valid flag must not be used as the payload.
      accessDataSource.bits  := dataAck.get
      connectWithShifter(latencyVec(index))(accessDataSource, sourceData)
    }
  }
}

def instantiateVFU(
parameter: VFUInstantiateParameter
)(requestVec: Vec[SlotRequestToVFU],
Expand Down

0 comments on commit 19e9a71

Please sign in to comment.