Skip to content

Commit

Permalink
[rtl] duplicate v0 in lsu.
Browse files Browse the repository at this point in the history
  • Loading branch information
qinjun-li committed Dec 12, 2024
1 parent ea1d61f commit 6417d0d
Show file tree
Hide file tree
Showing 5 changed files with 67 additions and 61 deletions.
6 changes: 3 additions & 3 deletions t1/src/Bundles.scala
Original file line number Diff line number Diff line change
Expand Up @@ -360,9 +360,9 @@ class LaneResponseFeedback(param: LaneParameter) extends Bundle {
val complete: Bool = Bool()
}

class V0Update(param: LaneParameter) extends Bundle {
val data: UInt = UInt(param.datapathWidth.W)
val offset: UInt = UInt(param.vrfOffsetBits.W)
/** Payload describing one write to v0, sent out by a lane so that copies of v0 kept elsewhere can be updated.
  *
  * @param datapathWidth
  *   width in bits of the `data` field
  * @param vrfOffsetBits
  *   width in bits of the `offset` field
  */
class V0Update(datapathWidth: Int, vrfOffsetBits: Int) extends Bundle {
// the data word being written
val data: UInt = UInt(datapathWidth.W)
// offset of the written word; presumably the VRF offset inside v0 for the sending lane — confirm against the producer
val offset: UInt = UInt(vrfOffsetBits.W)
// mask/load type instructions may not write the complete 32 bits
// NOTE(review): the width is fixed at 4 and the comment assumes a 32-bit word; confirm whether it should follow
// datapathWidth when datapathWidth is not 32
val mask: UInt = UInt(4.W)
}
Expand Down
64 changes: 32 additions & 32 deletions t1/src/Lane.scala
Original file line number Diff line number Diff line change
Expand Up @@ -276,7 +276,7 @@ class Lane(val parameter: LaneParameter) extends Module with SerializableModule[

/** V0 update in the lane should also update [[T1.v0]] */
@public
val v0Update: ValidIO[V0Update] = IO(Valid(new V0Update(parameter)))
val v0Update: ValidIO[V0Update] = IO(Valid(new V0Update(parameter.datapathWidth, parameter.vrfOffsetBits)))

/** input of mask data */
@public
Expand Down Expand Up @@ -371,33 +371,33 @@ class Lane(val parameter: LaneParameter) extends Module with SerializableModule[

// todo: mv to bundle.scala
class MaskControl(parameter: LaneParameter) extends Bundle {
val index: UInt = UInt(parameter.instructionIndexBits.W)
val sew: UInt = UInt(2.W)
val maskData: UInt = UInt(parameter.datapathWidth.W)
val group: UInt = UInt(parameter.maskGroupSizeBits.W)
val dataValid: Bool = Bool()
val index: UInt = UInt(parameter.instructionIndexBits.W)
val sew: UInt = UInt(2.W)
val maskData: UInt = UInt(parameter.datapathWidth.W)
val group: UInt = UInt(parameter.maskGroupSizeBits.W)
val dataValid: Bool = Bool()
val waiteResponse: Bool = Bool()
val controlValid: Bool = Bool()
val controlValid: Bool = Bool()
}

val maskControlRelease: Vec[ValidIO[UInt]] =
Wire(Vec(parameter.chainingSize, Valid(UInt(parameter.instructionIndexBits.W))))

val maskControlEnq: UInt = Wire(UInt(parameter.chainingSize.W))
val maskControlDataDeq: UInt = Wire(UInt(parameter.chainingSize.W))
val maskControlReq: Vec[Bool] = Wire(Vec(parameter.chainingSize, Bool()))
val maskControlReqSelect: UInt = ffo(maskControlReq.asUInt)
val maskControlEnq: UInt = Wire(UInt(parameter.chainingSize.W))
val maskControlDataDeq: UInt = Wire(UInt(parameter.chainingSize.W))
val maskControlReq: Vec[Bool] = Wire(Vec(parameter.chainingSize, Bool()))
val maskControlReqSelect: UInt = ffo(maskControlReq.asUInt)
// mask request & response handle
val maskControlVec: Seq[MaskControl] = Seq.tabulate(parameter.chainingSize) { index =>
val maskControlVec: Seq[MaskControl] = Seq.tabulate(parameter.chainingSize) { index =>
val state = RegInit(0.U.asTypeOf(new MaskControl(parameter)))
val releaseHit: Bool = maskControlRelease.map(r => r.valid && (r.bits === state.index)).reduce(_ || _)
val responseFire =
Pipe(maskControlReqSelect(index), 0.U.asTypeOf(new EmptyBundle), parameter.maskRequestLatency).valid

when(maskControlEnq(index)) {
state := 0.U.asTypeOf(state)
state.index := laneRequest.bits.instructionIndex
state.sew := laneRequest.bits.csrInterface.vSew
state := 0.U.asTypeOf(state)
state.index := laneRequest.bits.instructionIndex
state.sew := laneRequest.bits.csrInterface.vSew
state.controlValid := true.B
}

Expand All @@ -410,13 +410,13 @@ class Lane(val parameter: LaneParameter) extends Module with SerializableModule[
maskControlReq(index) := state.controlValid && !state.dataValid && !state.waiteResponse
when(maskControlReqSelect(index)) {
state.waiteResponse := true.B
state.group := state.group + 1.U
state.group := state.group + 1.U
}

when(responseFire) {
state.dataValid := true.B
state.dataValid := true.B
state.waiteResponse := false.B
state.maskData := maskInput
state.maskData := maskInput
}

when(maskControlDataDeq(index)) {
Expand All @@ -425,8 +425,8 @@ class Lane(val parameter: LaneParameter) extends Module with SerializableModule[

state
}
val maskControlFree: Seq[Bool] = maskControlVec.map(s => !s.controlValid && !s.waiteResponse)
val freeSelect: UInt = ffo(VecInit(maskControlFree).asUInt)
val maskControlFree: Seq[Bool] = maskControlVec.map(s => !s.controlValid && !s.waiteResponse)
val freeSelect: UInt = ffo(VecInit(maskControlFree).asUInt)
maskControlEnq := maskAnd(laneRequest.fire && laneRequest.bits.mask, freeSelect)

/** for each slot, assert when it is asking [[T1]] to change mask */
Expand All @@ -439,7 +439,7 @@ class Lane(val parameter: LaneParameter) extends Module with SerializableModule[

/** which slot wins the arbitration for requesting mask. */
val maskRequestFireOH: Vec[Bool] = Wire(Vec(parameter.chainingSize, Bool()))
val maskDataVec: Vec[UInt] = Wire(Vec(parameter.chainingSize, UInt(parameter.maskGroupWidth.W)))
val maskDataVec: Vec[UInt] = Wire(Vec(parameter.chainingSize, UInt(parameter.maskGroupWidth.W)))

/** FSM control for each slot. if index == 0,
* - slot can support write v0 in mask type, see [[Decoder.maskDestination]] [[Decoder.maskSource]]
Expand Down Expand Up @@ -663,14 +663,14 @@ class Lane(val parameter: LaneParameter) extends Module with SerializableModule[
}

maskControlRelease(index).valid := false.B
maskControlRelease(index).bits := record.laneRequest.instructionIndex
maskControlRelease(index).bits := record.laneRequest.instructionIndex
// update lane state
when(stage0.enqueue.fire) {
maskGroupCountVec(index) := stage0.updateLaneState.maskGroupCount
// todo: handle all elements in first group are masked
maskIndexVec(index) := stage0.updateLaneState.maskIndex
when(stage0.updateLaneState.outOfExecutionRange) {
slotOccupied(index) := false.B
slotOccupied(index) := false.B
maskControlRelease(index).valid := true.B
}
}
Expand Down Expand Up @@ -962,19 +962,19 @@ class Lane(val parameter: LaneParameter) extends Module with SerializableModule[
}

{
maskSelect := Mux1H(maskControlReqSelect, maskControlVec.map(_.group))
maskSelectSew := Mux1H(maskControlReqSelect, maskControlVec.map(_.sew))
maskSelect := Mux1H(maskControlReqSelect, maskControlVec.map(_.group))
maskSelectSew := Mux1H(maskControlReqSelect, maskControlVec.map(_.sew))
maskControlDataDeq := slotMaskRequestVec.zipWithIndex.map { case (req, index) =>
val slotIndex = slotControl(index).laneRequest.instructionIndex
val hitMaskControl = VecInit(maskControlVec.map(_.index === slotIndex)).asUInt
val dataValid = Mux1H(hitMaskControl, maskControlVec.map(_.dataValid))
val data = Mux1H(hitMaskControl, maskControlVec.map(_.maskData))
val group = Mux1H(hitMaskControl, maskControlVec.map(_.group))
val sameGroup = group === req.bits
val slotIndex = slotControl(index).laneRequest.instructionIndex
val hitMaskControl = VecInit(maskControlVec.map(_.index === slotIndex)).asUInt
val dataValid = Mux1H(hitMaskControl, maskControlVec.map(_.dataValid))
val data = Mux1H(hitMaskControl, maskControlVec.map(_.maskData))
val group = Mux1H(hitMaskControl, maskControlVec.map(_.group))
val sameGroup = group === req.bits
dontTouch(sameGroup)
val maskRequestFire = req.valid && dataValid
maskRequestFireOH(index) := maskRequestFire
maskDataVec(index) := data
maskDataVec(index) := data
maskAnd(maskRequestFire, hitMaskControl).asUInt
}.reduce(_ | _)
}
Expand Down
11 changes: 5 additions & 6 deletions t1/src/T1.scala
Original file line number Diff line number Diff line change
Expand Up @@ -777,6 +777,7 @@ class T1(val parameter: T1Parameter)
maskUnit.io.laneMaskSelect(index) := Pipe(true.B, lane.maskSelect, parameter.maskRequestLatency).bits
maskUnit.io.laneMaskSewSelect(index) := Pipe(true.B, lane.maskSelectSew, parameter.maskRequestLatency).bits
maskUnit.io.v0UpdateVec(index) <> lane.v0Update
lsu.v0UpdateVec(index) <> lane.v0Update

lane.lsuLastReport := lsu.lastReport | maskUnit.io.lastReport

Expand Down Expand Up @@ -811,12 +812,10 @@ class T1(val parameter: T1Parameter)
lsu.request.bits.instructionInformation.isStore := isStoreType
lsu.request.bits.instructionInformation.maskedLoadStore := maskType

maskUnit.io.lsuMaskSelect := lsu.maskSelect
lsu.maskInput := maskUnit.io.lsuMaskInput
lsu.csrInterface := requestRegCSR
lsu.csrInterface.vl := evlForLsu
lsu.writeReadyForLsu := VecInit(laneVec.map(_.writeReadyForLsu)).asUInt.andR
lsu.vrfReadyToStore := VecInit(laneVec.map(_.vrfReadyToStore)).asUInt.andR
lsu.csrInterface := requestRegCSR
lsu.csrInterface.vl := evlForLsu
lsu.writeReadyForLsu := VecInit(laneVec.map(_.writeReadyForLsu)).asUInt.andR
lsu.vrfReadyToStore := VecInit(laneVec.map(_.vrfReadyToStore)).asUInt.andR

// connect mask unit
maskUnit.io.instReq.valid := requestRegDequeue.fire && requestReg.bits.decodeResult(Decoder.maskUnit)
Expand Down
34 changes: 24 additions & 10 deletions t1/src/lsu/LSU.scala
Original file line number Diff line number Diff line change
Expand Up @@ -118,15 +118,10 @@ class LSU(param: LSUParameter) extends Module {
@public
val request: DecoupledIO[LSURequest] = IO(Flipped(Decoupled(new LSURequest(param.datapathWidth))))

/** mask from [[V]] TODO: since mask is one-cycle information for a mask group, we should latch it in the LSU, and
* reduce the IO width. this needs PnR information.
*/
@public
val maskInput: Vec[UInt] = IO(Input(Vec(param.lsuMSHRSize, UInt(param.maskGroupWidth.W))))

/** the address of the mask group in the [[V]]. */
@public
val maskSelect: Vec[UInt] = IO(Output(Vec(param.lsuMSHRSize, UInt(param.maskGroupSizeBits.W))))
// v0 write notifications, one Valid[V0Update] per lane (param.laneNumber entries).
// The Vec is Flipped, so these are inputs of the LSU.
val v0UpdateVec: Vec[ValidIO[V0Update]] = IO(
Flipped(Vec(param.laneNumber, Valid(new V0Update(param.datapathWidth, param.vrfOffsetBits))))
)

@public
val axi4Port: AXI4RWIrrevocable = IO(new AXI4RWIrrevocable(param.axi4BundleParameter))
Expand Down Expand Up @@ -197,6 +192,25 @@ class LSU(param: LSUParameter) extends Module {
val storeUnit: StoreUnit = Module(new StoreUnit(param.mshrParam))
val otherUnit: SimpleAccessUnit = Module(new SimpleAccessUnit(param.mshrParam))

/** Duplicate of v0 held inside the LSU: `vLen / datapathWidth` registers, each `datapathWidth` bits wide, all reset to
  * zero.
  */
val v0: Vec[UInt] = RegInit(
VecInit(Seq.fill(param.vLen / param.datapathWidth)(0.U(param.datapathWidth.W)))
)

// write v0(mask): apply the v0 updates reported on v0UpdateVec to the local copy, one register word at a time
v0.zipWithIndex.foreach { case (data, index) =>
// which lane this word belongs to (consecutive words are interleaved across the lanes)
val laneIndex: Int = index % param.laneNumber
// select the write port of that lane
val v0Write = v0UpdateVec(laneIndex)
// offset of this word within its lane
val offset: Int = index / param.laneNumber
// repeat every mask bit 8 times so that each mask bit gates 8 data bits
val maskExt = FillInterleaved(8, v0Write.bits.mask)
// on a valid update whose offset selects this word, replace only the masked bits and keep the others
when(v0Write.valid && v0Write.bits.offset === offset.U) {
data := (data & (~maskExt).asUInt) | (maskExt & v0Write.bits.data)
}
}

val unitVec = Seq(loadUnit, storeUnit, otherUnit)

/** Always merge into cache line */
Expand All @@ -222,8 +236,8 @@ class LSU(param: LSUParameter) extends Module {
mshr.lsuRequest.valid := reqEnq(index)
mshr.lsuRequest.bits := request.bits

maskSelect(index) := Mux(mshr.maskSelect.valid, mshr.maskSelect.bits, 0.U)
mshr.maskInput := maskInput(index)
val maskSelect = Mux(mshr.maskSelect.valid, mshr.maskSelect.bits, 0.U)
mshr.maskInput := cutUInt(v0.asUInt, param.maskGroupWidth)(maskSelect)

// broadcast CSR
mshr.csrInterface := csrInterface
Expand Down
13 changes: 3 additions & 10 deletions t1/src/mask/MaskUnit.scala
Original file line number Diff line number Diff line change
Expand Up @@ -70,12 +70,12 @@ class MaskUnitInterface(parameter: T1Parameter) extends Bundle {
val readResult: Vec[ValidIO[UInt]] = Flipped(Vec(parameter.laneNumber, Valid(UInt(parameter.datapathWidth.W))))
val writeRD: ValidIO[UInt] = Valid(UInt(parameter.datapathWidth.W))
val lastReport: UInt = Output(UInt((2 * parameter.chainingSize).W))
val lsuMaskInput: Vec[UInt] = Output(Vec(parameter.lsuMSHRSize, UInt(parameter.maskGroupWidth.W)))
val lsuMaskSelect: Vec[UInt] = Input(Vec(parameter.lsuMSHRSize, UInt(parameter.lsuParameters.maskGroupSizeBits.W)))
val laneMaskInput: Vec[UInt] = Output(Vec(parameter.laneNumber, UInt(parameter.datapathWidth.W)))
val laneMaskSelect: Vec[UInt] = Input(Vec(parameter.laneNumber, UInt(parameter.laneParam.maskGroupSizeBits.W)))
val laneMaskSewSelect: Vec[UInt] = Input(Vec(parameter.laneNumber, UInt(2.W)))
val v0UpdateVec: Vec[ValidIO[V0Update]] = Flipped(Vec(parameter.laneNumber, Valid(new V0Update(parameter.laneParam))))
val v0UpdateVec: Vec[ValidIO[V0Update]] = Flipped(
Vec(parameter.laneNumber, Valid(new V0Update(parameter.laneParam.datapathWidth, parameter.laneParam.vrfOffsetBits)))
)
val writeRDData: UInt = Output(UInt(parameter.xLen.W))
val gatherData: DecoupledIO[UInt] = Decoupled(UInt(parameter.xLen.W))
val gatherRead: Bool = Input(Bool())
Expand Down Expand Up @@ -116,8 +116,6 @@ class MaskUnit(val parameter: T1Parameter)
val readResult = io.readResult
val writeRD = io.writeRD
val lastReport = io.lastReport
val lsuMaskInput = io.lsuMaskInput
val lsuMaskSelect = io.lsuMaskSelect
val laneMaskInput = io.laneMaskInput
val laneMaskSelect = io.laneMaskSelect
val laneMaskSewSelect = io.laneMaskSewSelect
Expand Down Expand Up @@ -168,11 +166,6 @@ class MaskUnit(val parameter: T1Parameter)
input := cutUInt(v0SelectBySew, parameter.datapathWidth)(laneMaskSelect(index))
}

// lsu
lsuMaskInput.zip(lsuMaskSelect).foreach { case (data, index) =>
data := cutUInt(v0.asUInt, parameter.maskGroupWidth)(index)
}

val maskedWrite: BitLevelMaskWrite = Module(new BitLevelMaskWrite(parameter))

def gatherIndex(elementIndex: UInt, vlmul: UInt, sew: UInt): (UInt, UInt, UInt, UInt, Bool) = {
Expand Down

0 comments on commit 6417d0d

Please sign in to comment.