Skip to content

Commit

Permalink
[rtl] connect laneRequest with shifter.
Browse files Browse the repository at this point in the history
  • Loading branch information
qinjun-li authored and Avimitin committed Dec 9, 2024
1 parent 1dc4298 commit 2cc2b96
Show file tree
Hide file tree
Showing 2 changed files with 78 additions and 32 deletions.
80 changes: 48 additions & 32 deletions t1/src/T1.scala
Original file line number Diff line number Diff line change
Expand Up @@ -260,6 +260,9 @@ case class T1Parameter(
// and the values are their respective delays.
val crossLaneConnectCycles: Seq[Seq[Int]] = Seq.tabulate(laneNumber)(_ => Seq(1, 1))

val laneRequestTokenSize: Int = 4
val laneRequestShifterSize: Seq[Int] = Seq.tabulate(laneNumber)(_ => 1)

val decoderParam: DecoderParam = DecoderParam(fpuEnable, zvbbEnable, allInstructions)

/** parameter for AXI4. */
Expand Down Expand Up @@ -634,9 +637,21 @@ class T1(val parameter: T1Parameter)
control
}

/** lane is ready to receive new instruction. */
val laneReady: Vec[Bool] = Wire(Vec(parameter.laneNumber, Bool()))
val allLaneReady: Bool = laneReady.asUInt.andR
// Close to top
val laneRequestSourceWire: Vec[DecoupledIO[LaneRequest]] = Wire(
Vec(parameter.laneNumber, Decoupled(new LaneRequest(parameter.laneParam)))
)
// Close to lane
val laneRequestSinkWire: Vec[DecoupledIO[LaneRequest]] = Wire(
Vec(parameter.laneNumber, Decoupled(new LaneRequest(parameter.laneParam)))
)

laneRequestSourceWire.zipWithIndex.foreach { case (source, index) =>
val sink = laneRequestSinkWire(index)
connectDecoupledWithShifter(parameter.laneRequestShifterSize(index), parameter.laneRequestTokenSize)(source, sink)
}

val allLaneReady: Bool = VecInit(laneRequestSourceWire.map(_.ready)).asUInt.andR
// TODO: review later
// todo: also take the scheduler's feedback into account; lsu has higher priority

Expand Down Expand Up @@ -698,48 +713,50 @@ class T1(val parameter: T1Parameter)
requestReg.bits.issue.vl
)

/** instantiate lanes. TODO: move instantiate to top of class.
*/
val laneVec: Seq[Instance[Lane]] = Seq.tabulate(parameter.laneNumber) { index =>
val lane: Instance[Lane] = Instantiate(new Lane(parameter.laneParam))
// lane.laneRequest.valid -> requestRegDequeue.ready -> lane.laneRequest.ready -> lane.laneRequest.bits
// TODO: this is harmful for PnR design, since it broadcasts the ready signal to every lane, which will significantly
// reduce the scalability for a large number of lanes.
lane.laneRequest.valid := requestRegDequeue.fire && !noOffsetReadLoadStore && !maskUnitInstruction
laneRequestSourceWire.foreach { request =>
request.valid := requestRegDequeue.fire && !noOffsetReadLoadStore && !maskUnitInstruction
// hard wire
lane.laneRequest.bits.instructionIndex := requestReg.bits.instructionIndex
lane.laneRequest.bits.decodeResult := decodeResult
lane.laneRequest.bits.vs1 := requestRegDequeue.bits.instruction(19, 15)
lane.laneRequest.bits.vs2 := requestRegDequeue.bits.instruction(24, 20)
lane.laneRequest.bits.vd := requestRegDequeue.bits.instruction(11, 7)
lane.laneRequest.bits.segment := Mux(
request.bits.instructionIndex := requestReg.bits.instructionIndex
request.bits.decodeResult := decodeResult
request.bits.vs1 := requestRegDequeue.bits.instruction(19, 15)
request.bits.vs2 := requestRegDequeue.bits.instruction(24, 20)
request.bits.vd := requestRegDequeue.bits.instruction(11, 7)
request.bits.segment := Mux(
decodeResult(Decoder.nr),
requestRegDequeue.bits.instruction(17, 15),
requestRegDequeue.bits.instruction(31, 29)
)

lane.laneRequest.bits.loadStoreEEW := requestRegDequeue.bits.instruction(13, 12)
request.bits.loadStoreEEW := requestRegDequeue.bits.instruction(13, 12)
// if the instruction is vi and vx type of gather, gather from rs2 with mask VRF read channel from one lane,
// and broadcast to all lanes.
lane.laneRequest.bits.readFromScalar := source1Select
request.bits.readFromScalar := source1Select

lane.laneRequest.bits.issueInst := requestRegDequeue.fire
lane.laneRequest.bits.loadStore := isLoadStoreType
request.bits.issueInst := requestRegDequeue.fire
request.bits.loadStore := isLoadStoreType
// let record in VRF to know there is a store instruction.
lane.laneRequest.bits.store := isStoreType
request.bits.store := isStoreType
// let the lane know whether this is a special instruction, which needs group-level synchronization between lane and [[V]]
lane.laneRequest.bits.special := specialInstruction
lane.laneRequest.bits.lsWholeReg := lsWholeReg
request.bits.special := specialInstruction
request.bits.lsWholeReg := lsWholeReg
// mask type instruction.
lane.laneRequest.bits.mask := maskType
laneReady(index) := lane.laneRequest.ready
request.bits.mask := maskType

// connect csrInterface
lane.laneRequest.bits.csrInterface := requestRegCSR
request.bits.csrInterface := requestRegCSR
// index type EEW Decoded in the instruction
lane.laneRequest.bits.csrInterface.vSew := vSewSelect
lane.laneRequest.bits.csrInterface.vl := evlForLane
lane.laneIndex := index.U
request.bits.csrInterface.vSew := vSewSelect
request.bits.csrInterface.vl := evlForLane
}

/** instantiate lanes. TODO: move instantiate to top of class.
*/
val laneVec: Seq[Instance[Lane]] = Seq.tabulate(parameter.laneNumber) { index =>
val lane: Instance[Lane] = Instantiate(new Lane(parameter.laneParam))
lane.laneRequest.valid := laneRequestSinkWire(index).valid && lane.vrfAllocateIssue
lane.laneRequest.bits := laneRequestSinkWire(index).bits
laneRequestSinkWire(index).ready := lane.laneRequest.ready && lane.vrfAllocateIssue
lane.laneIndex := index.U

// lsu 优先会有死锁:
// vmadc, v1, v2, 1 (vl=17) -> 需要先读后写
Expand Down Expand Up @@ -909,7 +926,6 @@ class T1(val parameter: T1Parameter)

/** for lsu instruction lsu is ready, for normal instructions, lanes are ready. */
val executionReady: Bool = (!isLoadStoreType || lsu.request.ready) && (noOffsetReadLoadStore || allLaneReady)
val vrfAllocate: Bool = VecInit(laneVec.map(_.vrfAllocateIssue)).asUInt.andR
// - ready to issue instruction
// - for vi and vx type of gather, vs2 needs to be accessed once: we first read vs2 in `gatherReadFinish`
// and then convert it to a mv instruction.
Expand All @@ -918,7 +934,7 @@ class T1(val parameter: T1Parameter)
// we detect the hazard and decide should we issue this slide or
// issue the instruction after the slide which already in the slot.
requestRegDequeue.ready := executionReady && slotReady && (!gatherNeedRead || maskUnit.gatherData.valid) &&
tokenManager.issueAllow && instructionIndexFree && vrfAllocate
tokenManager.issueAllow && instructionIndexFree

instructionToSlotOH := Mux(requestRegDequeue.fire, slotToEnqueue, 0.U)

Expand Down
30 changes: 30 additions & 0 deletions t1/src/package.scala
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import chisel3._
import chisel3.experimental.hierarchy.{Instance, Instantiate}
import chisel3.util._
import chisel3.util.experimental.decode.DecodeBundle
import org.chipsalliance.dwbb.stdlib.queue.Queue
import org.chipsalliance.t1.rtl.decoder.{Decoder, TableGenerator}
import org.chipsalliance.t1.rtl.lane.Distributor

Expand Down Expand Up @@ -221,6 +222,35 @@ package object rtl {
id.map(f => (shifterReg :+ source).map(p => Mux(p.valid, indexToOH(f(p.bits), 4), 0.U)).reduce(_ | _))
}

/** Connect a decoupled `source` to a decoupled `sink` through a shifter stage and a queue.
  *
  * The handshake on the `source` side is completed locally: `source.ready` is driven only by the
  * result of `pipeToken`, not by `sink.ready`. Every `source.fire` is turned into a valid-only
  * beat, passed through `connectWithShifter`, enqueued into a queue, and finally handed to `sink`
  * via the queue's dequeue port.
  *
  * NOTE(review): `pipeToken` and `connectWithShifter` are defined elsewhere; presumably
  * `pipeToken` limits the number of in-flight requests to `tokenSize` and `connectWithShifter`
  * delays the beat by `latency` cycles -- confirm against their definitions.
  *
  * @param latency   passed both to `connectWithShifter` (source -> sink data path) and to the
  *                  `Pipe` that carries the release indication back (sink -> source)
  * @param tokenSize passed to `pipeToken` and used as the size argument of the queue
  * @param source    upstream decoupled port; its `ready` is driven here
  * @param sink      downstream decoupled port; bulk-connected to the queue's dequeue side
  * @tparam T        payload type, taken from `source.bits`
  */
def connectDecoupledWithShifter[T <: Data](latency: Int, tokenSize: Int)(source: DecoupledIO[T], sink: DecoupledIO[T])
: Unit = {
// Queue sitting next to the sink; it is created with the same `tokenSize` that is given to `pipeToken`.
// NOTE(review): `flow = true` presumably lets an enqueued beat reach `deq` in the same cycle when
// the queue is empty -- confirm against the dwbb `Queue.io` documentation.
val queue = Queue.io(chiselTypeOf(source.bits), tokenSize, flow = true)
// Reverse pipe release: every `sink.fire` is sent back towards the source through a `Pipe` of
// `latency` stages. Only the `valid` bit is used; the `EmptyBundle` payload is a placeholder.
val releasePipe = Pipe(
sink.fire,
0.U.asTypeOf(new EmptyBundle),
latency
).valid
// `pipeToken` sees each accepted request (`source.fire`) and each delayed release; its result
// is the only thing that drives `source.ready`.
val tokenCheck: Bool = pipeToken(tokenSize)(source.fire, releasePipe)
source.ready := tokenCheck

// Complete the handshake at the source end and convert the result of the handshake into a data stream
// (a `Valid` beat that is asserted exactly when `source.fire` is asserted).
val validSource: ValidIO[T] = Wire(Valid(chiselTypeOf(source.bits)))
validSource.valid := source.fire
validSource.bits := source.bits

// Output side of the shifter; driven by `connectWithShifter` below.
val validSink: ValidIO[T] = Wire(Valid(chiselTypeOf(source.bits)))

// Shift Data
connectWithShifter(latency)(validSource, validSink)
// Throw the moved data into the queue.
// `queue.enq.ready` is not read here: the enqueue is unconditional, so correctness relies on the
// queue never being full when a beat arrives (see the todo below).
// todo: assert(queue.enq.ready || !queue.enq.valid)
queue.enq.valid := validSink.valid
queue.enq.bits := validSink.bits
// Finally, send the data to the sink
sink <> queue.deq
}

def instantiateVFU(
parameter: VFUInstantiateParameter
)(requestVec: Vec[SlotRequestToVFU],
Expand Down

0 comments on commit 2cc2b96

Please sign in to comment.