From 6cd1be2773d35b2142946958102974b4725f45cd Mon Sep 17 00:00:00 2001 From: qinjun-li Date: Wed, 23 Oct 2024 13:07:23 +0800 Subject: [PATCH] [rtl] fix elaborate. --- t1/src/Lane.scala | 13 ++++++------- t1/src/VectorFunctionUnit.scala | 7 +++++++ t1/src/laneStage/LaneExecutionBridge.scala | 2 +- t1/src/laneStage/LaneStage1.scala | 20 ++++++++++++++------ t1/src/vrf/VRF.scala | 8 +++++--- 5 files changed, 33 insertions(+), 17 deletions(-) diff --git a/t1/src/Lane.scala b/t1/src/Lane.scala index bb5f829e0..ea5610268 100644 --- a/t1/src/Lane.scala +++ b/t1/src/Lane.scala @@ -819,14 +819,13 @@ class Lane(val parameter: LaneParameter) extends Module with SerializableModule[ // cross write bus <> write queue crossLaneWriteQueue.zipWithIndex.foreach { case (queue, index) => - val port = writeBusPort(index) - // ((counter << 1) >> parameter.vrfParam.vrfOffsetBits).low(3) - val registerIncreaseBase = parameter.vrfParam.vrfOffsetBits - 1 + val port = writeBusPort(index) + val baseIndex = slotControl.head.laneRequest.vd ## 0.U(parameter.vrfOffsetBits.W) + val indexGrowth: UInt = changeUIntSize(port.enq.bits.counter ## index.U(1.W), port.enq.bits.counter.getWidth) + val finalIndex: UInt = baseIndex + indexGrowth queue.io.enq.valid := port.enq.valid - queue.io.enq.bits.vd := - // 3: 8 reg => log(2, 8) - slotControl.head.laneRequest.vd + port.enq.bits.counter(registerIncreaseBase + 3 - 1, registerIncreaseBase) - queue.io.enq.bits.offset := port.enq.bits.counter ## index.U(1.W) + queue.io.enq.bits.vd := finalIndex >> parameter.vrfOffsetBits + queue.io.enq.bits.offset := finalIndex queue.io.enq.bits.data := port.enq.bits.data queue.io.enq.bits.last := DontCare queue.io.enq.bits.instructionIndex := port.enq.bits.instructionIndex diff --git a/t1/src/VectorFunctionUnit.scala b/t1/src/VectorFunctionUnit.scala index 9450a11d2..77b1d411c 100644 --- a/t1/src/VectorFunctionUnit.scala +++ b/t1/src/VectorFunctionUnit.scala @@ -132,6 +132,13 @@ object VFUInstantiateParameter { case (false, true) => VFUInstantiateParameter.zvbb(vLen, dLen) case (true, true) => VFUInstantiateParameter.zvbbFP(vLen, dLen) } + case "huge" => + (fp, zvbb) match { + case (false, false) => VFUInstantiateParameter.smallInt(vLen, dLen) + case (true, false) => VFUInstantiateParameter.hugeFP(vLen, dLen) + case (false, true) => VFUInstantiateParameter.zvbb(vLen, dLen) + case (true, true) => VFUInstantiateParameter.zvbbFP(vLen, dLen) + } } // instantiate each module and connect to all scoreboards diff --git a/t1/src/laneStage/LaneExecutionBridge.scala b/t1/src/laneStage/LaneExecutionBridge.scala index 8e334b71e..5e6f5a11c 100644 --- a/t1/src/laneStage/LaneExecutionBridge.scala +++ b/t1/src/laneStage/LaneExecutionBridge.scala @@ -412,7 +412,7 @@ class LaneExecutionBridge(parameter: LaneParameter, isLastSlot: Boolean, slotInd maskResult(1, 0) << (recordQueue.io.deq.bits.groupCounter(3, 0) ## false.B), // 1 bit per data group, it will had 32 data groups -> executeIndex1H << 1 * groupCounter(4, 0) - maskResult(0) << recordQueue.io.deq.bits.groupCounter(4, 0) + maskResult(0) << recordQueue.io.deq.bits.groupCounter(4.min(parameter.groupNumberBits - 1), 0) ) ).asUInt diff --git a/t1/src/laneStage/LaneStage1.scala b/t1/src/laneStage/LaneStage1.scala index 9cde79dde..196e36ad6 100644 --- a/t1/src/laneStage/LaneStage1.scala +++ b/t1/src/laneStage/LaneStage1.scala @@ -209,27 +209,35 @@ class LaneStage1(parameter: LaneParameter, isLastSlot: Boolean) extends Module { // cross read enqueue queueBeforeCheckLSB.foreach { q => - q.io.enq.bits.vs := Mux( + val baseVs = Mux( enqueue.bits.decodeResult(Decoder.vwmacc), // cross read vd for vwmacc, since it need dual [[dataPathWidth]], use vs2 port to read LSB part of it. enqueue.bits.vd, // read vs2 for other instruction enqueue.bits.vs2 - ) + groupCounter(parameter.groupNumberBits - 2, parameter.vrfOffsetBits - 1) + ) + val baseIndex = baseVs ## 0.U(parameter.vrfOffsetBits.W) + val indexGrowth: UInt = changeUIntSize(groupCounter ## false.B, groupCounter.getWidth) + val finalIndex: UInt = baseIndex + indexGrowth + q.io.enq.bits.vs := finalIndex >> parameter.vrfOffsetBits q.io.enq.bits.readSource := Mux(enqueue.bits.decodeResult(Decoder.vwmacc), 2.U, 1.U) - q.io.enq.bits.offset := groupCounter(parameter.vrfOffsetBits - 2, 0) ## false.B + q.io.enq.bits.offset := finalIndex } queueBeforeCheckMSB.foreach { q => - q.io.enq.bits.vs := Mux( + val baseVs = Mux( enqueue.bits.decodeResult(Decoder.vwmacc), // cross read vd for vwmacc enqueue.bits.vd, // cross lane access use vs2 enqueue.bits.vs2 - ) + groupCounter(parameter.groupNumberBits - 2, parameter.vrfOffsetBits - 1) + ) + val baseIndex = baseVs ## 0.U(parameter.vrfOffsetBits.W) + val indexGrowth: UInt = changeUIntSize(groupCounter ## true.B, groupCounter.getWidth) + val finalIndex: UInt = baseIndex + indexGrowth + q.io.enq.bits.vs := finalIndex >> parameter.vrfOffsetBits q.io.enq.bits.readSource := Mux(enqueue.bits.decodeResult(Decoder.vwmacc), 2.U, 1.U) - q.io.enq.bits.offset := groupCounter(parameter.vrfOffsetBits - 2, 0) ## true.B + q.io.enq.bits.offset := finalIndex } // read pipe diff --git a/t1/src/vrf/VRF.scala b/t1/src/vrf/VRF.scala index eaccfee70..9c8321fda 100644 --- a/t1/src/vrf/VRF.scala +++ b/t1/src/vrf/VRF.scala @@ -268,8 +268,9 @@ class VRF(val parameter: VRFParam) extends Module with SerializableModule[VRFPar val portFireCount: UInt = PopCount(VecInit(readRequests.map(_.fire) :+ write.fire)) dontTouch(portFireCount) - val writeBank: UInt = - if (parameter.rfBankNum == 1) true.B else UIntToOH(write.bits.offset(log2Ceil(parameter.rfBankNum) - 1, 0)) + val writeIndex: UInt = write.bits.vd ## write.bits.offset + val writeBank: UInt = + if (parameter.rfBankNum == 1) true.B else UIntToOH(writeIndex(log2Ceil(parameter.rfBankNum) - 1, 0)) // Add one more record slot to prevent there is no free slot when the instruction comes in // (the slot will die a few cycles later than the instruction) @@ -350,8 +351,9 @@ class VRF(val parameter: VRFParam) extends Module with SerializableModule[VRFPar .reduce(_ && _) && portConflictCheck } val validCorrect: Bool = if (i == (readRequests.size - 1)) v.valid && checkResult.get else v.valid + val address = v.bits.vs ## v.bits.offset // select bank - val bank = if (parameter.rfBankNum == 1) true.B else UIntToOH(v.bits.offset(log2Ceil(parameter.rfBankNum) - 1, 0)) + val bank = if (parameter.rfBankNum == 1) true.B else UIntToOH(address(log2Ceil(parameter.rfBankNum) - 1, 0)) val pipeBank = Pipe(true.B, bank, parameter.vrfReadLatency).bits val bankCorrect = Mux(validCorrect, bank, 0.U(parameter.rfBankNum.W)) val readPortCheckSelect = parameter.ramType match {