From 09914a266deff52e7a029c93ffeb239847bbc2ba Mon Sep 17 00:00:00 2001 From: qinjun-li Date: Wed, 11 Dec 2024 18:09:37 +0800 Subject: [PATCH] [rtl] duplicate v0 in lsu. --- rocketv/src/HellaCache.scala | 4 ++- rocketv/src/ICache.scala | 4 ++- t1/src/Bundles.scala | 6 ++-- t1/src/FloatModule.scala | 8 ++--- t1/src/Lane.scala | 64 ++++++++++++++++----------------- t1/src/LaneAdder.scala | 18 +++++----- t1/src/LaneDiv.scala | 12 +++---- t1/src/LaneDivFP.scala | 10 +++--- t1/src/LaneFloat.scala | 10 ++++-- t1/src/LaneLogic.scala | 2 +- t1/src/LaneMul.scala | 6 ++-- t1/src/LaneShifter.scala | 6 ++-- t1/src/LaneZvbb.scala | 2 +- t1/src/MaskedLogic.scala | 6 ++-- t1/src/OtherUnit.scala | 8 ++--- t1/src/T1.scala | 11 +++--- t1/src/VectorFunctionUnit.scala | 6 ++-- t1/src/decoder/Decoder.scala | 3 +- t1/src/lsu/LSU.scala | 34 ++++++++++++------ t1/src/mask/MaskReduce.scala | 2 +- t1/src/mask/MaskUnit.scala | 18 +++++----- 21 files changed, 127 insertions(+), 113 deletions(-) diff --git a/rocketv/src/HellaCache.scala b/rocketv/src/HellaCache.scala index b2184fcc4..49f51195b 100644 --- a/rocketv/src/HellaCache.scala +++ b/rocketv/src/HellaCache.scala @@ -525,7 +525,9 @@ class HellaCache(val parameter: HellaCacheParameter) numReadwritePorts = 1 ) } - omInstance.sramsIn.foreach(_ := Property((dcacheDataSRAM ++ Some(dcacheTagSRAM)).map(_.description.get.asAnyClassType))) + omInstance.sramsIn.foreach( + _ := Property((dcacheDataSRAM ++ Some(dcacheTagSRAM)).map(_.description.get.asAnyClassType)) + ) /** Data Arbiter 0: data from pending store buffer 1: data from TL-D refill 2: release to TL-A 3: hit path to CPU */ diff --git a/rocketv/src/ICache.scala b/rocketv/src/ICache.scala index 4be2bd86c..0bb52ce2b 100644 --- a/rocketv/src/ICache.scala +++ b/rocketv/src/ICache.scala @@ -555,7 +555,9 @@ class ICache(val parameter: ICacheParameter) numReadwritePorts = 1 ) } - omInstance.sramsIn.foreach(_ := Property((icacheDataSRAM ++ 
Some(icacheTagSRAM)).map(_.description.get.asAnyClassType))) + omInstance.sramsIn.foreach( + _ := Property((icacheDataSRAM ++ Some(icacheTagSRAM)).map(_.description.get.asAnyClassType)) + ) for ((data_array, i) <- icacheDataSRAM.zipWithIndex) { diff --git a/t1/src/Bundles.scala b/t1/src/Bundles.scala index 5dd255384..d59a97d75 100644 --- a/t1/src/Bundles.scala +++ b/t1/src/Bundles.scala @@ -360,9 +360,9 @@ class LaneResponseFeedback(param: LaneParameter) extends Bundle { val complete: Bool = Bool() } -class V0Update(param: LaneParameter) extends Bundle { - val data: UInt = UInt(param.datapathWidth.W) - val offset: UInt = UInt(param.vrfOffsetBits.W) +class V0Update(datapathWidth: Int, vrfOffsetBits: Int) extends Bundle { + val data: UInt = UInt(datapathWidth.W) + val offset: UInt = UInt(vrfOffsetBits.W) // mask/ld类型的有可能不会写完整的32bit val mask: UInt = UInt(4.W) } diff --git a/t1/src/FloatModule.scala b/t1/src/FloatModule.scala index cb962abb7..cfb09478d 100644 --- a/t1/src/FloatModule.scala +++ b/t1/src/FloatModule.scala @@ -4,11 +4,11 @@ package org.chipsalliance.t1.rtl import chisel3._ -import chisel3.experimental.{SerializableModule, SerializableModuleParameter, SerializableModuleGenerator} -import chisel3.experimental.hierarchy.{public, Instance, instantiable, Instantiate} +import chisel3.experimental.{SerializableModule, SerializableModuleGenerator, SerializableModuleParameter} +import chisel3.experimental.hierarchy.{instantiable, public, Instance, Instantiate} import chisel3.util._ import chisel3.util.experimental.decode.TruthTable -import chisel3.properties.{Path, AnyClassType, Property} +import chisel3.properties.{AnyClassType, Path, Property} import hardfloat._ import org.chipsalliance.stdlib.GeneralOM import upickle.default @@ -52,7 +52,7 @@ class FloatAdder(val parameter: FloatAdderParameter) val latency = parameter.latency val omInstance: Instance[FloatAdderOM] = Instantiate(new FloatAdderOM(parameter)) - io.om := omInstance.getPropertyReference + io.om := 
omInstance.getPropertyReference omInstance.retimeIn.foreach(_ := Property(Path(io.clock))) val addRecFN = Module(new AddRecFN(expWidth, sigWidth)) diff --git a/t1/src/Lane.scala b/t1/src/Lane.scala index 0f55ccd3e..e2968cf34 100644 --- a/t1/src/Lane.scala +++ b/t1/src/Lane.scala @@ -276,7 +276,7 @@ class Lane(val parameter: LaneParameter) extends Module with SerializableModule[ /** V0 update in the lane should also update [[T1.v0]] */ @public - val v0Update: ValidIO[V0Update] = IO(Valid(new V0Update(parameter))) + val v0Update: ValidIO[V0Update] = IO(Valid(new V0Update(parameter.datapathWidth, parameter.vrfOffsetBits))) /** input of mask data */ @public @@ -371,33 +371,33 @@ class Lane(val parameter: LaneParameter) extends Module with SerializableModule[ // todo: mv to bundle.scala class MaskControl(parameter: LaneParameter) extends Bundle { - val index: UInt = UInt(parameter.instructionIndexBits.W) - val sew: UInt = UInt(2.W) - val maskData: UInt = UInt(parameter.datapathWidth.W) - val group: UInt = UInt(parameter.maskGroupSizeBits.W) - val dataValid: Bool = Bool() + val index: UInt = UInt(parameter.instructionIndexBits.W) + val sew: UInt = UInt(2.W) + val maskData: UInt = UInt(parameter.datapathWidth.W) + val group: UInt = UInt(parameter.maskGroupSizeBits.W) + val dataValid: Bool = Bool() val waiteResponse: Bool = Bool() - val controlValid: Bool = Bool() + val controlValid: Bool = Bool() } val maskControlRelease: Vec[ValidIO[UInt]] = Wire(Vec(parameter.chainingSize, Valid(UInt(parameter.instructionIndexBits.W)))) - val maskControlEnq: UInt = Wire(UInt(parameter.chainingSize.W)) - val maskControlDataDeq: UInt = Wire(UInt(parameter.chainingSize.W)) - val maskControlReq: Vec[Bool] = Wire(Vec(parameter.chainingSize, Bool())) - val maskControlReqSelect: UInt = ffo(maskControlReq.asUInt) + val maskControlEnq: UInt = Wire(UInt(parameter.chainingSize.W)) + val maskControlDataDeq: UInt = Wire(UInt(parameter.chainingSize.W)) + val maskControlReq: Vec[Bool] = 
Wire(Vec(parameter.chainingSize, Bool())) + val maskControlReqSelect: UInt = ffo(maskControlReq.asUInt) // mask request & response handle - val maskControlVec: Seq[MaskControl] = Seq.tabulate(parameter.chainingSize) { index => + val maskControlVec: Seq[MaskControl] = Seq.tabulate(parameter.chainingSize) { index => val state = RegInit(0.U.asTypeOf(new MaskControl(parameter))) val releaseHit: Bool = maskControlRelease.map(r => r.valid && (r.bits === state.index)).reduce(_ || _) val responseFire = Pipe(maskControlReqSelect(index), 0.U.asTypeOf(new EmptyBundle), parameter.maskRequestLatency).valid when(maskControlEnq(index)) { - state := 0.U.asTypeOf(state) - state.index := laneRequest.bits.instructionIndex - state.sew := laneRequest.bits.csrInterface.vSew + state := 0.U.asTypeOf(state) + state.index := laneRequest.bits.instructionIndex + state.sew := laneRequest.bits.csrInterface.vSew state.controlValid := true.B } @@ -410,13 +410,13 @@ class Lane(val parameter: LaneParameter) extends Module with SerializableModule[ maskControlReq(index) := state.controlValid && !state.dataValid && !state.waiteResponse when(maskControlReqSelect(index)) { state.waiteResponse := true.B - state.group := state.group + 1.U + state.group := state.group + 1.U } when(responseFire) { - state.dataValid := true.B + state.dataValid := true.B state.waiteResponse := false.B - state.maskData := maskInput + state.maskData := maskInput } when(maskControlDataDeq(index)) { @@ -425,8 +425,8 @@ class Lane(val parameter: LaneParameter) extends Module with SerializableModule[ state } - val maskControlFree: Seq[Bool] = maskControlVec.map(s => !s.controlValid && !s.waiteResponse) - val freeSelect: UInt = ffo(VecInit(maskControlFree).asUInt) + val maskControlFree: Seq[Bool] = maskControlVec.map(s => !s.controlValid && !s.waiteResponse) + val freeSelect: UInt = ffo(VecInit(maskControlFree).asUInt) maskControlEnq := maskAnd(laneRequest.fire && laneRequest.bits.mask, freeSelect) /** for each slot, assert when it 
is asking [[T1]] to change mask */ @@ -439,7 +439,7 @@ class Lane(val parameter: LaneParameter) extends Module with SerializableModule[ /** which slot wins the arbitration for requesting mask. */ val maskRequestFireOH: Vec[Bool] = Wire(Vec(parameter.chainingSize, Bool())) - val maskDataVec: Vec[UInt] = Wire(Vec(parameter.chainingSize, UInt(parameter.maskGroupWidth.W))) + val maskDataVec: Vec[UInt] = Wire(Vec(parameter.chainingSize, UInt(parameter.maskGroupWidth.W))) /** FSM control for each slot. if index == 0, * - slot can support write v0 in mask type, see [[Decoder.maskDestination]] [[Decoder.maskSource]] @@ -663,14 +663,14 @@ class Lane(val parameter: LaneParameter) extends Module with SerializableModule[ } maskControlRelease(index).valid := false.B - maskControlRelease(index).bits := record.laneRequest.instructionIndex + maskControlRelease(index).bits := record.laneRequest.instructionIndex // update lane state when(stage0.enqueue.fire) { maskGroupCountVec(index) := stage0.updateLaneState.maskGroupCount // todo: handle all elements in first group are masked maskIndexVec(index) := stage0.updateLaneState.maskIndex when(stage0.updateLaneState.outOfExecutionRange) { - slotOccupied(index) := false.B + slotOccupied(index) := false.B maskControlRelease(index).valid := true.B } } @@ -962,19 +962,19 @@ class Lane(val parameter: LaneParameter) extends Module with SerializableModule[ } { - maskSelect := Mux1H(maskControlReqSelect, maskControlVec.map(_.group)) - maskSelectSew := Mux1H(maskControlReqSelect, maskControlVec.map(_.sew)) + maskSelect := Mux1H(maskControlReqSelect, maskControlVec.map(_.group)) + maskSelectSew := Mux1H(maskControlReqSelect, maskControlVec.map(_.sew)) maskControlDataDeq := slotMaskRequestVec.zipWithIndex.map { case (req, index) => - val slotIndex = slotControl(index).laneRequest.instructionIndex - val hitMaskControl = VecInit(maskControlVec.map(_.index === slotIndex)).asUInt - val dataValid = Mux1H(hitMaskControl, maskControlVec.map(_.dataValid)) 
- val data = Mux1H(hitMaskControl, maskControlVec.map(_.maskData)) - val group = Mux1H(hitMaskControl, maskControlVec.map(_.group)) - val sameGroup = group === req.bits + val slotIndex = slotControl(index).laneRequest.instructionIndex + val hitMaskControl = VecInit(maskControlVec.map(_.index === slotIndex)).asUInt + val dataValid = Mux1H(hitMaskControl, maskControlVec.map(_.dataValid)) + val data = Mux1H(hitMaskControl, maskControlVec.map(_.maskData)) + val group = Mux1H(hitMaskControl, maskControlVec.map(_.group)) + val sameGroup = group === req.bits dontTouch(sameGroup) val maskRequestFire = req.valid && dataValid maskRequestFireOH(index) := maskRequestFire - maskDataVec(index) := data + maskDataVec(index) := data maskAnd(maskRequestFire, hitMaskControl).asUInt }.reduce(_ | _) } diff --git a/t1/src/LaneAdder.scala b/t1/src/LaneAdder.scala index 079164aef..0bf09f83b 100644 --- a/t1/src/LaneAdder.scala +++ b/t1/src/LaneAdder.scala @@ -4,7 +4,7 @@ package org.chipsalliance.t1.rtl import chisel3._ -import chisel3.experimental.hierarchy.{Instance, instantiable, Instantiate} +import chisel3.experimental.hierarchy.{instantiable, Instance, Instantiate} import chisel3.experimental.{SerializableModule, SerializableModuleParameter} import chisel3.properties.{Path, Property} import chisel3.util._ @@ -54,21 +54,19 @@ class LaneAdderOM(parameter: LaneAdderParam) extends GeneralOM[LaneAdderParam, L * 1. 
判断大小的结果 */ @instantiable -class LaneAdder(val parameter: LaneAdderParam) - extends VFUModule - with SerializableModule[LaneAdderParam] { +class LaneAdder(val parameter: LaneAdderParam) extends VFUModule with SerializableModule[LaneAdderParam] { override val omInstance: Instance[LaneAdderOM] = Instantiate(new LaneAdderOM(parameter)) omInstance.retimeIn.foreach(_ := Property(Path(clock))) - val response: LaneAdderResp = Wire(new LaneAdderResp(parameter.datapathWidth)) - val request: LaneAdderReq = connectIO(response).asTypeOf(parameter.inputBundle) + val response: LaneAdderResp = Wire(new LaneAdderResp(parameter.datapathWidth)) + val request: LaneAdderReq = connectIO(response).asTypeOf(parameter.inputBundle) // todo: decode // ["add", "sub", "slt", "sle", "sgt", "sge", "max", "min", "seq", "sne", "adc", "sbc"] - val uopOH: UInt = UIntToOH(request.opcode)(11, 0) - val isSub: Bool = !(uopOH(0) || uopOH(10)) + val uopOH: UInt = UIntToOH(request.opcode)(11, 0) + val isSub: Bool = !(uopOH(0) || uopOH(10)) // sub -> src(1) - src(0) - val subOperation0: UInt = Mux(isSub && !request.reverse, (~request.src.head).asUInt, request.src.head) - val subOperation1: UInt = Mux(isSub && request.reverse, (~request.src.last).asUInt, request.src.last) + val subOperation0: UInt = Mux(isSub && !request.reverse, (~request.src.head).asUInt, request.src.head) + val subOperation1: UInt = Mux(isSub && request.reverse, (~request.src.last).asUInt, request.src.last) // sub + 1 || carry || borrow val operation2 = Fill(4, isSub) ^ request.mask val vSewOrR: Bool = request.vSew.orR diff --git a/t1/src/LaneDiv.scala b/t1/src/LaneDiv.scala index 98927da38..a49ae38dc 100644 --- a/t1/src/LaneDiv.scala +++ b/t1/src/LaneDiv.scala @@ -4,9 +4,9 @@ package org.chipsalliance.t1.rtl import chisel3._ -import chisel3.experimental.hierarchy.{public, Instance, instantiable, Instantiate} +import chisel3.experimental.hierarchy.{instantiable, public, Instance, Instantiate} import 
chisel3.experimental.{SerializableModule, SerializableModuleParameter} -import chisel3.properties.{Property, Path} +import chisel3.properties.{Path, Property} import chisel3.util._ import division.srt.{SRT, SRTOutput} import org.chipsalliance.stdlib.GeneralOM @@ -43,10 +43,10 @@ class LaneDivOM(parameter: LaneDivParam) extends GeneralOM[LaneDivParam, LaneDiv @instantiable class LaneDiv(val parameter: LaneDivParam) extends VFUModule with SerializableModule[LaneDivParam] { - val omInstance: Instance[LaneDivOM] = Instantiate(new LaneDivOM(parameter)) - val response: LaneDivResponse = Wire(new LaneDivResponse(parameter.datapathWidth)) - val responseValid: Bool = Wire(Bool()) - val request: LaneDivRequest = connectIO(response, responseValid).asTypeOf(parameter.inputBundle) + val omInstance: Instance[LaneDivOM] = Instantiate(new LaneDivOM(parameter)) + val response: LaneDivResponse = Wire(new LaneDivResponse(parameter.datapathWidth)) + val responseValid: Bool = Wire(Bool()) + val request: LaneDivRequest = connectIO(response, responseValid).asTypeOf(parameter.inputBundle) val wrapper = Instantiate(new SRTWrapper) wrapper.input.bits.dividend := request.src.last.asSInt diff --git a/t1/src/LaneDivFP.scala b/t1/src/LaneDivFP.scala index 76d2c349c..05fd95abd 100644 --- a/t1/src/LaneDivFP.scala +++ b/t1/src/LaneDivFP.scala @@ -4,7 +4,7 @@ package org.chipsalliance.t1.rtl import chisel3._ -import chisel3.experimental.hierarchy.{public, Instance, instantiable, Instantiate} +import chisel3.experimental.hierarchy.{instantiable, public, Instance, Instantiate} import chisel3.experimental.{SerializableModule, SerializableModuleParameter} import chisel3.util._ import float._ @@ -46,10 +46,10 @@ class LaneDivFPOM(parameter: LaneDivFPParam) extends GeneralOM[LaneDivFPParam, L @instantiable class LaneDivFP(val parameter: LaneDivFPParam) extends VFUModule with SerializableModule[LaneDivFPParam] { - val omInstance: Instance[LaneDivFPOM] = Instantiate(new LaneDivFPOM(parameter)) - val 
response: LaneDivFPResponse = Wire(new LaneDivFPResponse(parameter.datapathWidth)) - val responseValid: Bool = Wire(Bool()) - val request: LaneDivFPRequest = connectIO(response, responseValid).asTypeOf(parameter.inputBundle) + val omInstance: Instance[LaneDivFPOM] = Instantiate(new LaneDivFPOM(parameter)) + val response: LaneDivFPResponse = Wire(new LaneDivFPResponse(parameter.datapathWidth)) + val responseValid: Bool = Wire(Bool()) + val request: LaneDivFPRequest = connectIO(response, responseValid).asTypeOf(parameter.inputBundle) val uop = request.opcode diff --git a/t1/src/LaneFloat.scala b/t1/src/LaneFloat.scala index 7acd890fb..ca0cf4f4f 100644 --- a/t1/src/LaneFloat.scala +++ b/t1/src/LaneFloat.scala @@ -3,7 +3,7 @@ package org.chipsalliance.t1.rtl -import chisel3.experimental.hierarchy.{Instance, instantiable, Instantiate} +import chisel3.experimental.hierarchy.{instantiable, Instance, Instantiate} import chisel3.{UInt, _} import chisel3.experimental.{SerializableModule, SerializableModuleParameter} import chisel3.util._ @@ -57,7 +57,7 @@ class LaneFloatResponse(datapathWidth: Int) extends VFUPipeBundle { val executeIndex: UInt = UInt(2.W) } -class LaneFloatOM(parameter: LaneFloatParam) extends GeneralOM[LaneFloatParam, LaneFloat](parameter){ +class LaneFloatOM(parameter: LaneFloatParam) extends GeneralOM[LaneFloatParam, LaneFloat](parameter) { override def hasRetime: Boolean = true } @@ -282,7 +282,11 @@ class LaneFloat(val parameter: LaneFloatParam) extends VFUModule with Serializab ) ) - otherFlags := Mux(rec7En, rec7Module.io.out.exceptionFlags, Mux(rsqrt7En, rsqrt7Module.io.out.exceptionFlags, convertFlags)) + otherFlags := Mux( + rec7En, + rec7Module.io.out.exceptionFlags, + Mux(rsqrt7En, rsqrt7Module.io.out.exceptionFlags, convertFlags) + ) /** collect results */ result := Mux1H( diff --git a/t1/src/LaneLogic.scala b/t1/src/LaneLogic.scala index 4027e555f..a34641997 100644 --- a/t1/src/LaneLogic.scala +++ b/t1/src/LaneLogic.scala @@ -5,7 +5,7 @@ 
package org.chipsalliance.t1.rtl import chisel3._ import chisel3.experimental.{SerializableModule, SerializableModuleParameter} -import chisel3.experimental.hierarchy.{Instance, instantiable, Instantiate} +import chisel3.experimental.hierarchy.{instantiable, Instance, Instantiate} import chisel3.util.BitPat import chisel3.util.experimental.decode.TruthTable import org.chipsalliance.t1.rtl.decoder.TableGenerator diff --git a/t1/src/LaneMul.scala b/t1/src/LaneMul.scala index 51948ede2..3cf4557de 100644 --- a/t1/src/LaneMul.scala +++ b/t1/src/LaneMul.scala @@ -4,9 +4,9 @@ package org.chipsalliance.t1.rtl import chisel3._ -import chisel3.experimental.hierarchy.{Instance, instantiable, Instantiate} +import chisel3.experimental.hierarchy.{instantiable, Instance, Instantiate} import chisel3.experimental.{SerializableModule, SerializableModuleParameter} -import chisel3.properties.{Property, Path} +import chisel3.properties.{Path, Property} import chisel3.util._ import org.chipsalliance.stdlib.GeneralOM import org.chipsalliance.t1.rtl.decoder.{BoolField, Decoder} @@ -42,7 +42,7 @@ class LaneMulResponse(parameter: LaneMulParam) extends VFUPipeBundle { val vxsat: Bool = Bool() } -class LaneMulOM(parameter: LaneMulParam) extends GeneralOM[LaneMulParam, LaneMul](parameter){ +class LaneMulOM(parameter: LaneMulParam) extends GeneralOM[LaneMulParam, LaneMul](parameter) { override def hasRetime: Boolean = true } diff --git a/t1/src/LaneShifter.scala b/t1/src/LaneShifter.scala index 31dcee9de..d0012ca5b 100644 --- a/t1/src/LaneShifter.scala +++ b/t1/src/LaneShifter.scala @@ -4,7 +4,7 @@ package org.chipsalliance.t1.rtl import chisel3._ -import chisel3.experimental.hierarchy.{instantiable, Instantiate, Instance} +import chisel3.experimental.hierarchy.{instantiable, Instance, Instantiate} import chisel3.experimental.{SerializableModule, SerializableModuleParameter} import chisel3.properties.{Path, Property} import chisel3.util._ @@ -40,9 +40,7 @@ class LaneShifterOM(parameter: 
LaneShifterParameter) extends GeneralOM[LaneShift } @instantiable -class LaneShifter(val parameter: LaneShifterParameter) - extends VFUModule - with SerializableModule[LaneShifterParameter] { +class LaneShifter(val parameter: LaneShifterParameter) extends VFUModule with SerializableModule[LaneShifterParameter] { val omInstance: Instance[LaneShifterOM] = Instantiate(new LaneShifterOM(parameter)) omInstance.retimeIn.foreach(_ := Property(Path(clock))) diff --git a/t1/src/LaneZvbb.scala b/t1/src/LaneZvbb.scala index 7befb3af4..2043f6fef 100644 --- a/t1/src/LaneZvbb.scala +++ b/t1/src/LaneZvbb.scala @@ -3,7 +3,7 @@ package org.chipsalliance.t1.rtl -import chisel3.experimental.hierarchy.{instantiable, Instantiate, Instance} +import chisel3.experimental.hierarchy.{instantiable, Instance, Instantiate} import chisel3._ import chisel3.experimental.{SerializableModule, SerializableModuleParameter} import chisel3.properties.{Path, Property} diff --git a/t1/src/MaskedLogic.scala b/t1/src/MaskedLogic.scala index 712c14b8d..08008e3eb 100644 --- a/t1/src/MaskedLogic.scala +++ b/t1/src/MaskedLogic.scala @@ -4,13 +4,13 @@ package org.chipsalliance.t1.rtl import chisel3._ -import chisel3.experimental.hierarchy.{Instance, instantiable, Instantiate} +import chisel3.experimental.hierarchy.{instantiable, Instance, Instantiate} import chisel3.experimental.{SerializableModule, SerializableModuleParameter} -import chisel3.properties.{Property, Path} +import chisel3.properties.{Path, Property} import chisel3.util.BitPat import chisel3.util.experimental.decode.TruthTable import org.chipsalliance.stdlib.GeneralOM -import org.chipsalliance.t1.rtl.decoder.{BoolField, TableGenerator, Decoder} +import org.chipsalliance.t1.rtl.decoder.{BoolField, Decoder, TableGenerator} object LogicParam { implicit def rw: upickle.default.ReadWriter[LogicParam] = upickle.default.macroRW diff --git a/t1/src/OtherUnit.scala b/t1/src/OtherUnit.scala index 35458453b..dd92d056d 100644 --- a/t1/src/OtherUnit.scala +++ 
b/t1/src/OtherUnit.scala @@ -4,9 +4,9 @@ package org.chipsalliance.t1.rtl import chisel3._ -import chisel3.experimental.hierarchy.{Instance, instantiable, Instantiate} +import chisel3.experimental.hierarchy.{instantiable, Instance, Instantiate} import chisel3.experimental.{SerializableModule, SerializableModuleParameter} -import chisel3.properties.{Property, Path} +import chisel3.properties.{Path, Property} import chisel3.util._ import org.chipsalliance.stdlib.GeneralOM import org.chipsalliance.t1.rtl.decoder.{BoolField, Decoder} @@ -59,9 +59,7 @@ class OtherUnitOM(parameter: OtherUnitParam) extends GeneralOM[OtherUnitParam, O } @instantiable -class OtherUnit(val parameter: OtherUnitParam) - extends VFUModule - with SerializableModule[OtherUnitParam] { +class OtherUnit(val parameter: OtherUnitParam) extends VFUModule with SerializableModule[OtherUnitParam] { val omInstance: Instance[OtherUnitOM] = Instantiate(new OtherUnitOM(parameter)) omInstance.retimeIn.foreach(_ := Property(Path(clock))) diff --git a/t1/src/T1.scala b/t1/src/T1.scala index 0418d056a..0d71bf6a5 100644 --- a/t1/src/T1.scala +++ b/t1/src/T1.scala @@ -777,6 +777,7 @@ class T1(val parameter: T1Parameter) maskUnit.io.laneMaskSelect(index) := Pipe(true.B, lane.maskSelect, parameter.maskRequestLatency).bits maskUnit.io.laneMaskSewSelect(index) := Pipe(true.B, lane.maskSelectSew, parameter.maskRequestLatency).bits maskUnit.io.v0UpdateVec(index) <> lane.v0Update + lsu.v0UpdateVec(index) <> lane.v0Update lane.lsuLastReport := lsu.lastReport | maskUnit.io.lastReport @@ -811,12 +812,10 @@ class T1(val parameter: T1Parameter) lsu.request.bits.instructionInformation.isStore := isStoreType lsu.request.bits.instructionInformation.maskedLoadStore := maskType - maskUnit.io.lsuMaskSelect := lsu.maskSelect - lsu.maskInput := maskUnit.io.lsuMaskInput - lsu.csrInterface := requestRegCSR - lsu.csrInterface.vl := evlForLsu - lsu.writeReadyForLsu := VecInit(laneVec.map(_.writeReadyForLsu)).asUInt.andR - 
lsu.vrfReadyToStore := VecInit(laneVec.map(_.vrfReadyToStore)).asUInt.andR + lsu.csrInterface := requestRegCSR + lsu.csrInterface.vl := evlForLsu + lsu.writeReadyForLsu := VecInit(laneVec.map(_.writeReadyForLsu)).asUInt.andR + lsu.vrfReadyToStore := VecInit(laneVec.map(_.vrfReadyToStore)).asUInt.andR // connect mask unit maskUnit.io.instReq.valid := requestRegDequeue.fire && requestReg.bits.decodeResult(Decoder.maskUnit) diff --git a/t1/src/VectorFunctionUnit.scala b/t1/src/VectorFunctionUnit.scala index f6a2e85f7..69fc35efe 100644 --- a/t1/src/VectorFunctionUnit.scala +++ b/t1/src/VectorFunctionUnit.scala @@ -30,7 +30,7 @@ class VFUPipeBundle extends Bundle { @instantiable abstract class VFUModule extends Module { - val parameter: VFUParameter + val parameter: VFUParameter val omInstance: Instance[GeneralOM[_, _]] @public val om: Property[ClassType] = IO(Output(Property[AnyClassType]())) @@ -38,7 +38,7 @@ abstract class VFUModule extends Module { val requestIO: DecoupledIO[VFUPipeBundle] = IO(Flipped(Decoupled(parameter.inputBundle))) @public val responseIO: DecoupledIO[VFUPipeBundle] = IO(Decoupled(parameter.outputBundle)) - atModuleBodyEnd{ + atModuleBodyEnd { om := omInstance.getPropertyReference.asAnyClassType } @@ -546,7 +546,7 @@ case class VFUInstantiateParameter( genVec.foreach { case (_, connect) => connect.foreach(connectIndex => require(connectIndex < slotCount)) } - val maxLatency: Int = genVec.map(_._1.parameter.latency).max + val maxLatency: Int = genVec.map(_._1.parameter.latency).max } class SlotExecuteRequest[T <: SlotRequestToVFU](requestFromSlot: T)(slotIndex: Int, parameter: VFUInstantiateParameter) diff --git a/t1/src/decoder/Decoder.scala b/t1/src/decoder/Decoder.scala index 9221cb0e8..d9fd61a29 100644 --- a/t1/src/decoder/Decoder.scala +++ b/t1/src/decoder/Decoder.scala @@ -15,7 +15,8 @@ object DecoderParam { implicit def rwP: upickle.default.ReadWriter[DecoderParam] = upickle.default.macroRW } -case class DecoderParam(fpuEnable: Boolean, 
zvbbEnable: Boolean, allInstructions: Seq[Instruction]) extends SerializableModuleParameter +case class DecoderParam(fpuEnable: Boolean, zvbbEnable: Boolean, allInstructions: Seq[Instruction]) + extends SerializableModuleParameter trait T1DecodeFiled[D <: Data] extends DecodeField[T1DecodePattern, D] with FieldName diff --git a/t1/src/lsu/LSU.scala b/t1/src/lsu/LSU.scala index b7bd81202..cd29f90be 100644 --- a/t1/src/lsu/LSU.scala +++ b/t1/src/lsu/LSU.scala @@ -118,15 +118,10 @@ class LSU(param: LSUParameter) extends Module { @public val request: DecoupledIO[LSURequest] = IO(Flipped(Decoupled(new LSURequest(param.datapathWidth)))) - /** mask from [[V]] TODO: since mask is one-cycle information for a mask group, we should latch it in the LSU, and - * reduce the IO width. this needs PnR information. - */ - @public - val maskInput: Vec[UInt] = IO(Input(Vec(param.lsuMSHRSize, UInt(param.maskGroupWidth.W)))) - - /** the address of the mask group in the [[V]]. */ @public - val maskSelect: Vec[UInt] = IO(Output(Vec(param.lsuMSHRSize, UInt(param.maskGroupSizeBits.W)))) + val v0UpdateVec: Vec[ValidIO[V0Update]] = IO( + Flipped(Vec(param.laneNumber, Valid(new V0Update(param.datapathWidth, param.vrfOffsetBits)))) + ) @public val axi4Port: AXI4RWIrrevocable = IO(new AXI4RWIrrevocable(param.axi4BundleParameter)) @@ -197,6 +192,25 @@ class LSU(param: LSUParameter) extends Module { val storeUnit: StoreUnit = Module(new StoreUnit(param.mshrParam)) val otherUnit: SimpleAccessUnit = Module(new SimpleAccessUnit(param.mshrParam)) + /** duplicate v0 in lsu */ + val v0: Vec[UInt] = RegInit( + VecInit(Seq.fill(param.vLen / param.datapathWidth)(0.U(param.datapathWidth.W))) + ) + + // write v0(mask) + v0.zipWithIndex.foreach { case (data, index) => + // which lane this v0 word belongs to + val laneIndex: Int = index % param.laneNumber + // select the v0 write port of that lane + val v0Write = v0UpdateVec(laneIndex) + // offset of this word within that lane, matched against the write's offset + val offset: Int = index / param.laneNumber + val maskExt = FillInterleaved(8, v0Write.bits.mask) + when(v0Write.valid
&& v0Write.bits.offset === offset.U) { + data := (data & (~maskExt).asUInt) | (maskExt & v0Write.bits.data) + } + } + val unitVec = Seq(loadUnit, storeUnit, otherUnit) /** Always merge into cache line */ @@ -222,8 +236,8 @@ class LSU(param: LSUParameter) extends Module { mshr.lsuRequest.valid := reqEnq(index) mshr.lsuRequest.bits := request.bits - maskSelect(index) := Mux(mshr.maskSelect.valid, mshr.maskSelect.bits, 0.U) - mshr.maskInput := maskInput(index) + val maskSelect = Mux(mshr.maskSelect.valid, mshr.maskSelect.bits, 0.U) + mshr.maskInput := cutUInt(v0.asUInt, param.maskGroupWidth)(maskSelect) // broadcast CSR mshr.csrInterface := csrInterface diff --git a/t1/src/mask/MaskReduce.scala b/t1/src/mask/MaskReduce.scala index 3dd6fb0d3..169a62684 100644 --- a/t1/src/mask/MaskReduce.scala +++ b/t1/src/mask/MaskReduce.scala @@ -6,7 +6,7 @@ package org.chipsalliance.t1.rtl import chisel3._ import chisel3.experimental.{SerializableModule, SerializableModuleParameter} import chisel3.properties.{AnyClassType, Class, Property} -import chisel3.experimental.hierarchy.{public, Instance, instantiable, Instantiate} +import chisel3.experimental.hierarchy.{instantiable, public, Instance, Instantiate} import chisel3.util._ import org.chipsalliance.stdlib.GeneralOM diff --git a/t1/src/mask/MaskUnit.scala b/t1/src/mask/MaskUnit.scala index 10ab82da1..9b4c286ad 100644 --- a/t1/src/mask/MaskUnit.scala +++ b/t1/src/mask/MaskUnit.scala @@ -68,12 +68,17 @@ class MaskUnitInterface(parameter: T1Parameter) extends Bundle { val readResult = Flipped(Vec(parameter.laneNumber, Valid(UInt(parameter.datapathWidth.W)))) val writeRD = Valid(UInt(parameter.datapathWidth.W)) val lastReport = Output(UInt((2 * parameter.chainingSize).W)) - val lsuMaskInput = Output(Vec(parameter.lsuMSHRSize, UInt(parameter.maskGroupWidth.W))) - val lsuMaskSelect = Input(Vec(parameter.lsuMSHRSize, UInt(parameter.lsuParameters.maskGroupSizeBits.W))) val laneMaskInput = Output(Vec(parameter.laneNumber, 
UInt(parameter.datapathWidth.W))) val laneMaskSelect = Input(Vec(parameter.laneNumber, UInt(parameter.laneParam.maskGroupSizeBits.W))) val laneMaskSewSelect = Input(Vec(parameter.laneNumber, UInt(2.W))) - val v0UpdateVec = Flipped(Vec(parameter.laneNumber, Valid(new V0Update(parameter.laneParam)))) + val v0UpdateVec = Flipped( + Vec( + parameter.laneNumber, + Valid( + new V0Update(parameter.laneParam.datapathWidth, parameter.laneParam.vrfOffsetBits) + ) + ) + ) val writeRDData = Output(UInt(parameter.xLen.W)) val gatherData = Decoupled(UInt(parameter.xLen.W)) val gatherRead = Input(Bool()) @@ -114,8 +119,6 @@ class MaskUnit(val parameter: T1Parameter) val readResult = io.readResult val writeRD = io.writeRD val lastReport = io.lastReport - val lsuMaskInput = io.lsuMaskInput - val lsuMaskSelect = io.lsuMaskSelect val laneMaskInput = io.laneMaskInput val laneMaskSelect = io.laneMaskSelect val laneMaskSewSelect = io.laneMaskSewSelect @@ -166,11 +169,6 @@ class MaskUnit(val parameter: T1Parameter) input := cutUInt(v0SelectBySew, parameter.datapathWidth)(laneMaskSelect(index)) } - // lsu - lsuMaskInput.zip(lsuMaskSelect).foreach { case (data, index) => - data := cutUInt(v0.asUInt, parameter.maskGroupWidth)(index) - } - val maskedWrite: BitLevelMaskWrite = Module(new BitLevelMaskWrite(parameter)) def gatherIndex(elementIndex: UInt, vlmul: UInt, sew: UInt): (UInt, UInt, UInt, UInt, Bool) = {