diff --git a/t1/src/Lane.scala b/t1/src/Lane.scala index eb0dd00ea..7df91efa9 100644 --- a/t1/src/Lane.scala +++ b/t1/src/Lane.scala @@ -818,14 +818,13 @@ class Lane(val parameter: LaneParameter) extends Module with SerializableModule[ // cross write bus <> write queue crossLaneWriteQueue.zipWithIndex.foreach { case (queue, index) => - val port = writeBusPort(index) - // ((counter << 1) >> parameter.vrfParam.vrfOffsetBits).low(3) - val registerIncreaseBase = parameter.vrfParam.vrfOffsetBits - 1 + val port = writeBusPort(index) + val baseIndex = slotControl.head.laneRequest.vd ## 0.U(parameter.vrfOffsetBits.W) + val indexGrowth: UInt = changeUIntSize(port.enq.bits.counter ## index.U(1.W), port.enq.bits.counter.getWidth) + val finalIndex: UInt = baseIndex + indexGrowth queue.enq.valid := port.enq.valid - queue.enq.bits.vd := - // 3: 8 reg => log(2, 8) - slotControl.head.laneRequest.vd + port.enq.bits.counter(registerIncreaseBase + 3 - 1, registerIncreaseBase) - queue.enq.bits.offset := port.enq.bits.counter ## index.U(1.W) + queue.enq.bits.vd := finalIndex >> parameter.vrfOffsetBits + queue.enq.bits.offset := finalIndex queue.enq.bits.data := port.enq.bits.data queue.enq.bits.last := DontCare queue.enq.bits.instructionIndex := port.enq.bits.instructionIndex diff --git a/t1/src/VectorFunctionUnit.scala b/t1/src/VectorFunctionUnit.scala index 9450a11d2..4c16022b0 100644 --- a/t1/src/VectorFunctionUnit.scala +++ b/t1/src/VectorFunctionUnit.scala @@ -132,6 +132,13 @@ object VFUInstantiateParameter { case (false, true) => VFUInstantiateParameter.zvbb(vLen, dLen) case (true, true) => VFUInstantiateParameter.zvbbFP(vLen, dLen) } + case "huge" => + (fp, zvbb) match { + case (false, false) => VFUInstantiateParameter.smallInt(vLen, dLen) + case (true, false) => VFUInstantiateParameter.hugeFP(vLen, dLen) + case (false, true) => VFUInstantiateParameter.zvbb(vLen, dLen) + case (true, true) => VFUInstantiateParameter.zvbbFP(vLen, dLen) + } } // instantiate each module and connect to all scoreboards diff --git a/t1/src/laneStage/LaneExecutionBridge.scala b/t1/src/laneStage/LaneExecutionBridge.scala index e1456f7db..0883ca8fe 100644 --- a/t1/src/laneStage/LaneExecutionBridge.scala +++ b/t1/src/laneStage/LaneExecutionBridge.scala @@ -411,7 +411,7 @@ class LaneExecutionBridge(parameter: LaneParameter, isLastSlot: Boolean, slotInd maskResult(1, 0) << (recordQueue.deq.bits.groupCounter(3, 0) ## false.B), // 1 bit per data group, it will had 32 data groups -> executeIndex1H << 1 * groupCounter(4, 0) - maskResult(0) << recordQueue.deq.bits.groupCounter(4, 0) + maskResult(0) << recordQueue.deq.bits.groupCounter(4.min(parameter.groupNumberBits - 1), 0) ) ).asUInt diff --git a/t1/src/laneStage/LaneStage1.scala b/t1/src/laneStage/LaneStage1.scala index 5b5809080..ce1836583 100644 --- a/t1/src/laneStage/LaneStage1.scala +++ b/t1/src/laneStage/LaneStage1.scala @@ -212,27 +212,35 @@ class LaneStage1(parameter: LaneParameter, isLastSlot: Boolean) extends Module { // cross read enqueue queueBeforeCheckLSB.foreach { q => - q.enq.bits.vs := Mux( + val baseVs = Mux( enqueue.bits.decodeResult(Decoder.vwmacc), // cross read vd for vwmacc, since it need dual [[dataPathWidth]], use vs2 port to read LSB part of it. enqueue.bits.vd, // read vs2 for other instruction enqueue.bits.vs2 - ) + groupCounter(parameter.groupNumberBits - 2, parameter.vrfOffsetBits - 1) + ) + val baseIndex = baseVs ## 0.U(parameter.vrfOffsetBits.W) + val indexGrowth: UInt = changeUIntSize(groupCounter ## false.B, groupCounter.getWidth) + val finalIndex: UInt = baseIndex + indexGrowth + q.enq.bits.vs := finalIndex >> parameter.vrfOffsetBits q.enq.bits.readSource := Mux(enqueue.bits.decodeResult(Decoder.vwmacc), 2.U, 1.U) - q.enq.bits.offset := groupCounter(parameter.vrfOffsetBits - 2, 0) ## false.B + q.enq.bits.offset := finalIndex } queueBeforeCheckMSB.foreach { q => - q.enq.bits.vs := Mux( + val baseVs = Mux( enqueue.bits.decodeResult(Decoder.vwmacc), // cross read vd for vwmacc enqueue.bits.vd, // cross lane access use vs2 enqueue.bits.vs2 - ) + groupCounter(parameter.groupNumberBits - 2, parameter.vrfOffsetBits - 1) + ) + val baseIndex = baseVs ## 0.U(parameter.vrfOffsetBits.W) + val indexGrowth: UInt = changeUIntSize(groupCounter ## true.B, groupCounter.getWidth) + val finalIndex: UInt = baseIndex + indexGrowth + q.enq.bits.vs := finalIndex >> parameter.vrfOffsetBits q.enq.bits.readSource := Mux(enqueue.bits.decodeResult(Decoder.vwmacc), 2.U, 1.U) - q.enq.bits.offset := groupCounter(parameter.vrfOffsetBits - 2, 0) ## true.B + q.enq.bits.offset := finalIndex } // read pipe diff --git a/t1/src/vrf/VRF.scala b/t1/src/vrf/VRF.scala index be019fc58..8edbaebfd 100644 --- a/t1/src/vrf/VRF.scala +++ b/t1/src/vrf/VRF.scala @@ -268,8 +268,9 @@ class VRF(val parameter: VRFParam) extends Module with SerializableModule[VRFPar val portFireCount: UInt = PopCount(VecInit(readRequests.map(_.fire) :+ write.fire)) dontTouch(portFireCount) - val writeBank: UInt = - if (parameter.rfBankNum == 1) true.B else UIntToOH(write.bits.offset(log2Ceil(parameter.rfBankNum) - 1, 0)) + val writeIndex: UInt = write.bits.vd ## write.bits.offset + val writeBank: UInt = + if (parameter.rfBankNum == 1) true.B else UIntToOH(writeIndex(log2Ceil(parameter.rfBankNum) - 1, 0)) // Add one more record slot to prevent there is no free slot when the instruction comes in // (the slot will die a few cycles later than the instruction) @@ -350,8 +351,9 @@ class VRF(val parameter: VRFParam) extends Module with SerializableModule[VRFPar .reduce(_ && _) && portConflictCheck } val validCorrect: Bool = if (i == (readRequests.size - 1)) v.valid && checkResult.get else v.valid + val address = v.bits.vs ## v.bits.offset // select bank - val bank = if (parameter.rfBankNum == 1) true.B else UIntToOH(v.bits.offset(log2Ceil(parameter.rfBankNum) - 1, 0)) + val bank = if (parameter.rfBankNum == 1) true.B else UIntToOH(address(log2Ceil(parameter.rfBankNum) - 1, 0)) val pipeBank = Pipe(true.B, bank, parameter.vrfReadLatency).bits val bankCorrect = Mux(validCorrect, bank, 0.U(parameter.rfBankNum.W)) val readPortCheckSelect = parameter.ramType match {