diff --git a/Makefile b/Makefile index a4ff22077..019a5fd26 100644 --- a/Makefile +++ b/Makefile @@ -113,7 +113,7 @@ CORECNTT:=$(shell lscpu | grep 'Core(s) per socket:') emulator: -mkdir -p $(HWBUILDDIR) $(MAKE) -C hardware verilog BOOTAPP=$(BOOTAPP) BOARD=$(BOARD) - -cd $(HWBUILDDIR) && verilator --cc ../harnessConfig.vlt Patmos.v --top-module Patmos +define+TOP_TYPE=VPatmos --threads 1 -CFLAGS "-Wno-undefined-bool-conversion -O1 -DTOP_TYPE=VPatmos -DVL_USER_FINISH -include VPatmos.h" -Mdir $(HWBUILDDIR) --exe ../Patmos-harness.cpp -LDFLAGS -lelf --trace-fst + -cd $(HWBUILDDIR) && verilator --cc ../harnessConfig.vlt Patmos.v --top-module Patmos +define+TOP_TYPE=VPatmos --threads 1 -CFLAGS "-Wno-undefined-bool-conversion -O1 -DTOP_TYPE=VPatmos -DVL_USER_FINISH -include VPatmos.h" -Wno-MULTIDRIVEN -Mdir $(HWBUILDDIR) --exe ../Patmos-harness.cpp -LDFLAGS -lelf --trace-fst -cd $(HWBUILDDIR) && make -j -f VPatmos.mk -cp $(HWBUILDDIR)/VPatmos $(HWBUILDDIR)/emulator -mkdir -p $(HWINSTALLDIR)/bin diff --git a/hardware/Makefile b/hardware/Makefile index a3fddb3fa..98ffaa078 100755 --- a/hardware/Makefile +++ b/hardware/Makefile @@ -97,6 +97,8 @@ view: verilog: $(HWBUILDDIR)/$(HWMODULEPREFIX)Patmos.v cd ../../argo && sbt "runMain argo.Argo" cp ../../argo/argo_build/*.v $(HWBUILDDIR) + cp verilog/AsyncArbiter.v $(HWBUILDDIR) + cp verilog/AsyncMutex.v $(HWBUILDDIR) modules: \ $(HWBUILDDIR)/$(HWMODULEPREFIX)PatmosCore.v \ diff --git a/hardware/src/main/scala/argo/Argo.scala b/hardware/src/main/scala/argo/Argo.scala index e27bed6fe..61e187045 100644 --- a/hardware/src/main/scala/argo/Argo.scala +++ b/hardware/src/main/scala/argo/Argo.scala @@ -24,7 +24,7 @@ class CmpArgoIO(corecnt : Int, val argoConf: ArgoConfig) extends CmpIO(corecnt : override val cores = Vec(corecnt, new OcpArgoSlavePort(ADDR_WIDTH, DATA_WIDTH, argoConf)).asInstanceOf[Vec[OcpCoreSlavePort]] } -class Argo(nrCores: Int, wrapped: Boolean = false, emulateBB: Boolean = false) extends Module { +class 
Argo(nrCores: Int, wrapped: Boolean = false, emulateBB: Boolean = false) extends CmpDevice(nrCores) { ArgoConfig.setCores(nrCores) val argoConf = ArgoConfig.getConfig val io = IO(new CmpArgoIO(argoConf.CORES, argoConf)) diff --git a/hardware/src/main/scala/argo/ArgoNoC.scala b/hardware/src/main/scala/argo/ArgoNoC.scala index 01ada1e6f..bb6736a12 100644 --- a/hardware/src/main/scala/argo/ArgoNoC.scala +++ b/hardware/src/main/scala/argo/ArgoNoC.scala @@ -28,6 +28,19 @@ class ArgoNoC(val argoConf: ArgoConfig, wrapped: Boolean = false, emulateBB: Boo val argoNodes = (0 until argoConf.M).map(j => (0 until argoConf.N).map(i => if (emulateBB) Module(new NoCNodeDummy(argoConf, i == 0 && j == 0)) else Module(new NoCNodeWrapper(argoConf, i == 0 && j == 0)))) + + if (!emulateBB) { + argoNodes.flatten.foreach(n => { + val node = n.asInstanceOf[NoCNodeWrapper] + Seq(node.io.north_in, node.io.east_in, node.io.south_in, node.io.west_in).foreach(p => { + p.f.req := false.B + }) + Seq(node.io.north_out, node.io.east_out, node.io.south_out, node.io.west_out).foreach(p => { + p.b.ack := false.B + }) + }) + } + val argoMesh = Wire(Vec(argoConf.M, Vec(argoConf.N, new NodeInterconnection(argoConf)))) /* * Nodes Port Interconnect diff --git a/hardware/src/main/scala/argo/NoCNodeWrapper.scala b/hardware/src/main/scala/argo/NoCNodeWrapper.scala index b65311b3a..cfce5ee71 100644 --- a/hardware/src/main/scala/argo/NoCNodeWrapper.scala +++ b/hardware/src/main/scala/argo/NoCNodeWrapper.scala @@ -44,7 +44,7 @@ class NoCNodeDummy(val argoConf: ArgoConfig, master: Boolean) extends Module { val respReg = RegInit(OcpResp.NULL) val acceptReg = RegInit(false.B) - acceptReg := (io.proc.M.Cmd === OcpCmd.WR) && ~acceptReg + acceptReg := (io.proc.M.Cmd === OcpCmd.WR) && !acceptReg when (io.proc.M.Cmd===OcpCmd.WR && acceptReg) { when(io.proc.M.Addr(15, 12) === 0.U){ diff --git a/hardware/src/main/scala/cmp/AsyncArbiterTree.scala b/hardware/src/main/scala/cmp/AsyncArbiterTree.scala index 
05940f7ec..bcccd0ace 100644 --- a/hardware/src/main/scala/cmp/AsyncArbiterTree.scala +++ b/hardware/src/main/scala/cmp/AsyncArbiterTree.scala @@ -7,7 +7,8 @@ */ package cmp -import Chisel._ +import chisel3._ +import chisel3.util.HasBlackBoxResource class AsyncArbiterIO extends Bundle { @@ -19,45 +20,29 @@ class AsyncArbiterIO extends Bundle class AsyncArbiterTreeIO(cnt: Int) extends AsyncArbiterIO { - val cores = Vec(cnt, new AsyncArbiterIO().flip) + val cores = Vec(cnt, Flipped(new AsyncArbiterIO())) override def clone = new AsyncArbiterTreeIO(cnt).asInstanceOf[this.type] } -class AsyncArbiterBB() extends BlackBox { - val io = new AsyncArbiterIO() - { +class AsyncArbiter extends BlackBox { + val io = IO(new AsyncArbiterIO() { val req1 = Input(Bool()) val req2 = Input(Bool()) val ack1 = Output(Bool()) val ack2 = Output(Bool()) - } - //throw new Error("BlackBox wrapper for AsyncArbiter needs update for Chisel 3") - - // should be commented out to compile for chisel3 - // rename component - /*setModuleName("AsyncArbiter") - - renameClock(clock, "clk") - renameReset("rst") - - io.req.setName("req") - io.req1.setName("req1") - io.req2.setName("req2") - io.ack.setName("ack") - io.ack1.setName("ack1") - io.ack2.setName("ack2")*/ + }) } abstract class AsyncArbiterBase(corecnt: Int) extends Module { - val io = new AsyncArbiterTreeIO(corecnt) + val io = IO(new AsyncArbiterTreeIO(corecnt)) } class AsyncArbiterTree(corecnt : Int) extends AsyncArbiterBase(corecnt) { val leafarbiters = (0 until math.ceil(corecnt/2).toInt).map(i => { - val arbiter = Module(new AsyncArbiterBB()) + val arbiter = Module(new AsyncArbiter()) val idx = i*2 arbiter.io.req1 := io.cores(idx).req io.cores(idx).ack := arbiter.io.ack1 @@ -71,8 +56,8 @@ class AsyncArbiterTree(corecnt : Int) extends AsyncArbiterBase(corecnt) { - val genarbiter = new ((IndexedSeq[AsyncArbiterBB]) => AsyncArbiterBB){ - def apply(children:IndexedSeq[AsyncArbiterBB]):AsyncArbiterBB = + val genarbiter = new 
((IndexedSeq[AsyncArbiter]) => AsyncArbiter){ + def apply(children:IndexedSeq[AsyncArbiter]):AsyncArbiter = { val len = children.count(e => true) println(len) @@ -90,7 +75,7 @@ class AsyncArbiterTree(corecnt : Int) extends AsyncArbiterBase(corecnt) { val child1 = _children._1 val child2 = _children._2 - val parent = Module(new AsyncArbiterBB()) + val parent = Module(new AsyncArbiter()) parent.io.req1 := child1.io.req child1.io.ack := parent.io.ack1 diff --git a/hardware/src/main/scala/cmp/AsyncLock.scala b/hardware/src/main/scala/cmp/AsyncLock.scala index 298eb3f42..28f500257 100644 --- a/hardware/src/main/scala/cmp/AsyncLock.scala +++ b/hardware/src/main/scala/cmp/AsyncLock.scala @@ -6,7 +6,8 @@ */ package cmp -import Chisel._ +import chisel3._ +import chisel3.util._ import ocp._ import patmos.Constants._ @@ -22,20 +23,8 @@ class AsyncMutexIO extends Bundle override def clone = new AsyncMutexIO().asInstanceOf[this.type] } -class AsyncMutexBB() extends BlackBox { +class AsyncMutex extends BlackBox { - val io = new AsyncMutexIO() + val io = IO(new AsyncMutexIO()) // chisel3 BlackBox ports must be wrapped in IO(...) - //throw new Error("BlackBox wrapper for AsyncMuteX in AsyncLock.scala needs update for Chisel 3") - // rename component - // should be Commented out to compile for chisel3 - /*setModuleName("AsyncMutex") - - renameClock(clock, "clk") - renameReset("rst") - - io.req1.setName("req1") - io.req2.setName("req2") - io.gnt1.setName("gnt1") - io.gnt2.setName("gnt2")*/ } class AsyncArbiterMesh(corecnt: Int) extends AsyncArbiterBase(corecnt) { @@ -71,12 +60,11 @@ class AsyncArbiterMesh(corecnt: Int) extends AsyncArbiterBase(corecnt) { val seq = genset(IndexedSeq(), 1) - if(seq.isEmpty) - throwException("Should not happen!") + require(seq.nonEmpty, "Should not happen!") for(tup <- seq) { avail = avail.filter(e => e != tup) - val mutex = Module(new AsyncMutexBB()) + val mutex = Module(new AsyncMutex()) mutex.io.req1 := ins(tup._1) ins(tup._1) = mutex.io.gnt1 @@ -94,7 +82,7 @@ class AsyncArbiterMesh(corecnt: Int) extends AsyncArbiterBase(corecnt) { } }
-class AsyncLock(corecnt: Int, lckcnt: Int, fair: Boolean = false) extends Module { +class AsyncLock(corecnt: Int, lckcnt: Int, fair: Boolean = false) extends CmpDevice(corecnt) { val arbiters = if(!fair) @@ -105,23 +93,22 @@ class AsyncLock(corecnt: Int, lckcnt: Int, fair: Boolean = false) extends Module arb }) else - (0 until lckcnt).map(i => Module(new AsyncArbiterMesh(corecnt))) + Seq.fill(lckcnt)(Module(new AsyncArbiterMesh(corecnt))) - val arbiterio = Vec(arbiters.map(e => e.io)) + val arbiterio = arbiters.map(_.io) val io = IO(new CmpIO(corecnt)) //Vec(corecnt,new OcpCoreSlavePort(ADDR_WIDTH, DATA_WIDTH)) for (i <- 0 until corecnt) { val addr = io.cores(i).M.Addr(log2Up(lckcnt)-1+2, 2) - val acks = Bits(width = lckcnt) - acks := 0.U - val blck = acks.orR + val acks = WireDefault(VecInit.fill(lckcnt)(false.B)) + val blck = acks.reduce(_ || _) for (j <- 0 until lckcnt) { - val reqReg = Reg(init = false.B) + val reqReg = RegInit(init = false.B) arbiterio(j).cores(i).req := reqReg - val ackReg = Reg(next = Reg(next = arbiterio(j).cores(i).ack)) + val ackReg = RegNext(next = RegNext(next = arbiterio(j).cores(i).ack)) acks(j) := ackReg =/= reqReg when(addr === j.U) { @@ -133,7 +120,7 @@ class AsyncLock(corecnt: Int, lckcnt: Int, fair: Boolean = false) extends Module } } - val dvaReg = Reg(init = false.B) + val dvaReg = RegInit(init = false.B) when(io.cores(i).M.Cmd =/= OcpCmd.IDLE) { dvaReg := true.B diff --git a/hardware/src/main/scala/cmp/CASPM.scala b/hardware/src/main/scala/cmp/CASPM.scala index 30afa3636..aa994a3e5 100644 --- a/hardware/src/main/scala/cmp/CASPM.scala +++ b/hardware/src/main/scala/cmp/CASPM.scala @@ -6,30 +6,32 @@ package cmp -import Chisel._ -import chisel3.VecInit +import chisel3._ +import chisel3.util._ import ocp._ import patmos.Constants._ import patmos._ class CASPM(corecnt: Int, size: Int) extends CmpDevice(corecnt) { + val io = IO(new CmpIO(corecnt)) + val spm = Module(new Spm(size)) val cntmax = 2.U - val precnt = Reg(init = 
0.U(cntmax.getWidth.W)) + val precnt = RegInit(init = 0.U(cntmax.getWidth.W)) precnt := Mux(precnt === cntmax, 0.U, precnt + 1.U) - val cnt = Reg(init = 0.U(log2Up(corecnt).W)) + val cnt = RegInit(init = 0.U(log2Up(corecnt).W)) cnt := Mux(precnt =/= cntmax, cnt, Mux(cnt === (corecnt-1).U, 0.U, cnt + 1.U)) val cmdRegs = RegInit(VecInit(Seq.fill(corecnt)(OcpCmd.RD))) - val addrRegs = Reg(Vec(corecnt, spm.io.M.Addr)) - val newvalRegs = Reg(Vec(corecnt, spm.io.M.Data)) - val bytenRegs = Reg(Vec(corecnt, spm.io.M.ByteEn)) + val addrRegs = Reg(Vec(corecnt, chiselTypeOf(spm.io.M.Addr))) + val newvalRegs = Reg(Vec(corecnt, chiselTypeOf(spm.io.M.Data))) + val bytenRegs = Reg(Vec(corecnt, chiselTypeOf(spm.io.M.ByteEn))) - val expvalRegs = Reg(Vec(corecnt, spm.io.S.Data)) + val expvalRegs = Reg(Vec(corecnt, chiselTypeOf(spm.io.S.Data))) - val sIdle :: sRead :: sWrite :: Nil = Enum(UInt(),3) + val sIdle :: sRead :: sWrite :: Nil = Enum(3) val states = RegInit(VecInit(Seq.fill(corecnt)(sIdle))) spm.io.M.Cmd := cmdRegs(cnt) @@ -39,7 +41,9 @@ class CASPM(corecnt: Int, size: Int) extends CmpDevice(corecnt) { for (i <- 0 until corecnt) { - val respReg = Reg(io.cores(i).S.Resp, OcpResp.NULL) + val respReg = Reg(chiselTypeOf(io.cores(i).S.Resp)) + // Default next value is NULL, as in the pre-migration Reg(t, next) form; later assignments override it. + respReg := OcpResp.NULL io.cores(i).S.Resp := respReg io.cores(i).S.Data := spm.io.S.Data diff --git a/hardware/src/main/scala/cmp/CmpDevice.scala b/hardware/src/main/scala/cmp/CmpDevice.scala index 750d05184..2ccc6c8f9 100644 --- a/hardware/src/main/scala/cmp/CmpDevice.scala +++ b/hardware/src/main/scala/cmp/CmpDevice.scala @@ -8,5 +8,5 @@ import chisel3._ * @param cnt */ abstract class CmpDevice(cnt: Int) extends Module { - val io = IO(new CmpIO(cnt)) + val io: CmpIO } diff --git a/hardware/src/main/scala/cmp/CmpIO.scala b/hardware/src/main/scala/cmp/CmpIO.scala index 8fb410aea..db070f1bf 100644 --- a/hardware/src/main/scala/cmp/CmpIO.scala +++ b/hardware/src/main/scala/cmp/CmpIO.scala @@ -4,11 +4,6 @@ import chisel3._ import ocp.OcpCoreSlavePort import
patmos.Constants.{ADDR_WIDTH, DATA_WIDTH} -class CmpIO(val corecnt: Int) extends Bundle with patmos.HasPins { +class CmpIO(val corecnt: Int) extends Bundle { val cores = Vec(corecnt, new OcpCoreSlavePort(ADDR_WIDTH, DATA_WIDTH)) - // TODO: just for now to move to Chisel 3.5, needs a btter fix in the future - override val pins = new Bundle { - val tx = Output(Bits(1.W)) - val rx = Input(Bits(1.W)) - } } diff --git a/hardware/src/main/scala/cmp/Hardlock.scala b/hardware/src/main/scala/cmp/Hardlock.scala index 97cd73e2a..0c79f3e76 100755 --- a/hardware/src/main/scala/cmp/Hardlock.scala +++ b/hardware/src/main/scala/cmp/Hardlock.scala @@ -9,8 +9,8 @@ */ package cmp -import Chisel._ -import chisel3.VecInit +import chisel3._ +import chisel3.util._ import patmos.Constants._ import ocp._ @@ -75,15 +75,14 @@ class Hardlock(coreCnt : Int,lckCnt : Int) extends AbstractHardlock(coreCnt, lck } class HardlockOCPWrapper(nrCores: Int, hardlockgen: () => AbstractHardlock) extends CmpDevice(nrCores) { + val io = IO(new CmpIO(nrCores)) + val hardlock = Module(hardlockgen()) - // TODO: workaround: - io.pins.tx := 0.U - // Mapping between internal io and OCP here - val reqReg = Reg(init = Bits(0,hardlock.CoreCount)) + val reqReg = RegInit(init = 0.U(hardlock.CoreCount.W)) val reqBools = Wire(Vec(hardlock.CoreCount, Bool())) reqBools := reqReg.asBools @@ -98,11 +97,11 @@ class HardlockOCPWrapper(nrCores: Int, hardlockgen: () => AbstractHardlock) exte when(io.cores(i).M.Cmd =/= OcpCmd.IDLE) { reqBools(i) := true.B - reqReg := reqBools.asUInt() + reqReg := reqBools.asUInt } .elsewhen(reqReg(i) === true.B && hardlock.io.cores(i).blck === false.B) { reqBools(i) := false.B - reqReg := reqBools.asUInt() + reqReg := reqBools.asUInt } io.cores(i).S.Resp := OcpResp.NULL diff --git a/hardware/src/main/scala/cmp/LedsCmp.scala b/hardware/src/main/scala/cmp/LedsCmp.scala index 928813277..fefd0309e 100644 --- a/hardware/src/main/scala/cmp/LedsCmp.scala +++ b/hardware/src/main/scala/cmp/LedsCmp.scala 
@@ -1,36 +1,33 @@ -/* package cmp -import Chisel._ +import chisel3._ import io._ import ocp.OcpCoreSlavePort import patmos.Constants.{ADDR_WIDTH, DATA_WIDTH} -MS: commented out for the moment until we find a good better solution for pins on CPM devices. - -class LedsCmp(nrCores: Int, nrLedPerCore: Int) extends Module { - val io = new CmpIO(nrCores) with patmos.HasPins { +class LedsCmp(nrCores: Int, nrLedPerCore: Int) extends CmpDevice(nrCores) { + val io = IO(new CmpIO(nrCores) with patmos.HasPins { override val pins = new Bundle() { - val led = Output(Bits(width = nrCores * nrLedPerCore)) + val led = Output(UInt((nrCores * nrLedPerCore).W)) } - } + }) // commented out below as chisel3 do not support setWidth, trait are parameterless which means // there is no good way of setting with. All uses of this class has nrLedPerCore = 1 anyway //io.ledsCmpPins.led.setWidth(nrCores * nrLedPerCore) //modify number of ledPins dynamically io.pins.led := 0.U - val ledDevs = Vec(nrCores, Module(new Leds(nrLedPerCore)).io) + val ledDevs = Seq.fill(nrCores)(Module(new Leds(nrLedPerCore)).io) //Wire one led IO device per core, each with a number of led for (i <- 0 until nrCores) { ledDevs(i).ocp.M := io.cores(i).M io.cores(i).S := ledDevs(i).ocp.S - io.pins.led(i) := ledDevs(i).pins.led(0) + ledDevs(i).superMode := false.B } + io.pins.led := ledDevs.map(_.pins.led).reduceLeft((l, h) => h ## l) } - */ diff --git a/hardware/src/main/scala/cmp/OneWayOCPWrapper.scala b/hardware/src/main/scala/cmp/OneWayOCPWrapper.scala index 0892f1205..ad978f1b1 100755 --- a/hardware/src/main/scala/cmp/OneWayOCPWrapper.scala +++ b/hardware/src/main/scala/cmp/OneWayOCPWrapper.scala @@ -7,13 +7,15 @@ */ package cmp -import Chisel._ - +import chisel3._ +import chisel3.util._ import patmos.Constants._ import ocp._ class OneWayOCPWrapper(nrCores: Int) extends CmpDevice(nrCores) { + val io = IO(new CmpIO(nrCores)) + val dim = math.sqrt(nrCores).toInt if (dim * dim != nrCores) throw new Error("Number of cores 
must be quadratic") @@ -38,6 +40,6 @@ class OneWayOCPWrapper(nrCores: Int) extends CmpDevice(nrCores) { onewaymem.io.memPorts(i).wrEna := io.cores(i).M.Cmd === OcpCmd.WR // Memory has one cycle latency (read address is in register) io.cores(i).S.Data := onewaymem.io.memPorts(i).rdData - io.cores(i).S.Resp := Reg(init = OcpResp.NULL, next = resp) + io.cores(i).S.Resp := RegNext(init = OcpResp.NULL, next = resp) } } diff --git a/hardware/src/main/scala/cmp/OwnSPM.scala b/hardware/src/main/scala/cmp/OwnSPM.scala index c2bc4200b..23adcfd4c 100644 --- a/hardware/src/main/scala/cmp/OwnSPM.scala +++ b/hardware/src/main/scala/cmp/OwnSPM.scala @@ -12,8 +12,8 @@ package cmp -import Chisel._ -import chisel3.VecInit +import chisel3._ +import chisel3.util._ import patmos._ import patmos.Constants._ @@ -21,10 +21,13 @@ import ocp._ class OwnSPM(nrCores: Int, nrSPMs: Int, size: Int) extends CmpDevice(nrCores) { + val io = IO(new CmpIO(nrCores)) + val bits = log2Up(nrSPMs) println("OwnSPM: cnt = " + nrSPMs + " bits = " + bits) val masters = Wire(Vec(nrSPMs, Vec(nrCores, new OcpCoreSlavePort(ADDR_WIDTH, DATA_WIDTH)))) + masters := DontCare val spms = (0 until nrSPMs).map(i => Module(new Spm(size))) val cmdOutReg = RegInit(VecInit(Seq.fill(nrCores)(false.B))) @@ -38,6 +41,7 @@ class OwnSPM(nrCores: Int, nrSPMs: Int, size: Int) extends CmpDevice(nrCores) { when(io.cores(i).M.Cmd =/= OcpCmd.IDLE && io.cores(i).M.Addr(12 + bits - 1, 12) === s.U) { masters(s)(i).M := io.cores(i).M } + masters(s)(i).S.Resp := DontCare } // Or the master signals @@ -56,7 +60,8 @@ class OwnSPM(nrCores: Int, nrSPMs: Int, size: Int) extends CmpDevice(nrCores) { } // Connect SPM out to output muxes and muxes to slave responses - val muxes = Vec(nrSPMs, Vec(nrSPMs, new OcpCoreSlavePort(ADDR_WIDTH, DATA_WIDTH))) + val muxes = Wire(Vec(nrSPMs, Vec(nrSPMs, new OcpCoreSlavePort(ADDR_WIDTH, DATA_WIDTH)))) + muxes := DontCare for (i <- 0 until nrCores) { // val mux = Vec(nrSPMs, new OcpCoreSlavePort(ADDR_WIDTH, 
DATA_WIDTH)) for (j <- 0 until nrSPMs) { diff --git a/hardware/src/main/scala/cmp/S4nocOCPWrapper.scala b/hardware/src/main/scala/cmp/S4nocOCPWrapper.scala deleted file mode 100644 index 3205ca755..000000000 --- a/hardware/src/main/scala/cmp/S4nocOCPWrapper.scala +++ /dev/null @@ -1,32 +0,0 @@ -/* - OCP wrapper for the S4NOC. - - Author: Martin Schoeberl (martin@jopdesign.com) - license see LICENSE - */ -package cmp - -import Chisel._ - -import patmos.Constants._ -import ocp._ -import s4noc._ - -class S4nocOCPWrapper(nrCores: Int, txFifo: Int, rxFifo: Int) extends CmpDevice(nrCores) { - - val s4noc = Module(new S4noc(nrCores, txFifo, rxFifo)) - - for (i <- 0 until nrCores) { - - val resp = Mux(io.cores(i).M.Cmd === OcpCmd.RD || io.cores(i).M.Cmd === OcpCmd.WR, - OcpResp.DVA, OcpResp.NULL) - - // addresses are in words - s4noc.io.cpuPorts(i).addr := io.cores(i).M.Addr >> 2 - s4noc.io.cpuPorts(i).wrData := io.cores(i).M.Data - s4noc.io.cpuPorts(i).wr := io.cores(i).M.Cmd === OcpCmd.WR - s4noc.io.cpuPorts(i).rd := io.cores(i).M.Cmd === OcpCmd.RD - io.cores(i).S.Data := RegNext(s4noc.io.cpuPorts(i).rdData) - io.cores(i).S.Resp := Reg(init = OcpResp.NULL, next = resp) - } -} diff --git a/hardware/src/main/scala/cmp/SPMPool.scala b/hardware/src/main/scala/cmp/SPMPool.scala index e7cf44fce..ec8d7be72 100644 --- a/hardware/src/main/scala/cmp/SPMPool.scala +++ b/hardware/src/main/scala/cmp/SPMPool.scala @@ -1,7 +1,7 @@ package cmp -import Chisel._ -import chisel3.VecInit +import chisel3._ +import chisel3.util._ import patmos.Constants._ import ocp._ @@ -18,18 +18,16 @@ object SPMPool { val curReg = Reg(UInt(log2Up(reqs.getWidth).W)) - val hi = UInt(reqs.getWidth.W) - val lo = UInt(reqs.getWidth.W) + val hi = WireDefault(VecInit.fill(reqs.getWidth)(false.B)) + val lo = WireDefault(VecInit.fill(reqs.getWidth)(false.B)) - lo := 0.U - hi := 0.U for (i <- 0 until reqs.getWidth) { lo(i) := reqs(i) && (curReg >= i.U) hi(i) := reqs(i) && (curReg < i.U) } when(!reqs(curReg) || 
continue) { - when(hi.orR) { + when(hi.reduceTree(_ || _)) { curReg := PriorityEncoder(hi) }.otherwise { curReg := PriorityEncoder(lo) @@ -41,11 +39,11 @@ object SPMPool { class TDMSPM(corecnt:Int, spmsize:Int) extends Module { - val io = new Bundle() + val io = IO(new Bundle() { val sched = Input(UInt(corecnt.W)) val cores = Vec(corecnt, new OcpCoreSlavePort(ADDR_WIDTH, DATA_WIDTH)) - } + }) val spm = Module(new patmos.Spm(spmsize)) val cur = SPMPool.roundRobinArbiter(io.sched, true.B) @@ -61,7 +59,7 @@ object SPMPool { } } -class SPMPool(corecnt:Int, spmcnt:Int, spmsize:Int, spmcntmax:Int = 15, spmsizemax:Int = 4096) extends Module { +class SPMPool(corecnt:Int, spmcnt:Int, spmsize:Int, spmcntmax:Int = 15, spmsizemax:Int = 4096) extends CmpDevice(corecnt) { if(spmcnt > spmcntmax) throw new IllegalArgumentException("SPM count is greater than SPM maximum count") @@ -71,13 +69,13 @@ class SPMPool(corecnt:Int, spmcnt:Int, spmsize:Int, spmcntmax:Int = 15, spmsizem val io = IO(new CmpIO(corecnt)) //Vec(corecnt, new OcpCoreSlavePort(ADDR_WIDTH, DATA_WIDTH)) - val spms = (0 until spmcnt).map(e => Module(new SPMPool.TDMSPM(corecnt, spmsize))) + val spms = Seq.fill(spmcnt)(Module(new SPMPool.TDMSPM(corecnt, spmsize))) // remove empty fields - val spmios = Wire(Vec(spms.map(e => e.io.cores))) + val spmios = VecInit(spms.map(_.io.cores)) - val spmscheds = Reg(Vec(spms.map(e => UInt(corecnt.W)))) + val spmscheds = Reg(Vec(spmcnt, UInt(corecnt.W))) for(i <- 0 until spms.length) spms(i).io.sched := spmscheds(i) @@ -91,13 +89,13 @@ class SPMPool(corecnt:Int, spmcnt:Int, spmsize:Int, spmcntmax:Int = 15, spmsizem val anyavail = avails.orR val respRegs = RegInit(VecInit(Seq.fill(corecnt)(OcpResp.NULL))) - val dataRegs = Reg(Vec(corecnt, io.cores(0).S.Data)) + val dataRegs = Reg(Vec(corecnt, chiselTypeOf(io.cores(0).S.Data))) val dumio = Wire(Vec(corecnt, new OcpCoreSlavePort(ADDR_WIDTH, DATA_WIDTH))) for(i <- 0 until corecnt) { - val mReg = Reg(io.cores(i).M) + val mReg = 
Reg(chiselTypeOf(io.cores(i).M)) diff --git a/hardware/src/main/scala/cmp/SharedSPM.scala b/hardware/src/main/scala/cmp/SharedSPM.scala index 5f2a33470..9807aafba 100644 --- a/hardware/src/main/scala/cmp/SharedSPM.scala +++ b/hardware/src/main/scala/cmp/SharedSPM.scala @@ -6,7 +6,8 @@ package cmp -import Chisel._ +import chisel3._ +import chisel3.util._ import patmos._ import patmos.Constants._ @@ -28,17 +29,17 @@ import ocp._ */ class NodeSPM(id: Int, nrCores: Int) extends Module { - val io = new Bundle() { + val io = IO(new Bundle() { val fromCore = new OcpCoreSlavePort(ADDR_WIDTH, DATA_WIDTH) val toMem = new OcpCoreMasterPort(ADDR_WIDTH, DATA_WIDTH) - } + }) - val idle :: rd :: wr :: Nil = Enum(UInt(), 3) + val idle :: rd :: wr :: Nil = Enum(3) val state = RegInit(idle) val cnt = RegInit(0.U(log2Up(nrCores).W)) // TODO: how to reset with a harmless IDLE command? - val masterReg = Reg(io.fromCore.M) + val masterReg = Reg(chiselTypeOf(io.fromCore.M)) cnt := Mux(cnt === (nrCores - 1).U, 0.U, cnt + 1.U) val enable = cnt === id.U @@ -87,6 +88,8 @@ class NodeSPM(id: Int, nrCores: Int) extends Module { class SharedSPM(nrCores: Int, size: Int) extends CmpDevice(nrCores) { + val io = IO(new CmpIO(nrCores)) + val spm = Module(new Spm(size)) val nd = new Array[NodeSPM](nrCores) diff --git a/hardware/src/main/scala/cmp/TransactionalMemory.scala b/hardware/src/main/scala/cmp/TransactionalMemory.scala index c3d7234b6..f6873ea47 100644 --- a/hardware/src/main/scala/cmp/TransactionalMemory.scala +++ b/hardware/src/main/scala/cmp/TransactionalMemory.scala @@ -9,33 +9,30 @@ */ package cmp -import Chisel._ -import chisel3.VecInit +import chisel3._ +import chisel3.util._ import patmos._ import patmos.Constants._ import ocp._ class TransactionalMemory(corecnt: Int, memsize: Int = 128, bufsize: Int = 16, pipeline: Boolean = true) extends CmpDevice(corecnt) { + + val io = IO(new CmpIO(corecnt)) val datawidth = DATA_WIDTH val memaddrwidth = log2Up(memsize) val corecur = 
Counter(corecnt) - val sharedmem = Mem(UInt(datawidth.W), memsize) - val sharedmemwr = Bool() - sharedmemwr := false.B - val sharedmemwrfin = Bool() - sharedmemwrfin := false.B + val sharedmem = Mem(memsize, UInt(datawidth.W)) + val sharedmemwr = WireDefault(false.B) + val sharedmemwrfin = WireDefault(false.B) val sharedmemrdaddrReg = Reg(UInt(memaddrwidth.W)) - val sharedmemwraddr = UInt(memaddrwidth.W) - sharedmemwraddr := 0.U + val sharedmemwraddr = WireDefault(0.U(memaddrwidth.W)) val sharedmemrddata = sharedmem(sharedmemrdaddrReg) - val sharedmemwrdata = UInt(datawidth.W) - sharedmemwrdata := 0.U + val sharedmemwrdata = WireDefault(0.U(datawidth.W)) - val _bufwr = Bool() - _bufwr := false.B + val _bufwr = WireDefault(false.B) when(sharedmemwr) { when(_bufwr) { @@ -49,7 +46,7 @@ class TransactionalMemory(corecnt: Int, memsize: Int = 128, bufsize: Int = 16, p corecur.inc } - val sIdle::sRead::sPreSharedRead::sSharedRead::sPreCommit::sCommit::Nil = Enum(UInt(),6) + val sIdle::sRead::sPreSharedRead::sSharedRead::sPreCommit::sCommit::Nil = Enum(6) for(i <- 0 until corecnt) { @@ -66,27 +63,24 @@ class TransactionalMemory(corecnt: Int, memsize: Int = 128, bufsize: Int = 16, p val bufwrs = RegInit(VecInit(Seq.fill(bufsize)(false.B))) val bufnxt = Counter(bufsize+1) - val bufmem = Mem(UInt(datawidth.W), bufsize) - val bufmemwr = Bool() + val bufmem = Mem(bufsize, UInt(datawidth.W)) + val bufmemwr = Wire(Bool()) val bufmemrdaddrReg = RegInit(0.U(bufaddrwidth.W)) bufmemrdaddrReg := 0.U - val bufmemwraddr = UInt(bufaddrwidth.W) - bufmemwraddr := 0.U + val bufmemwraddr = WireDefault(0.U(bufaddrwidth.W)) val bufmemrddata = bufmem(bufmemrdaddrReg) - val bufmemwrdata = UInt(datawidth.W) - bufmemwrdata := 0.U + val bufmemwrdata = WireDefault(0.U(datawidth.W)) when(bufmemwr) { bufmem(bufmemwraddr) := bufmemwrdata } - val bufmatches = UInt(bufsize.W) - bufmatches := 0.U + val bufmatches = WireDefault(VecInit.fill(bufsize)(false.B)) for(j <- 0 until bufsize) { bufmatches(j) := 
(bufaddrs(j) === bufaddr) && (bufrds(j) || bufwrs(j)) } val bufmatch = OHToUInt(bufmatches); - val bufmatched = bufmatches.orR + val bufmatched = bufmatches.reduceTree(_ || _) val bufconflict = RegInit(false.B) @@ -110,7 +104,7 @@ class TransactionalMemory(corecnt: Int, memsize: Int = 128, bufsize: Int = 16, p sharedmemrdaddrReg := memrdaddrReg } - val slaveReg = Reg(ioS) + val slaveReg = Reg(chiselTypeOf(ioS)) ioS.Data := slaveReg.Data ioS.Resp := slaveReg.Resp @@ -146,7 +140,7 @@ class TransactionalMemory(corecnt: Int, memsize: Int = 128, bufsize: Int = 16, p when(bufnxt.value === 0.U || bufconflict || overflowReg) { // rd/wr conflict or nothing to commit, return failure slaveReg.Resp := OcpResp.DVA - slaveReg.Data := -1.S + slaveReg.Data := Fill(slaveReg.Data.getWidth, true.B) overflowReg := false.B bufnxt.value := 0.U @@ -200,7 +194,7 @@ class TransactionalMemory(corecnt: Int, memsize: Int = 128, bufsize: Int = 16, p when((bufmemrdaddrReg === bufnxt.value) || bufconflict) { // Finish here slaveReg.Resp := OcpResp.DVA - slaveReg.Data := Mux(bufconflict, -1.S, 0.S) + slaveReg.Data := Mux(bufconflict, Fill(slaveReg.Data.getWidth, true.B), 0.U) overflowReg := false.B bufnxt.value := 0.U diff --git a/hardware/src/main/scala/cmp/TwoWayOCPWrapper.scala b/hardware/src/main/scala/cmp/TwoWayOCPWrapper.scala index 486aa08f6..db0d27d85 100644 --- a/hardware/src/main/scala/cmp/TwoWayOCPWrapper.scala +++ b/hardware/src/main/scala/cmp/TwoWayOCPWrapper.scala @@ -7,7 +7,8 @@ */ package cmp -import Chisel._ +import chisel3._ +import chisel3.util._ import patmos._ import patmos.Constants._ @@ -28,6 +29,8 @@ class XXXIO(lckCnt: Int) extends Bundle { // TODO: is this dead code? If so, just delete it. 
class TwoWayOCPWrapper(nrCores: Int, memSizePrNI : Int) extends CmpDevice(nrCores) { + val io = IO(new CmpIO(nrCores)) + val dim = math.sqrt(nrCores).toInt if (dim * dim != nrCores) throw new Error("Number of cores must be quadratic") // just start with four words diff --git a/hardware/src/main/scala/cmp/UartCmp.scala b/hardware/src/main/scala/cmp/UartCmp.scala index 861148a3b..c0e779f8a 100644 --- a/hardware/src/main/scala/cmp/UartCmp.scala +++ b/hardware/src/main/scala/cmp/UartCmp.scala @@ -6,7 +6,8 @@ package cmp -import Chisel._ +import chisel3._ +import chisel3.util._ import ocp._ import patmos.Constants._ import patmos._ @@ -17,15 +18,15 @@ import io.Uart class UartCmp(corecnt: Int, clk_freq: Int, baud_rate: Int, fifoDepth: Int) extends CmpDevice(corecnt) { -/* - override val io = IO(new CmpIO(corecnt) with patmos.HasPins { + + val io = IO(new CmpIO(corecnt) with patmos.HasPins { override val pins = new Bundle { val tx = Output(UInt(1.W)) val rx = Input(UInt(1.W)) } }) - */ + /* val abc = IO(new Bundle { @@ -46,12 +47,14 @@ class UartCmp(corecnt: Int, clk_freq: Int, baud_rate: Int, fifoDepth: Int) exten val uart = Module(new Uart(clk_freq,baud_rate,fifoDepth)) + uart.io.superMode := false.B + io.pins <> uart.io.pins uart.io.ocp.M := PriorityMux(io.cores.map(e => (e.M.Cmd =/= OcpCmd.IDLE, e.M))) for (i <- 0 until corecnt) { - val cmdReg = Reg(init = false.B) + val cmdReg = RegInit(init = false.B) when(io.cores(i).M.Cmd =/= OcpCmd.IDLE) { cmdReg := true.B }.elsewhen(uart.io.ocp.S.Resp === OcpResp.DVA) { diff --git a/hardware/src/main/scala/ocp/NodeTdmArbiter.scala b/hardware/src/main/scala/ocp/NodeTdmArbiter.scala index d8e405561..77aa140de 100644 --- a/hardware/src/main/scala/ocp/NodeTdmArbiter.scala +++ b/hardware/src/main/scala/ocp/NodeTdmArbiter.scala @@ -244,12 +244,13 @@ class MemMuxIntf(nr: Int, addrWidth : Int, dataWidth : Int, burstLen: Int) exten // 1st stage pipeline registers for output val sResp_p1_Reg = RegNext(next=io.slave.S.Resp) val 
sData_p1_Reg = RegNext(next=io.slave.S.Data) - // Forward response to all arbiters for (i <- 0 until nr) { io.master(i).S.Data := sData_p1_Reg - io.master(i).S.Resp := sResp_p1_Reg + io.master(i).S.Resp := sResp_p1_Reg + io.master(i).S.CmdAccept := 0.B + io.master(i).S.DataAccept := 0.B } } diff --git a/hardware/src/main/scala/patmos/Patmos.scala b/hardware/src/main/scala/patmos/Patmos.scala index 34d5f5137..f7fb6f3c5 100644 --- a/hardware/src/main/scala/patmos/Patmos.scala +++ b/hardware/src/main/scala/patmos/Patmos.scala @@ -260,23 +260,22 @@ class Patmos(configFile: String, binFile: String, datFile: String) extends Modul val cmpdevios = config.cmpDevices.map(e => { println(s"CMP device: $e") val (off, width, dev) = e match { - // CMP devices need to be update for Chisel 3 - // case "Argo" => (0x1C, 5, Module(new argo.Argo(nrCores, wrapped=false, emulateBB=false))) + case "Argo" => (0x1C, 5, Module(new argo.Argo(nrCores, wrapped=false, emulateBB=false))) case "Hardlock" => (0xE801, IO_DEVICE_ADDR_WIDTH, Module(new cmp.HardlockOCPWrapper(nrCores, () => new cmp.Hardlock(nrCores, 1)))) case "SharedSPM" => (0xE802, IO_DEVICE_ADDR_WIDTH, Module(new cmp.SharedSPM(nrCores, (nrCores-1)*2*1024))) - // case "OneWay" => (0xE803, IO_DEVICE_ADDR_WIDTH, Module(new cmp.OneWayOCPWrapper(nrCores))) + case "OneWay" => (0xE803, IO_DEVICE_ADDR_WIDTH, Module(new cmp.OneWayOCPWrapper(nrCores))) // removed as it was never used, address is free // TODO: remove constants from patmos.h // case "TdmArbiter" => (0xE804, IO_DEVICE_ADDR_WIDTH, Module(new cmp.TdmArbiter(nrCores))) - // case "OwnSPM" => (0xE805, IO_DEVICE_ADDR_WIDTH, Module(new cmp.OwnSPM(nrCores, (nrCores-1)*2, 1024))) - // case "SPMPool" => (0xE806, IO_DEVICE_ADDR_WIDTH, Module(new cmp.SPMPool(nrCores, (nrCores-1)*2, 1024))) + case "OwnSPM" => (0xE805, IO_DEVICE_ADDR_WIDTH, Module(new cmp.OwnSPM(nrCores, (nrCores-1)*2, 1024))) + case "SPMPool" => (0xE806, IO_DEVICE_ADDR_WIDTH, Module(new cmp.SPMPool(nrCores, (nrCores-1)*2, 
1024))) // case "S4noc" => (0xE807, IO_DEVICE_ADDR_WIDTH, Module(new cmp.S4nocOCPWrapper(nrCores, 4, 4))) - // case "CASPM" => (0xE808, IO_DEVICE_ADDR_WIDTH, Module(new cmp.CASPM(nrCores, nrCores * 8))) - // case "AsyncLock" => (0xE809, IO_DEVICE_ADDR_WIDTH, Module(new cmp.AsyncLock(nrCores, nrCores * 2))) + case "CASPM" => (0xE808, IO_DEVICE_ADDR_WIDTH, Module(new cmp.CASPM(nrCores, nrCores * 8))) + case "AsyncLock" => (0xE809, IO_DEVICE_ADDR_WIDTH, Module(new cmp.AsyncLock(nrCores, nrCores * 2))) case "UartCmp" => (0xF008, IO_DEVICE_ADDR_WIDTH, Module(new cmp.UartCmp(nrCores,CLOCK_FREQ,UART_BAUD,16))) - // case "TwoWay" => (0xE80B, IO_DEVICE_ADDR_WIDTH, Module(new cmp.TwoWayOCPWrapper(nrCores, 1024))) - // case "TransactionalMemory" => (0xE80C, IO_DEVICE_ADDR_WIDTH, Module(new cmp.TransactionalMemory(nrCores, 512))) - // case "LedsCmp" => (0xE80D, IO_DEVICE_ADDR_WIDTH, Module(new cmp.LedsCmp(nrCores, 1))) + case "TwoWay" => (0xE80B, IO_DEVICE_ADDR_WIDTH, Module(new cmp.TwoWayOCPWrapper(nrCores, 1024))) + case "TransactionalMemory" => (0xE80C, IO_DEVICE_ADDR_WIDTH, Module(new cmp.TransactionalMemory(nrCores, 512))) + case "LedsCmp" => (0xE80D, IO_DEVICE_ADDR_WIDTH, Module(new cmp.LedsCmp(nrCores, 1))) case _ => throw new Error("Unknown device " + e) } diff --git a/hardware/src/main/scala/s4noc/Network.scala b/hardware/src/main/scala/s4noc/Network.scala index 63cc12623..36237022d 100644 --- a/hardware/src/main/scala/s4noc/Network.scala +++ b/hardware/src/main/scala/s4noc/Network.scala @@ -13,9 +13,9 @@ import Const._ * Create and connect a n x n NoC. 
*/ class Network[T <: Data](n: Int, dt: T) extends Module { - val io = new Bundle { + val io = IO(new Bundle { val local = Vec(n * n, new Channel(dt)) - } + }) val schedule = Schedule.getSchedule(n)._1 diff --git a/hardware/src/main/scala/s4noc/S4Router.scala b/hardware/src/main/scala/s4noc/S4Router.scala index 40d3a5a46..8748658c0 100644 --- a/hardware/src/main/scala/s4noc/S4Router.scala +++ b/hardware/src/main/scala/s4noc/S4Router.scala @@ -44,7 +44,7 @@ class RouterPorts[T <: Data](dt: T) extends Bundle { } class S4Router[T <: Data](schedule: Array[Array[Int]], dt: T) extends Module { - val io = new RouterPorts(dt) + val io = IO(new RouterPorts(dt)) val regCounter = RegInit(0.U(log2Up(schedule.length).W)) val end = regCounter === (schedule.length - 1).U diff --git a/hardware/src/main/scala/cmp/AsyncArbiter.v b/hardware/verilog/AsyncArbiter.v similarity index 100% rename from hardware/src/main/scala/cmp/AsyncArbiter.v rename to hardware/verilog/AsyncArbiter.v diff --git a/hardware/src/main/scala/cmp/AsyncMutex.v b/hardware/verilog/AsyncMutex.v similarity index 100% rename from hardware/src/main/scala/cmp/AsyncMutex.v rename to hardware/verilog/AsyncMutex.v