From 8f9bdb62e51e68b2ace7a51a65dc17c7be94dfe7 Mon Sep 17 00:00:00 2001 From: Ella Schwarz Date: Wed, 18 Sep 2024 16:02:37 -0700 Subject: [PATCH 1/9] Add HLS example accelerator --- .github/scripts/defaults.sh | 3 +- .github/workflows/chipyard-run-tests.yml | 23 ++++ build.sbt | 7 +- docs/Customization/Incorporating-HLS.rst | 69 ++++++++++ docs/Customization/index.rst | 1 + .../chipyard/src/main/scala/DigitalTop.scala | 1 + .../scala/config/MMIOAcceleratorConfigs.scala | 5 + generators/hls-example/Makefile | 23 ++++ generators/hls-example/accel/HLSAccel.cpp | 19 +++ generators/hls-example/accel/HLSAccel.hpp | 17 +++ generators/hls-example/run_hls.tcl | 8 ++ .../src/main/scala/HLSExample.scala | 125 ++++++++++++++++++ 12 files changed, 299 insertions(+), 2 deletions(-) create mode 100644 docs/Customization/Incorporating-HLS.rst create mode 100644 generators/hls-example/Makefile create mode 100644 generators/hls-example/accel/HLSAccel.cpp create mode 100644 generators/hls-example/accel/HLSAccel.hpp create mode 100644 generators/hls-example/run_hls.tcl create mode 100644 generators/hls-example/src/main/scala/HLSExample.scala diff --git a/.github/scripts/defaults.sh b/.github/scripts/defaults.sh index ea90bbaee7..ee1c25bf92 100755 --- a/.github/scripts/defaults.sh +++ b/.github/scripts/defaults.sh @@ -27,7 +27,7 @@ REMOTE_COURSIER_CACHE=$REMOTE_WORK_DIR/.coursier-cache declare -A grouping grouping["group-cores"]="chipyard-cva6 chipyard-ibex chipyard-rocket chipyard-hetero chipyard-boomv3 chipyard-boomv4 chipyard-sodor chipyard-digitaltop chipyard-multiclock-rocket chipyard-nomem-scratchpad chipyard-spike chipyard-clone chipyard-prefetchers chipyard-shuttle" grouping["group-peripherals"]="chipyard-dmirocket chipyard-dmiboomv3 chipyard-dmiboomv4 chipyard-spiflashwrite chipyard-mmios chipyard-nocores chipyard-manyperipherals chipyard-chiplike chipyard-tethered chipyard-symmetric chipyard-llcchiplet" -grouping["group-accels"]="chipyard-compressacc chipyard-mempress 
chipyard-gemmini chipyard-manymmioaccels chipyard-nvdla chipyard-aes256ecb chipyard-rerocc chipyard-rocketvector chipyard-shuttlevector chipyard-shuttleara" +grouping["group-accels"]="chipyard-compressacc chipyard-mempress chipyard-gemmini chipyard-manymmioaccels chipyard-nvdla chipyard-aes256ecb chipyard-rerocc chipyard-rocketvector chipyard-shuttlevector chipyard-shuttleara chipyard-hlsacc" grouping["group-constellation"]="chipyard-constellation" grouping["group-tracegen"]="tracegen tracegen-boomv3 tracegen-boomv4" grouping["group-other"]="icenet testchipip constellation rocketchip-amba rocketchip-tlsimple rocketchip-tlwidth rocketchip-tlxbar chipyard-clusters" @@ -41,6 +41,7 @@ mapping["chipyard-rocket"]=" CONFIG=QuadChannelRocketConfig" mapping["chipyard-dmirocket"]=" CONFIG=dmiRocketConfig" mapping["chipyard-mempress"]=" CONFIG=MempressRocketConfig" mapping["chipyard-compressacc"]=" CONFIG=ZstdCompressorRocketConfig" +mapping["chipyard-hlsacc"]=" CONFIG=HLSAcceleratorRocketConfig" mapping["chipyard-prefetchers"]=" CONFIG=PrefetchingRocketConfig" mapping["chipyard-digitaltop"]=" TOP=DigitalTop" mapping["chipyard-manymmioaccels"]=" CONFIG=ManyMMIOAcceleratorRocketConfig" diff --git a/.github/workflows/chipyard-run-tests.yml b/.github/workflows/chipyard-run-tests.yml index 31d4900c5b..d9b8244775 100644 --- a/.github/workflows/chipyard-run-tests.yml +++ b/.github/workflows/chipyard-run-tests.yml @@ -972,6 +972,29 @@ jobs: group-key: "group-accels" project-key: "chipyard-compressacc" + chipyard-hlsacc-run-tests: + name: chipyard-hlsacc-run-tests + needs: prepare-chipyard-accels + runs-on: as4 + steps: + - name: Delete old checkout + run: | + ls -alh . + rm -rf ${{ github.workspace }}/* || true + rm -rf ${{ github.workspace }}/.* || true + ls -alh . 
+ - name: Checkout + uses: actions/checkout@v4 + - name: Git workaround + uses: ./.github/actions/git-workaround + - name: Create conda env + uses: ./.github/actions/create-conda-env + - name: Run tests + uses: ./.github/actions/run-tests + with: + group-key: "group-accels" + project-key: "chipyard-hlsacc" + tracegen-boomv3-run-tests: name: tracegen-boomv3-run-tests diff --git a/build.sbt b/build.sbt index 39fd7f214a..26b96f76db 100644 --- a/build.sbt +++ b/build.sbt @@ -158,7 +158,7 @@ lazy val chipyard = (project in file("generators/chipyard")) dsptools, rocket_dsp_utils, gemmini, icenet, tracegen, cva6, nvdla, sodor, ibex, fft_generator, constellation, mempress, barf, shuttle, caliptra_aes, rerocc, - compressacc, saturn, ara, firrtl2_bridge) + compressacc, saturn, ara, firrtl2_bridge, hls_accel) .settings(libraryDependencies ++= rocketLibDeps.value) .settings( libraryDependencies ++= Seq( @@ -263,6 +263,11 @@ lazy val rocc_acc_utils = (project in file("generators/rocc-acc-utils")) .settings(libraryDependencies ++= rocketLibDeps.value) .settings(commonSettings) +lazy val hls_accel = (project in file("generators/hls-example")) + .dependsOn(rocketchip) + .settings(libraryDependencies ++= rocketLibDeps.value) + .settings(commonSettings) + lazy val tapeout = (project in file("./tools/tapeout/")) .settings(chisel3Settings) // stuck on chisel3 and SFC .settings(commonSettings) diff --git a/docs/Customization/Incorporating-HLS.rst b/docs/Customization/Incorporating-HLS.rst new file mode 100644 index 0000000000..f7e643749f --- /dev/null +++ b/docs/Customization/Incorporating-HLS.rst @@ -0,0 +1,69 @@ +.. _incorporating-hls: + +Incorporating HLS +============================ + +High Level Synthesis (HLS) is a method for iterating quickly on +different hardware algorithms that automatically generates an RTL +circuit to match a specification in a high level language like C. 
+ +Here, we will integrate an HLS-generated accelerator that computes +the Great Common Denominator (GCD) of two integers. This tutorial +builds on the sections :ref:`mmio-accelerators` and +:ref:`incorporating-verilog-blocks`. The code for this example can +be found in ``/generators/hls-example`` + +Adding an HLS project +--------------------------------------- + +In this tutorial, we use Vitis HLS, version 2023.2. + +Our project consists of 3 HLS files: +* C program of the GCD algorithm: ``accel/HLSAccel.cpp`` +* Header file: ``accel/HLSAccel.hpp`` +* TCL script to run Vitis HLS: ``run_hls.tcl`` + +To generate the verilog files, as well as synthesis reports, run: + +.. code-block:: none + vitis_hls run_hls.tcl + +The files can be found in a generated folder named proj\_\, +in our case, ``proj_gcd_example``. + +In our case, we include a ``Makefile`` to script running HLS. To generate the +verilog files using the Makefile, run: + +.. code-block:: none + make + +To delete the generated files, run: + +.. code-block:: none + make clean + +Creating the Verilog black box +--------------------------------------- + +.. Note:: This section discusses automatically running HLS within a Verilog black box. +Please consult :ref:`incorporating-verilog-blocks` for background information +on writing a Verilog black box. + +We use Scala to run ``make``, which runs HLS and copies the files into ``hls-example/src/main/resources/vsrc``. +Then, we add the path to each file. This code will execute during Chisel elaboration, conveniently handling +file generation for the user. + +.. literalinclude:: ../../generators/hls-example/src/main/scala/example/HLSExample.scala + :language: scala + :start-after: DOC include start: HLS blackbox + :end-before: DOC include end: HLS blackbox + +Running the example +--------------------------------------- + +To test if the accelerator works, use the test program in ``tests/gcd.c``. +Compile the program with ``make``. Then, run: + +.. 
code-block:: none + cd sims/vcs + make run-binary CONFIG=HLSAcceleratorRocketConfig BINARY=../../tests/gcd.riscv \ No newline at end of file diff --git a/docs/Customization/index.rst b/docs/Customization/index.rst index 9656efe654..b74135f7c7 100644 --- a/docs/Customization/index.rst +++ b/docs/Customization/index.rst @@ -46,6 +46,7 @@ We recommend reading all these pages in order. Hit next to get started! Keys-Traits-Configs DMA-Devices Incorporating-Verilog-Blocks + Incorporating-HLS Memory-Hierarchy Boot-Process IOBinders diff --git a/generators/chipyard/src/main/scala/DigitalTop.scala b/generators/chipyard/src/main/scala/DigitalTop.scala index c4fa17aafa..814e60d9ae 100644 --- a/generators/chipyard/src/main/scala/DigitalTop.scala +++ b/generators/chipyard/src/main/scala/DigitalTop.scala @@ -40,6 +40,7 @@ class DigitalTop(implicit p: Parameters) extends ChipyardSystem with fftgenerator.CanHavePeripheryFFT // Enables optionally having an MMIO-based FFT block with constellation.soc.CanHaveGlobalNoC // Support instantiating a global NoC interconnect with rerocc.CanHaveReRoCCTiles // Support tiles that instantiate rerocc-attached accelerators + with hlsaccel.CanHavePeripheryHLSAccel { override lazy val module = new DigitalTopModule(this) } diff --git a/generators/chipyard/src/main/scala/config/MMIOAcceleratorConfigs.scala b/generators/chipyard/src/main/scala/config/MMIOAcceleratorConfigs.scala index 30a180439f..c194730a00 100644 --- a/generators/chipyard/src/main/scala/config/MMIOAcceleratorConfigs.scala +++ b/generators/chipyard/src/main/scala/config/MMIOAcceleratorConfigs.scala @@ -65,3 +65,8 @@ class ManyMMIOAcceleratorRocketConfig extends Config( new chipyard.example.WithStreamingFIR ++ // use top with tilelink-controlled streaming FIR new freechips.rocketchip.rocket.WithNHugeCores(1) ++ new chipyard.config.AbstractConfig) + +class HLSAcceleratorRocketConfig extends Config( + new hlsaccel.WithHLSAccel ++ + new freechips.rocketchip.rocket.WithNHugeCores(1) ++ + 
new chipyard.config.AbstractConfig) diff --git a/generators/hls-example/Makefile b/generators/hls-example/Makefile new file mode 100644 index 0000000000..bde615aa6f --- /dev/null +++ b/generators/hls-example/Makefile @@ -0,0 +1,23 @@ +base_dir=$(abspath .) +hls_gendir=$(base_dir)/proj_gcd_example/solution1/syn/verilog +vsrc_dir=$(base_dir)/src/main/resources/vsrc + +.PHONY: default $(RUN_HLS) clean + +HLS_CMD = vitis_hls +TCL_SCRIPT = run_hls.tcl +RUN_HLS = vitis_hls.log +ACCEL_C = accel/HLSAccel.cpp +ACCEL_H = accel/HLSAccel.hpp + +default: $(RUN_HLS) + +$(RUN_HLS): $(ACCEL_C) $(ACCEL_H) $(TCL_SCRIPT) + $(HLS_CMD) $(TCL_SCRIPT) + mkdir -p $(vsrc_dir) + cp -r $(hls_gendir)/. $(vsrc_dir) + +clean: + rm -rf proj_gcd_example + rm -f vitis_hls.log + rm -f $(vsrc_dir)/*.v \ No newline at end of file diff --git a/generators/hls-example/accel/HLSAccel.cpp b/generators/hls-example/accel/HLSAccel.cpp new file mode 100644 index 0000000000..d46e9a1b76 --- /dev/null +++ b/generators/hls-example/accel/HLSAccel.cpp @@ -0,0 +1,19 @@ +#include "HLSAccel.hpp" + +io_t HLSAccelBlackBox(io_t x, io_t y) { + io_t tmp; + io_t gcd; + + tmp = y; + gcd = x; + + while(tmp != 0) { + if (gcd > tmp) { + gcd = gcd - tmp; + } else { + tmp = tmp - gcd; + } + } + + return gcd; +} \ No newline at end of file diff --git a/generators/hls-example/accel/HLSAccel.hpp b/generators/hls-example/accel/HLSAccel.hpp new file mode 100644 index 0000000000..dac31809d3 --- /dev/null +++ b/generators/hls-example/accel/HLSAccel.hpp @@ -0,0 +1,17 @@ +#ifndef _GCD_EX_H_ +#define _GCD_EX_H_ + +#include +#include +#include + +#define DATA_WIDTH 32 + +typedef ap_uint io_t; + +// extern "C" { +// io_t gcd_example(io_t x, io_t y); +// } +io_t HLSAccelBlackBox(io_t x, io_t y); + +#endif \ No newline at end of file diff --git a/generators/hls-example/run_hls.tcl b/generators/hls-example/run_hls.tcl new file mode 100644 index 0000000000..ab4c5fa369 --- /dev/null +++ b/generators/hls-example/run_hls.tcl @@ -0,0 +1,8 @@ 
+open_project -reset proj_gcd_example +add_files accel/HLSAccel.cpp +set_top HLSAccelBlackBox +open_solution -reset "solution1" +set_part {xcvu9p-flgb2104-2-i} +create_clock -period 10 +csynth_design +exit \ No newline at end of file diff --git a/generators/hls-example/src/main/scala/HLSExample.scala b/generators/hls-example/src/main/scala/HLSExample.scala new file mode 100644 index 0000000000..bef81e3615 --- /dev/null +++ b/generators/hls-example/src/main/scala/HLSExample.scala @@ -0,0 +1,125 @@ +package hlsaccel + +import sys.process._ + +import chisel3._ +import chisel3.util._ +import chisel3.experimental.{IntParam, BaseModule} +import freechips.rocketchip.amba.axi4._ +import freechips.rocketchip.prci._ +import freechips.rocketchip.subsystem.{BaseSubsystem, PBUS} +import org.chipsalliance.cde.config.{Parameters, Field, Config} +import freechips.rocketchip.diplomacy._ +import freechips.rocketchip.regmapper.{HasRegMap, RegField} +import freechips.rocketchip.tilelink._ +import freechips.rocketchip.util.UIntIsOneOf + +case class HLSAccelParams( + address: BigInt = 0x4000, + width: Int = 32 +) + +case object HLSAccelKey extends Field[Option[HLSAccelParams]](None) + +class HLSAccelIO(val w: Int) extends Bundle { + val ap_clk = Input(Clock()) + val ap_rst = Input(Reset()) + val ap_start = Input(Bool()) + val ap_done = Output(Bool()) + val ap_idle = Output(Bool()) + val ap_ready = Output(Bool()) + val x = Input(UInt(w.W)) + val y = Input(UInt(w.W)) + val ap_return = Output(UInt(w.W)) +} + +// DOC include start: HLS blackbox +class HLSAccelBlackBox(val w: Int) extends BlackBox with HasBlackBoxPath { + val io = IO(new HLSAccelIO(w)) + + val chipyardDir = System.getProperty("user.dir") + val hlsDir = s"$chipyardDir/generators/hls-example" + + // Run HLS command + val make = s"make -C ${hlsDir} default" + require (make.! 
== 0, "Failed to run HLS") + + // Add each vlog file + addPath(s"$hlsDir/src/main/resources/vsrc/HLSAccelBlackBox.v") + addPath(s"$hlsDir/src/main/resources/vsrc/HLSAccelBlackBox_flow_control_loop_pipe.v") +} +// DOC include end: HLS blackbox + +class HLSAccel(params: HLSAccelParams, beatBytes: Int)(implicit p: Parameters) extends ClockSinkDomain(ClockSinkParameters())(p) { + val device = new SimpleDevice("hlsaccel", Seq("ucbbar,hlsaccel")) + val node = TLRegisterNode(Seq(AddressSet(params.address, 4096-1)), device, "reg/control", beatBytes=beatBytes) + + override lazy val module = new HLSAccelImpl + class HLSAccelImpl extends Impl { + withClockAndReset(clock, reset) { + val x = Reg(UInt(params.width.W)) + val y = Wire(new DecoupledIO(UInt(params.width.W))) + val y_reg = Reg(UInt(params.width.W)) + val gcd = Wire(new DecoupledIO(UInt(params.width.W))) + val gcd_reg = Reg(UInt(params.width.W)) + val status = Wire(UInt(2.W)) + + val impl = Module(new HLSAccelBlackBox(params.width)) + + impl.io.ap_clk := clock + impl.io.ap_rst := reset + + val s_idle :: s_busy :: Nil = Enum(2) + val state = RegInit(s_idle) + val result_valid = RegInit(false.B) + when (state === s_idle && y.valid) { + state := s_busy + result_valid := false.B + y_reg := y.bits + } .elsewhen (state === s_busy && impl.io.ap_done) { + state := s_idle + result_valid := true.B + gcd_reg := impl.io.ap_return + } + + impl.io.ap_start := state === s_busy + + gcd.valid := result_valid + status := Cat(impl.io.ap_idle, result_valid) + + impl.io.x := x + impl.io.y := y_reg + y.ready := impl.io.ap_idle + gcd.bits := gcd_reg + + node.regmap( + 0x00 -> Seq( + RegField.r(2, status)), // a read-only register capturing current status + 0x04 -> Seq( + RegField.w(params.width, x)), // a plain, write-only register + 0x08 -> Seq( + RegField.w(params.width, y)), // write-only, y.valid is set on write + 0x0C -> Seq( + RegField.r(params.width, gcd))) // read-only, gcd.ready is set on read + } + } +} + +trait 
CanHavePeripheryHLSAccel { this: BaseSubsystem => + private val portName = "hlsaccel" + private val pbus = locateTLBusWrapper(PBUS) + + val hlsacc = p(HLSAccelKey) match { + case Some(params) => { + val acc = LazyModule(new HLSAccel(params, pbus.beatBytes)(p)) + acc.clockNode := pbus.fixedClockNode + pbus.coupleTo(portName) { acc.node := TLFragmenter(pbus.beatBytes, pbus.blockBytes) := _ } + acc + } + case None => None + } +} + +class WithHLSAccel(address: BigInt = 0x4000) extends Config((site, here, up) => { + case HLSAccelKey => Some(HLSAccelParams(address = address)) +}) From 89a1c0bebd8d35343e86c5a4036f2ec3170ec418 Mon Sep 17 00:00:00 2001 From: Ella Schwarz Date: Wed, 18 Sep 2024 16:06:12 -0700 Subject: [PATCH 2/9] Remove commented code --- generators/hls-example/accel/HLSAccel.hpp | 3 --- 1 file changed, 3 deletions(-) diff --git a/generators/hls-example/accel/HLSAccel.hpp b/generators/hls-example/accel/HLSAccel.hpp index dac31809d3..88ce36d0d9 100644 --- a/generators/hls-example/accel/HLSAccel.hpp +++ b/generators/hls-example/accel/HLSAccel.hpp @@ -9,9 +9,6 @@ typedef ap_uint io_t; -// extern "C" { -// io_t gcd_example(io_t x, io_t y); -// } io_t HLSAccelBlackBox(io_t x, io_t y); #endif \ No newline at end of file From 298cf8a2946e7b8f392f8640d017700f0709b641 Mon Sep 17 00:00:00 2001 From: Ella Schwarz Date: Mon, 23 Sep 2024 11:10:25 -0700 Subject: [PATCH 3/9] Remove HLS accel from CI run-tests --- .github/workflows/chipyard-run-tests.yml | 23 ----------------------- 1 file changed, 23 deletions(-) diff --git a/.github/workflows/chipyard-run-tests.yml b/.github/workflows/chipyard-run-tests.yml index d9b8244775..31d4900c5b 100644 --- a/.github/workflows/chipyard-run-tests.yml +++ b/.github/workflows/chipyard-run-tests.yml @@ -972,29 +972,6 @@ jobs: group-key: "group-accels" project-key: "chipyard-compressacc" - chipyard-hlsacc-run-tests: - name: chipyard-hlsacc-run-tests - needs: prepare-chipyard-accels - runs-on: as4 - steps: - - name: Delete old checkout - 
run: | - ls -alh . - rm -rf ${{ github.workspace }}/* || true - rm -rf ${{ github.workspace }}/.* || true - ls -alh . - - name: Checkout - uses: actions/checkout@v4 - - name: Git workaround - uses: ./.github/actions/git-workaround - - name: Create conda env - uses: ./.github/actions/create-conda-env - - name: Run tests - uses: ./.github/actions/run-tests - with: - group-key: "group-accels" - project-key: "chipyard-hlsacc" - tracegen-boomv3-run-tests: name: tracegen-boomv3-run-tests From e5ac97845bea2f6708bb2d83e678278f02eeaceb Mon Sep 17 00:00:00 2001 From: Ella Schwarz Date: Mon, 23 Sep 2024 16:41:11 -0700 Subject: [PATCH 4/9] Compress C accel into 1 file + fix spacing --- generators/hls-example/Makefile | 8 +++---- generators/hls-example/accel/HLSAccel.cpp | 27 +++++++++++++++-------- generators/hls-example/accel/HLSAccel.hpp | 14 ------------ 3 files changed, 21 insertions(+), 28 deletions(-) delete mode 100644 generators/hls-example/accel/HLSAccel.hpp diff --git a/generators/hls-example/Makefile b/generators/hls-example/Makefile index bde615aa6f..a684135eaf 100644 --- a/generators/hls-example/Makefile +++ b/generators/hls-example/Makefile @@ -2,17 +2,15 @@ base_dir=$(abspath .) hls_gendir=$(base_dir)/proj_gcd_example/solution1/syn/verilog vsrc_dir=$(base_dir)/src/main/resources/vsrc -.PHONY: default $(RUN_HLS) clean +.PHONY: default run-hls clean HLS_CMD = vitis_hls TCL_SCRIPT = run_hls.tcl -RUN_HLS = vitis_hls.log ACCEL_C = accel/HLSAccel.cpp -ACCEL_H = accel/HLSAccel.hpp -default: $(RUN_HLS) +default: run-hls -$(RUN_HLS): $(ACCEL_C) $(ACCEL_H) $(TCL_SCRIPT) +run-hls: $(ACCEL_C) $(TCL_SCRIPT) $(HLS_CMD) $(TCL_SCRIPT) mkdir -p $(vsrc_dir) cp -r $(hls_gendir)/. 
$(vsrc_dir) diff --git a/generators/hls-example/accel/HLSAccel.cpp b/generators/hls-example/accel/HLSAccel.cpp index d46e9a1b76..39b5992775 100644 --- a/generators/hls-example/accel/HLSAccel.cpp +++ b/generators/hls-example/accel/HLSAccel.cpp @@ -1,19 +1,28 @@ -#include "HLSAccel.hpp" +#ifndef _GCD_EX_H_ +#define _GCD_EX_H_ + +#include + +#define DATA_WIDTH 32 + +typedef ap_uint io_t; io_t HLSAccelBlackBox(io_t x, io_t y) { - io_t tmp; - io_t gcd; + io_t tmp; + io_t gcd; - tmp = y; + tmp = y; gcd = x; - while(tmp != 0) { - if (gcd > tmp) { + while(tmp != 0) { + if (gcd > tmp) { gcd = gcd - tmp; } else { tmp = tmp - gcd; } - } + } + + return gcd; +} - return gcd; -} \ No newline at end of file +#endif \ No newline at end of file diff --git a/generators/hls-example/accel/HLSAccel.hpp b/generators/hls-example/accel/HLSAccel.hpp deleted file mode 100644 index 88ce36d0d9..0000000000 --- a/generators/hls-example/accel/HLSAccel.hpp +++ /dev/null @@ -1,14 +0,0 @@ -#ifndef _GCD_EX_H_ -#define _GCD_EX_H_ - -#include -#include -#include - -#define DATA_WIDTH 32 - -typedef ap_uint io_t; - -io_t HLSAccelBlackBox(io_t x, io_t y); - -#endif \ No newline at end of file From 61f19f1b195d4899ea5320d6148ec1ce9c0cb352 Mon Sep 17 00:00:00 2001 From: Ella Schwarz Date: Wed, 25 Sep 2024 16:03:48 -0700 Subject: [PATCH 5/9] Move HLS example to chipyard/example --- build.sbt | 7 +- docs/Customization/Incorporating-HLS.rst | 37 ++++-- .../src/main/resources/hls}/HLSAccel.cpp | 2 +- .../chipyard/src/main/resources/hls/Makefile | 21 +++ .../src/main/resources/hls/run_hls.tcl | 11 ++ .../chipyard/src/main/scala/DigitalTop.scala | 1 - .../scala/config/MMIOAcceleratorConfigs.scala | 10 +- .../chipyard/src/main/scala/example/GCD.scala | 106 ++++++++++++++- generators/hls-example/Makefile | 21 --- generators/hls-example/run_hls.tcl | 8 -- .../src/main/scala/HLSExample.scala | 125 ------------------ 11 files changed, 170 insertions(+), 179 deletions(-) rename generators/{hls-example/accel => 
chipyard/src/main/resources/hls}/HLSAccel.cpp (88%) create mode 100644 generators/chipyard/src/main/resources/hls/Makefile create mode 100644 generators/chipyard/src/main/resources/hls/run_hls.tcl delete mode 100644 generators/hls-example/Makefile delete mode 100644 generators/hls-example/run_hls.tcl delete mode 100644 generators/hls-example/src/main/scala/HLSExample.scala diff --git a/build.sbt b/build.sbt index 26b96f76db..39fd7f214a 100644 --- a/build.sbt +++ b/build.sbt @@ -158,7 +158,7 @@ lazy val chipyard = (project in file("generators/chipyard")) dsptools, rocket_dsp_utils, gemmini, icenet, tracegen, cva6, nvdla, sodor, ibex, fft_generator, constellation, mempress, barf, shuttle, caliptra_aes, rerocc, - compressacc, saturn, ara, firrtl2_bridge, hls_accel) + compressacc, saturn, ara, firrtl2_bridge) .settings(libraryDependencies ++= rocketLibDeps.value) .settings( libraryDependencies ++= Seq( @@ -263,11 +263,6 @@ lazy val rocc_acc_utils = (project in file("generators/rocc-acc-utils")) .settings(libraryDependencies ++= rocketLibDeps.value) .settings(commonSettings) -lazy val hls_accel = (project in file("generators/hls-example")) - .dependsOn(rocketchip) - .settings(libraryDependencies ++= rocketLibDeps.value) - .settings(commonSettings) - lazy val tapeout = (project in file("./tools/tapeout/")) .settings(chisel3Settings) // stuck on chisel3 and SFC .settings(commonSettings) diff --git a/docs/Customization/Incorporating-HLS.rst b/docs/Customization/Incorporating-HLS.rst index f7e643749f..c9b0bc0fdb 100644 --- a/docs/Customization/Incorporating-HLS.rst +++ b/docs/Customization/Incorporating-HLS.rst @@ -10,36 +10,50 @@ circuit to match a specification in a high level language like C. Here, we will integrate an HLS-generated accelerator that computes the Great Common Denominator (GCD) of two integers. This tutorial builds on the sections :ref:`mmio-accelerators` and -:ref:`incorporating-verilog-blocks`. 
The code for this example can -be found in ``/generators/hls-example`` +:ref:`incorporating-verilog-blocks`. Adding an HLS project --------------------------------------- -In this tutorial, we use Vitis HLS, version 2023.2. +In this tutorial, we use Vitis HLS. The user guide for this tool +can be found at https://docs.amd.com/r/en-US/ug1399-vitis-hls. Our project consists of 3 HLS files: -* C program of the GCD algorithm: ``accel/HLSAccel.cpp`` -* Header file: ``accel/HLSAccel.hpp`` -* TCL script to run Vitis HLS: ``run_hls.tcl`` +* C program of the GCD algorithm: :gh-file-ref:`generators/chipyard/src/main/resources/hls/HLSAccel.cpp` +* TCL script to run Vitis HLS: :gh-file-ref:`generators/chipyard/src/main/resources/hls/run_hls.tcl` +* Makefile to run HLS and move verilog files: :gh-file-ref:`generators/chipyard/src/main/resources/hls/Makefile` + +This example implements an iterative GCD algorithm, which is manually connected to +a TileLink register node in the ``HLSGCDAccel`` class in +:gh-file-ref:`generators/chipyard/src/main/scala/example/GCD.scala`. +HLS also supports adding AXI nodes to accelerators using compiler directives and +the HLS stream library. See the Vitis HLS user guide for AXI implementation information. + +The HLS code is synthesized for a particular FPGA target, in this case, +an AMD Alveo U200. The target FPGA part is specified in ``run_hls.tcl`` using +the ``set_part`` command. The clock period, used for design optimization purposes, +is also set in ``run_hls.tcl`` using the ``create_clock`` command. To generate the verilog files, as well as synthesis reports, run: .. code-block:: none + vitis_hls run_hls.tcl The files can be found in a generated folder named proj\_\, in our case, ``proj_gcd_example``. -In our case, we include a ``Makefile`` to script running HLS. To generate the -verilog files using the Makefile, run: +In our case, we include a ``Makefile`` to run HLS and to move files to +their intended locations. 
To generate the verilog files using the Makefile, run: .. code-block:: none + make To delete the generated files, run: .. code-block:: none + make clean Creating the Verilog black box @@ -49,11 +63,11 @@ Creating the Verilog black box Please consult :ref:`incorporating-verilog-blocks` for background information on writing a Verilog black box. -We use Scala to run ``make``, which runs HLS and copies the files into ``hls-example/src/main/resources/vsrc``. +We use Scala to run ``make``, which runs HLS and copies the files into :gh-file-ref:`generators/chipyard/src/main/resources/vsrc`. Then, we add the path to each file. This code will execute during Chisel elaboration, conveniently handling file generation for the user. -.. literalinclude:: ../../generators/hls-example/src/main/scala/example/HLSExample.scala +.. literalinclude:: ../../generators/chipyard/src/main/scala/example/GCD.scala :language: scala :start-after: DOC include start: HLS blackbox :end-before: DOC include end: HLS blackbox @@ -61,9 +75,10 @@ file generation for the user. Running the example --------------------------------------- -To test if the accelerator works, use the test program in ``tests/gcd.c``. +To test if the accelerator works, use the test program in :gh-file-ref:`tests/gcd.c`. Compile the program with ``make``. Then, run: .. 
code-block:: none + cd sims/vcs make run-binary CONFIG=HLSAcceleratorRocketConfig BINARY=../../tests/gcd.riscv \ No newline at end of file diff --git a/generators/hls-example/accel/HLSAccel.cpp b/generators/chipyard/src/main/resources/hls/HLSAccel.cpp similarity index 88% rename from generators/hls-example/accel/HLSAccel.cpp rename to generators/chipyard/src/main/resources/hls/HLSAccel.cpp index 39b5992775..41e58a4a5f 100644 --- a/generators/hls-example/accel/HLSAccel.cpp +++ b/generators/chipyard/src/main/resources/hls/HLSAccel.cpp @@ -7,7 +7,7 @@ typedef ap_uint io_t; -io_t HLSAccelBlackBox(io_t x, io_t y) { +io_t HLSGCDAccelBlackBox(io_t x, io_t y) { io_t tmp; io_t gcd; diff --git a/generators/chipyard/src/main/resources/hls/Makefile b/generators/chipyard/src/main/resources/hls/Makefile new file mode 100644 index 0000000000..01ba225b01 --- /dev/null +++ b/generators/chipyard/src/main/resources/hls/Makefile @@ -0,0 +1,21 @@ +base_dir=$(abspath ../../../..) +hls_dir=$(abspath .) +hls_vlog_gendir=$(hls_dir)/proj_gcd_example/solution1/syn/verilog +vsrc_dir=$(base_dir)/src/main/resources/vsrc + +.PHONY: default run-hls clean + +HLS_CMD = vitis_hls +TCL_SCRIPT = run_hls.tcl +ACCEL_C = HLSAccel.cpp + +default: run-hls + +run-hls: $(ACCEL_C) $(TCL_SCRIPT) + $(HLS_CMD) $(TCL_SCRIPT) + cp -r $(hls_vlog_gendir)/. 
$(vsrc_dir) + +clean: + rm -rf $(hls_dir)/proj_gcd_example + rm -f $(hls_dir)/vitis_hls.log + rm -f $(vsrc_dir)/HLSGCDAccelBlackBox* \ No newline at end of file diff --git a/generators/chipyard/src/main/resources/hls/run_hls.tcl b/generators/chipyard/src/main/resources/hls/run_hls.tcl new file mode 100644 index 0000000000..7b3dd9cd0b --- /dev/null +++ b/generators/chipyard/src/main/resources/hls/run_hls.tcl @@ -0,0 +1,11 @@ +open_project -reset proj_gcd_example +add_files HLSAccel.cpp +set_top HLSGCDAccelBlackBox +open_solution -reset "solution1" + +# Specify FPGA board and clock frequency +set_part {xcu200-fsgd2104-2-e} +create_clock -period 10 + +csynth_design +exit \ No newline at end of file diff --git a/generators/chipyard/src/main/scala/DigitalTop.scala b/generators/chipyard/src/main/scala/DigitalTop.scala index 814e60d9ae..c4fa17aafa 100644 --- a/generators/chipyard/src/main/scala/DigitalTop.scala +++ b/generators/chipyard/src/main/scala/DigitalTop.scala @@ -40,7 +40,6 @@ class DigitalTop(implicit p: Parameters) extends ChipyardSystem with fftgenerator.CanHavePeripheryFFT // Enables optionally having an MMIO-based FFT block with constellation.soc.CanHaveGlobalNoC // Support instantiating a global NoC interconnect with rerocc.CanHaveReRoCCTiles // Support tiles that instantiate rerocc-attached accelerators - with hlsaccel.CanHavePeripheryHLSAccel { override lazy val module = new DigitalTopModule(this) } diff --git a/generators/chipyard/src/main/scala/config/MMIOAcceleratorConfigs.scala b/generators/chipyard/src/main/scala/config/MMIOAcceleratorConfigs.scala index c194730a00..f9418bf359 100644 --- a/generators/chipyard/src/main/scala/config/MMIOAcceleratorConfigs.scala +++ b/generators/chipyard/src/main/scala/config/MMIOAcceleratorConfigs.scala @@ -28,6 +28,11 @@ class GCDAXI4BlackBoxRocketConfig extends Config( new chipyard.config.AbstractConfig) // DOC include end: GCDAXI4BlackBoxRocketConfig +class GCDHLSRocketConfig extends Config( + new 
chipyard.example.WithHLSGCD ++ + new freechips.rocketchip.rocket.WithNHugeCores(1) ++ + new chipyard.config.AbstractConfig) + // DOC include start: InitZeroRocketConfig class InitZeroRocketConfig extends Config( new chipyard.example.WithInitZero(0x88000000L, 0x1000L) ++ // add InitZero @@ -65,8 +70,3 @@ class ManyMMIOAcceleratorRocketConfig extends Config( new chipyard.example.WithStreamingFIR ++ // use top with tilelink-controlled streaming FIR new freechips.rocketchip.rocket.WithNHugeCores(1) ++ new chipyard.config.AbstractConfig) - -class HLSAcceleratorRocketConfig extends Config( - new hlsaccel.WithHLSAccel ++ - new freechips.rocketchip.rocket.WithNHugeCores(1) ++ - new chipyard.config.AbstractConfig) diff --git a/generators/chipyard/src/main/scala/example/GCD.scala b/generators/chipyard/src/main/scala/example/GCD.scala index 7fdb171443..2dca5c4718 100644 --- a/generators/chipyard/src/main/scala/example/GCD.scala +++ b/generators/chipyard/src/main/scala/example/GCD.scala @@ -1,5 +1,7 @@ package chipyard.example +import sys.process._ + import chisel3._ import chisel3.util._ import chisel3.experimental.{IntParam, BaseModule} @@ -17,7 +19,8 @@ case class GCDParams( address: BigInt = 0x4000, width: Int = 32, useAXI4: Boolean = false, - useBlackBox: Boolean = true) + useBlackBox: Boolean = true, + useHLS: Boolean = false) // DOC include end: GCD params // DOC include start: GCD key @@ -37,6 +40,18 @@ class GCDIO(val w: Int) extends Bundle { val busy = Output(Bool()) } +class HLSGCDAccelIO(val w: Int) extends Bundle { + val ap_clk = Input(Clock()) + val ap_rst = Input(Reset()) + val ap_start = Input(Bool()) + val ap_done = Output(Bool()) + val ap_idle = Output(Bool()) + val ap_ready = Output(Bool()) + val x = Input(UInt(w.W)) + val y = Input(UInt(w.W)) + val ap_return = Output(UInt(w.W)) +} + class GCDTopIO extends Bundle { val gcd_busy = Output(Bool()) } @@ -88,6 +103,23 @@ class GCDMMIOChiselModule(val w: Int) extends Module { } // DOC include end: GCD chisel +// 
DOC include start: HLS blackbox +class HLSGCDAccelBlackBox(val w: Int) extends BlackBox with HasBlackBoxPath { + val io = IO(new HLSGCDAccelIO(w)) + + val chipyardDir = System.getProperty("user.dir") + val hlsDir = s"$chipyardDir/generators/chipyard" + + // Run HLS command + val make = s"make -C ${hlsDir}/src/main/resources/hls default" + require (make.! == 0, "Failed to run HLS") + + // Add each vlog file + addPath(s"$hlsDir/src/main/resources/vsrc/HLSGCDAccelBlackBox.v") + addPath(s"$hlsDir/src/main/resources/vsrc/HLSGCDAccelBlackBox_flow_control_loop_pipe.v") +} +// DOC include end: HLS blackbox + // DOC include start: GCD router class GCDTL(params: GCDParams, beatBytes: Int)(implicit p: Parameters) extends ClockSinkDomain(ClockSinkParameters())(p) { val device = new SimpleDevice("gcd", Seq("ucbbar,gcd")) @@ -190,6 +222,64 @@ class GCDAXI4(params: GCDParams, beatBytes: Int)(implicit p: Parameters) extends } // DOC include end: GCD router +class HLSGCDAccel(params: GCDParams, beatBytes: Int)(implicit p: Parameters) extends ClockSinkDomain(ClockSinkParameters())(p) { + val device = new SimpleDevice("hlsgcdaccel", Seq("ucbbar,hlsgcdaccel")) + val node = TLRegisterNode(Seq(AddressSet(params.address, 4096-1)), device, "reg/control", beatBytes=beatBytes) + + override lazy val module = new HLSGCDAccelImpl + class HLSGCDAccelImpl extends Impl with HasGCDTopIO { + val io = IO(new GCDTopIO) + withClockAndReset(clock, reset) { + val x = Reg(UInt(params.width.W)) + val y = Wire(new DecoupledIO(UInt(params.width.W))) + val y_reg = Reg(UInt(params.width.W)) + val gcd = Wire(new DecoupledIO(UInt(params.width.W))) + val gcd_reg = Reg(UInt(params.width.W)) + val status = Wire(UInt(2.W)) + + val impl = Module(new HLSGCDAccelBlackBox(params.width)) + + impl.io.ap_clk := clock + impl.io.ap_rst := reset + + val s_idle :: s_busy :: Nil = Enum(2) + val state = RegInit(s_idle) + val result_valid = RegInit(false.B) + when (state === s_idle && y.valid) { + state := s_busy + result_valid 
:= false.B + y_reg := y.bits + } .elsewhen (state === s_busy && impl.io.ap_done) { + state := s_idle + result_valid := true.B + gcd_reg := impl.io.ap_return + } + + impl.io.ap_start := state === s_busy + + gcd.valid := result_valid + status := Cat(impl.io.ap_idle, result_valid) + + impl.io.x := x + impl.io.y := y_reg + y.ready := impl.io.ap_idle + gcd.bits := gcd_reg + + io.gcd_busy := !impl.io.ap_idle + + node.regmap( + 0x00 -> Seq( + RegField.r(2, status)), // a read-only register capturing current status + 0x04 -> Seq( + RegField.w(params.width, x)), // a plain, write-only register + 0x08 -> Seq( + RegField.w(params.width, y)), // write-only, y.valid is set on write + 0x0C -> Seq( + RegField.r(params.width, gcd))) // read-only, gcd.ready is set on read + } + } +} + // DOC include start: GCD lazy trait trait CanHavePeripheryGCD { this: BaseSubsystem => private val portName = "gcd" @@ -210,6 +300,15 @@ trait CanHavePeripheryGCD { this: BaseSubsystem => TLFragmenter(pbus.beatBytes, pbus.blockBytes, holdFirstDeny = true) := _ } gcd + } else if (params.useHLS) { + // val gcd = LazyModule( + // if (params.useHLS) new HLSGCDAccel(params, pbus.beatBytes)(p) + // else new GCDTL(params, pbus.beatBytes)(p) + // ) + val gcd = LazyModule(new HLSGCDAccel(params, pbus.beatBytes)(p)) + gcd.clockNode := pbus.fixedClockNode + pbus.coupleTo(portName) { gcd.node := TLFragmenter(pbus.beatBytes, pbus.blockBytes) := _ } + gcd } else { val gcd = LazyModule(new GCDTL(params, pbus.beatBytes)(p)) gcd.clockNode := pbus.fixedClockNode @@ -233,3 +332,8 @@ class WithGCD(useAXI4: Boolean = false, useBlackBox: Boolean = false) extends Co case GCDKey => Some(GCDParams(useAXI4 = useAXI4, useBlackBox = useBlackBox)) }) // DOC include end: GCD config fragment + +// useHLS cannot be used with useAXI4 and useBlackBox +class WithHLSGCD extends Config((site, here, up) => { + case GCDKey => Some(GCDParams(useAXI4 = false, useBlackBox = false, useHLS = true)) +}) \ No newline at end of file diff --git 
a/generators/hls-example/Makefile b/generators/hls-example/Makefile deleted file mode 100644 index a684135eaf..0000000000 --- a/generators/hls-example/Makefile +++ /dev/null @@ -1,21 +0,0 @@ -base_dir=$(abspath .) -hls_gendir=$(base_dir)/proj_gcd_example/solution1/syn/verilog -vsrc_dir=$(base_dir)/src/main/resources/vsrc - -.PHONY: default run-hls clean - -HLS_CMD = vitis_hls -TCL_SCRIPT = run_hls.tcl -ACCEL_C = accel/HLSAccel.cpp - -default: run-hls - -run-hls: $(ACCEL_C) $(TCL_SCRIPT) - $(HLS_CMD) $(TCL_SCRIPT) - mkdir -p $(vsrc_dir) - cp -r $(hls_gendir)/. $(vsrc_dir) - -clean: - rm -rf proj_gcd_example - rm -f vitis_hls.log - rm -f $(vsrc_dir)/*.v \ No newline at end of file diff --git a/generators/hls-example/run_hls.tcl b/generators/hls-example/run_hls.tcl deleted file mode 100644 index ab4c5fa369..0000000000 --- a/generators/hls-example/run_hls.tcl +++ /dev/null @@ -1,8 +0,0 @@ -open_project -reset proj_gcd_example -add_files accel/HLSAccel.cpp -set_top HLSAccelBlackBox -open_solution -reset "solution1" -set_part {xcvu9p-flgb2104-2-i} -create_clock -period 10 -csynth_design -exit \ No newline at end of file diff --git a/generators/hls-example/src/main/scala/HLSExample.scala b/generators/hls-example/src/main/scala/HLSExample.scala deleted file mode 100644 index bef81e3615..0000000000 --- a/generators/hls-example/src/main/scala/HLSExample.scala +++ /dev/null @@ -1,125 +0,0 @@ -package hlsaccel - -import sys.process._ - -import chisel3._ -import chisel3.util._ -import chisel3.experimental.{IntParam, BaseModule} -import freechips.rocketchip.amba.axi4._ -import freechips.rocketchip.prci._ -import freechips.rocketchip.subsystem.{BaseSubsystem, PBUS} -import org.chipsalliance.cde.config.{Parameters, Field, Config} -import freechips.rocketchip.diplomacy._ -import freechips.rocketchip.regmapper.{HasRegMap, RegField} -import freechips.rocketchip.tilelink._ -import freechips.rocketchip.util.UIntIsOneOf - -case class HLSAccelParams( - address: BigInt = 0x4000, - width: 
Int = 32 -) - -case object HLSAccelKey extends Field[Option[HLSAccelParams]](None) - -class HLSAccelIO(val w: Int) extends Bundle { - val ap_clk = Input(Clock()) - val ap_rst = Input(Reset()) - val ap_start = Input(Bool()) - val ap_done = Output(Bool()) - val ap_idle = Output(Bool()) - val ap_ready = Output(Bool()) - val x = Input(UInt(w.W)) - val y = Input(UInt(w.W)) - val ap_return = Output(UInt(w.W)) -} - -// DOC include start: HLS blackbox -class HLSAccelBlackBox(val w: Int) extends BlackBox with HasBlackBoxPath { - val io = IO(new HLSAccelIO(w)) - - val chipyardDir = System.getProperty("user.dir") - val hlsDir = s"$chipyardDir/generators/hls-example" - - // Run HLS command - val make = s"make -C ${hlsDir} default" - require (make.! == 0, "Failed to run HLS") - - // Add each vlog file - addPath(s"$hlsDir/src/main/resources/vsrc/HLSAccelBlackBox.v") - addPath(s"$hlsDir/src/main/resources/vsrc/HLSAccelBlackBox_flow_control_loop_pipe.v") -} -// DOC include end: HLS blackbox - -class HLSAccel(params: HLSAccelParams, beatBytes: Int)(implicit p: Parameters) extends ClockSinkDomain(ClockSinkParameters())(p) { - val device = new SimpleDevice("hlsaccel", Seq("ucbbar,hlsaccel")) - val node = TLRegisterNode(Seq(AddressSet(params.address, 4096-1)), device, "reg/control", beatBytes=beatBytes) - - override lazy val module = new HLSAccelImpl - class HLSAccelImpl extends Impl { - withClockAndReset(clock, reset) { - val x = Reg(UInt(params.width.W)) - val y = Wire(new DecoupledIO(UInt(params.width.W))) - val y_reg = Reg(UInt(params.width.W)) - val gcd = Wire(new DecoupledIO(UInt(params.width.W))) - val gcd_reg = Reg(UInt(params.width.W)) - val status = Wire(UInt(2.W)) - - val impl = Module(new HLSAccelBlackBox(params.width)) - - impl.io.ap_clk := clock - impl.io.ap_rst := reset - - val s_idle :: s_busy :: Nil = Enum(2) - val state = RegInit(s_idle) - val result_valid = RegInit(false.B) - when (state === s_idle && y.valid) { - state := s_busy - result_valid := false.B - y_reg := 
y.bits - } .elsewhen (state === s_busy && impl.io.ap_done) { - state := s_idle - result_valid := true.B - gcd_reg := impl.io.ap_return - } - - impl.io.ap_start := state === s_busy - - gcd.valid := result_valid - status := Cat(impl.io.ap_idle, result_valid) - - impl.io.x := x - impl.io.y := y_reg - y.ready := impl.io.ap_idle - gcd.bits := gcd_reg - - node.regmap( - 0x00 -> Seq( - RegField.r(2, status)), // a read-only register capturing current status - 0x04 -> Seq( - RegField.w(params.width, x)), // a plain, write-only register - 0x08 -> Seq( - RegField.w(params.width, y)), // write-only, y.valid is set on write - 0x0C -> Seq( - RegField.r(params.width, gcd))) // read-only, gcd.ready is set on read - } - } -} - -trait CanHavePeripheryHLSAccel { this: BaseSubsystem => - private val portName = "hlsaccel" - private val pbus = locateTLBusWrapper(PBUS) - - val hlsacc = p(HLSAccelKey) match { - case Some(params) => { - val acc = LazyModule(new HLSAccel(params, pbus.beatBytes)(p)) - acc.clockNode := pbus.fixedClockNode - pbus.coupleTo(portName) { acc.node := TLFragmenter(pbus.beatBytes, pbus.blockBytes) := _ } - acc - } - case None => None - } -} - -class WithHLSAccel(address: BigInt = 0x4000) extends Config((site, here, up) => { - case HLSAccelKey => Some(HLSAccelParams(address = address)) -}) From aa950a8bfae4d789fe07cca932f702d99b41f082 Mon Sep 17 00:00:00 2001 From: Ella Schwarz Date: Thu, 26 Sep 2024 12:34:56 -0700 Subject: [PATCH 6/9] Merge HLS config fragment into existing fragment --- .../scala/config/MMIOAcceleratorConfigs.scala | 2 +- .../chipyard/src/main/scala/example/GCD.scala | 17 ++++++----------- 2 files changed, 7 insertions(+), 12 deletions(-) diff --git a/generators/chipyard/src/main/scala/config/MMIOAcceleratorConfigs.scala b/generators/chipyard/src/main/scala/config/MMIOAcceleratorConfigs.scala index f9418bf359..3c8ca45a90 100644 --- a/generators/chipyard/src/main/scala/config/MMIOAcceleratorConfigs.scala +++ 
b/generators/chipyard/src/main/scala/config/MMIOAcceleratorConfigs.scala @@ -29,7 +29,7 @@ class GCDAXI4BlackBoxRocketConfig extends Config( // DOC include end: GCDAXI4BlackBoxRocketConfig class GCDHLSRocketConfig extends Config( - new chipyard.example.WithHLSGCD ++ + new chipyard.example.WithGCD(useAXI4=false, useBlackBox=false, useHLS=true) ++ new freechips.rocketchip.rocket.WithNHugeCores(1) ++ new chipyard.config.AbstractConfig) diff --git a/generators/chipyard/src/main/scala/example/GCD.scala b/generators/chipyard/src/main/scala/example/GCD.scala index 2dca5c4718..94c434b789 100644 --- a/generators/chipyard/src/main/scala/example/GCD.scala +++ b/generators/chipyard/src/main/scala/example/GCD.scala @@ -301,10 +301,6 @@ trait CanHavePeripheryGCD { this: BaseSubsystem => } gcd } else if (params.useHLS) { - // val gcd = LazyModule( - // if (params.useHLS) new HLSGCDAccel(params, pbus.beatBytes)(p) - // else new GCDTL(params, pbus.beatBytes)(p) - // ) val gcd = LazyModule(new HLSGCDAccel(params, pbus.beatBytes)(p)) gcd.clockNode := pbus.fixedClockNode pbus.coupleTo(portName) { gcd.node := TLFragmenter(pbus.beatBytes, pbus.blockBytes) := _ } @@ -328,12 +324,11 @@ trait CanHavePeripheryGCD { this: BaseSubsystem => // DOC include end: GCD lazy trait // DOC include start: GCD config fragment -class WithGCD(useAXI4: Boolean = false, useBlackBox: Boolean = false) extends Config((site, here, up) => { - case GCDKey => Some(GCDParams(useAXI4 = useAXI4, useBlackBox = useBlackBox)) +class WithGCD(useAXI4: Boolean = false, useBlackBox: Boolean = false, useHLS: Boolean = false) extends Config((site, here, up) => { + case GCDKey => { + // useHLS cannot be used with useAXI4 and useBlackBox + assert(!useHLS || (useHLS && !useAXI4 && !useBlackBox)) + Some(GCDParams(useAXI4 = useAXI4, useBlackBox = useBlackBox, useHLS = useHLS)) + } }) // DOC include end: GCD config fragment - -// useHLS cannot be used with useAXI4 and useBlackBox -class WithHLSGCD extends Config((site, here, up) => 
{ - case GCDKey => Some(GCDParams(useAXI4 = false, useBlackBox = false, useHLS = true)) -}) \ No newline at end of file From 6b14034e30d6c2e38d696dfabb878457613de697 Mon Sep 17 00:00:00 2001 From: Ella Schwarz Date: Thu, 26 Sep 2024 12:36:31 -0700 Subject: [PATCH 7/9] Fix broken path --- docs/Customization/Incorporating-HLS.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/Customization/Incorporating-HLS.rst b/docs/Customization/Incorporating-HLS.rst index c9b0bc0fdb..9e09bc2a00 100644 --- a/docs/Customization/Incorporating-HLS.rst +++ b/docs/Customization/Incorporating-HLS.rst @@ -25,7 +25,7 @@ Our project consists of 3 HLS files: This example implements an iterative GCD algorithm, which is manually connected to a TileLink register node in the ``HLSGCDAccel`` class in -:gh-file-ref:`generators/chipyard/src/main/scala/GCD.scala`. +:gh-file-ref:`generators/chipyard/src/main/scala/example/GCD.scala`. HLS also supports adding AXI nodes to accelerators using compiler directives and the HLS stream library. See the Vitis HLS user guide for AXI implementation information. 
From 669f7ae08cee9ec68e91f8e598e1e90a1b90a97a Mon Sep 17 00:00:00 2001 From: Ella Schwarz Date: Thu, 26 Sep 2024 12:44:50 -0700 Subject: [PATCH 8/9] Fix HLS accel config name --- .github/scripts/defaults.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/scripts/defaults.sh b/.github/scripts/defaults.sh index ee1c25bf92..dfecfa5881 100755 --- a/.github/scripts/defaults.sh +++ b/.github/scripts/defaults.sh @@ -41,7 +41,7 @@ mapping["chipyard-rocket"]=" CONFIG=QuadChannelRocketConfig" mapping["chipyard-dmirocket"]=" CONFIG=dmiRocketConfig" mapping["chipyard-mempress"]=" CONFIG=MempressRocketConfig" mapping["chipyard-compressacc"]=" CONFIG=ZstdCompressorRocketConfig" -mapping["chipyard-hlsacc"]=" CONFIG=HLSAcceleratorRocketConfig" +mapping["chipyard-hlsacc"]=" CONFIG=GCDHLSRocketConfig" mapping["chipyard-prefetchers"]=" CONFIG=PrefetchingRocketConfig" mapping["chipyard-digitaltop"]=" TOP=DigitalTop" mapping["chipyard-manymmioaccels"]=" CONFIG=ManyMMIOAcceleratorRocketConfig" From d030088c5bb6b770dba37ec00e5db7c2e89fe005 Mon Sep 17 00:00:00 2001 From: Ella Schwarz Date: Fri, 27 Sep 2024 10:22:29 -0700 Subject: [PATCH 9/9] Fix newline error --- docs/Customization/Incorporating-HLS.rst | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/docs/Customization/Incorporating-HLS.rst b/docs/Customization/Incorporating-HLS.rst index 9e09bc2a00..4351946899 100644 --- a/docs/Customization/Incorporating-HLS.rst +++ b/docs/Customization/Incorporating-HLS.rst @@ -59,9 +59,7 @@ To delete the generated files, run: Creating the Verilog black box --------------------------------------- -.. Note:: This section discusses automatically running HLS within a Verilog black box. -Please consult :ref:`incorporating-verilog-blocks` for background information -on writing a Verilog black box. +.. Note:: This section discusses automatically running HLS within a Verilog black box. 
Please consult :ref:`incorporating-verilog-blocks` for background information on writing a Verilog black box. We use Scala to run ``make``, which runs HLS and copies the files into :gh-file-ref:`generators/chipyard/src/main/resources/vsrc`. Then, we add the path to each file. This code will execute during Chisel elaboration, conveniently handling