diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile deleted file mode 100644 index f49c338..0000000 --- a/.devcontainer/Dockerfile +++ /dev/null @@ -1,22 +0,0 @@ -FROM docker.io/nvidia/cuda:12.9.0-cudnn-devel-ubuntu24.04 - -# install base dependencies -RUN apt-get update && \ - DEBIAN_FRONTEND=noninteractive apt-get -qq -y install curl lsb-release wget software-properties-common gnupg - -# install llvm 19 -RUN curl -L -O https://apt.llvm.org/llvm.sh && \ - chmod +x llvm.sh && \ - ./llvm.sh 19 - -# install layer 2 depdencncies -RUN DEBIAN_FRONTEND=noninteractive apt-get -qq -y install git pkg-config libssl-dev libpolly-19-dev zlib1g-dev libzstd-dev - -# install rust -RUN curl -sSf -L https://sh.rustup.rs | bash -s -- -y --default-toolchain nightly-2025-03-02 --profile complete -ENV PATH="/root/.cargo/bin:${PATH}" - -# set compliation flags -ENV LD_LIBRARY_PATH="/usr/local/cuda/nvvm/lib64:${LD_LIBRARY_PATH}" -ENV LLVM_LINK_STATIC=1 - diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json deleted file mode 100644 index 1e03420..0000000 --- a/.devcontainer/devcontainer.json +++ /dev/null @@ -1,47 +0,0 @@ -{ - "name": "Rust CUDA Development", - "build": { - "dockerfile": "Dockerfile", - "context": "../" - }, - "mounts": [ - "source=${localEnv:HOME}/.ssh,target=/tmp/.ssh-localhost,type=bind,consistency=cached", - "source=${localEnv:HOME}/llvm-build/llvm-19-debug,target=/opt/llvm-19-debug,type=bind,consistency=cached" - ], - "hostRequirements": { - "cpus": 8, - "memory": "32gb", - "storage": "64gb" - }, - "features": { - "ghcr.io/devcontainers/features/sshd:1": { - "version": "latest" - } - }, - "capAdd": [ - "SYS_PTRACE" - ], - "runArgs": [ - "--security-opt", - "seccomp=unconfined" - ], - "customizations": { - "vscode": { - "extensions": [ - "rust-lang.rust-analyzer", - "ms-vscode.cpptools", - "ms-vscode.cmake-tools", - "nvidia.nsight-vscode-edition", - "vadimcn.vscode-lldb" - ], - "settings": { - "rust-analyzer.cargo.features": 
"all", - "rust-analyzer.checkOnSave": true, - "rust-analyzer.check.command": "clippy" - } - } - }, - "forwardPorts": [], - "postCreateCommand": "rustc --version && nvcc --version", - "remoteUser": "root" -} \ No newline at end of file diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml deleted file mode 100644 index 4d0cf20..0000000 --- a/.github/workflows/ci.yaml +++ /dev/null @@ -1,64 +0,0 @@ -name: Build and Push Docker Images - -on: - push: - branches: - - master - -jobs: - setup: - runs-on: ubuntu-latest - outputs: - repo_name: ${{ steps.repo_name.outputs.lowercase }} - steps: - - name: Set repository name to lowercase - id: repo_name - run: echo "lowercase=$(echo ${{ github.repository }} | tr '[:upper:]' '[:lower:]')" >> $GITHUB_OUTPUT - - build: - needs: setup - runs-on: ubuntu-latest - permissions: - contents: read - packages: write - steps: - - name: Checkout repository - uses: actions/checkout@v4 - - - name: Set up QEMU - uses: docker/setup-qemu-action@v3 - - - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v3 - - - name: Log in to GitHub Container Registry - uses: docker/login-action@v3 - with: - registry: ghcr.io - username: ${{ github.actor }} - password: ${{ secrets.GITHUB_TOKEN }} - - - name: Build Docker image and extract binary - run: | - # Build the Docker image - docker build -f Dockerfile.build -t rust-cuda-builder . 
- - # Create a container from the image - CONTAINER_ID=$(docker create rust-cuda-builder) - - # Create output directory - mkdir -p artifacts - - # Copy the binary - docker cp $CONTAINER_ID:/app/target/release/gpu_runner ./artifacts/ - docker cp $CONTAINER_ID:/app/target/cuda-builder/nvptx64-nvidia-cuda/release/kernels.ptx ./artifacts/ - echo "Copied to artifacts directory" - - # Remove the container - docker rm $CONTAINER_ID - - - name: Upload artifacts - uses: actions/upload-artifact@v4 - with: - name: gpu_runner - path: artifacts/ diff --git a/.gitignore b/.gitignore index 54088d5..e7f35f4 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,3 @@ target .DS_Store +build diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 0000000..e671987 --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,3 @@ +{ + "rust-analyzer.check.allTargets": false +} \ No newline at end of file diff --git a/Cargo.lock b/Cargo.lock index 912a343..b9185f8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2,6 +2,15 @@ # It is not intended for manual editing. 
version = 4 +[[package]] +name = "addr2line" +version = "0.24.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dfbe277e56a376000877090da837660b4427aad530e3028d44e0bffe4f89a1c1" +dependencies = [ + "gimli", +] + [[package]] name = "adler2" version = "2.0.1" @@ -17,6 +26,12 @@ dependencies = [ "memchr", ] +[[package]] +name = "anyhow" +version = "1.0.98" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e16d2d3311acee920a9eb8d33b8cbc1787ce4a264e85f964c2404b969bdcd487" + [[package]] name = "approx" version = "0.5.1" @@ -32,6 +47,21 @@ version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" +[[package]] +name = "backtrace" +version = "0.3.75" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6806a6321ec58106fea15becdad98371e28d92ccbc7c8f1b3b6dd724fe8f1002" +dependencies = [ + "addr2line", + "cfg-if", + "libc", + "miniz_oxide", + "object", + "rustc-demangle", + "windows-targets 0.52.6", +] + [[package]] name = "base16ct" version = "0.2.0" @@ -59,7 +89,7 @@ dependencies = [ "bitflags", "cexpr", "clang-sys", - "itertools 0.13.0", + "itertools", "log", "prettyplease", "proc-macro2", @@ -85,15 +115,6 @@ dependencies = [ "generic-array", ] -[[package]] -name = "build-helper" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bdce191bf3fa4995ce948c8c83b4640a1745457a149e73c6db75b4ffe36aad5f" -dependencies = [ - "semver 0.6.0", -] - [[package]] name = "bytemuck" version = "1.23.1" @@ -106,8 +127,6 @@ version = "1.2.27" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d487aa071b5f64da6f19a3e848e3578944b726ee5a4854b82172f02aa876bfdc" dependencies = [ - "jobserver", - "libc", "shlex", ] @@ -169,15 +188,6 @@ dependencies = [ "libc", ] -[[package]] -name = "crc32fast" -version = "1.4.2" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "a97769d94ddab943e4510d138150169a2758b5ef3eb191a9ee688de3e23ef7b3" -dependencies = [ - "cfg-if", -] - [[package]] name = "critical-section" version = "1.2.0" @@ -212,69 +222,6 @@ dependencies = [ "typenum", ] -[[package]] -name = "cuda_builder" -version = "0.3.0" -source = "git+https://github.com/Rust-GPU/Rust-CUDA.git#afb147ed51fbb14b758e10a0a24dbc2311a52b82" -dependencies = [ - "nvvm", - "rustc_codegen_nvvm", - "serde", - "serde_json", -] - -[[package]] -name = "cuda_std" -version = "0.2.2" -source = "git+https://github.com/Rust-GPU/Rust-CUDA.git#afb147ed51fbb14b758e10a0a24dbc2311a52b82" -dependencies = [ - "bitflags", - "cuda_std_macros", - "half", - "paste", - "vek", -] - -[[package]] -name = "cuda_std_macros" -version = "0.2.0" -source = "git+https://github.com/Rust-GPU/Rust-CUDA.git#afb147ed51fbb14b758e10a0a24dbc2311a52b82" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - -[[package]] -name = "curl" -version = "0.4.48" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9e2d5c8f48d9c0c23250e52b55e82a6ab4fdba6650c931f5a0a57a43abda812b" -dependencies = [ - "curl-sys", - "libc", - "openssl-probe", - "openssl-sys", - "schannel", - "socket2", - "windows-sys 0.59.0", -] - -[[package]] -name = "curl-sys" -version = "0.4.82+curl-8.14.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c4d63638b5ec65f1a4ae945287b3fd035be4554bbaf211901159c9a2a74fb5be" -dependencies = [ - "cc", - "libc", - "libz-sys", - "openssl-sys", - "pkg-config", - "vcpkg", - "windows-sys 0.59.0", -] - [[package]] name = "curve25519-dalek" version = "4.1.3" @@ -304,7 +251,7 @@ dependencies = [ [[package]] name = "cust" version = "0.3.2" -source = "git+https://github.com/Rust-GPU/Rust-CUDA.git#afb147ed51fbb14b758e10a0a24dbc2311a52b82" +source = 
"git+https://github.com/brandonros/Rust-CUDA.git?rev=10cbfd2bb8fbe54b42b891b270da821512c56409#10cbfd2bb8fbe54b42b891b270da821512c56409" dependencies = [ "bitflags", "bytemuck", @@ -320,7 +267,7 @@ dependencies = [ [[package]] name = "cust_core" version = "0.1.1" -source = "git+https://github.com/Rust-GPU/Rust-CUDA.git#afb147ed51fbb14b758e10a0a24dbc2311a52b82" +source = "git+https://github.com/brandonros/Rust-CUDA.git?rev=10cbfd2bb8fbe54b42b891b270da821512c56409#10cbfd2bb8fbe54b42b891b270da821512c56409" dependencies = [ "cust_derive", "glam", @@ -331,7 +278,7 @@ dependencies = [ [[package]] name = "cust_derive" version = "0.2.0" -source = "git+https://github.com/Rust-GPU/Rust-CUDA.git#afb147ed51fbb14b758e10a0a24dbc2311a52b82" +source = "git+https://github.com/brandonros/Rust-CUDA.git?rev=10cbfd2bb8fbe54b42b891b270da821512c56409#10cbfd2bb8fbe54b42b891b270da821512c56409" dependencies = [ "proc-macro2", "quote", @@ -341,7 +288,7 @@ dependencies = [ [[package]] name = "cust_raw" version = "0.11.3" -source = "git+https://github.com/Rust-GPU/Rust-CUDA.git#afb147ed51fbb14b758e10a0a24dbc2311a52b82" +source = "git+https://github.com/brandonros/Rust-CUDA.git?rev=10cbfd2bb8fbe54b42b891b270da821512c56409#10cbfd2bb8fbe54b42b891b270da821512c56409" dependencies = [ "bimap", "bindgen", @@ -405,28 +352,6 @@ dependencies = [ "zeroize", ] -[[package]] -name = "equivalent" -version = "1.0.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" - -[[package]] -name = "errno" -version = "0.3.13" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "778e2ac28f6c47af28e4907f13ffd1e1ddbd400980a9abd7c8df189bf578a5ad" -dependencies = [ - "libc", - "windows-sys 0.60.2", -] - -[[package]] -name = "fallible-iterator" -version = "0.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2acce4a10f12dc2fb14a218589d4f1f62ef011b2d0cc4b3cb1bba8e94da14649" - 
[[package]] name = "ff" version = "0.13.1" @@ -443,28 +368,6 @@ version = "0.2.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "28dea519a9695b9977216879a3ebfddf92f1c08c05d984f8996aecd6ecdc811d" -[[package]] -name = "filetime" -version = "0.2.25" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "35c0522e981e68cbfa8c3f978441a5f34b30b96e146b33cd3359176b50fe8586" -dependencies = [ - "cfg-if", - "libc", - "libredox", - "windows-sys 0.59.0", -] - -[[package]] -name = "flate2" -version = "1.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4a3d7db9596fecd151c5f638c0ee5d5bd487b6e0ea232e5dc96d5250f6f94b1d" -dependencies = [ - "crc32fast", - "miniz_oxide", -] - [[package]] name = "generic-array" version = "0.14.7" @@ -484,31 +387,14 @@ checksum = "335ff9f135e4384c8150d6f27c6daed433577f86b4750418338c01a1a2528592" dependencies = [ "cfg-if", "libc", - "wasi 0.11.1+wasi-snapshot-preview1", -] - -[[package]] -name = "getrandom" -version = "0.3.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "26145e563e54f2cadc477553f1ec5ee650b00862f0a58bcd12cbdc5f0ea2d2f4" -dependencies = [ - "cfg-if", - "libc", - "r-efi", - "wasi 0.14.2+wasi-0.2.4", + "wasi", ] [[package]] name = "gimli" -version = "0.30.0" +version = "0.31.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e2e1d97fbe9722ba9bbd0c97051c2956e726562b61f86a25a4360398a40edfc9" -dependencies = [ - "fallible-iterator", - "indexmap", - "stable_deref_trait", -] +checksum = "07e28edb80900c19c28f1072f2e8aeca7fa06b23cd4169cefe1af5aa3260783f" [[package]] name = "glam" @@ -529,8 +415,8 @@ checksum = "a8d1add55171497b4705a648c6b583acafb01d58050a51727785f0b2c8e0a2b2" name = "gpu_runner" version = "0.1.0" dependencies = [ + "backtrace", "common", - "cuda_builder", "cust", "hex", "logic", @@ -548,22 +434,6 @@ dependencies = [ "subtle", ] -[[package]] -name = "half" -version = "2.6.0" -source 
= "registry+https://github.com/rust-lang/crates.io-index" -checksum = "459196ed295495a68f7d7fe1d84f6c4b7ff0e21fe3017b2f283c6fac3ad803c9" -dependencies = [ - "cfg-if", - "crunchy", -] - -[[package]] -name = "hashbrown" -version = "0.15.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5971ac85611da7067dbfcabef3c70ebb5606018acd9e2a3903a0da507521e0d5" - [[package]] name = "hex" version = "0.4.3" @@ -571,29 +441,33 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" [[package]] -name = "indexmap" -version = "2.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cea70ddb795996207ad57735b50c5982d8844f38ba9ee5f1aedcfb708a2aa11e" +name = "inkwell" +version = "0.6.0" +source = "git+https://github.com/brandonros/inkwell?rev=28eca69a39abc81383c1481ee2bb88836c5d1143#28eca69a39abc81383c1481ee2bb88836c5d1143" dependencies = [ - "equivalent", - "hashbrown", + "either", + "inkwell_internals", + "libc", + "llvm-sys", + "once_cell", + "thiserror", ] [[package]] -name = "itertools" -version = "0.13.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "413ee7dfc52ee1a4949ceeb7dbc8a33f2d6c088194d9f922fb8318faf1f01186" +name = "inkwell_internals" +version = "0.11.0" +source = "git+https://github.com/brandonros/inkwell?rev=28eca69a39abc81383c1481ee2bb88836c5d1143#28eca69a39abc81383c1481ee2bb88836c5d1143" dependencies = [ - "either", + "proc-macro2", + "quote", + "syn", ] [[package]] name = "itertools" -version = "0.14.0" +version = "0.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2b192c782037fadd9cfa75548310488aabdbf3d2da73885b31bd0abd03351285" +checksum = "413ee7dfc52ee1a4949ceeb7dbc8a33f2d6c088194d9f922fb8318faf1f01186" dependencies = [ "either", ] @@ -604,16 +478,6 @@ version = "1.0.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c" -[[package]] -name = "jobserver" -version = "0.1.33" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "38f262f097c174adebe41eb73d66ae9c06b2844fb0da69969647bbddd9b0538a" -dependencies = [ - "getrandom 0.3.3", - "libc", -] - [[package]] name = "k256" version = "0.13.4" @@ -630,7 +494,6 @@ dependencies = [ name = "kernels" version = "0.1.0" dependencies = [ - "cuda_std", "logic", ] @@ -663,34 +526,19 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f9fbbcab51052fe104eb5e5d351cf728d30a5be1fe14d9be8a3b097481fb97de" [[package]] -name = "libredox" -version = "0.1.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c0ff37bd590ca25063e35af745c343cb7a0271906fb7b37e4813e8f79f00268d" -dependencies = [ - "bitflags", - "libc", - "redox_syscall", -] - -[[package]] -name = "libz-sys" -version = "1.1.22" +name = "llvm-sys" +version = "191.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8b70e7a7df205e92a1a4cd9aaae7898dac0aa555503cc0a649494d0d60e7651d" +checksum = "893cddf1adf0354b93411e413553dd4daf5c43195d73f1acfa1e394bdd371456" dependencies = [ + "anyhow", "cc", + "lazy_static", "libc", - "pkg-config", - "vcpkg", + "regex-lite", + "semver", ] -[[package]] -name = "linux-raw-sys" -version = "0.9.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cd945864f07fe9f5371a27ad7b52a172b4b499999f1d97574c9fa68373937e12" - [[package]] name = "log" version = "0.4.27" @@ -710,26 +558,6 @@ dependencies = [ "tiny-keccak", ] -[[package]] -name = "lzma-sys" -version = "0.1.20" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5fda04ab3764e6cde78b9974eec4f779acaba7c4e84b36eca3cf77c581b85d27" -dependencies = [ - "cc", - "libc", - "pkg-config", -] - -[[package]] -name = "matchers" -version = "0.1.0" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "8263075bb86c5a1b1427b5ae862e8889656f126e9f77c484496e8b47cf5c5558" -dependencies = [ - "regex-automata 0.1.10", -] - [[package]] name = "memchr" version = "2.7.5" @@ -767,16 +595,6 @@ dependencies = [ "minimal-lexical", ] -[[package]] -name = "nu-ansi-term" -version = "0.46.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "77a8165726e8236064dbb45459242600304b42a5ea24ee2948e18e023bf7ba84" -dependencies = [ - "overload", - "winapi", -] - [[package]] name = "num-integer" version = "0.1.46" @@ -796,23 +614,13 @@ dependencies = [ "libm", ] -[[package]] -name = "nvvm" -version = "0.1.1" -source = "git+https://github.com/Rust-GPU/Rust-CUDA.git#afb147ed51fbb14b758e10a0a24dbc2311a52b82" -dependencies = [ - "cust_raw", -] - [[package]] name = "object" version = "0.36.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "62948e14d923ea95ea2c7c86c71013138b66525b86bdc08d2dcc262bdb497b87" dependencies = [ - "flate2", "memchr", - "ruzstd", ] [[package]] @@ -825,42 +633,6 @@ dependencies = [ "portable-atomic", ] -[[package]] -name = "openssl-probe" -version = "0.1.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d05e27ee213611ffe7d6348b942e8f942b37114c00cc03cec254295a4a17852e" - -[[package]] -name = "openssl-sys" -version = "0.9.109" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "90096e2e47630d78b7d1c20952dc621f957103f8bc2c8359ec81290d75238571" -dependencies = [ - "cc", - "libc", - "pkg-config", - "vcpkg", -] - -[[package]] -name = "overload" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b15813163c1d831bf4a13c3610c05c0d03b39feb07f7e09fa234dac9b15aaf39" - -[[package]] -name = "paste" -version = "1.0.15" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a" - -[[package]] -name = "pin-project-lite" -version = "0.2.16" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3b3cff922bd51709b605d9ead9aa71031d81447142d828eb4a6eba76fe619f9b" - [[package]] name = "pkcs8" version = "0.10.2" @@ -871,12 +643,6 @@ dependencies = [ "spki", ] -[[package]] -name = "pkg-config" -version = "0.3.32" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c" - [[package]] name = "portable-atomic" version = "1.11.1" @@ -894,9 +660,9 @@ dependencies = [ [[package]] name = "prettyplease" -version = "0.2.34" +version = "0.2.35" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6837b9e10d61f45f987d50808f83d1ee3d206c66acf650c3e4ae2e1f6ddedf55" +checksum = "061c1221631e079b26479d25bbf2275bfe5917ae8419cd7e34f13bfc2aa7539a" dependencies = [ "proc-macro2", "syn", @@ -920,12 +686,6 @@ dependencies = [ "proc-macro2", ] -[[package]] -name = "r-efi" -version = "5.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f" - [[package]] name = "rand" version = "0.8.5" @@ -953,7 +713,7 @@ version = "0.6.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" dependencies = [ - "getrandom 0.2.16", + "getrandom", ] [[package]] @@ -971,15 +731,6 @@ dependencies = [ "rand_core 0.9.3", ] -[[package]] -name = "redox_syscall" -version = "0.5.13" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0d04b7d0ee6b4a0207a0a7adb104d23ecb0b47d6beae7152d0fa34b692b29fd6" -dependencies = [ - "bitflags", -] - [[package]] name = "regex" version = "1.11.1" @@ -988,17 +739,8 @@ checksum = "b544ef1b4eac5dc2db33ea63606ae9ffcfac26c1416a2806ae0bf5f56b201191" dependencies 
= [ "aho-corasick", "memchr", - "regex-automata 0.4.9", - "regex-syntax 0.8.5", -] - -[[package]] -name = "regex-automata" -version = "0.1.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6c230d73fb8d8c1b9c0b3135c5142a8acee3a0558fb8db5cf1cb65f8d7862132" -dependencies = [ - "regex-syntax 0.6.29", + "regex-automata", + "regex-syntax", ] [[package]] @@ -1009,14 +751,14 @@ checksum = "809e8dc61f6de73b46c85f4c96486310fe304c434cfa43669d7b40f711150908" dependencies = [ "aho-corasick", "memchr", - "regex-syntax 0.8.5", + "regex-syntax", ] [[package]] -name = "regex-syntax" -version = "0.6.29" +name = "regex-lite" +version = "0.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f162c6dd7b008981e4d40210aca20b4bd0f9b60ca9271061b07f78537722f2e1" +checksum = "53a49587ad06b26609c52e423de037e7f57f20d53535d66e08c695f347df952a" [[package]] name = "regex-syntax" @@ -1036,69 +778,13 @@ version = "2.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "357703d41365b4b27c590e3ed91eabb1b663f07c4c084095e60cbed4362dff0d" -[[package]] -name = "rustc_codegen_nvvm" -version = "0.3.0" -source = "git+https://github.com/Rust-GPU/Rust-CUDA.git#afb147ed51fbb14b758e10a0a24dbc2311a52b82" -dependencies = [ - "bitflags", - "build-helper", - "cc", - "curl", - "gimli", - "itertools 0.14.0", - "libc", - "libloading", - "nvvm", - "object", - "rustc-demangle", - "rustc_codegen_nvvm_macros", - "smallvec", - "tar", - "tracing", - "tracing-subscriber", - "xz", -] - -[[package]] -name = "rustc_codegen_nvvm_macros" -version = "0.1.0" -source = "git+https://github.com/Rust-GPU/Rust-CUDA.git#afb147ed51fbb14b758e10a0a24dbc2311a52b82" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - [[package]] name = "rustc_version" version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cfcb3a22ef46e85b45de6ee7e79d063319ebb6594faafcf1c225ea92ab6e9b92" dependencies = [ - "semver 1.0.26", 
-] - -[[package]] -name = "rustix" -version = "1.0.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c71e83d6afe7ff64890ec6b71d6a69bb8a610ab78ce364b3352876bb4c801266" -dependencies = [ - "bitflags", - "errno", - "libc", - "linux-raw-sys", - "windows-sys 0.59.0", -] - -[[package]] -name = "ruzstd" -version = "0.7.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fad02996bfc73da3e301efe90b1837be9ed8f4a462b6ed410aa35d00381de89f" -dependencies = [ - "twox-hash", + "semver", ] [[package]] @@ -1107,15 +793,6 @@ version = "1.0.20" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "28d3b2b1366ec20994f1fd18c3c594f05c5dd4bc44d8bb0c1c632c8d6829481f" -[[package]] -name = "schannel" -version = "0.1.27" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1f29ebaa345f945cec9fbbc532eb307f0fdad8161f281b6369539c8d84876b3d" -dependencies = [ - "windows-sys 0.59.0", -] - [[package]] name = "sec1" version = "0.7.3" @@ -1130,27 +807,12 @@ dependencies = [ "zeroize", ] -[[package]] -name = "semver" -version = "0.6.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a3186ec9e65071a2095434b1f5bb24838d4e8e130f584c790f6033c79943537" -dependencies = [ - "semver-parser", -] - [[package]] name = "semver" version = "1.0.26" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "56e6fa9c48d24d85fb3de5ad847117517440f6beceb7798af16b4a87d616b8d0" -[[package]] -name = "semver-parser" -version = "0.7.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "388a1df253eca08550bef6c72392cfe7c30914bf41df5269b68cbd6ff8f570a3" - [[package]] name = "seq-macro" version = "0.3.6" @@ -1189,15 +851,6 @@ dependencies = [ "serde", ] -[[package]] -name = "sharded-slab" -version = "0.1.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"f40ca3c46823713e0d4209592e8d6e826aa57e928f09752619fc696c499637f6" -dependencies = [ - "lazy_static", -] - [[package]] name = "shlex" version = "1.3.0" @@ -1213,22 +866,6 @@ dependencies = [ "rand_core 0.6.4", ] -[[package]] -name = "smallvec" -version = "1.15.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" - -[[package]] -name = "socket2" -version = "0.5.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e22376abed350d73dd1cd119b57ffccad95b4e585a7cda43e286245ce23c0678" -dependencies = [ - "libc", - "windows-sys 0.52.0", -] - [[package]] name = "spki" version = "0.7.3" @@ -1239,18 +876,6 @@ dependencies = [ "der", ] -[[package]] -name = "stable_deref_trait" -version = "1.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3" - -[[package]] -name = "static_assertions" -version = "1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" - [[package]] name = "subtle" version = "2.6.1" @@ -1259,9 +884,9 @@ checksum = "13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292" [[package]] name = "syn" -version = "2.0.103" +version = "2.0.104" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e4307e30089d6fd6aff212f2da3a1f9e32f3223b1f010fb09b7c95f90f3ca1e8" +checksum = "17b6f705963418cdb9927482fa304bc562ece2fdd4f616084c50b7023b435a40" dependencies = [ "proc-macro2", "quote", @@ -1269,23 +894,23 @@ dependencies = [ ] [[package]] -name = "tar" -version = "0.4.44" +name = "thiserror" +version = "2.0.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1d863878d212c87a19c1a610eb53bb01fe12951c0501cf5a0d65f724914a667a" +checksum = "567b8a2dae586314f7be2a752ec7474332959c6460e02bde30d702a66d488708" 
dependencies = [ - "filetime", - "libc", - "xattr", + "thiserror-impl", ] [[package]] -name = "thread_local" -version = "1.1.9" +name = "thiserror-impl" +version = "2.0.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f60246a4944f24f6e018aa17cdeffb7818b76356965d03b07d6a9886e8962185" +checksum = "7f7cf42b4507d8ea322120659672cf1b9dbb93f8f2d4ecfd6e51350ff5b17a1d" dependencies = [ - "cfg-if", + "proc-macro2", + "quote", + "syn", ] [[package]] @@ -1298,74 +923,10 @@ dependencies = [ ] [[package]] -name = "tracing" -version = "0.1.41" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "784e0ac535deb450455cbfa28a6f0df145ea1bb7ae51b821cf5e7927fdcfbdd0" -dependencies = [ - "pin-project-lite", - "tracing-attributes", - "tracing-core", -] - -[[package]] -name = "tracing-attributes" -version = "0.1.30" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "81383ab64e72a7a8b8e13130c49e3dab29def6d0c7d76a03087b3cf71c5c6903" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - -[[package]] -name = "tracing-core" -version = "0.1.34" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b9d12581f227e93f094d3af2ae690a574abb8a2b9b7a96e7cfe9647b2b617678" -dependencies = [ - "once_cell", - "valuable", -] - -[[package]] -name = "tracing-log" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ee855f1f400bd0e5c02d150ae5de3840039a3f54b025156404e34c23c03f47c3" -dependencies = [ - "log", - "once_cell", - "tracing-core", -] - -[[package]] -name = "tracing-subscriber" -version = "0.3.19" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e8189decb5ac0fa7bc8b96b7cb9b2701d60d48805aca84a238004d665fcc4008" -dependencies = [ - "matchers", - "nu-ansi-term", - "once_cell", - "regex", - "sharded-slab", - "smallvec", - "thread_local", - "tracing", - "tracing-core", - "tracing-log", -] - -[[package]] -name = 
"twox-hash" -version = "1.6.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "97fee6b57c6a41524a810daee9286c02d7752c4253064d0b05472833a438f675" +name = "transpiler" +version = "0.1.0" dependencies = [ - "cfg-if", - "static_assertions", + "inkwell", ] [[package]] @@ -1380,18 +941,6 @@ version = "1.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512" -[[package]] -name = "valuable" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ba73ea9cf16a25df0c8caa16c51acb937d5712a8429db78a3ee29d5dcacd3a65" - -[[package]] -name = "vcpkg" -version = "0.2.15" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" - [[package]] name = "vek" version = "0.17.1" @@ -1416,64 +965,6 @@ version = "0.11.1+wasi-snapshot-preview1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b" -[[package]] -name = "wasi" -version = "0.14.2+wasi-0.2.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9683f9a5a998d873c0d21fcbe3c083009670149a8fab228644b8bd36b2c48cb3" -dependencies = [ - "wit-bindgen-rt", -] - -[[package]] -name = "winapi" -version = "0.3.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" -dependencies = [ - "winapi-i686-pc-windows-gnu", - "winapi-x86_64-pc-windows-gnu", -] - -[[package]] -name = "winapi-i686-pc-windows-gnu" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" - -[[package]] -name = "winapi-x86_64-pc-windows-gnu" -version = "0.4.0" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" - -[[package]] -name = "windows-sys" -version = "0.52.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" -dependencies = [ - "windows-targets 0.52.6", -] - -[[package]] -name = "windows-sys" -version = "0.59.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" -dependencies = [ - "windows-targets 0.52.6", -] - -[[package]] -name = "windows-sys" -version = "0.60.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f2f500e4d28234f72040990ec9d39e3a6b950f9f22d3dba18416c35882612bcb" -dependencies = [ - "windows-targets 0.53.2", -] - [[package]] name = "windows-targets" version = "0.52.6" @@ -1602,43 +1093,6 @@ version = "0.53.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "271414315aff87387382ec3d271b52d7ae78726f5d44ac98b4f4030c91880486" -[[package]] -name = "wit-bindgen-rt" -version = "0.39.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6f42320e61fe2cfd34354ecb597f86f413484a798ba44a8ca1165c58d42da6c1" -dependencies = [ - "bitflags", -] - -[[package]] -name = "xattr" -version = "1.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0d65cbf2f12c15564212d48f4e3dfb87923d25d611f2aed18f4cb23f0413d89e" -dependencies = [ - "libc", - "rustix", -] - -[[package]] -name = "xz" -version = "0.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3c887690ff2a2e233e8e49633461521f98ec57fbff9d59a884c9a4f04ec1da34" -dependencies = [ - "xz2", -] - -[[package]] -name = "xz2" -version = "0.1.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"388c44dc09d76f1536602ead6d325eb532f5c122f17782bd57fb47baeeb767e2" -dependencies = [ - "lzma-sys", -] - [[package]] name = "zerocopy" version = "0.8.26" diff --git a/Cargo.toml b/Cargo.toml index b66ba39..a01f17d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -5,17 +5,13 @@ members = [ "cpu_runner", "gpu_runner", "kernels", - "logic" + "logic", + "transpiler" ] [patch.crates-io] zeroize = { git = "https://github.com/brandonros/utils", rev = "4c9734e1ae4a6cb61db4693f9d164866f084ca97" } -#[patch."https://github.com/Rust-GPU/Rust-CUDA.git"] -#cust = { git = "https://github.com/brandonros/Rust-CUDA.git", rev = "52791ef418ed844aa7d740a86a027fd4cb774909" } -#cuda_std = { git = "https://github.com/brandonros/Rust-CUDA.git", rev = "52791ef418ed844aa7d740a86a027fd4cb774909" } -#cuda_builder = { git = "https://github.com/brandonros/Rust-CUDA.git", rev = "52791ef418ed844aa7d740a86a027fd4cb774909" } - [profile.dev] panic = "abort" diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..5e3bf16 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,27 @@ +FROM docker.io/nvidia/cuda:12.9.1-cudnn-devel-ubuntu24.04 + +# set the path for the nvvm library +ENV LD_LIBRARY_PATH="/usr/local/cuda/nvvm/lib64:${LD_LIBRARY_PATH}" + +# install dependencies +RUN apt-get update && \ + DEBIAN_FRONTEND=noninteractive apt-get -qq -y install curl lsb-release wget software-properties-common gnupg git pkg-config libssl-dev zlib1g-dev libzstd-dev && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* + +# install llvm 19 +RUN curl -L -O https://apt.llvm.org/llvm.sh && \ + chmod +x llvm.sh && \ + ./llvm.sh 19 + +# install polly +RUN apt-get update && \ + DEBIAN_FRONTEND=noninteractive apt-get -qq -y install libpolly-19-dev && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* + +# install rust +RUN curl -sSf -L https://sh.rustup.rs | bash -s -- -y --default-toolchain 1.86.0 --profile complete +ENV PATH="/root/.cargo/bin:${PATH}" +ENV CARGO_TARGET_DIR="/root/.cargo/target" +RUN rustup target add 
riscv64gc-unknown-none-elf diff --git a/Dockerfile.build b/Dockerfile.build deleted file mode 100644 index 86212f5..0000000 --- a/Dockerfile.build +++ /dev/null @@ -1,44 +0,0 @@ -#FROM docker.io/nvidia/cuda:12.9.0-cudnn-devel-ubuntu24.04 -FROM docker.io/nvidia/cuda:12.8.1-cudnn-devel-ubuntu24.04 - -# install base dependencies -RUN apt-get update && \ - DEBIAN_FRONTEND=noninteractive apt-get -qq -y install curl lsb-release wget software-properties-common gnupg - -# install llvm 7 -WORKDIR /data/llvm7 -RUN DEBIAN_FRONTEND=noninteractive apt-get -qq -y install clang -RUN curl -sSf -L -O http://security.ubuntu.com/ubuntu/pool/universe/libf/libffi7/libffi7_3.3-5ubuntu1_amd64.deb && \ - curl -sSf -L -O http://mirrors.kernel.org/ubuntu/pool/universe/l/llvm-toolchain-7/llvm-7_7.0.1-12_amd64.deb && \ - curl -sSf -L -O http://mirrors.kernel.org/ubuntu/pool/universe/l/llvm-toolchain-7/llvm-7-dev_7.0.1-12_amd64.deb && \ - curl -sSf -L -O http://mirrors.kernel.org/ubuntu/pool/universe/l/llvm-toolchain-7/libllvm7_7.0.1-12_amd64.deb && \ - curl -sSf -L -O http://mirrors.kernel.org/ubuntu/pool/universe/l/llvm-toolchain-7/llvm-7-runtime_7.0.1-12_amd64.deb && \ - apt-get install -y ./*.deb && \ - ln -s /usr/bin/llvm-config-7 /usr/bin/llvm-config && \ - rm -rf ./*.deb - -# install llvm 19 -# RUN curl -L -O https://apt.llvm.org/llvm.sh && \ -# chmod +x llvm.sh && \ -# ./llvm.sh 19 - -# install layer 2 depdencncies -RUN DEBIAN_FRONTEND=noninteractive apt-get -qq -y install git pkg-config libssl-dev libpolly-19-dev zlib1g-dev libzstd-dev - -# install rust -RUN curl -sSf -L https://sh.rustup.rs | bash -s -- -y --default-toolchain nightly-2025-03-02 --profile complete -ENV PATH="/root/.cargo/bin:${PATH}" - -# set compliation flags -ENV LD_LIBRARY_PATH="/usr/local/cuda/nvvm/lib64:${LD_LIBRARY_PATH}" -ENV LLVM_LINK_STATIC=1 - -# Copy source code into the image -COPY . /app -WORKDIR /app - -# Build the project -RUN . 
"$HOME/.cargo/env" && cargo build --release -p gpu_runner - -# The binary will be /app/target/release/gpu_runner -# The PTX will be /app/target/cuda-builder/nvptx64-nvidia-cuda/release/kernels.ptx diff --git a/README.md b/README.md index 9077f18..9ea4a11 100644 --- a/README.md +++ b/README.md @@ -9,3 +9,14 @@ cargo run --release -- ethereum-vanity 555555 "" # broken cargo run --release -- bitcoin-vanity bc1qqqqq "" # broken cargo run --release -- shallenge brandonros 000000000000cbaec87e070a04c2eb90644e16f37aab655ccdf683fdda5a6f96 ``` + +## Apple Container + +```shell +container system start +container build -t cuda-12-9-rust-builder +container run --rm -it --memory 8G -v $(pwd):/mnt cuda-12-9-rust-builder +cd /mnt +./scripts/build.sh +container system stop +``` diff --git a/gpu_runner/Cargo.toml b/gpu_runner/Cargo.toml index 97950e5..d7c425b 100644 --- a/gpu_runner/Cargo.toml +++ b/gpu_runner/Cargo.toml @@ -6,9 +6,7 @@ edition = "2024" [dependencies] hex = "0.4.3" rand = "0.8" +backtrace = "0.3" common = { path = "../common" } logic = { path = "../logic" } -cust = { git = "https://github.com/Rust-GPU/Rust-CUDA.git" } - -[build-dependencies] -cuda_builder = { git = "https://github.com/Rust-GPU/Rust-CUDA.git" } +cust = { git = "https://github.com/brandonros/Rust-CUDA.git", rev = "10cbfd2bb8fbe54b42b891b270da821512c56409" } diff --git a/gpu_runner/build.rs b/gpu_runner/build.rs deleted file mode 100644 index 2061c3e..0000000 --- a/gpu_runner/build.rs +++ /dev/null @@ -1,19 +0,0 @@ -use std::env; -use std::path; - -use cuda_builder::CudaBuilder; -use cuda_builder::NvvmArch; - -fn main() { - println!("cargo:rerun-if-changed=build.rs"); - println!("cargo:rerun-if-changed=../kernels"); - println!("cargo:rerun-if-changed=../common"); - println!("cargo:rerun-if-changed=../logic"); - - let out_path = path::PathBuf::from(env::var("OUT_DIR").unwrap()); - CudaBuilder::new("../kernels") - .copy_to(out_path.join("kernels.ptx")) - .arch(NvvmArch::Compute70) - .build() - 
.unwrap(); -} diff --git a/gpu_runner/src/add.rs b/gpu_runner/src/add.rs new file mode 100644 index 0000000..55630ce --- /dev/null +++ b/gpu_runner/src/add.rs @@ -0,0 +1,68 @@ +use cust::device::Device; +use cust::module::Module; +use cust::prelude::Context; +use cust::stream::{Stream, StreamFlags}; +use cust::util::SliceExt; +use cust::memory::CopyDestination; +use cust::launch; +use std::error::Error; + +pub fn device_main_add( + ordinal: usize, + module: &Module, +) -> Result<(), Box> { + // Initialize device and context + let device = Device::get_device(ordinal as u32)?; + let ctx = Context::new(device)?; + cust::context::CurrentContext::set_current(&ctx)?; + + // Create stream and get kernel function + let stream = Stream::new(StreamFlags::NON_BLOCKING, None)?; + let kernel_add = module.get_function("kernel_add")?; + + // Calculate grid/block dimensions + let number_of_streaming_multiprocessors = device.get_attribute(cust::device::DeviceAttribute::MultiprocessorCount)? as usize; + let blocks_per_sm = std::env::var("BLOCKS_PER_SM").unwrap_or("128".to_string()).parse::().unwrap(); + let threads_per_block = std::env::var("THREADS_PER_BLOCK").unwrap_or("256".to_string()).parse::().unwrap(); + let blocks_per_grid = number_of_streaming_multiprocessors * blocks_per_sm; + let operations_per_launch = blocks_per_grid * threads_per_block; + + let data_len = operations_per_launch; + + // Initialize input arrays with predictable data + let input_a: Vec = (0..data_len).map(|i| (i % 1000) as f32).collect(); + let input_b: Vec = (0..data_len).map(|i| ((i + 1) % 1000) as f32).collect(); + + println!("[{ordinal}] Processing {} elements ({} blocks, {} threads per block)", + data_len, blocks_per_grid, threads_per_block); + + // Prepare output buffer + let mut output = vec![0.0f32; data_len]; + + // Transfer data to GPU using SliceExt + let input_a_dev = input_a.as_slice().as_dbuf()?; + let input_b_dev = input_b.as_slice().as_dbuf()?; + let output_dev = 
output.as_slice().as_dbuf()?; + + // Launch kernel + unsafe { + launch!( + kernel_add<<>>( + input_a_dev.as_device_ptr(), + input_b_dev.as_device_ptr(), + output_dev.as_device_ptr(), + ) + )?; + } + + // Wait for completion and copy result back + stream.synchronize()?; + output_dev.copy_to(&mut output)?; + + println!("[{ordinal}] Computation completed"); + + // print the first 10 elements of the output + println!("[{ordinal}] First 10 elements of the output: {:?}", &output[..10]); + + Ok(()) +} diff --git a/gpu_runner/src/bitcoin.rs b/gpu_runner/src/bitcoin.rs index 2168628..6156340 100644 --- a/gpu_runner/src/bitcoin.rs +++ b/gpu_runner/src/bitcoin.rs @@ -1,6 +1,7 @@ use cust::device::Device; use cust::module::Module; use cust::prelude::Context; +use cust::context::ResourceLimit; use cust::stream::{Stream, StreamFlags}; use cust::util::SliceExt; use cust::memory::CopyDestination; @@ -26,6 +27,12 @@ pub fn device_main_bitcoin_vanity( let device = Device::get_device(ordinal as u32)?; let ctx = Context::new(device)?; cust::context::CurrentContext::set_current(&ctx)?; + + // optionally override stack size + if let Some(stack_size) = std::env::var("STACK_SIZE").ok() { + let stack_size = stack_size.parse::().unwrap(); + cust::context::CurrentContext::set_resource_limit(ResourceLimit::StackSize, stack_size)?; + } let stream = Stream::new(StreamFlags::NON_BLOCKING, None)?; let find_bitcoin_vanity_private_key = module.get_function("kernel_find_bitcoin_vanity_private_key")?; diff --git a/gpu_runner/src/ethereum.rs b/gpu_runner/src/ethereum.rs index 61da219..66f34bb 100644 --- a/gpu_runner/src/ethereum.rs +++ b/gpu_runner/src/ethereum.rs @@ -1,5 +1,6 @@ use cust::device::Device; use cust::module::Module; +use cust::context::ResourceLimit; use cust::prelude::Context; use cust::stream::{Stream, StreamFlags}; use cust::util::SliceExt; @@ -18,16 +19,22 @@ pub fn device_main_ethereum_vanity( module: &Module, global_stats: Arc ) -> Result<(), Box> { - let vanity_prefix_bytes = 
hex::decode(vanity_prefix)?; - let vanity_prefix_bytes = vanity_prefix_bytes.as_slice(); + let vanity_prefix_vec = hex::decode(vanity_prefix)?; + let vanity_prefix_bytes = vanity_prefix_vec.as_slice(); let vanity_prefix_len: usize = vanity_prefix_bytes.len(); - let vanity_suffix_bytes = hex::decode(vanity_suffix)?; - let vanity_suffix_bytes = vanity_suffix_bytes.as_slice(); + let vanity_suffix_vec = hex::decode(vanity_suffix)?; + let vanity_suffix_bytes = vanity_suffix_vec.as_slice(); let vanity_suffix_len: usize = vanity_suffix_bytes.len(); let device = Device::get_device(ordinal as u32)?; let ctx = Context::new(device)?; cust::context::CurrentContext::set_current(&ctx)?; + + // optionally override stack size + if let Some(stack_size) = std::env::var("STACK_SIZE").ok() { + let stack_size = stack_size.parse::().unwrap(); + cust::context::CurrentContext::set_resource_limit(ResourceLimit::StackSize, stack_size)?; + } let stream = Stream::new(StreamFlags::NON_BLOCKING, None)?; let find_ethereum_vanity_private_key = module.get_function("kernel_find_ethereum_vanity_private_key")?; diff --git a/gpu_runner/src/main.rs b/gpu_runner/src/main.rs index 4d8e600..9189ff3 100644 --- a/gpu_runner/src/main.rs +++ b/gpu_runner/src/main.rs @@ -1,3 +1,4 @@ +mod add; mod solana; mod bitcoin; mod shallenge; @@ -7,6 +8,8 @@ use cust::device::Device; use cust::module::{Module, ModuleJitOption}; use cust::prelude::Context; use cust::CudaFlags; +use backtrace::Backtrace; + use std::error::Error; use std::sync::{Arc, RwLock}; @@ -15,6 +18,7 @@ use common::SharedBestHash; #[derive(Debug, Clone)] enum Mode { + Add, SolanaVanity { prefix: String, suffix: String }, BitcoinVanity { prefix: String, suffix: String }, EthereumVanity { prefix: String, suffix: String }, @@ -32,16 +36,29 @@ fn device_main( cust::context::CurrentContext::set_current(&ctx)?; println!("[{ordinal}] Loading module..."); - let ptx_path = std::env::var("PTX_PATH") - .map_err(|_| "PTX_PATH environment variable is required")?; 
- let ptx = std::fs::read_to_string(ptx_path) - .map_err(|e| format!("Failed to read PTX file: {}", e))?; - let module = Module::from_ptx(ptx, &[ - ModuleJitOption::MaxRegisters(256), - ])?; + let module = { + let cubin_path = std::env::var("CUBIN_PATH"); + let ptx_path = std::env::var("PTX_PATH"); + if let Ok(cubin_path) = cubin_path { + let cubin = std::fs::read(cubin_path) + .map_err(|e| format!("Failed to read CUBIN file: {}", e))?; + Module::from_cubin(cubin, &[])? + } else if let Ok(ptx_path) = ptx_path { + let ptx = std::fs::read_to_string(ptx_path) + .map_err(|e| format!("Failed to read PTX file: {}", e))?; + Module::from_ptx(ptx, &[ + ModuleJitOption::MaxRegisters(256), + ])? + } else { + return Err("CUBIN_PATH or PTX_PATH environment variable is required".into()); + } + }; println!("[{ordinal}] Module loaded"); match mode { + Mode::Add => { + add::device_main_add(ordinal, &module) + } Mode::SolanaVanity { prefix, suffix } => { solana::device_main_solana_vanity(ordinal, prefix, suffix, &module, global_stats) } @@ -59,9 +76,17 @@ fn device_main( } fn main() -> Result<(), Box> { + std::panic::set_hook(Box::new(|panic_info| { + let backtrace = Backtrace::new(); + eprintln!("Thread panicked: {}", panic_info); + eprintln!("Backtrace:\n{:?}", backtrace); + })); + let args = std::env::args().collect::>(); - let mode = if args.len() == 4 && args[1] == "solana-vanity" { + let mode = if args.len() == 2 && args[1] == "add" { + Mode::Add + } else if args.len() == 4 && args[1] == "solana-vanity" { let vanity_prefix = args[2].clone(); let vanity_suffix = args[3].clone(); if vanity_prefix.len() > 0 { @@ -98,6 +123,7 @@ fn main() -> Result<(), Box> { Mode::Shallenge { username, target_hash } } else { println!("Usage:"); + println!(" {} add", args[0]); println!(" {} solana-vanity ", args[0]); println!(" {} bitcoin-vanity ", args[0]); println!(" {} ethereum-vanity ", args[0]); @@ -130,10 +156,16 @@ fn main() -> Result<(), Box> { username.len(), 0 // No suffix for shallenge 
)), + Mode::Add => Arc::new(GlobalStats::new( + num_devices as usize, + 0, // No prefix for add + 0 // No suffix for add + )), }; // Create shared state for shallenge mode let shared_best_hash = match &mode { + Mode::Add => None, Mode::SolanaVanity { .. } => None, Mode::BitcoinVanity { .. } => None, Mode::EthereumVanity { .. } => None, @@ -145,7 +177,11 @@ fn main() -> Result<(), Box> { } }; + // log match &mode { + Mode::Add => { + println!("Running add mode"); + } Mode::SolanaVanity { prefix, suffix } => { println!("Searching for solana vanity key with prefix '{}' and suffix '{}'", prefix, suffix); } @@ -160,22 +196,50 @@ fn main() -> Result<(), Box> { } } + // spawn threads let mut handles = Vec::new(); for i in 0..num_devices as usize { println!("Starting device {}", i); let mode_clone = mode.clone(); let shared_best_hash_clone = shared_best_hash.clone(); let stats_clone = Arc::clone(&global_stats); - handles.push(std::thread::spawn(move || device_main( - i, - mode_clone, - shared_best_hash_clone, - stats_clone - ))); + handles.push(std::thread::spawn(move || -> Result<(), Box> { + // Wrap the device_main call to capture any errors with context + device_main(i, mode_clone, shared_best_hash_clone, stats_clone) + .map_err(|e| { + let bt = Backtrace::new(); + eprintln!("Error in device {}: {}", i, e); + eprintln!("Backtrace:\n{:?}", bt); + e + }) + })); } - for handle in handles { - handle.join().unwrap().unwrap(); + // wait for threads + for (i, handle) in handles.into_iter().enumerate() { + match handle.join() { + Ok(result) => { + if let Err(e) = result { + eprintln!("Device {} returned error: {}", i, e); + return Err(e); + } + } + Err(panic_payload) => { + eprintln!("Device {} thread panicked!", i); + + // Try to extract panic message + let panic_msg = if let Some(s) = panic_payload.downcast_ref::<&str>() { + s.to_string() + } else if let Some(s) = panic_payload.downcast_ref::() { + s.clone() + } else { + "Unknown panic".to_string() + }; + + eprintln!("Panic 
message: {}", panic_msg); + return Err(format!("Device {} panicked: {}", i, panic_msg).into()); + } + } } Ok(()) diff --git a/kernels/.cargo/config.toml b/kernels/.cargo/config.toml new file mode 100644 index 0000000..a5d635b --- /dev/null +++ b/kernels/.cargo/config.toml @@ -0,0 +1,9 @@ +[build] +target = "riscv64gc-unknown-none-elf" + +[target.riscv64gc-unknown-none-elf] +rustflags = [ + "--emit=llvm-ir", + "-C", "lto=fat", + "-C", "embed-bitcode=yes", +] diff --git a/kernels/Cargo.toml b/kernels/Cargo.toml index 92b99f7..8714036 100644 --- a/kernels/Cargo.toml +++ b/kernels/Cargo.toml @@ -4,8 +4,7 @@ version = "0.1.0" edition = "2024" [lib] -crate-type = ["cdylib", "rlib"] +crate-type = ["staticlib"] [dependencies] logic = { path = "../logic" } -cuda_std = { git = "https://github.com/Rust-GPU/Rust-CUDA.git" } diff --git a/kernels/src/add.rs b/kernels/src/add.rs new file mode 100644 index 0000000..9fdf33b --- /dev/null +++ b/kernels/src/add.rs @@ -0,0 +1,20 @@ +use crate::utilities; + +#[unsafe(no_mangle)] +#[allow(improper_ctypes_definitions, clippy::missing_safety_doc)] +pub unsafe extern "C" fn kernel_add( + input_a_ptr: *const f32, + input_b_ptr: *const f32, + output_ptr: *mut f32, +) { + let thread_idx = utilities::get_thread_idx(); + + // Perform the addition + let a = unsafe { *input_a_ptr.add(thread_idx) }; + let b = unsafe { *input_b_ptr.add(thread_idx) }; + let result = a + b; + + unsafe { + *output_ptr.add(thread_idx) = result; + } +} diff --git a/kernels/src/atomic.rs b/kernels/src/atomic.rs new file mode 100644 index 0000000..41f0751 --- /dev/null +++ b/kernels/src/atomic.rs @@ -0,0 +1,7 @@ +unsafe extern "C" { + fn __nvvm_atomic_add_global_i32(address: *mut u32, val: u32) -> u32; +} + +pub unsafe fn atomic_add_u32(address: *mut u32, val: u32) -> u32 { + unsafe { __nvvm_atomic_add_global_i32(address, val) } +} diff --git a/kernels/src/bitcoin_vanity.rs b/kernels/src/bitcoin_vanity.rs index da33e32..9e4ec12 100644 --- a/kernels/src/bitcoin_vanity.rs 
+++ b/kernels/src/bitcoin_vanity.rs @@ -1,4 +1,4 @@ -use cuda_std::atomic::intrinsics::atomic_fetch_add_relaxed_u32_device; +use crate::{atomic, utilities}; /// Handle the infrastructure concerns when a match is found unsafe fn handle_bitcoin_vanity_match_found( @@ -16,7 +16,7 @@ unsafe fn handle_bitcoin_vanity_match_found( let found_matches = &mut found_matches_slice[0]; // If first find, copy results to host - if unsafe { atomic_fetch_add_relaxed_u32_device(found_matches, 0) } == 0 { + if unsafe { atomic::atomic_add_u32(found_matches, 0) } == 0 { let found_private_key = unsafe { core::slice::from_raw_parts_mut(found_private_key_ptr, 32) }; let found_public_key = unsafe { core::slice::from_raw_parts_mut(found_public_key_ptr, 33) }; let found_public_key_hash = unsafe { core::slice::from_raw_parts_mut(found_public_key_hash_ptr, 20) }; @@ -33,14 +33,15 @@ unsafe fn handle_bitcoin_vanity_match_found( } // Increment number of found matches - unsafe { atomic_fetch_add_relaxed_u32_device(found_matches, 1) }; + unsafe { atomic::atomic_add_u32(found_matches, 1) }; // TODO: do we need device_fence here? 
} -#[cuda_std::kernel] +// TODO: kernel +#[unsafe(no_mangle)] #[allow(improper_ctypes_definitions, clippy::missing_safety_doc)] -pub unsafe fn kernel_find_bitcoin_vanity_private_key( +pub unsafe extern "C" fn kernel_find_bitcoin_vanity_private_key( // input vanity_prefix_ptr: *const u8, vanity_prefix_len: usize, @@ -57,7 +58,7 @@ pub unsafe fn kernel_find_bitcoin_vanity_private_key( found_thread_idx_slice_ptr: *mut u32, ) { // Prepare request - let thread_idx = cuda_std::thread::index() as usize; + let thread_idx = utilities::get_thread_idx(); let vanity_prefix = unsafe { core::slice::from_raw_parts(vanity_prefix_ptr, vanity_prefix_len) }; let vanity_suffix = unsafe { core::slice::from_raw_parts(vanity_suffix_ptr, vanity_suffix_len) }; let request = logic::BitcoinVanityKeyRequest { diff --git a/kernels/src/ethereum_vanity.rs b/kernels/src/ethereum_vanity.rs index 7f6e184..d6717e1 100644 --- a/kernels/src/ethereum_vanity.rs +++ b/kernels/src/ethereum_vanity.rs @@ -1,4 +1,4 @@ -use cuda_std::atomic::intrinsics::atomic_fetch_add_relaxed_u32_device; +use crate::{atomic, utilities}; /// Handle the infrastructure concerns when a match is found unsafe fn handle_ethereum_vanity_match_found( @@ -14,7 +14,7 @@ unsafe fn handle_ethereum_vanity_match_found( let found_matches = &mut found_matches_slice[0]; // If first find, copy results to host - if unsafe { atomic_fetch_add_relaxed_u32_device(found_matches, 0) } == 0 { + if unsafe { atomic::atomic_add_u32(found_matches, 0) } == 0 { let found_private_key = unsafe { core::slice::from_raw_parts_mut(found_private_key_ptr, 32) }; let found_public_key = unsafe { core::slice::from_raw_parts_mut(found_public_key_ptr, 64) }; let found_address = unsafe { core::slice::from_raw_parts_mut(found_address_ptr, 20) }; @@ -27,14 +27,15 @@ unsafe fn handle_ethereum_vanity_match_found( } // Increment number of found matches - unsafe { atomic_fetch_add_relaxed_u32_device(found_matches, 1) }; + unsafe { atomic::atomic_add_u32(found_matches, 1) }; 
// TODO: do we need device_fence here? } -#[cuda_std::kernel] +// TODO: kernel +#[unsafe(no_mangle)] #[allow(improper_ctypes_definitions, clippy::missing_safety_doc)] -pub unsafe fn kernel_find_ethereum_vanity_private_key( +pub unsafe extern "C" fn kernel_find_ethereum_vanity_private_key( // input vanity_prefix_ptr: *const u8, vanity_prefix_len: usize, @@ -49,7 +50,7 @@ pub unsafe fn kernel_find_ethereum_vanity_private_key( found_thread_idx_slice_ptr: *mut u32, ) { // Prepare request - let thread_idx = cuda_std::thread::index() as usize; + let thread_idx = utilities::get_thread_idx(); let vanity_prefix = unsafe { core::slice::from_raw_parts(vanity_prefix_ptr, vanity_prefix_len) }; let vanity_suffix = unsafe { core::slice::from_raw_parts(vanity_suffix_ptr, vanity_suffix_len) }; let request = logic::EthereumVanityKeyRequest { diff --git a/kernels/src/lib.rs b/kernels/src/lib.rs index 4dc9865..af92330 100644 --- a/kernels/src/lib.rs +++ b/kernels/src/lib.rs @@ -2,12 +2,18 @@ extern crate alloc; +mod add; +mod atomic; mod solana_vanity; mod bitcoin_vanity; mod ethereum_vanity; mod shallenge; +mod memory; +mod panic; +mod utilities; pub use solana_vanity::*; pub use bitcoin_vanity::*; pub use ethereum_vanity::*; pub use shallenge::*; +pub use add::*; diff --git a/kernels/src/memory.rs b/kernels/src/memory.rs new file mode 100644 index 0000000..9a1a176 --- /dev/null +++ b/kernels/src/memory.rs @@ -0,0 +1,23 @@ +use alloc::alloc::{GlobalAlloc, Layout}; +use core::ffi::c_void; + +unsafe extern "C" { + // implicitly defined by cuda. 
+ pub fn malloc(size: usize) -> *mut c_void; + + pub fn free(ptr: *mut c_void); +} + +pub struct CUDAAllocator; + +unsafe impl GlobalAlloc for CUDAAllocator { + unsafe fn alloc(&self, layout: Layout) -> *mut u8 { + unsafe { malloc(layout.size()) as *mut u8 } + } + unsafe fn dealloc(&self, ptr: *mut u8, _layout: Layout) { + unsafe { free(ptr as *mut _); } + } +} + +#[global_allocator] +pub static GLOBAL_ALLOCATOR: CUDAAllocator = CUDAAllocator; diff --git a/kernels/src/panic.rs b/kernels/src/panic.rs new file mode 100644 index 0000000..48d01fb --- /dev/null +++ b/kernels/src/panic.rs @@ -0,0 +1,10 @@ +use core::panic::PanicInfo; + +unsafe extern "C" { + fn __nvvm_trap() -> !; +} + +#[panic_handler] +fn panic(_info: &PanicInfo) -> ! { + unsafe { __nvvm_trap() }; +} diff --git a/kernels/src/shallenge.rs b/kernels/src/shallenge.rs index 99410d3..bd1ebc1 100644 --- a/kernels/src/shallenge.rs +++ b/kernels/src/shallenge.rs @@ -1,4 +1,4 @@ -use cuda_std::atomic::intrinsics::atomic_fetch_add_relaxed_u32_device; +use crate::{atomic, utilities}; /// Handle the infrastructure concerns when a better hash is found unsafe fn handle_shallenge_match_found( @@ -25,14 +25,15 @@ unsafe fn handle_shallenge_match_found( found_thread_idx_slice[0] = thread_idx as u32; // Increment number of found matches - unsafe { atomic_fetch_add_relaxed_u32_device(found_matches, 1) }; + unsafe { atomic::atomic_add_u32(found_matches, 1) }; // TODO: do we need device_fence here? 
} -#[cuda_std::kernel] +// TODO: kernel +#[unsafe(no_mangle)] #[allow(improper_ctypes_definitions, clippy::missing_safety_doc)] -pub unsafe fn kernel_find_better_shallenge_nonce( +pub unsafe extern "C" fn kernel_find_better_shallenge_nonce( // input username_ptr: *const u8, username_len: usize, @@ -46,7 +47,7 @@ pub unsafe fn kernel_find_better_shallenge_nonce( found_thread_idx_slice_ptr: *mut u32, ) { // Prepare request - let thread_idx = cuda_std::thread::index() as usize; + let thread_idx = utilities::get_thread_idx(); let username = unsafe { core::slice::from_raw_parts(username_ptr, username_len) }; let target_hash_slice = unsafe { core::slice::from_raw_parts(target_hash_ptr, 32) }; let target_hash: &[u8; 32] = unsafe { &*(target_hash_slice.as_ptr() as *const [u8; 32]) }; diff --git a/kernels/src/solana_vanity.rs b/kernels/src/solana_vanity.rs index b94f1b3..707bc6c 100644 --- a/kernels/src/solana_vanity.rs +++ b/kernels/src/solana_vanity.rs @@ -1,4 +1,4 @@ -use cuda_std::atomic::intrinsics::atomic_fetch_add_relaxed_u32_device; +use crate::{atomic, utilities}; /// Handle the infrastructure concerns when a match is found unsafe fn handle_solana_vanity_match_found( @@ -14,7 +14,7 @@ unsafe fn handle_solana_vanity_match_found( let found_matches = &mut found_matches_slice[0]; // If first find, copy results to host - if unsafe { atomic_fetch_add_relaxed_u32_device(found_matches, 0) } == 0 { + if unsafe { atomic::atomic_add_u32(found_matches, 0) } == 0 { let found_private_key = unsafe { core::slice::from_raw_parts_mut(found_private_key_ptr, 32) }; let found_public_key = unsafe { core::slice::from_raw_parts_mut(found_public_key_ptr, 32) }; let found_bs58_encoded_public_key = unsafe { core::slice::from_raw_parts_mut(found_bs58_encoded_public_key_ptr, 64) }; @@ -27,14 +27,15 @@ unsafe fn handle_solana_vanity_match_found( } // Increment number of found matches - unsafe { atomic_fetch_add_relaxed_u32_device(found_matches, 1) }; + unsafe { 
atomic::atomic_add_u32(found_matches, 1) };
     // TODO: do we need device_fence here?
 }
 
-#[cuda_std::kernel]
+// TODO: kernel
+#[unsafe(no_mangle)]
 #[allow(improper_ctypes_definitions, clippy::missing_safety_doc)]
-pub unsafe fn kernel_find_solana_vanity_private_key(
+pub unsafe extern "C" fn kernel_find_solana_vanity_private_key(
     // input
     vanity_prefix_ptr: *const u8,
     vanity_prefix_len: usize,
@@ -49,7 +50,7 @@ pub unsafe fn kernel_find_solana_vanity_private_key(
     found_thread_idx_slice_ptr: *mut u32,
 ) {
     // Prepare request
-    let thread_idx = cuda_std::thread::index() as usize;
+    let thread_idx = utilities::get_thread_idx();
     let vanity_prefix = unsafe { core::slice::from_raw_parts(vanity_prefix_ptr, vanity_prefix_len) };
     let vanity_suffix = unsafe { core::slice::from_raw_parts(vanity_suffix_ptr, vanity_suffix_len) };
     let request = logic::SolanaVanityKeyRequest {
diff --git a/kernels/src/utilities.rs b/kernels/src/utilities.rs
new file mode 100644
index 0000000..646ea13
--- /dev/null
+++ b/kernels/src/utilities.rs
@@ -0,0 +1,60 @@
+unsafe extern "C" {
+    fn __nvvm_thread_idx_x() -> u32;
+    fn __nvvm_thread_idx_y() -> u32;
+    fn __nvvm_thread_idx_z() -> u32;
+
+    fn __nvvm_block_dim_x() -> u32;
+    fn __nvvm_block_dim_y() -> u32;
+    fn __nvvm_block_dim_z() -> u32;
+
+    fn __nvvm_block_idx_x() -> u32;
+    fn __nvvm_block_idx_y() -> u32;
+    fn __nvvm_block_idx_z() -> u32;
+
+    fn __nvvm_grid_dim_x() -> u32;
+    fn __nvvm_grid_dim_y() -> u32;
+    fn __nvvm_grid_dim_z() -> u32;
+
+    fn __nvvm_warp_size() -> u32;
+
+    fn __nvvm_block_barrier();
+
+    fn __nvvm_grid_fence();
+    fn __nvvm_device_fence();
+    fn __nvvm_system_fence();
+}
+
+/// Returns the calling thread's linear index within the whole launch.
+///
+/// Blocks are numbered x-fastest within the grid and threads x-fastest
+/// within their block; the result is
+/// `block_id * threads_per_block + thread_in_block`.
+///
+/// Each special-register value is widened to `usize` before any arithmetic.
+/// The registers are `u32`, and multiplying them as `u32` wraps silently in
+/// release builds once a launch holds more than `u32::MAX` threads, handing
+/// the same index to several threads. With a 64-bit `usize` (the build
+/// targets riscv64 and nvptx64) the products cannot wrap.
+pub fn get_thread_idx() -> usize {
+    let thread_idx_x = unsafe { __nvvm_thread_idx_x() } as usize;
+    let thread_idx_y = unsafe { __nvvm_thread_idx_y() } as usize;
+    let thread_idx_z = unsafe { __nvvm_thread_idx_z() } as usize;
+
+    let block_dim_x = unsafe { __nvvm_block_dim_x() } as usize;
+    let block_dim_y = unsafe { __nvvm_block_dim_y() } as usize;
+    let block_dim_z = unsafe { __nvvm_block_dim_z() } as usize;
+
+    let grid_dim_x = unsafe { __nvvm_grid_dim_x() } as usize;
+    let grid_dim_y = unsafe { __nvvm_grid_dim_y() } as usize;
+
+    let block_idx_x = unsafe { __nvvm_block_idx_x() } as usize;
+    let block_idx_y = unsafe { __nvvm_block_idx_y() } as usize;
+    let block_idx_z = unsafe { __nvvm_block_idx_z() } as usize;
+
+    let threads_per_block = block_dim_x * block_dim_y * block_dim_z;
+    let block_id = block_idx_x + block_idx_y * grid_dim_x + block_idx_z * grid_dim_x * grid_dim_y;
+    let thread_in_block =
+        thread_idx_x + thread_idx_y * block_dim_x + thread_idx_z * block_dim_x * block_dim_y;
+
+    block_id * threads_per_block + thread_in_block
+}
diff --git a/logic/src/sha256.rs b/logic/src/sha256.rs
index 8136e2a..eb7d807 100644
--- a/logic/src/sha256.rs
+++ b/logic/src/sha256.rs
@@ -239,10 +239,10 @@ pub fn sha256_32_from_bytes(input: &[u8; 32]) -> [u8; 32] {
 pub fn sha256_from_bytes(input: &[u8]) -> [u8; 32] {
     let hash_words = sha256_variable_length(input);
     let mut result = [0u8; 32];
-    for (i, word) in hash_words.iter().enumerate() {
-        let bytes = word.to_be_bytes();
-        result[i * 4..i * 4 + 4].copy_from_slice(&bytes);
-    }
+    seq!(N in 0..8 {
+        let bytes = hash_words[N].to_be_bytes();
+        result[N * 4..N * 4 + 4].copy_from_slice(&bytes);
+    });
     result
 }
 
diff --git a/nvvm_compiler/Makefile b/nvvm_compiler/Makefile
new file mode 100644
index 0000000..42a923d
--- /dev/null
+++ b/nvvm_compiler/Makefile
@@ -0,0 +1,20 @@
+# Simple Makefile for NVVM compiler
+
+CC = cc
+CUDA_PATH = /usr/local/cuda-12.9
+CFLAGS = -I$(CUDA_PATH)/nvvm/include
+LDFLAGS = -L$(CUDA_PATH)/nvvm/lib64 -lnvvm
+BUILD_DIR = build
+TARGET = $(BUILD_DIR)/nvvm_compiler
+SOURCE = main.c
+
+$(TARGET): $(SOURCE) | $(BUILD_DIR)
+	$(CC) $(SOURCE) $(CFLAGS) $(LDFLAGS) -o $(TARGET)
+
+$(BUILD_DIR):
+	mkdir -p $(BUILD_DIR)
+
+clean:
+	rm -rf $(BUILD_DIR)
+
+.PHONY: clean
\ No newline at end of file
diff --git a/nvvm_compiler/main.c b/nvvm_compiler/main.c
new file mode 100644
index 0000000..5aca974
--- /dev/null
+++ b/nvvm_compiler/main.c
@@ -0,0 +1,208 @@
+#include <nvvm.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+/*
+ * Reads the whole file at `path` into a newly allocated buffer.
+ *
+ * On success returns the buffer (owned by the caller) and stores its length
+ * in `*size_out`. On failure prints a diagnostic to stderr and returns NULL.
+ */
+static char* read_file(const char* path, size_t* size_out) {
+    FILE* file = fopen(path, "rb");
+    if (!file) {
+        fprintf(stderr, "Error: Could not open %s\n", path);
+        return NULL;
+    }
+
+    /* ftell reports failure as -1, so keep the length signed until checked. */
+    long length = -1;
+    if (fseek(file, 0, SEEK_END) == 0) {
+        length = ftell(file);
+    }
+    if (length < 0 || fseek(file, 0, SEEK_SET) != 0) {
+        fprintf(stderr, "Error: Could not determine size of %s\n", path);
+        fclose(file);
+        return NULL;
+    }
+
+    /* malloc(0) may return NULL, so always ask for at least one byte. */
+    size_t size = (size_t)length;
+    char* data = malloc(size > 0 ? size : 1);
+    if (!data) {
+        fprintf(stderr, "Error: Out of memory reading %s\n", path);
+        fclose(file);
+        return NULL;
+    }
+
+    if (fread(data, 1, size, file) != size) {
+        fprintf(stderr, "Error: Could not read %s\n", path);
+        free(data);
+        fclose(file);
+        return NULL;
+    }
+
+    fclose(file);
+    *size_out = size;
+    return data;
+}
+
+/* Prints the NVVM program log to stderr under `title`, if the log is non-empty. */
+static void print_program_log(nvvmProgram prog, const char* title) {
+    size_t log_size = 0;
+    if (nvvmGetProgramLogSize(prog, &log_size) != NVVM_SUCCESS || log_size <= 1) {
+        return;
+    }
+
+    char* log = malloc(log_size);
+    if (!log) {
+        return;
+    }
+    if (nvvmGetProgramLog(prog, log) == NVVM_SUCCESS) {
+        fprintf(stderr, "%s:\n%s\n", title, log);
+    }
+    free(log);
+}
+
+/*
+ * Links a bitcode module against the intrinsics module with libNVVM and
+ * writes the resulting PTX to stdout. Progress and errors go to stderr so
+ * they never mix with the PTX. Returns 0 on success and 1 on any failure.
+ */
+int main(int argc, char* argv[]) {
+    if (argc != 4) {
+        fprintf(stderr, "Usage: %s <input.bc> <libintrinsics.bc> <arch>\n", argv[0]);
+        fprintf(stderr, "Example: %s input.bc libintrinsics.bc compute_75\n", argv[0]);
+        return 1;
+    }
+
+    /* Everything released at `cleanup` starts out empty so that every
+     * failure path can share the same exit code. */
+    int exit_code = 1;
+    char* bitcode = NULL;
+    char* libintrinsics = NULL;
+    char* ptx = NULL;
+    size_t bitcode_size = 0;
+    size_t libintrinsics_size = 0;
+    size_t ptx_size = 0;
+    size_t ptx_length = 0;
+    nvvmProgram prog = NULL;
+    nvvmResult result;
+
+    /* Prepare architecture option */
+    char arch_option[64];
+    int written = snprintf(arch_option, sizeof(arch_option), "-arch=%s", argv[3]);
+    if (written < 0 || (size_t)written >= sizeof(arch_option)) {
+        fprintf(stderr, "Error: Architecture name is too long: %s\n", argv[3]);
+        return 1;
+    }
+    const char* options[] = {
+        arch_option,
+        "-opt=3",
+    };
+    int num_options = sizeof(options) / sizeof(options[0]);
+
+    /* Read your Rust-generated bitcode */
+    fprintf(stderr, "Reading bitcode from %s\n", argv[1]);
+    bitcode = read_file(argv[1], &bitcode_size);
+    if (!bitcode) {
+        goto cleanup;
+    }
+
+    /* Read libintrinsics */
+    fprintf(stderr, "Reading libintrinsics from %s\n", argv[2]);
+    libintrinsics = read_file(argv[2], &libintrinsics_size);
+    if (!libintrinsics) {
+        goto cleanup;
+    }
+
+    /* Create NVVM program */
+    fprintf(stderr, "Creating NVVM program\n");
+    result = nvvmCreateProgram(&prog);
+    if (result != NVVM_SUCCESS) {
+        fprintf(stderr, "Error creating NVVM program: %d\n", result);
+        prog = NULL; /* nothing to destroy */
+        goto cleanup;
+    }
+
+    /* Add your LLVM bitcode module */
+    fprintf(stderr, "Adding module to program\n");
+    result = nvvmAddModuleToProgram(prog, bitcode, bitcode_size, "mymodule");
+    if (result != NVVM_SUCCESS) {
+        fprintf(stderr, "Error adding module to program: %d\n", result);
+        print_program_log(prog, "NVVM Log");
+        goto cleanup;
+    }
+
+    /* Add libintrinsics */
+    fprintf(stderr, "Adding libintrinsics to program\n");
+    result = nvvmLazyAddModuleToProgram(prog, libintrinsics, libintrinsics_size, "libintrinsics");
+    if (result != NVVM_SUCCESS) {
+        fprintf(stderr, "Error adding libintrinsics to program: %d\n", result);
+        print_program_log(prog, "NVVM Log");
+        goto cleanup;
+    }
+
+    /* Verify the program before compilation */
+    fprintf(stderr, "Verifying program with arch: %s\n", argv[3]);
+    result = nvvmVerifyProgram(prog, num_options, options);
+    if (result != NVVM_SUCCESS) {
+        fprintf(stderr, "Error verifying program: %d\n", result);
+        print_program_log(prog, "NVVM Verification Log");
+        goto cleanup;
+    }
+
+    /* Compile to PTX */
+    fprintf(stderr, "Compiling program with arch: %s\n", argv[3]);
+    result = nvvmCompileProgram(prog, num_options, options);
+    if (result != NVVM_SUCCESS) {
+        fprintf(stderr, "Error compiling program: %d\n", result);
+        print_program_log(prog, "NVVM Log");
+        goto cleanup;
+    }
+
+    /* Get PTX result */
+    fprintf(stderr, "Getting compiled result size\n");
+    result = nvvmGetCompiledResultSize(prog, &ptx_size);
+    if (result != NVVM_SUCCESS) {
+        fprintf(stderr, "Error getting compiled result size: %d\n", result);
+        goto cleanup;
+    }
+
+    fprintf(stderr, "Getting compiled result\n");
+    ptx = malloc(ptx_size > 0 ? ptx_size : 1);
+    if (!ptx) {
+        fprintf(stderr, "Error: Out of memory for %zu bytes of PTX\n", ptx_size);
+        goto cleanup;
+    }
+    result = nvvmGetCompiledResult(prog, ptx);
+    if (result != NVVM_SUCCESS) {
+        fprintf(stderr, "Error getting compiled result: %d\n", result);
+        goto cleanup;
+    }
+
+    /* Write PTX to stdout; the reported size includes the null terminator, which is dropped. */
+    fprintf(stderr, "Writing PTX to stdout\n");
+    ptx_length = ptx_size > 0 ? ptx_size - 1 : 0;
+    if (fwrite(ptx, 1, ptx_length, stdout) != ptx_length || fflush(stdout) != 0) {
+        fprintf(stderr, "Error writing PTX to stdout\n");
+        goto cleanup;
+    }
+
+    /* Stats go to stderr so they don't interfere with the PTX output */
+    fprintf(stderr, "Successfully compiled LLVM bitcode to PTX!\n");
+    fprintf(stderr, "Input: %s (%zu bytes)\n", argv[1], bitcode_size);
+    fprintf(stderr, "Architecture: %s\n", argv[3]);
+    fprintf(stderr, "Output: PTX (%zu bytes)\n", ptx_length);
+    exit_code = 0;
+
+cleanup:
+    if (prog) {
+        nvvmDestroyProgram(&prog);
+    }
+    free(ptx);
+    free(libintrinsics);
+    free(bitcode);
+    return exit_code;
+}
diff --git a/rust-toolchain.toml b/rust-toolchain.toml
index 2f6ca2c..734a379 100644
--- a/rust-toolchain.toml
+++ b/rust-toolchain.toml
@@ -1,10 +1,2 @@
 [toolchain]
-channel = "nightly-2025-03-02"
-components = [
-    "clippy",
-    "llvm-tools-preview",
-    "rust-src",
-    "rustc-dev",
-    "rustfmt",
-    "rust-analyzer"
-]
+channel = "1.86.0" # last version to use llvm v19
diff --git a/rustc-ice-2025-06-20T01_56_29-204251.txt b/rustc-ice-2025-06-20T01_56_29-204251.txt
deleted file mode 100644
index 15d0dfc..0000000
--- a/rustc-ice-2025-06-20T01_56_29-204251.txt
+++ /dev/null
@@ -1,70
+0,0 @@ -thread 'rustc' panicked at /home/brandon/.cargo/git/checkouts/rust-cuda-fad079f24bbca397/52791ef/crates/rustc_codegen_nvvm_v7/src/nvvm.rs:120:9: -Malformed NVVM IR program rejected by libnvvm, dumping verifier log: - -error: Error: DataLayoutError: Unsupported integer alignment [Supported: -i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-i128:128:128].Error: -Example valid data layout: -Error: 32-bit: e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-i128:128:128-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64 -Error: 64-bit: e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-i128:128:128-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64 - - - -If you plan to submit a bug report please re-run the codegen with `RUSTFLAGS="--emit=llvm-ir" and include the .ll file corresponding to the .o file mentioned in the log -stack backtrace: - 0: 0x7f6483a1b4a5 - std::backtrace::Backtrace::create::h4caae72ea1d639e2 - 1: 0x7f6481da9495 - std::backtrace::Backtrace::force_capture::hed7edc9d6077f7f2 - 2: 0x7f6480f18b5e - std[e7ccd300aecc5933]::panicking::update_hook::>::{closure#0} - 3: 0x7f6480f18417 - std[e7ccd300aecc5933]::panicking::update_hook::>::{closure#0} - 4: 0x7f6481dc1da3 - std::panicking::rust_panic_with_hook::h98fc165e90ef379e - 5: 0x7f6481dc1a9a - std::panicking::begin_panic_handler::{{closure}}::h2c1a60d0a908eaec - 6: 0x7f6481dbf2c9 - std::sys::backtrace::__rust_end_short_backtrace::he8aba8f9b7ddf304 - 7: 0x7f6481dc175d - rust_begin_unwind - 8: 0x7f647ea5a680 - core::panicking::panic_fmt::hcbf39f8c1e585f84 - 9: 0x7f647041b1fc - rustc_codegen_nvvm_v7::nvvm::codegen_bitcode_modules::hf5f29fe48bec9963 - 10: 0x7f6470415ca2 - rustc_codegen_nvvm_v7::link::codegen_into_ptx_file::h5689fd9a71d6bc1b - 11: 0x7f6470414f60 - rustc_codegen_nvvm_v7::link::link_exe::hc2a779eb153eb603 - 12: 0x7f6470413ad7 - rustc_codegen_nvvm_v7::link::link::h43d444fd80887b95 - 13: 0x7f647041d56a - ::link::h91cc8270dd61187c - 14: 
0x7f64836f7b25 - ::link - 15: 0x7f6483703ba3 - rustc_interface[f15b12210e2206a6]::interface::run_compiler::<(), rustc_driver_impl[b622dd38a520489a]::run_compiler::{closure#0}>::{closure#1} - 16: 0x7f6483559bc8 - std[e7ccd300aecc5933]::sys::backtrace::__rust_begin_short_backtrace::::{closure#1}, ()>::{closure#0}, ()>::{closure#0}::{closure#0}, ()> - 17: 0x7f648355a4b4 - <::spawn_unchecked_::{closure#1}, ()>::{closure#0}, ()>::{closure#0}::{closure#0}, ()>::{closure#1} as core[9e5e7b8b77114b88]::ops::function::FnOnce<()>>::call_once::{shim:vtable#0} - 18: 0x7f648355b8ab - std::sys::pal::unix::thread::Thread::new::thread_start::h20288ab9ea215a81 - 19: 0x7f647d6a81f5 - - 20: 0x7f647d72889c - - 21: 0x0 - - - -rustc version: 1.87.0-nightly (8c392966a 2025-03-01) -platform: x86_64-unknown-linux-gnuthread 'rustc' panicked at /home/brandon/.cargo/git/checkouts/rust-cuda-fad079f24bbca397/52791ef/crates/rustc_codegen_nvvm_v7/src/nvvm.rs:120:9: -Malformed NVVM IR program rejected by libnvvm, dumping verifier log: - -error: Error: DataLayoutError: Unsupported integer alignment [Supported: -i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-i128:128:128].Error: -Example valid data layout: -Error: 32-bit: e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-i128:128:128-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64 -Error: 64-bit: e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-i128:128:128-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64 - - - -If you plan to submit a bug report please re-run the codegen with `RUSTFLAGS="--emit=llvm-ir" and include the .ll file corresponding to the .o file mentioned in the log -stack backtrace: - 0: 0x7f6483a1b4a5 - std::backtrace::Backtrace::create::h4caae72ea1d639e2 - 1: 0x7f6481da9495 - std::backtrace::Backtrace::force_capture::hed7edc9d6077f7f2 - 2: 0x7f6480f18b5e - std[e7ccd300aecc5933]::panicking::update_hook::>::{closure#0} - 3: 0x7f6481dc1da3 - 
std::panicking::rust_panic_with_hook::h98fc165e90ef379e
- 4: 0x7f6481dc1a9a - std::panicking::begin_panic_handler::{{closure}}::h2c1a60d0a908eaec
- 5: 0x7f6481dbf2c9 - std::sys::backtrace::__rust_end_short_backtrace::he8aba8f9b7ddf304
- 6: 0x7f6481dc175d - rust_begin_unwind
- 7: 0x7f647ea5a680 - core::panicking::panic_fmt::hcbf39f8c1e585f84
- 8: 0x7f647041b1fc - rustc_codegen_nvvm_v7::nvvm::codegen_bitcode_modules::hf5f29fe48bec9963
- 9: 0x7f6470415ca2 - rustc_codegen_nvvm_v7::link::codegen_into_ptx_file::h5689fd9a71d6bc1b
- 10: 0x7f6470414f60 - rustc_codegen_nvvm_v7::link::link_exe::hc2a779eb153eb603
- 11: 0x7f6470413ad7 - rustc_codegen_nvvm_v7::link::link::h43d444fd80887b95
- 12: 0x7f647041d56a - ::link::h91cc8270dd61187c
- 13: 0x7f64836f7b25 - ::link
- 14: 0x7f6483703ba3 - rustc_interface[f15b12210e2206a6]::interface::run_compiler::<(), rustc_driver_impl[b622dd38a520489a]::run_compiler::{closure#0}>::{closure#1}
- 15: 0x7f6483559bc8 - std[e7ccd300aecc5933]::sys::backtrace::__rust_begin_short_backtrace::::{closure#1}, ()>::{closure#0}, ()>::{closure#0}::{closure#0}, ()>
- 16: 0x7f648355a4b4 - <::spawn_unchecked_::{closure#1}, ()>::{closure#0}, ()>::{closure#0}::{closure#0}, ()>::{closure#1} as core[9e5e7b8b77114b88]::ops::function::FnOnce<()>>::call_once::{shim:vtable#0}
- 17: 0x7f648355b8ab - std::sys::pal::unix::thread::Thread::new::thread_start::h20288ab9ea215a81
- 18: 0x7f647d6a81f5 -
- 19: 0x7f647d72889c -
- 20: 0x0 -
diff --git a/scripts/build.sh b/scripts/build.sh
new file mode 100755
index 0000000..3287f70
--- /dev/null
+++ b/scripts/build.sh
@@ -0,0 +1,70 @@
+#!/bin/bash
+
+# Builds the GPU kernels end to end: Rust -> riscv64 LLVM IR -> NVPTX LLVM IR
+# -> bitcode -> PTX -> cubin. Intermediates live in /tmp; the results are
+# copied into nvvm_compiler/build/.
+
+# -e: stop on the first failing command, -u: reject unset variables,
+# pipefail: a pipeline fails if any stage fails
+set -euo pipefail
+
+# set architecture
+VIRTUAL_ARCH=compute_120 # rtx5090 blackwell
+PHYSICAL_ARCH=sm_120 # rtx5090 blackwell
+
+# the .ll lookup below is rooted here; if it were empty, find would search from /
+: "${CARGO_TARGET_DIR:?CARGO_TARGET_DIR must be set to the cargo target directory}"
+
+# clean
+cargo clean
+
+# build kernels to get the riscv .ll file
+pushd kernels
+cargo build --target riscv64gc-unknown-none-elf -p kernels --release
+popd
+
+# find the riscv .ll file; `|| true` keeps a failed find from aborting the
+# script before the explicit check below can report it
+RISCV_LL_FILE=$(find "$CARGO_TARGET_DIR"/riscv64gc-unknown-none-elf/release/deps/kernels-* -type f -name "*.ll" || true)
+if [ -z "$RISCV_LL_FILE" ]; then
+  echo "No .ll file found"
+  exit 1
+fi
+
+# replace uwtable attributes due to riscv core being built with unwind and not being recompiled despite panic = "abort" flag?
+sed -i 's/ uwtable //g' "$RISCV_LL_FILE"
+sed -i 's/ uwtable//g' "$RISCV_LL_FILE"
+
+# transpile riscv .ll to nvptx64 .ll
+pushd transpiler
+cargo run --release -- "$RISCV_LL_FILE"
+popd
+
+# mark kernels as ptx_kernel
+sed -i 's/define dso_local void @kernel_/define dso_local ptx_kernel void @kernel_/g' /tmp/output.ll
+
+# convert the ptx .ll files to .bc files
+llvm-as-19 /tmp/output.ll -o /tmp/output.bc
+llvm-as-19 transpiler/assets/libintrinsics.ll -o /tmp/libintrinsics.bc
+
+# strip debug info out of the .bc file
+opt-19 -strip-debug /tmp/output.bc -o /tmp/output.bc
+
+# compile the .bc files to .ptx
+pushd nvvm_compiler
+make clean
+make
+./build/nvvm_compiler /tmp/output.bc /tmp/libintrinsics.bc "$VIRTUAL_ARCH" > /tmp/output.ptx
+popd
+
+# compile the .ptx to .cubin
+echo "assembling .ptx to .cubin"
+ptxas -arch="$PHYSICAL_ARCH" -o /tmp/output.cubin /tmp/output.ptx
+
+# copy back
+pushd nvvm_compiler
+cp "$RISCV_LL_FILE" build/
+cp /tmp/output.ll build/
+cp /tmp/output.ptx build/
+cp /tmp/output.cubin build/
+popd
diff --git a/scripts/vast-build.sh b/scripts/vast-build.sh
deleted file mode 100755
index 265b80a..0000000
--- a/scripts/vast-build.sh
+++ /dev/null
@@ -1,72 +0,0 @@
-#!/bin/bash
-
-set -e
-
-PORT=28564
-HOST=ssh4.vast.ai
-USER=root
-
-# generate key
-ssh -o StrictHostKeyChecking=no -p $PORT $USER@$HOST <<'EOF'
-if [[ ! -f ~/.ssh/id_rsa ]]
-then
-  ssh-keygen -t rsa -b 3072 -f ~/.ssh/id_rsa -N ""
-  chmod 600 ~/.ssh/id_rsa
-fi
-EOF
-
-# copy it
-scp -P $PORT $USER@$HOST:~/.ssh/id_rsa.pub /tmp
-
-# add it to github
-gh ssh-key add /tmp/id_rsa.pub
-
-# install dependencies
-ssh -o StrictHostKeyChecking=no -p $PORT $USER@$HOST <<'EOF'
-if !
which pkg-config >/dev/null 2>&1; then - apt update - apt install -y pkg-config libssl-dev zlib1g-dev clang -fi - -# llvm-7 -if [[ ! -f /usr/bin/llvm-config-7 ]] -then - curl -sSf -L -O http://security.ubuntu.com/ubuntu/pool/universe/libf/libffi7/libffi7_3.3-5ubuntu1_amd64.deb && \ - curl -sSf -L -O http://mirrors.kernel.org/ubuntu/pool/universe/l/llvm-toolchain-7/llvm-7_7.0.1-12_amd64.deb && \ - curl -sSf -L -O http://mirrors.kernel.org/ubuntu/pool/universe/l/llvm-toolchain-7/llvm-7-dev_7.0.1-12_amd64.deb && \ - curl -sSf -L -O http://mirrors.kernel.org/ubuntu/pool/universe/l/llvm-toolchain-7/libllvm7_7.0.1-12_amd64.deb && \ - curl -sSf -L -O http://mirrors.kernel.org/ubuntu/pool/universe/l/llvm-toolchain-7/llvm-7-runtime_7.0.1-12_amd64.deb && \ - apt-get install -y ./*.deb && \ - ln -s /usr/bin/llvm-config-7 /usr/bin/llvm-config && \ - rm ./*.deb -fi - -# rust -if [[ ! -f ~/.cargo/env ]] -then - curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y --default-toolchain nightly-2025-03-02 -fi -EOF - -# build -ssh -o StrictHostKeyChecking=no -p $PORT $USER@$HOST <<'EOF' -# clone -if [[ ! -d ed25519-vanity-rs ]] -then - GIT_SSH_COMMAND="ssh -o StrictHostKeyChecking=no" git clone git@github.com:brandonros/ed25519-vanity-rs.git -fi - -# checkout -pushd ed25519-vanity-rs -git fetch -git checkout --force master -git reset --hard origin/master - -# set up environment -. $HOME/.cargo/env - -# build -pushd gpu_runner -cargo build --release -./gpu_runner shallenge brandonros 000000000000cbaec87e070a04c2eb90644e16f37aab655ccdf683fdda5a6f96 -EOF diff --git a/scripts/vast-run.sh b/scripts/vast-run.sh index ee66d70..c33b634 100755 --- a/scripts/vast-run.sh +++ b/scripts/vast-run.sh @@ -2,21 +2,37 @@ set -e -PORT=13580 -HOST=ssh4.vast.ai +PORT=28770 +HOST=ssh8.vast.ai USER=root +scp -P $PORT nvvm_compiler/build/output.cubin $USER@$HOST: + ssh -o StrictHostKeyChecking=no -p $PORT $USER@$HOST <<'EOF' -rm -f gpu_runner -if [[ ! 
-f gpu_runner ]] +VERSION=1.12.0 + +# check for killall +if ! command -v killall &> /dev/null then - curl -L -O https://github.com/brandonros/ed25519-vanity-rs/releases/download/1.5.0/gpu_runner - curl -L -O https://github.com/brandonros/ed25519-vanity-rs/releases/download/1.5.0/kernels.ptx - chmod +x gpu_runner + apt update + apt install -y psmisc fi -export BLOCKS_PER_SM="1024" -export THREADS_PER_BLOCK="256" -export PTX_PATH="kernels.ptx" + +# cleanup +rm -f gpu_runner +#rm -f output.cubin killall gpu_runner || true -./gpu_runner solana-vanity aaa "" + +# download +curl -L -O https://github.com/brandonros/ed25519-vanity-rs/releases/download/$VERSION/gpu_runner +#curl -L -O https://github.com/brandonros/ed25519-vanity-rs/releases/download/$VERSION/output.cubin +chmod +x gpu_runner + +# run +export BLOCKS_PER_SM="256" +export THREADS_PER_BLOCK="256" +export STACK_SIZE="8192" +export CUBIN_PATH="output.cubin" +./gpu_runner bitcoin-vanity bc1qqqqqq "" +#./gpu_runner ethereum-vanity 55555555 "" EOF diff --git a/transpiler/.cargo/config.toml b/transpiler/.cargo/config.toml new file mode 100644 index 0000000..4b78939 --- /dev/null +++ b/transpiler/.cargo/config.toml @@ -0,0 +1,2 @@ +[build] +rustflags = ["-L", "/Users/brandon/Applications/zstd/1.5.7/lib"] diff --git a/transpiler/Cargo.lock b/transpiler/Cargo.lock new file mode 100644 index 0000000..dec74f4 --- /dev/null +++ b/transpiler/Cargo.lock @@ -0,0 +1,166 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. 
+version = 4 + +[[package]] +name = "anyhow" +version = "1.0.98" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e16d2d3311acee920a9eb8d33b8cbc1787ce4a264e85f964c2404b969bdcd487" + +[[package]] +name = "cc" +version = "1.2.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d487aa071b5f64da6f19a3e848e3578944b726ee5a4854b82172f02aa876bfdc" +dependencies = [ + "shlex", +] + +[[package]] +name = "either" +version = "1.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" + +[[package]] +name = "inkwell" +version = "0.6.0" +source = "git+https://github.com/brandonros/inkwell?rev=28eca69a39abc81383c1481ee2bb88836c5d1143#28eca69a39abc81383c1481ee2bb88836c5d1143" +dependencies = [ + "either", + "inkwell_internals", + "libc", + "llvm-sys", + "once_cell", + "thiserror", +] + +[[package]] +name = "inkwell_internals" +version = "0.11.0" +source = "git+https://github.com/brandonros/inkwell?rev=28eca69a39abc81383c1481ee2bb88836c5d1143#28eca69a39abc81383c1481ee2bb88836c5d1143" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "inkwell_poc" +version = "0.1.0" +dependencies = [ + "inkwell", + "rustc-demangle", +] + +[[package]] +name = "lazy_static" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" + +[[package]] +name = "libc" +version = "0.2.174" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1171693293099992e19cddea4e8b849964e9846f4acee11b3948bcc337be8776" + +[[package]] +name = "llvm-sys" +version = "191.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "893cddf1adf0354b93411e413553dd4daf5c43195d73f1acfa1e394bdd371456" +dependencies = [ + "anyhow", + "cc", + "lazy_static", + "libc", + "regex-lite", + "semver", +] + 
+[[package]] +name = "once_cell" +version = "1.21.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" + +[[package]] +name = "proc-macro2" +version = "1.0.95" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "02b3e5e68a3a1a02aad3ec490a98007cbc13c37cbe84a3cd7b8e406d76e7f778" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.40" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1885c039570dc00dcb4ff087a89e185fd56bae234ddc7f056a945bf36467248d" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "regex-lite" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "53a49587ad06b26609c52e423de037e7f57f20d53535d66e08c695f347df952a" + +[[package]] +name = "rustc-demangle" +version = "0.1.25" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "989e6739f80c4ad5b13e0fd7fe89531180375b18520cc8c82080e4dc4035b84f" + +[[package]] +name = "semver" +version = "1.0.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "56e6fa9c48d24d85fb3de5ad847117517440f6beceb7798af16b4a87d616b8d0" + +[[package]] +name = "shlex" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" + +[[package]] +name = "syn" +version = "2.0.103" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e4307e30089d6fd6aff212f2da3a1f9e32f3223b1f010fb09b7c95f90f3ca1e8" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "thiserror" +version = "2.0.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "567b8a2dae586314f7be2a752ec7474332959c6460e02bde30d702a66d488708" +dependencies = [ + "thiserror-impl", +] + +[[package]] +name = 
"thiserror-impl" +version = "2.0.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f7cf42b4507d8ea322120659672cf1b9dbb93f8f2d4ecfd6e51350ff5b17a1d" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "unicode-ident" +version = "1.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512" diff --git a/transpiler/Cargo.toml b/transpiler/Cargo.toml new file mode 100644 index 0000000..d84c7d3 --- /dev/null +++ b/transpiler/Cargo.toml @@ -0,0 +1,7 @@ +[package] +name = "transpiler" +version = "0.1.0" +edition = "2024" + +[dependencies] +inkwell = { git = "https://github.com/brandonros/inkwell", rev = "28eca69a39abc81383c1481ee2bb88836c5d1143", features = ["llvm19-1"] } diff --git a/transpiler/assets/libintrinsics.ll b/transpiler/assets/libintrinsics.ll new file mode 100644 index 0000000..2757454 --- /dev/null +++ b/transpiler/assets/libintrinsics.ll @@ -0,0 +1,342 @@ +; This is a hand-written llvm ir module which contains extra functions +; that are easier to write. They mostly contain nvvm intrinsics that are wrapped in new +; functions so that rustc does not think they are llvm intrinsics and so you don't need to always use nightly for that. +; +; if you update this make sure to update libintrinsics.bc by running llvm-as (make sure you are using llvm-7 or it won't work when +; loaded into libnvvm). 
+source_filename = "libintrinsics" +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-i128:128:128-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64-a:8:8" +target triple = "nvptx64-nvidia-cuda" + +; thread ---- + +define i32 @__nvvm_thread_idx_x() #0 { +start: + %0 = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() + ret i32 %0 +} + +define i32 @__nvvm_thread_idx_y() #0 { +start: + %0 = call i32 @llvm.nvvm.read.ptx.sreg.tid.y() + ret i32 %0 +} + +define i32 @__nvvm_thread_idx_z() #0 { +start: + %0 = call i32 @llvm.nvvm.read.ptx.sreg.tid.z() + ret i32 %0 +} + +; block dimension ---- + +define i32 @__nvvm_block_dim_x() #0 { +start: + %0 = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() + ret i32 %0 +} + +define i32 @__nvvm_block_dim_y() #0 { +start: + %0 = call i32 @llvm.nvvm.read.ptx.sreg.ntid.y() + ret i32 %0 +} + +define i32 @__nvvm_block_dim_z() #0 { +start: + %0 = call i32 @llvm.nvvm.read.ptx.sreg.ntid.z() + ret i32 %0 +} + +; block idx ---- + +define i32 @__nvvm_block_idx_x() #0 { +start: + %0 = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x() + ret i32 %0 +} + +define i32 @__nvvm_block_idx_y() #0 { +start: + %0 = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.y() + ret i32 %0 +} + +define i32 @__nvvm_block_idx_z() #0 { +start: + %0 = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.z() + ret i32 %0 +} + +; grid dimension ---- + +define i32 @__nvvm_grid_dim_x() #0 { +start: + %0 = call i32 @llvm.nvvm.read.ptx.sreg.nctaid.x() + ret i32 %0 +} + +define i32 @__nvvm_grid_dim_y() #0 { +start: + %0 = call i32 @llvm.nvvm.read.ptx.sreg.nctaid.y() + ret i32 %0 +} + +define i32 @__nvvm_grid_dim_z() #0 { +start: + %0 = call i32 @llvm.nvvm.read.ptx.sreg.nctaid.z() + ret i32 %0 +} + +; warp ---- + +define i32 @__nvvm_warp_size() #0 { +start: + %0 = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() + ret i32 %0 +} + +declare i32 @llvm.nvvm.read.ptx.sreg.tid.x() +declare i32 @llvm.nvvm.read.ptx.sreg.tid.y() +declare i32 @llvm.nvvm.read.ptx.sreg.tid.z() +declare 
i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +declare i32 @llvm.nvvm.read.ptx.sreg.ntid.y() +declare i32 @llvm.nvvm.read.ptx.sreg.ntid.z() +declare i32 @llvm.nvvm.read.ptx.sreg.ctaid.x() +declare i32 @llvm.nvvm.read.ptx.sreg.ctaid.y() +declare i32 @llvm.nvvm.read.ptx.sreg.ctaid.z() +declare i32 @llvm.nvvm.read.ptx.sreg.nctaid.x() +declare i32 @llvm.nvvm.read.ptx.sreg.nctaid.y() +declare i32 @llvm.nvvm.read.ptx.sreg.nctaid.z() +declare i32 @llvm.nvvm.read.ptx.sreg.warpsize() + +; other ---- + +define void @__nvvm_block_barrier() #1 { +start: + call void @llvm.nvvm.barrier0() + ret void +} + +declare void @llvm.nvvm.barrier0() + +define void @__nvvm_grid_fence() #1 { +start: + call void @llvm.nvvm.membar.cta() + ret void +} + +declare void @llvm.nvvm.membar.cta() + +define void @__nvvm_device_fence() #1 { +start: + call void @llvm.nvvm.membar.gl() + ret void +} + +declare void @llvm.nvvm.membar.gl() + +define void @__nvvm_system_fence() #1 { +start: + call void @llvm.nvvm.membar.sys() + ret void +} + +declare void @llvm.nvvm.membar.sys() + +define void @__nvvm_trap() #1 { +start: + call void @llvm.trap() + unreachable + ret void +} + +declare void @llvm.trap() + +; atomic stuff ------------- + +define i32 @__nvvm_atomic_add_global_i32(i32* %ptr, i32 %val) #0 { +start: + %0 = call i32 asm sideeffect "atom.global.add.u32 $0, [$1], $2;", "=r,l,r"(i32* %ptr, i32 %val) + ret i32 %0 +} + +declare i32 @llvm.nvvm.atomic.add.gen.i.global.i32.p0i32(i32*, i32) #1 + +; math stuff ------------- + +define {i8, i1} @__nvvm_i8_addo(i8, i8) #0 { +start: + %2 = sext i8 %0 to i16 + %3 = sext i8 %1 to i16 + %4 = call {i16, i1} @llvm.sadd.with.overflow.i16(i16 %2, i16 %3) + %5 = extractvalue {i16, i1} %4, 0 + %6 = extractvalue {i16, i1} %4, 1 + %7 = trunc i16 %5 to i8 + %8 = insertvalue {i8, i1} undef, i8 %7, 0 + %9 = insertvalue {i8, i1} %8, i1 %6, 1 + ret {i8, i1} %9 +} +declare {i16, i1} @llvm.sadd.with.overflow.i16(i16, i16) #0 + +define {i8, i1} @__nvvm_u8_addo(i8, i8) #0 { +start: + %2 = 
sext i8 %0 to i16 + %3 = sext i8 %1 to i16 + %4 = call {i16, i1} @llvm.uadd.with.overflow.i16(i16 %2, i16 %3) + %5 = extractvalue {i16, i1} %4, 0 + %6 = extractvalue {i16, i1} %4, 1 + %7 = trunc i16 %5 to i8 + %8 = insertvalue {i8, i1} undef, i8 %7, 0 + %9 = insertvalue {i8, i1} %8, i1 %6, 1 + ret {i8, i1} %9 +} +declare {i16, i1} @llvm.uadd.with.overflow.i16(i16, i16) #0 + +define {i8, i1} @__nvvm_i8_subo(i8, i8) #0 { +start: + %2 = sext i8 %0 to i16 + %3 = sext i8 %1 to i16 + %4 = call {i16, i1} @llvm.ssub.with.overflow.i16(i16 %2, i16 %3) + %5 = extractvalue {i16, i1} %4, 0 + %6 = extractvalue {i16, i1} %4, 1 + %7 = trunc i16 %5 to i8 + %8 = insertvalue {i8, i1} undef, i8 %7, 0 + %9 = insertvalue {i8, i1} %8, i1 %6, 1 + ret {i8, i1} %9 +} +declare {i16, i1} @llvm.ssub.with.overflow.i16(i16, i16) #0 + +define {i8, i1} @__nvvm_u8_subo(i8, i8) #0 { +start: + %2 = sext i8 %0 to i16 + %3 = sext i8 %1 to i16 + %4 = call {i16, i1} @llvm.usub.with.overflow.i16(i16 %2, i16 %3) + %5 = extractvalue {i16, i1} %4, 0 + %6 = extractvalue {i16, i1} %4, 1 + %7 = trunc i16 %5 to i8 + %8 = insertvalue {i8, i1} undef, i8 %7, 0 + %9 = insertvalue {i8, i1} %8, i1 %6, 1 + ret {i8, i1} %9 +} +declare {i16, i1} @llvm.usub.with.overflow.i16(i16, i16) #0 + +define {i8, i1} @__nvvm_i8_mulo(i8, i8) #0 { +start: + %2 = sext i8 %0 to i16 + %3 = sext i8 %1 to i16 + %4 = call {i16, i1} @llvm.smul.with.overflow.i16(i16 %2, i16 %3) + %5 = extractvalue {i16, i1} %4, 0 + %6 = extractvalue {i16, i1} %4, 1 + %7 = trunc i16 %5 to i8 + %8 = insertvalue {i8, i1} undef, i8 %7, 0 + %9 = insertvalue {i8, i1} %8, i1 %6, 1 + ret {i8, i1} %9 +} +declare {i16, i1} @llvm.smul.with.overflow.i16(i16, i16) #0 + +define {i8, i1} @__nvvm_u8_mulo(i8, i8) #0 { +start: + %2 = sext i8 %0 to i16 + %3 = sext i8 %1 to i16 + %4 = call {i16, i1} @llvm.umul.with.overflow.i16(i16 %2, i16 %3) + %5 = extractvalue {i16, i1} %4, 0 + %6 = extractvalue {i16, i1} %4, 1 + %7 = trunc i16 %5 to i8 + %8 = insertvalue {i8, i1} undef, i8 
%7, 0 + %9 = insertvalue {i8, i1} %8, i1 %6, 1 + ret {i8, i1} %9 +} +declare {i16, i1} @llvm.umul.with.overflow.i16(i16, i16) #0 + +; This is a bit weird, we need to use functions defined in rust crates (compiler_builtins) +; as intrinsics in the codegen, but we can't directly use their name, otherwise we will have +; really odd and incorrect behavior in the crate theyre defined in. So we need to make a wrapper for them that is opaque +; to the codegen, which is what this is doing. + +define {<2 x i64>, i1} @__nvvm_i128_addo(<2 x i64>, <2 x i64>) #0 { +start: + %2 = call {<2 x i64>, i1} @__rust_i128_addo(<2 x i64> %0, <2 x i64> %1) + ret {<2 x i64>, i1} %2 +} +declare {<2 x i64>, i1} @__rust_i128_addo(<2 x i64>, <2 x i64>) #0 + +define {<2 x i64>, i1} @__nvvm_u128_addo(<2 x i64>, <2 x i64>) #0 { +start: + %2 = call {<2 x i64>, i1} @__rust_u128_addo(<2 x i64> %0, <2 x i64> %1) + ret {<2 x i64>, i1} %2 +} +declare {<2 x i64>, i1} @__rust_u128_addo(<2 x i64>, <2 x i64>) #0 + +define {<2 x i64>, i1} @__nvvm_i128_subo(<2 x i64>, <2 x i64>) #0 { +start: + %2 = call {<2 x i64>, i1} @__rust_i128_subo(<2 x i64> %0, <2 x i64> %1) + ret {<2 x i64>, i1} %2 +} +declare {<2 x i64>, i1} @__rust_i128_subo(<2 x i64>, <2 x i64>) #0 + +define {<2 x i64>, i1} @__nvvm_u128_subo(<2 x i64>, <2 x i64>) #0 { +start: + %2 = call {<2 x i64>, i1} @__rust_u128_subo(<2 x i64> %0, <2 x i64> %1) + ret {<2 x i64>, i1} %2 +} +declare {<2 x i64>, i1} @__rust_u128_subo(<2 x i64>, <2 x i64>) #0 + +define {<2 x i64>, i1} @__nvvm_i128_mulo(<2 x i64>, <2 x i64>) #0 { +start: + %2 = call {<2 x i64>, i1} @__rust_i128_mulo(<2 x i64> %0, <2 x i64> %1) + ret {<2 x i64>, i1} %2 +} +declare {<2 x i64>, i1} @__rust_i128_mulo(<2 x i64>, <2 x i64>) #0 + +define {<2 x i64>, i1} @__nvvm_u128_mulo(<2 x i64>, <2 x i64>) #0 { +start: + %2 = call {<2 x i64>, i1} @__rust_u128_mulo(<2 x i64> %0, <2 x i64> %1) + ret {<2 x i64>, i1} %2 +} +declare {<2 x i64>, i1} @__rust_u128_mulo(<2 x i64>, <2 x i64>) #0 + +; Required 
because we need to explicitly generate { i32, i1 } for the following intrinsics +; except rustc will not generate them (it will make { i32, i8 }) which libnvvm rejects. + +define { i32, i8 } @__nvvm_warp_shuffle(i32, i32, i32, i32, i32) #1 { +start: + %5 = call { i32, i1 } @llvm.nvvm.shfl.sync.i32(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4) + %6 = extractvalue { i32, i1 } %5, 1 + %7 = zext i1 %6 to i8 + %8 = extractvalue { i32, i1 } %5, 0 + %9 = insertvalue { i32, i8 } undef, i32 %8, 0 + %10 = insertvalue { i32, i8 } %9, i8 %7, 1 + ret { i32, i8 } %10 +} + +declare { i32, i1 } @llvm.nvvm.shfl.sync.i32(i32, i32, i32, i32, i32) #1 + +define { i32, i8 } @__nvvm_warp_match_all_32(i32, i32) { +start: + %2 = call { i32, i1 } @llvm.nvvm.match.all.sync.i32(i32 %0, i32 %1) + %3 = extractvalue { i32, i1 } %2, 1 + %4 = zext i1 %3 to i8 + %5 = extractvalue { i32, i1 } %2, 0 + %6 = insertvalue { i32, i8 } undef, i32 %5, 0 + %7 = insertvalue { i32, i8 } %6, i8 %4, 1 + ret { i32, i8 } %7 +} + +declare { i32, i1 } @llvm.nvvm.match.all.sync.i32(i32, i32) #1 + +define { i32, i8 } @__nvvm_warp_match_all_64(i32, i64) { +start: + %2 = call { i32, i1 } @llvm.nvvm.match.all.sync.i64(i32 %0, i64 %1) + %3 = extractvalue { i32, i1 } %2, 1 + %4 = zext i1 %3 to i8 + %5 = extractvalue { i32, i1 } %2, 0 + %6 = insertvalue { i32, i8 } undef, i32 %5, 0 + %7 = insertvalue { i32, i8 } %6, i8 %4, 1 + ret { i32, i8 } %7 +} + +declare { i32, i1 } @llvm.nvvm.match.all.sync.i64(i32, i64) #1 + +attributes #0 = { alwaysinline speculatable } +attributes #1 = { alwaysinline } \ No newline at end of file diff --git a/transpiler/src/main.rs b/transpiler/src/main.rs new file mode 100644 index 0000000..8987bcf --- /dev/null +++ b/transpiler/src/main.rs @@ -0,0 +1,64 @@ +use inkwell::context::Context; +use inkwell::data_layout::DataLayout; +use inkwell::memory_buffer::MemoryBuffer; +use inkwell::module::Module; +use inkwell::targets::TargetTriple; +use inkwell::values::BasicMetadataValueEnum; +use std::fs; + +fn 
add_nvvm_ir_version<'ctx, 'module>(context: &'ctx Context, module: &'module Module<'ctx>) { + // Add NVVM IR version metadata: !nvvm.ir.version = !{!0} + // !0 = !{i32 2, i32 0} + let i32_type = context.i32_type(); + + // Create constant values for version 2.0 + let major_version = i32_type.const_int(2, false); + let minor_version = i32_type.const_int(0, false); + + // Create metadata node directly with the constant values + let version_metadata = context.metadata_node(&[ + BasicMetadataValueEnum::IntValue(major_version), + BasicMetadataValueEnum::IntValue(minor_version), + ]); + + // Add to named metadata + module.add_global_metadata("nvvmir.version", &version_metadata).unwrap(); +} +fn main() -> Result<(), Box> { + let args: Vec = std::env::args().collect(); + + if args.len() != 2 { + eprintln!("Usage: {} ", args[0]); + std::process::exit(1); + } + + // create context + let context = Context::create(); + + // load riscv64gc-unknown-none-elf llvm ir + let filename = args[1].clone(); + let risc_ir = fs::read(filename)?; + let risc_memory_buffer = MemoryBuffer::create_from_memory_range(&risc_ir, "risc_ir"); + let risc_module = context.create_module_from_ir(risc_memory_buffer)?; + + // jumpstart ptx module from risc module + let ptx_module = risc_module.clone(); + + // Set target triple + let target_triple = "nvptx64-nvidia-cuda"; + ptx_module.set_triple(&TargetTriple::create(target_triple)); + + // Set data layout + let data_layout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-i128:128:128-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64-a:8:8"; + ptx_module.set_data_layout(&DataLayout::create(data_layout)); + + // Add NVVM IR version metadata + add_nvvm_ir_version(&context, &ptx_module); + + // Write to .ll file + let llvm_ir = ptx_module.print_to_string().to_string(); + fs::write("/tmp/output.ll", llvm_ir).expect("Unable to write file"); + println!("LLVM IR written to output.ll"); + + Ok(()) +}