Skip to content

Commit

Permalink
Merge pull request #78 from filecoin-project/feat/opencl
Browse files Browse the repository at this point in the history
Implement pure OpenCL batch hashing.
  • Loading branch information
porcuquine authored Jan 20, 2021
2 parents 6a4071c + 569db95 commit 7f90f5b
Show file tree
Hide file tree
Showing 19 changed files with 749 additions and 112 deletions.
23 changes: 16 additions & 7 deletions .circleci/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ restore-workspace: &restore-workspace
restore-cache: &restore-cache
restore_cache:
keys:
- cargo-v0-{{ checksum "rust-toolchain" }}-{{ checksum "Cargo.toml" }}-{{ checksum "Cargo.lock" }}-{{ arch }}
- cargo-v1-{{ checksum "rust-toolchain" }}-{{ checksum "Cargo.toml" }}-{{ checksum "Cargo.lock" }}-{{ arch }}
- repo-source-{{ .Branch }}-{{ .Revision }}

commands:
Expand All @@ -35,13 +35,22 @@ commands:
no_output_timeout: 5m
- run:
name: Test (pairing, GPU) (<< parameters.target >>)
command: TARGET=<< parameters.target >> cargo test --release --features gpu -- --test-threads=1
command: TARGET=<< parameters.target >> cargo test --release --no-default-features --features pairing,gpu -- --test-threads=1
no_output_timeout: 30m

- run:
name: Test (blst, GPU) (<< parameters.target >>)
command: TARGET=<< parameters.target >> cargo test --release --no-default-features --features blst,gpu -- --test-threads=1
no_output_timeout: 30m
- run:
name: Test (pairing, opencl) (<< parameters.target >>)
command: TARGET=<< parameters.target >> cargo test --release --no-default-features --features pairing,opencl -- --test-threads=1
no_output_timeout: 30m

- run:
name: Test (blst, opencl) (<< parameters.target >>)
command: TARGET=<< parameters.target >> cargo test --release --no-default-features --features blst,opencl -- --test-threads=1
no_output_timeout: 30m

jobs:
cargo_fetch:
Expand All @@ -63,12 +72,12 @@ jobs:
command: cargo generate-lockfile
- restore_cache:
keys:
- cargo-v0-{{ checksum "rust-toolchain" }}-{{ checksum "Cargo.toml" }}-{{ checksum "Cargo.lock" }}-{{ arch }}
- cargo-v1-{{ checksum "rust-toolchain" }}-{{ checksum "Cargo.toml" }}-{{ checksum "Cargo.lock" }}-{{ arch }}
- run: cargo update
- run: cargo fetch
- run: rustup install $(cat rust-toolchain)
- run: rustup default $(cat rust-toolchain)
- run: rustup install nightly
- run: rustup install nightly-2020-11-18
- run: rustup component add rustfmt-preview
- run: rustup component add clippy-preview
- run: rustc --version
Expand All @@ -78,7 +87,7 @@ jobs:
paths:
- gpuci
- save_cache:
key: cargo-v0-{{ checksum "rust-toolchain" }}-{{ checksum "Cargo.toml" }}-{{ checksum "Cargo.lock" }}-{{ arch }}
key: cargo-v1-{{ checksum "rust-toolchain" }}-{{ checksum "Cargo.toml" }}-{{ checksum "Cargo.lock" }}-{{ arch }}
paths:
- "~/.cargo"
- "~/.rustup"
Expand Down Expand Up @@ -175,10 +184,10 @@ jobs:
- run: sudo apt install -y ocl-icd-opencl-dev
- run:
name: Run cargo release build (pairing, gpu)
command: cargo +nightly build -Zpackage-features --release -p gbench --no-default-features --features pairing,gpu
command: cargo +nightly-2020-11-18 build -Zpackage-features --release -p gbench --no-default-features --features pairing,gpu
- run:
name: Run cargo release build (blst, gpu)
command: cargo +nightly build -Zpackage-features --release -p gbench --no-default-features --features blst,gpu
command: cargo +nightly-2020-11-18 build -Zpackage-features --release -p gbench --no-default-features --features blst,gpu

benches:
executor: default
Expand Down
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://book.async.rs/overview/stability-guarantees.html).

## Unreleased
- Pure OpenCL implementation of batch hashing. (https://github.com/filecoin-project/neptune/pull/78)

## 2.4.0 - 2020-11-17

Expand Down
4 changes: 4 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,15 @@ repository = "https://github.com/filecoin-project/neptune"
lazy_static = "1.4.0"
bellperson = { version = "0.12", default-features = false }
blake2s_simd = "0.5"
blstrs = { version = "0.2.0", optional = true }
byteorder = "1"
ff = { version = "0.2.1", package = "fff" }
generic-array = "0.14.4"
log = "0.4.8"
rust-gpu-tools = { version = "0.3.0", optional = true }
triton = { version = "2.1.0", package = "neptune-triton", default-features = false, features = ["opencl"], optional = true }
itertools = { version = "0.8.0" }
ff-cl-gen = "0.2.0"

[dev-dependencies]
criterion = "0.3"
Expand Down Expand Up @@ -45,6 +48,7 @@ codegen-units = 1
[features]
default = ["pairing"]
gpu = ["triton", "rust-gpu-tools"]
opencl = ["rust-gpu-tools"]
pairing = ["bellperson/pairing"]
blst = ["bellperson/blst"]

Expand Down
11 changes: 9 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,14 @@ proofs (in SNARKs).

Neptune also supports batch hashing and tree building, which can be performed on a GPU. The underlying GPU
implementation, [neptune-triton](https://github.com/filecoin-project/neptune-triton), is implemented in the [Futhark
Programming Language](https://futhark-lang.org/).
Programming Language](https://futhark-lang.org/). To use `neptune-triton` GPU batch hashing, compile `neptune` with the
`gpu` feature.

Neptune now implements GPU batch hashing in pure OpenCL. The initial implementation is a bit less than 2x faster than
the Futhark implementation, so once stabilized this will likely be the preferred option. The pure OpenCL batch hashing
is provided by the internal `proteus` module. To use `proteus`, compile `neptune` with the `opencl` feature.

The `gpu` and `opencl` features are mutually exclusive.

At the time of the 1.0.0 release, Neptune on an RTX 2080Ti GPU can build 8-ary Merkle trees for 4GiB of input in 16 seconds.

Expand All @@ -35,7 +42,7 @@ The following are likely areas of future work:

- [x] Support for multiple GPUs.
- [x] Support domain separation tag.
- [ ] Improve throughput (?) by using OpenCL directly.
- [x] Improve throughput (?) by using OpenCL directly.

## History

Expand Down
3 changes: 2 additions & 1 deletion gbench/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,13 @@ env_logger = "0.7.1"
ff = { version = "0.2.1", package = "fff" }
generic-array = "0.14.4"
log = "0.4.8"
neptune = { path = "../", default-features = false, features=["gpu"] }
neptune = { path = "../", default-features = false }
rust-gpu-tools = { version = "0.3.0", optional = true }
structopt = { version = "0.3", default-features = false }

[features]
default = ["pairing", "gpu"]
gpu = ["neptune/gpu", "rust-gpu-tools"]
opencl = ["neptune/opencl", "rust-gpu-tools"]
pairing = ["neptune/pairing", "bellperson/pairing"]
blst = ["neptune/blst", "bellperson/blst"]
12 changes: 10 additions & 2 deletions gbench/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ struct Opts {
}

fn main() -> Result<(), Error> {
#[cfg(all(feature = "gpu", target_os = "macos"))]
#[cfg(all(any(feature = "gpu", feature = "opencl"), target_os = "macos"))]
unimplemented!("Running on macos is not recommended and may have bad consequences -- experiment at your own risk.");
env_logger::init();

Expand All @@ -127,14 +127,22 @@ fn main() -> Result<(), Error> {

// Comma separated list of GPU bus-ids
let gpus = std::env::var("NEPTUNE_GBENCH_GPUS");

#[cfg(feature = "gpu")]
let default_type = BatcherType::GPU;

#[cfg(feature = "opencl")]
let default_type = BatcherType::OpenCL;

let batcher_types = gpus
.map(|v| {
v.split(",")
.map(|s| s.parse::<u32>().expect("Invalid Bus-Id number!"))
.map(|bus_id| BatcherType::CustomGPU(GPUSelector::BusId(bus_id)))
.collect::<Vec<_>>()
})
.unwrap_or(vec![BatcherType::GPU]);
.unwrap_or(vec![default_type]);

let mut threads = Vec::new();
for batcher_type in batcher_types {
threads.push(thread::spawn(move || {
Expand Down
99 changes: 63 additions & 36 deletions src/batch_hasher.rs
Original file line number Diff line number Diff line change
@@ -1,11 +1,14 @@
use rust_gpu_tools::opencl;
use std::fmt::{self, Debug};
use std::marker::PhantomData;
use std::sync::{Arc, Mutex};

#[cfg(feature = "gpu")]
use crate::cl;
use crate::error::Error;
use crate::poseidon::SimplePoseidonBatchHasher;
#[cfg(feature = "opencl")]
use crate::proteus::gpu::{get_device, CLBatchHasher};
#[cfg(feature = "gpu")]
use crate::triton::cl;
use crate::{Arity, BatchHasher, Strength, DEFAULT_STRENGTH};
use bellperson::bls::Fr;
use generic_array::GenericArray;
Expand All @@ -16,34 +19,50 @@ use triton::FutharkContext;

#[derive(Clone)]
pub enum BatcherType {
#[cfg(feature = "gpu")]
#[cfg(any(feature = "gpu", feature = "opencl"))]
CustomGPU(GPUSelector),
#[cfg(feature = "gpu")]
FromFutharkContext(Arc<Mutex<FutharkContext>>),
#[cfg(feature = "opencl")]
FromDevice(opencl::Device),
#[cfg(feature = "gpu")]
GPU,
CPU,
#[cfg(feature = "opencl")]
OpenCL,
}

// Renders a `BatcherType` as `BatcherType::<Variant>`. Only `CustomGPU` prints its
// payload (via the selector's own `Debug`); every other variant prints just its name.
impl Debug for BatcherType {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Shared prefix first, then the variant-specific suffix.
        f.write_str("BatcherType::")?;
        match self {
            BatcherType::CPU => f.write_str("CPU"),
            #[cfg(feature = "gpu")]
            BatcherType::GPU => f.write_str("GPU"),
            #[cfg(feature = "opencl")]
            BatcherType::OpenCL => f.write_str("OpenCL"),
            #[cfg(feature = "gpu")]
            BatcherType::FromFutharkContext(_) => f.write_str("FromFutharkContext"),
            #[cfg(feature = "opencl")]
            BatcherType::FromDevice(_) => f.write_str("FromDevice"),
            #[cfg(any(feature = "gpu", feature = "opencl"))]
            BatcherType::CustomGPU(selector) => write!(f, "CustomGPU({:?})", selector),
        }
    }
}

use crate::gpu::GPUBatchHasher;
#[cfg(feature = "gpu")]
use crate::triton::gpu::GPUBatchHasher;

/// A batch hasher for arity `A`, wrapping exactly one concrete backend per variant.
///
/// The `GPU` and `OpenCL` variants exist only when the corresponding cargo feature
/// (`gpu` / `opencl`) is enabled; `CPU` is always compiled in.
pub enum Batcher<A>
where
A: Arity<Fr>,
{
/// Backend backed by a `GPUBatchHasher` (requires the `gpu` feature).
#[cfg(feature = "gpu")]
GPU(GPUBatchHasher<A>),
/// Backend backed by a `SimplePoseidonBatchHasher`.
CPU(SimplePoseidonBatchHasher<A>),
/// Backend backed by a `CLBatchHasher` (requires the `opencl` feature).
#[cfg(feature = "opencl")]
OpenCL(CLBatchHasher<A>),
}

impl<A> Batcher<A>
Expand All @@ -52,8 +71,11 @@ where
{
/// Returns the `BatcherType` tag matching this batcher's active variant.
pub(crate) fn t(&self) -> BatcherType {
    match self {
        // Always-available backend first; feature-gated backends follow.
        Batcher::CPU(_) => BatcherType::CPU,
        #[cfg(feature = "gpu")]
        Batcher::GPU(_) => BatcherType::GPU,
        #[cfg(feature = "opencl")]
        Batcher::OpenCL(_) => BatcherType::OpenCL,
    }
}

Expand All @@ -67,6 +89,9 @@ where
max_batch_size: usize,
) -> Result<Self, Error> {
match t {
BatcherType::CPU => Ok(Batcher::CPU(
SimplePoseidonBatchHasher::<A>::new_with_strength(strength, max_batch_size)?,
)),
#[cfg(feature = "gpu")]
BatcherType::GPU => Ok(Batcher::GPU(GPUBatchHasher::<A>::new_with_strength(
cl::default_futhark_context()?,
Expand All @@ -81,9 +106,6 @@ where
max_batch_size,
)?))
}
BatcherType::CPU => Ok(Batcher::CPU(
SimplePoseidonBatchHasher::<A>::new_with_strength(strength, max_batch_size)?,
)),
#[cfg(feature = "gpu")]
BatcherType::FromFutharkContext(futhark_context) => {
Ok(Batcher::GPU(GPUBatchHasher::<A>::new_with_strength(
Expand All @@ -92,6 +114,24 @@ where
max_batch_size,
)?))
}
#[cfg(feature = "opencl")]
BatcherType::OpenCL => Ok(Batcher::OpenCL(CLBatchHasher::<A>::new_with_strength(
get_device(&GPUSelector::Index(0))?,
strength,
max_batch_size,
)?)),
#[cfg(feature = "opencl")]
BatcherType::CustomGPU(selector) => {
Ok(Batcher::OpenCL(CLBatchHasher::<A>::new_with_strength(
get_device(selector)?,
strength,
max_batch_size,
)?))
}
#[cfg(feature = "opencl")]
BatcherType::FromDevice(device) => Ok(Batcher::OpenCL(
CLBatchHasher::<A>::new_with_strength(&device, strength, max_batch_size)?,
)),
}
}

Expand All @@ -102,6 +142,14 @@ where
_ => None,
}
}

/// Returns the result of the wrapped hasher's `device()` when this batcher is the
/// `OpenCL` variant, and `None` for every other variant.
#[cfg(feature = "opencl")]
pub(crate) fn device(&self) -> Option<opencl::Device> {
    if let Batcher::OpenCL(hasher) = self {
        Some(hasher.device())
    } else {
        None
    }
}
}

impl<A> BatchHasher<A> for Batcher<A>
Expand All @@ -110,42 +158,21 @@ where
{
fn hash(&mut self, preimages: &[GenericArray<Fr, A>]) -> Result<Vec<Fr>, Error> {
match self {
Batcher::GPU(batcher) => batcher.hash(preimages),
Batcher::CPU(batcher) => batcher.hash(preimages),
#[cfg(feature = "gpu")]
Batcher::GPU(batcher) => batcher.hash(preimages),
#[cfg(feature = "opencl")]
Batcher::OpenCL(batcher) => batcher.hash(preimages),
}
}

fn max_batch_size(&self) -> usize {
match self {
Batcher::GPU(batcher) => batcher.max_batch_size(),
Batcher::CPU(batcher) => batcher.max_batch_size(),
#[cfg(feature = "gpu")]
Batcher::GPU(batcher) => batcher.max_batch_size(),
#[cfg(feature = "opencl")]
Batcher::OpenCL(batcher) => batcher.max_batch_size(),
}
}
}

/// NoGPUBatchHasher is a dummy required so we can build with the gpu flag even on platforms on which we cannot currently
/// run with GPU. It holds no data; `PhantomData<A>` only ties the type to its arity parameter.
pub struct NoGPUBatchHasher<A>(PhantomData<A>);

// Placeholder `BatchHasher` implementation: both methods panic via `unimplemented!()`,
// so this type only satisfies the trait bound and must never actually be called.
impl<A> BatchHasher<A> for NoGPUBatchHasher<A>
where
A: Arity<Fr>,
{
/// Always panics with `unimplemented!()`; the preimages are ignored.
fn hash(&mut self, _preimages: &[GenericArray<Fr, A>]) -> Result<Vec<Fr>, Error> {
unimplemented!();
}

/// Always panics with `unimplemented!()`; no batch size is ever reported.
fn max_batch_size(&self) -> usize {
unimplemented!();
}
}

// Inherent methods compiled only with the `gpu` feature.
#[cfg(feature = "gpu")]
impl<A> NoGPUBatchHasher<A>
where
A: Arity<Fr>,
{
/// Always panics with `unimplemented!()`; no Futhark context is ever returned.
fn futhark_context(&self) -> Arc<Mutex<FutharkContext>> {
unimplemented!()
}
}
Loading

0 comments on commit 7f90f5b

Please sign in to comment.