Skip to content

Commit

Permalink
cudnn release 1.0.1 (#53)
Browse files Browse the repository at this point in the history
[Bug Fix] Added a padding mask like sub-graph in sdpa node when
kv-sequence length is not a multiple of 64 and padding mask is not
enabled. This allows graphs with kv- sequence length not a multiple of
64 to be executed on cudnn version 8.9.5 onwards. cudnn versions prior
to this now correctly return NOT_SUPPORTED as expected.

[Bug Fix] Fixed an issue where creation of graph object leads to
compilation error in some compilers.

[Bug Fix] cudnn frontend now correctly sets the stream to on the handle.
This affected only the python bindings.

[Internal change] Streamlined includes of cudnn graph API header files
into `cudnn_frontend.h`.
  • Loading branch information
Anerudhan authored Jan 4, 2024
1 parent 9f82dda commit f87101b
Show file tree
Hide file tree
Showing 44 changed files with 234 additions and 110 deletions.
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
cmake_minimum_required(VERSION 3.17)

project(cudnn_frontend VERSION 1.0.0)
project(cudnn_frontend VERSION 1.0.1)

option(CUDNN_FRONTEND_BUILD_SAMPLES "Defines if samples are built or not." ON)
option(CUDNN_FRONTEND_BUILD_UNIT_TESTS "Defines if unittests are built or not." OFF)
Expand Down
5 changes: 5 additions & 0 deletions README.FE.1.0.md
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,11 @@ Get workspace to run autotune on all plans.

`get_autotune_workspace_size() const`

### Error handling

C++ API returns a error object which has a error code and error message.

Python API throws an exception with similar error message to be handled in python API.

## Samples
Samples are meant to illustrate FE v1.0 API usage to users.
Expand Down
8 changes: 7 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,13 @@ Skip building python bindings by providing `CUDNN_FRONTEND_BUILD_PYTHON_BINDINGS
In case, you have a stale cmake cache and want to update the cudnn/cuda paths, please delete the cmake cache (or build directory and redo the above steps).

### Python API
Install FE python API by running: `CUDAToolkit_ROOT=/path/to/cuda CUDNN_PATH=/path/to/cudnn pip install /path/to/cudnn_frontend`.
Install FE python API by running:
pip install git+https://github.com/NVIDIA/cudnn-frontend.git

Incase of custom installation of CUDA and CUDNN, the default path can be overriden by:

`CUDAToolkit_ROOT=/path/to/cuda CUDNN_PATH=/path/to/cudnn pip install /path/to/cudnn_frontend`.

To provide a custom CUDA, export environment variable: `CUDAToolkit_ROOT`.
To provide a custom CUDNN, export environment variable: `CUDNN_PATH`.

Expand Down
2 changes: 0 additions & 2 deletions include/cudnn_backend_base.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,6 @@

#include <ostream>

#include <cudnn.h>

namespace cudnn_frontend {

///
Expand Down
5 changes: 4 additions & 1 deletion include/cudnn_frontend.h
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,9 @@
* - Simpler samples on how to use the new API.
*/

#include <cudnn.h>
#include <cudnn_backend.h>

#include "cudnn_frontend_ConvDesc.h"
#include "cudnn_frontend_Heuristics.h"
#include "cudnn_frontend_Engine.h"
Expand All @@ -122,7 +125,7 @@

#define CUDNN_FRONTEND_MAJOR_VERSION 1
#define CUDNN_FRONTEND_MINOR_VERSION 0
#define CUDNN_FRONTEND_PATCH_VERSION 0
#define CUDNN_FRONTEND_PATCH_VERSION 1
#define CUDNN_FRONTEND_VERSION \
((CUDNN_FRONTEND_MAJOR_VERSION * 10000) + (CUDNN_FRONTEND_MINOR_VERSION * 100) + CUDNN_FRONTEND_PATCH_VERSION)

Expand Down
6 changes: 3 additions & 3 deletions include/cudnn_frontend/cudnn_interface.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ class ICudnn {
// Key cannot be fe::Tensor, or shared_ptr<fe::Tensor>, or underlying object address of fe::Tensor.
// Hence using uid, as that uniquely identifies both types of tensors.
std::unordered_map<int64_t, std::shared_ptr<cudnn_frontend::Tensor>> uid_to_tensors;
std::vector<cudnn_frontend::Operation> operations;
std::vector<std::shared_ptr<cudnn_frontend::Operation>> operations;

std::vector<std::shared_ptr<OperationGraph_v8>> operation_graphs;
std::vector<std::unordered_set<uid_t>> variant_pack_uids;
Expand Down Expand Up @@ -91,8 +91,8 @@ class ICudnn {
error_t
create_cudnn_operation_graphs(cudnnHandle_t handle) {
std::vector<Operation const*> cudnn_operations;
for (auto const& operation : operations) {
cudnn_operations.push_back(&operation);
for (std::shared_ptr<cudnn_frontend::Operation> operation : operations) {
cudnn_operations.push_back(operation.get());
}
auto cudnn_operation_graph = cudnn_frontend::OperationGraphBuilder()
.setHandle(handle)
Expand Down
6 changes: 4 additions & 2 deletions include/cudnn_frontend/node/batchnorm.h
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,7 @@ class BatchNormNode : public INode {
error_t
create_cudnn_operations(
std::unordered_set<uid_t>& uids_involved_in_operations,
std::vector<cudnn_frontend::Operation_v8>& operations,
std::vector<std::shared_ptr<cudnn_frontend::Operation>>& operations,
std::unordered_map<int64_t, std::shared_ptr<cudnn_frontend::Tensor>>& tensors) const override final {
getLogger() << "[cudnn_frontend] INFO: "
<< "Building BatchNormNode operations " << attributes.name << "..." << std::endl;
Expand Down Expand Up @@ -195,7 +195,9 @@ class BatchNormNode : public INode {

batchnorm_operation_builder.setPeerStatTensor(peer_stats);

operations.push_back(std::move(batchnorm_operation_builder.build()));
auto operation = batchnorm_operation_builder.build();

operations.push_back(std::make_shared<Operation_v8>(std::move(operation)));

#ifndef NV_CUDNN_DISABLE_EXCEPTION
} catch (cudnn_frontend::cudnnException& e) {
Expand Down
7 changes: 5 additions & 2 deletions include/cudnn_frontend/node/batchnorm_inference.h
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ class BatchnormInferenceNode : public INode {
error_t
create_cudnn_operations(
std::unordered_set<uid_t>& uids_involved_in_operations,
std::vector<cudnn_frontend::Operation_v8>& operations,
std::vector<std::shared_ptr<cudnn_frontend::Operation>>& operations,
std::unordered_map<int64_t, std::shared_ptr<cudnn_frontend::Tensor>>& tensors) const override final {
getLogger() << "[cudnn_frontend] INFO: "
<< "Building BatchnormInferenceNode operations " << attributes.name << "..." << std::endl;
Expand Down Expand Up @@ -129,7 +129,10 @@ class BatchnormInferenceNode : public INode {
CUDNN_FE_VALIDATE_AND_ASSIGN_OUTPUT_TENSOR(Y, Batchnorm_inference_attributes::output_names::Y);
batchnorm_operation_builder.setyDesc(*(tensors.at(Y->second->get_uid())));

operations.push_back(std::move(batchnorm_operation_builder.build()));
auto operation = batchnorm_operation_builder.build();

operations.push_back(std::make_shared<Operation_v8>(std::move(operation)));

#ifndef NV_CUDNN_DISABLE_EXCEPTION
} catch (cudnn_frontend::cudnnException& e) {
throw cudnnException(e.what(), e.getCudnnStatus());
Expand Down
6 changes: 4 additions & 2 deletions include/cudnn_frontend/node/bn_finalize.h
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ class BatchNormFinalizeNode : public INode {
error_t
create_cudnn_operations(
std::unordered_set<uid_t>& uids_involved_in_operations,
std::vector<cudnn_frontend::Operation_v8>& operations,
std::vector<std::shared_ptr<cudnn_frontend::Operation>>& operations,
std::unordered_map<int64_t, std::shared_ptr<cudnn_frontend::Tensor>>& tensors) const override final {
getLogger() << "[cudnn_frontend] INFO: "
<< "Building BatchNormFinalizeNode operations " << attributes.name << "..." << std::endl;
Expand Down Expand Up @@ -158,7 +158,9 @@ class BatchNormFinalizeNode : public INode {
CUDNN_FE_VALIDATE_AND_ASSIGN_INPUT_TENSOR(ACCUM_COUNT, BN_finalize_attributes::input_names::ACCUM_COUNT);
batchnorm_operation_builder.setAccumCountTensor(*(tensors.at(ACCUM_COUNT->second->get_uid())));

operations.push_back(std::move(batchnorm_operation_builder.build()));
auto operation = batchnorm_operation_builder.build();

operations.push_back(std::make_shared<Operation_v8>(std::move(operation)));

#ifndef NV_CUDNN_DISABLE_EXCEPTION
} catch (cudnn_frontend::cudnnException& e) {
Expand Down
6 changes: 4 additions & 2 deletions include/cudnn_frontend/node/conv_dgrad.h
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ class DgradNode : public INode {
error_t
create_cudnn_operations(
std::unordered_set<uid_t>& uids_involved_in_operations,
std::vector<cudnn_frontend::Operation_v8>& operations,
std::vector<std::shared_ptr<cudnn_frontend::Operation>>& operations,
std::unordered_map<int64_t, std::shared_ptr<cudnn_frontend::Tensor>>& tensors) const override final {
getLogger() << "[cudnn_frontend] INFO: "
<< "Building DgradNode operations " << attributes.name << "..." << std::endl;
Expand Down Expand Up @@ -132,7 +132,9 @@ class DgradNode : public INode {

dgrad_operation_builder.setcDesc(dgrad_descriptor).setAlpha(1.f).setBeta(0.f);

operations.push_back(std::move(dgrad_operation_builder.build()));
auto operation = dgrad_operation_builder.build();

operations.push_back(std::make_shared<Operation_v8>(std::move(operation)));

#ifndef NV_CUDNN_DISABLE_EXCEPTION
} catch (cudnn_frontend::cudnnException& e) {
Expand Down
7 changes: 5 additions & 2 deletions include/cudnn_frontend/node/conv_fprop.h
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ class ConvolutionNode : public INode {
error_t
create_cudnn_operations(
std::unordered_set<uid_t>& uids_involved_in_operations,
std::vector<cudnn_frontend::Operation_v8>& operations,
std::vector<std::shared_ptr<cudnn_frontend::Operation>>& operations,
std::unordered_map<int64_t, std::shared_ptr<cudnn_frontend::Tensor>>& tensors) const override final {
getLogger() << "[cudnn_frontend] INFO: "
<< "Building ConvolutionNode operations " << attributes.name << "..." << std::endl;
Expand Down Expand Up @@ -148,7 +148,10 @@ class ConvolutionNode : public INode {
convolution_operation_builder.setyDesc(*(tensors[Y->second->get_uid()]));

convolution_operation_builder.setcDesc(convolution_descriptor).setAlpha(1.f).setBeta(0.f);
operations.push_back(std::move(convolution_operation_builder.build()));

auto operation = convolution_operation_builder.build();

operations.push_back(std::make_shared<Operation_v8>(std::move(operation)));

#ifndef NV_CUDNN_DISABLE_EXCEPTION
} catch (cudnn_frontend::cudnnException& e) {
Expand Down
6 changes: 4 additions & 2 deletions include/cudnn_frontend/node/conv_wgrad.h
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ class WgradNode : public INode {
error_t
create_cudnn_operations(
std::unordered_set<uid_t>& uids_involved_in_operations,
std::vector<cudnn_frontend::Operation_v8>& operations,
std::vector<std::shared_ptr<cudnn_frontend::Operation>>& operations,
std::unordered_map<int64_t, std::shared_ptr<cudnn_frontend::Tensor>>& tensors) const override final {
getLogger() << "[cudnn_frontend] INFO: "
<< "Building WgradNode operations " << attributes.name << "..." << std::endl;
Expand Down Expand Up @@ -132,7 +132,9 @@ class WgradNode : public INode {

wgrad_operation_builder.setcDesc(wgrad_descriptor).setAlpha(1.f).setBeta(0.f);

operations.push_back(std::move(wgrad_operation_builder.build()));
auto operation = wgrad_operation_builder.build();

operations.push_back(std::make_shared<Operation_v8>(std::move(operation)));

#ifndef NV_CUDNN_DISABLE_EXCEPTION
} catch (cudnn_frontend::cudnnException& e) {
Expand Down
6 changes: 4 additions & 2 deletions include/cudnn_frontend/node/dbn.h
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,7 @@ class DBNNode : public INode {
error_t
create_cudnn_operations(
std::unordered_set<uid_t>& uids_involved_in_operations,
std::vector<cudnn_frontend::Operation_v8>& operations,
std::vector<std::shared_ptr<cudnn_frontend::Operation>>& operations,
std::unordered_map<int64_t, std::shared_ptr<cudnn_frontend::Tensor>>& tensors) const override final {
getLogger() << "[cudnn_frontend] INFO: "
<< "Building DBNNode operations " << attributes.name << "..." << std::endl;
Expand Down Expand Up @@ -163,7 +163,9 @@ class DBNNode : public INode {

DBN_operation_builder.setPeerStatTensor(peer_stats);

operations.push_back(std::move(DBN_operation_builder.build()));
auto operation = DBN_operation_builder.build();

operations.push_back(std::make_shared<Operation_v8>(std::move(operation)));

#ifndef NV_CUDNN_DISABLE_EXCEPTION
} catch (cudnn_frontend::cudnnException& e) {
Expand Down
7 changes: 5 additions & 2 deletions include/cudnn_frontend/node/dbn_weight.h
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ class DBNWeightNode : public INode {
error_t
create_cudnn_operations(
std::unordered_set<uid_t>& uids_involved_in_operations,
std::vector<cudnn_frontend::Operation_v8>& operations,
std::vector<std::shared_ptr<cudnn_frontend::Operation>>& operations,
std::unordered_map<int64_t, std::shared_ptr<cudnn_frontend::Tensor>>& tensors) const override final {
getLogger() << "[cudnn_frontend] INFO: "
<< "Building DBNWeightNode operations " << attributes.name << "..." << std::endl;
Expand Down Expand Up @@ -154,7 +154,10 @@ class DBNWeightNode : public INode {
batchnorm_operation_builder.setDScaleAndDBias(*(tensors.at(DSCALE->second->get_uid())),
*(tensors.at(DBIAS->second->get_uid())));

operations.push_back(std::move(batchnorm_operation_builder.build()));
auto operation = batchnorm_operation_builder.build();

operations.push_back(std::make_shared<Operation_v8>(std::move(operation)));

#ifndef NV_CUDNN_DISABLE_EXCEPTION
} catch (cudnn_frontend::cudnnException& e) {
throw cudnnException(e.what(), e.getCudnnStatus());
Expand Down
6 changes: 4 additions & 2 deletions include/cudnn_frontend/node/dln.h
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,7 @@ class DLNNode : public INode {
error_t
create_cudnn_operations(
std::unordered_set<uid_t>& uids_involved_in_operations,
std::vector<cudnn_frontend::Operation_v8>& operations,
std::vector<std::shared_ptr<cudnn_frontend::Operation>>& operations,
std::unordered_map<int64_t, std::shared_ptr<cudnn_frontend::Tensor>>& tensors) const override final {
getLogger() << "[cudnn_frontend] INFO: "
<< "Building DLNNode operations " << attributes.name << "..." << std::endl;
Expand Down Expand Up @@ -188,7 +188,9 @@ class DLNNode : public INode {
uids_involved_in_operations.insert(epsilon->get_uid());
}

operations.push_back(std::move(DLN_op_builder.build()));
auto operation = DLN_op_builder.build();

operations.push_back(std::make_shared<Operation_v8>(std::move(operation)));

#ifndef NV_CUDNN_DISABLE_EXCEPTION
} catch (cudnn_frontend::cudnnException& e) {
Expand Down
6 changes: 4 additions & 2 deletions include/cudnn_frontend/node/genstats.h
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ class GenstatsNode : public INode {
error_t
create_cudnn_operations(
std::unordered_set<uid_t>& uids_involved_in_operations,
std::vector<cudnn_frontend::Operation_v8>& operations,
std::vector<std::shared_ptr<cudnn_frontend::Operation>>& operations,
std::unordered_map<int64_t, std::shared_ptr<cudnn_frontend::Tensor>>& tensors) const override final {
getLogger() << "[cudnn_frontend] INFO: "
<< "Building GenstatsNode operations " << attributes.name << "..." << std::endl;
Expand All @@ -126,7 +126,9 @@ class GenstatsNode : public INode {
CUDNN_FE_VALIDATE_AND_ASSIGN_OUTPUT_TENSOR(SQ_SUM, Genstats_attributes::output_names::SQ_SUM);
genstats_operation_builder.setSqSumDesc(*(tensors.at(SQ_SUM->second->get_uid())));

operations.push_back(std::move(genstats_operation_builder.build()));
auto operation = genstats_operation_builder.build();

operations.push_back(std::make_shared<Operation_v8>(std::move(operation)));

#ifndef NV_CUDNN_DISABLE_EXCEPTION
} catch (cudnn_frontend::cudnnException& e) {
Expand Down
12 changes: 8 additions & 4 deletions include/cudnn_frontend/node/instancenorm.h
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,7 @@ class InstanceNormNode : public INode {
error_t
create_cudnn_operations(
std::unordered_set<uid_t>& uids_involved_in_operations,
std::vector<cudnn_frontend::Operation_v8>& operations,
std::vector<std::shared_ptr<cudnn_frontend::Operation>>& operations,
std::unordered_map<int64_t, std::shared_ptr<cudnn_frontend::Tensor>>& tensors) const override final {
getLogger() << "[cudnn_frontend] INFO: "
<< "Building InstanceNormNode operations " << attributes.name << "..." << std::endl;
Expand Down Expand Up @@ -161,7 +161,9 @@ class InstanceNormNode : public INode {
*(tensors.at(INV_VARIANCE->second->get_uid())));
}

operations.push_back(std::move(op_builder.build()));
auto operation = op_builder.build();

operations.push_back(std::make_shared<Operation_v8>(std::move(operation)));

#ifndef NV_CUDNN_DISABLE_EXCEPTION
} catch (cudnn_frontend::cudnnException& e) {
Expand Down Expand Up @@ -305,7 +307,7 @@ class DINNode : public INode {
error_t
create_cudnn_operations(
std::unordered_set<uid_t>& uids_involved_in_operations,
std::vector<cudnn_frontend::Operation_v8>& operations,
std::vector<std::shared_ptr<cudnn_frontend::Operation>>& operations,
std::unordered_map<int64_t, std::shared_ptr<cudnn_frontend::Tensor>>& tensors) const override final {
getLogger() << "[cudnn_frontend] INFO: "
<< "Building DINode operations " << attributes.name << "..." << std::endl;
Expand Down Expand Up @@ -343,7 +345,9 @@ class DINNode : public INode {
CUDNN_FE_VALIDATE_AND_ASSIGN_OUTPUT_TENSOR(DX, Instancenorm_backward_attributes::output_names::DX);
DIN_operation_builder.setdxDesc(*(tensors.at(DX->second->get_uid())));

operations.push_back(std::move(DIN_operation_builder.build()));
auto operation = DIN_operation_builder.build();

operations.push_back(std::make_shared<Operation_v8>(std::move(operation)));

#ifndef NV_CUDNN_DISABLE_EXCEPTION
} catch (cudnn_frontend::cudnnException& e) {
Expand Down
7 changes: 5 additions & 2 deletions include/cudnn_frontend/node/layernorm.h
Original file line number Diff line number Diff line change
Expand Up @@ -169,7 +169,7 @@ class LayerNormNode : public INode {
error_t
create_cudnn_operations(
std::unordered_set<uid_t>& uids_involved_in_operations,
std::vector<cudnn_frontend::Operation_v8>& operations,
std::vector<std::shared_ptr<cudnn_frontend::Operation>>& operations,
std::unordered_map<int64_t, std::shared_ptr<cudnn_frontend::Tensor>>& tensors) const override final {
getLogger() << "[cudnn_frontend] INFO: "
<< "Building LayerNormNode operations " << attributes.name << "..." << std::endl;
Expand Down Expand Up @@ -204,7 +204,10 @@ class LayerNormNode : public INode {
*(tensors.at(INV_VARIANCE->second->get_uid())));
}

operations.push_back(std::move(layernorm_operation_builder.build()));
auto operation = layernorm_operation_builder.build();

operations.push_back(std::make_shared<Operation_v8>(std::move(operation)));

#ifndef NV_CUDNN_DISABLE_EXCEPTION
} catch (cudnn_frontend::cudnnException& e) {
throw cudnnException(e.what(), e.getCudnnStatus());
Expand Down
7 changes: 5 additions & 2 deletions include/cudnn_frontend/node/matmul.h
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ class MatmulNode : public INode {
error_t
create_cudnn_operations(
std::unordered_set<uid_t>& uids_involved_in_operations,
std::vector<cudnn_frontend::Operation_v8>& operations,
std::vector<std::shared_ptr<cudnn_frontend::Operation>>& operations,
std::unordered_map<int64_t, std::shared_ptr<cudnn_frontend::Tensor>>& tensors) const override final {
getLogger() << "[cudnn_frontend] INFO: "
<< "Building MatmulNode operations " << attributes.name << "..." << std::endl;
Expand Down Expand Up @@ -152,7 +152,10 @@ class MatmulNode : public INode {
if ((K_override != attributes.inputs.end()) && (K_override->second != nullptr)) {
matmul_operation_builder.setkOverrideDesc(*tensors.at(K_override->second->get_uid()));
}
operations.push_back(std::move(matmul_operation_builder.build()));

auto operation = matmul_operation_builder.build();

operations.push_back(std::make_shared<Operation_v8>(std::move(operation)));

#ifndef NV_CUDNN_DISABLE_EXCEPTION
} catch (cudnn_frontend::cudnnException& e) {
Expand Down
6 changes: 4 additions & 2 deletions include/cudnn_frontend/node/pointwise.h
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ class PointwiseNode : public INode {
error_t
create_cudnn_operations(
std::unordered_set<uid_t>& uids_involved_in_operations,
std::vector<cudnn_frontend::Operation_v8>& operations,
std::vector<std::shared_ptr<cudnn_frontend::Operation>>& operations,
std::unordered_map<int64_t, std::shared_ptr<cudnn_frontend::Tensor>>& tensors) const override final {
getLogger() << "[cudnn_frontend] INFO: "
<< "Building PointwiseNode operations " << attributes.name << "..." << std::endl;
Expand Down Expand Up @@ -154,7 +154,9 @@ class PointwiseNode : public INode {
pointwise_operation_builder.setyDesc(*(tensors.at(OUT_0->second->get_uid())));
}

operations.push_back(std::move(pointwise_operation_builder.build()));
auto operation = pointwise_operation_builder.build();

operations.push_back(std::make_shared<Operation_v8>(std::move(operation)));

#ifndef NV_CUDNN_DISABLE_EXCEPTION
} catch (cudnn_frontend::cudnnException& e) {
Expand Down
Loading

0 comments on commit f87101b

Please sign in to comment.