Slice layer #3055

Merged · 3 commits · Feb 15, 2025
Changes from 2 commits
70 changes: 70 additions & 0 deletions dlib/cuda/cpu_dlib.cpp
@@ -3105,6 +3105,76 @@ namespace dlib
}
}

// ------------------------------------------------------------------------------------

void copy_tensor(
bool add_to,
tensor& dest,
size_t dk, size_t dnr, size_t dnc,
const tensor& src,
size_t sk, size_t snr, size_t snc,
size_t k, size_t nr, size_t nc
)
{
size_t dest_stride_sample = static_cast<size_t>(dest.nc() * dest.nr() * dest.k());
size_t dest_stride_k = static_cast<size_t>(dest.nc() * dest.nr());
size_t dest_stride_nr = static_cast<size_t>(dest.nc());

size_t src_stride_sample = static_cast<size_t>(src.nc() * src.nr() * src.k());
size_t src_stride_k = static_cast<size_t>(src.nc() * src.nr());
size_t src_stride_nr = static_cast<size_t>(src.nc());

DLIB_CASSERT(dest.num_samples() == src.num_samples(), "dest and src must have the same number of samples");
DLIB_CASSERT(dest.k() - dk >= k &&
dest.nr() - dnr >= nr &&
dest.nc() - dnc >= nc, "Not enough space in dest tensor");
DLIB_CASSERT(src.k() - sk >= k &&
src.nr() - snr >= nr &&
src.nc() - snc >= nc, "Not enough space in src tensor");

float* dest_p = dest.host() + dk * dest_stride_k
                            + dnr * dest_stride_nr
                            + dnc;

const float* src_p = src.host() + sk * src_stride_k
                                + snr * src_stride_nr
                                + snc;

for (long i = 0; i < src.num_samples(); ++i)
{
float* dest_channel_p = dest_p;
const float* src_channel_p = src_p;

for (long j = 0; j < k; ++j)
{
float* dest_row_p = dest_channel_p;
const float* src_row_p = src_channel_p;

for (long r = 0; r < nr; ++r)
{
if (add_to)
{
for (size_t c = 0; c < nc; ++c)
dest_row_p[c] += src_row_p[c];
}
else
{
::memcpy(dest_row_p, src_row_p, nc * sizeof(float));
}

dest_row_p += dest_stride_nr;
src_row_p += src_stride_nr;
}

dest_channel_p += dest_stride_k;
src_channel_p += src_stride_k;
}

dest_p += dest_stride_sample;
src_p += src_stride_sample;
}
}
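
The pointer arithmetic above reduces to one linear offset per copied row. A minimal standalone sketch of the same NCHW sub-block copy for a single sample (not dlib code; all names here are illustrative):

#include <cstddef>
#include <cstring>

// Copy a k x nr x nc block between two NCHW planes: (dest_nr, dest_nc)
// and (src_nr, src_nc) are the full row/column dimensions of each tensor;
// the block is read starting at (sk, snr, snc) and written starting at
// (dk, dnr, dnc).
void copy_block(float* dest, std::size_t dest_nr, std::size_t dest_nc,
                std::size_t dk, std::size_t dnr, std::size_t dnc,
                const float* src, std::size_t src_nr, std::size_t src_nc,
                std::size_t sk, std::size_t snr, std::size_t snc,
                std::size_t k, std::size_t nr, std::size_t nc)
{
    for (std::size_t j = 0; j < k; ++j)
        for (std::size_t r = 0; r < nr; ++r)
        {
            // Row (dnr + r) of channel (dk + j), shifted dnc columns in.
            float*       d = dest + ((dk + j) * dest_nr + dnr + r) * dest_nc + dnc;
            const float* s = src  + ((sk + j) * src_nr  + snr + r) * src_nc  + snc;
            std::memcpy(d, s, nc * sizeof(float));
        }
}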

// ------------------------------------------------------------------------------------

void transpose(
11 changes: 11 additions & 0 deletions dlib/cuda/cpu_dlib.h
@@ -692,6 +692,17 @@ namespace dlib
size_t count_k
);

// -----------------------------------------------------------------------------------

void copy_tensor(
bool add_to,
tensor& dest,
size_t dk, size_t dnr, size_t dnc,
const tensor& src,
size_t sk, size_t snr, size_t snc,
size_t k, size_t nr, size_t nc
);

// -----------------------------------------------------------------------------------

void transpose(
71 changes: 71 additions & 0 deletions dlib/cuda/cuda_dlib.cu
@@ -2623,6 +2623,77 @@ namespace dlib
}
}

__global__ void _cuda_copy_strided_tensor_add_to (float* dest, const float* src,
size_t ns, size_t nk, size_t nr, size_t nc,
size_t dk, size_t dr, size_t dc,
size_t sk, size_t sr, size_t sc)
{
for(auto i : grid_stride_range(0, ns*nk*nr*nc))
{
size_t n,k,r,c;
unpack_idx(i, nk,nr,nc, n,k,r,c);
dest[pack_idx(dk,dr,dc, n,k,r,c)] += src[pack_idx(sk,sr,sc, n,k,r,c)];
}
}

__global__ void _cuda_copy_strided_tensor (float* dest, const float* src,
size_t ns, size_t nk, size_t nr, size_t nc,
size_t dk, size_t dr, size_t dc,
size_t sk, size_t sr, size_t sc)
{
for(auto i : grid_stride_range(0, ns*nk*nr*nc))
{
size_t n,k,r,c;
unpack_idx(i, nk,nr,nc, n,k,r,c);
dest[pack_idx(dk,dr,dc, n,k,r,c)] = src[pack_idx(sk,sr,sc, n,k,r,c)];
}
}
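
Both kernels flatten the copied block's (sample, channel, row, column) coordinates over the block's own shape, then re-linearize them against each tensor's full geometry; note that dest and src already point at the corner of their respective blocks when the kernels are launched below. The definitions of unpack_idx and pack_idx live in dlib's CUDA utility headers; a host-side sketch of their assumed semantics, inferred from how the kernels use them:

#include <cstddef>

// Split flat index i over a (·, nk, nr, nc) block into coordinates.
inline void unpack_idx(std::size_t i,
                       std::size_t nk, std::size_t nr, std::size_t nc,
                       std::size_t& n, std::size_t& k,
                       std::size_t& r, std::size_t& c)
{
    c = i % nc;  i /= nc;
    r = i % nr;  i /= nr;
    k = i % nk;  i /= nk;
    n = i;
}

// Re-linearize (n, k, r, c) against another tensor's (nk, nr, nc) dims.
inline std::size_t pack_idx(std::size_t nk, std::size_t nr, std::size_t nc,
                            std::size_t n, std::size_t k,
                            std::size_t r, std::size_t c)
{
    return ((n * nk + k) * nr + r) * nc + c;
}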

void copy_tensor(
bool add_to,
tensor& dest,
size_t dk, size_t dnr, size_t dnc,
const tensor& src,
size_t sk, size_t snr, size_t snc,
size_t k, size_t nr, size_t nc
)
{

DLIB_CASSERT(dest.num_samples() == src.num_samples(), "dest and src must have the same number of samples");
DLIB_CASSERT(dest.k() - dk >= k &&
dest.nr() - dnr >= nr &&
dest.nc() - dnc >= nc, "Not enough space in dest tensor");
DLIB_CASSERT(src.k() - sk >= k &&
src.nr() - snr >= nr &&
src.nc() - snc >= nc, "Not enough space in src tensor");

float* dest_p = dest.device() + dk * static_cast<size_t>(dest.nc() * dest.nr())
                              + dnr * static_cast<size_t>(dest.nc())
                              + dnc;

const float* src_p = src.device() + sk * static_cast<size_t>(src.nc() * src.nr())
                                  + snr * static_cast<size_t>(src.nc())
                                  + snc;

if (add_to)
{
launch_kernel(_cuda_copy_strided_tensor_add_to, max_jobs(dest.size()),
dest_p, src_p, dest.num_samples(),
k, nr, nc,
dest.k(), dest.nr(), dest.nc(),
src.k(), src.nr(), src.nc());
}
else
{
launch_kernel(_cuda_copy_strided_tensor, max_jobs(dest.size()),
dest_p, src_p, dest.num_samples(),
k, nr, nc,
dest.k(), dest.nr(), dest.nc(),
src.k(), src.nr(), src.nc());
}
}


// ----------------------------------------------------------------------------------------

__global__ void _cuda_transpose(size_t dsize, size_t dk, size_t dnr, size_t dnc, float* d,
11 changes: 11 additions & 0 deletions dlib/cuda/cuda_dlib.h
@@ -589,6 +589,17 @@ namespace dlib
size_t count_k
);

// ----------------------------------------------------------------------------------------

void copy_tensor(
bool add_to,
tensor& dest,
size_t dk, size_t dnr, size_t dnc,
const tensor& src,
size_t sk, size_t snr, size_t snc,
size_t k, size_t nr, size_t nc
);

// ----------------------------------------------------------------------------------------

void transpose(
18 changes: 18 additions & 0 deletions dlib/cuda/tensor_tools.cpp
@@ -1333,6 +1333,24 @@ namespace dlib { namespace tt
#endif
}

// ----------------------------------------------------------------------------------------

void copy_tensor(
bool add_to,
tensor& dest,
size_t dk, size_t dnr, size_t dnc,
const tensor& src,
size_t sk, size_t snr, size_t snc,
size_t k, size_t nr, size_t nc
)
{
#ifdef DLIB_USE_CUDA
cuda::copy_tensor(add_to, dest, dk, dnr, dnc, src, sk, snr, snc, k, nr, nc);
#else
cpu::copy_tensor(add_to, dest, dk, dnr, dnc, src, sk, snr, snc, k, nr, nc);
#endif
}
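
A hypothetical call, for illustration only: copy a 2-channel, 8x8 window out of a larger activation map, reading from channel 1, row 4, column 4 of the source and writing at the destination's origin.

dlib::resizable_tensor src(1, 4, 16, 16);   // n=1, k=4, nr=16, nc=16
dlib::resizable_tensor dest(1, 2, 8, 8);
src = 1;                                    // fill the source with ones
// add_to=false: overwrite dest with the 2x8x8 block of src at (1, 4, 4).
dlib::tt::copy_tensor(false, dest, 0, 0, 0, src, 1, 4, 4, 2, 8, 8);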

// ----------------------------------------------------------------------------------------

void inv::
11 changes: 11 additions & 0 deletions dlib/cuda/tensor_tools.h
@@ -2334,6 +2334,17 @@ namespace dlib { namespace tt
i.e., copies the content of each sample from src into the corresponding place of the sample at dest.
!*/

// ----------------------------------------------------------------------------------------

void copy_tensor(
bool add_to,
tensor& dest,
size_t dk, size_t dnr, size_t dnc,
const tensor& src,
size_t sk, size_t snr, size_t snc,
size_t k, size_t nr, size_t nc
);
Owner:
Add docs for this (like you see for the other functions in this file :) )

Contributor Author:
Oh, I forgot that. Added the doc string. Thanks!
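
The doc string itself landed in the third commit, which this two-commit diff view doesn't show. Judging from the asserts in the implementations above, a spec comment in dlib's usual requires/ensures convention would read roughly:

/*!
    requires
        - dest.num_samples() == src.num_samples()
        - dest.k() - dk >= k && dest.nr() - dnr >= nr && dest.nc() - dnc >= nc
        - src.k()  - sk >= k && src.nr()  - snr >= nr && src.nc()  - snc >= nc
    ensures
        - for each sample, copies (or adds, when add_to is true) the
          k x nr x nc block of src starting at channel sk, row snr,
          column snc into the block of dest starting at channel dk,
          row dnr, column dnc.
!*/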


// ----------------------------------------------------------------------------------------

void transpose(
125 changes: 125 additions & 0 deletions dlib/dnn/layers.h
@@ -4631,6 +4631,131 @@ namespace dlib
>
using extract = add_layer<extract_<offset,k,nr,nc>, SUBNET>;

// ----------------------------------------------------------------------------------------

template <
long _offset_k,
long _offset_nr,
long _offset_nc,
long _k,
long _nr,
long _nc
>
class slice_
{
static_assert(_offset_k >= 0, "The channel offset must be >= 0.");
static_assert(_offset_nr >= 0, "The row offset must be >= 0.");
static_assert(_offset_nc >= 0, "The column offset must be >= 0.");
static_assert(_k > 0, "The number of channels must be > 0.");
static_assert(_nr > 0, "The number of rows must be > 0.");
static_assert(_nc > 0, "The number of columns must be > 0.");
public:
slice_(
)
{
}

template <typename SUBNET>
void setup (const SUBNET& sub)
{
DLIB_CASSERT(sub.get_output().k() >= _offset_k + _k &&
sub.get_output().nr() >= _offset_nr + _nr &&
sub.get_output().nc() >= _offset_nc + _nc,
"The requested slice does not fit inside the input tensor.");
}

template <typename SUBNET>
void forward(const SUBNET& sub, resizable_tensor& output)
{
output.set_size(sub.get_output().num_samples(), _k, _nr, _nc);
tt::copy_tensor(false, output, 0, 0, 0, sub.get_output(), _offset_k, _offset_nr, _offset_nc, _k, _nr, _nc);
}

template <typename SUBNET>
void backward(const tensor& gradient_input, SUBNET& sub, tensor& /*params_grad*/)
{
tt::copy_tensor(true, sub.get_gradient_input(), _offset_k, _offset_nr, _offset_nc, gradient_input, 0, 0, 0, _k, _nr, _nc);
}

const tensor& get_layer_params() const { return params; }
tensor& get_layer_params() { return params; }

friend void serialize(const slice_& /*item*/, std::ostream& out)
{
serialize("slice_", out);
serialize(_offset_k, out);
serialize(_offset_nr, out);
serialize(_offset_nc, out);
serialize(_k, out);
serialize(_nr, out);
serialize(_nc, out);
}

friend void deserialize(slice_& /*item*/, std::istream& in)
{
std::string version;
deserialize(version, in);
if (version != "slice_")
throw serialization_error("Unexpected version '"+version+"' found while deserializing dlib::slice_.");

long offset_k;
long offset_nr;
long offset_nc;
long k;
long nr;
long nc;
deserialize(offset_k, in);
deserialize(offset_nr, in);
deserialize(offset_nc, in);
deserialize(k, in);
deserialize(nr, in);
deserialize(nc, in);

if (offset_k != _offset_k) throw serialization_error("Wrong offset_k found while deserializing dlib::slice_");
if (offset_nr != _offset_nr) throw serialization_error("Wrong offset_nr found while deserializing dlib::slice_");
if (offset_nc != _offset_nc) throw serialization_error("Wrong offset_nc found while deserializing dlib::slice_");
if (k != _k) throw serialization_error("Wrong k found while deserializing dlib::slice_");
if (nr != _nr) throw serialization_error("Wrong nr found while deserializing dlib::slice_");
if (nc != _nc) throw serialization_error("Wrong nc found while deserializing dlib::slice_");
}

friend std::ostream& operator<<(std::ostream& out, const slice_& /*item*/)
{
out << "slice\t ("
<< "offset_k="<<_offset_k
<< "offset_nr="<<_offset_nr
<< "offset_nc="<<_offset_nc
<< ", k="<<_k
<< ", nr="<<_nr
<< ", nc="<<_nc
<< ")";
return out;
}

friend void to_xml(const slice_& /*item*/, std::ostream& out)
{
out << "<slice";
out << " offset_k='"<<_offset_k<<"'";
out << " offset_nr='"<<_offset_nr<<"'";
out << " offset_nr='"<<_offset_nc<<"'";
out << " k='"<<_k<<"'";
out << " nr='"<<_nr<<"'";
out << " nc='"<<_nc<<"'";
out << "/>\n";
}
private:
resizable_tensor params; // unused
};

template <
long offset_k,
long offset_nr,
long offset_nc,
long k,
long nr,
long nc,
typename SUBNET
>
using slice = add_layer<slice_<offset_k,offset_nr,offset_nc,k,nr,nc>, SUBNET>;
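
Putting the layer to use: forward() copies the requested sub-block out of the input, and backward() routes the incoming gradient back into the matching region of the input's gradient with add_to=true, so other layers reading the same input accumulate correctly. An illustrative network fragment with made-up sizes (the static slice dimensions must match the activation actually produced by the layers below it at runtime):

#include <dlib/dnn.h>

// Keep the first 32 channels of a 64-channel, 8x8 activation map.
using net_type = dlib::loss_multiclass_log<
    dlib::fc<10,
    dlib::relu<
    dlib::slice<0, 0, 0,    // offsets: k, nr, nc
                32, 8, 8,   // output size: k, nr, nc
    dlib::relu<
    dlib::con<64, 3, 3, 1, 1,
    dlib::input<dlib::matrix<unsigned char>>
    >>>>>>;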

// ----------------------------------------------------------------------------------------

template <long long row_stride = 2, long long col_stride = 2>