Skip to content

Commit

Permalink
gpu: jit: conv_v2: rename loop_nest_t -> loop_desc_t
Browse files Browse the repository at this point in the history
  • Loading branch information
echeresh committed Apr 12, 2024
1 parent ea9d3f4 commit 50cc674
Show file tree
Hide file tree
Showing 5 changed files with 58 additions and 58 deletions.
2 changes: 1 addition & 1 deletion src/gpu/jit/v2/conv/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ perf,gpu,jit:ir_v2,"resnet_50_v1_5:res2a_branch2b*3",--mode=F --conv --engine=gp

# 3. Set kernel descriptor from environment
export enable_conv_v2=1
export desc="--prop fwd --src axb:f32 --wei axcb:f32 --dst axb:f32 --hw xehpc --fma mad --simd 16 --regs 128 --iter ic16mb16oc32 --tg ow4oc4 --loop-nest kw,kh,kd,ic --load a:2d,b:2d --store c:2d"
export desc="--prop fwd --src axb:f32 --wei axcb:f32 --dst axb:f32 --hw xehpc --fma mad --simd 16 --regs 128 --iter ic16mb16oc32 --tg ow4oc4 --loop-desc kw,kh,kd,ic --load a:2d,b:2d --store c:2d"
./build/tests/benchdnn/benchdnn -v5 --engine=gpu --mode=F --conv --dir=FWD_I --dt=f32 mb128ic256ih56oc64oh56kh1ph0
...
perf,gpu,jit:ir_v2,,--mode=F --conv --engine=gpu --dir=FWD_I mb128ic256ih56oc64oh56kh1ph0,13.1533,158.426,1.124,11702.3,1.13858,11552.4
Expand Down
30 changes: 15 additions & 15 deletions src/gpu/jit/v2/conv/ir_builder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -225,10 +225,10 @@ class send_mask_t {
class offset_ctx_t {
public:
offset_ctx_t(buffer_manager_t &buf_mgr, ir_context_t &ir_ctx,
const loop_nest_t &loop_nest, const coord_info_t &coord_info)
const loop_desc_t &loop_desc, const coord_info_t &coord_info)
: buf_mgr_(buf_mgr)
, ir_ctx_(ir_ctx)
, loop_nest_(loop_nest)
, loop_desc_(loop_desc)
, coord_info_(coord_info) {}

send_header_t add_header(const send_1d_desc_t &desc, const expr_t &mem_buf,
Expand Down Expand Up @@ -312,7 +312,7 @@ class offset_ctx_t {
return ret;
}

stmt_t inc_loop_stmt(const loop_nest_entry_t &e) const {
stmt_t inc_loop_stmt(const loop_desc_entry_t &e) const {
stmt_t ret;
for (auto &o : offsets_) {
auto inc = o.inc_stmt(e.idx);
Expand All @@ -333,7 +333,7 @@ class offset_ctx_t {
const offset_params_t &_params) {
auto params = _params;
std::vector<expr_t> loop_idxs;
for (auto &e : loop_nest_) {
for (auto &e : loop_desc_) {
loop_idxs.push_back(coord_info_.loop_index(e.dim));
}
expr_t _base_init;
Expand All @@ -359,7 +359,7 @@ class offset_ctx_t {
ret.esize = params.esize;

expr_t comp_value = 0;
for (auto &e : loop_nest_) {
for (auto &e : loop_desc_) {
auto loop_size = coord_info_.loop_size(e.dim);
auto inc_value = simplify(_loop_incs[e.idx] - comp_value);
auto inc = to_simple_expr(inc_value);
Expand Down Expand Up @@ -430,7 +430,7 @@ class offset_ctx_t {

buffer_manager_t &buf_mgr_;
ir_context_t &ir_ctx_;
loop_nest_t loop_nest_;
loop_desc_t loop_desc_;
coord_info_t coord_info_;

object_eq_map_t<expr_t, expr_t> expr2var_;
Expand All @@ -445,12 +445,12 @@ class iterator_t {
iterator_t() = default;

iterator_t(buffer_manager_t &buf_mgr) : buf_mgr_(&buf_mgr) {
linear_loop_ = loop_t(loop_nest_entry_t(), 0, buf_mgr);
linear_loop_ = loop_t(loop_desc_entry_t(), 0, buf_mgr);
}

// Returns the number of entries currently held in loops_, narrowed to int.
int nloops() const {
    return static_cast<int>(loops_.size());
}

void add_loop(const loop_nest_entry_t &e, const expr_t &bound) {
void add_loop(const loop_desc_entry_t &e, const expr_t &bound) {
if (is_one(bound)) return;
loops_.emplace_back(e, bound, *buf_mgr_);
}
Expand Down Expand Up @@ -490,12 +490,12 @@ class iterator_t {

private:
struct loop_t {
loop_nest_entry_t entry;
loop_desc_entry_t entry;
expr_t bound;
expr_t var_buf;

loop_t() = default;
loop_t(const loop_nest_entry_t &entry, const expr_t &bound,
loop_t(const loop_desc_entry_t &entry, const expr_t &bound,
buffer_manager_t &buf_mgr)
: entry(entry), bound(bound) {
auto buf_name = buf_mgr.ir_ctx().create_tmp_name("i");
Expand Down Expand Up @@ -688,9 +688,9 @@ class ir_builder_t {
, cset_(desc.spec_reqs.as_constraint_set(kernel_info))
, ir_ctx_(desc.exec_cfg(), cset_)
, buf_mgr_(ir_ctx_)
, off_ctx_(buf_mgr_, ir_ctx_, desc_.loop_nest, plan_.coord_info)
, off_ctx_(buf_mgr_, ir_ctx_, desc_.loop_desc, plan_.coord_info)
, prefetch_off_ctx_(
buf_mgr_, ir_ctx_, desc_.loop_nest, plan_.coord_info) {}
buf_mgr_, ir_ctx_, desc_.loop_desc, plan_.coord_info) {}

stmt_t build() {
build_prefetch();
Expand Down Expand Up @@ -718,14 +718,14 @@ class ir_builder_t {

private:
stmt_t loop() const {
auto &loop_nest = desc_.loop_nest;
auto &loop_desc = desc_.loop_desc;
auto &coord_info = plan_.coord_info;
int prefetch_dist = desc_.prefetch.dist;
stmt_t init_stmt;
iterator_t prefetch_it;
if (prefetch_dist > 0) {
prefetch_it = iterator_t(buf_mgr_);
for (auto &e : loop_nest) {
for (auto &e : loop_desc) {
auto bound = coord_info.loop_size(e.dim);
prefetch_it.add_loop(e, bound);
}
Expand All @@ -748,7 +748,7 @@ class ir_builder_t {
if (prefetch_dist > 0) {
ret = ret.append(prefetch_it.inc_stmt(prefetch_off_ctx_));
}
for (auto &e : loop_nest) {
for (auto &e : loop_desc) {
auto var = coord_info.loop_index(e.dim);
auto bound = coord_info.loop_size(e.dim);
ret = ret.append(off_ctx_.inc_loop_stmt(e));
Expand Down
38 changes: 19 additions & 19 deletions src/gpu/jit/v2/conv/kernel_desc.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -307,26 +307,26 @@ void kernel_desc_t::set(const std::string &s) {
}

void kernel_desc_t::set_defaults() {
if (loop_nest.is_empty()) {
if (loop_desc.is_empty()) {
switch (prop) {
case prop_kind::forward_training:
case prop_kind::forward_inference:
loop_nest.add(prb_dims::kw);
loop_nest.add(prb_dims::kh);
loop_nest.add(prb_dims::kd);
loop_nest.add(prb_dims::ic);
loop_desc.add(prb_dims::kw);
loop_desc.add(prb_dims::kh);
loop_desc.add(prb_dims::kd);
loop_desc.add(prb_dims::ic);
break;
case prop_kind::backward_data:
loop_nest.add(prb_dims::kw);
loop_nest.add(prb_dims::kh);
loop_nest.add(prb_dims::kd);
loop_nest.add(prb_dims::oc);
loop_desc.add(prb_dims::kw);
loop_desc.add(prb_dims::kh);
loop_desc.add(prb_dims::kd);
loop_desc.add(prb_dims::oc);
break;
case prop_kind::backward_weights:
loop_nest.add(prb_dims::mb);
loop_nest.add(prb_dims::ow);
loop_nest.add(prb_dims::oh);
loop_nest.add(prb_dims::od);
loop_desc.add(prb_dims::mb);
loop_desc.add(prb_dims::ow);
loop_desc.add(prb_dims::oh);
loop_desc.add(prb_dims::od);
break;
default: ir_error_not_expected(); break;
}
Expand Down Expand Up @@ -356,7 +356,7 @@ std::string kernel_desc_t::str() const {
oss << "Registers: " << regs << std::endl;
oss << "Iteration tile: " << iter_tile << std::endl;
oss << "Thread group tile: " << thread_group_tile << std::endl;
oss << "Loop nest: " << loop_nest << std::endl;
oss << "Loop desc: " << loop_desc << std::endl;
oss << "Load: " << load.str() << std::endl;
oss << "Prefetch: " << prefetch.str() << std::endl;
oss << "Store: " << store.str() << std::endl;
Expand Down Expand Up @@ -482,11 +482,11 @@ ir_utils::cli_iface_t<kernel_desc_t> kernel_desc_t::cli_iface() {
iface.add_arg("--tg", "Threadgroup tile (e.g. ow4oc4).",
MAKE_GETTER(desc->thread_group_tile.str()),
MAKE_SETTER(thread_group_tile, str_to_prb_tile(value)));
iface.add_arg("--loop-nest",
"Loop nest, ordered from innermost to outermost (e.g. "
"kw,kh,kd,ic).",
MAKE_GETTER(desc->loop_nest.str()),
MAKE_SETTER(loop_nest, str_to_loop_nest(value)));
iface.add_arg("--loop-desc",
"Loop description, variables ordered from innermost to outermost "
"(e.g. kw,kh,kd,ic).",
MAKE_GETTER(desc->loop_desc.str()),
MAKE_SETTER(loop_desc, str_to_loop_desc(value)));
iface.add_arg("--load",
"Load type (block, scattered [default], 2d) for A and B, e.g. "
"a:2d,b:block.",
Expand Down
42 changes: 21 additions & 21 deletions src/gpu/jit/v2/conv/kernel_desc.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -166,16 +166,16 @@ inline spec_reqs_t str_to_spec_reqs(const std::string &s) {
return spec_reqs_t(mode);
}

struct loop_nest_entry_t {
struct loop_desc_entry_t {
prb_dim_t dim;
int idx = -1;
bool is_outer = true;
// Whether the dimension range is distributed between thread groups (global
// k-slicing).
bool is_global = false;

loop_nest_entry_t() = default;
loop_nest_entry_t(const prb_dim_t &dim, int idx, bool is_global)
loop_desc_entry_t() = default;
loop_desc_entry_t(const prb_dim_t &dim, int idx, bool is_global)
: dim(dim), idx(idx), is_global(is_global) {}

// An entry counts as empty when its dimension is undefined.
bool is_empty() const { return dim.is_undef(); }
Expand All @@ -188,13 +188,13 @@ struct loop_nest_entry_t {

IR_DEFINE_DUMP()

bool operator==(const loop_nest_entry_t &other) const {
bool operator==(const loop_desc_entry_t &other) const {
return (dim == other.dim) && (idx == other.idx)
&& (is_outer == other.is_outer)
&& (is_global == other.is_global);
}

bool operator!=(const loop_nest_entry_t &other) const {
bool operator!=(const loop_desc_entry_t &other) const {
return !operator==(other);
}

Expand All @@ -217,16 +217,16 @@ struct loop_nest_entry_t {
}
};

class loop_nest_t {
class loop_desc_t {
public:
// True when no entries are stored in entries_.
bool is_empty() const { return entries_.empty(); }
const std::vector<loop_nest_entry_t> &entries() const { return entries_; }
const std::vector<loop_desc_entry_t> &entries() const { return entries_; }
// Returns the number of stored entries, narrowed to int.
int ndims() const {
    return static_cast<int>(entries_.size());
}
// Reports whether an entry for `dim` is present: find() hands back a
// default-constructed entry on a miss, which is detected via is_empty().
bool has(const prb_dim_t &dim) const {
    const auto entry = find(dim);
    return !entry.is_empty();
}
loop_nest_entry_t find(const prb_dim_t &dim) const {
loop_desc_entry_t find(const prb_dim_t &dim) const {
for (auto &e : entries_)
if (e.dim == dim) return e;
return loop_nest_entry_t();
return loop_desc_entry_t();
}
// Returns the is_global flag of the entry found for `dim`. When `dim` is not
// present, find() yields a default-constructed entry whose flag is false.
bool is_global(const prb_dim_t &dim) const {
    const auto entry = find(dim);
    return entry.is_global;
}
void add(const prb_dim_t &dim, bool is_global = false) {
Expand All @@ -243,10 +243,10 @@ class loop_nest_t {
update_indices();
}
// Returns the idx field of the entry found for `dim`. When `dim` is not
// present, find() yields a default-constructed entry whose idx is -1.
int index(const prb_dim_t &dim) const {
    const auto entry = find(dim);
    return entry.idx;
}
std::vector<loop_nest_entry_t>::const_iterator begin() const {
std::vector<loop_desc_entry_t>::const_iterator begin() const {
return entries_.begin();
}
std::vector<loop_nest_entry_t>::const_iterator end() const {
std::vector<loop_desc_entry_t>::const_iterator end() const {
return entries_.end();
}

Expand All @@ -261,11 +261,11 @@ class loop_nest_t {

IR_DEFINE_DUMP()

bool operator==(const loop_nest_t &other) const {
bool operator==(const loop_desc_t &other) const {
return entries_ == other.entries_;
}

bool operator!=(const loop_nest_t &other) const {
bool operator!=(const loop_desc_t &other) const {
return !operator==(other);
}

Expand All @@ -284,12 +284,12 @@ class loop_nest_t {
}

// Ordered from innermost to outermost.
std::vector<loop_nest_entry_t> entries_;
std::vector<loop_desc_entry_t> entries_;
};

inline loop_nest_t str_to_loop_nest(const std::string &s) {
inline loop_desc_t str_to_loop_desc(const std::string &s) {
auto parts = gpu_utils::split(s, ",");
loop_nest_t ret;
loop_desc_t ret;
for (auto &p : parts)
ret.add(prb_dim_t::from_name(p));
return ret;
Expand Down Expand Up @@ -422,7 +422,7 @@ class kernel_desc_t : public kernel_desc_base_t {
int regs = 0;
prb_tile_t iter_tile;
prb_tile_t thread_group_tile;
loop_nest_t loop_nest;
loop_desc_t loop_desc;
load_desc_t load;
store_desc_t store;
prefetch_desc_t prefetch;
Expand Down Expand Up @@ -467,7 +467,7 @@ class kernel_desc_t : public kernel_desc_base_t {
&& (simd == other.simd) && (regs == other.regs)
&& (iter_tile == other.iter_tile)
&& (thread_group_tile == other.thread_group_tile)
&& (loop_nest == other.loop_nest) && (load == other.load)
&& (loop_desc == other.loop_desc) && (load == other.load)
&& (prefetch == other.prefetch) && (store == other.store)
&& (is_finalized == other.is_finalized);
}
Expand All @@ -479,7 +479,7 @@ class kernel_desc_t : public kernel_desc_base_t {
size_t get_hash() const {
return ir_utils::get_hash(prop, is_dw, src_tag, wei_tag, dst_tag,
spec_reqs, hw, fma, simd, regs, iter_tile, thread_group_tile,
loop_nest, load, prefetch, store, is_finalized);
loop_desc, load, prefetch, store, is_finalized);
}

void serialize(std::ostream &out) const {
Expand All @@ -496,7 +496,7 @@ class kernel_desc_t : public kernel_desc_base_t {
ir_utils::serialize(regs, out);
ir_utils::serialize(iter_tile, out);
ir_utils::serialize(thread_group_tile, out);
ir_utils::serialize(loop_nest, out);
ir_utils::serialize(loop_desc, out);
ir_utils::serialize(load, out);
ir_utils::serialize(prefetch, out);
ir_utils::serialize(store, out);
Expand All @@ -516,7 +516,7 @@ class kernel_desc_t : public kernel_desc_base_t {
ir_utils::deserialize(regs, in);
ir_utils::deserialize(iter_tile, in);
ir_utils::deserialize(thread_group_tile, in);
ir_utils::deserialize(loop_nest, in);
ir_utils::deserialize(loop_desc, in);
ir_utils::deserialize(load, in);
ir_utils::deserialize(prefetch, in);
ir_utils::deserialize(store, in);
Expand Down
4 changes: 2 additions & 2 deletions src/gpu/jit/v2/conv/plan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -519,8 +519,8 @@ class plan_builder_t {
tg_grid_ = create_thread_group_grid(desc_);
thr_grid_ = create_thread_grid(desc_);
for (auto &d : conv_index_dims(desc_.prop)) {
bool is_loop = desc_.loop_nest.has(d);
bool is_global_loop = desc_.loop_nest.is_global(d);
bool is_loop = desc_.loop_desc.has(d);
bool is_global_loop = desc_.loop_desc.is_global(d);
int tg_tile = desc_.thread_group_tile.get(d, 1);
int iter_tile = desc_.iter_tile.get(d, 1);
auto thr_idx = thr_grid_.index_var(d);
Expand Down

0 comments on commit 50cc674

Please sign in to comment.