Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Enable transpose scheduler #1927

Merged
merged 48 commits into from
Sep 11, 2022
Merged
Show file tree
Hide file tree
Changes from 40 commits
Commits
Show all changes
48 commits
Select commit Hold shift + click to select a range
e84fff8
Enable transpose scheduler
zasdfgbnm Aug 24, 2022
bb8cef1
add lower index resolution
shmsong Aug 24, 2022
66765a7
Merge remote-tracking branch 'origin/devel' into local_index_expr
shmsong Aug 24, 2022
7992309
add repro
shmsong Aug 24, 2022
e9d09fe
clear GPU memory after test
shmsong Aug 24, 2022
f406e23
cleanup and comment
shmsong Aug 25, 2022
abe1f6d
fix
zasdfgbnm Aug 25, 2022
bcc6f6c
fix
zasdfgbnm Aug 25, 2022
63e3e76
Allow splitting inner-most ID to create virtual innermost ID
zasdfgbnm Aug 25, 2022
5a423a0
remove obselete comment
zasdfgbnm Aug 25, 2022
0b30aa1
Merge branch 'local_index_expr' of github.com:csarofeen/pytorch into …
zasdfgbnm Aug 26, 2022
8bfaa28
Merge branch 'transpose-split-inner' into enable-transpose-scheduler
zasdfgbnm Aug 26, 2022
217eb96
Merge branch 'devel' of github.com:csarofeen/pytorch into enable-tran…
zasdfgbnm Aug 27, 2022
bf956ac
Merge branch 'devel' of github.com:csarofeen/pytorch into transpose-s…
zasdfgbnm Aug 27, 2022
b796975
Merge branch 'transpose-split-inner' into enable-transpose-scheduler
zasdfgbnm Aug 27, 2022
10f64f5
Merge branch 'enable-transpose-scheduler' of github.com:csarofeen/pyt…
zasdfgbnm Aug 27, 2022
288b131
Merge branch 'devel' of github.com:csarofeen/pytorch into enable-tran…
zasdfgbnm Aug 27, 2022
5678868
Merge branch 'devel' of github.com:csarofeen/pytorch into transpose-s…
zasdfgbnm Aug 27, 2022
3752e80
Merge branch 'transpose-split-inner' into enable-transpose-scheduler
zasdfgbnm Aug 27, 2022
3157504
Merge branch 'devel' of github.com:csarofeen/pytorch into transpose-s…
zasdfgbnm Aug 30, 2022
090b582
Merge branch 'transpose-split-inner' into enable-transpose-scheduler
zasdfgbnm Aug 31, 2022
27b6893
Merge branch 'devel' of github.com:csarofeen/pytorch into transpose-s…
zasdfgbnm Aug 31, 2022
f506e87
Merge branch 'transpose-split-inner' into enable-transpose-scheduler
zasdfgbnm Aug 31, 2022
5f266b3
skip innermost split
zasdfgbnm Aug 31, 2022
bd93302
comment
zasdfgbnm Aug 31, 2022
5933f53
cleanup
zasdfgbnm Aug 31, 2022
7c366b7
save
zasdfgbnm Sep 1, 2022
2c4f646
save
zasdfgbnm Sep 1, 2022
6d5a79f
save
zasdfgbnm Sep 1, 2022
f9a2d88
no small tile
zasdfgbnm Sep 1, 2022
ade4d22
cleanup
zasdfgbnm Sep 1, 2022
ba9b01b
tune tile size
zasdfgbnm Sep 1, 2022
cddae0b
undo tune
zasdfgbnm Sep 1, 2022
3ce9784
no virt inner if low occupancy
zasdfgbnm Sep 1, 2022
5fb2b58
fix
zasdfgbnm Sep 1, 2022
41483a9
if one full wave can handle all elements, don't create virtual inner …
zasdfgbnm Sep 1, 2022
ab0c880
Merge branch 'devel' of github.com:csarofeen/pytorch into enable-tran…
zasdfgbnm Sep 6, 2022
21a7896
reject < 1 wave on runtime
zasdfgbnm Sep 7, 2022
f8551d7
cache inner most positions
zasdfgbnm Sep 8, 2022
53690a0
issue id
zasdfgbnm Sep 8, 2022
44f7810
lint
zasdfgbnm Sep 8, 2022
98ceddd
Merge branch 'devel' of github.com:csarofeen/pytorch into enable-tran…
zasdfgbnm Sep 9, 2022
ea0d1ff
maybe_tune
zasdfgbnm Sep 10, 2022
5920c62
Revert "maybe_tune"
zasdfgbnm Sep 10, 2022
1427438
reject at pointwise scheduler
zasdfgbnm Sep 10, 2022
27a4db8
fix
zasdfgbnm Sep 10, 2022
0c173fa
fix
zasdfgbnm Sep 10, 2022
911956e
fix fusion input reductions
zasdfgbnm Sep 11, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 12 additions & 1 deletion torch/csrc/jit/codegen/cuda/scheduler/compile_time_info.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,8 @@ enum class CompileTimeEntryType {
REDUCTION_TVS,
PERSISTENT_BUFFER_INFO,
SCOPE_PERSISTENT_FACTOR_INFO,
BROADCAST_BYTE_MULTIPLES
BROADCAST_BYTE_MULTIPLES,
INNER_MOST_DIMS_INFO,
};

//! Entry type definition class for `DOMAIN_MAP`,
Expand Down Expand Up @@ -99,6 +100,16 @@ class PersistentBufferInfo {
CompileTimeEntryType::PERSISTENT_BUFFER_INFO;
};

//! Entry type definition class for `INNER_MOST_DIMS_INFO`.
//! Used in the transpose scheduler to store information about the inner most
//! IterDomains of group 1 and group 2, namely their position in reference1.
//!
//! NOTE(review): the stored payload is a plain vector of integers, so only
//! positions (not the IterDomains themselves) can be held here. Presumably it
//! contains one position per group, in group order -- confirm against the
//! code that fills this entry.
class InnerMostDimInfo {
public:
//! Type of the value stored for this entry.
using DataType = std::vector<int64_t>;
//! Enum tag that identifies this entry among the compile-time entries.
static const CompileTimeEntryType EntryType =
CompileTimeEntryType::INNER_MOST_DIMS_INFO;
};

//! Auxiliary data types for `SCOPE_PERSISTENT_FACTOR_INFO` entry type.
using ScopedPersistenceBufferMap = std::unordered_map<Val*, std::vector<bool>>;

Expand Down
15 changes: 9 additions & 6 deletions torch/csrc/jit/codegen/cuda/scheduler/registry.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1227,12 +1227,6 @@ class TransposeScheduler : public SchedulerEntry {
}

static bool canScheduleCompileTime(Fusion* fusion) {
if (!isOptionEnabled(EnableOption::TransposeScheduler)) {
scheduler_debug_utils::canScheduleRejectReason(
ScheduleHeuristic::Transpose, "not enabled");
return false;
}

// Temporarily disallow view in transpose scheduler
// TODO Add more testing before enabling
auto view_tvs = scheduler_utils::getViewTVs(fusion);
Expand Down Expand Up @@ -1273,6 +1267,14 @@ class TransposeScheduler : public SchedulerEntry {
Fusion* fusion,
SchedulerRuntimeInfo& runtime_info,
HeuristicSummary* data_cache = nullptr) {
FUSER_PERF_SCOPE("TransposeScheduler::canScheduleRunTime");

auto reason = getRuntimeRejectReason(fusion, data_cache, runtime_info);
if (!reason.empty()) {
scheduler_debug_utils::canScheduleRejectReason(
ScheduleHeuristic::Transpose, reason);
return false;
}
return true;
}

Expand Down Expand Up @@ -1566,6 +1568,7 @@ template class HeuristicSummaryEntry<
template class HeuristicSummaryEntry<
HeuristicCompileTime::ScopePersistentFactorInfo>;
template class HeuristicSummaryEntry<HeuristicCompileTime::BroadcastMultiples>;
template class HeuristicSummaryEntry<HeuristicCompileTime::InnerMostDimInfo>;

} // namespace cuda
} // namespace fuser
Expand Down
Loading