diff --git a/include/taco/index_notation/index_notation.h b/include/taco/index_notation/index_notation.h
index cacd1411c..a6886a1ae 100644
--- a/include/taco/index_notation/index_notation.h
+++ b/include/taco/index_notation/index_notation.h
@@ -1,6 +1,7 @@
 #ifndef TACO_INDEX_NOTATION_H
 #define TACO_INDEX_NOTATION_H
 
+#include <functional>
 #include <ostream>
 #include <string>
 #include <memory>
@@ -30,6 +31,7 @@ class Format;
 class Schedule;
 class IndexVar;
+class WindowedIndexVar;
 class TensorVar;
 
 class IndexExpr;
@@ -228,6 +230,16 @@ class Access : public IndexExpr {
   /// Returns the index variables used to index into the Access's TensorVar.
   const std::vector<IndexVar>& getIndexVars() const;
 
+  /// hasWindowedModes returns true if any accessed modes are windowed.
+  bool hasWindowedModes() const;
+
+  /// Returns whether or not the input mode (0-indexed) is windowed.
+  bool isModeWindowed(int mode) const;
+
+  /// Return the {lower,upper} bound of the window on the input mode (0-indexed).
+  int getWindowLowerBound(int mode) const;
+  int getWindowUpperBound(int mode) const;
+
   /// Assign the result of an expression to a left-hand-side tensor access.
   /// ```
   /// a(i) = b(i) * c(i);
@@ -800,11 +812,67 @@ class Multi : public IndexStmt {
 /// Create a multi index statement.
 Multi multi(IndexStmt stmt1, IndexStmt stmt2);
 
+/// IndexVarInterface is a marker superclass for IndexVar-like objects.
+/// It is intended for situations where many IndexVar-like objects must be
+/// stored together, such as when building an Access AST node where some of
+/// the access variables are windowed. Code that consumes an
+/// IndexVarInterface inspects the underlying type of the object. For the
+/// sake of completeness, the current implementers of IndexVarInterface are:
+/// * IndexVar
+/// * WindowedIndexVar
+/// If this set changes, make sure to update the match function.
+class IndexVarInterface {
+public:
+  virtual ~IndexVarInterface() = default;
+
+  /// match performs a dynamic case analysis over the implementers of
+  /// IndexVarInterface as a utility for handling the different values
+  /// within. It mimics the dynamic type assertion of Go.
+  static void match(
+      std::shared_ptr<IndexVarInterface> ptr,
+      std::function<void(std::shared_ptr<IndexVar>)> ivarFunc,
+      std::function<void(std::shared_ptr<WindowedIndexVar>)> wvarFunc
+  ) {
+    auto iptr = std::dynamic_pointer_cast<IndexVar>(ptr);
+    auto wptr = std::dynamic_pointer_cast<WindowedIndexVar>(ptr);
+    if (iptr != nullptr) {
+      ivarFunc(iptr);
+    } else if (wptr != nullptr) {
+      wvarFunc(wptr);
+    } else {
+      taco_iassert(false) << "IndexVarInterface was not IndexVar or WindowedIndexVar";
+    }
+  }
+};
+
+/// WindowedIndexVar represents an IndexVar that has been windowed. For example, in
+///   A(i) = B(i(2, 4))
+/// i(2, 4) is a WindowedIndexVar. WindowedIndexVar is defined before IndexVar
+/// so that IndexVar can return objects of type WindowedIndexVar.
+class WindowedIndexVar : public util::Comparable<WindowedIndexVar>, public IndexVarInterface {
+public:
+  WindowedIndexVar(IndexVar base, int lo = -1, int hi = -1);
+  ~WindowedIndexVar() = default;
+
+  /// getIndexVar returns the underlying IndexVar.
+  IndexVar getIndexVar() const;
+
+  /// get{Lower,Upper}Bound returns the {lower,upper} bound of the window of
+  /// this index variable.
+  int getLowerBound() const;
+  int getUpperBound() const;
+
+private:
+  struct Content;
+  std::shared_ptr<Content> content;
+};
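
// Note (illustrative, not part of the patch): a minimal sketch of the
// user-facing API this header adds, mirroring the tests at the end of the
// patch. Tensor names and dimensions here are made up.
#include "taco/tensor.h"
using namespace taco;
void windowSketch() {
  Tensor<int> A("A", {2}, {Dense});
  Tensor<int> B("B", {10}, {Dense});
  IndexVar i;
  A(i) = B(i(2, 4));  // i(2, 4) is a WindowedIndexVar: A gets B's entries [2, 4).
  A.evaluate();
}
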
 /// Index variables are used to index into tensors in index expressions, and
 /// they represent iteration over the tensor modes they index into.
-class IndexVar : public util::Comparable<IndexVar> {
+class IndexVar : public util::Comparable<IndexVar>, public IndexVarInterface {
 public:
   IndexVar();
+  ~IndexVar() = default;
   IndexVar(const std::string& name);
 
   /// Returns the name of the index variable.
@@ -813,6 +881,8 @@ class IndexVar : public util::Comparable<IndexVar> {
   friend bool operator==(const IndexVar&, const IndexVar&);
   friend bool operator<(const IndexVar&, const IndexVar&);
 
+  /// Indexing into an IndexVar returns a window into it.
+  WindowedIndexVar operator()(int lo, int hi);
 
 private:
   struct Content;
@@ -823,7 +893,15 @@ struct IndexVar::Content {
   std::string name;
 };
 
+struct WindowedIndexVar::Content {
+  IndexVar base;
+  int lo;
+  int hi;
+};
+
+std::ostream& operator<<(std::ostream&, const std::shared_ptr<IndexVarInterface>&);
 std::ostream& operator<<(std::ostream&, const IndexVar&);
+std::ostream& operator<<(std::ostream&, const WindowedIndexVar&);
 
 /// A suchthat statement provides a set of IndexVarRel that constrain
 /// the iteration space for the child concrete index notation
diff --git a/include/taco/index_notation/index_notation_nodes.h b/include/taco/index_notation/index_notation_nodes.h
index 95439cd6b..f20584b85 100644
--- a/include/taco/index_notation/index_notation_nodes.h
+++ b/include/taco/index_notation/index_notation_nodes.h
@@ -26,6 +26,23 @@ struct AccessNode : public IndexExprNode {
 
   TensorVar tensorVar;
   std::vector<IndexVar> indexVars;
+
+  // An AccessNode carries the windowing information for an IndexVar + TensorVar
+  // combination. windowedModes contains the lower and upper bounds of each
+  // windowed mode (0-indexed).
+  struct Window {
+    int lo;
+    int hi;
+    friend bool operator==(const Window& a, const Window& b) {
+      return a.lo == b.lo && a.hi == b.hi;
+    }
+  };
+  std::map<int, Window> windowedModes;
+
+protected:
+  /// Initialize an AccessNode with just a TensorVar. If this constructor is used,
+  /// then indexVars must be set afterwards.
+  explicit AccessNode(TensorVar tensorVar)
+      : IndexExprNode(tensorVar.getType().getDataType()), tensorVar(tensorVar) {}
 };
 
 struct LiteralNode : public IndexExprNode {
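
// Note (illustrative, not part of the patch): for an access like
// B(i(2, 4), j), windowedModes maps mode 0 to {2, 4} and has no entry for
// mode 1. A sketch of querying that information through the Access API
// declared in index_notation.h (assumes the taco headers and <iostream>
// are included):
void printWindows(const taco::Access& access) {
  for (int m = 0; m < (int)access.getIndexVars().size(); m++) {
    if (access.isModeWindowed(m)) {
      // For B(i(2, 4), j) this prints "mode 0: [2, 4)".
      std::cout << "mode " << m << ": [" << access.getWindowLowerBound(m)
                << ", " << access.getWindowUpperBound(m) << ")" << std::endl;
    }
  }
}
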
diff --git a/include/taco/lower/iterator.h b/include/taco/lower/iterator.h
index 1d871ffaa..0fe765653 100644
--- a/include/taco/lower/iterator.h
+++ b/include/taco/lower/iterator.h
@@ -159,6 +159,17 @@ class Iterator : public util::Comparable<Iterator> {
   /// Returns true if the iterator is defined, false otherwise.
   bool defined() const;
 
+  /// Methods for querying and operating on windowed tensor modes.
+
+  /// isWindowed returns true if this iterator is operating over a window
+  /// of a tensor mode.
+  bool isWindowed() const;
+
+  /// getWindow{Lower,Upper}Bound return the {lower,upper} bound of the
+  /// window that this iterator operates over.
+  ir::Expr getWindowLowerBound() const;
+  ir::Expr getWindowUpperBound() const;
+
   friend bool operator==(const Iterator&, const Iterator&);
   friend bool operator<(const Iterator&, const Iterator&);
   friend std::ostream& operator<<(std::ostream&, const Iterator&);
@@ -169,6 +180,10 @@ class Iterator : public util::Comparable<Iterator> {
   Iterator(std::shared_ptr<Content> content);
 
   void setChild(const Iterator& iterator) const;
+
+  friend class Iterators;
+  /// setWindowBounds sets the window bounds of this iterator.
+  void setWindowBounds(ir::Expr lo, ir::Expr hi);
 };
 
 /**
diff --git a/include/taco/lower/lowerer_impl.h b/include/taco/lower/lowerer_impl.h
index 39675d450..afc936145 100644
--- a/include/taco/lower/lowerer_impl.h
+++ b/include/taco/lower/lowerer_impl.h
@@ -375,9 +375,30 @@ class LowererImpl : public util::Uncopyable {
   /// Create an expression to index into a tensor value array.
   ir::Expr generateValueLocExpr(Access access) const;
 
-  /// Expression that evaluates to true if none of the iteratators are exhausted
+  /// Expression that evaluates to true if none of the iterators are exhausted
   ir::Expr checkThatNoneAreExhausted(std::vector<Iterator> iterators);
 
+  /// Expression that returns the beginning of a window to iterate over
+  /// in a compressed iterator. It is used when operating over windows of
+  /// tensors, instead of the full tensor.
+  ir::Expr searchForStartOfWindowPosition(Iterator iterator, ir::Expr start, ir::Expr end);
+
+  /// Statement that guards against going out of bounds of the window that
+  /// the input iterator was configured with.
+  ir::Stmt upperBoundGuardForWindowPosition(Iterator iterator, ir::Expr access);
+
+  /// Expression that recovers a canonical index variable from a position in
+  /// a windowed position iterator. A windowed position iterator iterates over
+  /// values in the range [lo, hi). This expression projects values in that
+  /// range back into the canonical range of [0, n).
+  ir::Expr projectWindowedPositionToCanonicalSpace(Iterator iterator, ir::Expr expr);
+
+  /// projectCanonicalSpaceToWindowedPosition is the opposite of
+  /// projectWindowedPositionToCanonicalSpace. It takes an expression ranging
+  /// over the canonical space of [0, n) and projects it up to the windowed
+  /// range of [lo, hi).
+  ir::Expr projectCanonicalSpaceToWindowedPosition(Iterator iterator, ir::Expr expr);
+
 private:
   bool assemble;
   bool compute;
diff --git a/include/taco/tensor.h b/include/taco/tensor.h
index 313758252..1eddecf2b 100644
--- a/include/taco/tensor.h
+++ b/include/taco/tensor.h
@@ -386,6 +386,9 @@ class TensorBase {
   /// Create an index expression that accesses (reads or writes) this tensor.
   Access operator()(const std::vector<IndexVar>& indices);
 
+  /// Create a possibly windowed index expression that accesses (reads or writes) this tensor.
+  Access operator()(const std::vector<std::shared_ptr<IndexVarInterface>>& indices);
+
   /// Create an index expression that accesses (reads) this (scalar) tensor.
   Access operator()();
 
@@ -621,6 +624,20 @@ class Tensor : public TensorBase {
   template <typename... IndexVars>
   Access operator()(const IndexVars&... indices);
 
+  /// The below two Access methods allow users to access tensors with a mix
+  /// of IndexVars and WindowedIndexVars, so that natural expressions like
+  ///   A(i, j(1, 3)) = B(i(2, 4), j) * C(i(5, 7), j(7, 9))
+  /// can be constructed without adjusting the original API.
+
+  /// Create an index expression that accesses (reads, writes) this tensor.
+  template <typename... IndexVars>
+  Access operator()(const WindowedIndexVar& first, const IndexVars&... indices);
+
+  /// Create an index expression that accesses (reads, writes) this tensor.
+  template <typename... IndexVars>
+  Access operator()(const IndexVar& first, const IndexVars&... indices);
+
   ScalarAccess<CType> operator()(const std::vector<int>& indices);
 
   /// Create an index expression that accesses (reads) this tensor.
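
// Note (illustrative, not part of the patch): with the overloads above,
// windowed and unwindowed variables mix freely in a single access. A sketch,
// with dimensions chosen so that every variable binds to extent 2 (all names
// made up; assumes the taco headers are included):
void mixedIndexingSketch() {
  using namespace taco;
  Tensor<int> A("A", {2, 4}, {Dense, Dense});
  Tensor<int> B("B", {10, 2}, {Dense, Dense});
  Tensor<int> C("C", {10, 10}, {Dense, Dense});
  IndexVar i, j;
  // j is windowed to [1, 3) on A, spans B's full mode of size 2, and is
  // windowed to [7, 9) on C; i is windowed on B and C but free on A.
  A(i, j(1, 3)) = B(i(2, 4), j) * C(i(5, 7), j(7, 9));
}
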
@@ -629,6 +646,15 @@
 
   /// Assign an expression to a scalar tensor.
   void operator=(const IndexExpr& expr);
+
+private:
+  /// The _access method family is the template-level implementation of
+  /// Access() expressions that contain mixes of IndexVar and WindowedIndexVar
+  /// objects.
+  template <typename First, typename... Rest>
+  std::vector<std::shared_ptr<IndexVarInterface>> _access(const First& first, const Rest&... rest);
+  std::vector<std::shared_ptr<IndexVarInterface>> _access();
+  template <typename... Args>
+  Access _access_wrapper(const Args&... args);
 };
 
 template <typename CType>
@@ -1084,6 +1110,63 @@ Access Tensor<CType>::operator()(const IndexVars&... indices) {
   return TensorBase::operator()(std::vector<IndexVar>{indices...});
 }
 
+/// The _access() methods perform primitive recursion on the input variadic
+/// template: each instance of _access matches on the first element of the
+/// parameter pack, performs an "action", then recurses on the remaining
+/// elements. Since this is recursion, we need a base case: the empty-argument
+/// instance of _access returns an empty value of the desired type, in this
+/// case a vector of IndexVarInterface.
+template <typename CType>
+std::vector<std::shared_ptr<IndexVarInterface>> Tensor<CType>::_access() {
+  return std::vector<std::shared_ptr<IndexVarInterface>>{};
+}
+
+/// The recursive case of _access matches on the first element and creates a
+/// shared_ptr out of it. It then recurses to collect the rest of the elements
+/// and pushes the first element onto the back of the resulting vector --
+/// storing it as a shared_ptr of IndexVarInterface is what checks that the
+/// type First is indeed an implementer of IndexVarInterface.
+template <typename CType>
+template <typename First, typename... Rest>
+std::vector<std::shared_ptr<IndexVarInterface>> Tensor<CType>::_access(const First& first, const Rest&... rest) {
+  auto var = std::make_shared<First>(first);
+  auto ret = _access(rest...);
+  ret.push_back(var);
+  return ret;
+}
+
+/// _access_wrapper calls into _access and reverses the result to recover the
+/// original order of the arguments.
+template <typename CType>
+template <typename... Args>
+Access Tensor<CType>::_access_wrapper(const Args&... args) {
+  auto resultReversed = this->_access(args...);
+  std::vector<std::shared_ptr<IndexVarInterface>> result;
+  result.reserve(resultReversed.size());
+  for (auto it = resultReversed.rbegin(); it != resultReversed.rend(); it++) {
+    result.push_back(*it);
+  }
+  return TensorBase::operator()(result);
+}
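
// Note (illustrative, not part of the patch): a standalone analogue of the
// recursion above that makes the reversal visible. This is demo code, not
// taco's:
#include <vector>
inline std::vector<int> collect() { return {}; }  // base case: empty vector
template <typename First, typename... Rest>
std::vector<int> collect(const First& first, const Rest&... rest) {
  auto ret = collect(rest...);  // recurse on the tail first...
  ret.push_back(first);         // ...so the head lands at the back
  return ret;
}
// collect(1, 2, 3) yields {3, 2, 1}; _access_wrapper reverses its result for
// the same reason, restoring the original argument order.
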
+
+/// We have to case on whether the first argument is an IndexVar or a
+/// WindowedIndexVar so that the template engine can differentiate between
+/// the two versions.
+// TODO (rohany): There is a chance that these two methods aren't needed given
+// _access; instead, the other operator() methods that also take IndexVar...
+// could be removed so that there isn't any overload confusion.
+template <typename CType>
+template <typename... IndexVars>
+Access Tensor<CType>::operator()(const IndexVar& first, const IndexVars&... indices) {
+  return this->_access_wrapper(first, indices...);
+}
+template <typename CType>
+template <typename... IndexVars>
+Access Tensor<CType>::operator()(const WindowedIndexVar& first, const IndexVars&... indices) {
+  return this->_access_wrapper(first, indices...);
+}
+
 template <typename CType>
 ScalarAccess<CType> Tensor<CType>::operator()(const std::vector<int>& indices) {
   taco_uassert(indices.size() == (size_t)getOrder())
diff --git a/src/codegen/codegen_c.cpp b/src/codegen/codegen_c.cpp
index a0c6c9591..565757116 100644
--- a/src/codegen/codegen_c.cpp
+++ b/src/codegen/codegen_c.cpp
@@ -516,7 +516,7 @@ void CodeGen_C::visit(const Allocate* op) {
     stream << ", ";
   }
   else {
-    stream << "malloc(";
+    stream << "calloc(1, ";
   }
   stream << "sizeof(" << elementType << ")";
   stream << " * ";
diff --git a/src/error/error_checks.cpp b/src/error/error_checks.cpp
index 9fc067d2d..7516ab3d2 100644
--- a/src/error/error_checks.cpp
+++ b/src/error/error_checks.cpp
@@ -53,6 +53,14 @@ std::pair<bool, std::string> dimensionsTypecheck(const std::vector<IndexVar>& resultV
     for (size_t mode = 0; mode < readNode->indexVars.size(); mode++) {
       IndexVar var = readNode->indexVars[mode];
       Dimension dimension = readNode->tensorVar.getType().getShape().getDimension(mode);
+
+      // If this access has windowed modes, use the dimensions of those windows
+      // as the shape, rather than the shape of the underlying tensor.
+      auto a = Access(readNode);
+      if (a.isModeWindowed(mode)) {
+        dimension = Dimension(a.getWindowUpperBound(mode) - a.getWindowLowerBound(mode));
+      }
+
       if (util::contains(indexVarDims,var) && indexVarDims.at(var) != dimension) {
         errors.push_back(addDimensionError(var, indexVarDims.at(var), dimension));
       } else {
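
// Note (illustrative, not part of the patch): under this rule an index
// variable's dimension is the *window* extent, so differently-placed windows
// of the same size unify. A sketch (assumes the taco headers are included):
void typecheckSketch() {
  using namespace taco;
  Tensor<int> a("a", {10}, {Dense}), b("b", {10}, {Dense}), c("c", {10}, {Dense});
  IndexVar i;
  // OK: every access binds i to an extent-2 dimension.
  c(i(2, 4)) = a(i(5, 7)) + b(i(0, 2));
  // Would fail the typecheck: i binds to extent 10 via c but extent 2 via a.
  // c(i) = a(i(5, 7));
}
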
diff --git a/src/index_notation/index_notation.cpp b/src/index_notation/index_notation.cpp
index f00557eeb..80c32a2c2 100644
--- a/src/index_notation/index_notation.cpp
+++ b/src/index_notation/index_notation.cpp
@@ -185,7 +185,7 @@ struct Isomorphic : public IndexNotationVisitorStrict {
         return;
       }
     }
-    eq = true;
+    eq = anode->windowedModes == bnode->windowedModes;
   }
 
   void visit(const LiteralNode* anode) {
@@ -758,11 +758,45 @@ const std::vector<IndexVar>& Access::getIndexVars() const {
   return getNode(*this)->indexVars;
 }
 
+bool Access::hasWindowedModes() const {
+  return !getNode(*this)->windowedModes.empty();
+}
+
+bool Access::isModeWindowed(int mode) const {
+  auto node = getNode(*this);
+  return node->windowedModes.find(mode) != node->windowedModes.end();
+}
+
+int Access::getWindowLowerBound(int mode) const {
+  taco_iassert(this->isModeWindowed(mode));
+  return getNode(*this)->windowedModes.at(mode).lo;
+}
+
+int Access::getWindowUpperBound(int mode) const {
+  taco_iassert(this->isModeWindowed(mode));
+  return getNode(*this)->windowedModes.at(mode).hi;
+}
+
 static void check(Assignment assignment) {
-  auto tensorVar = assignment.getLhs().getTensorVar();
-  auto freeVars = assignment.getLhs().getIndexVars();
+  auto lhs = assignment.getLhs();
+  auto tensorVar = lhs.getTensorVar();
+  auto freeVars = lhs.getIndexVars();
   auto indexExpr = assignment.getRhs();
   auto shape = tensorVar.getType().getShape();
+
+  // If the LHS access has any windowed modes, use the dimensions of those
+  // windows as the shape, rather than the shape of the underlying tensor.
+  if (lhs.hasWindowedModes()) {
+    vector<Dimension> dims(shape.getOrder());
+    for (int i = 0; i < shape.getOrder(); i++) {
+      dims[i] = shape.getDimension(i);
+      if (lhs.isModeWindowed(i)) {
+        dims[i] = Dimension(lhs.getWindowUpperBound(i) - lhs.getWindowLowerBound(i));
+      }
+    }
+    shape = Shape(dims);
+  }
+
   auto typecheck = error::dimensionsTypecheck(freeVars, indexExpr, shape);
   taco_uassert(typecheck.first) << error::expr_dimension_mismatch << " " << typecheck.second;
 }
@@ -1800,6 +1834,10 @@ std::string IndexVar::getName() const {
   return content->name;
 }
 
+WindowedIndexVar IndexVar::operator()(int lo, int hi) {
+  return WindowedIndexVar(*this, lo, hi);
+}
+
 bool operator==(const IndexVar& a, const IndexVar& b) {
   return a.content == b.content;
 }
@@ -1808,10 +1846,42 @@ bool operator<(const IndexVar& a, const IndexVar& b) {
   return a.content < b.content;
 }
 
+std::ostream& operator<<(std::ostream& os, const std::shared_ptr<IndexVarInterface>& var) {
+  std::stringstream ss;
+  IndexVarInterface::match(var, [&](std::shared_ptr<IndexVar> ivar) {
+    ss << *ivar;
+  }, [&](std::shared_ptr<WindowedIndexVar> wvar) {
+    ss << *wvar;
+  });
+  return os << ss.str();
+}
+
 std::ostream& operator<<(std::ostream& os, const IndexVar& var) {
   return os << var.getName();
 }
 
+std::ostream& operator<<(std::ostream& os, const WindowedIndexVar& var) {
+  return os << var.getIndexVar();
+}
+
+WindowedIndexVar::WindowedIndexVar(IndexVar base, int lo, int hi) : content(new Content) {
+  this->content->base = base;
+  this->content->lo = lo;
+  this->content->hi = hi;
+}
+
+IndexVar WindowedIndexVar::getIndexVar() const {
+  return this->content->base;
+}
+
+int WindowedIndexVar::getLowerBound() const {
+  return this->content->lo;
+}
+
+int WindowedIndexVar::getUpperBound() const {
+  return this->content->hi;
+}
+
 // class TensorVar
 struct TensorVar::Content {
   int id;
@@ -1951,6 +2021,20 @@ static bool isValid(Assignment assignment, string* reason) {
   auto result = lhs.getTensorVar();
   auto freeVars = lhs.getIndexVars();
   auto shape = result.getType().getShape();
+
+  // If the LHS access has any windowed modes, use the dimensions of those
+  // windows as the shape, rather than the shape of the underlying tensor.
+  if (lhs.hasWindowedModes()) {
+    vector<Dimension> dims(shape.getOrder());
+    for (int i = 0; i < shape.getOrder(); i++) {
+      dims[i] = shape.getDimension(i);
+      if (lhs.isModeWindowed(i)) {
+        dims[i] = Dimension(lhs.getWindowUpperBound(i) - lhs.getWindowLowerBound(i));
+      }
+    }
+    shape = Shape(dims);
+  }
+
   auto typecheck = error::dimensionsTypecheck(freeVars, rhs, shape);
   if (!typecheck.first) {
     *reason = error::expr_dimension_mismatch + " " + typecheck.second;
diff --git a/src/lower/iterator.cpp b/src/lower/iterator.cpp
index 9f5b7dd4b..4f71f5ac7 100644
--- a/src/lower/iterator.cpp
+++ b/src/lower/iterator.cpp
@@ -28,6 +28,16 @@ struct Iterator::Content {
   ir::Expr segendVar;
   ir::Expr validVar;
   ir::Expr beginVar;
+
+  // Window represents a window (or slice) into a tensor mode, given by
+  // expressions for the window's lower and upper bounds. An iterator is
+  // windowed if window is not NULL.
+  struct Window {
+    ir::Expr lo;
+    ir::Expr hi;
+    Window(ir::Expr _lo, ir::Expr _hi) : lo(_lo), hi(_hi) {}
+  };
+  std::unique_ptr<Window> window;
 };
 
 Iterator::Iterator() : content(nullptr) {
@@ -323,6 +333,24 @@ bool Iterator::defined() const {
   return content != nullptr;
 }
 
+bool Iterator::isWindowed() const {
+  return this->content->window != nullptr;
+}
+
+ir::Expr Iterator::getWindowLowerBound() const {
+  taco_iassert(this->isWindowed());
+  return this->content->window->lo;
+}
+
+ir::Expr Iterator::getWindowUpperBound() const {
+  taco_iassert(this->isWindowed());
+  return this->content->window->hi;
+}
+
+void Iterator::setWindowBounds(ir::Expr lo, ir::Expr hi) {
+  this->content->window = std::make_unique<Content::Window>(lo, hi);
+}
+
 bool operator==(const Iterator& a, const Iterator& b) {
   if (a.isDimensionIterator() && b.isDimensionIterator()) {
     return a.getIndexVar() == b.getIndexVar();
@@ -425,7 +453,7 @@ Iterators::Iterators(IndexStmt stmt, const map<TensorVar, Expr>& tensorVars)
     })
   );
 
-  // Reverse the levelITerators map for fast modeAccess lookup
+  // Reverse the levelIterators map for fast modeAccess lookup
   for (auto& iterator : content->levelIterators) {
     content->modeAccesses.insert({iterator.second, iterator.first});
   }
@@ -440,6 +468,8 @@ Iterators::createAccessIterators(Access access, Format format, Expr tensorIR, Pr
       << tensorConcrete << ", Format" << format;
   Shape shape = tensorConcrete.getType().getShape();
 
+  // TODO (rohany): What's the deal with this parent iterator? It seems like
+  // I don't need to attempt to window it, because it doesn't "have" a mode.
   Iterator parent(tensorIR);
   content->levelIterators.insert({{access,0}, parent});
 
@@ -472,6 +502,15 @@ Iterators::createAccessIterators(Access access, Format format, Expr tensorIR, Pr
       string name = iteratorIndexVar.getName() + tensorConcrete.getName();
       Iterator iterator(iteratorIndexVar, tensorIR, mode, parent, name, true);
+
+      // If the access that this iterator corresponds to has a window, then
+      // adjust the iterator appropriately.
+      if (access.isModeWindowed(modeNumber)) {
+        auto lo = ir::Literal::make(access.getWindowLowerBound(modeNumber));
+        auto hi = ir::Literal::make(access.getWindowUpperBound(modeNumber));
+        iterator.setWindowBounds(lo, hi);
+      }
+
       content->levelIterators.insert({{access,modeNumber+1}, iterator});
       if (iteratorIndexVar != indexVar) {
         // add to allowing lowering to find correct iterator for this pos variable
diff --git a/src/lower/lowerer_impl.cpp b/src/lower/lowerer_impl.cpp
index 717ae3884..268986d3e 100644
--- a/src/lower/lowerer_impl.cpp
+++ b/src/lower/lowerer_impl.cpp
@@ -168,17 +168,32 @@ LowererImpl::lower(IndexStmt stmt, string name,
   vector<IndexVar> indexVars = getIndexVars(stmt);
   for (auto& indexVar : indexVars) {
     Expr dimension;
+    // getDimension extracts an Expr that holds the dimension of a particular
+    // tensor mode. This Expr should be used as a loop bound when iterating
+    // over the dimension of the target tensor.
+    auto getDimension = [&](const TensorVar& tv, const Access& a, int mode) {
+      // If the tensor mode is windowed, then the dimension for iteration is
+      // the bounds of the window. Otherwise, it is the actual dimension of
+      // the mode.
+      if (a.isModeWindowed(mode)) {
+        // The mode value used to access .levelIterator is 1-indexed, while
+        // the mode input to getDimension is 0-indexed. So, we shift it up by 1.
+        auto iter = iterators.levelIterator(ModeAccess(a, mode+1));
+        return ir::Sub::make(iter.getWindowUpperBound(), iter.getWindowLowerBound());
+      } else {
+        return GetProperty::make(tensorVars.at(tv), TensorProperty::Dimension, mode);
+      }
+    };
     match(stmt,
       function<void(const AssignmentNode*, Matcher*)>([&](
           const AssignmentNode* n, Matcher* m) {
         m->match(n->rhs);
         if (!dimension.defined()) {
           auto ivars = n->lhs.getIndexVars();
+          auto tv = n->lhs.getTensorVar();
           int loc = (int)distance(ivars.begin(),
                                   find(ivars.begin(),ivars.end(), indexVar));
-          if(!util::contains(temporariesSet, n->lhs.getTensorVar())) {
-            dimension = GetProperty::make(tensorVars.at(n->lhs.getTensorVar()),
-                                          TensorProperty::Dimension, loc);
+          if(!util::contains(temporariesSet, tv)) {
+            dimension = getDimension(tv, n->lhs, loc);
           }
         }
       }),
@@ -189,8 +204,7 @@ LowererImpl::lower(IndexStmt stmt, string name,
                                   find(indexVars.begin(),indexVars.end(), indexVar));
           if(!util::contains(temporariesSet, n->tensorVar)) {
-            dimension = GetProperty::make(tensorVars.at(n->tensorVar),
-                                          TensorProperty::Dimension, loc);
+            dimension = getDimension(n->tensorVar, Access(n), loc);
           }
         }
       })
@@ -1002,9 +1016,16 @@ Stmt LowererImpl::lowerForallPosition(Forall forall, Iterator iterator,
 {
   Expr coordinate = getCoordinateVar(forall.getIndexVar());
   Stmt declareCoordinate = Stmt();
+  Stmt boundsGuard = Stmt();
   if (provGraph.isCoordVariable(forall.getIndexVar())) {
     Expr coordinateArray = iterator.posAccess(iterator.getPosVar(),
                                               coordinates(iterator)).getResults()[0];
+    // If the iterator is windowed, we must recover the coordinate index
+    // variable from the windowed space.
+    if (iterator.isWindowed()) {
+      coordinateArray = this->projectWindowedPositionToCanonicalSpace(iterator, coordinateArray);
+      boundsGuard = this->upperBoundGuardForWindowPosition(iterator, coordinate);
+    }
     declareCoordinate = VarDecl::make(coordinate, coordinateArray);
   }
   if (forall.getParallelUnit() != ParallelUnit::NotParallel && forall.getOutputRaceStrategy() == OutputRaceStrategy::Atomics) {
@@ -1038,6 +1059,11 @@ Stmt LowererImpl::lowerForallPosition(Forall forall, Iterator iterator,
     boundsCompute = bounds.compute();
     startBound = bounds[0];
     endBound = bounds[1];
+    // If we have a window on this iterator, then search for the start of
+    // the window rather than starting at the beginning of the level.
+    if (iterator.isWindowed()) {
+      startBound = this->searchForStartOfWindowPosition(iterator, startBound, endBound);
+    }
   } else {
     taco_iassert(iterator.isOrdered() && iterator.getParent().isOrdered());
     taco_iassert(iterator.isCompact() && iterator.getParent().isCompact());
@@ -1059,10 +1085,12 @@ Stmt LowererImpl::lowerForallPosition(Forall forall, Iterator iterator,
       && forall.getOutputRaceStrategy() != OutputRaceStrategy::ParallelReduction && !ignoreVectorize) {
     kind = LoopKind::Runtime;
   }
+
   // Loop with preamble and postamble
-  return Block::blanks(boundsCompute,
+  return Block::blanks(
+                       boundsCompute,
                        For::make(iterator.getPosVar(), startBound, endBound, 1,
-                                 Block::make(declareCoordinate, body),
+                                 Block::make(declareCoordinate, boundsGuard, body),
                                  kind,
                                  ignoreVectorize ? ParallelUnit::NotParallel : forall.getParallelUnit(),
                                  ignoreVectorize ? 0 : forall.getUnrollFactor()),
                        posAppend);
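
// Note (illustrative, not part of the patch): with getDimension in place, a
// dense windowed mode is lowered with the window extent as the loop bound.
// Roughly the shape of the emitted kernel for A(i) = B(i(2, 4)), with A dense
// of dimension 2 and B dense of dimension 10 (names are made up):
void denseWindowKernelSketch(double* A_vals, double* B_vals) {
  for (int i = 0; i < (4 - 2); i++) {  // bound is hi - lo, not B's dimension
    int iB = i + 2;  // projectCanonicalSpaceToWindowedPosition: shift up by lo
    A_vals[i] = B_vals[iB];
  }
}
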
@@ -1321,9 +1349,18 @@ Stmt LowererImpl::resolveCoordinate(std::vector<Iterator> mergers, ir::Expr coor
     // Just one position iterator so it is the resolved coordinate
     ModeFunction posAccess = merger.posAccess(merger.getPosVar(),
                                               coordinates(merger));
-    Stmt resolution = emitVarDecl ?
-        VarDecl::make(coordinate, posAccess[0]) : Assign::make(coordinate, posAccess[0]);
+    auto access = posAccess[0];
+    auto guard = Stmt();
+    // If the iterator is windowed, we must recover the coordinate index
+    // variable from the windowed space.
+    if (merger.isWindowed()) {
+      access = this->projectWindowedPositionToCanonicalSpace(merger, access);
+      guard = this->upperBoundGuardForWindowPosition(merger, coordinate);
+    }
+    Stmt resolution = emitVarDecl ? VarDecl::make(coordinate, access) : Assign::make(coordinate, access);
     return Block::make(posAccess.compute(),
-                       resolution);
+                       resolution,
+                       guard);
   }
   else if (merger.hasCoordIter()) {
     taco_not_supported_yet;
@@ -2262,7 +2299,6 @@ Stmt LowererImpl::zeroInitValues(Expr tensor, Expr begin, Expr size) {
   return For::make(p, lower, upper, 1, zeroInit, parallel);
 }
 
-
 Stmt LowererImpl::declLocatePosVars(vector<Iterator> locators) {
   vector<Stmt> result;
   for (Iterator& locator : locators) {
@@ -2284,7 +2320,14 @@ Stmt LowererImpl::declLocatePosVars(vector<Iterator> locators) {
         continue; // these will be recovered with separate procedure
       }
       do {
-        ModeFunction locate = locateIterator.locate(coordinates(locateIterator));
+        auto coords = coordinates(locateIterator);
+        // If this dimension iterator operates over a window, then it needs
+        // to be projected up to the window's iteration space.
+        if (locateIterator.isWindowed()) {
+          auto expr = coords[coords.size() - 1];
+          coords[coords.size() - 1] = this->projectCanonicalSpaceToWindowedPosition(locateIterator, expr);
+        }
+        ModeFunction locate = locateIterator.locate(coords);
         taco_iassert(isValue(locate.getResults()[1], true));
         Stmt declarePosVar = VarDecl::make(locateIterator.getPosVar(),
                                            locate.getResults()[0]);
@@ -2378,6 +2421,11 @@ Stmt LowererImpl::codeToInitializeIteratorVar(Iterator iterator, vector<Iterator>
                    this->iterators)[coordinateVar][0];
     if (binarySearchTarget != underivedBounds[coordinateVar][0]) {
+      // If we have a window, then we need to project the binary search target
+      // up into the window rather than the beginning of the level.
+      if (iterator.isWindowed()) {
+        binarySearchTarget = this->projectCanonicalSpaceToWindowedPosition(iterator, binarySearchTarget);
+      }
       result.push_back(VarDecl::make(iterator.getBeginVar(), binarySearchTarget));
 
       vector<Expr> binarySearchArgs = {
@@ -2394,7 +2442,13 @@ Stmt LowererImpl::codeToInitializeIteratorVar(Iterator iterator, vector<Iterator>
   }
   else {
-    result.push_back(VarDecl::make(iterVar, bounds[0]));
+    auto bound = bounds[0];
+    // If we have a window on this iterator, then search for the start of
+    // the window rather than starting at the beginning of the level.
+    if (iterator.isWindowed()) {
+      bound = this->searchForStartOfWindowPosition(iterator, bounds[0], bounds[1]);
+    }
+    result.push_back(VarDecl::make(iterVar, bound));
   }
   result.push_back(VarDecl::make(endVar, bounds[1]));
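
// Note (illustrative, not part of the patch): searchForStartOfWindowPosition
// (implemented later in this file) emits a call to the taco_binarySearchAfter
// runtime helper. As used here its contract is roughly the following; this
// reimplementation is a sketch, not taco's actual emitted helper:
int binarySearchAfterSketch(const int* array, int lo, int hi, int target) {
  // Returns the first position p in [lo, hi) with array[p] >= target, or hi
  // if every entry is smaller: i.e., the first stored coordinate that can
  // fall inside a window starting at `target`.
  while (lo < hi) {
    int mid = lo + (hi - lo) / 2;
    if (array[mid] < target) lo = mid + 1;
    else hi = mid;
  }
  return lo;
}
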
@@ -2556,13 +2610,22 @@ Stmt LowererImpl::codeToLoadCoordinatesFromPosIterators(vector<Iterator> iterato
       ModeFunction posAccess = posIter.posAccess(posIter.getPosVar(),
                                                  coordinates(posIter));
       loadPosIterCoordinateStmts.push_back(posAccess.compute());
+      auto access = posAccess[0];
+      // If this iterator is windowed, then it needs to be projected down to
+      // recover the coordinate variable.
+      // TODO (rohany): It would be cleaner to move this logic into the
+      // ModeFunction, rather than having to check in several places.
+      if (posIter.isWindowed()) {
+        access = this->projectWindowedPositionToCanonicalSpace(posIter, access);
+      }
       if (declVars) {
-        loadPosIterCoordinateStmts.push_back(VarDecl::make(posIter.getCoordVar(),
-                                                           posAccess[0]));
+        loadPosIterCoordinateStmts.push_back(VarDecl::make(posIter.getCoordVar(), access));
       }
       else {
-        loadPosIterCoordinateStmts.push_back(Assign::make(posIter.getCoordVar(),
-                                                          posAccess[0]));
+        loadPosIterCoordinateStmts.push_back(Assign::make(posIter.getCoordVar(), access));
+      }
+      if (posIter.isWindowed()) {
+        loadPosIterCoordinateStmts.push_back(this->upperBoundGuardForWindowPosition(posIter, posIter.getCoordVar()));
       }
     }
     loadPosIterCoordinates = Block::make(loadPosIterCoordinateStmts);
@@ -2701,4 +2764,33 @@ Expr LowererImpl::checkThatNoneAreExhausted(std::vector<Iterator> iterators)
          : Lt::make(iterators[0].getIteratorVar(), iterators[0].getEndVar());
 }
 
+Expr LowererImpl::searchForStartOfWindowPosition(Iterator iterator, ir::Expr start, ir::Expr end) {
+  taco_iassert(iterator.isWindowed());
+  vector<Expr> args = {
+      // Search over the `crd` array of the level,
+      iterator.getMode().getModePack().getArray(1),
+      // between the start and end position,
+      start, end,
+      // for the beginning of the window.
+      iterator.getWindowLowerBound(),
+  };
+  return Call::make("taco_binarySearchAfter", args, Datatype::UInt64);
+}
+
+Stmt LowererImpl::upperBoundGuardForWindowPosition(Iterator iterator, ir::Expr access) {
+  taco_iassert(iterator.isWindowed());
+  return ir::IfThenElse::make(
+    ir::Gte::make(access, ir::Sub::make(iterator.getWindowUpperBound(), iterator.getWindowLowerBound())),
+    ir::Break::make()
+  );
+}
+
+Expr LowererImpl::projectWindowedPositionToCanonicalSpace(Iterator iterator, ir::Expr expr) {
+  return ir::Sub::make(expr, iterator.getWindowLowerBound());
+}
+
+Expr LowererImpl::projectCanonicalSpaceToWindowedPosition(Iterator iterator, ir::Expr expr) {
+  return ir::Add::make(expr, iterator.getWindowLowerBound());
+}
+
 }
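
// Note (illustrative, not part of the patch): putting the four helpers
// together, the kernel for reducing a compressed vector B over a window
// [lo, hi) comes out roughly like this (names made up; the search helper is
// sketched above):
double sumWindowSketch(const int* B_pos, const int* B_crd, const double* B_vals,
                       int lo, int hi) {
  double s = 0.0;
  // searchForStartOfWindowPosition: skip stored entries below the window.
  int pB = binarySearchAfterSketch(B_crd, B_pos[0], B_pos[1], lo);
  for (; pB < B_pos[1]; pB++) {
    int i = B_crd[pB] - lo;   // projectWindowedPositionToCanonicalSpace
    if (i >= hi - lo) break;  // upperBoundGuardForWindowPosition
    s += B_vals[pB];
  }
  return s;
}
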
diff --git a/src/tensor.cpp b/src/tensor.cpp
index 5e3407337..dfb40e6f5 100644
--- a/src/tensor.cpp
+++ b/src/tensor.cpp
@@ -458,6 +458,31 @@ static inline map<TensorVar, TensorBase> getTensors(const IndexExpr& expr);
 struct AccessTensorNode : public AccessNode {
   AccessTensorNode(TensorBase tensor, const std::vector<IndexVar>& indices)
       :  AccessNode(tensor.getTensorVar(), indices), tensor(tensor) {}
+
+  AccessTensorNode(TensorBase tensor, const std::vector<std::shared_ptr<IndexVarInterface>>& indices)
+      : AccessNode(tensor.getTensorVar()), tensor(tensor) {
+    // Create the vector of IndexVar to assign to this->indexVars.
+    std::vector<IndexVar> ivars(indices.size());
+    for (size_t i = 0; i < indices.size(); i++) {
+      auto var = indices[i];
+      // Match on what the IndexVarInterface actually is.
+      IndexVarInterface::match(var, [&](std::shared_ptr<IndexVar> ivar) {
+        ivars[i] = *ivar;
+      }, [&](std::shared_ptr<WindowedIndexVar> wvar) {
+        ivars[i] = wvar->getIndexVar();
+        auto lo = wvar->getLowerBound();
+        auto hi = wvar->getUpperBound();
+        taco_uassert(lo >= 0) << "slice lower bound must be >= 0";
+        taco_uassert(hi <= tensor.getDimension(i)) <<
+            "slice upper bound must be <= tensor dimension (" << tensor.getDimension(i) << ")";
+        this->windowedModes[i].lo = lo;
+        this->windowedModes[i].hi = hi;
+      });
+    }
+    // Initialize this->indexVars.
+    this->indexVars = std::move(ivars);
+  }
+
   TensorBase tensor;
   virtual void setAssignment(const Assignment& assignment) {
     tensor.syncDependentTensors();
@@ -502,6 +527,14 @@ Access TensorBase::operator()(const std::vector<IndexVar>& indices) {
   return Access(new AccessTensorNode(*this, indices));
 }
 
+Access TensorBase::operator()(const std::vector<std::shared_ptr<IndexVarInterface>>& indices) {
+  taco_uassert(indices.size() == (size_t)getOrder())
+      << "A tensor of order " << getOrder() << " must be indexed with "
+      << getOrder() << " variables, but is indexed with: "
+      << util::join(indices);
+  return Access(new AccessTensorNode(*this, indices));
+}
+
 Access TensorBase::operator()() {
   return this->operator()(std::vector<IndexVar>());
 }
diff --git a/test/test.cpp b/test/test.cpp
index 597ddf2ef..a49f10ff7 100644
--- a/test/test.cpp
+++ b/test/test.cpp
@@ -1,3 +1,5 @@
+#include <functional>
+
 #include "test.h"
 #include "taco/tensor.h"
 
@@ -51,4 +53,18 @@ ostream& operator<<(ostream& os, const NotationTest& test) {
   return os;
 }
 
+void ASSERT_THROWS_EXCEPTION_WITH_ERROR(std::function<void()> f, std::string err) {
+  EXPECT_THROW({
+    try {
+      f();
+    } catch (TacoException& e) {
+      // Catch and inspect the exception to make sure that err is within it.
+      auto s = std::string(e.what());
+      ASSERT_TRUE(s.find(err) != std::string::npos);
+      // Throw the exception back up to gtest.
+      throw;
+    }
+  }, TacoException);
+}
+
 }}
diff --git a/test/test.h b/test/test.h
index 04000bafa..3302bf81f 100644
--- a/test/test.h
+++ b/test/test.h
@@ -3,6 +3,7 @@
 
 #include "gtest/gtest.h"
 
+#include <functional>
 #include <memory>
 #include <string>
 #include <vector>
@@ -93,6 +94,10 @@ void ASSERT_COMPONENTS_EQUALS(vector<vector<vector<int>>> expectedIndices,
   ASSERT_ARRAY_EQ(expectedValues, {(double*)storage.getValues().getData(),nnz});
 }
 
+// ASSERT_THROWS_EXCEPTION_WITH_ERROR asserts that the input function throws
+// a TacoException with the input string err contained within the body.
+void ASSERT_THROWS_EXCEPTION_WITH_ERROR(std::function<void()> f, std::string err);
+
 struct NotationTest {
   NotationTest(IndexStmt actual, IndexStmt expected)
       : actual(actual), expected(expected) {}
diff --git a/test/tests-windowing.cpp b/test/tests-windowing.cpp
new file mode 100644
index 000000000..ddeeb5655
--- /dev/null
+++ b/test/tests-windowing.cpp
@@ -0,0 +1,242 @@
+#include "test.h"
+#include "taco/tensor.h"
+#include "taco/codegen/module.h"
+#include "taco/index_notation/index_notation.h"
+#include "taco/lower/lower.h"
+
+using namespace taco;
+
+// mixIndexing is a compilation test to ensure that we can index into a
+// tensor with a mix of IndexVars and WindowedIndexVars.
+TEST(windowing, mixIndexing) {
+  auto dim = 10;
+  Tensor<int> a("a", {dim, dim, dim, dim, dim}, {Dense, Dense, Dense, Dense, Dense});
+  IndexVar i, j, k, l, m;
+  auto w1 = a(i, j(1, 3), k, l(4, 5), m(6, 7));
+  auto w2 = a(i(1, 3), j(2, 4), k, l, m(3, 5));
+}
+
+TEST(windowing, boundsChecks) {
+  Tensor<int> a("a", {5}, {Dense});
+  IndexVar i("i");
+  ASSERT_THROWS_EXCEPTION_WITH_ERROR([&]() { a(i(-1, 4)); }, "slice lower bound");
+  ASSERT_THROWS_EXCEPTION_WITH_ERROR([&]() { a(i(0, 10)); }, "slice upper bound");
+}
+// sliceMultipleWays tests that the same tensor can be sliced in different
+// ways within the same expression.
+TEST(windowing, sliceMultipleWays) {
+  auto dim = 10;
+  Tensor<int> a("a", {dim}, {Dense});
+  Tensor<int> b("b", {dim}, {Sparse});
+  Tensor<int> c("c", {dim}, {Dense});
+  Tensor<int> expected("expected", {dim}, {Dense});
+  for (int i = 0; i < dim; i++) {
+    a.insert({i}, i);
+    b.insert({i}, i);
+  }
+  expected.insert({2}, 10);
+  expected.insert({3}, 13);
+  a.pack(); b.pack(); expected.pack();
+  IndexVar i("i"), j("j");
+
+  c(i(2, 4)) = a(i(5, 7)) + a(i(1, 3)) + b(i(4, 6));
+  c.evaluate();
+  ASSERT_TRUE(equals(expected, c));
+}
+
+// basic tests a windowed tensor expression with different combinations
+// of tensor formats.
+TEST(windowing, basic) {
+  Tensor<int> expectedAdd("expectedAdd", {2, 2}, {Dense, Dense});
+  expectedAdd.insert({0, 0}, 14);
+  expectedAdd.insert({0, 1}, 17);
+  expectedAdd.insert({1, 0}, 17);
+  expectedAdd.insert({1, 1}, 20);
+  expectedAdd.pack();
+  Tensor<int> expectedMul("expectedMul", {2, 2}, {Dense, Dense});
+  expectedMul.insert({0, 0}, 64);
+  expectedMul.insert({0, 1}, 135);
+  expectedMul.insert({1, 0}, 135);
+  expectedMul.insert({1, 1}, 240);
+  expectedMul.pack();
+  Tensor<int> d("d", {2, 2}, {Dense, Dense});
+
+  // These dimensions are chosen so that one lies above the constant in
+  // `mode_format_dense.cpp:54`, where a known stride is generated rather
+  // than using the dimension.
+  // TODO (rohany): Move that constant into a header file and import it here.
+  for (auto& dim : {6, 20}) {
+    for (auto& x : {Dense, Sparse}) {
+      for (auto& y : {Dense, Sparse}) {
+        for (auto& z : {Dense, Sparse}) {
+          Tensor<int> a("a", {dim, dim}, {Dense, x});
+          Tensor<int> b("b", {dim, dim}, {Dense, y});
+          Tensor<int> c("c", {dim, dim}, {Dense, z});
+          for (int i = 0; i < dim; i++) {
+            for (int j = 0; j < dim; j++) {
+              a.insert({i, j}, i + j);
+              b.insert({i, j}, i + j);
+              c.insert({i, j}, i + j);
+            }
+          }
+
+          a.pack();
+          b.pack();
+          c.pack();
+
+          IndexVar i, j;
+          d(i, j) = a(i(2, 4), j(2, 4)) + b(i(4, 6), j(4, 6)) + c(i(1, 3), j(1, 3));
+          d.evaluate();
+          ASSERT_TRUE(equals(expectedAdd, d))
+              << endl << expectedAdd << endl << endl << d << endl
+              << dim << " " << x << " " << y << " " << z << endl;
+
+          d(i, j) = a(i(2, 4), j(2, 4)) * b(i(4, 6), j(4, 6)) * c(i(1, 3), j(1, 3));
+          d.evaluate();
+          ASSERT_TRUE(equals(expectedMul, d))
+              << endl << expectedMul << endl << endl << d << endl
+              << dim << " " << x << " " << y << " " << z << endl;
+        }
+      }
+    }
+  }
+}
+
+// slicedOutput tests that operations can write to a window within an output
+// tensor.
+TEST(windowing, slicedOutput) {
+  auto dim = 10;
+  Tensor<int> expected("expected", {10, 10}, {Dense, Dense});
+  expected.insert({8, 8}, 12);
+  expected.insert({8, 9}, 14);
+  expected.insert({9, 8}, 14);
+  expected.insert({9, 9}, 16);
+  expected.pack();
+  for (auto& x : {Dense, Sparse}) {
+    for (auto& y : {Dense, Sparse}) {
+      Tensor<int> a("a", {dim, dim}, {Dense, x});
+      Tensor<int> b("b", {dim, dim}, {Dense, y});
+      Tensor<int> c("c", {dim, dim}, {Dense, Dense});
+      for (int i = 0; i < dim; i++) {
+        for (int j = 0; j < dim; j++) {
+          a.insert({i, j}, i + j);
+          b.insert({i, j}, i + j);
+        }
+      }
+      a.pack();
+      b.pack();
+
+      IndexVar i, j;
+      c(i(8, 10), j(8, 10)) = a(i(2, 4), j(2, 4)) + b(i(4, 6), j(4, 6));
+      c.evaluate();
+      ASSERT_TRUE(equals(expected, c))
+          << endl << expected << endl << endl << c << endl
+          << dim << " " << x << " " << y << endl;
+    }
+  }
+}
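
// Note (illustrative, not part of the patch): a quick check of the expected
// values above: with a(i, j) = b(i, j) = i + j, output entry (8, 8) is
// a[2][2] + b[4][4] = 4 + 8 = 12, and entry (8, 9) is a[2][3] + b[4][5] =
// 5 + 9 = 14, matching the `expected` tensor.
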
+// transformations tests how windowing interacts with sparse iteration space
+// transformations and different mode formats.
+TEST(windowing, transformations) {
+  auto dim = 10;
+  Tensor<int> expected("expected", {2, 2}, {Dense, Dense});
+  expected.insert({0, 0}, 12);
+  expected.insert({0, 1}, 14);
+  expected.insert({1, 0}, 14);
+  expected.insert({1, 1}, 16);
+  expected.pack();
+
+  IndexVar i("i"), j("j"), i1("i1"), i2("i2");
+  auto testFn = [&](std::function<IndexStmt(IndexStmt)> modifier, std::vector<Format> formats) {
+    for (auto& format : formats) {
+      Tensor<int> a("a", {dim, dim}, format);
+      Tensor<int> b("b", {dim, dim}, format);
+      for (int i = 0; i < dim; i++) {
+        for (int j = 0; j < dim; j++) {
+          a.insert({i, j}, i + j);
+          b.insert({i, j}, i + j);
+        }
+      }
+      a.pack(); b.pack();
+
+      Tensor<int> c("c", {2, 2}, {Dense, Dense});
+      c(i, j) = a(i(2, 4), j(2, 4)) + b(i(4, 6), j(4, 6));
+      auto stmt = c.getAssignment().concretize();
+      c.compile(modifier(stmt));
+      c.evaluate();
+      ASSERT_TRUE(equals(c, expected)) << endl << c << endl << expected << endl << format << endl;
+    }
+  };
+
+  std::vector<Format> allFormats = {{Dense, Dense}, {Dense, Sparse}, {Sparse, Dense}, {Sparse, Sparse}};
+  testFn([&](IndexStmt stmt) {
+    return stmt.split(i, i1, i2, 4).unroll(i2, 4);
+  }, allFormats);
+
+  // TODO (rohany): It seems that these loops can only be reordered in the
+  // Dense,Dense case.
+  testFn([&](IndexStmt stmt) {
+    return stmt.reorder(i, j);
+  }, {{Dense, Dense}});
+
+  // We can only (currently) parallelize the outer dimension loop if it is dense.
+  testFn([&](IndexStmt stmt) {
+    return stmt.parallelize(i, taco::ParallelUnit::CPUThread, taco::OutputRaceStrategy::NoRaces);
+  }, {{Dense, Dense}, {Dense, Sparse}});
+}
+
+// assignment tests assignments of and to windows in different combinations.
+TEST(windowing, assignment) {
+  auto dim = 10;
+
+  auto testFn = [&](Format srcFormat) {
+    Tensor<int> A("A", {dim, dim}, srcFormat);
+    for (int i = 0; i < dim; i++) {
+      for (int j = 0; j < dim; j++) {
+        A.insert({i, j}, i + j);
+      }
+    }
+    A.pack();
+
+    IndexVar i, j;
+
+    // First, assign a window of A to a window of B.
+    Tensor<int> B("B", {dim, dim}, {Dense, Dense});
+    B(i(2, 4), j(3, 5)) = A(i(4, 6), j(5, 7));
+    B.evaluate();
+    Tensor<int> expected("expected", {dim, dim}, {Dense, Dense});
+    expected.insert({2, 3}, 9); expected.insert({2, 4}, 10);
+    expected.insert({3, 3}, 10); expected.insert({3, 4}, 11);
+    expected.pack();
+    ASSERT_TRUE(equals(B, expected)) << B << std::endl << expected << std::endl;
+
+    // Assign a window of A to B.
+    B = Tensor<int>("B", {2, 2}, {Dense, Dense});
+    B(i, j) = A(i(4, 6), j(5, 7));
+    B.evaluate();
+    expected = Tensor<int>("expected", {2, 2}, {Dense, Dense});
+    expected.insert({0, 0}, 9); expected.insert({0, 1}, 10);
+    expected.insert({1, 0}, 10); expected.insert({1, 1}, 11);
+    expected.pack();
+    ASSERT_TRUE(equals(B, expected)) << B << std::endl << expected << std::endl;
+
+    // Assign A to a window of B.
+    A = Tensor<int>("A", {2, 2}, srcFormat);
+    A.insert({0, 0}, 0); A.insert({0, 1}, 1);
+    A.insert({1, 0}, 1); A.insert({1, 1}, 2);
+    A.pack();
+    B = Tensor<int>("B", {dim, dim}, {Dense, Dense});
+    B(i(4, 6), j(5, 7)) = A(i, j);
+    B.evaluate();
+    expected = Tensor<int>("expected", {dim, dim}, {Dense, Dense});
+    expected.insert({4, 5}, 0); expected.insert({4, 6}, 1);
+    expected.insert({5, 5}, 1); expected.insert({5, 6}, 2);
+    expected.pack();
+    ASSERT_TRUE(equals(B, expected)) << B << std::endl << expected << std::endl;
+  };
+
+  for (auto& x : {Dense, Sparse}) {
+    testFn({Dense, x});
+  }
+}