diff --git a/include/taco/index_notation/index_notation.h b/include/taco/index_notation/index_notation.h
index cacd1411c..a6886a1ae 100644
--- a/include/taco/index_notation/index_notation.h
+++ b/include/taco/index_notation/index_notation.h
@@ -1,6 +1,7 @@
 #ifndef TACO_INDEX_NOTATION_H
 #define TACO_INDEX_NOTATION_H
 
+#include <functional>
 #include <ostream>
 #include <string>
 #include <memory>
@@ -30,6 +31,7 @@ class Format;
 class Schedule;
 class IndexVar;
+class WindowedIndexVar;
 class TensorVar;
 
 class IndexExpr;
@@ -228,6 +230,16 @@ class Access : public IndexExpr {
   /// Returns the index variables used to index into the Access's TensorVar.
   const std::vector<IndexVar>& getIndexVars() const;
 
+  /// hasWindowedModes returns true if any accessed modes are windowed.
+  bool hasWindowedModes() const;
+
+  /// Returns whether or not the input mode (0-indexed) is windowed.
+  bool isModeWindowed(int mode) const;
+
+  /// Return the {lower,upper} bound of the window on the input mode (0-indexed).
+  int getWindowLowerBound(int mode) const;
+  int getWindowUpperBound(int mode) const;
+
   /// Assign the result of an expression to a left-hand-side tensor access.
   /// ```
   /// a(i) = b(i) * c(i);
@@ -800,11 +812,67 @@ class Multi : public IndexStmt {
 /// Create a multi index statement.
 Multi multi(IndexStmt stmt1, IndexStmt stmt2);
 
+/// IndexVarInterface is a marker superclass for IndexVar-like objects.
+/// It is intended for situations where many IndexVar-like objects must be
+/// stored together, such as when building an Access AST node where some of
+/// the access variables are windowed. Code that consumes an
+/// IndexVarInterface inspects the underlying type of the object. For the
+/// sake of completeness, the current implementers of IndexVarInterface are:
+/// * IndexVar
+/// * WindowedIndexVar
+/// If this set changes, make sure to update the match function.
+class IndexVarInterface {
+public:
+  virtual ~IndexVarInterface() = default;
+
+  /// match performs a dynamic case analysis over the implementers of
+  /// IndexVarInterface as a utility for handling the different values
+  /// within. It mimics the dynamic type assertion of Go.
+  static void match(
+      std::shared_ptr<IndexVarInterface> ptr,
+      std::function<void(std::shared_ptr<IndexVar>)> ivarFunc,
+      std::function<void(std::shared_ptr<WindowedIndexVar>)> wvarFunc
+  ) {
+    auto iptr = std::dynamic_pointer_cast<IndexVar>(ptr);
+    auto wptr = std::dynamic_pointer_cast<WindowedIndexVar>(ptr);
+    if (iptr != nullptr) {
+      ivarFunc(iptr);
+    } else if (wptr != nullptr) {
+      wvarFunc(wptr);
+    } else {
+      taco_iassert(false) << "IndexVarInterface was not IndexVar or WindowedIndexVar";
+    }
+  }
+};
+
+/// WindowedIndexVar represents an IndexVar that has been windowed. For example, in
+///   A(i) = B(i(2, 4))
+/// i(2, 4) is a WindowedIndexVar. WindowedIndexVar is defined before IndexVar
+/// so that IndexVar can return objects of type WindowedIndexVar.
+class WindowedIndexVar : public util::Comparable<WindowedIndexVar>, public IndexVarInterface {
+public:
+  WindowedIndexVar(IndexVar base, int lo = -1, int hi = -1);
+  ~WindowedIndexVar() = default;
+
+  /// getIndexVar returns the underlying IndexVar.
+  IndexVar getIndexVar() const;
+
+  /// get{Lower,Upper}Bound returns the {lower,upper} bound of the window of
+  /// this index variable.
+  int getLowerBound() const;
+  int getUpperBound() const;
+
+private:
+  struct Content;
+  std::shared_ptr<Content> content;
+};
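
// Note (illustrative, not part of the patch): a minimal sketch of the
// user-facing API this header adds, mirroring the tests at the end of the
// patch. Tensor names and dimensions here are made up.
#include "taco/tensor.h"
using namespace taco;
void windowSketch() {
  Tensor<int> A("A", {2}, {Dense});
  Tensor<int> B("B", {10}, {Dense});
  IndexVar i;
  A(i) = B(i(2, 4));  // i(2, 4) is a WindowedIndexVar: A gets B's entries [2, 4).
  A.evaluate();
}
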
 /// Index variables are used to index into tensors in index expressions, and
 /// they represent iteration over the tensor modes they index into.
-class IndexVar : public util::Comparable<IndexVar> {
+class IndexVar : public util::Comparable<IndexVar>, public IndexVarInterface {
 public:
   IndexVar();
+  ~IndexVar() = default;
   IndexVar(const std::string& name);
 
   /// Returns the name of the index variable.
@@ -813,6 +881,8 @@ class IndexVar : public util::Comparable<IndexVar> {
   friend bool operator==(const IndexVar&, const IndexVar&);
   friend bool operator<(const IndexVar&, const IndexVar&);
 
+  /// Indexing into an IndexVar returns a window into it.
+  WindowedIndexVar operator()(int lo, int hi);
 
 private:
   struct Content;
@@ -823,7 +893,15 @@ struct IndexVar::Content {
   std::string name;
 };
 
+struct WindowedIndexVar::Content {
+  IndexVar base;
+  int lo;
+  int hi;
+};
+
+std::ostream& operator<<(std::ostream&, const std::shared_ptr<IndexVarInterface>&);
 std::ostream& operator<<(std::ostream&, const IndexVar&);
+std::ostream& operator<<(std::ostream&, const WindowedIndexVar&);
 
 /// A suchthat statement provides a set of IndexVarRel that constrain
 /// the iteration space for the child concrete index notation
diff --git a/include/taco/index_notation/index_notation_nodes.h b/include/taco/index_notation/index_notation_nodes.h
index 95439cd6b..f20584b85 100644
--- a/include/taco/index_notation/index_notation_nodes.h
+++ b/include/taco/index_notation/index_notation_nodes.h
@@ -26,6 +26,23 @@ struct AccessNode : public IndexExprNode {
 
   TensorVar tensorVar;
   std::vector<IndexVar> indexVars;
+
+  // An AccessNode carries the windowing information for an IndexVar + TensorVar
+  // combination. windowedModes contains the lower and upper bounds of each
+  // windowed mode (0-indexed).
+  struct Window {
+    int lo;
+    int hi;
+    friend bool operator==(const Window& a, const Window& b) {
+      return a.lo == b.lo && a.hi == b.hi;
+    }
+  };
+  std::map<int, Window> windowedModes;
+
+protected:
+  /// Initialize an AccessNode with just a TensorVar. If this constructor is used,
+  /// then indexVars must be set afterwards.
+  explicit AccessNode(TensorVar tensorVar)
+      : IndexExprNode(tensorVar.getType().getDataType()), tensorVar(tensorVar) {}
 };
 
 struct LiteralNode : public IndexExprNode {
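
// Note (illustrative, not part of the patch): for an access like
// B(i(2, 4), j), windowedModes maps mode 0 to {2, 4} and has no entry for
// mode 1. A sketch of querying that information through the Access API
// declared in index_notation.h (assumes the taco headers and <iostream>
// are included):
void printWindows(const taco::Access& access) {
  for (int m = 0; m < (int)access.getIndexVars().size(); m++) {
    if (access.isModeWindowed(m)) {
      // For B(i(2, 4), j) this prints "mode 0: [2, 4)".
      std::cout << "mode " << m << ": [" << access.getWindowLowerBound(m)
                << ", " << access.getWindowUpperBound(m) << ")" << std::endl;
    }
  }
}
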
diff --git a/include/taco/lower/iterator.h b/include/taco/lower/iterator.h
index 1d871ffaa..0fe765653 100644
--- a/include/taco/lower/iterator.h
+++ b/include/taco/lower/iterator.h
@@ -159,6 +159,17 @@ class Iterator : public util::Comparable<Iterator> {
   /// Returns true if the iterator is defined, false otherwise.
   bool defined() const;
 
+  /// Methods for querying and operating on windowed tensor modes.
+
+  /// isWindowed returns true if this iterator is operating over a window
+  /// of a tensor mode.
+  bool isWindowed() const;
+
+  /// getWindow{Lower,Upper}Bound return the {lower,upper} bound of the
+  /// window that this iterator operates over.
+  ir::Expr getWindowLowerBound() const;
+  ir::Expr getWindowUpperBound() const;
+
   friend bool operator==(const Iterator&, const Iterator&);
   friend bool operator<(const Iterator&, const Iterator&);
   friend std::ostream& operator<<(std::ostream&, const Iterator&);
@@ -169,6 +180,10 @@ class Iterator : public util::Comparable<Iterator> {
   Iterator(std::shared_ptr<Content> content);
 
   void setChild(const Iterator& iterator) const;
+
+  friend class Iterators;
+  /// setWindowBounds sets the window bounds of this iterator.
+  void setWindowBounds(ir::Expr lo, ir::Expr hi);
 };
 
 /**
diff --git a/include/taco/lower/lowerer_impl.h b/include/taco/lower/lowerer_impl.h
index 39675d450..afc936145 100644
--- a/include/taco/lower/lowerer_impl.h
+++ b/include/taco/lower/lowerer_impl.h
@@ -375,9 +375,30 @@ class LowererImpl : public util::Uncopyable {
   /// Create an expression to index into a tensor value array.
   ir::Expr generateValueLocExpr(Access access) const;
 
-  /// Expression that evaluates to true if none of the iteratators are exhausted
+  /// Expression that evaluates to true if none of the iterators are exhausted
   ir::Expr checkThatNoneAreExhausted(std::vector<Iterator> iterators);
 
+  /// Expression that returns the beginning of a window to iterate over
+  /// in a compressed iterator. It is used when operating over windows of
+  /// tensors, instead of the full tensor.
+  ir::Expr searchForStartOfWindowPosition(Iterator iterator, ir::Expr start, ir::Expr end);
+
+  /// Statement that guards against going out of bounds of the window that
+  /// the input iterator was configured with.
+  ir::Stmt upperBoundGuardForWindowPosition(Iterator iterator, ir::Expr access);
+
+  /// Expression that recovers a canonical index variable from a position in
+  /// a windowed position iterator. A windowed position iterator iterates over
+  /// values in the range [lo, hi). This expression projects values in that
+  /// range back into the canonical range of [0, n).
+  ir::Expr projectWindowedPositionToCanonicalSpace(Iterator iterator, ir::Expr expr);
+
+  /// projectCanonicalSpaceToWindowedPosition is the opposite of
+  /// projectWindowedPositionToCanonicalSpace. It takes an expression ranging
+  /// over the canonical space of [0, n) and projects it up to the windowed
+  /// range of [lo, hi).
+  ir::Expr projectCanonicalSpaceToWindowedPosition(Iterator iterator, ir::Expr expr);
+
 private:
   bool assemble;
   bool compute;
diff --git a/include/taco/tensor.h b/include/taco/tensor.h
index 313758252..1eddecf2b 100644
--- a/include/taco/tensor.h
+++ b/include/taco/tensor.h
@@ -386,6 +386,9 @@ class TensorBase {
   /// Create an index expression that accesses (reads or writes) this tensor.
   Access operator()(const std::vector<IndexVar>& indices);
 
+  /// Create a possibly windowed index expression that accesses (reads or writes) this tensor.
+  Access operator()(const std::vector<std::shared_ptr<IndexVarInterface>>& indices);
+
   /// Create an index expression that accesses (reads) this (scalar) tensor.
   Access operator()();
 
@@ -621,6 +624,20 @@ class Tensor : public TensorBase {
   template <typename... IndexVars>
   Access operator()(const IndexVars&... indices);
 
+  /// The below two Access methods allow users to access tensors with a mix
+  /// of IndexVars and WindowedIndexVars, so that natural expressions like
+  ///   A(i, j(1, 3)) = B(i(2, 4), j) * C(i(5, 7), j(7, 9))
+  /// can be constructed without adjusting the original API.
+
+  /// Create an index expression that accesses (reads, writes) this tensor.
+  template <typename... IndexVars>
+  Access operator()(const WindowedIndexVar& first, const IndexVars&... indices);
+
+  /// Create an index expression that accesses (reads, writes) this tensor.
+  template <typename... IndexVars>
+  Access operator()(const IndexVar& first, const IndexVars&... indices);
+
   ScalarAccess<CType> operator()(const std::vector<int>& indices);
 
   /// Create an index expression that accesses (reads) this tensor.
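
// Note (illustrative, not part of the patch): with the overloads above,
// windowed and unwindowed variables mix freely in a single access. A sketch,
// with dimensions chosen so that every variable binds to extent 2 (all names
// made up; assumes the taco headers are included):
void mixedIndexingSketch() {
  using namespace taco;
  Tensor<int> A("A", {2, 4}, {Dense, Dense});
  Tensor<int> B("B", {10, 2}, {Dense, Dense});
  Tensor<int> C("C", {10, 10}, {Dense, Dense});
  IndexVar i, j;
  // j is windowed to [1, 3) on A, spans B's full mode of size 2, and is
  // windowed to [7, 9) on C; i is windowed on B and C but free on A.
  A(i, j(1, 3)) = B(i(2, 4), j) * C(i(5, 7), j(7, 9));
}
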
@@ -629,6 +646,15 @@
 
   /// Assign an expression to a scalar tensor.
   void operator=(const IndexExpr& expr);
+
+private:
+  /// The _access method family is the template-level implementation of
+  /// Access() expressions that contain mixes of IndexVar and WindowedIndexVar
+  /// objects.
+  template <typename First, typename... Rest>
+  std::vector<std::shared_ptr<IndexVarInterface>> _access(const First& first, const Rest&... rest);
+  std::vector<std::shared_ptr<IndexVarInterface>> _access();
+  template <typename... Args>
+  Access _access_wrapper(const Args&... args);
 };
 
 template <typename CType>
@@ -1084,6 +1110,63 @@ Access Tensor<CType>::operator()(const IndexVars&... indices) {
   return TensorBase::operator()(std::vector<IndexVar>{indices...});
 }
 
+/// The _access() methods perform primitive recursion on the input variadic
+/// template: each instance of _access matches on the first element of the
+/// parameter pack, performs an "action", then recurses on the remaining
+/// elements. Since this is recursion, we need a base case: the empty-argument
+/// instance of _access returns an empty value of the desired type, in this
+/// case a vector of IndexVarInterface.
+template <typename CType>
+std::vector<std::shared_ptr<IndexVarInterface>> Tensor<CType>::_access() {
+  return std::vector<std::shared_ptr<IndexVarInterface>>{};
+}
+
+/// The recursive case of _access matches on the first element and creates a
+/// shared_ptr out of it. It then recurses to collect the rest of the elements
+/// and pushes the first element onto the back of the resulting vector --
+/// storing it as a shared_ptr of IndexVarInterface is what checks that the
+/// type First is indeed an implementer of IndexVarInterface.
+template <typename CType>
+template <typename First, typename... Rest>
+std::vector<std::shared_ptr<IndexVarInterface>> Tensor<CType>::_access(const First& first, const Rest&... rest) {
+  auto var = std::make_shared<First>(first);
+  auto ret = _access(rest...);
+  ret.push_back(var);
+  return ret;
+}
+
+/// _access_wrapper calls into _access and reverses the result to recover the
+/// original order of the arguments.
+template <typename CType>
+template <typename... Args>
+Access Tensor<CType>::_access_wrapper(const Args&... args) {
+  auto resultReversed = this->_access(args...);
+  std::vector<std::shared_ptr<IndexVarInterface>> result;
+  result.reserve(resultReversed.size());
+  for (auto it = resultReversed.rbegin(); it != resultReversed.rend(); it++) {
+    result.push_back(*it);
+  }
+  return TensorBase::operator()(result);
+}
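
// Note (illustrative, not part of the patch): a standalone analogue of the
// recursion above that makes the reversal visible. This is demo code, not
// taco's:
#include <vector>
inline std::vector<int> collect() { return {}; }  // base case: empty vector
template <typename First, typename... Rest>
std::vector<int> collect(const First& first, const Rest&... rest) {
  auto ret = collect(rest...);  // recurse on the tail first...
  ret.push_back(first);         // ...so the head lands at the back
  return ret;
}
// collect(1, 2, 3) yields {3, 2, 1}; _access_wrapper reverses its result for
// the same reason, restoring the original argument order.
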
+
+/// We have to case on whether the first argument is an IndexVar or a
+/// WindowedIndexVar so that the template engine can differentiate between
+/// the two versions.
+// TODO (rohany): There is a chance that these two methods aren't needed given
+// _access; instead, the other operator() methods that also take IndexVar...
+// could be removed so that there isn't any overload confusion.
+template <typename CType>
+template <typename... IndexVars>
+Access Tensor<CType>::operator()(const IndexVar& first, const IndexVars&... indices) {
+  return this->_access_wrapper(first, indices...);
+}
+template <typename CType>
+template <typename... IndexVars>
+Access Tensor<CType>::operator()(const WindowedIndexVar& first, const IndexVars&... indices) {
+  return this->_access_wrapper(first, indices...);
+}
+
 template <typename CType>
 ScalarAccess<CType> Tensor<CType>::operator()(const std::vector<int>& indices) {
   taco_uassert(indices.size() == (size_t)getOrder())
diff --git a/src/codegen/codegen_c.cpp b/src/codegen/codegen_c.cpp
index a0c6c9591..565757116 100644
--- a/src/codegen/codegen_c.cpp
+++ b/src/codegen/codegen_c.cpp
@@ -516,7 +516,7 @@ void CodeGen_C::visit(const Allocate* op) {
     stream << ", ";
   }
   else {
-    stream << "malloc(";
+    stream << "calloc(1, ";
   }
   stream << "sizeof(" << elementType << ")";
   stream << " * ";
diff --git a/src/error/error_checks.cpp b/src/error/error_checks.cpp
index 9fc067d2d..7516ab3d2 100644
--- a/src/error/error_checks.cpp
+++ b/src/error/error_checks.cpp
@@ -53,6 +53,14 @@ std::pair<bool, std::string> dimensionsTypecheck(const std::vector<IndexVar>& resultV
     for (size_t mode = 0; mode < readNode->indexVars.size(); mode++) {
       IndexVar var = readNode->indexVars[mode];
       Dimension dimension = readNode->tensorVar.getType().getShape().getDimension(mode);
+
+      // If this access has windowed modes, use the dimensions of those windows
+      // as the shape, rather than the shape of the underlying tensor.
+      auto a = Access(readNode);
+      if (a.isModeWindowed(mode)) {
+        dimension = Dimension(a.getWindowUpperBound(mode) - a.getWindowLowerBound(mode));
+      }
+
       if (util::contains(indexVarDims,var) && indexVarDims.at(var) != dimension) {
         errors.push_back(addDimensionError(var, indexVarDims.at(var), dimension));
       } else {
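
// Note (illustrative, not part of the patch): under this rule an index
// variable's dimension is the *window* extent, so differently-placed windows
// of the same size unify. A sketch (assumes the taco headers are included):
void typecheckSketch() {
  using namespace taco;
  Tensor<int> a("a", {10}, {Dense}), b("b", {10}, {Dense}), c("c", {10}, {Dense});
  IndexVar i;
  // OK: every access binds i to an extent-2 dimension.
  c(i(2, 4)) = a(i(5, 7)) + b(i(0, 2));
  // Would fail the typecheck: i binds to extent 10 via c but extent 2 via a.
  // c(i) = a(i(5, 7));
}
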
diff --git a/src/index_notation/index_notation.cpp b/src/index_notation/index_notation.cpp
index f00557eeb..80c32a2c2 100644
--- a/src/index_notation/index_notation.cpp
+++ b/src/index_notation/index_notation.cpp
@@ -185,7 +185,7 @@ struct Isomorphic : public IndexNotationVisitorStrict {
         return;
       }
     }
-    eq = true;
+    eq = anode->windowedModes == bnode->windowedModes;
   }
 
   void visit(const LiteralNode* anode) {
@@ -758,11 +758,45 @@ const std::vector<IndexVar>& Access::getIndexVars() const {
   return getNode(*this)->indexVars;
 }
 
+bool Access::hasWindowedModes() const {
+  return !getNode(*this)->windowedModes.empty();
+}
+
+bool Access::isModeWindowed(int mode) const {
+  auto node = getNode(*this);
+  return node->windowedModes.find(mode) != node->windowedModes.end();
+}
+
+int Access::getWindowLowerBound(int mode) const {
+  taco_iassert(this->isModeWindowed(mode));
+  return getNode(*this)->windowedModes.at(mode).lo;
+}
+
+int Access::getWindowUpperBound(int mode) const {
+  taco_iassert(this->isModeWindowed(mode));
+  return getNode(*this)->windowedModes.at(mode).hi;
+}
+
 static void check(Assignment assignment) {
-  auto tensorVar = assignment.getLhs().getTensorVar();
-  auto freeVars = assignment.getLhs().getIndexVars();
+  auto lhs = assignment.getLhs();
+  auto tensorVar = lhs.getTensorVar();
+  auto freeVars = lhs.getIndexVars();
   auto indexExpr = assignment.getRhs();
   auto shape = tensorVar.getType().getShape();
+
+  // If the LHS access has any windowed modes, use the dimensions of those
+  // windows as the shape, rather than the shape of the underlying tensor.
+  if (lhs.hasWindowedModes()) {
+    vector<Dimension> dims(shape.getOrder());
+    for (int i = 0; i < shape.getOrder(); i++) {
+      dims[i] = shape.getDimension(i);
+      if (lhs.isModeWindowed(i)) {
+        dims[i] = Dimension(lhs.getWindowUpperBound(i) - lhs.getWindowLowerBound(i));
+      }
+    }
+    shape = Shape(dims);
+  }
+
   auto typecheck = error::dimensionsTypecheck(freeVars, indexExpr, shape);
   taco_uassert(typecheck.first) << error::expr_dimension_mismatch << " " << typecheck.second;
 }
@@ -1800,6 +1834,10 @@ std::string IndexVar::getName() const {
   return content->name;
 }
 
+WindowedIndexVar IndexVar::operator()(int lo, int hi) {
+  return WindowedIndexVar(*this, lo, hi);
+}
+
 bool operator==(const IndexVar& a, const IndexVar& b) {
   return a.content == b.content;
 }
@@ -1808,10 +1846,42 @@ bool operator<(const IndexVar& a, const IndexVar& b) {
   return a.content < b.content;
 }
 
+std::ostream& operator<<(std::ostream& os, const std::shared_ptr<IndexVarInterface>& var) {
+  std::stringstream ss;
+  IndexVarInterface::match(var, [&](std::shared_ptr<IndexVar> ivar) {
+    ss << *ivar;
+  }, [&](std::shared_ptr<WindowedIndexVar> wvar) {
+    ss << *wvar;
+  });
+  return os << ss.str();
+}
+
 std::ostream& operator<<(std::ostream& os, const IndexVar& var) {
   return os << var.getName();
 }
 
+std::ostream& operator<<(std::ostream& os, const WindowedIndexVar& var) {
+  return os << var.getIndexVar();
+}
+
+WindowedIndexVar::WindowedIndexVar(IndexVar base, int lo, int hi) : content(new Content) {
+  this->content->base = base;
+  this->content->lo = lo;
+  this->content->hi = hi;
+}
+
+IndexVar WindowedIndexVar::getIndexVar() const {
+  return this->content->base;
+}
+
+int WindowedIndexVar::getLowerBound() const {
+  return this->content->lo;
+}
+
+int WindowedIndexVar::getUpperBound() const {
+  return this->content->hi;
+}
+
 // class TensorVar
 struct TensorVar::Content {
   int id;
@@ -1951,6 +2021,20 @@ static bool isValid(Assignment assignment, string* reason) {
   auto result = lhs.getTensorVar();
   auto freeVars = lhs.getIndexVars();
   auto shape = result.getType().getShape();
+
+  // If the LHS access has any windowed modes, use the dimensions of those
+  // windows as the shape, rather than the shape of the underlying tensor.
+  if (lhs.hasWindowedModes()) {
+    vector<Dimension> dims(shape.getOrder());
+    for (int i = 0; i < shape.getOrder(); i++) {
+      dims[i] = shape.getDimension(i);
+      if (lhs.isModeWindowed(i)) {
+        dims[i] = Dimension(lhs.getWindowUpperBound(i) - lhs.getWindowLowerBound(i));
+      }
+    }
+    shape = Shape(dims);
+  }
+
   auto typecheck = error::dimensionsTypecheck(freeVars, rhs, shape);
   if (!typecheck.first) {
     *reason = error::expr_dimension_mismatch + " " + typecheck.second;
diff --git a/src/lower/iterator.cpp b/src/lower/iterator.cpp
index 9f5b7dd4b..4f71f5ac7 100644
--- a/src/lower/iterator.cpp
+++ b/src/lower/iterator.cpp
@@ -28,6 +28,16 @@ struct Iterator::Content {
   ir::Expr segendVar;
   ir::Expr validVar;
   ir::Expr beginVar;
+
+  // Window represents a window (or slice) into a tensor mode, given by
+  // expressions for the window's lower and upper bounds. An iterator is
+  // windowed if window is not NULL.
+  struct Window {
+    ir::Expr lo;
+    ir::Expr hi;
+    Window(ir::Expr _lo, ir::Expr _hi) : lo(_lo), hi(_hi) {}
+  };
+  std::unique_ptr<Window> window;
 };
 
 Iterator::Iterator() : content(nullptr) {
@@ -323,6 +333,24 @@ bool Iterator::defined() const {
   return content != nullptr;
 }
 
+bool Iterator::isWindowed() const {
+  return this->content->window != nullptr;
+}
+
+ir::Expr Iterator::getWindowLowerBound() const {
+  taco_iassert(this->isWindowed());
+  return this->content->window->lo;
+}
+
+ir::Expr Iterator::getWindowUpperBound() const {
+  taco_iassert(this->isWindowed());
+  return this->content->window->hi;
+}
+
+void Iterator::setWindowBounds(ir::Expr lo, ir::Expr hi) {
+  this->content->window = std::make_unique<Content::Window>(lo, hi);
+}
+
 bool operator==(const Iterator& a, const Iterator& b) {
   if (a.isDimensionIterator() && b.isDimensionIterator()) {
     return a.getIndexVar() == b.getIndexVar();
@@ -425,7 +453,7 @@ Iterators::Iterators(IndexStmt stmt, const map<TensorVar, Expr>& tensorVars)
     })
   );
 
-  // Reverse the levelITerators map for fast modeAccess lookup
+  // Reverse the levelIterators map for fast modeAccess lookup
   for (auto& iterator : content->levelIterators) {
     content->modeAccesses.insert({iterator.second, iterator.first});
   }
@@ -440,6 +468,8 @@ Iterators::createAccessIterators(Access access, Format format, Expr tensorIR, Pr
       << tensorConcrete << ", Format" << format;
   Shape shape = tensorConcrete.getType().getShape();
 
+  // TODO (rohany): What's the deal with this parent iterator? It seems like
+  // I don't need to attempt to window it, because it doesn't "have" a mode.
   Iterator parent(tensorIR);
   content->levelIterators.insert({{access,0}, parent});
 
@@ -472,6 +502,15 @@ Iterators::createAccessIterators(Access access, Format format, Expr tensorIR, Pr
       string name = iteratorIndexVar.getName() + tensorConcrete.getName();
       Iterator iterator(iteratorIndexVar, tensorIR, mode, parent, name, true);
+
+      // If the access that this iterator corresponds to has a window, then
+      // adjust the iterator appropriately.
+      if (access.isModeWindowed(modeNumber)) {
+        auto lo = ir::Literal::make(access.getWindowLowerBound(modeNumber));
+        auto hi = ir::Literal::make(access.getWindowUpperBound(modeNumber));
+        iterator.setWindowBounds(lo, hi);
+      }
+
       content->levelIterators.insert({{access,modeNumber+1}, iterator});
       if (iteratorIndexVar != indexVar) {
         // add to allowing lowering to find correct iterator for this pos variable
diff --git a/src/lower/lowerer_impl.cpp b/src/lower/lowerer_impl.cpp
index 717ae3884..268986d3e 100644
--- a/src/lower/lowerer_impl.cpp
+++ b/src/lower/lowerer_impl.cpp
@@ -168,17 +168,32 @@ LowererImpl::lower(IndexStmt stmt, string name,
   vector<IndexVar> indexVars = getIndexVars(stmt);
   for (auto& indexVar : indexVars) {
     Expr dimension;
+    // getDimension extracts an Expr that holds the dimension of a particular
+    // tensor mode. This Expr should be used as a loop bound when iterating
+    // over the dimension of the target tensor.
+    auto getDimension = [&](const TensorVar& tv, const Access& a, int mode) {
+      // If the tensor mode is windowed, then the dimension for iteration is
+      // the bounds of the window. Otherwise, it is the actual dimension of
+      // the mode.
+      if (a.isModeWindowed(mode)) {
+        // The mode value used to access .levelIterator is 1-indexed, while
+        // the mode input to getDimension is 0-indexed. So, we shift it up by 1.
+        auto iter = iterators.levelIterator(ModeAccess(a, mode+1));
+        return ir::Sub::make(iter.getWindowUpperBound(), iter.getWindowLowerBound());
+      } else {
+        return GetProperty::make(tensorVars.at(tv), TensorProperty::Dimension, mode);
+      }
+    };
     match(stmt,
       function<void(const AssignmentNode*, Matcher*)>([&](
           const AssignmentNode* n, Matcher* m) {
         m->match(n->rhs);
         if (!dimension.defined()) {
           auto ivars = n->lhs.getIndexVars();
+          auto tv = n->lhs.getTensorVar();
           int loc = (int)distance(ivars.begin(),
                                   find(ivars.begin(),ivars.end(), indexVar));
-          if(!util::contains(temporariesSet, n->lhs.getTensorVar())) {
-            dimension = GetProperty::make(tensorVars.at(n->lhs.getTensorVar()),
-                                          TensorProperty::Dimension, loc);
+          if(!util::contains(temporariesSet, tv)) {
+            dimension = getDimension(tv, n->lhs, loc);
           }
         }
       }),
@@ -189,8 +204,7 @@ LowererImpl::lower(IndexStmt stmt, string name,
                                   find(indexVars.begin(),indexVars.end(), indexVar));
           if(!util::contains(temporariesSet, n->tensorVar)) {
-            dimension = GetProperty::make(tensorVars.at(n->tensorVar),
-                                          TensorProperty::Dimension, loc);
+            dimension = getDimension(n->tensorVar, Access(n), loc);
           }
         }
       })
@@ -1002,9 +1016,16 @@ Stmt LowererImpl::lowerForallPosition(Forall forall, Iterator iterator,
 {
   Expr coordinate = getCoordinateVar(forall.getIndexVar());
   Stmt declareCoordinate = Stmt();
+  Stmt boundsGuard = Stmt();
   if (provGraph.isCoordVariable(forall.getIndexVar())) {
     Expr coordinateArray = iterator.posAccess(iterator.getPosVar(),
                                               coordinates(iterator)).getResults()[0];
+    // If the iterator is windowed, we must recover the coordinate index
+    // variable from the windowed space.
+    if (iterator.isWindowed()) {
+      coordinateArray = this->projectWindowedPositionToCanonicalSpace(iterator, coordinateArray);
+      boundsGuard = this->upperBoundGuardForWindowPosition(iterator, coordinate);
+    }
     declareCoordinate = VarDecl::make(coordinate, coordinateArray);
   }
   if (forall.getParallelUnit() != ParallelUnit::NotParallel && forall.getOutputRaceStrategy() == OutputRaceStrategy::Atomics) {
@@ -1038,6 +1059,11 @@ Stmt LowererImpl::lowerForallPosition(Forall forall, Iterator iterator,
     boundsCompute = bounds.compute();
     startBound = bounds[0];
     endBound = bounds[1];
+    // If we have a window on this iterator, then search for the start of
+    // the window rather than starting at the beginning of the level.
+    if (iterator.isWindowed()) {
+      startBound = this->searchForStartOfWindowPosition(iterator, startBound, endBound);
+    }
   } else {
     taco_iassert(iterator.isOrdered() && iterator.getParent().isOrdered());
     taco_iassert(iterator.isCompact() && iterator.getParent().isCompact());
@@ -1059,10 +1085,12 @@ Stmt LowererImpl::lowerForallPosition(Forall forall, Iterator iterator,
       && forall.getOutputRaceStrategy() != OutputRaceStrategy::ParallelReduction && !ignoreVectorize) {
     kind = LoopKind::Runtime;
   }
+
   // Loop with preamble and postamble
-  return Block::blanks(boundsCompute,
+  return Block::blanks(
+                       boundsCompute,
                        For::make(iterator.getPosVar(), startBound, endBound, 1,
-                                 Block::make(declareCoordinate, body),
+                                 Block::make(declareCoordinate, boundsGuard, body),
                                  kind,
                                  ignoreVectorize ? ParallelUnit::NotParallel : forall.getParallelUnit(),
                                  ignoreVectorize ? 0 : forall.getUnrollFactor()),
                        posAppend);
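
// Note (illustrative, not part of the patch): with getDimension in place, a
// dense windowed mode is lowered with the window extent as the loop bound.
// Roughly the shape of the emitted kernel for A(i) = B(i(2, 4)), with A dense
// of dimension 2 and B dense of dimension 10 (names are made up):
void denseWindowKernelSketch(double* A_vals, double* B_vals) {
  for (int i = 0; i < (4 - 2); i++) {  // bound is hi - lo, not B's dimension
    int iB = i + 2;  // projectCanonicalSpaceToWindowedPosition: shift up by lo
    A_vals[i] = B_vals[iB];
  }
}
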
@@ -1321,9 +1349,18 @@ Stmt LowererImpl::resolveCoordinate(std::vector<Iterator> mergers, ir::Expr coor
     // Just one position iterator so it is the resolved coordinate
     ModeFunction posAccess = merger.posAccess(merger.getPosVar(),
                                               coordinates(merger));
-    Stmt resolution = emitVarDecl ?
-        VarDecl::make(coordinate, posAccess[0]) : Assign::make(coordinate, posAccess[0]);
+    auto access = posAccess[0];
+    auto guard = Stmt();
+    // If the iterator is windowed, we must recover the coordinate index
+    // variable from the windowed space.
+    if (merger.isWindowed()) {
+      access = this->projectWindowedPositionToCanonicalSpace(merger, access);
+      guard = this->upperBoundGuardForWindowPosition(merger, coordinate);
+    }
+    Stmt resolution = emitVarDecl ? VarDecl::make(coordinate, access) : Assign::make(coordinate, access);
     return Block::make(posAccess.compute(),
-                       resolution);
+                       resolution,
+                       guard);
   }
   else if (merger.hasCoordIter()) {
     taco_not_supported_yet;
@@ -2262,7 +2299,6 @@ Stmt LowererImpl::zeroInitValues(Expr tensor, Expr begin, Expr size) {
   return For::make(p, lower, upper, 1, zeroInit, parallel);
 }
 
-
 Stmt LowererImpl::declLocatePosVars(vector<Iterator> locators) {
   vector<Stmt> result;
   for (Iterator& locator : locators) {
@@ -2284,7 +2320,14 @@ Stmt LowererImpl::declLocatePosVars(vector<Iterator> locators) {
         continue; // these will be recovered with separate procedure
       }
       do {
-        ModeFunction locate = locateIterator.locate(coordinates(locateIterator));
+        auto coords = coordinates(locateIterator);
+        // If this dimension iterator operates over a window, then it needs
+        // to be projected up to the window's iteration space.
+        if (locateIterator.isWindowed()) {
+          auto expr = coords[coords.size() - 1];
+          coords[coords.size() - 1] = this->projectCanonicalSpaceToWindowedPosition(locateIterator, expr);
+        }
+        ModeFunction locate = locateIterator.locate(coords);
         taco_iassert(isValue(locate.getResults()[1], true));
         Stmt declarePosVar = VarDecl::make(locateIterator.getPosVar(),
                                            locate.getResults()[0]);
@@ -2378,6 +2421,11 @@ Stmt LowererImpl::codeToInitializeIteratorVar(Iterator iterator, vector<Iterator>
                    this->iterators)[coordinateVar][0];
     if (binarySearchTarget != underivedBounds[coordinateVar][0]) {
+      // If we have a window, then we need to project the binary search target
+      // up into the window rather than the beginning of the level.
+      if (iterator.isWindowed()) {
+        binarySearchTarget = this->projectCanonicalSpaceToWindowedPosition(iterator, binarySearchTarget);
+      }
       result.push_back(VarDecl::make(iterator.getBeginVar(), binarySearchTarget));
 
       vector<Expr> binarySearchArgs = {
@@ -2394,7 +2442,13 @@ Stmt LowererImpl::codeToInitializeIteratorVar(Iterator iterator, vector<Iterator>
   }
   else {
-    result.push_back(VarDecl::make(iterVar, bounds[0]));
+    auto bound = bounds[0];
+    // If we have a window on this iterator, then search for the start of
+    // the window rather than starting at the beginning of the level.
+    if (iterator.isWindowed()) {
+      bound = this->searchForStartOfWindowPosition(iterator, bounds[0], bounds[1]);
+    }
+    result.push_back(VarDecl::make(iterVar, bound));
   }
   result.push_back(VarDecl::make(endVar, bounds[1]));
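
// Note (illustrative, not part of the patch): searchForStartOfWindowPosition
// (implemented later in this file) emits a call to the taco_binarySearchAfter
// runtime helper. As used here its contract is roughly the following; this
// reimplementation is a sketch, not taco's actual emitted helper:
int binarySearchAfterSketch(const int* array, int lo, int hi, int target) {
  // Returns the first position p in [lo, hi) with array[p] >= target, or hi
  // if every entry is smaller: i.e., the first stored coordinate that can
  // fall inside a window starting at `target`.
  while (lo < hi) {
    int mid = lo + (hi - lo) / 2;
    if (array[mid] < target) lo = mid + 1;
    else hi = mid;
  }
  return lo;
}
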
@@ -2556,13 +2610,22 @@ Stmt LowererImpl::codeToLoadCoordinatesFromPosIterators(vector<Iterator> iterato
       ModeFunction posAccess = posIter.posAccess(posIter.getPosVar(),
                                                  coordinates(posIter));
       loadPosIterCoordinateStmts.push_back(posAccess.compute());
+      auto access = posAccess[0];
+      // If this iterator is windowed, then it needs to be projected down to
+      // recover the coordinate variable.
+      // TODO (rohany): It would be cleaner to move this logic into the
+      // ModeFunction, rather than having to check in several places.
+      if (posIter.isWindowed()) {
+        access = this->projectWindowedPositionToCanonicalSpace(posIter, access);
+      }
       if (declVars) {
-        loadPosIterCoordinateStmts.push_back(VarDecl::make(posIter.getCoordVar(),
-                                                           posAccess[0]));
+        loadPosIterCoordinateStmts.push_back(VarDecl::make(posIter.getCoordVar(), access));
       }
       else {
-        loadPosIterCoordinateStmts.push_back(Assign::make(posIter.getCoordVar(),
-                                                          posAccess[0]));
+        loadPosIterCoordinateStmts.push_back(Assign::make(posIter.getCoordVar(), access));
+      }
+      if (posIter.isWindowed()) {
+        loadPosIterCoordinateStmts.push_back(this->upperBoundGuardForWindowPosition(posIter, posIter.getCoordVar()));
       }
     }
     loadPosIterCoordinates = Block::make(loadPosIterCoordinateStmts);
@@ -2701,4 +2764,33 @@ Expr LowererImpl::checkThatNoneAreExhausted(std::vector<Iterator> iterators)
          : Lt::make(iterators[0].getIteratorVar(), iterators[0].getEndVar());
 }
 
+Expr LowererImpl::searchForStartOfWindowPosition(Iterator iterator, ir::Expr start, ir::Expr end) {
+  taco_iassert(iterator.isWindowed());
+  vector<Expr> args = {
+      // Search over the `crd` array of the level,
+      iterator.getMode().getModePack().getArray(1),
+      // between the start and end position,
+      start, end,
+      // for the beginning of the window.
+      iterator.getWindowLowerBound(),
+  };
+  return Call::make("taco_binarySearchAfter", args, Datatype::UInt64);
+}
+
+Stmt LowererImpl::upperBoundGuardForWindowPosition(Iterator iterator, ir::Expr access) {
+  taco_iassert(iterator.isWindowed());
+  return ir::IfThenElse::make(
+    ir::Gte::make(access, ir::Sub::make(iterator.getWindowUpperBound(), iterator.getWindowLowerBound())),
+    ir::Break::make()
+  );
+}
+
+Expr LowererImpl::projectWindowedPositionToCanonicalSpace(Iterator iterator, ir::Expr expr) {
+  return ir::Sub::make(expr, iterator.getWindowLowerBound());
+}
+
+Expr LowererImpl::projectCanonicalSpaceToWindowedPosition(Iterator iterator, ir::Expr expr) {
+  return ir::Add::make(expr, iterator.getWindowLowerBound());
+}
+
 }
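
// Note (illustrative, not part of the patch): putting the four helpers
// together, the kernel for reducing a compressed vector B over a window
// [lo, hi) comes out roughly like this (names made up; the search helper is
// sketched above):
double sumWindowSketch(const int* B_pos, const int* B_crd, const double* B_vals,
                       int lo, int hi) {
  double s = 0.0;
  // searchForStartOfWindowPosition: skip stored entries below the window.
  int pB = binarySearchAfterSketch(B_crd, B_pos[0], B_pos[1], lo);
  for (; pB < B_pos[1]; pB++) {
    int i = B_crd[pB] - lo;   // projectWindowedPositionToCanonicalSpace
    if (i >= hi - lo) break;  // upperBoundGuardForWindowPosition
    s += B_vals[pB];
  }
  return s;
}
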
diff --git a/src/tensor.cpp b/src/tensor.cpp
index 5e3407337..dfb40e6f5 100644
--- a/src/tensor.cpp
+++ b/src/tensor.cpp
@@ -458,6 +458,31 @@ static inline map<TensorVar, TensorBase> getTensors(const IndexExpr& expr);
 struct AccessTensorNode : public AccessNode {
   AccessTensorNode(TensorBase tensor, const std::vector<IndexVar>& indices)
       :  AccessNode(tensor.getTensorVar(), indices), tensor(tensor) {}
+
+  AccessTensorNode(TensorBase tensor, const std::vector<std::shared_ptr<IndexVarInterface>>& indices)
+      : AccessNode(tensor.getTensorVar()), tensor(tensor) {
+    // Create the vector of IndexVar to assign to this->indexVars.
+    std::vector<IndexVar> ivars(indices.size());
+    for (size_t i = 0; i < indices.size(); i++) {
+      auto var = indices[i];
+      // Match on what the IndexVarInterface actually is.
+      IndexVarInterface::match(var, [&](std::shared_ptr<IndexVar> ivar) {
+        ivars[i] = *ivar;
+      }, [&](std::shared_ptr<WindowedIndexVar> wvar) {
+        ivars[i] = wvar->getIndexVar();
+        auto lo = wvar->getLowerBound();
+        auto hi = wvar->getUpperBound();
+        taco_uassert(lo >= 0) << "slice lower bound must be >= 0";
+        taco_uassert(hi <= tensor.getDimension(i)) <<
+            "slice upper bound must be <= tensor dimension (" << tensor.getDimension(i) << ")";
+        this->windowedModes[i].lo = lo;
+        this->windowedModes[i].hi = hi;
+      });
+    }
+    // Initialize this->indexVars.
+    this->indexVars = std::move(ivars);
+  }
+
   TensorBase tensor;
   virtual void setAssignment(const Assignment& assignment) {
     tensor.syncDependentTensors();
@@ -502,6 +527,14 @@ Access TensorBase::operator()(const std::vector<IndexVar>& indices) {
   return Access(new AccessTensorNode(*this, indices));
 }
 
+Access TensorBase::operator()(const std::vector<std::shared_ptr<IndexVarInterface>>& indices) {
+  taco_uassert(indices.size() == (size_t)getOrder())
+      << "A tensor of order " << getOrder() << " must be indexed with "
+      << getOrder() << " variables, but is indexed with: "
+      << util::join(indices);
+  return Access(new AccessTensorNode(*this, indices));
+}
+
 Access TensorBase::operator()() {
   return this->operator()(std::vector<IndexVar>());
 }
diff --git a/test/test.cpp b/test/test.cpp
index 597ddf2ef..a49f10ff7 100644
--- a/test/test.cpp
+++ b/test/test.cpp
@@ -1,3 +1,5 @@
+#include <functional>
+
 #include "test.h"
 #include "taco/tensor.h"
 
@@ -51,4 +53,18 @@ ostream& operator<<(ostream& os, const NotationTest& test) {
   return os;
 }
 
+void ASSERT_THROWS_EXCEPTION_WITH_ERROR(std::function<void()> f, std::string err) {
+  EXPECT_THROW({
+    try {
+      f();
+    } catch (TacoException& e) {
+      // Catch and inspect the exception to make sure that err is within it.
+      auto s = std::string(e.what());
+      ASSERT_TRUE(s.find(err) != std::string::npos);
+      // Throw the exception back up to gtest.
+      throw;
+    }
+  }, TacoException);
+}
+
 }}
diff --git a/test/test.h b/test/test.h
index 04000bafa..3302bf81f 100644
--- a/test/test.h
+++ b/test/test.h
@@ -3,6 +3,7 @@
 
 #include "gtest/gtest.h"
 
+#include <functional>
 #include <memory>
 #include <string>
 #include <vector>
@@ -93,6 +94,10 @@ void ASSERT_COMPONENTS_EQUALS(vector<vector<vector<int>>> expectedIndices,
   ASSERT_ARRAY_EQ(expectedValues, {(double*)storage.getValues().getData(),nnz});
 }
 
+// ASSERT_THROWS_EXCEPTION_WITH_ERROR asserts that the input function throws
+// a TacoException with the input string err contained within the body.
+void ASSERT_THROWS_EXCEPTION_WITH_ERROR(std::function<void()> f, std::string err);
+
 struct NotationTest {
   NotationTest(IndexStmt actual, IndexStmt expected)
       : actual(actual), expected(expected) {}
diff --git a/test/tests-windowing.cpp b/test/tests-windowing.cpp
new file mode 100644
index 000000000..ddeeb5655
--- /dev/null
+++ b/test/tests-windowing.cpp
@@ -0,0 +1,242 @@
+#include "test.h"
+#include "taco/tensor.h"
+#include "taco/codegen/module.h"
+#include "taco/index_notation/index_notation.h"
+#include "taco/lower/lower.h"
+
+using namespace taco;
+
+// mixIndexing is a compilation test to ensure that we can index into a
+// tensor with a mix of IndexVars and WindowedIndexVars.
+TEST(windowing, mixIndexing) {
+  auto dim = 10;
+  Tensor<int> a("a", {dim, dim, dim, dim, dim}, {Dense, Dense, Dense, Dense, Dense});
+  IndexVar i, j, k, l, m;
+  auto w1 = a(i, j(1, 3), k, l(4, 5), m(6, 7));
+  auto w2 = a(i(1, 3), j(2, 4), k, l, m(3, 5));
+}
+
+TEST(windowing, boundsChecks) {
+  Tensor<int> a("a", {5}, {Dense});
+  IndexVar i("i");
+  ASSERT_THROWS_EXCEPTION_WITH_ERROR([&]() { a(i(-1, 4)); }, "slice lower bound");
+  ASSERT_THROWS_EXCEPTION_WITH_ERROR([&]() { a(i(0, 10)); }, "slice upper bound");
+}
+// sliceMultipleWays tests that the same tensor can be sliced in different
+// ways within the same expression.
+TEST(windowing, sliceMultipleWays) {
+  auto dim = 10;
+  Tensor<int> a("a", {dim}, {Dense});
+  Tensor<int> b("b", {dim}, {Sparse});
+  Tensor<int> c("c", {dim}, {Dense});
+  Tensor<int> expected("expected", {dim}, {Dense});
+  for (int i = 0; i < dim; i++) {
+    a.insert({i}, i);
+    b.insert({i}, i);
+  }
+  expected.insert({2}, 10);
+  expected.insert({3}, 13);
+  a.pack(); b.pack(); expected.pack();
+  IndexVar i("i"), j("j");
+
+  c(i(2, 4)) = a(i(5, 7)) + a(i(1, 3)) + b(i(4, 6));
+  c.evaluate();
+  ASSERT_TRUE(equals(expected, c));
+}
+
+// basic tests a windowed tensor expression with different combinations
+// of tensor formats.
+TEST(windowing, basic) {
+  Tensor<int> expectedAdd("expectedAdd", {2, 2}, {Dense, Dense});
+  expectedAdd.insert({0, 0}, 14);
+  expectedAdd.insert({0, 1}, 17);
+  expectedAdd.insert({1, 0}, 17);
+  expectedAdd.insert({1, 1}, 20);
+  expectedAdd.pack();
+  Tensor<int> expectedMul("expectedMul", {2, 2}, {Dense, Dense});
+  expectedMul.insert({0, 0}, 64);
+  expectedMul.insert({0, 1}, 135);
+  expectedMul.insert({1, 0}, 135);
+  expectedMul.insert({1, 1}, 240);
+  expectedMul.pack();
+  Tensor<int> d("d", {2, 2}, {Dense, Dense});
+
+  // These dimensions are chosen so that one lies above the constant in
+  // `mode_format_dense.cpp:54`, where a known stride is generated rather
+  // than using the dimension.
+  // TODO (rohany): Move that constant into a header file and import it here.
+  for (auto& dim : {6, 20}) {
+    for (auto& x : {Dense, Sparse}) {
+      for (auto& y : {Dense, Sparse}) {
+        for (auto& z : {Dense, Sparse}) {
+          Tensor<int> a("a", {dim, dim}, {Dense, x});
+          Tensor<int> b("b", {dim, dim}, {Dense, y});
+          Tensor<int> c("c", {dim, dim}, {Dense, z});
+          for (int i = 0; i < dim; i++) {
+            for (int j = 0; j < dim; j++) {
+              a.insert({i, j}, i + j);
+              b.insert({i, j}, i + j);
+              c.insert({i, j}, i + j);
+            }
+          }
+
+          a.pack();
+          b.pack();
+          c.pack();
+
+          IndexVar i, j;
+          d(i, j) = a(i(2, 4), j(2, 4)) + b(i(4, 6), j(4, 6)) + c(i(1, 3), j(1, 3));
+          d.evaluate();
+          ASSERT_TRUE(equals(expectedAdd, d))
+              << endl << expectedAdd << endl << endl << d << endl
+              << dim << " " << x << " " << y << " " << z << endl;
+
+          d(i, j) = a(i(2, 4), j(2, 4)) * b(i(4, 6), j(4, 6)) * c(i(1, 3), j(1, 3));
+          d.evaluate();
+          ASSERT_TRUE(equals(expectedMul, d))
+              << endl << expectedMul << endl << endl << d << endl
+              << dim << " " << x << " " << y << " " << z << endl;
+        }
+      }
+    }
+  }
+}
+
+// slicedOutput tests that operations can write to a window within an output
+// tensor.
+TEST(windowing, slicedOutput) {
+  auto dim = 10;
+  Tensor<int> expected("expected", {10, 10}, {Dense, Dense});
+  expected.insert({8, 8}, 12);
+  expected.insert({8, 9}, 14);
+  expected.insert({9, 8}, 14);
+  expected.insert({9, 9}, 16);
+  expected.pack();
+  for (auto& x : {Dense, Sparse}) {
+    for (auto& y : {Dense, Sparse}) {
+      Tensor<int> a("a", {dim, dim}, {Dense, x});
+      Tensor<int> b("b", {dim, dim}, {Dense, y});
+      Tensor<int> c("c", {dim, dim}, {Dense, Dense});
+      for (int i = 0; i < dim; i++) {
+        for (int j = 0; j < dim; j++) {
+          a.insert({i, j}, i + j);
+          b.insert({i, j}, i + j);
+        }
+      }
+      a.pack();
+      b.pack();
+
+      IndexVar i, j;
+      c(i(8, 10), j(8, 10)) = a(i(2, 4), j(2, 4)) + b(i(4, 6), j(4, 6));
+      c.evaluate();
+      ASSERT_TRUE(equals(expected, c))
+          << endl << expected << endl << endl << c << endl
+          << dim << " " << x << " " << y << endl;
+    }
+  }
+}
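
// Note (illustrative, not part of the patch): a quick check of the expected
// values above: with a(i, j) = b(i, j) = i + j, output entry (8, 8) is
// a[2][2] + b[4][4] = 4 + 8 = 12, and entry (8, 9) is a[2][3] + b[4][5] =
// 5 + 9 = 14, matching the `expected` tensor.
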
+// transformations tests how windowing interacts with sparse iteration space
+// transformations and different mode formats.
+TEST(windowing, transformations) {
+  auto dim = 10;
+  Tensor<int> expected("expected", {2, 2}, {Dense, Dense});
+  expected.insert({0, 0}, 12);
+  expected.insert({0, 1}, 14);
+  expected.insert({1, 0}, 14);
+  expected.insert({1, 1}, 16);
+  expected.pack();
+
+  IndexVar i("i"), j("j"), i1("i1"), i2("i2");
+  auto testFn = [&](std::function<IndexStmt(IndexStmt)> modifier, std::vector<Format> formats) {
+    for (auto& format : formats) {
+      Tensor<int> a("a", {dim, dim}, format);
+      Tensor<int> b("b", {dim, dim}, format);
+      for (int i = 0; i < dim; i++) {
+        for (int j = 0; j < dim; j++) {
+          a.insert({i, j}, i + j);
+          b.insert({i, j}, i + j);
+        }
+      }
+      a.pack(); b.pack();
+
+      Tensor<int> c("c", {2, 2}, {Dense, Dense});
+      c(i, j) = a(i(2, 4), j(2, 4)) + b(i(4, 6), j(4, 6));
+      auto stmt = c.getAssignment().concretize();
+      c.compile(modifier(stmt));
+      c.evaluate();
+      ASSERT_TRUE(equals(c, expected)) << endl << c << endl << expected << endl << format << endl;
+    }
+  };
+
+  std::vector<Format> allFormats = {{Dense, Dense}, {Dense, Sparse}, {Sparse, Dense}, {Sparse, Sparse}};
+  testFn([&](IndexStmt stmt) {
+    return stmt.split(i, i1, i2, 4).unroll(i2, 4);
+  }, allFormats);
+
+  // TODO (rohany): It seems that these loops can only be reordered in the
+  // Dense,Dense case.
+  testFn([&](IndexStmt stmt) {
+    return stmt.reorder(i, j);
+  }, {{Dense, Dense}});
+
+  // We can only (currently) parallelize the outer dimension loop if it is dense.
+  testFn([&](IndexStmt stmt) {
+    return stmt.parallelize(i, taco::ParallelUnit::CPUThread, taco::OutputRaceStrategy::NoRaces);
+  }, {{Dense, Dense}, {Dense, Sparse}});
+}
+
+// assignment tests assignments of and to windows in different combinations.
+TEST(windowing, assignment) {
+  auto dim = 10;
+
+  auto testFn = [&](Format srcFormat) {
+    Tensor<int> A("A", {dim, dim}, srcFormat);
+    for (int i = 0; i < dim; i++) {
+      for (int j = 0; j < dim; j++) {
+        A.insert({i, j}, i + j);
+      }
+    }
+    A.pack();
+
+    IndexVar i, j;
+
+    // First, assign a window of A to a window of B.
+    Tensor<int> B("B", {dim, dim}, {Dense, Dense});
+    B(i(2, 4), j(3, 5)) = A(i(4, 6), j(5, 7));
+    B.evaluate();
+    Tensor<int> expected("expected", {dim, dim}, {Dense, Dense});
+    expected.insert({2, 3}, 9); expected.insert({2, 4}, 10);
+    expected.insert({3, 3}, 10); expected.insert({3, 4}, 11);
+    expected.pack();
+    ASSERT_TRUE(equals(B, expected)) << B << std::endl << expected << std::endl;
+
+    // Assign a window of A to B.
+    B = Tensor<int>("B", {2, 2}, {Dense, Dense});
+    B(i, j) = A(i(4, 6), j(5, 7));
+    B.evaluate();
+    expected = Tensor<int>("expected", {2, 2}, {Dense, Dense});
+    expected.insert({0, 0}, 9); expected.insert({0, 1}, 10);
+    expected.insert({1, 0}, 10); expected.insert({1, 1}, 11);
+    expected.pack();
+    ASSERT_TRUE(equals(B, expected)) << B << std::endl << expected << std::endl;
+
+    // Assign A to a window of B.
+    A = Tensor<int>("A", {2, 2}, srcFormat);
+    A.insert({0, 0}, 0); A.insert({0, 1}, 1);
+    A.insert({1, 0}, 1); A.insert({1, 1}, 2);
+    A.pack();
+    B = Tensor<int>("B", {dim, dim}, {Dense, Dense});
+    B(i(4, 6), j(5, 7)) = A(i, j);
+    B.evaluate();
+    expected = Tensor<int>("expected", {dim, dim}, {Dense, Dense});
+    expected.insert({4, 5}, 0); expected.insert({4, 6}, 1);
+    expected.insert({5, 5}, 1); expected.insert({5, 6}, 2);
+    expected.pack();
+    ASSERT_TRUE(equals(B, expected)) << B << std::endl << expected << std::endl;
+  };
+
+  for (auto& x : {Dense, Sparse}) {
+    testFn({Dense, x});
+  }
+}