From 9e4b439cb2cbe12384f34527bf71d282e7bb2a08 Mon Sep 17 00:00:00 2001
From: Joshua Oreman <oremanj@gmail.com>
Date: Mon, 11 Feb 2019 08:26:53 -0500
Subject: [PATCH] Add the ability for cancel scopes to specify a grace period

---
 docs/source/reference-core.rst | 165 ++++++++++++++--
 newsfragments/147.feature.rst  |   3 +
 trio/__init__.py               |   2 +-
 trio/_core/_run.py             | 334 ++++++++++++++++++++++++++-------
 trio/_core/tests/test_run.py   | 224 +++++++++++++++++++++-
 trio/_timeouts.py              |  69 ++++++-
 6 files changed, 708 insertions(+), 89 deletions(-)
 create mode 100644 newsfragments/147.feature.rst

diff --git a/docs/source/reference-core.rst b/docs/source/reference-core.rst
index 9790cd61f9..093e6faece 100644
--- a/docs/source/reference-core.rst
+++ b/docs/source/reference-core.rst
@@ -337,6 +337,9 @@ configure timeouts on individual requests.
 Cancellation semantics
 ~~~~~~~~~~~~~~~~~~~~~~
 
+Nesting of cancel scopes
+++++++++++++++++++++++++
+
 You can freely nest cancellation blocks, and each :exc:`Cancelled`
 exception "knows" which block it belongs to. So long as you don't stop
 it, the exception will keep propagating until it reaches the block
@@ -366,6 +369,9 @@ move_on_after(5)`` context manager. So this code will print:
 The end result is that trio has successfully cancelled exactly the
 work that was happening within the scope that was cancelled.
 
+Checking whether a scope was cancelled
+++++++++++++++++++++++++++++++++++++++
+
 Looking at this, you might wonder how you can tell whether the inner
 block timed out – perhaps you want to do something different, like try
 a fallback procedure or report a failure to our caller. To make this
@@ -384,6 +390,9 @@ so forth – see :class:`CancelScope` below for the full details.
 
 .. _blocking-cleanup-example:
 
+Cancellations affect blocking cleanup too
++++++++++++++++++++++++++++++++++++++++++
+
 Cancellations in trio are "level triggered", meaning that once a block
 has been cancelled, *all* cancellable operations in that block will
 keep raising :exc:`Cancelled`. This helps avoid some pitfalls around
@@ -411,30 +420,136 @@ forever. But in trio, this *doesn't* happen: the ``await
 conn.send_goodbye_msg()`` call is still inside the cancelled block, so
 it will also raise :exc:`Cancelled`.
 
+.. _cleanup-with-grace-period:
+
+Grace periods allow blocking cleanup within externally-specified limits
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+
 Of course, if you really want to make another blocking call in your
-cleanup handler, trio will let you; it's trying to prevent you from
-accidentally shooting yourself in the foot. Intentional foot-shooting
-is no problem (or at least – it's not trio's problem). To do this,
-create a new scope, and set its :attr:`~CancelScope.shield`
-attribute to :data:`True`::
+cleanup handler, trio will let you, and it even lets you provide a
+top-down limit on the amount of time that the blocking cleanup should
+be allowed to take. To take advantage of this, you need to do two things:
+
+* surround the part of your code that wants to do blocking cleanup
+  after a cancellation in a ``with trio.shield_during_cleanup():``
+  block
+
+* specify a *grace period* alongside the original timeout or cancellation,
+  to indicate how long that blocking cleanup should be allowed to go on for
+
+For example::
+
+   with trio.move_on_after(TIMEOUT, grace_period=CLEANUP_TIMEOUT):
+       conn = await make_connection()
+       try:
+           await conn.send_hello_msg()
+       finally:
+           with trio.shield_during_cleanup():
+               await conn.send_goodbye_msg()
+
+If ``await conn.send_hello_msg()`` takes more than ``TIMEOUT``
+seconds, execution will proceed to the ``finally`` block. Then, since
+``await conn.send_goodbye_msg()`` is within a ``with
+trio.shield_during_cleanup():`` block, it gets an additional
+``CLEANUP_TIMEOUT`` seconds (the ``grace_period``)
+before it too becomes cancelled. If ``await conn.send_goodbye_msg()``
+uses any timeouts internally, they will continue to work normally.
+This is a pretty advanced feature that most people probably
+won't use, but it's there for the cases where you need it.
+
+If you do use grace periods, there are some additional semantics to
+keep in mind:
+
+* The grace period mechanism temporarily protects code *inside*
+  a :func:`shield_during_cleanup` block from cancellations originating
+  *outside* such a block. Outside of :func:`shield_during_cleanup` blocks,
+  cancellation behavior is not affected by the grace period. Trio does
+  *not* attempt to automatically protect code in typical cleanup
+  locations such as all ``finally`` blocks or ``__aexit__`` handlers.
+  (Explicit is better than implicit.)
+
+* If you explicitly cancel a scope by calling :meth:`~CancelScope.cancel`,
+  as documented below, the grace period specified in the call to
+  :func:`move_on_after` has no effect; it only applies to cancellations
+  that occur as a result of the :func:`move_on_after` timeout expiring.
+  Instead, you can specify a grace period directly in the call to
+  :meth:`~CancelScope.cancel`.
+
+* A grace period specified *inside* the cancelled scope doesn't affect
+  the outcome. This code::
+
+     print("starting...")
+     with trio.move_on_after(5, grace_period=1):
+         with trio.move_on_after(10, grace_period=2):
+             try:
+                 await trio.sleep(20)
+                 print("sleep finished without error")
+             finally:
+                 with trio.shield_during_cleanup():
+                     print("blocking cleanup starting")
+                     await trio.sleep(1.5)
+                     print("blocking cleanup done")
+         print("move_on_after(10) finished without error")
+     print("move_on_after(5) finished without error")
+
+  will print:
+
+  .. code-block:: none
+
+     starting...
+     <5 second delay>
+     blocking cleanup starting
+     <1 second delay>
+     move_on_after(5) finished without error
+
+  In other words: Imposing a grace period at top level constrains the
+  amount of time that cleanup is allowed to take, just like imposing a
+  cancel scope at top level constrains the amount of time that normal
+  execution is allowed to take. The allowable grace period is
+  fundamentally a decision made by the *user* of an interface, even
+  though the specification of which work should be protected by it
+  will be part of the implementation.
+
+* The grace period clock starts ticking as soon as a scope becomes
+  cancelled. It applies cumulatively to all cleanup within the
+  cancelled scope, *not* to each ``with trio.shield_during_cleanup()``
+  block individually. That is, if a cancelled scope has a grace period
+  of 5, and it was cancelled more than 5 seconds ago, any
+  :func:`shield_during_cleanup` blocks within it will be cancelled
+  just like the rest of the scope.
+
+Shielding allows unlimited blocking cleanup
++++++++++++++++++++++++++++++++++++++++++++
+
+Finally, if you really need to locally force some code to run beyond
+the point at which an enclosing scope said it should be cancelled,
+trio lets you do that too, by setting the :attr:`~CancelScope.shield`
+attribute of a cancel scope to :data:`True`. So, the above
+:ref:`grace period example <cleanup-with-grace-period>` could equivalently
+be written::
 
    with trio.move_on_after(TIMEOUT):
-       conn = make_connection()
+       conn = await make_connection()
        try:
            await conn.send_hello_msg()
        finally:
-           with move_on_after(CLEANUP_TIMEOUT) as cleanup_scope:
+           with trio.move_on_after(CLEANUP_TIMEOUT) as cleanup_scope:
                cleanup_scope.shield = True
                await conn.send_goodbye_msg()
 
-So long as you're inside a scope with ``shield = True`` set, then
+But the grace period approach works better as your application becomes
+more complex, because it lets you specify limits on cleanup duration
+as a matter of policy rather than at each place that does any cleanup.
+Shielding should only be used where you can't obtain correct behavior
+in any other way. (For an example, see :meth:`Condition.wait`.)
+
+So long as you're inside a scope with ``shield = True`` set,
 you'll be protected from outside cancellations. Note though that this
 *only* applies to *outside* cancellations: if ``CLEANUP_TIMEOUT``
 expires then ``await conn.send_goodbye_msg()`` will still be
 cancelled, and if ``await conn.send_goodbye_msg()`` call uses any
 timeouts internally, then those will continue to work normally as
-well. This is a pretty advanced feature that most people probably
-won't use, but it's there for the rare cases where you need it.
+well.
 
 
 .. _cancellable-primitives:
@@ -502,9 +617,13 @@ objects.
 
    .. autoattribute:: deadline
 
+   .. autoattribute:: cleanup_deadline
+
    .. autoattribute:: shield
 
-   .. automethod:: cancel()
+   .. autoattribute:: shield_during_cleanup
+
+   .. automethod:: cancel(*, grace_period=0)
 
    .. attribute:: cancelled_caught
 
@@ -537,6 +656,24 @@ objects.
       cancelled, then :attr:`cancelled_caught` is usually more
       appropriate.
 
+   .. attribute:: cleanup_expired
+
+      Readonly :class:`bool`. Records whether the cancellation of this
+      scope had its grace period expire while the ``with`` block was
+      still active. A true value of :attr:`cleanup_expired` implies
+      a true value of :attr:`cancel_called`, but not vice versa.
+
+      If a cancellation occurred with zero grace period,
+      :attr:`cleanup_expired` is always true.
+
+      The same caveats apply here as for :attr:`cancel_called`: you
+      usually want :attr:`cancelled_caught` instead. But if you
+      already know :attr:`cancelled_caught` is true, inspecting
+      :attr:`cleanup_expired` can assist in distinguishing an
+      "orderly" cancellation (where all the cleanup code was able to
+      run to completion) from one where some cleanup code may have
+      been interrupted.
+
 
 Trio also provides several convenience functions for the common
 situation of just wanting to impose a timeout on some code:
@@ -553,6 +690,12 @@ situation of just wanting to impose a timeout on some code:
 .. autofunction:: fail_at
    :with: cancel_scope
 
+And one for marking blocking cleanup code that should take advantage of
+any grace period that might exist if it's cancelled:
+
+.. autofunction:: shield_during_cleanup
+   :with:
+
 Cheat sheet:
 
 * If you want to impose a timeout on a function, but you don't care
diff --git a/newsfragments/147.feature.rst b/newsfragments/147.feature.rst
new file mode 100644
index 0000000000..0a5b219bca
--- /dev/null
+++ b/newsfragments/147.feature.rst
@@ -0,0 +1,3 @@
+Add the ability for cancel scopes to specify a :ref:`grace period
+<cleanup-with-grace-period>` providing additional time for blocking
+cleanup operations to complete after a cancellation.
diff --git a/trio/__init__.py b/trio/__init__.py
index acd4d726af..0ba5c2ac13 100644
--- a/trio/__init__.py
+++ b/trio/__init__.py
@@ -24,7 +24,7 @@
 
 from ._timeouts import (
     move_on_at, move_on_after, sleep_forever, sleep_until, sleep, fail_at,
-    fail_after, TooSlowError
+    fail_after, shield_during_cleanup, TooSlowError
 )
 
 from ._sync import (
diff --git a/trio/_core/_run.py b/trio/_core/_run.py
index 7251f7970c..5bd32d93d3 100644
--- a/trio/_core/_run.py
+++ b/trio/_core/_run.py
@@ -11,7 +11,7 @@
 from contextlib import contextmanager, closing
 
 from contextvars import copy_context
-from math import inf
+from math import inf, isnan
 from time import perf_counter
 
 from sniffio import current_async_library_cvar
@@ -124,6 +124,13 @@ def deadline_to_sleep_time(self, deadline):
 ################################################################
 
 
+def _convert_deadline(arg):
+    result = float(arg)
+    if isnan(result):
+        raise ValueError("deadlines cannot be NaN")
+    return result
+
+
 @attr.s(cmp=False, repr=False, slots=True)
 class CancelScope:
     """A *cancellation scope*: the link between a unit of cancellable
@@ -159,20 +166,57 @@ class CancelScope:
     :exc:`RuntimeError` if you violate this rule.)
 
     The :class:`CancelScope` constructor takes initial values for the
-    cancel scope's :attr:`deadline` and :attr:`shield` attributes; these
-    may be freely modified after construction, whether or not the scope
-    has been entered yet, and changes take immediate effect.
+    cancel scope's :attr:`deadline`, :attr:`cleanup_deadline`,
+    :attr:`shield`, and :attr:`shield_during_cleanup` attributes;
+    these may be freely modified after construction, whether or not
+    the scope has been entered yet, and changes take immediate effect.
+
     """
 
+    # The set of tasks that might possibly be affected by a cancellation
+    # of this cancel scope
     _tasks = attr.ib(factory=set, init=False)
+
+    # True if we have entered the cancel scope's 'with' block yet, and
+    # shouldn't be allowed to enter it again
     _has_been_entered = attr.ib(default=False, init=False)
-    _effective_deadline = attr.ib(default=inf, init=False)
+
+    # True if cancel() has been called or the deadline has expired
     cancel_called = attr.ib(default=False, init=False)
+
+    # True if we passed the _cleanup_deadline time before exiting the
+    # 'with' block, either via a deadline callback or via cancel()
+    # with no grace period
+    cleanup_expired = attr.ib(default=False, init=False)
+
+    # True if the 'with' block caught a Cancelled exception
     cancelled_caught = attr.ib(default=False, init=False)
 
+    # If other than +inf, this is the time at which we have registered
+    # ourselves in Runner.deadlines to have
+    # _registered_deadline_expired() called.
+    # This is _deadline until cancel_called is True, then
+    # _cleanup_deadline until cleanup_expired is True, then +inf;
+    # except that it's always +inf if we have no _tasks.
+    _registered_deadline = attr.ib(default=inf, init=False)
+
     # Constructor arguments:
-    _deadline = attr.ib(default=inf, kw_only=True)
-    _shield = attr.ib(default=False, kw_only=True)
+    _deadline = attr.ib(default=inf, converter=_convert_deadline, kw_only=True)
+    _cleanup_deadline = attr.ib(
+        default=attr.Factory(lambda self: self._deadline, takes_self=True),
+        converter=_convert_deadline,
+        kw_only=True
+    )
+    _shield = attr.ib(
+        default=False,
+        validator=attr.validators.instance_of(bool),
+        kw_only=True
+    )
+    _shield_during_cleanup = attr.ib(
+        default=False,
+        validator=attr.validators.instance_of(bool),
+        kw_only=True
+    )
 
     @enable_ki_protection
     def __enter__(self):
@@ -182,10 +226,13 @@ def __enter__(self):
                 "Each CancelScope may only be used for a single 'with' block"
             )
         self._has_been_entered = True
-        if current_time() >= self._deadline:
+        now = current_time()
+        if now >= self._deadline:
             self.cancel_called = True
-        with self._might_change_effective_deadline():
-            self._add_task(task)
+        if now >= self._cleanup_deadline:
+            self.cleanup_expired = True
+        self._add_task(task)
+        self._update_registered_deadline()
         return self
 
     @enable_ki_protection
@@ -222,43 +269,54 @@ def __repr__(self):
         else:
             binding = "unbound"
 
-        if self.cancel_called:
+        try:
+            now = current_time()
+        except RuntimeError:  # must be called from async context
+            now = None
+
+        if self.cleanup_expired:
             state = ", cancelled"
-        elif self.deadline == inf:
-            state = ""
-        else:
-            try:
-                now = current_time()
-            except RuntimeError:  # must be called from async context
-                state = ""
-            else:
-                state = ", deadline is {:.2f} seconds {}".format(
-                    abs(self.deadline - now),
-                    "from now" if self.deadline >= now else "ago"
+        elif self.cancel_called:
+            state = ", cancelled pending cleanup"
+            if now is not None and self._cleanup_deadline != inf:
+                state += ", cleanup deadline is {:.2f} seconds {}".format(
+                    abs(self._cleanup_deadline - now),
+                    "from now" if self._cleanup_deadline >= now else "ago"
                 )
+        elif self.deadline != inf and now is not None:
+            state = ", deadline is {:.2f} seconds {}".format(
+                abs(self.deadline - now),
+                "from now" if self.deadline >= now else "ago"
+            )
+        else:
+            state = ""
 
         return "<trio.CancelScope at {:#x}, {}{}>".format(
             id(self), binding, state
         )
 
-    @contextmanager
     @enable_ki_protection
-    def _might_change_effective_deadline(self):
-        try:
-            yield
-        finally:
-            old = self._effective_deadline
-            if self.cancel_called or not self._tasks:
-                new = inf
-            else:
-                new = self._deadline
-            if old != new:
-                self._effective_deadline = new
-                runner = GLOBAL_RUN_CONTEXT.runner
-                if old != inf:
-                    del runner.deadlines[old, id(self)]
-                if new != inf:
-                    runner.deadlines[new, id(self)] = self
+    def _update_registered_deadline(self):
+        old = self._registered_deadline
+        if self.cleanup_expired or not self._tasks:
+            # After grace period expires, no more callbacks needed.
+            # If no tasks are attached, our cancellation has no impact.
+            new = inf
+        elif self.cancel_called:
+            # cancel() has been called (or deadline expired) but
+            # grace period is still ticking -- set up to be called
+            # back when the grace period expires
+            new = self._cleanup_deadline
+        else:
+            new = self._deadline
+
+        if old != new:
+            self._registered_deadline = new
+            runner = GLOBAL_RUN_CONTEXT.runner
+            if old != inf:
+                del runner.deadlines[old, id(self)]
+            if new != inf:
+                runner.deadlines[new, id(self)] = self
 
     @property
     def deadline(self):
@@ -280,6 +338,12 @@ def deadline(self):
         course, then `we want to know!
         <https://github.com/python-trio/trio/issues>`__)
 
+        Changing the deadline will also change the :attr:`cleanup_deadline`
+        so as to keep the difference between the two attributes the
+        same as it was before. Changing the deadline from infinity
+        to some finite value will set the cleanup deadline to that same
+        value.
+
         Defaults to :data:`math.inf`, which means "no deadline", though
         this can be overridden by the ``deadline=`` argument to
         the :class:`~trio.CancelScope` constructor.
@@ -288,8 +352,44 @@ def deadline(self):
 
     @deadline.setter
     def deadline(self, new_deadline):
-        with self._might_change_effective_deadline():
-            self._deadline = float(new_deadline)
+        old_deadline = self._deadline
+        self._deadline = _convert_deadline(new_deadline)
+        if old_deadline == inf and self._deadline != inf:
+            self._cleanup_deadline = self._deadline
+        elif old_deadline != self._deadline:  # avoid inf minus inf creating NaN
+            self._cleanup_deadline += self._deadline - old_deadline
+        self._update_registered_deadline()
+
+    @property
+    def cleanup_deadline(self):
+        """Read-write, :class:`float`. An absolute time on the current run's
+        clock at which the cancellation of this scope will extend to
+        also cover nested scopes that set
+        :attr:`shield_during_cleanup` to :data:`True`.
+
+        Normally this is set implicitly by the ``grace_period`` argument
+        to :func:`move_on_after` and friends; for example,
+        ``move_on_at(DEADLINE, grace_period=5)`` creates a cancel scope
+        with ``cleanup_deadline = DEADLINE + 5``.
+
+        You can modify this attribute.  Setting it earlier than the
+        :attr:`deadline` is allowed, but will not result in a
+        cancellation being delivered before the :attr:`deadline` in
+        the absence of an explicit call to :meth:`cancel`. Calls to
+        :meth:`cancel` may also implicitly modify the
+        :attr:`cleanup_deadline`; see the :meth:`cancel` documentation
+        for details.
+
+        The default :attr:`cleanup_deadline` is the specified :attr:`deadline`,
+        i.e., no grace period.
+        """
+        return self._cleanup_deadline
+
+    @cleanup_deadline.setter
+    def cleanup_deadline(self, new_deadline):
+        new_deadline = _convert_deadline(new_deadline)
+        self._cleanup_deadline = new_deadline
+        self._update_registered_deadline()
 
     @property
     def shield(self):
@@ -316,29 +416,78 @@ def shield(self):
         return self._shield
 
     @shield.setter
-    @enable_ki_protection
     def shield(self, new_value):
+        self._set_shield("_shield", new_value)
+
+    @property
+    def shield_during_cleanup(self):
+        """Read-write, :class:`bool`, default :data:`False`. Like
+        :attr:`shield`, but instead of shielding this scope from outside
+        cancellations indefinitely, it only shields until the outside
+        cancellation's :attr:`cleanup_deadline` expires. This allows for
+        :ref:`blocking cleanup <cleanup-with-grace-period>` whose maximum
+        duration is specified externally.
+
+        You can set both :attr:`shield` and
+        :attr:`shield_during_cleanup` independently, but
+        :attr:`shield` offers strictly more protection, so if
+        :attr:`shield` is :data:`True` then the value of
+        :attr:`shield_during_cleanup` doesn't matter.
+        """
+        return self._shield_during_cleanup
+
+    @shield_during_cleanup.setter
+    def shield_during_cleanup(self, new_value):
+        self._set_shield("_shield_during_cleanup", new_value)
+
+    @enable_ki_protection
+    def _set_shield(self, attr_name, new_value):
         if not isinstance(new_value, bool):
-            raise TypeError("shield must be a bool")
-        self._shield = new_value
-        if not self._shield:
+            raise TypeError("{} must be a bool".format(attr_name.lstrip("_")))
+        setattr(self, attr_name, new_value)
+        if not new_value:
             for task in self._tasks:
                 task._attempt_delivery_of_any_pending_cancel()
 
     @enable_ki_protection
-    def cancel(self):
+    def cancel(self, *, grace_period=0, _as_of=None):
         """Cancels this scope immediately.
 
-        This method is idempotent, i.e., if the scope was already
-        cancelled then this method silently does nothing.
+        Multiple calls to :meth:`cancel` with the same ``grace_period``
+        are idempotent. Calling :meth:`cancel` again with a shorter
+        ``grace_period`` may reduce the amount of time available for
+        cleanup; the resulting :attr:`cleanup_deadline` is the minimum
+        of the cleanup deadlines implied by all calls to :meth:`cancel`.
+
+        Args:
+          grace_period (float): If specified, wait this many seconds
+              before extending this cancellation to also cover nested
+              scopes that set :attr:`shield_during_cleanup` to
+              :data:`True`. This is implemented by decreasing
+              :attr:`cleanup_deadline` to at most :func:`current_time`
+              plus ``grace_period``. If no ``grace_period`` is given,
+              perform a full cancellation immediately.
+
         """
-        if self.cancel_called:
+        if self.cleanup_expired:
             return
-        with self._might_change_effective_deadline():
-            self.cancel_called = True
+        self.cancel_called = True
+        now = _as_of if _as_of is not None else current_time()
+        self._cleanup_deadline = min(
+            self._cleanup_deadline, now + grace_period
+        )
+        self.cleanup_expired = (now >= self._cleanup_deadline)
+        self._update_registered_deadline()
         for task in self._tasks:
             task._attempt_delivery_of_any_pending_cancel()
 
+    def _registered_deadline_expired(self, as_of):
+        if not self.cancel_called:
+            self.cancel(grace_period=inf, _as_of=as_of)
+        else:
+            assert not self.cleanup_expired
+            self.cancel(grace_period=0, _as_of=as_of)
+
     def _add_task(self, task):
         self._tasks.add(task)
         task._cancel_stack.append(self)
@@ -350,8 +499,8 @@ def _remove_task(self, task):
 
     # Used by the nursery.start trickiness
     def _tasks_removed_by_adoption(self, tasks):
-        with self._might_change_effective_deadline():
-            self._tasks.difference_update(tasks)
+        self._tasks.difference_update(tasks)
+        self._update_registered_deadline()  # might have no tasks left
 
     # Used by the nursery.start trickiness
     def _tasks_added_by_adoption(self, tasks):
@@ -370,8 +519,8 @@ def _close(self, exc):
             and scope_task._pending_cancel_scope() is self
         ):
             exc = MultiError.filter(self._exc_filter, exc)
-        with self._might_change_effective_deadline():
-            self._remove_task(scope_task)
+        self._remove_task(scope_task)
+        self._update_registered_deadline()  # might have no tasks left
         return exc
 
 
@@ -529,6 +678,14 @@ def open_nursery():
     return NurseryManager()
 
 
+def _is_only_cancelleds(exc):
+    return isinstance(exc, _core.Cancelled) or (
+        isinstance(exc, _core.MultiError) and all(
+            _is_only_cancelleds(subexc) for subexc in exc.exceptions
+        )
+    )
+
+
 class Nursery:
     def __init__(self, parent_task, cancel_scope):
         self._parent_task = parent_task
@@ -560,7 +717,16 @@ def parent_task(self):
 
     def _add_exc(self, exc):
         self._pending_excs.append(exc)
-        self.cancel_scope.cancel()
+        if _is_only_cancelleds(exc):
+            # A cancellation that propagates out of a task must be
+            # associated with some cancel scope in the nursery's cancel
+            # stack. If that scope is cancelled, it's cancelled for all
+            # tasks in the nursery, and adding our own cancellation on
+            # top of that (with a potentially different grace period)
+            # just confuses things.
+            pass
+        else:
+            self.cancel_scope.cancel()
 
     def _check_nursery_closed(self):
         if not any(
@@ -644,15 +810,37 @@ def __del__(self):
 
 
 def _pending_cancel_scope(cancel_stack):
-    # Return the outermost exception that is is not outside a shield.
+    # Return the outermost of the following two possibilities:
+    # - the outermost cancel scope in cleanup state (cancel_called and not
+    #   cleanup_expired) that is not outside a shield_during_cleanup
+    # - the outermost cancel scope in fully-cancelled state
+    #   (cleanup_expired, which implies also cancel_called)
+    #   that is not outside any kind of shield
+
     pending_scope = None
+    pending_with_cleanup_expired = None
+
     for scope in cancel_stack:
-        # Check shield before _exc, because shield should not block
-        # processing of *this* scope's exception
-        if scope.shield:
+        # Check shielding before cancellation state, because shield
+        # should not block processing of *this* scope's exception
+        if scope._shield:
+            # Full shield: nothing outside this scope can affect a task
+            # that is currently executing code inside the scope.
             pending_scope = None
+            pending_with_cleanup_expired = None
+
+        if scope._shield_during_cleanup:
+            # Shield during cleanup: only cancel scopes with cleanup_expired
+            # outside this scope can affect a task that's executing code
+            # inside it.
+            pending_scope = pending_with_cleanup_expired
+
         if pending_scope is None and scope.cancel_called:
             pending_scope = scope
+
+        if pending_with_cleanup_expired is None and scope.cleanup_expired:
+            pending_with_cleanup_expired = scope
+
     return pending_scope
 
 
@@ -1501,8 +1689,10 @@ def run_impl(runner, async_fn, args):
         while runner.deadlines:
             (deadline, _), cancel_scope = runner.deadlines.peekitem(0)
             if deadline <= now:
-                # This removes the given scope from runner.deadlines:
-                cancel_scope.cancel()
+                # This removes the given scope from runner.deadlines,
+                # or reinserts it with a later deadline if there's a
+                # grace period involved.
+                cancel_scope._registered_deadline_expired(now)
                 idle_primed = False
             else:
                 break
@@ -1671,12 +1861,26 @@ def current_effective_deadline():
     """
     task = current_task()
     deadline = inf
+    cleanup_deadline = inf
+
     for scope in task._cancel_stack:
         if scope._shield:
-            deadline = inf
+            deadline = cleanup_deadline = inf
+        if scope._shield_during_cleanup:
+            deadline = cleanup_deadline
+
+        if scope.cleanup_expired:
+            cleanup_deadline = -inf
+        else:
+            cleanup_deadline = min(
+                cleanup_deadline, scope._cleanup_deadline
+            )
+
         if scope.cancel_called:
             deadline = -inf
-        deadline = min(deadline, scope._deadline)
+        else:
+            deadline = min(deadline, scope._deadline)
+
     return deadline
 
 
@@ -1704,7 +1908,7 @@ async def checkpoint_if_cancelled():
 
     Equivalent to (but potentially more efficient than)::
 
-        if trio.current_deadline() == -inf:
+        if trio.current_effective_deadline() == -inf:
             await trio.hazmat.checkpoint()
 
     This is either a no-op, or else it allow other tasks to be scheduled and
diff --git a/trio/_core/tests/test_run.py b/trio/_core/tests/test_run.py
index 14905b5068..367fd78fe8 100644
--- a/trio/_core/tests/test_run.py
+++ b/trio/_core/tests/test_run.py
@@ -18,7 +18,7 @@
 from .tutil import check_sequence_matches, gc_collect_harder
 from ... import _core
 from ..._threads import run_sync_in_worker_thread
-from ..._timeouts import sleep, fail_after
+from ..._timeouts import sleep, fail_after, move_on_at, shield_during_cleanup
 from ..._util import aiter_compat
 from ...testing import (
     wait_all_tasks_blocked,
@@ -562,11 +562,28 @@ async def test_cancel_scope_repr(mock_clock):
         assert "deadline is 10.00 seconds from now" in repr(scope)
         # when not in async context, can't get the current time
         assert "deadline" not in await run_sync_in_worker_thread(repr, scope)
+        scope.cancel(grace_period=inf)
+        assert "cancelled pending cleanup" in repr(scope)
+        scope.cleanup_deadline = _core.current_time() + 1
+        assert "cleanup deadline is 1.00 seconds from now" in repr(scope)
         scope.cancel()
-        assert "cancelled" in repr(scope)
+        assert "cancelled" in repr(scope) and "cleanup" not in repr(scope)
     assert "exited" in repr(scope)
 
 
+async def test_cancel_scope_validation():
+    with pytest.raises(ValueError):
+        _core.CancelScope(deadline="nan")
+    with pytest.raises(ValueError):
+        _core.CancelScope(cleanup_deadline=float("nan"))
+    scope = _core.CancelScope(deadline=42)
+    with pytest.raises(ValueError):
+        scope.cleanup_deadline = "nan"
+    with pytest.raises(ValueError):
+        scope.deadline = float("nan")
+    assert scope.deadline == scope.cleanup_deadline == 42
+
+
 def test_cancel_points():
     async def main1():
         with _core.CancelScope() as scope:
@@ -913,6 +930,209 @@ async def enter_scope():
         nursery.cancel_scope.cancel()
 
 
+async def test_cancel_graceful(autojump_clock):
+    start = _core.current_time()
+    cleanup_start = None
+    cleanup_finish = None
+
+    # cleanup deadline changes track deadline changes
+    with _core.CancelScope(deadline=start + 5) as scope:
+        assert scope.deadline == pytest.approx(start + 5)
+        assert scope.cleanup_deadline == pytest.approx(start + 5)
+        scope.cleanup_deadline += 2
+        assert scope.deadline == pytest.approx(start + 5)
+        assert scope.cleanup_deadline == pytest.approx(start + 7)
+        scope.deadline += 3
+        assert scope.deadline == pytest.approx(start + 8)
+        assert scope.cleanup_deadline == pytest.approx(start + 10)
+        scope.deadline = inf
+        assert scope.deadline == scope.cleanup_deadline == inf
+        # make sure we don't blindly add (new_deadline - old_deadline)
+        # and get a NaN
+        scope.deadline = inf
+        assert scope.deadline == scope.cleanup_deadline == inf
+        scope.deadline = start + 3
+        assert scope.deadline == pytest.approx(start + 3)
+        assert scope.cleanup_deadline == pytest.approx(start + 3)
+
+    # nested scopes, outer one's grace period expires first
+    with move_on_at(start + 1, grace_period=1) as outer:
+        with move_on_at(start + 1.5, grace_period=2) as inner:
+            try:
+                await sleep(10)
+            finally:
+                cleanup_start = _core.current_time()
+                with shield_during_cleanup():
+                    assert "cancelled pending cleanup" in repr(outer)
+                    assert not inner.cancel_called
+                    assert outer.cancel_called and not outer.cleanup_expired
+                    try:
+                        await sleep(5)
+                    finally:
+                        cleanup_finish = _core.current_time()
+
+    assert cleanup_start == pytest.approx(start + 1)
+    assert cleanup_finish == pytest.approx(cleanup_start + 1)
+    assert outer.cancelled_caught and not inner.cancelled_caught
+    assert outer.cancel_called and outer.cleanup_expired
+    assert inner.cancel_called and not inner.cleanup_expired
+
+    # now the inner one's grace period expires first even though the outer
+    # was cancelled first
+    start = _core.current_time()
+    with move_on_at(start + 1, grace_period=1) as outer:
+        with move_on_at(start + 1.5, grace_period=0.3) as inner:
+            try:
+                await sleep(10)
+            finally:
+                cleanup_start = _core.current_time()
+                with shield_during_cleanup():
+                    try:
+                        await sleep(5)
+                    finally:
+                        cleanup_finish = _core.current_time()
+
+    assert cleanup_start == pytest.approx(start + 1)
+    assert cleanup_finish == pytest.approx(cleanup_start + 0.8)
+    assert outer.cancelled_caught and not inner.cancelled_caught
+    assert outer.cancel_called and not outer.cleanup_expired
+    assert inner.cancel_called and inner.cleanup_expired
+
+    # cancel can specify grace period, second call may shorten it but
+    # not lengthen
+    start = _core.current_time()
+    with _core.CancelScope() as scope:
+        scope.cancel(grace_period=1)
+        assert scope.cancel_called and not scope.cleanup_expired
+        scope.cancel(grace_period=0.5)
+        assert scope.cancel_called and not scope.cleanup_expired
+        scope.cancel(grace_period=2)
+        assert scope.cancel_called and not scope.cleanup_expired
+
+        with pytest.raises(_core.Cancelled):
+            await sleep(5)
+        assert _core.current_time() == pytest.approx(start)
+
+        with pytest.raises(_core.Cancelled):
+            with shield_during_cleanup():
+                await sleep(5)
+        assert _core.current_time() == pytest.approx(start + 0.5)
+        assert scope.cleanup_expired
+
+    async def canary(*, task_status=_core.TASK_STATUS_IGNORED):
+        start = _core.current_time()
+        with pytest.raises(_core.Cancelled):
+            with shield_during_cleanup() as scope:
+                task_status.started(scope)
+                await sleep(5)
+        assert _core.current_time() == pytest.approx(start + 0.5)
+
+    # if grace period is zero, cancellation of shield_during_cleanup scopes
+    # is instantaneous
+    async with _core.open_nursery() as nursery:
+        shield_scope = await nursery.start(canary)
+        await sleep(0.5)
+        nursery.cancel_scope.cancel()
+        nursery.cancel_scope.cleanup_deadline += 1  # no effect/already expired
+        assert nursery.cancel_scope.cleanup_expired
+
+    # grace period changes after cancel do work
+    async with _core.open_nursery() as nursery:
+        nursery.start_soon(canary)
+        nursery.cancel_scope.cancel(grace_period=0.1)
+        nursery.cancel_scope.cleanup_deadline += 0.2
+        with _core.CancelScope(shield=True):
+            await sleep(0.2)
+        assert not nursery.cancel_scope.cleanup_expired
+        nursery.cancel_scope.cleanup_deadline += 0.2
+
+    # but not if the old grace period already expired
+    async with _core.open_nursery() as nursery:
+        nursery.start_soon(canary)
+        await sleep(0.3)
+        nursery.cancel_scope.cancel(grace_period=0.2)
+        with _core.CancelScope(shield=True):
+            await sleep(0.3)
+        assert nursery.cancel_scope.cleanup_expired
+        nursery.cancel_scope.cleanup_deadline += 0.3
+        assert nursery.cancel_scope.cleanup_expired
+
+    # cleanup_expired is not set after the with block is exited
+    with _core.CancelScope() as scope:
+        scope.cancel(grace_period=0.5)
+        with shield_during_cleanup():
+            await sleep(0.3)
+    await sleep(1)
+    assert scope.cancel_called and not scope.cleanup_expired
+
+    # entering an unbound scope whose grace period already expired works
+    start = _core.current_time()
+    scope = _core.CancelScope()
+    scope.cancel(grace_period=0.2)
+    await sleep(0.3)
+    with scope, shield_during_cleanup(), pytest.raises(_core.Cancelled):
+        await _core.checkpoint()
+
+    # cancel() without grace period can accelerate an existing cancel
+    async with _core.open_nursery() as nursery:
+        nursery.start_soon(canary)
+        await sleep(0.2)
+        nursery.cancel_scope.cancel(grace_period=1)
+        with _core.CancelScope(shield=True):
+            await sleep(0.3)
+        nursery.cancel_scope.cancel()
+        assert nursery.cancel_scope.cleanup_expired
+        nursery.cancel_scope.cancel()  # idempotent
+
+    # removing shield_during_cleanup during the grace period wakes up
+    # the affected task(s)
+    async with _core.open_nursery() as nursery:
+        shield_scope = await nursery.start(canary)
+        nursery.cancel_scope.cancel(grace_period=1)
+        with _core.CancelScope(shield=True):
+            await sleep(0.5)
+        assert shield_scope.shield_during_cleanup
+        shield_scope.shield_during_cleanup = False
+        assert not shield_scope.shield_during_cleanup
+
+
+async def test_grace_period_effective_deadline(autojump_clock):
+    start = _core.current_time()
+    with move_on_at(start + 1, grace_period=2) as outer:
+        with move_on_at(start + 2, grace_period=0.5) as inner:
+            assert _core.current_effective_deadline() == start + 1
+            inner.shield_during_cleanup = True
+            assert _core.current_effective_deadline() == start + 2
+            with shield_during_cleanup():
+                assert _core.current_effective_deadline() == start + 2.5
+                inner.cleanup_deadline += 4.5
+                assert _core.current_effective_deadline() == start + 3
+                inner.shield_during_cleanup = False
+                assert _core.current_effective_deadline() == start + 3
+            assert _core.current_effective_deadline() == start + 1
+            outer.deadline += 2
+            assert _core.current_effective_deadline() == start + 2
+            outer.deadline -= 2
+            assert _core.current_effective_deadline() == start + 1
+            inner.shield = True
+            assert _core.current_effective_deadline() == start + 2
+            with shield_during_cleanup() as scope:
+                assert _core.current_effective_deadline() == start + 7
+                scope.shield = True
+                assert _core.current_effective_deadline() == inf
+                scope.shield = False
+                inner.cancel(grace_period=5)
+                assert _core.current_effective_deadline() == start + 5
+            assert _core.current_effective_deadline() == -inf
+            with shield_during_cleanup():
+                inner.cleanup_deadline -= 2
+                assert _core.current_effective_deadline() == start + 3
+                outer.cancel()
+                assert _core.current_effective_deadline() == start + 3
+                inner.shield = False
+                assert _core.current_effective_deadline() == -inf
+
+
 async def test_timekeeping():
     # probably a good idea to use a real clock for *one* test anyway...
     TARGET = 0.1
diff --git a/trio/_timeouts.py b/trio/_timeouts.py
index b4c64f8759..9a2e39f5cc 100644
--- a/trio/_timeouts.py
+++ b/trio/_timeouts.py
@@ -10,36 +10,52 @@
     "sleep",
     "fail_at",
     "fail_after",
+    "shield_during_cleanup",
     "TooSlowError",
 ]
 
 
-def move_on_at(deadline):
+def move_on_at(deadline, *, grace_period=0):
     """Use as a context manager to create a cancel scope with the given
     absolute deadline.
 
     Args:
       deadline (float): The deadline.
+      grace_period (float): The number of additional seconds to permit
+          code in :func:`shield_during_cleanup` blocks within this
+          scope to run after the deadline expires.
+
+    Raises:
+      ValueError: if ``grace_period`` is less than zero
 
     """
-    return _core.CancelScope(deadline=deadline)
+    if grace_period < 0:
+        raise ValueError("grace_period must be non-negative")
+    return _core.CancelScope(
+        deadline=deadline, cleanup_deadline=deadline + grace_period
+    )
 
 
-def move_on_after(seconds):
+def move_on_after(seconds, *, grace_period=0):
     """Use as a context manager to create a cancel scope whose deadline is
     set to now + *seconds*.
 
     Args:
       seconds (float): The timeout.
+      grace_period (float): The number of additional seconds to permit
+          code in :func:`shield_during_cleanup` blocks within this
+          scope to run after the timeout expires.
 
     Raises:
-      ValueError: if timeout is less than zero.
+      ValueError: if timeout or grace_period is less than zero.
 
     """
 
     if seconds < 0:
         raise ValueError("timeout must be non-negative")
-    return move_on_at(_core.current_time() + seconds)
+    return move_on_at(
+        _core.current_time() + seconds, grace_period=grace_period
+    )
 
 
 async def sleep_forever():
@@ -94,7 +110,7 @@ class TooSlowError(Exception):
 
 
 @contextmanager
-def fail_at(deadline):
+def fail_at(deadline, *, grace_period=0):
     """Creates a cancel scope with the given deadline, and raises an error if it
     is actually cancelled.
 
@@ -109,16 +125,17 @@ def fail_at(deadline):
     Raises:
       TooSlowError: if a :exc:`Cancelled` exception is raised in this scope
         and caught by the context manager.
+      ValueError: if *grace_period* is less than zero.
 
     """
 
-    with move_on_at(deadline) as scope:
+    with move_on_at(deadline, grace_period=grace_period) as scope:
         yield scope
     if scope.cancelled_caught:
         raise TooSlowError
 
 
-def fail_after(seconds):
+def fail_after(seconds, *, grace_period=0):
     """Creates a cancel scope with the given timeout, and raises an error if
     it is actually cancelled.
 
@@ -132,9 +149,41 @@ def fail_after(seconds):
     Raises:
       TooSlowError: if a :exc:`Cancelled` exception is raised in this scope
         and caught by the context manager.
-      ValueError: if *seconds* is less than zero.
+      ValueError: if *seconds* or *grace_period* is less than zero.
 
     """
     if seconds < 0:
         raise ValueError("timeout must be non-negative")
-    return fail_at(_core.current_time() + seconds)
+    return fail_at(_core.current_time() + seconds, grace_period=grace_period)
+
+
+def shield_during_cleanup():
+    """Use as a context manager to mark code that should be allowed to
+    run for a bit longer after a cancel scope that surrounds it becomes
+    cancelled.
+
+    This is intended for use with cleanup code that might run while a
+    :exc:`~trio.Cancelled` exception is propagating (e.g., in
+    ``finally`` blocks or ``__aexit__`` handlers) for which an orderly
+    shutdown requires blocking. It can also be used to guard
+    non-cleanup code that is likely to be able to run to completion in
+    a modest amount of time, where the extra time taken to propagate
+    the cancellation is less disruptive than a forced immediate
+    shutdown would be.
+
+    The exact amount of additional time allowed is specified by the
+    ``grace_period`` argument to the :meth:`~trio.CancelScope.cancel`
+    call that caused the cancellation; if the cancellation occurred
+    due to deadline expiry, the grace period is as was specified when
+    creating the cancel scope that became cancelled (via
+    :func:`move_on_after` or similar) or as implied by the setting of
+    its underlying :attr:`CancelScope.cleanup_deadline` attribute.
+
+    The default grace period is *zero*, and code that uses
+    :func:`shield_during_cleanup` must still be prepared for
+    a possible cancellation.  Use a cancel scope with the
+    :attr:`~CancelScope.shield` attribute set to :data:`True` if
+    you really need to definitely not be interrupted.
+
+    """
+    return _core.CancelScope(shield_during_cleanup=True)