diff --git a/sys/kern/Cargo.toml b/sys/kern/Cargo.toml
index 0e50c8754..00460d667 100644
--- a/sys/kern/Cargo.toml
+++ b/sys/kern/Cargo.toml
@@ -36,6 +36,7 @@ phash-gen = { path = "../../build/phash-gen" }
 [features]
 dump = []
 nano = []
+stack-watermark = []
 
 [lib]
 test = false
diff --git a/sys/kern/src/arch/arm_m.rs b/sys/kern/src/arch/arm_m.rs
index 1168d4df0..8019d1a43 100644
--- a/sys/kern/src/arch/arm_m.rs
+++ b/sys/kern/src/arch/arm_m.rs
@@ -1039,6 +1039,17 @@ static TICKS: [AtomicU32; 2] = {
 #[no_mangle]
 pub unsafe extern "C" fn SysTick() {
     crate::profiling::event_timer_isr_enter();
+
+    // We don't need the current task pointer in this routine, but we'll access
+    // it briefly to update the stack watermark:
+    {
+        let current = CURRENT_TASK_PTR.load(Ordering::Relaxed);
+        // Safety: `CURRENT_TASK_PTR` is valid once the kernel is started, and
+        // this interrupt is only enabled once the kernel is started.
+        let t = unsafe { &mut *current };
+        t.update_stack_watermark();
+    }
+
     with_task_table(|tasks| {
         // Load the time before this tick event.
         let t0 = TICKS[0].load(Ordering::Relaxed);
@@ -1174,10 +1185,14 @@ unsafe extern "C" fn pendsv_entry() {
     crate::profiling::event_secondary_syscall_enter();
 
     let current = CURRENT_TASK_PTR.load(Ordering::Relaxed);
+    // We're being slightly pedantic about this, since it's straightforward (if
+    // weird) for pre-kernel main to trigger a PendSV, and it's not possible to
+    // disable this interrupt source.
     uassert!(!current.is_null()); // irq before kernel started?
 
     // Safety: we're dereferencing the current task pointer, which we're
-    // trusting the rest of this module to maintain correctly.
+    // trusting the rest of this module to maintain correctly (and we've just
+    // checked that the kernel has started).
     let current = usize::from(unsafe { (*current).descriptor().index });
 
     with_task_table(|tasks| {
@@ -1210,6 +1225,23 @@ pub unsafe extern "C" fn DefaultHandler() {
         ipsr & 0x1FF
     };
 
+    let current = CURRENT_TASK_PTR.load(Ordering::Relaxed);
+
+    // Because this handler is reachable in response to an NMI, it's possible to
+    // get here before the kernel has started. So before dereferencing the task
+    // pointer, check for NULL (the state at reset). If we manage to get here
+    // before the kernel has started, we'll skip the stack watermark update and
+    // hit a panic below.
+
+    // Safety: this dereferences the pointer only if it isn't NULL. If it
+    // isn't NULL, that means we've initialized it elsewhere in this module
+    // and it's a valid (but aliased) pointer to a task. We can safely
+    // dereference it as long as we throw it away before hitting
+    // `with_task_table` below.
+    if let Some(t) = unsafe { current.as_mut() } {
+        t.update_stack_watermark();
+    }
+
     // The first 16 exceptions are architecturally defined; vendor hardware
     // interrupts start at 16.
     match exception_num {
@@ -1232,6 +1264,8 @@ pub unsafe extern "C" fn DefaultHandler() {
                 .get(abi::InterruptNum(irq_num))
                 .unwrap_or_else(|| panic!("unhandled IRQ {irq_num}"));
 
+            // with_task_table will panic if the kernel has not yet been
+            // started. This is good.
             let switch = with_task_table(|tasks| {
                 disable_irq(irq_num, false);
 
@@ -1508,7 +1542,12 @@ unsafe extern "C" fn handle_fault(task: *mut task::Task) {
         // assembly fault handler to pass us a legitimate one. We use it
         // immediately and discard it because otherwise it would alias the task
         // table below.
-        let t = unsafe { &(*task) };
+        let t = unsafe { &mut (*task) };
+        // Take the opportunity to update the stack watermark. This is
+        // technically wasted effort if the fault is in the kernel, but it's
+        // still nice to keep it updated -- and it's critical if the fault is in
+        // the task!
+        t.update_stack_watermark();
         (
             t.save().exc_return & 0b1000 != 0,
             usize::from(t.descriptor().index),
@@ -1635,7 +1674,12 @@ unsafe extern "C" fn handle_fault(
     // of dereferencing it, as it would otherwise alias the task table obtained
     // later.
     let (exc_return, psp, idx) = unsafe {
-        let t = &(*task);
+        let t = &mut (*task);
+        // Take the opportunity to update the stack watermark. This is
+        // technically wasted effort if the fault is in the kernel, but it's
+        // still nice to keep it updated -- and it's critical if the fault is in
+        // the task!
+        t.update_stack_watermark();
         (
             t.save().exc_return,
             t.save().psp,
diff --git a/sys/kern/src/syscalls.rs b/sys/kern/src/syscalls.rs
index 10e4646ab..194b35bce 100644
--- a/sys/kern/src/syscalls.rs
+++ b/sys/kern/src/syscalls.rs
@@ -70,7 +70,10 @@ pub unsafe extern "C" fn syscall_entry(nr: u32, task: *mut Task) {
     let idx = {
         // Safety: we're trusting the interrupt entry routine to pass us a valid
         // task pointer.
-        let t = unsafe { &*task };
+        let t = unsafe { &mut *task };
+
+        t.update_stack_watermark();
+
         usize::from(t.descriptor().index)
     };
 
diff --git a/sys/kern/src/task.rs b/sys/kern/src/task.rs
index 31095b4b7..77b45ef5a 100644
--- a/sys/kern/src/task.rs
+++ b/sys/kern/src/task.rs
@@ -49,6 +49,13 @@ pub struct Task {
     /// Pointer to the ROM descriptor used to create this task, so it can be
     /// restarted.
     descriptor: &'static TaskDesc,
+
+    /// Stack watermark tracking support.
+    ///
+    /// This field is completely missing if the feature is disabled to make that
+    /// clear to debug tools.
+    #[cfg(feature = "stack-watermark")]
+    stack_watermark: StackWatermark,
 }
 
 impl Task {
@@ -69,6 +76,8 @@ impl Task {
             notifications: 0,
             save: crate::arch::SavedState::default(),
             timer: crate::task::TimerState::default(),
+            #[cfg(feature = "stack-watermark")]
+            stack_watermark: StackWatermark::default(),
         }
     }
 
@@ -321,9 +330,32 @@ impl Task {
         self.notifications = 0;
         self.state = TaskState::default();
 
+        #[cfg(feature = "stack-watermark")]
+        {
+            self.stack_watermark.past_low = u32::min(
+                self.stack_watermark.past_low,
+                self.stack_watermark.current_low,
+            );
+            self.stack_watermark.current_low = u32::MAX;
+        }
+
         crate::arch::reinitialize(self);
     }
 
+    /// Updates the task's stack watermark stats, if enabled.
+    ///
+    /// If not enabled, this does nothing, so it should be safe to call freely
+    /// without checking for the feature.
+    pub fn update_stack_watermark(&mut self) {
+        #[cfg(feature = "stack-watermark")]
+        {
+            self.stack_watermark.current_low = u32::min(
+                self.stack_watermark.current_low,
+                self.save().stack_pointer(),
+            );
+        }
+    }
+
     /// Returns a reference to the `TaskDesc` that was used to initially create
     /// this task.
     pub fn descriptor(&self) -> &'static TaskDesc {
@@ -381,6 +413,32 @@ impl Task {
     }
 }
 
+#[cfg(feature = "stack-watermark")]
+#[derive(Copy, Clone, Debug)]
+struct StackWatermark {
+    /// Tracks the lowest stack pointer value (e.g. fullest stack) observed on
+    /// any kernel entry for this instance of this task.
+    ///
+    /// Initialized to `u32::MAX` if the task has not yet run.
+    current_low: u32,
+
+    /// Tracks the lowest stack pointer value (e.g. fullest stack) observed on
+    /// any kernel entry across *any* instance of this task.
+    ///
+    /// Initialized to `u32::MAX` if the task has not yet run.
+    past_low: u32,
+}
+
+#[cfg(feature = "stack-watermark")]
+impl Default for StackWatermark {
+    fn default() -> Self {
+        Self {
+            current_low: u32::MAX,
+            past_low: u32::MAX,
+        }
+    }
+}
+
 /// Interface that must be implemented by the `arch::SavedState` type. This
 /// gives architecture-independent access to task state for the rest of the
 /// kernel.