diff --git a/README.md b/README.md
index ecf2d85..fc61986 100644
--- a/README.md
+++ b/README.md
@@ -1,18 +1,13 @@
 # Embedded DSP algorithms
 
 [![GitHub release](https://img.shields.io/github/v/release/quartiq/idsp?include_prereleases)](https://github.com/quartiq/idsp/releases)
+[![crates.io](https://img.shields.io/crates/v/idsp.svg)](https://crates.io/crates/idsp)
 [![Documentation](https://img.shields.io/badge/docs-online-success)](https://docs.rs/idsp)
 [![QUARTIQ Matrix Chat](https://img.shields.io/matrix/quartiq:matrix.org)](https://matrix.to/#/#quartiq:matrix.org)
 [![Continuous Integration](https://github.com/quartiq/idsp/actions/workflows/ci.yml/badge.svg)](https://github.com/quartiq/idsp/actions/workflows/ci.yml)
 
 This crate contains some tuned DSP algorithms for general and especially embedded use.
-Many of the algorithms are implemented on integer datatypes for reasons that become important in certain cases:
-
-* Speed: even with a hard floating point unit integer operations are faster.
-* Accuracy: single precision FP has a 24 bit mantissa, `i32` has full 32 bit.
-* No rounding errors.
-* Natural wrap around (modulo) at the integer overflow: critical for phase/frequency applications.
-* Natural definition of "full scale".
+Many of the algorithms are implemented on integer (fixed point) datatypes.
 
 One comprehensive user for these algorithms is [Stabilizer](https://github.com/quartiq/stabilizer).
 
@@ -24,10 +19,6 @@ This uses a small (128 element or 512 byte) LUT, smart octant (un)mapping, linea
 
 This returns a phase given a complex signal (a pair of in-phase/`x`/cosine and quadrature/`y`/sine). The RMS phase error is less than 5e-6 rad, max error is less than 1.2e-5 rad, i.e. 20.5 bit RMS, 19.1 bit max accuracy. The bias is minimal.
 
-## ComplexExt
-
-An extension trait for the `num::Complex` type featuring especially a `std`-like API to the two functions above.
-
 ## PLL, RPLL
 
 High accuracy, zero-assumption, fully robust, forward and reciprocal PLLs with dynamically adjustable time constant and arbitrary (in the Nyquist sampling sense) capture range, and noise shaping.
@@ -36,13 +27,18 @@ High accuracy, zero-assumption, fully robust, forward and reciprocal PLLs with d
 
 Tools to handle, track, and unwrap phase signals or generate them.
 
-## iir
+## IIR/Biquad
 
 Fixed point (`i8`, `i16`, `i32`, `i64`) and floating point (`f32`, `f64`) biquad IIR filters.
 Robust and clean clipping and offset (anti-windup, no derivative kick, dynamically adjustable gains) suitable for PID controller applications.
 Direct Form 1 and Direct Form 2 Transposed supported.
 Coefficient sharing for multiple channels.
 
+## State variable filter
+
+Simple IIR state variable filter simultaneously providing highpass, lowpass,
+bandpass, and notch filtering of a signal.
+
 ## Lowpass, Lockin
 
 Fast, infinitely cascadable, first- and second-order lowpass and the corresponding integration into a lockin amplifier algorithm.
@@ -50,8 +46,3 @@ Fast, infinitely cascadable, first- and second-order lowpass and the correspondi
 ## FIR filters
 
 Fast `f32` symmetric FIR filters, optimized half-band filters, half-band filter decimators and integators and cascades.
-
-## SVF filter
-
-Simple IIR state variable filter simultaneously providing highpass, lowpass,
-bandpass, and notch filtering of a signal.
diff --git a/src/accu.rs b/src/accu.rs
index 343361d..6bf2d91 100644
--- a/src/accu.rs
+++ b/src/accu.rs
@@ -1,5 +1,6 @@
 use num_traits::ops::wrapping::WrappingAdd;
 
+/// Wrapping Accumulator
 #[derive(Copy, Clone, Default, PartialEq, Eq, Debug)]
 pub struct Accu<T> {
     state: T,
@@ -7,6 +8,7 @@ pub struct Accu<T> {
 }
 
 impl<T> Accu<T> {
+    /// Create a new accumulator with given initial state and step.
     pub fn new(state: T, step: T) -> Self {
         Self { state, step }
     }
diff --git a/src/complex.rs b/src/complex.rs
index 91ba4f4..27915b4 100644
--- a/src/complex.rs
+++ b/src/complex.rs
@@ -4,11 +4,17 @@ use super::{atan2, cossin};
 
 /// Complex extension trait offering DSP (fast, good accuracy) functionality.
 pub trait ComplexExt<T, U> {
+    /// Unit magnitude from angle
     fn from_angle(angle: T) -> Self;
+    /// Square of magnitude
     fn abs_sqr(&self) -> U;
+    /// Log2 approximation
     fn log2(&self) -> T;
+    /// Angle
     fn arg(&self) -> T;
+    /// Staturating addition
     fn saturating_add(&self, other: Self) -> Self;
+    /// Saturating subtraction
     fn saturating_sub(&self, other: Self) -> Self;
 }
 
@@ -97,6 +103,7 @@ impl ComplexExt<i32, u32> for Complex<i32> {
 
 /// Full scale fixed point multiplication.
 pub trait MulScaled<T> {
+    /// Scaled multiplication for fixed point
     fn mul_scaled(self, other: T) -> Self;
 }
 
diff --git a/src/filter.rs b/src/filter.rs
index db1a85b..0433cc5 100644
--- a/src/filter.rs
+++ b/src/filter.rs
@@ -1,4 +1,9 @@
+/// Single inpout single output i32 filter
 pub trait Filter {
+    /// Filter configuration type.
+    ///
+    /// While the filter struct owns the state,
+    /// the configuration is decoupled to allow sharing.
     type Config;
     /// Update the filter with a new sample.
     ///
@@ -16,6 +21,9 @@ pub trait Filter {
     fn set(&mut self, x: i32);
 }
 
+/// Nyquist zero
+///
+/// Filter with a flat transfer function and a transfer function zero at Nyquist.
 #[derive(Copy, Clone, Default)]
 pub struct Nyquist(i32);
 impl Filter for Nyquist {
@@ -34,6 +42,7 @@ impl Filter for Nyquist {
     }
 }
 
+/// Repeat another filter
 #[derive(Copy, Clone)]
 pub struct Repeat<const N: usize, T>([T; N]);
 impl<const N: usize, T: Filter> Filter for Repeat<N, T> {
@@ -54,6 +63,7 @@ impl<const N: usize, T: Default + Copy> Default for Repeat<N, T> {
     }
 }
 
+/// Combine two different filters in cascade
 #[derive(Copy, Clone, Default)]
 pub struct Cascade<T, U>(T, U);
 impl<T: Filter, U: Filter> Filter for Cascade<T, U> {
diff --git a/src/hbf.rs b/src/hbf.rs
index 757837f..ad404b5 100644
--- a/src/hbf.rs
+++ b/src/hbf.rs
@@ -1,3 +1,7 @@
+//! Half-band filters and cascades
+//!
+//! Used to perform very efficient high-dynamic range rate changes by powers of two.
+
 use core::{
     iter::Sum,
     ops::{Add, Mul},
@@ -457,12 +461,18 @@ impl Default for HbfDecCascade {
 }
 
 impl HbfDecCascade {
+    /// Set cascade depth
+    ///
+    /// Sets the number of HBF filter stages to apply.
     #[inline]
     pub fn set_depth(&mut self, n: usize) {
         assert!(n <= 4);
         self.depth = n;
     }
 
+    /// Cascade depth
+    ///
+    /// The number of HBF filter stages to apply.
     #[inline]
     pub fn depth(&self) -> usize {
         self.depth
@@ -543,7 +553,7 @@ impl Filter for HbfDecCascade {
 #[derive(Copy, Clone, Debug)]
 pub struct HbfIntCascade {
     depth: usize,
-    pub stages: (
+    stages: (
         HbfInt<
             'static,
             f32,
@@ -586,11 +596,17 @@ impl Default for HbfIntCascade {
 }
 
 impl HbfIntCascade {
+    /// Set cascade depth
+    ///
+    /// Sets the number of HBF filter stages to apply.
     pub fn set_depth(&mut self, n: usize) {
         assert!(n <= 4);
         self.depth = n;
     }
 
+    /// Cascade depth
+    ///
+    /// The number of HBF filter stages to apply.
     pub fn depth(&self) -> usize {
         self.depth
     }
diff --git a/src/iir/mod.rs b/src/iir/mod.rs
index 20713fb..700369d 100644
--- a/src/iir/mod.rs
+++ b/src/iir/mod.rs
@@ -1,3 +1,5 @@
+//! IIR filters, coefficients and applications
+
 mod biquad;
 pub use biquad::*;
 mod coefficients;
diff --git a/src/lib.rs b/src/lib.rs
index 9af5826..78a72a7 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,7 +1,10 @@
-#![cfg_attr(not(test), no_std)]
+#![cfg_attr(not(any(test, doctest, feature = "std")), no_std)]
+#![doc = include_str!("../README.md")]
+#![deny(rust_2018_compatibility)]
+#![deny(rust_2018_idioms)]
+#![warn(missing_docs)]
+#![forbid(unsafe_code)]
 
-mod tools;
-pub use tools::*;
 mod atan2;
 pub use atan2::*;
 mod accu;
diff --git a/src/lockin.rs b/src/lockin.rs
index e81abc7..0ca8669 100644
--- a/src/lockin.rs
+++ b/src/lockin.rs
@@ -1,5 +1,8 @@
 use super::{Complex, ComplexExt, Filter, MulScaled};
 
+/// Lockin filter
+///
+/// Combines two [`Filter`] and an NCO to perform demodulation
 #[derive(Copy, Clone, Default)]
 pub struct Lockin<T> {
     state: [T; 2],
diff --git a/src/lowpass.rs b/src/lowpass.rs
index 3f1bb7f..03ca250 100644
--- a/src/lowpass.rs
+++ b/src/lowpass.rs
@@ -7,7 +7,6 @@ use crate::Filter;
 ///
 /// The filter will cleanly saturate towards the `i32` range.
 ///
-///
 /// Both filters have been optimized for accuracy, dynamic range, and
 /// speed on Cortex-M7.
 #[derive(Copy, Clone)]
@@ -33,9 +32,13 @@ impl<const N: usize> Filter for Lowpass<N> {
     /// `1 << 16 <= k <= q*(1 << 31)`.
     type Config = [i32; N];
     fn update(&mut self, x: i32, k: &Self::Config) -> i32 {
-        let mut d = x.saturating_sub((self.0[0] >> 32) as i32) as i64 * k[0] as i64;
+        let mut d = x.saturating_sub(self.get()) as i64 * k[0] as i64;
         let y;
-        if N >= 2 {
+        if N == 1 {
+            self.0[0] += d;
+            y = self.get();
+            self.0[0] += d;
+        } else if N == 2 {
             d += (self.0[1] >> 32) * k[1] as i64;
             self.0[1] += d;
             self.0[0] += self.0[1];
@@ -47,15 +50,15 @@ impl<const N: usize> Filter for Lowpass<N> {
             self.0[0] += self.0[1];
             self.0[1] += d;
         } else {
-            self.0[0] += d;
-            y = self.get();
-            self.0[0] += d;
+            unimplemented!()
         }
         y
     }
+
     fn get(&self) -> i32 {
         (self.0[0] >> 32) as i32
     }
+
     fn set(&mut self, x: i32) {
         self.0[0] = (x as i64) << 32;
     }
diff --git a/src/num.rs b/src/num.rs
index b1e0b03..0c94963 100644
--- a/src/num.rs
+++ b/src/num.rs
@@ -20,6 +20,9 @@ pub trait FilterNum: 'static + Copy + Num + AsPrimitive<Self::ACCU> {
     /// Undefined result if `max < min`.
     fn macc(self, s: Self::ACCU, min: Self, max: Self, e1: Self) -> (Self, Self);
 
+    /// Clamp to between min and max
+    ///
+    /// Undefined if `min > max`.
     fn clip(self, min: Self, max: Self) -> Self;
 
     /// Multiplication (scaled)
@@ -33,6 +36,7 @@ pub trait FilterNum: 'static + Copy + Num + AsPrimitive<Self::ACCU> {
     where
         Self: AsPrimitive<C>,
         C: Float + AsPrimitive<Self>;
+    // TODO: range check and Result
 }
 
 macro_rules! impl_float {
diff --git a/src/svf.rs b/src/svf.rs
index 5ca408e..817d4cd 100644
--- a/src/svf.rs
+++ b/src/svf.rs
@@ -1,17 +1,28 @@
+//! State variable filter
+
 use num_traits::{Float, FloatConst};
 use serde::{Deserialize, Serialize};
+
+/// Second order state variable filter state
 pub struct State<T> {
+    /// Lowpass output
     pub lp: T,
+    /// Highpass output
     pub hp: T,
+    /// Bandpass output
     pub bp: T,
 }
 
 impl<T: Float> State<T> {
+    /// Bandreject (notch) output
     pub fn br(&self) -> T {
         self.hp + self.lp
     }
 }
 
+/// State variable filter
+///
+/// <https://www.earlevel.com/main/2003/03/02/the-digital-state-variable-filter/>
 #[derive(Copy, Clone, Debug, Deserialize, Serialize, PartialEq, PartialOrd)]
 pub struct Svf<T> {
     f: T,
@@ -19,14 +30,22 @@ pub struct Svf<T> {
 }
 
 impl<T: Float + FloatConst> Svf<T> {
+    /// Set the critical frequency
+    ///
+    /// In units of the sample frequency.
     pub fn set_frequency(&mut self, f0: T) {
         self.f = (T::one() + T::one()) * (T::PI() * f0).sin();
     }
 
+    /// Set the Q parameter
     pub fn set_q(&mut self, q: T) {
-        self.q = T::one()/q;
+        self.q = T::one() / q;
     }
 
+    /// Update the filter
+    ///
+    /// Ingest an input sample and update state correspondingly.
+    /// Selected output(s) are available from [`State`].
     pub fn update(&self, s: &mut State<T>, x0: T) {
         s.lp = s.bp * self.f + s.lp;
         s.hp = x0 - s.lp - s.bp * self.q;
diff --git a/src/tools.rs b/src/tools.rs
deleted file mode 100644
index 90a9627..0000000
--- a/src/tools.rs
+++ /dev/null
@@ -1,54 +0,0 @@
-use core::ops::{Add, Mul, Neg};
-use num_traits::Zero;
-
-pub fn abs<T>(x: T) -> T
-where
-    T: PartialOrd + Zero + Neg<Output = T>,
-{
-    if x >= T::zero() {
-        x
-    } else {
-        -x
-    }
-}
-
-// These are implemented here because core::f32 doesn't have them (yet).
-// They are naive and don't handle inf/nan.
-// `compiler-intrinsics`/llvm should have better (robust, universal, and
-// faster) implementations.
-
-pub fn copysign<T>(x: T, y: T) -> T
-where
-    T: PartialOrd + Zero + Neg<Output = T>,
-{
-    if (x >= T::zero() && y >= T::zero()) || (x <= T::zero() && y <= T::zero()) {
-        x
-    } else {
-        -x
-    }
-}
-
-// Multiply-accumulate vectors `x` and `a`.
-//
-// A.k.a. dot product.
-// Rust/LLVM optimize this nicely.
-pub fn macc<T>(y0: T, x: &[T], a: &[T]) -> T
-where
-    T: Add<Output = T> + Mul<Output = T> + Copy,
-{
-    x.iter()
-        .zip(a)
-        .map(|(x, a)| *x * *a)
-        .fold(y0, |y, xa| y + xa)
-}
-
-pub fn macc_i32(y0: i32, x: &[i32], a: &[i32], shift: u32) -> i32 {
-    // Rounding bias, half up
-    let y0 = ((y0 as i64) << shift) + (1 << (shift - 1));
-    let y = x
-        .iter()
-        .zip(a)
-        .map(|(x, a)| *x as i64 * *a as i64)
-        .fold(y0, |y, xa| y + xa);
-    (y >> shift) as i32
-}