diff --git a/.gitignore b/.gitignore index 1face5b..02e1207 100644 --- a/.gitignore +++ b/.gitignore @@ -2,3 +2,4 @@ *.jl.*.cov *.jl.mem .DS_Store +Manifest.toml \ No newline at end of file diff --git a/Project.toml b/Project.toml index 3dc683a..183993b 100644 --- a/Project.toml +++ b/Project.toml @@ -1,13 +1,14 @@ name = "KrylovKit" uuid = "0b1a1467-8014-51b9-945f-bf0ae24f4b77" authors = ["Jutho Haegeman"] -version = "0.6.1" +version = "0.7.0" [deps] ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4" GPUArraysCore = "46192b85-c4d5-4398-a991-12ede77f4527" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7" +VectorInterface = "409d34a3-91d5-4945-b6ec-7529ddf182d8" [compat] Aqua = "0.6, 0.7, 0.8" @@ -15,6 +16,7 @@ ChainRulesCore = "1" ChainRulesTestUtils = "1" FiniteDifferences = "0.12" GPUArraysCore = "0.1" +VectorInterface = "0.4" LinearAlgebra = "1" Random = "1" Printf = "1" diff --git a/docs/src/index.md b/docs/src/index.md index fbe80e0..fca23ab 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -61,20 +61,8 @@ However, KrylovKit.jl distinguishes itself from the previous packages in the fol 2. KrylovKit does not assume that the vectors involved in the problem are actual subtypes of `AbstractVector`. Any Julia object that behaves as a vector is supported, so in particular higher-dimensional arrays or any custom user type that supports the - following functions (with `v` and `w` two instances of this type and `α, β` scalars - (i.e. `Number`)): - * `Base.:*(α, v)`: multiply `v` with a scalar `α`, which can be of a different scalar - type; in particular this method is used to create vectors similar to `v` but with a - different type of underlying scalars. - * `Base.similar(v)`: a way to construct vectors which are exactly similar to `v` - * `LinearAlgebra.mul!(w, v, α)`: out of place scalar multiplication; multiply - vector `v` with scalar `α` and store the result in `w` - * `LinearAlgebra.rmul!(v, α)`: in-place scalar multiplication of `v` with `α`; in - particular with `α = false`, `v` is the corresponding zero vector - * `LinearAlgebra.axpy!(α, v, w)`: store in `w` the result of `α*v + w` - * `LinearAlgebra.axpby!(α, v, β, w)`: store in `w` the result of `α*v + β*w` - * `LinearAlgebra.dot(v,w)`: compute the inner product of two vectors - * `LinearAlgebra.norm(v)`: compute the 2-norm of a vector + interface as defined in + [`VectorInterface.jl`](https://github.com/Jutho/VectorInterface.jl) Algorithms in KrylovKit.jl are tested against such a minimal implementation (named `MinimalVec`) in the test suite. This type is only defined in the tests. However, @@ -84,14 +72,14 @@ However, KrylovKit.jl distinguishes itself from the previous packages in the fol * [`RecursiveVec`](@ref) can be used for grouping a set of vectors into a single vector like structure (can be used recursively). This is more robust than trying to use nested `Vector{<:Vector}` types. - * [`InnerProductVec`](@ref) can be used to redefine the inner product (i.e. `dot`) + * [`InnerProductVec`](@ref) can be used to redefine the inner product (i.e. `inner`) and corresponding norm (`norm`) of an already existing vector like object. The latter should help with implementing certain type of preconditioners. ## Current functionality The following algorithms are currently implemented -* `linsolve`: [`CG`](@ref), [`GMRES`](@ref) +* `linsolve`: [`CG`](@ref), [`GMRES`](@ref), [`BiCGStab`](@ref) * `eigsolve`: a Krylov-Schur algorithm (i.e. 
with tick restarts) for extremal eigenvalues of normal (i.e. not generalized) eigenvalue problems, corresponding to [`Lanczos`](@ref) for real symmetric or complex hermitian linear maps, and to diff --git a/docs/src/man/implementation.md b/docs/src/man/implementation.md index 0d68c37..f34c8d4 100644 --- a/docs/src/man/implementation.md +++ b/docs/src/man/implementation.md @@ -22,11 +22,11 @@ KrylovKit.orthonormalize The expansion coefficients of a general vector in terms of a given orthonormal basis can be obtained as ```@docs -KrylovKit.project! +KrylovKit.project!! ``` whereas the inverse calculation is obtained as ```@docs -KrylovKit.unproject! +KrylovKit.unproject!! ``` An orthonormal basis can be transformed using a rank-1 update using diff --git a/src/KrylovKit.jl b/src/KrylovKit.jl index 8a5e34d..54a8da3 100644 --- a/src/KrylovKit.jl +++ b/src/KrylovKit.jl @@ -20,7 +20,8 @@ The high level interface of KrylovKit is provided by the following functions: computes a linear combination of the `ϕⱼ` functions which generalize `ϕ₀(z) = exp(z)`. """ module KrylovKit - +using VectorInterface +using VectorInterface: add!! using LinearAlgebra using Printf using ChainRulesCore @@ -28,7 +29,7 @@ using GPUArraysCore const IndexRange = AbstractRange{Int} export linsolve, eigsolve, geneigsolve, svdsolve, schursolve, exponentiate, expintegrator -export orthogonalize, orthogonalize!, orthonormalize, orthonormalize! +export orthogonalize, orthogonalize!!, orthonormalize, orthonormalize!! export basis, rayleighquotient, residual, normres, rayleighextension export initialize, initialize!, expand!, shrink! export ClassicalGramSchmidt, ClassicalGramSchmidt2, ClassicalGramSchmidtIR diff --git a/src/adrules/linsolve.jl b/src/adrules/linsolve.jl index 0d2410c..10284b2 100644 --- a/src/adrules/linsolve.jl +++ b/src/adrules/linsolve.jl @@ -66,18 +66,20 @@ function ChainRulesCore.rrule(config::RuleConfig{>:HasReverseMode}, ∂self = NoTangent() ∂x₀ = ZeroTangent() ∂algorithm = NoTangent() - (∂b, reverse_info) = linsolve(fᴴ, x̄, (zero(a₀) * zero(a₁)) * x̄, algorithm, - conj(a₀), conj(a₁)) + T = VectorInterface.promote_scale(VectorInterface.promote_scale(x̄, a₀), + scalartype(a₁)) + ∂b, reverse_info = linsolve(fᴴ, x̄, zerovector(x̄, T), algorithm, conj(a₀), + conj(a₁)) if reverse_info.converged == 0 @warn "Linear problem for reverse rule did not converge." reverse_info end - ∂f = @thunk(f_pullback(-conj(a₁) * ∂b)[1]) - ∂a₀ = @thunk(-dot(x, ∂b)) + ∂f = @thunk(f_pullback(scale(∂b, -conj(a₁)))[1]) + ∂a₀ = @thunk(-inner(x, ∂b)) # ∂a₁ = @thunk(-dot(f(x), ∂b)) if a₀ == zero(a₀) && a₁ == one(a₁) - ∂a₁ = @thunk(-dot(b, ∂b)) + ∂a₁ = @thunk(-inner(b, ∂b)) else - ∂a₁ = @thunk(-dot((b - a₀ * x) / a₁, ∂b)) + ∂a₁ = @thunk(-inner(scale!!(add(b, x, -a₀), inv(a₁)), ∂b)) end return ∂self, ∂f, ∂b, ∂x₀, ∂algorithm, ∂a₀, ∂a₁ end @@ -91,17 +93,17 @@ function ChainRulesCore.frule((_, ΔA, Δb, Δx₀, _, Δa₀, Δa₁)::Tuple, : (x, info) = linsolve(A, b, x₀, algorithm, a₀, a₁) if Δb isa ChainRulesCore.AbstractZero - rhs = zero(b) + rhs = zerovector(b) else - rhs = (1 - Δa₁) * Δb + rhs = scale(Δb, (1 - Δa₁)) end if !iszero(Δa₀) - rhs = axpy!(-Δa₀, x, rhs) + rhs = add!!(rhs, x, -Δa₀) end if !iszero(ΔA) rhs = mul!(rhs, ΔA, x, -a₁, true) end - (Δx, forward_info) = linsolve(A, rhs, zero(rhs), algorithm, a₀, a₁) + (Δx, forward_info) = linsolve(A, rhs, zerovector(rhs), algorithm, a₀, a₁) if info.converged > 0 && forward_info.converged == 0 @warn "The tangent linear problem did not converge, whereas the primal linear problem did." 
end @@ -121,17 +123,17 @@ function ChainRulesCore.frule(config::RuleConfig{>:HasForwardsMode}, (x, info) = linsolve(f, b, x₀, algorithm, a₀, a₁) if Δb isa AbstractZero - rhs = false * b + rhs = zerovector(b) else - rhs = (1 - Δa₁) * Δb + rhs = scale(Δb, (1 - Δa₁)) end if !iszero(Δa₀) - rhs = axpy!(-Δa₀, x, rhs) + rhs = add!!(rhs, x, -Δa₀) end if !(Δf isa AbstractZero) - rhs = axpy!(-a₁, frule_via_ad(config, (Δf, ZeroTangent()), f, x), rhs) + rhs = add!!(rhs, frule_via_ad(config, (Δf, ZeroTangent()), f, x), -a₁) end - (Δx, forward_info) = linsolve(f, rhs, false * rhs, algorithm, a₀, a₁) + (Δx, forward_info) = linsolve(f, rhs, zerovector(rhs), algorithm, a₀, a₁) if info.converged > 0 && forward_info.converged == 0 @warn "The tangent linear problem did not converge, whereas the primal linear problem did." end diff --git a/src/apply.jl b/src/apply.jl index db61534..2a7f916 100644 --- a/src/apply.jl +++ b/src/apply.jl @@ -4,7 +4,7 @@ apply(f, x) = f(x) function apply(operator, x, α₀, α₁) y = apply(operator, x) if α₀ != zero(α₀) || α₁ != one(α₁) - axpby!(α₀, x, α₁, y) + y = add!!(y, x, α₀, α₁) end return y diff --git a/src/dense/givens.jl b/src/dense/givens.jl index 010713a..3c83f16 100644 --- a/src/dense/givens.jl +++ b/src/dense/givens.jl @@ -29,9 +29,9 @@ end function _rmul!(b::OrthonormalBasis, G::Givens) q1, q2 = b[G.i1], b[G.i2] - q1old = mul!(similar(q1), q1, true) - q1 = axpby!(-conj(G.s), q2, G.c, q1) - q2 = axpby!(G.s, q1old, G.c, q2) + q1′ = add(q1, q2, -conj(G.s), G.c) + q2′ = add!!(q2, q1, G.s, G.c) + b[G.i1], b[G.i2] = q1′, q2′ return b end diff --git a/src/dense/reflector.jl b/src/dense/reflector.jl index fd0435c..ba59bf5 100644 --- a/src/dense/reflector.jl +++ b/src/dense/reflector.jl @@ -145,10 +145,10 @@ function LinearAlgebra.rmul!(b::OrthonormalBasis, H::Householder) r = H.r β = H.β iszero(β) && return b - w = similar(b[first(r)]) + w = zerovector(b[first(r)]) @inbounds begin - unproject!(w, b, v, 1, 0, r) - rank1update!(b, w, v, -β, 1, r) + w = unproject!!(w, b, v, 1, 0, r) + b = rank1update!(b, w, v, -β, 1, r) end return b end diff --git a/src/eigsolve/arnoldi.jl b/src/eigsolve/arnoldi.jl index f520da2..b5ef871 100644 --- a/src/eigsolve/arnoldi.jl +++ b/src/eigsolve/arnoldi.jl @@ -106,7 +106,7 @@ function schursolve(A, x₀, howmany::Int, which::Selector, alg::Arnoldi) [B * u for u in cols(U, 1:howmany)] end residuals = let r = residual(fact) - [last(u) * r for u in cols(U, 1:howmany)] + [scale(r, last(u)) for u in cols(U, 1:howmany)] end normresiduals = [normres(fact) * abs(last(u)) for u in cols(U, 1:howmany)] @@ -145,7 +145,7 @@ function eigsolve(A, x₀, howmany::Int, which::Selector, alg::Arnoldi) [B * v for v in cols(V)] end residuals = let r = residual(fact) - [last(v) * r for v in cols(V)] + [scale(r, last(v)) for v in cols(V)] end normresiduals = [normres(fact) * abs(last(v)) for v in cols(V)] @@ -271,7 +271,7 @@ function _schursolve(A, x₀, howmany::Int, which::Selector, alg::Arnoldi) B = basis(fact) basistransform!(B, view(U, :, 1:keep)) r = residual(fact) - B[keep + 1] = rmul!(r, 1 / normres(fact)) + B[keep + 1] = scale!!(r, 1 / normres(fact)) # Shrink Arnoldi factorization fact = shrink!(fact, keep) diff --git a/src/eigsolve/golubye.jl b/src/eigsolve/golubye.jl index 50d525e..f6bc0ca 100644 --- a/src/eigsolve/golubye.jl +++ b/src/eigsolve/golubye.jl @@ -10,15 +10,15 @@ function geneigsolve(f, x₀, howmany::Int, which::Selector, alg::GolubYe) numops = 1 β₀ = norm(x₀) iszero(β₀) && throw(ArgumentError("initial vector should not have norm zero")) - xax = dot(x₀, ax₀) /
β₀^2 - xbx = dot(x₀, bx₀) / β₀^2 + xax = inner(x₀, ax₀) / β₀^2 + xbx = inner(x₀, bx₀) / β₀^2 T = promote_type(typeof(xax), typeof(xbx)) invβ₀ = one(T) / β₀ - v = invβ₀ * x₀ # v = mul!(similar(x₀, T), x₀, invβ₀) - av = mul!(similar(v), ax₀, invβ₀) - bv = mul!(similar(v), bx₀, invβ₀) + v = scale(x₀, invβ₀) + av = scale!!(zerovector(v), ax₀, invβ₀) + bv = scale!!(zerovector(v), bx₀, invβ₀) ρ = checkhermitian(xax) / checkposdef(xbx) - r = axpy!(-ρ, bv, av) + r = add!!(av, bv, -ρ) tol::typeof(ρ) = alg.tol # allocate storage @@ -32,7 +32,8 @@ function geneigsolve(f, x₀, howmany::Int, which::Selector, alg::GolubYe) BV = [bv] sizehint!(V, krylovdim + 1) sizehint!(BV, krylovdim + 1) - r, α = orthogonalize!(r, v, alg.orth) # α should be zero, otherwise ρ was miscalculated + + r, α = orthogonalize!!(r, v, alg.orth) # α should be zero, otherwise ρ was miscalculated β = norm(r) converged = 0 @@ -52,16 +53,16 @@ function geneigsolve(f, x₀, howmany::Int, which::Selector, alg::GolubYe) if K == krylovdim - converged || β <= tol # process if numiter > 1 # add vold - v, or thus just vold as v is first vector in subspace - v, = orthonormalize!(vold, V, alg.orth) + v, = orthonormalize!!(vold, V, alg.orth) av, bv = genapply(f, v) numops += 1 - av = axpy!(-ρ, bv, av) + av = add!!(av, bv, -ρ) for i in 1:K - HHA[i, K + 1] = dot(V[i], av) + HHA[i, K + 1] = inner(V[i], av) HHA[K + 1, i] = conj(HHA[i, K + 1]) end K += 1 - HHA[K, K] = checkhermitian(dot(v, av)) + HHA[K, K] = checkhermitian(inner(v, av)) push!(V, v) push!(BV, bv) end @@ -70,13 +71,13 @@ function geneigsolve(f, x₀, howmany::Int, which::Selector, alg::GolubYe) v, = orthonormalize(vectors[i], V, alg.orth) av, bv = genapply(f, v) numops += 1 - av = axpy!(-ρ, bv, av) + av = add!!(av, bv, -ρ) for j in 1:K - HHA[j, K + 1] = dot(V[j], av) + HHA[j, K + 1] = inner(V[j], av) HHA[K + 1, j] = conj(HHA[j, K + 1]) end K += 1 - HHA[K, K] = checkhermitian(dot(v, av)) + HHA[K, K] = checkhermitian(inner(v, av)) push!(V, v) push!(BV, bv) end @@ -89,10 +90,8 @@ function geneigsolve(f, x₀, howmany::Int, which::Selector, alg::GolubYe) D, Z = geneigh!(HA, HB) by, rev = eigsort(which) - p = sortperm(D; by=by, rev=rev) - - # replace vold - vold = V[1] + p = sortperm(D; by, rev) + xold = V[1] converged = 0 resize!(values, 0) @@ -101,11 +100,11 @@ function geneigsolve(f, x₀, howmany::Int, which::Selector, alg::GolubYe) resize!(normresiduals, 0) while converged < K z = view(Z, :, p[converged + 1]) - v = mul!(similar(vold), V, z) + v = mul!(zerovector(vold), V, z) av, bv = genapply(f, v) numops += 1 - ρ = checkhermitian(dot(v, av)) / checkposdef(dot(v, bv)) - r = axpy!(-ρ, bv, av) + ρ = checkhermitian(inner(v, av)) / checkposdef(inner(v, bv)) + r = add!!(av, bv, -ρ) β = norm(r) if β > tol * norm(z) @@ -125,11 +124,11 @@ function geneigsolve(f, x₀, howmany::Int, which::Selector, alg::GolubYe) elseif numiter == maxiter for k in (converged + 1):howmany z = view(Z, :, p[k]) - v = mul!(similar(vold), V, z) + v = mul!(zerovector(vold), V, z) av, bv = genapply(f, v) numops += 1 - ρ = checkhermitian(dot(v, av)) / checkposdef(dot(v, bv)) - r = axpy!(-ρ, bv, av) + ρ = checkhermitian(inner(v, av)) / checkposdef(inner(v, bv)) + r = add!!(av, bv, -ρ) β = norm(r) push!(values, ρ) @@ -151,7 +150,7 @@ function geneigsolve(f, x₀, howmany::Int, which::Selector, alg::GolubYe) if K < krylovdim - converged # expand - v = rmul!(r, 1 / β) + v = scale!!(r, 1 / β) push!(V, v) HHA[K + 1, K] = β HHA[K, K + 1] = β @@ -175,10 +174,10 @@ function geneigsolve(f, x₀, howmany::Int, which::Selector, alg::GolubYe) K 
= 1 invβ = 1 / norm(v) - v = rmul!(v, invβ) - bv = rmul!(bv, invβ) - r = rmul!(r, invβ) - r, α = orthogonalize!(r, v, alg.orth) # α should be zero, otherwise ρ was miscalculated + v = scale!!(v, invβ) + bv = scale!!(bv, invβ) + r = scale!!(r, invβ) + r, α = orthogonalize!!(r, v, alg.orth) # α should be zero, otherwise ρ was miscalculated β = norm(r) push!(V, v) HHA[K, K] = real(α) @@ -208,33 +207,33 @@ end function golubyerecurrence(f, ρ, V::OrthonormalBasis, β, orth::ClassicalGramSchmidt) v = V[end] av, bv = genapply(f, v) - w = axpy!(-ρ, bv, av) - α = dot(v, w) + w = add!!(av, bv, -ρ) + α = inner(v, w) - w = axpy!(-β, V[end - 1], w) - w = axpy!(-α, v, w) + w = add!!(w, V[end - 1], -β) + w = add!!(w, v, -α) β = norm(w) return w, α, β, bv end function golubyerecurrence(f, ρ, V::OrthonormalBasis, β, orth::ModifiedGramSchmidt) v = V[end] av, bv = genapply(f, v) - w = axpy!(-ρ, bv, av) - w = axpy!(-β, V[end - 1], w) + w = add!!(av, bv, -ρ) + w = add!!(w, V[end - 1], -β) - w, α = orthogonalize!(w, v, orth) + w, α = orthogonalize!!(w, v, orth) β = norm(w) return w, α, β, bv end function golubyerecurrence(f, ρ, V::OrthonormalBasis, β, orth::ClassicalGramSchmidt2) v = V[end] av, bv = genapply(f, v) - w = axpy!(-ρ, bv, av) - α = dot(v, w) - w = axpy!(-β, V[end - 1], w) - w = axpy!(-α, v, w) + w = add!!(av, bv, -ρ) + α = inner(v, w) + w = add!!(w, V[end - 1], -β) + w = add!!(w, v, -α) - w, s = orthogonalize!(w, V, ClassicalGramSchmidt()) + w, s = orthogonalize!!(w, V, ClassicalGramSchmidt()) α += s[end] β = norm(w) return w, α, β, bv @@ -242,13 +241,13 @@ end function golubyerecurrence(f, ρ, V::OrthonormalBasis, β, orth::ModifiedGramSchmidt2) v = V[end] av, bv = genapply(f, v) - w = axpy!(-ρ, bv, av) - w = axpy!(-β, V[end - 1], w) - w, α = orthogonalize!(w, v, ModifiedGramSchmidt()) + w = add!!(av, bv, -ρ) + w = add!!(w, V[end - 1], -β) + w, α = orthogonalize!!(w, v, ModifiedGramSchmidt()) s = α for q in V - w, s = orthogonalize!(w, q, ModifiedGramSchmidt()) + w, s = orthogonalize!!(w, q, ModifiedGramSchmidt()) end α += s β = norm(w) @@ -257,17 +256,17 @@ end function golubyerecurrence(f, ρ, V::OrthonormalBasis, β, orth::ClassicalGramSchmidtIR) v = V[end] av, bv = genapply(f, v) - w = axpy!(-ρ, bv, av) - α = dot(v, w) - w = axpy!(-β, V[end - 1], w) - w = axpy!(-α, v, w) + w = add!!(av, bv, -ρ) + α = inner(v, w) + w = add!!(w, V[end - 1], -β) + w = add!!(w, v, -α) ab2 = abs2(α) + abs2(β) β = norm(w) nold = sqrt(abs2(β) + ab2) while eps(one(β)) < β < orth.η * nold nold = β - w, s = orthogonalize!(w, V, ClassicalGramSchmidt()) + w, s = orthogonalize!!(w, V, ClassicalGramSchmidt()) α += s[end] β = norm(w) end @@ -276,10 +275,10 @@ end function golubyerecurrence(f, ρ, V::OrthonormalBasis, β, orth::ModifiedGramSchmidtIR) v = V[end] av, bv = genapply(f, v) - w = axpy!(-ρ, bv, av) - w = axpy!(-β, V[end - 1], w) + w = add!!(av, bv, -ρ) + w = add!!(w, V[end - 1], -β) - w, α = orthogonalize!(w, v, ModifiedGramSchmidt()) + w, α = orthogonalize!!(w, v, ModifiedGramSchmidt()) ab2 = abs2(α) + abs2(β) β = norm(w) nold = sqrt(abs2(β) + ab2) @@ -287,7 +286,7 @@ function golubyerecurrence(f, ρ, V::OrthonormalBasis, β, orth::ModifiedGramSch nold = β s = zero(α) for q in V - w, s = orthogonalize!(w, q, ModifiedGramSchmidt()) + w, s = orthogonalize!!(w, q, ModifiedGramSchmidt()) end α += s β = norm(w) @@ -298,9 +297,9 @@ end function buildHB!(HB, V, BV) m = length(V) @inbounds for j in 1:m - HB[j, j] = checkposdef(dot(V[j], BV[j])) + HB[j, j] = checkposdef(inner(V[j], BV[j])) for i in (j + 1):m - HB[i, j] = dot(V[i], 
BV[j]) + HB[i, j] = inner(V[i], BV[j]) HB[j, i] = conj(HB[i, j]) end end diff --git a/src/eigsolve/lanczos.jl b/src/eigsolve/lanczos.jl index b7159b2..097c7e1 100644 --- a/src/eigsolve/lanczos.jl +++ b/src/eigsolve/lanczos.jl @@ -104,9 +104,9 @@ function eigsolve(A, x₀, howmany::Int, which::Selector, alg::Lanczos) # Update B by applying U using Householder reflections B = basis(fact) - basistransform!(B, view(U, :, 1:keep)) + B = basistransform!(B, view(U, :, 1:keep)) r = residual(fact) - B[keep + 1] = rmul!(r, 1 / β) + B[keep + 1] = scale!!(r, 1 / β) # Shrink Lanczos factorization fact = shrink!(fact, keep) @@ -127,7 +127,7 @@ function eigsolve(A, x₀, howmany::Int, which::Selector, alg::Lanczos) [B * v for v in cols(V)] end residuals = let r = residual(fact) - [last(v) * r for v in cols(V)] + [scale(r, last(v)) for v in cols(V)] end normresiduals = let f = f map(i -> abs(f[i]), 1:howmany) diff --git a/src/eigsolve/svdsolve.jl b/src/eigsolve/svdsolve.jl index d00dbc9..a74bf3d 100644 --- a/src/eigsolve/svdsolve.jl +++ b/src/eigsolve/svdsolve.jl @@ -231,7 +231,7 @@ function svdsolve(A, x₀, howmany::Int, which::Symbol, alg::GKL) # Shrink GKL factorization (no longer strictly GKL) r = residual(fact) - U[keep + 1] = rmul!(r, 1 / normres(fact)) + U[keep + 1] = scale!!(r, 1 / normres(fact)) H = fill!(view(HH, 1:(keep + 1), 1:keep), zero(eltype(HH))) @inbounds for j in 1:keep H[j, j] = S[j] @@ -277,7 +277,7 @@ function svdsolve(A, x₀, howmany::Int, which::Symbol, alg::GKL) [V * v for v in cols(Qv')] end residuals = let r = residual(fact) - [last(v) * r for v in cols(Qv')] + [scale(r, last(v)) for v in cols(Qv')] end normresiduals = let f = f map(i -> abs(f[i]), 1:howmany) diff --git a/src/factorizations/arnoldi.jl b/src/factorizations/arnoldi.jl index a472542..b02a87f 100644 --- a/src/factorizations/arnoldi.jl +++ b/src/factorizations/arnoldi.jl @@ -139,30 +139,30 @@ function initialize(iter::ArnoldiIterator; verbosity::Int=0) β₀ = norm(x₀) iszero(β₀) && throw(ArgumentError("initial vector should not have norm zero")) Ax₀ = apply(iter.operator, x₀) - α = dot(x₀, Ax₀) / (β₀ * β₀) + α = inner(x₀, Ax₀) / (β₀ * β₀) T = typeof(α) # this line determines the vector type that we will henceforth use - v = mul!(zero(T) * Ax₀, x₀, 1 / β₀) # (one(T) / β₀) * x₀ # mul!(similar(x₀, T), x₀, 1/β₀) + v = add!!(zerovector(Ax₀, T), x₀, 1 / β₀) if typeof(Ax₀) != typeof(v) - r = mul!(similar(v), Ax₀, 1 / β₀) + r = add!!(zerovector(v), Ax₀, 1 / β₀) else - r = rmul!(Ax₀, 1 / β₀) + r = scale!!(Ax₀, 1 / β₀) end βold = norm(r) - r = axpy!(-α, v, r) + r = add!!(r, v, -α) β = norm(r) # possibly reorthogonalize if iter.orth isa Union{ClassicalGramSchmidt2,ModifiedGramSchmidt2} - dα = dot(v, r) + dα = inner(v, r) α += dα - r = axpy!(-dα, v, r) + r = add!!(r, v, -dα) β = norm(r) elseif iter.orth isa Union{ClassicalGramSchmidtIR,ModifiedGramSchmidtIR} while eps(one(β)) < β < iter.orth.η * βold βold = β - dα = dot(v, r) + dα = inner(v, r) α += dα - r = axpy!(-dα, v, r) + r = add!!(r, v, -dα) β = norm(r) end end @@ -181,9 +181,9 @@ function initialize!(iter::ArnoldiIterator, state::ArnoldiFactorization; verbosi end H = empty!(state.H) - v = mul!(V[1], x₀, 1 / norm(x₀)) - w = apply(iter.operator, v) - r, α = orthogonalize!(w, v, iter.orth) + V[1] = scale!!(V[1], x₀, 1 / norm(x₀)) + w = apply(iter.operator, V[1]) + r, α = orthogonalize!!(w, V[1], iter.orth) β = norm(r) state.k = 1 push!(H, α, β) @@ -200,10 +200,10 @@ function expand!(iter::ArnoldiIterator, state::ArnoldiFactorization; verbosity:: H = state.H r = state.r β = 
normres(state) - push!(V, rmul!(r, 1 / β)) + push!(V, scale(r, 1 / β)) m = length(H) resize!(H, m + k + 1) - r, β = arnoldirecurrence!(iter.operator, V, view(H, (m + 1):(m + k)), iter.orth) + r, β = arnoldirecurrence!!(iter.operator, V, view(H, (m + 1):(m + k)), iter.orth) H[m + k + 1] = β state.r = r if verbosity > 0 @@ -221,16 +221,16 @@ function shrink!(state::ArnoldiFactorization, k) r = pop!(V) resize!(H, (k * k + 3 * k) >> 1) state.k = k - state.r = rmul!(r, normres(state)) + state.r = scale!!(r, normres(state)) return state end # Arnoldi recurrence: simply use provided orthonormalization routines -function arnoldirecurrence!(operator, - V::OrthonormalBasis, - h::AbstractVector, - orth::Orthogonalizer) +function arnoldirecurrence!!(operator, + V::OrthonormalBasis, + h::AbstractVector, + orth::Orthogonalizer) w = apply(operator, last(V)) - r, h = orthogonalize!(w, V, h, orth) + r, h = orthogonalize!!(w, V, h, orth) return r, norm(r) end diff --git a/src/factorizations/gkl.jl b/src/factorizations/gkl.jl index eec5e9d..02c42e3 100644 --- a/src/factorizations/gkl.jl +++ b/src/factorizations/gkl.jl @@ -191,18 +191,19 @@ function initialize(iter::GKLIterator; verbosity::Int=0) v₀ = apply_adjoint(iter.operator, u₀) α = norm(v₀) / β₀ Av₀ = apply_normal(iter.operator, v₀) # apply operator - α² = dot(u₀, Av₀) / β₀^2 + α² = inner(u₀, Av₀) / β₀^2 α² ≈ α * α || throw(ArgumentError("operator and its adjoint are not compatible")) T = typeof(α²) # these lines determines the type that we will henceforth use - u = mul!(zero(T) * Av₀, u₀, 1 / β₀) # (one(T) / β₀) * u₀ - v = (one(T) / (α * β₀)) * v₀ + + u = scale!!(zerovector(u₀, T), u₀, 1 / β₀) # (one(T) / β₀) * u₀ + v = scale(v₀, one(T) / (α * β₀)) if typeof(Av₀) == typeof(u) - r = rmul!(Av₀, 1 / (α * β₀)) + r = scale!!(Av₀, 1 / (α * β₀)) else - r = mul!(similar(u), Av₀, 1 / (α * β₀)) + r = scale!!(zerovector(u), Av₀, 1 / (α * β₀)) end - r = axpy!(-α, u, r) + r = add!!(r, u, -α) β = norm(r) U = OrthonormalBasis([u]) @@ -225,12 +226,12 @@ function initialize!(iter::GKLIterator, state::GKLFactorization; verbosity::Int= αs = empty!(state.αs) βs = empty!(state.βs) - u = mul!(U[1], iter.u₀, 1 / norm(iter.u₀)) + u = scale!!(U[1], iter.u₀, 1 / norm(iter.u₀)) v = apply_adjoint(iter.operator, u) α = norm(v) - rmul!(v, 1 / α) + v = scale!!(v, inv(α)) r = apply_normal(iter.operator, v) - r = axpy!(-α, u, r) + r = add!!(r, u, -α) β = norm(r) state.k = 1 @@ -249,7 +250,7 @@ function expand!(iter::GKLIterator, state::GKLFactorization; verbosity::Int=0) U = state.U V = state.V r = state.r - U = push!(U, rmul!(r, 1 / βold)) + U = push!(U, scale!!(r, 1 / βold)) v, r, α, β = gklrecurrence(iter.operator, U, V, βold, iter.orth) push!(V, v) @@ -281,7 +282,7 @@ function shrink!(state::GKLFactorization, k) resize!(state.αs, k) resize!(state.βs, k) state.k = k - state.r = rmul!(r, normres(state)) + state.r = scale!!(r, normres(state)) return state end @@ -293,12 +294,12 @@ function gklrecurrence(operator, orth::Union{ClassicalGramSchmidt,ModifiedGramSchmidt}) u = U[end] v = apply_adjoint(operator, u) - v = axpy!(-β, V[end], v) + v = add!!(v, V[end], -β) α = norm(v) - rmul!(v, inv(α)) + v = scale!!(v, inv(α)) r = apply_normal(operator, v) - r = axpy!(-α, u, r) + r = add!!(r, u, -α) β = norm(r) return v, r, α, β end @@ -309,14 +310,14 @@ function gklrecurrence(operator, orth::ClassicalGramSchmidt2) u = U[end] v = apply_adjoint(operator, u) - v = axpy!(-β, V[end], v) # not necessary if we definitely reorthogonalize next step and previous step + v = add!!(v, V[end], -β) # not 
necessary if we definitely reorthogonalize next step and previous step # v, = orthogonalize!(v, V, ClassicalGramSchmidt()) α = norm(v) - rmul!(v, inv(α)) + v = scale!!(v, inv(α)) r = apply_normal(operator, v) - r = axpy!(-α, u, r) - r, = orthogonalize!(r, U, ClassicalGramSchmidt()) + r = add!!(r, u, -α) + r, = orthogonalize!!(r, U, ClassicalGramSchmidt()) β = norm(r) return v, r, α, β end @@ -327,17 +328,17 @@ function gklrecurrence(operator, orth::ModifiedGramSchmidt2) u = U[end] v = apply_adjoint(operator, u) - v = axpy!(-β, V[end], v) + v = add!!(v, V[end], -β) # for q in V # not necessary if we definitely reorthogonalize next step and previous step # v, = orthogonalize!(v, q, ModifiedGramSchmidt()) # end α = norm(v) - rmul!(v, inv(α)) + v = scale!!(v, inv(α)) r = apply_normal(operator, v) - r = axpy!(-α, u, r) + r = add!!(r, u, -α) for q in U - r, = orthogonalize!(r, q, ModifiedGramSchmidt()) + r, = orthogonalize!!(r, q, ModifiedGramSchmidt()) end β = norm(r) return v, r, α, β @@ -349,23 +350,23 @@ function gklrecurrence(operator, orth::ClassicalGramSchmidtIR) u = U[end] v = apply_adjoint(operator, u) - v = axpy!(-β, V[end], v) + v = add!!(v, V[end], -β) α = norm(v) nold = sqrt(abs2(α) + abs2(β)) while α < orth.η * nold nold = α - v, = orthogonalize!(v, V, ClassicalGramSchmidt()) + v, = orthogonalize!!(v, V, ClassicalGramSchmidt()) α = norm(v) end - rmul!(v, inv(α)) + v = scale!!(v, inv(α)) r = apply_normal(operator, v) - r = axpy!(-α, u, r) + r = add!!(r, u, -α) β = norm(r) nold = sqrt(abs2(α) + abs2(β)) while eps(one(β)) < β < orth.η * nold nold = β - r, = orthogonalize!(r, U, ClassicalGramSchmidt()) + r, = orthogonalize!!(r, U, ClassicalGramSchmidt()) β = norm(r) end @@ -378,26 +379,26 @@ function gklrecurrence(operator, orth::ModifiedGramSchmidtIR) u = U[end] v = apply_adjoint(operator, u) - v = axpy!(-β, V[end], v) + v = add!!(v, V[end], -β) α = norm(v) nold = sqrt(abs2(α) + abs2(β)) while eps(one(α)) < α < orth.η * nold nold = α for q in V - v, = orthogonalize!(v, q, ModifiedGramSchmidt()) + v, = orthogonalize!!(v, q, ModifiedGramSchmidt()) end α = norm(v) end - rmul!(v, inv(α)) + v = scale!!(v, inv(α)) r = apply_normal(operator, v) - r = axpy!(-α, u, r) + r = add!!(r, u, -α) β = norm(r) nold = sqrt(abs2(α) + abs2(β)) while eps(one(β)) < β < orth.η * nold nold = β for q in U - r, = orthogonalize!(r, q, ModifiedGramSchmidt()) + r, = orthogonalize!!(r, q, ModifiedGramSchmidt()) end β = norm(r) end diff --git a/src/factorizations/lanczos.jl b/src/factorizations/lanczos.jl index dcb8668..ccf3ef5 100644 --- a/src/factorizations/lanczos.jl +++ b/src/factorizations/lanczos.jl @@ -67,7 +67,7 @@ particular, `LanczosIterator` uses the [Lanczos iteration](https://en.wikipedia.org/wiki/Lanczos_algorithm) scheme to build a successively expanding Lanczos factorization. While `f` cannot be tested to be symmetric or hermitian directly when the linear map is encoded as a general callable object or function, -it is tested whether the imaginary part of `dot(v, f(v))` is sufficiently small to be +it is tested whether the imaginary part of `inner(v, f(v))` is sufficiently small to be neglected. 
The argument `f` can be a matrix, or a function accepting a single argument `v`, so that @@ -170,39 +170,39 @@ function initialize(iter::LanczosIterator; verbosity::Int=0) β₀ = norm(x₀) iszero(β₀) && throw(ArgumentError("initial vector should not have norm zero")) Ax₀ = apply(iter.operator, x₀) - α = dot(x₀, Ax₀) / (β₀ * β₀) + α = inner(x₀, Ax₀) / (β₀ * β₀) n = abs(α) imag(α) <= sqrt(max(eps(n), eps(one(n)))) || error("operator does not appear to be hermitian: $(imag(α)) vs $n") T = typeof(α) # this line determines the vector type that we will henceforth use - v = mul!(zero(T) * Ax₀, x₀, 1 / β₀) # (one(T) / β₀) * x₀ # mul!(similar(x₀, T), x₀, 1/β₀) + v = add!!(zerovector(Ax₀, T), x₀, 1 / β₀) if typeof(Ax₀) != typeof(v) - r = mul!(similar(v), Ax₀, 1 / β₀) + r = add!!(zerovector(v), Ax₀, 1 / β₀) else - r = rmul!(Ax₀, 1 / β₀) + r = scale!!(Ax₀, 1 / β₀) end βold = norm(r) - r = axpy!(-α, v, r) + r = add!!(r, v, -α) β = norm(r) # possibly reorthogonalize if iter.orth isa Union{ClassicalGramSchmidt2,ModifiedGramSchmidt2} - dα = dot(v, r) + dα = inner(v, r) n = hypot(dα, β) imag(dα) <= sqrt(max(eps(n), eps(one(n)))) || error("operator does not appear to be hermitian: $(imag(dα)) vs $n") α += dα - r = axpy!(-dα, v, r) + r = add!!(r, v, -dα) β = norm(r) elseif iter.orth isa Union{ClassicalGramSchmidtIR,ModifiedGramSchmidtIR} while eps(one(β)) < β < iter.orth.η * βold βold = β - dα = dot(v, r) + dα = inner(v, r) n = hypot(dα, β) imag(dα) <= sqrt(max(eps(n), eps(one(n)))) || error("operator does not appear to be hermitian: $(imag(dα)) vs $n") α += dα - r = axpy!(-dα, v, r) + r = add!!(r, v, -dα) β = norm(r) end end @@ -223,9 +223,9 @@ function initialize!(iter::LanczosIterator, state::LanczosFactorization; verbosi αs = empty!(state.αs) βs = empty!(state.βs) - v = mul!(V[1], x₀, 1 / norm(x₀)) - w = apply(iter.operator, v) - r, α = orthogonalize!(w, v, iter.orth) + V[1] = scale!!(V[1], x₀, 1 / norm(x₀)) + w = apply(iter.operator, V[1]) + r, α = orthogonalize!!(w, V[1], iter.orth) β = norm(r) n = hypot(α, β) imag(α) <= sqrt(max(eps(n), eps(one(n)))) || @@ -244,7 +244,7 @@ function expand!(iter::LanczosIterator, state::LanczosFactorization; verbosity:: βold = normres(state) V = state.V r = state.r - V = push!(V, rmul!(r, 1 / βold)) + V = push!(V, scale!!(r, 1 / βold)) r, α, β = lanczosrecurrence(iter.operator, V, βold, iter.orth) n = hypot(α, β, βold) imag(α) <= sqrt(max(eps(n), eps(one(n)))) || @@ -274,7 +274,7 @@ function shrink!(state::LanczosFactorization, k) resize!(state.αs, k) resize!(state.βs, k) state.k = k - state.r = rmul!(r, normres(state)) + state.r = scale!!(r, normres(state)) return state end @@ -283,29 +283,29 @@ end function lanczosrecurrence(operator, V::OrthonormalBasis, β, orth::ClassicalGramSchmidt) v = V[end] w = apply(operator, v) - α = dot(v, w) - w = axpy!(-β, V[end - 1], w) - w = axpy!(-α, v, w) + α = inner(v, w) + w = add!!(w, V[end - 1], -β) + w = add!!(w, v, -α) β = norm(w) return w, α, β end function lanczosrecurrence(operator, V::OrthonormalBasis, β, orth::ModifiedGramSchmidt) v = V[end] w = apply(operator, v) - w = axpy!(-β, V[end - 1], w) - α = dot(v, w) - w = axpy!(-α, v, w) + w = add!!(w, V[end - 1], -β) + α = inner(v, w) + w = add!!(w, v, -α) β = norm(w) return w, α, β end function lanczosrecurrence(operator, V::OrthonormalBasis, β, orth::ClassicalGramSchmidt2) v = V[end] w = apply(operator, v) - α = dot(v, w) - w = axpy!(-β, V[end - 1], w) - w = axpy!(-α, v, w) + α = inner(v, w) + w = add!!(w, V[end - 1], -β) + w = add!!(w, v, -α) - w, s = orthogonalize!(w, V, 
ClassicalGramSchmidt()) + w, s = orthogonalize!!(w, V, ClassicalGramSchmidt()) α += s[end] β = norm(w) return w, α, β @@ -313,12 +313,12 @@ end function lanczosrecurrence(operator, V::OrthonormalBasis, β, orth::ModifiedGramSchmidt2) v = V[end] w = apply(operator, v) - w = axpy!(-β, V[end - 1], w) - w, α = orthogonalize!(w, v, ModifiedGramSchmidt()) + w = add!!(w, V[end - 1], -β) + w, α = orthogonalize!!(w, v, ModifiedGramSchmidt()) s = α for q in V - w, s = orthogonalize!(w, q, ModifiedGramSchmidt()) + w, s = orthogonalize!!(w, q, ModifiedGramSchmidt()) end α += s β = norm(w) @@ -327,16 +327,16 @@ end function lanczosrecurrence(operator, V::OrthonormalBasis, β, orth::ClassicalGramSchmidtIR) v = V[end] w = apply(operator, v) - α = dot(v, w) - w = axpy!(-β, V[end - 1], w) - w = axpy!(-α, v, w) + α = inner(v, w) + w = add!!(w, V[end - 1], -β) + w = add!!(w, v, -α) ab2 = abs2(α) + abs2(β) β = norm(w) nold = sqrt(abs2(β) + ab2) while eps(one(β)) < β < orth.η * nold nold = β - w, s = orthogonalize!(w, V, ClassicalGramSchmidt()) + w, s = orthogonalize!!(w, V, ClassicalGramSchmidt()) α += s[end] β = norm(w) end @@ -345,9 +345,9 @@ end function lanczosrecurrence(operator, V::OrthonormalBasis, β, orth::ModifiedGramSchmidtIR) v = V[end] w = apply(operator, v) - w = axpy!(-β, V[end - 1], w) + w = add!!(w, V[end - 1], -β) - w, α = orthogonalize!(w, v, ModifiedGramSchmidt()) + w, α = orthogonalize!!(w, v, ModifiedGramSchmidt()) ab2 = abs2(α) + abs2(β) β = norm(w) nold = sqrt(abs2(β) + ab2) @@ -355,7 +355,7 @@ function lanczosrecurrence(operator, V::OrthonormalBasis, β, orth::ModifiedGram nold = β s = zero(α) for q in V - w, s = orthogonalize!(w, q, ModifiedGramSchmidt()) + w, s = orthogonalize!!(w, q, ModifiedGramSchmidt()) end α += s β = norm(w) diff --git a/src/innerproductvec.jl b/src/innerproductvec.jl index 3994bdb..e61de6b 100644 --- a/src/innerproductvec.jl +++ b/src/innerproductvec.jl @@ -32,7 +32,7 @@ Base.:*(a::Number, v::InnerProductVec) = InnerProductVec(a * v.vec, v.dotf) Base.:/(v::InnerProductVec, a::Number) = InnerProductVec(v.vec / a, v.dotf) Base.:\(a::Number, v::InnerProductVec) = InnerProductVec(a \ v.vec, v.dotf) -function Base.similar(v::InnerProductVec, ::Type{T}=eltype(v)) where {T} +function Base.similar(v::InnerProductVec, ::Type{T}=scalartype(v)) where {T} return InnerProductVec(similar(v.vec), v.dotf) end @@ -79,4 +79,35 @@ end function LinearAlgebra.dot(v::InnerProductVec{F}, w::InnerProductVec{F}) where {F} return v.dotf(v.vec, w.vec) end -LinearAlgebra.norm(v::InnerProductVec) = sqrt(real(dot(v, v))) + +VectorInterface.scalartype(::Type{<:InnerProductVec{F,T}}) where {F,T} = scalartype(T) + +function VectorInterface.zerovector(v::InnerProductVec, T::Type{<:Number}) + return InnerProductVec(zerovector(v.vec, T), v.dotf) +end + +function VectorInterface.scale(v::InnerProductVec, a::Number) + return InnerProductVec(scale(v.vec, a), v.dotf) +end + +function VectorInterface.scale!(v::InnerProductVec, a::Number) + scale!(v.vec, a) + return v +end +function VectorInterface.scale!(w::InnerProductVec{F}, v::InnerProductVec{F}, + a::Number) where {F} + scale!(w.vec, v.vec, a) + return w +end + +function VectorInterface.add!(v::InnerProductVec{F}, w::InnerProductVec{F}, a::Number, + b::Number) where {F} + add!(v.vec, w.vec, a, b) + return v +end + +function VectorInterface.inner(v::InnerProductVec{F}, w::InnerProductVec{F}) where {F} + return v.dotf(v.vec, w.vec) +end + +VectorInterface.norm(v::InnerProductVec) = sqrt(real(inner(v, v))) diff --git a/src/linsolve/bicgstab.jl 
b/src/linsolve/bicgstab.jl index 4a6d00d..2ff7f87 100644 --- a/src/linsolve/bicgstab.jl +++ b/src/linsolve/bicgstab.jl @@ -1,14 +1,14 @@ function linsolve(operator, b, x₀, alg::BiCGStab, a₀::Number=0, a₁::Number=1) # Initial function operation and division defines number type y₀ = apply(operator, x₀) - T = typeof(dot(b, y₀) / norm(b) * one(a₀) * one(a₁)) + T = typeof(inner(b, y₀) / norm(b) * one(a₀) * one(a₁)) α₀ = convert(T, a₀) α₁ = convert(T, a₁) # Continue computing r = b - a₀ * x₀ - a₁ * operator(x₀) - r = one(T) * b # r = mul!(similar(b, T), b, 1) - r = iszero(α₀) ? r : axpy!(-α₀, x₀, r) - r = axpy!(-α₁, y₀, r) - x = mul!(similar(r), x₀, 1) + r = scale(b, one(T)) + r = iszero(α₀) ? r : add!!(r, x₀, -α₀) + r = add!!(r, y₀, -α₁) + x = scale!!(zerovector(r), x₀, 1) normr = norm(r) S = typeof(normr) @@ -30,8 +30,8 @@ function linsolve(operator, b, x₀, alg::BiCGStab, a₀::Number=0, a₁::Number # First iteration numiter += 1 - r_shadow = mul!(similar(r), r, 1) # shadow residual - ρ = dot(r_shadow, r) + r_shadow = scale!!(zerovector(r), r, 1) # shadow residual + ρ = inner(r_shadow, r) # Method fails if ρ is zero. if ρ ≈ 0.0 @@ -42,26 +42,26 @@ function linsolve(operator, b, x₀, alg::BiCGStab, a₀::Number=0, a₁::Number end ## BiCG part of the algorithm. - p = mul!(similar(r), r, 1) + p = scale!!(zerovector(r), r, 1) v = apply(operator, p, α₀, α₁) numops += 1 - σ = dot(r_shadow, v) + σ = inner(r_shadow, v) α = ρ / σ - s = mul!(similar(r), r, 1) - s = axpy!(-α, v, s) # half step residual + s = scale!!(zerovector(r), r, 1) + s = add!!(s, v, -α) # half step residual - xhalf = mul!(similar(x), x, 1) - xhalf = axpy!(+α, p, xhalf) # half step iteration + xhalf = scale!!(zerovector(x), x, 1) + xhalf = add!!(xhalf, p, +α) # half step iteration normr = norm(s) # Check for early return at half step. if normr < tol # Replace approximate residual with the actual residual. - s = mul!(similar(b), b, 1) - s = axpy!(-1, apply(operator, xhalf, α₀, α₁), s) + s = scale!!(zerovector(b), b, 1) + s = add!!(s, apply(operator, xhalf, α₀, α₁), -1) numops += 1 normr_act = norm(s) @@ -79,20 +79,20 @@ function linsolve(operator, b, x₀, alg::BiCGStab, a₀::Number=0, a₁::Number t = apply(operator, s, α₀, α₁) numops += 1 - ω = dot(t, s) / dot(t, t) + ω = inner(t, s) / inner(t, t) - x = mul!(x, xhalf, 1) - x = axpy!(+ω, s, x) # full step iteration + x = scale!!(x, xhalf, 1) + x = add!!(x, s, +ω) # full step iteration - r = mul!(r, s, 1) - r = axpy!(-ω, t, r) # full step residual + r = scale!!(r, s, 1) + r = add!!(r, t, -ω) # full step residual # Check for early return at full step. normr = norm(r) if normr < tol # Replace approximate residual with the actual residual. 
- r = mul!(r, b, 1) - r = axpy!(-1, apply(operator, x, α₀, α₁), r) + r = scale!!(r, b, 1) + r = add!!(r, apply(operator, x, α₀, α₁), -1) numops += 1 normr_act = norm(r) @@ -116,23 +116,23 @@ function linsolve(operator, b, x₀, alg::BiCGStab, a₀::Number=0, a₁::Number numiter += 1 ρold = ρ - ρ = dot(r_shadow, r) + ρ = inner(r_shadow, r) β = (ρ / ρold) * (α / ω) - p = axpy!(-ω, v, p) - p = axpby!(1, r, β, p) + p = add!!(p, v, -ω) + p = add!!(p, r, 1, β) v = apply(operator, p, α₀, α₁) numops += 1 - σ = dot(r_shadow, v) + σ = inner(r_shadow, v) α = ρ / σ - s = mul!(s, r, 1) - s = axpy!(-α, v, s) # half step residual + s = scale!!(s, r, 1) + s = add!!(s, v, -α) # half step residual - xhalf = mul!(xhalf, x, 1) - xhalf = axpy!(+α, p, xhalf) # half step iteration + xhalf = scale!!(xhalf, x, 1) + xhalf = add!!(xhalf, p, +α) # half step iteration normr = norm(s) @@ -146,8 +146,8 @@ function linsolve(operator, b, x₀, alg::BiCGStab, a₀::Number=0, a₁::Number # Check for return at half step. if normr < tol # Compute non-approximate residual. - s = mul!(similar(b), b, 1) - s = axpy!(-1, apply(operator, xhalf, α₀, α₁), s) + s = scale!!(zerovector(b), b, 1) + s = add!!(s, apply(operator, xhalf, α₀, α₁), -1) numops += 1 normr_act = norm(s) @@ -165,20 +165,20 @@ function linsolve(operator, b, x₀, alg::BiCGStab, a₀::Number=0, a₁::Number t = apply(operator, s, α₀, α₁) numops += 1 - ω = dot(t, s) / dot(t, t) + ω = inner(t, s) / inner(t, t) - x = mul!(x, xhalf, 1) - x = axpy!(+ω, s, x) # full step iteration + x = scale!!(x, xhalf, 1) + x = add!!(x, s, +ω) # full step iteration - r = mul!(r, s, 1) - r = axpy!(-ω, t, r) # full step residual + r = scale!!(r, s, 1) + r = add!!(r, t, -ω) # full step residual # Check for return at full step. normr = norm(r) if normr < tol # Replace approximate residual with the actual residual. - r = mul!(r, b, 1) - r = axpy!(-1, apply(operator, x, α₀, α₁), r) + r = scale!!(r, b, 1) + r = add!!(r, apply(operator, x, α₀, α₁), -1) numops += 1 normr_act = norm(r) diff --git a/src/linsolve/cg.jl b/src/linsolve/cg.jl index 722eea6..9270b1c 100644 --- a/src/linsolve/cg.jl +++ b/src/linsolve/cg.jl @@ -1,14 +1,14 @@ function linsolve(operator, b, x₀, alg::CG, a₀::Real=0, a₁::Real=1) # Initial function operation and division defines number type y₀ = apply(operator, x₀) - T = typeof(dot(b, y₀) / norm(b) * one(a₀) * one(a₁)) + T = typeof(inner(b, y₀) / norm(b) * one(a₀) * one(a₁)) α₀ = convert(T, a₀) α₁ = convert(T, a₁) # Continue computing r = b - a₀ * x₀ - a₁ * operator(x₀) - r = one(T) * b # r = mul!(similar(b, T), b, 1) - r = iszero(α₀) ? r : axpy!(-α₀, x₀, r) - r = axpy!(-α₁, y₀, r) - x = mul!(similar(r), x₀, 1) + r = scale(b, one(T)) + r = iszero(α₀) ? 
r : add!!(r, x₀, -α₀) + r = add!!(r, y₀, -α₁) + x = scale!!(zerovector(r), x₀, 1) normr = norm(r) S = typeof(normr) @@ -23,11 +23,11 @@ function linsolve(operator, b, x₀, alg::CG, a₀::Real=0, a₁::Real=1) # First iteration ρ = normr^2 - p = mul!(similar(r), r, 1) + p = scale!!(zerovector(r), r, 1) q = apply(operator, p, α₀, α₁) - α = ρ / dot(p, q) - x = axpy!(+α, p, x) - r = axpy!(-α, q, r) + α = ρ / inner(p, q) + x = add!!(x, p, +α) + r = add!!(r, q, -α) normr = norm(r) ρold = ρ ρ = normr^2 @@ -45,15 +45,15 @@ function linsolve(operator, b, x₀, alg::CG, a₀::Real=0, a₁::Real=1) normr < tol && return (x, ConvergenceInfo(1, r, normr, numiter, numops)) while numiter < maxiter - axpby!(1, r, β, p) + p = add!!(p, r, 1, β) q = apply(operator, p, α₀, α₁) - α = ρ / dot(p, q) - x = axpy!(+α, p, x) - r = axpy!(-α, q, r) + α = ρ / inner(p, q) + x = add!!(x, p, α) + r = add!!(r, q, -α) normr = norm(r) if normr < tol # recompute to account for buildup of floating point errors - r = mul!(r, b, 1) - r = axpy!(-1, apply(operator, x, α₀, α₁), r) + r = scale!!(r, b, 1) + r = add!!(r, apply(operator, x, α₀, α₁), -1) normr = norm(r) ρ = normr^2 β = zero(β) # restart CG diff --git a/src/linsolve/gmres.jl b/src/linsolve/gmres.jl index ba26155..129c43d 100644 --- a/src/linsolve/gmres.jl +++ b/src/linsolve/gmres.jl @@ -1,14 +1,14 @@ function linsolve(operator, b, x₀, alg::GMRES, a₀::Number=0, a₁::Number=1) # Initial function operation and division defines number type y₀ = apply(operator, x₀) - T = typeof(dot(b, y₀) / norm(b) * one(a₀) * one(a₁)) + T = typeof(inner(b, y₀) / norm(b) * one(a₀) * one(a₁)) α₀ = convert(T, a₀)::T α₁ = convert(T, a₁)::T # Continue computing r = b - a₀ * x₀ - a₁ * operator(x₀) - r = one(T) * b # mul!(similar(b, T), b, 1) - r = iszero(α₀) ? r : axpy!(-α₀, x₀, r) - r = axpy!(-α₁, y₀, r) - x = mul!(similar(r), x₀, 1) + r = scale(b, one(T)) + r = iszero(α₀) ? r : add!!(r, x₀, -α₀) + r = add!!(r, y₀, -α₁) + x = scale!!(zerovector(r), x₀, 1) β = norm(r) S = typeof(β) @@ -103,22 +103,22 @@ function linsolve(operator, b, x₀, alg::GMRES, a₀::Number=0, a₁::Number=1) # Update x V = basis(fact) @inbounds for i in 1:k - x = axpy!(y[i], V[i], x) + x = add!!(x, V[i], y[i]) end if β > tol # Recompute residual without reevaluating operator w = residual(fact) - push!(V, rmul!(w, 1 / normres(fact))) + push!(V, scale!!(w, 1 / normres(fact))) for i in 1:k rmul!(V, gs[i]') end - r = mul!(r, V[k + 1], y[k + 1]) + r = scale!!(r, V[k + 1], y[k + 1]) else # Recompute residual and its norm explicitly, to ensure that no # numerical errors have accumulated - r = mul!(r, b, 1) - r = axpy!(-1, apply(operator, x, α₀, α₁), r) + r = scale!!(r, b, 1) + r = add!!(r, apply(operator, x, α₀, α₁), -1) numops += 1 β = norm(r) if β < tol diff --git a/src/linsolve/linsolve.jl b/src/linsolve/linsolve.jl index 047eab5..738828e 100644 --- a/src/linsolve/linsolve.jl +++ b/src/linsolve/linsolve.jl @@ -92,14 +92,15 @@ function linsolve(A::AbstractMatrix, b::AbstractVector, a₀::Number=0, a₁::Nu end function linsolve(f, b, a₀::Number=0, a₁::Number=1; kwargs...) - return linsolve(f, b, (zero(a₀) * zero(a₁)) * b, a₀, a₁; kwargs...) + return linsolve(f, b, scale(b, zero(a₀) * zero(a₁)), a₀, a₁; kwargs...) end function linsolve(f, b, x₀, a₀::Number=0, a₁::Number=1; kwargs...) 
Tx = promote_type(typeof(x₀)) Tb = typeof(b) Tfx = Core.Compiler.return_type(apply, Tuple{typeof(f),Tx}) - T = promote_type(Core.Compiler.return_type(dot, Tuple{Tb,Tfx}), typeof(a₀), typeof(a₁)) + T = promote_type(Core.Compiler.return_type(inner, Tuple{Tb,Tfx}), typeof(a₀), + typeof(a₁)) alg = linselector(f, b, T; kwargs...) return linsolve(f, b, x₀, alg, a₀, a₁) end diff --git a/src/matrixfun/expintegrator.jl b/src/matrixfun/expintegrator.jl index cc0f962..7c87c26 100644 --- a/src/matrixfun/expintegrator.jl +++ b/src/matrixfun/expintegrator.jl @@ -96,14 +96,14 @@ use complex time steps in combination with e.g. a real symmetric map. function expintegrator end function expintegrator(A, t::Number, u₀, us...; kwargs...) - Ts = typeof.(dot.((u₀, us...), (u₀,))) + Ts = typeof.(inner.((u₀, us...), (u₀,))) T = promote_type(typeof(t), Ts...) alg = eigselector(A, T; kwargs...) return expintegrator(A, t, (u₀, us...), alg) end function expintegrator(A, t::Number, u::Tuple, alg::Union{Lanczos,Arnoldi}) - length(u) == 1 && return expintegrator(A, t, (u[1], rmul!(similar(u[1]), false)), alg) + length(u) == 1 && return expintegrator(A, t, (u[1], zerovector(u[1])), alg) p = length(u) - 1 @@ -112,9 +112,9 @@ function expintegrator(A, t::Number, u::Tuple, alg::Union{Lanczos,Arnoldi}) β₀ = norm(u₀) Au₀ = apply(A, u₀) # used to determine return type numops = 1 - T = promote_type(typeof(t), (typeof.(dot.(u, (Au₀,))))...) + T = promote_type(typeof(t), (typeof.(inner.(u, (Au₀,))))...) S = real(T) - w₀ = one(T) * u₀ + w₀ = scale(u₀, one(T)) # krylovdim and related allocations krylovdim = alg.krylovdim @@ -137,7 +137,7 @@ function expintegrator(A, t::Number, u::Tuple, alg::Union{Lanczos,Arnoldi}) w = Vector{typeof(w₀)}(undef, p + 1) w[1] = w₀ # reuse the result of apply computed earlier: - w[2] = mul!(similar(w₀), Au₀, one(T)) + w[2] = scale!!(zerovector(w₀), Au₀, one(T)) for j in 1:p if j > 1 w[j + 1] = apply(A, w[j]) @@ -145,11 +145,11 @@ function expintegrator(A, t::Number, u::Tuple, alg::Union{Lanczos,Arnoldi}) end lfac = 1 for l in 0:(p - j) - w[j + 1] = axpy!((sgn * τ₀)^l / lfac, u[j + l + 1], w[j + 1]) + w[j + 1] = add!!(w[j + 1], u[j + l + 1], (sgn * τ₀)^l / lfac) lfac *= l + 1 end end - v = similar(w₀) + v = zerovector(w₀) β = norm(w[p + 1]) if β < alg.tol && p == 1 if alg.verbosity > 0 @@ -157,7 +157,7 @@ function expintegrator(A, t::Number, u::Tuple, alg::Union{Lanczos,Arnoldi}) end return w₀, ConvergenceInfo(1, zero(τ), β, 0, numops) end - mul!(v, w[p + 1], 1 / β) + v = scale!!(v, w[p + 1], 1 / β) # initialize iterator if alg isa Lanczos @@ -211,13 +211,13 @@ function expintegrator(A, t::Number, u::Tuple, alg::Union{Lanczos,Arnoldi}) totalerr += ϵ jfac = 1 for j in 1:(p - 1) - w₀ = axpy!((sgn * Δτ)^j / jfac, w[j + 1], w₀) + w₀ = add!!(w₀, w[j + 1], (sgn * Δτ)^j / jfac) jfac *= (j + 1) end w[p + 1] = mul!(w[p + 1], basis(fact), view(expH, 1:K, K + p)) # add first correction - w[p + 1] = axpy!(expH[K, K + p + 1], residual(fact), w[p + 1]) - w₀ = axpy!(β * (sgn * Δτ)^p, w[p + 1], w₀) + w[p + 1] = add!!(w[p + 1], residual(fact), expH[K, K + p + 1]) + w₀ = add!!(w₀, w[p + 1], β * (sgn * Δτ)^p) τ₀ += Δτ # increase time step for next iteration: @@ -247,13 +247,13 @@ function expintegrator(A, t::Number, u::Tuple, alg::Union{Lanczos,Arnoldi}) totalerr += ϵ jfac = 1 for j in 1:(p - 1) - w₀ = axpy!((sgn * (τ - τ₀))^j / jfac, w[j + 1], w₀) + w₀ = add!!(w₀, w[j + 1], (sgn * (τ - τ₀))^j / jfac) jfac *= (j + 1) end w[p + 1] = mul!(w[p + 1], basis(fact), view(expH, 1:K, K + p)) # add first correction - w[p + 1] = 
axpy!(expH[K, K + p + 1], residual(fact), w[p + 1]) - w₀ = axpy!(β * (sgn * (τ - τ₀))^p, w[p + 1], w₀) + w[p + 1] = add!!(w[p + 1], residual(fact), expH[K, K + p + 1]) + w₀ = add!!(w₀, w[p + 1], β * (sgn * (τ - τ₀))^p) τ₀ = τ end end @@ -279,7 +279,7 @@ function expintegrator(A, t::Number, u::Tuple, alg::Union{Lanczos,Arnoldi}) numops += 1 lfac = 1 for l in 0:(p - j) - w[j + 1] = axpy!((sgn * τ₀)^l / lfac, u[j + l + 1], w[j + 1]) + w[j + 1] = add!!(w[j + 1], u[j + l + 1], (sgn * τ₀)^l / lfac) lfac *= l + 1 end end @@ -290,7 +290,7 @@ function expintegrator(A, t::Number, u::Tuple, alg::Union{Lanczos,Arnoldi}) end return w₀, ConvergenceInfo(1, zero(τ), β, numiter, numops) end - mul!(v, w[p + 1], 1 / β) + v = scale!!(v, w[p + 1], 1 / β) if alg isa Lanczos iter = LanczosIterator(A, w[p + 1], alg.orth) diff --git a/src/orthonormal.jl b/src/orthonormal.jl index 166a834..5c3ec97 100644 --- a/src/orthonormal.jl +++ b/src/orthonormal.jl @@ -7,7 +7,7 @@ one, representing an orthonormal basis for some subspace (typically a Krylov sub also [`Basis`](@ref) Orthonormality of the vectors contained in an instance `b` of `OrthonormalBasis` -(i.e. `all(dot(b[i],b[j]) == I[i,j] for i=1:length(b), j=1:length(b))`) is not checked when +(i.e. `all(inner(b[i],b[j]) == I[i,j] for i=1:length(b), j=1:length(b))`) is not checked when elements are added; it is up to the algorithm that constructs `b` to guarantee orthonormality. @@ -55,32 +55,32 @@ Base.resize!(b::OrthonormalBasis, k::Int) = (resize!(b.basis, k); return b) # Multiplication methods with OrthonormalBasis function Base.:*(b::OrthonormalBasis, x::AbstractVector) - y = zero(eltype(x)) * first(b) + y = zerovector(first(b), promote_type(scalartype(x), scalartype(first(b)))) return mul!(y, b, x) end -LinearAlgebra.mul!(y, b::OrthonormalBasis, x::AbstractVector) = unproject!(y, b, x, 1, 0) +LinearAlgebra.mul!(y, b::OrthonormalBasis, x::AbstractVector) = unproject!!(y, b, x, 1, 0) const BLOCKSIZE = 4096 """ - project!(y::AbstractVector, b::OrthonormalBasis, x, + project!!(y::AbstractVector, b::OrthonormalBasis, x, [α::Number = 1, β::Number = 0, r = Base.OneTo(length(b))]) For a given orthonormal basis `b`, compute the expansion coefficients `y` resulting from projecting the vector `x` onto the subspace spanned by `b`; more specifically this computes ``` - y[j] = β*y[j] + α * dot(b[r[j]], x) + y[j] = β*y[j] + α * inner(b[r[j]], x) ``` for all ``j ∈ r``. 
""" -function project!(y::AbstractVector, - b::OrthonormalBasis, - x, - α::Number=true, - β::Number=false, - r=Base.OneTo(length(b))) +function project!!(y::AbstractVector, + b::OrthonormalBasis, + x, + α::Number=true, + β::Number=false, + r=Base.OneTo(length(b))) # no specialized routine for IndexLinear x because reduction dimension is large dimension length(y) == length(r) || throw(DimensionMismatch()) if get_num_threads() > 1 @@ -88,9 +88,9 @@ function project!(y::AbstractVector, Threads.@spawn for j in $J @inbounds begin if β == 0 - y[j] = α * dot(b[r[j]], x) + y[j] = α * inner(b[r[j]], x) else - y[j] = β * y[j] + α * dot(b[r[j]], x) + y[j] = β * y[j] + α * inner(b[r[j]], x) end end end @@ -99,9 +99,9 @@ function project!(y::AbstractVector, for j in 1:length(r) @inbounds begin if β == 0 - y[j] = α * dot(b[r[j]], x) + y[j] = α * inner(b[r[j]], x) else - y[j] = β * y[j] + α * dot(b[r[j]], x) + y[j] = β * y[j] + α * inner(b[r[j]], x) end end end @@ -110,7 +110,7 @@ function project!(y::AbstractVector, end """ - unproject!(y, b::OrthonormalBasis, x::AbstractVector, + unproject!!(y, b::OrthonormalBasis, x::AbstractVector, [α::Number = 1, β::Number = 0, r = Base.OneTo(length(b))]) For a given orthonormal basis `b`, reconstruct the vector-like object `y` that is defined by @@ -121,12 +121,12 @@ this computes y = β*y + α * sum(b[r[i]]*x[i] for i = 1:length(r)) ``` """ -function unproject!(y, - b::OrthonormalBasis, - x::AbstractVector, - α::Number=true, - β::Number=false, - r=Base.OneTo(length(b))) +function unproject!!(y, + b::OrthonormalBasis, + x::AbstractVector, + α::Number=true, + β::Number=false, + r=Base.OneTo(length(b))) if y isa AbstractArray && !(y isa AbstractGPUArray) && IndexStyle(y) isa IndexLinear && get_num_threads() > 1 return unproject_linear_multithreaded!(y, b, x, α, β, r) @@ -134,12 +134,12 @@ function unproject!(y, # general case: using only vector operations, i.e. axpy! 
(similar to BLAS level 1) length(x) == length(r) || throw(DimensionMismatch()) if β == 0 - rmul!(y, false) # should be hard zero + y = scale!!(y, false) # should be hard zero elseif β != 1 - rmul!(y, β) + y = scale!!(y, β) end @inbounds for (i, ri) in enumerate(r) - y = axpy!(α * x[i], b[ri], y) + y = add!!(y, b[ri], α * x[i]) end return y end @@ -221,11 +221,11 @@ It is the user's responsibility to make sure that the result is still an orthono length(x) == length(r) || throw(DimensionMismatch()) @inbounds for (i, ri) in enumerate(r) if β == 1 - b[ri] = axpy!(α * conj(x[i]), y, b[ri]) + b[ri] = add!!(b[ri], y, α * conj(x[i])) elseif β == 0 - b[ri] = mul!(b[ri], α * x[i], y) + b[ri] = scale!!(b[ri], y, α * conj(x[i])) else - b[ri] = axpby!(α * x[i], y, β, b[ri]) + b[ri] = add!!(b[ri], y, α * conj(x[i]), β) end end return b @@ -301,21 +301,21 @@ function basistransform!(b::OrthonormalBasis{T}, U::AbstractMatrix) where {T} # m, n = size(U) m == length(b) || throw(DimensionMismatch()) - let b2 = [similar(b[1]) for j in 1:n] + let b2 = [zerovector(b[1]) for j in 1:n] if get_num_threads() > 1 @sync for J in splitrange(1:n, get_num_threads()) Threads.@spawn for j in $J - mul!(b2[j], b[1], U[1, j]) + b2[j] = scale!!(b2[j], b[1], U[1, j]) for i in 2:m - axpy!(U[i, j], b[i], b2[j]) + b2[j] = add!!(b2[j], b[i], U[i, j]) end end end else for j in 1:n - mul!(b2[j], b[1], U[1, j]) + b2[j] = scale!!(b2[j], b[1], U[1, j]) for i in 2:m - axpy!(U[i, j], b[i], b2[j]) + b2[j] = add!!(b2[j], b[i], U[i, j]) end end end @@ -374,127 +374,127 @@ end # Orthogonalization of a vector against a given OrthonormalBasis orthogonalize(v, args...) = orthogonalize!(true * v, args...) -function orthogonalize!(v::T, b::OrthonormalBasis{T}, alg::Orthogonalizer) where {T} +function orthogonalize!!(v::T, b::OrthonormalBasis{T}, alg::Orthogonalizer) where {T} S = promote_type(eltype(v), eltype(T)) c = Vector{S}(undef, length(b)) - return orthogonalize!(v, b, c, alg) + return orthogonalize!!(v, b, c, alg) end -function orthogonalize!(v::T, - b::OrthonormalBasis{T}, - x::AbstractVector, - ::ClassicalGramSchmidt) where {T} - x = project!(x, b, v) - v = unproject!(v, b, x, -1, 1) +function orthogonalize!!(v::T, + b::OrthonormalBasis{T}, + x::AbstractVector, + ::ClassicalGramSchmidt) where {T} + x = project!!(x, b, v) + v = unproject!!(v, b, x, -1, 1) return (v, x) end -function reorthogonalize!(v::T, - b::OrthonormalBasis{T}, - x::AbstractVector, - ::ClassicalGramSchmidt) where {T} +function reorthogonalize!!(v::T, + b::OrthonormalBasis{T}, + x::AbstractVector, + ::ClassicalGramSchmidt) where {T} s = similar(x) ## EXTRA ALLOCATION - s = project!(s, b, v) - v = unproject!(v, b, s, -1, 1) + s = project!!(s, b, v) + v = unproject!!(v, b, s, -1, 1) x .+= s return (v, x) end -function orthogonalize!(v::T, - b::OrthonormalBasis{T}, - x::AbstractVector, - ::ClassicalGramSchmidt2) where {T} - (v, x) = orthogonalize!(v, b, x, ClassicalGramSchmidt()) - return reorthogonalize!(v, b, x, ClassicalGramSchmidt()) +function orthogonalize!!(v::T, + b::OrthonormalBasis{T}, + x::AbstractVector, + ::ClassicalGramSchmidt2) where {T} + (v, x) = orthogonalize!!(v, b, x, ClassicalGramSchmidt()) + return reorthogonalize!!(v, b, x, ClassicalGramSchmidt()) end -function orthogonalize!(v::T, - b::OrthonormalBasis{T}, - x::AbstractVector, - alg::ClassicalGramSchmidtIR) where {T} +function orthogonalize!!(v::T, + b::OrthonormalBasis{T}, + x::AbstractVector, + alg::ClassicalGramSchmidtIR) where {T} nold = norm(v) - orthogonalize!(v, b, x, ClassicalGramSchmidt()) + 
(v, x) = orthogonalize!!(v, b, x, ClassicalGramSchmidt()) nnew = norm(v) while eps(one(nnew)) < nnew < alg.η * nold nold = nnew - (v, x) = reorthogonalize!(v, b, x, ClassicalGramSchmidt()) + (v, x) = reorthogonalize!!(v, b, x, ClassicalGramSchmidt()) nnew = norm(v) end return (v, x) end -function orthogonalize!(v::T, - b::OrthonormalBasis{T}, - x::AbstractVector, - ::ModifiedGramSchmidt) where {T} +function orthogonalize!!(v::T, + b::OrthonormalBasis{T}, + x::AbstractVector, + ::ModifiedGramSchmidt) where {T} for (i, q) in enumerate(b) - s = dot(q, v) - v = axpy!(-s, q, v) + s = inner(q, v) + v = add!!(v, q, -s) x[i] = s end return (v, x) end -function reorthogonalize!(v::T, - b::OrthonormalBasis{T}, - x::AbstractVector, - ::ModifiedGramSchmidt) where {T} +function reorthogonalize!!(v::T, + b::OrthonormalBasis{T}, + x::AbstractVector, + ::ModifiedGramSchmidt) where {T} for (i, q) in enumerate(b) - s = dot(q, v) - v = axpy!(-s, q, v) + s = inner(q, v) + v = add!!(v, q, -s) x[i] += s end return (v, x) end -function orthogonalize!(v::T, - b::OrthonormalBasis{T}, - x::AbstractVector, - ::ModifiedGramSchmidt2) where {T} - (v, x) = orthogonalize!(v, b, x, ModifiedGramSchmidt()) - return reorthogonalize!(v, b, x, ModifiedGramSchmidt()) +function orthogonalize!!(v::T, + b::OrthonormalBasis{T}, + x::AbstractVector, + ::ModifiedGramSchmidt2) where {T} + (v, x) = orthogonalize!!(v, b, x, ModifiedGramSchmidt()) + return reorthogonalize!!(v, b, x, ModifiedGramSchmidt()) end -function orthogonalize!(v::T, - b::OrthonormalBasis{T}, - x::AbstractVector, - alg::ModifiedGramSchmidtIR) where {T} +function orthogonalize!!(v::T, + b::OrthonormalBasis{T}, + x::AbstractVector, + alg::ModifiedGramSchmidtIR) where {T} nold = norm(v) - (v, x) = orthogonalize!(v, b, x, ModifiedGramSchmidt()) + (v, x) = orthogonalize!!(v, b, x, ModifiedGramSchmidt()) nnew = norm(v) while eps(one(nnew)) < nnew < alg.η * nold nold = nnew - (v, x) = reorthogonalize!(v, b, x, ModifiedGramSchmidt()) + (v, x) = reorthogonalize!!(v, b, x, ModifiedGramSchmidt()) nnew = norm(v) end return (v, x) end # Orthogonalization of a vector against a given normalized vector -orthogonalize!(v::T, q::T, alg::Orthogonalizer) where {T} = _orthogonalize!(v, q, alg) +orthogonalize!!(v::T, q::T, alg::Orthogonalizer) where {T} = _orthogonalize!!(v, q, alg) # avoid method ambiguity on Julia 1.0 according to Aqua.jl -function _orthogonalize!(v::T, - q::T, - alg::Union{ClassicalGramSchmidt,ModifiedGramSchmidt}) where {T} - s = dot(q, v) - v = axpy!(-s, q, v) +function _orthogonalize!!(v::T, + q::T, + alg::Union{ClassicalGramSchmidt,ModifiedGramSchmidt}) where {T} + s = inner(q, v) + v = add!!(v, q, -s) return (v, s) end -function _orthogonalize!(v::T, - q::T, - alg::Union{ClassicalGramSchmidt2,ModifiedGramSchmidt2}) where {T} - s = dot(q, v) - v = axpy!(-s, q, v) - ds = dot(q, v) - v = axpy!(-ds, q, v) +function _orthogonalize!!(v::T, + q::T, + alg::Union{ClassicalGramSchmidt2,ModifiedGramSchmidt2}) where {T} + s = inner(q, v) + v = add!!(v, q, -s) + ds = inner(q, v) + v = add!!(v, q, -ds) return (v, s + ds) end -function _orthogonalize!(v::T, - q::T, - alg::Union{ClassicalGramSchmidtIR,ModifiedGramSchmidtIR}) where {T} +function _orthogonalize!!(v::T, + q::T, + alg::Union{ClassicalGramSchmidtIR,ModifiedGramSchmidtIR}) where {T} nold = norm(v) - s = dot(q, v) - v = axpy!(-s, q, v) + s = inner(q, v) + v = add!!(v, q, -s) nnew = norm(v) while eps(one(nnew)) < nnew < alg.η * nold nold = nnew - ds = dot(q, v) - v = axpy!(-ds, q, v) + ds = inner(q, v) + v = add!!(v, 
q, -ds) s += ds nnew = norm(v) end @@ -503,10 +503,10 @@ end """ orthogonalize(v, b::OrthonormalBasis, [x::AbstractVector,] alg::Orthogonalizer]) -> w, x - orthogonalize!(v, b::OrthonormalBasis, [x::AbstractVector,] alg::Orthogonalizer]) -> w, x + orthogonalize!!(v, b::OrthonormalBasis, [x::AbstractVector,] alg::Orthogonalizer]) -> w, x orthogonalize(v, q, algorithm::Orthogonalizer]) -> w, s - orthogonalize!(v, q, algorithm::Orthogonalizer]) -> w, s + orthogonalize!!(v, q, algorithm::Orthogonalizer]) -> w, s Orthogonalize vector `v` against all the vectors in the orthonormal basis `b` using the orthogonalization algorithm `alg` of type [`Orthogonalizer`](@ref), and return the resulting @@ -527,24 +527,24 @@ and its concrete subtypes [`ClassicalGramSchmidt`](@ref), [`ModifiedGramSchmidt` [`ClassicalGramSchmidt2`](@ref), [`ModifiedGramSchmidt2`](@ref), [`ClassicalGramSchmidtIR`](@ref) and [`ModifiedGramSchmidtIR`](@ref). """ -orthogonalize, orthogonalize! +orthogonalize, orthogonalize!! # Orthonormalization: orthogonalization and normalization -orthonormalize(v, args...) = orthonormalize!(true * v, args...) +orthonormalize(v, args...) = orthonormalize!!(scale(v, VectorInterface.One()), args...) -function orthonormalize!(v, args...) - out = orthogonalize!(v, args...) # out[1] === v +function orthonormalize!!(v, args...) + out = orthogonalize!!(v, args...) # out[1] === v β = norm(v) - v = rmul!(v, inv(β)) + v = scale!!(v, inv(β)) return tuple(v, β, Base.tail(out)...) end """ orthonormalize(v, b::OrthonormalBasis, [x::AbstractVector,] alg::Orthogonalizer]) -> w, β, x - orthonormalize!(v, b::OrthonormalBasis, [x::AbstractVector,] alg::Orthogonalizer]) -> w, β, x + orthonormalize!!(v, b::OrthonormalBasis, [x::AbstractVector,] alg::Orthogonalizer]) -> w, β, x orthonormalize(v, q, algorithm::Orthogonalizer]) -> w, β, s - orthonormalize!(v, q, algorithm::Orthogonalizer]) -> w, β, s + orthonormalize!!(v, q, algorithm::Orthogonalizer]) -> w, β, s Orthonormalize vector `v` against all the vectors in the orthonormal basis `b` using the orthogonalization algorithm `alg` of type [`Orthogonalizer`](@ref), and return the resulting @@ -566,4 +566,4 @@ and its concrete subtypes [`ClassicalGramSchmidt`](@ref), [`ModifiedGramSchmidt` [`ClassicalGramSchmidt2`](@ref), [`ModifiedGramSchmidt2`](@ref), [`ClassicalGramSchmidtIR`](@ref) and [`ModifiedGramSchmidtIR`](@ref). """ -orthonormalize, orthonormalize! +orthonormalize, orthonormalize!! 
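The orthogonalization hunks above systematically replace LinearAlgebra's `dot`/`axpy!`/`rmul!` with VectorInterface's `inner`, `add!!` and `scale!!`; the double-bang functions mutate their first argument when the vector type allows it and otherwise return a fresh vector, which is why every result is rebound (`v = add!!(v, q, -s)`) and why the single-bang names grew a second `!`. The following is an illustrative sketch of one modified Gram–Schmidt sweep written against that interface — it is not the package code, and the helper name `mgs_sweep` is made up:

```julia
# Illustrative sketch, not the package code: one modified Gram-Schmidt sweep
# followed by normalization, written against VectorInterface. The `!!`
# functions mutate their first argument when the vector type supports it and
# otherwise return a new vector, so the result is rebound at every step.
using VectorInterface
using VectorInterface: add!!   # mirrors the explicit import used in src/KrylovKit.jl

function mgs_sweep(v, basis)
    x = zeros(scalartype(v), length(basis))  # expansion coefficients
    for (i, q) in enumerate(basis)
        s = inner(q, v)                      # was LinearAlgebra.dot(q, v)
        v = add!!(v, q, -s)                  # was axpy!(-s, q, v)
        x[i] = s
    end
    β = VectorInterface.norm(v)
    v = scale!!(v, inv(β))                   # was rmul!(v, inv(β))
    return v, β, x
end

# With plain arrays: orthonormalize [3, 4] against the unit vector e₁.
w, β, x = mgs_sweep([3.0, 4.0], [[1.0, 0.0]])   # w ≈ [0.0, 1.0], β == 4.0, x == [3.0]
```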
diff --git a/src/recursivevec.jl b/src/recursivevec.jl index 13af5c7..cc7379c 100644 --- a/src/recursivevec.jl +++ b/src/recursivevec.jl @@ -53,52 +53,65 @@ function Base.similar(v::RecursiveVec) end function Base.copy!(w::RecursiveVec, v::RecursiveVec) - @assert length(w.vecs) == length(v.vecs) - @inbounds for i in 1:length(w.vecs) - copyto!(w.vecs[i], v.vecs[i]) + @assert length(w) == length(v) + @inbounds for i in 1:length(w) + copyto!(w[i], v[i]) end return w end -function LinearAlgebra.mul!(w::RecursiveVec, a::Number, v::RecursiveVec) - @assert length(w.vecs) == length(v.vecs) - @inbounds for i in 1:length(w.vecs) - mul!(w.vecs[i], a, v.vecs[i]) - end - return w +function LinearAlgebra.dot(v::RecursiveVec{T}, w::RecursiveVec{T}) where {T} + return sum(dot.(v.vecs, w.vecs)) end -function LinearAlgebra.mul!(w::RecursiveVec, v::RecursiveVec, a::Number) - @assert length(w.vecs) == length(v.vecs) - @inbounds for i in 1:length(w.vecs) - mul!(w.vecs[i], v.vecs[i], a) - end - return w +VectorInterface.scalartype(::Type{RecursiveVec{T}}) where {T} = scalartype(T) + +function VectorInterface.zerovector(v::RecursiveVec, T::Type{<:Number}) + return RecursiveVec(zerovector(v.vecs, T)) end -function LinearAlgebra.rmul!(v::RecursiveVec, a::Number) - for x in v.vecs - rmul!(x, a) - end +function VectorInterface.scale(v::RecursiveVec, a::Number) + return RecursiveVec(scale(v.vecs, a)) +end + +function VectorInterface.scale!(v::RecursiveVec, a::Number) + scale!(v.vecs, a) return v end -function LinearAlgebra.axpy!(a::Number, v::RecursiveVec, w::RecursiveVec) - @assert length(w.vecs) == length(v.vecs) - @inbounds for i in 1:length(w.vecs) - axpy!(a, v.vecs[i], w.vecs[i]) - end +function VectorInterface.scale!(w::RecursiveVec, v::RecursiveVec, a::Number) + scale!(w.vecs, v.vecs, a) return w end -function LinearAlgebra.axpby!(a::Number, v::RecursiveVec, b, w::RecursiveVec) - @assert length(w.vecs) == length(v.vecs) - @inbounds for i in 1:length(w.vecs) - axpby!(a, v.vecs[i], b, w.vecs[i]) - end + +function VectorInterface.scale!!(x::RecursiveVec, a::Number) + return RecursiveVec(scale!!(x.vecs, a)) +end + +function VectorInterface.scale!!(w::RecursiveVec, + v::RecursiveVec, a::Number) + return RecursiveVec(scale!!(w.vecs, v.vecs, a)) +end + +function VectorInterface.add(w::RecursiveVec, v::RecursiveVec, a::Number=One(), + b::Number=One()) + return RecursiveVec(add(w.vecs, v.vecs, a, b)) +end + +function VectorInterface.add!(w::RecursiveVec, v::RecursiveVec, a::Number=One(), + b::Number=One()) + add!(w.vecs, v.vecs, a, b) return w end -function LinearAlgebra.dot(v::RecursiveVec{T}, w::RecursiveVec{T}) where {T} - return sum(dot.(v.vecs, w.vecs)) +function VectorInterface.add!!(w::RecursiveVec, v::RecursiveVec, + a::Number=One(), + b::Number=One()) + return RecursiveVec(add!!(w.vecs, v.vecs, a, b)) end -LinearAlgebra.norm(v::RecursiveVec) = norm(norm.(v.vecs)) + +function VectorInterface.inner(v::RecursiveVec{T}, w::RecursiveVec{T}) where {T} + return inner(v.vecs, w.vecs) +end + +VectorInterface.norm(v::RecursiveVec) = VectorInterface.norm(v.vecs) diff --git a/test/ad.jl b/test/ad.jl index bc719b7..11de1b9 100644 --- a/test/ad.jl +++ b/test/ad.jl @@ -4,11 +4,11 @@ using Random, Test using ChainRulesCore, ChainRulesTestUtils, Zygote, FiniteDifferences fdm = ChainRulesTestUtils._fdm -precision(T::Type{<:Number}) = eps(real(T))^(2 / 3) +tolerance(T::Type{<:Number}) = eps(real(T))^(2 / 3) n = 10 N = 30 -function build_mat_example(A, b; tol=precision(eltype(A)), kwargs...) 
+function build_mat_example(A, b; tol=tolerance(eltype(A)), kwargs...) Avec, A_fromvec = to_vec(A) bvec, b_fromvec = to_vec(b) T = eltype(A) @@ -24,7 +24,7 @@ function build_mat_example(A, b; tol=precision(eltype(A)), kwargs...) return mat_example, Avec, bvec end -function build_fun_example(A, b, c, d, e, f; tol=precision(eltype(A)), kwargs...) +function build_fun_example(A, b, c, d, e, f; tol=tolerance(eltype(A)), kwargs...) Avec, matfromvec = to_vec(A) bvec, vecfromvec = to_vec(b) cvec, = to_vec(c) @@ -61,8 +61,8 @@ end (JA, Jb) = FiniteDifferences.jacobian(fdm, mat_example, Avec, bvec) (JA′, Jb′) = Zygote.jacobian(mat_example, Avec, bvec) - @test JA ≈ JA′ rtol = cond(A) * precision(T) - @test Jb ≈ Jb′ rtol = cond(A) * precision(T) + @test JA ≈ JA′ rtol = cond(A) * tolerance(T) + @test Jb ≈ Jb′ rtol = cond(A) * tolerance(T) end end @@ -78,7 +78,7 @@ end fun_example, Avec, bvec, cvec, dvec, evec, fvec = build_fun_example(A, b, c, d, e, f; - tol=precision(T), + tol=tolerance(T), krylovdim=20) (JA, Jb, Jc, Jd, Je, Jf) = FiniteDifferences.jacobian(fdm, fun_example, diff --git a/test/eigsolve.jl b/test/eigsolve.jl index 5fa0965..717d95e 100644 --- a/test/eigsolve.jl +++ b/test/eigsolve.jl @@ -1,44 +1,57 @@ -@testset "Lanczos - eigsolve full" begin - @testset for T in (Float32, Float64, ComplexF32, ComplexF64) - @testset for orth in (cgs2, mgs2, cgsr, mgsr) +@testset "Lanczos - eigsolve full ($mode)" for mode in (:vector, :inplace, :outplace) + scalartypes = mode === :vector ? (Float32, Float64, ComplexF32, ComplexF64) : + (ComplexF64,) + orths = mode === :vector ? (cgs2, mgs2, cgsr, mgsr) : (mgsr,) + @testset for T in scalartypes + @testset for orth in orths A = rand(T, (n, n)) .- one(T) / 2 A = (A + A') / 2 v = rand(T, (n,)) n1 = div(n, 2) - D1, V1, info = eigsolve(wrapop(A), wrapvec(v), n1, :SR; orth=orth, krylovdim=n, - maxiter=1, tol=precision(T), verbosity=2) - @test KrylovKit.eigselector(A, eltype(v); orth=orth, krylovdim=n, maxiter=1, - tol=precision(T)) isa Lanczos + D1, V1, info = eigsolve(wrapop(A, Val(mode)), wrapvec(v, Val(mode)), n1, :SR; + krylovdim=n, + maxiter=1, tol=tolerance(T), verbosity=2) + @test KrylovKit.eigselector(wrapop(A, Val(mode)), scalartype(v); krylovdim=n, + maxiter=1, + tol=tolerance(T), ishermitian=true) isa Lanczos n2 = n - n1 - alg = Lanczos(; orth=orth, krylovdim=2 * n, maxiter=1, tol=precision(T), + alg = Lanczos(; krylovdim=2 * n, maxiter=1, tol=tolerance(T), verbosity=1) - D2, V2, info = @constinferred eigsolve(wrapop(A), wrapvec(v), n2, :LR, alg) + D2, V2, info = @constinferred eigsolve(wrapop(A, Val(mode)), + wrapvec(v, Val(mode)), + n2, :LR, alg) @test vcat(D1[1:n1], reverse(D2[1:n2])) ≊ eigvals(A) - U1 = hcat(unwrapvec.(V1)...) - U2 = hcat(unwrapvec.(V2)...) + U1 = stack(unwrapvec, V1) + U2 = stack(unwrapvec, V2) @test U1' * U1 ≈ I @test U2' * U2 ≈ I @test A * U1 ≈ U1 * Diagonal(D1) @test A * U2 ≈ U2 * Diagonal(D2) - _ = eigsolve(wrapop(A), wrapvec(v), n + 1, :LM; orth=orth, krylovdim=2n, - maxiter=1, tol=precision(T), verbosity=0) + _ = eigsolve(wrapop(A, Val(mode)), wrapvec(v, Val(mode)), n + 1, :LM; + krylovdim=2n, + maxiter=1, tol=tolerance(T), verbosity=0) end end end -@testset "Lanczos - eigsolve iteratively" begin - @testset for T in (Float32, Float64, ComplexF32, ComplexF64) - @testset for orth in (cgs2, mgs2, cgsr, mgsr) +@testset "Lanczos - eigsolve iteratively ($mode)" for mode in (:vector, :inplace, :outplace) + scalartypes = mode === :vector ? (Float32, Float64, ComplexF32, ComplexF64) : + (ComplexF64,) + orths = mode === :vector ? 
(cgs2, mgs2, cgsr, mgsr) : (mgsr,) + @testset for T in scalartypes + @testset for orth in orths A = rand(T, (N, N)) .- one(T) / 2 A = (A + A') / 2 v = rand(T, (N,)) - alg = Lanczos(; orth=orth, krylovdim=2 * n, maxiter=10, - tol=precision(T), eager=true) - D1, V1, info1 = @constinferred eigsolve(wrapop(A), wrapvec(v), n, :SR, alg) - D2, V2, info2 = eigsolve(wrapop(A), wrapvec(v), n, :LR, alg) + alg = Lanczos(; krylovdim=2 * n, maxiter=10, + tol=tolerance(T), eager=true) + D1, V1, info1 = @constinferred eigsolve(wrapop(A, Val(mode)), + wrapvec(v, Val(mode)), n, :SR, alg) + D2, V2, info2 = eigsolve(wrapop(A, Val(mode)), wrapvec(v, Val(mode)), n, :LR, + alg) l1 = info1.converged l2 = info2.converged @@ -47,73 +60,90 @@ end @test D1[1:l1] ≈ eigvals(A)[1:l1] @test D2[1:l2] ≈ eigvals(A)[N:-1:(N - l2 + 1)] - U1 = hcat(unwrapvec.(V1)...) - U2 = hcat(unwrapvec.(V2)...) + U1 = stack(unwrapvec, V1) + U2 = stack(unwrapvec, V2) @test U1' * U1 ≈ I @test U2' * U2 ≈ I - R1 = hcat(unwrapvec.(info1.residual)...) - R2 = hcat(unwrapvec.(info2.residual)...) + R1 = stack(unwrapvec, info1.residual) + R2 = stack(unwrapvec, info2.residual) @test A * U1 ≈ U1 * Diagonal(D1) + R1 @test A * U2 ≈ U2 * Diagonal(D2) + R2 end end end -@testset "Arnoldi - eigsolve full" begin - @testset for T in (Float32, Float64, ComplexF32, ComplexF64) - @testset for orth in (cgs2, mgs2, cgsr, mgsr) +@testset "Arnoldi - eigsolve full ($mode)" for mode in (:vector, :inplace, :outplace) + scalartypes = mode === :vector ? (Float32, Float64, ComplexF32, ComplexF64) : + (ComplexF64,) + orths = mode === :vector ? (cgs2, mgs2, cgsr, mgsr) : (mgsr,) + @testset for T in scalartypes + @testset for orth in orths A = rand(T, (n, n)) .- one(T) / 2 v = rand(T, (n,)) n1 = div(n, 2) - D1, V1, info1 = eigsolve(wrapop(A), wrapvec(v), n1, :SR; orth=orth, krylovdim=n, - maxiter=1, tol=precision(T), verbosity=2) - @test KrylovKit.eigselector(A, eltype(v); orth=orth, krylovdim=n, maxiter=1, - tol=precision(T)) isa Arnoldi + D1, V1, info1 = eigsolve(wrapop(A, Val(mode)), wrapvec(v, Val(mode)), n1, :SR; + orth=orth, krylovdim=n, + maxiter=1, tol=tolerance(T), verbosity=2) + @test KrylovKit.eigselector(wrapop(A, Val(mode)), eltype(v); orth=orth, + krylovdim=n, maxiter=1, + tol=tolerance(T)) isa Arnoldi n2 = n - n1 - alg = Arnoldi(; orth=orth, krylovdim=2 * n, maxiter=1, tol=precision(T), + alg = Arnoldi(; orth=orth, krylovdim=2 * n, maxiter=1, tol=tolerance(T), verbosity=1) - D2, V2, info2 = @constinferred eigsolve(wrapop(A), wrapvec(v), n2, :LR, alg) + D2, V2, info2 = @constinferred eigsolve(wrapop(A, Val(mode)), + wrapvec(v, Val(mode)), n2, :LR, alg) D = sort(sort(eigvals(A); by=imag, rev=true); alg=MergeSort, by=real) D2′ = sort(sort(D2; by=imag, rev=true); alg=MergeSort, by=real) @test vcat(D1[1:n1], D2′[(end - n2 + 1):end]) ≈ D - U1 = hcat(unwrapvec.(V1)...) - U2 = hcat(unwrapvec.(V2)...) + U1 = stack(unwrapvec, V1) + U2 = stack(unwrapvec, V2) @test A * U1 ≈ U1 * Diagonal(D1) @test A * U2 ≈ U2 * Diagonal(D2) if T <: Complex n1 = div(n, 2) - D1, V1, info = eigsolve(wrapop(A), wrapvec(v), n1, :SI, alg) + D1, V1, info = eigsolve(wrapop(A, Val(mode)), wrapvec(v, Val(mode)), n1, + :SI, + alg) n2 = n - n1 - D2, V2, info = eigsolve(wrapop(A), wrapvec(v), n2, :LI, alg) + D2, V2, info = eigsolve(wrapop(A, Val(mode)), wrapvec(v, Val(mode)), n2, + :LI, + alg) D = sort(eigvals(A); by=imag) @test vcat(D1[1:n1], reverse(D2[1:n2])) ≊ D - U1 = hcat(unwrapvec.(V1)...) - U2 = hcat(unwrapvec.(V2)...) 
+ U1 = stack(unwrapvec, V1) + U2 = stack(unwrapvec, V2) @test A * U1 ≈ U1 * Diagonal(D1) @test A * U2 ≈ U2 * Diagonal(D2) end - _ = eigsolve(wrapop(A), wrapvec(v), n + 1, :LM; orth=orth, krylovdim=2n, - maxiter=1, tol=precision(T), verbosity=0) + _ = eigsolve(wrapop(A, Val(mode)), wrapvec(v, Val(mode)), n + 1, :LM; orth=orth, + krylovdim=2n, + maxiter=1, tol=tolerance(T), verbosity=0) end end end -@testset "Arnoldi - eigsolve iteratively" begin - @testset for T in (Float32, Float64, ComplexF32, ComplexF64) - @testset for orth in (cgs2, mgs2, cgsr, mgsr) +@testset "Arnoldi - eigsolve iteratively ($mode)" for mode in (:vector, :inplace, :outplace) + scalartypes = mode === :vector ? (Float32, Float64, ComplexF32, ComplexF64) : + (ComplexF64,) + orths = mode === :vector ? (cgs2, mgs2, cgsr, mgsr) : (mgsr,) + @testset for T in scalartypes + @testset for orth in orths A = rand(T, (N, N)) .- one(T) / 2 v = rand(T, (N,)) - alg = Arnoldi(; orth=orth, krylovdim=3 * n, maxiter=20, - tol=precision(T), eager=true) - D1, V1, info1 = @constinferred eigsolve(wrapop(A), wrapvec(v), n, :SR, alg) - D2, V2, info2 = eigsolve(wrapop(A), wrapvec(v), n, :LR, alg) - D3, V3, info3 = eigsolve(wrapop(A), wrapvec(v), n, :LM, alg) + alg = Arnoldi(; krylovdim=3 * n, maxiter=20, + tol=tolerance(T), eager=true) + D1, V1, info1 = @constinferred eigsolve(wrapop(A, Val(mode)), + wrapvec(v, Val(mode)), n, :SR, alg) + D2, V2, info2 = eigsolve(wrapop(A, Val(mode)), wrapvec(v, Val(mode)), n, :LR, + alg) + D3, V3, info3 = eigsolve(wrapop(A, Val(mode)), wrapvec(v, Val(mode)), n, :LM, + alg) D = sort(eigvals(A); by=imag, rev=true) l1 = info1.converged @@ -128,19 +158,21 @@ end # in absolute value, so we perform a second sort afterwards using the real part @test D3[1:l3] ≊ sort(D; by=abs, rev=true)[1:l3] - U1 = hcat(unwrapvec.(V1)...) - U2 = hcat(unwrapvec.(V2)...) - U3 = hcat(unwrapvec.(V3)...) - R1 = hcat(unwrapvec.(info1.residual)...) - R2 = hcat(unwrapvec.(info2.residual)...) - R3 = hcat(unwrapvec.(info3.residual)...) + U1 = stack(unwrapvec, V1) + U2 = stack(unwrapvec, V2) + U3 = stack(unwrapvec, V3) + R1 = stack(unwrapvec, info1.residual) + R2 = stack(unwrapvec, info2.residual) + R3 = stack(unwrapvec, info3.residual) @test A * U1 ≈ U1 * Diagonal(D1) + R1 @test A * U2 ≈ U2 * Diagonal(D2) + R2 @test A * U3 ≈ U3 * Diagonal(D3) + R3 if T <: Complex - D1, V1, info1 = eigsolve(wrapop(A), wrapvec(v), n, :SI, alg) - D2, V2, info2 = eigsolve(wrapop(A), wrapvec(v), n, :LI, alg) + D1, V1, info1 = eigsolve(wrapop(A, Val(mode)), wrapvec(v, Val(mode)), n, + :SI, alg) + D2, V2, info2 = eigsolve(wrapop(A, Val(mode)), wrapvec(v, Val(mode)), n, + :LI, alg) D = eigvals(A) l1 = info1.converged @@ -150,10 +182,10 @@ end @test D1[1:l1] ≈ sort(D; by=imag)[1:l1] @test D2[1:l2] ≈ sort(D; by=imag, rev=true)[1:l2] - U1 = hcat(unwrapvec.(V1)...) - U2 = hcat(unwrapvec.(V2)...) - R1 = hcat(unwrapvec.(info1.residual)...) - R2 = hcat(unwrapvec.(info2.residual)...) 
+ U1 = stack(unwrapvec, V1) + U2 = stack(unwrapvec, V2) + R1 = stack(unwrapvec, info1.residual) + R2 = stack(unwrapvec, info2.residual) @test A * U1 ≈ U1 * Diagonal(D1) + R1 @test A * U2 ≈ U2 * Diagonal(D2) + R2 end diff --git a/test/expintegrator.jl b/test/expintegrator.jl index 89ed3bb..fd48972 100644 --- a/test/expintegrator.jl +++ b/test/expintegrator.jl @@ -12,30 +12,36 @@ function ϕ(A, v, p) return exp(A′)[1:m, end] end -@testset "Lanczos - expintegrator full" begin - @testset for T in (Float32, Float64, ComplexF32, ComplexF64) - @testset for orth in (cgs2, mgs2, cgsr, mgsr) +@testset "Lanczos - expintegrator full ($mode)" for mode in (:vector, :inplace, :outplace) + scalartypes = mode === :vector ? (Float32, Float64, ComplexF32, ComplexF64) : + (ComplexF64,) + orths = mode === :vector ? (cgs2, mgs2, cgsr, mgsr) : (mgsr,) + @testset for T in scalartypes + @testset for orth in orths A = rand(T, (n, n)) .- one(T) / 2 A = (A + A') / 2 V = one(A) W = zero(A) - alg = Lanczos(; orth=orth, krylovdim=n, maxiter=2, tol=precision(T), + alg = Lanczos(; orth=orth, krylovdim=n, maxiter=2, tol=tolerance(T), verbosity=2) for k in 1:n - W[:, k] = unwrapvec(first(@constinferred exponentiate(wrapop(A), 1, - wrapvec(view(V, :, k)), + W[:, k] = unwrapvec(first(@constinferred exponentiate(wrapop(A, Val(mode)), + 1, + wrapvec(view(V, :, k), + Val(mode)), alg))) end @test W ≈ exp(A) pmax = 5 - alg = Lanczos(; orth=orth, krylovdim=n, maxiter=2, tol=precision(T), + alg = Lanczos(; orth=orth, krylovdim=n, maxiter=2, tol=tolerance(T), verbosity=1) for t in (rand(real(T)), -rand(real(T)), im * randn(real(T)), randn(real(T)) + im * randn(real(T))) for p in 1:pmax u = ntuple(i -> rand(T, n), p + 1) - w, info = @constinferred expintegrator(wrapop(A), t, wrapvec.(u), alg) + w, info = @constinferred expintegrator(wrapop(A, Val(mode)), t, + wrapvec.(u, Ref(Val(mode))), alg) w2 = exp(t * A) * u[1] for j in 1:p w2 .+= t^j * ϕ(t * A, u[j + 1], j) @@ -48,29 +54,35 @@ end end end -@testset "Arnoldi - expintegrator full" begin - @testset for T in (Float32, Float64, ComplexF32, ComplexF64) - @testset for orth in (cgs2, mgs2, cgsr, mgsr) +@testset "Arnoldi - expintegrator full ($mode)" for mode in (:vector, :inplace, :outplace) + scalartypes = mode === :vector ? (Float32, Float64, ComplexF32, ComplexF64) : + (ComplexF64,) + orths = mode === :vector ? 
(cgs2, mgs2, cgsr, mgsr) : (mgsr,) + @testset for T in scalartypes + @testset for orth in orths A = rand(T, (n, n)) .- one(T) / 2 V = one(A) W = zero(A) - alg = Arnoldi(; orth=orth, krylovdim=n, maxiter=2, tol=precision(T), + alg = Arnoldi(; orth=orth, krylovdim=n, maxiter=2, tol=tolerance(T), verbosity=2) for k in 1:n - W[:, k] = unwrapvec(first(@constinferred exponentiate(wrapop(A), 1, - wrapvec(view(V, :, k)), + W[:, k] = unwrapvec(first(@constinferred exponentiate(wrapop(A, Val(mode)), + 1, + wrapvec(view(V, :, k), + Val(mode)), alg))) end @test W ≈ exp(A) pmax = 5 - alg = Arnoldi(; orth=orth, krylovdim=n, maxiter=2, tol=precision(T), + alg = Arnoldi(; orth=orth, krylovdim=n, maxiter=2, tol=tolerance(T), verbosity=1) for t in (rand(real(T)), -rand(real(T)), im * randn(real(T)), randn(real(T)) + im * randn(real(T))) for p in 1:pmax u = ntuple(i -> rand(T, n), p + 1) - w, info = @constinferred expintegrator(wrapop(A), t, wrapvec.(u), alg) + w, info = @constinferred expintegrator(wrapop(A, Val(mode)), t, + wrapvec.(u, Ref(Val(mode))), alg) w2 = exp(t * A) * u[1] for j in 1:p w2 .+= t^j * ϕ(t * A, u[j + 1], j) @@ -83,9 +95,13 @@ end end end -@testset "Lanczos - expintegrator iteratively" begin - @testset for T in (Float32, Float64, ComplexF32, ComplexF64) - @testset for orth in (cgs2, mgs2, cgsr, mgsr) +@testset "Lanczos - expintegrator iteratively ($mode)" for mode in + (:vector, :inplace, :outplace) + scalartypes = mode === :vector ? (Float32, Float64, ComplexF32, ComplexF64) : + (ComplexF64,) + orths = mode === :vector ? (cgs2, mgs2, cgsr, mgsr) : (mgsr,) + @testset for T in scalartypes + @testset for orth in orths A = rand(T, (N, N)) .- one(T) / 2 A = (A + A') / 2 s = norm(eigvals(A), 1) @@ -95,7 +111,8 @@ end randn(real(T)) + im * randn(real(T))) for p in 1:pmax u = ntuple(i -> rand(T, N), p + 1) - w1, info = @constinferred expintegrator(wrapop(A), t, wrapvec.(u)...; + w1, info = @constinferred expintegrator(wrapop(A, Val(mode)), t, + wrapvec.(u, Ref(Val(mode)))...; maxiter=100, krylovdim=n, eager=true) @assert info.converged > 0 @@ -104,19 +121,24 @@ end w2 .+= t^j * ϕ(t * A, u[j + 1], j) end @test w2 ≈ unwrapvec(w1) - w1, info = @constinferred expintegrator(wrapop(A), t, wrapvec.(u)...; + w1, info = @constinferred expintegrator(wrapop(A, Val(mode)), t, + wrapvec.(u, Ref(Val(mode)))...; maxiter=100, krylovdim=n, tol=1e-3, eager=true) - @test norm(unwrapvec(w1) - w2) < 1e-2 * abs(t) + @test unwrapvec(w1) ≈ w2 atol = 1e-2 * abs(t) end end end end end -@testset "Arnoldi - expintegrator iteratively" begin - @testset for T in (Float32, Float64, ComplexF32, ComplexF64) - @testset for orth in (cgs2, mgs2, cgsr, mgsr) +@testset "Arnoldi - expintegrator iteratively ($mode)" for mode in + (:vector, :inplace, :outplace) + scalartypes = mode === :vector ? (Float32, Float64, ComplexF32, ComplexF64) : + (ComplexF64,) + orths = mode === :vector ? 
(cgs2, mgs2, cgsr, mgsr) : (mgsr,) + @testset for T in scalartypes + @testset for orth in orths A = rand(T, (N, N)) .- one(T) / 2 s = norm(eigvals(A), 1) rmul!(A, 1 / (10 * s)) @@ -125,7 +147,8 @@ end randn(real(T)) + im * randn(real(T))) for p in 1:pmax u = ntuple(i -> rand(T, N), p + 1) - w1, info = @constinferred expintegrator(wrapop(A), t, wrapvec.(u)...; + w1, info = @constinferred expintegrator(wrapop(A, Val(mode)), t, + wrapvec.(u, Ref(Val(mode)))...; maxiter=100, krylovdim=n, eager=true) @test info.converged > 0 @@ -134,10 +157,11 @@ end w2 .+= t^j * ϕ(t * A, u[j + 1], j) end @test w2 ≈ unwrapvec(w1) - w1, info = @constinferred expintegrator(wrapop(A), t, wrapvec.(u)...; + w1, info = @constinferred expintegrator(wrapop(A, Val(mode)), t, + wrapvec.(u, Ref(Val(mode)))...; maxiter=100, krylovdim=n, tol=1e-3, eager=true) - @test norm(unwrapvec(w1) - w2) < 1e-2 * abs(t) + @test unwrapvec(w1) ≈ w2 atol = 1e-2 * abs(t) end end end diff --git a/test/factorize.jl b/test/factorize.jl index ea511ac..467b195 100644 --- a/test/factorize.jl +++ b/test/factorize.jl @@ -1,11 +1,15 @@ # Test complete Lanczos factorization -@testset "Complete Lanczos factorization" begin - @testset for T in (Float32, Float64, ComplexF32, ComplexF64) - @testset for orth in (cgs2, mgs2, cgsr, mgsr) # tests fail miserably for cgs and mgs +@testset "Complete Lanczos factorization ($mode)" for mode in (:vector, :inplace, :outplace) + scalartypes = mode === :vector ? (Float32, Float64, ComplexF32, ComplexF64) : + (ComplexF64,) + orths = mode === :vector ? (cgs2, mgs2, cgsr, mgsr) : (cgs2,) + + @testset for T in scalartypes + @testset for orth in orths # tests fail miserably for cgs and mgs A = rand(T, (n, n)) v = rand(T, (n,)) A = (A + A') - iter = LanczosIterator(wrapop(A), wrapvec(v), orth) + iter = LanczosIterator(wrapop(A, Val(mode)), wrapvec(v, Val(mode)), orth) verbosity = 1 fact = @constinferred initialize(iter; verbosity=verbosity) while length(fact) < n @@ -13,7 +17,7 @@ verbosity = 0 end - V = hcat(unwrapvec.(basis(fact))...) + V = stack(unwrapvec, basis(fact)) H = rayleighquotient(fact) @test normres(fact) < 10 * n * eps(real(T)) @test V' * V ≈ I @@ -27,12 +31,16 @@ end # Test complete Arnoldi factorization -@testset "Complete Arnoldi factorization" begin - @testset for T in (Float32, Float64, ComplexF32, ComplexF64) - @testset for orth in (cgs, mgs, cgs2, mgs2, cgsr, mgsr) +@testset "Complete Arnoldi factorization ($mode)" for mode in (:vector, :inplace, :outplace) + scalartypes = mode === :vector ? (Float32, Float64, ComplexF32, ComplexF64) : + (ComplexF64,) + orths = mode === :vector ? (cgs, mgs, cgs2, mgs2, cgsr, mgsr) : (cgs2,) + + @testset for T in scalartypes + @testset for orth in orths A = rand(T, (n, n)) v = rand(T, (n,)) - iter = ArnoldiIterator(wrapop(A), wrapvec(v), orth) + iter = ArnoldiIterator(wrapop(A, Val(mode)), wrapvec(v, Val(mode)), orth) verbosity = 1 fact = @constinferred initialize(iter; verbosity=verbosity) while length(fact) < n @@ -40,7 +48,7 @@ end verbosity = 0 end - V = hcat(unwrapvec.(basis(fact))...) + V = stack(unwrapvec, basis(fact)) H = rayleighquotient(fact) factor = (orth == cgs || orth == mgs ? 
250 : 10) @test normres(fact) < factor * n * eps(real(T)) @@ -55,10 +63,15 @@ end end # Test incomplete Lanczos factorization -@testset "Incomplete Lanczos factorization" begin - @testset for T in (Float32, Float64, ComplexF32, ComplexF64, Complex{Int}) - @testset for orth in (cgs2, mgs2, cgsr, mgsr) # tests fail miserably for cgs and mgs - if T == Complex{Int} +@testset "Incomplete Lanczos factorization ($mode)" for mode in + (:vector, :inplace, :outplace) + scalartypes = mode === :vector ? + (Float32, Float64, ComplexF32, ComplexF64, Complex{Int}) : (ComplexF64,) + orths = mode === :vector ? (cgs2, mgs2, cgsr, mgsr) : (cgs2,) + + @testset for T in scalartypes + @testset for orth in orths # tests fail miserably for cgs and mgs + if T === Complex{Int} A = rand(-100:100, (N, N)) + im * rand(-100:100, (N, N)) v = rand(-100:100, (N,)) else @@ -66,14 +79,15 @@ end v = rand(T, (N,)) end A = (A + A') - iter = @constinferred LanczosIterator(wrapop(A), wrapvec(v), orth) + iter = @constinferred LanczosIterator(wrapop(A, Val(mode)), + wrapvec(v, Val(mode)), + orth) krylovdim = n fact = @constinferred initialize(iter) while normres(fact) > eps(float(real(T))) && length(fact) < krylovdim @constinferred expand!(iter, fact) - Ṽ, H, r̃, β, e = fact - V = hcat(unwrapvec.(Ṽ)...) + V = stack(unwrapvec, Ṽ) r = unwrapvec(r̃) @test V' * V ≈ I @test norm(r) ≈ β @@ -81,9 +95,9 @@ end end fact = @constinferred shrink!(fact, div(n, 2)) - V = hcat(unwrapvec.(@constinferred basis(fact))...) + V = stack(unwrapvec, @constinferred basis(fact)) H = @constinferred rayleighquotient(fact) - r = unwrapvec(@constinferred residual(fact)) + r = @constinferred unwrapvec(residual(fact)) β = @constinferred normres(fact) e = @constinferred rayleighextension(fact) @test V' * V ≈ I @@ -94,24 +108,29 @@ end end # Test incomplete Arnoldi factorization -@testset "Incomplete Arnoldi factorization" begin - @testset for T in (Float32, Float64, ComplexF32, ComplexF64, Complex{Int}) - @testset for orth in (cgs, mgs, cgs2, mgs2, cgsr, mgsr) - if T == Complex{Int} +@testset "Incomplete Arnoldi factorization ($mode)" for mode in + (:vector, :inplace, :outplace) + scalartypes = mode === :vector ? + (Float32, Float64, ComplexF32, ComplexF64, Complex{Int}) : (ComplexF64,) + orths = mode === :vector ? (cgs2, mgs2, cgsr, mgsr) : (cgs2,) + + @testset for T in scalartypes + @testset for orth in orths + if T === Complex{Int} A = rand(-100:100, (N, N)) + im * rand(-100:100, (N, N)) v = rand(-100:100, (N,)) else A = rand(T, (N, N)) v = rand(T, (N,)) end - iter = @constinferred ArnoldiIterator(wrapop(A), wrapvec(v), orth) + iter = @constinferred ArnoldiIterator(wrapop(A, Val(mode)), + wrapvec(v, Val(mode)), orth) krylovdim = 3 * n fact = @constinferred initialize(iter) while normres(fact) > eps(float(real(T))) && length(fact) < krylovdim @constinferred expand!(iter, fact) - Ṽ, H, r̃, β, e = fact - V = hcat(unwrapvec.(Ṽ)...) + V = stack(unwrapvec, Ṽ) r = unwrapvec(r̃) @test V' * V ≈ I @test norm(r) ≈ β @@ -119,7 +138,7 @@ end end fact = @constinferred shrink!(fact, div(n, 2)) - V = hcat(unwrapvec.(@constinferred basis(fact))...) 
+ V = stack(unwrapvec, @constinferred basis(fact)) H = @constinferred rayleighquotient(fact) r = unwrapvec(@constinferred residual(fact)) β = @constinferred normres(fact) diff --git a/test/geneigsolve.jl b/test/geneigsolve.jl index 4dc8ba5..d154b81 100644 --- a/test/geneigsolve.jl +++ b/test/geneigsolve.jl @@ -1,30 +1,38 @@ -@testset "GolubYe - geneigsolve full" begin - @testset for T in (Float32, Float64, ComplexF32, ComplexF64) - @testset for orth in (cgs2, mgs2, cgsr, mgsr) +@testset "GolubYe - geneigsolve full ($mode)" for mode in (:vector, :inplace, :outplace) + scalartypes = mode === :vector ? (Float32, Float64, ComplexF32, ComplexF64) : + (ComplexF64,) + orths = mode === :vector ? (cgs2, mgs2, cgsr, mgsr) : (mgsr,) + @testset for T in scalartypes + @testset for orth in orths A = rand(T, (n, n)) .- one(T) / 2 A = (A + A') / 2 B = rand(T, (n, n)) .- one(T) / 2 B = sqrt(B * B') v = rand(T, (n,)) - alg = GolubYe(; orth=orth, krylovdim=n, maxiter=1, tol=precision(T), + alg = GolubYe(; orth=orth, krylovdim=n, maxiter=1, tol=tolerance(T), verbosity=1) n1 = div(n, 2) - D1, V1, info = @constinferred geneigsolve((wrapop(A), wrapop(B)), wrapvec(v), + D1, V1, info = @constinferred geneigsolve((wrapop(A, Val(mode)), + wrapop(B, Val(mode))), + wrapvec(v, Val(mode)), n1, :SR; orth=orth, krylovdim=n, - maxiter=1, tol=precision(T), + maxiter=1, tol=tolerance(T), ishermitian=true, isposdef=true, verbosity=2) - @test KrylovKit.geneigselector((A, B), eltype(v); orth=orth, krylovdim=n, - maxiter=1, tol=precision(T), ishermitian=true, + @test KrylovKit.geneigselector((wrapop(A, Val(mode)), wrapop(B, Val(mode))), + scalartype(v); orth=orth, krylovdim=n, + maxiter=1, tol=tolerance(T), ishermitian=true, isposdef=true) isa GolubYe n2 = n - n1 - D2, V2, info = @constinferred geneigsolve((wrapop(A), wrapop(B)), wrapvec(v), + D2, V2, info = @constinferred geneigsolve((wrapop(A, Val(mode)), + wrapop(B, Val(mode))), + wrapvec(v, Val(mode)), n2, :LR, alg) @test vcat(D1[1:n1], reverse(D2[1:n2])) ≈ eigvals(A, B) - U1 = hcat(unwrapvec.(V1)...) - U2 = hcat(unwrapvec.(V2)...) + U1 = stack(unwrapvec, V1) + U2 = stack(unwrapvec, V2) @test U1' * B * U1 ≈ I @test U2' * B * U2 ≈ I @@ -34,19 +42,26 @@ end end -@testset "GolubYe - geneigsolve iteratively" begin - @testset for T in (Float64, ComplexF64) - @testset for orth in (cgs2, mgs2, cgsr, mgsr) +@testset "GolubYe - geneigsolve iteratively ($mode)" for mode in + (:vector, :inplace, :outplace) + scalartypes = mode === :vector ? (Float64, ComplexF64) : + (ComplexF64,) + orths = mode === :vector ? (cgs2, mgs2, cgsr, mgsr) : (mgsr,) + @testset for T in scalartypes + @testset for orth in orths A = rand(T, (N, N)) .- one(T) / 2 A = (A + A') / 2 B = rand(T, (N, N)) .- one(T) / 2 B = sqrt(B * B') v = rand(T, (N,)) alg = GolubYe(; orth=orth, krylovdim=3 * n, maxiter=100, - tol=cond(B) * precision(T)) - D1, V1, info1 = @constinferred geneigsolve((wrapop(A), wrapop(B)), wrapvec(v), + tol=cond(B) * tolerance(T)) + D1, V1, info1 = @constinferred geneigsolve((wrapop(A, Val(mode)), + wrapop(B, Val(mode))), + wrapvec(v, Val(mode)), n, :SR, alg) - D2, V2, info2 = geneigsolve((wrapop(A), wrapop(B)), wrapvec(v), n, :LR, alg) + D2, V2, info2 = geneigsolve((wrapop(A, Val(mode)), wrapop(B, Val(mode))), + wrapvec(v, Val(mode)), n, :LR, alg) l1 = info1.converged l2 = info2.converged @@ -55,13 +70,13 @@ end @test D1[1:l1] ≊ eigvals(A, B)[1:l1] @test D2[1:l2] ≊ eigvals(A, B)[N:-1:(N - l2 + 1)] - U1 = hcat(unwrapvec.(V1)...) - U2 = hcat(unwrapvec.(V2)...) 
+ U1 = stack(unwrapvec, V1) + U2 = stack(unwrapvec, V2) @test U1' * B * U1 ≈ I @test U2' * B * U2 ≈ I - R1 = hcat(unwrapvec.(info1.residual)...) - R2 = hcat(unwrapvec.(info2.residual)...) + R1 = stack(unwrapvec, info1.residual) + R2 = stack(unwrapvec, info2.residual) @test A * U1 ≈ B * U1 * Diagonal(D1) + R1 @test A * U2 ≈ B * U2 * Diagonal(D2) + R2 end diff --git a/test/gklfactorize.jl b/test/gklfactorize.jl index dae772f..471e01b 100644 --- a/test/gklfactorize.jl +++ b/test/gklfactorize.jl @@ -1,10 +1,15 @@ # Test complete Golub-Kahan-Lanczos factorization -@testset "Complete Golub-Kahan-Lanczos factorization" begin - @testset for T in (Float32, Float64, ComplexF32, ComplexF64) - @testset for orth in (cgs2, mgs2, cgsr, mgsr) +@testset "Complete Golub-Kahan-Lanczos factorization ($mode)" for mode in + (:vector, :inplace, + :outplace, :mixed) + scalartypes = mode === :vector ? (Float32, Float64, ComplexF32, ComplexF64) : + (ComplexF64,) + orths = mode === :vector ? (cgs2, mgs2, cgsr, mgsr) : (mgsr,) + @testset for T in scalartypes + @testset for orth in orths A = rand(T, (n, n)) v = A * rand(T, (n,)) # ensure v is in column space of A - iter = GKLIterator(wrapop(A), wrapvec2(v), orth) + iter = GKLIterator(wrapop(A, Val(mode)), wrapvec(v, Val(mode)), orth) verbosity = 1 fact = @constinferred initialize(iter; verbosity=verbosity) while length(fact) < n @@ -12,8 +17,8 @@ verbosity = 0 end - U = hcat(unwrapvec2.(basis(fact, :U))...) - V = hcat(unwrapvec.(basis(fact, :V))...) + U = stack(unwrapvec, basis(fact, :U)) + V = stack(unwrapvec, basis(fact, :V)) B = rayleighquotient(fact) @test normres(fact) < 10 * n * eps(real(T)) @test U' * U ≈ I @@ -29,9 +34,14 @@ end # Test incomplete Golub-Kahan-Lanczos factorization -@testset "Incomplete Golub-Kahan-Lanczos factorization" begin - @testset for T in (Float32, Float64, ComplexF32, ComplexF64, Complex{Int}) - @testset for orth in (cgs2, mgs2, cgsr, mgsr) +@testset "Incomplete Golub-Kahan-Lanczos factorization ($mode)" for mode in + (:vector, :inplace, + :outplace, :mixed) + scalartypes = mode === :vector ? (Float32, Float64, ComplexF32, ComplexF64) : + (ComplexF64,) + orths = mode === :vector ? (cgs2, mgs2, cgsr, mgsr) : (mgsr,) + @testset for T in scalartypes + @testset for orth in orths if T == Complex{Int} A = rand(-100:100, (N, N)) + im * rand(-100:100, (N, N)) v = rand(-100:100, (N,)) @@ -39,16 +49,16 @@ end A = rand(T, (N, N)) v = rand(T, (N,)) end - iter = @constinferred GKLIterator(wrapop(A), wrapvec2(v), orth) + iter = @constinferred GKLIterator(wrapop(A, Val(mode)), wrapvec(v, Val(mode)), + orth) krylovdim = 3 * n fact = @constinferred initialize(iter) while normres(fact) > eps(float(real(T))) && length(fact) < krylovdim @constinferred expand!(iter, fact) - Ũ, Ṽ, B, r̃, β, e = fact - U = hcat(unwrapvec2.(Ũ)...) - V = hcat(unwrapvec.(Ṽ)...) - r = unwrapvec2(r̃) + U = stack(unwrapvec, Ũ) + V = stack(unwrapvec, Ṽ) + r = unwrapvec(r̃) @test U' * U ≈ I @test V' * V ≈ I @test norm(r) ≈ β @@ -57,10 +67,10 @@ end end fact = @constinferred shrink!(fact, div(n, 2)) - U = hcat(unwrapvec2.(@constinferred basis(fact, :U))...) - V = hcat(unwrapvec.(@constinferred basis(fact, :V))...) 
+ U = stack(unwrapvec, @constinferred basis(fact, :U)) + V = stack(unwrapvec, @constinferred basis(fact, :V)) B = @constinferred rayleighquotient(fact) - r = unwrapvec2(@constinferred residual(fact)) + r = unwrapvec(@constinferred residual(fact)) β = @constinferred normres(fact) e = @constinferred rayleighextension(fact) @test U' * U ≈ I diff --git a/test/linsolve.jl b/test/linsolve.jl index 7e20b86..2246011 100644 --- a/test/linsolve.jl +++ b/test/linsolve.jl @@ -1,107 +1,123 @@ # Test CG complete -@testset "CG small problem" begin - @testset for T in (Float32, Float64, ComplexF32, ComplexF64) +@testset "CG small problem ($mode)" for mode in (:vector, :inplace, :outplace) + scalartypes = mode === :vector ? (Float32, Float64, ComplexF32, ComplexF64) : + (ComplexF64,) + @testset for T in scalartypes A = rand(T, (n, n)) A = sqrt(A * A') b = rand(T, n) - alg = CG(; maxiter=2n, tol=precision(T) * norm(b), verbosity=2) # because of loss of orthogonality, we choose maxiter = 2n - x, info = @constinferred linsolve(wrapop(A), wrapvec(b); + alg = CG(; maxiter=2n, tol=tolerance(T) * norm(b), verbosity=2) # because of loss of orthogonality, we choose maxiter = 2n + x, info = @constinferred linsolve(wrapop(A, Val(mode)), wrapvec(b, Val(mode)); ishermitian=true, isposdef=true, maxiter=2n, - krylovdim=1, rtol=precision(T), + krylovdim=1, rtol=tolerance(T), verbosity=1) @test info.converged > 0 - @test b ≈ A * unwrapvec(x) - x, info = @constinferred linsolve(wrapop(A), wrapvec(b), x; + @test unwrapvec(b) ≈ A * unwrapvec(x) + x, info = @constinferred linsolve(wrapop(A, Val(mode)), wrapvec(b, Val(mode)), x; ishermitian=true, isposdef=true, maxiter=2n, - krylovdim=1, rtol=precision(T)) + krylovdim=1, rtol=tolerance(T)) @test info.numops == 1 A = rand(T, (n, n)) A = sqrt(A * A') α₀ = rand(real(T)) + 1 α₁ = rand(real(T)) - x, info = @constinferred linsolve(wrapop(A), wrapvec(b), wrapvec(zero(b)), alg, α₀, - α₁) - @test b ≈ (α₀ * I + α₁ * A) * unwrapvec(x) + x, info = @constinferred linsolve(wrapop(A, Val(mode)), wrapvec(b, Val(mode)), + wrapvec(zerovector(b), Val(mode)), alg, α₀, α₁) + @test unwrapvec(b) ≈ (α₀ * I + α₁ * A) * unwrapvec(x) @test info.converged > 0 end end # Test CG complete -@testset "CG large problem" begin - @testset for T in (Float32, Float64, ComplexF32, ComplexF64) +@testset "CG large problem ($mode)" for mode in (:vector, :inplace, :outplace) + scalartypes = mode === :vector ? 
(Float32, Float64, ComplexF32, ComplexF64) : + (ComplexF64,) + @testset for T in scalartypes A = rand(T, (N, N)) A = sqrt(sqrt(A * A')) / N b = rand(T, N) - x, info = @constinferred linsolve(wrapop(A), wrapvec(b); + x, info = @constinferred linsolve(wrapop(A, Val(mode)), wrapvec(b, Val(mode)); isposdef=true, maxiter=1, krylovdim=N, - rtol=precision(T)) - @test b ≈ A * unwrapvec(x) + unwrapvec(info.residual) + rtol=tolerance(T)) + @test unwrapvec(b) ≈ A * unwrapvec(x) + unwrapvec(info.residual) α₀ = rand(real(T)) + 1 α₁ = rand(real(T)) - x, info = @constinferred linsolve(wrapop(A), wrapvec(b), α₀, α₁; + x, info = @constinferred linsolve(wrapop(A, Val(mode)), wrapvec(b, Val(mode)), + α₀, α₁; isposdef=true, maxiter=1, krylovdim=N, - rtol=precision(T)) - @test b ≈ (α₀ * I + α₁ * A) * unwrapvec(x) + unwrapvec(info.residual) + rtol=tolerance(T)) + @test unwrapvec(b) ≈ (α₀ * I + α₁ * A) * unwrapvec(x) + unwrapvec(info.residual) end end # Test GMRES complete -@testset "GMRES full factorization" begin - @testset for T in (Float32, Float64, ComplexF32, ComplexF64) +@testset "GMRES full factorization ($mode)" for mode in (:vector, :inplace, :outplace) + scalartypes = mode === :vector ? (Float32, Float64, ComplexF32, ComplexF64) : + (ComplexF64,) + @testset for T in scalartypes A = rand(T, (n, n)) .- one(T) / 2 b = rand(T, n) - alg = GMRES(; krylovdim=n, maxiter=2, tol=precision(T) * norm(b), verbosity=2) - x, info = @constinferred linsolve(wrapop(A), wrapvec(b); krylovdim=n, maxiter=2, - rtol=precision(T), verbosity=1) + alg = GMRES(; krylovdim=n, maxiter=2, tol=tolerance(T) * norm(b), verbosity=2) + x, info = @constinferred linsolve(wrapop(A, Val(mode)), wrapvec(b, Val(mode)); + krylovdim=n, maxiter=2, + rtol=tolerance(T), verbosity=1) @test info.converged > 0 - @test b ≈ A * unwrapvec(x) - x, info = @constinferred linsolve(wrapop(A), wrapvec(b), x; krylovdim=n, maxiter=2, - rtol=precision(T)) + @test unwrapvec(b) ≈ A * unwrapvec(x) + x, info = @constinferred linsolve(wrapop(A, Val(mode)), wrapvec(b, Val(mode)), x; + krylovdim=n, maxiter=2, + rtol=tolerance(T)) @test info.numops == 1 A = rand(T, (n, n)) α₀ = rand(T) α₁ = -rand(T) - x, info = @constinferred(linsolve(wrapop(A), wrapvec(b), wrapvec(zero(b)), alg, α₀, - α₁)) - @test b ≈ (α₀ * I + α₁ * A) * unwrapvec(x) + x, info = @constinferred(linsolve(A, b, zerovector(b), alg, α₀, α₁)) + @test unwrapvec(b) ≈ (α₀ * I + α₁ * A) * unwrapvec(x) @test info.converged > 0 end end # Test GMRES with restart -@testset "GMRES with restarts" begin - @testset for T in (Float32, Float64, ComplexF32, ComplexF64) +@testset "GMRES with restarts ($mode)" for mode in (:vector, :inplace, :outplace) + scalartypes = mode === :vector ? 
(Float32, Float64, ComplexF32, ComplexF64) : + (ComplexF64,) + @testset for T in scalartypes A = rand(T, (N, N)) .- one(T) / 2 A = I - T(9 / 10) * A / maximum(abs, eigvals(A)) b = rand(T, N) - x, info = @constinferred linsolve(wrapop(A), wrapvec(b); krylovdim=3 * n, - maxiter=50, rtol=precision(T)) - @test b ≈ A * unwrapvec(x) + unwrapvec(info.residual) + x, info = @constinferred linsolve(wrapop(A, Val(mode)), wrapvec(b, Val(mode)); + krylovdim=3 * n, + maxiter=50, rtol=tolerance(T)) + @test unwrapvec(b) ≈ A * unwrapvec(x) + unwrapvec(info.residual) A = rand(T, (N, N)) .- one(T) / 2 α₀ = maximum(abs, eigvals(A)) α₁ = -rand(T) α₁ *= T(9) / T(10) / abs(α₁) - x, info = @constinferred linsolve(wrapop(A), wrapvec(b), α₀, α₁; krylovdim=3 * n, - maxiter=50, rtol=precision(T)) - @test b ≈ (α₀ * I + α₁ * A) * unwrapvec(x) + unwrapvec(info.residual) + x, info = @constinferred linsolve(wrapop(A, Val(mode)), wrapvec(b, Val(mode)), α₀, + α₁; krylovdim=3 * n, + maxiter=50, rtol=tolerance(T)) + @test unwrapvec(b) ≈ (α₀ * I + α₁ * A) * unwrapvec(x) + unwrapvec(info.residual) end end # Test BICGStab -@testset "BiCGStab" begin - @testset for T in (Float32, Float64, ComplexF32, ComplexF64) +@testset "BiCGStab ($mode)" for mode in (:vector, :inplace, :outplace) + scalartypes = mode === :vector ? (Float32, Float64, ComplexF32, ComplexF64) : + (ComplexF64,) + @testset for T in scalartypes A = rand(T, (n, n)) .- one(T) / 2 A = I - T(9 / 10) * A / maximum(abs, eigvals(A)) b = rand(T, n) - alg = BiCGStab(; maxiter=4n, tol=precision(T) * norm(b), verbosity=2) - x, info = @constinferred linsolve(wrapop(A), wrapvec(b), wrapvec(zero(b)), alg) + alg = BiCGStab(; maxiter=4n, tol=tolerance(T) * norm(b), verbosity=2) + x, info = @constinferred linsolve(wrapop(A, Val(mode)), wrapvec(b, Val(mode)), + wrapvec(zerovector(b), Val(mode)), alg) @test info.converged > 0 - @test b ≈ A * unwrapvec(x) - x, info = @constinferred linsolve(wrapop(A), wrapvec(b), x, alg) + @test unwrapvec(b) ≈ A * unwrapvec(x) + x, info = @constinferred linsolve(wrapop(A, Val(mode)), wrapvec(b, Val(mode)), x, + alg) @test info.numops == 1 A = rand(T, (N, N)) .- one(T) / 2 @@ -109,13 +125,15 @@ end α₀ = maximum(abs, eigvals(A)) α₁ = -rand(T) α₁ *= T(9) / T(10) / abs(α₁) - alg = BiCGStab(; maxiter=2, tol=precision(T) * norm(b), verbosity=1) - x, info = @constinferred linsolve(wrapop(A), wrapvec(b), wrapvec(zero(b)), alg, α₀, + alg = BiCGStab(; maxiter=2, tol=tolerance(T) * norm(b), verbosity=1) + x, info = @constinferred linsolve(wrapop(A, Val(mode)), wrapvec(b, Val(mode)), + wrapvec(zerovector(b), Val(mode)), alg, α₀, α₁) - @test b ≈ (α₀ * I + α₁ * A) * unwrapvec(x) + unwrapvec(info.residual) - alg = BiCGStab(; maxiter=10 * N, tol=precision(T) * norm(b), verbosity=0) - x, info = @constinferred linsolve(wrapop(A), wrapvec(b), x, alg, α₀, α₁) + @test unwrapvec(b) ≈ (α₀ * I + α₁ * A) * unwrapvec(x) + unwrapvec(info.residual) + alg = BiCGStab(; maxiter=10 * N, tol=tolerance(T) * norm(b), verbosity=0) + x, info = @constinferred linsolve(wrapop(A, Val(mode)), wrapvec(b, Val(mode)), x, + alg, α₀, α₁) @test info.converged > 0 - @test b ≈ (α₀ * I + α₁ * A) * unwrapvec(x) + @test unwrapvec(b) ≈ (α₀ * I + α₁ * A) * unwrapvec(x) end end diff --git a/test/minimalvec.jl b/test/minimalvec.jl deleted file mode 100644 index be062f9..0000000 --- a/test/minimalvec.jl +++ /dev/null @@ -1,20 +0,0 @@ -struct MinimalVec{V<:AbstractVector} - vec::V -end - -Base.getindex(v::MinimalVec) = v.vec # for convience, should not interfere - -# minimal interface according to docs 
-Base.:*(a::Number, v::MinimalVec) = MinimalVec(a * v[]) - -Base.similar(v::MinimalVec) = MinimalVec(similar(v[])) - -LinearAlgebra.axpy!(α, v::MinimalVec, w::MinimalVec) = (axpy!(α, v[], w[]); return w) -function LinearAlgebra.axpby!(α, v::MinimalVec, β, w::MinimalVec) - (axpby!(α, v[], β, w[]); return w) -end -LinearAlgebra.rmul!(v::MinimalVec, α) = (rmul!(v[], α); return v) - -LinearAlgebra.mul!(w::MinimalVec, v::MinimalVec, α) = (mul!(w[], v[], α); return w) -LinearAlgebra.dot(v::MinimalVec, w::MinimalVec) = dot(v[], w[]) -LinearAlgebra.norm(v::MinimalVec) = norm(v[]) diff --git a/test/recursivevec.jl b/test/recursivevec.jl index 5b51b60..08d5a4c 100644 --- a/test/recursivevec.jl +++ b/test/recursivevec.jl @@ -3,8 +3,8 @@ @testset for orth in (cgs2, mgs2, cgsr, mgsr) A = rand(T, (n, n)) v = rand(T, (n,)) - v2 = RecursiveVec((v, zero(v))) - alg = Lanczos(; orth=orth, krylovdim=2 * n, maxiter=1, tol=precision(T)) + v2 = RecursiveVec(v, zero(v)) + alg = Lanczos(; orth=orth, krylovdim=2 * n, maxiter=1, tol=tolerance(T)) D, V, info = eigsolve(v2, n, :LR, alg) do x x1, x2 = x y1 = A * x2 @@ -27,8 +27,8 @@ end A = rand(T, (N, 2 * N)) v = rand(T, (N,)) w = rand(T, (2 * N,)) - v2 = RecursiveVec((v, w)) - alg = Lanczos(; orth=orth, krylovdim=n, maxiter=300, tol=precision(T)) + v2 = RecursiveVec(v, w) + alg = Lanczos(; orth=orth, krylovdim=n, maxiter=300, tol=tolerance(T)) n1 = div(n, 2) D, V, info = eigsolve(v2, n1, :LR, alg) do x x1, x2 = x diff --git a/test/runtests.jl b/test/runtests.jl index 3f2d1c9..16a35c3 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,15 +1,16 @@ using Random Random.seed!(76543210) -module PureVecs using Test, TestExtras using LinearAlgebra -using Random using KrylovKit +using VectorInterface -precision(T::Type{<:Number}) = eps(real(T))^(2 / 3) -include("setcomparison.jl") +include("testsetup.jl") +using ..TestSetup +# Parameters +# ---------- const n = 10 const N = 100 @@ -21,143 +22,32 @@ const mgs2 = ModifiedGramSchmidt2() const cgsr = ClassicalGramSchmidtIR(η₀) const mgsr = ModifiedGramSchmidtIR(η₀) -wrapvec(v) = v -unwrapvec(v) = v -wrapvec2(v) = v -unwrapvec2(v) = v -wrapop(A::AbstractMatrix) = A - +# Tests +# ----- t = time() include("factorize.jl") include("gklfactorize.jl") -include("linsolve.jl") -include("eigsolve.jl") -include("schursolve.jl") -include("geneigsolve.jl") -include("svdsolve.jl") -include("expintegrator.jl") -t = time() - t -println("Julia Vector type: tests finisthed in $t seconds") -end - -module MinimalVecs -using Test, TestExtras -using LinearAlgebra -using Random -using KrylovKit - -precision(T::Type{<:Number}) = eps(real(T))^(2 / 3) -include("setcomparison.jl") - -const n = 10 -const N = 100 - -const η₀ = 0.75 # seems to be necessary to get sufficient convergence for GKL iteration with Float32 precision -const cgs = ClassicalGramSchmidt() -const mgs = ModifiedGramSchmidt() -const cgs2 = ClassicalGramSchmidt2() -const mgs2 = ModifiedGramSchmidt2() -const cgsr = ClassicalGramSchmidtIR(η₀) -const mgsr = ModifiedGramSchmidtIR(η₀) -include("minimalvec.jl") - -wrapvec(v) = MinimalVec(v) -unwrapvec(v::MinimalVec) = getindex(v) -wrapvec2(v) = MinimalVec(v) -unwrapvec2(v::MinimalVec) = getindex(v) -wrapop(A::AbstractMatrix) = function (v, flag=Val(false)) - if flag === Val(true) - return wrapvec(A' * unwrapvec2(v)) - else - return wrapvec2(A * unwrapvec(v)) - end -end - -t = time() -include("factorize.jl") -include("gklfactorize.jl") include("linsolve.jl") include("eigsolve.jl") include("schursolve.jl") include("geneigsolve.jl") 
include("svdsolve.jl") include("expintegrator.jl") -t = time() - t -println("Minimal vector type: tests finisthed in $t seconds") -end - -module MixedSVD -using Test, TestExtras -using LinearAlgebra -using Random -using KrylovKit - -precision(T::Type{<:Number}) = eps(real(T))^(2 / 3) -include("setcomparison.jl") - -const n = 10 -const N = 100 - -const η₀ = 0.75 # seems to be necessary to get sufficient convergence for GKL iteration with Float32 precision -const cgs = ClassicalGramSchmidt() -const mgs = ModifiedGramSchmidt() -const cgs2 = ClassicalGramSchmidt2() -const mgs2 = ModifiedGramSchmidt2() -const cgsr = ClassicalGramSchmidtIR(η₀) -const mgsr = ModifiedGramSchmidtIR(η₀) - -include("minimalvec.jl") - -wrapvec(v) = MinimalVec(v) -unwrapvec(v::MinimalVec) = getindex(v) -wrapvec2(v) = reshape(v, (length(v), 1)) # vector type 2 is a n x 1 Matrix -unwrapvec2(v) = reshape(v, (length(v),)) -function wrapop(A::AbstractMatrix) - return (x -> wrapvec2(A * unwrapvec(x)), y -> wrapvec(A' * unwrapvec2(y))) -end - -t = time() -include("gklfactorize.jl") -include("svdsolve.jl") -t = time() - t -println("Mixed vector type for GKL/SVD: tests finisthed in $t seconds") -end - -module ExtrasTest -using Test, TestExtras -using LinearAlgebra -using Random -using KrylovKit - -precision(T::Type{<:Number}) = eps(real(T))^(2 / 3) -include("setcomparison.jl") - -const n = 10 -const N = 100 - -const η₀ = 0.75 # seems to be necessary to get sufficient convergence for GKL iteration with Float32 precision -const cgs = ClassicalGramSchmidt() -const mgs = ModifiedGramSchmidt() -const cgs2 = ClassicalGramSchmidt2() -const mgs2 = ModifiedGramSchmidt2() -const cgsr = ClassicalGramSchmidtIR(η₀) -const mgsr = ModifiedGramSchmidtIR(η₀) include("linalg.jl") include("recursivevec.jl") -end -if VERSION >= v"1.6" - include("ad.jl") -end +include("ad.jl") + +t = time() - t +println("Tests finished in $t seconds") module AquaTests using KrylovKit using Aqua Aqua.test_all(KrylovKit; ambiguities=false) # treat ambiguities special because of ambiguities between ChainRulesCore and Base -if VERSION >= v"1.6" # ChainRulesCore leads to more ambiguities on julia < v1.6 - Aqua.test_ambiguities([KrylovKit, Base, Core]; exclude=[Base.:(==)]) -end +Aqua.test_ambiguities([KrylovKit, Base, Core]; exclude=[Base.:(==)]) + end diff --git a/test/schursolve.jl b/test/schursolve.jl index 316a3d0..aebd418 100644 --- a/test/schursolve.jl +++ b/test/schursolve.jl @@ -1,21 +1,26 @@ -@testset "Arnoldi - schursolve full" begin - @testset for T in (Float32, Float64, ComplexF32, ComplexF64) - @testset for orth in (cgs2, mgs2, cgsr, mgsr) +@testset "Arnoldi - schursolve full ($mode)" for mode in (:vector, :inplace, :outplace) + scalartypes = mode === :vector ? (Float32, Float64, ComplexF32, ComplexF64) : + (ComplexF64,) + orths = mode === :vector ? 
(cgs2, mgs2, cgsr, mgsr) : (mgsr,) + @testset for T in scalartypes + @testset for orth in orths A = rand(T, (n, n)) .- one(T) / 2 v = rand(T, (n,)) - alg = Arnoldi(; orth=orth, krylovdim=n, maxiter=1, tol=precision(T)) + alg = Arnoldi(; orth=orth, krylovdim=n, maxiter=1, tol=tolerance(T)) n1 = div(n, 2) - T1, V1, D1, info1 = @constinferred schursolve(wrapop(A), wrapvec(v), n1, :SR, + T1, V1, D1, info1 = @constinferred schursolve(wrapop(A, Val(mode)), + wrapvec(v, Val(mode)), n1, :SR, alg) n2 = n - n1 - T2, V2, D2, info2 = schursolve(wrapop(A), wrapvec(v), n2, :LR, alg) + T2, V2, D2, info2 = schursolve(wrapop(A, Val(mode)), wrapvec(v, Val(mode)), n2, + :LR, alg) D = sort(sort(eigvals(A); by=imag, rev=true); alg=MergeSort, by=real) D2′ = sort(sort(D2; by=imag, rev=true); alg=MergeSort, by=real) @test vcat(D1[1:n1], D2′[(end - n2 + 1):end]) ≈ D - U1 = hcat(unwrapvec.(V1)...) - U2 = hcat(unwrapvec.(V2)...) + U1 = stack(unwrapvec, V1) + U2 = stack(unwrapvec, V2) @test U1' * U1 ≈ I @test U2' * U2 ≈ I @@ -23,16 +28,16 @@ @test A * U2 ≈ U2 * T2 if T <: Complex - n1 = div(n, 2) - T1, V1, D1, info = schursolve(wrapop(A), wrapvec(v), n1, :SI, alg) - n2 = n - n1 - T2, V2, D2, info = schursolve(wrapop(A), wrapvec(v), n2, :LI, alg) + T1, V1, D1, info = schursolve(wrapop(A, Val(mode)), wrapvec(v, Val(mode)), + n1, :SI, alg) + T2, V2, D2, info = schursolve(wrapop(A, Val(mode)), wrapvec(v, Val(mode)), + n2, :LI, alg) D = sort(eigvals(A); by=imag) @test vcat(D1[1:n1], reverse(D2[1:n2])) ≈ D - U1 = hcat(unwrapvec.(V1)...) - U2 = hcat(unwrapvec.(V2)...) + U1 = stack(unwrapvec, V1) + U2 = stack(unwrapvec, V2) @test U1' * U1 ≈ I @test U2' * U2 ≈ I @@ -43,16 +48,23 @@ end end -@testset "Arnoldi - schursolve iteratively" begin - @testset for T in (Float32, Float64, ComplexF32, ComplexF64) - @testset for orth in (cgs2, mgs2, cgsr, mgsr) +@testset "Arnoldi - schursolve iteratively ($mode)" for mode in + (:vector, :inplace, :outplace) + scalartypes = mode === :vector ? (Float32, Float64, ComplexF32, ComplexF64) : + (ComplexF64,) + orths = mode === :vector ? (cgs2, mgs2, cgsr, mgsr) : (mgsr,) + @testset for T in scalartypes + @testset for orth in orths A = rand(T, (N, N)) .- one(T) / 2 v = rand(T, (N,)) - alg = Arnoldi(; orth=orth, krylovdim=3 * n, maxiter=10, tol=precision(T)) - T1, V1, D1, info1 = @constinferred schursolve(wrapop(A), wrapvec(v), n, :SR, + alg = Arnoldi(; orth=orth, krylovdim=3 * n, maxiter=10, tol=tolerance(T)) + T1, V1, D1, info1 = @constinferred schursolve(wrapop(A, Val(mode)), + wrapvec(v, Val(mode)), n, :SR, alg) - T2, V2, D2, info2 = schursolve(wrapop(A), wrapvec(v), n, :LR, alg) - T3, V3, D3, info3 = schursolve(wrapop(A), wrapvec(v), n, :LM, alg) + T2, V2, D2, info2 = schursolve(wrapop(A, Val(mode)), wrapvec(v, Val(mode)), n, + :LR, alg) + T3, V3, D3, info3 = schursolve(wrapop(A, Val(mode)), wrapvec(v, Val(mode)), n, + :LM, alg) D = sort(eigvals(A); by=imag, rev=true) l1 = info1.converged @@ -62,23 +74,25 @@ end @test D2[1:l2] ≊ sort(D; alg=MergeSort, by=real, rev=true)[1:l2] @test D3[1:l3] ≊ sort(D; alg=MergeSort, by=abs, rev=true)[1:l3] - U1 = hcat(unwrapvec.(V1)...) - U2 = hcat(unwrapvec.(V2)...) - U3 = hcat(unwrapvec.(V3)...) + U1 = stack(unwrapvec, V1) + U2 = stack(unwrapvec, V2) + U3 = stack(unwrapvec, V3) @test U1' * U1 ≈ one(U1' * U1) @test U2' * U2 ≈ one(U2' * U2) @test U3' * U3 ≈ one(U3' * U3) - R1 = hcat(unwrapvec.(info1.residual)...) - R2 = hcat(unwrapvec.(info2.residual)...) - R3 = hcat(unwrapvec.(info3.residual)...) 
+ R1 = stack(unwrapvec, info1.residual) + R2 = stack(unwrapvec, info2.residual) + R3 = stack(unwrapvec, info3.residual) @test A * U1 ≈ U1 * T1 + R1 @test A * U2 ≈ U2 * T2 + R2 @test A * U3 ≈ U3 * T3 + R3 if T <: Complex - T1, V1, D1, info1 = schursolve(wrapop(A), wrapvec(v), n, :SI, alg) - T2, V2, D2, info2 = schursolve(wrapop(A), wrapvec(v), n, :LI, alg) + T1, V1, D1, info1 = schursolve(wrapop(A, Val(mode)), wrapvec(v, Val(mode)), + n, :SI, alg) + T2, V2, D2, info2 = schursolve(wrapop(A, Val(mode)), wrapvec(v, Val(mode)), + n, :LI, alg) D = eigvals(A) l1 = info1.converged @@ -86,13 +100,13 @@ end @test D1[1:l1] ≊ sort(D; by=imag)[1:l1] @test D2[1:l2] ≊ sort(D; by=imag, rev=true)[1:l2] - U1 = hcat(unwrapvec.(V1)...) - U2 = hcat(unwrapvec.(V2)...) + U1 = stack(unwrapvec, V1) + U2 = stack(unwrapvec, V2) @test U1[:, 1:l1]' * U1[:, 1:l1] ≈ I @test U2[:, 1:l2]' * U2[:, 1:l2] ≈ I - R1 = hcat(unwrapvec.(info1.residual)...) - R2 = hcat(unwrapvec.(info2.residual)...) + R1 = stack(unwrapvec, info1.residual) + R2 = stack(unwrapvec, info2.residual) @test A * U1 ≈ U1 * T1 + R1 @test A * U2 ≈ U2 * T2 + R2 end diff --git a/test/setcomparison.jl b/test/setcomparison.jl deleted file mode 100644 index d46170d..0000000 --- a/test/setcomparison.jl +++ /dev/null @@ -1,13 +0,0 @@ -# the following definition is used to compare sets of eigenvalues -function ≊(list1::AbstractVector, list2::AbstractVector) - length(list1) == length(list2) || return false - n = length(list1) - ind2 = collect(1:n) - p = sizehint!(Int[], n) - for i in 1:n - j = argmin(abs.(view(list2, ind2) .- list1[i])) - p = push!(p, ind2[j]) - ind2 = deleteat!(ind2, j) - end - return list1 ≈ view(list2, p) -end diff --git a/test/svdsolve.jl b/test/svdsolve.jl index 89cf815..bad5157 100644 --- a/test/svdsolve.jl +++ b/test/svdsolve.jl @@ -1,15 +1,19 @@ -@testset "GKL - svdsolve full" begin - @testset for T in (Float32, Float64, ComplexF32, ComplexF64) - @testset for orth in (cgs2, mgs2, cgsr, mgsr) +@testset "GKL - svdsolve full ($mode)" for mode in (:vector, :inplace, :outplace, :mixed) + scalartypes = mode === :vector ? (Float32, Float64, ComplexF32, ComplexF64) : + (ComplexF64,) + orths = mode === :vector ? (cgs2, mgs2, cgsr, mgsr) : (mgsr,) + @testset for T in scalartypes + @testset for orth in orths A = rand(T, (n, n)) - alg = GKL(; orth=orth, krylovdim=2 * n, maxiter=1, tol=precision(T)) - S, lvecs, rvecs, info = @constinferred svdsolve(wrapop(A), wrapvec2(A[:, 1]), n, + alg = GKL(; orth=orth, krylovdim=2 * n, maxiter=1, tol=tolerance(T)) + S, lvecs, rvecs, info = @constinferred svdsolve(wrapop(A, Val(mode)), + wrapvec(A[:, 1], Val(mode)), n, :LR, alg) @test S ≈ svdvals(A) - U = hcat(unwrapvec2.(lvecs)...) - V = hcat(unwrapvec.(rvecs)...) + U = stack(unwrapvec, lvecs) + V = stack(unwrapvec, rvecs) @test U' * U ≈ I @test V' * V ≈ I @test A * V ≈ U * Diagonal(S) @@ -17,25 +21,30 @@ end end -@testset "GKL - svdsolve iteratively" begin - @testset for T in (Float32, Float64, ComplexF32, ComplexF64) - @testset for orth in (cgs2, mgs2, cgsr, mgsr) +@testset "GKL - svdsolve iteratively ($mode)" for mode in + (:vector, :inplace, :outplace, :mixed) + scalartypes = mode === :vector ? (Float32, Float64, ComplexF32, ComplexF64) : + (ComplexF64,) + orths = mode === :vector ? 
(cgs2, mgs2, cgsr, mgsr) : (mgsr,) + @testset for T in scalartypes + @testset for orth in orths A = rand(T, (2 * N, N)) v = rand(T, (2 * N,)) n₁ = div(n, 2) - alg = GKL(; orth=orth, krylovdim=n, maxiter=10, tol=precision(T), eager=true) - S, lvecs, rvecs, info = @constinferred svdsolve(wrapop(A), wrapvec2(v), n₁, :LR, - alg) + alg = GKL(; orth=orth, krylovdim=n, maxiter=10, tol=tolerance(T), eager=true) + S, lvecs, rvecs, info = @constinferred svdsolve(wrapop(A, Val(mode)), + wrapvec(v, Val(mode)), + n₁, :LR, alg) l = info.converged @test S[1:l] ≈ svdvals(A)[1:l] - U = hcat(unwrapvec2.(lvecs)...) - V = hcat(unwrapvec.(rvecs)...) + U = stack(unwrapvec, lvecs) + V = stack(unwrapvec, rvecs) @test U[:, 1:l]' * U[:, 1:l] ≈ I @test V[:, 1:l]' * V[:, 1:l] ≈ I - R = hcat(unwrapvec2.(info.residual)...) + R = stack(unwrapvec, info.residual) @test A' * U ≈ V * Diagonal(S) @test A * V ≈ U * Diagonal(S) + R end diff --git a/test/testsetup.jl b/test/testsetup.jl new file mode 100644 index 0000000..8d4bb91 --- /dev/null +++ b/test/testsetup.jl @@ -0,0 +1,134 @@ +module TestSetup + +export tolerance, ≊, MinimalVec, isinplace, stack +export wrapop, wrapvec, unwrapvec + +import VectorInterface as VI +using VectorInterface +using LinearAlgebra: LinearAlgebra + +# Utility functions +# ----------------- +"function for determining the precision of a type" +tolerance(T::Type{<:Number}) = eps(real(T))^(2 / 3) + +"function for comparing sets of eigenvalues" +function ≊(list1::AbstractVector, list2::AbstractVector) + length(list1) == length(list2) || return false + n = length(list1) + ind2 = collect(1:n) + p = sizehint!(Int[], n) + for i in 1:n + j = argmin(abs.(view(list2, ind2) .- list1[i])) + p = push!(p, ind2[j]) + ind2 = deleteat!(ind2, j) + end + return list1 ≈ view(list2, p) +end + +# Minimal vector type +# ------------------- +""" + MinimalVec{T<:Number,IP} + +Minimal interface for a vector. Can support either in-place assignments or not, depending on +`IP=true` or `IP=false`. +""" +struct MinimalVec{IP,V<:AbstractVector} + vec::V + function MinimalVec{IP}(vec::V) where {IP,V} + return new{IP,V}(vec) + end +end +const InplaceVec{V} = MinimalVec{true,V} +const OutplaceVec{V} = MinimalVec{false,V} + +isinplace(::Type{MinimalVec{IP,V}}) where {V,IP} = IP +isinplace(v::MinimalVec) = isinplace(typeof(v)) + +VI.scalartype(::Type{<:MinimalVec{IP,V}}) where {IP,V} = scalartype(V) + +function VI.zerovector(v::MinimalVec, S::Type{<:Number}) + return MinimalVec{isinplace(v)}(zerovector(v.vec, S)) +end +function VI.zerovector!(v::InplaceVec{V}) where {V} + zerovector!(v.vec) + return v +end +VI.zerovector!!(v::MinimalVec) = isinplace(v) ? zerovector!(v) : zerovector(v) + +function VI.scale(v::MinimalVec, α::Number) + return MinimalVec{isinplace(v)}(scale(v.vec, α)) +end +function VI.scale!(v::InplaceVec{V}, α::Number) where {V} + scale!(v.vec, α) + return v +end +function VI.scale!!(v::MinimalVec, α::Number) + return isinplace(v) ? 
scale!(v, α) : scale(v, α) +end +function VI.scale!(w::InplaceVec{V}, v::InplaceVec{W}, α::Number) where {V,W} + scale!(w.vec, v.vec, α) + return w +end +function VI.scale!!(w::MinimalVec, v::MinimalVec, α::Number) + isinplace(w) && return scale!(w, v, α) + return MinimalVec{false}(scale!!(copy(w.vec), v.vec, α)) +end + +function VI.add(y::MinimalVec, x::MinimalVec, α::Number, β::Number) + return MinimalVec{isinplace(y)}(add(y.vec, x.vec, α, β)) +end +function VI.add!(y::InplaceVec{W}, x::InplaceVec{V}, α::Number, β::Number) where {W,V} + add!(y.vec, x.vec, α, β) + return y +end +function VI.add!!(y::MinimalVec, x::MinimalVec, α::Number, β::Number) + return isinplace(y) ? add!(y, x, α, β) : add(y, x, α, β) +end + +VI.inner(x::MinimalVec, y::MinimalVec) = inner(x.vec, y.vec) +VI.norm(x::MinimalVec) = LinearAlgebra.norm(x.vec) + +# Wrappers +# -------- +# dispatch on val is necessary for type stability + +function wrapvec(v, ::Val{mode}) where {mode} + return mode === :vector ? v : + mode === :inplace ? MinimalVec{true}(v) : + mode === :outplace ? MinimalVec{false}(v) : + mode === :mixed ? MinimalVec{false}(v) : + throw(ArgumentError("invalid mode ($mode)")) +end +function wrapvec2(v, ::Val{mode}) where {mode} + return mode === :mixed ? MinimalVec{true}(v) : wrapvec(v, mode) +end + +unwrapvec(v::MinimalVec) = v.vec +unwrapvec(v) = v + +function wrapop(A, ::Val{mode}) where {mode} + if mode === :vector + return A + elseif mode === :inplace || mode === :outplace + return function (v, flag=Val(false)) + if flag === Val(true) + return wrapvec(A' * unwrapvec(v), Val(mode)) + else + return wrapvec(A * unwrapvec(v), Val(mode)) + end + end + elseif mode === :mixed + return (x -> wrapvec(A * unwrapvec(x), Val(mode)), + y -> wrapvec2(A' * unwrapvec(y), Val(mode))) + else + throw(ArgumentError("invalid mode ($mode)")) + end +end + +if VERSION < v"1.9" + stack(f, itr) = mapreduce(f, hcat, itr) +end + +end
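The new `test/testsetup.jl` lets a single test pass run over several vector representations: `mode = :vector` uses plain arrays, `:inplace`/`:outplace` wrap them in a `MinimalVec` that only implements the VectorInterface methods (mutating or not), and `:mixed` exercises different left/right vector types for GKL/SVD. The sketch below shows how these helpers are meant to be combined; it is not part of the test suite, and the `include` path and problem size are illustrative assumptions.

```julia
# Minimal usage sketch (not part of the test suite): wrap a plain Hermitian
# matrix and starting vector for the :outplace mode and feed them to eigsolve.
using KrylovKit, LinearAlgebra
include("test/testsetup.jl")        # assumes the repository root as working directory
using .TestSetup: wrapop, wrapvec, unwrapvec, tolerance

A = randn(ComplexF64, 10, 10); A = (A + A') / 2
v = randn(ComplexF64, 10)

Aop = wrapop(A, Val(:outplace))     # closure acting on out-of-place MinimalVec's
v₀ = wrapvec(v, Val(:outplace))
D, V, info = eigsolve(Aop, v₀, 2, :SR; ishermitian=true, tol=tolerance(ComplexF64))

U = reduce(hcat, unwrapvec.(V))     # back to a plain matrix of Ritz vectors
@assert U' * U ≈ I
```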