diff --git a/base/array.jl b/base/array.jl
index 32c543ff12638..bec9e91580d50 100644
--- a/base/array.jl
+++ b/base/array.jl
@@ -637,7 +637,7 @@ function _collect(::Type{T}, itr, isz::SizeUnknown) where T
 end
 
 # make a collection similar to `c` and appropriate for collecting `itr`
-_similar_for(c, ::Type{T}, itr, isz, shp) where {T} = similar(c, T)
+_similar_for(c, ::Type{T}, itr, isz, shp) where {T} = similar(c, T, shp)
 
 _similar_shape(itr, ::SizeUnknown) = nothing
 _similar_shape(itr, ::HasLength) = length(itr)::Integer
@@ -699,6 +699,12 @@ collect(A::AbstractArray) = _collect_indices(axes(A), A)
 
 collect_similar(cont, itr) = _collect(cont, itr, IteratorEltype(itr), IteratorSize(itr))
 
+struct _Allocator{T}
+    f::T
+end
+similar(f::_Allocator, ::Type{T}, dims) where {T} = f.f(T, dims)
+collect_allocator(f::F, itr) where {F} = _collect(_Allocator(f), itr, IteratorEltype(itr), IteratorSize(itr))
+
 _collect(cont, itr, ::HasEltype, isz::Union{HasLength,HasShape}) =
     copyto!(_similar_for(cont, eltype(itr), itr, isz, _similar_shape(itr, isz)), itr)
 
diff --git a/base/broadcast.jl b/base/broadcast.jl
index 57eac7f3a094c..bf6a235b7229f 100644
--- a/base/broadcast.jl
+++ b/base/broadcast.jl
@@ -234,6 +234,9 @@ Base.similar(::Broadcasted{ArrayConflict}, ::Type{ElType}, dims) where ElType =
     similar(Array{ElType, length(dims)}, dims)
 Base.similar(::Broadcasted{ArrayConflict}, ::Type{Bool}, dims) =
     similar(BitArray, dims)
+# As well as the default behavior
+Base.similar(::Broadcasted, ::Type{ElType}, dims) where ElType =
+    similar(Array{ElType, length(dims)}, dims)
 
 @inline Base.axes(bc::Broadcasted) = _axes(bc, bc.axes)
 _axes(::Broadcasted, axes::Tuple) = axes
diff --git a/base/deprecated.jl b/base/deprecated.jl
index f88a53526aa37..50e62dfdb8f04 100644
--- a/base/deprecated.jl
+++ b/base/deprecated.jl
@@ -531,4 +531,8 @@ end
 
 @deprecate stat(fd::Integer) stat(RawFD(fd))
 
+function reducedim_initarray end
+const _dep_message_reducedim_initarray = ", these internals have been removed. To customize the array returned by dimensional reductions, implement mapreduce_similar instead"
+deprecate(Base, :reducedim_initarray)
+
 # END 1.12 deprecations
diff --git a/base/fastmath.jl b/base/fastmath.jl
index b82d613f1fc76..b757f16809f4d 100644
--- a/base/fastmath.jl
+++ b/base/fastmath.jl
@@ -403,11 +403,6 @@ minimum_fast(a; kw...) = Base.reduce(min_fast, a; kw...)
 maximum_fast(f, a; kw...) = Base.mapreduce(f, max_fast, a; kw...)
 minimum_fast(f, a; kw...) = Base.mapreduce(f, min_fast, a; kw...)
 
-Base.reducedim_init(f, ::typeof(max_fast), A::AbstractArray, region) =
-    Base.reducedim_init(f, max, A::AbstractArray, region)
-Base.reducedim_init(f, ::typeof(min_fast), A::AbstractArray, region) =
-    Base.reducedim_init(f, min, A::AbstractArray, region)
-
 maximum!_fast(r::AbstractArray, A::AbstractArray; kw...) =
     maximum!_fast(identity, r, A; kw...)
 minimum!_fast(r::AbstractArray, A::AbstractArray; kw...) =
diff --git a/base/reduce.jl b/base/reduce.jl
index bbfd66e5686ed..c9a7744a2755a 100644
--- a/base/reduce.jl
+++ b/base/reduce.jl
@@ -245,8 +245,7 @@ foldr(op, itr; kw...) = mapfoldr(identity, op, itr; kw...)
 @noinline function mapreduce_impl(f, op, A::AbstractArrayOrBroadcasted,
                                   ifirst::Integer, ilast::Integer, blksize::Int)
     if ifirst == ilast
-        @inbounds a1 = A[ifirst]
-        return mapreduce_first(f, op, a1)
+        throw(AssertionError("mapreduce_impl must not be called with only one element"))
     elseif ilast - ifirst < blksize
         # sequential portion
         @inbounds a1 = A[ifirst]
@@ -348,6 +347,7 @@ reduce_empty(::typeof(mul_prod), ::Type{T}) where {T<:BitUnsignedSmall} = one(UI
 
 reduce_empty(op::BottomRF, ::Type{T}) where {T} = reduce_empty(op.rf, T)
 reduce_empty(op::MappingRF, ::Type{T}) where {T} = mapreduce_empty(op.f, op.rf, T)
+reduce_empty(op::MappingRF{<:Any,<:BottomRF}, ::Type{T}) where {T} = mapreduce_empty(op.f, op.rf.rf, T)
 reduce_empty(op::FilteringRF, ::Type{T}) where {T} = reduce_empty(op.rf, T)
 reduce_empty(op::FlipArgs, ::Type{T}) where {T} = reduce_empty(op.f, T)
 
@@ -1335,7 +1335,9 @@ end
 
 ## count
 
-_bool(f) = x->f(x)::Bool
+_assert_bool(x) = x::Bool
+_bool(f) = _assert_bool ∘ f
+mapreduce_empty(::ComposedFunction{typeof(_assert_bool), <:Any}, ::typeof(add_sum), itr) = false+false
 
 """
     count([f=identity,] itr; init=0) -> Integer
diff --git a/base/reducedim.jl b/base/reducedim.jl
index 4ab786804ff4c..b3379835167c3 100644
--- a/base/reducedim.jl
+++ b/base/reducedim.jl
@@ -2,7 +2,7 @@
 
 ## Functions to compute the reduced shape
 
-# for reductions that expand 0 dims to 1
+# for reductions that don't reduce over a zero-length dimension
 reduced_index(i::OneTo{T}) where {T} = OneTo(one(T))
 reduced_index(i::Union{Slice, IdentityUnitRange}) = oftype(i, first(i):first(i))
 reduced_index(i::AbstractUnitRange) =
@@ -13,9 +13,19 @@ reduced_index for this index type or report this as an issue.
 """
     ))
 reduced_indices(a::AbstractArrayOrBroadcasted, region) = reduced_indices(axes(a), region)
+reduced_indices_array(a::AbstractArrayOrBroadcasted, r::AbstractArray) = reduced_indices_axes(axes(a), axes(r), a, r)
 
-# for reductions that keep 0 dims as 0
+# for reductions that keep 0-length reduced dims as 0-length
 reduced_indices0(a::AbstractArray, region) = reduced_indices0(axes(a), region)
+reduced_indices0(a::AbstractArray, r::AbstractArray) = reduced_indices(a, r)
+
+function reduced_indices_axes(axs::Tuple, raxs::Tuple, a, r)
+    length(raxs[1]) in (1,length(axs[1])) || throw(DimensionMismatch("cannot reduce $(size(a)) array into an array of $(size(r))"))
+    (raxs[1], reduced_indices_axes(tail(axs), tail(raxs), a, r)...)
+end
+reduced_indices_axes(axs::Tuple{}, raxs::Tuple{}, a, r) = ()
+reduced_indices_axes(axs::Tuple{}, raxs::Tuple, a, r) = throw(DimensionMismatch("cannot reduce $(size(a)) array into an array of $(size(r))"))
+reduced_indices_axes(axs::Tuple, raxs::Tuple{}, a, r) = (reduced_index(axs[1]), reduced_indices_axes(tail(axs), (), a, r)...)
 
 function reduced_indices(axs::Indices{N}, region) where N
     _check_valid_region(region)
@@ -27,244 +37,73 @@ function reduced_indices0(axs::Indices{N}, region) where N
     ntuple(d -> d in region && !isempty(axs[d]) ? reduced_index(axs[d]) : axs[d], Val(N))
 end
 
-function _check_valid_region(region)
-    for d in region
-        isa(d, Integer) || throw(ArgumentError("reduced dimension(s) must be integers"))
-        Int(d) < 1 && throw(ArgumentError("region dimension(s) must be ≥ 1, got $d"))
-    end
+# The inverse of reduced_indices
+inner_indices(a::AbstractArrayOrBroadcasted, region) = inner_indices(axes(a), region)
+function inner_indices(axs::Indices{N}, region) where N
+    ntuple(d -> d in region ? axs[d] : reduced_index(axs[d]), Val(N))
 end
 
-###### Generic reduction functions #####
+# Given an outer and an inner cartesian index, merge them depending on the dims
+sliceall(x) = @inbounds x[begin:end]
+sliceat(x, y) = @inbounds x[y:y]
 
-## initialization
-# initarray! is only called by sum!, prod!, etc.
-for (Op, initfun) in ((:(typeof(add_sum)), :zero), (:(typeof(mul_prod)), :one))
-    @eval initarray!(a::AbstractArray{T}, ::Any, ::$(Op), init::Bool, src::AbstractArray) where {T} = (init && fill!(a, $(initfun)(T)); a)
-end
+mergeindices(b, x::CartesianIndices, y::CartesianIndices) = CartesianIndices(map((b,x,y)->sliceall(ifelse(b, x, y)), b, x.indices, y.indices))
+mergeindices(b, x::CartesianIndex, y::CartesianIndices) = mergeindices(b, CartesianIndices(map(sliceat, y.indices, x.I)), CartesianIndices(map(sliceall, y.indices)))
+mergeindices(b, x::CartesianIndex, y::CartesianIndex) = CartesianIndex(map((b,x,y)->ifelse(b, x, y), b, x.I, y.I))
 
-initarray!(a::AbstractArray{T}, f, ::Union{typeof(min),typeof(max),typeof(_extrema_rf)},
-    init::Bool, src::AbstractArray) where {T} = (init && mapfirst!(f, a, src); a)
-
-for (Op, initval) in ((:(typeof(&)), true), (:(typeof(|)), false))
-    @eval initarray!(a::AbstractArray, ::Any, ::$(Op), init::Bool, src::AbstractArray) = (init && fill!(a, $initval); a)
-end
-
-# reducedim_initarray is called by
-reducedim_initarray(A::AbstractArrayOrBroadcasted, region, init, ::Type{R}) where {R} = fill!(similar(A,R,reduced_indices(A,region)), init)
-reducedim_initarray(A::AbstractArrayOrBroadcasted, region, init::T) where {T} = reducedim_initarray(A, region, init, T)
-
-# TODO: better way to handle reducedim initialization
-#
-# The current scheme is basically following Steven G. Johnson's original implementation
-#
-promote_union(T::Union) = promote_type(promote_union(T.a), promote_union(T.b))
-promote_union(T) = T
-
-_realtype(::Type{<:Complex}) = Real
-_realtype(::Type{Complex{T}}) where T<:Real = T
-_realtype(T::Type) = T
-_realtype(::Union{typeof(abs),typeof(abs2)}, T) = _realtype(T)
-_realtype(::Any, T) = T
-
-function reducedim_init(f, op::Union{typeof(+),typeof(add_sum)}, A::AbstractArray, region)
-    _reducedim_init(f, op, zero, sum, A, region)
-end
-function reducedim_init(f, op::Union{typeof(*),typeof(mul_prod)}, A::AbstractArray, region)
-    _reducedim_init(f, op, one, prod, A, region)
-end
-function _reducedim_init(f, op, fv, fop, A, region)
-    T = _realtype(f, promote_union(eltype(A)))
-    if T !== Any && applicable(zero, T)
-        x = f(zero(T))
-        z = op(fv(x), fv(x))
-        Tr = z isa T ? T : typeof(z)
-    else
-        z = fv(fop(f, A))
-        Tr = typeof(z)
-    end
-    return reducedim_initarray(A, region, z, Tr)
-end
-
-# initialization when computing minima and maxima requires a little care
-for (f1, f2, initval, typeextreme) in ((:min, :max, :Inf, :typemax), (:max, :min, :(-Inf), :typemin))
-    @eval function reducedim_init(f, op::typeof($f1), A::AbstractArray, region)
-        # First compute the reduce indices. This will throw an ArgumentError
-        # if any region is invalid
-        ri = reduced_indices(A, region)
-
-        # Next, throw if reduction is over a region with length zero
-        any(i -> isempty(axes(A, i)), region) && _empty_reduce_error()
-
-        # Make a view of the first slice of the region
-        A1 = view(A, ri...)
-
-        if isempty(A1)
-            # If the slice is empty just return non-view version as the initial array
-            return map(f, A1)
-        else
-            # otherwise use the min/max of the first slice as initial value
-            v0 = mapreduce(f, $f2, A1)
-
-            T = _realtype(f, promote_union(eltype(A)))
-            Tr = v0 isa T ? T : typeof(v0)
-
-            # but NaNs, missing and unordered values need to be avoided as initial values
-            if v0 isa Number && isnan(v0)
-                # v0 is NaN
-                v0 = oftype(v0, $initval)
-            elseif isunordered(v0)
-                # v0 is missing or a third-party unordered value
-                Tnm = nonmissingtype(Tr)
-                if Tnm <: Union{BitInteger, IEEEFloat, BigFloat}
-                    v0 = $typeextreme(Tnm)
-                elseif !all(isunordered, A1)
-                    v0 = mapreduce(f, $f2, Iterators.filter(!isunordered, A1))
-                end
-            end
-            # v0 may have changed type.
-            Tr = v0 isa T ? T : typeof(v0)
-
-            return reducedim_initarray(A, region, v0, Tr)
-        end
-    end
-end
-
-function reducedim_init(f::ExtremaMap, op::typeof(_extrema_rf), A::AbstractArray, region)
-    # First compute the reduce indices. This will throw an ArgumentError
-    # if any region is invalid
-    ri = reduced_indices(A, region)
-
-    # Next, throw if reduction is over a region with length zero
-    any(i -> isempty(axes(A, i)), region) && _empty_reduce_error()
-
-    # Make a view of the first slice of the region
-    A1 = view(A, ri...)
-
-    isempty(A1) && return map(f, A1)
-    # use the max/min of the first slice as initial value for non-empty cases
-    v0 = reverse(mapreduce(f, op, A1)) # turn minmax to maxmin
-
-    T = _realtype(f.f, promote_union(eltype(A)))
-    Tmin = v0[1] isa T ? T : typeof(v0[1])
-    Tmax = v0[2] isa T ? T : typeof(v0[2])
-
-    # but NaNs and missing need to be avoided as initial values
-    if v0[1] isa Number && isnan(v0[1])
-        # v0 is NaN
-        v0 = oftype(v0[1], Inf), oftype(v0[2], -Inf)
-    elseif isunordered(v0[1])
-        # v0 is missing or a third-party unordered value
-        Tminnm = nonmissingtype(Tmin)
-        Tmaxnm = nonmissingtype(Tmax)
-        if Tminnm <: Union{BitInteger, IEEEFloat, BigFloat} &&
-            Tmaxnm <: Union{BitInteger, IEEEFloat, BigFloat}
-            v0 = (typemax(Tminnm), typemin(Tmaxnm))
-        elseif !all(isunordered, A1)
-            v0 = reverse(mapreduce(f, op, Iterators.filter(!isunordered, A1)))
-        end
+function _check_valid_region(region)
+    for d in region
+        isa(d, Integer) || throw(ArgumentError("reduced dimension(s) must be integers"))
+        Int(d) < 1 && throw(ArgumentError("region dimension(s) must be ≥ 1, got $d"))
     end
-    # v0 may have changed type.
-    Tmin = v0[1] isa T ? T : typeof(v0[1])
-    Tmax = v0[2] isa T ? T : typeof(v0[2])
-
-    return reducedim_initarray(A, region, v0, Tuple{Tmin,Tmax})
 end
 
-reducedim_init(f::Union{typeof(abs),typeof(abs2)}, op::typeof(max), A::AbstractArray{T}, region) where {T} =
-    reducedim_initarray(A, region, zero(f(zero(T))), _realtype(f, T))
-
-reducedim_init(f, op::typeof(&), A::AbstractArrayOrBroadcasted, region) = reducedim_initarray(A, region, true)
-reducedim_init(f, op::typeof(|), A::AbstractArrayOrBroadcasted, region) = reducedim_initarray(A, region, false)
-
-# specialize to make initialization more efficient for common cases
-
-let
-    BitIntFloat = Union{BitInteger, IEEEFloat}
-    T = Union{
-        Any[AbstractArray{t} for t in uniontypes(BitIntFloat)]...,
-        Any[AbstractArray{Complex{t}} for t in uniontypes(BitIntFloat)]...}
-
-    global function reducedim_init(f, op::Union{typeof(+),typeof(add_sum)}, A::T, region)
-        z = zero(f(zero(eltype(A))))
-        reducedim_initarray(A, region, op(z, z))
-    end
-    global function reducedim_init(f, op::Union{typeof(*),typeof(mul_prod)}, A::T, region)
-        u = one(f(one(eltype(A))))
-        reducedim_initarray(A, region, op(u, u))
-    end
-end
+reducedim1(R, A) = length(axes1(R)) == 1
 
 ## generic (map)reduction
 
 has_fast_linear_indexing(a::AbstractArrayOrBroadcasted) = IndexStyle(a) === IndexLinear()
 has_fast_linear_indexing(a::AbstractVector) = true
 
-function check_reducedims(R, A)
-    # Check whether R has compatible dimensions w.r.t. A for reduction
-    #
+_linear_reduction_length(A, Rax) = _linear_reduction_length(IndexStyle(A), A, Rax)
+_linear_reduction_length(_, _, _) = 0
+function _linear_reduction_length(::IndexLinear, A, Rax)
     # It returns an integer value (useful for choosing implementation)
     # - If it reduces only along leading dimensions, e.g. sum(A, dims=1) or sum(A, dims=(1,2)),
     #   it returns the length of the leading slice. For the two examples above,
     #   it will be size(A, 1) or size(A, 1) * size(A, 2).
     # - Otherwise, e.g. sum(A, dims=2) or sum(A, dims=(1,3)), it returns 0.
     #
-    ndims(R) <= ndims(A) || throw(DimensionMismatch("cannot reduce $(ndims(A))-dimensional array to $(ndims(R)) dimensions"))
+    Aax = axes(A)
     lsiz = 1
-    had_nonreduc = false
-    for i = 1:ndims(A)
-        Ri, Ai = axes(R, i), axes(A, i)
+    for i = 1:length(Aax)
+        Ri, Ai = get(Rax, i, OneTo(1)), Aax[i]
         sRi, sAi = length(Ri), length(Ai)
         if sRi == 1
-            if sAi > 1
-                if had_nonreduc
-                    lsiz = 0  # to reduce along i, but some previous dimensions were non-reducing
-                else
-                    lsiz *= sAi  # if lsiz was set to zero, it will stay to be zero
-                end
-            end
+            lsiz *= sAi
         else
-            Ri == Ai || throw(DimensionMismatch("reduction on array with indices $(axes(A)) with output with indices $(axes(R))"))
-            had_nonreduc = true
+            return 0
         end
     end
     return lsiz
 end
 
-"""
-Extract first entry of slices of array A into existing array R.
-"""
-copyfirst!(R::AbstractArray, A::AbstractArray) = mapfirst!(identity, R, A)
-
-function mapfirst!(f::F, R::AbstractArray, A::AbstractArray{<:Any,N}) where {N, F}
-    lsiz = check_reducedims(R, A)
-    t = _firstreducedslice(axes(R), axes(A))
-    map!(f, R, view(A, t...))
-end
-# We know that the axes of R and A are compatible, but R might have a different number of
-# dimensions than A, which is trickier than it seems due to offset arrays and type stability
-_firstreducedslice(::Tuple{}, a::Tuple{}) = ()
-_firstreducedslice(::Tuple, ::Tuple{}) = ()
-@inline _firstreducedslice(::Tuple{}, a::Tuple) = (_firstslice(a[1]), _firstreducedslice((), tail(a))...)
-@inline _firstreducedslice(r::Tuple, a::Tuple) = (length(r[1])==1 ? _firstslice(a[1]) : r[1], _firstreducedslice(tail(r), tail(a))...)
-_firstslice(i::OneTo) = OneTo(1)
-_firstslice(i::Slice) = Slice(_firstslice(i.indices))
-_firstslice(i) = i[firstindex(i):firstindex(i)]
-
-function _mapreducedim!(f, op, R::AbstractArray, A::AbstractArrayOrBroadcasted)
-    lsiz = check_reducedims(R,A)
+# !!! this internal function assumes that R is valid for use as an operand to `op`, and only
+#     visits values in A within the shape passed in `Aax`.
+function _mapreducedim!(f, op, R::AbstractArray, A::AbstractArrayOrBroadcasted, Aax=axes(A))
     isempty(A) && return R
+    lsiz = _linear_reduction_length(A, axes(R))
 
-    if has_fast_linear_indexing(A) && lsiz > 16
+    if has_fast_linear_indexing(A) && lsiz > 16 && axes(A) === Aax
         # use mapreduce_impl, which is probably better tuned to achieve higher performance
-        nslices = div(length(A), lsiz)
         ibase = first(LinearIndices(A))-1
-        for i = 1:nslices
+        for i in eachindex(R) # TODO: add tests for this change
             @inbounds R[i] = op(R[i], mapreduce_impl(f, op, A, ibase+1, ibase+lsiz))
             ibase += lsiz
         end
         return R
     end
-    indsAt, indsRt = safe_tail(axes(A)), safe_tail(axes(R)) # handle d=1 manually
+    indsAt, indsRt = safe_tail(Aax), safe_tail(axes(R)) # handle d=1 manually
     keep, Idefault = Broadcast.shapeindexer(indsRt)
     if reducedim1(R, A)
         # keep the accumulator as a local variable when reducing along the first dimension
@@ -272,15 +111,15 @@ function _mapreducedim!(f, op, R::AbstractArray, A::AbstractArrayOrBroadcasted)
         @inbounds for IA in CartesianIndices(indsAt)
             IR = Broadcast.newindex(IA, keep, Idefault)
             r = R[i1,IR]
-            @simd for i in axes(A, 1)
-                r = op(r, f(A[i, IA]))
+            @simd for i in Aax[1]
+                r = op(r, f(A[i, IA])) # this could also be done pairwise
             end
             R[i1,IR] = r
         end
     else
         @inbounds for IA in CartesianIndices(indsAt)
             IR = Broadcast.newindex(IA, keep, Idefault)
-            @simd for i in axes(A, 1)
+            @simd for i in Aax[1]
                 R[i,IR] = op(R[i,IR], f(A[i,IA]))
             end
         end
@@ -288,10 +127,233 @@ function _mapreducedim!(f, op, R::AbstractArray, A::AbstractArrayOrBroadcasted)
     return R
 end
 
+if isdefined(Core, :Compiler)
+    _mapreduce_might_widen(_, _, _, _) = false
+    function _mapreduce_might_widen(f::F, op::G, A::T, ::Nothing) where {F,G,T}
+        return !isconcretetype(Core.Compiler.return_type(x->op(f(first(x)), f(first(x))), Tuple{T}))
+    end
+else
+    _mapreduce_might_widen(_, _, _, ::Nothing) = true
+    _mapreduce_might_widen(_, _, _, _) = false
+end
+
+mapreduce_similar(A, ::Type{T}, dims) where {T} = similar(A, T, dims)
+function _mapreduce_similar_curried(A)
+    return function(::Type{T}, dims) where {T}
+        mapreduce_similar(A, T, dims)
+    end
+end
+
+#### Dimensional reduction implementation ####
+#
+# There are four (!!) orthogonal switches that affect the possible code paths here:
+#   * There's the number of elements in the reduction (n == 0, 1, or 2+)
+#   * There's the possibility of a passed initial value (specified or not)
+#   * There's in-place or not
+#
+# The relatively simpler scalar mapreduce has 2*3 branches:
+#   * 0 values:
+#       * Have init? Return it. Otherwise:
+#       * Return mapreduce_empty (usually an error, sometimes an unambiguous value like 0.0)
+#   * 1 value (a):
+#       * Have init? Return op(init, f(a)). Otherwise:
+#       * Return mapreduce_first (typically f(a))
+#   * 2+ values (a1, a2):
+#       * Have init? Start every chain with op(op(init, f(a1)), f(a2)) and continue. Otherwise:
+#       * Start every chain with op(f(a1), f(a2)) and continue
+#
+# In the context of dimensional reduce we _additionally_ need to divine out an element type
+# for the returned array. So all of the above branches need to accomodate incremental widening,
+# both as more elements are added to each reduction and across the multiple reductions.
+#
+# There's one additional consideration for the 0 values case — the returned array might or
+# might not be empty. In both cases, however, the ability to return an array (and its element
+# type) is determined by `mapreduce_empty`. We also don't want to include this consideration
+# _inside_ the incremental widening portion as it adds an additional `Union{}` element type
+# for the error path.
+#
+# Finally, actually doing the work wouldn't normally be so tricky — it's just two nested `for`
+# loops.  The trouble is that the naive double for loop is leaving 100x+ performance on the
+# table when it doesn't iterate in a cache-friendly manner.  So you might indeed want to update
+# a single value in `R` multiple times — and possibly widen as you do so.
+#
+# It's **_so very tempting_** to do the standard Julian idiom of:
+#     mapreduce(f, op, A, dims) = mapreduce!(f, op, init(f, op, A, dims), A)
+# But that simply doesn't work because there's not _a single way_ to initialize the beginning 
+# of a reduction.  There are SIX.  But then we add support for pre-allocated result vectors and
+# you even _moreso_ want to be able to just write, e.g.,
+#     sum!(r, A) = mapreduce!(identity, +, first_step!(identity, +, r, A), A)
+# But at that point you've _already done the first step_, so you need to be able to _continue_
+# the reduction from an arbitrary point in the iteration.
+_mapreducedim_impl(f::Type{F}, op, init, A::AbstractArrayOrBroadcasted, raxs, out) where {F} =
+    _mapreducedim_impl(x->F(x), op, init, A::AbstractArrayOrBroadcasted, raxs, out)
+function _mapreducedim_impl(f, op, init, A::AbstractArrayOrBroadcasted, raxs, out)
+    # @show f, op, init, A, raxs, out
+    outer_inds = CartesianIndices(raxs)
+    a_inds = CartesianIndices(axes(A))
+    Aaxs = axes(A)
+    @assert length(Aaxs) == length(raxs)
+    # The "outer dimension" of the reduction are the preserved dimensions
+    is_outer_dim = map((>(1))∘length, raxs)
+    n = prod(map((b, ax)->ifelse(b, 1, length(ax)), is_outer_dim, Aaxs))
+
+    # We have special support for returning empty arrays
+    # isempty(outer_inds) && return collect_allocator(_mapreduce_similar_curried(A),
+    #     (_mapreduce_naive_inner_loop(f, op, init, A, is_outer_dim, outer_ind, a_inds) for outer_ind in outer_inds))
+    (n == 0 || isempty(A)) && return _mapreduce_empty_array(f, op, init, A, raxs, out)
+    n == 1 && return _mapreduce_one_array(f, op, init, A, raxs, out)
+
+    _mapreduce_might_widen(f, op, A, out) && return collect_allocator(_mapreduce_similar_curried(A),
+        (_mapreduce_naive_inner_loop(f, op, init, A, is_outer_dim, outer_ind, a_inds) for outer_ind in outer_inds))
+
+    # Create the result vector with the first step of the reduction, using the passed output if given
+    # note that this is using a (potentially) bad cache ordering, so we don't want to continue like this
+    i1 = first(outer_inds)
+    inner1 = mergeindices(is_outer_dim, i1, a_inds)
+    i1, i2 = inner1
+    @inbounds a1, a2 = A[i1], A[i2]
+    v = _mapreduce_start(f, op, init, a1, a2)
+    R = isnothing(out) ? mapreduce_similar(A, typeof(v), raxs) : out
+    @inbounds R[i1] = v
+    for i in Iterators.drop(outer_inds, 1)
+        innerj = mergeindices(is_outer_dim, i, a_inds)
+        j1, j2 = innerj
+        @inbounds c1, c2 = A[j1], A[j2]
+        @inbounds R[i] = _mapreduce_start(f, op, init, c1, c2)
+    end
+    # If we only had two elements we can quickly return
+    # n == 2 && return R
+
+    lsiz = _linear_reduction_length(A, raxs)
+    if has_fast_linear_indexing(A) && lsiz > 0
+        ibase = first(LinearIndices(A))-1
+        if lsiz > 16
+            # use mapreduce_impl, which is better tuned to achieve higher performance and works pairwise
+            for i in eachindex(R)
+                @inbounds R[i] = op(R[i], mapreduce_impl(f, op, A, ibase+3, ibase+lsiz))
+                ibase += lsiz
+            end
+        else
+            for i in eachindex(R)
+                @inbounds r = R[i]
+                @simd for Ai in ibase+3:ibase+lsiz
+                    r = op(r, f(@inbounds(A[Ai])))
+                end
+                @inbounds R[i] = r
+                ibase += lsiz
+            end
+        end
+    else
+        # Take care of the smallest cartesian "rectangle" that includes our two peeled elements
+        small_inner, large_inner = _split2(inner1)
+        # and we may as well try to do this small part in a cache-advantageous manner
+        if something(findfirst((>)(1), size(inner1))) < something(findfirst((>)(1), size(outer_inds)), typemax(Int))
+            for iR in outer_inds
+                @inbounds r = R[iR]
+                inner_inds = mergeindices(is_outer_dim, CartesianIndices(map(sliceat, Aaxs, iR.I)), small_inner)
+                for iA in Iterators.drop(inner_inds, 2)
+                    r = op(r, f(@inbounds(A[iA])))
+                end
+                @inbounds R[iR] = r
+            end
+        else
+            for iA in Iterators.drop(small_inner, 2)
+                for iR in outer_inds
+                    iA = mergeindices(is_outer_dim, iR, iA)
+                    v = op(@inbounds(R[iR]), f(@inbounds(A[iA])))
+                    @inbounds R[iR] = v
+                end
+            end
+        end
+        # And then if there's anything left, defer to _mapreducedim!
+        if !isempty(large_inner)
+            large_inds = mergeindices(is_outer_dim, outer_inds, large_inner)
+            _mapreducedim!(f, op, R, A, large_inds.indices)
+        end
+    end
+    return R
+end
+
+function _mapreduce_naive_inner_loop(f, op, init, A, is_outer_dim, outer_ind, a_inds)
+    inner_inds = mergeindices(is_outer_dim, outer_ind, a_inds)
+    i1, i2 = inner_inds
+    @inbounds a1, a2 = A[i1], A[i2]
+    r = _mapreduce_start(f, op, init, a1, a2)
+    for i in Iterators.drop(inner_inds, 2)
+        r = op(r, f(@inbounds A[i]))
+    end
+    return r
+end
+
+# Split a CartesianIndices into two parts such that we can Iterators.drop 2 elements from the smallest
+# rectangular subsection and then iterate more quickly over the larger remainder.
+#
+# We have a choice in how we do this split: we can take the away the first column, the first *two* rows, or the first page or...
+# We want to pick the _smallest_ split. For example, given four non-singleton dimensions, we have a choice of:
+# first: (:, :, :, 1:1)   or (:, :, 1:1,   :) or (:, 1:1, :, :)   or (1:2,   :, :, :)
+# rest:  (:, :, :, 2:end) or (:, :, 2:end, :) or (:, 2:end, :, :) or (3:end, :, :, :)
+# With dimension lengths of (a, b, c, d), that's minimizing: (a*b*c) vs (a*b*d) vs (a*c*d) vs (2*b*c*d)
+# The smallest subsection will always be the one that slices on the _longest_ dimension.
+function _split2(ci::CartesianIndices{N}) where {N}
+    inds = ci.indices
+    first_non_singleton = something(findfirst((>)(1), size(ci)))
+    # The first non-singleton dimension "costs half" because we take two elements
+    lengths = map(length, inds)
+    weights = setindex(lengths, lengths[first_non_singleton]÷2, first_non_singleton)
+    sliced_dim = argmax(weights)
+
+    nskip = Int(sliced_dim == first_non_singleton)
+    return (CartesianIndices(ntuple(d->d==sliced_dim ? inds[d][begin:begin+nskip] : inds[d][begin:end], Val(N))),
+            CartesianIndices(ntuple(d->d==sliced_dim ? inds[d][begin+nskip+1:end] : inds[d][begin:end], Val(N))))
+end
+
+_mapreduce_empty_array(_, _, init, A, axes, ::Nothing) = fill!(mapreduce_similar(A, typeof(init), axes), init)
+_mapreduce_empty_array(_, _, init, A, axes, out) = fill!(out, init)
+function _mapreduce_empty_array(f, op, ::_InitialValue, A, axes, ::Nothing)
+    init = mapreduce_empty(f, op, eltype(A))
+    return fill!(mapreduce_similar(A, typeof(init), axes), init)
+end
+function _mapreduce_empty_array(f, op, ::_InitialValue, A, axes, out)
+    init = mapreduce_empty(f, op, eltype(A))
+    return fill!(out, init)
+end
+
+_mapreduce_one_array(f, op, init, A, axes, ::Nothing) = collect_allocator(_mapreduce_similar_curried(A), (op(init, f(@inbounds(A[i]))) for i in CartesianIndices(axes)))
+_mapreduce_one_array(f, op, init, A, axes, out) = (for i in CartesianIndices(axes); out[i] = op(init, f(@inbounds(A[i]))); end; out)
+_mapreduce_one_array(f, op, ::_InitialValue, A, axes, ::Nothing) = collect_allocator(_mapreduce_similar_curried(A), (mapreduce_first(f, op, @inbounds(A[i])) for i in CartesianIndices(axes)))
+_mapreduce_one_array(f, op, ::_InitialValue, A, axes, out) = (for i in CartesianIndices(axes); out[i] = mapreduce_first(f, op, @inbounds(A[i])); end; out)
+
+# Given two initial values in a reduction, compute the beginning of a op chain
+_mapreduce_start(f, op, init, a1, a2) = op(op(init, f(a1)), f(a2))
+_mapreduce_start(f, op, ::_InitialValue, a1, a2) = op(f(a1), f(a2))
+
+"""
+    mapreduce!(f, op, R::AbstractArray, A::AbstractArrayOrBroadcasted; [init])
+
+Compute `mapreduce(f, op, A; init, dims)` where `dims` are the singleton dimensions of `R`, storing the result into `R`.
+
+!!! note
+    The previous values in `R` are _not_ used as initial values; they are completely ignored
+"""
+function mapreduce!(f, op, R::AbstractArray, A::AbstractArrayOrBroadcasted; init=_InitialValue())
+    raxs = reduced_indices_array(A,R)
+    _mapreducedim_impl(f, op, init, A, raxs, R)
+end
+"""
+    reduce!(op, R::AbstractArray, A::AbstractArrayOrBroadcasted; [init])
+
+Compute `reduce(op, A; init, dims)` where `dims` are the singleton dimensions of `R`, storing the result into `R`.
+
+!!! note
+    The previous values in `R` are _not_ used as initial values; they are completely ignored
+"""
+reduce!(op, R::AbstractArray, A::AbstractArrayOrBroadcasted; init=_InitialValue()) = mapreduce!(identity, op, R, A; init)
+
+# !!! this assumes that R holds the initial values
 mapreducedim!(f, op, R::AbstractArray, A::AbstractArrayOrBroadcasted) =
-    (_mapreducedim!(f, op, R, A); R)
+    (reduced_indices_array(A, R); _mapreducedim!(f, op, R, A); R)
 
-reducedim!(op, R::AbstractArray{RT}, A::AbstractArrayOrBroadcasted) where {RT} =
+reducedim!(op, R::AbstractArray, A::AbstractArrayOrBroadcasted) =
     mapreducedim!(identity, op, R, A)
 
 """
@@ -329,15 +391,14 @@ mapreduce(f, op, A::AbstractArrayOrBroadcasted, B::AbstractArrayOrBroadcasted...
 _mapreduce_dim(f, op, nt, A::AbstractArrayOrBroadcasted, ::Colon) =
     mapfoldl_impl(f, op, nt, A)
 
+function _mapreduce_dim(f, op, init, A::AbstractArrayOrBroadcasted, dims)
+    raxs = reduced_indices(A, dims)
+    _mapreducedim_impl(f, op, init, A, raxs, nothing)
+end
+
 _mapreduce_dim(f, op, ::_InitialValue, A::AbstractArrayOrBroadcasted, ::Colon) =
     _mapreduce(f, op, IndexStyle(A), A)
 
-_mapreduce_dim(f, op, nt, A::AbstractArrayOrBroadcasted, dims) =
-    mapreducedim!(f, op, reducedim_initarray(A, dims, nt), A)
-
-_mapreduce_dim(f, op, ::_InitialValue, A::AbstractArrayOrBroadcasted, dims) =
-    mapreducedim!(f, op, reducedim_init(f, op, A, dims), A)
-
 """
     reduce(f, A::AbstractArray; dims=:, [init])
 
@@ -403,11 +464,7 @@ julia> count(<=(2), A, dims=2)
  0
 ```
 """
-count(A::AbstractArrayOrBroadcasted; dims=:, init=0) = count(identity, A; dims, init)
-count(f, A::AbstractArrayOrBroadcasted; dims=:, init=0) = _count(f, A, dims, init)
-
-_count(f, A::AbstractArrayOrBroadcasted, dims::Colon, init) = _simple_count(f, A, init)
-_count(f, A::AbstractArrayOrBroadcasted, dims, init) = mapreduce(_bool(f), add_sum, A; dims, init)
+count(f, A::AbstractArray; dims=:)
 
 """
     count!([f=identity,] r, A)
@@ -437,9 +494,7 @@ julia> count!(<=(2), [1; 1], A)
  0
 ```
 """
-count!(r::AbstractArray, A::AbstractArrayOrBroadcasted; init::Bool=true) = count!(identity, r, A; init=init)
-count!(f, r::AbstractArray, A::AbstractArrayOrBroadcasted; init::Bool=true) =
-    mapreducedim!(_bool(f), add_sum, initarray!(r, f, add_sum, init, A), A)
+count!(f, r::AbstractArray, A::AbstractArray)
 
 """
     sum(A::AbstractArray; dims)
@@ -970,8 +1025,9 @@ any!(r, A)
 
 for (fname, _fname, op) in [(:sum,     :_sum,     :add_sum), (:prod,    :_prod,    :mul_prod),
                             (:maximum, :_maximum, :max),     (:minimum, :_minimum, :min),
-                            (:extrema, :_extrema, :_extrema_rf)]
-    mapf = fname === :extrema ? :(ExtremaMap(f)) : :f
+                            (:count,   :_count,   :add_sum), (:extrema, :_extrema, :_extrema_rf)]
+    mapf = fname === :extrema ? :(ExtremaMap(f)) :
+           fname === :count ? :(_bool(f)) : :f
     @eval begin
         # User-facing methods with keyword arguments
         @inline ($fname)(a::AbstractArray; dims=:, kw...) = ($_fname)(a, dims; kw...)
@@ -982,7 +1038,7 @@ for (fname, _fname, op) in [(:sum,     :_sum,     :add_sum), (:prod,    :_prod,
         ($_fname)(f, a, ::Colon; kw...) = mapreduce($mapf, $op, a; kw...)
     end
 end
-
+# TODO: why are any/all different?
 any(a::AbstractArray; dims=:)              = _any(a, dims)
 any(f::Function, a::AbstractArray; dims=:) = _any(f, a, dims)
 _any(a, ::Colon)                           = _any(identity, a, :)
@@ -993,13 +1049,14 @@ _all(a, ::Colon)                           = _all(identity, a, :)
 for (fname, op) in [(:sum, :add_sum), (:prod, :mul_prod),
                     (:maximum, :max), (:minimum, :min),
                     (:all, :&),       (:any, :|),
-                    (:extrema, :_extrema_rf)]
+                    (:count, :add_sum), (:extrema, :_extrema_rf)]
     fname! = Symbol(fname, '!')
     _fname = Symbol('_', fname)
-    mapf = fname === :extrema ? :(ExtremaMap(f)) : :f
+    mapf = fname === :extrema ? :(ExtremaMap(f)) :
+           fname === :count ? :(_bool(f)) : :f
     @eval begin
-        $(fname!)(f::Function, r::AbstractArray, A::AbstractArray; init::Bool=true) =
-            mapreducedim!($mapf, $(op), initarray!(r, $mapf, $(op), init, A), A)
+        $(fname!)(f::Function, r::AbstractArray, A::AbstractArray; init::Bool=true) = init ?
+            mapreduce!($mapf, $op, r, A) : mapreducedim!($mapf, $op, r, A)
         $(fname!)(r::AbstractArray, A::AbstractArray; init::Bool=true) = $(fname!)(identity, r, A; init=init)
 
         $(_fname)(A, dims; kw...)    = $(_fname)(identity, A, dims; kw...)
@@ -1008,57 +1065,62 @@ for (fname, op) in [(:sum, :add_sum), (:prod, :mul_prod),
 end
 
 ##### findmin & findmax #####
-# The initial values of Rval are not used if the corresponding indices in Rind are 0.
-#
-function findminmax!(f, op, Rval, Rind, A::AbstractArray{T,N}) where {T,N}
-    (isempty(Rval) || isempty(A)) && return Rval, Rind
-    lsiz = check_reducedims(Rval, A)
-    for i = 1:N
-        axes(Rval, i) == axes(Rind, i) || throw(DimensionMismatch("Find-reduction: outputs must have the same indices"))
-    end
-    # If we're reducing along dimension 1, for efficiency we can make use of a temporary.
-    # Otherwise, keep the result in Rval/Rind so that we traverse A in storage order.
-    indsAt, indsRt = safe_tail(axes(A)), safe_tail(axes(Rval))
-    keep, Idefault = Broadcast.shapeindexer(indsRt)
-    ks = keys(A)
-    y = iterate(ks)
-    zi = zero(eltype(ks))
-    if reducedim1(Rval, A)
-        i1 = first(axes1(Rval))
-        @inbounds for IA in CartesianIndices(indsAt)
-            IR = Broadcast.newindex(IA, keep, Idefault)
-            tmpRv = Rval[i1,IR]
-            tmpRi = Rind[i1,IR]
-            for i in axes(A,1)
-                k, kss = y::Tuple
-                tmpAv = f(A[i,IA])
-                if tmpRi == zi || op(tmpRv, tmpAv)
-                    tmpRv = tmpAv
-                    tmpRi = k
-                end
-                y = iterate(ks, kss)
-            end
-            Rval[i1,IR] = tmpRv
-            Rind[i1,IR] = tmpRi
-        end
-    else
-        @inbounds for IA in CartesianIndices(indsAt)
-            IR = Broadcast.newindex(IA, keep, Idefault)
-            for i in axes(A, 1)
-                k, kss = y::Tuple
-                tmpAv = f(A[i,IA])
-                tmpRv = Rval[i,IR]
-                tmpRi = Rind[i,IR]
-                if tmpRi == zi || op(tmpRv, tmpAv)
-                    Rval[i,IR] = tmpAv
-                    Rind[i,IR] = k
-                end
-                y = iterate(ks, kss)
-            end
-        end
-    end
-    Rval, Rind
+
+struct PairsArray{T,N,A} <: AbstractArray{T,N}
+    array::A
+end
+PairsArray(array::AbstractArray{T,N}) where {T, N} = PairsArray{Tuple{keytype(array),T}, N, typeof(array)}(array)
+const PairsVector{T,A} = PairsArray{T, 1, A}
+IndexStyle(::PairsVector) = IndexLinear()
+IndexStyle(::PairsArray) = IndexCartesian()
+size(P::PairsArray) = size(P.array)
+axes(P::PairsArray) = axes(P.array)
+@inline function getindex(P::PairsVector, i::Int)
+    @boundscheck checkbounds(P, i)
+    @inbounds (i, P.array[i])
 end
+@inline function getindex(P::PairsArray{<:Any,N}, I::CartesianIndex{N}) where {N}
+    @boundscheck checkbounds(P, I)
+    @inbounds (I, P.array[I])
+end
+@propagate_inbounds getindex(P::PairsVector, i::CartesianIndex{1}) = P[i.I[1]]
+@propagate_inbounds getindex(P::PairsArray{<:Any,N}, I::Vararg{Int, N}) where {N} = P[CartesianIndex(I)]
+# Defer a few functions to the parent array
+similar(P::PairsArray, ::Type{T}, dims::Tuple{Union{Int, AbstractUnitRange}, Vararg{Union{Int, AbstractUnitRange}}}) where {T} = similar(P.array, T, dims)
+similar(P::PairsArray, ::Type{T}, dims::Tuple{Union{Int, Base.OneTo}, Vararg{Union{Int, Base.OneTo}}}) where {T} = similar(P.array, T, dims)
+similar(P::PairsArray, ::Type{T}, dims::Tuple{Int, Vararg{Int}}) where {T} = similar(P.array, T, dims)
+similar(P::PairsArray, ::Type{T}, dims::Tuple{}) where {T} = similar(P.array, T, dims)
+mapreduce_similar(P::PairsArray, ::Type{T}, dims) where {T} = mapreduce_similar(P.array, T, dims)
+
+# Use an ad-hoc specialized StructArray to allow in-place AoS->SoA transform
+struct ZippedArray{T,N,Style,A,B} <: AbstractArray{T,N}
+    first::A
+    second::B
+end
+function ZippedArray(A::AbstractArray{T,N},B::AbstractArray{S,N}) where {T,S,N}
+    axes(A) == axes(B) || throw(DimensionMismatch("both arrays must have the same shape"))
+    # TODO: It'd be better if we could transform a Tuple{Int, Union{Int, Missing}} to Union{Tuple{Int,Int}, Tuple{Int, Missing}}
+    ZippedArray{Tuple{T,S},N,IndexStyle(A,B),typeof(A),typeof(B)}(A,B)
+end
+size(Z::ZippedArray) = size(Z.first)
+axes(Z::ZippedArray) = axes(Z.first)
+IndexStyle(::ZippedArray{<:Any,<:Any,Style}) where {Style} = Style
+@inline function getindex(Z::ZippedArray, I::Int...)
+    @boundscheck checkbounds(Z, I...)
+    @inbounds (Z.first[I...], Z.second[I...])
+end
+@propagate_inbounds setindex!(Z::ZippedArray{T}, v, I::Int...) where {T} = setindex!(Z, convert(T, v), I...)
+@inline function setindex!(Z::ZippedArray{T}, v::T, I::Int...) where {T}
+    @boundscheck checkbounds(Z, I...)
+    @inbounds Z.first[I...] = v[1]
+    @inbounds Z.second[I...] = v[2]
+    return Z
+end
+_unzip(Z::ZippedArray) = (Z.first, Z.second)
+_unzip(A::AbstractArray) = ([a[1] for a in A], [a[2] for a in A])
+
+_transform_pair(f) = x-> (x[1], f(x[2]))
+_transform_pair(f::typeof(identity)) = f
 
 """
     findmin!(rval, rind, A) -> (minval, index)
@@ -1069,9 +1131,15 @@ dimensions of `rval` and `rind`, and store the results in `rval` and `rind`.
 
 $(_DOCS_ALIASING_WARNING)
 """
-function findmin!(rval::AbstractArray, rind::AbstractArray, A::AbstractArray;
+findmin!(rval::AbstractArray, rind::AbstractArray, A::AbstractArray; init::Bool=true) = findmin!(identity, rval, rind, A; init)
+function findmin!(f, rval::AbstractArray, rind::AbstractArray, A::AbstractArray;
                   init::Bool=true)
-    findminmax!(identity, isgreater, init && !isempty(A) ? fill!(rval, first(A)) : rval, fill!(rind,zero(eltype(keys(A)))), A)
+    if init
+        mapreduce!(_transform_pair(f), (x,y)->ifelse(isgreater(x[2], y[2]), y, x), ZippedArray(rind, rval), PairsArray(A))
+    else
+        mapreducedim!(_transform_pair(f), (x,y)->ifelse(isgreater(x[2], y[2]), y, x), ZippedArray(rind, rval), PairsArray(A))
+    end
+    return (rval, rind)
 end
 
 """
@@ -1120,19 +1188,16 @@ julia> findmin(abs2, A, dims=2)
 findmin(f, A::AbstractArray; dims=:) = _findmin(f, A, dims)
 
 function _findmin(f, A, region)
-    ri = reduced_indices0(A, region)
-    if isempty(A)
-        if prod(map(length, reduced_indices(A, region))) != 0
-            throw(ArgumentError("collection slices must be non-empty"))
-        end
-        similar(A, promote_op(f, eltype(A)), ri), zeros(eltype(keys(A)), ri)
+    if f === identity
+        # Fast path with pre-allocated arrays
+        axs = reduced_indices(A, region)
+        return findmin!(identity, mapreduce_similar(A, eltype(A), axs), mapreduce_similar(A, keytype(A), axs), A)
     else
-        fA = f(first(A))
-        findminmax!(f, isgreater, fill!(similar(A, _findminmax_inittype(f, A), ri), fA),
-                    zeros(eltype(keys(A)), ri), A)
+        P = _mapreduce_dim(_transform_pair(f), (x,y)->ifelse(isgreater(x[2], y[2]), y, x), _InitialValue(), PairsArray(A), region)
+        (inds, vals) = _unzip(P)
+        return (vals, inds)
     end
 end
-
 """
     findmax!(rval, rind, A) -> (maxval, index)
 
@@ -1142,9 +1207,15 @@ dimensions of `rval` and `rind`, and store the results in `rval` and `rind`.
 
 $(_DOCS_ALIASING_WARNING)
 """
-function findmax!(rval::AbstractArray, rind::AbstractArray, A::AbstractArray;
+findmax!(rval::AbstractArray, rind::AbstractArray, A::AbstractArray; init::Bool=true) = findmax!(identity, rval, rind, A; init)
+function findmax!(f, rval::AbstractArray, rind::AbstractArray, A::AbstractArray;
                   init::Bool=true)
-    findminmax!(identity, isless, init && !isempty(A) ? fill!(rval, first(A)) : rval, fill!(rind,zero(eltype(keys(A)))), A)
+    if init
+        mapreduce!(_transform_pair(f), (x,y)->ifelse(isless(x[2], y[2]), y, x), ZippedArray(rind, rval), PairsArray(A))
+    else
+        mapreducedim!(_transform_pair(f), (x,y)->ifelse(isless(x[2], y[2]), y, x), ZippedArray(rind, rval), PairsArray(A))
+    end
+    return (rval, rind)
 end
 
 """
@@ -1191,33 +1262,17 @@ julia> findmax(abs2, A, dims=2)
 ```
 """
 findmax(f, A::AbstractArray; dims=:) = _findmax(f, A, dims)
-
 function _findmax(f, A, region)
-    ri = reduced_indices0(A, region)
-    if isempty(A)
-        if prod(map(length, reduced_indices(A, region))) != 0
-            throw(ArgumentError("collection slices must be non-empty"))
-        end
-        similar(A, promote_op(f, eltype(A)), ri), zeros(eltype(keys(A)), ri)
+    if f === identity
+        axs = reduced_indices(A, region)
+        return findmax!(identity, mapreduce_similar(A, eltype(A), axs), mapreduce_similar(A, keytype(A), axs), A)
     else
-        fA = f(first(A))
-        findminmax!(f, isless, fill!(similar(A, _findminmax_inittype(f, A), ri), fA),
-                    zeros(eltype(keys(A)), ri), A)
+        P = _mapreduce_dim(_transform_pair(f), (x,y)->ifelse(isless(x[2], y[2]), y, x), _InitialValue(), PairsArray(A), region)
+        (inds, vals) = _unzip(P)
+        return (vals, inds)
     end
 end
 
-function _findminmax_inittype(f, A::AbstractArray)
-    T = _realtype(f, promote_union(eltype(A)))
-    v0 = f(first(A))
-    # First conditional: T is >: typeof(v0), so return it
-    # Second conditional: handle missing specifically, as most often, f(missing) = missing;
-    # certainly, some predicate functions return Bool, but not all.
-    # Else, return the type of the transformation.
-    Tr = v0 isa T ? T : Missing <: eltype(A) ? Union{Missing, typeof(v0)} : typeof(v0)
-end
-
-reducedim1(R, A) = length(axes1(R)) == 1
-
 """
     argmin(A; dims) -> indices
 
diff --git a/test/reduce.jl b/test/reduce.jl
index f5140c8a34bd9..484c5c14636a4 100644
--- a/test/reduce.jl
+++ b/test/reduce.jl
@@ -666,10 +666,12 @@ x = [j+7 for j in i]
 end
 
 # make sure we specialize on mapfoldl(::Type, ...)
-@test @inferred(mapfoldl(Int, +, [1, 2, 3]; init=0)) === 6
+@testset "inference" begin
+    @test @inferred(mapfoldl(Int, +, [1, 2, 3]; init=0)) === 6
 
-# issue #39281
-@test @inferred(extrema(rand(2), dims=1)) isa Vector{Tuple{Float64,Float64}}
+    # issue #39281
+    @test @inferred(extrema(rand(2), dims=1)) isa Vector{Tuple{Float64,Float64}}
+end
 
 # issue #38627
 @testset "overflow in mapreduce" begin
@@ -732,3 +734,11 @@ end
         Val(any(in((:one,:two,:three)),(:four,:three)))
     end |> only == Val{true}
 end
+
+@testset "type stability of internals for mean (etc); Statistics#160" begin
+    @test (@inferred Missing mapreduce(identity, (x,y)->Base.add_sum(x, y)/1, view([1.0,2.0,missing],1:2), dims=:)) == 3.0
+    @test (@inferred Union{Missing,Int} mapreduce(identity, (x,y)->Base.add_sum(x, y)/1, view([1,2,missing],1:2), dims=:)) == 3.0
+
+    @test (@inferred Missing reduce((x,y)->Base.add_sum(x, y)/1, view([1.0,2.0,missing],1:2), dims=:)) == 3.0
+    @test (@inferred Union{Missing,Int} reduce((x,y)->Base.add_sum(x, y)/1, view([1,2,missing],1:2), dims=:)) == 3.0
+end
diff --git a/test/reducedim.jl b/test/reducedim.jl
index 8f629fa83f28d..11f6975756763 100644
--- a/test/reducedim.jl
+++ b/test/reducedim.jl
@@ -2,6 +2,9 @@
 
 using Random
 
+isdefined(Main, :OffsetArrays) || @eval Main include("testhelpers/OffsetArrays.jl")
+using .Main.OffsetArrays: OffsetVector, OffsetArray
+
 # main tests
 
 # issue #35800
@@ -173,7 +176,7 @@ end
 
     x = sum(Real[1.0], dims=1)
     @test x == [1.0]
-    @test x isa Vector{Real}
+    @test x isa Vector{<:Real}
 
     x = mapreduce(cos, +, Union{Int,Missing}[1, 2], dims=1)
     @test x == mapreduce(cos, +, [1, 2], dims=1)
@@ -209,19 +212,19 @@ end
 
     for f in (minimum, maximum)
         @test_throws "reducing over an empty collection is not allowed" f(A, dims=1)
-        @test isequal(f(A, dims=2), zeros(Int, 0, 1))
+        @test_throws "reducing over an empty collection is not allowed" isequal(f(A, dims=2), zeros(Int, 0, 1))
         @test_throws "reducing over an empty collection is not allowed" f(A, dims=(1, 2))
-        @test isequal(f(A, dims=3), zeros(Int, 0, 1))
+        @test_throws "reducing over an empty collection is not allowed" isequal(f(A, dims=3), zeros(Int, 0, 1))
     end
     for f in (findmin, findmax)
-        @test_throws ArgumentError f(A, dims=1)
-        @test isequal(f(A, dims=2), (zeros(Int, 0, 1), zeros(Int, 0, 1)))
-        @test_throws ArgumentError f(A, dims=(1, 2))
-        @test isequal(f(A, dims=3), (zeros(Int, 0, 1), zeros(Int, 0, 1)))
-        @test_throws ArgumentError f(abs2, A, dims=1)
-        @test isequal(f(abs2, A, dims=2), (zeros(Int, 0, 1), zeros(Int, 0, 1)))
-        @test_throws ArgumentError f(abs2, A, dims=(1, 2))
-        @test isequal(f(abs2, A, dims=3), (zeros(Int, 0, 1), zeros(Int, 0, 1)))
+        @test_throws "reducing over an empty collection is not allowed" f(A, dims=1)
+        @test_throws "reducing over an empty collection is not allowed" isequal(f(A, dims=2), (zeros(Int, 0, 1), zeros(Int, 0, 1)))
+        @test_throws "reducing over an empty collection is not allowed" f(A, dims=(1, 2))
+        @test_throws "reducing over an empty collection is not allowed" isequal(f(A, dims=3), (zeros(Int, 0, 1), zeros(Int, 0, 1)))
+        @test_throws "reducing over an empty collection is not allowed" f(abs2, A, dims=1)
+        @test_throws "reducing over an empty collection is not allowed" isequal(f(abs2, A, dims=2), (zeros(Int, 0, 1), zeros(Int, 0, 1)))
+        @test_throws "reducing over an empty collection is not allowed" f(abs2, A, dims=(1, 2))
+        @test_throws "reducing over an empty collection is not allowed" isequal(f(abs2, A, dims=3), (zeros(Int, 0, 1), zeros(Int, 0, 1)))
     end
 
 end
@@ -541,6 +544,14 @@ end
     end
 end
 
+f44906(x) = maximum(f44906, x); f44906(x::Number) = x
+g44906(x) = mapreduce(g44906, max, x); g44906(x::Number) = x
+@testset "inference with minimum/maximum, issue #44906" begin
+    x = [[1, 2], [3, 4]];
+    @test @inferred(f44906(x)) == 4
+    @test @inferred(g44906(x)) == 4
+end
+
 # issue #6672
 @test sum(Real[1 2 3; 4 5.3 7.1], dims=2) == reshape([6, 16.4], 2, 1)
 @test sum(Any[1 2;3 4], dims=1) == [4 6]
@@ -610,9 +621,10 @@ function unordered_test_for_extrema(a; dims_test = ((), 1, 2, (1,2), 3))
     for dims in dims_test
         vext = extrema(a; dims)
         vmin, vmax = minimum(a; dims), maximum(a; dims)
-        @test isequal(extrema!(copy(vext), a), vext)
+        @test isequal(extrema!(similar(vext, NTuple{2, eltype(a)}), a), vext)
         @test all(x -> isequal(x[1], x[2:3]), zip(vext,vmin,vmax))
     end
+    true
 end
 @testset "0.0,-0.0 test for extrema with dims" begin
     @test extrema([-0.0;0.0], dims = 1)[1] === (-0.0,0.0)
@@ -623,13 +635,13 @@ end
         for T in (Int, Float64, BigFloat, BigInt)
             Aₘ = Matrix{Union{T, Missing}}(rand(-sz:sz, sz, sz))
             Aₘ[rand(1:sz*sz, sz)] .= missing
-            unordered_test_for_extrema(Aₘ)
+            @test unordered_test_for_extrema(Aₘ)
             if T <: AbstractFloat
                 Aₙ = map(i -> ismissing(i) ? T(NaN) : i, Aₘ)
-                unordered_test_for_extrema(Aₙ)
+                @test unordered_test_for_extrema(Aₙ)
                 p = rand(1:sz*sz, sz)
                 Aₘ[p] .= NaN
-                unordered_test_for_extrema(Aₘ)
+                @test unordered_test_for_extrema(Aₘ)
             end
         end
     end
@@ -696,3 +708,225 @@ end
     @test_broken @inferred(maximum(exp, A; dims = 1))[1] === missing
     @test_broken @inferred(extrema(exp, A; dims = 1))[1] === (missing, missing)
 end
+
+@testset "generic sum reductions; issue #31427" begin
+    add31427(a, b) = a+b
+    A = rand(1:10, 5, 5)
+    @test reduce(add31427, A, dims = (1, 2)) == reduce(+, A, dims = (1, 2)) == [sum(A);;]
+
+    As = [rand(5, 4) for _ in 1:2, _ in 1:3]
+    @test reduce(hcat, reduce(vcat, As, dims=1)) == [As[1,1] As[1,2] As[1,3]; As[2,1] As[2,2] As[2,3]]
+end
+
+@testset "sum with `missing`s; issue #55213 and #32366" begin
+    @test isequal(sum([0.0 1; 0.0 missing], dims=2), [1.0; missing;;])
+    @test sum([0.0 1; 0.0 missing], dims=2)[1] === 1.0
+    @test isequal(sum(Any[0.0 1; 0.0 missing], dims=2), [1.0; missing;;])
+    @test sum(Any[0.0 1; 0.0 missing], dims=2)[1] === 1.0
+
+    @test isequal(sum([1 0.0; 0.0 missing], dims=1), [1.0 missing])
+    @test sum([1 0.0; 0.0 missing], dims=1)[1] === 1.0
+    @test isequal(sum(Any[1 0.0; 0.0 missing], dims=1), [1.0 missing])
+    @test sum(Any[1 0.0; 0.0 missing], dims=1)[1] === 1.0
+
+    @test isequal(sum([true false missing], dims=2), sum([1 0 missing], dims=2))
+    @test isequal(sum([true false missing], dims=2), [sum([true false missing]);;])
+    @test isequal(sum([true false missing], dims=2), [missing;;])
+end
+
+@testset "issues #45566 and #47231; initializers" begin
+    @test reduce(gcd, [1]; dims=1) == [reduce(gcd, [1])] == [1]
+
+    x = reshape(1:6, 2, 3)
+    @test reduce(+, x, dims=2) == [9;12;;]
+    @test reduce(-, x, dims=1) == [1-2 3-4 5-6]
+    @test reduce(/, x, dims=1) == [1/2 3/4 5/6]
+    @test reduce(^, x, dims=1) == [1^2 3^4 5^6]
+    # These have arbitrary associativity, but they shouldn't error
+    @test reduce(-, x, dims=2) in ([(1-3)-5; (2-4)-6;;], [1-(3-5); 2-(4-6);;])
+    @test reduce(/, x, dims=2) in ([(1/3)/5; (2/4)/6;;], [1/(3/5); 2/(4/6);;])
+    @test reduce(^, x, dims=2) in ([(1^3)^5; (2^4)^6;;], [1^(3^5); 2^(4^6);;])
+end
+
+@testset "better initializations" begin
+    @test mapreduce(_ -> pi, +, [1,2]; dims=1) == [mapreduce(_ -> pi, +, [1,2])] == [2pi]
+    @test mapreduce(_ -> pi, +, [1 2]; dims=2) == [mapreduce(_ -> pi, +, [1 2]);;] == [2pi;;]
+    @test mapreduce(_ -> pi, +, [1 2]; dims=(1,2)) == [2pi;;]
+    @test mapreduce(_ -> pi, +, [1]; dims=1) == [mapreduce(_ -> pi, +, [1])] == [pi]
+    @test mapreduce(_ -> pi, +, [1;;]; dims=2) == [mapreduce(_ -> pi, +, [1;;]);;] == [pi;;]
+    @test_throws ArgumentError mapreduce(_ -> pi, +, []; dims=1)
+    @test_throws ArgumentError mapreduce(_ -> pi, +, [;;]; dims=2)
+    @test_throws ArgumentError mapreduce(_ -> pi, +, [;;]; dims=(1,2))
+    @test_throws ArgumentError mapreduce(_ -> pi, +, [])
+    @test_throws ArgumentError mapreduce(_ -> pi, +, [;;])
+
+    @test mapreduce(x -> log(x-1), +, [2,3,4]; dims=1) == [mapreduce(x -> log(x-1), +, [2,3,4])] == [log(1) + log(2) + log(3)]
+    @test mapreduce(x -> log(x-1), +, [2,3]; dims=1) == [mapreduce(x -> log(x-1), +, [2,3])] == [log(1) + log(2)]
+    @test mapreduce(x -> log(x-1), +, [2]; dims=1) == [mapreduce(x -> log(x-1), +, [2])] == [log(1)]
+    @test mapreduce(x -> log(x-1), +, [2 3 4]; dims=2) == [mapreduce(x -> log(x-1), +, [2,3,4]);;] == [log(1) + log(2) + log(3);;]
+    @test mapreduce(x -> log(x-1), +, [2 3]; dims=2) == [mapreduce(x -> log(x-1), +, [2,3]);;] == [log(1) + log(2);;]
+    @test mapreduce(x -> log(x-1), +, [2;;]; dims=2) == [mapreduce(x -> log(x-1), +, [2]);;] == [log(1);;]
+    @test_throws ArgumentError mapreduce(x -> log(x-1), +, []; dims=1)
+    @test_throws ArgumentError mapreduce(x -> log(x-1), +, [;;]; dims=2)
+    @test_throws ArgumentError mapreduce(x -> log(x-1), +, [;;]; dims=(1,2))
+    @test_throws ArgumentError mapreduce(x -> log(x-1), +, [])
+    @test_throws ArgumentError mapreduce(x -> log(x-1), +, [;;])
+
+    @test sum(x->sqrt(x-1), ones(5); dims=1) == [sum(x->sqrt(x-1), ones(5))] == [0.0]
+    @test sum(x->sqrt(x-1), ones(1); dims=1) == [sum(x->sqrt(x-1), ones(1))] == [0.0]
+    @test_throws ArgumentError sum(x->sqrt(x-1), ones(0); dims=1)
+    @test_throws ArgumentError sum(x->sqrt(x-1), ones(0))
+end
+
+@testset "reductions on broadcasted; issue #41054" begin
+    A = clamp.(randn(3,4), -1, 1)
+    bc = Base.broadcasted(+, A, 2)
+    @test sum(bc, dims=1) ≈ sum(A .+ 2, dims=1)
+    @test mapreduce(sqrt, +, bc, dims=1) ≈ mapreduce(sqrt, +, A .+ 2, dims=1)
+
+    @test sum(bc, dims=2) ≈ sum(A .+ 2, dims=2)
+    @test mapreduce(sqrt, +, bc, dims=2) ≈ mapreduce(sqrt, +, A .+ 2, dims=2)
+
+    @test sum(bc, dims=(1,2)) ≈ [sum(A .+ 2)]
+    @test mapreduce(sqrt, +, bc, dims=(1,2)) ≈ [mapreduce(sqrt, +, A .+ 2)]
+end
+
+@testset "reductions over complex values; issue #54920" begin
+    A = Complex{Int}.(rand(Complex{Int8}, 2, 2, 2));
+    @test maximum(abs, A; dims=(1,)) == mapreduce(abs, max, A, dims=(1,)) == [maximum(abs, A[:,1,1]) maximum(abs, A[:,2,1]);;; maximum(abs, A[:,1,2]) maximum(abs, A[:,2,2])]
+    @test maximum(abs, A; dims=(2,)) == mapreduce(abs, max, A, dims=(2,)) == [maximum(abs, A[1,:,1]); maximum(abs, A[2,:,1]);;; maximum(abs, A[1,:,2]); maximum(abs, A[2,:,2])]
+    @test maximum(abs, A; dims=(1, 2)) == mapreduce(abs, max, A, dims=(1, 2)) == [maximum(abs, A[:,:,1]);;; maximum(abs, A[:,:,2])]
+    @test maximum(abs, A; dims=(1, 2, 3)) == mapreduce(abs, max, A, dims=(1, 2, 3)) == [maximum(abs, A);;;]
+end
+
+@testset "bitwise operators on integers; part of issue #45562" begin
+    @test mapreduce(identity, &, [3,3,3]; dims=1) == [mapreduce(identity, &, [3,3,3])] == [3 & 3 & 3] == [3]
+    @test mapreduce(identity, |, [3,3,3]; dims=1) == [mapreduce(identity, |, [3,3,3])] == [3 | 3 | 3] == [3]
+    @test mapreduce(identity, xor, [3,3,3]; dims=1) == [mapreduce(identity, xor, [3,3,3])] == [xor(xor(3, 3), 3)] == [3]
+
+    @test mapreduce(identity, &, [3,7,6]; dims=1) == [mapreduce(identity, &, [3,7,6])] == [3 & 7 & 6] == [2]
+    @test mapreduce(identity, |, [3,7,6]; dims=1) == [mapreduce(identity, |, [3,7,6])] == [3 | 7 | 6] == [7]
+    @test mapreduce(identity, xor, [3,7,6]; dims=1) == [mapreduce(identity, xor, [3,7,6])] == [xor(xor(3, 7), 6)] == [2]
+end
+
+@testset "indexing" begin
+    A = [1 2; 3 4]
+    B = [5 6; 7 8]
+
+    @test mapreduce(/, +, A, B) ≈ sum(A./B)
+    @test mapreduce(i -> A[i]/B[i], +, eachindex(A,B)) ≈ sum(A./B)
+    @test mapreduce(i -> A[i]/B'[i], +, eachindex(A,B')) ≈ sum(A./B')
+
+    @test mapreduce(/, +, A, B; dims=1) ≈ sum(A./B; dims=1)
+    @test mapreduce(i -> A[i]/B[i], +, reshape(eachindex(A,B), size(A)); dims=1) ≈ sum(A./B; dims=1)  # BoundsError
+    @test mapreduce(i -> A[i]/B'[i], +, eachindex(A,B'); dims=1) ≈ sum(A./B'; dims=1)  # BoundsError
+end
+
+@testset "more related to issue #26488" begin
+    @test mapreduce(x -> log10(x-1), +, [11, 101]) ≈ 3.0
+    @test mapreduce(x -> log10(x-1), *, [11, 101]) ≈ 2.0
+    @test mapreduce(x -> log10(x-1), +, [11, 101]; dims=1) ≈ [3.0]
+    @test mapreduce(x -> log10(x-1), +, [11, 101]; init=-10) ≈ -7.0
+    @test mapreduce(x -> log10(x-1), +, [11, 101]; init=-10, dims=1) ≈ [-7.0]
+
+    @test mapreduce(_ -> pi, +, [1,2]) ≈ 2pi
+    @test mapreduce(_ -> pi, *, [1,2]) ≈ pi^2
+    @test mapreduce(_ -> pi, +, [1,2]; dims=1) ≈ [2pi]
+end
+
+_add(x,y) = x+y  # this avoids typeof(+) dispatch
+
+@testset "mapreduce fast paths, dims=:, op=$op, kw=$kw" for op in (+,_add),
+                                                            kw in ((;), (; init=0.0))
+    @test_broken 0 == @allocated mapreduce(/, op, 1:3, 4:7; kw...)
+    @test_broken 0 == @allocated mapreduce(/, op, 1:3, 4:9; kw...)  # stops early
+    @test mapreduce(/, op, 1:3, 4:7; kw...) ≈ reduce(op, map(/, 1:3, 4:7); kw...)
+    @test mapreduce(/, op, 1:3, 4:9; kw...) ≈ reduce(op, map(/, 1:3, 4:9); kw...)
+
+    A = [1 2; 3 4]
+    B = [5 6; 7 8]
+
+    @test_broken 0 == @allocated mapreduce(*, op, A, B; kw...)  # LinearIndices
+    @test_broken 0 == @allocated mapreduce(*, op, A, B'; kw...)  # CartesianIndices
+
+    @test mapreduce(*, op, A, B; kw...) == reduce(op, map(*, A, B); kw...)
+    @test mapreduce(*, op, A, B'; kw...) == reduce(op, map(*, A, B'); kw...)
+
+    @test_broken 0 == @allocated mapreduce(*, op, A, 5:7; kw...)  # stops early
+    @test_broken 0 == @allocated mapreduce(*, op, 1:3, B'; kw...)  # stops early
+    @test mapreduce(*, op, A, 5:7; kw...) == reduce(op, map(*, A, 5:7); kw...)
+    @test mapreduce(*, op, 1:3, B'; kw...) == reduce(op, map(*, 1:3, B'); kw...)
+    @test mapreduce(*, op, 1:3, B', 10:20; kw...) == reduce(op, map(*, 1:3, B', 10:20); kw...)
+
+    @test_throws DimensionMismatch map(*, A, hcat(B, 9:10))  # same ndims, does not stop early
+    @test_throws DimensionMismatch mapreduce(*, op, A, hcat(B, 9:10); kw...)
+end
+
+@testset "mapreduce fast paths, dims=$dims, op=$op, kw=$kw" for dims in (1,2,[2],(1,2),3),
+                                                                op in (+,*,_add),
+                                                                kw in ((;), (; init=0.0))
+    (kw == (;) && op == _add) && continue
+
+    @test mapreduce(/, op, 1:3, 4:7; dims, kw...) ≈ reduce(op, map(/, 1:3, 4:7); dims, kw...)
+    @test mapreduce(/, op, 1:3, 4:9; dims, kw...) ≈ reduce(op, map(/, 1:3, 4:9); dims, kw...)
+
+    A = [1 2; 3 4]
+    B = [5 6; 7 8]
+    @test mapreduce(*, op, A, B; dims, kw...) == reduce(op, map(*, A, B); dims, kw...)  # LinearIndices
+    @test mapreduce(*, op, A, B'; dims, kw...) == reduce(op, map(*, A, B'); dims, kw...)  # CartesianIndices
+
+    @test_broken @allocated(mapreduce(*, op, A, B; dims, kw...)) < @allocated(reduce(op, map(*, A, B); dims, kw...))
+    @test_broken @allocated(mapreduce(*, op, A, B'; dims, kw...)) < @allocated(reduce(op, map(*, A, B'); dims, kw...))
+
+    @test mapreduce(*, op, A, 5:7; dims, kw...) == reduce(op, map(*, A, 5:7); dims, kw...)  # stops early
+    @test mapreduce(*, op, 1:3, B'; dims, kw...) == reduce(op, map(*, 1:3, B'); dims, kw...)
+
+    @test_throws DimensionMismatch mapreduce(*, +, A, hcat(B, 9:10); dims, kw...)
+end
+
+struct Infinity21097 <: Number
+end
+Base.:+(::Infinity21097,::Infinity21097) = Infinity21097()
+@testset "don't call zero on unknown numbers, issue #21097" begin
+    @test reduce(+, [Infinity21097()], init=Infinity21097()) == Infinity21097()
+    @test reduce(+, [Infinity21097()], init=Infinity21097(), dims=1) == [Infinity21097()]
+    @test reduce(+, [Infinity21097();;], init=Infinity21097(), dims=2) == [Infinity21097();;]
+    for len in [2,15,16,17,31,32,33,63,64,65,127,128,129]
+        for init in ((), (; init=Infinity21097()))
+            @test reduce(+,fill(Infinity21097(), len); init...) == Infinity21097()
+            @test reduce(+,fill(Infinity21097(), len, 2); init...) == Infinity21097()
+            @test reduce(+,fill(Infinity21097(), len, 16); init...) == Infinity21097()
+            @test reduce(+,fill(Infinity21097(), len); dims = 1, init...) == [Infinity21097()]
+            @test reduce(+,fill(Infinity21097(), len, 2); dims = 1, init...) == fill(Infinity21097(), 1, 2)
+            @test reduce(+,fill(Infinity21097(), len, 16); dims = 1, init...) == fill(Infinity21097(), 1, 16)
+            @test reduce(+,fill(Infinity21097(), len, 2); dims = 2, init...) == fill(Infinity21097(), len, 1)
+            @test reduce(+,fill(Infinity21097(), len, 16); dims = 2, init...) == fill(Infinity21097(), len, 1)
+        end
+    end
+end
+
+@testset "don't assume zero is the neutral element; issue #54875" begin
+    A = rand(3,4)
+    @test sum(I -> A[I], CartesianIndices(A); dims=1) == sum(I -> A[I], CartesianIndices(A); dims=1, init=0.)
+end
+
+@testset "cannot construct a value of type Union{} for return result; issue #43731" begin
+    a = collect(1.0:4)
+    f(x) = x < 3 ? missing : x
+    @test ismissing(minimum(f, a))
+    @test ismissing(minimum(f, a); dims = 1)
+end
+
+@testset "zero indices are not special, issue #38660" begin
+    v = OffsetVector([-1, 1], 0:1)
+    @test findmin(v, dims=1) == OffsetVector.(([-1], [0]), (0:0,))
+
+    A = rand(3,4,5)
+    OA = OffsetArray(A, (-1,-2,-3))
+    for dims in ((), 1, 2, 3, (1,2), (1,3), (2,3))
+        av, ai = findmin(A; dims)
+        ov, oi = findmin(OA; dims)
+        @test av == ov.parent
+        @test ai == oi.parent .+ CartesianIndex(1,2,3)
+    end
+end
diff --git a/test/testhelpers/OffsetArrays.jl b/test/testhelpers/OffsetArrays.jl
index f8da243da6b63..cb6c7b0ded4ea 100644
--- a/test/testhelpers/OffsetArrays.jl
+++ b/test/testhelpers/OffsetArrays.jl
@@ -197,7 +197,6 @@ Base.show(io::IO, r::IdOffsetRange) = print(io, IdOffsetRange, "(values=",first(
 
 # Optimizations
 @inline Base.checkindex(::Type{Bool}, inds::IdOffsetRange, i::Real) = Base.checkindex(Bool, inds.parent, i - inds.offset)
-Base._firstslice(r::IdOffsetRange) = IdOffsetRange(Base._firstslice(r.parent), r.offset)
 
 ########################################################################################################
 # origin.jl