Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add spreadmissings #122

Open
wants to merge 27 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 5 commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
305 changes: 192 additions & 113 deletions src/Missings.jl
Original file line number Diff line number Diff line change
Expand Up @@ -212,6 +212,7 @@ abstract type AbstractSpread end
# Tag for `spread = :default`: vector results are spread over the non-missing
# indices; non-vector results are returned as is.
struct SpreadDefault <: AbstractSpread end
# Tag for `spread = :nonmissing`: both vector and non-vector results are spread
# over the non-missing indices.
struct SpreadNonMissing <: AbstractSpread end
# Tag for `spread = :none`: the result is returned directly, vector or not.
struct SpreadNone <: AbstractSpread end
# Tag for `spread = :all`: non-vector results are spread over all indices;
# vector results raise an error.
struct SpreadAll <: AbstractSpread end

struct SpreadMissings{F, S <: AbstractSpread} <: Function
f::F
Expand Down Expand Up @@ -251,113 +252,173 @@ function new_args_subarray(args::Tuple, nonmissinginds::AbstractVector)
end
end

"""
maybespread_missing(
f::SpreadMissings,
newargs::Tuple,
new_kwargs,
vecs::Tuple,
nonmissinginds::AbstractVector{<:Integer},
nonmissingmask::AbstractVector{<:Bool})

Applied when `spreadmissings(f)(args...; kwargs...)` is called and
`args` or `kwargs` contain a `Vector{Union{T, Missing}}`.
"""
function spread_missing(
res::AbstractVector{T},
vecs::Tuple,
nonmissinginds::AbstractVector{<:Integer},
nonmissingmask::AbstractVector{<:Bool})::AbstractVector{Union{Missing, T}} where {T}
Comment on lines +255 to +259
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
function spread_missing(
res::AbstractVector{T},
vecs::Tuple,
nonmissinginds::AbstractVector{<:Integer},
nonmissingmask::AbstractVector{<:Bool})::AbstractVector{Union{Missing, T}} where {T}
function spread_missing(res::AbstractVector{T},
vecs::Tuple,
nonmissinginds::AbstractVector{<:Integer},
nonmissingmask::AbstractVector{<:Bool})::AbstractVector{Union{Missing, T}} where {T}

Same below (for consistency with current style of the package).


if length(res) != length(nonmissinginds)
s = "When spreading a vector result with `spread=$(S)`, " *
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

S doesn't exist here. Apparently also needs testing.

"length of output must match number of jointly non-"
"missing values in inputs "
"(got $(length(res)) and $(length(nonmissinginds))).".

throw(DimensionMismatch(s))
end
out = similar(res, Union{eltype(res), Missing}, length(vecs[1]))
fill!(out, missing)
out[nonmissingmask] .= res
out
end

function maybespread_missing(
f::SpreadMissings{F, S},
newargs::Tuple,
new_kwargs,
res::T,
spread::SpreadDefault,
vecs::Tuple,
nonmissinginds::AbstractVector{<:Integer},
nonmissingmask::AbstractVector{<:Bool}) where {F, S}

res = f.f(newargs...; new_kwargs...)

spread = f.spread

if res isa AbstractVector
# Default and spread have the same behavior if
# output is a vector
if spread === SpreadDefault() || spread === SpreadNonMissing()
if length(res) != length(nonmissinginds)
s = "When spreading a vector result with `spread=$(S)`, " *
"length of output must match number of jointly non-"
"missing values in inputs "
"(got $(length(res)) and $(length(nonmissinginds))).".

throw(DimensionMismatch(s))
end
out = similar(res, Union{eltype(res), Missing}, length(vecs[1]))
fill!(out, missing)
out[nonmissingmask] .= res
elseif spread === SpreadNone()
out = res
else
throw(ArgumentError("Should not reach 1"))
end
else
if spread === SpreadNonMissing()
out = Vector{Union{typeof(res), Missing}}(undef, length(vecs[1]))
fill!(out, missing)
out[nonmissinginds] .= Ref(res)
elseif spread === SpreadDefault() || spread === SpreadNone()
out = res
else
throw(ArgumentError("Should not reach 2"))
end
nonmissingmask::AbstractVector{<:Bool})::T where{T}

res
end

# Vector result under `spread = :default` when some input contains `missing`:
# defer to `spread_missing`, which places `res` at the non-missing positions
# and leaves `missing` everywhere else.
function maybespread_missing(res::AbstractVector{T},
                             spread::SpreadDefault,
                             vecs::Tuple,
                             nonmissinginds::AbstractVector{<:Integer},
                             nonmissingmask::AbstractVector{<:Bool})::AbstractVector{Union{Missing, T}} where {T}
    return spread_missing(res, vecs, nonmissinginds, nonmissingmask)
end

# Non-vector result under `spread = :nonmissing` when some input contains
# `missing`: build a vector as long as the first input vector, holding `res`
# at every index in `nonmissinginds` and `missing` at all other positions.
function maybespread_missing(res::T,
                             spread::SpreadNonMissing,
                             vecs::Tuple,
                             nonmissinginds::AbstractVector{<:Integer},
                             nonmissingmask::AbstractVector{<:Bool})::AbstractVector{Union{Missing, T}} where {T}
    n = length(vecs[1])
    spreadout = Vector{Union{typeof(res), Missing}}(undef, n)
    # Start from an all-`missing` vector, then overwrite the selected slots.
    fill!(spreadout, missing)
    fill!(view(spreadout, nonmissinginds), res)
    return spreadout
end

# Vector result under `spread = :nonmissing` when some input contains
# `missing`: same treatment as the default mode, i.e. defer to
# `spread_missing` to place `res` at the non-missing positions.
function maybespread_missing(res::AbstractVector{T},
                             spread::SpreadNonMissing,
                             vecs::Tuple,
                             nonmissinginds::AbstractVector{<:Integer},
                             nonmissingmask::AbstractVector{<:Bool})::AbstractVector{Union{Missing, T}} where {T}
    return spread_missing(res, vecs, nonmissinginds, nonmissingmask)
end

# Any result under `spread = :none`: hand `res` back untouched. The remaining
# arguments are accepted only so the call shape matches the other methods.
function maybespread_missing(res::T,
                             spread::SpreadNone,
                             vecs::Tuple,
                             nonmissinginds::AbstractVector{<:Integer},
                             nonmissingmask::AbstractVector{<:Bool})::T where {T}
    return res
end

# Non-vector result under `spread = :all`: repeat `res` once for every element
# of the first input vector, regardless of where the inputs are missing.
function maybespread_missing(res::T,
                             spread::SpreadAll,
                             vecs::Tuple,
                             nonmissinginds::AbstractVector{<:Integer},
                             nonmissingmask::AbstractVector{<:Bool})::AbstractVector{T} where {T}
    n = length(first(vecs))
    return fill!(Vector{typeof(res)}(undef, n), res)
end

# Vector result under `spread = :all`: this combination is not supported and
# always raises an `ArgumentError`.
function maybespread_missing(res::AbstractVector,
                             spread::SpreadAll,
                             vecs::Tuple,
                             nonmissinginds::AbstractVector{<:Integer},
                             nonmissingmask::AbstractVector{<:Bool})
    msg = "spreadmissings with :all on vector output is reserved"
    throw(ArgumentError(msg))
end

function spread_nomissing(
res::AbstractVector{T},
vecs::Tuple)::typeof(res) where {T}
Comment on lines +350 to +352
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For clarity, I would make this maybespread_nomissing(res::AbstractVector{T}, spread::SpreadNonMissing, vecs::Tuple). Same for maybespread_missing.


if length(res) != length(first(vecs))
s = "When spreading a vector result with `spread=$(S)`, " *
"length of output must match number of jointly non-"
"missing values in inputs "
"(got $(length(res)) and $(length(first(vecs)))).".
throw(DimensionMismatch(s))
end
res
end

# Non-vector result under `spread = :default` when no input vector allows
# `missing`: the result is returned as is.
#
# `spread` and `vecs` are not used in the body; they are part of the signature
# so that dispatch selects this method for the default mode.
function maybespread_nomissing(res::T,
                               spread::SpreadDefault,
                               vecs::Tuple)::T where {T}
    # Return the computed result itself; there is no `out` binding in this
    # method, so returning anything else would raise `UndefVarError`.
    return res
end

"""
maybespread_nomissing(
f::SpreadMissings,
args::Tuple,
kwargs,
vecs::Tuple)

Applied when `spreadmissings(f)(args...; kwargs...)` is called and *neither*
`args` nor `kwargs` contain a `Vector{Union{T, Missing}}`.
"""
function maybespread_nomissing(
f::SpreadMissings{F, S},
args::Tuple,
kwargs,
vecs::Tuple) where {F, S}

res = f.f(args...; kwargs...)
spread = f.spread

if res isa AbstractVector
# Default and spread have the same behavior if
# output is a vector
if spread === SpreadDefault() || spread === SpreadNonMissing()
if length(res) != length(first(vecs))
s = "When spreading a vector result with `spread=$(S)`, " *
"length of output must match number of jointly non-"
"missing values in inputs "
"(got $(length(res)) and $(length(nonmissinginds))).".
throw(DimensionMismatch(s))
end
out = res
elseif spread === SpreadNone()
out = res
else
throw(ArgumentError("Should not reach 1"))
end
else
if spread === SpreadNonMissing()
out = Vector{typeof(res)}(undef, length(vecs[1]))
fill!(out, res)
elseif spread === SpreadDefault() || spread === SpreadNone()
out = res
else
throw(ArgumentError("Should not reach 2"))
end
end
res::AbstractVector{T},
spread::SpreadDefault,
vecs::Tuple)::typeof(res) where {T}

spread_nomissing(res, vecs)
end

function maybespread_nomissing(
res::T,
spread::SpreadNonMissing,
vecs::Tuple)::Vector{T} where{T}
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are all these return type annotations needed? In many cases they seem redundant.


out = Vector{typeof(res)}(undef, length(vecs[1]))
fill!(out, res)
out
end

# Vector result under `spread = :nonmissing` when no input vector allows
# `missing`: defer to `spread_nomissing`, which checks the length of `res`
# against the first input vector and returns `res` itself.
function maybespread_nomissing(res::AbstractVector{T},
                               spread::SpreadNonMissing,
                               vecs::Tuple)::typeof(res) where {T}
    return spread_nomissing(res, vecs)
end

# Any result under `spread = :none` when no input vector allows `missing`:
# hand `res` back untouched.
function maybespread_nomissing(res::T,
                               spread::SpreadNone,
                               vecs::Tuple)::T where {T}
    return res
end

return out
function maybespread_nomissing(
res::T,
spread::SpreadAll,
vecs::Tuple)::AbstractVector{T} where {T}

out = Vector{typeof(res)}(undef, length(first(vecs)))
out .= Ref(res)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How about calling maybespread_nomissing(res, SpreadNonMissing(), vecs)?

out
end

# Vector result under `spread = :all` when no input vector allows `missing`:
# this combination is not supported and always raises an `ArgumentError`.
function maybespread_nomissing(res::AbstractVector,
                               spread::SpreadAll,
                               vecs::Tuple)
    msg = "spreadmissings with :all on vector output is reserved"
    throw(ArgumentError(msg))
end

function check_indices_match(vecs...)
Expand Down Expand Up @@ -399,12 +460,14 @@ function (f::SpreadMissings{F, S})(args...; kwargs...) where {F, S}
new_kwargs_vals = new_args_subarray(kwargs_vals, nonmissinginds)

new_kwargs = (k => v for (k, v) in zip(keys(kwargs), new_kwargs_vals))
maybespread_missing(f, newargs, new_kwargs, vecs, nonmissinginds, nonmissingmask)
res = f.f(newargs...; new_kwargs...)
maybespread_missing(res, f.spread, vecs, nonmissinginds, nonmissingmask)
# There is at least one vector, but none of the vectors can contain missing
elseif any(x -> x isa AbstractVector, xs)
vecs = Base.filter(x -> x isa AbstractVector, xs)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
vecs = Base.filter(x -> x isa AbstractVector, xs)
vecs = filter(x -> x isa AbstractVector, xs)

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Still applies.

check_indices_match(vecs...)
maybespread_nomissing(f, args, kwargs, vecs)
res = f.f(args...; kwargs...)
maybespread_nomissing(res, f.spread, vecs)
else
f.f(args...; kwargs...)
end
Expand Down Expand Up @@ -432,6 +495,10 @@ For each vector argument, `f` is passed a `SubArray`
view with an element type equal to `nonmissingtype(T)`,
with `T` the element type of the original argument.

If none of the input arguments are vectors of any kind,
`spreadmissings(f)` behaves exactly the same as `f`. No
pre- or post-processing is done.

### Examples

```julia-repl
Expand Down Expand Up @@ -513,14 +580,20 @@ The `spread` keyword argument controls whether the output from
* If `output` is not a `Vector`, `output` is spread along non-missing
elements of the inputs.
* `:none`: output is returned directly, whether a vector or not.
* `:all`:
* If output is not a vector, it is spread over the full
length of the input vectors, not only over the indices
where all inputs are non-missing.
* If the output is a vector, an error is thrown.

A summary of the behavior is given in the table below:

| spread \\ output type | Vector | Non-vector |
|:---------------------- |:------ |:-----------|
| :default | spread | return |
| :nonmissing | spread | spread |
| :none | return | return |
| spread \\ output type | Vector | Non-vector |
|:---------------------- |:------------------------------- |:------------------------------------|
| :default | spread over non-missing indices | return |
| :nonmissing | spread over non-missing indices | spread over non-missing indices |
| :none | return | return |
| :all | error | spread over all indices |

If there are `AbstractVector` inputs but none of these inputs are
`AbstractVector{>:Missing}`, the returned vectors will not allow
Expand Down Expand Up @@ -559,16 +632,23 @@ behaves the same as `f` regardless of `spread`.
elements of the original `x`.
2. `fillmean_skip` returns a vector which does not allow for `missing`, while
`spreadmissings(fillmean)` does.
"""

SpreadMissings(f, spread::Val{:default}) = SpreadMissings(f, SpreadDefault())
SpreadMissings(f, spread::Val{:nonmissing}) = SpreadMissings(f, SpreadNonMissing())
SpreadMissings(f, spread::Val{:none}) = SpreadMissings(f, SpreadNone())

Use the keyword `spread = :all` to emulate the `skipmissing` behavior.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Show an example?

"""
function spreadmissings(f; spread::Symbol = :default)
    # Translate the user-facing symbol into the singleton tag that the
    # `maybespread_*` methods dispatch on. The branches are the only place a
    # wrapper is constructed, so an unsupported symbol always reaches the
    # `ArgumentError` below rather than failing earlier with a `MethodError`.
    if spread === :default
        return SpreadMissings(f, SpreadDefault())
    elseif spread === :nonmissing
        return SpreadMissings(f, SpreadNonMissing())
    elseif spread === :none
        return SpreadMissings(f, SpreadNone())
    elseif spread === :all
        return SpreadMissings(f, SpreadAll())
    else
        throw(ArgumentError(
            "`spread` must be one of `:default`, `:nonmissing`, `:none`, or `:all`"))
    end
end

"""
skipmissings(args...)

Expand All @@ -590,7 +670,6 @@ julia> collect(tx)
2-element Array{Int64,1}:
1
2

```
"""
function skipmissings(args...)
Expand Down
Loading