-
Notifications
You must be signed in to change notification settings - Fork 19
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add spreadmissings
#122
base: master
Are you sure you want to change the base?
Add spreadmissings
#122
Changes from 5 commits
a3a0cf5
2ed4b85
298a5df
b3163ce
60910e8
3b6621f
fdecc8b
4e77fa8
08045ef
6897e26
67be697
29d95c7
48ab861
4debddc
7db9f68
8bfb642
27a26f7
c625884
00b951a
c85a4c8
ea39189
e653c5a
788b9e9
4c0a744
50d8ffa
bfef988
c667ed8
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -212,6 +212,7 @@ abstract type AbstractSpread end | |||||
struct SpreadDefault <: AbstractSpread end | ||||||
struct SpreadNonMissing <: AbstractSpread end | ||||||
struct SpreadNone <: AbstractSpread end | ||||||
struct SpreadAll <: AbstractSpread end | ||||||
|
||||||
struct SpreadMissings{F, S <: AbstractSpread} <: Function | ||||||
f::F | ||||||
|
@@ -251,113 +252,173 @@ function new_args_subarray(args::Tuple, nonmissinginds::AbstractVector) | |||||
end | ||||||
end | ||||||
|
||||||
""" | ||||||
maybespread_missing( | ||||||
f::SpreadMissings, | ||||||
newargs::Tuple, | ||||||
new_kwargs, | ||||||
vecs::Tuple, | ||||||
nonmissinginds::AbstractVector{<:Integer}, | ||||||
nonmissingmask::AbstractVector{<:Bool}) | ||||||
|
||||||
Applied when `spreadmissings(f)(args...; kwargs...)` is called and | ||||||
`args` or `kwargs` contain a `Vector{Union{T, Missing}}`. | ||||||
""" | ||||||
function spread_missing(
    res::AbstractVector{T},
    vecs::Tuple,
    nonmissinginds::AbstractVector{<:Integer},
    nonmissingmask::AbstractVector{<:Bool})::AbstractVector{Union{Missing, T}} where {T}

    # Spread the vector result `res` over a new vector as long as the first
    # input vector: positions selected by `nonmissingmask` receive the elements
    # of `res` in order, every other position is `missing`.
    #
    # Throws `DimensionMismatch` when `res` does not have exactly one element
    # per jointly non-missing index.
    if length(res) != length(nonmissinginds)
        # Every fragment is joined explicitly with `*`; this helper does not
        # know which `spread` option was requested, so the message does not
        # try to interpolate it.
        s = "When spreading a vector result, " *
            "length of output must match number of jointly non-" *
            "missing values in inputs " *
            "(got $(length(res)) and $(length(nonmissinginds)))."
        throw(DimensionMismatch(s))
    end

    # Widen the element type so the untouched positions can hold `missing`.
    out = similar(res, Union{eltype(res), Missing}, length(first(vecs)))
    fill!(out, missing)
    out[nonmissingmask] .= res
    return out
end
|
||||||
function maybespread_missing( | ||||||
f::SpreadMissings{F, S}, | ||||||
newargs::Tuple, | ||||||
new_kwargs, | ||||||
res::T, | ||||||
spread::SpreadDefault, | ||||||
vecs::Tuple, | ||||||
nonmissinginds::AbstractVector{<:Integer}, | ||||||
nonmissingmask::AbstractVector{<:Bool}) where {F, S} | ||||||
|
||||||
res = f.f(newargs...; new_kwargs...) | ||||||
|
||||||
spread = f.spread | ||||||
|
||||||
if res isa AbstractVector | ||||||
# Default and spread have the same behavior if | ||||||
# output is a vector | ||||||
if spread === SpreadDefault() || spread === SpreadNonMissing() | ||||||
if length(res) != length(nonmissinginds) | ||||||
s = "When spreading a vector result with `spread=$(S)`, " * | ||||||
"length of output must match number of jointly non-" | ||||||
"missing values in inputs " | ||||||
"(got $(length(res)) and $(length(nonmissinginds))).". | ||||||
|
||||||
throw(DimensionMismatch(s)) | ||||||
end | ||||||
out = similar(res, Union{eltype(res), Missing}, length(vecs[1])) | ||||||
fill!(out, missing) | ||||||
out[nonmissingmask] .= res | ||||||
elseif spread === SpreadNone() | ||||||
out = res | ||||||
else | ||||||
throw(ArgumentError("Should not reach 1")) | ||||||
end | ||||||
else | ||||||
if spread === SpreadNonMissing() | ||||||
out = Vector{Union{typeof(res), Missing}}(undef, length(vecs[1])) | ||||||
fill!(out, missing) | ||||||
out[nonmissinginds] .= Ref(res) | ||||||
elseif spread === SpreadDefault() || spread === SpreadNone() | ||||||
out = res | ||||||
else | ||||||
throw(ArgumentError("Should not reach 2")) | ||||||
end | ||||||
nonmissingmask::AbstractVector{<:Bool})::T where{T} | ||||||
|
||||||
res | ||||||
end | ||||||
|
||||||
# Vector result with the default policy: hand the result to `spread_missing`,
# which places it at the positions selected by `nonmissingmask`.
function maybespread_missing(
    res::AbstractVector{T}, spread::SpreadDefault, vecs::Tuple,
    nonmissinginds::AbstractVector{<:Integer},
    nonmissingmask::AbstractVector{<:Bool},
)::AbstractVector{Union{Missing, T}} where {T}
    return spread_missing(res, vecs, nonmissinginds, nonmissingmask)
end
|
||||||
# Non-vector result with `SpreadNonMissing`: build a vector as long as the
# first input vector, `missing` everywhere except at `nonmissinginds`, where
# `res` itself is stored.
function maybespread_missing(
    res::T, spread::SpreadNonMissing, vecs::Tuple,
    nonmissinginds::AbstractVector{<:Integer},
    nonmissingmask::AbstractVector{<:Bool},
)::AbstractVector{Union{Missing, T}} where {T}
    n = length(first(vecs))
    out = Vector{Union{typeof(res), Missing}}(missing, n)
    # `Ref` keeps `res` from being iterated by the broadcast.
    out[nonmissinginds] .= Ref(res)
    return out
end
|
||||||
# Vector result with `SpreadNonMissing`: same treatment as the default policy,
# i.e. delegate to `spread_missing`.
function maybespread_missing(
    res::AbstractVector{T}, spread::SpreadNonMissing, vecs::Tuple,
    nonmissinginds::AbstractVector{<:Integer},
    nonmissingmask::AbstractVector{<:Bool},
)::AbstractVector{Union{Missing, T}} where {T}
    return spread_missing(res, vecs, nonmissinginds, nonmissingmask)
end
|
||||||
# `SpreadNone`: the result is handed back untouched, whatever its type.
function maybespread_missing(
    res::T, spread::SpreadNone, vecs::Tuple,
    nonmissinginds::AbstractVector{<:Integer},
    nonmissingmask::AbstractVector{<:Bool},
)::T where {T}
    return res
end
|
||||||
# Non-vector result with `SpreadAll`: repeat `res` at every position of a
# vector as long as the first input vector (no `missing` entries).
function maybespread_missing(
    res::T, spread::SpreadAll, vecs::Tuple,
    nonmissinginds::AbstractVector{<:Integer},
    nonmissingmask::AbstractVector{<:Bool},
)::AbstractVector{T} where {T}
    return fill(res, length(first(vecs)))
end
|
||||||
# Vector result with `SpreadAll`: this combination always raises an
# `ArgumentError`.
function maybespread_missing(
    res::AbstractVector, spread::SpreadAll, vecs::Tuple,
    nonmissinginds::AbstractVector{<:Integer},
    nonmissingmask::AbstractVector{<:Bool},
)
    msg = "spreadmissings with :all on vector output is reserved"
    throw(ArgumentError(msg))
end
|
||||||
"""
    spread_nomissing(res::AbstractVector, vecs::Tuple)

Return `res` unchanged after checking that it has the same length as
`first(vecs)`.

Throws a `DimensionMismatch` if the two lengths differ.
"""
function spread_nomissing(
    res::AbstractVector{T},
    vecs::Tuple)::typeof(res) where {T}

    if length(res) != length(first(vecs))
        # Every fragment is joined explicitly with `*`; this helper does not
        # know which `spread` option was requested, so the message does not
        # try to interpolate it. The comparison is against the input length,
        # and the message says so.
        s = "When spreading a vector result, " *
            "length of output must match length of the input vectors " *
            "(got $(length(res)) and $(length(first(vecs))))."
        throw(DimensionMismatch(s))
    end
    return res
end
|
||||||
function maybespread_nomissing( | ||||||
res::T, | ||||||
spread::SpreadDefault, | ||||||
vecs::Tuple)::T where{T} | ||||||
|
||||||
return out | ||||||
res | ||||||
end | ||||||
|
||||||
""" | ||||||
maybespread_nomissing( | ||||||
f::SpreadMissings, | ||||||
args::Tuple, | ||||||
kwargs, | ||||||
vecs::Tuple) | ||||||
|
||||||
Applied when `spreadmissings(f)(args...; kwargs...)` is called and *neither* | ||||||
`args` nor `kwargs` contain a `Vector{Union{T, Missing}}`. | ||||||
""" | ||||||
function maybespread_nomissing( | ||||||
f::SpreadMissings{F, S}, | ||||||
args::Tuple, | ||||||
kwargs, | ||||||
vecs::Tuple) where {F, S} | ||||||
|
||||||
res = f.f(args...; kwargs...) | ||||||
spread = f.spread | ||||||
|
||||||
if res isa AbstractVector | ||||||
# Default and spread have the same behavior if | ||||||
# output is a vector | ||||||
if spread === SpreadDefault() || spread === SpreadNonMissing() | ||||||
if length(res) != length(first(vecs)) | ||||||
s = "When spreading a vector result with `spread=$(S)`, " * | ||||||
"length of output must match number of jointly non-" | ||||||
"missing values in inputs " | ||||||
"(got $(length(res)) and $(length(nonmissinginds))).". | ||||||
throw(DimensionMismatch(s)) | ||||||
end | ||||||
out = res | ||||||
elseif spread === SpreadNone() | ||||||
out = res | ||||||
else | ||||||
throw(ArgumentError("Should not reach 1")) | ||||||
end | ||||||
else | ||||||
if spread === SpreadNonMissing() | ||||||
out = Vector{typeof(res)}(undef, length(vecs[1])) | ||||||
fill!(out, res) | ||||||
elseif spread === SpreadDefault() || spread === SpreadNone() | ||||||
out = res | ||||||
else | ||||||
throw(ArgumentError("Should not reach 2")) | ||||||
end | ||||||
end | ||||||
res::AbstractVector{T}, | ||||||
spread::SpreadDefault, | ||||||
vecs::Tuple)::typeof(res) where {T} | ||||||
|
||||||
spread_nomissing(res, vecs) | ||||||
end | ||||||
|
||||||
function maybespread_nomissing( | ||||||
res::T, | ||||||
spread::SpreadNonMissing, | ||||||
vecs::Tuple)::Vector{T} where{T} | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Are all these return type annotations needed? In many cases they seem redundant. |
||||||
|
||||||
out = Vector{typeof(res)}(undef, length(vecs[1])) | ||||||
fill!(out, res) | ||||||
out | ||||||
end | ||||||
|
||||||
# Vector result with `SpreadNonMissing` when no input allows `missing`:
# delegate to `spread_nomissing`, which length-checks and returns `res`.
function maybespread_nomissing(
    res::AbstractVector{T}, spread::SpreadNonMissing, vecs::Tuple,
)::typeof(res) where {T}
    return spread_nomissing(res, vecs)
end
|
||||||
# `SpreadNone`: the result is handed back untouched, whatever its type.
function maybespread_nomissing(
    res::T, spread::SpreadNone, vecs::Tuple,
)::T where {T}
    return res
end
|
||||||
return out | ||||||
# Non-vector result with `SpreadAll`: repeat `res` at every position of a
# vector as long as the first input vector.
function maybespread_nomissing(
    res::T, spread::SpreadAll, vecs::Tuple,
)::AbstractVector{T} where {T}
    return fill(res, length(first(vecs)))
end
|
||||||
# Vector result with `SpreadAll`: this combination always raises an
# `ArgumentError`.
function maybespread_nomissing(
    res::AbstractVector, spread::SpreadAll, vecs::Tuple,
)
    msg = "spreadmissings with :all on vector output is reserved"
    throw(ArgumentError(msg))
end
|
||||||
function check_indices_match(vecs...) | ||||||
|
@@ -399,12 +460,14 @@ function (f::SpreadMissings{F, S})(args...; kwargs...) where {F, S} | |||||
new_kwargs_vals = new_args_subarray(kwargs_vals, nonmissinginds) | ||||||
|
||||||
new_kwargs = (k => v for (k, v) in zip(keys(kwargs), new_kwargs_vals)) | ||||||
maybespread_missing(f, newargs, new_kwargs, vecs, nonmissinginds, nonmissingmask) | ||||||
res = f.f(newargs...; new_kwargs...) | ||||||
maybespread_missing(res, f.spread, vecs, nonmissinginds, nonmissingmask) | ||||||
# There is at least one vector, but none of the vectors can contain missing | ||||||
elseif any(x -> x isa AbstractVector, xs) | ||||||
vecs = Base.filter(x -> x isa AbstractVector, xs) | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Still applies. |
||||||
check_indices_match(vecs...) | ||||||
maybespread_nomissing(f, args, kwargs, vecs) | ||||||
res = f.f(args...; kwargs...) | ||||||
maybespread_nomissing(res, f.spread, vecs) | ||||||
else | ||||||
f.f(args...; kwargs...) | ||||||
end | ||||||
|
@@ -432,6 +495,10 @@ For each vector argument, `f` is passed a `SubArray` | |||||
view with an element type equal to `nonmissingtype(T)`, | ||||||
with `T` the element type of the original argument. | ||||||
|
||||||
If none of the input arguments are vectors of any kind, | ||||||
`spreadmissings(f)` behaves exactly the same as `f`. No | ||||||
pre- or post-processing is done. | ||||||
|
||||||
### Examples | ||||||
|
||||||
```julia-repl | ||||||
|
@@ -513,14 +580,20 @@ The `spread` keyword argument controls whether the output from | |||||
* If `output` is not a `Vector`, `output` is spread along non-missing | ||||||
elements of the inputs. | ||||||
* `:none`: output is returned directly, whether a vector or not. | ||||||
* `:all`: | ||||||
* If output is not a vector, it is spread over the full | ||||||
length of the input vectors, not only over the indices | ||||||
where the inputs are jointly non-missing. | ||||||
* If the output is a vector, an error is thrown. | ||||||
|
||||||
A summary of the behavior is given in the table below: | ||||||
|
||||||
| spread \\ output type | Vector | Non-vector | | ||||||
|:---------------------- |:------ |:-----------| | ||||||
| :default | spread | return | | ||||||
| :nonmissing | spread | spread | | ||||||
| :none | return | return | | ||||||
| spread \\ output type | Vector | Non-vector | | ||||||
|:---------------------- |:------------------------------- |:------------------------------------| | ||||||
| :default | spread over non-missing indices | return | | ||||||
| :nonmissing | spread over non-missing indices | spread over non-missing indices | | ||||||
| :none | return | return | | ||||||
| :all | error | spread over all indices | | ||||||
|
||||||
If there are `AbstractVector` inputs but none of these inputs are | ||||||
`AbstractVector{>:Missing}`, the returned vectors will not allow | ||||||
|
@@ -559,16 +632,23 @@ behaves the same as `f` regardless of `spread`. | |||||
elements of the original `x`. | ||||||
2. `fillmean_skip` returns a vector which does not allow for `missing`, while | ||||||
`spreadmissings(fillmean)` does. | ||||||
""" | ||||||
|
||||||
SpreadMissings(f, spread::Val{:default}) = SpreadMissings(f, SpreadDefault()) | ||||||
SpreadMissings(f, spread::Val{:nonmissing}) = SpreadMissings(f, SpreadNonMissing()) | ||||||
SpreadMissings(f, spread::Val{:none}) = SpreadMissings(f, SpreadNone()) | ||||||
|
||||||
Use the keyword `spread = :all` to emulate the `skipmissing` behavior. | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Show an example? |
||||||
""" | ||||||
function spreadmissings(f; spread::Symbol = :default) | ||||||
SpreadMissings(f, Val(spread)) | ||||||
# throw(ArgumentError("`spread` must be one of `:default`, `:nonmissing`, or `:none`")) | ||||||
if spread === :default | ||||||
SpreadMissings(f, SpreadDefault()) | ||||||
elseif spread === :nonmissing | ||||||
SpreadMissings(f, SpreadNonMissing()) | ||||||
elseif spread === :none | ||||||
SpreadMissings(f, SpreadNone()) | ||||||
elseif spread === :all | ||||||
SpreadMissings(f, SpreadAll()) | ||||||
else | ||||||
throw(ArgumentError("`spread` must be one of `:default`, `:nonmissing`, `:none`, or `:all`")) | ||||||
end | ||||||
end | ||||||
|
||||||
""" | ||||||
skipmissings(args...) | ||||||
|
||||||
|
@@ -590,7 +670,6 @@ julia> collect(tx) | |||||
2-element Array{Int64,1}: | ||||||
1 | ||||||
2 | ||||||
|
||||||
``` | ||||||
""" | ||||||
function skipmissings(args...) | ||||||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Same below (for consistency with current style of the package).