diff --git a/NEWS.md b/NEWS.md index 63d22b8be96e4..ba6799ae997c8 100644 --- a/NEWS.md +++ b/NEWS.md @@ -52,6 +52,13 @@ This section lists changes that do not have deprecation warnings. * `broadcast` now treats `Ref` (except for `Ptr`) arguments as 0-dimensional arrays ([#18965]). + * `broadcast` now handles missing data (`Nullable`s) allowing operations to + be lifted over `Nullable`s, as if the `Nullable` were an array with + zero or one element ([#16961]). Note that many situations where `Nullable` + types had been treated like scalars before will no longer work. For + example, `get.(xs)` on `xs::Array{T <: Nullable}` will now treat the + nullables as a container, and attempt to operate on the data contained. + This use case will need to be migrated to `map(get, xs)`. * The runtime now enforces when new method definitions can take effect ([#17057]). The flip-side of this is that new method definitions should now reliably actually @@ -109,6 +116,10 @@ Library improvements * Additional methods for `ones` and `zeros` functions to support the same signature as the `similar` function ([#19635]). + * Methods for `map` and `filter` with `Nullable` arguments have been + implemented; the semantics are as if the `Nullable` were a container with + zero or one element ([#16961]). + Compiler/Runtime improvements ----------------------------- @@ -620,6 +631,7 @@ Language tooling improvements calling C++ code from Julia. 
+[#265]: https://github.com/JuliaLang/julia/issues/265 [#550]: https://github.com/JuliaLang/julia/issues/550 [#964]: https://github.com/JuliaLang/julia/issues/964 [#1090]: https://github.com/JuliaLang/julia/issues/1090 @@ -731,10 +743,12 @@ Language tooling improvements [#16731]: https://github.com/JuliaLang/julia/issues/16731 [#16854]: https://github.com/JuliaLang/julia/issues/16854 [#16953]: https://github.com/JuliaLang/julia/issues/16953 +[#16961]: https://github.com/JuliaLang/julia/issues/16961 [#16972]: https://github.com/JuliaLang/julia/issues/16972 [#16986]: https://github.com/JuliaLang/julia/issues/16986 [#17033]: https://github.com/JuliaLang/julia/issues/17033 [#17037]: https://github.com/JuliaLang/julia/issues/17037 +[#17057]: https://github.com/JuliaLang/julia/issues/17057 [#17075]: https://github.com/JuliaLang/julia/issues/17075 [#17132]: https://github.com/JuliaLang/julia/issues/17132 [#17261]: https://github.com/JuliaLang/julia/issues/17261 @@ -748,17 +762,17 @@ Language tooling improvements [#17510]: https://github.com/JuliaLang/julia/issues/17510 [#17546]: https://github.com/JuliaLang/julia/issues/17546 [#17599]: https://github.com/JuliaLang/julia/issues/17599 +[#17623]: https://github.com/JuliaLang/julia/issues/17623 [#17668]: https://github.com/JuliaLang/julia/issues/17668 [#17758]: https://github.com/JuliaLang/julia/issues/17758 [#17785]: https://github.com/JuliaLang/julia/issues/17785 [#18330]: https://github.com/JuliaLang/julia/issues/18330 [#18339]: https://github.com/JuliaLang/julia/issues/18339 [#18346]: https://github.com/JuliaLang/julia/issues/18346 -[#18442]: https://github.com/JuliaLang/julia/issues/18442 [#18473]: https://github.com/JuliaLang/julia/issues/18473 +[#18628]: https://github.com/JuliaLang/julia/issues/18628 [#18644]: https://github.com/JuliaLang/julia/issues/18644 [#18690]: https://github.com/JuliaLang/julia/issues/18690 -[#18628]: https://github.com/JuliaLang/julia/issues/18628 [#18839]: 
https://github.com/JuliaLang/julia/issues/18839 [#18931]: https://github.com/JuliaLang/julia/issues/18931 [#18965]: https://github.com/JuliaLang/julia/issues/18965 @@ -768,3 +782,6 @@ Language tooling improvements [#19288]: https://github.com/JuliaLang/julia/issues/19288 [#19305]: https://github.com/JuliaLang/julia/issues/19305 [#19469]: https://github.com/JuliaLang/julia/issues/19469 +[#19543]: https://github.com/JuliaLang/julia/issues/19543 +[#19598]: https://github.com/JuliaLang/julia/issues/19598 +[#19635]: https://github.com/JuliaLang/julia/issues/19635 diff --git a/base/broadcast.jl b/base/broadcast.jl index 40908df116f13..a5163e3cd181b 100644 --- a/base/broadcast.jl +++ b/base/broadcast.jl @@ -4,17 +4,27 @@ module Broadcast using Base.Cartesian using Base: promote_eltype_op, linearindices, tail, OneTo, to_shape, - _msk_end, unsafe_bitgetindex, bitcache_chunks, bitcache_size, dumpbitcache + _msk_end, unsafe_bitgetindex, bitcache_chunks, bitcache_size, dumpbitcache, + nullable_returntype, null_safe_eltype_op, hasvalue, is_nullable_array import Base: broadcast, broadcast! export bitbroadcast, dotview export broadcast_getindex, broadcast_setindex! ## Broadcasting utilities ## +broadcast_array_type() = Array +broadcast_array_type(A, As...) = + if is_nullable_array(A) || broadcast_array_type(As...) === Array{Nullable} + Array{Nullable} + else + Array + end + # fallbacks for some special cases @inline broadcast(f, x::Number...) = f(x...) @inline broadcast{N}(f, t::NTuple{N}, ts::Vararg{NTuple{N}}) = map(f, t, ts...) -@inline broadcast(f, As::AbstractArray...) = broadcast_c(f, Array, As...) +@inline broadcast(f, As::AbstractArray...) = + broadcast_c(f, broadcast_array_type(As...), As...) # special cases for "X .= ..." (broadcast!) 
assignments broadcast!(::typeof(identity), X::AbstractArray, x::Number) = fill!(X, x) @@ -30,7 +40,9 @@ containertype(::Type) = Any containertype{T<:Ptr}(::Type{T}) = Any containertype{T<:Tuple}(::Type{T}) = Tuple containertype{T<:Ref}(::Type{T}) = Array -containertype{T<:AbstractArray}(::Type{T}) = Array +containertype{T<:AbstractArray}(::Type{T}) = + is_nullable_array(T) ? Array{Nullable} : Array +containertype{T<:Nullable}(::Type{T}) = Nullable containertype(ct1, ct2) = promote_containertype(containertype(ct1), containertype(ct2)) @inline containertype(ct1, ct2, cts...) = promote_containertype(containertype(ct1), containertype(ct2, cts...)) @@ -39,16 +51,26 @@ promote_containertype(::Type{Array}, ct) = Array promote_containertype(ct, ::Type{Array}) = Array promote_containertype(::Type{Tuple}, ::Type{Any}) = Tuple promote_containertype(::Type{Any}, ::Type{Tuple}) = Tuple +promote_containertype(::Type{Any}, ::Type{Nullable}) = Nullable +promote_containertype(::Type{Nullable}, ::Type{Any}) = Nullable +promote_containertype(::Type{Nullable}, ::Type{Array}) = Array{Nullable} +promote_containertype(::Type{Array}, ::Type{Nullable}) = Array{Nullable} +promote_containertype(::Type{Array{Nullable}}, ::Type{Array{Nullable}}) = + Array{Nullable} +promote_containertype(::Type{Array{Nullable}}, ::Type{Array}) = Array{Nullable} +promote_containertype(::Type{Array}, ::Type{Array{Nullable}}) = Array{Nullable} +promote_containertype(::Type{Array{Nullable}}, ct) = Array{Nullable} +promote_containertype(ct, ::Type{Array{Nullable}}) = Array{Nullable} promote_containertype{T}(::Type{T}, ::Type{T}) = T ## Calculate the broadcast indices of the arguments, or error if incompatible # array inputs broadcast_indices() = () broadcast_indices(A) = broadcast_indices(containertype(A), A) -broadcast_indices(::Type{Any}, A) = () +broadcast_indices(::Union{Type{Any}, Type{Nullable}}, A) = () broadcast_indices(::Type{Tuple}, A) = (OneTo(length(A)),) -broadcast_indices(::Type{Array}, A) = 
indices(A) broadcast_indices(::Type{Array}, A::Ref) = () +broadcast_indices{T<:Array}(::Type{T}, A) = indices(A) @inline broadcast_indices(A, B...) = broadcast_shape((), broadcast_indices(A), map(broadcast_indices, B)...) # shape (i.e., tuple-of-indices) inputs broadcast_shape(shape::Tuple) = shape @@ -123,6 +145,8 @@ end Base.@propagate_inbounds _broadcast_getindex(A, I) = _broadcast_getindex(containertype(A), A, I) Base.@propagate_inbounds _broadcast_getindex(::Type{Array}, A::Ref, I) = A[] Base.@propagate_inbounds _broadcast_getindex(::Type{Any}, A, I) = A +Base.@propagate_inbounds _broadcast_getindex(::Union{Type{Any}, + Type{Nullable}}, A, I) = A Base.@propagate_inbounds _broadcast_getindex(::Any, A, I) = A[I] ## Broadcasting core @@ -272,19 +296,29 @@ end ftype(f, A) = typeof(f) ftype(f, A...) = typeof(a -> f(a...)) ftype(T::Type, A...) = Type{T} -typestuple(a) = (Base.@_pure_meta; Tuple{eltype(a)}) -typestuple(T::Type) = (Base.@_pure_meta; Tuple{Type{T}}) -typestuple(a, b...) = (Base.@_pure_meta; Tuple{typestuple(a).types..., typestuple(b...).types...}) -ziptype(A) = typestuple(A) -ziptype(A, B) = (Base.@_pure_meta; Iterators.Zip2{typestuple(A), typestuple(B)}) -@inline ziptype(A, B, C, D...) = Iterators.Zip{typestuple(A), ziptype(B, C, D...)} -_broadcast_type(f, T::Type, As...) = Base._return_type(f, typestuple(T, As...)) -_broadcast_type(f, A, Bs...) = Base._default_eltype(Base.Generator{ziptype(A, Bs...), ftype(f, A, Bs...)}) +# nullables need to be treated like scalars sometimes and like containers +# other times, so there are two variants of typestuple. + +# if the first argument is Any, then Nullable should be treated like a +# scalar; if the first argument is Array, then Nullable should be treated +# like a container. 
+typestuple(::Type, a) = (Base.@_pure_meta; Tuple{eltype(a)}) +typestuple(::Type{Any}, a::Nullable) = (Base.@_pure_meta; Tuple{typeof(a)}) +typestuple(::Type, T::Type) = (Base.@_pure_meta; Tuple{Type{T}}) +typestuple{T}(::Type{T}, a, b...) = (Base.@_pure_meta; Tuple{typestuple(T, a).types..., typestuple(T, b...).types...}) + +# these functions take the variant of typestuple to be used as first argument +ziptype{T}(::Type{T}, A) = typestuple(T, A) +ziptype{T}(::Type{T}, A, B) = (Base.@_pure_meta; Iterators.Zip2{typestuple(T, A), typestuple(T, B)}) +@inline ziptype{T}(::Type{T}, A, B, C, D...) = Iterators.Zip{typestuple(T, A), ziptype(T, B, C, D...)} + +_broadcast_type{S}(::Type{S}, f, T::Type, As...) = Base._return_type(f, typestuple(S, T, As...)) # infer the result of calling f; S only selects the typestuple variant +_broadcast_type{T}(::Type{T}, f, A, Bs...) = Base._default_eltype(Base.Generator{ziptype(T, A, Bs...), ftype(f, A, Bs...)}) # broadcast methods that dispatch on the type of the final container @inline function broadcast_c(f, ::Type{Array}, A, Bs...) - T = _broadcast_type(f, A, Bs...) + T = _broadcast_type(Any, f, A, Bs...) shape = broadcast_indices(A, Bs...) iter = CartesianRange(shape) if isleaftype(T) @@ -295,20 +329,41 @@ _broadcast_type(f, A, Bs...) = Base._default_eltype(Base.Generator{ziptype(A, Bs end return broadcast_t(f, Any, shape, iter, A, Bs...) end +@inline function broadcast_c(f, ::Type{Array{Nullable}}, A, Bs...) + @inline rec(x) = broadcast(f, x) + @inline rec(x, y) = broadcast(f, x, y) + @inline rec(x, y, z) = broadcast(f, x, y, z) + @inline rec(xs...) = broadcast(f, xs...) + broadcast_c(rec, Array, A, Bs...) +end function broadcast_c(f, ::Type{Tuple}, As...) shape = broadcast_indices(As...) n = length(shape[1]) return ntuple(k->f((_broadcast_getindex(A, k) for A in As)...), n) end +@inline function broadcast_c(f, ::Type{Nullable}, a...) + nonnull = all(hasvalue, a) + S = _broadcast_type(Array, f, a...) + if isleaftype(S) && null_safe_eltype_op(f, a...) 
+ Nullable{S}(f(map(unsafe_get, a)...), nonnull) + else + if nonnull + Nullable(f(map(unsafe_get, a)...)) + else + Nullable{nullable_returntype(S)}() + end + end +end @inline broadcast_c(f, ::Type{Any}, a...) = f(a...) """ broadcast(f, As...) -Broadcasts the arrays, tuples, `Ref` and/or scalars `As` to a container of the -appropriate type and dimensions. In this context, anything that is not a -subtype of `AbstractArray`, `Ref` (except for `Ptr`s) or `Tuple` is considered -a scalar. The resulting container is established by the following rules: +Broadcasts the arrays, tuples, `Ref`, nullables, and/or scalars `As` to a +container of the appropriate type and dimensions. In this context, anything +that is not a subtype of `AbstractArray`, `Ref` (except for `Ptr`s), `Tuple`, +or `Nullable` is considered a scalar. The resulting container is established by +the following rules: - If all the arguments are scalars, it returns a scalar. - If the arguments are tuples and zero or more scalars, it returns a tuple. @@ -316,6 +371,17 @@ a scalar. The resulting container is established by the following rules: (and treats any `Ref` as a 0-dimensional array of its contents and any tuple as a 1-dimensional array) expanding singleton dimensions. +The following additional rules apply to `Nullable` arguments: + + - If there is at least one `Nullable`, and all the arguments are scalars or + `Nullable`, it returns a `Nullable`. + - If there is at least one array or `Ref` with `Nullable` entries, or there + is at least one array or `Ref` (perhaps with scalar entries instead of + `Nullable` entries) and a nullable, then the result is an array of + `Nullable` entries. + - If there is a tuple and a nullable, the result is an error, as this case is + not currently supported. + A special syntax exists for broadcasting: `f.(args...)` is equivalent to `broadcast(f, args...)`, and nested `f.(g.(args...))` calls are fused into a single broadcast loop. 
@@ -372,6 +438,28 @@ julia> string.(("one","two","three","four"), ": ", 1:4) "two: 2" "three: 3" "four: 4" + +julia> Nullable("X") .* "Y" +Nullable{String}("XY") + +julia> broadcast(/, 1.0, Nullable(2.0)) +Nullable{Float64}(0.5) + +julia> [Nullable(1), Nullable(2), Nullable()] .* 3 +3-element Array{Nullable{Int64},1}: + 3 + 6 + #NULL + +julia> [1+im, 2+2im, 3+3im] ./ Nullable{Int}() +3-element Array{Nullable{Complex{Float64}},1}: + #NULL + #NULL + #NULL + +julia> Ref(7) .+ Nullable(3) +0-dimensional Array{Nullable{Int64},0}: +10 ``` """ @inline broadcast(f, A, Bs...) = broadcast_c(f, containertype(A, Bs...), A, Bs...) diff --git a/base/nullable.jl b/base/nullable.jl index 3b333d2612de9..e83c568030864 100644 --- a/base/nullable.jl +++ b/base/nullable.jl @@ -175,18 +175,29 @@ vectorization. """ null_safe_op(f::Any, ::Type, ::Type...) = false -typealias NullSafeSignedInts Union{Int128, Int16, Int32, Int64, Int8} -typealias NullSafeUnsignedInts Union{Bool, UInt128, UInt16, UInt32, UInt64, UInt8} +typealias NullSafeSignedInts Union{Type{Int128}, Type{Int16}, Type{Int32}, + Type{Int64}, Type{Int8}} +typealias NullSafeUnsignedInts Union{Type{Bool}, Type{UInt128}, Type{UInt16}, + Type{UInt32}, Type{UInt64}, Type{UInt8}} typealias NullSafeInts Union{NullSafeSignedInts, NullSafeUnsignedInts} -typealias NullSafeFloats Union{Float16, Float32, Float64} +typealias NullSafeFloats Union{Type{Float16}, Type{Float32}, Type{Float64}} typealias NullSafeTypes Union{NullSafeInts, NullSafeFloats} +typealias EqualOrLess Union{typeof(isequal), typeof(isless)} -null_safe_op{S<:NullSafeTypes, - T<:NullSafeTypes}(::typeof(isequal), ::Type{S}, ::Type{T}) = true -null_safe_op{S<:NullSafeTypes, - T<:NullSafeTypes}(::typeof(isequal), ::Type{Complex{S}}, ::Type{Complex{T}}) = true -null_safe_op{S<:NullSafeTypes, - T<:NullSafeTypes}(::typeof(isequal), ::Type{Rational{S}}, ::Type{Rational{T}}) = true +null_safe_op{T}(::typeof(identity), ::Type{T}) = isbits(T) + +eltypes() = Tuple{} +eltypes(x, 
xs...) = Tuple{eltype(x), eltypes(xs...).parameters...} + +@pure null_safe_eltype_op(op, xs...) = + null_safe_op(op, eltypes(xs...).parameters...) + +null_safe_op(f::EqualOrLess, ::NullSafeTypes, ::NullSafeTypes) = true +null_safe_op{S,T}(f::EqualOrLess, ::Type{Rational{S}}, ::Type{T}) = + null_safe_op(f, T, S) +# complex numbers can be compared for equality but not in general ordered +null_safe_op{S,T}(::typeof(isequal), ::Type{Complex{S}}, ::Type{T}) = + null_safe_op(isequal, T, S) """ isequal(x::Nullable, y::Nullable) @@ -207,13 +218,6 @@ isequal(x::Nullable{Union{}}, y::Nullable{Union{}}) = true isequal(x::Nullable{Union{}}, y::Nullable) = isnull(y) isequal(x::Nullable, y::Nullable{Union{}}) = isnull(x) -null_safe_op{S<:NullSafeTypes, - T<:NullSafeTypes}(::typeof(isless), ::Type{S}, ::Type{T}) = true -null_safe_op{S<:NullSafeTypes, - T<:NullSafeTypes}(::typeof(isless), ::Type{Complex{S}}, ::Type{Complex{T}}) = true -null_safe_op{S<:NullSafeTypes, - T<:NullSafeTypes}(::typeof(isless), ::Type{Rational{S}}, ::Type{Rational{T}}) = true - """ isless(x::Nullable, y::Nullable) @@ -246,3 +250,89 @@ function hash(x::Nullable, h::UInt) return hash(x.value, h + nullablehash_seed) end end + +# higher-order functions +""" + filter(p, x::Nullable) + +Return null if either `x` is null or `p(get(x))` is false, and `x` otherwise. +""" +function filter{T}(p, x::Nullable{T}) + if isbits(T) + val = unsafe_get(x) + Nullable{T}(val, !isnull(x) && p(val)) + else + isnull(x) || p(unsafe_get(x)) ? x : Nullable{T}() + end +end + +""" +Return the given type if it is concrete, and `Union{}` otherwise. +""" +nullable_returntype{T}(::Type{T}) = isleaftype(T) ? T : Union{} + +""" + map(f, x::Nullable) + +Return `f` applied to the value of `x` if it has one, as a `Nullable`. If `x` +is null, then return a null value of type `Nullable{S}`. `S` is guaranteed to +be either `Union{}` or a concrete type. 
Whichever of these is chosen is an +implementation detail, but typically the choice that maximizes performance +would be used. If `x` has a value, then the return type is guaranteed to be of +type `Nullable{typeof(f(x))}`. +""" +function map{T}(f, x::Nullable{T}) + S = promote_op(f, T) + if isleaftype(S) && null_safe_op(f, T) + Nullable(f(unsafe_get(x)), !isnull(x)) + else + if isnull(x) + Nullable{nullable_returntype(S)}() + else + Nullable(f(unsafe_get(x))) + end + end +end + +# We need the following function and specializations because LLVM cannot +# optimize !any(isnull, t) without further guidance. +hasvalue(x::Nullable) = x.hasvalue +hasvalue(x) = true +all(f::typeof(hasvalue), t::Tuple) = f(t[1]) & all(f, tail(t)) +all(f::typeof(hasvalue), t::Tuple{}) = true + +is_nullable_array(::Any) = false +is_nullable_array{T}(::Type{T}) = eltype(T) <: Nullable +is_nullable_array(A::AbstractArray) = eltype(A) <: Nullable + +# Overloads of null_safe_op +# Unary operators + +# Note this list does not include sqrt since it can raise a DomainError +for op in (+, -, abs, abs2) + null_safe_op(::typeof(op), ::NullSafeTypes) = true + null_safe_op{S}(::typeof(op), ::Type{Complex{S}}) = null_safe_op(op, S) + null_safe_op{S}(::typeof(op), ::Type{Rational{S}}) = null_safe_op(op, S) +end + +null_safe_op(::typeof(~), ::NullSafeInts) = true +null_safe_op(::typeof(!), ::Type{Bool}) = true + +# Binary operators + +# Note this list does not include ^, ÷ and % +# Operations between signed and unsigned types are not safe: promotion to unsigned +# gives an InexactError for negative numbers +for op in (+, -, *, /, &, |, <<, >>, >>>, + scalarmin, scalarmax) + # to fix ambiguities + null_safe_op(::typeof(op), ::NullSafeFloats, ::NullSafeFloats) = true + null_safe_op(::typeof(op), ::NullSafeSignedInts, ::NullSafeSignedInts) = true + null_safe_op(::typeof(op), ::NullSafeUnsignedInts, ::NullSafeUnsignedInts) = true +end +for op in (+, -, *, /) + null_safe_op{S,T}(::typeof(op), ::Type{Complex{S}}, 
::Type{T}) = + null_safe_op(op, T, S) + null_safe_op{S,T}(::typeof(op), ::Type{Rational{S}}, ::Type{T}) = + null_safe_op(op, T, S) +end diff --git a/base/sparse/sparsematrix.jl b/base/sparse/sparsematrix.jl index e2da90d672359..fd974d8c98354 100644 --- a/base/sparse/sparsematrix.jl +++ b/base/sparse/sparsematrix.jl @@ -1413,7 +1413,7 @@ function map{Tf,N}(f::Tf, A::SparseMatrixCSC, Bs::Vararg{SparseMatrixCSC,N}) fofzeros = f(_zeros_eltypes(A, Bs...)...) fpreszeros = fofzeros == zero(fofzeros) maxnnzC = fpreszeros ? min(length(A), _sumnnzs(A, Bs...)) : length(A) - entrytypeC = _broadcast_type(f, A, Bs...) + entrytypeC = _broadcast_type(Any, f, A, Bs...) indextypeC = _promote_indtype(A, Bs...) Ccolptr = Vector{indextypeC}(A.n + 1) Crowval = Vector{indextypeC}(maxnnzC) @@ -1443,7 +1443,7 @@ function broadcast{Tf,N}(f::Tf, A::SparseMatrixCSC, Bs::Vararg{SparseMatrixCSC,N fofzeros = f(_zeros_eltypes(A, Bs...)...) fpreszeros = fofzeros == zero(fofzeros) indextypeC = _promote_indtype(A, Bs...) - entrytypeC = _broadcast_type(f, A, Bs...) + entrytypeC = _broadcast_type(Any, f, A, Bs...) Cm, Cn = Base.to_shape(Base.Broadcast.broadcast_indices(A, Bs...)) maxnnzC = fpreszeros ? _checked_maxnnzbcres(Cm, Cn, A, Bs...) 
: (Cm * Cn) Ccolptr = Vector{indextypeC}(Cn + 1) diff --git a/base/sysimg.jl b/base/sysimg.jl index a25f2ac5895e9..f892e5ed78f9a 100644 --- a/base/sysimg.jl +++ b/base/sysimg.jl @@ -173,6 +173,10 @@ using .Cartesian include("multidimensional.jl") include("permuteddimsarray.jl") using .PermutedDimsArrays + +# nullable types +include("nullable.jl") + include("broadcast.jl") importall .Broadcast @@ -180,9 +184,6 @@ importall .Broadcast include("base64.jl") importall .Base64 -# nullable types -include("nullable.jl") - # version include("version.jl") diff --git a/doc/src/manual/types.md b/doc/src/manual/types.md index 3589b5167cab3..3f2364adfb889 100644 --- a/doc/src/manual/types.md +++ b/doc/src/manual/types.md @@ -1239,7 +1239,7 @@ In many settings, you need to interact with a value of type `T` that may or may handle these settings, Julia provides a parametric type called `Nullable{T}`, which can be thought of as a specialized container type that can contain either zero or one values. `Nullable{T}` provides a minimal interface designed to ensure that interactions with missing values are safe. At present, -the interface consists of four possible interactions: +the interface consists of several possible interactions: * Construct a [`Nullable`](@ref) object. * Check if a [`Nullable`](@ref) object has a missing value. @@ -1247,6 +1247,14 @@ the interface consists of four possible interactions: will be thrown if the object's value is missing. * Access the value of a [`Nullable`](@ref) object with a guarantee that a default value of type `T` will be returned if the object's value is missing. + * Perform an operation on the value (if it exists) of a [`Nullable`](@ref) + object, getting a [`Nullable`](@ref) result. The result will be missing + if the original value was missing. + * Performing a test on the value (if it exists) of a [`Nullable`](@ref) + object, getting a result that is missing if either the [`Nullable`](@ref) + itself was missing, or the test failed. 
+ * Perform general operations on single or multiple [`Nullable`](@ref) + objects, propagating the missing data. ### Constructing [`Nullable`](@ref) objects @@ -1328,3 +1336,105 @@ julia> get(Nullable(1.0), 0.0) object match to avoid type instability, which could hurt performance. Use [`convert()`](@ref) manually if needed. +### Performing operations on [`Nullable`](@ref) objects + +[`Nullable`](@ref) objects represent values that are possibly missing, and it +is possible to write all code using these objects by first testing to see if +the value is missing with [`isnull()`](@ref), and then doing an appropriate +action. However, there are some common use cases where the code could be more +concise or clear by using a higher-order function. + +The [`map`](@ref) function takes as arguments a function `f` and a +[`Nullable`](@ref) value `x`. It produces a [`Nullable`](@ref): + + - If `x` is a missing value, then it produces a missing value; + - If `x` has a value, then it produces a [`Nullable`](@ref) containing + `f(get(x))` as value. + +This is useful for performing simple operations on values that might be missing +if the desired behaviour is to simply propagate the missing values forward. + +The [`filter`](@ref) function takes as arguments a predicate function `p` +(that is, a function returning a boolean) and a [`Nullable`](@ref) value `x`. +It produces a [`Nullable`](@ref) value: + + - If `x` is a missing value, then it produces a missing value; + - If `p(get(x))` is true, then it produces the original value `x`; + - If `p(get(x))` is false, then it produces a missing value. + +In this way, [`filter`](@ref) can be thought of as selecting only allowable +values, and converting non-allowable values to missing values. + +While [`map`](@ref) and [`filter`](@ref) are useful in specific cases, by far +the most useful higher-order function is [`broadcast`](@ref), which can handle +a wide variety of cases. 
+ +[`broadcast`](@ref) can be thought of as a way to make existing operations work +on multiple data simultaneously and propagate nulls. An example will motivate +the need for [`broadcast`](@ref). Suppose we have a function that computes the +greater of two real roots of a quadratic equation, using the quadratic formula: + +```julia +""" +Compute the greater real root of ``ax^2 + bx + c = 0``. +""" +root(a::Real, b::Real, c::Real) = (-b + √(b^2 - 4a*c)) / 2a +``` + +We may verify that the result of `root(1, -9, 20)` is `5.0`, as we expect, +since `5.0` is the greater of two real roots of the quadratic equation. + +Suppose now that we want to find the greater real root of a quadratic +equation where the coefficients might be missing values. Having missing values +in datasets is a common occurrence in real-world data, and so it is important +to be able to deal with them. But we cannot find the roots of an equation if we +do not know all the coefficients. The best solution to this will depend on the +particular use case; perhaps we should throw an error. However, for this +example, we will assume that the best solution is to propagate the missing +values forward; that is, if any input is missing, we simply produce a missing +output. + +The [`broadcast()`](@ref) function makes this task easy; we can simply pass the +`root` function we wrote to `broadcast`: + +```julia +julia> broadcast(root, Nullable(1), Nullable(-9), Nullable(20)) +Nullable{Float64}(5.0) + +julia> broadcast(root, Nullable(1), Nullable{Int}(), Nullable{Int}()) +Nullable{Float64}() + +julia> broadcast(root, Nullable{Int}(), Nullable(-9), Nullable(20)) +Nullable{Float64}() +``` + +If one or more of the inputs is missing, then the output of +[`broadcast()`](@ref) will be missing. 
+ +There exists special syntactic sugar for the [`broadcast()`](@ref) function +using a dot notation: + +```julia +julia> root.(Nullable(1), Nullable(-9), Nullable(20)) +Nullable{Float64}(5.0) +``` + +In particular, the regular arithmetic operators can be [`broadcast()`](@ref) +conveniently using `.`-prefixed operators: + +```julia +julia> Nullable(2) ./ Nullable(3) .+ Nullable(1.0) +Nullable{Float64}(1.66667) +``` + +[`broadcast()`](@ref) also allows one to work with multiple data at the same +time, without manually writing for loops. This enables performing the same +operation to arrays where the data is possibly missing; for example + +```julia +julia> [Nullable(2), Nullable(), Nullable(3)] .+ 3 +3-element Array{Nullable{Int64},1}: + 5 + #NULL + 6 +``` diff --git a/test/nullable.jl b/test/nullable.jl index da751f2972d28..7e82eec5a8d36 100644 --- a/test/nullable.jl +++ b/test/nullable.jl @@ -1,5 +1,16 @@ # This file is a part of Julia. License is MIT: http://julialang.org/license +# "is a null with type T", curried on 2nd argument +isnull_oftype(x::Nullable, T::Type) = eltype(x) == T && isnull(x) +isnull_oftype(T::Type) = x -> isnull_oftype(x, T) + +# return true if nullables (or arrays of nullables) have the same type, +# nullity, and value (if they are non-null) +istypeequal(x::Nullable, y::Nullable) = + typeof(x) == typeof(y) && isnull(filter(!, x .== y)) +istypeequal(x::AbstractArray, y::AbstractArray) = + length(x) == length(y) && all(xy -> istypeequal(xy...), zip(x, y)) + types = [ Bool, Float16, @@ -285,9 +296,10 @@ for T in types end # Operators -TestTypes = Union{Base.NullSafeTypes, BigInt, BigFloat, - Complex{Int}, Complex{Float64}, Complex{BigFloat}, - Rational{Int}, Rational{BigInt}}.types +TestTypes = [[T.parameters[1] for T in Base.NullSafeTypes.types]; + [BigInt, BigFloat, + Complex{Int}, Complex{Float64}, Complex{BigFloat}, + Rational{Int}, Rational{BigInt}]] for S in TestTypes, T in TestTypes u0 = zero(S) u1 = one(S) @@ -388,6 +400,171 @@ end 
@test Base.promote_op(-, Nullable{Float64}, Nullable{Int}) == Nullable{Float64} @test Base.promote_op(-, Nullable{DateTime}, Nullable{DateTime}) == Nullable{Base.Dates.Millisecond} +# tests for istypeequal (which uses filter, broadcast) +@test istypeequal(Nullable(0), Nullable(0)) +@test !istypeequal(Nullable(0), Nullable(0.0)) +@test !istypeequal(Nullable(0), Nullable(1)) +@test !istypeequal(Nullable(0), Nullable(1.0)) +@test istypeequal([Nullable(0), Nullable(1)], [Nullable(0), Nullable(1)]) +@test istypeequal([Nullable(0), Nullable(1)], Any[Nullable(0), Nullable(1)]) +@test !istypeequal([Nullable(0), Nullable(1)], Any[Nullable(0.0), Nullable(1)]) +@test !istypeequal([Nullable(0), Nullable(1)], [Nullable(0), Nullable(2)]) +@test !istypeequal([Nullable(0), Nullable(1)], + [Nullable(0), Nullable(1), Nullable(2)]) + +# filter +for p in (_ -> true, _ -> false) + @test @inferred(filter(p, Nullable())) |> isnull_oftype(Union{}) + @test @inferred(filter(p, Nullable{Int}())) |> isnull_oftype(Int) +end +@test @inferred(filter(_ -> true, Nullable(85))) === Nullable(85) +@test @inferred(filter(_ -> false, Nullable(85))) |> isnull_oftype(Int) +@test @inferred(filter(x -> x > 0, Nullable(85))) === Nullable(85) +@test @inferred(filter(x -> x < 0, Nullable(85))) |> isnull_oftype(Int) +@test get(@inferred(filter(x -> length(x) > 2, Nullable("test")))) == "test" +@test @inferred(filter(x -> length(x) > 5, Nullable("test"))) |> + isnull_oftype(String) + +# map +sqr(x) = x^2 +@test @inferred(map(sqr, Nullable())) |> isnull_oftype(Union{}) +@test @inferred(map(sqr, Nullable{Int}())) |> isnull_oftype(Int) +@test @inferred(map(sqr, Nullable(2))) === Nullable(4) +@test @inferred(map(+, Nullable(0.0))) === Nullable(0.0) +@test @inferred(map(+, Nullable(3.0, false)))=== Nullable(3.0, false) +@test @inferred(map(-, Nullable(1.0))) === Nullable(-1.0) +@test @inferred(map(-, Nullable{Float64}())) |> isnull_oftype(Float64) +@test @inferred(map(sin, Nullable(1))) === Nullable(sin(1)) +@test 
@inferred(map(sin, Nullable{Int}())) |> isnull_oftype(Float64) + +# should not throw if function wouldn't be called +@test map(x -> x ? 0 : 0.0, Nullable()) |> isnull_oftype(Union{}) +@test map(x -> x ? 0 : 0.0, Nullable(true)) === Nullable(0) +@test map(x -> x ? 0 : 0.0, Nullable(false)) === Nullable(0.0) +@test map(x -> x ? 0 : 0.0, Nullable{Bool}()) |> isnull_oftype(Union{}) + +# broadcast and elementwise +@test sin.(Nullable(0.0)) === Nullable(0.0) +@test sin.(Nullable{Float64}()) |> isnull_oftype(Float64) +@test @inferred(broadcast(sin, Nullable(0.0))) === Nullable(0.0) +@test @inferred(broadcast(sin, Nullable{Float64}())) |> isnull_oftype(Float64) + +@test Nullable(8) .+ Nullable(10) === Nullable(18) +@test Nullable(8) .- Nullable(10) === Nullable(-2) +@test Nullable(8) .+ Nullable{Int}() |> isnull_oftype(Int) +@test Nullable{Int}() .- Nullable(10) |> isnull_oftype(Int) + +@test @inferred(broadcast(log, 10, Nullable(1.0))) === + Nullable(0.0) +@test @inferred(broadcast(log, 10, Nullable{Float64}())) |> + isnull_oftype(Float64) +@test @inferred(broadcast(log, Nullable(10), Nullable(1.0))) === + Nullable(0.0) +@test @inferred(broadcast(log, Nullable(10), Nullable{Float64}())) |> + isnull_oftype(Float64) + +@test Nullable(2) .^ Nullable(4) === Nullable(16) +@test Nullable(2) .^ Nullable{Int}() |> isnull_oftype(Int) + +# multi-arg broadcast +@test Nullable(1) .+ Nullable(1) .+ Nullable(1) .+ Nullable(1) .+ Nullable(1) .+ + Nullable(1) === Nullable(6) +@test Nullable(1) .+ Nullable(1) .+ Nullable(1) .+ Nullable{Int}() .+ + Nullable(1) .+ Nullable(1) |> isnull_oftype(Int) + +# these are not inferrable because there are too many arguments +us = map(Nullable, 1:20) +@test broadcast(max, us...) 
=== Nullable(20) +@test isnull(broadcast(max, us..., Nullable{Int}())) + +# test all elementwise operations +# note that elementwise operations are the same as broadcast +for op in (+, -, *, /, \, //, ==, <, !=, <=, ÷, %, <<, >>, ^) + # op(1, 1) chosen because it works for all operations + res = op(1, 1) + @test @inferred(broadcast(op, Nullable(1), Nullable(1))) === + Nullable(res) + @test @inferred(broadcast(op, Nullable{Int}(), Nullable(1))) |> + isnull_oftype(typeof(res)) + @test @inferred(broadcast(op, Nullable(1), Nullable{Int}())) |> + isnull_oftype(typeof(res)) + @test @inferred(broadcast(op, Nullable{Int}(), Nullable{Int}())) |> + isnull_oftype(typeof(res)) + @test @inferred(broadcast(op, Nullable(1), 1)) === + Nullable(res) + @test @inferred(broadcast(op, 1, Nullable(1))) === + Nullable(res) +end + +# test reasonable results for Union{} +# the exact types of these is finnicky and depends on implementation details +# but is guaranteed to be at worst concrete and possibly Union{} on a good day +@test isnull(@inferred(Nullable() .+ Nullable())) +@test isnull(@inferred(Nullable() .+ 1)) +@test isnull(@inferred(Nullable() .+ Nullable(1))) + +# test that things don't pessimize because of non-homogenous types +@test Nullable(10.5) === + @inferred(broadcast(+, 1, 2, Nullable(3), Nullable(4.0), Nullable(1//2))) + +# broadcasting for arrays +@test istypeequal(@inferred(broadcast(+, [1, 2, 3], Nullable{Int}(1))), + Nullable{Int}[2, 3, 4]) +@test istypeequal(@inferred(broadcast(+, Nullable{Int}[1, 2, 3], 1)), + Nullable{Int}[2, 3, 4]) +@test istypeequal(@inferred(broadcast(+, Nullable{Int}[1, 2, 3], Nullable(1))), + Nullable{Int}[2, 3, 4]) +@test istypeequal(@inferred(broadcast(+, Nullable{Int}[1, Nullable()], Nullable(1))), + Nullable{Int}[2, Nullable()]) +@test istypeequal(@inferred(broadcast(+, Nullable{Int}[Nullable(), 1], + Nullable{Int}())), + Nullable{Int}[Nullable(), Nullable()]) +@test istypeequal(@inferred(broadcast(+, Nullable{Int}[Nullable(), 1], + 
Nullable{Int}[1, Nullable()])), + Nullable{Int}[Nullable(), Nullable()]) +@test istypeequal(@inferred(broadcast(+, Nullable{Int}[Nullable(), 1], + Nullable{Int}[Nullable(), 1])), + Nullable{Int}[Nullable(), 2]) +@test istypeequal(@inferred(broadcast(+, Nullable{Int}[Nullable(), Nullable()], + Nullable{Int}[1, 2])), + Nullable{Int}[Nullable(), Nullable()]) +@test istypeequal(@inferred(broadcast(+, Nullable{Int}[Nullable(), 1], + Nullable{Int}[1])), + Nullable{Int}[Nullable(), 2]) +@test istypeequal(@inferred(broadcast(+, Nullable{Float64}[1.0, 2.0], + Nullable{Float64}[1.0 2.0; 3.0 4.0])), + Nullable{Float64}[2.0 3.0; 5.0 6.0]) +@test istypeequal(@inferred(broadcast(+, Nullable{Int}[1, 2], [1, 2], 1)), + Nullable{Int}[3, 5]) + +@test istypeequal(@inferred(broadcast(/, 1, Nullable{Int}[1, 2, 4])), + Nullable{Float64}[1.0, 0.5, 0.25]) +@test istypeequal(@inferred(broadcast(muladd, Nullable(2), 42, + [Nullable(1337), Nullable{Int}()])), + Nullable{Int}[1421, Nullable()]) + +# heterogenous types (not inferrable) +@test istypeequal(broadcast(+, Any[1, 1.0], Nullable(1//2)), + Any[Nullable(3//2), Nullable(1.5)]) +@test istypeequal(broadcast(+, Any[Nullable(1) Nullable(1.0)], Nullable(big"1")), + Any[Nullable(big"2") Nullable(big"2.0")]) + +# test fast path taken +for op in (+, *, -) + for b1 in (false, true) + for b2 in (false, true) + @test Nullable{Int}(op(1, 2), b1 & b2) === + @inferred(broadcast(op, Nullable{Int}(1, b1), + Nullable{Int}(2, b2))) + end + A = [1, 2, 3] + res = @inferred(broadcast(op, A, Nullable{Int}(1, b1))) + @test res[1] === Nullable{Int}(op(1, 1), b1) + @test res[2] === Nullable{Int}(op(2, 1), b1) + @test res[3] === Nullable{Int}(op(3, 1), b1) + end +end + # issue #11675 @test repr(Nullable()) == "Nullable{Union{}}()"