From baf7ec5eecd218be4d5fec447ee6c6f6278bb0fc Mon Sep 17 00:00:00 2001 From: tan Date: Sat, 20 Dec 2014 16:05:00 +0530 Subject: [PATCH] maybefloat & maybeint: parse string to Nullable Introduces the following methods, which parse a string as the indicated type and return a `Nullable` holding the result instead of throwing an exception: - `maybeint{T<:Integer}(::Type{T}, s::AbstractString)` - `maybefloat32(s::AbstractString)` and `maybefloat64(s::AbstractString)` Ref: discussions at #9316, #3631, #5704 --- base/exports.jl | 3 + base/gmp.jl | 17 +++++- base/string.jl | 76 +++++++++++++++++++++++- src/builtins.c | 155 +++++++++++++++++++++++++++++++++++++----------- src/ccall.cpp | 2 +- src/julia.h | 11 ++++ test/strings.jl | 22 +++++++ 7 files changed, 245 insertions(+), 41 deletions(-) diff --git a/base/exports.jl b/base/exports.jl index 5fdf5b4830b4be..0f2f337c848cd6 100644 --- a/base/exports.jl +++ b/base/exports.jl @@ -360,6 +360,9 @@ export float16, float32, float64, + maybefloat32, + maybefloat64, + maybeint, floor, frexp, gamma, diff --git a/base/gmp.jl b/base/gmp.jl index c3acf79fa5792b..5b6bd018070712 100644 --- a/base/gmp.jl +++ b/base/gmp.jl @@ -74,17 +74,28 @@ widen(::Type{BigInt}) = BigInt BigInt(x::BigInt) = x BigInt(s::AbstractString) = parseint(BigInt,s) -function Base.parseint_nocheck(::Type{BigInt}, s::AbstractString, base::Int) +function parse_bigint(s::AbstractString, base::Int, nothrow::Bool) + _n = Nullable{BigInt}() s = bytestring(s) sgn, base, i = Base.parseint_preamble(true,s,base) + if i == 0 + nothrow && return _n + error("premature end of integer: $(repr(s))") + end z = BigInt() err = ccall((:__gmpz_set_str, :libgmp), Int32, (Ptr{BigInt}, Ptr{UInt8}, Int32), &z, convert(Ptr{UInt8},SubString(s,i)), base) - err == 0 || error("invalid big integer: $(repr(s))") - return sgn < 0 ? -z : z + if err != 0 + nothrow && return _n + error("invalid big integer: $(repr(s))") + end + Nullable(sgn < 0 ?
-z : z) end +Base.maybeint_internal(::Type{BigInt}, s::AbstractString, base::Int) = parse_bigint(s, base, true) +Base.parseint_nocheck(::Type{BigInt}, s::AbstractString, base::Int) = get(parse_bigint(s, base, false)) + function BigInt(x::Union(Clong,Int32)) z = BigInt() ccall((:__gmpz_set_si, :libgmp), Void, (Ptr{BigInt}, Clong), &z, x) diff --git a/base/string.jl b/base/string.jl index 80883e6fb53266..c96dda8c4b56db 100644 --- a/base/string.jl +++ b/base/string.jl @@ -1493,7 +1493,7 @@ parseint{T<:Integer}(::Type{T}, c::Char, base::Integer) = convert(T,parseint(c,b parseint{T<:Integer}(::Type{T}, c::Char) = convert(T,parseint(c)) function parseint_next(s::AbstractString, i::Int=start(s)) - done(s,i) && error("premature end of integer: $(repr(s))") + done(s,i) && (return Char(0), 0, 0) j = i c, i = next(s,i) c, i, j @@ -1501,9 +1501,12 @@ end function parseint_preamble(signed::Bool, s::AbstractString, base::Int) c, i, j = parseint_next(s) + while isspace(c) c, i, j = parseint_next(s,i) end + (j == 0) && (return 0, 0, 0) + sgn = 1 if signed if c == '-' || c == '+' @@ -1511,9 +1514,12 @@ function parseint_preamble(signed::Bool, s::AbstractString, base::Int) c, i, j = parseint_next(s,i) end end + while isspace(c) c, i, j = parseint_next(s,i) end + (j == 0) && (return 0, 0, 0) + if base == 0 if c == '0' && !done(s,i) c, i = next(s,i) @@ -1528,9 +1534,71 @@ function parseint_preamble(signed::Bool, s::AbstractString, base::Int) return sgn, base, j end +safe_add{T<:Integer}(n1::T, n2::T) = ((n2 > 0) ? (n1 > (typemax(T) - n2)) : (n1 < (typemin(T) - n2))) ? Nullable{T}() : Nullable{T}(n1 + n2) +safe_mul{T<:Integer}(n1::T, n2::T) = ((n2 > 0) ? ((n1 > div(typemax(T),n2)) || (n1 < div(typemin(T),n2))) : + (n2 < -1) ? ((n1 > div(typemin(T),n2)) || (n1 < div(typemax(T),n2))) : + ((n2 == -1) && n1 == typemin(T))) ? Nullable{T}() : Nullable{T}(n1 * n2) + +#safe_sub{T<:Integer}(n1::T, n2::T) = ((n2 > 0) ? (n1 < (typemin(T) + n2)) : (n1 > (typemax(T) + n2))) ? 
Nullable{T}() : Nullable{T}(n1 - n2) +#safe_div{T<:Integer}(n1::T, n2::T) = ((n1 == typemin(T)) && (n2 == T(-1))) ? Nullable{T}() : Nullable{T}(div(n1, n2)) +#safe_abs{T<:Integer}(n::T) = (n == typemin(T)) ? Nullable{T}() : abs(n) + +function maybeint_internal{T<:Integer}(::Type{T}, s::AbstractString, base::Int, a::Int) + _n = Nullable{T}() + sgn, base, i = parseint_preamble(T<:Signed,s,base) + (i == 0) && return _n + c, i = parseint_next(s,i) + (i == 0) && return _n + + base = convert(T,base) + m::T = div(typemax(T)-base+1,base) + n::T = 0 + while n <= m + d::T = '0' <= c <= '9' ? c-'0' : + 'A' <= c <= 'Z' ? c-'A'+10 : + 'a' <= c <= 'z' ? c-'a'+a : base + d < base || return _n + n *= base + n += d + if done(s,i) + n *= sgn + return Nullable{T}(n) + end + c, i = next(s,i) + isspace(c) && break + end + (T <: Signed) && (n *= sgn) + while !isspace(c) + d::T = '0' <= c <= '9' ? c-'0' : + 'A' <= c <= 'Z' ? c-'A'+10 : + 'a' <= c <= 'z' ? c-'a'+a : base + d < base || return _n + (T <: Signed) && (d *= sgn) + + safe_n = safe_mul(n, base) + isnull(safe_n) || (safe_n = safe_add(get(safe_n), d)) + isnull(safe_n) && return Nullable{T}() + n = get(safe_n) + done(s,i) && return Nullable{T}(n) + c, i = next(s,i) + end + while !done(s,i) + c, i = next(s,i) + isspace(c) || return _n + end + return Nullable{T}(n) +end +maybeint_internal{T<:Integer}(::Type{T}, s::AbstractString, base::Int) = + maybeint_internal(T, s, base, base <= 36 ? 10 : 36) +maybeint{T<:Integer}(::Type{T}, s::AbstractString) = maybeint_internal(T,s,0) + function parseint_nocheck{T<:Integer}(::Type{T}, s::AbstractString, base::Int, a::Int) sgn, base, i = parseint_preamble(T<:Signed,s,base) + (i == 0) && error("premature end of integer: $(repr(s))") + c, i = parseint_next(s,i) + (i == 0) && error("premature end of integer: $(repr(s))") + base = convert(T,base) ## FIXME: remove 128-bit specific code once 128-bit div doesn't rely on BigInt m::T = T===UInt128 || T===Int128 ? 
typemax(T) : div(typemax(T)-base+1,base) @@ -1626,6 +1694,12 @@ begin end end +maybefloat64(s::AbstractString) = ccall(:jl_maybe_strtod, Nullable{Float64}, (Ptr{UInt8},), s) +maybefloat64(s::SubString) = ccall(:jl_maybe_substrtod, Nullable{Float64}, (Ptr{UInt8},Csize_t,Cint), s.string, s.offset, s.endof) + +maybefloat32(s::AbstractString) = ccall(:jl_maybe_strtof, Nullable{Float32}, (Ptr{UInt8},), s) +maybefloat32(s::SubString) = ccall(:jl_maybe_substrtof, Nullable{Float32}, (Ptr{UInt8},Csize_t,Cint), s.string, s.offset, s.endof) + float(x::AbstractString) = float64(x) parsefloat(x::AbstractString) = float64(x) parsefloat(::Type{Float64}, x::AbstractString) = float64(x) diff --git a/src/builtins.c b/src/builtins.c index 7ce82990c90dfd..5865c9efa30175 100644 --- a/src/builtins.c +++ b/src/builtins.c @@ -644,13 +644,14 @@ DLLEXPORT void jl_print_int64(JL_STREAM *s, int64_t i) JL_PRINTF(s, "%lld", i); } -DLLEXPORT int jl_substrtod(char *str, size_t offset, int len, double *out) +DLLEXPORT jl_nullable_float64_t jl_maybe_substrtod(char *str, size_t offset, int len) { char *p; - errno = 0; char *bstr = str+offset; char *pend = bstr+len; int err = 0; + + errno = 0; if (!(*pend == '\0' || isspace((unsigned char)*pend) || *pend == ',')) { // confusing data outside substring. must copy. 
char *newstr = (char*)malloc(len+1); @@ -659,29 +660,71 @@ DLLEXPORT int jl_substrtod(char *str, size_t offset, int len, double *out) bstr = newstr; pend = bstr+len; } - *out = strtod_c(bstr, &p); - if ((p == bstr) || (p != pend) || - (errno==ERANGE && (*out==0 || *out==HUGE_VAL || *out==-HUGE_VAL))) - err = 1; + double out = strtod_c(bstr, &p); + err = errno; + if (bstr != str+offset) free(bstr); - return err; + + if (err==ERANGE && (out==0 || out==HUGE_VAL || out==-HUGE_VAL)) { + errno = ERANGE; + err = 1; + } + else if ((p == bstr) || (p != pend)) { + err = 3; + } + else { + err = 0; + } + + return (jl_nullable_float64_t){(uint8_t)err, out}; } -DLLEXPORT int jl_strtod(char *str, double *out) +DLLEXPORT jl_nullable_float64_t jl_maybe_strtod(char *str) { char *p; + int err = 0; + errno = 0; - *out = strtod_c(str, &p); - if (p == str || - (errno==ERANGE && (*out==0 || *out==HUGE_VAL || *out==-HUGE_VAL))) - return 1; - while (*p != '\0') { - if (!isspace((unsigned char)*p)) - return 1; - p++; + double out = strtod_c(str, &p); + + if (errno==ERANGE && (out==0 || out==HUGE_VAL || out==-HUGE_VAL)) { + err = 1; + } + else if (p == str) { + err = 3; + } + else { + while (*p != '\0') { + if (!isspace((unsigned char)*p)) { + err = 3; + break; + } + p++; + } } - return 0; + + return (jl_nullable_float64_t){(uint8_t)err, out}; +} + +DLLEXPORT int jl_substrtod(char *str, size_t offset, int len, double *out) +{ + jl_nullable_float64_t nd = jl_maybe_substrtod(str, offset, len); + if(0 == nd.isnull) { + *out = nd.value; + return 0; + } + return 1; +} + +DLLEXPORT int jl_strtod(char *str, double *out) +{ + jl_nullable_float64_t nd = jl_maybe_strtod(str); + if(0 == nd.isnull) { + *out = nd.value; + return 0; + } + return 1; } // MSVC pre-2013 did not define HUGE_VALF @@ -689,13 +732,14 @@ DLLEXPORT int jl_strtod(char *str, double *out) #define HUGE_VALF (1e25f * 1e25f) #endif -DLLEXPORT int jl_substrtof(char *str, int offset, int len, float *out) +DLLEXPORT jl_nullable_float32_t 
jl_maybe_substrtof(char *str, size_t offset, int len) { char *p; - errno = 0; char *bstr = str+offset; char *pend = bstr+len; int err = 0; + + errno = 0; if (!(*pend == '\0' || isspace((unsigned char)*pend) || *pend == ',')) { // confusing data outside substring. must copy. char *newstr = (char*)malloc(len+1); @@ -705,37 +749,76 @@ DLLEXPORT int jl_substrtof(char *str, int offset, int len, float *out) pend = bstr+len; } #if defined(_OS_WINDOWS_) && !defined(_COMPILER_MINGW_) - *out = (float)strtod_c(bstr, &p); + float out = (float)strtod_c(bstr, &p); #else - *out = strtof_c(bstr, &p); + float out = strtof_c(bstr, &p); #endif + err = errno; - if ((p == bstr) || (p != pend) || - (errno==ERANGE && (*out==0 || *out==HUGE_VALF || *out==-HUGE_VALF))) - err = 1; if (bstr != str+offset) free(bstr); - return err; + + if (err==ERANGE && (out==0 || out==HUGE_VALF || out==-HUGE_VALF)) { + errno = ERANGE; + err = 1; + } + else if ((p == bstr) || (p != pend)) { + err = 3; + } + else { + err = 0; + } + + return (jl_nullable_float32_t){(uint8_t)err, out}; } -DLLEXPORT int jl_strtof(char *str, float *out) +DLLEXPORT jl_nullable_float32_t jl_maybe_strtof(char *str) { char *p; + int err = 0; + errno = 0; #if defined(_OS_WINDOWS_) && !defined(_COMPILER_MINGW_) - *out = (float)strtod_c(str, &p); + float out = (float)strtod_c(str, &p); #else - *out = strtof_c(str, &p); + float out = strtof_c(str, &p); #endif - if (p == str || - (errno==ERANGE && (*out==0 || *out==HUGE_VALF || *out==-HUGE_VALF))) - return 1; - while (*p != '\0') { - if (!isspace((unsigned char)*p)) - return 1; - p++; + if (errno==ERANGE && (out==0 || out==HUGE_VALF || out==-HUGE_VALF)) { + err = 1; + } + else if (p == str) { + err = 3; + } + else { + while (*p != '\0') { + if (!isspace((unsigned char)*p)) { + err = 3; + } + p++; + } } - return 0; + + return (jl_nullable_float32_t){(uint8_t)err, out}; +} + +DLLEXPORT int jl_substrtof(char *str, int offset, int len, float *out) +{ + jl_nullable_float32_t nf = 
jl_maybe_substrtof(str, offset, len); + if(0 == nf.isnull) { + *out = nf.value; + return 0; + } + return 1; +} + +DLLEXPORT int jl_strtof(char *str, float *out) +{ + jl_nullable_float32_t nf = jl_maybe_strtof(str); + if(0 == nf.isnull) { + *out = nf.value; + return 0; + } + return 1; } // showing -------------------------------------------------------------------- diff --git a/src/ccall.cpp b/src/ccall.cpp index 867c13d99b6bfa..655c3e52bb7255 100644 --- a/src/ccall.cpp +++ b/src/ccall.cpp @@ -1257,7 +1257,7 @@ static Value *emit_ccall(jl_value_t **args, size_t nargs, jl_codectx_t *ctx) JL_GC_POP(); if (!sret && lrt == T_void) return literal_pointer_val((jl_value_t*)jl_nothing); - if (lrt->isStructTy()) { + if (lrt->isStructTy() && !jl_isbits(rt)) { //fprintf(stderr, "ccall rt: %s -> %s\n", f_name, ((jl_tag_type_t*)rt)->name->name->name); assert(jl_is_structtype(rt)); Value *strct = diff --git a/src/julia.h b/src/julia.h index 52f2f96f11abf6..3e0bc0464aa6d5 100644 --- a/src/julia.h +++ b/src/julia.h @@ -1360,6 +1360,17 @@ DLLEXPORT extern const int jl_ver_patch(void); DLLEXPORT extern const int jl_ver_is_release(void); DLLEXPORT extern const char* jl_ver_string(void); +// nullable struct representations +typedef struct { + uint8_t isnull; + double value; +} jl_nullable_float64_t; + +typedef struct { + uint8_t isnull; + float value; +} jl_nullable_float32_t; + #ifdef __cplusplus } #endif diff --git a/test/strings.jl b/test/strings.jl index c33b1e16420035..acc91cdb148045 100644 --- a/test/strings.jl +++ b/test/strings.jl @@ -1294,3 +1294,25 @@ for T in (ASCIIString, UTF8String, UTF16String, UTF32String) end end end + +for T in [Int8, UInt8, Int16, UInt16, Int32, UInt32, Int64, UInt64, Int128, UInt128] + for i in [typemax(T), typemin(T)] + s = "$i" + @test get(maybeint(T, s)) == i + end +end + +for T in [Int8, Int16, Int32, Int64, Int128] + for i in [typemax(T), typemin(T)] + f = "$(i)0" + @test isnull(maybeint(T, f)) + end +end + +@test get(maybeint(BigInt, 
"1234567890")) == BigInt(1234567890) +@test isnull(maybeint(BigInt, "1234567890-")) + +@test get(maybefloat64("64")) == 64.0 +@test isnull(maybefloat64("64o")) +@test get(maybefloat32("32")) == 32.0f0 +@test isnull(maybefloat32("32o"))