From 22b464a3e7d77201c2257c604f452960a06acd2e Mon Sep 17 00:00:00 2001 From: Erik Schnetter Date: Tue, 6 Jan 2015 14:45:52 -0500 Subject: [PATCH 1/7] Introduce fast (non-IEEE) versions of floating point intrinsics --- base/inference.jl | 1 + src/init.c | 3 ++- src/intrinsics.cpp | 48 +++++++++++++++++++++++++++++++++++++++------- src/julia.h | 7 +++++++ 4 files changed, 51 insertions(+), 8 deletions(-) diff --git a/base/inference.jl b/base/inference.jl index 1dfc6d3c3b0c7..fcb17692191d5 100644 --- a/base/inference.jl +++ b/base/inference.jl @@ -50,6 +50,7 @@ immutable JLCompilerOpts opt_level::Int8 depwarn::Int8 can_inline::Int8 + fast_math::Int8 end compileropts() = unsafe_load(cglobal(:jl_compileropts, JLCompilerOpts)) diff --git a/src/init.c b/src/init.c index ac6071a8e0a6d..f4bfe96d0b93a 100644 --- a/src/init.c +++ b/src/init.c @@ -97,7 +97,8 @@ jl_compileropts_t jl_compileropts = { NULL, // julia_home JL_COMPILEROPT_COMPILE_DEFAULT, 0, // opt_level 1, // depwarn - 1 // can_inline + 1, // can_inline + JL_COMPILEROPT_FAST_MATH_DEFAULT }; int jl_boot_file_loaded = 0; diff --git a/src/intrinsics.cpp b/src/intrinsics.cpp index e2ffb45f68949..6ba00608a84b4 100644 --- a/src/intrinsics.cpp +++ b/src/intrinsics.cpp @@ -6,6 +6,9 @@ namespace JL_I { neg_int, add_int, sub_int, mul_int, sdiv_int, udiv_int, srem_int, urem_int, smod_int, neg_float, add_float, sub_float, mul_float, div_float, rem_float, + // fast arithmetic + neg_float_fast, add_float_fast, sub_float_fast, + mul_float_fast, div_float_fast, rem_float_fast, // same-type comparisons eq_int, ne_int, slt_int, ult_int, @@ -718,6 +721,26 @@ static Value *emit_srem(Value *x, Value *den, jl_codectx_t *ctx) return ret; } +// Temporarily switch the builder to fast-math mode if requested +struct math_builder { + FastMathFlags old_fmf; + math_builder(jl_codectx_t *ctx, bool always_fast = false): + old_fmf(builder.getFastMathFlags()) + { + if (jl_compileropts.fast_math != JL_COMPILEROPT_FAST_MATH_OFF && + (always_fast || + 
jl_compileropts.fast_math == JL_COMPILEROPT_FAST_MATH_ON)) { + FastMathFlags fmf; + fmf.setUnsafeAlgebra(); + builder.SetFastMathFlags(fmf); + } + } + IRBuilder<>& operator()() const { return builder; } + ~math_builder() { + builder.SetFastMathFlags(old_fmf); + } +}; + static Value *emit_smod(Value *x, Value *den, jl_codectx_t *ctx) { Type *t = den->getType(); @@ -926,15 +949,24 @@ static Value *emit_intrinsic(intrinsic f, jl_value_t **args, size_t nargs, // that do the correct thing on LLVM <= 3.3 and >= 3.5 respectively. // See issue #7868 #ifdef LLVM35 - HANDLE(neg_float,1) return builder.CreateFSub(ConstantFP::get(FT(t), -0.0), FP(x)); + HANDLE(neg_float,1) return math_builder(ctx)().CreateFSub(ConstantFP::get(FT(t), -0.0), FP(x)); + HANDLE(neg_float_fast,1) return math_builder(ctx, true)().CreateFNeg(FP(x)); #else - HANDLE(neg_float,1) return builder.CreateFMul(ConstantFP::get(FT(t), -1.0), FP(x)); + HANDLE(neg_float,1) + return math_builder(ctx)().CreateFMul(ConstantFP::get(FT(t), -1.0), FP(x)); + HANDLE(neg_float_fast,1) + return math_builder(ctx, true)().CreateFMul(ConstantFP::get(FT(t), -1.0), FP(x)); #endif - HANDLE(add_float,2) return builder.CreateFAdd(FP(x), FP(y)); - HANDLE(sub_float,2) return builder.CreateFSub(FP(x), FP(y)); - HANDLE(mul_float,2) return builder.CreateFMul(FP(x), FP(y)); - HANDLE(div_float,2) return builder.CreateFDiv(FP(x), FP(y)); - HANDLE(rem_float,2) return builder.CreateFRem(FP(x), FP(y)); + HANDLE(add_float,2) return math_builder(ctx)().CreateFAdd(FP(x), FP(y)); + HANDLE(sub_float,2) return math_builder(ctx)().CreateFSub(FP(x), FP(y)); + HANDLE(mul_float,2) return math_builder(ctx)().CreateFMul(FP(x), FP(y)); + HANDLE(div_float,2) return math_builder(ctx)().CreateFDiv(FP(x), FP(y)); + HANDLE(rem_float,2) return math_builder(ctx)().CreateFRem(FP(x), FP(y)); + HANDLE(add_float_fast,2) return math_builder(ctx, true)().CreateFAdd(FP(x), FP(y)); + HANDLE(sub_float_fast,2) return math_builder(ctx, true)().CreateFSub(FP(x), FP(y)); + 
HANDLE(mul_float_fast,2) return math_builder(ctx, true)().CreateFMul(FP(x), FP(y)); + HANDLE(div_float_fast,2) return math_builder(ctx, true)().CreateFDiv(FP(x), FP(y)); + HANDLE(rem_float_fast,2) return math_builder(ctx, true)().CreateFRem(FP(x), FP(y)); HANDLE(checked_sadd,2) HANDLE(checked_uadd,2) @@ -1262,6 +1294,8 @@ extern "C" void jl_init_intrinsic_functions(void) ADD_I(smod_int); ADD_I(neg_float); ADD_I(add_float); ADD_I(sub_float); ADD_I(mul_float); ADD_I(div_float); ADD_I(rem_float); + ADD_I(neg_float_fast); ADD_I(add_float_fast); ADD_I(sub_float_fast); + ADD_I(mul_float_fast); ADD_I(div_float_fast); ADD_I(rem_float_fast); ADD_I(eq_int); ADD_I(ne_int); ADD_I(slt_int); ADD_I(ult_int); ADD_I(sle_int); ADD_I(ule_int); diff --git a/src/julia.h b/src/julia.h index 578cdb3c68b48..4348474d56cc9 100644 --- a/src/julia.h +++ b/src/julia.h @@ -411,6 +411,7 @@ extern jl_sym_t *abstracttype_sym; extern jl_sym_t *bitstype_sym; extern jl_sym_t *compositetype_sym; extern jl_sym_t *type_goto_sym; extern jl_sym_t *global_sym; extern jl_sym_t *tuple_sym; extern jl_sym_t *boundscheck_sym; extern jl_sym_t *copyast_sym; +extern jl_sym_t *fastmath_sym; extern jl_sym_t *simdloop_sym; extern jl_sym_t *meta_sym; extern jl_sym_t *arrow_sym; extern jl_sym_t *ldots_sym; @@ -1327,6 +1328,8 @@ void show_execution_point(char *filename, int lno); // compiler options ----------------------------------------------------------- +// Note: need to keep this in sync with its initialization in +// src/init.c, and with JLCompilerOpts in base/inference.jl typedef struct { const char *julia_home; const char *julia_bin; @@ -1342,6 +1345,7 @@ typedef struct { int8_t opt_level; int8_t depwarn; int8_t can_inline; + int8_t fast_math; } jl_compileropts_t; extern DLLEXPORT jl_compileropts_t jl_compileropts; @@ -1354,6 +1358,9 @@ extern DLLEXPORT jl_compileropts_t jl_compileropts; #define JL_COMPILEROPT_CHECK_BOUNDS_DEFAULT 0 #define JL_COMPILEROPT_CHECK_BOUNDS_ON 1 #define 
JL_COMPILEROPT_CHECK_BOUNDS_OFF 2 +#define JL_COMPILEROPT_FAST_MATH_DEFAULT 0 +#define JL_COMPILEROPT_FAST_MATH_ON 1 +#define JL_COMPILEROPT_FAST_MATH_OFF 2 #define JL_COMPILEROPT_COMPILE_DEFAULT 1 #define JL_COMPILEROPT_COMPILE_OFF 0 #define JL_COMPILEROPT_COMPILE_ON 1 From 5417ba41ffb7efb1084b147d974876ddcecadeb6 Mon Sep 17 00:00:00 2001 From: Erik Schnetter Date: Tue, 6 Jan 2015 14:46:40 -0500 Subject: [PATCH 2/7] Add `@fastmath` macro that converts expressions to use fast math --- base/base.jl | 18 ++++++++++++++++++ base/exports.jl | 10 ++++++++++ base/float.jl | 42 ++++++++++++++++++++++++++++++++++++++++++ src/alloc.c | 1 + src/interpreter.c | 3 +++ src/jltypes.c | 1 + 6 files changed, 75 insertions(+) diff --git a/base/base.jl b/base/base.jl index daf59b0f9433a..92870891ea881 100644 --- a/base/base.jl +++ b/base/base.jl @@ -242,6 +242,24 @@ macro inbounds(blk) :(@boundscheck false $(esc(blk))) end +function make_fastmath(expr) + if isa(expr, Expr) + Expr(make_fastmath(expr.head), map(make_fastmath, expr.args)...) 
+ elseif expr==:+; :add_fast + elseif expr==:-; :sub_fast + elseif expr==:*; :mul_fast + elseif expr==:/; :div_fast + elseif expr==:rem; :rem_fast + elseif expr==:mod; :mod_fast + elseif expr==:cmp; :cmp_fast + else expr + end +end + +macro fastmath(expr) + make_fastmath(esc(expr)) +end + macro label(name::Symbol) Expr(:symboliclabel, name) end diff --git a/base/exports.jl b/base/exports.jl index b14c9cfc2d7f7..e319785717794 100644 --- a/base/exports.jl +++ b/base/exports.jl @@ -464,6 +464,15 @@ export √, ∛, +# fast math + add_fast, + sub_fast, + mul_fast, + div_fast, + rem_fast, + mod_fast, + cmp_fast, + # specfun airy, airyai, @@ -1417,6 +1426,7 @@ export @deprecate, @boundscheck, @inbounds, + @fastmath, @simd, @label, @goto, diff --git a/base/float.jl b/base/float.jl index 6938ce799abef..2bfa86a713aa1 100644 --- a/base/float.jl +++ b/base/float.jl @@ -204,6 +204,35 @@ widen(::Type{Float32}) = Float64 rem(x::Float32, y::Float32) = box(Float32,rem_float(unbox(Float32,x),unbox(Float32,y))) rem(x::Float64, y::Float64) = box(Float64,rem_float(unbox(Float64,x),unbox(Float64,y))) +# fast versions that may violate strict IEEE semantics +# TODO: provide isnan_fast and friends +for (op_fast, op) in ((:add_fast, :+), (:sub_fast, :-), + (:mul_fast, :*), (:div_fast, :/), + (:rem_fast, :rem), (:mod_fast, :mod), + (:cmp_fast, :cmp)) + @eval begin + # fall-back implementation for non-numeric types + ($op_fast)(xs...) = ($op)(xs...) + # type promotion + ($op_fast)(x::Number, y::Number, zs::Number...) = + ($op_fast)(promote(x,y,zs...)...) + # fall-back implementation that applies after promotion + ($op_fast){T<:Number}(x::T,ys::T...) = ($op)(x,ys...) 
+ end +end +for T in (Float32, Float64) + @eval begin + sub_fast(x::$T) = box($T,neg_float_fast(unbox($T,x))) + add_fast(x::$T, y::$T) = box($T,add_float_fast(unbox($T,x),unbox($T,y))) + sub_fast(x::$T, y::$T) = box($T,sub_float_fast(unbox($T,x),unbox($T,y))) + mul_fast(x::$T, y::$T) = box($T,mul_float_fast(unbox($T,x),unbox($T,y))) + div_fast(x::$T, y::$T) = box($T,div_float_fast(unbox($T,x),unbox($T,y))) + rem_fast(x::$T, y::$T) = box($T,rem_float_fast(unbox($T,x),unbox($T,y))) + add_fast(x::$T, y::$T, zs::$T...) = add_fast(add_fast(x, y), zs...) + mul_fast(x::$T, y::$T, zs::$T...) = mul_fast(mul_fast(x, y), zs...) + end +end + cld{T<:FloatingPoint}(x::T, y::T) = -fld(-x,y) function mod{T<:FloatingPoint}(x::T, y::T) @@ -217,6 +246,16 @@ function mod{T<:FloatingPoint}(x::T, y::T) end end +function mod_fast{T<:FloatingPoint}(x::T, y::T) + r = rem_fast(x,y) + if r == 0 + copysign(r,y) + elseif (r > 0) $ (y > 0) + r+y + else + r + end +end ## floating point comparisons ## ==(x::Float32, y::Float32) = eq_float(unbox(Float32,x),unbox(Float32,y)) @@ -248,6 +287,9 @@ function cmp(x::FloatingPoint, y::Real) ifelse(x<y, -1, ifelse(x>y, 1, 0)) end +cmp_fast(x::Float32, y::Float32) = ifelse(x<y, -1, ifelse(x>y, 1, 0)) +cmp_fast(x::Float64, y::Float64) = ifelse(x<y, -1, ifelse(x>y, 1, 0)) + for Ti in (Int64,UInt64,Int128,UInt128) for Tf in (Float32,Float64) @eval begin diff --git a/src/alloc.c b/src/alloc.c index 9ea24664d5e9b..72abc7fa378be 100644 --- a/src/alloc.c +++ b/src/alloc.c @@ -93,6 +93,7 @@ jl_sym_t *compositetype_sym; jl_sym_t *type_goto_sym; jl_sym_t *global_sym; jl_sym_t *tuple_sym; jl_sym_t *dot_sym; jl_sym_t *newvar_sym; jl_sym_t *boundscheck_sym; jl_sym_t *copyast_sym; +jl_sym_t *fastmath_sym; jl_sym_t *simdloop_sym; jl_sym_t *meta_sym; jl_sym_t *arrow_sym; jl_sym_t *ldots_sym; diff --git a/src/interpreter.c b/src/interpreter.c index 307535251183f..37146f82ac53d 100644 --- a/src/interpreter.c +++ b/src/interpreter.c @@ -449,6 +449,9 @@ static jl_value_t *eval(jl_value_t *e, jl_value_t **locals, size_t nl) 
else if (ex->head == boundscheck_sym) { return (jl_value_t*)jl_nothing; } + else if (ex->head == fastmath_sym) { + return (jl_value_t*)jl_nothing; + } else if (ex->head == simdloop_sym) { return (jl_value_t*)jl_nothing; } diff --git a/src/jltypes.c b/src/jltypes.c index f58a723f7f162..f2825bad2eab8 100644 --- a/src/jltypes.c +++ b/src/jltypes.c @@ -3279,6 +3279,7 @@ void jl_init_types(void) kw_sym = jl_symbol("kw"); dot_sym = jl_symbol("."); boundscheck_sym = jl_symbol("boundscheck"); + fastmath_sym = jl_symbol("fastmath"); newvar_sym = jl_symbol("newvar"); copyast_sym = jl_symbol("copyast"); simdloop_sym = jl_symbol("simdloop"); From 7f23d5291fb308d7c6b6ea93da09f11553cf83a5 Mon Sep 17 00:00:00 2001 From: Erik Schnetter Date: Tue, 6 Jan 2015 14:46:54 -0500 Subject: [PATCH 3/7] Add test cases for fast math --- test/numbers.jl | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/test/numbers.jl b/test/numbers.jl index aab864fd9d2fd..411a155aaddd0 100644 --- a/test/numbers.jl +++ b/test/numbers.jl @@ -59,6 +59,36 @@ @test minmax(NaN, 3.) == (3., 3.) 
@test isequal(minmax(NaN, NaN), (NaN, NaN)) +# fast math +const one32 = one(Float32) +const eps32 = eps(Float32) +const eps32_2 = eps32/2 +# Note: Cannot use local functions since these are not yet optimized +fm_ieee_32(x) = x + eps32_2 + eps32_2 +fm_fast_32(x) = @fastmath x + eps32_2 + eps32_2 +@test fm_ieee_32(one32) == one32 +@test (fm_fast_32(one32) == one32 || + fm_fast_32(one32) == one32 + eps32 > one32) + +const one64 = one(Float64) +const eps64 = eps(Float64) +const eps64_2 = eps64/2 +# Note: Cannot use local functions since these are not yet optimized +fm_ieee_64(x) = x + eps64_2 + eps64_2 +fm_fast_64(x) = @fastmath x + eps64_2 + eps64_2 +@test fm_ieee_64(one64) == one64 +@test (fm_fast_64(one64) == one64 || + fm_fast_64(one64) == one64 + eps64 > one64) + +let epsf = 1.0f0/2^15, one_epsf = 1+epsf + @test isapprox((@fastmath one_epsf * one_epsf - 1), + float32(65537/1073741824)) +end +let eps = 1.0/2^30, one_eps = 1+eps + @test isapprox((@fastmath one_eps * one_eps - 1), + 2147483649/1152921504606846976) +end + # lexing typemin(Int64) @test (-9223372036854775808)^1 == -9223372036854775808 @test [1 -1 -9223372036854775808] == [1 -1 typemin(Int64)] From 9d462ed0526524ffed0a23d14ffcc6214ffc8f48 Mon Sep 17 00:00:00 2001 From: Erik Schnetter Date: Tue, 6 Jan 2015 14:47:19 -0500 Subject: [PATCH 4/7] Add command line option `--math-mode=ieee` to disable fast math --- ui/repl.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/ui/repl.c b/ui/repl.c index 2437350ac7aec..d6d303ad5f6ab 100644 --- a/ui/repl.c +++ b/ui/repl.c @@ -86,7 +86,8 @@ void parse_opts(int *argcp, char ***argvp) opt_dump_bitcode, opt_compile, opt_depwarn, - opt_inline + opt_inline, + opt_math_mode }; static const char shortopts[] = "+H:hJ:C:O"; static const struct option longopts[] = { @@ -105,6 +106,7 @@ void parse_opts(int *argcp, char ***argvp) { "compile", required_argument, 0, opt_compile }, { "depwarn", required_argument, 0, opt_depwarn }, { "inline", 
required_argument, 0, opt_inline }, + { "math-mode", required_argument, 0, opt_math_mode }, { 0, 0, 0, 0 } }; int c; @@ -218,6 +220,16 @@ void parse_opts(int *argcp, char ***argvp) exit(1); } break; + case opt_math_mode: + if (!strcmp(optarg, "ieee")) + jl_compileropts.fast_math = JL_COMPILEROPT_FAST_MATH_OFF; + else if (!strcmp(optarg, "user")) + jl_compileropts.fast_math = JL_COMPILEROPT_FAST_MATH_DEFAULT; + else { + ios_printf(ios_stderr, "julia: invalid argument to --math-mode (%s)\n", optarg); + exit(1); + } + break; default: ios_printf(ios_stderr, "julia: unhandled option -- %c\n", c); ios_printf(ios_stderr, "This is a bug, please report it.\n"); From 877603920235d85b55e332d49b408244c6254281 Mon Sep 17 00:00:00 2001 From: Erik Schnetter Date: Tue, 6 Jan 2015 14:47:29 -0500 Subject: [PATCH 5/7] Document fast math --- doc/man/julia.1 | 5 +++++ doc/manual/getting-started.rst | 2 ++ doc/manual/performance-tips.rst | 4 ++++ 3 files changed, 11 insertions(+) diff --git a/doc/man/julia.1 b/doc/man/julia.1 index 62762e548d34a..d25a8bcbc3b2b 100644 --- a/doc/man/julia.1 +++ b/doc/man/julia.1 @@ -120,6 +120,11 @@ Enable or disable color text --check-bounds={yes|no} Emit bounds checks always or never (ignoring declarations) +.TP +--math-mode={ieee|user} +Always use IEEE semantics for math (ignoring declarations), +or adhere to declarations in source code + .TP --int-literals={32|64} Select integer literal size independent of platform diff --git a/doc/manual/getting-started.rst b/doc/manual/getting-started.rst index 533b3f5d7187a..656fd553724d7 100644 --- a/doc/manual/getting-started.rst +++ b/doc/manual/getting-started.rst @@ -130,6 +130,8 @@ those available for the ``perl`` and ``ruby`` programs:: --track-allocation={none|user|all} Count bytes allocated by each source line --check-bounds={yes|no} Emit bounds checks always or never (ignoring declarations) + --math-mode={ieee|user} Always use IEEE semantics for math (ignoring declarations), + or adhere to declarations in 
source code -O, --optimize Run time-intensive code optimizations --int-literals={32|64} Select integer literal size independent of platform --dump-bitcode={yes|no} Dump bitcode for the system image (used with --build) diff --git a/doc/manual/performance-tips.rst b/doc/manual/performance-tips.rst index e5b508f6428de..0fcaadb613d93 100644 --- a/doc/manual/performance-tips.rst +++ b/doc/manual/performance-tips.rst @@ -546,6 +546,10 @@ properties. - Use :obj:`@inbounds` to eliminate array bounds checking within expressions. Be certain before doing this. If the subscripts are ever out of bounds, you may suffer crashes or silent corruption. +- Use :obj:`@fastmath` to allow floating point optimizations that are + correct for real numbers, but lead to differences for IEEE numbers. + Be careful when doing this, as this may change numerical results. + This corresponds to the ``-ffast-math`` option of clang. - Write :obj:`@simd` in front of ``for`` loops that are amenable to vectorization. **This feature is experimental** and could change or disappear in future versions of Julia. From 0c4bab3179480d557d8d53c57a379d325ead4b43 Mon Sep 17 00:00:00 2001 From: Erik Schnetter Date: Tue, 6 Jan 2015 16:31:34 -0500 Subject: [PATCH 6/7] Add a non-trivial `@fastmath` example to the documentation --- doc/manual/performance-tips.rst | 91 ++++++++++++++++++++++++++++++++- 1 file changed, 90 insertions(+), 1 deletion(-) diff --git a/doc/manual/performance-tips.rst b/doc/manual/performance-tips.rst index 0fcaadb613d93..7b251d1d6039b 100644 --- a/doc/manual/performance-tips.rst +++ b/doc/manual/performance-tips.rst @@ -554,7 +554,7 @@ properties. **This feature is experimental** and could change or disappear in future versions of Julia. 
-Here is an example with both forms of markup:: +Here is an example with both :obj:`@inbounds` and :obj:`@simd` markup:: function inner( x, y ) s = zero(eltype(x)) @@ -625,6 +625,95 @@ properties: LLVM auto-vectorization may kick in automatically, leading to no further speedup with :obj:`@simd`. +Here is an example with all three kinds of markup. This program first +calculates the finite difference of a one-dimensional array, and then +evaluates the L2-norm of the result:: + + function init!(u) + n = length(u) + dx = 1.0 / (n-1) + @fastmath @inbounds @simd for i in 1:n + u[i] = sin(2pi*dx*i) + end + end + + function deriv!(u, du) + n = length(u) + dx = 1.0 / (n-1) + @fastmath @inbounds du[1] = (u[2] - u[1]) / dx + @fastmath @inbounds @simd for i in 2:n-1 + du[i] = (u[i+1] - u[i-1]) / (2*dx) + end + @fastmath @inbounds du[n] = (u[n] - u[n-1]) / dx + end + + function norm(u) + n = length(u) + T = eltype(u) + s = zero(T) + @fastmath @inbounds @simd for i in 1:n + s += u[i]^2 + end + @fastmath @inbounds return sqrt(s/n) + end + + function main() + n = 2000 + u = Array(Float64, n) + init!(u) + du = similar(u) + + deriv!(u, du) + nu = norm(du) + + @time for i in 1:10^6 + deriv!(u, du) + nu = norm(du) + end + + println(nu) + end + + main() + +On a computer with a 2.7 GHz Intel Core i7 processor, this produces:: + + $ julia wave.jl + elapsed time: 1.207814709 seconds (0 bytes allocated) + 4.443986180758243 + + $ julia --math-mode=ieee wave.jl + elapsed time: 4.487083643 seconds (0 bytes allocated) + 4.443986180758243 + +Here, the option ``--math-mode=ieee`` disables the :obj:`@fastmath` +macro, so that we can compare results. + +In this case, the speedup due to :obj:`@fastmath` is a factor of about +3.7. This is unusually large -- in general, the speedup will be +smaller. 
(In this particular example, the working set of the benchmark +is small enough to fit into the L1 cache of the processor, so that +memory access latency does not play a role, and computing time is +dominated by CPU usage. In many real world programs this is not the +case.) Also, in this case this optimization does not change the result +-- in general, the result will be slightly different. In some cases, +especially for numerically unstable algorithms, the result can be very +different. + +The annotation :obj:`@fastmath` re-arranges floating point +expressions, e.g. changing the order of evaluation, or assuming that +certain special cases (inf, nan) cannot occur. In this case (and on +this particular computer), the main difference is that the expression +``1 / (2*dx)`` in the function ``deriv!`` is hoisted out of the loop +(i.e. calculated outside the loop), as if one had written ``idx = 1 / +(2*dx)``. In the loop, the expression ``... / (2*dx)`` then becomes +``... * idx``, which is much faster to evaluate. Of course, both the +actual optimization that is applied by the compiler as well as the +resulting speedup depend very much on the hardware. You can examine +the change in generated code by using Julia's :obj:`code_native` +function. + + .. 
_man-code-warntype: :obj:`@code_warntype` From 8e1b7304e36b145ea19616077ebeb6b1b6596d22 Mon Sep 17 00:00:00 2001 From: Erik Schnetter Date: Mon, 12 Jan 2015 14:59:32 -0500 Subject: [PATCH 7/7] Describe `--math-mode` option in run-time help-text --- ui/repl.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/ui/repl.c b/ui/repl.c index d6d303ad5f6ab..aa72e7a6ae84e 100644 --- a/ui/repl.c +++ b/ui/repl.c @@ -73,6 +73,8 @@ static const char opts[] = " Count bytes allocated by each source line\n" " --check-bounds={yes|no} Emit bounds checks always or never (ignoring declarations)\n" " --inline={yes|no} Control whether inlining is permitted (even for functions declared as @inline)\n" + " --math-mode={ieee|user} Always use IEEE semantics for math (ignoring declarations),\n" + " or adhere to declarations in source code\n" " -O, --optimize Run time-intensive code optimizations\n" " --int-literals={32|64} Select integer literal size independent of platform\n" " --dump-bitcode={yes|no} Dump bitcode for the system image (used with --build)\n"