diff --git a/src/Random.cpp b/src/Random.cpp index 6f9e4bfa3e92..bb132a9ea536 100644 --- a/src/Random.cpp +++ b/src/Random.cpp @@ -82,6 +82,10 @@ Expr random_int(const vector &e) { rng32(Variable::make(UInt(32), name))); } } + // The low bytes of this have a poor period, so mix in the high bytes for + // two additional instructions. + result = result ^ (result >> 16); + return result; } @@ -101,7 +105,9 @@ class LowerRandom : public IRMutator { Expr visit(const Call *op) override { if (op->is_intrinsic(Call::random)) { vector args = op->args; - args.insert(args.end(), extra_args.begin(), extra_args.end()); + // Insert the free vars in reverse, so innermost vars typically end + // up last. + args.insert(args.end(), extra_args.rbegin(), extra_args.rend()); if (op->type == Float(32)) { return random_float(args); } else if (op->type == Int(32)) { @@ -121,7 +127,6 @@ class LowerRandom : public IRMutator { public: LowerRandom(const vector &free_vars, int tag) { - extra_args.emplace_back(tag); for (const VarOrRVar &v : free_vars) { if (v.is_rvar) { extra_args.push_back(v.rvar); @@ -129,6 +134,7 @@ class LowerRandom : public IRMutator { extra_args.push_back(v.var); } } + extra_args.emplace_back(tag); } }; diff --git a/src/Simplify_Mul.cpp b/src/Simplify_Mul.cpp index cfd73c072e19..54e07df14879 100644 --- a/src/Simplify_Mul.cpp +++ b/src/Simplify_Mul.cpp @@ -69,6 +69,11 @@ Expr Simplify::visit(const Mul *op, ExprInfo *bounds) { } if (rewrite(c0 * c1, fold(c0 * c1)) || + (!no_overflow(op->type) && // Intentionally-overflowing quadratics used in random number generation + (rewrite((x + c0) * (x + c1), x * (x + fold(c0 + c1)) + fold(c0 * c1)) || + rewrite((x * c0 + c1) * (x + c2), x * (x * c0 + fold(c1 + c0 * c2)) + fold(c1 * c2)) || + rewrite((x + c2) * (x * c0 + c1), x * (x * c0 + fold(c1 + c0 * c2)) + fold(c1 * c2)) || + rewrite((x * c0 + c1) * (x * c2 + c3), x * (x * fold(c0 * c2) + fold(c0 * c3 + c1 * c2)) + fold(c1 * c3)))) || rewrite((x + c0) * c1, x * c1 + fold(c0 * c1), !overflows(c0 * c1)) || rewrite((c0 - x) * c1, x * fold(-c1) + fold(c0 * c1), !overflows(c0 * c1)) || rewrite((0 - x) * y, 0 - x * y) || diff --git a/test/correctness/async_device_copy.cpp b/test/correctness/async_device_copy.cpp index 41492aed0127..490dc75b8dc1 100644 --- a/test/correctness/async_device_copy.cpp +++ b/test/correctness/async_device_copy.cpp @@ -9,9 +9,9 @@ Expr expensive_zero(Expr x, Expr y, Expr t, int n) { RDom r(0, n); Func a, b, c; Var z; - a(x, y, t, z) = random_int() % 1024; - b(x, y, t, z) = random_int() % 1024; - c(x, y, t, z) = random_int() % 1024; + a(x, y, t, z) = random_int() % 1024 + 5; + b(x, y, t, z) = random_int() % 1024 + 5; + c(x, y, t, z) = random_int() % 1024 + 5; return sum(select(pow(a(x, y, t, r), 3) + pow(b(x, y, t, r), 3) == pow(c(x, y, t, r), 3), 1, 0)); }