From e0df6878ae5525889859e8bc239d8541a6c2f590 Mon Sep 17 00:00:00 2001 From: Marcos Slomp Date: Mon, 6 Dec 2021 12:34:44 -0800 Subject: [PATCH 1/4] decommissioning StackPrinter (#6470) --- src/runtime/d3d12compute.cpp | 20 -------------------- 1 file changed, 20 deletions(-) diff --git a/src/runtime/d3d12compute.cpp b/src/runtime/d3d12compute.cpp index b2dcfd855f74..03eaf44f3f0c 100644 --- a/src/runtime/d3d12compute.cpp +++ b/src/runtime/d3d12compute.cpp @@ -73,26 +73,6 @@ #define HALIDE_D3D12_COMMAND_LIST_TYPE D3D12_COMMAND_LIST_TYPE_DIRECT #endif -// A Printer that automatically reserves stack space for the printer buffer: -// (the managed printers in 'printer.h' rely on malloc) -template -class StackPrinter : public Printer { -public: - StackPrinter(void *ctx = nullptr) - : Printer(ctx, buffer) { - } - StackPrinter &operator()(void *ctx = nullptr) { - this->user_context = ctx; - return *this; - } - uint64_t capacity() const { - return length; - } - -private: - char buffer[length]; -}; - static void d3d12_debug_dump(error &err); #define d3d12_panic(...) 
\ From fb305fd73a2727fdf3682bade6a0c75ed1785524 Mon Sep 17 00:00:00 2001 From: Roman Lebedev Date: Tue, 7 Dec 2021 05:15:18 +0300 Subject: [PATCH 2/4] `apps/linear_algebra/benchmarks/macros.h`: don't forget SSE guard (#6471) This is breaking i386 build: https://buildd.debian.org/status/fetch.php?pkg=halide&arch=i386&ver=13.0.1-3&stamp=1638786518&raw=0 --- apps/linear_algebra/benchmarks/macros.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/apps/linear_algebra/benchmarks/macros.h b/apps/linear_algebra/benchmarks/macros.h index 5f99c8c5e606..3c3e1dc33c8e 100644 --- a/apps/linear_algebra/benchmarks/macros.h +++ b/apps/linear_algebra/benchmarks/macros.h @@ -1,16 +1,16 @@ #include "halide_benchmark.h" #ifdef ENABLE_FTZ_DAZ -#if defined(__i386__) || defined(__x86_64__) +#if (defined(__i386__) || defined(__x86_64__)) && defined(__SSE__) #include #include -#endif // defined(__i386__) || defined(__x86_64__) +#endif // (defined(__i386__) || defined(__x86_64__)) && defined(__SSE__) #endif inline void set_math_flags() { #ifdef ENABLE_FTZ_DAZ -#if defined(__i386__) || defined(__x86_64__) +#if (defined(__i386__) || defined(__x86_64__)) && defined(__SSE__) // Flush denormals to zero (the FTZ flag). _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON); // Interpret denormal inputs as zero (the DAZ flag). 
From 799236949867b6a2be0d492137b36a0b67011622 Mon Sep 17 00:00:00 2001 From: Andrew Adams Date: Tue, 7 Dec 2021 08:16:50 -0800 Subject: [PATCH 3/4] Add a fast integer divide that rounds to zero (#6455) * Add a version of fast_integer_divide that rounds towards zero * clang-format * Fix test condition * Clean up debugging code * Add explanatory comment to performance test * Pacify clang tidy --- src/CodeGen_Internal.cpp | 60 +- src/CodeGen_Internal.h | 2 +- src/CodeGen_LLVM.cpp | 6 + src/FastIntegerDivide.cpp | 100 +- src/FastIntegerDivide.h | 8 +- src/IntegerDivisionTable.cpp | 2350 ++++++++++++++++++++++----- src/IntegerDivisionTable.h | 6 + test/performance/const_division.cpp | 84 +- tools/find_inverse.cpp | 114 +- 9 files changed, 2246 insertions(+), 484 deletions(-) diff --git a/src/CodeGen_Internal.cpp b/src/CodeGen_Internal.cpp index fbe6cd09cfc2..45029999cf22 100644 --- a/src/CodeGen_Internal.cpp +++ b/src/CodeGen_Internal.cpp @@ -248,7 +248,7 @@ bool can_allocation_fit_on_stack(int64_t size) { return (size <= (int64_t)Runtime::Internal::Constants::maximum_stack_allocation_bytes); } -Expr lower_int_uint_div(const Expr &a, const Expr &b) { +Expr lower_int_uint_div(const Expr &a, const Expr &b, bool round_to_zero) { // Detect if it's a small int division internal_assert(a.type() == b.type()); const int64_t *const_int_divisor = as_const_int(b); @@ -261,7 +261,16 @@ Expr lower_int_uint_div(const Expr &a, const Expr &b) { int shift_amount; if (is_const_power_of_two_integer(b, &shift_amount) && (t.is_int() || t.is_uint())) { - return a >> make_const(UInt(a.type().bits()), shift_amount); + if (round_to_zero && t.is_int()) { + Expr result = a; + // Normally a right-shift isn't right for signed division rounding to + // zero. It does the wrong thing for negative values. Add a fudge so + // that a right-shift becomes correct. Unsigned operands skip this: a logical shift already truncates, and the fudge would corrupt values with the top bit set.
+ result += (result >> (t.bits() - 1)) & (b - 1); + return result >> shift_amount; + } else { + return a >> make_const(UInt(a.type().bits()), shift_amount); + } } else if (const_int_divisor && t.is_int() && (t.bits() == 8 || t.bits() == 16 || t.bits() == 32) && @@ -271,15 +280,30 @@ Expr lower_int_uint_div(const Expr &a, const Expr &b) { int64_t multiplier; int shift; if (t.bits() == 32) { - multiplier = IntegerDivision::table_s32[*const_int_divisor][2]; - shift = IntegerDivision::table_s32[*const_int_divisor][3]; + if (round_to_zero) { + multiplier = IntegerDivision::table_srz32[*const_int_divisor][2]; + shift = IntegerDivision::table_srz32[*const_int_divisor][3]; + } else { + multiplier = IntegerDivision::table_s32[*const_int_divisor][2]; + shift = IntegerDivision::table_s32[*const_int_divisor][3]; + } } else if (t.bits() == 16) { - multiplier = IntegerDivision::table_s16[*const_int_divisor][2]; - shift = IntegerDivision::table_s16[*const_int_divisor][3]; + if (round_to_zero) { + multiplier = IntegerDivision::table_srz16[*const_int_divisor][2]; + shift = IntegerDivision::table_srz16[*const_int_divisor][3]; + } else { + multiplier = IntegerDivision::table_s16[*const_int_divisor][2]; + shift = IntegerDivision::table_s16[*const_int_divisor][3]; + } } else { // 8 bit - multiplier = IntegerDivision::table_s8[*const_int_divisor][2]; - shift = IntegerDivision::table_s8[*const_int_divisor][3]; + if (round_to_zero) { + multiplier = IntegerDivision::table_srz8[*const_int_divisor][2]; + shift = IntegerDivision::table_srz8[*const_int_divisor][3]; + } else { + multiplier = IntegerDivision::table_s8[*const_int_divisor][2]; + shift = IntegerDivision::table_s8[*const_int_divisor][3]; + } } Expr num = a; @@ -287,17 +311,24 @@ Expr lower_int_uint_div(const Expr &a, const Expr &b) { Type num_as_uint_t = num.type().with_code(Type::UInt); Expr sign = cast(num_as_uint_t, num >> make_const(UInt(t.bits()), t.bits() - 1)); - // Flip the numerator bits if the mask is high. 
- num = cast(num_as_uint_t, num); - num = num ^ sign; + if (!round_to_zero) { + // Flip the numerator bits if the mask is high. + num = cast(num_as_uint_t, num); + num = num ^ sign; + } // Multiply and keep the high half of the // result, and then apply the shift. Expr mult = make_const(num.type(), multiplier); num = mul_shift_right(num, mult, shift + num.type().bits()); - // Maybe flip the bits back again. - num = cast(a.type(), num ^ sign); + if (round_to_zero) { + // Add one if the numerator was negative + num -= sign; + } else { + // Maybe flip the bits back again. + num = cast(a.type(), num ^ sign); + } return num; } else if (const_uint_divisor && @@ -352,6 +383,9 @@ Expr lower_int_uint_div(const Expr &a, const Expr &b) { } return val; + } else if (round_to_zero) { + // Return the input division unchanged. + return Call::make(a.type(), Call::div_round_to_zero, {a, b}, Call::PureIntrinsic); } else { return lower_euclidean_div(a, b); } diff --git a/src/CodeGen_Internal.h b/src/CodeGen_Internal.h index b73037a79aae..52f6e51ba2b9 100644 --- a/src/CodeGen_Internal.h +++ b/src/CodeGen_Internal.h @@ -95,7 +95,7 @@ std::pair long_div_mod_round_to_zero(const Expr &a, const Expr &b, * Can introduce mulhi_shr and sorted_avg intrinsics as well as those from the * lower_euclidean_ operation -- div_round_to_zero or mod_round_to_zero. */ ///@{ -Expr lower_int_uint_div(const Expr &a, const Expr &b); +Expr lower_int_uint_div(const Expr &a, const Expr &b, bool round_to_zero = false); Expr lower_int_uint_mod(const Expr &a, const Expr &b); ///@} diff --git a/src/CodeGen_LLVM.cpp b/src/CodeGen_LLVM.cpp index 0d241ea02ea1..97b407638dbc 100644 --- a/src/CodeGen_LLVM.cpp +++ b/src/CodeGen_LLVM.cpp @@ -2670,6 +2670,12 @@ void CodeGen_LLVM::visit(const Call *op) { Let::make(b_name, op->args[1], Select::make(a_var < b_var, b_var - a_var, a_var - b_var)))); } else if (op->is_intrinsic(Call::div_round_to_zero)) { + // See if we can rewrite it to something faster (e.g. 
a shift) + Expr e = lower_int_uint_div(op->args[0], op->args[1], /** round to zero */ true); + if (!e.as()) { + codegen(e); + return; + } internal_assert(op->args.size() == 2); Value *a = codegen(op->args[0]); Value *b = codegen(op->args[1]); diff --git a/src/FastIntegerDivide.cpp b/src/FastIntegerDivide.cpp index 5012f96bd709..72da3d367546 100644 --- a/src/FastIntegerDivide.cpp +++ b/src/FastIntegerDivide.cpp @@ -48,6 +48,20 @@ Buffer integer_divide_table_s8() { return im; } +Buffer integer_divide_table_srz8() { + static auto im = []() { + Buffer im(256); + for (uint32_t i = 0; i < 256; i++) { + im(i) = table_runtime_srz8[i][2]; + if (i > 1) { + internal_assert(table_runtime_srz8[i][3] == shift_for_denominator(i)); + } + } + return im; + }(); + return im; +} + Buffer integer_divide_table_u16() { static auto im = []() { Buffer im(256); @@ -76,6 +90,20 @@ Buffer integer_divide_table_s16() { return im; } +Buffer integer_divide_table_srz16() { + static auto im = []() { + Buffer im(256); + for (uint32_t i = 0; i < 256; i++) { + im(i) = table_runtime_srz16[i][2]; + if (i > 1) { + internal_assert(table_runtime_srz16[i][3] == shift_for_denominator(i)); + } + } + return im; + }(); + return im; +} + Buffer integer_divide_table_u32() { static auto im = []() { Buffer im(256); @@ -104,9 +132,21 @@ Buffer integer_divide_table_s32() { return im; } -} // namespace +Buffer integer_divide_table_srz32() { + static auto im = []() { + Buffer im(256); + for (uint32_t i = 0; i < 256; i++) { + im(i) = table_runtime_srz32[i][2]; + if (i > 1) { + internal_assert(table_runtime_srz32[i][3] == shift_for_denominator(i)); + } + } + return im; + }(); + return im; +} -Expr fast_integer_divide(Expr numerator, Expr denominator) { +Expr fast_integer_divide_impl(Expr numerator, Expr denominator, bool round_to_zero) { if (is_const(denominator)) { // There's code elsewhere for this case. 
return numerator / cast(denominator); @@ -160,7 +200,7 @@ Expr fast_integer_divide(Expr numerator, Expr denominator) { // Do a final shift result = result >> cast(result.type(), shift); - } else { + } else if (!round_to_zero) { Expr mul, shift = shift_for_denominator(denominator); switch (t.bits()) { @@ -205,6 +245,46 @@ Expr fast_integer_divide(Expr numerator, Expr denominator) { // Maybe flip the bits again result = xsign ^ result; + } else { + // Signed round to zero + Expr mul, shift = shift_for_denominator(denominator); + switch (t.bits()) { + case 8: { + Buffer table = integer_divide_table_srz8(); + mul = table(denominator); + break; + } + case 16: { + Buffer table = integer_divide_table_srz16(); + mul = table(denominator); + break; + } + default: // 32 + { + Buffer table = integer_divide_table_srz32(); + mul = table(denominator); + break; + } + } + + // Extract a sign mask: 0 when numerator >= 0, -1 when it is negative. Zero must map to 0, otherwise the final fix-up below turns 0 / d into 1. + // Expr xsign = (t.bits() < 32) ? (numerator / (1 << (t.bits()-1))) : (numerator >> (t.bits()-1)); + Expr xsign = select(numerator >= 0, cast(t, 0), cast(t, -1)); + + // Multiply-keep-high-half + result = (cast(wide, mul) * numerator); + if (t.bits() < 32) { + result = result / (1 << t.bits()); + } else { + result = result >> Internal::make_const(result.type(), t.bits()); + } + result = cast(t, result); + + // Do the final shift + result = result >> cast(result.type(), shift); + + // Add one if the numerator was negative + result -= xsign; } // The tables don't work for denominator == 1 @@ -215,9 +295,19 @@ Expr fast_integer_divide(Expr numerator, Expr denominator) { return result; } -Expr fast_integer_modulo(Expr numerator, Expr denominator) { +} // namespace + +Expr fast_integer_divide_round_to_zero(const Expr &numerator, const Expr &denominator) { + return fast_integer_divide_impl(numerator, denominator, /** round to zero **/ true); +} + +Expr fast_integer_divide(const Expr &numerator, const Expr &denominator) { + return fast_integer_divide_impl(numerator, denominator, /** round to zero 
**/ false); +} + +Expr fast_integer_modulo(const Expr &numerator, const Expr &denominator) { Expr ratio = fast_integer_divide(numerator, denominator); - return std::move(numerator) - ratio * std::move(denominator); + return numerator - ratio * denominator; } } // namespace Halide diff --git a/src/FastIntegerDivide.h b/src/FastIntegerDivide.h index 7a802ababa75..8e7a09f7382e 100644 --- a/src/FastIntegerDivide.h +++ b/src/FastIntegerDivide.h @@ -26,12 +26,16 @@ namespace Halide { * 256. I.e. it interprets the uint8 divisor as a number from 1 to 256 * inclusive. */ -Expr fast_integer_divide(Expr numerator, Expr denominator); +Expr fast_integer_divide(const Expr &numerator, const Expr &denominator); + +/** A variant of the above which rounds towards zero instead of rounding towards + * negative infinity. */ +Expr fast_integer_divide_round_to_zero(const Expr &numerator, const Expr &denominator); /** Use the fast integer division tables to implement a modulo * operation via the Euclidean identity: a%b = a - (a/b)*b */ -Expr fast_integer_modulo(Expr numerator, Expr denominator); +Expr fast_integer_modulo(const Expr &numerator, const Expr &denominator); } // namespace Halide diff --git a/src/IntegerDivisionTable.cpp b/src/IntegerDivisionTable.cpp index 1f5cd58a6d1e..fdff1fd47f63 100644 --- a/src/IntegerDivisionTable.cpp +++ b/src/IntegerDivisionTable.cpp @@ -532,6 +532,264 @@ const int64_t table_s8[256][4] = { {254, 0, 0, 7}, {255, 0, 0, 7}, }; +const int64_t table_srz8[256][4] = { + {256, 1, 129LL, 7}, + {1, 0, 0, 0}, + {2, 0, 0, 1}, + {3, 1, 171LL, 1}, + {4, 0, 0, 2}, + {5, 1, 205LL, 2}, + {6, 1, 171LL, 2}, + {7, 1, 147LL, 2}, + {8, 0, 0, 3}, + {9, 1, 228LL, 3}, + {10, 1, 205LL, 3}, + {11, 1, 187LL, 3}, + {12, 1, 171LL, 3}, + {13, 1, 158LL, 3}, + {14, 1, 147LL, 3}, + {15, 1, 137LL, 3}, + {16, 0, 0, 4}, + {17, 1, 241LL, 4}, + {18, 1, 228LL, 4}, + {19, 1, 216LL, 4}, + {20, 1, 205LL, 4}, + {21, 1, 196LL, 4}, + {22, 1, 187LL, 4}, + {23, 1, 179LL, 4}, + {24, 1, 171LL, 4}, + 
{25, 1, 164LL, 4}, + {26, 1, 158LL, 4}, + {27, 1, 152LL, 4}, + {28, 1, 147LL, 4}, + {29, 1, 142LL, 4}, + {30, 1, 137LL, 4}, + {31, 1, 133LL, 4}, + {32, 0, 0, 5}, + {33, 1, 249LL, 5}, + {34, 1, 241LL, 5}, + {35, 1, 235LL, 5}, + {36, 1, 228LL, 5}, + {37, 1, 222LL, 5}, + {38, 1, 216LL, 5}, + {39, 1, 211LL, 5}, + {40, 1, 205LL, 5}, + {41, 1, 200LL, 5}, + {42, 1, 196LL, 5}, + {43, 1, 191LL, 5}, + {44, 1, 187LL, 5}, + {45, 1, 183LL, 5}, + {46, 1, 179LL, 5}, + {47, 1, 175LL, 5}, + {48, 1, 171LL, 5}, + {49, 1, 168LL, 5}, + {50, 1, 164LL, 5}, + {51, 1, 161LL, 5}, + {52, 1, 158LL, 5}, + {53, 1, 155LL, 5}, + {54, 1, 152LL, 5}, + {55, 1, 149LL, 5}, + {56, 1, 147LL, 5}, + {57, 1, 144LL, 5}, + {58, 1, 142LL, 5}, + {59, 1, 139LL, 5}, + {60, 1, 137LL, 5}, + {61, 1, 135LL, 5}, + {62, 1, 133LL, 5}, + {63, 1, 131LL, 5}, + {64, 0, 0, 6}, + {65, 1, 253LL, 6}, + {66, 1, 249LL, 6}, + {67, 1, 245LL, 6}, + {68, 1, 241LL, 6}, + {69, 1, 238LL, 6}, + {70, 1, 235LL, 6}, + {71, 1, 231LL, 6}, + {72, 1, 228LL, 6}, + {73, 1, 225LL, 6}, + {74, 1, 222LL, 6}, + {75, 1, 219LL, 6}, + {76, 1, 216LL, 6}, + {77, 1, 213LL, 6}, + {78, 1, 211LL, 6}, + {79, 1, 208LL, 6}, + {80, 1, 205LL, 6}, + {81, 1, 203LL, 6}, + {82, 1, 200LL, 6}, + {83, 1, 198LL, 6}, + {84, 1, 196LL, 6}, + {85, 1, 193LL, 6}, + {86, 1, 191LL, 6}, + {87, 1, 189LL, 6}, + {88, 1, 187LL, 6}, + {89, 1, 185LL, 6}, + {90, 1, 183LL, 6}, + {91, 1, 181LL, 6}, + {92, 1, 179LL, 6}, + {93, 1, 177LL, 6}, + {94, 1, 175LL, 6}, + {95, 1, 173LL, 6}, + {96, 1, 171LL, 6}, + {97, 1, 169LL, 6}, + {98, 1, 168LL, 6}, + {99, 1, 166LL, 6}, + {100, 1, 164LL, 6}, + {101, 1, 163LL, 6}, + {102, 1, 161LL, 6}, + {103, 1, 160LL, 6}, + {104, 1, 158LL, 6}, + {105, 1, 157LL, 6}, + {106, 1, 155LL, 6}, + {107, 1, 154LL, 6}, + {108, 1, 152LL, 6}, + {109, 1, 151LL, 6}, + {110, 1, 149LL, 6}, + {111, 1, 148LL, 6}, + {112, 1, 147LL, 6}, + {113, 1, 145LL, 6}, + {114, 1, 144LL, 6}, + {115, 1, 143LL, 6}, + {116, 1, 142LL, 6}, + {117, 1, 141LL, 6}, + {118, 1, 139LL, 6}, + {119, 1, 
138LL, 6}, + {120, 1, 137LL, 6}, + {121, 1, 136LL, 6}, + {122, 1, 135LL, 6}, + {123, 1, 134LL, 6}, + {124, 1, 133LL, 6}, + {125, 1, 132LL, 6}, + {126, 1, 131LL, 6}, + {127, 1, 130LL, 6}, + {128, 0, 0, 7}, + {129, 0, 0, 7}, + {130, 1, 253LL, 7}, + {131, 1, 251LL, 7}, + {132, 1, 249LL, 7}, + {133, 1, 247LL, 7}, + {134, 1, 245LL, 7}, + {135, 1, 243LL, 7}, + {136, 1, 241LL, 7}, + {137, 1, 240LL, 7}, + {138, 1, 238LL, 7}, + {139, 1, 236LL, 7}, + {140, 1, 235LL, 7}, + {141, 1, 233LL, 7}, + {142, 1, 231LL, 7}, + {143, 1, 230LL, 7}, + {144, 1, 228LL, 7}, + {145, 1, 226LL, 7}, + {146, 1, 225LL, 7}, + {147, 1, 223LL, 7}, + {148, 1, 222LL, 7}, + {149, 1, 220LL, 7}, + {150, 1, 219LL, 7}, + {151, 1, 218LL, 7}, + {152, 1, 216LL, 7}, + {153, 1, 215LL, 7}, + {154, 1, 213LL, 7}, + {155, 1, 212LL, 7}, + {156, 1, 211LL, 7}, + {157, 1, 209LL, 7}, + {158, 1, 208LL, 7}, + {159, 1, 207LL, 7}, + {160, 1, 205LL, 7}, + {161, 1, 204LL, 7}, + {162, 1, 203LL, 7}, + {163, 1, 202LL, 7}, + {164, 1, 200LL, 7}, + {165, 1, 199LL, 7}, + {166, 1, 198LL, 7}, + {167, 1, 197LL, 7}, + {168, 1, 196LL, 7}, + {169, 1, 194LL, 7}, + {170, 1, 193LL, 7}, + {171, 1, 192LL, 7}, + {172, 1, 191LL, 7}, + {173, 1, 190LL, 7}, + {174, 1, 189LL, 7}, + {175, 1, 188LL, 7}, + {176, 1, 187LL, 7}, + {177, 1, 186LL, 7}, + {178, 1, 185LL, 7}, + {179, 1, 184LL, 7}, + {180, 1, 183LL, 7}, + {181, 1, 182LL, 7}, + {182, 1, 181LL, 7}, + {183, 1, 180LL, 7}, + {184, 1, 179LL, 7}, + {185, 1, 178LL, 7}, + {186, 1, 177LL, 7}, + {187, 1, 176LL, 7}, + {188, 1, 175LL, 7}, + {189, 1, 174LL, 7}, + {190, 1, 173LL, 7}, + {191, 1, 172LL, 7}, + {192, 1, 171LL, 7}, + {193, 1, 170LL, 7}, + {194, 1, 169LL, 7}, + {195, 1, 169LL, 7}, + {196, 1, 168LL, 7}, + {197, 1, 167LL, 7}, + {198, 1, 166LL, 7}, + {199, 1, 165LL, 7}, + {200, 1, 164LL, 7}, + {201, 1, 164LL, 7}, + {202, 1, 163LL, 7}, + {203, 1, 162LL, 7}, + {204, 1, 161LL, 7}, + {205, 1, 160LL, 7}, + {206, 1, 160LL, 7}, + {207, 1, 159LL, 7}, + {208, 1, 158LL, 7}, + {209, 1, 157LL, 7}, + {210, 1, 
157LL, 7}, + {211, 1, 156LL, 7}, + {212, 1, 155LL, 7}, + {213, 1, 154LL, 7}, + {214, 1, 154LL, 7}, + {215, 1, 153LL, 7}, + {216, 1, 152LL, 7}, + {217, 1, 152LL, 7}, + {218, 1, 151LL, 7}, + {219, 1, 150LL, 7}, + {220, 1, 149LL, 7}, + {221, 1, 149LL, 7}, + {222, 1, 148LL, 7}, + {223, 1, 147LL, 7}, + {224, 1, 147LL, 7}, + {225, 1, 146LL, 7}, + {226, 1, 145LL, 7}, + {227, 1, 145LL, 7}, + {228, 1, 144LL, 7}, + {229, 1, 144LL, 7}, + {230, 1, 143LL, 7}, + {231, 1, 142LL, 7}, + {232, 1, 142LL, 7}, + {233, 1, 141LL, 7}, + {234, 1, 141LL, 7}, + {235, 1, 140LL, 7}, + {236, 1, 139LL, 7}, + {237, 1, 139LL, 7}, + {238, 1, 138LL, 7}, + {239, 1, 138LL, 7}, + {240, 1, 137LL, 7}, + {241, 1, 136LL, 7}, + {242, 1, 136LL, 7}, + {243, 1, 135LL, 7}, + {244, 1, 135LL, 7}, + {245, 1, 134LL, 7}, + {246, 1, 134LL, 7}, + {247, 1, 133LL, 7}, + {248, 1, 133LL, 7}, + {249, 1, 132LL, 7}, + {250, 1, 132LL, 7}, + {251, 1, 131LL, 7}, + {252, 1, 131LL, 7}, + {253, 1, 130LL, 7}, + {254, 1, 130LL, 7}, + {255, 1, 129LL, 7}, +}; const int64_t table_u16[256][4] = { {256, 0, 0, 8}, {1, 0, 0, 0}, @@ -1048,421 +1306,937 @@ const int64_t table_s16[256][4] = { {254, 1, 33027LL, 7}, {255, 1, 32897LL, 7}, }; -const int64_t table_u32[256][4] = { - {256, 0, 0, 8}, +const int64_t table_srz16[256][4] = { + {256, 1, 32769LL, 7}, {1, 0, 0, 0}, {2, 0, 0, 1}, - {3, 1, 2863311531ULL, 1}, + {3, 1, 43691LL, 1}, {4, 0, 0, 2}, - {5, 1, 3435973837ULL, 2}, - {6, 1, 2863311531ULL, 2}, - {7, 3, 613566756ULL, 2}, + {5, 1, 52429LL, 2}, + {6, 1, 43691LL, 2}, + {7, 1, 37450LL, 2}, {8, 0, 0, 3}, - {9, 1, 954437177ULL, 1}, - {10, 1, 3435973837ULL, 3}, - {11, 1, 3123612579ULL, 3}, - {12, 1, 2863311531ULL, 3}, - {13, 1, 1321528399ULL, 2}, - {14, 3, 613566756ULL, 3}, - {15, 1, 2290649225ULL, 3}, + {9, 1, 58255LL, 3}, + {10, 1, 52429LL, 3}, + {11, 1, 47663LL, 3}, + {12, 1, 43691LL, 3}, + {13, 1, 40330LL, 3}, + {14, 1, 37450LL, 3}, + {15, 1, 34953LL, 3}, {16, 0, 0, 4}, - {17, 1, 4042322161ULL, 4}, - {18, 1, 954437177ULL, 2}, - {19, 3, 
2938661834ULL, 4}, - {20, 1, 3435973837ULL, 4}, - {21, 3, 2249744774ULL, 4}, - {22, 1, 3123612579ULL, 4}, - {23, 1, 2987803337ULL, 4}, - {24, 1, 2863311531ULL, 4}, - {25, 1, 1374389535ULL, 3}, - {26, 1, 1321528399ULL, 3}, - {27, 3, 795364314ULL, 4}, - {28, 3, 613566756ULL, 4}, - {29, 1, 2369637129ULL, 4}, - {30, 1, 2290649225ULL, 4}, - {31, 3, 138547332ULL, 4}, + {17, 1, 61681LL, 4}, + {18, 1, 58255LL, 4}, + {19, 1, 55189LL, 4}, + {20, 1, 52429LL, 4}, + {21, 1, 49933LL, 4}, + {22, 1, 47663LL, 4}, + {23, 1, 45591LL, 4}, + {24, 1, 43691LL, 4}, + {25, 1, 41944LL, 4}, + {26, 1, 40330LL, 4}, + {27, 1, 38837LL, 4}, + {28, 1, 37450LL, 4}, + {29, 1, 36158LL, 4}, + {30, 1, 34953LL, 4}, + {31, 1, 33826LL, 4}, {32, 0, 0, 5}, - {33, 1, 1041204193ULL, 3}, - {34, 1, 4042322161ULL, 5}, - {35, 3, 3558687188ULL, 5}, - {36, 1, 954437177ULL, 3}, - {37, 3, 3134165324ULL, 5}, - {38, 3, 2938661834ULL, 5}, - {39, 3, 2753184164ULL, 5}, - {40, 1, 3435973837ULL, 5}, - {41, 1, 3352169597ULL, 5}, - {42, 3, 2249744774ULL, 5}, - {43, 1, 799063683ULL, 3}, - {44, 1, 3123612579ULL, 5}, - {45, 3, 1813430636ULL, 5}, - {46, 1, 2987803337ULL, 5}, - {47, 1, 2924233053ULL, 5}, - {48, 1, 2863311531ULL, 5}, - {49, 1, 1402438301ULL, 4}, - {50, 1, 1374389535ULL, 4}, - {51, 1, 2694881441ULL, 5}, - {52, 1, 1321528399ULL, 4}, - {53, 3, 891408306ULL, 5}, - {54, 3, 795364314ULL, 5}, - {55, 3, 702812830ULL, 5}, - {56, 3, 613566756ULL, 5}, - {57, 3, 527452124ULL, 5}, - {58, 1, 2369637129ULL, 5}, - {59, 1, 582368447ULL, 3}, - {60, 1, 2290649225ULL, 5}, - {61, 1, 1126548799ULL, 4}, - {62, 3, 138547332ULL, 5}, - {63, 3, 68174084ULL, 5}, + {33, 1, 63551LL, 5}, + {34, 1, 61681LL, 5}, + {35, 1, 59919LL, 5}, + {36, 1, 58255LL, 5}, + {37, 1, 56680LL, 5}, + {38, 1, 55189LL, 5}, + {39, 1, 53774LL, 5}, + {40, 1, 52429LL, 5}, + {41, 1, 51151LL, 5}, + {42, 1, 49933LL, 5}, + {43, 1, 48771LL, 5}, + {44, 1, 47663LL, 5}, + {45, 1, 46604LL, 5}, + {46, 1, 45591LL, 5}, + {47, 1, 44621LL, 5}, + {48, 1, 43691LL, 5}, + {49, 1, 42800LL, 
5}, + {50, 1, 41944LL, 5}, + {51, 1, 41121LL, 5}, + {52, 1, 40330LL, 5}, + {53, 1, 39569LL, 5}, + {54, 1, 38837LL, 5}, + {55, 1, 38131LL, 5}, + {56, 1, 37450LL, 5}, + {57, 1, 36793LL, 5}, + {58, 1, 36158LL, 5}, + {59, 1, 35545LL, 5}, + {60, 1, 34953LL, 5}, + {61, 1, 34380LL, 5}, + {62, 1, 33826LL, 5}, + {63, 1, 33289LL, 5}, {64, 0, 0, 6}, - {65, 1, 4228890877ULL, 6}, - {66, 1, 1041204193ULL, 4}, - {67, 1, 128207979ULL, 1}, - {68, 1, 4042322161ULL, 6}, - {69, 1, 1991868891ULL, 5}, - {70, 3, 3558687188ULL, 6}, - {71, 1, 3871519817ULL, 6}, - {72, 1, 954437177ULL, 4}, - {73, 3, 3235934264ULL, 6}, - {74, 3, 3134165324ULL, 6}, - {75, 1, 458129845ULL, 3}, - {76, 3, 2938661834ULL, 6}, - {77, 1, 892460737ULL, 4}, - {78, 3, 2753184164ULL, 6}, - {79, 1, 3479467177ULL, 6}, - {80, 1, 3435973837ULL, 6}, - {81, 1, 3393554407ULL, 6}, - {82, 1, 3352169597ULL, 6}, - {83, 1, 827945503ULL, 4}, - {84, 3, 2249744774ULL, 6}, - {85, 1, 3233857729ULL, 6}, - {86, 1, 799063683ULL, 4}, - {87, 1, 789879043ULL, 4}, - {88, 1, 3123612579ULL, 6}, - {89, 1, 3088515809ULL, 6}, - {90, 3, 1813430636ULL, 6}, - {91, 3, 1746305384ULL, 6}, - {92, 1, 2987803337ULL, 6}, - {93, 1, 2955676419ULL, 6}, - {94, 1, 2924233053ULL, 6}, - {95, 3, 1491936008ULL, 6}, - {96, 1, 2863311531ULL, 6}, - {97, 3, 1372618414ULL, 6}, - {98, 1, 1402438301ULL, 5}, - {99, 1, 2776544515ULL, 6}, - {100, 1, 1374389535ULL, 5}, - {101, 3, 1148159574ULL, 6}, - {102, 1, 2694881441ULL, 6}, - {103, 3, 1042467790ULL, 6}, - {104, 1, 1321528399ULL, 5}, - {105, 3, 940802360ULL, 6}, - {106, 3, 891408306ULL, 6}, - {107, 3, 842937506ULL, 6}, - {108, 3, 795364314ULL, 6}, - {109, 3, 748664024ULL, 6}, - {110, 3, 702812830ULL, 6}, - {111, 3, 657787784ULL, 6}, - {112, 3, 613566756ULL, 6}, - {113, 3, 570128402ULL, 6}, - {114, 3, 527452124ULL, 6}, - {115, 3, 485518042ULL, 6}, - {116, 1, 2369637129ULL, 6}, - {117, 3, 403800344ULL, 6}, - {118, 1, 582368447ULL, 4}, - {119, 1, 1154949189ULL, 5}, - {120, 1, 2290649225ULL, 6}, - {121, 3, 248469182ULL, 6}, - 
{122, 1, 1126548799ULL, 5}, - {123, 3, 174592166ULL, 6}, - {124, 3, 138547332ULL, 6}, - {125, 1, 274877907ULL, 3}, - {126, 3, 68174084ULL, 6}, - {127, 3, 33818640ULL, 6}, + {65, 1, 64528LL, 6}, + {66, 1, 63551LL, 6}, + {67, 1, 62602LL, 6}, + {68, 1, 61681LL, 6}, + {69, 1, 60788LL, 6}, + {70, 1, 59919LL, 6}, + {71, 1, 59075LL, 6}, + {72, 1, 58255LL, 6}, + {73, 1, 57457LL, 6}, + {74, 1, 56680LL, 6}, + {75, 1, 55925LL, 6}, + {76, 1, 55189LL, 6}, + {77, 1, 54472LL, 6}, + {78, 1, 53774LL, 6}, + {79, 1, 53093LL, 6}, + {80, 1, 52429LL, 6}, + {81, 1, 51782LL, 6}, + {82, 1, 51151LL, 6}, + {83, 1, 50534LL, 6}, + {84, 1, 49933LL, 6}, + {85, 1, 49345LL, 6}, + {86, 1, 48771LL, 6}, + {87, 1, 48211LL, 6}, + {88, 1, 47663LL, 6}, + {89, 1, 47128LL, 6}, + {90, 1, 46604LL, 6}, + {91, 1, 46092LL, 6}, + {92, 1, 45591LL, 6}, + {93, 1, 45101LL, 6}, + {94, 1, 44621LL, 6}, + {95, 1, 44151LL, 6}, + {96, 1, 43691LL, 6}, + {97, 1, 43241LL, 6}, + {98, 1, 42800LL, 6}, + {99, 1, 42367LL, 6}, + {100, 1, 41944LL, 6}, + {101, 1, 41528LL, 6}, + {102, 1, 41121LL, 6}, + {103, 1, 40722LL, 6}, + {104, 1, 40330LL, 6}, + {105, 1, 39946LL, 6}, + {106, 1, 39569LL, 6}, + {107, 1, 39200LL, 6}, + {108, 1, 38837LL, 6}, + {109, 1, 38480LL, 6}, + {110, 1, 38131LL, 6}, + {111, 1, 37787LL, 6}, + {112, 1, 37450LL, 6}, + {113, 1, 37118LL, 6}, + {114, 1, 36793LL, 6}, + {115, 1, 36473LL, 6}, + {116, 1, 36158LL, 6}, + {117, 1, 35849LL, 6}, + {118, 1, 35545LL, 6}, + {119, 1, 35247LL, 6}, + {120, 1, 34953LL, 6}, + {121, 1, 34664LL, 6}, + {122, 1, 34380LL, 6}, + {123, 1, 34101LL, 6}, + {124, 1, 33826LL, 6}, + {125, 1, 33555LL, 6}, + {126, 1, 33289LL, 6}, + {127, 1, 33027LL, 6}, {128, 0, 0, 7}, - {129, 1, 266354561ULL, 3}, - {130, 1, 4228890877ULL, 7}, - {131, 1, 4196609267ULL, 7}, - {132, 1, 1041204193ULL, 5}, - {133, 1, 4133502361ULL, 7}, - {134, 1, 128207979ULL, 2}, - {135, 1, 4072265289ULL, 7}, - {136, 1, 4042322161ULL, 7}, - {137, 1, 125400505ULL, 2}, - {138, 1, 1991868891ULL, 6}, - {139, 1, 1977538899ULL, 6}, - {140, 
3, 3558687188ULL, 7}, - {141, 1, 974744351ULL, 5}, - {142, 1, 3871519817ULL, 7}, - {143, 1, 3844446251ULL, 7}, - {144, 1, 954437177ULL, 5}, - {145, 1, 3791419407ULL, 7}, - {146, 3, 3235934264ULL, 7}, - {147, 1, 3739835469ULL, 7}, - {148, 3, 3134165324ULL, 7}, - {149, 1, 3689636335ULL, 7}, - {150, 1, 458129845ULL, 4}, - {151, 1, 910191745ULL, 5}, - {152, 3, 2938661834ULL, 7}, - {153, 1, 3593175255ULL, 7}, - {154, 1, 892460737ULL, 5}, - {155, 1, 3546811703ULL, 7}, - {156, 3, 2753184164ULL, 7}, - {157, 1, 875407347ULL, 5}, - {158, 1, 3479467177ULL, 7}, - {159, 3, 2620200174ULL, 7}, - {160, 1, 3435973837ULL, 7}, - {161, 1, 3414632385ULL, 7}, - {162, 1, 3393554407ULL, 7}, - {163, 1, 3372735055ULL, 7}, - {164, 1, 3352169597ULL, 7}, - {165, 1, 1665926709ULL, 6}, - {166, 1, 827945503ULL, 5}, - {167, 1, 1645975491ULL, 6}, - {168, 3, 2249744774ULL, 7}, - {169, 1, 1626496491ULL, 6}, - {170, 1, 3233857729ULL, 7}, - {171, 3, 2134925264ULL, 7}, - {172, 1, 799063683ULL, 5}, - {173, 3, 2060591246ULL, 7}, - {174, 1, 789879043ULL, 5}, - {175, 1, 1570730897ULL, 6}, - {176, 1, 3123612579ULL, 7}, - {177, 3, 1916962804ULL, 7}, - {178, 1, 3088515809ULL, 7}, - {179, 3, 1847555764ULL, 7}, - {180, 3, 1813430636ULL, 7}, - {181, 1, 3037324939ULL, 7}, - {182, 3, 1746305384ULL, 7}, - {183, 1, 3004130131ULL, 7}, - {184, 1, 2987803337ULL, 7}, - {185, 3, 1648338800ULL, 7}, - {186, 1, 2955676419ULL, 7}, - {187, 1, 2939870663ULL, 7}, - {188, 1, 2924233053ULL, 7}, - {189, 3, 1522554544ULL, 7}, - {190, 3, 1491936008ULL, 7}, - {191, 1, 2878302691ULL, 7}, - {192, 1, 2863311531ULL, 7}, - {193, 1, 356059465ULL, 4}, - {194, 3, 1372618414ULL, 7}, - {195, 3, 1343553872ULL, 7}, - {196, 1, 1402438301ULL, 6}, - {197, 3, 1286310002ULL, 7}, - {198, 1, 2776544515ULL, 7}, - {199, 1, 1381296015ULL, 6}, - {200, 1, 1374389535ULL, 6}, - {201, 1, 42735993ULL, 1}, - {202, 3, 1148159574ULL, 7}, - {203, 1, 2708156719ULL, 7}, - {204, 1, 2694881441ULL, 7}, - {205, 1, 1340867839ULL, 6}, - {206, 3, 1042467790ULL, 7}, - {207, 
1, 663956297ULL, 5}, - {208, 1, 1321528399ULL, 6}, - {209, 1, 2630410593ULL, 7}, - {210, 3, 940802360ULL, 7}, - {211, 1, 2605477791ULL, 7}, - {212, 3, 891408306ULL, 7}, - {213, 1, 2581013211ULL, 7}, - {214, 3, 842937506ULL, 7}, - {215, 1, 1278501893ULL, 6}, - {216, 3, 795364314ULL, 7}, - {217, 3, 771906564ULL, 7}, - {218, 3, 748664024ULL, 7}, - {219, 3, 725633744ULL, 7}, - {220, 3, 702812830ULL, 7}, - {221, 3, 680198440ULL, 7}, - {222, 3, 657787784ULL, 7}, - {223, 3, 635578120ULL, 7}, - {224, 3, 613566756ULL, 7}, - {225, 1, 2443359173ULL, 7}, - {226, 3, 570128402ULL, 7}, - {227, 3, 548696262ULL, 7}, - {228, 3, 527452124ULL, 7}, - {229, 1, 1200340205ULL, 6}, - {230, 3, 485518042ULL, 7}, - {231, 3, 464823300ULL, 7}, - {232, 1, 2369637129ULL, 7}, - {233, 3, 423966728ULL, 7}, - {234, 3, 403800344ULL, 7}, - {235, 3, 383805588ULL, 7}, - {236, 1, 582368447ULL, 5}, - {237, 3, 344322272ULL, 7}, - {238, 1, 1154949189ULL, 6}, - {239, 1, 2300233531ULL, 7}, - {240, 1, 2290649225ULL, 7}, - {241, 1, 285143057ULL, 4}, - {242, 3, 248469182ULL, 7}, - {243, 1, 2262369605ULL, 7}, - {244, 1, 1126548799ULL, 6}, - {245, 3, 192835266ULL, 7}, - {246, 3, 174592166ULL, 7}, - {247, 3, 156496784ULL, 7}, - {248, 3, 138547332ULL, 7}, - {249, 3, 120742052ULL, 7}, - {250, 1, 274877907ULL, 4}, - {251, 1, 2190262207ULL, 7}, - {252, 3, 68174084ULL, 7}, - {253, 1, 2172947881ULL, 7}, - {254, 3, 33818640ULL, 7}, - {255, 1, 2155905153ULL, 7}, + {129, 1, 65028LL, 7}, + {130, 1, 64528LL, 7}, + {131, 1, 64036LL, 7}, + {132, 1, 63551LL, 7}, + {133, 1, 63073LL, 7}, + {134, 1, 62602LL, 7}, + {135, 1, 62138LL, 7}, + {136, 1, 61681LL, 7}, + {137, 1, 61231LL, 7}, + {138, 1, 60788LL, 7}, + {139, 1, 60350LL, 7}, + {140, 1, 59919LL, 7}, + {141, 1, 59494LL, 7}, + {142, 1, 59075LL, 7}, + {143, 1, 58662LL, 7}, + {144, 1, 58255LL, 7}, + {145, 1, 57853LL, 7}, + {146, 1, 57457LL, 7}, + {147, 1, 57066LL, 7}, + {148, 1, 56680LL, 7}, + {149, 1, 56300LL, 7}, + {150, 1, 55925LL, 7}, + {151, 1, 55554LL, 7}, + {152, 1, 55189LL, 
7}, + {153, 1, 54828LL, 7}, + {154, 1, 54472LL, 7}, + {155, 1, 54121LL, 7}, + {156, 1, 53774LL, 7}, + {157, 1, 53431LL, 7}, + {158, 1, 53093LL, 7}, + {159, 1, 52759LL, 7}, + {160, 1, 52429LL, 7}, + {161, 1, 52104LL, 7}, + {162, 1, 51782LL, 7}, + {163, 1, 51464LL, 7}, + {164, 1, 51151LL, 7}, + {165, 1, 50841LL, 7}, + {166, 1, 50534LL, 7}, + {167, 1, 50232LL, 7}, + {168, 1, 49933LL, 7}, + {169, 1, 49637LL, 7}, + {170, 1, 49345LL, 7}, + {171, 1, 49057LL, 7}, + {172, 1, 48771LL, 7}, + {173, 1, 48490LL, 7}, + {174, 1, 48211LL, 7}, + {175, 1, 47935LL, 7}, + {176, 1, 47663LL, 7}, + {177, 1, 47394LL, 7}, + {178, 1, 47128LL, 7}, + {179, 1, 46864LL, 7}, + {180, 1, 46604LL, 7}, + {181, 1, 46346LL, 7}, + {182, 1, 46092LL, 7}, + {183, 1, 45840LL, 7}, + {184, 1, 45591LL, 7}, + {185, 1, 45344LL, 7}, + {186, 1, 45101LL, 7}, + {187, 1, 44859LL, 7}, + {188, 1, 44621LL, 7}, + {189, 1, 44385LL, 7}, + {190, 1, 44151LL, 7}, + {191, 1, 43920LL, 7}, + {192, 1, 43691LL, 7}, + {193, 1, 43465LL, 7}, + {194, 1, 43241LL, 7}, + {195, 1, 43019LL, 7}, + {196, 1, 42800LL, 7}, + {197, 1, 42582LL, 7}, + {198, 1, 42367LL, 7}, + {199, 1, 42154LL, 7}, + {200, 1, 41944LL, 7}, + {201, 1, 41735LL, 7}, + {202, 1, 41528LL, 7}, + {203, 1, 41324LL, 7}, + {204, 1, 41121LL, 7}, + {205, 1, 40921LL, 7}, + {206, 1, 40722LL, 7}, + {207, 1, 40525LL, 7}, + {208, 1, 40330LL, 7}, + {209, 1, 40137LL, 7}, + {210, 1, 39946LL, 7}, + {211, 1, 39757LL, 7}, + {212, 1, 39569LL, 7}, + {213, 1, 39384LL, 7}, + {214, 1, 39200LL, 7}, + {215, 1, 39017LL, 7}, + {216, 1, 38837LL, 7}, + {217, 1, 38658LL, 7}, + {218, 1, 38480LL, 7}, + {219, 1, 38305LL, 7}, + {220, 1, 38131LL, 7}, + {221, 1, 37958LL, 7}, + {222, 1, 37787LL, 7}, + {223, 1, 37618LL, 7}, + {224, 1, 37450LL, 7}, + {225, 1, 37283LL, 7}, + {226, 1, 37118LL, 7}, + {227, 1, 36955LL, 7}, + {228, 1, 36793LL, 7}, + {229, 1, 36632LL, 7}, + {230, 1, 36473LL, 7}, + {231, 1, 36315LL, 7}, + {232, 1, 36158LL, 7}, + {233, 1, 36003LL, 7}, + {234, 1, 35849LL, 7}, + {235, 1, 35697LL, 7}, + 
{236, 1, 35545LL, 7}, + {237, 1, 35395LL, 7}, + {238, 1, 35247LL, 7}, + {239, 1, 35099LL, 7}, + {240, 1, 34953LL, 7}, + {241, 1, 34808LL, 7}, + {242, 1, 34664LL, 7}, + {243, 1, 34522LL, 7}, + {244, 1, 34380LL, 7}, + {245, 1, 34240LL, 7}, + {246, 1, 34101LL, 7}, + {247, 1, 33962LL, 7}, + {248, 1, 33826LL, 7}, + {249, 1, 33690LL, 7}, + {250, 1, 33555LL, 7}, + {251, 1, 33421LL, 7}, + {252, 1, 33289LL, 7}, + {253, 1, 33157LL, 7}, + {254, 1, 33027LL, 7}, + {255, 1, 32897LL, 7}, }; -const int64_t table_s32[256][4] = { - {256, 1, 2147483649LL, 7}, +const int64_t table_u32[256][4] = { + {256, 0, 0, 8}, {1, 0, 0, 0}, {2, 0, 0, 1}, - {3, 1, 2863311531LL, 1}, + {3, 1, 2863311531ULL, 1}, {4, 0, 0, 2}, - {5, 1, 3435973837LL, 2}, - {6, 1, 2863311531LL, 2}, - {7, 1, 2454267027LL, 2}, + {5, 1, 3435973837ULL, 2}, + {6, 1, 2863311531ULL, 2}, + {7, 3, 613566756ULL, 2}, {8, 0, 0, 3}, - {9, 1, 3817748708LL, 3}, - {10, 1, 3435973837LL, 3}, - {11, 1, 3123612579LL, 3}, - {12, 1, 2863311531LL, 3}, - {13, 1, 2643056798LL, 3}, - {14, 1, 2454267027LL, 3}, - {15, 1, 2290649225LL, 3}, + {9, 1, 954437177ULL, 1}, + {10, 1, 3435973837ULL, 3}, + {11, 1, 3123612579ULL, 3}, + {12, 1, 2863311531ULL, 3}, + {13, 1, 1321528399ULL, 2}, + {14, 3, 613566756ULL, 3}, + {15, 1, 2290649225ULL, 3}, {16, 0, 0, 4}, - {17, 1, 4042322161LL, 4}, - {18, 1, 3817748708LL, 4}, - {19, 1, 3616814566LL, 4}, - {20, 1, 3435973837LL, 4}, - {21, 1, 3272356036LL, 4}, - {22, 1, 3123612579LL, 4}, - {23, 1, 2987803337LL, 4}, - {24, 1, 2863311531LL, 4}, - {25, 1, 2748779070LL, 4}, - {26, 1, 2643056798LL, 4}, - {27, 1, 2545165806LL, 4}, - {28, 1, 2454267027LL, 4}, - {29, 1, 2369637129LL, 4}, - {30, 1, 2290649225LL, 4}, - {31, 1, 2216757315LL, 4}, + {17, 1, 4042322161ULL, 4}, + {18, 1, 954437177ULL, 2}, + {19, 3, 2938661834ULL, 4}, + {20, 1, 3435973837ULL, 4}, + {21, 3, 2249744774ULL, 4}, + {22, 1, 3123612579ULL, 4}, + {23, 1, 2987803337ULL, 4}, + {24, 1, 2863311531ULL, 4}, + {25, 1, 1374389535ULL, 3}, + {26, 1, 1321528399ULL, 3}, + 
{27, 3, 795364314ULL, 4}, + {28, 3, 613566756ULL, 4}, + {29, 1, 2369637129ULL, 4}, + {30, 1, 2290649225ULL, 4}, + {31, 3, 138547332ULL, 4}, {32, 0, 0, 5}, - {33, 1, 4164816772LL, 5}, - {34, 1, 4042322161LL, 5}, - {35, 1, 3926827243LL, 5}, - {36, 1, 3817748708LL, 5}, - {37, 1, 3714566311LL, 5}, - {38, 1, 3616814566LL, 5}, - {39, 1, 3524075731LL, 5}, - {40, 1, 3435973837LL, 5}, - {41, 1, 3352169597LL, 5}, - {42, 1, 3272356036LL, 5}, - {43, 1, 3196254732LL, 5}, - {44, 1, 3123612579LL, 5}, - {45, 1, 3054198967LL, 5}, - {46, 1, 2987803337LL, 5}, - {47, 1, 2924233053LL, 5}, - {48, 1, 2863311531LL, 5}, - {49, 1, 2804876602LL, 5}, - {50, 1, 2748779070LL, 5}, - {51, 1, 2694881441LL, 5}, - {52, 1, 2643056798LL, 5}, - {53, 1, 2593187802LL, 5}, - {54, 1, 2545165806LL, 5}, - {55, 1, 2498890064LL, 5}, - {56, 1, 2454267027LL, 5}, - {57, 1, 2411209711LL, 5}, - {58, 1, 2369637129LL, 5}, - {59, 1, 2329473788LL, 5}, - {60, 1, 2290649225LL, 5}, - {61, 1, 2253097598LL, 5}, - {62, 1, 2216757315LL, 5}, - {63, 1, 2181570691LL, 5}, + {33, 1, 1041204193ULL, 3}, + {34, 1, 4042322161ULL, 5}, + {35, 3, 3558687188ULL, 5}, + {36, 1, 954437177ULL, 3}, + {37, 3, 3134165324ULL, 5}, + {38, 3, 2938661834ULL, 5}, + {39, 3, 2753184164ULL, 5}, + {40, 1, 3435973837ULL, 5}, + {41, 1, 3352169597ULL, 5}, + {42, 3, 2249744774ULL, 5}, + {43, 1, 799063683ULL, 3}, + {44, 1, 3123612579ULL, 5}, + {45, 3, 1813430636ULL, 5}, + {46, 1, 2987803337ULL, 5}, + {47, 1, 2924233053ULL, 5}, + {48, 1, 2863311531ULL, 5}, + {49, 1, 1402438301ULL, 4}, + {50, 1, 1374389535ULL, 4}, + {51, 1, 2694881441ULL, 5}, + {52, 1, 1321528399ULL, 4}, + {53, 3, 891408306ULL, 5}, + {54, 3, 795364314ULL, 5}, + {55, 3, 702812830ULL, 5}, + {56, 3, 613566756ULL, 5}, + {57, 3, 527452124ULL, 5}, + {58, 1, 2369637129ULL, 5}, + {59, 1, 582368447ULL, 3}, + {60, 1, 2290649225ULL, 5}, + {61, 1, 1126548799ULL, 4}, + {62, 3, 138547332ULL, 5}, + {63, 3, 68174084ULL, 5}, {64, 0, 0, 6}, - {65, 1, 4228890877LL, 6}, - {66, 1, 4164816772LL, 6}, - {67, 1, 
4102655328LL, 6}, - {68, 1, 4042322161LL, 6}, - {69, 1, 3983737782LL, 6}, - {70, 1, 3926827243LL, 6}, - {71, 1, 3871519817LL, 6}, - {72, 1, 3817748708LL, 6}, - {73, 1, 3765450781LL, 6}, - {74, 1, 3714566311LL, 6}, - {75, 1, 3665038760LL, 6}, - {76, 1, 3616814566LL, 6}, - {77, 1, 3569842948LL, 6}, - {78, 1, 3524075731LL, 6}, - {79, 1, 3479467177LL, 6}, - {80, 1, 3435973837LL, 6}, - {81, 1, 3393554407LL, 6}, - {82, 1, 3352169597LL, 6}, - {83, 1, 3311782012LL, 6}, - {84, 1, 3272356036LL, 6}, - {85, 1, 3233857729LL, 6}, - {86, 1, 3196254732LL, 6}, - {87, 1, 3159516172LL, 6}, - {88, 1, 3123612579LL, 6}, - {89, 1, 3088515809LL, 6}, - {90, 1, 3054198967LL, 6}, - {91, 1, 3020636341LL, 6}, - {92, 1, 2987803337LL, 6}, - {93, 1, 2955676419LL, 6}, - {94, 1, 2924233053LL, 6}, - {95, 1, 2893451653LL, 6}, - {96, 1, 2863311531LL, 6}, - {97, 1, 2833792856LL, 6}, - {98, 1, 2804876602LL, 6}, - {99, 1, 2776544515LL, 6}, - {100, 1, 2748779070LL, 6}, - {101, 1, 2721563436LL, 6}, - {102, 1, 2694881441LL, 6}, - {103, 1, 2668717544LL, 6}, - {104, 1, 2643056798LL, 6}, - {105, 1, 2617884829LL, 6}, - {106, 1, 2593187802LL, 6}, - {107, 1, 2568952402LL, 6}, - {108, 1, 2545165806LL, 6}, - {109, 1, 2521815661LL, 6}, - {110, 1, 2498890064LL, 6}, - {111, 1, 2476377541LL, 6}, - {112, 1, 2454267027LL, 6}, - {113, 1, 2432547850LL, 6}, - {114, 1, 2411209711LL, 6}, - {115, 1, 2390242670LL, 6}, - {116, 1, 2369637129LL, 6}, - {117, 1, 2349383821LL, 6}, - {118, 1, 2329473788LL, 6}, - {119, 1, 2309898378LL, 6}, - {120, 1, 2290649225LL, 6}, - {121, 1, 2271718240LL, 6}, - {122, 1, 2253097598LL, 6}, - {123, 1, 2234779732LL, 6}, - {124, 1, 2216757315LL, 6}, - {125, 1, 2199023256LL, 6}, - {126, 1, 2181570691LL, 6}, - {127, 1, 2164392969LL, 6}, + {65, 1, 4228890877ULL, 6}, + {66, 1, 1041204193ULL, 4}, + {67, 1, 128207979ULL, 1}, + {68, 1, 4042322161ULL, 6}, + {69, 1, 1991868891ULL, 5}, + {70, 3, 3558687188ULL, 6}, + {71, 1, 3871519817ULL, 6}, + {72, 1, 954437177ULL, 4}, + {73, 3, 3235934264ULL, 6}, + {74, 3, 
3134165324ULL, 6}, + {75, 1, 458129845ULL, 3}, + {76, 3, 2938661834ULL, 6}, + {77, 1, 892460737ULL, 4}, + {78, 3, 2753184164ULL, 6}, + {79, 1, 3479467177ULL, 6}, + {80, 1, 3435973837ULL, 6}, + {81, 1, 3393554407ULL, 6}, + {82, 1, 3352169597ULL, 6}, + {83, 1, 827945503ULL, 4}, + {84, 3, 2249744774ULL, 6}, + {85, 1, 3233857729ULL, 6}, + {86, 1, 799063683ULL, 4}, + {87, 1, 789879043ULL, 4}, + {88, 1, 3123612579ULL, 6}, + {89, 1, 3088515809ULL, 6}, + {90, 3, 1813430636ULL, 6}, + {91, 3, 1746305384ULL, 6}, + {92, 1, 2987803337ULL, 6}, + {93, 1, 2955676419ULL, 6}, + {94, 1, 2924233053ULL, 6}, + {95, 3, 1491936008ULL, 6}, + {96, 1, 2863311531ULL, 6}, + {97, 3, 1372618414ULL, 6}, + {98, 1, 1402438301ULL, 5}, + {99, 1, 2776544515ULL, 6}, + {100, 1, 1374389535ULL, 5}, + {101, 3, 1148159574ULL, 6}, + {102, 1, 2694881441ULL, 6}, + {103, 3, 1042467790ULL, 6}, + {104, 1, 1321528399ULL, 5}, + {105, 3, 940802360ULL, 6}, + {106, 3, 891408306ULL, 6}, + {107, 3, 842937506ULL, 6}, + {108, 3, 795364314ULL, 6}, + {109, 3, 748664024ULL, 6}, + {110, 3, 702812830ULL, 6}, + {111, 3, 657787784ULL, 6}, + {112, 3, 613566756ULL, 6}, + {113, 3, 570128402ULL, 6}, + {114, 3, 527452124ULL, 6}, + {115, 3, 485518042ULL, 6}, + {116, 1, 2369637129ULL, 6}, + {117, 3, 403800344ULL, 6}, + {118, 1, 582368447ULL, 4}, + {119, 1, 1154949189ULL, 5}, + {120, 1, 2290649225ULL, 6}, + {121, 3, 248469182ULL, 6}, + {122, 1, 1126548799ULL, 5}, + {123, 3, 174592166ULL, 6}, + {124, 3, 138547332ULL, 6}, + {125, 1, 274877907ULL, 3}, + {126, 3, 68174084ULL, 6}, + {127, 3, 33818640ULL, 6}, {128, 0, 0, 7}, - {129, 1, 4261672976LL, 7}, - {130, 1, 4228890877LL, 7}, - {131, 1, 4196609267LL, 7}, - {132, 1, 4164816772LL, 7}, - {133, 1, 4133502361LL, 7}, - {134, 1, 4102655328LL, 7}, - {135, 1, 4072265289LL, 7}, - {136, 1, 4042322161LL, 7}, - {137, 1, 4012816160LL, 7}, - {138, 1, 3983737782LL, 7}, - {139, 1, 3955077798LL, 7}, - {140, 1, 3926827243LL, 7}, - {141, 1, 3898977404LL, 7}, - {142, 1, 3871519817LL, 7}, - {143, 1, 
3844446251LL, 7}, - {144, 1, 3817748708LL, 7}, - {145, 1, 3791419407LL, 7}, - {146, 1, 3765450781LL, 7}, - {147, 1, 3739835469LL, 7}, - {148, 1, 3714566311LL, 7}, - {149, 1, 3689636335LL, 7}, - {150, 1, 3665038760LL, 7}, - {151, 1, 3640766980LL, 7}, - {152, 1, 3616814566LL, 7}, - {153, 1, 3593175255LL, 7}, - {154, 1, 3569842948LL, 7}, - {155, 1, 3546811703LL, 7}, + {129, 1, 266354561ULL, 3}, + {130, 1, 4228890877ULL, 7}, + {131, 1, 4196609267ULL, 7}, + {132, 1, 1041204193ULL, 5}, + {133, 1, 4133502361ULL, 7}, + {134, 1, 128207979ULL, 2}, + {135, 1, 4072265289ULL, 7}, + {136, 1, 4042322161ULL, 7}, + {137, 1, 125400505ULL, 2}, + {138, 1, 1991868891ULL, 6}, + {139, 1, 1977538899ULL, 6}, + {140, 3, 3558687188ULL, 7}, + {141, 1, 974744351ULL, 5}, + {142, 1, 3871519817ULL, 7}, + {143, 1, 3844446251ULL, 7}, + {144, 1, 954437177ULL, 5}, + {145, 1, 3791419407ULL, 7}, + {146, 3, 3235934264ULL, 7}, + {147, 1, 3739835469ULL, 7}, + {148, 3, 3134165324ULL, 7}, + {149, 1, 3689636335ULL, 7}, + {150, 1, 458129845ULL, 4}, + {151, 1, 910191745ULL, 5}, + {152, 3, 2938661834ULL, 7}, + {153, 1, 3593175255ULL, 7}, + {154, 1, 892460737ULL, 5}, + {155, 1, 3546811703ULL, 7}, + {156, 3, 2753184164ULL, 7}, + {157, 1, 875407347ULL, 5}, + {158, 1, 3479467177ULL, 7}, + {159, 3, 2620200174ULL, 7}, + {160, 1, 3435973837ULL, 7}, + {161, 1, 3414632385ULL, 7}, + {162, 1, 3393554407ULL, 7}, + {163, 1, 3372735055ULL, 7}, + {164, 1, 3352169597ULL, 7}, + {165, 1, 1665926709ULL, 6}, + {166, 1, 827945503ULL, 5}, + {167, 1, 1645975491ULL, 6}, + {168, 3, 2249744774ULL, 7}, + {169, 1, 1626496491ULL, 6}, + {170, 1, 3233857729ULL, 7}, + {171, 3, 2134925264ULL, 7}, + {172, 1, 799063683ULL, 5}, + {173, 3, 2060591246ULL, 7}, + {174, 1, 789879043ULL, 5}, + {175, 1, 1570730897ULL, 6}, + {176, 1, 3123612579ULL, 7}, + {177, 3, 1916962804ULL, 7}, + {178, 1, 3088515809ULL, 7}, + {179, 3, 1847555764ULL, 7}, + {180, 3, 1813430636ULL, 7}, + {181, 1, 3037324939ULL, 7}, + {182, 3, 1746305384ULL, 7}, + {183, 1, 3004130131ULL, 
7}, + {184, 1, 2987803337ULL, 7}, + {185, 3, 1648338800ULL, 7}, + {186, 1, 2955676419ULL, 7}, + {187, 1, 2939870663ULL, 7}, + {188, 1, 2924233053ULL, 7}, + {189, 3, 1522554544ULL, 7}, + {190, 3, 1491936008ULL, 7}, + {191, 1, 2878302691ULL, 7}, + {192, 1, 2863311531ULL, 7}, + {193, 1, 356059465ULL, 4}, + {194, 3, 1372618414ULL, 7}, + {195, 3, 1343553872ULL, 7}, + {196, 1, 1402438301ULL, 6}, + {197, 3, 1286310002ULL, 7}, + {198, 1, 2776544515ULL, 7}, + {199, 1, 1381296015ULL, 6}, + {200, 1, 1374389535ULL, 6}, + {201, 1, 42735993ULL, 1}, + {202, 3, 1148159574ULL, 7}, + {203, 1, 2708156719ULL, 7}, + {204, 1, 2694881441ULL, 7}, + {205, 1, 1340867839ULL, 6}, + {206, 3, 1042467790ULL, 7}, + {207, 1, 663956297ULL, 5}, + {208, 1, 1321528399ULL, 6}, + {209, 1, 2630410593ULL, 7}, + {210, 3, 940802360ULL, 7}, + {211, 1, 2605477791ULL, 7}, + {212, 3, 891408306ULL, 7}, + {213, 1, 2581013211ULL, 7}, + {214, 3, 842937506ULL, 7}, + {215, 1, 1278501893ULL, 6}, + {216, 3, 795364314ULL, 7}, + {217, 3, 771906564ULL, 7}, + {218, 3, 748664024ULL, 7}, + {219, 3, 725633744ULL, 7}, + {220, 3, 702812830ULL, 7}, + {221, 3, 680198440ULL, 7}, + {222, 3, 657787784ULL, 7}, + {223, 3, 635578120ULL, 7}, + {224, 3, 613566756ULL, 7}, + {225, 1, 2443359173ULL, 7}, + {226, 3, 570128402ULL, 7}, + {227, 3, 548696262ULL, 7}, + {228, 3, 527452124ULL, 7}, + {229, 1, 1200340205ULL, 6}, + {230, 3, 485518042ULL, 7}, + {231, 3, 464823300ULL, 7}, + {232, 1, 2369637129ULL, 7}, + {233, 3, 423966728ULL, 7}, + {234, 3, 403800344ULL, 7}, + {235, 3, 383805588ULL, 7}, + {236, 1, 582368447ULL, 5}, + {237, 3, 344322272ULL, 7}, + {238, 1, 1154949189ULL, 6}, + {239, 1, 2300233531ULL, 7}, + {240, 1, 2290649225ULL, 7}, + {241, 1, 285143057ULL, 4}, + {242, 3, 248469182ULL, 7}, + {243, 1, 2262369605ULL, 7}, + {244, 1, 1126548799ULL, 6}, + {245, 3, 192835266ULL, 7}, + {246, 3, 174592166ULL, 7}, + {247, 3, 156496784ULL, 7}, + {248, 3, 138547332ULL, 7}, + {249, 3, 120742052ULL, 7}, + {250, 1, 274877907ULL, 4}, + {251, 1, 
2190262207ULL, 7}, + {252, 3, 68174084ULL, 7}, + {253, 1, 2172947881ULL, 7}, + {254, 3, 33818640ULL, 7}, + {255, 1, 2155905153ULL, 7}, +}; +const int64_t table_s32[256][4] = { + {256, 1, 2147483649LL, 7}, + {1, 0, 0, 0}, + {2, 0, 0, 1}, + {3, 1, 2863311531LL, 1}, + {4, 0, 0, 2}, + {5, 1, 3435973837LL, 2}, + {6, 1, 2863311531LL, 2}, + {7, 1, 2454267027LL, 2}, + {8, 0, 0, 3}, + {9, 1, 3817748708LL, 3}, + {10, 1, 3435973837LL, 3}, + {11, 1, 3123612579LL, 3}, + {12, 1, 2863311531LL, 3}, + {13, 1, 2643056798LL, 3}, + {14, 1, 2454267027LL, 3}, + {15, 1, 2290649225LL, 3}, + {16, 0, 0, 4}, + {17, 1, 4042322161LL, 4}, + {18, 1, 3817748708LL, 4}, + {19, 1, 3616814566LL, 4}, + {20, 1, 3435973837LL, 4}, + {21, 1, 3272356036LL, 4}, + {22, 1, 3123612579LL, 4}, + {23, 1, 2987803337LL, 4}, + {24, 1, 2863311531LL, 4}, + {25, 1, 2748779070LL, 4}, + {26, 1, 2643056798LL, 4}, + {27, 1, 2545165806LL, 4}, + {28, 1, 2454267027LL, 4}, + {29, 1, 2369637129LL, 4}, + {30, 1, 2290649225LL, 4}, + {31, 1, 2216757315LL, 4}, + {32, 0, 0, 5}, + {33, 1, 4164816772LL, 5}, + {34, 1, 4042322161LL, 5}, + {35, 1, 3926827243LL, 5}, + {36, 1, 3817748708LL, 5}, + {37, 1, 3714566311LL, 5}, + {38, 1, 3616814566LL, 5}, + {39, 1, 3524075731LL, 5}, + {40, 1, 3435973837LL, 5}, + {41, 1, 3352169597LL, 5}, + {42, 1, 3272356036LL, 5}, + {43, 1, 3196254732LL, 5}, + {44, 1, 3123612579LL, 5}, + {45, 1, 3054198967LL, 5}, + {46, 1, 2987803337LL, 5}, + {47, 1, 2924233053LL, 5}, + {48, 1, 2863311531LL, 5}, + {49, 1, 2804876602LL, 5}, + {50, 1, 2748779070LL, 5}, + {51, 1, 2694881441LL, 5}, + {52, 1, 2643056798LL, 5}, + {53, 1, 2593187802LL, 5}, + {54, 1, 2545165806LL, 5}, + {55, 1, 2498890064LL, 5}, + {56, 1, 2454267027LL, 5}, + {57, 1, 2411209711LL, 5}, + {58, 1, 2369637129LL, 5}, + {59, 1, 2329473788LL, 5}, + {60, 1, 2290649225LL, 5}, + {61, 1, 2253097598LL, 5}, + {62, 1, 2216757315LL, 5}, + {63, 1, 2181570691LL, 5}, + {64, 0, 0, 6}, + {65, 1, 4228890877LL, 6}, + {66, 1, 4164816772LL, 6}, + {67, 1, 4102655328LL, 6}, + 
{68, 1, 4042322161LL, 6}, + {69, 1, 3983737782LL, 6}, + {70, 1, 3926827243LL, 6}, + {71, 1, 3871519817LL, 6}, + {72, 1, 3817748708LL, 6}, + {73, 1, 3765450781LL, 6}, + {74, 1, 3714566311LL, 6}, + {75, 1, 3665038760LL, 6}, + {76, 1, 3616814566LL, 6}, + {77, 1, 3569842948LL, 6}, + {78, 1, 3524075731LL, 6}, + {79, 1, 3479467177LL, 6}, + {80, 1, 3435973837LL, 6}, + {81, 1, 3393554407LL, 6}, + {82, 1, 3352169597LL, 6}, + {83, 1, 3311782012LL, 6}, + {84, 1, 3272356036LL, 6}, + {85, 1, 3233857729LL, 6}, + {86, 1, 3196254732LL, 6}, + {87, 1, 3159516172LL, 6}, + {88, 1, 3123612579LL, 6}, + {89, 1, 3088515809LL, 6}, + {90, 1, 3054198967LL, 6}, + {91, 1, 3020636341LL, 6}, + {92, 1, 2987803337LL, 6}, + {93, 1, 2955676419LL, 6}, + {94, 1, 2924233053LL, 6}, + {95, 1, 2893451653LL, 6}, + {96, 1, 2863311531LL, 6}, + {97, 1, 2833792856LL, 6}, + {98, 1, 2804876602LL, 6}, + {99, 1, 2776544515LL, 6}, + {100, 1, 2748779070LL, 6}, + {101, 1, 2721563436LL, 6}, + {102, 1, 2694881441LL, 6}, + {103, 1, 2668717544LL, 6}, + {104, 1, 2643056798LL, 6}, + {105, 1, 2617884829LL, 6}, + {106, 1, 2593187802LL, 6}, + {107, 1, 2568952402LL, 6}, + {108, 1, 2545165806LL, 6}, + {109, 1, 2521815661LL, 6}, + {110, 1, 2498890064LL, 6}, + {111, 1, 2476377541LL, 6}, + {112, 1, 2454267027LL, 6}, + {113, 1, 2432547850LL, 6}, + {114, 1, 2411209711LL, 6}, + {115, 1, 2390242670LL, 6}, + {116, 1, 2369637129LL, 6}, + {117, 1, 2349383821LL, 6}, + {118, 1, 2329473788LL, 6}, + {119, 1, 2309898378LL, 6}, + {120, 1, 2290649225LL, 6}, + {121, 1, 2271718240LL, 6}, + {122, 1, 2253097598LL, 6}, + {123, 1, 2234779732LL, 6}, + {124, 1, 2216757315LL, 6}, + {125, 1, 2199023256LL, 6}, + {126, 1, 2181570691LL, 6}, + {127, 1, 2164392969LL, 6}, + {128, 0, 0, 7}, + {129, 1, 4261672976LL, 7}, + {130, 1, 4228890877LL, 7}, + {131, 1, 4196609267LL, 7}, + {132, 1, 4164816772LL, 7}, + {133, 1, 4133502361LL, 7}, + {134, 1, 4102655328LL, 7}, + {135, 1, 4072265289LL, 7}, + {136, 1, 4042322161LL, 7}, + {137, 1, 4012816160LL, 7}, + {138, 1, 
3983737782LL, 7}, + {139, 1, 3955077798LL, 7}, + {140, 1, 3926827243LL, 7}, + {141, 1, 3898977404LL, 7}, + {142, 1, 3871519817LL, 7}, + {143, 1, 3844446251LL, 7}, + {144, 1, 3817748708LL, 7}, + {145, 1, 3791419407LL, 7}, + {146, 1, 3765450781LL, 7}, + {147, 1, 3739835469LL, 7}, + {148, 1, 3714566311LL, 7}, + {149, 1, 3689636335LL, 7}, + {150, 1, 3665038760LL, 7}, + {151, 1, 3640766980LL, 7}, + {152, 1, 3616814566LL, 7}, + {153, 1, 3593175255LL, 7}, + {154, 1, 3569842948LL, 7}, + {155, 1, 3546811703LL, 7}, + {156, 1, 3524075731LL, 7}, + {157, 1, 3501629388LL, 7}, + {158, 1, 3479467177LL, 7}, + {159, 1, 3457583736LL, 7}, + {160, 1, 3435973837LL, 7}, + {161, 1, 3414632385LL, 7}, + {162, 1, 3393554407LL, 7}, + {163, 1, 3372735055LL, 7}, + {164, 1, 3352169597LL, 7}, + {165, 1, 3331853418LL, 7}, + {166, 1, 3311782012LL, 7}, + {167, 1, 3291950982LL, 7}, + {168, 1, 3272356036LL, 7}, + {169, 1, 3252992982LL, 7}, + {170, 1, 3233857729LL, 7}, + {171, 1, 3214946281LL, 7}, + {172, 1, 3196254732LL, 7}, + {173, 1, 3177779272LL, 7}, + {174, 1, 3159516172LL, 7}, + {175, 1, 3141461794LL, 7}, + {176, 1, 3123612579LL, 7}, + {177, 1, 3105965051LL, 7}, + {178, 1, 3088515809LL, 7}, + {179, 1, 3071261531LL, 7}, + {180, 1, 3054198967LL, 7}, + {181, 1, 3037324939LL, 7}, + {182, 1, 3020636341LL, 7}, + {183, 1, 3004130131LL, 7}, + {184, 1, 2987803337LL, 7}, + {185, 1, 2971653049LL, 7}, + {186, 1, 2955676419LL, 7}, + {187, 1, 2939870663LL, 7}, + {188, 1, 2924233053LL, 7}, + {189, 1, 2908760921LL, 7}, + {190, 1, 2893451653LL, 7}, + {191, 1, 2878302691LL, 7}, + {192, 1, 2863311531LL, 7}, + {193, 1, 2848475720LL, 7}, + {194, 1, 2833792856LL, 7}, + {195, 1, 2819260585LL, 7}, + {196, 1, 2804876602LL, 7}, + {197, 1, 2790638650LL, 7}, + {198, 1, 2776544515LL, 7}, + {199, 1, 2762592030LL, 7}, + {200, 1, 2748779070LL, 7}, + {201, 1, 2735103552LL, 7}, + {202, 1, 2721563436LL, 7}, + {203, 1, 2708156719LL, 7}, + {204, 1, 2694881441LL, 7}, + {205, 1, 2681735678LL, 7}, + {206, 1, 2668717544LL, 7}, + {207, 
1, 2655825188LL, 7}, + {208, 1, 2643056798LL, 7}, + {209, 1, 2630410593LL, 7}, + {210, 1, 2617884829LL, 7}, + {211, 1, 2605477791LL, 7}, + {212, 1, 2593187802LL, 7}, + {213, 1, 2581013211LL, 7}, + {214, 1, 2568952402LL, 7}, + {215, 1, 2557003786LL, 7}, + {216, 1, 2545165806LL, 7}, + {217, 1, 2533436931LL, 7}, + {218, 1, 2521815661LL, 7}, + {219, 1, 2510300521LL, 7}, + {220, 1, 2498890064LL, 7}, + {221, 1, 2487582869LL, 7}, + {222, 1, 2476377541LL, 7}, + {223, 1, 2465272709LL, 7}, + {224, 1, 2454267027LL, 7}, + {225, 1, 2443359173LL, 7}, + {226, 1, 2432547850LL, 7}, + {227, 1, 2421831780LL, 7}, + {228, 1, 2411209711LL, 7}, + {229, 1, 2400680410LL, 7}, + {230, 1, 2390242670LL, 7}, + {231, 1, 2379895299LL, 7}, + {232, 1, 2369637129LL, 7}, + {233, 1, 2359467013LL, 7}, + {234, 1, 2349383821LL, 7}, + {235, 1, 2339386443LL, 7}, + {236, 1, 2329473788LL, 7}, + {237, 1, 2319644785LL, 7}, + {238, 1, 2309898378LL, 7}, + {239, 1, 2300233531LL, 7}, + {240, 1, 2290649225LL, 7}, + {241, 1, 2281144456LL, 7}, + {242, 1, 2271718240LL, 7}, + {243, 1, 2262369605LL, 7}, + {244, 1, 2253097598LL, 7}, + {245, 1, 2243901282LL, 7}, + {246, 1, 2234779732LL, 7}, + {247, 1, 2225732041LL, 7}, + {248, 1, 2216757315LL, 7}, + {249, 1, 2207854675LL, 7}, + {250, 1, 2199023256LL, 7}, + {251, 1, 2190262207LL, 7}, + {252, 1, 2181570691LL, 7}, + {253, 1, 2172947881LL, 7}, + {254, 1, 2164392969LL, 7}, + {255, 1, 2155905153LL, 7}, +}; +const int64_t table_srz32[256][4] = { + {256, 1, 2147483649LL, 7}, + {1, 0, 0, 0}, + {2, 0, 0, 1}, + {3, 1, 2863311531LL, 1}, + {4, 0, 0, 2}, + {5, 1, 3435973837LL, 2}, + {6, 1, 2863311531LL, 2}, + {7, 1, 2454267027LL, 2}, + {8, 0, 0, 3}, + {9, 1, 3817748708LL, 3}, + {10, 1, 3435973837LL, 3}, + {11, 1, 3123612579LL, 3}, + {12, 1, 2863311531LL, 3}, + {13, 1, 2643056798LL, 3}, + {14, 1, 2454267027LL, 3}, + {15, 1, 2290649225LL, 3}, + {16, 0, 0, 4}, + {17, 1, 4042322161LL, 4}, + {18, 1, 3817748708LL, 4}, + {19, 1, 3616814566LL, 4}, + {20, 1, 3435973837LL, 4}, + {21, 1, 
3272356036LL, 4}, + {22, 1, 3123612579LL, 4}, + {23, 1, 2987803337LL, 4}, + {24, 1, 2863311531LL, 4}, + {25, 1, 2748779070LL, 4}, + {26, 1, 2643056798LL, 4}, + {27, 1, 2545165806LL, 4}, + {28, 1, 2454267027LL, 4}, + {29, 1, 2369637129LL, 4}, + {30, 1, 2290649225LL, 4}, + {31, 1, 2216757315LL, 4}, + {32, 0, 0, 5}, + {33, 1, 4164816772LL, 5}, + {34, 1, 4042322161LL, 5}, + {35, 1, 3926827243LL, 5}, + {36, 1, 3817748708LL, 5}, + {37, 1, 3714566311LL, 5}, + {38, 1, 3616814566LL, 5}, + {39, 1, 3524075731LL, 5}, + {40, 1, 3435973837LL, 5}, + {41, 1, 3352169597LL, 5}, + {42, 1, 3272356036LL, 5}, + {43, 1, 3196254732LL, 5}, + {44, 1, 3123612579LL, 5}, + {45, 1, 3054198967LL, 5}, + {46, 1, 2987803337LL, 5}, + {47, 1, 2924233053LL, 5}, + {48, 1, 2863311531LL, 5}, + {49, 1, 2804876602LL, 5}, + {50, 1, 2748779070LL, 5}, + {51, 1, 2694881441LL, 5}, + {52, 1, 2643056798LL, 5}, + {53, 1, 2593187802LL, 5}, + {54, 1, 2545165806LL, 5}, + {55, 1, 2498890064LL, 5}, + {56, 1, 2454267027LL, 5}, + {57, 1, 2411209711LL, 5}, + {58, 1, 2369637129LL, 5}, + {59, 1, 2329473788LL, 5}, + {60, 1, 2290649225LL, 5}, + {61, 1, 2253097598LL, 5}, + {62, 1, 2216757315LL, 5}, + {63, 1, 2181570691LL, 5}, + {64, 0, 0, 6}, + {65, 1, 4228890877LL, 6}, + {66, 1, 4164816772LL, 6}, + {67, 1, 4102655328LL, 6}, + {68, 1, 4042322161LL, 6}, + {69, 1, 3983737782LL, 6}, + {70, 1, 3926827243LL, 6}, + {71, 1, 3871519817LL, 6}, + {72, 1, 3817748708LL, 6}, + {73, 1, 3765450781LL, 6}, + {74, 1, 3714566311LL, 6}, + {75, 1, 3665038760LL, 6}, + {76, 1, 3616814566LL, 6}, + {77, 1, 3569842948LL, 6}, + {78, 1, 3524075731LL, 6}, + {79, 1, 3479467177LL, 6}, + {80, 1, 3435973837LL, 6}, + {81, 1, 3393554407LL, 6}, + {82, 1, 3352169597LL, 6}, + {83, 1, 3311782012LL, 6}, + {84, 1, 3272356036LL, 6}, + {85, 1, 3233857729LL, 6}, + {86, 1, 3196254732LL, 6}, + {87, 1, 3159516172LL, 6}, + {88, 1, 3123612579LL, 6}, + {89, 1, 3088515809LL, 6}, + {90, 1, 3054198967LL, 6}, + {91, 1, 3020636341LL, 6}, + {92, 1, 2987803337LL, 6}, + {93, 1, 
2955676419LL, 6}, + {94, 1, 2924233053LL, 6}, + {95, 1, 2893451653LL, 6}, + {96, 1, 2863311531LL, 6}, + {97, 1, 2833792856LL, 6}, + {98, 1, 2804876602LL, 6}, + {99, 1, 2776544515LL, 6}, + {100, 1, 2748779070LL, 6}, + {101, 1, 2721563436LL, 6}, + {102, 1, 2694881441LL, 6}, + {103, 1, 2668717544LL, 6}, + {104, 1, 2643056798LL, 6}, + {105, 1, 2617884829LL, 6}, + {106, 1, 2593187802LL, 6}, + {107, 1, 2568952402LL, 6}, + {108, 1, 2545165806LL, 6}, + {109, 1, 2521815661LL, 6}, + {110, 1, 2498890064LL, 6}, + {111, 1, 2476377541LL, 6}, + {112, 1, 2454267027LL, 6}, + {113, 1, 2432547850LL, 6}, + {114, 1, 2411209711LL, 6}, + {115, 1, 2390242670LL, 6}, + {116, 1, 2369637129LL, 6}, + {117, 1, 2349383821LL, 6}, + {118, 1, 2329473788LL, 6}, + {119, 1, 2309898378LL, 6}, + {120, 1, 2290649225LL, 6}, + {121, 1, 2271718240LL, 6}, + {122, 1, 2253097598LL, 6}, + {123, 1, 2234779732LL, 6}, + {124, 1, 2216757315LL, 6}, + {125, 1, 2199023256LL, 6}, + {126, 1, 2181570691LL, 6}, + {127, 1, 2164392969LL, 6}, + {128, 0, 0, 7}, + {129, 1, 4261672976LL, 7}, + {130, 1, 4228890877LL, 7}, + {131, 1, 4196609267LL, 7}, + {132, 1, 4164816772LL, 7}, + {133, 1, 4133502361LL, 7}, + {134, 1, 4102655328LL, 7}, + {135, 1, 4072265289LL, 7}, + {136, 1, 4042322161LL, 7}, + {137, 1, 4012816160LL, 7}, + {138, 1, 3983737782LL, 7}, + {139, 1, 3955077798LL, 7}, + {140, 1, 3926827243LL, 7}, + {141, 1, 3898977404LL, 7}, + {142, 1, 3871519817LL, 7}, + {143, 1, 3844446251LL, 7}, + {144, 1, 3817748708LL, 7}, + {145, 1, 3791419407LL, 7}, + {146, 1, 3765450781LL, 7}, + {147, 1, 3739835469LL, 7}, + {148, 1, 3714566311LL, 7}, + {149, 1, 3689636335LL, 7}, + {150, 1, 3665038760LL, 7}, + {151, 1, 3640766980LL, 7}, + {152, 1, 3616814566LL, 7}, + {153, 1, 3593175255LL, 7}, + {154, 1, 3569842948LL, 7}, + {155, 1, 3546811703LL, 7}, {156, 1, 3524075731LL, 7}, {157, 1, 3501629388LL, 7}, {158, 1, 3479467177LL, 7}, @@ -1822,7 +2596,265 @@ const int64_t table_runtime_u8[256][4] = { {254, 2, 3ULL, 7}, {255, 2, 2ULL, 7}, }; -const 
int64_t table_runtime_s8[256][4] = { +const int64_t table_runtime_s8[256][4] = { + {0, 0, 0, 0}, // unused + {0, 0, 0, 0}, // unused + {2, 1, 129LL, 0}, + {3, 1, 171LL, 1}, + {4, 1, 129LL, 1}, + {5, 1, 205LL, 2}, + {6, 1, 171LL, 2}, + {7, 1, 147LL, 2}, + {8, 1, 129LL, 2}, + {9, 1, 228LL, 3}, + {10, 1, 205LL, 3}, + {11, 1, 187LL, 3}, + {12, 1, 171LL, 3}, + {13, 1, 158LL, 3}, + {14, 1, 147LL, 3}, + {15, 1, 137LL, 3}, + {16, 1, 129LL, 3}, + {17, 1, 241LL, 4}, + {18, 1, 228LL, 4}, + {19, 1, 216LL, 4}, + {20, 1, 205LL, 4}, + {21, 1, 196LL, 4}, + {22, 1, 187LL, 4}, + {23, 1, 179LL, 4}, + {24, 1, 171LL, 4}, + {25, 1, 164LL, 4}, + {26, 1, 158LL, 4}, + {27, 1, 152LL, 4}, + {28, 1, 147LL, 4}, + {29, 1, 142LL, 4}, + {30, 1, 137LL, 4}, + {31, 1, 133LL, 4}, + {32, 1, 129LL, 4}, + {33, 1, 249LL, 5}, + {34, 1, 241LL, 5}, + {35, 1, 235LL, 5}, + {36, 1, 228LL, 5}, + {37, 1, 222LL, 5}, + {38, 1, 216LL, 5}, + {39, 1, 211LL, 5}, + {40, 1, 205LL, 5}, + {41, 1, 200LL, 5}, + {42, 1, 196LL, 5}, + {43, 1, 191LL, 5}, + {44, 1, 187LL, 5}, + {45, 1, 183LL, 5}, + {46, 1, 179LL, 5}, + {47, 1, 175LL, 5}, + {48, 1, 171LL, 5}, + {49, 1, 168LL, 5}, + {50, 1, 164LL, 5}, + {51, 1, 161LL, 5}, + {52, 1, 158LL, 5}, + {53, 1, 155LL, 5}, + {54, 1, 152LL, 5}, + {55, 1, 149LL, 5}, + {56, 1, 147LL, 5}, + {57, 1, 144LL, 5}, + {58, 1, 142LL, 5}, + {59, 1, 139LL, 5}, + {60, 1, 137LL, 5}, + {61, 1, 135LL, 5}, + {62, 1, 133LL, 5}, + {63, 1, 131LL, 5}, + {64, 1, 129LL, 5}, + {65, 1, 253LL, 6}, + {66, 1, 249LL, 6}, + {67, 1, 245LL, 6}, + {68, 1, 241LL, 6}, + {69, 1, 238LL, 6}, + {70, 1, 235LL, 6}, + {71, 1, 231LL, 6}, + {72, 1, 228LL, 6}, + {73, 1, 225LL, 6}, + {74, 1, 222LL, 6}, + {75, 1, 219LL, 6}, + {76, 1, 216LL, 6}, + {77, 1, 213LL, 6}, + {78, 1, 211LL, 6}, + {79, 1, 208LL, 6}, + {80, 1, 205LL, 6}, + {81, 1, 203LL, 6}, + {82, 1, 200LL, 6}, + {83, 1, 198LL, 6}, + {84, 1, 196LL, 6}, + {85, 1, 193LL, 6}, + {86, 1, 191LL, 6}, + {87, 1, 189LL, 6}, + {88, 1, 187LL, 6}, + {89, 1, 185LL, 6}, + {90, 1, 183LL, 6}, + 
{91, 1, 181LL, 6}, + {92, 1, 179LL, 6}, + {93, 1, 177LL, 6}, + {94, 1, 175LL, 6}, + {95, 1, 173LL, 6}, + {96, 1, 171LL, 6}, + {97, 1, 169LL, 6}, + {98, 1, 168LL, 6}, + {99, 1, 166LL, 6}, + {100, 1, 164LL, 6}, + {101, 1, 163LL, 6}, + {102, 1, 161LL, 6}, + {103, 1, 160LL, 6}, + {104, 1, 158LL, 6}, + {105, 1, 157LL, 6}, + {106, 1, 155LL, 6}, + {107, 1, 154LL, 6}, + {108, 1, 152LL, 6}, + {109, 1, 151LL, 6}, + {110, 1, 149LL, 6}, + {111, 1, 148LL, 6}, + {112, 1, 147LL, 6}, + {113, 1, 145LL, 6}, + {114, 1, 144LL, 6}, + {115, 1, 143LL, 6}, + {116, 1, 142LL, 6}, + {117, 1, 141LL, 6}, + {118, 1, 139LL, 6}, + {119, 1, 138LL, 6}, + {120, 1, 137LL, 6}, + {121, 1, 136LL, 6}, + {122, 1, 135LL, 6}, + {123, 1, 134LL, 6}, + {124, 1, 133LL, 6}, + {125, 1, 132LL, 6}, + {126, 1, 131LL, 6}, + {127, 1, 130LL, 6}, + {128, 1, 129LL, 6}, + {129, 1, 255LL, 7}, + {130, 1, 253LL, 7}, + {131, 1, 251LL, 7}, + {132, 1, 249LL, 7}, + {133, 1, 247LL, 7}, + {134, 1, 245LL, 7}, + {135, 1, 243LL, 7}, + {136, 1, 241LL, 7}, + {137, 1, 240LL, 7}, + {138, 1, 238LL, 7}, + {139, 1, 236LL, 7}, + {140, 1, 235LL, 7}, + {141, 1, 233LL, 7}, + {142, 1, 231LL, 7}, + {143, 1, 230LL, 7}, + {144, 1, 228LL, 7}, + {145, 1, 226LL, 7}, + {146, 1, 225LL, 7}, + {147, 1, 223LL, 7}, + {148, 1, 222LL, 7}, + {149, 1, 220LL, 7}, + {150, 1, 219LL, 7}, + {151, 1, 218LL, 7}, + {152, 1, 216LL, 7}, + {153, 1, 215LL, 7}, + {154, 1, 213LL, 7}, + {155, 1, 212LL, 7}, + {156, 1, 211LL, 7}, + {157, 1, 209LL, 7}, + {158, 1, 208LL, 7}, + {159, 1, 207LL, 7}, + {160, 1, 205LL, 7}, + {161, 1, 204LL, 7}, + {162, 1, 203LL, 7}, + {163, 1, 202LL, 7}, + {164, 1, 200LL, 7}, + {165, 1, 199LL, 7}, + {166, 1, 198LL, 7}, + {167, 1, 197LL, 7}, + {168, 1, 196LL, 7}, + {169, 1, 194LL, 7}, + {170, 1, 193LL, 7}, + {171, 1, 192LL, 7}, + {172, 1, 191LL, 7}, + {173, 1, 190LL, 7}, + {174, 1, 189LL, 7}, + {175, 1, 188LL, 7}, + {176, 1, 187LL, 7}, + {177, 1, 186LL, 7}, + {178, 1, 185LL, 7}, + {179, 1, 184LL, 7}, + {180, 1, 183LL, 7}, + {181, 1, 182LL, 7}, + {182, 
1, 181LL, 7}, + {183, 1, 180LL, 7}, + {184, 1, 179LL, 7}, + {185, 1, 178LL, 7}, + {186, 1, 177LL, 7}, + {187, 1, 176LL, 7}, + {188, 1, 175LL, 7}, + {189, 1, 174LL, 7}, + {190, 1, 173LL, 7}, + {191, 1, 172LL, 7}, + {192, 1, 171LL, 7}, + {193, 1, 170LL, 7}, + {194, 1, 169LL, 7}, + {195, 1, 169LL, 7}, + {196, 1, 168LL, 7}, + {197, 1, 167LL, 7}, + {198, 1, 166LL, 7}, + {199, 1, 165LL, 7}, + {200, 1, 164LL, 7}, + {201, 1, 164LL, 7}, + {202, 1, 163LL, 7}, + {203, 1, 162LL, 7}, + {204, 1, 161LL, 7}, + {205, 1, 160LL, 7}, + {206, 1, 160LL, 7}, + {207, 1, 159LL, 7}, + {208, 1, 158LL, 7}, + {209, 1, 157LL, 7}, + {210, 1, 157LL, 7}, + {211, 1, 156LL, 7}, + {212, 1, 155LL, 7}, + {213, 1, 154LL, 7}, + {214, 1, 154LL, 7}, + {215, 1, 153LL, 7}, + {216, 1, 152LL, 7}, + {217, 1, 152LL, 7}, + {218, 1, 151LL, 7}, + {219, 1, 150LL, 7}, + {220, 1, 149LL, 7}, + {221, 1, 149LL, 7}, + {222, 1, 148LL, 7}, + {223, 1, 147LL, 7}, + {224, 1, 147LL, 7}, + {225, 1, 146LL, 7}, + {226, 1, 145LL, 7}, + {227, 1, 145LL, 7}, + {228, 1, 144LL, 7}, + {229, 1, 144LL, 7}, + {230, 1, 143LL, 7}, + {231, 1, 142LL, 7}, + {232, 1, 142LL, 7}, + {233, 1, 141LL, 7}, + {234, 1, 141LL, 7}, + {235, 1, 140LL, 7}, + {236, 1, 139LL, 7}, + {237, 1, 139LL, 7}, + {238, 1, 138LL, 7}, + {239, 1, 138LL, 7}, + {240, 1, 137LL, 7}, + {241, 1, 136LL, 7}, + {242, 1, 136LL, 7}, + {243, 1, 135LL, 7}, + {244, 1, 135LL, 7}, + {245, 1, 134LL, 7}, + {246, 1, 134LL, 7}, + {247, 1, 133LL, 7}, + {248, 1, 133LL, 7}, + {249, 1, 132LL, 7}, + {250, 1, 132LL, 7}, + {251, 1, 131LL, 7}, + {252, 1, 131LL, 7}, + {253, 1, 130LL, 7}, + {254, 1, 130LL, 7}, + {255, 1, 129LL, 7}, +}; +const int64_t table_runtime_srz8[256][4] = { {0, 0, 0, 0}, // unused {0, 0, 0, 0}, // unused {2, 1, 129LL, 0}, @@ -2338,7 +3370,265 @@ const int64_t table_runtime_u16[256][4] = { {254, 2, 517ULL, 7}, {255, 2, 258ULL, 7}, }; -const int64_t table_runtime_s16[256][4] = { +const int64_t table_runtime_s16[256][4] = { + {0, 0, 0, 0}, // unused + {0, 0, 0, 0}, // unused + {2, 1, 
32769LL, 0}, + {3, 1, 43691LL, 1}, + {4, 1, 32769LL, 1}, + {5, 1, 52429LL, 2}, + {6, 1, 43691LL, 2}, + {7, 1, 37450LL, 2}, + {8, 1, 32769LL, 2}, + {9, 1, 58255LL, 3}, + {10, 1, 52429LL, 3}, + {11, 1, 47663LL, 3}, + {12, 1, 43691LL, 3}, + {13, 1, 40330LL, 3}, + {14, 1, 37450LL, 3}, + {15, 1, 34953LL, 3}, + {16, 1, 32769LL, 3}, + {17, 1, 61681LL, 4}, + {18, 1, 58255LL, 4}, + {19, 1, 55189LL, 4}, + {20, 1, 52429LL, 4}, + {21, 1, 49933LL, 4}, + {22, 1, 47663LL, 4}, + {23, 1, 45591LL, 4}, + {24, 1, 43691LL, 4}, + {25, 1, 41944LL, 4}, + {26, 1, 40330LL, 4}, + {27, 1, 38837LL, 4}, + {28, 1, 37450LL, 4}, + {29, 1, 36158LL, 4}, + {30, 1, 34953LL, 4}, + {31, 1, 33826LL, 4}, + {32, 1, 32769LL, 4}, + {33, 1, 63551LL, 5}, + {34, 1, 61681LL, 5}, + {35, 1, 59919LL, 5}, + {36, 1, 58255LL, 5}, + {37, 1, 56680LL, 5}, + {38, 1, 55189LL, 5}, + {39, 1, 53774LL, 5}, + {40, 1, 52429LL, 5}, + {41, 1, 51151LL, 5}, + {42, 1, 49933LL, 5}, + {43, 1, 48771LL, 5}, + {44, 1, 47663LL, 5}, + {45, 1, 46604LL, 5}, + {46, 1, 45591LL, 5}, + {47, 1, 44621LL, 5}, + {48, 1, 43691LL, 5}, + {49, 1, 42800LL, 5}, + {50, 1, 41944LL, 5}, + {51, 1, 41121LL, 5}, + {52, 1, 40330LL, 5}, + {53, 1, 39569LL, 5}, + {54, 1, 38837LL, 5}, + {55, 1, 38131LL, 5}, + {56, 1, 37450LL, 5}, + {57, 1, 36793LL, 5}, + {58, 1, 36158LL, 5}, + {59, 1, 35545LL, 5}, + {60, 1, 34953LL, 5}, + {61, 1, 34380LL, 5}, + {62, 1, 33826LL, 5}, + {63, 1, 33289LL, 5}, + {64, 1, 32769LL, 5}, + {65, 1, 64528LL, 6}, + {66, 1, 63551LL, 6}, + {67, 1, 62602LL, 6}, + {68, 1, 61681LL, 6}, + {69, 1, 60788LL, 6}, + {70, 1, 59919LL, 6}, + {71, 1, 59075LL, 6}, + {72, 1, 58255LL, 6}, + {73, 1, 57457LL, 6}, + {74, 1, 56680LL, 6}, + {75, 1, 55925LL, 6}, + {76, 1, 55189LL, 6}, + {77, 1, 54472LL, 6}, + {78, 1, 53774LL, 6}, + {79, 1, 53093LL, 6}, + {80, 1, 52429LL, 6}, + {81, 1, 51782LL, 6}, + {82, 1, 51151LL, 6}, + {83, 1, 50534LL, 6}, + {84, 1, 49933LL, 6}, + {85, 1, 49345LL, 6}, + {86, 1, 48771LL, 6}, + {87, 1, 48211LL, 6}, + {88, 1, 47663LL, 6}, + {89, 1, 
47128LL, 6}, + {90, 1, 46604LL, 6}, + {91, 1, 46092LL, 6}, + {92, 1, 45591LL, 6}, + {93, 1, 45101LL, 6}, + {94, 1, 44621LL, 6}, + {95, 1, 44151LL, 6}, + {96, 1, 43691LL, 6}, + {97, 1, 43241LL, 6}, + {98, 1, 42800LL, 6}, + {99, 1, 42367LL, 6}, + {100, 1, 41944LL, 6}, + {101, 1, 41528LL, 6}, + {102, 1, 41121LL, 6}, + {103, 1, 40722LL, 6}, + {104, 1, 40330LL, 6}, + {105, 1, 39946LL, 6}, + {106, 1, 39569LL, 6}, + {107, 1, 39200LL, 6}, + {108, 1, 38837LL, 6}, + {109, 1, 38480LL, 6}, + {110, 1, 38131LL, 6}, + {111, 1, 37787LL, 6}, + {112, 1, 37450LL, 6}, + {113, 1, 37118LL, 6}, + {114, 1, 36793LL, 6}, + {115, 1, 36473LL, 6}, + {116, 1, 36158LL, 6}, + {117, 1, 35849LL, 6}, + {118, 1, 35545LL, 6}, + {119, 1, 35247LL, 6}, + {120, 1, 34953LL, 6}, + {121, 1, 34664LL, 6}, + {122, 1, 34380LL, 6}, + {123, 1, 34101LL, 6}, + {124, 1, 33826LL, 6}, + {125, 1, 33555LL, 6}, + {126, 1, 33289LL, 6}, + {127, 1, 33027LL, 6}, + {128, 1, 32769LL, 6}, + {129, 1, 65028LL, 7}, + {130, 1, 64528LL, 7}, + {131, 1, 64036LL, 7}, + {132, 1, 63551LL, 7}, + {133, 1, 63073LL, 7}, + {134, 1, 62602LL, 7}, + {135, 1, 62138LL, 7}, + {136, 1, 61681LL, 7}, + {137, 1, 61231LL, 7}, + {138, 1, 60788LL, 7}, + {139, 1, 60350LL, 7}, + {140, 1, 59919LL, 7}, + {141, 1, 59494LL, 7}, + {142, 1, 59075LL, 7}, + {143, 1, 58662LL, 7}, + {144, 1, 58255LL, 7}, + {145, 1, 57853LL, 7}, + {146, 1, 57457LL, 7}, + {147, 1, 57066LL, 7}, + {148, 1, 56680LL, 7}, + {149, 1, 56300LL, 7}, + {150, 1, 55925LL, 7}, + {151, 1, 55554LL, 7}, + {152, 1, 55189LL, 7}, + {153, 1, 54828LL, 7}, + {154, 1, 54472LL, 7}, + {155, 1, 54121LL, 7}, + {156, 1, 53774LL, 7}, + {157, 1, 53431LL, 7}, + {158, 1, 53093LL, 7}, + {159, 1, 52759LL, 7}, + {160, 1, 52429LL, 7}, + {161, 1, 52104LL, 7}, + {162, 1, 51782LL, 7}, + {163, 1, 51464LL, 7}, + {164, 1, 51151LL, 7}, + {165, 1, 50841LL, 7}, + {166, 1, 50534LL, 7}, + {167, 1, 50232LL, 7}, + {168, 1, 49933LL, 7}, + {169, 1, 49637LL, 7}, + {170, 1, 49345LL, 7}, + {171, 1, 49057LL, 7}, + {172, 1, 48771LL, 7}, + 
{173, 1, 48490LL, 7}, + {174, 1, 48211LL, 7}, + {175, 1, 47935LL, 7}, + {176, 1, 47663LL, 7}, + {177, 1, 47394LL, 7}, + {178, 1, 47128LL, 7}, + {179, 1, 46864LL, 7}, + {180, 1, 46604LL, 7}, + {181, 1, 46346LL, 7}, + {182, 1, 46092LL, 7}, + {183, 1, 45840LL, 7}, + {184, 1, 45591LL, 7}, + {185, 1, 45344LL, 7}, + {186, 1, 45101LL, 7}, + {187, 1, 44859LL, 7}, + {188, 1, 44621LL, 7}, + {189, 1, 44385LL, 7}, + {190, 1, 44151LL, 7}, + {191, 1, 43920LL, 7}, + {192, 1, 43691LL, 7}, + {193, 1, 43465LL, 7}, + {194, 1, 43241LL, 7}, + {195, 1, 43019LL, 7}, + {196, 1, 42800LL, 7}, + {197, 1, 42582LL, 7}, + {198, 1, 42367LL, 7}, + {199, 1, 42154LL, 7}, + {200, 1, 41944LL, 7}, + {201, 1, 41735LL, 7}, + {202, 1, 41528LL, 7}, + {203, 1, 41324LL, 7}, + {204, 1, 41121LL, 7}, + {205, 1, 40921LL, 7}, + {206, 1, 40722LL, 7}, + {207, 1, 40525LL, 7}, + {208, 1, 40330LL, 7}, + {209, 1, 40137LL, 7}, + {210, 1, 39946LL, 7}, + {211, 1, 39757LL, 7}, + {212, 1, 39569LL, 7}, + {213, 1, 39384LL, 7}, + {214, 1, 39200LL, 7}, + {215, 1, 39017LL, 7}, + {216, 1, 38837LL, 7}, + {217, 1, 38658LL, 7}, + {218, 1, 38480LL, 7}, + {219, 1, 38305LL, 7}, + {220, 1, 38131LL, 7}, + {221, 1, 37958LL, 7}, + {222, 1, 37787LL, 7}, + {223, 1, 37618LL, 7}, + {224, 1, 37450LL, 7}, + {225, 1, 37283LL, 7}, + {226, 1, 37118LL, 7}, + {227, 1, 36955LL, 7}, + {228, 1, 36793LL, 7}, + {229, 1, 36632LL, 7}, + {230, 1, 36473LL, 7}, + {231, 1, 36315LL, 7}, + {232, 1, 36158LL, 7}, + {233, 1, 36003LL, 7}, + {234, 1, 35849LL, 7}, + {235, 1, 35697LL, 7}, + {236, 1, 35545LL, 7}, + {237, 1, 35395LL, 7}, + {238, 1, 35247LL, 7}, + {239, 1, 35099LL, 7}, + {240, 1, 34953LL, 7}, + {241, 1, 34808LL, 7}, + {242, 1, 34664LL, 7}, + {243, 1, 34522LL, 7}, + {244, 1, 34380LL, 7}, + {245, 1, 34240LL, 7}, + {246, 1, 34101LL, 7}, + {247, 1, 33962LL, 7}, + {248, 1, 33826LL, 7}, + {249, 1, 33690LL, 7}, + {250, 1, 33555LL, 7}, + {251, 1, 33421LL, 7}, + {252, 1, 33289LL, 7}, + {253, 1, 33157LL, 7}, + {254, 1, 33027LL, 7}, + {255, 1, 32897LL, 7}, +}; 
+const int64_t table_runtime_srz16[256][4] = { {0, 0, 0, 0}, // unused {0, 0, 0, 0}, // unused {2, 1, 32769LL, 0}, @@ -2854,7 +4144,265 @@ const int64_t table_runtime_u32[256][4] = { {254, 2, 33818641ULL, 7}, {255, 2, 16843010ULL, 7}, }; -const int64_t table_runtime_s32[256][4] = { +const int64_t table_runtime_s32[256][4] = { + {0, 0, 0, 0}, // unused + {0, 0, 0, 0}, // unused + {2, 1, 2147483649LL, 0}, + {3, 1, 2863311531LL, 1}, + {4, 1, 2147483649LL, 1}, + {5, 1, 3435973837LL, 2}, + {6, 1, 2863311531LL, 2}, + {7, 1, 2454267027LL, 2}, + {8, 1, 2147483649LL, 2}, + {9, 1, 3817748708LL, 3}, + {10, 1, 3435973837LL, 3}, + {11, 1, 3123612579LL, 3}, + {12, 1, 2863311531LL, 3}, + {13, 1, 2643056798LL, 3}, + {14, 1, 2454267027LL, 3}, + {15, 1, 2290649225LL, 3}, + {16, 1, 2147483649LL, 3}, + {17, 1, 4042322161LL, 4}, + {18, 1, 3817748708LL, 4}, + {19, 1, 3616814566LL, 4}, + {20, 1, 3435973837LL, 4}, + {21, 1, 3272356036LL, 4}, + {22, 1, 3123612579LL, 4}, + {23, 1, 2987803337LL, 4}, + {24, 1, 2863311531LL, 4}, + {25, 1, 2748779070LL, 4}, + {26, 1, 2643056798LL, 4}, + {27, 1, 2545165806LL, 4}, + {28, 1, 2454267027LL, 4}, + {29, 1, 2369637129LL, 4}, + {30, 1, 2290649225LL, 4}, + {31, 1, 2216757315LL, 4}, + {32, 1, 2147483649LL, 4}, + {33, 1, 4164816772LL, 5}, + {34, 1, 4042322161LL, 5}, + {35, 1, 3926827243LL, 5}, + {36, 1, 3817748708LL, 5}, + {37, 1, 3714566311LL, 5}, + {38, 1, 3616814566LL, 5}, + {39, 1, 3524075731LL, 5}, + {40, 1, 3435973837LL, 5}, + {41, 1, 3352169597LL, 5}, + {42, 1, 3272356036LL, 5}, + {43, 1, 3196254732LL, 5}, + {44, 1, 3123612579LL, 5}, + {45, 1, 3054198967LL, 5}, + {46, 1, 2987803337LL, 5}, + {47, 1, 2924233053LL, 5}, + {48, 1, 2863311531LL, 5}, + {49, 1, 2804876602LL, 5}, + {50, 1, 2748779070LL, 5}, + {51, 1, 2694881441LL, 5}, + {52, 1, 2643056798LL, 5}, + {53, 1, 2593187802LL, 5}, + {54, 1, 2545165806LL, 5}, + {55, 1, 2498890064LL, 5}, + {56, 1, 2454267027LL, 5}, + {57, 1, 2411209711LL, 5}, + {58, 1, 2369637129LL, 5}, + {59, 1, 2329473788LL, 5}, + 
{60, 1, 2290649225LL, 5}, + {61, 1, 2253097598LL, 5}, + {62, 1, 2216757315LL, 5}, + {63, 1, 2181570691LL, 5}, + {64, 1, 2147483649LL, 5}, + {65, 1, 4228890877LL, 6}, + {66, 1, 4164816772LL, 6}, + {67, 1, 4102655328LL, 6}, + {68, 1, 4042322161LL, 6}, + {69, 1, 3983737782LL, 6}, + {70, 1, 3926827243LL, 6}, + {71, 1, 3871519817LL, 6}, + {72, 1, 3817748708LL, 6}, + {73, 1, 3765450781LL, 6}, + {74, 1, 3714566311LL, 6}, + {75, 1, 3665038760LL, 6}, + {76, 1, 3616814566LL, 6}, + {77, 1, 3569842948LL, 6}, + {78, 1, 3524075731LL, 6}, + {79, 1, 3479467177LL, 6}, + {80, 1, 3435973837LL, 6}, + {81, 1, 3393554407LL, 6}, + {82, 1, 3352169597LL, 6}, + {83, 1, 3311782012LL, 6}, + {84, 1, 3272356036LL, 6}, + {85, 1, 3233857729LL, 6}, + {86, 1, 3196254732LL, 6}, + {87, 1, 3159516172LL, 6}, + {88, 1, 3123612579LL, 6}, + {89, 1, 3088515809LL, 6}, + {90, 1, 3054198967LL, 6}, + {91, 1, 3020636341LL, 6}, + {92, 1, 2987803337LL, 6}, + {93, 1, 2955676419LL, 6}, + {94, 1, 2924233053LL, 6}, + {95, 1, 2893451653LL, 6}, + {96, 1, 2863311531LL, 6}, + {97, 1, 2833792856LL, 6}, + {98, 1, 2804876602LL, 6}, + {99, 1, 2776544515LL, 6}, + {100, 1, 2748779070LL, 6}, + {101, 1, 2721563436LL, 6}, + {102, 1, 2694881441LL, 6}, + {103, 1, 2668717544LL, 6}, + {104, 1, 2643056798LL, 6}, + {105, 1, 2617884829LL, 6}, + {106, 1, 2593187802LL, 6}, + {107, 1, 2568952402LL, 6}, + {108, 1, 2545165806LL, 6}, + {109, 1, 2521815661LL, 6}, + {110, 1, 2498890064LL, 6}, + {111, 1, 2476377541LL, 6}, + {112, 1, 2454267027LL, 6}, + {113, 1, 2432547850LL, 6}, + {114, 1, 2411209711LL, 6}, + {115, 1, 2390242670LL, 6}, + {116, 1, 2369637129LL, 6}, + {117, 1, 2349383821LL, 6}, + {118, 1, 2329473788LL, 6}, + {119, 1, 2309898378LL, 6}, + {120, 1, 2290649225LL, 6}, + {121, 1, 2271718240LL, 6}, + {122, 1, 2253097598LL, 6}, + {123, 1, 2234779732LL, 6}, + {124, 1, 2216757315LL, 6}, + {125, 1, 2199023256LL, 6}, + {126, 1, 2181570691LL, 6}, + {127, 1, 2164392969LL, 6}, + {128, 1, 2147483649LL, 6}, + {129, 1, 4261672976LL, 7}, + {130, 1, 
4228890877LL, 7}, + {131, 1, 4196609267LL, 7}, + {132, 1, 4164816772LL, 7}, + {133, 1, 4133502361LL, 7}, + {134, 1, 4102655328LL, 7}, + {135, 1, 4072265289LL, 7}, + {136, 1, 4042322161LL, 7}, + {137, 1, 4012816160LL, 7}, + {138, 1, 3983737782LL, 7}, + {139, 1, 3955077798LL, 7}, + {140, 1, 3926827243LL, 7}, + {141, 1, 3898977404LL, 7}, + {142, 1, 3871519817LL, 7}, + {143, 1, 3844446251LL, 7}, + {144, 1, 3817748708LL, 7}, + {145, 1, 3791419407LL, 7}, + {146, 1, 3765450781LL, 7}, + {147, 1, 3739835469LL, 7}, + {148, 1, 3714566311LL, 7}, + {149, 1, 3689636335LL, 7}, + {150, 1, 3665038760LL, 7}, + {151, 1, 3640766980LL, 7}, + {152, 1, 3616814566LL, 7}, + {153, 1, 3593175255LL, 7}, + {154, 1, 3569842948LL, 7}, + {155, 1, 3546811703LL, 7}, + {156, 1, 3524075731LL, 7}, + {157, 1, 3501629388LL, 7}, + {158, 1, 3479467177LL, 7}, + {159, 1, 3457583736LL, 7}, + {160, 1, 3435973837LL, 7}, + {161, 1, 3414632385LL, 7}, + {162, 1, 3393554407LL, 7}, + {163, 1, 3372735055LL, 7}, + {164, 1, 3352169597LL, 7}, + {165, 1, 3331853418LL, 7}, + {166, 1, 3311782012LL, 7}, + {167, 1, 3291950982LL, 7}, + {168, 1, 3272356036LL, 7}, + {169, 1, 3252992982LL, 7}, + {170, 1, 3233857729LL, 7}, + {171, 1, 3214946281LL, 7}, + {172, 1, 3196254732LL, 7}, + {173, 1, 3177779272LL, 7}, + {174, 1, 3159516172LL, 7}, + {175, 1, 3141461794LL, 7}, + {176, 1, 3123612579LL, 7}, + {177, 1, 3105965051LL, 7}, + {178, 1, 3088515809LL, 7}, + {179, 1, 3071261531LL, 7}, + {180, 1, 3054198967LL, 7}, + {181, 1, 3037324939LL, 7}, + {182, 1, 3020636341LL, 7}, + {183, 1, 3004130131LL, 7}, + {184, 1, 2987803337LL, 7}, + {185, 1, 2971653049LL, 7}, + {186, 1, 2955676419LL, 7}, + {187, 1, 2939870663LL, 7}, + {188, 1, 2924233053LL, 7}, + {189, 1, 2908760921LL, 7}, + {190, 1, 2893451653LL, 7}, + {191, 1, 2878302691LL, 7}, + {192, 1, 2863311531LL, 7}, + {193, 1, 2848475720LL, 7}, + {194, 1, 2833792856LL, 7}, + {195, 1, 2819260585LL, 7}, + {196, 1, 2804876602LL, 7}, + {197, 1, 2790638650LL, 7}, + {198, 1, 2776544515LL, 7}, + {199, 
1, 2762592030LL, 7}, + {200, 1, 2748779070LL, 7}, + {201, 1, 2735103552LL, 7}, + {202, 1, 2721563436LL, 7}, + {203, 1, 2708156719LL, 7}, + {204, 1, 2694881441LL, 7}, + {205, 1, 2681735678LL, 7}, + {206, 1, 2668717544LL, 7}, + {207, 1, 2655825188LL, 7}, + {208, 1, 2643056798LL, 7}, + {209, 1, 2630410593LL, 7}, + {210, 1, 2617884829LL, 7}, + {211, 1, 2605477791LL, 7}, + {212, 1, 2593187802LL, 7}, + {213, 1, 2581013211LL, 7}, + {214, 1, 2568952402LL, 7}, + {215, 1, 2557003786LL, 7}, + {216, 1, 2545165806LL, 7}, + {217, 1, 2533436931LL, 7}, + {218, 1, 2521815661LL, 7}, + {219, 1, 2510300521LL, 7}, + {220, 1, 2498890064LL, 7}, + {221, 1, 2487582869LL, 7}, + {222, 1, 2476377541LL, 7}, + {223, 1, 2465272709LL, 7}, + {224, 1, 2454267027LL, 7}, + {225, 1, 2443359173LL, 7}, + {226, 1, 2432547850LL, 7}, + {227, 1, 2421831780LL, 7}, + {228, 1, 2411209711LL, 7}, + {229, 1, 2400680410LL, 7}, + {230, 1, 2390242670LL, 7}, + {231, 1, 2379895299LL, 7}, + {232, 1, 2369637129LL, 7}, + {233, 1, 2359467013LL, 7}, + {234, 1, 2349383821LL, 7}, + {235, 1, 2339386443LL, 7}, + {236, 1, 2329473788LL, 7}, + {237, 1, 2319644785LL, 7}, + {238, 1, 2309898378LL, 7}, + {239, 1, 2300233531LL, 7}, + {240, 1, 2290649225LL, 7}, + {241, 1, 2281144456LL, 7}, + {242, 1, 2271718240LL, 7}, + {243, 1, 2262369605LL, 7}, + {244, 1, 2253097598LL, 7}, + {245, 1, 2243901282LL, 7}, + {246, 1, 2234779732LL, 7}, + {247, 1, 2225732041LL, 7}, + {248, 1, 2216757315LL, 7}, + {249, 1, 2207854675LL, 7}, + {250, 1, 2199023256LL, 7}, + {251, 1, 2190262207LL, 7}, + {252, 1, 2181570691LL, 7}, + {253, 1, 2172947881LL, 7}, + {254, 1, 2164392969LL, 7}, + {255, 1, 2155905153LL, 7}, +}; +const int64_t table_runtime_srz32[256][4] = { {0, 0, 0, 0}, // unused {0, 0, 0, 0}, // unused {2, 1, 2147483649LL, 0}, diff --git a/src/IntegerDivisionTable.h b/src/IntegerDivisionTable.h index 7808947085dc..d455f23f99f3 100644 --- a/src/IntegerDivisionTable.h +++ b/src/IntegerDivisionTable.h @@ -13,16 +13,22 @@ namespace Internal { namespace 
IntegerDivision { extern const int64_t table_u8[256][4]; extern const int64_t table_s8[256][4]; +extern const int64_t table_srz8[256][4]; extern const int64_t table_u16[256][4]; extern const int64_t table_s16[256][4]; +extern const int64_t table_srz16[256][4]; extern const int64_t table_u32[256][4]; extern const int64_t table_s32[256][4]; +extern const int64_t table_srz32[256][4]; extern const int64_t table_runtime_u8[256][4]; extern const int64_t table_runtime_s8[256][4]; +extern const int64_t table_runtime_srz8[256][4]; extern const int64_t table_runtime_u16[256][4]; extern const int64_t table_runtime_s16[256][4]; +extern const int64_t table_runtime_srz16[256][4]; extern const int64_t table_runtime_u32[256][4]; extern const int64_t table_runtime_s32[256][4]; +extern const int64_t table_runtime_srz32[256][4]; } // namespace IntegerDivision } // namespace Internal } // namespace Halide diff --git a/test/performance/const_division.cpp b/test/performance/const_division.cpp index c1f29975be83..2ddad95cf56a 100644 --- a/test/performance/const_division.cpp +++ b/test/performance/const_division.cpp @@ -12,7 +12,7 @@ using namespace Halide::Tools; std::mt19937 rng(0); template -bool test(int w, bool div) { +bool test(int w, bool div, bool round_to_zero) { Func f, g, h; Var x, y; @@ -40,18 +40,33 @@ bool test(int w, bool div) { for (int x = 0; x < input.width(); x++) { uint32_t bits = (uint32_t)rng(); input(x, y) = (T)bits; + // Round-to-zero faults on zero denominators + if (round_to_zero && (input(x, y) == 0)) { + input(x, y) = 1; + } } } if (div) { - // Test div - f(x, y) = input(x, y) / cast(y + min_val); + if (round_to_zero) { + // Test div. We'll unroll entirely across y to turn the denominator into a constant. + f(x, y) = div_round_to_zero(input(x, y), cast(y + min_val)); - // Reference good version - g(x, y) = input(x, y) / cast(y + min_val); + // Reference good version. Not unrolled across y. 
+ g(x, y) = div_round_to_zero(input(x, y), cast(y + min_val)); + + // Version that uses fast_integer_divide + h(x, y) = Halide::fast_integer_divide_round_to_zero(input(x, y), cast(y + min_val)); + } else { + // Test div + f(x, y) = input(x, y) / cast(y + min_val); - // Version that uses fast_integer_divide - h(x, y) = Halide::fast_integer_divide(input(x, y), cast(y + min_val)); + // Reference good version + g(x, y) = input(x, y) / cast(y + min_val); + + // Version that uses fast_integer_divide + h(x, y) = Halide::fast_integer_divide(input(x, y), cast(y + min_val)); + } } else { // Test mod f(x, y) = input(x, y) % cast(y + min_val); @@ -70,10 +85,11 @@ bool test(int w, bool div) { f.vectorize(x); h.vectorize(x); } - - f.compile_jit(); - g.compile_jit(); - h.compile_jit(); + Target t = get_jit_target_from_environment(); + t.set_feature(Target::DisableLLVMLoopOpt); + f.compile_jit(t); + g.compile_jit(t); + h.compile_jit(t); Buffer correct = g.realize({input.width(), num_vals}); double t_correct = benchmark([&]() { g.realize(correct); }); @@ -115,23 +131,39 @@ int main(int argc, char **argv) { std::cout << "const_division test seed: " << seed << std::endl; bool success = true; - for (int i = 0; i < 2; i++) { - const char *name = (i == 0 ? 
"divisor" : "modulus"); - printf("type const-%s speed-up runtime-%s speed-up\n", name, name); + for (int i = 0; i < 3; i++) { + switch (i) { + case 0: + printf("division rounding to negative infinity:\n"); + break; + case 1: + printf("signed division rounding to zero:\n"); + break; + case 2: + printf("modulus:\n"); + break; + } + printf("type const-divisor speed-up runtime-divisor speed-up\n"); + // Scalar - success = success && test(1, i == 0); - success = success && test(1, i == 0); - success = success && test(1, i == 0); - success = success && test(1, i == 0); - success = success && test(1, i == 0); - success = success && test(1, i == 0); + success = success && test(1, i == 0, i == 1); + success = success && test(1, i == 0, i == 1); + success = success && test(1, i == 0, i == 1); + if (i != 1) { + success = success && test(1, i == 0, false); + success = success && test(1, i == 0, false); + success = success && test(1, i == 0, false); + } + // Vector - success = success && test(8, i == 0); - success = success && test(16, i == 0); - success = success && test(32, i == 0); - success = success && test(8, i == 0); - success = success && test(16, i == 0); - success = success && test(32, i == 0); + success = success && test(8, i == 0, i == 1); + success = success && test(16, i == 0, i == 1); + success = success && test(32, i == 0, i == 1); + if (i != 1) { + success = success && test(8, i == 0, false); + success = success && test(16, i == 0, false); + success = success && test(32, i == 0, false); + } } if (!success) { diff --git a/tools/find_inverse.cpp b/tools/find_inverse.cpp index 6f0c3f8bd800..cdf1b944d565 100644 --- a/tools/find_inverse.cpp +++ b/tools/find_inverse.cpp @@ -18,14 +18,13 @@ int64_t sdiv(int64_t a, int64_t b) { return (a - ((a % b) + b) % b) / b; } +int64_t srzdiv(int64_t a, int64_t b) { + return a / b; +} + bool u_method_0(int den, int sh_post, int bits) { uint64_t max = (1L << bits) - 1; - // for (int64_t num = 0; num <= max; num++) { - for (unsigned 
iter = 0; iter < 1000000UL; iter++) { - uint64_t num = r(0, max); - // Make sure we hit the extremes - if (iter == 0) num = 0; - if (iter == 1) num = max; + for (int64_t num = 0; num <= max; num++) { uint64_t result = num; result >>= sh_post; if (num / den != result) return false; @@ -36,12 +35,7 @@ bool u_method_0(int den, int sh_post, int bits) { bool u_method_1(int den, int64_t mul, int sh_post, int bits) { uint64_t max = (1L << bits) - 1; if (mul > max) return false; - // for (uint64_t num = 0; num <= max; num++) { - for (unsigned iter = 0; iter < 1000000UL; iter++) { - uint64_t num = r(0, max); - // Make sure we hit the extremes - if (iter == 0) num = 0; - if (iter == 1) num = max; + for (uint64_t num = 0; num <= max; num++) { uint64_t result = num; result *= mul; result >>= bits; @@ -55,12 +49,7 @@ bool u_method_1(int den, int64_t mul, int sh_post, int bits) { bool u_method_2(int den, int64_t mul, int sh_post, int bits) { uint64_t max = (1UL << bits) - 1; if (mul > max) return false; - // for (uint64_t num = 0; num <= max; num++) { - for (unsigned iter = 0; iter < 1000000UL; iter++) { - uint64_t num = r(0, max); - // Make sure we hit the extremes - if (iter == 0) num = 0; - if (iter == 1) num = max; + for (uint64_t num = 0; num <= max; num++) { uint64_t result = num; result *= mul; result >>= bits; @@ -76,12 +65,7 @@ bool u_method_2(int den, int64_t mul, int sh_post, int bits) { bool u_method_3(int den, int64_t mul, int sh_post, int bits) { uint64_t max = (1UL << bits) - 1; if (mul > max) return false; - // for (uint64_t num = 0; num <= max; num++) { - for (unsigned iter = 0; iter < 1000000UL; iter++) { - uint64_t num = r(0, max); - // Make sure we hit the extremes - if (iter == 0) num = 0; - if (iter == 1) num = max; + for (uint64_t num = 0; num <= max; num++) { uint64_t result = num; result *= mul; result >>= bits; @@ -96,12 +80,7 @@ bool u_method_3(int den, int64_t mul, int sh_post, int bits) { bool s_method_0(int den, int sh_post, int bits) { int64_t min 
= -(1L << (bits - 1)), max = (1L << (bits - 1)) - 1; - // for (int64_t num = min; num <= max; num++) { - for (int iter = 0; iter < 1000000L; iter++) { - int64_t num = r(min, max); - // Make sure we hit the extremes - if (iter == 0) num = min; - if (iter == 1) num = max; + for (int64_t num = min; num <= max; num++) { int64_t result = num; result >>= sh_post; if (sdiv(num, den) != result) return false; @@ -112,12 +91,7 @@ bool s_method_0(int den, int sh_post, int bits) { bool s_method_1(int den, int64_t mul, int sh_post, int bits) { int64_t min = -(1 << (bits - 1)), max = (1 << (bits - 1)) - 1; - // for (int64_t num = min; num <= max; num++) { - for (int iter = 0; iter < 1000000L; iter++) { - int64_t num = r(min, max); - // Make sure we hit the extremes - if (iter == 0) num = min; - if (iter == 1) num = max; + for (int64_t num = min; num <= max; num++) { int64_t result = num; uint64_t xsign = result >> (bits - 1); uint64_t q0 = (mul * (xsign ^ result)) >> bits; @@ -127,6 +101,38 @@ bool s_method_1(int den, int64_t mul, int sh_post, int bits) { return true; } +bool srz_method_0(int den, int sh_post, int bits) { + int64_t min = -(1L << (bits - 1)), max = (1L << (bits - 1)) - 1; + for (int64_t num = min; num <= max; num++) { + int64_t result = num; + result += (result >> (bits - 1)) & (den - 1); + result >>= sh_post; + if (srzdiv(num, den) != result) return false; + } + return true; +} + +bool srz_method_1(int den, int64_t mul, int sh_post, int bits) { + int64_t min = -(1 << (bits - 1)), max = (1 << (bits - 1)) - 1; + + for (int64_t num = min; num <= max; num++) { + int64_t result = num; + uint64_t xsign = result >> (bits - 1); + uint64_t q0 = (mul * result) >> bits; + result = (q0 >> sh_post); + uint64_t mask = (1ULL << bits) - 1; + result -= (xsign & mask); + // Fix-up the sign bits + result <<= (64 - bits); + result >>= (64 - bits); + if (srzdiv(num, den) != result) { + printf("Fail\n"); + return false; + } + } + return true; +} + int main(int argc, char **argv) { /* 
This program computes a table to help us do cheap integer division by a constant. It is based on the paper "Division by @@ -269,6 +275,42 @@ int main(int argc, char **argv) { next_signed:; } fprintf(c_out, "};\n"); + printf("Generating table%s_srz%d...\n", runtime ? "_runtime" : "", bits); + if (runtime) { + fprintf(h_out, "extern const int64_t table_runtime_srz%d[256][4];\n", bits); + fprintf(c_out, "const int64_t table_runtime_srz%d[256][4] = {\n", bits); + } else { + fprintf(h_out, "extern const int64_t table_srz%d[256][4];\n", bits); + fprintf(c_out, "const int64_t table_srz%d[256][4] = {\n", bits); + } + for (int d = 0; d < 256; d++) { + if (runtime && d < 2) { + fprintf(c_out, " {0, 0, 0, 0}, // unused\n"); + continue; + } + int den = d; + if (den == 0) den = 256; + if (!runtime) { + for (int shift = 0; shift < 8; shift++) { + if (srz_method_0(den, shift, bits)) { + fprintf(c_out, " {%d, 0, 0, %d},\n", den, shift); + goto next_signedrz; + } + } + } + + { + int shift = 31 - __builtin_clz(den - 1); + int64_t mul = (1L << (shift + bits)) / den + 1; + if (srz_method_1(den, mul, shift, bits)) { + fprintf(c_out, " {%d, 1, %lldLL, %d},\n", den, (long long)mul, shift); + goto next_signedrz; + } + } + fprintf(c_out, "ERROR! 
No solution found for signed %d\n", den); + next_signedrz:; + } + fprintf(c_out, "};\n"); } } From 7199e7da02ec690cf1361f173da59a9f98dbb182 Mon Sep 17 00:00:00 2001 From: Andrew Adams Date: Tue, 7 Dec 2021 17:45:10 -0800 Subject: [PATCH 4/4] Try removing optional buffer added to closure --- src/CodeGen_Internal.cpp | 56 ++++++++++++---------------------------- 1 file changed, 16 insertions(+), 40 deletions(-) diff --git a/src/CodeGen_Internal.cpp b/src/CodeGen_Internal.cpp index 45029999cf22..bcb506741e02 100644 --- a/src/CodeGen_Internal.cpp +++ b/src/CodeGen_Internal.cpp @@ -49,37 +49,20 @@ void pack_closure(llvm::StructType *type, IRBuilder<> *builder) { // type, type of dst should be a pointer to a struct of the type returned by build_type int idx = 0; - for (const auto &v : closure.vars) { + + auto add_to_closure = [&](const std::string &name) { llvm::Type *t = type->elements()[idx]; Value *ptr = builder->CreateConstInBoundsGEP2_32(type, dst, 0, idx++); - Value *val = src.get(v.first); + Value *val = src.get(name); val = builder->CreateBitCast(val, t); builder->CreateStore(val, ptr); + }; + + for (const auto &v : closure.vars) { + add_to_closure(v.first); } for (const auto &b : closure.buffers) { - // For buffers we pass through base address (the symbol with - // the same name as the buffer), and the .buffer symbol (GPU - // code might implicitly need it). - // FIXME: This dependence should be explicitly encoded in the IR. 
- { - llvm::Type *t = type->elements()[idx]; - Value *ptr = builder->CreateConstInBoundsGEP2_32(type, dst, 0, idx++); - Value *val = src.get(b.first); - val = builder->CreateBitCast(val, t); - builder->CreateStore(val, ptr); - } - { - llvm::PointerType *t = halide_buffer_t_type->getPointerTo(); - Value *ptr = builder->CreateConstInBoundsGEP2_32(type, dst, 0, idx++); - Value *val = nullptr; - if (src.contains(b.first + ".buffer")) { - val = src.get(b.first + ".buffer"); - val = builder->CreateBitCast(val, t); - } else { - val = ConstantPointerNull::get(t); - } - builder->CreateStore(val, ptr); - } + add_to_closure(b.first); } } @@ -90,25 +73,18 @@ void unpack_closure(const Closure &closure, IRBuilder<> *builder) { // type, type of src should be a pointer to a struct of the type returned by build_type int idx = 0; - for (const auto &v : closure.vars) { + + auto load_from_closure = [&](const std::string &name) { Value *ptr = builder->CreateConstInBoundsGEP2_32(type, src, 0, idx++); LoadInst *load = builder->CreateLoad(ptr->getType()->getPointerElementType(), ptr); - dst.push(v.first, load); - load->setName(v.first); + dst.push(name, load); + load->setName(name); + }; + for (const auto &v : closure.vars) { + load_from_closure(v.first); } for (const auto &b : closure.buffers) { - { - Value *ptr = builder->CreateConstInBoundsGEP2_32(type, src, 0, idx++); - LoadInst *load = builder->CreateLoad(ptr->getType()->getPointerElementType(), ptr); - dst.push(b.first, load); - load->setName(b.first); - } - { - Value *ptr = builder->CreateConstInBoundsGEP2_32(type, src, 0, idx++); - LoadInst *load = builder->CreateLoad(ptr->getType()->getPointerElementType(), ptr); - dst.push(b.first + ".buffer", load); - load->setName(b.first + ".buffer"); - } + load_from_closure(b.first); } }