From 7aae1c5c789e7e7a3213ed11108dd8c9b2d9cf3e Mon Sep 17 00:00:00 2001 From: Aayush Atharva Date: Sun, 19 Feb 2023 13:05:36 +0530 Subject: [PATCH] Sync Brotli ode with Upstream (#87) * Sync Brotli ode with Upstream --- brotli/common/constants.h | 3 +- brotli/common/dictionary.c | 2 +- brotli/common/platform.c | 3 +- brotli/common/platform.h | 123 ++--- brotli/common/shared_dictionary_internal.h | 5 +- brotli/dec/bit_reader.c | 5 +- brotli/dec/bit_reader.h | 29 +- brotli/dec/decode.c | 103 ++++- brotli/dec/huffman.c | 9 +- brotli/dec/huffman.h | 3 +- brotli/dec/prefix.h | 3 +- brotli/dec/state.c | 20 +- brotli/dec/state.h | 13 +- brotli/enc/backward_references.c | 3 +- brotli/enc/backward_references.h | 3 +- brotli/enc/backward_references_hq.c | 3 +- brotli/enc/backward_references_hq.h | 3 +- brotli/enc/bit_cost.c | 3 +- brotli/enc/bit_cost.h | 3 +- brotli/enc/block_splitter.h | 3 +- brotli/enc/brotli_bit_stream.c | 3 +- brotli/enc/brotli_bit_stream.h | 3 +- brotli/enc/cluster.c | 3 +- brotli/enc/cluster.h | 3 +- brotli/enc/command.h | 3 +- brotli/enc/compound_dictionary.c | 25 +- brotli/enc/compound_dictionary.h | 22 +- brotli/enc/compress_fragment.c | 3 +- brotli/enc/compress_fragment.h | 3 +- brotli/enc/compress_fragment_two_pass.c | 3 +- brotli/enc/compress_fragment_two_pass.h | 3 +- brotli/enc/encode.c | 422 ++++-------------- brotli/enc/encoder_dict.h | 8 +- brotli/enc/entropy_encode.c | 3 +- brotli/enc/entropy_encode.h | 3 +- brotli/enc/entropy_encode_static.h | 3 +- brotli/enc/fast_log.h | 3 +- brotli/enc/find_match_length.h | 37 +- brotli/enc/hash.h | 28 +- brotli/enc/histogram.h | 3 +- brotli/enc/literal_cost.c | 3 +- brotli/enc/literal_cost.h | 3 +- brotli/enc/memory.c | 3 +- brotli/enc/memory.h | 3 +- brotli/enc/metablock.c | 3 +- brotli/enc/metablock.h | 3 +- brotli/enc/params.h | 1 + brotli/enc/prefix.h | 3 +- brotli/enc/quality.h | 3 +- brotli/enc/ringbuffer.h | 3 +- brotli/enc/state.h | 104 +++++ brotli/enc/static_dict.h | 3 +- brotli/enc/utf8_util.h 
| 3 +- brotli/enc/write_bits.h | 3 +- brotli/include/brotli/decode.h | 41 ++ brotli/include/brotli/encode.h | 2 +- brotli/include/brotli/port.h | 8 - brotli/tools/brotli.c | 50 ++- brotli/tools/brotli.md | 62 ++- .../brotli4j/common/annotations/Local.java | 2 +- .../brotli4j/decoder/Decoder.java | 58 +++ .../brotli4j/encoder/Encoder.java | 48 +- .../brotli4j/encoder/EncoderJNI.java | 17 +- natives/src/main/cpp/decoder_jni.cc | 6 +- natives/src/main/cpp/decoder_jni.h | 75 ++++ natives/src/main/cpp/encoder_jni.cc | 3 +- 66 files changed, 805 insertions(+), 634 deletions(-) create mode 100644 brotli/enc/state.h create mode 100644 natives/src/main/cpp/decoder_jni.h diff --git a/brotli/common/constants.h b/brotli/common/constants.h index 433c7b24..31e5bd37 100644 --- a/brotli/common/constants.h +++ b/brotli/common/constants.h @@ -12,10 +12,11 @@ #ifndef BROTLI_COMMON_CONSTANTS_H_ #define BROTLI_COMMON_CONSTANTS_H_ -#include "platform.h" #include #include +#include "platform.h" + /* Specification: 7.3. 
Encoding of the context map */ #define BROTLI_CONTEXT_MAP_MAX_RLE 16 diff --git a/brotli/common/dictionary.c b/brotli/common/dictionary.c index 30752578..7c015ab0 100644 --- a/brotli/common/dictionary.c +++ b/brotli/common/dictionary.c @@ -5897,7 +5897,7 @@ static BrotliDictionary kBrotliDictionary = { #endif }; -const BrotliDictionary* BrotliGetDictionary() { +const BrotliDictionary* BrotliGetDictionary(void) { return &kBrotliDictionary; } diff --git a/brotli/common/platform.c b/brotli/common/platform.c index acdc452f..25d84a94 100644 --- a/brotli/common/platform.c +++ b/brotli/common/platform.c @@ -6,9 +6,10 @@ #include -#include "platform.h" #include +#include "platform.h" + /* Default brotli_alloc_func */ void* BrotliDefaultAllocFunc(void* opaque, size_t size) { BROTLI_UNUSED(opaque); diff --git a/brotli/common/platform.h b/brotli/common/platform.h index 0e0e8aa4..4186a8e9 100644 --- a/brotli/common/platform.h +++ b/brotli/common/platform.h @@ -12,9 +12,9 @@ * BROTLI_BUILD_BIG_ENDIAN forces to use big-endian optimizations * BROTLI_BUILD_ENDIAN_NEUTRAL disables endian-aware optimizations * BROTLI_BUILD_LITTLE_ENDIAN forces to use little-endian optimizations - * BROTLI_BUILD_PORTABLE disables dangerous optimizations, like unaligned - read and overlapping memcpy; this reduces decompression speed by 5% * BROTLI_BUILD_NO_RBIT disables "rbit" optimization for ARM CPUs + * BROTLI_BUILD_NO_UNALIGNED_READ_FAST forces off the fast-unaligned-read + optimizations (mainly for testing purposes). 
* BROTLI_DEBUG dumps file name and line number when decoder detects stream or memory error * BROTLI_ENABLE_LOG enables asserts and dumps various state information @@ -208,15 +208,19 @@ To apply compiler hint, enclose the branching condition into macros, like this: #define BROTLI_TARGET_RISCV64 #endif +#if defined(BROTLI_TARGET_X64) || defined(BROTLI_TARGET_ARMV8_64) || \ + defined(BROTLI_TARGET_POWERPC64) || defined(BROTLI_TARGET_RISCV64) +#define BROTLI_TARGET_64_BITS 1 +#else +#define BROTLI_TARGET_64_BITS 0 +#endif + #if defined(BROTLI_BUILD_64_BIT) #define BROTLI_64_BITS 1 #elif defined(BROTLI_BUILD_32_BIT) #define BROTLI_64_BITS 0 -#elif defined(BROTLI_TARGET_X64) || defined(BROTLI_TARGET_ARMV8_64) || \ - defined(BROTLI_TARGET_POWERPC64) || defined(BROTLI_TARGET_RISCV64) -#define BROTLI_64_BITS 1 #else -#define BROTLI_64_BITS 0 +#define BROTLI_64_BITS BROTLI_TARGET_64_BITS #endif #if (BROTLI_64_BITS) @@ -260,18 +264,19 @@ To apply compiler hint, enclose the branching condition into macros, like this: #undef BROTLI_X_BIG_ENDIAN #endif -#if defined(BROTLI_BUILD_PORTABLE) -#define BROTLI_ALIGNED_READ (!!1) -#elif defined(BROTLI_TARGET_X86) || defined(BROTLI_TARGET_X64) || \ +#if defined(BROTLI_BUILD_NO_UNALIGNED_READ_FAST) +#define BROTLI_UNALIGNED_READ_FAST (!!0) +#elif defined(BROTLI_TARGET_X86) || defined(BROTLI_TARGET_X64) || \ defined(BROTLI_TARGET_ARMV7) || defined(BROTLI_TARGET_ARMV8_ANY) || \ defined(BROTLI_TARGET_RISCV64) -/* Allow unaligned read only for white-listed CPUs. */ -#define BROTLI_ALIGNED_READ (!!0) +/* These targets are known to generate efficient code for unaligned reads + * (e.g. a single instruction, not multiple 1-byte loads, shifted and or'd + * together). */ +#define BROTLI_UNALIGNED_READ_FAST (!!1) #else -#define BROTLI_ALIGNED_READ (!!1) +#define BROTLI_UNALIGNED_READ_FAST (!!0) #endif -#if BROTLI_ALIGNED_READ /* Portable unaligned memory access: read / write values via memcpy. 
*/ static BROTLI_INLINE uint16_t BrotliUnalignedRead16(const void* p) { uint16_t t; @@ -291,75 +296,6 @@ static BROTLI_INLINE uint64_t BrotliUnalignedRead64(const void* p) { static BROTLI_INLINE void BrotliUnalignedWrite64(void* p, uint64_t v) { memcpy(p, &v, sizeof v); } -#else /* BROTLI_ALIGNED_READ */ -/* Unaligned memory access is allowed: just cast pointer to requested type. */ -#if BROTLI_SANITIZED -/* Consider we have an unaligned load/store of 4 bytes from address 0x...05. - AddressSanitizer will treat it as a 3-byte access to the range 05:07 and - will miss a bug if 08 is the first unaddressable byte. - ThreadSanitizer will also treat this as a 3-byte access to 05:07 and will - miss a race between this access and some other accesses to 08. - MemorySanitizer will correctly propagate the shadow on unaligned stores - and correctly report bugs on unaligned loads, but it may not properly - update and report the origin of the uninitialized memory. - For all three tools, replacing an unaligned access with a tool-specific - callback solves the problem. 
*/ -#if defined(__cplusplus) -extern "C" { -#endif /* __cplusplus */ - uint16_t __sanitizer_unaligned_load16(const void* p); - uint32_t __sanitizer_unaligned_load32(const void* p); - uint64_t __sanitizer_unaligned_load64(const void* p); - void __sanitizer_unaligned_store64(void* p, uint64_t v); -#if defined(__cplusplus) -} /* extern "C" */ -#endif /* __cplusplus */ -#define BrotliUnalignedRead16 __sanitizer_unaligned_load16 -#define BrotliUnalignedRead32 __sanitizer_unaligned_load32 -#define BrotliUnalignedRead64 __sanitizer_unaligned_load64 -#define BrotliUnalignedWrite64 __sanitizer_unaligned_store64 -#else /* BROTLI_SANITIZED */ -static BROTLI_INLINE uint16_t BrotliUnalignedRead16(const void* p) { - return *(const uint16_t*)p; -} -static BROTLI_INLINE uint32_t BrotliUnalignedRead32(const void* p) { - return *(const uint32_t*)p; -} -#if (BROTLI_64_BITS) -static BROTLI_INLINE uint64_t BrotliUnalignedRead64(const void* p) { - return *(const uint64_t*)p; -} -static BROTLI_INLINE void BrotliUnalignedWrite64(void* p, uint64_t v) { - *(uint64_t*)p = v; -} -#else /* BROTLI_64_BITS */ -/* Avoid emitting LDRD / STRD, which require properly aligned address. */ -/* If __attribute__(aligned) is available, use that. Otherwise, memcpy. 
*/ - -#if BROTLI_GNUC_HAS_ATTRIBUTE(aligned, 2, 7, 0) -typedef BROTLI_ALIGNED(1) uint64_t brotli_unaligned_uint64_t; - -static BROTLI_INLINE uint64_t BrotliUnalignedRead64(const void* p) { - return (uint64_t) ((const brotli_unaligned_uint64_t*) p)[0]; -} -static BROTLI_INLINE void BrotliUnalignedWrite64(void* p, uint64_t v) { - brotli_unaligned_uint64_t* dwords = (brotli_unaligned_uint64_t*) p; - dwords[0] = (brotli_unaligned_uint64_t) v; -} -#else /* BROTLI_GNUC_HAS_ATTRIBUTE(aligned, 2, 7, 0) */ -static BROTLI_INLINE uint64_t BrotliUnalignedRead64(const void* p) { - uint64_t v; - memcpy(&v, p, sizeof(uint64_t)); - return v; -} - -static BROTLI_INLINE void BrotliUnalignedWrite64(void* p, uint64_t v) { - memcpy(p, &v, sizeof(uint64_t)); -} -#endif /* BROTLI_GNUC_HAS_ATTRIBUTE(aligned, 2, 7, 0) */ -#endif /* BROTLI_64_BITS */ -#endif /* BROTLI_SANITIZED */ -#endif /* BROTLI_ALIGNED_READ */ #if BROTLI_LITTLE_ENDIAN /* Straight endianness. Just read / write values. */ @@ -435,6 +371,16 @@ static BROTLI_INLINE void BROTLI_UNALIGNED_STORE64LE(void* p, uint64_t v) { } #endif /* BROTLI_LITTLE_ENDIAN */ +static BROTLI_INLINE void* BROTLI_UNALIGNED_LOAD_PTR(const void* p) { + void* v; + memcpy(&v, p, sizeof(void*)); + return v; +} + +static BROTLI_INLINE void BROTLI_UNALIGNED_STORE_PTR(void* p, const void* v) { + memcpy(p, &v, sizeof(void*)); +} + /* BROTLI_IS_CONSTANT macros returns true for compile-time constants. */ #if BROTLI_GNUC_HAS_BUILTIN(__builtin_constant_p, 3, 0, 1) || \ BROTLI_INTEL_VERSION_CHECK(16, 0, 0) @@ -467,6 +413,8 @@ static BROTLI_INLINE void BrotliDump(const char* f, int l, const char* fn) { #define BROTLI_DUMP() (void)(0) #endif +/* BrotliRBit assumes brotli_reg_t fits native CPU register type. */ +#if (BROTLI_64_BITS == BROTLI_TARGET_64_BITS) /* TODO(eustas): add appropriate icc/sunpro/arm/ibm/ti checks. 
*/ #if (BROTLI_GNUC_VERSION_CHECK(3, 0, 0) || defined(__llvm__)) && \ !defined(BROTLI_BUILD_NO_RBIT) @@ -480,15 +428,14 @@ static BROTLI_INLINE brotli_reg_t BrotliRBit(brotli_reg_t input) { #define BROTLI_RBIT(x) BrotliRBit(x) #endif /* armv7 / armv8 */ #endif /* gcc || clang */ +#endif /* brotli_reg_t is native */ #if !defined(BROTLI_RBIT) static BROTLI_INLINE void BrotliRBit(void) { /* Should break build if used. */ } #endif /* BROTLI_RBIT */ -#define BROTLI_REPEAT(N, X) { \ - if ((N & 1) != 0) {X;} \ - if ((N & 2) != 0) {X; X;} \ - if ((N & 4) != 0) {X; X; X; X;} \ -} +#define BROTLI_REPEAT_4(X) {X; X; X; X;} +#define BROTLI_REPEAT_5(X) {X; X; X; X; X;} +#define BROTLI_REPEAT_6(X) {X; X; X; X; X; X;} #define BROTLI_UNUSED(X) (void)(X) @@ -553,6 +500,8 @@ BROTLI_UNUSED_FUNCTION void BrotliSuppressUnusedFunctions(void) { BROTLI_UNUSED(&BROTLI_UNALIGNED_LOAD32LE); BROTLI_UNUSED(&BROTLI_UNALIGNED_LOAD64LE); BROTLI_UNUSED(&BROTLI_UNALIGNED_STORE64LE); + BROTLI_UNUSED(&BROTLI_UNALIGNED_LOAD_PTR); + BROTLI_UNUSED(&BROTLI_UNALIGNED_STORE_PTR); BROTLI_UNUSED(&BrotliRBit); BROTLI_UNUSED(&brotli_min_double); BROTLI_UNUSED(&brotli_max_double); diff --git a/brotli/common/shared_dictionary_internal.h b/brotli/common/shared_dictionary_internal.h index 87ab13b2..963762e4 100644 --- a/brotli/common/shared_dictionary_internal.h +++ b/brotli/common/shared_dictionary_internal.h @@ -9,11 +9,12 @@ #ifndef BROTLI_COMMON_SHARED_DICTIONARY_INTERNAL_H_ #define BROTLI_COMMON_SHARED_DICTIONARY_INTERNAL_H_ -#include "dictionary.h" #include -#include "transform.h" #include +#include "dictionary.h" +#include "transform.h" + #if defined(__cplusplus) || defined(c_plusplus) extern "C" { #endif diff --git a/brotli/dec/bit_reader.c b/brotli/dec/bit_reader.c index 3dc848b7..97e21f56 100644 --- a/brotli/dec/bit_reader.c +++ b/brotli/dec/bit_reader.c @@ -8,9 +8,10 @@ #include "bit_reader.h" -#include "../common/platform.h" #include +#include "../common/platform.h" + #if defined(__cplusplus) || 
defined(c_plusplus) extern "C" { #endif @@ -36,7 +37,7 @@ BROTLI_BOOL BrotliWarmupBitReader(BrotliBitReader* const br) { /* Fixing alignment after unaligned BrotliFillWindow would result accumulator overflow. If unalignment is caused by BrotliSafeReadBits, then there is enough space in accumulator to fix alignment. */ - if (!BROTLI_ALIGNED_READ) { + if (BROTLI_UNALIGNED_READ_FAST) { aligned_read_mask = 0; } if (BrotliGetAvailableBits(br) == 0) { diff --git a/brotli/dec/bit_reader.h b/brotli/dec/bit_reader.h index 39064551..64701eca 100644 --- a/brotli/dec/bit_reader.h +++ b/brotli/dec/bit_reader.h @@ -11,9 +11,10 @@ #include /* memcpy */ +#include + #include "../common/constants.h" #include "../common/platform.h" -#include #if defined(__cplusplus) || defined(c_plusplus) extern "C" { @@ -53,8 +54,8 @@ BROTLI_INTERNAL void BrotliInitBitReader(BrotliBitReader* const br); /* Ensures that accumulator is not empty. May consume up to sizeof(brotli_reg_t) - 1 bytes of input. Returns BROTLI_FALSE if data is required but there is no input available. - For BROTLI_ALIGNED_READ this function also prepares bit reader for aligned - reading. */ + For !BROTLI_UNALIGNED_READ_FAST this function also prepares bit reader for + aligned reading. */ BROTLI_INTERNAL BROTLI_BOOL BrotliWarmupBitReader(BrotliBitReader* const br); /* Fallback for BrotliSafeReadBits32. 
Extracted as noninlined method to unburden @@ -107,7 +108,8 @@ static BROTLI_INLINE BROTLI_BOOL BrotliCheckInputAmount( static BROTLI_INLINE void BrotliFillBitWindow( BrotliBitReader* const br, uint32_t n_bits) { #if (BROTLI_64_BITS) - if (!BROTLI_ALIGNED_READ && BROTLI_IS_CONSTANT(n_bits) && (n_bits <= 8)) { + if (BROTLI_UNALIGNED_READ_FAST && BROTLI_IS_CONSTANT(n_bits) && + (n_bits <= 8)) { uint32_t bit_pos = br->bit_pos_; if (bit_pos >= 56) { br->val_ = @@ -117,8 +119,8 @@ static BROTLI_INLINE void BrotliFillBitWindow( br->avail_in -= 7; br->next_in += 7; } - } else if ( - !BROTLI_ALIGNED_READ && BROTLI_IS_CONSTANT(n_bits) && (n_bits <= 16)) { + } else if (BROTLI_UNALIGNED_READ_FAST && BROTLI_IS_CONSTANT(n_bits) && + (n_bits <= 16)) { uint32_t bit_pos = br->bit_pos_; if (bit_pos >= 48) { br->val_ = @@ -140,7 +142,8 @@ static BROTLI_INLINE void BrotliFillBitWindow( } } #else - if (!BROTLI_ALIGNED_READ && BROTLI_IS_CONSTANT(n_bits) && (n_bits <= 8)) { + if (BROTLI_UNALIGNED_READ_FAST && BROTLI_IS_CONSTANT(n_bits) && + (n_bits <= 8)) { uint32_t bit_pos = br->bit_pos_; if (bit_pos >= 24) { br->val_ = @@ -338,6 +341,11 @@ static BROTLI_INLINE BROTLI_BOOL BrotliJumpToByteBoundary(BrotliBitReader* br) { return TO_BROTLI_BOOL(pad_bits == 0); } +static BROTLI_INLINE void BrotliDropBytes(BrotliBitReader* br, size_t num) { + br->avail_in -= num; + br->next_in += num; +} + /* Copies remaining input bytes stored in the bit reader to the output. Value |num| may not be larger than BrotliGetRemainingBytes. The bit reader must be warmed up again after this. 
*/ @@ -349,9 +357,10 @@ static BROTLI_INLINE void BrotliCopyBytes(uint8_t* dest, ++dest; --num; } - memcpy(dest, br->next_in, num); - br->avail_in -= num; - br->next_in += num; + if (num > 0) { + memcpy(dest, br->next_in, num); + BrotliDropBytes(br, num); + } } #if defined(__cplusplus) || defined(c_plusplus) diff --git a/brotli/dec/decode.c b/brotli/dec/decode.c index 2fe58a7b..3ee1963a 100644 --- a/brotli/dec/decode.c +++ b/brotli/dec/decode.c @@ -113,8 +113,9 @@ void BrotliDecoderDestroyInstance(BrotliDecoderState* state) { /* Saves error code and converts it to BrotliDecoderResult. */ static BROTLI_NOINLINE BrotliDecoderResult SaveErrorCode( - BrotliDecoderState* s, BrotliDecoderErrorCode e) { + BrotliDecoderState* s, BrotliDecoderErrorCode e, size_t consumed_input) { s->error_code = (int)e; + s->used_input += consumed_input; switch (e) { case BROTLI_DECODER_SUCCESS: return BROTLI_DECODER_RESULT_SUCCESS; @@ -1172,7 +1173,7 @@ static BROTLI_INLINE void DetectTrivialLiteralBlockTypes( size_t sample = s->context_map[offset]; size_t j; for (j = 0; j < (1u << BROTLI_LITERAL_CONTEXT_BITS);) { - BROTLI_REPEAT(4, error |= s->context_map[offset + j++] ^ sample;) + BROTLI_REPEAT_4({ error |= s->context_map[offset + j++] ^ sample; }) } if (error == 0) { s->trivial_literal_contexts[i >> 5] |= 1u << (i & 31); @@ -1353,6 +1354,57 @@ static BROTLI_BOOL BROTLI_NOINLINE BrotliEnsureRingBuffer( return BROTLI_TRUE; } +static BrotliDecoderErrorCode BROTLI_NOINLINE +SkipMetadataBlock(BrotliDecoderState* s) { + BrotliBitReader* br = &s->br; + + if (s->meta_block_remaining_len == 0) { + return BROTLI_DECODER_SUCCESS; + } + + BROTLI_DCHECK((BrotliGetAvailableBits(br) & 7) == 0); + + /* Drain accumulator. 
*/ + if (BrotliGetAvailableBits(br) >= 8) { + uint8_t buffer[8]; + int nbytes = (int)(BrotliGetAvailableBits(br)) >> 3; + BROTLI_DCHECK(nbytes <= 8); + if (nbytes > s->meta_block_remaining_len) { + nbytes = s->meta_block_remaining_len; + } + BrotliCopyBytes(buffer, br, (size_t)nbytes); + if (s->metadata_chunk_func) { + s->metadata_chunk_func(s->metadata_callback_opaque, buffer, + (size_t)nbytes); + } + s->meta_block_remaining_len -= nbytes; + if (s->meta_block_remaining_len == 0) { + return BROTLI_DECODER_SUCCESS; + } + } + + /* Direct access to metadata is possible. */ + int nbytes = (int)BrotliGetRemainingBytes(br); + if (nbytes > s->meta_block_remaining_len) { + nbytes = s->meta_block_remaining_len; + } + if (nbytes > 0) { + if (s->metadata_chunk_func) { + s->metadata_chunk_func(s->metadata_callback_opaque, br->next_in, + (size_t)nbytes); + } + BrotliDropBytes(br, (size_t)nbytes); + s->meta_block_remaining_len -= nbytes; + if (s->meta_block_remaining_len == 0) { + return BROTLI_DECODER_SUCCESS; + } + } + + BROTLI_DCHECK(BrotliGetRemainingBytes(br) == 0); + + return BROTLI_DECODER_NEEDS_MORE_INPUT; +} + static BrotliDecoderErrorCode BROTLI_NOINLINE CopyUncompressedBlockToOutput( size_t* available_out, uint8_t** next_out, size_t* total_out, BrotliDecoderState* s) { @@ -2243,6 +2295,9 @@ BrotliDecoderResult BrotliDecoderDecompressStream( size_t* available_out, uint8_t** next_out, size_t* total_out) { BrotliDecoderErrorCode result = BROTLI_DECODER_SUCCESS; BrotliBitReader* br = &s->br; + size_t input_size = *available_in; +#define BROTLI_SAVE_ERROR_CODE(code) \ + SaveErrorCode(s, (code), input_size - *available_in) /* Ensure that |total_out| is set, even if no data will ever be pushed out. 
*/ if (total_out) { *total_out = s->partial_pos_out; @@ -2252,8 +2307,8 @@ BrotliDecoderResult BrotliDecoderDecompressStream( return BROTLI_DECODER_RESULT_ERROR; } if (*available_out && (!next_out || !*next_out)) { - return SaveErrorCode( - s, BROTLI_FAILURE(BROTLI_DECODER_ERROR_INVALID_ARGUMENTS)); + return BROTLI_SAVE_ERROR_CODE( + BROTLI_FAILURE(BROTLI_DECODER_ERROR_INVALID_ARGUMENTS)); } if (!*available_out) next_out = 0; if (s->buffer_length == 0) { /* Just connect bit reader to input stream. */ @@ -2410,6 +2465,10 @@ BrotliDecoderResult BrotliDecoderDecompressStream( } if (s->is_metadata) { s->state = BROTLI_STATE_METADATA; + if (s->metadata_start_func) { + s->metadata_start_func(s->metadata_callback_opaque, + (size_t)s->meta_block_remaining_len); + } break; } if (s->meta_block_remaining_len == 0) { @@ -2502,17 +2561,11 @@ BrotliDecoderResult BrotliDecoderDecompressStream( } case BROTLI_STATE_METADATA: - for (; s->meta_block_remaining_len > 0; --s->meta_block_remaining_len) { - uint32_t bits; - /* Read one byte and ignore it. 
*/ - if (!BrotliSafeReadBits(br, 8, &bits)) { - result = BROTLI_DECODER_NEEDS_MORE_INPUT; - break; - } - } - if (result == BROTLI_DECODER_SUCCESS) { - s->state = BROTLI_STATE_METABLOCK_DONE; + result = SkipMetadataBlock(s); + if (result != BROTLI_DECODER_SUCCESS) { + break; } + s->state = BROTLI_STATE_METABLOCK_DONE; break; case BROTLI_STATE_METABLOCK_HEADER_2: { @@ -2586,7 +2639,7 @@ BrotliDecoderResult BrotliDecoderDecompressStream( s, &s->distance_hgroup, distance_alphabet_size_max, distance_alphabet_size_limit, s->num_dist_htrees); if (!allocation_success) { - return SaveErrorCode(s, + return BROTLI_SAVE_ERROR_CODE( BROTLI_FAILURE(BROTLI_DECODER_ERROR_ALLOC_TREE_GROUPS)); } s->loop_counter = 0; @@ -2600,7 +2653,7 @@ BrotliDecoderResult BrotliDecoderDecompressStream( case 0: hgroup = &s->literal_hgroup; break; case 1: hgroup = &s->insert_copy_hgroup; break; case 2: hgroup = &s->distance_hgroup; break; - default: return SaveErrorCode(s, BROTLI_FAILURE( + default: return BROTLI_SAVE_ERROR_CODE(BROTLI_FAILURE( BROTLI_DECODER_ERROR_UNREACHABLE)); /* COV_NF_LINE */ } result = HuffmanTreeGroupDecode(hgroup, s); @@ -2710,10 +2763,11 @@ BrotliDecoderResult BrotliDecoderDecompressStream( break; } } - return SaveErrorCode(s, result); + return BROTLI_SAVE_ERROR_CODE(result); } } - return SaveErrorCode(s, result); + return BROTLI_SAVE_ERROR_CODE(result); +#undef BROTLI_SAVE_ERROR_CODE } BROTLI_BOOL BrotliDecoderHasMoreOutput(const BrotliDecoderState* s) { @@ -2743,7 +2797,7 @@ const uint8_t* BrotliDecoderTakeOutput(BrotliDecoderState* s, size_t* size) { } else { /* ... or stream is broken. Normally this should be caught by BrotliDecoderDecompressStream, this is just a safeguard. 
*/ - if ((int)status < 0) SaveErrorCode(s, status); + if ((int)status < 0) SaveErrorCode(s, status, 0); *size = 0; result = 0; } @@ -2776,10 +2830,19 @@ const char* BrotliDecoderErrorString(BrotliDecoderErrorCode c) { } } -uint32_t BrotliDecoderVersion() { +uint32_t BrotliDecoderVersion(void) { return BROTLI_VERSION; } +void BrotliDecoderSetMetadataCallbacks( + BrotliDecoderState* state, + brotli_decoder_metadata_start_func start_func, + brotli_decoder_metadata_chunk_func chunk_func, void* opaque) { + state->metadata_start_func = start_func; + state->metadata_chunk_func = chunk_func; + state->metadata_callback_opaque = opaque; +} + /* Escalate internal functions visibility; for testing purposes only. */ #if defined(BROTLI_TEST) BROTLI_BOOL SafeReadSymbolForTest( diff --git a/brotli/dec/huffman.c b/brotli/dec/huffman.c index 8f127d7b..38064548 100644 --- a/brotli/dec/huffman.c +++ b/brotli/dec/huffman.c @@ -10,9 +10,10 @@ #include /* memcpy, memset */ +#include + #include "../common/constants.h" #include "../common/platform.h" -#include #if defined(__cplusplus) || defined(c_plusplus) extern "C" { @@ -117,11 +118,13 @@ void BrotliBuildCodeLengthsHuffmanTable(HuffmanCode* table, int bits_count; BROTLI_DCHECK(BROTLI_HUFFMAN_MAX_CODE_LENGTH_CODE_LENGTH <= BROTLI_REVERSE_BITS_MAX); + BROTLI_DCHECK(BROTLI_HUFFMAN_MAX_CODE_LENGTH_CODE_LENGTH == 5); /* Generate offsets into sorted symbol table by code length. */ symbol = -1; bits = 1; - BROTLI_REPEAT(BROTLI_HUFFMAN_MAX_CODE_LENGTH_CODE_LENGTH, { + /* BROTLI_HUFFMAN_MAX_CODE_LENGTH_CODE_LENGTH == 5 */ + BROTLI_REPEAT_5({ symbol += count[bits]; offset[bits] = symbol; bits++; @@ -132,7 +135,7 @@ void BrotliBuildCodeLengthsHuffmanTable(HuffmanCode* table, /* Sort symbols by length, by symbol order within each length. 
*/ symbol = BROTLI_CODE_LENGTH_CODES; do { - BROTLI_REPEAT(6, { + BROTLI_REPEAT_6({ symbol--; sorted[offset[code_lengths[symbol]]--] = symbol; }); diff --git a/brotli/dec/huffman.h b/brotli/dec/huffman.h index a8fbc453..50360962 100644 --- a/brotli/dec/huffman.h +++ b/brotli/dec/huffman.h @@ -9,9 +9,10 @@ #ifndef BROTLI_DEC_HUFFMAN_H_ #define BROTLI_DEC_HUFFMAN_H_ -#include "../common/platform.h" #include +#include "../common/platform.h" + #if defined(__cplusplus) || defined(c_plusplus) extern "C" { #endif diff --git a/brotli/dec/prefix.h b/brotli/dec/prefix.h index 481a2c79..e8acf077 100644 --- a/brotli/dec/prefix.h +++ b/brotli/dec/prefix.h @@ -10,9 +10,10 @@ #ifndef BROTLI_DEC_PREFIX_H_ #define BROTLI_DEC_PREFIX_H_ -#include "../common/constants.h" #include +#include "../common/constants.h" + typedef struct CmdLutElement { uint8_t insert_len_extra_bits; uint8_t copy_len_extra_bits; diff --git a/brotli/dec/state.c b/brotli/dec/state.c index e3170c13..a3baf37b 100644 --- a/brotli/dec/state.c +++ b/brotli/dec/state.c @@ -8,8 +8,9 @@ #include /* free, malloc */ -#include "../common/dictionary.h" #include + +#include "../common/dictionary.h" #include "huffman.h" #if defined(__cplusplus) || defined(c_plusplus) @@ -43,6 +44,7 @@ BROTLI_BOOL BrotliDecoderStateInit(BrotliDecoderState* s, s->pos = 0; s->rb_roundtrips = 0; s->partial_pos_out = 0; + s->used_input = 0; s->block_type_trees = NULL; s->block_len_trees = NULL; @@ -87,6 +89,10 @@ BROTLI_BOOL BrotliDecoderStateInit(BrotliDecoderState* s, BrotliSharedDictionaryCreateInstance(alloc_func, free_func, opaque); if (!s->dictionary) return BROTLI_FALSE; + s->metadata_start_func = NULL; + s->metadata_chunk_func = NULL; + s->metadata_callback_opaque = 0; + return BROTLI_TRUE; } @@ -129,9 +135,21 @@ void BrotliDecoderStateCleanupAfterMetablock(BrotliDecoderState* s) { BROTLI_DECODER_FREE(s, s->distance_hgroup.htrees); } +#ifdef BROTLI_REPORTING +/* When BROTLI_REPORTING is defined extra reporting module have to be linked. 
*/ +void BrotliDecoderOnFinish(const BrotliDecoderState* s); +#define BROTLI_DECODER_ON_FINISH(s) BrotliDecoderOnFinish(s); +#else +#if !defined(BROTLI_DECODER_ON_FINISH) +#define BROTLI_DECODER_ON_FINISH(s) (void)(s); +#endif +#endif + void BrotliDecoderStateCleanup(BrotliDecoderState* s) { BrotliDecoderStateCleanupAfterMetablock(s); + BROTLI_DECODER_ON_FINISH(s); + BROTLI_DECODER_FREE(s, s->compound_dictionary); BrotliSharedDictionaryDestroyInstance(s->dictionary); s->dictionary = NULL; diff --git a/brotli/dec/state.h b/brotli/dec/state.h index 81e6bb67..84fddc8a 100644 --- a/brotli/dec/state.h +++ b/brotli/dec/state.h @@ -9,12 +9,14 @@ #ifndef BROTLI_DEC_STATE_H_ #define BROTLI_DEC_STATE_H_ +#include +#include +#include + #include "../common/constants.h" #include "../common/dictionary.h" #include "../common/platform.h" -#include #include "../common/transform.h" -#include #include "bit_reader.h" #include "huffman.h" @@ -321,6 +323,13 @@ struct BrotliDecoderStateStruct { /* Less used attributes are at the end of this struct. */ + brotli_decoder_metadata_start_func metadata_start_func; + brotli_decoder_metadata_chunk_func metadata_chunk_func; + void* metadata_callback_opaque; + + /* For reporting. */ + uint64_t used_input; /* how many bytes of input are consumed */ + /* States inside function calls. 
*/ BrotliRunningMetablockHeaderState substate_metablock_header; BrotliRunningUncompressedState substate_uncompressed; diff --git a/brotli/enc/backward_references.c b/brotli/enc/backward_references.c index 2cf01d8b..ff5b7bec 100644 --- a/brotli/enc/backward_references.c +++ b/brotli/enc/backward_references.c @@ -8,10 +8,11 @@ #include "backward_references.h" +#include + #include "../common/constants.h" #include "../common/dictionary.h" #include "../common/platform.h" -#include #include "command.h" #include "compound_dictionary.h" #include "dictionary_hash.h" diff --git a/brotli/enc/backward_references.h b/brotli/enc/backward_references.h index b051e18a..20fb98a4 100644 --- a/brotli/enc/backward_references.h +++ b/brotli/enc/backward_references.h @@ -9,11 +9,12 @@ #ifndef BROTLI_ENC_BACKWARD_REFERENCES_H_ #define BROTLI_ENC_BACKWARD_REFERENCES_H_ +#include + #include "../common/constants.h" #include "../common/context.h" #include "../common/dictionary.h" #include "../common/platform.h" -#include #include "command.h" #include "hash.h" #include "quality.h" diff --git a/brotli/enc/backward_references_hq.c b/brotli/enc/backward_references_hq.c index c6a6c8c7..6325032e 100644 --- a/brotli/enc/backward_references_hq.c +++ b/brotli/enc/backward_references_hq.c @@ -10,9 +10,10 @@ #include /* memcpy, memset */ +#include + #include "../common/constants.h" #include "../common/platform.h" -#include #include "command.h" #include "compound_dictionary.h" #include "encoder_dict.h" diff --git a/brotli/enc/backward_references_hq.h b/brotli/enc/backward_references_hq.h index c9dcc808..8acf975a 100644 --- a/brotli/enc/backward_references_hq.h +++ b/brotli/enc/backward_references_hq.h @@ -9,11 +9,12 @@ #ifndef BROTLI_ENC_BACKWARD_REFERENCES_HQ_H_ #define BROTLI_ENC_BACKWARD_REFERENCES_HQ_H_ +#include + #include "../common/constants.h" #include "../common/context.h" #include "../common/dictionary.h" #include "../common/platform.h" -#include #include "command.h" #include "hash.h" #include 
"memory.h" diff --git a/brotli/enc/bit_cost.c b/brotli/enc/bit_cost.c index 8ca4ab1a..6b7c904c 100644 --- a/brotli/enc/bit_cost.c +++ b/brotli/enc/bit_cost.c @@ -8,9 +8,10 @@ #include "bit_cost.h" +#include + #include "../common/constants.h" #include "../common/platform.h" -#include #include "fast_log.h" #include "histogram.h" diff --git a/brotli/enc/bit_cost.h b/brotli/enc/bit_cost.h index 4cf3b182..f6f27739 100644 --- a/brotli/enc/bit_cost.h +++ b/brotli/enc/bit_cost.h @@ -9,8 +9,9 @@ #ifndef BROTLI_ENC_BIT_COST_H_ #define BROTLI_ENC_BIT_COST_H_ -#include "../common/platform.h" #include + +#include "../common/platform.h" #include "fast_log.h" #include "histogram.h" diff --git a/brotli/enc/block_splitter.h b/brotli/enc/block_splitter.h index 1de072f1..6046b90a 100644 --- a/brotli/enc/block_splitter.h +++ b/brotli/enc/block_splitter.h @@ -9,8 +9,9 @@ #ifndef BROTLI_ENC_BLOCK_SPLITTER_H_ #define BROTLI_ENC_BLOCK_SPLITTER_H_ -#include "../common/platform.h" #include + +#include "../common/platform.h" #include "command.h" #include "memory.h" #include "quality.h" diff --git a/brotli/enc/brotli_bit_stream.c b/brotli/enc/brotli_bit_stream.c index d1051029..5fa0c69a 100644 --- a/brotli/enc/brotli_bit_stream.c +++ b/brotli/enc/brotli_bit_stream.c @@ -12,10 +12,11 @@ #include /* memcpy, memset */ +#include + #include "../common/constants.h" #include "../common/context.h" #include "../common/platform.h" -#include #include "entropy_encode.h" #include "entropy_encode_static.h" #include "fast_log.h" diff --git a/brotli/enc/brotli_bit_stream.h b/brotli/enc/brotli_bit_stream.h index 4285b7f8..a289509a 100644 --- a/brotli/enc/brotli_bit_stream.h +++ b/brotli/enc/brotli_bit_stream.h @@ -16,9 +16,10 @@ #ifndef BROTLI_ENC_BROTLI_BIT_STREAM_H_ #define BROTLI_ENC_BROTLI_BIT_STREAM_H_ +#include + #include "../common/context.h" #include "../common/platform.h" -#include #include "command.h" #include "entropy_encode.h" #include "memory.h" diff --git a/brotli/enc/cluster.c 
b/brotli/enc/cluster.c index b86bbfba..b0faf811 100644 --- a/brotli/enc/cluster.c +++ b/brotli/enc/cluster.c @@ -8,8 +8,9 @@ #include "cluster.h" -#include "../common/platform.h" #include + +#include "../common/platform.h" #include "bit_cost.h" /* BrotliPopulationCost */ #include "fast_log.h" #include "histogram.h" diff --git a/brotli/enc/cluster.h b/brotli/enc/cluster.h index 107e8a3c..013629c6 100644 --- a/brotli/enc/cluster.h +++ b/brotli/enc/cluster.h @@ -9,8 +9,9 @@ #ifndef BROTLI_ENC_CLUSTER_H_ #define BROTLI_ENC_CLUSTER_H_ -#include "../common/platform.h" #include + +#include "../common/platform.h" #include "histogram.h" #include "memory.h" diff --git a/brotli/enc/command.h b/brotli/enc/command.h index 43e35d7e..ba4de7ea 100644 --- a/brotli/enc/command.h +++ b/brotli/enc/command.h @@ -9,9 +9,10 @@ #ifndef BROTLI_ENC_COMMAND_H_ #define BROTLI_ENC_COMMAND_H_ +#include + #include "../common/constants.h" #include "../common/platform.h" -#include #include "fast_log.h" #include "params.h" #include "prefix.h" diff --git a/brotli/enc/compound_dictionary.c b/brotli/enc/compound_dictionary.c index d82772f2..a3b5e693 100644 --- a/brotli/enc/compound_dictionary.c +++ b/brotli/enc/compound_dictionary.c @@ -6,8 +6,9 @@ #include "compound_dictionary.h" -#include "../common/platform.h" #include + +#include "../common/platform.h" #include "memory.h" #include "quality.h" @@ -33,7 +34,7 @@ static PreparedDictionary* CreatePreparedDictionaryWithParams(MemoryManager* m, uint32_t* slot_offsets = NULL; uint16_t* heads = NULL; uint32_t* items = NULL; - uint8_t* source_copy = NULL; + uint8_t** source_ref = NULL; uint32_t i; uint32_t* slot_size = NULL; uint32_t* slot_limit = NULL; @@ -97,7 +98,7 @@ static PreparedDictionary* CreatePreparedDictionaryWithParams(MemoryManager* m, /* Step 3: transfer data to "slim" hasher. 
*/ alloc_size = sizeof(PreparedDictionary) + (sizeof(uint32_t) << slot_bits) + (sizeof(uint16_t) << bucket_bits) + (sizeof(uint32_t) * total_items) + - source_size; + sizeof(uint8_t*); result = (PreparedDictionary*)BROTLI_ALLOC(m, uint8_t, alloc_size); if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(result)) { @@ -107,14 +108,15 @@ static PreparedDictionary* CreatePreparedDictionaryWithParams(MemoryManager* m, slot_offsets = (uint32_t*)(&result[1]); heads = (uint16_t*)(&slot_offsets[num_slots]); items = (uint32_t*)(&heads[num_buckets]); - source_copy = (uint8_t*)(&items[total_items]); + source_ref = (uint8_t**)(&items[total_items]); - result->magic = kPreparedDictionaryMagic; - result->source_offset = total_items; + result->magic = kLeanPreparedDictionaryMagic; + result->num_items = total_items; result->source_size = (uint32_t)source_size; result->hash_bits = hash_bits; result->bucket_bits = bucket_bits; result->slot_bits = slot_bits; + BROTLI_UNALIGNED_STORE_PTR(source_ref, source); total_items = 0; for (i = 0; i < num_slots; ++i) { @@ -145,7 +147,6 @@ static PreparedDictionary* CreatePreparedDictionaryWithParams(MemoryManager* m, } BROTLI_FREE(m, flat); - memcpy(source_copy, source, source_size); return result; } @@ -192,8 +193,14 @@ BROTLI_BOOL AttachPreparedDictionary( uint32_t* slot_offsets = (uint32_t*)(&dictionary[1]); uint16_t* heads = (uint16_t*)(&slot_offsets[1u << dictionary->slot_bits]); uint32_t* items = (uint32_t*)(&heads[1u << dictionary->bucket_bits]); - compound->chunk_source[index] = - (const uint8_t*)(&items[dictionary->source_offset]); + const void* tail = (void*)&items[dictionary->num_items]; + if (dictionary->magic == kPreparedDictionaryMagic) { + compound->chunk_source[index] = (const uint8_t*)tail; + } else { + /* dictionary->magic == kLeanPreparedDictionaryMagic */ + compound->chunk_source[index] = + (const uint8_t*)BROTLI_UNALIGNED_LOAD_PTR((const uint8_t**)tail); + } } compound->num_chunks++; return BROTLI_TRUE; diff --git 
a/brotli/enc/compound_dictionary.h b/brotli/enc/compound_dictionary.h index 60b12d26..9c531d5b 100644 --- a/brotli/enc/compound_dictionary.h +++ b/brotli/enc/compound_dictionary.h @@ -7,19 +7,32 @@ #ifndef BROTLI_ENC_PREPARED_DICTIONARY_H_ #define BROTLI_ENC_PREPARED_DICTIONARY_H_ -#include "../common/platform.h" -#include "../common/constants.h" #include #include + +#include "../common/platform.h" +#include "../common/constants.h" #include "memory.h" +/* "Fat" prepared dictionary, could be cooked outside of C implementation, + * e.g. on Java side. LZ77 data is copied inside PreparedDictionary struct. */ static const uint32_t kPreparedDictionaryMagic = 0xDEBCEDE0; + +static const uint32_t kSharedDictionaryMagic = 0xDEBCEDE1; + +static const uint32_t kManagedDictionaryMagic = 0xDEBCEDE2; + +/* "Lean" prepared dictionary. LZ77 data is referenced. It is the responsibility + * of caller of "prepare dictionary" to keep the LZ77 data while prepared + * dictionary is in use. */ +static const uint32_t kLeanPreparedDictionaryMagic = 0xDEBCEDE3; + static const uint64_t kPreparedDictionaryHashMul64Long = BROTLI_MAKE_UINT64_T(0x1FE35A7Bu, 0xD3579BD3u); typedef struct PreparedDictionary { uint32_t magic; - uint32_t source_offset; + uint32_t num_items; uint32_t source_size; uint32_t hash_bits; uint32_t bucket_bits; @@ -31,7 +44,8 @@ typedef struct PreparedDictionary { /* uint16_t heads[1 << bucket_bits]; */ /* uint32_t items[variable]; */ - /* uint8_t source[source_size] */ + /* [maybe] uint8_t* source_ref, depending on magic. */ + /* [maybe] uint8_t source[source_size], depending on magic. 
*/ } PreparedDictionary; BROTLI_INTERNAL PreparedDictionary* CreatePreparedDictionary(MemoryManager* m, diff --git a/brotli/enc/compress_fragment.c b/brotli/enc/compress_fragment.c index 1f478ca1..13890eab 100644 --- a/brotli/enc/compress_fragment.c +++ b/brotli/enc/compress_fragment.c @@ -16,8 +16,9 @@ #include /* memcmp, memcpy, memset */ -#include "../common/platform.h" #include + +#include "../common/platform.h" #include "brotli_bit_stream.h" #include "entropy_encode.h" #include "fast_log.h" diff --git a/brotli/enc/compress_fragment.h b/brotli/enc/compress_fragment.h index 099a9791..9c0780f8 100644 --- a/brotli/enc/compress_fragment.h +++ b/brotli/enc/compress_fragment.h @@ -12,9 +12,10 @@ #ifndef BROTLI_ENC_COMPRESS_FRAGMENT_H_ #define BROTLI_ENC_COMPRESS_FRAGMENT_H_ +#include + #include "../common/constants.h" #include "../common/platform.h" -#include #include "entropy_encode.h" #if defined(__cplusplus) || defined(c_plusplus) diff --git a/brotli/enc/compress_fragment_two_pass.c b/brotli/enc/compress_fragment_two_pass.c index 4cbb4185..a762679c 100644 --- a/brotli/enc/compress_fragment_two_pass.c +++ b/brotli/enc/compress_fragment_two_pass.c @@ -14,9 +14,10 @@ #include /* memcmp, memcpy, memset */ +#include + #include "../common/constants.h" #include "../common/platform.h" -#include #include "bit_cost.h" #include "brotli_bit_stream.h" #include "entropy_encode.h" diff --git a/brotli/enc/compress_fragment_two_pass.h b/brotli/enc/compress_fragment_two_pass.h index f5d07413..6d28d9bb 100644 --- a/brotli/enc/compress_fragment_two_pass.h +++ b/brotli/enc/compress_fragment_two_pass.h @@ -13,9 +13,10 @@ #ifndef BROTLI_ENC_COMPRESS_FRAGMENT_TWO_PASS_H_ #define BROTLI_ENC_COMPRESS_FRAGMENT_TWO_PASS_H_ +#include + #include "../common/constants.h" #include "../common/platform.h" -#include #include "entropy_encode.h" #if defined(__cplusplus) || defined(c_plusplus) diff --git a/brotli/enc/encode.c b/brotli/enc/encode.c index afceba4a..4627ea00 100644 --- 
a/brotli/enc/encode.c +++ b/brotli/enc/encode.c @@ -30,6 +30,7 @@ #include "memory.h" #include "metablock.h" #include "prefix.h" +#include "state.h" #include "quality.h" #include "ringbuffer.h" #include "utf8_util.h" @@ -41,84 +42,6 @@ extern "C" { #define COPY_ARRAY(dst, src) memcpy(dst, src, sizeof(src)); -typedef enum BrotliEncoderStreamState { - /* Default state. */ - BROTLI_STREAM_PROCESSING = 0, - /* Intermediate state; after next block is emitted, byte-padding should be - performed before getting back to default state. */ - BROTLI_STREAM_FLUSH_REQUESTED = 1, - /* Last metablock was produced; no more input is acceptable. */ - BROTLI_STREAM_FINISHED = 2, - /* Flushing compressed block and writing meta-data block header. */ - BROTLI_STREAM_METADATA_HEAD = 3, - /* Writing metadata block body. */ - BROTLI_STREAM_METADATA_BODY = 4 -} BrotliEncoderStreamState; - -typedef enum BrotliEncoderFlintState { - BROTLI_FLINT_NEEDS_2_BYTES = 2, - BROTLI_FLINT_NEEDS_1_BYTE = 1, - BROTLI_FLINT_WAITING_FOR_PROCESSING = 0, - BROTLI_FLINT_WAITING_FOR_FLUSHING = -1, - BROTLI_FLINT_DONE = -2 -} BrotliEncoderFlintState; - -typedef struct BrotliEncoderStateStruct { - BrotliEncoderParams params; - - MemoryManager memory_manager_; - - uint64_t input_pos_; - RingBuffer ringbuffer_; - size_t cmd_alloc_size_; - Command* commands_; - size_t num_commands_; - size_t num_literals_; - size_t last_insert_len_; - uint64_t last_flush_pos_; - uint64_t last_processed_pos_; - int dist_cache_[BROTLI_NUM_DISTANCE_SHORT_CODES]; - int saved_dist_cache_[4]; - uint16_t last_bytes_; - uint8_t last_bytes_bits_; - /* "Flint" is a tiny uncompressed block emitted before the continuation - block to unwire literal context from previous data. Despite being int8_t, - field is actually BrotliEncoderFlintState enum. */ - int8_t flint_; - uint8_t prev_byte_; - uint8_t prev_byte2_; - size_t storage_size_; - uint8_t* storage_; - - Hasher hasher_; - - /* Hash table for FAST_ONE_PASS_COMPRESSION_QUALITY mode. 
*/ - int small_table_[1 << 10]; /* 4KiB */ - int* large_table_; /* Allocated only when needed */ - size_t large_table_size_; - - BrotliOnePassArena* one_pass_arena_; - BrotliTwoPassArena* two_pass_arena_; - - /* Command and literal buffers for FAST_TWO_PASS_COMPRESSION_QUALITY. */ - uint32_t* command_buf_; - uint8_t* literal_buf_; - - uint8_t* next_out_; - size_t available_out_; - size_t total_out_; - /* Temporary buffer for padding flush bits or metadata block header / body. */ - union { - uint64_t u64[2]; - uint8_t u8[16]; - } tiny_buf_; - uint32_t remaining_metadata_bytes_; - BrotliEncoderStreamState stream_state_; - - BROTLI_BOOL is_last_block_emitted_; - BROTLI_BOOL is_initialized_; -} BrotliEncoderStateStruct; - static size_t InputBlockSize(BrotliEncoderState* s) { return (size_t)1 << s->params.lgblock; } @@ -780,6 +703,7 @@ static void BrotliEncoderInitState(BrotliEncoderState* s) { s->two_pass_arena_ = NULL; s->command_buf_ = NULL; s->literal_buf_ = NULL; + s->total_in_ = 0; s->next_out_ = NULL; s->available_out_ = 0; s->total_out_ = 0; @@ -816,12 +740,26 @@ BrotliEncoderState* BrotliEncoderCreateInstance( return state; } +#ifdef BROTLI_REPORTING +/* When BROTLI_REPORTING is defined extra reporting module have to be linked. 
*/ +void BrotliEncoderOnFinish(const BrotliEncoderState* s); +#define BROTLI_ENCODER_ON_FINISH(s) BrotliEncoderOnFinish(s); +#else +#if !defined(BROTLI_ENCODER_ON_FINISH) +#define BROTLI_ENCODER_ON_FINISH(s) (void)(s); +#endif +#endif + static void BrotliEncoderCleanupState(BrotliEncoderState* s) { MemoryManager* m = &s->memory_manager_; + + BROTLI_ENCODER_ON_FINISH(s); + if (BROTLI_IS_OOM(m)) { BrotliWipeOutMemoryManager(m); return; } + BROTLI_FREE(m, s->storage_); BROTLI_FREE(m, s->commands_); RingBufferFree(m, &s->ringbuffer_); @@ -1006,10 +944,38 @@ static BROTLI_BOOL EncodeData( MemoryManager* m = &s->memory_manager_; ContextType literal_context_mode; ContextLut literal_context_lut; + BROTLI_BOOL fast_compress = + s->params.quality == FAST_ONE_PASS_COMPRESSION_QUALITY || + s->params.quality == FAST_TWO_PASS_COMPRESSION_QUALITY; data = s->ringbuffer_.buffer_; mask = s->ringbuffer_.mask_; + if (delta == 0) { /* No new input; still might want to flush or finish. */ + if (!data) { /* No input has been processed so far. */ + if (is_last) { /* Emit complete finalized stream. */ + BROTLI_DCHECK(s->last_bytes_bits_ <= 14); + s->last_bytes_ |= (uint16_t)(3u << s->last_bytes_bits_); + s->last_bytes_bits_ = (uint8_t)(s->last_bytes_bits_ + 2u); + s->tiny_buf_.u8[0] = (uint8_t)s->last_bytes_; + s->tiny_buf_.u8[1] = (uint8_t)(s->last_bytes_ >> 8); + *output = s->tiny_buf_.u8; + *out_size = (s->last_bytes_bits_ + 7u) >> 3u; + return BROTLI_TRUE; + } else { /* No data, not last -> no-op. */ + *out_size = 0; + return BROTLI_TRUE; + } + } else { + /* Fast compress performs flush every block -> flush is no-op. */ + if (!is_last && (!force_flush || fast_compress)) { /* Another no-op. */ + *out_size = 0; + return BROTLI_TRUE; + } + } + } + BROTLI_DCHECK(data); + if (s->params.quality > s->params.dictionary.max_quality) return BROTLI_FALSE; /* Adding more blocks after "last" block is forbidden. 
*/ if (s->is_last_block_emitted_) return BROTLI_FALSE; @@ -1030,19 +996,12 @@ static BROTLI_BOOL EncodeData( } } - if (s->params.quality == FAST_ONE_PASS_COMPRESSION_QUALITY || - s->params.quality == FAST_TWO_PASS_COMPRESSION_QUALITY) { + if (fast_compress) { uint8_t* storage; size_t storage_ix = s->last_bytes_bits_; size_t table_size; int* table; - if (delta == 0 && !is_last) { - /* We have no new input data and we don't have to finish the stream, so - nothing to do. */ - *out_size = 0; - return BROTLI_TRUE; - } storage = GetBrotliStorage(s, 2 * bytes + 503); if (BROTLI_IS_OOM(m)) return BROTLI_FALSE; storage[0] = (uint8_t)s->last_bytes_; @@ -1229,7 +1188,7 @@ static size_t WriteMetadataHeader( if (block_size == 0) { BrotliWriteBits(2, 0, &storage_ix, header); } else { - uint32_t nbits = (block_size == 1) ? 0 : + uint32_t nbits = (block_size == 1) ? 1 : (Log2FloorNonZero((uint32_t)block_size - 1) + 1); uint32_t nbytes = (nbits + 7) / 8; BrotliWriteBits(2, nbytes, &storage_ix, header); @@ -1238,242 +1197,6 @@ static size_t WriteMetadataHeader( return (storage_ix + 7u) >> 3; } -static BROTLI_NOINLINE BROTLI_BOOL BrotliCompressBufferQuality10( - int lgwin, size_t input_size, const uint8_t* input_buffer, - size_t* encoded_size, uint8_t* encoded_buffer) { - MemoryManager* m = - (MemoryManager*)BrotliBootstrapAlloc(sizeof(MemoryManager), 0, 0, 0); - - const size_t mask = BROTLI_SIZE_MAX >> 1; - int dist_cache[4] = { 4, 11, 15, 16 }; - int saved_dist_cache[4] = { 4, 11, 15, 16 }; - BROTLI_BOOL ok = BROTLI_TRUE; - const size_t max_out_size = *encoded_size; - size_t total_out_size = 0; - uint16_t last_bytes; - uint8_t last_bytes_bits; - - const size_t hasher_eff_size = BROTLI_MIN(size_t, - input_size, BROTLI_MAX_BACKWARD_LIMIT(lgwin) + BROTLI_WINDOW_GAP); - - const int lgmetablock = BROTLI_MIN(int, 24, lgwin + 1); - size_t max_block_size; - const size_t max_metablock_size = (size_t)1 << lgmetablock; - const size_t max_literals_per_metablock = max_metablock_size / 8; - 
const size_t max_commands_per_metablock = max_metablock_size / 8; - size_t metablock_start = 0; - uint8_t prev_byte = 0; - uint8_t prev_byte2 = 0; - - BrotliEncoderParams* params = NULL; - Hasher* hasher = NULL; - - if (m == NULL) return BROTLI_FALSE; - BrotliInitMemoryManager(m, 0, 0, 0); - params = BROTLI_ALLOC(m, BrotliEncoderParams, 2); - hasher = BROTLI_ALLOC(m, Hasher, 1); - if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(params) || BROTLI_IS_NULL(hasher)) { - goto oom; - } - BrotliEncoderInitParams(params); - HasherInit(hasher); - - params->quality = 10; - params->lgwin = lgwin; - if (lgwin > BROTLI_MAX_WINDOW_BITS) { - params->large_window = BROTLI_TRUE; - } - SanitizeParams(params); - params->lgblock = ComputeLgBlock(params); - ChooseDistanceParams(params); - max_block_size = (size_t)1 << params->lgblock; - - /* Since default static dictionary is used we assume that - * params->quality < params->dictionary.max_quality. */ - - BROTLI_DCHECK(input_size <= mask + 1); - EncodeWindowBits(lgwin, params->large_window, &last_bytes, &last_bytes_bits); - InitOrStitchToPreviousBlock(m, hasher, input_buffer, mask, params, - 0, hasher_eff_size, BROTLI_TRUE); - if (BROTLI_IS_OOM(m)) goto oom; - - while (ok && metablock_start < input_size) { - const size_t metablock_end = - BROTLI_MIN(size_t, input_size, metablock_start + max_metablock_size); - const size_t expected_num_commands = - (metablock_end - metablock_start) / 12 + 16; - Command* commands = 0; - size_t num_commands = 0; - size_t last_insert_len = 0; - size_t num_literals = 0; - size_t metablock_size = 0; - size_t cmd_alloc_size = 0; - BROTLI_BOOL is_last; - uint8_t* storage; - size_t storage_ix; - - ContextType literal_context_mode = ChooseContextMode(params, - input_buffer, metablock_start, mask, metablock_end - metablock_start); - ContextLut literal_context_lut = BROTLI_CONTEXT_LUT(literal_context_mode); - - size_t block_start; - for (block_start = metablock_start; block_start < metablock_end; ) { - size_t block_size = 
- BROTLI_MIN(size_t, metablock_end - block_start, max_block_size); - ZopfliNode* nodes = BROTLI_ALLOC(m, ZopfliNode, block_size + 1); - size_t path_size; - size_t new_cmd_alloc_size; - if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(nodes)) goto oom; - BrotliInitZopfliNodes(nodes, block_size + 1); - StitchToPreviousBlockH10(&hasher->privat._H10, block_size, block_start, - input_buffer, mask); - path_size = BrotliZopfliComputeShortestPath(m, block_size, block_start, - input_buffer, mask, literal_context_lut, params, dist_cache, hasher, - nodes); - if (BROTLI_IS_OOM(m)) goto oom; - /* We allocate a command buffer in the first iteration of this loop that - will be likely big enough for the whole metablock, so that for most - inputs we will not have to reallocate in later iterations. We do the - allocation here and not before the loop, because if the input is small, - this will be allocated after the Zopfli cost model is freed, so this - will not increase peak memory usage. - TODO(eustas): If the first allocation is too small, increase command - buffer size exponentially. 
*/ - new_cmd_alloc_size = BROTLI_MAX(size_t, expected_num_commands, - num_commands + path_size + 1); - if (cmd_alloc_size != new_cmd_alloc_size) { - Command* new_commands = BROTLI_ALLOC(m, Command, new_cmd_alloc_size); - if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(new_commands)) goto oom; - cmd_alloc_size = new_cmd_alloc_size; - if (commands) { - memcpy(new_commands, commands, sizeof(Command) * num_commands); - BROTLI_FREE(m, commands); - } - commands = new_commands; - } - BrotliZopfliCreateCommands(block_size, block_start, &nodes[0], dist_cache, - &last_insert_len, params, &commands[num_commands], &num_literals); - num_commands += path_size; - block_start += block_size; - metablock_size += block_size; - BROTLI_FREE(m, nodes); - if (num_literals > max_literals_per_metablock || - num_commands > max_commands_per_metablock) { - break; - } - } - - if (last_insert_len > 0) { - InitInsertCommand(&commands[num_commands++], last_insert_len); - num_literals += last_insert_len; - } - - is_last = TO_BROTLI_BOOL(metablock_start + metablock_size == input_size); - storage = NULL; - storage_ix = last_bytes_bits; - - if (metablock_size == 0) { - /* Write the ISLAST and ISEMPTY bits. */ - storage = BROTLI_ALLOC(m, uint8_t, 16); - if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(storage)) goto oom; - storage[0] = (uint8_t)last_bytes; - storage[1] = (uint8_t)(last_bytes >> 8); - BrotliWriteBits(2, 3, &storage_ix, storage); - storage_ix = (storage_ix + 7u) & ~7u; - } else if (!ShouldCompress(input_buffer, mask, metablock_start, - metablock_size, num_literals, num_commands)) { - /* Restore the distance cache, as its last update by - CreateBackwardReferences is now unused. 
*/ - memcpy(dist_cache, saved_dist_cache, 4 * sizeof(dist_cache[0])); - storage = BROTLI_ALLOC(m, uint8_t, metablock_size + 16); - if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(storage)) goto oom; - storage[0] = (uint8_t)last_bytes; - storage[1] = (uint8_t)(last_bytes >> 8); - BrotliStoreUncompressedMetaBlock(is_last, input_buffer, - metablock_start, mask, metablock_size, - &storage_ix, storage); - } else { - MetaBlockSplit mb; - BrotliEncoderParams* block_params = params + 1; - *block_params = *params; /* shallow copy */ - InitMetaBlockSplit(&mb); - BrotliBuildMetaBlock(m, input_buffer, metablock_start, mask, - block_params, - prev_byte, prev_byte2, - commands, num_commands, - literal_context_mode, - &mb); - if (BROTLI_IS_OOM(m)) goto oom; - { - /* The number of distance symbols effectively used for distance - histograms. It might be less than distance alphabet size - for "Large Window Brotli" (32-bit). */ - BrotliOptimizeHistograms(block_params->dist.alphabet_size_limit, &mb); - } - storage = BROTLI_ALLOC(m, uint8_t, 2 * metablock_size + 503); - if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(storage)) goto oom; - storage[0] = (uint8_t)last_bytes; - storage[1] = (uint8_t)(last_bytes >> 8); - BrotliStoreMetaBlock(m, input_buffer, metablock_start, metablock_size, - mask, prev_byte, prev_byte2, - is_last, - block_params, - literal_context_mode, - commands, num_commands, - &mb, - &storage_ix, storage); - if (BROTLI_IS_OOM(m)) goto oom; - if (metablock_size + 4 < (storage_ix >> 3)) { - /* Restore the distance cache and last byte. 
*/ - memcpy(dist_cache, saved_dist_cache, 4 * sizeof(dist_cache[0])); - storage[0] = (uint8_t)last_bytes; - storage[1] = (uint8_t)(last_bytes >> 8); - storage_ix = last_bytes_bits; - BrotliStoreUncompressedMetaBlock(is_last, input_buffer, - metablock_start, mask, - metablock_size, &storage_ix, storage); - } - DestroyMetaBlockSplit(m, &mb); - } - last_bytes = (uint16_t)(storage[storage_ix >> 3]); - last_bytes_bits = storage_ix & 7u; - metablock_start += metablock_size; - if (metablock_start < input_size) { - prev_byte = input_buffer[metablock_start - 1]; - prev_byte2 = input_buffer[metablock_start - 2]; - } - /* Save the state of the distance cache in case we need to restore it for - emitting an uncompressed block. */ - memcpy(saved_dist_cache, dist_cache, 4 * sizeof(dist_cache[0])); - - { - const size_t out_size = storage_ix >> 3; - total_out_size += out_size; - if (total_out_size <= max_out_size) { - memcpy(encoded_buffer, storage, out_size); - encoded_buffer += out_size; - } else { - ok = BROTLI_FALSE; - } - } - BROTLI_FREE(m, storage); - BROTLI_FREE(m, commands); - } - - *encoded_size = total_out_size; - DestroyHasher(m, hasher); - BROTLI_FREE(m, hasher); - BrotliEncoderCleanupParams(m, params); - BROTLI_FREE(m, params); - BrotliBootstrapFree(m, m); - return ok; - -oom: - BrotliWipeOutMemoryManager(m); - BrotliBootstrapFree(m, m); - return BROTLI_FALSE; -} - size_t BrotliEncoderMaxCompressedSize(size_t input_size) { /* [window bits / empty metadata] + N * [uncompressed] + [last empty] */ size_t num_large_blocks = input_size >> 14; @@ -1539,17 +1262,6 @@ BROTLI_BOOL BrotliEncoderCompress( *encoded_buffer = 6; return BROTLI_TRUE; } - if (quality == 10) { - /* TODO(eustas): Implement this direct path for all quality levels. 
*/ - const int lg_win = BROTLI_MIN(int, BROTLI_LARGE_MAX_WINDOW_BITS, - BROTLI_MAX(int, 16, lgwin)); - int ok = BrotliCompressBufferQuality10(lg_win, input_size, input_buffer, - encoded_size, encoded_buffer); - if (!ok || (max_out_size && *encoded_size > max_out_size)) { - goto fallback; - } - return BROTLI_TRUE; - } s = BrotliEncoderCreateInstance(0, 0, 0); if (!s) { @@ -1561,6 +1273,7 @@ BROTLI_BOOL BrotliEncoderCompress( uint8_t* next_out = encoded_buffer; size_t total_out = 0; BROTLI_BOOL result = BROTLI_FALSE; + /* TODO(eustas): check that parameters are sane. */ BrotliEncoderSetParameter(s, BROTLI_PARAM_QUALITY, (uint32_t)quality); BrotliEncoderSetParameter(s, BROTLI_PARAM_LGWIN, (uint32_t)lgwin); BrotliEncoderSetParameter(s, BROTLI_PARAM_MODE, (uint32_t)mode); @@ -1612,6 +1325,18 @@ static void InjectBytePaddingBlock(BrotliEncoderState* s) { s->available_out_ += (seal_bits + 7) >> 3; } +/* Fills the |total_out|, if it is not NULL. */ +static void SetTotalOut(BrotliEncoderState* s, size_t* total_out) { + if (total_out) { + /* Saturating conversion uint64_t -> size_t */ + size_t result = (size_t)-1; + if (s->total_out_ < result) { + result = (size_t)s->total_out_; + } + *total_out = result; + } +} + /* Injects padding bits or pushes compressed data to output. Returns false if nothing is done. 
*/ static BROTLI_BOOL InjectFlushOrPushOutput(BrotliEncoderState* s, @@ -1631,7 +1356,7 @@ static BROTLI_BOOL InjectFlushOrPushOutput(BrotliEncoderState* s, s->next_out_ += copy_output_size; s->available_out_ -= copy_output_size; s->total_out_ += copy_output_size; - if (total_out) *total_out = s->total_out_; + SetTotalOut(s, total_out); return BROTLI_TRUE; } @@ -1740,6 +1465,7 @@ static BROTLI_BOOL BrotliEncoderCompressStreamFast( if (block_size != 0) { *next_in += block_size; *available_in -= block_size; + s->total_in_ += block_size; } if (inplace) { size_t out_bytes = storage_ix >> 3; @@ -1748,7 +1474,7 @@ static BROTLI_BOOL BrotliEncoderCompressStreamFast( *next_out += out_bytes; *available_out -= out_bytes; s->total_out_ += out_bytes; - if (total_out) *total_out = s->total_out_; + SetTotalOut(s, total_out); } else { size_t out_bytes = storage_ix >> 3; s->next_out_ = storage; @@ -1817,6 +1543,7 @@ static BROTLI_BOOL ProcessMetadata( memcpy(*next_out, *next_in, copy); *next_in += copy; *available_in -= copy; + s->total_in_ += copy; /* not actually data input, though */ s->remaining_metadata_bytes_ -= copy; *next_out += copy; *available_out -= copy; @@ -1827,6 +1554,7 @@ static BROTLI_BOOL ProcessMetadata( memcpy(s->next_out_, *next_in, copy); *next_in += copy; *available_in -= copy; + s->total_in_ += copy; /* not actually data input, though */ s->remaining_metadata_bytes_ -= copy; s->available_out_ = copy; } @@ -1854,7 +1582,7 @@ static void UpdateSizeHint(BrotliEncoderState* s, size_t available_in) { BROTLI_BOOL BrotliEncoderCompressStream( BrotliEncoderState* s, BrotliEncoderOperation op, size_t* available_in, - const uint8_t** next_in, size_t* available_out,uint8_t** next_out, + const uint8_t** next_in, size_t* available_out, uint8_t** next_out, size_t* total_out) { if (!EnsureInitialized(s)) return BROTLI_FALSE; @@ -1896,6 +1624,7 @@ BROTLI_BOOL BrotliEncoderCompressStream( CopyInputToRingBuffer(s, copy_input_size, *next_in); *next_in += copy_input_size; 
*available_in -= copy_input_size; + s->total_in_ += copy_input_size; if (s->flint_ > 0) s->flint_ = (int8_t)(s->flint_ - (int)copy_input_size); continue; } @@ -2021,7 +1750,7 @@ void BrotliEncoderDestroyPreparedDictionary( } if (dict->dictionary == NULL) { /* This should never ever happen. */ - } else if (*dict->dictionary == kPreparedDictionaryMagic) { + } else if (*dict->dictionary == kLeanPreparedDictionaryMagic) { DestroyPreparedDictionary( &dict->memory_manager_, (PreparedDictionary*)dict->dictionary); } else if (*dict->dictionary == kSharedDictionaryMagic) { @@ -2029,7 +1758,8 @@ void BrotliEncoderDestroyPreparedDictionary( (SharedEncoderDictionary*)dict->dictionary); BrotliFree(&dict->memory_manager_, dict->dictionary); } else { - /* This should never ever happen. */ + /* There is also kPreparedDictionaryMagic, but such instances should be + * constructed and destroyed by different means. */ } dict->dictionary = NULL; BrotliDestroyManagedDictionary(dict); @@ -2048,7 +1778,8 @@ BROTLI_BOOL BrotliEncoderAttachPreparedDictionary(BrotliEncoderState* state, dict = (BrotliEncoderPreparedDictionary*)managed_dictionary->dictionary; } current = &state->params.dictionary; - if (magic == kPreparedDictionaryMagic) { + if (magic == kPreparedDictionaryMagic || + magic == kLeanPreparedDictionaryMagic) { const PreparedDictionary* prepared = (const PreparedDictionary*)dict; if (!AttachPreparedDictionary(¤t->compound, prepared)) { return BROTLI_FALSE; @@ -2098,6 +1829,7 @@ size_t BrotliEncoderEstimatePeakMemoryUsage(int quality, int lgwin, params.quality = quality; params.lgwin = lgwin; params.size_hint = input_size; + params.large_window = lgwin > BROTLI_MAX_WINDOW_BITS; SanitizeParams(¶ms); params.lgblock = ComputeLgBlock(¶ms); ChooseHasher(¶ms, ¶ms.hasher); @@ -2176,7 +1908,15 @@ size_t BrotliEncoderGetPreparedDictionarySize( return sizeof(PreparedDictionary) + dictionary->source_size + (sizeof(uint32_t) << dictionary->slot_bits) + (sizeof(uint16_t) << 
dictionary->bucket_bits) + - (sizeof(uint32_t) * dictionary->source_offset) + overhead; + (sizeof(uint32_t) * dictionary->num_items) + overhead; + } else if (magic == kLeanPreparedDictionaryMagic) { + const PreparedDictionary* dictionary = + (const PreparedDictionary*)prepared; + /* Keep in sync with step 3 of CreatePreparedDictionary */ + return sizeof(PreparedDictionary) + sizeof(uint8_t*) + + (sizeof(uint32_t) << dictionary->slot_bits) + + (sizeof(uint16_t) << dictionary->bucket_bits) + + (sizeof(uint32_t) * dictionary->num_items) + overhead; } else if (magic == kSharedDictionaryMagic) { const SharedEncoderDictionary* dictionary = (const SharedEncoderDictionary*)prepared; diff --git a/brotli/enc/encoder_dict.h b/brotli/enc/encoder_dict.h index b5b591d3..b291f98b 100644 --- a/brotli/enc/encoder_dict.h +++ b/brotli/enc/encoder_dict.h @@ -7,10 +7,11 @@ #ifndef BROTLI_ENC_ENCODER_DICT_H_ #define BROTLI_ENC_ENCODER_DICT_H_ -#include "../common/dictionary.h" -#include "../common/platform.h" #include #include + +#include "../common/dictionary.h" +#include "../common/platform.h" #include "compound_dictionary.h" #include "memory.h" #include "static_dict_lut.h" @@ -103,9 +104,6 @@ typedef struct ContextualEncoderDictionary { BrotliEncoderDictionary* instances_; } ContextualEncoderDictionary; -static const uint32_t kSharedDictionaryMagic = 0xDEBCEDE1; -static const uint32_t kManagedDictionaryMagic = 0xDEBCEDE2; - typedef struct SharedEncoderDictionary { /* Magic value to distinguish this struct from PreparedDictionary for certain external usages. 
*/ diff --git a/brotli/enc/entropy_encode.c b/brotli/enc/entropy_encode.c index b2dcbbdb..9aed43b6 100644 --- a/brotli/enc/entropy_encode.c +++ b/brotli/enc/entropy_encode.c @@ -10,9 +10,10 @@ #include /* memset */ +#include + #include "../common/constants.h" #include "../common/platform.h" -#include #if defined(__cplusplus) || defined(c_plusplus) extern "C" { diff --git a/brotli/enc/entropy_encode.h b/brotli/enc/entropy_encode.h index 9618e1d3..e1c779cc 100644 --- a/brotli/enc/entropy_encode.h +++ b/brotli/enc/entropy_encode.h @@ -9,9 +9,10 @@ #ifndef BROTLI_ENC_ENTROPY_ENCODE_H_ #define BROTLI_ENC_ENTROPY_ENCODE_H_ -#include "../common/platform.h" #include +#include "../common/platform.h" + #if defined(__cplusplus) || defined(c_plusplus) extern "C" { #endif diff --git a/brotli/enc/entropy_encode_static.h b/brotli/enc/entropy_encode_static.h index 2be1c6d7..ecff1fe9 100644 --- a/brotli/enc/entropy_encode_static.h +++ b/brotli/enc/entropy_encode_static.h @@ -9,9 +9,10 @@ #ifndef BROTLI_ENC_ENTROPY_ENCODE_STATIC_H_ #define BROTLI_ENC_ENTROPY_ENCODE_STATIC_H_ +#include + #include "../common/constants.h" #include "../common/platform.h" -#include #include "write_bits.h" #if defined(__cplusplus) || defined(c_plusplus) diff --git a/brotli/enc/fast_log.h b/brotli/enc/fast_log.h index 2094f13e..f82f4cff 100644 --- a/brotli/enc/fast_log.h +++ b/brotli/enc/fast_log.h @@ -11,9 +11,10 @@ #include -#include "../common/platform.h" #include +#include "../common/platform.h" + #if defined(__cplusplus) || defined(c_plusplus) extern "C" { #endif diff --git a/brotli/enc/find_match_length.h b/brotli/enc/find_match_length.h index f8853a70..f3de0bdb 100644 --- a/brotli/enc/find_match_length.h +++ b/brotli/enc/find_match_length.h @@ -9,9 +9,10 @@ #ifndef BROTLI_ENC_FIND_MATCH_LENGTH_H_ #define BROTLI_ENC_FIND_MATCH_LENGTH_H_ -#include "../common/platform.h" #include +#include "../common/platform.h" + #if defined(__cplusplus) || defined(c_plusplus) extern "C" { #endif @@ -21,31 +22,23 @@ 
extern "C" { static BROTLI_INLINE size_t FindMatchLengthWithLimit(const uint8_t* s1, const uint8_t* s2, size_t limit) { - size_t matched = 0; - size_t limit2 = (limit >> 3) + 1; /* + 1 is for pre-decrement in while */ - while (BROTLI_PREDICT_TRUE(--limit2)) { - if (BROTLI_PREDICT_FALSE(BROTLI_UNALIGNED_LOAD64LE(s2) == - BROTLI_UNALIGNED_LOAD64LE(s1 + matched))) { - s2 += 8; - matched += 8; - } else { - uint64_t x = BROTLI_UNALIGNED_LOAD64LE(s2) ^ - BROTLI_UNALIGNED_LOAD64LE(s1 + matched); + const uint8_t *s1_orig = s1; + for (; limit >= 8; limit -= 8) { + uint64_t x = BROTLI_UNALIGNED_LOAD64LE(s2) ^ + BROTLI_UNALIGNED_LOAD64LE(s1); + s2 += 8; + if (x != 0) { size_t matching_bits = (size_t)BROTLI_TZCNT64(x); - matched += matching_bits >> 3; - return matched; + return (size_t)(s1 - s1_orig) + (matching_bits >> 3); } + s1 += 8; } - limit = (limit & 7) + 1; /* + 1 is for pre-decrement in while */ - while (--limit) { - if (BROTLI_PREDICT_TRUE(s1[matched] == *s2)) { - ++s2; - ++matched; - } else { - return matched; - } + while (limit && *s1 == *s2) { + limit--; + ++s2; + ++s1; } - return matched; + return (size_t)(s1 - s1_orig); } #else static BROTLI_INLINE size_t FindMatchLengthWithLimit(const uint8_t* s1, diff --git a/brotli/enc/hash.h b/brotli/enc/hash.h index 9ead9e60..fc6e3340 100644 --- a/brotli/enc/hash.h +++ b/brotli/enc/hash.h @@ -13,10 +13,12 @@ #include /* exit */ #include /* memcmp, memset */ +#include + #include "../common/constants.h" #include "../common/dictionary.h" #include "../common/platform.h" -#include +#include "compound_dictionary.h" #include "encoder_dict.h" #include "fast_log.h" #include "find_match_length.h" @@ -511,7 +513,6 @@ static BROTLI_INLINE void FindCompoundDictionaryMatch( const size_t ring_buffer_mask, const int* BROTLI_RESTRICT distance_cache, const size_t cur_ix, const size_t max_length, const size_t distance_offset, const size_t max_distance, HasherSearchResult* BROTLI_RESTRICT out) { - const uint32_t source_offset = 
self->source_offset; const uint32_t source_size = self->source_size; const size_t boundary = distance_offset - source_size; const uint32_t hash_bits = self->hash_bits; @@ -525,7 +526,7 @@ static BROTLI_INLINE void FindCompoundDictionaryMatch( const uint32_t* slot_offsets = (uint32_t*)(&self[1]); const uint16_t* heads = (uint16_t*)(&slot_offsets[1u << slot_bits]); const uint32_t* items = (uint32_t*)(&heads[1u << bucket_bits]); - const uint8_t* source = (uint8_t*)(&items[source_offset]); + const uint8_t* source = NULL; const size_t cur_ix_masked = cur_ix & ring_buffer_mask; score_t best_score = out->score; @@ -539,6 +540,15 @@ static BROTLI_INLINE void FindCompoundDictionaryMatch( const uint32_t head = heads[key]; const uint32_t* BROTLI_RESTRICT chain = &items[slot_offsets[slot] + head]; uint32_t item = (head == 0xFFFF) ? 1 : 0; + + const void* tail = (void*)&items[self->num_items]; + if (self->magic == kPreparedDictionaryMagic) { + source = (const uint8_t*)tail; + } else { + /* kLeanPreparedDictionaryMagic */ + source = (const uint8_t*)BROTLI_UNALIGNED_LOAD_PTR((const uint8_t**)tail); + } + for (i = 0; i < 4; ++i) { const size_t distance = (size_t)distance_cache[i]; size_t offset; @@ -608,7 +618,6 @@ static BROTLI_INLINE size_t FindAllCompoundDictionaryMatches( const size_t ring_buffer_mask, const size_t cur_ix, const size_t min_length, const size_t max_length, const size_t distance_offset, const size_t max_distance, BackwardMatch* matches, size_t match_limit) { - const uint32_t source_offset = self->source_offset; const uint32_t source_size = self->source_size; const uint32_t hash_bits = self->hash_bits; const uint32_t bucket_bits = self->bucket_bits; @@ -621,7 +630,7 @@ static BROTLI_INLINE size_t FindAllCompoundDictionaryMatches( const uint32_t* slot_offsets = (uint32_t*)(&self[1]); const uint16_t* heads = (uint16_t*)(&slot_offsets[1u << slot_bits]); const uint32_t* items = (uint32_t*)(&heads[1u << bucket_bits]); - const uint8_t* source = 
(uint8_t*)(&items[source_offset]); + const uint8_t* source = NULL; const size_t cur_ix_masked = cur_ix & ring_buffer_mask; size_t best_len = min_length; @@ -634,6 +643,15 @@ static BROTLI_INLINE size_t FindAllCompoundDictionaryMatches( const uint32_t* BROTLI_RESTRICT chain = &items[slot_offsets[slot] + head]; uint32_t item = (head == 0xFFFF) ? 1 : 0; size_t found = 0; + + const void* tail = (void*)&items[self->num_items]; + if (self->magic == kPreparedDictionaryMagic) { + source = (const uint8_t*)tail; + } else { + /* kLeanPreparedDictionaryMagic */ + source = (const uint8_t*)BROTLI_UNALIGNED_LOAD_PTR((const uint8_t**)tail); + } + while (item == 0) { size_t offset; size_t distance; diff --git a/brotli/enc/histogram.h b/brotli/enc/histogram.h index b213a8bc..d1abd973 100644 --- a/brotli/enc/histogram.h +++ b/brotli/enc/histogram.h @@ -11,10 +11,11 @@ #include /* memset */ +#include + #include "../common/constants.h" #include "../common/context.h" #include "../common/platform.h" -#include #include "block_splitter.h" #include "command.h" diff --git a/brotli/enc/literal_cost.c b/brotli/enc/literal_cost.c index 4e5068ec..2ac847f3 100644 --- a/brotli/enc/literal_cost.c +++ b/brotli/enc/literal_cost.c @@ -11,8 +11,9 @@ #include /* memset */ -#include "../common/platform.h" #include + +#include "../common/platform.h" #include "fast_log.h" #include "utf8_util.h" diff --git a/brotli/enc/literal_cost.h b/brotli/enc/literal_cost.h index efc8e178..284a8e5a 100644 --- a/brotli/enc/literal_cost.h +++ b/brotli/enc/literal_cost.h @@ -10,9 +10,10 @@ #ifndef BROTLI_ENC_LITERAL_COST_H_ #define BROTLI_ENC_LITERAL_COST_H_ -#include "../common/platform.h" #include +#include "../common/platform.h" + #if defined(__cplusplus) || defined(c_plusplus) extern "C" { #endif diff --git a/brotli/enc/memory.c b/brotli/enc/memory.c index f3afebcf..51e1b7f1 100644 --- a/brotli/enc/memory.c +++ b/brotli/enc/memory.c @@ -12,9 +12,10 @@ #include /* exit, free, malloc */ #include /* memcpy */ -#include 
"../common/platform.h" #include +#include "../common/platform.h" + #if defined(__cplusplus) || defined(c_plusplus) extern "C" { #endif diff --git a/brotli/enc/memory.h b/brotli/enc/memory.h index 13b23d4a..cbe4e309 100644 --- a/brotli/enc/memory.h +++ b/brotli/enc/memory.h @@ -11,9 +11,10 @@ #include /* memcpy */ -#include "../common/platform.h" #include +#include "../common/platform.h" + #if defined(__cplusplus) || defined(c_plusplus) extern "C" { #endif diff --git a/brotli/enc/metablock.c b/brotli/enc/metablock.c index 47b577b2..0c5c078d 100644 --- a/brotli/enc/metablock.c +++ b/brotli/enc/metablock.c @@ -9,10 +9,11 @@ #include "metablock.h" +#include + #include "../common/constants.h" #include "../common/context.h" #include "../common/platform.h" -#include #include "bit_cost.h" #include "block_splitter.h" #include "cluster.h" diff --git a/brotli/enc/metablock.h b/brotli/enc/metablock.h index 50bd2942..db38f8fd 100644 --- a/brotli/enc/metablock.h +++ b/brotli/enc/metablock.h @@ -10,9 +10,10 @@ #ifndef BROTLI_ENC_METABLOCK_H_ #define BROTLI_ENC_METABLOCK_H_ +#include + #include "../common/context.h" #include "../common/platform.h" -#include #include "block_splitter.h" #include "command.h" #include "histogram.h" diff --git a/brotli/enc/params.h b/brotli/enc/params.h index cc742795..baeb3196 100644 --- a/brotli/enc/params.h +++ b/brotli/enc/params.h @@ -10,6 +10,7 @@ #define BROTLI_ENC_PARAMS_H_ #include + #include "encoder_dict.h" typedef struct BrotliHasherParams { diff --git a/brotli/enc/prefix.h b/brotli/enc/prefix.h index b58d50b7..0f006f16 100644 --- a/brotli/enc/prefix.h +++ b/brotli/enc/prefix.h @@ -10,9 +10,10 @@ #ifndef BROTLI_ENC_PREFIX_H_ #define BROTLI_ENC_PREFIX_H_ +#include + #include "../common/constants.h" #include "../common/platform.h" -#include #include "fast_log.h" #if defined(__cplusplus) || defined(c_plusplus) diff --git a/brotli/enc/quality.h b/brotli/enc/quality.h index 392ab004..99891b47 100644 --- a/brotli/enc/quality.h +++ 
b/brotli/enc/quality.h @@ -10,8 +10,9 @@ #ifndef BROTLI_ENC_QUALITY_H_ #define BROTLI_ENC_QUALITY_H_ -#include "../common/platform.h" #include + +#include "../common/platform.h" #include "params.h" #define FAST_ONE_PASS_COMPRESSION_QUALITY 0 diff --git a/brotli/enc/ringbuffer.h b/brotli/enc/ringbuffer.h index 0db88cff..27245b7f 100644 --- a/brotli/enc/ringbuffer.h +++ b/brotli/enc/ringbuffer.h @@ -11,8 +11,9 @@ #include /* memcpy */ -#include "../common/platform.h" #include + +#include "../common/platform.h" #include "memory.h" #include "quality.h" diff --git a/brotli/enc/state.h b/brotli/enc/state.h new file mode 100644 index 00000000..cb829877 --- /dev/null +++ b/brotli/enc/state.h @@ -0,0 +1,104 @@ +/* Copyright 2022 Google Inc. All Rights Reserved. + + Distributed under MIT license. + See file LICENSE for detail or copy at https://opensource.org/licenses/MIT +*/ + +/* Encoder state. */ + +#ifndef BROTLI_ENC_STATE_H_ +#define BROTLI_ENC_STATE_H_ + +#include + +#include "command.h" +#include "compress_fragment.h" +#include "compress_fragment_two_pass.h" +#include "hash.h" +#include "memory.h" +#include "params.h" +#include "ringbuffer.h" + +typedef enum BrotliEncoderStreamState { + /* Default state. */ + BROTLI_STREAM_PROCESSING = 0, + /* Intermediate state; after next block is emitted, byte-padding should be + performed before getting back to default state. */ + BROTLI_STREAM_FLUSH_REQUESTED = 1, + /* Last metablock was produced; no more input is acceptable. */ + BROTLI_STREAM_FINISHED = 2, + /* Flushing compressed block and writing meta-data block header. */ + BROTLI_STREAM_METADATA_HEAD = 3, + /* Writing metadata block body. 
*/ + BROTLI_STREAM_METADATA_BODY = 4 +} BrotliEncoderStreamState; + +typedef enum BrotliEncoderFlintState { + BROTLI_FLINT_NEEDS_2_BYTES = 2, + BROTLI_FLINT_NEEDS_1_BYTE = 1, + BROTLI_FLINT_WAITING_FOR_PROCESSING = 0, + BROTLI_FLINT_WAITING_FOR_FLUSHING = -1, + BROTLI_FLINT_DONE = -2 +} BrotliEncoderFlintState; + +typedef struct BrotliEncoderStateStruct { + BrotliEncoderParams params; + + MemoryManager memory_manager_; + + uint64_t input_pos_; + RingBuffer ringbuffer_; + size_t cmd_alloc_size_; + Command* commands_; + size_t num_commands_; + size_t num_literals_; + size_t last_insert_len_; + uint64_t last_flush_pos_; + uint64_t last_processed_pos_; + int dist_cache_[BROTLI_NUM_DISTANCE_SHORT_CODES]; + int saved_dist_cache_[4]; + uint16_t last_bytes_; + uint8_t last_bytes_bits_; + /* "Flint" is a tiny uncompressed block emitted before the continuation + block to unwire literal context from previous data. Despite being int8_t, + field is actually BrotliEncoderFlintState enum. */ + int8_t flint_; + uint8_t prev_byte_; + uint8_t prev_byte2_; + size_t storage_size_; + uint8_t* storage_; + + Hasher hasher_; + + /* Hash table for FAST_ONE_PASS_COMPRESSION_QUALITY mode. */ + int small_table_[1 << 10]; /* 4KiB */ + int* large_table_; /* Allocated only when needed */ + size_t large_table_size_; + + BrotliOnePassArena* one_pass_arena_; + BrotliTwoPassArena* two_pass_arena_; + + /* Command and literal buffers for FAST_TWO_PASS_COMPRESSION_QUALITY. */ + uint32_t* command_buf_; + uint8_t* literal_buf_; + + uint64_t total_in_; + uint8_t* next_out_; + size_t available_out_; + uint64_t total_out_; + /* Temporary buffer for padding flush bits or metadata block header / body. 
*/ + union { + uint64_t u64[2]; + uint8_t u8[16]; + } tiny_buf_; + uint32_t remaining_metadata_bytes_; + BrotliEncoderStreamState stream_state_; + + BROTLI_BOOL is_last_block_emitted_; + BROTLI_BOOL is_initialized_; +} BrotliEncoderStateStruct; + +typedef struct BrotliEncoderStateStruct BrotliEncoderStateInternal; +#define BrotliEncoderState BrotliEncoderStateInternal + +#endif // BROTLI_ENC_STATE_H_ diff --git a/brotli/enc/static_dict.h b/brotli/enc/static_dict.h index f572bc64..ab832207 100644 --- a/brotli/enc/static_dict.h +++ b/brotli/enc/static_dict.h @@ -9,9 +9,10 @@ #ifndef BROTLI_ENC_STATIC_DICT_H_ #define BROTLI_ENC_STATIC_DICT_H_ +#include + #include "../common/dictionary.h" #include "../common/platform.h" -#include #include "encoder_dict.h" #if defined(__cplusplus) || defined(c_plusplus) diff --git a/brotli/enc/utf8_util.h b/brotli/enc/utf8_util.h index 8fda80c2..a38a9538 100644 --- a/brotli/enc/utf8_util.h +++ b/brotli/enc/utf8_util.h @@ -9,9 +9,10 @@ #ifndef BROTLI_ENC_UTF8_UTIL_H_ #define BROTLI_ENC_UTF8_UTIL_H_ -#include "../common/platform.h" #include +#include "../common/platform.h" + #if defined(__cplusplus) || defined(c_plusplus) extern "C" { #endif diff --git a/brotli/enc/write_bits.h b/brotli/enc/write_bits.h index f6f88b45..242754b0 100644 --- a/brotli/enc/write_bits.h +++ b/brotli/enc/write_bits.h @@ -9,9 +9,10 @@ #ifndef BROTLI_ENC_WRITE_BITS_H_ #define BROTLI_ENC_WRITE_BITS_H_ -#include "../common/platform.h" #include +#include "../common/platform.h" + #if defined(__cplusplus) || defined(c_plusplus) extern "C" { #endif diff --git a/brotli/include/brotli/decode.h b/brotli/include/brotli/decode.h index 9b580d22..3c473d61 100644 --- a/brotli/include/brotli/decode.h +++ b/brotli/include/brotli/decode.h @@ -361,6 +361,47 @@ BROTLI_DEC_API const char* BrotliDecoderErrorString(BrotliDecoderErrorCode c); */ BROTLI_DEC_API uint32_t BrotliDecoderVersion(void); +/** + * Callback to fire on metadata block start. 
+ * + * After this callback is fired, if @p size is not @c 0, it is followed by + * ::brotli_decoder_metadata_chunk_func as more metadata block contents become + * accessible. + * + * @param opaque callback handle + * @param size size of metadata block + */ +typedef void (*brotli_decoder_metadata_start_func)(void* opaque, size_t size); + +/** + * Callback to fire when a metadata block chunk becomes available. + * + * This function can be invoked multiple times per metadata block; block should + * be considered finished when sum of @p size matches the announced metadata + * block size. Chunk contents pointed by @p data are transient and should not + * be accessed after leaving the callback. + * + * @param opaque callback handle + * @param data pointer to metadata contents + * @param size size of metadata block chunk, at least @c 1 + */ +typedef void (*brotli_decoder_metadata_chunk_func)(void* opaque, + const uint8_t* data, + size_t size); + +/** + * Sets callback for receiving metadata blocks. 
+ * + * @param state decoder instance + * @param start_func callback on metadata block start + * @param chunk_func callback on metadata block chunk + * @param opaque callback handle + */ +BROTLI_DEC_API void BrotliDecoderSetMetadataCallbacks( + BrotliDecoderState* state, + brotli_decoder_metadata_start_func start_func, + brotli_decoder_metadata_chunk_func chunk_func, void* opaque); + #if defined(__cplusplus) || defined(c_plusplus) } /* extern "C" */ #endif diff --git a/brotli/include/brotli/encode.h b/brotli/include/brotli/encode.h index b2c6f61e..7247d3d6 100644 --- a/brotli/include/brotli/encode.h +++ b/brotli/include/brotli/encode.h @@ -453,7 +453,7 @@ BROTLI_ENC_API BROTLI_BOOL BrotliEncoderHasMoreOutput( * * This method is used to make language bindings easier and more efficient: * -# push data to ::BrotliEncoderCompressStream, - * until ::BrotliEncoderHasMoreOutput returns BROTL_TRUE + * until ::BrotliEncoderHasMoreOutput returns BROTLI_TRUE * -# use ::BrotliEncoderTakeOutput to peek bytes and copy to language-specific * entity * diff --git a/brotli/include/brotli/port.h b/brotli/include/brotli/port.h index a681ac48..0d500190 100644 --- a/brotli/include/brotli/port.h +++ b/brotli/include/brotli/port.h @@ -224,14 +224,6 @@ #define BROTLI_HAS_FEATURE(feature) (0) #endif -#if defined(ADDRESS_SANITIZER) || BROTLI_HAS_FEATURE(address_sanitizer) || \ - defined(THREAD_SANITIZER) || BROTLI_HAS_FEATURE(thread_sanitizer) || \ - defined(MEMORY_SANITIZER) || BROTLI_HAS_FEATURE(memory_sanitizer) -#define BROTLI_SANITIZED 1 -#else -#define BROTLI_SANITIZED 0 -#endif - #if defined(_WIN32) || defined(__CYGWIN__) #define BROTLI_PUBLIC #elif BROTLI_GNUC_VERSION_CHECK(3, 3, 0) || \ diff --git a/brotli/tools/brotli.c b/brotli/tools/brotli.c index 0ea45d31..102a87a7 100644 --- a/brotli/tools/brotli.c +++ b/brotli/tools/brotli.c @@ -20,16 +20,13 @@ #include #include -#include "../common/constants.h" -#include "../common/version.h" #include #include -#if !defined(_WIN32) -#include 
-#include -#define MAKE_BINARY(FILENO) (FILENO) -#else +#include "../common/constants.h" +#include "../common/version.h" + +#if defined(_WIN32) #include #include #include @@ -71,7 +68,23 @@ static int ms_open(const char* filename, int oflag, int pmode) { _sopen_s(&result, filename, oflag | O_BINARY, _SH_DENYNO, pmode); return result; } -#endif /* WIN32 */ +#else /* !defined(_WIN32) */ +#include +#include +#define MAKE_BINARY(FILENO) (FILENO) +#endif /* defined(_WIN32) */ + +#if defined(__APPLE__) && !defined(_POSIX_C_SOURCE) +#define HAVE_UTIMENSAT 1 +#define ATIME_NSEC(S) ((S)->st_atimespec.tv_nsec) +#define MTIME_NSEC(S) ((S)->st_mtimespec.tv_nsec) +#elif defined(_WIN32) || !defined(AT_SYMLINK_NOFOLLOW) +#define HAVE_UTIMENSAT 0 +#else +#define HAVE_UTIMENSAT 1 +#define ATIME_NSEC(S) ((S)->st_atim.tv_nsec) +#define MTIME_NSEC(S) ((S)->st_mtim.tv_nsec) +#endif typedef enum { COMMAND_COMPRESS, @@ -663,12 +676,27 @@ static int64_t FileSize(const char* path) { return retval; } +static int CopyTimeStat(const struct stat* statbuf, const char* output_path) { +#if HAVE_UTIMENSAT + struct timespec times[2]; + times[0].tv_sec = statbuf->st_atime; + times[0].tv_nsec = ATIME_NSEC(statbuf); + times[1].tv_sec = statbuf->st_mtime; + times[1].tv_nsec = MTIME_NSEC(statbuf); + return utimensat(AT_FDCWD, output_path, times, AT_SYMLINK_NOFOLLOW); +#else + struct utimbuf times; + times.actime = statbuf->st_atime; + times.modtime = statbuf->st_mtime; + return utime(output_path, &times); +#endif +} + /* Copy file times and permissions. TODO(eustas): this is a "best effort" implementation; honest cross-platform fully featured implementation is way too hacky; add more hacks by request. 
*/ static void CopyStat(const char* input_path, const char* output_path) { struct stat statbuf; - struct utimbuf times; int res; if (input_path == 0 || output_path == 0) { return; @@ -676,9 +704,7 @@ static void CopyStat(const char* input_path, const char* output_path) { if (stat(input_path, &statbuf) != 0) { return; } - times.actime = statbuf.st_atime; - times.modtime = statbuf.st_mtime; - utime(output_path, &times); + res = CopyTimeStat(&statbuf, output_path); res = chmod(output_path, statbuf.st_mode & (S_IRWXU | S_IRWXG | S_IRWXO)); if (res != 0) { fprintf(stderr, "setting access bits failed for [%s]: %s\n", diff --git a/brotli/tools/brotli.md b/brotli/tools/brotli.md index bf3a9118..cb6d6f38 100644 --- a/brotli/tools/brotli.md +++ b/brotli/tools/brotli.md @@ -1,15 +1,15 @@ +# NAME + brotli(1) -- brotli, unbrotli - compress or decompress files -================================================================ -SYNOPSIS --------- +# SYNOPSIS `brotli` [*OPTION|FILE*]... `unbrotli` is equivalent to `brotli --decompress` -DESCRIPTION ------------ +# DESCRIPTION + `brotli` is a generic-purpose lossless compression algorithm that compresses data using a combination of a modern variant of the **LZ77** algorithm, Huffman coding and 2-nd order context modeling, with a compression ratio comparable to @@ -52,50 +52,48 @@ Default suffix is `.br`, but it could be specified with `--suffix` option. Conflicting or duplicate _options_ are not allowed. 
-OPTIONS -------- +# OPTIONS * `-#`: - compression level (0-9); bigger values cause denser, but slower compression + compression level (0-9); bigger values cause denser, but slower compression * `-c`, `--stdout`: - write on standard output + write on standard output * `-d`, `--decompress`: - decompress mode + decompress mode * `-f`, `--force`: - force output file overwrite + force output file overwrite * `-h`, `--help`: - display this help and exit + display this help and exit * `-j`, `--rm`: - remove source file(s); `gzip (1)`-like behaviour + remove source file(s); `gzip (1)`-like behaviour * `-k`, `--keep`: - keep source file(s); `zstd (1)`-like behaviour + keep source file(s); `zstd (1)`-like behaviour * `-n`, `--no-copy-stat`: - do not copy source file(s) attributes + do not copy source file(s) attributes * `-o FILE`, `--output=FILE` - output file; valid only if there is a single input entry + output file; valid only if there is a single input entry * `-q NUM`, `--quality=NUM`: - compression level (0-11); bigger values cause denser, but slower compression + compression level (0-11); bigger values cause denser, but slower compression * `-t`, `--test`: - test file integrity mode + test file integrity mode * `-v`, `--verbose`: - increase output verbosity + increase output verbosity * `-w NUM`, `--lgwin=NUM`: - set LZ77 window size (0, 10-24) (default: 24); window size is - `(2**NUM - 16)`; 0 lets compressor decide over the optimal value; bigger - windows size improve density; decoder might require up to window size - memory to operate + set LZ77 window size (0, 10-24) (default: 24); window size is + `(pow(2, NUM) - 16)`; 0 lets compressor decide over the optimal value; + bigger windows size improve density; decoder might require up to window size + memory to operate * `-D FILE`, `--dictionary=FILE`: - use FILE as raw (LZ77) dictionary; same dictionary MUST be used both for - compression and decompression + use FILE as raw (LZ77) dictionary; same dictionary MUST 
be used both for + compression and decompression * `-S SUF`, `--suffix=SUF`: - output file suffix (default: `.br`) + output file suffix (default: `.br`) * `-V`, `--version`: - display version and exit + display version and exit * `-Z`, `--best`: - use best compression level (default); same as "`-q 11`" + use best compression level (default); same as "`-q 11`" -SEE ALSO --------- +# SEE ALSO `brotli` file format is defined in [RFC 7932](https://www.ietf.org/rfc/rfc7932.txt). @@ -105,6 +103,6 @@ SEE ALSO Mailing list: https://groups.google.com/forum/#!forum/brotli -BUGS ----- +# BUGS + Report bugs at: https://github.com/google/brotli/issues diff --git a/brotli4j/src/main/java/com/aayushatharva/brotli4j/common/annotations/Local.java b/brotli4j/src/main/java/com/aayushatharva/brotli4j/common/annotations/Local.java index 44bd77ca..3efb3479 100644 --- a/brotli4j/src/main/java/com/aayushatharva/brotli4j/common/annotations/Local.java +++ b/brotli4j/src/main/java/com/aayushatharva/brotli4j/common/annotations/Local.java @@ -26,7 +26,7 @@ * represents code which created locally and not in sync with * Google Brotli upstream repository. 
*/ -@Target({ElementType.TYPE, ElementType.METHOD}) +@Target({ElementType.FIELD, ElementType.TYPE, ElementType.METHOD}) @Retention(RetentionPolicy.SOURCE) public @interface Local { } diff --git a/brotli4j/src/main/java/com/aayushatharva/brotli4j/decoder/Decoder.java b/brotli4j/src/main/java/com/aayushatharva/brotli4j/decoder/Decoder.java index 9eef5915..abcce646 100644 --- a/brotli4j/src/main/java/com/aayushatharva/brotli4j/decoder/Decoder.java +++ b/brotli4j/src/main/java/com/aayushatharva/brotli4j/decoder/Decoder.java @@ -103,6 +103,7 @@ public static DirectDecompress decompress(byte[] data) throws IOException { return new DirectDecompress(decoder.getStatus(), result, null); } + @Upstream private void fail(String message) throws IOException { try { close(); @@ -112,12 +113,14 @@ private void fail(String message) throws IOException { throw new IOException(message); } + @Upstream void attachDictionary(ByteBuffer dictionary) throws IOException { if (!decoder.attachDictionary(dictionary)) { fail("failed to attach dictionary"); } } + @Upstream public void enableEagerOutput() { this.eager = true; } @@ -127,6 +130,7 @@ public void enableEagerOutput() { * * @return -1 if stream is finished, or number of bytes available in read buffer (> 0) */ + @Upstream int decode() throws IOException { while (true) { if (buffer != null) { @@ -175,6 +179,7 @@ int decode() throws IOException { } } + @Upstream void discard(int length) { ((Buffer) buffer).position(buffer.position() + length); if (!buffer.hasRemaining()) { @@ -182,6 +187,7 @@ void discard(int length) { } } + @Upstream int consume(ByteBuffer dst) { ByteBuffer slice = buffer.slice(); int limit = Math.min(slice.remaining(), dst.remaining()); @@ -191,6 +197,7 @@ int consume(ByteBuffer dst) { return limit; } + @Upstream void close() throws IOException { if (closed) { return; @@ -199,4 +206,55 @@ void close() throws IOException { decoder.destroy(); source.close(); } + + /** Decodes the given data buffer starting at offset till 
length. */ + @Upstream + public static byte[] decompress(byte[] data, int offset, int length) throws IOException { + DecoderJNI.Wrapper decoder = new DecoderJNI.Wrapper(length); + ArrayList output = new ArrayList<>(); + int totalOutputSize = 0; + try { + decoder.getInputBuffer().put(data, offset, length); + decoder.push(length); + while (decoder.getStatus() != DecoderJNI.Status.DONE) { + switch (decoder.getStatus()) { + case OK: + decoder.push(0); + break; + + case NEEDS_MORE_OUTPUT: + ByteBuffer buffer = decoder.pull(); + byte[] chunk = new byte[buffer.remaining()]; + buffer.get(chunk); + output.add(chunk); + totalOutputSize += chunk.length; + break; + + case NEEDS_MORE_INPUT: + // Give decoder a chance to process the remaining of the buffered byte. + decoder.push(0); + // If decoder still needs input, this means that stream is truncated. + if (decoder.getStatus() == DecoderJNI.Status.NEEDS_MORE_INPUT) { + throw new IOException("corrupted input"); + } + break; + + default: + throw new IOException("corrupted input"); + } + } + } finally { + decoder.destroy(); + } + if (output.size() == 1) { + return output.get(0); + } + byte[] result = new byte[totalOutputSize]; + int resultOffset = 0; + for (byte[] chunk : output) { + System.arraycopy(chunk, 0, result, resultOffset, chunk.length); + resultOffset += chunk.length; + } + return result; + } } diff --git a/brotli4j/src/main/java/com/aayushatharva/brotli4j/encoder/Encoder.java b/brotli4j/src/main/java/com/aayushatharva/brotli4j/encoder/Encoder.java index c763bf89..568ab3f0 100644 --- a/brotli4j/src/main/java/com/aayushatharva/brotli4j/encoder/Encoder.java +++ b/brotli4j/src/main/java/com/aayushatharva/brotli4j/encoder/Encoder.java @@ -21,12 +21,12 @@ @Upstream @Local public class Encoder { - final ByteBuffer inputBuffer; private final WritableByteChannel destination; private final List dictionaries; private final EncoderJNI.Wrapper encoder; - boolean closed; private ByteBuffer buffer; + final ByteBuffer inputBuffer; + 
boolean closed; /** * Creates a Encoder wrapper. @@ -35,11 +35,11 @@ public class Encoder { * @param params encoding parameters * @param inputBufferSize read buffer size */ - Encoder(WritableByteChannel destination, Parameters params, int inputBufferSize) - throws IOException { + Encoder(WritableByteChannel destination, Parameters params, int inputBufferSize) throws IOException { if (inputBufferSize <= 0) { throw new IllegalArgumentException("buffer size must be positive"); } + if (destination == null) { throw new NullPointerException("destination can not be null"); } @@ -49,7 +49,7 @@ public class Encoder { this.inputBuffer = this.encoder.getInputBuffer(); } - /** + /* * Encodes the given data buffer. * * @param data byte array to be compressed @@ -57,18 +57,19 @@ public class Encoder { * @return compressed byte array * @throws IOException If any failure during encoding */ - public static byte[] compress(byte[] data, Parameters params) throws IOException { - if (data.length == 0) { + @Upstream + public static byte[] compress(byte[] data, int offset, int length, Parameters params) throws IOException { + if (length == 0) { byte[] empty = new byte[1]; empty[0] = 6; return empty; } /* data.length > 0 */ - EncoderJNI.Wrapper encoder = new EncoderJNI.Wrapper(data.length, params.quality, params.lgwin, params.mode); + EncoderJNI.Wrapper encoder = new EncoderJNI.Wrapper(length, params.quality, params.lgwin, params.mode); ArrayList output = new ArrayList<>(); int totalOutputSize = 0; try { - encoder.getInputBuffer().put(data); + encoder.getInputBuffer().put(data, offset, length); encoder.push(EncoderJNI.Operation.FINISH, data.length); while (true) { if (!encoder.isSuccess()) { @@ -92,10 +93,10 @@ public static byte[] compress(byte[] data, Parameters params) throws IOException return output.get(0); } byte[] result = new byte[totalOutputSize]; - int offset = 0; + int resultOffset = 0; for (byte[] chunk : output) { - System.arraycopy(chunk, 0, result, offset, chunk.length); - 
offset += chunk.length; + System.arraycopy(chunk, 0, result, resultOffset, chunk.length); + resultOffset += chunk.length; } return result; } @@ -105,6 +106,17 @@ public static byte[] compress(byte[] data) throws IOException { return compress(data, Parameters.DEFAULT); } + @Upstream + /* Encodes the given data buffer. */ + public static byte[] compress(byte[] data, Parameters params) throws IOException { + return compress(data, 0, data.length, params); + } + + @Upstream + public static byte[] compress(byte[] data, int offset, int length) throws IOException { + return compress(data, offset, length, new Parameters()); + } + /** * Prepares raw or serialized dictionary for being used by encoder. * @@ -112,11 +124,12 @@ public static byte[] compress(byte[] data) throws IOException { * @param sharedDictionaryType dictionary data type * @return {@link PreparedDictionary} instance */ - public static PreparedDictionary prepareDictionary(ByteBuffer dictionary, - int sharedDictionaryType) { + @Upstream + public static PreparedDictionary prepareDictionary(ByteBuffer dictionary, int sharedDictionaryType) { return EncoderJNI.prepareDictionary(dictionary, sharedDictionaryType); } + @Upstream private void fail(String message) throws IOException { try { close(); @@ -126,6 +139,7 @@ private void fail(String message) throws IOException { throw new IOException(message); } + @Upstream public void attachDictionary(PreparedDictionary dictionary) throws IOException { if (!encoder.attachDictionary(dictionary.getData())) { fail("failed to attach dictionary"); @@ -138,6 +152,7 @@ public void attachDictionary(PreparedDictionary dictionary) throws IOException { * @param force repeat pushing until all output is consumed * @return true if all encoder output is consumed */ + @Upstream boolean pushOutput(boolean force) throws IOException { while (buffer != null) { if (buffer.hasRemaining()) { @@ -155,6 +170,7 @@ boolean pushOutput(boolean force) throws IOException { /** * @return true if there is 
space in inputBuffer. */ + @Upstream boolean encode(EncoderJNI.Operation op) throws IOException { boolean force = (op != EncoderJNI.Operation.PROCESS); if (force) { @@ -182,10 +198,12 @@ boolean encode(EncoderJNI.Operation op) throws IOException { } } + @Upstream void flush() throws IOException { encode(EncoderJNI.Operation.FLUSH); } + @Upstream void close() throws IOException { if (closed) { return; @@ -229,13 +247,13 @@ public static Mode of(int value) { } } - /** * Brotli encoder settings. */ @Upstream @Local public static final class Parameters { + @Local public static final Parameters DEFAULT = new Parameters(); private int quality = -1; diff --git a/brotli4j/src/main/java/com/aayushatharva/brotli4j/encoder/EncoderJNI.java b/brotli4j/src/main/java/com/aayushatharva/brotli4j/encoder/EncoderJNI.java index 1aca5329..e282a814 100644 --- a/brotli4j/src/main/java/com/aayushatharva/brotli4j/encoder/EncoderJNI.java +++ b/brotli4j/src/main/java/com/aayushatharva/brotli4j/encoder/EncoderJNI.java @@ -16,17 +16,11 @@ @Upstream class EncoderJNI { private static native ByteBuffer nativeCreate(long[] context); - private static native void nativePush(long[] context, int length); - private static native ByteBuffer nativePull(long[] context); - private static native void nativeDestroy(long[] context); - private static native boolean nativeAttachDictionary(long[] context, ByteBuffer dictionary); - private static native ByteBuffer nativePrepareDictionary(ByteBuffer dictionary, long type); - private static native void nativeDestroyDictionary(ByteBuffer dictionary); enum Operation { @@ -37,8 +31,10 @@ enum Operation { private static class PreparedDictionaryImpl implements PreparedDictionary { private ByteBuffer data; + /** Reference to (non-copied) LZ data. 
*/ + private ByteBuffer rawData; - private PreparedDictionaryImpl(ByteBuffer data) { + private PreparedDictionaryImpl(ByteBuffer data, ByteBuffer rawData) { this.data = data; } @@ -52,6 +48,7 @@ protected void finalize() throws Throwable { try { ByteBuffer data = this.data; this.data = null; + this.rawData = null; nativeDestroyDictionary(data); } finally { super.finalize(); @@ -62,7 +59,7 @@ protected void finalize() throws Throwable { /** * Prepares raw or serialized dictionary for being used by encoder. * - * @param dictionary raw / serialized dictionary data; MUST be direct + * @param dictionary raw / serialized dictionary data; MUST be direct * @param sharedDictionaryType dictionary data type */ static PreparedDictionary prepareDictionary(ByteBuffer dictionary, int sharedDictionaryType) { @@ -73,7 +70,7 @@ static PreparedDictionary prepareDictionary(ByteBuffer dictionary, int sharedDic if (dictionaryData == null) { throw new IllegalStateException("OOM"); } - return new PreparedDictionaryImpl(dictionaryData); + return new PreparedDictionaryImpl(dictionaryData, dictionary); } static class Wrapper { @@ -176,7 +173,7 @@ void destroy() { @Override protected void finalize() throws Throwable { if (context[0] != 0) { - /* TODO: log resource leak? */ + /* TODO(eustas): log resource leak? */ destroy(); } super.finalize(); diff --git a/natives/src/main/cpp/decoder_jni.cc b/natives/src/main/cpp/decoder_jni.cc index 366b4cce..fc92a3f0 100644 --- a/natives/src/main/cpp/decoder_jni.cc +++ b/natives/src/main/cpp/decoder_jni.cc @@ -4,10 +4,12 @@ See file LICENSE for detail or copy at https://opensource.org/licenses/MIT */ -#include -#include +#include "decoder_jni.h" // NOLINT: build/include + #include +#include + namespace { /* A structure used to persist the decoder's state in between calls. 
*/ typedef struct DecoderHandle { diff --git a/natives/src/main/cpp/decoder_jni.h b/natives/src/main/cpp/decoder_jni.h new file mode 100644 index 00000000..2d18fcbe --- /dev/null +++ b/natives/src/main/cpp/decoder_jni.h @@ -0,0 +1,75 @@ +/* Copyright 2017 Google Inc. All Rights Reserved. + + Distributed under MIT license. + See file LICENSE for detail or copy at https://opensource.org/licenses/MIT +*/ + +#ifndef BROTLI_WRAPPER_DEC_DECODER_JNI_H_ +#define BROTLI_WRAPPER_DEC_DECODER_JNI_H_ + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * Creates a new Decoder. + * + * Cookie to address created decoder is stored in out_cookie. In case of failure + * cookie is 0. + * + * @param ctx {out_cookie, in_directBufferSize} tuple + * @returns direct ByteBuffer if directBufferSize is not 0; otherwise null + */ +JNIEXPORT jobject JNICALL +Java_com_aayushatharva_brotli4j_decoder_DecoderJNI_nativeCreate( + JNIEnv* env, jobject /*jobj*/, jlongArray ctx); + +/** + * Push data to decoder. + * + * status codes: + * - 0 error happened + * - 1 stream is finished, no more input / output expected + * - 2 needs more input to process further + * - 3 needs more output to process further + * - 4 ok, can proceed further without additional input + * + * @param ctx {in_cookie, out_status} tuple + * @param input_length number of bytes provided in input or direct input; + * 0 to process further previous input + */ +JNIEXPORT void JNICALL +Java_com_aayushatharva_brotli4j_decoder_DecoderJNI_nativePush( + JNIEnv* env, jobject /*jobj*/, jlongArray ctx, jint input_length); + +/** + * Pull decompressed data from decoder. + * + * @param ctx {in_cookie, out_status} tuple + * @returns direct ByteBuffer; all the produced data MUST be consumed before + * any further invocation; null in case of error + */ +JNIEXPORT jobject JNICALL +Java_com_aayushatharva_brotli4j_decoder_DecoderJNI_nativePull( + JNIEnv* env, jobject /*jobj*/, jlongArray ctx); + +/** + * Releases all used resources. 
+ * + * @param ctx {in_cookie} tuple + */ +JNIEXPORT void JNICALL +Java_com_aayushatharva_brotli4j_decoder_DecoderJNI_nativeDestroy( + JNIEnv* env, jobject /*jobj*/, jlongArray ctx); + +JNIEXPORT jboolean JNICALL +Java_com_aayushatharva_brotli4j_decoder_DecoderJNI_nativeAttachDictionary( + JNIEnv* env, jobject /*jobj*/, jlongArray ctx, jobject dictionary); + +#ifdef __cplusplus +} +#endif + +#endif // BROTLI_WRAPPER_DEC_DECODER_JNI_H_ diff --git a/natives/src/main/cpp/encoder_jni.cc b/natives/src/main/cpp/encoder_jni.cc index 38ffd605..28d26e6e 100644 --- a/natives/src/main/cpp/encoder_jni.cc +++ b/natives/src/main/cpp/encoder_jni.cc @@ -4,12 +4,11 @@ See file LICENSE for detail or copy at https://opensource.org/licenses/MIT */ +#include #include #include -#include - namespace { /* A structure used to persist the encoder's state in between calls. */ typedef struct EncoderHandle {