diff --git a/src/coreclr/nativeaot/BuildIntegration/Microsoft.NETCore.Native.Unix.targets b/src/coreclr/nativeaot/BuildIntegration/Microsoft.NETCore.Native.Unix.targets index 4d58c7e4c012dd..60c242703268e9 100644 --- a/src/coreclr/nativeaot/BuildIntegration/Microsoft.NETCore.Native.Unix.targets +++ b/src/coreclr/nativeaot/BuildIntegration/Microsoft.NETCore.Native.Unix.targets @@ -34,6 +34,7 @@ The .NET Foundation licenses this file to you under the MIT license. true true + true libRuntime.WorkstationGC libRuntime.ServerGC @@ -188,6 +189,11 @@ The .NET Foundation licenses this file to you under the MIT license. + + + + + @@ -228,6 +234,7 @@ The .NET Foundation licenses this file to you under the MIT license. + diff --git a/src/coreclr/nativeaot/BuildIntegration/Microsoft.NETCore.Native.Windows.targets b/src/coreclr/nativeaot/BuildIntegration/Microsoft.NETCore.Native.Windows.targets index 25fb0b7fae9205..20f7b1c6a5cceb 100644 --- a/src/coreclr/nativeaot/BuildIntegration/Microsoft.NETCore.Native.Windows.targets +++ b/src/coreclr/nativeaot/BuildIntegration/Microsoft.NETCore.Native.Windows.targets @@ -51,6 +51,7 @@ The .NET Foundation licenses this file to you under the MIT license. + diff --git a/src/installer/pkg/sfx/Microsoft.NETCore.App/Directory.Build.props b/src/installer/pkg/sfx/Microsoft.NETCore.App/Directory.Build.props index 7c752b511514ae..fc220fe6cec72c 100644 --- a/src/installer/pkg/sfx/Microsoft.NETCore.App/Directory.Build.props +++ b/src/installer/pkg/sfx/Microsoft.NETCore.App/Directory.Build.props @@ -302,6 +302,9 @@ + + + diff --git a/src/native/external/cgmanifest.json b/src/native/external/cgmanifest.json index 17c52798d922ed..6767e693a6a341 100644 --- a/src/native/external/cgmanifest.json +++ b/src/native/external/cgmanifest.json @@ -2,6 +2,16 @@ "$schema": "https://json.schemastore.org/component-detection-manifest.json", "version": 1, "registrations": [ + { + "component": { + "type": "git", + "git": { + "repositoryUrl": "https://github.com/tukaani-project/xz", + "commitHash": "3d078b52adbff566ccfc51067dfbf742ecf3ef86" + } + }, + "developmentDependency": false + }, { "component": { "type": "git", diff --git a/src/native/external/xz-patch.diff b/src/native/external/xz-patch.diff new file mode 100644 index 00000000000000..f5d1fc94950f24 --- /dev/null +++ b/src/native/external/xz-patch.diff @@ -0,0 +1,404 @@ +diff --git a/src/liblzma/check/crc_x86_clmul.h b/src/liblzma/check/crc_x86_clmul.h +index b302d6cf..35653623 100644 +--- a/src/liblzma/check/crc_x86_clmul.h ++++ b/src/liblzma/check/crc_x86_clmul.h +@@ -344,7 +344,7 @@ is_arch_extension_supported(void) + #if defined(_MSC_VER) + // This needs with MSVC. ICC has it as a built-in + // on all platforms. +- __cpuid(r, 1); ++ __cpuid((int *)r, 1); + #elif defined(HAVE_CPUID_H) + // Compared to just using __asm__ to run CPUID, this also checks + // that CPUID is supported and saves and restores ebx as that is +diff --git a/src/liblzma/common/block_buffer_encoder.c b/src/liblzma/common/block_buffer_encoder.c +index df3b90e8..3963c15c 100644 +--- a/src/liblzma/common/block_buffer_encoder.c ++++ b/src/liblzma/common/block_buffer_encoder.c +@@ -143,7 +143,7 @@ block_encode_uncompressed(lzma_block *block, const uint8_t *in, size_t in_size, + // Size of the uncompressed chunk + const size_t copy_size + = my_min(in_size - in_pos, LZMA2_CHUNK_MAX); +- out[(*out_pos)++] = (copy_size - 1) >> 8; ++ out[(*out_pos)++] = (uint8_t)((copy_size - 1) >> 8); + out[(*out_pos)++] = (copy_size - 1) & 0xFF; + + // The actual data +diff --git a/src/liblzma/common/block_header_encoder.c b/src/liblzma/common/block_header_encoder.c +index 45e57a26..33840d1b 100644 +--- a/src/liblzma/common/block_header_encoder.c ++++ b/src/liblzma/common/block_header_encoder.c +@@ -81,7 +81,7 @@ lzma_block_header_encode(const lzma_block *block, uint8_t *out) + const size_t out_size = block->header_size - 4; + + // Store the Block Header Size. +- out[0] = out_size / 4; ++ out[0] = (uint8_t)(out_size / 4); + + // We write Block Flags in pieces. + out[1] = 0x00; +diff --git a/src/liblzma/delta/delta_encoder.c b/src/liblzma/delta/delta_encoder.c +index ba4a50b1..c3f30273 100644 +--- a/src/liblzma/delta/delta_encoder.c ++++ b/src/liblzma/delta/delta_encoder.c +@@ -126,7 +126,7 @@ lzma_delta_props_encode(const void *options, uint8_t *out) + return LZMA_PROG_ERROR; + + const lzma_options_delta *opt = options; +- out[0] = opt->dist - LZMA_DELTA_DIST_MIN; ++ out[0] = (uint8_t)(opt->dist - LZMA_DELTA_DIST_MIN); + + return LZMA_OK; + } +diff --git a/src/liblzma/lz/lz_decoder.h b/src/liblzma/lz/lz_decoder.h +index 2698e016..d26963a9 100644 +--- a/src/liblzma/lz/lz_decoder.h ++++ b/src/liblzma/lz/lz_decoder.h +@@ -205,7 +205,7 @@ dict_repeat(lzma_dict *restrict dict, + { + // Don't write past the end of the dictionary. + const size_t dict_avail = dict->limit - dict->pos; +- uint32_t left = my_min(dict_avail, *len); ++ uint32_t left = (uint32_t)my_min(dict_avail, *len); + *len -= left; + + size_t back = dict->pos - distance - 1; +diff --git a/src/liblzma/lz/lz_encoder.c b/src/liblzma/lz/lz_encoder.c +index e5c4057d..0062882a 100644 +--- a/src/liblzma/lz/lz_encoder.c ++++ b/src/liblzma/lz/lz_encoder.c +@@ -107,7 +107,7 @@ fill_window(lzma_coder *coder, const lzma_allocator *allocator, + coder->mf.size, action); + } + +- coder->mf.write_pos = write_pos; ++ coder->mf.write_pos = (uint32_t)write_pos; + + // Silence Valgrind. lzma_memcmplen() can read extra bytes + // and Valgrind will give warnings if those bytes are uninitialized +@@ -199,10 +199,10 @@ lz_encoder_prepare(lzma_mf *mf, const lzma_allocator *allocator, + || lz_options->nice_len > lz_options->match_len_max) + return true; + +- mf->keep_size_before = lz_options->before_size + lz_options->dict_size; ++ mf->keep_size_before = (uint32_t)(lz_options->before_size + lz_options->dict_size); + +- mf->keep_size_after = lz_options->after_size +- + lz_options->match_len_max; ++ mf->keep_size_after = (uint32_t)(lz_options->after_size ++ + lz_options->match_len_max); + + // To avoid constant memmove()s, allocate some extra space. Since + // memmove()s become more expensive when the size of the buffer +@@ -215,12 +215,12 @@ lz_encoder_prepare(lzma_mf *mf, const lzma_allocator *allocator, + // to size_t. + // - Memory usage calculation needs something too, e.g. use uint64_t + // for mf->size. +- uint32_t reserve = lz_options->dict_size / 2; ++ uint32_t reserve = (uint32_t)(lz_options->dict_size / 2); + if (reserve > (UINT32_C(1) << 30)) + reserve /= 2; + +- reserve += (lz_options->before_size + lz_options->match_len_max +- + lz_options->after_size) / 2 + (UINT32_C(1) << 19); ++ reserve += (uint32_t)((lz_options->before_size + lz_options->match_len_max ++ + lz_options->after_size) / 2 + (UINT32_C(1) << 19)); + + const uint32_t old_size = mf->size; + mf->size = mf->keep_size_before + reserve + mf->keep_size_after; +@@ -233,8 +233,8 @@ lz_encoder_prepare(lzma_mf *mf, const lzma_allocator *allocator, + } + + // Match finder options +- mf->match_len_max = lz_options->match_len_max; +- mf->nice_len = lz_options->nice_len; ++ mf->match_len_max = (uint32_t)lz_options->match_len_max; ++ mf->nice_len = (uint32_t)lz_options->nice_len; + + // cyclic_size has to stay smaller than 2 Gi. Note that this doesn't + // mean limiting dictionary size to less than 2 GiB. With a match +@@ -251,7 +251,7 @@ lz_encoder_prepare(lzma_mf *mf, const lzma_allocator *allocator, + // memory to keep the code simpler. The current way is simple and + // still allows pretty big dictionaries, so I don't expect these + // limits to change. +- mf->cyclic_size = lz_options->dict_size + 1; ++ mf->cyclic_size = (uint32_t)(lz_options->dict_size + 1); + + // Validate the match finder ID and setup the function pointers. + switch (lz_options->match_finder) { +@@ -308,7 +308,7 @@ lz_encoder_prepare(lzma_mf *mf, const lzma_allocator *allocator, + } else { + // Round dictionary size up to the next 2^n - 1 so it can + // be used as a hash mask. +- hs = lz_options->dict_size - 1; ++ hs = (uint32_t)(lz_options->dict_size - 1); + hs |= hs >> 1; + hs |= hs >> 2; + hs |= hs >> 4; +diff --git a/src/liblzma/lzma/lzma2_encoder.c b/src/liblzma/lzma/lzma2_encoder.c +index 71cfd9b4..957fa31e 100644 +--- a/src/liblzma/lzma/lzma2_encoder.c ++++ b/src/liblzma/lzma/lzma2_encoder.c +@@ -81,14 +81,14 @@ lzma2_header_lzma(lzma_lzma2_coder *coder) + + // Uncompressed size + size_t size = coder->uncompressed_size - 1; +- coder->buf[pos++] += size >> 16; +- coder->buf[pos++] = (size >> 8) & 0xFF; +- coder->buf[pos++] = size & 0xFF; ++ coder->buf[pos++] += (uint8_t)(size >> 16); ++ coder->buf[pos++] = (uint8_t)((size >> 8) & 0xFF); ++ coder->buf[pos++] = (uint8_t)(size & 0xFF); + + // Compressed size + size = coder->compressed_size - 1; +- coder->buf[pos++] = size >> 8; +- coder->buf[pos++] = size & 0xFF; ++ coder->buf[pos++] = (uint8_t)(size >> 8); ++ coder->buf[pos++] = (uint8_t)(size & 0xFF); + + // Properties, if needed + if (coder->need_properties) +@@ -122,7 +122,7 @@ lzma2_header_uncompressed(lzma_lzma2_coder *coder) + coder->need_dictionary_reset = false; + + // "Compressed" size +- coder->buf[1] = (coder->uncompressed_size - 1) >> 8; ++ coder->buf[1] = (uint8_t)((coder->uncompressed_size - 1) >> 8); + coder->buf[2] = (coder->uncompressed_size - 1) & 0xFF; + + // Set the start position for copying. +@@ -164,8 +164,8 @@ lzma2_encode(void *coder_ptr, lzma_mf *restrict mf, + case SEQ_LZMA_ENCODE: { + // Calculate how much more uncompressed data this chunk + // could accept. +- const uint32_t left = LZMA2_UNCOMPRESSED_MAX +- - coder->uncompressed_size; ++ const uint32_t left = (uint32_t)(LZMA2_UNCOMPRESSED_MAX ++ - coder->uncompressed_size); + uint32_t limit; + + if (left < mf->match_len_max) { +@@ -394,7 +394,7 @@ lzma_lzma2_props_encode(const void *options, uint8_t *out) + if (d == UINT32_MAX) + out[0] = 40; + else +- out[0] = get_dist_slot(d + 1) - 24; ++ out[0] = (uint8_t)(get_dist_slot(d + 1) - 24); + + return LZMA_OK; + } +diff --git a/src/liblzma/lzma/lzma_decoder.c b/src/liblzma/lzma/lzma_decoder.c +index 2088a2fa..627d2ca5 100644 +--- a/src/liblzma/lzma/lzma_decoder.c ++++ b/src/liblzma/lzma/lzma_decoder.c +@@ -378,7 +378,7 @@ lzma_decode(void *coder_ptr, lzma_dict *restrict dictptr, + } + + // Write decoded literal to dictionary +- dict_put(&dict, symbol); ++ dict_put(&dict, (uint8_t)symbol); + continue; + } + +@@ -742,7 +742,7 @@ slow: + } + + case SEQ_LITERAL_WRITE: +- if (dict_put_safe(&dict, symbol)) { ++ if (dict_put_safe(&dict, (uint8_t)symbol)) { + coder->sequence = SEQ_LITERAL_WRITE; + goto out; + } +@@ -1203,7 +1203,7 @@ lzma_lzma_lclppb_decode(lzma_options_lzma *options, uint8_t byte) + + // See the file format specification to understand this. + options->pb = byte / (9 * 5); +- byte -= options->pb * 9 * 5; ++ byte -= (uint8_t)(options->pb * 9 * 5); + options->lp = byte / 9; + options->lc = byte - options->lp * 9; + +diff --git a/src/liblzma/lzma/lzma_encoder.c b/src/liblzma/lzma/lzma_encoder.c +index 543ca321..20dc62d3 100644 +--- a/src/liblzma/lzma/lzma_encoder.c ++++ b/src/liblzma/lzma/lzma_encoder.c +@@ -753,7 +753,7 @@ lzma_lzma_lclppb_encode(const lzma_options_lzma *options, uint8_t *byte) + if (!is_lclppb_valid(options)) + return true; + +- *byte = (options->pb * 5 + options->lp) * 9 + options->lc; ++ *byte = (uint8_t)((options->pb * 5 + options->lp) * 9 + options->lc); + assert(*byte <= (4 * 5 + 4) * 9 + 8); + + return false; +diff --git a/src/liblzma/simple/arm.c b/src/liblzma/simple/arm.c +index f9d9c08b..159671a7 100644 +--- a/src/liblzma/simple/arm.c ++++ b/src/liblzma/simple/arm.c +@@ -35,9 +35,9 @@ arm_code(void *simple lzma_attribute((__unused__)), + dest = src - (now_pos + (uint32_t)(i) + 8); + + dest >>= 2; +- buffer[i + 2] = (dest >> 16); +- buffer[i + 1] = (dest >> 8); +- buffer[i + 0] = dest; ++ buffer[i + 2] = (uint8_t)(dest >> 16); ++ buffer[i + 1] = (uint8_t)(dest >> 8); ++ buffer[i + 0] = (uint8_t)dest; + } + } + +diff --git a/src/liblzma/simple/armthumb.c b/src/liblzma/simple/armthumb.c +index 368b51c7..36233550 100644 +--- a/src/liblzma/simple/armthumb.c ++++ b/src/liblzma/simple/armthumb.c +@@ -42,9 +42,9 @@ armthumb_code(void *simple lzma_attribute((__unused__)), + + dest >>= 1; + buffer[i + 1] = 0xF0 | ((dest >> 19) & 0x7); +- buffer[i + 0] = (dest >> 11); ++ buffer[i + 0] = (uint8_t)(dest >> 11); + buffer[i + 3] = 0xF8 | ((dest >> 8) & 0x7); +- buffer[i + 2] = (dest); ++ buffer[i + 2] = (uint8_t)(dest); + i += 2; + } + } +diff --git a/src/liblzma/simple/powerpc.c b/src/liblzma/simple/powerpc.c +index ea47d14d..2bcc638b 100644 +--- a/src/liblzma/simple/powerpc.c ++++ b/src/liblzma/simple/powerpc.c +@@ -39,8 +39,8 @@ powerpc_code(void *simple lzma_attribute((__unused__)), + dest = src - (now_pos + (uint32_t)(i)); + + buffer[i + 0] = 0x48 | ((dest >> 24) & 0x03); +- buffer[i + 1] = (dest >> 16); +- buffer[i + 2] = (dest >> 8); ++ buffer[i + 1] = (uint8_t)(dest >> 16); ++ buffer[i + 2] = (uint8_t)(dest >> 8); + buffer[i + 3] &= 0x03; + buffer[i + 3] |= dest; + } +diff --git a/src/liblzma/simple/simple_coder.c b/src/liblzma/simple/simple_coder.c +index 5cbfa822..930a532d 100644 +--- a/src/liblzma/simple/simple_coder.c ++++ b/src/liblzma/simple/simple_coder.c +@@ -59,7 +59,7 @@ call_filter(lzma_simple_coder *coder, uint8_t *buffer, size_t size) + const size_t filtered = coder->filter(coder->simple, + coder->now_pos, coder->is_encoder, + buffer, size); +- coder->now_pos += filtered; ++ coder->now_pos += (uint32_t)filtered; + return filtered; + } + +diff --git a/src/liblzma/common/block_buffer_encoder.c b/src/liblzma/common/block_buffer_encoder.c +index 3963c15c77a..2219593be1f 100644 +--- a/src/liblzma/common/block_buffer_encoder.c ++++ b/src/liblzma/common/block_buffer_encoder.c +@@ -80,7 +80,7 @@ lzma_block_buffer_bound(size_t uncompressed_size) + return 0; + #endif + +- return ret; ++ return (size_t)ret; + } + + +@@ -180,7 +180,7 @@ block_encode_normal(lzma_block *block, const lzma_allocator *allocator, + // Limit out_size so that we stop encoding if the output would grow + // bigger than what uncompressed Block would be. + if (out_size - *out_pos > block->compressed_size) +- out_size = *out_pos + block->compressed_size; ++ out_size = *out_pos + (size_t)block->compressed_size; + + // TODO: In many common cases this could be optimized to use + // significantly less memory. +diff --git a/src/liblzma/common/file_info.c b/src/liblzma/common/file_info.c +index 4b2eb5d0400..5efa73c4471 100644 +--- a/src/liblzma/common/file_info.c ++++ b/src/liblzma/common/file_info.c +@@ -399,7 +399,7 @@ file_info_decode(void *coder_ptr, const lzma_allocator *allocator, + // Set coder->temp_pos to point to the beginning + // of the Index. + coder->temp_pos = coder->temp_size +- - coder->footer_flags.backward_size; ++ - (size_t)coder->footer_flags.backward_size; + } else { + // These are set to zero to indicate that there's no + // useful data (Index or anything else) in coder->temp. +@@ -551,15 +551,15 @@ file_info_decode(void *coder_ptr, const lzma_allocator *allocator, + // Stream are cached and already handled a few lines above. + // So this isn't as useful as the other seek-avoidance cases. + if (coder->temp_size != 0 && coder->temp_size +- - coder->footer_flags.backward_size +- >= seek_amount) { ++ - (size_t)coder->footer_flags.backward_size ++ >= (size_t)seek_amount) { + // Make temp_pos and temp_size point to the *end* of + // Stream Header so that SEQ_HEADER_DECODE will find + // the start of Stream Header from coder->temp[ + // coder->temp_size - LZMA_STREAM_HEADER_SIZE]. + coder->temp_pos = coder->temp_size +- - coder->footer_flags.backward_size +- - seek_amount ++ - (size_t)coder->footer_flags.backward_size ++ - (size_t)seek_amount + + LZMA_STREAM_HEADER_SIZE; + coder->temp_size = coder->temp_pos; + } else { +diff --git a/src/liblzma/common/filter_flags_decoder.c b/src/liblzma/common/filter_flags_decoder.c +index 0f5d204d474..594c434b414 100644 +--- a/src/liblzma/common/filter_flags_decoder.c ++++ b/src/liblzma/common/filter_flags_decoder.c +@@ -33,13 +33,13 @@ lzma_filter_flags_decode( + in, in_pos, in_size)); + + // Filter Properties +- if (in_size - *in_pos < props_size) ++ if (in_size - *in_pos < (size_t)props_size) + return LZMA_DATA_ERROR; + + const lzma_ret ret = lzma_properties_decode( +- filter, allocator, in + *in_pos, props_size); ++ filter, allocator, in + *in_pos, (size_t)props_size); + +- *in_pos += props_size; ++ *in_pos += (size_t)props_size; + + return ret; + } +diff --git a/src/liblzma/common/index.c b/src/liblzma/common/index.c +index 6add6a68350..0b81e9322ad 100644 +--- a/src/liblzma/common/index.c ++++ b/src/liblzma/common/index.c +@@ -883,7 +883,7 @@ static index_stream * + index_dup_stream(const index_stream *src, const lzma_allocator *allocator) + { + // Catch a somewhat theoretical integer overflow. +- if (src->record_count > PREALLOC_MAX) ++ if (src->record_count > (lzma_vli)PREALLOC_MAX) + return NULL; + + // Allocate and initialize a new Stream. +@@ -907,7 +907,7 @@ index_dup_stream(const index_stream *src, const lzma_allocator *allocator) + // a single group. It's simplest and also tends to make + // lzma_index_locate() a little bit faster with very big Indexes. + index_group *destg = lzma_alloc(sizeof(index_group) +- + src->record_count * sizeof(index_record), ++ + (size_t)src->record_count * sizeof(index_record), + allocator); + if (destg == NULL) { + index_stream_end(dest, allocator); +@@ -918,8 +918,8 @@ index_dup_stream(const index_stream *src, const lzma_allocator *allocator) + destg->node.uncompressed_base = 0; + destg->node.compressed_base = 0; + destg->number_base = 1; +- destg->allocated = src->record_count; +- destg->last = src->record_count - 1; ++ destg->allocated = (size_t)src->record_count; ++ destg->last = (size_t)src->record_count - 1; + + // Go through all the groups in src and copy the Records into destg. + const index_group *srcg = (const index_group *)(src->groups.leftmost); diff --git a/src/native/external/xz-version.txt b/src/native/external/xz-version.txt new file mode 100644 index 00000000000000..59e7c59b82d3c3 --- /dev/null +++ b/src/native/external/xz-version.txt @@ -0,0 +1,12 @@ +v5.8.2 +https://github.com/tukaani-project/xz/releases/tag/v5.8.2 + +To update the code in the future, follow these steps: +1. Download the source code tarball for the matching version from https://tukaani.org/xz/ or https://github.com/tukaani-project/xz/releases/ +2. Delete existing files in 'src/native/external/xz/' +3. From the extracted tarball, copy the 'cmake', 'lib' and 'src' directories into 'src/native/external/xz/' +4. Rebuild the native components and run the relevant tests + +Only the 'cmake', 'lib' and 'src' directories from the original source were included to minimize unnecessary code. + +To fix build-time warnings in the currently released version, the xz-patch.diff file was applied. These changes should be upstreamed to the xz project and should not be necessary when upgrading to a future version. \ No newline at end of file diff --git a/src/native/external/xz.cmake b/src/native/external/xz.cmake new file mode 100644 index 00000000000000..fdca1b94887482 --- /dev/null +++ b/src/native/external/xz.cmake @@ -0,0 +1,51 @@ +# IMPORTANT: do not use add_compile_options(), add_definitions() or similar functions here since it will leak to the including projects + +include(FetchContent) + +FetchContent_Declare( + xz + SOURCE_DIR ${CMAKE_CURRENT_LIST_DIR}/xz + EXCLUDE_FROM_ALL +) + +# turn off multithreading support to lower the binary size +set(XZ_THREADS no) + +set(XZ_LZIP_DECODER OFF) +set(XZ_LZIP_ENCODER OFF) + +# turn off parts we don't need +set(BUILD_TESTING OFF) +set(XZ_DOXYGEN OFF) +set(XZ_DOC OFF) +set(XZ_NLS OFF) +set(XZ_MICROLZMA_ENCODER OFF) +set(XZ_MICROLZMA_DECODER OFF) +set(XZ_TOOL_XZ OFF) +set(XZ_TOOL_SCRIPTS OFF) +set(XZ_TOOL_XZDEC OFF) +set(XZ_TOOL_LZMADEC OFF) +set(XZ_TOOL_LZMAINFO OFF) +set(XZ_TOOL_SYMLINKS OFF) +set(XZ_TOOL_SYMLINKS_LZMA OFF) + +set(__CURRENT_BUILD_SHARED_LIBS ${BUILD_SHARED_LIBS}) +set(BUILD_SHARED_LIBS OFF) +FetchContent_MakeAvailable(xz) +set(BUILD_SHARED_LIBS ${__CURRENT_BUILD_SHARED_LIBS}) + +set(LZMA_INCLUDE_DIRS ${CMAKE_CURRENT_LIST_DIR}/xz/src/liblzma/api) + +# API functions are marked for export only when HAVE_VISIBILITY=1. However, +# that happens only if BUILD_SHARED_LIBS is ON. Since we want to export the API +# functions even for static library to be able to invoke them in the resulting shared +# libraries, so we need to force HAVE_VISIBILITY=1 for static liblzma as well. +if (NOT MSVC) + get_target_property(defs liblzma COMPILE_DEFINITIONS) + if(defs AND NOT defs STREQUAL "defs-NOTFOUND") + list(FILTER defs EXCLUDE REGEX "HAVE_VISIBILITY=.") + set_property(TARGET liblzma PROPERTY COMPILE_DEFINITIONS ${defs}) + endif() + target_compile_definitions(liblzma PRIVATE HAVE_VISIBILITY=1) +endif() + diff --git a/src/native/external/xz/AUTHORS b/src/native/external/xz/AUTHORS new file mode 100644 index 00000000000000..f805a204ecb75c --- /dev/null +++ b/src/native/external/xz/AUTHORS @@ -0,0 +1,58 @@ + +Authors of XZ Utils +=================== + + XZ Utils is developed and maintained by + Lasse Collin . + + Major parts of liblzma are based on code written by Igor Pavlov, + specifically the LZMA SDK . Without + this code, XZ Utils wouldn't exist. + + The SHA-256 implementation in liblzma is based on code written by + Wei Dai in Crypto++ Library . + + A few scripts have been adapted from GNU gzip. The original + versions were written by Jean-loup Gailly, Charles Levert, and + Paul Eggert. Andrew Dudman helped adapting the scripts and their + man pages for XZ Utils. + + The initial version of the threaded .xz decompressor was written + by Sebastian Andrzej Siewior. + + The initial version of the .lz (lzip) decoder was written + by Michał Górny. + + Architecture-specific CRC optimizations were contributed by + Ilya Kurdyukov, Chenxi Mao, and Xi Ruoyao. + + Other authors: + - Jonathan Nieder + - Joachim Henke + + Special author: Jia Tan was a co-maintainer in 2022-2024. He and + the team behind him inserted a backdoor (CVE-2024-3094) into + XZ Utils 5.6.0 and 5.6.1 releases. He suddenly disappeared when + this was discovered. + + Many people have contributed improvements or reported bugs. + Most of these people are mentioned in the file THANKS. + + The translations of the command line tools and man pages have been + contributed by many people via the Translation Project: + + - https://translationproject.org/domain/xz.html + - https://translationproject.org/domain/xz-man.html + + The authors of the translated man pages are in the header comments + of the man page files. In the source package, the authors of the + translations are in po/*.po and po4a/*.po files. + + Third-party code whose authors aren't listed here: + + - GNU getopt_long() in the 'lib' directory is included for + platforms that don't have a usable getopt_long(). + + - The build system files from GNU Autoconf, GNU Automake, + GNU Libtool, GNU Gettext, Autoconf Archive, and related files. + diff --git a/src/native/external/xz/CMakeLists.txt b/src/native/external/xz/CMakeLists.txt new file mode 100644 index 00000000000000..8d8d9a5306fb16 --- /dev/null +++ b/src/native/external/xz/CMakeLists.txt @@ -0,0 +1,2520 @@ +# SPDX-License-Identifier: 0BSD + +############################################################################# +# +# CMake support for building XZ Utils +# +# Requirements: +# +# - CMake 3.20 or later +# +# - To get translated messages, install GNU gettext tools (the command +# msgfmt is needed). Alternatively disable translations by setting +# XZ_NLS=OFF. +# +# - If building from xz.git instead of a release tarball: To generate +# translated man pages, run po4a/update-po which requires the po4a +# tool. The build works without this step too. +# +# About CMAKE_BUILD_TYPE: +# +# - CMake's standard choices are fine to use for production builds, +# including "Release" and "RelWithDebInfo". +# +# NOTE: While "Release" uses -O3 by default with some compilers, +# this file overrides -O3 to -O2 for "Release" builds if +# CMAKE_C_FLAGS_RELEASE is not defined by the user. At least +# with GCC and Clang/LLVM, -O3 doesn't seem useful for this +# package as it can result in bigger binaries without any +# improvement in speed compared to -O2. +# +# - Empty value (the default) is handled slightly specially: It +# adds -DNDEBUG to disable debugging code (assert() and a few +# other things). No optimization flags are added so an empty +# CMAKE_BUILD_TYPE is an easy way to build with whatever +# optimization flags one wants, and so this method is also +# suitable for production builds. +# +# If debugging is wanted when using empty CMAKE_BUILD_TYPE, +# include -UNDEBUG in the CFLAGS environment variable or +# in the CMAKE_C_FLAGS CMake variable to override -DNDEBUG. +# With empty CMAKE_BUILD_TYPE, the -UNDEBUG option will go +# after the -DNDEBUG option on the compiler command line and +# thus NDEBUG will be undefined. +# +# - Non-standard build types like "None" aren't treated specially +# and thus won't have -DNEBUG. Such non-standard build types +# SHOULD BE AVOIDED FOR PRODUCTION BUILDS. Or at least one +# should remember to add -DNDEBUG. +# +# This file provides the following installation components (if you only +# need liblzma, install only its components!): +# - liblzma_Runtime (shared library only) +# - liblzma_Development +# - liblzma_Documentation (examples and Doxygen-generated API docs as HTML) +# - xz_Runtime (xz, the symlinks, and possibly translation files) +# - xz_Documentation (xz man pages and the symlinks) +# - xzdec_Runtime +# - xzdec_Documentation (xzdec *and* lzmadec man pages) +# - lzmadec_Runtime +# - lzmainfo_Runtime +# - lzmainfo_Documentation (lzmainfo man pages) +# - scripts_Runtime (xzdiff, xzgrep, xzless, xzmore) +# - scripts_Documentation (their man pages) +# - Documentation (generic docs like README and licenses) +# +# To find the target liblzma::liblzma from other packages, use the CONFIG +# option with find_package() to avoid a conflict with the FindLibLZMA module +# with case-insensitive file systems. For example, to require liblzma 5.2.5 +# or a newer compatible version: +# +# find_package(liblzma 5.2.5 REQUIRED CONFIG) +# target_link_libraries(my_application liblzma::liblzma) +# +############################################################################# +# +# Author: Lasse Collin +# +############################################################################# + +cmake_minimum_required(VERSION 3.20...4.2 FATAL_ERROR) + +include(CMakePushCheckState) +include(CheckIncludeFile) +include(CheckSymbolExists) +include(CheckStructHasMember) +include(CheckCSourceCompiles) +include(CheckCCompilerFlag) +include(CheckLinkerFlag) +include(cmake/tuklib_large_file_support.cmake) +include(cmake/tuklib_integer.cmake) +include(cmake/tuklib_cpucores.cmake) +include(cmake/tuklib_physmem.cmake) +include(cmake/tuklib_progname.cmake) +include(cmake/tuklib_mbstr.cmake) + +set(PACKAGE_NAME "XZ Utils") +set(PACKAGE_BUGREPORT "xz@tukaani.org") +set(PACKAGE_URL "https://tukaani.org/xz/") + +# Get the package version from version.h into PACKAGE_VERSION_SHORT and +# PACKAGE_VERSION. The former variable won't include the possible "alpha" +# or "beta" suffix. +file(READ src/liblzma/api/lzma/version.h PACKAGE_VERSION) +string(REGEX REPLACE +"^.*\n\ +#define LZMA_VERSION_MAJOR ([0-9]+)\n\ +.*\ +#define LZMA_VERSION_MINOR ([0-9]+)\n\ +.*\ +#define LZMA_VERSION_PATCH ([0-9]+)\n\ +.*$" + "\\1.\\2.\\3" PACKAGE_VERSION_SHORT "${PACKAGE_VERSION}") + +if(PACKAGE_VERSION MATCHES "\n#define [A-Z_ ]*_ALPHA\n") + set(PACKAGE_VERSION "${PACKAGE_VERSION_SHORT}alpha") +elseif(PACKAGE_VERSION MATCHES "\n#define [A-Z_ ]*_BETA\n") + set(PACKAGE_VERSION "${PACKAGE_VERSION_SHORT}beta") +else() + set(PACKAGE_VERSION "${PACKAGE_VERSION_SHORT}") +endif() + +# With several compilers, CMAKE_BUILD_TYPE=Release uses -O3 optimization +# which results in bigger code without a clear difference in speed. If +# no user-defined CMAKE_C_FLAGS_RELEASE is present, override -O3 to -O2 +# to make it possible to recommend CMAKE_BUILD_TYPE=Release. +if(NOT DEFINED CMAKE_C_FLAGS_RELEASE) + set(OVERRIDE_O3_IN_C_FLAGS_RELEASE ON) +endif() + +# Among other things, this gives us variables xz_VERSION and xz_VERSION_MAJOR. +project(xz VERSION "${PACKAGE_VERSION_SHORT}" LANGUAGES C) + +if(OVERRIDE_O3_IN_C_FLAGS_RELEASE) + # Looking at CMake's source, there aren't any _FLAGS_RELEASE_INIT + # entries where "-O3" would appear as part of some other option, + # thus a simple search and replace should be fine. + string(REPLACE -O3 -O2 CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE}") + + # Update the cache value while keeping its docstring unchanged. + set_property(CACHE CMAKE_C_FLAGS_RELEASE + PROPERTY VALUE "${CMAKE_C_FLAGS_RELEASE}") +endif() + +# Reject unsupported MSVC versions. +if(MSVC AND MSVC_VERSION LESS 1900) + message(FATAL_ERROR "Visual Studio older than 2015 is not supported") +endif() + +# We need a compiler that supports enough C99 or newer (variable-length arrays +# aren't needed, those are optional in C11/C17). C11 is preferred since C11 +# features may be optionally used if they are available. +# +# Setting CMAKE_C_STANDARD here makes it the default for all targets. +# It doesn't affect the INTERFACE so liblzma::liblzma won't end up with +# INTERFACE_COMPILE_FEATURES "c_std_99" or such (the API headers are C89 +# and C++ compatible). +# +# Avoid set(CMAKE_C_STANDARD_REQUIRED ON) because it's fine to decay +# to C99 if C11 isn't supported. +set(CMAKE_C_STANDARD 11) + +# On Apple OSes, don't build executables as bundles: +set(CMAKE_MACOSX_BUNDLE OFF) + +# The targets defined here don't support compiling as a unity build. Encoder +# and decoder source files define different types with the same name, and some +# internal header files don't have header guards leading to redeclaration +# errors. +set(CMAKE_UNITY_BUILD OFF) + +# Set CMAKE_INSTALL_LIBDIR and friends. This needs to be done before +# the LOCALEDIR_DEFINITION workaround below. +include(GNUInstallDirs) + +# windres from GNU binutils can be tricky with command line arguments +# that contain spaces or other funny characters. Unfortunately we need +# a space in PACKAGE_NAME. Using \x20 to encode the US-ASCII space seems +# to work in both cmd.exe and /bin/sh. +# +# However, even \x20 isn't enough in all situations, resulting in +# "syntax error" from windres. Using --use-temp-file prevents windres +# from using popen() and this seems to fix the problem. +# +# llvm-windres from Clang/LLVM 16.0.6 and older: The \x20 results +# in "XZx20Utils" in the compiled binary. The option --use-temp-file +# makes no difference. +# +# llvm-windres 17.0.0 and later: It emulates GNU windres more accurately, so +# the workarounds used with GNU windres must be used with llvm-windres too. +# +# CMake 3.27 doesn't have CMAKE_RC_COMPILER_ID so we rely on +# CMAKE_C_COMPILER_ID. +if((MINGW OR CYGWIN) AND ( + NOT CMAKE_C_COMPILER_ID STREQUAL "Clang" OR + CMAKE_C_COMPILER_VERSION VERSION_GREATER_EQUAL "17")) + # Use workarounds with GNU windres and llvm-windres >= 17.0.0. The \x20 + # in PACKAGE_NAME_DEFINITION works with gcc and clang too so we don't need + # to worry how to pass different flags to windres and the C compiler. + # Keep the original PACKAGE_NAME intact for generation of liblzma.pc. + string(APPEND CMAKE_RC_FLAGS " --use-temp-file") + string(REPLACE " " "\\040" PACKAGE_NAME_DEFINITION "${PACKAGE_NAME}") + + # Use octal because "Program Files" would become \x20F. + string(REPLACE " " "\\040" LOCALEDIR_DEFINITION + "${CMAKE_INSTALL_FULL_LOCALEDIR}") +else() + # Elsewhere a space is safe. This also keeps things compatible with + # EBCDIC in case CMake-based build is ever done on such a system. + set(PACKAGE_NAME_DEFINITION "${PACKAGE_NAME}") + set(LOCALEDIR_DEFINITION "${CMAKE_INSTALL_FULL_LOCALEDIR}") +endif() + +# When used with MSVC, CMake can merge .manifest files with +# linker-generated manifests and embed the result in an executable. +# However, when paired with MinGW-w64, CMake (3.30) ignores .manifest +# files. Embedding a manifest with a resource file works with both +# toochains. It's also the way to do it with Autotools. +# +# With MSVC, we need to disable the default manifest; attempting to add +# two manifest entries would break the build. The flag /MANIFEST:NO +# goes to the linker and it also affects behavior of CMake itself: it +# looks what flags are being passed to the linker and when CMake sees +# the /MANIFEST:NO option, other manifest-related linker flags are +# no longer added (see the file Source/cmcmd.cxx in CMake). +# +# See: https://gitlab.kitware.com/cmake/cmake/-/issues/23066 +if(MSVC) + set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} /MANIFEST:NO") +endif() + +# Dependencies for all Windows resource files: +set(W32RES_DEPENDENCIES + "${CMAKE_CURRENT_SOURCE_DIR}/src/common/common_w32res.rc" + "${CMAKE_CURRENT_SOURCE_DIR}/src/common/w32_application.manifest" +) + +# Definitions common to all targets: +add_compile_definitions( + # Package info: + PACKAGE_NAME="${PACKAGE_NAME_DEFINITION}" + PACKAGE_BUGREPORT="${PACKAGE_BUGREPORT}" + PACKAGE_URL="${PACKAGE_URL}" + + # Standard headers and types are available: + HAVE_STDBOOL_H + HAVE__BOOL + HAVE_STDINT_H + HAVE_INTTYPES_H + + # Always enable CRC32 since liblzma should never build without it. + HAVE_CHECK_CRC32 + + # Disable assert() checks when no build type has been specified. Non-empty + # build types like "Release" and "Debug" handle this by default. + $<$:NDEBUG> +) + +# Support 32-bit x86 assembly files. +if(NOT MSVC) + # It's simplest to ask the compiler for the architecture because we + # know that on supported platforms __i386__ is defined. + # + # Checking CMAKE_SYSTEM_PROCESSOR wouldn't be so simple or as reliable + # because it could indicate x86-64 even if building for 32-bit x86 + # because one doesn't necessarily use a CMake toolchain file when + # building 32-bit executables on a 64-bit system. Also, the strings + # that identify 32-bit or 64-bit x86 aren't standardized in CMake. + if(MINGW OR CYGWIN OR CMAKE_SYSTEM_NAME MATCHES + "^FreeBSD$|^GNU$|^Linux$|^MirBSD$|^NetBSD$|^OpenBSD$") + check_symbol_exists("__i386__" "" ASM_I386_DEFAULT) + else() + set(ASM_I386_DEFAULT OFF) + endif() + + option(XZ_ASM_I386 "Enable 32-bit x86 assembly code" + "${ASM_I386_DEFAULT}") + + if(XZ_ASM_I386) + enable_language(ASM) + endif() +endif() + + +###################### +# System definitions # +###################### + +# _GNU_SOURCE and such definitions. This specific macro is special since +# it also adds the definitions to CMAKE_REQUIRED_DEFINITIONS. +tuklib_use_system_extensions() + +# Check for large file support. It's required on some 32-bit platforms and +# even on 64-bit MinGW-w64 to get 64-bit off_t. This can be forced off on +# the CMake command line if needed: -DLARGE_FILE_SUPPORT=OFF +tuklib_large_file_support(ALL) + +# This is needed by liblzma and xz. +tuklib_integer(ALL) + +# This is used for liblzma.pc generation to add -lrt and -lmd if needed. +# +# The variable name LIBS comes from Autoconf where AC_SEARCH_LIBS adds the +# libraries it finds into the shell variable LIBS. These libraries need to +# be put into liblzma.pc too, thus liblzma.pc.in has @LIBS@ because that +# matches the Autoconf's variable. When CMake support was added, using +# the same variable with configure_file() was the simplest method. +set(LIBS) + +# Check for clock_gettime(). Do this before checking for threading so +# that we know there if CLOCK_MONOTONIC is available. +check_symbol_exists(clock_gettime time.h HAVE_CLOCK_GETTIME) + +if(NOT HAVE_CLOCK_GETTIME) + # With glibc <= 2.17 or Solaris 10 this needs librt. + # Add librt for the next check for HAVE_CLOCK_GETTIME. If it is + # found after including the library, we know that librt is required. + list(PREPEND CMAKE_REQUIRED_LIBRARIES rt) + check_symbol_exists(clock_gettime time.h HAVE_CLOCK_GETTIME_LIBRT) + + # If it was found now, add librt to all targets and keep it in + # CMAKE_REQUIRED_LIBRARIES for further tests too. + if(HAVE_CLOCK_GETTIME_LIBRT) + link_libraries(rt) + set(LIBS "-lrt ${LIBS}") # For liblzma.pc + else() + list(POP_FRONT CMAKE_REQUIRED_LIBRARIES) + endif() +endif() + +if(HAVE_CLOCK_GETTIME OR HAVE_CLOCK_GETTIME_LIBRT) + add_compile_definitions(HAVE_CLOCK_GETTIME) + + # Check if CLOCK_MONOTONIC is available for clock_gettime(). + check_symbol_exists(CLOCK_MONOTONIC time.h HAVE_CLOCK_MONOTONIC) + tuklib_add_definition_if(ALL HAVE_CLOCK_MONOTONIC) +endif() + + +# The definition ENABLE_NLS is added only to those targets that use it, thus +# it's not done here. (xz has translations, xzdec doesn't.) +set(XZ_NLS_DEFAULT OFF) +find_package(Intl) +find_package(Gettext) +if(Intl_FOUND AND GETTEXT_FOUND) + set(XZ_NLS_DEFAULT ON) +endif() + +option(XZ_NLS "Native Language Support (translated messages and man pages)" + "${XZ_NLS_DEFAULT}") + +if(XZ_NLS) + if(NOT Intl_FOUND) + message(FATAL_ERROR "Native language support (NLS) was enabled but " + "find_package(Intl) failed. " + "Install libintl or set XZ_NLS=OFF.") + endif() + + if(NOT GETTEXT_FOUND) + message(FATAL_ERROR "Native language support (NLS) was enabled but " + "find_package(Gettext) failed. " + "Install gettext tools or set XZ_NLS=OFF.") + endif() + + if(WIN32) + # The command line tools use UTF-8 on native Windows. + # Non-ASCII characters display correctly only when + # using UCRT and gettext-runtime >= 0.23.1. + check_c_source_compiles( + "#define WIN32_LEAN_AND_MEAN + #include + #include + + #ifndef _UCRT + #error \"Not UCRT\" + #endif + + #if LIBINTL_VERSION < 0x001701 + #error \"gettext-runtime < 0.23.1\" + #endif + + int main(void) { return 0; } + " + USING_UCRT_AND_RECENT_GETTEXT) + if(NOT USING_UCRT_AND_RECENT_GETTEXT) + message(FATAL_ERROR "Native language support (NLS) was enabled " + "but it requires UCRT and " + "gettext-runtime >= 0.23.1. To build with " + "MSVCRT or old gettext-runtime, " + "set XZ_NLS=OFF.") + endif() + endif() + + # Warn if translated man pages are missing. + if(UNIX AND NOT EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/po4a/man") + message(WARNING "Native language support (NLS) was enabled " + "but pre-generated translated man pages " + "were not found and thus they won't be installed. " + "Run 'po4a/update-po' to generate them.") + endif() + + # The *installed* name of the translation files is "xz.mo". + set(TRANSLATION_DOMAIN "xz") +endif() + + +# Add warning options for GCC or Clang. Keep this in sync with configure.ac. +# +# NOTE: add_compile_options() doesn't affect the feature checks; +# only the new targets being created use these flags. Thus +# the -Werror usage in checks won't be break because of these. +if(CMAKE_C_COMPILER_ID MATCHES GNU|Clang) + foreach(OPT -Wall + -Wextra + -Wvla + -Wformat=2 + -Winit-self + -Wmissing-include-dirs + -Wshift-overflow=2 + -Wstrict-overflow=3 + -Walloc-zero + -Wduplicated-cond + -Wfloat-equal + -Wundef + -Wshadow + -Wpointer-arith + -Wbad-function-cast + -Wwrite-strings + -Wdate-time + -Wsign-conversion + -Wfloat-conversion + -Wlogical-op + -Waggregate-return + -Wstrict-prototypes + -Wold-style-definition + -Wmissing-prototypes + -Wmissing-declarations + -Wredundant-decls + -Wimplicit-fallthrough + -Wimplicit-fallthrough=5 + + -Wc99-compat + -Wc11-extensions + -Wc2x-compat + -Wc2x-extensions + -Wpre-c2x-compat + -Warray-bounds-pointer-arithmetic + -Wassign-enum + -Wconditional-uninitialized + -Wdocumentation + -Wduplicate-enum + -Wempty-translation-unit + -Wextra-semi-stmt + -Wflexible-array-extensions + -Wmissing-variable-declarations + -Wnewline-eof + -Wshift-sign-overflow + -Wstring-conversion + ) + # A variable name cannot have = in it so replace = with _. + string(REPLACE = _ CACHE_VAR "HAVE_COMPILER_OPTION_${OPT}") + + check_c_compiler_flag("${OPT}" "${CACHE_VAR}") + + if("${${CACHE_VAR}}") + add_compile_options("${OPT}") + endif() + endforeach() +endif() + + +############################################################################# +# liblzma +############################################################################# + +option(BUILD_SHARED_LIBS "Build liblzma as a shared library instead of static") + +# Symbol versioning is supported with ELF shared libraries on certain OSes. +# First assume that symbol versioning isn't supported. +set(SYMBOL_VERSIONING "no") + +if(NOT WIN32) + # The XZ_SYMBOL_VERSIONING option is ignored for static libraries but + # we keep the option visible still in case the project is reconfigured + # to build a shared library. + # + # auto Autodetect between no, generic, and linux + # yes Force on by autodetecting between linux and generic + # no Disable symbol versioning + # generic FreeBSD, some Linux/glibc systems, and GNU/Hurd + # linux Linux/glibc with extra symbol versions for compatibility + # with binaries that have been linked against a liblzma version + # that has been patched with "xz-5.2.2-compat-libs.patch" from + # RHEL/CentOS 7. + set(SUPPORTED_SYMBOL_VERSIONING_VARIANTS auto yes no generic linux) + set(XZ_SYMBOL_VERSIONING "auto" CACHE STRING "Enable ELF shared library \ +symbol versioning (${SUPPORTED_SYMBOL_VERSIONING_VARIANTS})") + + # Show a dropdown menu in CMake GUI: + set_property(CACHE XZ_SYMBOL_VERSIONING + PROPERTY STRINGS "${SUPPORTED_SYMBOL_VERSIONING_VARIANTS}") + + if(NOT XZ_SYMBOL_VERSIONING IN_LIST SUPPORTED_SYMBOL_VERSIONING_VARIANTS) + message(FATAL_ERROR "'${XZ_SYMBOL_VERSIONING}' is not a supported " + "symbol versioning variant") + endif() + + if(NOT XZ_SYMBOL_VERSIONING MATCHES "^auto$|^yes$") + # Autodetection was disabled. Use the user-specified value as is. + set(SYMBOL_VERSIONING "${XZ_SYMBOL_VERSIONING}") + else() + # Autodetect the symbol versioning variant. + # + # Avoid symvers on Linux with non-glibc like musl and uClibc. + # In Autoconf it's enough to check that $host_os equals linux-gnu + # instead of, for example, linux-musl. CMake doesn't provide such + # a method. + # + # This check is here for now since it's not strictly required + # by anything else. + if(CMAKE_SYSTEM_NAME STREQUAL "Linux") + check_c_source_compiles( + "#include + #if defined(__GLIBC__) && !defined(__UCLIBC__) + int main(void) { return 0; } + #else + compile error + #endif + " + IS_LINUX_WITH_GLIBC) + else() + set(IS_LINUX_WITH_GLIBC OFF) + endif() + + if(IS_LINUX_WITH_GLIBC AND + (CMAKE_SYSTEM_PROCESSOR MATCHES "[Mm]icro[Bb]laze" OR + CMAKE_C_COMPILER_ID STREQUAL "NVHPC")) + # As a special case, GNU/Linux on MicroBlaze gets the generic + # symbol versioning because GCC 12 doesn't support the __symver__ + # attribute on MicroBlaze. On Linux, CMAKE_SYSTEM_PROCESSOR comes + # from "uname -m" for native builds (should be "microblaze") or + # from the CMake toolchain file (not perfectly standardized but + # it very likely has "microblaze" in lower case or mixed case + # somewhere in the string). + # + # NVIDIA HPC Compiler doesn't support symbol versioning but + # it uses the linker from the system so the linker script + # can still be used to get the generic symbol versioning. + set(SYMBOL_VERSIONING "generic") + + elseif(IS_LINUX_WITH_GLIBC) + # GNU/Linux-specific symbol versioning for shared liblzma. This + # includes a few extra compatibility symbols for RHEL/CentOS 7 + # which are pointless on non-glibc non-Linux systems. + set(SYMBOL_VERSIONING "linux") + + elseif(CMAKE_SYSTEM_NAME MATCHES "^FreeBSD$|^GNU$" OR + XZ_SYMBOL_VERSIONING STREQUAL "yes") + set(SYMBOL_VERSIONING "generic") + endif() + endif() + + if(NOT SYMBOL_VERSIONING STREQUAL "no") + # If features are disabled in liblzma, some symbols may be missing. + # LLVM's lld defaults to --no-undefined-version and the build breaks + # if not all symbols in the version script exist. That is good for + # catching errors like typos, but in our case the downside is too big. + # Avoid the problem by using --undefined-version if the linker + # supports it. + # + # GNU ld has had --no-undefined-version for a long time but it's not + # the default. The opposite option --undefined-version was only added + # in 2022, thus we must use --undefined-version conditionally. + check_linker_flag(C "-Wl,--undefined-version" + HAVE_LINKER_FLAG_UNDEFINED_VERSION) + endif() +endif() + +set(LIBLZMA_API_HEADERS + src/liblzma/api/lzma.h + src/liblzma/api/lzma/base.h + src/liblzma/api/lzma/bcj.h + src/liblzma/api/lzma/block.h + src/liblzma/api/lzma/check.h + src/liblzma/api/lzma/container.h + src/liblzma/api/lzma/delta.h + src/liblzma/api/lzma/filter.h + src/liblzma/api/lzma/hardware.h + src/liblzma/api/lzma/index.h + src/liblzma/api/lzma/index_hash.h + src/liblzma/api/lzma/lzma12.h + src/liblzma/api/lzma/stream_flags.h + src/liblzma/api/lzma/version.h + src/liblzma/api/lzma/vli.h +) + +add_library(liblzma + src/common/mythread.h + src/common/sysdefs.h + src/common/tuklib_common.h + src/common/tuklib_config.h + src/common/tuklib_integer.h + src/common/tuklib_physmem.c + src/common/tuklib_physmem.h + ${LIBLZMA_API_HEADERS} + src/liblzma/check/check.c + src/liblzma/check/check.h + src/liblzma/check/crc_common.h + src/liblzma/check/crc_x86_clmul.h + src/liblzma/check/crc32_arm64.h + src/liblzma/check/crc32_loongarch.h + src/liblzma/common/block_util.c + src/liblzma/common/common.c + src/liblzma/common/common.h + src/liblzma/common/easy_preset.c + src/liblzma/common/easy_preset.h + src/liblzma/common/filter_common.c + src/liblzma/common/filter_common.h + src/liblzma/common/hardware_physmem.c + src/liblzma/common/index.c + src/liblzma/common/index.h + src/liblzma/common/memcmplen.h + src/liblzma/common/stream_flags_common.c + src/liblzma/common/stream_flags_common.h + src/liblzma/common/string_conversion.c + src/liblzma/common/vli_size.c +) + +target_include_directories(liblzma PRIVATE + src/liblzma/api + src/liblzma/common + src/liblzma/check + src/liblzma/lz + src/liblzma/rangecoder + src/liblzma/lzma + src/liblzma/delta + src/liblzma/simple + src/common +) + + +############# +# Threading # +############# + +# Supported threading methods: +# yes - Autodetect the best threading method. The autodetection will +# prefer Windows threading (win95 or vista) over posix if both are +# available. vista threads will be used over win95 unless it is a +# 32-bit build. Configuration fails if no threading support is found; +# threading won't be silently disabled. +# no - Disable threading. +# posix - Use posix threading (pthreads), or throw an error if not available. +# win95 - Use Windows win95 threading, or throw an error if not available. +# vista - Use Windows vista threading, or throw an error if not available. +set(SUPPORTED_THREADING_METHODS yes no posix win95 vista) + +set(XZ_THREADS yes CACHE STRING "Threading method: \ +'yes' to autodetect, 'no' to disable, 'posix' (pthreads), \ +'win95' (WinXP compatible), 'vista' (needs Windows Vista or later)") + +# Create dropdown in CMake GUI since only 1 threading method is possible +# to select in a build. +set_property(CACHE XZ_THREADS + PROPERTY STRINGS "${SUPPORTED_THREADING_METHODS}") + +# This is a flag variable set when win95 threads are used. We must ensure +# that the combination of XZ_SMALL and win95 threads is only used with a +# compiler that supports the __constructor__ attribute. +set(USE_WIN95_THREADS OFF) + +# This is a flag variable set when posix threading (pthreads) is used. +# It's needed when creating liblzma-config.cmake where dependency on +# Threads::Threads is only needed with pthreads. +set(USE_POSIX_THREADS OFF) + +if(NOT XZ_THREADS IN_LIST SUPPORTED_THREADING_METHODS) + message(FATAL_ERROR "'${XZ_THREADS}' is not a supported threading method") +endif() + +if(XZ_THREADS) + # Also set THREADS_PREFER_PTHREAD_FLAG since the flag has no effect + # for Windows threading. + set(THREADS_PREFER_PTHREAD_FLAG TRUE) + find_package(Threads REQUIRED) + + # If both Windows and posix threading are available, prefer Windows. + # Note that on Cygwin, CMAKE_USE_WIN32_THREADS_INIT is false. + if(CMAKE_USE_WIN32_THREADS_INIT AND NOT XZ_THREADS STREQUAL "posix") + if(XZ_THREADS STREQUAL "win95" + OR (XZ_THREADS STREQUAL "yes" AND CMAKE_SIZEOF_VOID_P EQUAL 4)) + # Use Windows 95 (and thus XP) compatible threads. + # This avoids use of features that were added in + # Windows Vista. This is used for 32-bit x86 builds for + # compatibility reasons since it makes no measurable difference + # in performance compared to Vista threads. + set(USE_WIN95_THREADS ON) + add_compile_definitions(MYTHREAD_WIN95) + else() + add_compile_definitions(MYTHREAD_VISTA) + endif() + elseif(CMAKE_USE_PTHREADS_INIT) + if(XZ_THREADS MATCHES "^posix$|^yes$") + # The threading library only needs to be explicitly linked + # for posix threads, so this is needed for creating + # liblzma-config.cmake later. + set(USE_POSIX_THREADS ON) + + target_link_libraries(liblzma PRIVATE Threads::Threads) + add_compile_definitions(MYTHREAD_POSIX) + + # Make the thread libs available in later checks. In practice + # only pthread_condattr_setclock check should need this. + list(PREPEND CMAKE_REQUIRED_LIBRARIES "${CMAKE_THREAD_LIBS_INIT}") + + # Check if pthread_condattr_setclock() exists to + # use CLOCK_MONOTONIC. + if(HAVE_CLOCK_MONOTONIC) + check_symbol_exists(pthread_condattr_setclock pthread.h + HAVE_PTHREAD_CONDATTR_SETCLOCK) + tuklib_add_definition_if(ALL HAVE_PTHREAD_CONDATTR_SETCLOCK) + endif() + else() + message(SEND_ERROR + "Windows threading method was requested but a compatible " + "library could not be found") + endif() + else() + message(SEND_ERROR "No supported threading library found") + endif() + + target_sources(liblzma PRIVATE + src/common/tuklib_cpucores.c + src/common/tuklib_cpucores.h + src/liblzma/common/hardware_cputhreads.c + src/liblzma/common/outqueue.c + src/liblzma/common/outqueue.h + ) +endif() + + +###################### +# Size optimizations # +###################### + +option(XZ_SMALL "Reduce code size at expense of speed. \ +This may be useful together with CMAKE_BUILD_TYPE=MinSizeRel.") + +if(XZ_SMALL) + add_compile_definitions(HAVE_SMALL) +endif() + + +########## +# Checks # +########## + +set(SUPPORTED_CHECKS crc32 crc64 sha256) + +set(XZ_CHECKS "${SUPPORTED_CHECKS}" CACHE STRING + "Check types to support (crc32 is always built)") + +foreach(CHECK IN LISTS XZ_CHECKS) + if(NOT CHECK IN_LIST SUPPORTED_CHECKS) + message(FATAL_ERROR "'${CHECK}' is not a supported check type") + endif() +endforeach() + +if(XZ_SMALL) + target_sources(liblzma PRIVATE src/liblzma/check/crc32_small.c) +else() + target_sources(liblzma PRIVATE + src/liblzma/check/crc32_fast.c + src/liblzma/check/crc32_table_be.h + src/liblzma/check/crc32_table_le.h + ) + + if(XZ_ASM_I386) + target_sources(liblzma PRIVATE src/liblzma/check/crc32_x86.S) + target_compile_definitions(liblzma PRIVATE HAVE_CRC_X86_ASM) + endif() +endif() + +if("crc64" IN_LIST XZ_CHECKS) + add_compile_definitions("HAVE_CHECK_CRC64") + + if(XZ_SMALL) + target_sources(liblzma PRIVATE src/liblzma/check/crc64_small.c) + else() + target_sources(liblzma PRIVATE + src/liblzma/check/crc64_fast.c + src/liblzma/check/crc64_table_be.h + src/liblzma/check/crc64_table_le.h + ) + + if(XZ_ASM_I386) + target_sources(liblzma PRIVATE src/liblzma/check/crc64_x86.S) + # Adding #define HAVE_CRC_X86_ASM was already handled in + # the CRC32 case a few lines above. CRC32 is always built. + endif() + endif() +endif() + +# External SHA-256 +# +# At least the following implementations are supported: +# +# OS Headers Library Type Function +# FreeBSD sys/types.h + sha256.h libmd SHA256_CTX SHA256_Init +# NetBSD sys/types.h + sha2.h SHA256_CTX SHA256_Init +# OpenBSD sys/types.h + sha2.h SHA2_CTX SHA256Init +# Solaris sys/types.h + sha2.h libmd SHA256_CTX SHA256Init +# MINIX 3 sys/types.h + sha2.h SHA256_CTX SHA256_Init +# Darwin CommonCrypto/CommonDigest.h CC_SHA256_CTX CC_SHA256_Init +# +# Note that Darwin's CC_SHA256_Update takes buffer size as uint32_t instead +# of size_t. +# +# This is disabled by default because it used to conflict with OpenSSL +# on some platforms and in some cases the builtin code in liblzma was faster. +# See INSTALL and the commit message ac398c3bafa6e4c80e20571373a96947db863b3d. +option(XZ_EXTERNAL_SHA256 "Use SHA-256 code from the operating system \ +if possible. See INSTALL for possible subtle problems." OFF) + +if("sha256" IN_LIST XZ_CHECKS) + add_compile_definitions("HAVE_CHECK_SHA256") + + # Assume that external code won't be used. We need this to know + # if the internal sha256.c should be built. + set(USE_INTERNAL_SHA256 ON) + + if(XZ_EXTERNAL_SHA256) + # Find the header. + set(SHA256_HEADER OFF) + + foreach(X CommonCrypto/CommonDigest.h sha256.h sha2.h) + string(TOUPPER "HAVE_${X}" HAVE_X) + string(REGEX REPLACE "[/.]" "_" HAVE_X "${HAVE_X}") + check_include_file("${X}" "${HAVE_X}") + if(${HAVE_X}) + target_compile_definitions(liblzma PRIVATE "${HAVE_X}") + set(SHA256_HEADER "${X}") + break() + endif() + endforeach() + + if(SHA256_HEADER) + # Find the type to hold the SHA-256 state. + set(SHA256_TYPE OFF) + + foreach(X CC_SHA256_CTX SHA256_CTX SHA2_CTX) + string(TOUPPER "HAVE_${X}" HAVE_X) + + # configure.ac uses conditionally but it's + # required on all cases except Darwin where it exists too. + # So just use it unconditionally here. + set(SOURCE + "#include + #include <${SHA256_HEADER}> + int main(void) + { + ${X} ctx; + return 0; + }") + check_c_source_compiles("${SOURCE}" "${HAVE_X}") + if(${HAVE_X}) + target_compile_definitions(liblzma PRIVATE "${HAVE_X}") + set(SHA256_TYPE "${X}") + break() + endif() + endforeach() + + if(SHA256_TYPE) + # Find the initialization function. It might required libmd. + foreach(X CC_SHA256_Init SHA256Init SHA256_Init) + string(TOUPPER "HAVE_${X}" HAVE_X) + + # On FreeBSD, defines the SHA-256 functions as + # macros to rename them for namespace reasons. Avoid + # check_symbol_exists as that would accept macros without + # checking if the program links. + set(SOURCE + "#include + #include <${SHA256_HEADER}> + int main(void) + { + ${SHA256_TYPE} ctx; + ${X}(&ctx); + return 0; + }") + + check_c_source_compiles("${SOURCE}" "${HAVE_X}") + if(${HAVE_X}) + target_compile_definitions(liblzma PRIVATE "${HAVE_X}") + set(USE_INTERNAL_SHA256 OFF) + break() + else() + # Try with libmd. Other checks don't need it so we + # don't need to leave it into CMAKE_REQUIRED_LIBRARIES. + list(PREPEND CMAKE_REQUIRED_LIBRARIES md) + check_c_source_compiles("${SOURCE}" "${HAVE_X}_LIBMD") + list(POP_FRONT CMAKE_REQUIRED_LIBRARIES) + if(${HAVE_X}_LIBMD) + # NOTE: Just "${HAVE_X}", not "${HAVE_X}_LIBMD": + target_compile_definitions(liblzma PRIVATE + "${HAVE_X}") + target_link_libraries(liblzma PRIVATE md) + set(LIBS "-lmd ${LIBS}") # For liblzma.pc + set(USE_INTERNAL_SHA256 OFF) + break() + endif() + endif() + endforeach() + endif() + endif() + endif() + + if(USE_INTERNAL_SHA256) + target_sources(liblzma PRIVATE src/liblzma/check/sha256.c) + endif() +endif() + + +################# +# Match finders # +################# + +set(SUPPORTED_MATCH_FINDERS hc3 hc4 bt2 bt3 bt4) + +set(XZ_MATCH_FINDERS "${SUPPORTED_MATCH_FINDERS}" CACHE STRING + "Match finders to support (at least one is required for LZMA1 or LZMA2)") + +foreach(MF IN LISTS XZ_MATCH_FINDERS) + if(MF IN_LIST SUPPORTED_MATCH_FINDERS) + string(TOUPPER "${MF}" MF_UPPER) + add_compile_definitions("HAVE_MF_${MF_UPPER}") + else() + message(FATAL_ERROR "'${MF}' is not a supported match finder") + endif() +endforeach() + + +############ +# Encoders # +############ + +set(SIMPLE_FILTERS + x86 + arm + armthumb + arm64 + powerpc + ia64 + sparc + riscv +) + +# The SUPPORTED_FILTERS are shared between Encoders and Decoders +# since only lzip does not appear in both lists. lzip is a special +# case anyway, so it is handled separately in the Decoders section. +set(SUPPORTED_FILTERS + lzma1 + lzma2 + delta + "${SIMPLE_FILTERS}" +) + +set(XZ_ENCODERS "${SUPPORTED_FILTERS}" CACHE STRING "Encoders to support") + +# If LZMA2 is enabled, then LZMA1 must also be enabled. +if(NOT "lzma1" IN_LIST XZ_ENCODERS AND "lzma2" IN_LIST XZ_ENCODERS) + message(FATAL_ERROR "LZMA2 encoder requires that LZMA1 is also enabled") +endif() + +# If LZMA1 is enabled, then at least one match finder must be enabled. +if(XZ_MATCH_FINDERS STREQUAL "" AND "lzma1" IN_LIST XZ_ENCODERS) + message(FATAL_ERROR "At least 1 match finder is required for an " + "LZ-based encoder") +endif() + +set(HAVE_DELTA_CODER OFF) +set(SIMPLE_ENCODERS OFF) +set(HAVE_ENCODERS OFF) + +foreach(ENCODER IN LISTS XZ_ENCODERS) + if(ENCODER IN_LIST SUPPORTED_FILTERS) + set(HAVE_ENCODERS ON) + + if(NOT SIMPLE_ENCODERS AND ENCODER IN_LIST SIMPLE_FILTERS) + set(SIMPLE_ENCODERS ON) + endif() + + string(TOUPPER "${ENCODER}" ENCODER_UPPER) + add_compile_definitions("HAVE_ENCODER_${ENCODER_UPPER}") + else() + message(FATAL_ERROR "'${ENCODER}' is not a supported encoder") + endif() +endforeach() + +if(HAVE_ENCODERS) + add_compile_definitions(HAVE_ENCODERS) + + target_sources(liblzma PRIVATE + src/liblzma/common/alone_encoder.c + src/liblzma/common/block_buffer_encoder.c + src/liblzma/common/block_buffer_encoder.h + src/liblzma/common/block_encoder.c + src/liblzma/common/block_encoder.h + src/liblzma/common/block_header_encoder.c + src/liblzma/common/easy_buffer_encoder.c + src/liblzma/common/easy_encoder.c + src/liblzma/common/easy_encoder_memusage.c + src/liblzma/common/filter_buffer_encoder.c + src/liblzma/common/filter_encoder.c + src/liblzma/common/filter_encoder.h + src/liblzma/common/filter_flags_encoder.c + src/liblzma/common/index_encoder.c + src/liblzma/common/index_encoder.h + src/liblzma/common/stream_buffer_encoder.c + src/liblzma/common/stream_encoder.c + src/liblzma/common/stream_flags_encoder.c + src/liblzma/common/vli_encoder.c + ) + + if(XZ_THREADS) + target_sources(liblzma PRIVATE + src/liblzma/common/stream_encoder_mt.c + ) + endif() + + if(SIMPLE_ENCODERS) + target_sources(liblzma PRIVATE + src/liblzma/simple/simple_encoder.c + src/liblzma/simple/simple_encoder.h + ) + endif() + + if("lzma1" IN_LIST XZ_ENCODERS) + target_sources(liblzma PRIVATE + src/liblzma/lzma/lzma_encoder.c + src/liblzma/lzma/lzma_encoder.h + src/liblzma/lzma/lzma_encoder_optimum_fast.c + src/liblzma/lzma/lzma_encoder_optimum_normal.c + src/liblzma/lzma/lzma_encoder_private.h + src/liblzma/lzma/fastpos.h + src/liblzma/lz/lz_encoder.c + src/liblzma/lz/lz_encoder.h + src/liblzma/lz/lz_encoder_hash.h + src/liblzma/lz/lz_encoder_hash_table.h + src/liblzma/lz/lz_encoder_mf.c + src/liblzma/rangecoder/price.h + src/liblzma/rangecoder/price_table.c + src/liblzma/rangecoder/range_encoder.h + ) + + if(NOT XZ_SMALL) + target_sources(liblzma PRIVATE src/liblzma/lzma/fastpos_table.c) + endif() + endif() + + if("lzma2" IN_LIST XZ_ENCODERS) + target_sources(liblzma PRIVATE + src/liblzma/lzma/lzma2_encoder.c + src/liblzma/lzma/lzma2_encoder.h + ) + endif() + + if("delta" IN_LIST XZ_ENCODERS) + set(HAVE_DELTA_CODER ON) + target_sources(liblzma PRIVATE + src/liblzma/delta/delta_encoder.c + src/liblzma/delta/delta_encoder.h + ) + endif() +endif() + + +############ +# Decoders # +############ + +set(XZ_DECODERS "${SUPPORTED_FILTERS}" CACHE STRING "Decoders to support") + +set(SIMPLE_DECODERS OFF) +set(HAVE_DECODERS OFF) + +foreach(DECODER IN LISTS XZ_DECODERS) + if(DECODER IN_LIST SUPPORTED_FILTERS) + set(HAVE_DECODERS ON) + + if(NOT SIMPLE_DECODERS AND DECODER IN_LIST SIMPLE_FILTERS) + set(SIMPLE_DECODERS ON) + endif() + + string(TOUPPER "${DECODER}" DECODER_UPPER) + add_compile_definitions("HAVE_DECODER_${DECODER_UPPER}") + else() + message(FATAL_ERROR "'${DECODER}' is not a supported decoder") + endif() +endforeach() + +if(HAVE_DECODERS) + add_compile_definitions(HAVE_DECODERS) + + target_sources(liblzma PRIVATE + src/liblzma/common/alone_decoder.c + src/liblzma/common/alone_decoder.h + src/liblzma/common/auto_decoder.c + src/liblzma/common/block_buffer_decoder.c + src/liblzma/common/block_decoder.c + src/liblzma/common/block_decoder.h + src/liblzma/common/block_header_decoder.c + src/liblzma/common/easy_decoder_memusage.c + src/liblzma/common/file_info.c + src/liblzma/common/filter_buffer_decoder.c + src/liblzma/common/filter_decoder.c + src/liblzma/common/filter_decoder.h + src/liblzma/common/filter_flags_decoder.c + src/liblzma/common/index_decoder.c + src/liblzma/common/index_decoder.h + src/liblzma/common/index_hash.c + src/liblzma/common/stream_buffer_decoder.c + src/liblzma/common/stream_decoder.c + src/liblzma/common/stream_flags_decoder.c + src/liblzma/common/stream_decoder.h + src/liblzma/common/vli_decoder.c + ) + + if(XZ_THREADS) + target_sources(liblzma PRIVATE + src/liblzma/common/stream_decoder_mt.c + ) + endif() + + if(SIMPLE_DECODERS) + target_sources(liblzma PRIVATE + src/liblzma/simple/simple_decoder.c + src/liblzma/simple/simple_decoder.h + ) + endif() + + if("lzma1" IN_LIST XZ_DECODERS) + target_sources(liblzma PRIVATE + src/liblzma/lzma/lzma_decoder.c + src/liblzma/lzma/lzma_decoder.h + src/liblzma/rangecoder/range_decoder.h + src/liblzma/lz/lz_decoder.c + src/liblzma/lz/lz_decoder.h + ) + endif() + + if("lzma2" IN_LIST XZ_DECODERS) + target_sources(liblzma PRIVATE + src/liblzma/lzma/lzma2_decoder.c + src/liblzma/lzma/lzma2_decoder.h + ) + endif() + + if("delta" IN_LIST XZ_DECODERS) + set(HAVE_DELTA_CODER ON) + target_sources(liblzma PRIVATE + src/liblzma/delta/delta_decoder.c + src/liblzma/delta/delta_decoder.h + ) + endif() +endif() + +# Some sources must appear if the filter is configured as either +# an encoder or decoder. +if("lzma1" IN_LIST XZ_ENCODERS OR "lzma1" IN_LIST XZ_DECODERS) + target_sources(liblzma PRIVATE + src/liblzma/rangecoder/range_common.h + src/liblzma/lzma/lzma_encoder_presets.c + src/liblzma/lzma/lzma_common.h + ) +endif() + +if(HAVE_DELTA_CODER) + target_sources(liblzma PRIVATE + src/liblzma/delta/delta_common.c + src/liblzma/delta/delta_common.h + src/liblzma/delta/delta_private.h + ) +endif() + +if(SIMPLE_ENCODERS OR SIMPLE_DECODERS) + target_sources(liblzma PRIVATE + src/liblzma/simple/simple_coder.c + src/liblzma/simple/simple_coder.h + src/liblzma/simple/simple_private.h + ) +endif() + +foreach(SIMPLE_CODER IN LISTS SIMPLE_FILTERS) + if(SIMPLE_CODER IN_LIST XZ_ENCODERS OR SIMPLE_CODER IN_LIST XZ_DECODERS) + target_sources(liblzma PRIVATE "src/liblzma/simple/${SIMPLE_CODER}.c") + endif() +endforeach() + + +############# +# MicroLZMA # +############# + +option(XZ_MICROLZMA_ENCODER + "MicroLZMA encoder (needed by specific applications only)" ON) + +option(XZ_MICROLZMA_DECODER + "MicroLZMA decoder (needed by specific applications only)" ON) + +if(XZ_MICROLZMA_ENCODER) + if(NOT "lzma1" IN_LIST XZ_ENCODERS) + message(FATAL_ERROR "The LZMA1 encoder is required to support the " + "MicroLZMA encoder") + endif() + + target_sources(liblzma PRIVATE src/liblzma/common/microlzma_encoder.c) +endif() + +if(XZ_MICROLZMA_DECODER) + if(NOT "lzma1" IN_LIST XZ_DECODERS) + message(FATAL_ERROR "The LZMA1 decoder is required to support the " + "MicroLZMA decoder") + endif() + + target_sources(liblzma PRIVATE src/liblzma/common/microlzma_decoder.c) +endif() + + +############################# +# lzip (.lz) format support # +############################# + +option(XZ_LZIP_DECODER "Support lzip decoder" ON) + +if(XZ_LZIP_DECODER) + # If lzip decoder support is requested, make sure LZMA1 decoder is enabled. + if(NOT "lzma1" IN_LIST XZ_DECODERS) + message(FATAL_ERROR "The LZMA1 decoder is required to support the " + "lzip decoder") + endif() + + add_compile_definitions(HAVE_LZIP_DECODER) + + target_sources(liblzma PRIVATE + src/liblzma/common/lzip_decoder.c + src/liblzma/common/lzip_decoder.h + ) +endif() + +### + +# Put the tuklib functions under the lzma_ namespace. +target_compile_definitions(liblzma PRIVATE TUKLIB_SYMBOL_PREFIX=lzma_) +tuklib_cpucores(liblzma) +tuklib_physmem(liblzma) + +# While liblzma can be built without tuklib_cpucores or tuklib_physmem +# modules, the liblzma API functions lzma_cputhreads() and lzma_physmem() +# will then be useless (which isn't too bad but still unfortunate). Since +# I expect the CMake-based builds to be only used on systems that are +# supported by these tuklib modules, problems with these tuklib modules +# are considered a hard error for now. This hopefully helps to catch bugs +# in the CMake versions of the tuklib checks. +if(NOT TUKLIB_CPUCORES_FOUND OR NOT TUKLIB_PHYSMEM_FOUND) + # Use SEND_ERROR instead of FATAL_ERROR. If someone reports a bug, + # seeing the results of the remaining checks can be useful too. + message(SEND_ERROR + "tuklib_cpucores() or tuklib_physmem() failed. " + "Unless you really are building for a system where these " + "modules are not supported (unlikely), this is a bug in the " + "included cmake/tuklib_*.cmake files that should be fixed. " + "To build anyway, edit this CMakeLists.txt to ignore this error.") +endif() + +# Check for __attribute__((__constructor__)) support. +# This needs -Werror because some compilers just warn +# about this being unsupported. +cmake_push_check_state() +set(CMAKE_REQUIRED_FLAGS "-Werror") +check_c_source_compiles(" + __attribute__((__constructor__)) + static void my_constructor_func(void) { return; } + int main(void) { return 0; } + " + HAVE_FUNC_ATTRIBUTE_CONSTRUCTOR) +cmake_pop_check_state() +tuklib_add_definition_if(liblzma HAVE_FUNC_ATTRIBUTE_CONSTRUCTOR) + +# The Win95 threading lacks a thread-safe one-time initialization function. +# The one-time initialization is needed for crc32_small.c and crc64_small.c +# create the CRC tables. So if small mode is enabled, the threading mode is +# win95, and the compiler does not support attribute constructor, then we +# would end up with a multithreaded build that is thread-unsafe. As a +# result this configuration is not allowed. +if(USE_WIN95_THREADS AND XZ_SMALL AND NOT HAVE_FUNC_ATTRIBUTE_CONSTRUCTOR) + message(SEND_ERROR "Threading method win95 and XZ_SMALL " + "cannot be used at the same time because the compiler " + "doesn't support __attribute__((__constructor__))") +endif() + + +# cpuid.h +check_include_file(cpuid.h HAVE_CPUID_H) +tuklib_add_definition_if(liblzma HAVE_CPUID_H) + +# immintrin.h: +check_include_file(immintrin.h HAVE_IMMINTRIN_H) +if(HAVE_IMMINTRIN_H) + target_compile_definitions(liblzma PRIVATE HAVE_IMMINTRIN_H) + + # SSE2 intrinsics: + check_c_source_compiles(" + #include + int main(void) + { + __m128i x = { 0 }; + _mm_movemask_epi8(x); + return 0; + } + " + HAVE__MM_MOVEMASK_EPI8) + tuklib_add_definition_if(liblzma HAVE__MM_MOVEMASK_EPI8) + + # CLMUL intrinsic: + option(XZ_CLMUL_CRC "Use carryless multiplication for CRC \ +calculation (with runtime detection) if supported by the compiler" ON) + + if(XZ_CLMUL_CRC) + check_c_source_compiles(" + #include + #if defined(__e2k__) && __iset__ < 6 + # error + #endif + #if (defined(__GNUC__) || defined(__clang__)) \ + && !defined(__EDG__) + __attribute__((__target__(\"ssse3,sse4.1,pclmul\"))) + #endif + int main(void) + { + __m128i a = _mm_set_epi64x(1, 2); + a = _mm_clmulepi64_si128(a, a, 0); + return 0; + } + " + HAVE_USABLE_CLMUL) + tuklib_add_definition_if(liblzma HAVE_USABLE_CLMUL) + endif() +endif() + +# ARM64 C Language Extensions define CRC32 functions in arm_acle.h. +# These are supported by at least GCC and Clang which both need +# __attribute__((__target__("+crc"))), unless the needed compiler flags +# are used to support the CRC instruction. +option(XZ_ARM64_CRC32 "Use ARM64 CRC32 instructions (with runtime detection) \ +if supported by the compiler" ON) + +if(XZ_ARM64_CRC32) + check_c_source_compiles(" + #include + + #ifndef _MSC_VER + #include + #endif + + #if (defined(__GNUC__) || defined(__clang__)) && !defined(__EDG__) + __attribute__((__target__(\"+crc\"))) + #endif + int main(void) + { + return __crc32d(1, 2) != 0; + } + " + HAVE_ARM64_CRC32) + + if(HAVE_ARM64_CRC32) + target_compile_definitions(liblzma PRIVATE HAVE_ARM64_CRC32) + + # Check for ARM64 CRC32 instruction runtime detection. + # getauxval() is supported on Linux. + check_symbol_exists(getauxval sys/auxv.h HAVE_GETAUXVAL) + tuklib_add_definition_if(liblzma HAVE_GETAUXVAL) + + # With getauxval() we also need HWCAP_CRC32 which was + # added in glibc 2.24. + if(HAVE_GETAUXVAL) + check_symbol_exists(HWCAP_CRC32 sys/auxv.h HAVE_HWCAP_CRC32) + tuklib_add_definition_if(liblzma HAVE_HWCAP_CRC32) + endif() + + # elf_aux_info() is supported on FreeBSD and OpenBSD >= 7.6. + check_symbol_exists(elf_aux_info sys/auxv.h HAVE_ELF_AUX_INFO) + tuklib_add_definition_if(liblzma HAVE_ELF_AUX_INFO) + + # sysctlbyname("hw.optional.armv8_crc32", ...) is supported on Darwin + # (macOS, iOS, etc.). Note that sysctlbyname() is supported on FreeBSD, + # NetBSD, and possibly others too but the string is specific to + # Apple OSes. The C code is responsible for checking + # defined(__APPLE__) before using + # sysctlbyname("hw.optional.armv8_crc32", ...). + check_symbol_exists(sysctlbyname sys/sysctl.h HAVE_SYSCTLBYNAME) + tuklib_add_definition_if(liblzma HAVE_SYSCTLBYNAME) + endif() +endif() + +option(XZ_LOONGARCH_CRC32 + "Use LoongArch CRC32 instructions if supported by the compiler" ON) + +if(XZ_LOONGARCH_CRC32) + # LoongArch CRC32 intrinsics are in larchintrin.h. + # These are supported by at least GCC and Clang. + # + # Only 64-bit LoongArch is currently supported. + # It doesn't need runtime detection. + check_c_source_compiles(" + #if !(defined(__loongarch__) && __loongarch_grlen >= 64) + # error + #endif + + #include + int main(void) + { + return __crc_w_w_w(1, 2); + } + " + HAVE_LOONGARCH_CRC32) + tuklib_add_definition_if(liblzma HAVE_LOONGARCH_CRC32) +endif() + + +# Symbol visibility support: +# +# The C_VISIBILITY_PRESET property takes care of adding the compiler +# option -fvisibility=hidden (or equivalent) if and only if it is supported. +# +# HAVE_VISIBILITY should always be defined to 0 or 1. It tells liblzma +# if __attribute__((__visibility__("default"))) +# and __attribute__((__visibility__("hidden"))) are supported. +# Those are useful only when the compiler supports -fvisibility=hidden +# or such option so HAVE_VISIBILITY should be 1 only when both option and +# the attribute support are present. HAVE_VISIBILITY is ignored on Windows +# and Cygwin by the liblzma C code; __declspec(dllexport) is used instead. +# +# CMake's GenerateExportHeader module is too fancy since liblzma already +# has the necessary macros. Instead, check CMake's internal variable +# CMAKE_C_COMPILE_OPTIONS_VISIBILITY (it's the C-specific variant of +# CMAKE__COMPILE_OPTIONS_VISIBILITY) which contains the compiler +# command line option for visibility support. It's empty or unset when +# visibility isn't supported. (It was added to CMake 2.8.12 in the commit +# 0e9f4bc00c6b26f254e74063e4026ac33b786513 in 2013.) This way we don't +# set HAVE_VISIBILITY to 1 when visibility isn't actually supported. +if(BUILD_SHARED_LIBS AND CMAKE_C_COMPILE_OPTIONS_VISIBILITY) + set_target_properties(liblzma PROPERTIES C_VISIBILITY_PRESET hidden) + target_compile_definitions(liblzma PRIVATE HAVE_VISIBILITY=1) +else() + target_compile_definitions(liblzma PRIVATE HAVE_VISIBILITY=0) +endif() + +if(WIN32 OR CYGWIN) + if(BUILD_SHARED_LIBS) + # Add the Windows resource file for liblzma.dll. + target_sources(liblzma PRIVATE src/liblzma/liblzma_w32res.rc) + + set_source_files_properties(src/liblzma/liblzma_w32res.rc PROPERTIES + OBJECT_DEPENDS "${W32RES_DEPENDENCIES}" + ) + + # Export the public API symbols with __declspec(dllexport). + target_compile_definitions(liblzma PRIVATE DLL_EXPORT) + + if(NOT MSVC AND NOT CYGWIN) + # Create a DEF file. The Autotools-based build creates a DEF file + # under Cygwin & MSYS2 too but it almost certainly is a useless + # file in that context, so the CMake build omits it. + # + # The linker puts the ordinal numbers in the DEF file + # too so the output from the linker isn't our final file. + target_link_options(liblzma PRIVATE + "-Wl,--output-def,liblzma.def.in") + + # Remove the ordinal numbers from the DEF file so that + # no one will create an import library that links by ordinal + # instead of by name. We don't maintain a DEF file so the + # ordinal numbers aren't stable. + add_custom_command(TARGET liblzma POST_BUILD + COMMAND "${CMAKE_COMMAND}" + -DINPUT_FILE=liblzma.def.in + -DOUTPUT_FILE=liblzma.def + -P + "${CMAKE_CURRENT_SOURCE_DIR}/cmake/remove-ordinals.cmake" + BYPRODUCTS "liblzma.def" + VERBATIM) + endif() + else() + # Disable __declspec(dllimport) when linking against static liblzma. + target_compile_definitions(liblzma INTERFACE LZMA_API_STATIC) + endif() +elseif(BUILD_SHARED_LIBS AND SYMBOL_VERSIONING STREQUAL "linux") + # Note that adding link options doesn't affect static builds + # but HAVE_SYMBOL_VERSIONS_LINUX must not be used with static builds + # because it would put symbol versions into the static library which + # can cause problems. It's clearer if all symver related things are + # omitted when not building a shared library. + # + # NOTE: Set it explicitly to 1 to make it clear that versioning is + # done unconditionally in the C files. + target_compile_definitions(liblzma PRIVATE HAVE_SYMBOL_VERSIONS_LINUX=1) + if(HAVE_LINKER_FLAG_UNDEFINED_VERSION) + target_link_options(liblzma PRIVATE "-Wl,--undefined-version") + endif() + target_link_options(liblzma PRIVATE + "-Wl,--version-script=${CMAKE_CURRENT_SOURCE_DIR}/src/liblzma/liblzma_linux.map" + ) + set_target_properties(liblzma PROPERTIES + LINK_DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/src/liblzma/liblzma_linux.map" + ) +elseif(BUILD_SHARED_LIBS AND SYMBOL_VERSIONING STREQUAL "generic") + if(HAVE_LINKER_FLAG_UNDEFINED_VERSION) + target_link_options(liblzma PRIVATE "-Wl,--undefined-version") + endif() + target_link_options(liblzma PRIVATE + "-Wl,--version-script=${CMAKE_CURRENT_SOURCE_DIR}/src/liblzma/liblzma_generic.map" + ) + set_target_properties(liblzma PROPERTIES + LINK_DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/src/liblzma/liblzma_generic.map" + ) +endif() + +# Calculate Libtool-compatible Mach-O versions for Apple OSes. +# Switching from CMake's or Meson's default Mach-O versioning style to +# GNU Libtool style is always a backward compatible change because the +# Libtool style always results in higher Mach-O version values. +# +# The other way would be a breaking change, and switching from Autotools +# to CMake used to result in "Incompatible library version" error from the +# dynamic linker ("dyld") when trying to run existing executables. This +# happened because the Mach-O current_version in the CMake-built library +# was less than the compatibility_version stored in the executable. +# +# Example: If on GNU/Linux one had libfoo.so.5.6.7, on macOS one would +# have libfoo.5.dylib containing the following Mach-O versions: +# +# compatibility_version current_version +# Libtool 12.0.0 12.7.0 +# CMake 5.0.0 5.6.7 +# Meson 5.0.0 5.0.0 +# +# Apple's docs say that the major version is encoded in the library filename, +# and the Mach-O version fields are for tracking backward compatible changes +# (minor versions). The default Mach-O versioning styles in CMake and +# Meson don't store the minor version in the compatibility_version though +# but Libtool does (using its own idiosyncratic encoding). In practice the +# lack of minor ABI version tracking doesn't matter much; it's the +# compatibility between the build systems that counts. +# +# It's unclear how much this matters in 2024. It might be that the +# dynamic linker in macOS >= 12 doesn't enforce the version checks. +# But this is a simple and safe change so it's fine to do it anyway. +# +# Apple docs: +# https://developer.apple.com/library/archive/documentation/DeveloperTools/Conceptual/DynamicLibraries/100-Articles/DynamicLibraryDesignGuidelines.html +# +# This change was made in XZ Utils 5.7.1alpha. +# +# * * * * * +# +# At least for now the xz package versioning matches the rules used for +# shared library versioning (excluding development releases) so it is +# fine to use the package version when setting the liblzma ABI version. +math(EXPR LIBLZMA_MACHO_COMPATIBILITY_VERSION + "${xz_VERSION_MAJOR} + ${xz_VERSION_MINOR} + 1") +set(LIBLZMA_MACHO_CURRENT_VERSION + "${LIBLZMA_MACHO_COMPATIBILITY_VERSION}.${xz_VERSION_PATCH}") + +set_target_properties(liblzma PROPERTIES + SOVERSION "${xz_VERSION_MAJOR}" + VERSION "${xz_VERSION}" + MACHO_COMPATIBILITY_VERSION "${LIBLZMA_MACHO_COMPATIBILITY_VERSION}" + MACHO_CURRENT_VERSION "${LIBLZMA_MACHO_CURRENT_VERSION}" + + # The name liblzma a mess because in many places "lib" is just a prefix + # and not part of the actual name. (Don't name a new library this way!) + # Cygwin uses "cyg", MSYS2 uses "msys-", and some platforms use no prefix. + # However, we want to avoid lzma.dll on Windows as that would conflict + # with LZMA SDK. liblzma has been liblzma.dll on Windows since the + # beginning so try to stick with it. + # + # Up to XZ Utils 5.6.2 we set PREFIX and IMPORT_PREFIX properties to "" + # while keeping the default "liblzma" OUTPUT_NAME that was derived from + # the target name. But this broke naming on Cygwin and MSYS2. + # + # Setting OUTPUT_NAME without the "lib" prefix means that CMake will add + # the platform-specific prefix as needed. So on most systems CMake will + # add "lib" but on Cygwin and MSYS2 the naming will be correct too. + # + # On Windows, CMake uses the "lib" prefix with MinGW-w64 but not with + # other toolchains. Those need to be handled specially to get the DLL + # file named liblzma.dll instead of lzma.dll. + OUTPUT_NAME "lzma" +) + +if(WIN32 AND NOT MINGW) + # Up to XZ Utils 5.6.2 and building with MSVC, we produced liblzma.dll + # and liblzma.lib. The downside of liblzma.lib is that it's not + # compatible with pkgconf usage. liblzma.pc contains "-llzma" which + # "pkgconf --msvc-syntax --libs liblzma" converts to "lzma.lib". + # So as a compromise, we can keep the liblzma.dll name but the import + # library and static liblzma need to be named lzma.lib so that pkgconf + # can be used with MSVC. (MinGW-w64 finds both names with "-llzma".) + set_target_properties(liblzma PROPERTIES RUNTIME_OUTPUT_NAME "liblzma") +endif() + +# Create liblzma-config-version.cmake. +# +# NOTE: SameMajorVersion is correct for stable releases but it is wrong +# for development releases where each release may have incompatible changes. +include(CMakePackageConfigHelpers) +write_basic_package_version_file( + "${CMAKE_CURRENT_BINARY_DIR}/liblzma-config-version.cmake" + VERSION "${xz_VERSION}" + COMPATIBILITY SameMajorVersion) + +# Create liblzma-config.cmake. We use this spelling instead of +# liblzmaConfig.cmake to make find_package work in case insensitive +# manner even with case sensitive file systems. This gives more consistent +# behavior between operating systems. This optionally includes a dependency +# on a threading library, so the contents are created in two separate parts. +# The "second half" is always needed, so create it first. +set(LZMA_CONFIG_CONTENTS +"include(\"\${CMAKE_CURRENT_LIST_DIR}/liblzma-targets.cmake\") + +if(NOT TARGET LibLZMA::LibLZMA) + # Be compatible with the spelling used by the FindLibLZMA module. This + # doesn't use ALIAS because it would make CMake resolve LibLZMA::LibLZMA + # to liblzma::liblzma instead of keeping the original spelling. Keeping + # the original spelling is important for good FindLibLZMA compatibility. + add_library(LibLZMA::LibLZMA INTERFACE IMPORTED) + set_target_properties(LibLZMA::LibLZMA PROPERTIES + INTERFACE_LINK_LIBRARIES liblzma::liblzma) +endif() +") + +if(USE_POSIX_THREADS) + set(LZMA_CONFIG_CONTENTS +"include(CMakeFindDependencyMacro) +set(THREADS_PREFER_PTHREAD_FLAG TRUE) +find_dependency(Threads) + +${LZMA_CONFIG_CONTENTS} +") +endif() + +file(WRITE "${CMAKE_CURRENT_BINARY_DIR}/liblzma-config.cmake" + "${LZMA_CONFIG_CONTENTS}") + + +# Create liblzma.pc. If CMAKE_INSTALL_ paths are relative to +# CMAKE_INSTALL_PREFIX, the .pc file will be relocatable (that is, +# all paths will be relative to ${prefix}). Otherwise absolute +# paths will be used. +set(prefix "${CMAKE_INSTALL_PREFIX}") +set(exec_prefix "\${prefix}") +cmake_path(APPEND libdir "\${exec_prefix}" "${CMAKE_INSTALL_LIBDIR}") +cmake_path(APPEND includedir "\${prefix}" "${CMAKE_INSTALL_INCLUDEDIR}") + +# Threads::Threads is linked in only when using POSIX threads. +# Use an empty value if using Windows threads or if threading is disabled. +set(PTHREAD_CFLAGS) +if(USE_POSIX_THREADS) + set(PTHREAD_CFLAGS "${CMAKE_THREAD_LIBS_INIT}") +endif() + +configure_file(src/liblzma/liblzma.pc.in liblzma.pc @ONLY) + + +# Install the library binary. The INCLUDES specifies the include path that +# is exported for other projects to use but it doesn't install any files. +install(TARGETS liblzma EXPORT liblzmaTargets + RUNTIME DESTINATION "${CMAKE_INSTALL_BINDIR}" + COMPONENT liblzma_Runtime + LIBRARY DESTINATION "${CMAKE_INSTALL_LIBDIR}" + COMPONENT liblzma_Runtime + NAMELINK_COMPONENT liblzma_Development + ARCHIVE DESTINATION "${CMAKE_INSTALL_LIBDIR}" + COMPONENT liblzma_Development + INCLUDES DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}") + +# Install the liblzma API headers. These use a subdirectory so +# this has to be done as a separate step. +install(DIRECTORY src/liblzma/api/ + COMPONENT liblzma_Development + DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}" + FILES_MATCHING PATTERN "*.h") + +# Install the CMake files that other packages can use to find liblzma. +set(XZ_INSTALL_CMAKEDIR + "${CMAKE_INSTALL_LIBDIR}/cmake/liblzma" + CACHE STRING "Path to liblzma's .cmake files") + +install(EXPORT liblzmaTargets + NAMESPACE liblzma:: + FILE liblzma-targets.cmake + DESTINATION "${XZ_INSTALL_CMAKEDIR}" + COMPONENT liblzma_Development) + +install(FILES "${CMAKE_CURRENT_BINARY_DIR}/liblzma-config.cmake" + "${CMAKE_CURRENT_BINARY_DIR}/liblzma-config-version.cmake" + DESTINATION "${XZ_INSTALL_CMAKEDIR}" + COMPONENT liblzma_Development) + +install(FILES "${CMAKE_CURRENT_BINARY_DIR}/liblzma.pc" + DESTINATION "${CMAKE_INSTALL_LIBDIR}/pkgconfig" + COMPONENT liblzma_Development) + + +############################################################################# +# Helper functions for installing files +############################################################################# + +# For each non-empty element in the list LINK_NAMES, creates symbolic links +# ${LINK_NAME}${LINK_SUFFIX} -> ${TARGET_NAME} in the directory ${DIR}. +# The target file should exist because on Cygwin and MSYS2 symlink creation +# can fail under certain conditions if the target doesn't exist. +function(my_install_symlinks COMPONENT DIR TARGET_NAME LINK_SUFFIX LINK_NAMES) + install(CODE "set(D \"\$ENV{DESTDIR}\${CMAKE_INSTALL_PREFIX}/${DIR}\") + foreach(L ${LINK_NAMES}) + file(CREATE_LINK \"${TARGET_NAME}\" + \"\${D}/\${L}${LINK_SUFFIX}\" + SYMBOLIC) + endforeach()" + COMPONENT "${COMPONENT}") +endfunction() + +# Installs a man page file of a given language ("" for the untranslated file) +# and optionally its alternative names as symlinks. This is a helper function +# for my_install_man() below. +function(my_install_man_lang COMPONENT SRC_FILE MAN_LANG LINK_NAMES) + # Get the man page section from the filename suffix. + string(REGEX REPLACE "^.*\.([^/.]+)$" "\\1" MAN_SECTION "${SRC_FILE}") + + # A few man pages might be missing from translations. + # Don't attempt to install them or create the related symlinks. + if(NOT MAN_LANG STREQUAL "" AND NOT EXISTS "${SRC_FILE}") + return() + endif() + + # Installing the file must be done before creating the symlinks + # due to Cygwin and MSYS2. + install(FILES "${SRC_FILE}" + DESTINATION "${CMAKE_INSTALL_MANDIR}/${MAN_LANG}/man${MAN_SECTION}" + COMPONENT "${COMPONENT}") + + # Get the basename of the file to be used as the symlink target. + get_filename_component(BASENAME "${SRC_FILE}" NAME) + + # LINK_NAMES don't contain the man page filename suffix (like ".1") + # so it needs to be told to my_install_symlinks. + my_install_symlinks("${COMPONENT}" + "${CMAKE_INSTALL_MANDIR}/${MAN_LANG}/man${MAN_SECTION}" + "${BASENAME}" ".${MAN_SECTION}" "${LINK_NAMES}") +endfunction() + +# Installs a man page file and optionally its alternative names as symlinks. +# Does the same for translations if XZ_NLS. +function(my_install_man COMPONENT SRC_FILE LINK_NAMES) + my_install_man_lang("${COMPONENT}" "${SRC_FILE}" "" "${LINK_NAMES}") + + if(XZ_NLS) + # Find the translated versions of this man page. + get_filename_component(BASENAME "${SRC_FILE}" NAME) + file(GLOB MAN_FILES "po4a/man/*/${BASENAME}") + + foreach(F ${MAN_FILES}) + get_filename_component(MAN_LANG "${F}" DIRECTORY) + get_filename_component(MAN_LANG "${MAN_LANG}" NAME) + my_install_man_lang("${COMPONENT}" "${F}" "${MAN_LANG}" + "${LINK_NAMES}") + endforeach() + endif() +endfunction() + + +############################################################################# +# libgnu (getopt_long) +############################################################################# + +# This mirrors how the Autotools build system handles the getopt_long +# replacement, calling the object library libgnu since the replacement +# version comes from Gnulib. +add_library(libgnu OBJECT) + +# CMake requires that even an object library must have at least once source +# file. So we give it a header file that results in no output files. +# +# NOTE: Using a file outside the lib directory makes it possible to +# delete lib/*.h and lib/*.c and still keep the build working if +# getopt_long replacement isn't needed. It's convenient if one wishes +# to be certain that no GNU LGPL code gets included in the binaries. +target_sources(libgnu PRIVATE src/common/sysdefs.h) + +# The Ninja Generator requires setting the linker language since it cannot +# guess the programming language of just a header file. Setting this +# property avoids needing an empty .c file or an non-empty unnecessary .c +# file. +set_target_properties(libgnu PROPERTIES LINKER_LANGUAGE C) + +# Create /lib directory in the build directory and add it to the include path. +file(MAKE_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/lib") +target_include_directories(libgnu PUBLIC "${CMAKE_CURRENT_BINARY_DIR}/lib") + +# Include /lib from the source directory. It does no harm even if none of +# the Gnulib replacements are used. +target_include_directories(libgnu PUBLIC lib) + +# The command line tools need getopt_long in order to parse arguments. If +# the system does not have a getopt_long implementation we can use the one +# from Gnulib instead. +check_symbol_exists(getopt_long getopt.h HAVE_GETOPT_LONG) + +if(NOT HAVE_GETOPT_LONG) + # Set the __GETOPT_PREFIX definition to "rpl_" (replacement) to avoid + # name conflicts with libc symbols. The same prefix is set if using + # the Autotools build (m4/getopt.m4). + target_compile_definitions(libgnu PUBLIC "__GETOPT_PREFIX=rpl_") + + # Copy the getopt header to the build directory and re-copy it + # if it is updated. (Gnulib does it this way because it allows + # choosing which .in.h files to actually use in the build. We + # need just getopt.h so this is a bit overcomplicated for + # a single header file only.) + configure_file("${CMAKE_CURRENT_SOURCE_DIR}/lib/getopt.in.h" + "${CMAKE_CURRENT_BINARY_DIR}/lib/getopt.h" + COPYONLY) + + target_sources(libgnu PRIVATE + lib/getopt1.c + lib/getopt.c + lib/getopt_int.h + lib/getopt-cdefs.h + lib/getopt-core.h + lib/getopt-ext.h + lib/getopt-pfx-core.h + lib/getopt-pfx-ext.h + "${CMAKE_CURRENT_BINARY_DIR}/lib/getopt.h" + ) +endif() + + +############################################################################# +# Sandboxing for the command line tools +############################################################################# + +# auto Use sandboxing if a supported method is available in the OS. +# no Disable sandboxing. +# capsicum Require Capsicum (FreeBSD >= 10.2) and fail if not found. +# pledge Require pledge(2) (OpenBSD >= 5.9) and fail if not found. +# landlock Require Landlock (Linux >= 5.13) and fail if not found. +set(SUPPORTED_SANDBOX_METHODS auto no capsicum pledge landlock) + +set(XZ_SANDBOX auto CACHE STRING + "Sandboxing method to use in 'xz', 'xzdec', and 'lzmadec'") + +set_property(CACHE XZ_SANDBOX PROPERTY STRINGS "${SUPPORTED_SANDBOX_METHODS}") + +if(NOT XZ_SANDBOX IN_LIST SUPPORTED_SANDBOX_METHODS) + message(FATAL_ERROR "'${XZ_SANDBOX}' is not a supported " + "sandboxing method") +endif() + +# When autodetecting, the search order is fixed and we must not find +# more than one method. +if(XZ_SANDBOX STREQUAL "no") + set(SANDBOX_FOUND ON) +else() + set(SANDBOX_FOUND OFF) +endif() + +# Since xz and xzdec can both use sandboxing, the compile definition needed +# to use the sandbox must be added to both targets. +set(SANDBOX_COMPILE_DEFINITION OFF) + +# Sandboxing: Capsicum +if(NOT SANDBOX_FOUND AND XZ_SANDBOX MATCHES "^auto$|^capsicum$") + check_symbol_exists(cap_rights_limit sys/capsicum.h + HAVE_CAP_RIGHTS_LIMIT) + if(HAVE_CAP_RIGHTS_LIMIT) + set(SANDBOX_COMPILE_DEFINITION "HAVE_CAP_RIGHTS_LIMIT") + set(SANDBOX_FOUND ON) + endif() +endif() + +# Sandboxing: pledge(2) +if(NOT SANDBOX_FOUND AND XZ_SANDBOX MATCHES "^auto$|^pledge$") + check_symbol_exists(pledge unistd.h HAVE_PLEDGE) + if(HAVE_PLEDGE) + set(SANDBOX_COMPILE_DEFINITION "HAVE_PLEDGE") + set(SANDBOX_FOUND ON) + endif() +endif() + +# Sandboxing: Landlock +if(NOT SANDBOX_FOUND AND XZ_SANDBOX MATCHES "^auto$|^landlock$") + # A compile check is done here because some systems have + # linux/landlock.h, but do not have the syscalls defined + # in order to actually use Linux Landlock. + check_c_source_compiles(" + #include + #include + #include + + int main(void) + { + (void)prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + (void)SYS_landlock_create_ruleset; + (void)SYS_landlock_restrict_self; + (void)LANDLOCK_CREATE_RULESET_VERSION; + return 0; + } + " + HAVE_LINUX_LANDLOCK) + + if(HAVE_LINUX_LANDLOCK) + set(SANDBOX_COMPILE_DEFINITION "HAVE_LINUX_LANDLOCK") + set(SANDBOX_FOUND ON) + + # Of our three sandbox methods, only Landlock is incompatible + # with -fsanitize. FreeBSD 13.2 with Capsicum was tested with + # -fsanitize=address,undefined and had no issues. OpenBSD (as + # of version 7.4) has minimal support for process instrumentation. + # OpenBSD does not distribute the additional libraries needed + # (libasan, libubsan, etc.) with GCC or Clang needed for runtime + # sanitization support and instead only support + # -fsanitize-minimal-runtime for minimal undefined behavior + # sanitization. This minimal support is compatible with our use + # of the Pledge sandbox. So only Landlock will result in a + # build that cannot compress or decompress a single file to + # standard out. + if(CMAKE_C_FLAGS MATCHES "-fsanitize=") + message(SEND_ERROR + "CMAKE_C_FLAGS or the environment variable CFLAGS " + "contains '-fsanitize=' which is incompatible " + "with Landlock sandboxing. Use -DXZ_SANDBOX=no " + "as an argument to 'cmake' when using '-fsanitize'.") + endif() + endif() +endif() + +if(NOT SANDBOX_FOUND AND NOT XZ_SANDBOX MATCHES "^auto$|^no$") + message(SEND_ERROR "XZ_SANDBOX=${XZ_SANDBOX} was used but " + "support for the sandboxing method wasn't found.") +endif() + + +############################################################################# +# xzdec and lzmadec +############################################################################# + +option(XZ_TOOL_XZDEC "Build and install the xzdec command line tool" ON) +option(XZ_TOOL_LZMADEC "Build and install the lzmadec command line tool" ON) + +if(HAVE_DECODERS) + set(XZDEC_TOOLS) + + if(XZ_TOOL_XZDEC) + list(APPEND XZDEC_TOOLS xzdec) + endif() + + if(XZ_TOOL_LZMADEC) + list(APPEND XZDEC_TOOLS lzmadec) + endif() + + foreach(XZDEC ${XZDEC_TOOLS}) + add_executable("${XZDEC}" + src/common/my_landlock.h + src/common/sysdefs.h + src/common/tuklib_common.h + src/common/tuklib_config.h + src/common/tuklib_mbstr_nonprint.c + src/common/tuklib_mbstr_nonprint.h + src/common/tuklib_exit.c + src/common/tuklib_exit.h + src/common/tuklib_gettext.h + src/common/tuklib_progname.c + src/common/tuklib_progname.h + src/xzdec/xzdec.c + ) + + target_include_directories("${XZDEC}" PRIVATE + src/common + src/liblzma/api + ) + + target_link_libraries("${XZDEC}" PRIVATE liblzma libgnu) + + if(WIN32 OR CYGWIN) + # Add the Windows resource file for xzdec.exe or lzmadec.exe. + target_sources("${XZDEC}" PRIVATE "src/xzdec/${XZDEC}_w32res.rc") + set_source_files_properties( + "src/xzdec/${XZDEC}_w32res.rc" PROPERTIES + OBJECT_DEPENDS "${W32RES_DEPENDENCIES}" + ) + endif() + + if(SANDBOX_COMPILE_DEFINITION) + target_compile_definitions("${XZDEC}" PRIVATE + "${SANDBOX_COMPILE_DEFINITION}") + endif() + + tuklib_progname("${XZDEC}") + tuklib_mbstr("${XZDEC}") + + install(TARGETS "${XZDEC}" + RUNTIME DESTINATION "${CMAKE_INSTALL_BINDIR}" + COMPONENT "${XZDEC}_Runtime") + endforeach() + + if(XZ_TOOL_LZMADEC) + # This is the only build-time difference with lzmadec. + target_compile_definitions(lzmadec PRIVATE "LZMADEC") + endif() + + if(UNIX AND XZ_TOOL_XZDEC) + # NOTE: This puts the lzmadec.1 symlinks into xzdec_Documentation. + # This isn't great but doing them separately with translated + # man pages would require extra code. So this has to suffice for now. + # + # Also, if xzdec is disabled but lzmadec isn't, then the man page + # isn't installed at all. It could be done but it's not a typical + # situation so let's keep this simpler. + if(XZ_TOOL_LZMADEC) + my_install_man(xzdec_Documentation src/xzdec/xzdec.1 lzmadec) + else() + my_install_man(xzdec_Documentation src/xzdec/xzdec.1 "") + endif() + endif() +endif() + + +############################################################################# +# lzmainfo +############################################################################# + +option(XZ_TOOL_LZMAINFO "Build and install the lzmainfo command line tool" ON) + +if(XZ_TOOL_LZMAINFO AND HAVE_DECODERS) + add_executable(lzmainfo + src/common/sysdefs.h + src/common/tuklib_common.h + src/common/tuklib_config.h + src/common/tuklib_mbstr.h + src/common/tuklib_mbstr_nonprint.c + src/common/tuklib_mbstr_nonprint.h + src/common/tuklib_mbstr_width.c + src/common/tuklib_mbstr_wrap.c + src/common/tuklib_mbstr_wrap.h + src/common/tuklib_exit.c + src/common/tuklib_exit.h + src/common/tuklib_gettext.h + src/common/tuklib_progname.c + src/common/tuklib_progname.h + src/lzmainfo/lzmainfo.c + ) + + target_include_directories(lzmainfo PRIVATE + src/common + src/liblzma/api + ) + + target_link_libraries(lzmainfo PRIVATE liblzma libgnu) + + if(WIN32 OR CYGWIN) + # Add the Windows resource file for lzmainfo.exe. + target_sources(lzmainfo PRIVATE src/lzmainfo/lzmainfo_w32res.rc) + set_source_files_properties(src/lzmainfo/lzmainfo_w32res.rc PROPERTIES + OBJECT_DEPENDS "${W32RES_DEPENDENCIES}" + ) + endif() + + tuklib_progname(lzmainfo) + tuklib_mbstr(lzmainfo) + + # NOTE: The translations are in the "xz" domain and the .mo files are + # installed as part of the "xz" target. + if(XZ_NLS) + target_link_libraries(lzmainfo PRIVATE Intl::Intl) + + target_compile_definitions(lzmainfo PRIVATE + ENABLE_NLS + PACKAGE="${TRANSLATION_DOMAIN}" + LOCALEDIR="${LOCALEDIR_DEFINITION}" + ) + endif() + + install(TARGETS lzmainfo + RUNTIME DESTINATION "${CMAKE_INSTALL_BINDIR}" + COMPONENT lzmainfo_Runtime) + + if(UNIX) + my_install_man(lzmainfo_Documentation src/lzmainfo/lzmainfo.1 "") + endif() +endif() + + +############################################################################# +# xz +############################################################################# + +option(XZ_TOOL_XZ "Build and install the xz command line tool" ON) + +if(XZ_TOOL_XZ) + add_executable(xz + src/common/my_landlock.h + src/common/mythread.h + src/common/sysdefs.h + src/common/tuklib_common.h + src/common/tuklib_config.h + src/common/tuklib_mbstr_nonprint.c + src/common/tuklib_mbstr_nonprint.h + src/common/tuklib_exit.c + src/common/tuklib_exit.h + src/common/tuklib_gettext.h + src/common/tuklib_integer.h + src/common/tuklib_mbstr.h + src/common/tuklib_mbstr_fw.c + src/common/tuklib_mbstr_width.c + src/common/tuklib_mbstr_wrap.c + src/common/tuklib_mbstr_wrap.h + src/common/tuklib_open_stdxxx.c + src/common/tuklib_open_stdxxx.h + src/common/tuklib_progname.c + src/common/tuklib_progname.h + src/xz/args.c + src/xz/args.h + src/xz/coder.c + src/xz/coder.h + src/xz/file_io.c + src/xz/file_io.h + src/xz/hardware.c + src/xz/hardware.h + src/xz/main.c + src/xz/main.h + src/xz/message.c + src/xz/message.h + src/xz/mytime.c + src/xz/mytime.h + src/xz/options.c + src/xz/options.h + src/xz/private.h + src/xz/sandbox.c + src/xz/sandbox.h + src/xz/signals.c + src/xz/signals.h + src/xz/suffix.c + src/xz/suffix.h + src/xz/util.c + src/xz/util.h + ) + + target_include_directories(xz PRIVATE + src/common + src/liblzma/api + ) + + if(HAVE_DECODERS) + target_sources(xz PRIVATE + src/xz/list.c + src/xz/list.h + ) + endif() + + target_link_libraries(xz PRIVATE liblzma libgnu) + + if(USE_POSIX_THREADS) + # src/xz/signals.c uses mythread_sigmask() which with POSIX + # threads calls pthread_sigmask(). Thus, we need the threading + # library as a dependency for xz. The liblzma target links against + # Threads::Threads PRIVATEly, thus that won't provide the pthreads + # symbols for xz. + # + # NOTE: The build may work without this if the symbol is in libc + # but it is mandatory to have this here to keep it working with + # all pthread implementations. + target_link_libraries(xz PRIVATE Threads::Threads) + endif() + + set(XZ_ASSUME_RAM "128" CACHE STRING "Assume that the system has \ +this many MiB of RAM if xz cannot determine the amount at runtime") + target_compile_definitions(xz PRIVATE "ASSUME_RAM=${XZ_ASSUME_RAM}") + + if(WIN32 OR CYGWIN) + # Add the Windows resource file for xz.exe. + target_sources(xz PRIVATE src/xz/xz_w32res.rc) + set_source_files_properties(src/xz/xz_w32res.rc PROPERTIES + OBJECT_DEPENDS "${W32RES_DEPENDENCIES}" + ) + endif() + + if(SANDBOX_COMPILE_DEFINITION) + target_compile_definitions(xz PRIVATE "${SANDBOX_COMPILE_DEFINITION}") + endif() + + tuklib_progname(xz) + tuklib_mbstr(xz) + + if(HAVE_GETOPT_LONG) + check_symbol_exists(optreset getopt.h HAVE_OPTRESET) + tuklib_add_definition_if(xz HAVE_OPTRESET) + endif() + + check_symbol_exists(getrlimit sys/resource.h HAVE_GETRLIMIT) + tuklib_add_definition_if(xz HAVE_GETRLIMIT) + + check_symbol_exists(posix_fadvise fcntl.h HAVE_POSIX_FADVISE) + tuklib_add_definition_if(xz HAVE_POSIX_FADVISE) + + # How to get file time: + check_struct_has_member("struct stat" st_atim.tv_nsec + "sys/types.h;sys/stat.h" + HAVE_STRUCT_STAT_ST_ATIM_TV_NSEC) + if(HAVE_STRUCT_STAT_ST_ATIM_TV_NSEC) + tuklib_add_definitions(xz HAVE_STRUCT_STAT_ST_ATIM_TV_NSEC) + else() + check_struct_has_member("struct stat" st_atimespec.tv_nsec + "sys/types.h;sys/stat.h" + HAVE_STRUCT_STAT_ST_ATIMESPEC_TV_NSEC) + if(HAVE_STRUCT_STAT_ST_ATIMESPEC_TV_NSEC) + tuklib_add_definitions(xz HAVE_STRUCT_STAT_ST_ATIMESPEC_TV_NSEC) + else() + check_struct_has_member("struct stat" st_atimensec + "sys/types.h;sys/stat.h" + HAVE_STRUCT_STAT_ST_ATIMENSEC) + tuklib_add_definition_if(xz HAVE_STRUCT_STAT_ST_ATIMENSEC) + endif() + endif() + + # How to set file time: + check_symbol_exists(futimens "sys/types.h;sys/stat.h" HAVE_FUTIMENS) + if(HAVE_FUTIMENS) + tuklib_add_definitions(xz HAVE_FUTIMENS) + else() + check_symbol_exists(futimes "sys/time.h" HAVE_FUTIMES) + if(HAVE_FUTIMES) + tuklib_add_definitions(xz HAVE_FUTIMES) + else() + check_symbol_exists(futimesat "sys/time.h" HAVE_FUTIMESAT) + if(HAVE_FUTIMESAT) + tuklib_add_definitions(xz HAVE_FUTIMESAT) + else() + check_symbol_exists(utimes "sys/time.h" HAVE_UTIMES) + if(HAVE_UTIMES) + tuklib_add_definitions(xz HAVE_UTIMES) + else() + check_symbol_exists(_futime "sys/utime.h" HAVE__FUTIME) + if(HAVE__FUTIME) + tuklib_add_definitions(xz HAVE__FUTIME) + else() + check_symbol_exists(utime "utime.h" HAVE_UTIME) + tuklib_add_definition_if(xz HAVE_UTIME) + endif() + endif() + endif() + endif() + endif() + + if(XZ_NLS) + target_link_libraries(xz PRIVATE Intl::Intl) + + target_compile_definitions(xz PRIVATE + ENABLE_NLS + PACKAGE="${TRANSLATION_DOMAIN}" + LOCALEDIR="${LOCALEDIR_DEFINITION}" + ) + + file(STRINGS po/LINGUAS LINGUAS) + + # NOTE: gettext_process_po_files' INSTALL_DESTINATION is + # incompatible with how Autotools requires the .po files to + # be named. CMake would require each .po file to be named with + # the translation domain and thus each .po file would need its + # own language-specific directory (like "po/fi/xz.po"). On top + # of this, INSTALL_DESTINATION doesn't allow specifying COMPONENT + # and thus the .mo files go into "Unspecified" component. So we + # can use gettext_process_po_files to convert the .po files but + # installation needs to be done with our own code. + # + # Also, the .gmo files will go to root of the build directory + # instead of neatly into a subdirectory. This is hardcoded in + # CMake's FindGettext.cmake. + foreach(LANG IN LISTS LINGUAS) + gettext_process_po_files("${LANG}" ALL + PO_FILES "${CMAKE_CURRENT_SOURCE_DIR}/po/${LANG}.po") + endforeach() + + foreach(LANG IN LISTS LINGUAS) + install( + FILES "${CMAKE_CURRENT_BINARY_DIR}/${LANG}.gmo" + DESTINATION "${CMAKE_INSTALL_LOCALEDIR}/${LANG}/LC_MESSAGES" + RENAME "${TRANSLATION_DOMAIN}.mo" + COMPONENT xz_Runtime) + endforeach() + endif() + + # This command must be before the symlink creation to keep things working + # on Cygwin and MSYS2 in all cases. + # + # - Cygwin can encode symlinks in multiple ways. This can be + # controlled via the environment variable "CYGWIN". If it contains + # "winsymlinks:nativestrict" then symlink creation will fail if + # the link target doesn't exist. This mode isn't the default though. + # See: https://cygwin.com/faq.html#faq.api.symlinks + # + # - MSYS2 supports the same winsymlinks option in the environment + # variable "MSYS" (not "MSYS2). The default in MSYS2 is to make + # a copy of the file instead of any kind of symlink. Thus the link + # target must exist or the creation of the "symlink" (copy) will fail. + # + # Our installation order must be such that when a symbolic link is created + # its target must already exists. There is no race condition for parallel + # builds because the generated cmake_install.cmake executes serially. + install(TARGETS xz + RUNTIME DESTINATION "${CMAKE_INSTALL_BINDIR}" + COMPONENT xz_Runtime) + + if(UNIX) + option(XZ_TOOL_SYMLINKS "Create unxz and xzcat symlinks" ON) + option(XZ_TOOL_SYMLINKS_LZMA + "Create 'lzma' and other symlinks for LZMA Utils compatibility" + ON) + set(XZ_LINKS) + + if(XZ_TOOL_SYMLINKS) + list(APPEND XZ_LINKS "unxz" "xzcat") + endif() + + if(XZ_TOOL_SYMLINKS_LZMA) + list(APPEND XZ_LINKS "lzma" "unlzma" "lzcat") + endif() + + # On Cygwin, don't add the .exe suffix to the symlinks. + # + # FIXME? Does this make sense on MSYS & MSYS2 where "ln -s" + # by default makes copies? Inside MSYS & MSYS2 it is possible + # to execute files without the .exe suffix but not outside + # (like in Command Prompt). Omitting the suffix matches + # what configure.ac has done for many years though. + my_install_symlinks(xz_Runtime "${CMAKE_INSTALL_BINDIR}" + "xz${CMAKE_EXECUTABLE_SUFFIX}" "" "${XZ_LINKS}") + + # Install the man pages and (optionally) their symlinks + # and translations. + my_install_man(xz_Documentation src/xz/xz.1 "${XZ_LINKS}") + endif() +endif() + + +############################################################################# +# Scripts +############################################################################# + +set(ENABLE_SCRIPTS OFF) + +if(UNIX) + # NOTE: These depend on the xz tool and decoder support. + option(XZ_TOOL_SCRIPTS "Install the scripts \ +xzdiff, xzgrep, xzmore, xzless, and their symlinks" ON) + + if(XZ_TOOL_SCRIPTS AND XZ_TOOL_XZ AND HAVE_DECODERS) + set(ENABLE_SCRIPTS ON) + endif() + + # NOTE: This isn't as sophisticated as in the Autotools build which + # uses posix-shell.m4 but hopefully this doesn't need to be either. + # CMake likely won't be used on as many (old) obscure systems as the + # Autotools-based builds are. + if(CMAKE_SYSTEM_NAME STREQUAL "SunOS" AND EXISTS "/usr/xpg4/bin/sh") + set(POSIX_SHELL_DEFAULT "/usr/xpg4/bin/sh") + else() + set(POSIX_SHELL_DEFAULT "/bin/sh") + endif() + + set(XZ_POSIX_SHELL "${POSIX_SHELL_DEFAULT}" CACHE STRING + "Shell to use for scripts (xzgrep and others)") + + # Guess the extra path to add from XZ_POSIX_SHELL. Autotools-based build + # has a separate option --enable-path-for-scripts=PREFIX but this is + # enough for Solaris. + set(enable_path_for_scripts) + get_filename_component(POSIX_SHELL_DIR "${XZ_POSIX_SHELL}" DIRECTORY) + + if(NOT POSIX_SHELL_DIR MATCHES "^/bin$|^/usr/bin$") + set(enable_path_for_scripts "PATH=${POSIX_SHELL_DIR}:\$PATH") + endif() + + set(XZDIFF_LINKS xzcmp) + set(XZGREP_LINKS xzegrep xzfgrep) + set(XZMORE_LINKS) + set(XZLESS_LINKS) + + if(XZ_TOOL_SYMLINKS_LZMA) + list(APPEND XZDIFF_LINKS lzdiff lzcmp) + list(APPEND XZGREP_LINKS lzgrep lzegrep lzfgrep) + list(APPEND XZMORE_LINKS lzmore) + list(APPEND XZLESS_LINKS lzless) + endif() + + set(xz "xz") + set(POSIX_SHELL "${XZ_POSIX_SHELL}") + + foreach(S xzdiff xzgrep xzmore xzless) + configure_file("src/scripts/${S}.in" "${S}" + @ONLY + NEWLINE_STYLE LF + FILE_PERMISSIONS OWNER_READ OWNER_WRITE OWNER_EXECUTE + GROUP_READ GROUP_EXECUTE + WORLD_READ WORLD_EXECUTE) + + if(ENABLE_SCRIPTS) + install(PROGRAMS "${CMAKE_CURRENT_BINARY_DIR}/${S}" + DESTINATION "${CMAKE_INSTALL_BINDIR}" + COMPONENT scripts_Runtime) + endif() + endforeach() + + unset(xz) + unset(POSIX_SHELL) + unset(enable_path_for_scripts) + + if(ENABLE_SCRIPTS) + my_install_symlinks(scripts_Runtime "${CMAKE_INSTALL_BINDIR}" + xzdiff "" "${XZDIFF_LINKS}") + + my_install_symlinks(scripts_Runtime "${CMAKE_INSTALL_BINDIR}" + xzgrep "" "${XZGREP_LINKS}") + + my_install_symlinks(scripts_Runtime "${CMAKE_INSTALL_BINDIR}" + xzmore "" "${XZMORE_LINKS}") + + my_install_symlinks(scripts_Runtime "${CMAKE_INSTALL_BINDIR}" + xzless "" "${XZLESS_LINKS}") + + my_install_man(scripts_Documentation + src/scripts/xzdiff.1 "${XZDIFF_LINKS}") + + my_install_man(scripts_Documentation + src/scripts/xzgrep.1 "${XZGREP_LINKS}") + + my_install_man(scripts_Documentation + src/scripts/xzmore.1 "${XZMORE_LINKS}") + + my_install_man(scripts_Documentation + src/scripts/xzless.1 "${XZLESS_LINKS}") + endif() +endif() + + +############################################################################# +# Documentation +############################################################################# + +if(UNIX) + option(XZ_DOXYGEN "Use Doxygen to generate liblzma API docs" OFF) + + if (XZ_DOXYGEN) + file(MAKE_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/doc") + + add_custom_command( + VERBATIM + COMMAND "${CMAKE_CURRENT_SOURCE_DIR}/doxygen/update-doxygen" + ARGS "api" + "${CMAKE_CURRENT_SOURCE_DIR}" + "${CMAKE_CURRENT_BINARY_DIR}/doc" + OUTPUT doc/api/index.html + DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/doxygen/update-doxygen" + "${CMAKE_CURRENT_SOURCE_DIR}/doxygen/Doxyfile" + ${LIBLZMA_API_HEADERS} + ) + + add_custom_target( + liblzma-doc-api ALL + DEPENDS "${CMAKE_CURRENT_BINARY_DIR}/doc/api/index.html" + ) + + install(DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/doc/api" + DESTINATION "${CMAKE_INSTALL_DOCDIR}" + COMPONENT liblzma_Documentation) + endif() +endif() + +option(XZ_DOC "Install basic documentation, examples, and license files" ON) +if(XZ_DOC) + install(DIRECTORY doc/examples + DESTINATION "${CMAKE_INSTALL_DOCDIR}" + COMPONENT liblzma_Documentation) + + # GPLv2 applies to the scripts. If GNU getopt_long is used then + # LGPLv2.1 applies to the command line tools but, using the + # section 3 of LGPLv2.1, GNU getopt_long can be handled as GPLv2 too. + # Thus GPLv2 should be enough here. + install(FILES AUTHORS + COPYING + COPYING.0BSD + COPYING.GPLv2 + NEWS + README + THANKS + doc/faq.txt + doc/history.txt + doc/lzma-file-format.txt + doc/xz-file-format.txt + DESTINATION "${CMAKE_INSTALL_DOCDIR}" + COMPONENT Documentation) +endif() + + +############################################################################# +# Tests +############################################################################# + +# Tests are in a separate file so that it's possible to delete the whole +# "tests" directory and still have a working build, just without the tests. +include(tests/tests.cmake OPTIONAL) diff --git a/src/native/external/xz/COPYING b/src/native/external/xz/COPYING new file mode 100644 index 00000000000000..ef3371389d7d45 --- /dev/null +++ b/src/native/external/xz/COPYING @@ -0,0 +1,70 @@ + +XZ Utils Licensing +================== + + Different licenses apply to different files in this package. Here + is a summary of which licenses apply to which parts of this package: + + - liblzma is under the BSD Zero Clause License (0BSD). + + - The command line tools xz, xzdec, lzmadec, and lzmainfo are + under 0BSD except that, on systems that don't have a usable + getopt_long, GNU getopt_long is compiled and linked in from the + 'lib' directory. The getopt_long code is under GNU LGPLv2.1+. + + - The scripts to grep, diff, and view compressed files have been + adapted from GNU gzip. These scripts (xzgrep, xzdiff, xzless, + and xzmore) are under GNU GPLv2+. The man pages of the scripts + are under 0BSD; they aren't based on the man pages of GNU gzip. + + - Most of the XZ Utils specific documentation that is in + plain text files (like README, INSTALL, PACKAGERS, NEWS, + and ChangeLog) are under 0BSD unless stated otherwise in + the file itself. The files xz-file-format.txt and + lzma-file-format.xt are in the public domain but may + be distributed under the terms of 0BSD too. + + - Translated messages and man pages are under 0BSD except that + some old translations are in the public domain. + + - Test files and test code in the 'tests' directory, and + debugging utilities in the 'debug' directory are under + the BSD Zero Clause License (0BSD). + + - The GNU Autotools based build system contains files that are + under GNU GPLv2+, GNU GPLv3+, and a few permissive licenses. + These files don't affect the licensing of the binaries being + built. + + - The 'extra' directory contains files that are under various + free software licenses. These aren't built or installed as + part of XZ Utils. + + The following command may be helpful in finding per-file license + information. It works on xz.git and on a clean file tree extracted + from a release tarball. + + sh build-aux/license-check.sh -v + + For the files under the BSD Zero Clause License (0BSD), if + a copyright notice is needed, the following is sufficient: + + Copyright (C) The XZ Utils authors and contributors + + If you copy significant amounts of 0BSD-licensed code from XZ Utils + into your project, acknowledging this somewhere in your software is + polite (especially if it is proprietary, non-free software), but + it is not legally required by the license terms. Here is an example + of a good notice to put into "about box" or into documentation: + + This software includes code from XZ Utils . + + The following license texts are included in the following files: + - COPYING.0BSD: BSD Zero Clause License + - COPYING.LGPLv2.1: GNU Lesser General Public License version 2.1 + - COPYING.GPLv2: GNU General Public License version 2 + - COPYING.GPLv3: GNU General Public License version 3 + + If you have questions, don't hesitate to ask for more information. + The contact information is in the README file. + diff --git a/src/native/external/xz/COPYING.0BSD b/src/native/external/xz/COPYING.0BSD new file mode 100644 index 00000000000000..4322122aecf1ae --- /dev/null +++ b/src/native/external/xz/COPYING.0BSD @@ -0,0 +1,11 @@ +Permission to use, copy, modify, and/or distribute this +software for any purpose with or without fee is hereby granted. + +THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL +WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL +THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR +CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM +LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, +NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN +CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. diff --git a/src/native/external/xz/COPYING.GPLv2 b/src/native/external/xz/COPYING.GPLv2 new file mode 100644 index 00000000000000..9efa6fbc962836 --- /dev/null +++ b/src/native/external/xz/COPYING.GPLv2 @@ -0,0 +1,338 @@ + GNU GENERAL PUBLIC LICENSE + Version 2, June 1991 + + Copyright (C) 1989, 1991 Free Software Foundation, Inc., + + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +License is intended to guarantee your freedom to share and change free +software--to make sure the software is free for all its users. This +General Public License applies to most of the Free Software +Foundation's software and to any other program whose authors commit to +using it. (Some other Free Software Foundation software is covered by +the GNU Lesser General Public License instead.) You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +this service if you wish), that you receive source code or can get it +if you want it, that you can change the software or use pieces of it +in new free programs; and that you know you can do these things. + + To protect your rights, we need to make restrictions that forbid +anyone to deny you these rights or to ask you to surrender the rights. +These restrictions translate to certain responsibilities for you if you +distribute copies of the software, or if you modify it. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must give the recipients all the rights that +you have. You must make sure that they, too, receive or can get the +source code. And you must show them these terms so they know their +rights. + + We protect your rights with two steps: (1) copyright the software, and +(2) offer you this license which gives you legal permission to copy, +distribute and/or modify the software. + + Also, for each author's protection and ours, we want to make certain +that everyone understands that there is no warranty for this free +software. If the software is modified by someone else and passed on, we +want its recipients to know that what they have is not the original, so +that any problems introduced by others will not reflect on the original +authors' reputations. + + Finally, any free program is threatened constantly by software +patents. We wish to avoid the danger that redistributors of a free +program will individually obtain patent licenses, in effect making the +program proprietary. To prevent this, we have made it clear that any +patent must be licensed for everyone's free use or not licensed at all. + + The precise terms and conditions for copying, distribution and +modification follow. + + GNU GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License applies to any program or other work which contains +a notice placed by the copyright holder saying it may be distributed +under the terms of this General Public License. The "Program", below, +refers to any such program or work, and a "work based on the Program" +means either the Program or any derivative work under copyright law: +that is to say, a work containing the Program or a portion of it, +either verbatim or with modifications and/or translated into another +language. (Hereinafter, translation is included without limitation in +the term "modification".) Each licensee is addressed as "you". + +Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. The act of +running the Program is not restricted, and the output from the Program +is covered only if its contents constitute a work based on the +Program (independent of having been made by running the Program). +Whether that is true depends on what the Program does. + + 1. You may copy and distribute verbatim copies of the Program's +source code as you receive it, in any medium, provided that you +conspicuously and appropriately publish on each copy an appropriate +copyright notice and disclaimer of warranty; keep intact all the +notices that refer to this License and to the absence of any warranty; +and give any other recipients of the Program a copy of this License +along with the Program. + +You may charge a fee for the physical act of transferring a copy, and +you may at your option offer warranty protection in exchange for a fee. + + 2. You may modify your copy or copies of the Program or any portion +of it, thus forming a work based on the Program, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) You must cause the modified files to carry prominent notices + stating that you changed the files and the date of any change. + + b) You must cause any work that you distribute or publish, that in + whole or in part contains or is derived from the Program or any + part thereof, to be licensed as a whole at no charge to all third + parties under the terms of this License. + + c) If the modified program normally reads commands interactively + when run, you must cause it, when started running for such + interactive use in the most ordinary way, to print or display an + announcement including an appropriate copyright notice and a + notice that there is no warranty (or else, saying that you provide + a warranty) and that users may redistribute the program under + these conditions, and telling the user how to view a copy of this + License. (Exception: if the Program itself is interactive but + does not normally print such an announcement, your work based on + the Program is not required to print an announcement.) + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Program, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Program, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote it. + +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Program. + +In addition, mere aggregation of another work not based on the Program +with the Program (or with a work based on the Program) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. You may copy and distribute the Program (or a work based on it, +under Section 2) in object code or executable form under the terms of +Sections 1 and 2 above provided that you also do one of the following: + + a) Accompany it with the complete corresponding machine-readable + source code, which must be distributed under the terms of Sections + 1 and 2 above on a medium customarily used for software interchange; or, + + b) Accompany it with a written offer, valid for at least three + years, to give any third party, for a charge no more than your + cost of physically performing source distribution, a complete + machine-readable copy of the corresponding source code, to be + distributed under the terms of Sections 1 and 2 above on a medium + customarily used for software interchange; or, + + c) Accompany it with the information you received as to the offer + to distribute corresponding source code. (This alternative is + allowed only for noncommercial distribution and only if you + received the program in object code or executable form with such + an offer, in accord with Subsection b above.) + +The source code for a work means the preferred form of the work for +making modifications to it. For an executable work, complete source +code means all the source code for all modules it contains, plus any +associated interface definition files, plus the scripts used to +control compilation and installation of the executable. However, as a +special exception, the source code distributed need not include +anything that is normally distributed (in either source or binary +form) with the major components (compiler, kernel, and so on) of the +operating system on which the executable runs, unless that component +itself accompanies the executable. + +If distribution of executable or object code is made by offering +access to copy from a designated place, then offering equivalent +access to copy the source code from the same place counts as +distribution of the source code, even though third parties are not +compelled to copy the source along with the object code. + + 4. You may not copy, modify, sublicense, or distribute the Program +except as expressly provided under this License. Any attempt +otherwise to copy, modify, sublicense or distribute the Program is +void, and will automatically terminate your rights under this License. +However, parties who have received copies, or rights, from you under +this License will not have their licenses terminated so long as such +parties remain in full compliance. + + 5. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Program or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Program (or any work based on the +Program), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Program or works based on it. + + 6. Each time you redistribute the Program (or any work based on the +Program), the recipient automatically receives a license from the +original licensor to copy, distribute or modify the Program subject to +these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. +You are not responsible for enforcing compliance by third parties to +this License. + + 7. If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Program at all. For example, if a patent +license would not permit royalty-free redistribution of the Program by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Program. + +If any portion of this section is held invalid or unenforceable under +any particular circumstance, the balance of the section is intended to +apply and the section as a whole is intended to apply in other +circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system, which is +implemented by public license practices. Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 8. If the distribution and/or use of the Program is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Program under this License +may add an explicit geographical distribution limitation excluding +those countries, so that distribution is permitted only in or among +countries not thus excluded. In such case, this License incorporates +the limitation as if written in the body of this License. + + 9. The Free Software Foundation may publish revised and/or new versions +of the General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + +Each version is given a distinguishing version number. If the Program +specifies a version number of this License which applies to it and "any +later version", you have the option of following the terms and conditions +either of that version or of any later version published by the Free +Software Foundation. If the Program does not specify a version number of +this License, you may choose any version ever published by the Free Software +Foundation. + + 10. If you wish to incorporate parts of the Program into other free +programs whose distribution conditions are different, write to the author +to ask for permission. For software which is copyrighted by the Free +Software Foundation, write to the Free Software Foundation; we sometimes +make exceptions for this. Our decision will be guided by the two goals +of preserving the free status of all derivatives of our free software and +of promoting the sharing and reuse of software generally. + + NO WARRANTY + + 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY +FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN +OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES +PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED +OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS +TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE +PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, +REPAIR OR CORRECTION. + + 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR +REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, +INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING +OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED +TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY +YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER +PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGES. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. + + + Copyright (C) + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, see . + +Also add information on how to contact you by electronic and paper mail. + +If the program is interactive, make it output a short notice like this +when it starts in an interactive mode: + + Gnomovision version 69, Copyright (C) year name of author + Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, the commands you use may +be called something other than `show w' and `show c'; they could even be +mouse-clicks or menu items--whatever suits your program. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the program, if +necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the program + `Gnomovision' (which makes passes at compilers) written by James Hacker. + + , 1 April 1989 + Moe Ghoul, President of Vice + +This General Public License does not permit incorporating your program into +proprietary programs. If your program is a subroutine library, you may +consider it more useful to permit linking proprietary applications with the +library. If this is what you want to do, use the GNU Lesser General +Public License instead of this License. diff --git a/src/native/external/xz/COPYING.GPLv3 b/src/native/external/xz/COPYING.GPLv3 new file mode 100644 index 00000000000000..f288702d2fa16d --- /dev/null +++ b/src/native/external/xz/COPYING.GPLv3 @@ -0,0 +1,674 @@ + GNU GENERAL PUBLIC LICENSE + Version 3, 29 June 2007 + + Copyright (C) 2007 Free Software Foundation, Inc. + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The GNU General Public License is a free, copyleft license for +software and other kinds of works. + + The licenses for most software and other practical works are designed +to take away your freedom to share and change the works. By contrast, +the GNU General Public License is intended to guarantee your freedom to +share and change all versions of a program--to make sure it remains free +software for all its users. We, the Free Software Foundation, use the +GNU General Public License for most of our software; it applies also to +any other work released this way by its authors. You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +them if you wish), that you receive source code or can get it if you +want it, that you can change the software or use pieces of it in new +free programs, and that you know you can do these things. + + To protect your rights, we need to prevent others from denying you +these rights or asking you to surrender the rights. Therefore, you have +certain responsibilities if you distribute copies of the software, or if +you modify it: responsibilities to respect the freedom of others. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must pass on to the recipients the same +freedoms that you received. You must make sure that they, too, receive +or can get the source code. And you must show them these terms so they +know their rights. + + Developers that use the GNU GPL protect your rights with two steps: +(1) assert copyright on the software, and (2) offer you this License +giving you legal permission to copy, distribute and/or modify it. + + For the developers' and authors' protection, the GPL clearly explains +that there is no warranty for this free software. For both users' and +authors' sake, the GPL requires that modified versions be marked as +changed, so that their problems will not be attributed erroneously to +authors of previous versions. + + Some devices are designed to deny users access to install or run +modified versions of the software inside them, although the manufacturer +can do so. This is fundamentally incompatible with the aim of +protecting users' freedom to change the software. The systematic +pattern of such abuse occurs in the area of products for individuals to +use, which is precisely where it is most unacceptable. Therefore, we +have designed this version of the GPL to prohibit the practice for those +products. If such problems arise substantially in other domains, we +stand ready to extend this provision to those domains in future versions +of the GPL, as needed to protect the freedom of users. + + Finally, every program is threatened constantly by software patents. +States should not allow patents to restrict development and use of +software on general-purpose computers, but in those that do, we wish to +avoid the special danger that patents applied to a free program could +make it effectively proprietary. To prevent this, the GPL assures that +patents cannot be used to render the program non-free. + + The precise terms and conditions for copying, distribution and +modification follow. + + TERMS AND CONDITIONS + + 0. Definitions. + + "This License" refers to version 3 of the GNU General Public License. + + "Copyright" also means copyright-like laws that apply to other kinds of +works, such as semiconductor masks. + + "The Program" refers to any copyrightable work licensed under this +License. Each licensee is addressed as "you". "Licensees" and +"recipients" may be individuals or organizations. + + To "modify" a work means to copy from or adapt all or part of the work +in a fashion requiring copyright permission, other than the making of an +exact copy. The resulting work is called a "modified version" of the +earlier work or a work "based on" the earlier work. + + A "covered work" means either the unmodified Program or a work based +on the Program. + + To "propagate" a work means to do anything with it that, without +permission, would make you directly or secondarily liable for +infringement under applicable copyright law, except executing it on a +computer or modifying a private copy. Propagation includes copying, +distribution (with or without modification), making available to the +public, and in some countries other activities as well. + + To "convey" a work means any kind of propagation that enables other +parties to make or receive copies. Mere interaction with a user through +a computer network, with no transfer of a copy, is not conveying. + + An interactive user interface displays "Appropriate Legal Notices" +to the extent that it includes a convenient and prominently visible +feature that (1) displays an appropriate copyright notice, and (2) +tells the user that there is no warranty for the work (except to the +extent that warranties are provided), that licensees may convey the +work under this License, and how to view a copy of this License. If +the interface presents a list of user commands or options, such as a +menu, a prominent item in the list meets this criterion. + + 1. Source Code. + + The "source code" for a work means the preferred form of the work +for making modifications to it. "Object code" means any non-source +form of a work. + + A "Standard Interface" means an interface that either is an official +standard defined by a recognized standards body, or, in the case of +interfaces specified for a particular programming language, one that +is widely used among developers working in that language. + + The "System Libraries" of an executable work include anything, other +than the work as a whole, that (a) is included in the normal form of +packaging a Major Component, but which is not part of that Major +Component, and (b) serves only to enable use of the work with that +Major Component, or to implement a Standard Interface for which an +implementation is available to the public in source code form. A +"Major Component", in this context, means a major essential component +(kernel, window system, and so on) of the specific operating system +(if any) on which the executable work runs, or a compiler used to +produce the work, or an object code interpreter used to run it. + + The "Corresponding Source" for a work in object code form means all +the source code needed to generate, install, and (for an executable +work) run the object code and to modify the work, including scripts to +control those activities. However, it does not include the work's +System Libraries, or general-purpose tools or generally available free +programs which are used unmodified in performing those activities but +which are not part of the work. For example, Corresponding Source +includes interface definition files associated with source files for +the work, and the source code for shared libraries and dynamically +linked subprograms that the work is specifically designed to require, +such as by intimate data communication or control flow between those +subprograms and other parts of the work. + + The Corresponding Source need not include anything that users +can regenerate automatically from other parts of the Corresponding +Source. + + The Corresponding Source for a work in source code form is that +same work. + + 2. Basic Permissions. + + All rights granted under this License are granted for the term of +copyright on the Program, and are irrevocable provided the stated +conditions are met. This License explicitly affirms your unlimited +permission to run the unmodified Program. The output from running a +covered work is covered by this License only if the output, given its +content, constitutes a covered work. This License acknowledges your +rights of fair use or other equivalent, as provided by copyright law. + + You may make, run and propagate covered works that you do not +convey, without conditions so long as your license otherwise remains +in force. You may convey covered works to others for the sole purpose +of having them make modifications exclusively for you, or provide you +with facilities for running those works, provided that you comply with +the terms of this License in conveying all material for which you do +not control copyright. Those thus making or running the covered works +for you must do so exclusively on your behalf, under your direction +and control, on terms that prohibit them from making any copies of +your copyrighted material outside their relationship with you. + + Conveying under any other circumstances is permitted solely under +the conditions stated below. Sublicensing is not allowed; section 10 +makes it unnecessary. + + 3. Protecting Users' Legal Rights From Anti-Circumvention Law. + + No covered work shall be deemed part of an effective technological +measure under any applicable law fulfilling obligations under article +11 of the WIPO copyright treaty adopted on 20 December 1996, or +similar laws prohibiting or restricting circumvention of such +measures. + + When you convey a covered work, you waive any legal power to forbid +circumvention of technological measures to the extent such circumvention +is effected by exercising rights under this License with respect to +the covered work, and you disclaim any intention to limit operation or +modification of the work as a means of enforcing, against the work's +users, your or third parties' legal rights to forbid circumvention of +technological measures. + + 4. Conveying Verbatim Copies. + + You may convey verbatim copies of the Program's source code as you +receive it, in any medium, provided that you conspicuously and +appropriately publish on each copy an appropriate copyright notice; +keep intact all notices stating that this License and any +non-permissive terms added in accord with section 7 apply to the code; +keep intact all notices of the absence of any warranty; and give all +recipients a copy of this License along with the Program. + + You may charge any price or no price for each copy that you convey, +and you may offer support or warranty protection for a fee. + + 5. Conveying Modified Source Versions. + + You may convey a work based on the Program, or the modifications to +produce it from the Program, in the form of source code under the +terms of section 4, provided that you also meet all of these conditions: + + a) The work must carry prominent notices stating that you modified + it, and giving a relevant date. + + b) The work must carry prominent notices stating that it is + released under this License and any conditions added under section + 7. This requirement modifies the requirement in section 4 to + "keep intact all notices". + + c) You must license the entire work, as a whole, under this + License to anyone who comes into possession of a copy. This + License will therefore apply, along with any applicable section 7 + additional terms, to the whole of the work, and all its parts, + regardless of how they are packaged. This License gives no + permission to license the work in any other way, but it does not + invalidate such permission if you have separately received it. + + d) If the work has interactive user interfaces, each must display + Appropriate Legal Notices; however, if the Program has interactive + interfaces that do not display Appropriate Legal Notices, your + work need not make them do so. + + A compilation of a covered work with other separate and independent +works, which are not by their nature extensions of the covered work, +and which are not combined with it such as to form a larger program, +in or on a volume of a storage or distribution medium, is called an +"aggregate" if the compilation and its resulting copyright are not +used to limit the access or legal rights of the compilation's users +beyond what the individual works permit. Inclusion of a covered work +in an aggregate does not cause this License to apply to the other +parts of the aggregate. + + 6. Conveying Non-Source Forms. + + You may convey a covered work in object code form under the terms +of sections 4 and 5, provided that you also convey the +machine-readable Corresponding Source under the terms of this License, +in one of these ways: + + a) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by the + Corresponding Source fixed on a durable physical medium + customarily used for software interchange. + + b) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by a + written offer, valid for at least three years and valid for as + long as you offer spare parts or customer support for that product + model, to give anyone who possesses the object code either (1) a + copy of the Corresponding Source for all the software in the + product that is covered by this License, on a durable physical + medium customarily used for software interchange, for a price no + more than your reasonable cost of physically performing this + conveying of source, or (2) access to copy the + Corresponding Source from a network server at no charge. + + c) Convey individual copies of the object code with a copy of the + written offer to provide the Corresponding Source. This + alternative is allowed only occasionally and noncommercially, and + only if you received the object code with such an offer, in accord + with subsection 6b. + + d) Convey the object code by offering access from a designated + place (gratis or for a charge), and offer equivalent access to the + Corresponding Source in the same way through the same place at no + further charge. You need not require recipients to copy the + Corresponding Source along with the object code. If the place to + copy the object code is a network server, the Corresponding Source + may be on a different server (operated by you or a third party) + that supports equivalent copying facilities, provided you maintain + clear directions next to the object code saying where to find the + Corresponding Source. Regardless of what server hosts the + Corresponding Source, you remain obligated to ensure that it is + available for as long as needed to satisfy these requirements. + + e) Convey the object code using peer-to-peer transmission, provided + you inform other peers where the object code and Corresponding + Source of the work are being offered to the general public at no + charge under subsection 6d. + + A separable portion of the object code, whose source code is excluded +from the Corresponding Source as a System Library, need not be +included in conveying the object code work. + + A "User Product" is either (1) a "consumer product", which means any +tangible personal property which is normally used for personal, family, +or household purposes, or (2) anything designed or sold for incorporation +into a dwelling. In determining whether a product is a consumer product, +doubtful cases shall be resolved in favor of coverage. For a particular +product received by a particular user, "normally used" refers to a +typical or common use of that class of product, regardless of the status +of the particular user or of the way in which the particular user +actually uses, or expects or is expected to use, the product. A product +is a consumer product regardless of whether the product has substantial +commercial, industrial or non-consumer uses, unless such uses represent +the only significant mode of use of the product. + + "Installation Information" for a User Product means any methods, +procedures, authorization keys, or other information required to install +and execute modified versions of a covered work in that User Product from +a modified version of its Corresponding Source. The information must +suffice to ensure that the continued functioning of the modified object +code is in no case prevented or interfered with solely because +modification has been made. + + If you convey an object code work under this section in, or with, or +specifically for use in, a User Product, and the conveying occurs as +part of a transaction in which the right of possession and use of the +User Product is transferred to the recipient in perpetuity or for a +fixed term (regardless of how the transaction is characterized), the +Corresponding Source conveyed under this section must be accompanied +by the Installation Information. But this requirement does not apply +if neither you nor any third party retains the ability to install +modified object code on the User Product (for example, the work has +been installed in ROM). + + The requirement to provide Installation Information does not include a +requirement to continue to provide support service, warranty, or updates +for a work that has been modified or installed by the recipient, or for +the User Product in which it has been modified or installed. Access to a +network may be denied when the modification itself materially and +adversely affects the operation of the network or violates the rules and +protocols for communication across the network. + + Corresponding Source conveyed, and Installation Information provided, +in accord with this section must be in a format that is publicly +documented (and with an implementation available to the public in +source code form), and must require no special password or key for +unpacking, reading or copying. + + 7. Additional Terms. + + "Additional permissions" are terms that supplement the terms of this +License by making exceptions from one or more of its conditions. +Additional permissions that are applicable to the entire Program shall +be treated as though they were included in this License, to the extent +that they are valid under applicable law. If additional permissions +apply only to part of the Program, that part may be used separately +under those permissions, but the entire Program remains governed by +this License without regard to the additional permissions. + + When you convey a copy of a covered work, you may at your option +remove any additional permissions from that copy, or from any part of +it. (Additional permissions may be written to require their own +removal in certain cases when you modify the work.) You may place +additional permissions on material, added by you to a covered work, +for which you have or can give appropriate copyright permission. + + Notwithstanding any other provision of this License, for material you +add to a covered work, you may (if authorized by the copyright holders of +that material) supplement the terms of this License with terms: + + a) Disclaiming warranty or limiting liability differently from the + terms of sections 15 and 16 of this License; or + + b) Requiring preservation of specified reasonable legal notices or + author attributions in that material or in the Appropriate Legal + Notices displayed by works containing it; or + + c) Prohibiting misrepresentation of the origin of that material, or + requiring that modified versions of such material be marked in + reasonable ways as different from the original version; or + + d) Limiting the use for publicity purposes of names of licensors or + authors of the material; or + + e) Declining to grant rights under trademark law for use of some + trade names, trademarks, or service marks; or + + f) Requiring indemnification of licensors and authors of that + material by anyone who conveys the material (or modified versions of + it) with contractual assumptions of liability to the recipient, for + any liability that these contractual assumptions directly impose on + those licensors and authors. + + All other non-permissive additional terms are considered "further +restrictions" within the meaning of section 10. If the Program as you +received it, or any part of it, contains a notice stating that it is +governed by this License along with a term that is a further +restriction, you may remove that term. If a license document contains +a further restriction but permits relicensing or conveying under this +License, you may add to a covered work material governed by the terms +of that license document, provided that the further restriction does +not survive such relicensing or conveying. + + If you add terms to a covered work in accord with this section, you +must place, in the relevant source files, a statement of the +additional terms that apply to those files, or a notice indicating +where to find the applicable terms. + + Additional terms, permissive or non-permissive, may be stated in the +form of a separately written license, or stated as exceptions; +the above requirements apply either way. + + 8. Termination. + + You may not propagate or modify a covered work except as expressly +provided under this License. Any attempt otherwise to propagate or +modify it is void, and will automatically terminate your rights under +this License (including any patent licenses granted under the third +paragraph of section 11). + + However, if you cease all violation of this License, then your +license from a particular copyright holder is reinstated (a) +provisionally, unless and until the copyright holder explicitly and +finally terminates your license, and (b) permanently, if the copyright +holder fails to notify you of the violation by some reasonable means +prior to 60 days after the cessation. + + Moreover, your license from a particular copyright holder is +reinstated permanently if the copyright holder notifies you of the +violation by some reasonable means, this is the first time you have +received notice of violation of this License (for any work) from that +copyright holder, and you cure the violation prior to 30 days after +your receipt of the notice. + + Termination of your rights under this section does not terminate the +licenses of parties who have received copies or rights from you under +this License. If your rights have been terminated and not permanently +reinstated, you do not qualify to receive new licenses for the same +material under section 10. + + 9. Acceptance Not Required for Having Copies. + + You are not required to accept this License in order to receive or +run a copy of the Program. Ancillary propagation of a covered work +occurring solely as a consequence of using peer-to-peer transmission +to receive a copy likewise does not require acceptance. However, +nothing other than this License grants you permission to propagate or +modify any covered work. These actions infringe copyright if you do +not accept this License. Therefore, by modifying or propagating a +covered work, you indicate your acceptance of this License to do so. + + 10. Automatic Licensing of Downstream Recipients. + + Each time you convey a covered work, the recipient automatically +receives a license from the original licensors, to run, modify and +propagate that work, subject to this License. You are not responsible +for enforcing compliance by third parties with this License. + + An "entity transaction" is a transaction transferring control of an +organization, or substantially all assets of one, or subdividing an +organization, or merging organizations. If propagation of a covered +work results from an entity transaction, each party to that +transaction who receives a copy of the work also receives whatever +licenses to the work the party's predecessor in interest had or could +give under the previous paragraph, plus a right to possession of the +Corresponding Source of the work from the predecessor in interest, if +the predecessor has it or can get it with reasonable efforts. + + You may not impose any further restrictions on the exercise of the +rights granted or affirmed under this License. For example, you may +not impose a license fee, royalty, or other charge for exercise of +rights granted under this License, and you may not initiate litigation +(including a cross-claim or counterclaim in a lawsuit) alleging that +any patent claim is infringed by making, using, selling, offering for +sale, or importing the Program or any portion of it. + + 11. Patents. + + A "contributor" is a copyright holder who authorizes use under this +License of the Program or a work on which the Program is based. The +work thus licensed is called the contributor's "contributor version". + + A contributor's "essential patent claims" are all patent claims +owned or controlled by the contributor, whether already acquired or +hereafter acquired, that would be infringed by some manner, permitted +by this License, of making, using, or selling its contributor version, +but do not include claims that would be infringed only as a +consequence of further modification of the contributor version. For +purposes of this definition, "control" includes the right to grant +patent sublicenses in a manner consistent with the requirements of +this License. + + Each contributor grants you a non-exclusive, worldwide, royalty-free +patent license under the contributor's essential patent claims, to +make, use, sell, offer for sale, import and otherwise run, modify and +propagate the contents of its contributor version. + + In the following three paragraphs, a "patent license" is any express +agreement or commitment, however denominated, not to enforce a patent +(such as an express permission to practice a patent or covenant not to +sue for patent infringement). To "grant" such a patent license to a +party means to make such an agreement or commitment not to enforce a +patent against the party. + + If you convey a covered work, knowingly relying on a patent license, +and the Corresponding Source of the work is not available for anyone +to copy, free of charge and under the terms of this License, through a +publicly available network server or other readily accessible means, +then you must either (1) cause the Corresponding Source to be so +available, or (2) arrange to deprive yourself of the benefit of the +patent license for this particular work, or (3) arrange, in a manner +consistent with the requirements of this License, to extend the patent +license to downstream recipients. "Knowingly relying" means you have +actual knowledge that, but for the patent license, your conveying the +covered work in a country, or your recipient's use of the covered work +in a country, would infringe one or more identifiable patents in that +country that you have reason to believe are valid. + + If, pursuant to or in connection with a single transaction or +arrangement, you convey, or propagate by procuring conveyance of, a +covered work, and grant a patent license to some of the parties +receiving the covered work authorizing them to use, propagate, modify +or convey a specific copy of the covered work, then the patent license +you grant is automatically extended to all recipients of the covered +work and works based on it. + + A patent license is "discriminatory" if it does not include within +the scope of its coverage, prohibits the exercise of, or is +conditioned on the non-exercise of one or more of the rights that are +specifically granted under this License. You may not convey a covered +work if you are a party to an arrangement with a third party that is +in the business of distributing software, under which you make payment +to the third party based on the extent of your activity of conveying +the work, and under which the third party grants, to any of the +parties who would receive the covered work from you, a discriminatory +patent license (a) in connection with copies of the covered work +conveyed by you (or copies made from those copies), or (b) primarily +for and in connection with specific products or compilations that +contain the covered work, unless you entered into that arrangement, +or that patent license was granted, prior to 28 March 2007. + + Nothing in this License shall be construed as excluding or limiting +any implied license or other defenses to infringement that may +otherwise be available to you under applicable patent law. + + 12. No Surrender of Others' Freedom. + + If conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot convey a +covered work so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you may +not convey it at all. For example, if you agree to terms that obligate you +to collect a royalty for further conveying from those to whom you convey +the Program, the only way you could satisfy both those terms and this +License would be to refrain entirely from conveying the Program. + + 13. Use with the GNU Affero General Public License. + + Notwithstanding any other provision of this License, you have +permission to link or combine any covered work with a work licensed +under version 3 of the GNU Affero General Public License into a single +combined work, and to convey the resulting work. The terms of this +License will continue to apply to the part which is the covered work, +but the special requirements of the GNU Affero General Public License, +section 13, concerning interaction through a network will apply to the +combination as such. + + 14. Revised Versions of this License. + + The Free Software Foundation may publish revised and/or new versions of +the GNU General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + + Each version is given a distinguishing version number. If the +Program specifies that a certain numbered version of the GNU General +Public License "or any later version" applies to it, you have the +option of following the terms and conditions either of that numbered +version or of any later version published by the Free Software +Foundation. If the Program does not specify a version number of the +GNU General Public License, you may choose any version ever published +by the Free Software Foundation. + + If the Program specifies that a proxy can decide which future +versions of the GNU General Public License can be used, that proxy's +public statement of acceptance of a version permanently authorizes you +to choose that version for the Program. + + Later license versions may give you additional or different +permissions. However, no additional obligations are imposed on any +author or copyright holder as a result of your choosing to follow a +later version. + + 15. Disclaimer of Warranty. + + THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY +APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT +HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY +OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM +IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF +ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + + 16. Limitation of Liability. + + IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS +THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY +GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE +USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF +DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD +PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), +EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF +SUCH DAMAGES. + + 17. Interpretation of Sections 15 and 16. + + If the disclaimer of warranty and limitation of liability provided +above cannot be given local legal effect according to their terms, +reviewing courts shall apply local law that most closely approximates +an absolute waiver of all civil liability in connection with the +Program, unless a warranty or assumption of liability accompanies a +copy of the Program in return for a fee. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +state the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. + + + Copyright (C) + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . + +Also add information on how to contact you by electronic and paper mail. + + If the program does terminal interaction, make it output a short +notice like this when it starts in an interactive mode: + + Copyright (C) + This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, your program's commands +might be different; for a GUI interface, you would use an "about box". + + You should also get your employer (if you work as a programmer) or school, +if any, to sign a "copyright disclaimer" for the program, if necessary. +For more information on this, and how to apply and follow the GNU GPL, see +. + + The GNU General Public License does not permit incorporating your program +into proprietary programs. If your program is a subroutine library, you +may consider it more useful to permit linking proprietary applications with +the library. If this is what you want to do, use the GNU Lesser General +Public License instead of this License. But first, please read +. diff --git a/src/native/external/xz/COPYING.LGPLv2.1 b/src/native/external/xz/COPYING.LGPLv2.1 new file mode 100644 index 00000000000000..f6683e74e0f013 --- /dev/null +++ b/src/native/external/xz/COPYING.LGPLv2.1 @@ -0,0 +1,501 @@ + GNU LESSER GENERAL PUBLIC LICENSE + Version 2.1, February 1999 + + Copyright (C) 1991, 1999 Free Software Foundation, Inc. + + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + +[This is the first released version of the Lesser GPL. It also counts + as the successor of the GNU Library Public License, version 2, hence + the version number 2.1.] + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +Licenses are intended to guarantee your freedom to share and change +free software--to make sure the software is free for all its users. + + This license, the Lesser General Public License, applies to some +specially designated software packages--typically libraries--of the +Free Software Foundation and other authors who decide to use it. You +can use it too, but we suggest you first think carefully about whether +this license or the ordinary General Public License is the better +strategy to use in any particular case, based on the explanations below. + + When we speak of free software, we are referring to freedom of use, +not price. Our General Public Licenses are designed to make sure that +you have the freedom to distribute copies of free software (and charge +for this service if you wish); that you receive source code or can get +it if you want it; that you can change the software and use pieces of +it in new free programs; and that you are informed that you can do +these things. + + To protect your rights, we need to make restrictions that forbid +distributors to deny you these rights or to ask you to surrender these +rights. These restrictions translate to certain responsibilities for +you if you distribute copies of the library or if you modify it. + + For example, if you distribute copies of the library, whether gratis +or for a fee, you must give the recipients all the rights that we gave +you. You must make sure that they, too, receive or can get the source +code. If you link other code with the library, you must provide +complete object files to the recipients, so that they can relink them +with the library after making changes to the library and recompiling +it. And you must show them these terms so they know their rights. + + We protect your rights with a two-step method: (1) we copyright the +library, and (2) we offer you this license, which gives you legal +permission to copy, distribute and/or modify the library. + + To protect each distributor, we want to make it very clear that +there is no warranty for the free library. Also, if the library is +modified by someone else and passed on, the recipients should know +that what they have is not the original version, so that the original +author's reputation will not be affected by problems that might be +introduced by others. + + Finally, software patents pose a constant threat to the existence of +any free program. We wish to make sure that a company cannot +effectively restrict the users of a free program by obtaining a +restrictive license from a patent holder. Therefore, we insist that +any patent license obtained for a version of the library must be +consistent with the full freedom of use specified in this license. + + Most GNU software, including some libraries, is covered by the +ordinary GNU General Public License. This license, the GNU Lesser +General Public License, applies to certain designated libraries, and +is quite different from the ordinary General Public License. We use +this license for certain libraries in order to permit linking those +libraries into non-free programs. + + When a program is linked with a library, whether statically or using +a shared library, the combination of the two is legally speaking a +combined work, a derivative of the original library. The ordinary +General Public License therefore permits such linking only if the +entire combination fits its criteria of freedom. The Lesser General +Public License permits more lax criteria for linking other code with +the library. + + We call this license the "Lesser" General Public License because it +does Less to protect the user's freedom than the ordinary General +Public License. It also provides other free software developers Less +of an advantage over competing non-free programs. These disadvantages +are the reason we use the ordinary General Public License for many +libraries. However, the Lesser license provides advantages in certain +special circumstances. + + For example, on rare occasions, there may be a special need to +encourage the widest possible use of a certain library, so that it becomes +a de-facto standard. To achieve this, non-free programs must be +allowed to use the library. A more frequent case is that a free +library does the same job as widely used non-free libraries. In this +case, there is little to gain by limiting the free library to free +software only, so we use the Lesser General Public License. + + In other cases, permission to use a particular library in non-free +programs enables a greater number of people to use a large body of +free software. For example, permission to use the GNU C Library in +non-free programs enables many more people to use the whole GNU +operating system, as well as its variant, the GNU/Linux operating +system. + + Although the Lesser General Public License is Less protective of the +users' freedom, it does ensure that the user of a program that is +linked with the Library has the freedom and the wherewithal to run +that program using a modified version of the Library. + + The precise terms and conditions for copying, distribution and +modification follow. Pay close attention to the difference between a +"work based on the library" and a "work that uses the library". The +former contains code derived from the library, whereas the latter must +be combined with the library in order to run. + + GNU LESSER GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License Agreement applies to any software library or other +program which contains a notice placed by the copyright holder or +other authorized party saying it may be distributed under the terms of +this Lesser General Public License (also called "this License"). +Each licensee is addressed as "you". + + A "library" means a collection of software functions and/or data +prepared so as to be conveniently linked with application programs +(which use some of those functions and data) to form executables. + + The "Library", below, refers to any such software library or work +which has been distributed under these terms. A "work based on the +Library" means either the Library or any derivative work under +copyright law: that is to say, a work containing the Library or a +portion of it, either verbatim or with modifications and/or translated +straightforwardly into another language. (Hereinafter, translation is +included without limitation in the term "modification".) + + "Source code" for a work means the preferred form of the work for +making modifications to it. For a library, complete source code means +all the source code for all modules it contains, plus any associated +interface definition files, plus the scripts used to control compilation +and installation of the library. + + Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. The act of +running a program using the Library is not restricted, and output from +such a program is covered only if its contents constitute a work based +on the Library (independent of the use of the Library in a tool for +writing it). Whether that is true depends on what the Library does +and what the program that uses the Library does. + + 1. You may copy and distribute verbatim copies of the Library's +complete source code as you receive it, in any medium, provided that +you conspicuously and appropriately publish on each copy an +appropriate copyright notice and disclaimer of warranty; keep intact +all the notices that refer to this License and to the absence of any +warranty; and distribute a copy of this License along with the +Library. + + You may charge a fee for the physical act of transferring a copy, +and you may at your option offer warranty protection in exchange for a +fee. + + 2. You may modify your copy or copies of the Library or any portion +of it, thus forming a work based on the Library, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) The modified work must itself be a software library. + + b) You must cause the files modified to carry prominent notices + stating that you changed the files and the date of any change. + + c) You must cause the whole of the work to be licensed at no + charge to all third parties under the terms of this License. + + d) If a facility in the modified Library refers to a function or a + table of data to be supplied by an application program that uses + the facility, other than as an argument passed when the facility + is invoked, then you must make a good faith effort to ensure that, + in the event an application does not supply such function or + table, the facility still operates, and performs whatever part of + its purpose remains meaningful. + + (For example, a function in a library to compute square roots has + a purpose that is entirely well-defined independent of the + application. Therefore, Subsection 2d requires that any + application-supplied function or table used by this function must + be optional: if the application does not supply it, the square + root function must still compute square roots.) + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Library, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Library, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote +it. + +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Library. + +In addition, mere aggregation of another work not based on the Library +with the Library (or with a work based on the Library) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. You may opt to apply the terms of the ordinary GNU General Public +License instead of this License to a given copy of the Library. To do +this, you must alter all the notices that refer to this License, so +that they refer to the ordinary GNU General Public License, version 2, +instead of to this License. (If a newer version than version 2 of the +ordinary GNU General Public License has appeared, then you can specify +that version instead if you wish.) Do not make any other change in +these notices. + + Once this change is made in a given copy, it is irreversible for +that copy, so the ordinary GNU General Public License applies to all +subsequent copies and derivative works made from that copy. + + This option is useful when you wish to copy part of the code of +the Library into a program that is not a library. + + 4. You may copy and distribute the Library (or a portion or +derivative of it, under Section 2) in object code or executable form +under the terms of Sections 1 and 2 above provided that you accompany +it with the complete corresponding machine-readable source code, which +must be distributed under the terms of Sections 1 and 2 above on a +medium customarily used for software interchange. + + If distribution of object code is made by offering access to copy +from a designated place, then offering equivalent access to copy the +source code from the same place satisfies the requirement to +distribute the source code, even though third parties are not +compelled to copy the source along with the object code. + + 5. A program that contains no derivative of any portion of the +Library, but is designed to work with the Library by being compiled or +linked with it, is called a "work that uses the Library". Such a +work, in isolation, is not a derivative work of the Library, and +therefore falls outside the scope of this License. + + However, linking a "work that uses the Library" with the Library +creates an executable that is a derivative of the Library (because it +contains portions of the Library), rather than a "work that uses the +library". The executable is therefore covered by this License. +Section 6 states terms for distribution of such executables. + + When a "work that uses the Library" uses material from a header file +that is part of the Library, the object code for the work may be a +derivative work of the Library even though the source code is not. +Whether this is true is especially significant if the work can be +linked without the Library, or if the work is itself a library. The +threshold for this to be true is not precisely defined by law. + + If such an object file uses only numerical parameters, data +structure layouts and accessors, and small macros and small inline +functions (ten lines or less in length), then the use of the object +file is unrestricted, regardless of whether it is legally a derivative +work. (Executables containing this object code plus portions of the +Library will still fall under Section 6.) + + Otherwise, if the work is a derivative of the Library, you may +distribute the object code for the work under the terms of Section 6. +Any executables containing that work also fall under Section 6, +whether or not they are linked directly with the Library itself. + + 6. As an exception to the Sections above, you may also combine or +link a "work that uses the Library" with the Library to produce a +work containing portions of the Library, and distribute that work +under terms of your choice, provided that the terms permit +modification of the work for the customer's own use and reverse +engineering for debugging such modifications. + + You must give prominent notice with each copy of the work that the +Library is used in it and that the Library and its use are covered by +this License. You must supply a copy of this License. If the work +during execution displays copyright notices, you must include the +copyright notice for the Library among them, as well as a reference +directing the user to the copy of this License. Also, you must do one +of these things: + + a) Accompany the work with the complete corresponding + machine-readable source code for the Library including whatever + changes were used in the work (which must be distributed under + Sections 1 and 2 above); and, if the work is an executable linked + with the Library, with the complete machine-readable "work that + uses the Library", as object code and/or source code, so that the + user can modify the Library and then relink to produce a modified + executable containing the modified Library. (It is understood + that the user who changes the contents of definitions files in the + Library will not necessarily be able to recompile the application + to use the modified definitions.) + + b) Use a suitable shared library mechanism for linking with the + Library. A suitable mechanism is one that (1) uses at run time a + copy of the library already present on the user's computer system, + rather than copying library functions into the executable, and (2) + will operate properly with a modified version of the library, if + the user installs one, as long as the modified version is + interface-compatible with the version that the work was made with. + + c) Accompany the work with a written offer, valid for at + least three years, to give the same user the materials + specified in Subsection 6a, above, for a charge no more + than the cost of performing this distribution. + + d) If distribution of the work is made by offering access to copy + from a designated place, offer equivalent access to copy the above + specified materials from the same place. + + e) Verify that the user has already received a copy of these + materials or that you have already sent this user a copy. + + For an executable, the required form of the "work that uses the +Library" must include any data and utility programs needed for +reproducing the executable from it. However, as a special exception, +the materials to be distributed need not include anything that is +normally distributed (in either source or binary form) with the major +components (compiler, kernel, and so on) of the operating system on +which the executable runs, unless that component itself accompanies +the executable. + + It may happen that this requirement contradicts the license +restrictions of other proprietary libraries that do not normally +accompany the operating system. Such a contradiction means you cannot +use both them and the Library together in an executable that you +distribute. + + 7. You may place library facilities that are a work based on the +Library side-by-side in a single library together with other library +facilities not covered by this License, and distribute such a combined +library, provided that the separate distribution of the work based on +the Library and of the other library facilities is otherwise +permitted, and provided that you do these two things: + + a) Accompany the combined library with a copy of the same work + based on the Library, uncombined with any other library + facilities. This must be distributed under the terms of the + Sections above. + + b) Give prominent notice with the combined library of the fact + that part of it is a work based on the Library, and explaining + where to find the accompanying uncombined form of the same work. + + 8. You may not copy, modify, sublicense, link with, or distribute +the Library except as expressly provided under this License. Any +attempt otherwise to copy, modify, sublicense, link with, or +distribute the Library is void, and will automatically terminate your +rights under this License. However, parties who have received copies, +or rights, from you under this License will not have their licenses +terminated so long as such parties remain in full compliance. + + 9. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Library or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Library (or any work based on the +Library), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Library or works based on it. + + 10. Each time you redistribute the Library (or any work based on the +Library), the recipient automatically receives a license from the +original licensor to copy, distribute, link with or modify the Library +subject to these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. +You are not responsible for enforcing compliance by third parties with +this License. + + 11. If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Library at all. For example, if a patent +license would not permit royalty-free redistribution of the Library by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Library. + +If any portion of this section is held invalid or unenforceable under any +particular circumstance, the balance of the section is intended to apply, +and the section as a whole is intended to apply in other circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system which is +implemented by public license practices. Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 12. If the distribution and/or use of the Library is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Library under this License may add +an explicit geographical distribution limitation excluding those countries, +so that distribution is permitted only in or among countries not thus +excluded. In such case, this License incorporates the limitation as if +written in the body of this License. + + 13. The Free Software Foundation may publish revised and/or new +versions of the Lesser General Public License from time to time. +Such new versions will be similar in spirit to the present version, +but may differ in detail to address new problems or concerns. + +Each version is given a distinguishing version number. If the Library +specifies a version number of this License which applies to it and +"any later version", you have the option of following the terms and +conditions either of that version or of any later version published by +the Free Software Foundation. If the Library does not specify a +license version number, you may choose any version ever published by +the Free Software Foundation. + + 14. If you wish to incorporate parts of the Library into other free +programs whose distribution conditions are incompatible with these, +write to the author to ask for permission. For software which is +copyrighted by the Free Software Foundation, write to the Free +Software Foundation; we sometimes make exceptions for this. Our +decision will be guided by the two goals of preserving the free status +of all derivatives of our free software and of promoting the sharing +and reuse of software generally. + + NO WARRANTY + + 15. BECAUSE THE LIBRARY IS LICENSED FREE OF CHARGE, THERE IS NO +WARRANTY FOR THE LIBRARY, TO THE EXTENT PERMITTED BY APPLICABLE LAW. +EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR +OTHER PARTIES PROVIDE THE LIBRARY "AS IS" WITHOUT WARRANTY OF ANY +KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE +LIBRARY IS WITH YOU. SHOULD THE LIBRARY PROVE DEFECTIVE, YOU ASSUME +THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + + 16. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN +WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY +AND/OR REDISTRIBUTE THE LIBRARY AS PERMITTED ABOVE, BE LIABLE TO YOU +FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR +CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE +LIBRARY (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING +RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A +FAILURE OF THE LIBRARY TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF +SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH +DAMAGES. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Libraries + + If you develop a new library, and you want it to be of the greatest +possible use to the public, we recommend making it free software that +everyone can redistribute and change. You can do so by permitting +redistribution under these terms (or, alternatively, under the terms of the +ordinary General Public License). + + To apply these terms, attach the following notices to the library. It is +safest to attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least the +"copyright" line and a pointer to where the full notice is found. + + + Copyright (C) + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, see . + +Also add information on how to contact you by electronic and paper mail. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the library, if +necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the + library `Frob' (a library for tweaking knobs) written by James Random Hacker. + + , 1 April 1990 + Moe Ghoul, President of Vice + +That's all there is to it! diff --git a/src/native/external/xz/ChangeLog b/src/native/external/xz/ChangeLog new file mode 100644 index 00000000000000..a24858d941dd3d --- /dev/null +++ b/src/native/external/xz/ChangeLog @@ -0,0 +1,7 @@ +See the commit log in the git repository: + + git clone https://github.com/tukaani-project/xz + +Note that "make dist" doesn't put this tiny file into the package. +Instead, the git commit log is used as ChangeLog. See dist-hook in +Makefile.am for details. diff --git a/src/native/external/xz/INSTALL b/src/native/external/xz/INSTALL new file mode 100644 index 00000000000000..ec89047274659c --- /dev/null +++ b/src/native/external/xz/INSTALL @@ -0,0 +1,976 @@ + +XZ Utils Installation +===================== + + 0. Preface + 1. Supported platforms + 1.1. Compilers + 1.2. Platform-specific notes + 1.2.1. AIX + 1.2.2. IRIX + 1.2.3. MINIX 3 + 1.2.4. OpenVMS + 1.2.5. Solaris, OpenSolaris, and derivatives + 1.2.6. Tru64 + 1.2.7. Windows + 1.2.8. DOS + 1.2.9. z/OS + 1.3. Adding support for new platforms + 2. configure and CMake options + 2.1. Static vs. dynamic linking of liblzma + 2.2. Optimizing xzdec and lzmadec + 3. xzgrep and other scripts + 3.1. Dependencies + 3.2. PATH + 4. Tests + 4.1 Testing in parallel + 4.2 Cross compiling + 5. Troubleshooting + 5.1. "No C99 compiler was found." + 5.2. "No POSIX conforming shell (sh) was found." + 5.3. configure works but build fails at crc32_x86.S + 5.4. Lots of warnings about symbol visibility + 5.5. "make check" fails + 5.6. liblzma.so (or similar) not found when running xz + + +0. Preface +---------- + + If you aren't familiar with building packages that use GNU Autotools, + see the file INSTALL.generic for generic instructions before reading + further. + + If you are going to build a package for distribution, see also the + file PACKAGERS. It contains information that should help making the + binary packages as good as possible, but the information isn't very + interesting to those making local builds for private use or for use + in special situations like embedded systems. + + +1. Supported platforms +---------------------- + + XZ Utils are developed on GNU/Linux, but they should work on many + POSIX-like operating systems like *BSDs and Solaris, and even on + a few non-POSIX operating systems. + + +1.1. Compilers + + A C99 compiler is required to compile XZ Utils. If you use GCC, you + need at least version 3.x.x. GCC version 2.xx.x doesn't support some + C99 features used in XZ Utils source code, thus GCC 2 won't compile + XZ Utils. + + XZ Utils takes advantage of some GNU C extensions when building + with GCC. Because these extensions are used only when building + with GCC, it should be possible to use any C99 compiler. + + +1.2. Platform-specific notes + +1.2.1. AIX + + If you use IBM XL C compiler, pass CC=xlc_r to configure. If + you use CC=xlc instead, you must disable threading support + with --disable-threads (usually not recommended). + + If building a 32-bit executable, the address space available to xz + might be limited to 256 MiB by default. To increase the address + space to 2 GiB, pass LDFLAGS=-Wl,-bmaxdata:0x80000000 as an argument + to configure. + + +1.2.2. IRIX + + MIPSpro 7.4.4m has been reported to produce broken code if using + the -O2 optimization flag ("make check" fails). Using -O1 should + work. + + A problem has been reported when using shared liblzma. Passing + --disable-shared to configure works around this. Alternatively, + putting "-64" to CFLAGS to build a 64-bit version might help too. + + +1.2.3. MINIX 3 + + Version 3.3.0 and later are supported. + + Multithreading isn't supported because MINIX 3 doesn't have + pthreads. The option --disable-threads must be passed to configure + as this isn't autodetected. + + Note that disabling threads causes "make check" to show a few tests + as skipped ("SKIP"). It's only due to a few threading-dependent + subtests are skipped. See the matching tests/test_*.log files. + + +1.2.4. OpenVMS + + XZ Utils can be built for OpenVMS, but the build system files + are not included in the XZ Utils source package. The required + OpenVMS-specific files are maintained by Jouk Jansen and can be + downloaded here: + + http://nchrem.tnw.tudelft.nl/openvms/software2.html#xzutils + + +1.2.5. Solaris, OpenSolaris, and derivatives + + The following linker error has been reported on some x86 systems: + + ld: fatal: relocation error: R_386_GOTOFF: ... + + This can be worked around by passing gl_cv_cc_visibility=no + as an argument to the configure script. + + test_scripts.sh in "make check" may fail if good enough tools are + missing from PATH (/usr/xpg4/bin or /usr/xpg6/bin). Nowadays + /usr/xpg4/bin is added to the script PATH by default on Solaris + (see --enable-path-for-scripts=PREFIX in section 2), but old xz + releases needed extra steps. See sections 5.5 and 3.2 for more + information. + + +1.2.6. Tru64 + + If you try to use the native C compiler on Tru64 (passing CC=cc to + configure), you may need the workaround mention in section 5.1 in + this file (pass also ac_cv_prog_cc_c99= to configure). + + +1.2.7. Windows + + The "windows" directory contains instructions for a few types + of builds: + + - INSTALL-MinGW-w64_with_CMake.txt + Simple instructions how to build XZ Utils natively on + Windows using only CMake and a prebuilt toolchain + (GCC + MinGW-w64 or Clang/LLVM + MinGW-w64). + + - INSTALL-MinGW-w64_with_Autotools.txt + Native build under MSYS2 or cross-compilation from + GNU/Linux using a bash script that creates a .zip + and .7z archives of the binaries and documentation. + The related file README-Windows.txt is for the + resulting binary package. + + - INSTALL-MSVC.txt + Building with MSVC / Visual Studio and CMake. + + - liblzma-crt-mixing.txt + Documentation what to take into account as a programmer + if liblzma.dll and the application don't use the same + CRT (MSVCRT or UCRT). + + Other choices: + + - Cygwin: https://cygwin.com/ + Building on Cygwin can be done like on many POSIX operating + systems. XZ Utils >= 5.2.0 isn't compatible with Cygwin older + than 1.7.35 (data loss!). 1.7.35 was released on 2015-03-04. + + - MSYS2: https://www.msys2.org/ + + +1.2.8. DOS + + There is a Makefile in the "dos" directory to build XZ Utils on + DOS using DJGPP. Support for long file names (LFN) is needed at + build time but the resulting xz.exe works without LFN support too. + See dos/INSTALL.txt and dos/README.txt for more information. + + +1.2.9. z/OS + + To build XZ Utils on z/OS UNIX System Services using xlc, pass + these options to the configure script: CC='xlc -qhaltonmsg=CCN3296' + CPPFLAS='-D_UNIX03_THREADS -D_XOPEN_SOURCE=600'. The first makes + xlc throw an error if a header file is missing, which is required + to make the tests in configure work. The CPPFLAGS are needed to + get pthread support (some other CPPFLAGS may work too; if there + are problems, try -D_UNIX95_THREADS instead of -D_UNIX03_THREADS). + + test_scripts.sh in "make check" will fail even if the scripts + actually work because the test data includes compressed files + with US-ASCII text. + + No other tests should fail. If test_files.sh fails, check that + the included .xz test files weren't affected by EBCDIC conversion. + + XZ Utils doesn't have code to detect the amount of physical RAM and + number of CPU cores on z/OS. + + +1.3. Adding support for new platforms + + If you have written patches to make XZ Utils to work on previously + unsupported platform, please send the patches to me! I will consider + including them to the official version. It's nice to minimize the + need of third-party patching. + + One exception: Don't request or send patches to change the whole + source package to C89. I find C99 substantially nicer to write and + maintain. However, the public library headers must be in C89 to + avoid frustrating those who maintain programs, which are strictly + in C89 or C++. + + +2. configure and CMake options +------------------------------ + + In most cases, the defaults are what you want. Many of the options + below are useful only when building a size-optimized version of + liblzma or command line tools. + + configure options are those that begin with two dashes "--" + or "gl_". + + CMake options begin with "XZ_", "TUKLIB_", or "CMAKE_". To use + them on the command line, prefix them with "-D", for example, + "cmake -DCMAKE_COMPILE_WARNING_AS_ERROR=ON". + + CMAKE_BUILD_TYPE=TYPE + CMake only: + + For release builds, CMAKE_BUILD_TYPE=Release is fine. + On targets where CMake defaults to -O3, the default + value is overridden to -O2. + + Empty value (CMAKE_BUILD_TYPE=) is fine if using custom + optimization options. *In this package* the empty build + type also disables debugging code just like "Release" + does. To enable debugging code with empty build type, + use -UNDEBUG in the CFLAGS environment variable or in + the CMAKE_C_FLAGS CMake variable to override -DNDEBUG. + + Non-standard build types like "None" do NOT disable + debugging code! Such non-standard build types should + be avoided for production builds! + + --enable-encoders=LIST + --disable-encoders + XZ_ENCODERS=LIST + Specify a LIST of filter encoders to build. In the + configure option the list is comma separated. + CMake lists are semicolon separated. + + To see the exact list of available filter encoders: + + - Autotools: ./configure --help + + - CMake: Configure the tree normally first, then use + "cmake -LH ." to list the cache variables. + + The default is to build all supported encoders. + + If LIST is empty or --disable-encoders is used, no filter + encoders will be built and also the code shared between + encoders will be omitted. + + Disabling encoders will remove some symbols from the + liblzma ABI, so this option should be used only when it + is known to not cause problems. + + --enable-decoders=LIST + --disable-decoders + XZ_DECODERS=LIST + This is like --enable-encoders but for decoders. The + default is to build all supported decoders. + + --enable-match-finders=LIST + XZ_MATCH_FINDERS=LIST + liblzma includes two categories of match finders: + hash chains and binary trees. Hash chains (hc3 and hc4) + are quite fast but they don't provide the best compression + ratio. Binary trees (bt2, bt3 and bt4) give excellent + compression ratio, but they are slower and need more + memory than hash chains. + + You need to enable at least one match finder to build the + LZMA1 or LZMA2 filter encoders. Usually hash chains are + used only in the fast mode, while binary trees are used to + when the best compression ratio is wanted. + + The default is to build all the match finders if LZMA1 + or LZMA2 filter encoders are being built. + + --enable-checks=LIST + XZ_CHECKS=LIST + liblzma support multiple integrity checks. CRC32 is + mandatory, and cannot be omitted. Supported check + types are "crc32", "crc64", and "sha256". By default + all supported check types are enabled. + + liblzma and the command line tools can decompress files + which use unsupported integrity check type, but naturally + the file integrity cannot be verified in that case. + + Disabling integrity checks may remove some symbols from + the liblzma ABI, so this option should be used only when + it is known to not cause problems. + + --enable-external-sha256 + XZ_EXTERNAL_SHA256=ON + Try to use SHA-256 code from the operating system libc + or similar base system libraries. This doesn't try to + use OpenSSL or libgcrypt or such libraries. + + The reasons to use this option: + + - It makes liblzma slightly smaller. + + - It might improve SHA-256 speed if the implementation + in the operating is very good (but see below). + + External SHA-256 is disabled by default for two reasons: + + - On some operating systems the symbol names of the + SHA-256 functions conflict with OpenSSL's libcrypto. + This causes weird problems such as decompression + errors if an application is linked against both + liblzma and libcrypto. This problem affects at least + FreeBSD 10 and older and MINIX 3.3.0 and older, but + other OSes that provide a function "SHA256_Init" might + also be affected. FreeBSD 11 has the problem fixed. + NetBSD had the problem but it was fixed it in 2009 + already. OpenBSD uses "SHA256Init" and thus never had + a conflict with libcrypto. + + - The SHA-256 code in liblzma is faster than the SHA-256 + code provided by some operating systems. If you are + curious, build two copies of xz (internal and external + SHA-256) and compare the decompression (xz --test) + times: + + dd if=/dev/zero bs=1024k count=1024 \ + | xz -v -0 -Csha256 > foo.xz + time xz --test foo.xz + + --disable-microlzma + XZ_MICROLZMA_ENCODER=OFF + XZ_MICROLZMA_DECODER=OFF + Don't build MicroLZMA encoder and decoder. This omits + lzma_microlzma_encoder() and lzma_microlzma_decoder() + API functions from liblzma. These functions are needed + by specific applications only. They were written for + erofs-utils but they may be used by others too. + + --disable-lzip-decoder + XZ_LZIP_DECODER=OFF + Disable decompression support for .lz (lzip) files. + This omits the API function lzma_lzip_decoder() from + liblzma and .lz support from the xz tool. + + --disable-xz + --disable-xzdec + --disable-lzmadec + --disable-lzmainfo + XZ_TOOL_XZ=OFF + XZ_TOOL_XZDEC=OFF + XZ_TOOL_LZMADEC=OFF + XZ_TOOL_LZMAINFO=OFF + Don't build and install the command line tool mentioned + in the option name. + + NOTE: Disabling xz will skip some tests in "make check". + + NOTE: If xzdec is disabled and lzmadec is left enabled, + a dangling man page symlink lzmadec.1 -> xzdec.1 is + created. + + XZ_TOOL_SYMLINKS=OFF + Don't create the unxz and xzcat symlinks. (There is + no "configure" option to disable these symlinks.) + + --disable-lzma-links + XZ_TOOL_SYMLINKS_LZMA=OFF + Don't create symlinks for LZMA Utils compatibility. + This includes lzma, unlzma, and lzcat. If scripts are + installed, also lzdiff, lzcmp, lzgrep, lzegrep, lzfgrep, + lzmore, and lzless will be omitted if this option is used. + + --disable-scripts + XZ_TOOL_SCRIPTS=OFF + Don't install the scripts xzdiff, xzgrep, xzmore, xzless, + and their symlinks. + + --disable-doc + XZ_DOC=OFF + Don't install the documentation files to $docdir + (often /usr/doc/xz or /usr/local/doc/xz). Man pages + will still be installed. The $docdir can be changed + with --docdir=DIR. + + --enable-doxygen + XZ_DOXYGEN=ON + Enable generation of the HTML version of the liblzma API + documentation using Doxygen. The resulting files are + installed to $docdir/api. This option assumes that + the 'doxygen' tool is available. + + NOTE: --disable-doc or XZ_DOC=OFF don't affect this. + + --disable-assembler + XZ_ASM_I386=OFF + This disables CRC32 and CRC64 assembly code on + 32-bit x86. This option currently does nothing + on other architectures (not even on x86-64). + + The 32-bit x86 assembly is position-independent code + which is suitable for use in shared libraries and + position-independent executables. It uses only i386 + instructions but the code is optimized for i686 class + CPUs. If you are compiling liblzma exclusively for + pre-i686 systems, you may want to disable the assembler + code. + + The assembly code is compatible with only certain OSes + and toolchains (it's not compatible with MSVC). + + Since XZ Utils 5.7.1alpha, the 32-bit x86 assembly code + co-exists with the modern CLMUL code: CLMUL is used if + support for it is detected at runtime. On old processors + the assembly code is used. + + --disable-clmul-crc + XZ_CLMUL_CRC=OFF + Disable the use of carryless multiplication for CRC + calculation even if compiler support for it is detected. + The code uses runtime detection of SSSE3, SSE4.1, and + CLMUL instructions on x86. On 32-bit x86 this currently + is used only if --disable-assembler is used (this might + be fixed in the future). The code works on E2K too. + + If using compiler options that unconditionally allow the + required extensions (-msse4.1 -mpclmul) then runtime + detection isn't used and the generic code is omitted. + + --disable-arm64-crc32 + XZ_ARM64_CRC32=OFF + Disable the use of the ARM64 CRC32 instruction extension + even if compiler support for it is detected. The code will + detect support for the instruction at runtime. + + If using compiler options that unconditionally allow the + required extensions (-march=armv8-a+crc or -march=armv8.1-a + and later) then runtime detection isn't used and the + generic code is omitted. + + --disable-loongarch-crc32 + XZ_LOONGARCH_CRC32=OFF + Disable the use of the 64-bit LoongArch CRC32 + instruction extension even if compiler support for + it is detected. There is no runtime detection because + all 64-bit LoongArch processors should support + the CRC32 instructions. + + --enable-unaligned-access + TUKLIB_FAST_UNALIGNED_ACCESS=ON + Allow liblzma to use unaligned memory access for 16-bit, + 32-bit, and 64-bit loads and stores. This should be + enabled only when the hardware supports this, that is, + when unaligned access is fast. Some operating system + kernels emulate unaligned access, which is extremely + slow. This option shouldn't be used on systems that + rely on such emulation. + + Unaligned access is enabled by default on these: + - 32-bit x86 + - 64-bit x86-64 + - 32-bit big endian PowerPC + - 64-bit big endian PowerPC + - 64-bit little endian PowerPC + - some RISC-V [1] + - some 32-bit ARM [2] + - some 64-bit ARM64 [2] (NOTE: Autodetection bug + if using GCC -mstrict-align, see below.) + + [1] Unaligned access is enabled by default if + configure sees that the C compiler + #defines __riscv_misaligned_fast. + + [2] Unaligned access is enabled by default if + configure sees that the C compiler + #defines __ARM_FEATURE_UNALIGNED: + + - ARMv7 + GCC or Clang: It works. The options + -munaligned-access and -mno-unaligned-access + affect this macro correctly. + + - ARM64 + Clang: It works. The options + -munaligned-access, -mno-unaligned-access, + and -mstrict-align affect this macro correctly. + Clang >= 17 supports -mno-strict-align too. + + - ARM64 + GCC: It partially works. The macro + is always #defined by GCC versions at least + up to 13.2, even when using -mstrict-align. + If building for strict-align ARM64, the + configure option --disable-unaligned-access + should be used if using a GCC version that has + this issue because otherwise the performance + may be degraded. It likely won't crash due to + how unaligned access is done in the C code. + + --enable-unsafe-type-punning + TUKLIB_USE_UNSAFE_TYPE_PUNNING=ON + This enables use of code like + + uint8_t *buf8 = ...; + *(uint32_t *)buf8 = ...; + + which violates strict aliasing rules and may result + in broken code. There should be no need to use this + option with recent GCC or Clang versions on any + arch as just as fast code can be generated in a safe + way too (using __builtin_assume_aligned + memcpy). + + However, this option might improve performance in some + other cases, especially with old compilers (for example, + GCC 3 and early 4.x on x86, GCC < 6 on ARMv6 and ARMv7). + + --enable-small + XZ_SMALL=ON + Reduce the size of liblzma by selecting smaller but + semantically equivalent version of some functions, and + omit precomputed lookup tables. This option tends to + make liblzma slightly slower. + + Note that while omitting the precomputed tables makes + liblzma smaller on disk, the tables are still needed at + run time, and need to be computed at startup. This also + means that the RAM holding the tables won't be shared + between applications linked against shared liblzma. + + This option doesn't modify CFLAGS to tell the compiler + to optimize for size. You need to add -Os or equivalent + flag(s) to CFLAGS manually. + + --enable-assume-ram=SIZE + XZ_ASSUME_RAM=SIZE + On the most common operating systems, XZ Utils is able to + detect the amount of physical memory on the system. This + information is used by the options --memlimit-compress, + --memlimit-decompress, and --memlimit when setting the + limit to a percentage of total RAM. + + On some systems, there is no code to detect the amount of + RAM though. Using --enable-assume-ram one can set how much + memory to assume on these systems. SIZE is given as MiB. + The default is 128 MiB. + + Feel free to send patches to add support for detecting + the amount of RAM on the operating system you use. See + src/common/tuklib_physmem.c for details. + + --enable-threads=METHOD + XZ_THREADS=METHOD + Threading support is enabled by default so normally there + is no need to specify this option. + + Supported values for METHOD: + + yes Autodetect the threading method. If none + is found, configure will give an error. + + posix Use POSIX pthreads. This is the default + except on Windows outside Cygwin. + + win95 Use Windows 95 compatible threads. This + is compatible with Windows XP and later + too. This is the default for 32-bit x86 + Windows builds. Unless the compiler + supports __attribute__((__constructor__)), + the 'win95' threading is incompatible with + --enable-small. + + vista Use Windows Vista compatible threads. The + resulting binaries won't run on Windows XP + or older. This is the default for Windows + excluding 32-bit x86 builds (that is, on + x86-64 the default is 'vista'). + + no Disable threading support. This is the + same as using --disable-threads. + NOTE: If combined with --enable-small + and the compiler doesn't support + __attribute__((__constructor__)), the + resulting liblzma won't be thread safe, + that is, if a multi-threaded application + calls any liblzma functions from more than + one thread, something bad may happen. + + --enable-sandbox=METHOD + XZ_SANDBOX=METHOD + There is limited sandboxing support in the xz and xzdec + tools. If built with sandbox support, xz uses it + automatically when (de)compressing exactly one file to + standard output when the options --files or --files0 aren't + used. This is a common use case, for example, + (de)compressing .tar.xz files via GNU tar. The sandbox is + also used for single-file 'xz --test' or 'xz --list'. + xzdec always uses the sandbox, except when more than one + file are decompressed. In this case it will enable the + sandbox for the last file that is decompressed. + + Supported METHODs: + + auto Look for a supported sandboxing method + and use it if found. If no method is + found, then sandboxing isn't used. + This is the default. + + no Disable sandboxing support. + + capsicum + Use Capsicum (FreeBSD >= 10.2) for + sandboxing. If no Capsicum support + is found, configure will give an error. + + pledge Use pledge(2) (OpenBSD >= 5.9) for + sandboxing. If pledge(2) isn't found, + configure will give an error. + + landlock + Use Landlock (Linux >= 5.13) for + sandboxing. If no Landlock support + is found, configure will give an error. + + --enable-symbol-versions[=VARIANT] + XZ_SYMBOL_VERSIONING=VARIANT + Use symbol versioning for liblzma shared library. + This is enabled by default on GNU/Linux (glibc only), + other GNU-based systems, and FreeBSD. + + Symbol versioning is never used for static liblzma. This + option is ignored when not building a shared library. + + Supported VARIANTs: + + no Disable symbol versioning. This is the + same as using --disable-symbol-versions. + + auto Autodetect between "no", "linux", + and "generic". + + yes Autodetect between "linux" and + "generic". This forces symbol + versioning to be used when + building a shared library. + + generic Generic version is the default for + FreeBSD and GNU/Linux on MicroBlaze. + + This is also used on GNU/Linux when + building with NVIDIA HPC Compiler + because the compiler doesn't support + the features required for the "linux" + variant below. + + linux Special version for GNU/Linux (glibc + only). This adds a few extra symbol + versions for compatibility with binaries + that have been linked against a liblzma + version that has been patched with + "xz-5.2.2-compat-libs.patch" from + RHEL/CentOS 7. That patch was used + by some build tools outside of + RHEL/CentOS 7 too. + + --enable-debug + This enables the assert() macro and possibly some other + run-time consistency checks. It makes the code slower, so + you normally don't want to have this enabled. + + In CMake, the build type (CMAKE_BUILD_TYPE) controls if + -DNDEBUG is passed to the compiler. *In this package*, + an empty build type disables debugging code too. + Non-standard build types like "None" do NOT disable + debugging code! + + To enable debugging code with empty build type in CMake, + use -UNDEBUG in the CFLAGS environment variable or in + the CMAKE_C_FLAGS CMake variable to override -DNDEBUG. + + --enable-werror + CMAKE_COMPILE_WARNING_AS_ERROR=ON (CMake >= 3.24) + If building with GCC, make all compiler warnings an error, + that abort the compilation. This may help catching bugs, + and should work on most systems. This has no effect on the + resulting binaries. + + --enable-path-for-scripts=PREFIX + (CMake determines this from the path of XZ_POSIX_SHELL) + If PREFIX isn't empty, PATH=PREFIX:$PATH will be set in + the beginning of the scripts (xzgrep and others). + The default is empty except on Solaris the default is + /usr/xpg4/bin. + + This can be useful if the default PATH doesn't contain + modern POSIX tools (as can be the case on Solaris) or if + one wants to ensure that the correct xz binary is in the + PATH for the scripts. Note that the latter use can break + "make check" if the prefixed PATH causes a wrong xz binary + (other than the one that was just built) to be used. + + Older xz releases support a different method for setting + the PATH for the scripts. It is described in section 3.2 + and is supported in this xz version too. + + gl_cv_posix_shell=/path/to/bin/sh + XZ_POSIX_SHELL=/path/to/bin/sh + POSIX shell to use for xzgrep and other scripts. + + - configure should autodetect this well enough. + Typically it's /bin/sh but in some cases, like + Solaris, something else is used. + + - CMake build uses /bin/sh except on Solaris the + default is /usr/xpg4/bin/sh. + + CMAKE_DLL_NAME_WITH_SOVERSION=ON + CMake on native Windows (not Cygwin) only: + + This changes the filename liblzma.dll to liblzma-5.dll. + + The unversioned filename liblzma.dll has been used + since XZ Utils 5.0.0 when creating binary packages + using the included windows/build.bash. The same + unversioned filename is the default with CMake. + However, there are popular builds that, very + understandably and reasonably, use the versioned + filename produced by GNU Libtool. + + This option should usually be left to its default value + (OFF). It can be set to ON if the liblzma DLL filename + must be compatible with the versioned filename + produced by GNU Libtool. For example, binaries + distributed in MSYS2 use a versioned DLL filename. + + +2.1. Static vs. dynamic linking of liblzma + + On 32-bit x86, linking against static liblzma can give a minor + speed improvement. Static libraries on x86 are usually compiled as + position-dependent code (non-PIC) and shared libraries are built as + position-independent code (PIC). PIC wastes one register, which can + make the code slightly slower compared to a non-PIC version. (Note + that this doesn't apply to x86-64.) + + If you want to link xz against static liblzma, the simplest way + is to pass --disable-shared to configure. If you want also shared + liblzma, run configure again and run "make install" only for + src/liblzma. + + +2.2. Optimizing xzdec and lzmadec + + xzdec and lzmadec are intended to be relatively small instead of + optimizing for the best speed. Thus, it is a good idea to build + xzdec and lzmadec separately: + + - To link the tools against static liblzma, pass --disable-shared + to configure. + + - To select somewhat size-optimized variant of some things in + liblzma, pass --enable-small to configure. + + - Tell the compiler to optimize for size instead of speed. + For example, with GCC, put -Os into CFLAGS. + + - xzdec and lzmadec will never use multithreading capabilities of + liblzma. You can avoid dependency on libpthread by passing + --disable-threads to configure. + + - There are and will be no translated messages for xzdec and + lzmadec, so it is fine to pass also --disable-nls to configure. + + - Only decoder code is needed, so you can speed up the build + slightly by passing --disable-encoders to configure. This + shouldn't affect the final size of the executables though, + because the linker is able to omit the encoder code anyway. + + If you have no use for xzdec or lzmadec, you can disable them with + --disable-xzdec and --disable-lzmadec. + + +3. xzgrep and other scripts +--------------------------- + +3.1. Dependencies + + POSIX shell (sh) and bunch of other standard POSIX tools are required + to run the scripts. The configure script tries to find a POSIX + compliant sh, but if it fails, you can force the shell by passing + gl_cv_posix_shell=/path/to/posix-sh as an argument to the configure + script. + + xzdiff (xzcmp/lzdiff/lzcmp) may use mktemp if it is available. As + a fallback xzdiff will use mkdir to securely create a temporary + directory. Having mktemp available is still recommended since the + mkdir fallback method isn't as robust as mktemp is. The original + mktemp can be found from . On GNU, most will + use the mktemp program from GNU coreutils instead of the original + implementation. Both mktemp versions are fine. + + In addition to using xz to decompress .xz files, xzgrep and xzdiff + use gzip, bzip2, and lzop to support .gz, bz2, and .lzo files. + + +3.2. PATH + + The method described below is supported by older xz releases. + It is supported by the current version too, but the newer + --enable-path-for-scripts=PREFIX described in section 2 may be + more convenient. + + The scripts assume that the required tools (standard POSIX utilities, + mktemp, and xz) are in PATH; the scripts don't set the PATH themselves + (except as described for --enable-path-for-scripts=PREFIX). Some + people like this while some think this is a bug. Those in the latter + group can easily patch the scripts before running the configure script + by taking advantage of a placeholder line in the scripts. + + For example, to make the scripts prefix /usr/bin:/bin to PATH: + + perl -pi -e 's|^#SET_PATH.*$|PATH=/usr/bin:/bin:\$PATH|' \ + src/scripts/xz*.in + + +4. Tests +-------- + + The test framework can be built and run by executing "make check" in + the build directory. The tests are a mix of executables and POSIX + shell scripts (sh). All tests should pass if the default configuration + is used. Disabling features through the configure options may cause + some tests to be skipped. If any tests do not pass, see section 5.5. + + +4.1. Testing in parallel + + The tests can be run in parallel using the "-j" make option on systems + that support it. For instance, "make -j4 check" will run up to four + tests simultaneously. + + +4.2. Cross compiling + + The tests can be built without running them: + + make check TESTS= + + The TESTS variable is the list of tests you wish to run. Leaving it + empty will compile the tests without running any. + + If the tests are copied to a target machine to execute, the test data + files in the directory tests/files must also be copied. The tests + search for the data files using the environment variable $srcdir, + expecting to find the data files under $srcdir/files/. If $srcdir + isn't set then it defaults to the current directory. + + The shell script tests can be copied from the source directory to the + target machine to execute. In addition to the test files, these tests + will expect the following relative file paths to execute properly: + + ./create_compress_files + ../config.h + ../src/xz/xz + ../src/xzdec/xzdec + ../src/scripts/xzdiff + ../src/scripts/xzgrep + + +5. Troubleshooting +------------------ + +5.1. "No C99 compiler was found." + + You need a C99 compiler to build XZ Utils. If the configure script + cannot find a C99 compiler and you think you have such a compiler + installed, set the compiler command by passing CC=/path/to/c99 as + an argument to the configure script. + + If you get this error even when you think your compiler supports C99, + you can override the test by passing ac_cv_prog_cc_c99= as an argument + to the configure script. The test for C99 compiler is not perfect (and + it is not as easy to make it perfect as it sounds), so sometimes this + may be needed. You will get a compile error if your compiler doesn't + support enough C99. + + +5.2. "No POSIX conforming shell (sh) was found." + + xzgrep and other scripts need a shell that (roughly) conforms + to POSIX. The configure script tries to find such a shell. If + it fails, you can force the shell to be used by passing + gl_cv_posix_shell=/path/to/posix-sh as an argument to the configure + script. Alternatively you can omit the installation of scripts and + this error by passing --disable-scripts to configure. + + +5.3. configure works but build fails at crc32_x86.S + + The easy fix is to pass --disable-assembler to the configure script. + + The configure script determines if assembler code can be used by + looking at the configure triplet; there is currently no check if + the assembler code can actually be built. The x86 assembler + code should work on x86 GNU/Linux, *BSDs, Solaris, Darwin, MinGW, + Cygwin, and DJGPP. On other x86 systems, there may be problems and + the assembler code may need to be disabled with the configure option. + + If you get this error when building for x86-64, you have specified or + the configure script has misguessed your architecture. Pass the + correct configure triplet using the --build=CPU-COMPANY-SYSTEM option + (see INSTALL.generic). + + +5.4. Lots of warnings about symbol visibility + + On some systems where symbol visibility isn't supported, GCC may + still accept the visibility options and attributes, which will make + configure think that visibility is supported. This will result in + many compiler warnings. You can avoid the warnings by forcing the + visibility support off by passing gl_cv_cc_visibility=no as an + argument to the configure script. This has no effect on the + resulting binaries, but fewer warnings looks nicer and may allow + using --enable-werror. + + +5.5. "make check" fails + + If the other tests pass but test_scripts.sh fails, then the problem + is in the scripts in src/scripts. Comparing the contents of + tests/xzgrep_test_output to tests/xzgrep_expected_output might + give a good idea about problems in xzgrep. One possibility is that + some tools are missing from the current PATH or the tools lack + support for some POSIX features. This can happen at least on + Solaris where the tools in /bin may be ancient but good enough + tools are available in /usr/xpg4/bin or /usr/xpg6/bin. For possible + fixes, see --enable-path-for-scripts=PREFIX in section 2 and the + older alternative method described in section 3.2 of this file. + + If tests other than test_scripts.sh fail, a likely reason is that + libtool links the test programs against an installed version of + liblzma instead of the version that was just built. This is + obviously a bug which seems to happen on some platforms. + A workaround is to uninstall the old liblzma versions first. + + If the problem isn't any of those described above, then it's likely + a bug in XZ Utils or in the compiler. See the platform-specific + notes in this file for possible known problems. Please report + a bug if you cannot solve the problem. See README for contact + information. + + +5.6. liblzma.so (or similar) not found when running xz + + If you installed the package with "make install" and get an error + about liblzma.so (or a similarly named file) being missing, try + running "ldconfig" to update the run-time linker cache (if your + operating system has such a command). + diff --git a/src/native/external/xz/INSTALL.generic b/src/native/external/xz/INSTALL.generic new file mode 100644 index 00000000000000..8865734f81b136 --- /dev/null +++ b/src/native/external/xz/INSTALL.generic @@ -0,0 +1,368 @@ +Installation Instructions +************************* + + Copyright (C) 1994-1996, 1999-2002, 2004-2016 Free Software +Foundation, Inc. + + Copying and distribution of this file, with or without modification, +are permitted in any medium without royalty provided the copyright +notice and this notice are preserved. This file is offered as-is, +without warranty of any kind. + +Basic Installation +================== + + Briefly, the shell command './configure && make && make install' +should configure, build, and install this package. The following +more-detailed instructions are generic; see the 'README' file for +instructions specific to this package. Some packages provide this +'INSTALL' file but do not implement all of the features documented +below. The lack of an optional feature in a given package is not +necessarily a bug. More recommendations for GNU packages can be found +in *note Makefile Conventions: (standards)Makefile Conventions. + + The 'configure' shell script attempts to guess correct values for +various system-dependent variables used during compilation. It uses +those values to create a 'Makefile' in each directory of the package. +It may also create one or more '.h' files containing system-dependent +definitions. Finally, it creates a shell script 'config.status' that +you can run in the future to recreate the current configuration, and a +file 'config.log' containing compiler output (useful mainly for +debugging 'configure'). + + It can also use an optional file (typically called 'config.cache' and +enabled with '--cache-file=config.cache' or simply '-C') that saves the +results of its tests to speed up reconfiguring. Caching is disabled by +default to prevent problems with accidental use of stale cache files. + + If you need to do unusual things to compile the package, please try +to figure out how 'configure' could check whether to do them, and mail +diffs or instructions to the address given in the 'README' so they can +be considered for the next release. If you are using the cache, and at +some point 'config.cache' contains results you don't want to keep, you +may remove or edit it. + + The file 'configure.ac' (or 'configure.in') is used to create +'configure' by a program called 'autoconf'. You need 'configure.ac' if +you want to change it or regenerate 'configure' using a newer version of +'autoconf'. + + The simplest way to compile this package is: + + 1. 'cd' to the directory containing the package's source code and type + './configure' to configure the package for your system. + + Running 'configure' might take a while. While running, it prints + some messages telling which features it is checking for. + + 2. Type 'make' to compile the package. + + 3. Optionally, type 'make check' to run any self-tests that come with + the package, generally using the just-built uninstalled binaries. + + 4. Type 'make install' to install the programs and any data files and + documentation. When installing into a prefix owned by root, it is + recommended that the package be configured and built as a regular + user, and only the 'make install' phase executed with root + privileges. + + 5. Optionally, type 'make installcheck' to repeat any self-tests, but + this time using the binaries in their final installed location. + This target does not install anything. Running this target as a + regular user, particularly if the prior 'make install' required + root privileges, verifies that the installation completed + correctly. + + 6. You can remove the program binaries and object files from the + source code directory by typing 'make clean'. To also remove the + files that 'configure' created (so you can compile the package for + a different kind of computer), type 'make distclean'. There is + also a 'make maintainer-clean' target, but that is intended mainly + for the package's developers. If you use it, you may have to get + all sorts of other programs in order to regenerate files that came + with the distribution. + + 7. Often, you can also type 'make uninstall' to remove the installed + files again. In practice, not all packages have tested that + uninstallation works correctly, even though it is required by the + GNU Coding Standards. + + 8. Some packages, particularly those that use Automake, provide 'make + distcheck', which can by used by developers to test that all other + targets like 'make install' and 'make uninstall' work correctly. + This target is generally not run by end users. + +Compilers and Options +===================== + + Some systems require unusual options for compilation or linking that +the 'configure' script does not know about. Run './configure --help' +for details on some of the pertinent environment variables. + + You can give 'configure' initial values for configuration parameters +by setting variables in the command line or in the environment. Here is +an example: + + ./configure CC=c99 CFLAGS=-g LIBS=-lposix + + *Note Defining Variables::, for more details. + +Compiling For Multiple Architectures +==================================== + + You can compile the package for more than one kind of computer at the +same time, by placing the object files for each architecture in their +own directory. To do this, you can use GNU 'make'. 'cd' to the +directory where you want the object files and executables to go and run +the 'configure' script. 'configure' automatically checks for the source +code in the directory that 'configure' is in and in '..'. This is known +as a "VPATH" build. + + With a non-GNU 'make', it is safer to compile the package for one +architecture at a time in the source code directory. After you have +installed the package for one architecture, use 'make distclean' before +reconfiguring for another architecture. + + On MacOS X 10.5 and later systems, you can create libraries and +executables that work on multiple system types--known as "fat" or +"universal" binaries--by specifying multiple '-arch' options to the +compiler but only a single '-arch' option to the preprocessor. Like +this: + + ./configure CC="gcc -arch i386 -arch x86_64 -arch ppc -arch ppc64" \ + CXX="g++ -arch i386 -arch x86_64 -arch ppc -arch ppc64" \ + CPP="gcc -E" CXXCPP="g++ -E" + + This is not guaranteed to produce working output in all cases, you +may have to build one architecture at a time and combine the results +using the 'lipo' tool if you have problems. + +Installation Names +================== + + By default, 'make install' installs the package's commands under +'/usr/local/bin', include files under '/usr/local/include', etc. You +can specify an installation prefix other than '/usr/local' by giving +'configure' the option '--prefix=PREFIX', where PREFIX must be an +absolute file name. + + You can specify separate installation prefixes for +architecture-specific files and architecture-independent files. If you +pass the option '--exec-prefix=PREFIX' to 'configure', the package uses +PREFIX as the prefix for installing programs and libraries. +Documentation and other data files still use the regular prefix. + + In addition, if you use an unusual directory layout you can give +options like '--bindir=DIR' to specify different values for particular +kinds of files. Run 'configure --help' for a list of the directories +you can set and what kinds of files go in them. In general, the default +for these options is expressed in terms of '${prefix}', so that +specifying just '--prefix' will affect all of the other directory +specifications that were not explicitly provided. + + The most portable way to affect installation locations is to pass the +correct locations to 'configure'; however, many packages provide one or +both of the following shortcuts of passing variable assignments to the +'make install' command line to change installation locations without +having to reconfigure or recompile. + + The first method involves providing an override variable for each +affected directory. For example, 'make install +prefix=/alternate/directory' will choose an alternate location for all +directory configuration variables that were expressed in terms of +'${prefix}'. Any directories that were specified during 'configure', +but not in terms of '${prefix}', must each be overridden at install time +for the entire installation to be relocated. The approach of makefile +variable overrides for each directory variable is required by the GNU +Coding Standards, and ideally causes no recompilation. However, some +platforms have known limitations with the semantics of shared libraries +that end up requiring recompilation when using this method, particularly +noticeable in packages that use GNU Libtool. + + The second method involves providing the 'DESTDIR' variable. For +example, 'make install DESTDIR=/alternate/directory' will prepend +'/alternate/directory' before all installation names. The approach of +'DESTDIR' overrides is not required by the GNU Coding Standards, and +does not work on platforms that have drive letters. On the other hand, +it does better at avoiding recompilation issues, and works well even +when some directory options were not specified in terms of '${prefix}' +at 'configure' time. + +Optional Features +================= + + If the package supports it, you can cause programs to be installed +with an extra prefix or suffix on their names by giving 'configure' the +option '--program-prefix=PREFIX' or '--program-suffix=SUFFIX'. + + Some packages pay attention to '--enable-FEATURE' options to +'configure', where FEATURE indicates an optional part of the package. +They may also pay attention to '--with-PACKAGE' options, where PACKAGE +is something like 'gnu-as' or 'x' (for the X Window System). The +'README' should mention any '--enable-' and '--with-' options that the +package recognizes. + + For packages that use the X Window System, 'configure' can usually +find the X include and library files automatically, but if it doesn't, +you can use the 'configure' options '--x-includes=DIR' and +'--x-libraries=DIR' to specify their locations. + + Some packages offer the ability to configure how verbose the +execution of 'make' will be. For these packages, running './configure +--enable-silent-rules' sets the default to minimal output, which can be +overridden with 'make V=1'; while running './configure +--disable-silent-rules' sets the default to verbose, which can be +overridden with 'make V=0'. + +Particular systems +================== + + On HP-UX, the default C compiler is not ANSI C compatible. If GNU CC +is not installed, it is recommended to use the following options in +order to use an ANSI C compiler: + + ./configure CC="cc -Ae -D_XOPEN_SOURCE=500" + +and if that doesn't work, install pre-built binaries of GCC for HP-UX. + + HP-UX 'make' updates targets which have the same time stamps as their +prerequisites, which makes it generally unusable when shipped generated +files such as 'configure' are involved. Use GNU 'make' instead. + + On OSF/1 a.k.a. Tru64, some versions of the default C compiler cannot +parse its '' header file. The option '-nodtk' can be used as a +workaround. If GNU CC is not installed, it is therefore recommended to +try + + ./configure CC="cc" + +and if that doesn't work, try + + ./configure CC="cc -nodtk" + + On Solaris, don't put '/usr/ucb' early in your 'PATH'. This +directory contains several dysfunctional programs; working variants of +these programs are available in '/usr/bin'. So, if you need '/usr/ucb' +in your 'PATH', put it _after_ '/usr/bin'. + + On Haiku, software installed for all users goes in '/boot/common', +not '/usr/local'. It is recommended to use the following options: + + ./configure --prefix=/boot/common + +Specifying the System Type +========================== + + There may be some features 'configure' cannot figure out +automatically, but needs to determine by the type of machine the package +will run on. Usually, assuming the package is built to be run on the +_same_ architectures, 'configure' can figure that out, but if it prints +a message saying it cannot guess the machine type, give it the +'--build=TYPE' option. TYPE can either be a short name for the system +type, such as 'sun4', or a canonical name which has the form: + + CPU-COMPANY-SYSTEM + +where SYSTEM can have one of these forms: + + OS + KERNEL-OS + + See the file 'config.sub' for the possible values of each field. If +'config.sub' isn't included in this package, then this package doesn't +need to know the machine type. + + If you are _building_ compiler tools for cross-compiling, you should +use the option '--target=TYPE' to select the type of system they will +produce code for. + + If you want to _use_ a cross compiler, that generates code for a +platform different from the build platform, you should specify the +"host" platform (i.e., that on which the generated programs will +eventually be run) with '--host=TYPE'. + +Sharing Defaults +================ + + If you want to set default values for 'configure' scripts to share, +you can create a site shell script called 'config.site' that gives +default values for variables like 'CC', 'cache_file', and 'prefix'. +'configure' looks for 'PREFIX/share/config.site' if it exists, then +'PREFIX/etc/config.site' if it exists. Or, you can set the +'CONFIG_SITE' environment variable to the location of the site script. +A warning: not all 'configure' scripts look for a site script. + +Defining Variables +================== + + Variables not defined in a site shell script can be set in the +environment passed to 'configure'. However, some packages may run +configure again during the build, and the customized values of these +variables may be lost. In order to avoid this problem, you should set +them in the 'configure' command line, using 'VAR=value'. For example: + + ./configure CC=/usr/local2/bin/gcc + +causes the specified 'gcc' to be used as the C compiler (unless it is +overridden in the site shell script). + +Unfortunately, this technique does not work for 'CONFIG_SHELL' due to an +Autoconf limitation. Until the limitation is lifted, you can use this +workaround: + + CONFIG_SHELL=/bin/bash ./configure CONFIG_SHELL=/bin/bash + +'configure' Invocation +====================== + + 'configure' recognizes the following options to control how it +operates. + +'--help' +'-h' + Print a summary of all of the options to 'configure', and exit. + +'--help=short' +'--help=recursive' + Print a summary of the options unique to this package's + 'configure', and exit. The 'short' variant lists options used only + in the top level, while the 'recursive' variant lists options also + present in any nested packages. + +'--version' +'-V' + Print the version of Autoconf used to generate the 'configure' + script, and exit. + +'--cache-file=FILE' + Enable the cache: use and save the results of the tests in FILE, + traditionally 'config.cache'. FILE defaults to '/dev/null' to + disable caching. + +'--config-cache' +'-C' + Alias for '--cache-file=config.cache'. + +'--quiet' +'--silent' +'-q' + Do not print messages saying which checks are being made. To + suppress all normal output, redirect it to '/dev/null' (any error + messages will still be shown). + +'--srcdir=DIR' + Look for the package's source code in directory DIR. Usually + 'configure' can determine that directory automatically. + +'--prefix=DIR' + Use DIR as the installation prefix. *note Installation Names:: for + more details, including other options available for fine-tuning the + installation locations. + +'--no-create' +'-n' + Run the configure checks, but stop before creating any output + files. + +'configure' also accepts some other, not widely useful, options. Run +'configure --help' for more details. diff --git a/src/native/external/xz/Makefile.am b/src/native/external/xz/Makefile.am new file mode 100644 index 00000000000000..fc54f477e1f83c --- /dev/null +++ b/src/native/external/xz/Makefile.am @@ -0,0 +1,123 @@ +## SPDX-License-Identifier: 0BSD +## Author: Lasse Collin + +# Use -n to prevent gzip from adding a timestamp to the .gz headers. +GZIP_ENV = -9n + +DIST_SUBDIRS = lib src po tests debug +SUBDIRS = + +if COND_GNULIB +SUBDIRS += lib +endif + +SUBDIRS += src po tests + +if COND_DOC +dist_doc_DATA = \ + AUTHORS \ + COPYING \ + COPYING.0BSD \ + COPYING.GPLv2 \ + NEWS \ + README \ + THANKS \ + doc/faq.txt \ + doc/history.txt \ + doc/xz-file-format.txt \ + doc/lzma-file-format.txt + +examplesdir = $(docdir)/examples +dist_examples_DATA = \ + doc/examples/00_README.txt \ + doc/examples/01_compress_easy.c \ + doc/examples/02_decompress.c \ + doc/examples/03_compress_custom.c \ + doc/examples/04_compress_easy_mt.c \ + doc/examples/11_file_info.c \ + doc/examples/Makefile +endif + +EXTRA_DIST = \ + cmake \ + dos \ + doxygen \ + extra \ + po4a \ + windows \ + CMakeLists.txt \ + COPYING.GPLv2 \ + COPYING.GPLv3 \ + COPYING.LGPLv2.1 \ + INSTALL.generic \ + PACKAGERS \ + TODO \ + autogen.sh \ + build-aux/license-check.sh \ + build-aux/manconv.sh \ + build-aux/version.sh \ + po/xz.pot-header + +ACLOCAL_AMFLAGS = -I m4 + +# List of man pages to convert to plain text in the dist-hook target +# or to PDF in the pdf-local target. +manfiles = \ + src/xz/xz.1 \ + src/xzdec/xzdec.1 \ + src/lzmainfo/lzmainfo.1 \ + src/scripts/xzdiff.1 \ + src/scripts/xzgrep.1 \ + src/scripts/xzless.1 \ + src/scripts/xzmore.1 + +# Create ChangeLog using "git log". +# Convert the man pages to plain text (ASCII only) format. +dist-hook: + if test -d "$(srcdir)/.git" && type git > /dev/null 2>&1; then \ + ( cd "$(srcdir)" && git log --pretty=medium --date=iso --stat \ + b69da6d4bb6bb11fc0cf066920791990d2b22a06^..HEAD ) \ + > "$(distdir)/ChangeLog"; \ + fi + if type groff > /dev/null 2>&1; then \ + dest="$(distdir)/doc/man" && \ + $(MKDIR_P) "$$dest/txt" && \ + for FILE in $(manfiles); do \ + BASE=`basename $$FILE .1` && \ + $(SHELL) "$(srcdir)/build-aux/manconv.sh" ascii \ + < "$(srcdir)/$$FILE" \ + > "$$dest/txt/$$BASE.txt"; \ + done; \ + fi + cd "$(distdir)" && $(SHELL) "build-aux/license-check.sh" + +# This works with GNU tar and gives cleaner package than normal 'make dist'. +# This also ensures that the translations are up to date (dist-hook +# would be too late for that). +mydist: + $(SHELL) "$(srcdir)/src/liblzma/validate_map.sh" + cd po && $(MAKE) xz.pot-update + cd "$(srcdir)/po4a" && $(SHELL) update-po + VERSION=$(VERSION); \ + if test -d "$(srcdir)/.git" && type git > /dev/null 2>&1; then \ + $(SHELL) "$(srcdir)/build-aux/license-check.sh" || exit 1; \ + SNAPSHOT=`cd "$(srcdir)" && git describe --abbrev=8 | cut -b2-`; \ + test -n "$$SNAPSHOT" && VERSION=$$SNAPSHOT; \ + fi; \ + TAR_OPTIONS='--owner=0 --group=0 --numeric-owner --mode=u+rw,go+r-w --sort=name' \ + LC_COLLATE=C \ + $(MAKE) VERSION="$$VERSION" dist-gzip + +# NOTE: This only creates the PDFs. The install rules are missing. +pdf-local: + dest="doc/man" && \ + $(MKDIR_P) "$$dest/pdf-a4" "$$dest/pdf-letter" && \ + for FILE in $(manfiles); do \ + BASE=`basename $$FILE .1` && \ + $(SHELL) "$(srcdir)/build-aux/manconv.sh" pdf a4 \ + < "$(srcdir)/$$FILE" \ + > "$$dest/pdf-a4/$$BASE-a4.pdf" && \ + $(SHELL) "$(srcdir)/build-aux/manconv.sh" pdf letter \ + < "$(srcdir)/$$FILE" \ + > "$$dest/pdf-letter/$$BASE-letter.pdf"; \ + done diff --git a/src/native/external/xz/NEWS b/src/native/external/xz/NEWS new file mode 100644 index 00000000000000..73ff8558da08fc --- /dev/null +++ b/src/native/external/xz/NEWS @@ -0,0 +1,2992 @@ + +XZ Utils Release Notes +====================== + +5.8.2 (2025-12-17) + + * liblzma: + + - Fix the build on ARM64 on glibc versions older than + 2.24 (2016). They don't have HWCAP_CRC32 in . + + - Disable CLMUL CRC code when building for 32-bit x86 with + old MSVC versions. This avoids a compiler bug. The exact + compiler version in which the issue was fixed is unknown, + but VS 2022 17.13 (MSVC 19.43.34808) is known to work, so + CLMUL CRC on 32-bit x86 is disabled with MSVC versions + older than that. + + * xz: + + - Add a workaround for Red Hat Enterprise Linux 9 kernel bug + which made xz fail with "xz: Failed to enable the sandbox". + It only occurs with xz 5.8.0 and 5.8.1 binaries built for + other distros. For example, running Debian 13 in a container + on RHEL/CentOS 9 would trigger the issue. + + The bug was introduced in RHEL 9 kernel 5.14.0-603.el9 + (2025-07-30) and fixed in 5.14.0-648.el9 (2025-12-05). + However, as of writing, the fixed kernel isn't available + to RHEL 9 users yet, so including the workaround in this + xz release seems reasonable. The workaround will be removed + when it's no longer needed. + + xzdec was also affected by this issue. + + - On AIX, don't use fsync() on directories because it fails. + + - Fix the build on Emscripten. + + - Fix the build on clang-cl on Windows. + + - Take resource limits (RLIMIT_DATA, RLIMIT_AS, and RLIMIT_VMEM) + into account when determining the default memory usage limit + for multithreaded mode. This should prevent xz from failing + when a resource limit has been set to a value that is less + than 1/4 of total RAM. Other memory limits can still trigger + the same issue, for example, Linux cgroup v2 memory.max. + + * Build systems: + + - When symbol versioning is enabled, pass --undefined-version + to the linker if the option is supported. This fixes the + build when using LLVM's lld and some liblzma features have + been disabled at build time. + + - ARM64: Fix autodetection of fast unaligned memory access when + using GCC and -mstrict-align is in effect. Previously the + build systems would incorrectly guess that unaligned access + is fast, which would result in much slower binaries than + needed. The fix is a workaround for GCC bug 111555; + autodetection already worked with Clang. + + - LoongArch: Autodetect if fast unaligned memory access is + supported. This can improve compression speed by 15 % (but + not decompression speed). + + * Translations: + + - Update the Spanish translation. + + - Add Swedish man page translations. + + - Update Italian, Korean, Romanian, Serbian, and Ukrainian + man page translations. + + +5.8.1 (2025-04-03) + + IMPORTANT: This includes a security fix for CVE-2025-31115 which + affects XZ Utils from 5.3.3alpha to 5.8.0. No new 5.4.x or 5.6.x + releases will be made, but the fix is in the v5.4 and v5.6 branches + in the xz Git repository. A standalone patch for all affected + versions is available as well. + + * Multithreaded .xz decoder (lzma_stream_decoder_mt()): + + - Fix a bug that could at least result in a crash with + invalid input. (CVE-2025-31115) + + - Fix a performance bug: Only one thread was used if the whole + input file was provided at once to lzma_code(), the output + buffer was big enough, timeout was disabled, and LZMA_FINISH + was used. There are no bug reports about this, thus it's + possible that no real-world application was affected. + + * Avoid even with C11/C17 compilers. This fixes the + build with Oracle Developer Studio 12.6 on Solaris 10 when the + compiler is in C11 mode (the header doesn't exist). + + * Autotools: Restore compatibility with GNU make versions older + than 4.0 by creating the package using GNU gettext 0.23.1 + infrastructure instead of 0.24. + + * Update Croatian translation. + + +5.8.0 (2025-03-25) + + This bumps the minor version of liblzma because new features were + added. The API and ABI are still backward compatible with liblzma + 5.6.x, 5.4.x, 5.2.x, and 5.0.x. + + * liblzma on 32/64-bit x86: When possible, use SSE2 intrinsics + instead of memcpy() in the LZMA/LZMA2 decoder. In typical cases, + this may reduce decompression time by 0-5 %. However, when built + against musl libc, over 15 % time reduction was observed with + highly compressed files. + + * CMake: Make the feature test macros match the Autotools-based + build on NetBSD, Darwin, and mingw-w64. + + * Update the Croatian, Italian, Portuguese, and Romanian + translations. + + * Update the German, Italian, Korean, Romanian, Serbian, and + Ukrainian man page translations. + + Summary of changes in the 5.7.x development releases: + + * Mark the following LZMA Utils script aliases as deprecated: + lzcmp, lzdiff, lzless, lzmore, lzgrep, lzegrep, and lzfgrep. + + * liblzma: + + - Improve LZMA/LZMA2 encoder speed on 64-bit PowerPC (both + endiannesses) and those 64-bit RISC-V processors that + support fast unaligned access. + + - Add low-level APIs for RISC-V, ARM64, and x86 BCJ filters + to lzma/bcj.h. These are primarily for erofs-utils. + + - x86/x86-64/E2K CLMUL CRC code was rewritten. + + - Use the CRC32 instructions on LoongArch. + + * xz: + + - Synchronize the output file and its directory using fsync() + before deleting the input file. No syncing is done when xz + isn't going to delete the input file. + + - Add --no-sync to disable the sync-before-delete behavior. + + - Make --single-stream imply --keep. + + * xz, xzdec, lzmainfo: When printing messages, replace + non-printable characters with question marks. + + * xz and xzdec on Linux: Support Landlock ABI versions 5 and 6. + + * CMake: Revise the configuration variables and some of their + options, and document them in the file INSTALL. CMake support + is no longer experimental. (It was already not experimental + when building for native Windows.) + + * Add build-aux/license-check.sh. + + +5.7.2beta (2025-03-08) + + * On the man pages, mark the following LZMA Utils script aliases as + deprecated: lzcmp, lzdiff, lzless, lzmore, lzgrep, lzegrep, and + lzfgrep. The commands that start with xz* instead of lz* have + identical behavior. + + The LZMA Utils aliases lzma, unlzma, and lzcat aren't deprecated + because some of these aliases are still in common use. lzmadec + and lzmainfo aren't deprecated either. + + * xz: In the ENVIRONMENT section of the man page, warn about + problems that some uses of XZ_DEFAULTS and XZ_OPT may create. + + * Windows (native builds, not Cygwin): In xz, xzdec, and lzmadec, + avoid an error message on broken pipe. + + * Autotools: Fix out-of-tree builds when using the bundled + getopt_long. + + * Translations: + + - Updated: Chinese (traditional), Croatian, Finnish, Georgian, + German, Korean, Polish, Romanian, Serbian, Spanish, Swedish, + Turkish, and Ukrainian + + - Added: Dutch + + * Man page translations: + + - Updated: German, Korean, Romanian, and Ukrainian + + - Added: Italian and Serbian + + +5.7.1alpha (2025-01-23) + + * All fixes from 5.6.4. + + * liblzma: + + - Improve LZMA/LZMA2 encoder speed on 64-bit PowerPC (both + endiannesses) and those 64-bit RISC-V processors that + support fast unaligned access. + + - x86/x86-64/E2K CLMUL CRC code was rewritten. It's faster and + doesn't cause false positives from sanitizers. Attributes + like __attribute__((__no_sanitize_address__)) are no longer + present. + + - On 32-bit x86, CLMUL CRC and the old (but still good) + assembly versions now co-exist with runtime detection. + Both Autotools and CMake build systems handle this + automatically now. + + - Use the CRC32 instructions on LoongArch to make CRC32 + calculation faster. + + - Add low-level APIs for RISC-V, ARM64, and x86 BCJ filters + to lzma/bcj.h. These are primarily for erofs-utils. + + - Minor tweaks to ARM64 CRC32 code and BCJ filters were made. + + * xz: + + - Synchronize the output file and its directory before deleting + the input file using fsync(). This reduces the probability of + data loss after a system crash. However, it can be a major + performance hit if processing many small files. + + NOTE: No syncing is done when xz isn't going to delete + the input file. + + - Add a new option --no-sync to disable the sync-before-delete + behavior. It's useful when compressing many small files and + one doesn't worry about data loss in case of a system crash. + + - Make --single-stream imply --keep. + + - Use automatic word wrapping for the text in --help and + similar situations to hopefully make the strings easier for + majority of translators (no need to count spaces anymore). + + * xz, xzdec, lzmainfo: When printing messages, replace + non-printable characters with question marks. This way + malicious filenames cannot be used to send escape sequences + to a terminal. This change is also applied to filenames shown + in "xz --robot --list". + + * xz and xzdec on Linux: Add support for Landlock ABI versions 5 + and 6. + + * CMake updates: + + - Increase the minimum required CMake version to 3.20. + + - Revise the configuration variables and some of their options. + Document them in the file INSTALL. + + - Attempt to produce liblzma.pc so that the paths are based on + ${prefix}, which makes it simpler to override the paths + if the liblzma files have been moved. + + - To enable translations, gettext-tools is now required. The + CMake build no longer supports installing pre-compiled + message catalog binary files (po/*.gmo). + + - Apple: Use Mach-O shared library versioning that is + compatible with GNU Libtool. This should make it easier to + switch between the build systems on Apple OSes that enforce + the correct compatibility_version (macOS >= 12 doesn't?). + This change is backward compatible: binaries linked against + old CMake-built liblzma will run with liblzma that uses + Libtool style versioning. + + - Windows (not Cygwin): Document CMAKE_DLL_NAME_WITH_SOVERSION + (CMake >= 3.27) in the file INSTALL. This option should + usually be left to its default value (OFF). It can be set + to ON if the liblzma DLL filename must be compatible with + the versioned filename produced by GNU Libtool. For example, + binaries distributed in MSYS2 use a versioned DLL filename. + + - CMake support is no longer experimental. (It was already + not experimental when building for native Windows.) + + * Windows: Building liblzma with Visual Studio 2013 is no longer + supported. Visual Studio 2015 or later (with CMake) can be used + to build liblzma and the command line tools. + + * Add preliminary Georgian translation. This already contains + translations of most of the strings that are now automatically + word wrapped. + + * Add build-aux/license-check.sh. Without arguments, it checks that + no license information has been forgotten. With the -v argument, + it shows the license info (or the lack of it) for each file. + + If the .git directory is available, only the files in the + repository are checked. Without the .git directory, a clean tree + from an extracted release tarball is expected. + + +5.6.4 (2025-01-23) + + * liblzma: Fix LZMA/LZMA2 encoder on big endian ARM64. + + * xz: + + - Fix --filters= and --filters1= ... --filters9= options + parsing. They require an argument, thus "xz --filters lzma2" + should work in addition to "xz --filters=lzma2". + + - On the man page, note in the --compress and --decompress + options that the default behavior is to delete the input + file unless writing to standard output. It was already + documented in the DESCRIPTION section but new users in + a hurry might miss it. + + * Windows (native builds, not Cygwin): Fix regressions introduced + in XZ Utils 5.6.3 which caused non-ASCII characters to display + incorrectly. Only builds with translation support were affected + (--enable-nls or ENABLE_NLS=ON). The following changes affect + builds that have translations enabled: + + - Require UCRT because MSVCRT doesn't support UTF-8 + locales and thus translations won't be readable on + Windows 10 version 1903 and later. (MSVCRT builds + are still possible with --disable-nls or ENABLE_NLS=OFF.) + + - Require gettext-runtime >= 0.23.1 because older versions + don't autodetect the use of the UTF-8 code page. This + resulted in garbled non-ASCII characters even with UCRT. + + - Partially fix alignment issues in xz --verbose --list + with translated messages. Chinese (simplified), + Chinese (traditional), and Korean column headings + are misaligned still because Windows and MinGW-w64 + don't provide wcwidth() and XZ Utils doesn't include + a replacement function either. + + * CMake: Explicitly disable unity builds. This prevents build + failures when another project uses XZ Utils via CMake's + FetchContent module, and that project enables unity builds. + + * Update Chinese (traditional) and Serbian translations. + + +5.6.3 (2024-10-01) + + IMPORTANT: This includes a Windows-specific security fix to + the command line tools (CVE-2024-47611). liblzma isn't affected + by this issue. + + * liblzma: + + - Fix x86-64 inline assembly compatibility with GNU Binutils + older than 2.27. + + - Fix the build with GCC 4.2 on OpenBSD/sparc64. + + * xzdec: Display an error instead of failing silently if the + unsupported option -M is specified. + + * lzmainfo: Fix integer overflows when rounding the dictionary and + uncompressed sizes to the nearest mebibyte. + + * Windows (except Cygwin and MSYS2): Add an application manifest to + xz, xzdec, lzmadec, and lzmainfo executables: + + - Declare them compatible with Vista/7/8/8.1/10/11. This way + the programs won't needlessly use Operating System Context + of Vista when running on later Windows versions. This setting + doesn't mean that the executables cannot run on even older + versions if otherwise built that way. + + - Declare them as UAC-compliant. MSVC added this by default + already but it wasn't done with MinGW-w64, at least not + with all toolchain variants. + + - Declare them long path aware. This makes long path names + work on Windows 10 and 11 if the feature has been enabled + in the Windows registry. + + - Use the UTF-8 code page on Windows 10 version 1903 and later. + + * Now command line tools can access files whose names + contain characters that don't exist in the current + legacy code page. + + * The options --files and --files0 now expect file lists + to be in UTF-8 instead of the legacy code page. + + * This fixes a security issue: If a command line contains + Unicode characters (for example, filenames) that don't + exist in the current legacy code page, the characters are + converted to similar-looking characters with best-fit + mapping. Some best-fit mappings result in ASCII + characters that change the meaning of the command line, + which can be exploited with malicious filenames to do + argument injection or directory traversal attacks. + UTF-8 avoids best-fit mappings and thus fixes the issue. + (CVE-2024-47611) + + Forcing the process code page to UTF-8 is possible only + on Windows 10 version 1903 and later. The command line + tools remain vulnerable if used on an old older + version of Windows. + + This issue was discovered by Orange Tsai and splitline + from DEVCORE Research Team. + + A related smaller issue remains: Windows filenames may + contain unpaired surrogates (invalid UTF-16). These are + converted to the replacement character U+FFFD in the + UTF-8 code page. Thus, filenames with different unpaired + surrogates appear identical and aren't distinguishable + from filenames that contain the actual replacement + character U+FFFD. + + * When building with MinGW-w64, it is recommended to use + UCRT version instead of the old MSVCRT. For example, + non-ASCII characters from filenames won't print + correctly in messages to console with MSVCRT with + the UTF-8 code page (a cosmetic issue). liblzma-only + builds are still fine with MSVCRT. + + - Cygwin and MSYS2 process command line options differently and + the above issues don't exist. There is no need to replace the + default application manifest on Cygwin and MSYS2. + + * Autotools-based build: + + - Fix feature checks with link-time optimization (-flto). + + - Solaris: Fix a compatibility issue in version.sh. It matters + if one wants to regenerate configure by running autoconf. + + * CMake: + + - Use paths relative to ${prefix} in liblzma.pc when possible. + This is done only with CMake >= 3.20. + + - MSVC: Install liblzma.pc as it can be useful with MSVC too. + + - Windows: Fix liblzma filename prefix, for example: + + * Cygwin: The DLL was incorrectly named liblzma-5.dll. + Now it is cyglzma-5.dll. + + * MSVC: Rename import library from liblzma.lib to lzma.lib + while keeping liblzma.dll name as is. This helps with + "pkgconf --msvc-syntax --libs liblzma" because it mungles + "-llzma" in liblzma.pc to "lzma.lib". + + * MinGW-w64: No changes. + + - Windows: Use the correct resource file for lzmadec.exe. + Previously the resource file for xzdec.exe was used for both. + Autotools-based build isn't affected. + + - Prefer a C11 compiler over a C99 compiler but accept both. + + - Link Threads::Threads against liblzma using PRIVATE so that + -pthread and such flags won't unnecessarily get included in + the usage requirements of shared liblzma. That is, + target_link_libraries(foo PRIVATE liblzma::liblzma) no + longer adds -pthread if using POSIX threads and linking + against shared liblzma. The threading flags are still added + if linking against static liblzma. + + * Updated translations: Catalan, Chinese (simplified), and + Brazilian Portuguese. + + +5.6.2 (2024-05-29) + + * Remove the backdoor (CVE-2024-3094). + + * Not changed: Memory sanitizer (MSAN) has a false positive + in the CRC CLMUL code which also makes OSS Fuzz unhappy. + Valgrind is smarter and doesn't complain. + + A revision to the CLMUL code is coming anyway and this issue + will be cleaned up as part of it. It won't be backported to + 5.6.x or 5.4.x because the old code isn't wrong. There is + no reason to risk introducing regressions in old branches + just to silence a false positive. + + * liblzma: + + - lzma_index_decoder() and lzma_index_buffer_decode(): Fix + a missing output pointer initialization (*i = NULL) if the + functions are called with invalid arguments. The API docs + say that such an initialization is always done. In practice + this matters very little because the problem can only occur + if the calling application has a bug and these functions + return LZMA_PROG_ERROR. + + - lzma_str_to_filters(): Fix a missing output pointer + initialization (*error_pos = 0). This is very similar + to the fix above. + + - Fix C standard conformance with function pointer types. + + - Remove GNU indirect function (IFUNC) support. This is *NOT* + done for security reasons even though the backdoor relied on + this code. The performance benefits of IFUNC are too tiny in + this project to make the extra complexity worth it. + + - FreeBSD on ARM64: Add error checking to CRC32 instruction + support detection. + + - Fix building with NVIDIA HPC SDK. + + * xz: + + - Fix a C standard conformance issue in --block-list parsing + (arithmetic on a null pointer). + + - Fix a warning from GNU groff when processing the man page: + "warning: cannot select font 'CW'" + + * xzdec: Add support for Linux Landlock ABI version 4. xz already + had the v3-to-v4 change but it had been forgotten from xzdec. + + * Autotools-based build system (configure): + + - Symbol versioning variant can now be overridden with + --enable-symbol-versions. Documentation in INSTALL was + updated to match. + + - Add new configure option --enable-doxygen to enable + generation and installation of the liblzma API documentation + using Doxygen. Documentation in INSTALL and PACKAGERS was + updated to match. + + CMake: + + - Fix detection of Linux Landlock support. The detection code + in CMakeLists.txt had been sabotaged. + + - Disable symbol versioning on non-glibc Linux to match what + the Autotools build does. For example, symbol versioning + isn't enabled with musl. + + - Symbol versioning variant can now be overridden by setting + SYMBOL_VERSIONING to "OFF", "generic", or "linux". + + - Add support for all tests in typical build configurations. + Now the only difference to the tests coverage to Autotools + is that CMake-based build will skip more tests if features + are disabled. Such builds are only for special cases like + embedded systems. + + - Separate the CMake code for the tests into tests/tests.cmake. + It is used conditionally, thus it is possible to + + rm -rf tests + + and the CMake-based build will still work normally except + that no tests are then available. + + - Add a option ENABLE_DOXYGEN to enable generation and + installation of the liblzma API documentation using Doxygen. + + * Documentation: + + - Omit the Doxygen-generated liblzma API documentation from the + package. Instead, the generation and installation of the API + docs can be enabled with a configure or CMake option if + Doxygen is available. + + - Remove the XZ logo which was used in the API documentation. + The logo has been retired and isn't used by the project + anymore. However, it's OK to use it in contexts that refer + to the backdoor incident. + + - Remove the PDF versions of the man pages from the source + package. These existed primarily for users of operating + systems which don't come with tools to render man page + source files. The plain text versions are still included + in doc/man/txt. PDF files can still be generated to doc/man, + if the required tools are available, using "make pdf" after + running "configure". + + - Update home page URLs back to their old locations on + tukaani.org. + + - Update maintainer info. + + * Tests: + + - In tests/files/README, explain how to recreate the ARM64 + test files. + + - Remove two tests that used tiny x86 and SPARC object files + as the input files. The matching .c file was included but + the object files aren't easy to reproduce. The test cases + weren't great anyway; they were from the early days (2009) + of the project when the test suite had very few tests. + + - Improve a few tests. + + +5.6.1 (2024-03-09) + + IMPORTANT: This fixed bugs in the backdoor (CVE-2024-3094) (someone + had forgot to run Valgrind). + + * liblzma: Fixed two bugs relating to GNU indirect function (IFUNC) + with GCC. The more serious bug caused a program linked with + liblzma to crash on start up if the flag -fprofile-generate was + used to build liblzma. The second bug caused liblzma to falsely + report an invalid write to Valgrind when loading liblzma. + + * xz: Changed the messages for thread reduction due to memory + constraints to only appear under the highest verbosity level. + + * Build: + + - Fixed a build issue when the header file + was present on the system but the Landlock system calls were + not defined in . + + - The CMake build now warns and disables NLS if both gettext + tools and pre-created .gmo files are missing. Previously, + this caused the CMake build to fail. + + * Minor improvements to man pages. + + * Minor improvements to tests. + + +5.6.0 (2024-02-24) + + IMPORTANT: This added a backdoor (CVE-2024-3094). It's enabled only + in the release tarballs. + + This bumps the minor version of liblzma because new features were + added. The API and ABI are still backward compatible with liblzma + 5.4.x and 5.2.x and 5.0.x. + + NOTE: As described in the NEWS for 5.5.2beta, the core components + are now under the BSD Zero Clause License (0BSD). + + Since 5.5.2beta: + + * liblzma: + + - Disabled the branchless C variant in the LZMA decoder based + on the benchmark results from the community. + + - Disabled x86-64 inline assembly on x32 to fix the build. + + * Sandboxing support in xz: + + - Landlock is now used even when xz needs to create files. + In this case the sandbox has to be more permissive than + when no files need to be created. A similar thing was + already in use with pledge(2) since 5.3.4alpha. + + - Landlock and pledge(2) are now stricter when reading from + more than one input file and only writing to standard output. + + - Added support for Landlock ABI version 4. + + * CMake: + + - Default to -O2 instead of -O3 with CMAKE_BUILD_TYPE=Release. + -O3 is not useful for speed and makes the code larger. + + - Now builds lzmainfo and lzmadec. + + - xzdiff, xzgrep, xzless, xzmore, and their symlinks are now + installed. The scripts are also tested during "make test". + + - Added translation support for xz, lzmainfo, and the + man pages. + + - Applied the symbol versioning workaround for MicroBlaze that + is used in the Autotools build. + + - The general XZ Utils and liblzma API documentation is now + installed. + + - The CMake component names were changed a little and several + were added. liblzma_Runtime and liblzma_Development are + unchanged. + + - Minimum required CMake version is now 3.14. However, + translation support is disabled with CMake versions + older than 3.20. + + - The CMake-based build is now close to feature parity with the + Autotools-based build. Most importantly a few tests aren't + run yet. Testing the CMake-based build on different operating + systems would be welcome now. See the comment at the top of + CMakeLists.txt. + + * Fixed a bug in the Autotools feature test for ARM64 CRC32 + instruction support for old versions of Clang. This did not + affect the CMake build. + + * Windows: + + - The build instructions in INSTALL and windows/INSTALL*.txt + were revised completely. + + - windows/build-with-cmake.bat along with the instructions + in windows/INSTALL-MinGW-w64_with_CMake.txt should make + it very easy to build liblzma.dll and xz.exe on Windows + using CMake and MinGW-w64 with either GCC or Clang/LLVM. + + - windows/build.bash was updated. It now works on MSYS2 and + on GNU/Linux (cross-compiling) to create a .zip and .7z + package for 32-bit and 64-bit x86 using GCC + MinGW-w64. + + * The TODO file is no longer installed as part of the + documentation. The file is out of date and does not reflect + the actual tasks that will be completed in the future. + + * Translations: + + - Translated lzmainfo man pages are now installed. These + had been forgotten in earlier versions. + + - Updated Croatian, Esperanto, German, Hungarian, Korean, + Polish, Romanian, Spanish, Swedish, Vietnamese, and Ukrainian + translations. + + - Updated German, Korean, Romanian, and Ukrainian man page + translations. + + * Added a few tests. + + Summary of new features added in the 5.5.x development releases: + + * liblzma: + + - LZMA decoder: Speed optimizations to the C code and + added GCC & Clang compatible inline assembly for x86-64. + + - Added lzma_mt_block_size() to recommend a Block size for + multithreaded encoding. + + - Added CLMUL-based CRC32 on x86-64 and E2K with runtime + processor detection. Similar to CRC64, on 32-bit x86 it + isn't available unless --disable-assembler is used. + + - Optimized the CRC32 calculation on ARM64 platforms using the + CRC32 instructions. Runtime detection for the instruction is + used on GNU/Linux, FreeBSD, Windows, and macOS. If the + compiler flags indicate unconditional CRC32 instruction + support (+crc) then the generic version is not built. + + - Added definitions of mask values like + LZMA_INDEX_CHECK_MASK_CRC32 to . + + * xz: + + - Multithreaded mode is now the default. This improves + compression speed and creates .xz files that can be + decompressed in multithreaded mode. The downsides are + increased memory usage and slightly worse compression ratio. + + - Added a new command line option --filters to set the filter + chain using the liblzma filter string syntax. + + - Added new command line options --filters1 ... --filters9 to + set additional filter chains using the liblzma filter string + syntax. The --block-list option now allows specifying filter + chains that were set using these new options. + + - Ported the command line tools to Windows MSVC. + Visual Studio 2015 or later is required. + + * Added lz4 support to xzdiff/xzcmp and xzgrep. + + +5.5.2beta (2024-02-14) + + * Licensing change: The core components are now under the + BSD Zero Clause License (0BSD). In XZ Utils 5.4.6 and older + and 5.5.1alpha these components are in the public domain and + obviously remain so; the change affects the new releases only. + + 0BSD is an extremely permissive license which doesn't require + retaining or reproducing copyright or license notices when + distributing the code, thus in practice there is extremely + little difference to public domain. + + * liblzma + + - Significant speed optimizations to the LZMA decoder were + made. There are now three variants that can be chosen at + build time: + + * Basic C version: This is a few percent faster than + 5.4.x due to some new optimizations. + + * Branchless C: This is currently the default on platforms + for which there is no assembly code. This should be a few + percent faster than the basic C version. + + * x86-64 inline assembly. This works with GCC and Clang. + + The default choice can currently be overridden by setting + LZMA_RANGE_DECODER_CONFIG in CPPFLAGS: 0 means the basic + version and 3 means that branchless C version. + + - Optimized the CRC32 calculation on ARM64 platforms using the + CRC32 instructions. The instructions are optional in ARMv8.0 + and are required in ARMv8.1 and later. Runtime detection for + the instruction is used on GNU/Linux, FreeBSD, Windows, and + macOS. If the compiler flags indicate unconditional CRC32 + instruction support (+crc) then the generic version is not + built. + + * Added lz4 support to xzdiff/xzcmp and xzgrep. + + * Man pages of xzdiff/xzcmp, xzgrep, and xzmore were rewritten + to simplify licensing of the man page translations. + + * Translations: + + - Updated Chinese (simplified), German, Korean, Polish, + Romanian, Spanish, Swedish, and Ukrainian translations. + + - Updated German, Korean, Romanian, and Ukrainian man page + translations. + + * Small improvements to the tests. + + * Added doc/examples/11_file_info.c. It was added to the Git + repository in 2017 but forgotten to be added into distribution + tarballs. + + * Removed doc/examples_old. These were from 2012. + + * Removed the macos/build.sh script. It had not been updated + since 2013. + + +5.5.1alpha (2024-01-26) + + * Added a new filter for RISC-V binaries. The filter can be used + for 32-bit and 64-bit binaries with either little or big + endianness. In liblzma, the Filter ID is LZMA_FILTER_RISCV (0x0B) + and the xz option is --riscv. liblzma filter string syntax + recognizes this filter as "riscv". + + * liblzma: + + - Added lzma_mt_block_size() to recommend a Block size for + multithreaded encoding + + - Added CLMUL-based CRC32 on x86-64 and E2K with runtime + processor detection. Similar to CRC64, on 32-bit x86 it + isn't available unless --disable-assembler is used. + + - Implemented GNU indirect function (IFUNC) as a runtime + function dispatching method for CRC32 and CRC64 fast + implementations on x86. Only GNU/Linux (glibc) and FreeBSD + builds will use IFUNC, unless --enable-ifunc is specified to + configure. + + - Added definitions of mask values like + LZMA_INDEX_CHECK_MASK_CRC32 to . + + - The XZ logo is now included in the Doxygen generated + documentation. It is licensed under Creative Commons + Attribution-ShareAlike 4.0. + + * xz: + + - Multithreaded mode is now the default. This improves + compression speed and creates .xz files that can be + decompressed multithreaded at the cost of increased memory + usage and slightly worse compression ratio. + + - Added new command line option --filters to set the filter + chain using liblzma filter string syntax. + + - Added new command line options --filters1 ... --filters9 to + set additional filter chains using liblzma filter string + syntax. The --block-list option now allows specifying filter + chains that were set using these new options. + + - Added support for Linux Landlock as a sandboxing method. + + - xzdec now supports pledge(2), Capsicum, and Linux Landlock as + sandboxing methods. + + - Progress indicator time stats remain accurate after pausing + xz with SIGTSTP. + + - Ported xz and xzdec to Windows MSVC. Visual Studio 2015 or + later is required. + + * CMake Build: + + - Supports pledge(2), Capsicum, and Linux Landlock sandboxing + methods. + + - Replacement functions for getopt_long() are used on platforms + that do not have it. + + * Enabled unaligned access by default on PowerPC64LE and on RISC-V + targets that define __riscv_misaligned_fast. + + * Tests: + + - Added two new fuzz targets to OSS-Fuzz. + + - Implemented Continuous Integration (CI) testing using + GitHub Actions. + + * Changed quoting style from `...' to '...' in all messages, + scripts, and documentation. + + * Added basic Codespell support to help catch typo errors. + + +5.4.7 (2024-05-29) + + * Not changed: Memory sanitizer (MSAN) has a false positive + in the CRC CLMUL code which also makes OSS Fuzz unhappy. + Valgrind is smarter and doesn't complain. + + A revision to the CLMUL code is coming anyway and this issue + will be cleaned up as part of it. It won't be backported to + 5.6.x or 5.4.x because the old code isn't wrong. There is + no reason to risk introducing regressions in old branches + just to silence a false positive. + + * liblzma: + + - lzma_index_decoder() and lzma_index_buffer_decode(): Fix + a missing output pointer initialization (*i = NULL) if the + functions are called with invalid arguments. The API docs + say that such an initialization is always done. In practice + this matters very little because the problem can only occur + if the calling application has a bug and these functions + return LZMA_PROG_ERROR. + + - lzma_str_to_filters(): Fix a missing output pointer + initialization (*error_pos = 0). This is very similar + to the fix above. + + - Fix C standard conformance with function pointer types. + This newly showed up with Clang 17 with -fsanitize=undefined. + There are no bug reports about this. + + - Fix building with NVIDIA HPC SDK. + + * xz: + + - Fix a C standard conformance issue in --block-list parsing + (arithmetic on a null pointer). + + - Fix a warning from GNU groff when processing the man page: + "warning: cannot select font 'CW'" + + - Fix outdated threading related information on the man page. + + * xzless: + + - With "less" version 451 and later, use "||-" instead of "|-" + in the environment variable LESSOPEN. This way compressed + files that contain no uncompressed data are shown correctly + as empty. + + - With "less" version 632 and later, use --show-preproc-errors + to make "less" show a warning on decompression errors. + + * Autotools-based build system (configure): + + - Symbol versioning variant can now be overridden with + --enable-symbol-versions. Documentation in INSTALL was + updated to match. + + CMake: + + - Linux on MicroBlaze is handled specially now. This matches + the changes made to the Autotools-based build in XZ Utils + 5.4.2 and 5.2.11. + + - Disable symbol versioning on non-glibc Linux to match what + the Autotools build does. For example, symbol versioning + isn't enabled with musl. + + - Symbol versioning variant can now be overridden by setting + SYMBOL_VERSIONING to "OFF", "generic", or "linux". + + * Documentation: + + - Clarify the description of --disable-assembler in INSTALL. + The option only affects 32-bit x86 assembly usage. + + - Add doc/examples/11_file_info.c. It was added to the + Git repository in 2017 but forgotten to be added into + distribution tarballs. + + - Don't install the TODO file as part of the documentation. + The file is out of date. + + - Update home page URLs back to their old locations on + tukaani.org. + + - Update maintainer info. + + +5.4.6 (2024-01-26) + + * Fixed a bug involving internal function pointers in liblzma not + being initialized to NULL. The bug can only be triggered if + lzma_filters_update() is called on a LZMA1 encoder, so it does + not affect xz or any application known to us that uses liblzma. + + * xz: + + - Fixed a regression introduced in 5.4.2 that caused encoding + in the raw format to unnecessarily fail if --suffix was not + used. For instance, the following command no longer reports + that --suffix must be used: + + echo foo | xz --format=raw --lzma2 | wc -c + + - Fixed an issue on MinGW-w64 builds that prevented reading + from or writing to non-terminal character devices like NUL. + + * Added a new test. + + +5.4.5 (2023-11-01) + + * liblzma: + + - Use __attribute__((__no_sanitize_address__)) to avoid address + sanitization with CRC64 CLMUL. It uses 16-byte-aligned reads + which can extend past the bounds of the input buffer and + inherently trigger address sanitization errors. This isn't + a bug. + + - Fixed an assertion failure that could be triggered by a large + unpadded_size argument. It was verified that there was no + other bug than the assertion failure. + + - Fixed a bug that prevented building with Windows Vista + threading when __attribute__((__constructor__)) is not + supported. + + * xz now properly handles special files such as "con" or "nul" on + Windows. Before this fix, the following wrote "foo" to the + console and deleted the input file "con_xz": + + echo foo | xz > con_xz + xz --suffix=_xz --decompress con_xz + + * Build systems: + + - Allow builds with Windows win95 threading and small mode when + __attribute__((__constructor__)) is supported. + + - Added a new line to liblzma.pc for MSYS2 (Windows): + + Cflags.private: -DLZMA_API_STATIC + + When compiling code that will link against static liblzma, + the LZMA_API_STATIC macro needs to be defined on Windows. + + - CMake specific changes: + + * Fixed a bug that allowed CLOCK_MONOTONIC to be used even + if the check for it failed. + + * Fixed a bug where configuring CMake multiple times + resulted in HAVE_CLOCK_GETTIME and HAVE_CLOCK_MONOTONIC + not being set. + + * Fixed the build with MinGW-w64-based Clang/LLVM 17. + llvm-windres now has more accurate GNU windres emulation + so the GNU windres workaround from 5.4.1 is needed with + llvm-windres version 17 too. + + * The import library on Windows is now properly named + "liblzma.dll.a" instead of "libliblzma.dll.a" + + * Fixed a bug causing the Ninja Generator to fail on + UNIX-like systems. This bug was introduced in 5.4.0. + + * Added a new option to disable CLMUL CRC64. + + * A module-definition (.def) file is now created when + building liblzma.dll with MinGW-w64. + + * The pkg-config liblzma.pc file is now installed on all + builds except when using MSVC on Windows. + + * Added large file support by default for platforms that + need it to handle files larger than 2 GiB. This includes + MinGW-w64, even 64-bit builds. + + * Small fixes and improvements to the tests. + + * Updated translations: Chinese (simplified) and Esperanto. + + +5.4.4 (2023-08-02) + + * liblzma and xzdec can now build against WASI SDK when threading + support is disabled. xz and tests don't build yet. + + * CMake: + + - Fixed a bug preventing other projects from including liblzma + multiple times using find_package(). + + - Don't create broken symlinks in Cygwin and MSYS2 unless + supported by the environment. This prevented building for the + default MSYS2 environment. The problem was introduced in + xz 5.4.0. + + * Documentation: + + - Small improvements to man pages. + + - Small improvements and typo fixes for liblzma API + documentation. + + * Tests: + + - Added a new section to INSTALL to describe basic test usage + and address recent questions about building the tests when + cross compiling. + + - Small fixes and improvements to the tests. + + * Translations: + + - Fixed a mistake that caused one of the error messages to not + be translated. This only affected versions 5.4.2 and 5.4.3. + + - Updated the Chinese (simplified), Croatian, Esperanto, German, + Korean, Polish, Romanian, Spanish, Swedish, Ukrainian, and + Vietnamese translations. + + - Updated the German, Korean, Romanian, and Ukrainian man page + translations. + + +5.4.3 (2023-05-04) + + * All fixes from 5.2.12 + + * Features in the CMake build can now be disabled as CMake cache + variables, similar to the Autotools build. + + * Minor update to the Croatian translation. + + +5.4.2 (2023-03-18) + + * All fixes from 5.2.11 that were not included in 5.4.1. + + * If xz is built with support for the Capsicum sandbox but running + in an environment that doesn't support Capsicum, xz now runs + normally without sandboxing instead of exiting with an error. + + * liblzma: + + - Documentation was updated to improve the style, consistency, + and completeness of the liblzma API headers. + + - The Doxygen-generated HTML documentation for the liblzma API + header files is now included in the source release and is + installed as part of "make install". All JavaScript is + removed to simplify license compliance and to reduce the + install size. + + - Fixed a minor bug in lzma_str_from_filters() that produced + too many filters in the output string instead of reporting + an error if the input array had more than four filters. This + bug did not affect xz. + + * Build systems: + + - autogen.sh now invokes the doxygen tool via the new wrapper + script doxygen/update-doxygen, unless the command line option + --no-doxygen is used. + + - Added microlzma_encoder.c and microlzma_decoder.c to the + VS project files for Windows and to the CMake build. These + should have been included in 5.3.2alpha. + + * Tests: + + - Added a test to the CMake build that was forgotten in the + previous release. + + - Added and refactored a few tests. + + * Translations: + + - Updated the Brazilian Portuguese translation. + + - Added Brazilian Portuguese man page translation. + + +5.4.1 (2023-01-11) + + * liblzma: + + - Fixed the return value of lzma_microlzma_encoder() if the + LZMA options lc/lp/pb are invalid. Invalid lc/lp/pb options + made the function return LZMA_STREAM_END without encoding + anything instead of returning LZMA_OPTIONS_ERROR. + + - Windows / Visual Studio: Workaround a possible compiler bug + when targeting 32-bit x86 and compiling the CLMUL version of + the CRC64 code. The CLMUL code isn't enabled by the Windows + project files but it is in the CMake-based builds. + + * Build systems: + + - Windows-specific CMake changes: + + * Don't try to enable CLMUL CRC64 code if _mm_set_epi64x() + isn't available. This fixes CMake-based build with Visual + Studio 2013. + + * Created a workaround for a build failure with windres + from GNU binutils. It is used only when the C compiler + is GCC (not Clang). The workaround is incompatible + with llvm-windres, resulting in "XZx20Utils" instead + of "XZ Utils" in the resource file, but without the + workaround llvm-windres works correctly. See the + comment in CMakeLists.txt for details. + + * Included the resource files in the xz and xzdec build + rules. Building the command line tools is still + experimental but possible with MinGW-w64. + + - Visual Studio: Added stream_decoder_mt.c to the project + files. Now the threaded decompressor lzma_stream_decoder_mt() + gets built. CMake-based build wasn't affected. + + - Updated windows/INSTALL-MSVC.txt to mention that CMake-based + build is now the preferred method with Visual Studio. The + project files will probably be removed after 5.4.x releases. + + - Changes to #defines in config.h: + + * HAVE_DECL_CLOCK_MONOTONIC was replaced by + HAVE_CLOCK_MONOTONIC. The old macro was always defined + in configure-generated config.h to either 0 or 1. The + new macro is defined (to 1) only if the declaration of + CLOCK_MONOTONIC is available. This matches the way most + other config.h macros work and makes things simpler with + other build systems. + + * HAVE_DECL_PROGRAM_INVOCATION_NAME was replaced by + HAVE_PROGRAM_INVOCATION_NAME for the same reason. + + * Tests: + + - Fixed test script compatibility with ancient /bin/sh + versions. Now the five test_compress_* tests should + no longer fail on Solaris 10. + + - Added and refactored a few tests. + + * Translations: + + - Updated the Catalan and Esperanto translations. + + - Added Korean and Ukrainian man page translations. + + +5.4.0 (2022-12-13) + + This bumps the minor version of liblzma because new features were + added. The API and ABI are still backward compatible with liblzma + 5.2.x and 5.0.x. + + Since 5.3.5beta: + + * All fixes from 5.2.10. + + * The ARM64 filter is now stable. The xz option is now --arm64. + Decompression requires XZ Utils 5.4.0. In the future the ARM64 + filter will be supported by XZ for Java, XZ Embedded (including + the version in Linux), LZMA SDK, and 7-Zip. + + * Translations: + + - Updated Catalan, Croatian, German, Romanian, and Turkish + translations. + + - Updated German man page translations. + + - Added Romanian man page translations. + + Summary of new features added in the 5.3.x development releases: + + * liblzma: + + - Added threaded .xz decompressor lzma_stream_decoder_mt(). + It can use multiple threads with .xz files that have multiple + Blocks with size information in Block Headers. The threaded + encoder in xz has always created such files. + + Single-threaded encoder cannot store the size information in + Block Headers even if one used LZMA_FULL_FLUSH to create + multiple Blocks, so this threaded decoder cannot use multiple + threads with such files. + + If there are multiple Streams (concatenated .xz files), one + Stream will be decompressed completely before starting the + next Stream. + + - A new decoder flag LZMA_FAIL_FAST was added. It makes the + threaded decompressor report errors soon instead of first + flushing all pending data before the error location. + + - New Filter IDs: + * LZMA_FILTER_ARM64 is for ARM64 binaries. + * LZMA_FILTER_LZMA1EXT is for raw LZMA1 streams that don't + necessarily use the end marker. + + - Added lzma_str_to_filters(), lzma_str_from_filters(), and + lzma_str_list_filters() to convert a preset or a filter chain + string to a lzma_filter[] and vice versa. These should make + it easier to write applications that allow users to specify + custom compression options. + + - Added lzma_filters_free() which can be convenient for freeing + the filter options in a filter chain (an array of lzma_filter + structures). + + - lzma_file_info_decoder() to makes it a little easier to get + the Index field from .xz files. This helps in getting the + uncompressed file size but an easy-to-use random access + API is still missing which has existed in XZ for Java for + a long time. + + - Added lzma_microlzma_encoder() and lzma_microlzma_decoder(). + It is used by erofs-utils and may be used by others too. + + The MicroLZMA format is a raw LZMA stream (without end marker) + whose first byte (always 0x00) has been replaced with + bitwise-negation of the LZMA properties (lc/lp/pb). It was + created for use in EROFS but may be used in other contexts + as well where it is important to avoid wasting bytes for + stream headers or footers. The format is also supported by + XZ Embedded (the XZ Embedded version in Linux got MicroLZMA + support in Linux 5.16). + + The MicroLZMA encoder API in liblzma can compress into a + fixed-sized output buffer so that as much data is compressed + as can be fit into the buffer while still creating a valid + MicroLZMA stream. This is needed for EROFS. + + - Added lzma_lzip_decoder() to decompress the .lz (lzip) file + format version 0 and the original unextended version 1 files. + Also lzma_auto_decoder() supports .lz files. + + - lzma_filters_update() can now be used with the multi-threaded + encoder (lzma_stream_encoder_mt()) to change the filter chain + after LZMA_FULL_BARRIER or LZMA_FULL_FLUSH. + + - In lzma_options_lzma, allow nice_len = 2 and 3 with the match + finders that require at least 3 or 4. Now it is internally + rounded up if needed. + + - CLMUL-based CRC64 on x86-64 and E2K with runtime processor + detection. On 32-bit x86 it currently isn't available unless + --disable-assembler is used which can make the non-CLMUL + CRC64 slower; this might be fixed in the future. + + - Building with --disable-threads --enable-small + is now thread-safe if the compiler supports + __attribute__((__constructor__)). + + * xz: + + - Using -T0 (--threads=0) will now use multi-threaded encoder + even on a single-core system. This is to ensure that output + from the same xz binary is identical on both single-core and + multi-core systems. + + - --threads=+1 or -T+1 is now a way to put xz into + multi-threaded mode while using only one worker thread. + The + is ignored if the number is not 1. + + - A default soft memory usage limit is now used for compression + when -T0 is used and no explicit limit has been specified. + This soft limit is used to restrict the number of threads + but if the limit is exceeded with even one thread then xz + will continue with one thread using the multi-threaded + encoder and this limit is ignored. If the number of threads + is specified manually then no default limit will be used; + this affects only -T0. + + This change helps on systems that have very many cores and + using all of them for xz makes no sense. Previously xz -T0 + could run out of memory on such systems because it attempted + to reserve memory for too many threads. + + This also helps with 32-bit builds which don't have a large + amount of address space that would be required for many + threads. The default soft limit for -T0 is at most 1400 MiB + on all 32-bit platforms. + + - Previously a low value in --memlimit-compress wouldn't cause + xz to switch from multi-threaded mode to single-threaded mode + if the limit cannot otherwise be met; xz failed instead. Now + xz can switch to single-threaded mode and then, if needed, + scale down the LZMA2 dictionary size too just like it already + did when it was started in single-threaded mode. + + - The option --no-adjust no longer prevents xz from scaling down + the number of threads as that doesn't affect the compressed + output (only performance). Now --no-adjust only prevents + adjustments that affect compressed output, that is, with + --no-adjust xz won't switch from multi-threaded mode to + single-threaded mode and won't scale down the LZMA2 + dictionary size. + + - Added a new option --memlimit-mt-decompress=LIMIT. This is + used to limit the number of decompressor threads (possibly + falling back to single-threaded mode) but it will never make + xz refuse to decompress a file. This has a system-specific + default value because without any limit xz could end up + allocating memory for the whole compressed input file, the + whole uncompressed output file, multiple thread-specific + decompressor instances and so on. Basically xz could + attempt to use an insane amount of memory even with fairly + common files. The system-specific default value is currently + the same as the one used for compression with -T0. + + The new option works together with the existing option + --memlimit-decompress=LIMIT. The old option sets a hard limit + that must not be exceeded (xz will refuse to decompress) + while the new option only restricts the number of threads. + If the limit set with --memlimit-mt-decompress is greater + than the limit set with --memlimit-compress, then the latter + value is used also for --memlimit-mt-decompress. + + - Added new information to the output of xz --info-memory and + new fields to the output of xz --robot --info-memory. + + - In --lzma2=nice=NUMBER allow 2 and 3 with all match finders + now that liblzma handles it. + + - Don't mention endianness for ARM and ARM-Thumb filters in + --long-help. The filters only work for little endian + instruction encoding but modern ARM processors using + big endian data access still use little endian + instruction encoding. So the help text was misleading. + In contrast, the PowerPC filter is only for big endian + 32/64-bit PowerPC code. Little endian PowerPC would need + a separate filter. + + - Added decompression support for the .lz (lzip) file format + version 0 and the original unextended version 1. It is + autodetected by default. See also the option --format on + the xz man page. + + - Sandboxing enabled by default: + * Capsicum (FreeBSD) + * pledge(2) (OpenBSD) + + * Scripts now support the .lz format using xz. + + * A few new tests were added. + + * The liblzma-specific tests are now supported in CMake-based + builds too ("make test"). + + +5.3.5beta (2022-12-01) + + * All fixes from 5.2.9. + + * liblzma: + + - Added new LZMA_FILTER_LZMA1EXT for raw encoder and decoder to + handle raw LZMA1 streams that don't have end of payload marker + (EOPM) alias end of stream (EOS) marker. It can be used in + filter chains, for example, with the x86 BCJ filter. + + - Added lzma_str_to_filters(), lzma_str_from_filters(), and + lzma_str_list_filters() to make it easier for applications + to get custom compression options from a user and convert + it to an array of lzma_filter structures. + + - Added lzma_filters_free(). + + - lzma_filters_update() can now be used with the multi-threaded + encoder (lzma_stream_encoder_mt()) to change the filter chain + after LZMA_FULL_BARRIER or LZMA_FULL_FLUSH. + + - In lzma_options_lzma, allow nice_len = 2 and 3 with the match + finders that require at least 3 or 4. Now it is internally + rounded up if needed. + + - ARM64 filter was modified. It is still experimental. + + - Fixed LTO build with Clang if -fgnuc-version=10 or similar + was used to make Clang look like GCC >= 10. Now it uses + __has_attribute(__symver__) which should be reliable. + + * xz: + + - --threads=+1 or -T+1 is now a way to put xz into multi-threaded + mode while using only one worker thread. + + - In --lzma2=nice=NUMBER allow 2 and 3 with all match finders + now that liblzma handles it. + + * Updated translations: Chinese (simplified), Korean, and Turkish. + + +5.3.4alpha (2022-11-15) + + * All fixes from 5.2.7 and 5.2.8. + + * liblzma: + + - Minor improvements to the threaded decoder. + + - Added CRC64 implementation that uses SSSE3, SSE4.1, and CLMUL + instructions on 32/64-bit x86 and E2K. On 32-bit x86 it's + not enabled unless --disable-assembler is used but then + the non-CLMUL code might be slower. Processor support is + detected at runtime so this is built by default on x86-64 + and E2K. On these platforms, if compiler flags indicate + unconditional CLMUL support (-msse4.1 -mpclmul) then the + generic version is not built, making liblzma 8-9 KiB smaller + compared to having both versions included. + + With extremely compressible files this can make decompression + up to twice as fast but with typical files 5 % improvement + is a more realistic expectation. + + The CLMUL version is slower than the generic version with + tiny inputs (especially at 1-8 bytes per call, but up to + 16 bytes). In normal use in xz this doesn't matter at all. + + - Added an experimental ARM64 filter. This is *not* the final + version! Files created with this experimental version won't + be supported in the future versions! The filter design is + a compromise where improving one use case makes some other + cases worse. + + - Added decompression support for the .lz (lzip) file format + version 0 and the original unextended version 1. See the + API docs of lzma_lzip_decoder() for details. Also + lzma_auto_decoder() supports .lz files. + + - Building with --disable-threads --enable-small + is now thread-safe if the compiler supports + __attribute__((__constructor__)) + + * xz: + + - Added support for OpenBSD's pledge(2) as a sandboxing method. + + - Don't mention endianness for ARM and ARM-Thumb filters in + --long-help. The filters only work for little endian + instruction encoding but modern ARM processors using + big endian data access still use little endian + instruction encoding. So the help text was misleading. + In contrast, the PowerPC filter is only for big endian + 32/64-bit PowerPC code. Little endian PowerPC would need + a separate filter. + + - Added --experimental-arm64. This will be renamed once the + filter is finished. Files created with this experimental + filter will not be supported in the future! + + - Added new fields to the output of xz --robot --info-memory. + + - Added decompression support for the .lz (lzip) file format + version 0 and the original unextended version 1. It is + autodetected by default. See also the option --format on + the xz man page. + + * Scripts now support the .lz format using xz. + + * Build systems: + + - New #defines in config.h: HAVE_ENCODER_ARM64, + HAVE_DECODER_ARM64, HAVE_LZIP_DECODER, HAVE_CPUID_H, + HAVE_FUNC_ATTRIBUTE_CONSTRUCTOR, HAVE_USABLE_CLMUL + + - New configure options: --disable-clmul-crc, + --disable-microlzma, --disable-lzip-decoder, and + 'pledge' is now an option in --enable-sandbox (but + it's autodetected by default anyway). + + - INSTALL was updated to document the new configure options. + + - PACKAGERS now lists also --disable-microlzma and + --disable-lzip-decoder as configure options that must + not be used in builds for non-embedded use. + + * Tests: + + - Fix some of the tests so that they skip instead of fail if + certain features have been disabled with configure options. + It's still not perfect. + + - Other improvements to tests. + + * Updated translations: Croatian, Finnish, Hungarian, Polish, + Romanian, Spanish, Swedish, and Ukrainian. + + +5.3.3alpha (2022-08-22) + + * All fixes from 5.2.6. + + * liblzma: + + - Fixed 32-bit build. + + - Added threaded .xz decompressor lzma_stream_decoder_mt(). + It can use multiple threads with .xz files that have multiple + Blocks with size information in Block Headers. The threaded + encoder in xz has always created such files. + + Single-threaded encoder cannot store the size information in + Block Headers even if one used LZMA_FULL_FLUSH to create + multiple Blocks, so this threaded decoder cannot use multiple + threads with such files. + + If there are multiple Streams (concatenated .xz files), one + Stream will be decompressed completely before starting the + next Stream. + + - A new decoder flag LZMA_FAIL_FAST was added. It makes the + threaded decompressor report errors soon instead of first + flushing all pending data before the error location. + + * xz: + + - Using -T0 (--threads=0) will now use multi-threaded encoder + even on a single-core system. This is to ensure that output + from the same xz binary is identical on both single-core and + multi-core systems. + + - A default soft memory usage limit is now used for compression + when -T0 is used and no explicit limit has been specified. + This soft limit is used to restrict the number of threads + but if the limit is exceeded with even one thread then xz + will continue with one thread using the multi-threaded + encoder and this limit is ignored. If the number of threads + is specified manually then no default limit will be used; + this affects only -T0. + + This change helps on systems that have very many cores and + using all of them for xz makes no sense. Previously xz -T0 + could run out of memory on such systems because it attempted + to reserve memory for too many threads. + + This also helps with 32-bit builds which don't have a large + amount of address space that would be required for many + threads. The default limit is 1400 MiB on all 32-bit + platforms with -T0. + + Now xz -T0 should just work. It might use too few threads + in some cases but at least it shouldn't easily run out of + memory. It's possible that this will be tweaked before 5.4.0. + + - Changes to --memlimit-compress and --no-adjust: + + In single-threaded mode, --memlimit-compress can make xz + scale down the LZMA2 dictionary size to meet the memory usage + limit. This obviously affects the compressed output. However, + if xz was in threaded mode, --memlimit-compress could make xz + reduce the number of threads but it wouldn't make xz switch + from multi-threaded mode to single-threaded mode or scale + down the LZMA2 dictionary size. This seemed illogical. + + Now --memlimit-compress can make xz switch to single-threaded + mode if one thread in multi-threaded mode uses too much + memory. If memory usage is still too high, then the LZMA2 + dictionary size can be scaled down too. + + The option --no-adjust was also changed so that it no longer + prevents xz from scaling down the number of threads as that + doesn't affect compressed output (only performance). After + this commit --no-adjust only prevents adjustments that affect + compressed output, that is, with --no-adjust xz won't switch + from multithreaded mode to single-threaded mode and won't + scale down the LZMA2 dictionary size. + + - Added a new option --memlimit-mt-decompress=LIMIT. This is + used to limit the number of decompressor threads (possibly + falling back to single-threaded mode) but it will never make + xz refuse to decompress a file. This has a system-specific + default value because without any limit xz could end up + allocating memory for the whole compressed input file, the + whole uncompressed output file, multiple thread-specific + decompressor instances and so on. Basically xz could + attempt to use an insane amount of memory even with fairly + common files. + + The new option works together with the existing option + --memlimit-decompress=LIMIT. The old option sets a hard limit + that must not be exceeded (xz will refuse to decompress) + while the new option only restricts the number of threads. + If the limit set with --memlimit-mt-decompress is greater + than the limit set with --memlimit-compress, then the latter + value is used also for --memlimit-mt-decompress. + + * Tests: + + - Added a few more tests. + + - Added tests/code_coverage.sh to create a code coverage report + of the tests. + + * Build systems: + + - Automake's parallel test harness is now used to make tests + finish faster. + + - Added the CMake files to the distribution tarball. These were + supposed to be in 5.2.5 already. + + - Added liblzma tests to the CMake build. + + - Windows: Fix building of liblzma.dll with the included + Visual Studio project files. + + +5.3.2alpha (2021-10-28) + + This release was made on short notice so that recent erofs-utils can + be built with LZMA support without needing a snapshot from xz.git. + Thus many pending things were not included, not even updated + translations (which would need to be updated for the new --list + strings anyway). + + * All fixes from 5.2.5. + + * xz: + + - When copying metadata from the source file to the destination + file, don't try to set the group (GID) if it is already set + correctly. This avoids a failure on OpenBSD (and possibly on + a few other OSes) where files may get created so that their + group doesn't belong to the user, and fchown(2) can fail even + if it needs to do nothing. + + - The --keep option now accepts symlinks, hardlinks, and + setuid, setgid, and sticky files. Previously this required + using --force. + + - Split the long strings used in --list and --info-memory modes + to make them much easier for translators. + + - If built with sandbox support and enabling the sandbox fails, + xz will now immediately exit with exit status of 1. Previously + it would only display a warning if -vv was used. + + - Cap --memlimit-compress to 2000 MiB on MIPS32 because on + MIPS32 userspace processes are limited to 2 GiB of address + space. + + * liblzma: + + - Added lzma_microlzma_encoder() and lzma_microlzma_decoder(). + The API is in lzma/container.h. + + The MicroLZMA format is a raw LZMA stream (without end marker) + whose first byte (always 0x00) has been replaced with + bitwise-negation of the LZMA properties (lc/lp/pb). It was + created for use in EROFS but may be used in other contexts + as well where it is important to avoid wasting bytes for + stream headers or footers. The format is also supported by + XZ Embedded. + + The MicroLZMA encoder API in liblzma can compress into a + fixed-sized output buffer so that as much data is compressed + as can be fit into the buffer while still creating a valid + MicroLZMA stream. This is needed for EROFS. + + - Added fuzzing support. + + - Support Intel Control-flow Enforcement Technology (CET) in + 32-bit x86 assembly files. + + - Visual Studio: Use non-standard _MSVC_LANG to detect C++ + standard version in the lzma.h API header. It's used to + detect when "noexcept" can be used. + + * Scripts: + + - Fix exit status of xzdiff/xzcmp. Exit status could be 2 when + the correct value is 1. + + - Fix exit status of xzgrep. + + - Detect corrupt .bz2 files in xzgrep. + + - Add zstd support to xzgrep and xzdiff/xzcmp. + + - Fix less(1) version detection in xzless. It failed if the + version number from "less -V" contained a dot. + + * Fix typos and technical issues in man pages. + + * Build systems: + + - Windows: Fix building of resource files when config.h isn't + used. CMake + Visual Studio can now build liblzma.dll. + + - Various fixes to the CMake support. It might still need a few + more fixes even for liblzma-only builds. + + +5.3.1alpha (2018-04-29) + + * All fixes from 5.2.4. + + * Add lzma_file_info_decoder() into liblzma and use it in xz to + implement the --list feature. + + * Capsicum sandbox support is enabled by default where available + (FreeBSD >= 10). + + +5.2.13 (2024-05-29) + + * liblzma: + + - lzma_index_append(): Fix an assertion failure that could be + triggered by a large unpadded_size argument. It was verified + that there was no other bug than the assertion failure. + + - lzma_index_decoder() and lzma_index_buffer_decode(): Fix + a missing output pointer initialization (*i = NULL) if the + functions are called with invalid arguments. The API docs + say that such an initialization is always done. In practice + this matters very little because the problem can only occur + if the calling application has a bug and these functions + return LZMA_PROG_ERROR. + + - Fix C standard conformance with function pointer types. + This newly showed up with Clang 17 with -fsanitize=undefined. + There are no bug reports about this. + + - Fix building with NVIDIA HPC SDK. + + - Fix building with Windows Vista threads and --enable-small. + (CMake build doesn't support ENABLE_SMALL in XZ Utils 5.2.x.) + + * xz: + + - Fix a C standard conformance issue in --block-list parsing + (arithmetic on a null pointer). + + - Fix a warning from GNU groff when processing the man page: + "warning: cannot select font 'CW'" + + - Windows: Handle special files such as "con" or "nul". Earlier + the following wrote "foo" to the console and deleted the input + file "con_xz": + + echo foo | xz > con_xz + xz --suffix=_xz --decompress con_xz + + - Windows: Fix an issue that prevented reading from or writing + to non-terminal character devices like NUL. + + * xzless: + + - With "less" version 451 and later, use "||-" instead of "|-" + in the environment variable LESSOPEN. This way compressed + files that contain no uncompressed data are shown correctly + as empty. + + - With "less" version 632 and later, use --show-preproc-errors + to make "less" show a warning on decompression errors. + + * Build systems: + + - Add a new line to liblzma.pc for MSYS2 (Windows): + + Cflags.private: -DLZMA_API_STATIC + + When compiling code that will link against static liblzma, + the LZMA_API_STATIC macro needs to be defined on Windows. + + - Autotools (configure): + + * Symbol versioning variant can now be overridden with + --enable-symbol-versions. Documentation in INSTALL was + updated to match. + + - CMake: + + * Fix a bug that prevented other projects from including + liblzma multiple times using find_package(). + + * Fix a bug where configuring CMake multiple times resulted + in HAVE_CLOCK_GETTIME and HAVE_CLOCK_MONOTONIC not being + defined. + + * Fix the build with MinGW-w64-based Clang/LLVM 17. + llvm-windres now has more accurate GNU windres emulation + so the GNU windres workaround from 5.4.1 is needed with + llvm-windres version 17 too. + + * The import library on Windows is now properly named + "liblzma.dll.a" instead of "libliblzma.dll.a" + + * Add large file support by default for platforms that + need it to handle files larger than 2 GiB. This includes + MinGW-w64, even 64-bit builds. + + * Linux on MicroBlaze is handled specially now. This + matches the changes made to the Autotools-based build + in XZ Utils 5.4.2 and 5.2.11. + + * Disable symbol versioning on non-glibc Linux to match + what the Autotools build does. For example, symbol + versioning isn't enabled with musl. + + * Symbol versioning variant can now be overridden by + setting SYMBOL_VERSIONING to "OFF", "generic", or + "linux". + + * Documentation: + + - Clarify the description of --disable-assembler in INSTALL. + The option only affects 32-bit x86 assembly usage. + + - Don't install the TODO file as part of the documentation. + The file is out of date. + + - Update home page URLs back to their old locations on + tukaani.org. + + - Update maintainer info. + + +5.2.12 (2023-05-04) + + * Fixed a build system bug that prevented building liblzma as a + shared library when configured with --disable-threads. This bug + affected releases 5.2.6 to 5.2.11 and 5.4.0 to 5.4.2. + + * Include for Windows intrinsic functions where they are + needed. This fixed a bug that prevented building liblzma using + clang-cl on Windows. + + * Minor update to the Croatian translation. The small change + applies to a string in both 5.2 and 5.4 branches. + + +5.2.11 (2023-03-18) + + * Removed all possible cases of null pointer + 0. It is undefined + behavior in C99 and C17. This was detected by a sanitizer and had + not caused any known issues. + + * Build systems: + + - Added a workaround for building with GCC on MicroBlaze Linux. + GCC 12 on MicroBlaze doesn't support the __symver__ attribute + even though __has_attribute(__symver__) returns true. The + build is now done without the extra RHEL/CentOS 7 symbols + that were added in XZ Utils 5.2.7. The workaround only + applies to the Autotools build (not CMake). + + - CMake: Ensure that the C compiler language is set to C99 or + a newer standard. + + - CMake changes from XZ Utils 5.4.1: + + * Added a workaround for a build failure with + windres from GNU binutils. + + * Included the Windows resource files in the xz + and xzdec build rules. + + +5.2.10 (2022-12-13) + + * xz: Don't modify argv[] when parsing the --memlimit* and + --block-list command line options. This fixes confusing + arguments in process listing (like "ps auxf"). + + * GNU/Linux only: Use __has_attribute(__symver__) to detect if + that attribute is supported. This fixes build on Mandriva where + Clang is patched to define __GNUC__ to 11 by default (instead + of 4 as used by Clang upstream). + + +5.2.9 (2022-11-30) + + * liblzma: + + - Fixed an infinite loop in LZMA encoder initialization + if dict_size >= 2 GiB. (The encoder only supports up + to 1536 MiB.) + + - Fixed two cases of invalid free() that can happen if + a tiny allocation fails in encoder re-initialization + or in lzma_filters_update(). These bugs had some + similarities with the bug fixed in 5.2.7. + + - Fixed lzma_block_encoder() not allowing the use of + LZMA_SYNC_FLUSH with lzma_code() even though it was + documented to be supported. The sync-flush code in + the Block encoder was already used internally via + lzma_stream_encoder(), so this was just a missing flag + in the lzma_block_encoder() API function. + + - GNU/Linux only: Don't put symbol versions into static + liblzma as it breaks things in some cases (and even if + it didn't break anything, symbol versions in static + libraries are useless anyway). The downside of the fix + is that if the configure options --with-pic or --without-pic + are used then it's not possible to build both shared and + static liblzma at the same time on GNU/Linux anymore; + with those options --disable-static or --disable-shared + must be used too. + + * New email address for bug reports is which + forwards messages to Lasse Collin and Jia Tan. + + +5.2.8 (2022-11-13) + + * xz: + + - If xz cannot remove an input file when it should, this + is now treated as a warning (exit status 2) instead of + an error (exit status 1). This matches GNU gzip and it + is more logical as at that point the output file has + already been successfully closed. + + - Fix handling of .xz files with an unsupported check type. + Previously such printed a warning message but then xz + behaved as if an error had occurred (didn't decompress, + exit status 1). Now a warning is printed, decompression + is done anyway, and exit status is 2. This used to work + slightly before 5.0.0. In practice this bug matters only + if xz has been built with some check types disabled. As + instructed in PACKAGERS, such builds should be done in + special situations only. + + - Fix "xz -dc --single-stream tests/files/good-0-empty.xz" + which failed with "Internal error (bug)". That is, + --single-stream was broken if the first .xz stream in + the input file didn't contain any uncompressed data. + + - Fix displaying file sizes in the progress indicator when + working in passthru mode and there are multiple input files. + Just like "gzip -cdf", "xz -cdf" works like "cat" when the + input file isn't a supported compressed file format. In + this case the file size counters weren't reset between + files so with multiple input files the progress indicator + displayed an incorrect (too large) value. + + * liblzma: + + - API docs in lzma/container.h: + * Update the list of decoder flags in the decoder + function docs. + * Explain LZMA_CONCATENATED behavior with .lzma files + in lzma_auto_decoder() docs. + + - OpenBSD: Use HW_NCPUONLINE to detect the number of + available hardware threads in lzma_physmem(). + + - Fix use of wrong macro to detect x86 SSE2 support. + __SSE2_MATH__ was used with GCC/Clang but the correct + one is __SSE2__. The first one means that SSE2 is used + for floating point math which is irrelevant here. + The affected SSE2 code isn't used on x86-64 so this affects + only 32-bit x86 builds that use -msse2 without -mfpmath=sse + (there is no runtime detection for SSE2). It improves LZMA + compression speed (not decompression). + + - Fix the build with Intel C compiler 2021 (ICC, not ICX) + on Linux. It defines __GNUC__ to 10 but doesn't support + the __symver__ attribute introduced in GCC 10. + + * Scripts: Ignore warnings from xz by using --quiet --no-warn. + This is needed if the input .xz files use an unsupported + check type. + + * Translations: + + - Updated Croatian and Turkish translations. + + - One new translations wasn't included because it needed + technical fixes. It will be in upcoming 5.4.0. No new + translations will be added to the 5.2.x branch anymore. + + - Renamed the French man page translation file from + fr_FR.po to fr.po and thus also its install directory + (like /usr/share/man/fr_FR -> .../fr). + + - Man page translations for upcoming 5.4.0 are now handled + in the Translation Project. + + * Update doc/faq.txt a little so it's less out-of-date. + + +5.2.7 (2022-09-30) + + * liblzma: + + - Made lzma_filters_copy() to never modify the destination + array if an error occurs. lzma_stream_encoder() and + lzma_stream_encoder_mt() already assumed this. Before this + change, if a tiny memory allocation in lzma_filters_copy() + failed it would lead to a crash (invalid free() or invalid + memory reads) in the cleanup paths of these two encoder + initialization functions. + + - Added missing integer overflow check to lzma_index_append(). + This affects xz --list and other applications that decode + the Index field from .xz files using lzma_index_decoder(). + Normal decompression of .xz files doesn't call this code + and thus most applications using liblzma aren't affected + by this bug. + + - Single-threaded .xz decoder (lzma_stream_decoder()): If + lzma_code() returns LZMA_MEMLIMIT_ERROR it is now possible + to use lzma_memlimit_set() to increase the limit and continue + decoding. This was supposed to work from the beginning + but there was a bug. With other decoders (.lzma or + threaded .xz decoder) this already worked correctly. + + - Fixed accumulation of integrity check type statistics in + lzma_index_cat(). This bug made lzma_index_checks() return + only the type of the integrity check of the last Stream + when multiple lzma_indexes were concatenated. Most + applications don't use these APIs but in xz it made + xz --list not list all check types from concatenated .xz + files. In xz --list --verbose only the per-file "Check:" + lines were affected and in xz --robot --list only the "file" + line was affected. + + - Added ABI compatibility with executables that were linked + against liblzma in RHEL/CentOS 7 or other liblzma builds + that had copied the problematic patch from RHEL/CentOS 7 + (xz-5.2.2-compat-libs.patch). For the details, see the + comment at the top of src/liblzma/validate_map.sh. + + WARNING: This uses __symver__ attribute with GCC >= 10. + In other cases the traditional __asm__(".symver ...") + is used. Using link-time optimization (LTO, -flto) with + GCC versions older than 10 can silently result in + broken liblzma.so.5 (incorrect symbol versions)! If you + want to use -flto with GCC, you must use GCC >= 10. + LTO with Clang seems to work even with the traditional + __asm__(".symver ...") method. + + * xzgrep: Fixed compatibility with old shells that break if + comments inside command substitutions have apostrophes ('). + This problem was introduced in 5.2.6. + + * Build systems: + + - New #define in config.h: HAVE_SYMBOL_VERSIONS_LINUX + + - Windows: Fixed liblzma.dll build with Visual Studio project + files. It broke in 5.2.6 due to a change that was made to + improve CMake support. + + - Windows: Building liblzma with UNICODE defined should now + work. + + - CMake files are now actually included in the release tarball. + They should have been in 5.2.5 already. + + - Minor CMake fixes and improvements. + + * Added a new translation: Turkish + + +5.2.6 (2022-08-12) + + * xz: + + - The --keep option now accepts symlinks, hardlinks, and + setuid, setgid, and sticky files. Previously this required + using --force. + + - When copying metadata from the source file to the destination + file, don't try to set the group (GID) if it is already set + correctly. This avoids a failure on OpenBSD (and possibly on + a few other OSes) where files may get created so that their + group doesn't belong to the user, and fchown(2) can fail even + if it needs to do nothing. + + - Cap --memlimit-compress to 2000 MiB instead of 4020 MiB on + MIPS32 because on MIPS32 userspace processes are limited + to 2 GiB of address space. + + * liblzma: + + - Fixed a missing error-check in the threaded encoder. If a + small memory allocation fails, a .xz file with an invalid + Index field would be created. Decompressing such a file would + produce the correct output but result in an error at the end. + Thus this is a "mild" data corruption bug. Note that while + a failed memory allocation can trigger the bug, it cannot + cause invalid memory access. + + - The decoder for .lzma files now supports files that have + uncompressed size stored in the header and still use the + end of payload marker (end of stream marker) at the end + of the LZMA stream. Such files are rare but, according to + the documentation in LZMA SDK, they are valid. + doc/lzma-file-format.txt was updated too. + + - Improved 32-bit x86 assembly files: + * Support Intel Control-flow Enforcement Technology (CET) + * Use non-executable stack on FreeBSD. + + - Visual Studio: Use non-standard _MSVC_LANG to detect C++ + standard version in the lzma.h API header. It's used to + detect when "noexcept" can be used. + + * xzgrep: + + - Fixed arbitrary command injection via a malicious filename + (CVE-2022-1271, ZDI-CAN-16587). A standalone patch for + this was released to the public on 2022-04-07. A slight + robustness improvement has been made since then and, if + using GNU or *BSD grep, a new faster method is now used + that doesn't use the old sed-based construct at all. This + also fixes bad output with GNU grep >= 3.5 (2020-09-27) + when xzgrepping binary files. + + This vulnerability was discovered by: + cleemy desu wayo working with Trend Micro Zero Day Initiative + + - Fixed detection of corrupt .bz2 files. + + - Improved error handling to fix exit status in some situations + and to fix handling of signals: in some situations a signal + didn't make xzgrep exit when it clearly should have. It's + possible that the signal handling still isn't quite perfect + but hopefully it's good enough. + + - Documented exit statuses on the man page. + + - xzegrep and xzfgrep now use "grep -E" and "grep -F" instead + of the deprecated egrep and fgrep commands. + + - Fixed parsing of the options -E, -F, -G, -P, and -X. The + problem occurred when multiple options were specified in + a single argument, for example, + + echo foo | xzgrep -Fe foo + + treated foo as a filename because -Fe wasn't correctly + split into -F -e. + + - Added zstd support. + + * xzdiff/xzcmp: + + - Fixed wrong exit status. Exit status could be 2 when the + correct value is 1. + + - Documented on the man page that exit status of 2 is used + for decompression errors. + + - Added zstd support. + + * xzless: + + - Fix less(1) version detection. It failed if the version number + from "less -V" contained a dot. + + * Translations: + + - Added new translations: Catalan, Croatian, Esperanto, + Korean, Portuguese, Romanian, Serbian, Spanish, Swedish, + and Ukrainian + + - Updated the Brazilian Portuguese translation. + + - Added French man page translation. This and the existing + German translation aren't complete anymore because the + English man pages got a few updates and the translators + weren't reached so that they could update their work. + + * Build systems: + + - Windows: Fix building of resource files when config.h isn't + used. CMake + Visual Studio can now build liblzma.dll. + + - Various fixes to the CMake support. Building static or shared + liblzma should work fine in most cases. In contrast, building + the command line tools with CMake is still clearly incomplete + and experimental and should be used for testing only. + + +5.2.5 (2020-03-17) + + * liblzma: + + - Fixed several C99/C11 conformance bugs. Now the code is clean + under gcc/clang -fsanitize=undefined. Some of these changes + might have a negative effect on performance with old GCC + versions or compilers other than GCC and Clang. The configure + option --enable-unsafe-type-punning can be used to (mostly) + restore the old behavior but it shouldn't normally be used. + + - Improved API documentation of lzma_properties_decode(). + + - Added a very minor encoder speed optimization. + + * xz: + + - Fixed a crash in "xz -dcfv not_an_xz_file". All four options + were required to trigger it. The crash occurred in the + progress indicator code when xz was in passthru mode where + xz works like "cat". + + - Fixed an integer overflow with 32-bit off_t. It could happen + when decompressing a file that has a long run of zero bytes + which xz would try to write as a sparse file. Since the build + system enables large file support by default, off_t is + normally 64-bit even on 32-bit systems. + + - Fixes for --flush-timeout: + * Fix semi-busy-waiting. + * Avoid unneeded flushes when no new input has arrived + since the previous flush was completed. + + - Added a special case for 32-bit xz: If --memlimit-compress is + used to specify a limit that exceeds 4020 MiB, the limit will + be set to 4020 MiB. The values "0" and "max" aren't affected + by this and neither is decompression. This hack can be + helpful when a 32-bit xz has access to 4 GiB address space + but the specified memlimit exceeds 4 GiB. This can happen + e.g. with some scripts. + + - Capsicum sandbox is now enabled by default where available + (FreeBSD >= 10). The sandbox debug messages (xz -vv) were + removed since they seemed to be more annoying than useful. + + - DOS build now requires DJGPP 2.05 instead of 2.04beta. + A workaround for a locale problem with DJGPP 2.05 was added. + + * xzgrep and other scripts: + + - Added a configure option --enable-path-for-scripts=PREFIX. + It is disabled by default except on Solaris where the default + is /usr/xpg4/bin. See INSTALL for details. + + - Added a workaround for a POSIX shell detection problem on + Solaris. + + * Build systems: + + - Added preliminary build instructions for z/OS. See INSTALL + section 1.2.9. + + - Experimental CMake support was added. It should work to build + static liblzma on a few operating systems. It may or may not + work to build shared liblzma. On some platforms it can build + xz and xzdec too but those are only for testing. See the + comment in the beginning of CMakeLists.txt for details. + + - Visual Studio project files were updated. + WindowsTargetPlatformVersion was removed from VS2017 files + and set to "10.0" in the added VS2019 files. In the future + the VS project files will be removed when CMake support is + good enough. + + - New #defines in config.h: HAVE___BUILTIN_ASSUME_ALIGNED, + HAVE___BUILTIN_BSWAPXX, and TUKLIB_USE_UNSAFE_TYPE_PUNNING. + + - autogen.sh has a new optional dependency on po4a and a new + option --no-po4a to skip that step. This matters only if one + wants to remake the build files. po4a is used to update the + translated man pages but as long as the man pages haven't + been modified, there's nothing to update and one can use + --no-po4a to avoid the dependency on po4a. + + * Translations: + + - XZ Utils translations are now handled by the Translation + Project: https://translationproject.org/domain/xz.html + + - All man pages are now included in German too. + + - New xz translations: Brazilian Portuguese, Finnish, + Hungarian, Chinese (simplified), Chinese (traditional), + and Danish (partial translation) + + - Updated xz translations: French, German, Italian, and Polish + + - Unfortunately a few new xz translations weren't included due + to technical problems like too long lines in --help output or + misaligned column headings in tables. In the future, many of + these strings will be split and e.g. the table column + alignment will be handled in software. This should make the + strings easier to translate. + + +5.2.4 (2018-04-29) + + * liblzma: + + - Allow 0 as memory usage limit instead of returning + LZMA_PROG_ERROR. Now 0 is treated as if 1 byte was specified, + which effectively is the same as 0. + + - Use "noexcept" keyword instead of "throw()" in the public + headers when a C++11 (or newer standard) compiler is used. + + - Added a portability fix for recent Intel C Compilers. + + - Microsoft Visual Studio build files have been moved under + windows/vs2013 and windows/vs2017. + + * xz: + + - Fix "xz --list --robot missing_or_bad_file.xz" which would + try to print an uninitialized string and thus produce garbage + output. Since the exit status is non-zero, most uses of such + a command won't try to interpret the garbage output. + + - "xz --list foo.xz" could print "Internal error (bug)" in a + corner case where a specific memory usage limit had been set. + + +5.2.3 (2016-12-30) + + * xz: + + - Always close a file before trying to delete it to avoid + problems on some operating system and file system combinations. + + - Fixed copying of file timestamps on Windows. + + - Added experimental (disabled by default) sandbox support using + Capsicum (FreeBSD >= 10). See --enable-sandbox in INSTALL. + + * C99/C11 conformance fixes to liblzma. The issues affected at least + some builds using link-time optimizations. + + * Fixed bugs in the rarely-used function lzma_index_dup(). + + * Use of external SHA-256 code is now disabled by default. + It can still be enabled by passing --enable-external-sha256 + to configure. The reasons to disable it by default (see INSTALL + for more details): + + - Some OS-specific SHA-256 implementations conflict with + OpenSSL and cause problems in programs that link against both + liblzma and libcrypto. At least FreeBSD 10 and MINIX 3.3.0 + are affected. + + - The internal SHA-256 is faster than the SHA-256 code in + some operating systems. + + * Changed CPU core count detection to use sched_getaffinity() on + GNU/Linux and GNU/kFreeBSD. + + * Fixes to the build-system and xz to make xz buildable even when + encoders, decoders, or threading have been disabled from libilzma + using configure options. These fixes added two new #defines to + config.h: HAVE_ENCODERS and HAVE_DECODERS. + + +5.2.2 (2015-09-29) + + * Fixed bugs in QNX-specific code. + + * Omitted the use of pipe2() even if it is available to avoid + portability issues with some old Linux and glibc combinations. + + * Updated German translation. + + * Added project files to build static and shared liblzma (not the + whole XZ Utils) with Visual Studio 2013 update 2 or later. + + * Documented that threaded decompression hasn't been implemented + yet. A 5.2.0 NEWS entry describing multi-threading support had + incorrectly said "decompression" when it should have said + "compression". + + +5.2.1 (2015-02-26) + + * Fixed a compression-ratio regression in fast mode of LZMA1 and + LZMA2. The bug is present in 5.1.4beta and 5.2.0 releases. + + * Fixed a portability problem in xz that affected at least OpenBSD. + + * Fixed xzdiff to be compatible with FreeBSD's mktemp which differs + from most other mktemp implementations. + + * Changed CPU core count detection to use cpuset_getaffinity() on + FreeBSD. + + +5.2.0 (2014-12-21) + + Since 5.1.4beta: + + * All fixes from 5.0.8 + + * liblzma: Fixed lzma_stream_encoder_mt_memusage() when a preset + was used. + + * xzdiff: If mktemp isn't installed, mkdir will be used as + a fallback to create a temporary directory. Installing mktemp + is still recommended. + + * Updated French, German, Italian, Polish, and Vietnamese + translations. + + Summary of fixes and new features added in the 5.1.x development + releases: + + * liblzma: + + - Added support for multi-threaded compression. See the + lzma_mt structure, lzma_stream_encoder_mt(), and + lzma_stream_encoder_mt_memusage() in , + lzma_get_progress() in , and lzma_cputhreads() + in for details. + + - Made the uses of lzma_allocator const correct. + + - Added lzma_block_uncomp_encode() to create uncompressed + .xz Blocks using LZMA2 uncompressed chunks. + + - Added support for LZMA_IGNORE_CHECK. + + - A few speed optimizations were made. + + - Added support for symbol versioning. It is enabled by default + on GNU/Linux, other GNU-based systems, and FreeBSD. + + - liblzma (not the whole XZ Utils) should now be buildable + with MSVC 2013 update 2 or later using windows/config.h. + + * xz: + + - Fixed a race condition in the signal handling. It was + possible that e.g. the first SIGINT didn't make xz exit + if reading or writing blocked and one had bad luck. The fix + is non-trivial, so as of writing it is unknown if it will be + backported to the v5.0 branch. + + - Multi-threaded compression can be enabled with the + --threads (-T) option. + [Fixed: This originally said "decompression".] + + - New command line options in xz: --single-stream, + --block-size=SIZE, --block-list=SIZES, + --flush-timeout=TIMEOUT, and --ignore-check. + + - xz -lvv now shows the minimum xz version that is required to + decompress the file. Currently it is 5.0.0 for all supported + .xz files except files with empty LZMA2 streams require 5.0.2. + + * xzdiff and xzgrep now support .lzo files if lzop is installed. + The .tzo suffix is also recognized as a shorthand for .tar.lzo. + + +5.1.4beta (2014-09-14) + + * All fixes from 5.0.6 + + * liblzma: Fixed the use of presets in threaded encoder + initialization. + + * xz --block-list and --block-size can now be used together + in single-threaded mode. Previously the combination only + worked in multi-threaded mode. + + * Added support for LZMA_IGNORE_CHECK to liblzma and made it + available in xz as --ignore-check. + + * liblzma speed optimizations: + + - Initialization of a new LZMA1 or LZMA2 encoder has been + optimized. (The speed of reinitializing an already-allocated + encoder isn't affected.) This helps when compressing many + small buffers with lzma_stream_buffer_encode() and other + similar situations where an already-allocated encoder state + isn't reused. This speed-up is visible in xz too if one + compresses many small files one at a time instead running xz + once and giving all files as command-line arguments. + + - Buffer comparisons are now much faster when unaligned access + is allowed (configured with --enable-unaligned-access). This + speeds up encoding significantly. There is arch-specific code + for 32-bit and 64-bit x86 (32-bit needs SSE2 for the best + results and there's no run-time CPU detection for now). + For other archs there is only generic code which probably + isn't as optimal as arch-specific solutions could be. + + - A few speed optimizations were made to the SHA-256 code. + (Note that the builtin SHA-256 code isn't used on all + operating systems.) + + * liblzma can now be built with MSVC 2013 update 2 or later + using windows/config.h. + + * Vietnamese translation was added. + + +5.1.3alpha (2013-10-26) + + * All fixes from 5.0.5 + + * liblzma: + + - Fixed a deadlock in the threaded encoder. + + - Made the uses of lzma_allocator const correct. + + - Added lzma_block_uncomp_encode() to create uncompressed + .xz Blocks using LZMA2 uncompressed chunks. + + - Added support for native threads on Windows and the ability + to detect the number of CPU cores. + + * xz: + + - Fixed a race condition in the signal handling. It was + possible that e.g. the first SIGINT didn't make xz exit + if reading or writing blocked and one had bad luck. The fix + is non-trivial, so as of writing it is unknown if it will be + backported to the v5.0 branch. + + - Made the progress indicator work correctly in threaded mode. + + - Threaded encoder now works together with --block-list=SIZES. + + - Added preliminary support for --flush-timeout=TIMEOUT. + It can be useful for (somewhat) real-time streaming. For + now the decompression side has to be done with something + else than the xz tool due to how xz does buffering, but this + should be fixed. + + +5.1.2alpha (2012-07-04) + + * All fixes from 5.0.3 and 5.0.4 + + * liblzma: + + - Fixed a deadlock and an invalid free() in the threaded encoder. + + - Added support for symbol versioning. It is enabled by default + on GNU/Linux, other GNU-based systems, and FreeBSD. + + - Use SHA-256 implementation from the operating system if one is + available in libc, libmd, or libutil. liblzma won't use e.g. + OpenSSL or libgcrypt to avoid introducing new dependencies. + + - Fixed liblzma.pc for static linking. + + - Fixed a few portability bugs. + + * xz --decompress --single-stream now fixes the input position after + successful decompression. Now the following works: + + echo foo | xz > foo.xz + echo bar | xz >> foo.xz + ( xz -dc --single-stream ; xz -dc --single-stream ) < foo.xz + + Note that it doesn't work if the input is not seekable + or if there is Stream Padding between the concatenated + .xz Streams. + + * xz -lvv now shows the minimum xz version that is required to + decompress the file. Currently it is 5.0.0 for all supported .xz + files except files with empty LZMA2 streams require 5.0.2. + + * Added an *incomplete* implementation of --block-list=SIZES to xz. + It only works correctly in single-threaded mode and when + --block-size isn't used at the same time. --block-list allows + specifying the sizes of Blocks which can be useful e.g. when + creating files for random-access reading. + + +5.1.1alpha (2011-04-12) + + * All fixes from 5.0.2 + + * liblzma fixes that will also be included in 5.0.3: + + - A memory leak was fixed. + + - lzma_stream_buffer_encode() no longer creates an empty .xz + Block if encoding an empty buffer. Such an empty Block with + LZMA2 data would trigger a bug in 5.0.1 and older (see the + first bullet point in 5.0.2 notes). When releasing 5.0.2, + I thought that no encoder creates this kind of files but + I was wrong. + + - Validate function arguments better in a few functions. Most + importantly, specifying an unsupported integrity check to + lzma_stream_buffer_encode() no longer creates a corrupt .xz + file. Probably no application tries to do that, so this + shouldn't be a big problem in practice. + + - Document that lzma_block_buffer_encode(), + lzma_easy_buffer_encode(), lzma_stream_encoder(), and + lzma_stream_buffer_encode() may return LZMA_UNSUPPORTED_CHECK. + + - The return values of the _memusage() functions are now + documented better. + + * Support for multithreaded compression was added using the simplest + method, which splits the input data into blocks and compresses + them independently. Other methods will be added in the future. + The current method has room for improvement, e.g. it is possible + to reduce the memory usage. + + * Added the options --single-stream and --block-size=SIZE to xz. + + * xzdiff and xzgrep now support .lzo files if lzop is installed. + The .tzo suffix is also recognized as a shorthand for .tar.lzo. + + * Support for short 8.3 filenames under DOS was added to xz. It is + experimental and may change before it gets into a stable release. + + +5.0.8 (2014-12-21) + + * Fixed an old bug in xzgrep that affected OpenBSD and probably + a few other operating systems too. + + * Updated French and German translations. + + * Added support for detecting the amount of RAM on AmigaOS/AROS. + + * Minor build system updates. + + +5.0.7 (2014-09-20) + + * Fix regressions introduced in 5.0.6: + + - Fix building with non-GNU make. + + - Fix invalid Libs.private value in liblzma.pc which broke + static linking against liblzma if the linker flags were + taken from pkg-config. + + +5.0.6 (2014-09-14) + + * xzgrep now exits with status 0 if at least one file matched. + + * A few minor portability and build system fixes + + +5.0.5 (2013-06-30) + + * lzmadec and liblzma's lzma_alone_decoder(): Support decompressing + .lzma files that have less common settings in the headers + (dictionary size other than 2^n or 2^n + 2^(n-1), or uncompressed + size greater than 256 GiB). The limitations existed to avoid false + positives when detecting .lzma files. The lc + lp <= 4 limitation + still remains since liblzma's LZMA decoder has that limitation. + + NOTE: xz's .lzma support or liblzma's lzma_auto_decoder() are NOT + affected by this change. They still consider uncommon .lzma headers + as not being in the .lzma format. Changing this would give way too + many false positives. + + * xz: + + - Interaction of preset and custom filter chain options was + made less illogical. This affects only certain less typical + uses cases so few people are expected to notice this change. + + Now when a custom filter chain option (e.g. --lzma2) is + specified, all preset options (-0 ... -9, -e) earlier are on + the command line are completely forgotten. Similarly, when + a preset option is specified, all custom filter chain options + earlier on the command line are completely forgotten. + + Example 1: "xz -9 --lzma2=preset=5 -e" is equivalent to "xz -e" + which is equivalent to "xz -6e". Earlier -e didn't put xz back + into preset mode and thus the example command was equivalent + to "xz --lzma2=preset=5". + + Example 2: "xz -9e --lzma2=preset=5 -7" is equivalent to + "xz -7". Earlier a custom filter chain option didn't make + xz forget the -e option so the example was equivalent to + "xz -7e". + + - Fixes and improvements to error handling. + + - Various fixes to the man page. + + * xzless: Fixed to work with "less" versions 448 and later. + + * xzgrep: Made -h an alias for --no-filename. + + * Include the previously missing debug/translation.bash which can + be useful for translators. + + * Include a build script for Mac OS X. This has been in the Git + repository since 2010 but due to a mistake in Makefile.am the + script hasn't been included in a release tarball before. + + +5.0.4 (2012-06-22) + + * liblzma: + + - Fix lzma_index_init(). It could crash if memory allocation + failed. + + - Fix the possibility of an incorrect LZMA_BUF_ERROR when a BCJ + filter is used and the application only provides exactly as + much output space as is the uncompressed size of the file. + + - Fix a bug in doc/examples_old/xz_pipe_decompress.c. It didn't + check if the last call to lzma_code() really returned + LZMA_STREAM_END, which made the program think that truncated + files are valid. + + - New example programs in doc/examples (old programs are now in + doc/examples_old). These have more comments and more detailed + error handling. + + * Fix "xz -lvv foo.xz". It could crash on some corrupted files. + + * Fix output of "xz --robot -lv" and "xz --robot -lvv" which + incorrectly printed the filename also in the "foo (x/x)" format. + + * Fix exit status of "xzdiff foo.xz bar.xz". + + * Fix exit status of "xzgrep foo binary_file". + + * Fix portability to EBCDIC systems. + + * Fix a configure issue on AIX with the XL C compiler. See INSTALL + for details. + + * Update French, German, Italian, and Polish translations. + + +5.0.3 (2011-05-21) + + * liblzma fixes: + + - A memory leak was fixed. + + - lzma_stream_buffer_encode() no longer creates an empty .xz + Block if encoding an empty buffer. Such an empty Block with + LZMA2 data would trigger a bug in 5.0.1 and older (see the + first bullet point in 5.0.2 notes). When releasing 5.0.2, + I thought that no encoder creates this kind of files but + I was wrong. + + - Validate function arguments better in a few functions. Most + importantly, specifying an unsupported integrity check to + lzma_stream_buffer_encode() no longer creates a corrupt .xz + file. Probably no application tries to do that, so this + shouldn't be a big problem in practice. + + - Document that lzma_block_buffer_encode(), + lzma_easy_buffer_encode(), lzma_stream_encoder(), and + lzma_stream_buffer_encode() may return LZMA_UNSUPPORTED_CHECK. + + - The return values of the _memusage() functions are now + documented better. + + * Fix command name detection in xzgrep. xzegrep and xzfgrep now + correctly use egrep and fgrep instead of grep. + + * French translation was added. + + +5.0.2 (2011-04-01) + + * LZMA2 decompressor now correctly accepts LZMA2 streams with no + uncompressed data. Previously it considered them corrupt. The + bug can affect applications that use raw LZMA2 streams. It is + very unlikely to affect .xz files because no compressor creates + .xz files with empty LZMA2 streams. (Empty .xz files are a + different thing than empty LZMA2 streams.) + + * "xz --suffix=.foo filename.foo" now refuses to compress the + file due to it already having the suffix .foo. It was already + documented on the man page, but the code lacked the test. + + * "xzgrep -l foo bar.xz" works now. + + * Polish translation was added. + + +5.0.1 (2011-01-29) + + * xz --force now (de)compresses files that have setuid, setgid, + or sticky bit set and files that have multiple hard links. + The man page had it documented this way already, but the code + had a bug. + + * gzip and bzip2 support in xzdiff was fixed. + + * Portability fixes + + * Minor fix to Czech translation + + +5.0.0 (2010-10-23) + + Only the most important changes compared to 4.999.9beta are listed + here. One change is especially important: + + * The memory usage limit is now disabled by default. Some scripts + written before this change may have used --memory=max on xz command + line or in XZ_OPT. THESE USES OF --memory=max SHOULD BE REMOVED + NOW, because they interfere with user's ability to set the memory + usage limit himself. If user-specified limit causes problems to + your script, blame the user. + + Other significant changes: + + * Added support for XZ_DEFAULTS environment variable. This variable + allows users to set default options for xz, e.g. default memory + usage limit or default compression level. Scripts that use xz + must never set or unset XZ_DEFAULTS. Scripts should use XZ_OPT + instead if they need a way to pass options to xz via an + environment variable. + + * The compression settings associated with the preset levels + -0 ... -9 have been changed. --extreme was changed a little too. + It is now less likely to make compression worse, but with some + files the new --extreme may compress slightly worse than the old + --extreme. + + * If a preset level (-0 ... -9) is specified after a custom filter + chain options have been used (e.g. --lzma2), the custom filter + chain will be forgotten. Earlier the preset options were + completely ignored after custom filter chain options had been + seen. + + * xz will create sparse files when decompressing if the uncompressed + data contains long sequences of binary zeros. This is done even + when writing to standard output that is connected to a regular + file and certain additional conditions are met to make it safe. + + * Support for "xz --list" was added. Combine with --verbose or + --verbose --verbose (-vv) for detailed output. + + * I had hoped that liblzma API would have been stable after + 4.999.9beta, but there have been a couple of changes in the + advanced features, which don't affect most applications: + + - Index handling code was revised. If you were using the old + API, you will get a compiler error (so it's easy to notice). + + - A subtle but important change was made to the Block handling + API. lzma_block.version has to be initialized even for + lzma_block_header_decode(). Code that doesn't do it will work + for now, but might break in the future, which makes this API + change easy to miss. + + * The major soname has been bumped to 5.0.0. liblzma API and ABI + are now stable, so the need to recompile programs linking against + liblzma shouldn't arise soon. + diff --git a/src/native/external/xz/PACKAGERS b/src/native/external/xz/PACKAGERS new file mode 100644 index 00000000000000..b12c4851a44618 --- /dev/null +++ b/src/native/external/xz/PACKAGERS @@ -0,0 +1,245 @@ + +Information to packagers of XZ Utils +==================================== + + 0. Preface + 1. Package naming + 2. Package description + 3. License + 4. configure options + 5. Additional documentation + 6. Extra files + 7. Installing XZ Utils and LZMA Utils in parallel + 8. Example + + +0. Preface +---------- + + This document is meant for people who create and maintain XZ Utils + packages for operating system distributions. The focus is on GNU/Linux + systems, but most things apply to other systems too. + + While the standard "configure && make DESTDIR=$PKG install" should + give a pretty good package, there are some details which packagers + may want to tweak. + + Packagers should also read the INSTALL file. + + +1. Package naming +----------------- + + The preferred name for the XZ Utils package is "xz", because that's + the name of the upstream tarball. Naturally you may have good reasons + to use some other name; I won't get angry about it. ;-) It's just nice + to be able to point people to the correct package name without asking + what distro they have. + + If your distro policy is to split things into small pieces, here is + one suggestion: + + xz xz, xzdec, scripts (xzdiff, xzgrep, etc.), docs + xz-lzma lzma, unlzma, lzcat, lzgrep etc. symlinks and + lzmadec binary for compatibility with LZMA Utils + liblzma liblzma.so.* + liblzma-devel liblzma.so, liblzma.a, API headers + liblzma-doc Example programs and, if enabled at build time, + Doxygen-generated liblzma API docs (HTML) + + +2. Package description +---------------------- + + Here is a suggestion which you may use as the package description. + If you can use only one-line description, pick only the first line. + Naturally, feel free to use some other description if you find it + better, and maybe send it to me too. + + Library and command line tools for XZ and LZMA compressed files + + XZ Utils provide a general purpose data compression library + and command line tools. The native file format is the .xz + format, but also the legacy .lzma format is supported. The .xz + format supports multiple compression algorithms, of which LZMA2 + is currently the primary algorithm. With typical files, XZ Utils + create about 30 % smaller files than gzip. + + If you are splitting XZ Utils into multiple packages, here are some + suggestions for package descriptions: + + xz: + + Command line tools for XZ and LZMA compressed files + + This package includes the xz compression tool and other command + line tools from XZ Utils. xz has command line syntax similar to + that of gzip. The native file format is the .xz format, but also + the legacy .lzma format is supported. The .xz format supports + multiple compression algorithms, of which LZMA2 is currently the + primary algorithm. With typical files, XZ Utils create about 30 % + smaller files than gzip. + + Note that this package doesn't include the files needed for + LZMA Utils 4.32.x compatibility. Install also the xz-lzma + package to make XZ Utils emulate LZMA Utils 4.32.x. + + xz-lzma: + + LZMA Utils emulation with XZ Utils + + This package includes executables and symlinks to make + XZ Utils emulate lzma, unlzma, lzcat, and other command + line tools found from the legacy LZMA Utils 4.32.x package. + + liblzma: + + Library for XZ and LZMA compressed files + + liblzma is a general purpose data compression library with + an API similar to that of zlib. liblzma supports multiple + algorithms, of which LZMA2 is currently the primary algorithm. + The native file format is .xz, but also the legacy .lzma + format and raw streams (no headers at all) are supported. + + This package includes the shared library. + + liblzma-devel: + + Library for XZ and LZMA compressed files + + This package includes the API headers, static library, and + other development files related to liblzma. + + liblzma-doc: + + liblzma API documentation in HTML and example usage + + This package includes the Doxygen-generated liblzma API + HTML docs and example programs showing how to use liblzma. + + +3. License +---------- + + If the package manager supports a license field, you probably should + put GPLv2+ there (GNU GPL v2 or later). The interesting parts of + XZ Utils are under the BSD Zero Clause License (0BSD), but some less + important files ending up into the binary package are under GPLv2+. + So it is simplest to just say GPLv2+ if you cannot specify + "BSD0 and GPLv2+". + + If you split XZ Utils into multiple packages as described earlier + in this file, liblzma and liblzma-dev packages will contain only + 0BSD-licensed code from XZ Utils (compiler or linker may add some + third-party code which may have other licenses). + + +4. configure options +-------------------- + + Unless you are building a package for a distribution that is meant + only for embedded systems, don't use the following configure options: + + --enable-debug + --enable-encoders (*) + --enable-decoders + --enable-match-finders + --enable-checks + --enable-small (*) + --disable-threads (*) + --disable-microlzma (*) + --disable-lzip-decoder (*) + + (*) These are OK when building xzdec and lzmadec as described + in INSTALL. + + xzdec and lzmadec don't provide any functionality that isn't already + available in the xz tool. Shipping xzdec and lzmadec without size + optimization and statically-linked liblzma isn't very useful. Doing + that would give users the xzdec man page, which may make it easier + for people to find out that such tools exists, but the executables + wouldn't have any advantage over the full-featured xz. + + +5. Additional documentation +--------------------------- + + "make install" copies some additional documentation to $docdir + (--docdir in configure). There is a copy of the GNU GPL v2, which + can be replaced with a symlink if your distro ships with shared + copies of the common license texts. + + The Doxygen-generated liblzma API documentation (HTML) is built and + installed if the configure option --enable-doxygen is used (it's + disabled by default). This requires that Doxygen is available. The + API documentation is installed by "make install" to $docdir/api. + + NOTE: The files generated by Doxygen include content from + Doxygen itself. Check the license info before distributing + the Doxygen-generated files. + + +6. Extra files +-------------- + + The "extra" directory contains some small extra tools or other files. + The exact set of extra files can vary between XZ Utils releases. The + extra files have only limited use or they are too dangerous to be + put directly to $bindir (7z2lzma.sh is a good example, since it can + silently create corrupt output if certain conditions are not met). + + If you feel like it, you may copy the extra directory under the doc + directory (e.g. /usr/share/doc/xz/extra). Maybe some people will find + them useful. However, most people needing these tools probably are + able to find them from the source package too. + + The "debug" directory contains some tools that are useful only when + hacking on XZ Utils. Don't package these tools. + + +7. Installing XZ Utils and LZMA Utils in parallel +------------------------------------------------- + + XZ Utils and LZMA Utils 4.32.x can be installed in parallel by + omitting the compatibility symlinks (lzma, unlzma, lzcat, lzgrep etc.) + from the XZ Utils package. It's probably a good idea to still package + the symlinks into a separate package so that users may choose if they + want to use XZ Utils or LZMA Utils for handling .lzma files. + + +8. Example +---------- + + Here is an example for i686 GNU/Linux that + - links xz and lzmainfo against shared liblzma; + - links size-optimized xzdec and lzmadec against static liblzma + while avoiding libpthread dependency; + - includes only shared liblzma in the final package; and + - copies also the "extra" directory to the package. + + PKG=/tmp/xz-pkg + tar xf xz-x.y.z.tar.gz + cd xz-x.y.z + ./configure \ + --prefix=/usr \ + --disable-static \ + --disable-xzdec \ + --disable-lzmadec \ + CFLAGS='-march=i686 -mtune=generic -O2' + make + make DESTDIR=$PKG install-strip + make clean + ./configure \ + --prefix=/usr \ + --disable-shared \ + --disable-nls \ + --disable-encoders \ + --enable-small \ + --disable-threads \ + CFLAGS='-march=i686 -mtune=generic -Os' + make -C src/liblzma + make -C src/xzdec + make -C src/xzdec DESTDIR=$PKG install-strip + cp -a extra $PKG/usr/share/doc/xz + diff --git a/src/native/external/xz/README b/src/native/external/xz/README new file mode 100644 index 00000000000000..41671676a516e3 --- /dev/null +++ b/src/native/external/xz/README @@ -0,0 +1,281 @@ + +XZ Utils +======== + + 0. Overview + 1. Documentation + 1.1. Overall documentation + 1.2. Documentation for command-line tools + 1.3. Documentation for liblzma + 2. Version numbering + 3. Reporting bugs + 4. Translations + 4.1. Testing translations + 5. Other implementations of the .xz format + 6. Contact information + + +0. Overview +----------- + + XZ Utils provide a general-purpose data-compression library plus + command-line tools. The native file format is the .xz format, but + also the legacy .lzma format is supported. The .xz format supports + multiple compression algorithms, which are called "filters" in the + context of XZ Utils. The primary filter is currently LZMA2. With + typical files, XZ Utils create about 30 % smaller files than gzip. + + To ease adapting support for the .xz format into existing applications + and scripts, the API of liblzma is somewhat similar to the API of the + popular zlib library. For the same reason, the command-line tool xz + has a command-line syntax similar to that of gzip. + + When aiming for the highest compression ratio, the LZMA2 encoder uses + a lot of CPU time and may use, depending on the settings, even + hundreds of megabytes of RAM. However, in fast modes, the LZMA2 encoder + competes with bzip2 in compression speed, RAM usage, and compression + ratio. + + LZMA2 is reasonably fast to decompress. It is a little slower than + gzip, but a lot faster than bzip2. Being fast to decompress means + that the .xz format is especially nice when the same file will be + decompressed very many times (usually on different computers), which + is the case e.g. when distributing software packages. In such + situations, it's not too bad if the compression takes some time, + since that needs to be done only once to benefit many people. + + With some file types, combining (or "chaining") LZMA2 with an + additional filter can improve the compression ratio. A filter chain may + contain up to four filters, although usually only one or two are used. + For example, putting a BCJ (Branch/Call/Jump) filter before LZMA2 + in the filter chain can improve compression ratio of executable files. + + Since the .xz format allows adding new filter IDs, it is possible that + some day there will be a filter that is, for example, much faster to + compress than LZMA2 (but probably with worse compression ratio). + Similarly, it is possible that some day there is a filter that will + compress better than LZMA2. + + XZ Utils supports multithreaded compression. XZ Utils doesn't support + multithreaded decompression yet. It has been planned though and taken + into account when designing the .xz file format. In the future, files + that were created in threaded mode can be decompressed in threaded + mode too. + + +1. Documentation +---------------- + +1.1. Overall documentation + + README This file + + INSTALL.generic Generic install instructions for those not + familiar with packages using GNU Autotools + INSTALL Installation instructions specific to XZ Utils + PACKAGERS Information to packagers of XZ Utils + + COPYING XZ Utils copyright and license information + COPYING.0BSD BSD Zero Clause License + COPYING.GPLv2 GNU General Public License version 2 + COPYING.GPLv3 GNU General Public License version 3 + COPYING.LGPLv2.1 GNU Lesser General Public License version 2.1 + + AUTHORS The main authors of XZ Utils + THANKS Incomplete list of people who have helped making + this software + NEWS User-visible changes between XZ Utils releases + ChangeLog Detailed list of changes (commit log) + TODO Known bugs and some sort of to-do list + + Note that only some of the above files are included in binary + packages. + + +1.2. Documentation for command-line tools + + The command-line tools are documented as man pages. In source code + releases (and possibly also in some binary packages), the man pages + are also provided in plain text (ASCII only) format in the directory + "doc/man" to make the man pages more accessible to those whose + operating system doesn't provide an easy way to view man pages. + + +1.3. Documentation for liblzma + + The liblzma API headers include short docs about each function + and data type as Doxygen tags. These docs should be quite OK as + a quick reference. + + There are a few example/tutorial programs that should help in + getting started with liblzma. In the source package the examples + are in "doc/examples" and in binary packages they may be under + "examples" in the same directory as this README. + + Since the liblzma API has similarities to the zlib API, some people + may find it useful to read the zlib docs and tutorial too: + + https://zlib.net/manual.html + https://zlib.net/zlib_how.html + + +2. Version numbering +-------------------- + + The version number format of XZ Utils is X.Y.ZS: + + - X is the major version. When this is incremented, the library + API and ABI break. + + - Y is the minor version. It is incremented when new features + are added without breaking the existing API or ABI. An even Y + indicates a stable release and an odd Y indicates unstable + (alpha or beta version). + + - Z is the revision. This has a different meaning for stable and + unstable releases: + + * Stable: Z is incremented when bugs get fixed without adding + any new features. This is intended to be convenient for + downstream distributors that want bug fixes but don't want + any new features to minimize the risk of introducing new bugs. + + * Unstable: Z is just a counter. API or ABI of features added + in earlier unstable releases having the same X.Y may break. + + - S indicates stability of the release. It is missing from the + stable releases, where Y is an even number. When Y is odd, S + is either "alpha" or "beta" to make it very clear that such + versions are not stable releases. The same X.Y.Z combination is + not used for more than one stability level, i.e. after X.Y.Zalpha, + the next version can be X.Y.(Z+1)beta but not X.Y.Zbeta. + + +3. Reporting bugs +----------------- + + Naturally it is easiest for me if you already know what causes the + unexpected behavior. Even better if you have a patch to propose. + However, quite often the reason for unexpected behavior is unknown, + so here are a few things to do before sending a bug report: + + 1. Try to create a small example how to reproduce the issue. + + 2. Compile XZ Utils with debugging code using configure switches + --enable-debug and, if possible, --disable-shared. If you are + using GCC, use CFLAGS='-O0 -ggdb3'. Don't strip the resulting + binaries. + + 3. Turn on core dumps. The exact command depends on your shell; + for example in GNU bash it is done with "ulimit -c unlimited", + and in tcsh with "limit coredumpsize unlimited". + + 4. Try to reproduce the suspected bug. If you get "assertion failed" + message, be sure to include the complete message in your bug + report. If the application leaves a coredump, get a backtrace + using gdb: + $ gdb /path/to/app-binary # Load the app to the debugger. + (gdb) core core # Open the coredump. + (gdb) bt # Print the backtrace. Copy & paste to bug report. + (gdb) quit # Quit gdb. + + Report your bug via email or IRC (see Contact information below). + Don't send core dump files or any executables. If you have a small + example file(s) (total size less than 256 KiB), please include + it/them as an attachment. If you have bigger test files, put them + online somewhere and include a URL to the file(s) in the bug report. + + Always include the exact version number of XZ Utils in the bug report. + If you are using a snapshot from the git repository, use "git describe" + to get the exact snapshot version. If you are using XZ Utils shipped + in an operating system distribution, mention the distribution name, + distribution version, and exact xz package version; if you cannot + repeat the bug with the code compiled from unpatched source code, + you probably need to report a bug to your distribution's bug tracking + system. + + +4. Translations +--------------- + + The xz command line tool and all man pages can be translated. + The translations are handled via the Translation Project. If you + wish to help translating xz, please join the Translation Project: + + https://translationproject.org/html/translators.html + + Updates to translations won't be accepted by methods that bypass + the Translation Project because there is a risk of duplicate work: + translation updates made in the xz repository aren't seen by the + translators in the Translation Project. If you have found bugs in + a translation, please report them to the Language-Team address + which can be found near the beginning of the PO file. + + If you find language problems in the original English strings, + feel free to suggest improvements. Ask if something is unclear. + + +4.1. Testing translations + + Testing can be done by installing xz into a temporary directory. + + If building from Git repository (not tarball), generate the + Autotools files: + + ./autogen.sh + + Create a subdirectory for the build files. The tmp-build directory + can be deleted after testing. + + mkdir tmp-build + cd tmp-build + ../configure --disable-shared --enable-debug --prefix=$PWD/inst + + Edit the .po file in the po directory. Then build and install to + the "tmp-build/inst" directory, and use translations.bash to see + how some of the messages look. Repeat these steps if needed: + + make -C po update-po + make -j"$(nproc)" install + bash ../debug/translation.bash | less + bash ../debug/translation.bash | less -S # For --list outputs + + To test other languages, set the LANGUAGE environment variable + before running translations.bash. The value should match the PO file + name without the .po suffix. Example: + + export LANGUAGE=fi + + +5. Other implementations of the .xz format +------------------------------------------ + + 7-Zip and the p7zip port of 7-Zip support the .xz format starting + from the version 9.00alpha. + + https://7-zip.org/ + https://p7zip.sourceforge.net/ + + XZ Embedded is a limited implementation written for use in the Linux + kernel, but it is also suitable for other embedded use. + + https://tukaani.org/xz/embedded.html + + XZ for Java is a complete implementation written in pure Java. + + https://tukaani.org/xz/java.html + + +6. Contact information +---------------------- + + XZ Utils in general: + - Home page: https://tukaani.org/xz/ + - Email to maintainer(s): xz@tukaani.org + - IRC: #tukaani on Libera Chat + - GitHub: https://github.com/tukaani-project/xz + + Lead maintainer: + - Email: Lasse Collin + - IRC: Larhzu on Libera Chat + diff --git a/src/native/external/xz/THANKS b/src/native/external/xz/THANKS new file mode 100644 index 00000000000000..e1c21ebfe5d22a --- /dev/null +++ b/src/native/external/xz/THANKS @@ -0,0 +1,257 @@ + +Thanks +====== + +Some people have helped more, some less, but nevertheless everyone's help +has been important. :-) + - Adam Borowski + - Adam Walling + - Adrien Nader + - Agostino Sarubbo + - Alexander Bluhm + - Alexander M. Greenham + - Alexander Neumann + - Alexandre Sauvé + - Alexey Tourbin + - Anders F. Björklund + - Andraž 'ruskie' Levstik + - Andre Noll + - Andreas K. Hüttel + - Andreas Müller + - Andreas Schwab + - Andreas Zieringer + - Andrej Skenderija + - Andres Freund + - Andrew Dudman + - Andrew Murray + - Antoine Cœur + - Anton Kochkov + - Antonio Diaz Diaz + - Arkadiusz Miskiewicz + - Asgeir Storesund Nilsen + - Aziz Chaudhry + - Bela Lubkin + - Ben Boeckel + - Benjamin Buch + - Benno Schulenberg + - Bernhard Reutner-Fischer + - Bert Wesarg + - Bhargava Shastry + - Bill Glessner + - Bjarni Ingi Gislason + - Boud Roukema + - Brad Smith + - Bruce Stark + - Cary Lewis + - Charles Wilson + - Chenxi Mao + - Chien Wong + - Chris Donawa + - Chris McCrohan + - Christian Hesse + - Christian Kujau + - Christian von Roques + - Christian Weisgerber + - Christoph Junghans + - Collin Funk + - Conley Moorhous + - Cristian Rodríguez + - Cristiano Ceglia + - Dan Shechter + - Dan Stromberg + - Dan Weiss + - Daniel Leonard + - Daniel Mealha Cabrita + - Daniel Packard + - Daniel Richard G. + - David Burklund + - Denis Excoffier + - Derwin McGeary + - Dexter Castor Döpping + - Diederik de Haas + - Diego Elio Pettenò + - Dimitri Papadopoulos Orfanos + - Dirk Müller + - Douglas Thor + - Ed Maste + - Elbert Pol + - Eli Schwartz + - Elijah Almeida Coimbra + - Émilie Labbé + - Emmanuel Blot + - Eric Lindblad + - Eric S. Raymond + - Étienne Mollier + - Evan Nemerson + - Fangrui Song + - Felix Collin + - Filip Palian + - Firas Khalil Khana + - François Etcheverry + - Frank Busse + - Frank Prochnow + - Fredrik Wikstrom + - Gabi Davar + - Gabriela Gutierrez + - Gilles Espinasse + - Gregory Margo + - Guillaume Outters + - Guiorgy Potskhishvili + - H. Peter Anvin + - Hajin Jang + - Hans Jansen + - Harri K. Koskinen + - Hin-Tak Leung + - H.J. Lu + - Hongbo Ni + - Igor Pavlov + - İhsan Doğan + - Ilya Kurdyukov + - Iouri Kharon + - İsmail Dönmez + - Ivan A. Melnikov + - Jakub Bogusz + - James Buren + - James M Leddy + - Jan Kratochvil + - Jan Terje Hansen + - Jason Gorski + - Jeff Bastian + - Jeffrey Walton + - Jeroen Roovers + - Jim Meyering + - Jim Wilcoxson + - Joachim Henke + - John Paul Adrian Glaubitz + - Jonathan Nieder + - Jonathan Stott + - Joona Kannisto + - Jouk Jansen + - Juan Manuel Guerrero + - Jukka Salmi + - Julien Marrec + - Jun I Jin + - Kai Pastor + - Karl Beldan + - Karl Berry + - Keith Patton + - Kelvin Lee + - Kevin R. Bulgrien + - Kian-Meng Ang + - Kim Jinyeong + - Kirill A. Korinsky + - Kiyoshi Kanazawa + - Lars Wirzenius + - Li Chenggang + - Lizandro Heredia + - Loganaden Velvindron + - Lorenzo De Liso + - Lukas Braune + - Maarten Bosmans + - Maksym Vatsyk + - Marcin Kowalczyk + - Marcus Comstedt + - Marcus Tillmanns + - Marek Černocký + - Mark Adler + - Mark Wielaard + - Markus Duft + - Markus Rickert + - Martin Blumenstingl + - Martin Matuška + - Martin Storsjö + - Martin Väth + - Mathieu Vachon + - Matthew Good + - Matthieu Rakotojaona + - Melanie Blower + - Michael Felt + - Michael Fox + - Michał Górny + - Mike Frysinger + - Mikko Pouru + - Milo Casagrande + - Mohammed Adnène Trojette + - Nathan Moinvaziri + - Nelson H. F. Beebe + - Nicholas Jackson + - Ole André Vadla Ravnås + - Orange Tsai + - Orgad Shaneh + - Patrick J. Volkerding + - Paul Eggert + - Paul Townsend + - Pavel Raiskup + - Per Øyvind Karlsen + - Peter Ivanov + - Peter Lawler + - Peter O'Gorman + - Peter Pallinger + - Peter Seiderer + - Pierre-Yves Martin + - Pilorz Wojciech + - Pippijn van Steenhoven + - Rafał Mużyło + - Rainer Müller + - Ralf Wildenhues + - Rich Prohaska + - Richard Koch + - Richard W.M. Jones + - Robert Elz + - Robert Readman + - Roel Bouckaert + - Ron Desmond + - Ruarí Ødegaard + - Rui Paulo + - Ryan Colyer + - Ryan Young + - Sam James + - Scott McAllister + - Sean Fenian + - Sebastian Andrzej Siewior + - Sergey Kosukhin + - Simon Josefsson + - Siteshwar Vashisht + - Steffen Nurpmeso + - Stephan Kulow + - Stephen Sachs + - Stuart Shelton + - Taiki Tsunekawa + - Thomas Klausner + - Tobias Lahrmann Hansen + - Tobias Stoeckmann + - Tomasz Gajc + - Tomer Chachamu + - Torsten Rupp + - Trần Ngọc Quân + - Trent W. Buck + - Victoria Alexia + - Vijay Sarvepalli + - Ville Koskinen + - Ville Skyttä + - Vincent Cruz + - Vincent Fazio + - Vincent Lefevre + - Vincent Torri + - Vincent Wixsom + - Vincenzo Innocente + - Vitaly Chikunov + - Wim Lewis + - Xi Ruoyao + - Xin Li + - Yifeng Li + - 榆柳松 (ZhengSen Wang) + +Companies: + - Google + - Sandfly Security + +Other credits: + - cleemy desu wayo working with Trend Micro Zero Day Initiative + - Orange Tsai and splitline from DEVCORE Research Team + +Also thanks to all the people who have participated in the Tukaani project. + +I have probably forgot to add some names to the above list. Sorry about +that and thanks for your help. + diff --git a/src/native/external/xz/TODO b/src/native/external/xz/TODO new file mode 100644 index 00000000000000..7a0bf16ed86eb3 --- /dev/null +++ b/src/native/external/xz/TODO @@ -0,0 +1,88 @@ + +XZ Utils To-Do List +=================== + +Known bugs +---------- + + The test suite is incomplete. + + XZ Utils compress some files significantly worse than LZMA Utils. + This is due to faster compression presets used by XZ Utils, and + can often be worked around by using "xz --extreme". With some files + --extreme isn't enough though: it's most likely with files that + compress extremely well, so going from compression ratio of 0.003 + to 0.004 means big relative increase in the compressed file size. + + tuklib_exit() doesn't block signals => EINTR is possible. + + If liblzma has created threads and fork() gets called, liblzma + code will break in the child process unless it calls exec() and + doesn't touch liblzma. + + +Missing features +---------------- + + Add support for storing metadata in .xz files. A preliminary + idea is to create a new Stream type for metadata. When both + metadata and data are wanted in the same .xz file, two or more + Streams would be concatenated. + + The state stored in lzma_stream should be cloneable, which would + be mostly useful when using a preset dictionary in LZMA2, but + it may have other uses too. Compare to deflateCopy() in zlib. + + Adjust dictionary size when the input file size is known. + Maybe do this only if an option is given. + + xz doesn't support copying extended attributes, access control + lists etc. from source to target file. + + Multithreaded compression: + - Reduce memory usage of the current method. + - Implement threaded match finders. + - Implement pigz-style threading in LZMA2. + + Buffer-to-buffer coding could use less RAM (especially when + decompressing LZMA1 or LZMA2). + + I/O library is not implemented (similar to gzopen() in zlib). + It will be a separate library that supports uncompressed, .gz, + .bz2, .lzma, and .xz files. + + Support changing lzma_options_lzma.mode with lzma_filters_update(). + + Support LZMA_FULL_FLUSH for lzma_stream_decoder() to stop at + Block and Stream boundaries. + + Error codes from lzma_code() aren't very specific. A more detailed + error message (string) could be provided too. It could be returned + by a new function or use a currently-reserved member of lzma_stream. + + Make it possible to adjust LZMA2 options in the middle of a Block + so that the encoding speed vs. compression ratio can be optimized + when the compressed data is streamed over network. + + Improved BCJ filters. The current filters are small but they aren't + so great when compressing binary packages that contain various file + types. Specifically, they make things worse if there are static + libraries or Linux kernel modules. The filtering could also be + more effective (without getting overly complex), for example, + streamable variant BCJ2 from 7-Zip could be implemented. + + Filter that autodetects specific data types in the input stream + and applies appropriate filters for the corrects parts of the input. + Perhaps combine this with the BCJ filter improvement point above. + + Long-range LZ77 method as a separate filter or as a new LZMA2 + match finder. + + +Documentation +------------- + + More tutorial programs are needed for liblzma. + + Document the LZMA1 and LZMA2 algorithms. + diff --git a/src/native/external/xz/autogen.sh b/src/native/external/xz/autogen.sh new file mode 100644 index 00000000000000..231344d4de2c08 --- /dev/null +++ b/src/native/external/xz/autogen.sh @@ -0,0 +1,42 @@ +#!/bin/sh +# SPDX-License-Identifier: 0BSD + +############################################################################### +# +# Author: Lasse Collin +# +############################################################################### + +set -e -x + +# The following six lines are almost identical to "autoreconf -fi" but faster. +${AUTOPOINT:-autopoint} -f +${LIBTOOLIZE:-libtoolize} -c -f || glibtoolize -c -f +${ACLOCAL:-aclocal} -I m4 +${AUTOCONF:-autoconf} +${AUTOHEADER:-autoheader} +${AUTOMAKE:-automake} -acf --foreign + +# Generate the translated man pages if the "po4a" tool is available. +# This is *NOT* done by "autoreconf -fi" or when "make" is run. +# Pass --no-po4a to this script to skip this step. +# It can be useful when you know that po4a isn't available and +# don't want autogen.sh to exit with non-zero exit status. +generate_po4a="y" + +for arg in "$@" +do + case $arg in + "--no-po4a") + generate_po4a="n" + ;; + esac +done + +if test "$generate_po4a" != "n"; then + cd po4a + sh update-po + cd .. +fi + +exit 0 diff --git a/src/native/external/xz/cmake/remove-ordinals.cmake b/src/native/external/xz/cmake/remove-ordinals.cmake new file mode 100644 index 00000000000000..de85ddfd15c52e --- /dev/null +++ b/src/native/external/xz/cmake/remove-ordinals.cmake @@ -0,0 +1,25 @@ +# SPDX-License-Identifier: 0BSD + +############################################################################# +# +# remove-ordinals.cmake +# +# Removes the ordinal numbers from a DEF file that has been created by +# GNU ld or LLVM lld option --output-def (when creating a Windows DLL). +# This should be equivalent: sed 's/ \+@ *[0-9]\+//' +# +# Usage: +# +# cmake -DINPUT_FILE=infile.def.in \ +# -DOUTPUT_FILE=outfile.def \ +# -P remove-ordinals.cmake +# +############################################################################# +# +# Author: Lasse Collin +# +############################################################################# + +file(READ "${INPUT_FILE}" STR) +string(REGEX REPLACE " +@ *[0-9]+" "" STR "${STR}") +file(WRITE "${OUTPUT_FILE}" "${STR}") diff --git a/src/native/external/xz/cmake/tuklib_common.cmake b/src/native/external/xz/cmake/tuklib_common.cmake new file mode 100644 index 00000000000000..9762e245f7bdd6 --- /dev/null +++ b/src/native/external/xz/cmake/tuklib_common.cmake @@ -0,0 +1,56 @@ +# SPDX-License-Identifier: 0BSD + +############################################################################# +# +# tuklib_common.cmake - common functions and macros for tuklib_*.cmake files +# +# Author: Lasse Collin +# +############################################################################# + +function(tuklib_add_definitions TARGET_OR_ALL DEFINITIONS) + # DEFINITIONS may be an empty string/list but it's fine here. There is + # no need to quote ${DEFINITIONS} as empty arguments are fine here. + if(TARGET_OR_ALL STREQUAL "ALL") + add_compile_definitions(${DEFINITIONS}) + else() + target_compile_definitions("${TARGET_OR_ALL}" PRIVATE ${DEFINITIONS}) + endif() +endfunction() + +function(tuklib_add_definition_if TARGET_OR_ALL VAR) + if(${VAR}) + tuklib_add_definitions("${TARGET_OR_ALL}" "${VAR}") + endif() +endfunction() + +# This is an over-simplified version of AC_USE_SYSTEM_EXTENSIONS in Autoconf +# or gl_USE_SYSTEM_EXTENSIONS in gnulib. +# +# NOTE: This is a macro because the changes to CMAKE_REQUIRED_DEFINITIONS +# must be visible in the calling scope. +macro(tuklib_use_system_extensions) + if(NOT MSVC) + add_compile_definitions( + _GNU_SOURCE # glibc, musl, mingw-w64 + _NETBSD_SOURCE # NetBSD, MINIX 3 + _OPENBSD_SOURCE # Also NetBSD! + __EXTENSIONS__ # Solaris + _POSIX_PTHREAD_SEMANTICS # Solaris + _DARWIN_C_SOURCE # macOS + _TANDEM_SOURCE # HP NonStop + _ALL_SOURCE # AIX, z/OS + ) + + list(APPEND CMAKE_REQUIRED_DEFINITIONS + -D_GNU_SOURCE + -D_NETBSD_SOURCE + -D_OPENBSD_SOURCE + -D__EXTENSIONS__ + -D_POSIX_PTHREAD_SEMANTICS + -D_DARWIN_C_SOURCE + -D_TANDEM_SOURCE + -D_ALL_SOURCE + ) + endif() +endmacro() diff --git a/src/native/external/xz/cmake/tuklib_cpucores.cmake b/src/native/external/xz/cmake/tuklib_cpucores.cmake new file mode 100644 index 00000000000000..05f3ceef8d15eb --- /dev/null +++ b/src/native/external/xz/cmake/tuklib_cpucores.cmake @@ -0,0 +1,184 @@ +# SPDX-License-Identifier: 0BSD + +############################################################################# +# +# tuklib_cpucores.cmake - see tuklib_cpucores.m4 for description and comments +# +# Author: Lasse Collin +# +############################################################################# + +include("${CMAKE_CURRENT_LIST_DIR}/tuklib_common.cmake") +include(CMakePushCheckState) +include(CheckCSourceCompiles) +include(CheckIncludeFile) + +function(tuklib_cpucores_internal_check) + if(WIN32 OR CYGWIN) + # Nothing to do, the tuklib_cpucores.c handles it. + set(TUKLIB_CPUCORES_DEFINITIONS "" CACHE INTERNAL "") + return() + endif() + + # glibc-based systems (GNU/Linux and GNU/kFreeBSD) have + # sched_getaffinity(). The CPU_COUNT() macro was added in glibc 2.9. + # glibc 2.9 is old enough that if someone uses the code on older glibc, + # the fallback to sysconf() should be good enough. + # + # NOTE: This required that _GNU_SOURCE is defined. We assume that whatever + # feature test macros the caller wants to use are already set in + # CMAKE_REQUIRED_DEFINES and in the target defines. + check_c_source_compiles(" + #include + int main(void) + { + cpu_set_t cpu_mask; + sched_getaffinity(0, sizeof(cpu_mask), &cpu_mask); + return CPU_COUNT(&cpu_mask); + } + " + TUKLIB_CPUCORES_SCHED_GETAFFINITY) + if(TUKLIB_CPUCORES_SCHED_GETAFFINITY) + set(TUKLIB_CPUCORES_DEFINITIONS + "TUKLIB_CPUCORES_SCHED_GETAFFINITY" + CACHE INTERNAL "") + return() + endif() + + # FreeBSD has both cpuset and sysctl. Look for cpuset first because + # it's a better approach. + # + # This test would match on GNU/kFreeBSD too but it would require + # -lfreebsd-glue when linking and thus in the current form this would + # fail on GNU/kFreeBSD. The above test for sched_getaffinity() matches + # on GNU/kFreeBSD so the test below should never run on that OS. + check_c_source_compiles(" + #include + #include + int main(void) + { + cpuset_t set; + cpuset_getaffinity(CPU_LEVEL_WHICH, CPU_WHICH_PID, -1, + sizeof(set), &set); + return 0; + } + " + TUKLIB_CPUCORES_CPUSET) + if(TUKLIB_CPUCORES_CPUSET) + set(TUKLIB_CPUCORES_DEFINITIONS "HAVE_PARAM_H;TUKLIB_CPUCORES_CPUSET" + CACHE INTERNAL "") + return() + endif() + + # On OS/2, both sysconf() and sysctl() pass the tests in this file, + # but only sysctl() works. On QNX it's the opposite: only sysconf() works + # (although it assumes that _POSIX_SOURCE, _XOPEN_SOURCE, and + # _POSIX_C_SOURCE are undefined or alternatively _QNX_SOURCE is defined). + # + # We test sysctl() first and intentionally break the sysctl() test on QNX + # so that sysctl() is never used on QNX. + cmake_push_check_state() + check_include_file(sys/param.h HAVE_SYS_PARAM_H) + if(HAVE_SYS_PARAM_H) + list(APPEND CMAKE_REQUIRED_DEFINITIONS -DHAVE_SYS_PARAM_H) + endif() + check_c_source_compiles(" + #ifdef __QNX__ + compile error + #endif + #ifdef HAVE_SYS_PARAM_H + # include + #endif + #include + int main(void) + { + #ifdef HW_NCPUONLINE + /* This is preferred on OpenBSD, see tuklib_cpucores.c. */ + int name[2] = { CTL_HW, HW_NCPUONLINE }; + #else + int name[2] = { CTL_HW, HW_NCPU }; + #endif + int cpus; + size_t cpus_size = sizeof(cpus); + sysctl(name, 2, &cpus, &cpus_size, NULL, 0); + return 0; + } + " + TUKLIB_CPUCORES_SYSCTL) + cmake_pop_check_state() + if(TUKLIB_CPUCORES_SYSCTL) + if(HAVE_SYS_PARAM_H) + set(TUKLIB_CPUCORES_DEFINITIONS + "HAVE_PARAM_H;TUKLIB_CPUCORES_SYSCTL" + CACHE INTERNAL "") + else() + set(TUKLIB_CPUCORES_DEFINITIONS + "TUKLIB_CPUCORES_SYSCTL" + CACHE INTERNAL "") + endif() + return() + endif() + + # Many platforms support sysconf(). + check_c_source_compiles(" + #include + int main(void) + { + long i; + #ifdef _SC_NPROCESSORS_ONLN + /* Many systems using sysconf() */ + i = sysconf(_SC_NPROCESSORS_ONLN); + #else + /* IRIX */ + i = sysconf(_SC_NPROC_ONLN); + #endif + return 0; + } + " + TUKLIB_CPUCORES_SYSCONF) + if(TUKLIB_CPUCORES_SYSCONF) + set(TUKLIB_CPUCORES_DEFINITIONS "TUKLIB_CPUCORES_SYSCONF" + CACHE INTERNAL "") + return() + endif() + + # HP-UX + check_c_source_compiles(" + #include + #include + int main(void) + { + struct pst_dynamic pst; + pstat_getdynamic(&pst, sizeof(pst), 1, 0); + (void)pst.psd_proc_cnt; + return 0; + } + " + TUKLIB_CPUCORES_PSTAT_GETDYNAMIC) + if(TUKLIB_CPUCORES_PSTAT_GETDYNAMIC) + set(TUKLIB_CPUCORES_DEFINITIONS "TUKLIB_CPUCORES_PSTAT_GETDYNAMIC" + CACHE INTERNAL "") + return() + endif() +endfunction() + +function(tuklib_cpucores TARGET_OR_ALL) + if(NOT DEFINED TUKLIB_CPUCORES_FOUND) + message(STATUS + "Checking how to detect the number of available CPU cores") + tuklib_cpucores_internal_check() + + if(DEFINED TUKLIB_CPUCORES_DEFINITIONS) + set(TUKLIB_CPUCORES_FOUND 1 CACHE INTERNAL "") + else() + set(TUKLIB_CPUCORES_FOUND 0 CACHE INTERNAL "") + message(WARNING + "No method to detect the number of CPU cores was found") + endif() + endif() + + if(TUKLIB_CPUCORES_FOUND) + tuklib_add_definitions("${TARGET_OR_ALL}" + "${TUKLIB_CPUCORES_DEFINITIONS}") + endif() +endfunction() diff --git a/src/native/external/xz/cmake/tuklib_integer.cmake b/src/native/external/xz/cmake/tuklib_integer.cmake new file mode 100644 index 00000000000000..18facbf427f7be --- /dev/null +++ b/src/native/external/xz/cmake/tuklib_integer.cmake @@ -0,0 +1,288 @@ +# SPDX-License-Identifier: 0BSD + +############################################################################# +# +# tuklib_integer.cmake - see tuklib_integer.m4 for description and comments +# +# Author: Lasse Collin +# +############################################################################# + +include("${CMAKE_CURRENT_LIST_DIR}/tuklib_common.cmake") +include(TestBigEndian) +include(CheckCSourceCompiles) +include(CheckIncludeFile) +include(CheckSymbolExists) + +# An internal helper for tuklib_integer that attempts to detect if +# -mstrict-align or -mno-strict-align is in effect. This sets the +# cache variable TUKLIB_INTEGER_STRICT_ALIGN to ON if OBJDUMP_REGEX +# matches the objdump output of a check program. Otherwise it is set to OFF. +function(tuklib_integer_internal_strict_align OBJDUMP_REGEX) + if(NOT DEFINED TUKLIB_INTEGER_STRICT_ALIGN) + # Build a static library because then the function won't be optimized + # away, and there won't be any unrelated startup code either. + set(CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY) + + # CMake >= 3.25 wouldn't require us to create a temporary file, + # but the following method is compatible with 3.20. + file(WRITE "${CMAKE_BINARY_DIR}/tuklib_integer_strict_align.c" " + #include + unsigned int check_strict_align(const void *p) + { + unsigned int i; + memcpy(&i, p, sizeof(i)); + return i; + } + ") + + # Force -O2 because memcpy() won't be optimized out if optimizations + # are disabled. + try_compile( + TRY_COMPILE_RESULT + "${CMAKE_BINARY_DIR}" + "${CMAKE_BINARY_DIR}/tuklib_integer_strict_align.c" + COMPILE_DEFINITIONS "${CMAKE_REQUIRED_DEFINITIONS}" + CMAKE_FLAGS "-DCOMPILE_DEFINITIONS=${CMAKE_REQUIRED_FLAGS} -O2" + COPY_FILE "${CMAKE_BINARY_DIR}/tuklib_integer_strict_align.a" + ) + + if(NOT TRY_COMPILE_RESULT) + message(FATAL_ERROR + "Compilation of the strict align check failed. " + "Either the specified compiler flags are broken " + "or ${CMAKE_CURRENT_FUNCTION_LIST_FILE} has a bug.") + endif() + + # Use WORKING_DIRECTORY instead of passing the full path to objdump. + # This ensures that the pathname won't affect the objdump output, + # which could result in an unwanted regex match in the next step. + execute_process( + COMMAND "${CMAKE_OBJDUMP}" -d "tuklib_integer_strict_align.a" + WORKING_DIRECTORY "${CMAKE_BINARY_DIR}" + OUTPUT_VARIABLE OBJDUMP_OUTPUT + RESULT_VARIABLE OBJDUMP_RESULT + ) + + # FIXME? Should we remove the temporary files here? + + # Look for instructions that load unsigned bytes. If none are found, + # assume that -mno-strict-align is in effect. + if(OBJDUMP_RESULT STREQUAL "0" AND + OBJDUMP_OUTPUT MATCHES "${OBJDUMP_REGEX}") + set(TUKLIB_INTEGER_STRICT_ALIGN ON CACHE INTERNAL "") + else() + set(TUKLIB_INTEGER_STRICT_ALIGN OFF CACHE INTERNAL "") + endif() + endif() +endfunction() + +function(tuklib_integer TARGET_OR_ALL) + # Check for endianness. Unlike the Autoconf's AC_C_BIGENDIAN, this doesn't + # support Apple universal binaries. The CMake module will leave the + # variable unset so we can catch that situation here instead of continuing + # as if we were little endian. + test_big_endian(WORDS_BIGENDIAN) + if(NOT DEFINED WORDS_BIGENDIAN) + message(FATAL_ERROR "Cannot determine endianness") + endif() + tuklib_add_definition_if("${TARGET_OR_ALL}" WORDS_BIGENDIAN) + + # Look for a byteswapping method. + check_c_source_compiles(" + int main(void) + { + __builtin_bswap16(1); + __builtin_bswap32(1); + __builtin_bswap64(1); + return 0; + } + " + HAVE___BUILTIN_BSWAPXX) + if(HAVE___BUILTIN_BSWAPXX) + tuklib_add_definitions("${TARGET_OR_ALL}" HAVE___BUILTIN_BSWAPXX) + else() + check_include_file(byteswap.h HAVE_BYTESWAP_H) + if(HAVE_BYTESWAP_H) + tuklib_add_definitions("${TARGET_OR_ALL}" HAVE_BYTESWAP_H) + check_symbol_exists(bswap_16 byteswap.h HAVE_BSWAP_16) + tuklib_add_definition_if("${TARGET_OR_ALL}" HAVE_BSWAP_16) + check_symbol_exists(bswap_32 byteswap.h HAVE_BSWAP_32) + tuklib_add_definition_if("${TARGET_OR_ALL}" HAVE_BSWAP_32) + check_symbol_exists(bswap_64 byteswap.h HAVE_BSWAP_64) + tuklib_add_definition_if("${TARGET_OR_ALL}" HAVE_BSWAP_64) + else() + check_include_file(sys/endian.h HAVE_SYS_ENDIAN_H) + if(HAVE_SYS_ENDIAN_H) + tuklib_add_definitions("${TARGET_OR_ALL}" HAVE_SYS_ENDIAN_H) + else() + check_include_file(sys/byteorder.h HAVE_SYS_BYTEORDER_H) + tuklib_add_definition_if("${TARGET_OR_ALL}" + HAVE_SYS_BYTEORDER_H) + endif() + endif() + endif() + + # Autodetect if unaligned memory access is fast when the cache variable + # TUKLIB_FAST_UNALIGNED_ACCESS isn't set. The result is stored in + # FAST_UNALIGNED_GUESS. Assume that unaligned access shouldn't be used. + # Initialize the variable here so that it's never undefined in the + # option() command after the if()...endif() block. + set(FAST_UNALIGNED_GUESS OFF) + if(NOT DEFINED TUKLIB_FAST_UNALIGNED_ACCESS) + message(CHECK_START "Check if unaligned memory access should be used") + + # Guess that unaligned access is fast on these archs: + # - 32/64-bit x86 / x86-64 + # - 32/64-bit big endian PowerPC + # - 64-bit little endian PowerPC + # - 32/64-bit Loongarch (*) + # - Some 32-bit ARM + # - Some 64-bit ARM64 (AArch64) + # - Some 32/64-bit RISC-V + # + # (*) See sections 7.4, 8.1, and 8.2: + # https://github.com/loongson/la-softdev-convention/blob/v0.2/la-softdev-convention.adoc + # + # That is, desktop and server processors likely support + # unaligned access in hardware but embedded processors + # might not. GCC defaults to -mno-strict-align and so + # do majority of GNU/Linux distributions. As of + # GCC 15.2, there is no predefined macro to detect + # if -mstrict-align or -mno-strict-align is in effect. + # We use heuristics based on compiler output. + # + # CMake < 4.1 doesn't provide a standardized/normalized list of arch + # names. For example, x86-64 may be "x86_64" (Linux), + # "AMD64" (Windows), or even "EM64T" (64-bit WinXP). + string(TOLOWER "${CMAKE_SYSTEM_PROCESSOR}" PROCESSOR) + + # CMake 4.1 made CMAKE__COMPILER_ARCHITECTURE_ID useful on many + # targets. In earlier versions it's still useful with MSVC with which + # CMAKE_SYSTEM_PROCESSOR can refer to the build machine. + if(NOT CMAKE_C_COMPILER_ARCHITECTURE_ID STREQUAL "") + # CMake 4.2.0 docs say that the list typically has only one entry + # except possibly on macOS. On macOS, most (all?) archs support + # unaligned access. Just pick the first one from the list. + list(GET CMAKE_C_COMPILER_ARCHITECTURE_ID 0 PROCESSOR) + string(TOLOWER "${PROCESSOR}" PROCESSOR) + endif() + + # There is no ^ in the first regex branch to allow "i" at + # the beginning so it can match "i386" to "i786", and "x86_64". + if(PROCESSOR MATCHES "[x34567]86|^x64|^amd64|^em64t") + set(FAST_UNALIGNED_GUESS ON) + + elseif(PROCESSOR MATCHES "^powerpc|^ppc") + if(WORDS_BIGENDIAN OR PROCESSOR MATCHES "64") + set(FAST_UNALIGNED_GUESS ON) + endif() + + elseif(PROCESSOR MATCHES "^arm|^riscv" AND + NOT PROCESSOR MATCHES "^arm64") + # On 32-bit ARM, GCC and Clang # #define __ARM_FEATURE_UNALIGNED + # if and only if unaligned access is supported. + # + # RISC-V C API Specification says that if + # __riscv_misaligned_fast is defined then + # unaligned access is known to be fast. + # + # MSVC is handled as a special case: We assume that + # 32-bit ARM supports fast unaligned access. + # If MSVC gets RISC-V support then this will assume + # fast unaligned access on RISC-V too. + check_c_source_compiles(" + #if !defined(__ARM_FEATURE_UNALIGNED) \ + && !defined(__riscv_misaligned_fast) \ + && !defined(_MSC_VER) + compile error + #endif + int main(void) { return 0; } + " + TUKLIB_FAST_UNALIGNED_DEFINED_BY_PREPROCESSOR) + if(TUKLIB_FAST_UNALIGNED_DEFINED_BY_PREPROCESSOR) + set(FAST_UNALIGNED_GUESS ON) + endif() + + elseif(PROCESSOR MATCHES "^aarch64|^arm64") + # On ARM64, Clang defines __ARM_FEATURE_UNALIGNED if and only if + # unaligned access is supported. However, GCC (at least up to 15.2.0) + # defines it even when using -mstrict-align, so autodetection with + # this macro doesn't work with GCC on ARM64. (It does work on + # 32-bit ARM.) See: + # + # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=111555 + # + # We need three checks: + # + # 1. If __ARM_FEATURE_UNALIGNED is defined and the + # compiler isn't GCC, unaligned access is enabled. + # If the compiler is MSVC, unaligned access is + # enabled even without __ARM_FEATURE_UNALIGNED. + check_c_source_compiles(" + #if defined(__ARM_FEATURE_UNALIGNED) \ + && (!defined(__GNUC__) || defined(__clang__)) + #elif defined(_MSC_VER) + #else + compile error + #endif + int main(void) { return 0; } + " + TUKLIB_FAST_UNALIGNED_DEFINED_BY_PREPROCESSOR) + if(TUKLIB_FAST_UNALIGNED_DEFINED_BY_PREPROCESSOR) + set(FAST_UNALIGNED_GUESS ON) + else() + # 2. If __ARM_FEATURE_UNALIGNED is not defined, + # unaligned access is disabled. + check_c_source_compiles(" + #ifdef __ARM_FEATURE_UNALIGNED + compile error + #endif + int main(void) { return 0; } + " + TUKLIB_FAST_UNALIGNED_NOT_DEFINED_BY_PREPROCESSOR) + if(NOT TUKLIB_FAST_UNALIGNED_NOT_DEFINED_BY_PREPROCESSOR) + # 3. Use heuristics to detect if -mstrict-align is + # in effect when building with GCC. + tuklib_integer_internal_strict_align("[ \t]ldrb[ \t]") + if(NOT TUKLIB_INTEGER_STRICT_ALIGN) + set(FAST_UNALIGNED_GUESS ON) + endif() + endif() + endif() + + elseif(PROCESSOR MATCHES "^loongarch") + tuklib_integer_internal_strict_align("[ \t]ld\\.bu[ \t]") + if(NOT TUKLIB_INTEGER_STRICT_ALIGN) + set(FAST_UNALIGNED_GUESS ON) + endif() + endif() + + if(FAST_UNALIGNED_GUESS) + message(CHECK_PASS "yes") + else() + message(CHECK_PASS "no") + endif() + endif() + + option(TUKLIB_FAST_UNALIGNED_ACCESS + "Enable if the system supports *fast* unaligned memory access \ +with 16-bit, 32-bit, and 64-bit integers." + "${FAST_UNALIGNED_GUESS}") + tuklib_add_definition_if("${TARGET_OR_ALL}" TUKLIB_FAST_UNALIGNED_ACCESS) + + # Unsafe type punning: + option(TUKLIB_USE_UNSAFE_TYPE_PUNNING + "This introduces strict aliasing violations and \ +may result in broken code. However, this might improve performance \ +in some cases, especially with old compilers \ +(e.g. GCC 3 and early 4.x on x86, GCC < 6 on ARMv6 and ARMv7)." + OFF) + tuklib_add_definition_if("${TARGET_OR_ALL}" TUKLIB_USE_UNSAFE_TYPE_PUNNING) + + # Check for GCC/Clang __builtin_assume_aligned(). + check_c_source_compiles( + "int main(void) { __builtin_assume_aligned(\"\", 1); return 0; }" + HAVE___BUILTIN_ASSUME_ALIGNED) + tuklib_add_definition_if("${TARGET_OR_ALL}" HAVE___BUILTIN_ASSUME_ALIGNED) +endfunction() diff --git a/src/native/external/xz/cmake/tuklib_large_file_support.cmake b/src/native/external/xz/cmake/tuklib_large_file_support.cmake new file mode 100644 index 00000000000000..7c11969cdfe5f9 --- /dev/null +++ b/src/native/external/xz/cmake/tuklib_large_file_support.cmake @@ -0,0 +1,54 @@ +# SPDX-License-Identifier: 0BSD + +############################################################################# +# +# tuklib_large_file_support.cmake +# +# If off_t is less than 64 bits by default and -D_FILE_OFFSET_BITS=64 +# makes off_t become 64-bit, the CMake option LARGE_FILE_SUPPORT is +# provided (ON by default) and -D_FILE_OFFSET_BITS=64 is added to +# the compile definitions if LARGE_FILE_SUPPORT is ON. +# +# Author: Lasse Collin +# +############################################################################# + +include("${CMAKE_CURRENT_LIST_DIR}/tuklib_common.cmake") +include(CMakePushCheckState) +include(CheckCSourceCompiles) + +function(tuklib_large_file_support TARGET_OR_ALL) + # MSVC must be handled specially in the C code. + if(MSVC) + return() + endif() + + set(TUKLIB_LARGE_FILE_SUPPORT_TEST + "#include + int foo[sizeof(off_t) >= 8 ? 1 : -1]; + int main(void) { return 0; }") + + check_c_source_compiles("${TUKLIB_LARGE_FILE_SUPPORT_TEST}" + TUKLIB_LARGE_FILE_SUPPORT_BY_DEFAULT) + + if(NOT TUKLIB_LARGE_FILE_SUPPORT_BY_DEFAULT) + cmake_push_check_state() + # This needs -D. + list(APPEND CMAKE_REQUIRED_DEFINITIONS "-D_FILE_OFFSET_BITS=64") + check_c_source_compiles("${TUKLIB_LARGE_FILE_SUPPORT_TEST}" + TUKLIB_LARGE_FILE_SUPPORT_WITH_FOB64) + cmake_pop_check_state() + endif() + + if(TUKLIB_LARGE_FILE_SUPPORT_WITH_FOB64) + # Show the option only when _FILE_OFFSET_BITS=64 affects sizeof(off_t). + option(LARGE_FILE_SUPPORT + "Use -D_FILE_OFFSET_BITS=64 to support files larger than 2 GiB." + ON) + + if(LARGE_FILE_SUPPORT) + # This must not use -D. + tuklib_add_definitions("${TARGET_OR_ALL}" "_FILE_OFFSET_BITS=64") + endif() + endif() +endfunction() diff --git a/src/native/external/xz/cmake/tuklib_mbstr.cmake b/src/native/external/xz/cmake/tuklib_mbstr.cmake new file mode 100644 index 00000000000000..bd234cbc96bbfe --- /dev/null +++ b/src/native/external/xz/cmake/tuklib_mbstr.cmake @@ -0,0 +1,25 @@ +# SPDX-License-Identifier: 0BSD + +############################################################################# +# +# tuklib_mbstr.cmake - see tuklib_mbstr.m4 for description and comments +# +# Author: Lasse Collin +# +############################################################################# + +include("${CMAKE_CURRENT_LIST_DIR}/tuklib_common.cmake") +include(CheckSymbolExists) + +function(tuklib_mbstr TARGET_OR_ALL) + check_symbol_exists(mbrtowc wchar.h HAVE_MBRTOWC) + tuklib_add_definition_if("${TARGET_OR_ALL}" HAVE_MBRTOWC) + + # NOTE: wcwidth() requires _GNU_SOURCE or _XOPEN_SOURCE on GNU/Linux. + check_symbol_exists(wcwidth wchar.h HAVE_WCWIDTH) + tuklib_add_definition_if("${TARGET_OR_ALL}" HAVE_WCWIDTH) + + # NOTE: vasprintf() requires _GNU_SOURCE on GNU/Linux. + check_symbol_exists(vasprintf stdio.h HAVE_VASPRINTF) + tuklib_add_definition_if("${TARGET_OR_ALL}" HAVE_VASPRINTF) +endfunction() diff --git a/src/native/external/xz/cmake/tuklib_physmem.cmake b/src/native/external/xz/cmake/tuklib_physmem.cmake new file mode 100644 index 00000000000000..d4d3f3d260c38e --- /dev/null +++ b/src/native/external/xz/cmake/tuklib_physmem.cmake @@ -0,0 +1,153 @@ +# SPDX-License-Identifier: 0BSD + +############################################################################# +# +# tuklib_physmem.cmake - see tuklib_physmem.m4 for description and comments +# +# NOTE: Compared tuklib_physmem.m4, this lacks support for Tru64, IRIX, and +# Linux sysinfo() (usually sysconf() is used on GNU/Linux). +# +# Author: Lasse Collin +# +############################################################################# + +include("${CMAKE_CURRENT_LIST_DIR}/tuklib_common.cmake") +include(CMakePushCheckState) +include(CheckCSourceCompiles) +include(CheckIncludeFile) + +function(tuklib_physmem_internal_check) + # Shortcut on Windows: + if(WIN32 OR CYGWIN) + # Nothing to do, the tuklib_physmem.c handles it. + set(TUKLIB_PHYSMEM_DEFINITIONS "" CACHE INTERNAL "") + return() + endif() + + # Full check for special cases: + check_c_source_compiles(" + #if defined(_WIN32) || defined(__CYGWIN__) || defined(__OS2__) \ + || defined(__DJGPP__) || defined(__VMS) \ + || defined(AMIGA) || defined(__AROS__) || defined(__QNX__) + int main(void) { return 0; } + #else + compile error + #endif + " + TUKLIB_PHYSMEM_SPECIAL) + if(TUKLIB_PHYSMEM_SPECIAL) + # Nothing to do, the tuklib_physmem.c handles it. + set(TUKLIB_PHYSMEM_DEFINITIONS "" CACHE INTERNAL "") + return() + endif() + + # Look for AIX-specific solution before sysconf(), because the test + # for sysconf() will pass on AIX but won't actually work + # (sysconf(_SC_PHYS_PAGES) compiles but always returns -1 on AIX). + check_c_source_compiles(" + #include + int main(void) + { + (void)_system_configuration.physmem; + return 0; + } + " + TUKLIB_PHYSMEM_AIX) + if(TUKLIB_PHYSMEM_AIX) + set(TUKLIB_PHYSMEM_DEFINITIONS "TUKLIB_PHYSMEM_AIX" CACHE INTERNAL "") + return() + endif() + + # sysconf() + check_c_source_compiles(" + #include + int main(void) + { + long i; + i = sysconf(_SC_PAGESIZE); + i = sysconf(_SC_PHYS_PAGES); + return 0; + } + " + TUKLIB_PHYSMEM_SYSCONF) + if(TUKLIB_PHYSMEM_SYSCONF) + set(TUKLIB_PHYSMEM_DEFINITIONS "TUKLIB_PHYSMEM_SYSCONF" + CACHE INTERNAL "") + return() + endif() + + # sysctl() + cmake_push_check_state() + check_include_file(sys/param.h HAVE_SYS_PARAM_H) + if(HAVE_SYS_PARAM_H) + list(APPEND CMAKE_REQUIRED_DEFINITIONS -DHAVE_SYS_PARAM_H) + endif() + check_c_source_compiles(" + #ifdef HAVE_SYS_PARAM_H + # include + #endif + #include + int main(void) + { + int name[2] = { CTL_HW, HW_PHYSMEM }; + unsigned long mem; + size_t mem_ptr_size = sizeof(mem); + sysctl(name, 2, &mem, &mem_ptr_size, NULL, 0); + return 0; + } + " + TUKLIB_PHYSMEM_SYSCTL) + cmake_pop_check_state() + if(TUKLIB_PHYSMEM_SYSCTL) + if(HAVE_SYS_PARAM_H) + set(TUKLIB_PHYSMEM_DEFINITIONS + "HAVE_PARAM_H;TUKLIB_PHYSMEM_SYSCTL" + CACHE INTERNAL "") + else() + set(TUKLIB_PHYSMEM_DEFINITIONS + "TUKLIB_PHYSMEM_SYSCTL" + CACHE INTERNAL "") + endif() + return() + endif() + + # HP-UX + check_c_source_compiles(" + #include + #include + int main(void) + { + struct pst_static pst; + pstat_getstatic(&pst, sizeof(pst), 1, 0); + (void)pst.physical_memory; + (void)pst.page_size; + return 0; + } + " + TUKLIB_PHYSMEM_PSTAT_GETSTATIC) + if(TUKLIB_PHYSMEM_PSTAT_GETSTATIC) + set(TUKLIB_PHYSMEM_DEFINITIONS "TUKLIB_PHYSMEM_PSTAT_GETSTATIC" + CACHE INTERNAL "") + return() + endif() +endfunction() + +function(tuklib_physmem TARGET_OR_ALL) + if(NOT DEFINED TUKLIB_PHYSMEM_FOUND) + message(STATUS "Checking how to detect the amount of physical memory") + tuklib_physmem_internal_check() + + if(DEFINED TUKLIB_PHYSMEM_DEFINITIONS) + set(TUKLIB_PHYSMEM_FOUND 1 CACHE INTERNAL "") + else() + set(TUKLIB_PHYSMEM_FOUND 0 CACHE INTERNAL "") + message(WARNING + "No method to detect the amount of physical memory was found") + endif() + endif() + + if(TUKLIB_PHYSMEM_FOUND) + tuklib_add_definitions("${TARGET_OR_ALL}" + "${TUKLIB_PHYSMEM_DEFINITIONS}") + endif() +endfunction() diff --git a/src/native/external/xz/cmake/tuklib_progname.cmake b/src/native/external/xz/cmake/tuklib_progname.cmake new file mode 100644 index 00000000000000..a1f15bdeac306a --- /dev/null +++ b/src/native/external/xz/cmake/tuklib_progname.cmake @@ -0,0 +1,19 @@ +# SPDX-License-Identifier: 0BSD + +############################################################################# +# +# tuklib_progname.cmake - see tuklib_progname.m4 for description and comments +# +# Author: Lasse Collin +# +############################################################################# + +include("${CMAKE_CURRENT_LIST_DIR}/tuklib_common.cmake") +include(CheckSymbolExists) + +function(tuklib_progname TARGET_OR_ALL) + # NOTE: This glibc extension requires _GNU_SOURCE. + check_symbol_exists(program_invocation_name errno.h + HAVE_PROGRAM_INVOCATION_NAME) + tuklib_add_definition_if("${TARGET_OR_ALL}" HAVE_PROGRAM_INVOCATION_NAME) +endfunction() diff --git a/src/native/external/xz/configure.ac b/src/native/external/xz/configure.ac new file mode 100644 index 00000000000000..30e53f219685af --- /dev/null +++ b/src/native/external/xz/configure.ac @@ -0,0 +1,1453 @@ +# -*- Autoconf -*- +# SPDX-License-Identifier: 0BSD + +############################################################################### +# +# Process this file with autoconf to produce a configure script. +# +# Author: Lasse Collin +# +############################################################################### + +# NOTE: Don't add useless checks. autoscan detects this and that, but don't +# let it confuse you. For example, we don't care about checking for behavior +# of malloc(), stat(), or lstat(), since we don't use those functions in +# a way that would cause the problems the autoconf macros check. + +AC_PREREQ([2.69]) + +AC_INIT([XZ Utils], m4_esyscmd([/bin/sh build-aux/version.sh]), + [xz@tukaani.org], [xz], [https://tukaani.org/xz/]) +AC_CONFIG_SRCDIR([src/liblzma/common/common.h]) +AC_CONFIG_AUX_DIR([build-aux]) +AC_CONFIG_MACRO_DIR([m4]) +AC_CONFIG_HEADERS([config.h]) + +echo +echo "$PACKAGE_STRING" + +echo +echo "System type:" +# This is needed to know if assembler optimizations can be used. +AC_CANONICAL_HOST + +# We do some special things on Windows (32-bit or 64-bit) builds. +case $host_os in + mingw* | cygwin | msys) is_w32=yes ;; + *) is_w32=no ;; +esac +AM_CONDITIONAL([COND_W32], [test "$is_w32" = yes]) + +# We need to use $EXEEXT with $(LN_S) when creating symlinks to +# executables. Cygwin is an exception to this, since it is recommended +# that symlinks don't have the .exe suffix. To make this work, we +# define LN_EXEEXT. +# +# MSYS2 is treated the same way as Cygwin. It uses plain "msys" like +# the original MSYS when building MSYS/MSYS2-binaries. Hopefully this +# doesn't break things for the original MSYS developers. Note that this +# doesn't affect normal MSYS/MSYS2 users building non-MSYS/MSYS2 binaries +# since in that case the $host_os is usually mingw32. +case $host_os in + cygwin | msys) LN_EXEEXT= ;; + *) LN_EXEEXT='$(EXEEXT)' ;; +esac +AC_SUBST([LN_EXEEXT]) + +echo +echo "Configure options:" +AM_CFLAGS= + + +############# +# Debugging # +############# + +AC_MSG_CHECKING([if debugging code should be compiled]) +AC_ARG_ENABLE([debug], AS_HELP_STRING([--enable-debug], [Enable debugging code.]), + [], enable_debug=no) +if test "x$enable_debug" = xyes; then + AC_MSG_RESULT([yes]) +else + AC_DEFINE([NDEBUG], [1], [Define to 1 to disable debugging code.]) + AC_MSG_RESULT([no]) +fi + + +########### +# Filters # +########### + +m4_define([SUPPORTED_FILTERS], [lzma1,lzma2,delta,x86,powerpc,ia64,arm,armthumb,arm64,sparc,riscv])dnl +m4_define([SIMPLE_FILTERS], [x86,powerpc,ia64,arm,armthumb,arm64,sparc,riscv]) +m4_define([LZ_FILTERS], [lzma1,lzma2]) + +m4_foreach([NAME], [SUPPORTED_FILTERS], +[enable_filter_[]NAME=no +enable_encoder_[]NAME=no +enable_decoder_[]NAME=no +])dnl + +AC_MSG_CHECKING([which encoders to build]) +AC_ARG_ENABLE([encoders], AS_HELP_STRING([--enable-encoders=LIST], + [Comma-separated list of encoders to build. Default=all. + Available encoders:] + m4_translit(m4_defn([SUPPORTED_FILTERS]), [,], [ ])), + [], [enable_encoders=SUPPORTED_FILTERS]) +enable_encoders=`echo "$enable_encoders" | sed 's/,/ /g'` +if test "x$enable_encoders" = xno || test "x$enable_encoders" = x; then + enable_encoders=no + AC_MSG_RESULT([(none)]) +else + for arg in $enable_encoders + do + case $arg in m4_foreach([NAME], [SUPPORTED_FILTERS], [ + NAME) + enable_filter_[]NAME=yes + enable_encoder_[]NAME=yes + AC_DEFINE(HAVE_ENCODER_[]m4_toupper(NAME), [1], + [Define to 1 if] NAME [encoder is enabled.]) + ;;]) + *) + AC_MSG_RESULT([]) + AC_MSG_ERROR([unknown filter: $arg]) + ;; + esac + done + AC_DEFINE([HAVE_ENCODERS], [1], + [Define to 1 if any of HAVE_ENCODER_foo have been defined.]) + AC_MSG_RESULT([$enable_encoders]) +fi + +AC_MSG_CHECKING([which decoders to build]) +AC_ARG_ENABLE([decoders], AS_HELP_STRING([--enable-decoders=LIST], + [Comma-separated list of decoders to build. Default=all. + Available decoders are the same as available encoders.]), + [], [enable_decoders=SUPPORTED_FILTERS]) +enable_decoders=`echo "$enable_decoders" | sed 's/,/ /g'` +if test "x$enable_decoders" = xno || test "x$enable_decoders" = x; then + enable_decoders=no + AC_MSG_RESULT([(none)]) +else + for arg in $enable_decoders + do + case $arg in m4_foreach([NAME], [SUPPORTED_FILTERS], [ + NAME) + enable_filter_[]NAME=yes + enable_decoder_[]NAME=yes + AC_DEFINE(HAVE_DECODER_[]m4_toupper(NAME), [1], + [Define to 1 if] NAME [decoder is enabled.]) + ;;]) + *) + AC_MSG_RESULT([]) + AC_MSG_ERROR([unknown filter: $arg]) + ;; + esac + done + AC_DEFINE([HAVE_DECODERS], [1], + [Define to 1 if any of HAVE_DECODER_foo have been defined.]) + AC_MSG_RESULT([$enable_decoders]) +fi + +if test "x$enable_encoder_lzma2$enable_encoder_lzma1" = xyesno \ + || test "x$enable_decoder_lzma2$enable_decoder_lzma1" = xyesno; then + AC_MSG_ERROR([LZMA2 requires that LZMA1 is also enabled.]) +fi + +AM_CONDITIONAL(COND_MAIN_ENCODER, test "x$enable_encoders" != xno) +AM_CONDITIONAL(COND_MAIN_DECODER, test "x$enable_decoders" != xno) + +m4_foreach([NAME], [SUPPORTED_FILTERS], +[AM_CONDITIONAL(COND_FILTER_[]m4_toupper(NAME), test "x$enable_filter_[]NAME" = xyes) +AM_CONDITIONAL(COND_ENCODER_[]m4_toupper(NAME), test "x$enable_encoder_[]NAME" = xyes) +AM_CONDITIONAL(COND_DECODER_[]m4_toupper(NAME), test "x$enable_decoder_[]NAME" = xyes) +])dnl + +# The so called "simple filters" share common code. +enable_filter_simple=no +enable_encoder_simple=no +enable_decoder_simple=no +m4_foreach([NAME], [SIMPLE_FILTERS], +[test "x$enable_filter_[]NAME" = xyes && enable_filter_simple=yes +test "x$enable_encoder_[]NAME" = xyes && enable_encoder_simple=yes +test "x$enable_decoder_[]NAME" = xyes && enable_decoder_simple=yes +])dnl +AM_CONDITIONAL(COND_FILTER_SIMPLE, test "x$enable_filter_simple" = xyes) +AM_CONDITIONAL(COND_ENCODER_SIMPLE, test "x$enable_encoder_simple" = xyes) +AM_CONDITIONAL(COND_DECODER_SIMPLE, test "x$enable_decoder_simple" = xyes) + +# LZ-based filters share common code. +enable_filter_lz=no +enable_encoder_lz=no +enable_decoder_lz=no +m4_foreach([NAME], [LZ_FILTERS], +[test "x$enable_filter_[]NAME" = xyes && enable_filter_lz=yes +test "x$enable_encoder_[]NAME" = xyes && enable_encoder_lz=yes +test "x$enable_decoder_[]NAME" = xyes && enable_decoder_lz=yes +])dnl +AM_CONDITIONAL(COND_FILTER_LZ, test "x$enable_filter_lz" = xyes) +AM_CONDITIONAL(COND_ENCODER_LZ, test "x$enable_encoder_lz" = xyes) +AM_CONDITIONAL(COND_DECODER_LZ, test "x$enable_decoder_lz" = xyes) + + +################# +# Match finders # +################# + +m4_define([SUPPORTED_MATCH_FINDERS], [hc3,hc4,bt2,bt3,bt4]) + +m4_foreach([NAME], [SUPPORTED_MATCH_FINDERS], +[enable_match_finder_[]NAME=no +]) + +AC_MSG_CHECKING([which match finders to build]) +AC_ARG_ENABLE([match-finders], AS_HELP_STRING([--enable-match-finders=LIST], + [Comma-separated list of match finders to build. Default=all. + At least one match finder is required for encoding with + the LZMA1 and LZMA2 filters. Available match finders:] + m4_translit(m4_defn([SUPPORTED_MATCH_FINDERS]), [,], [ ])), [], + [enable_match_finders=SUPPORTED_MATCH_FINDERS]) +enable_match_finders=`echo "$enable_match_finders" | sed 's/,/ /g'` +if test "x$enable_encoder_lz" = xyes ; then + if test -z "$enable_match_finders"; then + AC_MSG_ERROR([At least one match finder is required for an LZ-based encoder.]) + fi + + for arg in $enable_match_finders + do + case $arg in m4_foreach([NAME], [SUPPORTED_MATCH_FINDERS], [ + NAME) + enable_match_finder_[]NAME=yes + AC_DEFINE(HAVE_MF_[]m4_toupper(NAME), [1], + [Define to 1 to enable] NAME [match finder.]) + ;;]) + *) + AC_MSG_RESULT([]) + AC_MSG_ERROR([unknown match finder: $arg]) + ;; + esac + done + AC_MSG_RESULT([$enable_match_finders]) +else + AC_MSG_RESULT([(none because not building any LZ-based encoder)]) +fi + + +#################### +# Integrity checks # +#################### + +m4_define([SUPPORTED_CHECKS], [crc32,crc64,sha256]) + +m4_foreach([NAME], [SUPPORTED_CHECKS], +[enable_check_[]NAME=no +])dnl + +AC_MSG_CHECKING([which integrity checks to build]) +AC_ARG_ENABLE([checks], AS_HELP_STRING([--enable-checks=LIST], + [Comma-separated list of integrity checks to build. + Default=all. Available integrity checks:] + m4_translit(m4_defn([SUPPORTED_CHECKS]), [,], [ ])), + [], [enable_checks=SUPPORTED_CHECKS]) +enable_checks=`echo "$enable_checks" | sed 's/,/ /g'` +if test "x$enable_checks" = xno || test "x$enable_checks" = x; then + AC_MSG_RESULT([(none)]) +else + for arg in $enable_checks + do + case $arg in m4_foreach([NAME], [SUPPORTED_CHECKS], [ + NAME) + enable_check_[]NAME=yes + AC_DEFINE(HAVE_CHECK_[]m4_toupper(NAME), [1], + [Define to 1 if] NAME + [integrity check is enabled.]) + ;;]) + *) + AC_MSG_RESULT([]) + AC_MSG_ERROR([unknown integrity check: $arg]) + ;; + esac + done + AC_MSG_RESULT([$enable_checks]) +fi +if test "x$enable_check_crc32" = xno ; then + AC_MSG_ERROR([For now, the CRC32 check must always be enabled.]) +fi + +m4_foreach([NAME], [SUPPORTED_CHECKS], +[AM_CONDITIONAL(COND_CHECK_[]m4_toupper(NAME), test "x$enable_check_[]NAME" = xyes) +])dnl + +AC_MSG_CHECKING([if external SHA-256 should be used]) +AC_ARG_ENABLE([external-sha256], AS_HELP_STRING([--enable-external-sha256], + [Use SHA-256 code from the operating system. + See INSTALL for possible subtle problems.]), + [], [enable_external_sha256=no]) +if test "x$enable_check_sha256" != "xyes"; then + enable_external_sha256=no +fi +if test "x$enable_external_sha256" = xyes; then + AC_MSG_RESULT([yes]) +else + AC_MSG_RESULT([no]) +fi + + +############# +# MicroLZMA # +############# + +AC_MSG_CHECKING([if MicroLZMA support should be built]) +AC_ARG_ENABLE([microlzma], AS_HELP_STRING([--disable-microlzma], + [Do not build MicroLZMA encoder and decoder. + It is needed by specific applications only, + for example, erofs-utils.]), + [], [enable_microlzma=yes]) +case $enable_microlzma in + yes | no) + AC_MSG_RESULT([$enable_microlzma]) + ;; + *) + AC_MSG_RESULT([]) + AC_MSG_ERROR([--enable-microlzma accepts only 'yes' or 'no'.]) + ;; +esac +AM_CONDITIONAL(COND_MICROLZMA, test "x$enable_microlzma" = xyes) + + +############################# +# .lz (lzip) format support # +############################# + +AC_MSG_CHECKING([if .lz (lzip) decompression support should be built]) +AC_ARG_ENABLE([lzip-decoder], AS_HELP_STRING([--disable-lzip-decoder], + [Disable decompression support for .lz (lzip) files.]), + [], [enable_lzip_decoder=yes]) +if test "x$enable_decoder_lzma1" != xyes; then + enable_lzip_decoder=no + AC_MSG_RESULT([no because LZMA1 decoder is disabled]) +elif test "x$enable_lzip_decoder" = xyes; then + AC_DEFINE([HAVE_LZIP_DECODER], [1], + [Define to 1 if .lz (lzip) decompression support is enabled.]) + AC_MSG_RESULT([yes]) +else + AC_MSG_RESULT([no]) +fi +AM_CONDITIONAL(COND_LZIP_DECODER, test "x$enable_lzip_decoder" = xyes) + + +########################### +# Assembler optimizations # +########################### + +AC_MSG_CHECKING([if assembler optimizations should be used]) +AC_ARG_ENABLE([assembler], AS_HELP_STRING([--disable-assembler], + [Do not use assembler optimizations even if such exist + for the architecture.]), + [], [enable_assembler=yes]) +if test "x$enable_assembler" = xyes; then + enable_assembler=no + case $host_os in + # Darwin should work too but only if not creating universal + # binaries. Solaris x86 could work too but I cannot test. + linux* | *bsd* | mingw* | cygwin | msys | *djgpp*) + case $host_cpu in + i?86) enable_assembler=x86 ;; + esac + ;; + esac +fi +case $enable_assembler in + x86) + AC_DEFINE([HAVE_CRC_X86_ASM], [1], [Define to 1 if + the 32-bit x86 CRC assembly files are used.]) + AC_MSG_RESULT([x86]) + ;; + no) + AC_MSG_RESULT([no]) + ;; + *) + AC_MSG_RESULT([]) + AC_MSG_ERROR([--enable-assembler accepts only 'yes', 'no', or 'x86' (32-bit).]) + ;; +esac +AM_CONDITIONAL(COND_ASM_X86, test "x$enable_assembler" = xx86) + + +############# +# CLMUL CRC # +############# + +AC_ARG_ENABLE([clmul-crc], AS_HELP_STRING([--disable-clmul-crc], + [Do not use carryless multiplication for CRC calculation + even if support for it is detected.]), + [], [enable_clmul_crc=yes]) + + +############################ +# ARM64 CRC32 Instructions # +############################ + +AC_ARG_ENABLE([arm64-crc32], AS_HELP_STRING([--disable-arm64-crc32], + [Do not use ARM64 CRC32 instructions even if support for it + is detected.]), + [], [enable_arm64_crc32=yes]) + + +################################ +# LoongArch CRC32 instructions # +################################ + +AC_ARG_ENABLE([loongarch-crc32], AS_HELP_STRING([--disable-loongarch-crc32], + [Do not use LoongArch CRC32 instructions even if support for + them is detected.]), + [], [enable_loongarch_crc32=yes]) + + +##################### +# Size optimization # +##################### + +AC_MSG_CHECKING([if small size is preferred over speed]) +AC_ARG_ENABLE([small], AS_HELP_STRING([--enable-small], + [Make liblzma smaller and a little slower. + This is disabled by default to optimize for speed.]), + [], [enable_small=no]) +if test "x$enable_small" = xyes; then + AC_DEFINE([HAVE_SMALL], [1], [Define to 1 if optimizing for size.]) +elif test "x$enable_small" != xno; then + AC_MSG_RESULT([]) + AC_MSG_ERROR([--enable-small accepts only 'yes' or 'no']) +fi +AC_MSG_RESULT([$enable_small]) +AM_CONDITIONAL(COND_SMALL, test "x$enable_small" = xyes) + + +############# +# Threading # +############# + +AC_MSG_CHECKING([if threading support is wanted]) +AC_ARG_ENABLE([threads], AS_HELP_STRING([--enable-threads=METHOD], + [Supported METHODS are 'yes', 'no', 'posix', 'win95', and + 'vista'. The default is 'yes'. Using 'no' together with + --enable-small makes liblzma thread unsafe.]), + [], [enable_threads=yes]) + +if test "x$enable_threads" = xyes; then + case $host_os in + mingw*) + case $host_cpu in + i?86) enable_threads=win95 ;; + *) enable_threads=vista ;; + esac + ;; + *) + enable_threads=posix + ;; + esac +fi + +case $enable_threads in + posix | win95 | vista) + AC_MSG_RESULT([yes, $enable_threads]) + ;; + no) + AC_MSG_RESULT([no]) + ;; + *) + AC_MSG_RESULT([]) + AC_MSG_ERROR([--enable-threads only accepts 'yes', 'no', 'posix', 'win95', or 'vista']) + ;; +esac + +# We use the actual result a little later. + + +######################### +# Assumed amount of RAM # +######################### + +# We use 128 MiB as default, because it will allow decompressing files +# created with "xz -9". It would be slightly safer to guess a lower value, +# but most systems, on which we don't have any way to determine the amount +# of RAM, will probably have at least 128 MiB of RAM. +AC_MSG_CHECKING([how much RAM to assume if the real amount is unknown]) +AC_ARG_ENABLE([assume-ram], AS_HELP_STRING([--enable-assume-ram=SIZE], + [If and only if the real amount of RAM cannot be determined, + assume SIZE MiB. The default is 128 MiB. This affects the + default memory usage limit.]), + [], [enable_assume_ram=128]) +assume_ram_check=`echo "$enable_assume_ram" | tr -d 0123456789` +if test -z "$enable_assume_ram" || test -n "$assume_ram_check"; then + AC_MSG_RESULT([]) + AC_MSG_ERROR([--enable-assume-ram accepts only an integer argument]) +fi +AC_MSG_RESULT([$enable_assume_ram MiB]) +AC_DEFINE_UNQUOTED([ASSUME_RAM], [$enable_assume_ram], + [How many MiB of RAM to assume if the real amount cannot + be determined.]) + + +######################### +# Components to install # +######################### + +AC_ARG_ENABLE([xz], [AS_HELP_STRING([--disable-xz], + [do not build the xz tool])], + [], [enable_xz=yes]) +AM_CONDITIONAL([COND_XZ], [test x$enable_xz != xno]) + +AC_ARG_ENABLE([xzdec], [AS_HELP_STRING([--disable-xzdec], + [do not build xzdec])], + [], [enable_xzdec=yes]) +test "x$enable_decoders" = xno && enable_xzdec=no +AM_CONDITIONAL([COND_XZDEC], [test x$enable_xzdec != xno]) + +AC_ARG_ENABLE([lzmadec], [AS_HELP_STRING([--disable-lzmadec], + [do not build lzmadec + (it exists primarily for LZMA Utils compatibility)])], + [], [enable_lzmadec=yes]) +test "x$enable_decoder_lzma1" = xno && enable_lzmadec=no +AM_CONDITIONAL([COND_LZMADEC], [test x$enable_lzmadec != xno]) + +AC_ARG_ENABLE([lzmainfo], [AS_HELP_STRING([--disable-lzmainfo], + [do not build lzmainfo + (it exists primarily for LZMA Utils compatibility)])], + [], [enable_lzmainfo=yes]) +test "x$enable_decoder_lzma1" = xno && enable_lzmainfo=no +AM_CONDITIONAL([COND_LZMAINFO], [test x$enable_lzmainfo != xno]) + +AC_ARG_ENABLE([lzma-links], [AS_HELP_STRING([--disable-lzma-links], + [do not create symlinks for LZMA Utils compatibility])], + [], [enable_lzma_links=yes]) +AM_CONDITIONAL([COND_LZMALINKS], [test x$enable_lzma_links != xno]) + +AC_ARG_ENABLE([scripts], [AS_HELP_STRING([--disable-scripts], + [do not install the scripts xzdiff, xzgrep, xzless, xzmore, + and their symlinks])], + [], [enable_scripts=yes]) +AM_CONDITIONAL([COND_SCRIPTS], [test x$enable_scripts != xno]) + +AC_ARG_ENABLE([doc], [AS_HELP_STRING([--disable-doc], + [do not install documentation files to docdir + (man pages are still installed and, + if --enable-doxygen is used, + liblzma API documentation is installed too)])], + [], [enable_doc=yes]) +AM_CONDITIONAL([COND_DOC], [test x$enable_doc != xno]) + +AC_ARG_ENABLE([doxygen], [AS_HELP_STRING([--enable-doxygen], + [generate HTML version of the liblzma API documentation + using Doxygen and install the result to docdir])], + [], [enable_doxygen=no]) +AM_CONDITIONAL([COND_DOXYGEN], [test x$enable_doxygen != xno]) + + +############## +# Sandboxing # +############## + +AC_MSG_CHECKING([if sandboxing should be used]) +AC_ARG_ENABLE([sandbox], [AS_HELP_STRING([--enable-sandbox=METHOD], + [Sandboxing METHOD can be + 'auto', 'no', 'capsicum', 'pledge', or 'landlock'. + The default is 'auto' which enables sandboxing if + a supported sandboxing method is found.])], + [], [enable_sandbox=auto]) +case $enable_xzdec-$enable_xz-$enable_sandbox in + no-no-*) + enable_sandbox=no + AC_MSG_RESULT([no, --disable-xz and --disable-xzdec was used]) + ;; + *-*-auto) + AC_MSG_RESULT([maybe (autodetect)]) + ;; + *-*-no | *-*-capsicum | *-*-pledge | *-*-landlock) + AC_MSG_RESULT([$enable_sandbox]) + ;; + *) + AC_MSG_RESULT([]) + AC_MSG_ERROR([--enable-sandbox only accepts 'auto', 'no', 'capsicum', 'pledge', or 'landlock'.]) + ;; +esac + + +########################### +# PATH prefix for scripts # +########################### + +# The scripts can add a prefix to the search PATH so that POSIX tools +# or the xz binary is always in the PATH. +AC_ARG_ENABLE([path-for-scripts], + [AS_HELP_STRING([--enable-path-for-scripts=PREFIX], + [If PREFIX isn't empty, PATH=PREFIX:$PATH will be set in + the beginning of the scripts (xzgrep and others). + The default is empty except on Solaris the default is + /usr/xpg4/bin.])], + [], [ + case $host_os in + solaris*) enable_path_for_scripts=/usr/xpg4/bin ;; + *) enable_path_for_scripts= ;; + esac + ]) +if test -n "$enable_path_for_scripts" && test "x$enable_path_for_scripts" != xno ; then + enable_path_for_scripts="PATH=$enable_path_for_scripts:\$PATH" +else + enable_path_for_scripts= +fi +AC_SUBST([enable_path_for_scripts]) + + +############################################################################### +# Checks for programs. +############################################################################### + +echo +case $host_os in + solaris*) + # The gnulib POSIX shell macro below may pick a shell that + # doesn't work with xzgrep. Workaround by picking a shell + # that is known to work. + if test -z "$gl_cv_posix_shell" && test -x /usr/xpg4/bin/sh; then + gl_cv_posix_shell=/usr/xpg4/bin/sh + fi + ;; +esac +gl_POSIX_SHELL +if test -z "$POSIX_SHELL" && test "x$enable_scripts" = xyes ; then + AC_MSG_ERROR([No POSIX conforming shell (sh) was found.]) +fi + +echo +echo "Initializing Automake:" + +# We don't use "subdir-objects" yet because it breaks "make distclean" when +# dependencies are enabled (as of Automake 1.14.1) due to this bug: +# https://debbugs.gnu.org/cgi/bugreport.cgi?bug=17354 +# The -Wno-unsupported is used to silence warnings about missing +# "subdir-objects". +AM_INIT_AUTOMAKE([1.12 foreign tar-v7 filename-length-max=99 -Wno-unsupported]) +AC_PROG_LN_S + +dnl # Autoconf >= 2.70 warns that AC_PROG_CC_C99 is obsolete. However, +dnl # we have to keep using AC_PROG_CC_C99 instead of AC_PROG_CC +dnl # as long as we try to be compatible with Autoconf 2.69. +AC_PROG_CC_C99 +if test x$ac_cv_prog_cc_c99 = xno ; then + AC_MSG_ERROR([No C99 compiler was found.]) +fi + +AM_PROG_CC_C_O +AM_PROG_AS +AC_USE_SYSTEM_EXTENSIONS + +# If using GCC or compatible compiler, verify that CFLAGS doesn't contain +# something that makes -Werror unhappy. It's important to check this after +# the above check for system extensions. It adds macros that can trigger, +# for example, -Wunused-macros. +if test "$GCC" = yes && test "x$SKIP_WERROR_CHECK" != xyes ; then + AC_MSG_CHECKING([if the -Werror option is usable]) + OLD_CFLAGS=$CFLAGS + CFLAGS="$CFLAGS -Werror" + AC_COMPILE_IFELSE([AC_LANG_SOURCE([[extern int foo; int foo;]])], [ + AC_MSG_RESULT([yes]) + ], [ + AC_MSG_RESULT([no]) + AC_MSG_ERROR([ + CFLAGS contains something that makes -Werror complain (see config.log). + This would break certain checks in 'configure'. It is strongly + recommended to modify CFLAGS to fix this. If you want to use noisy + warning options, for example, -Weverything, it is still possible to + add them later when running 'make': make CFLAGS+=-Weverything + + In case you really want to continue with the current CFLAGS, pass + 'SKIP_WERROR_CHECK=yes' as an argument to 'configure'. + ]) + ]) + CFLAGS=$OLD_CFLAGS +fi + +AS_CASE([$enable_threads], + [posix], [ + echo + echo "POSIX threading support:" + AX_PTHREAD([:]) dnl We don't need the HAVE_PTHREAD macro. + LIBS="$PTHREAD_LIBS $LIBS" + AM_CFLAGS="$AM_CFLAGS $PTHREAD_CFLAGS" + + dnl NOTE: PTHREAD_CC is ignored. It would be useful on AIX, + dnl but it's tricky to get it right together with + dnl AC_PROG_CC_C99. Thus, this is handled by telling the + dnl user in INSTALL to set the correct CC manually. + + AC_DEFINE([MYTHREAD_POSIX], [1], + [Define to 1 when using POSIX threads (pthreads).]) + + # This is nice to have but not mandatory. + OLD_CFLAGS=$CFLAGS + CFLAGS="$CFLAGS $PTHREAD_CFLAGS" + AC_CHECK_FUNCS([pthread_condattr_setclock]) + CFLAGS=$OLD_CFLAGS + ], + [win95], [ + AC_DEFINE([MYTHREAD_WIN95], [1], [Define to 1 when using + Windows 95 (and thus XP) compatible threads. + This avoids use of features that were added in + Windows Vista.]) + ], + [vista], [ + AC_DEFINE([MYTHREAD_VISTA], [1], [Define to 1 when using + Windows Vista compatible threads. This uses + features that are not available on Windows XP.]) + ] +) +AM_CONDITIONAL([COND_THREADS], [test "x$enable_threads" != xno]) + +echo +echo "Initializing Libtool:" +LT_PREREQ([2.4]) +LT_INIT([win32-dll]) +LT_LANG([Windows Resource]) + +# This is a bit wrong since it is possible to request that only some libs +# are built as shared. Using that feature isn't so common though, and this +# breaks only on Windows (at least for now) if the user enables only some +# libs as shared. +AM_CONDITIONAL([COND_SHARED], [test "x$enable_shared" != xno]) + +##################### +# Symbol versioning # +##################### + +# NOTE: This checks if we are building shared or static library +# and if --with-pic or --without-pic was used. Thus this check +# must be after Libtool initialization. +AC_MSG_CHECKING([if library symbol versioning should be used]) +AC_ARG_ENABLE([symbol-versions], [AS_HELP_STRING([--enable-symbol-versions], + [Use symbol versioning for liblzma. Enabled by default on + GNU/Linux, other GNU-based systems, and FreeBSD.])], + [], [enable_symbol_versions=auto]) +if test "x$enable_symbol_versions" = xauto; then + case $host_os in + # NOTE: Even if one omits -gnu on GNU/Linux (e.g. + # i486-slackware-linux), configure will (via config.sub) + # append -gnu (e.g. i486-slackware-linux-gnu), and this + # test will work correctly. + gnu* | *-gnu* | freebsd*) + enable_symbol_versions=yes + ;; + *) + enable_symbol_versions=no + ;; + esac +fi + +# There are two variants for symbol versioning. +# See src/liblzma/validate_map.sh for details. +# +# On GNU/Linux, extra symbols are added in the C code. These extra symbols +# must not be put into a static library as they can cause problems (and +# even if they didn't cause problems, they would be useless). On other +# systems symbol versioning may be used too but there is no problem as only +# a linker script is specified in src/liblzma/Makefile.am and that isn't +# used when creating a static library. +# +# Libtool always uses -DPIC when building shared libraries by default and +# doesn't use it for static libs by default. This can be overridden with +# --with-pic and --without-pic though. As long as neither --with-pic nor +# --without-pic is used then we can use #ifdef PIC to detect if the file is +# being built for a shared library. +LINKER_FLAG_UNDEFINED_VERSION= +AS_IF([test "x$enable_symbol_versions" = xno], [ + enable_symbol_versions=no + AC_MSG_RESULT([no]) +], [test "x$enable_shared" = xno], [ + enable_symbol_versions=no + AC_MSG_RESULT([no (not building a shared library)]) +], [ + # "yes" means that symbol version are to be used but we need to + # autodetect which variant to use. + if test "x$enable_symbol_versions" = xyes ; then + case "$host_cpu-$host_os" in + microblaze*) + # GCC 12 on MicroBlaze doesn't support + # __symver__ attribute. It's simplest and + # safest to use the generic version on that + # platform since then only the linker script + # is needed. The RHEL/CentOS 7 compatibility + # symbols don't matter on MicroBlaze. + enable_symbol_versions=generic + ;; + *-linux*) + # NVIDIA HPC Compiler doesn't support symbol + # versioning but the linker script can still + # be used. + AC_EGREP_CPP([use_generic_symbol_versioning], + [#ifdef __NVCOMPILER + use_generic_symbol_versioning + #endif], + [enable_symbol_versions=generic], + [enable_symbol_versions=linux]) + ;; + *) + enable_symbol_versions=generic + ;; + esac + fi + + if test "x$enable_symbol_versions" = xlinux ; then + case "$pic_mode-$enable_static" in + default-*) + # Use symvers if PIC is defined. + have_symbol_versions_linux=2 + ;; + *-no) + # Not building static library. + # Use symvers unconditionally. + have_symbol_versions_linux=1 + ;; + *) + AC_MSG_RESULT([]) + AC_MSG_ERROR([ + On GNU/Linux, building both shared and static library at the same time + is not supported if --with-pic or --without-pic is used. + Use either --disable-shared or --disable-static to build one type + of library at a time. If both types are needed, build one at a time, + possibly picking only src/liblzma/.libs/liblzma.a from the static build.]) + ;; + esac + AC_DEFINE_UNQUOTED([HAVE_SYMBOL_VERSIONS_LINUX], + [$have_symbol_versions_linux], + [Define to 1 to if GNU/Linux-specific details + are unconditionally wanted for symbol + versioning. Define to 2 to if these are wanted + only if also PIC is defined (allows building + both shared and static liblzma at the same + time with Libtool if neither --with-pic nor + --without-pic is used). This define must be + used together with liblzma_linux.map.]) + elif test "x$enable_symbol_versions" != xgeneric ; then + AC_MSG_RESULT([]) + AC_MSG_ERROR([unknown symbol versioning variant '$enable_symbol_versions']) + fi + AC_MSG_RESULT([yes ($enable_symbol_versions)]) + + # If features are disabled in liblzma, some symbols may be missing. + # LLVM's lld defaults to --no-undefined-version and the build breaks + # if not all symbols in the version script exist. That is good for + # catching errors like typos, but in our case the downside is too big. + # Avoid the problem by using --undefined-version if the linker + # supports it. + # + # GNU ld has had --no-undefined-version for a long time but it's not + # the default. The opposite option --undefined-version was only added + # in 2022, thus we must use --undefined-version conditionally. + AC_MSG_CHECKING([if linker supports --undefined-version]) + OLD_LDFLAGS=$LDFLAGS + LDFLAGS="$LDFLAGS -Wl,--undefined-version" + AC_LINK_IFELSE([AC_LANG_SOURCE([[int main(void) { return 0; }]])], [ + LINKER_FLAG_UNDEFINED_VERSION=-Wl,--undefined-version + AC_MSG_RESULT([yes]) + ], [ + AC_MSG_RESULT([no]) + ]) + LDFLAGS=$OLD_LDFLAGS +]) + +AM_CONDITIONAL([COND_SYMVERS_LINUX], + [test "x$enable_symbol_versions" = xlinux]) +AM_CONDITIONAL([COND_SYMVERS_GENERIC], + [test "x$enable_symbol_versions" = xgeneric]) +AC_SUBST([LINKER_FLAG_UNDEFINED_VERSION]) + + +############################################################################### +# Checks for libraries. +############################################################################### + +dnl Support for _REQUIRE_VERSION was added in gettext 0.19.6. If both +dnl _REQUIRE_VERSION and _VERSION are present, the _VERSION is ignored. +dnl We use both for compatibility with other programs in the Autotools family. +echo +echo "Initializing gettext:" +AM_GNU_GETTEXT_REQUIRE_VERSION([0.19.6]) +AM_GNU_GETTEXT_VERSION([0.19.6]) +AM_GNU_GETTEXT([external]) + +# The command line tools use UTF-8 on native Windows. Non-ASCII characters +# display correctly only when using UCRT and gettext-runtime >= 0.23.1. +AS_CASE([$USE_NLS-$host_os], + [yes-mingw*], [ + AC_MSG_CHECKING([for UCRT and gettext-runtime >= 0.23.1]) + AC_PREPROC_IFELSE([AC_LANG_SOURCE([[ + #define WIN32_LEAN_AND_MEAN + #include + #include + + #ifndef _UCRT + #error "Not UCRT" + #endif + + #if LIBINTL_VERSION < 0x001701 + #error "gettext-runtime < 0.23.1" + #endif + ]])], [ + AC_MSG_RESULT([yes]) + ], [ + AC_MSG_RESULT([no]) + AC_MSG_ERROR([ + Translation support (--enable-nls) on native Windows requires + UCRT and gettext-runtime >= 0.23.1. Use --disable-nls to build + with MSVCRT or old gettext-runtime.]) + ]) + ] +) + + +############################################################################### +# Checks for header files. +############################################################################### + +echo +echo "System headers and functions:" + +# immintrin.h allows the use of the intrinsic functions if they are available. +# cpuid.h may be used for detecting x86 processor features at runtime. +AC_CHECK_HEADERS([immintrin.h cpuid.h]) + + +############################################################################### +# Checks for typedefs, structures, and compiler characteristics. +############################################################################### + +AC_HEADER_STDBOOL + +AC_TYPE_UINT8_T +AC_TYPE_UINT16_T +AC_TYPE_INT32_T +AC_TYPE_UINT32_T +AC_TYPE_INT64_T +AC_TYPE_UINT64_T +AC_TYPE_UINTPTR_T + +AC_CHECK_SIZEOF([size_t]) + +# The command line tool can copy high resolution timestamps if such +# information is available in struct stat. Otherwise one second accuracy +# is used. +AC_CHECK_MEMBERS([ + struct stat.st_atim.tv_nsec, + struct stat.st_atimespec.tv_nsec, + struct stat.st_atimensec, + struct stat.st_uatime, + struct stat.st_atim.st__tim.tv_nsec]) + +AC_SYS_LARGEFILE +AC_C_BIGENDIAN + +# __attribute__((__constructor__)) can be used for one-time initializations. +# Use -Werror because some compilers accept unknown attributes and just +# give a warning. +AC_MSG_CHECKING([if __attribute__((__constructor__)) can be used]) +have_func_attribute_constructor=no +OLD_CFLAGS="$CFLAGS" +CFLAGS="$CFLAGS -Werror" +AC_COMPILE_IFELSE([AC_LANG_SOURCE([[ + __attribute__((__constructor__)) + static void my_constructor_func(void) { return; } +]])], [ + AC_DEFINE([HAVE_FUNC_ATTRIBUTE_CONSTRUCTOR], [1], + [Define to 1 if __attribute__((__constructor__)) + is supported for functions.]) + have_func_attribute_constructor=yes + AC_MSG_RESULT([yes]) +], [ + AC_MSG_RESULT([no]) +]) +CFLAGS="$OLD_CFLAGS" + +# The Win95 threading lacks a thread-safe one-time initialization function. +# The one-time initialization is needed for crc32_small.c and crc64_small.c +# create the CRC tables. So if small mode is enabled, the threading mode is +# win95, and the compiler does not support attribute constructor, then we +# would end up with a multithreaded build that is thread-unsafe. As a +# result this configuration is not allowed. +if test "x$enable_small$enable_threads$have_func_attribute_constructor" \ + = xyeswin95no; then + AC_MSG_ERROR([ + --enable-threads=win95 and --enable-small cannot be used + at the same time with a compiler that doesn't support + __attribute__((__constructor__))]) +fi + + +############################################################################### +# Checks for library functions. +############################################################################### + +# Gnulib replacements as needed +gl_GETOPT + +# If clock_gettime() is available, liblzma with pthreads may use it, and +# xz may use it even when threading support is disabled. In XZ Utils 5.4.x +# and older, configure checked for clock_gettime() only when using pthreads. +# This way non-threaded builds of liblzma didn't get a useless dependency on +# librt which further had a dependency on libpthread. Avoiding these was +# useful when a small build was needed, for example, for initramfs use. +# +# The above reasoning is thoroughly obsolete: On GNU/Linux, librt hasn't +# been needed for clock_gettime() since glibc 2.17 (2012-12-25). +# Solaris 10 needs librt but Solaris 11 doesn't anymore. +AC_SEARCH_LIBS([clock_gettime], [rt]) +AC_CHECK_FUNCS([clock_gettime]) +AC_CHECK_DECL([CLOCK_MONOTONIC], [AC_DEFINE([HAVE_CLOCK_MONOTONIC], [1], + [Define to 1 if 'CLOCK_MONOTONIC' is declared in .])], [], + [[#include ]]) + +# Find the best function to set timestamps. +AC_CHECK_FUNCS([futimens futimes futimesat utimes _futime utime], [break]) + +# These are nice to have but not mandatory. +AC_CHECK_FUNCS([getrlimit posix_fadvise]) + +TUKLIB_PROGNAME +TUKLIB_INTEGER +TUKLIB_PHYSMEM +TUKLIB_CPUCORES +TUKLIB_MBSTR + +# If requested, check for system-provided SHA-256. At least the following +# implementations are supported: +# +# OS Headers Library Type Function +# FreeBSD sys/types.h + sha256.h libmd SHA256_CTX SHA256_Init +# NetBSD sys/types.h + sha2.h SHA256_CTX SHA256_Init +# OpenBSD sys/types.h + sha2.h SHA2_CTX SHA256Init +# Solaris sys/types.h + sha2.h libmd SHA256_CTX SHA256Init +# MINIX 3 sys/types.h + sha2.h SHA256_CTX SHA256_Init +# Darwin CommonCrypto/CommonDigest.h CC_SHA256_CTX CC_SHA256_Init +# +# Note that Darwin's CC_SHA256_Update takes buffer size as uint32_t instead +# of size_t. +# +sha256_header_found=no +sha256_type_found=no +sha256_func_found=no +AS_IF([test "x$enable_external_sha256" = "xyes"], [ + # Test for Common Crypto before others, because Darwin has sha256.h + # too and we don't want to use that, because on older versions it + # uses OpenSSL functions, whose SHA256_Init is not guaranteed to + # succeed. + AC_CHECK_HEADERS( + [CommonCrypto/CommonDigest.h sha256.h sha2.h], + [sha256_header_found=yes ; break]) + if test "x$sha256_header_found" = xyes; then + AC_CHECK_TYPES([CC_SHA256_CTX, SHA256_CTX, SHA2_CTX], + [sha256_type_found=yes], [], + [[#ifdef HAVE_SYS_TYPES_H + # include + #endif + #ifdef HAVE_COMMONCRYPTO_COMMONDIGEST_H + # include + #endif + #ifdef HAVE_SHA256_H + # include + #endif + #ifdef HAVE_SHA2_H + # include + #endif]]) + if test "x$sha256_type_found" = xyes ; then + AC_SEARCH_LIBS([SHA256Init], [md]) + AC_SEARCH_LIBS([SHA256_Init], [md]) + AC_CHECK_FUNCS([CC_SHA256_Init SHA256Init SHA256_Init], + [sha256_func_found=yes ; break]) + fi + fi +]) +AM_CONDITIONAL([COND_INTERNAL_SHA256], [test "x$sha256_func_found" = xno]) +if test "x$enable_external_sha256$sha256_func_found" = xyesno; then + AC_MSG_ERROR([--enable-external-sha256 was specified but no supported external SHA-256 implementation was found]) +fi + +# Check for SSE2 intrinsics. There is no run-time detection for SSE2 so if +# compiler options enable SSE2 then SSE2 support is required by the binaries. +# The compile-time check for SSE2 is done with #ifdefs because some compilers +# (ICC, MSVC) allow SSE2 intrinsics even when SSE2 isn't enabled. +AC_CHECK_DECL([_mm_movemask_epi8], + [AC_DEFINE([HAVE__MM_MOVEMASK_EPI8], [1], + [Define to 1 if _mm_movemask_epi8 is available.])], + [], +[#ifdef HAVE_IMMINTRIN_H +#include +#endif]) + +# For faster CRC on 32/64-bit x86 and E2K (see also crc64_fast.c): +# +# - Check for the CLMUL intrinsic _mm_clmulepi64_si128 in . +# Check also for _mm_set_epi64x for consistency with CMake build +# where it's needed to disable CLMUL with VS2013. +# +# - Check that __attribute__((__target__("ssse3,sse4.1,pclmul"))) works +# together with _mm_clmulepi64_si128 from . The attribute +# was added in GCC 4.4 but some GCC 4.x versions don't allow intrinsics +# with it. Exception: it must be not be used with EDG-based compilers +# like ICC and the compiler on E2K. +# +# If everything above is supported, runtime detection will be used to keep the +# binaries working on systems that don't support the required extensions. +# +# NOTE: Use a check that links and not merely compiles to ensure that +# missing intrinsics don't get accepted with compilers that allow +# implicit function declarations. +AC_MSG_CHECKING([if _mm_clmulepi64_si128 is usable]) +AS_IF([test "x$enable_clmul_crc" = xno], [ + AC_MSG_RESULT([no, --disable-clmul-crc was used]) +], [ + AC_LINK_IFELSE([AC_LANG_SOURCE([[ +#include + +// CLMUL works on older E2K instruction set but it is slow due to emulation. +#if defined(__e2k__) && __iset__ < 6 +# error +#endif + +// Intel's old compiler (ICC) can define __GNUC__ but the attribute must not +// be used with it. The new Clang-based ICX needs the attribute. +// Checking for !defined(__EDG__) catches ICC and other EDG-based compilers. +#if (defined(__GNUC__) || defined(__clang__)) && !defined(__EDG__) +__attribute__((__target__("ssse3,sse4.1,pclmul"))) +#endif +int main(void) +{ + __m128i a = _mm_set_epi64x(1, 2); + a = _mm_clmulepi64_si128(a, a, 0); + return 0; +} + ]])], [ + AC_DEFINE([HAVE_USABLE_CLMUL], [1], + [Define to 1 if _mm_set_epi64x and + _mm_clmulepi64_si128 are usable. + See configure.ac for details.]) + enable_clmul_crc=yes + ], [ + enable_clmul_crc=no + ]) + AC_MSG_RESULT([$enable_clmul_crc]) +]) + +# ARM64 C Language Extensions define CRC32 functions in arm_acle.h. +# These are supported by at least GCC and Clang which both need +# __attribute__((__target__("+crc"))), unless the needed compiler flags +# are used to support the CRC instruction. +AC_MSG_CHECKING([if ARM64 CRC32 instruction is usable]) +AS_IF([test "x$enable_arm64_crc32" = xno], [ + AC_MSG_RESULT([no, --disable-arm64-crc32 was used]) +], [ + AC_LINK_IFELSE([AC_LANG_SOURCE([[ +#include +#include + +#if (defined(__GNUC__) || defined(__clang__)) && !defined(__EDG__) +__attribute__((__target__("+crc"))) +#endif +int main(void) +{ + return __crc32d(1, 2) != 0; +} + ]])], [ + AC_DEFINE([HAVE_ARM64_CRC32], [1], + [Define to 1 if ARM64 CRC32 instruction is supported. + See configure.ac for details.]) + enable_arm64_crc32=yes + ], [ + enable_arm64_crc32=no + ]) + AC_MSG_RESULT([$enable_arm64_crc32]) +]) + +# Check for ARM64 CRC32 instruction runtime detection. +# +# - getauxval() is supported on Linux. We also need HWCAP_CRC32 which was +# added in glibc 2.24. +# +# - elf_aux_info() is supported on FreeBSD and OpenBSD >= 7.6. +# +# - sysctlbyname("hw.optional.armv8_crc32", ...) is supported on Darwin +# (macOS, iOS, etc.). Note that sysctlbyname() is supported on FreeBSD, +# NetBSD, and possibly others too but the string is specific to Apple +# OSes. The C code is responsible for checking defined(__APPLE__) +# before using sysctlbyname("hw.optional.armv8_crc32", ...). +# +AS_IF([test "x$enable_arm64_crc32" = xyes], [ + AC_CHECK_FUNCS([getauxval elf_aux_info sysctlbyname], [break]) + AC_CHECK_DECL([HWCAP_CRC32], [AC_DEFINE([HAVE_HWCAP_CRC32], [1], + [Define to 1 if 'HWCAP_CRC32' is declared in .])], + [], [[#include ]]) +]) + + +# LoongArch CRC32 intrinsics are in larchintrin.h. +# These are supported by at least GCC and Clang. +# +# Only 64-bit LoongArch is currently supported. +# It doesn't need runtime detection. +AC_MSG_CHECKING([if LoongArch CRC32 instructions are usable]) +AS_IF([test "x$enable_loongarch_crc32" = xno], [ + AC_MSG_RESULT([no, --disable-loongarch-crc32 was used]) +], [ + AC_LINK_IFELSE([AC_LANG_SOURCE([[ +#if !(defined(__loongarch__) && __loongarch_grlen >= 64) +# error +#endif + +#include +int main(void) +{ + return __crc_w_w_w(1, 2); +} + ]])], [ + AC_DEFINE([HAVE_LOONGARCH_CRC32], [1], [Define to 1 if + 64-bit LoongArch CRC32 instructions are supported.]) + enable_loongarch_crc32=yes + ], [ + enable_loongarch_crc32=no + ]) + AC_MSG_RESULT([$enable_loongarch_crc32]) +]) + + +# Check for sandbox support. If one is found, set enable_sandbox=found. +# +# About -fsanitize: Of our three sandbox methods, only Landlock is +# incompatible with -fsanitize. FreeBSD 13.2 with Capsicum was tested with +# -fsanitize=address,undefined and had no issues. OpenBSD (as of version +# 7.4) has minimal support for process instrumentation. OpenBSD does not +# distribute the additional libraries needed (libasan, libubsan, etc.) with +# GCC or Clang needed for runtime sanitization support and instead only +# support -fsanitize-minimal-runtime for minimal undefined behavior +# sanitization. This minimal support is compatible with our use of the +# Pledge sandbox. So only Landlock will result in a build that cannot +# compress or decompress a single file to standard out. +AS_CASE([$enable_sandbox], + [auto | capsicum], [ + AC_CHECK_FUNCS([cap_rights_limit], [enable_sandbox=found]) + ] +) +AS_CASE([$enable_sandbox], + [auto | pledge], [ + AC_CHECK_FUNCS([pledge], [enable_sandbox=found]) + ] +) +AS_CASE([$enable_sandbox], + [auto | landlock], [ + AC_MSG_CHECKING([if Linux Landlock is usable]) + + # A compile check is done here because some systems have + # linux/landlock.h, but do not have the syscalls defined + # in order to actually use Linux Landlock. + AC_LINK_IFELSE([AC_LANG_SOURCE([[ + #include + #include + #include + + int main(void) + { + (void)prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + (void)SYS_landlock_create_ruleset; + (void)SYS_landlock_restrict_self; + (void)LANDLOCK_CREATE_RULESET_VERSION; + return 0; + } + ]])], [ + enable_sandbox=found + + AS_CASE(["$CC $CFLAGS"], [*-fsanitize=*], + [AC_MSG_ERROR([ + CC or CFLAGS contain '-fsanitize=' which is incompatible with the Landlock + sandboxing. Use --disable-sandbox when using '-fsanitize'.])]) + + AC_DEFINE([HAVE_LINUX_LANDLOCK], [1], + [Define to 1 if Linux Landlock is supported. + See configure.ac for details.]) + AC_MSG_RESULT([yes]) + ], [ + AC_MSG_RESULT([no]) + ]) + ] +) + +# If a specific sandboxing method was explicitly requested and it wasn't +# found, give an error. +case $enable_sandbox in + auto | no | found) + ;; + *) + AC_MSG_ERROR([$enable_sandbox support not found]) + ;; +esac + + +############################################################################### +# If using GCC, set some additional AM_CFLAGS: +############################################################################### + +if test "$GCC" = yes ; then + echo + echo "GCC extensions:" +fi + +# Always do the visibility check but don't set AM_CFLAGS on Windows. +# This way things get set properly even on Windows. +gl_VISIBILITY +if test -n "$CFLAG_VISIBILITY" && test "$is_w32" = no; then + AM_CFLAGS="$AM_CFLAGS $CFLAG_VISIBILITY" +fi + +AS_IF([test "$GCC" = yes], [ + # Enable as much warnings as possible. These commented warnings won't + # work for this package though: + # * -Wunreachable-code breaks several assert(0) cases, which are + # backed up with "return LZMA_PROG_ERROR". + # * -Wcast-qual would break various things where we need a non-const + # pointer although we don't modify anything through it. + # * -Winline, -Wdisabled-optimization, -Wunsafe-loop-optimizations + # don't seem so useful here; at least the last one gives some + # warnings which are not bugs. + # * -Wconversion still shows too many warnings. + # + # The flags before the empty line are for GCC and many of them + # are supported by Clang too. The flags after the empty line are + # for Clang. + for NEW_FLAG in \ + -Wall \ + -Wextra \ + -Wvla \ + -Wformat=2 \ + -Winit-self \ + -Wmissing-include-dirs \ + -Wshift-overflow=2 \ + -Wstrict-overflow=3 \ + -Walloc-zero \ + -Wduplicated-cond \ + -Wfloat-equal \ + -Wundef \ + -Wshadow \ + -Wpointer-arith \ + -Wbad-function-cast \ + -Wwrite-strings \ + -Wdate-time \ + -Wsign-conversion \ + -Wfloat-conversion \ + -Wlogical-op \ + -Waggregate-return \ + -Wstrict-prototypes \ + -Wold-style-definition \ + -Wmissing-prototypes \ + -Wmissing-declarations \ + -Wredundant-decls \ + -Wimplicit-fallthrough \ + -Wimplicit-fallthrough=5 \ + \ + -Wc99-compat \ + -Wc11-extensions \ + -Wc2x-compat \ + -Wc2x-extensions \ + -Wpre-c2x-compat \ + -Warray-bounds-pointer-arithmetic \ + -Wassign-enum \ + -Wconditional-uninitialized \ + -Wdocumentation \ + -Wduplicate-enum \ + -Wempty-translation-unit \ + -Wextra-semi-stmt \ + -Wflexible-array-extensions \ + -Wmissing-variable-declarations \ + -Wnewline-eof \ + -Wshift-sign-overflow \ + -Wstring-conversion + do + AC_MSG_CHECKING([if $CC accepts $NEW_FLAG]) + OLD_CFLAGS="$CFLAGS" + CFLAGS="$CFLAGS $NEW_FLAG -Werror" + AC_COMPILE_IFELSE([AC_LANG_SOURCE( + [[void foo(void); void foo(void) { }]])], [ + AM_CFLAGS="$AM_CFLAGS $NEW_FLAG" + AC_MSG_RESULT([yes]) + ], [ + AC_MSG_RESULT([no]) + ]) + CFLAGS="$OLD_CFLAGS" + done + + AC_ARG_ENABLE([werror], + AS_HELP_STRING([--enable-werror], [Enable -Werror to abort + compilation on all compiler warnings.]), + [], [enable_werror=no]) + if test "x$enable_werror" = "xyes"; then + AM_CFLAGS="$AM_CFLAGS -Werror" + fi +]) + + +############################################################################### +# Create the makefiles and config.h +############################################################################### + +echo + +# Don't build the lib directory at all if we don't need any replacement +# functions. +AM_CONDITIONAL([COND_GNULIB], test -n "$LIBOBJS") + +# Add default AM_CFLAGS. +AC_SUBST([AM_CFLAGS]) + +# This is needed for src/scripts. +xz=`echo xz | sed "$program_transform_name"` +AC_SUBST([xz]) + +AC_CONFIG_FILES([ + Makefile + po/Makefile.in + lib/Makefile + src/Makefile + src/liblzma/Makefile + src/liblzma/api/Makefile + src/xz/Makefile + src/xzdec/Makefile + src/lzmainfo/Makefile + src/scripts/Makefile + tests/Makefile + debug/Makefile +]) +AC_CONFIG_FILES([src/scripts/xzdiff], [chmod +x src/scripts/xzdiff]) +AC_CONFIG_FILES([src/scripts/xzgrep], [chmod +x src/scripts/xzgrep]) +AC_CONFIG_FILES([src/scripts/xzmore], [chmod +x src/scripts/xzmore]) +AC_CONFIG_FILES([src/scripts/xzless], [chmod +x src/scripts/xzless]) + +AC_OUTPUT + +# Some warnings +if test x$tuklib_cv_physmem_method = xunknown; then + echo + echo "WARNING:" + echo "No supported method to detect the amount of RAM." + echo "Consider using --enable-assume-ram (if you didn't already)" + echo "or make a patch to add support for this operating system." +fi + +if test x$tuklib_cv_cpucores_method = xunknown; then + echo + echo "WARNING:" + echo "No supported method to detect the number of CPU cores." +fi + +if test "x$enable_threads$enable_small$have_func_attribute_constructor" \ + = xnoyesno; then + echo + echo "NOTE:" + echo "liblzma will be thread-unsafe due to the combination" + echo "of --disable-threads --enable-small when using a compiler" + echo "that doesn't support __attribute__((__constructor__))." +fi diff --git a/src/native/external/xz/lib/Makefile.am b/src/native/external/xz/lib/Makefile.am new file mode 100644 index 00000000000000..3309e228294c2b --- /dev/null +++ b/src/native/external/xz/lib/Makefile.am @@ -0,0 +1,44 @@ +## SPDX-License-Identifier: GPL-2.0-or-later + +## +## Copyright (C) 2004-2007 Free Software Foundation, Inc. +## +## This program is free software; you can redistribute it and/or modify +## it under the terms of the GNU General Public License as published by +## the Free Software Foundation; either version 2 of the License, or +## (at your option) any later version. +## +## This program is distributed in the hope that it will be useful, +## but WITHOUT ANY WARRANTY; without even the implied warranty of +## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +## GNU General Public License for more details. +## + +## Not using gnulib-tool, at least for now. It is likely that we won't +## need anything else from Gnulib than getopt_long(). + +noinst_LIBRARIES = libgnu.a + +libgnu_a_SOURCES = +libgnu_a_DEPENDENCIES = $(LIBOBJS) +libgnu_a_LIBADD = $(LIBOBJS) + +EXTRA_DIST = \ + getopt.in.h \ + getopt.c \ + getopt1.c \ + getopt_int.h \ + getopt-cdefs.h \ + getopt-core.h \ + getopt-ext.h \ + getopt-pfx-core.h \ + getopt-pfx-ext.h + +BUILT_SOURCES = $(GETOPT_H) +MOSTLYCLEANFILES = getopt.h getopt.h-t + +getopt.h: getopt.in.h + { echo '/* DO NOT EDIT! GENERATED AUTOMATICALLY! */'; \ + cat $(srcdir)/getopt.in.h; \ + } > $@-t + mv -f $@-t $@ diff --git a/src/native/external/xz/lib/getopt-cdefs.h b/src/native/external/xz/lib/getopt-cdefs.h new file mode 100644 index 00000000000000..576428c1c42d1d --- /dev/null +++ b/src/native/external/xz/lib/getopt-cdefs.h @@ -0,0 +1,72 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +/* getopt-on-non-glibc compatibility macros. + Copyright (C) 1989-2023 Free Software Foundation, Inc. + This file is part of gnulib. + Unlike most of the getopt implementation, it is NOT shared + with the GNU C Library. + + This file is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as + published by the Free Software Foundation; either version 2.1 of the + License, or (at your option) any later version. + + This file is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with this program. If not, see . */ + +#ifndef _GETOPT_CDEFS_H +#define _GETOPT_CDEFS_H 1 + +/* This header should not be used directly; include getopt.h or + unistd.h instead. It does not have a protective #error, because + the guard macro for getopt.h in gnulib is not fixed. */ + +/* getopt-core.h and getopt-ext.h are shared with GNU libc, and expect + a number of the internal macros supplied to GNU libc's headers by + sys/cdefs.h. Provide fallback definitions for all of them. */ +#ifdef HAVE_SYS_CDEFS_H +# include +#endif + +#ifndef __BEGIN_DECLS +# ifdef __cplusplus +# define __BEGIN_DECLS extern "C" { +# else +# define __BEGIN_DECLS /* nothing */ +# endif +#endif +#ifndef __END_DECLS +# ifdef __cplusplus +# define __END_DECLS } +# else +# define __END_DECLS /* nothing */ +# endif +#endif + +#ifndef __GNUC_PREREQ +# if defined __GNUC__ && defined __GNUC_VERSION__ +# define __GNUC_PREREQ(maj, min) \ + ((__GNUC__ << 16) + __GNUC_MINOR__ >= ((maj) << 16) + (min)) +# else +# define __GNUC_PREREQ(maj, min) 0 +# endif +#endif + +#ifndef __THROW +# if defined __cplusplus && (__GNUC_PREREQ (2,8) || __clang_major__ >= 4) +# if __cplusplus >= 201103L +# define __THROW noexcept (true) +# else +# define __THROW throw () +# endif +# else +# define __THROW +# endif +#endif + +#endif /* _GETOPT_CDEFS_H */ diff --git a/src/native/external/xz/lib/getopt-core.h b/src/native/external/xz/lib/getopt-core.h new file mode 100644 index 00000000000000..126ce8089203ac --- /dev/null +++ b/src/native/external/xz/lib/getopt-core.h @@ -0,0 +1,98 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +/* Declarations for getopt (basic, portable features only). + Copyright (C) 1989-2023 Free Software Foundation, Inc. + This file is part of the GNU C Library and is also part of gnulib. + Patches to this file should be submitted to both projects. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#ifndef _GETOPT_CORE_H +#define _GETOPT_CORE_H 1 + +/* This header should not be used directly; include getopt.h or + unistd.h instead. Unlike most bits headers, it does not have + a protective #error, because the guard macro for getopt.h in + gnulib is not fixed. */ + +__BEGIN_DECLS + +/* For communication from 'getopt' to the caller. + When 'getopt' finds an option that takes an argument, + the argument value is returned here. + Also, when 'ordering' is RETURN_IN_ORDER, + each non-option ARGV-element is returned here. */ + +extern char *optarg; + +/* Index in ARGV of the next element to be scanned. + This is used for communication to and from the caller + and for communication between successive calls to 'getopt'. + + On entry to 'getopt', zero means this is the first call; initialize. + + When 'getopt' returns -1, this is the index of the first of the + non-option elements that the caller should itself scan. + + Otherwise, 'optind' communicates from one call to the next + how much of ARGV has been scanned so far. */ + +extern int optind; + +/* Callers store zero here to inhibit the error message 'getopt' prints + for unrecognized options. */ + +extern int opterr; + +/* Set to an option character which was unrecognized. */ + +extern int optopt; + +/* Get definitions and prototypes for functions to process the + arguments in ARGV (ARGC of them, minus the program name) for + options given in OPTS. + + Return the option character from OPTS just read. Return -1 when + there are no more options. For unrecognized options, or options + missing arguments, 'optopt' is set to the option letter, and '?' is + returned. + + The OPTS string is a list of characters which are recognized option + letters, optionally followed by colons, specifying that that letter + takes an argument, to be placed in 'optarg'. + + If a letter in OPTS is followed by two colons, its argument is + optional. This behavior is specific to the GNU 'getopt'. + + The argument '--' causes premature termination of argument + scanning, explicitly telling 'getopt' that there are no more + options. + + If OPTS begins with '-', then non-option arguments are treated as + arguments to the option '\1'. This behavior is specific to the GNU + 'getopt'. If OPTS begins with '+', or POSIXLY_CORRECT is set in + the environment, then do not permute arguments. + + For standards compliance, the 'argv' argument has the type + char *const *, but this is inaccurate; if argument permutation is + enabled, the argv array (not the strings it points to) must be + writable. */ + +extern int getopt (int ___argc, char *const *___argv, const char *__shortopts) + __THROW _GL_ARG_NONNULL ((2, 3)); + +__END_DECLS + +#endif /* _GETOPT_CORE_H */ diff --git a/src/native/external/xz/lib/getopt-ext.h b/src/native/external/xz/lib/getopt-ext.h new file mode 100644 index 00000000000000..006037ba86478d --- /dev/null +++ b/src/native/external/xz/lib/getopt-ext.h @@ -0,0 +1,79 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +/* Declarations for getopt (GNU extensions). + Copyright (C) 1989-2023 Free Software Foundation, Inc. + This file is part of the GNU C Library and is also part of gnulib. + Patches to this file should be submitted to both projects. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#ifndef _GETOPT_EXT_H +#define _GETOPT_EXT_H 1 + +/* This header should not be used directly; include getopt.h instead. + Unlike most bits headers, it does not have a protective #error, + because the guard macro for getopt.h in gnulib is not fixed. */ + +__BEGIN_DECLS + +/* Describe the long-named options requested by the application. + The LONG_OPTIONS argument to getopt_long or getopt_long_only is a vector + of 'struct option' terminated by an element containing a name which is + zero. + + The field 'has_arg' is: + no_argument (or 0) if the option does not take an argument, + required_argument (or 1) if the option requires an argument, + optional_argument (or 2) if the option takes an optional argument. + + If the field 'flag' is not NULL, it points to a variable that is set + to the value given in the field 'val' when the option is found, but + left unchanged if the option is not found. + + To have a long-named option do something other than set an 'int' to + a compiled-in constant, such as set a value from 'optarg', set the + option's 'flag' field to zero and its 'val' field to a nonzero + value (the equivalent single-letter option character, if there is + one). For long options that have a zero 'flag' field, 'getopt' + returns the contents of the 'val' field. */ + +struct option +{ + const char *name; + /* has_arg can't be an enum because some compilers complain about + type mismatches in all the code that assumes it is an int. */ + int has_arg; + int *flag; + int val; +}; + +/* Names for the values of the 'has_arg' field of 'struct option'. */ + +#define no_argument 0 +#define required_argument 1 +#define optional_argument 2 + +extern int getopt_long (int ___argc, char *__getopt_argv_const *___argv, + const char *__shortopts, + const struct option *__longopts, int *__longind) + __THROW _GL_ARG_NONNULL ((2, 3)); +extern int getopt_long_only (int ___argc, char *__getopt_argv_const *___argv, + const char *__shortopts, + const struct option *__longopts, int *__longind) + __THROW _GL_ARG_NONNULL ((2, 3)); + +__END_DECLS + +#endif /* _GETOPT_EXT_H */ diff --git a/src/native/external/xz/lib/getopt-pfx-core.h b/src/native/external/xz/lib/getopt-pfx-core.h new file mode 100644 index 00000000000000..ee22d3f77f427d --- /dev/null +++ b/src/native/external/xz/lib/getopt-pfx-core.h @@ -0,0 +1,68 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +/* getopt (basic, portable features) gnulib wrapper header. + Copyright (C) 1989-2023 Free Software Foundation, Inc. + This file is part of gnulib. + Unlike most of the getopt implementation, it is NOT shared + with the GNU C Library. + + This file is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as + published by the Free Software Foundation; either version 2.1 of the + License, or (at your option) any later version. + + This file is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with this program. If not, see . */ + +#ifndef _GETOPT_PFX_CORE_H +#define _GETOPT_PFX_CORE_H 1 + +/* This header should not be used directly; include getopt.h or + unistd.h instead. It does not have a protective #error, because + the guard macro for getopt.h in gnulib is not fixed. */ + +/* Standalone applications should #define __GETOPT_PREFIX to an + identifier that prefixes the external functions and variables + defined in getopt-core.h and getopt-ext.h. Systematically + rename identifiers so that they do not collide with the system + functions and variables. Renaming avoids problems with some + compilers and linkers. */ +#ifdef __GETOPT_PREFIX +# ifndef __GETOPT_ID +# define __GETOPT_CONCAT(x, y) x ## y +# define __GETOPT_XCONCAT(x, y) __GETOPT_CONCAT (x, y) +# define __GETOPT_ID(y) __GETOPT_XCONCAT (__GETOPT_PREFIX, y) +# endif +# undef getopt +# undef optarg +# undef opterr +# undef optind +# undef optopt +# define getopt __GETOPT_ID (getopt) +# define optarg __GETOPT_ID (optarg) +# define opterr __GETOPT_ID (opterr) +# define optind __GETOPT_ID (optind) +# define optopt __GETOPT_ID (optopt) + +/* Work around a problem on macOS, which declares getopt with a + trailing __DARWIN_ALIAS(getopt) that would expand to something like + __asm("_" "rpl_getopt" "$UNIX2003") were it not for the following + hack to suppress the macOS declaration . */ +# ifdef __APPLE__ +# define _GETOPT +# endif + +/* The system's getopt.h may have already included getopt-core.h to + declare the unprefixed identifiers. Undef _GETOPT_CORE_H so that + getopt-core.h declares them with prefixes. */ +# undef _GETOPT_CORE_H +#endif + +#include + +#endif /* _GETOPT_PFX_CORE_H */ diff --git a/src/native/external/xz/lib/getopt-pfx-ext.h b/src/native/external/xz/lib/getopt-pfx-ext.h new file mode 100644 index 00000000000000..00d2a0952f45ab --- /dev/null +++ b/src/native/external/xz/lib/getopt-pfx-ext.h @@ -0,0 +1,72 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +/* getopt (GNU extensions) gnulib wrapper header. + Copyright (C) 1989-2023 Free Software Foundation, Inc. + This file is part of gnulib. + Unlike most of the getopt implementation, it is NOT shared + with the GNU C Library. + + This file is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as + published by the Free Software Foundation; either version 2.1 of the + License, or (at your option) any later version. + + This file is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with this program. If not, see . */ + +#ifndef _GETOPT_PFX_EXT_H +#define _GETOPT_PFX_EXT_H 1 + +/* This header should not be used directly; include getopt.h instead. + It does not have a protective #error, because the guard macro for + getopt.h in gnulib is not fixed. */ + +/* Standalone applications should #define __GETOPT_PREFIX to an + identifier that prefixes the external functions and variables + defined in getopt-core.h and getopt-ext.h. Systematically + rename identifiers so that they do not collide with the system + functions and variables. Renaming avoids problems with some + compilers and linkers. */ +#ifdef __GETOPT_PREFIX +# ifndef __GETOPT_ID +# define __GETOPT_CONCAT(x, y) x ## y +# define __GETOPT_XCONCAT(x, y) __GETOPT_CONCAT (x, y) +# define __GETOPT_ID(y) __GETOPT_XCONCAT (__GETOPT_PREFIX, y) +# endif +# undef getopt_long +# undef getopt_long_only +# undef option +# undef _getopt_internal +# define getopt_long __GETOPT_ID (getopt_long) +# define getopt_long_only __GETOPT_ID (getopt_long_only) +# define option __GETOPT_ID (option) +# define _getopt_internal __GETOPT_ID (getopt_internal) + +/* The system's getopt.h may have already included getopt-ext.h to + declare the unprefixed identifiers. Undef _GETOPT_EXT_H so that + getopt-ext.h declares them with prefixes. */ +# undef _GETOPT_EXT_H +#endif + +/* Standalone applications get correct prototypes for getopt_long and + getopt_long_only; they declare "char **argv". For backward + compatibility with old applications, if __GETOPT_PREFIX is not + defined, we supply GNU-libc-compatible, but incorrect, prototypes + using "char *const *argv". (GNU libc is stuck with the incorrect + prototypes, as they are baked into older versions of LSB.) */ +#ifndef __getopt_argv_const +# if defined __GETOPT_PREFIX +# define __getopt_argv_const /* empty */ +# else +# define __getopt_argv_const const +# endif +#endif + +#include + +#endif /* _GETOPT_PFX_EXT_H */ diff --git a/src/native/external/xz/lib/getopt.c b/src/native/external/xz/lib/getopt.c new file mode 100644 index 00000000000000..ab3ff879ced0d7 --- /dev/null +++ b/src/native/external/xz/lib/getopt.c @@ -0,0 +1,823 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +/* Getopt for GNU. + Copyright (C) 1987-2023 Free Software Foundation, Inc. + This file is part of the GNU C Library and is also part of gnulib. + Patches to this file should be submitted to both projects. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#ifndef _LIBC +# ifdef HAVE_CONFIG_H +# include +# endif +#endif + +#include "getopt.h" + +#include +#include +#include +#ifndef _MSC_VER +# include +#endif + +#ifdef _LIBC +/* When used as part of glibc, error printing must be done differently + for standards compliance. getopt is not a cancellation point, so + it must not call functions that are, and it is specified by an + older standard than stdio locking, so it must not refer to + functions in the "user namespace" related to stdio locking. + Finally, it must use glibc's internal message translation so that + the messages are looked up in the proper text domain. */ +# include +# define fprintf __fxprintf_nocancel +# define flockfile(fp) _IO_flockfile (fp) +# define funlockfile(fp) _IO_funlockfile (fp) +#else +/* Completely disable NLS for getopt. We won't include translations for it + anyway. If the system lacks getopt_long, missing translations probably + aren't a problem. */ +//# include "gettext.h" +//# define _(msgid) gettext (msgid) +#define _(msgid) (msgid) +/* When used standalone, flockfile and funlockfile might not be + available. */ +# if (!defined _POSIX_THREAD_SAFE_FUNCTIONS \ + || (defined _WIN32 && ! defined __CYGWIN__)) +# define flockfile(fp) /* nop */ +# define funlockfile(fp) /* nop */ +# endif +/* When used standalone, do not attempt to use alloca. */ +# define __libc_use_alloca(size) 0 +# undef alloca +# define alloca(size) (abort (), (void *)0) +#endif + +/* This implementation of 'getopt' has three modes for handling + options interspersed with non-option arguments. It can stop + scanning for options at the first non-option argument encountered, + as POSIX specifies. It can continue scanning for options after the + first non-option argument, but permute 'argv' as it goes so that, + after 'getopt' is done, all the options precede all the non-option + arguments and 'optind' points to the first non-option argument. + Or, it can report non-option arguments as if they were arguments to + the option character '\x01'. + + The default behavior of 'getopt_long' is to permute the argument list. + When this implementation is used standalone, the default behavior of + 'getopt' is to stop at the first non-option argument, but when it is + used as part of GNU libc it also permutes the argument list. In both + cases, setting the environment variable POSIXLY_CORRECT to any value + disables permutation. + + If the first character of the OPTSTRING argument to 'getopt' or + 'getopt_long' is '+', both functions will stop at the first + non-option argument. If it is '-', both functions will report + non-option arguments as arguments to the option character '\x01'. */ + +#include "getopt_int.h" + +/* For communication from 'getopt' to the caller. + When 'getopt' finds an option that takes an argument, + the argument value is returned here. + Also, when 'ordering' is RETURN_IN_ORDER, + each non-option ARGV-element is returned here. */ + +char *optarg; + +/* Index in ARGV of the next element to be scanned. + This is used for communication to and from the caller + and for communication between successive calls to 'getopt'. + + On entry to 'getopt', zero means this is the first call; initialize. + + When 'getopt' returns -1, this is the index of the first of the + non-option elements that the caller should itself scan. + + Otherwise, 'optind' communicates from one call to the next + how much of ARGV has been scanned so far. */ + +/* 1003.2 says this must be 1 before any call. */ +int optind = 1; + +/* Callers store zero here to inhibit the error message + for unrecognized options. */ + +int opterr = 1; + +/* Set to an option character which was unrecognized. + This must be initialized on some systems to avoid linking in the + system's own getopt implementation. */ + +int optopt = '?'; + +/* Keep a global copy of all internal members of getopt_data. */ + +static struct _getopt_data getopt_data; + +/* Exchange two adjacent subsequences of ARGV. + One subsequence is elements [first_nonopt,last_nonopt) + which contains all the non-options that have been skipped so far. + The other is elements [last_nonopt,optind), which contains all + the options processed since those non-options were skipped. + + 'first_nonopt' and 'last_nonopt' are relocated so that they describe + the new indices of the non-options in ARGV after they are moved. */ + +static void +exchange (char **argv, struct _getopt_data *d) +{ + int bottom = d->__first_nonopt; + int middle = d->__last_nonopt; + int top = d->optind; + char *tem; + + /* Exchange the shorter segment with the far end of the longer segment. + That puts the shorter segment into the right place. + It leaves the longer segment in the right place overall, + but it consists of two parts that need to be swapped next. */ + + while (top > middle && middle > bottom) + { + if (top - middle > middle - bottom) + { + /* Bottom segment is the short one. */ + int len = middle - bottom; + int i; + + /* Swap it with the top part of the top segment. */ + for (i = 0; i < len; i++) + { + tem = argv[bottom + i]; + argv[bottom + i] = argv[top - (middle - bottom) + i]; + argv[top - (middle - bottom) + i] = tem; + } + /* Exclude the moved bottom segment from further swapping. */ + top -= len; + } + else + { + /* Top segment is the short one. */ + int len = top - middle; + int i; + + /* Swap it with the bottom part of the bottom segment. */ + for (i = 0; i < len; i++) + { + tem = argv[bottom + i]; + argv[bottom + i] = argv[middle + i]; + argv[middle + i] = tem; + } + /* Exclude the moved top segment from further swapping. */ + bottom += len; + } + } + + /* Update records for the slots the non-options now occupy. */ + + d->__first_nonopt += (d->optind - d->__last_nonopt); + d->__last_nonopt = d->optind; +} + +/* Process the argument starting with d->__nextchar as a long option. + d->optind should *not* have been advanced over this argument. + + If the value returned is -1, it was not actually a long option, the + state is unchanged, and the argument should be processed as a set + of short options (this can only happen when long_only is true). + Otherwise, the option (and its argument, if any) have been consumed + and the return value is the value to return from _getopt_internal_r. */ +static int +process_long_option (int argc, char **argv, const char *optstring, + const struct option *longopts, int *longind, + int long_only, struct _getopt_data *d, + int print_errors, const char *prefix) +{ + char *nameend; + size_t namelen; + const struct option *p; + const struct option *pfound = NULL; + int n_options; + int option_index; + + for (nameend = d->__nextchar; *nameend && *nameend != '='; nameend++) + /* Do nothing. */ ; + namelen = (size_t)(nameend - d->__nextchar); + + /* First look for an exact match, counting the options as a side + effect. */ + for (p = longopts, n_options = 0; p->name; p++, n_options++) + if (!strncmp (p->name, d->__nextchar, namelen) + && namelen == strlen (p->name)) + { + /* Exact match found. */ + pfound = p; + option_index = n_options; + break; + } + + if (pfound == NULL) + { + /* Didn't find an exact match, so look for abbreviations. */ + unsigned char *ambig_set = NULL; + int ambig_malloced = 0; + int ambig_fallback = 0; + int indfound = -1; + + for (p = longopts, option_index = 0; p->name; p++, option_index++) + if (!strncmp (p->name, d->__nextchar, namelen)) + { + if (pfound == NULL) + { + /* First nonexact match found. */ + pfound = p; + indfound = option_index; + } + else if (long_only + || pfound->has_arg != p->has_arg + || pfound->flag != p->flag + || pfound->val != p->val) + { + /* Second or later nonexact match found. */ + if (!ambig_fallback) + { + if (!print_errors) + /* Don't waste effort tracking the ambig set if + we're not going to print it anyway. */ + ambig_fallback = 1; + else if (!ambig_set) + { + if (__libc_use_alloca (n_options)) + ambig_set = alloca (n_options); + else if ((ambig_set = malloc ((size_t)n_options)) == NULL) + /* Fall back to simpler error message. */ + ambig_fallback = 1; + else + ambig_malloced = 1; + + if (ambig_set) + { + memset (ambig_set, 0, (size_t)n_options); + ambig_set[indfound] = 1; + } + } + if (ambig_set) + ambig_set[option_index] = 1; + } + } + } + + if (ambig_set || ambig_fallback) + { + if (print_errors) + { + if (ambig_fallback) + fprintf (stderr, _("%s: option '%s%s' is ambiguous\n"), + argv[0], prefix, d->__nextchar); + else + { + flockfile (stderr); + fprintf (stderr, + _("%s: option '%s%s' is ambiguous; possibilities:"), + argv[0], prefix, d->__nextchar); + + for (option_index = 0; option_index < n_options; option_index++) + if (ambig_set[option_index]) + fprintf (stderr, " '%s%s'", + prefix, longopts[option_index].name); + + /* This must use 'fprintf' even though it's only + printing a single character, so that it goes through + __fxprintf_nocancel when compiled as part of glibc. */ + fprintf (stderr, "\n"); + funlockfile (stderr); + } + } + if (ambig_malloced) + free (ambig_set); + d->__nextchar += strlen (d->__nextchar); + d->optind++; + d->optopt = 0; + return '?'; + } + + option_index = indfound; + } + + if (pfound == NULL) + { + /* Can't find it as a long option. If this is not getopt_long_only, + or the option starts with '--' or is not a valid short option, + then it's an error. */ + if (!long_only || argv[d->optind][1] == '-' + || strchr (optstring, *d->__nextchar) == NULL) + { + if (print_errors) + fprintf (stderr, _("%s: unrecognized option '%s%s'\n"), + argv[0], prefix, d->__nextchar); + + d->__nextchar = NULL; + d->optind++; + d->optopt = 0; + return '?'; + } + + /* Otherwise interpret it as a short option. */ + return -1; + } + + /* We have found a matching long option. Consume it. */ + d->optind++; + d->__nextchar = NULL; + if (*nameend) + { + /* Don't test has_arg with >, because some C compilers don't + allow it to be used on enums. */ + if (pfound->has_arg) + d->optarg = nameend + 1; + else + { + if (print_errors) + fprintf (stderr, + _("%s: option '%s%s' doesn't allow an argument\n"), + argv[0], prefix, pfound->name); + + d->optopt = pfound->val; + return '?'; + } + } + else if (pfound->has_arg == 1) + { + if (d->optind < argc) + d->optarg = argv[d->optind++]; + else + { + if (print_errors) + fprintf (stderr, + _("%s: option '%s%s' requires an argument\n"), + argv[0], prefix, pfound->name); + + d->optopt = pfound->val; + return optstring[0] == ':' ? ':' : '?'; + } + } + + if (longind != NULL) + *longind = option_index; + if (pfound->flag) + { + *(pfound->flag) = pfound->val; + return 0; + } + return pfound->val; +} + +/* Initialize internal data upon the first call to getopt. */ + +static const char * +_getopt_initialize (int argc, + char **argv, const char *optstring, + struct _getopt_data *d, int posixly_correct) +{ + (void)argc; + (void)argv; + /* Start processing options with ARGV-element 1 (since ARGV-element 0 + is the program name); the sequence of previously skipped + non-option ARGV-elements is empty. */ + if (d->optind == 0) + d->optind = 1; + + d->__first_nonopt = d->__last_nonopt = d->optind; + d->__nextchar = NULL; + + /* Determine how to handle the ordering of options and nonoptions. */ + if (optstring[0] == '-') + { + d->__ordering = RETURN_IN_ORDER; + ++optstring; + } + else if (optstring[0] == '+') + { + d->__ordering = REQUIRE_ORDER; + ++optstring; + } + else if (posixly_correct || !!getenv ("POSIXLY_CORRECT")) + d->__ordering = REQUIRE_ORDER; + else + d->__ordering = PERMUTE; + + d->__initialized = 1; + return optstring; +} + +/* Scan elements of ARGV (whose length is ARGC) for option characters + given in OPTSTRING. + + If an element of ARGV starts with '-', and is not exactly "-" or "--", + then it is an option element. The characters of this element + (aside from the initial '-') are option characters. If 'getopt' + is called repeatedly, it returns successively each of the option characters + from each of the option elements. + + If 'getopt' finds another option character, it returns that character, + updating 'optind' and 'nextchar' so that the next call to 'getopt' can + resume the scan with the following option character or ARGV-element. + + If there are no more option characters, 'getopt' returns -1. + Then 'optind' is the index in ARGV of the first ARGV-element + that is not an option. (The ARGV-elements have been permuted + so that those that are not options now come last.) + + OPTSTRING is a string containing the legitimate option characters. + If an option character is seen that is not listed in OPTSTRING, + return '?' after printing an error message. If you set 'opterr' to + zero, the error message is suppressed but we still return '?'. + + If a char in OPTSTRING is followed by a colon, that means it wants an arg, + so the following text in the same ARGV-element, or the text of the following + ARGV-element, is returned in 'optarg'. Two colons mean an option that + wants an optional arg; if there is text in the current ARGV-element, + it is returned in 'optarg', otherwise 'optarg' is set to zero. + + If OPTSTRING starts with '-' or '+', it requests different methods of + handling the non-option ARGV-elements. + See the comments about RETURN_IN_ORDER and REQUIRE_ORDER, above. + + Long-named options begin with '--' instead of '-'. + Their names may be abbreviated as long as the abbreviation is unique + or is an exact match for some defined option. If they have an + argument, it follows the option name in the same ARGV-element, separated + from the option name by a '=', or else the in next ARGV-element. + When 'getopt' finds a long-named option, it returns 0 if that option's + 'flag' field is nonzero, the value of the option's 'val' field + if the 'flag' field is zero. + + The elements of ARGV aren't really const, because we permute them. + But we pretend they're const in the prototype to be compatible + with other systems. + + LONGOPTS is a vector of 'struct option' terminated by an + element containing a name which is zero. + + LONGIND returns the index in LONGOPT of the long-named option found. + It is only valid when a long-named option has been found by the most + recent call. + + If LONG_ONLY is nonzero, '-' as well as '--' can introduce + long-named options. */ + +int +_getopt_internal_r (int argc, char **argv, const char *optstring, + const struct option *longopts, int *longind, + int long_only, struct _getopt_data *d, int posixly_correct) +{ + int print_errors = d->opterr; + + if (argc < 1) + return -1; + + d->optarg = NULL; + + if (d->optind == 0 || !d->__initialized) + optstring = _getopt_initialize (argc, argv, optstring, d, posixly_correct); + else if (optstring[0] == '-' || optstring[0] == '+') + optstring++; + + if (optstring[0] == ':') + print_errors = 0; + + /* Test whether ARGV[optind] points to a non-option argument. */ +#define NONOPTION_P (argv[d->optind][0] != '-' || argv[d->optind][1] == '\0') + + if (d->__nextchar == NULL || *d->__nextchar == '\0') + { + /* Advance to the next ARGV-element. */ + + /* Give FIRST_NONOPT & LAST_NONOPT rational values if OPTIND has been + moved back by the user (who may also have changed the arguments). */ + if (d->__last_nonopt > d->optind) + d->__last_nonopt = d->optind; + if (d->__first_nonopt > d->optind) + d->__first_nonopt = d->optind; + + if (d->__ordering == PERMUTE) + { + /* If we have just processed some options following some non-options, + exchange them so that the options come first. */ + + if (d->__first_nonopt != d->__last_nonopt + && d->__last_nonopt != d->optind) + exchange (argv, d); + else if (d->__last_nonopt != d->optind) + d->__first_nonopt = d->optind; + + /* Skip any additional non-options + and extend the range of non-options previously skipped. */ + + while (d->optind < argc && NONOPTION_P) + d->optind++; + d->__last_nonopt = d->optind; + } + + /* The special ARGV-element '--' means premature end of options. + Skip it like a null option, + then exchange with previous non-options as if it were an option, + then skip everything else like a non-option. */ + + if (d->optind != argc && !strcmp (argv[d->optind], "--")) + { + d->optind++; + + if (d->__first_nonopt != d->__last_nonopt + && d->__last_nonopt != d->optind) + exchange (argv, d); + else if (d->__first_nonopt == d->__last_nonopt) + d->__first_nonopt = d->optind; + d->__last_nonopt = argc; + + d->optind = argc; + } + + /* If we have done all the ARGV-elements, stop the scan + and back over any non-options that we skipped and permuted. */ + + if (d->optind == argc) + { + /* Set the next-arg-index to point at the non-options + that we previously skipped, so the caller will digest them. */ + if (d->__first_nonopt != d->__last_nonopt) + d->optind = d->__first_nonopt; + return -1; + } + + /* If we have come to a non-option and did not permute it, + either stop the scan or describe it to the caller and pass it by. */ + + if (NONOPTION_P) + { + if (d->__ordering == REQUIRE_ORDER) + return -1; + d->optarg = argv[d->optind++]; + return 1; + } + + /* We have found another option-ARGV-element. + Check whether it might be a long option. */ + if (longopts) + { + if (argv[d->optind][1] == '-') + { + /* "--foo" is always a long option. The special option + "--" was handled above. */ + d->__nextchar = argv[d->optind] + 2; + return process_long_option (argc, argv, optstring, longopts, + longind, long_only, d, + print_errors, "--"); + } + + /* If long_only and the ARGV-element has the form "-f", + where f is a valid short option, don't consider it an + abbreviated form of a long option that starts with f. + Otherwise there would be no way to give the -f short + option. + + On the other hand, if there's a long option "fubar" and + the ARGV-element is "-fu", do consider that an + abbreviation of the long option, just like "--fu", and + not "-f" with arg "u". + + This distinction seems to be the most useful approach. */ + if (long_only && (argv[d->optind][2] + || !strchr (optstring, argv[d->optind][1]))) + { + int code; + d->__nextchar = argv[d->optind] + 1; + code = process_long_option (argc, argv, optstring, longopts, + longind, long_only, d, + print_errors, "-"); + if (code != -1) + return code; + } + } + + /* It is not a long option. Skip the initial punctuation. */ + d->__nextchar = argv[d->optind] + 1; + } + + /* Look at and handle the next short option-character. */ + + { + char c = *d->__nextchar++; + const char *temp = strchr (optstring, c); + + /* Increment 'optind' when we start to process its last character. */ + if (*d->__nextchar == '\0') + ++d->optind; + + if (temp == NULL || c == ':' || c == ';') + { + if (print_errors) + fprintf (stderr, _("%s: invalid option -- '%c'\n"), argv[0], c); + d->optopt = c; + return '?'; + } + + /* Convenience. Treat POSIX -W foo same as long option --foo */ + if (temp[0] == 'W' && temp[1] == ';' && longopts != NULL) + { + /* This is an option that requires an argument. */ + if (*d->__nextchar != '\0') + d->optarg = d->__nextchar; + else if (d->optind == argc) + { + if (print_errors) + fprintf (stderr, + _("%s: option requires an argument -- '%c'\n"), + argv[0], c); + + d->optopt = c; + if (optstring[0] == ':') + c = ':'; + else + c = '?'; + return c; + } + else + d->optarg = argv[d->optind]; + + d->__nextchar = d->optarg; + d->optarg = NULL; + return process_long_option (argc, argv, optstring, longopts, longind, + 0 /* long_only */, d, print_errors, "-W "); + } + if (temp[1] == ':') + { + if (temp[2] == ':') + { + /* This is an option that accepts an argument optionally. */ + if (*d->__nextchar != '\0') + { + d->optarg = d->__nextchar; + d->optind++; + } + else + d->optarg = NULL; + d->__nextchar = NULL; + } + else + { + /* This is an option that requires an argument. */ + if (*d->__nextchar != '\0') + { + d->optarg = d->__nextchar; + /* If we end this ARGV-element by taking the rest as an arg, + we must advance to the next element now. */ + d->optind++; + } + else if (d->optind == argc) + { + if (print_errors) + fprintf (stderr, + _("%s: option requires an argument -- '%c'\n"), + argv[0], c); + + d->optopt = c; + if (optstring[0] == ':') + c = ':'; + else + c = '?'; + } + else + /* We already incremented 'optind' once; + increment it again when taking next ARGV-elt as argument. */ + d->optarg = argv[d->optind++]; + d->__nextchar = NULL; + } + } + return c; + } +} + +int +_getopt_internal (int argc, char **argv, const char *optstring, + const struct option *longopts, int *longind, int long_only, + int posixly_correct) +{ + int result; + + getopt_data.optind = optind; + getopt_data.opterr = opterr; + + result = _getopt_internal_r (argc, argv, optstring, longopts, + longind, long_only, &getopt_data, + posixly_correct); + + optind = getopt_data.optind; + optarg = getopt_data.optarg; + optopt = getopt_data.optopt; + + return result; +} + +/* glibc gets a LSB-compliant getopt and a POSIX-complaint __posix_getopt. + Standalone applications just get a POSIX-compliant getopt. + POSIX and LSB both require these functions to take 'char *const *argv' + even though this is incorrect (because of the permutation). */ +#define GETOPT_ENTRY(NAME, POSIXLY_CORRECT) \ + int \ + NAME (int argc, char *const *argv, const char *optstring) \ + { \ + return _getopt_internal (argc, (char **)argv, optstring, \ + 0, 0, 0, POSIXLY_CORRECT); \ + } + +#ifdef _LIBC +GETOPT_ENTRY(getopt, 0) +GETOPT_ENTRY(__posix_getopt, 1) +#else +GETOPT_ENTRY(getopt, 1) +#endif + + +#ifdef TEST + +/* Compile with -DTEST to make an executable for use in testing + the above definition of 'getopt'. */ + +int +main (int argc, char **argv) +{ + int c; + int digit_optind = 0; + + while (1) + { + int this_option_optind = optind ? optind : 1; + + c = getopt (argc, argv, "abc:d:0123456789"); + if (c == -1) + break; + + switch (c) + { + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + if (digit_optind != 0 && digit_optind != this_option_optind) + printf ("digits occur in two different argv-elements.\n"); + digit_optind = this_option_optind; + printf ("option %c\n", c); + break; + + case 'a': + printf ("option a\n"); + break; + + case 'b': + printf ("option b\n"); + break; + + case 'c': + printf ("option c with value '%s'\n", optarg); + break; + + case '?': + break; + + default: + printf ("?? getopt returned character code 0%o ??\n", c); + } + } + + if (optind < argc) + { + printf ("non-option ARGV-elements: "); + while (optind < argc) + printf ("%s ", argv[optind++]); + printf ("\n"); + } + + exit (0); +} + +#endif /* TEST */ diff --git a/src/native/external/xz/lib/getopt.in.h b/src/native/external/xz/lib/getopt.in.h new file mode 100644 index 00000000000000..6d602c5da24c80 --- /dev/null +++ b/src/native/external/xz/lib/getopt.in.h @@ -0,0 +1,60 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +/* Declarations for getopt. + Copyright (C) 1989-2023 Free Software Foundation, Inc. + This file is part of gnulib. + Unlike most of the getopt implementation, it is NOT shared + with the GNU C Library, which supplies a different version of + this file. + + This file is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as + published by the Free Software Foundation; either version 2.1 of the + License, or (at your option) any later version. + + This file is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with this program. If not, see . */ + +#ifndef _GETOPT_H + +#define _GETOPT_H 1 + +/* Standalone applications should #define __GETOPT_PREFIX to an + identifier that prefixes the external functions and variables + defined in this header. When this happens, include the + headers that might declare getopt so that they will not cause + confusion if included after this file. Then systematically rename + identifiers so that they do not collide with the system functions + and variables. Renaming avoids problems with some compilers and + linkers. */ +#if defined __GETOPT_PREFIX +# include +# include + +# ifndef _MSC_VER +# include +# endif +#endif + +/* From Gnulib's lib/arg-nonnull.h: */ +/* _GL_ARG_NONNULL((n,...,m)) tells the compiler and static analyzer tools + that the values passed as arguments n, ..., m must be non-NULL pointers. + n = 1 stands for the first argument, n = 2 for the second argument etc. */ +#ifndef _GL_ARG_NONNULL +# if __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 3) || defined __clang__ +# define _GL_ARG_NONNULL(params) __attribute__ ((__nonnull__ params)) +# else +# define _GL_ARG_NONNULL(params) +# endif +#endif + +#include +#include +#include + +#endif /* _GETOPT_H */ diff --git a/src/native/external/xz/lib/getopt1.c b/src/native/external/xz/lib/getopt1.c new file mode 100644 index 00000000000000..5cb3b913d09fa3 --- /dev/null +++ b/src/native/external/xz/lib/getopt1.c @@ -0,0 +1,163 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +/* getopt_long and getopt_long_only entry points for GNU getopt. + Copyright (C) 1987-2023 Free Software Foundation, Inc. + This file is part of the GNU C Library and is also part of gnulib. + Patches to this file should be submitted to both projects. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#ifndef _LIBC +# ifdef HAVE_CONFIG_H +# include +# endif +#endif + +#include "getopt.h" +#include "getopt_int.h" + +int +getopt_long (int argc, char *__getopt_argv_const *argv, const char *options, + const struct option *long_options, int *opt_index) +{ + return _getopt_internal (argc, (char **) argv, options, long_options, + opt_index, 0, 0); +} + +int +_getopt_long_r (int argc, char **argv, const char *options, + const struct option *long_options, int *opt_index, + struct _getopt_data *d) +{ + return _getopt_internal_r (argc, argv, options, long_options, opt_index, + 0, d, 0); +} + +/* Like getopt_long, but '-' as well as '--' can indicate a long option. + If an option that starts with '-' (not '--') doesn't match a long option, + but does match a short option, it is parsed as a short option + instead. */ + +int +getopt_long_only (int argc, char *__getopt_argv_const *argv, + const char *options, + const struct option *long_options, int *opt_index) +{ + return _getopt_internal (argc, (char **) argv, options, long_options, + opt_index, 1, 0); +} + +int +_getopt_long_only_r (int argc, char **argv, const char *options, + const struct option *long_options, int *opt_index, + struct _getopt_data *d) +{ + return _getopt_internal_r (argc, argv, options, long_options, opt_index, + 1, d, 0); +} + + +#ifdef TEST + +#include +#include + +int +main (int argc, char **argv) +{ + int c; + int digit_optind = 0; + + while (1) + { + int this_option_optind = optind ? optind : 1; + int option_index = 0; + static const struct option long_options[] = + { + {"add", 1, 0, 0}, + {"append", 0, 0, 0}, + {"delete", 1, 0, 0}, + {"verbose", 0, 0, 0}, + {"create", 0, 0, 0}, + {"file", 1, 0, 0}, + {0, 0, 0, 0} + }; + + c = getopt_long (argc, argv, "abc:d:0123456789", + long_options, &option_index); + if (c == -1) + break; + + switch (c) + { + case 0: + printf ("option %s", long_options[option_index].name); + if (optarg) + printf (" with arg %s", optarg); + printf ("\n"); + break; + + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + if (digit_optind != 0 && digit_optind != this_option_optind) + printf ("digits occur in two different argv-elements.\n"); + digit_optind = this_option_optind; + printf ("option %c\n", c); + break; + + case 'a': + printf ("option a\n"); + break; + + case 'b': + printf ("option b\n"); + break; + + case 'c': + printf ("option c with value '%s'\n", optarg); + break; + + case 'd': + printf ("option d with value '%s'\n", optarg); + break; + + case '?': + break; + + default: + printf ("?? getopt returned character code 0%o ??\n", c); + } + } + + if (optind < argc) + { + printf ("non-option ARGV-elements: "); + while (optind < argc) + printf ("%s ", argv[optind++]); + printf ("\n"); + } + + exit (0); +} + +#endif /* TEST */ diff --git a/src/native/external/xz/lib/getopt_int.h b/src/native/external/xz/lib/getopt_int.h new file mode 100644 index 00000000000000..2dcb5538213d9f --- /dev/null +++ b/src/native/external/xz/lib/getopt_int.h @@ -0,0 +1,120 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +/* Internal declarations for getopt. + Copyright (C) 1989-2023 Free Software Foundation, Inc. + This file is part of the GNU C Library and is also part of gnulib. + Patches to this file should be submitted to both projects. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#ifndef _GETOPT_INT_H +#define _GETOPT_INT_H 1 + +#include + +extern int _getopt_internal (int ___argc, char **___argv, + const char *__shortopts, + const struct option *__longopts, int *__longind, + int __long_only, int __posixly_correct); + + +/* Reentrant versions which can handle parsing multiple argument + vectors at the same time. */ + +/* Describe how to deal with options that follow non-option ARGV-elements. + + REQUIRE_ORDER means don't recognize them as options; stop option + processing when the first non-option is seen. This is what POSIX + specifies should happen. + + PERMUTE means permute the contents of ARGV as we scan, so that + eventually all the non-options are at the end. This allows options + to be given in any order, even with programs that were not written + to expect this. + + RETURN_IN_ORDER is an option available to programs that were + written to expect options and other ARGV-elements in any order + and that care about the ordering of the two. We describe each + non-option ARGV-element as if it were the argument of an option + with character code 1. + + The special argument '--' forces an end of option-scanning regardless + of the value of 'ordering'. In the case of RETURN_IN_ORDER, only + '--' can cause 'getopt' to return -1 with 'optind' != ARGC. */ + +enum __ord + { + REQUIRE_ORDER, PERMUTE, RETURN_IN_ORDER + }; + +/* Data type for reentrant functions. */ +struct _getopt_data +{ + /* These have exactly the same meaning as the corresponding global + variables, except that they are used for the reentrant + versions of getopt. */ + int optind; + int opterr; + int optopt; + char *optarg; + + /* Internal members. */ + + /* True if the internal members have been initialized. */ + int __initialized; + + /* The next char to be scanned in the option-element + in which the last option character we returned was found. + This allows us to pick up the scan where we left off. + + If this is zero, or a null string, it means resume the scan + by advancing to the next ARGV-element. */ + char *__nextchar; + + /* See __ord above. */ + enum __ord __ordering; + + /* Handle permutation of arguments. */ + + /* Describe the part of ARGV that contains non-options that have + been skipped. 'first_nonopt' is the index in ARGV of the first + of them; 'last_nonopt' is the index after the last of them. */ + + int __first_nonopt; + int __last_nonopt; +}; + +/* The initializer is necessary to set OPTIND and OPTERR to their + default values and to clear the initialization flag. */ +#define _GETOPT_DATA_INITIALIZER { 1, 1 } + +extern int _getopt_internal_r (int ___argc, char **___argv, + const char *__shortopts, + const struct option *__longopts, int *__longind, + int __long_only, struct _getopt_data *__data, + int __posixly_correct); + +extern int _getopt_long_r (int ___argc, char **___argv, + const char *__shortopts, + const struct option *__longopts, int *__longind, + struct _getopt_data *__data); + +extern int _getopt_long_only_r (int ___argc, char **___argv, + const char *__shortopts, + const struct option *__longopts, + int *__longind, + struct _getopt_data *__data); + +#endif /* getopt_int.h */ diff --git a/src/native/external/xz/src/Makefile.am b/src/native/external/xz/src/Makefile.am new file mode 100644 index 00000000000000..52885299684890 --- /dev/null +++ b/src/native/external/xz/src/Makefile.am @@ -0,0 +1,45 @@ +## SPDX-License-Identifier: 0BSD +## Author: Lasse Collin + +SUBDIRS = liblzma xzdec + +if COND_XZ +SUBDIRS += xz +endif + +if COND_LZMAINFO +SUBDIRS += lzmainfo +endif + +if COND_SCRIPTS +SUBDIRS += scripts +endif + +EXTRA_DIST = \ + common/common_w32res.rc \ + common/my_landlock.h \ + common/mythread.h \ + common/sysdefs.h \ + common/tuklib_common.h \ + common/tuklib_config.h \ + common/tuklib_cpucores.c \ + common/tuklib_cpucores.h \ + common/tuklib_exit.c \ + common/tuklib_exit.h \ + common/tuklib_gettext.h \ + common/tuklib_integer.h \ + common/tuklib_mbstr.h \ + common/tuklib_mbstr_fw.c \ + common/tuklib_mbstr_nonprint.c \ + common/tuklib_mbstr_nonprint.h \ + common/tuklib_mbstr_width.c \ + common/tuklib_mbstr_wrap.c \ + common/tuklib_mbstr_wrap.h \ + common/tuklib_open_stdxxx.c \ + common/tuklib_open_stdxxx.h \ + common/tuklib_physmem.c \ + common/tuklib_physmem.h \ + common/tuklib_progname.c \ + common/tuklib_progname.h \ + common/w32_application.manifest \ + common/w32_application.manifest.comments.txt diff --git a/src/native/external/xz/src/common/common_w32res.rc b/src/native/external/xz/src/common/common_w32res.rc new file mode 100644 index 00000000000000..8114ee311e6367 --- /dev/null +++ b/src/native/external/xz/src/common/common_w32res.rc @@ -0,0 +1,57 @@ +/* SPDX-License-Identifier: 0BSD */ + +/* + * Author: Lasse Collin + */ + +#include +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif +#define LZMA_H_INTERNAL +#define LZMA_H_INTERNAL_RC +#include "lzma/version.h" + +#ifndef MY_BUILD +# define MY_BUILD 0 +#endif +#define MY_VERSION LZMA_VERSION_MAJOR,LZMA_VERSION_MINOR,LZMA_VERSION_PATCH,MY_BUILD + +#define MY_FILENAME MY_NAME MY_SUFFIX +#define MY_COMPANY "The Tukaani Project " +#define MY_PRODUCT PACKAGE_NAME " <" PACKAGE_URL ">" + +LANGUAGE LANG_ENGLISH, SUBLANG_ENGLISH_US + +VS_VERSION_INFO VERSIONINFO +FILEVERSION MY_VERSION +PRODUCTVERSION MY_VERSION +FILEFLAGSMASK VS_FFI_FILEFLAGSMASK +FILEFLAGS 0 +FILEOS VOS_NT_WINDOWS32 +FILETYPE MY_TYPE +FILESUBTYPE 0x0L +BEGIN + BLOCK "StringFileInfo" + BEGIN + BLOCK "040904b0" + BEGIN + VALUE "CompanyName", MY_COMPANY + VALUE "FileDescription", MY_DESC + VALUE "FileVersion", LZMA_VERSION_STRING + VALUE "InternalName", MY_NAME + VALUE "OriginalFilename", MY_FILENAME + VALUE "ProductName", MY_PRODUCT + VALUE "ProductVersion", LZMA_VERSION_STRING + END + END + BLOCK "VarFileInfo" + BEGIN + VALUE "Translation", 0x409, 1200 + END +END + +/* Omit the manifest on Cygwin and MSYS2 (both define __CYGWIN__). */ +#if MY_TYPE == VFT_APP && !defined(__CYGWIN__) +CREATEPROCESS_MANIFEST_RESOURCE_ID RT_MANIFEST "w32_application.manifest" +#endif diff --git a/src/native/external/xz/src/common/my_landlock.h b/src/native/external/xz/src/common/my_landlock.h new file mode 100644 index 00000000000000..5f761695b6a503 --- /dev/null +++ b/src/native/external/xz/src/common/my_landlock.h @@ -0,0 +1,184 @@ +// SPDX-License-Identifier: 0BSD + +/////////////////////////////////////////////////////////////////////////////// +// +/// \file my_landlock.h +/// \brief Linux Landlock sandbox helper functions +/// +/// \note This uses static variables to cache the Landlock ABI version. +/// Only one file in an application should include this header. +/// Only one thread should call these functions. +// +// Author: Lasse Collin +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef MY_LANDLOCK_H +#define MY_LANDLOCK_H + +#include "sysdefs.h" + +#include +#include +#include +#include + + +/// \brief Initialize Landlock ruleset attributes to forbid everything +/// +/// The supported Landlock ABI is checked at runtime and only the supported +/// actions are forbidden in the attributes. Thus, if the attributes are +/// used with my_landlock_create_ruleset(), it shouldn't fail. +/// +/// \return On success, the Landlock ABI version is returned (a positive +/// integer). If Landlock isn't supported, -1 is returned. +static int +my_landlock_ruleset_attr_forbid_all(struct landlock_ruleset_attr *attr) +{ + memzero(attr, sizeof(*attr)); + + // Cache the Landlock ABI version: + // 0 = not checked yet + // -1 = Landlock not supported + // >0 = Landlock ABI version + static int abi_version = 0; + +#ifdef LANDLOCK_SCOPE_SIGNAL + // Red Hat Enterprise Linux 9 kernel since 5.14.0-603.el9 (2025-07-30) + // claims ABI version 6 support, but as of 5.14.0-643.el9 (2025-11-22) + // it lacks LANDLOCK_SCOPE_SIGNAL. ABI version 6 was added in upstream + // Linux 6.12 while RHEL 9 has Linux 5.14 with lots of backports. + // We assume that any kernel version 5.14 with ABI version 6 is buggy. + static bool is_rhel9 = false; +#endif + + if (abi_version == 0) { + abi_version = syscall(SYS_landlock_create_ruleset, + (void *)NULL, 0, LANDLOCK_CREATE_RULESET_VERSION); + +#ifdef LANDLOCK_SCOPE_SIGNAL + if (abi_version == 6) { + static const char rel[] = "5.14."; + const size_t rel_len = sizeof(rel) - 1; + + struct utsname un; + if (uname(&un) == 0 && strncmp( + un.release, rel, rel_len) == 0) + is_rhel9 = true; + } +#endif + } + + if (abi_version <= 0) + return -1; + + // ABI 1 except the few at the end + attr->handled_access_fs + = LANDLOCK_ACCESS_FS_EXECUTE + | LANDLOCK_ACCESS_FS_WRITE_FILE + | LANDLOCK_ACCESS_FS_READ_FILE + | LANDLOCK_ACCESS_FS_READ_DIR + | LANDLOCK_ACCESS_FS_REMOVE_DIR + | LANDLOCK_ACCESS_FS_REMOVE_FILE + | LANDLOCK_ACCESS_FS_MAKE_CHAR + | LANDLOCK_ACCESS_FS_MAKE_DIR + | LANDLOCK_ACCESS_FS_MAKE_REG + | LANDLOCK_ACCESS_FS_MAKE_SOCK + | LANDLOCK_ACCESS_FS_MAKE_FIFO + | LANDLOCK_ACCESS_FS_MAKE_BLOCK + | LANDLOCK_ACCESS_FS_MAKE_SYM +#ifdef LANDLOCK_ACCESS_FS_REFER + | LANDLOCK_ACCESS_FS_REFER // ABI 2 +#endif +#ifdef LANDLOCK_ACCESS_FS_TRUNCATE + | LANDLOCK_ACCESS_FS_TRUNCATE // ABI 3 +#endif +#ifdef LANDLOCK_ACCESS_FS_IOCTL_DEV + | LANDLOCK_ACCESS_FS_IOCTL_DEV // ABI 5 +#endif + ; + +#ifdef LANDLOCK_ACCESS_NET_BIND_TCP + // ABI 4 + attr->handled_access_net + = LANDLOCK_ACCESS_NET_BIND_TCP + | LANDLOCK_ACCESS_NET_CONNECT_TCP; +#endif + +#ifdef LANDLOCK_SCOPE_SIGNAL + // ABI 6 + attr->scoped + = LANDLOCK_SCOPE_ABSTRACT_UNIX_SOCKET + | LANDLOCK_SCOPE_SIGNAL; +#endif + + // Disable flags that require a new ABI version. + switch (abi_version) { + case 1: +#ifdef LANDLOCK_ACCESS_FS_REFER + attr->handled_access_fs &= ~LANDLOCK_ACCESS_FS_REFER; +#endif + FALLTHROUGH; + + case 2: +#ifdef LANDLOCK_ACCESS_FS_TRUNCATE + attr->handled_access_fs &= ~LANDLOCK_ACCESS_FS_TRUNCATE; +#endif + FALLTHROUGH; + + case 3: +#ifdef LANDLOCK_ACCESS_NET_BIND_TCP + attr->handled_access_net = 0; +#endif + FALLTHROUGH; + + case 4: +#ifdef LANDLOCK_ACCESS_FS_IOCTL_DEV + attr->handled_access_fs &= ~LANDLOCK_ACCESS_FS_IOCTL_DEV; +#endif + FALLTHROUGH; + + case 5: +#ifdef LANDLOCK_SCOPE_SIGNAL + attr->scoped = 0; +#endif + FALLTHROUGH; + + case 6: +#ifdef LANDLOCK_SCOPE_SIGNAL + if (is_rhel9) + attr->scoped &= ~LANDLOCK_SCOPE_SIGNAL; +#endif + + FALLTHROUGH; + + default: + // We only know about the features of the ABIs 1-6. + break; + } + + return abi_version; +} + + +/// \brief Wrapper for the landlock_create_ruleset(2) syscall +/// +/// Syscall wrappers provide argument type checking. +/// +/// \note Remember to call `prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)` too! +static inline int +my_landlock_create_ruleset(const struct landlock_ruleset_attr *attr, + size_t size, uint32_t flags) +{ + return syscall(SYS_landlock_create_ruleset, attr, size, flags); +} + + +/// \brief Wrapper for the landlock_restrict_self(2) syscall +static inline int +my_landlock_restrict_self(int ruleset_fd, uint32_t flags) +{ + return syscall(SYS_landlock_restrict_self, ruleset_fd, flags); +} + +#endif diff --git a/src/native/external/xz/src/common/mythread.h b/src/native/external/xz/src/common/mythread.h new file mode 100644 index 00000000000000..bec69b4487cb16 --- /dev/null +++ b/src/native/external/xz/src/common/mythread.h @@ -0,0 +1,549 @@ +// SPDX-License-Identifier: 0BSD + +/////////////////////////////////////////////////////////////////////////////// +// +/// \file mythread.h +/// \brief Some threading related helper macros and functions +// +// Author: Lasse Collin +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef MYTHREAD_H +#define MYTHREAD_H + +#include "sysdefs.h" + +// If any type of threading is enabled, #define MYTHREAD_ENABLED. +#if defined(MYTHREAD_POSIX) || defined(MYTHREAD_WIN95) \ + || defined(MYTHREAD_VISTA) +# define MYTHREAD_ENABLED 1 +#endif + + +#ifdef MYTHREAD_ENABLED + +//////////////////////////////////////// +// Shared between all threading types // +//////////////////////////////////////// + +// Locks a mutex for a duration of a block. +// +// Perform mythread_mutex_lock(&mutex) in the beginning of a block +// and mythread_mutex_unlock(&mutex) at the end of the block. "break" +// may be used to unlock the mutex and jump out of the block. +// mythread_sync blocks may be nested. +// +// Example: +// +// mythread_sync(mutex) { +// foo(); +// if (some_error) +// break; // Skips bar() +// bar(); +// } +// +// At least GCC optimizes the loops completely away so it doesn't slow +// things down at all compared to plain mythread_mutex_lock(&mutex) +// and mythread_mutex_unlock(&mutex) calls. +// +#define mythread_sync(mutex) mythread_sync_helper1(mutex, __LINE__) +#define mythread_sync_helper1(mutex, line) mythread_sync_helper2(mutex, line) +#define mythread_sync_helper2(mutex, line) \ + for (unsigned int mythread_i_ ## line = 0; \ + mythread_i_ ## line \ + ? (mythread_mutex_unlock(&(mutex)), 0) \ + : (mythread_mutex_lock(&(mutex)), 1); \ + mythread_i_ ## line = 1) \ + for (unsigned int mythread_j_ ## line = 0; \ + !mythread_j_ ## line; \ + mythread_j_ ## line = 1) +#endif + + +#if !defined(MYTHREAD_ENABLED) + +////////////////// +// No threading // +////////////////// + +// Calls the given function once. This isn't thread safe. +#define mythread_once(func) \ +do { \ + static bool once_ = false; \ + if (!once_) { \ + func(); \ + once_ = true; \ + } \ +} while (0) + + +#if !(defined(_WIN32) && !defined(__CYGWIN__)) \ + && (!defined(__wasm__) || defined(__EMSCRIPTEN__)) +// Use sigprocmask() to set the signal mask in single-threaded programs. +#include + +static inline void +mythread_sigmask(int how, const sigset_t *restrict set, + sigset_t *restrict oset) +{ + int ret = sigprocmask(how, set, oset); + assert(ret == 0); + (void)ret; +} +#endif + + +#elif defined(MYTHREAD_POSIX) + +//////////////////// +// Using pthreads // +//////////////////// + +#include +#include +#include +#include + +// If clock_gettime() isn't available, use gettimeofday() from +// as a fallback. gettimeofday() is in SUSv2 and thus is supported on all +// relevant POSIX systems. +#ifndef HAVE_CLOCK_GETTIME +# include +#endif + +// MinGW-w64 with winpthreads: +// +// NOTE: Typical builds with MinGW-w64 don't use this code (MYTHREAD_POSIX). +// Instead, native Windows threading APIs are used (MYTHREAD_VISTA or +// MYTHREAD_WIN95). +// +// MinGW-w64 has _sigset_t (an integer type) in . +// If _POSIX was #defined, the header would add the alias sigset_t too. +// Let's keep this working even without _POSIX. +// +// There are no functions that actually do something with sigset_t +// because signals barely exist on Windows. The sigfillset macro below +// is just to silence warnings. There is no sigfillset() in MinGW-w64. +#ifdef __MINGW32__ +# include +# define sigset_t _sigset_t +# define sigfillset(set_ptr) do { *(set_ptr) = 0; } while (0) +#endif + +#define MYTHREAD_RET_TYPE void * +#define MYTHREAD_RET_VALUE NULL + +typedef pthread_t mythread; +typedef pthread_mutex_t mythread_mutex; + +typedef struct { + pthread_cond_t cond; +#ifdef HAVE_CLOCK_GETTIME + // Clock ID (CLOCK_REALTIME or CLOCK_MONOTONIC) associated with + // the condition variable. + clockid_t clk_id; +#endif +} mythread_cond; + +typedef struct timespec mythread_condtime; + + +// Calls the given function once in a thread-safe way. +#define mythread_once(func) \ + do { \ + static pthread_once_t once_ = PTHREAD_ONCE_INIT; \ + pthread_once(&once_, &func); \ + } while (0) + + +// Use pthread_sigmask() to set the signal mask in multi-threaded programs. +// Do nothing on OpenVMS since it lacks pthread_sigmask(). +// Do nothing on MinGW-w64 too to silence warnings (its pthread_sigmask() +// is #defined to 0 so it's a no-op). +static inline void +mythread_sigmask(int how, const sigset_t *restrict set, + sigset_t *restrict oset) +{ +#if defined(__VMS) || defined(__MINGW32__) + (void)how; + (void)set; + (void)oset; +#else + int ret = pthread_sigmask(how, set, oset); + assert(ret == 0); + (void)ret; +#endif +} + + +// Creates a new thread with all signals blocked. Returns zero on success +// and non-zero on error. +static inline int +mythread_create(mythread *thread, void *(*func)(void *arg), void *arg) +{ + sigset_t old; + sigset_t all; + sigfillset(&all); + + mythread_sigmask(SIG_SETMASK, &all, &old); + const int ret = pthread_create(thread, NULL, func, arg); + mythread_sigmask(SIG_SETMASK, &old, NULL); + + return ret; +} + +// Joins a thread. Returns zero on success and non-zero on error. +static inline int +mythread_join(mythread thread) +{ + return pthread_join(thread, NULL); +} + + +// Initializes a mutex. Returns zero on success and non-zero on error. +static inline int +mythread_mutex_init(mythread_mutex *mutex) +{ + return pthread_mutex_init(mutex, NULL); +} + +static inline void +mythread_mutex_destroy(mythread_mutex *mutex) +{ + int ret = pthread_mutex_destroy(mutex); + assert(ret == 0); + (void)ret; +} + +static inline void +mythread_mutex_lock(mythread_mutex *mutex) +{ + int ret = pthread_mutex_lock(mutex); + assert(ret == 0); + (void)ret; +} + +static inline void +mythread_mutex_unlock(mythread_mutex *mutex) +{ + int ret = pthread_mutex_unlock(mutex); + assert(ret == 0); + (void)ret; +} + + +// Initializes a condition variable. +// +// Using CLOCK_MONOTONIC instead of the default CLOCK_REALTIME makes the +// timeout in pthread_cond_timedwait() work correctly also if system time +// is suddenly changed. Unfortunately CLOCK_MONOTONIC isn't available +// everywhere while the default CLOCK_REALTIME is, so the default is +// used if CLOCK_MONOTONIC isn't available. +// +// If clock_gettime() isn't available at all, gettimeofday() will be used. +static inline int +mythread_cond_init(mythread_cond *mycond) +{ +#ifdef HAVE_CLOCK_GETTIME +# if defined(HAVE_PTHREAD_CONDATTR_SETCLOCK) && \ + defined(HAVE_CLOCK_MONOTONIC) + struct timespec ts; + pthread_condattr_t condattr; + + // POSIX doesn't seem to *require* that pthread_condattr_setclock() + // will fail if given an unsupported clock ID. Test that + // CLOCK_MONOTONIC really is supported using clock_gettime(). + if (clock_gettime(CLOCK_MONOTONIC, &ts) == 0 + && pthread_condattr_init(&condattr) == 0) { + int ret = pthread_condattr_setclock( + &condattr, CLOCK_MONOTONIC); + if (ret == 0) + ret = pthread_cond_init(&mycond->cond, &condattr); + + pthread_condattr_destroy(&condattr); + + if (ret == 0) { + mycond->clk_id = CLOCK_MONOTONIC; + return 0; + } + } + + // If anything above fails, fall back to the default CLOCK_REALTIME. + // POSIX requires that all implementations of clock_gettime() must + // support at least CLOCK_REALTIME. +# endif + + mycond->clk_id = CLOCK_REALTIME; +#endif + + return pthread_cond_init(&mycond->cond, NULL); +} + +static inline void +mythread_cond_destroy(mythread_cond *cond) +{ + int ret = pthread_cond_destroy(&cond->cond); + assert(ret == 0); + (void)ret; +} + +static inline void +mythread_cond_signal(mythread_cond *cond) +{ + int ret = pthread_cond_signal(&cond->cond); + assert(ret == 0); + (void)ret; +} + +static inline void +mythread_cond_wait(mythread_cond *cond, mythread_mutex *mutex) +{ + int ret = pthread_cond_wait(&cond->cond, mutex); + assert(ret == 0); + (void)ret; +} + +// Waits on a condition or until a timeout expires. If the timeout expires, +// non-zero is returned, otherwise zero is returned. +static inline int +mythread_cond_timedwait(mythread_cond *cond, mythread_mutex *mutex, + const mythread_condtime *condtime) +{ + int ret = pthread_cond_timedwait(&cond->cond, mutex, condtime); + assert(ret == 0 || ret == ETIMEDOUT); + return ret; +} + +// Sets condtime to the absolute time that is timeout_ms milliseconds +// in the future. The type of the clock to use is taken from cond. +static inline void +mythread_condtime_set(mythread_condtime *condtime, const mythread_cond *cond, + uint32_t timeout_ms) +{ + condtime->tv_sec = (time_t)(timeout_ms / 1000); + condtime->tv_nsec = (long)((timeout_ms % 1000) * 1000000); + +#ifdef HAVE_CLOCK_GETTIME + struct timespec now; + int ret = clock_gettime(cond->clk_id, &now); + assert(ret == 0); + (void)ret; + + condtime->tv_sec += now.tv_sec; + condtime->tv_nsec += now.tv_nsec; +#else + (void)cond; + + struct timeval now; + gettimeofday(&now, NULL); + + condtime->tv_sec += now.tv_sec; + condtime->tv_nsec += now.tv_usec * 1000L; +#endif + + // tv_nsec must stay in the range [0, 999_999_999]. + if (condtime->tv_nsec >= 1000000000L) { + condtime->tv_nsec -= 1000000000L; + ++condtime->tv_sec; + } +} + + +#elif defined(MYTHREAD_WIN95) || defined(MYTHREAD_VISTA) + +///////////////////// +// Windows threads // +///////////////////// + +#define WIN32_LEAN_AND_MEAN +#ifdef MYTHREAD_VISTA +# undef _WIN32_WINNT +# define _WIN32_WINNT 0x0600 +#endif +#include +#include + +#define MYTHREAD_RET_TYPE unsigned int __stdcall +#define MYTHREAD_RET_VALUE 0 + +typedef HANDLE mythread; +typedef CRITICAL_SECTION mythread_mutex; + +#ifdef MYTHREAD_WIN95 +typedef HANDLE mythread_cond; +#else +typedef CONDITION_VARIABLE mythread_cond; +#endif + +typedef struct { + // Tick count (milliseconds) in the beginning of the timeout. + // NOTE: This is 32 bits so it wraps around after 49.7 days. + // Multi-day timeouts may not work as expected. + DWORD start; + + // Length of the timeout in milliseconds. The timeout expires + // when the current tick count minus "start" is equal or greater + // than "timeout". + DWORD timeout; +} mythread_condtime; + + +// mythread_once() is only available with Vista threads. +#ifdef MYTHREAD_VISTA +#define mythread_once(func) \ + do { \ + static INIT_ONCE once_ = INIT_ONCE_STATIC_INIT; \ + BOOL pending_; \ + if (!InitOnceBeginInitialize(&once_, 0, &pending_, NULL)) \ + abort(); \ + if (pending_) { \ + func(); \ + if (!InitOnceComplete(&once_, 0, NULL)) \ + abort(); \ + } \ + } while (0) +#endif + + +// mythread_sigmask() isn't available on Windows. Even a dummy version would +// make no sense because the other POSIX signal functions are missing anyway. + + +static inline int +mythread_create(mythread *thread, + unsigned int (__stdcall *func)(void *arg), void *arg) +{ + uintptr_t ret = _beginthreadex(NULL, 0, func, arg, 0, NULL); + if (ret == 0) + return -1; + + *thread = (HANDLE)ret; + return 0; +} + +static inline int +mythread_join(mythread thread) +{ + int ret = 0; + + if (WaitForSingleObject(thread, INFINITE) != WAIT_OBJECT_0) + ret = -1; + + if (!CloseHandle(thread)) + ret = -1; + + return ret; +} + + +static inline int +mythread_mutex_init(mythread_mutex *mutex) +{ + InitializeCriticalSection(mutex); + return 0; +} + +static inline void +mythread_mutex_destroy(mythread_mutex *mutex) +{ + DeleteCriticalSection(mutex); +} + +static inline void +mythread_mutex_lock(mythread_mutex *mutex) +{ + EnterCriticalSection(mutex); +} + +static inline void +mythread_mutex_unlock(mythread_mutex *mutex) +{ + LeaveCriticalSection(mutex); +} + + +static inline int +mythread_cond_init(mythread_cond *cond) +{ +#ifdef MYTHREAD_WIN95 + *cond = CreateEvent(NULL, FALSE, FALSE, NULL); + return *cond == NULL ? -1 : 0; +#else + InitializeConditionVariable(cond); + return 0; +#endif +} + +static inline void +mythread_cond_destroy(mythread_cond *cond) +{ +#ifdef MYTHREAD_WIN95 + CloseHandle(*cond); +#else + (void)cond; +#endif +} + +static inline void +mythread_cond_signal(mythread_cond *cond) +{ +#ifdef MYTHREAD_WIN95 + SetEvent(*cond); +#else + WakeConditionVariable(cond); +#endif +} + +static inline void +mythread_cond_wait(mythread_cond *cond, mythread_mutex *mutex) +{ +#ifdef MYTHREAD_WIN95 + LeaveCriticalSection(mutex); + WaitForSingleObject(*cond, INFINITE); + EnterCriticalSection(mutex); +#else + BOOL ret = SleepConditionVariableCS(cond, mutex, INFINITE); + assert(ret); + (void)ret; +#endif +} + +static inline int +mythread_cond_timedwait(mythread_cond *cond, mythread_mutex *mutex, + const mythread_condtime *condtime) +{ +#ifdef MYTHREAD_WIN95 + LeaveCriticalSection(mutex); +#endif + + DWORD elapsed = GetTickCount() - condtime->start; + DWORD timeout = elapsed >= condtime->timeout + ? 0 : condtime->timeout - elapsed; + +#ifdef MYTHREAD_WIN95 + DWORD ret = WaitForSingleObject(*cond, timeout); + assert(ret == WAIT_OBJECT_0 || ret == WAIT_TIMEOUT); + + EnterCriticalSection(mutex); + + return ret == WAIT_TIMEOUT; +#else + BOOL ret = SleepConditionVariableCS(cond, mutex, timeout); + assert(ret || GetLastError() == ERROR_TIMEOUT); + return !ret; +#endif +} + +static inline void +mythread_condtime_set(mythread_condtime *condtime, const mythread_cond *cond, + uint32_t timeout) +{ + (void)cond; + condtime->start = GetTickCount(); + condtime->timeout = timeout; +} + +#endif + +#endif diff --git a/src/native/external/xz/src/common/sysdefs.h b/src/native/external/xz/src/common/sysdefs.h new file mode 100644 index 00000000000000..b10ffa7c3b1821 --- /dev/null +++ b/src/native/external/xz/src/common/sysdefs.h @@ -0,0 +1,229 @@ +// SPDX-License-Identifier: 0BSD + +/////////////////////////////////////////////////////////////////////////////// +// +/// \file sysdefs.h +/// \brief Common includes, definitions, system-specific things etc. +/// +/// This file is used also by the lzma command line tool, that's why this +/// file is separate from common.h. +// +// Author: Lasse Collin +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef LZMA_SYSDEFS_H +#define LZMA_SYSDEFS_H + +////////////// +// Includes // +////////////// + +#ifdef HAVE_CONFIG_H +# include +#endif + +// Choose if MinGW-w64's stdio replacement functions should be used. +// The default has varied slightly in the past so it's clearest to always +// set it explicitly. +// +// Modern MinGW-w64 enables the replacement functions even with UCRT +// when _GNU_SOURCE is defined. That's good because UCRT doesn't support +// the POSIX thousand separator flag in printf (like "%'u"). Otherwise +// XZ Utils works with the UCRT stdio functions. +// +// The replacement functions add over 20 KiB to each executable. For +// size-optimized builds (HAVE_SMALL), disable the replacements. +// Then thousand separators aren't shown in xz's messages but this is +// a minor downside compare to the slower speed of the HAVE_SMALL builds. +// +// The legacy MSVCRT is pre-C99 and it's best to always use the stdio +// replacements functions from MinGW-w64. +#if defined(__MINGW32__) && !defined(__USE_MINGW_ANSI_STDIO) +# define __USE_MINGW_ANSI_STDIO 1 +# include <_mingw.h> +# if defined(_UCRT) && defined(HAVE_SMALL) +# undef __USE_MINGW_ANSI_STDIO +# define __USE_MINGW_ANSI_STDIO 0 +# endif +#endif + +// size_t and NULL +#include + +#ifdef HAVE_INTTYPES_H +# include +#endif + +// C99 says that inttypes.h always includes stdint.h, but some systems +// don't do that, and require including stdint.h separately. +#ifdef HAVE_STDINT_H +# include +#endif + +// Some pre-C99 systems have SIZE_MAX in limits.h instead of stdint.h. The +// limits are also used to figure out some macros missing from pre-C99 systems. +#include + +// Be more compatible with systems that have non-conforming inttypes.h. +// We assume that int is 32-bit and that long is either 32-bit or 64-bit. +// Full Autoconf test could be more correct, but this should work well enough. +// Note that this duplicates some code from lzma.h, but this is better since +// we can work without inttypes.h thanks to Autoconf tests. +#ifndef UINT32_C +# if UINT_MAX != 4294967295U +# error UINT32_C is not defined and unsigned int is not 32-bit. +# endif +# define UINT32_C(n) n ## U +#endif +#ifndef UINT32_MAX +# define UINT32_MAX UINT32_C(4294967295) +#endif +#ifndef PRIu32 +# define PRIu32 "u" +#endif +#ifndef PRIx32 +# define PRIx32 "x" +#endif +#ifndef PRIX32 +# define PRIX32 "X" +#endif + +#if ULONG_MAX == 4294967295UL +# ifndef UINT64_C +# define UINT64_C(n) n ## ULL +# endif +# ifndef PRIu64 +# define PRIu64 "llu" +# endif +# ifndef PRIx64 +# define PRIx64 "llx" +# endif +# ifndef PRIX64 +# define PRIX64 "llX" +# endif +#else +# ifndef UINT64_C +# define UINT64_C(n) n ## UL +# endif +# ifndef PRIu64 +# define PRIu64 "lu" +# endif +# ifndef PRIx64 +# define PRIx64 "lx" +# endif +# ifndef PRIX64 +# define PRIX64 "lX" +# endif +#endif +#ifndef UINT64_MAX +# define UINT64_MAX UINT64_C(18446744073709551615) +#endif + +// Incorrect(?) SIZE_MAX: +// - Interix headers typedef size_t to unsigned long, +// but a few lines later define SIZE_MAX to INT32_MAX. +// - SCO OpenServer (x86) headers typedef size_t to unsigned int +// but define SIZE_MAX to INT32_MAX. +#if defined(__INTERIX) || defined(_SCO_DS) +# undef SIZE_MAX +#endif + +// The code currently assumes that size_t is either 32-bit or 64-bit. +#ifndef SIZE_MAX +# if SIZEOF_SIZE_T == 4 +# define SIZE_MAX UINT32_MAX +# elif SIZEOF_SIZE_T == 8 +# define SIZE_MAX UINT64_MAX +# else +# error size_t is not 32-bit or 64-bit +# endif +#endif +#if SIZE_MAX != UINT32_MAX && SIZE_MAX != UINT64_MAX +# error size_t is not 32-bit or 64-bit +#endif + +#include +#include + +// Pre-C99 systems lack stdbool.h. All the code in XZ Utils must be written +// so that it works with fake bool type, for example: +// +// bool foo = (flags & 0x100) != 0; +// bool bar = !!(flags & 0x100); +// +// This works with the real C99 bool but breaks with fake bool: +// +// bool baz = (flags & 0x100); +// +#ifdef HAVE_STDBOOL_H +# include +#else +# if ! HAVE__BOOL +typedef unsigned char _Bool; +# endif +# define bool _Bool +# define false 0 +# define true 1 +# define __bool_true_false_are_defined 1 +#endif + +// We may need alignas from C11/C17/C23. +#if __STDC_VERSION__ >= 202311 + // alignas is a keyword in C23. Do nothing. +#elif __STDC_VERSION__ >= 201112 + // Oracle Developer Studio 12.6 lacks . + // For simplicity, avoid the header with all C11/C17 compilers. +# define alignas _Alignas +#elif defined(__GNUC__) || defined(__clang__) +# define alignas(n) __attribute__((__aligned__(n))) +#else +# define alignas(n) +#endif + +#include + +// MSVC v19.00 (VS 2015 version 14.0) and later should work. +// +// MSVC v19.27 (VS 2019 version 16.7) added support for restrict. +// Older ones support only __restrict. +#ifdef _MSC_VER +# if _MSC_VER < 1927 && !defined(restrict) +# define restrict __restrict +# endif +#endif + +//////////// +// Macros // +//////////// + +#undef memzero +#define memzero(s, n) memset(s, 0, n) + +// NOTE: Avoid using MIN() and MAX(), because even conditionally defining +// those macros can cause some portability trouble, since on some systems +// the system headers insist defining their own versions. +#define my_min(x, y) ((x) < (y) ? (x) : (y)) +#define my_max(x, y) ((x) > (y) ? (x) : (y)) + +#ifndef ARRAY_SIZE +# define ARRAY_SIZE(array) (sizeof(array) / sizeof((array)[0])) +#endif + +#if defined(__GNUC__) \ + && ((__GNUC__ == 4 && __GNUC_MINOR__ >= 3) || __GNUC__ > 4) +# define lzma_attr_alloc_size(x) __attribute__((__alloc_size__(x))) +#else +# define lzma_attr_alloc_size(x) +#endif + +#if __STDC_VERSION__ >= 202311 +# define FALLTHROUGH [[__fallthrough__]] +#elif (defined(__GNUC__) && __GNUC__ >= 7) \ + || (defined(__clang_major__) && __clang_major__ >= 10) +# define FALLTHROUGH __attribute__((__fallthrough__)) +#else +# define FALLTHROUGH ((void)0) +#endif + +#endif diff --git a/src/native/external/xz/src/common/tuklib_common.h b/src/native/external/xz/src/common/tuklib_common.h new file mode 100644 index 00000000000000..d73f07255e4d1a --- /dev/null +++ b/src/native/external/xz/src/common/tuklib_common.h @@ -0,0 +1,95 @@ +// SPDX-License-Identifier: 0BSD + +/////////////////////////////////////////////////////////////////////////////// +// +/// \file tuklib_common.h +/// \brief Common definitions for tuklib modules +// +// Author: Lasse Collin +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef TUKLIB_COMMON_H +#define TUKLIB_COMMON_H + +// The config file may be replaced by a package-specific file. +// It should include at least stddef.h, stdbool.h, inttypes.h, and limits.h. +#include "tuklib_config.h" + +// TUKLIB_SYMBOL_PREFIX is prefixed to all symbols exported by +// the tuklib modules. If you use a tuklib module in a library, +// you should use TUKLIB_SYMBOL_PREFIX to make sure that there +// are no symbol conflicts in case someone links your library +// into application that also uses the same tuklib module. +#ifndef TUKLIB_SYMBOL_PREFIX +# define TUKLIB_SYMBOL_PREFIX +#endif + +#define TUKLIB_CAT_X(a, b) a ## b +#define TUKLIB_CAT(a, b) TUKLIB_CAT_X(a, b) + +#ifndef TUKLIB_SYMBOL +# define TUKLIB_SYMBOL(sym) TUKLIB_CAT(TUKLIB_SYMBOL_PREFIX, sym) +#endif + +#ifndef TUKLIB_DECLS_BEGIN +# ifdef __cplusplus +# define TUKLIB_DECLS_BEGIN extern "C" { +# else +# define TUKLIB_DECLS_BEGIN +# endif +#endif + +#ifndef TUKLIB_DECLS_END +# ifdef __cplusplus +# define TUKLIB_DECLS_END } +# else +# define TUKLIB_DECLS_END +# endif +#endif + +#if defined(__GNUC__) && defined(__GNUC_MINOR__) +# define TUKLIB_GNUC_REQ(major, minor) \ + ((__GNUC__ == (major) && __GNUC_MINOR__ >= (minor)) \ + || __GNUC__ > (major)) +#else +# define TUKLIB_GNUC_REQ(major, minor) 0 +#endif + +#if defined(__GNUC__) || defined(__clang__) +# define tuklib_attr_format_printf(fmt_index, args_index) \ + __attribute__((__format__(__printf__, fmt_index, args_index))) +#else +# define tuklib_attr_format_printf(fmt_index, args_index) +#endif + +// tuklib_attr_noreturn attribute is used to mark functions as non-returning. +// We cannot use "noreturn" as the macro name because then C23 code that +// uses [[noreturn]] would break as it would expand to [[ [[noreturn]] ]]. +// +// tuklib_attr_noreturn must be used at the beginning of function declaration +// to work in all cases. The [[noreturn]] syntax is the most limiting, it +// must be even before any GNU C's __attribute__ keywords: +// +// tuklib_attr_noreturn +// __attribute__((nonnull(1))) +// extern void foo(const char *s); +// +#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311 +# define tuklib_attr_noreturn [[noreturn]] +#elif defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112 +# define tuklib_attr_noreturn _Noreturn +#elif TUKLIB_GNUC_REQ(2, 5) +# define tuklib_attr_noreturn __attribute__((__noreturn__)) +#elif defined(_MSC_VER) +# define tuklib_attr_noreturn __declspec(noreturn) +#else +# define tuklib_attr_noreturn +#endif + +#if (defined(_WIN32) && !defined(__CYGWIN__)) \ + || defined(__OS2__) || defined(__MSDOS__) +# define TUKLIB_DOSLIKE 1 +#endif + +#endif diff --git a/src/native/external/xz/src/common/tuklib_config.h b/src/native/external/xz/src/common/tuklib_config.h new file mode 100644 index 00000000000000..b27251dc27657f --- /dev/null +++ b/src/native/external/xz/src/common/tuklib_config.h @@ -0,0 +1,12 @@ +// SPDX-License-Identifier: 0BSD + +// If config.h isn't available, assume that the headers required by +// tuklib_common.h are available. This is required by crc32_tablegen.c. +#ifdef HAVE_CONFIG_H +# include "sysdefs.h" +#else +# include +# include +# include +# include +#endif diff --git a/src/native/external/xz/src/common/tuklib_cpucores.c b/src/native/external/xz/src/common/tuklib_cpucores.c new file mode 100644 index 00000000000000..c4a781ac387c8f --- /dev/null +++ b/src/native/external/xz/src/common/tuklib_cpucores.c @@ -0,0 +1,108 @@ +// SPDX-License-Identifier: 0BSD + +/////////////////////////////////////////////////////////////////////////////// +// +/// \file tuklib_cpucores.c +/// \brief Get the number of CPU cores online +// +// Author: Lasse Collin +// +/////////////////////////////////////////////////////////////////////////////// + +#include "tuklib_cpucores.h" + +#if defined(_WIN32) || defined(__CYGWIN__) +# ifndef _WIN32_WINNT +# define _WIN32_WINNT 0x0500 +# endif +# include + +// glibc >= 2.9 +#elif defined(TUKLIB_CPUCORES_SCHED_GETAFFINITY) +# include + +// FreeBSD +#elif defined(TUKLIB_CPUCORES_CPUSET) +# include +# include + +#elif defined(TUKLIB_CPUCORES_SYSCTL) +# ifdef HAVE_SYS_PARAM_H +# include +# endif +# include + +#elif defined(TUKLIB_CPUCORES_SYSCONF) +# include + +// HP-UX +#elif defined(TUKLIB_CPUCORES_PSTAT_GETDYNAMIC) +# include +# include +#endif + + +extern uint32_t +tuklib_cpucores(void) +{ + uint32_t ret = 0; + +#if defined(_WIN32) || defined(__CYGWIN__) + SYSTEM_INFO sysinfo; + GetSystemInfo(&sysinfo); + ret = sysinfo.dwNumberOfProcessors; + +#elif defined(TUKLIB_CPUCORES_SCHED_GETAFFINITY) + cpu_set_t cpu_mask; + if (sched_getaffinity(0, sizeof(cpu_mask), &cpu_mask) == 0) + ret = (uint32_t)CPU_COUNT(&cpu_mask); + +#elif defined(TUKLIB_CPUCORES_CPUSET) + cpuset_t set; + if (cpuset_getaffinity(CPU_LEVEL_WHICH, CPU_WHICH_PID, -1, + sizeof(set), &set) == 0) { +# ifdef CPU_COUNT + ret = (uint32_t)CPU_COUNT(&set); +# else + for (unsigned i = 0; i < CPU_SETSIZE; ++i) + if (CPU_ISSET(i, &set)) + ++ret; +# endif + } + +#elif defined(TUKLIB_CPUCORES_SYSCTL) + // On OpenBSD HW_NCPUONLINE tells the number of processor cores that + // are online so it is preferred over HW_NCPU which also counts cores + // that aren't currently available. The number of cores online is + // often less than HW_NCPU because OpenBSD disables simultaneous + // multi-threading (SMT) by default. +# ifdef HW_NCPUONLINE + int name[2] = { CTL_HW, HW_NCPUONLINE }; +# else + int name[2] = { CTL_HW, HW_NCPU }; +# endif + int cpus; + size_t cpus_size = sizeof(cpus); + if (sysctl(name, 2, &cpus, &cpus_size, NULL, 0) != -1 + && cpus_size == sizeof(cpus) && cpus > 0) + ret = (uint32_t)cpus; + +#elif defined(TUKLIB_CPUCORES_SYSCONF) +# ifdef _SC_NPROCESSORS_ONLN + // Most systems + const long cpus = sysconf(_SC_NPROCESSORS_ONLN); +# else + // IRIX + const long cpus = sysconf(_SC_NPROC_ONLN); +# endif + if (cpus > 0) + ret = (uint32_t)cpus; + +#elif defined(TUKLIB_CPUCORES_PSTAT_GETDYNAMIC) + struct pst_dynamic pst; + if (pstat_getdynamic(&pst, sizeof(pst), 1, 0) != -1) + ret = (uint32_t)pst.psd_proc_cnt; +#endif + + return ret; +} diff --git a/src/native/external/xz/src/common/tuklib_cpucores.h b/src/native/external/xz/src/common/tuklib_cpucores.h new file mode 100644 index 00000000000000..edff9395e41cca --- /dev/null +++ b/src/native/external/xz/src/common/tuklib_cpucores.h @@ -0,0 +1,22 @@ +// SPDX-License-Identifier: 0BSD + +/////////////////////////////////////////////////////////////////////////////// +// +/// \file tuklib_cpucores.h +/// \brief Get the number of CPU cores online +// +// Author: Lasse Collin +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef TUKLIB_CPUCORES_H +#define TUKLIB_CPUCORES_H + +#include "tuklib_common.h" +TUKLIB_DECLS_BEGIN + +#define tuklib_cpucores TUKLIB_SYMBOL(tuklib_cpucores) +extern uint32_t tuklib_cpucores(void); + +TUKLIB_DECLS_END +#endif diff --git a/src/native/external/xz/src/common/tuklib_exit.c b/src/native/external/xz/src/common/tuklib_exit.c new file mode 100644 index 00000000000000..c84e0f679a64d0 --- /dev/null +++ b/src/native/external/xz/src/common/tuklib_exit.c @@ -0,0 +1,57 @@ +// SPDX-License-Identifier: 0BSD + +/////////////////////////////////////////////////////////////////////////////// +// +/// \file tuklib_exit.c +/// \brief Close stdout and stderr, and exit +// +// Author: Lasse Collin +// +/////////////////////////////////////////////////////////////////////////////// + +#include "tuklib_common.h" + +#include +#include +#include + +#include "tuklib_gettext.h" +#include "tuklib_progname.h" +#include "tuklib_exit.h" + + +extern void +tuklib_exit(int status, int err_status, int show_error) +{ + if (status != err_status) { + // Close stdout. If something goes wrong, + // print an error message to stderr. + const int ferror_err = ferror(stdout); + const int fclose_err = fclose(stdout); + if (ferror_err || fclose_err) { + status = err_status; + + // If it was fclose() that failed, we have the reason + // in errno. If only ferror() indicated an error, + // we have no idea what the reason was. + if (show_error) + fprintf(stderr, "%s: %s: %s\n", progname, + _("Writing to standard " + "output failed"), + fclose_err ? strerror(errno) + : _("Unknown error")); + } + } + + if (status != err_status) { + // Close stderr. If something goes wrong, there's + // nothing where we could print an error message. + // Just set the exit status. + const int ferror_err = ferror(stderr); + const int fclose_err = fclose(stderr); + if (fclose_err || ferror_err) + status = err_status; + } + + exit(status); +} diff --git a/src/native/external/xz/src/common/tuklib_exit.h b/src/native/external/xz/src/common/tuklib_exit.h new file mode 100644 index 00000000000000..d4e6b4a7e832ba --- /dev/null +++ b/src/native/external/xz/src/common/tuklib_exit.h @@ -0,0 +1,24 @@ +// SPDX-License-Identifier: 0BSD + +/////////////////////////////////////////////////////////////////////////////// +// +/// \file tuklib_exit.h +/// \brief Close stdout and stderr, and exit +/// \note Requires tuklib_progname and tuklib_gettext modules +// +// Author: Lasse Collin +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef TUKLIB_EXIT_H +#define TUKLIB_EXIT_H + +#include "tuklib_common.h" +TUKLIB_DECLS_BEGIN + +#define tuklib_exit TUKLIB_SYMBOL(tuklib_exit) +tuklib_attr_noreturn +extern void tuklib_exit(int status, int err_status, int show_error); + +TUKLIB_DECLS_END +#endif diff --git a/src/native/external/xz/src/common/tuklib_gettext.h b/src/native/external/xz/src/common/tuklib_gettext.h new file mode 100644 index 00000000000000..e5ad5e6f78a1fb --- /dev/null +++ b/src/native/external/xz/src/common/tuklib_gettext.h @@ -0,0 +1,54 @@ +// SPDX-License-Identifier: 0BSD + +/////////////////////////////////////////////////////////////////////////////// +// +/// \file tuklib_gettext.h +/// \brief Wrapper for gettext and friends +// +// Author: Lasse Collin +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef TUKLIB_GETTEXT_H +#define TUKLIB_GETTEXT_H + +#include "tuklib_common.h" +#include + +#ifndef TUKLIB_GETTEXT +# ifdef ENABLE_NLS +# define TUKLIB_GETTEXT 1 +# else +# define TUKLIB_GETTEXT 0 +# endif +#endif + +#if TUKLIB_GETTEXT +# include +# define tuklib_gettext_init(package, localedir) \ + do { \ + setlocale(LC_ALL, ""); \ + bindtextdomain(package, localedir); \ + textdomain(package); \ + } while (0) +# define _(msgid) gettext(msgid) +#else +# define tuklib_gettext_init(package, localedir) \ + setlocale(LC_ALL, "") +# define _(msgid) (msgid) +# define ngettext(msgid1, msgid2, n) ((n) == 1 ? (msgid1) : (msgid2)) +#endif +#define N_(msgid) msgid + +// Optional: Strings that are word wrapped using tuklib_mbstr_wrap may be +// marked with W_("foo) in the source code. xgettext can then add a comment +// to all such strings to inform translators. The following option needs to +// be added to XGETTEXT_OPTIONS in po/Makevars or in an equivalent place: +// +// '--keyword=W_:1,"This is word wrapped at spaces. The Unicode character U+00A0 works as a non-breaking space. Tab (\t) is interpret as a zero-width space (the tab itself is not displayed); U+200B is NOT supported. Manual word wrapping with \n is supported but requires care."' +// +// NOTE: The double-quotes in the --keyword argument above must be passed to +// xgettext as is, thus one needs the single-quotes in Makevars. +#define W_(msgid) _(msgid) + +#endif diff --git a/src/native/external/xz/src/common/tuklib_integer.h b/src/native/external/xz/src/common/tuklib_integer.h new file mode 100644 index 00000000000000..4026249e5468b1 --- /dev/null +++ b/src/native/external/xz/src/common/tuklib_integer.h @@ -0,0 +1,954 @@ +// SPDX-License-Identifier: 0BSD + +/////////////////////////////////////////////////////////////////////////////// +// +/// \file tuklib_integer.h +/// \brief Various integer and bit operations +/// +/// This file provides macros or functions to do some basic integer and bit +/// operations. +/// +/// Native endian inline functions (XX = 16, 32, or 64): +/// - Unaligned native endian reads: readXXne(ptr) +/// - Unaligned native endian writes: writeXXne(ptr, num) +/// - Aligned native endian reads: aligned_readXXne(ptr) +/// - Aligned native endian writes: aligned_writeXXne(ptr, num) +/// +/// Endianness-converting integer operations (these can be macros!) +/// (XX = 16, 32, or 64; Y = b or l): +/// - Byte swapping: byteswapXX(num) +/// - Byte order conversions to/from native (byteswaps if Y isn't +/// the native endianness): convXXYe(num) +/// - Unaligned reads: readXXYe(ptr) +/// - Unaligned writes: writeXXYe(ptr, num) +/// - Aligned reads: aligned_readXXYe(ptr) +/// - Aligned writes: aligned_writeXXYe(ptr, num) +/// +/// Since the above can macros, the arguments should have no side effects +/// because they may be evaluated more than once. +/// +/// Bit scan operations for non-zero 32-bit integers (inline functions): +/// - Bit scan reverse (find highest non-zero bit): bsr32(num) +/// - Count leading zeros: clz32(num) +/// - Count trailing zeros: ctz32(num) +/// - Bit scan forward (simply an alias for ctz32()): bsf32(num) +/// +/// The above bit scan operations return 0-31. If num is zero, +/// the result is undefined. +// +// Authors: Lasse Collin +// Joachim Henke +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef TUKLIB_INTEGER_H +#define TUKLIB_INTEGER_H + +#include "tuklib_common.h" +#include + +// Newer Intel C compilers require immintrin.h for _bit_scan_reverse() +// and such functions. +#if defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 1500) +# include +// Only include when it is needed. GCC and Clang can both +// use __builtin's, so we only need Windows instrincs when using MSVC. +// GCC and Clang can set _MSC_VER on Windows, so we need to exclude these +// cases explicitly. +#elif defined(_MSC_VER) && !TUKLIB_GNUC_REQ(3, 4) && !defined(__clang__) +# include +#endif + + +/////////////////// +// Byte swapping // +/////////////////// + +#if defined(HAVE___BUILTIN_BSWAPXX) + // GCC >= 4.8 and Clang +# define byteswap16(num) __builtin_bswap16(num) +# define byteswap32(num) __builtin_bswap32(num) +# define byteswap64(num) __builtin_bswap64(num) + +#elif defined(HAVE_BYTESWAP_H) + // glibc, uClibc, dietlibc +# include +# ifdef HAVE_BSWAP_16 +# define byteswap16(num) bswap_16(num) +# endif +# ifdef HAVE_BSWAP_32 +# define byteswap32(num) bswap_32(num) +# endif +# ifdef HAVE_BSWAP_64 +# define byteswap64(num) bswap_64(num) +# endif + +#elif defined(HAVE_SYS_ENDIAN_H) + // *BSDs and Darwin +# include +# ifdef __OpenBSD__ +# define byteswap16(num) swap16(num) +# define byteswap32(num) swap32(num) +# define byteswap64(num) swap64(num) +# else +# define byteswap16(num) bswap16(num) +# define byteswap32(num) bswap32(num) +# define byteswap64(num) bswap64(num) +# endif + +#elif defined(HAVE_SYS_BYTEORDER_H) + // Solaris +# include +# ifdef BSWAP_16 +# define byteswap16(num) BSWAP_16(num) +# endif +# ifdef BSWAP_32 +# define byteswap32(num) BSWAP_32(num) +# endif +# ifdef BSWAP_64 +# define byteswap64(num) BSWAP_64(num) +# endif +# ifdef BE_16 +# define conv16be(num) BE_16(num) +# endif +# ifdef BE_32 +# define conv32be(num) BE_32(num) +# endif +# ifdef BE_64 +# define conv64be(num) BE_64(num) +# endif +# ifdef LE_16 +# define conv16le(num) LE_16(num) +# endif +# ifdef LE_32 +# define conv32le(num) LE_32(num) +# endif +# ifdef LE_64 +# define conv64le(num) LE_64(num) +# endif +#endif + +#ifndef byteswap16 +# define byteswap16(n) (uint16_t)( \ + (((n) & 0x00FFU) << 8) \ + | (((n) & 0xFF00U) >> 8) \ + ) +#endif + +#ifndef byteswap32 +# define byteswap32(n) (uint32_t)( \ + (((n) & UINT32_C(0x000000FF)) << 24) \ + | (((n) & UINT32_C(0x0000FF00)) << 8) \ + | (((n) & UINT32_C(0x00FF0000)) >> 8) \ + | (((n) & UINT32_C(0xFF000000)) >> 24) \ + ) +#endif + +#ifndef byteswap64 +# define byteswap64(n) (uint64_t)( \ + (((n) & UINT64_C(0x00000000000000FF)) << 56) \ + | (((n) & UINT64_C(0x000000000000FF00)) << 40) \ + | (((n) & UINT64_C(0x0000000000FF0000)) << 24) \ + | (((n) & UINT64_C(0x00000000FF000000)) << 8) \ + | (((n) & UINT64_C(0x000000FF00000000)) >> 8) \ + | (((n) & UINT64_C(0x0000FF0000000000)) >> 24) \ + | (((n) & UINT64_C(0x00FF000000000000)) >> 40) \ + | (((n) & UINT64_C(0xFF00000000000000)) >> 56) \ + ) +#endif + +// Define conversion macros using the basic byte swapping macros. +#ifdef WORDS_BIGENDIAN +# ifndef conv16be +# define conv16be(num) ((uint16_t)(num)) +# endif +# ifndef conv32be +# define conv32be(num) ((uint32_t)(num)) +# endif +# ifndef conv64be +# define conv64be(num) ((uint64_t)(num)) +# endif +# ifndef conv16le +# define conv16le(num) byteswap16(num) +# endif +# ifndef conv32le +# define conv32le(num) byteswap32(num) +# endif +# ifndef conv64le +# define conv64le(num) byteswap64(num) +# endif +#else +# ifndef conv16be +# define conv16be(num) byteswap16(num) +# endif +# ifndef conv32be +# define conv32be(num) byteswap32(num) +# endif +# ifndef conv64be +# define conv64be(num) byteswap64(num) +# endif +# ifndef conv16le +# define conv16le(num) ((uint16_t)(num)) +# endif +# ifndef conv32le +# define conv32le(num) ((uint32_t)(num)) +# endif +# ifndef conv64le +# define conv64le(num) ((uint64_t)(num)) +# endif +#endif + + +//////////////////////////////// +// Unaligned reads and writes // +//////////////////////////////// + +// No-strict-align archs like x86-64 +// --------------------------------- +// +// The traditional way of casting e.g. *(const uint16_t *)uint8_pointer +// is bad even if the uint8_pointer is properly aligned because this kind +// of casts break strict aliasing rules and result in undefined behavior. +// With unaligned pointers it's even worse: compilers may emit vector +// instructions that require aligned pointers even if non-vector +// instructions work with unaligned pointers. +// +// Using memcpy() is the standard compliant way to do unaligned access. +// Many modern compilers inline it so there is no function call overhead. +// For those compilers that don't handle the memcpy() method well, the +// old casting method (that violates strict aliasing) can be requested at +// build time. A third method, casting to a packed struct, would also be +// an option but isn't provided to keep things simpler (it's already a mess). +// Hopefully this is flexible enough in practice. +// +// Some compilers on x86-64 like Clang >= 10 and GCC >= 5.1 detect that +// +// buf[0] | (buf[1] << 8) +// +// reads a 16-bit value and can emit a single 16-bit load and produce +// identical code than with the memcpy() method. In other cases Clang and GCC +// produce either the same or better code with memcpy(). For example, Clang 9 +// on x86-64 can detect 32-bit load but not 16-bit load. +// +// MSVC uses unaligned access with the memcpy() method but emits byte-by-byte +// code for "buf[0] | (buf[1] << 8)". +// +// Conclusion: The memcpy() method is the best choice when unaligned access +// is supported. +// +// Strict-align archs like SPARC +// ----------------------------- +// +// GCC versions from around 4.x to to at least 13.2.0 produce worse code +// from the memcpy() method than from simple byte-by-byte shift-or code +// when reading a 32-bit integer: +// +// (1) It may be constructed on stack using four 8-bit loads, +// four 8-bit stores to stack, and finally one 32-bit load from stack. +// +// (2) Especially with -Os, an actual memcpy() call may be emitted. +// +// This is true on at least on ARM, ARM64, SPARC, SPARC64, MIPS64EL, and +// RISC-V. Of these, ARM, ARM64, and RISC-V support unaligned access in +// some processors but not all so this is relevant only in the case when +// GCC assumes that unaligned is not supported or -mstrict-align or +// -mno-unaligned-access is used. +// +// For Clang it makes little difference. ARM64 with -O2 -mstrict-align +// was one the very few with a minor difference: the memcpy() version +// was one instruction longer. +// +// Conclusion: At least in case of GCC and Clang, byte-by-byte code is +// the best choice for strict-align archs to do unaligned access. +// +// See also: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=111502 +// +// Thanks to it was easy to test different compilers. +// The following is for little endian targets: +/* +#include +#include + +uint32_t bytes16(const uint8_t *b) +{ + return (uint32_t)b[0] + | ((uint32_t)b[1] << 8); +} + +uint32_t copy16(const uint8_t *b) +{ + uint16_t v; + memcpy(&v, b, sizeof(v)); + return v; +} + +uint32_t bytes32(const uint8_t *b) +{ + return (uint32_t)b[0] + | ((uint32_t)b[1] << 8) + | ((uint32_t)b[2] << 16) + | ((uint32_t)b[3] << 24); +} + +uint32_t copy32(const uint8_t *b) +{ + uint32_t v; + memcpy(&v, b, sizeof(v)); + return v; +} + +void wbytes16(uint8_t *b, uint16_t v) +{ + b[0] = (uint8_t)v; + b[1] = (uint8_t)(v >> 8); +} + +void wcopy16(uint8_t *b, uint16_t v) +{ + memcpy(b, &v, sizeof(v)); +} + +void wbytes32(uint8_t *b, uint32_t v) +{ + b[0] = (uint8_t)v; + b[1] = (uint8_t)(v >> 8); + b[2] = (uint8_t)(v >> 16); + b[3] = (uint8_t)(v >> 24); +} + +void wcopy32(uint8_t *b, uint32_t v) +{ + memcpy(b, &v, sizeof(v)); +} +*/ + + +#ifdef TUKLIB_FAST_UNALIGNED_ACCESS + +static inline uint16_t +read16ne(const uint8_t *buf) +{ +#ifdef TUKLIB_USE_UNSAFE_TYPE_PUNNING + return *(const uint16_t *)buf; +#else + uint16_t num; + memcpy(&num, buf, sizeof(num)); + return num; +#endif +} + + +static inline uint32_t +read32ne(const uint8_t *buf) +{ +#ifdef TUKLIB_USE_UNSAFE_TYPE_PUNNING + return *(const uint32_t *)buf; +#else + uint32_t num; + memcpy(&num, buf, sizeof(num)); + return num; +#endif +} + + +static inline uint64_t +read64ne(const uint8_t *buf) +{ +#ifdef TUKLIB_USE_UNSAFE_TYPE_PUNNING + return *(const uint64_t *)buf; +#else + uint64_t num; + memcpy(&num, buf, sizeof(num)); + return num; +#endif +} + + +static inline void +write16ne(uint8_t *buf, uint16_t num) +{ +#ifdef TUKLIB_USE_UNSAFE_TYPE_PUNNING + *(uint16_t *)buf = num; +#else + memcpy(buf, &num, sizeof(num)); +#endif + return; +} + + +static inline void +write32ne(uint8_t *buf, uint32_t num) +{ +#ifdef TUKLIB_USE_UNSAFE_TYPE_PUNNING + *(uint32_t *)buf = num; +#else + memcpy(buf, &num, sizeof(num)); +#endif + return; +} + + +static inline void +write64ne(uint8_t *buf, uint64_t num) +{ +#ifdef TUKLIB_USE_UNSAFE_TYPE_PUNNING + *(uint64_t *)buf = num; +#else + memcpy(buf, &num, sizeof(num)); +#endif + return; +} + + +static inline uint16_t +read16be(const uint8_t *buf) +{ + uint16_t num = read16ne(buf); + return conv16be(num); +} + + +static inline uint16_t +read16le(const uint8_t *buf) +{ + uint16_t num = read16ne(buf); + return conv16le(num); +} + + +static inline uint32_t +read32be(const uint8_t *buf) +{ + uint32_t num = read32ne(buf); + return conv32be(num); +} + + +static inline uint32_t +read32le(const uint8_t *buf) +{ + uint32_t num = read32ne(buf); + return conv32le(num); +} + + +static inline uint64_t +read64be(const uint8_t *buf) +{ + uint64_t num = read64ne(buf); + return conv64be(num); +} + + +static inline uint64_t +read64le(const uint8_t *buf) +{ + uint64_t num = read64ne(buf); + return conv64le(num); +} + + +// NOTE: Possible byte swapping must be done in a macro to allow the compiler +// to optimize byte swapping of constants when using glibc's or *BSD's +// byte swapping macros. The actual write is done in an inline function +// to make type checking of the buf pointer possible. +#define write16be(buf, num) write16ne(buf, conv16be(num)) +#define write32be(buf, num) write32ne(buf, conv32be(num)) +#define write64be(buf, num) write64ne(buf, conv64be(num)) +#define write16le(buf, num) write16ne(buf, conv16le(num)) +#define write32le(buf, num) write32ne(buf, conv32le(num)) +#define write64le(buf, num) write64ne(buf, conv64le(num)) + +#else + +#ifdef WORDS_BIGENDIAN +# define read16ne read16be +# define read32ne read32be +# define read64ne read64be +# define write16ne write16be +# define write32ne write32be +# define write64ne write64be +#else +# define read16ne read16le +# define read32ne read32le +# define read64ne read64le +# define write16ne write16le +# define write32ne write32le +# define write64ne write64le +#endif + + +static inline uint16_t +read16be(const uint8_t *buf) +{ + uint16_t num = ((uint16_t)buf[0] << 8) | (uint16_t)buf[1]; + return num; +} + + +static inline uint16_t +read16le(const uint8_t *buf) +{ + uint16_t num = ((uint16_t)buf[0]) | ((uint16_t)buf[1] << 8); + return num; +} + + +static inline uint32_t +read32be(const uint8_t *buf) +{ + uint32_t num = (uint32_t)buf[0] << 24; + num |= (uint32_t)buf[1] << 16; + num |= (uint32_t)buf[2] << 8; + num |= (uint32_t)buf[3]; + return num; +} + + +static inline uint32_t +read32le(const uint8_t *buf) +{ + uint32_t num = (uint32_t)buf[0]; + num |= (uint32_t)buf[1] << 8; + num |= (uint32_t)buf[2] << 16; + num |= (uint32_t)buf[3] << 24; + return num; +} + + +static inline uint64_t +read64be(const uint8_t *buf) +{ + uint64_t num = (uint64_t)buf[0] << 56; + num |= (uint64_t)buf[1] << 48; + num |= (uint64_t)buf[2] << 40; + num |= (uint64_t)buf[3] << 32; + num |= (uint64_t)buf[4] << 24; + num |= (uint64_t)buf[5] << 16; + num |= (uint64_t)buf[6] << 8; + num |= (uint64_t)buf[7]; + return num; +} + + +static inline uint64_t +read64le(const uint8_t *buf) +{ + uint64_t num = (uint64_t)buf[0]; + num |= (uint64_t)buf[1] << 8; + num |= (uint64_t)buf[2] << 16; + num |= (uint64_t)buf[3] << 24; + num |= (uint64_t)buf[4] << 32; + num |= (uint64_t)buf[5] << 40; + num |= (uint64_t)buf[6] << 48; + num |= (uint64_t)buf[7] << 56; + return num; +} + + +static inline void +write16be(uint8_t *buf, uint16_t num) +{ + buf[0] = (uint8_t)(num >> 8); + buf[1] = (uint8_t)num; + return; +} + + +static inline void +write16le(uint8_t *buf, uint16_t num) +{ + buf[0] = (uint8_t)num; + buf[1] = (uint8_t)(num >> 8); + return; +} + + +static inline void +write32be(uint8_t *buf, uint32_t num) +{ + buf[0] = (uint8_t)(num >> 24); + buf[1] = (uint8_t)(num >> 16); + buf[2] = (uint8_t)(num >> 8); + buf[3] = (uint8_t)num; + return; +} + + +static inline void +write32le(uint8_t *buf, uint32_t num) +{ + buf[0] = (uint8_t)num; + buf[1] = (uint8_t)(num >> 8); + buf[2] = (uint8_t)(num >> 16); + buf[3] = (uint8_t)(num >> 24); + return; +} + + +static inline void +write64be(uint8_t *buf, uint64_t num) +{ + buf[0] = (uint8_t)(num >> 56); + buf[1] = (uint8_t)(num >> 48); + buf[2] = (uint8_t)(num >> 40); + buf[3] = (uint8_t)(num >> 32); + buf[4] = (uint8_t)(num >> 24); + buf[5] = (uint8_t)(num >> 16); + buf[6] = (uint8_t)(num >> 8); + buf[7] = (uint8_t)num; + return; +} + + +static inline void +write64le(uint8_t *buf, uint64_t num) +{ + buf[0] = (uint8_t)num; + buf[1] = (uint8_t)(num >> 8); + buf[2] = (uint8_t)(num >> 16); + buf[3] = (uint8_t)(num >> 24); + buf[4] = (uint8_t)(num >> 32); + buf[5] = (uint8_t)(num >> 40); + buf[6] = (uint8_t)(num >> 48); + buf[7] = (uint8_t)(num >> 56); + return; +} + +#endif + + +////////////////////////////// +// Aligned reads and writes // +////////////////////////////// + +// Separate functions for aligned reads and writes are provided since on +// strict-align archs aligned access is much faster than unaligned access. +// +// Just like in the unaligned case, memcpy() is needed to avoid +// strict aliasing violations. However, on archs that don't support +// unaligned access the compiler cannot know that the pointers given +// to memcpy() are aligned which results in slow code. As of C11 there is +// no standard way to tell the compiler that we know that the address is +// aligned but some compilers have language extensions to do that. With +// such language extensions the memcpy() method gives excellent results. +// +// What to do on a strict-align system when no known language extensions +// are available? Falling back to byte-by-byte access would be safe but ruin +// optimizations that have been made specifically with aligned access in mind. +// As a compromise, aligned reads will fall back to non-compliant type punning +// but aligned writes will be byte-by-byte, that is, fast reads are preferred +// over fast writes. This obviously isn't great but hopefully it's a working +// compromise for now. +// +// __builtin_assume_aligned is support by GCC >= 4.7 and clang >= 3.6. +#ifdef HAVE___BUILTIN_ASSUME_ALIGNED +# define tuklib_memcpy_aligned(dest, src, size) \ + memcpy(dest, __builtin_assume_aligned(src, size), size) +#else +# define tuklib_memcpy_aligned(dest, src, size) \ + memcpy(dest, src, size) +# ifndef TUKLIB_FAST_UNALIGNED_ACCESS +# define TUKLIB_USE_UNSAFE_ALIGNED_READS 1 +# endif +#endif + + +static inline uint16_t +aligned_read16ne(const uint8_t *buf) +{ +#if defined(TUKLIB_USE_UNSAFE_TYPE_PUNNING) \ + || defined(TUKLIB_USE_UNSAFE_ALIGNED_READS) + return *(const uint16_t *)buf; +#else + uint16_t num; + tuklib_memcpy_aligned(&num, buf, sizeof(num)); + return num; +#endif +} + + +static inline uint32_t +aligned_read32ne(const uint8_t *buf) +{ +#if defined(TUKLIB_USE_UNSAFE_TYPE_PUNNING) \ + || defined(TUKLIB_USE_UNSAFE_ALIGNED_READS) + return *(const uint32_t *)buf; +#else + uint32_t num; + tuklib_memcpy_aligned(&num, buf, sizeof(num)); + return num; +#endif +} + + +static inline uint64_t +aligned_read64ne(const uint8_t *buf) +{ +#if defined(TUKLIB_USE_UNSAFE_TYPE_PUNNING) \ + || defined(TUKLIB_USE_UNSAFE_ALIGNED_READS) + return *(const uint64_t *)buf; +#else + uint64_t num; + tuklib_memcpy_aligned(&num, buf, sizeof(num)); + return num; +#endif +} + + +static inline void +aligned_write16ne(uint8_t *buf, uint16_t num) +{ +#ifdef TUKLIB_USE_UNSAFE_TYPE_PUNNING + *(uint16_t *)buf = num; +#else + tuklib_memcpy_aligned(buf, &num, sizeof(num)); +#endif + return; +} + + +static inline void +aligned_write32ne(uint8_t *buf, uint32_t num) +{ +#ifdef TUKLIB_USE_UNSAFE_TYPE_PUNNING + *(uint32_t *)buf = num; +#else + tuklib_memcpy_aligned(buf, &num, sizeof(num)); +#endif + return; +} + + +static inline void +aligned_write64ne(uint8_t *buf, uint64_t num) +{ +#ifdef TUKLIB_USE_UNSAFE_TYPE_PUNNING + *(uint64_t *)buf = num; +#else + tuklib_memcpy_aligned(buf, &num, sizeof(num)); +#endif + return; +} + + +static inline uint16_t +aligned_read16be(const uint8_t *buf) +{ + uint16_t num = aligned_read16ne(buf); + return conv16be(num); +} + + +static inline uint16_t +aligned_read16le(const uint8_t *buf) +{ + uint16_t num = aligned_read16ne(buf); + return conv16le(num); +} + + +static inline uint32_t +aligned_read32be(const uint8_t *buf) +{ + uint32_t num = aligned_read32ne(buf); + return conv32be(num); +} + + +static inline uint32_t +aligned_read32le(const uint8_t *buf) +{ + uint32_t num = aligned_read32ne(buf); + return conv32le(num); +} + + +static inline uint64_t +aligned_read64be(const uint8_t *buf) +{ + uint64_t num = aligned_read64ne(buf); + return conv64be(num); +} + + +static inline uint64_t +aligned_read64le(const uint8_t *buf) +{ + uint64_t num = aligned_read64ne(buf); + return conv64le(num); +} + + +// These need to be macros like in the unaligned case. +#define aligned_write16be(buf, num) aligned_write16ne((buf), conv16be(num)) +#define aligned_write16le(buf, num) aligned_write16ne((buf), conv16le(num)) +#define aligned_write32be(buf, num) aligned_write32ne((buf), conv32be(num)) +#define aligned_write32le(buf, num) aligned_write32ne((buf), conv32le(num)) +#define aligned_write64be(buf, num) aligned_write64ne((buf), conv64be(num)) +#define aligned_write64le(buf, num) aligned_write64ne((buf), conv64le(num)) + + +//////////////////// +// Bit operations // +//////////////////// + +static inline uint32_t +bsr32(uint32_t n) +{ + // Check for ICC first, since it tends to define __GNUC__ too. +#if defined(__INTEL_COMPILER) + return _bit_scan_reverse(n); + +#elif (TUKLIB_GNUC_REQ(3, 4) || defined(__clang__)) && UINT_MAX == UINT32_MAX + // GCC >= 3.4 has __builtin_clz(), which gives good results on + // multiple architectures. On x86, __builtin_clz() ^ 31U becomes + // either plain BSR (so the XOR gets optimized away) or LZCNT and + // XOR (if -march indicates that SSE4a instructions are supported). + return (uint32_t)__builtin_clz(n) ^ 31U; + +#elif defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__)) + uint32_t i; + __asm__("bsrl %1, %0" : "=r" (i) : "rm" (n)); + return i; + +#elif defined(_MSC_VER) + unsigned long i; + _BitScanReverse(&i, n); + return i; + +#else + uint32_t i = 31; + + if ((n & 0xFFFF0000) == 0) { + n <<= 16; + i = 15; + } + + if ((n & 0xFF000000) == 0) { + n <<= 8; + i -= 8; + } + + if ((n & 0xF0000000) == 0) { + n <<= 4; + i -= 4; + } + + if ((n & 0xC0000000) == 0) { + n <<= 2; + i -= 2; + } + + if ((n & 0x80000000) == 0) + --i; + + return i; +#endif +} + + +static inline uint32_t +clz32(uint32_t n) +{ +#if defined(__INTEL_COMPILER) + return _bit_scan_reverse(n) ^ 31U; + +#elif (TUKLIB_GNUC_REQ(3, 4) || defined(__clang__)) && UINT_MAX == UINT32_MAX + return (uint32_t)__builtin_clz(n); + +#elif defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__)) + uint32_t i; + __asm__("bsrl %1, %0\n\t" + "xorl $31, %0" + : "=r" (i) : "rm" (n)); + return i; + +#elif defined(_MSC_VER) + unsigned long i; + _BitScanReverse(&i, n); + return i ^ 31U; + +#else + uint32_t i = 0; + + if ((n & 0xFFFF0000) == 0) { + n <<= 16; + i = 16; + } + + if ((n & 0xFF000000) == 0) { + n <<= 8; + i += 8; + } + + if ((n & 0xF0000000) == 0) { + n <<= 4; + i += 4; + } + + if ((n & 0xC0000000) == 0) { + n <<= 2; + i += 2; + } + + if ((n & 0x80000000) == 0) + ++i; + + return i; +#endif +} + + +static inline uint32_t +ctz32(uint32_t n) +{ +#if defined(__INTEL_COMPILER) + return _bit_scan_forward(n); + +#elif (TUKLIB_GNUC_REQ(3, 4) || defined(__clang__)) && UINT_MAX >= UINT32_MAX + return (uint32_t)__builtin_ctz(n); + +#elif defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__)) + uint32_t i; + __asm__("bsfl %1, %0" : "=r" (i) : "rm" (n)); + return i; + +#elif defined(_MSC_VER) + unsigned long i; + _BitScanForward(&i, n); + return i; + +#else + uint32_t i = 0; + + if ((n & 0x0000FFFF) == 0) { + n >>= 16; + i = 16; + } + + if ((n & 0x000000FF) == 0) { + n >>= 8; + i += 8; + } + + if ((n & 0x0000000F) == 0) { + n >>= 4; + i += 4; + } + + if ((n & 0x00000003) == 0) { + n >>= 2; + i += 2; + } + + if ((n & 0x00000001) == 0) + ++i; + + return i; +#endif +} + +#define bsf32 ctz32 + +#endif diff --git a/src/native/external/xz/src/common/tuklib_mbstr.h b/src/native/external/xz/src/common/tuklib_mbstr.h new file mode 100644 index 00000000000000..5ac06eb35e88ed --- /dev/null +++ b/src/native/external/xz/src/common/tuklib_mbstr.h @@ -0,0 +1,78 @@ +// SPDX-License-Identifier: 0BSD + +/////////////////////////////////////////////////////////////////////////////// +// +/// \file tuklib_mbstr.h +/// \brief Utility functions for handling multibyte strings +/// +/// If not enough multibyte string support is available in the C library, +/// these functions keep working with the assumption that all strings +/// are in a single-byte character set without combining characters, e.g. +/// US-ASCII or ISO-8859-*. +// +// Author: Lasse Collin +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef TUKLIB_MBSTR_H +#define TUKLIB_MBSTR_H + +#include "tuklib_common.h" +TUKLIB_DECLS_BEGIN + +#define tuklib_mbstr_width TUKLIB_SYMBOL(tuklib_mbstr_width) +extern size_t tuklib_mbstr_width(const char *str, size_t *bytes); +///< +/// \brief Get the number of columns needed for the multibyte string +/// +/// This is somewhat similar to wcswidth() but works on multibyte strings. +/// +/// \param str String whose width is to be calculated. +/// \param bytes If this is not NULL, *bytes is set to the +/// value returned by strlen(str) (even if an +/// error occurs when calculating the width). +/// +/// \return On success, the number of columns needed to display the +/// string e.g. in a terminal emulator is returned. On error, +/// (size_t)-1 is returned. Possible errors include invalid, +/// partial, or non-printable multibyte character in str. + +#define tuklib_mbstr_width_mem TUKLIB_SYMBOL(tuklib_mbstr_width_mem) +extern size_t tuklib_mbstr_width_mem(const char *str, size_t len); +///< +/// \brief Get the number of columns needed for the multibyte buffer +/// +/// This is like tuklib_mbstr_width() except that this takes the buffer +/// length in bytes as the second argument. This allows using the function +/// for buffers that aren't terminated with '\0'. +/// +/// \param str String whose width is to be calculated. +/// \param len Number of bytes to read from str. +/// +/// \return On success, the number of columns needed to display the +/// string e.g. in a terminal emulator is returned. On error, +/// (size_t)-1 is returned. Possible errors include invalid, +/// partial, or non-printable multibyte character in str. + +#define tuklib_mbstr_fw TUKLIB_SYMBOL(tuklib_mbstr_fw) +extern int tuklib_mbstr_fw(const char *str, int columns_min); +///< +/// \brief Get the field width for printf() e.g. to align table columns +/// +/// Printing simple tables to a terminal can be done using the field field +/// feature in the printf() format string, but it works only with single-byte +/// character sets. To do the same with multibyte strings, tuklib_mbstr_fw() +/// can be used to calculate appropriate field width. +/// +/// The behavior of this function is undefined, if +/// - str is NULL or not terminated with '\0'; +/// - columns_min <= 0; or +/// - the calculated field width exceeds INT_MAX. +/// +/// \return If tuklib_mbstr_width(str, NULL) fails, -1 is returned. +/// If str needs more columns than columns_min, zero is returned. +/// Otherwise a positive integer is returned, which can be +/// used as the field width, e.g. printf("%*s", fw, str). + +TUKLIB_DECLS_END +#endif diff --git a/src/native/external/xz/src/common/tuklib_mbstr_fw.c b/src/native/external/xz/src/common/tuklib_mbstr_fw.c new file mode 100644 index 00000000000000..22d883b569fe33 --- /dev/null +++ b/src/native/external/xz/src/common/tuklib_mbstr_fw.c @@ -0,0 +1,30 @@ +// SPDX-License-Identifier: 0BSD + +/////////////////////////////////////////////////////////////////////////////// +// +/// \file tuklib_mbstr_fw.c +/// \brief Get the field width for printf() e.g. to align table columns +// +// Author: Lasse Collin +// +/////////////////////////////////////////////////////////////////////////////// + +#include "tuklib_mbstr.h" + + +extern int +tuklib_mbstr_fw(const char *str, int columns_min) +{ + size_t len; + const size_t width = tuklib_mbstr_width(str, &len); + if (width == (size_t)-1) + return -1; + + if (width > (size_t)columns_min) + return 0; + + if (width < (size_t)columns_min) + len += (size_t)columns_min - width; + + return (int)len; +} diff --git a/src/native/external/xz/src/common/tuklib_mbstr_nonprint.c b/src/native/external/xz/src/common/tuklib_mbstr_nonprint.c new file mode 100644 index 00000000000000..dc778757b14815 --- /dev/null +++ b/src/native/external/xz/src/common/tuklib_mbstr_nonprint.c @@ -0,0 +1,162 @@ +// SPDX-License-Identifier: 0BSD + +/////////////////////////////////////////////////////////////////////////////// +// +/// \file tuklib_mbstr_nonprint.c +/// \brief Find and replace non-printable characters with question marks +// +// Author: Lasse Collin +// +/////////////////////////////////////////////////////////////////////////////// + +#include "tuklib_mbstr_nonprint.h" +#include +#include +#include + +#ifdef HAVE_MBRTOWC +# include +# include +#else +# include +#endif + + +static bool +is_next_printable(const char *str, size_t len, size_t *next_len) +{ +#ifdef HAVE_MBRTOWC + // This assumes that character sets with locking shift states aren't + // used, and thus mbsinit() is never needed. + mbstate_t ps; + memset(&ps, 0, sizeof(ps)); + + wchar_t wc; + *next_len = mbrtowc(&wc, str, len, &ps); + + if (*next_len == (size_t)-2) { + // Incomplete multibyte sequence: Treat the whole sequence + // as a single non-printable multibyte character that ends + // the string. + *next_len = len; + return false; + } + + // Check more broadly than just ret == (size_t)-1 to be safe + // in case mbrtowc() returns something weird. This check + // covers (size_t)-1 (that is, SIZE_MAX) too because len is from + // strlen() and the terminating '\0' isn't part of the length. + if (*next_len < 1 || *next_len > len) { + // Invalid multibyte sequence: Treat the first byte as + // a non-printable single-byte character. Decoding will + // be restarted from the next byte on the next call to + // this function. + *next_len = 1; + return false; + } + +# if defined(_WIN32) && !defined(__CYGWIN__) + // On Windows, wchar_t stores UTF-16 code units, thus characters + // outside the Basic Multilingual Plane (BMP) don't fit into + // a single wchar_t. In an UTF-8 locale, UCRT's mbrtowc() returns + // successfully when the input is a non-BMP character but the + // output is the replacement character U+FFFD. + // + // iswprint() returns 0 for U+FFFD on Windows for some reason. Treat + // U+FFFD as printable and thus also all non-BMP chars as printable. + if (wc == 0xFFFD) + return true; +# endif + + return iswprint((wint_t)wc) != 0; +#else + (void)len; + *next_len = 1; + return isprint((unsigned char)str[0]) != 0; +#endif +} + + +static bool +has_nonprint(const char *str, size_t len) +{ + for (size_t i = 0; i < len; ) { + size_t next_len; + if (!is_next_printable(str + i, len - i, &next_len)) + return true; + + i += next_len; + } + + return false; +} + + +extern bool +tuklib_has_nonprint(const char *str) +{ + const int saved_errno = errno; + const bool ret = has_nonprint(str, strlen(str)); + errno = saved_errno; + return ret; +} + + +extern const char * +tuklib_mask_nonprint_r(const char *str, char **mem) +{ + const int saved_errno = errno; + + // Free the old string, if any. + free(*mem); + *mem = NULL; + + // If the whole input string contains only printable characters, + // return the input string. + const size_t len = strlen(str); + if (!has_nonprint(str, len)) { + errno = saved_errno; + return str; + } + + // Allocate memory for the masked string. Since we use the single-byte + // character '?' to mask non-printable characters, it's possible that + // a few bytes less memory would be needed in reality if multibyte + // characters are masked. + // + // If allocation fails, return "???" because it should be safer than + // returning the unmasked string. + *mem = malloc(len + 1); + if (*mem == NULL) { + errno = saved_errno; + return "???"; + } + + // Replace all non-printable characters with '?'. + char *dest = *mem; + + for (size_t i = 0; i < len; ) { + size_t next_len; + if (is_next_printable(str + i, len - i, &next_len)) { + memcpy(dest, str + i, next_len); + dest += next_len; + } else { + *dest++ = '?'; + } + + i += next_len; + } + + *dest = '\0'; + + errno = saved_errno; + return *mem; +} + + +extern const char * +tuklib_mask_nonprint(const char *str) +{ + static char *mem = NULL; + return tuklib_mask_nonprint_r(str, &mem); +} diff --git a/src/native/external/xz/src/common/tuklib_mbstr_nonprint.h b/src/native/external/xz/src/common/tuklib_mbstr_nonprint.h new file mode 100644 index 00000000000000..6fc969109fe08b --- /dev/null +++ b/src/native/external/xz/src/common/tuklib_mbstr_nonprint.h @@ -0,0 +1,71 @@ +// SPDX-License-Identifier: 0BSD + +/////////////////////////////////////////////////////////////////////////////// +// +/// \file tuklib_mbstr_nonprint.h +/// \brief Find and replace non-printable characters with question marks +/// +/// If mbrtowc(3) is available, it and iswprint(3) is used to check if all +/// characters are printable. Otherwise single-byte character set is assumed +/// and isprint(3) is used. +// +// Author: Lasse Collin +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef TUKLIB_MBSTR_NONPRINT_H +#define TUKLIB_MBSTR_NONPRINT_H + +#include "tuklib_common.h" +TUKLIB_DECLS_BEGIN + +#define tuklib_has_nonprint TUKLIB_SYMBOL(tuklib_has_nonprint) +extern bool tuklib_has_nonprint(const char *str); +///< +/// \brief Check if a string contains any non-printable characters +/// +/// \return false if str contains only valid multibyte characters and +/// iswprint(3) returns non-zero for all of them; true otherwise. +/// The value of errno is preserved. +/// +/// \note In case mbrtowc(3) isn't available, single-byte character set +/// is assumed and isprint(3) is used instead of iswprint(3). + +#define tuklib_mask_nonprint_r TUKLIB_SYMBOL(tuklib_mask_nonprint_r) +extern const char *tuklib_mask_nonprint_r(const char *str, char **mem); +///< +/// \brief Replace non-printable characters with question marks +/// +/// \param str Untrusted string, for example, a filename +/// \param mem This function always calls free(*mem) to free the old +/// allocation and then sets *mem = NULL. Before the first +/// call, *mem should be initialized to NULL. If this +/// function needs to allocate memory for a modified +/// string, a pointer to the allocated memory will be +/// stored to *mem. Otherwise *mem will remain NULL. +/// +/// \return If tuklib_has_nonprint(str) returns false, this function +/// returns str. Otherwise memory is allocated to hold a modified +/// string and a pointer to that is returned. The pointer to the +/// allocated memory is also stored to *mem. A modified string +/// has the problematic characters replaced by '?'. If memory +/// allocation fails, "???" is returned and *mem is NULL. +/// The value of errno is preserved. + +#define tuklib_mask_nonprint TUKLIB_SYMBOL(tuklib_mask_nonprint) +extern const char *tuklib_mask_nonprint(const char *str); +///< +/// \brief Replace non-printable characters with question marks +/// +/// This is a convenience function for single-threaded use. This calls +/// tuklib_mask_nonprint_r() using an internal static variable to hold +/// the possible allocation. +/// +/// \param str Untrusted string, for example, a filename +/// +/// \return See tuklib_mask_nonprint_r(). +/// +/// \note This function is not thread safe! + +TUKLIB_DECLS_END +#endif diff --git a/src/native/external/xz/src/common/tuklib_mbstr_width.c b/src/native/external/xz/src/common/tuklib_mbstr_width.c new file mode 100644 index 00000000000000..98c611d8f38d02 --- /dev/null +++ b/src/native/external/xz/src/common/tuklib_mbstr_width.c @@ -0,0 +1,86 @@ +// SPDX-License-Identifier: 0BSD + +/////////////////////////////////////////////////////////////////////////////// +// +/// \file tuklib_mbstr_width.c +/// \brief Calculate width of a multibyte string +// +// Author: Lasse Collin +// +/////////////////////////////////////////////////////////////////////////////// + +#include "tuklib_mbstr.h" +#include + +#ifdef HAVE_MBRTOWC +# include +#endif + + +extern size_t +tuklib_mbstr_width(const char *str, size_t *bytes) +{ + const size_t len = strlen(str); + if (bytes != NULL) + *bytes = len; + + return tuklib_mbstr_width_mem(str, len); +} + + +extern size_t +tuklib_mbstr_width_mem(const char *str, size_t len) +{ +#ifndef HAVE_MBRTOWC + // In single-byte mode, the width of the string is the same + // as its length. + (void)str; + return len; + +#else + mbstate_t state; + memset(&state, 0, sizeof(state)); + + size_t width = 0; + size_t i = 0; + + // Convert one multibyte character at a time to wchar_t + // and get its width using wcwidth(). + while (i < len) { + wchar_t wc; + const size_t ret = mbrtowc(&wc, str + i, len - i, &state); + if (ret < 1 || ret > len - i) + return (size_t)-1; + + i += ret; + +#ifdef HAVE_WCWIDTH + const int wc_width = wcwidth(wc); + if (wc_width < 0) + return (size_t)-1; + + width += (size_t)wc_width; +#else + // Without wcwidth() (like in a native Windows build), + // assume that one multibyte char == one column. With + // UTF-8, this is less bad than one byte == one column. + // This way quite a few languages will be handled correctly + // in practice; CJK chars will be very wrong though. + ++width; +#endif + } + + // It's good to check that the string ended in the initial state. + // However, in practice this is redundant: + // + // - No one will use this code with character sets that have + // locking shift states. + // + // - We already checked that mbrtowc() didn't return (size_t)-2 + // which would indicate a partial multibyte character. + if (!mbsinit(&state)) + return (size_t)-1; + + return width; +#endif +} diff --git a/src/native/external/xz/src/common/tuklib_mbstr_wrap.c b/src/native/external/xz/src/common/tuklib_mbstr_wrap.c new file mode 100644 index 00000000000000..8d906e004d75df --- /dev/null +++ b/src/native/external/xz/src/common/tuklib_mbstr_wrap.c @@ -0,0 +1,294 @@ +// SPDX-License-Identifier: 0BSD + +/////////////////////////////////////////////////////////////////////////////// +// +/// \file tuklib_mbstr_wrap.c +/// \brief Word wraps a string and prints it to a FILE stream +/// +/// This depends on tuklib_mbstr_width.c. +// +// Author: Lasse Collin +// +/////////////////////////////////////////////////////////////////////////////// + +#include "tuklib_mbstr.h" +#include "tuklib_mbstr_wrap.h" +#include +#include +#include +#include + + +extern int +tuklib_wraps(FILE *outfile, const struct tuklib_wrap_opt *opt, const char *str) +{ + // left_cont may be less than left_margin. In that case, if the first + // word is extremely long, it will stay on the first line even if + // the line then gets overlong. + // + // On the other hand, left2_cont < left2_margin isn't allowed because + // it could result in inconsistent behavior when a very long word + // comes right after a \v. + // + // It is fine to have left2_margin < left_margin although it would be + // an odd use case. + if (!(opt->left_margin < opt->right_margin + && opt->left_cont < opt->right_margin + && opt->left2_margin <= opt->left2_cont + && opt->left2_cont < opt->right_margin)) + return TUKLIB_WRAP_ERR_OPT; + + // This is set to TUKLIB_WRAP_WARN_OVERLONG if one or more + // output lines extend past opt->right_margin columns. + int warn_overlong = 0; + + // Indentation of the first output line after \n or \r. + // \v sets this to opt->left2_margin. + // \r resets this back to the original value. + size_t first_indent = opt->left_margin; + + // Indentation of the output lines that occur due to word wrapping. + // \v sets this to opt->left2_cont and \r back to the original value. + size_t cont_indent = opt->left_cont; + + // If word wrapping occurs, the newline isn't printed unless more + // text would be put on the continuation line. This is also used + // when \v needs to start on a new line. + bool pending_newline = false; + + // Spaces are printed only when there is something else to put + // after the spaces on the line. This avoids unwanted empty lines + // in the output and makes it possible to ignore possible spaces + // before a \v character. + size_t pending_spaces = first_indent; + + // Current output column. When cur_col == pending_spaces, nothing + // has been actually printed to the current output line. + size_t cur_col = pending_spaces; + + while (true) { + // Number of bytes until the *next* line-break opportunity. + size_t len = 0; + + // Number of columns until the *next* line-break opportunity. + size_t width = 0; + + // Text between a pair of \b characters is treated as + // an unbreakable block even if it contains spaces. + // It must not contain any control characters before + // the closing \b. + bool unbreakable = false; + + while (true) { + // Find the next character that we handle specially. + // In an unbreakable block, search only for the + // closing \b; if missing, the unbreakable block + // extends to the end of the string. + const size_t n = strcspn(str + len, + unbreakable ? "\b" : " \t\n\r\v\b"); + + // Calculate how many columns the characters need. + const size_t w = tuklib_mbstr_width_mem(str + len, n); + if (w == (size_t)-1) + return TUKLIB_WRAP_ERR_STR; + + width += w; + len += n; + + // \b isn't a line-break opportunity so it has to + // be handled here. For simplicity, empty blocks + // are treated as zero-width characters. + if (str[len] == '\b') { + ++len; + unbreakable = !unbreakable; + continue; + } + + break; + } + + // Determine if adding this chunk of text would make the + // current output line exceed opt->right_margin columns. + const bool too_long = cur_col + width > opt->right_margin; + + // Wrap the line if needed. However: + // + // - Don't wrap if the current column is less than where + // the continuation line would begin. In that case + // the chunk wouldn't fit on the next line either so + // we just have to produce an overlong line. + // + // - Don't wrap if so far the line only contains spaces. + // Wrapping in that case would leave a weird empty line. + // NOTE: This "only contains spaces" condition is the + // reason why left2_margin > left2_cont isn't allowed. + if (too_long && cur_col > cont_indent + && cur_col > pending_spaces) { + // There might be trailing spaces or zero-width spaces + // which need to be ignored to keep the output pretty. + // + // Spaces need to be ignored because in some + // writing styles there are two spaces after + // a full stop. Example string: + // + // "Foo bar. Abc def." + // ^ + // If the first space after the first full stop + // triggers word wrapping, both spaces must be + // ignored. Otherwise the next line would be + // indented too much. + // + // Zero-width spaces are ignored the same way + // because they are meaningless if an adjacent + // character is a space. + while (*str == ' ' || *str == '\t') + ++str; + + // Don't print the newline here; only mark it as + // pending. This avoids an unwanted empty line if + // there is a \n or \r or \0 after the spaces have + // been ignored. + pending_newline = true; + pending_spaces = cont_indent; + cur_col = pending_spaces; + + // Since str may have been incremented due to the + // ignored spaces, the loop needs to be restarted. + continue; + } + + // Print the current chunk of text before the next + // line-break opportunity. If the chunk was empty, + // don't print anything so that the pending newline + // and pending spaces aren't printed on their own. + if (len > 0) { + if (pending_newline) { + pending_newline = false; + if (putc('\n', outfile) == EOF) + return TUKLIB_WRAP_ERR_IO; + } + + while (pending_spaces > 0) { + if (putc(' ', outfile) == EOF) + return TUKLIB_WRAP_ERR_IO; + + --pending_spaces; + } + + for (size_t i = 0; i < len; ++i) { + // Ignore unbreakable block characters (\b). + const int c = (unsigned char)str[i]; + if (c != '\b' && putc(c, outfile) == EOF) + return TUKLIB_WRAP_ERR_IO; + } + + str += len; + cur_col += width; + + // Remember if the line got overlong. If no other + // errors occur, we return warn_overlong. It might + // help in catching problematic strings. + if (too_long) + warn_overlong = TUKLIB_WRAP_WARN_OVERLONG; + } + + // Handle the special character after the chunk of text. + switch (*str) { + case ' ': + // Regular space. + ++cur_col; + ++pending_spaces; + break; + + case '\v': + // Set the alternative indentation settings. + first_indent = opt->left2_margin; + cont_indent = opt->left2_cont; + + if (first_indent > cur_col) { + // Add one or more spaces to reach + // the column specified in first_indent. + pending_spaces += first_indent - cur_col; + } else { + // There is no room to add even one space + // before reaching the column first_indent. + pending_newline = true; + pending_spaces = first_indent; + } + + cur_col = first_indent; + break; + + case '\0': // Implicit newline at the end of the string. + case '\r': // Newline that also resets the effect of \v. + case '\n': // Newline without resetting the indentation mode. + if (putc('\n', outfile) == EOF) + return TUKLIB_WRAP_ERR_IO; + + if (*str == '\0') + return warn_overlong; + + if (*str == '\r') { + first_indent = opt->left_margin; + cont_indent = opt->left_cont; + } + + pending_newline = false; + pending_spaces = first_indent; + cur_col = first_indent; + break; + } + + // Skip the specially-handled character. + ++str; + } +} + + +extern int +tuklib_wrapf(FILE *stream, const struct tuklib_wrap_opt *opt, + const char *fmt, ...) +{ + va_list ap; + char *buf; + +#ifdef HAVE_VASPRINTF + va_start(ap, fmt); + +#ifdef __clang__ +# pragma GCC diagnostic push +# pragma GCC diagnostic ignored "-Wformat-nonliteral" +#endif + const int n = vasprintf(&buf, fmt, ap); +#ifdef __clang__ +# pragma GCC diagnostic pop +#endif + + va_end(ap); + if (n == -1) + return TUKLIB_WRAP_ERR_FORMAT; +#else + // Fixed buffer size is dumb but in practice one shouldn't need + // huge strings for *formatted* output. This simple method is safe + // with pre-C99 vsnprintf() implementations too which don't return + // the required buffer size (they return -1 or buf_size - 1) or + // which might not null-terminate the buffer in case it's too small. + const size_t buf_size = 128 * 1024; + buf = malloc(buf_size); + if (buf == NULL) + return TUKLIB_WRAP_ERR_FORMAT; + + va_start(ap, fmt); + const int n = vsnprintf(buf, buf_size, fmt, ap); + va_end(ap); + + if (n <= 0 || n >= (int)(buf_size - 1)) { + free(buf); + return TUKLIB_WRAP_ERR_FORMAT; + } +#endif + + const int ret = tuklib_wraps(stream, opt, buf); + free(buf); + return ret; +} diff --git a/src/native/external/xz/src/common/tuklib_mbstr_wrap.h b/src/native/external/xz/src/common/tuklib_mbstr_wrap.h new file mode 100644 index 00000000000000..4e2f297dabb412 --- /dev/null +++ b/src/native/external/xz/src/common/tuklib_mbstr_wrap.h @@ -0,0 +1,204 @@ +// SPDX-License-Identifier: 0BSD + +/////////////////////////////////////////////////////////////////////////////// +// +/// \file tuklib_mbstr_wrap.h +/// \brief Word wrapping for multibyte strings +/// +/// The word wrapping functions are intended to be usable, for example, +/// for printing --help text in command line tools. While manually-wrapped +/// --help text allows precise formatting, such freedom requires translators +/// to count spaces and determine where line breaks should occur. It's +/// tedious and error prone, and experience has shown that only some +/// translators do it well. Automatic word wrapping is less flexible but +/// results in polished-enough look with less effort from everyone. +/// Right-to-left languages and languages that don't use spaces between +/// words will still need extra effort though. +// +// Author: Lasse Collin +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef TUKLIB_MBSTR_WRAP_H +#define TUKLIB_MBSTR_WRAP_H + +#include "tuklib_common.h" +#include + +TUKLIB_DECLS_BEGIN + +/// One or more output lines exceeded right_margin. +/// This only a warning; everything was still printed successfully. +#define TUKLIB_WRAP_WARN_OVERLONG 0x01 + +/// Error writing to to the output FILE. The error flag in the FILE +/// should have been set as well. +#define TUKLIB_WRAP_ERR_IO 0x02 + +/// Invalid options in struct tuklib_wrap_opt. +/// Nothing was printed. +#define TUKLIB_WRAP_ERR_OPT 0x04 + +/// Invalid or unsupported multibyte character in the input string: +/// either mbrtowc() failed or wcwidth() returned a negative value. +#define TUKLIB_WRAP_ERR_STR 0x08 + +/// Only tuklib_wrapf(): Error in converting the format string. +/// It's either a memory allocation failure or something bad with the +/// format string or arguments. +#define TUKLIB_WRAP_ERR_FORMAT 0x10 + +/// Options for tuklib_wraps() and tuklib_wrapf() +struct tuklib_wrap_opt { + /// Indentation of the first output line after `\n` or `\r`. + /// This can be anything less than right_margin. + unsigned short left_margin; + + /// Column where word-wrapped continuation lines start. + /// This can be anything less than right_margin. + unsigned short left_cont; + + /// Column where the text after `\v` will start, either on the current + /// line (when there is room to add at least one space) or on a new + /// empty line. + unsigned short left2_margin; + + /// Like left_cont but for text after a `\v`. However, this must + /// be greater than or equal to left2_margin in addition to being + /// less than right_margin. + unsigned short left2_cont; + + /// For 80-column terminals, it is recommended to use 79 here for + /// maximum portability. 80 will work most of the time but it will + /// result in unwanted empty lines in the rare case where a terminal + /// moves the cursor to the beginning of the next line immediately + /// when the last column has been used. + unsigned short right_margin; +}; + +#define tuklib_wraps TUKLIB_SYMBOL(tuklib_wraps) +extern int tuklib_wraps(FILE *stream, const struct tuklib_wrap_opt *opt, + const char *str); +///< +/// \brief Word wrap a multibyte string and write it to a FILE +/// +/// Word wrapping is done only at spaces and at the special control characters +/// described below. Multiple consecutive spaces are handled properly: strings +/// that have two (or more) spaces after a full sentence will look good even +/// when the spaces occur at a word wrapping boundary. Trailing spaces are +/// ignored at the end of a line or at the end of a string. +/// +/// The following control characters have been repurposed: +/// +/// - `\t` = Zero-width space allows a line break without producing any +/// output by itself. This can be useful after hard hyphens as +/// hyphens aren't otherwise used for line breaking. This can also +/// be useful in languages that don't use spaces between words. +/// (The Unicode character U+200B isn't supported.) +/// - `\b` = Text between a pair of `\b` characters is treated as an +/// unbreakable block (not wrapped even if there are spaces). +/// For example, a non-breaking space can be done like +/// in `"123\b \bMiB"`. Control characters (like `\n` or `\t`) +/// aren't allowed before the closing `\b`. If closing `\b` is +/// missing, the block extends to the end of the string. Empty +/// blocks are treated as zero-width characters. If line breaks +/// are possible around an empty block (like in `"foo \b\b bar"` +/// or `"foo \b"`), it can result in weird output. +/// - `\v` = Change to alternative indentation (left2_margin). +/// - `\r` = Reset back to the initial indentation and add a newline. +/// The next line will be indented by left_margin. +/// - `\n` = Add a newline without resetting the effect of `\v`. The +/// next line will be indented by left_margin or left2_margin +/// (not left_cont or left2_cont). +/// +/// Only `\n` should appear in translatable strings. `\t` works too but +/// even that might confuse some translators even if there is a TRANSLATORS +/// comment explaining its meaning. +/// +/// To use the other control characters in messages, one should use +/// tuklib_wrapf() with appropriate printf format string to combine +/// translatable strings with non-translatable portions. For example: +/// +/// \code{.c} +/// static const struct tuklib_wrap_opt wrap2 = { 2, 2, 22, 22, 79 }; +/// int e = 0; +/// ... +/// e |= tuklib_wrapf(stdout, &wrap2, +/// "-h, --help\v%s\r" +/// " --version\v%s", +/// W_("display this help and exit"), +/// W_("display version information and exit")); +/// ... +/// if (e != 0) { +/// // Handle warning or error. +/// ... +/// } +/// \endcode +/// +/// Control characters other than `\n` and `\t` are unusable in +/// translatable strings: +/// +/// - Gettext tools show annoying warnings if C escape sequences other +/// than `\n` or `\t` are seen. (Otherwise they still work perfectly +/// fine though.) +/// +/// - While at least Poedit and Lokalize support all escapes, some +/// editors only support `\n` and `\t`. +/// +/// - They could confuse some translators, resulting in broken +/// translations. +/// +/// Using non-control characters would solve some issues but it wouldn't +/// help with the unfortunate real-world issue that some translators would +/// likely have trouble understanding a new syntax. The Gettext manual +/// specifically warns about this, see the subheading "No unusual markup" +/// in `info (gettext)Preparing Strings`. (While using `\t` for zero-width +/// space is such custom markup, most translators will never need it.) +/// +/// Translators can use the Unicode character U+00A0 (or U+202F) if they +/// need a non-breaking space. For example, in French a non-breaking space +/// may be needed before colons and question marks (U+00A0 is common in +/// real-world French PO files). +/// +/// Using a non-ASCII char in a string in the C code (like `"123\u00A0MiB"`) +/// can work if one tells xgettext that input encoding is UTF-8, one +/// ensures that the C compiler uses UTF-8 as the input charset, and one +/// is certain that the program is *always* run under an UTF-8 locale. +/// Unfortunately a portable program cannot make this kind of assumptions, +/// which means that there is no pretty way to have a non-breaking space in +/// a translatable string. +/// +/// Optional: To tell translators which strings are automatically word +/// wrapped, see the macro `W_` in tuklib_gettext.h. +/// +/// \param stream Output FILE stream. For decent performance, it +/// should be in buffered mode because this function +/// writes the output one byte at a time with fputc(). +/// \param opt Word wrapping options. +/// \param str Null-terminated multibyte string that is in +/// the encoding used by the current locale. +/// +/// \return Returns 0 on success. If an error or warning occurs, one of +/// TUKLIB_WRAP_* codes is returned. Those codes are powers +/// of two. When warning/error detection can be delayed, the +/// return values can be accumulated from multiple calls using +/// bitwise-or into a single variable which can be checked after +/// all strings have (hopefully) been printed. + +#define tuklib_wrapf TUKLIB_SYMBOL(tuklib_wrapf) +tuklib_attr_format_printf(3, 4) +extern int tuklib_wrapf(FILE *stream, const struct tuklib_wrap_opt *opt, + const char *fmt, ...); +///< +/// \brief Format and word-wrap a multibyte string and write it to a FILE +/// +/// This is like tuklib_wraps() except that this takes a printf +/// format string. +/// +/// \note On platforms that lack vasprintf(), the intermediate +/// result from vsnprintf() must fit into a 128 KiB buffer. +/// TUKLIB_WRAP_ERR_FORMAT is returned if it doesn't but +/// only on platforms that lack vasprintf(). + +TUKLIB_DECLS_END +#endif diff --git a/src/native/external/xz/src/common/tuklib_open_stdxxx.c b/src/native/external/xz/src/common/tuklib_open_stdxxx.c new file mode 100644 index 00000000000000..b93e61d3b6885a --- /dev/null +++ b/src/native/external/xz/src/common/tuklib_open_stdxxx.c @@ -0,0 +1,56 @@ +// SPDX-License-Identifier: 0BSD + +/////////////////////////////////////////////////////////////////////////////// +// +/// \file tuklib_open_stdxxx.c +/// \brief Make sure that file descriptors 0, 1, and 2 are open +// +// Author: Lasse Collin +// +/////////////////////////////////////////////////////////////////////////////// + +#include "tuklib_open_stdxxx.h" + +#ifndef TUKLIB_DOSLIKE +# include +# include +# include +# include +#endif + + +extern void +tuklib_open_stdxxx(int err_status) +{ +#ifdef TUKLIB_DOSLIKE + // Do nothing, just silence warnings. + (void)err_status; + +#else + for (int i = 0; i <= 2; ++i) { + // We use fcntl() to check if the file descriptor is open. + if (fcntl(i, F_GETFD) == -1 && errno == EBADF) { + // With stdin, we could use /dev/full so that + // writing to stdin would fail. However, /dev/full + // is Linux specific, and if the program tries to + // write to stdin, there's already a problem anyway. + const int fd = open("/dev/null", O_NOCTTY + | (i == 0 ? O_WRONLY : O_RDONLY)); + + if (fd != i) { + if (fd != -1) + (void)close(fd); + + // Something went wrong. Exit with the + // exit status we were given. Don't try + // to print an error message, since stderr + // may very well be non-existent. This + // error should be extremely rare. + exit(err_status); + } + } + } +#endif + + return; +} diff --git a/src/native/external/xz/src/common/tuklib_open_stdxxx.h b/src/native/external/xz/src/common/tuklib_open_stdxxx.h new file mode 100644 index 00000000000000..3ee3ade3552732 --- /dev/null +++ b/src/native/external/xz/src/common/tuklib_open_stdxxx.h @@ -0,0 +1,22 @@ +// SPDX-License-Identifier: 0BSD + +/////////////////////////////////////////////////////////////////////////////// +// +/// \file tuklib_open_stdxxx.h +/// \brief Make sure that file descriptors 0, 1, and 2 are open +// +// Author: Lasse Collin +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef TUKLIB_OPEN_STDXXX_H +#define TUKLIB_OPEN_STDXXX_H + +#include "tuklib_common.h" +TUKLIB_DECLS_BEGIN + +#define tuklib_open_stdxx TUKLIB_SYMBOL(tuklib_open_stdxxx) +extern void tuklib_open_stdxxx(int err_status); + +TUKLIB_DECLS_END +#endif diff --git a/src/native/external/xz/src/common/tuklib_physmem.c b/src/native/external/xz/src/common/tuklib_physmem.c new file mode 100644 index 00000000000000..5988ba77a2845a --- /dev/null +++ b/src/native/external/xz/src/common/tuklib_physmem.c @@ -0,0 +1,224 @@ +// SPDX-License-Identifier: 0BSD + +/////////////////////////////////////////////////////////////////////////////// +// +/// \file tuklib_physmem.c +/// \brief Get the amount of physical memory +// +// Author: Lasse Collin +// +/////////////////////////////////////////////////////////////////////////////// + +#include "tuklib_physmem.h" + +// We want to use Windows-specific code on Cygwin, which also has memory +// information available via sysconf(), but on Cygwin 1.5 and older it +// gives wrong results (from our point of view). +#if defined(_WIN32) || defined(__CYGWIN__) +# ifndef _WIN32_WINNT +# define _WIN32_WINNT 0x0500 +# endif +# include + +#elif defined(__OS2__) +# define INCL_DOSMISC +# include + +#elif defined(__DJGPP__) +# include + +#elif defined(__VMS) +# include +# include +# include + +#elif defined(AMIGA) || defined(__AROS__) +# define __USE_INLINE__ +# include + +#elif defined(__QNX__) +# include +# include + +#elif defined(TUKLIB_PHYSMEM_AIX) +# include + +#elif defined(TUKLIB_PHYSMEM_SYSCONF) +# include + +#elif defined(TUKLIB_PHYSMEM_SYSCTL) +# ifdef HAVE_SYS_PARAM_H +# include +# endif +# include + +// Tru64 +#elif defined(TUKLIB_PHYSMEM_GETSYSINFO) +# include +# include + +// HP-UX +#elif defined(TUKLIB_PHYSMEM_PSTAT_GETSTATIC) +# include +# include + +// IRIX +#elif defined(TUKLIB_PHYSMEM_GETINVENT_R) +# include + +// This sysinfo() is Linux-specific. +#elif defined(TUKLIB_PHYSMEM_SYSINFO) +# include +#endif + + +extern uint64_t +tuklib_physmem(void) +{ + uint64_t ret = 0; + +#if defined(_WIN32) || defined(__CYGWIN__) + // This requires Windows 2000 or later. + MEMORYSTATUSEX meminfo; + meminfo.dwLength = sizeof(meminfo); + if (GlobalMemoryStatusEx(&meminfo)) + ret = meminfo.ullTotalPhys; + +/* + // Old version that is compatible with even Win95: + if ((GetVersion() & 0xFF) >= 5) { + // Windows 2000 and later have GlobalMemoryStatusEx() which + // supports reporting values greater than 4 GiB. To keep the + // code working also on older Windows versions, use + // GlobalMemoryStatusEx() conditionally. + HMODULE kernel32 = GetModuleHandleA("kernel32.dll"); + if (kernel32 != NULL) { + typedef BOOL (WINAPI *gmse_type)(LPMEMORYSTATUSEX); + gmse_type gmse = (gmse_type)GetProcAddress( + kernel32, "GlobalMemoryStatusEx"); + if (gmse != NULL) { + MEMORYSTATUSEX meminfo; + meminfo.dwLength = sizeof(meminfo); + if (gmse(&meminfo)) + ret = meminfo.ullTotalPhys; + } + } + } + + if (ret == 0) { + // GlobalMemoryStatus() is supported by Windows 95 and later, + // so it is fine to link against it unconditionally. Note that + // GlobalMemoryStatus() has no return value. + MEMORYSTATUS meminfo; + meminfo.dwLength = sizeof(meminfo); + GlobalMemoryStatus(&meminfo); + ret = meminfo.dwTotalPhys; + } +*/ + +#elif defined(__OS2__) + unsigned long mem; + if (DosQuerySysInfo(QSV_TOTPHYSMEM, QSV_TOTPHYSMEM, + &mem, sizeof(mem)) == 0) + ret = mem; + +#elif defined(__DJGPP__) + __dpmi_free_mem_info meminfo; + if (__dpmi_get_free_memory_information(&meminfo) == 0 + && meminfo.total_number_of_physical_pages + != (unsigned long)-1) + ret = (uint64_t)meminfo.total_number_of_physical_pages * 4096; + +#elif defined(__VMS) + int vms_mem; + int val = SYI$_MEMSIZE; + if (LIB$GETSYI(&val, &vms_mem, 0, 0, 0, 0) == SS$_NORMAL) + ret = (uint64_t)vms_mem * 8192; + +#elif defined(AMIGA) || defined(__AROS__) + ret = AvailMem(MEMF_TOTAL); + +#elif defined(__QNX__) + const struct asinfo_entry *entries = SYSPAGE_ENTRY(asinfo); + size_t count = SYSPAGE_ENTRY_SIZE(asinfo) / sizeof(struct asinfo_entry); + const char *strings = SYSPAGE_ENTRY(strings)->data; + + for (size_t i = 0; i < count; ++i) + if (strcmp(strings + entries[i].name, "ram") == 0) + ret += entries[i].end - entries[i].start + 1; + +#elif defined(TUKLIB_PHYSMEM_AIX) + ret = (uint64_t)_system_configuration.physmem; + +#elif defined(TUKLIB_PHYSMEM_SYSCONF) + const long pagesize = sysconf(_SC_PAGESIZE); + const long pages = sysconf(_SC_PHYS_PAGES); + if (pagesize != -1 && pages != -1) + // According to docs, pagesize * pages can overflow. + // Simple case is 32-bit box with 4 GiB or more RAM, + // which may report exactly 4 GiB of RAM, and "long" + // being 32-bit will overflow. Casting to uint64_t + // hopefully avoids overflows in the near future. + ret = (uint64_t)pagesize * (uint64_t)pages; + +#elif defined(TUKLIB_PHYSMEM_SYSCTL) + int name[2] = { + CTL_HW, +#ifdef HW_PHYSMEM64 + HW_PHYSMEM64 +#else + HW_PHYSMEM +#endif + }; + union { + uint32_t u32; + uint64_t u64; + } mem; + size_t mem_ptr_size = sizeof(mem.u64); + if (sysctl(name, 2, &mem.u64, &mem_ptr_size, NULL, 0) != -1) { + // IIRC, 64-bit "return value" is possible on some 64-bit + // BSD systems even with HW_PHYSMEM (instead of HW_PHYSMEM64), + // so support both. + if (mem_ptr_size == sizeof(mem.u64)) + ret = mem.u64; + else if (mem_ptr_size == sizeof(mem.u32)) + ret = mem.u32; + } + +#elif defined(TUKLIB_PHYSMEM_GETSYSINFO) + // Docs are unclear if "start" is needed, but it doesn't hurt + // much to have it. + int memkb; + int start = 0; + if (getsysinfo(GSI_PHYSMEM, (caddr_t)&memkb, sizeof(memkb), &start) + != -1) + ret = (uint64_t)memkb * 1024; + +#elif defined(TUKLIB_PHYSMEM_PSTAT_GETSTATIC) + struct pst_static pst; + if (pstat_getstatic(&pst, sizeof(pst), 1, 0) != -1) + ret = (uint64_t)pst.physical_memory * (uint64_t)pst.page_size; + +#elif defined(TUKLIB_PHYSMEM_GETINVENT_R) + inv_state_t *st = NULL; + if (setinvent_r(&st) != -1) { + inventory_t *i; + while ((i = getinvent_r(st)) != NULL) { + if (i->inv_class == INV_MEMORY + && i->inv_type == INV_MAIN_MB) { + ret = (uint64_t)i->inv_state << 20; + break; + } + } + + endinvent_r(st); + } + +#elif defined(TUKLIB_PHYSMEM_SYSINFO) + struct sysinfo si; + if (sysinfo(&si) == 0) + ret = (uint64_t)si.totalram * si.mem_unit; +#endif + + return ret; +} diff --git a/src/native/external/xz/src/common/tuklib_physmem.h b/src/native/external/xz/src/common/tuklib_physmem.h new file mode 100644 index 00000000000000..f35bfbab9c169f --- /dev/null +++ b/src/native/external/xz/src/common/tuklib_physmem.h @@ -0,0 +1,27 @@ +// SPDX-License-Identifier: 0BSD + +/////////////////////////////////////////////////////////////////////////////// +// +/// \file tuklib_physmem.h +/// \brief Get the amount of physical memory +// +// Author: Lasse Collin +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef TUKLIB_PHYSMEM_H +#define TUKLIB_PHYSMEM_H + +#include "tuklib_common.h" +TUKLIB_DECLS_BEGIN + +#define tuklib_physmem TUKLIB_SYMBOL(tuklib_physmem) +extern uint64_t tuklib_physmem(void); +///< +/// \brief Get the amount of physical memory in bytes +/// +/// \return Amount of physical memory in bytes. On error, zero is +/// returned. + +TUKLIB_DECLS_END +#endif diff --git a/src/native/external/xz/src/common/tuklib_progname.c b/src/native/external/xz/src/common/tuklib_progname.c new file mode 100644 index 00000000000000..959c1270ce3292 --- /dev/null +++ b/src/native/external/xz/src/common/tuklib_progname.c @@ -0,0 +1,49 @@ +// SPDX-License-Identifier: 0BSD + +/////////////////////////////////////////////////////////////////////////////// +// +/// \file tuklib_progname.c +/// \brief Program name to be displayed in messages +// +// Author: Lasse Collin +// +/////////////////////////////////////////////////////////////////////////////// + +#include "tuklib_progname.h" +#include + + +#ifndef HAVE_PROGRAM_INVOCATION_NAME +char *progname = NULL; +#endif + + +extern void +tuklib_progname_init(char **argv) +{ +#ifdef TUKLIB_DOSLIKE + // On these systems, argv[0] always has the full path and .exe + // suffix even if the user just types the plain program name. + // We modify argv[0] to make it nicer to read. + + // Strip the leading path. + char *p = argv[0] + strlen(argv[0]); + while (argv[0] < p && p[-1] != '/' && p[-1] != '\\') + --p; + + argv[0] = p; + + // Strip the .exe suffix. + p = strrchr(p, '.'); + if (p != NULL) + *p = '\0'; + + // Make it lowercase. + for (p = argv[0]; *p != '\0'; ++p) + if (*p >= 'A' && *p <= 'Z') + *p = *p - 'A' + 'a'; +#endif + + progname = argv[0]; + return; +} diff --git a/src/native/external/xz/src/common/tuklib_progname.h b/src/native/external/xz/src/common/tuklib_progname.h new file mode 100644 index 00000000000000..a3d90cb1f21a70 --- /dev/null +++ b/src/native/external/xz/src/common/tuklib_progname.h @@ -0,0 +1,31 @@ +// SPDX-License-Identifier: 0BSD + +/////////////////////////////////////////////////////////////////////////////// +// +/// \file tuklib_progname.h +/// \brief Program name to be displayed in messages +// +// Author: Lasse Collin +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef TUKLIB_PROGNAME_H +#define TUKLIB_PROGNAME_H + +#include "tuklib_common.h" +#include + +TUKLIB_DECLS_BEGIN + +#ifdef HAVE_PROGRAM_INVOCATION_NAME +# define progname program_invocation_name +#else +# define progname TUKLIB_SYMBOL(tuklib_progname) + extern char *progname; +#endif + +#define tuklib_progname_init TUKLIB_SYMBOL(tuklib_progname_init) +extern void tuklib_progname_init(char **argv); + +TUKLIB_DECLS_END +#endif diff --git a/src/native/external/xz/src/common/w32_application.manifest b/src/native/external/xz/src/common/w32_application.manifest new file mode 100644 index 00000000000000..2f8750879b1f48 --- /dev/null +++ b/src/native/external/xz/src/common/w32_application.manifest @@ -0,0 +1,28 @@ + + + + + + + + + + + + + + + + + + + + + + + + true + UTF-8 + + + diff --git a/src/native/external/xz/src/common/w32_application.manifest.comments.txt b/src/native/external/xz/src/common/w32_application.manifest.comments.txt new file mode 100644 index 00000000000000..de5c2105acf95f --- /dev/null +++ b/src/native/external/xz/src/common/w32_application.manifest.comments.txt @@ -0,0 +1,192 @@ + +Windows application manifest for UTF-8 and long paths +===================================================== + +The .manifest file is embedded as is in the executables, thus +the comments are here in a separate file. These comments were +written in context of XZ Utils but might be useful when porting +other command line tools from POSIX environments to Windows. + + NOTE: On Cygwin and MSYS2, command line arguments and file + system access aren't tied to a Windows code page. Cygwin + and MSYS2 include a default application manifest. Replacing + it doesn't seem useful and might even be harmful if Cygwin + and MSYS2 some day change their default manifest. + + +UTF-8 code page +--------------- + +On Windows, command line applications can use main() or wmain(). +With the Windows-specific wmain(), argv contains UTF-16 code units +which is the native encoding on Windows. With main(), argv uses the +system active code page by default. It typically is a legacy code +page like Windows-1252. + + NOTE: On POSIX, argv for main() is constructed by the calling + process. On Windows, argv is constructed by a new process + itself: a program receives the command line as a single string, + and the startup code splits it into individual arguments, + including quote removal and wildcard expansion. Then main() or + wmain() is called. + +This application manifest forces the process code page to UTF-8 +when the application runs on Windows 10 version 1903 or later. +This is useful for programs that use main(): + + * UTF-8 allows such programs to access files whose names contain + characters that don't exist in the current legacy code page. + However, filenames on Windows may contain unpaired surrogates + (invalid UTF-16). Such files cannot be accesses even with the + UTF-8 code page. + + * UTF-8 avoids a security issue in command line argument handling: + If a command line contains Unicode characters (for example, + filenames) that don't exist in the current legacy code page, + the characters are converted to similar-looking characters + with best-fit mapping. Some best-fit mappings result in ASCII + characters that change the meaning of the command line, which + can be exploited with malicious filenames. For example: + + - Double quote (") breaks quoting and makes argument + injection possible. + + - Question mark (?) is a wildcard character which may + expand to one or more filenames. + + - Forward slash (/) makes a directory traversal attack + possible. This character can appear in a dangerous way + even from a wildcard expansion; a look-alike character + doesn't need to be passed directly on the command line. + + UTF-8 avoids best-fit mappings. However, it's still not + perfect. Unpaired surrogates (invalid UTF-16) on the command + line (including those from wildcard expansion) are converted + to the replacement character U+FFFD. Thus, filenames with + different unpaired surrogates appear identical when converted + to the UTF-8 code page and aren't distinguishable from + filenames that contain the actual replacement character U+FFFD. + + FindFirstFileA() and FindFirstFileExA() also suffer from the above + issue where unpaired surrogates become U+FFFD. Another issue is + that filenames may require more bytes in UTF-8 than in a legacy + code page. In UTF-8, a very long filename may exceed MAX_PATH bytes + and thus these APIs cannot list such filenames anymore because + WIN32_FIND_DATAA has a member "CHAR cFileName[MAX_PATH]". + +If different programs use different code pages, compatibility issues +are possible. For example, if one program produces a list of +filenames and another program reads it, both programs should use +the same code page because the code page affects filenames in the +char-based file system APIs. + +If building with a MinGW-w64 toolchain, it is strongly recommended +to use UCRT instead of the old MSVCRT. For example, with the UTF-8 +code page, MSVCRT doesn't convert non-ASCII characters correctly +when writing to console with printf(). With UCRT it works. + + +Long path names +--------------- + +The manifest enables support for path names longer than 260 wide +characters (UTF-16 code units) if the feature has been enabled in +the Windows registry. Omit the longPathAware element from the manifest +if the application isn't compatible with it. For example, some uses +of MAX_PATH might be a sign of incompatibility. + +Note that UTF-8 encoded filenames can exceed MAX_PATH (260) bytes when +the UTF-16 form is still within MAX_PATH wide characters. In this +situation the application doesn't need to be long path aware: functions +like _open() work with UTF-8 names that exceed MAX_PATH bytes if the +wide character form stays within MAX_PATH wide characters. (MAX_PATH +includes the terminating null character.) + +Documentation of the registry setting: +https://learn.microsoft.com/en-us/windows/win32/fileio/maximum-file-path-limitation?tabs=registry#enable-long-paths-in-windows-10-version-1607-and-later + + +Summary of the manifest contents +-------------------------------- + +See also Microsoft's documentation: +https://learn.microsoft.com/en-us/windows/win32/sbscs/application-manifests + +assemblyIdentity (omitted) + + This is documented as mandatory but not all apps in the real world + have it, and of those that do, not all put an up-to-date version + number there. Things seem to work correctly without + so let's keep this simpler and omit it. + +compatibility + + Declare the application compatible with different Windows versions. + Without this, Windows versions newer than Vista will run the + application using Vista as the Operating System Context. + +trustInfo + + Declare the application as UAC-compliant. This avoids file system + and registry virtualization that Windows otherwise does with 32-bit + executables to make some ancient applications work. UAC-compliancy + also stops Windows from using heuristics based on the filename + (like setup.exe) to guess when elevated privileges might be + needed which would then bring up the UAC prompt. + +longPathAware + + Declare the application as long path aware. This way many file + system operations aren't limited to MAX_PATH (260) wide characters + (including the terminating null character). The feature has to be + enabled in the Windows registry too. + +activeCodePage + + Force the process code page to UTF-8 on Windows 10 version 1903 + and later. For example: + + - main() gets the command line arguments in UTF-8 instead of + in a legacy code page. + + - File system APIs that take char-based strings use UTF-8 + instead of a legacy code page. + + - Text written to the console via stdio.h's stdout or stderr + (like calling printf()) are expected to be in UTF-8. + + +CMake notes +----------- + +As of CMake 3.30, one can add a .manifest file as a source file but +it only works with MSVC; it's ignored with MinGW-w64 toolchains. +Embedding the manifest with a resource file works with all +toolchains. However, then the default manifest needs to be +disabled with MSVC in CMakeLists.txt to avoid duplicate +manifests which would break the build. + +w32_application.manifest.rc: + + #include + CREATEPROCESS_MANIFEST_RESOURCE_ID RT_MANIFEST "w32_application.manifest" + +Or the same thing without the #include: + + 1 24 "w32_application.manifest" + +CMakeLists.txt: + + if(MSVC) + set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} /MANIFEST:NO") + endif() + + add_executable(foo foo.c) + + # WIN32 isn't set on Cygwin or MSYS2, thus if(WIN32) is correct here. + if(WIN32) + target_sources(foo PRIVATE w32_application.manifest.rc) + set_source_files_properties(w32_application.manifest.rc PROPERTIES + OBJECT_DEPENDS w32_application.manifest + ) + endif() diff --git a/src/native/external/xz/src/liblzma/Makefile.am b/src/native/external/xz/src/liblzma/Makefile.am new file mode 100644 index 00000000000000..b8532d5a0d5d14 --- /dev/null +++ b/src/native/external/xz/src/liblzma/Makefile.am @@ -0,0 +1,128 @@ +## SPDX-License-Identifier: 0BSD +## Author: Lasse Collin + +SUBDIRS = api + +EXTRA_DIST = +CLEANFILES = +doc_DATA = + +lib_LTLIBRARIES = liblzma.la +liblzma_la_SOURCES = +liblzma_la_CPPFLAGS = \ + -I$(top_srcdir)/src/liblzma/api \ + -I$(top_srcdir)/src/liblzma/common \ + -I$(top_srcdir)/src/liblzma/check \ + -I$(top_srcdir)/src/liblzma/lz \ + -I$(top_srcdir)/src/liblzma/rangecoder \ + -I$(top_srcdir)/src/liblzma/lzma \ + -I$(top_srcdir)/src/liblzma/delta \ + -I$(top_srcdir)/src/liblzma/simple \ + -I$(top_srcdir)/src/common \ + -DTUKLIB_SYMBOL_PREFIX=lzma_ +liblzma_la_LDFLAGS = -no-undefined -version-info 13:2:8 + +EXTRA_DIST += liblzma_generic.map liblzma_linux.map validate_map.sh +if COND_SYMVERS_GENERIC +liblzma_la_LDFLAGS += \ + $(LINKER_FLAG_UNDEFINED_VERSION) \ + -Wl,--version-script=$(top_srcdir)/src/liblzma/liblzma_generic.map +endif +if COND_SYMVERS_LINUX +liblzma_la_LDFLAGS += \ + $(LINKER_FLAG_UNDEFINED_VERSION) \ + -Wl,--version-script=$(top_srcdir)/src/liblzma/liblzma_linux.map +endif + +liblzma_la_SOURCES += ../common/tuklib_physmem.c + +if COND_THREADS +liblzma_la_SOURCES += ../common/tuklib_cpucores.c +endif + +include $(srcdir)/common/Makefile.inc +include $(srcdir)/check/Makefile.inc + +if COND_FILTER_LZ +include $(srcdir)/lz/Makefile.inc +endif + +if COND_FILTER_LZMA1 +include $(srcdir)/lzma/Makefile.inc +include $(srcdir)/rangecoder/Makefile.inc +endif + +if COND_FILTER_DELTA +include $(srcdir)/delta/Makefile.inc +endif + +if COND_FILTER_SIMPLE +include $(srcdir)/simple/Makefile.inc +endif + + +## Windows-specific stuff + +# Windows resource compiler support. libtool knows what to do with .rc +# files, but Automake (<= 1.11 at least) doesn't know. +# +# We want the resource file only in shared liblzma. To avoid linking it into +# static liblzma, we overwrite the static object file with an object file +# compiled from empty input. Note that GNU-specific features are OK here, +# because on Windows we are compiled with the GNU toolchain. +# +# The typedef in empty.c will prevent an empty translation unit, which is +# not allowed by the C standard. It results in a warning with +# -Wempty-translation-unit with Clang or -pedantic for GCC. +.rc.lo: + $(LIBTOOL) --mode=compile $(RC) $(DEFS) $(DEFAULT_INCLUDES) \ + $(INCLUDES) $(liblzma_la_CPPFLAGS) $(CPPFLAGS) $(RCFLAGS) \ + -i $< -o $@ + echo "typedef void empty;" > empty.c + $(COMPILE) -c empty.c -o $(*D)/$(*F).o + +# Remove ordinals from the generated .def file. People must link by name, +# not by ordinal, because no one is going to track the ordinal numbers. +liblzma.def: liblzma.la liblzma.def.in + sed 's/ \+@ *[0-9]\+//' liblzma.def.in > liblzma.def + +# Creating liblzma.def.in is a side effect of linking the library. +liblzma.def.in: liblzma.la + +if COND_W32 +CLEANFILES += liblzma.def liblzma.def.in empty.c +liblzma_la_SOURCES += liblzma_w32res.rc +liblzma_la_LDFLAGS += -Xlinker --output-def -Xlinker liblzma.def.in + +## liblzma.def.in is created only when building shared liblzma, so don't +## try to create liblzma.def when not building shared liblzma. +if COND_SHARED +doc_DATA += liblzma.def +endif +endif + + +## pkg-config +pkgconfigdir = $(libdir)/pkgconfig +pkgconfig_DATA = liblzma.pc +EXTRA_DIST += liblzma.pc.in + +pc_verbose = $(pc_verbose_@AM_V@) +pc_verbose_ = $(pc_verbose_@AM_DEFAULT_V@) +pc_verbose_0 = @echo " PC " $@; + +liblzma.pc: $(srcdir)/liblzma.pc.in + $(AM_V_at)rm -f $@ + $(pc_verbose)sed \ + -e 's,@prefix[@],$(prefix),g' \ + -e 's,@exec_prefix[@],$(exec_prefix),g' \ + -e 's,@libdir[@],$(libdir),g' \ + -e 's,@includedir[@],$(includedir),g' \ + -e 's,@PACKAGE_URL[@],$(PACKAGE_URL),g' \ + -e 's,@PACKAGE_VERSION[@],$(PACKAGE_VERSION),g' \ + -e 's,@PTHREAD_CFLAGS[@],$(PTHREAD_CFLAGS),g' \ + -e 's,@LIBS[@],$(LIBS),g' \ + < $(srcdir)/liblzma.pc.in > $@ || { rm -f $@; exit 1; } + +clean-local: + rm -f liblzma.pc diff --git a/src/native/external/xz/src/liblzma/api/Makefile.am b/src/native/external/xz/src/liblzma/api/Makefile.am new file mode 100644 index 00000000000000..4f91c77a60fba9 --- /dev/null +++ b/src/native/external/xz/src/liblzma/api/Makefile.am @@ -0,0 +1,38 @@ +## SPDX-License-Identifier: 0BSD +## Author: Lasse Collin + +nobase_include_HEADERS = \ + lzma.h \ + lzma/base.h \ + lzma/bcj.h \ + lzma/block.h \ + lzma/check.h \ + lzma/container.h \ + lzma/delta.h \ + lzma/filter.h \ + lzma/hardware.h \ + lzma/index.h \ + lzma/index_hash.h \ + lzma/lzma12.h \ + lzma/stream_flags.h \ + lzma/version.h \ + lzma/vli.h + +if COND_DOXYGEN +$(top_builddir)/doc/api/index.html: $(top_srcdir)/doxygen/update-doxygen $(top_srcdir)/doxygen/Doxyfile $(nobase_include_HEADERS) + $(MKDIR_P) "$(top_builddir)/doc" + "$(top_srcdir)/doxygen/update-doxygen" api \ + "$(top_srcdir)" "$(top_builddir)/doc" + +all-local: $(top_builddir)/doc/api/index.html + +install-data-local: + $(MKDIR_P) "$(DESTDIR)$(docdir)/api" + $(INSTALL_DATA) "$(top_builddir)"/doc/api/* "$(DESTDIR)$(docdir)/api" + +uninstall-local: + rm -rf "$(DESTDIR)$(docdir)/api" + +clean-local: + rm -rf "$(top_builddir)/doc/api" +endif diff --git a/src/native/external/xz/src/liblzma/api/lzma.h b/src/native/external/xz/src/liblzma/api/lzma.h new file mode 100644 index 00000000000000..6ca6e503d8a6eb --- /dev/null +++ b/src/native/external/xz/src/liblzma/api/lzma.h @@ -0,0 +1,327 @@ +/* SPDX-License-Identifier: 0BSD */ + +/** + * \file api/lzma.h + * \brief The public API of liblzma data compression library + * \mainpage + * + * liblzma is a general-purpose data compression library with a zlib-like API. + * The native file format is .xz, but also the old .lzma format and raw (no + * headers) streams are supported. Multiple compression algorithms (filters) + * are supported. Currently LZMA2 is the primary filter. + * + * liblzma is part of XZ Utils . XZ Utils + * includes a gzip-like command line tool named xz and some other tools. + * XZ Utils is developed and maintained by Lasse Collin. + * + * Major parts of liblzma are based on code written by Igor Pavlov, + * specifically the LZMA SDK . + * + * The SHA-256 implementation in liblzma is based on code written by + * Wei Dai in Crypto++ Library . + * + * liblzma is distributed under the BSD Zero Clause License (0BSD). + */ + +/* + * Author: Lasse Collin + */ + +#ifndef LZMA_H +#define LZMA_H + +/***************************** + * Required standard headers * + *****************************/ + +/* + * liblzma API headers need some standard types and macros. To allow + * including lzma.h without requiring the application to include other + * headers first, lzma.h includes the required standard headers unless + * they already seem to be included already or if LZMA_MANUAL_HEADERS + * has been defined. + * + * Here's what types and macros are needed and from which headers: + * - stddef.h: size_t, NULL + * - stdint.h: uint8_t, uint32_t, uint64_t, UINT32_C(n), uint64_C(n), + * UINT32_MAX, UINT64_MAX + * + * However, inttypes.h is a little more portable than stdint.h, although + * inttypes.h declares some unneeded things compared to plain stdint.h. + * + * The hacks below aren't perfect, specifically they assume that inttypes.h + * exists and that it typedefs at least uint8_t, uint32_t, and uint64_t, + * and that, in case of incomplete inttypes.h, unsigned int is 32-bit. + * If the application already takes care of setting up all the types and + * macros properly (for example by using gnulib's stdint.h or inttypes.h), + * we try to detect that the macros are already defined and don't include + * inttypes.h here again. However, you may define LZMA_MANUAL_HEADERS to + * force this file to never include any system headers. + * + * Some could argue that liblzma API should provide all the required types, + * for example lzma_uint64, LZMA_UINT64_C(n), and LZMA_UINT64_MAX. This was + * seen as an unnecessary mess, since most systems already provide all the + * necessary types and macros in the standard headers. + * + * Note that liblzma API still has lzma_bool, because using stdbool.h would + * break C89 and C++ programs on many systems. sizeof(bool) in C99 isn't + * necessarily the same as sizeof(bool) in C++. + */ + +#ifndef LZMA_MANUAL_HEADERS + /* + * I suppose this works portably also in C++. Note that in C++, + * we need to get size_t into the global namespace. + */ +# include + + /* + * Skip inttypes.h if we already have all the required macros. If we + * have the macros, we assume that we have the matching typedefs too. + */ +# if !defined(UINT32_C) || !defined(UINT64_C) \ + || !defined(UINT32_MAX) || !defined(UINT64_MAX) + /* + * MSVC versions older than 2013 have no C99 support, and + * thus they cannot be used to compile liblzma. Using an + * existing liblzma.dll with old MSVC can work though(*), + * but we need to define the required standard integer + * types here in a MSVC-specific way. + * + * (*) If you do this, the existing liblzma.dll probably uses + * a different runtime library than your MSVC-built + * application. Mixing runtimes is generally bad, but + * in this case it should work as long as you avoid + * the few rarely-needed liblzma functions that allocate + * memory and expect the caller to free it using free(). + */ +# if defined(_WIN32) && defined(_MSC_VER) && _MSC_VER < 1800 + typedef unsigned __int8 uint8_t; + typedef unsigned __int32 uint32_t; + typedef unsigned __int64 uint64_t; +# else + /* Use the standard inttypes.h. */ +# ifdef __cplusplus + /* + * C99 sections 7.18.2 and 7.18.4 specify + * that C++ implementations define the limit + * and constant macros only if specifically + * requested. Note that if you want the + * format macros (PRIu64 etc.) too, you need + * to define __STDC_FORMAT_MACROS before + * including lzma.h, since re-including + * inttypes.h with __STDC_FORMAT_MACROS + * defined doesn't necessarily work. + */ +# ifndef __STDC_LIMIT_MACROS +# define __STDC_LIMIT_MACROS 1 +# endif +# ifndef __STDC_CONSTANT_MACROS +# define __STDC_CONSTANT_MACROS 1 +# endif +# endif + +# include +# endif + + /* + * Some old systems have only the typedefs in inttypes.h, and + * lack all the macros. For those systems, we need a few more + * hacks. We assume that unsigned int is 32-bit and unsigned + * long is either 32-bit or 64-bit. If these hacks aren't + * enough, the application has to setup the types manually + * before including lzma.h. + */ +# ifndef UINT32_C +# if defined(_WIN32) && defined(_MSC_VER) +# define UINT32_C(n) n ## UI32 +# else +# define UINT32_C(n) n ## U +# endif +# endif + +# ifndef UINT64_C +# if defined(_WIN32) && defined(_MSC_VER) +# define UINT64_C(n) n ## UI64 +# else + /* Get ULONG_MAX. */ +# include +# if ULONG_MAX == 4294967295UL +# define UINT64_C(n) n ## ULL +# else +# define UINT64_C(n) n ## UL +# endif +# endif +# endif + +# ifndef UINT32_MAX +# define UINT32_MAX (UINT32_C(4294967295)) +# endif + +# ifndef UINT64_MAX +# define UINT64_MAX (UINT64_C(18446744073709551615)) +# endif +# endif +#endif /* ifdef LZMA_MANUAL_HEADERS */ + + +/****************** + * LZMA_API macro * + ******************/ + +/* + * Some systems require that the functions and function pointers are + * declared specially in the headers. LZMA_API_IMPORT is for importing + * symbols and LZMA_API_CALL is to specify the calling convention. + * + * By default it is assumed that the application will link dynamically + * against liblzma. #define LZMA_API_STATIC in your application if you + * want to link against static liblzma. If you don't care about portability + * to operating systems like Windows, or at least don't care about linking + * against static liblzma on them, don't worry about LZMA_API_STATIC. That + * is, most developers will never need to use LZMA_API_STATIC. + * + * The GCC variants are a special case on Windows (Cygwin and MinGW-w64). + * We rely on GCC doing the right thing with its auto-import feature, + * and thus don't use __declspec(dllimport). This way developers don't + * need to worry about LZMA_API_STATIC. Also the calling convention is + * omitted on Cygwin but not on MinGW-w64. + */ +#ifndef LZMA_API_IMPORT +# if !defined(LZMA_API_STATIC) && defined(_WIN32) && !defined(__GNUC__) +# define LZMA_API_IMPORT __declspec(dllimport) +# else +# define LZMA_API_IMPORT +# endif +#endif + +#ifndef LZMA_API_CALL +# if defined(_WIN32) && !defined(__CYGWIN__) +# define LZMA_API_CALL __cdecl +# else +# define LZMA_API_CALL +# endif +#endif + +#ifndef LZMA_API +# define LZMA_API(type) LZMA_API_IMPORT type LZMA_API_CALL +#endif + + +/*********** + * nothrow * + ***********/ + +/* + * None of the functions in liblzma may throw an exception. Even + * the functions that use callback functions won't throw exceptions, + * because liblzma would break if a callback function threw an exception. + */ +#ifndef lzma_nothrow +# if defined(__cplusplus) +# if __cplusplus >= 201103L || (defined(_MSVC_LANG) \ + && _MSVC_LANG >= 201103L) +# define lzma_nothrow noexcept +# else +# define lzma_nothrow throw() +# endif +# elif defined(__GNUC__) && (__GNUC__ > 3 \ + || (__GNUC__ == 3 && __GNUC_MINOR__ >= 3)) +# define lzma_nothrow __attribute__((__nothrow__)) +# else +# define lzma_nothrow +# endif +#endif + + +/******************** + * GNU C extensions * + ********************/ + +/* + * GNU C extensions are used conditionally in the public API. It doesn't + * break anything if these are sometimes enabled and sometimes not, only + * affects warnings and optimizations. + */ +#if defined(__GNUC__) && __GNUC__ >= 3 +# ifndef lzma_attribute +# define lzma_attribute(attr) __attribute__(attr) +# endif + + /* warn_unused_result was added in GCC 3.4. */ +# ifndef lzma_attr_warn_unused_result +# if __GNUC__ == 3 && __GNUC_MINOR__ < 4 +# define lzma_attr_warn_unused_result +# endif +# endif + +#else +# ifndef lzma_attribute +# define lzma_attribute(attr) +# endif +#endif + + +#ifndef lzma_attr_pure +# define lzma_attr_pure lzma_attribute((__pure__)) +#endif + +#ifndef lzma_attr_const +# define lzma_attr_const lzma_attribute((__const__)) +#endif + +#ifndef lzma_attr_warn_unused_result +# define lzma_attr_warn_unused_result \ + lzma_attribute((__warn_unused_result__)) +#endif + + +/************** + * Subheaders * + **************/ + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * Subheaders check that this is defined. It is to prevent including + * them directly from applications. + */ +#define LZMA_H_INTERNAL 1 + +/* Basic features */ +#include "lzma/version.h" +#include "lzma/base.h" +#include "lzma/vli.h" +#include "lzma/check.h" + +/* Filters */ +#include "lzma/filter.h" +#include "lzma/bcj.h" +#include "lzma/delta.h" +#include "lzma/lzma12.h" + +/* Container formats */ +#include "lzma/container.h" + +/* Advanced features */ +#include "lzma/stream_flags.h" +#include "lzma/block.h" +#include "lzma/index.h" +#include "lzma/index_hash.h" + +/* Hardware information */ +#include "lzma/hardware.h" + +/* + * All subheaders included. Undefine LZMA_H_INTERNAL to prevent applications + * re-including the subheaders. + */ +#undef LZMA_H_INTERNAL + +#ifdef __cplusplus +} +#endif + +#endif /* ifndef LZMA_H */ diff --git a/src/native/external/xz/src/liblzma/api/lzma/base.h b/src/native/external/xz/src/liblzma/api/lzma/base.h new file mode 100644 index 00000000000000..00499381426a84 --- /dev/null +++ b/src/native/external/xz/src/liblzma/api/lzma/base.h @@ -0,0 +1,749 @@ +/* SPDX-License-Identifier: 0BSD */ + +/** + * \file lzma/base.h + * \brief Data types and functions used in many places in liblzma API + * \note Never include this file directly. Use instead. + */ + +/* + * Author: Lasse Collin + */ + +#ifndef LZMA_H_INTERNAL +# error Never include this file directly. Use instead. +#endif + + +/** + * \brief Boolean + * + * This is here because C89 doesn't have stdbool.h. To set a value for + * variables having type lzma_bool, you can use + * - C99's 'true' and 'false' from stdbool.h; + * - C++'s internal 'true' and 'false'; or + * - integers one (true) and zero (false). + */ +typedef unsigned char lzma_bool; + + +/** + * \brief Type of reserved enumeration variable in structures + * + * To avoid breaking library ABI when new features are added, several + * structures contain extra variables that may be used in future. Since + * sizeof(enum) can be different than sizeof(int), and sizeof(enum) may + * even vary depending on the range of enumeration constants, we specify + * a separate type to be used for reserved enumeration variables. All + * enumeration constants in liblzma API will be non-negative and less + * than 128, which should guarantee that the ABI won't break even when + * new constants are added to existing enumerations. + */ +typedef enum { + LZMA_RESERVED_ENUM = 0 +} lzma_reserved_enum; + + +/** + * \brief Return values used by several functions in liblzma + * + * Check the descriptions of specific functions to find out which return + * values they can return. With some functions the return values may have + * more specific meanings than described here; those differences are + * described per-function basis. + */ +typedef enum { + LZMA_OK = 0, + /**< + * \brief Operation completed successfully + */ + + LZMA_STREAM_END = 1, + /**< + * \brief End of stream was reached + * + * In encoder, LZMA_SYNC_FLUSH, LZMA_FULL_FLUSH, or + * LZMA_FINISH was finished. In decoder, this indicates + * that all the data was successfully decoded. + * + * In all cases, when LZMA_STREAM_END is returned, the last + * output bytes should be picked from strm->next_out. + */ + + LZMA_NO_CHECK = 2, + /**< + * \brief Input stream has no integrity check + * + * This return value can be returned only if the + * LZMA_TELL_NO_CHECK flag was used when initializing + * the decoder. LZMA_NO_CHECK is just a warning, and + * the decoding can be continued normally. + * + * It is possible to call lzma_get_check() immediately after + * lzma_code has returned LZMA_NO_CHECK. The result will + * naturally be LZMA_CHECK_NONE, but the possibility to call + * lzma_get_check() may be convenient in some applications. + */ + + LZMA_UNSUPPORTED_CHECK = 3, + /**< + * \brief Cannot calculate the integrity check + * + * The usage of this return value is different in encoders + * and decoders. + * + * Encoders can return this value only from the initialization + * function. If initialization fails with this value, the + * encoding cannot be done, because there's no way to produce + * output with the correct integrity check. + * + * Decoders can return this value only from lzma_code() and + * only if the LZMA_TELL_UNSUPPORTED_CHECK flag was used when + * initializing the decoder. The decoding can still be + * continued normally even if the check type is unsupported, + * but naturally the check will not be validated, and possible + * errors may go undetected. + * + * With decoder, it is possible to call lzma_get_check() + * immediately after lzma_code() has returned + * LZMA_UNSUPPORTED_CHECK. This way it is possible to find + * out what the unsupported Check ID was. + */ + + LZMA_GET_CHECK = 4, + /**< + * \brief Integrity check type is now available + * + * This value can be returned only by the lzma_code() function + * and only if the decoder was initialized with the + * LZMA_TELL_ANY_CHECK flag. LZMA_GET_CHECK tells the + * application that it may now call lzma_get_check() to find + * out the Check ID. This can be used, for example, to + * implement a decoder that accepts only files that have + * strong enough integrity check. + */ + + LZMA_MEM_ERROR = 5, + /**< + * \brief Cannot allocate memory + * + * Memory allocation failed, or the size of the allocation + * would be greater than SIZE_MAX. + * + * Due to internal implementation reasons, the coding cannot + * be continued even if more memory were made available after + * LZMA_MEM_ERROR. + */ + + LZMA_MEMLIMIT_ERROR = 6, + /**< + * \brief Memory usage limit was reached + * + * Decoder would need more memory than allowed by the + * specified memory usage limit. To continue decoding, + * the memory usage limit has to be increased with + * lzma_memlimit_set(). + * + * liblzma 5.2.6 and earlier had a bug in single-threaded .xz + * decoder (lzma_stream_decoder()) which made it impossible + * to continue decoding after LZMA_MEMLIMIT_ERROR even if + * the limit was increased using lzma_memlimit_set(). + * Other decoders worked correctly. + */ + + LZMA_FORMAT_ERROR = 7, + /**< + * \brief File format not recognized + * + * The decoder did not recognize the input as supported file + * format. This error can occur, for example, when trying to + * decode .lzma format file with lzma_stream_decoder, + * because lzma_stream_decoder accepts only the .xz format. + */ + + LZMA_OPTIONS_ERROR = 8, + /**< + * \brief Invalid or unsupported options + * + * Invalid or unsupported options, for example + * - unsupported filter(s) or filter options; or + * - reserved bits set in headers (decoder only). + * + * Rebuilding liblzma with more features enabled, or + * upgrading to a newer version of liblzma may help. + */ + + LZMA_DATA_ERROR = 9, + /**< + * \brief Data is corrupt + * + * The usage of this return value is different in encoders + * and decoders. In both encoder and decoder, the coding + * cannot continue after this error. + * + * Encoders return this if size limits of the target file + * format would be exceeded. These limits are huge, thus + * getting this error from an encoder is mostly theoretical. + * For example, the maximum compressed and uncompressed + * size of a .xz Stream is roughly 8 EiB (2^63 bytes). + * + * Decoders return this error if the input data is corrupt. + * This can mean, for example, invalid CRC32 in headers + * or invalid check of uncompressed data. + */ + + LZMA_BUF_ERROR = 10, + /**< + * \brief No progress is possible + * + * This error code is returned when the coder cannot consume + * any new input and produce any new output. The most common + * reason for this error is that the input stream being + * decoded is truncated or corrupt. + * + * This error is not fatal. Coding can be continued normally + * by providing more input and/or more output space, if + * possible. + * + * Typically the first call to lzma_code() that can do no + * progress returns LZMA_OK instead of LZMA_BUF_ERROR. Only + * the second consecutive call doing no progress will return + * LZMA_BUF_ERROR. This is intentional. + * + * With zlib, Z_BUF_ERROR may be returned even if the + * application is doing nothing wrong, so apps will need + * to handle Z_BUF_ERROR specially. The above hack + * guarantees that liblzma never returns LZMA_BUF_ERROR + * to properly written applications unless the input file + * is truncated or corrupt. This should simplify the + * applications a little. + */ + + LZMA_PROG_ERROR = 11, + /**< + * \brief Programming error + * + * This indicates that the arguments given to the function are + * invalid or the internal state of the decoder is corrupt. + * - Function arguments are invalid or the structures + * pointed by the argument pointers are invalid + * e.g. if strm->next_out has been set to NULL and + * strm->avail_out > 0 when calling lzma_code(). + * - lzma_* functions have been called in wrong order + * e.g. lzma_code() was called right after lzma_end(). + * - If errors occur randomly, the reason might be flaky + * hardware. + * + * If you think that your code is correct, this error code + * can be a sign of a bug in liblzma. See the documentation + * how to report bugs. + */ + + LZMA_SEEK_NEEDED = 12, + /**< + * \brief Request to change the input file position + * + * Some coders can do random access in the input file. The + * initialization functions of these coders take the file size + * as an argument. No other coders can return LZMA_SEEK_NEEDED. + * + * When this value is returned, the application must seek to + * the file position given in lzma_stream.seek_pos. This value + * is guaranteed to never exceed the file size that was + * specified at the coder initialization. + * + * After seeking the application should read new input and + * pass it normally via lzma_stream.next_in and .avail_in. + */ + + /* + * These enumerations may be used internally by liblzma + * but they will never be returned to applications. + */ + LZMA_RET_INTERNAL1 = 101, + LZMA_RET_INTERNAL2 = 102, + LZMA_RET_INTERNAL3 = 103, + LZMA_RET_INTERNAL4 = 104, + LZMA_RET_INTERNAL5 = 105, + LZMA_RET_INTERNAL6 = 106, + LZMA_RET_INTERNAL7 = 107, + LZMA_RET_INTERNAL8 = 108 +} lzma_ret; + + +/** + * \brief The 'action' argument for lzma_code() + * + * After the first use of LZMA_SYNC_FLUSH, LZMA_FULL_FLUSH, LZMA_FULL_BARRIER, + * or LZMA_FINISH, the same 'action' must be used until lzma_code() returns + * LZMA_STREAM_END. Also, the amount of input (that is, strm->avail_in) must + * not be modified by the application until lzma_code() returns + * LZMA_STREAM_END. Changing the 'action' or modifying the amount of input + * will make lzma_code() return LZMA_PROG_ERROR. + */ +typedef enum { + LZMA_RUN = 0, + /**< + * \brief Continue coding + * + * Encoder: Encode as much input as possible. Some internal + * buffering will probably be done (depends on the filter + * chain in use), which causes latency: the input used won't + * usually be decodeable from the output of the same + * lzma_code() call. + * + * Decoder: Decode as much input as possible and produce as + * much output as possible. + */ + + LZMA_SYNC_FLUSH = 1, + /**< + * \brief Make all the input available at output + * + * Normally the encoder introduces some latency. + * LZMA_SYNC_FLUSH forces all the buffered data to be + * available at output without resetting the internal + * state of the encoder. This way it is possible to use + * compressed stream for example for communication over + * network. + * + * Only some filters support LZMA_SYNC_FLUSH. Trying to use + * LZMA_SYNC_FLUSH with filters that don't support it will + * make lzma_code() return LZMA_OPTIONS_ERROR. For example, + * LZMA1 doesn't support LZMA_SYNC_FLUSH but LZMA2 does. + * + * Using LZMA_SYNC_FLUSH very often can dramatically reduce + * the compression ratio. With some filters (for example, + * LZMA2), fine-tuning the compression options may help + * mitigate this problem significantly (for example, + * match finder with LZMA2). + * + * Decoders don't support LZMA_SYNC_FLUSH. + */ + + LZMA_FULL_FLUSH = 2, + /**< + * \brief Finish encoding of the current Block + * + * All the input data going to the current Block must have + * been given to the encoder (the last bytes can still be + * pending in *next_in). Call lzma_code() with LZMA_FULL_FLUSH + * until it returns LZMA_STREAM_END. Then continue normally + * with LZMA_RUN or finish the Stream with LZMA_FINISH. + * + * This action is currently supported only by Stream encoder + * and easy encoder (which uses Stream encoder). If there is + * no unfinished Block, no empty Block is created. + */ + + LZMA_FULL_BARRIER = 4, + /**< + * \brief Finish encoding of the current Block + * + * This is like LZMA_FULL_FLUSH except that this doesn't + * necessarily wait until all the input has been made + * available via the output buffer. That is, lzma_code() + * might return LZMA_STREAM_END as soon as all the input + * has been consumed (avail_in == 0). + * + * LZMA_FULL_BARRIER is useful with a threaded encoder if + * one wants to split the .xz Stream into Blocks at specific + * offsets but doesn't care if the output isn't flushed + * immediately. Using LZMA_FULL_BARRIER allows keeping + * the threads busy while LZMA_FULL_FLUSH would make + * lzma_code() wait until all the threads have finished + * until more data could be passed to the encoder. + * + * With a lzma_stream initialized with the single-threaded + * lzma_stream_encoder() or lzma_easy_encoder(), + * LZMA_FULL_BARRIER is an alias for LZMA_FULL_FLUSH. + */ + + LZMA_FINISH = 3 + /**< + * \brief Finish the coding operation + * + * All the input data must have been given to the encoder + * (the last bytes can still be pending in next_in). + * Call lzma_code() with LZMA_FINISH until it returns + * LZMA_STREAM_END. Once LZMA_FINISH has been used, + * the amount of input must no longer be changed by + * the application. + * + * When decoding, using LZMA_FINISH is optional unless the + * LZMA_CONCATENATED flag was used when the decoder was + * initialized. When LZMA_CONCATENATED was not used, the only + * effect of LZMA_FINISH is that the amount of input must not + * be changed just like in the encoder. + */ +} lzma_action; + + +/** + * \brief Custom functions for memory handling + * + * A pointer to lzma_allocator may be passed via lzma_stream structure + * to liblzma, and some advanced functions take a pointer to lzma_allocator + * as a separate function argument. The library will use the functions + * specified in lzma_allocator for memory handling instead of the default + * malloc() and free(). C++ users should note that the custom memory + * handling functions must not throw exceptions. + * + * Single-threaded mode only: liblzma doesn't make an internal copy of + * lzma_allocator. Thus, it is OK to change these function pointers in + * the middle of the coding process, but obviously it must be done + * carefully to make sure that the replacement 'free' can deallocate + * memory allocated by the earlier 'alloc' function(s). + * + * Multithreaded mode: liblzma might internally store pointers to the + * lzma_allocator given via the lzma_stream structure. The application + * must not change the allocator pointer in lzma_stream or the contents + * of the pointed lzma_allocator structure until lzma_end() has been used + * to free the memory associated with that lzma_stream. The allocation + * functions might be called simultaneously from multiple threads, and + * thus they must be thread safe. + */ +typedef struct { + /** + * \brief Pointer to a custom memory allocation function + * + * If you don't want a custom allocator, but still want + * custom free(), set this to NULL and liblzma will use + * the standard malloc(). + * + * \param opaque lzma_allocator.opaque (see below) + * \param nmemb Number of elements like in calloc(). liblzma + * will always set nmemb to 1, so it is safe to + * ignore nmemb in a custom allocator if you like. + * The nmemb argument exists only for + * compatibility with zlib and libbzip2. + * \param size Size of an element in bytes. + * liblzma never sets this to zero. + * + * \return Pointer to the beginning of a memory block of + * 'size' bytes, or NULL if allocation fails + * for some reason. When allocation fails, functions + * of liblzma return LZMA_MEM_ERROR. + * + * The allocator should not waste time zeroing the allocated buffers. + * This is not only about speed, but also memory usage, since the + * operating system kernel doesn't necessarily allocate the requested + * memory in physical memory until it is actually used. With small + * input files, liblzma may actually need only a fraction of the + * memory that it requested for allocation. + * + * \note LZMA_MEM_ERROR is also used when the size of the + * allocation would be greater than SIZE_MAX. Thus, + * don't assume that the custom allocator must have + * returned NULL if some function from liblzma + * returns LZMA_MEM_ERROR. + */ + void *(LZMA_API_CALL *alloc)(void *opaque, size_t nmemb, size_t size); + + /** + * \brief Pointer to a custom memory freeing function + * + * If you don't want a custom freeing function, but still + * want a custom allocator, set this to NULL and liblzma + * will use the standard free(). + * + * \param opaque lzma_allocator.opaque (see below) + * \param ptr Pointer returned by lzma_allocator.alloc(), + * or when it is set to NULL, a pointer returned + * by the standard malloc(). In addition, NULL + * is a possible value. The function should do + * nothing when ptr == NULL. + */ + void (LZMA_API_CALL *free)(void *opaque, void *ptr); + + /** + * \brief Pointer passed to .alloc() and .free() + * + * opaque is passed as the first argument to lzma_allocator.alloc() + * and lzma_allocator.free(). This intended to ease implementing + * custom memory allocation functions for use with liblzma. + * + * If you don't need this, you should set this to NULL. + */ + void *opaque; + +} lzma_allocator; + + +/** + * \brief Internal data structure + * + * The contents of this structure is not visible outside the library. + */ +typedef struct lzma_internal_s lzma_internal; + + +/** + * \brief Passing data to and from liblzma + * + * The lzma_stream structure is used for + * - passing pointers to input and output buffers to liblzma; + * - defining custom memory handler functions; and + * - holding a pointer to coder-specific internal data structures. + * + * Typical usage: + * + * - After allocating lzma_stream (on stack or with malloc()), it must be + * initialized to LZMA_STREAM_INIT (see LZMA_STREAM_INIT for details). + * + * - Initialize a coder to the lzma_stream, for example by using + * lzma_easy_encoder() or lzma_auto_decoder(). Some notes: + * - In contrast to zlib, strm->next_in and strm->next_out are + * ignored by all initialization functions, thus it is safe + * to not initialize them yet. + * - The initialization functions always set strm->total_in and + * strm->total_out to zero. + * - If the initialization function fails, no memory is left allocated + * that would require freeing with lzma_end() even if some memory was + * associated with the lzma_stream structure when the initialization + * function was called. + * + * - Use lzma_code() to do the actual work. + * + * - Once the coding has been finished, the existing lzma_stream can be + * reused. It is OK to reuse lzma_stream with different initialization + * function without calling lzma_end() first. Old allocations are + * automatically freed. + * + * - Finally, use lzma_end() to free the allocated memory. lzma_end() never + * frees the lzma_stream structure itself. + * + * Application may modify the values of total_in and total_out as it wants. + * They are updated by liblzma to match the amount of data read and + * written but aren't used for anything else except as a possible return + * values from lzma_get_progress(). + */ +typedef struct { + const uint8_t *next_in; /**< Pointer to the next input byte. */ + size_t avail_in; /**< Number of available input bytes in next_in. */ + uint64_t total_in; /**< Total number of bytes read by liblzma. */ + + uint8_t *next_out; /**< Pointer to the next output position. */ + size_t avail_out; /**< Amount of free space in next_out. */ + uint64_t total_out; /**< Total number of bytes written by liblzma. */ + + /** + * \brief Custom memory allocation functions + * + * In most cases this is NULL which makes liblzma use + * the standard malloc() and free(). + * + * \note In 5.0.x this is not a const pointer. + */ + const lzma_allocator *allocator; + + /** Internal state is not visible to applications. */ + lzma_internal *internal; + + /* + * Reserved space to allow possible future extensions without + * breaking the ABI. Excluding the initialization of this structure, + * you should not touch these, because the names of these variables + * may change. + */ + + /** \private Reserved member. */ + void *reserved_ptr1; + + /** \private Reserved member. */ + void *reserved_ptr2; + + /** \private Reserved member. */ + void *reserved_ptr3; + + /** \private Reserved member. */ + void *reserved_ptr4; + + /** + * \brief New seek input position for LZMA_SEEK_NEEDED + * + * When lzma_code() returns LZMA_SEEK_NEEDED, the new input position + * needed by liblzma will be available in seek_pos. The value is + * guaranteed to not exceed the file size that was specified when + * this lzma_stream was initialized. + * + * In all other situations the value of this variable is undefined. + */ + uint64_t seek_pos; + + /** \private Reserved member. */ + uint64_t reserved_int2; + + /** \private Reserved member. */ + size_t reserved_int3; + + /** \private Reserved member. */ + size_t reserved_int4; + + /** \private Reserved member. */ + lzma_reserved_enum reserved_enum1; + + /** \private Reserved member. */ + lzma_reserved_enum reserved_enum2; + +} lzma_stream; + + +/** + * \brief Initialization for lzma_stream + * + * When you declare an instance of lzma_stream, you can immediately + * initialize it so that initialization functions know that no memory + * has been allocated yet: + * + * lzma_stream strm = LZMA_STREAM_INIT; + * + * If you need to initialize a dynamically allocated lzma_stream, you can use + * memset(strm_pointer, 0, sizeof(lzma_stream)). Strictly speaking, this + * violates the C standard since NULL may have different internal + * representation than zero, but it should be portable enough in practice. + * Anyway, for maximum portability, you can use something like this: + * + * lzma_stream tmp = LZMA_STREAM_INIT; + * *strm = tmp; + */ +#define LZMA_STREAM_INIT \ + { NULL, 0, 0, NULL, 0, 0, NULL, NULL, \ + NULL, NULL, NULL, NULL, 0, 0, 0, 0, \ + LZMA_RESERVED_ENUM, LZMA_RESERVED_ENUM } + + +/** + * \brief Encode or decode data + * + * Once the lzma_stream has been successfully initialized (e.g. with + * lzma_stream_encoder()), the actual encoding or decoding is done + * using this function. The application has to update strm->next_in, + * strm->avail_in, strm->next_out, and strm->avail_out to pass input + * to and get output from liblzma. + * + * See the description of the coder-specific initialization function to find + * out what 'action' values are supported by the coder. + * + * \param strm Pointer to lzma_stream that is at least initialized + * with LZMA_STREAM_INIT. + * \param action Action for this function to take. Must be a valid + * lzma_action enum value. + * + * \return Any valid lzma_ret. See the lzma_ret enum description for more + * information. + */ +extern LZMA_API(lzma_ret) lzma_code(lzma_stream *strm, lzma_action action) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Free memory allocated for the coder data structures + * + * After lzma_end(strm), strm->internal is guaranteed to be NULL. No other + * members of the lzma_stream structure are touched. + * + * \note zlib indicates an error if application end()s unfinished + * stream structure. liblzma doesn't do this, and assumes that + * application knows what it is doing. + * + * \param strm Pointer to lzma_stream that is at least initialized + * with LZMA_STREAM_INIT. + */ +extern LZMA_API(void) lzma_end(lzma_stream *strm) lzma_nothrow; + + +/** + * \brief Get progress information + * + * In single-threaded mode, applications can get progress information from + * strm->total_in and strm->total_out. In multi-threaded mode this is less + * useful because a significant amount of both input and output data gets + * buffered internally by liblzma. This makes total_in and total_out give + * misleading information and also makes the progress indicator updates + * non-smooth. + * + * This function gives realistic progress information also in multi-threaded + * mode by taking into account the progress made by each thread. In + * single-threaded mode *progress_in and *progress_out are set to + * strm->total_in and strm->total_out, respectively. + * + * \param strm Pointer to lzma_stream that is at least + * initialized with LZMA_STREAM_INIT. + * \param[out] progress_in Pointer to the number of input bytes processed. + * \param[out] progress_out Pointer to the number of output bytes processed. + */ +extern LZMA_API(void) lzma_get_progress(lzma_stream *strm, + uint64_t *progress_in, uint64_t *progress_out) lzma_nothrow; + + +/** + * \brief Get the memory usage of decoder filter chain + * + * This function is currently supported only when *strm has been initialized + * with a function that takes a memlimit argument. With other functions, you + * should use e.g. lzma_raw_encoder_memusage() or lzma_raw_decoder_memusage() + * to estimate the memory requirements. + * + * This function is useful e.g. after LZMA_MEMLIMIT_ERROR to find out how big + * the memory usage limit should have been to decode the input. Note that + * this may give misleading information if decoding .xz Streams that have + * multiple Blocks, because each Block can have different memory requirements. + * + * \param strm Pointer to lzma_stream that is at least initialized + * with LZMA_STREAM_INIT. + * + * \return How much memory is currently allocated for the filter + * decoders. If no filter chain is currently allocated, + * some non-zero value is still returned, which is less than + * or equal to what any filter chain would indicate as its + * memory requirement. + * + * If this function isn't supported by *strm or some other error + * occurs, zero is returned. + */ +extern LZMA_API(uint64_t) lzma_memusage(const lzma_stream *strm) + lzma_nothrow lzma_attr_pure; + + +/** + * \brief Get the current memory usage limit + * + * This function is supported only when *strm has been initialized with + * a function that takes a memlimit argument. + * + * \param strm Pointer to lzma_stream that is at least initialized + * with LZMA_STREAM_INIT. + * + * \return On success, the current memory usage limit is returned + * (always non-zero). On error, zero is returned. + */ +extern LZMA_API(uint64_t) lzma_memlimit_get(const lzma_stream *strm) + lzma_nothrow lzma_attr_pure; + + +/** + * \brief Set the memory usage limit + * + * This function is supported only when *strm has been initialized with + * a function that takes a memlimit argument. + * + * liblzma 5.2.3 and earlier has a bug where memlimit value of 0 causes + * this function to do nothing (leaving the limit unchanged) and still + * return LZMA_OK. Later versions treat 0 as if 1 had been specified (so + * lzma_memlimit_get() will return 1 even if you specify 0 here). + * + * liblzma 5.2.6 and earlier had a bug in single-threaded .xz decoder + * (lzma_stream_decoder()) which made it impossible to continue decoding + * after LZMA_MEMLIMIT_ERROR even if the limit was increased using + * lzma_memlimit_set(). Other decoders worked correctly. + * + * \return Possible lzma_ret values: + * - LZMA_OK: New memory usage limit successfully set. + * - LZMA_MEMLIMIT_ERROR: The new limit is too small. + * The limit was not changed. + * - LZMA_PROG_ERROR: Invalid arguments, e.g. *strm doesn't + * support memory usage limit. + */ +extern LZMA_API(lzma_ret) lzma_memlimit_set( + lzma_stream *strm, uint64_t memlimit) lzma_nothrow; diff --git a/src/native/external/xz/src/liblzma/api/lzma/bcj.h b/src/native/external/xz/src/liblzma/api/lzma/bcj.h new file mode 100644 index 00000000000000..fb737cbba49c7c --- /dev/null +++ b/src/native/external/xz/src/liblzma/api/lzma/bcj.h @@ -0,0 +1,195 @@ +/* SPDX-License-Identifier: 0BSD */ + +/** + * \file lzma/bcj.h + * \brief Branch/Call/Jump conversion filters + * \note Never include this file directly. Use instead. + */ + +/* + * Author: Lasse Collin + */ + +#ifndef LZMA_H_INTERNAL +# error Never include this file directly. Use instead. +#endif + + +/* Filter IDs for lzma_filter.id */ + +/** + * \brief Filter for x86 binaries + */ +#define LZMA_FILTER_X86 LZMA_VLI_C(0x04) + +/** + * \brief Filter for Big endian PowerPC binaries + */ +#define LZMA_FILTER_POWERPC LZMA_VLI_C(0x05) + +/** + * \brief Filter for IA-64 (Itanium) binaries + */ +#define LZMA_FILTER_IA64 LZMA_VLI_C(0x06) + +/** + * \brief Filter for ARM binaries + */ +#define LZMA_FILTER_ARM LZMA_VLI_C(0x07) + +/** + * \brief Filter for ARM-Thumb binaries + */ +#define LZMA_FILTER_ARMTHUMB LZMA_VLI_C(0x08) + +/** + * \brief Filter for SPARC binaries + */ +#define LZMA_FILTER_SPARC LZMA_VLI_C(0x09) + +/** + * \brief Filter for ARM64 binaries + */ +#define LZMA_FILTER_ARM64 LZMA_VLI_C(0x0A) + +/** + * \brief Filter for RISC-V binaries + */ +#define LZMA_FILTER_RISCV LZMA_VLI_C(0x0B) + + +/** + * \brief Options for BCJ filters + * + * The BCJ filters never change the size of the data. Specifying options + * for them is optional: if pointer to options is NULL, default value is + * used. You probably never need to specify options to BCJ filters, so just + * set the options pointer to NULL and be happy. + * + * If options with non-default values have been specified when encoding, + * the same options must also be specified when decoding. + * + * \note At the moment, none of the BCJ filters support + * LZMA_SYNC_FLUSH. If LZMA_SYNC_FLUSH is specified, + * LZMA_OPTIONS_ERROR will be returned. If there is need, + * partial support for LZMA_SYNC_FLUSH can be added in future. + * Partial means that flushing would be possible only at + * offsets that are multiple of 2, 4, or 16 depending on + * the filter, except x86 which cannot be made to support + * LZMA_SYNC_FLUSH predictably. + */ +typedef struct { + /** + * \brief Start offset for conversions + * + * This setting is useful only when the same filter is used + * _separately_ for multiple sections of the same executable file, + * and the sections contain cross-section branch/call/jump + * instructions. In that case it is beneficial to set the start + * offset of the non-first sections so that the relative addresses + * of the cross-section branch/call/jump instructions will use the + * same absolute addresses as in the first section. + * + * When the pointer to options is NULL, the default value (zero) + * is used. + */ + uint32_t start_offset; + +} lzma_options_bcj; + + +/** + * \brief Raw ARM64 BCJ encoder + * + * This is for special use cases only. + * + * \param start_offset The lowest 32 bits of the offset in the + * executable being filtered. For the ARM64 + * filter, this must be a multiple of four. + * For the very best results, this should also + * be in sync with 4096-byte page boundaries + * in the executable due to how ARM64's ADRP + * instruction works. + * \param buf Buffer to be filtered in place + * \param size Size of the buffer + * + * \return Number of bytes that were processed in `buf`. This is at most + * `size`. With the ARM64 filter, the return value is always + * a multiple of 4, and at most 3 bytes are left unfiltered. + * + * \since 5.7.1alpha + */ +extern LZMA_API(size_t) lzma_bcj_arm64_encode( + uint32_t start_offset, uint8_t *buf, size_t size) lzma_nothrow; + +/** + * \brief Raw ARM64 BCJ decoder + * + * See lzma_bcj_arm64_encode(). + * + * \since 5.7.1alpha + */ +extern LZMA_API(size_t) lzma_bcj_arm64_decode( + uint32_t start_offset, uint8_t *buf, size_t size) lzma_nothrow; + + +/** + * \brief Raw RISC-V BCJ encoder + * + * This is for special use cases only. + * + * \param start_offset The lowest 32 bits of the offset in the + * executable being filtered. For the RISC-V + * filter, this must be a multiple of 2. + * \param buf Buffer to be filtered in place + * \param size Size of the buffer + * + * \return Number of bytes that were processed in `buf`. This is at most + * `size`. With the RISC-V filter, the return value is always + * a multiple of 2, and at most 7 bytes are left unfiltered. + * + * \since 5.7.1alpha + */ +extern LZMA_API(size_t) lzma_bcj_riscv_encode( + uint32_t start_offset, uint8_t *buf, size_t size) lzma_nothrow; + +/** + * \brief Raw RISC-V BCJ decoder + * + * See lzma_bcj_riscv_encode(). + * + * \since 5.7.1alpha + */ +extern LZMA_API(size_t) lzma_bcj_riscv_decode( + uint32_t start_offset, uint8_t *buf, size_t size) lzma_nothrow; + + +/** + * \brief Raw x86 BCJ encoder + * + * This is for special use cases only. + * + * \param start_offset The lowest 32 bits of the offset in the + * executable being filtered. For the x86 + * filter, all values are valid. + * \param buf Buffer to be filtered in place + * \param size Size of the buffer + * + * \return Number of bytes that were processed in `buf`. This is at most + * `size`. For the x86 filter, the return value is always + * a multiple of 1, and at most 4 bytes are left unfiltered. + * + * \since 5.7.1alpha + */ +extern LZMA_API(size_t) lzma_bcj_x86_encode( + uint32_t start_offset, uint8_t *buf, size_t size) lzma_nothrow; + +/** + * \brief Raw x86 BCJ decoder + * + * See lzma_bcj_x86_encode(). + * + * \since 5.7.1alpha + */ +extern LZMA_API(size_t) lzma_bcj_x86_decode( + uint32_t start_offset, uint8_t *buf, size_t size) lzma_nothrow; diff --git a/src/native/external/xz/src/liblzma/api/lzma/block.h b/src/native/external/xz/src/liblzma/api/lzma/block.h new file mode 100644 index 00000000000000..05b77e59aabbbd --- /dev/null +++ b/src/native/external/xz/src/liblzma/api/lzma/block.h @@ -0,0 +1,694 @@ +/* SPDX-License-Identifier: 0BSD */ + +/** + * \file lzma/block.h + * \brief .xz Block handling + * \note Never include this file directly. Use instead. + */ + +/* + * Author: Lasse Collin + */ + +#ifndef LZMA_H_INTERNAL +# error Never include this file directly. Use instead. +#endif + + +/** + * \brief Options for the Block and Block Header encoders and decoders + * + * Different Block handling functions use different parts of this structure. + * Some read some members, other functions write, and some do both. Only the + * members listed for reading need to be initialized when the specified + * functions are called. The members marked for writing will be assigned + * new values at some point either by calling the given function or by + * later calls to lzma_code(). + */ +typedef struct { + /** + * \brief Block format version + * + * To prevent API and ABI breakages when new features are needed, + * a version number is used to indicate which members in this + * structure are in use: + * - liblzma >= 5.0.0: version = 0 is supported. + * - liblzma >= 5.1.4beta: Support for version = 1 was added, + * which adds the ignore_check member. + * + * If version is greater than one, most Block related functions + * will return LZMA_OPTIONS_ERROR (lzma_block_header_decode() works + * with any version value). + * + * Read by: + * - lzma_block_header_size() + * - lzma_block_header_encode() + * - lzma_block_header_decode() + * - lzma_block_compressed_size() + * - lzma_block_unpadded_size() + * - lzma_block_total_size() + * - lzma_block_encoder() + * - lzma_block_decoder() + * - lzma_block_buffer_encode() + * - lzma_block_uncomp_encode() + * - lzma_block_buffer_decode() + * + * Written by: + * - lzma_block_header_decode() + */ + uint32_t version; + + /** + * \brief Size of the Block Header field in bytes + * + * This is always a multiple of four. + * + * Read by: + * - lzma_block_header_encode() + * - lzma_block_header_decode() + * - lzma_block_compressed_size() + * - lzma_block_unpadded_size() + * - lzma_block_total_size() + * - lzma_block_decoder() + * - lzma_block_buffer_decode() + * + * Written by: + * - lzma_block_header_size() + * - lzma_block_buffer_encode() + * - lzma_block_uncomp_encode() + */ + uint32_t header_size; +# define LZMA_BLOCK_HEADER_SIZE_MIN 8 +# define LZMA_BLOCK_HEADER_SIZE_MAX 1024 + + /** + * \brief Type of integrity Check + * + * The Check ID is not stored into the Block Header, thus its value + * must be provided also when decoding. + * + * Read by: + * - lzma_block_header_encode() + * - lzma_block_header_decode() + * - lzma_block_compressed_size() + * - lzma_block_unpadded_size() + * - lzma_block_total_size() + * - lzma_block_encoder() + * - lzma_block_decoder() + * - lzma_block_buffer_encode() + * - lzma_block_buffer_decode() + */ + lzma_check check; + + /** + * \brief Size of the Compressed Data in bytes + * + * Encoding: If this is not LZMA_VLI_UNKNOWN, Block Header encoder + * will store this value to the Block Header. Block encoder doesn't + * care about this value, but will set it once the encoding has been + * finished. + * + * Decoding: If this is not LZMA_VLI_UNKNOWN, Block decoder will + * verify that the size of the Compressed Data field matches + * compressed_size. + * + * Usually you don't know this value when encoding in streamed mode, + * and thus cannot write this field into the Block Header. + * + * In non-streamed mode you can reserve space for this field before + * encoding the actual Block. After encoding the data, finish the + * Block by encoding the Block Header. Steps in detail: + * + * - Set compressed_size to some big enough value. If you don't know + * better, use LZMA_VLI_MAX, but remember that bigger values take + * more space in Block Header. + * + * - Call lzma_block_header_size() to see how much space you need to + * reserve for the Block Header. + * + * - Encode the Block using lzma_block_encoder() and lzma_code(). + * It sets compressed_size to the correct value. + * + * - Use lzma_block_header_encode() to encode the Block Header. + * Because space was reserved in the first step, you don't need + * to call lzma_block_header_size() anymore, because due to + * reserving, header_size has to be big enough. If it is "too big", + * lzma_block_header_encode() will add enough Header Padding to + * make Block Header to match the size specified by header_size. + * + * Read by: + * - lzma_block_header_size() + * - lzma_block_header_encode() + * - lzma_block_compressed_size() + * - lzma_block_unpadded_size() + * - lzma_block_total_size() + * - lzma_block_decoder() + * - lzma_block_buffer_decode() + * + * Written by: + * - lzma_block_header_decode() + * - lzma_block_compressed_size() + * - lzma_block_encoder() + * - lzma_block_decoder() + * - lzma_block_buffer_encode() + * - lzma_block_uncomp_encode() + * - lzma_block_buffer_decode() + */ + lzma_vli compressed_size; + + /** + * \brief Uncompressed Size in bytes + * + * This is handled very similarly to compressed_size above. + * + * uncompressed_size is needed by fewer functions than + * compressed_size. This is because uncompressed_size isn't + * needed to validate that Block stays within proper limits. + * + * Read by: + * - lzma_block_header_size() + * - lzma_block_header_encode() + * - lzma_block_decoder() + * - lzma_block_buffer_decode() + * + * Written by: + * - lzma_block_header_decode() + * - lzma_block_encoder() + * - lzma_block_decoder() + * - lzma_block_buffer_encode() + * - lzma_block_uncomp_encode() + * - lzma_block_buffer_decode() + */ + lzma_vli uncompressed_size; + + /** + * \brief Array of filters + * + * There can be 1-4 filters. The end of the array is marked with + * .id = LZMA_VLI_UNKNOWN. + * + * Read by: + * - lzma_block_header_size() + * - lzma_block_header_encode() + * - lzma_block_encoder() + * - lzma_block_decoder() + * - lzma_block_buffer_encode() + * - lzma_block_buffer_decode() + * + * Written by: + * - lzma_block_header_decode(): Note that this does NOT free() + * the old filter options structures. All unused filters[] will + * have .id == LZMA_VLI_UNKNOWN and .options == NULL. If + * decoding fails, all filters[] are guaranteed to be + * LZMA_VLI_UNKNOWN and NULL. + * + * \note Because of the array is terminated with + * .id = LZMA_VLI_UNKNOWN, the actual array must + * have LZMA_FILTERS_MAX + 1 members or the Block + * Header decoder will overflow the buffer. + */ + lzma_filter *filters; + + /** + * \brief Raw value stored in the Check field + * + * After successful coding, the first lzma_check_size(check) bytes + * of this array contain the raw value stored in the Check field. + * + * Note that CRC32 and CRC64 are stored in little endian byte order. + * Take it into account if you display the Check values to the user. + * + * Written by: + * - lzma_block_encoder() + * - lzma_block_decoder() + * - lzma_block_buffer_encode() + * - lzma_block_uncomp_encode() + * - lzma_block_buffer_decode() + */ + uint8_t raw_check[LZMA_CHECK_SIZE_MAX]; + + /* + * Reserved space to allow possible future extensions without + * breaking the ABI. You should not touch these, because the names + * of these variables may change. These are and will never be used + * with the currently supported options, so it is safe to leave these + * uninitialized. + */ + + /** \private Reserved member. */ + void *reserved_ptr1; + + /** \private Reserved member. */ + void *reserved_ptr2; + + /** \private Reserved member. */ + void *reserved_ptr3; + + /** \private Reserved member. */ + uint32_t reserved_int1; + + /** \private Reserved member. */ + uint32_t reserved_int2; + + /** \private Reserved member. */ + lzma_vli reserved_int3; + + /** \private Reserved member. */ + lzma_vli reserved_int4; + + /** \private Reserved member. */ + lzma_vli reserved_int5; + + /** \private Reserved member. */ + lzma_vli reserved_int6; + + /** \private Reserved member. */ + lzma_vli reserved_int7; + + /** \private Reserved member. */ + lzma_vli reserved_int8; + + /** \private Reserved member. */ + lzma_reserved_enum reserved_enum1; + + /** \private Reserved member. */ + lzma_reserved_enum reserved_enum2; + + /** \private Reserved member. */ + lzma_reserved_enum reserved_enum3; + + /** \private Reserved member. */ + lzma_reserved_enum reserved_enum4; + + /** + * \brief A flag to Block decoder to not verify the Check field + * + * This member is supported by liblzma >= 5.1.4beta if .version >= 1. + * + * If this is set to true, the integrity check won't be calculated + * and verified. Unless you know what you are doing, you should + * leave this to false. (A reason to set this to true is when the + * file integrity is verified externally anyway and you want to + * speed up the decompression, which matters mostly when using + * SHA-256 as the integrity check.) + * + * If .version >= 1, read by: + * - lzma_block_decoder() + * - lzma_block_buffer_decode() + * + * Written by (.version is ignored): + * - lzma_block_header_decode() always sets this to false + */ + lzma_bool ignore_check; + + /** \private Reserved member. */ + lzma_bool reserved_bool2; + + /** \private Reserved member. */ + lzma_bool reserved_bool3; + + /** \private Reserved member. */ + lzma_bool reserved_bool4; + + /** \private Reserved member. */ + lzma_bool reserved_bool5; + + /** \private Reserved member. */ + lzma_bool reserved_bool6; + + /** \private Reserved member. */ + lzma_bool reserved_bool7; + + /** \private Reserved member. */ + lzma_bool reserved_bool8; + +} lzma_block; + + +/** + * \brief Decode the Block Header Size field + * + * To decode Block Header using lzma_block_header_decode(), the size of the + * Block Header has to be known and stored into lzma_block.header_size. + * The size can be calculated from the first byte of a Block using this macro. + * Note that if the first byte is 0x00, it indicates beginning of Index; use + * this macro only when the byte is not 0x00. + * + * There is no encoding macro because lzma_block_header_size() and + * lzma_block_header_encode() should be used. + */ +#define lzma_block_header_size_decode(b) (((uint32_t)(b) + 1) * 4) + + +/** + * \brief Calculate Block Header Size + * + * Calculate the minimum size needed for the Block Header field using the + * settings specified in the lzma_block structure. Note that it is OK to + * increase the calculated header_size value as long as it is a multiple of + * four and doesn't exceed LZMA_BLOCK_HEADER_SIZE_MAX. Increasing header_size + * just means that lzma_block_header_encode() will add Header Padding. + * + * \note This doesn't check that all the options are valid i.e. this + * may return LZMA_OK even if lzma_block_header_encode() or + * lzma_block_encoder() would fail. If you want to validate the + * filter chain, consider using lzma_memlimit_encoder() which as + * a side-effect validates the filter chain. + * + * \param block Block options + * + * \return Possible lzma_ret values: + * - LZMA_OK: Size calculated successfully and stored to + * block->header_size. + * - LZMA_OPTIONS_ERROR: Unsupported version, filters or + * filter options. + * - LZMA_PROG_ERROR: Invalid values like compressed_size == 0. + */ +extern LZMA_API(lzma_ret) lzma_block_header_size(lzma_block *block) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Encode Block Header + * + * The caller must have calculated the size of the Block Header already with + * lzma_block_header_size(). If a value larger than the one calculated by + * lzma_block_header_size() is used, the Block Header will be padded to the + * specified size. + * + * \param block Block options to be encoded. + * \param[out] out Beginning of the output buffer. This must be + * at least block->header_size bytes. + * + * \return Possible lzma_ret values: + * - LZMA_OK: Encoding was successful. block->header_size + * bytes were written to output buffer. + * - LZMA_OPTIONS_ERROR: Invalid or unsupported options. + * - LZMA_PROG_ERROR: Invalid arguments, for example + * block->header_size is invalid or block->filters is NULL. + */ +extern LZMA_API(lzma_ret) lzma_block_header_encode( + const lzma_block *block, uint8_t *out) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Decode Block Header + * + * block->version should (usually) be set to the highest value supported + * by the application. If the application sets block->version to a value + * higher than supported by the current liblzma version, this function will + * downgrade block->version to the highest value supported by it. Thus one + * should check the value of block->version after calling this function if + * block->version was set to a non-zero value and the application doesn't + * otherwise know that the liblzma version being used is new enough to + * support the specified block->version. + * + * The size of the Block Header must have already been decoded with + * lzma_block_header_size_decode() macro and stored to block->header_size. + * + * The integrity check type from Stream Header must have been stored + * to block->check. + * + * block->filters must have been allocated, but they don't need to be + * initialized (possible existing filter options are not freed). + * + * \param[out] block Destination for Block options + * \param allocator lzma_allocator for custom allocator functions. + * Set to NULL to use malloc() (and also free() + * if an error occurs). + * \param in Beginning of the input buffer. This must be + * at least block->header_size bytes. + * + * \return Possible lzma_ret values: + * - LZMA_OK: Decoding was successful. block->header_size + * bytes were read from the input buffer. + * - LZMA_OPTIONS_ERROR: The Block Header specifies some + * unsupported options such as unsupported filters. This can + * happen also if block->version was set to a too low value + * compared to what would be required to properly represent + * the information stored in the Block Header. + * - LZMA_DATA_ERROR: Block Header is corrupt, for example, + * the CRC32 doesn't match. + * - LZMA_PROG_ERROR: Invalid arguments, for example + * block->header_size is invalid or block->filters is NULL. + */ +extern LZMA_API(lzma_ret) lzma_block_header_decode(lzma_block *block, + const lzma_allocator *allocator, const uint8_t *in) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Validate and set Compressed Size according to Unpadded Size + * + * Block Header stores Compressed Size, but Index has Unpadded Size. If the + * application has already parsed the Index and is now decoding Blocks, + * it can calculate Compressed Size from Unpadded Size. This function does + * exactly that with error checking: + * + * - Compressed Size calculated from Unpadded Size must be positive integer, + * that is, Unpadded Size must be big enough that after Block Header and + * Check fields there's still at least one byte for Compressed Size. + * + * - If Compressed Size was present in Block Header, the new value + * calculated from Unpadded Size is compared against the value + * from Block Header. + * + * \note This function must be called _after_ decoding the Block Header + * field so that it can properly validate Compressed Size if it + * was present in Block Header. + * + * \param block Block options: block->header_size must + * already be set with lzma_block_header_size(). + * \param unpadded_size Unpadded Size from the Index field in bytes + * + * \return Possible lzma_ret values: + * - LZMA_OK: block->compressed_size was set successfully. + * - LZMA_DATA_ERROR: unpadded_size is too small compared to + * block->header_size and lzma_check_size(block->check). + * - LZMA_PROG_ERROR: Some values are invalid. For example, + * block->header_size must be a multiple of four and + * between 8 and 1024 inclusive. + */ +extern LZMA_API(lzma_ret) lzma_block_compressed_size( + lzma_block *block, lzma_vli unpadded_size) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Calculate Unpadded Size + * + * The Index field stores Unpadded Size and Uncompressed Size. The latter + * can be taken directly from the lzma_block structure after coding a Block, + * but Unpadded Size needs to be calculated from Block Header Size, + * Compressed Size, and size of the Check field. This is where this function + * is needed. + * + * \param block Block options: block->header_size must already be + * set with lzma_block_header_size(). + * + * \return Unpadded Size on success, or zero on error. + */ +extern LZMA_API(lzma_vli) lzma_block_unpadded_size(const lzma_block *block) + lzma_nothrow lzma_attr_pure; + + +/** + * \brief Calculate the total encoded size of a Block + * + * This is equivalent to lzma_block_unpadded_size() except that the returned + * value includes the size of the Block Padding field. + * + * \param block Block options: block->header_size must already be + * set with lzma_block_header_size(). + * + * \return On success, total encoded size of the Block. On error, + * zero is returned. + */ +extern LZMA_API(lzma_vli) lzma_block_total_size(const lzma_block *block) + lzma_nothrow lzma_attr_pure; + + +/** + * \brief Initialize .xz Block encoder + * + * Valid actions for lzma_code() are LZMA_RUN, LZMA_SYNC_FLUSH (only if the + * filter chain supports it), and LZMA_FINISH. + * + * The Block encoder encodes the Block Data, Block Padding, and Check value. + * It does NOT encode the Block Header which can be encoded with + * lzma_block_header_encode(). + * + * \param strm Pointer to lzma_stream that is at least initialized + * with LZMA_STREAM_INIT. + * \param block Block options: block->version, block->check, + * and block->filters must have been initialized. + * + * \return Possible lzma_ret values: + * - LZMA_OK: All good, continue with lzma_code(). + * - LZMA_MEM_ERROR + * - LZMA_OPTIONS_ERROR + * - LZMA_UNSUPPORTED_CHECK: block->check specifies a Check ID + * that is not supported by this build of liblzma. Initializing + * the encoder failed. + * - LZMA_PROG_ERROR + */ +extern LZMA_API(lzma_ret) lzma_block_encoder( + lzma_stream *strm, lzma_block *block) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Initialize .xz Block decoder + * + * Valid actions for lzma_code() are LZMA_RUN and LZMA_FINISH. Using + * LZMA_FINISH is not required. It is supported only for convenience. + * + * The Block decoder decodes the Block Data, Block Padding, and Check value. + * It does NOT decode the Block Header which can be decoded with + * lzma_block_header_decode(). + * + * \param strm Pointer to lzma_stream that is at least initialized + * with LZMA_STREAM_INIT. + * \param block Block options + * + * \return Possible lzma_ret values: + * - LZMA_OK: All good, continue with lzma_code(). + * - LZMA_PROG_ERROR + * - LZMA_MEM_ERROR + */ +extern LZMA_API(lzma_ret) lzma_block_decoder( + lzma_stream *strm, lzma_block *block) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Calculate maximum output size for single-call Block encoding + * + * This is equivalent to lzma_stream_buffer_bound() but for .xz Blocks. + * See the documentation of lzma_stream_buffer_bound(). + * + * \param uncompressed_size Size of the data to be encoded with the + * single-call Block encoder. + * + * \return Maximum output size in bytes for single-call Block encoding. + */ +extern LZMA_API(size_t) lzma_block_buffer_bound(size_t uncompressed_size) + lzma_nothrow; + + +/** + * \brief Single-call .xz Block encoder + * + * In contrast to the multi-call encoder initialized with + * lzma_block_encoder(), this function encodes also the Block Header. This + * is required to make it possible to write appropriate Block Header also + * in case the data isn't compressible, and different filter chain has to be + * used to encode the data in uncompressed form using uncompressed chunks + * of the LZMA2 filter. + * + * When the data isn't compressible, header_size, compressed_size, and + * uncompressed_size are set just like when the data was compressible, but + * it is possible that header_size is too small to hold the filter chain + * specified in block->filters, because that isn't necessarily the filter + * chain that was actually used to encode the data. lzma_block_unpadded_size() + * still works normally, because it doesn't read the filters array. + * + * \param block Block options: block->version, block->check, + * and block->filters must have been initialized. + * \param allocator lzma_allocator for custom allocator functions. + * Set to NULL to use malloc() and free(). + * \param in Beginning of the input buffer + * \param in_size Size of the input buffer + * \param[out] out Beginning of the output buffer + * \param[out] out_pos The next byte will be written to out[*out_pos]. + * *out_pos is updated only if encoding succeeds. + * \param out_size Size of the out buffer; the first byte into + * which no data is written to is out[out_size]. + * + * \return Possible lzma_ret values: + * - LZMA_OK: Encoding was successful. + * - LZMA_BUF_ERROR: Not enough output buffer space. + * - LZMA_UNSUPPORTED_CHECK + * - LZMA_OPTIONS_ERROR + * - LZMA_MEM_ERROR + * - LZMA_DATA_ERROR + * - LZMA_PROG_ERROR + */ +extern LZMA_API(lzma_ret) lzma_block_buffer_encode( + lzma_block *block, const lzma_allocator *allocator, + const uint8_t *in, size_t in_size, + uint8_t *out, size_t *out_pos, size_t out_size) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Single-call uncompressed .xz Block encoder + * + * This is like lzma_block_buffer_encode() except this doesn't try to + * compress the data and instead encodes the data using LZMA2 uncompressed + * chunks. The required output buffer size can be determined with + * lzma_block_buffer_bound(). + * + * Since the data won't be compressed, this function ignores block->filters. + * This function doesn't take lzma_allocator because this function doesn't + * allocate any memory from the heap. + * + * \param block Block options: block->version, block->check, + * and block->filters must have been initialized. + * \param in Beginning of the input buffer + * \param in_size Size of the input buffer + * \param[out] out Beginning of the output buffer + * \param[out] out_pos The next byte will be written to out[*out_pos]. + * *out_pos is updated only if encoding succeeds. + * \param out_size Size of the out buffer; the first byte into + * which no data is written to is out[out_size]. + * + * \return Possible lzma_ret values: + * - LZMA_OK: Encoding was successful. + * - LZMA_BUF_ERROR: Not enough output buffer space. + * - LZMA_UNSUPPORTED_CHECK + * - LZMA_OPTIONS_ERROR + * - LZMA_MEM_ERROR + * - LZMA_DATA_ERROR + * - LZMA_PROG_ERROR + */ +extern LZMA_API(lzma_ret) lzma_block_uncomp_encode(lzma_block *block, + const uint8_t *in, size_t in_size, + uint8_t *out, size_t *out_pos, size_t out_size) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Single-call .xz Block decoder + * + * This is single-call equivalent of lzma_block_decoder(), and requires that + * the caller has already decoded Block Header and checked its memory usage. + * + * \param block Block options + * \param allocator lzma_allocator for custom allocator functions. + * Set to NULL to use malloc() and free(). + * \param in Beginning of the input buffer + * \param in_pos The next byte will be read from in[*in_pos]. + * *in_pos is updated only if decoding succeeds. + * \param in_size Size of the input buffer; the first byte that + * won't be read is in[in_size]. + * \param[out] out Beginning of the output buffer + * \param[out] out_pos The next byte will be written to out[*out_pos]. + * *out_pos is updated only if encoding succeeds. + * \param out_size Size of the out buffer; the first byte into + * which no data is written to is out[out_size]. + * + * \return Possible lzma_ret values: + * - LZMA_OK: Decoding was successful. + * - LZMA_OPTIONS_ERROR + * - LZMA_DATA_ERROR + * - LZMA_MEM_ERROR + * - LZMA_BUF_ERROR: Output buffer was too small. + * - LZMA_PROG_ERROR + */ +extern LZMA_API(lzma_ret) lzma_block_buffer_decode( + lzma_block *block, const lzma_allocator *allocator, + const uint8_t *in, size_t *in_pos, size_t in_size, + uint8_t *out, size_t *out_pos, size_t out_size) + lzma_nothrow; diff --git a/src/native/external/xz/src/liblzma/api/lzma/check.h b/src/native/external/xz/src/liblzma/api/lzma/check.h new file mode 100644 index 00000000000000..e7a50ed3a3c3dc --- /dev/null +++ b/src/native/external/xz/src/liblzma/api/lzma/check.h @@ -0,0 +1,163 @@ +/* SPDX-License-Identifier: 0BSD */ + +/** + * \file lzma/check.h + * \brief Integrity checks + * \note Never include this file directly. Use instead. + */ + +/* + * Author: Lasse Collin + */ + +#ifndef LZMA_H_INTERNAL +# error Never include this file directly. Use instead. +#endif + + +/** + * \brief Type of the integrity check (Check ID) + * + * The .xz format supports multiple types of checks that are calculated + * from the uncompressed data. They vary in both speed and ability to + * detect errors. + */ +typedef enum { + LZMA_CHECK_NONE = 0, + /**< + * No Check is calculated. + * + * Size of the Check field: 0 bytes + */ + + LZMA_CHECK_CRC32 = 1, + /**< + * CRC32 using the polynomial from the IEEE 802.3 standard + * + * Size of the Check field: 4 bytes + */ + + LZMA_CHECK_CRC64 = 4, + /**< + * CRC64 using the polynomial from the ECMA-182 standard + * + * Size of the Check field: 8 bytes + */ + + LZMA_CHECK_SHA256 = 10 + /**< + * SHA-256 + * + * Size of the Check field: 32 bytes + */ +} lzma_check; + + +/** + * \brief Maximum valid Check ID + * + * The .xz file format specification specifies 16 Check IDs (0-15). Some + * of them are only reserved, that is, no actual Check algorithm has been + * assigned. When decoding, liblzma still accepts unknown Check IDs for + * future compatibility. If a valid but unsupported Check ID is detected, + * liblzma can indicate a warning; see the flags LZMA_TELL_NO_CHECK, + * LZMA_TELL_UNSUPPORTED_CHECK, and LZMA_TELL_ANY_CHECK in container.h. + */ +#define LZMA_CHECK_ID_MAX 15 + + +/** + * \brief Test if the given Check ID is supported + * + * LZMA_CHECK_NONE and LZMA_CHECK_CRC32 are always supported (even if + * liblzma is built with limited features). + * + * \note It is safe to call this with a value that is not in the + * range [0, 15]; in that case the return value is always false. + * + * \param check Check ID + * + * \return lzma_bool: + * - true if Check ID is supported by this liblzma build. + * - false otherwise. + */ +extern LZMA_API(lzma_bool) lzma_check_is_supported(lzma_check check) + lzma_nothrow lzma_attr_const; + + +/** + * \brief Get the size of the Check field with the given Check ID + * + * Although not all Check IDs have a check algorithm associated, the size of + * every Check is already frozen. This function returns the size (in bytes) of + * the Check field with the specified Check ID. The values are: + * { 0, 4, 4, 4, 8, 8, 8, 16, 16, 16, 32, 32, 32, 64, 64, 64 } + * + * \param check Check ID + * + * \return Size of the Check field in bytes. If the argument is not in + * the range [0, 15], UINT32_MAX is returned. + */ +extern LZMA_API(uint32_t) lzma_check_size(lzma_check check) + lzma_nothrow lzma_attr_const; + + +/** + * \brief Maximum size of a Check field + */ +#define LZMA_CHECK_SIZE_MAX 64 + + +/** + * \brief Calculate CRC32 + * + * Calculate CRC32 using the polynomial from the IEEE 802.3 standard. + * + * \param buf Pointer to the input buffer + * \param size Size of the input buffer + * \param crc Previously returned CRC value. This is used to + * calculate the CRC of a big buffer in smaller chunks. + * Set to zero when starting a new calculation. + * + * \return Updated CRC value, which can be passed to this function + * again to continue CRC calculation. + */ +extern LZMA_API(uint32_t) lzma_crc32( + const uint8_t *buf, size_t size, uint32_t crc) + lzma_nothrow lzma_attr_pure; + + +/** + * \brief Calculate CRC64 + * + * Calculate CRC64 using the polynomial from the ECMA-182 standard. + * + * This function is used similarly to lzma_crc32(). + * + * \param buf Pointer to the input buffer + * \param size Size of the input buffer + * \param crc Previously returned CRC value. This is used to + * calculate the CRC of a big buffer in smaller chunks. + * Set to zero when starting a new calculation. + * + * \return Updated CRC value, which can be passed to this function + * again to continue CRC calculation. + */ +extern LZMA_API(uint64_t) lzma_crc64( + const uint8_t *buf, size_t size, uint64_t crc) + lzma_nothrow lzma_attr_pure; + + +/** + * \brief Get the type of the integrity check + * + * This function can be called only immediately after lzma_code() has + * returned LZMA_NO_CHECK, LZMA_UNSUPPORTED_CHECK, or LZMA_GET_CHECK. + * Calling this function in any other situation has undefined behavior. + * + * \param strm Pointer to lzma_stream meeting the above conditions. + * + * \return Check ID in the lzma_stream, or undefined if called improperly. + */ +extern LZMA_API(lzma_check) lzma_get_check(const lzma_stream *strm) + lzma_nothrow; diff --git a/src/native/external/xz/src/liblzma/api/lzma/container.h b/src/native/external/xz/src/liblzma/api/lzma/container.h new file mode 100644 index 00000000000000..37d9ab92f4b52d --- /dev/null +++ b/src/native/external/xz/src/liblzma/api/lzma/container.h @@ -0,0 +1,993 @@ +/* SPDX-License-Identifier: 0BSD */ + +/** + * \file lzma/container.h + * \brief File formats + * \note Never include this file directly. Use instead. + */ + +/* + * Author: Lasse Collin + */ + +#ifndef LZMA_H_INTERNAL +# error Never include this file directly. Use instead. +#endif + + +/************ + * Encoding * + ************/ + +/** + * \brief Default compression preset + * + * It's not straightforward to recommend a default preset, because in some + * cases keeping the resource usage relatively low is more important that + * getting the maximum compression ratio. + */ +#define LZMA_PRESET_DEFAULT UINT32_C(6) + + +/** + * \brief Mask for preset level + * + * This is useful only if you need to extract the level from the preset + * variable. That should be rare. + */ +#define LZMA_PRESET_LEVEL_MASK UINT32_C(0x1F) + + +/* + * Preset flags + * + * Currently only one flag is defined. + */ + +/** + * \brief Extreme compression preset + * + * This flag modifies the preset to make the encoding significantly slower + * while improving the compression ratio only marginally. This is useful + * when you don't mind spending time to get as small result as possible. + * + * This flag doesn't affect the memory usage requirements of the decoder (at + * least not significantly). The memory usage of the encoder may be increased + * a little but only at the lowest preset levels (0-3). + */ +#define LZMA_PRESET_EXTREME (UINT32_C(1) << 31) + + +/** + * \brief Multithreading options + */ +typedef struct { + /** + * \brief Flags + * + * Set this to zero if no flags are wanted. + * + * Encoder: No flags are currently supported. + * + * Decoder: Bitwise-or of zero or more of the decoder flags: + * - LZMA_TELL_NO_CHECK + * - LZMA_TELL_UNSUPPORTED_CHECK + * - LZMA_TELL_ANY_CHECK + * - LZMA_IGNORE_CHECK + * - LZMA_CONCATENATED + * - LZMA_FAIL_FAST + */ + uint32_t flags; + + /** + * \brief Number of worker threads to use + */ + uint32_t threads; + + /** + * \brief Encoder only: Maximum uncompressed size of a Block + * + * The encoder will start a new .xz Block every block_size bytes. + * Using LZMA_FULL_FLUSH or LZMA_FULL_BARRIER with lzma_code() + * the caller may tell liblzma to start a new Block earlier. + * + * With LZMA2, a recommended block size is 2-4 times the LZMA2 + * dictionary size. With very small dictionaries, it is recommended + * to use at least 1 MiB block size for good compression ratio, even + * if this is more than four times the dictionary size. Note that + * these are only recommendations for typical use cases; feel free + * to use other values. Just keep in mind that using a block size + * less than the LZMA2 dictionary size is waste of RAM. + * + * Set this to 0 to let liblzma choose the block size depending + * on the compression options. For LZMA2 it will be 3*dict_size + * or 1 MiB, whichever is more. + * + * For each thread, about 3 * block_size bytes of memory will be + * allocated. This may change in later liblzma versions. If so, + * the memory usage will probably be reduced, not increased. + */ + uint64_t block_size; + + /** + * \brief Timeout to allow lzma_code() to return early + * + * Multithreading can make liblzma consume input and produce + * output in a very bursty way: it may first read a lot of input + * to fill internal buffers, then no input or output occurs for + * a while. + * + * In single-threaded mode, lzma_code() won't return until it has + * either consumed all the input or filled the output buffer. If + * this is done in multithreaded mode, it may cause a call + * lzma_code() to take even tens of seconds, which isn't acceptable + * in all applications. + * + * To avoid very long blocking times in lzma_code(), a timeout + * (in milliseconds) may be set here. If lzma_code() would block + * longer than this number of milliseconds, it will return with + * LZMA_OK. Reasonable values are 100 ms or more. The xz command + * line tool uses 300 ms. + * + * If long blocking times are acceptable, set timeout to a special + * value of 0. This will disable the timeout mechanism and will make + * lzma_code() block until all the input is consumed or the output + * buffer has been filled. + * + * \note Even with a timeout, lzma_code() might sometimes take + * a long time to return. No timing guarantees are made. + */ + uint32_t timeout; + + /** + * \brief Encoder only: Compression preset + * + * The preset is set just like with lzma_easy_encoder(). + * The preset is ignored if filters below is non-NULL. + */ + uint32_t preset; + + /** + * \brief Encoder only: Filter chain (alternative to a preset) + * + * If this is NULL, the preset above is used. Otherwise the preset + * is ignored and the filter chain specified here is used. + */ + const lzma_filter *filters; + + /** + * \brief Encoder only: Integrity check type + * + * See check.h for available checks. The xz command line tool + * defaults to LZMA_CHECK_CRC64, which is a good choice if you + * are unsure. + */ + lzma_check check; + + /* + * Reserved space to allow possible future extensions without + * breaking the ABI. You should not touch these, because the names + * of these variables may change. These are and will never be used + * with the currently supported options, so it is safe to leave these + * uninitialized. + */ + /** \private Reserved member. */ + lzma_reserved_enum reserved_enum1; + + /** \private Reserved member. */ + lzma_reserved_enum reserved_enum2; + + /** \private Reserved member. */ + lzma_reserved_enum reserved_enum3; + + /** \private Reserved member. */ + uint32_t reserved_int1; + + /** \private Reserved member. */ + uint32_t reserved_int2; + + /** \private Reserved member. */ + uint32_t reserved_int3; + + /** \private Reserved member. */ + uint32_t reserved_int4; + + /** + * \brief Memory usage limit to reduce the number of threads + * + * Encoder: Ignored. + * + * Decoder: + * + * If the number of threads has been set so high that more than + * memlimit_threading bytes of memory would be needed, the number + * of threads will be reduced so that the memory usage will not exceed + * memlimit_threading bytes. However, if memlimit_threading cannot + * be met even in single-threaded mode, then decoding will continue + * in single-threaded mode and memlimit_threading may be exceeded + * even by a large amount. That is, memlimit_threading will never make + * lzma_code() return LZMA_MEMLIMIT_ERROR. To truly cap the memory + * usage, see memlimit_stop below. + * + * Setting memlimit_threading to UINT64_MAX or a similar huge value + * means that liblzma is allowed to keep the whole compressed file + * and the whole uncompressed file in memory in addition to the memory + * needed by the decompressor data structures used by each thread! + * In other words, a reasonable value limit must be set here or it + * will cause problems sooner or later. If you have no idea what + * a reasonable value could be, try lzma_physmem() / 4 as a starting + * point. Setting this limit will never prevent decompression of + * a file; this will only reduce the number of threads. + * + * If memlimit_threading is greater than memlimit_stop, then the value + * of memlimit_stop will be used for both. + */ + uint64_t memlimit_threading; + + /** + * \brief Memory usage limit that should never be exceeded + * + * Encoder: Ignored. + * + * Decoder: If decompressing will need more than this amount of + * memory even in the single-threaded mode, then lzma_code() will + * return LZMA_MEMLIMIT_ERROR. + */ + uint64_t memlimit_stop; + + /** \private Reserved member. */ + uint64_t reserved_int7; + + /** \private Reserved member. */ + uint64_t reserved_int8; + + /** \private Reserved member. */ + void *reserved_ptr1; + + /** \private Reserved member. */ + void *reserved_ptr2; + + /** \private Reserved member. */ + void *reserved_ptr3; + + /** \private Reserved member. */ + void *reserved_ptr4; + +} lzma_mt; + + +/** + * \brief Calculate approximate memory usage of easy encoder + * + * This function is a wrapper for lzma_raw_encoder_memusage(). + * + * \param preset Compression preset (level and possible flags) + * + * \return Number of bytes of memory required for the given + * preset when encoding or UINT64_MAX on error. + */ +extern LZMA_API(uint64_t) lzma_easy_encoder_memusage(uint32_t preset) + lzma_nothrow lzma_attr_pure; + + +/** + * \brief Calculate approximate decoder memory usage of a preset + * + * This function is a wrapper for lzma_raw_decoder_memusage(). + * + * \param preset Compression preset (level and possible flags) + * + * \return Number of bytes of memory required to decompress a file + * that was compressed using the given preset or UINT64_MAX + * on error. + */ +extern LZMA_API(uint64_t) lzma_easy_decoder_memusage(uint32_t preset) + lzma_nothrow lzma_attr_pure; + + +/** + * \brief Initialize .xz Stream encoder using a preset number + * + * This function is intended for those who just want to use the basic features + * of liblzma (that is, most developers out there). + * + * If initialization fails (return value is not LZMA_OK), all the memory + * allocated for *strm by liblzma is always freed. Thus, there is no need + * to call lzma_end() after failed initialization. + * + * If initialization succeeds, use lzma_code() to do the actual encoding. + * Valid values for 'action' (the second argument of lzma_code()) are + * LZMA_RUN, LZMA_SYNC_FLUSH, LZMA_FULL_FLUSH, and LZMA_FINISH. In future, + * there may be compression levels or flags that don't support LZMA_SYNC_FLUSH. + * + * \param strm Pointer to lzma_stream that is at least initialized + * with LZMA_STREAM_INIT. + * \param preset Compression preset to use. A preset consist of level + * number and zero or more flags. Usually flags aren't + * used, so preset is simply a number [0, 9] which match + * the options -0 ... -9 of the xz command line tool. + * Additional flags can be set using bitwise-or with + * the preset level number, e.g. 6 | LZMA_PRESET_EXTREME. + * \param check Integrity check type to use. See check.h for available + * checks. The xz command line tool defaults to + * LZMA_CHECK_CRC64, which is a good choice if you are + * unsure. LZMA_CHECK_CRC32 is good too as long as the + * uncompressed file is not many gigabytes. + * + * \return Possible lzma_ret values: + * - LZMA_OK: Initialization succeeded. Use lzma_code() to + * encode your data. + * - LZMA_MEM_ERROR: Memory allocation failed. + * - LZMA_OPTIONS_ERROR: The given compression preset is not + * supported by this build of liblzma. + * - LZMA_UNSUPPORTED_CHECK: The given check type is not + * supported by this liblzma build. + * - LZMA_PROG_ERROR: One or more of the parameters have values + * that will never be valid. For example, strm == NULL. + */ +extern LZMA_API(lzma_ret) lzma_easy_encoder( + lzma_stream *strm, uint32_t preset, lzma_check check) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Single-call .xz Stream encoding using a preset number + * + * The maximum required output buffer size can be calculated with + * lzma_stream_buffer_bound(). + * + * \param preset Compression preset to use. See the description + * in lzma_easy_encoder(). + * \param check Type of the integrity check to calculate from + * uncompressed data. + * \param allocator lzma_allocator for custom allocator functions. + * Set to NULL to use malloc() and free(). + * \param in Beginning of the input buffer + * \param in_size Size of the input buffer + * \param[out] out Beginning of the output buffer + * \param[out] out_pos The next byte will be written to out[*out_pos]. + * *out_pos is updated only if encoding succeeds. + * \param out_size Size of the out buffer; the first byte into + * which no data is written to is out[out_size]. + * + * \return Possible lzma_ret values: + * - LZMA_OK: Encoding was successful. + * - LZMA_BUF_ERROR: Not enough output buffer space. + * - LZMA_UNSUPPORTED_CHECK + * - LZMA_OPTIONS_ERROR + * - LZMA_MEM_ERROR + * - LZMA_DATA_ERROR + * - LZMA_PROG_ERROR + */ +extern LZMA_API(lzma_ret) lzma_easy_buffer_encode( + uint32_t preset, lzma_check check, + const lzma_allocator *allocator, + const uint8_t *in, size_t in_size, + uint8_t *out, size_t *out_pos, size_t out_size) lzma_nothrow; + + +/** + * \brief Initialize .xz Stream encoder using a custom filter chain + * + * \param strm Pointer to lzma_stream that is at least initialized + * with LZMA_STREAM_INIT. + * \param filters Array of filters terminated with + * .id == LZMA_VLI_UNKNOWN. See filters.h for more + * information. + * \param check Type of the integrity check to calculate from + * uncompressed data. + * + * \return Possible lzma_ret values: + * - LZMA_OK: Initialization was successful. + * - LZMA_MEM_ERROR + * - LZMA_UNSUPPORTED_CHECK + * - LZMA_OPTIONS_ERROR + * - LZMA_PROG_ERROR + */ +extern LZMA_API(lzma_ret) lzma_stream_encoder(lzma_stream *strm, + const lzma_filter *filters, lzma_check check) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Calculate approximate memory usage of multithreaded .xz encoder + * + * Since doing the encoding in threaded mode doesn't affect the memory + * requirements of single-threaded decompressor, you can use + * lzma_easy_decoder_memusage(options->preset) or + * lzma_raw_decoder_memusage(options->filters) to calculate + * the decompressor memory requirements. + * + * \param options Compression options + * + * \return Number of bytes of memory required for encoding with the + * given options. If an error occurs, for example due to + * unsupported preset or filter chain, UINT64_MAX is returned. + */ +extern LZMA_API(uint64_t) lzma_stream_encoder_mt_memusage( + const lzma_mt *options) lzma_nothrow lzma_attr_pure; + + +/** + * \brief Initialize multithreaded .xz Stream encoder + * + * This provides the functionality of lzma_easy_encoder() and + * lzma_stream_encoder() as a single function for multithreaded use. + * + * The supported actions for lzma_code() are LZMA_RUN, LZMA_FULL_FLUSH, + * LZMA_FULL_BARRIER, and LZMA_FINISH. Support for LZMA_SYNC_FLUSH might be + * added in the future. + * + * \param strm Pointer to lzma_stream that is at least initialized + * with LZMA_STREAM_INIT. + * \param options Pointer to multithreaded compression options + * + * \return Possible lzma_ret values: + * - LZMA_OK + * - LZMA_MEM_ERROR + * - LZMA_UNSUPPORTED_CHECK + * - LZMA_OPTIONS_ERROR + * - LZMA_PROG_ERROR + */ +extern LZMA_API(lzma_ret) lzma_stream_encoder_mt( + lzma_stream *strm, const lzma_mt *options) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Calculate recommended Block size for multithreaded .xz encoder + * + * This calculates a recommended Block size for multithreaded encoding given + * a filter chain. This is used internally by lzma_stream_encoder_mt() to + * determine the Block size if the block_size member is not set to the + * special value of 0 in the lzma_mt options struct. + * + * If one wishes to change the filters between Blocks, this function is + * helpful to set the block_size member of the lzma_mt struct before calling + * lzma_stream_encoder_mt(). Since the block_size member represents the + * maximum possible Block size for the multithreaded .xz encoder, one can + * use this function to find the maximum recommended Block size based on + * all planned filter chains. Otherwise, the multithreaded encoder will + * base its maximum Block size on the first filter chain used (if the + * block_size member is not set), which may unnecessarily limit the Block + * size for a later filter chain. + * + * \param filters Array of filters terminated with + * .id == LZMA_VLI_UNKNOWN. + * + * \return Recommended Block size in bytes, or UINT64_MAX if + * an error occurred. + */ +extern LZMA_API(uint64_t) lzma_mt_block_size(const lzma_filter *filters) + lzma_nothrow; + + +/** + * \brief Initialize .lzma encoder (legacy file format) + * + * The .lzma format is sometimes called the LZMA_Alone format, which is the + * reason for the name of this function. The .lzma format supports only the + * LZMA1 filter. There is no support for integrity checks like CRC32. + * + * Use this function if and only if you need to create files readable by + * legacy LZMA tools such as LZMA Utils 4.32.x. Moving to the .xz format + * is strongly recommended. + * + * The valid action values for lzma_code() are LZMA_RUN and LZMA_FINISH. + * No kind of flushing is supported, because the file format doesn't make + * it possible. + * + * \param strm Pointer to lzma_stream that is at least initialized + * with LZMA_STREAM_INIT. + * \param options Pointer to encoder options + * + * \return Possible lzma_ret values: + * - LZMA_OK + * - LZMA_MEM_ERROR + * - LZMA_OPTIONS_ERROR + * - LZMA_PROG_ERROR + */ +extern LZMA_API(lzma_ret) lzma_alone_encoder( + lzma_stream *strm, const lzma_options_lzma *options) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Calculate output buffer size for single-call Stream encoder + * + * When trying to compress incompressible data, the encoded size will be + * slightly bigger than the input data. This function calculates how much + * output buffer space is required to be sure that lzma_stream_buffer_encode() + * doesn't return LZMA_BUF_ERROR. + * + * The calculated value is not exact, but it is guaranteed to be big enough. + * The actual maximum output space required may be slightly smaller (up to + * about 100 bytes). This should not be a problem in practice. + * + * If the calculated maximum size doesn't fit into size_t or would make the + * Stream grow past LZMA_VLI_MAX (which should never happen in practice), + * zero is returned to indicate the error. + * + * \note The limit calculated by this function applies only to + * single-call encoding. Multi-call encoding may (and probably + * will) have larger maximum expansion when encoding + * incompressible data. Currently there is no function to + * calculate the maximum expansion of multi-call encoding. + * + * \param uncompressed_size Size in bytes of the uncompressed + * input data + * + * \return Maximum number of bytes needed to store the compressed data. + */ +extern LZMA_API(size_t) lzma_stream_buffer_bound(size_t uncompressed_size) + lzma_nothrow; + + +/** + * \brief Single-call .xz Stream encoder + * + * \param filters Array of filters terminated with + * .id == LZMA_VLI_UNKNOWN. See filters.h for more + * information. + * \param check Type of the integrity check to calculate from + * uncompressed data. + * \param allocator lzma_allocator for custom allocator functions. + * Set to NULL to use malloc() and free(). + * \param in Beginning of the input buffer + * \param in_size Size of the input buffer + * \param[out] out Beginning of the output buffer + * \param[out] out_pos The next byte will be written to out[*out_pos]. + * *out_pos is updated only if encoding succeeds. + * \param out_size Size of the out buffer; the first byte into + * which no data is written to is out[out_size]. + * + * \return Possible lzma_ret values: + * - LZMA_OK: Encoding was successful. + * - LZMA_BUF_ERROR: Not enough output buffer space. + * - LZMA_UNSUPPORTED_CHECK + * - LZMA_OPTIONS_ERROR + * - LZMA_MEM_ERROR + * - LZMA_DATA_ERROR + * - LZMA_PROG_ERROR + */ +extern LZMA_API(lzma_ret) lzma_stream_buffer_encode( + lzma_filter *filters, lzma_check check, + const lzma_allocator *allocator, + const uint8_t *in, size_t in_size, + uint8_t *out, size_t *out_pos, size_t out_size) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief MicroLZMA encoder + * + * The MicroLZMA format is a raw LZMA stream whose first byte (always 0x00) + * has been replaced with bitwise-negation of the LZMA properties (lc/lp/pb). + * This encoding ensures that the first byte of MicroLZMA stream is never + * 0x00. There is no end of payload marker and thus the uncompressed size + * must be stored separately. For the best error detection the dictionary + * size should be stored separately as well but alternatively one may use + * the uncompressed size as the dictionary size when decoding. + * + * With the MicroLZMA encoder, lzma_code() behaves slightly unusually. + * The action argument must be LZMA_FINISH and the return value will never be + * LZMA_OK. Thus the encoding is always done with a single lzma_code() after + * the initialization. The benefit of the combination of initialization + * function and lzma_code() is that memory allocations can be reused for + * better performance. + * + * lzma_code() will try to encode as much input as is possible to fit into + * the given output buffer. If not all input can be encoded, the stream will + * be finished without encoding all the input. The caller must check both + * input and output buffer usage after lzma_code() (total_in and total_out + * in lzma_stream can be convenient). Often lzma_code() can fill the output + * buffer completely if there is a lot of input, but sometimes a few bytes + * may remain unused because the next LZMA symbol would require more space. + * + * lzma_stream.avail_out must be at least 6. Otherwise LZMA_PROG_ERROR + * will be returned. + * + * The LZMA dictionary should be reasonably low to speed up the encoder + * re-initialization. A good value is bigger than the resulting + * uncompressed size of most of the output chunks. For example, if output + * size is 4 KiB, dictionary size of 32 KiB or 64 KiB is good. If the + * data compresses extremely well, even 128 KiB may be useful. + * + * The MicroLZMA format and this encoder variant were made with the EROFS + * file system in mind. This format may be convenient in other embedded + * uses too where many small streams are needed. XZ Embedded includes a + * decoder for this format. + * + * \param strm Pointer to lzma_stream that is at least initialized + * with LZMA_STREAM_INIT. + * \param options Pointer to encoder options + * + * \return Possible lzma_ret values: + * - LZMA_STREAM_END: All good. Check the amounts of input used + * and output produced. Store the amount of input used + * (uncompressed size) as it needs to be known to decompress + * the data. + * - LZMA_OPTIONS_ERROR + * - LZMA_MEM_ERROR + * - LZMA_PROG_ERROR: In addition to the generic reasons for this + * error code, this may also be returned if there isn't enough + * output space (6 bytes) to create a valid MicroLZMA stream. + */ +extern LZMA_API(lzma_ret) lzma_microlzma_encoder( + lzma_stream *strm, const lzma_options_lzma *options) + lzma_nothrow; + + +/************ + * Decoding * + ************/ + +/** + * This flag makes lzma_code() return LZMA_NO_CHECK if the input stream + * being decoded has no integrity check. Note that when used with + * lzma_auto_decoder(), all .lzma files will trigger LZMA_NO_CHECK + * if LZMA_TELL_NO_CHECK is used. + */ +#define LZMA_TELL_NO_CHECK UINT32_C(0x01) + + +/** + * This flag makes lzma_code() return LZMA_UNSUPPORTED_CHECK if the input + * stream has an integrity check, but the type of the integrity check is not + * supported by this liblzma version or build. Such files can still be + * decoded, but the integrity check cannot be verified. + */ +#define LZMA_TELL_UNSUPPORTED_CHECK UINT32_C(0x02) + + +/** + * This flag makes lzma_code() return LZMA_GET_CHECK as soon as the type + * of the integrity check is known. The type can then be got with + * lzma_get_check(). + */ +#define LZMA_TELL_ANY_CHECK UINT32_C(0x04) + + +/** + * This flag makes lzma_code() not calculate and verify the integrity check + * of the compressed data in .xz files. This means that invalid integrity + * check values won't be detected and LZMA_DATA_ERROR won't be returned in + * such cases. + * + * This flag only affects the checks of the compressed data itself; the CRC32 + * values in the .xz headers will still be verified normally. + * + * Don't use this flag unless you know what you are doing. Possible reasons + * to use this flag: + * + * - Trying to recover data from a corrupt .xz file. + * + * - Speeding up decompression, which matters mostly with SHA-256 + * or with files that have compressed extremely well. It's recommended + * to not use this flag for this purpose unless the file integrity is + * verified externally in some other way. + * + * Support for this flag was added in liblzma 5.1.4beta. + */ +#define LZMA_IGNORE_CHECK UINT32_C(0x10) + + +/** + * This flag enables decoding of concatenated files with file formats that + * allow concatenating compressed files as is. From the formats currently + * supported by liblzma, only the .xz and .lz formats allow concatenated + * files. Concatenated files are not allowed with the legacy .lzma format. + * + * This flag also affects the usage of the 'action' argument for lzma_code(). + * When LZMA_CONCATENATED is used, lzma_code() won't return LZMA_STREAM_END + * unless LZMA_FINISH is used as 'action'. Thus, the application has to set + * LZMA_FINISH in the same way as it does when encoding. + * + * If LZMA_CONCATENATED is not used, the decoders still accept LZMA_FINISH + * as 'action' for lzma_code(), but the usage of LZMA_FINISH isn't required. + */ +#define LZMA_CONCATENATED UINT32_C(0x08) + + +/** + * This flag makes the threaded decoder report errors (like LZMA_DATA_ERROR) + * as soon as they are detected. This saves time when the application has no + * interest in a partially decompressed truncated or corrupt file. Note that + * due to timing randomness, if the same truncated or corrupt input is + * decompressed multiple times with this flag, a different amount of output + * may be produced by different runs, and even the error code might vary. + * + * When using LZMA_FAIL_FAST, it is recommended to use LZMA_FINISH to tell + * the decoder when no more input will be coming because it can help fast + * detection and reporting of truncated files. Note that in this situation + * truncated files might be diagnosed with LZMA_DATA_ERROR instead of + * LZMA_OK or LZMA_BUF_ERROR! + * + * Without this flag the threaded decoder will provide as much output as + * possible at first and then report the pending error. This default behavior + * matches the single-threaded decoder and provides repeatable behavior + * with truncated or corrupt input. There are a few special cases where the + * behavior can still differ like memory allocation failures (LZMA_MEM_ERROR). + * + * Single-threaded decoders currently ignore this flag. + * + * Support for this flag was added in liblzma 5.3.3alpha. Note that in older + * versions this flag isn't supported (LZMA_OPTIONS_ERROR) even by functions + * that ignore this flag in newer liblzma versions. + */ +#define LZMA_FAIL_FAST UINT32_C(0x20) + + +/** + * \brief Initialize .xz Stream decoder + * + * \param strm Pointer to lzma_stream that is at least initialized + * with LZMA_STREAM_INIT. + * \param memlimit Memory usage limit as bytes. Use UINT64_MAX + * to effectively disable the limiter. liblzma + * 5.2.3 and earlier don't allow 0 here and return + * LZMA_PROG_ERROR; later versions treat 0 as if 1 + * had been specified. + * \param flags Bitwise-or of zero or more of the decoder flags: + * LZMA_TELL_NO_CHECK, LZMA_TELL_UNSUPPORTED_CHECK, + * LZMA_TELL_ANY_CHECK, LZMA_IGNORE_CHECK, + * LZMA_CONCATENATED, LZMA_FAIL_FAST + * + * \return Possible lzma_ret values: + * - LZMA_OK: Initialization was successful. + * - LZMA_MEM_ERROR: Cannot allocate memory. + * - LZMA_OPTIONS_ERROR: Unsupported flags + * - LZMA_PROG_ERROR + */ +extern LZMA_API(lzma_ret) lzma_stream_decoder( + lzma_stream *strm, uint64_t memlimit, uint32_t flags) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Initialize multithreaded .xz Stream decoder + * + * The decoder can decode multiple Blocks in parallel. This requires that each + * Block Header contains the Compressed Size and Uncompressed size fields + * which are added by the multi-threaded encoder, see lzma_stream_encoder_mt(). + * + * A Stream with one Block will only utilize one thread. A Stream with multiple + * Blocks but without size information in Block Headers will be processed in + * single-threaded mode in the same way as done by lzma_stream_decoder(). + * Concatenated Streams are processed one Stream at a time; no inter-Stream + * parallelization is done. + * + * This function behaves like lzma_stream_decoder() when options->threads == 1 + * and options->memlimit_threading <= 1. + * + * \param strm Pointer to lzma_stream that is at least initialized + * with LZMA_STREAM_INIT. + * \param options Pointer to multithreaded compression options + * + * \return Possible lzma_ret values: + * - LZMA_OK: Initialization was successful. + * - LZMA_MEM_ERROR: Cannot allocate memory. + * - LZMA_MEMLIMIT_ERROR: Memory usage limit was reached. + * - LZMA_OPTIONS_ERROR: Unsupported flags. + * - LZMA_PROG_ERROR + */ +extern LZMA_API(lzma_ret) lzma_stream_decoder_mt( + lzma_stream *strm, const lzma_mt *options) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Decode .xz, .lzma, and .lz (lzip) files with autodetection + * + * This decoder autodetects between the .xz, .lzma, and .lz file formats, + * and calls lzma_stream_decoder(), lzma_alone_decoder(), or + * lzma_lzip_decoder() once the type of the input file has been detected. + * + * Support for .lz was added in 5.4.0. + * + * If the flag LZMA_CONCATENATED is used and the input is a .lzma file: + * For historical reasons concatenated .lzma files aren't supported. + * If there is trailing data after one .lzma stream, lzma_code() will + * return LZMA_DATA_ERROR. (lzma_alone_decoder() doesn't have such a check + * as it doesn't support any decoder flags. It will return LZMA_STREAM_END + * after one .lzma stream.) + * + * \param strm Pointer to lzma_stream that is at least initialized + * with LZMA_STREAM_INIT. + * \param memlimit Memory usage limit as bytes. Use UINT64_MAX + * to effectively disable the limiter. liblzma + * 5.2.3 and earlier don't allow 0 here and return + * LZMA_PROG_ERROR; later versions treat 0 as if 1 + * had been specified. + * \param flags Bitwise-or of zero or more of the decoder flags: + * LZMA_TELL_NO_CHECK, LZMA_TELL_UNSUPPORTED_CHECK, + * LZMA_TELL_ANY_CHECK, LZMA_IGNORE_CHECK, + * LZMA_CONCATENATED, LZMA_FAIL_FAST + * + * \return Possible lzma_ret values: + * - LZMA_OK: Initialization was successful. + * - LZMA_MEM_ERROR: Cannot allocate memory. + * - LZMA_OPTIONS_ERROR: Unsupported flags + * - LZMA_PROG_ERROR + */ +extern LZMA_API(lzma_ret) lzma_auto_decoder( + lzma_stream *strm, uint64_t memlimit, uint32_t flags) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Initialize .lzma decoder (legacy file format) + * + * Valid 'action' arguments to lzma_code() are LZMA_RUN and LZMA_FINISH. + * There is no need to use LZMA_FINISH, but it's allowed because it may + * simplify certain types of applications. + * + * \param strm Pointer to lzma_stream that is at least initialized + * with LZMA_STREAM_INIT. + * \param memlimit Memory usage limit as bytes. Use UINT64_MAX + * to effectively disable the limiter. liblzma + * 5.2.3 and earlier don't allow 0 here and return + * LZMA_PROG_ERROR; later versions treat 0 as if 1 + * had been specified. + * + * \return Possible lzma_ret values: + * - LZMA_OK + * - LZMA_MEM_ERROR + * - LZMA_PROG_ERROR + */ +extern LZMA_API(lzma_ret) lzma_alone_decoder( + lzma_stream *strm, uint64_t memlimit) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Initialize .lz (lzip) decoder (a foreign file format) + * + * This decoder supports the .lz format versions 0 and 1: + * + * - Files in the format version 0 were produced by lzip 1.3 and older. + * Such files aren't common but may be found from file archives + * as a few source packages were released in this format. People + * might have old personal files in this format too. Decompression + * support for the format version 0 was removed in lzip 1.18. + * + * - lzip 1.3 added decompression support for .lz format version 1 files. + * Compression support was added in lzip 1.4. + * + * - lzlib extends version 1 format with the Sync Flush marker. This + * extension is only meant for lzlib use; it's not valid in normal .lz + * files. This extension is not supported by liblzma. lzma_code() will + * return LZMA_DATA_ERROR at the location of the Sync Flush marker. + * + * Just like with lzma_stream_decoder() for .xz files, LZMA_CONCATENATED + * should be used when decompressing normal standalone .lz files. + * + * If LZMA_CONCATENATED is used and there is non-.lz data after at least one + * valid .lz member, lzma_code() leaves lzma_stream.next_in pointing to the + * first byte of the non-.lz data and returns LZMA_STREAM_END. That is, one + * can append custom data at the end of a .lz file and the decoder will + * ignore it. An exception to this is if the first 1-3 bytes of the non-.lz + * data are identical to the .lz magic bytes (0x4C, 0x5A, 0x49, 0x50; "LZIP" + * in US-ASCII). In such a case the 1-3 bytes are consumed by lzma_code(). + * If one wishes to locate the non-.lz data reliably, one must ensure that + * the first byte isn't 0x4C. It's best if none of the first four bytes of + * trailing data are equal to the magic bytes because if two or three bytes + * are, lzip >= 1.20 diagnoses it as a corrupt member header by default. + * + * \param strm Pointer to lzma_stream that is at least initialized + * with LZMA_STREAM_INIT. + * \param memlimit Memory usage limit as bytes. Use UINT64_MAX + * to effectively disable the limiter. + * \param flags Bitwise-or of flags, or zero for no flags. + * All decoder flags listed above are supported + * although only LZMA_CONCATENATED and (in very rare + * cases) LZMA_IGNORE_CHECK are actually useful. + * LZMA_TELL_NO_CHECK, LZMA_TELL_UNSUPPORTED_CHECK, + * and LZMA_FAIL_FAST do nothing. LZMA_TELL_ANY_CHECK + * is supported for consistency only as CRC32 is + * always used in the .lz format. + * + * \return Possible lzma_ret values: + * - LZMA_OK: Initialization was successful. + * - LZMA_MEM_ERROR: Cannot allocate memory. + * - LZMA_OPTIONS_ERROR: Unsupported flags + * - LZMA_PROG_ERROR + */ +extern LZMA_API(lzma_ret) lzma_lzip_decoder( + lzma_stream *strm, uint64_t memlimit, uint32_t flags) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Single-call .xz Stream decoder + * + * \param memlimit Pointer to how much memory the decoder is allowed + * to allocate. The value pointed by this pointer is + * modified if and only if LZMA_MEMLIMIT_ERROR is + * returned. + * \param flags Bitwise-or of zero or more of the decoder flags: + * LZMA_TELL_NO_CHECK, LZMA_TELL_UNSUPPORTED_CHECK, + * LZMA_IGNORE_CHECK, LZMA_CONCATENATED, + * LZMA_FAIL_FAST. Note that LZMA_TELL_ANY_CHECK + * is not allowed and will return LZMA_PROG_ERROR. + * \param allocator lzma_allocator for custom allocator functions. + * Set to NULL to use malloc() and free(). + * \param in Beginning of the input buffer + * \param in_pos The next byte will be read from in[*in_pos]. + * *in_pos is updated only if decoding succeeds. + * \param in_size Size of the input buffer; the first byte that + * won't be read is in[in_size]. + * \param[out] out Beginning of the output buffer + * \param[out] out_pos The next byte will be written to out[*out_pos]. + * *out_pos is updated only if decoding succeeds. + * \param out_size Size of the out buffer; the first byte into + * which no data is written to is out[out_size]. + * + * \return Possible lzma_ret values: + * - LZMA_OK: Decoding was successful. + * - LZMA_FORMAT_ERROR + * - LZMA_OPTIONS_ERROR + * - LZMA_DATA_ERROR + * - LZMA_NO_CHECK: This can be returned only if using + * the LZMA_TELL_NO_CHECK flag. + * - LZMA_UNSUPPORTED_CHECK: This can be returned only if using + * the LZMA_TELL_UNSUPPORTED_CHECK flag. + * - LZMA_MEM_ERROR + * - LZMA_MEMLIMIT_ERROR: Memory usage limit was reached. + * The minimum required memlimit value was stored to *memlimit. + * - LZMA_BUF_ERROR: Output buffer was too small. + * - LZMA_PROG_ERROR + */ +extern LZMA_API(lzma_ret) lzma_stream_buffer_decode( + uint64_t *memlimit, uint32_t flags, + const lzma_allocator *allocator, + const uint8_t *in, size_t *in_pos, size_t in_size, + uint8_t *out, size_t *out_pos, size_t out_size) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief MicroLZMA decoder + * + * See lzma_microlzma_encoder() for more information. + * + * The lzma_code() usage with this decoder is completely normal. The + * special behavior of lzma_code() applies to lzma_microlzma_encoder() only. + * + * \param strm Pointer to lzma_stream that is at least initialized + * with LZMA_STREAM_INIT. + * \param comp_size Compressed size of the MicroLZMA stream. + * The caller must somehow know this exactly. + * \param uncomp_size Uncompressed size of the MicroLZMA stream. + * If the exact uncompressed size isn't known, this + * can be set to a value that is at most as big as + * the exact uncompressed size would be, but then the + * next argument uncomp_size_is_exact must be false. + * \param uncomp_size_is_exact + * If true, uncomp_size must be exactly correct. + * This will improve error detection at the end of + * the stream. If the exact uncompressed size isn't + * known, this must be false. uncomp_size must still + * be at most as big as the exact uncompressed size + * is. Setting this to false when the exact size is + * known will work but error detection at the end of + * the stream will be weaker. + * \param dict_size LZMA dictionary size that was used when + * compressing the data. It is OK to use a bigger + * value too but liblzma will then allocate more + * memory than would actually be required and error + * detection will be slightly worse. (Note that with + * the implementation in XZ Embedded it doesn't + * affect the memory usage if one specifies bigger + * dictionary than actually required.) + * + * \return Possible lzma_ret values: + * - LZMA_OK + * - LZMA_MEM_ERROR + * - LZMA_OPTIONS_ERROR + * - LZMA_PROG_ERROR + */ +extern LZMA_API(lzma_ret) lzma_microlzma_decoder( + lzma_stream *strm, uint64_t comp_size, + uint64_t uncomp_size, lzma_bool uncomp_size_is_exact, + uint32_t dict_size) lzma_nothrow; diff --git a/src/native/external/xz/src/liblzma/api/lzma/delta.h b/src/native/external/xz/src/liblzma/api/lzma/delta.h new file mode 100644 index 00000000000000..5ebacef815841a --- /dev/null +++ b/src/native/external/xz/src/liblzma/api/lzma/delta.h @@ -0,0 +1,95 @@ +/* SPDX-License-Identifier: 0BSD */ + +/** + * \file lzma/delta.h + * \brief Delta filter + * \note Never include this file directly. Use instead. + */ + +/* + * Author: Lasse Collin + */ + +#ifndef LZMA_H_INTERNAL +# error Never include this file directly. Use instead. +#endif + + +/** + * \brief Filter ID + * + * Filter ID of the Delta filter. This is used as lzma_filter.id. + */ +#define LZMA_FILTER_DELTA LZMA_VLI_C(0x03) + + +/** + * \brief Type of the delta calculation + * + * Currently only byte-wise delta is supported. Other possible types could + * be, for example, delta of 16/32/64-bit little/big endian integers, but + * these are not currently planned since byte-wise delta is almost as good. + */ +typedef enum { + LZMA_DELTA_TYPE_BYTE +} lzma_delta_type; + + +/** + * \brief Options for the Delta filter + * + * These options are needed by both encoder and decoder. + */ +typedef struct { + /** For now, this must always be LZMA_DELTA_TYPE_BYTE. */ + lzma_delta_type type; + + /** + * \brief Delta distance + * + * With the only currently supported type, LZMA_DELTA_TYPE_BYTE, + * the distance is as bytes. + * + * Examples: + * - 16-bit stereo audio: distance = 4 bytes + * - 24-bit RGB image data: distance = 3 bytes + */ + uint32_t dist; + + /** + * \brief Minimum value for lzma_options_delta.dist. + */ +# define LZMA_DELTA_DIST_MIN 1 + + /** + * \brief Maximum value for lzma_options_delta.dist. + */ +# define LZMA_DELTA_DIST_MAX 256 + + /* + * Reserved space to allow possible future extensions without + * breaking the ABI. You should not touch these, because the names + * of these variables may change. These are and will never be used + * when type is LZMA_DELTA_TYPE_BYTE, so it is safe to leave these + * uninitialized. + */ + + /** \private Reserved member. */ + uint32_t reserved_int1; + + /** \private Reserved member. */ + uint32_t reserved_int2; + + /** \private Reserved member. */ + uint32_t reserved_int3; + + /** \private Reserved member. */ + uint32_t reserved_int4; + + /** \private Reserved member. */ + void *reserved_ptr1; + + /** \private Reserved member. */ + void *reserved_ptr2; + +} lzma_options_delta; diff --git a/src/native/external/xz/src/liblzma/api/lzma/filter.h b/src/native/external/xz/src/liblzma/api/lzma/filter.h new file mode 100644 index 00000000000000..e86809c4e395d7 --- /dev/null +++ b/src/native/external/xz/src/liblzma/api/lzma/filter.h @@ -0,0 +1,769 @@ +/* SPDX-License-Identifier: 0BSD */ + +/** + * \file lzma/filter.h + * \brief Common filter related types and functions + * \note Never include this file directly. Use instead. + */ + +/* + * Author: Lasse Collin + */ + +#ifndef LZMA_H_INTERNAL +# error Never include this file directly. Use instead. +#endif + + +/** + * \brief Maximum number of filters in a chain + * + * A filter chain can have 1-4 filters, of which three are allowed to change + * the size of the data. Usually only one or two filters are needed. + */ +#define LZMA_FILTERS_MAX 4 + + +/** + * \brief Filter options + * + * This structure is used to pass a Filter ID and a pointer to the filter's + * options to liblzma. A few functions work with a single lzma_filter + * structure, while most functions expect a filter chain. + * + * A filter chain is indicated with an array of lzma_filter structures. + * The array is terminated with .id = LZMA_VLI_UNKNOWN. Thus, the filter + * array must have LZMA_FILTERS_MAX + 1 elements (that is, five) to + * be able to hold any arbitrary filter chain. This is important when + * using lzma_block_header_decode() from block.h, because a filter array + * that is too small would make liblzma write past the end of the array. + */ +typedef struct { + /** + * \brief Filter ID + * + * Use constants whose name begin with 'LZMA_FILTER_' to specify + * different filters. In an array of lzma_filter structures, use + * LZMA_VLI_UNKNOWN to indicate end of filters. + * + * \note This is not an enum, because on some systems enums + * cannot be 64-bit. + */ + lzma_vli id; + + /** + * \brief Pointer to filter-specific options structure + * + * If the filter doesn't need options, set this to NULL. If id is + * set to LZMA_VLI_UNKNOWN, options is ignored, and thus + * doesn't need be initialized. + */ + void *options; + +} lzma_filter; + + +/** + * \brief Test if the given Filter ID is supported for encoding + * + * \param id Filter ID + * + * \return lzma_bool: + * - true if the Filter ID is supported for encoding by this + * liblzma build. + * - false otherwise. + */ +extern LZMA_API(lzma_bool) lzma_filter_encoder_is_supported(lzma_vli id) + lzma_nothrow lzma_attr_const; + + +/** + * \brief Test if the given Filter ID is supported for decoding + * + * \param id Filter ID + * + * \return lzma_bool: + * - true if the Filter ID is supported for decoding by this + * liblzma build. + * - false otherwise. + */ +extern LZMA_API(lzma_bool) lzma_filter_decoder_is_supported(lzma_vli id) + lzma_nothrow lzma_attr_const; + + +/** + * \brief Copy the filters array + * + * Copy the Filter IDs and filter-specific options from src to dest. + * Up to LZMA_FILTERS_MAX filters are copied, plus the terminating + * .id == LZMA_VLI_UNKNOWN. Thus, dest should have at least + * LZMA_FILTERS_MAX + 1 elements space unless the caller knows that + * src is smaller than that. + * + * Unless the filter-specific options is NULL, the Filter ID has to be + * supported by liblzma, because liblzma needs to know the size of every + * filter-specific options structure. The filter-specific options are not + * validated. If options is NULL, any unsupported Filter IDs are copied + * without returning an error. + * + * Old filter-specific options in dest are not freed, so dest doesn't + * need to be initialized by the caller in any way. + * + * If an error occurs, memory possibly already allocated by this function + * is always freed. liblzma versions older than 5.2.7 may modify the dest + * array and leave its contents in an undefined state if an error occurs. + * liblzma 5.2.7 and newer only modify the dest array when returning LZMA_OK. + * + * \param src Array of filters terminated with + * .id == LZMA_VLI_UNKNOWN. + * \param[out] dest Destination filter array + * \param allocator lzma_allocator for custom allocator functions. + * Set to NULL to use malloc() and free(). + * + * \return Possible lzma_ret values: + * - LZMA_OK + * - LZMA_MEM_ERROR + * - LZMA_OPTIONS_ERROR: Unsupported Filter ID and its options + * is not NULL. + * - LZMA_PROG_ERROR: src or dest is NULL. + */ +extern LZMA_API(lzma_ret) lzma_filters_copy( + const lzma_filter *src, lzma_filter *dest, + const lzma_allocator *allocator) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Free the options in the array of lzma_filter structures + * + * This frees the filter chain options. The filters array itself is not freed. + * + * The filters array must have at most LZMA_FILTERS_MAX + 1 elements + * including the terminating element which must have .id = LZMA_VLI_UNKNOWN. + * For all elements before the terminating element: + * - options will be freed using the given lzma_allocator or, + * if allocator is NULL, using free(). + * - options will be set to NULL. + * - id will be set to LZMA_VLI_UNKNOWN. + * + * If filters is NULL, this does nothing. Again, this never frees the + * filters array itself. + * + * \param filters Array of filters terminated with + * .id == LZMA_VLI_UNKNOWN. + * \param allocator lzma_allocator for custom allocator functions. + * Set to NULL to use malloc() and free(). + */ +extern LZMA_API(void) lzma_filters_free( + lzma_filter *filters, const lzma_allocator *allocator) + lzma_nothrow; + + +/** + * \brief Calculate approximate memory requirements for raw encoder + * + * This function can be used to calculate the memory requirements for + * Block and Stream encoders too because Block and Stream encoders don't + * need significantly more memory than raw encoder. + * + * \param filters Array of filters terminated with + * .id == LZMA_VLI_UNKNOWN. + * + * \return Number of bytes of memory required for the given + * filter chain when encoding or UINT64_MAX on error. + */ +extern LZMA_API(uint64_t) lzma_raw_encoder_memusage(const lzma_filter *filters) + lzma_nothrow lzma_attr_pure; + + +/** + * \brief Calculate approximate memory requirements for raw decoder + * + * This function can be used to calculate the memory requirements for + * Block and Stream decoders too because Block and Stream decoders don't + * need significantly more memory than raw decoder. + * + * \param filters Array of filters terminated with + * .id == LZMA_VLI_UNKNOWN. + * + * \return Number of bytes of memory required for the given + * filter chain when decoding or UINT64_MAX on error. + */ +extern LZMA_API(uint64_t) lzma_raw_decoder_memusage(const lzma_filter *filters) + lzma_nothrow lzma_attr_pure; + + +/** + * \brief Initialize raw encoder + * + * This function may be useful when implementing custom file formats. + * + * The 'action' with lzma_code() can be LZMA_RUN, LZMA_SYNC_FLUSH (if the + * filter chain supports it), or LZMA_FINISH. + * + * \param strm Pointer to lzma_stream that is at least + * initialized with LZMA_STREAM_INIT. + * \param filters Array of filters terminated with + * .id == LZMA_VLI_UNKNOWN. + * + * \return Possible lzma_ret values: + * - LZMA_OK + * - LZMA_MEM_ERROR + * - LZMA_OPTIONS_ERROR + * - LZMA_PROG_ERROR + */ +extern LZMA_API(lzma_ret) lzma_raw_encoder( + lzma_stream *strm, const lzma_filter *filters) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Initialize raw decoder + * + * The initialization of raw decoder goes similarly to raw encoder. + * + * The 'action' with lzma_code() can be LZMA_RUN or LZMA_FINISH. Using + * LZMA_FINISH is not required, it is supported just for convenience. + * + * \param strm Pointer to lzma_stream that is at least + * initialized with LZMA_STREAM_INIT. + * \param filters Array of filters terminated with + * .id == LZMA_VLI_UNKNOWN. + * + * \return Possible lzma_ret values: + * - LZMA_OK + * - LZMA_MEM_ERROR + * - LZMA_OPTIONS_ERROR + * - LZMA_PROG_ERROR + */ +extern LZMA_API(lzma_ret) lzma_raw_decoder( + lzma_stream *strm, const lzma_filter *filters) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Update the filter chain in the encoder + * + * This function may be called after lzma_code() has returned LZMA_STREAM_END + * when LZMA_FULL_BARRIER, LZMA_FULL_FLUSH, or LZMA_SYNC_FLUSH was used: + * + * - After LZMA_FULL_BARRIER or LZMA_FULL_FLUSH: Single-threaded .xz Stream + * encoder (lzma_stream_encoder()) and (since liblzma 5.4.0) multi-threaded + * Stream encoder (lzma_stream_encoder_mt()) allow setting a new filter + * chain to be used for the next Block(s). + * + * - After LZMA_SYNC_FLUSH: Raw encoder (lzma_raw_encoder()), + * Block encoder (lzma_block_encoder()), and single-threaded .xz Stream + * encoder (lzma_stream_encoder()) allow changing certain filter-specific + * options in the middle of encoding. The actual filters in the chain + * (Filter IDs) must not be changed! Currently only the lc, lp, and pb + * options of LZMA2 (not LZMA1) can be changed this way. + * + * - In the future some filters might allow changing some of their options + * without any barrier or flushing but currently such filters don't exist. + * + * This function may also be called when no data has been compressed yet + * although this is rarely useful. In that case, this function will behave + * as if LZMA_FULL_FLUSH (Stream encoders) or LZMA_SYNC_FLUSH (Raw or Block + * encoder) had been used right before calling this function. + * + * \param strm Pointer to lzma_stream that is at least + * initialized with LZMA_STREAM_INIT. + * \param filters Array of filters terminated with + * .id == LZMA_VLI_UNKNOWN. + * + * \return Possible lzma_ret values: + * - LZMA_OK + * - LZMA_MEM_ERROR + * - LZMA_MEMLIMIT_ERROR + * - LZMA_OPTIONS_ERROR + * - LZMA_PROG_ERROR + */ +extern LZMA_API(lzma_ret) lzma_filters_update( + lzma_stream *strm, const lzma_filter *filters) lzma_nothrow; + + +/** + * \brief Single-call raw encoder + * + * \note There is no function to calculate how big output buffer + * would surely be big enough. (lzma_stream_buffer_bound() + * works only for lzma_stream_buffer_encode(); raw encoder + * won't necessarily meet that bound.) + * + * \param filters Array of filters terminated with + * .id == LZMA_VLI_UNKNOWN. + * \param allocator lzma_allocator for custom allocator functions. + * Set to NULL to use malloc() and free(). + * \param in Beginning of the input buffer + * \param in_size Size of the input buffer + * \param[out] out Beginning of the output buffer + * \param[out] out_pos The next byte will be written to out[*out_pos]. + * *out_pos is updated only if encoding succeeds. + * \param out_size Size of the out buffer; the first byte into + * which no data is written to is out[out_size]. + * + * \return Possible lzma_ret values: + * - LZMA_OK: Encoding was successful. + * - LZMA_BUF_ERROR: Not enough output buffer space. + * - LZMA_OPTIONS_ERROR + * - LZMA_MEM_ERROR + * - LZMA_DATA_ERROR + * - LZMA_PROG_ERROR + */ +extern LZMA_API(lzma_ret) lzma_raw_buffer_encode( + const lzma_filter *filters, const lzma_allocator *allocator, + const uint8_t *in, size_t in_size, uint8_t *out, + size_t *out_pos, size_t out_size) lzma_nothrow; + + +/** + * \brief Single-call raw decoder + * + * \param filters Array of filters terminated with + * .id == LZMA_VLI_UNKNOWN. + * \param allocator lzma_allocator for custom allocator functions. + * Set to NULL to use malloc() and free(). + * \param in Beginning of the input buffer + * \param in_pos The next byte will be read from in[*in_pos]. + * *in_pos is updated only if decoding succeeds. + * \param in_size Size of the input buffer; the first byte that + * won't be read is in[in_size]. + * \param[out] out Beginning of the output buffer + * \param[out] out_pos The next byte will be written to out[*out_pos]. + * *out_pos is updated only if encoding succeeds. + * \param out_size Size of the out buffer; the first byte into + * which no data is written to is out[out_size]. + * + * \return Possible lzma_ret values: + * - LZMA_OK: Decoding was successful. + * - LZMA_BUF_ERROR: Not enough output buffer space. + * - LZMA_OPTIONS_ERROR + * - LZMA_MEM_ERROR + * - LZMA_DATA_ERROR + * - LZMA_PROG_ERROR + */ +extern LZMA_API(lzma_ret) lzma_raw_buffer_decode( + const lzma_filter *filters, const lzma_allocator *allocator, + const uint8_t *in, size_t *in_pos, size_t in_size, + uint8_t *out, size_t *out_pos, size_t out_size) lzma_nothrow; + + +/** + * \brief Get the size of the Filter Properties field + * + * This function may be useful when implementing custom file formats + * using the raw encoder and decoder. + * + * \note This function validates the Filter ID, but does not + * necessarily validate the options. Thus, it is possible + * that this returns LZMA_OK while the following call to + * lzma_properties_encode() returns LZMA_OPTIONS_ERROR. + * + * \param[out] size Pointer to uint32_t to hold the size of the properties + * \param filter Filter ID and options (the size of the properties may + * vary depending on the options) + * + * \return Possible lzma_ret values: + * - LZMA_OK + * - LZMA_OPTIONS_ERROR + * - LZMA_PROG_ERROR + */ +extern LZMA_API(lzma_ret) lzma_properties_size( + uint32_t *size, const lzma_filter *filter) lzma_nothrow; + + +/** + * \brief Encode the Filter Properties field + * + * \note Even this function won't validate more options than actually + * necessary. Thus, it is possible that encoding the properties + * succeeds but using the same options to initialize the encoder + * will fail. + * + * \note If lzma_properties_size() indicated that the size + * of the Filter Properties field is zero, calling + * lzma_properties_encode() is not required, but it + * won't do any harm either. + * + * \param filter Filter ID and options + * \param[out] props Buffer to hold the encoded options. The size of + * the buffer must have been already determined with + * lzma_properties_size(). + * + * \return Possible lzma_ret values: + * - LZMA_OK + * - LZMA_PROG_ERROR + */ +extern LZMA_API(lzma_ret) lzma_properties_encode( + const lzma_filter *filter, uint8_t *props) lzma_nothrow; + + +/** + * \brief Decode the Filter Properties field + * + * \param filter filter->id must have been set to the correct + * Filter ID. filter->options doesn't need to be + * initialized (it's not freed by this function). The + * decoded options will be stored in filter->options; + * it's application's responsibility to free it when + * appropriate. filter->options is set to NULL if + * there are no properties or if an error occurs. + * \param allocator lzma_allocator for custom allocator functions. + * Set to NULL to use malloc() and free(). + * and in case of an error, also free(). + * \param props Input buffer containing the properties. + * \param props_size Size of the properties. This must be the exact + * size; giving too much or too little input will + * return LZMA_OPTIONS_ERROR. + * + * \return Possible lzma_ret values: + * - LZMA_OK + * - LZMA_OPTIONS_ERROR + * - LZMA_MEM_ERROR + */ +extern LZMA_API(lzma_ret) lzma_properties_decode( + lzma_filter *filter, const lzma_allocator *allocator, + const uint8_t *props, size_t props_size) lzma_nothrow; + + +/** + * \brief Calculate encoded size of a Filter Flags field + * + * Knowing the size of Filter Flags is useful to know when allocating + * memory to hold the encoded Filter Flags. + * + * \note If you need to calculate size of List of Filter Flags, + * you need to loop over every lzma_filter entry. + * + * \param[out] size Pointer to integer to hold the calculated size + * \param filter Filter ID and associated options whose encoded + * size is to be calculated + * + * \return Possible lzma_ret values: + * - LZMA_OK: *size set successfully. Note that this doesn't + * guarantee that filter->options is valid, thus + * lzma_filter_flags_encode() may still fail. + * - LZMA_OPTIONS_ERROR: Unknown Filter ID or unsupported options. + * - LZMA_PROG_ERROR: Invalid options + */ +extern LZMA_API(lzma_ret) lzma_filter_flags_size( + uint32_t *size, const lzma_filter *filter) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Encode Filter Flags into given buffer + * + * In contrast to some functions, this doesn't allocate the needed buffer. + * This is due to how this function is used internally by liblzma. + * + * \param filter Filter ID and options to be encoded + * \param[out] out Beginning of the output buffer + * \param[out] out_pos out[*out_pos] is the next write position. This + * is updated by the encoder. + * \param out_size out[out_size] is the first byte to not write. + * + * \return Possible lzma_ret values: + * - LZMA_OK: Encoding was successful. + * - LZMA_OPTIONS_ERROR: Invalid or unsupported options. + * - LZMA_PROG_ERROR: Invalid options or not enough output + * buffer space (you should have checked it with + * lzma_filter_flags_size()). + */ +extern LZMA_API(lzma_ret) lzma_filter_flags_encode(const lzma_filter *filter, + uint8_t *out, size_t *out_pos, size_t out_size) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Decode Filter Flags from given buffer + * + * The decoded result is stored into *filter. The old value of + * filter->options is not free()d. If anything other than LZMA_OK + * is returned, filter->options is set to NULL. + * + * \param[out] filter Destination filter. The decoded Filter ID will + * be stored in filter->id. If options are needed + * they will be allocated and the pointer will be + * stored in filter->options. + * \param allocator lzma_allocator for custom allocator functions. + * Set to NULL to use malloc() and free(). + * \param in Beginning of the input buffer + * \param[out] in_pos The next byte will be read from in[*in_pos]. + * *in_pos is updated only if decoding succeeds. + * \param in_size Size of the input buffer; the first byte that + * won't be read is in[in_size]. + * + * \return Possible lzma_ret values: + * - LZMA_OK + * - LZMA_OPTIONS_ERROR + * - LZMA_MEM_ERROR + * - LZMA_DATA_ERROR + * - LZMA_PROG_ERROR + */ +extern LZMA_API(lzma_ret) lzma_filter_flags_decode( + lzma_filter *filter, const lzma_allocator *allocator, + const uint8_t *in, size_t *in_pos, size_t in_size) + lzma_nothrow lzma_attr_warn_unused_result; + + +/*********** + * Strings * + ***********/ + +/** + * \brief Allow or show all filters + * + * By default only the filters supported in the .xz format are accept by + * lzma_str_to_filters() or shown by lzma_str_list_filters(). + */ +#define LZMA_STR_ALL_FILTERS UINT32_C(0x01) + + +/** + * \brief Do not validate the filter chain in lzma_str_to_filters() + * + * By default lzma_str_to_filters() can return an error if the filter chain + * as a whole isn't usable in the .xz format or in the raw encoder or decoder. + * With this flag, this validation is skipped. This flag doesn't affect the + * handling of the individual filter options. To allow non-.xz filters also + * LZMA_STR_ALL_FILTERS is needed. + */ +#define LZMA_STR_NO_VALIDATION UINT32_C(0x02) + + +/** + * \brief Stringify encoder options + * + * Show the filter-specific options that the encoder will use. + * This may be useful for verbose diagnostic messages. + * + * Note that if options were decoded from .xz headers then the encoder options + * may be undefined. This flag shouldn't be used in such a situation. + */ +#define LZMA_STR_ENCODER UINT32_C(0x10) + + +/** + * \brief Stringify decoder options + * + * Show the filter-specific options that the decoder will use. + * This may be useful for showing what filter options were decoded + * from file headers. + */ +#define LZMA_STR_DECODER UINT32_C(0x20) + + +/** + * \brief Produce xz-compatible getopt_long() syntax + * + * That is, "delta:dist=2 lzma2:dict=4MiB,pb=1,lp=1" becomes + * "--delta=dist=2 --lzma2=dict=4MiB,pb=1,lp=1". + * + * This syntax is compatible with xz 5.0.0 as long as the filters and + * their options are supported too. + */ +#define LZMA_STR_GETOPT_LONG UINT32_C(0x40) + + +/** + * \brief Use two dashes "--" instead of a space to separate filters + * + * That is, "delta:dist=2 lzma2:pb=1,lp=1" becomes + * "delta:dist=2--lzma2:pb=1,lp=1". This looks slightly odd but this + * kind of strings should be usable on the command line without quoting. + * However, it is possible that future versions with new filter options + * might produce strings that require shell quoting anyway as the exact + * set of possible characters isn't frozen for now. + * + * It is guaranteed that the single quote (') will never be used in + * filter chain strings (even if LZMA_STR_NO_SPACES isn't used). + */ +#define LZMA_STR_NO_SPACES UINT32_C(0x80) + + +/** + * \brief Convert a string to a filter chain + * + * This tries to make it easier to write applications that allow users + * to set custom compression options. This only handles the filter + * configuration (including presets) but not the number of threads, + * block size, check type, or memory limits. + * + * The input string can be either a preset or a filter chain. Presets + * begin with a digit 0-9 and may be followed by zero or more flags + * which are lower-case letters. Currently only "e" is supported, matching + * LZMA_PRESET_EXTREME. For partial xz command line syntax compatibility, + * a preset string may start with a single dash "-". + * + * A filter chain consists of one or more "filtername:opt1=value1,opt2=value2" + * strings separated by one or more spaces. Leading and trailing spaces are + * ignored. All names and values must be lower-case. Extra commas in the + * option list are ignored. The order of filters is significant: when + * encoding, the uncompressed input data goes to the leftmost filter first. + * Normally "lzma2" is the last filter in the chain. + * + * If one wishes to avoid spaces, for example, to avoid shell quoting, + * it is possible to use two dashes "--" instead of spaces to separate + * the filters. + * + * For xz command line compatibility, each filter may be prefixed with + * two dashes "--" and the colon ":" separating the filter name from + * the options may be replaced with an equals sign "=". + * + * By default, only filters that can be used in the .xz format are accepted. + * To allow all filters (LZMA1) use the flag LZMA_STR_ALL_FILTERS. + * + * By default, very basic validation is done for the filter chain as a whole, + * for example, that LZMA2 is only used as the last filter in the chain. + * The validation isn't perfect though and it's possible that this function + * succeeds but using the filter chain for encoding or decoding will still + * result in LZMA_OPTIONS_ERROR. To disable this validation, use the flag + * LZMA_STR_NO_VALIDATION. + * + * The available filter names and their options are available via + * lzma_str_list_filters(). See the xz man page for the description + * of filter names and options. + * + * For command line applications, below is an example how an error message + * can be displayed. Note the use of an empty string for the field width. + * If "^" was used there it would create an off-by-one error except at + * the very beginning of the line. + * + * \code{.c} + * const char *str = ...; // From user + * lzma_filter filters[LZMA_FILTERS_MAX + 1]; + * int pos; + * const char *msg = lzma_str_to_filters(str, &pos, filters, 0, NULL); + * if (msg != NULL) { + * printf("%s: Error in XZ compression options:\n", argv[0]); + * printf("%s: %s\n", argv[0], str); + * printf("%s: %*s^\n", argv[0], errpos, ""); + * printf("%s: %s\n", argv[0], msg); + * } + * \endcode + * + * \param str User-supplied string describing a preset or + * a filter chain. If a default value is needed and + * you don't know what would be good, use "6" since + * that is the default preset in xz too. + * \param[out] error_pos If this isn't NULL, this value will be set on + * both success and on all errors. This tells the + * location of the error in the string. This is + * an int to make it straightforward to use this + * as printf() field width. The value is guaranteed + * to be in the range [0, INT_MAX] even if strlen(str) + * somehow was greater than INT_MAX. + * \param[out] filters An array of lzma_filter structures. There must + * be LZMA_FILTERS_MAX + 1 (that is, five) elements + * in the array. The old contents are ignored so it + * doesn't need to be initialized. This array is + * modified only if this function returns NULL. + * Once the allocated filter options are no longer + * needed, lzma_filters_free() can be used to free the + * options (it doesn't free the filters array itself). + * \param flags Bitwise-or of zero or more of the flags + * LZMA_STR_ALL_FILTERS and LZMA_STR_NO_VALIDATION. + * \param allocator lzma_allocator for custom allocator functions. + * Set to NULL to use malloc() and free(). + * + * \return On success, NULL is returned. On error, a statically-allocated + * error message is returned which together with the error_pos + * should give some idea what is wrong. + */ +extern LZMA_API(const char *) lzma_str_to_filters( + const char *str, int *error_pos, lzma_filter *filters, + uint32_t flags, const lzma_allocator *allocator) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Convert a filter chain to a string + * + * Use cases: + * + * - Verbose output showing the full encoder options to the user + * (use LZMA_STR_ENCODER in flags) + * + * - Showing the filters and options that are required to decode a file + * (use LZMA_STR_DECODER in flags) + * + * - Showing the filter names without any options in informational messages + * where the technical details aren't important (no flags). In this case + * the .options in the filters array are ignored and may be NULL even if + * a filter has a mandatory options structure. + * + * Note that even if the filter chain was specified using a preset, + * the resulting filter chain isn't reversed to a preset. So if you + * specify "6" to lzma_str_to_filters() then lzma_str_from_filters() + * will produce a string containing "lzma2". + * + * \param[out] str On success *str will be set to point to an + * allocated string describing the given filter + * chain. Old value is ignored. On error *str is + * always set to NULL. + * \param filters Array of filters terminated with + * .id == LZMA_VLI_UNKNOWN. + * \param flags Bitwise-or of zero or more of the flags + * LZMA_STR_ENCODER, LZMA_STR_DECODER, + * LZMA_STR_GETOPT_LONG, and LZMA_STR_NO_SPACES. + * \param allocator lzma_allocator for custom allocator functions. + * Set to NULL to use malloc() and free(). + * + * \return Possible lzma_ret values: + * - LZMA_OK + * - LZMA_OPTIONS_ERROR: Empty filter chain + * (filters[0].id == LZMA_VLI_UNKNOWN) or the filter chain + * includes a Filter ID that is not supported by this function. + * - LZMA_MEM_ERROR + * - LZMA_PROG_ERROR + */ +extern LZMA_API(lzma_ret) lzma_str_from_filters( + char **str, const lzma_filter *filters, uint32_t flags, + const lzma_allocator *allocator) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief List available filters and/or their options (for help message) + * + * If a filter_id is given then only one line is created which contains the + * filter name. If LZMA_STR_ENCODER or LZMA_STR_DECODER is used then the + * options read by the encoder or decoder are printed on the same line. + * + * If filter_id is LZMA_VLI_UNKNOWN then all supported .xz-compatible filters + * are listed: + * + * - If neither LZMA_STR_ENCODER nor LZMA_STR_DECODER is used then + * the supported filter names are listed on a single line separated + * by spaces. + * + * - If LZMA_STR_ENCODER or LZMA_STR_DECODER is used then filters and + * the supported options are listed one filter per line. There won't + * be a newline after the last filter. + * + * - If LZMA_STR_ALL_FILTERS is used then the list will include also + * those filters that cannot be used in the .xz format (LZMA1). + * + * \param str On success *str will be set to point to an + * allocated string listing the filters and options. + * Old value is ignored. On error *str is always set + * to NULL. + * \param filter_id Filter ID or LZMA_VLI_UNKNOWN. + * \param flags Bitwise-or of zero or more of the flags + * LZMA_STR_ALL_FILTERS, LZMA_STR_ENCODER, + * LZMA_STR_DECODER, and LZMA_STR_GETOPT_LONG. + * \param allocator lzma_allocator for custom allocator functions. + * Set to NULL to use malloc() and free(). + * + * \return Possible lzma_ret values: + * - LZMA_OK + * - LZMA_OPTIONS_ERROR: Unsupported filter_id or flags + * - LZMA_MEM_ERROR + * - LZMA_PROG_ERROR + */ +extern LZMA_API(lzma_ret) lzma_str_list_filters( + char **str, lzma_vli filter_id, uint32_t flags, + const lzma_allocator *allocator) + lzma_nothrow lzma_attr_warn_unused_result; diff --git a/src/native/external/xz/src/liblzma/api/lzma/hardware.h b/src/native/external/xz/src/liblzma/api/lzma/hardware.h new file mode 100644 index 00000000000000..7a1a84fcccfcbc --- /dev/null +++ b/src/native/external/xz/src/liblzma/api/lzma/hardware.h @@ -0,0 +1,62 @@ +/* SPDX-License-Identifier: 0BSD */ + +/** + * \file lzma/hardware.h + * \brief Hardware information + * \note Never include this file directly. Use instead. + * + * Since liblzma can consume a lot of system resources, it also provides + * ways to limit the resource usage. Applications linking against liblzma + * need to do the actual decisions how much resources to let liblzma to use. + * To ease making these decisions, liblzma provides functions to find out + * the relevant capabilities of the underlying hardware. Currently there + * is only a function to find out the amount of RAM, but in the future there + * will be also a function to detect how many concurrent threads the system + * can run. + * + * \note On some operating systems, these function may temporarily + * load a shared library or open file descriptor(s) to find out + * the requested hardware information. Unless the application + * assumes that specific file descriptors are not touched by + * other threads, this should have no effect on thread safety. + * Possible operations involving file descriptors will restart + * the syscalls if they return EINTR. + */ + +/* + * Author: Lasse Collin + */ + +#ifndef LZMA_H_INTERNAL +# error Never include this file directly. Use instead. +#endif + + +/** + * \brief Get the total amount of physical memory (RAM) in bytes + * + * This function may be useful when determining a reasonable memory + * usage limit for decompressing or how much memory it is OK to use + * for compressing. + * + * \return On success, the total amount of physical memory in bytes + * is returned. If the amount of RAM cannot be determined, + * zero is returned. This can happen if an error occurs + * or if there is no code in liblzma to detect the amount + * of RAM on the specific operating system. + */ +extern LZMA_API(uint64_t) lzma_physmem(void) lzma_nothrow; + + +/** + * \brief Get the number of processor cores or threads + * + * This function may be useful when determining how many threads to use. + * If the hardware supports more than one thread per CPU core, the number + * of hardware threads is returned if that information is available. + * + * \return On success, the number of available CPU threads or cores is + * returned. If this information isn't available or an error + * occurs, zero is returned. + */ +extern LZMA_API(uint32_t) lzma_cputhreads(void) lzma_nothrow; diff --git a/src/native/external/xz/src/liblzma/api/lzma/index.h b/src/native/external/xz/src/liblzma/api/lzma/index.h new file mode 100644 index 00000000000000..b17025e3d90166 --- /dev/null +++ b/src/native/external/xz/src/liblzma/api/lzma/index.h @@ -0,0 +1,882 @@ +/* SPDX-License-Identifier: 0BSD */ + +/** + * \file lzma/index.h + * \brief Handling of .xz Index and related information + * \note Never include this file directly. Use instead. + */ + +/* + * Author: Lasse Collin + */ + +#ifndef LZMA_H_INTERNAL +# error Never include this file directly. Use instead. +#endif + + +/** + * \brief Opaque data type to hold the Index(es) and other information + * + * lzma_index often holds just one .xz Index and possibly the Stream Flags + * of the same Stream and size of the Stream Padding field. However, + * multiple lzma_indexes can be concatenated with lzma_index_cat() and then + * there may be information about multiple Streams in the same lzma_index. + * + * Notes about thread safety: Only one thread may modify lzma_index at + * a time. All functions that take non-const pointer to lzma_index + * modify it. As long as no thread is modifying the lzma_index, getting + * information from the same lzma_index can be done from multiple threads + * at the same time with functions that take a const pointer to + * lzma_index or use lzma_index_iter. The same iterator must be used + * only by one thread at a time, of course, but there can be as many + * iterators for the same lzma_index as needed. + */ +typedef struct lzma_index_s lzma_index; + + +/** + * \brief Iterator to get information about Blocks and Streams + */ +typedef struct { + struct { + /** + * \brief Pointer to Stream Flags + * + * This is NULL if Stream Flags have not been set for + * this Stream with lzma_index_stream_flags(). + */ + const lzma_stream_flags *flags; + + /** \private Reserved member. */ + const void *reserved_ptr1; + + /** \private Reserved member. */ + const void *reserved_ptr2; + + /** \private Reserved member. */ + const void *reserved_ptr3; + + /** + * \brief Stream number in the lzma_index + * + * The first Stream is 1. + */ + lzma_vli number; + + /** + * \brief Number of Blocks in the Stream + * + * If this is zero, the block structure below has + * undefined values. + */ + lzma_vli block_count; + + /** + * \brief Compressed start offset of this Stream + * + * The offset is relative to the beginning of the lzma_index + * (i.e. usually the beginning of the .xz file). + */ + lzma_vli compressed_offset; + + /** + * \brief Uncompressed start offset of this Stream + * + * The offset is relative to the beginning of the lzma_index + * (i.e. usually the beginning of the .xz file). + */ + lzma_vli uncompressed_offset; + + /** + * \brief Compressed size of this Stream + * + * This includes all headers except the possible + * Stream Padding after this Stream. + */ + lzma_vli compressed_size; + + /** + * \brief Uncompressed size of this Stream + */ + lzma_vli uncompressed_size; + + /** + * \brief Size of Stream Padding after this Stream + * + * If it hasn't been set with lzma_index_stream_padding(), + * this defaults to zero. Stream Padding is always + * a multiple of four bytes. + */ + lzma_vli padding; + + + /** \private Reserved member. */ + lzma_vli reserved_vli1; + + /** \private Reserved member. */ + lzma_vli reserved_vli2; + + /** \private Reserved member. */ + lzma_vli reserved_vli3; + + /** \private Reserved member. */ + lzma_vli reserved_vli4; + } stream; + + struct { + /** + * \brief Block number in the file + * + * The first Block is 1. + */ + lzma_vli number_in_file; + + /** + * \brief Compressed start offset of this Block + * + * This offset is relative to the beginning of the + * lzma_index (i.e. usually the beginning of the .xz file). + * Normally this is where you should seek in the .xz file + * to start decompressing this Block. + */ + lzma_vli compressed_file_offset; + + /** + * \brief Uncompressed start offset of this Block + * + * This offset is relative to the beginning of the lzma_index + * (i.e. usually the beginning of the .xz file). + * + * When doing random-access reading, it is possible that + * the target offset is not exactly at Block boundary. One + * will need to compare the target offset against + * uncompressed_file_offset or uncompressed_stream_offset, + * and possibly decode and throw away some amount of data + * before reaching the target offset. + */ + lzma_vli uncompressed_file_offset; + + /** + * \brief Block number in this Stream + * + * The first Block is 1. + */ + lzma_vli number_in_stream; + + /** + * \brief Compressed start offset of this Block + * + * This offset is relative to the beginning of the Stream + * containing this Block. + */ + lzma_vli compressed_stream_offset; + + /** + * \brief Uncompressed start offset of this Block + * + * This offset is relative to the beginning of the Stream + * containing this Block. + */ + lzma_vli uncompressed_stream_offset; + + /** + * \brief Uncompressed size of this Block + * + * You should pass this to the Block decoder if you will + * decode this Block. It will allow the Block decoder to + * validate the uncompressed size. + */ + lzma_vli uncompressed_size; + + /** + * \brief Unpadded size of this Block + * + * You should pass this to the Block decoder if you will + * decode this Block. It will allow the Block decoder to + * validate the unpadded size. + */ + lzma_vli unpadded_size; + + /** + * \brief Total compressed size + * + * This includes all headers and padding in this Block. + * This is useful if you need to know how many bytes + * the Block decoder will actually read. + */ + lzma_vli total_size; + + /** \private Reserved member. */ + lzma_vli reserved_vli1; + + /** \private Reserved member. */ + lzma_vli reserved_vli2; + + /** \private Reserved member. */ + lzma_vli reserved_vli3; + + /** \private Reserved member. */ + lzma_vli reserved_vli4; + + /** \private Reserved member. */ + const void *reserved_ptr1; + + /** \private Reserved member. */ + const void *reserved_ptr2; + + /** \private Reserved member. */ + const void *reserved_ptr3; + + /** \private Reserved member. */ + const void *reserved_ptr4; + } block; + + /** + * \private Internal data + * + * Internal data which is used to store the state of the iterator. + * The exact format may vary between liblzma versions, so don't + * touch these in any way. + */ + union { + /** \private Internal member. */ + const void *p; + + /** \private Internal member. */ + size_t s; + + /** \private Internal member. */ + lzma_vli v; + } internal[6]; +} lzma_index_iter; + + +/** + * \brief Operation mode for lzma_index_iter_next() + */ +typedef enum { + LZMA_INDEX_ITER_ANY = 0, + /**< + * \brief Get the next Block or Stream + * + * Go to the next Block if the current Stream has at least + * one Block left. Otherwise go to the next Stream even if + * it has no Blocks. If the Stream has no Blocks + * (lzma_index_iter.stream.block_count == 0), + * lzma_index_iter.block will have undefined values. + */ + + LZMA_INDEX_ITER_STREAM = 1, + /**< + * \brief Get the next Stream + * + * Go to the next Stream even if the current Stream has + * unread Blocks left. If the next Stream has at least one + * Block, the iterator will point to the first Block. + * If there are no Blocks, lzma_index_iter.block will have + * undefined values. + */ + + LZMA_INDEX_ITER_BLOCK = 2, + /**< + * \brief Get the next Block + * + * Go to the next Block if the current Stream has at least + * one Block left. If the current Stream has no Blocks left, + * the next Stream with at least one Block is located and + * the iterator will be made to point to the first Block of + * that Stream. + */ + + LZMA_INDEX_ITER_NONEMPTY_BLOCK = 3 + /**< + * \brief Get the next non-empty Block + * + * This is like LZMA_INDEX_ITER_BLOCK except that it will + * skip Blocks whose Uncompressed Size is zero. + */ + +} lzma_index_iter_mode; + + +/** + * \brief Mask for return value from lzma_index_checks() for check none + * + * \note This and the other CHECK_MASK macros were added in 5.5.1alpha. + */ +#define LZMA_INDEX_CHECK_MASK_NONE (UINT32_C(1) << LZMA_CHECK_NONE) + +/** + * \brief Mask for return value from lzma_index_checks() for check CRC32 + */ +#define LZMA_INDEX_CHECK_MASK_CRC32 (UINT32_C(1) << LZMA_CHECK_CRC32) + +/** + * \brief Mask for return value from lzma_index_checks() for check CRC64 + */ +#define LZMA_INDEX_CHECK_MASK_CRC64 (UINT32_C(1) << LZMA_CHECK_CRC64) + +/** + * \brief Mask for return value from lzma_index_checks() for check SHA256 + */ +#define LZMA_INDEX_CHECK_MASK_SHA256 (UINT32_C(1) << LZMA_CHECK_SHA256) + +/** + * \brief Calculate memory usage of lzma_index + * + * On disk, the size of the Index field depends on both the number of Records + * stored and the size of the Records (due to variable-length integer + * encoding). When the Index is kept in lzma_index structure, the memory usage + * depends only on the number of Records/Blocks stored in the Index(es), and + * in case of concatenated lzma_indexes, the number of Streams. The size in + * RAM is almost always significantly bigger than in the encoded form on disk. + * + * This function calculates an approximate amount of memory needed to hold + * the given number of Streams and Blocks in lzma_index structure. This + * value may vary between CPU architectures and also between liblzma versions + * if the internal implementation is modified. + * + * \param streams Number of Streams + * \param blocks Number of Blocks + * + * \return Approximate memory in bytes needed in a lzma_index structure. + */ +extern LZMA_API(uint64_t) lzma_index_memusage( + lzma_vli streams, lzma_vli blocks) lzma_nothrow; + + +/** + * \brief Calculate the memory usage of an existing lzma_index + * + * This is a shorthand for lzma_index_memusage(lzma_index_stream_count(i), + * lzma_index_block_count(i)). + * + * \param i Pointer to lzma_index structure + * + * \return Approximate memory in bytes used by the lzma_index structure. + */ +extern LZMA_API(uint64_t) lzma_index_memused(const lzma_index *i) + lzma_nothrow; + + +/** + * \brief Allocate and initialize a new lzma_index structure + * + * \param allocator lzma_allocator for custom allocator functions. + * Set to NULL to use malloc() and free(). + * + * \return On success, a pointer to an empty initialized lzma_index is + * returned. If allocation fails, NULL is returned. + */ +extern LZMA_API(lzma_index *) lzma_index_init(const lzma_allocator *allocator) + lzma_nothrow; + + +/** + * \brief Deallocate lzma_index + * + * If i is NULL, this does nothing. + * + * \param i Pointer to lzma_index structure to deallocate + * \param allocator lzma_allocator for custom allocator functions. + * Set to NULL to use malloc() and free(). + */ +extern LZMA_API(void) lzma_index_end( + lzma_index *i, const lzma_allocator *allocator) lzma_nothrow; + + +/** + * \brief Add a new Block to lzma_index + * + * \param i Pointer to a lzma_index structure + * \param allocator lzma_allocator for custom allocator + * functions. Set to NULL to use malloc() + * and free(). + * \param unpadded_size Unpadded Size of a Block. This can be + * calculated with lzma_block_unpadded_size() + * after encoding or decoding the Block. + * \param uncompressed_size Uncompressed Size of a Block. This can be + * taken directly from lzma_block structure + * after encoding or decoding the Block. + * + * Appending a new Block does not invalidate iterators. For example, + * if an iterator was pointing to the end of the lzma_index, after + * lzma_index_append() it is possible to read the next Block with + * an existing iterator. + * + * \return Possible lzma_ret values: + * - LZMA_OK + * - LZMA_MEM_ERROR + * - LZMA_DATA_ERROR: Compressed or uncompressed size of the + * Stream or size of the Index field would grow too big. + * - LZMA_PROG_ERROR + */ +extern LZMA_API(lzma_ret) lzma_index_append( + lzma_index *i, const lzma_allocator *allocator, + lzma_vli unpadded_size, lzma_vli uncompressed_size) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Set the Stream Flags + * + * Set the Stream Flags of the last (and typically the only) Stream + * in lzma_index. This can be useful when reading information from the + * lzma_index, because to decode Blocks, knowing the integrity check type + * is needed. + * + * \param i Pointer to lzma_index structure + * \param stream_flags Pointer to lzma_stream_flags structure. This + * is copied into the internal preallocated + * structure, so the caller doesn't need to keep + * the flags' data available after calling this + * function. + * + * \return Possible lzma_ret values: + * - LZMA_OK + * - LZMA_OPTIONS_ERROR: Unsupported stream_flags->version. + * - LZMA_PROG_ERROR + */ +extern LZMA_API(lzma_ret) lzma_index_stream_flags( + lzma_index *i, const lzma_stream_flags *stream_flags) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Get the types of integrity Checks + * + * If lzma_index_stream_flags() is used to set the Stream Flags for + * every Stream, lzma_index_checks() can be used to get a bitmask to + * indicate which Check types have been used. It can be useful e.g. if + * showing the Check types to the user. + * + * The bitmask is 1 << check_id, e.g. CRC32 is 1 << 1 and SHA-256 is 1 << 10. + * These masks are defined for convenience as LZMA_INDEX_CHECK_MASK_XXX + * + * \param i Pointer to lzma_index structure + * + * \return Bitmask indicating which Check types are used in the lzma_index + */ +extern LZMA_API(uint32_t) lzma_index_checks(const lzma_index *i) + lzma_nothrow lzma_attr_pure; + + +/** + * \brief Set the amount of Stream Padding + * + * Set the amount of Stream Padding of the last (and typically the only) + * Stream in the lzma_index. This is needed when planning to do random-access + * reading within multiple concatenated Streams. + * + * By default, the amount of Stream Padding is assumed to be zero bytes. + * + * \return Possible lzma_ret values: + * - LZMA_OK + * - LZMA_DATA_ERROR: The file size would grow too big. + * - LZMA_PROG_ERROR + */ +extern LZMA_API(lzma_ret) lzma_index_stream_padding( + lzma_index *i, lzma_vli stream_padding) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Get the number of Streams + * + * \param i Pointer to lzma_index structure + * + * \return Number of Streams in the lzma_index + */ +extern LZMA_API(lzma_vli) lzma_index_stream_count(const lzma_index *i) + lzma_nothrow lzma_attr_pure; + + +/** + * \brief Get the number of Blocks + * + * This returns the total number of Blocks in lzma_index. To get number + * of Blocks in individual Streams, use lzma_index_iter. + * + * \param i Pointer to lzma_index structure + * + * \return Number of blocks in the lzma_index + */ +extern LZMA_API(lzma_vli) lzma_index_block_count(const lzma_index *i) + lzma_nothrow lzma_attr_pure; + + +/** + * \brief Get the size of the Index field as bytes + * + * This is needed to verify the Backward Size field in the Stream Footer. + * + * \param i Pointer to lzma_index structure + * + * \return Size in bytes of the Index + */ +extern LZMA_API(lzma_vli) lzma_index_size(const lzma_index *i) + lzma_nothrow lzma_attr_pure; + + +/** + * \brief Get the total size of the Stream + * + * If multiple lzma_indexes have been combined, this works as if the Blocks + * were in a single Stream. This is useful if you are going to combine + * Blocks from multiple Streams into a single new Stream. + * + * \param i Pointer to lzma_index structure + * + * \return Size in bytes of the Stream (if all Blocks are combined + * into one Stream). + */ +extern LZMA_API(lzma_vli) lzma_index_stream_size(const lzma_index *i) + lzma_nothrow lzma_attr_pure; + + +/** + * \brief Get the total size of the Blocks + * + * This doesn't include the Stream Header, Stream Footer, Stream Padding, + * or Index fields. + * + * \param i Pointer to lzma_index structure + * + * \return Size in bytes of all Blocks in the Stream(s) + */ +extern LZMA_API(lzma_vli) lzma_index_total_size(const lzma_index *i) + lzma_nothrow lzma_attr_pure; + + +/** + * \brief Get the total size of the file + * + * When no lzma_indexes have been combined with lzma_index_cat() and there is + * no Stream Padding, this function is identical to lzma_index_stream_size(). + * If multiple lzma_indexes have been combined, this includes also the headers + * of each separate Stream and the possible Stream Padding fields. + * + * \param i Pointer to lzma_index structure + * + * \return Total size of the .xz file in bytes + */ +extern LZMA_API(lzma_vli) lzma_index_file_size(const lzma_index *i) + lzma_nothrow lzma_attr_pure; + + +/** + * \brief Get the uncompressed size of the file + * + * \param i Pointer to lzma_index structure + * + * \return Size in bytes of the uncompressed data in the file + */ +extern LZMA_API(lzma_vli) lzma_index_uncompressed_size(const lzma_index *i) + lzma_nothrow lzma_attr_pure; + + +/** + * \brief Initialize an iterator + * + * This function associates the iterator with the given lzma_index, and calls + * lzma_index_iter_rewind() on the iterator. + * + * This function doesn't allocate any memory, thus there is no + * lzma_index_iter_end(). The iterator is valid as long as the + * associated lzma_index is valid, that is, until lzma_index_end() or + * using it as source in lzma_index_cat(). Specifically, lzma_index doesn't + * become invalid if new Blocks are added to it with lzma_index_append() or + * if it is used as the destination in lzma_index_cat(). + * + * It is safe to make copies of an initialized lzma_index_iter, for example, + * to easily restart reading at some particular position. + * + * \param iter Pointer to a lzma_index_iter structure + * \param i lzma_index to which the iterator will be associated + */ +extern LZMA_API(void) lzma_index_iter_init( + lzma_index_iter *iter, const lzma_index *i) lzma_nothrow; + + +/** + * \brief Rewind the iterator + * + * Rewind the iterator so that next call to lzma_index_iter_next() will + * return the first Block or Stream. + * + * \param iter Pointer to a lzma_index_iter structure + */ +extern LZMA_API(void) lzma_index_iter_rewind(lzma_index_iter *iter) + lzma_nothrow; + + +/** + * \brief Get the next Block or Stream + * + * \param iter Iterator initialized with lzma_index_iter_init() + * \param mode Specify what kind of information the caller wants + * to get. See lzma_index_iter_mode for details. + * + * \return lzma_bool: + * - true if no Block or Stream matching the mode is found. + * *iter is not updated (failure). + * - false if the next Block or Stream matching the mode was + * found. *iter is updated (success). + */ +extern LZMA_API(lzma_bool) lzma_index_iter_next( + lzma_index_iter *iter, lzma_index_iter_mode mode) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Locate a Block + * + * If it is possible to seek in the .xz file, it is possible to parse + * the Index field(s) and use lzma_index_iter_locate() to do random-access + * reading with granularity of Block size. + * + * If the target is smaller than the uncompressed size of the Stream (can be + * checked with lzma_index_uncompressed_size()): + * - Information about the Stream and Block containing the requested + * uncompressed offset is stored into *iter. + * - Internal state of the iterator is adjusted so that + * lzma_index_iter_next() can be used to read subsequent Blocks or Streams. + * + * If the target is greater than the uncompressed size of the Stream, *iter + * is not modified. + * + * \param iter Iterator that was earlier initialized with + * lzma_index_iter_init(). + * \param target Uncompressed target offset which the caller would + * like to locate from the Stream + * + * \return lzma_bool: + * - true if the target is greater than or equal to the + * uncompressed size of the Stream (failure) + * - false if the target is smaller than the uncompressed size + * of the Stream (success) + */ +extern LZMA_API(lzma_bool) lzma_index_iter_locate( + lzma_index_iter *iter, lzma_vli target) lzma_nothrow; + + +/** + * \brief Concatenate lzma_indexes + * + * Concatenating lzma_indexes is useful when doing random-access reading in + * multi-Stream .xz file, or when combining multiple Streams into single + * Stream. + * + * \param[out] dest lzma_index after which src is appended + * \param src lzma_index to be appended after dest. If this + * function succeeds, the memory allocated for src + * is freed or moved to be part of dest, and all + * iterators pointing to src will become invalid. + * \param allocator lzma_allocator for custom allocator functions. + * Set to NULL to use malloc() and free(). + * + * \return Possible lzma_ret values: + * - LZMA_OK: lzma_indexes were concatenated successfully. + * src is now a dangling pointer. + * - LZMA_DATA_ERROR: *dest would grow too big. + * - LZMA_MEM_ERROR + * - LZMA_PROG_ERROR + */ +extern LZMA_API(lzma_ret) lzma_index_cat(lzma_index *dest, lzma_index *src, + const lzma_allocator *allocator) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Duplicate lzma_index + * + * \param i Pointer to lzma_index structure to be duplicated + * \param allocator lzma_allocator for custom allocator functions. + * Set to NULL to use malloc() and free(). + * + * \return A copy of the lzma_index, or NULL if memory allocation failed. + */ +extern LZMA_API(lzma_index *) lzma_index_dup( + const lzma_index *i, const lzma_allocator *allocator) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Initialize .xz Index encoder + * + * \param strm Pointer to properly prepared lzma_stream + * \param i Pointer to lzma_index which should be encoded. + * + * The valid 'action' values for lzma_code() are LZMA_RUN and LZMA_FINISH. + * It is enough to use only one of them (you can choose freely). + * + * \return Possible lzma_ret values: + * - LZMA_OK: Initialization succeeded, continue with lzma_code(). + * - LZMA_MEM_ERROR + * - LZMA_PROG_ERROR + */ +extern LZMA_API(lzma_ret) lzma_index_encoder( + lzma_stream *strm, const lzma_index *i) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Initialize .xz Index decoder + * + * \param strm Pointer to properly prepared lzma_stream + * \param[out] i The decoded Index will be made available via + * this pointer. Initially this function will + * set *i to NULL (the old value is ignored). If + * decoding succeeds (lzma_code() returns + * LZMA_STREAM_END), *i will be set to point + * to a new lzma_index, which the application + * has to later free with lzma_index_end(). + * \param memlimit How much memory the resulting lzma_index is + * allowed to require. liblzma 5.2.3 and earlier + * don't allow 0 here and return LZMA_PROG_ERROR; + * later versions treat 0 as if 1 had been specified. + * + * Valid 'action' arguments to lzma_code() are LZMA_RUN and LZMA_FINISH. + * There is no need to use LZMA_FINISH, but it's allowed because it may + * simplify certain types of applications. + * + * \return Possible lzma_ret values: + * - LZMA_OK: Initialization succeeded, continue with lzma_code(). + * - LZMA_MEM_ERROR + * - LZMA_PROG_ERROR + * + * \note liblzma 5.2.3 and older list also LZMA_MEMLIMIT_ERROR here + * but that error code has never been possible from this + * initialization function. + */ +extern LZMA_API(lzma_ret) lzma_index_decoder( + lzma_stream *strm, lzma_index **i, uint64_t memlimit) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Single-call .xz Index encoder + * + * \note This function doesn't take allocator argument since all + * the internal data is allocated on stack. + * + * \param i lzma_index to be encoded + * \param[out] out Beginning of the output buffer + * \param[out] out_pos The next byte will be written to out[*out_pos]. + * *out_pos is updated only if encoding succeeds. + * \param out_size Size of the out buffer; the first byte into + * which no data is written to is out[out_size]. + * + * \return Possible lzma_ret values: + * - LZMA_OK: Encoding was successful. + * - LZMA_BUF_ERROR: Output buffer is too small. Use + * lzma_index_size() to find out how much output + * space is needed. + * - LZMA_PROG_ERROR + * + */ +extern LZMA_API(lzma_ret) lzma_index_buffer_encode(const lzma_index *i, + uint8_t *out, size_t *out_pos, size_t out_size) lzma_nothrow; + + +/** + * \brief Single-call .xz Index decoder + * + * \param[out] i If decoding succeeds, *i will point to a new + * lzma_index, which the application has to + * later free with lzma_index_end(). If an error + * occurs, *i will be NULL. The old value of *i + * is always ignored and thus doesn't need to be + * initialized by the caller. + * \param[out] memlimit Pointer to how much memory the resulting + * lzma_index is allowed to require. The value + * pointed by this pointer is modified if and only + * if LZMA_MEMLIMIT_ERROR is returned. + * \param allocator lzma_allocator for custom allocator functions. + * Set to NULL to use malloc() and free(). + * \param in Beginning of the input buffer + * \param in_pos The next byte will be read from in[*in_pos]. + * *in_pos is updated only if decoding succeeds. + * \param in_size Size of the input buffer; the first byte that + * won't be read is in[in_size]. + * + * \return Possible lzma_ret values: + * - LZMA_OK: Decoding was successful. + * - LZMA_MEM_ERROR + * - LZMA_MEMLIMIT_ERROR: Memory usage limit was reached. + * The minimum required memlimit value was stored to *memlimit. + * - LZMA_DATA_ERROR + * - LZMA_PROG_ERROR + */ +extern LZMA_API(lzma_ret) lzma_index_buffer_decode(lzma_index **i, + uint64_t *memlimit, const lzma_allocator *allocator, + const uint8_t *in, size_t *in_pos, size_t in_size) + lzma_nothrow; + + +/** + * \brief Initialize a .xz file information decoder + * + * This decoder decodes the Stream Header, Stream Footer, Index, and + * Stream Padding field(s) from the input .xz file and stores the resulting + * combined index in *dest_index. This information can be used to get the + * uncompressed file size with lzma_index_uncompressed_size(*dest_index) or, + * for example, to implement random access reading by locating the Blocks + * in the Streams. + * + * To get the required information from the .xz file, lzma_code() may ask + * the application to seek in the input file by returning LZMA_SEEK_NEEDED + * and having the target file position specified in lzma_stream.seek_pos. + * The number of seeks required depends on the input file and how big buffers + * the application provides. When possible, the decoder will seek backward + * and forward in the given buffer to avoid useless seek requests. Thus, if + * the application provides the whole file at once, no external seeking will + * be required (that is, lzma_code() won't return LZMA_SEEK_NEEDED). + * + * The value in lzma_stream.total_in can be used to estimate how much data + * liblzma had to read to get the file information. However, due to seeking + * and the way total_in is updated, the value of total_in will be somewhat + * inaccurate (a little too big). Thus, total_in is a good estimate but don't + * expect to see the same exact value for the same file if you change the + * input buffer size or switch to a different liblzma version. + * + * Valid 'action' arguments to lzma_code() are LZMA_RUN and LZMA_FINISH. + * You only need to use LZMA_RUN; LZMA_FINISH is only supported because it + * might be convenient for some applications. If you use LZMA_FINISH and if + * lzma_code() asks the application to seek, remember to reset 'action' back + * to LZMA_RUN unless you hit the end of the file again. + * + * Possible return values from lzma_code(): + * - LZMA_OK: All OK so far, more input needed + * - LZMA_SEEK_NEEDED: Provide more input starting from the absolute + * file position strm->seek_pos + * - LZMA_STREAM_END: Decoding was successful, *dest_index has been set + * - LZMA_FORMAT_ERROR: The input file is not in the .xz format (the + * expected magic bytes were not found from the beginning of the file) + * - LZMA_OPTIONS_ERROR: File looks valid but contains headers that aren't + * supported by this version of liblzma + * - LZMA_DATA_ERROR: File is corrupt + * - LZMA_BUF_ERROR + * - LZMA_MEM_ERROR + * - LZMA_MEMLIMIT_ERROR + * - LZMA_PROG_ERROR + * + * \param strm Pointer to a properly prepared lzma_stream + * \param[out] dest_index Pointer to a pointer where the decoder will put + * the decoded lzma_index. The old value + * of *dest_index is ignored (not freed). + * \param memlimit How much memory the resulting lzma_index is + * allowed to require. Use UINT64_MAX to + * effectively disable the limiter. + * \param file_size Size of the input .xz file + * + * \return Possible lzma_ret values: + * - LZMA_OK + * - LZMA_MEM_ERROR + * - LZMA_PROG_ERROR + */ +extern LZMA_API(lzma_ret) lzma_file_info_decoder( + lzma_stream *strm, lzma_index **dest_index, + uint64_t memlimit, uint64_t file_size) + lzma_nothrow; diff --git a/src/native/external/xz/src/liblzma/api/lzma/index_hash.h b/src/native/external/xz/src/liblzma/api/lzma/index_hash.h new file mode 100644 index 00000000000000..68f9024eb3bc66 --- /dev/null +++ b/src/native/external/xz/src/liblzma/api/lzma/index_hash.h @@ -0,0 +1,123 @@ +/* SPDX-License-Identifier: 0BSD */ + +/** + * \file lzma/index_hash.h + * \brief Validate Index by using a hash function + * \note Never include this file directly. Use instead. + * + * Hashing makes it possible to use constant amount of memory to validate + * Index of arbitrary size. + */ + +/* + * Author: Lasse Collin + */ + +#ifndef LZMA_H_INTERNAL +# error Never include this file directly. Use instead. +#endif + +/** + * \brief Opaque data type to hold the Index hash + */ +typedef struct lzma_index_hash_s lzma_index_hash; + + +/** + * \brief Allocate and initialize a new lzma_index_hash structure + * + * If index_hash is NULL, this function allocates and initializes a new + * lzma_index_hash structure and returns a pointer to it. If allocation + * fails, NULL is returned. + * + * If index_hash is non-NULL, this function reinitializes the lzma_index_hash + * structure and returns the same pointer. In this case, return value cannot + * be NULL or a different pointer than the index_hash that was given as + * an argument. + * + * \param index_hash Pointer to a lzma_index_hash structure or NULL. + * \param allocator lzma_allocator for custom allocator functions. + * Set to NULL to use malloc() and free(). + * + * \return Initialized lzma_index_hash structure on success or + * NULL on failure. + */ +extern LZMA_API(lzma_index_hash *) lzma_index_hash_init( + lzma_index_hash *index_hash, const lzma_allocator *allocator) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Deallocate lzma_index_hash structure + * + * \param index_hash Pointer to a lzma_index_hash structure to free. + * \param allocator lzma_allocator for custom allocator functions. + * Set to NULL to use malloc() and free(). + */ +extern LZMA_API(void) lzma_index_hash_end( + lzma_index_hash *index_hash, const lzma_allocator *allocator) + lzma_nothrow; + + +/** + * \brief Add a new Record to an Index hash + * + * \param index_hash Pointer to a lzma_index_hash structure + * \param unpadded_size Unpadded Size of a Block + * \param uncompressed_size Uncompressed Size of a Block + * + * \return Possible lzma_ret values: + * - LZMA_OK + * - LZMA_DATA_ERROR: Compressed or uncompressed size of the + * Stream or size of the Index field would grow too big. + * - LZMA_PROG_ERROR: Invalid arguments or this function is being + * used when lzma_index_hash_decode() has already been used. + */ +extern LZMA_API(lzma_ret) lzma_index_hash_append(lzma_index_hash *index_hash, + lzma_vli unpadded_size, lzma_vli uncompressed_size) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Decode and validate the Index field + * + * After telling the sizes of all Blocks with lzma_index_hash_append(), + * the actual Index field is decoded with this function. Specifically, + * once decoding of the Index field has been started, no more Records + * can be added using lzma_index_hash_append(). + * + * This function doesn't use lzma_stream structure to pass the input data. + * Instead, the input buffer is specified using three arguments. This is + * because it matches better the internal APIs of liblzma. + * + * \param index_hash Pointer to a lzma_index_hash structure + * \param in Pointer to the beginning of the input buffer + * \param[out] in_pos in[*in_pos] is the next byte to process + * \param in_size in[in_size] is the first byte not to process + * + * \return Possible lzma_ret values: + * - LZMA_OK: So far good, but more input is needed. + * - LZMA_STREAM_END: Index decoded successfully and it matches + * the Records given with lzma_index_hash_append(). + * - LZMA_DATA_ERROR: Index is corrupt or doesn't match the + * information given with lzma_index_hash_append(). + * - LZMA_BUF_ERROR: Cannot progress because *in_pos >= in_size. + * - LZMA_PROG_ERROR + */ +extern LZMA_API(lzma_ret) lzma_index_hash_decode(lzma_index_hash *index_hash, + const uint8_t *in, size_t *in_pos, size_t in_size) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Get the size of the Index field as bytes + * + * This is needed to verify the Backward Size field in the Stream Footer. + * + * \param index_hash Pointer to a lzma_index_hash structure + * + * \return Size of the Index field in bytes. + */ +extern LZMA_API(lzma_vli) lzma_index_hash_size( + const lzma_index_hash *index_hash) + lzma_nothrow lzma_attr_pure; diff --git a/src/native/external/xz/src/liblzma/api/lzma/lzma12.h b/src/native/external/xz/src/liblzma/api/lzma/lzma12.h new file mode 100644 index 00000000000000..fec3e0dadb2332 --- /dev/null +++ b/src/native/external/xz/src/liblzma/api/lzma/lzma12.h @@ -0,0 +1,568 @@ +/* SPDX-License-Identifier: 0BSD */ + +/** + * \file lzma/lzma12.h + * \brief LZMA1 and LZMA2 filters + * \note Never include this file directly. Use instead. + */ + +/* + * Author: Lasse Collin + */ + +#ifndef LZMA_H_INTERNAL +# error Never include this file directly. Use instead. +#endif + + +/** + * \brief LZMA1 Filter ID (for raw encoder/decoder only, not in .xz) + * + * LZMA1 is the very same thing as what was called just LZMA in LZMA Utils, + * 7-Zip, and LZMA SDK. It's called LZMA1 here to prevent developers from + * accidentally using LZMA when they actually want LZMA2. + */ +#define LZMA_FILTER_LZMA1 LZMA_VLI_C(0x4000000000000001) + +/** + * \brief LZMA1 Filter ID with extended options (for raw encoder/decoder) + * + * This is like LZMA_FILTER_LZMA1 but with this ID a few extra options + * are supported in the lzma_options_lzma structure: + * + * - A flag to tell the encoder if the end of payload marker (EOPM) alias + * end of stream (EOS) marker must be written at the end of the stream. + * In contrast, LZMA_FILTER_LZMA1 always writes the end marker. + * + * - Decoder needs to be told the uncompressed size of the stream + * or that it is unknown (using the special value UINT64_MAX). + * If the size is known, a flag can be set to allow the presence of + * the end marker anyway. In contrast, LZMA_FILTER_LZMA1 always + * behaves as if the uncompressed size was unknown. + * + * This allows handling file formats where LZMA1 streams are used but where + * the end marker isn't allowed or where it might not (always) be present. + * This extended LZMA1 functionality is provided as a Filter ID for raw + * encoder and decoder instead of adding new encoder and decoder initialization + * functions because this way it is possible to also use extra filters, + * for example, LZMA_FILTER_X86 in a filter chain with LZMA_FILTER_LZMA1EXT, + * which might be needed to handle some file formats. + */ +#define LZMA_FILTER_LZMA1EXT LZMA_VLI_C(0x4000000000000002) + +/** + * \brief LZMA2 Filter ID + * + * Usually you want this instead of LZMA1. Compared to LZMA1, LZMA2 adds + * support for LZMA_SYNC_FLUSH, uncompressed chunks (smaller expansion + * when trying to compress incompressible data), possibility to change + * lc/lp/pb in the middle of encoding, and some other internal improvements. + */ +#define LZMA_FILTER_LZMA2 LZMA_VLI_C(0x21) + + +/** + * \brief Match finders + * + * Match finder has major effect on both speed and compression ratio. + * Usually hash chains are faster than binary trees. + * + * If you will use LZMA_SYNC_FLUSH often, the hash chains may be a better + * choice, because binary trees get much higher compression ratio penalty + * with LZMA_SYNC_FLUSH. + * + * The memory usage formulas are only rough estimates, which are closest to + * reality when dict_size is a power of two. The formulas are more complex + * in reality, and can also change a little between liblzma versions. Use + * lzma_raw_encoder_memusage() to get more accurate estimate of memory usage. + */ +typedef enum { + LZMA_MF_HC3 = 0x03, + /**< + * \brief Hash Chain with 2- and 3-byte hashing + * + * Minimum nice_len: 3 + * + * Memory usage: + * - dict_size <= 16 MiB: dict_size * 7.5 + * - dict_size > 16 MiB: dict_size * 5.5 + 64 MiB + */ + + LZMA_MF_HC4 = 0x04, + /**< + * \brief Hash Chain with 2-, 3-, and 4-byte hashing + * + * Minimum nice_len: 4 + * + * Memory usage: + * - dict_size <= 32 MiB: dict_size * 7.5 + * - dict_size > 32 MiB: dict_size * 6.5 + */ + + LZMA_MF_BT2 = 0x12, + /**< + * \brief Binary Tree with 2-byte hashing + * + * Minimum nice_len: 2 + * + * Memory usage: dict_size * 9.5 + */ + + LZMA_MF_BT3 = 0x13, + /**< + * \brief Binary Tree with 2- and 3-byte hashing + * + * Minimum nice_len: 3 + * + * Memory usage: + * - dict_size <= 16 MiB: dict_size * 11.5 + * - dict_size > 16 MiB: dict_size * 9.5 + 64 MiB + */ + + LZMA_MF_BT4 = 0x14 + /**< + * \brief Binary Tree with 2-, 3-, and 4-byte hashing + * + * Minimum nice_len: 4 + * + * Memory usage: + * - dict_size <= 32 MiB: dict_size * 11.5 + * - dict_size > 32 MiB: dict_size * 10.5 + */ +} lzma_match_finder; + + +/** + * \brief Test if given match finder is supported + * + * It is safe to call this with a value that isn't listed in + * lzma_match_finder enumeration; the return value will be false. + * + * There is no way to list which match finders are available in this + * particular liblzma version and build. It would be useless, because + * a new match finder, which the application developer wasn't aware, + * could require giving additional options to the encoder that the older + * match finders don't need. + * + * \param match_finder Match finder ID + * + * \return lzma_bool: + * - true if the match finder is supported by this liblzma build. + * - false otherwise. + */ +extern LZMA_API(lzma_bool) lzma_mf_is_supported(lzma_match_finder match_finder) + lzma_nothrow lzma_attr_const; + + +/** + * \brief Compression modes + * + * This selects the function used to analyze the data produced by the match + * finder. + */ +typedef enum { + LZMA_MODE_FAST = 1, + /**< + * \brief Fast compression + * + * Fast mode is usually at its best when combined with + * a hash chain match finder. + */ + + LZMA_MODE_NORMAL = 2 + /**< + * \brief Normal compression + * + * This is usually notably slower than fast mode. Use this + * together with binary tree match finders to expose the + * full potential of the LZMA1 or LZMA2 encoder. + */ +} lzma_mode; + + +/** + * \brief Test if given compression mode is supported + * + * It is safe to call this with a value that isn't listed in lzma_mode + * enumeration; the return value will be false. + * + * There is no way to list which modes are available in this particular + * liblzma version and build. It would be useless, because a new compression + * mode, which the application developer wasn't aware, could require giving + * additional options to the encoder that the older modes don't need. + * + * \param mode Mode ID. + * + * \return lzma_bool: + * - true if the compression mode is supported by this liblzma + * build. + * - false otherwise. + */ +extern LZMA_API(lzma_bool) lzma_mode_is_supported(lzma_mode mode) + lzma_nothrow lzma_attr_const; + + +/** + * \brief Options specific to the LZMA1 and LZMA2 filters + * + * Since LZMA1 and LZMA2 share most of the code, it's simplest to share + * the options structure too. For encoding, all but the reserved variables + * need to be initialized unless specifically mentioned otherwise. + * lzma_lzma_preset() can be used to get a good starting point. + * + * For raw decoding, both LZMA1 and LZMA2 need dict_size, preset_dict, and + * preset_dict_size (if preset_dict != NULL). LZMA1 needs also lc, lp, and pb. + */ +typedef struct { + /** + * \brief Dictionary size in bytes + * + * Dictionary size indicates how many bytes of the recently processed + * uncompressed data is kept in memory. One method to reduce size of + * the uncompressed data is to store distance-length pairs, which + * indicate what data to repeat from the dictionary buffer. Thus, + * the bigger the dictionary, the better the compression ratio + * usually is. + * + * Maximum size of the dictionary depends on multiple things: + * - Memory usage limit + * - Available address space (not a problem on 64-bit systems) + * - Selected match finder (encoder only) + * + * Currently the maximum dictionary size for encoding is 1.5 GiB + * (i.e. (UINT32_C(1) << 30) + (UINT32_C(1) << 29)) even on 64-bit + * systems for certain match finder implementation reasons. In the + * future, there may be match finders that support bigger + * dictionaries. + * + * Decoder already supports dictionaries up to 4 GiB - 1 B (i.e. + * UINT32_MAX), so increasing the maximum dictionary size of the + * encoder won't cause problems for old decoders. + * + * Because extremely small dictionaries sizes would have unneeded + * overhead in the decoder, the minimum dictionary size is 4096 bytes. + * + * \note When decoding, too big dictionary does no other harm + * than wasting memory. + */ + uint32_t dict_size; +# define LZMA_DICT_SIZE_MIN UINT32_C(4096) +# define LZMA_DICT_SIZE_DEFAULT (UINT32_C(1) << 23) + + /** + * \brief Pointer to an initial dictionary + * + * It is possible to initialize the LZ77 history window using + * a preset dictionary. It is useful when compressing many + * similar, relatively small chunks of data independently from + * each other. The preset dictionary should contain typical + * strings that occur in the files being compressed. The most + * probable strings should be near the end of the preset dictionary. + * + * This feature should be used only in special situations. For + * now, it works correctly only with raw encoding and decoding. + * Currently none of the container formats supported by + * liblzma allow preset dictionary when decoding, thus if + * you create a .xz or .lzma file with preset dictionary, it + * cannot be decoded with the regular decoder functions. In the + * future, the .xz format will likely get support for preset + * dictionary though. + */ + const uint8_t *preset_dict; + + /** + * \brief Size of the preset dictionary + * + * Specifies the size of the preset dictionary. If the size is + * bigger than dict_size, only the last dict_size bytes are + * processed. + * + * This variable is read only when preset_dict is not NULL. + * If preset_dict is not NULL but preset_dict_size is zero, + * no preset dictionary is used (identical to only setting + * preset_dict to NULL). + */ + uint32_t preset_dict_size; + + /** + * \brief Number of literal context bits + * + * How many of the highest bits of the previous uncompressed + * eight-bit byte (also known as 'literal') are taken into + * account when predicting the bits of the next literal. + * + * E.g. in typical English text, an upper-case letter is + * often followed by a lower-case letter, and a lower-case + * letter is usually followed by another lower-case letter. + * In the US-ASCII character set, the highest three bits are 010 + * for upper-case letters and 011 for lower-case letters. + * When lc is at least 3, the literal coding can take advantage of + * this property in the uncompressed data. + * + * There is a limit that applies to literal context bits and literal + * position bits together: lc + lp <= 4. Without this limit the + * decoding could become very slow, which could have security related + * results in some cases like email servers doing virus scanning. + * This limit also simplifies the internal implementation in liblzma. + * + * There may be LZMA1 streams that have lc + lp > 4 (maximum possible + * lc would be 8). It is not possible to decode such streams with + * liblzma. + */ + uint32_t lc; +# define LZMA_LCLP_MIN 0 +# define LZMA_LCLP_MAX 4 +# define LZMA_LC_DEFAULT 3 + + /** + * \brief Number of literal position bits + * + * lp affects what kind of alignment in the uncompressed data is + * assumed when encoding literals. A literal is a single 8-bit byte. + * See pb below for more information about alignment. + */ + uint32_t lp; +# define LZMA_LP_DEFAULT 0 + + /** + * \brief Number of position bits + * + * pb affects what kind of alignment in the uncompressed data is + * assumed in general. The default means four-byte alignment + * (2^ pb =2^2=4), which is often a good choice when there's + * no better guess. + * + * When the alignment is known, setting pb accordingly may reduce + * the file size a little. E.g. with text files having one-byte + * alignment (US-ASCII, ISO-8859-*, UTF-8), setting pb=0 can + * improve compression slightly. For UTF-16 text, pb=1 is a good + * choice. If the alignment is an odd number like 3 bytes, pb=0 + * might be the best choice. + * + * Even though the assumed alignment can be adjusted with pb and + * lp, LZMA1 and LZMA2 still slightly favor 16-byte alignment. + * It might be worth taking into account when designing file formats + * that are likely to be often compressed with LZMA1 or LZMA2. + */ + uint32_t pb; +# define LZMA_PB_MIN 0 +# define LZMA_PB_MAX 4 +# define LZMA_PB_DEFAULT 2 + + /** Compression mode */ + lzma_mode mode; + + /** + * \brief Nice length of a match + * + * This determines how many bytes the encoder compares from the match + * candidates when looking for the best match. Once a match of at + * least nice_len bytes long is found, the encoder stops looking for + * better candidates and encodes the match. (Naturally, if the found + * match is actually longer than nice_len, the actual length is + * encoded; it's not truncated to nice_len.) + * + * Bigger values usually increase the compression ratio and + * compression time. For most files, 32 to 128 is a good value, + * which gives very good compression ratio at good speed. + * + * The exact minimum value depends on the match finder. The maximum + * is 273, which is the maximum length of a match that LZMA1 and + * LZMA2 can encode. + */ + uint32_t nice_len; + + /** Match finder ID */ + lzma_match_finder mf; + + /** + * \brief Maximum search depth in the match finder + * + * For every input byte, match finder searches through the hash chain + * or binary tree in a loop, each iteration going one step deeper in + * the chain or tree. The searching stops if + * - a match of at least nice_len bytes long is found; + * - all match candidates from the hash chain or binary tree have + * been checked; or + * - maximum search depth is reached. + * + * Maximum search depth is needed to prevent the match finder from + * wasting too much time in case there are lots of short match + * candidates. On the other hand, stopping the search before all + * candidates have been checked can reduce compression ratio. + * + * Setting depth to zero tells liblzma to use an automatic default + * value, that depends on the selected match finder and nice_len. + * The default is in the range [4, 200] or so (it may vary between + * liblzma versions). + * + * Using a bigger depth value than the default can increase + * compression ratio in some cases. There is no strict maximum value, + * but high values (thousands or millions) should be used with care: + * the encoder could remain fast enough with typical input, but + * malicious input could cause the match finder to slow down + * dramatically, possibly creating a denial of service attack. + */ + uint32_t depth; + + /** + * \brief For LZMA_FILTER_LZMA1EXT: Extended flags + * + * This is used only with LZMA_FILTER_LZMA1EXT. + * + * Currently only one flag is supported, LZMA_LZMA1EXT_ALLOW_EOPM: + * + * - Encoder: If the flag is set, then end marker is written just + * like it is with LZMA_FILTER_LZMA1. Without this flag the + * end marker isn't written and the application has to store + * the uncompressed size somewhere outside the compressed stream. + * To decompress streams without the end marker, the application + * has to set the correct uncompressed size in ext_size_low and + * ext_size_high. + * + * - Decoder: If the uncompressed size in ext_size_low and + * ext_size_high is set to the special value UINT64_MAX + * (indicating unknown uncompressed size) then this flag is + * ignored and the end marker must always be present, that is, + * the behavior is identical to LZMA_FILTER_LZMA1. + * + * Otherwise, if this flag isn't set, then the input stream + * must not have the end marker; if the end marker is detected + * then it will result in LZMA_DATA_ERROR. This is useful when + * it is known that the stream must not have the end marker and + * strict validation is wanted. + * + * If this flag is set, then it is autodetected if the end marker + * is present after the specified number of uncompressed bytes + * has been decompressed (ext_size_low and ext_size_high). The + * end marker isn't allowed in any other position. This behavior + * is useful when uncompressed size is known but the end marker + * may or may not be present. This is the case, for example, + * in .7z files (valid .7z files that have the end marker in + * LZMA1 streams are rare but they do exist). + */ + uint32_t ext_flags; +# define LZMA_LZMA1EXT_ALLOW_EOPM UINT32_C(0x01) + + /** + * \brief For LZMA_FILTER_LZMA1EXT: Uncompressed size (low bits) + * + * The 64-bit uncompressed size is needed for decompression with + * LZMA_FILTER_LZMA1EXT. The size is ignored by the encoder. + * + * The special value UINT64_MAX indicates that the uncompressed size + * is unknown and that the end of payload marker (also known as + * end of stream marker) must be present to indicate the end of + * the LZMA1 stream. Any other value indicates the expected + * uncompressed size of the LZMA1 stream. (If LZMA1 was used together + * with filters that change the size of the data then the uncompressed + * size of the LZMA1 stream could be different than the final + * uncompressed size of the filtered stream.) + * + * ext_size_low holds the least significant 32 bits of the + * uncompressed size. The most significant 32 bits must be set + * in ext_size_high. The macro lzma_set_ext_size(opt_lzma, u64size) + * can be used to set these members. + * + * The 64-bit uncompressed size is split into two uint32_t variables + * because there were no reserved uint64_t members and using the + * same options structure for LZMA_FILTER_LZMA1, LZMA_FILTER_LZMA1EXT, + * and LZMA_FILTER_LZMA2 was otherwise more convenient than having + * a new options structure for LZMA_FILTER_LZMA1EXT. (Replacing two + * uint32_t members with one uint64_t changes the ABI on some systems + * as the alignment of this struct can increase from 4 bytes to 8.) + */ + uint32_t ext_size_low; + + /** + * \brief For LZMA_FILTER_LZMA1EXT: Uncompressed size (high bits) + * + * This holds the most significant 32 bits of the uncompressed size. + */ + uint32_t ext_size_high; + + /* + * Reserved space to allow possible future extensions without + * breaking the ABI. You should not touch these, because the names + * of these variables may change. These are and will never be used + * with the currently supported options, so it is safe to leave these + * uninitialized. + */ + + /** \private Reserved member. */ + uint32_t reserved_int4; + + /** \private Reserved member. */ + uint32_t reserved_int5; + + /** \private Reserved member. */ + uint32_t reserved_int6; + + /** \private Reserved member. */ + uint32_t reserved_int7; + + /** \private Reserved member. */ + uint32_t reserved_int8; + + /** \private Reserved member. */ + lzma_reserved_enum reserved_enum1; + + /** \private Reserved member. */ + lzma_reserved_enum reserved_enum2; + + /** \private Reserved member. */ + lzma_reserved_enum reserved_enum3; + + /** \private Reserved member. */ + lzma_reserved_enum reserved_enum4; + + /** \private Reserved member. */ + void *reserved_ptr1; + + /** \private Reserved member. */ + void *reserved_ptr2; + +} lzma_options_lzma; + + +/** + * \brief Macro to set the 64-bit uncompressed size in ext_size_* + * + * This might be convenient when decoding using LZMA_FILTER_LZMA1EXT. + * This isn't used with LZMA_FILTER_LZMA1 or LZMA_FILTER_LZMA2. + */ +#define lzma_set_ext_size(opt_lzma2, u64size) \ +do { \ + (opt_lzma2).ext_size_low = (uint32_t)(u64size); \ + (opt_lzma2).ext_size_high = (uint32_t)((uint64_t)(u64size) >> 32); \ +} while (0) + + +/** + * \brief Set a compression preset to lzma_options_lzma structure + * + * 0 is the fastest and 9 is the slowest. These match the switches -0 .. -9 + * of the xz command line tool. In addition, it is possible to bitwise-or + * flags to the preset. Currently only LZMA_PRESET_EXTREME is supported. + * The flags are defined in container.h, because the flags are used also + * with lzma_easy_encoder(). + * + * The preset levels are subject to changes between liblzma versions. + * + * This function is available only if LZMA1 or LZMA2 encoder has been enabled + * when building liblzma. + * + * If features (like certain match finders) have been disabled at build time, + * then the function may return success (false) even though the resulting + * LZMA1/LZMA2 options may not be usable for encoder initialization + * (LZMA_OPTIONS_ERROR). + * + * \param[out] options Pointer to LZMA1 or LZMA2 options to be filled + * \param preset Preset level bitwse-ORed with preset flags + * + * \return lzma_bool: + * - true if the preset is not supported (failure). + * - false otherwise (success). + */ +extern LZMA_API(lzma_bool) lzma_lzma_preset( + lzma_options_lzma *options, uint32_t preset) lzma_nothrow; diff --git a/src/native/external/xz/src/liblzma/api/lzma/stream_flags.h b/src/native/external/xz/src/liblzma/api/lzma/stream_flags.h new file mode 100644 index 00000000000000..a33fe46837602a --- /dev/null +++ b/src/native/external/xz/src/liblzma/api/lzma/stream_flags.h @@ -0,0 +1,265 @@ +/* SPDX-License-Identifier: 0BSD */ + +/** + * \file lzma/stream_flags.h + * \brief .xz Stream Header and Stream Footer encoder and decoder + * \note Never include this file directly. Use instead. + */ + +/* + * Author: Lasse Collin + */ + +#ifndef LZMA_H_INTERNAL +# error Never include this file directly. Use instead. +#endif + + +/** + * \brief Size of Stream Header and Stream Footer + * + * Stream Header and Stream Footer have the same size and they are not + * going to change even if a newer version of the .xz file format is + * developed in future. + */ +#define LZMA_STREAM_HEADER_SIZE 12 + + +/** + * \brief Options for encoding/decoding Stream Header and Stream Footer + */ +typedef struct { + /** + * \brief Stream Flags format version + * + * To prevent API and ABI breakages if new features are needed in + * Stream Header or Stream Footer, a version number is used to + * indicate which members in this structure are in use. For now, + * version must always be zero. With non-zero version, the + * lzma_stream_header_encode() and lzma_stream_footer_encode() + * will return LZMA_OPTIONS_ERROR. + * + * lzma_stream_header_decode() and lzma_stream_footer_decode() + * will always set this to the lowest value that supports all the + * features indicated by the Stream Flags field. The application + * must check that the version number set by the decoding functions + * is supported by the application. Otherwise it is possible that + * the application will decode the Stream incorrectly. + */ + uint32_t version; + + /** + * \brief Backward Size + * + * Backward Size must be a multiple of four bytes. In this Stream + * format version, Backward Size is the size of the Index field. + * + * Backward Size isn't actually part of the Stream Flags field, but + * it is convenient to include in this structure anyway. Backward + * Size is present only in the Stream Footer. There is no need to + * initialize backward_size when encoding Stream Header. + * + * lzma_stream_header_decode() always sets backward_size to + * LZMA_VLI_UNKNOWN so that it is convenient to use + * lzma_stream_flags_compare() when both Stream Header and Stream + * Footer have been decoded. + */ + lzma_vli backward_size; + + /** + * \brief Minimum value for lzma_stream_flags.backward_size + */ +# define LZMA_BACKWARD_SIZE_MIN 4 + + /** + * \brief Maximum value for lzma_stream_flags.backward_size + */ +# define LZMA_BACKWARD_SIZE_MAX (LZMA_VLI_C(1) << 34) + + /** + * \brief Check ID + * + * This indicates the type of the integrity check calculated from + * uncompressed data. + */ + lzma_check check; + + /* + * Reserved space to allow possible future extensions without + * breaking the ABI. You should not touch these, because the + * names of these variables may change. + * + * (We will never be able to use all of these since Stream Flags + * is just two bytes plus Backward Size of four bytes. But it's + * nice to have the proper types when they are needed.) + */ + + /** \private Reserved member. */ + lzma_reserved_enum reserved_enum1; + + /** \private Reserved member. */ + lzma_reserved_enum reserved_enum2; + + /** \private Reserved member. */ + lzma_reserved_enum reserved_enum3; + + /** \private Reserved member. */ + lzma_reserved_enum reserved_enum4; + + /** \private Reserved member. */ + lzma_bool reserved_bool1; + + /** \private Reserved member. */ + lzma_bool reserved_bool2; + + /** \private Reserved member. */ + lzma_bool reserved_bool3; + + /** \private Reserved member. */ + lzma_bool reserved_bool4; + + /** \private Reserved member. */ + lzma_bool reserved_bool5; + + /** \private Reserved member. */ + lzma_bool reserved_bool6; + + /** \private Reserved member. */ + lzma_bool reserved_bool7; + + /** \private Reserved member. */ + lzma_bool reserved_bool8; + + /** \private Reserved member. */ + uint32_t reserved_int1; + + /** \private Reserved member. */ + uint32_t reserved_int2; + +} lzma_stream_flags; + + +/** + * \brief Encode Stream Header + * + * \param options Stream Header options to be encoded. + * options->backward_size is ignored and doesn't + * need to be initialized. + * \param[out] out Beginning of the output buffer of + * LZMA_STREAM_HEADER_SIZE bytes. + * + * \return Possible lzma_ret values: + * - LZMA_OK: Encoding was successful. + * - LZMA_OPTIONS_ERROR: options->version is not supported by + * this liblzma version. + * - LZMA_PROG_ERROR: Invalid options. + */ +extern LZMA_API(lzma_ret) lzma_stream_header_encode( + const lzma_stream_flags *options, uint8_t *out) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Encode Stream Footer + * + * \param options Stream Footer options to be encoded. + * \param[out] out Beginning of the output buffer of + * LZMA_STREAM_HEADER_SIZE bytes. + * + * \return Possible lzma_ret values: + * - LZMA_OK: Encoding was successful. + * - LZMA_OPTIONS_ERROR: options->version is not supported by + * this liblzma version. + * - LZMA_PROG_ERROR: Invalid options. + */ +extern LZMA_API(lzma_ret) lzma_stream_footer_encode( + const lzma_stream_flags *options, uint8_t *out) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Decode Stream Header + * + * options->backward_size is always set to LZMA_VLI_UNKNOWN. This is to + * help comparing Stream Flags from Stream Header and Stream Footer with + * lzma_stream_flags_compare(). + * + * \note When decoding .xz files that contain multiple Streams, it may + * make sense to print "file format not recognized" only if + * decoding of the Stream Header of the \a first Stream gives + * LZMA_FORMAT_ERROR. If non-first Stream Header gives + * LZMA_FORMAT_ERROR, the message used for LZMA_DATA_ERROR is + * probably more appropriate. + * For example, the Stream decoder in liblzma uses + * LZMA_DATA_ERROR if LZMA_FORMAT_ERROR is returned by + * lzma_stream_header_decode() when decoding non-first Stream. + * + * \param[out] options Target for the decoded Stream Header options. + * \param in Beginning of the input buffer of + * LZMA_STREAM_HEADER_SIZE bytes. + * + * + * \return Possible lzma_ret values: + * - LZMA_OK: Decoding was successful. + * - LZMA_FORMAT_ERROR: Magic bytes don't match, thus the given + * buffer cannot be Stream Header. + * - LZMA_DATA_ERROR: CRC32 doesn't match, thus the header + * is corrupt. + * - LZMA_OPTIONS_ERROR: Unsupported options are present + * in the header. + */ +extern LZMA_API(lzma_ret) lzma_stream_header_decode( + lzma_stream_flags *options, const uint8_t *in) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Decode Stream Footer + * + * \note If Stream Header was already decoded successfully, but + * decoding Stream Footer returns LZMA_FORMAT_ERROR, the + * application should probably report some other error message + * than "file format not recognized". The file likely + * is corrupt (possibly truncated). The Stream decoder in liblzma + * uses LZMA_DATA_ERROR in this situation. + * + * \param[out] options Target for the decoded Stream Footer options. + * \param in Beginning of the input buffer of + * LZMA_STREAM_HEADER_SIZE bytes. + * + * \return Possible lzma_ret values: + * - LZMA_OK: Decoding was successful. + * - LZMA_FORMAT_ERROR: Magic bytes don't match, thus the given + * buffer cannot be Stream Footer. + * - LZMA_DATA_ERROR: CRC32 doesn't match, thus the Stream Footer + * is corrupt. + * - LZMA_OPTIONS_ERROR: Unsupported options are present + * in Stream Footer. + */ +extern LZMA_API(lzma_ret) lzma_stream_footer_decode( + lzma_stream_flags *options, const uint8_t *in) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Compare two lzma_stream_flags structures + * + * backward_size values are compared only if both are not + * LZMA_VLI_UNKNOWN. + * + * \param a Pointer to lzma_stream_flags structure + * \param b Pointer to lzma_stream_flags structure + * + * \return Possible lzma_ret values: + * - LZMA_OK: Both are equal. If either had backward_size set + * to LZMA_VLI_UNKNOWN, backward_size values were not + * compared or validated. + * - LZMA_DATA_ERROR: The structures differ. + * - LZMA_OPTIONS_ERROR: version in either structure is greater + * than the maximum supported version (currently zero). + * - LZMA_PROG_ERROR: Invalid value, e.g. invalid check or + * backward_size. + */ +extern LZMA_API(lzma_ret) lzma_stream_flags_compare( + const lzma_stream_flags *a, const lzma_stream_flags *b) + lzma_nothrow lzma_attr_pure; diff --git a/src/native/external/xz/src/liblzma/api/lzma/version.h b/src/native/external/xz/src/liblzma/api/lzma/version.h new file mode 100644 index 00000000000000..263ad819a90609 --- /dev/null +++ b/src/native/external/xz/src/liblzma/api/lzma/version.h @@ -0,0 +1,134 @@ +/* SPDX-License-Identifier: 0BSD */ + +/** + * \file lzma/version.h + * \brief Version number + * \note Never include this file directly. Use instead. + */ + +/* + * Author: Lasse Collin + */ + +#ifndef LZMA_H_INTERNAL +# error Never include this file directly. Use instead. +#endif + + +/** \brief Major version number of the liblzma release. */ +#define LZMA_VERSION_MAJOR 5 + +/** \brief Minor version number of the liblzma release. */ +#define LZMA_VERSION_MINOR 8 + +/** \brief Patch version number of the liblzma release. */ +#define LZMA_VERSION_PATCH 2 + +/** + * \brief Version stability marker + * + * This will always be one of three values: + * - LZMA_VERSION_STABILITY_ALPHA + * - LZMA_VERSION_STABILITY_BETA + * - LZMA_VERSION_STABILITY_STABLE + */ +#define LZMA_VERSION_STABILITY LZMA_VERSION_STABILITY_STABLE + +/** \brief Commit version number of the liblzma release */ +#ifndef LZMA_VERSION_COMMIT +# define LZMA_VERSION_COMMIT "" +#endif + + +/* + * Map symbolic stability levels to integers. + */ +#define LZMA_VERSION_STABILITY_ALPHA 0 +#define LZMA_VERSION_STABILITY_BETA 1 +#define LZMA_VERSION_STABILITY_STABLE 2 + + +/** + * \brief Compile-time version number + * + * The version number is of format xyyyzzzs where + * - x = major + * - yyy = minor + * - zzz = revision + * - s indicates stability: 0 = alpha, 1 = beta, 2 = stable + * + * The same xyyyzzz triplet is never reused with different stability levels. + * For example, if 5.1.0alpha has been released, there will never be 5.1.0beta + * or 5.1.0 stable. + * + * \note The version number of liblzma has nothing to with + * the version number of Igor Pavlov's LZMA SDK. + */ +#define LZMA_VERSION (LZMA_VERSION_MAJOR * UINT32_C(10000000) \ + + LZMA_VERSION_MINOR * UINT32_C(10000) \ + + LZMA_VERSION_PATCH * UINT32_C(10) \ + + LZMA_VERSION_STABILITY) + + +/* + * Macros to construct the compile-time version string + */ +#if LZMA_VERSION_STABILITY == LZMA_VERSION_STABILITY_ALPHA +# define LZMA_VERSION_STABILITY_STRING "alpha" +#elif LZMA_VERSION_STABILITY == LZMA_VERSION_STABILITY_BETA +# define LZMA_VERSION_STABILITY_STRING "beta" +#elif LZMA_VERSION_STABILITY == LZMA_VERSION_STABILITY_STABLE +# define LZMA_VERSION_STABILITY_STRING "" +#else +# error Incorrect LZMA_VERSION_STABILITY +#endif + +#define LZMA_VERSION_STRING_C_(major, minor, patch, stability, commit) \ + #major "." #minor "." #patch stability commit + +#define LZMA_VERSION_STRING_C(major, minor, patch, stability, commit) \ + LZMA_VERSION_STRING_C_(major, minor, patch, stability, commit) + + +/** + * \brief Compile-time version as a string + * + * This can be for example "4.999.5alpha", "4.999.8beta", or "5.0.0" (stable + * versions don't have any "stable" suffix). In future, a snapshot built + * from source code repository may include an additional suffix, for example + * "4.999.8beta-21-g1d92". The commit ID won't be available in numeric form + * in LZMA_VERSION macro. + */ +#define LZMA_VERSION_STRING LZMA_VERSION_STRING_C( \ + LZMA_VERSION_MAJOR, LZMA_VERSION_MINOR, \ + LZMA_VERSION_PATCH, LZMA_VERSION_STABILITY_STRING, \ + LZMA_VERSION_COMMIT) + + +/* #ifndef is needed for use with windres (MinGW-w64 or Cygwin). */ +#ifndef LZMA_H_INTERNAL_RC + +/** + * \brief Run-time version number as an integer + * + * This allows an application to compare if it was built against the same, + * older, or newer version of liblzma that is currently running. + * + * \return The value of LZMA_VERSION macro at the compile time of liblzma + */ +extern LZMA_API(uint32_t) lzma_version_number(void) + lzma_nothrow lzma_attr_const; + + +/** + * \brief Run-time version as a string + * + * This function may be useful to display which version of liblzma an + * application is currently using. + * + * \return Run-time version of liblzma + */ +extern LZMA_API(const char *) lzma_version_string(void) + lzma_nothrow lzma_attr_const; + +#endif diff --git a/src/native/external/xz/src/liblzma/api/lzma/vli.h b/src/native/external/xz/src/liblzma/api/lzma/vli.h new file mode 100644 index 00000000000000..6b049021b9090a --- /dev/null +++ b/src/native/external/xz/src/liblzma/api/lzma/vli.h @@ -0,0 +1,166 @@ +/* SPDX-License-Identifier: 0BSD */ + +/** + * \file lzma/vli.h + * \brief Variable-length integer handling + * \note Never include this file directly. Use instead. + * + * In the .xz format, most integers are encoded in a variable-length + * representation, which is sometimes called little endian base-128 encoding. + * This saves space when smaller values are more likely than bigger values. + * + * The encoding scheme encodes seven bits to every byte, using minimum + * number of bytes required to represent the given value. Encodings that use + * non-minimum number of bytes are invalid, thus every integer has exactly + * one encoded representation. The maximum number of bits in a VLI is 63, + * thus the vli argument must be less than or equal to UINT64_MAX / 2. You + * should use LZMA_VLI_MAX for clarity. + */ + +/* + * Author: Lasse Collin + */ + +#ifndef LZMA_H_INTERNAL +# error Never include this file directly. Use instead. +#endif + + +/** + * \brief Maximum supported value of a variable-length integer + */ +#define LZMA_VLI_MAX (UINT64_MAX / 2) + +/** + * \brief VLI value to denote that the value is unknown + */ +#define LZMA_VLI_UNKNOWN UINT64_MAX + +/** + * \brief Maximum supported encoded length of variable length integers + */ +#define LZMA_VLI_BYTES_MAX 9 + +/** + * \brief VLI constant suffix + */ +#define LZMA_VLI_C(n) UINT64_C(n) + + +/** + * \brief Variable-length integer type + * + * Valid VLI values are in the range [0, LZMA_VLI_MAX]. Unknown value is + * indicated with LZMA_VLI_UNKNOWN, which is the maximum value of the + * underlying integer type. + * + * lzma_vli will be uint64_t for the foreseeable future. If a bigger size + * is needed in the future, it is guaranteed that 2 * LZMA_VLI_MAX will + * not overflow lzma_vli. This simplifies integer overflow detection. + */ +typedef uint64_t lzma_vli; + + +/** + * \brief Validate a variable-length integer + * + * This is useful to test that application has given acceptable values + * for example in the uncompressed_size and compressed_size variables. + * + * \return True if the integer is representable as a VLI or if it + * indicates an unknown value. False otherwise. + */ +#define lzma_vli_is_valid(vli) \ + ((vli) <= LZMA_VLI_MAX || (vli) == LZMA_VLI_UNKNOWN) + + +/** + * \brief Encode a variable-length integer + * + * This function has two modes: single-call and multi-call. Single-call mode + * encodes the whole integer at once; it is an error if the output buffer is + * too small. Multi-call mode saves the position in *vli_pos, and thus it is + * possible to continue encoding if the buffer becomes full before the whole + * integer has been encoded. + * + * \param vli Integer to be encoded + * \param[out] vli_pos How many VLI-encoded bytes have already been written + * out. When starting to encode a new integer in + * multi-call mode, *vli_pos must be set to zero. + * To use single-call encoding, set vli_pos to NULL. + * \param[out] out Beginning of the output buffer + * \param[out] out_pos The next byte will be written to out[*out_pos]. + * \param out_size Size of the out buffer; the first byte into + * which no data is written to is out[out_size]. + * + * \return Slightly different return values are used in multi-call and + * single-call modes. + * + * Single-call (vli_pos == NULL): + * - LZMA_OK: Integer successfully encoded. + * - LZMA_PROG_ERROR: Arguments are not sane. This can be due + * to too little output space; single-call mode doesn't use + * LZMA_BUF_ERROR, since the application should have checked + * the encoded size with lzma_vli_size(). + * + * Multi-call (vli_pos != NULL): + * - LZMA_OK: So far all OK, but the integer is not + * completely written out yet. + * - LZMA_STREAM_END: Integer successfully encoded. + * - LZMA_BUF_ERROR: No output space was provided. + * - LZMA_PROG_ERROR: Arguments are not sane. + */ +extern LZMA_API(lzma_ret) lzma_vli_encode(lzma_vli vli, size_t *vli_pos, + uint8_t *out, size_t *out_pos, size_t out_size) lzma_nothrow; + + +/** + * \brief Decode a variable-length integer + * + * Like lzma_vli_encode(), this function has single-call and multi-call modes. + * + * \param[out] vli Pointer to decoded integer. The decoder will + * initialize it to zero when *vli_pos == 0, so + * application isn't required to initialize *vli. + * \param[out] vli_pos How many bytes have already been decoded. When + * starting to decode a new integer in multi-call + * mode, *vli_pos must be initialized to zero. To + * use single-call decoding, set vli_pos to NULL. + * \param in Beginning of the input buffer + * \param[out] in_pos The next byte will be read from in[*in_pos]. + * \param in_size Size of the input buffer; the first byte that + * won't be read is in[in_size]. + * + * \return Slightly different return values are used in multi-call and + * single-call modes. + * + * Single-call (vli_pos == NULL): + * - LZMA_OK: Integer successfully decoded. + * - LZMA_DATA_ERROR: Integer is corrupt. This includes hitting + * the end of the input buffer before the whole integer was + * decoded; providing no input at all will use LZMA_DATA_ERROR. + * - LZMA_PROG_ERROR: Arguments are not sane. + * + * Multi-call (vli_pos != NULL): + * - LZMA_OK: So far all OK, but the integer is not + * completely decoded yet. + * - LZMA_STREAM_END: Integer successfully decoded. + * - LZMA_DATA_ERROR: Integer is corrupt. + * - LZMA_BUF_ERROR: No input was provided. + * - LZMA_PROG_ERROR: Arguments are not sane. + */ +extern LZMA_API(lzma_ret) lzma_vli_decode(lzma_vli *vli, size_t *vli_pos, + const uint8_t *in, size_t *in_pos, size_t in_size) + lzma_nothrow; + + +/** + * \brief Get the number of bytes required to encode a VLI + * + * \param vli Integer whose encoded size is to be determined + * + * \return Number of bytes on success (1-9). If vli isn't valid, + * zero is returned. + */ +extern LZMA_API(uint32_t) lzma_vli_size(lzma_vli vli) + lzma_nothrow lzma_attr_pure; diff --git a/src/native/external/xz/src/liblzma/check/Makefile.inc b/src/native/external/xz/src/liblzma/check/Makefile.inc new file mode 100644 index 00000000000000..00a26e687f7162 --- /dev/null +++ b/src/native/external/xz/src/liblzma/check/Makefile.inc @@ -0,0 +1,50 @@ +## SPDX-License-Identifier: 0BSD +## Author: Lasse Collin + +## Note: There is no check for COND_CHECK_CRC32 because +## currently crc32 is always enabled. + +EXTRA_DIST += \ + check/crc_clmul_consts_gen.c \ + check/crc32_tablegen.c \ + check/crc64_tablegen.c + +liblzma_la_SOURCES += \ + check/check.c \ + check/check.h \ + check/crc_common.h \ + check/crc_x86_clmul.h \ + check/crc32_arm64.h \ + check/crc32_loongarch.h + +if COND_SMALL +liblzma_la_SOURCES += check/crc32_small.c +else +liblzma_la_SOURCES += \ + check/crc32_fast.c \ + check/crc32_table_le.h \ + check/crc32_table_be.h +if COND_ASM_X86 +liblzma_la_SOURCES += check/crc32_x86.S +endif +endif + +if COND_CHECK_CRC64 +if COND_SMALL +liblzma_la_SOURCES += check/crc64_small.c +else +liblzma_la_SOURCES += \ + check/crc64_fast.c \ + check/crc64_table_le.h \ + check/crc64_table_be.h +if COND_ASM_X86 +liblzma_la_SOURCES += check/crc64_x86.S +endif +endif +endif + +if COND_CHECK_SHA256 +if COND_INTERNAL_SHA256 +liblzma_la_SOURCES += check/sha256.c +endif +endif diff --git a/src/native/external/xz/src/liblzma/check/check.c b/src/native/external/xz/src/liblzma/check/check.c new file mode 100644 index 00000000000000..7734ace1856edc --- /dev/null +++ b/src/native/external/xz/src/liblzma/check/check.c @@ -0,0 +1,173 @@ +// SPDX-License-Identifier: 0BSD + +/////////////////////////////////////////////////////////////////////////////// +// +/// \file check.c +/// \brief Single API to access different integrity checks +// +// Author: Lasse Collin +// +/////////////////////////////////////////////////////////////////////////////// + +#include "check.h" + + +extern LZMA_API(lzma_bool) +lzma_check_is_supported(lzma_check type) +{ + if ((unsigned int)(type) > LZMA_CHECK_ID_MAX) + return false; + + static const lzma_bool available_checks[LZMA_CHECK_ID_MAX + 1] = { + true, // LZMA_CHECK_NONE + +#ifdef HAVE_CHECK_CRC32 + true, +#else + false, +#endif + + false, // Reserved + false, // Reserved + +#ifdef HAVE_CHECK_CRC64 + true, +#else + false, +#endif + + false, // Reserved + false, // Reserved + false, // Reserved + false, // Reserved + false, // Reserved + +#ifdef HAVE_CHECK_SHA256 + true, +#else + false, +#endif + + false, // Reserved + false, // Reserved + false, // Reserved + false, // Reserved + false, // Reserved + }; + + return available_checks[(unsigned int)(type)]; +} + + +extern LZMA_API(uint32_t) +lzma_check_size(lzma_check type) +{ + if ((unsigned int)(type) > LZMA_CHECK_ID_MAX) + return UINT32_MAX; + + // See file-format.txt section 2.1.1.2. + static const uint8_t check_sizes[LZMA_CHECK_ID_MAX + 1] = { + 0, + 4, 4, 4, + 8, 8, 8, + 16, 16, 16, + 32, 32, 32, + 64, 64, 64 + }; + + return check_sizes[(unsigned int)(type)]; +} + + +extern void +lzma_check_init(lzma_check_state *check, lzma_check type) +{ + switch (type) { + case LZMA_CHECK_NONE: + break; + +#ifdef HAVE_CHECK_CRC32 + case LZMA_CHECK_CRC32: + check->state.crc32 = 0; + break; +#endif + +#ifdef HAVE_CHECK_CRC64 + case LZMA_CHECK_CRC64: + check->state.crc64 = 0; + break; +#endif + +#ifdef HAVE_CHECK_SHA256 + case LZMA_CHECK_SHA256: + lzma_sha256_init(check); + break; +#endif + + default: + break; + } + + return; +} + + +extern void +lzma_check_update(lzma_check_state *check, lzma_check type, + const uint8_t *buf, size_t size) +{ + switch (type) { +#ifdef HAVE_CHECK_CRC32 + case LZMA_CHECK_CRC32: + check->state.crc32 = lzma_crc32(buf, size, check->state.crc32); + break; +#endif + +#ifdef HAVE_CHECK_CRC64 + case LZMA_CHECK_CRC64: + check->state.crc64 = lzma_crc64(buf, size, check->state.crc64); + break; +#endif + +#ifdef HAVE_CHECK_SHA256 + case LZMA_CHECK_SHA256: + lzma_sha256_update(buf, size, check); + break; +#endif + + default: + break; + } + + return; +} + + +extern void +lzma_check_finish(lzma_check_state *check, lzma_check type) +{ + switch (type) { +#ifdef HAVE_CHECK_CRC32 + case LZMA_CHECK_CRC32: + check->buffer.u32[0] = conv32le(check->state.crc32); + break; +#endif + +#ifdef HAVE_CHECK_CRC64 + case LZMA_CHECK_CRC64: + check->buffer.u64[0] = conv64le(check->state.crc64); + break; +#endif + +#ifdef HAVE_CHECK_SHA256 + case LZMA_CHECK_SHA256: + lzma_sha256_finish(check); + break; +#endif + + default: + break; + } + + return; +} diff --git a/src/native/external/xz/src/liblzma/check/check.h b/src/native/external/xz/src/liblzma/check/check.h new file mode 100644 index 00000000000000..16a56334211a1d --- /dev/null +++ b/src/native/external/xz/src/liblzma/check/check.h @@ -0,0 +1,156 @@ +// SPDX-License-Identifier: 0BSD + +/////////////////////////////////////////////////////////////////////////////// +// +/// \file check.h +/// \brief Internal API to different integrity check functions +// +// Author: Lasse Collin +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef LZMA_CHECK_H +#define LZMA_CHECK_H + +#include "common.h" + +// If the function for external SHA-256 is missing, use the internal SHA-256 +// code. Due to how configure works, these defines can only get defined when +// both a usable header and a type have already been found. +#if !(defined(HAVE_CC_SHA256_INIT) \ + || defined(HAVE_SHA256_INIT) \ + || defined(HAVE_SHA256INIT)) +# define HAVE_INTERNAL_SHA256 1 +#endif + +#if defined(HAVE_INTERNAL_SHA256) +// Nothing +#elif defined(HAVE_COMMONCRYPTO_COMMONDIGEST_H) +# include +#elif defined(HAVE_SHA256_H) +# include +# include +#elif defined(HAVE_SHA2_H) +# include +# include +#endif + +#if defined(HAVE_INTERNAL_SHA256) +/// State for the internal SHA-256 implementation +typedef struct { + /// Internal state + uint32_t state[8]; + + /// Size of the message excluding padding + uint64_t size; +} lzma_sha256_state; +#elif defined(HAVE_CC_SHA256_CTX) +typedef CC_SHA256_CTX lzma_sha256_state; +#elif defined(HAVE_SHA256_CTX) +typedef SHA256_CTX lzma_sha256_state; +#elif defined(HAVE_SHA2_CTX) +typedef SHA2_CTX lzma_sha256_state; +#endif + +#if defined(HAVE_INTERNAL_SHA256) +// Nothing +#elif defined(HAVE_CC_SHA256_INIT) +# define LZMA_SHA256FUNC(x) CC_SHA256_ ## x +#elif defined(HAVE_SHA256_INIT) +# define LZMA_SHA256FUNC(x) SHA256_ ## x +#elif defined(HAVE_SHA256INIT) +# define LZMA_SHA256FUNC(x) SHA256 ## x +#endif + +// Index hashing needs the best possible hash function (preferably +// a cryptographic hash) for maximum reliability. +#if defined(HAVE_CHECK_SHA256) +# define LZMA_CHECK_BEST LZMA_CHECK_SHA256 +#elif defined(HAVE_CHECK_CRC64) +# define LZMA_CHECK_BEST LZMA_CHECK_CRC64 +#else +# define LZMA_CHECK_BEST LZMA_CHECK_CRC32 +#endif + + +/// \brief Structure to hold internal state of the check being calculated +/// +/// \note This is not in the public API because this structure may +/// change in future if new integrity check algorithms are added. +typedef struct { + /// Buffer to hold the final result and a temporary buffer for SHA256. + union { + uint8_t u8[64]; + uint32_t u32[16]; + uint64_t u64[8]; + } buffer; + + /// Check-specific data + union { + uint32_t crc32; + uint64_t crc64; + lzma_sha256_state sha256; + } state; + +} lzma_check_state; + + +/// \brief Initialize *check depending on type +extern void lzma_check_init(lzma_check_state *check, lzma_check type); + +/// Update the check state +extern void lzma_check_update(lzma_check_state *check, lzma_check type, + const uint8_t *buf, size_t size); + +/// Finish the check calculation and store the result to check->buffer.u8. +extern void lzma_check_finish(lzma_check_state *check, lzma_check type); + + +#ifndef LZMA_SHA256FUNC + +/// Prepare SHA-256 state for new input. +extern void lzma_sha256_init(lzma_check_state *check); + +/// Update the SHA-256 hash state +extern void lzma_sha256_update( + const uint8_t *buf, size_t size, lzma_check_state *check); + +/// Finish the SHA-256 calculation and store the result to check->buffer.u8. +extern void lzma_sha256_finish(lzma_check_state *check); + + +#else + +static inline void +lzma_sha256_init(lzma_check_state *check) +{ + LZMA_SHA256FUNC(Init)(&check->state.sha256); +} + + +static inline void +lzma_sha256_update(const uint8_t *buf, size_t size, lzma_check_state *check) +{ +#if defined(HAVE_CC_SHA256_INIT) && SIZE_MAX > UINT32_MAX + // Darwin's CC_SHA256_Update takes uint32_t as the buffer size, + // so use a loop to support size_t. + while (size > UINT32_MAX) { + LZMA_SHA256FUNC(Update)(&check->state.sha256, buf, UINT32_MAX); + buf += UINT32_MAX; + size -= UINT32_MAX; + } +#endif + + LZMA_SHA256FUNC(Update)(&check->state.sha256, buf, size); +} + + +static inline void +lzma_sha256_finish(lzma_check_state *check) +{ + LZMA_SHA256FUNC(Final)(check->buffer.u8, &check->state.sha256); +} + +#endif + +#endif diff --git a/src/native/external/xz/src/liblzma/check/crc32_arm64.h b/src/native/external/xz/src/liblzma/check/crc32_arm64.h new file mode 100644 index 00000000000000..cce1131b336f99 --- /dev/null +++ b/src/native/external/xz/src/liblzma/check/crc32_arm64.h @@ -0,0 +1,148 @@ +// SPDX-License-Identifier: 0BSD + +/////////////////////////////////////////////////////////////////////////////// +// +/// \file crc32_arm64.h +/// \brief CRC32 calculation with ARM64 optimization +// +// Authors: Chenxi Mao +// Jia Tan +// Lasse Collin +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef LZMA_CRC32_ARM64_H +#define LZMA_CRC32_ARM64_H + +// MSVC always has the CRC intrinsics available when building for ARM64 +// there is no need to include any header files. +#ifndef _MSC_VER +# include +#endif + +// If both versions are going to be built, we need runtime detection +// to check if the instructions are supported. +#if defined(CRC32_GENERIC) && defined(CRC32_ARCH_OPTIMIZED) +# if (defined(HAVE_GETAUXVAL) && defined(HAVE_HWCAP_CRC32)) \ + || defined(HAVE_ELF_AUX_INFO) +# include +# elif defined(_WIN32) +# include +# elif defined(__APPLE__) && defined(HAVE_SYSCTLBYNAME) +# include +# endif +#endif + +// Some EDG-based compilers support ARM64 and define __GNUC__ +// (such as Nvidia's nvcc), but do not support function attributes. +// +// NOTE: Build systems check for this too, keep them in sync with this. +#if (defined(__GNUC__) || defined(__clang__)) && !defined(__EDG__) +# define crc_attr_target __attribute__((__target__("+crc"))) +#else +# define crc_attr_target +#endif + + +crc_attr_target +static uint32_t +crc32_arch_optimized(const uint8_t *buf, size_t size, uint32_t crc) +{ + crc = ~crc; + + if (size >= 8) { + // Align the input buffer because this was shown to be + // significantly faster than unaligned accesses. + const size_t align = (0 - (uintptr_t)buf) & 7; + + if (align & 1) + crc = __crc32b(crc, *buf++); + + if (align & 2) { + crc = __crc32h(crc, aligned_read16le(buf)); + buf += 2; + } + + if (align & 4) { + crc = __crc32w(crc, aligned_read32le(buf)); + buf += 4; + } + + size -= align; + + // Process 8 bytes at a time. The end point is determined by + // ignoring the least significant three bits of size to + // ensure we do not process past the bounds of the buffer. + // This guarantees that limit is a multiple of 8 and is + // strictly less than size. + for (const uint8_t *limit = buf + (size & ~(size_t)7); + buf < limit; buf += 8) + crc = __crc32d(crc, aligned_read64le(buf)); + + size &= 7; + } + + // Process the remaining bytes that are not 8 byte aligned. + if (size & 4) { + crc = __crc32w(crc, aligned_read32le(buf)); + buf += 4; + } + + if (size & 2) { + crc = __crc32h(crc, aligned_read16le(buf)); + buf += 2; + } + + if (size & 1) + crc = __crc32b(crc, *buf); + + return ~crc; +} + + +#if defined(CRC32_GENERIC) && defined(CRC32_ARCH_OPTIMIZED) +static inline bool +is_arch_extension_supported(void) +{ +#if defined(HAVE_GETAUXVAL) && defined(HAVE_HWCAP_CRC32) + return (getauxval(AT_HWCAP) & HWCAP_CRC32) != 0; + +#elif defined(HAVE_ELF_AUX_INFO) + unsigned long feature_flags; + + if (elf_aux_info(AT_HWCAP, &feature_flags, sizeof(feature_flags)) != 0) + return false; + + return (feature_flags & HWCAP_CRC32) != 0; + +#elif defined(_WIN32) + return IsProcessorFeaturePresent( + PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE); + +#elif defined(__APPLE__) && defined(HAVE_SYSCTLBYNAME) + int has_crc32 = 0; + size_t size = sizeof(has_crc32); + + // The sysctlbyname() function requires a string identifier for the + // CPU feature it tests. The Apple documentation lists the string + // "hw.optional.armv8_crc32", which can be found here: + // https://developer.apple.com/documentation/kernel/1387446-sysctlbyname/determining_instruction_set_characteristics#3915619 + if (sysctlbyname("hw.optional.armv8_crc32", &has_crc32, + &size, NULL, 0) != 0) + return false; + + return has_crc32; + +#else + // If a runtime detection method cannot be found, then this must + // be a compile time error. The checks in crc_common.h should ensure + // a runtime detection method is always found if this function is + // built. It would be possible to just return false here, but this + // is inefficient for binary size and runtime since only the generic + // method could ever be used. +# error Runtime detection method unavailable. +#endif +} +#endif + +#endif // LZMA_CRC32_ARM64_H diff --git a/src/native/external/xz/src/liblzma/check/crc32_fast.c b/src/native/external/xz/src/liblzma/check/crc32_fast.c new file mode 100644 index 00000000000000..6184e2b70e6ba6 --- /dev/null +++ b/src/native/external/xz/src/liblzma/check/crc32_fast.c @@ -0,0 +1,196 @@ +// SPDX-License-Identifier: 0BSD + +/////////////////////////////////////////////////////////////////////////////// +// +/// \file crc32_fast.c +/// \brief CRC32 calculation +// +// Authors: Lasse Collin +// Ilya Kurdyukov +// +/////////////////////////////////////////////////////////////////////////////// + +#include "check.h" +#include "crc_common.h" + +#if defined(CRC_X86_CLMUL) +# define BUILDING_CRC_CLMUL 32 +# include "crc_x86_clmul.h" +#elif defined(CRC32_ARM64) +# include "crc32_arm64.h" +#elif defined(CRC32_LOONGARCH) +# include "crc32_loongarch.h" +#endif + + +#ifdef CRC32_GENERIC + +/////////////////// +// Generic CRC32 // +/////////////////// + +#ifdef WORDS_BIGENDIAN +# include "crc32_table_be.h" +#else +# include "crc32_table_le.h" +#endif + + +#ifdef HAVE_CRC_X86_ASM +extern uint32_t lzma_crc32_generic( + const uint8_t *buf, size_t size, uint32_t crc); +#else +static uint32_t +lzma_crc32_generic(const uint8_t *buf, size_t size, uint32_t crc) +{ + crc = ~crc; + +#ifdef WORDS_BIGENDIAN + crc = byteswap32(crc); +#endif + + if (size > 8) { + // Fix the alignment, if needed. The if statement above + // ensures that this won't read past the end of buf[]. + while ((uintptr_t)(buf) & 7) { + crc = lzma_crc32_table[0][*buf++ ^ A(crc)] ^ S8(crc); + --size; + } + + // Calculate the position where to stop. + const uint8_t *const limit = buf + (size & ~(size_t)(7)); + + // Calculate how many bytes must be calculated separately + // before returning the result. + size &= (size_t)(7); + + // Calculate the CRC32 using the slice-by-eight algorithm. + while (buf < limit) { + crc ^= aligned_read32ne(buf); + buf += 4; + + crc = lzma_crc32_table[7][A(crc)] + ^ lzma_crc32_table[6][B(crc)] + ^ lzma_crc32_table[5][C(crc)] + ^ lzma_crc32_table[4][D(crc)]; + + const uint32_t tmp = aligned_read32ne(buf); + buf += 4; + + // At least with some compilers, it is critical for + // performance, that the crc variable is XORed + // between the two table-lookup pairs. + crc = lzma_crc32_table[3][A(tmp)] + ^ lzma_crc32_table[2][B(tmp)] + ^ crc + ^ lzma_crc32_table[1][C(tmp)] + ^ lzma_crc32_table[0][D(tmp)]; + } + } + + while (size-- != 0) + crc = lzma_crc32_table[0][*buf++ ^ A(crc)] ^ S8(crc); + +#ifdef WORDS_BIGENDIAN + crc = byteswap32(crc); +#endif + + return ~crc; +} +#endif // HAVE_CRC_X86_ASM +#endif // CRC32_GENERIC + + +#if defined(CRC32_GENERIC) && defined(CRC32_ARCH_OPTIMIZED) + +////////////////////////// +// Function dispatching // +////////////////////////// + +// If both the generic and arch-optimized implementations are built, then +// the function to use is selected at runtime because the system running +// the binary might not have the arch-specific instruction set extension(s) +// available. The dispatch methods in order of priority: +// +// 1. Constructor. This method uses __attribute__((__constructor__)) to +// set crc32_func at load time. This avoids extra computation (and any +// unlikely threading bugs) on the first call to lzma_crc32() to decide +// which implementation should be used. +// +// 2. First Call Resolution. On the very first call to lzma_crc32(), the +// call will be directed to crc32_dispatch() instead. This will set the +// appropriate implementation function and will not be called again. +// This method does not use any kind of locking but is safe because if +// multiple threads run the dispatcher simultaneously then they will all +// set crc32_func to the same value. + +typedef uint32_t (*crc32_func_type)( + const uint8_t *buf, size_t size, uint32_t crc); + +// This resolver is shared between all dispatch methods. +static crc32_func_type +crc32_resolve(void) +{ + return is_arch_extension_supported() + ? &crc32_arch_optimized : &lzma_crc32_generic; +} + + +#ifdef HAVE_FUNC_ATTRIBUTE_CONSTRUCTOR +// Constructor method. +# define CRC32_SET_FUNC_ATTR __attribute__((__constructor__)) +static crc32_func_type crc32_func; +#else +// First Call Resolution method. +# define CRC32_SET_FUNC_ATTR +static uint32_t crc32_dispatch(const uint8_t *buf, size_t size, uint32_t crc); +static crc32_func_type crc32_func = &crc32_dispatch; +#endif + +CRC32_SET_FUNC_ATTR +static void +crc32_set_func(void) +{ + crc32_func = crc32_resolve(); + return; +} + +#ifndef HAVE_FUNC_ATTRIBUTE_CONSTRUCTOR +static uint32_t +crc32_dispatch(const uint8_t *buf, size_t size, uint32_t crc) +{ + // When __attribute__((__constructor__)) isn't supported, set the + // function pointer without any locking. If multiple threads run + // the detection code in parallel, they will all end up setting + // the pointer to the same value. This avoids the use of + // mythread_once() on every call to lzma_crc32() but this likely + // isn't strictly standards compliant. Let's change it if it breaks. + crc32_set_func(); + return crc32_func(buf, size, crc); +} + +#endif +#endif + + +extern LZMA_API(uint32_t) +lzma_crc32(const uint8_t *buf, size_t size, uint32_t crc) +{ +#if defined(CRC32_GENERIC) && defined(CRC32_ARCH_OPTIMIZED) +/* +#ifndef HAVE_FUNC_ATTRIBUTE_CONSTRUCTOR + // See crc32_dispatch(). This would be the alternative which uses + // locking and doesn't use crc32_dispatch(). Note that on Windows + // this method needs Vista threads. + mythread_once(crc64_set_func); +#endif +*/ + return crc32_func(buf, size, crc); + +#elif defined(CRC32_ARCH_OPTIMIZED) + return crc32_arch_optimized(buf, size, crc); + +#else + return lzma_crc32_generic(buf, size, crc); +#endif +} diff --git a/src/native/external/xz/src/liblzma/check/crc32_loongarch.h b/src/native/external/xz/src/liblzma/check/crc32_loongarch.h new file mode 100644 index 00000000000000..ec738b83d70ae2 --- /dev/null +++ b/src/native/external/xz/src/liblzma/check/crc32_loongarch.h @@ -0,0 +1,65 @@ +// SPDX-License-Identifier: 0BSD + +/////////////////////////////////////////////////////////////////////////////// +// +/// \file crc32_loongarch.h +/// \brief CRC32 calculation with LoongArch optimization +// +// Authors: Xi Ruoyao +// Lasse Collin +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef LZMA_CRC32_LOONGARCH_H +#define LZMA_CRC32_LOONGARCH_H + +#include + + +static uint32_t +crc32_arch_optimized(const uint8_t *buf, size_t size, uint32_t crc_unsigned) +{ + int32_t crc = (int32_t)~crc_unsigned; + + if (size >= 8) { + const size_t align = (0 - (uintptr_t)buf) & 7; + + if (align & 1) + crc = __crc_w_b_w((int8_t)*buf++, crc); + + if (align & 2) { + crc = __crc_w_h_w((int16_t)aligned_read16le(buf), crc); + buf += 2; + } + + if (align & 4) { + crc = __crc_w_w_w((int32_t)aligned_read32le(buf), crc); + buf += 4; + } + + size -= align; + + for (const uint8_t *limit = buf + (size & ~(size_t)7); + buf < limit; buf += 8) + crc = __crc_w_d_w((int64_t)aligned_read64le(buf), crc); + + size &= 7; + } + + if (size & 4) { + crc = __crc_w_w_w((int32_t)aligned_read32le(buf), crc); + buf += 4; + } + + if (size & 2) { + crc = __crc_w_h_w((int16_t)aligned_read16le(buf), crc); + buf += 2; + } + + if (size & 1) + crc = __crc_w_b_w((int8_t)*buf, crc); + + return (uint32_t)~crc; +} + +#endif // LZMA_CRC32_LOONGARCH_H diff --git a/src/native/external/xz/src/liblzma/check/crc32_small.c b/src/native/external/xz/src/liblzma/check/crc32_small.c new file mode 100644 index 00000000000000..4a62830c807a5a --- /dev/null +++ b/src/native/external/xz/src/liblzma/check/crc32_small.c @@ -0,0 +1,70 @@ +// SPDX-License-Identifier: 0BSD + +/////////////////////////////////////////////////////////////////////////////// +// +/// \file crc32_small.c +/// \brief CRC32 calculation (size-optimized) +// +// Author: Lasse Collin +// +/////////////////////////////////////////////////////////////////////////////// + +#include "check.h" +#include "crc_common.h" + + +// The table is used by the LZ encoder too, thus it's not static like +// in crc64_small.c. +uint32_t lzma_crc32_table[1][256]; + + +#ifdef HAVE_FUNC_ATTRIBUTE_CONSTRUCTOR +__attribute__((__constructor__)) +#endif +static void +crc32_init(void) +{ + static const uint32_t poly32 = UINT32_C(0xEDB88320); + + for (size_t b = 0; b < 256; ++b) { + uint32_t r = b; + for (size_t i = 0; i < 8; ++i) { + if (r & 1) + r = (r >> 1) ^ poly32; + else + r >>= 1; + } + + lzma_crc32_table[0][b] = r; + } + + return; +} + + +#ifndef HAVE_FUNC_ATTRIBUTE_CONSTRUCTOR +extern void +lzma_crc32_init(void) +{ + mythread_once(crc32_init); + return; +} +#endif + + +extern LZMA_API(uint32_t) +lzma_crc32(const uint8_t *buf, size_t size, uint32_t crc) +{ +#ifndef HAVE_FUNC_ATTRIBUTE_CONSTRUCTOR + lzma_crc32_init(); +#endif + + crc = ~crc; + + while (size != 0) { + crc = lzma_crc32_table[0][*buf++ ^ (crc & 0xFF)] ^ (crc >> 8); + --size; + } + + return ~crc; +} diff --git a/src/native/external/xz/src/liblzma/check/crc32_table_be.h b/src/native/external/xz/src/liblzma/check/crc32_table_be.h new file mode 100644 index 00000000000000..505c23074c1136 --- /dev/null +++ b/src/native/external/xz/src/liblzma/check/crc32_table_be.h @@ -0,0 +1,527 @@ +// SPDX-License-Identifier: 0BSD + +// This file has been generated by crc32_tablegen.c. + +const uint32_t lzma_crc32_table[8][256] = { + { + 0x00000000, 0x96300777, 0x2C610EEE, 0xBA510999, + 0x19C46D07, 0x8FF46A70, 0x35A563E9, 0xA395649E, + 0x3288DB0E, 0xA4B8DC79, 0x1EE9D5E0, 0x88D9D297, + 0x2B4CB609, 0xBD7CB17E, 0x072DB8E7, 0x911DBF90, + 0x6410B71D, 0xF220B06A, 0x4871B9F3, 0xDE41BE84, + 0x7DD4DA1A, 0xEBE4DD6D, 0x51B5D4F4, 0xC785D383, + 0x56986C13, 0xC0A86B64, 0x7AF962FD, 0xECC9658A, + 0x4F5C0114, 0xD96C0663, 0x633D0FFA, 0xF50D088D, + 0xC8206E3B, 0x5E10694C, 0xE44160D5, 0x727167A2, + 0xD1E4033C, 0x47D4044B, 0xFD850DD2, 0x6BB50AA5, + 0xFAA8B535, 0x6C98B242, 0xD6C9BBDB, 0x40F9BCAC, + 0xE36CD832, 0x755CDF45, 0xCF0DD6DC, 0x593DD1AB, + 0xAC30D926, 0x3A00DE51, 0x8051D7C8, 0x1661D0BF, + 0xB5F4B421, 0x23C4B356, 0x9995BACF, 0x0FA5BDB8, + 0x9EB80228, 0x0888055F, 0xB2D90CC6, 0x24E90BB1, + 0x877C6F2F, 0x114C6858, 0xAB1D61C1, 0x3D2D66B6, + 0x9041DC76, 0x0671DB01, 0xBC20D298, 0x2A10D5EF, + 0x8985B171, 0x1FB5B606, 0xA5E4BF9F, 0x33D4B8E8, + 0xA2C90778, 0x34F9000F, 0x8EA80996, 0x18980EE1, + 0xBB0D6A7F, 0x2D3D6D08, 0x976C6491, 0x015C63E6, + 0xF4516B6B, 0x62616C1C, 0xD8306585, 0x4E0062F2, + 0xED95066C, 0x7BA5011B, 0xC1F40882, 0x57C40FF5, + 0xC6D9B065, 0x50E9B712, 0xEAB8BE8B, 0x7C88B9FC, + 0xDF1DDD62, 0x492DDA15, 0xF37CD38C, 0x654CD4FB, + 0x5861B24D, 0xCE51B53A, 0x7400BCA3, 0xE230BBD4, + 0x41A5DF4A, 0xD795D83D, 0x6DC4D1A4, 0xFBF4D6D3, + 0x6AE96943, 0xFCD96E34, 0x468867AD, 0xD0B860DA, + 0x732D0444, 0xE51D0333, 0x5F4C0AAA, 0xC97C0DDD, + 0x3C710550, 0xAA410227, 0x10100BBE, 0x86200CC9, + 0x25B56857, 0xB3856F20, 0x09D466B9, 0x9FE461CE, + 0x0EF9DE5E, 0x98C9D929, 0x2298D0B0, 0xB4A8D7C7, + 0x173DB359, 0x810DB42E, 0x3B5CBDB7, 0xAD6CBAC0, + 0x2083B8ED, 0xB6B3BF9A, 0x0CE2B603, 0x9AD2B174, + 0x3947D5EA, 0xAF77D29D, 0x1526DB04, 0x8316DC73, + 0x120B63E3, 0x843B6494, 0x3E6A6D0D, 0xA85A6A7A, + 0x0BCF0EE4, 0x9DFF0993, 0x27AE000A, 0xB19E077D, + 0x44930FF0, 0xD2A30887, 0x68F2011E, 0xFEC20669, + 0x5D5762F7, 0xCB676580, 0x71366C19, 0xE7066B6E, + 0x761BD4FE, 0xE02BD389, 0x5A7ADA10, 0xCC4ADD67, + 0x6FDFB9F9, 0xF9EFBE8E, 0x43BEB717, 0xD58EB060, + 0xE8A3D6D6, 0x7E93D1A1, 0xC4C2D838, 0x52F2DF4F, + 0xF167BBD1, 0x6757BCA6, 0xDD06B53F, 0x4B36B248, + 0xDA2B0DD8, 0x4C1B0AAF, 0xF64A0336, 0x607A0441, + 0xC3EF60DF, 0x55DF67A8, 0xEF8E6E31, 0x79BE6946, + 0x8CB361CB, 0x1A8366BC, 0xA0D26F25, 0x36E26852, + 0x95770CCC, 0x03470BBB, 0xB9160222, 0x2F260555, + 0xBE3BBAC5, 0x280BBDB2, 0x925AB42B, 0x046AB35C, + 0xA7FFD7C2, 0x31CFD0B5, 0x8B9ED92C, 0x1DAEDE5B, + 0xB0C2649B, 0x26F263EC, 0x9CA36A75, 0x0A936D02, + 0xA906099C, 0x3F360EEB, 0x85670772, 0x13570005, + 0x824ABF95, 0x147AB8E2, 0xAE2BB17B, 0x381BB60C, + 0x9B8ED292, 0x0DBED5E5, 0xB7EFDC7C, 0x21DFDB0B, + 0xD4D2D386, 0x42E2D4F1, 0xF8B3DD68, 0x6E83DA1F, + 0xCD16BE81, 0x5B26B9F6, 0xE177B06F, 0x7747B718, + 0xE65A0888, 0x706A0FFF, 0xCA3B0666, 0x5C0B0111, + 0xFF9E658F, 0x69AE62F8, 0xD3FF6B61, 0x45CF6C16, + 0x78E20AA0, 0xEED20DD7, 0x5483044E, 0xC2B30339, + 0x612667A7, 0xF71660D0, 0x4D476949, 0xDB776E3E, + 0x4A6AD1AE, 0xDC5AD6D9, 0x660BDF40, 0xF03BD837, + 0x53AEBCA9, 0xC59EBBDE, 0x7FCFB247, 0xE9FFB530, + 0x1CF2BDBD, 0x8AC2BACA, 0x3093B353, 0xA6A3B424, + 0x0536D0BA, 0x9306D7CD, 0x2957DE54, 0xBF67D923, + 0x2E7A66B3, 0xB84A61C4, 0x021B685D, 0x942B6F2A, + 0x37BE0BB4, 0xA18E0CC3, 0x1BDF055A, 0x8DEF022D + }, { + 0x00000000, 0x41311B19, 0x82623632, 0xC3532D2B, + 0x04C56C64, 0x45F4777D, 0x86A75A56, 0xC796414F, + 0x088AD9C8, 0x49BBC2D1, 0x8AE8EFFA, 0xCBD9F4E3, + 0x0C4FB5AC, 0x4D7EAEB5, 0x8E2D839E, 0xCF1C9887, + 0x5112C24A, 0x1023D953, 0xD370F478, 0x9241EF61, + 0x55D7AE2E, 0x14E6B537, 0xD7B5981C, 0x96848305, + 0x59981B82, 0x18A9009B, 0xDBFA2DB0, 0x9ACB36A9, + 0x5D5D77E6, 0x1C6C6CFF, 0xDF3F41D4, 0x9E0E5ACD, + 0xA2248495, 0xE3159F8C, 0x2046B2A7, 0x6177A9BE, + 0xA6E1E8F1, 0xE7D0F3E8, 0x2483DEC3, 0x65B2C5DA, + 0xAAAE5D5D, 0xEB9F4644, 0x28CC6B6F, 0x69FD7076, + 0xAE6B3139, 0xEF5A2A20, 0x2C09070B, 0x6D381C12, + 0xF33646DF, 0xB2075DC6, 0x715470ED, 0x30656BF4, + 0xF7F32ABB, 0xB6C231A2, 0x75911C89, 0x34A00790, + 0xFBBC9F17, 0xBA8D840E, 0x79DEA925, 0x38EFB23C, + 0xFF79F373, 0xBE48E86A, 0x7D1BC541, 0x3C2ADE58, + 0x054F79F0, 0x447E62E9, 0x872D4FC2, 0xC61C54DB, + 0x018A1594, 0x40BB0E8D, 0x83E823A6, 0xC2D938BF, + 0x0DC5A038, 0x4CF4BB21, 0x8FA7960A, 0xCE968D13, + 0x0900CC5C, 0x4831D745, 0x8B62FA6E, 0xCA53E177, + 0x545DBBBA, 0x156CA0A3, 0xD63F8D88, 0x970E9691, + 0x5098D7DE, 0x11A9CCC7, 0xD2FAE1EC, 0x93CBFAF5, + 0x5CD76272, 0x1DE6796B, 0xDEB55440, 0x9F844F59, + 0x58120E16, 0x1923150F, 0xDA703824, 0x9B41233D, + 0xA76BFD65, 0xE65AE67C, 0x2509CB57, 0x6438D04E, + 0xA3AE9101, 0xE29F8A18, 0x21CCA733, 0x60FDBC2A, + 0xAFE124AD, 0xEED03FB4, 0x2D83129F, 0x6CB20986, + 0xAB2448C9, 0xEA1553D0, 0x29467EFB, 0x687765E2, + 0xF6793F2F, 0xB7482436, 0x741B091D, 0x352A1204, + 0xF2BC534B, 0xB38D4852, 0x70DE6579, 0x31EF7E60, + 0xFEF3E6E7, 0xBFC2FDFE, 0x7C91D0D5, 0x3DA0CBCC, + 0xFA368A83, 0xBB07919A, 0x7854BCB1, 0x3965A7A8, + 0x4B98833B, 0x0AA99822, 0xC9FAB509, 0x88CBAE10, + 0x4F5DEF5F, 0x0E6CF446, 0xCD3FD96D, 0x8C0EC274, + 0x43125AF3, 0x022341EA, 0xC1706CC1, 0x804177D8, + 0x47D73697, 0x06E62D8E, 0xC5B500A5, 0x84841BBC, + 0x1A8A4171, 0x5BBB5A68, 0x98E87743, 0xD9D96C5A, + 0x1E4F2D15, 0x5F7E360C, 0x9C2D1B27, 0xDD1C003E, + 0x120098B9, 0x533183A0, 0x9062AE8B, 0xD153B592, + 0x16C5F4DD, 0x57F4EFC4, 0x94A7C2EF, 0xD596D9F6, + 0xE9BC07AE, 0xA88D1CB7, 0x6BDE319C, 0x2AEF2A85, + 0xED796BCA, 0xAC4870D3, 0x6F1B5DF8, 0x2E2A46E1, + 0xE136DE66, 0xA007C57F, 0x6354E854, 0x2265F34D, + 0xE5F3B202, 0xA4C2A91B, 0x67918430, 0x26A09F29, + 0xB8AEC5E4, 0xF99FDEFD, 0x3ACCF3D6, 0x7BFDE8CF, + 0xBC6BA980, 0xFD5AB299, 0x3E099FB2, 0x7F3884AB, + 0xB0241C2C, 0xF1150735, 0x32462A1E, 0x73773107, + 0xB4E17048, 0xF5D06B51, 0x3683467A, 0x77B25D63, + 0x4ED7FACB, 0x0FE6E1D2, 0xCCB5CCF9, 0x8D84D7E0, + 0x4A1296AF, 0x0B238DB6, 0xC870A09D, 0x8941BB84, + 0x465D2303, 0x076C381A, 0xC43F1531, 0x850E0E28, + 0x42984F67, 0x03A9547E, 0xC0FA7955, 0x81CB624C, + 0x1FC53881, 0x5EF42398, 0x9DA70EB3, 0xDC9615AA, + 0x1B0054E5, 0x5A314FFC, 0x996262D7, 0xD85379CE, + 0x174FE149, 0x567EFA50, 0x952DD77B, 0xD41CCC62, + 0x138A8D2D, 0x52BB9634, 0x91E8BB1F, 0xD0D9A006, + 0xECF37E5E, 0xADC26547, 0x6E91486C, 0x2FA05375, + 0xE836123A, 0xA9070923, 0x6A542408, 0x2B653F11, + 0xE479A796, 0xA548BC8F, 0x661B91A4, 0x272A8ABD, + 0xE0BCCBF2, 0xA18DD0EB, 0x62DEFDC0, 0x23EFE6D9, + 0xBDE1BC14, 0xFCD0A70D, 0x3F838A26, 0x7EB2913F, + 0xB924D070, 0xF815CB69, 0x3B46E642, 0x7A77FD5B, + 0xB56B65DC, 0xF45A7EC5, 0x370953EE, 0x763848F7, + 0xB1AE09B8, 0xF09F12A1, 0x33CC3F8A, 0x72FD2493 + }, { + 0x00000000, 0x376AC201, 0x6ED48403, 0x59BE4602, + 0xDCA80907, 0xEBC2CB06, 0xB27C8D04, 0x85164F05, + 0xB851130E, 0x8F3BD10F, 0xD685970D, 0xE1EF550C, + 0x64F91A09, 0x5393D808, 0x0A2D9E0A, 0x3D475C0B, + 0x70A3261C, 0x47C9E41D, 0x1E77A21F, 0x291D601E, + 0xAC0B2F1B, 0x9B61ED1A, 0xC2DFAB18, 0xF5B56919, + 0xC8F23512, 0xFF98F713, 0xA626B111, 0x914C7310, + 0x145A3C15, 0x2330FE14, 0x7A8EB816, 0x4DE47A17, + 0xE0464D38, 0xD72C8F39, 0x8E92C93B, 0xB9F80B3A, + 0x3CEE443F, 0x0B84863E, 0x523AC03C, 0x6550023D, + 0x58175E36, 0x6F7D9C37, 0x36C3DA35, 0x01A91834, + 0x84BF5731, 0xB3D59530, 0xEA6BD332, 0xDD011133, + 0x90E56B24, 0xA78FA925, 0xFE31EF27, 0xC95B2D26, + 0x4C4D6223, 0x7B27A022, 0x2299E620, 0x15F32421, + 0x28B4782A, 0x1FDEBA2B, 0x4660FC29, 0x710A3E28, + 0xF41C712D, 0xC376B32C, 0x9AC8F52E, 0xADA2372F, + 0xC08D9A70, 0xF7E75871, 0xAE591E73, 0x9933DC72, + 0x1C259377, 0x2B4F5176, 0x72F11774, 0x459BD575, + 0x78DC897E, 0x4FB64B7F, 0x16080D7D, 0x2162CF7C, + 0xA4748079, 0x931E4278, 0xCAA0047A, 0xFDCAC67B, + 0xB02EBC6C, 0x87447E6D, 0xDEFA386F, 0xE990FA6E, + 0x6C86B56B, 0x5BEC776A, 0x02523168, 0x3538F369, + 0x087FAF62, 0x3F156D63, 0x66AB2B61, 0x51C1E960, + 0xD4D7A665, 0xE3BD6464, 0xBA032266, 0x8D69E067, + 0x20CBD748, 0x17A11549, 0x4E1F534B, 0x7975914A, + 0xFC63DE4F, 0xCB091C4E, 0x92B75A4C, 0xA5DD984D, + 0x989AC446, 0xAFF00647, 0xF64E4045, 0xC1248244, + 0x4432CD41, 0x73580F40, 0x2AE64942, 0x1D8C8B43, + 0x5068F154, 0x67023355, 0x3EBC7557, 0x09D6B756, + 0x8CC0F853, 0xBBAA3A52, 0xE2147C50, 0xD57EBE51, + 0xE839E25A, 0xDF53205B, 0x86ED6659, 0xB187A458, + 0x3491EB5D, 0x03FB295C, 0x5A456F5E, 0x6D2FAD5F, + 0x801B35E1, 0xB771F7E0, 0xEECFB1E2, 0xD9A573E3, + 0x5CB33CE6, 0x6BD9FEE7, 0x3267B8E5, 0x050D7AE4, + 0x384A26EF, 0x0F20E4EE, 0x569EA2EC, 0x61F460ED, + 0xE4E22FE8, 0xD388EDE9, 0x8A36ABEB, 0xBD5C69EA, + 0xF0B813FD, 0xC7D2D1FC, 0x9E6C97FE, 0xA90655FF, + 0x2C101AFA, 0x1B7AD8FB, 0x42C49EF9, 0x75AE5CF8, + 0x48E900F3, 0x7F83C2F2, 0x263D84F0, 0x115746F1, + 0x944109F4, 0xA32BCBF5, 0xFA958DF7, 0xCDFF4FF6, + 0x605D78D9, 0x5737BAD8, 0x0E89FCDA, 0x39E33EDB, + 0xBCF571DE, 0x8B9FB3DF, 0xD221F5DD, 0xE54B37DC, + 0xD80C6BD7, 0xEF66A9D6, 0xB6D8EFD4, 0x81B22DD5, + 0x04A462D0, 0x33CEA0D1, 0x6A70E6D3, 0x5D1A24D2, + 0x10FE5EC5, 0x27949CC4, 0x7E2ADAC6, 0x494018C7, + 0xCC5657C2, 0xFB3C95C3, 0xA282D3C1, 0x95E811C0, + 0xA8AF4DCB, 0x9FC58FCA, 0xC67BC9C8, 0xF1110BC9, + 0x740744CC, 0x436D86CD, 0x1AD3C0CF, 0x2DB902CE, + 0x4096AF91, 0x77FC6D90, 0x2E422B92, 0x1928E993, + 0x9C3EA696, 0xAB546497, 0xF2EA2295, 0xC580E094, + 0xF8C7BC9F, 0xCFAD7E9E, 0x9613389C, 0xA179FA9D, + 0x246FB598, 0x13057799, 0x4ABB319B, 0x7DD1F39A, + 0x3035898D, 0x075F4B8C, 0x5EE10D8E, 0x698BCF8F, + 0xEC9D808A, 0xDBF7428B, 0x82490489, 0xB523C688, + 0x88649A83, 0xBF0E5882, 0xE6B01E80, 0xD1DADC81, + 0x54CC9384, 0x63A65185, 0x3A181787, 0x0D72D586, + 0xA0D0E2A9, 0x97BA20A8, 0xCE0466AA, 0xF96EA4AB, + 0x7C78EBAE, 0x4B1229AF, 0x12AC6FAD, 0x25C6ADAC, + 0x1881F1A7, 0x2FEB33A6, 0x765575A4, 0x413FB7A5, + 0xC429F8A0, 0xF3433AA1, 0xAAFD7CA3, 0x9D97BEA2, + 0xD073C4B5, 0xE71906B4, 0xBEA740B6, 0x89CD82B7, + 0x0CDBCDB2, 0x3BB10FB3, 0x620F49B1, 0x55658BB0, + 0x6822D7BB, 0x5F4815BA, 0x06F653B8, 0x319C91B9, + 0xB48ADEBC, 0x83E01CBD, 0xDA5E5ABF, 0xED3498BE + }, { + 0x00000000, 0x6567BCB8, 0x8BC809AA, 0xEEAFB512, + 0x5797628F, 0x32F0DE37, 0xDC5F6B25, 0xB938D79D, + 0xEF28B4C5, 0x8A4F087D, 0x64E0BD6F, 0x018701D7, + 0xB8BFD64A, 0xDDD86AF2, 0x3377DFE0, 0x56106358, + 0x9F571950, 0xFA30A5E8, 0x149F10FA, 0x71F8AC42, + 0xC8C07BDF, 0xADA7C767, 0x43087275, 0x266FCECD, + 0x707FAD95, 0x1518112D, 0xFBB7A43F, 0x9ED01887, + 0x27E8CF1A, 0x428F73A2, 0xAC20C6B0, 0xC9477A08, + 0x3EAF32A0, 0x5BC88E18, 0xB5673B0A, 0xD00087B2, + 0x6938502F, 0x0C5FEC97, 0xE2F05985, 0x8797E53D, + 0xD1878665, 0xB4E03ADD, 0x5A4F8FCF, 0x3F283377, + 0x8610E4EA, 0xE3775852, 0x0DD8ED40, 0x68BF51F8, + 0xA1F82BF0, 0xC49F9748, 0x2A30225A, 0x4F579EE2, + 0xF66F497F, 0x9308F5C7, 0x7DA740D5, 0x18C0FC6D, + 0x4ED09F35, 0x2BB7238D, 0xC518969F, 0xA07F2A27, + 0x1947FDBA, 0x7C204102, 0x928FF410, 0xF7E848A8, + 0x3D58149B, 0x583FA823, 0xB6901D31, 0xD3F7A189, + 0x6ACF7614, 0x0FA8CAAC, 0xE1077FBE, 0x8460C306, + 0xD270A05E, 0xB7171CE6, 0x59B8A9F4, 0x3CDF154C, + 0x85E7C2D1, 0xE0807E69, 0x0E2FCB7B, 0x6B4877C3, + 0xA20F0DCB, 0xC768B173, 0x29C70461, 0x4CA0B8D9, + 0xF5986F44, 0x90FFD3FC, 0x7E5066EE, 0x1B37DA56, + 0x4D27B90E, 0x284005B6, 0xC6EFB0A4, 0xA3880C1C, + 0x1AB0DB81, 0x7FD76739, 0x9178D22B, 0xF41F6E93, + 0x03F7263B, 0x66909A83, 0x883F2F91, 0xED589329, + 0x546044B4, 0x3107F80C, 0xDFA84D1E, 0xBACFF1A6, + 0xECDF92FE, 0x89B82E46, 0x67179B54, 0x027027EC, + 0xBB48F071, 0xDE2F4CC9, 0x3080F9DB, 0x55E74563, + 0x9CA03F6B, 0xF9C783D3, 0x176836C1, 0x720F8A79, + 0xCB375DE4, 0xAE50E15C, 0x40FF544E, 0x2598E8F6, + 0x73888BAE, 0x16EF3716, 0xF8408204, 0x9D273EBC, + 0x241FE921, 0x41785599, 0xAFD7E08B, 0xCAB05C33, + 0x3BB659ED, 0x5ED1E555, 0xB07E5047, 0xD519ECFF, + 0x6C213B62, 0x094687DA, 0xE7E932C8, 0x828E8E70, + 0xD49EED28, 0xB1F95190, 0x5F56E482, 0x3A31583A, + 0x83098FA7, 0xE66E331F, 0x08C1860D, 0x6DA63AB5, + 0xA4E140BD, 0xC186FC05, 0x2F294917, 0x4A4EF5AF, + 0xF3762232, 0x96119E8A, 0x78BE2B98, 0x1DD99720, + 0x4BC9F478, 0x2EAE48C0, 0xC001FDD2, 0xA566416A, + 0x1C5E96F7, 0x79392A4F, 0x97969F5D, 0xF2F123E5, + 0x05196B4D, 0x607ED7F5, 0x8ED162E7, 0xEBB6DE5F, + 0x528E09C2, 0x37E9B57A, 0xD9460068, 0xBC21BCD0, + 0xEA31DF88, 0x8F566330, 0x61F9D622, 0x049E6A9A, + 0xBDA6BD07, 0xD8C101BF, 0x366EB4AD, 0x53090815, + 0x9A4E721D, 0xFF29CEA5, 0x11867BB7, 0x74E1C70F, + 0xCDD91092, 0xA8BEAC2A, 0x46111938, 0x2376A580, + 0x7566C6D8, 0x10017A60, 0xFEAECF72, 0x9BC973CA, + 0x22F1A457, 0x479618EF, 0xA939ADFD, 0xCC5E1145, + 0x06EE4D76, 0x6389F1CE, 0x8D2644DC, 0xE841F864, + 0x51792FF9, 0x341E9341, 0xDAB12653, 0xBFD69AEB, + 0xE9C6F9B3, 0x8CA1450B, 0x620EF019, 0x07694CA1, + 0xBE519B3C, 0xDB362784, 0x35999296, 0x50FE2E2E, + 0x99B95426, 0xFCDEE89E, 0x12715D8C, 0x7716E134, + 0xCE2E36A9, 0xAB498A11, 0x45E63F03, 0x208183BB, + 0x7691E0E3, 0x13F65C5B, 0xFD59E949, 0x983E55F1, + 0x2106826C, 0x44613ED4, 0xAACE8BC6, 0xCFA9377E, + 0x38417FD6, 0x5D26C36E, 0xB389767C, 0xD6EECAC4, + 0x6FD61D59, 0x0AB1A1E1, 0xE41E14F3, 0x8179A84B, + 0xD769CB13, 0xB20E77AB, 0x5CA1C2B9, 0x39C67E01, + 0x80FEA99C, 0xE5991524, 0x0B36A036, 0x6E511C8E, + 0xA7166686, 0xC271DA3E, 0x2CDE6F2C, 0x49B9D394, + 0xF0810409, 0x95E6B8B1, 0x7B490DA3, 0x1E2EB11B, + 0x483ED243, 0x2D596EFB, 0xC3F6DBE9, 0xA6916751, + 0x1FA9B0CC, 0x7ACE0C74, 0x9461B966, 0xF10605DE + }, { + 0x00000000, 0xB029603D, 0x6053C07A, 0xD07AA047, + 0xC0A680F5, 0x708FE0C8, 0xA0F5408F, 0x10DC20B2, + 0xC14B7030, 0x7162100D, 0xA118B04A, 0x1131D077, + 0x01EDF0C5, 0xB1C490F8, 0x61BE30BF, 0xD1975082, + 0x8297E060, 0x32BE805D, 0xE2C4201A, 0x52ED4027, + 0x42316095, 0xF21800A8, 0x2262A0EF, 0x924BC0D2, + 0x43DC9050, 0xF3F5F06D, 0x238F502A, 0x93A63017, + 0x837A10A5, 0x33537098, 0xE329D0DF, 0x5300B0E2, + 0x042FC1C1, 0xB406A1FC, 0x647C01BB, 0xD4556186, + 0xC4894134, 0x74A02109, 0xA4DA814E, 0x14F3E173, + 0xC564B1F1, 0x754DD1CC, 0xA537718B, 0x151E11B6, + 0x05C23104, 0xB5EB5139, 0x6591F17E, 0xD5B89143, + 0x86B821A1, 0x3691419C, 0xE6EBE1DB, 0x56C281E6, + 0x461EA154, 0xF637C169, 0x264D612E, 0x96640113, + 0x47F35191, 0xF7DA31AC, 0x27A091EB, 0x9789F1D6, + 0x8755D164, 0x377CB159, 0xE706111E, 0x572F7123, + 0x4958F358, 0xF9719365, 0x290B3322, 0x9922531F, + 0x89FE73AD, 0x39D71390, 0xE9ADB3D7, 0x5984D3EA, + 0x88138368, 0x383AE355, 0xE8404312, 0x5869232F, + 0x48B5039D, 0xF89C63A0, 0x28E6C3E7, 0x98CFA3DA, + 0xCBCF1338, 0x7BE67305, 0xAB9CD342, 0x1BB5B37F, + 0x0B6993CD, 0xBB40F3F0, 0x6B3A53B7, 0xDB13338A, + 0x0A846308, 0xBAAD0335, 0x6AD7A372, 0xDAFEC34F, + 0xCA22E3FD, 0x7A0B83C0, 0xAA712387, 0x1A5843BA, + 0x4D773299, 0xFD5E52A4, 0x2D24F2E3, 0x9D0D92DE, + 0x8DD1B26C, 0x3DF8D251, 0xED827216, 0x5DAB122B, + 0x8C3C42A9, 0x3C152294, 0xEC6F82D3, 0x5C46E2EE, + 0x4C9AC25C, 0xFCB3A261, 0x2CC90226, 0x9CE0621B, + 0xCFE0D2F9, 0x7FC9B2C4, 0xAFB31283, 0x1F9A72BE, + 0x0F46520C, 0xBF6F3231, 0x6F159276, 0xDF3CF24B, + 0x0EABA2C9, 0xBE82C2F4, 0x6EF862B3, 0xDED1028E, + 0xCE0D223C, 0x7E244201, 0xAE5EE246, 0x1E77827B, + 0x92B0E6B1, 0x2299868C, 0xF2E326CB, 0x42CA46F6, + 0x52166644, 0xE23F0679, 0x3245A63E, 0x826CC603, + 0x53FB9681, 0xE3D2F6BC, 0x33A856FB, 0x838136C6, + 0x935D1674, 0x23747649, 0xF30ED60E, 0x4327B633, + 0x102706D1, 0xA00E66EC, 0x7074C6AB, 0xC05DA696, + 0xD0818624, 0x60A8E619, 0xB0D2465E, 0x00FB2663, + 0xD16C76E1, 0x614516DC, 0xB13FB69B, 0x0116D6A6, + 0x11CAF614, 0xA1E39629, 0x7199366E, 0xC1B05653, + 0x969F2770, 0x26B6474D, 0xF6CCE70A, 0x46E58737, + 0x5639A785, 0xE610C7B8, 0x366A67FF, 0x864307C2, + 0x57D45740, 0xE7FD377D, 0x3787973A, 0x87AEF707, + 0x9772D7B5, 0x275BB788, 0xF72117CF, 0x470877F2, + 0x1408C710, 0xA421A72D, 0x745B076A, 0xC4726757, + 0xD4AE47E5, 0x648727D8, 0xB4FD879F, 0x04D4E7A2, + 0xD543B720, 0x656AD71D, 0xB510775A, 0x05391767, + 0x15E537D5, 0xA5CC57E8, 0x75B6F7AF, 0xC59F9792, + 0xDBE815E9, 0x6BC175D4, 0xBBBBD593, 0x0B92B5AE, + 0x1B4E951C, 0xAB67F521, 0x7B1D5566, 0xCB34355B, + 0x1AA365D9, 0xAA8A05E4, 0x7AF0A5A3, 0xCAD9C59E, + 0xDA05E52C, 0x6A2C8511, 0xBA562556, 0x0A7F456B, + 0x597FF589, 0xE95695B4, 0x392C35F3, 0x890555CE, + 0x99D9757C, 0x29F01541, 0xF98AB506, 0x49A3D53B, + 0x983485B9, 0x281DE584, 0xF86745C3, 0x484E25FE, + 0x5892054C, 0xE8BB6571, 0x38C1C536, 0x88E8A50B, + 0xDFC7D428, 0x6FEEB415, 0xBF941452, 0x0FBD746F, + 0x1F6154DD, 0xAF4834E0, 0x7F3294A7, 0xCF1BF49A, + 0x1E8CA418, 0xAEA5C425, 0x7EDF6462, 0xCEF6045F, + 0xDE2A24ED, 0x6E0344D0, 0xBE79E497, 0x0E5084AA, + 0x5D503448, 0xED795475, 0x3D03F432, 0x8D2A940F, + 0x9DF6B4BD, 0x2DDFD480, 0xFDA574C7, 0x4D8C14FA, + 0x9C1B4478, 0x2C322445, 0xFC488402, 0x4C61E43F, + 0x5CBDC48D, 0xEC94A4B0, 0x3CEE04F7, 0x8CC764CA + }, { + 0x00000000, 0xA5D35CCB, 0x0BA1C84D, 0xAE729486, + 0x1642919B, 0xB391CD50, 0x1DE359D6, 0xB830051D, + 0x6D8253EC, 0xC8510F27, 0x66239BA1, 0xC3F0C76A, + 0x7BC0C277, 0xDE139EBC, 0x70610A3A, 0xD5B256F1, + 0x9B02D603, 0x3ED18AC8, 0x90A31E4E, 0x35704285, + 0x8D404798, 0x28931B53, 0x86E18FD5, 0x2332D31E, + 0xF68085EF, 0x5353D924, 0xFD214DA2, 0x58F21169, + 0xE0C21474, 0x451148BF, 0xEB63DC39, 0x4EB080F2, + 0x3605AC07, 0x93D6F0CC, 0x3DA4644A, 0x98773881, + 0x20473D9C, 0x85946157, 0x2BE6F5D1, 0x8E35A91A, + 0x5B87FFEB, 0xFE54A320, 0x502637A6, 0xF5F56B6D, + 0x4DC56E70, 0xE81632BB, 0x4664A63D, 0xE3B7FAF6, + 0xAD077A04, 0x08D426CF, 0xA6A6B249, 0x0375EE82, + 0xBB45EB9F, 0x1E96B754, 0xB0E423D2, 0x15377F19, + 0xC08529E8, 0x65567523, 0xCB24E1A5, 0x6EF7BD6E, + 0xD6C7B873, 0x7314E4B8, 0xDD66703E, 0x78B52CF5, + 0x6C0A580F, 0xC9D904C4, 0x67AB9042, 0xC278CC89, + 0x7A48C994, 0xDF9B955F, 0x71E901D9, 0xD43A5D12, + 0x01880BE3, 0xA45B5728, 0x0A29C3AE, 0xAFFA9F65, + 0x17CA9A78, 0xB219C6B3, 0x1C6B5235, 0xB9B80EFE, + 0xF7088E0C, 0x52DBD2C7, 0xFCA94641, 0x597A1A8A, + 0xE14A1F97, 0x4499435C, 0xEAEBD7DA, 0x4F388B11, + 0x9A8ADDE0, 0x3F59812B, 0x912B15AD, 0x34F84966, + 0x8CC84C7B, 0x291B10B0, 0x87698436, 0x22BAD8FD, + 0x5A0FF408, 0xFFDCA8C3, 0x51AE3C45, 0xF47D608E, + 0x4C4D6593, 0xE99E3958, 0x47ECADDE, 0xE23FF115, + 0x378DA7E4, 0x925EFB2F, 0x3C2C6FA9, 0x99FF3362, + 0x21CF367F, 0x841C6AB4, 0x2A6EFE32, 0x8FBDA2F9, + 0xC10D220B, 0x64DE7EC0, 0xCAACEA46, 0x6F7FB68D, + 0xD74FB390, 0x729CEF5B, 0xDCEE7BDD, 0x793D2716, + 0xAC8F71E7, 0x095C2D2C, 0xA72EB9AA, 0x02FDE561, + 0xBACDE07C, 0x1F1EBCB7, 0xB16C2831, 0x14BF74FA, + 0xD814B01E, 0x7DC7ECD5, 0xD3B57853, 0x76662498, + 0xCE562185, 0x6B857D4E, 0xC5F7E9C8, 0x6024B503, + 0xB596E3F2, 0x1045BF39, 0xBE372BBF, 0x1BE47774, + 0xA3D47269, 0x06072EA2, 0xA875BA24, 0x0DA6E6EF, + 0x4316661D, 0xE6C53AD6, 0x48B7AE50, 0xED64F29B, + 0x5554F786, 0xF087AB4D, 0x5EF53FCB, 0xFB266300, + 0x2E9435F1, 0x8B47693A, 0x2535FDBC, 0x80E6A177, + 0x38D6A46A, 0x9D05F8A1, 0x33776C27, 0x96A430EC, + 0xEE111C19, 0x4BC240D2, 0xE5B0D454, 0x4063889F, + 0xF8538D82, 0x5D80D149, 0xF3F245CF, 0x56211904, + 0x83934FF5, 0x2640133E, 0x883287B8, 0x2DE1DB73, + 0x95D1DE6E, 0x300282A5, 0x9E701623, 0x3BA34AE8, + 0x7513CA1A, 0xD0C096D1, 0x7EB20257, 0xDB615E9C, + 0x63515B81, 0xC682074A, 0x68F093CC, 0xCD23CF07, + 0x189199F6, 0xBD42C53D, 0x133051BB, 0xB6E30D70, + 0x0ED3086D, 0xAB0054A6, 0x0572C020, 0xA0A19CEB, + 0xB41EE811, 0x11CDB4DA, 0xBFBF205C, 0x1A6C7C97, + 0xA25C798A, 0x078F2541, 0xA9FDB1C7, 0x0C2EED0C, + 0xD99CBBFD, 0x7C4FE736, 0xD23D73B0, 0x77EE2F7B, + 0xCFDE2A66, 0x6A0D76AD, 0xC47FE22B, 0x61ACBEE0, + 0x2F1C3E12, 0x8ACF62D9, 0x24BDF65F, 0x816EAA94, + 0x395EAF89, 0x9C8DF342, 0x32FF67C4, 0x972C3B0F, + 0x429E6DFE, 0xE74D3135, 0x493FA5B3, 0xECECF978, + 0x54DCFC65, 0xF10FA0AE, 0x5F7D3428, 0xFAAE68E3, + 0x821B4416, 0x27C818DD, 0x89BA8C5B, 0x2C69D090, + 0x9459D58D, 0x318A8946, 0x9FF81DC0, 0x3A2B410B, + 0xEF9917FA, 0x4A4A4B31, 0xE438DFB7, 0x41EB837C, + 0xF9DB8661, 0x5C08DAAA, 0xF27A4E2C, 0x57A912E7, + 0x19199215, 0xBCCACEDE, 0x12B85A58, 0xB76B0693, + 0x0F5B038E, 0xAA885F45, 0x04FACBC3, 0xA1299708, + 0x749BC1F9, 0xD1489D32, 0x7F3A09B4, 0xDAE9557F, + 0x62D95062, 0xC70A0CA9, 0x6978982F, 0xCCABC4E4 + }, { + 0x00000000, 0xB40B77A6, 0x29119F97, 0x9D1AE831, + 0x13244FF4, 0xA72F3852, 0x3A35D063, 0x8E3EA7C5, + 0x674EEF33, 0xD3459895, 0x4E5F70A4, 0xFA540702, + 0x746AA0C7, 0xC061D761, 0x5D7B3F50, 0xE97048F6, + 0xCE9CDE67, 0x7A97A9C1, 0xE78D41F0, 0x53863656, + 0xDDB89193, 0x69B3E635, 0xF4A90E04, 0x40A279A2, + 0xA9D23154, 0x1DD946F2, 0x80C3AEC3, 0x34C8D965, + 0xBAF67EA0, 0x0EFD0906, 0x93E7E137, 0x27EC9691, + 0x9C39BDCF, 0x2832CA69, 0xB5282258, 0x012355FE, + 0x8F1DF23B, 0x3B16859D, 0xA60C6DAC, 0x12071A0A, + 0xFB7752FC, 0x4F7C255A, 0xD266CD6B, 0x666DBACD, + 0xE8531D08, 0x5C586AAE, 0xC142829F, 0x7549F539, + 0x52A563A8, 0xE6AE140E, 0x7BB4FC3F, 0xCFBF8B99, + 0x41812C5C, 0xF58A5BFA, 0x6890B3CB, 0xDC9BC46D, + 0x35EB8C9B, 0x81E0FB3D, 0x1CFA130C, 0xA8F164AA, + 0x26CFC36F, 0x92C4B4C9, 0x0FDE5CF8, 0xBBD52B5E, + 0x79750B44, 0xCD7E7CE2, 0x506494D3, 0xE46FE375, + 0x6A5144B0, 0xDE5A3316, 0x4340DB27, 0xF74BAC81, + 0x1E3BE477, 0xAA3093D1, 0x372A7BE0, 0x83210C46, + 0x0D1FAB83, 0xB914DC25, 0x240E3414, 0x900543B2, + 0xB7E9D523, 0x03E2A285, 0x9EF84AB4, 0x2AF33D12, + 0xA4CD9AD7, 0x10C6ED71, 0x8DDC0540, 0x39D772E6, + 0xD0A73A10, 0x64AC4DB6, 0xF9B6A587, 0x4DBDD221, + 0xC38375E4, 0x77880242, 0xEA92EA73, 0x5E999DD5, + 0xE54CB68B, 0x5147C12D, 0xCC5D291C, 0x78565EBA, + 0xF668F97F, 0x42638ED9, 0xDF7966E8, 0x6B72114E, + 0x820259B8, 0x36092E1E, 0xAB13C62F, 0x1F18B189, + 0x9126164C, 0x252D61EA, 0xB83789DB, 0x0C3CFE7D, + 0x2BD068EC, 0x9FDB1F4A, 0x02C1F77B, 0xB6CA80DD, + 0x38F42718, 0x8CFF50BE, 0x11E5B88F, 0xA5EECF29, + 0x4C9E87DF, 0xF895F079, 0x658F1848, 0xD1846FEE, + 0x5FBAC82B, 0xEBB1BF8D, 0x76AB57BC, 0xC2A0201A, + 0xF2EA1688, 0x46E1612E, 0xDBFB891F, 0x6FF0FEB9, + 0xE1CE597C, 0x55C52EDA, 0xC8DFC6EB, 0x7CD4B14D, + 0x95A4F9BB, 0x21AF8E1D, 0xBCB5662C, 0x08BE118A, + 0x8680B64F, 0x328BC1E9, 0xAF9129D8, 0x1B9A5E7E, + 0x3C76C8EF, 0x887DBF49, 0x15675778, 0xA16C20DE, + 0x2F52871B, 0x9B59F0BD, 0x0643188C, 0xB2486F2A, + 0x5B3827DC, 0xEF33507A, 0x7229B84B, 0xC622CFED, + 0x481C6828, 0xFC171F8E, 0x610DF7BF, 0xD5068019, + 0x6ED3AB47, 0xDAD8DCE1, 0x47C234D0, 0xF3C94376, + 0x7DF7E4B3, 0xC9FC9315, 0x54E67B24, 0xE0ED0C82, + 0x099D4474, 0xBD9633D2, 0x208CDBE3, 0x9487AC45, + 0x1AB90B80, 0xAEB27C26, 0x33A89417, 0x87A3E3B1, + 0xA04F7520, 0x14440286, 0x895EEAB7, 0x3D559D11, + 0xB36B3AD4, 0x07604D72, 0x9A7AA543, 0x2E71D2E5, + 0xC7019A13, 0x730AEDB5, 0xEE100584, 0x5A1B7222, + 0xD425D5E7, 0x602EA241, 0xFD344A70, 0x493F3DD6, + 0x8B9F1DCC, 0x3F946A6A, 0xA28E825B, 0x1685F5FD, + 0x98BB5238, 0x2CB0259E, 0xB1AACDAF, 0x05A1BA09, + 0xECD1F2FF, 0x58DA8559, 0xC5C06D68, 0x71CB1ACE, + 0xFFF5BD0B, 0x4BFECAAD, 0xD6E4229C, 0x62EF553A, + 0x4503C3AB, 0xF108B40D, 0x6C125C3C, 0xD8192B9A, + 0x56278C5F, 0xE22CFBF9, 0x7F3613C8, 0xCB3D646E, + 0x224D2C98, 0x96465B3E, 0x0B5CB30F, 0xBF57C4A9, + 0x3169636C, 0x856214CA, 0x1878FCFB, 0xAC738B5D, + 0x17A6A003, 0xA3ADD7A5, 0x3EB73F94, 0x8ABC4832, + 0x0482EFF7, 0xB0899851, 0x2D937060, 0x999807C6, + 0x70E84F30, 0xC4E33896, 0x59F9D0A7, 0xEDF2A701, + 0x63CC00C4, 0xD7C77762, 0x4ADD9F53, 0xFED6E8F5, + 0xD93A7E64, 0x6D3109C2, 0xF02BE1F3, 0x44209655, + 0xCA1E3190, 0x7E154636, 0xE30FAE07, 0x5704D9A1, + 0xBE749157, 0x0A7FE6F1, 0x97650EC0, 0x236E7966, + 0xAD50DEA3, 0x195BA905, 0x84414134, 0x304A3692 + }, { + 0x00000000, 0x9E00AACC, 0x7D072542, 0xE3078F8E, + 0xFA0E4A84, 0x640EE048, 0x87096FC6, 0x1909C50A, + 0xB51BE5D3, 0x2B1B4F1F, 0xC81CC091, 0x561C6A5D, + 0x4F15AF57, 0xD115059B, 0x32128A15, 0xAC1220D9, + 0x2B31BB7C, 0xB53111B0, 0x56369E3E, 0xC83634F2, + 0xD13FF1F8, 0x4F3F5B34, 0xAC38D4BA, 0x32387E76, + 0x9E2A5EAF, 0x002AF463, 0xE32D7BED, 0x7D2DD121, + 0x6424142B, 0xFA24BEE7, 0x19233169, 0x87239BA5, + 0x566276F9, 0xC862DC35, 0x2B6553BB, 0xB565F977, + 0xAC6C3C7D, 0x326C96B1, 0xD16B193F, 0x4F6BB3F3, + 0xE379932A, 0x7D7939E6, 0x9E7EB668, 0x007E1CA4, + 0x1977D9AE, 0x87777362, 0x6470FCEC, 0xFA705620, + 0x7D53CD85, 0xE3536749, 0x0054E8C7, 0x9E54420B, + 0x875D8701, 0x195D2DCD, 0xFA5AA243, 0x645A088F, + 0xC8482856, 0x5648829A, 0xB54F0D14, 0x2B4FA7D8, + 0x324662D2, 0xAC46C81E, 0x4F414790, 0xD141ED5C, + 0xEDC29D29, 0x73C237E5, 0x90C5B86B, 0x0EC512A7, + 0x17CCD7AD, 0x89CC7D61, 0x6ACBF2EF, 0xF4CB5823, + 0x58D978FA, 0xC6D9D236, 0x25DE5DB8, 0xBBDEF774, + 0xA2D7327E, 0x3CD798B2, 0xDFD0173C, 0x41D0BDF0, + 0xC6F32655, 0x58F38C99, 0xBBF40317, 0x25F4A9DB, + 0x3CFD6CD1, 0xA2FDC61D, 0x41FA4993, 0xDFFAE35F, + 0x73E8C386, 0xEDE8694A, 0x0EEFE6C4, 0x90EF4C08, + 0x89E68902, 0x17E623CE, 0xF4E1AC40, 0x6AE1068C, + 0xBBA0EBD0, 0x25A0411C, 0xC6A7CE92, 0x58A7645E, + 0x41AEA154, 0xDFAE0B98, 0x3CA98416, 0xA2A92EDA, + 0x0EBB0E03, 0x90BBA4CF, 0x73BC2B41, 0xEDBC818D, + 0xF4B54487, 0x6AB5EE4B, 0x89B261C5, 0x17B2CB09, + 0x909150AC, 0x0E91FA60, 0xED9675EE, 0x7396DF22, + 0x6A9F1A28, 0xF49FB0E4, 0x17983F6A, 0x899895A6, + 0x258AB57F, 0xBB8A1FB3, 0x588D903D, 0xC68D3AF1, + 0xDF84FFFB, 0x41845537, 0xA283DAB9, 0x3C837075, + 0xDA853B53, 0x4485919F, 0xA7821E11, 0x3982B4DD, + 0x208B71D7, 0xBE8BDB1B, 0x5D8C5495, 0xC38CFE59, + 0x6F9EDE80, 0xF19E744C, 0x1299FBC2, 0x8C99510E, + 0x95909404, 0x0B903EC8, 0xE897B146, 0x76971B8A, + 0xF1B4802F, 0x6FB42AE3, 0x8CB3A56D, 0x12B30FA1, + 0x0BBACAAB, 0x95BA6067, 0x76BDEFE9, 0xE8BD4525, + 0x44AF65FC, 0xDAAFCF30, 0x39A840BE, 0xA7A8EA72, + 0xBEA12F78, 0x20A185B4, 0xC3A60A3A, 0x5DA6A0F6, + 0x8CE74DAA, 0x12E7E766, 0xF1E068E8, 0x6FE0C224, + 0x76E9072E, 0xE8E9ADE2, 0x0BEE226C, 0x95EE88A0, + 0x39FCA879, 0xA7FC02B5, 0x44FB8D3B, 0xDAFB27F7, + 0xC3F2E2FD, 0x5DF24831, 0xBEF5C7BF, 0x20F56D73, + 0xA7D6F6D6, 0x39D65C1A, 0xDAD1D394, 0x44D17958, + 0x5DD8BC52, 0xC3D8169E, 0x20DF9910, 0xBEDF33DC, + 0x12CD1305, 0x8CCDB9C9, 0x6FCA3647, 0xF1CA9C8B, + 0xE8C35981, 0x76C3F34D, 0x95C47CC3, 0x0BC4D60F, + 0x3747A67A, 0xA9470CB6, 0x4A408338, 0xD44029F4, + 0xCD49ECFE, 0x53494632, 0xB04EC9BC, 0x2E4E6370, + 0x825C43A9, 0x1C5CE965, 0xFF5B66EB, 0x615BCC27, + 0x7852092D, 0xE652A3E1, 0x05552C6F, 0x9B5586A3, + 0x1C761D06, 0x8276B7CA, 0x61713844, 0xFF719288, + 0xE6785782, 0x7878FD4E, 0x9B7F72C0, 0x057FD80C, + 0xA96DF8D5, 0x376D5219, 0xD46ADD97, 0x4A6A775B, + 0x5363B251, 0xCD63189D, 0x2E649713, 0xB0643DDF, + 0x6125D083, 0xFF257A4F, 0x1C22F5C1, 0x82225F0D, + 0x9B2B9A07, 0x052B30CB, 0xE62CBF45, 0x782C1589, + 0xD43E3550, 0x4A3E9F9C, 0xA9391012, 0x3739BADE, + 0x2E307FD4, 0xB030D518, 0x53375A96, 0xCD37F05A, + 0x4A146BFF, 0xD414C133, 0x37134EBD, 0xA913E471, + 0xB01A217B, 0x2E1A8BB7, 0xCD1D0439, 0x531DAEF5, + 0xFF0F8E2C, 0x610F24E0, 0x8208AB6E, 0x1C0801A2, + 0x0501C4A8, 0x9B016E64, 0x7806E1EA, 0xE6064B26 + } +}; diff --git a/src/native/external/xz/src/liblzma/check/crc32_table_le.h b/src/native/external/xz/src/liblzma/check/crc32_table_le.h new file mode 100644 index 00000000000000..e89c21a7b23d6b --- /dev/null +++ b/src/native/external/xz/src/liblzma/check/crc32_table_le.h @@ -0,0 +1,527 @@ +// SPDX-License-Identifier: 0BSD + +// This file has been generated by crc32_tablegen.c. + +const uint32_t lzma_crc32_table[8][256] = { + { + 0x00000000, 0x77073096, 0xEE0E612C, 0x990951BA, + 0x076DC419, 0x706AF48F, 0xE963A535, 0x9E6495A3, + 0x0EDB8832, 0x79DCB8A4, 0xE0D5E91E, 0x97D2D988, + 0x09B64C2B, 0x7EB17CBD, 0xE7B82D07, 0x90BF1D91, + 0x1DB71064, 0x6AB020F2, 0xF3B97148, 0x84BE41DE, + 0x1ADAD47D, 0x6DDDE4EB, 0xF4D4B551, 0x83D385C7, + 0x136C9856, 0x646BA8C0, 0xFD62F97A, 0x8A65C9EC, + 0x14015C4F, 0x63066CD9, 0xFA0F3D63, 0x8D080DF5, + 0x3B6E20C8, 0x4C69105E, 0xD56041E4, 0xA2677172, + 0x3C03E4D1, 0x4B04D447, 0xD20D85FD, 0xA50AB56B, + 0x35B5A8FA, 0x42B2986C, 0xDBBBC9D6, 0xACBCF940, + 0x32D86CE3, 0x45DF5C75, 0xDCD60DCF, 0xABD13D59, + 0x26D930AC, 0x51DE003A, 0xC8D75180, 0xBFD06116, + 0x21B4F4B5, 0x56B3C423, 0xCFBA9599, 0xB8BDA50F, + 0x2802B89E, 0x5F058808, 0xC60CD9B2, 0xB10BE924, + 0x2F6F7C87, 0x58684C11, 0xC1611DAB, 0xB6662D3D, + 0x76DC4190, 0x01DB7106, 0x98D220BC, 0xEFD5102A, + 0x71B18589, 0x06B6B51F, 0x9FBFE4A5, 0xE8B8D433, + 0x7807C9A2, 0x0F00F934, 0x9609A88E, 0xE10E9818, + 0x7F6A0DBB, 0x086D3D2D, 0x91646C97, 0xE6635C01, + 0x6B6B51F4, 0x1C6C6162, 0x856530D8, 0xF262004E, + 0x6C0695ED, 0x1B01A57B, 0x8208F4C1, 0xF50FC457, + 0x65B0D9C6, 0x12B7E950, 0x8BBEB8EA, 0xFCB9887C, + 0x62DD1DDF, 0x15DA2D49, 0x8CD37CF3, 0xFBD44C65, + 0x4DB26158, 0x3AB551CE, 0xA3BC0074, 0xD4BB30E2, + 0x4ADFA541, 0x3DD895D7, 0xA4D1C46D, 0xD3D6F4FB, + 0x4369E96A, 0x346ED9FC, 0xAD678846, 0xDA60B8D0, + 0x44042D73, 0x33031DE5, 0xAA0A4C5F, 0xDD0D7CC9, + 0x5005713C, 0x270241AA, 0xBE0B1010, 0xC90C2086, + 0x5768B525, 0x206F85B3, 0xB966D409, 0xCE61E49F, + 0x5EDEF90E, 0x29D9C998, 0xB0D09822, 0xC7D7A8B4, + 0x59B33D17, 0x2EB40D81, 0xB7BD5C3B, 0xC0BA6CAD, + 0xEDB88320, 0x9ABFB3B6, 0x03B6E20C, 0x74B1D29A, + 0xEAD54739, 0x9DD277AF, 0x04DB2615, 0x73DC1683, + 0xE3630B12, 0x94643B84, 0x0D6D6A3E, 0x7A6A5AA8, + 0xE40ECF0B, 0x9309FF9D, 0x0A00AE27, 0x7D079EB1, + 0xF00F9344, 0x8708A3D2, 0x1E01F268, 0x6906C2FE, + 0xF762575D, 0x806567CB, 0x196C3671, 0x6E6B06E7, + 0xFED41B76, 0x89D32BE0, 0x10DA7A5A, 0x67DD4ACC, + 0xF9B9DF6F, 0x8EBEEFF9, 0x17B7BE43, 0x60B08ED5, + 0xD6D6A3E8, 0xA1D1937E, 0x38D8C2C4, 0x4FDFF252, + 0xD1BB67F1, 0xA6BC5767, 0x3FB506DD, 0x48B2364B, + 0xD80D2BDA, 0xAF0A1B4C, 0x36034AF6, 0x41047A60, + 0xDF60EFC3, 0xA867DF55, 0x316E8EEF, 0x4669BE79, + 0xCB61B38C, 0xBC66831A, 0x256FD2A0, 0x5268E236, + 0xCC0C7795, 0xBB0B4703, 0x220216B9, 0x5505262F, + 0xC5BA3BBE, 0xB2BD0B28, 0x2BB45A92, 0x5CB36A04, + 0xC2D7FFA7, 0xB5D0CF31, 0x2CD99E8B, 0x5BDEAE1D, + 0x9B64C2B0, 0xEC63F226, 0x756AA39C, 0x026D930A, + 0x9C0906A9, 0xEB0E363F, 0x72076785, 0x05005713, + 0x95BF4A82, 0xE2B87A14, 0x7BB12BAE, 0x0CB61B38, + 0x92D28E9B, 0xE5D5BE0D, 0x7CDCEFB7, 0x0BDBDF21, + 0x86D3D2D4, 0xF1D4E242, 0x68DDB3F8, 0x1FDA836E, + 0x81BE16CD, 0xF6B9265B, 0x6FB077E1, 0x18B74777, + 0x88085AE6, 0xFF0F6A70, 0x66063BCA, 0x11010B5C, + 0x8F659EFF, 0xF862AE69, 0x616BFFD3, 0x166CCF45, + 0xA00AE278, 0xD70DD2EE, 0x4E048354, 0x3903B3C2, + 0xA7672661, 0xD06016F7, 0x4969474D, 0x3E6E77DB, + 0xAED16A4A, 0xD9D65ADC, 0x40DF0B66, 0x37D83BF0, + 0xA9BCAE53, 0xDEBB9EC5, 0x47B2CF7F, 0x30B5FFE9, + 0xBDBDF21C, 0xCABAC28A, 0x53B39330, 0x24B4A3A6, + 0xBAD03605, 0xCDD70693, 0x54DE5729, 0x23D967BF, + 0xB3667A2E, 0xC4614AB8, 0x5D681B02, 0x2A6F2B94, + 0xB40BBE37, 0xC30C8EA1, 0x5A05DF1B, 0x2D02EF8D + }, { + 0x00000000, 0x191B3141, 0x32366282, 0x2B2D53C3, + 0x646CC504, 0x7D77F445, 0x565AA786, 0x4F4196C7, + 0xC8D98A08, 0xD1C2BB49, 0xFAEFE88A, 0xE3F4D9CB, + 0xACB54F0C, 0xB5AE7E4D, 0x9E832D8E, 0x87981CCF, + 0x4AC21251, 0x53D92310, 0x78F470D3, 0x61EF4192, + 0x2EAED755, 0x37B5E614, 0x1C98B5D7, 0x05838496, + 0x821B9859, 0x9B00A918, 0xB02DFADB, 0xA936CB9A, + 0xE6775D5D, 0xFF6C6C1C, 0xD4413FDF, 0xCD5A0E9E, + 0x958424A2, 0x8C9F15E3, 0xA7B24620, 0xBEA97761, + 0xF1E8E1A6, 0xE8F3D0E7, 0xC3DE8324, 0xDAC5B265, + 0x5D5DAEAA, 0x44469FEB, 0x6F6BCC28, 0x7670FD69, + 0x39316BAE, 0x202A5AEF, 0x0B07092C, 0x121C386D, + 0xDF4636F3, 0xC65D07B2, 0xED705471, 0xF46B6530, + 0xBB2AF3F7, 0xA231C2B6, 0x891C9175, 0x9007A034, + 0x179FBCFB, 0x0E848DBA, 0x25A9DE79, 0x3CB2EF38, + 0x73F379FF, 0x6AE848BE, 0x41C51B7D, 0x58DE2A3C, + 0xF0794F05, 0xE9627E44, 0xC24F2D87, 0xDB541CC6, + 0x94158A01, 0x8D0EBB40, 0xA623E883, 0xBF38D9C2, + 0x38A0C50D, 0x21BBF44C, 0x0A96A78F, 0x138D96CE, + 0x5CCC0009, 0x45D73148, 0x6EFA628B, 0x77E153CA, + 0xBABB5D54, 0xA3A06C15, 0x888D3FD6, 0x91960E97, + 0xDED79850, 0xC7CCA911, 0xECE1FAD2, 0xF5FACB93, + 0x7262D75C, 0x6B79E61D, 0x4054B5DE, 0x594F849F, + 0x160E1258, 0x0F152319, 0x243870DA, 0x3D23419B, + 0x65FD6BA7, 0x7CE65AE6, 0x57CB0925, 0x4ED03864, + 0x0191AEA3, 0x188A9FE2, 0x33A7CC21, 0x2ABCFD60, + 0xAD24E1AF, 0xB43FD0EE, 0x9F12832D, 0x8609B26C, + 0xC94824AB, 0xD05315EA, 0xFB7E4629, 0xE2657768, + 0x2F3F79F6, 0x362448B7, 0x1D091B74, 0x04122A35, + 0x4B53BCF2, 0x52488DB3, 0x7965DE70, 0x607EEF31, + 0xE7E6F3FE, 0xFEFDC2BF, 0xD5D0917C, 0xCCCBA03D, + 0x838A36FA, 0x9A9107BB, 0xB1BC5478, 0xA8A76539, + 0x3B83984B, 0x2298A90A, 0x09B5FAC9, 0x10AECB88, + 0x5FEF5D4F, 0x46F46C0E, 0x6DD93FCD, 0x74C20E8C, + 0xF35A1243, 0xEA412302, 0xC16C70C1, 0xD8774180, + 0x9736D747, 0x8E2DE606, 0xA500B5C5, 0xBC1B8484, + 0x71418A1A, 0x685ABB5B, 0x4377E898, 0x5A6CD9D9, + 0x152D4F1E, 0x0C367E5F, 0x271B2D9C, 0x3E001CDD, + 0xB9980012, 0xA0833153, 0x8BAE6290, 0x92B553D1, + 0xDDF4C516, 0xC4EFF457, 0xEFC2A794, 0xF6D996D5, + 0xAE07BCE9, 0xB71C8DA8, 0x9C31DE6B, 0x852AEF2A, + 0xCA6B79ED, 0xD37048AC, 0xF85D1B6F, 0xE1462A2E, + 0x66DE36E1, 0x7FC507A0, 0x54E85463, 0x4DF36522, + 0x02B2F3E5, 0x1BA9C2A4, 0x30849167, 0x299FA026, + 0xE4C5AEB8, 0xFDDE9FF9, 0xD6F3CC3A, 0xCFE8FD7B, + 0x80A96BBC, 0x99B25AFD, 0xB29F093E, 0xAB84387F, + 0x2C1C24B0, 0x350715F1, 0x1E2A4632, 0x07317773, + 0x4870E1B4, 0x516BD0F5, 0x7A468336, 0x635DB277, + 0xCBFAD74E, 0xD2E1E60F, 0xF9CCB5CC, 0xE0D7848D, + 0xAF96124A, 0xB68D230B, 0x9DA070C8, 0x84BB4189, + 0x03235D46, 0x1A386C07, 0x31153FC4, 0x280E0E85, + 0x674F9842, 0x7E54A903, 0x5579FAC0, 0x4C62CB81, + 0x8138C51F, 0x9823F45E, 0xB30EA79D, 0xAA1596DC, + 0xE554001B, 0xFC4F315A, 0xD7626299, 0xCE7953D8, + 0x49E14F17, 0x50FA7E56, 0x7BD72D95, 0x62CC1CD4, + 0x2D8D8A13, 0x3496BB52, 0x1FBBE891, 0x06A0D9D0, + 0x5E7EF3EC, 0x4765C2AD, 0x6C48916E, 0x7553A02F, + 0x3A1236E8, 0x230907A9, 0x0824546A, 0x113F652B, + 0x96A779E4, 0x8FBC48A5, 0xA4911B66, 0xBD8A2A27, + 0xF2CBBCE0, 0xEBD08DA1, 0xC0FDDE62, 0xD9E6EF23, + 0x14BCE1BD, 0x0DA7D0FC, 0x268A833F, 0x3F91B27E, + 0x70D024B9, 0x69CB15F8, 0x42E6463B, 0x5BFD777A, + 0xDC656BB5, 0xC57E5AF4, 0xEE530937, 0xF7483876, + 0xB809AEB1, 0xA1129FF0, 0x8A3FCC33, 0x9324FD72 + }, { + 0x00000000, 0x01C26A37, 0x0384D46E, 0x0246BE59, + 0x0709A8DC, 0x06CBC2EB, 0x048D7CB2, 0x054F1685, + 0x0E1351B8, 0x0FD13B8F, 0x0D9785D6, 0x0C55EFE1, + 0x091AF964, 0x08D89353, 0x0A9E2D0A, 0x0B5C473D, + 0x1C26A370, 0x1DE4C947, 0x1FA2771E, 0x1E601D29, + 0x1B2F0BAC, 0x1AED619B, 0x18ABDFC2, 0x1969B5F5, + 0x1235F2C8, 0x13F798FF, 0x11B126A6, 0x10734C91, + 0x153C5A14, 0x14FE3023, 0x16B88E7A, 0x177AE44D, + 0x384D46E0, 0x398F2CD7, 0x3BC9928E, 0x3A0BF8B9, + 0x3F44EE3C, 0x3E86840B, 0x3CC03A52, 0x3D025065, + 0x365E1758, 0x379C7D6F, 0x35DAC336, 0x3418A901, + 0x3157BF84, 0x3095D5B3, 0x32D36BEA, 0x331101DD, + 0x246BE590, 0x25A98FA7, 0x27EF31FE, 0x262D5BC9, + 0x23624D4C, 0x22A0277B, 0x20E69922, 0x2124F315, + 0x2A78B428, 0x2BBADE1F, 0x29FC6046, 0x283E0A71, + 0x2D711CF4, 0x2CB376C3, 0x2EF5C89A, 0x2F37A2AD, + 0x709A8DC0, 0x7158E7F7, 0x731E59AE, 0x72DC3399, + 0x7793251C, 0x76514F2B, 0x7417F172, 0x75D59B45, + 0x7E89DC78, 0x7F4BB64F, 0x7D0D0816, 0x7CCF6221, + 0x798074A4, 0x78421E93, 0x7A04A0CA, 0x7BC6CAFD, + 0x6CBC2EB0, 0x6D7E4487, 0x6F38FADE, 0x6EFA90E9, + 0x6BB5866C, 0x6A77EC5B, 0x68315202, 0x69F33835, + 0x62AF7F08, 0x636D153F, 0x612BAB66, 0x60E9C151, + 0x65A6D7D4, 0x6464BDE3, 0x662203BA, 0x67E0698D, + 0x48D7CB20, 0x4915A117, 0x4B531F4E, 0x4A917579, + 0x4FDE63FC, 0x4E1C09CB, 0x4C5AB792, 0x4D98DDA5, + 0x46C49A98, 0x4706F0AF, 0x45404EF6, 0x448224C1, + 0x41CD3244, 0x400F5873, 0x4249E62A, 0x438B8C1D, + 0x54F16850, 0x55330267, 0x5775BC3E, 0x56B7D609, + 0x53F8C08C, 0x523AAABB, 0x507C14E2, 0x51BE7ED5, + 0x5AE239E8, 0x5B2053DF, 0x5966ED86, 0x58A487B1, + 0x5DEB9134, 0x5C29FB03, 0x5E6F455A, 0x5FAD2F6D, + 0xE1351B80, 0xE0F771B7, 0xE2B1CFEE, 0xE373A5D9, + 0xE63CB35C, 0xE7FED96B, 0xE5B86732, 0xE47A0D05, + 0xEF264A38, 0xEEE4200F, 0xECA29E56, 0xED60F461, + 0xE82FE2E4, 0xE9ED88D3, 0xEBAB368A, 0xEA695CBD, + 0xFD13B8F0, 0xFCD1D2C7, 0xFE976C9E, 0xFF5506A9, + 0xFA1A102C, 0xFBD87A1B, 0xF99EC442, 0xF85CAE75, + 0xF300E948, 0xF2C2837F, 0xF0843D26, 0xF1465711, + 0xF4094194, 0xF5CB2BA3, 0xF78D95FA, 0xF64FFFCD, + 0xD9785D60, 0xD8BA3757, 0xDAFC890E, 0xDB3EE339, + 0xDE71F5BC, 0xDFB39F8B, 0xDDF521D2, 0xDC374BE5, + 0xD76B0CD8, 0xD6A966EF, 0xD4EFD8B6, 0xD52DB281, + 0xD062A404, 0xD1A0CE33, 0xD3E6706A, 0xD2241A5D, + 0xC55EFE10, 0xC49C9427, 0xC6DA2A7E, 0xC7184049, + 0xC25756CC, 0xC3953CFB, 0xC1D382A2, 0xC011E895, + 0xCB4DAFA8, 0xCA8FC59F, 0xC8C97BC6, 0xC90B11F1, + 0xCC440774, 0xCD866D43, 0xCFC0D31A, 0xCE02B92D, + 0x91AF9640, 0x906DFC77, 0x922B422E, 0x93E92819, + 0x96A63E9C, 0x976454AB, 0x9522EAF2, 0x94E080C5, + 0x9FBCC7F8, 0x9E7EADCF, 0x9C381396, 0x9DFA79A1, + 0x98B56F24, 0x99770513, 0x9B31BB4A, 0x9AF3D17D, + 0x8D893530, 0x8C4B5F07, 0x8E0DE15E, 0x8FCF8B69, + 0x8A809DEC, 0x8B42F7DB, 0x89044982, 0x88C623B5, + 0x839A6488, 0x82580EBF, 0x801EB0E6, 0x81DCDAD1, + 0x8493CC54, 0x8551A663, 0x8717183A, 0x86D5720D, + 0xA9E2D0A0, 0xA820BA97, 0xAA6604CE, 0xABA46EF9, + 0xAEEB787C, 0xAF29124B, 0xAD6FAC12, 0xACADC625, + 0xA7F18118, 0xA633EB2F, 0xA4755576, 0xA5B73F41, + 0xA0F829C4, 0xA13A43F3, 0xA37CFDAA, 0xA2BE979D, + 0xB5C473D0, 0xB40619E7, 0xB640A7BE, 0xB782CD89, + 0xB2CDDB0C, 0xB30FB13B, 0xB1490F62, 0xB08B6555, + 0xBBD72268, 0xBA15485F, 0xB853F606, 0xB9919C31, + 0xBCDE8AB4, 0xBD1CE083, 0xBF5A5EDA, 0xBE9834ED + }, { + 0x00000000, 0xB8BC6765, 0xAA09C88B, 0x12B5AFEE, + 0x8F629757, 0x37DEF032, 0x256B5FDC, 0x9DD738B9, + 0xC5B428EF, 0x7D084F8A, 0x6FBDE064, 0xD7018701, + 0x4AD6BFB8, 0xF26AD8DD, 0xE0DF7733, 0x58631056, + 0x5019579F, 0xE8A530FA, 0xFA109F14, 0x42ACF871, + 0xDF7BC0C8, 0x67C7A7AD, 0x75720843, 0xCDCE6F26, + 0x95AD7F70, 0x2D111815, 0x3FA4B7FB, 0x8718D09E, + 0x1ACFE827, 0xA2738F42, 0xB0C620AC, 0x087A47C9, + 0xA032AF3E, 0x188EC85B, 0x0A3B67B5, 0xB28700D0, + 0x2F503869, 0x97EC5F0C, 0x8559F0E2, 0x3DE59787, + 0x658687D1, 0xDD3AE0B4, 0xCF8F4F5A, 0x7733283F, + 0xEAE41086, 0x525877E3, 0x40EDD80D, 0xF851BF68, + 0xF02BF8A1, 0x48979FC4, 0x5A22302A, 0xE29E574F, + 0x7F496FF6, 0xC7F50893, 0xD540A77D, 0x6DFCC018, + 0x359FD04E, 0x8D23B72B, 0x9F9618C5, 0x272A7FA0, + 0xBAFD4719, 0x0241207C, 0x10F48F92, 0xA848E8F7, + 0x9B14583D, 0x23A83F58, 0x311D90B6, 0x89A1F7D3, + 0x1476CF6A, 0xACCAA80F, 0xBE7F07E1, 0x06C36084, + 0x5EA070D2, 0xE61C17B7, 0xF4A9B859, 0x4C15DF3C, + 0xD1C2E785, 0x697E80E0, 0x7BCB2F0E, 0xC377486B, + 0xCB0D0FA2, 0x73B168C7, 0x6104C729, 0xD9B8A04C, + 0x446F98F5, 0xFCD3FF90, 0xEE66507E, 0x56DA371B, + 0x0EB9274D, 0xB6054028, 0xA4B0EFC6, 0x1C0C88A3, + 0x81DBB01A, 0x3967D77F, 0x2BD27891, 0x936E1FF4, + 0x3B26F703, 0x839A9066, 0x912F3F88, 0x299358ED, + 0xB4446054, 0x0CF80731, 0x1E4DA8DF, 0xA6F1CFBA, + 0xFE92DFEC, 0x462EB889, 0x549B1767, 0xEC277002, + 0x71F048BB, 0xC94C2FDE, 0xDBF98030, 0x6345E755, + 0x6B3FA09C, 0xD383C7F9, 0xC1366817, 0x798A0F72, + 0xE45D37CB, 0x5CE150AE, 0x4E54FF40, 0xF6E89825, + 0xAE8B8873, 0x1637EF16, 0x048240F8, 0xBC3E279D, + 0x21E91F24, 0x99557841, 0x8BE0D7AF, 0x335CB0CA, + 0xED59B63B, 0x55E5D15E, 0x47507EB0, 0xFFEC19D5, + 0x623B216C, 0xDA874609, 0xC832E9E7, 0x708E8E82, + 0x28ED9ED4, 0x9051F9B1, 0x82E4565F, 0x3A58313A, + 0xA78F0983, 0x1F336EE6, 0x0D86C108, 0xB53AA66D, + 0xBD40E1A4, 0x05FC86C1, 0x1749292F, 0xAFF54E4A, + 0x322276F3, 0x8A9E1196, 0x982BBE78, 0x2097D91D, + 0x78F4C94B, 0xC048AE2E, 0xD2FD01C0, 0x6A4166A5, + 0xF7965E1C, 0x4F2A3979, 0x5D9F9697, 0xE523F1F2, + 0x4D6B1905, 0xF5D77E60, 0xE762D18E, 0x5FDEB6EB, + 0xC2098E52, 0x7AB5E937, 0x680046D9, 0xD0BC21BC, + 0x88DF31EA, 0x3063568F, 0x22D6F961, 0x9A6A9E04, + 0x07BDA6BD, 0xBF01C1D8, 0xADB46E36, 0x15080953, + 0x1D724E9A, 0xA5CE29FF, 0xB77B8611, 0x0FC7E174, + 0x9210D9CD, 0x2AACBEA8, 0x38191146, 0x80A57623, + 0xD8C66675, 0x607A0110, 0x72CFAEFE, 0xCA73C99B, + 0x57A4F122, 0xEF189647, 0xFDAD39A9, 0x45115ECC, + 0x764DEE06, 0xCEF18963, 0xDC44268D, 0x64F841E8, + 0xF92F7951, 0x41931E34, 0x5326B1DA, 0xEB9AD6BF, + 0xB3F9C6E9, 0x0B45A18C, 0x19F00E62, 0xA14C6907, + 0x3C9B51BE, 0x842736DB, 0x96929935, 0x2E2EFE50, + 0x2654B999, 0x9EE8DEFC, 0x8C5D7112, 0x34E11677, + 0xA9362ECE, 0x118A49AB, 0x033FE645, 0xBB838120, + 0xE3E09176, 0x5B5CF613, 0x49E959FD, 0xF1553E98, + 0x6C820621, 0xD43E6144, 0xC68BCEAA, 0x7E37A9CF, + 0xD67F4138, 0x6EC3265D, 0x7C7689B3, 0xC4CAEED6, + 0x591DD66F, 0xE1A1B10A, 0xF3141EE4, 0x4BA87981, + 0x13CB69D7, 0xAB770EB2, 0xB9C2A15C, 0x017EC639, + 0x9CA9FE80, 0x241599E5, 0x36A0360B, 0x8E1C516E, + 0x866616A7, 0x3EDA71C2, 0x2C6FDE2C, 0x94D3B949, + 0x090481F0, 0xB1B8E695, 0xA30D497B, 0x1BB12E1E, + 0x43D23E48, 0xFB6E592D, 0xE9DBF6C3, 0x516791A6, + 0xCCB0A91F, 0x740CCE7A, 0x66B96194, 0xDE0506F1 + }, { + 0x00000000, 0x3D6029B0, 0x7AC05360, 0x47A07AD0, + 0xF580A6C0, 0xC8E08F70, 0x8F40F5A0, 0xB220DC10, + 0x30704BC1, 0x0D106271, 0x4AB018A1, 0x77D03111, + 0xC5F0ED01, 0xF890C4B1, 0xBF30BE61, 0x825097D1, + 0x60E09782, 0x5D80BE32, 0x1A20C4E2, 0x2740ED52, + 0x95603142, 0xA80018F2, 0xEFA06222, 0xD2C04B92, + 0x5090DC43, 0x6DF0F5F3, 0x2A508F23, 0x1730A693, + 0xA5107A83, 0x98705333, 0xDFD029E3, 0xE2B00053, + 0xC1C12F04, 0xFCA106B4, 0xBB017C64, 0x866155D4, + 0x344189C4, 0x0921A074, 0x4E81DAA4, 0x73E1F314, + 0xF1B164C5, 0xCCD14D75, 0x8B7137A5, 0xB6111E15, + 0x0431C205, 0x3951EBB5, 0x7EF19165, 0x4391B8D5, + 0xA121B886, 0x9C419136, 0xDBE1EBE6, 0xE681C256, + 0x54A11E46, 0x69C137F6, 0x2E614D26, 0x13016496, + 0x9151F347, 0xAC31DAF7, 0xEB91A027, 0xD6F18997, + 0x64D15587, 0x59B17C37, 0x1E1106E7, 0x23712F57, + 0x58F35849, 0x659371F9, 0x22330B29, 0x1F532299, + 0xAD73FE89, 0x9013D739, 0xD7B3ADE9, 0xEAD38459, + 0x68831388, 0x55E33A38, 0x124340E8, 0x2F236958, + 0x9D03B548, 0xA0639CF8, 0xE7C3E628, 0xDAA3CF98, + 0x3813CFCB, 0x0573E67B, 0x42D39CAB, 0x7FB3B51B, + 0xCD93690B, 0xF0F340BB, 0xB7533A6B, 0x8A3313DB, + 0x0863840A, 0x3503ADBA, 0x72A3D76A, 0x4FC3FEDA, + 0xFDE322CA, 0xC0830B7A, 0x872371AA, 0xBA43581A, + 0x9932774D, 0xA4525EFD, 0xE3F2242D, 0xDE920D9D, + 0x6CB2D18D, 0x51D2F83D, 0x167282ED, 0x2B12AB5D, + 0xA9423C8C, 0x9422153C, 0xD3826FEC, 0xEEE2465C, + 0x5CC29A4C, 0x61A2B3FC, 0x2602C92C, 0x1B62E09C, + 0xF9D2E0CF, 0xC4B2C97F, 0x8312B3AF, 0xBE729A1F, + 0x0C52460F, 0x31326FBF, 0x7692156F, 0x4BF23CDF, + 0xC9A2AB0E, 0xF4C282BE, 0xB362F86E, 0x8E02D1DE, + 0x3C220DCE, 0x0142247E, 0x46E25EAE, 0x7B82771E, + 0xB1E6B092, 0x8C869922, 0xCB26E3F2, 0xF646CA42, + 0x44661652, 0x79063FE2, 0x3EA64532, 0x03C66C82, + 0x8196FB53, 0xBCF6D2E3, 0xFB56A833, 0xC6368183, + 0x74165D93, 0x49767423, 0x0ED60EF3, 0x33B62743, + 0xD1062710, 0xEC660EA0, 0xABC67470, 0x96A65DC0, + 0x248681D0, 0x19E6A860, 0x5E46D2B0, 0x6326FB00, + 0xE1766CD1, 0xDC164561, 0x9BB63FB1, 0xA6D61601, + 0x14F6CA11, 0x2996E3A1, 0x6E369971, 0x5356B0C1, + 0x70279F96, 0x4D47B626, 0x0AE7CCF6, 0x3787E546, + 0x85A73956, 0xB8C710E6, 0xFF676A36, 0xC2074386, + 0x4057D457, 0x7D37FDE7, 0x3A978737, 0x07F7AE87, + 0xB5D77297, 0x88B75B27, 0xCF1721F7, 0xF2770847, + 0x10C70814, 0x2DA721A4, 0x6A075B74, 0x576772C4, + 0xE547AED4, 0xD8278764, 0x9F87FDB4, 0xA2E7D404, + 0x20B743D5, 0x1DD76A65, 0x5A7710B5, 0x67173905, + 0xD537E515, 0xE857CCA5, 0xAFF7B675, 0x92979FC5, + 0xE915E8DB, 0xD475C16B, 0x93D5BBBB, 0xAEB5920B, + 0x1C954E1B, 0x21F567AB, 0x66551D7B, 0x5B3534CB, + 0xD965A31A, 0xE4058AAA, 0xA3A5F07A, 0x9EC5D9CA, + 0x2CE505DA, 0x11852C6A, 0x562556BA, 0x6B457F0A, + 0x89F57F59, 0xB49556E9, 0xF3352C39, 0xCE550589, + 0x7C75D999, 0x4115F029, 0x06B58AF9, 0x3BD5A349, + 0xB9853498, 0x84E51D28, 0xC34567F8, 0xFE254E48, + 0x4C059258, 0x7165BBE8, 0x36C5C138, 0x0BA5E888, + 0x28D4C7DF, 0x15B4EE6F, 0x521494BF, 0x6F74BD0F, + 0xDD54611F, 0xE03448AF, 0xA794327F, 0x9AF41BCF, + 0x18A48C1E, 0x25C4A5AE, 0x6264DF7E, 0x5F04F6CE, + 0xED242ADE, 0xD044036E, 0x97E479BE, 0xAA84500E, + 0x4834505D, 0x755479ED, 0x32F4033D, 0x0F942A8D, + 0xBDB4F69D, 0x80D4DF2D, 0xC774A5FD, 0xFA148C4D, + 0x78441B9C, 0x4524322C, 0x028448FC, 0x3FE4614C, + 0x8DC4BD5C, 0xB0A494EC, 0xF704EE3C, 0xCA64C78C + }, { + 0x00000000, 0xCB5CD3A5, 0x4DC8A10B, 0x869472AE, + 0x9B914216, 0x50CD91B3, 0xD659E31D, 0x1D0530B8, + 0xEC53826D, 0x270F51C8, 0xA19B2366, 0x6AC7F0C3, + 0x77C2C07B, 0xBC9E13DE, 0x3A0A6170, 0xF156B2D5, + 0x03D6029B, 0xC88AD13E, 0x4E1EA390, 0x85427035, + 0x9847408D, 0x531B9328, 0xD58FE186, 0x1ED33223, + 0xEF8580F6, 0x24D95353, 0xA24D21FD, 0x6911F258, + 0x7414C2E0, 0xBF481145, 0x39DC63EB, 0xF280B04E, + 0x07AC0536, 0xCCF0D693, 0x4A64A43D, 0x81387798, + 0x9C3D4720, 0x57619485, 0xD1F5E62B, 0x1AA9358E, + 0xEBFF875B, 0x20A354FE, 0xA6372650, 0x6D6BF5F5, + 0x706EC54D, 0xBB3216E8, 0x3DA66446, 0xF6FAB7E3, + 0x047A07AD, 0xCF26D408, 0x49B2A6A6, 0x82EE7503, + 0x9FEB45BB, 0x54B7961E, 0xD223E4B0, 0x197F3715, + 0xE82985C0, 0x23755665, 0xA5E124CB, 0x6EBDF76E, + 0x73B8C7D6, 0xB8E41473, 0x3E7066DD, 0xF52CB578, + 0x0F580A6C, 0xC404D9C9, 0x4290AB67, 0x89CC78C2, + 0x94C9487A, 0x5F959BDF, 0xD901E971, 0x125D3AD4, + 0xE30B8801, 0x28575BA4, 0xAEC3290A, 0x659FFAAF, + 0x789ACA17, 0xB3C619B2, 0x35526B1C, 0xFE0EB8B9, + 0x0C8E08F7, 0xC7D2DB52, 0x4146A9FC, 0x8A1A7A59, + 0x971F4AE1, 0x5C439944, 0xDAD7EBEA, 0x118B384F, + 0xE0DD8A9A, 0x2B81593F, 0xAD152B91, 0x6649F834, + 0x7B4CC88C, 0xB0101B29, 0x36846987, 0xFDD8BA22, + 0x08F40F5A, 0xC3A8DCFF, 0x453CAE51, 0x8E607DF4, + 0x93654D4C, 0x58399EE9, 0xDEADEC47, 0x15F13FE2, + 0xE4A78D37, 0x2FFB5E92, 0xA96F2C3C, 0x6233FF99, + 0x7F36CF21, 0xB46A1C84, 0x32FE6E2A, 0xF9A2BD8F, + 0x0B220DC1, 0xC07EDE64, 0x46EAACCA, 0x8DB67F6F, + 0x90B34FD7, 0x5BEF9C72, 0xDD7BEEDC, 0x16273D79, + 0xE7718FAC, 0x2C2D5C09, 0xAAB92EA7, 0x61E5FD02, + 0x7CE0CDBA, 0xB7BC1E1F, 0x31286CB1, 0xFA74BF14, + 0x1EB014D8, 0xD5ECC77D, 0x5378B5D3, 0x98246676, + 0x852156CE, 0x4E7D856B, 0xC8E9F7C5, 0x03B52460, + 0xF2E396B5, 0x39BF4510, 0xBF2B37BE, 0x7477E41B, + 0x6972D4A3, 0xA22E0706, 0x24BA75A8, 0xEFE6A60D, + 0x1D661643, 0xD63AC5E6, 0x50AEB748, 0x9BF264ED, + 0x86F75455, 0x4DAB87F0, 0xCB3FF55E, 0x006326FB, + 0xF135942E, 0x3A69478B, 0xBCFD3525, 0x77A1E680, + 0x6AA4D638, 0xA1F8059D, 0x276C7733, 0xEC30A496, + 0x191C11EE, 0xD240C24B, 0x54D4B0E5, 0x9F886340, + 0x828D53F8, 0x49D1805D, 0xCF45F2F3, 0x04192156, + 0xF54F9383, 0x3E134026, 0xB8873288, 0x73DBE12D, + 0x6EDED195, 0xA5820230, 0x2316709E, 0xE84AA33B, + 0x1ACA1375, 0xD196C0D0, 0x5702B27E, 0x9C5E61DB, + 0x815B5163, 0x4A0782C6, 0xCC93F068, 0x07CF23CD, + 0xF6999118, 0x3DC542BD, 0xBB513013, 0x700DE3B6, + 0x6D08D30E, 0xA65400AB, 0x20C07205, 0xEB9CA1A0, + 0x11E81EB4, 0xDAB4CD11, 0x5C20BFBF, 0x977C6C1A, + 0x8A795CA2, 0x41258F07, 0xC7B1FDA9, 0x0CED2E0C, + 0xFDBB9CD9, 0x36E74F7C, 0xB0733DD2, 0x7B2FEE77, + 0x662ADECF, 0xAD760D6A, 0x2BE27FC4, 0xE0BEAC61, + 0x123E1C2F, 0xD962CF8A, 0x5FF6BD24, 0x94AA6E81, + 0x89AF5E39, 0x42F38D9C, 0xC467FF32, 0x0F3B2C97, + 0xFE6D9E42, 0x35314DE7, 0xB3A53F49, 0x78F9ECEC, + 0x65FCDC54, 0xAEA00FF1, 0x28347D5F, 0xE368AEFA, + 0x16441B82, 0xDD18C827, 0x5B8CBA89, 0x90D0692C, + 0x8DD55994, 0x46898A31, 0xC01DF89F, 0x0B412B3A, + 0xFA1799EF, 0x314B4A4A, 0xB7DF38E4, 0x7C83EB41, + 0x6186DBF9, 0xAADA085C, 0x2C4E7AF2, 0xE712A957, + 0x15921919, 0xDECECABC, 0x585AB812, 0x93066BB7, + 0x8E035B0F, 0x455F88AA, 0xC3CBFA04, 0x089729A1, + 0xF9C19B74, 0x329D48D1, 0xB4093A7F, 0x7F55E9DA, + 0x6250D962, 0xA90C0AC7, 0x2F987869, 0xE4C4ABCC + }, { + 0x00000000, 0xA6770BB4, 0x979F1129, 0x31E81A9D, + 0xF44F2413, 0x52382FA7, 0x63D0353A, 0xC5A73E8E, + 0x33EF4E67, 0x959845D3, 0xA4705F4E, 0x020754FA, + 0xC7A06A74, 0x61D761C0, 0x503F7B5D, 0xF64870E9, + 0x67DE9CCE, 0xC1A9977A, 0xF0418DE7, 0x56368653, + 0x9391B8DD, 0x35E6B369, 0x040EA9F4, 0xA279A240, + 0x5431D2A9, 0xF246D91D, 0xC3AEC380, 0x65D9C834, + 0xA07EF6BA, 0x0609FD0E, 0x37E1E793, 0x9196EC27, + 0xCFBD399C, 0x69CA3228, 0x582228B5, 0xFE552301, + 0x3BF21D8F, 0x9D85163B, 0xAC6D0CA6, 0x0A1A0712, + 0xFC5277FB, 0x5A257C4F, 0x6BCD66D2, 0xCDBA6D66, + 0x081D53E8, 0xAE6A585C, 0x9F8242C1, 0x39F54975, + 0xA863A552, 0x0E14AEE6, 0x3FFCB47B, 0x998BBFCF, + 0x5C2C8141, 0xFA5B8AF5, 0xCBB39068, 0x6DC49BDC, + 0x9B8CEB35, 0x3DFBE081, 0x0C13FA1C, 0xAA64F1A8, + 0x6FC3CF26, 0xC9B4C492, 0xF85CDE0F, 0x5E2BD5BB, + 0x440B7579, 0xE27C7ECD, 0xD3946450, 0x75E36FE4, + 0xB044516A, 0x16335ADE, 0x27DB4043, 0x81AC4BF7, + 0x77E43B1E, 0xD19330AA, 0xE07B2A37, 0x460C2183, + 0x83AB1F0D, 0x25DC14B9, 0x14340E24, 0xB2430590, + 0x23D5E9B7, 0x85A2E203, 0xB44AF89E, 0x123DF32A, + 0xD79ACDA4, 0x71EDC610, 0x4005DC8D, 0xE672D739, + 0x103AA7D0, 0xB64DAC64, 0x87A5B6F9, 0x21D2BD4D, + 0xE47583C3, 0x42028877, 0x73EA92EA, 0xD59D995E, + 0x8BB64CE5, 0x2DC14751, 0x1C295DCC, 0xBA5E5678, + 0x7FF968F6, 0xD98E6342, 0xE86679DF, 0x4E11726B, + 0xB8590282, 0x1E2E0936, 0x2FC613AB, 0x89B1181F, + 0x4C162691, 0xEA612D25, 0xDB8937B8, 0x7DFE3C0C, + 0xEC68D02B, 0x4A1FDB9F, 0x7BF7C102, 0xDD80CAB6, + 0x1827F438, 0xBE50FF8C, 0x8FB8E511, 0x29CFEEA5, + 0xDF879E4C, 0x79F095F8, 0x48188F65, 0xEE6F84D1, + 0x2BC8BA5F, 0x8DBFB1EB, 0xBC57AB76, 0x1A20A0C2, + 0x8816EAF2, 0x2E61E146, 0x1F89FBDB, 0xB9FEF06F, + 0x7C59CEE1, 0xDA2EC555, 0xEBC6DFC8, 0x4DB1D47C, + 0xBBF9A495, 0x1D8EAF21, 0x2C66B5BC, 0x8A11BE08, + 0x4FB68086, 0xE9C18B32, 0xD82991AF, 0x7E5E9A1B, + 0xEFC8763C, 0x49BF7D88, 0x78576715, 0xDE206CA1, + 0x1B87522F, 0xBDF0599B, 0x8C184306, 0x2A6F48B2, + 0xDC27385B, 0x7A5033EF, 0x4BB82972, 0xEDCF22C6, + 0x28681C48, 0x8E1F17FC, 0xBFF70D61, 0x198006D5, + 0x47ABD36E, 0xE1DCD8DA, 0xD034C247, 0x7643C9F3, + 0xB3E4F77D, 0x1593FCC9, 0x247BE654, 0x820CEDE0, + 0x74449D09, 0xD23396BD, 0xE3DB8C20, 0x45AC8794, + 0x800BB91A, 0x267CB2AE, 0x1794A833, 0xB1E3A387, + 0x20754FA0, 0x86024414, 0xB7EA5E89, 0x119D553D, + 0xD43A6BB3, 0x724D6007, 0x43A57A9A, 0xE5D2712E, + 0x139A01C7, 0xB5ED0A73, 0x840510EE, 0x22721B5A, + 0xE7D525D4, 0x41A22E60, 0x704A34FD, 0xD63D3F49, + 0xCC1D9F8B, 0x6A6A943F, 0x5B828EA2, 0xFDF58516, + 0x3852BB98, 0x9E25B02C, 0xAFCDAAB1, 0x09BAA105, + 0xFFF2D1EC, 0x5985DA58, 0x686DC0C5, 0xCE1ACB71, + 0x0BBDF5FF, 0xADCAFE4B, 0x9C22E4D6, 0x3A55EF62, + 0xABC30345, 0x0DB408F1, 0x3C5C126C, 0x9A2B19D8, + 0x5F8C2756, 0xF9FB2CE2, 0xC813367F, 0x6E643DCB, + 0x982C4D22, 0x3E5B4696, 0x0FB35C0B, 0xA9C457BF, + 0x6C636931, 0xCA146285, 0xFBFC7818, 0x5D8B73AC, + 0x03A0A617, 0xA5D7ADA3, 0x943FB73E, 0x3248BC8A, + 0xF7EF8204, 0x519889B0, 0x6070932D, 0xC6079899, + 0x304FE870, 0x9638E3C4, 0xA7D0F959, 0x01A7F2ED, + 0xC400CC63, 0x6277C7D7, 0x539FDD4A, 0xF5E8D6FE, + 0x647E3AD9, 0xC209316D, 0xF3E12BF0, 0x55962044, + 0x90311ECA, 0x3646157E, 0x07AE0FE3, 0xA1D90457, + 0x579174BE, 0xF1E67F0A, 0xC00E6597, 0x66796E23, + 0xA3DE50AD, 0x05A95B19, 0x34414184, 0x92364A30 + }, { + 0x00000000, 0xCCAA009E, 0x4225077D, 0x8E8F07E3, + 0x844A0EFA, 0x48E00E64, 0xC66F0987, 0x0AC50919, + 0xD3E51BB5, 0x1F4F1B2B, 0x91C01CC8, 0x5D6A1C56, + 0x57AF154F, 0x9B0515D1, 0x158A1232, 0xD92012AC, + 0x7CBB312B, 0xB01131B5, 0x3E9E3656, 0xF23436C8, + 0xF8F13FD1, 0x345B3F4F, 0xBAD438AC, 0x767E3832, + 0xAF5E2A9E, 0x63F42A00, 0xED7B2DE3, 0x21D12D7D, + 0x2B142464, 0xE7BE24FA, 0x69312319, 0xA59B2387, + 0xF9766256, 0x35DC62C8, 0xBB53652B, 0x77F965B5, + 0x7D3C6CAC, 0xB1966C32, 0x3F196BD1, 0xF3B36B4F, + 0x2A9379E3, 0xE639797D, 0x68B67E9E, 0xA41C7E00, + 0xAED97719, 0x62737787, 0xECFC7064, 0x205670FA, + 0x85CD537D, 0x496753E3, 0xC7E85400, 0x0B42549E, + 0x01875D87, 0xCD2D5D19, 0x43A25AFA, 0x8F085A64, + 0x562848C8, 0x9A824856, 0x140D4FB5, 0xD8A74F2B, + 0xD2624632, 0x1EC846AC, 0x9047414F, 0x5CED41D1, + 0x299DC2ED, 0xE537C273, 0x6BB8C590, 0xA712C50E, + 0xADD7CC17, 0x617DCC89, 0xEFF2CB6A, 0x2358CBF4, + 0xFA78D958, 0x36D2D9C6, 0xB85DDE25, 0x74F7DEBB, + 0x7E32D7A2, 0xB298D73C, 0x3C17D0DF, 0xF0BDD041, + 0x5526F3C6, 0x998CF358, 0x1703F4BB, 0xDBA9F425, + 0xD16CFD3C, 0x1DC6FDA2, 0x9349FA41, 0x5FE3FADF, + 0x86C3E873, 0x4A69E8ED, 0xC4E6EF0E, 0x084CEF90, + 0x0289E689, 0xCE23E617, 0x40ACE1F4, 0x8C06E16A, + 0xD0EBA0BB, 0x1C41A025, 0x92CEA7C6, 0x5E64A758, + 0x54A1AE41, 0x980BAEDF, 0x1684A93C, 0xDA2EA9A2, + 0x030EBB0E, 0xCFA4BB90, 0x412BBC73, 0x8D81BCED, + 0x8744B5F4, 0x4BEEB56A, 0xC561B289, 0x09CBB217, + 0xAC509190, 0x60FA910E, 0xEE7596ED, 0x22DF9673, + 0x281A9F6A, 0xE4B09FF4, 0x6A3F9817, 0xA6959889, + 0x7FB58A25, 0xB31F8ABB, 0x3D908D58, 0xF13A8DC6, + 0xFBFF84DF, 0x37558441, 0xB9DA83A2, 0x7570833C, + 0x533B85DA, 0x9F918544, 0x111E82A7, 0xDDB48239, + 0xD7718B20, 0x1BDB8BBE, 0x95548C5D, 0x59FE8CC3, + 0x80DE9E6F, 0x4C749EF1, 0xC2FB9912, 0x0E51998C, + 0x04949095, 0xC83E900B, 0x46B197E8, 0x8A1B9776, + 0x2F80B4F1, 0xE32AB46F, 0x6DA5B38C, 0xA10FB312, + 0xABCABA0B, 0x6760BA95, 0xE9EFBD76, 0x2545BDE8, + 0xFC65AF44, 0x30CFAFDA, 0xBE40A839, 0x72EAA8A7, + 0x782FA1BE, 0xB485A120, 0x3A0AA6C3, 0xF6A0A65D, + 0xAA4DE78C, 0x66E7E712, 0xE868E0F1, 0x24C2E06F, + 0x2E07E976, 0xE2ADE9E8, 0x6C22EE0B, 0xA088EE95, + 0x79A8FC39, 0xB502FCA7, 0x3B8DFB44, 0xF727FBDA, + 0xFDE2F2C3, 0x3148F25D, 0xBFC7F5BE, 0x736DF520, + 0xD6F6D6A7, 0x1A5CD639, 0x94D3D1DA, 0x5879D144, + 0x52BCD85D, 0x9E16D8C3, 0x1099DF20, 0xDC33DFBE, + 0x0513CD12, 0xC9B9CD8C, 0x4736CA6F, 0x8B9CCAF1, + 0x8159C3E8, 0x4DF3C376, 0xC37CC495, 0x0FD6C40B, + 0x7AA64737, 0xB60C47A9, 0x3883404A, 0xF42940D4, + 0xFEEC49CD, 0x32464953, 0xBCC94EB0, 0x70634E2E, + 0xA9435C82, 0x65E95C1C, 0xEB665BFF, 0x27CC5B61, + 0x2D095278, 0xE1A352E6, 0x6F2C5505, 0xA386559B, + 0x061D761C, 0xCAB77682, 0x44387161, 0x889271FF, + 0x825778E6, 0x4EFD7878, 0xC0727F9B, 0x0CD87F05, + 0xD5F86DA9, 0x19526D37, 0x97DD6AD4, 0x5B776A4A, + 0x51B26353, 0x9D1863CD, 0x1397642E, 0xDF3D64B0, + 0x83D02561, 0x4F7A25FF, 0xC1F5221C, 0x0D5F2282, + 0x079A2B9B, 0xCB302B05, 0x45BF2CE6, 0x89152C78, + 0x50353ED4, 0x9C9F3E4A, 0x121039A9, 0xDEBA3937, + 0xD47F302E, 0x18D530B0, 0x965A3753, 0x5AF037CD, + 0xFF6B144A, 0x33C114D4, 0xBD4E1337, 0x71E413A9, + 0x7B211AB0, 0xB78B1A2E, 0x39041DCD, 0xF5AE1D53, + 0x2C8E0FFF, 0xE0240F61, 0x6EAB0882, 0xA201081C, + 0xA8C40105, 0x646E019B, 0xEAE10678, 0x264B06E6 + } +}; diff --git a/src/native/external/xz/src/liblzma/check/crc32_tablegen.c b/src/native/external/xz/src/liblzma/check/crc32_tablegen.c new file mode 100644 index 00000000000000..b8cf459f8e7669 --- /dev/null +++ b/src/native/external/xz/src/liblzma/check/crc32_tablegen.c @@ -0,0 +1,120 @@ +// SPDX-License-Identifier: 0BSD + +/////////////////////////////////////////////////////////////////////////////// +// +/// \file crc32_tablegen.c +/// \brief Generate crc32_table_le.h and crc32_table_be.h +/// +/// Compiling: gcc -std=c99 -o crc32_tablegen crc32_tablegen.c +/// Add -DWORDS_BIGENDIAN to generate big endian table. +/// Add -DLZ_HASH_TABLE to generate lz_encoder_hash_table.h (little endian). +// +// Author: Lasse Collin +// +/////////////////////////////////////////////////////////////////////////////// + +#include +#include "../../common/tuklib_integer.h" + + +static uint32_t crc32_table[8][256]; + + +static void +init_crc32_table(void) +{ + static const uint32_t poly32 = UINT32_C(0xEDB88320); + + for (size_t s = 0; s < 8; ++s) { + for (size_t b = 0; b < 256; ++b) { + uint32_t r = s == 0 ? b : crc32_table[s - 1][b]; + + for (size_t i = 0; i < 8; ++i) { + if (r & 1) + r = (r >> 1) ^ poly32; + else + r >>= 1; + } + + crc32_table[s][b] = r; + } + } + +#ifdef WORDS_BIGENDIAN + for (size_t s = 0; s < 8; ++s) + for (size_t b = 0; b < 256; ++b) + crc32_table[s][b] = byteswap32(crc32_table[s][b]); +#endif + + return; +} + + +static void +print_crc32_table(void) +{ + // Split the SPDX string so that it won't accidentally match + // when tools search for the string. + printf("// SPDX" "-License-Identifier" ": 0BSD\n\n" + "// This file has been generated by crc32_tablegen.c.\n\n" + "const uint32_t lzma_crc32_table[8][256] = {\n\t{"); + + for (size_t s = 0; s < 8; ++s) { + for (size_t b = 0; b < 256; ++b) { + if ((b % 4) == 0) + printf("\n\t\t"); + + printf("0x%08" PRIX32, crc32_table[s][b]); + + if (b != 255) + printf(",%s", (b+1) % 4 == 0 ? "" : " "); + } + + if (s == 7) + printf("\n\t}\n};\n"); + else + printf("\n\t}, {"); + } + + return; +} + + +static void +print_lz_table(void) +{ + // Split the SPDX string so that it won't accidentally match + // when tools search for the string. + printf("// SPDX" "-License-Identifier" ": 0BSD\n\n" + "// This file has been generated by crc32_tablegen.c.\n\n" + "const uint32_t lzma_lz_hash_table[256] = {"); + + for (size_t b = 0; b < 256; ++b) { + if ((b % 4) == 0) + printf("\n\t"); + + printf("0x%08" PRIX32, crc32_table[0][b]); + + if (b != 255) + printf(",%s", (b+1) % 4 == 0 ? "" : " "); + } + + printf("\n};\n"); + + return; +} + + +int +main(void) +{ + init_crc32_table(); + +#ifdef LZ_HASH_TABLE + print_lz_table(); +#else + print_crc32_table(); +#endif + + return 0; +} diff --git a/src/native/external/xz/src/liblzma/check/crc32_x86.S b/src/native/external/xz/src/liblzma/check/crc32_x86.S new file mode 100644 index 00000000000000..37ee063d10682a --- /dev/null +++ b/src/native/external/xz/src/liblzma/check/crc32_x86.S @@ -0,0 +1,308 @@ +/* SPDX-License-Identifier: 0BSD */ + +/* + * Speed-optimized CRC32 using slicing-by-eight algorithm + * + * This uses only i386 instructions, but it is optimized for i686 and later + * (including e.g. Pentium II/III/IV, Athlon XP, and Core 2). For i586 + * (e.g. Pentium), slicing-by-four would be better, and even the C version + * of slicing-by-eight built with gcc -march=i586 tends to be a little bit + * better than this. Very few probably run this code on i586 or older x86 + * so this shouldn't be a problem in practice. + * + * Authors: Igor Pavlov (original version) + * Lasse Collin (AT&T syntax, PIC support, better portability) + * + * This code needs lzma_crc32_table, which can be created using the + * following C code: + +uint32_t lzma_crc32_table[8][256]; + +void +init_table(void) +{ + // IEEE-802.3 + static const uint32_t poly32 = UINT32_C(0xEDB88320); + + // Castagnoli + // static const uint32_t poly32 = UINT32_C(0x82F63B78); + + // Koopman + // static const uint32_t poly32 = UINT32_C(0xEB31D82E); + + for (size_t s = 0; s < 8; ++s) { + for (size_t b = 0; b < 256; ++b) { + uint32_t r = s == 0 ? b : lzma_crc32_table[s - 1][b]; + + for (size_t i = 0; i < 8; ++i) { + if (r & 1) + r = (r >> 1) ^ poly32; + else + r >>= 1; + } + + lzma_crc32_table[s][b] = r; + } + } +} + + * The prototype of the CRC32 function: + * extern uint32_t lzma_crc32(const uint8_t *buf, size_t size, uint32_t crc); + */ + +/* When Intel CET is enabled, include in assembly code to mark + Intel CET support. */ +#ifdef __CET__ +# include +#else +# define _CET_ENDBR +#endif + +/* + * On some systems, the functions need to be prefixed. The prefix is + * usually an underscore. + */ +#ifndef __USER_LABEL_PREFIX__ +# define __USER_LABEL_PREFIX__ +#endif +#define MAKE_SYM_CAT(prefix, sym) prefix ## sym +#define MAKE_SYM(prefix, sym) MAKE_SYM_CAT(prefix, sym) +#define LZMA_CRC32 MAKE_SYM(__USER_LABEL_PREFIX__, lzma_crc32_generic) +#define LZMA_CRC32_TABLE MAKE_SYM(__USER_LABEL_PREFIX__, lzma_crc32_table) + +/* + * Solaris assembler doesn't have .p2align, and Darwin uses .align + * differently than GNU/Linux and Solaris. + */ +#if defined(__APPLE__) || defined(__MSDOS__) +# define ALIGN(pow2, abs) .align pow2 +#else +# define ALIGN(pow2, abs) .align abs +#endif + + .text + .globl LZMA_CRC32 +#ifdef __ELF__ + .hidden LZMA_CRC32 +#endif + +#if !defined(__APPLE__) && !defined(_WIN32) && !defined(__CYGWIN__) \ + && !defined(__MSDOS__) + .type LZMA_CRC32, @function +#endif + + ALIGN(4, 16) +LZMA_CRC32: + _CET_ENDBR + /* + * Register usage: + * %eax crc + * %esi buf + * %edi size or buf + size + * %ebx lzma_crc32_table + * %ebp Table index + * %ecx Temporary + * %edx Temporary + */ + pushl %ebx + pushl %esi + pushl %edi + pushl %ebp + movl 0x14(%esp), %esi /* buf */ + movl 0x18(%esp), %edi /* size */ + movl 0x1C(%esp), %eax /* crc */ + + /* + * Store the address of lzma_crc32_table to %ebx. This is needed to + * get position-independent code (PIC). + * + * The PIC macro is defined by libtool, while __PIC__ is defined + * by GCC but only on some systems. Testing for both makes it simpler + * to test this code without libtool, and keeps the code working also + * when built with libtool but using something else than GCC. + * + * I understood that libtool may define PIC on Windows even though + * the code in Windows DLLs is not PIC in sense that it is in ELF + * binaries, so we need a separate check to always use the non-PIC + * code on Windows. + */ +#if (!defined(PIC) && !defined(__PIC__)) \ + || (defined(_WIN32) || defined(__CYGWIN__)) + /* Not PIC */ + movl $ LZMA_CRC32_TABLE, %ebx +#elif defined(__APPLE__) + /* Mach-O */ + call .L_get_pc +.L_pic: + leal .L_lzma_crc32_table$non_lazy_ptr-.L_pic(%ebx), %ebx + movl (%ebx), %ebx +#else + /* ELF */ + call .L_get_pc + addl $_GLOBAL_OFFSET_TABLE_, %ebx + movl LZMA_CRC32_TABLE@GOT(%ebx), %ebx +#endif + + /* Complement the initial value. */ + notl %eax + + ALIGN(4, 16) +.L_align: + /* + * Check if there is enough input to use slicing-by-eight. + * We need 16 bytes, because the loop pre-reads eight bytes. + */ + cmpl $16, %edi + jb .L_rest + + /* Check if we have reached alignment of eight bytes. */ + testl $7, %esi + jz .L_slice + + /* Calculate CRC of the next input byte. */ + movzbl (%esi), %ebp + incl %esi + movzbl %al, %ecx + xorl %ecx, %ebp + shrl $8, %eax + xorl (%ebx, %ebp, 4), %eax + decl %edi + jmp .L_align + + ALIGN(2, 4) +.L_slice: + /* + * If we get here, there's at least 16 bytes of aligned input + * available. Make %edi multiple of eight bytes. Store the possible + * remainder over the "size" variable in the argument stack. + */ + movl %edi, 0x18(%esp) + andl $-8, %edi + subl %edi, 0x18(%esp) + + /* + * Let %edi be buf + size - 8 while running the main loop. This way + * we can compare for equality to determine when exit the loop. + */ + addl %esi, %edi + subl $8, %edi + + /* Read in the first eight aligned bytes. */ + xorl (%esi), %eax + movl 4(%esi), %ecx + movzbl %cl, %ebp + +.L_loop: + movl 0x0C00(%ebx, %ebp, 4), %edx + movzbl %ch, %ebp + xorl 0x0800(%ebx, %ebp, 4), %edx + shrl $16, %ecx + xorl 8(%esi), %edx + movzbl %cl, %ebp + xorl 0x0400(%ebx, %ebp, 4), %edx + movzbl %ch, %ebp + xorl (%ebx, %ebp, 4), %edx + movzbl %al, %ebp + + /* + * Read the next four bytes, for which the CRC is calculated + * on the next iteration of the loop. + */ + movl 12(%esi), %ecx + + xorl 0x1C00(%ebx, %ebp, 4), %edx + movzbl %ah, %ebp + shrl $16, %eax + xorl 0x1800(%ebx, %ebp, 4), %edx + movzbl %ah, %ebp + movzbl %al, %eax + movl 0x1400(%ebx, %eax, 4), %eax + addl $8, %esi + xorl %edx, %eax + xorl 0x1000(%ebx, %ebp, 4), %eax + + /* Check for end of aligned input. */ + cmpl %edi, %esi + movzbl %cl, %ebp + jne .L_loop + + /* + * Process the remaining eight bytes, which we have already + * copied to %ecx and %edx. + */ + movl 0x0C00(%ebx, %ebp, 4), %edx + movzbl %ch, %ebp + xorl 0x0800(%ebx, %ebp, 4), %edx + shrl $16, %ecx + movzbl %cl, %ebp + xorl 0x0400(%ebx, %ebp, 4), %edx + movzbl %ch, %ebp + xorl (%ebx, %ebp, 4), %edx + movzbl %al, %ebp + + xorl 0x1C00(%ebx, %ebp, 4), %edx + movzbl %ah, %ebp + shrl $16, %eax + xorl 0x1800(%ebx, %ebp, 4), %edx + movzbl %ah, %ebp + movzbl %al, %eax + movl 0x1400(%ebx, %eax, 4), %eax + addl $8, %esi + xorl %edx, %eax + xorl 0x1000(%ebx, %ebp, 4), %eax + + /* Copy the number of remaining bytes to %edi. */ + movl 0x18(%esp), %edi + +.L_rest: + /* Check for end of input. */ + testl %edi, %edi + jz .L_return + + /* Calculate CRC of the next input byte. */ + movzbl (%esi), %ebp + incl %esi + movzbl %al, %ecx + xorl %ecx, %ebp + shrl $8, %eax + xorl (%ebx, %ebp, 4), %eax + decl %edi + jmp .L_rest + +.L_return: + /* Complement the final value. */ + notl %eax + + popl %ebp + popl %edi + popl %esi + popl %ebx + ret + +#if defined(PIC) || defined(__PIC__) + ALIGN(4, 16) +.L_get_pc: + movl (%esp), %ebx + ret +#endif + +#if defined(__APPLE__) && (defined(PIC) || defined(__PIC__)) + /* Mach-O PIC */ + .section __IMPORT,__pointers,non_lazy_symbol_pointers +.L_lzma_crc32_table$non_lazy_ptr: + .indirect_symbol LZMA_CRC32_TABLE + .long 0 + +#elif !defined(_WIN32) && !defined(__CYGWIN__) && !defined(__MSDOS__) + /* ELF */ + .size LZMA_CRC32, .-LZMA_CRC32 +#endif + +/* + * This is needed to support non-executable stack. It's ugly to + * use __FreeBSD__ and __linux__ here, but I don't know a way to detect when + * we are using GNU assembler. + */ +#if defined(__ELF__) && (defined(__FreeBSD__) || defined(__linux__)) + .section .note.GNU-stack,"",@progbits +#endif diff --git a/src/native/external/xz/src/liblzma/check/crc64_fast.c b/src/native/external/xz/src/liblzma/check/crc64_fast.c new file mode 100644 index 00000000000000..792d0f9488bd26 --- /dev/null +++ b/src/native/external/xz/src/liblzma/check/crc64_fast.c @@ -0,0 +1,161 @@ +// SPDX-License-Identifier: 0BSD + +/////////////////////////////////////////////////////////////////////////////// +// +/// \file crc64_fast.c +/// \brief CRC64 calculation +// +// Authors: Lasse Collin +// Ilya Kurdyukov +// +/////////////////////////////////////////////////////////////////////////////// + +#include "check.h" +#include "crc_common.h" + +#if defined(CRC_X86_CLMUL) +# define BUILDING_CRC_CLMUL 64 +# include "crc_x86_clmul.h" +#endif + + +#ifdef CRC64_GENERIC + +///////////////////////////////// +// Generic slice-by-four CRC64 // +///////////////////////////////// + +#if defined(WORDS_BIGENDIAN) +# include "crc64_table_be.h" +#else +# include "crc64_table_le.h" +#endif + + +#ifdef HAVE_CRC_X86_ASM +extern uint64_t lzma_crc64_generic( + const uint8_t *buf, size_t size, uint64_t crc); +#else + +#ifdef WORDS_BIGENDIAN +# define A1(x) ((x) >> 56) +#else +# define A1 A +#endif + + +// See the comments in crc32_fast.c. They aren't duplicated here. +static uint64_t +lzma_crc64_generic(const uint8_t *buf, size_t size, uint64_t crc) +{ + crc = ~crc; + +#ifdef WORDS_BIGENDIAN + crc = byteswap64(crc); +#endif + + if (size > 4) { + while ((uintptr_t)(buf) & 3) { + crc = lzma_crc64_table[0][*buf++ ^ A1(crc)] ^ S8(crc); + --size; + } + + const uint8_t *const limit = buf + (size & ~(size_t)(3)); + size &= (size_t)(3); + + while (buf < limit) { +#ifdef WORDS_BIGENDIAN + const uint32_t tmp = (uint32_t)(crc >> 32) + ^ aligned_read32ne(buf); +#else + const uint32_t tmp = (uint32_t)crc + ^ aligned_read32ne(buf); +#endif + buf += 4; + + crc = lzma_crc64_table[3][A(tmp)] + ^ lzma_crc64_table[2][B(tmp)] + ^ S32(crc) + ^ lzma_crc64_table[1][C(tmp)] + ^ lzma_crc64_table[0][D(tmp)]; + } + } + + while (size-- != 0) + crc = lzma_crc64_table[0][*buf++ ^ A1(crc)] ^ S8(crc); + +#ifdef WORDS_BIGENDIAN + crc = byteswap64(crc); +#endif + + return ~crc; +} +#endif // HAVE_CRC_X86_ASM +#endif // CRC64_GENERIC + + +#if defined(CRC64_GENERIC) && defined(CRC64_ARCH_OPTIMIZED) + +////////////////////////// +// Function dispatching // +////////////////////////// + +// If both the generic and arch-optimized implementations are usable, then +// the function that is used is selected at runtime. See crc32_fast.c. + +typedef uint64_t (*crc64_func_type)( + const uint8_t *buf, size_t size, uint64_t crc); + +static crc64_func_type +crc64_resolve(void) +{ + return is_arch_extension_supported() + ? &crc64_arch_optimized : &lzma_crc64_generic; +} + +#ifdef HAVE_FUNC_ATTRIBUTE_CONSTRUCTOR +# define CRC64_SET_FUNC_ATTR __attribute__((__constructor__)) +static crc64_func_type crc64_func; +#else +# define CRC64_SET_FUNC_ATTR +static uint64_t crc64_dispatch(const uint8_t *buf, size_t size, uint64_t crc); +static crc64_func_type crc64_func = &crc64_dispatch; +#endif + + +CRC64_SET_FUNC_ATTR +static void +crc64_set_func(void) +{ + crc64_func = crc64_resolve(); + return; +} + + +#ifndef HAVE_FUNC_ATTRIBUTE_CONSTRUCTOR +static uint64_t +crc64_dispatch(const uint8_t *buf, size_t size, uint64_t crc) +{ + crc64_set_func(); + return crc64_func(buf, size, crc); +} +#endif +#endif + + +extern LZMA_API(uint64_t) +lzma_crc64(const uint8_t *buf, size_t size, uint64_t crc) +{ +#if defined(CRC64_GENERIC) && defined(CRC64_ARCH_OPTIMIZED) + return crc64_func(buf, size, crc); + +#elif defined(CRC64_ARCH_OPTIMIZED) + // If arch-optimized version is used unconditionally without runtime + // CPU detection then omitting the generic version and its 8 KiB + // lookup table makes the library smaller. + return crc64_arch_optimized(buf, size, crc); + +#else + return lzma_crc64_generic(buf, size, crc); +#endif +} diff --git a/src/native/external/xz/src/liblzma/check/crc64_small.c b/src/native/external/xz/src/liblzma/check/crc64_small.c new file mode 100644 index 00000000000000..ee4ea26f67d0df --- /dev/null +++ b/src/native/external/xz/src/liblzma/check/crc64_small.c @@ -0,0 +1,57 @@ +// SPDX-License-Identifier: 0BSD + +/////////////////////////////////////////////////////////////////////////////// +// +/// \file crc64_small.c +/// \brief CRC64 calculation (size-optimized) +// +// Author: Lasse Collin +// +/////////////////////////////////////////////////////////////////////////////// + +#include "check.h" + + +static uint64_t crc64_table[256]; + + +#ifdef HAVE_FUNC_ATTRIBUTE_CONSTRUCTOR +__attribute__((__constructor__)) +#endif +static void +crc64_init(void) +{ + static const uint64_t poly64 = UINT64_C(0xC96C5795D7870F42); + + for (size_t b = 0; b < 256; ++b) { + uint64_t r = b; + for (size_t i = 0; i < 8; ++i) { + if (r & 1) + r = (r >> 1) ^ poly64; + else + r >>= 1; + } + + crc64_table[b] = r; + } + + return; +} + + +extern LZMA_API(uint64_t) +lzma_crc64(const uint8_t *buf, size_t size, uint64_t crc) +{ +#ifndef HAVE_FUNC_ATTRIBUTE_CONSTRUCTOR + mythread_once(crc64_init); +#endif + + crc = ~crc; + + while (size != 0) { + crc = crc64_table[*buf++ ^ (crc & 0xFF)] ^ (crc >> 8); + --size; + } + + return ~crc; +} diff --git a/src/native/external/xz/src/liblzma/check/crc64_table_be.h b/src/native/external/xz/src/liblzma/check/crc64_table_be.h new file mode 100644 index 00000000000000..db76cc70e07c74 --- /dev/null +++ b/src/native/external/xz/src/liblzma/check/crc64_table_be.h @@ -0,0 +1,523 @@ +// SPDX-License-Identifier: 0BSD + +// This file has been generated by crc64_tablegen.c. + +const uint64_t lzma_crc64_table[4][256] = { + { + UINT64_C(0x0000000000000000), UINT64_C(0x6F5FA703BE4C2EB3), + UINT64_C(0x5BA040A8573684F4), UINT64_C(0x34FFE7ABE97AAA47), + UINT64_C(0x335E8FFF84C3D07B), UINT64_C(0x5C0128FC3A8FFEC8), + UINT64_C(0x68FECF57D3F5548F), UINT64_C(0x07A168546DB97A3C), + UINT64_C(0x66BC1EFF0987A1F7), UINT64_C(0x09E3B9FCB7CB8F44), + UINT64_C(0x3D1C5E575EB12503), UINT64_C(0x5243F954E0FD0BB0), + UINT64_C(0x55E291008D44718C), UINT64_C(0x3ABD360333085F3F), + UINT64_C(0x0E42D1A8DA72F578), UINT64_C(0x611D76AB643EDBCB), + UINT64_C(0x4966335138A19B7D), UINT64_C(0x2639945286EDB5CE), + UINT64_C(0x12C673F96F971F89), UINT64_C(0x7D99D4FAD1DB313A), + UINT64_C(0x7A38BCAEBC624B06), UINT64_C(0x15671BAD022E65B5), + UINT64_C(0x2198FC06EB54CFF2), UINT64_C(0x4EC75B055518E141), + UINT64_C(0x2FDA2DAE31263A8A), UINT64_C(0x40858AAD8F6A1439), + UINT64_C(0x747A6D066610BE7E), UINT64_C(0x1B25CA05D85C90CD), + UINT64_C(0x1C84A251B5E5EAF1), UINT64_C(0x73DB05520BA9C442), + UINT64_C(0x4724E2F9E2D36E05), UINT64_C(0x287B45FA5C9F40B6), + UINT64_C(0x92CC66A2704237FB), UINT64_C(0xFD93C1A1CE0E1948), + UINT64_C(0xC96C260A2774B30F), UINT64_C(0xA633810999389DBC), + UINT64_C(0xA192E95DF481E780), UINT64_C(0xCECD4E5E4ACDC933), + UINT64_C(0xFA32A9F5A3B76374), UINT64_C(0x956D0EF61DFB4DC7), + UINT64_C(0xF470785D79C5960C), UINT64_C(0x9B2FDF5EC789B8BF), + UINT64_C(0xAFD038F52EF312F8), UINT64_C(0xC08F9FF690BF3C4B), + UINT64_C(0xC72EF7A2FD064677), UINT64_C(0xA87150A1434A68C4), + UINT64_C(0x9C8EB70AAA30C283), UINT64_C(0xF3D11009147CEC30), + UINT64_C(0xDBAA55F348E3AC86), UINT64_C(0xB4F5F2F0F6AF8235), + UINT64_C(0x800A155B1FD52872), UINT64_C(0xEF55B258A19906C1), + UINT64_C(0xE8F4DA0CCC207CFD), UINT64_C(0x87AB7D0F726C524E), + UINT64_C(0xB3549AA49B16F809), UINT64_C(0xDC0B3DA7255AD6BA), + UINT64_C(0xBD164B0C41640D71), UINT64_C(0xD249EC0FFF2823C2), + UINT64_C(0xE6B60BA416528985), UINT64_C(0x89E9ACA7A81EA736), + UINT64_C(0x8E48C4F3C5A7DD0A), UINT64_C(0xE11763F07BEBF3B9), + UINT64_C(0xD5E8845B929159FE), UINT64_C(0xBAB723582CDD774D), + UINT64_C(0xA187C3EBCA2BB664), UINT64_C(0xCED864E8746798D7), + UINT64_C(0xFA2783439D1D3290), UINT64_C(0x9578244023511C23), + UINT64_C(0x92D94C144EE8661F), UINT64_C(0xFD86EB17F0A448AC), + UINT64_C(0xC9790CBC19DEE2EB), UINT64_C(0xA626ABBFA792CC58), + UINT64_C(0xC73BDD14C3AC1793), UINT64_C(0xA8647A177DE03920), + UINT64_C(0x9C9B9DBC949A9367), UINT64_C(0xF3C43ABF2AD6BDD4), + UINT64_C(0xF46552EB476FC7E8), UINT64_C(0x9B3AF5E8F923E95B), + UINT64_C(0xAFC512431059431C), UINT64_C(0xC09AB540AE156DAF), + UINT64_C(0xE8E1F0BAF28A2D19), UINT64_C(0x87BE57B94CC603AA), + UINT64_C(0xB341B012A5BCA9ED), UINT64_C(0xDC1E17111BF0875E), + UINT64_C(0xDBBF7F457649FD62), UINT64_C(0xB4E0D846C805D3D1), + UINT64_C(0x801F3FED217F7996), UINT64_C(0xEF4098EE9F335725), + UINT64_C(0x8E5DEE45FB0D8CEE), UINT64_C(0xE10249464541A25D), + UINT64_C(0xD5FDAEEDAC3B081A), UINT64_C(0xBAA209EE127726A9), + UINT64_C(0xBD0361BA7FCE5C95), UINT64_C(0xD25CC6B9C1827226), + UINT64_C(0xE6A3211228F8D861), UINT64_C(0x89FC861196B4F6D2), + UINT64_C(0x334BA549BA69819F), UINT64_C(0x5C14024A0425AF2C), + UINT64_C(0x68EBE5E1ED5F056B), UINT64_C(0x07B442E253132BD8), + UINT64_C(0x00152AB63EAA51E4), UINT64_C(0x6F4A8DB580E67F57), + UINT64_C(0x5BB56A1E699CD510), UINT64_C(0x34EACD1DD7D0FBA3), + UINT64_C(0x55F7BBB6B3EE2068), UINT64_C(0x3AA81CB50DA20EDB), + UINT64_C(0x0E57FB1EE4D8A49C), UINT64_C(0x61085C1D5A948A2F), + UINT64_C(0x66A93449372DF013), UINT64_C(0x09F6934A8961DEA0), + UINT64_C(0x3D0974E1601B74E7), UINT64_C(0x5256D3E2DE575A54), + UINT64_C(0x7A2D961882C81AE2), UINT64_C(0x1572311B3C843451), + UINT64_C(0x218DD6B0D5FE9E16), UINT64_C(0x4ED271B36BB2B0A5), + UINT64_C(0x497319E7060BCA99), UINT64_C(0x262CBEE4B847E42A), + UINT64_C(0x12D3594F513D4E6D), UINT64_C(0x7D8CFE4CEF7160DE), + UINT64_C(0x1C9188E78B4FBB15), UINT64_C(0x73CE2FE4350395A6), + UINT64_C(0x4731C84FDC793FE1), UINT64_C(0x286E6F4C62351152), + UINT64_C(0x2FCF07180F8C6B6E), UINT64_C(0x4090A01BB1C045DD), + UINT64_C(0x746F47B058BAEF9A), UINT64_C(0x1B30E0B3E6F6C129), + UINT64_C(0x420F87D795576CC9), UINT64_C(0x2D5020D42B1B427A), + UINT64_C(0x19AFC77FC261E83D), UINT64_C(0x76F0607C7C2DC68E), + UINT64_C(0x715108281194BCB2), UINT64_C(0x1E0EAF2BAFD89201), + UINT64_C(0x2AF1488046A23846), UINT64_C(0x45AEEF83F8EE16F5), + UINT64_C(0x24B399289CD0CD3E), UINT64_C(0x4BEC3E2B229CE38D), + UINT64_C(0x7F13D980CBE649CA), UINT64_C(0x104C7E8375AA6779), + UINT64_C(0x17ED16D718131D45), UINT64_C(0x78B2B1D4A65F33F6), + UINT64_C(0x4C4D567F4F2599B1), UINT64_C(0x2312F17CF169B702), + UINT64_C(0x0B69B486ADF6F7B4), UINT64_C(0x6436138513BAD907), + UINT64_C(0x50C9F42EFAC07340), UINT64_C(0x3F96532D448C5DF3), + UINT64_C(0x38373B79293527CF), UINT64_C(0x57689C7A9779097C), + UINT64_C(0x63977BD17E03A33B), UINT64_C(0x0CC8DCD2C04F8D88), + UINT64_C(0x6DD5AA79A4715643), UINT64_C(0x028A0D7A1A3D78F0), + UINT64_C(0x3675EAD1F347D2B7), UINT64_C(0x592A4DD24D0BFC04), + UINT64_C(0x5E8B258620B28638), UINT64_C(0x31D482859EFEA88B), + UINT64_C(0x052B652E778402CC), UINT64_C(0x6A74C22DC9C82C7F), + UINT64_C(0xD0C3E175E5155B32), UINT64_C(0xBF9C46765B597581), + UINT64_C(0x8B63A1DDB223DFC6), UINT64_C(0xE43C06DE0C6FF175), + UINT64_C(0xE39D6E8A61D68B49), UINT64_C(0x8CC2C989DF9AA5FA), + UINT64_C(0xB83D2E2236E00FBD), UINT64_C(0xD762892188AC210E), + UINT64_C(0xB67FFF8AEC92FAC5), UINT64_C(0xD920588952DED476), + UINT64_C(0xEDDFBF22BBA47E31), UINT64_C(0x8280182105E85082), + UINT64_C(0x8521707568512ABE), UINT64_C(0xEA7ED776D61D040D), + UINT64_C(0xDE8130DD3F67AE4A), UINT64_C(0xB1DE97DE812B80F9), + UINT64_C(0x99A5D224DDB4C04F), UINT64_C(0xF6FA752763F8EEFC), + UINT64_C(0xC205928C8A8244BB), UINT64_C(0xAD5A358F34CE6A08), + UINT64_C(0xAAFB5DDB59771034), UINT64_C(0xC5A4FAD8E73B3E87), + UINT64_C(0xF15B1D730E4194C0), UINT64_C(0x9E04BA70B00DBA73), + UINT64_C(0xFF19CCDBD43361B8), UINT64_C(0x90466BD86A7F4F0B), + UINT64_C(0xA4B98C738305E54C), UINT64_C(0xCBE62B703D49CBFF), + UINT64_C(0xCC47432450F0B1C3), UINT64_C(0xA318E427EEBC9F70), + UINT64_C(0x97E7038C07C63537), UINT64_C(0xF8B8A48FB98A1B84), + UINT64_C(0xE388443C5F7CDAAD), UINT64_C(0x8CD7E33FE130F41E), + UINT64_C(0xB8280494084A5E59), UINT64_C(0xD777A397B60670EA), + UINT64_C(0xD0D6CBC3DBBF0AD6), UINT64_C(0xBF896CC065F32465), + UINT64_C(0x8B768B6B8C898E22), UINT64_C(0xE4292C6832C5A091), + UINT64_C(0x85345AC356FB7B5A), UINT64_C(0xEA6BFDC0E8B755E9), + UINT64_C(0xDE941A6B01CDFFAE), UINT64_C(0xB1CBBD68BF81D11D), + UINT64_C(0xB66AD53CD238AB21), UINT64_C(0xD935723F6C748592), + UINT64_C(0xEDCA9594850E2FD5), UINT64_C(0x829532973B420166), + UINT64_C(0xAAEE776D67DD41D0), UINT64_C(0xC5B1D06ED9916F63), + UINT64_C(0xF14E37C530EBC524), UINT64_C(0x9E1190C68EA7EB97), + UINT64_C(0x99B0F892E31E91AB), UINT64_C(0xF6EF5F915D52BF18), + UINT64_C(0xC210B83AB428155F), UINT64_C(0xAD4F1F390A643BEC), + UINT64_C(0xCC5269926E5AE027), UINT64_C(0xA30DCE91D016CE94), + UINT64_C(0x97F2293A396C64D3), UINT64_C(0xF8AD8E3987204A60), + UINT64_C(0xFF0CE66DEA99305C), UINT64_C(0x9053416E54D51EEF), + UINT64_C(0xA4ACA6C5BDAFB4A8), UINT64_C(0xCBF301C603E39A1B), + UINT64_C(0x7144229E2F3EED56), UINT64_C(0x1E1B859D9172C3E5), + UINT64_C(0x2AE46236780869A2), UINT64_C(0x45BBC535C6444711), + UINT64_C(0x421AAD61ABFD3D2D), UINT64_C(0x2D450A6215B1139E), + UINT64_C(0x19BAEDC9FCCBB9D9), UINT64_C(0x76E54ACA4287976A), + UINT64_C(0x17F83C6126B94CA1), UINT64_C(0x78A79B6298F56212), + UINT64_C(0x4C587CC9718FC855), UINT64_C(0x2307DBCACFC3E6E6), + UINT64_C(0x24A6B39EA27A9CDA), UINT64_C(0x4BF9149D1C36B269), + UINT64_C(0x7F06F336F54C182E), UINT64_C(0x105954354B00369D), + UINT64_C(0x382211CF179F762B), UINT64_C(0x577DB6CCA9D35898), + UINT64_C(0x6382516740A9F2DF), UINT64_C(0x0CDDF664FEE5DC6C), + UINT64_C(0x0B7C9E30935CA650), UINT64_C(0x642339332D1088E3), + UINT64_C(0x50DCDE98C46A22A4), UINT64_C(0x3F83799B7A260C17), + UINT64_C(0x5E9E0F301E18D7DC), UINT64_C(0x31C1A833A054F96F), + UINT64_C(0x053E4F98492E5328), UINT64_C(0x6A61E89BF7627D9B), + UINT64_C(0x6DC080CF9ADB07A7), UINT64_C(0x029F27CC24972914), + UINT64_C(0x3660C067CDED8353), UINT64_C(0x593F676473A1ADE0) + }, { + UINT64_C(0x0000000000000000), UINT64_C(0x0DF1D05C9279E954), + UINT64_C(0x1AE2A1B924F3D2A9), UINT64_C(0x171371E5B68A3BFD), + UINT64_C(0xB1DA4DDC62497DC1), UINT64_C(0xBC2B9D80F0309495), + UINT64_C(0xAB38EC6546BAAF68), UINT64_C(0xA6C93C39D4C3463C), + UINT64_C(0xE7AB9517EE3D2210), UINT64_C(0xEA5A454B7C44CB44), + UINT64_C(0xFD4934AECACEF0B9), UINT64_C(0xF0B8E4F258B719ED), + UINT64_C(0x5671D8CB8C745FD1), UINT64_C(0x5B8008971E0DB685), + UINT64_C(0x4C937972A8878D78), UINT64_C(0x4162A92E3AFE642C), + UINT64_C(0xCE572B2FDC7B4420), UINT64_C(0xC3A6FB734E02AD74), + UINT64_C(0xD4B58A96F8889689), UINT64_C(0xD9445ACA6AF17FDD), + UINT64_C(0x7F8D66F3BE3239E1), UINT64_C(0x727CB6AF2C4BD0B5), + UINT64_C(0x656FC74A9AC1EB48), UINT64_C(0x689E171608B8021C), + UINT64_C(0x29FCBE3832466630), UINT64_C(0x240D6E64A03F8F64), + UINT64_C(0x331E1F8116B5B499), UINT64_C(0x3EEFCFDD84CC5DCD), + UINT64_C(0x9826F3E4500F1BF1), UINT64_C(0x95D723B8C276F2A5), + UINT64_C(0x82C4525D74FCC958), UINT64_C(0x8F358201E685200C), + UINT64_C(0x9CAF565EB8F78840), UINT64_C(0x915E86022A8E6114), + UINT64_C(0x864DF7E79C045AE9), UINT64_C(0x8BBC27BB0E7DB3BD), + UINT64_C(0x2D751B82DABEF581), UINT64_C(0x2084CBDE48C71CD5), + UINT64_C(0x3797BA3BFE4D2728), UINT64_C(0x3A666A676C34CE7C), + UINT64_C(0x7B04C34956CAAA50), UINT64_C(0x76F51315C4B34304), + UINT64_C(0x61E662F0723978F9), UINT64_C(0x6C17B2ACE04091AD), + UINT64_C(0xCADE8E953483D791), UINT64_C(0xC72F5EC9A6FA3EC5), + UINT64_C(0xD03C2F2C10700538), UINT64_C(0xDDCDFF708209EC6C), + UINT64_C(0x52F87D71648CCC60), UINT64_C(0x5F09AD2DF6F52534), + UINT64_C(0x481ADCC8407F1EC9), UINT64_C(0x45EB0C94D206F79D), + UINT64_C(0xE32230AD06C5B1A1), UINT64_C(0xEED3E0F194BC58F5), + UINT64_C(0xF9C0911422366308), UINT64_C(0xF4314148B04F8A5C), + UINT64_C(0xB553E8668AB1EE70), UINT64_C(0xB8A2383A18C80724), + UINT64_C(0xAFB149DFAE423CD9), UINT64_C(0xA24099833C3BD58D), + UINT64_C(0x0489A5BAE8F893B1), UINT64_C(0x097875E67A817AE5), + UINT64_C(0x1E6B0403CC0B4118), UINT64_C(0x139AD45F5E72A84C), + UINT64_C(0x385FADBC70EF1181), UINT64_C(0x35AE7DE0E296F8D5), + UINT64_C(0x22BD0C05541CC328), UINT64_C(0x2F4CDC59C6652A7C), + UINT64_C(0x8985E06012A66C40), UINT64_C(0x8474303C80DF8514), + UINT64_C(0x936741D93655BEE9), UINT64_C(0x9E969185A42C57BD), + UINT64_C(0xDFF438AB9ED23391), UINT64_C(0xD205E8F70CABDAC5), + UINT64_C(0xC5169912BA21E138), UINT64_C(0xC8E7494E2858086C), + UINT64_C(0x6E2E7577FC9B4E50), UINT64_C(0x63DFA52B6EE2A704), + UINT64_C(0x74CCD4CED8689CF9), UINT64_C(0x793D04924A1175AD), + UINT64_C(0xF6088693AC9455A1), UINT64_C(0xFBF956CF3EEDBCF5), + UINT64_C(0xECEA272A88678708), UINT64_C(0xE11BF7761A1E6E5C), + UINT64_C(0x47D2CB4FCEDD2860), UINT64_C(0x4A231B135CA4C134), + UINT64_C(0x5D306AF6EA2EFAC9), UINT64_C(0x50C1BAAA7857139D), + UINT64_C(0x11A3138442A977B1), UINT64_C(0x1C52C3D8D0D09EE5), + UINT64_C(0x0B41B23D665AA518), UINT64_C(0x06B06261F4234C4C), + UINT64_C(0xA0795E5820E00A70), UINT64_C(0xAD888E04B299E324), + UINT64_C(0xBA9BFFE10413D8D9), UINT64_C(0xB76A2FBD966A318D), + UINT64_C(0xA4F0FBE2C81899C1), UINT64_C(0xA9012BBE5A617095), + UINT64_C(0xBE125A5BECEB4B68), UINT64_C(0xB3E38A077E92A23C), + UINT64_C(0x152AB63EAA51E400), UINT64_C(0x18DB666238280D54), + UINT64_C(0x0FC817878EA236A9), UINT64_C(0x0239C7DB1CDBDFFD), + UINT64_C(0x435B6EF52625BBD1), UINT64_C(0x4EAABEA9B45C5285), + UINT64_C(0x59B9CF4C02D66978), UINT64_C(0x54481F1090AF802C), + UINT64_C(0xF2812329446CC610), UINT64_C(0xFF70F375D6152F44), + UINT64_C(0xE8638290609F14B9), UINT64_C(0xE59252CCF2E6FDED), + UINT64_C(0x6AA7D0CD1463DDE1), UINT64_C(0x67560091861A34B5), + UINT64_C(0x7045717430900F48), UINT64_C(0x7DB4A128A2E9E61C), + UINT64_C(0xDB7D9D11762AA020), UINT64_C(0xD68C4D4DE4534974), + UINT64_C(0xC19F3CA852D97289), UINT64_C(0xCC6EECF4C0A09BDD), + UINT64_C(0x8D0C45DAFA5EFFF1), UINT64_C(0x80FD9586682716A5), + UINT64_C(0x97EEE463DEAD2D58), UINT64_C(0x9A1F343F4CD4C40C), + UINT64_C(0x3CD6080698178230), UINT64_C(0x3127D85A0A6E6B64), + UINT64_C(0x2634A9BFBCE45099), UINT64_C(0x2BC579E32E9DB9CD), + UINT64_C(0xF5A054D6CA71FB90), UINT64_C(0xF851848A580812C4), + UINT64_C(0xEF42F56FEE822939), UINT64_C(0xE2B325337CFBC06D), + UINT64_C(0x447A190AA8388651), UINT64_C(0x498BC9563A416F05), + UINT64_C(0x5E98B8B38CCB54F8), UINT64_C(0x536968EF1EB2BDAC), + UINT64_C(0x120BC1C1244CD980), UINT64_C(0x1FFA119DB63530D4), + UINT64_C(0x08E9607800BF0B29), UINT64_C(0x0518B02492C6E27D), + UINT64_C(0xA3D18C1D4605A441), UINT64_C(0xAE205C41D47C4D15), + UINT64_C(0xB9332DA462F676E8), UINT64_C(0xB4C2FDF8F08F9FBC), + UINT64_C(0x3BF77FF9160ABFB0), UINT64_C(0x3606AFA5847356E4), + UINT64_C(0x2115DE4032F96D19), UINT64_C(0x2CE40E1CA080844D), + UINT64_C(0x8A2D32257443C271), UINT64_C(0x87DCE279E63A2B25), + UINT64_C(0x90CF939C50B010D8), UINT64_C(0x9D3E43C0C2C9F98C), + UINT64_C(0xDC5CEAEEF8379DA0), UINT64_C(0xD1AD3AB26A4E74F4), + UINT64_C(0xC6BE4B57DCC44F09), UINT64_C(0xCB4F9B0B4EBDA65D), + UINT64_C(0x6D86A7329A7EE061), UINT64_C(0x6077776E08070935), + UINT64_C(0x7764068BBE8D32C8), UINT64_C(0x7A95D6D72CF4DB9C), + UINT64_C(0x690F0288728673D0), UINT64_C(0x64FED2D4E0FF9A84), + UINT64_C(0x73EDA3315675A179), UINT64_C(0x7E1C736DC40C482D), + UINT64_C(0xD8D54F5410CF0E11), UINT64_C(0xD5249F0882B6E745), + UINT64_C(0xC237EEED343CDCB8), UINT64_C(0xCFC63EB1A64535EC), + UINT64_C(0x8EA4979F9CBB51C0), UINT64_C(0x835547C30EC2B894), + UINT64_C(0x94463626B8488369), UINT64_C(0x99B7E67A2A316A3D), + UINT64_C(0x3F7EDA43FEF22C01), UINT64_C(0x328F0A1F6C8BC555), + UINT64_C(0x259C7BFADA01FEA8), UINT64_C(0x286DABA6487817FC), + UINT64_C(0xA75829A7AEFD37F0), UINT64_C(0xAAA9F9FB3C84DEA4), + UINT64_C(0xBDBA881E8A0EE559), UINT64_C(0xB04B584218770C0D), + UINT64_C(0x1682647BCCB44A31), UINT64_C(0x1B73B4275ECDA365), + UINT64_C(0x0C60C5C2E8479898), UINT64_C(0x0191159E7A3E71CC), + UINT64_C(0x40F3BCB040C015E0), UINT64_C(0x4D026CECD2B9FCB4), + UINT64_C(0x5A111D096433C749), UINT64_C(0x57E0CD55F64A2E1D), + UINT64_C(0xF129F16C22896821), UINT64_C(0xFCD82130B0F08175), + UINT64_C(0xEBCB50D5067ABA88), UINT64_C(0xE63A8089940353DC), + UINT64_C(0xCDFFF96ABA9EEA11), UINT64_C(0xC00E293628E70345), + UINT64_C(0xD71D58D39E6D38B8), UINT64_C(0xDAEC888F0C14D1EC), + UINT64_C(0x7C25B4B6D8D797D0), UINT64_C(0x71D464EA4AAE7E84), + UINT64_C(0x66C7150FFC244579), UINT64_C(0x6B36C5536E5DAC2D), + UINT64_C(0x2A546C7D54A3C801), UINT64_C(0x27A5BC21C6DA2155), + UINT64_C(0x30B6CDC470501AA8), UINT64_C(0x3D471D98E229F3FC), + UINT64_C(0x9B8E21A136EAB5C0), UINT64_C(0x967FF1FDA4935C94), + UINT64_C(0x816C801812196769), UINT64_C(0x8C9D504480608E3D), + UINT64_C(0x03A8D24566E5AE31), UINT64_C(0x0E590219F49C4765), + UINT64_C(0x194A73FC42167C98), UINT64_C(0x14BBA3A0D06F95CC), + UINT64_C(0xB2729F9904ACD3F0), UINT64_C(0xBF834FC596D53AA4), + UINT64_C(0xA8903E20205F0159), UINT64_C(0xA561EE7CB226E80D), + UINT64_C(0xE403475288D88C21), UINT64_C(0xE9F2970E1AA16575), + UINT64_C(0xFEE1E6EBAC2B5E88), UINT64_C(0xF31036B73E52B7DC), + UINT64_C(0x55D90A8EEA91F1E0), UINT64_C(0x5828DAD278E818B4), + UINT64_C(0x4F3BAB37CE622349), UINT64_C(0x42CA7B6B5C1BCA1D), + UINT64_C(0x5150AF3402696251), UINT64_C(0x5CA17F6890108B05), + UINT64_C(0x4BB20E8D269AB0F8), UINT64_C(0x4643DED1B4E359AC), + UINT64_C(0xE08AE2E860201F90), UINT64_C(0xED7B32B4F259F6C4), + UINT64_C(0xFA68435144D3CD39), UINT64_C(0xF799930DD6AA246D), + UINT64_C(0xB6FB3A23EC544041), UINT64_C(0xBB0AEA7F7E2DA915), + UINT64_C(0xAC199B9AC8A792E8), UINT64_C(0xA1E84BC65ADE7BBC), + UINT64_C(0x072177FF8E1D3D80), UINT64_C(0x0AD0A7A31C64D4D4), + UINT64_C(0x1DC3D646AAEEEF29), UINT64_C(0x1032061A3897067D), + UINT64_C(0x9F07841BDE122671), UINT64_C(0x92F654474C6BCF25), + UINT64_C(0x85E525A2FAE1F4D8), UINT64_C(0x8814F5FE68981D8C), + UINT64_C(0x2EDDC9C7BC5B5BB0), UINT64_C(0x232C199B2E22B2E4), + UINT64_C(0x343F687E98A88919), UINT64_C(0x39CEB8220AD1604D), + UINT64_C(0x78AC110C302F0461), UINT64_C(0x755DC150A256ED35), + UINT64_C(0x624EB0B514DCD6C8), UINT64_C(0x6FBF60E986A53F9C), + UINT64_C(0xC9765CD0526679A0), UINT64_C(0xC4878C8CC01F90F4), + UINT64_C(0xD394FD697695AB09), UINT64_C(0xDE652D35E4EC425D) + }, { + UINT64_C(0x0000000000000000), UINT64_C(0xCB6D6A914AE10B3F), + UINT64_C(0x96DBD42295C2177E), UINT64_C(0x5DB6BEB3DF231C41), + UINT64_C(0x2CB7A9452A852FFC), UINT64_C(0xE7DAC3D4606424C3), + UINT64_C(0xBA6C7D67BF473882), UINT64_C(0x710117F6F5A633BD), + UINT64_C(0xDD705D247FA5876A), UINT64_C(0x161D37B535448C55), + UINT64_C(0x4BAB8906EA679014), UINT64_C(0x80C6E397A0869B2B), + UINT64_C(0xF1C7F4615520A896), UINT64_C(0x3AAA9EF01FC1A3A9), + UINT64_C(0x671C2043C0E2BFE8), UINT64_C(0xAC714AD28A03B4D7), + UINT64_C(0xBAE1BA48FE4A0FD5), UINT64_C(0x718CD0D9B4AB04EA), + UINT64_C(0x2C3A6E6A6B8818AB), UINT64_C(0xE75704FB21691394), + UINT64_C(0x9656130DD4CF2029), UINT64_C(0x5D3B799C9E2E2B16), + UINT64_C(0x008DC72F410D3757), UINT64_C(0xCBE0ADBE0BEC3C68), + UINT64_C(0x6791E76C81EF88BF), UINT64_C(0xACFC8DFDCB0E8380), + UINT64_C(0xF14A334E142D9FC1), UINT64_C(0x3A2759DF5ECC94FE), + UINT64_C(0x4B264E29AB6AA743), UINT64_C(0x804B24B8E18BAC7C), + UINT64_C(0xDDFD9A0B3EA8B03D), UINT64_C(0x1690F09A7449BB02), + UINT64_C(0xF1DD7B3ED73AC638), UINT64_C(0x3AB011AF9DDBCD07), + UINT64_C(0x6706AF1C42F8D146), UINT64_C(0xAC6BC58D0819DA79), + UINT64_C(0xDD6AD27BFDBFE9C4), UINT64_C(0x1607B8EAB75EE2FB), + UINT64_C(0x4BB10659687DFEBA), UINT64_C(0x80DC6CC8229CF585), + UINT64_C(0x2CAD261AA89F4152), UINT64_C(0xE7C04C8BE27E4A6D), + UINT64_C(0xBA76F2383D5D562C), UINT64_C(0x711B98A977BC5D13), + UINT64_C(0x001A8F5F821A6EAE), UINT64_C(0xCB77E5CEC8FB6591), + UINT64_C(0x96C15B7D17D879D0), UINT64_C(0x5DAC31EC5D3972EF), + UINT64_C(0x4B3CC1762970C9ED), UINT64_C(0x8051ABE76391C2D2), + UINT64_C(0xDDE71554BCB2DE93), UINT64_C(0x168A7FC5F653D5AC), + UINT64_C(0x678B683303F5E611), UINT64_C(0xACE602A24914ED2E), + UINT64_C(0xF150BC119637F16F), UINT64_C(0x3A3DD680DCD6FA50), + UINT64_C(0x964C9C5256D54E87), UINT64_C(0x5D21F6C31C3445B8), + UINT64_C(0x00974870C31759F9), UINT64_C(0xCBFA22E189F652C6), + UINT64_C(0xBAFB35177C50617B), UINT64_C(0x71965F8636B16A44), + UINT64_C(0x2C20E135E9927605), UINT64_C(0xE74D8BA4A3737D3A), + UINT64_C(0xE2BBF77CAE758C71), UINT64_C(0x29D69DEDE494874E), + UINT64_C(0x7460235E3BB79B0F), UINT64_C(0xBF0D49CF71569030), + UINT64_C(0xCE0C5E3984F0A38D), UINT64_C(0x056134A8CE11A8B2), + UINT64_C(0x58D78A1B1132B4F3), UINT64_C(0x93BAE08A5BD3BFCC), + UINT64_C(0x3FCBAA58D1D00B1B), UINT64_C(0xF4A6C0C99B310024), + UINT64_C(0xA9107E7A44121C65), UINT64_C(0x627D14EB0EF3175A), + UINT64_C(0x137C031DFB5524E7), UINT64_C(0xD811698CB1B42FD8), + UINT64_C(0x85A7D73F6E973399), UINT64_C(0x4ECABDAE247638A6), + UINT64_C(0x585A4D34503F83A4), UINT64_C(0x933727A51ADE889B), + UINT64_C(0xCE819916C5FD94DA), UINT64_C(0x05ECF3878F1C9FE5), + UINT64_C(0x74EDE4717ABAAC58), UINT64_C(0xBF808EE0305BA767), + UINT64_C(0xE2363053EF78BB26), UINT64_C(0x295B5AC2A599B019), + UINT64_C(0x852A10102F9A04CE), UINT64_C(0x4E477A81657B0FF1), + UINT64_C(0x13F1C432BA5813B0), UINT64_C(0xD89CAEA3F0B9188F), + UINT64_C(0xA99DB955051F2B32), UINT64_C(0x62F0D3C44FFE200D), + UINT64_C(0x3F466D7790DD3C4C), UINT64_C(0xF42B07E6DA3C3773), + UINT64_C(0x13668C42794F4A49), UINT64_C(0xD80BE6D333AE4176), + UINT64_C(0x85BD5860EC8D5D37), UINT64_C(0x4ED032F1A66C5608), + UINT64_C(0x3FD1250753CA65B5), UINT64_C(0xF4BC4F96192B6E8A), + UINT64_C(0xA90AF125C60872CB), UINT64_C(0x62679BB48CE979F4), + UINT64_C(0xCE16D16606EACD23), UINT64_C(0x057BBBF74C0BC61C), + UINT64_C(0x58CD05449328DA5D), UINT64_C(0x93A06FD5D9C9D162), + UINT64_C(0xE2A178232C6FE2DF), UINT64_C(0x29CC12B2668EE9E0), + UINT64_C(0x747AAC01B9ADF5A1), UINT64_C(0xBF17C690F34CFE9E), + UINT64_C(0xA987360A8705459C), UINT64_C(0x62EA5C9BCDE44EA3), + UINT64_C(0x3F5CE22812C752E2), UINT64_C(0xF43188B9582659DD), + UINT64_C(0x85309F4FAD806A60), UINT64_C(0x4E5DF5DEE761615F), + UINT64_C(0x13EB4B6D38427D1E), UINT64_C(0xD88621FC72A37621), + UINT64_C(0x74F76B2EF8A0C2F6), UINT64_C(0xBF9A01BFB241C9C9), + UINT64_C(0xE22CBF0C6D62D588), UINT64_C(0x2941D59D2783DEB7), + UINT64_C(0x5840C26BD225ED0A), UINT64_C(0x932DA8FA98C4E635), + UINT64_C(0xCE9B164947E7FA74), UINT64_C(0x05F67CD80D06F14B), + UINT64_C(0xC477EFF95CEB18E3), UINT64_C(0x0F1A8568160A13DC), + UINT64_C(0x52AC3BDBC9290F9D), UINT64_C(0x99C1514A83C804A2), + UINT64_C(0xE8C046BC766E371F), UINT64_C(0x23AD2C2D3C8F3C20), + UINT64_C(0x7E1B929EE3AC2061), UINT64_C(0xB576F80FA94D2B5E), + UINT64_C(0x1907B2DD234E9F89), UINT64_C(0xD26AD84C69AF94B6), + UINT64_C(0x8FDC66FFB68C88F7), UINT64_C(0x44B10C6EFC6D83C8), + UINT64_C(0x35B01B9809CBB075), UINT64_C(0xFEDD7109432ABB4A), + UINT64_C(0xA36BCFBA9C09A70B), UINT64_C(0x6806A52BD6E8AC34), + UINT64_C(0x7E9655B1A2A11736), UINT64_C(0xB5FB3F20E8401C09), + UINT64_C(0xE84D819337630048), UINT64_C(0x2320EB027D820B77), + UINT64_C(0x5221FCF4882438CA), UINT64_C(0x994C9665C2C533F5), + UINT64_C(0xC4FA28D61DE62FB4), UINT64_C(0x0F9742475707248B), + UINT64_C(0xA3E60895DD04905C), UINT64_C(0x688B620497E59B63), + UINT64_C(0x353DDCB748C68722), UINT64_C(0xFE50B62602278C1D), + UINT64_C(0x8F51A1D0F781BFA0), UINT64_C(0x443CCB41BD60B49F), + UINT64_C(0x198A75F26243A8DE), UINT64_C(0xD2E71F6328A2A3E1), + UINT64_C(0x35AA94C78BD1DEDB), UINT64_C(0xFEC7FE56C130D5E4), + UINT64_C(0xA37140E51E13C9A5), UINT64_C(0x681C2A7454F2C29A), + UINT64_C(0x191D3D82A154F127), UINT64_C(0xD2705713EBB5FA18), + UINT64_C(0x8FC6E9A03496E659), UINT64_C(0x44AB83317E77ED66), + UINT64_C(0xE8DAC9E3F47459B1), UINT64_C(0x23B7A372BE95528E), + UINT64_C(0x7E011DC161B64ECF), UINT64_C(0xB56C77502B5745F0), + UINT64_C(0xC46D60A6DEF1764D), UINT64_C(0x0F000A3794107D72), + UINT64_C(0x52B6B4844B336133), UINT64_C(0x99DBDE1501D26A0C), + UINT64_C(0x8F4B2E8F759BD10E), UINT64_C(0x4426441E3F7ADA31), + UINT64_C(0x1990FAADE059C670), UINT64_C(0xD2FD903CAAB8CD4F), + UINT64_C(0xA3FC87CA5F1EFEF2), UINT64_C(0x6891ED5B15FFF5CD), + UINT64_C(0x352753E8CADCE98C), UINT64_C(0xFE4A3979803DE2B3), + UINT64_C(0x523B73AB0A3E5664), UINT64_C(0x9956193A40DF5D5B), + UINT64_C(0xC4E0A7899FFC411A), UINT64_C(0x0F8DCD18D51D4A25), + UINT64_C(0x7E8CDAEE20BB7998), UINT64_C(0xB5E1B07F6A5A72A7), + UINT64_C(0xE8570ECCB5796EE6), UINT64_C(0x233A645DFF9865D9), + UINT64_C(0x26CC1885F29E9492), UINT64_C(0xEDA17214B87F9FAD), + UINT64_C(0xB017CCA7675C83EC), UINT64_C(0x7B7AA6362DBD88D3), + UINT64_C(0x0A7BB1C0D81BBB6E), UINT64_C(0xC116DB5192FAB051), + UINT64_C(0x9CA065E24DD9AC10), UINT64_C(0x57CD0F730738A72F), + UINT64_C(0xFBBC45A18D3B13F8), UINT64_C(0x30D12F30C7DA18C7), + UINT64_C(0x6D67918318F90486), UINT64_C(0xA60AFB1252180FB9), + UINT64_C(0xD70BECE4A7BE3C04), UINT64_C(0x1C668675ED5F373B), + UINT64_C(0x41D038C6327C2B7A), UINT64_C(0x8ABD5257789D2045), + UINT64_C(0x9C2DA2CD0CD49B47), UINT64_C(0x5740C85C46359078), + UINT64_C(0x0AF676EF99168C39), UINT64_C(0xC19B1C7ED3F78706), + UINT64_C(0xB09A0B882651B4BB), UINT64_C(0x7BF761196CB0BF84), + UINT64_C(0x2641DFAAB393A3C5), UINT64_C(0xED2CB53BF972A8FA), + UINT64_C(0x415DFFE973711C2D), UINT64_C(0x8A30957839901712), + UINT64_C(0xD7862BCBE6B30B53), UINT64_C(0x1CEB415AAC52006C), + UINT64_C(0x6DEA56AC59F433D1), UINT64_C(0xA6873C3D131538EE), + UINT64_C(0xFB31828ECC3624AF), UINT64_C(0x305CE81F86D72F90), + UINT64_C(0xD71163BB25A452AA), UINT64_C(0x1C7C092A6F455995), + UINT64_C(0x41CAB799B06645D4), UINT64_C(0x8AA7DD08FA874EEB), + UINT64_C(0xFBA6CAFE0F217D56), UINT64_C(0x30CBA06F45C07669), + UINT64_C(0x6D7D1EDC9AE36A28), UINT64_C(0xA610744DD0026117), + UINT64_C(0x0A613E9F5A01D5C0), UINT64_C(0xC10C540E10E0DEFF), + UINT64_C(0x9CBAEABDCFC3C2BE), UINT64_C(0x57D7802C8522C981), + UINT64_C(0x26D697DA7084FA3C), UINT64_C(0xEDBBFD4B3A65F103), + UINT64_C(0xB00D43F8E546ED42), UINT64_C(0x7B602969AFA7E67D), + UINT64_C(0x6DF0D9F3DBEE5D7F), UINT64_C(0xA69DB362910F5640), + UINT64_C(0xFB2B0DD14E2C4A01), UINT64_C(0x3046674004CD413E), + UINT64_C(0x414770B6F16B7283), UINT64_C(0x8A2A1A27BB8A79BC), + UINT64_C(0xD79CA49464A965FD), UINT64_C(0x1CF1CE052E486EC2), + UINT64_C(0xB08084D7A44BDA15), UINT64_C(0x7BEDEE46EEAAD12A), + UINT64_C(0x265B50F53189CD6B), UINT64_C(0xED363A647B68C654), + UINT64_C(0x9C372D928ECEF5E9), UINT64_C(0x575A4703C42FFED6), + UINT64_C(0x0AECF9B01B0CE297), UINT64_C(0xC181932151EDE9A8) + }, { + UINT64_C(0x0000000000000000), UINT64_C(0xDCA12C225E8AEE1D), + UINT64_C(0xB8435944BC14DD3B), UINT64_C(0x64E27566E29E3326), + UINT64_C(0x7087B2887829BA77), UINT64_C(0xAC269EAA26A3546A), + UINT64_C(0xC8C4EBCCC43D674C), UINT64_C(0x1465C7EE9AB78951), + UINT64_C(0xE00E6511F15274EF), UINT64_C(0x3CAF4933AFD89AF2), + UINT64_C(0x584D3C554D46A9D4), UINT64_C(0x84EC107713CC47C9), + UINT64_C(0x9089D799897BCE98), UINT64_C(0x4C28FBBBD7F12085), + UINT64_C(0x28CA8EDD356F13A3), UINT64_C(0xF46BA2FF6BE5FDBE), + UINT64_C(0x4503C48DC90A304C), UINT64_C(0x99A2E8AF9780DE51), + UINT64_C(0xFD409DC9751EED77), UINT64_C(0x21E1B1EB2B94036A), + UINT64_C(0x35847605B1238A3B), UINT64_C(0xE9255A27EFA96426), + UINT64_C(0x8DC72F410D375700), UINT64_C(0x5166036353BDB91D), + UINT64_C(0xA50DA19C385844A3), UINT64_C(0x79AC8DBE66D2AABE), + UINT64_C(0x1D4EF8D8844C9998), UINT64_C(0xC1EFD4FADAC67785), + UINT64_C(0xD58A13144071FED4), UINT64_C(0x092B3F361EFB10C9), + UINT64_C(0x6DC94A50FC6523EF), UINT64_C(0xB1686672A2EFCDF2), + UINT64_C(0x8A06881B93156098), UINT64_C(0x56A7A439CD9F8E85), + UINT64_C(0x3245D15F2F01BDA3), UINT64_C(0xEEE4FD7D718B53BE), + UINT64_C(0xFA813A93EB3CDAEF), UINT64_C(0x262016B1B5B634F2), + UINT64_C(0x42C263D7572807D4), UINT64_C(0x9E634FF509A2E9C9), + UINT64_C(0x6A08ED0A62471477), UINT64_C(0xB6A9C1283CCDFA6A), + UINT64_C(0xD24BB44EDE53C94C), UINT64_C(0x0EEA986C80D92751), + UINT64_C(0x1A8F5F821A6EAE00), UINT64_C(0xC62E73A044E4401D), + UINT64_C(0xA2CC06C6A67A733B), UINT64_C(0x7E6D2AE4F8F09D26), + UINT64_C(0xCF054C965A1F50D4), UINT64_C(0x13A460B40495BEC9), + UINT64_C(0x774615D2E60B8DEF), UINT64_C(0xABE739F0B88163F2), + UINT64_C(0xBF82FE1E2236EAA3), UINT64_C(0x6323D23C7CBC04BE), + UINT64_C(0x07C1A75A9E223798), UINT64_C(0xDB608B78C0A8D985), + UINT64_C(0x2F0B2987AB4D243B), UINT64_C(0xF3AA05A5F5C7CA26), + UINT64_C(0x974870C31759F900), UINT64_C(0x4BE95CE149D3171D), + UINT64_C(0x5F8C9B0FD3649E4C), UINT64_C(0x832DB72D8DEE7051), + UINT64_C(0xE7CFC24B6F704377), UINT64_C(0x3B6EEE6931FAAD6A), + UINT64_C(0x91131E980D8418A2), UINT64_C(0x4DB232BA530EF6BF), + UINT64_C(0x295047DCB190C599), UINT64_C(0xF5F16BFEEF1A2B84), + UINT64_C(0xE194AC1075ADA2D5), UINT64_C(0x3D3580322B274CC8), + UINT64_C(0x59D7F554C9B97FEE), UINT64_C(0x8576D976973391F3), + UINT64_C(0x711D7B89FCD66C4D), UINT64_C(0xADBC57ABA25C8250), + UINT64_C(0xC95E22CD40C2B176), UINT64_C(0x15FF0EEF1E485F6B), + UINT64_C(0x019AC90184FFD63A), UINT64_C(0xDD3BE523DA753827), + UINT64_C(0xB9D9904538EB0B01), UINT64_C(0x6578BC676661E51C), + UINT64_C(0xD410DA15C48E28EE), UINT64_C(0x08B1F6379A04C6F3), + UINT64_C(0x6C538351789AF5D5), UINT64_C(0xB0F2AF7326101BC8), + UINT64_C(0xA497689DBCA79299), UINT64_C(0x783644BFE22D7C84), + UINT64_C(0x1CD431D900B34FA2), UINT64_C(0xC0751DFB5E39A1BF), + UINT64_C(0x341EBF0435DC5C01), UINT64_C(0xE8BF93266B56B21C), + UINT64_C(0x8C5DE64089C8813A), UINT64_C(0x50FCCA62D7426F27), + UINT64_C(0x44990D8C4DF5E676), UINT64_C(0x983821AE137F086B), + UINT64_C(0xFCDA54C8F1E13B4D), UINT64_C(0x207B78EAAF6BD550), + UINT64_C(0x1B1596839E91783A), UINT64_C(0xC7B4BAA1C01B9627), + UINT64_C(0xA356CFC72285A501), UINT64_C(0x7FF7E3E57C0F4B1C), + UINT64_C(0x6B92240BE6B8C24D), UINT64_C(0xB7330829B8322C50), + UINT64_C(0xD3D17D4F5AAC1F76), UINT64_C(0x0F70516D0426F16B), + UINT64_C(0xFB1BF3926FC30CD5), UINT64_C(0x27BADFB03149E2C8), + UINT64_C(0x4358AAD6D3D7D1EE), UINT64_C(0x9FF986F48D5D3FF3), + UINT64_C(0x8B9C411A17EAB6A2), UINT64_C(0x573D6D38496058BF), + UINT64_C(0x33DF185EABFE6B99), UINT64_C(0xEF7E347CF5748584), + UINT64_C(0x5E16520E579B4876), UINT64_C(0x82B77E2C0911A66B), + UINT64_C(0xE6550B4AEB8F954D), UINT64_C(0x3AF42768B5057B50), + UINT64_C(0x2E91E0862FB2F201), UINT64_C(0xF230CCA471381C1C), + UINT64_C(0x96D2B9C293A62F3A), UINT64_C(0x4A7395E0CD2CC127), + UINT64_C(0xBE18371FA6C93C99), UINT64_C(0x62B91B3DF843D284), + UINT64_C(0x065B6E5B1ADDE1A2), UINT64_C(0xDAFA427944570FBF), + UINT64_C(0xCE9F8597DEE086EE), UINT64_C(0x123EA9B5806A68F3), + UINT64_C(0x76DCDCD362F45BD5), UINT64_C(0xAA7DF0F13C7EB5C8), + UINT64_C(0xA739329F30A7E9D6), UINT64_C(0x7B981EBD6E2D07CB), + UINT64_C(0x1F7A6BDB8CB334ED), UINT64_C(0xC3DB47F9D239DAF0), + UINT64_C(0xD7BE8017488E53A1), UINT64_C(0x0B1FAC351604BDBC), + UINT64_C(0x6FFDD953F49A8E9A), UINT64_C(0xB35CF571AA106087), + UINT64_C(0x4737578EC1F59D39), UINT64_C(0x9B967BAC9F7F7324), + UINT64_C(0xFF740ECA7DE14002), UINT64_C(0x23D522E8236BAE1F), + UINT64_C(0x37B0E506B9DC274E), UINT64_C(0xEB11C924E756C953), + UINT64_C(0x8FF3BC4205C8FA75), UINT64_C(0x535290605B421468), + UINT64_C(0xE23AF612F9ADD99A), UINT64_C(0x3E9BDA30A7273787), + UINT64_C(0x5A79AF5645B904A1), UINT64_C(0x86D883741B33EABC), + UINT64_C(0x92BD449A818463ED), UINT64_C(0x4E1C68B8DF0E8DF0), + UINT64_C(0x2AFE1DDE3D90BED6), UINT64_C(0xF65F31FC631A50CB), + UINT64_C(0x0234930308FFAD75), UINT64_C(0xDE95BF2156754368), + UINT64_C(0xBA77CA47B4EB704E), UINT64_C(0x66D6E665EA619E53), + UINT64_C(0x72B3218B70D61702), UINT64_C(0xAE120DA92E5CF91F), + UINT64_C(0xCAF078CFCCC2CA39), UINT64_C(0x165154ED92482424), + UINT64_C(0x2D3FBA84A3B2894E), UINT64_C(0xF19E96A6FD386753), + UINT64_C(0x957CE3C01FA65475), UINT64_C(0x49DDCFE2412CBA68), + UINT64_C(0x5DB8080CDB9B3339), UINT64_C(0x8119242E8511DD24), + UINT64_C(0xE5FB5148678FEE02), UINT64_C(0x395A7D6A3905001F), + UINT64_C(0xCD31DF9552E0FDA1), UINT64_C(0x1190F3B70C6A13BC), + UINT64_C(0x757286D1EEF4209A), UINT64_C(0xA9D3AAF3B07ECE87), + UINT64_C(0xBDB66D1D2AC947D6), UINT64_C(0x6117413F7443A9CB), + UINT64_C(0x05F5345996DD9AED), UINT64_C(0xD954187BC85774F0), + UINT64_C(0x683C7E096AB8B902), UINT64_C(0xB49D522B3432571F), + UINT64_C(0xD07F274DD6AC6439), UINT64_C(0x0CDE0B6F88268A24), + UINT64_C(0x18BBCC8112910375), UINT64_C(0xC41AE0A34C1BED68), + UINT64_C(0xA0F895C5AE85DE4E), UINT64_C(0x7C59B9E7F00F3053), + UINT64_C(0x88321B189BEACDED), UINT64_C(0x5493373AC56023F0), + UINT64_C(0x3071425C27FE10D6), UINT64_C(0xECD06E7E7974FECB), + UINT64_C(0xF8B5A990E3C3779A), UINT64_C(0x241485B2BD499987), + UINT64_C(0x40F6F0D45FD7AAA1), UINT64_C(0x9C57DCF6015D44BC), + UINT64_C(0x362A2C073D23F174), UINT64_C(0xEA8B002563A91F69), + UINT64_C(0x8E69754381372C4F), UINT64_C(0x52C85961DFBDC252), + UINT64_C(0x46AD9E8F450A4B03), UINT64_C(0x9A0CB2AD1B80A51E), + UINT64_C(0xFEEEC7CBF91E9638), UINT64_C(0x224FEBE9A7947825), + UINT64_C(0xD6244916CC71859B), UINT64_C(0x0A85653492FB6B86), + UINT64_C(0x6E671052706558A0), UINT64_C(0xB2C63C702EEFB6BD), + UINT64_C(0xA6A3FB9EB4583FEC), UINT64_C(0x7A02D7BCEAD2D1F1), + UINT64_C(0x1EE0A2DA084CE2D7), UINT64_C(0xC2418EF856C60CCA), + UINT64_C(0x7329E88AF429C138), UINT64_C(0xAF88C4A8AAA32F25), + UINT64_C(0xCB6AB1CE483D1C03), UINT64_C(0x17CB9DEC16B7F21E), + UINT64_C(0x03AE5A028C007B4F), UINT64_C(0xDF0F7620D28A9552), + UINT64_C(0xBBED03463014A674), UINT64_C(0x674C2F646E9E4869), + UINT64_C(0x93278D9B057BB5D7), UINT64_C(0x4F86A1B95BF15BCA), + UINT64_C(0x2B64D4DFB96F68EC), UINT64_C(0xF7C5F8FDE7E586F1), + UINT64_C(0xE3A03F137D520FA0), UINT64_C(0x3F01133123D8E1BD), + UINT64_C(0x5BE36657C146D29B), UINT64_C(0x87424A759FCC3C86), + UINT64_C(0xBC2CA41CAE3691EC), UINT64_C(0x608D883EF0BC7FF1), + UINT64_C(0x046FFD5812224CD7), UINT64_C(0xD8CED17A4CA8A2CA), + UINT64_C(0xCCAB1694D61F2B9B), UINT64_C(0x100A3AB68895C586), + UINT64_C(0x74E84FD06A0BF6A0), UINT64_C(0xA84963F2348118BD), + UINT64_C(0x5C22C10D5F64E503), UINT64_C(0x8083ED2F01EE0B1E), + UINT64_C(0xE4619849E3703838), UINT64_C(0x38C0B46BBDFAD625), + UINT64_C(0x2CA57385274D5F74), UINT64_C(0xF0045FA779C7B169), + UINT64_C(0x94E62AC19B59824F), UINT64_C(0x484706E3C5D36C52), + UINT64_C(0xF92F6091673CA1A0), UINT64_C(0x258E4CB339B64FBD), + UINT64_C(0x416C39D5DB287C9B), UINT64_C(0x9DCD15F785A29286), + UINT64_C(0x89A8D2191F151BD7), UINT64_C(0x5509FE3B419FF5CA), + UINT64_C(0x31EB8B5DA301C6EC), UINT64_C(0xED4AA77FFD8B28F1), + UINT64_C(0x19210580966ED54F), UINT64_C(0xC58029A2C8E43B52), + UINT64_C(0xA1625CC42A7A0874), UINT64_C(0x7DC370E674F0E669), + UINT64_C(0x69A6B708EE476F38), UINT64_C(0xB5079B2AB0CD8125), + UINT64_C(0xD1E5EE4C5253B203), UINT64_C(0x0D44C26E0CD95C1E) + } +}; diff --git a/src/native/external/xz/src/liblzma/check/crc64_table_le.h b/src/native/external/xz/src/liblzma/check/crc64_table_le.h new file mode 100644 index 00000000000000..e40a8c82105ea3 --- /dev/null +++ b/src/native/external/xz/src/liblzma/check/crc64_table_le.h @@ -0,0 +1,523 @@ +// SPDX-License-Identifier: 0BSD + +// This file has been generated by crc64_tablegen.c. + +const uint64_t lzma_crc64_table[4][256] = { + { + UINT64_C(0x0000000000000000), UINT64_C(0xB32E4CBE03A75F6F), + UINT64_C(0xF4843657A840A05B), UINT64_C(0x47AA7AE9ABE7FF34), + UINT64_C(0x7BD0C384FF8F5E33), UINT64_C(0xC8FE8F3AFC28015C), + UINT64_C(0x8F54F5D357CFFE68), UINT64_C(0x3C7AB96D5468A107), + UINT64_C(0xF7A18709FF1EBC66), UINT64_C(0x448FCBB7FCB9E309), + UINT64_C(0x0325B15E575E1C3D), UINT64_C(0xB00BFDE054F94352), + UINT64_C(0x8C71448D0091E255), UINT64_C(0x3F5F08330336BD3A), + UINT64_C(0x78F572DAA8D1420E), UINT64_C(0xCBDB3E64AB761D61), + UINT64_C(0x7D9BA13851336649), UINT64_C(0xCEB5ED8652943926), + UINT64_C(0x891F976FF973C612), UINT64_C(0x3A31DBD1FAD4997D), + UINT64_C(0x064B62BCAEBC387A), UINT64_C(0xB5652E02AD1B6715), + UINT64_C(0xF2CF54EB06FC9821), UINT64_C(0x41E11855055BC74E), + UINT64_C(0x8A3A2631AE2DDA2F), UINT64_C(0x39146A8FAD8A8540), + UINT64_C(0x7EBE1066066D7A74), UINT64_C(0xCD905CD805CA251B), + UINT64_C(0xF1EAE5B551A2841C), UINT64_C(0x42C4A90B5205DB73), + UINT64_C(0x056ED3E2F9E22447), UINT64_C(0xB6409F5CFA457B28), + UINT64_C(0xFB374270A266CC92), UINT64_C(0x48190ECEA1C193FD), + UINT64_C(0x0FB374270A266CC9), UINT64_C(0xBC9D3899098133A6), + UINT64_C(0x80E781F45DE992A1), UINT64_C(0x33C9CD4A5E4ECDCE), + UINT64_C(0x7463B7A3F5A932FA), UINT64_C(0xC74DFB1DF60E6D95), + UINT64_C(0x0C96C5795D7870F4), UINT64_C(0xBFB889C75EDF2F9B), + UINT64_C(0xF812F32EF538D0AF), UINT64_C(0x4B3CBF90F69F8FC0), + UINT64_C(0x774606FDA2F72EC7), UINT64_C(0xC4684A43A15071A8), + UINT64_C(0x83C230AA0AB78E9C), UINT64_C(0x30EC7C140910D1F3), + UINT64_C(0x86ACE348F355AADB), UINT64_C(0x3582AFF6F0F2F5B4), + UINT64_C(0x7228D51F5B150A80), UINT64_C(0xC10699A158B255EF), + UINT64_C(0xFD7C20CC0CDAF4E8), UINT64_C(0x4E526C720F7DAB87), + UINT64_C(0x09F8169BA49A54B3), UINT64_C(0xBAD65A25A73D0BDC), + UINT64_C(0x710D64410C4B16BD), UINT64_C(0xC22328FF0FEC49D2), + UINT64_C(0x85895216A40BB6E6), UINT64_C(0x36A71EA8A7ACE989), + UINT64_C(0x0ADDA7C5F3C4488E), UINT64_C(0xB9F3EB7BF06317E1), + UINT64_C(0xFE5991925B84E8D5), UINT64_C(0x4D77DD2C5823B7BA), + UINT64_C(0x64B62BCAEBC387A1), UINT64_C(0xD7986774E864D8CE), + UINT64_C(0x90321D9D438327FA), UINT64_C(0x231C512340247895), + UINT64_C(0x1F66E84E144CD992), UINT64_C(0xAC48A4F017EB86FD), + UINT64_C(0xEBE2DE19BC0C79C9), UINT64_C(0x58CC92A7BFAB26A6), + UINT64_C(0x9317ACC314DD3BC7), UINT64_C(0x2039E07D177A64A8), + UINT64_C(0x67939A94BC9D9B9C), UINT64_C(0xD4BDD62ABF3AC4F3), + UINT64_C(0xE8C76F47EB5265F4), UINT64_C(0x5BE923F9E8F53A9B), + UINT64_C(0x1C4359104312C5AF), UINT64_C(0xAF6D15AE40B59AC0), + UINT64_C(0x192D8AF2BAF0E1E8), UINT64_C(0xAA03C64CB957BE87), + UINT64_C(0xEDA9BCA512B041B3), UINT64_C(0x5E87F01B11171EDC), + UINT64_C(0x62FD4976457FBFDB), UINT64_C(0xD1D305C846D8E0B4), + UINT64_C(0x96797F21ED3F1F80), UINT64_C(0x2557339FEE9840EF), + UINT64_C(0xEE8C0DFB45EE5D8E), UINT64_C(0x5DA24145464902E1), + UINT64_C(0x1A083BACEDAEFDD5), UINT64_C(0xA9267712EE09A2BA), + UINT64_C(0x955CCE7FBA6103BD), UINT64_C(0x267282C1B9C65CD2), + UINT64_C(0x61D8F8281221A3E6), UINT64_C(0xD2F6B4961186FC89), + UINT64_C(0x9F8169BA49A54B33), UINT64_C(0x2CAF25044A02145C), + UINT64_C(0x6B055FEDE1E5EB68), UINT64_C(0xD82B1353E242B407), + UINT64_C(0xE451AA3EB62A1500), UINT64_C(0x577FE680B58D4A6F), + UINT64_C(0x10D59C691E6AB55B), UINT64_C(0xA3FBD0D71DCDEA34), + UINT64_C(0x6820EEB3B6BBF755), UINT64_C(0xDB0EA20DB51CA83A), + UINT64_C(0x9CA4D8E41EFB570E), UINT64_C(0x2F8A945A1D5C0861), + UINT64_C(0x13F02D374934A966), UINT64_C(0xA0DE61894A93F609), + UINT64_C(0xE7741B60E174093D), UINT64_C(0x545A57DEE2D35652), + UINT64_C(0xE21AC88218962D7A), UINT64_C(0x5134843C1B317215), + UINT64_C(0x169EFED5B0D68D21), UINT64_C(0xA5B0B26BB371D24E), + UINT64_C(0x99CA0B06E7197349), UINT64_C(0x2AE447B8E4BE2C26), + UINT64_C(0x6D4E3D514F59D312), UINT64_C(0xDE6071EF4CFE8C7D), + UINT64_C(0x15BB4F8BE788911C), UINT64_C(0xA6950335E42FCE73), + UINT64_C(0xE13F79DC4FC83147), UINT64_C(0x521135624C6F6E28), + UINT64_C(0x6E6B8C0F1807CF2F), UINT64_C(0xDD45C0B11BA09040), + UINT64_C(0x9AEFBA58B0476F74), UINT64_C(0x29C1F6E6B3E0301B), + UINT64_C(0xC96C5795D7870F42), UINT64_C(0x7A421B2BD420502D), + UINT64_C(0x3DE861C27FC7AF19), UINT64_C(0x8EC62D7C7C60F076), + UINT64_C(0xB2BC941128085171), UINT64_C(0x0192D8AF2BAF0E1E), + UINT64_C(0x4638A2468048F12A), UINT64_C(0xF516EEF883EFAE45), + UINT64_C(0x3ECDD09C2899B324), UINT64_C(0x8DE39C222B3EEC4B), + UINT64_C(0xCA49E6CB80D9137F), UINT64_C(0x7967AA75837E4C10), + UINT64_C(0x451D1318D716ED17), UINT64_C(0xF6335FA6D4B1B278), + UINT64_C(0xB199254F7F564D4C), UINT64_C(0x02B769F17CF11223), + UINT64_C(0xB4F7F6AD86B4690B), UINT64_C(0x07D9BA1385133664), + UINT64_C(0x4073C0FA2EF4C950), UINT64_C(0xF35D8C442D53963F), + UINT64_C(0xCF273529793B3738), UINT64_C(0x7C0979977A9C6857), + UINT64_C(0x3BA3037ED17B9763), UINT64_C(0x888D4FC0D2DCC80C), + UINT64_C(0x435671A479AAD56D), UINT64_C(0xF0783D1A7A0D8A02), + UINT64_C(0xB7D247F3D1EA7536), UINT64_C(0x04FC0B4DD24D2A59), + UINT64_C(0x3886B22086258B5E), UINT64_C(0x8BA8FE9E8582D431), + UINT64_C(0xCC0284772E652B05), UINT64_C(0x7F2CC8C92DC2746A), + UINT64_C(0x325B15E575E1C3D0), UINT64_C(0x8175595B76469CBF), + UINT64_C(0xC6DF23B2DDA1638B), UINT64_C(0x75F16F0CDE063CE4), + UINT64_C(0x498BD6618A6E9DE3), UINT64_C(0xFAA59ADF89C9C28C), + UINT64_C(0xBD0FE036222E3DB8), UINT64_C(0x0E21AC88218962D7), + UINT64_C(0xC5FA92EC8AFF7FB6), UINT64_C(0x76D4DE52895820D9), + UINT64_C(0x317EA4BB22BFDFED), UINT64_C(0x8250E80521188082), + UINT64_C(0xBE2A516875702185), UINT64_C(0x0D041DD676D77EEA), + UINT64_C(0x4AAE673FDD3081DE), UINT64_C(0xF9802B81DE97DEB1), + UINT64_C(0x4FC0B4DD24D2A599), UINT64_C(0xFCEEF8632775FAF6), + UINT64_C(0xBB44828A8C9205C2), UINT64_C(0x086ACE348F355AAD), + UINT64_C(0x34107759DB5DFBAA), UINT64_C(0x873E3BE7D8FAA4C5), + UINT64_C(0xC094410E731D5BF1), UINT64_C(0x73BA0DB070BA049E), + UINT64_C(0xB86133D4DBCC19FF), UINT64_C(0x0B4F7F6AD86B4690), + UINT64_C(0x4CE50583738CB9A4), UINT64_C(0xFFCB493D702BE6CB), + UINT64_C(0xC3B1F050244347CC), UINT64_C(0x709FBCEE27E418A3), + UINT64_C(0x3735C6078C03E797), UINT64_C(0x841B8AB98FA4B8F8), + UINT64_C(0xADDA7C5F3C4488E3), UINT64_C(0x1EF430E13FE3D78C), + UINT64_C(0x595E4A08940428B8), UINT64_C(0xEA7006B697A377D7), + UINT64_C(0xD60ABFDBC3CBD6D0), UINT64_C(0x6524F365C06C89BF), + UINT64_C(0x228E898C6B8B768B), UINT64_C(0x91A0C532682C29E4), + UINT64_C(0x5A7BFB56C35A3485), UINT64_C(0xE955B7E8C0FD6BEA), + UINT64_C(0xAEFFCD016B1A94DE), UINT64_C(0x1DD181BF68BDCBB1), + UINT64_C(0x21AB38D23CD56AB6), UINT64_C(0x9285746C3F7235D9), + UINT64_C(0xD52F0E859495CAED), UINT64_C(0x6601423B97329582), + UINT64_C(0xD041DD676D77EEAA), UINT64_C(0x636F91D96ED0B1C5), + UINT64_C(0x24C5EB30C5374EF1), UINT64_C(0x97EBA78EC690119E), + UINT64_C(0xAB911EE392F8B099), UINT64_C(0x18BF525D915FEFF6), + UINT64_C(0x5F1528B43AB810C2), UINT64_C(0xEC3B640A391F4FAD), + UINT64_C(0x27E05A6E926952CC), UINT64_C(0x94CE16D091CE0DA3), + UINT64_C(0xD3646C393A29F297), UINT64_C(0x604A2087398EADF8), + UINT64_C(0x5C3099EA6DE60CFF), UINT64_C(0xEF1ED5546E415390), + UINT64_C(0xA8B4AFBDC5A6ACA4), UINT64_C(0x1B9AE303C601F3CB), + UINT64_C(0x56ED3E2F9E224471), UINT64_C(0xE5C372919D851B1E), + UINT64_C(0xA26908783662E42A), UINT64_C(0x114744C635C5BB45), + UINT64_C(0x2D3DFDAB61AD1A42), UINT64_C(0x9E13B115620A452D), + UINT64_C(0xD9B9CBFCC9EDBA19), UINT64_C(0x6A978742CA4AE576), + UINT64_C(0xA14CB926613CF817), UINT64_C(0x1262F598629BA778), + UINT64_C(0x55C88F71C97C584C), UINT64_C(0xE6E6C3CFCADB0723), + UINT64_C(0xDA9C7AA29EB3A624), UINT64_C(0x69B2361C9D14F94B), + UINT64_C(0x2E184CF536F3067F), UINT64_C(0x9D36004B35545910), + UINT64_C(0x2B769F17CF112238), UINT64_C(0x9858D3A9CCB67D57), + UINT64_C(0xDFF2A94067518263), UINT64_C(0x6CDCE5FE64F6DD0C), + UINT64_C(0x50A65C93309E7C0B), UINT64_C(0xE388102D33392364), + UINT64_C(0xA4226AC498DEDC50), UINT64_C(0x170C267A9B79833F), + UINT64_C(0xDCD7181E300F9E5E), UINT64_C(0x6FF954A033A8C131), + UINT64_C(0x28532E49984F3E05), UINT64_C(0x9B7D62F79BE8616A), + UINT64_C(0xA707DB9ACF80C06D), UINT64_C(0x14299724CC279F02), + UINT64_C(0x5383EDCD67C06036), UINT64_C(0xE0ADA17364673F59) + }, { + UINT64_C(0x0000000000000000), UINT64_C(0x54E979925CD0F10D), + UINT64_C(0xA9D2F324B9A1E21A), UINT64_C(0xFD3B8AB6E5711317), + UINT64_C(0xC17D4962DC4DDAB1), UINT64_C(0x959430F0809D2BBC), + UINT64_C(0x68AFBA4665EC38AB), UINT64_C(0x3C46C3D4393CC9A6), + UINT64_C(0x10223DEE1795ABE7), UINT64_C(0x44CB447C4B455AEA), + UINT64_C(0xB9F0CECAAE3449FD), UINT64_C(0xED19B758F2E4B8F0), + UINT64_C(0xD15F748CCBD87156), UINT64_C(0x85B60D1E9708805B), + UINT64_C(0x788D87A87279934C), UINT64_C(0x2C64FE3A2EA96241), + UINT64_C(0x20447BDC2F2B57CE), UINT64_C(0x74AD024E73FBA6C3), + UINT64_C(0x899688F8968AB5D4), UINT64_C(0xDD7FF16ACA5A44D9), + UINT64_C(0xE13932BEF3668D7F), UINT64_C(0xB5D04B2CAFB67C72), + UINT64_C(0x48EBC19A4AC76F65), UINT64_C(0x1C02B80816179E68), + UINT64_C(0x3066463238BEFC29), UINT64_C(0x648F3FA0646E0D24), + UINT64_C(0x99B4B516811F1E33), UINT64_C(0xCD5DCC84DDCFEF3E), + UINT64_C(0xF11B0F50E4F32698), UINT64_C(0xA5F276C2B823D795), + UINT64_C(0x58C9FC745D52C482), UINT64_C(0x0C2085E60182358F), + UINT64_C(0x4088F7B85E56AF9C), UINT64_C(0x14618E2A02865E91), + UINT64_C(0xE95A049CE7F74D86), UINT64_C(0xBDB37D0EBB27BC8B), + UINT64_C(0x81F5BEDA821B752D), UINT64_C(0xD51CC748DECB8420), + UINT64_C(0x28274DFE3BBA9737), UINT64_C(0x7CCE346C676A663A), + UINT64_C(0x50AACA5649C3047B), UINT64_C(0x0443B3C41513F576), + UINT64_C(0xF9783972F062E661), UINT64_C(0xAD9140E0ACB2176C), + UINT64_C(0x91D78334958EDECA), UINT64_C(0xC53EFAA6C95E2FC7), + UINT64_C(0x380570102C2F3CD0), UINT64_C(0x6CEC098270FFCDDD), + UINT64_C(0x60CC8C64717DF852), UINT64_C(0x3425F5F62DAD095F), + UINT64_C(0xC91E7F40C8DC1A48), UINT64_C(0x9DF706D2940CEB45), + UINT64_C(0xA1B1C506AD3022E3), UINT64_C(0xF558BC94F1E0D3EE), + UINT64_C(0x086336221491C0F9), UINT64_C(0x5C8A4FB0484131F4), + UINT64_C(0x70EEB18A66E853B5), UINT64_C(0x2407C8183A38A2B8), + UINT64_C(0xD93C42AEDF49B1AF), UINT64_C(0x8DD53B3C839940A2), + UINT64_C(0xB193F8E8BAA58904), UINT64_C(0xE57A817AE6757809), + UINT64_C(0x18410BCC03046B1E), UINT64_C(0x4CA8725E5FD49A13), + UINT64_C(0x8111EF70BCAD5F38), UINT64_C(0xD5F896E2E07DAE35), + UINT64_C(0x28C31C54050CBD22), UINT64_C(0x7C2A65C659DC4C2F), + UINT64_C(0x406CA61260E08589), UINT64_C(0x1485DF803C307484), + UINT64_C(0xE9BE5536D9416793), UINT64_C(0xBD572CA48591969E), + UINT64_C(0x9133D29EAB38F4DF), UINT64_C(0xC5DAAB0CF7E805D2), + UINT64_C(0x38E121BA129916C5), UINT64_C(0x6C0858284E49E7C8), + UINT64_C(0x504E9BFC77752E6E), UINT64_C(0x04A7E26E2BA5DF63), + UINT64_C(0xF99C68D8CED4CC74), UINT64_C(0xAD75114A92043D79), + UINT64_C(0xA15594AC938608F6), UINT64_C(0xF5BCED3ECF56F9FB), + UINT64_C(0x088767882A27EAEC), UINT64_C(0x5C6E1E1A76F71BE1), + UINT64_C(0x6028DDCE4FCBD247), UINT64_C(0x34C1A45C131B234A), + UINT64_C(0xC9FA2EEAF66A305D), UINT64_C(0x9D135778AABAC150), + UINT64_C(0xB177A9428413A311), UINT64_C(0xE59ED0D0D8C3521C), + UINT64_C(0x18A55A663DB2410B), UINT64_C(0x4C4C23F46162B006), + UINT64_C(0x700AE020585E79A0), UINT64_C(0x24E399B2048E88AD), + UINT64_C(0xD9D81304E1FF9BBA), UINT64_C(0x8D316A96BD2F6AB7), + UINT64_C(0xC19918C8E2FBF0A4), UINT64_C(0x9570615ABE2B01A9), + UINT64_C(0x684BEBEC5B5A12BE), UINT64_C(0x3CA2927E078AE3B3), + UINT64_C(0x00E451AA3EB62A15), UINT64_C(0x540D28386266DB18), + UINT64_C(0xA936A28E8717C80F), UINT64_C(0xFDDFDB1CDBC73902), + UINT64_C(0xD1BB2526F56E5B43), UINT64_C(0x85525CB4A9BEAA4E), + UINT64_C(0x7869D6024CCFB959), UINT64_C(0x2C80AF90101F4854), + UINT64_C(0x10C66C44292381F2), UINT64_C(0x442F15D675F370FF), + UINT64_C(0xB9149F60908263E8), UINT64_C(0xEDFDE6F2CC5292E5), + UINT64_C(0xE1DD6314CDD0A76A), UINT64_C(0xB5341A8691005667), + UINT64_C(0x480F903074714570), UINT64_C(0x1CE6E9A228A1B47D), + UINT64_C(0x20A02A76119D7DDB), UINT64_C(0x744953E44D4D8CD6), + UINT64_C(0x8972D952A83C9FC1), UINT64_C(0xDD9BA0C0F4EC6ECC), + UINT64_C(0xF1FF5EFADA450C8D), UINT64_C(0xA51627688695FD80), + UINT64_C(0x582DADDE63E4EE97), UINT64_C(0x0CC4D44C3F341F9A), + UINT64_C(0x308217980608D63C), UINT64_C(0x646B6E0A5AD82731), + UINT64_C(0x9950E4BCBFA93426), UINT64_C(0xCDB99D2EE379C52B), + UINT64_C(0x90FB71CAD654A0F5), UINT64_C(0xC41208588A8451F8), + UINT64_C(0x392982EE6FF542EF), UINT64_C(0x6DC0FB7C3325B3E2), + UINT64_C(0x518638A80A197A44), UINT64_C(0x056F413A56C98B49), + UINT64_C(0xF854CB8CB3B8985E), UINT64_C(0xACBDB21EEF686953), + UINT64_C(0x80D94C24C1C10B12), UINT64_C(0xD43035B69D11FA1F), + UINT64_C(0x290BBF007860E908), UINT64_C(0x7DE2C69224B01805), + UINT64_C(0x41A405461D8CD1A3), UINT64_C(0x154D7CD4415C20AE), + UINT64_C(0xE876F662A42D33B9), UINT64_C(0xBC9F8FF0F8FDC2B4), + UINT64_C(0xB0BF0A16F97FF73B), UINT64_C(0xE4567384A5AF0636), + UINT64_C(0x196DF93240DE1521), UINT64_C(0x4D8480A01C0EE42C), + UINT64_C(0x71C2437425322D8A), UINT64_C(0x252B3AE679E2DC87), + UINT64_C(0xD810B0509C93CF90), UINT64_C(0x8CF9C9C2C0433E9D), + UINT64_C(0xA09D37F8EEEA5CDC), UINT64_C(0xF4744E6AB23AADD1), + UINT64_C(0x094FC4DC574BBEC6), UINT64_C(0x5DA6BD4E0B9B4FCB), + UINT64_C(0x61E07E9A32A7866D), UINT64_C(0x350907086E777760), + UINT64_C(0xC8328DBE8B066477), UINT64_C(0x9CDBF42CD7D6957A), + UINT64_C(0xD073867288020F69), UINT64_C(0x849AFFE0D4D2FE64), + UINT64_C(0x79A1755631A3ED73), UINT64_C(0x2D480CC46D731C7E), + UINT64_C(0x110ECF10544FD5D8), UINT64_C(0x45E7B682089F24D5), + UINT64_C(0xB8DC3C34EDEE37C2), UINT64_C(0xEC3545A6B13EC6CF), + UINT64_C(0xC051BB9C9F97A48E), UINT64_C(0x94B8C20EC3475583), + UINT64_C(0x698348B826364694), UINT64_C(0x3D6A312A7AE6B799), + UINT64_C(0x012CF2FE43DA7E3F), UINT64_C(0x55C58B6C1F0A8F32), + UINT64_C(0xA8FE01DAFA7B9C25), UINT64_C(0xFC177848A6AB6D28), + UINT64_C(0xF037FDAEA72958A7), UINT64_C(0xA4DE843CFBF9A9AA), + UINT64_C(0x59E50E8A1E88BABD), UINT64_C(0x0D0C771842584BB0), + UINT64_C(0x314AB4CC7B648216), UINT64_C(0x65A3CD5E27B4731B), + UINT64_C(0x989847E8C2C5600C), UINT64_C(0xCC713E7A9E159101), + UINT64_C(0xE015C040B0BCF340), UINT64_C(0xB4FCB9D2EC6C024D), + UINT64_C(0x49C73364091D115A), UINT64_C(0x1D2E4AF655CDE057), + UINT64_C(0x216889226CF129F1), UINT64_C(0x7581F0B03021D8FC), + UINT64_C(0x88BA7A06D550CBEB), UINT64_C(0xDC53039489803AE6), + UINT64_C(0x11EA9EBA6AF9FFCD), UINT64_C(0x4503E72836290EC0), + UINT64_C(0xB8386D9ED3581DD7), UINT64_C(0xECD1140C8F88ECDA), + UINT64_C(0xD097D7D8B6B4257C), UINT64_C(0x847EAE4AEA64D471), + UINT64_C(0x794524FC0F15C766), UINT64_C(0x2DAC5D6E53C5366B), + UINT64_C(0x01C8A3547D6C542A), UINT64_C(0x5521DAC621BCA527), + UINT64_C(0xA81A5070C4CDB630), UINT64_C(0xFCF329E2981D473D), + UINT64_C(0xC0B5EA36A1218E9B), UINT64_C(0x945C93A4FDF17F96), + UINT64_C(0x6967191218806C81), UINT64_C(0x3D8E608044509D8C), + UINT64_C(0x31AEE56645D2A803), UINT64_C(0x65479CF41902590E), + UINT64_C(0x987C1642FC734A19), UINT64_C(0xCC956FD0A0A3BB14), + UINT64_C(0xF0D3AC04999F72B2), UINT64_C(0xA43AD596C54F83BF), + UINT64_C(0x59015F20203E90A8), UINT64_C(0x0DE826B27CEE61A5), + UINT64_C(0x218CD888524703E4), UINT64_C(0x7565A11A0E97F2E9), + UINT64_C(0x885E2BACEBE6E1FE), UINT64_C(0xDCB7523EB73610F3), + UINT64_C(0xE0F191EA8E0AD955), UINT64_C(0xB418E878D2DA2858), + UINT64_C(0x492362CE37AB3B4F), UINT64_C(0x1DCA1B5C6B7BCA42), + UINT64_C(0x5162690234AF5051), UINT64_C(0x058B1090687FA15C), + UINT64_C(0xF8B09A268D0EB24B), UINT64_C(0xAC59E3B4D1DE4346), + UINT64_C(0x901F2060E8E28AE0), UINT64_C(0xC4F659F2B4327BED), + UINT64_C(0x39CDD344514368FA), UINT64_C(0x6D24AAD60D9399F7), + UINT64_C(0x414054EC233AFBB6), UINT64_C(0x15A92D7E7FEA0ABB), + UINT64_C(0xE892A7C89A9B19AC), UINT64_C(0xBC7BDE5AC64BE8A1), + UINT64_C(0x803D1D8EFF772107), UINT64_C(0xD4D4641CA3A7D00A), + UINT64_C(0x29EFEEAA46D6C31D), UINT64_C(0x7D0697381A063210), + UINT64_C(0x712612DE1B84079F), UINT64_C(0x25CF6B4C4754F692), + UINT64_C(0xD8F4E1FAA225E585), UINT64_C(0x8C1D9868FEF51488), + UINT64_C(0xB05B5BBCC7C9DD2E), UINT64_C(0xE4B2222E9B192C23), + UINT64_C(0x1989A8987E683F34), UINT64_C(0x4D60D10A22B8CE39), + UINT64_C(0x61042F300C11AC78), UINT64_C(0x35ED56A250C15D75), + UINT64_C(0xC8D6DC14B5B04E62), UINT64_C(0x9C3FA586E960BF6F), + UINT64_C(0xA0796652D05C76C9), UINT64_C(0xF4901FC08C8C87C4), + UINT64_C(0x09AB957669FD94D3), UINT64_C(0x5D42ECE4352D65DE) + }, { + UINT64_C(0x0000000000000000), UINT64_C(0x3F0BE14A916A6DCB), + UINT64_C(0x7E17C29522D4DB96), UINT64_C(0x411C23DFB3BEB65D), + UINT64_C(0xFC2F852A45A9B72C), UINT64_C(0xC3246460D4C3DAE7), + UINT64_C(0x823847BF677D6CBA), UINT64_C(0xBD33A6F5F6170171), + UINT64_C(0x6A87A57F245D70DD), UINT64_C(0x558C4435B5371D16), + UINT64_C(0x149067EA0689AB4B), UINT64_C(0x2B9B86A097E3C680), + UINT64_C(0x96A8205561F4C7F1), UINT64_C(0xA9A3C11FF09EAA3A), + UINT64_C(0xE8BFE2C043201C67), UINT64_C(0xD7B4038AD24A71AC), + UINT64_C(0xD50F4AFE48BAE1BA), UINT64_C(0xEA04ABB4D9D08C71), + UINT64_C(0xAB18886B6A6E3A2C), UINT64_C(0x94136921FB0457E7), + UINT64_C(0x2920CFD40D135696), UINT64_C(0x162B2E9E9C793B5D), + UINT64_C(0x57370D412FC78D00), UINT64_C(0x683CEC0BBEADE0CB), + UINT64_C(0xBF88EF816CE79167), UINT64_C(0x80830ECBFD8DFCAC), + UINT64_C(0xC19F2D144E334AF1), UINT64_C(0xFE94CC5EDF59273A), + UINT64_C(0x43A76AAB294E264B), UINT64_C(0x7CAC8BE1B8244B80), + UINT64_C(0x3DB0A83E0B9AFDDD), UINT64_C(0x02BB49749AF09016), + UINT64_C(0x38C63AD73E7BDDF1), UINT64_C(0x07CDDB9DAF11B03A), + UINT64_C(0x46D1F8421CAF0667), UINT64_C(0x79DA19088DC56BAC), + UINT64_C(0xC4E9BFFD7BD26ADD), UINT64_C(0xFBE25EB7EAB80716), + UINT64_C(0xBAFE7D685906B14B), UINT64_C(0x85F59C22C86CDC80), + UINT64_C(0x52419FA81A26AD2C), UINT64_C(0x6D4A7EE28B4CC0E7), + UINT64_C(0x2C565D3D38F276BA), UINT64_C(0x135DBC77A9981B71), + UINT64_C(0xAE6E1A825F8F1A00), UINT64_C(0x9165FBC8CEE577CB), + UINT64_C(0xD079D8177D5BC196), UINT64_C(0xEF72395DEC31AC5D), + UINT64_C(0xEDC9702976C13C4B), UINT64_C(0xD2C29163E7AB5180), + UINT64_C(0x93DEB2BC5415E7DD), UINT64_C(0xACD553F6C57F8A16), + UINT64_C(0x11E6F50333688B67), UINT64_C(0x2EED1449A202E6AC), + UINT64_C(0x6FF1379611BC50F1), UINT64_C(0x50FAD6DC80D63D3A), + UINT64_C(0x874ED556529C4C96), UINT64_C(0xB845341CC3F6215D), + UINT64_C(0xF95917C370489700), UINT64_C(0xC652F689E122FACB), + UINT64_C(0x7B61507C1735FBBA), UINT64_C(0x446AB136865F9671), + UINT64_C(0x057692E935E1202C), UINT64_C(0x3A7D73A3A48B4DE7), + UINT64_C(0x718C75AE7CF7BBE2), UINT64_C(0x4E8794E4ED9DD629), + UINT64_C(0x0F9BB73B5E236074), UINT64_C(0x30905671CF490DBF), + UINT64_C(0x8DA3F084395E0CCE), UINT64_C(0xB2A811CEA8346105), + UINT64_C(0xF3B432111B8AD758), UINT64_C(0xCCBFD35B8AE0BA93), + UINT64_C(0x1B0BD0D158AACB3F), UINT64_C(0x2400319BC9C0A6F4), + UINT64_C(0x651C12447A7E10A9), UINT64_C(0x5A17F30EEB147D62), + UINT64_C(0xE72455FB1D037C13), UINT64_C(0xD82FB4B18C6911D8), + UINT64_C(0x9933976E3FD7A785), UINT64_C(0xA6387624AEBDCA4E), + UINT64_C(0xA4833F50344D5A58), UINT64_C(0x9B88DE1AA5273793), + UINT64_C(0xDA94FDC5169981CE), UINT64_C(0xE59F1C8F87F3EC05), + UINT64_C(0x58ACBA7A71E4ED74), UINT64_C(0x67A75B30E08E80BF), + UINT64_C(0x26BB78EF533036E2), UINT64_C(0x19B099A5C25A5B29), + UINT64_C(0xCE049A2F10102A85), UINT64_C(0xF10F7B65817A474E), + UINT64_C(0xB01358BA32C4F113), UINT64_C(0x8F18B9F0A3AE9CD8), + UINT64_C(0x322B1F0555B99DA9), UINT64_C(0x0D20FE4FC4D3F062), + UINT64_C(0x4C3CDD90776D463F), UINT64_C(0x73373CDAE6072BF4), + UINT64_C(0x494A4F79428C6613), UINT64_C(0x7641AE33D3E60BD8), + UINT64_C(0x375D8DEC6058BD85), UINT64_C(0x08566CA6F132D04E), + UINT64_C(0xB565CA530725D13F), UINT64_C(0x8A6E2B19964FBCF4), + UINT64_C(0xCB7208C625F10AA9), UINT64_C(0xF479E98CB49B6762), + UINT64_C(0x23CDEA0666D116CE), UINT64_C(0x1CC60B4CF7BB7B05), + UINT64_C(0x5DDA28934405CD58), UINT64_C(0x62D1C9D9D56FA093), + UINT64_C(0xDFE26F2C2378A1E2), UINT64_C(0xE0E98E66B212CC29), + UINT64_C(0xA1F5ADB901AC7A74), UINT64_C(0x9EFE4CF390C617BF), + UINT64_C(0x9C4505870A3687A9), UINT64_C(0xA34EE4CD9B5CEA62), + UINT64_C(0xE252C71228E25C3F), UINT64_C(0xDD592658B98831F4), + UINT64_C(0x606A80AD4F9F3085), UINT64_C(0x5F6161E7DEF55D4E), + UINT64_C(0x1E7D42386D4BEB13), UINT64_C(0x2176A372FC2186D8), + UINT64_C(0xF6C2A0F82E6BF774), UINT64_C(0xC9C941B2BF019ABF), + UINT64_C(0x88D5626D0CBF2CE2), UINT64_C(0xB7DE83279DD54129), + UINT64_C(0x0AED25D26BC24058), UINT64_C(0x35E6C498FAA82D93), + UINT64_C(0x74FAE74749169BCE), UINT64_C(0x4BF1060DD87CF605), + UINT64_C(0xE318EB5CF9EF77C4), UINT64_C(0xDC130A1668851A0F), + UINT64_C(0x9D0F29C9DB3BAC52), UINT64_C(0xA204C8834A51C199), + UINT64_C(0x1F376E76BC46C0E8), UINT64_C(0x203C8F3C2D2CAD23), + UINT64_C(0x6120ACE39E921B7E), UINT64_C(0x5E2B4DA90FF876B5), + UINT64_C(0x899F4E23DDB20719), UINT64_C(0xB694AF694CD86AD2), + UINT64_C(0xF7888CB6FF66DC8F), UINT64_C(0xC8836DFC6E0CB144), + UINT64_C(0x75B0CB09981BB035), UINT64_C(0x4ABB2A430971DDFE), + UINT64_C(0x0BA7099CBACF6BA3), UINT64_C(0x34ACE8D62BA50668), + UINT64_C(0x3617A1A2B155967E), UINT64_C(0x091C40E8203FFBB5), + UINT64_C(0x4800633793814DE8), UINT64_C(0x770B827D02EB2023), + UINT64_C(0xCA382488F4FC2152), UINT64_C(0xF533C5C265964C99), + UINT64_C(0xB42FE61DD628FAC4), UINT64_C(0x8B2407574742970F), + UINT64_C(0x5C9004DD9508E6A3), UINT64_C(0x639BE59704628B68), + UINT64_C(0x2287C648B7DC3D35), UINT64_C(0x1D8C270226B650FE), + UINT64_C(0xA0BF81F7D0A1518F), UINT64_C(0x9FB460BD41CB3C44), + UINT64_C(0xDEA84362F2758A19), UINT64_C(0xE1A3A228631FE7D2), + UINT64_C(0xDBDED18BC794AA35), UINT64_C(0xE4D530C156FEC7FE), + UINT64_C(0xA5C9131EE54071A3), UINT64_C(0x9AC2F254742A1C68), + UINT64_C(0x27F154A1823D1D19), UINT64_C(0x18FAB5EB135770D2), + UINT64_C(0x59E69634A0E9C68F), UINT64_C(0x66ED777E3183AB44), + UINT64_C(0xB15974F4E3C9DAE8), UINT64_C(0x8E5295BE72A3B723), + UINT64_C(0xCF4EB661C11D017E), UINT64_C(0xF045572B50776CB5), + UINT64_C(0x4D76F1DEA6606DC4), UINT64_C(0x727D1094370A000F), + UINT64_C(0x3361334B84B4B652), UINT64_C(0x0C6AD20115DEDB99), + UINT64_C(0x0ED19B758F2E4B8F), UINT64_C(0x31DA7A3F1E442644), + UINT64_C(0x70C659E0ADFA9019), UINT64_C(0x4FCDB8AA3C90FDD2), + UINT64_C(0xF2FE1E5FCA87FCA3), UINT64_C(0xCDF5FF155BED9168), + UINT64_C(0x8CE9DCCAE8532735), UINT64_C(0xB3E23D8079394AFE), + UINT64_C(0x64563E0AAB733B52), UINT64_C(0x5B5DDF403A195699), + UINT64_C(0x1A41FC9F89A7E0C4), UINT64_C(0x254A1DD518CD8D0F), + UINT64_C(0x9879BB20EEDA8C7E), UINT64_C(0xA7725A6A7FB0E1B5), + UINT64_C(0xE66E79B5CC0E57E8), UINT64_C(0xD96598FF5D643A23), + UINT64_C(0x92949EF28518CC26), UINT64_C(0xAD9F7FB81472A1ED), + UINT64_C(0xEC835C67A7CC17B0), UINT64_C(0xD388BD2D36A67A7B), + UINT64_C(0x6EBB1BD8C0B17B0A), UINT64_C(0x51B0FA9251DB16C1), + UINT64_C(0x10ACD94DE265A09C), UINT64_C(0x2FA73807730FCD57), + UINT64_C(0xF8133B8DA145BCFB), UINT64_C(0xC718DAC7302FD130), + UINT64_C(0x8604F9188391676D), UINT64_C(0xB90F185212FB0AA6), + UINT64_C(0x043CBEA7E4EC0BD7), UINT64_C(0x3B375FED7586661C), + UINT64_C(0x7A2B7C32C638D041), UINT64_C(0x45209D785752BD8A), + UINT64_C(0x479BD40CCDA22D9C), UINT64_C(0x789035465CC84057), + UINT64_C(0x398C1699EF76F60A), UINT64_C(0x0687F7D37E1C9BC1), + UINT64_C(0xBBB45126880B9AB0), UINT64_C(0x84BFB06C1961F77B), + UINT64_C(0xC5A393B3AADF4126), UINT64_C(0xFAA872F93BB52CED), + UINT64_C(0x2D1C7173E9FF5D41), UINT64_C(0x121790397895308A), + UINT64_C(0x530BB3E6CB2B86D7), UINT64_C(0x6C0052AC5A41EB1C), + UINT64_C(0xD133F459AC56EA6D), UINT64_C(0xEE3815133D3C87A6), + UINT64_C(0xAF2436CC8E8231FB), UINT64_C(0x902FD7861FE85C30), + UINT64_C(0xAA52A425BB6311D7), UINT64_C(0x9559456F2A097C1C), + UINT64_C(0xD44566B099B7CA41), UINT64_C(0xEB4E87FA08DDA78A), + UINT64_C(0x567D210FFECAA6FB), UINT64_C(0x6976C0456FA0CB30), + UINT64_C(0x286AE39ADC1E7D6D), UINT64_C(0x176102D04D7410A6), + UINT64_C(0xC0D5015A9F3E610A), UINT64_C(0xFFDEE0100E540CC1), + UINT64_C(0xBEC2C3CFBDEABA9C), UINT64_C(0x81C922852C80D757), + UINT64_C(0x3CFA8470DA97D626), UINT64_C(0x03F1653A4BFDBBED), + UINT64_C(0x42ED46E5F8430DB0), UINT64_C(0x7DE6A7AF6929607B), + UINT64_C(0x7F5DEEDBF3D9F06D), UINT64_C(0x40560F9162B39DA6), + UINT64_C(0x014A2C4ED10D2BFB), UINT64_C(0x3E41CD0440674630), + UINT64_C(0x83726BF1B6704741), UINT64_C(0xBC798ABB271A2A8A), + UINT64_C(0xFD65A96494A49CD7), UINT64_C(0xC26E482E05CEF11C), + UINT64_C(0x15DA4BA4D78480B0), UINT64_C(0x2AD1AAEE46EEED7B), + UINT64_C(0x6BCD8931F5505B26), UINT64_C(0x54C6687B643A36ED), + UINT64_C(0xE9F5CE8E922D379C), UINT64_C(0xD6FE2FC403475A57), + UINT64_C(0x97E20C1BB0F9EC0A), UINT64_C(0xA8E9ED51219381C1) + }, { + UINT64_C(0x0000000000000000), UINT64_C(0x1DEE8A5E222CA1DC), + UINT64_C(0x3BDD14BC445943B8), UINT64_C(0x26339EE26675E264), + UINT64_C(0x77BA297888B28770), UINT64_C(0x6A54A326AA9E26AC), + UINT64_C(0x4C673DC4CCEBC4C8), UINT64_C(0x5189B79AEEC76514), + UINT64_C(0xEF7452F111650EE0), UINT64_C(0xF29AD8AF3349AF3C), + UINT64_C(0xD4A9464D553C4D58), UINT64_C(0xC947CC137710EC84), + UINT64_C(0x98CE7B8999D78990), UINT64_C(0x8520F1D7BBFB284C), + UINT64_C(0xA3136F35DD8ECA28), UINT64_C(0xBEFDE56BFFA26BF4), + UINT64_C(0x4C300AC98DC40345), UINT64_C(0x51DE8097AFE8A299), + UINT64_C(0x77ED1E75C99D40FD), UINT64_C(0x6A03942BEBB1E121), + UINT64_C(0x3B8A23B105768435), UINT64_C(0x2664A9EF275A25E9), + UINT64_C(0x0057370D412FC78D), UINT64_C(0x1DB9BD5363036651), + UINT64_C(0xA34458389CA10DA5), UINT64_C(0xBEAAD266BE8DAC79), + UINT64_C(0x98994C84D8F84E1D), UINT64_C(0x8577C6DAFAD4EFC1), + UINT64_C(0xD4FE714014138AD5), UINT64_C(0xC910FB1E363F2B09), + UINT64_C(0xEF2365FC504AC96D), UINT64_C(0xF2CDEFA2726668B1), + UINT64_C(0x986015931B88068A), UINT64_C(0x858E9FCD39A4A756), + UINT64_C(0xA3BD012F5FD14532), UINT64_C(0xBE538B717DFDE4EE), + UINT64_C(0xEFDA3CEB933A81FA), UINT64_C(0xF234B6B5B1162026), + UINT64_C(0xD4072857D763C242), UINT64_C(0xC9E9A209F54F639E), + UINT64_C(0x771447620AED086A), UINT64_C(0x6AFACD3C28C1A9B6), + UINT64_C(0x4CC953DE4EB44BD2), UINT64_C(0x5127D9806C98EA0E), + UINT64_C(0x00AE6E1A825F8F1A), UINT64_C(0x1D40E444A0732EC6), + UINT64_C(0x3B737AA6C606CCA2), UINT64_C(0x269DF0F8E42A6D7E), + UINT64_C(0xD4501F5A964C05CF), UINT64_C(0xC9BE9504B460A413), + UINT64_C(0xEF8D0BE6D2154677), UINT64_C(0xF26381B8F039E7AB), + UINT64_C(0xA3EA36221EFE82BF), UINT64_C(0xBE04BC7C3CD22363), + UINT64_C(0x9837229E5AA7C107), UINT64_C(0x85D9A8C0788B60DB), + UINT64_C(0x3B244DAB87290B2F), UINT64_C(0x26CAC7F5A505AAF3), + UINT64_C(0x00F95917C3704897), UINT64_C(0x1D17D349E15CE94B), + UINT64_C(0x4C9E64D30F9B8C5F), UINT64_C(0x5170EE8D2DB72D83), + UINT64_C(0x7743706F4BC2CFE7), UINT64_C(0x6AADFA3169EE6E3B), + UINT64_C(0xA218840D981E1391), UINT64_C(0xBFF60E53BA32B24D), + UINT64_C(0x99C590B1DC475029), UINT64_C(0x842B1AEFFE6BF1F5), + UINT64_C(0xD5A2AD7510AC94E1), UINT64_C(0xC84C272B3280353D), + UINT64_C(0xEE7FB9C954F5D759), UINT64_C(0xF391339776D97685), + UINT64_C(0x4D6CD6FC897B1D71), UINT64_C(0x50825CA2AB57BCAD), + UINT64_C(0x76B1C240CD225EC9), UINT64_C(0x6B5F481EEF0EFF15), + UINT64_C(0x3AD6FF8401C99A01), UINT64_C(0x273875DA23E53BDD), + UINT64_C(0x010BEB384590D9B9), UINT64_C(0x1CE5616667BC7865), + UINT64_C(0xEE288EC415DA10D4), UINT64_C(0xF3C6049A37F6B108), + UINT64_C(0xD5F59A785183536C), UINT64_C(0xC81B102673AFF2B0), + UINT64_C(0x9992A7BC9D6897A4), UINT64_C(0x847C2DE2BF443678), + UINT64_C(0xA24FB300D931D41C), UINT64_C(0xBFA1395EFB1D75C0), + UINT64_C(0x015CDC3504BF1E34), UINT64_C(0x1CB2566B2693BFE8), + UINT64_C(0x3A81C88940E65D8C), UINT64_C(0x276F42D762CAFC50), + UINT64_C(0x76E6F54D8C0D9944), UINT64_C(0x6B087F13AE213898), + UINT64_C(0x4D3BE1F1C854DAFC), UINT64_C(0x50D56BAFEA787B20), + UINT64_C(0x3A78919E8396151B), UINT64_C(0x27961BC0A1BAB4C7), + UINT64_C(0x01A58522C7CF56A3), UINT64_C(0x1C4B0F7CE5E3F77F), + UINT64_C(0x4DC2B8E60B24926B), UINT64_C(0x502C32B8290833B7), + UINT64_C(0x761FAC5A4F7DD1D3), UINT64_C(0x6BF126046D51700F), + UINT64_C(0xD50CC36F92F31BFB), UINT64_C(0xC8E24931B0DFBA27), + UINT64_C(0xEED1D7D3D6AA5843), UINT64_C(0xF33F5D8DF486F99F), + UINT64_C(0xA2B6EA171A419C8B), UINT64_C(0xBF586049386D3D57), + UINT64_C(0x996BFEAB5E18DF33), UINT64_C(0x848574F57C347EEF), + UINT64_C(0x76489B570E52165E), UINT64_C(0x6BA611092C7EB782), + UINT64_C(0x4D958FEB4A0B55E6), UINT64_C(0x507B05B56827F43A), + UINT64_C(0x01F2B22F86E0912E), UINT64_C(0x1C1C3871A4CC30F2), + UINT64_C(0x3A2FA693C2B9D296), UINT64_C(0x27C12CCDE095734A), + UINT64_C(0x993CC9A61F3718BE), UINT64_C(0x84D243F83D1BB962), + UINT64_C(0xA2E1DD1A5B6E5B06), UINT64_C(0xBF0F57447942FADA), + UINT64_C(0xEE86E0DE97859FCE), UINT64_C(0xF3686A80B5A93E12), + UINT64_C(0xD55BF462D3DCDC76), UINT64_C(0xC8B57E3CF1F07DAA), + UINT64_C(0xD6E9A7309F3239A7), UINT64_C(0xCB072D6EBD1E987B), + UINT64_C(0xED34B38CDB6B7A1F), UINT64_C(0xF0DA39D2F947DBC3), + UINT64_C(0xA1538E481780BED7), UINT64_C(0xBCBD041635AC1F0B), + UINT64_C(0x9A8E9AF453D9FD6F), UINT64_C(0x876010AA71F55CB3), + UINT64_C(0x399DF5C18E573747), UINT64_C(0x24737F9FAC7B969B), + UINT64_C(0x0240E17DCA0E74FF), UINT64_C(0x1FAE6B23E822D523), + UINT64_C(0x4E27DCB906E5B037), UINT64_C(0x53C956E724C911EB), + UINT64_C(0x75FAC80542BCF38F), UINT64_C(0x6814425B60905253), + UINT64_C(0x9AD9ADF912F63AE2), UINT64_C(0x873727A730DA9B3E), + UINT64_C(0xA104B94556AF795A), UINT64_C(0xBCEA331B7483D886), + UINT64_C(0xED6384819A44BD92), UINT64_C(0xF08D0EDFB8681C4E), + UINT64_C(0xD6BE903DDE1DFE2A), UINT64_C(0xCB501A63FC315FF6), + UINT64_C(0x75ADFF0803933402), UINT64_C(0x6843755621BF95DE), + UINT64_C(0x4E70EBB447CA77BA), UINT64_C(0x539E61EA65E6D666), + UINT64_C(0x0217D6708B21B372), UINT64_C(0x1FF95C2EA90D12AE), + UINT64_C(0x39CAC2CCCF78F0CA), UINT64_C(0x24244892ED545116), + UINT64_C(0x4E89B2A384BA3F2D), UINT64_C(0x536738FDA6969EF1), + UINT64_C(0x7554A61FC0E37C95), UINT64_C(0x68BA2C41E2CFDD49), + UINT64_C(0x39339BDB0C08B85D), UINT64_C(0x24DD11852E241981), + UINT64_C(0x02EE8F674851FBE5), UINT64_C(0x1F0005396A7D5A39), + UINT64_C(0xA1FDE05295DF31CD), UINT64_C(0xBC136A0CB7F39011), + UINT64_C(0x9A20F4EED1867275), UINT64_C(0x87CE7EB0F3AAD3A9), + UINT64_C(0xD647C92A1D6DB6BD), UINT64_C(0xCBA943743F411761), + UINT64_C(0xED9ADD965934F505), UINT64_C(0xF07457C87B1854D9), + UINT64_C(0x02B9B86A097E3C68), UINT64_C(0x1F5732342B529DB4), + UINT64_C(0x3964ACD64D277FD0), UINT64_C(0x248A26886F0BDE0C), + UINT64_C(0x7503911281CCBB18), UINT64_C(0x68ED1B4CA3E01AC4), + UINT64_C(0x4EDE85AEC595F8A0), UINT64_C(0x53300FF0E7B9597C), + UINT64_C(0xEDCDEA9B181B3288), UINT64_C(0xF02360C53A379354), + UINT64_C(0xD610FE275C427130), UINT64_C(0xCBFE74797E6ED0EC), + UINT64_C(0x9A77C3E390A9B5F8), UINT64_C(0x879949BDB2851424), + UINT64_C(0xA1AAD75FD4F0F640), UINT64_C(0xBC445D01F6DC579C), + UINT64_C(0x74F1233D072C2A36), UINT64_C(0x691FA96325008BEA), + UINT64_C(0x4F2C37814375698E), UINT64_C(0x52C2BDDF6159C852), + UINT64_C(0x034B0A458F9EAD46), UINT64_C(0x1EA5801BADB20C9A), + UINT64_C(0x38961EF9CBC7EEFE), UINT64_C(0x257894A7E9EB4F22), + UINT64_C(0x9B8571CC164924D6), UINT64_C(0x866BFB923465850A), + UINT64_C(0xA05865705210676E), UINT64_C(0xBDB6EF2E703CC6B2), + UINT64_C(0xEC3F58B49EFBA3A6), UINT64_C(0xF1D1D2EABCD7027A), + UINT64_C(0xD7E24C08DAA2E01E), UINT64_C(0xCA0CC656F88E41C2), + UINT64_C(0x38C129F48AE82973), UINT64_C(0x252FA3AAA8C488AF), + UINT64_C(0x031C3D48CEB16ACB), UINT64_C(0x1EF2B716EC9DCB17), + UINT64_C(0x4F7B008C025AAE03), UINT64_C(0x52958AD220760FDF), + UINT64_C(0x74A614304603EDBB), UINT64_C(0x69489E6E642F4C67), + UINT64_C(0xD7B57B059B8D2793), UINT64_C(0xCA5BF15BB9A1864F), + UINT64_C(0xEC686FB9DFD4642B), UINT64_C(0xF186E5E7FDF8C5F7), + UINT64_C(0xA00F527D133FA0E3), UINT64_C(0xBDE1D8233113013F), + UINT64_C(0x9BD246C15766E35B), UINT64_C(0x863CCC9F754A4287), + UINT64_C(0xEC9136AE1CA42CBC), UINT64_C(0xF17FBCF03E888D60), + UINT64_C(0xD74C221258FD6F04), UINT64_C(0xCAA2A84C7AD1CED8), + UINT64_C(0x9B2B1FD69416ABCC), UINT64_C(0x86C59588B63A0A10), + UINT64_C(0xA0F60B6AD04FE874), UINT64_C(0xBD188134F26349A8), + UINT64_C(0x03E5645F0DC1225C), UINT64_C(0x1E0BEE012FED8380), + UINT64_C(0x383870E3499861E4), UINT64_C(0x25D6FABD6BB4C038), + UINT64_C(0x745F4D278573A52C), UINT64_C(0x69B1C779A75F04F0), + UINT64_C(0x4F82599BC12AE694), UINT64_C(0x526CD3C5E3064748), + UINT64_C(0xA0A13C6791602FF9), UINT64_C(0xBD4FB639B34C8E25), + UINT64_C(0x9B7C28DBD5396C41), UINT64_C(0x8692A285F715CD9D), + UINT64_C(0xD71B151F19D2A889), UINT64_C(0xCAF59F413BFE0955), + UINT64_C(0xECC601A35D8BEB31), UINT64_C(0xF1288BFD7FA74AED), + UINT64_C(0x4FD56E9680052119), UINT64_C(0x523BE4C8A22980C5), + UINT64_C(0x74087A2AC45C62A1), UINT64_C(0x69E6F074E670C37D), + UINT64_C(0x386F47EE08B7A669), UINT64_C(0x2581CDB02A9B07B5), + UINT64_C(0x03B253524CEEE5D1), UINT64_C(0x1E5CD90C6EC2440D) + } +}; diff --git a/src/native/external/xz/src/liblzma/check/crc64_tablegen.c b/src/native/external/xz/src/liblzma/check/crc64_tablegen.c new file mode 100644 index 00000000000000..2035127a1123f3 --- /dev/null +++ b/src/native/external/xz/src/liblzma/check/crc64_tablegen.c @@ -0,0 +1,89 @@ +// SPDX-License-Identifier: 0BSD + +/////////////////////////////////////////////////////////////////////////////// +// +/// \file crc64_tablegen.c +/// \brief Generate crc64_table_le.h and crc64_table_be.h +/// +/// Compiling: gcc -std=c99 -o crc64_tablegen crc64_tablegen.c +/// Add -DWORDS_BIGENDIAN to generate big endian table. +// +// Author: Lasse Collin +// +/////////////////////////////////////////////////////////////////////////////// + +#include +#include "../../common/tuklib_integer.h" + + +static uint64_t crc64_table[4][256]; + + +extern void +init_crc64_table(void) +{ + static const uint64_t poly64 = UINT64_C(0xC96C5795D7870F42); + + for (size_t s = 0; s < 4; ++s) { + for (size_t b = 0; b < 256; ++b) { + uint64_t r = s == 0 ? b : crc64_table[s - 1][b]; + + for (size_t i = 0; i < 8; ++i) { + if (r & 1) + r = (r >> 1) ^ poly64; + else + r >>= 1; + } + + crc64_table[s][b] = r; + } + } + +#ifdef WORDS_BIGENDIAN + for (size_t s = 0; s < 4; ++s) + for (size_t b = 0; b < 256; ++b) + crc64_table[s][b] = byteswap64(crc64_table[s][b]); +#endif + + return; +} + + +static void +print_crc64_table(void) +{ + // Split the SPDX string so that it won't accidentally match + // when tools search for the string. + printf("// SPDX" "-License-Identifier" ": 0BSD\n\n" + "// This file has been generated by crc64_tablegen.c.\n\n" + "const uint64_t lzma_crc64_table[4][256] = {\n\t{"); + + for (size_t s = 0; s < 4; ++s) { + for (size_t b = 0; b < 256; ++b) { + if ((b % 2) == 0) + printf("\n\t\t"); + + printf("UINT64_C(0x%016" PRIX64 ")", + crc64_table[s][b]); + + if (b != 255) + printf(",%s", (b+1) % 2 == 0 ? "" : " "); + } + + if (s == 3) + printf("\n\t}\n};\n"); + else + printf("\n\t}, {"); + } + + return; +} + + +int +main(void) +{ + init_crc64_table(); + print_crc64_table(); + return 0; +} diff --git a/src/native/external/xz/src/liblzma/check/crc64_x86.S b/src/native/external/xz/src/liblzma/check/crc64_x86.S new file mode 100644 index 00000000000000..df50018653b408 --- /dev/null +++ b/src/native/external/xz/src/liblzma/check/crc64_x86.S @@ -0,0 +1,291 @@ +/* SPDX-License-Identifier: 0BSD */ + +/* + * Speed-optimized CRC64 using slicing-by-four algorithm + * + * This uses only i386 instructions, but it is optimized for i686 and later + * (including e.g. Pentium II/III/IV, Athlon XP, and Core 2). + * + * Authors: Igor Pavlov (original CRC32 assembly code) + * Lasse Collin (CRC64 adaptation of the modified CRC32 code) + * + * This code needs lzma_crc64_table, which can be created using the + * following C code: + +uint64_t lzma_crc64_table[4][256]; + +void +init_table(void) +{ + // ECMA-182 + static const uint64_t poly64 = UINT64_C(0xC96C5795D7870F42); + + for (size_t s = 0; s < 4; ++s) { + for (size_t b = 0; b < 256; ++b) { + uint64_t r = s == 0 ? b : lzma_crc64_table[s - 1][b]; + + for (size_t i = 0; i < 8; ++i) { + if (r & 1) + r = (r >> 1) ^ poly64; + else + r >>= 1; + } + + lzma_crc64_table[s][b] = r; + } + } +} + + * The prototype of the CRC64 function: + * extern uint64_t lzma_crc64(const uint8_t *buf, size_t size, uint64_t crc); + */ + +/* When Intel CET is enabled, include in assembly code to mark + Intel CET support. */ +#ifdef __CET__ +# include +#else +# define _CET_ENDBR +#endif + +/* + * On some systems, the functions need to be prefixed. The prefix is + * usually an underscore. + */ +#ifndef __USER_LABEL_PREFIX__ +# define __USER_LABEL_PREFIX__ +#endif +#define MAKE_SYM_CAT(prefix, sym) prefix ## sym +#define MAKE_SYM(prefix, sym) MAKE_SYM_CAT(prefix, sym) +#define LZMA_CRC64 MAKE_SYM(__USER_LABEL_PREFIX__, lzma_crc64_generic) +#define LZMA_CRC64_TABLE MAKE_SYM(__USER_LABEL_PREFIX__, lzma_crc64_table) + +/* + * Solaris assembler doesn't have .p2align, and Darwin uses .align + * differently than GNU/Linux and Solaris. + */ +#if defined(__APPLE__) || defined(__MSDOS__) +# define ALIGN(pow2, abs) .align pow2 +#else +# define ALIGN(pow2, abs) .align abs +#endif + + .text + .globl LZMA_CRC64 +#ifdef __ELF__ + .hidden LZMA_CRC64 +#endif + +#if !defined(__APPLE__) && !defined(_WIN32) && !defined(__CYGWIN__) \ + && !defined(__MSDOS__) + .type LZMA_CRC64, @function +#endif + + ALIGN(4, 16) +LZMA_CRC64: + _CET_ENDBR + /* + * Register usage: + * %eax crc LSB + * %edx crc MSB + * %esi buf + * %edi size or buf + size + * %ebx lzma_crc64_table + * %ebp Table index + * %ecx Temporary + */ + pushl %ebx + pushl %esi + pushl %edi + pushl %ebp + movl 0x14(%esp), %esi /* buf */ + movl 0x18(%esp), %edi /* size */ + movl 0x1C(%esp), %eax /* crc LSB */ + movl 0x20(%esp), %edx /* crc MSB */ + + /* + * Store the address of lzma_crc64_table to %ebx. This is needed to + * get position-independent code (PIC). + * + * The PIC macro is defined by libtool, while __PIC__ is defined + * by GCC but only on some systems. Testing for both makes it simpler + * to test this code without libtool, and keeps the code working also + * when built with libtool but using something else than GCC. + * + * I understood that libtool may define PIC on Windows even though + * the code in Windows DLLs is not PIC in sense that it is in ELF + * binaries, so we need a separate check to always use the non-PIC + * code on Windows. + */ +#if (!defined(PIC) && !defined(__PIC__)) \ + || (defined(_WIN32) || defined(__CYGWIN__)) + /* Not PIC */ + movl $ LZMA_CRC64_TABLE, %ebx +#elif defined(__APPLE__) + /* Mach-O */ + call .L_get_pc +.L_pic: + leal .L_lzma_crc64_table$non_lazy_ptr-.L_pic(%ebx), %ebx + movl (%ebx), %ebx +#else + /* ELF */ + call .L_get_pc + addl $_GLOBAL_OFFSET_TABLE_, %ebx + movl LZMA_CRC64_TABLE@GOT(%ebx), %ebx +#endif + + /* Complement the initial value. */ + notl %eax + notl %edx + +.L_align: + /* + * Check if there is enough input to use slicing-by-four. + * We need eight bytes, because the loop pre-reads four bytes. + */ + cmpl $8, %edi + jb .L_rest + + /* Check if we have reached alignment of four bytes. */ + testl $3, %esi + jz .L_slice + + /* Calculate CRC of the next input byte. */ + movzbl (%esi), %ebp + incl %esi + movzbl %al, %ecx + xorl %ecx, %ebp + shrdl $8, %edx, %eax + xorl (%ebx, %ebp, 8), %eax + shrl $8, %edx + xorl 4(%ebx, %ebp, 8), %edx + decl %edi + jmp .L_align + +.L_slice: + /* + * If we get here, there's at least eight bytes of aligned input + * available. Make %edi multiple of four bytes. Store the possible + * remainder over the "size" variable in the argument stack. + */ + movl %edi, 0x18(%esp) + andl $-4, %edi + subl %edi, 0x18(%esp) + + /* + * Let %edi be buf + size - 4 while running the main loop. This way + * we can compare for equality to determine when exit the loop. + */ + addl %esi, %edi + subl $4, %edi + + /* Read in the first four aligned bytes. */ + movl (%esi), %ecx + +.L_loop: + xorl %eax, %ecx + movzbl %cl, %ebp + movl 0x1800(%ebx, %ebp, 8), %eax + xorl %edx, %eax + movl 0x1804(%ebx, %ebp, 8), %edx + movzbl %ch, %ebp + xorl 0x1000(%ebx, %ebp, 8), %eax + xorl 0x1004(%ebx, %ebp, 8), %edx + shrl $16, %ecx + movzbl %cl, %ebp + xorl 0x0800(%ebx, %ebp, 8), %eax + xorl 0x0804(%ebx, %ebp, 8), %edx + movzbl %ch, %ebp + addl $4, %esi + xorl (%ebx, %ebp, 8), %eax + xorl 4(%ebx, %ebp, 8), %edx + + /* Check for end of aligned input. */ + cmpl %edi, %esi + + /* + * Copy the next input byte to %ecx. It is slightly faster to + * read it here than at the top of the loop. + */ + movl (%esi), %ecx + jb .L_loop + + /* + * Process the remaining four bytes, which we have already + * copied to %ecx. + */ + xorl %eax, %ecx + movzbl %cl, %ebp + movl 0x1800(%ebx, %ebp, 8), %eax + xorl %edx, %eax + movl 0x1804(%ebx, %ebp, 8), %edx + movzbl %ch, %ebp + xorl 0x1000(%ebx, %ebp, 8), %eax + xorl 0x1004(%ebx, %ebp, 8), %edx + shrl $16, %ecx + movzbl %cl, %ebp + xorl 0x0800(%ebx, %ebp, 8), %eax + xorl 0x0804(%ebx, %ebp, 8), %edx + movzbl %ch, %ebp + addl $4, %esi + xorl (%ebx, %ebp, 8), %eax + xorl 4(%ebx, %ebp, 8), %edx + + /* Copy the number of remaining bytes to %edi. */ + movl 0x18(%esp), %edi + +.L_rest: + /* Check for end of input. */ + testl %edi, %edi + jz .L_return + + /* Calculate CRC of the next input byte. */ + movzbl (%esi), %ebp + incl %esi + movzbl %al, %ecx + xorl %ecx, %ebp + shrdl $8, %edx, %eax + xorl (%ebx, %ebp, 8), %eax + shrl $8, %edx + xorl 4(%ebx, %ebp, 8), %edx + decl %edi + jmp .L_rest + +.L_return: + /* Complement the final value. */ + notl %eax + notl %edx + + popl %ebp + popl %edi + popl %esi + popl %ebx + ret + +#if defined(PIC) || defined(__PIC__) + ALIGN(4, 16) +.L_get_pc: + movl (%esp), %ebx + ret +#endif + +#if defined(__APPLE__) && (defined(PIC) || defined(__PIC__)) + /* Mach-O PIC */ + .section __IMPORT,__pointers,non_lazy_symbol_pointers +.L_lzma_crc64_table$non_lazy_ptr: + .indirect_symbol LZMA_CRC64_TABLE + .long 0 + +#elif !defined(_WIN32) && !defined(__CYGWIN__) && !defined(__MSDOS__) + /* ELF */ + .size LZMA_CRC64, .-LZMA_CRC64 +#endif + +/* + * This is needed to support non-executable stack. It's ugly to + * use __FreeBSD__ and __linux__ here, but I don't know a way to detect when + * we are using GNU assembler. + */ +#if defined(__ELF__) && (defined(__FreeBSD__) || defined(__linux__)) + .section .note.GNU-stack,"",@progbits +#endif diff --git a/src/native/external/xz/src/liblzma/check/crc_clmul_consts_gen.c b/src/native/external/xz/src/liblzma/check/crc_clmul_consts_gen.c new file mode 100644 index 00000000000000..5fe14bd6f042a4 --- /dev/null +++ b/src/native/external/xz/src/liblzma/check/crc_clmul_consts_gen.c @@ -0,0 +1,160 @@ +// SPDX-License-Identifier: 0BSD + +/////////////////////////////////////////////////////////////////////////////// +// +/// \file crc_clmul_consts_gen.c +/// \brief Generate constants for CLMUL CRC code +/// +/// Compiling: gcc -std=c99 -o crc_clmul_consts_gen crc_clmul_consts_gen.c +/// +/// This is for CRCs that use reversed bit order (bit reflection). +/// The same CLMUL CRC code can be used with CRC64 and smaller ones like +/// CRC32 apart from one special case: CRC64 needs an extra step in the +/// Barrett reduction to handle the 65th bit; the smaller ones don't. +/// Otherwise it's enough to just change the polynomial and the derived +/// constants and use the same code. +/// +/// See the Intel white paper "Fast CRC Computation for Generic Polynomials +/// Using PCLMULQDQ Instruction" from 2009. +// +// Author: Lasse Collin +// +/////////////////////////////////////////////////////////////////////////////// + +#include +#include + + +/// CRC32 (Ethernet) polynomial in reversed representation +static const uint64_t p32 = 0xedb88320; + +// CRC64 (ECMA-182) polynomial in reversed representation +static const uint64_t p64 = 0xc96c5795d7870f42; + + +/// Calculates floor(x^128 / p) where p is a CRC64 polynomial in +/// reversed representation. The result is in reversed representation too. +static uint64_t +calc_cldiv(uint64_t p) +{ + // Quotient + uint64_t q = 0; + + // Align the x^64 term with the x^128 (the implied high bits of the + // divisor and the dividend) and do the first step of polynomial long + // division, calculating the first remainder. The variable q remains + // zero because the highest bit of the quotient is an implied bit 1 + // (we kind of set q = 1 << -1). + uint64_t r = p; + + // Then process the remaining 64 terms. Note that r has no implied + // high bit, only q and p do. (And remember that a high bit in the + // polynomial is stored at a low bit in the variable due to the + // reversed bit order.) + for (unsigned i = 0; i < 64; ++i) { + q |= (r & 1) << i; + r = (r >> 1) ^ (r & 1 ? p : 0); + } + + return q; +} + + +/// Calculate the remainder of carryless division: +/// +/// x^(bits + n - 1) % p, where n=64 (for CRC64) +/// +/// p must be in reversed representation which omits the bit of +/// the highest term of the polynomial. Instead, it is an implied bit +/// at kind of like "1 << -1" position, as if it had just been shifted out. +/// +/// The return value is in the reversed bit order. (There are no implied bits.) +static uint64_t +calc_clrem(uint64_t p, unsigned bits) +{ + // Do the first step of polynomial long division. + uint64_t r = p; + + // Then process the remaining terms. Start with i = 1 instead of i = 0 + // to account for the -1 in x^(bits + n - 1). This -1 is convenient + // with the reversed bit order. See the "Bit-Reflection" section in + // the Intel white paper. + for (unsigned i = 1; i < bits; ++i) + r = (r >> 1) ^ (r & 1 ? p : 0); + + return r; +} + + +extern int +main(void) +{ + puts("// CRC64"); + + // The order of the two 64-bit constants in a vector don't matter. + // It feels logical to put them in this order as it matches the + // order in which the input bytes are read. + printf("const __m128i fold512 = _mm_set_epi64x(" + "0x%016" PRIx64 ", 0x%016" PRIx64 ");\n", + calc_clrem(p64, 4 * 128 - 64), + calc_clrem(p64, 4 * 128)); + + printf("const __m128i fold128 = _mm_set_epi64x(" + "0x%016" PRIx64 ", 0x%016" PRIx64 ");\n", + calc_clrem(p64, 128 - 64), + calc_clrem(p64, 128)); + + // When we multiply by mu, we care about the high bits of the result + // (in reversed bit order!). It doesn't matter that the low bit gets + // shifted out because the affected output bits will be ignored. + // Below we add the implied high bit with "| 1" after the shifting + // so that the high bits of the multiplication will be correct. + // + // p64 is shifted left by one so that the final multiplication + // in Barrett reduction won't be misaligned by one bit. We could + // use "(p64 << 1) | 1" instead of "p64 << 1" too but it makes + // no difference as that bit won't affect the relevant output bits + // (we only care about the lowest 64 bits of the result, that is, + // lowest in the reversed bit order). + // + // NOTE: The 65rd bit of p64 gets shifted out. It needs to be + // compensated with 64-bit shift and xor in the CRC64 code. + printf("const __m128i mu_p = _mm_set_epi64x(" + "0x%016" PRIx64 ", 0x%016" PRIx64 ");\n", + (calc_cldiv(p64) << 1) | 1, + p64 << 1); + + puts(""); + + puts("// CRC32"); + + printf("const __m128i fold512 = _mm_set_epi64x(" + "0x%08" PRIx64 ", 0x%08" PRIx64 ");\n", + calc_clrem(p32, 4 * 128 - 64), + calc_clrem(p32, 4 * 128)); + + printf("const __m128i fold128 = _mm_set_epi64x(" + "0x%08" PRIx64 ", 0x%08" PRIx64 ");\n", + calc_clrem(p32, 128 - 64), + calc_clrem(p32, 128)); + + // CRC32 calculation is done by modulus scaling it to a CRC64. + // Since the CRC is in reversed representation, only the mu + // constant changes with the modulus scaling. This method avoids + // one additional constant and one additional clmul in the final + // reduction steps, making the code both simpler and faster. + // + // p32 is shifted left by one so that the final multiplication + // in Barrett reduction won't be misaligned by one bit. We could + // use "(p32 << 1) | 1" instead of "p32 << 1" too but it makes + // no difference as that bit won't affect the relevant output bits. + // + // NOTE: The 33-bit value fits in 64 bits so, unlike with CRC64, + // there is no need to compensate for any missing bits in the code. + printf("const __m128i mu_p = _mm_set_epi64x(" + "0x%016" PRIx64 ", 0x%" PRIx64 ");\n", + (calc_cldiv(p32) << 1) | 1, + p32 << 1); + + return 0; +} diff --git a/src/native/external/xz/src/liblzma/check/crc_common.h b/src/native/external/xz/src/liblzma/check/crc_common.h new file mode 100644 index 00000000000000..4897dfee49ae02 --- /dev/null +++ b/src/native/external/xz/src/liblzma/check/crc_common.h @@ -0,0 +1,181 @@ +// SPDX-License-Identifier: 0BSD + +/////////////////////////////////////////////////////////////////////////////// +// +/// \file crc_common.h +/// \brief Macros and declarations for CRC32 and CRC64 +// +// Authors: Lasse Collin +// Ilya Kurdyukov +// Jia Tan +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef LZMA_CRC_COMMON_H +#define LZMA_CRC_COMMON_H + +#include "common.h" + + +///////////// +// Generic // +///////////// + +#ifdef WORDS_BIGENDIAN +# define A(x) ((x) >> 24) +# define B(x) (((x) >> 16) & 0xFF) +# define C(x) (((x) >> 8) & 0xFF) +# define D(x) ((x) & 0xFF) + +# define S8(x) ((x) << 8) +# define S32(x) ((x) << 32) + +#else +# define A(x) ((x) & 0xFF) +# define B(x) (((x) >> 8) & 0xFF) +# define C(x) (((x) >> 16) & 0xFF) +# define D(x) ((x) >> 24) + +# define S8(x) ((x) >> 8) +# define S32(x) ((x) >> 32) +#endif + + +/// lzma_crc32_table[0] is needed by LZ encoder so we need to keep +/// the array two-dimensional. +#ifdef HAVE_SMALL +lzma_attr_visibility_hidden +extern uint32_t lzma_crc32_table[1][256]; + +extern void lzma_crc32_init(void); + +#else + +lzma_attr_visibility_hidden +extern const uint32_t lzma_crc32_table[8][256]; + +lzma_attr_visibility_hidden +extern const uint64_t lzma_crc64_table[4][256]; +#endif + + +/////////////////// +// Configuration // +/////////////////// + +// NOTE: This config isn't used if HAVE_SMALL is defined! + +// These are defined if the generic slicing-by-n implementations and their +// lookup tables are built. +#undef CRC32_GENERIC +#undef CRC64_GENERIC + +// These are defined if an arch-specific version is built. If both this +// and matching _GENERIC is defined then runtime detection must be used. +#undef CRC32_ARCH_OPTIMIZED +#undef CRC64_ARCH_OPTIMIZED + +// The x86 CLMUL is used for both CRC32 and CRC64. +#undef CRC_X86_CLMUL + +// Many ARM64 processor have CRC32 instructions. +// CRC64 could be done with CLMUL but it's not implemented yet. +#undef CRC32_ARM64 + +// 64-bit LoongArch has CRC32 instructions. +#undef CRC32_LOONGARCH + + +// ARM64 +// +// Keep this in sync with changes to crc32_arm64.h +#if defined(_WIN32) \ + || (defined(HAVE_GETAUXVAL) && defined(HAVE_HWCAP_CRC32)) \ + || defined(HAVE_ELF_AUX_INFO) \ + || (defined(__APPLE__) && defined(HAVE_SYSCTLBYNAME)) +# define CRC_ARM64_RUNTIME_DETECTION 1 +#endif + +// ARM64 CRC32 instruction is only useful for CRC32. Currently, only +// little endian is supported since we were unable to test on a big +// endian machine. +#if defined(HAVE_ARM64_CRC32) && !defined(WORDS_BIGENDIAN) + // Allow ARM64 CRC32 instruction without a runtime check if + // __ARM_FEATURE_CRC32 is defined. GCC and Clang only define + // this if the proper compiler options are used. +# if defined(__ARM_FEATURE_CRC32) +# define CRC32_ARCH_OPTIMIZED 1 +# define CRC32_ARM64 1 +# elif defined(CRC_ARM64_RUNTIME_DETECTION) +# define CRC32_ARCH_OPTIMIZED 1 +# define CRC32_ARM64 1 +# define CRC32_GENERIC 1 +# endif +#endif + + +// LoongArch +// +// Only 64-bit LoongArch is supported for now. No runtime detection +// is needed because the LoongArch specification says that the CRC32 +// instructions are a part of the Basic Integer Instructions and +// they shall be implemented by 64-bit LoongArch implementations. +#ifdef HAVE_LOONGARCH_CRC32 +# define CRC32_ARCH_OPTIMIZED 1 +# define CRC32_LOONGARCH 1 +#endif + + +// x86 and E2K +#if defined(HAVE_USABLE_CLMUL) + // If CLMUL is allowed unconditionally in the compiler options then + // the generic version and the tables can be omitted. Exceptions: + // + // - If 32-bit x86 assembly files are enabled then those are always + // built and runtime detection is used even if compiler flags + // were set to allow CLMUL unconditionally. + // + // - The unconditional use doesn't work with MSVC as I don't know + // how to detect the features here. + // + // Don't enable CLMUL at all on old MSVC that targets 32-bit x86. + // There seems to be a compiler bug that produces broken code + // in optimized (Release) builds. It results in crashing tests. + // It is known that VS 2019 16.11 (MSVC 19.29.30158) is broken + // and that VS 2022 17.13 (MSVC 19.43.34808) works. +# if defined(_MSC_FULL_VER) && _MSC_FULL_VER < 194334808 \ + && !defined(__INTEL_COMPILER) && !defined(__clang__) \ + && defined(_M_IX86) + // Old MSVC targeting 32-bit x86: Don't enable CLMUL at all. +# elif (defined(__SSSE3__) && defined(__SSE4_1__) \ + && defined(__PCLMUL__) \ + && !defined(HAVE_CRC_X86_ASM)) \ + || (defined(__e2k__) && __iset__ >= 6) +# define CRC32_ARCH_OPTIMIZED 1 +# define CRC64_ARCH_OPTIMIZED 1 +# define CRC_X86_CLMUL 1 +# else +# define CRC32_GENERIC 1 +# define CRC64_GENERIC 1 +# define CRC32_ARCH_OPTIMIZED 1 +# define CRC64_ARCH_OPTIMIZED 1 +# define CRC_X86_CLMUL 1 +# endif +#endif + + +// Fallback configuration +// +// For CRC32 use the generic slice-by-eight implementation if no optimized +// version is available. +#if !defined(CRC32_ARCH_OPTIMIZED) && !defined(CRC32_GENERIC) +# define CRC32_GENERIC 1 +#endif + +// For CRC64 use the generic slice-by-four implementation if no optimized +// version is available. +#if !defined(CRC64_ARCH_OPTIMIZED) && !defined(CRC64_GENERIC) +# define CRC64_GENERIC 1 +#endif + +#endif diff --git a/src/native/external/xz/src/liblzma/check/crc_x86_clmul.h b/src/native/external/xz/src/liblzma/check/crc_x86_clmul.h new file mode 100644 index 00000000000000..356536233080e4 --- /dev/null +++ b/src/native/external/xz/src/liblzma/check/crc_x86_clmul.h @@ -0,0 +1,377 @@ +// SPDX-License-Identifier: 0BSD + +/////////////////////////////////////////////////////////////////////////////// +// +/// \file crc_x86_clmul.h +/// \brief CRC32 and CRC64 implementations using CLMUL instructions. +/// +/// The CRC32 and CRC64 implementations use 32/64-bit x86 SSSE3, SSE4.1, and +/// CLMUL instructions. This is compatible with Elbrus 2000 (E2K) too. +/// +/// See the Intel white paper "Fast CRC Computation for Generic Polynomials +/// Using PCLMULQDQ Instruction" from 2009. The original file seems to be +/// gone from Intel's website but a version is available here: +/// https://www.researchgate.net/publication/263424619_Fast_CRC_computation +/// (The link was checked on 2024-06-11.) +/// +/// While this file has both CRC32 and CRC64 implementations, only one +/// can be built at a time. The version to build is selected by defining +/// BUILDING_CRC_CLMUL to 32 or 64 before including this file. +/// +/// NOTE: The x86 CLMUL CRC implementation was rewritten for XZ Utils 5.8.0. +// +// Authors: Lasse Collin +// Ilya Kurdyukov +// +/////////////////////////////////////////////////////////////////////////////// + +// This file must not be included more than once. +#ifdef LZMA_CRC_X86_CLMUL_H +# error crc_x86_clmul.h was included twice. +#endif +#define LZMA_CRC_X86_CLMUL_H + +#if BUILDING_CRC_CLMUL != 32 && BUILDING_CRC_CLMUL != 64 +# error BUILDING_CRC_CLMUL is undefined or has an invalid value +#endif + +#include + +#if defined(_MSC_VER) +# include +#elif defined(HAVE_CPUID_H) +# include +#endif + + +// EDG-based compilers (Intel's classic compiler and compiler for E2K) can +// define __GNUC__ but the attribute must not be used with them. +// The new Clang-based ICX needs the attribute. +// +// NOTE: Build systems check for this too, keep them in sync with this. +#if (defined(__GNUC__) || defined(__clang__)) && !defined(__EDG__) +# define crc_attr_target \ + __attribute__((__target__("ssse3,sse4.1,pclmul"))) +#else +# define crc_attr_target +#endif + + +// GCC and Clang would produce good code with _mm_set_epi64x +// but MSVC needs _mm_cvtsi64_si128 on x86-64. +#if defined(__i386__) || defined(_M_IX86) +# define my_set_low64(a) _mm_set_epi64x(0, (a)) +#else +# define my_set_low64(a) _mm_cvtsi64_si128(a) +#endif + + +// Align it so that the whole array is within the same cache line. +// More than one unaligned load can be done from this during the +// same CRC function call. +// +// The bytes [0] to [31] are used with AND to clear the low bytes. (With ANDN +// those could be used to clear the high bytes too but it's not needed here.) +// +// The bytes [16] to [47] are for left shifts. +// The bytes [32] to [63] are for right shifts. +alignas(64) +static uint8_t vmasks[64] = { + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, + 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +}; + + +// *Unaligned* 128-bit load +crc_attr_target +static inline __m128i +my_load128(const uint8_t *p) +{ + return _mm_loadu_si128((const __m128i *)p); +} + + +// Keep the highest "count" bytes as is and clear the remaining low bytes. +crc_attr_target +static inline __m128i +keep_high_bytes(__m128i v, size_t count) +{ + return _mm_and_si128(my_load128((vmasks + count)), v); +} + + +// Shift the 128-bit value left by "amount" bytes (not bits). +crc_attr_target +static inline __m128i +shift_left(__m128i v, size_t amount) +{ + return _mm_shuffle_epi8(v, my_load128((vmasks + 32 - amount))); +} + + +// Shift the 128-bit value right by "amount" bytes (not bits). +crc_attr_target +static inline __m128i +shift_right(__m128i v, size_t amount) +{ + return _mm_shuffle_epi8(v, my_load128((vmasks + 32 + amount))); +} + + +crc_attr_target +static inline __m128i +fold(__m128i v, __m128i k) +{ + __m128i a = _mm_clmulepi64_si128(v, k, 0x00); + __m128i b = _mm_clmulepi64_si128(v, k, 0x11); + return _mm_xor_si128(a, b); +} + + +crc_attr_target +static inline __m128i +fold_xor(__m128i v, __m128i k, const uint8_t *buf) +{ + return _mm_xor_si128(my_load128(buf), fold(v, k)); +} + + +#if BUILDING_CRC_CLMUL == 32 +crc_attr_target +static uint32_t +crc32_arch_optimized(const uint8_t *buf, size_t size, uint32_t crc) +#else +crc_attr_target +static uint64_t +crc64_arch_optimized(const uint8_t *buf, size_t size, uint64_t crc) +#endif +{ + // We will assume that there is at least one byte of input. + if (size == 0) + return crc; + + // See crc_clmul_consts_gen.c. +#if BUILDING_CRC_CLMUL == 32 + const __m128i fold512 = _mm_set_epi64x(0x1d9513d7, 0x8f352d95); + const __m128i fold128 = _mm_set_epi64x(0xccaa009e, 0xae689191); + const __m128i mu_p = _mm_set_epi64x( + (int64_t)0xb4e5b025f7011641, 0x1db710640); +#else + const __m128i fold512 = _mm_set_epi64x( + (int64_t)0x081f6054a7842df4, (int64_t)0x6ae3efbb9dd441f3); + + const __m128i fold128 = _mm_set_epi64x( + (int64_t)0xdabe95afc7875f40, (int64_t)0xe05dd497ca393ae4); + + const __m128i mu_p = _mm_set_epi64x( + (int64_t)0x9c3e466c172963d5, (int64_t)0x92d8af2baf0e1e84); +#endif + + __m128i v0, v1, v2, v3; + + crc = ~crc; + + if (size < 8) { + uint64_t x = crc; + size_t i = 0; + + // Checking the bit instead of comparing the size means + // that we don't need to update the size between the steps. + if (size & 4) { + x ^= read32le(buf); + buf += 4; + i = 32; + } + + if (size & 2) { + x ^= (uint64_t)read16le(buf) << i; + buf += 2; + i += 16; + } + + if (size & 1) + x ^= (uint64_t)*buf << i; + + v0 = my_set_low64((int64_t)x); + v0 = shift_left(v0, 8 - size); + + } else if (size < 16) { + v0 = my_set_low64((int64_t)(crc ^ read64le(buf))); + + // NOTE: buf is intentionally left 8 bytes behind so that + // we can read the last 1-7 bytes with read64le(buf + size). + size -= 8; + + // Handling 8-byte input specially is a speed optimization + // as the clmul can be skipped. A branch is also needed to + // avoid a too high shift amount. + if (size > 0) { + const size_t padding = 8 - size; + uint64_t high = read64le(buf + size) >> (padding * 8); + +#if defined(__i386__) || defined(_M_IX86) + // Simple but likely not the best code for 32-bit x86. + v0 = _mm_insert_epi32(v0, (int32_t)high, 2); + v0 = _mm_insert_epi32(v0, (int32_t)(high >> 32), 3); +#else + v0 = _mm_insert_epi64(v0, (int64_t)high, 1); +#endif + + v0 = shift_left(v0, padding); + + v1 = _mm_srli_si128(v0, 8); + v0 = _mm_clmulepi64_si128(v0, fold128, 0x10); + v0 = _mm_xor_si128(v0, v1); + } + } else { + v0 = my_set_low64((int64_t)crc); + + // To align or not to align the buf pointer? If the end of + // the buffer isn't aligned, aligning the pointer here would + // make us do an extra folding step with the associated byte + // shuffling overhead. The cost of that would need to be + // lower than the benefit of aligned reads. Testing on an old + // Intel Ivy Bridge processor suggested that aligning isn't + // worth the cost but it likely depends on the processor and + // buffer size. Unaligned loads (MOVDQU) should be fast on + // x86 processors that support PCLMULQDQ, so we don't align + // the buf pointer here. + + // Read the first (and possibly the only) full 16 bytes. + v0 = _mm_xor_si128(v0, my_load128(buf)); + buf += 16; + size -= 16; + + if (size >= 48) { + v1 = my_load128(buf); + v2 = my_load128(buf + 16); + v3 = my_load128(buf + 32); + buf += 48; + size -= 48; + + while (size >= 64) { + v0 = fold_xor(v0, fold512, buf); + v1 = fold_xor(v1, fold512, buf + 16); + v2 = fold_xor(v2, fold512, buf + 32); + v3 = fold_xor(v3, fold512, buf + 48); + buf += 64; + size -= 64; + } + + v0 = _mm_xor_si128(v1, fold(v0, fold128)); + v0 = _mm_xor_si128(v2, fold(v0, fold128)); + v0 = _mm_xor_si128(v3, fold(v0, fold128)); + } + + while (size >= 16) { + v0 = fold_xor(v0, fold128, buf); + buf += 16; + size -= 16; + } + + if (size > 0) { + // We want the last "size" number of input bytes to + // be at the high bits of v1. First do a full 16-byte + // load and then mask the low bytes to zeros. + v1 = my_load128(buf + size - 16); + v1 = keep_high_bytes(v1, size); + + // Shift high bytes from v0 to the low bytes of v1. + // + // Alternatively we could replace the combination + // keep_high_bytes + shift_right + _mm_or_si128 with + // _mm_shuffle_epi8 + _mm_blendv_epi8 but that would + // require larger tables for the masks. Now there are + // three loads (instead of two) from the mask tables + // but they all are from the same cache line. + v1 = _mm_or_si128(v1, shift_right(v0, size)); + + // Shift high bytes of v0 away, padding the + // low bytes with zeros. + v0 = shift_left(v0, 16 - size); + + v0 = _mm_xor_si128(v1, fold(v0, fold128)); + } + + v1 = _mm_srli_si128(v0, 8); + v0 = _mm_clmulepi64_si128(v0, fold128, 0x10); + v0 = _mm_xor_si128(v0, v1); + } + + // Barrett reduction + +#if BUILDING_CRC_CLMUL == 32 + v1 = _mm_clmulepi64_si128(v0, mu_p, 0x10); // v0 * mu + v1 = _mm_clmulepi64_si128(v1, mu_p, 0x00); // v1 * p + v0 = _mm_xor_si128(v0, v1); + return ~(uint32_t)_mm_extract_epi32(v0, 2); +#else + // Because p is 65 bits but one bit doesn't fit into the 64-bit + // half of __m128i, finish the second clmul by shifting v1 left + // by 64 bits and xorring it to the final result. + v1 = _mm_clmulepi64_si128(v0, mu_p, 0x10); // v0 * mu + v2 = _mm_slli_si128(v1, 8); + v1 = _mm_clmulepi64_si128(v1, mu_p, 0x00); // v1 * p + v0 = _mm_xor_si128(v0, v2); + v0 = _mm_xor_si128(v0, v1); +#if defined(__i386__) || defined(_M_IX86) + return ~(((uint64_t)(uint32_t)_mm_extract_epi32(v0, 3) << 32) | + (uint64_t)(uint32_t)_mm_extract_epi32(v0, 2)); +#else + return ~(uint64_t)_mm_extract_epi64(v0, 1); +#endif +#endif +} + + +// Even though this is an inline function, compile it only when needed. +// This way it won't appear in E2K builds at all. +#if defined(CRC32_GENERIC) || defined(CRC64_GENERIC) +// Inlining this function duplicates the function body in crc32_resolve() and +// crc64_resolve(), but this is acceptable because this is a tiny function. +static inline bool +is_arch_extension_supported(void) +{ + int success = 1; + uint32_t r[4]; // eax, ebx, ecx, edx + +#if defined(_MSC_VER) + // This needs with MSVC. ICC has it as a built-in + // on all platforms. + __cpuid((int *)r, 1); +#elif defined(HAVE_CPUID_H) + // Compared to just using __asm__ to run CPUID, this also checks + // that CPUID is supported and saves and restores ebx as that is + // needed with GCC < 5 with position-independent code (PIC). + success = __get_cpuid(1, &r[0], &r[1], &r[2], &r[3]); +#else + // Just a fallback that shouldn't be needed. + __asm__("cpuid\n\t" + : "=a"(r[0]), "=b"(r[1]), "=c"(r[2]), "=d"(r[3]) + : "a"(1), "c"(0)); +#endif + + // Returns true if these are supported: + // CLMUL (bit 1 in ecx) + // SSSE3 (bit 9 in ecx) + // SSE4.1 (bit 19 in ecx) + const uint32_t ecx_mask = (1 << 1) | (1 << 9) | (1 << 19); + return success && (r[2] & ecx_mask) == ecx_mask; + + // Alternative methods that weren't used: + // - ICC's _may_i_use_cpu_feature: the other methods should work too. + // - GCC >= 6 / Clang / ICX __builtin_cpu_supports("pclmul") + // + // CPUID decoding is needed with MSVC anyway and older GCC. This keeps + // the feature checks in the build system simpler too. The nice thing + // about __builtin_cpu_supports would be that it generates very short + // code as is it only reads a variable set at startup but a few bytes + // doesn't matter here. +} +#endif diff --git a/src/native/external/xz/src/liblzma/check/sha256.c b/src/native/external/xz/src/liblzma/check/sha256.c new file mode 100644 index 00000000000000..c067a3a693fa4a --- /dev/null +++ b/src/native/external/xz/src/liblzma/check/sha256.c @@ -0,0 +1,189 @@ +// SPDX-License-Identifier: 0BSD + +/////////////////////////////////////////////////////////////////////////////// +// +/// \file sha256.c +/// \brief SHA-256 +// +// The C code is based on the public domain SHA-256 code found from +// Crypto++ Library 5.5.1 released in 2007: https://www.cryptopp.com/ +// A few minor tweaks have been made in liblzma. +// +// Authors: Wei Dai +// Lasse Collin +// +/////////////////////////////////////////////////////////////////////////////// + +#include "check.h" + +// Rotate a uint32_t. GCC can optimize this to a rotate instruction +// at least on x86. +static inline uint32_t +rotr_32(uint32_t num, unsigned amount) +{ + return (num >> amount) | (num << (32 - amount)); +} + +#define blk0(i) (W[i] = conv32be(data[i])) +#define blk2(i) (W[i & 15] += s1(W[(i - 2) & 15]) + W[(i - 7) & 15] \ + + s0(W[(i - 15) & 15])) + +#define Ch(x, y, z) (z ^ (x & (y ^ z))) +#define Maj(x, y, z) ((x & (y ^ z)) + (y & z)) + +#define a(i) T[(0 - i) & 7] +#define b(i) T[(1 - i) & 7] +#define c(i) T[(2 - i) & 7] +#define d(i) T[(3 - i) & 7] +#define e(i) T[(4 - i) & 7] +#define f(i) T[(5 - i) & 7] +#define g(i) T[(6 - i) & 7] +#define h(i) T[(7 - i) & 7] + +#define R(i, j, blk) \ + h(i) += S1(e(i)) + Ch(e(i), f(i), g(i)) + SHA256_K[i + j] + blk; \ + d(i) += h(i); \ + h(i) += S0(a(i)) + Maj(a(i), b(i), c(i)) +#define R0(i) R(i, 0, blk0(i)) +#define R2(i) R(i, j, blk2(i)) + +#define S0(x) rotr_32(x ^ rotr_32(x ^ rotr_32(x, 9), 11), 2) +#define S1(x) rotr_32(x ^ rotr_32(x ^ rotr_32(x, 14), 5), 6) +#define s0(x) (rotr_32(x ^ rotr_32(x, 11), 7) ^ (x >> 3)) +#define s1(x) (rotr_32(x ^ rotr_32(x, 2), 17) ^ (x >> 10)) + + +static const uint32_t SHA256_K[64] = { + 0x428A2F98, 0x71374491, 0xB5C0FBCF, 0xE9B5DBA5, + 0x3956C25B, 0x59F111F1, 0x923F82A4, 0xAB1C5ED5, + 0xD807AA98, 0x12835B01, 0x243185BE, 0x550C7DC3, + 0x72BE5D74, 0x80DEB1FE, 0x9BDC06A7, 0xC19BF174, + 0xE49B69C1, 0xEFBE4786, 0x0FC19DC6, 0x240CA1CC, + 0x2DE92C6F, 0x4A7484AA, 0x5CB0A9DC, 0x76F988DA, + 0x983E5152, 0xA831C66D, 0xB00327C8, 0xBF597FC7, + 0xC6E00BF3, 0xD5A79147, 0x06CA6351, 0x14292967, + 0x27B70A85, 0x2E1B2138, 0x4D2C6DFC, 0x53380D13, + 0x650A7354, 0x766A0ABB, 0x81C2C92E, 0x92722C85, + 0xA2BFE8A1, 0xA81A664B, 0xC24B8B70, 0xC76C51A3, + 0xD192E819, 0xD6990624, 0xF40E3585, 0x106AA070, + 0x19A4C116, 0x1E376C08, 0x2748774C, 0x34B0BCB5, + 0x391C0CB3, 0x4ED8AA4A, 0x5B9CCA4F, 0x682E6FF3, + 0x748F82EE, 0x78A5636F, 0x84C87814, 0x8CC70208, + 0x90BEFFFA, 0xA4506CEB, 0xBEF9A3F7, 0xC67178F2, +}; + + +static void +transform(uint32_t state[8], const uint32_t data[16]) +{ + uint32_t W[16]; + uint32_t T[8]; + + // Copy state[] to working vars. + memcpy(T, state, sizeof(T)); + + // The first 16 operations unrolled + R0( 0); R0( 1); R0( 2); R0( 3); + R0( 4); R0( 5); R0( 6); R0( 7); + R0( 8); R0( 9); R0(10); R0(11); + R0(12); R0(13); R0(14); R0(15); + + // The remaining 48 operations partially unrolled + for (unsigned int j = 16; j < 64; j += 16) { + R2( 0); R2( 1); R2( 2); R2( 3); + R2( 4); R2( 5); R2( 6); R2( 7); + R2( 8); R2( 9); R2(10); R2(11); + R2(12); R2(13); R2(14); R2(15); + } + + // Add the working vars back into state[]. + state[0] += a(0); + state[1] += b(0); + state[2] += c(0); + state[3] += d(0); + state[4] += e(0); + state[5] += f(0); + state[6] += g(0); + state[7] += h(0); +} + + +static void +process(lzma_check_state *check) +{ + transform(check->state.sha256.state, check->buffer.u32); + return; +} + + +extern void +lzma_sha256_init(lzma_check_state *check) +{ + static const uint32_t s[8] = { + 0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A, + 0x510E527F, 0x9B05688C, 0x1F83D9AB, 0x5BE0CD19, + }; + + memcpy(check->state.sha256.state, s, sizeof(s)); + check->state.sha256.size = 0; + + return; +} + + +extern void +lzma_sha256_update(const uint8_t *buf, size_t size, lzma_check_state *check) +{ + // Copy the input data into a properly aligned temporary buffer. + // This way we can be called with arbitrarily sized buffers + // (no need to be multiple of 64 bytes), and the code works also + // on architectures that don't allow unaligned memory access. + while (size > 0) { + const size_t copy_start = check->state.sha256.size & 0x3F; + size_t copy_size = 64 - copy_start; + if (copy_size > size) + copy_size = size; + + memcpy(check->buffer.u8 + copy_start, buf, copy_size); + + buf += copy_size; + size -= copy_size; + check->state.sha256.size += copy_size; + + if ((check->state.sha256.size & 0x3F) == 0) + process(check); + } + + return; +} + + +extern void +lzma_sha256_finish(lzma_check_state *check) +{ + // Add padding as described in RFC 3174 (it describes SHA-1 but + // the same padding style is used for SHA-256 too). + size_t pos = check->state.sha256.size & 0x3F; + check->buffer.u8[pos++] = 0x80; + + while (pos != 64 - 8) { + if (pos == 64) { + process(check); + pos = 0; + } + + check->buffer.u8[pos++] = 0x00; + } + + // Convert the message size from bytes to bits. + check->state.sha256.size *= 8; + + check->buffer.u64[(64 - 8) / 8] = conv64be(check->state.sha256.size); + + process(check); + + for (size_t i = 0; i < 8; ++i) + check->buffer.u32[i] = conv32be(check->state.sha256.state[i]); + + return; +} diff --git a/src/native/external/xz/src/liblzma/common/Makefile.inc b/src/native/external/xz/src/liblzma/common/Makefile.inc new file mode 100644 index 00000000000000..51b1aae86e298e --- /dev/null +++ b/src/native/external/xz/src/liblzma/common/Makefile.inc @@ -0,0 +1,100 @@ +## SPDX-License-Identifier: 0BSD +## Author: Lasse Collin + +liblzma_la_SOURCES += \ + common/common.c \ + common/common.h \ + common/memcmplen.h \ + common/block_util.c \ + common/easy_preset.c \ + common/easy_preset.h \ + common/filter_common.c \ + common/filter_common.h \ + common/hardware_physmem.c \ + common/index.c \ + common/index.h \ + common/stream_flags_common.c \ + common/stream_flags_common.h \ + common/string_conversion.c \ + common/vli_size.c + +if COND_THREADS +liblzma_la_SOURCES += \ + common/hardware_cputhreads.c \ + common/outqueue.c \ + common/outqueue.h +endif + +if COND_MAIN_ENCODER +liblzma_la_SOURCES += \ + common/alone_encoder.c \ + common/block_buffer_encoder.c \ + common/block_buffer_encoder.h \ + common/block_encoder.c \ + common/block_encoder.h \ + common/block_header_encoder.c \ + common/easy_buffer_encoder.c \ + common/easy_encoder.c \ + common/easy_encoder_memusage.c \ + common/filter_buffer_encoder.c \ + common/filter_encoder.c \ + common/filter_encoder.h \ + common/filter_flags_encoder.c \ + common/index_encoder.c \ + common/index_encoder.h \ + common/stream_buffer_encoder.c \ + common/stream_encoder.c \ + common/stream_flags_encoder.c \ + common/vli_encoder.c + +if COND_THREADS +liblzma_la_SOURCES += \ + common/stream_encoder_mt.c +endif + +if COND_MICROLZMA +liblzma_la_SOURCES += \ + common/microlzma_encoder.c +endif +endif + +if COND_MAIN_DECODER +liblzma_la_SOURCES += \ + common/alone_decoder.c \ + common/alone_decoder.h \ + common/auto_decoder.c \ + common/block_buffer_decoder.c \ + common/block_decoder.c \ + common/block_decoder.h \ + common/block_header_decoder.c \ + common/easy_decoder_memusage.c \ + common/file_info.c \ + common/filter_buffer_decoder.c \ + common/filter_decoder.c \ + common/filter_decoder.h \ + common/filter_flags_decoder.c \ + common/index_decoder.c \ + common/index_decoder.h \ + common/index_hash.c \ + common/stream_buffer_decoder.c \ + common/stream_decoder.c \ + common/stream_decoder.h \ + common/stream_flags_decoder.c \ + common/vli_decoder.c + +if COND_THREADS +liblzma_la_SOURCES += \ + common/stream_decoder_mt.c +endif + +if COND_MICROLZMA +liblzma_la_SOURCES += \ + common/microlzma_decoder.c +endif + +if COND_LZIP_DECODER +liblzma_la_SOURCES += \ + common/lzip_decoder.c \ + common/lzip_decoder.h +endif +endif diff --git a/src/native/external/xz/src/liblzma/common/alone_decoder.c b/src/native/external/xz/src/liblzma/common/alone_decoder.c new file mode 100644 index 00000000000000..8ebbbe8aa70fd1 --- /dev/null +++ b/src/native/external/xz/src/liblzma/common/alone_decoder.c @@ -0,0 +1,249 @@ +// SPDX-License-Identifier: 0BSD + +/////////////////////////////////////////////////////////////////////////////// +// +/// \file alone_decoder.c +/// \brief Decoder for LZMA_Alone files +// +// Author: Lasse Collin +// +/////////////////////////////////////////////////////////////////////////////// + +#include "alone_decoder.h" +#include "lzma_decoder.h" +#include "lz_decoder.h" + + +typedef struct { + lzma_next_coder next; + + enum { + SEQ_PROPERTIES, + SEQ_DICTIONARY_SIZE, + SEQ_UNCOMPRESSED_SIZE, + SEQ_CODER_INIT, + SEQ_CODE, + } sequence; + + /// If true, reject files that are unlikely to be .lzma files. + /// If false, more non-.lzma files get accepted and will give + /// LZMA_DATA_ERROR either immediately or after a few output bytes. + bool picky; + + /// Position in the header fields + size_t pos; + + /// Uncompressed size decoded from the header + lzma_vli uncompressed_size; + + /// Memory usage limit + uint64_t memlimit; + + /// Amount of memory actually needed (only an estimate) + uint64_t memusage; + + /// Options decoded from the header needed to initialize + /// the LZMA decoder + lzma_options_lzma options; +} lzma_alone_coder; + + +static lzma_ret +alone_decode(void *coder_ptr, const lzma_allocator *allocator, + const uint8_t *restrict in, size_t *restrict in_pos, + size_t in_size, uint8_t *restrict out, + size_t *restrict out_pos, size_t out_size, + lzma_action action) +{ + lzma_alone_coder *coder = coder_ptr; + + while (*out_pos < out_size + && (coder->sequence == SEQ_CODE || *in_pos < in_size)) + switch (coder->sequence) { + case SEQ_PROPERTIES: + if (lzma_lzma_lclppb_decode(&coder->options, in[*in_pos])) + return LZMA_FORMAT_ERROR; + + coder->sequence = SEQ_DICTIONARY_SIZE; + ++*in_pos; + break; + + case SEQ_DICTIONARY_SIZE: + coder->options.dict_size + |= (size_t)(in[*in_pos]) << (coder->pos * 8); + + if (++coder->pos == 4) { + if (coder->picky && coder->options.dict_size + != UINT32_MAX) { + // A hack to ditch tons of false positives: + // We allow only dictionary sizes that are + // 2^n or 2^n + 2^(n-1). LZMA_Alone created + // only files with 2^n, but accepts any + // dictionary size. + uint32_t d = coder->options.dict_size - 1; + d |= d >> 2; + d |= d >> 3; + d |= d >> 4; + d |= d >> 8; + d |= d >> 16; + ++d; + + if (d != coder->options.dict_size) + return LZMA_FORMAT_ERROR; + } + + coder->pos = 0; + coder->sequence = SEQ_UNCOMPRESSED_SIZE; + } + + ++*in_pos; + break; + + case SEQ_UNCOMPRESSED_SIZE: + coder->uncompressed_size + |= (lzma_vli)(in[*in_pos]) << (coder->pos * 8); + ++*in_pos; + if (++coder->pos < 8) + break; + + // Another hack to ditch false positives: Assume that + // if the uncompressed size is known, it must be less + // than 256 GiB. + // + // FIXME? Without picky we allow > LZMA_VLI_MAX which doesn't + // really matter in this specific situation (> LZMA_VLI_MAX is + // safe in the LZMA decoder) but it's somewhat weird still. + if (coder->picky + && coder->uncompressed_size != LZMA_VLI_UNKNOWN + && coder->uncompressed_size + >= (LZMA_VLI_C(1) << 38)) + return LZMA_FORMAT_ERROR; + + // Use LZMA_FILTER_LZMA1EXT features to specify the + // uncompressed size and that the end marker is allowed + // even when the uncompressed size is known. Both .lzma + // header and LZMA1EXT use UINT64_MAX indicate that size + // is unknown. + coder->options.ext_flags = LZMA_LZMA1EXT_ALLOW_EOPM; + lzma_set_ext_size(coder->options, coder->uncompressed_size); + + // Calculate the memory usage so that it is ready + // for SEQ_CODER_INIT. We know that lc/lp/pb are valid + // so we can use the _nocheck variant. + coder->memusage + = lzma_lzma_decoder_memusage_nocheck(&coder->options) + + LZMA_MEMUSAGE_BASE; + + coder->pos = 0; + coder->sequence = SEQ_CODER_INIT; + FALLTHROUGH; + + case SEQ_CODER_INIT: { + if (coder->memusage > coder->memlimit) + return LZMA_MEMLIMIT_ERROR; + + lzma_filter_info filters[2] = { + { + .id = LZMA_FILTER_LZMA1EXT, + .init = &lzma_lzma_decoder_init, + .options = &coder->options, + }, { + .init = NULL, + } + }; + + return_if_error(lzma_next_filter_init(&coder->next, + allocator, filters)); + + coder->sequence = SEQ_CODE; + break; + } + + case SEQ_CODE: { + return coder->next.code(coder->next.coder, + allocator, in, in_pos, in_size, + out, out_pos, out_size, action); + } + + default: + return LZMA_PROG_ERROR; + } + + return LZMA_OK; +} + + +static void +alone_decoder_end(void *coder_ptr, const lzma_allocator *allocator) +{ + lzma_alone_coder *coder = coder_ptr; + lzma_next_end(&coder->next, allocator); + lzma_free(coder, allocator); + return; +} + + +static lzma_ret +alone_decoder_memconfig(void *coder_ptr, uint64_t *memusage, + uint64_t *old_memlimit, uint64_t new_memlimit) +{ + lzma_alone_coder *coder = coder_ptr; + + *memusage = coder->memusage; + *old_memlimit = coder->memlimit; + + if (new_memlimit != 0) { + if (new_memlimit < coder->memusage) + return LZMA_MEMLIMIT_ERROR; + + coder->memlimit = new_memlimit; + } + + return LZMA_OK; +} + + +extern lzma_ret +lzma_alone_decoder_init(lzma_next_coder *next, const lzma_allocator *allocator, + uint64_t memlimit, bool picky) +{ + lzma_next_coder_init(&lzma_alone_decoder_init, next, allocator); + + lzma_alone_coder *coder = next->coder; + + if (coder == NULL) { + coder = lzma_alloc(sizeof(lzma_alone_coder), allocator); + if (coder == NULL) + return LZMA_MEM_ERROR; + + next->coder = coder; + next->code = &alone_decode; + next->end = &alone_decoder_end; + next->memconfig = &alone_decoder_memconfig; + coder->next = LZMA_NEXT_CODER_INIT; + } + + coder->sequence = SEQ_PROPERTIES; + coder->picky = picky; + coder->pos = 0; + coder->options.dict_size = 0; + coder->options.preset_dict = NULL; + coder->options.preset_dict_size = 0; + coder->uncompressed_size = 0; + coder->memlimit = my_max(1, memlimit); + coder->memusage = LZMA_MEMUSAGE_BASE; + + return LZMA_OK; +} + + +extern LZMA_API(lzma_ret) +lzma_alone_decoder(lzma_stream *strm, uint64_t memlimit) +{ + lzma_next_strm_init(lzma_alone_decoder_init, strm, memlimit, false); + + strm->internal->supported_actions[LZMA_RUN] = true; + strm->internal->supported_actions[LZMA_FINISH] = true; + + return LZMA_OK; +} diff --git a/src/native/external/xz/src/liblzma/common/alone_decoder.h b/src/native/external/xz/src/liblzma/common/alone_decoder.h new file mode 100644 index 00000000000000..61ee24d97fe4a1 --- /dev/null +++ b/src/native/external/xz/src/liblzma/common/alone_decoder.h @@ -0,0 +1,22 @@ +// SPDX-License-Identifier: 0BSD + +/////////////////////////////////////////////////////////////////////////////// +// +/// \file alone_decoder.h +/// \brief Decoder for LZMA_Alone files +// +// Author: Lasse Collin +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef LZMA_ALONE_DECODER_H +#define LZMA_ALONE_DECODER_H + +#include "common.h" + + +extern lzma_ret lzma_alone_decoder_init( + lzma_next_coder *next, const lzma_allocator *allocator, + uint64_t memlimit, bool picky); + +#endif diff --git a/src/native/external/xz/src/liblzma/common/alone_encoder.c b/src/native/external/xz/src/liblzma/common/alone_encoder.c new file mode 100644 index 00000000000000..21b039509ae5b6 --- /dev/null +++ b/src/native/external/xz/src/liblzma/common/alone_encoder.c @@ -0,0 +1,151 @@ +// SPDX-License-Identifier: 0BSD + +/////////////////////////////////////////////////////////////////////////////// +// +/// \file alone_encoder.c +/// \brief Encoder for LZMA_Alone files +// +// Author: Lasse Collin +// +/////////////////////////////////////////////////////////////////////////////// + +#include "common.h" +#include "lzma_encoder.h" + + +#define ALONE_HEADER_SIZE (1 + 4 + 8) + + +typedef struct { + lzma_next_coder next; + + enum { + SEQ_HEADER, + SEQ_CODE, + } sequence; + + size_t header_pos; + uint8_t header[ALONE_HEADER_SIZE]; +} lzma_alone_coder; + + +static lzma_ret +alone_encode(void *coder_ptr, const lzma_allocator *allocator, + const uint8_t *restrict in, size_t *restrict in_pos, + size_t in_size, uint8_t *restrict out, + size_t *restrict out_pos, size_t out_size, + lzma_action action) +{ + lzma_alone_coder *coder = coder_ptr; + + while (*out_pos < out_size) + switch (coder->sequence) { + case SEQ_HEADER: + lzma_bufcpy(coder->header, &coder->header_pos, + ALONE_HEADER_SIZE, + out, out_pos, out_size); + if (coder->header_pos < ALONE_HEADER_SIZE) + return LZMA_OK; + + coder->sequence = SEQ_CODE; + break; + + case SEQ_CODE: + return coder->next.code(coder->next.coder, + allocator, in, in_pos, in_size, + out, out_pos, out_size, action); + + default: + assert(0); + return LZMA_PROG_ERROR; + } + + return LZMA_OK; +} + + +static void +alone_encoder_end(void *coder_ptr, const lzma_allocator *allocator) +{ + lzma_alone_coder *coder = coder_ptr; + lzma_next_end(&coder->next, allocator); + lzma_free(coder, allocator); + return; +} + + +static lzma_ret +alone_encoder_init(lzma_next_coder *next, const lzma_allocator *allocator, + const lzma_options_lzma *options) +{ + lzma_next_coder_init(&alone_encoder_init, next, allocator); + + lzma_alone_coder *coder = next->coder; + + if (coder == NULL) { + coder = lzma_alloc(sizeof(lzma_alone_coder), allocator); + if (coder == NULL) + return LZMA_MEM_ERROR; + + next->coder = coder; + next->code = &alone_encode; + next->end = &alone_encoder_end; + coder->next = LZMA_NEXT_CODER_INIT; + } + + // Basic initializations + coder->sequence = SEQ_HEADER; + coder->header_pos = 0; + + // Encode the header: + // - Properties (1 byte) + if (lzma_lzma_lclppb_encode(options, coder->header)) + return LZMA_OPTIONS_ERROR; + + // - Dictionary size (4 bytes) + if (options->dict_size < LZMA_DICT_SIZE_MIN) + return LZMA_OPTIONS_ERROR; + + // Round up to the next 2^n or 2^n + 2^(n - 1) depending on which + // one is the next unless it is UINT32_MAX. While the header would + // allow any 32-bit integer, we do this to keep the decoder of liblzma + // accepting the resulting files. + uint32_t d = options->dict_size - 1; + d |= d >> 2; + d |= d >> 3; + d |= d >> 4; + d |= d >> 8; + d |= d >> 16; + if (d != UINT32_MAX) + ++d; + + write32le(coder->header + 1, d); + + // - Uncompressed size (always unknown and using EOPM) + memset(coder->header + 1 + 4, 0xFF, 8); + + // Initialize the LZMA encoder. + const lzma_filter_info filters[2] = { + { + .id = LZMA_FILTER_LZMA1, + .init = &lzma_lzma_encoder_init, + .options = (void *)(options), + }, { + .init = NULL, + } + }; + + return lzma_next_filter_init(&coder->next, allocator, filters); +} + + +extern LZMA_API(lzma_ret) +lzma_alone_encoder(lzma_stream *strm, const lzma_options_lzma *options) +{ + lzma_next_strm_init(alone_encoder_init, strm, options); + + strm->internal->supported_actions[LZMA_RUN] = true; + strm->internal->supported_actions[LZMA_FINISH] = true; + + return LZMA_OK; +} diff --git a/src/native/external/xz/src/liblzma/common/auto_decoder.c b/src/native/external/xz/src/liblzma/common/auto_decoder.c new file mode 100644 index 00000000000000..da49345f909de1 --- /dev/null +++ b/src/native/external/xz/src/liblzma/common/auto_decoder.c @@ -0,0 +1,204 @@ +// SPDX-License-Identifier: 0BSD + +/////////////////////////////////////////////////////////////////////////////// +// +/// \file auto_decoder.c +/// \brief Autodetect between .xz, .lzma (LZMA_Alone), and .lz (lzip) +// +// Author: Lasse Collin +// +/////////////////////////////////////////////////////////////////////////////// + +#include "stream_decoder.h" +#include "alone_decoder.h" +#ifdef HAVE_LZIP_DECODER +# include "lzip_decoder.h" +#endif + + +typedef struct { + /// .xz Stream decoder, LZMA_Alone decoder, or lzip decoder + lzma_next_coder next; + + uint64_t memlimit; + uint32_t flags; + + enum { + SEQ_INIT, + SEQ_CODE, + SEQ_FINISH, + } sequence; +} lzma_auto_coder; + + +static lzma_ret +auto_decode(void *coder_ptr, const lzma_allocator *allocator, + const uint8_t *restrict in, size_t *restrict in_pos, + size_t in_size, uint8_t *restrict out, + size_t *restrict out_pos, size_t out_size, lzma_action action) +{ + lzma_auto_coder *coder = coder_ptr; + + switch (coder->sequence) { + case SEQ_INIT: + if (*in_pos >= in_size) + return LZMA_OK; + + // Update the sequence now, because we want to continue from + // SEQ_CODE even if we return some LZMA_*_CHECK. + coder->sequence = SEQ_CODE; + + // Detect the file format. .xz files start with 0xFD which + // cannot be the first byte of .lzma (LZMA_Alone) format. + // The .lz format starts with 0x4C which could be the + // first byte of a .lzma file but luckily it would mean + // lc/lp/pb being 4/3/1 which liblzma doesn't support because + // lc + lp > 4. So using just 0x4C to detect .lz is OK here. + if (in[*in_pos] == 0xFD) { + return_if_error(lzma_stream_decoder_init( + &coder->next, allocator, + coder->memlimit, coder->flags)); +#ifdef HAVE_LZIP_DECODER + } else if (in[*in_pos] == 0x4C) { + return_if_error(lzma_lzip_decoder_init( + &coder->next, allocator, + coder->memlimit, coder->flags)); +#endif + } else { + return_if_error(lzma_alone_decoder_init(&coder->next, + allocator, coder->memlimit, true)); + + // If the application wants to know about missing + // integrity check or about the check in general, we + // need to handle it here, because LZMA_Alone decoder + // doesn't accept any flags. + if (coder->flags & LZMA_TELL_NO_CHECK) + return LZMA_NO_CHECK; + + if (coder->flags & LZMA_TELL_ANY_CHECK) + return LZMA_GET_CHECK; + } + + FALLTHROUGH; + + case SEQ_CODE: { + const lzma_ret ret = coder->next.code( + coder->next.coder, allocator, + in, in_pos, in_size, + out, out_pos, out_size, action); + if (ret != LZMA_STREAM_END + || (coder->flags & LZMA_CONCATENATED) == 0) + return ret; + + coder->sequence = SEQ_FINISH; + FALLTHROUGH; + } + + case SEQ_FINISH: + // When LZMA_CONCATENATED was used and we were decoding + // a LZMA_Alone file, we need to check that there is no + // trailing garbage and wait for LZMA_FINISH. + if (*in_pos < in_size) + return LZMA_DATA_ERROR; + + return action == LZMA_FINISH ? LZMA_STREAM_END : LZMA_OK; + + default: + assert(0); + return LZMA_PROG_ERROR; + } +} + + +static void +auto_decoder_end(void *coder_ptr, const lzma_allocator *allocator) +{ + lzma_auto_coder *coder = coder_ptr; + lzma_next_end(&coder->next, allocator); + lzma_free(coder, allocator); + return; +} + + +static lzma_check +auto_decoder_get_check(const void *coder_ptr) +{ + const lzma_auto_coder *coder = coder_ptr; + + // It is LZMA_Alone if get_check is NULL. + return coder->next.get_check == NULL ? LZMA_CHECK_NONE + : coder->next.get_check(coder->next.coder); +} + + +static lzma_ret +auto_decoder_memconfig(void *coder_ptr, uint64_t *memusage, + uint64_t *old_memlimit, uint64_t new_memlimit) +{ + lzma_auto_coder *coder = coder_ptr; + + lzma_ret ret; + + if (coder->next.memconfig != NULL) { + ret = coder->next.memconfig(coder->next.coder, + memusage, old_memlimit, new_memlimit); + assert(*old_memlimit == coder->memlimit); + } else { + // No coder is configured yet. Use the base value as + // the current memory usage. + *memusage = LZMA_MEMUSAGE_BASE; + *old_memlimit = coder->memlimit; + + ret = LZMA_OK; + if (new_memlimit != 0 && new_memlimit < *memusage) + ret = LZMA_MEMLIMIT_ERROR; + } + + if (ret == LZMA_OK && new_memlimit != 0) + coder->memlimit = new_memlimit; + + return ret; +} + + +static lzma_ret +auto_decoder_init(lzma_next_coder *next, const lzma_allocator *allocator, + uint64_t memlimit, uint32_t flags) +{ + lzma_next_coder_init(&auto_decoder_init, next, allocator); + + if (flags & ~LZMA_SUPPORTED_FLAGS) + return LZMA_OPTIONS_ERROR; + + lzma_auto_coder *coder = next->coder; + if (coder == NULL) { + coder = lzma_alloc(sizeof(lzma_auto_coder), allocator); + if (coder == NULL) + return LZMA_MEM_ERROR; + + next->coder = coder; + next->code = &auto_decode; + next->end = &auto_decoder_end; + next->get_check = &auto_decoder_get_check; + next->memconfig = &auto_decoder_memconfig; + coder->next = LZMA_NEXT_CODER_INIT; + } + + coder->memlimit = my_max(1, memlimit); + coder->flags = flags; + coder->sequence = SEQ_INIT; + + return LZMA_OK; +} + + +extern LZMA_API(lzma_ret) +lzma_auto_decoder(lzma_stream *strm, uint64_t memlimit, uint32_t flags) +{ + lzma_next_strm_init(auto_decoder_init, strm, memlimit, flags); + + strm->internal->supported_actions[LZMA_RUN] = true; + strm->internal->supported_actions[LZMA_FINISH] = true; + + return LZMA_OK; +} diff --git a/src/native/external/xz/src/liblzma/common/block_buffer_decoder.c b/src/native/external/xz/src/liblzma/common/block_buffer_decoder.c new file mode 100644 index 00000000000000..55566cd2f2b092 --- /dev/null +++ b/src/native/external/xz/src/liblzma/common/block_buffer_decoder.c @@ -0,0 +1,79 @@ +// SPDX-License-Identifier: 0BSD + +/////////////////////////////////////////////////////////////////////////////// +// +/// \file block_buffer_decoder.c +/// \brief Single-call .xz Block decoder +// +// Author: Lasse Collin +// +/////////////////////////////////////////////////////////////////////////////// + +#include "block_decoder.h" + + +extern LZMA_API(lzma_ret) +lzma_block_buffer_decode(lzma_block *block, const lzma_allocator *allocator, + const uint8_t *in, size_t *in_pos, size_t in_size, + uint8_t *out, size_t *out_pos, size_t out_size) +{ + if (in_pos == NULL || (in == NULL && *in_pos != in_size) + || *in_pos > in_size || out_pos == NULL + || (out == NULL && *out_pos != out_size) + || *out_pos > out_size) + return LZMA_PROG_ERROR; + + // Initialize the Block decoder. + lzma_next_coder block_decoder = LZMA_NEXT_CODER_INIT; + lzma_ret ret = lzma_block_decoder_init( + &block_decoder, allocator, block); + + if (ret == LZMA_OK) { + // Save the positions so that we can restore them in case + // an error occurs. + const size_t in_start = *in_pos; + const size_t out_start = *out_pos; + + // Do the actual decoding. + ret = block_decoder.code(block_decoder.coder, allocator, + in, in_pos, in_size, out, out_pos, out_size, + LZMA_FINISH); + + if (ret == LZMA_STREAM_END) { + ret = LZMA_OK; + } else { + if (ret == LZMA_OK) { + // Either the input was truncated or the + // output buffer was too small. + assert(*in_pos == in_size + || *out_pos == out_size); + + // If all the input was consumed, then the + // input is truncated, even if the output + // buffer is also full. This is because + // processing the last byte of the Block + // never produces output. + // + // NOTE: This assumption may break when new + // filters are added, if the end marker of + // the filter doesn't consume at least one + // complete byte. + if (*in_pos == in_size) + ret = LZMA_DATA_ERROR; + else + ret = LZMA_BUF_ERROR; + } + + // Restore the positions. + *in_pos = in_start; + *out_pos = out_start; + } + } + + // Free the decoder memory. This needs to be done even if + // initialization fails, because the internal API doesn't + // require the initialization function to free its memory on error. + lzma_next_end(&block_decoder, allocator); + + return ret; +} diff --git a/src/native/external/xz/src/liblzma/common/block_buffer_encoder.c b/src/native/external/xz/src/liblzma/common/block_buffer_encoder.c new file mode 100644 index 00000000000000..2219593be1f4df --- /dev/null +++ b/src/native/external/xz/src/liblzma/common/block_buffer_encoder.c @@ -0,0 +1,354 @@ +// SPDX-License-Identifier: 0BSD + +/////////////////////////////////////////////////////////////////////////////// +// +/// \file block_buffer_encoder.c +/// \brief Single-call .xz Block encoder +// +// Author: Lasse Collin +// +/////////////////////////////////////////////////////////////////////////////// + +#include "block_buffer_encoder.h" +#include "block_encoder.h" +#include "filter_encoder.h" +#include "lzma2_encoder.h" +#include "check.h" + + +/// Estimate the maximum size of the Block Header and Check fields for +/// a Block that uses LZMA2 uncompressed chunks. We could use +/// lzma_block_header_size() but this is simpler. +/// +/// Block Header Size + Block Flags + Compressed Size +/// + Uncompressed Size + Filter Flags for LZMA2 + CRC32 + Check +/// and round up to the next multiple of four to take Header Padding +/// into account. +#define HEADERS_BOUND ((1 + 1 + 2 * LZMA_VLI_BYTES_MAX + 3 + 4 \ + + LZMA_CHECK_SIZE_MAX + 3) & ~3) + + +static uint64_t +lzma2_bound(uint64_t uncompressed_size) +{ + // Prevent integer overflow in overhead calculation. + if (uncompressed_size > COMPRESSED_SIZE_MAX) + return 0; + + // Calculate the exact overhead of the LZMA2 headers: Round + // uncompressed_size up to the next multiple of LZMA2_CHUNK_MAX, + // multiply by the size of per-chunk header, and add one byte for + // the end marker. + const uint64_t overhead = ((uncompressed_size + LZMA2_CHUNK_MAX - 1) + / LZMA2_CHUNK_MAX) + * LZMA2_HEADER_UNCOMPRESSED + 1; + + // Catch the possible integer overflow. + if (COMPRESSED_SIZE_MAX - overhead < uncompressed_size) + return 0; + + return uncompressed_size + overhead; +} + + +extern uint64_t +lzma_block_buffer_bound64(uint64_t uncompressed_size) +{ + // If the data doesn't compress, we always use uncompressed + // LZMA2 chunks. + uint64_t lzma2_size = lzma2_bound(uncompressed_size); + if (lzma2_size == 0) + return 0; + + // Take Block Padding into account. + lzma2_size = (lzma2_size + 3) & ~UINT64_C(3); + + // No risk of integer overflow because lzma2_bound() already takes + // into account the size of the headers in the Block. + return HEADERS_BOUND + lzma2_size; +} + + +extern LZMA_API(size_t) +lzma_block_buffer_bound(size_t uncompressed_size) +{ + uint64_t ret = lzma_block_buffer_bound64(uncompressed_size); + +#if SIZE_MAX < UINT64_MAX + // Catch the possible integer overflow on 32-bit systems. + if (ret > SIZE_MAX) + return 0; +#endif + + return (size_t)ret; +} + + +static lzma_ret +block_encode_uncompressed(lzma_block *block, const uint8_t *in, size_t in_size, + uint8_t *out, size_t *out_pos, size_t out_size) +{ + // Use LZMA2 uncompressed chunks. We wouldn't need a dictionary at + // all, but LZMA2 always requires a dictionary, so use the minimum + // value to minimize memory usage of the decoder. + lzma_options_lzma lzma2 = { + .dict_size = LZMA_DICT_SIZE_MIN, + }; + + lzma_filter filters[2]; + filters[0].id = LZMA_FILTER_LZMA2; + filters[0].options = &lzma2; + filters[1].id = LZMA_VLI_UNKNOWN; + + // Set the above filter options to *block temporarily so that we can + // encode the Block Header. + lzma_filter *filters_orig = block->filters; + block->filters = filters; + + if (lzma_block_header_size(block) != LZMA_OK) { + block->filters = filters_orig; + return LZMA_PROG_ERROR; + } + + // Check that there's enough output space. The caller has already + // set block->compressed_size to what lzma2_bound() has returned, + // so we can reuse that value. We know that compressed_size is a + // known valid VLI and header_size is a small value so their sum + // will never overflow. + assert(block->compressed_size == lzma2_bound(in_size)); + if (out_size - *out_pos + < block->header_size + block->compressed_size) { + block->filters = filters_orig; + return LZMA_BUF_ERROR; + } + + if (lzma_block_header_encode(block, out + *out_pos) != LZMA_OK) { + block->filters = filters_orig; + return LZMA_PROG_ERROR; + } + + block->filters = filters_orig; + *out_pos += block->header_size; + + // Encode the data using LZMA2 uncompressed chunks. + size_t in_pos = 0; + uint8_t control = 0x01; // Dictionary reset + + while (in_pos < in_size) { + // Control byte: Indicate uncompressed chunk, of which + // the first resets the dictionary. + out[(*out_pos)++] = control; + control = 0x02; // No dictionary reset + + // Size of the uncompressed chunk + const size_t copy_size + = my_min(in_size - in_pos, LZMA2_CHUNK_MAX); + out[(*out_pos)++] = (uint8_t)((copy_size - 1) >> 8); + out[(*out_pos)++] = (copy_size - 1) & 0xFF; + + // The actual data + assert(*out_pos + copy_size <= out_size); + memcpy(out + *out_pos, in + in_pos, copy_size); + + in_pos += copy_size; + *out_pos += copy_size; + } + + // End marker + out[(*out_pos)++] = 0x00; + assert(*out_pos <= out_size); + + return LZMA_OK; +} + + +static lzma_ret +block_encode_normal(lzma_block *block, const lzma_allocator *allocator, + const uint8_t *in, size_t in_size, + uint8_t *out, size_t *out_pos, size_t out_size) +{ + // Find out the size of the Block Header. + return_if_error(lzma_block_header_size(block)); + + // Reserve space for the Block Header and skip it for now. + if (out_size - *out_pos <= block->header_size) + return LZMA_BUF_ERROR; + + const size_t out_start = *out_pos; + *out_pos += block->header_size; + + // Limit out_size so that we stop encoding if the output would grow + // bigger than what uncompressed Block would be. + if (out_size - *out_pos > block->compressed_size) + out_size = *out_pos + (size_t)block->compressed_size; + + // TODO: In many common cases this could be optimized to use + // significantly less memory. + lzma_next_coder raw_encoder = LZMA_NEXT_CODER_INIT; + lzma_ret ret = lzma_raw_encoder_init( + &raw_encoder, allocator, block->filters); + + if (ret == LZMA_OK) { + size_t in_pos = 0; + ret = raw_encoder.code(raw_encoder.coder, allocator, + in, &in_pos, in_size, out, out_pos, out_size, + LZMA_FINISH); + } + + // NOTE: This needs to be run even if lzma_raw_encoder_init() failed. + lzma_next_end(&raw_encoder, allocator); + + if (ret == LZMA_STREAM_END) { + // Compression was successful. Write the Block Header. + block->compressed_size + = *out_pos - (out_start + block->header_size); + ret = lzma_block_header_encode(block, out + out_start); + if (ret != LZMA_OK) + ret = LZMA_PROG_ERROR; + + } else if (ret == LZMA_OK) { + // Output buffer became full. + ret = LZMA_BUF_ERROR; + } + + // Reset *out_pos if something went wrong. + if (ret != LZMA_OK) + *out_pos = out_start; + + return ret; +} + + +static lzma_ret +block_buffer_encode(lzma_block *block, const lzma_allocator *allocator, + const uint8_t *in, size_t in_size, + uint8_t *out, size_t *out_pos, size_t out_size, + bool try_to_compress) +{ + // Validate the arguments. + if (block == NULL || (in == NULL && in_size != 0) || out == NULL + || out_pos == NULL || *out_pos > out_size) + return LZMA_PROG_ERROR; + + // The contents of the structure may depend on the version so + // check the version before validating the contents of *block. + if (block->version > 1) + return LZMA_OPTIONS_ERROR; + + if ((unsigned int)(block->check) > LZMA_CHECK_ID_MAX + || (try_to_compress && block->filters == NULL)) + return LZMA_PROG_ERROR; + + if (!lzma_check_is_supported(block->check)) + return LZMA_UNSUPPORTED_CHECK; + + // Size of a Block has to be a multiple of four, so limit the size + // here already. This way we don't need to check it again when adding + // Block Padding. + out_size -= (out_size - *out_pos) & 3; + + // Get the size of the Check field. + const size_t check_size = lzma_check_size(block->check); + assert(check_size != UINT32_MAX); + + // Reserve space for the Check field. + if (out_size - *out_pos <= check_size) + return LZMA_BUF_ERROR; + + out_size -= check_size; + + // Initialize block->uncompressed_size and calculate the worst-case + // value for block->compressed_size. + block->uncompressed_size = in_size; + block->compressed_size = lzma2_bound(in_size); + if (block->compressed_size == 0) + return LZMA_DATA_ERROR; + + // Do the actual compression. + lzma_ret ret = LZMA_BUF_ERROR; + if (try_to_compress) + ret = block_encode_normal(block, allocator, + in, in_size, out, out_pos, out_size); + + if (ret != LZMA_OK) { + // If the error was something else than output buffer + // becoming full, return the error now. + if (ret != LZMA_BUF_ERROR) + return ret; + + // The data was incompressible (at least with the options + // given to us) or the output buffer was too small. Use the + // uncompressed chunks of LZMA2 to wrap the data into a valid + // Block. If we haven't been given enough output space, even + // this may fail. + return_if_error(block_encode_uncompressed(block, in, in_size, + out, out_pos, out_size)); + } + + assert(*out_pos <= out_size); + + // Block Padding. No buffer overflow here, because we already adjusted + // out_size so that (out_size - out_start) is a multiple of four. + // Thus, if the buffer is full, the loop body can never run. + for (size_t i = (size_t)(block->compressed_size); i & 3; ++i) { + assert(*out_pos < out_size); + out[(*out_pos)++] = 0x00; + } + + // If there's no Check field, we are done now. + if (check_size > 0) { + // Calculate the integrity check. We reserved space for + // the Check field earlier so we don't need to check for + // available output space here. + lzma_check_state check; + lzma_check_init(&check, block->check); + lzma_check_update(&check, block->check, in, in_size); + lzma_check_finish(&check, block->check); + + memcpy(block->raw_check, check.buffer.u8, check_size); + memcpy(out + *out_pos, check.buffer.u8, check_size); + *out_pos += check_size; + } + + return LZMA_OK; +} + + +extern LZMA_API(lzma_ret) +lzma_block_buffer_encode(lzma_block *block, const lzma_allocator *allocator, + const uint8_t *in, size_t in_size, + uint8_t *out, size_t *out_pos, size_t out_size) +{ + return block_buffer_encode(block, allocator, + in, in_size, out, out_pos, out_size, true); +} + + +#ifdef HAVE_SYMBOL_VERSIONS_LINUX +// This is for compatibility with binaries linked against liblzma that +// has been patched with xz-5.2.2-compat-libs.patch from RHEL/CentOS 7. +LZMA_SYMVER_API("lzma_block_uncomp_encode@XZ_5.2.2", + lzma_ret, lzma_block_uncomp_encode_522)(lzma_block *block, + const uint8_t *in, size_t in_size, + uint8_t *out, size_t *out_pos, size_t out_size) + lzma_nothrow lzma_attr_warn_unused_result + __attribute__((__alias__("lzma_block_uncomp_encode_52"))); + +LZMA_SYMVER_API("lzma_block_uncomp_encode@@XZ_5.2", + lzma_ret, lzma_block_uncomp_encode_52)(lzma_block *block, + const uint8_t *in, size_t in_size, + uint8_t *out, size_t *out_pos, size_t out_size) + lzma_nothrow lzma_attr_warn_unused_result; + +#define lzma_block_uncomp_encode lzma_block_uncomp_encode_52 +#endif +extern LZMA_API(lzma_ret) +lzma_block_uncomp_encode(lzma_block *block, + const uint8_t *in, size_t in_size, + uint8_t *out, size_t *out_pos, size_t out_size) +{ + // It won't allocate any memory from heap so no need + // for lzma_allocator. + return block_buffer_encode(block, NULL, + in, in_size, out, out_pos, out_size, false); +} diff --git a/src/native/external/xz/src/liblzma/common/block_buffer_encoder.h b/src/native/external/xz/src/liblzma/common/block_buffer_encoder.h new file mode 100644 index 00000000000000..5274ac40d3aae1 --- /dev/null +++ b/src/native/external/xz/src/liblzma/common/block_buffer_encoder.h @@ -0,0 +1,23 @@ +// SPDX-License-Identifier: 0BSD + +/////////////////////////////////////////////////////////////////////////////// +// +/// \file block_buffer_encoder.h +/// \brief Single-call .xz Block encoder +// +// Author: Lasse Collin +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef LZMA_BLOCK_BUFFER_ENCODER_H +#define LZMA_BLOCK_BUFFER_ENCODER_H + +#include "common.h" + + +/// uint64_t version of lzma_block_buffer_bound(). It is used by +/// stream_encoder_mt.c. Probably the original lzma_block_buffer_bound() +/// should have been 64-bit, but fixing it would break the ABI. +extern uint64_t lzma_block_buffer_bound64(uint64_t uncompressed_size); + +#endif diff --git a/src/native/external/xz/src/liblzma/common/block_decoder.c b/src/native/external/xz/src/liblzma/common/block_decoder.c new file mode 100644 index 00000000000000..bbc9f5566c8bfc --- /dev/null +++ b/src/native/external/xz/src/liblzma/common/block_decoder.c @@ -0,0 +1,286 @@ +// SPDX-License-Identifier: 0BSD + +/////////////////////////////////////////////////////////////////////////////// +// +/// \file block_decoder.c +/// \brief Decodes .xz Blocks +// +// Author: Lasse Collin +// +/////////////////////////////////////////////////////////////////////////////// + +#include "block_decoder.h" +#include "filter_decoder.h" +#include "check.h" + + +typedef struct { + enum { + SEQ_CODE, + SEQ_PADDING, + SEQ_CHECK, + } sequence; + + /// The filters in the chain; initialized with lzma_raw_decoder_init(). + lzma_next_coder next; + + /// Decoding options; we also write Compressed Size and Uncompressed + /// Size back to this structure when the decoding has been finished. + lzma_block *block; + + /// Compressed Size calculated while decoding + lzma_vli compressed_size; + + /// Uncompressed Size calculated while decoding + lzma_vli uncompressed_size; + + /// Maximum allowed Compressed Size; this takes into account the + /// size of the Block Header and Check fields when Compressed Size + /// is unknown. + lzma_vli compressed_limit; + + /// Maximum allowed Uncompressed Size. + lzma_vli uncompressed_limit; + + /// Position when reading the Check field + size_t check_pos; + + /// Check of the uncompressed data + lzma_check_state check; + + /// True if the integrity check won't be calculated and verified. + bool ignore_check; +} lzma_block_coder; + + +static inline bool +is_size_valid(lzma_vli size, lzma_vli reference) +{ + return reference == LZMA_VLI_UNKNOWN || reference == size; +} + + +static lzma_ret +block_decode(void *coder_ptr, const lzma_allocator *allocator, + const uint8_t *restrict in, size_t *restrict in_pos, + size_t in_size, uint8_t *restrict out, + size_t *restrict out_pos, size_t out_size, lzma_action action) +{ + lzma_block_coder *coder = coder_ptr; + + switch (coder->sequence) { + case SEQ_CODE: { + const size_t in_start = *in_pos; + const size_t out_start = *out_pos; + + // Limit the amount of input and output space that we give + // to the raw decoder based on the information we have + // (or don't have) from Block Header. + const size_t in_stop = *in_pos + (size_t)my_min( + in_size - *in_pos, + coder->compressed_limit - coder->compressed_size); + const size_t out_stop = *out_pos + (size_t)my_min( + out_size - *out_pos, + coder->uncompressed_limit - coder->uncompressed_size); + + const lzma_ret ret = coder->next.code(coder->next.coder, + allocator, in, in_pos, in_stop, + out, out_pos, out_stop, action); + + const size_t in_used = *in_pos - in_start; + const size_t out_used = *out_pos - out_start; + + // Because we have limited the input and output sizes, + // we know that these cannot grow too big or overflow. + coder->compressed_size += in_used; + coder->uncompressed_size += out_used; + + if (ret == LZMA_OK) { + const bool comp_done = coder->compressed_size + == coder->block->compressed_size; + const bool uncomp_done = coder->uncompressed_size + == coder->block->uncompressed_size; + + // If both input and output amounts match the sizes + // in Block Header but we still got LZMA_OK instead + // of LZMA_STREAM_END, the file is broken. + if (comp_done && uncomp_done) + return LZMA_DATA_ERROR; + + // If the decoder has consumed all the input that it + // needs but it still couldn't fill the output buffer + // or return LZMA_STREAM_END, the file is broken. + if (comp_done && *out_pos < out_size) + return LZMA_DATA_ERROR; + + // If the decoder has produced all the output but + // it still didn't return LZMA_STREAM_END or consume + // more input (for example, detecting an end of + // payload marker may need more input but produce + // no output) the file is broken. + if (uncomp_done && *in_pos < in_size) + return LZMA_DATA_ERROR; + } + + // Don't waste time updating the integrity check if it will be + // ignored. Also skip it if no new output was produced. This + // avoids null pointer + 0 (undefined behavior) when out == 0. + if (!coder->ignore_check && out_used > 0) + lzma_check_update(&coder->check, coder->block->check, + out + out_start, out_used); + + if (ret != LZMA_STREAM_END) + return ret; + + // Compressed and Uncompressed Sizes are now at their final + // values. Verify that they match the values given to us. + if (!is_size_valid(coder->compressed_size, + coder->block->compressed_size) + || !is_size_valid(coder->uncompressed_size, + coder->block->uncompressed_size)) + return LZMA_DATA_ERROR; + + // Copy the values into coder->block. The caller + // may use this information to construct Index. + coder->block->compressed_size = coder->compressed_size; + coder->block->uncompressed_size = coder->uncompressed_size; + + coder->sequence = SEQ_PADDING; + FALLTHROUGH; + } + + case SEQ_PADDING: + // Compressed Data is padded to a multiple of four bytes. + while (coder->compressed_size & 3) { + if (*in_pos >= in_size) + return LZMA_OK; + + // We use compressed_size here just get the Padding + // right. The actual Compressed Size was stored to + // coder->block already, and won't be modified by + // us anymore. + ++coder->compressed_size; + + if (in[(*in_pos)++] != 0x00) + return LZMA_DATA_ERROR; + } + + if (coder->block->check == LZMA_CHECK_NONE) + return LZMA_STREAM_END; + + if (!coder->ignore_check) + lzma_check_finish(&coder->check, coder->block->check); + + coder->sequence = SEQ_CHECK; + FALLTHROUGH; + + case SEQ_CHECK: { + const size_t check_size = lzma_check_size(coder->block->check); + lzma_bufcpy(in, in_pos, in_size, coder->block->raw_check, + &coder->check_pos, check_size); + if (coder->check_pos < check_size) + return LZMA_OK; + + // Validate the Check only if we support it. + // coder->check.buffer may be uninitialized + // when the Check ID is not supported. + if (!coder->ignore_check + && lzma_check_is_supported(coder->block->check) + && memcmp(coder->block->raw_check, + coder->check.buffer.u8, + check_size) != 0) + return LZMA_DATA_ERROR; + + return LZMA_STREAM_END; + } + } + + return LZMA_PROG_ERROR; +} + + +static void +block_decoder_end(void *coder_ptr, const lzma_allocator *allocator) +{ + lzma_block_coder *coder = coder_ptr; + lzma_next_end(&coder->next, allocator); + lzma_free(coder, allocator); + return; +} + + +extern lzma_ret +lzma_block_decoder_init(lzma_next_coder *next, const lzma_allocator *allocator, + lzma_block *block) +{ + lzma_next_coder_init(&lzma_block_decoder_init, next, allocator); + + // Validate the options. lzma_block_unpadded_size() does that for us + // except for Uncompressed Size and filters. Filters are validated + // by the raw decoder. + if (lzma_block_unpadded_size(block) == 0 + || !lzma_vli_is_valid(block->uncompressed_size)) + return LZMA_PROG_ERROR; + + // Allocate *next->coder if needed. + lzma_block_coder *coder = next->coder; + if (coder == NULL) { + coder = lzma_alloc(sizeof(lzma_block_coder), allocator); + if (coder == NULL) + return LZMA_MEM_ERROR; + + next->coder = coder; + next->code = &block_decode; + next->end = &block_decoder_end; + coder->next = LZMA_NEXT_CODER_INIT; + } + + // Basic initializations + coder->sequence = SEQ_CODE; + coder->block = block; + coder->compressed_size = 0; + coder->uncompressed_size = 0; + + // If Compressed Size is not known, we calculate the maximum allowed + // value so that encoded size of the Block (including Block Padding) + // is still a valid VLI and a multiple of four. + coder->compressed_limit + = block->compressed_size == LZMA_VLI_UNKNOWN + ? (LZMA_VLI_MAX & ~LZMA_VLI_C(3)) + - block->header_size + - lzma_check_size(block->check) + : block->compressed_size; + + // With Uncompressed Size this is simpler. If Block Header lacks + // the size info, then LZMA_VLI_MAX is the maximum possible + // Uncompressed Size. + coder->uncompressed_limit + = block->uncompressed_size == LZMA_VLI_UNKNOWN + ? LZMA_VLI_MAX + : block->uncompressed_size; + + // Initialize the check. It's caller's problem if the Check ID is not + // supported, and the Block decoder cannot verify the Check field. + // Caller can test lzma_check_is_supported(block->check). + coder->check_pos = 0; + lzma_check_init(&coder->check, block->check); + + coder->ignore_check = block->version >= 1 + ? block->ignore_check : false; + + // Initialize the filter chain. + return lzma_raw_decoder_init(&coder->next, allocator, + block->filters); +} + + +extern LZMA_API(lzma_ret) +lzma_block_decoder(lzma_stream *strm, lzma_block *block) +{ + lzma_next_strm_init(lzma_block_decoder_init, strm, block); + + strm->internal->supported_actions[LZMA_RUN] = true; + strm->internal->supported_actions[LZMA_FINISH] = true; + + return LZMA_OK; +} diff --git a/src/native/external/xz/src/liblzma/common/block_decoder.h b/src/native/external/xz/src/liblzma/common/block_decoder.h new file mode 100644 index 00000000000000..2cbf9ba6db832a --- /dev/null +++ b/src/native/external/xz/src/liblzma/common/block_decoder.h @@ -0,0 +1,21 @@ +// SPDX-License-Identifier: 0BSD + +/////////////////////////////////////////////////////////////////////////////// +// +/// \file block_decoder.h +/// \brief Decodes .xz Blocks +// +// Author: Lasse Collin +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef LZMA_BLOCK_DECODER_H +#define LZMA_BLOCK_DECODER_H + +#include "common.h" + + +extern lzma_ret lzma_block_decoder_init(lzma_next_coder *next, + const lzma_allocator *allocator, lzma_block *block); + +#endif diff --git a/src/native/external/xz/src/liblzma/common/block_encoder.c b/src/native/external/xz/src/liblzma/common/block_encoder.c new file mode 100644 index 00000000000000..eb7997a72aeb2c --- /dev/null +++ b/src/native/external/xz/src/liblzma/common/block_encoder.c @@ -0,0 +1,224 @@ +// SPDX-License-Identifier: 0BSD + +/////////////////////////////////////////////////////////////////////////////// +// +/// \file block_encoder.c +/// \brief Encodes .xz Blocks +// +// Author: Lasse Collin +// +/////////////////////////////////////////////////////////////////////////////// + +#include "block_encoder.h" +#include "filter_encoder.h" +#include "check.h" + + +typedef struct { + /// The filters in the chain; initialized with lzma_raw_decoder_init(). + lzma_next_coder next; + + /// Encoding options; we also write Unpadded Size, Compressed Size, + /// and Uncompressed Size back to this structure when the encoding + /// has been finished. + lzma_block *block; + + enum { + SEQ_CODE, + SEQ_PADDING, + SEQ_CHECK, + } sequence; + + /// Compressed Size calculated while encoding + lzma_vli compressed_size; + + /// Uncompressed Size calculated while encoding + lzma_vli uncompressed_size; + + /// Position in the Check field + size_t pos; + + /// Check of the uncompressed data + lzma_check_state check; +} lzma_block_coder; + + +static lzma_ret +block_encode(void *coder_ptr, const lzma_allocator *allocator, + const uint8_t *restrict in, size_t *restrict in_pos, + size_t in_size, uint8_t *restrict out, + size_t *restrict out_pos, size_t out_size, lzma_action action) +{ + lzma_block_coder *coder = coder_ptr; + + // Check that our amount of input stays in proper limits. + if (LZMA_VLI_MAX - coder->uncompressed_size < in_size - *in_pos) + return LZMA_DATA_ERROR; + + switch (coder->sequence) { + case SEQ_CODE: { + const size_t in_start = *in_pos; + const size_t out_start = *out_pos; + + const lzma_ret ret = coder->next.code(coder->next.coder, + allocator, in, in_pos, in_size, + out, out_pos, out_size, action); + + const size_t in_used = *in_pos - in_start; + const size_t out_used = *out_pos - out_start; + + if (COMPRESSED_SIZE_MAX - coder->compressed_size < out_used) + return LZMA_DATA_ERROR; + + coder->compressed_size += out_used; + + // No need to check for overflow because we have already + // checked it at the beginning of this function. + coder->uncompressed_size += in_used; + + // Call lzma_check_update() only if input was consumed. This + // avoids null pointer + 0 (undefined behavior) when in == 0. + if (in_used > 0) + lzma_check_update(&coder->check, coder->block->check, + in + in_start, in_used); + + if (ret != LZMA_STREAM_END || action == LZMA_SYNC_FLUSH) + return ret; + + assert(*in_pos == in_size); + assert(action == LZMA_FINISH); + + // Copy the values into coder->block. The caller + // may use this information to construct Index. + coder->block->compressed_size = coder->compressed_size; + coder->block->uncompressed_size = coder->uncompressed_size; + + coder->sequence = SEQ_PADDING; + FALLTHROUGH; + } + + case SEQ_PADDING: + // Pad Compressed Data to a multiple of four bytes. We can + // use coder->compressed_size for this since we don't need + // it for anything else anymore. + while (coder->compressed_size & 3) { + if (*out_pos >= out_size) + return LZMA_OK; + + out[*out_pos] = 0x00; + ++*out_pos; + ++coder->compressed_size; + } + + if (coder->block->check == LZMA_CHECK_NONE) + return LZMA_STREAM_END; + + lzma_check_finish(&coder->check, coder->block->check); + + coder->sequence = SEQ_CHECK; + FALLTHROUGH; + + case SEQ_CHECK: { + const size_t check_size = lzma_check_size(coder->block->check); + lzma_bufcpy(coder->check.buffer.u8, &coder->pos, check_size, + out, out_pos, out_size); + if (coder->pos < check_size) + return LZMA_OK; + + memcpy(coder->block->raw_check, coder->check.buffer.u8, + check_size); + return LZMA_STREAM_END; + } + } + + return LZMA_PROG_ERROR; +} + + +static void +block_encoder_end(void *coder_ptr, const lzma_allocator *allocator) +{ + lzma_block_coder *coder = coder_ptr; + lzma_next_end(&coder->next, allocator); + lzma_free(coder, allocator); + return; +} + + +static lzma_ret +block_encoder_update(void *coder_ptr, const lzma_allocator *allocator, + const lzma_filter *filters lzma_attribute((__unused__)), + const lzma_filter *reversed_filters) +{ + lzma_block_coder *coder = coder_ptr; + + if (coder->sequence != SEQ_CODE) + return LZMA_PROG_ERROR; + + return lzma_next_filter_update( + &coder->next, allocator, reversed_filters); +} + + +extern lzma_ret +lzma_block_encoder_init(lzma_next_coder *next, const lzma_allocator *allocator, + lzma_block *block) +{ + lzma_next_coder_init(&lzma_block_encoder_init, next, allocator); + + if (block == NULL) + return LZMA_PROG_ERROR; + + // The contents of the structure may depend on the version so + // check the version first. + if (block->version > 1) + return LZMA_OPTIONS_ERROR; + + // If the Check ID is not supported, we cannot calculate the check and + // thus not create a proper Block. + if ((unsigned int)(block->check) > LZMA_CHECK_ID_MAX) + return LZMA_PROG_ERROR; + + if (!lzma_check_is_supported(block->check)) + return LZMA_UNSUPPORTED_CHECK; + + // Allocate and initialize *next->coder if needed. + lzma_block_coder *coder = next->coder; + if (coder == NULL) { + coder = lzma_alloc(sizeof(lzma_block_coder), allocator); + if (coder == NULL) + return LZMA_MEM_ERROR; + + next->coder = coder; + next->code = &block_encode; + next->end = &block_encoder_end; + next->update = &block_encoder_update; + coder->next = LZMA_NEXT_CODER_INIT; + } + + // Basic initializations + coder->sequence = SEQ_CODE; + coder->block = block; + coder->compressed_size = 0; + coder->uncompressed_size = 0; + coder->pos = 0; + + // Initialize the check + lzma_check_init(&coder->check, block->check); + + // Initialize the requested filters. + return lzma_raw_encoder_init(&coder->next, allocator, block->filters); +} + + +extern LZMA_API(lzma_ret) +lzma_block_encoder(lzma_stream *strm, lzma_block *block) +{ + lzma_next_strm_init(lzma_block_encoder_init, strm, block); + + strm->internal->supported_actions[LZMA_RUN] = true; + strm->internal->supported_actions[LZMA_SYNC_FLUSH] = true; + strm->internal->supported_actions[LZMA_FINISH] = true; + + return LZMA_OK; +} diff --git a/src/native/external/xz/src/liblzma/common/block_encoder.h b/src/native/external/xz/src/liblzma/common/block_encoder.h new file mode 100644 index 00000000000000..b7dfe9a084171c --- /dev/null +++ b/src/native/external/xz/src/liblzma/common/block_encoder.h @@ -0,0 +1,46 @@ +// SPDX-License-Identifier: 0BSD + +/////////////////////////////////////////////////////////////////////////////// +// +/// \file block_encoder.h +/// \brief Encodes .xz Blocks +// +// Author: Lasse Collin +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef LZMA_BLOCK_ENCODER_H +#define LZMA_BLOCK_ENCODER_H + +#include "common.h" + + +/// \brief Biggest Compressed Size value that the Block encoder supports +/// +/// The maximum size of a single Block is limited by the maximum size of +/// a Stream, which in theory is 2^63 - 3 bytes (i.e. LZMA_VLI_MAX - 3). +/// While the size is really big and no one should hit it in practice, we +/// take it into account in some places anyway to catch some errors e.g. if +/// application passes insanely big value to some function. +/// +/// We could take into account the headers etc. to determine the exact +/// maximum size of the Compressed Data field, but the complexity would give +/// us nothing useful. Instead, limit the size of Compressed Data so that +/// even with biggest possible Block Header and Check fields the total +/// encoded size of the Block stays as a valid VLI. This doesn't guarantee +/// that the size of the Stream doesn't grow too big, but that problem is +/// taken care outside the Block handling code. +/// +/// ~LZMA_VLI_C(3) is to guarantee that if we need padding at the end of +/// the Compressed Data field, it will still stay in the proper limit. +/// +/// This constant is in this file because it is needed in both +/// block_encoder.c and block_buffer_encoder.c. +#define COMPRESSED_SIZE_MAX ((LZMA_VLI_MAX - LZMA_BLOCK_HEADER_SIZE_MAX \ + - LZMA_CHECK_SIZE_MAX) & ~LZMA_VLI_C(3)) + + +extern lzma_ret lzma_block_encoder_init(lzma_next_coder *next, + const lzma_allocator *allocator, lzma_block *block); + +#endif diff --git a/src/native/external/xz/src/liblzma/common/block_header_decoder.c b/src/native/external/xz/src/liblzma/common/block_header_decoder.c new file mode 100644 index 00000000000000..f0b2fbe54d8d3e --- /dev/null +++ b/src/native/external/xz/src/liblzma/common/block_header_decoder.c @@ -0,0 +1,114 @@ +// SPDX-License-Identifier: 0BSD + +/////////////////////////////////////////////////////////////////////////////// +// +/// \file block_header_decoder.c +/// \brief Decodes Block Header from .xz files +// +// Author: Lasse Collin +// +/////////////////////////////////////////////////////////////////////////////// + +#include "common.h" +#include "check.h" + + +extern LZMA_API(lzma_ret) +lzma_block_header_decode(lzma_block *block, + const lzma_allocator *allocator, const uint8_t *in) +{ + // NOTE: We consider the header to be corrupt not only when the + // CRC32 doesn't match, but also when variable-length integers + // are invalid or over 63 bits, or if the header is too small + // to contain the claimed information. + + // Catch unexpected NULL pointers. + if (block == NULL || block->filters == NULL || in == NULL) + return LZMA_PROG_ERROR; + + // Initialize the filter options array. This way the caller can + // safely free() the options even if an error occurs in this function. + for (size_t i = 0; i <= LZMA_FILTERS_MAX; ++i) { + block->filters[i].id = LZMA_VLI_UNKNOWN; + block->filters[i].options = NULL; + } + + // Versions 0 and 1 are supported. If a newer version was specified, + // we need to downgrade it. + if (block->version > 1) + block->version = 1; + + // This isn't a Block Header option, but since the decompressor will + // read it if version >= 1, it's better to initialize it here than + // to expect the caller to do it since in almost all cases this + // should be false. + block->ignore_check = false; + + // Validate Block Header Size and Check type. The caller must have + // already set these, so it is a programming error if this test fails. + if (lzma_block_header_size_decode(in[0]) != block->header_size + || (unsigned int)(block->check) > LZMA_CHECK_ID_MAX) + return LZMA_PROG_ERROR; + + // Exclude the CRC32 field. + const size_t in_size = block->header_size - 4; + + // Verify CRC32 + if (lzma_crc32(in, in_size, 0) != read32le(in + in_size)) { +#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION + return LZMA_DATA_ERROR; +#endif + } + + // Check for unsupported flags. + if (in[1] & 0x3C) + return LZMA_OPTIONS_ERROR; + + // Start after the Block Header Size and Block Flags fields. + size_t in_pos = 2; + + // Compressed Size + if (in[1] & 0x40) { + return_if_error(lzma_vli_decode(&block->compressed_size, + NULL, in, &in_pos, in_size)); + + // Validate Compressed Size. This checks that it isn't zero + // and that the total size of the Block is a valid VLI. + if (lzma_block_unpadded_size(block) == 0) + return LZMA_DATA_ERROR; + } else { + block->compressed_size = LZMA_VLI_UNKNOWN; + } + + // Uncompressed Size + if (in[1] & 0x80) + return_if_error(lzma_vli_decode(&block->uncompressed_size, + NULL, in, &in_pos, in_size)); + else + block->uncompressed_size = LZMA_VLI_UNKNOWN; + + // Filter Flags + const size_t filter_count = (in[1] & 3U) + 1; + for (size_t i = 0; i < filter_count; ++i) { + const lzma_ret ret = lzma_filter_flags_decode( + &block->filters[i], allocator, + in, &in_pos, in_size); + if (ret != LZMA_OK) { + lzma_filters_free(block->filters, allocator); + return ret; + } + } + + // Padding + while (in_pos < in_size) { + if (in[in_pos++] != 0x00) { + lzma_filters_free(block->filters, allocator); + + // Possibly some new field present so use + // LZMA_OPTIONS_ERROR instead of LZMA_DATA_ERROR. + return LZMA_OPTIONS_ERROR; + } + } + + return LZMA_OK; +} diff --git a/src/native/external/xz/src/liblzma/common/block_header_encoder.c b/src/native/external/xz/src/liblzma/common/block_header_encoder.c new file mode 100644 index 00000000000000..33840d1bd21745 --- /dev/null +++ b/src/native/external/xz/src/liblzma/common/block_header_encoder.c @@ -0,0 +1,131 @@ +// SPDX-License-Identifier: 0BSD + +/////////////////////////////////////////////////////////////////////////////// +// +/// \file block_header_encoder.c +/// \brief Encodes Block Header for .xz files +// +// Author: Lasse Collin +// +/////////////////////////////////////////////////////////////////////////////// + +#include "common.h" +#include "check.h" + + +extern LZMA_API(lzma_ret) +lzma_block_header_size(lzma_block *block) +{ + if (block->version > 1) + return LZMA_OPTIONS_ERROR; + + // Block Header Size + Block Flags + CRC32. + uint32_t size = 1 + 1 + 4; + + // Compressed Size + if (block->compressed_size != LZMA_VLI_UNKNOWN) { + const uint32_t add = lzma_vli_size(block->compressed_size); + if (add == 0 || block->compressed_size == 0) + return LZMA_PROG_ERROR; + + size += add; + } + + // Uncompressed Size + if (block->uncompressed_size != LZMA_VLI_UNKNOWN) { + const uint32_t add = lzma_vli_size(block->uncompressed_size); + if (add == 0) + return LZMA_PROG_ERROR; + + size += add; + } + + // List of Filter Flags + if (block->filters == NULL || block->filters[0].id == LZMA_VLI_UNKNOWN) + return LZMA_PROG_ERROR; + + for (size_t i = 0; block->filters[i].id != LZMA_VLI_UNKNOWN; ++i) { + // Don't allow too many filters. + if (i == LZMA_FILTERS_MAX) + return LZMA_PROG_ERROR; + + uint32_t add; + return_if_error(lzma_filter_flags_size(&add, + block->filters + i)); + + size += add; + } + + // Pad to a multiple of four bytes. + block->header_size = (size + 3) & ~UINT32_C(3); + + // NOTE: We don't verify that the encoded size of the Block stays + // within limits. This is because it is possible that we are called + // with exaggerated Compressed Size (e.g. LZMA_VLI_MAX) to reserve + // space for Block Header, and later called again with lower, + // real values. + + return LZMA_OK; +} + + +extern LZMA_API(lzma_ret) +lzma_block_header_encode(const lzma_block *block, uint8_t *out) +{ + // Validate everything but filters. + if (lzma_block_unpadded_size(block) == 0 + || !lzma_vli_is_valid(block->uncompressed_size)) + return LZMA_PROG_ERROR; + + // Indicate the size of the buffer _excluding_ the CRC32 field. + const size_t out_size = block->header_size - 4; + + // Store the Block Header Size. + out[0] = (uint8_t)(out_size / 4); + + // We write Block Flags in pieces. + out[1] = 0x00; + size_t out_pos = 2; + + // Compressed Size + if (block->compressed_size != LZMA_VLI_UNKNOWN) { + return_if_error(lzma_vli_encode(block->compressed_size, NULL, + out, &out_pos, out_size)); + + out[1] |= 0x40; + } + + // Uncompressed Size + if (block->uncompressed_size != LZMA_VLI_UNKNOWN) { + return_if_error(lzma_vli_encode(block->uncompressed_size, NULL, + out, &out_pos, out_size)); + + out[1] |= 0x80; + } + + // Filter Flags + if (block->filters == NULL || block->filters[0].id == LZMA_VLI_UNKNOWN) + return LZMA_PROG_ERROR; + + size_t filter_count = 0; + do { + // There can be a maximum of four filters. + if (filter_count == LZMA_FILTERS_MAX) + return LZMA_PROG_ERROR; + + return_if_error(lzma_filter_flags_encode( + block->filters + filter_count, + out, &out_pos, out_size)); + + } while (block->filters[++filter_count].id != LZMA_VLI_UNKNOWN); + + out[1] |= filter_count - 1; + + // Padding + memzero(out + out_pos, out_size - out_pos); + + // CRC32 + write32le(out + out_size, lzma_crc32(out, out_size, 0)); + + return LZMA_OK; +} diff --git a/src/native/external/xz/src/liblzma/common/block_util.c b/src/native/external/xz/src/liblzma/common/block_util.c new file mode 100644 index 00000000000000..191f6d444aa603 --- /dev/null +++ b/src/native/external/xz/src/liblzma/common/block_util.c @@ -0,0 +1,89 @@ +// SPDX-License-Identifier: 0BSD + +/////////////////////////////////////////////////////////////////////////////// +// +/// \file block_util.c +/// \brief Utility functions to handle lzma_block +// +// Author: Lasse Collin +// +/////////////////////////////////////////////////////////////////////////////// + +#include "common.h" +#include "index.h" + + +extern LZMA_API(lzma_ret) +lzma_block_compressed_size(lzma_block *block, lzma_vli unpadded_size) +{ + // Validate everything but Uncompressed Size and filters. + if (lzma_block_unpadded_size(block) == 0) + return LZMA_PROG_ERROR; + + const uint32_t container_size = block->header_size + + lzma_check_size(block->check); + + // Validate that Compressed Size will be greater than zero. + if (unpadded_size <= container_size) + return LZMA_DATA_ERROR; + + // Calculate what Compressed Size is supposed to be. + // If Compressed Size was present in Block Header, + // compare that the new value matches it. + const lzma_vli compressed_size = unpadded_size - container_size; + if (block->compressed_size != LZMA_VLI_UNKNOWN + && block->compressed_size != compressed_size) + return LZMA_DATA_ERROR; + + block->compressed_size = compressed_size; + + return LZMA_OK; +} + + +extern LZMA_API(lzma_vli) +lzma_block_unpadded_size(const lzma_block *block) +{ + // Validate the values that we are interested in i.e. all but + // Uncompressed Size and the filters. + // + // NOTE: This function is used for validation too, so it is + // essential that these checks are always done even if + // Compressed Size is unknown. + if (block == NULL || block->version > 1 + || block->header_size < LZMA_BLOCK_HEADER_SIZE_MIN + || block->header_size > LZMA_BLOCK_HEADER_SIZE_MAX + || (block->header_size & 3) + || !lzma_vli_is_valid(block->compressed_size) + || block->compressed_size == 0 + || (unsigned int)(block->check) > LZMA_CHECK_ID_MAX) + return 0; + + // If Compressed Size is unknown, return that we cannot know + // size of the Block either. + if (block->compressed_size == LZMA_VLI_UNKNOWN) + return LZMA_VLI_UNKNOWN; + + // Calculate Unpadded Size and validate it. + const lzma_vli unpadded_size = block->compressed_size + + block->header_size + + lzma_check_size(block->check); + + assert(unpadded_size >= UNPADDED_SIZE_MIN); + if (unpadded_size > UNPADDED_SIZE_MAX) + return 0; + + return unpadded_size; +} + + +extern LZMA_API(lzma_vli) +lzma_block_total_size(const lzma_block *block) +{ + lzma_vli unpadded_size = lzma_block_unpadded_size(block); + + if (unpadded_size != LZMA_VLI_UNKNOWN) + unpadded_size = vli_ceil4(unpadded_size); + + return unpadded_size; +} diff --git a/src/native/external/xz/src/liblzma/common/common.c b/src/native/external/xz/src/liblzma/common/common.c new file mode 100644 index 00000000000000..6e031a56c888c2 --- /dev/null +++ b/src/native/external/xz/src/liblzma/common/common.c @@ -0,0 +1,486 @@ +// SPDX-License-Identifier: 0BSD + +/////////////////////////////////////////////////////////////////////////////// +// +/// \file common.c +/// \brief Common functions needed in many places in liblzma +// +// Author: Lasse Collin +// +/////////////////////////////////////////////////////////////////////////////// + +#include "common.h" + + +///////////// +// Version // +///////////// + +extern LZMA_API(uint32_t) +lzma_version_number(void) +{ + return LZMA_VERSION; +} + + +extern LZMA_API(const char *) +lzma_version_string(void) +{ + return LZMA_VERSION_STRING; +} + + +/////////////////////// +// Memory allocation // +/////////////////////// + +lzma_attr_alloc_size(1) +extern void * +lzma_alloc(size_t size, const lzma_allocator *allocator) +{ + // Some malloc() variants return NULL if called with size == 0. + if (size == 0) + size = 1; + + void *ptr; + + if (allocator != NULL && allocator->alloc != NULL) + ptr = allocator->alloc(allocator->opaque, 1, size); + else + ptr = malloc(size); + + return ptr; +} + + +lzma_attr_alloc_size(1) +extern void * +lzma_alloc_zero(size_t size, const lzma_allocator *allocator) +{ + // Some calloc() variants return NULL if called with size == 0. + if (size == 0) + size = 1; + + void *ptr; + + if (allocator != NULL && allocator->alloc != NULL) { + ptr = allocator->alloc(allocator->opaque, 1, size); + if (ptr != NULL) + memzero(ptr, size); + } else { + ptr = calloc(1, size); + } + + return ptr; +} + + +extern void +lzma_free(void *ptr, const lzma_allocator *allocator) +{ + if (allocator != NULL && allocator->free != NULL) + allocator->free(allocator->opaque, ptr); + else + free(ptr); + + return; +} + + +////////// +// Misc // +////////// + +extern size_t +lzma_bufcpy(const uint8_t *restrict in, size_t *restrict in_pos, + size_t in_size, uint8_t *restrict out, + size_t *restrict out_pos, size_t out_size) +{ + assert(in != NULL || *in_pos == in_size); + assert(out != NULL || *out_pos == out_size); + + assert(*in_pos <= in_size); + assert(*out_pos <= out_size); + + const size_t in_avail = in_size - *in_pos; + const size_t out_avail = out_size - *out_pos; + const size_t copy_size = my_min(in_avail, out_avail); + + // Call memcpy() only if there is something to copy. If there is + // nothing to copy, in or out might be NULL and then the memcpy() + // call would trigger undefined behavior. + if (copy_size > 0) + memcpy(out + *out_pos, in + *in_pos, copy_size); + + *in_pos += copy_size; + *out_pos += copy_size; + + return copy_size; +} + + +extern lzma_ret +lzma_next_filter_init(lzma_next_coder *next, const lzma_allocator *allocator, + const lzma_filter_info *filters) +{ + lzma_next_coder_init(filters[0].init, next, allocator); + next->id = filters[0].id; + return filters[0].init == NULL + ? LZMA_OK : filters[0].init(next, allocator, filters); +} + + +extern lzma_ret +lzma_next_filter_update(lzma_next_coder *next, const lzma_allocator *allocator, + const lzma_filter *reversed_filters) +{ + // Check that the application isn't trying to change the Filter ID. + // End of filters is indicated with LZMA_VLI_UNKNOWN in both + // reversed_filters[0].id and next->id. + if (reversed_filters[0].id != next->id) + return LZMA_PROG_ERROR; + + if (reversed_filters[0].id == LZMA_VLI_UNKNOWN) + return LZMA_OK; + + assert(next->update != NULL); + return next->update(next->coder, allocator, NULL, reversed_filters); +} + + +extern void +lzma_next_end(lzma_next_coder *next, const lzma_allocator *allocator) +{ + if (next->init != (uintptr_t)(NULL)) { + // To avoid tiny end functions that simply call + // lzma_free(coder, allocator), we allow leaving next->end + // NULL and call lzma_free() here. + if (next->end != NULL) + next->end(next->coder, allocator); + else + lzma_free(next->coder, allocator); + + // Reset the variables so the we don't accidentally think + // that it is an already initialized coder. + *next = LZMA_NEXT_CODER_INIT; + } + + return; +} + + +////////////////////////////////////// +// External to internal API wrapper // +////////////////////////////////////// + +extern lzma_ret +lzma_strm_init(lzma_stream *strm) +{ + if (strm == NULL) + return LZMA_PROG_ERROR; + + if (strm->internal == NULL) { + strm->internal = lzma_alloc(sizeof(lzma_internal), + strm->allocator); + if (strm->internal == NULL) + return LZMA_MEM_ERROR; + + strm->internal->next = LZMA_NEXT_CODER_INIT; + } + + memzero(strm->internal->supported_actions, + sizeof(strm->internal->supported_actions)); + strm->internal->sequence = ISEQ_RUN; + strm->internal->allow_buf_error = false; + + strm->total_in = 0; + strm->total_out = 0; + + return LZMA_OK; +} + + +extern LZMA_API(lzma_ret) +lzma_code(lzma_stream *strm, lzma_action action) +{ + // Sanity checks + if ((strm->next_in == NULL && strm->avail_in != 0) + || (strm->next_out == NULL && strm->avail_out != 0) + || strm->internal == NULL + || strm->internal->next.code == NULL + || (unsigned int)(action) > LZMA_ACTION_MAX + || !strm->internal->supported_actions[action]) + return LZMA_PROG_ERROR; + + // Check if unsupported members have been set to non-zero or non-NULL, + // which would indicate that some new feature is wanted. + if (strm->reserved_ptr1 != NULL + || strm->reserved_ptr2 != NULL + || strm->reserved_ptr3 != NULL + || strm->reserved_ptr4 != NULL + || strm->reserved_int2 != 0 + || strm->reserved_int3 != 0 + || strm->reserved_int4 != 0 + || strm->reserved_enum1 != LZMA_RESERVED_ENUM + || strm->reserved_enum2 != LZMA_RESERVED_ENUM) + return LZMA_OPTIONS_ERROR; + + switch (strm->internal->sequence) { + case ISEQ_RUN: + switch (action) { + case LZMA_RUN: + break; + + case LZMA_SYNC_FLUSH: + strm->internal->sequence = ISEQ_SYNC_FLUSH; + break; + + case LZMA_FULL_FLUSH: + strm->internal->sequence = ISEQ_FULL_FLUSH; + break; + + case LZMA_FINISH: + strm->internal->sequence = ISEQ_FINISH; + break; + + case LZMA_FULL_BARRIER: + strm->internal->sequence = ISEQ_FULL_BARRIER; + break; + } + + break; + + case ISEQ_SYNC_FLUSH: + // The same action must be used until we return + // LZMA_STREAM_END, and the amount of input must not change. + if (action != LZMA_SYNC_FLUSH + || strm->internal->avail_in != strm->avail_in) + return LZMA_PROG_ERROR; + + break; + + case ISEQ_FULL_FLUSH: + if (action != LZMA_FULL_FLUSH + || strm->internal->avail_in != strm->avail_in) + return LZMA_PROG_ERROR; + + break; + + case ISEQ_FINISH: + if (action != LZMA_FINISH + || strm->internal->avail_in != strm->avail_in) + return LZMA_PROG_ERROR; + + break; + + case ISEQ_FULL_BARRIER: + if (action != LZMA_FULL_BARRIER + || strm->internal->avail_in != strm->avail_in) + return LZMA_PROG_ERROR; + + break; + + case ISEQ_END: + return LZMA_STREAM_END; + + case ISEQ_ERROR: + default: + return LZMA_PROG_ERROR; + } + + size_t in_pos = 0; + size_t out_pos = 0; + lzma_ret ret = strm->internal->next.code( + strm->internal->next.coder, strm->allocator, + strm->next_in, &in_pos, strm->avail_in, + strm->next_out, &out_pos, strm->avail_out, action); + + // Updating next_in and next_out has to be skipped when they are NULL + // to avoid null pointer + 0 (undefined behavior). Do this by checking + // in_pos > 0 and out_pos > 0 because this way NULL + non-zero (a bug) + // will get caught one way or other. + if (in_pos > 0) { + strm->next_in += in_pos; + strm->avail_in -= in_pos; + strm->total_in += in_pos; + } + + if (out_pos > 0) { + strm->next_out += out_pos; + strm->avail_out -= out_pos; + strm->total_out += out_pos; + } + + strm->internal->avail_in = strm->avail_in; + + switch (ret) { + case LZMA_OK: + // Don't return LZMA_BUF_ERROR when it happens the first time. + // This is to avoid returning LZMA_BUF_ERROR when avail_out + // was zero but still there was no more data left to written + // to next_out. + if (out_pos == 0 && in_pos == 0) { + if (strm->internal->allow_buf_error) + ret = LZMA_BUF_ERROR; + else + strm->internal->allow_buf_error = true; + } else { + strm->internal->allow_buf_error = false; + } + break; + + case LZMA_TIMED_OUT: + strm->internal->allow_buf_error = false; + ret = LZMA_OK; + break; + + case LZMA_SEEK_NEEDED: + strm->internal->allow_buf_error = false; + + // If LZMA_FINISH was used, reset it back to the + // LZMA_RUN-based state so that new input can be supplied + // by the application. + if (strm->internal->sequence == ISEQ_FINISH) + strm->internal->sequence = ISEQ_RUN; + + break; + + case LZMA_STREAM_END: + if (strm->internal->sequence == ISEQ_SYNC_FLUSH + || strm->internal->sequence == ISEQ_FULL_FLUSH + || strm->internal->sequence + == ISEQ_FULL_BARRIER) + strm->internal->sequence = ISEQ_RUN; + else + strm->internal->sequence = ISEQ_END; + + FALLTHROUGH; + + case LZMA_NO_CHECK: + case LZMA_UNSUPPORTED_CHECK: + case LZMA_GET_CHECK: + case LZMA_MEMLIMIT_ERROR: + // Something else than LZMA_OK, but not a fatal error, + // that is, coding may be continued (except if ISEQ_END). + strm->internal->allow_buf_error = false; + break; + + default: + // All the other errors are fatal; coding cannot be continued. + assert(ret != LZMA_BUF_ERROR); + strm->internal->sequence = ISEQ_ERROR; + break; + } + + return ret; +} + + +extern LZMA_API(void) +lzma_end(lzma_stream *strm) +{ + if (strm != NULL && strm->internal != NULL) { + lzma_next_end(&strm->internal->next, strm->allocator); + lzma_free(strm->internal, strm->allocator); + strm->internal = NULL; + } + + return; +} + + +#ifdef HAVE_SYMBOL_VERSIONS_LINUX +// This is for compatibility with binaries linked against liblzma that +// has been patched with xz-5.2.2-compat-libs.patch from RHEL/CentOS 7. +LZMA_SYMVER_API("lzma_get_progress@XZ_5.2.2", + void, lzma_get_progress_522)(lzma_stream *strm, + uint64_t *progress_in, uint64_t *progress_out) lzma_nothrow + __attribute__((__alias__("lzma_get_progress_52"))); + +LZMA_SYMVER_API("lzma_get_progress@@XZ_5.2", + void, lzma_get_progress_52)(lzma_stream *strm, + uint64_t *progress_in, uint64_t *progress_out) lzma_nothrow; + +#define lzma_get_progress lzma_get_progress_52 +#endif +extern LZMA_API(void) +lzma_get_progress(lzma_stream *strm, + uint64_t *progress_in, uint64_t *progress_out) +{ + if (strm->internal->next.get_progress != NULL) { + strm->internal->next.get_progress(strm->internal->next.coder, + progress_in, progress_out); + } else { + *progress_in = strm->total_in; + *progress_out = strm->total_out; + } + + return; +} + + +extern LZMA_API(lzma_check) +lzma_get_check(const lzma_stream *strm) +{ + // Return LZMA_CHECK_NONE if we cannot know the check type. + // It's a bug in the application if this happens. + if (strm->internal->next.get_check == NULL) + return LZMA_CHECK_NONE; + + return strm->internal->next.get_check(strm->internal->next.coder); +} + + +extern LZMA_API(uint64_t) +lzma_memusage(const lzma_stream *strm) +{ + uint64_t memusage; + uint64_t old_memlimit; + + if (strm == NULL || strm->internal == NULL + || strm->internal->next.memconfig == NULL + || strm->internal->next.memconfig( + strm->internal->next.coder, + &memusage, &old_memlimit, 0) != LZMA_OK) + return 0; + + return memusage; +} + + +extern LZMA_API(uint64_t) +lzma_memlimit_get(const lzma_stream *strm) +{ + uint64_t old_memlimit; + uint64_t memusage; + + if (strm == NULL || strm->internal == NULL + || strm->internal->next.memconfig == NULL + || strm->internal->next.memconfig( + strm->internal->next.coder, + &memusage, &old_memlimit, 0) != LZMA_OK) + return 0; + + return old_memlimit; +} + + +extern LZMA_API(lzma_ret) +lzma_memlimit_set(lzma_stream *strm, uint64_t new_memlimit) +{ + // Dummy variables to simplify memconfig functions + uint64_t old_memlimit; + uint64_t memusage; + + if (strm == NULL || strm->internal == NULL + || strm->internal->next.memconfig == NULL) + return LZMA_PROG_ERROR; + + // Zero is a special value that cannot be used as an actual limit. + // If 0 was specified, use 1 instead. + if (new_memlimit == 0) + new_memlimit = 1; + + return strm->internal->next.memconfig(strm->internal->next.coder, + &memusage, &old_memlimit, new_memlimit); +} diff --git a/src/native/external/xz/src/liblzma/common/common.h b/src/native/external/xz/src/liblzma/common/common.h new file mode 100644 index 00000000000000..20af32f6d6cd9a --- /dev/null +++ b/src/native/external/xz/src/liblzma/common/common.h @@ -0,0 +1,412 @@ +// SPDX-License-Identifier: 0BSD + +/////////////////////////////////////////////////////////////////////////////// +// +/// \file common.h +/// \brief Definitions common to the whole liblzma library +// +// Author: Lasse Collin +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef LZMA_COMMON_H +#define LZMA_COMMON_H + +#include "sysdefs.h" +#include "mythread.h" +#include "tuklib_integer.h" + +// LZMA_API_EXPORT is used to mark the exported API functions. +// It's used to define the LZMA_API macro. +// +// lzma_attr_visibility_hidden is used for marking *declarations* of extern +// variables that are internal to liblzma (-fvisibility=hidden alone is +// enough to hide the *definitions*). Such markings allow slightly more +// efficient code to accesses those variables in ELF shared libraries. +#if defined(_WIN32) || defined(__CYGWIN__) +# ifdef DLL_EXPORT +# define LZMA_API_EXPORT __declspec(dllexport) +# else +# define LZMA_API_EXPORT +# endif +# define lzma_attr_visibility_hidden +// Don't use ifdef or defined() below. +#elif HAVE_VISIBILITY +# define LZMA_API_EXPORT __attribute__((__visibility__("default"))) +# define lzma_attr_visibility_hidden \ + __attribute__((__visibility__("hidden"))) +#else +# define LZMA_API_EXPORT +# define lzma_attr_visibility_hidden +#endif + +#define LZMA_API(type) LZMA_API_EXPORT type LZMA_API_CALL + +#include "lzma.h" + +// This is for detecting modern GCC and Clang attributes +// like __symver__ in GCC >= 10. +#ifdef __has_attribute +# define lzma_has_attribute(attr) __has_attribute(attr) +#else +# define lzma_has_attribute(attr) 0 +#endif + +// The extra symbol versioning in the C files may only be used when +// building a shared library. If HAVE_SYMBOL_VERSIONS_LINUX is defined +// to 2 then symbol versioning is done only if also PIC is defined. +// By default Libtool defines PIC when building a shared library and +// doesn't define it when building a static library but it can be +// overridden with --with-pic and --without-pic. configure let's rely +// on PIC if neither --with-pic or --without-pic was used. +#if defined(HAVE_SYMBOL_VERSIONS_LINUX) \ + && (HAVE_SYMBOL_VERSIONS_LINUX == 2 && !defined(PIC)) +# undef HAVE_SYMBOL_VERSIONS_LINUX +#endif + +#ifdef HAVE_SYMBOL_VERSIONS_LINUX +// To keep link-time optimization (LTO, -flto) working with GCC, +// the __symver__ attribute must be used instead of __asm__(".symver ..."). +// Otherwise the symbol versions may be lost, resulting in broken liblzma +// that has wrong default versions in the exported symbol list! +// The attribute was added in GCC 10; LTO with older GCC is not supported. +// +// To keep -Wmissing-prototypes happy, use LZMA_SYMVER_API only with function +// declarations (including those with __alias__ attribute) and LZMA_API with +// the function definitions. This means a little bit of silly copy-and-paste +// between declarations and definitions though. +// +// As of GCC 12.2, the __symver__ attribute supports only @ and @@ but the +// very convenient @@@ isn't supported (it's supported by GNU assembler +// since 2000). When using @@ instead of @@@, the internal name must not be +// the same as the external name to avoid problems in some situations. This +// is why "#define foo_52 foo" is needed for the default symbol versions. +// +// __has_attribute is supported before GCC 10 and it is supported in Clang 14 +// too (which doesn't support __symver__) so use it to detect if __symver__ +// is available. This should be far more reliable than looking at compiler +// version macros as nowadays especially __GNUC__ is defined by many compilers. +# if lzma_has_attribute(__symver__) +# define LZMA_SYMVER_API(extnamever, type, intname) \ + extern __attribute__((__symver__(extnamever))) \ + LZMA_API(type) intname +# else +# define LZMA_SYMVER_API(extnamever, type, intname) \ + __asm__(".symver " #intname "," extnamever); \ + extern LZMA_API(type) intname +# endif +#endif + +// MSVC has __forceinline which shouldn't be combined with the inline keyword +// (results in a warning). +// +// GCC 3.1 added always_inline attribute so we don't need to check +// for __GNUC__ version. Similarly, all relevant Clang versions +// support it (at least Clang 3.0.0 does already). +// Other compilers might support too which also support __has_attribute +// (Solaris Studio) so do that check too. +#if defined(_MSC_VER) +# define lzma_always_inline __forceinline +#elif defined(__GNUC__) || defined(__clang__) || defined(__INTEL_COMPILER) \ + || lzma_has_attribute(__always_inline__) +# define lzma_always_inline inline __attribute__((__always_inline__)) +#else +# define lzma_always_inline inline +#endif + +// These allow helping the compiler in some often-executed branches, whose +// result is almost always the same. +#ifdef __GNUC__ +# define likely(expr) __builtin_expect(expr, true) +# define unlikely(expr) __builtin_expect(expr, false) +#else +# define likely(expr) (expr) +# define unlikely(expr) (expr) +#endif + + +/// Size of temporary buffers needed in some filters +#define LZMA_BUFFER_SIZE 4096 + + +/// Maximum number of worker threads within one multithreaded component. +/// The limit exists solely to make it simpler to prevent integer overflows +/// when allocating structures etc. This should be big enough for now... +/// the code won't scale anywhere close to this number anyway. +#define LZMA_THREADS_MAX 16384 + + +/// Starting value for memory usage estimates. Instead of calculating size +/// of _every_ structure and taking into account malloc() overhead etc., we +/// add a base size to all memory usage estimates. It's not very accurate +/// but should be easily good enough. +#define LZMA_MEMUSAGE_BASE (UINT64_C(1) << 15) + +/// Start of internal Filter ID space. These IDs must never be used +/// in Streams. +#define LZMA_FILTER_RESERVED_START (LZMA_VLI_C(1) << 62) + + +/// Supported flags that can be passed to lzma_stream_decoder(), +/// lzma_auto_decoder(), or lzma_stream_decoder_mt(). +#define LZMA_SUPPORTED_FLAGS \ + ( LZMA_TELL_NO_CHECK \ + | LZMA_TELL_UNSUPPORTED_CHECK \ + | LZMA_TELL_ANY_CHECK \ + | LZMA_IGNORE_CHECK \ + | LZMA_CONCATENATED \ + | LZMA_FAIL_FAST ) + + +/// Largest valid lzma_action value as unsigned integer. +#define LZMA_ACTION_MAX ((unsigned int)(LZMA_FULL_BARRIER)) + + +/// Special return value (lzma_ret) to indicate that a timeout was reached +/// and lzma_code() must not return LZMA_BUF_ERROR. This is converted to +/// LZMA_OK in lzma_code(). +#define LZMA_TIMED_OUT LZMA_RET_INTERNAL1 + +/// Special return value (lzma_ret) for use in stream_decoder_mt.c to +/// indicate Index was detected instead of a Block Header. +#define LZMA_INDEX_DETECTED LZMA_RET_INTERNAL2 + + +typedef struct lzma_next_coder_s lzma_next_coder; + +typedef struct lzma_filter_info_s lzma_filter_info; + + +/// Type of a function used to initialize a filter encoder or decoder +typedef lzma_ret (*lzma_init_function)( + lzma_next_coder *next, const lzma_allocator *allocator, + const lzma_filter_info *filters); + +/// Type of a function to do some kind of coding work (filters, Stream, +/// Block encoders/decoders etc.). Some special coders use don't use both +/// input and output buffers, but for simplicity they still use this same +/// function prototype. +typedef lzma_ret (*lzma_code_function)( + void *coder, const lzma_allocator *allocator, + const uint8_t *restrict in, size_t *restrict in_pos, + size_t in_size, uint8_t *restrict out, + size_t *restrict out_pos, size_t out_size, + lzma_action action); + +/// Type of a function to free the memory allocated for the coder +typedef void (*lzma_end_function)( + void *coder, const lzma_allocator *allocator); + + +/// Raw coder validates and converts an array of lzma_filter structures to +/// an array of lzma_filter_info structures. This array is used with +/// lzma_next_filter_init to initialize the filter chain. +struct lzma_filter_info_s { + /// Filter ID. This can be used to share the same initiazation + /// function *and* data structures with different Filter IDs + /// (LZMA_FILTER_LZMA1EXT does it), and also by the encoder + /// with lzma_filters_update() if filter chain is updated + /// in the middle of a raw stream or Block (LZMA_SYNC_FLUSH). + lzma_vli id; + + /// Pointer to function used to initialize the filter. + /// This is NULL to indicate end of array. + lzma_init_function init; + + /// Pointer to filter's options structure + void *options; +}; + + +/// Hold data and function pointers of the next filter in the chain. +struct lzma_next_coder_s { + /// Pointer to coder-specific data + void *coder; + + /// Filter ID. This is LZMA_VLI_UNKNOWN when this structure doesn't + /// point to a filter coder. + lzma_vli id; + + /// "Pointer" to init function. This is never called here. + /// We need only to detect if we are initializing a coder + /// that was allocated earlier. See lzma_next_coder_init and + /// lzma_next_strm_init macros in this file. + uintptr_t init; + + /// Pointer to function to do the actual coding + lzma_code_function code; + + /// Pointer to function to free lzma_next_coder.coder. This can + /// be NULL; in that case, lzma_free is called to free + /// lzma_next_coder.coder. + lzma_end_function end; + + /// Pointer to a function to get progress information. If this is NULL, + /// lzma_stream.total_in and .total_out are used instead. + void (*get_progress)(void *coder, + uint64_t *progress_in, uint64_t *progress_out); + + /// Pointer to function to return the type of the integrity check. + /// Most coders won't support this. + lzma_check (*get_check)(const void *coder); + + /// Pointer to function to get and/or change the memory usage limit. + /// If new_memlimit == 0, the limit is not changed. + lzma_ret (*memconfig)(void *coder, uint64_t *memusage, + uint64_t *old_memlimit, uint64_t new_memlimit); + + /// Update the filter-specific options or the whole filter chain + /// in the encoder. + lzma_ret (*update)(void *coder, const lzma_allocator *allocator, + const lzma_filter *filters, + const lzma_filter *reversed_filters); + + /// Set how many bytes of output this coder may produce at maximum. + /// On success LZMA_OK must be returned. + /// If the filter chain as a whole cannot support this feature, + /// this must return LZMA_OPTIONS_ERROR. + /// If no input has been given to the coder and the requested limit + /// is too small, this must return LZMA_BUF_ERROR. If input has been + /// seen, LZMA_OK is allowed too. + lzma_ret (*set_out_limit)(void *coder, uint64_t *uncomp_size, + uint64_t out_limit); +}; + + +/// Macro to initialize lzma_next_coder structure +#define LZMA_NEXT_CODER_INIT \ + (lzma_next_coder){ \ + .coder = NULL, \ + .init = (uintptr_t)(NULL), \ + .id = LZMA_VLI_UNKNOWN, \ + .code = NULL, \ + .end = NULL, \ + .get_progress = NULL, \ + .get_check = NULL, \ + .memconfig = NULL, \ + .update = NULL, \ + .set_out_limit = NULL, \ + } + + +/// Internal data for lzma_strm_init, lzma_code, and lzma_end. A pointer to +/// this is stored in lzma_stream. +struct lzma_internal_s { + /// The actual coder that should do something useful + lzma_next_coder next; + + /// Track the state of the coder. This is used to validate arguments + /// so that the actual coders can rely on e.g. that LZMA_SYNC_FLUSH + /// is used on every call to lzma_code until next.code has returned + /// LZMA_STREAM_END. + enum { + ISEQ_RUN, + ISEQ_SYNC_FLUSH, + ISEQ_FULL_FLUSH, + ISEQ_FINISH, + ISEQ_FULL_BARRIER, + ISEQ_END, + ISEQ_ERROR, + } sequence; + + /// A copy of lzma_stream avail_in. This is used to verify that the + /// amount of input doesn't change once e.g. LZMA_FINISH has been + /// used. + size_t avail_in; + + /// Indicates which lzma_action values are allowed by next.code. + bool supported_actions[LZMA_ACTION_MAX + 1]; + + /// If true, lzma_code will return LZMA_BUF_ERROR if no progress was + /// made (no input consumed and no output produced by next.code). + bool allow_buf_error; +}; + + +/// Allocates memory +lzma_attr_alloc_size(1) +extern void *lzma_alloc(size_t size, const lzma_allocator *allocator); + +/// Allocates memory and zeroes it (like calloc()). This can be faster +/// than lzma_alloc() + memzero() while being backward compatible with +/// custom allocators. +lzma_attr_alloc_size(1) +extern void *lzma_alloc_zero(size_t size, const lzma_allocator *allocator); + +/// Frees memory +extern void lzma_free(void *ptr, const lzma_allocator *allocator); + + +/// Allocates strm->internal if it is NULL, and initializes *strm and +/// strm->internal. This function is only called via lzma_next_strm_init macro. +extern lzma_ret lzma_strm_init(lzma_stream *strm); + +/// Initializes the next filter in the chain, if any. This takes care of +/// freeing the memory of previously initialized filter if it is different +/// than the filter being initialized now. This way the actual filter +/// initialization functions don't need to use lzma_next_coder_init macro. +extern lzma_ret lzma_next_filter_init(lzma_next_coder *next, + const lzma_allocator *allocator, + const lzma_filter_info *filters); + +/// Update the next filter in the chain, if any. This checks that +/// the application is not trying to change the Filter IDs. +extern lzma_ret lzma_next_filter_update( + lzma_next_coder *next, const lzma_allocator *allocator, + const lzma_filter *reversed_filters); + +/// Frees the memory allocated for next->coder either using next->end or, +/// if next->end is NULL, using lzma_free. +extern void lzma_next_end(lzma_next_coder *next, + const lzma_allocator *allocator); + + +/// Copy as much data as possible from in[] to out[] and update *in_pos +/// and *out_pos accordingly. Returns the number of bytes copied. +extern size_t lzma_bufcpy(const uint8_t *restrict in, size_t *restrict in_pos, + size_t in_size, uint8_t *restrict out, + size_t *restrict out_pos, size_t out_size); + + +/// \brief Return if expression doesn't evaluate to LZMA_OK +/// +/// There are several situations where we want to return immediately +/// with the value of expr if it isn't LZMA_OK. This macro shortens +/// the code a little. +#define return_if_error(expr) \ +do { \ + const lzma_ret ret_ = (expr); \ + if (ret_ != LZMA_OK) \ + return ret_; \ +} while (0) + + +/// If next isn't already initialized, free the previous coder. Then mark +/// that next is _possibly_ initialized for the coder using this macro. +/// "Possibly" means that if e.g. allocation of next->coder fails, the +/// structure isn't actually initialized for this coder, but leaving +/// next->init to func is still OK. +#define lzma_next_coder_init(func, next, allocator) \ +do { \ + if ((uintptr_t)(func) != (next)->init) \ + lzma_next_end(next, allocator); \ + (next)->init = (uintptr_t)(func); \ +} while (0) + + +/// Initializes lzma_strm and calls func() to initialize strm->internal->next. +/// (The function being called will use lzma_next_coder_init()). If +/// initialization fails, memory that wasn't freed by func() is freed +/// along strm->internal. +#define lzma_next_strm_init(func, strm, ...) \ +do { \ + return_if_error(lzma_strm_init(strm)); \ + const lzma_ret ret_ = func(&(strm)->internal->next, \ + (strm)->allocator, __VA_ARGS__); \ + if (ret_ != LZMA_OK) { \ + lzma_end(strm); \ + return ret_; \ + } \ +} while (0) + +#endif diff --git a/src/native/external/xz/src/liblzma/common/easy_buffer_encoder.c b/src/native/external/xz/src/liblzma/common/easy_buffer_encoder.c new file mode 100644 index 00000000000000..da610cea6bfa69 --- /dev/null +++ b/src/native/external/xz/src/liblzma/common/easy_buffer_encoder.c @@ -0,0 +1,26 @@ +// SPDX-License-Identifier: 0BSD + +/////////////////////////////////////////////////////////////////////////////// +// +/// \file easy_buffer_encoder.c +/// \brief Easy single-call .xz Stream encoder +// +// Author: Lasse Collin +// +/////////////////////////////////////////////////////////////////////////////// + +#include "easy_preset.h" + + +extern LZMA_API(lzma_ret) +lzma_easy_buffer_encode(uint32_t preset, lzma_check check, + const lzma_allocator *allocator, const uint8_t *in, + size_t in_size, uint8_t *out, size_t *out_pos, size_t out_size) +{ + lzma_options_easy opt_easy; + if (lzma_easy_preset(&opt_easy, preset)) + return LZMA_OPTIONS_ERROR; + + return lzma_stream_buffer_encode(opt_easy.filters, check, + allocator, in, in_size, out, out_pos, out_size); +} diff --git a/src/native/external/xz/src/liblzma/common/easy_decoder_memusage.c b/src/native/external/xz/src/liblzma/common/easy_decoder_memusage.c new file mode 100644 index 00000000000000..0c76f10033b62a --- /dev/null +++ b/src/native/external/xz/src/liblzma/common/easy_decoder_memusage.c @@ -0,0 +1,23 @@ +// SPDX-License-Identifier: 0BSD + +/////////////////////////////////////////////////////////////////////////////// +// +/// \file easy_decoder_memusage.c +/// \brief Decoder memory usage calculation to match easy encoder presets +// +// Author: Lasse Collin +// +/////////////////////////////////////////////////////////////////////////////// + +#include "easy_preset.h" + + +extern LZMA_API(uint64_t) +lzma_easy_decoder_memusage(uint32_t preset) +{ + lzma_options_easy opt_easy; + if (lzma_easy_preset(&opt_easy, preset)) + return UINT32_MAX; + + return lzma_raw_decoder_memusage(opt_easy.filters); +} diff --git a/src/native/external/xz/src/liblzma/common/easy_encoder.c b/src/native/external/xz/src/liblzma/common/easy_encoder.c new file mode 100644 index 00000000000000..8dfe29610f7966 --- /dev/null +++ b/src/native/external/xz/src/liblzma/common/easy_encoder.c @@ -0,0 +1,23 @@ +// SPDX-License-Identifier: 0BSD + +/////////////////////////////////////////////////////////////////////////////// +// +/// \file easy_encoder.c +/// \brief Easy .xz Stream encoder initialization +// +// Author: Lasse Collin +// +/////////////////////////////////////////////////////////////////////////////// + +#include "easy_preset.h" + + +extern LZMA_API(lzma_ret) +lzma_easy_encoder(lzma_stream *strm, uint32_t preset, lzma_check check) +{ + lzma_options_easy opt_easy; + if (lzma_easy_preset(&opt_easy, preset)) + return LZMA_OPTIONS_ERROR; + + return lzma_stream_encoder(strm, opt_easy.filters, check); +} diff --git a/src/native/external/xz/src/liblzma/common/easy_encoder_memusage.c b/src/native/external/xz/src/liblzma/common/easy_encoder_memusage.c new file mode 100644 index 00000000000000..1184ac665425b6 --- /dev/null +++ b/src/native/external/xz/src/liblzma/common/easy_encoder_memusage.c @@ -0,0 +1,23 @@ +// SPDX-License-Identifier: 0BSD + +/////////////////////////////////////////////////////////////////////////////// +// +/// \file easy_encoder_memusage.c +/// \brief Easy .xz Stream encoder memory usage calculation +// +// Author: Lasse Collin +// +/////////////////////////////////////////////////////////////////////////////// + +#include "easy_preset.h" + + +extern LZMA_API(uint64_t) +lzma_easy_encoder_memusage(uint32_t preset) +{ + lzma_options_easy opt_easy; + if (lzma_easy_preset(&opt_easy, preset)) + return UINT32_MAX; + + return lzma_raw_encoder_memusage(opt_easy.filters); +} diff --git a/src/native/external/xz/src/liblzma/common/easy_preset.c b/src/native/external/xz/src/liblzma/common/easy_preset.c new file mode 100644 index 00000000000000..7908a2bb73c895 --- /dev/null +++ b/src/native/external/xz/src/liblzma/common/easy_preset.c @@ -0,0 +1,26 @@ +// SPDX-License-Identifier: 0BSD + +/////////////////////////////////////////////////////////////////////////////// +// +/// \file easy_preset.c +/// \brief Preset handling for easy encoder and decoder +// +// Author: Lasse Collin +// +/////////////////////////////////////////////////////////////////////////////// + +#include "easy_preset.h" + + +extern bool +lzma_easy_preset(lzma_options_easy *opt_easy, uint32_t preset) +{ + if (lzma_lzma_preset(&opt_easy->opt_lzma, preset)) + return true; + + opt_easy->filters[0].id = LZMA_FILTER_LZMA2; + opt_easy->filters[0].options = &opt_easy->opt_lzma; + opt_easy->filters[1].id = LZMA_VLI_UNKNOWN; + + return false; +} diff --git a/src/native/external/xz/src/liblzma/common/easy_preset.h b/src/native/external/xz/src/liblzma/common/easy_preset.h new file mode 100644 index 00000000000000..4ef6d044ad57a0 --- /dev/null +++ b/src/native/external/xz/src/liblzma/common/easy_preset.h @@ -0,0 +1,36 @@ +// SPDX-License-Identifier: 0BSD + +/////////////////////////////////////////////////////////////////////////////// +// +/// \file easy_preset.h +/// \brief Preset handling for easy encoder and decoder +// +// Author: Lasse Collin +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef LZMA_EASY_PRESET_H +#define LZMA_EASY_PRESET_H + +#include "common.h" + + +typedef struct { + /// We need to keep the filters array available in case + /// LZMA_FULL_FLUSH is used. + lzma_filter filters[LZMA_FILTERS_MAX + 1]; + + /// Options for LZMA2 + lzma_options_lzma opt_lzma; + + // Options for more filters can be added later, so this struct + // is not ready to be put into the public API. + +} lzma_options_easy; + + +/// Set *easy to the settings given by the preset. Returns true on error, +/// false on success. +extern bool lzma_easy_preset(lzma_options_easy *easy, uint32_t preset); + +#endif diff --git a/src/native/external/xz/src/liblzma/common/file_info.c b/src/native/external/xz/src/liblzma/common/file_info.c new file mode 100644 index 00000000000000..5efa73c4471ac5 --- /dev/null +++ b/src/native/external/xz/src/liblzma/common/file_info.c @@ -0,0 +1,850 @@ +// SPDX-License-Identifier: 0BSD + +/////////////////////////////////////////////////////////////////////////////// +// +/// \file file_info.c +/// \brief Decode .xz file information into a lzma_index structure +// +// Author: Lasse Collin +// +/////////////////////////////////////////////////////////////////////////////// + +#include "index_decoder.h" + + +typedef struct { + enum { + SEQ_MAGIC_BYTES, + SEQ_PADDING_SEEK, + SEQ_PADDING_DECODE, + SEQ_FOOTER, + SEQ_INDEX_INIT, + SEQ_INDEX_DECODE, + SEQ_HEADER_DECODE, + SEQ_HEADER_COMPARE, + } sequence; + + /// Absolute position of in[*in_pos] in the file. All code that + /// modifies *in_pos also updates this. seek_to_pos() needs this + /// to determine if we need to request the application to seek for + /// us or if we can do the seeking internally by adjusting *in_pos. + uint64_t file_cur_pos; + + /// This refers to absolute positions of interesting parts of the + /// input file. Sometimes it points to the *beginning* of a specific + /// field and sometimes to the *end* of a field. The current target + /// position at each moment is explained in the comments. + uint64_t file_target_pos; + + /// Size of the .xz file (from the application). + uint64_t file_size; + + /// Index decoder + lzma_next_coder index_decoder; + + /// Number of bytes remaining in the Index field that is currently + /// being decoded. + lzma_vli index_remaining; + + /// The Index decoder will store the decoded Index in this pointer. + lzma_index *this_index; + + /// Amount of Stream Padding in the current Stream. + lzma_vli stream_padding; + + /// The final combined index is collected here. + lzma_index *combined_index; + + /// Pointer from the application where to store the index information + /// after successful decoding. + lzma_index **dest_index; + + /// Pointer to lzma_stream.seek_pos to be used when returning + /// LZMA_SEEK_NEEDED. This is set by seek_to_pos() when needed. + uint64_t *external_seek_pos; + + /// Memory usage limit + uint64_t memlimit; + + /// Stream Flags from the very beginning of the file. + lzma_stream_flags first_header_flags; + + /// Stream Flags from Stream Header of the current Stream. + lzma_stream_flags header_flags; + + /// Stream Flags from Stream Footer of the current Stream. + lzma_stream_flags footer_flags; + + size_t temp_pos; + size_t temp_size; + uint8_t temp[8192]; + +} lzma_file_info_coder; + + +/// Copies data from in[*in_pos] into coder->temp until +/// coder->temp_pos == coder->temp_size. This also keeps coder->file_cur_pos +/// in sync with *in_pos. Returns true if more input is needed. +static bool +fill_temp(lzma_file_info_coder *coder, const uint8_t *restrict in, + size_t *restrict in_pos, size_t in_size) +{ + coder->file_cur_pos += lzma_bufcpy(in, in_pos, in_size, + coder->temp, &coder->temp_pos, coder->temp_size); + return coder->temp_pos < coder->temp_size; +} + + +/// Seeks to the absolute file position specified by target_pos. +/// This tries to do the seeking by only modifying *in_pos, if possible. +/// The main benefit of this is that if one passes the whole file at once +/// to lzma_code(), the decoder will never need to return LZMA_SEEK_NEEDED +/// as all the seeking can be done by adjusting *in_pos in this function. +/// +/// Returns true if an external seek is needed and the caller must return +/// LZMA_SEEK_NEEDED. +static bool +seek_to_pos(lzma_file_info_coder *coder, uint64_t target_pos, + size_t in_start, size_t *in_pos, size_t in_size) +{ + // The input buffer doesn't extend beyond the end of the file. + // This has been checked by file_info_decode() already. + assert(coder->file_size - coder->file_cur_pos >= in_size - *in_pos); + + const uint64_t pos_min = coder->file_cur_pos - (*in_pos - in_start); + const uint64_t pos_max = coder->file_cur_pos + (in_size - *in_pos); + + bool external_seek_needed; + + if (target_pos >= pos_min && target_pos <= pos_max) { + // The requested position is available in the current input + // buffer or right after it. That is, in a corner case we + // end up setting *in_pos == in_size and thus will immediately + // need new input bytes from the application. + *in_pos += (size_t)(target_pos - coder->file_cur_pos); + external_seek_needed = false; + } else { + // Ask the application to seek the input file. + *coder->external_seek_pos = target_pos; + external_seek_needed = true; + + // Mark the whole input buffer as used. This way + // lzma_stream.total_in will have a better estimate + // of the amount of data read. It still won't be perfect + // as the value will depend on the input buffer size that + // the application uses, but it should be good enough for + // those few who want an estimate. + *in_pos = in_size; + } + + // After seeking (internal or external) the current position + // will match the requested target position. + coder->file_cur_pos = target_pos; + + return external_seek_needed; +} + + +/// The caller sets coder->file_target_pos so that it points to the *end* +/// of the desired file position. This function then determines how far +/// backwards from that position we can seek. After seeking fill_temp() +/// can be used to read data into coder->temp. When fill_temp() has finished, +/// coder->temp[coder->temp_size] will match coder->file_target_pos. +/// +/// This also validates that coder->target_file_pos is sane in sense that +/// we aren't trying to seek too far backwards (too close or beyond the +/// beginning of the file). +static lzma_ret +reverse_seek(lzma_file_info_coder *coder, + size_t in_start, size_t *in_pos, size_t in_size) +{ + // Check that there is enough data before the target position + // to contain at least Stream Header and Stream Footer. If there + // isn't, the file cannot be valid. + if (coder->file_target_pos < 2 * LZMA_STREAM_HEADER_SIZE) + return LZMA_DATA_ERROR; + + coder->temp_pos = 0; + + // The Stream Header at the very beginning of the file gets handled + // specially in SEQ_MAGIC_BYTES and thus we will never need to seek + // there. By not seeking to the first LZMA_STREAM_HEADER_SIZE bytes + // we avoid a useless external seek after SEQ_MAGIC_BYTES if the + // application uses an extremely small input buffer and the input + // file is very small. + if (coder->file_target_pos - LZMA_STREAM_HEADER_SIZE + < sizeof(coder->temp)) + coder->temp_size = (size_t)(coder->file_target_pos + - LZMA_STREAM_HEADER_SIZE); + else + coder->temp_size = sizeof(coder->temp); + + // The above if-statements guarantee this. This is important because + // the Stream Header/Footer decoders assume that there's at least + // LZMA_STREAM_HEADER_SIZE bytes in coder->temp. + assert(coder->temp_size >= LZMA_STREAM_HEADER_SIZE); + + if (seek_to_pos(coder, coder->file_target_pos - coder->temp_size, + in_start, in_pos, in_size)) + return LZMA_SEEK_NEEDED; + + return LZMA_OK; +} + + +/// Gets the number of zero-bytes at the end of the buffer. +static size_t +get_padding_size(const uint8_t *buf, size_t buf_size) +{ + size_t padding = 0; + while (buf_size > 0 && buf[--buf_size] == 0x00) + ++padding; + + return padding; +} + + +/// With the Stream Header at the very beginning of the file, LZMA_FORMAT_ERROR +/// is used to tell the application that Magic Bytes didn't match. In other +/// Stream Header/Footer fields (in the middle/end of the file) it could be +/// a bit confusing to return LZMA_FORMAT_ERROR as we already know that there +/// is a valid Stream Header at the beginning of the file. For those cases +/// this function is used to convert LZMA_FORMAT_ERROR to LZMA_DATA_ERROR. +static lzma_ret +hide_format_error(lzma_ret ret) +{ + if (ret == LZMA_FORMAT_ERROR) + ret = LZMA_DATA_ERROR; + + return ret; +} + + +/// Calls the Index decoder and updates coder->index_remaining. +/// This is a separate function because the input can be either directly +/// from the application or from coder->temp. +static lzma_ret +decode_index(lzma_file_info_coder *coder, const lzma_allocator *allocator, + const uint8_t *restrict in, size_t *restrict in_pos, + size_t in_size, bool update_file_cur_pos) +{ + const size_t in_start = *in_pos; + + const lzma_ret ret = coder->index_decoder.code( + coder->index_decoder.coder, + allocator, in, in_pos, in_size, + NULL, NULL, 0, LZMA_RUN); + + coder->index_remaining -= *in_pos - in_start; + + if (update_file_cur_pos) + coder->file_cur_pos += *in_pos - in_start; + + return ret; +} + + +static lzma_ret +file_info_decode(void *coder_ptr, const lzma_allocator *allocator, + const uint8_t *restrict in, size_t *restrict in_pos, + size_t in_size, + uint8_t *restrict out lzma_attribute((__unused__)), + size_t *restrict out_pos lzma_attribute((__unused__)), + size_t out_size lzma_attribute((__unused__)), + lzma_action action lzma_attribute((__unused__))) +{ + lzma_file_info_coder *coder = coder_ptr; + const size_t in_start = *in_pos; + + // If the caller provides input past the end of the file, trim + // the extra bytes from the buffer so that we won't read too far. + assert(coder->file_size >= coder->file_cur_pos); + if (coder->file_size - coder->file_cur_pos < in_size - in_start) + in_size = in_start + + (size_t)(coder->file_size - coder->file_cur_pos); + + while (true) + switch (coder->sequence) { + case SEQ_MAGIC_BYTES: + // Decode the Stream Header at the beginning of the file + // first to check if the Magic Bytes match. The flags + // are stored in coder->first_header_flags so that we + // don't need to seek to it again. + // + // Check that the file is big enough to contain at least + // Stream Header. + if (coder->file_size < LZMA_STREAM_HEADER_SIZE) + return LZMA_FORMAT_ERROR; + + // Read the Stream Header field into coder->temp. + if (fill_temp(coder, in, in_pos, in_size)) + return LZMA_OK; + + // This is the only Stream Header/Footer decoding where we + // want to return LZMA_FORMAT_ERROR if the Magic Bytes don't + // match. Elsewhere it will be converted to LZMA_DATA_ERROR. + return_if_error(lzma_stream_header_decode( + &coder->first_header_flags, coder->temp)); + + // Now that we know that the Magic Bytes match, check the + // file size. It's better to do this here after checking the + // Magic Bytes since this way we can give LZMA_FORMAT_ERROR + // instead of LZMA_DATA_ERROR when the Magic Bytes don't + // match in a file that is too big or isn't a multiple of + // four bytes. + if (coder->file_size > LZMA_VLI_MAX || (coder->file_size & 3)) + return LZMA_DATA_ERROR; + + // Start looking for Stream Padding and Stream Footer + // at the end of the file. + coder->file_target_pos = coder->file_size; + FALLTHROUGH; + + case SEQ_PADDING_SEEK: + coder->sequence = SEQ_PADDING_DECODE; + return_if_error(reverse_seek( + coder, in_start, in_pos, in_size)); + FALLTHROUGH; + + case SEQ_PADDING_DECODE: { + // Copy to coder->temp first. This keeps the code simpler if + // the application only provides input a few bytes at a time. + if (fill_temp(coder, in, in_pos, in_size)) + return LZMA_OK; + + // Scan the buffer backwards to get the size of the + // Stream Padding field (if any). + const size_t new_padding = get_padding_size( + coder->temp, coder->temp_size); + coder->stream_padding += new_padding; + + // Set the target position to the beginning of Stream Padding + // that has been observed so far. If all Stream Padding has + // been seen, then the target position will be at the end + // of the Stream Footer field. + coder->file_target_pos -= new_padding; + + if (new_padding == coder->temp_size) { + // The whole buffer was padding. Seek backwards in + // the file to get more input. + coder->sequence = SEQ_PADDING_SEEK; + break; + } + + // Size of Stream Padding must be a multiple of 4 bytes. + if (coder->stream_padding & 3) + return LZMA_DATA_ERROR; + + coder->sequence = SEQ_FOOTER; + + // Calculate the amount of non-padding data in coder->temp. + coder->temp_size -= new_padding; + coder->temp_pos = coder->temp_size; + + // We can avoid an external seek if the whole Stream Footer + // is already in coder->temp. In that case SEQ_FOOTER won't + // read more input and will find the Stream Footer from + // coder->temp[coder->temp_size - LZMA_STREAM_HEADER_SIZE]. + // + // Otherwise we will need to seek. The seeking is done so + // that Stream Footer will be at the end of coder->temp. + // This way it's likely that we also get a complete Index + // field into coder->temp without needing a separate seek + // for that (unless the Index field is big). + if (coder->temp_size < LZMA_STREAM_HEADER_SIZE) + return_if_error(reverse_seek( + coder, in_start, in_pos, in_size)); + + FALLTHROUGH; + } + + case SEQ_FOOTER: + // Copy the Stream Footer field into coder->temp. + // If Stream Footer was already available in coder->temp + // in SEQ_PADDING_DECODE, then this does nothing. + if (fill_temp(coder, in, in_pos, in_size)) + return LZMA_OK; + + // Make coder->file_target_pos and coder->temp_size point + // to the beginning of Stream Footer and thus to the end + // of the Index field. coder->temp_pos will be updated + // a bit later. + coder->file_target_pos -= LZMA_STREAM_HEADER_SIZE; + coder->temp_size -= LZMA_STREAM_HEADER_SIZE; + + // Decode Stream Footer. + return_if_error(hide_format_error(lzma_stream_footer_decode( + &coder->footer_flags, + coder->temp + coder->temp_size))); + + // Check that we won't seek past the beginning of the file. + // + // LZMA_STREAM_HEADER_SIZE is added because there must be + // space for Stream Header too even though we won't seek + // there before decoding the Index field. + // + // There's no risk of integer overflow here because + // Backward Size cannot be greater than 2^34. + if (coder->file_target_pos < coder->footer_flags.backward_size + + LZMA_STREAM_HEADER_SIZE) + return LZMA_DATA_ERROR; + + // Set the target position to the beginning of the Index field. + coder->file_target_pos -= coder->footer_flags.backward_size; + coder->sequence = SEQ_INDEX_INIT; + + // We can avoid an external seek if the whole Index field is + // already available in coder->temp. + if (coder->temp_size >= coder->footer_flags.backward_size) { + // Set coder->temp_pos to point to the beginning + // of the Index. + coder->temp_pos = coder->temp_size + - (size_t)coder->footer_flags.backward_size; + } else { + // These are set to zero to indicate that there's no + // useful data (Index or anything else) in coder->temp. + coder->temp_pos = 0; + coder->temp_size = 0; + + // Seek to the beginning of the Index field. + if (seek_to_pos(coder, coder->file_target_pos, + in_start, in_pos, in_size)) + return LZMA_SEEK_NEEDED; + } + + FALLTHROUGH; + + case SEQ_INDEX_INIT: { + // Calculate the amount of memory already used by the earlier + // Indexes so that we know how big memory limit to pass to + // the Index decoder. + // + // NOTE: When there are multiple Streams, the separate + // lzma_index structures can use more RAM (as measured by + // lzma_index_memused()) than the final combined lzma_index. + // Thus memlimit may need to be slightly higher than the final + // calculated memory usage will be. This is perhaps a bit + // confusing to the application, but I think it shouldn't + // cause problems in practice. + uint64_t memused = 0; + if (coder->combined_index != NULL) { + memused = lzma_index_memused(coder->combined_index); + assert(memused <= coder->memlimit); + if (memused > coder->memlimit) // Extra sanity check + return LZMA_PROG_ERROR; + } + + // Initialize the Index decoder. + return_if_error(lzma_index_decoder_init( + &coder->index_decoder, allocator, + &coder->this_index, + coder->memlimit - memused)); + + coder->index_remaining = coder->footer_flags.backward_size; + coder->sequence = SEQ_INDEX_DECODE; + FALLTHROUGH; + } + + case SEQ_INDEX_DECODE: { + // Decode (a part of) the Index. If the whole Index is already + // in coder->temp, read it from there. Otherwise read from + // in[*in_pos] onwards. Note that index_decode() updates + // coder->index_remaining and optionally coder->file_cur_pos. + lzma_ret ret; + if (coder->temp_size != 0) { + assert(coder->temp_size - coder->temp_pos + == coder->index_remaining); + ret = decode_index(coder, allocator, coder->temp, + &coder->temp_pos, coder->temp_size, + false); + } else { + // Don't give the decoder more input than the known + // remaining size of the Index field. + size_t in_stop = in_size; + if (in_size - *in_pos > coder->index_remaining) + in_stop = *in_pos + + (size_t)(coder->index_remaining); + + ret = decode_index(coder, allocator, + in, in_pos, in_stop, true); + } + + switch (ret) { + case LZMA_OK: + // If the Index docoder asks for more input when we + // have already given it as much input as Backward Size + // indicated, the file is invalid. + if (coder->index_remaining == 0) + return LZMA_DATA_ERROR; + + // We cannot get here if we were reading Index from + // coder->temp because when reading from coder->temp + // we give the Index decoder exactly + // coder->index_remaining bytes of input. + assert(coder->temp_size == 0); + + return LZMA_OK; + + case LZMA_STREAM_END: + // If the decoding seems to be successful, check also + // that the Index decoder consumed as much input as + // indicated by the Backward Size field. + if (coder->index_remaining != 0) + return LZMA_DATA_ERROR; + + break; + + default: + return ret; + } + + // Calculate how much the Index tells us to seek backwards + // (relative to the beginning of the Index): Total size of + // all Blocks plus the size of the Stream Header field. + // No integer overflow here because lzma_index_total_size() + // cannot return a value greater than LZMA_VLI_MAX. + const uint64_t seek_amount + = lzma_index_total_size(coder->this_index) + + LZMA_STREAM_HEADER_SIZE; + + // Check that Index is sane in sense that seek_amount won't + // make us seek past the beginning of the file when locating + // the Stream Header. + // + // coder->file_target_pos still points to the beginning of + // the Index field. + if (coder->file_target_pos < seek_amount) + return LZMA_DATA_ERROR; + + // Set the target to the beginning of Stream Header. + coder->file_target_pos -= seek_amount; + + if (coder->file_target_pos == 0) { + // We would seek to the beginning of the file, but + // since we already decoded that Stream Header in + // SEQ_MAGIC_BYTES, we can use the cached value from + // coder->first_header_flags to avoid the seek. + coder->header_flags = coder->first_header_flags; + coder->sequence = SEQ_HEADER_COMPARE; + break; + } + + coder->sequence = SEQ_HEADER_DECODE; + + // Make coder->file_target_pos point to the end of + // the Stream Header field. + coder->file_target_pos += LZMA_STREAM_HEADER_SIZE; + + // If coder->temp_size is non-zero, it points to the end + // of the Index field. Then the beginning of the Index + // field is at coder->temp[coder->temp_size + // - coder->footer_flags.backward_size]. + assert(coder->temp_size == 0 || coder->temp_size + >= coder->footer_flags.backward_size); + + // If coder->temp contained the whole Index, see if it has + // enough data to contain also the Stream Header. If so, + // we avoid an external seek. + // + // NOTE: This can happen only with small .xz files and only + // for the non-first Stream as the Stream Flags of the first + // Stream are cached and already handled a few lines above. + // So this isn't as useful as the other seek-avoidance cases. + if (coder->temp_size != 0 && coder->temp_size + - (size_t)coder->footer_flags.backward_size + >= (size_t)seek_amount) { + // Make temp_pos and temp_size point to the *end* of + // Stream Header so that SEQ_HEADER_DECODE will find + // the start of Stream Header from coder->temp[ + // coder->temp_size - LZMA_STREAM_HEADER_SIZE]. + coder->temp_pos = coder->temp_size + - (size_t)coder->footer_flags.backward_size + - (size_t)seek_amount + + LZMA_STREAM_HEADER_SIZE; + coder->temp_size = coder->temp_pos; + } else { + // Seek so that Stream Header will be at the end of + // coder->temp. With typical multi-Stream files we + // will usually also get the Stream Footer and Index + // of the *previous* Stream in coder->temp and thus + // won't need a separate seek for them. + return_if_error(reverse_seek(coder, + in_start, in_pos, in_size)); + } + + FALLTHROUGH; + } + + case SEQ_HEADER_DECODE: + // Copy the Stream Header field into coder->temp. + // If Stream Header was already available in coder->temp + // in SEQ_INDEX_DECODE, then this does nothing. + if (fill_temp(coder, in, in_pos, in_size)) + return LZMA_OK; + + // Make all these point to the beginning of Stream Header. + coder->file_target_pos -= LZMA_STREAM_HEADER_SIZE; + coder->temp_size -= LZMA_STREAM_HEADER_SIZE; + coder->temp_pos = coder->temp_size; + + // Decode the Stream Header. + return_if_error(hide_format_error(lzma_stream_header_decode( + &coder->header_flags, + coder->temp + coder->temp_size))); + + coder->sequence = SEQ_HEADER_COMPARE; + FALLTHROUGH; + + case SEQ_HEADER_COMPARE: + // Compare Stream Header against Stream Footer. They must + // match. + return_if_error(lzma_stream_flags_compare( + &coder->header_flags, &coder->footer_flags)); + + // Store the decoded Stream Flags into the Index. Use the + // Footer Flags because it contains Backward Size, although + // it shouldn't matter in practice. + if (lzma_index_stream_flags(coder->this_index, + &coder->footer_flags) != LZMA_OK) + return LZMA_PROG_ERROR; + + // Store also the size of the Stream Padding field. It is + // needed to calculate the offsets of the Streams correctly. + if (lzma_index_stream_padding(coder->this_index, + coder->stream_padding) != LZMA_OK) + return LZMA_PROG_ERROR; + + // Reset it so that it's ready for the next Stream. + coder->stream_padding = 0; + + // Append the earlier decoded Indexes after this_index. + if (coder->combined_index != NULL) + return_if_error(lzma_index_cat(coder->this_index, + coder->combined_index, allocator)); + + coder->combined_index = coder->this_index; + coder->this_index = NULL; + + // If the whole file was decoded, tell the caller that we + // are finished. + if (coder->file_target_pos == 0) { + // The combined index must indicate the same file + // size as was told to us at initialization. + assert(lzma_index_file_size(coder->combined_index) + == coder->file_size); + + // Make the combined index available to + // the application. + *coder->dest_index = coder->combined_index; + coder->combined_index = NULL; + + // Mark the input buffer as used since we may have + // done internal seeking and thus don't know how + // many input bytes were actually used. This way + // lzma_stream.total_in gets a slightly better + // estimate of the amount of input used. + *in_pos = in_size; + return LZMA_STREAM_END; + } + + // We didn't hit the beginning of the file yet, so continue + // reading backwards in the file. If we have unprocessed + // data in coder->temp, use it before requesting more data + // from the application. + // + // coder->file_target_pos, coder->temp_size, and + // coder->temp_pos all point to the beginning of Stream Header + // and thus the end of the previous Stream in the file. + coder->sequence = coder->temp_size > 0 + ? SEQ_PADDING_DECODE : SEQ_PADDING_SEEK; + break; + + default: + assert(0); + return LZMA_PROG_ERROR; + } +} + + +static lzma_ret +file_info_decoder_memconfig(void *coder_ptr, uint64_t *memusage, + uint64_t *old_memlimit, uint64_t new_memlimit) +{ + lzma_file_info_coder *coder = coder_ptr; + + // The memory usage calculation comes from three things: + // + // (1) The Indexes that have already been decoded and processed into + // coder->combined_index. + // + // (2) The latest Index in coder->this_index that has been decoded but + // not yet put into coder->combined_index. + // + // (3) The latest Index that we have started decoding but haven't + // finished and thus isn't available in coder->this_index yet. + // Memory usage and limit information needs to be communicated + // from/to coder->index_decoder. + // + // Care has to be taken to not do both (2) and (3) when calculating + // the memory usage. + uint64_t combined_index_memusage = 0; + uint64_t this_index_memusage = 0; + + // (1) If we have already successfully decoded one or more Indexes, + // get their memory usage. + if (coder->combined_index != NULL) + combined_index_memusage = lzma_index_memused( + coder->combined_index); + + // Choose between (2), (3), or neither. + if (coder->this_index != NULL) { + // (2) The latest Index is available. Use its memory usage. + this_index_memusage = lzma_index_memused(coder->this_index); + + } else if (coder->sequence == SEQ_INDEX_DECODE) { + // (3) The Index decoder is activate and hasn't yet stored + // the new index in coder->this_index. Get the memory usage + // information from the Index decoder. + // + // NOTE: If the Index decoder doesn't yet know how much memory + // it will eventually need, it will return a tiny value here. + uint64_t dummy; + if (coder->index_decoder.memconfig(coder->index_decoder.coder, + &this_index_memusage, &dummy, 0) + != LZMA_OK) { + assert(0); + return LZMA_PROG_ERROR; + } + } + + // Now we know the total memory usage/requirement. If we had neither + // old Indexes nor a new Index, this will be zero which isn't + // acceptable as lzma_memusage() has to return non-zero on success + // and even with an empty .xz file we will end up with a lzma_index + // that takes some memory. + *memusage = combined_index_memusage + this_index_memusage; + if (*memusage == 0) + *memusage = lzma_index_memusage(1, 0); + + *old_memlimit = coder->memlimit; + + // If requested, set a new memory usage limit. + if (new_memlimit != 0) { + if (new_memlimit < *memusage) + return LZMA_MEMLIMIT_ERROR; + + // In the condition (3) we need to tell the Index decoder + // its new memory usage limit. + if (coder->this_index == NULL + && coder->sequence == SEQ_INDEX_DECODE) { + const uint64_t idec_new_memlimit = new_memlimit + - combined_index_memusage; + + assert(this_index_memusage > 0); + assert(idec_new_memlimit > 0); + + uint64_t dummy1; + uint64_t dummy2; + + if (coder->index_decoder.memconfig( + coder->index_decoder.coder, + &dummy1, &dummy2, idec_new_memlimit) + != LZMA_OK) { + assert(0); + return LZMA_PROG_ERROR; + } + } + + coder->memlimit = new_memlimit; + } + + return LZMA_OK; +} + + +static void +file_info_decoder_end(void *coder_ptr, const lzma_allocator *allocator) +{ + lzma_file_info_coder *coder = coder_ptr; + + lzma_next_end(&coder->index_decoder, allocator); + lzma_index_end(coder->this_index, allocator); + lzma_index_end(coder->combined_index, allocator); + + lzma_free(coder, allocator); + return; +} + + +static lzma_ret +lzma_file_info_decoder_init(lzma_next_coder *next, + const lzma_allocator *allocator, uint64_t *seek_pos, + lzma_index **dest_index, + uint64_t memlimit, uint64_t file_size) +{ + lzma_next_coder_init(&lzma_file_info_decoder_init, next, allocator); + + if (dest_index == NULL) + return LZMA_PROG_ERROR; + + lzma_file_info_coder *coder = next->coder; + if (coder == NULL) { + coder = lzma_alloc(sizeof(lzma_file_info_coder), allocator); + if (coder == NULL) + return LZMA_MEM_ERROR; + + next->coder = coder; + next->code = &file_info_decode; + next->end = &file_info_decoder_end; + next->memconfig = &file_info_decoder_memconfig; + + coder->index_decoder = LZMA_NEXT_CODER_INIT; + coder->this_index = NULL; + coder->combined_index = NULL; + } + + coder->sequence = SEQ_MAGIC_BYTES; + coder->file_cur_pos = 0; + coder->file_target_pos = 0; + coder->file_size = file_size; + + lzma_index_end(coder->this_index, allocator); + coder->this_index = NULL; + + lzma_index_end(coder->combined_index, allocator); + coder->combined_index = NULL; + + coder->stream_padding = 0; + + coder->dest_index = dest_index; + coder->external_seek_pos = seek_pos; + + // If memlimit is 0, make it 1 to ensure that lzma_memlimit_get() + // won't return 0 (which would indicate an error). + coder->memlimit = my_max(1, memlimit); + + // Prepare these for reading the first Stream Header into coder->temp. + coder->temp_pos = 0; + coder->temp_size = LZMA_STREAM_HEADER_SIZE; + + return LZMA_OK; +} + + +extern LZMA_API(lzma_ret) +lzma_file_info_decoder(lzma_stream *strm, lzma_index **dest_index, + uint64_t memlimit, uint64_t file_size) +{ + lzma_next_strm_init(lzma_file_info_decoder_init, strm, &strm->seek_pos, + dest_index, memlimit, file_size); + + // We allow LZMA_FINISH in addition to LZMA_RUN for convenience. + // lzma_code() is able to handle the LZMA_FINISH + LZMA_SEEK_NEEDED + // combination in a sane way. Applications still need to be careful + // if they use LZMA_FINISH so that they remember to reset it back + // to LZMA_RUN after seeking if needed. + strm->internal->supported_actions[LZMA_RUN] = true; + strm->internal->supported_actions[LZMA_FINISH] = true; + + return LZMA_OK; +} diff --git a/src/native/external/xz/src/liblzma/common/filter_buffer_decoder.c b/src/native/external/xz/src/liblzma/common/filter_buffer_decoder.c new file mode 100644 index 00000000000000..cc0d88cc71c183 --- /dev/null +++ b/src/native/external/xz/src/liblzma/common/filter_buffer_decoder.c @@ -0,0 +1,87 @@ +// SPDX-License-Identifier: 0BSD + +/////////////////////////////////////////////////////////////////////////////// +// +/// \file filter_buffer_decoder.c +/// \brief Single-call raw decoding +// +// Author: Lasse Collin +// +/////////////////////////////////////////////////////////////////////////////// + +#include "filter_decoder.h" + + +extern LZMA_API(lzma_ret) +lzma_raw_buffer_decode( + const lzma_filter *filters, const lzma_allocator *allocator, + const uint8_t *in, size_t *in_pos, size_t in_size, + uint8_t *out, size_t *out_pos, size_t out_size) +{ + // Validate what isn't validated later in filter_common.c. + if (in == NULL || in_pos == NULL || *in_pos > in_size || out == NULL + || out_pos == NULL || *out_pos > out_size) + return LZMA_PROG_ERROR; + + // Initialize the decoder. + lzma_next_coder next = LZMA_NEXT_CODER_INIT; + return_if_error(lzma_raw_decoder_init(&next, allocator, filters)); + + // Store the positions so that we can restore them if something + // goes wrong. + const size_t in_start = *in_pos; + const size_t out_start = *out_pos; + + // Do the actual decoding and free decoder's memory. + lzma_ret ret = next.code(next.coder, allocator, in, in_pos, in_size, + out, out_pos, out_size, LZMA_FINISH); + + if (ret == LZMA_STREAM_END) { + ret = LZMA_OK; + } else { + if (ret == LZMA_OK) { + // Either the input was truncated or the + // output buffer was too small. + assert(*in_pos == in_size || *out_pos == out_size); + + if (*in_pos != in_size) { + // Since input wasn't consumed completely, + // the output buffer became full and is + // too small. + ret = LZMA_BUF_ERROR; + + } else if (*out_pos != out_size) { + // Since output didn't became full, the input + // has to be truncated. + ret = LZMA_DATA_ERROR; + + } else { + // All the input was consumed and output + // buffer is full. Now we don't immediately + // know the reason for the error. Try + // decoding one more byte. If it succeeds, + // then the output buffer was too small. If + // we cannot get a new output byte, the input + // is truncated. + uint8_t tmp[1]; + size_t tmp_pos = 0; + (void)next.code(next.coder, allocator, + in, in_pos, in_size, + tmp, &tmp_pos, 1, LZMA_FINISH); + + if (tmp_pos == 1) + ret = LZMA_BUF_ERROR; + else + ret = LZMA_DATA_ERROR; + } + } + + // Restore the positions. + *in_pos = in_start; + *out_pos = out_start; + } + + lzma_next_end(&next, allocator); + + return ret; +} diff --git a/src/native/external/xz/src/liblzma/common/filter_buffer_encoder.c b/src/native/external/xz/src/liblzma/common/filter_buffer_encoder.c new file mode 100644 index 00000000000000..7fb8922ae90ef7 --- /dev/null +++ b/src/native/external/xz/src/liblzma/common/filter_buffer_encoder.c @@ -0,0 +1,54 @@ +// SPDX-License-Identifier: 0BSD + +/////////////////////////////////////////////////////////////////////////////// +// +/// \file filter_buffer_encoder.c +/// \brief Single-call raw encoding +// +// Author: Lasse Collin +// +/////////////////////////////////////////////////////////////////////////////// + +#include "filter_encoder.h" + + +extern LZMA_API(lzma_ret) +lzma_raw_buffer_encode( + const lzma_filter *filters, const lzma_allocator *allocator, + const uint8_t *in, size_t in_size, + uint8_t *out, size_t *out_pos, size_t out_size) +{ + // Validate what isn't validated later in filter_common.c. + if ((in == NULL && in_size != 0) || out == NULL + || out_pos == NULL || *out_pos > out_size) + return LZMA_PROG_ERROR; + + // Initialize the encoder + lzma_next_coder next = LZMA_NEXT_CODER_INIT; + return_if_error(lzma_raw_encoder_init(&next, allocator, filters)); + + // Store the output position so that we can restore it if + // something goes wrong. + const size_t out_start = *out_pos; + + // Do the actual encoding and free coder's memory. + size_t in_pos = 0; + lzma_ret ret = next.code(next.coder, allocator, in, &in_pos, in_size, + out, out_pos, out_size, LZMA_FINISH); + lzma_next_end(&next, allocator); + + if (ret == LZMA_STREAM_END) { + ret = LZMA_OK; + } else { + if (ret == LZMA_OK) { + // Output buffer was too small. + assert(*out_pos == out_size); + ret = LZMA_BUF_ERROR; + } + + // Restore the output position. + *out_pos = out_start; + } + + return ret; +} diff --git a/src/native/external/xz/src/liblzma/common/filter_common.c b/src/native/external/xz/src/liblzma/common/filter_common.c new file mode 100644 index 00000000000000..6c06c78ddc152b --- /dev/null +++ b/src/native/external/xz/src/liblzma/common/filter_common.c @@ -0,0 +1,393 @@ +// SPDX-License-Identifier: 0BSD + +/////////////////////////////////////////////////////////////////////////////// +// +/// \file filter_common.c +/// \brief Filter-specific stuff common for both encoder and decoder +// +// Author: Lasse Collin +// +/////////////////////////////////////////////////////////////////////////////// + +#include "filter_common.h" + + +static const struct { + /// Filter ID + lzma_vli id; + + /// Size of the filter-specific options structure + size_t options_size; + + /// True if it is OK to use this filter as non-last filter in + /// the chain. + bool non_last_ok; + + /// True if it is OK to use this filter as the last filter in + /// the chain. + bool last_ok; + + /// True if the filter may change the size of the data (that is, the + /// amount of encoded output can be different than the amount of + /// uncompressed input). + bool changes_size; + +} features[] = { +#if defined (HAVE_ENCODER_LZMA1) || defined(HAVE_DECODER_LZMA1) + { + .id = LZMA_FILTER_LZMA1, + .options_size = sizeof(lzma_options_lzma), + .non_last_ok = false, + .last_ok = true, + .changes_size = true, + }, + { + .id = LZMA_FILTER_LZMA1EXT, + .options_size = sizeof(lzma_options_lzma), + .non_last_ok = false, + .last_ok = true, + .changes_size = true, + }, +#endif +#if defined(HAVE_ENCODER_LZMA2) || defined(HAVE_DECODER_LZMA2) + { + .id = LZMA_FILTER_LZMA2, + .options_size = sizeof(lzma_options_lzma), + .non_last_ok = false, + .last_ok = true, + .changes_size = true, + }, +#endif +#if defined(HAVE_ENCODER_X86) || defined(HAVE_DECODER_X86) + { + .id = LZMA_FILTER_X86, + .options_size = sizeof(lzma_options_bcj), + .non_last_ok = true, + .last_ok = false, + .changes_size = false, + }, +#endif +#if defined(HAVE_ENCODER_POWERPC) || defined(HAVE_DECODER_POWERPC) + { + .id = LZMA_FILTER_POWERPC, + .options_size = sizeof(lzma_options_bcj), + .non_last_ok = true, + .last_ok = false, + .changes_size = false, + }, +#endif +#if defined(HAVE_ENCODER_IA64) || defined(HAVE_DECODER_IA64) + { + .id = LZMA_FILTER_IA64, + .options_size = sizeof(lzma_options_bcj), + .non_last_ok = true, + .last_ok = false, + .changes_size = false, + }, +#endif +#if defined(HAVE_ENCODER_ARM) || defined(HAVE_DECODER_ARM) + { + .id = LZMA_FILTER_ARM, + .options_size = sizeof(lzma_options_bcj), + .non_last_ok = true, + .last_ok = false, + .changes_size = false, + }, +#endif +#if defined(HAVE_ENCODER_ARMTHUMB) || defined(HAVE_DECODER_ARMTHUMB) + { + .id = LZMA_FILTER_ARMTHUMB, + .options_size = sizeof(lzma_options_bcj), + .non_last_ok = true, + .last_ok = false, + .changes_size = false, + }, +#endif +#if defined(HAVE_ENCODER_ARM64) || defined(HAVE_DECODER_ARM64) + { + .id = LZMA_FILTER_ARM64, + .options_size = sizeof(lzma_options_bcj), + .non_last_ok = true, + .last_ok = false, + .changes_size = false, + }, +#endif +#if defined(HAVE_ENCODER_SPARC) || defined(HAVE_DECODER_SPARC) + { + .id = LZMA_FILTER_SPARC, + .options_size = sizeof(lzma_options_bcj), + .non_last_ok = true, + .last_ok = false, + .changes_size = false, + }, +#endif +#if defined(HAVE_ENCODER_RISCV) || defined(HAVE_DECODER_RISCV) + { + .id = LZMA_FILTER_RISCV, + .options_size = sizeof(lzma_options_bcj), + .non_last_ok = true, + .last_ok = false, + .changes_size = false, + }, +#endif +#if defined(HAVE_ENCODER_DELTA) || defined(HAVE_DECODER_DELTA) + { + .id = LZMA_FILTER_DELTA, + .options_size = sizeof(lzma_options_delta), + .non_last_ok = true, + .last_ok = false, + .changes_size = false, + }, +#endif + { + .id = LZMA_VLI_UNKNOWN + } +}; + + +extern LZMA_API(lzma_ret) +lzma_filters_copy(const lzma_filter *src, lzma_filter *real_dest, + const lzma_allocator *allocator) +{ + if (src == NULL || real_dest == NULL) + return LZMA_PROG_ERROR; + + // Use a temporary destination so that the real destination + // will never be modified if an error occurs. + lzma_filter dest[LZMA_FILTERS_MAX + 1]; + + lzma_ret ret; + size_t i; + for (i = 0; src[i].id != LZMA_VLI_UNKNOWN; ++i) { + // There must be a maximum of four filters plus + // the array terminator. + if (i == LZMA_FILTERS_MAX) { + ret = LZMA_OPTIONS_ERROR; + goto error; + } + + dest[i].id = src[i].id; + + if (src[i].options == NULL) { + dest[i].options = NULL; + } else { + // See if the filter is supported only when the + // options is not NULL. This might be convenient + // sometimes if the app is actually copying only + // a partial filter chain with a place holder ID. + // + // When options is not NULL, the Filter ID must be + // supported by us, because otherwise we don't know + // how big the options are. + size_t j; + for (j = 0; src[i].id != features[j].id; ++j) { + if (features[j].id == LZMA_VLI_UNKNOWN) { + ret = LZMA_OPTIONS_ERROR; + goto error; + } + } + + // Allocate and copy the options. + dest[i].options = lzma_alloc(features[j].options_size, + allocator); + if (dest[i].options == NULL) { + ret = LZMA_MEM_ERROR; + goto error; + } + + memcpy(dest[i].options, src[i].options, + features[j].options_size); + } + } + + // Terminate the filter array. + assert(i < LZMA_FILTERS_MAX + 1); + dest[i].id = LZMA_VLI_UNKNOWN; + dest[i].options = NULL; + + // Copy it to the caller-supplied array now that we know that + // no errors occurred. + memcpy(real_dest, dest, (i + 1) * sizeof(lzma_filter)); + + return LZMA_OK; + +error: + // Free the options which we have already allocated. + while (i > 0) + lzma_free(dest[--i].options, allocator); + + return ret; +} + + +extern LZMA_API(void) +lzma_filters_free(lzma_filter *filters, const lzma_allocator *allocator) +{ + if (filters == NULL) + return; + + for (size_t i = 0; filters[i].id != LZMA_VLI_UNKNOWN; ++i) { + if (i == LZMA_FILTERS_MAX) { + // The API says that LZMA_FILTERS_MAX + 1 is the + // maximum allowed size including the terminating + // element. Thus, we should never get here but in + // case there is a bug and we do anyway, don't go + // past the (probable) end of the array. + assert(0); + break; + } + + lzma_free(filters[i].options, allocator); + filters[i].options = NULL; + filters[i].id = LZMA_VLI_UNKNOWN; + } + + return; +} + + +extern lzma_ret +lzma_validate_chain(const lzma_filter *filters, size_t *count) +{ + // There must be at least one filter. + if (filters == NULL || filters[0].id == LZMA_VLI_UNKNOWN) + return LZMA_PROG_ERROR; + + // Number of non-last filters that may change the size of the data + // significantly (that is, more than 1-2 % or so). + size_t changes_size_count = 0; + + // True if it is OK to add a new filter after the current filter. + bool non_last_ok = true; + + // True if the last filter in the given chain is actually usable as + // the last filter. Only filters that support embedding End of Payload + // Marker can be used as the last filter in the chain. + bool last_ok = false; + + size_t i = 0; + do { + size_t j; + for (j = 0; filters[i].id != features[j].id; ++j) + if (features[j].id == LZMA_VLI_UNKNOWN) + return LZMA_OPTIONS_ERROR; + + // If the previous filter in the chain cannot be a non-last + // filter, the chain is invalid. + if (!non_last_ok) + return LZMA_OPTIONS_ERROR; + + non_last_ok = features[j].non_last_ok; + last_ok = features[j].last_ok; + changes_size_count += features[j].changes_size; + + } while (filters[++i].id != LZMA_VLI_UNKNOWN); + + // There must be 1-4 filters. The last filter must be usable as + // the last filter in the chain. A maximum of three filters are + // allowed to change the size of the data. + if (i > LZMA_FILTERS_MAX || !last_ok || changes_size_count > 3) + return LZMA_OPTIONS_ERROR; + + *count = i; + return LZMA_OK; +} + + +extern lzma_ret +lzma_raw_coder_init(lzma_next_coder *next, const lzma_allocator *allocator, + const lzma_filter *options, + lzma_filter_find coder_find, bool is_encoder) +{ + // Do some basic validation and get the number of filters. + size_t count; + return_if_error(lzma_validate_chain(options, &count)); + + // Set the filter functions and copy the options pointer. + lzma_filter_info filters[LZMA_FILTERS_MAX + 1]; + if (is_encoder) { + for (size_t i = 0; i < count; ++i) { + // The order of the filters is reversed in the + // encoder. It allows more efficient handling + // of the uncompressed data. + const size_t j = count - i - 1; + + const lzma_filter_coder *const fc + = coder_find(options[i].id); + if (fc == NULL || fc->init == NULL) + return LZMA_OPTIONS_ERROR; + + filters[j].id = options[i].id; + filters[j].init = fc->init; + filters[j].options = options[i].options; + } + } else { + for (size_t i = 0; i < count; ++i) { + const lzma_filter_coder *const fc + = coder_find(options[i].id); + if (fc == NULL || fc->init == NULL) + return LZMA_OPTIONS_ERROR; + + filters[i].id = options[i].id; + filters[i].init = fc->init; + filters[i].options = options[i].options; + } + } + + // Terminate the array. + filters[count].id = LZMA_VLI_UNKNOWN; + filters[count].init = NULL; + + // Initialize the filters. + const lzma_ret ret = lzma_next_filter_init(next, allocator, filters); + if (ret != LZMA_OK) + lzma_next_end(next, allocator); + + return ret; +} + + +extern uint64_t +lzma_raw_coder_memusage(lzma_filter_find coder_find, + const lzma_filter *filters) +{ + // The chain has to have at least one filter. + { + size_t tmp; + if (lzma_validate_chain(filters, &tmp) != LZMA_OK) + return UINT64_MAX; + } + + uint64_t total = 0; + size_t i = 0; + + do { + const lzma_filter_coder *const fc + = coder_find(filters[i].id); + if (fc == NULL) + return UINT64_MAX; // Unsupported Filter ID + + if (fc->memusage == NULL) { + // This filter doesn't have a function to calculate + // the memory usage and validate the options. Such + // filters need only little memory, so we use 1 KiB + // as a good estimate. They also accept all possible + // options, so there's no need to worry about lack + // of validation. + total += 1024; + } else { + // Call the filter-specific memory usage calculation + // function. + const uint64_t usage + = fc->memusage(filters[i].options); + if (usage == UINT64_MAX) + return UINT64_MAX; // Invalid options + + total += usage; + } + } while (filters[++i].id != LZMA_VLI_UNKNOWN); + + // Add some fixed amount of extra. It's to compensate memory usage + // of Stream, Block etc. coders, malloc() overhead, stack etc. + return total + LZMA_MEMUSAGE_BASE; +} diff --git a/src/native/external/xz/src/liblzma/common/filter_common.h b/src/native/external/xz/src/liblzma/common/filter_common.h new file mode 100644 index 00000000000000..95f9fe27017bf7 --- /dev/null +++ b/src/native/external/xz/src/liblzma/common/filter_common.h @@ -0,0 +1,50 @@ +// SPDX-License-Identifier: 0BSD + +/////////////////////////////////////////////////////////////////////////////// +// +/// \file filter_common.h +/// \brief Filter-specific stuff common for both encoder and decoder +// +// Author: Lasse Collin +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef LZMA_FILTER_COMMON_H +#define LZMA_FILTER_COMMON_H + +#include "common.h" + + +/// Both lzma_filter_encoder and lzma_filter_decoder begin with these members. +typedef struct { + /// Filter ID + lzma_vli id; + + /// Initializes the filter encoder and calls lzma_next_filter_init() + /// for filters + 1. + lzma_init_function init; + + /// Calculates memory usage of the encoder. If the options are + /// invalid, UINT64_MAX is returned. + uint64_t (*memusage)(const void *options); + +} lzma_filter_coder; + + +typedef const lzma_filter_coder *(*lzma_filter_find)(lzma_vli id); + + +extern lzma_ret lzma_validate_chain(const lzma_filter *filters, size_t *count); + + +extern lzma_ret lzma_raw_coder_init( + lzma_next_coder *next, const lzma_allocator *allocator, + const lzma_filter *filters, + lzma_filter_find coder_find, bool is_encoder); + + +extern uint64_t lzma_raw_coder_memusage(lzma_filter_find coder_find, + const lzma_filter *filters); + + +#endif diff --git a/src/native/external/xz/src/liblzma/common/filter_decoder.c b/src/native/external/xz/src/liblzma/common/filter_decoder.c new file mode 100644 index 00000000000000..cbdeb5858f6625 --- /dev/null +++ b/src/native/external/xz/src/liblzma/common/filter_decoder.c @@ -0,0 +1,214 @@ +// SPDX-License-Identifier: 0BSD + +/////////////////////////////////////////////////////////////////////////////// +// +/// \file filter_decoder.c +/// \brief Filter ID mapping to filter-specific functions +// +// Author: Lasse Collin +// +/////////////////////////////////////////////////////////////////////////////// + +#include "filter_decoder.h" +#include "filter_common.h" +#include "lzma_decoder.h" +#include "lzma2_decoder.h" +#include "simple_decoder.h" +#include "delta_decoder.h" + + +typedef struct { + /// Filter ID + lzma_vli id; + + /// Initializes the filter encoder and calls lzma_next_filter_init() + /// for filters + 1. + lzma_init_function init; + + /// Calculates memory usage of the encoder. If the options are + /// invalid, UINT64_MAX is returned. + uint64_t (*memusage)(const void *options); + + /// Decodes Filter Properties. + /// + /// \return - LZMA_OK: Properties decoded successfully. + /// - LZMA_OPTIONS_ERROR: Unsupported properties + /// - LZMA_MEM_ERROR: Memory allocation failed. + lzma_ret (*props_decode)( + void **options, const lzma_allocator *allocator, + const uint8_t *props, size_t props_size); + +} lzma_filter_decoder; + + +static const lzma_filter_decoder decoders[] = { +#ifdef HAVE_DECODER_LZMA1 + { + .id = LZMA_FILTER_LZMA1, + .init = &lzma_lzma_decoder_init, + .memusage = &lzma_lzma_decoder_memusage, + .props_decode = &lzma_lzma_props_decode, + }, + { + .id = LZMA_FILTER_LZMA1EXT, + .init = &lzma_lzma_decoder_init, + .memusage = &lzma_lzma_decoder_memusage, + .props_decode = &lzma_lzma_props_decode, + }, +#endif +#ifdef HAVE_DECODER_LZMA2 + { + .id = LZMA_FILTER_LZMA2, + .init = &lzma_lzma2_decoder_init, + .memusage = &lzma_lzma2_decoder_memusage, + .props_decode = &lzma_lzma2_props_decode, + }, +#endif +#ifdef HAVE_DECODER_X86 + { + .id = LZMA_FILTER_X86, + .init = &lzma_simple_x86_decoder_init, + .memusage = NULL, + .props_decode = &lzma_simple_props_decode, + }, +#endif +#ifdef HAVE_DECODER_POWERPC + { + .id = LZMA_FILTER_POWERPC, + .init = &lzma_simple_powerpc_decoder_init, + .memusage = NULL, + .props_decode = &lzma_simple_props_decode, + }, +#endif +#ifdef HAVE_DECODER_IA64 + { + .id = LZMA_FILTER_IA64, + .init = &lzma_simple_ia64_decoder_init, + .memusage = NULL, + .props_decode = &lzma_simple_props_decode, + }, +#endif +#ifdef HAVE_DECODER_ARM + { + .id = LZMA_FILTER_ARM, + .init = &lzma_simple_arm_decoder_init, + .memusage = NULL, + .props_decode = &lzma_simple_props_decode, + }, +#endif +#ifdef HAVE_DECODER_ARMTHUMB + { + .id = LZMA_FILTER_ARMTHUMB, + .init = &lzma_simple_armthumb_decoder_init, + .memusage = NULL, + .props_decode = &lzma_simple_props_decode, + }, +#endif +#ifdef HAVE_DECODER_ARM64 + { + .id = LZMA_FILTER_ARM64, + .init = &lzma_simple_arm64_decoder_init, + .memusage = NULL, + .props_decode = &lzma_simple_props_decode, + }, +#endif +#ifdef HAVE_DECODER_SPARC + { + .id = LZMA_FILTER_SPARC, + .init = &lzma_simple_sparc_decoder_init, + .memusage = NULL, + .props_decode = &lzma_simple_props_decode, + }, +#endif +#ifdef HAVE_DECODER_RISCV + { + .id = LZMA_FILTER_RISCV, + .init = &lzma_simple_riscv_decoder_init, + .memusage = NULL, + .props_decode = &lzma_simple_props_decode, + }, +#endif +#ifdef HAVE_DECODER_DELTA + { + .id = LZMA_FILTER_DELTA, + .init = &lzma_delta_decoder_init, + .memusage = &lzma_delta_coder_memusage, + .props_decode = &lzma_delta_props_decode, + }, +#endif +}; + + +static const lzma_filter_decoder * +decoder_find(lzma_vli id) +{ + for (size_t i = 0; i < ARRAY_SIZE(decoders); ++i) + if (decoders[i].id == id) + return decoders + i; + + return NULL; +} + + +// lzma_filter_coder begins with the same members as lzma_filter_decoder. +// This function is a wrapper with a type that is compatible with the +// typedef of lzma_filter_find in filter_common.h. +static const lzma_filter_coder * +coder_find(lzma_vli id) +{ + return (const lzma_filter_coder *)decoder_find(id); +} + + +extern LZMA_API(lzma_bool) +lzma_filter_decoder_is_supported(lzma_vli id) +{ + return decoder_find(id) != NULL; +} + + +extern lzma_ret +lzma_raw_decoder_init(lzma_next_coder *next, const lzma_allocator *allocator, + const lzma_filter *options) +{ + return lzma_raw_coder_init(next, allocator, + options, &coder_find, false); +} + + +extern LZMA_API(lzma_ret) +lzma_raw_decoder(lzma_stream *strm, const lzma_filter *options) +{ + lzma_next_strm_init(lzma_raw_decoder_init, strm, options); + + strm->internal->supported_actions[LZMA_RUN] = true; + strm->internal->supported_actions[LZMA_FINISH] = true; + + return LZMA_OK; +} + + +extern LZMA_API(uint64_t) +lzma_raw_decoder_memusage(const lzma_filter *filters) +{ + return lzma_raw_coder_memusage(&coder_find, filters); +} + + +extern LZMA_API(lzma_ret) +lzma_properties_decode(lzma_filter *filter, const lzma_allocator *allocator, + const uint8_t *props, size_t props_size) +{ + // Make it always NULL so that the caller can always safely free() it. + filter->options = NULL; + + const lzma_filter_decoder *const fd = decoder_find(filter->id); + if (fd == NULL) + return LZMA_OPTIONS_ERROR; + + if (fd->props_decode == NULL) + return props_size == 0 ? LZMA_OK : LZMA_OPTIONS_ERROR; + + return fd->props_decode( + &filter->options, allocator, props, props_size); +} diff --git a/src/native/external/xz/src/liblzma/common/filter_decoder.h b/src/native/external/xz/src/liblzma/common/filter_decoder.h new file mode 100644 index 00000000000000..e610bc1f44ec24 --- /dev/null +++ b/src/native/external/xz/src/liblzma/common/filter_decoder.h @@ -0,0 +1,22 @@ +// SPDX-License-Identifier: 0BSD + +/////////////////////////////////////////////////////////////////////////////// +// +/// \file filter_decoder.h +/// \brief Filter ID mapping to filter-specific functions +// +// Author: Lasse Collin +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef LZMA_FILTER_DECODER_H +#define LZMA_FILTER_DECODER_H + +#include "common.h" + + +extern lzma_ret lzma_raw_decoder_init( + lzma_next_coder *next, const lzma_allocator *allocator, + const lzma_filter *options); + +#endif diff --git a/src/native/external/xz/src/liblzma/common/filter_encoder.c b/src/native/external/xz/src/liblzma/common/filter_encoder.c new file mode 100644 index 00000000000000..bc394448985a08 --- /dev/null +++ b/src/native/external/xz/src/liblzma/common/filter_encoder.c @@ -0,0 +1,330 @@ +// SPDX-License-Identifier: 0BSD + +/////////////////////////////////////////////////////////////////////////////// +// +/// \file filter_decoder.c +/// \brief Filter ID mapping to filter-specific functions +// +// Author: Lasse Collin +// +/////////////////////////////////////////////////////////////////////////////// + +#include "filter_encoder.h" +#include "filter_common.h" +#include "lzma_encoder.h" +#include "lzma2_encoder.h" +#include "simple_encoder.h" +#include "delta_encoder.h" + + +typedef struct { + /// Filter ID + lzma_vli id; + + /// Initializes the filter encoder and calls lzma_next_filter_init() + /// for filters + 1. + lzma_init_function init; + + /// Calculates memory usage of the encoder. If the options are + /// invalid, UINT64_MAX is returned. + uint64_t (*memusage)(const void *options); + + /// Calculates the recommended Uncompressed Size for .xz Blocks to + /// which the input data can be split to make multithreaded + /// encoding possible. If this is NULL, it is assumed that + /// the encoder is fast enough with single thread. If the options + /// are invalid, UINT64_MAX is returned. + uint64_t (*block_size)(const void *options); + + /// Tells the size of the Filter Properties field. If options are + /// invalid, LZMA_OPTIONS_ERROR is returned and size is set to + /// UINT32_MAX. + lzma_ret (*props_size_get)(uint32_t *size, const void *options); + + /// Some filters will always have the same size Filter Properties + /// field. If props_size_get is NULL, this value is used. + uint32_t props_size_fixed; + + /// Encodes Filter Properties. + /// + /// \return - LZMA_OK: Properties encoded successfully. + /// - LZMA_OPTIONS_ERROR: Unsupported options + /// - LZMA_PROG_ERROR: Invalid options or not enough + /// output space + lzma_ret (*props_encode)(const void *options, uint8_t *out); + +} lzma_filter_encoder; + + +static const lzma_filter_encoder encoders[] = { +#ifdef HAVE_ENCODER_LZMA1 + { + .id = LZMA_FILTER_LZMA1, + .init = &lzma_lzma_encoder_init, + .memusage = &lzma_lzma_encoder_memusage, + .block_size = NULL, // Not needed for LZMA1 + .props_size_get = NULL, + .props_size_fixed = 5, + .props_encode = &lzma_lzma_props_encode, + }, + { + .id = LZMA_FILTER_LZMA1EXT, + .init = &lzma_lzma_encoder_init, + .memusage = &lzma_lzma_encoder_memusage, + .block_size = NULL, // Not needed for LZMA1 + .props_size_get = NULL, + .props_size_fixed = 5, + .props_encode = &lzma_lzma_props_encode, + }, +#endif +#ifdef HAVE_ENCODER_LZMA2 + { + .id = LZMA_FILTER_LZMA2, + .init = &lzma_lzma2_encoder_init, + .memusage = &lzma_lzma2_encoder_memusage, + .block_size = &lzma_lzma2_block_size, + .props_size_get = NULL, + .props_size_fixed = 1, + .props_encode = &lzma_lzma2_props_encode, + }, +#endif +#ifdef HAVE_ENCODER_X86 + { + .id = LZMA_FILTER_X86, + .init = &lzma_simple_x86_encoder_init, + .memusage = NULL, + .block_size = NULL, + .props_size_get = &lzma_simple_props_size, + .props_encode = &lzma_simple_props_encode, + }, +#endif +#ifdef HAVE_ENCODER_POWERPC + { + .id = LZMA_FILTER_POWERPC, + .init = &lzma_simple_powerpc_encoder_init, + .memusage = NULL, + .block_size = NULL, + .props_size_get = &lzma_simple_props_size, + .props_encode = &lzma_simple_props_encode, + }, +#endif +#ifdef HAVE_ENCODER_IA64 + { + .id = LZMA_FILTER_IA64, + .init = &lzma_simple_ia64_encoder_init, + .memusage = NULL, + .block_size = NULL, + .props_size_get = &lzma_simple_props_size, + .props_encode = &lzma_simple_props_encode, + }, +#endif +#ifdef HAVE_ENCODER_ARM + { + .id = LZMA_FILTER_ARM, + .init = &lzma_simple_arm_encoder_init, + .memusage = NULL, + .block_size = NULL, + .props_size_get = &lzma_simple_props_size, + .props_encode = &lzma_simple_props_encode, + }, +#endif +#ifdef HAVE_ENCODER_ARMTHUMB + { + .id = LZMA_FILTER_ARMTHUMB, + .init = &lzma_simple_armthumb_encoder_init, + .memusage = NULL, + .block_size = NULL, + .props_size_get = &lzma_simple_props_size, + .props_encode = &lzma_simple_props_encode, + }, +#endif +#ifdef HAVE_ENCODER_ARM64 + { + .id = LZMA_FILTER_ARM64, + .init = &lzma_simple_arm64_encoder_init, + .memusage = NULL, + .block_size = NULL, + .props_size_get = &lzma_simple_props_size, + .props_encode = &lzma_simple_props_encode, + }, +#endif +#ifdef HAVE_ENCODER_SPARC + { + .id = LZMA_FILTER_SPARC, + .init = &lzma_simple_sparc_encoder_init, + .memusage = NULL, + .block_size = NULL, + .props_size_get = &lzma_simple_props_size, + .props_encode = &lzma_simple_props_encode, + }, +#endif +#ifdef HAVE_ENCODER_RISCV + { + .id = LZMA_FILTER_RISCV, + .init = &lzma_simple_riscv_encoder_init, + .memusage = NULL, + .block_size = NULL, + .props_size_get = &lzma_simple_props_size, + .props_encode = &lzma_simple_props_encode, + }, +#endif +#ifdef HAVE_ENCODER_DELTA + { + .id = LZMA_FILTER_DELTA, + .init = &lzma_delta_encoder_init, + .memusage = &lzma_delta_coder_memusage, + .block_size = NULL, + .props_size_get = NULL, + .props_size_fixed = 1, + .props_encode = &lzma_delta_props_encode, + }, +#endif +}; + + +static const lzma_filter_encoder * +encoder_find(lzma_vli id) +{ + for (size_t i = 0; i < ARRAY_SIZE(encoders); ++i) + if (encoders[i].id == id) + return encoders + i; + + return NULL; +} + + +// lzma_filter_coder begins with the same members as lzma_filter_encoder. +// This function is a wrapper with a type that is compatible with the +// typedef of lzma_filter_find in filter_common.h. +static const lzma_filter_coder * +coder_find(lzma_vli id) +{ + return (const lzma_filter_coder *)encoder_find(id); +} + + +extern LZMA_API(lzma_bool) +lzma_filter_encoder_is_supported(lzma_vli id) +{ + return encoder_find(id) != NULL; +} + + +extern LZMA_API(lzma_ret) +lzma_filters_update(lzma_stream *strm, const lzma_filter *filters) +{ + if (strm->internal->next.update == NULL) + return LZMA_PROG_ERROR; + + // Validate the filter chain. + if (lzma_raw_encoder_memusage(filters) == UINT64_MAX) + return LZMA_OPTIONS_ERROR; + + // The actual filter chain in the encoder is reversed. Some things + // still want the normal order chain, so we provide both. + size_t count = 1; + while (filters[count].id != LZMA_VLI_UNKNOWN) + ++count; + + lzma_filter reversed_filters[LZMA_FILTERS_MAX + 1]; + for (size_t i = 0; i < count; ++i) + reversed_filters[count - i - 1] = filters[i]; + + reversed_filters[count].id = LZMA_VLI_UNKNOWN; + + return strm->internal->next.update(strm->internal->next.coder, + strm->allocator, filters, reversed_filters); +} + + +extern lzma_ret +lzma_raw_encoder_init(lzma_next_coder *next, const lzma_allocator *allocator, + const lzma_filter *filters) +{ + return lzma_raw_coder_init(next, allocator, + filters, &coder_find, true); +} + + +extern LZMA_API(lzma_ret) +lzma_raw_encoder(lzma_stream *strm, const lzma_filter *filters) +{ + lzma_next_strm_init(lzma_raw_coder_init, strm, filters, + &coder_find, true); + + strm->internal->supported_actions[LZMA_RUN] = true; + strm->internal->supported_actions[LZMA_SYNC_FLUSH] = true; + strm->internal->supported_actions[LZMA_FINISH] = true; + + return LZMA_OK; +} + + +extern LZMA_API(uint64_t) +lzma_raw_encoder_memusage(const lzma_filter *filters) +{ + return lzma_raw_coder_memusage(&coder_find, filters); +} + + +extern LZMA_API(uint64_t) +lzma_mt_block_size(const lzma_filter *filters) +{ + if (filters == NULL) + return UINT64_MAX; + + uint64_t max = 0; + + for (size_t i = 0; filters[i].id != LZMA_VLI_UNKNOWN; ++i) { + const lzma_filter_encoder *const fe + = encoder_find(filters[i].id); + if (fe == NULL) + return UINT64_MAX; + + if (fe->block_size != NULL) { + const uint64_t size + = fe->block_size(filters[i].options); + if (size > max) + max = size; + } + } + + return max == 0 ? UINT64_MAX : max; +} + + +extern LZMA_API(lzma_ret) +lzma_properties_size(uint32_t *size, const lzma_filter *filter) +{ + const lzma_filter_encoder *const fe = encoder_find(filter->id); + if (fe == NULL) { + // Unknown filter - if the Filter ID is a proper VLI, + // return LZMA_OPTIONS_ERROR instead of LZMA_PROG_ERROR, + // because it's possible that we just don't have support + // compiled in for the requested filter. + return filter->id <= LZMA_VLI_MAX + ? LZMA_OPTIONS_ERROR : LZMA_PROG_ERROR; + } + + if (fe->props_size_get == NULL) { + // No props_size_get() function, use props_size_fixed. + *size = fe->props_size_fixed; + return LZMA_OK; + } + + return fe->props_size_get(size, filter->options); +} + + +extern LZMA_API(lzma_ret) +lzma_properties_encode(const lzma_filter *filter, uint8_t *props) +{ + const lzma_filter_encoder *const fe = encoder_find(filter->id); + if (fe == NULL) + return LZMA_PROG_ERROR; + + if (fe->props_encode == NULL) + return LZMA_OK; + + return fe->props_encode(filter->options, props); +} diff --git a/src/native/external/xz/src/liblzma/common/filter_encoder.h b/src/native/external/xz/src/liblzma/common/filter_encoder.h new file mode 100644 index 00000000000000..88f2dafa43b046 --- /dev/null +++ b/src/native/external/xz/src/liblzma/common/filter_encoder.h @@ -0,0 +1,22 @@ +// SPDX-License-Identifier: 0BSD + +/////////////////////////////////////////////////////////////////////////////// +// +/// \file filter_encoder.h +/// \brief Filter ID mapping to filter-specific functions +// +// Author: Lasse Collin +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef LZMA_FILTER_ENCODER_H +#define LZMA_FILTER_ENCODER_H + +#include "common.h" + + +extern lzma_ret lzma_raw_encoder_init( + lzma_next_coder *next, const lzma_allocator *allocator, + const lzma_filter *filters); + +#endif diff --git a/src/native/external/xz/src/liblzma/common/filter_flags_decoder.c b/src/native/external/xz/src/liblzma/common/filter_flags_decoder.c new file mode 100644 index 00000000000000..594c434b41466d --- /dev/null +++ b/src/native/external/xz/src/liblzma/common/filter_flags_decoder.c @@ -0,0 +1,45 @@ +// SPDX-License-Identifier: 0BSD + +/////////////////////////////////////////////////////////////////////////////// +// +/// \file filter_flags_decoder.c +/// \brief Decodes a Filter Flags field +// +// Author: Lasse Collin +// +/////////////////////////////////////////////////////////////////////////////// + +#include "filter_decoder.h" + + +extern LZMA_API(lzma_ret) +lzma_filter_flags_decode( + lzma_filter *filter, const lzma_allocator *allocator, + const uint8_t *in, size_t *in_pos, size_t in_size) +{ + // Set the pointer to NULL so the caller can always safely free it. + filter->options = NULL; + + // Filter ID + return_if_error(lzma_vli_decode(&filter->id, NULL, + in, in_pos, in_size)); + + if (filter->id >= LZMA_FILTER_RESERVED_START) + return LZMA_DATA_ERROR; + + // Size of Properties + lzma_vli props_size; + return_if_error(lzma_vli_decode(&props_size, NULL, + in, in_pos, in_size)); + + // Filter Properties + if (in_size - *in_pos < (size_t)props_size) + return LZMA_DATA_ERROR; + + const lzma_ret ret = lzma_properties_decode( + filter, allocator, in + *in_pos, (size_t)props_size); + + *in_pos += (size_t)props_size; + + return ret; +} diff --git a/src/native/external/xz/src/liblzma/common/filter_flags_encoder.c b/src/native/external/xz/src/liblzma/common/filter_flags_encoder.c new file mode 100644 index 00000000000000..e1d65884fb0bd1 --- /dev/null +++ b/src/native/external/xz/src/liblzma/common/filter_flags_encoder.c @@ -0,0 +1,55 @@ +// SPDX-License-Identifier: 0BSD + +/////////////////////////////////////////////////////////////////////////////// +// +/// \file filter_flags_encoder.c +/// \brief Encodes a Filter Flags field +// +// Author: Lasse Collin +// +/////////////////////////////////////////////////////////////////////////////// + +#include "filter_encoder.h" + + +extern LZMA_API(lzma_ret) +lzma_filter_flags_size(uint32_t *size, const lzma_filter *filter) +{ + if (filter->id >= LZMA_FILTER_RESERVED_START) + return LZMA_PROG_ERROR; + + return_if_error(lzma_properties_size(size, filter)); + + *size += lzma_vli_size(filter->id) + lzma_vli_size(*size); + + return LZMA_OK; +} + + +extern LZMA_API(lzma_ret) +lzma_filter_flags_encode(const lzma_filter *filter, + uint8_t *out, size_t *out_pos, size_t out_size) +{ + // Filter ID + if (filter->id >= LZMA_FILTER_RESERVED_START) + return LZMA_PROG_ERROR; + + return_if_error(lzma_vli_encode(filter->id, NULL, + out, out_pos, out_size)); + + // Size of Properties + uint32_t props_size; + return_if_error(lzma_properties_size(&props_size, filter)); + return_if_error(lzma_vli_encode(props_size, NULL, + out, out_pos, out_size)); + + // Filter Properties + if (out_size - *out_pos < props_size) + return LZMA_PROG_ERROR; + + return_if_error(lzma_properties_encode(filter, out + *out_pos)); + + *out_pos += props_size; + + return LZMA_OK; +} diff --git a/src/native/external/xz/src/liblzma/common/hardware_cputhreads.c b/src/native/external/xz/src/liblzma/common/hardware_cputhreads.c new file mode 100644 index 00000000000000..4ce852b42c3da3 --- /dev/null +++ b/src/native/external/xz/src/liblzma/common/hardware_cputhreads.c @@ -0,0 +1,33 @@ +// SPDX-License-Identifier: 0BSD + +/////////////////////////////////////////////////////////////////////////////// +// +/// \file hardware_cputhreads.c +/// \brief Get the number of CPU threads or cores +// +// Author: Lasse Collin +// +/////////////////////////////////////////////////////////////////////////////// + +#include "common.h" + +#include "tuklib_cpucores.h" + + +#ifdef HAVE_SYMBOL_VERSIONS_LINUX +// This is for compatibility with binaries linked against liblzma that +// has been patched with xz-5.2.2-compat-libs.patch from RHEL/CentOS 7. +LZMA_SYMVER_API("lzma_cputhreads@XZ_5.2.2", + uint32_t, lzma_cputhreads_522)(void) lzma_nothrow + __attribute__((__alias__("lzma_cputhreads_52"))); + +LZMA_SYMVER_API("lzma_cputhreads@@XZ_5.2", + uint32_t, lzma_cputhreads_52)(void) lzma_nothrow; + +#define lzma_cputhreads lzma_cputhreads_52 +#endif +extern LZMA_API(uint32_t) +lzma_cputhreads(void) +{ + return tuklib_cpucores(); +} diff --git a/src/native/external/xz/src/liblzma/common/hardware_physmem.c b/src/native/external/xz/src/liblzma/common/hardware_physmem.c new file mode 100644 index 00000000000000..1bc34864e849e0 --- /dev/null +++ b/src/native/external/xz/src/liblzma/common/hardware_physmem.c @@ -0,0 +1,24 @@ +// SPDX-License-Identifier: 0BSD + +/////////////////////////////////////////////////////////////////////////////// +// +/// \file hardware_physmem.c +/// \brief Get the total amount of physical memory (RAM) +// +// Author: Jonathan Nieder +// +/////////////////////////////////////////////////////////////////////////////// + +#include "common.h" + +#include "tuklib_physmem.h" + + +extern LZMA_API(uint64_t) +lzma_physmem(void) +{ + // It is simpler to make lzma_physmem() a wrapper for + // tuklib_physmem() than to hack appropriate symbol visibility + // support for the tuklib modules. + return tuklib_physmem(); +} diff --git a/src/native/external/xz/src/liblzma/common/index.c b/src/native/external/xz/src/liblzma/common/index.c new file mode 100644 index 00000000000000..0b81e9322ad60c --- /dev/null +++ b/src/native/external/xz/src/liblzma/common/index.c @@ -0,0 +1,1268 @@ +// SPDX-License-Identifier: 0BSD + +/////////////////////////////////////////////////////////////////////////////// +// +/// \file index.c +/// \brief Handling of .xz Indexes and some other Stream information +// +// Author: Lasse Collin +// +/////////////////////////////////////////////////////////////////////////////// + +#include "common.h" +#include "index.h" +#include "stream_flags_common.h" + + +/// \brief How many Records to allocate at once +/// +/// This should be big enough to avoid making lots of tiny allocations +/// but small enough to avoid too much unused memory at once. +#define INDEX_GROUP_SIZE 512 + + +/// \brief How many Records can be allocated at once at maximum +#define PREALLOC_MAX ((SIZE_MAX - sizeof(index_group)) / sizeof(index_record)) + + +/// \brief Base structure for index_stream and index_group structures +typedef struct index_tree_node_s index_tree_node; +struct index_tree_node_s { + /// Uncompressed start offset of this Stream (relative to the + /// beginning of the file) or Block (relative to the beginning + /// of the Stream) + lzma_vli uncompressed_base; + + /// Compressed start offset of this Stream or Block + lzma_vli compressed_base; + + index_tree_node *parent; + index_tree_node *left; + index_tree_node *right; +}; + + +/// \brief AVL tree to hold index_stream or index_group structures +typedef struct { + /// Root node + index_tree_node *root; + + /// Leftmost node. Since the tree will be filled sequentially, + /// this won't change after the first node has been added to + /// the tree. + index_tree_node *leftmost; + + /// The rightmost node in the tree. Since the tree is filled + /// sequentially, this is always the node where to add the new data. + index_tree_node *rightmost; + + /// Number of nodes in the tree + uint32_t count; + +} index_tree; + + +typedef struct { + lzma_vli uncompressed_sum; + lzma_vli unpadded_sum; +} index_record; + + +typedef struct { + /// Every Record group is part of index_stream.groups tree. + index_tree_node node; + + /// Number of Blocks in this Stream before this group. + lzma_vli number_base; + + /// Number of Records that can be put in records[]. + size_t allocated; + + /// Index of the last Record in use. + size_t last; + + /// The sizes in this array are stored as cumulative sums relative + /// to the beginning of the Stream. This makes it possible to + /// use binary search in lzma_index_locate(). + /// + /// Note that the cumulative summing is done specially for + /// unpadded_sum: The previous value is rounded up to the next + /// multiple of four before adding the Unpadded Size of the new + /// Block. The total encoded size of the Blocks in the Stream + /// is records[last].unpadded_sum in the last Record group of + /// the Stream. + /// + /// For example, if the Unpadded Sizes are 39, 57, and 81, the + /// stored values are 39, 97 (40 + 57), and 181 (100 + 181). + /// The total encoded size of these Blocks is 184. + /// + /// This is a flexible array, because it makes easy to optimize + /// memory usage in case someone concatenates many Streams that + /// have only one or few Blocks. + index_record records[]; + +} index_group; + + +typedef struct { + /// Every index_stream is a node in the tree of Streams. + index_tree_node node; + + /// Number of this Stream (first one is 1) + uint32_t number; + + /// Total number of Blocks before this Stream + lzma_vli block_number_base; + + /// Record groups of this Stream are stored in a tree. + /// It's a T-tree with AVL-tree balancing. There are + /// INDEX_GROUP_SIZE Records per node by default. + /// This keeps the number of memory allocations reasonable + /// and finding a Record is fast. + index_tree groups; + + /// Number of Records in this Stream + lzma_vli record_count; + + /// Size of the List of Records field in this Stream. This is used + /// together with record_count to calculate the size of the Index + /// field and thus the total size of the Stream. + lzma_vli index_list_size; + + /// Stream Flags of this Stream. This is meaningful only if + /// the Stream Flags have been told us with lzma_index_stream_flags(). + /// Initially stream_flags.version is set to UINT32_MAX to indicate + /// that the Stream Flags are unknown. + lzma_stream_flags stream_flags; + + /// Amount of Stream Padding after this Stream. This defaults to + /// zero and can be set with lzma_index_stream_padding(). + lzma_vli stream_padding; + +} index_stream; + + +struct lzma_index_s { + /// AVL-tree containing the Stream(s). Often there is just one + /// Stream, but using a tree keeps lookups fast even when there + /// are many concatenated Streams. + index_tree streams; + + /// Uncompressed size of all the Blocks in the Stream(s) + lzma_vli uncompressed_size; + + /// Total size of all the Blocks in the Stream(s) + lzma_vli total_size; + + /// Total number of Records in all Streams in this lzma_index + lzma_vli record_count; + + /// Size of the List of Records field if all the Streams in this + /// lzma_index were packed into a single Stream (makes it simpler to + /// take many .xz files and combine them into a single Stream). + /// + /// This value together with record_count is needed to calculate + /// Backward Size that is stored into Stream Footer. + lzma_vli index_list_size; + + /// How many Records to allocate at once in lzma_index_append(). + /// This defaults to INDEX_GROUP_SIZE but can be overridden with + /// lzma_index_prealloc(). + size_t prealloc; + + /// Bitmask indicating what integrity check types have been used + /// as set by lzma_index_stream_flags(). The bit of the last Stream + /// is not included here, since it is possible to change it by + /// calling lzma_index_stream_flags() again. + uint32_t checks; +}; + + +static void +index_tree_init(index_tree *tree) +{ + tree->root = NULL; + tree->leftmost = NULL; + tree->rightmost = NULL; + tree->count = 0; + return; +} + + +/// Helper for index_tree_end() +static void +index_tree_node_end(index_tree_node *node, const lzma_allocator *allocator, + void (*free_func)(void *node, const lzma_allocator *allocator)) +{ + // The tree won't ever be very huge, so recursion should be fine. + // 20 levels in the tree is likely quite a lot already in practice. + if (node->left != NULL) + index_tree_node_end(node->left, allocator, free_func); + + if (node->right != NULL) + index_tree_node_end(node->right, allocator, free_func); + + free_func(node, allocator); + return; +} + + +/// Free the memory allocated for a tree. Each node is freed using the +/// given free_func which is either &lzma_free or &index_stream_end. +/// The latter is used to free the Record groups from each index_stream +/// before freeing the index_stream itself. +static void +index_tree_end(index_tree *tree, const lzma_allocator *allocator, + void (*free_func)(void *node, const lzma_allocator *allocator)) +{ + assert(free_func != NULL); + + if (tree->root != NULL) + index_tree_node_end(tree->root, allocator, free_func); + + return; +} + + +/// Add a new node to the tree. node->uncompressed_base and +/// node->compressed_base must have been set by the caller already. +static void +index_tree_append(index_tree *tree, index_tree_node *node) +{ + node->parent = tree->rightmost; + node->left = NULL; + node->right = NULL; + + ++tree->count; + + // Handle the special case of adding the first node. + if (tree->root == NULL) { + tree->root = node; + tree->leftmost = node; + tree->rightmost = node; + return; + } + + // The tree is always filled sequentially. + assert(tree->rightmost->uncompressed_base <= node->uncompressed_base); + assert(tree->rightmost->compressed_base < node->compressed_base); + + // Add the new node after the rightmost node. It's the correct + // place due to the reason above. + tree->rightmost->right = node; + tree->rightmost = node; + + // Balance the AVL-tree if needed. We don't need to keep the balance + // factors in nodes, because we always fill the tree sequentially, + // and thus know the state of the tree just by looking at the node + // count. From the node count we can calculate how many steps to go + // up in the tree to find the rotation root. + uint32_t up = tree->count ^ (UINT32_C(1) << bsr32(tree->count)); + if (up != 0) { + // Locate the root node for the rotation. + up = ctz32(tree->count) + 2; + do { + node = node->parent; + } while (--up > 0); + + // Rotate left using node as the rotation root. + index_tree_node *pivot = node->right; + + if (node->parent == NULL) { + tree->root = pivot; + } else { + assert(node->parent->right == node); + node->parent->right = pivot; + } + + pivot->parent = node->parent; + + node->right = pivot->left; + if (node->right != NULL) + node->right->parent = node; + + pivot->left = node; + node->parent = pivot; + } + + return; +} + + +/// Get the next node in the tree. Return NULL if there are no more nodes. +static void * +index_tree_next(const index_tree_node *node) +{ + if (node->right != NULL) { + node = node->right; + while (node->left != NULL) + node = node->left; + + return (void *)(node); + } + + while (node->parent != NULL && node->parent->right == node) + node = node->parent; + + return (void *)(node->parent); +} + + +/// Locate a node that contains the given uncompressed offset. It is +/// caller's job to check that target is not bigger than the uncompressed +/// size of the tree (the last node would be returned in that case still). +static void * +index_tree_locate(const index_tree *tree, lzma_vli target) +{ + const index_tree_node *result = NULL; + const index_tree_node *node = tree->root; + + assert(tree->leftmost == NULL + || tree->leftmost->uncompressed_base == 0); + + // Consecutive nodes may have the same uncompressed_base. + // We must pick the rightmost one. + while (node != NULL) { + if (node->uncompressed_base > target) { + node = node->left; + } else { + result = node; + node = node->right; + } + } + + return (void *)(result); +} + + +/// Allocate and initialize a new Stream using the given base offsets. +static index_stream * +index_stream_init(lzma_vli compressed_base, lzma_vli uncompressed_base, + uint32_t stream_number, lzma_vli block_number_base, + const lzma_allocator *allocator) +{ + index_stream *s = lzma_alloc(sizeof(index_stream), allocator); + if (s == NULL) + return NULL; + + s->node.uncompressed_base = uncompressed_base; + s->node.compressed_base = compressed_base; + s->node.parent = NULL; + s->node.left = NULL; + s->node.right = NULL; + + s->number = stream_number; + s->block_number_base = block_number_base; + + index_tree_init(&s->groups); + + s->record_count = 0; + s->index_list_size = 0; + s->stream_flags.version = UINT32_MAX; + s->stream_padding = 0; + + return s; +} + + +/// Free the memory allocated for a Stream and its Record groups. +static void +index_stream_end(void *node, const lzma_allocator *allocator) +{ + index_stream *s = node; + index_tree_end(&s->groups, allocator, &lzma_free); + lzma_free(s, allocator); + return; +} + + +static lzma_index * +index_init_plain(const lzma_allocator *allocator) +{ + lzma_index *i = lzma_alloc(sizeof(lzma_index), allocator); + if (i != NULL) { + index_tree_init(&i->streams); + i->uncompressed_size = 0; + i->total_size = 0; + i->record_count = 0; + i->index_list_size = 0; + i->prealloc = INDEX_GROUP_SIZE; + i->checks = 0; + } + + return i; +} + + +extern LZMA_API(lzma_index *) +lzma_index_init(const lzma_allocator *allocator) +{ + lzma_index *i = index_init_plain(allocator); + if (i == NULL) + return NULL; + + index_stream *s = index_stream_init(0, 0, 1, 0, allocator); + if (s == NULL) { + lzma_free(i, allocator); + return NULL; + } + + index_tree_append(&i->streams, &s->node); + + return i; +} + + +extern LZMA_API(void) +lzma_index_end(lzma_index *i, const lzma_allocator *allocator) +{ + // NOTE: If you modify this function, check also the bottom + // of lzma_index_cat(). + if (i != NULL) { + index_tree_end(&i->streams, allocator, &index_stream_end); + lzma_free(i, allocator); + } + + return; +} + + +extern void +lzma_index_prealloc(lzma_index *i, lzma_vli records) +{ + if (records > PREALLOC_MAX) + records = PREALLOC_MAX; + + i->prealloc = (size_t)(records); + return; +} + + +extern LZMA_API(uint64_t) +lzma_index_memusage(lzma_vli streams, lzma_vli blocks) +{ + // This calculates an upper bound that is only a little bit + // bigger than the exact maximum memory usage with the given + // parameters. + + // Typical malloc() overhead is 2 * sizeof(void *) but we take + // a little bit extra just in case. Using LZMA_MEMUSAGE_BASE + // instead would give too inaccurate estimate. + const size_t alloc_overhead = 4 * sizeof(void *); + + // Amount of memory needed for each Stream base structures. + // We assume that every Stream has at least one Block and + // thus at least one group. + const size_t stream_base = sizeof(index_stream) + + sizeof(index_group) + 2 * alloc_overhead; + + // Amount of memory needed per group. + const size_t group_base = sizeof(index_group) + + INDEX_GROUP_SIZE * sizeof(index_record) + + alloc_overhead; + + // Number of groups. There may actually be more, but that overhead + // has been taken into account in stream_base already. + const lzma_vli groups + = (blocks + INDEX_GROUP_SIZE - 1) / INDEX_GROUP_SIZE; + + // Memory used by index_stream and index_group structures. + const uint64_t streams_mem = streams * stream_base; + const uint64_t groups_mem = groups * group_base; + + // Memory used by the base structure. + const uint64_t index_base = sizeof(lzma_index) + alloc_overhead; + + // Validate the arguments and catch integer overflows. + // Maximum number of Streams is "only" UINT32_MAX, because + // that limit is used by the tree containing the Streams. + const uint64_t limit = UINT64_MAX - index_base; + if (streams == 0 || streams > UINT32_MAX || blocks > LZMA_VLI_MAX + || streams > limit / stream_base + || groups > limit / group_base + || limit - streams_mem < groups_mem) + return UINT64_MAX; + + return index_base + streams_mem + groups_mem; +} + + +extern LZMA_API(uint64_t) +lzma_index_memused(const lzma_index *i) +{ + return lzma_index_memusage(i->streams.count, i->record_count); +} + + +extern LZMA_API(lzma_vli) +lzma_index_block_count(const lzma_index *i) +{ + return i->record_count; +} + + +extern LZMA_API(lzma_vli) +lzma_index_stream_count(const lzma_index *i) +{ + return i->streams.count; +} + + +extern LZMA_API(lzma_vli) +lzma_index_size(const lzma_index *i) +{ + return index_size(i->record_count, i->index_list_size); +} + + +extern LZMA_API(lzma_vli) +lzma_index_total_size(const lzma_index *i) +{ + return i->total_size; +} + + +extern LZMA_API(lzma_vli) +lzma_index_stream_size(const lzma_index *i) +{ + // Stream Header + Blocks + Index + Stream Footer + return LZMA_STREAM_HEADER_SIZE + i->total_size + + index_size(i->record_count, i->index_list_size) + + LZMA_STREAM_HEADER_SIZE; +} + + +static lzma_vli +index_file_size(lzma_vli compressed_base, lzma_vli unpadded_sum, + lzma_vli record_count, lzma_vli index_list_size, + lzma_vli stream_padding) +{ + // Earlier Streams and Stream Paddings + Stream Header + // + Blocks + Index + Stream Footer + Stream Padding + // + // This might go over LZMA_VLI_MAX due to too big unpadded_sum + // when this function is used in lzma_index_append(). + lzma_vli file_size = compressed_base + 2 * LZMA_STREAM_HEADER_SIZE + + stream_padding + vli_ceil4(unpadded_sum); + if (file_size > LZMA_VLI_MAX) + return LZMA_VLI_UNKNOWN; + + // The same applies here. + file_size += index_size(record_count, index_list_size); + if (file_size > LZMA_VLI_MAX) + return LZMA_VLI_UNKNOWN; + + return file_size; +} + + +extern LZMA_API(lzma_vli) +lzma_index_file_size(const lzma_index *i) +{ + const index_stream *s = (const index_stream *)(i->streams.rightmost); + const index_group *g = (const index_group *)(s->groups.rightmost); + return index_file_size(s->node.compressed_base, + g == NULL ? 0 : g->records[g->last].unpadded_sum, + s->record_count, s->index_list_size, + s->stream_padding); +} + + +extern LZMA_API(lzma_vli) +lzma_index_uncompressed_size(const lzma_index *i) +{ + return i->uncompressed_size; +} + + +extern LZMA_API(uint32_t) +lzma_index_checks(const lzma_index *i) +{ + uint32_t checks = i->checks; + + // Get the type of the Check of the last Stream too. + const index_stream *s = (const index_stream *)(i->streams.rightmost); + if (s->stream_flags.version != UINT32_MAX) + checks |= UINT32_C(1) << s->stream_flags.check; + + return checks; +} + + +extern uint32_t +lzma_index_padding_size(const lzma_index *i) +{ + return (LZMA_VLI_C(4) - index_size_unpadded( + i->record_count, i->index_list_size)) & 3; +} + + +extern LZMA_API(lzma_ret) +lzma_index_stream_flags(lzma_index *i, const lzma_stream_flags *stream_flags) +{ + if (i == NULL || stream_flags == NULL) + return LZMA_PROG_ERROR; + + // Validate the Stream Flags. + return_if_error(lzma_stream_flags_compare( + stream_flags, stream_flags)); + + index_stream *s = (index_stream *)(i->streams.rightmost); + s->stream_flags = *stream_flags; + + return LZMA_OK; +} + + +extern LZMA_API(lzma_ret) +lzma_index_stream_padding(lzma_index *i, lzma_vli stream_padding) +{ + if (i == NULL || stream_padding > LZMA_VLI_MAX + || (stream_padding & 3) != 0) + return LZMA_PROG_ERROR; + + index_stream *s = (index_stream *)(i->streams.rightmost); + + // Check that the new value won't make the file grow too big. + const lzma_vli old_stream_padding = s->stream_padding; + s->stream_padding = 0; + if (lzma_index_file_size(i) + stream_padding > LZMA_VLI_MAX) { + s->stream_padding = old_stream_padding; + return LZMA_DATA_ERROR; + } + + s->stream_padding = stream_padding; + return LZMA_OK; +} + + +extern LZMA_API(lzma_ret) +lzma_index_append(lzma_index *i, const lzma_allocator *allocator, + lzma_vli unpadded_size, lzma_vli uncompressed_size) +{ + // Validate. + if (i == NULL || unpadded_size < UNPADDED_SIZE_MIN + || unpadded_size > UNPADDED_SIZE_MAX + || uncompressed_size > LZMA_VLI_MAX) + return LZMA_PROG_ERROR; + + index_stream *s = (index_stream *)(i->streams.rightmost); + index_group *g = (index_group *)(s->groups.rightmost); + + const lzma_vli compressed_base = g == NULL ? 0 + : vli_ceil4(g->records[g->last].unpadded_sum); + const lzma_vli uncompressed_base = g == NULL ? 0 + : g->records[g->last].uncompressed_sum; + const uint32_t index_list_size_add = lzma_vli_size(unpadded_size) + + lzma_vli_size(uncompressed_size); + + // Check that uncompressed size will not overflow. + if (uncompressed_base + uncompressed_size > LZMA_VLI_MAX) + return LZMA_DATA_ERROR; + + // Check that the new unpadded sum will not overflow. This is + // checked again in index_file_size(), but the unpadded sum is + // passed to vli_ceil4() which expects a valid lzma_vli value. + if (compressed_base + unpadded_size > UNPADDED_SIZE_MAX) + return LZMA_DATA_ERROR; + + // Check that the file size will stay within limits. + if (index_file_size(s->node.compressed_base, + compressed_base + unpadded_size, s->record_count + 1, + s->index_list_size + index_list_size_add, + s->stream_padding) == LZMA_VLI_UNKNOWN) + return LZMA_DATA_ERROR; + + // The size of the Index field must not exceed the maximum value + // that can be stored in the Backward Size field. + if (index_size(i->record_count + 1, + i->index_list_size + index_list_size_add) + > LZMA_BACKWARD_SIZE_MAX) + return LZMA_DATA_ERROR; + + if (g != NULL && g->last + 1 < g->allocated) { + // There is space in the last group at least for one Record. + ++g->last; + } else { + // We need to allocate a new group. + g = lzma_alloc(sizeof(index_group) + + i->prealloc * sizeof(index_record), + allocator); + if (g == NULL) + return LZMA_MEM_ERROR; + + g->last = 0; + g->allocated = i->prealloc; + + // Reset prealloc so that if the application happens to + // add new Records, the allocation size will be sane. + i->prealloc = INDEX_GROUP_SIZE; + + // Set the start offsets of this group. + g->node.uncompressed_base = uncompressed_base; + g->node.compressed_base = compressed_base; + g->number_base = s->record_count + 1; + + // Add the new group to the Stream. + index_tree_append(&s->groups, &g->node); + } + + // Add the new Record to the group. + g->records[g->last].uncompressed_sum + = uncompressed_base + uncompressed_size; + g->records[g->last].unpadded_sum + = compressed_base + unpadded_size; + + // Update the totals. + ++s->record_count; + s->index_list_size += index_list_size_add; + + i->total_size += vli_ceil4(unpadded_size); + i->uncompressed_size += uncompressed_size; + ++i->record_count; + i->index_list_size += index_list_size_add; + + return LZMA_OK; +} + + +/// Structure to pass info to index_cat_helper() +typedef struct { + /// Uncompressed size of the destination + lzma_vli uncompressed_size; + + /// Compressed file size of the destination + lzma_vli file_size; + + /// Same as above but for Block numbers + lzma_vli block_number_add; + + /// Number of Streams that were in the destination index before we + /// started appending new Streams from the source index. This is + /// used to fix the Stream numbering. + uint32_t stream_number_add; + + /// Destination index' Stream tree + index_tree *streams; + +} index_cat_info; + + +/// Add the Stream nodes from the source index to dest using recursion. +/// Simplest iterative traversal of the source tree wouldn't work, because +/// we update the pointers in nodes when moving them to the destination tree. +static void +index_cat_helper(const index_cat_info *info, index_stream *this) +{ + index_stream *left = (index_stream *)(this->node.left); + index_stream *right = (index_stream *)(this->node.right); + + if (left != NULL) + index_cat_helper(info, left); + + this->node.uncompressed_base += info->uncompressed_size; + this->node.compressed_base += info->file_size; + this->number += info->stream_number_add; + this->block_number_base += info->block_number_add; + index_tree_append(info->streams, &this->node); + + if (right != NULL) + index_cat_helper(info, right); + + return; +} + + +extern LZMA_API(lzma_ret) +lzma_index_cat(lzma_index *restrict dest, lzma_index *restrict src, + const lzma_allocator *allocator) +{ + if (dest == NULL || src == NULL) + return LZMA_PROG_ERROR; + + const lzma_vli dest_file_size = lzma_index_file_size(dest); + + // Check that we don't exceed the file size limits. + if (dest_file_size + lzma_index_file_size(src) > LZMA_VLI_MAX + || dest->uncompressed_size + src->uncompressed_size + > LZMA_VLI_MAX) + return LZMA_DATA_ERROR; + + // Check that the encoded size of the combined lzma_indexes stays + // within limits. In theory, this should be done only if we know + // that the user plans to actually combine the Streams and thus + // construct a single Index (probably rare). However, exceeding + // this limit is quite theoretical, so we do this check always + // to simplify things elsewhere. + { + const lzma_vli dest_size = index_size_unpadded( + dest->record_count, dest->index_list_size); + const lzma_vli src_size = index_size_unpadded( + src->record_count, src->index_list_size); + if (vli_ceil4(dest_size + src_size) > LZMA_BACKWARD_SIZE_MAX) + return LZMA_DATA_ERROR; + } + + // Optimize the last group to minimize memory usage. Allocation has + // to be done before modifying dest or src. + { + index_stream *s = (index_stream *)(dest->streams.rightmost); + index_group *g = (index_group *)(s->groups.rightmost); + if (g != NULL && g->last + 1 < g->allocated) { + assert(g->node.left == NULL); + assert(g->node.right == NULL); + + index_group *newg = lzma_alloc(sizeof(index_group) + + (g->last + 1) + * sizeof(index_record), + allocator); + if (newg == NULL) + return LZMA_MEM_ERROR; + + newg->node = g->node; + newg->allocated = g->last + 1; + newg->last = g->last; + newg->number_base = g->number_base; + + memcpy(newg->records, g->records, newg->allocated + * sizeof(index_record)); + + if (g->node.parent != NULL) { + assert(g->node.parent->right == &g->node); + g->node.parent->right = &newg->node; + } + + if (s->groups.leftmost == &g->node) { + assert(s->groups.root == &g->node); + s->groups.leftmost = &newg->node; + s->groups.root = &newg->node; + } + + assert(s->groups.rightmost == &g->node); + s->groups.rightmost = &newg->node; + + lzma_free(g, allocator); + + // NOTE: newg isn't leaked here because + // newg == (void *)&newg->node. + } + } + + // dest->checks includes the check types of all except the last Stream + // in dest. Set the bit for the check type of the last Stream now so + // that it won't get lost when Stream(s) from src are appended to dest. + dest->checks = lzma_index_checks(dest); + + // Add all the Streams from src to dest. Update the base offsets + // of each Stream from src. + const index_cat_info info = { + .uncompressed_size = dest->uncompressed_size, + .file_size = dest_file_size, + .stream_number_add = dest->streams.count, + .block_number_add = dest->record_count, + .streams = &dest->streams, + }; + index_cat_helper(&info, (index_stream *)(src->streams.root)); + + // Update info about all the combined Streams. + dest->uncompressed_size += src->uncompressed_size; + dest->total_size += src->total_size; + dest->record_count += src->record_count; + dest->index_list_size += src->index_list_size; + dest->checks |= src->checks; + + // There's nothing else left in src than the base structure. + lzma_free(src, allocator); + + return LZMA_OK; +} + + +/// Duplicate an index_stream. +static index_stream * +index_dup_stream(const index_stream *src, const lzma_allocator *allocator) +{ + // Catch a somewhat theoretical integer overflow. + if (src->record_count > (lzma_vli)PREALLOC_MAX) + return NULL; + + // Allocate and initialize a new Stream. + index_stream *dest = index_stream_init(src->node.compressed_base, + src->node.uncompressed_base, src->number, + src->block_number_base, allocator); + if (dest == NULL) + return NULL; + + // Copy the overall information. + dest->record_count = src->record_count; + dest->index_list_size = src->index_list_size; + dest->stream_flags = src->stream_flags; + dest->stream_padding = src->stream_padding; + + // Return if there are no groups to duplicate. + if (src->groups.leftmost == NULL) + return dest; + + // Allocate memory for the Records. We put all the Records into + // a single group. It's simplest and also tends to make + // lzma_index_locate() a little bit faster with very big Indexes. + index_group *destg = lzma_alloc(sizeof(index_group) + + (size_t)src->record_count * sizeof(index_record), + allocator); + if (destg == NULL) { + index_stream_end(dest, allocator); + return NULL; + } + + // Initialize destg. + destg->node.uncompressed_base = 0; + destg->node.compressed_base = 0; + destg->number_base = 1; + destg->allocated = (size_t)src->record_count; + destg->last = (size_t)src->record_count - 1; + + // Go through all the groups in src and copy the Records into destg. + const index_group *srcg = (const index_group *)(src->groups.leftmost); + size_t i = 0; + do { + memcpy(destg->records + i, srcg->records, + (srcg->last + 1) * sizeof(index_record)); + i += srcg->last + 1; + srcg = index_tree_next(&srcg->node); + } while (srcg != NULL); + + assert(i == destg->allocated); + + // Add the group to the new Stream. + index_tree_append(&dest->groups, &destg->node); + + return dest; +} + + +extern LZMA_API(lzma_index *) +lzma_index_dup(const lzma_index *src, const lzma_allocator *allocator) +{ + // Allocate the base structure (no initial Stream). + lzma_index *dest = index_init_plain(allocator); + if (dest == NULL) + return NULL; + + // Copy the totals. + dest->uncompressed_size = src->uncompressed_size; + dest->total_size = src->total_size; + dest->record_count = src->record_count; + dest->index_list_size = src->index_list_size; + + // Copy the Streams and the groups in them. + const index_stream *srcstream + = (const index_stream *)(src->streams.leftmost); + do { + index_stream *deststream = index_dup_stream( + srcstream, allocator); + if (deststream == NULL) { + lzma_index_end(dest, allocator); + return NULL; + } + + index_tree_append(&dest->streams, &deststream->node); + + srcstream = index_tree_next(&srcstream->node); + } while (srcstream != NULL); + + return dest; +} + + +/// Indexing for lzma_index_iter.internal[] +enum { + ITER_INDEX, + ITER_STREAM, + ITER_GROUP, + ITER_RECORD, + ITER_METHOD, +}; + + +/// Values for lzma_index_iter.internal[ITER_METHOD].s +enum { + ITER_METHOD_NORMAL, + ITER_METHOD_NEXT, + ITER_METHOD_LEFTMOST, +}; + + +static void +iter_set_info(lzma_index_iter *iter) +{ + const lzma_index *i = iter->internal[ITER_INDEX].p; + const index_stream *stream = iter->internal[ITER_STREAM].p; + const index_group *group = iter->internal[ITER_GROUP].p; + const size_t record = iter->internal[ITER_RECORD].s; + + // lzma_index_iter.internal must not contain a pointer to the last + // group in the index, because that may be reallocated by + // lzma_index_cat(). + if (group == NULL) { + // There are no groups. + assert(stream->groups.root == NULL); + iter->internal[ITER_METHOD].s = ITER_METHOD_LEFTMOST; + + } else if (i->streams.rightmost != &stream->node + || stream->groups.rightmost != &group->node) { + // The group is not not the last group in the index. + iter->internal[ITER_METHOD].s = ITER_METHOD_NORMAL; + + } else if (stream->groups.leftmost != &group->node) { + // The group isn't the only group in the Stream, thus we + // know that it must have a parent group i.e. it's not + // the root node. + assert(stream->groups.root != &group->node); + assert(group->node.parent->right == &group->node); + iter->internal[ITER_METHOD].s = ITER_METHOD_NEXT; + iter->internal[ITER_GROUP].p = group->node.parent; + + } else { + // The Stream has only one group. + assert(stream->groups.root == &group->node); + assert(group->node.parent == NULL); + iter->internal[ITER_METHOD].s = ITER_METHOD_LEFTMOST; + iter->internal[ITER_GROUP].p = NULL; + } + + // NOTE: lzma_index_iter.stream.number is lzma_vli but we use uint32_t + // internally. + iter->stream.number = stream->number; + iter->stream.block_count = stream->record_count; + iter->stream.compressed_offset = stream->node.compressed_base; + iter->stream.uncompressed_offset = stream->node.uncompressed_base; + + // iter->stream.flags will be NULL if the Stream Flags haven't been + // set with lzma_index_stream_flags(). + iter->stream.flags = stream->stream_flags.version == UINT32_MAX + ? NULL : &stream->stream_flags; + iter->stream.padding = stream->stream_padding; + + if (stream->groups.rightmost == NULL) { + // Stream has no Blocks. + iter->stream.compressed_size = index_size(0, 0) + + 2 * LZMA_STREAM_HEADER_SIZE; + iter->stream.uncompressed_size = 0; + } else { + const index_group *g = (const index_group *)( + stream->groups.rightmost); + + // Stream Header + Stream Footer + Index + Blocks + iter->stream.compressed_size = 2 * LZMA_STREAM_HEADER_SIZE + + index_size(stream->record_count, + stream->index_list_size) + + vli_ceil4(g->records[g->last].unpadded_sum); + iter->stream.uncompressed_size + = g->records[g->last].uncompressed_sum; + } + + if (group != NULL) { + iter->block.number_in_stream = group->number_base + record; + iter->block.number_in_file = iter->block.number_in_stream + + stream->block_number_base; + + iter->block.compressed_stream_offset + = record == 0 ? group->node.compressed_base + : vli_ceil4(group->records[ + record - 1].unpadded_sum); + iter->block.uncompressed_stream_offset + = record == 0 ? group->node.uncompressed_base + : group->records[record - 1].uncompressed_sum; + + iter->block.uncompressed_size + = group->records[record].uncompressed_sum + - iter->block.uncompressed_stream_offset; + iter->block.unpadded_size + = group->records[record].unpadded_sum + - iter->block.compressed_stream_offset; + iter->block.total_size = vli_ceil4(iter->block.unpadded_size); + + iter->block.compressed_stream_offset + += LZMA_STREAM_HEADER_SIZE; + + iter->block.compressed_file_offset + = iter->block.compressed_stream_offset + + iter->stream.compressed_offset; + iter->block.uncompressed_file_offset + = iter->block.uncompressed_stream_offset + + iter->stream.uncompressed_offset; + } + + return; +} + + +extern LZMA_API(void) +lzma_index_iter_init(lzma_index_iter *iter, const lzma_index *i) +{ + iter->internal[ITER_INDEX].p = i; + lzma_index_iter_rewind(iter); + return; +} + + +extern LZMA_API(void) +lzma_index_iter_rewind(lzma_index_iter *iter) +{ + iter->internal[ITER_STREAM].p = NULL; + iter->internal[ITER_GROUP].p = NULL; + iter->internal[ITER_RECORD].s = 0; + iter->internal[ITER_METHOD].s = ITER_METHOD_NORMAL; + return; +} + + +extern LZMA_API(lzma_bool) +lzma_index_iter_next(lzma_index_iter *iter, lzma_index_iter_mode mode) +{ + // Catch unsupported mode values. + if ((unsigned int)(mode) > LZMA_INDEX_ITER_NONEMPTY_BLOCK) + return true; + + const lzma_index *i = iter->internal[ITER_INDEX].p; + const index_stream *stream = iter->internal[ITER_STREAM].p; + const index_group *group = NULL; + size_t record = iter->internal[ITER_RECORD].s; + + // If we are being asked for the next Stream, leave group to NULL + // so that the rest of the this function thinks that this Stream + // has no groups and will thus go to the next Stream. + if (mode != LZMA_INDEX_ITER_STREAM) { + // Get the pointer to the current group. See iter_set_inf() + // for explanation. + switch (iter->internal[ITER_METHOD].s) { + case ITER_METHOD_NORMAL: + group = iter->internal[ITER_GROUP].p; + break; + + case ITER_METHOD_NEXT: + group = index_tree_next(iter->internal[ITER_GROUP].p); + break; + + case ITER_METHOD_LEFTMOST: + group = (const index_group *)( + stream->groups.leftmost); + break; + } + } + +again: + if (stream == NULL) { + // We at the beginning of the lzma_index. + // Locate the first Stream. + stream = (const index_stream *)(i->streams.leftmost); + if (mode >= LZMA_INDEX_ITER_BLOCK) { + // Since we are being asked to return information + // about the first a Block, skip Streams that have + // no Blocks. + while (stream->groups.leftmost == NULL) { + stream = index_tree_next(&stream->node); + if (stream == NULL) + return true; + } + } + + // Start from the first Record in the Stream. + group = (const index_group *)(stream->groups.leftmost); + record = 0; + + } else if (group != NULL && record < group->last) { + // The next Record is in the same group. + ++record; + + } else { + // This group has no more Records or this Stream has + // no Blocks at all. + record = 0; + + // If group is not NULL, this Stream has at least one Block + // and thus at least one group. Find the next group. + if (group != NULL) + group = index_tree_next(&group->node); + + if (group == NULL) { + // This Stream has no more Records. Find the next + // Stream. If we are being asked to return information + // about a Block, we skip empty Streams. + do { + stream = index_tree_next(&stream->node); + if (stream == NULL) + return true; + } while (mode >= LZMA_INDEX_ITER_BLOCK + && stream->groups.leftmost == NULL); + + group = (const index_group *)( + stream->groups.leftmost); + } + } + + if (mode == LZMA_INDEX_ITER_NONEMPTY_BLOCK) { + // We need to look for the next Block again if this Block + // is empty. + if (record == 0) { + if (group->node.uncompressed_base + == group->records[0].uncompressed_sum) + goto again; + } else if (group->records[record - 1].uncompressed_sum + == group->records[record].uncompressed_sum) { + goto again; + } + } + + iter->internal[ITER_STREAM].p = stream; + iter->internal[ITER_GROUP].p = group; + iter->internal[ITER_RECORD].s = record; + + iter_set_info(iter); + + return false; +} + + +extern LZMA_API(lzma_bool) +lzma_index_iter_locate(lzma_index_iter *iter, lzma_vli target) +{ + const lzma_index *i = iter->internal[ITER_INDEX].p; + + // If the target is past the end of the file, return immediately. + if (i->uncompressed_size <= target) + return true; + + // Locate the Stream containing the target offset. + const index_stream *stream = index_tree_locate(&i->streams, target); + assert(stream != NULL); + target -= stream->node.uncompressed_base; + + // Locate the group containing the target offset. + const index_group *group = index_tree_locate(&stream->groups, target); + assert(group != NULL); + + // Use binary search to locate the exact Record. It is the first + // Record whose uncompressed_sum is greater than target. + // This is because we want the rightmost Record that fulfills the + // search criterion. It is possible that there are empty Blocks; + // we don't want to return them. + size_t left = 0; + size_t right = group->last; + + while (left < right) { + const size_t pos = left + (right - left) / 2; + if (group->records[pos].uncompressed_sum <= target) + left = pos + 1; + else + right = pos; + } + + iter->internal[ITER_STREAM].p = stream; + iter->internal[ITER_GROUP].p = group; + iter->internal[ITER_RECORD].s = left; + + iter_set_info(iter); + + return false; +} diff --git a/src/native/external/xz/src/liblzma/common/index.h b/src/native/external/xz/src/liblzma/common/index.h new file mode 100644 index 00000000000000..007e1188f25956 --- /dev/null +++ b/src/native/external/xz/src/liblzma/common/index.h @@ -0,0 +1,80 @@ +// SPDX-License-Identifier: 0BSD + +/////////////////////////////////////////////////////////////////////////////// +// +/// \file index.h +/// \brief Handling of Index +/// \note This header file does not include common.h or lzma.h because +/// this file is needed by both liblzma internally and by the +/// tests. Including common.h will include and define many things +/// the tests do not need and prevents issues with header file +/// include order. This way, if lzma.h or common.h are not +/// included before this file it will break on every OS instead +/// of causing more subtle errors. +// +// Author: Lasse Collin +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef LZMA_INDEX_H +#define LZMA_INDEX_H + + +/// Minimum Unpadded Size +#define UNPADDED_SIZE_MIN LZMA_VLI_C(5) + +/// Maximum Unpadded Size +#define UNPADDED_SIZE_MAX (LZMA_VLI_MAX & ~LZMA_VLI_C(3)) + +/// Index Indicator based on xz specification +#define INDEX_INDICATOR 0 + + +/// Get the size of the Index Padding field. This is needed by Index encoder +/// and decoder, but applications should have no use for this. +extern uint32_t lzma_index_padding_size(const lzma_index *i); + + +/// Set for how many Records to allocate memory the next time +/// lzma_index_append() needs to allocate space for a new Record. +/// This is used only by the Index decoder. +extern void lzma_index_prealloc(lzma_index *i, lzma_vli records); + + +/// Round the variable-length integer to the next multiple of four. +static inline lzma_vli +vli_ceil4(lzma_vli vli) +{ + assert(vli <= UNPADDED_SIZE_MAX); + return (vli + 3) & ~LZMA_VLI_C(3); +} + + +/// Calculate the size of the Index field excluding Index Padding +static inline lzma_vli +index_size_unpadded(lzma_vli count, lzma_vli index_list_size) +{ + // Index Indicator + Number of Records + List of Records + CRC32 + return 1 + lzma_vli_size(count) + index_list_size + 4; +} + + +/// Calculate the size of the Index field including Index Padding +static inline lzma_vli +index_size(lzma_vli count, lzma_vli index_list_size) +{ + return vli_ceil4(index_size_unpadded(count, index_list_size)); +} + + +/// Calculate the total size of the Stream +static inline lzma_vli +index_stream_size(lzma_vli blocks_size, + lzma_vli count, lzma_vli index_list_size) +{ + return LZMA_STREAM_HEADER_SIZE + blocks_size + + index_size(count, index_list_size) + + LZMA_STREAM_HEADER_SIZE; +} + +#endif diff --git a/src/native/external/xz/src/liblzma/common/index_decoder.c b/src/native/external/xz/src/liblzma/common/index_decoder.c new file mode 100644 index 00000000000000..4eab56d942e1c4 --- /dev/null +++ b/src/native/external/xz/src/liblzma/common/index_decoder.c @@ -0,0 +1,369 @@ +// SPDX-License-Identifier: 0BSD + +/////////////////////////////////////////////////////////////////////////////// +// +/// \file index_decoder.c +/// \brief Decodes the Index field +// +// Author: Lasse Collin +// +/////////////////////////////////////////////////////////////////////////////// + +#include "index_decoder.h" +#include "check.h" + + +typedef struct { + enum { + SEQ_INDICATOR, + SEQ_COUNT, + SEQ_MEMUSAGE, + SEQ_UNPADDED, + SEQ_UNCOMPRESSED, + SEQ_PADDING_INIT, + SEQ_PADDING, + SEQ_CRC32, + } sequence; + + /// Memory usage limit + uint64_t memlimit; + + /// Target Index + lzma_index *index; + + /// Pointer give by the application, which is set after + /// successful decoding. + lzma_index **index_ptr; + + /// Number of Records left to decode. + lzma_vli count; + + /// The most recent Unpadded Size field + lzma_vli unpadded_size; + + /// The most recent Uncompressed Size field + lzma_vli uncompressed_size; + + /// Position in integers + size_t pos; + + /// CRC32 of the List of Records field + uint32_t crc32; +} lzma_index_coder; + + +static lzma_ret +index_decode(void *coder_ptr, const lzma_allocator *allocator, + const uint8_t *restrict in, size_t *restrict in_pos, + size_t in_size, + uint8_t *restrict out lzma_attribute((__unused__)), + size_t *restrict out_pos lzma_attribute((__unused__)), + size_t out_size lzma_attribute((__unused__)), + lzma_action action lzma_attribute((__unused__))) +{ + lzma_index_coder *coder = coder_ptr; + + // Similar optimization as in index_encoder.c + const size_t in_start = *in_pos; + lzma_ret ret = LZMA_OK; + + while (*in_pos < in_size) + switch (coder->sequence) { + case SEQ_INDICATOR: + // Return LZMA_DATA_ERROR instead of e.g. LZMA_PROG_ERROR or + // LZMA_FORMAT_ERROR, because a typical usage case for Index + // decoder is when parsing the Stream backwards. If seeking + // backward from the Stream Footer gives us something that + // doesn't begin with Index Indicator, the file is considered + // corrupt, not "programming error" or "unrecognized file + // format". One could argue that the application should + // verify the Index Indicator before trying to decode the + // Index, but well, I suppose it is simpler this way. + if (in[(*in_pos)++] != INDEX_INDICATOR) + return LZMA_DATA_ERROR; + + coder->sequence = SEQ_COUNT; + break; + + case SEQ_COUNT: + ret = lzma_vli_decode(&coder->count, &coder->pos, + in, in_pos, in_size); + if (ret != LZMA_STREAM_END) + goto out; + + coder->pos = 0; + coder->sequence = SEQ_MEMUSAGE; + FALLTHROUGH; + + case SEQ_MEMUSAGE: + if (lzma_index_memusage(1, coder->count) > coder->memlimit) { + ret = LZMA_MEMLIMIT_ERROR; + goto out; + } + + // Tell the Index handling code how many Records this + // Index has to allow it to allocate memory more efficiently. + lzma_index_prealloc(coder->index, coder->count); + + ret = LZMA_OK; + coder->sequence = coder->count == 0 + ? SEQ_PADDING_INIT : SEQ_UNPADDED; + break; + + case SEQ_UNPADDED: + case SEQ_UNCOMPRESSED: { + lzma_vli *size = coder->sequence == SEQ_UNPADDED + ? &coder->unpadded_size + : &coder->uncompressed_size; + + ret = lzma_vli_decode(size, &coder->pos, + in, in_pos, in_size); + if (ret != LZMA_STREAM_END) + goto out; + + ret = LZMA_OK; + coder->pos = 0; + + if (coder->sequence == SEQ_UNPADDED) { + // Validate that encoded Unpadded Size isn't too small + // or too big. + if (coder->unpadded_size < UNPADDED_SIZE_MIN + || coder->unpadded_size + > UNPADDED_SIZE_MAX) + return LZMA_DATA_ERROR; + + coder->sequence = SEQ_UNCOMPRESSED; + } else { + // Add the decoded Record to the Index. + return_if_error(lzma_index_append( + coder->index, allocator, + coder->unpadded_size, + coder->uncompressed_size)); + + // Check if this was the last Record. + coder->sequence = --coder->count == 0 + ? SEQ_PADDING_INIT + : SEQ_UNPADDED; + } + + break; + } + + case SEQ_PADDING_INIT: + coder->pos = lzma_index_padding_size(coder->index); + coder->sequence = SEQ_PADDING; + FALLTHROUGH; + + case SEQ_PADDING: + if (coder->pos > 0) { + --coder->pos; + if (in[(*in_pos)++] != 0x00) + return LZMA_DATA_ERROR; + + break; + } + + // Finish the CRC32 calculation. + coder->crc32 = lzma_crc32(in + in_start, + *in_pos - in_start, coder->crc32); + + coder->sequence = SEQ_CRC32; + FALLTHROUGH; + + case SEQ_CRC32: + do { + if (*in_pos == in_size) + return LZMA_OK; + + if (((coder->crc32 >> (coder->pos * 8)) & 0xFF) + != in[(*in_pos)++]) { +#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION + return LZMA_DATA_ERROR; +#endif + } + + } while (++coder->pos < 4); + + // Decoding was successful, now we can let the application + // see the decoded Index. + *coder->index_ptr = coder->index; + + // Make index NULL so we don't free it unintentionally. + coder->index = NULL; + + return LZMA_STREAM_END; + + default: + assert(0); + return LZMA_PROG_ERROR; + } + +out: + // Update the CRC32. + // + // Avoid null pointer + 0 (undefined behavior) in "in + in_start". + // In such a case we had no input and thus in_used == 0. + { + const size_t in_used = *in_pos - in_start; + if (in_used > 0) + coder->crc32 = lzma_crc32(in + in_start, + in_used, coder->crc32); + } + + return ret; +} + + +static void +index_decoder_end(void *coder_ptr, const lzma_allocator *allocator) +{ + lzma_index_coder *coder = coder_ptr; + lzma_index_end(coder->index, allocator); + lzma_free(coder, allocator); + return; +} + + +static lzma_ret +index_decoder_memconfig(void *coder_ptr, uint64_t *memusage, + uint64_t *old_memlimit, uint64_t new_memlimit) +{ + lzma_index_coder *coder = coder_ptr; + + *memusage = lzma_index_memusage(1, coder->count); + *old_memlimit = coder->memlimit; + + if (new_memlimit != 0) { + if (new_memlimit < *memusage) + return LZMA_MEMLIMIT_ERROR; + + coder->memlimit = new_memlimit; + } + + return LZMA_OK; +} + + +static lzma_ret +index_decoder_reset(lzma_index_coder *coder, const lzma_allocator *allocator, + lzma_index **i, uint64_t memlimit) +{ + // Remember the pointer given by the application. We will set it + // to point to the decoded Index only if decoding is successful. + // Before that, keep it NULL so that applications can always safely + // pass it to lzma_index_end() no matter did decoding succeed or not. + coder->index_ptr = i; + *i = NULL; + + // We always allocate a new lzma_index. + coder->index = lzma_index_init(allocator); + if (coder->index == NULL) + return LZMA_MEM_ERROR; + + // Initialize the rest. + coder->sequence = SEQ_INDICATOR; + coder->memlimit = my_max(1, memlimit); + coder->count = 0; // Needs to be initialized due to _memconfig(). + coder->pos = 0; + coder->crc32 = 0; + + return LZMA_OK; +} + + +extern lzma_ret +lzma_index_decoder_init(lzma_next_coder *next, const lzma_allocator *allocator, + lzma_index **i, uint64_t memlimit) +{ + lzma_next_coder_init(&lzma_index_decoder_init, next, allocator); + + if (i == NULL) + return LZMA_PROG_ERROR; + + lzma_index_coder *coder = next->coder; + if (coder == NULL) { + coder = lzma_alloc(sizeof(lzma_index_coder), allocator); + if (coder == NULL) + return LZMA_MEM_ERROR; + + next->coder = coder; + next->code = &index_decode; + next->end = &index_decoder_end; + next->memconfig = &index_decoder_memconfig; + coder->index = NULL; + } else { + lzma_index_end(coder->index, allocator); + } + + return index_decoder_reset(coder, allocator, i, memlimit); +} + + +extern LZMA_API(lzma_ret) +lzma_index_decoder(lzma_stream *strm, lzma_index **i, uint64_t memlimit) +{ + // If i isn't NULL, *i must always be initialized due to + // the wording in the API docs. This way it is initialized + // if we return LZMA_PROG_ERROR due to strm == NULL. + if (i != NULL) + *i = NULL; + + lzma_next_strm_init(lzma_index_decoder_init, strm, i, memlimit); + + strm->internal->supported_actions[LZMA_RUN] = true; + strm->internal->supported_actions[LZMA_FINISH] = true; + + return LZMA_OK; +} + + +extern LZMA_API(lzma_ret) +lzma_index_buffer_decode(lzma_index **i, uint64_t *memlimit, + const lzma_allocator *allocator, + const uint8_t *in, size_t *in_pos, size_t in_size) +{ + // If i isn't NULL, *i must always be initialized due to + // the wording in the API docs. + if (i != NULL) + *i = NULL; + + // Sanity checks + if (i == NULL || memlimit == NULL + || in == NULL || in_pos == NULL || *in_pos > in_size) + return LZMA_PROG_ERROR; + + // Initialize the decoder. + lzma_index_coder coder; + return_if_error(index_decoder_reset(&coder, allocator, i, *memlimit)); + + // Store the input start position so that we can restore it in case + // of an error. + const size_t in_start = *in_pos; + + // Do the actual decoding. + lzma_ret ret = index_decode(&coder, allocator, in, in_pos, in_size, + NULL, NULL, 0, LZMA_RUN); + + if (ret == LZMA_STREAM_END) { + ret = LZMA_OK; + } else { + // Something went wrong, free the Index structure and restore + // the input position. + lzma_index_end(coder.index, allocator); + *in_pos = in_start; + + if (ret == LZMA_OK) { + // The input is truncated or otherwise corrupt. + // Use LZMA_DATA_ERROR instead of LZMA_BUF_ERROR + // like lzma_vli_decode() does in single-call mode. + ret = LZMA_DATA_ERROR; + + } else if (ret == LZMA_MEMLIMIT_ERROR) { + // Tell the caller how much memory would have + // been needed. + *memlimit = lzma_index_memusage(1, coder.count); + } + } + + return ret; +} diff --git a/src/native/external/xz/src/liblzma/common/index_decoder.h b/src/native/external/xz/src/liblzma/common/index_decoder.h new file mode 100644 index 00000000000000..5351d2f0dfa4b8 --- /dev/null +++ b/src/native/external/xz/src/liblzma/common/index_decoder.h @@ -0,0 +1,24 @@ +// SPDX-License-Identifier: 0BSD + +/////////////////////////////////////////////////////////////////////////////// +// +/// \file index_decoder.h +/// \brief Decodes the Index field +// +// Author: Lasse Collin +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef LZMA_INDEX_DECODER_H +#define LZMA_INDEX_DECODER_H + +#include "common.h" +#include "index.h" + + +extern lzma_ret lzma_index_decoder_init(lzma_next_coder *next, + const lzma_allocator *allocator, + lzma_index **i, uint64_t memlimit); + + +#endif diff --git a/src/native/external/xz/src/liblzma/common/index_encoder.c b/src/native/external/xz/src/liblzma/common/index_encoder.c new file mode 100644 index 00000000000000..80f1be1e3aea70 --- /dev/null +++ b/src/native/external/xz/src/liblzma/common/index_encoder.c @@ -0,0 +1,260 @@ +// SPDX-License-Identifier: 0BSD + +/////////////////////////////////////////////////////////////////////////////// +// +/// \file index_encoder.c +/// \brief Encodes the Index field +// +// Author: Lasse Collin +// +/////////////////////////////////////////////////////////////////////////////// + +#include "index_encoder.h" +#include "index.h" +#include "check.h" + + +typedef struct { + enum { + SEQ_INDICATOR, + SEQ_COUNT, + SEQ_UNPADDED, + SEQ_UNCOMPRESSED, + SEQ_NEXT, + SEQ_PADDING, + SEQ_CRC32, + } sequence; + + /// Index being encoded + const lzma_index *index; + + /// Iterator for the Index being encoded + lzma_index_iter iter; + + /// Position in integers + size_t pos; + + /// CRC32 of the List of Records field + uint32_t crc32; +} lzma_index_coder; + + +static lzma_ret +index_encode(void *coder_ptr, + const lzma_allocator *allocator lzma_attribute((__unused__)), + const uint8_t *restrict in lzma_attribute((__unused__)), + size_t *restrict in_pos lzma_attribute((__unused__)), + size_t in_size lzma_attribute((__unused__)), + uint8_t *restrict out, size_t *restrict out_pos, + size_t out_size, + lzma_action action lzma_attribute((__unused__))) +{ + lzma_index_coder *coder = coder_ptr; + + // Position where to start calculating CRC32. The idea is that we + // need to call lzma_crc32() only once per call to index_encode(). + const size_t out_start = *out_pos; + + // Return value to use if we return at the end of this function. + // We use "goto out" to jump out of the while-switch construct + // instead of returning directly, because that way we don't need + // to copypaste the lzma_crc32() call to many places. + lzma_ret ret = LZMA_OK; + + while (*out_pos < out_size) + switch (coder->sequence) { + case SEQ_INDICATOR: + out[*out_pos] = INDEX_INDICATOR; + ++*out_pos; + coder->sequence = SEQ_COUNT; + break; + + case SEQ_COUNT: { + const lzma_vli count = lzma_index_block_count(coder->index); + ret = lzma_vli_encode(count, &coder->pos, + out, out_pos, out_size); + if (ret != LZMA_STREAM_END) + goto out; + + ret = LZMA_OK; + coder->pos = 0; + coder->sequence = SEQ_NEXT; + break; + } + + case SEQ_NEXT: + if (lzma_index_iter_next( + &coder->iter, LZMA_INDEX_ITER_BLOCK)) { + // Get the size of the Index Padding field. + coder->pos = lzma_index_padding_size(coder->index); + assert(coder->pos <= 3); + coder->sequence = SEQ_PADDING; + break; + } + + coder->sequence = SEQ_UNPADDED; + FALLTHROUGH; + + case SEQ_UNPADDED: + case SEQ_UNCOMPRESSED: { + const lzma_vli size = coder->sequence == SEQ_UNPADDED + ? coder->iter.block.unpadded_size + : coder->iter.block.uncompressed_size; + + ret = lzma_vli_encode(size, &coder->pos, + out, out_pos, out_size); + if (ret != LZMA_STREAM_END) + goto out; + + ret = LZMA_OK; + coder->pos = 0; + + // Advance to SEQ_UNCOMPRESSED or SEQ_NEXT. + ++coder->sequence; + break; + } + + case SEQ_PADDING: + if (coder->pos > 0) { + --coder->pos; + out[(*out_pos)++] = 0x00; + break; + } + + // Finish the CRC32 calculation. + coder->crc32 = lzma_crc32(out + out_start, + *out_pos - out_start, coder->crc32); + + coder->sequence = SEQ_CRC32; + FALLTHROUGH; + + case SEQ_CRC32: + // We don't use the main loop, because we don't want + // coder->crc32 to be touched anymore. + do { + if (*out_pos == out_size) + return LZMA_OK; + + out[*out_pos] = (coder->crc32 >> (coder->pos * 8)) + & 0xFF; + ++*out_pos; + + } while (++coder->pos < 4); + + return LZMA_STREAM_END; + + default: + assert(0); + return LZMA_PROG_ERROR; + } + +out: + // Update the CRC32. + // + // Avoid null pointer + 0 (undefined behavior) in "out + out_start". + // In such a case we had no input and thus out_used == 0. + { + const size_t out_used = *out_pos - out_start; + if (out_used > 0) + coder->crc32 = lzma_crc32(out + out_start, + out_used, coder->crc32); + } + + return ret; +} + + +static void +index_encoder_end(void *coder, const lzma_allocator *allocator) +{ + lzma_free(coder, allocator); + return; +} + + +static void +index_encoder_reset(lzma_index_coder *coder, const lzma_index *i) +{ + lzma_index_iter_init(&coder->iter, i); + + coder->sequence = SEQ_INDICATOR; + coder->index = i; + coder->pos = 0; + coder->crc32 = 0; + + return; +} + + +extern lzma_ret +lzma_index_encoder_init(lzma_next_coder *next, const lzma_allocator *allocator, + const lzma_index *i) +{ + lzma_next_coder_init(&lzma_index_encoder_init, next, allocator); + + if (i == NULL) + return LZMA_PROG_ERROR; + + if (next->coder == NULL) { + next->coder = lzma_alloc(sizeof(lzma_index_coder), allocator); + if (next->coder == NULL) + return LZMA_MEM_ERROR; + + next->code = &index_encode; + next->end = &index_encoder_end; + } + + index_encoder_reset(next->coder, i); + + return LZMA_OK; +} + + +extern LZMA_API(lzma_ret) +lzma_index_encoder(lzma_stream *strm, const lzma_index *i) +{ + lzma_next_strm_init(lzma_index_encoder_init, strm, i); + + strm->internal->supported_actions[LZMA_RUN] = true; + strm->internal->supported_actions[LZMA_FINISH] = true; + + return LZMA_OK; +} + + +extern LZMA_API(lzma_ret) +lzma_index_buffer_encode(const lzma_index *i, + uint8_t *out, size_t *out_pos, size_t out_size) +{ + // Validate the arguments. + if (i == NULL || out == NULL || out_pos == NULL || *out_pos > out_size) + return LZMA_PROG_ERROR; + + // Don't try to encode if there's not enough output space. + if (out_size - *out_pos < lzma_index_size(i)) + return LZMA_BUF_ERROR; + + // The Index encoder needs just one small data structure so we can + // allocate it on stack. + lzma_index_coder coder; + index_encoder_reset(&coder, i); + + // Do the actual encoding. This should never fail, but store + // the original *out_pos just in case. + const size_t out_start = *out_pos; + lzma_ret ret = index_encode(&coder, NULL, NULL, NULL, 0, + out, out_pos, out_size, LZMA_RUN); + + if (ret == LZMA_STREAM_END) { + ret = LZMA_OK; + } else { + // We should never get here, but just in case, restore the + // output position and set the error accordingly if something + // goes wrong and debugging isn't enabled. + assert(0); + *out_pos = out_start; + ret = LZMA_PROG_ERROR; + } + + return ret; +} diff --git a/src/native/external/xz/src/liblzma/common/index_encoder.h b/src/native/external/xz/src/liblzma/common/index_encoder.h new file mode 100644 index 00000000000000..29ba11066963a5 --- /dev/null +++ b/src/native/external/xz/src/liblzma/common/index_encoder.h @@ -0,0 +1,22 @@ +// SPDX-License-Identifier: 0BSD + +/////////////////////////////////////////////////////////////////////////////// +// +/// \file index_encoder.h +/// \brief Encodes the Index field +// +// Author: Lasse Collin +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef LZMA_INDEX_ENCODER_H +#define LZMA_INDEX_ENCODER_H + +#include "common.h" + + +extern lzma_ret lzma_index_encoder_init(lzma_next_coder *next, + const lzma_allocator *allocator, const lzma_index *i); + + +#endif diff --git a/src/native/external/xz/src/liblzma/common/index_hash.c b/src/native/external/xz/src/liblzma/common/index_hash.c new file mode 100644 index 00000000000000..b7f1b6b58d1ac3 --- /dev/null +++ b/src/native/external/xz/src/liblzma/common/index_hash.c @@ -0,0 +1,341 @@ +// SPDX-License-Identifier: 0BSD + +/////////////////////////////////////////////////////////////////////////////// +// +/// \file index_hash.c +/// \brief Validates Index by using a hash function +// +// Author: Lasse Collin +// +/////////////////////////////////////////////////////////////////////////////// + +#include "common.h" +#include "index.h" +#include "check.h" + + +typedef struct { + /// Sum of the Block sizes (including Block Padding) + lzma_vli blocks_size; + + /// Sum of the Uncompressed Size fields + lzma_vli uncompressed_size; + + /// Number of Records + lzma_vli count; + + /// Size of the List of Index Records as bytes + lzma_vli index_list_size; + + /// Check calculated from Unpadded Sizes and Uncompressed Sizes. + lzma_check_state check; + +} lzma_index_hash_info; + + +struct lzma_index_hash_s { + enum { + SEQ_BLOCK, + SEQ_COUNT, + SEQ_UNPADDED, + SEQ_UNCOMPRESSED, + SEQ_PADDING_INIT, + SEQ_PADDING, + SEQ_CRC32, + } sequence; + + /// Information collected while decoding the actual Blocks. + lzma_index_hash_info blocks; + + /// Information collected from the Index field. + lzma_index_hash_info records; + + /// Number of Records not fully decoded + lzma_vli remaining; + + /// Unpadded Size currently being read from an Index Record. + lzma_vli unpadded_size; + + /// Uncompressed Size currently being read from an Index Record. + lzma_vli uncompressed_size; + + /// Position in variable-length integers when decoding them from + /// the List of Records. + size_t pos; + + /// CRC32 of the Index + uint32_t crc32; +}; + + +extern LZMA_API(lzma_index_hash *) +lzma_index_hash_init(lzma_index_hash *index_hash, + const lzma_allocator *allocator) +{ + if (index_hash == NULL) { + index_hash = lzma_alloc(sizeof(lzma_index_hash), allocator); + if (index_hash == NULL) + return NULL; + } + + index_hash->sequence = SEQ_BLOCK; + index_hash->blocks.blocks_size = 0; + index_hash->blocks.uncompressed_size = 0; + index_hash->blocks.count = 0; + index_hash->blocks.index_list_size = 0; + index_hash->records.blocks_size = 0; + index_hash->records.uncompressed_size = 0; + index_hash->records.count = 0; + index_hash->records.index_list_size = 0; + index_hash->unpadded_size = 0; + index_hash->uncompressed_size = 0; + index_hash->pos = 0; + index_hash->crc32 = 0; + + // These cannot fail because LZMA_CHECK_BEST is known to be supported. + (void)lzma_check_init(&index_hash->blocks.check, LZMA_CHECK_BEST); + (void)lzma_check_init(&index_hash->records.check, LZMA_CHECK_BEST); + + return index_hash; +} + + +extern LZMA_API(void) +lzma_index_hash_end(lzma_index_hash *index_hash, + const lzma_allocator *allocator) +{ + lzma_free(index_hash, allocator); + return; +} + + +extern LZMA_API(lzma_vli) +lzma_index_hash_size(const lzma_index_hash *index_hash) +{ + // Get the size of the Index from ->blocks instead of ->records for + // cases where application wants to know the Index Size before + // decoding the Index. + return index_size(index_hash->blocks.count, + index_hash->blocks.index_list_size); +} + + +/// Updates the sizes and the hash without any validation. +static void +hash_append(lzma_index_hash_info *info, lzma_vli unpadded_size, + lzma_vli uncompressed_size) +{ + info->blocks_size += vli_ceil4(unpadded_size); + info->uncompressed_size += uncompressed_size; + info->index_list_size += lzma_vli_size(unpadded_size) + + lzma_vli_size(uncompressed_size); + ++info->count; + + const lzma_vli sizes[2] = { unpadded_size, uncompressed_size }; + lzma_check_update(&info->check, LZMA_CHECK_BEST, + (const uint8_t *)(sizes), sizeof(sizes)); + + return; +} + + +extern LZMA_API(lzma_ret) +lzma_index_hash_append(lzma_index_hash *index_hash, lzma_vli unpadded_size, + lzma_vli uncompressed_size) +{ + // Validate the arguments. + if (index_hash == NULL || index_hash->sequence != SEQ_BLOCK + || unpadded_size < UNPADDED_SIZE_MIN + || unpadded_size > UNPADDED_SIZE_MAX + || uncompressed_size > LZMA_VLI_MAX) + return LZMA_PROG_ERROR; + + // Update the hash. + hash_append(&index_hash->blocks, unpadded_size, uncompressed_size); + + // Validate the properties of *info are still in allowed limits. + if (index_hash->blocks.blocks_size > LZMA_VLI_MAX + || index_hash->blocks.uncompressed_size > LZMA_VLI_MAX + || index_size(index_hash->blocks.count, + index_hash->blocks.index_list_size) + > LZMA_BACKWARD_SIZE_MAX + || index_stream_size(index_hash->blocks.blocks_size, + index_hash->blocks.count, + index_hash->blocks.index_list_size) + > LZMA_VLI_MAX) + return LZMA_DATA_ERROR; + + return LZMA_OK; +} + + +extern LZMA_API(lzma_ret) +lzma_index_hash_decode(lzma_index_hash *index_hash, const uint8_t *in, + size_t *in_pos, size_t in_size) +{ + // Catch zero input buffer here, because in contrast to Index encoder + // and decoder functions, applications call this function directly + // instead of via lzma_code(), which does the buffer checking. + if (*in_pos >= in_size) + return LZMA_BUF_ERROR; + + // NOTE: This function has many similarities to index_encode() and + // index_decode() functions found from index_encoder.c and + // index_decoder.c. See the comments especially in index_encoder.c. + const size_t in_start = *in_pos; + lzma_ret ret = LZMA_OK; + + while (*in_pos < in_size) + switch (index_hash->sequence) { + case SEQ_BLOCK: + // Check the Index Indicator is present. + if (in[(*in_pos)++] != INDEX_INDICATOR) + return LZMA_DATA_ERROR; + + index_hash->sequence = SEQ_COUNT; + break; + + case SEQ_COUNT: { + ret = lzma_vli_decode(&index_hash->remaining, + &index_hash->pos, in, in_pos, in_size); + if (ret != LZMA_STREAM_END) + goto out; + + // The count must match the count of the Blocks decoded. + if (index_hash->remaining != index_hash->blocks.count) + return LZMA_DATA_ERROR; + + ret = LZMA_OK; + index_hash->pos = 0; + + // Handle the special case when there are no Blocks. + index_hash->sequence = index_hash->remaining == 0 + ? SEQ_PADDING_INIT : SEQ_UNPADDED; + break; + } + + case SEQ_UNPADDED: + case SEQ_UNCOMPRESSED: { + lzma_vli *size = index_hash->sequence == SEQ_UNPADDED + ? &index_hash->unpadded_size + : &index_hash->uncompressed_size; + + ret = lzma_vli_decode(size, &index_hash->pos, + in, in_pos, in_size); + if (ret != LZMA_STREAM_END) + goto out; + + ret = LZMA_OK; + index_hash->pos = 0; + + if (index_hash->sequence == SEQ_UNPADDED) { + if (index_hash->unpadded_size < UNPADDED_SIZE_MIN + || index_hash->unpadded_size + > UNPADDED_SIZE_MAX) + return LZMA_DATA_ERROR; + + index_hash->sequence = SEQ_UNCOMPRESSED; + } else { + // Update the hash. + hash_append(&index_hash->records, + index_hash->unpadded_size, + index_hash->uncompressed_size); + + // Verify that we don't go over the known sizes. Note + // that this validation is simpler than the one used + // in lzma_index_hash_append(), because here we know + // that values in index_hash->blocks are already + // validated and we are fine as long as we don't + // exceed them in index_hash->records. + if (index_hash->blocks.blocks_size + < index_hash->records.blocks_size + || index_hash->blocks.uncompressed_size + < index_hash->records.uncompressed_size + || index_hash->blocks.index_list_size + < index_hash->records.index_list_size) + return LZMA_DATA_ERROR; + + // Check if this was the last Record. + index_hash->sequence = --index_hash->remaining == 0 + ? SEQ_PADDING_INIT : SEQ_UNPADDED; + } + + break; + } + + case SEQ_PADDING_INIT: + index_hash->pos = (LZMA_VLI_C(4) - index_size_unpadded( + index_hash->records.count, + index_hash->records.index_list_size)) & 3; + + index_hash->sequence = SEQ_PADDING; + FALLTHROUGH; + + case SEQ_PADDING: + if (index_hash->pos > 0) { + --index_hash->pos; + if (in[(*in_pos)++] != 0x00) + return LZMA_DATA_ERROR; + + break; + } + + // Compare the sizes. + if (index_hash->blocks.blocks_size + != index_hash->records.blocks_size + || index_hash->blocks.uncompressed_size + != index_hash->records.uncompressed_size + || index_hash->blocks.index_list_size + != index_hash->records.index_list_size) + return LZMA_DATA_ERROR; + + // Finish the hashes and compare them. + lzma_check_finish(&index_hash->blocks.check, LZMA_CHECK_BEST); + lzma_check_finish(&index_hash->records.check, LZMA_CHECK_BEST); + if (memcmp(index_hash->blocks.check.buffer.u8, + index_hash->records.check.buffer.u8, + lzma_check_size(LZMA_CHECK_BEST)) != 0) + return LZMA_DATA_ERROR; + + // Finish the CRC32 calculation. + index_hash->crc32 = lzma_crc32(in + in_start, + *in_pos - in_start, index_hash->crc32); + + index_hash->sequence = SEQ_CRC32; + FALLTHROUGH; + + case SEQ_CRC32: + do { + if (*in_pos == in_size) + return LZMA_OK; + + if (((index_hash->crc32 >> (index_hash->pos * 8)) + & 0xFF) != in[(*in_pos)++]) { +#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION + return LZMA_DATA_ERROR; +#endif + } + + } while (++index_hash->pos < 4); + + return LZMA_STREAM_END; + + default: + assert(0); + return LZMA_PROG_ERROR; + } + +out: + // Update the CRC32. + // + // Avoid null pointer + 0 (undefined behavior) in "in + in_start". + // In such a case we had no input and thus in_used == 0. + { + const size_t in_used = *in_pos - in_start; + if (in_used > 0) + index_hash->crc32 = lzma_crc32(in + in_start, + in_used, index_hash->crc32); + } + + return ret; +} diff --git a/src/native/external/xz/src/liblzma/common/lzip_decoder.c b/src/native/external/xz/src/liblzma/common/lzip_decoder.c new file mode 100644 index 00000000000000..5630039f97b914 --- /dev/null +++ b/src/native/external/xz/src/liblzma/common/lzip_decoder.c @@ -0,0 +1,414 @@ +// SPDX-License-Identifier: 0BSD + +/////////////////////////////////////////////////////////////////////////////// +// +/// \file lzip_decoder.c +/// \brief Decodes .lz (lzip) files +// +// Author: Michał Górny +// Lasse Collin +// +/////////////////////////////////////////////////////////////////////////////// + +#include "lzip_decoder.h" +#include "lzma_decoder.h" +#include "check.h" + + +// .lz format version 0 lacks the 64-bit Member size field in the footer. +#define LZIP_V0_FOOTER_SIZE 12 +#define LZIP_V1_FOOTER_SIZE 20 +#define LZIP_FOOTER_SIZE_MAX LZIP_V1_FOOTER_SIZE + +// lc/lp/pb are hardcoded in the .lz format. +#define LZIP_LC 3 +#define LZIP_LP 0 +#define LZIP_PB 2 + + +typedef struct { + enum { + SEQ_ID_STRING, + SEQ_VERSION, + SEQ_DICT_SIZE, + SEQ_CODER_INIT, + SEQ_LZMA_STREAM, + SEQ_MEMBER_FOOTER, + } sequence; + + /// .lz member format version + uint32_t version; + + /// CRC32 of the uncompressed data in the .lz member + uint32_t crc32; + + /// Uncompressed size of the .lz member + uint64_t uncompressed_size; + + /// Compressed size of the .lz member + uint64_t member_size; + + /// Memory usage limit + uint64_t memlimit; + + /// Amount of memory actually needed + uint64_t memusage; + + /// If true, LZMA_GET_CHECK is returned after decoding the header + /// fields. As all files use CRC32 this is redundant but it's + /// implemented anyway since the initialization functions supports + /// all other flags in addition to LZMA_TELL_ANY_CHECK. + bool tell_any_check; + + /// If true, we won't calculate or verify the CRC32 of + /// the uncompressed data. + bool ignore_check; + + /// If true, we will decode concatenated .lz members and stop if + /// non-.lz data is seen after at least one member has been + /// successfully decoded. + bool concatenated; + + /// When decoding concatenated .lz members, this is true as long as + /// we are decoding the first .lz member. This is needed to avoid + /// incorrect LZMA_FORMAT_ERROR in case there is non-.lz data at + /// the end of the file. + bool first_member; + + /// Reading position in the header and footer fields + size_t pos; + + /// Buffer to hold the .lz footer fields + uint8_t buffer[LZIP_FOOTER_SIZE_MAX]; + + /// Options decoded from the .lz header that needed to initialize + /// the LZMA1 decoder. + lzma_options_lzma options; + + /// LZMA1 decoder + lzma_next_coder lzma_decoder; + +} lzma_lzip_coder; + + +static lzma_ret +lzip_decode(void *coder_ptr, const lzma_allocator *allocator, + const uint8_t *restrict in, size_t *restrict in_pos, + size_t in_size, uint8_t *restrict out, + size_t *restrict out_pos, size_t out_size, lzma_action action) +{ + lzma_lzip_coder *coder = coder_ptr; + + while (true) + switch (coder->sequence) { + case SEQ_ID_STRING: { + // The "ID string" or magic bytes are "LZIP" in US-ASCII. + const uint8_t lzip_id_string[4] = { 0x4C, 0x5A, 0x49, 0x50 }; + + while (coder->pos < sizeof(lzip_id_string)) { + if (*in_pos >= in_size) { + // If we are on the 2nd+ concatenated member + // and the input ends before we can read + // the magic bytes, we discard the bytes that + // were already read (up to 3) and finish. + // See the reasoning below. + return !coder->first_member + && action == LZMA_FINISH + ? LZMA_STREAM_END : LZMA_OK; + } + + if (in[*in_pos] != lzip_id_string[coder->pos]) { + // The .lz format allows putting non-.lz data + // at the end of the file. If we have seen + // at least one valid .lz member already, + // then we won't consume the byte at *in_pos + // and will return LZMA_STREAM_END. This way + // apps can easily locate and read the non-.lz + // data after the .lz member(s). + // + // NOTE: If the first 1-3 bytes of the non-.lz + // data match the .lz ID string then the first + // 1-3 bytes of the junk will get ignored by + // us. If apps want to properly locate the + // trailing data they must ensure that the + // first byte of their custom data isn't the + // same as the first byte of .lz ID string. + // With the liblzma API we cannot rewind the + // input position across calls to lzma_code(). + return !coder->first_member + ? LZMA_STREAM_END : LZMA_FORMAT_ERROR; + } + + ++*in_pos; + ++coder->pos; + } + + coder->pos = 0; + + coder->crc32 = 0; + coder->uncompressed_size = 0; + coder->member_size = sizeof(lzip_id_string); + + coder->sequence = SEQ_VERSION; + FALLTHROUGH; + } + + case SEQ_VERSION: + if (*in_pos >= in_size) + return LZMA_OK; + + coder->version = in[(*in_pos)++]; + + // We support version 0 and unextended version 1. + if (coder->version > 1) + return LZMA_OPTIONS_ERROR; + + ++coder->member_size; + coder->sequence = SEQ_DICT_SIZE; + + // .lz versions 0 and 1 use CRC32 as the integrity check + // so if the application wanted to know that + // (LZMA_TELL_ANY_CHECK) we can tell it now. + if (coder->tell_any_check) + return LZMA_GET_CHECK; + + FALLTHROUGH; + + case SEQ_DICT_SIZE: { + if (*in_pos >= in_size) + return LZMA_OK; + + const uint32_t ds = in[(*in_pos)++]; + ++coder->member_size; + + // The five lowest bits are for the base-2 logarithm of + // the dictionary size and the highest three bits are + // the fractional part (0/16 to 7/16) that will be + // subtracted to get the final value. + // + // For example, with 0xB5: + // b2log = 21 + // fracnum = 5 + // dict_size = 2^21 - 2^21 * 5 / 16 = 1408 KiB + const uint32_t b2log = ds & 0x1F; + const uint32_t fracnum = ds >> 5; + + // The format versions 0 and 1 allow dictionary size in the + // range [4 KiB, 512 MiB]. + if (b2log < 12 || b2log > 29 || (b2log == 12 && fracnum > 0)) + return LZMA_DATA_ERROR; + + // 2^[b2log] - 2^[b2log] * [fracnum] / 16 + // = 2^[b2log] - [fracnum] * 2^([b2log] - 4) + coder->options.dict_size = (UINT32_C(1) << b2log) + - (fracnum << (b2log - 4)); + + assert(coder->options.dict_size >= 4096); + assert(coder->options.dict_size <= (UINT32_C(512) << 20)); + + coder->options.preset_dict = NULL; + coder->options.lc = LZIP_LC; + coder->options.lp = LZIP_LP; + coder->options.pb = LZIP_PB; + + // Calculate the memory usage. + coder->memusage + = lzma_lzma_decoder_memusage_nocheck(&coder->options) + + LZMA_MEMUSAGE_BASE; + + // Initialization is a separate step because if we return + // LZMA_MEMLIMIT_ERROR we need to be able to restart after + // the memlimit has been increased. + coder->sequence = SEQ_CODER_INIT; + FALLTHROUGH; + } + + case SEQ_CODER_INIT: { + if (coder->memusage > coder->memlimit) + return LZMA_MEMLIMIT_ERROR; + + const lzma_filter_info filters[2] = { + { + .id = LZMA_FILTER_LZMA1, + .init = &lzma_lzma_decoder_init, + .options = &coder->options, + }, { + .init = NULL, + } + }; + + return_if_error(lzma_next_filter_init(&coder->lzma_decoder, + allocator, filters)); + + coder->crc32 = 0; + coder->sequence = SEQ_LZMA_STREAM; + FALLTHROUGH; + } + + case SEQ_LZMA_STREAM: { + const size_t in_start = *in_pos; + const size_t out_start = *out_pos; + + const lzma_ret ret = coder->lzma_decoder.code( + coder->lzma_decoder.coder, allocator, + in, in_pos, in_size, out, out_pos, out_size, + action); + + const size_t out_used = *out_pos - out_start; + + coder->member_size += *in_pos - in_start; + coder->uncompressed_size += out_used; + + // Don't update the CRC32 if the integrity check will be + // ignored or if there was no new output. The latter is + // important in case out == NULL to avoid null pointer + 0 + // which is undefined behavior. + if (!coder->ignore_check && out_used > 0) + coder->crc32 = lzma_crc32(out + out_start, out_used, + coder->crc32); + + if (ret != LZMA_STREAM_END) + return ret; + + coder->sequence = SEQ_MEMBER_FOOTER; + FALLTHROUGH; + } + + case SEQ_MEMBER_FOOTER: { + // The footer of .lz version 0 lacks the Member size field. + // This is the only difference between version 0 and + // unextended version 1 formats. + const size_t footer_size = coder->version == 0 + ? LZIP_V0_FOOTER_SIZE + : LZIP_V1_FOOTER_SIZE; + + // Copy the CRC32, Data size, and Member size fields to + // the internal buffer. + lzma_bufcpy(in, in_pos, in_size, coder->buffer, &coder->pos, + footer_size); + + // Return if we didn't get the whole footer yet. + if (coder->pos < footer_size) + return LZMA_OK; + + coder->pos = 0; + coder->member_size += footer_size; + + // Check that the footer fields match the observed data. + if (!coder->ignore_check + && coder->crc32 != read32le(&coder->buffer[0])) + return LZMA_DATA_ERROR; + + if (coder->uncompressed_size != read64le(&coder->buffer[4])) + return LZMA_DATA_ERROR; + + if (coder->version > 0) { + // .lz version 0 has no Member size field. + if (coder->member_size != read64le(&coder->buffer[12])) + return LZMA_DATA_ERROR; + } + + // Decoding is finished if we weren't requested to decode + // more than one .lz member. + if (!coder->concatenated) + return LZMA_STREAM_END; + + coder->first_member = false; + coder->sequence = SEQ_ID_STRING; + break; + } + + default: + assert(0); + return LZMA_PROG_ERROR; + } + + // Never reached +} + + +static void +lzip_decoder_end(void *coder_ptr, const lzma_allocator *allocator) +{ + lzma_lzip_coder *coder = coder_ptr; + lzma_next_end(&coder->lzma_decoder, allocator); + lzma_free(coder, allocator); + return; +} + + +static lzma_check +lzip_decoder_get_check(const void *coder_ptr lzma_attribute((__unused__))) +{ + return LZMA_CHECK_CRC32; +} + + +static lzma_ret +lzip_decoder_memconfig(void *coder_ptr, uint64_t *memusage, + uint64_t *old_memlimit, uint64_t new_memlimit) +{ + lzma_lzip_coder *coder = coder_ptr; + + *memusage = coder->memusage; + *old_memlimit = coder->memlimit; + + if (new_memlimit != 0) { + if (new_memlimit < coder->memusage) + return LZMA_MEMLIMIT_ERROR; + + coder->memlimit = new_memlimit; + } + + return LZMA_OK; +} + + +extern lzma_ret +lzma_lzip_decoder_init( + lzma_next_coder *next, const lzma_allocator *allocator, + uint64_t memlimit, uint32_t flags) +{ + lzma_next_coder_init(&lzma_lzip_decoder_init, next, allocator); + + if (flags & ~LZMA_SUPPORTED_FLAGS) + return LZMA_OPTIONS_ERROR; + + lzma_lzip_coder *coder = next->coder; + if (coder == NULL) { + coder = lzma_alloc(sizeof(lzma_lzip_coder), allocator); + if (coder == NULL) + return LZMA_MEM_ERROR; + + next->coder = coder; + next->code = &lzip_decode; + next->end = &lzip_decoder_end; + next->get_check = &lzip_decoder_get_check; + next->memconfig = &lzip_decoder_memconfig; + + coder->lzma_decoder = LZMA_NEXT_CODER_INIT; + } + + coder->sequence = SEQ_ID_STRING; + coder->memlimit = my_max(1, memlimit); + coder->memusage = LZMA_MEMUSAGE_BASE; + coder->tell_any_check = (flags & LZMA_TELL_ANY_CHECK) != 0; + coder->ignore_check = (flags & LZMA_IGNORE_CHECK) != 0; + coder->concatenated = (flags & LZMA_CONCATENATED) != 0; + coder->first_member = true; + coder->pos = 0; + + return LZMA_OK; +} + + +extern LZMA_API(lzma_ret) +lzma_lzip_decoder(lzma_stream *strm, uint64_t memlimit, uint32_t flags) +{ + lzma_next_strm_init(lzma_lzip_decoder_init, strm, memlimit, flags); + + strm->internal->supported_actions[LZMA_RUN] = true; + strm->internal->supported_actions[LZMA_FINISH] = true; + + return LZMA_OK; +} diff --git a/src/native/external/xz/src/liblzma/common/lzip_decoder.h b/src/native/external/xz/src/liblzma/common/lzip_decoder.h new file mode 100644 index 00000000000000..0e1f7bebd45b46 --- /dev/null +++ b/src/native/external/xz/src/liblzma/common/lzip_decoder.h @@ -0,0 +1,21 @@ +// SPDX-License-Identifier: 0BSD + +/////////////////////////////////////////////////////////////////////////////// +// +/// \file lzip_decoder.h +/// \brief Decodes .lz (lzip) files +// +// Author: Michał Górny +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef LZMA_LZIP_DECODER_H +#define LZMA_LZIP_DECODER_H + +#include "common.h" + +extern lzma_ret lzma_lzip_decoder_init( + lzma_next_coder *next, const lzma_allocator *allocator, + uint64_t memlimit, uint32_t flags); + +#endif diff --git a/src/native/external/xz/src/liblzma/common/memcmplen.h b/src/native/external/xz/src/liblzma/common/memcmplen.h new file mode 100644 index 00000000000000..82e9085422954b --- /dev/null +++ b/src/native/external/xz/src/liblzma/common/memcmplen.h @@ -0,0 +1,192 @@ +// SPDX-License-Identifier: 0BSD + +/////////////////////////////////////////////////////////////////////////////// +// +/// \file memcmplen.h +/// \brief Optimized comparison of two buffers +// +// Author: Lasse Collin +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef LZMA_MEMCMPLEN_H +#define LZMA_MEMCMPLEN_H + +#include "common.h" + +#ifdef HAVE_IMMINTRIN_H +# include +#endif + +// Only include if it is needed. The header is only needed +// on Windows when using an MSVC compatible compiler. The Intel compiler +// can use the intrinsics without the header file. +#if defined(TUKLIB_FAST_UNALIGNED_ACCESS) \ + && defined(_MSC_VER) \ + && (defined(_M_X64) \ + || defined(_M_ARM64) || defined(_M_ARM64EC)) \ + && !defined(__INTEL_COMPILER) +# include +#endif + + +/// Find out how many equal bytes the two buffers have. +/// +/// \param buf1 First buffer +/// \param buf2 Second buffer +/// \param len How many bytes have already been compared and will +/// be assumed to match +/// \param limit How many bytes to compare at most, including the +/// already-compared bytes. This must be significantly +/// smaller than UINT32_MAX to avoid integer overflows. +/// Up to LZMA_MEMCMPLEN_EXTRA bytes may be read past +/// the specified limit from both buf1 and buf2. +/// +/// \return Number of equal bytes in the buffers is returned. +/// This is always at least len and at most limit. +/// +/// \note LZMA_MEMCMPLEN_EXTRA defines how many extra bytes may be read. +/// It's rounded up to 2^n. This extra amount needs to be +/// allocated in the buffers being used. It needs to be +/// initialized too to keep Valgrind quiet. +static lzma_always_inline uint32_t +lzma_memcmplen(const uint8_t *buf1, const uint8_t *buf2, + uint32_t len, uint32_t limit) +{ + assert(len <= limit); + assert(limit <= UINT32_MAX / 2); + +#if defined(TUKLIB_FAST_UNALIGNED_ACCESS) \ + && (((TUKLIB_GNUC_REQ(3, 4) || defined(__clang__)) \ + && SIZE_MAX == UINT64_MAX) \ + || (defined(__INTEL_COMPILER) && defined(__x86_64__)) \ + || (defined(__INTEL_COMPILER) && defined(_M_X64)) \ + || (defined(_MSC_VER) && (defined(_M_X64) \ + || defined(_M_ARM64) || defined(_M_ARM64EC)))) + // This is only for x86-64 and ARM64 for now. This might be fine on + // other 64-bit processors too. + // + // Reasons to use subtraction instead of xor: + // + // - On some x86-64 processors (Intel Sandy Bridge to Tiger Lake), + // sub+jz and sub+jnz can be fused but xor+jz or xor+jnz cannot. + // Thus using subtraction has potential to be a tiny amount faster + // since the code checks if the quotient is non-zero. + // + // - Some processors (Intel Pentium 4) used to have more ALU + // resources for add/sub instructions than and/or/xor. + // + // The processor info is based on Agner Fog's microarchitecture.pdf + // version 2023-05-26. https://www.agner.org/optimize/ +#define LZMA_MEMCMPLEN_EXTRA 8 + while (len < limit) { +# ifdef WORDS_BIGENDIAN + const uint64_t x = read64ne(buf1 + len) ^ read64ne(buf2 + len); +# else + const uint64_t x = read64ne(buf1 + len) - read64ne(buf2 + len); +# endif + if (x != 0) { + // MSVC or Intel C compiler on Windows +# if defined(_MSC_VER) || defined(__INTEL_COMPILER) + unsigned long tmp; + _BitScanForward64(&tmp, x); + len += (uint32_t)tmp >> 3; + // GCC, Clang, or Intel C compiler +# elif defined(WORDS_BIGENDIAN) + len += (uint32_t)__builtin_clzll(x) >> 3; +# else + len += (uint32_t)__builtin_ctzll(x) >> 3; +# endif + return my_min(len, limit); + } + + len += 8; + } + + return limit; + +#elif defined(TUKLIB_FAST_UNALIGNED_ACCESS) \ + && defined(HAVE__MM_MOVEMASK_EPI8) \ + && (defined(__SSE2__) \ + || (defined(_MSC_VER) && defined(_M_IX86_FP) \ + && _M_IX86_FP >= 2)) + // NOTE: This will use 128-bit unaligned access which + // TUKLIB_FAST_UNALIGNED_ACCESS wasn't meant to permit, + // but it's convenient here since this is x86-only. + // + // SSE2 version for 32-bit and 64-bit x86. On x86-64 the above + // version is sometimes significantly faster and sometimes + // slightly slower than this SSE2 version, so this SSE2 + // version isn't used on x86-64. +# define LZMA_MEMCMPLEN_EXTRA 16 + while (len < limit) { + const uint32_t x = 0xFFFF ^ (uint32_t)_mm_movemask_epi8( + _mm_cmpeq_epi8( + _mm_loadu_si128((const __m128i *)(buf1 + len)), + _mm_loadu_si128((const __m128i *)(buf2 + len)))); + + if (x != 0) { + len += ctz32(x); + return my_min(len, limit); + } + + len += 16; + } + + return limit; + +#elif defined(TUKLIB_FAST_UNALIGNED_ACCESS) && !defined(WORDS_BIGENDIAN) + // Generic 32-bit little endian method +# define LZMA_MEMCMPLEN_EXTRA 4 + while (len < limit) { + uint32_t x = read32ne(buf1 + len) - read32ne(buf2 + len); + if (x != 0) { + if ((x & 0xFFFF) == 0) { + len += 2; + x >>= 16; + } + + if ((x & 0xFF) == 0) + ++len; + + return my_min(len, limit); + } + + len += 4; + } + + return limit; + +#elif defined(TUKLIB_FAST_UNALIGNED_ACCESS) && defined(WORDS_BIGENDIAN) + // Generic 32-bit big endian method +# define LZMA_MEMCMPLEN_EXTRA 4 + while (len < limit) { + uint32_t x = read32ne(buf1 + len) ^ read32ne(buf2 + len); + if (x != 0) { + if ((x & 0xFFFF0000) == 0) { + len += 2; + x <<= 16; + } + + if ((x & 0xFF000000) == 0) + ++len; + + return my_min(len, limit); + } + + len += 4; + } + + return limit; + +#else + // Simple portable version that doesn't use unaligned access. +# define LZMA_MEMCMPLEN_EXTRA 0 + while (len < limit && buf1[len] == buf2[len]) + ++len; + + return len; +#endif +} + +#endif diff --git a/src/native/external/xz/src/liblzma/common/microlzma_decoder.c b/src/native/external/xz/src/liblzma/common/microlzma_decoder.c new file mode 100644 index 00000000000000..882cb2c808d114 --- /dev/null +++ b/src/native/external/xz/src/liblzma/common/microlzma_decoder.c @@ -0,0 +1,220 @@ +// SPDX-License-Identifier: 0BSD + +/////////////////////////////////////////////////////////////////////////////// +// +/// \file microlzma_decoder.c +/// \brief Decode MicroLZMA format +// +// Author: Lasse Collin +// +/////////////////////////////////////////////////////////////////////////////// + +#include "lzma_decoder.h" +#include "lz_decoder.h" + + +typedef struct { + /// LZMA1 decoder + lzma_next_coder lzma; + + /// Compressed size of the stream as given by the application. + /// This must be exactly correct. + /// + /// This will be decremented when input is read. + uint64_t comp_size; + + /// Uncompressed size of the stream as given by the application. + /// This may be less than the actual uncompressed size if + /// uncomp_size_is_exact is false. + /// + /// This will be decremented when output is produced. + lzma_vli uncomp_size; + + /// LZMA dictionary size as given by the application + uint32_t dict_size; + + /// If true, the exact uncompressed size is known. If false, + /// uncomp_size may be smaller than the real uncompressed size; + /// uncomp_size may never be bigger than the real uncompressed size. + bool uncomp_size_is_exact; + + /// True once the first byte of the MicroLZMA stream + /// has been processed. + bool props_decoded; +} lzma_microlzma_coder; + + +static lzma_ret +microlzma_decode(void *coder_ptr, const lzma_allocator *allocator, + const uint8_t *restrict in, size_t *restrict in_pos, + size_t in_size, uint8_t *restrict out, + size_t *restrict out_pos, size_t out_size, lzma_action action) +{ + lzma_microlzma_coder *coder = coder_ptr; + + // Remember the in start position so that we can update comp_size. + const size_t in_start = *in_pos; + + // Remember the out start position so that we can update uncomp_size. + const size_t out_start = *out_pos; + + // Limit the amount of input so that the decoder won't read more than + // comp_size. This is required when uncomp_size isn't exact because + // in that case the LZMA decoder will try to decode more input even + // when it has no output space (it can be looking for EOPM). + if (in_size - *in_pos > coder->comp_size) + in_size = *in_pos + (size_t)(coder->comp_size); + + // When the exact uncompressed size isn't known, we must limit + // the available output space to prevent the LZMA decoder from + // trying to decode too much. + if (!coder->uncomp_size_is_exact + && out_size - *out_pos > coder->uncomp_size) + out_size = *out_pos + (size_t)(coder->uncomp_size); + + if (!coder->props_decoded) { + // There must be at least one byte of input to decode + // the properties byte. + if (*in_pos >= in_size) + return LZMA_OK; + + lzma_options_lzma options = { + .dict_size = coder->dict_size, + .preset_dict = NULL, + .preset_dict_size = 0, + .ext_flags = 0, // EOPM not allowed when size is known + .ext_size_low = UINT32_MAX, // Unknown size by default + .ext_size_high = UINT32_MAX, + }; + + if (coder->uncomp_size_is_exact) + lzma_set_ext_size(options, coder->uncomp_size); + + // The properties are stored as bitwise-negation + // of the typical encoding. + if (lzma_lzma_lclppb_decode(&options, ~in[*in_pos])) + return LZMA_OPTIONS_ERROR; + + ++*in_pos; + + // Initialize the decoder. + lzma_filter_info filters[2] = { + { + .id = LZMA_FILTER_LZMA1EXT, + .init = &lzma_lzma_decoder_init, + .options = &options, + }, { + .init = NULL, + } + }; + + return_if_error(lzma_next_filter_init(&coder->lzma, + allocator, filters)); + + // Pass one dummy 0x00 byte to the LZMA decoder since that + // is what it expects the first byte to be. + const uint8_t dummy_in = 0; + size_t dummy_in_pos = 0; + if (coder->lzma.code(coder->lzma.coder, allocator, + &dummy_in, &dummy_in_pos, 1, + out, out_pos, out_size, LZMA_RUN) != LZMA_OK) + return LZMA_PROG_ERROR; + + assert(dummy_in_pos == 1); + coder->props_decoded = true; + } + + // The rest is normal LZMA decoding. + lzma_ret ret = coder->lzma.code(coder->lzma.coder, allocator, + in, in_pos, in_size, + out, out_pos, out_size, action); + + // Update the remaining compressed size. + assert(coder->comp_size >= *in_pos - in_start); + coder->comp_size -= *in_pos - in_start; + + if (coder->uncomp_size_is_exact) { + // After successful decompression of the complete stream + // the compressed size must match. + if (ret == LZMA_STREAM_END && coder->comp_size != 0) + ret = LZMA_DATA_ERROR; + } else { + // Update the amount of output remaining. + assert(coder->uncomp_size >= *out_pos - out_start); + coder->uncomp_size -= *out_pos - out_start; + + // - We must not get LZMA_STREAM_END because the stream + // shouldn't have EOPM. + // - We must use uncomp_size to determine when to + // return LZMA_STREAM_END. + if (ret == LZMA_STREAM_END) + ret = LZMA_DATA_ERROR; + else if (coder->uncomp_size == 0) + ret = LZMA_STREAM_END; + } + + return ret; +} + + +static void +microlzma_decoder_end(void *coder_ptr, const lzma_allocator *allocator) +{ + lzma_microlzma_coder *coder = coder_ptr; + lzma_next_end(&coder->lzma, allocator); + lzma_free(coder, allocator); + return; +} + + +static lzma_ret +microlzma_decoder_init(lzma_next_coder *next, const lzma_allocator *allocator, + uint64_t comp_size, + uint64_t uncomp_size, bool uncomp_size_is_exact, + uint32_t dict_size) +{ + lzma_next_coder_init(µlzma_decoder_init, next, allocator); + + lzma_microlzma_coder *coder = next->coder; + + if (coder == NULL) { + coder = lzma_alloc(sizeof(lzma_microlzma_coder), allocator); + if (coder == NULL) + return LZMA_MEM_ERROR; + + next->coder = coder; + next->code = µlzma_decode; + next->end = µlzma_decoder_end; + + coder->lzma = LZMA_NEXT_CODER_INIT; + } + + // The public API is uint64_t but the internal LZ decoder API uses + // lzma_vli. + if (uncomp_size > LZMA_VLI_MAX) + return LZMA_OPTIONS_ERROR; + + coder->comp_size = comp_size; + coder->uncomp_size = uncomp_size; + coder->uncomp_size_is_exact = uncomp_size_is_exact; + coder->dict_size = dict_size; + + coder->props_decoded = false; + + return LZMA_OK; +} + + +extern LZMA_API(lzma_ret) +lzma_microlzma_decoder(lzma_stream *strm, uint64_t comp_size, + uint64_t uncomp_size, lzma_bool uncomp_size_is_exact, + uint32_t dict_size) +{ + lzma_next_strm_init(microlzma_decoder_init, strm, comp_size, + uncomp_size, uncomp_size_is_exact, dict_size); + + strm->internal->supported_actions[LZMA_RUN] = true; + strm->internal->supported_actions[LZMA_FINISH] = true; + + return LZMA_OK; +} diff --git a/src/native/external/xz/src/liblzma/common/microlzma_encoder.c b/src/native/external/xz/src/liblzma/common/microlzma_encoder.c new file mode 100644 index 00000000000000..45ec0b12f45dd0 --- /dev/null +++ b/src/native/external/xz/src/liblzma/common/microlzma_encoder.c @@ -0,0 +1,140 @@ +// SPDX-License-Identifier: 0BSD + +/////////////////////////////////////////////////////////////////////////////// +// +/// \file microlzma_encoder.c +/// \brief Encode into MicroLZMA format +// +// Author: Lasse Collin +// +/////////////////////////////////////////////////////////////////////////////// + +#include "lzma_encoder.h" + + +typedef struct { + /// LZMA1 encoder + lzma_next_coder lzma; + + /// LZMA properties byte (lc/lp/pb) + uint8_t props; +} lzma_microlzma_coder; + + +static lzma_ret +microlzma_encode(void *coder_ptr, const lzma_allocator *allocator, + const uint8_t *restrict in, size_t *restrict in_pos, + size_t in_size, uint8_t *restrict out, + size_t *restrict out_pos, size_t out_size, lzma_action action) +{ + lzma_microlzma_coder *coder = coder_ptr; + + // Remember *out_pos so that we can overwrite the first byte with + // the LZMA properties byte. + const size_t out_start = *out_pos; + + // Remember *in_pos so that we can set it based on how many + // uncompressed bytes were actually encoded. + const size_t in_start = *in_pos; + + // Set the output size limit based on the available output space. + // We know that the encoder supports set_out_limit() so + // LZMA_OPTIONS_ERROR isn't possible. LZMA_BUF_ERROR is possible + // but lzma_code() has an assertion to not allow it to be returned + // from here and I don't want to change that for now, so + // LZMA_BUF_ERROR becomes LZMA_PROG_ERROR. + uint64_t uncomp_size; + if (coder->lzma.set_out_limit(coder->lzma.coder, + &uncomp_size, out_size - *out_pos) != LZMA_OK) + return LZMA_PROG_ERROR; + + // set_out_limit fails if this isn't true. + assert(out_size - *out_pos >= 6); + + // Encode as much as possible. + const lzma_ret ret = coder->lzma.code(coder->lzma.coder, allocator, + in, in_pos, in_size, out, out_pos, out_size, action); + + if (ret != LZMA_STREAM_END) { + if (ret == LZMA_OK) { + assert(0); + return LZMA_PROG_ERROR; + } + + return ret; + } + + // The first output byte is bitwise-negation of the properties byte. + // We know that there is space for this byte because set_out_limit + // and the actual encoding succeeded. + out[out_start] = (uint8_t)(~coder->props); + + // The LZMA encoder likely read more input than it was able to encode. + // Set *in_pos based on uncomp_size. + assert(uncomp_size <= in_size - in_start); + *in_pos = in_start + (size_t)(uncomp_size); + + return ret; +} + + +static void +microlzma_encoder_end(void *coder_ptr, const lzma_allocator *allocator) +{ + lzma_microlzma_coder *coder = coder_ptr; + lzma_next_end(&coder->lzma, allocator); + lzma_free(coder, allocator); + return; +} + + +static lzma_ret +microlzma_encoder_init(lzma_next_coder *next, const lzma_allocator *allocator, + const lzma_options_lzma *options) +{ + lzma_next_coder_init(µlzma_encoder_init, next, allocator); + + lzma_microlzma_coder *coder = next->coder; + + if (coder == NULL) { + coder = lzma_alloc(sizeof(lzma_microlzma_coder), allocator); + if (coder == NULL) + return LZMA_MEM_ERROR; + + next->coder = coder; + next->code = µlzma_encode; + next->end = µlzma_encoder_end; + + coder->lzma = LZMA_NEXT_CODER_INIT; + } + + // Encode the properties byte. Bitwise-negation of it will be the + // first output byte. + if (lzma_lzma_lclppb_encode(options, &coder->props)) + return LZMA_OPTIONS_ERROR; + + // Initialize the LZMA encoder. + const lzma_filter_info filters[2] = { + { + .id = LZMA_FILTER_LZMA1, + .init = &lzma_lzma_encoder_init, + .options = (void *)(options), + }, { + .init = NULL, + } + }; + + return lzma_next_filter_init(&coder->lzma, allocator, filters); +} + + +extern LZMA_API(lzma_ret) +lzma_microlzma_encoder(lzma_stream *strm, const lzma_options_lzma *options) +{ + lzma_next_strm_init(microlzma_encoder_init, strm, options); + + strm->internal->supported_actions[LZMA_FINISH] = true; + + return LZMA_OK; + +} diff --git a/src/native/external/xz/src/liblzma/common/outqueue.c b/src/native/external/xz/src/liblzma/common/outqueue.c new file mode 100644 index 00000000000000..eb018eb42b2692 --- /dev/null +++ b/src/native/external/xz/src/liblzma/common/outqueue.c @@ -0,0 +1,286 @@ +// SPDX-License-Identifier: 0BSD + +/////////////////////////////////////////////////////////////////////////////// +// +/// \file outqueue.c +/// \brief Output queue handling in multithreaded coding +// +// Author: Lasse Collin +// +/////////////////////////////////////////////////////////////////////////////// + +#include "outqueue.h" + + +/// Get the maximum number of buffers that may be allocated based +/// on the number of threads. For now this is twice the number of threads. +/// It's a compromise between RAM usage and keeping the worker threads busy +/// when buffers finish out of order. +#define GET_BUFS_LIMIT(threads) (2 * (threads)) + + +extern uint64_t +lzma_outq_memusage(uint64_t buf_size_max, uint32_t threads) +{ + // This is to ease integer overflow checking: We may allocate up to + // GET_BUFS_LIMIT(LZMA_THREADS_MAX) buffers and we need some extra + // memory for other data structures too (that's the /2). + // + // lzma_outq_prealloc_buf() will still accept bigger buffers than this. + const uint64_t limit + = UINT64_MAX / GET_BUFS_LIMIT(LZMA_THREADS_MAX) / 2; + + if (threads > LZMA_THREADS_MAX || buf_size_max > limit) + return UINT64_MAX; + + return GET_BUFS_LIMIT(threads) + * lzma_outq_outbuf_memusage(buf_size_max); +} + + +static void +move_head_to_cache(lzma_outq *outq, const lzma_allocator *allocator) +{ + assert(outq->head != NULL); + assert(outq->tail != NULL); + assert(outq->bufs_in_use > 0); + + lzma_outbuf *buf = outq->head; + outq->head = buf->next; + if (outq->head == NULL) + outq->tail = NULL; + + if (outq->cache != NULL && outq->cache->allocated != buf->allocated) + lzma_outq_clear_cache(outq, allocator); + + buf->next = outq->cache; + outq->cache = buf; + + --outq->bufs_in_use; + outq->mem_in_use -= lzma_outq_outbuf_memusage(buf->allocated); + + return; +} + + +static void +free_one_cached_buffer(lzma_outq *outq, const lzma_allocator *allocator) +{ + assert(outq->cache != NULL); + + lzma_outbuf *buf = outq->cache; + outq->cache = buf->next; + + --outq->bufs_allocated; + outq->mem_allocated -= lzma_outq_outbuf_memusage(buf->allocated); + + lzma_free(buf, allocator); + return; +} + + +extern void +lzma_outq_clear_cache(lzma_outq *outq, const lzma_allocator *allocator) +{ + while (outq->cache != NULL) + free_one_cached_buffer(outq, allocator); + + return; +} + + +extern void +lzma_outq_clear_cache2(lzma_outq *outq, const lzma_allocator *allocator, + size_t keep_size) +{ + if (outq->cache == NULL) + return; + + // Free all but one. + while (outq->cache->next != NULL) + free_one_cached_buffer(outq, allocator); + + // Free the last one only if its size doesn't equal to keep_size. + if (outq->cache->allocated != keep_size) + free_one_cached_buffer(outq, allocator); + + return; +} + + +extern lzma_ret +lzma_outq_init(lzma_outq *outq, const lzma_allocator *allocator, + uint32_t threads) +{ + if (threads > LZMA_THREADS_MAX) + return LZMA_OPTIONS_ERROR; + + const uint32_t bufs_limit = GET_BUFS_LIMIT(threads); + + // Clear head/tail. + while (outq->head != NULL) + move_head_to_cache(outq, allocator); + + // If new buf_limit is lower than the old one, we may need to free + // a few cached buffers. + while (bufs_limit < outq->bufs_allocated) + free_one_cached_buffer(outq, allocator); + + outq->bufs_limit = bufs_limit; + outq->read_pos = 0; + + return LZMA_OK; +} + + +extern void +lzma_outq_end(lzma_outq *outq, const lzma_allocator *allocator) +{ + while (outq->head != NULL) + move_head_to_cache(outq, allocator); + + lzma_outq_clear_cache(outq, allocator); + return; +} + + +extern lzma_ret +lzma_outq_prealloc_buf(lzma_outq *outq, const lzma_allocator *allocator, + size_t size) +{ + // Caller must have checked it with lzma_outq_has_buf(). + assert(outq->bufs_in_use < outq->bufs_limit); + + // If there already is appropriately-sized buffer in the cache, + // we need to do nothing. + if (outq->cache != NULL && outq->cache->allocated == size) + return LZMA_OK; + + if (size > SIZE_MAX - sizeof(lzma_outbuf)) + return LZMA_MEM_ERROR; + + const size_t alloc_size = lzma_outq_outbuf_memusage(size); + + // The cache may have buffers but their size is wrong. + lzma_outq_clear_cache(outq, allocator); + + outq->cache = lzma_alloc(alloc_size, allocator); + if (outq->cache == NULL) + return LZMA_MEM_ERROR; + + outq->cache->next = NULL; + outq->cache->allocated = size; + + ++outq->bufs_allocated; + outq->mem_allocated += alloc_size; + + return LZMA_OK; +} + + +extern lzma_outbuf * +lzma_outq_get_buf(lzma_outq *outq, void *worker) +{ + // Caller must have used lzma_outq_prealloc_buf() to ensure these. + assert(outq->bufs_in_use < outq->bufs_limit); + assert(outq->bufs_in_use < outq->bufs_allocated); + assert(outq->cache != NULL); + + lzma_outbuf *buf = outq->cache; + outq->cache = buf->next; + buf->next = NULL; + + if (outq->tail != NULL) { + assert(outq->head != NULL); + outq->tail->next = buf; + } else { + assert(outq->head == NULL); + outq->head = buf; + } + + outq->tail = buf; + + buf->worker = worker; + buf->finished = false; + buf->finish_ret = LZMA_STREAM_END; + buf->pos = 0; + buf->decoder_in_pos = 0; + + buf->unpadded_size = 0; + buf->uncompressed_size = 0; + + ++outq->bufs_in_use; + outq->mem_in_use += lzma_outq_outbuf_memusage(buf->allocated); + + return buf; +} + + +extern bool +lzma_outq_is_readable(const lzma_outq *outq) +{ + if (outq->head == NULL) + return false; + + return outq->read_pos < outq->head->pos || outq->head->finished; +} + + +extern lzma_ret +lzma_outq_read(lzma_outq *restrict outq, + const lzma_allocator *restrict allocator, + uint8_t *restrict out, size_t *restrict out_pos, + size_t out_size, + lzma_vli *restrict unpadded_size, + lzma_vli *restrict uncompressed_size) +{ + // There must be at least one buffer from which to read. + if (outq->bufs_in_use == 0) + return LZMA_OK; + + // Get the buffer. + lzma_outbuf *buf = outq->head; + + // Copy from the buffer to output. + // + // FIXME? In threaded decoder it may be bad to do this copy while + // the mutex is being held. + lzma_bufcpy(buf->buf, &outq->read_pos, buf->pos, + out, out_pos, out_size); + + // Return if we didn't get all the data from the buffer. + if (!buf->finished || outq->read_pos < buf->pos) + return LZMA_OK; + + // The buffer was finished. Tell the caller its size information. + if (unpadded_size != NULL) + *unpadded_size = buf->unpadded_size; + + if (uncompressed_size != NULL) + *uncompressed_size = buf->uncompressed_size; + + // Remember the return value. + const lzma_ret finish_ret = buf->finish_ret; + + // Free this buffer for further use. + move_head_to_cache(outq, allocator); + outq->read_pos = 0; + + return finish_ret; +} + + +extern void +lzma_outq_enable_partial_output(lzma_outq *outq, + void (*enable_partial_output)(void *worker)) +{ + if (outq->head != NULL && !outq->head->finished + && outq->head->worker != NULL) { + enable_partial_output(outq->head->worker); + + // Set it to NULL since calling it twice is pointless. + outq->head->worker = NULL; + } + + return; +} diff --git a/src/native/external/xz/src/liblzma/common/outqueue.h b/src/native/external/xz/src/liblzma/common/outqueue.h new file mode 100644 index 00000000000000..0e4e9141dab559 --- /dev/null +++ b/src/native/external/xz/src/liblzma/common/outqueue.h @@ -0,0 +1,259 @@ +// SPDX-License-Identifier: 0BSD + +/////////////////////////////////////////////////////////////////////////////// +// +/// \file outqueue.h +/// \brief Output queue handling in multithreaded coding +// +// Author: Lasse Collin +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef LZMA_OUTQUEUE_H +#define LZMA_OUTQUEUE_H + +#include "common.h" + + +/// Output buffer for a single thread +typedef struct lzma_outbuf_s lzma_outbuf; +struct lzma_outbuf_s { + /// Pointer to the next buffer. This is used for the cached buffers. + /// The worker thread must not modify this. + lzma_outbuf *next; + + /// This initialized by lzma_outq_get_buf() and + /// is used by lzma_outq_enable_partial_output(). + /// The worker thread must not modify this. + void *worker; + + /// Amount of memory allocated for buf[]. + /// The worker thread must not modify this. + size_t allocated; + + /// Writing position in the worker thread or, in other words, the + /// amount of finished data written to buf[] which can be copied out + /// + /// \note This is read by another thread and thus access + /// to this variable needs a mutex. + size_t pos; + + /// Decompression: Position in the input buffer in the worker thread + /// that matches the output "pos" above. This is used to detect if + /// more output might be possible from the worker thread: if it has + /// consumed all its input, then more output isn't possible. + /// + /// \note This is read by another thread and thus access + /// to this variable needs a mutex. + size_t decoder_in_pos; + + /// True when no more data will be written into this buffer. + /// + /// \note This is read by another thread and thus access + /// to this variable needs a mutex. + bool finished; + + /// Return value for lzma_outq_read() when the last byte from + /// a finished buffer has been read. Defaults to LZMA_STREAM_END. + /// This must *not* be LZMA_OK. The idea is to allow a decoder to + /// pass an error code to the main thread, setting the code here + /// together with finished = true. + lzma_ret finish_ret; + + /// Additional size information. lzma_outq_read() may read these + /// when "finished" is true. + lzma_vli unpadded_size; + lzma_vli uncompressed_size; + + /// Buffer of "allocated" bytes + uint8_t buf[]; +}; + + +typedef struct { + /// Linked list of buffers in use. The next output byte will be + /// read from the head and buffers for the next thread will be + /// appended to the tail. tail->next is always NULL. + lzma_outbuf *head; + lzma_outbuf *tail; + + /// Number of bytes read from head->buf[] in lzma_outq_read() + size_t read_pos; + + /// Linked list of allocated buffers that aren't currently used. + /// This way buffers of similar size can be reused and don't + /// need to be reallocated every time. For simplicity, all + /// cached buffers in the list have the same allocated size. + lzma_outbuf *cache; + + /// Total amount of memory allocated for buffers + uint64_t mem_allocated; + + /// Amount of memory used by the buffers that are in use in + /// the head...tail linked list. + uint64_t mem_in_use; + + /// Number of buffers in use in the head...tail list. If and only if + /// this is zero, the pointers head and tail above are NULL. + uint32_t bufs_in_use; + + /// Number of buffers allocated (in use + cached) + uint32_t bufs_allocated; + + /// Maximum allowed number of allocated buffers + uint32_t bufs_limit; +} lzma_outq; + + +/** + * \brief Calculate the memory usage of an output queue + * + * \return Approximate memory usage in bytes or UINT64_MAX on error. + */ +extern uint64_t lzma_outq_memusage(uint64_t buf_size_max, uint32_t threads); + + +/// \brief Initialize an output queue +/// +/// \param outq Pointer to an output queue. Before calling +/// this function the first time, *outq should +/// have been zeroed with memzero() so that this +/// function knows that there are no previous +/// allocations to free. +/// \param allocator Pointer to allocator or NULL +/// \param threads Number of buffers that may be in use +/// concurrently. Note that more than this number +/// of buffers may actually get allocated to +/// improve performance when buffers finish +/// out of order. The actual maximum number of +/// allocated buffers is derived from the number +/// of threads. +/// +/// \return - LZMA_OK +/// - LZMA_MEM_ERROR +/// +extern lzma_ret lzma_outq_init(lzma_outq *outq, + const lzma_allocator *allocator, uint32_t threads); + + +/// \brief Free the memory associated with the output queue +extern void lzma_outq_end(lzma_outq *outq, const lzma_allocator *allocator); + + +/// \brief Free all cached buffers that consume memory but aren't in use +extern void lzma_outq_clear_cache( + lzma_outq *outq, const lzma_allocator *allocator); + + +/// \brief Like lzma_outq_clear_cache() but might keep one buffer +/// +/// One buffer is not freed if its size is equal to keep_size. +/// This is useful if the caller knows that it will soon need a buffer of +/// keep_size bytes. This way it won't be freed and immediately reallocated. +extern void lzma_outq_clear_cache2( + lzma_outq *outq, const lzma_allocator *allocator, + size_t keep_size); + + +/// \brief Preallocate a new buffer into cache +/// +/// Splitting the buffer allocation into a separate function makes it +/// possible to ensure that way lzma_outq_get_buf() cannot fail. +/// If the preallocated buffer isn't actually used (for example, some +/// other error occurs), the caller has to do nothing as the buffer will +/// be used later or cleared from the cache when not needed. +/// +/// \return LZMA_OK on success, LZMA_MEM_ERROR if allocation fails +/// +extern lzma_ret lzma_outq_prealloc_buf( + lzma_outq *outq, const lzma_allocator *allocator, size_t size); + + +/// \brief Get a new buffer +/// +/// lzma_outq_prealloc_buf() must be used to ensure that there is a buffer +/// available before calling lzma_outq_get_buf(). +/// +extern lzma_outbuf *lzma_outq_get_buf(lzma_outq *outq, void *worker); + + +/// \brief Test if there is data ready to be read +/// +/// Call to this function must be protected with the same mutex that +/// is used to protect lzma_outbuf.finished. +/// +extern bool lzma_outq_is_readable(const lzma_outq *outq); + + +/// \brief Read finished data +/// +/// \param outq Pointer to an output queue +/// \param allocator lzma_allocator for custom allocator functions +/// \param out Beginning of the output buffer +/// \param out_pos The next byte will be written to +/// out[*out_pos]. +/// \param out_size Size of the out buffer; the first byte into +/// which no data is written to is out[out_size]. +/// \param unpadded_size Unpadded Size from the Block encoder +/// \param uncompressed_size Uncompressed Size from the Block encoder +/// +/// \return - LZMA: All OK. Either no data was available or the buffer +/// being read didn't become empty yet. +/// - LZMA_STREAM_END: The buffer being read was finished. +/// *unpadded_size and *uncompressed_size were set if they +/// were not NULL. +/// +/// \note This reads lzma_outbuf.finished and .pos variables and thus +/// calls to this function need to be protected with a mutex. +/// +extern lzma_ret lzma_outq_read(lzma_outq *restrict outq, + const lzma_allocator *restrict allocator, + uint8_t *restrict out, size_t *restrict out_pos, + size_t out_size, lzma_vli *restrict unpadded_size, + lzma_vli *restrict uncompressed_size); + + +/// \brief Enable partial output from a worker thread +/// +/// If the buffer at the head of the output queue isn't finished, +/// this will call enable_partial_output on the worker associated with +/// that output buffer. +/// +/// \note This reads a lzma_outbuf.finished variable and thus +/// calls to this function need to be protected with a mutex. +/// +extern void lzma_outq_enable_partial_output(lzma_outq *outq, + void (*enable_partial_output)(void *worker)); + + +/// \brief Test if there is at least one buffer free +/// +/// This must be used before getting a new buffer with lzma_outq_get_buf(). +/// +static inline bool +lzma_outq_has_buf(const lzma_outq *outq) +{ + return outq->bufs_in_use < outq->bufs_limit; +} + + +/// \brief Test if the queue is completely empty +static inline bool +lzma_outq_is_empty(const lzma_outq *outq) +{ + return outq->bufs_in_use == 0; +} + + +/// \brief Get the amount of memory needed for a single lzma_outbuf +/// +/// \note Caller must check that the argument is significantly less +/// than SIZE_MAX to avoid an integer overflow! +static inline uint64_t +lzma_outq_outbuf_memusage(size_t buf_size) +{ + assert(buf_size <= SIZE_MAX - sizeof(lzma_outbuf)); + return sizeof(lzma_outbuf) + buf_size; +} + +#endif diff --git a/src/native/external/xz/src/liblzma/common/stream_buffer_decoder.c b/src/native/external/xz/src/liblzma/common/stream_buffer_decoder.c new file mode 100644 index 00000000000000..c4f91fb498393f --- /dev/null +++ b/src/native/external/xz/src/liblzma/common/stream_buffer_decoder.c @@ -0,0 +1,90 @@ +// SPDX-License-Identifier: 0BSD + +/////////////////////////////////////////////////////////////////////////////// +// +/// \file stream_buffer_decoder.c +/// \brief Single-call .xz Stream decoder +// +// Author: Lasse Collin +// +/////////////////////////////////////////////////////////////////////////////// + +#include "stream_decoder.h" + + +extern LZMA_API(lzma_ret) +lzma_stream_buffer_decode(uint64_t *memlimit, uint32_t flags, + const lzma_allocator *allocator, + const uint8_t *in, size_t *in_pos, size_t in_size, + uint8_t *out, size_t *out_pos, size_t out_size) +{ + // Sanity checks + if (in_pos == NULL || (in == NULL && *in_pos != in_size) + || *in_pos > in_size || out_pos == NULL + || (out == NULL && *out_pos != out_size) + || *out_pos > out_size) + return LZMA_PROG_ERROR; + + // Catch flags that are not allowed in buffer-to-buffer decoding. + if (flags & LZMA_TELL_ANY_CHECK) + return LZMA_PROG_ERROR; + + // Initialize the Stream decoder. + // TODO: We need something to tell the decoder that it can use the + // output buffer as workspace, and thus save significant amount of RAM. + lzma_next_coder stream_decoder = LZMA_NEXT_CODER_INIT; + lzma_ret ret = lzma_stream_decoder_init( + &stream_decoder, allocator, *memlimit, flags); + + if (ret == LZMA_OK) { + // Save the positions so that we can restore them in case + // an error occurs. + const size_t in_start = *in_pos; + const size_t out_start = *out_pos; + + // Do the actual decoding. + ret = stream_decoder.code(stream_decoder.coder, allocator, + in, in_pos, in_size, out, out_pos, out_size, + LZMA_FINISH); + + if (ret == LZMA_STREAM_END) { + ret = LZMA_OK; + } else { + // Something went wrong, restore the positions. + *in_pos = in_start; + *out_pos = out_start; + + if (ret == LZMA_OK) { + // Either the input was truncated or the + // output buffer was too small. + assert(*in_pos == in_size + || *out_pos == out_size); + + // If all the input was consumed, then the + // input is truncated, even if the output + // buffer is also full. This is because + // processing the last byte of the Stream + // never produces output. + if (*in_pos == in_size) + ret = LZMA_DATA_ERROR; + else + ret = LZMA_BUF_ERROR; + + } else if (ret == LZMA_MEMLIMIT_ERROR) { + // Let the caller know how much memory would + // have been needed. + uint64_t memusage; + (void)stream_decoder.memconfig( + stream_decoder.coder, + memlimit, &memusage, 0); + } + } + } + + // Free the decoder memory. This needs to be done even if + // initialization fails, because the internal API doesn't + // require the initialization function to free its memory on error. + lzma_next_end(&stream_decoder, allocator); + + return ret; +} diff --git a/src/native/external/xz/src/liblzma/common/stream_buffer_encoder.c b/src/native/external/xz/src/liblzma/common/stream_buffer_encoder.c new file mode 100644 index 00000000000000..04d586959469df --- /dev/null +++ b/src/native/external/xz/src/liblzma/common/stream_buffer_encoder.c @@ -0,0 +1,141 @@ +// SPDX-License-Identifier: 0BSD + +/////////////////////////////////////////////////////////////////////////////// +// +/// \file stream_buffer_encoder.c +/// \brief Single-call .xz Stream encoder +// +// Author: Lasse Collin +// +/////////////////////////////////////////////////////////////////////////////// + +#include "common.h" +#include "index.h" + + +/// Maximum size of Index that has exactly one Record. +/// Index Indicator + Number of Records + Record + CRC32 rounded up to +/// the next multiple of four. +#define INDEX_BOUND ((1 + 1 + 2 * LZMA_VLI_BYTES_MAX + 4 + 3) & ~3) + +/// Stream Header, Stream Footer, and Index +#define HEADERS_BOUND (2 * LZMA_STREAM_HEADER_SIZE + INDEX_BOUND) + + +extern LZMA_API(size_t) +lzma_stream_buffer_bound(size_t uncompressed_size) +{ + // Get the maximum possible size of a Block. + const size_t block_bound = lzma_block_buffer_bound(uncompressed_size); + if (block_bound == 0) + return 0; + + // Catch the possible integer overflow and also prevent the size of + // the Stream exceeding LZMA_VLI_MAX (theoretically possible on + // 64-bit systems). + if (my_min(SIZE_MAX, LZMA_VLI_MAX) - block_bound < HEADERS_BOUND) + return 0; + + return block_bound + HEADERS_BOUND; +} + + +extern LZMA_API(lzma_ret) +lzma_stream_buffer_encode(lzma_filter *filters, lzma_check check, + const lzma_allocator *allocator, + const uint8_t *in, size_t in_size, + uint8_t *out, size_t *out_pos_ptr, size_t out_size) +{ + // Sanity checks + if (filters == NULL || (unsigned int)(check) > LZMA_CHECK_ID_MAX + || (in == NULL && in_size != 0) || out == NULL + || out_pos_ptr == NULL || *out_pos_ptr > out_size) + return LZMA_PROG_ERROR; + + if (!lzma_check_is_supported(check)) + return LZMA_UNSUPPORTED_CHECK; + + // Note for the paranoids: Index encoder prevents the Stream from + // getting too big and still being accepted with LZMA_OK, and Block + // encoder catches if the input is too big. So we don't need to + // separately check if the buffers are too big. + + // Use a local copy. We update *out_pos_ptr only if everything + // succeeds. + size_t out_pos = *out_pos_ptr; + + // Check that there's enough space for both Stream Header and + // Stream Footer. + if (out_size - out_pos <= 2 * LZMA_STREAM_HEADER_SIZE) + return LZMA_BUF_ERROR; + + // Reserve space for Stream Footer so we don't need to check for + // available space again before encoding Stream Footer. + out_size -= LZMA_STREAM_HEADER_SIZE; + + // Encode the Stream Header. + lzma_stream_flags stream_flags = { + .version = 0, + .check = check, + }; + + if (lzma_stream_header_encode(&stream_flags, out + out_pos) + != LZMA_OK) + return LZMA_PROG_ERROR; + + out_pos += LZMA_STREAM_HEADER_SIZE; + + // Encode a Block but only if there is at least one byte of input. + lzma_block block = { + .version = 0, + .check = check, + .filters = filters, + }; + + if (in_size > 0) + return_if_error(lzma_block_buffer_encode(&block, allocator, + in, in_size, out, &out_pos, out_size)); + + // Index + { + // Create an Index. It will have one Record if there was + // at least one byte of input to encode. Otherwise the + // Index will be empty. + lzma_index *i = lzma_index_init(allocator); + if (i == NULL) + return LZMA_MEM_ERROR; + + lzma_ret ret = LZMA_OK; + + if (in_size > 0) + ret = lzma_index_append(i, allocator, + lzma_block_unpadded_size(&block), + block.uncompressed_size); + + // If adding the Record was successful, encode the Index + // and get its size which will be stored into Stream Footer. + if (ret == LZMA_OK) { + ret = lzma_index_buffer_encode( + i, out, &out_pos, out_size); + + stream_flags.backward_size = lzma_index_size(i); + } + + lzma_index_end(i, allocator); + + if (ret != LZMA_OK) + return ret; + } + + // Stream Footer. We have already reserved space for this. + if (lzma_stream_footer_encode(&stream_flags, out + out_pos) + != LZMA_OK) + return LZMA_PROG_ERROR; + + out_pos += LZMA_STREAM_HEADER_SIZE; + + // Everything went fine, make the new output position available + // to the application. + *out_pos_ptr = out_pos; + return LZMA_OK; +} diff --git a/src/native/external/xz/src/liblzma/common/stream_decoder.c b/src/native/external/xz/src/liblzma/common/stream_decoder.c new file mode 100644 index 00000000000000..94004b74a16591 --- /dev/null +++ b/src/native/external/xz/src/liblzma/common/stream_decoder.c @@ -0,0 +1,469 @@ +// SPDX-License-Identifier: 0BSD + +/////////////////////////////////////////////////////////////////////////////// +// +/// \file stream_decoder.c +/// \brief Decodes .xz Streams +// +// Author: Lasse Collin +// +/////////////////////////////////////////////////////////////////////////////// + +#include "stream_decoder.h" +#include "block_decoder.h" +#include "index.h" + + +typedef struct { + enum { + SEQ_STREAM_HEADER, + SEQ_BLOCK_HEADER, + SEQ_BLOCK_INIT, + SEQ_BLOCK_RUN, + SEQ_INDEX, + SEQ_STREAM_FOOTER, + SEQ_STREAM_PADDING, + } sequence; + + /// Block decoder + lzma_next_coder block_decoder; + + /// Block options decoded by the Block Header decoder and used by + /// the Block decoder. + lzma_block block_options; + + /// Stream Flags from Stream Header + lzma_stream_flags stream_flags; + + /// Index is hashed so that it can be compared to the sizes of Blocks + /// with O(1) memory usage. + lzma_index_hash *index_hash; + + /// Memory usage limit + uint64_t memlimit; + + /// Amount of memory actually needed (only an estimate) + uint64_t memusage; + + /// If true, LZMA_NO_CHECK is returned if the Stream has + /// no integrity check. + bool tell_no_check; + + /// If true, LZMA_UNSUPPORTED_CHECK is returned if the Stream has + /// an integrity check that isn't supported by this liblzma build. + bool tell_unsupported_check; + + /// If true, LZMA_GET_CHECK is returned after decoding Stream Header. + bool tell_any_check; + + /// If true, we will tell the Block decoder to skip calculating + /// and verifying the integrity check. + bool ignore_check; + + /// If true, we will decode concatenated Streams that possibly have + /// Stream Padding between or after them. LZMA_STREAM_END is returned + /// once the application isn't giving us any new input (LZMA_FINISH), + /// and we aren't in the middle of a Stream, and possible + /// Stream Padding is a multiple of four bytes. + bool concatenated; + + /// When decoding concatenated Streams, this is true as long as we + /// are decoding the first Stream. This is needed to avoid misleading + /// LZMA_FORMAT_ERROR in case the later Streams don't have valid magic + /// bytes. + bool first_stream; + + /// Write position in buffer[] and position in Stream Padding + size_t pos; + + /// Buffer to hold Stream Header, Block Header, and Stream Footer. + /// Block Header has biggest maximum size. + uint8_t buffer[LZMA_BLOCK_HEADER_SIZE_MAX]; +} lzma_stream_coder; + + +static lzma_ret +stream_decoder_reset(lzma_stream_coder *coder, const lzma_allocator *allocator) +{ + // Initialize the Index hash used to verify the Index. + coder->index_hash = lzma_index_hash_init(coder->index_hash, allocator); + if (coder->index_hash == NULL) + return LZMA_MEM_ERROR; + + // Reset the rest of the variables. + coder->sequence = SEQ_STREAM_HEADER; + coder->pos = 0; + + return LZMA_OK; +} + + +static lzma_ret +stream_decode(void *coder_ptr, const lzma_allocator *allocator, + const uint8_t *restrict in, size_t *restrict in_pos, + size_t in_size, uint8_t *restrict out, + size_t *restrict out_pos, size_t out_size, lzma_action action) +{ + lzma_stream_coder *coder = coder_ptr; + + // When decoding the actual Block, it may be able to produce more + // output even if we don't give it any new input. + while (true) + switch (coder->sequence) { + case SEQ_STREAM_HEADER: { + // Copy the Stream Header to the internal buffer. + lzma_bufcpy(in, in_pos, in_size, coder->buffer, &coder->pos, + LZMA_STREAM_HEADER_SIZE); + + // Return if we didn't get the whole Stream Header yet. + if (coder->pos < LZMA_STREAM_HEADER_SIZE) + return LZMA_OK; + + coder->pos = 0; + + // Decode the Stream Header. + const lzma_ret ret = lzma_stream_header_decode( + &coder->stream_flags, coder->buffer); + if (ret != LZMA_OK) + return ret == LZMA_FORMAT_ERROR && !coder->first_stream + ? LZMA_DATA_ERROR : ret; + + // If we are decoding concatenated Streams, and the later + // Streams have invalid Header Magic Bytes, we give + // LZMA_DATA_ERROR instead of LZMA_FORMAT_ERROR. + coder->first_stream = false; + + // Copy the type of the Check so that Block Header and Block + // decoders see it. + coder->block_options.check = coder->stream_flags.check; + + // Even if we return LZMA_*_CHECK below, we want + // to continue from Block Header decoding. + coder->sequence = SEQ_BLOCK_HEADER; + + // Detect if there's no integrity check or if it is + // unsupported if those were requested by the application. + if (coder->tell_no_check && coder->stream_flags.check + == LZMA_CHECK_NONE) + return LZMA_NO_CHECK; + + if (coder->tell_unsupported_check + && !lzma_check_is_supported( + coder->stream_flags.check)) + return LZMA_UNSUPPORTED_CHECK; + + if (coder->tell_any_check) + return LZMA_GET_CHECK; + + FALLTHROUGH; + } + + case SEQ_BLOCK_HEADER: { + if (*in_pos >= in_size) + return LZMA_OK; + + if (coder->pos == 0) { + // Detect if it's Index. + if (in[*in_pos] == INDEX_INDICATOR) { + coder->sequence = SEQ_INDEX; + break; + } + + // Calculate the size of the Block Header. Note that + // Block Header decoder wants to see this byte too + // so don't advance *in_pos. + coder->block_options.header_size + = lzma_block_header_size_decode( + in[*in_pos]); + } + + // Copy the Block Header to the internal buffer. + lzma_bufcpy(in, in_pos, in_size, coder->buffer, &coder->pos, + coder->block_options.header_size); + + // Return if we didn't get the whole Block Header yet. + if (coder->pos < coder->block_options.header_size) + return LZMA_OK; + + coder->pos = 0; + coder->sequence = SEQ_BLOCK_INIT; + FALLTHROUGH; + } + + case SEQ_BLOCK_INIT: { + // Checking memusage and doing the initialization needs + // its own sequence point because we need to be able to + // retry if we return LZMA_MEMLIMIT_ERROR. + + // Version 1 is needed to support the .ignore_check option. + coder->block_options.version = 1; + + // Set up a buffer to hold the filter chain. Block Header + // decoder will initialize all members of this array so + // we don't need to do it here. + lzma_filter filters[LZMA_FILTERS_MAX + 1]; + coder->block_options.filters = filters; + + // Decode the Block Header. + return_if_error(lzma_block_header_decode(&coder->block_options, + allocator, coder->buffer)); + + // If LZMA_IGNORE_CHECK was used, this flag needs to be set. + // It has to be set after lzma_block_header_decode() because + // it always resets this to false. + coder->block_options.ignore_check = coder->ignore_check; + + // Check the memory usage limit. + const uint64_t memusage = lzma_raw_decoder_memusage(filters); + lzma_ret ret; + + if (memusage == UINT64_MAX) { + // One or more unknown Filter IDs. + ret = LZMA_OPTIONS_ERROR; + } else { + // Now we can set coder->memusage since we know that + // the filter chain is valid. We don't want + // lzma_memusage() to return UINT64_MAX in case of + // invalid filter chain. + coder->memusage = memusage; + + if (memusage > coder->memlimit) { + // The chain would need too much memory. + ret = LZMA_MEMLIMIT_ERROR; + } else { + // Memory usage is OK. + // Initialize the Block decoder. + ret = lzma_block_decoder_init( + &coder->block_decoder, + allocator, + &coder->block_options); + } + } + + // Free the allocated filter options since they are needed + // only to initialize the Block decoder. + lzma_filters_free(filters, allocator); + coder->block_options.filters = NULL; + + // Check if memory usage calculation and Block decoder + // initialization succeeded. + if (ret != LZMA_OK) + return ret; + + coder->sequence = SEQ_BLOCK_RUN; + FALLTHROUGH; + } + + case SEQ_BLOCK_RUN: { + const lzma_ret ret = coder->block_decoder.code( + coder->block_decoder.coder, allocator, + in, in_pos, in_size, out, out_pos, out_size, + action); + + if (ret != LZMA_STREAM_END) + return ret; + + // Block decoded successfully. Add the new size pair to + // the Index hash. + return_if_error(lzma_index_hash_append(coder->index_hash, + lzma_block_unpadded_size( + &coder->block_options), + coder->block_options.uncompressed_size)); + + coder->sequence = SEQ_BLOCK_HEADER; + break; + } + + case SEQ_INDEX: { + // If we don't have any input, don't call + // lzma_index_hash_decode() since it would return + // LZMA_BUF_ERROR, which we must not do here. + if (*in_pos >= in_size) + return LZMA_OK; + + // Decode the Index and compare it to the hash calculated + // from the sizes of the Blocks (if any). + const lzma_ret ret = lzma_index_hash_decode(coder->index_hash, + in, in_pos, in_size); + if (ret != LZMA_STREAM_END) + return ret; + + coder->sequence = SEQ_STREAM_FOOTER; + FALLTHROUGH; + } + + case SEQ_STREAM_FOOTER: { + // Copy the Stream Footer to the internal buffer. + lzma_bufcpy(in, in_pos, in_size, coder->buffer, &coder->pos, + LZMA_STREAM_HEADER_SIZE); + + // Return if we didn't get the whole Stream Footer yet. + if (coder->pos < LZMA_STREAM_HEADER_SIZE) + return LZMA_OK; + + coder->pos = 0; + + // Decode the Stream Footer. The decoder gives + // LZMA_FORMAT_ERROR if the magic bytes don't match, + // so convert that return code to LZMA_DATA_ERROR. + lzma_stream_flags footer_flags; + const lzma_ret ret = lzma_stream_footer_decode( + &footer_flags, coder->buffer); + if (ret != LZMA_OK) + return ret == LZMA_FORMAT_ERROR + ? LZMA_DATA_ERROR : ret; + + // Check that Index Size stored in the Stream Footer matches + // the real size of the Index field. + if (lzma_index_hash_size(coder->index_hash) + != footer_flags.backward_size) + return LZMA_DATA_ERROR; + + // Compare that the Stream Flags fields are identical in + // both Stream Header and Stream Footer. + return_if_error(lzma_stream_flags_compare( + &coder->stream_flags, &footer_flags)); + + if (!coder->concatenated) + return LZMA_STREAM_END; + + coder->sequence = SEQ_STREAM_PADDING; + FALLTHROUGH; + } + + case SEQ_STREAM_PADDING: + assert(coder->concatenated); + + // Skip over possible Stream Padding. + while (true) { + if (*in_pos >= in_size) { + // Unless LZMA_FINISH was used, we cannot + // know if there's more input coming later. + if (action != LZMA_FINISH) + return LZMA_OK; + + // Stream Padding must be a multiple of + // four bytes. + return coder->pos == 0 + ? LZMA_STREAM_END + : LZMA_DATA_ERROR; + } + + // If the byte is not zero, it probably indicates + // beginning of a new Stream (or the file is corrupt). + if (in[*in_pos] != 0x00) + break; + + ++*in_pos; + coder->pos = (coder->pos + 1) & 3; + } + + // Stream Padding must be a multiple of four bytes (empty + // Stream Padding is OK). + if (coder->pos != 0) { + ++*in_pos; + return LZMA_DATA_ERROR; + } + + // Prepare to decode the next Stream. + return_if_error(stream_decoder_reset(coder, allocator)); + break; + + default: + assert(0); + return LZMA_PROG_ERROR; + } + + // Never reached +} + + +static void +stream_decoder_end(void *coder_ptr, const lzma_allocator *allocator) +{ + lzma_stream_coder *coder = coder_ptr; + lzma_next_end(&coder->block_decoder, allocator); + lzma_index_hash_end(coder->index_hash, allocator); + lzma_free(coder, allocator); + return; +} + + +static lzma_check +stream_decoder_get_check(const void *coder_ptr) +{ + const lzma_stream_coder *coder = coder_ptr; + return coder->stream_flags.check; +} + + +static lzma_ret +stream_decoder_memconfig(void *coder_ptr, uint64_t *memusage, + uint64_t *old_memlimit, uint64_t new_memlimit) +{ + lzma_stream_coder *coder = coder_ptr; + + *memusage = coder->memusage; + *old_memlimit = coder->memlimit; + + if (new_memlimit != 0) { + if (new_memlimit < coder->memusage) + return LZMA_MEMLIMIT_ERROR; + + coder->memlimit = new_memlimit; + } + + return LZMA_OK; +} + + +extern lzma_ret +lzma_stream_decoder_init( + lzma_next_coder *next, const lzma_allocator *allocator, + uint64_t memlimit, uint32_t flags) +{ + lzma_next_coder_init(&lzma_stream_decoder_init, next, allocator); + + if (flags & ~LZMA_SUPPORTED_FLAGS) + return LZMA_OPTIONS_ERROR; + + lzma_stream_coder *coder = next->coder; + if (coder == NULL) { + coder = lzma_alloc(sizeof(lzma_stream_coder), allocator); + if (coder == NULL) + return LZMA_MEM_ERROR; + + next->coder = coder; + next->code = &stream_decode; + next->end = &stream_decoder_end; + next->get_check = &stream_decoder_get_check; + next->memconfig = &stream_decoder_memconfig; + + coder->block_decoder = LZMA_NEXT_CODER_INIT; + coder->index_hash = NULL; + } + + coder->memlimit = my_max(1, memlimit); + coder->memusage = LZMA_MEMUSAGE_BASE; + coder->tell_no_check = (flags & LZMA_TELL_NO_CHECK) != 0; + coder->tell_unsupported_check + = (flags & LZMA_TELL_UNSUPPORTED_CHECK) != 0; + coder->tell_any_check = (flags & LZMA_TELL_ANY_CHECK) != 0; + coder->ignore_check = (flags & LZMA_IGNORE_CHECK) != 0; + coder->concatenated = (flags & LZMA_CONCATENATED) != 0; + coder->first_stream = true; + + return stream_decoder_reset(coder, allocator); +} + + +extern LZMA_API(lzma_ret) +lzma_stream_decoder(lzma_stream *strm, uint64_t memlimit, uint32_t flags) +{ + lzma_next_strm_init(lzma_stream_decoder_init, strm, memlimit, flags); + + strm->internal->supported_actions[LZMA_RUN] = true; + strm->internal->supported_actions[LZMA_FINISH] = true; + + return LZMA_OK; +} diff --git a/src/native/external/xz/src/liblzma/common/stream_decoder.h b/src/native/external/xz/src/liblzma/common/stream_decoder.h new file mode 100644 index 00000000000000..5803715374d614 --- /dev/null +++ b/src/native/external/xz/src/liblzma/common/stream_decoder.h @@ -0,0 +1,21 @@ +// SPDX-License-Identifier: 0BSD + +/////////////////////////////////////////////////////////////////////////////// +// +/// \file stream_decoder.h +/// \brief Decodes .xz Streams +// +// Author: Lasse Collin +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef LZMA_STREAM_DECODER_H +#define LZMA_STREAM_DECODER_H + +#include "common.h" + +extern lzma_ret lzma_stream_decoder_init( + lzma_next_coder *next, const lzma_allocator *allocator, + uint64_t memlimit, uint32_t flags); + +#endif diff --git a/src/native/external/xz/src/liblzma/common/stream_decoder_mt.c b/src/native/external/xz/src/liblzma/common/stream_decoder_mt.c new file mode 100644 index 00000000000000..d51366e1b02cd6 --- /dev/null +++ b/src/native/external/xz/src/liblzma/common/stream_decoder_mt.c @@ -0,0 +1,2004 @@ +// SPDX-License-Identifier: 0BSD + +/////////////////////////////////////////////////////////////////////////////// +// +/// \file stream_decoder_mt.c +/// \brief Multithreaded .xz Stream decoder +// +// Authors: Sebastian Andrzej Siewior +// Lasse Collin +// +/////////////////////////////////////////////////////////////////////////////// + +#include "common.h" +#include "block_decoder.h" +#include "stream_decoder.h" +#include "index.h" +#include "outqueue.h" + + +typedef enum { + /// Waiting for work. + /// Main thread may change this to THR_RUN or THR_EXIT. + THR_IDLE, + + /// Decoding is in progress. + /// Main thread may change this to THR_IDLE or THR_EXIT. + /// The worker thread may change this to THR_IDLE. + THR_RUN, + + /// The main thread wants the thread to exit. + THR_EXIT, + +} worker_state; + + +struct worker_thread { + /// Worker state is protected with our mutex. + worker_state state; + + /// Input buffer that will contain the whole Block except Block Header. + uint8_t *in; + + /// Amount of memory allocated for "in" + size_t in_size; + + /// Number of bytes written to "in" by the main thread + size_t in_filled; + + /// Number of bytes consumed from "in" by the worker thread. + size_t in_pos; + + /// Amount of uncompressed data that has been decoded. This local + /// copy is needed because updating outbuf->pos requires locking + /// the main mutex (coder->mutex). + size_t out_pos; + + /// Pointer to the main structure is needed to (1) lock the main + /// mutex (coder->mutex) when updating outbuf->pos and (2) when + /// putting this thread back to the stack of free threads. + struct lzma_stream_coder *coder; + + /// The allocator is set by the main thread. Since a copy of the + /// pointer is kept here, the application must not change the + /// allocator before calling lzma_end(). + const lzma_allocator *allocator; + + /// Output queue buffer to which the uncompressed data is written. + lzma_outbuf *outbuf; + + /// Amount of compressed data that has already been decompressed. + /// This is updated from in_pos when our mutex is locked. + /// This is size_t, not uint64_t, because per-thread progress + /// is limited to sizes of allocated buffers. + size_t progress_in; + + /// Like progress_in but for uncompressed data. + size_t progress_out; + + /// Updating outbuf->pos requires locking the main mutex + /// (coder->mutex). Since the main thread will only read output + /// from the oldest outbuf in the queue, only the worker thread + /// that is associated with the oldest outbuf needs to update its + /// outbuf->pos. This avoids useless mutex contention that would + /// happen if all worker threads were frequently locking the main + /// mutex to update their outbuf->pos. + /// + /// When partial_update_enabled is true, this worker thread will + /// update outbuf->pos and outbuf->decoder_in_pos after each call + /// to the Block decoder. This is initially false. Main thread may + /// set this to true. + bool partial_update_enabled; + + /// Once the main thread has set partial_updated_enabled = true, + /// we will do the first partial update as soon as we can and + /// set partial_update_started = true. After the first update, + /// we only update if we have made progress. This avoids useless + /// locking of thr->coder->mutex. + bool partial_update_started; + + /// Block decoder + lzma_next_coder block_decoder; + + /// Thread-specific Block options are needed because the Block + /// decoder modifies the struct given to it at initialization. + lzma_block block_options; + + /// Filter chain memory usage + uint64_t mem_filters; + + /// Next structure in the stack of free worker threads. + struct worker_thread *next; + + mythread_mutex mutex; + mythread_cond cond; + + /// The ID of this thread is used to join the thread + /// when it's not needed anymore. + mythread thread_id; +}; + + +struct lzma_stream_coder { + enum { + SEQ_STREAM_HEADER, + SEQ_BLOCK_HEADER, + SEQ_BLOCK_INIT, + SEQ_BLOCK_THR_INIT, + SEQ_BLOCK_THR_RUN, + SEQ_BLOCK_DIRECT_INIT, + SEQ_BLOCK_DIRECT_RUN, + SEQ_INDEX_WAIT_OUTPUT, + SEQ_INDEX_DECODE, + SEQ_STREAM_FOOTER, + SEQ_STREAM_PADDING, + SEQ_ERROR, + } sequence; + + /// Block decoder + lzma_next_coder block_decoder; + + /// Every Block Header will be decoded into this structure. + /// This is also used to initialize a Block decoder when in + /// direct mode. In threaded mode, a thread-specific copy will + /// be made for decoder initialization because the Block decoder + /// will modify the structure given to it. + lzma_block block_options; + + /// Buffer to hold a filter chain for Block Header decoding and + /// initialization. These are freed after successful Block decoder + /// initialization or at stream_decoder_mt_end(). The thread-specific + /// copy of block_options won't hold a pointer to filters[] after + /// initialization. + lzma_filter filters[LZMA_FILTERS_MAX + 1]; + + /// Stream Flags from Stream Header + lzma_stream_flags stream_flags; + + /// Index is hashed so that it can be compared to the sizes of Blocks + /// with O(1) memory usage. + lzma_index_hash *index_hash; + + + /// Maximum wait time if cannot use all the input and cannot + /// fill the output buffer. This is in milliseconds. + uint32_t timeout; + + + /// Error code from a worker thread. + /// + /// \note Use mutex. + lzma_ret thread_error; + + /// Error code to return after pending output has been copied out. If + /// set in read_output_and_wait(), this is a mirror of thread_error. + /// If set in stream_decode_mt() then it's, for example, error that + /// occurred when decoding Block Header. + lzma_ret pending_error; + + /// Number of threads that will be created at maximum. + uint32_t threads_max; + + /// Number of thread structures that have been initialized from + /// "threads", and thus the number of worker threads actually + /// created so far. + uint32_t threads_initialized; + + /// Array of allocated thread-specific structures. When no threads + /// are in use (direct mode) this is NULL. In threaded mode this + /// points to an array of threads_max number of worker_thread structs. + struct worker_thread *threads; + + /// Stack of free threads. When a thread finishes, it puts itself + /// back into this stack. This starts as empty because threads + /// are created only when actually needed. + /// + /// \note Use mutex. + struct worker_thread *threads_free; + + /// The most recent worker thread to which the main thread writes + /// the new input from the application. + struct worker_thread *thr; + + /// Output buffer queue for decompressed data from the worker threads + /// + /// \note Use mutex with operations that need it. + lzma_outq outq; + + mythread_mutex mutex; + mythread_cond cond; + + + /// Memory usage that will not be exceeded in multi-threaded mode. + /// Single-threaded mode can exceed this even by a large amount. + uint64_t memlimit_threading; + + /// Memory usage limit that should never be exceeded. + /// LZMA_MEMLIMIT_ERROR will be returned if decoding isn't possible + /// even in single-threaded mode without exceeding this limit. + uint64_t memlimit_stop; + + /// Amount of memory in use by the direct mode decoder + /// (coder->block_decoder). In threaded mode this is 0. + uint64_t mem_direct_mode; + + /// Amount of memory needed by the running worker threads. + /// This doesn't include the memory needed by the output buffer. + /// + /// \note Use mutex. + uint64_t mem_in_use; + + /// Amount of memory used by the idle (cached) threads. + /// + /// \note Use mutex. + uint64_t mem_cached; + + + /// Amount of memory needed for the filter chain of the next Block. + uint64_t mem_next_filters; + + /// Amount of memory needed for the thread-specific input buffer + /// for the next Block. + uint64_t mem_next_in; + + /// Amount of memory actually needed to decode the next Block + /// in threaded mode. This is + /// mem_next_filters + mem_next_in + memory needed for lzma_outbuf. + uint64_t mem_next_block; + + + /// Amount of compressed data in Stream Header + Blocks that have + /// already been finished. + /// + /// \note Use mutex. + uint64_t progress_in; + + /// Amount of uncompressed data in Blocks that have already + /// been finished. + /// + /// \note Use mutex. + uint64_t progress_out; + + + /// If true, LZMA_NO_CHECK is returned if the Stream has + /// no integrity check. + bool tell_no_check; + + /// If true, LZMA_UNSUPPORTED_CHECK is returned if the Stream has + /// an integrity check that isn't supported by this liblzma build. + bool tell_unsupported_check; + + /// If true, LZMA_GET_CHECK is returned after decoding Stream Header. + bool tell_any_check; + + /// If true, we will tell the Block decoder to skip calculating + /// and verifying the integrity check. + bool ignore_check; + + /// If true, we will decode concatenated Streams that possibly have + /// Stream Padding between or after them. LZMA_STREAM_END is returned + /// once the application isn't giving us any new input (LZMA_FINISH), + /// and we aren't in the middle of a Stream, and possible + /// Stream Padding is a multiple of four bytes. + bool concatenated; + + /// If true, we will return any errors immediately instead of first + /// producing all output before the location of the error. + bool fail_fast; + + + /// When decoding concatenated Streams, this is true as long as we + /// are decoding the first Stream. This is needed to avoid misleading + /// LZMA_FORMAT_ERROR in case the later Streams don't have valid magic + /// bytes. + bool first_stream; + + /// This is used to track if the previous call to stream_decode_mt() + /// had output space (*out_pos < out_size) and managed to fill the + /// output buffer (*out_pos == out_size). This may be set to true + /// in read_output_and_wait(). This is read and then reset to false + /// at the beginning of stream_decode_mt(). + /// + /// This is needed to support applications that call lzma_code() in + /// such a way that more input is provided only when lzma_code() + /// didn't fill the output buffer completely. Basically, this makes + /// it easier to convert such applications from single-threaded + /// decoder to multi-threaded decoder. + bool out_was_filled; + + /// Write position in buffer[] and position in Stream Padding + size_t pos; + + /// Buffer to hold Stream Header, Block Header, and Stream Footer. + /// Block Header has biggest maximum size. + uint8_t buffer[LZMA_BLOCK_HEADER_SIZE_MAX]; +}; + + +/// Enables updating of outbuf->pos. This is a callback function that is +/// used with lzma_outq_enable_partial_output(). +static void +worker_enable_partial_update(void *thr_ptr) +{ + struct worker_thread *thr = thr_ptr; + + mythread_sync(thr->mutex) { + thr->partial_update_enabled = true; + mythread_cond_signal(&thr->cond); + } +} + + +static MYTHREAD_RET_TYPE +worker_decoder(void *thr_ptr) +{ + struct worker_thread *thr = thr_ptr; + size_t in_filled; + bool partial_update_enabled; + lzma_ret ret; + +next_loop_lock: + + mythread_mutex_lock(&thr->mutex); +next_loop_unlocked: + + if (thr->state == THR_IDLE) { + mythread_cond_wait(&thr->cond, &thr->mutex); + goto next_loop_unlocked; + } + + if (thr->state == THR_EXIT) { + mythread_mutex_unlock(&thr->mutex); + + lzma_free(thr->in, thr->allocator); + lzma_next_end(&thr->block_decoder, thr->allocator); + + mythread_mutex_destroy(&thr->mutex); + mythread_cond_destroy(&thr->cond); + + return MYTHREAD_RET_VALUE; + } + + assert(thr->state == THR_RUN); + + // Update progress info for get_progress(). + thr->progress_in = thr->in_pos; + thr->progress_out = thr->out_pos; + + // If we don't have any new input, wait for a signal from the main + // thread except if partial output has just been enabled + // (partial_update_enabled is true but _started is false). In that + // case we will do one normal run so that the partial output info + // gets passed to the main thread. The call to block_decoder.code() + // is useless but harmless as it can occur only once per Block. + in_filled = thr->in_filled; + partial_update_enabled = thr->partial_update_enabled; + + if (in_filled == thr->in_pos && !(partial_update_enabled + && !thr->partial_update_started)) { + mythread_cond_wait(&thr->cond, &thr->mutex); + goto next_loop_unlocked; + } + + mythread_mutex_unlock(&thr->mutex); + + // Pass the input in small chunks to the Block decoder. + // This way we react reasonably fast if we are told to stop/exit, + // and (when partial update is enabled) we tell about our progress + // to the main thread frequently enough. + const size_t chunk_size = 16384; + if ((in_filled - thr->in_pos) > chunk_size) + in_filled = thr->in_pos + chunk_size; + + ret = thr->block_decoder.code( + thr->block_decoder.coder, thr->allocator, + thr->in, &thr->in_pos, in_filled, + thr->outbuf->buf, &thr->out_pos, + thr->outbuf->allocated, LZMA_RUN); + + if (ret == LZMA_OK) { + if (partial_update_enabled) { + // Remember that we have done at least one partial + // update. After the first update we won't do updates + // unless we have made progress. + thr->partial_update_started = true; + + // The main thread is reading decompressed data + // from thr->outbuf. Tell the main thread about + // our progress. + // + // NOTE: It's possible that we consumed input without + // producing any new output so it's possible that only + // in_pos has changed. If thr->partial_update_started + // was false, it is possible that neither in_pos nor + // out_pos has changed. + mythread_sync(thr->coder->mutex) { + thr->outbuf->pos = thr->out_pos; + thr->outbuf->decoder_in_pos = thr->in_pos; + mythread_cond_signal(&thr->coder->cond); + } + } + + goto next_loop_lock; + } + + // Either we finished successfully (LZMA_STREAM_END) or an error + // occurred. + // + // The sizes are in the Block Header and the Block decoder + // checks that they match, thus we know these: + assert(ret != LZMA_STREAM_END || thr->in_pos == thr->in_size); + assert(ret != LZMA_STREAM_END + || thr->out_pos == thr->block_options.uncompressed_size); + + mythread_sync(thr->mutex) { + // Block decoder ensures this, but do a sanity check anyway + // because thr->in_filled < thr->in_size means that the main + // thread is still writing to thr->in. + if (ret == LZMA_STREAM_END && thr->in_filled != thr->in_size) { + assert(0); + ret = LZMA_PROG_ERROR; + } + + if (thr->state != THR_EXIT) + thr->state = THR_IDLE; + } + + // Free the input buffer. Don't update in_size as we need + // it later to update thr->coder->mem_in_use. + // + // This step is skipped if an error occurred because the main thread + // might still be writing to thr->in. The memory will be freed after + // threads_end() sets thr->state = THR_EXIT. + if (ret == LZMA_STREAM_END) { + lzma_free(thr->in, thr->allocator); + thr->in = NULL; + } + + mythread_sync(thr->coder->mutex) { + // Move our progress info to the main thread. + thr->coder->progress_in += thr->in_pos; + thr->coder->progress_out += thr->out_pos; + thr->progress_in = 0; + thr->progress_out = 0; + + // Mark the outbuf as finished. + thr->outbuf->pos = thr->out_pos; + thr->outbuf->decoder_in_pos = thr->in_pos; + thr->outbuf->finished = true; + thr->outbuf->finish_ret = ret; + thr->outbuf = NULL; + + // If an error occurred, tell it to the main thread. + if (ret != LZMA_STREAM_END + && thr->coder->thread_error == LZMA_OK) + thr->coder->thread_error = ret; + + // Return the worker thread to the stack of available + // threads only if no errors occurred. + if (ret == LZMA_STREAM_END) { + // Update memory usage counters. + thr->coder->mem_in_use -= thr->in_size; + thr->coder->mem_in_use -= thr->mem_filters; + thr->coder->mem_cached += thr->mem_filters; + + // Put this thread to the stack of free threads. + thr->next = thr->coder->threads_free; + thr->coder->threads_free = thr; + } + + mythread_cond_signal(&thr->coder->cond); + } + + goto next_loop_lock; +} + + +/// Tells the worker threads to exit and waits for them to terminate. +static void +threads_end(struct lzma_stream_coder *coder, const lzma_allocator *allocator) +{ + for (uint32_t i = 0; i < coder->threads_initialized; ++i) { + mythread_sync(coder->threads[i].mutex) { + coder->threads[i].state = THR_EXIT; + mythread_cond_signal(&coder->threads[i].cond); + } + } + + for (uint32_t i = 0; i < coder->threads_initialized; ++i) + mythread_join(coder->threads[i].thread_id); + + lzma_free(coder->threads, allocator); + coder->threads_initialized = 0; + coder->threads = NULL; + coder->threads_free = NULL; + + // The threads don't update these when they exit. Do it here. + coder->mem_in_use = 0; + coder->mem_cached = 0; + + return; +} + + +/// Tell worker threads to stop without doing any cleaning up. +/// The clean up will be done when threads_exit() is called; +/// it's not possible to reuse the threads after threads_stop(). +/// +/// This is called before returning an unrecoverable error code +/// to the application. It would be waste of processor time +/// to keep the threads running in such a situation. +static void +threads_stop(struct lzma_stream_coder *coder) +{ + for (uint32_t i = 0; i < coder->threads_initialized; ++i) { + // The threads that are in the THR_RUN state will stop + // when they check the state the next time. There's no + // need to signal coder->threads[i].cond. + mythread_sync(coder->threads[i].mutex) { + coder->threads[i].state = THR_IDLE; + } + } + + return; +} + + +/// Initialize a new worker_thread structure and create a new thread. +static lzma_ret +initialize_new_thread(struct lzma_stream_coder *coder, + const lzma_allocator *allocator) +{ + // Allocate the coder->threads array if needed. It's done here instead + // of when initializing the decoder because we don't need this if we + // use the direct mode (we may even free coder->threads in the middle + // of the file if we switch from threaded to direct mode). + if (coder->threads == NULL) { + coder->threads = lzma_alloc( + coder->threads_max * sizeof(struct worker_thread), + allocator); + + if (coder->threads == NULL) + return LZMA_MEM_ERROR; + } + + // Pick a free structure. + assert(coder->threads_initialized < coder->threads_max); + struct worker_thread *thr + = &coder->threads[coder->threads_initialized]; + + if (mythread_mutex_init(&thr->mutex)) + goto error_mutex; + + if (mythread_cond_init(&thr->cond)) + goto error_cond; + + thr->state = THR_IDLE; + thr->in = NULL; + thr->in_size = 0; + thr->allocator = allocator; + thr->coder = coder; + thr->outbuf = NULL; + thr->block_decoder = LZMA_NEXT_CODER_INIT; + thr->mem_filters = 0; + + if (mythread_create(&thr->thread_id, worker_decoder, thr)) + goto error_thread; + + ++coder->threads_initialized; + coder->thr = thr; + + return LZMA_OK; + +error_thread: + mythread_cond_destroy(&thr->cond); + +error_cond: + mythread_mutex_destroy(&thr->mutex); + +error_mutex: + return LZMA_MEM_ERROR; +} + + +static lzma_ret +get_thread(struct lzma_stream_coder *coder, const lzma_allocator *allocator) +{ + // If there is a free structure on the stack, use it. + mythread_sync(coder->mutex) { + if (coder->threads_free != NULL) { + coder->thr = coder->threads_free; + coder->threads_free = coder->threads_free->next; + + // The thread is no longer in the cache so subtract + // it from the cached memory usage. Don't add it + // to mem_in_use though; the caller will handle it + // since it knows how much memory it will actually + // use (the filter chain might change). + coder->mem_cached -= coder->thr->mem_filters; + } + } + + if (coder->thr == NULL) { + assert(coder->threads_initialized < coder->threads_max); + + // Initialize a new thread. + return_if_error(initialize_new_thread(coder, allocator)); + } + + coder->thr->in_filled = 0; + coder->thr->in_pos = 0; + coder->thr->out_pos = 0; + + coder->thr->progress_in = 0; + coder->thr->progress_out = 0; + + coder->thr->partial_update_enabled = false; + coder->thr->partial_update_started = false; + + return LZMA_OK; +} + + +static lzma_ret +read_output_and_wait(struct lzma_stream_coder *coder, + const lzma_allocator *allocator, + uint8_t *restrict out, size_t *restrict out_pos, + size_t out_size, + bool *input_is_possible, + bool waiting_allowed, + mythread_condtime *wait_abs, bool *has_blocked) +{ + lzma_ret ret = LZMA_OK; + + mythread_sync(coder->mutex) { + do { + // Get as much output from the queue as is possible + // without blocking. + const size_t out_start = *out_pos; + do { + ret = lzma_outq_read(&coder->outq, allocator, + out, out_pos, out_size, + NULL, NULL); + + // If a Block was finished, tell the worker + // thread of the next Block (if it is still + // running) to start telling the main thread + // when new output is available. + if (ret == LZMA_STREAM_END) + lzma_outq_enable_partial_output( + &coder->outq, + &worker_enable_partial_update); + + // Loop until a Block wasn't finished. + // It's important to loop around even if + // *out_pos == out_size because there could + // be an empty Block that will return + // LZMA_STREAM_END without needing any + // output space. + } while (ret == LZMA_STREAM_END); + + // Check if lzma_outq_read reported an error from + // the Block decoder. + if (ret != LZMA_OK) + break; + + // If the output buffer is now full but it wasn't full + // when this function was called, set out_was_filled. + // This way the next call to stream_decode_mt() knows + // that some output was produced and no output space + // remained in the previous call to stream_decode_mt(). + if (*out_pos == out_size && *out_pos != out_start) + coder->out_was_filled = true; + + // Check if any thread has indicated an error. + if (coder->thread_error != LZMA_OK) { + // If LZMA_FAIL_FAST was used, report errors + // from worker threads immediately. + if (coder->fail_fast) { + ret = coder->thread_error; + break; + } + + // Otherwise set pending_error. The value we + // set here will not actually get used other + // than working as a flag that an error has + // occurred. This is because in SEQ_ERROR + // all output before the error will be read + // first by calling this function, and once we + // reach the location of the (first) error the + // error code from the above lzma_outq_read() + // will be returned to the application. + // + // Use LZMA_PROG_ERROR since the value should + // never leak to the application. It's + // possible that pending_error has already + // been set but that doesn't matter: if we get + // here, pending_error only works as a flag. + coder->pending_error = LZMA_PROG_ERROR; + } + + // Check if decoding of the next Block can be started. + // The memusage of the active threads must be low + // enough, there must be a free buffer slot in the + // output queue, and there must be a free thread + // (that can be either created or an existing one + // reused). + // + // NOTE: This is checked after reading the output + // above because reading the output can free a slot in + // the output queue and also reduce active memusage. + // + // NOTE: If output queue is empty, then input will + // always be possible. + if (input_is_possible != NULL + && coder->memlimit_threading + - coder->mem_in_use + - coder->outq.mem_in_use + >= coder->mem_next_block + && lzma_outq_has_buf(&coder->outq) + && (coder->threads_initialized + < coder->threads_max + || coder->threads_free + != NULL)) { + *input_is_possible = true; + break; + } + + // If the caller doesn't want us to block, return now. + if (!waiting_allowed) + break; + + // This check is needed only when input_is_possible + // is NULL. We must return if we aren't waiting for + // input to become possible and there is no more + // output coming from the queue. + if (lzma_outq_is_empty(&coder->outq)) { + assert(input_is_possible == NULL); + break; + } + + // If there is more data available from the queue, + // our out buffer must be full and we need to return + // so that the application can provide more output + // space. + // + // NOTE: In general lzma_outq_is_readable() can return + // true also when there are no more bytes available. + // This can happen when a Block has finished without + // providing any new output. We know that this is not + // the case because in the beginning of this loop we + // tried to read as much as possible even when we had + // no output space left and the mutex has been locked + // all the time (so worker threads cannot have changed + // anything). Thus there must be actual pending output + // in the queue. + if (lzma_outq_is_readable(&coder->outq)) { + assert(*out_pos == out_size); + break; + } + + // If the application stops providing more input + // in the middle of a Block, there will eventually + // be one worker thread left that is stuck waiting for + // more input (that might never arrive) and a matching + // outbuf which the worker thread cannot finish due + // to lack of input. We must detect this situation, + // otherwise we would end up waiting indefinitely + // (if no timeout is in use) or keep returning + // LZMA_TIMED_OUT while making no progress. Thus, the + // application would never get LZMA_BUF_ERROR from + // lzma_code() which would tell the application that + // no more progress is possible. No LZMA_BUF_ERROR + // means that, for example, truncated .xz files could + // cause an infinite loop. + // + // A worker thread doing partial updates will + // store not only the output position in outbuf->pos + // but also the matching input position in + // outbuf->decoder_in_pos. Here we check if that + // input position matches the amount of input that + // the worker thread has been given (in_filled). + // If so, we must return and not wait as no more + // output will be coming without first getting more + // input to the worker thread. If the application + // keeps calling lzma_code() without providing more + // input, it will eventually get LZMA_BUF_ERROR. + // + // NOTE: We can read partial_update_enabled and + // in_filled without thr->mutex as only the main thread + // modifies these variables. decoder_in_pos requires + // coder->mutex which we are already holding. + if (coder->thr != NULL && + coder->thr->partial_update_enabled) { + // There is exactly one outbuf in the queue. + assert(coder->thr->outbuf == coder->outq.head); + assert(coder->thr->outbuf == coder->outq.tail); + + if (coder->thr->outbuf->decoder_in_pos + == coder->thr->in_filled) + break; + } + + // Wait for input or output to become possible. + if (coder->timeout != 0) { + // See the comment in stream_encoder_mt.c + // about why mythread_condtime_set() is used + // like this. + // + // FIXME? + // In contrast to the encoder, this calls + // _condtime_set while the mutex is locked. + if (!*has_blocked) { + *has_blocked = true; + mythread_condtime_set(wait_abs, + &coder->cond, + coder->timeout); + } + + if (mythread_cond_timedwait(&coder->cond, + &coder->mutex, + wait_abs) != 0) { + ret = LZMA_TIMED_OUT; + break; + } + } else { + mythread_cond_wait(&coder->cond, + &coder->mutex); + } + } while (ret == LZMA_OK); + } + + // If we are returning an error, then the application cannot get + // more output from us and thus keeping the threads running is + // useless and waste of CPU time. + if (ret != LZMA_OK && ret != LZMA_TIMED_OUT) + threads_stop(coder); + + return ret; +} + + +static lzma_ret +decode_block_header(struct lzma_stream_coder *coder, + const lzma_allocator *allocator, const uint8_t *restrict in, + size_t *restrict in_pos, size_t in_size) +{ + if (*in_pos >= in_size) + return LZMA_OK; + + if (coder->pos == 0) { + // Detect if it's Index. + if (in[*in_pos] == INDEX_INDICATOR) + return LZMA_INDEX_DETECTED; + + // Calculate the size of the Block Header. Note that + // Block Header decoder wants to see this byte too + // so don't advance *in_pos. + coder->block_options.header_size + = lzma_block_header_size_decode( + in[*in_pos]); + } + + // Copy the Block Header to the internal buffer. + lzma_bufcpy(in, in_pos, in_size, coder->buffer, &coder->pos, + coder->block_options.header_size); + + // Return if we didn't get the whole Block Header yet. + if (coder->pos < coder->block_options.header_size) + return LZMA_OK; + + coder->pos = 0; + + // Version 1 is needed to support the .ignore_check option. + coder->block_options.version = 1; + + // Block Header decoder will initialize all members of this array + // so we don't need to do it here. + coder->block_options.filters = coder->filters; + + // Decode the Block Header. + return_if_error(lzma_block_header_decode(&coder->block_options, + allocator, coder->buffer)); + + // If LZMA_IGNORE_CHECK was used, this flag needs to be set. + // It has to be set after lzma_block_header_decode() because + // it always resets this to false. + coder->block_options.ignore_check = coder->ignore_check; + + // coder->block_options is ready now. + return LZMA_STREAM_END; +} + + +/// Get the size of the Compressed Data + Block Padding + Check. +static size_t +comp_blk_size(const struct lzma_stream_coder *coder) +{ + return vli_ceil4(coder->block_options.compressed_size) + + lzma_check_size(coder->stream_flags.check); +} + + +/// Returns true if the size (compressed or uncompressed) is such that +/// threaded decompression cannot be used. Sizes that are too big compared +/// to SIZE_MAX must be rejected to avoid integer overflows and truncations +/// when lzma_vli is assigned to a size_t. +static bool +is_direct_mode_needed(lzma_vli size) +{ + return size == LZMA_VLI_UNKNOWN || size > SIZE_MAX / 3; +} + + +static lzma_ret +stream_decoder_reset(struct lzma_stream_coder *coder, + const lzma_allocator *allocator) +{ + // Initialize the Index hash used to verify the Index. + coder->index_hash = lzma_index_hash_init(coder->index_hash, allocator); + if (coder->index_hash == NULL) + return LZMA_MEM_ERROR; + + // Reset the rest of the variables. + coder->sequence = SEQ_STREAM_HEADER; + coder->pos = 0; + + return LZMA_OK; +} + + +static lzma_ret +stream_decode_mt(void *coder_ptr, const lzma_allocator *allocator, + const uint8_t *restrict in, size_t *restrict in_pos, + size_t in_size, + uint8_t *restrict out, size_t *restrict out_pos, + size_t out_size, lzma_action action) +{ + struct lzma_stream_coder *coder = coder_ptr; + + mythread_condtime wait_abs; + bool has_blocked = false; + + // Determine if in SEQ_BLOCK_HEADER and SEQ_BLOCK_THR_RUN we should + // tell read_output_and_wait() to wait until it can fill the output + // buffer (or a timeout occurs). Two conditions must be met: + // + // (1) If the caller provided no new input. The reason for this + // can be, for example, the end of the file or that there is + // a pause in the input stream and more input is available + // a little later. In this situation we should wait for output + // because otherwise we would end up in a busy-waiting loop where + // we make no progress and the application just calls us again + // without providing any new input. This would then result in + // LZMA_BUF_ERROR even though more output would be available + // once the worker threads decode more data. + // + // (2) Even if (1) is true, we will not wait if the previous call to + // this function managed to produce some output and the output + // buffer became full. This is for compatibility with applications + // that call lzma_code() in such a way that new input is provided + // only when the output buffer didn't become full. Without this + // trick such applications would have bad performance (bad + // parallelization due to decoder not getting input fast enough). + // + // NOTE: Such loops might require that timeout is disabled (0) + // if they assume that output-not-full implies that all input has + // been consumed. If and only if timeout is enabled, we may return + // when output isn't full *and* not all input has been consumed. + // + // However, if LZMA_FINISH is used, the above is ignored and we always + // wait (timeout can still cause us to return) because we know that + // we won't get any more input. This matters if the input file is + // truncated and we are doing single-shot decoding, that is, + // timeout = 0 and LZMA_FINISH is used on the first call to + // lzma_code() and the output buffer is known to be big enough + // to hold all uncompressed data: + // + // - If LZMA_FINISH wasn't handled specially, we could return + // LZMA_OK before providing all output that is possible with the + // truncated input. The rest would be available if lzma_code() was + // called again but then it's not single-shot decoding anymore. + // + // - By handling LZMA_FINISH specially here, the first call will + // produce all the output, matching the behavior of the + // single-threaded decoder. + // + // So it's a very specific corner case but also easy to avoid. Note + // that this special handling of LZMA_FINISH has no effect for + // single-shot decoding when the input file is valid (not truncated); + // premature LZMA_OK wouldn't be possible as long as timeout = 0. + const bool waiting_allowed = action == LZMA_FINISH + || (*in_pos == in_size && !coder->out_was_filled); + coder->out_was_filled = false; + + while (true) + switch (coder->sequence) { + case SEQ_STREAM_HEADER: { + // Copy the Stream Header to the internal buffer. + const size_t in_old = *in_pos; + lzma_bufcpy(in, in_pos, in_size, coder->buffer, &coder->pos, + LZMA_STREAM_HEADER_SIZE); + coder->progress_in += *in_pos - in_old; + + // Return if we didn't get the whole Stream Header yet. + if (coder->pos < LZMA_STREAM_HEADER_SIZE) + return LZMA_OK; + + coder->pos = 0; + + // Decode the Stream Header. + const lzma_ret ret = lzma_stream_header_decode( + &coder->stream_flags, coder->buffer); + if (ret != LZMA_OK) + return ret == LZMA_FORMAT_ERROR && !coder->first_stream + ? LZMA_DATA_ERROR : ret; + + // If we are decoding concatenated Streams, and the later + // Streams have invalid Header Magic Bytes, we give + // LZMA_DATA_ERROR instead of LZMA_FORMAT_ERROR. + coder->first_stream = false; + + // Copy the type of the Check so that Block Header and Block + // decoders see it. + coder->block_options.check = coder->stream_flags.check; + + // Even if we return LZMA_*_CHECK below, we want + // to continue from Block Header decoding. + coder->sequence = SEQ_BLOCK_HEADER; + + // Detect if there's no integrity check or if it is + // unsupported if those were requested by the application. + if (coder->tell_no_check && coder->stream_flags.check + == LZMA_CHECK_NONE) + return LZMA_NO_CHECK; + + if (coder->tell_unsupported_check + && !lzma_check_is_supported( + coder->stream_flags.check)) + return LZMA_UNSUPPORTED_CHECK; + + if (coder->tell_any_check) + return LZMA_GET_CHECK; + + FALLTHROUGH; + } + + case SEQ_BLOCK_HEADER: { + const size_t in_old = *in_pos; + const lzma_ret ret = decode_block_header(coder, allocator, + in, in_pos, in_size); + coder->progress_in += *in_pos - in_old; + + if (ret == LZMA_OK) { + // We didn't decode the whole Block Header yet. + // + // Read output from the queue before returning. This + // is important because it is possible that the + // application doesn't have any new input available + // immediately. If we didn't try to copy output from + // the output queue here, lzma_code() could end up + // returning LZMA_BUF_ERROR even though queued output + // is available. + // + // If the lzma_code() call provided at least one input + // byte, only copy as much data from the output queue + // as is available immediately. This way the + // application will be able to provide more input + // without a delay. + // + // On the other hand, if lzma_code() was called with + // an empty input buffer(*), treat it specially: try + // to fill the output buffer even if it requires + // waiting for the worker threads to provide output + // (timeout, if specified, can still cause us to + // return). + // + // - This way the application will be able to get all + // data that can be decoded from the input provided + // so far. + // + // - We avoid both premature LZMA_BUF_ERROR and + // busy-waiting where the application repeatedly + // calls lzma_code() which immediately returns + // LZMA_OK without providing new data. + // + // - If the queue becomes empty, we won't wait + // anything and will return LZMA_OK immediately + // (coder->timeout is completely ignored). + // + // (*) See the comment at the beginning of this + // function how waiting_allowed is determined + // and why there is an exception to the rule + // of "called with an empty input buffer". + assert(*in_pos == in_size); + + // If LZMA_FINISH was used we know that we won't get + // more input, so the file must be truncated if we + // get here. If worker threads don't detect any + // errors, eventually there will be no more output + // while we keep returning LZMA_OK which gets + // converted to LZMA_BUF_ERROR in lzma_code(). + // + // If fail-fast is enabled then we will return + // immediately using LZMA_DATA_ERROR instead of + // LZMA_OK or LZMA_BUF_ERROR. Rationale for the + // error code: + // + // - Worker threads may have a large amount of + // not-yet-decoded input data and we don't + // know for sure if all data is valid. Bad + // data there would result in LZMA_DATA_ERROR + // when fail-fast isn't used. + // + // - Immediate LZMA_BUF_ERROR would be a bit weird + // considering the older liblzma code. lzma_code() + // even has an assertion to prevent coders from + // returning LZMA_BUF_ERROR directly. + // + // The downside of this is that with fail-fast apps + // cannot always distinguish between corrupt and + // truncated files. + if (action == LZMA_FINISH && coder->fail_fast) { + // We won't produce any more output. Stop + // the unfinished worker threads so they + // won't waste CPU time. + threads_stop(coder); + return LZMA_DATA_ERROR; + } + + // read_output_and_wait() will call threads_stop() + // if needed so with that we can use return_if_error. + return_if_error(read_output_and_wait(coder, allocator, + out, out_pos, out_size, + NULL, waiting_allowed, + &wait_abs, &has_blocked)); + + if (coder->pending_error != LZMA_OK) { + coder->sequence = SEQ_ERROR; + break; + } + + return LZMA_OK; + } + + if (ret == LZMA_INDEX_DETECTED) { + coder->sequence = SEQ_INDEX_WAIT_OUTPUT; + break; + } + + // See if an error occurred. + if (ret != LZMA_STREAM_END) { + // NOTE: Here and in all other places where + // pending_error is set, it may overwrite the value + // (LZMA_PROG_ERROR) set by read_output_and_wait(). + // That function might overwrite value set here too. + // These are fine because when read_output_and_wait() + // sets pending_error, it actually works as a flag + // variable only ("some error has occurred") and the + // actual value of pending_error is not used in + // SEQ_ERROR. In such cases SEQ_ERROR will eventually + // get the correct error code from the return value of + // a later read_output_and_wait() call. + coder->pending_error = ret; + coder->sequence = SEQ_ERROR; + break; + } + + // Calculate the memory usage of the filters / Block decoder. + coder->mem_next_filters = lzma_raw_decoder_memusage( + coder->filters); + + if (coder->mem_next_filters == UINT64_MAX) { + // One or more unknown Filter IDs. + coder->pending_error = LZMA_OPTIONS_ERROR; + coder->sequence = SEQ_ERROR; + break; + } + + coder->sequence = SEQ_BLOCK_INIT; + FALLTHROUGH; + } + + case SEQ_BLOCK_INIT: { + // Check if decoding is possible at all with the current + // memlimit_stop which we must never exceed. + // + // This needs to be the first thing in SEQ_BLOCK_INIT + // to make it possible to restart decoding after increasing + // memlimit_stop with lzma_memlimit_set(). + if (coder->mem_next_filters > coder->memlimit_stop) { + // Flush pending output before returning + // LZMA_MEMLIMIT_ERROR. If the application doesn't + // want to increase the limit, at least it will get + // all the output possible so far. + return_if_error(read_output_and_wait(coder, allocator, + out, out_pos, out_size, + NULL, true, &wait_abs, &has_blocked)); + + if (!lzma_outq_is_empty(&coder->outq)) + return LZMA_OK; + + return LZMA_MEMLIMIT_ERROR; + } + + // Check if the size information is available in Block Header. + // If it is, check if the sizes are small enough that we don't + // need to worry *too* much about integer overflows later in + // the code. If these conditions are not met, we must use the + // single-threaded direct mode. + if (is_direct_mode_needed(coder->block_options.compressed_size) + || is_direct_mode_needed( + coder->block_options.uncompressed_size)) { + coder->sequence = SEQ_BLOCK_DIRECT_INIT; + break; + } + + // Calculate the amount of memory needed for the input and + // output buffers in threaded mode. + // + // These cannot overflow because we already checked that + // the sizes are small enough using is_direct_mode_needed(). + coder->mem_next_in = comp_blk_size(coder); + const uint64_t mem_buffers = coder->mem_next_in + + lzma_outq_outbuf_memusage( + coder->block_options.uncompressed_size); + + // Add the amount needed by the filters. + // Avoid integer overflows. + if (UINT64_MAX - mem_buffers < coder->mem_next_filters) { + // Use direct mode if the memusage would overflow. + // This is a theoretical case that shouldn't happen + // in practice unless the input file is weird (broken + // or malicious). + coder->sequence = SEQ_BLOCK_DIRECT_INIT; + break; + } + + // Amount of memory needed to decode this Block in + // threaded mode: + coder->mem_next_block = coder->mem_next_filters + mem_buffers; + + // If this alone would exceed memlimit_threading, then we must + // use the single-threaded direct mode. + if (coder->mem_next_block > coder->memlimit_threading) { + coder->sequence = SEQ_BLOCK_DIRECT_INIT; + break; + } + + // Use the threaded mode. Free the direct mode decoder in + // case it has been initialized. + lzma_next_end(&coder->block_decoder, allocator); + coder->mem_direct_mode = 0; + + // Since we already know what the sizes are supposed to be, + // we can already add them to the Index hash. The Block + // decoder will verify the values while decoding. + const lzma_ret ret = lzma_index_hash_append(coder->index_hash, + lzma_block_unpadded_size( + &coder->block_options), + coder->block_options.uncompressed_size); + if (ret != LZMA_OK) { + coder->pending_error = ret; + coder->sequence = SEQ_ERROR; + break; + } + + coder->sequence = SEQ_BLOCK_THR_INIT; + FALLTHROUGH; + } + + case SEQ_BLOCK_THR_INIT: { + // We need to wait for a multiple conditions to become true + // until we can initialize the Block decoder and let a worker + // thread decode it: + // + // - Wait for the memory usage of the active threads to drop + // so that starting the decoding of this Block won't make + // us go over memlimit_threading. + // + // - Wait for at least one free output queue slot. + // + // - Wait for a free worker thread. + // + // While we wait, we must copy decompressed data to the out + // buffer and catch possible decoder errors. + // + // read_output_and_wait() does all the above. + bool block_can_start = false; + + return_if_error(read_output_and_wait(coder, allocator, + out, out_pos, out_size, + &block_can_start, true, + &wait_abs, &has_blocked)); + + if (coder->pending_error != LZMA_OK) { + coder->sequence = SEQ_ERROR; + break; + } + + if (!block_can_start) { + // It's not a timeout because return_if_error handles + // it already. Output queue cannot be empty either + // because in that case block_can_start would have + // been true. Thus the output buffer must be full and + // the queue isn't empty. + assert(*out_pos == out_size); + assert(!lzma_outq_is_empty(&coder->outq)); + return LZMA_OK; + } + + // We know that we can start decoding this Block without + // exceeding memlimit_threading. However, to stay below + // memlimit_threading may require freeing some of the + // cached memory. + // + // Get a local copy of variables that require locking the + // mutex. It is fine if the worker threads modify the real + // values after we read these as those changes can only be + // towards more favorable conditions (less memory in use, + // more in cache). + // + // These are initialized to silence warnings. + uint64_t mem_in_use = 0; + uint64_t mem_cached = 0; + struct worker_thread *thr = NULL; + + mythread_sync(coder->mutex) { + mem_in_use = coder->mem_in_use; + mem_cached = coder->mem_cached; + thr = coder->threads_free; + } + + // The maximum amount of memory that can be held by other + // threads and cached buffers while allowing us to start + // decoding the next Block. + const uint64_t mem_max = coder->memlimit_threading + - coder->mem_next_block; + + // If the existing allocations are so large that starting + // to decode this Block might exceed memlimit_threads, + // try to free memory from the output queue cache first. + // + // NOTE: This math assumes the worst case. It's possible + // that the limit wouldn't be exceeded if the existing cached + // allocations are reused. + if (mem_in_use + mem_cached + coder->outq.mem_allocated + > mem_max) { + // Clear the outq cache except leave one buffer in + // the cache if its size is correct. That way we + // don't free and almost immediately reallocate + // an identical buffer. + lzma_outq_clear_cache2(&coder->outq, allocator, + coder->block_options.uncompressed_size); + } + + // If there is at least one worker_thread in the cache and + // the existing allocations are so large that starting to + // decode this Block might exceed memlimit_threads, free + // memory by freeing cached Block decoders. + // + // NOTE: The comparison is different here than above. + // Here we don't care about cached buffers in outq anymore + // and only look at memory actually in use. This is because + // if there is something in outq cache, it's a single buffer + // that can be used as is. We ensured this in the above + // if-block. + uint64_t mem_freed = 0; + if (thr != NULL && mem_in_use + mem_cached + + coder->outq.mem_in_use > mem_max) { + // Don't free the first Block decoder if its memory + // usage isn't greater than what this Block will need. + // Typically the same filter chain is used for all + // Blocks so this way the allocations can be reused + // when get_thread() picks the first worker_thread + // from the cache. + if (thr->mem_filters <= coder->mem_next_filters) + thr = thr->next; + + while (thr != NULL) { + lzma_next_end(&thr->block_decoder, allocator); + mem_freed += thr->mem_filters; + thr->mem_filters = 0; + thr = thr->next; + } + } + + // Update the memory usage counters. Note that coder->mem_* + // may have changed since we read them so we must subtract + // or add the changes. + mythread_sync(coder->mutex) { + coder->mem_cached -= mem_freed; + + // Memory needed for the filters and the input buffer. + // The output queue takes care of its own counter so + // we don't touch it here. + // + // NOTE: After this, coder->mem_in_use + + // coder->mem_cached might count the same thing twice. + // If so, this will get corrected in get_thread() when + // a worker_thread is picked from coder->free_threads + // and its memory usage is subtracted from mem_cached. + coder->mem_in_use += coder->mem_next_in + + coder->mem_next_filters; + } + + // Allocate memory for the output buffer in the output queue. + lzma_ret ret = lzma_outq_prealloc_buf( + &coder->outq, allocator, + coder->block_options.uncompressed_size); + if (ret != LZMA_OK) { + threads_stop(coder); + return ret; + } + + // Set up coder->thr. + ret = get_thread(coder, allocator); + if (ret != LZMA_OK) { + threads_stop(coder); + return ret; + } + + // The new Block decoder memory usage is already counted in + // coder->mem_in_use. Store it in the thread too. + coder->thr->mem_filters = coder->mem_next_filters; + + // Initialize the Block decoder. + coder->thr->block_options = coder->block_options; + ret = lzma_block_decoder_init( + &coder->thr->block_decoder, allocator, + &coder->thr->block_options); + + // Free the allocated filter options since they are needed + // only to initialize the Block decoder. + lzma_filters_free(coder->filters, allocator); + coder->thr->block_options.filters = NULL; + + // Check if memory usage calculation and Block encoder + // initialization succeeded. + if (ret != LZMA_OK) { + coder->pending_error = ret; + coder->sequence = SEQ_ERROR; + break; + } + + // Allocate the input buffer. + coder->thr->in_size = coder->mem_next_in; + coder->thr->in = lzma_alloc(coder->thr->in_size, allocator); + if (coder->thr->in == NULL) { + threads_stop(coder); + return LZMA_MEM_ERROR; + } + + // Get the preallocated output buffer. + coder->thr->outbuf = lzma_outq_get_buf( + &coder->outq, coder->thr); + + // Start the decoder. + mythread_sync(coder->thr->mutex) { + assert(coder->thr->state == THR_IDLE); + coder->thr->state = THR_RUN; + mythread_cond_signal(&coder->thr->cond); + } + + // Enable output from the thread that holds the oldest output + // buffer in the output queue (if such a thread exists). + mythread_sync(coder->mutex) { + lzma_outq_enable_partial_output(&coder->outq, + &worker_enable_partial_update); + } + + coder->sequence = SEQ_BLOCK_THR_RUN; + FALLTHROUGH; + } + + case SEQ_BLOCK_THR_RUN: { + if (action == LZMA_FINISH && coder->fail_fast) { + // We know that we won't get more input and that + // the caller wants fail-fast behavior. If we see + // that we don't have enough input to finish this + // Block, return LZMA_DATA_ERROR immediately. + // See SEQ_BLOCK_HEADER for the error code rationale. + const size_t in_avail = in_size - *in_pos; + const size_t in_needed = coder->thr->in_size + - coder->thr->in_filled; + if (in_avail < in_needed) { + threads_stop(coder); + return LZMA_DATA_ERROR; + } + } + + // Copy input to the worker thread. + size_t cur_in_filled = coder->thr->in_filled; + lzma_bufcpy(in, in_pos, in_size, coder->thr->in, + &cur_in_filled, coder->thr->in_size); + + // Tell the thread how much we copied. + mythread_sync(coder->thr->mutex) { + coder->thr->in_filled = cur_in_filled; + + // NOTE: Most of the time we are copying input faster + // than the thread can decode so most of the time + // calling mythread_cond_signal() is useless but + // we cannot make it conditional because thr->in_pos + // is updated without a mutex. And the overhead should + // be very much negligible anyway. + mythread_cond_signal(&coder->thr->cond); + } + + // Read output from the output queue. Just like in + // SEQ_BLOCK_HEADER, we wait to fill the output buffer + // only if waiting_allowed was set to true in the beginning + // of this function (see the comment there) and there is + // no input available. In SEQ_BLOCK_HEADER, there is never + // input available when read_output_and_wait() is called, + // but here there can be when LZMA_FINISH is used, thus we + // need to check if *in_pos == in_size. Otherwise we would + // wait here instead of using the available input to start + // a new thread. + return_if_error(read_output_and_wait(coder, allocator, + out, out_pos, out_size, + NULL, + waiting_allowed && *in_pos == in_size, + &wait_abs, &has_blocked)); + + if (coder->pending_error != LZMA_OK) { + coder->sequence = SEQ_ERROR; + break; + } + + // Return if the input didn't contain the whole Block. + // + // NOTE: When we updated coder->thr->in_filled a few lines + // above, the worker thread might by now have finished its + // work and returned itself back to the stack of free threads. + if (coder->thr->in_filled < coder->thr->in_size) { + assert(*in_pos == in_size); + return LZMA_OK; + } + + // The whole Block has been copied to the thread-specific + // buffer. Continue from the next Block Header or Index. + coder->thr = NULL; + coder->sequence = SEQ_BLOCK_HEADER; + break; + } + + case SEQ_BLOCK_DIRECT_INIT: { + // Wait for the threads to finish and that all decoded data + // has been copied to the output. That is, wait until the + // output queue becomes empty. + // + // NOTE: No need to check for coder->pending_error as + // we aren't consuming any input until the queue is empty + // and if there is a pending error, read_output_and_wait() + // will eventually return it before the queue is empty. + return_if_error(read_output_and_wait(coder, allocator, + out, out_pos, out_size, + NULL, true, &wait_abs, &has_blocked)); + if (!lzma_outq_is_empty(&coder->outq)) + return LZMA_OK; + + // Free the cached output buffers. + lzma_outq_clear_cache(&coder->outq, allocator); + + // Get rid of the worker threads, including the coder->threads + // array. + threads_end(coder, allocator); + + // Initialize the Block decoder. + const lzma_ret ret = lzma_block_decoder_init( + &coder->block_decoder, allocator, + &coder->block_options); + + // Free the allocated filter options since they are needed + // only to initialize the Block decoder. + lzma_filters_free(coder->filters, allocator); + coder->block_options.filters = NULL; + + // Check if Block decoder initialization succeeded. + if (ret != LZMA_OK) + return ret; + + // Make the memory usage visible to _memconfig(). + coder->mem_direct_mode = coder->mem_next_filters; + + coder->sequence = SEQ_BLOCK_DIRECT_RUN; + FALLTHROUGH; + } + + case SEQ_BLOCK_DIRECT_RUN: { + const size_t in_old = *in_pos; + const size_t out_old = *out_pos; + const lzma_ret ret = coder->block_decoder.code( + coder->block_decoder.coder, allocator, + in, in_pos, in_size, out, out_pos, out_size, + action); + coder->progress_in += *in_pos - in_old; + coder->progress_out += *out_pos - out_old; + + if (ret != LZMA_STREAM_END) + return ret; + + // Block decoded successfully. Add the new size pair to + // the Index hash. + return_if_error(lzma_index_hash_append(coder->index_hash, + lzma_block_unpadded_size( + &coder->block_options), + coder->block_options.uncompressed_size)); + + coder->sequence = SEQ_BLOCK_HEADER; + break; + } + + case SEQ_INDEX_WAIT_OUTPUT: + // Flush the output from all worker threads so that we can + // decode the Index without thinking about threading. + return_if_error(read_output_and_wait(coder, allocator, + out, out_pos, out_size, + NULL, true, &wait_abs, &has_blocked)); + + if (!lzma_outq_is_empty(&coder->outq)) + return LZMA_OK; + + coder->sequence = SEQ_INDEX_DECODE; + FALLTHROUGH; + + case SEQ_INDEX_DECODE: { + // If we don't have any input, don't call + // lzma_index_hash_decode() since it would return + // LZMA_BUF_ERROR, which we must not do here. + if (*in_pos >= in_size) + return LZMA_OK; + + // Decode the Index and compare it to the hash calculated + // from the sizes of the Blocks (if any). + const size_t in_old = *in_pos; + const lzma_ret ret = lzma_index_hash_decode(coder->index_hash, + in, in_pos, in_size); + coder->progress_in += *in_pos - in_old; + if (ret != LZMA_STREAM_END) + return ret; + + coder->sequence = SEQ_STREAM_FOOTER; + FALLTHROUGH; + } + + case SEQ_STREAM_FOOTER: { + // Copy the Stream Footer to the internal buffer. + const size_t in_old = *in_pos; + lzma_bufcpy(in, in_pos, in_size, coder->buffer, &coder->pos, + LZMA_STREAM_HEADER_SIZE); + coder->progress_in += *in_pos - in_old; + + // Return if we didn't get the whole Stream Footer yet. + if (coder->pos < LZMA_STREAM_HEADER_SIZE) + return LZMA_OK; + + coder->pos = 0; + + // Decode the Stream Footer. The decoder gives + // LZMA_FORMAT_ERROR if the magic bytes don't match, + // so convert that return code to LZMA_DATA_ERROR. + lzma_stream_flags footer_flags; + const lzma_ret ret = lzma_stream_footer_decode( + &footer_flags, coder->buffer); + if (ret != LZMA_OK) + return ret == LZMA_FORMAT_ERROR + ? LZMA_DATA_ERROR : ret; + + // Check that Index Size stored in the Stream Footer matches + // the real size of the Index field. + if (lzma_index_hash_size(coder->index_hash) + != footer_flags.backward_size) + return LZMA_DATA_ERROR; + + // Compare that the Stream Flags fields are identical in + // both Stream Header and Stream Footer. + return_if_error(lzma_stream_flags_compare( + &coder->stream_flags, &footer_flags)); + + if (!coder->concatenated) + return LZMA_STREAM_END; + + coder->sequence = SEQ_STREAM_PADDING; + FALLTHROUGH; + } + + case SEQ_STREAM_PADDING: + assert(coder->concatenated); + + // Skip over possible Stream Padding. + while (true) { + if (*in_pos >= in_size) { + // Unless LZMA_FINISH was used, we cannot + // know if there's more input coming later. + if (action != LZMA_FINISH) + return LZMA_OK; + + // Stream Padding must be a multiple of + // four bytes. + return coder->pos == 0 + ? LZMA_STREAM_END + : LZMA_DATA_ERROR; + } + + // If the byte is not zero, it probably indicates + // beginning of a new Stream (or the file is corrupt). + if (in[*in_pos] != 0x00) + break; + + ++*in_pos; + ++coder->progress_in; + coder->pos = (coder->pos + 1) & 3; + } + + // Stream Padding must be a multiple of four bytes (empty + // Stream Padding is OK). + if (coder->pos != 0) { + ++*in_pos; + ++coder->progress_in; + return LZMA_DATA_ERROR; + } + + // Prepare to decode the next Stream. + return_if_error(stream_decoder_reset(coder, allocator)); + break; + + case SEQ_ERROR: + if (!coder->fail_fast) { + // Let the application get all data before the point + // where the error was detected. This matches the + // behavior of single-threaded use. + // + // FIXME? Some errors (LZMA_MEM_ERROR) don't get here, + // they are returned immediately. Thus in rare cases + // the output will be less than in the single-threaded + // mode. Maybe this doesn't matter much in practice. + return_if_error(read_output_and_wait(coder, allocator, + out, out_pos, out_size, + NULL, true, &wait_abs, &has_blocked)); + + // We get here only if the error happened in the main + // thread, for example, unsupported Block Header. + if (!lzma_outq_is_empty(&coder->outq)) + return LZMA_OK; + } + + // We only get here if no errors were detected by the worker + // threads. Errors from worker threads would have already been + // returned by the call to read_output_and_wait() above. + return coder->pending_error; + + default: + assert(0); + return LZMA_PROG_ERROR; + } + + // Never reached +} + + +static void +stream_decoder_mt_end(void *coder_ptr, const lzma_allocator *allocator) +{ + struct lzma_stream_coder *coder = coder_ptr; + + threads_end(coder, allocator); + lzma_outq_end(&coder->outq, allocator); + + lzma_next_end(&coder->block_decoder, allocator); + lzma_filters_free(coder->filters, allocator); + lzma_index_hash_end(coder->index_hash, allocator); + + lzma_free(coder, allocator); + return; +} + + +static lzma_check +stream_decoder_mt_get_check(const void *coder_ptr) +{ + const struct lzma_stream_coder *coder = coder_ptr; + return coder->stream_flags.check; +} + + +static lzma_ret +stream_decoder_mt_memconfig(void *coder_ptr, uint64_t *memusage, + uint64_t *old_memlimit, uint64_t new_memlimit) +{ + // NOTE: This function gets/sets memlimit_stop. For now, + // memlimit_threading cannot be modified after initialization. + // + // *memusage will include cached memory too. Excluding cached memory + // would be misleading and it wouldn't help the applications to + // know how much memory is actually needed to decompress the file + // because the higher the number of threads and the memlimits are + // the more memory the decoder may use. + // + // Setting a new limit includes the cached memory too and too low + // limits will be rejected. Alternative could be to free the cached + // memory immediately if that helps to bring the limit down but + // the current way is the simplest. It's unlikely that limit needs + // to be lowered in the middle of a file anyway; the typical reason + // to want a new limit is to increase after LZMA_MEMLIMIT_ERROR + // and even such use isn't common. + struct lzma_stream_coder *coder = coder_ptr; + + mythread_sync(coder->mutex) { + *memusage = coder->mem_direct_mode + + coder->mem_in_use + + coder->mem_cached + + coder->outq.mem_allocated; + } + + // If no filter chains are allocated, *memusage may be zero. + // Always return at least LZMA_MEMUSAGE_BASE. + if (*memusage < LZMA_MEMUSAGE_BASE) + *memusage = LZMA_MEMUSAGE_BASE; + + *old_memlimit = coder->memlimit_stop; + + if (new_memlimit != 0) { + if (new_memlimit < *memusage) + return LZMA_MEMLIMIT_ERROR; + + coder->memlimit_stop = new_memlimit; + } + + return LZMA_OK; +} + + +static void +stream_decoder_mt_get_progress(void *coder_ptr, + uint64_t *progress_in, uint64_t *progress_out) +{ + struct lzma_stream_coder *coder = coder_ptr; + + // Lock coder->mutex to prevent finishing threads from moving their + // progress info from the worker_thread structure to lzma_stream_coder. + mythread_sync(coder->mutex) { + *progress_in = coder->progress_in; + *progress_out = coder->progress_out; + + for (size_t i = 0; i < coder->threads_initialized; ++i) { + mythread_sync(coder->threads[i].mutex) { + *progress_in += coder->threads[i].progress_in; + *progress_out += coder->threads[i] + .progress_out; + } + } + } + + return; +} + + +static lzma_ret +stream_decoder_mt_init(lzma_next_coder *next, const lzma_allocator *allocator, + const lzma_mt *options) +{ + struct lzma_stream_coder *coder; + + if (options->threads == 0 || options->threads > LZMA_THREADS_MAX) + return LZMA_OPTIONS_ERROR; + + if (options->flags & ~LZMA_SUPPORTED_FLAGS) + return LZMA_OPTIONS_ERROR; + + lzma_next_coder_init(&stream_decoder_mt_init, next, allocator); + + coder = next->coder; + if (!coder) { + coder = lzma_alloc(sizeof(struct lzma_stream_coder), allocator); + if (coder == NULL) + return LZMA_MEM_ERROR; + + next->coder = coder; + + if (mythread_mutex_init(&coder->mutex)) { + lzma_free(coder, allocator); + return LZMA_MEM_ERROR; + } + + if (mythread_cond_init(&coder->cond)) { + mythread_mutex_destroy(&coder->mutex); + lzma_free(coder, allocator); + return LZMA_MEM_ERROR; + } + + next->code = &stream_decode_mt; + next->end = &stream_decoder_mt_end; + next->get_check = &stream_decoder_mt_get_check; + next->memconfig = &stream_decoder_mt_memconfig; + next->get_progress = &stream_decoder_mt_get_progress; + + coder->filters[0].id = LZMA_VLI_UNKNOWN; + memzero(&coder->outq, sizeof(coder->outq)); + + coder->block_decoder = LZMA_NEXT_CODER_INIT; + coder->mem_direct_mode = 0; + + coder->index_hash = NULL; + coder->threads = NULL; + coder->threads_free = NULL; + coder->threads_initialized = 0; + } + + // Cleanup old filter chain if one remains after unfinished decoding + // of a previous Stream. + lzma_filters_free(coder->filters, allocator); + + // By allocating threads from scratch we can start memory-usage + // accounting from scratch, too. Changes in filter and block sizes may + // affect number of threads. + // + // Reusing threads doesn't seem worth it. Unlike the single-threaded + // decoder, with some types of input file combinations reusing + // could leave quite a lot of memory allocated but unused (first + // file could allocate a lot, the next files could use fewer + // threads and some of the allocations from the first file would not + // get freed unless memlimit_threading forces us to clear caches). + // + // NOTE: The direct mode decoder isn't freed here if one exists. + // It will be reused or freed as needed in the main loop. + threads_end(coder, allocator); + + // All memusage counters start at 0 (including mem_direct_mode). + // The little extra that is needed for the structs in this file + // get accounted well enough by the filter chain memory usage + // which adds LZMA_MEMUSAGE_BASE for each chain. However, + // stream_decoder_mt_memconfig() has to handle this specially so that + // it will never return less than LZMA_MEMUSAGE_BASE as memory usage. + coder->mem_in_use = 0; + coder->mem_cached = 0; + coder->mem_next_block = 0; + + coder->progress_in = 0; + coder->progress_out = 0; + + coder->sequence = SEQ_STREAM_HEADER; + coder->thread_error = LZMA_OK; + coder->pending_error = LZMA_OK; + coder->thr = NULL; + + coder->timeout = options->timeout; + + coder->memlimit_threading = my_max(1, options->memlimit_threading); + coder->memlimit_stop = my_max(1, options->memlimit_stop); + if (coder->memlimit_threading > coder->memlimit_stop) + coder->memlimit_threading = coder->memlimit_stop; + + coder->tell_no_check = (options->flags & LZMA_TELL_NO_CHECK) != 0; + coder->tell_unsupported_check + = (options->flags & LZMA_TELL_UNSUPPORTED_CHECK) != 0; + coder->tell_any_check = (options->flags & LZMA_TELL_ANY_CHECK) != 0; + coder->ignore_check = (options->flags & LZMA_IGNORE_CHECK) != 0; + coder->concatenated = (options->flags & LZMA_CONCATENATED) != 0; + coder->fail_fast = (options->flags & LZMA_FAIL_FAST) != 0; + + coder->first_stream = true; + coder->out_was_filled = false; + coder->pos = 0; + + coder->threads_max = options->threads; + + return_if_error(lzma_outq_init(&coder->outq, allocator, + coder->threads_max)); + + return stream_decoder_reset(coder, allocator); +} + + +extern LZMA_API(lzma_ret) +lzma_stream_decoder_mt(lzma_stream *strm, const lzma_mt *options) +{ + lzma_next_strm_init(stream_decoder_mt_init, strm, options); + + strm->internal->supported_actions[LZMA_RUN] = true; + strm->internal->supported_actions[LZMA_FINISH] = true; + + return LZMA_OK; +} diff --git a/src/native/external/xz/src/liblzma/common/stream_encoder.c b/src/native/external/xz/src/liblzma/common/stream_encoder.c new file mode 100644 index 00000000000000..e7e5b3fce7e01c --- /dev/null +++ b/src/native/external/xz/src/liblzma/common/stream_encoder.c @@ -0,0 +1,354 @@ +// SPDX-License-Identifier: 0BSD + +/////////////////////////////////////////////////////////////////////////////// +// +/// \file stream_encoder.c +/// \brief Encodes .xz Streams +// +// Author: Lasse Collin +// +/////////////////////////////////////////////////////////////////////////////// + +#include "block_encoder.h" +#include "index_encoder.h" + + +typedef struct { + enum { + SEQ_STREAM_HEADER, + SEQ_BLOCK_INIT, + SEQ_BLOCK_HEADER, + SEQ_BLOCK_ENCODE, + SEQ_INDEX_ENCODE, + SEQ_STREAM_FOOTER, + } sequence; + + /// True if Block encoder has been initialized by + /// stream_encoder_init() or stream_encoder_update() + /// and thus doesn't need to be initialized in stream_encode(). + bool block_encoder_is_initialized; + + /// Block + lzma_next_coder block_encoder; + + /// Options for the Block encoder + lzma_block block_options; + + /// The filter chain currently in use + lzma_filter filters[LZMA_FILTERS_MAX + 1]; + + /// Index encoder. This is separate from Block encoder, because this + /// doesn't take much memory, and when encoding multiple Streams + /// with the same encoding options we avoid reallocating memory. + lzma_next_coder index_encoder; + + /// Index to hold sizes of the Blocks + lzma_index *index; + + /// Read position in buffer[] + size_t buffer_pos; + + /// Total number of bytes in buffer[] + size_t buffer_size; + + /// Buffer to hold Stream Header, Block Header, and Stream Footer. + /// Block Header has biggest maximum size. + uint8_t buffer[LZMA_BLOCK_HEADER_SIZE_MAX]; +} lzma_stream_coder; + + +static lzma_ret +block_encoder_init(lzma_stream_coder *coder, const lzma_allocator *allocator) +{ + // Prepare the Block options. Even though Block encoder doesn't need + // compressed_size, uncompressed_size, and header_size to be + // initialized, it is a good idea to do it here, because this way + // we catch if someone gave us Filter ID that cannot be used in + // Blocks/Streams. + coder->block_options.compressed_size = LZMA_VLI_UNKNOWN; + coder->block_options.uncompressed_size = LZMA_VLI_UNKNOWN; + + return_if_error(lzma_block_header_size(&coder->block_options)); + + // Initialize the actual Block encoder. + return lzma_block_encoder_init(&coder->block_encoder, allocator, + &coder->block_options); +} + + +static lzma_ret +stream_encode(void *coder_ptr, const lzma_allocator *allocator, + const uint8_t *restrict in, size_t *restrict in_pos, + size_t in_size, uint8_t *restrict out, + size_t *restrict out_pos, size_t out_size, lzma_action action) +{ + lzma_stream_coder *coder = coder_ptr; + + // Main loop + while (*out_pos < out_size) + switch (coder->sequence) { + case SEQ_STREAM_HEADER: + case SEQ_BLOCK_HEADER: + case SEQ_STREAM_FOOTER: + lzma_bufcpy(coder->buffer, &coder->buffer_pos, + coder->buffer_size, out, out_pos, out_size); + if (coder->buffer_pos < coder->buffer_size) + return LZMA_OK; + + if (coder->sequence == SEQ_STREAM_FOOTER) + return LZMA_STREAM_END; + + coder->buffer_pos = 0; + ++coder->sequence; + break; + + case SEQ_BLOCK_INIT: { + if (*in_pos == in_size) { + // If we are requested to flush or finish the current + // Block, return LZMA_STREAM_END immediately since + // there's nothing to do. + if (action != LZMA_FINISH) + return action == LZMA_RUN + ? LZMA_OK : LZMA_STREAM_END; + + // The application had used LZMA_FULL_FLUSH to finish + // the previous Block, but now wants to finish without + // encoding new data, or it is simply creating an + // empty Stream with no Blocks. + // + // Initialize the Index encoder, and continue to + // actually encoding the Index. + return_if_error(lzma_index_encoder_init( + &coder->index_encoder, allocator, + coder->index)); + coder->sequence = SEQ_INDEX_ENCODE; + break; + } + + // Initialize the Block encoder unless it was already + // initialized by stream_encoder_init() or + // stream_encoder_update(). + if (!coder->block_encoder_is_initialized) + return_if_error(block_encoder_init(coder, allocator)); + + // Make it false so that we don't skip the initialization + // with the next Block. + coder->block_encoder_is_initialized = false; + + // Encode the Block Header. This shouldn't fail since we have + // already initialized the Block encoder. + if (lzma_block_header_encode(&coder->block_options, + coder->buffer) != LZMA_OK) + return LZMA_PROG_ERROR; + + coder->buffer_size = coder->block_options.header_size; + coder->sequence = SEQ_BLOCK_HEADER; + break; + } + + case SEQ_BLOCK_ENCODE: { + static const lzma_action convert[LZMA_ACTION_MAX + 1] = { + LZMA_RUN, + LZMA_SYNC_FLUSH, + LZMA_FINISH, + LZMA_FINISH, + LZMA_FINISH, + }; + + const lzma_ret ret = coder->block_encoder.code( + coder->block_encoder.coder, allocator, + in, in_pos, in_size, + out, out_pos, out_size, convert[action]); + if (ret != LZMA_STREAM_END || action == LZMA_SYNC_FLUSH) + return ret; + + // Add a new Index Record. + const lzma_vli unpadded_size = lzma_block_unpadded_size( + &coder->block_options); + assert(unpadded_size != 0); + return_if_error(lzma_index_append(coder->index, allocator, + unpadded_size, + coder->block_options.uncompressed_size)); + + coder->sequence = SEQ_BLOCK_INIT; + break; + } + + case SEQ_INDEX_ENCODE: { + // Call the Index encoder. It doesn't take any input, so + // those pointers can be NULL. + const lzma_ret ret = coder->index_encoder.code( + coder->index_encoder.coder, allocator, + NULL, NULL, 0, + out, out_pos, out_size, LZMA_RUN); + if (ret != LZMA_STREAM_END) + return ret; + + // Encode the Stream Footer into coder->buffer. + const lzma_stream_flags stream_flags = { + .version = 0, + .backward_size = lzma_index_size(coder->index), + .check = coder->block_options.check, + }; + + if (lzma_stream_footer_encode(&stream_flags, coder->buffer) + != LZMA_OK) + return LZMA_PROG_ERROR; + + coder->buffer_size = LZMA_STREAM_HEADER_SIZE; + coder->sequence = SEQ_STREAM_FOOTER; + break; + } + + default: + assert(0); + return LZMA_PROG_ERROR; + } + + return LZMA_OK; +} + + +static void +stream_encoder_end(void *coder_ptr, const lzma_allocator *allocator) +{ + lzma_stream_coder *coder = coder_ptr; + + lzma_next_end(&coder->block_encoder, allocator); + lzma_next_end(&coder->index_encoder, allocator); + lzma_index_end(coder->index, allocator); + + lzma_filters_free(coder->filters, allocator); + + lzma_free(coder, allocator); + return; +} + + +static lzma_ret +stream_encoder_update(void *coder_ptr, const lzma_allocator *allocator, + const lzma_filter *filters, + const lzma_filter *reversed_filters) +{ + lzma_stream_coder *coder = coder_ptr; + lzma_ret ret; + + // Make a copy to a temporary buffer first. This way it is easier + // to keep the encoder state unchanged if an error occurs with + // lzma_filters_copy(). + lzma_filter temp[LZMA_FILTERS_MAX + 1]; + return_if_error(lzma_filters_copy(filters, temp, allocator)); + + if (coder->sequence <= SEQ_BLOCK_INIT) { + // There is no incomplete Block waiting to be finished, + // thus we can change the whole filter chain. Start by + // trying to initialize the Block encoder with the new + // chain. This way we detect if the chain is valid. + coder->block_encoder_is_initialized = false; + coder->block_options.filters = temp; + ret = block_encoder_init(coder, allocator); + coder->block_options.filters = coder->filters; + if (ret != LZMA_OK) + goto error; + + coder->block_encoder_is_initialized = true; + + } else if (coder->sequence <= SEQ_BLOCK_ENCODE) { + // We are in the middle of a Block. Try to update only + // the filter-specific options. + ret = coder->block_encoder.update( + coder->block_encoder.coder, allocator, + filters, reversed_filters); + if (ret != LZMA_OK) + goto error; + } else { + // Trying to update the filter chain when we are already + // encoding Index or Stream Footer. + ret = LZMA_PROG_ERROR; + goto error; + } + + // Free the options of the old chain. + lzma_filters_free(coder->filters, allocator); + + // Copy the new filter chain in place. + memcpy(coder->filters, temp, sizeof(temp)); + + return LZMA_OK; + +error: + lzma_filters_free(temp, allocator); + return ret; +} + + +static lzma_ret +stream_encoder_init(lzma_next_coder *next, const lzma_allocator *allocator, + const lzma_filter *filters, lzma_check check) +{ + lzma_next_coder_init(&stream_encoder_init, next, allocator); + + if (filters == NULL) + return LZMA_PROG_ERROR; + + lzma_stream_coder *coder = next->coder; + + if (coder == NULL) { + coder = lzma_alloc(sizeof(lzma_stream_coder), allocator); + if (coder == NULL) + return LZMA_MEM_ERROR; + + next->coder = coder; + next->code = &stream_encode; + next->end = &stream_encoder_end; + next->update = &stream_encoder_update; + + coder->filters[0].id = LZMA_VLI_UNKNOWN; + coder->block_encoder = LZMA_NEXT_CODER_INIT; + coder->index_encoder = LZMA_NEXT_CODER_INIT; + coder->index = NULL; + } + + // Basic initializations + coder->sequence = SEQ_STREAM_HEADER; + coder->block_options.version = 0; + coder->block_options.check = check; + + // Initialize the Index + lzma_index_end(coder->index, allocator); + coder->index = lzma_index_init(allocator); + if (coder->index == NULL) + return LZMA_MEM_ERROR; + + // Encode the Stream Header + lzma_stream_flags stream_flags = { + .version = 0, + .check = check, + }; + return_if_error(lzma_stream_header_encode( + &stream_flags, coder->buffer)); + + coder->buffer_pos = 0; + coder->buffer_size = LZMA_STREAM_HEADER_SIZE; + + // Initialize the Block encoder. This way we detect unsupported + // filter chains when initializing the Stream encoder instead of + // giving an error after Stream Header has already been written out. + return stream_encoder_update(coder, allocator, filters, NULL); +} + + +extern LZMA_API(lzma_ret) +lzma_stream_encoder(lzma_stream *strm, + const lzma_filter *filters, lzma_check check) +{ + lzma_next_strm_init(stream_encoder_init, strm, filters, check); + + strm->internal->supported_actions[LZMA_RUN] = true; + strm->internal->supported_actions[LZMA_SYNC_FLUSH] = true; + strm->internal->supported_actions[LZMA_FULL_FLUSH] = true; + strm->internal->supported_actions[LZMA_FULL_BARRIER] = true; + strm->internal->supported_actions[LZMA_FINISH] = true; + + return LZMA_OK; +} diff --git a/src/native/external/xz/src/liblzma/common/stream_encoder_mt.c b/src/native/external/xz/src/liblzma/common/stream_encoder_mt.c new file mode 100644 index 00000000000000..fd0eb98df68220 --- /dev/null +++ b/src/native/external/xz/src/liblzma/common/stream_encoder_mt.c @@ -0,0 +1,1278 @@ +// SPDX-License-Identifier: 0BSD + +/////////////////////////////////////////////////////////////////////////////// +// +/// \file stream_encoder_mt.c +/// \brief Multithreaded .xz Stream encoder +// +// Author: Lasse Collin +// +/////////////////////////////////////////////////////////////////////////////// + +#include "filter_encoder.h" +#include "easy_preset.h" +#include "block_encoder.h" +#include "block_buffer_encoder.h" +#include "index_encoder.h" +#include "outqueue.h" + + +/// Maximum supported block size. This makes it simpler to prevent integer +/// overflows if we are given unusually large block size. +#define BLOCK_SIZE_MAX (UINT64_MAX / LZMA_THREADS_MAX) + + +typedef enum { + /// Waiting for work. + THR_IDLE, + + /// Encoding is in progress. + THR_RUN, + + /// Encoding is in progress but no more input data will + /// be read. + THR_FINISH, + + /// The main thread wants the thread to stop whatever it was doing + /// but not exit. + THR_STOP, + + /// The main thread wants the thread to exit. We could use + /// cancellation but since there's stopped anyway, this is lazier. + THR_EXIT, + +} worker_state; + +typedef struct lzma_stream_coder_s lzma_stream_coder; + +typedef struct worker_thread_s worker_thread; +struct worker_thread_s { + worker_state state; + + /// Input buffer of coder->block_size bytes. The main thread will + /// put new input into this and update in_size accordingly. Once + /// no more input is coming, state will be set to THR_FINISH. + uint8_t *in; + + /// Amount of data available in the input buffer. This is modified + /// only by the main thread. + size_t in_size; + + /// Output buffer for this thread. This is set by the main + /// thread every time a new Block is started with this thread + /// structure. + lzma_outbuf *outbuf; + + /// Pointer to the main structure is needed when putting this + /// thread back to the stack of free threads. + lzma_stream_coder *coder; + + /// The allocator is set by the main thread. Since a copy of the + /// pointer is kept here, the application must not change the + /// allocator before calling lzma_end(). + const lzma_allocator *allocator; + + /// Amount of uncompressed data that has already been compressed. + uint64_t progress_in; + + /// Amount of compressed data that is ready. + uint64_t progress_out; + + /// Block encoder + lzma_next_coder block_encoder; + + /// Compression options for this Block + lzma_block block_options; + + /// Filter chain for this thread. By copying the filters array + /// to each thread it is possible to change the filter chain + /// between Blocks using lzma_filters_update(). + lzma_filter filters[LZMA_FILTERS_MAX + 1]; + + /// Next structure in the stack of free worker threads. + worker_thread *next; + + mythread_mutex mutex; + mythread_cond cond; + + /// The ID of this thread is used to join the thread + /// when it's not needed anymore. + mythread thread_id; +}; + + +struct lzma_stream_coder_s { + enum { + SEQ_STREAM_HEADER, + SEQ_BLOCK, + SEQ_INDEX, + SEQ_STREAM_FOOTER, + } sequence; + + /// Start a new Block every block_size bytes of input unless + /// LZMA_FULL_FLUSH or LZMA_FULL_BARRIER is used earlier. + size_t block_size; + + /// The filter chain to use for the next Block. + /// This can be updated using lzma_filters_update() + /// after LZMA_FULL_BARRIER or LZMA_FULL_FLUSH. + lzma_filter filters[LZMA_FILTERS_MAX + 1]; + + /// A copy of filters[] will be put here when attempting to get + /// a new worker thread. This will be copied to a worker thread + /// when a thread becomes free and then this cache is marked as + /// empty by setting [0].id = LZMA_VLI_UNKNOWN. Without this cache + /// the filter options from filters[] would get uselessly copied + /// multiple times (allocated and freed) when waiting for a new free + /// worker thread. + /// + /// This is freed if filters[] is updated via lzma_filters_update(). + lzma_filter filters_cache[LZMA_FILTERS_MAX + 1]; + + + /// Index to hold sizes of the Blocks + lzma_index *index; + + /// Index encoder + lzma_next_coder index_encoder; + + + /// Stream Flags for encoding the Stream Header and Stream Footer. + lzma_stream_flags stream_flags; + + /// Buffer to hold Stream Header and Stream Footer. + uint8_t header[LZMA_STREAM_HEADER_SIZE]; + + /// Read position in header[] + size_t header_pos; + + + /// Output buffer queue for compressed data + lzma_outq outq; + + /// How much memory to allocate for each lzma_outbuf.buf + size_t outbuf_alloc_size; + + + /// Maximum wait time if cannot use all the input and cannot + /// fill the output buffer. This is in milliseconds. + uint32_t timeout; + + + /// Error code from a worker thread + lzma_ret thread_error; + + /// Array of allocated thread-specific structures + worker_thread *threads; + + /// Number of structures in "threads" above. This is also the + /// number of threads that will be created at maximum. + uint32_t threads_max; + + /// Number of thread structures that have been initialized, and + /// thus the number of worker threads actually created so far. + uint32_t threads_initialized; + + /// Stack of free threads. When a thread finishes, it puts itself + /// back into this stack. This starts as empty because threads + /// are created only when actually needed. + worker_thread *threads_free; + + /// The most recent worker thread to which the main thread writes + /// the new input from the application. + worker_thread *thr; + + + /// Amount of uncompressed data in Blocks that have already + /// been finished. + uint64_t progress_in; + + /// Amount of compressed data in Stream Header + Blocks that + /// have already been finished. + uint64_t progress_out; + + + mythread_mutex mutex; + mythread_cond cond; +}; + + +/// Tell the main thread that something has gone wrong. +static void +worker_error(worker_thread *thr, lzma_ret ret) +{ + assert(ret != LZMA_OK); + assert(ret != LZMA_STREAM_END); + + mythread_sync(thr->coder->mutex) { + if (thr->coder->thread_error == LZMA_OK) + thr->coder->thread_error = ret; + + mythread_cond_signal(&thr->coder->cond); + } + + return; +} + + +static worker_state +worker_encode(worker_thread *thr, size_t *out_pos, worker_state state) +{ + assert(thr->progress_in == 0); + assert(thr->progress_out == 0); + + // Set the Block options. + thr->block_options = (lzma_block){ + .version = 0, + .check = thr->coder->stream_flags.check, + .compressed_size = thr->outbuf->allocated, + .uncompressed_size = thr->coder->block_size, + .filters = thr->filters, + }; + + // Calculate maximum size of the Block Header. This amount is + // reserved in the beginning of the buffer so that Block Header + // along with Compressed Size and Uncompressed Size can be + // written there. + lzma_ret ret = lzma_block_header_size(&thr->block_options); + if (ret != LZMA_OK) { + worker_error(thr, ret); + return THR_STOP; + } + + // Initialize the Block encoder. + ret = lzma_block_encoder_init(&thr->block_encoder, + thr->allocator, &thr->block_options); + if (ret != LZMA_OK) { + worker_error(thr, ret); + return THR_STOP; + } + + size_t in_pos = 0; + size_t in_size = 0; + + *out_pos = thr->block_options.header_size; + const size_t out_size = thr->outbuf->allocated; + + do { + mythread_sync(thr->mutex) { + // Store in_pos and *out_pos into *thr so that + // an application may read them via + // lzma_get_progress() to get progress information. + // + // NOTE: These aren't updated when the encoding + // finishes. Instead, the final values are taken + // later from thr->outbuf. + thr->progress_in = in_pos; + thr->progress_out = *out_pos; + + while (in_size == thr->in_size + && thr->state == THR_RUN) + mythread_cond_wait(&thr->cond, &thr->mutex); + + state = thr->state; + in_size = thr->in_size; + } + + // Return if we were asked to stop or exit. + if (state >= THR_STOP) + return state; + + lzma_action action = state == THR_FINISH + ? LZMA_FINISH : LZMA_RUN; + + // Limit the amount of input given to the Block encoder + // at once. This way this thread can react fairly quickly + // if the main thread wants us to stop or exit. + static const size_t in_chunk_max = 16384; + size_t in_limit = in_size; + if (in_size - in_pos > in_chunk_max) { + in_limit = in_pos + in_chunk_max; + action = LZMA_RUN; + } + + ret = thr->block_encoder.code( + thr->block_encoder.coder, thr->allocator, + thr->in, &in_pos, in_limit, thr->outbuf->buf, + out_pos, out_size, action); + } while (ret == LZMA_OK && *out_pos < out_size); + + switch (ret) { + case LZMA_STREAM_END: + assert(state == THR_FINISH); + + // Encode the Block Header. By doing it after + // the compression, we can store the Compressed Size + // and Uncompressed Size fields. + ret = lzma_block_header_encode(&thr->block_options, + thr->outbuf->buf); + if (ret != LZMA_OK) { + worker_error(thr, ret); + return THR_STOP; + } + + break; + + case LZMA_OK: + // The data was incompressible. Encode it using uncompressed + // LZMA2 chunks. + // + // First wait that we have gotten all the input. + mythread_sync(thr->mutex) { + while (thr->state == THR_RUN) + mythread_cond_wait(&thr->cond, &thr->mutex); + + state = thr->state; + in_size = thr->in_size; + } + + if (state >= THR_STOP) + return state; + + // Do the encoding. This takes care of the Block Header too. + *out_pos = 0; + ret = lzma_block_uncomp_encode(&thr->block_options, + thr->in, in_size, thr->outbuf->buf, + out_pos, out_size); + + // It shouldn't fail. + if (ret != LZMA_OK) { + worker_error(thr, LZMA_PROG_ERROR); + return THR_STOP; + } + + break; + + default: + worker_error(thr, ret); + return THR_STOP; + } + + // Set the size information that will be read by the main thread + // to write the Index field. + thr->outbuf->unpadded_size + = lzma_block_unpadded_size(&thr->block_options); + assert(thr->outbuf->unpadded_size != 0); + thr->outbuf->uncompressed_size = thr->block_options.uncompressed_size; + + return THR_FINISH; +} + + +static MYTHREAD_RET_TYPE +worker_start(void *thr_ptr) +{ + worker_thread *thr = thr_ptr; + worker_state state = THR_IDLE; // Init to silence a warning + + while (true) { + // Wait for work. + mythread_sync(thr->mutex) { + while (true) { + // The thread is already idle so if we are + // requested to stop, just set the state. + if (thr->state == THR_STOP) { + thr->state = THR_IDLE; + mythread_cond_signal(&thr->cond); + } + + state = thr->state; + if (state != THR_IDLE) + break; + + mythread_cond_wait(&thr->cond, &thr->mutex); + } + } + + size_t out_pos = 0; + + assert(state != THR_IDLE); + assert(state != THR_STOP); + + if (state <= THR_FINISH) + state = worker_encode(thr, &out_pos, state); + + if (state == THR_EXIT) + break; + + // Mark the thread as idle unless the main thread has + // told us to exit. Signal is needed for the case + // where the main thread is waiting for the threads to stop. + mythread_sync(thr->mutex) { + if (thr->state != THR_EXIT) { + thr->state = THR_IDLE; + mythread_cond_signal(&thr->cond); + } + } + + mythread_sync(thr->coder->mutex) { + // If no errors occurred, make the encoded data + // available to be copied out. + if (state == THR_FINISH) { + thr->outbuf->pos = out_pos; + thr->outbuf->finished = true; + } + + // Update the main progress info. + thr->coder->progress_in + += thr->outbuf->uncompressed_size; + thr->coder->progress_out += out_pos; + thr->progress_in = 0; + thr->progress_out = 0; + + // Return this thread to the stack of free threads. + thr->next = thr->coder->threads_free; + thr->coder->threads_free = thr; + + mythread_cond_signal(&thr->coder->cond); + } + } + + // Exiting, free the resources. + lzma_filters_free(thr->filters, thr->allocator); + + mythread_mutex_destroy(&thr->mutex); + mythread_cond_destroy(&thr->cond); + + lzma_next_end(&thr->block_encoder, thr->allocator); + lzma_free(thr->in, thr->allocator); + return MYTHREAD_RET_VALUE; +} + + +/// Make the threads stop but not exit. Optionally wait for them to stop. +static void +threads_stop(lzma_stream_coder *coder, bool wait_for_threads) +{ + // Tell the threads to stop. + for (uint32_t i = 0; i < coder->threads_initialized; ++i) { + mythread_sync(coder->threads[i].mutex) { + coder->threads[i].state = THR_STOP; + mythread_cond_signal(&coder->threads[i].cond); + } + } + + if (!wait_for_threads) + return; + + // Wait for the threads to settle in the idle state. + for (uint32_t i = 0; i < coder->threads_initialized; ++i) { + mythread_sync(coder->threads[i].mutex) { + while (coder->threads[i].state != THR_IDLE) + mythread_cond_wait(&coder->threads[i].cond, + &coder->threads[i].mutex); + } + } + + return; +} + + +/// Stop the threads and free the resources associated with them. +/// Wait until the threads have exited. +static void +threads_end(lzma_stream_coder *coder, const lzma_allocator *allocator) +{ + for (uint32_t i = 0; i < coder->threads_initialized; ++i) { + mythread_sync(coder->threads[i].mutex) { + coder->threads[i].state = THR_EXIT; + mythread_cond_signal(&coder->threads[i].cond); + } + } + + for (uint32_t i = 0; i < coder->threads_initialized; ++i) { + int ret = mythread_join(coder->threads[i].thread_id); + assert(ret == 0); + (void)ret; + } + + lzma_free(coder->threads, allocator); + return; +} + + +/// Initialize a new worker_thread structure and create a new thread. +static lzma_ret +initialize_new_thread(lzma_stream_coder *coder, + const lzma_allocator *allocator) +{ + worker_thread *thr = &coder->threads[coder->threads_initialized]; + + thr->in = lzma_alloc(coder->block_size, allocator); + if (thr->in == NULL) + return LZMA_MEM_ERROR; + + if (mythread_mutex_init(&thr->mutex)) + goto error_mutex; + + if (mythread_cond_init(&thr->cond)) + goto error_cond; + + thr->state = THR_IDLE; + thr->allocator = allocator; + thr->coder = coder; + thr->progress_in = 0; + thr->progress_out = 0; + thr->block_encoder = LZMA_NEXT_CODER_INIT; + thr->filters[0].id = LZMA_VLI_UNKNOWN; + + if (mythread_create(&thr->thread_id, &worker_start, thr)) + goto error_thread; + + ++coder->threads_initialized; + coder->thr = thr; + + return LZMA_OK; + +error_thread: + mythread_cond_destroy(&thr->cond); + +error_cond: + mythread_mutex_destroy(&thr->mutex); + +error_mutex: + lzma_free(thr->in, allocator); + return LZMA_MEM_ERROR; +} + + +static lzma_ret +get_thread(lzma_stream_coder *coder, const lzma_allocator *allocator) +{ + // If there are no free output subqueues, there is no + // point to try getting a thread. + if (!lzma_outq_has_buf(&coder->outq)) + return LZMA_OK; + + // That's also true if we cannot allocate memory for the output + // buffer in the output queue. + return_if_error(lzma_outq_prealloc_buf(&coder->outq, allocator, + coder->outbuf_alloc_size)); + + // Make a thread-specific copy of the filter chain. Put it in + // the cache array first so that if we cannot get a new thread yet, + // the allocation is ready when we try again. + if (coder->filters_cache[0].id == LZMA_VLI_UNKNOWN) + return_if_error(lzma_filters_copy( + coder->filters, coder->filters_cache, allocator)); + + // If there is a free structure on the stack, use it. + mythread_sync(coder->mutex) { + if (coder->threads_free != NULL) { + coder->thr = coder->threads_free; + coder->threads_free = coder->threads_free->next; + } + } + + if (coder->thr == NULL) { + // If there are no uninitialized structures left, return. + if (coder->threads_initialized == coder->threads_max) + return LZMA_OK; + + // Initialize a new thread. + return_if_error(initialize_new_thread(coder, allocator)); + } + + // Reset the parts of the thread state that have to be done + // in the main thread. + mythread_sync(coder->thr->mutex) { + coder->thr->state = THR_RUN; + coder->thr->in_size = 0; + coder->thr->outbuf = lzma_outq_get_buf(&coder->outq, NULL); + + // Free the old thread-specific filter options and replace + // them with the already-allocated new options from + // coder->filters_cache[]. Then mark the cache as empty. + lzma_filters_free(coder->thr->filters, allocator); + memcpy(coder->thr->filters, coder->filters_cache, + sizeof(coder->filters_cache)); + coder->filters_cache[0].id = LZMA_VLI_UNKNOWN; + + mythread_cond_signal(&coder->thr->cond); + } + + return LZMA_OK; +} + + +static lzma_ret +stream_encode_in(lzma_stream_coder *coder, const lzma_allocator *allocator, + const uint8_t *restrict in, size_t *restrict in_pos, + size_t in_size, lzma_action action) +{ + while (*in_pos < in_size + || (coder->thr != NULL && action != LZMA_RUN)) { + if (coder->thr == NULL) { + // Get a new thread. + const lzma_ret ret = get_thread(coder, allocator); + if (coder->thr == NULL) + return ret; + } + + // Copy the input data to thread's buffer. + size_t thr_in_size = coder->thr->in_size; + lzma_bufcpy(in, in_pos, in_size, coder->thr->in, + &thr_in_size, coder->block_size); + + // Tell the Block encoder to finish if + // - it has got block_size bytes of input; or + // - all input was used and LZMA_FINISH, LZMA_FULL_FLUSH, + // or LZMA_FULL_BARRIER was used. + // + // TODO: LZMA_SYNC_FLUSH and LZMA_SYNC_BARRIER. + const bool finish = thr_in_size == coder->block_size + || (*in_pos == in_size && action != LZMA_RUN); + + bool block_error = false; + + mythread_sync(coder->thr->mutex) { + if (coder->thr->state == THR_IDLE) { + // Something has gone wrong with the Block + // encoder. It has set coder->thread_error + // which we will read a few lines later. + block_error = true; + } else { + // Tell the Block encoder its new amount + // of input and update the state if needed. + coder->thr->in_size = thr_in_size; + + if (finish) + coder->thr->state = THR_FINISH; + + mythread_cond_signal(&coder->thr->cond); + } + } + + if (block_error) { + lzma_ret ret = LZMA_OK; // Init to silence a warning. + + mythread_sync(coder->mutex) { + ret = coder->thread_error; + } + + return ret; + } + + if (finish) + coder->thr = NULL; + } + + return LZMA_OK; +} + + +/// Wait until more input can be consumed, more output can be read, or +/// an optional timeout is reached. +static bool +wait_for_work(lzma_stream_coder *coder, mythread_condtime *wait_abs, + bool *has_blocked, bool has_input) +{ + if (coder->timeout != 0 && !*has_blocked) { + // Every time when stream_encode_mt() is called via + // lzma_code(), *has_blocked starts as false. We set it + // to true here and calculate the absolute time when + // we must return if there's nothing to do. + // + // This way if we block multiple times for short moments + // less than "timeout" milliseconds, we will return once + // "timeout" amount of time has passed since the *first* + // blocking occurred. If the absolute time was calculated + // again every time we block, "timeout" would effectively + // be meaningless if we never consecutively block longer + // than "timeout" ms. + *has_blocked = true; + mythread_condtime_set(wait_abs, &coder->cond, coder->timeout); + } + + bool timed_out = false; + + mythread_sync(coder->mutex) { + // There are four things that we wait. If one of them + // becomes possible, we return. + // - If there is input left, we need to get a free + // worker thread and an output buffer for it. + // - Data ready to be read from the output queue. + // - A worker thread indicates an error. + // - Time out occurs. + while ((!has_input || coder->threads_free == NULL + || !lzma_outq_has_buf(&coder->outq)) + && !lzma_outq_is_readable(&coder->outq) + && coder->thread_error == LZMA_OK + && !timed_out) { + if (coder->timeout != 0) + timed_out = mythread_cond_timedwait( + &coder->cond, &coder->mutex, + wait_abs) != 0; + else + mythread_cond_wait(&coder->cond, + &coder->mutex); + } + } + + return timed_out; +} + + +static lzma_ret +stream_encode_mt(void *coder_ptr, const lzma_allocator *allocator, + const uint8_t *restrict in, size_t *restrict in_pos, + size_t in_size, uint8_t *restrict out, + size_t *restrict out_pos, size_t out_size, lzma_action action) +{ + lzma_stream_coder *coder = coder_ptr; + + switch (coder->sequence) { + case SEQ_STREAM_HEADER: + lzma_bufcpy(coder->header, &coder->header_pos, + sizeof(coder->header), + out, out_pos, out_size); + if (coder->header_pos < sizeof(coder->header)) + return LZMA_OK; + + coder->header_pos = 0; + coder->sequence = SEQ_BLOCK; + FALLTHROUGH; + + case SEQ_BLOCK: { + // Initialized to silence warnings. + lzma_vli unpadded_size = 0; + lzma_vli uncompressed_size = 0; + lzma_ret ret = LZMA_OK; + + // These are for wait_for_work(). + bool has_blocked = false; + mythread_condtime wait_abs = { 0 }; + + while (true) { + mythread_sync(coder->mutex) { + // Check for Block encoder errors. + ret = coder->thread_error; + if (ret != LZMA_OK) { + assert(ret != LZMA_STREAM_END); + break; // Break out of mythread_sync. + } + + // Try to read compressed data to out[]. + ret = lzma_outq_read(&coder->outq, allocator, + out, out_pos, out_size, + &unpadded_size, + &uncompressed_size); + } + + if (ret == LZMA_STREAM_END) { + // End of Block. Add it to the Index. + ret = lzma_index_append(coder->index, + allocator, unpadded_size, + uncompressed_size); + if (ret != LZMA_OK) { + threads_stop(coder, false); + return ret; + } + + // If we didn't fill the output buffer yet, + // try to read more data. Maybe the next + // outbuf has been finished already too. + if (*out_pos < out_size) + continue; + } + + if (ret != LZMA_OK) { + // coder->thread_error was set. + threads_stop(coder, false); + return ret; + } + + // Try to give uncompressed data to a worker thread. + ret = stream_encode_in(coder, allocator, + in, in_pos, in_size, action); + if (ret != LZMA_OK) { + threads_stop(coder, false); + return ret; + } + + // See if we should wait or return. + // + // TODO: LZMA_SYNC_FLUSH and LZMA_SYNC_BARRIER. + if (*in_pos == in_size) { + // LZMA_RUN: More data is probably coming + // so return to let the caller fill the + // input buffer. + if (action == LZMA_RUN) + return LZMA_OK; + + // LZMA_FULL_BARRIER: The same as with + // LZMA_RUN but tell the caller that the + // barrier was completed. + if (action == LZMA_FULL_BARRIER) + return LZMA_STREAM_END; + + // Finishing or flushing isn't completed until + // all input data has been encoded and copied + // to the output buffer. + if (lzma_outq_is_empty(&coder->outq)) { + // LZMA_FINISH: Continue to encode + // the Index field. + if (action == LZMA_FINISH) + break; + + // LZMA_FULL_FLUSH: Return to tell + // the caller that flushing was + // completed. + if (action == LZMA_FULL_FLUSH) + return LZMA_STREAM_END; + } + } + + // Return if there is no output space left. + // This check must be done after testing the input + // buffer, because we might want to use a different + // return code. + if (*out_pos == out_size) + return LZMA_OK; + + // Neither in nor out has been used completely. + // Wait until there's something we can do. + if (wait_for_work(coder, &wait_abs, &has_blocked, + *in_pos < in_size)) + return LZMA_TIMED_OUT; + } + + // All Blocks have been encoded and the threads have stopped. + // Prepare to encode the Index field. + return_if_error(lzma_index_encoder_init( + &coder->index_encoder, allocator, + coder->index)); + coder->sequence = SEQ_INDEX; + + // Update the progress info to take the Index and + // Stream Footer into account. Those are very fast to encode + // so in terms of progress information they can be thought + // to be ready to be copied out. + coder->progress_out += lzma_index_size(coder->index) + + LZMA_STREAM_HEADER_SIZE; + + FALLTHROUGH; + } + + case SEQ_INDEX: { + // Call the Index encoder. It doesn't take any input, so + // those pointers can be NULL. + const lzma_ret ret = coder->index_encoder.code( + coder->index_encoder.coder, allocator, + NULL, NULL, 0, + out, out_pos, out_size, LZMA_RUN); + if (ret != LZMA_STREAM_END) + return ret; + + // Encode the Stream Footer into coder->buffer. + coder->stream_flags.backward_size + = lzma_index_size(coder->index); + if (lzma_stream_footer_encode(&coder->stream_flags, + coder->header) != LZMA_OK) + return LZMA_PROG_ERROR; + + coder->sequence = SEQ_STREAM_FOOTER; + FALLTHROUGH; + } + + case SEQ_STREAM_FOOTER: + lzma_bufcpy(coder->header, &coder->header_pos, + sizeof(coder->header), + out, out_pos, out_size); + return coder->header_pos < sizeof(coder->header) + ? LZMA_OK : LZMA_STREAM_END; + } + + assert(0); + return LZMA_PROG_ERROR; +} + + +static void +stream_encoder_mt_end(void *coder_ptr, const lzma_allocator *allocator) +{ + lzma_stream_coder *coder = coder_ptr; + + // Threads must be killed before the output queue can be freed. + threads_end(coder, allocator); + lzma_outq_end(&coder->outq, allocator); + + lzma_filters_free(coder->filters, allocator); + lzma_filters_free(coder->filters_cache, allocator); + + lzma_next_end(&coder->index_encoder, allocator); + lzma_index_end(coder->index, allocator); + + mythread_cond_destroy(&coder->cond); + mythread_mutex_destroy(&coder->mutex); + + lzma_free(coder, allocator); + return; +} + + +static lzma_ret +stream_encoder_mt_update(void *coder_ptr, const lzma_allocator *allocator, + const lzma_filter *filters, + const lzma_filter *reversed_filters + lzma_attribute((__unused__))) +{ + lzma_stream_coder *coder = coder_ptr; + + // Applications shouldn't attempt to change the options when + // we are already encoding the Index or Stream Footer. + if (coder->sequence > SEQ_BLOCK) + return LZMA_PROG_ERROR; + + // For now the threaded encoder doesn't support changing + // the options in the middle of a Block. + if (coder->thr != NULL) + return LZMA_PROG_ERROR; + + // Check if the filter chain seems mostly valid. See the comment + // in stream_encoder_mt_init(). + if (lzma_raw_encoder_memusage(filters) == UINT64_MAX) + return LZMA_OPTIONS_ERROR; + + // Make a copy to a temporary buffer first. This way the encoder + // state stays unchanged if an error occurs in lzma_filters_copy(). + lzma_filter temp[LZMA_FILTERS_MAX + 1]; + return_if_error(lzma_filters_copy(filters, temp, allocator)); + + // Free the options of the old chain as well as the cache. + lzma_filters_free(coder->filters, allocator); + lzma_filters_free(coder->filters_cache, allocator); + + // Copy the new filter chain in place. + memcpy(coder->filters, temp, sizeof(temp)); + + return LZMA_OK; +} + + +/// Options handling for lzma_stream_encoder_mt_init() and +/// lzma_stream_encoder_mt_memusage() +static lzma_ret +get_options(const lzma_mt *options, lzma_options_easy *opt_easy, + const lzma_filter **filters, uint64_t *block_size, + uint64_t *outbuf_size_max) +{ + // Validate some of the options. + if (options == NULL) + return LZMA_PROG_ERROR; + + if (options->flags != 0 || options->threads == 0 + || options->threads > LZMA_THREADS_MAX) + return LZMA_OPTIONS_ERROR; + + if (options->filters != NULL) { + // Filter chain was given, use it as is. + *filters = options->filters; + } else { + // Use a preset. + if (lzma_easy_preset(opt_easy, options->preset)) + return LZMA_OPTIONS_ERROR; + + *filters = opt_easy->filters; + } + + // If the Block size is not set, determine it from the filter chain. + if (options->block_size > 0) + *block_size = options->block_size; + else + *block_size = lzma_mt_block_size(*filters); + + // UINT64_MAX > BLOCK_SIZE_MAX, so the second condition + // should be optimized out by any reasonable compiler. + // The second condition should be there in the unlikely event that + // the macros change and UINT64_MAX < BLOCK_SIZE_MAX. + if (*block_size > BLOCK_SIZE_MAX || *block_size == UINT64_MAX) + return LZMA_OPTIONS_ERROR; + + // Calculate the maximum amount output that a single output buffer + // may need to hold. This is the same as the maximum total size of + // a Block. + *outbuf_size_max = lzma_block_buffer_bound64(*block_size); + if (*outbuf_size_max == 0) + return LZMA_MEM_ERROR; + + return LZMA_OK; +} + + +static void +get_progress(void *coder_ptr, uint64_t *progress_in, uint64_t *progress_out) +{ + lzma_stream_coder *coder = coder_ptr; + + // Lock coder->mutex to prevent finishing threads from moving their + // progress info from the worker_thread structure to lzma_stream_coder. + mythread_sync(coder->mutex) { + *progress_in = coder->progress_in; + *progress_out = coder->progress_out; + + for (size_t i = 0; i < coder->threads_initialized; ++i) { + mythread_sync(coder->threads[i].mutex) { + *progress_in += coder->threads[i].progress_in; + *progress_out += coder->threads[i] + .progress_out; + } + } + } + + return; +} + + +static lzma_ret +stream_encoder_mt_init(lzma_next_coder *next, const lzma_allocator *allocator, + const lzma_mt *options) +{ + lzma_next_coder_init(&stream_encoder_mt_init, next, allocator); + + // Get the filter chain. + lzma_options_easy easy; + const lzma_filter *filters; + uint64_t block_size; + uint64_t outbuf_size_max; + return_if_error(get_options(options, &easy, &filters, + &block_size, &outbuf_size_max)); + +#if SIZE_MAX < UINT64_MAX + if (block_size > SIZE_MAX || outbuf_size_max > SIZE_MAX) + return LZMA_MEM_ERROR; +#endif + + // Validate the filter chain so that we can give an error in this + // function instead of delaying it to the first call to lzma_code(). + // The memory usage calculation verifies the filter chain as + // a side effect so we take advantage of that. It's not a perfect + // check though as raw encoder allows LZMA1 too but such problems + // will be caught eventually with Block Header encoder. + if (lzma_raw_encoder_memusage(filters) == UINT64_MAX) + return LZMA_OPTIONS_ERROR; + + // Validate the Check ID. + if ((unsigned int)(options->check) > LZMA_CHECK_ID_MAX) + return LZMA_PROG_ERROR; + + if (!lzma_check_is_supported(options->check)) + return LZMA_UNSUPPORTED_CHECK; + + // Allocate and initialize the base structure if needed. + lzma_stream_coder *coder = next->coder; + if (coder == NULL) { + coder = lzma_alloc(sizeof(lzma_stream_coder), allocator); + if (coder == NULL) + return LZMA_MEM_ERROR; + + next->coder = coder; + + // For the mutex and condition variable initializations + // the error handling has to be done here because + // stream_encoder_mt_end() doesn't know if they have + // already been initialized or not. + if (mythread_mutex_init(&coder->mutex)) { + lzma_free(coder, allocator); + next->coder = NULL; + return LZMA_MEM_ERROR; + } + + if (mythread_cond_init(&coder->cond)) { + mythread_mutex_destroy(&coder->mutex); + lzma_free(coder, allocator); + next->coder = NULL; + return LZMA_MEM_ERROR; + } + + next->code = &stream_encode_mt; + next->end = &stream_encoder_mt_end; + next->get_progress = &get_progress; + next->update = &stream_encoder_mt_update; + + coder->filters[0].id = LZMA_VLI_UNKNOWN; + coder->filters_cache[0].id = LZMA_VLI_UNKNOWN; + coder->index_encoder = LZMA_NEXT_CODER_INIT; + coder->index = NULL; + memzero(&coder->outq, sizeof(coder->outq)); + coder->threads = NULL; + coder->threads_max = 0; + coder->threads_initialized = 0; + } + + // Basic initializations + coder->sequence = SEQ_STREAM_HEADER; + coder->block_size = (size_t)(block_size); + coder->outbuf_alloc_size = (size_t)(outbuf_size_max); + coder->thread_error = LZMA_OK; + coder->thr = NULL; + + // Allocate the thread-specific base structures. + assert(options->threads > 0); + if (coder->threads_max != options->threads) { + threads_end(coder, allocator); + + coder->threads = NULL; + coder->threads_max = 0; + + coder->threads_initialized = 0; + coder->threads_free = NULL; + + coder->threads = lzma_alloc( + options->threads * sizeof(worker_thread), + allocator); + if (coder->threads == NULL) + return LZMA_MEM_ERROR; + + coder->threads_max = options->threads; + } else { + // Reuse the old structures and threads. Tell the running + // threads to stop and wait until they have stopped. + threads_stop(coder, true); + } + + // Output queue + return_if_error(lzma_outq_init(&coder->outq, allocator, + options->threads)); + + // Timeout + coder->timeout = options->timeout; + + // Free the old filter chain and the cache. + lzma_filters_free(coder->filters, allocator); + lzma_filters_free(coder->filters_cache, allocator); + + // Copy the new filter chain. + return_if_error(lzma_filters_copy( + filters, coder->filters, allocator)); + + // Index + lzma_index_end(coder->index, allocator); + coder->index = lzma_index_init(allocator); + if (coder->index == NULL) + return LZMA_MEM_ERROR; + + // Stream Header + coder->stream_flags.version = 0; + coder->stream_flags.check = options->check; + return_if_error(lzma_stream_header_encode( + &coder->stream_flags, coder->header)); + + coder->header_pos = 0; + + // Progress info + coder->progress_in = 0; + coder->progress_out = LZMA_STREAM_HEADER_SIZE; + + return LZMA_OK; +} + + +#ifdef HAVE_SYMBOL_VERSIONS_LINUX +// These are for compatibility with binaries linked against liblzma that +// has been patched with xz-5.2.2-compat-libs.patch from RHEL/CentOS 7. +// Actually that patch didn't create lzma_stream_encoder_mt@XZ_5.2.2 +// but it has been added here anyway since someone might misread the +// RHEL patch and think both @XZ_5.1.2alpha and @XZ_5.2.2 exist. +LZMA_SYMVER_API("lzma_stream_encoder_mt@XZ_5.1.2alpha", + lzma_ret, lzma_stream_encoder_mt_512a)( + lzma_stream *strm, const lzma_mt *options) + lzma_nothrow lzma_attr_warn_unused_result + __attribute__((__alias__("lzma_stream_encoder_mt_52"))); + +LZMA_SYMVER_API("lzma_stream_encoder_mt@XZ_5.2.2", + lzma_ret, lzma_stream_encoder_mt_522)( + lzma_stream *strm, const lzma_mt *options) + lzma_nothrow lzma_attr_warn_unused_result + __attribute__((__alias__("lzma_stream_encoder_mt_52"))); + +LZMA_SYMVER_API("lzma_stream_encoder_mt@@XZ_5.2", + lzma_ret, lzma_stream_encoder_mt_52)( + lzma_stream *strm, const lzma_mt *options) + lzma_nothrow lzma_attr_warn_unused_result; + +#define lzma_stream_encoder_mt lzma_stream_encoder_mt_52 +#endif +extern LZMA_API(lzma_ret) +lzma_stream_encoder_mt(lzma_stream *strm, const lzma_mt *options) +{ + lzma_next_strm_init(stream_encoder_mt_init, strm, options); + + strm->internal->supported_actions[LZMA_RUN] = true; +// strm->internal->supported_actions[LZMA_SYNC_FLUSH] = true; + strm->internal->supported_actions[LZMA_FULL_FLUSH] = true; + strm->internal->supported_actions[LZMA_FULL_BARRIER] = true; + strm->internal->supported_actions[LZMA_FINISH] = true; + + return LZMA_OK; +} + + +#ifdef HAVE_SYMBOL_VERSIONS_LINUX +LZMA_SYMVER_API("lzma_stream_encoder_mt_memusage@XZ_5.1.2alpha", + uint64_t, lzma_stream_encoder_mt_memusage_512a)( + const lzma_mt *options) lzma_nothrow lzma_attr_pure + __attribute__((__alias__("lzma_stream_encoder_mt_memusage_52"))); + +LZMA_SYMVER_API("lzma_stream_encoder_mt_memusage@XZ_5.2.2", + uint64_t, lzma_stream_encoder_mt_memusage_522)( + const lzma_mt *options) lzma_nothrow lzma_attr_pure + __attribute__((__alias__("lzma_stream_encoder_mt_memusage_52"))); + +LZMA_SYMVER_API("lzma_stream_encoder_mt_memusage@@XZ_5.2", + uint64_t, lzma_stream_encoder_mt_memusage_52)( + const lzma_mt *options) lzma_nothrow lzma_attr_pure; + +#define lzma_stream_encoder_mt_memusage lzma_stream_encoder_mt_memusage_52 +#endif +// This function name is a monster but it's consistent with the older +// monster names. :-( 31 chars is the max that C99 requires so in that +// sense it's not too long. ;-) +extern LZMA_API(uint64_t) +lzma_stream_encoder_mt_memusage(const lzma_mt *options) +{ + lzma_options_easy easy; + const lzma_filter *filters; + uint64_t block_size; + uint64_t outbuf_size_max; + + if (get_options(options, &easy, &filters, &block_size, + &outbuf_size_max) != LZMA_OK) + return UINT64_MAX; + + // Memory usage of the input buffers + const uint64_t inbuf_memusage = options->threads * block_size; + + // Memory usage of the filter encoders + uint64_t filters_memusage = lzma_raw_encoder_memusage(filters); + if (filters_memusage == UINT64_MAX) + return UINT64_MAX; + + filters_memusage *= options->threads; + + // Memory usage of the output queue + const uint64_t outq_memusage = lzma_outq_memusage( + outbuf_size_max, options->threads); + if (outq_memusage == UINT64_MAX) + return UINT64_MAX; + + // Sum them with overflow checking. + uint64_t total_memusage = LZMA_MEMUSAGE_BASE + + sizeof(lzma_stream_coder) + + options->threads * sizeof(worker_thread); + + if (UINT64_MAX - total_memusage < inbuf_memusage) + return UINT64_MAX; + + total_memusage += inbuf_memusage; + + if (UINT64_MAX - total_memusage < filters_memusage) + return UINT64_MAX; + + total_memusage += filters_memusage; + + if (UINT64_MAX - total_memusage < outq_memusage) + return UINT64_MAX; + + return total_memusage + outq_memusage; +} diff --git a/src/native/external/xz/src/liblzma/common/stream_flags_common.c b/src/native/external/xz/src/liblzma/common/stream_flags_common.c new file mode 100644 index 00000000000000..41b8dcb70d7457 --- /dev/null +++ b/src/native/external/xz/src/liblzma/common/stream_flags_common.c @@ -0,0 +1,46 @@ +// SPDX-License-Identifier: 0BSD + +/////////////////////////////////////////////////////////////////////////////// +// +/// \file stream_flags_common.c +/// \brief Common stuff for Stream flags coders +// +// Author: Lasse Collin +// +/////////////////////////////////////////////////////////////////////////////// + +#include "stream_flags_common.h" + + +const uint8_t lzma_header_magic[6] = { 0xFD, 0x37, 0x7A, 0x58, 0x5A, 0x00 }; +const uint8_t lzma_footer_magic[2] = { 0x59, 0x5A }; + + +extern LZMA_API(lzma_ret) +lzma_stream_flags_compare( + const lzma_stream_flags *a, const lzma_stream_flags *b) +{ + // We can compare only version 0 structures. + if (a->version != 0 || b->version != 0) + return LZMA_OPTIONS_ERROR; + + // Check type + if ((unsigned int)(a->check) > LZMA_CHECK_ID_MAX + || (unsigned int)(b->check) > LZMA_CHECK_ID_MAX) + return LZMA_PROG_ERROR; + + if (a->check != b->check) + return LZMA_DATA_ERROR; + + // Backward Sizes are compared only if they are known in both. + if (a->backward_size != LZMA_VLI_UNKNOWN + && b->backward_size != LZMA_VLI_UNKNOWN) { + if (!is_backward_size_valid(a) || !is_backward_size_valid(b)) + return LZMA_PROG_ERROR; + + if (a->backward_size != b->backward_size) + return LZMA_DATA_ERROR; + } + + return LZMA_OK; +} diff --git a/src/native/external/xz/src/liblzma/common/stream_flags_common.h b/src/native/external/xz/src/liblzma/common/stream_flags_common.h new file mode 100644 index 00000000000000..28729dbcb6f286 --- /dev/null +++ b/src/native/external/xz/src/liblzma/common/stream_flags_common.h @@ -0,0 +1,35 @@ +// SPDX-License-Identifier: 0BSD + +/////////////////////////////////////////////////////////////////////////////// +// +/// \file stream_flags_common.h +/// \brief Common stuff for Stream flags coders +// +// Author: Lasse Collin +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef LZMA_STREAM_FLAGS_COMMON_H +#define LZMA_STREAM_FLAGS_COMMON_H + +#include "common.h" + +/// Size of the Stream Flags field +#define LZMA_STREAM_FLAGS_SIZE 2 + +lzma_attr_visibility_hidden +extern const uint8_t lzma_header_magic[6]; + +lzma_attr_visibility_hidden +extern const uint8_t lzma_footer_magic[2]; + + +static inline bool +is_backward_size_valid(const lzma_stream_flags *options) +{ + return options->backward_size >= LZMA_BACKWARD_SIZE_MIN + && options->backward_size <= LZMA_BACKWARD_SIZE_MAX + && (options->backward_size & 3) == 0; +} + +#endif diff --git a/src/native/external/xz/src/liblzma/common/stream_flags_decoder.c b/src/native/external/xz/src/liblzma/common/stream_flags_decoder.c new file mode 100644 index 00000000000000..522c98b6fd5cc8 --- /dev/null +++ b/src/native/external/xz/src/liblzma/common/stream_flags_decoder.c @@ -0,0 +1,87 @@ +// SPDX-License-Identifier: 0BSD + +/////////////////////////////////////////////////////////////////////////////// +// +/// \file stream_flags_decoder.c +/// \brief Decodes Stream Header and Stream Footer from .xz files +// +// Author: Lasse Collin +// +/////////////////////////////////////////////////////////////////////////////// + +#include "stream_flags_common.h" + + +static bool +stream_flags_decode(lzma_stream_flags *options, const uint8_t *in) +{ + // Reserved bits must be unset. + if (in[0] != 0x00 || (in[1] & 0xF0)) + return true; + + options->version = 0; + options->check = in[1] & 0x0F; + + return false; +} + + +extern LZMA_API(lzma_ret) +lzma_stream_header_decode(lzma_stream_flags *options, const uint8_t *in) +{ + // Magic + if (memcmp(in, lzma_header_magic, sizeof(lzma_header_magic)) != 0) + return LZMA_FORMAT_ERROR; + + // Verify the CRC32 so we can distinguish between corrupt + // and unsupported files. + const uint32_t crc = lzma_crc32(in + sizeof(lzma_header_magic), + LZMA_STREAM_FLAGS_SIZE, 0); + if (crc != read32le(in + sizeof(lzma_header_magic) + + LZMA_STREAM_FLAGS_SIZE)) { +#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION + return LZMA_DATA_ERROR; +#endif + } + + // Stream Flags + if (stream_flags_decode(options, in + sizeof(lzma_header_magic))) + return LZMA_OPTIONS_ERROR; + + // Set Backward Size to indicate unknown value. That way + // lzma_stream_flags_compare() can be used to compare Stream Header + // and Stream Footer while keeping it useful also for comparing + // two Stream Footers. + options->backward_size = LZMA_VLI_UNKNOWN; + + return LZMA_OK; +} + + +extern LZMA_API(lzma_ret) +lzma_stream_footer_decode(lzma_stream_flags *options, const uint8_t *in) +{ + // Magic + if (memcmp(in + sizeof(uint32_t) * 2 + LZMA_STREAM_FLAGS_SIZE, + lzma_footer_magic, sizeof(lzma_footer_magic)) != 0) + return LZMA_FORMAT_ERROR; + + // CRC32 + const uint32_t crc = lzma_crc32(in + sizeof(uint32_t), + sizeof(uint32_t) + LZMA_STREAM_FLAGS_SIZE, 0); + if (crc != read32le(in)) { +#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION + return LZMA_DATA_ERROR; +#endif + } + + // Stream Flags + if (stream_flags_decode(options, in + sizeof(uint32_t) * 2)) + return LZMA_OPTIONS_ERROR; + + // Backward Size + options->backward_size = read32le(in + sizeof(uint32_t)); + options->backward_size = (options->backward_size + 1) * 4; + + return LZMA_OK; +} diff --git a/src/native/external/xz/src/liblzma/common/stream_flags_encoder.c b/src/native/external/xz/src/liblzma/common/stream_flags_encoder.c new file mode 100644 index 00000000000000..f94b5cd0a2373e --- /dev/null +++ b/src/native/external/xz/src/liblzma/common/stream_flags_encoder.c @@ -0,0 +1,85 @@ +// SPDX-License-Identifier: 0BSD + +/////////////////////////////////////////////////////////////////////////////// +// +/// \file stream_flags_encoder.c +/// \brief Encodes Stream Header and Stream Footer for .xz files +// +// Author: Lasse Collin +// +/////////////////////////////////////////////////////////////////////////////// + +#include "stream_flags_common.h" + + +static bool +stream_flags_encode(const lzma_stream_flags *options, uint8_t *out) +{ + if ((unsigned int)(options->check) > LZMA_CHECK_ID_MAX) + return true; + + out[0] = 0x00; + out[1] = options->check; + + return false; +} + + +extern LZMA_API(lzma_ret) +lzma_stream_header_encode(const lzma_stream_flags *options, uint8_t *out) +{ + assert(sizeof(lzma_header_magic) + LZMA_STREAM_FLAGS_SIZE + + 4 == LZMA_STREAM_HEADER_SIZE); + + if (options->version != 0) + return LZMA_OPTIONS_ERROR; + + // Magic + memcpy(out, lzma_header_magic, sizeof(lzma_header_magic)); + + // Stream Flags + if (stream_flags_encode(options, out + sizeof(lzma_header_magic))) + return LZMA_PROG_ERROR; + + // CRC32 of the Stream Header + const uint32_t crc = lzma_crc32(out + sizeof(lzma_header_magic), + LZMA_STREAM_FLAGS_SIZE, 0); + + write32le(out + sizeof(lzma_header_magic) + LZMA_STREAM_FLAGS_SIZE, + crc); + + return LZMA_OK; +} + + +extern LZMA_API(lzma_ret) +lzma_stream_footer_encode(const lzma_stream_flags *options, uint8_t *out) +{ + assert(2 * 4 + LZMA_STREAM_FLAGS_SIZE + sizeof(lzma_footer_magic) + == LZMA_STREAM_HEADER_SIZE); + + if (options->version != 0) + return LZMA_OPTIONS_ERROR; + + // Backward Size + if (!is_backward_size_valid(options)) + return LZMA_PROG_ERROR; + + write32le(out + 4, options->backward_size / 4 - 1); + + // Stream Flags + if (stream_flags_encode(options, out + 2 * 4)) + return LZMA_PROG_ERROR; + + // CRC32 + const uint32_t crc = lzma_crc32( + out + 4, 4 + LZMA_STREAM_FLAGS_SIZE, 0); + + write32le(out, crc); + + // Magic + memcpy(out + 2 * 4 + LZMA_STREAM_FLAGS_SIZE, + lzma_footer_magic, sizeof(lzma_footer_magic)); + + return LZMA_OK; +} diff --git a/src/native/external/xz/src/liblzma/common/string_conversion.c b/src/native/external/xz/src/liblzma/common/string_conversion.c new file mode 100644 index 00000000000000..015acf225856fd --- /dev/null +++ b/src/native/external/xz/src/liblzma/common/string_conversion.c @@ -0,0 +1,1363 @@ +// SPDX-License-Identifier: 0BSD + +/////////////////////////////////////////////////////////////////////////////// +// +/// \file string_conversion.c +/// \brief Conversion of strings to filter chain and vice versa +// +// Author: Lasse Collin +// +/////////////////////////////////////////////////////////////////////////////// + +#include "filter_common.h" + + +// liblzma itself doesn't use gettext to translate messages. +// Mark the strings still so that xz can translate them. +#define N_(msgid) msgid + + +///////////////////// +// String building // +///////////////////// + +/// How much memory to allocate for strings. For now, no realloc is used +/// so this needs to be big enough even though there of course is +/// an overflow check still. +/// +/// FIXME? Using a fixed size is wasteful if the application doesn't free +/// the string fairly quickly but this can be improved later if needed. +#define STR_ALLOC_SIZE 800 + + +typedef struct { + char *buf; + size_t pos; +} lzma_str; + + +static lzma_ret +str_init(lzma_str *str, const lzma_allocator *allocator) +{ + str->buf = lzma_alloc(STR_ALLOC_SIZE, allocator); + if (str->buf == NULL) + return LZMA_MEM_ERROR; + + str->pos = 0; + return LZMA_OK; +} + + +static void +str_free(lzma_str *str, const lzma_allocator *allocator) +{ + lzma_free(str->buf, allocator); + return; +} + + +static bool +str_is_full(const lzma_str *str) +{ + return str->pos == STR_ALLOC_SIZE - 1; +} + + +static lzma_ret +str_finish(char **dest, lzma_str *str, const lzma_allocator *allocator) +{ + if (str_is_full(str)) { + // The preallocated buffer was too small. + // This shouldn't happen as STR_ALLOC_SIZE should + // be adjusted if new filters are added. + lzma_free(str->buf, allocator); + *dest = NULL; + assert(0); + return LZMA_PROG_ERROR; + } + + str->buf[str->pos] = '\0'; + *dest = str->buf; + return LZMA_OK; +} + + +static void +str_append_str(lzma_str *str, const char *s) +{ + const size_t len = strlen(s); + const size_t limit = STR_ALLOC_SIZE - 1 - str->pos; + const size_t copy_size = my_min(len, limit); + + memcpy(str->buf + str->pos, s, copy_size); + str->pos += copy_size; + return; +} + + +static void +str_append_u32(lzma_str *str, uint32_t v, bool use_byte_suffix) +{ + if (v == 0) { + str_append_str(str, "0"); + } else { + // NOTE: Don't use plain "B" because xz and the parser in this + // file don't support it and at glance it may look like 8 + // (there cannot be a space before the suffix). + static const char suffixes[4][4] = { "", "KiB", "MiB", "GiB" }; + + size_t suf = 0; + if (use_byte_suffix) { + while ((v & 1023) == 0 + && suf < ARRAY_SIZE(suffixes) - 1) { + v >>= 10; + ++suf; + } + } + + // UINT32_MAX in base 10 would need 10 + 1 bytes. Remember + // that initializing to "" initializes all elements to + // zero so '\0'-termination gets handled by this. + char buf[16] = ""; + size_t pos = sizeof(buf) - 1; + + do { + buf[--pos] = '0' + (v % 10); + v /= 10; + } while (v != 0); + + str_append_str(str, buf + pos); + str_append_str(str, suffixes[suf]); + } + + return; +} + + +////////////////////////////////////////////// +// Parsing and stringification declarations // +////////////////////////////////////////////// + +/// Maximum length for filter and option names. +/// 11 chars + terminating '\0' + sizeof(uint32_t) = 16 bytes +#define NAME_LEN_MAX 11 + + +/// For option_map.flags: Use .u.map to do convert the input value +/// to an integer. Without this flag, .u.range.{min,max} are used +/// as the allowed range for the integer. +#define OPTMAP_USE_NAME_VALUE_MAP 0x01 + +/// For option_map.flags: Allow KiB/MiB/GiB in input string and use them in +/// the stringified output if the value is an exact multiple of these. +/// This is used e.g. for LZMA1/2 dictionary size. +#define OPTMAP_USE_BYTE_SUFFIX 0x02 + +/// For option_map.flags: If the integer value is zero then this option +/// won't be included in the stringified output. It's used e.g. for +/// BCJ filter start offset which usually is zero. +#define OPTMAP_NO_STRFY_ZERO 0x04 + +/// Possible values for option_map.type. Since OPTMAP_TYPE_UINT32 is 0, +/// it doesn't need to be specified in the initializers as it is +/// the implicit value. +enum { + OPTMAP_TYPE_UINT32, + OPTMAP_TYPE_LZMA_MODE, + OPTMAP_TYPE_LZMA_MATCH_FINDER, + OPTMAP_TYPE_LZMA_PRESET, +}; + + +/// This is for mapping string values in options to integers. +/// The last element of an array must have "" as the name. +/// It's used e.g. for match finder names in LZMA1/2. +typedef struct { + const char name[NAME_LEN_MAX + 1]; + const uint32_t value; +} name_value_map; + + +/// Each filter that has options needs an array of option_map structures. +/// The array doesn't need to be terminated as the functions take the +/// length of the array as an argument. +/// +/// When converting a string to filter options structure, option values +/// will be handled in a few different ways: +/// +/// (1) If .type equals OPTMAP_TYPE_LZMA_PRESET then LZMA1/2 preset string +/// is handled specially. +/// +/// (2) If .flags has OPTMAP_USE_NAME_VALUE_MAP set then the string is +/// converted to an integer using the name_value_map pointed by .u.map. +/// The last element in .u.map must have .name = "" as the terminator. +/// +/// (3) Otherwise the string is treated as a non-negative unsigned decimal +/// integer which must be in the range set in .u.range. If .flags has +/// OPTMAP_USE_BYTE_SUFFIX then KiB, MiB, and GiB suffixes are allowed. +/// +/// The integer value from (2) or (3) is then stored to filter_options +/// at the offset specified in .offset using the type specified in .type +/// (default is uint32_t). +/// +/// Stringifying a filter is done by processing a given number of options +/// in order from the beginning of an option_map array. The integer is +/// read from filter_options at .offset using the type from .type. +/// +/// If the integer is zero and .flags has OPTMAP_NO_STRFY_ZERO then the +/// option is skipped. +/// +/// If .flags has OPTMAP_USE_NAME_VALUE_MAP set then .u.map will be used +/// to convert the option to a string. If the map doesn't contain a string +/// for the integer value then "UNKNOWN" is used. +/// +/// If .flags doesn't have OPTMAP_USE_NAME_VALUE_MAP set then the integer is +/// converted to a decimal value. If OPTMAP_USE_BYTE_SUFFIX is used then KiB, +/// MiB, or GiB suffix is used if the value is an exact multiple of these. +/// Plain "B" suffix is never used. +typedef struct { + char name[NAME_LEN_MAX + 1]; + uint8_t type; + uint8_t flags; + uint16_t offset; + + union { + // NVHPC has problems with unions that contain pointers that + // are not the first members, so keep "map" at the top. + const name_value_map *map; + + struct { + uint32_t min; + uint32_t max; + } range; + } u; +} option_map; + + +static const char *parse_options(const char **const str, const char *str_end, + void *filter_options, + const option_map *const optmap, const size_t optmap_size); + + +///////// +// BCJ // +///////// + +#if defined(HAVE_ENCODER_X86) \ + || defined(HAVE_DECODER_X86) \ + || defined(HAVE_ENCODER_ARM) \ + || defined(HAVE_DECODER_ARM) \ + || defined(HAVE_ENCODER_ARMTHUMB) \ + || defined(HAVE_DECODER_ARMTHUMB) \ + || defined(HAVE_ENCODER_ARM64) \ + || defined(HAVE_DECODER_ARM64) \ + || defined(HAVE_ENCODER_POWERPC) \ + || defined(HAVE_DECODER_POWERPC) \ + || defined(HAVE_ENCODER_IA64) \ + || defined(HAVE_DECODER_IA64) \ + || defined(HAVE_ENCODER_SPARC) \ + || defined(HAVE_DECODER_SPARC) \ + || defined(HAVE_ENCODER_RISCV) \ + || defined(HAVE_DECODER_RISCV) +static const option_map bcj_optmap[] = { + { + .name = "start", + .flags = OPTMAP_NO_STRFY_ZERO | OPTMAP_USE_BYTE_SUFFIX, + .offset = offsetof(lzma_options_bcj, start_offset), + .u.range.min = 0, + .u.range.max = UINT32_MAX, + } +}; + + +static const char * +parse_bcj(const char **const str, const char *str_end, void *filter_options) +{ + // filter_options was zeroed on allocation and that is enough + // for the default value. + return parse_options(str, str_end, filter_options, + bcj_optmap, ARRAY_SIZE(bcj_optmap)); +} +#endif + + +/////////// +// Delta // +/////////// + +#if defined(HAVE_ENCODER_DELTA) || defined(HAVE_DECODER_DELTA) +static const option_map delta_optmap[] = { + { + .name = "dist", + .offset = offsetof(lzma_options_delta, dist), + .u.range.min = LZMA_DELTA_DIST_MIN, + .u.range.max = LZMA_DELTA_DIST_MAX, + } +}; + + +static const char * +parse_delta(const char **const str, const char *str_end, void *filter_options) +{ + lzma_options_delta *opts = filter_options; + opts->type = LZMA_DELTA_TYPE_BYTE; + opts->dist = LZMA_DELTA_DIST_MIN; + + return parse_options(str, str_end, filter_options, + delta_optmap, ARRAY_SIZE(delta_optmap)); +} +#endif + + +/////////////////// +// LZMA1 & LZMA2 // +/////////////////// + +/// Help string for presets +#define LZMA12_PRESET_STR "0-9[e]" + + +static const char * +parse_lzma12_preset(const char **const str, const char *str_end, + uint32_t *preset) +{ + assert(*str < str_end); + + if (!(**str >= '0' && **str <= '9')) + return N_("Unsupported preset"); + + *preset = (uint32_t)(**str - '0'); + + // NOTE: Remember to update LZMA12_PRESET_STR if this is modified! + while (++*str < str_end) { + switch (**str) { + case 'e': + *preset |= LZMA_PRESET_EXTREME; + break; + + default: + return N_("Unsupported flag in the preset"); + } + } + + return NULL; +} + + +static const char * +set_lzma12_preset(const char **const str, const char *str_end, + void *filter_options) +{ + uint32_t preset; + const char *errmsg = parse_lzma12_preset(str, str_end, &preset); + if (errmsg != NULL) + return errmsg; + + lzma_options_lzma *opts = filter_options; + if (lzma_lzma_preset(opts, preset)) + return N_("Unsupported preset"); + + return NULL; +} + + +static const name_value_map lzma12_mode_map[] = { + { "fast", LZMA_MODE_FAST }, + { "normal", LZMA_MODE_NORMAL }, + { "", 0 } +}; + + +static const name_value_map lzma12_mf_map[] = { + { "hc3", LZMA_MF_HC3 }, + { "hc4", LZMA_MF_HC4 }, + { "bt2", LZMA_MF_BT2 }, + { "bt3", LZMA_MF_BT3 }, + { "bt4", LZMA_MF_BT4 }, + { "", 0 } +}; + + +static const option_map lzma12_optmap[] = { + { + .name = "preset", + .type = OPTMAP_TYPE_LZMA_PRESET, + }, { + .name = "dict", + .flags = OPTMAP_USE_BYTE_SUFFIX, + .offset = offsetof(lzma_options_lzma, dict_size), + .u.range.min = LZMA_DICT_SIZE_MIN, + // FIXME? The max is really max for encoding but decoding + // would allow 4 GiB - 1 B. + .u.range.max = (UINT32_C(1) << 30) + (UINT32_C(1) << 29), + }, { + .name = "lc", + .offset = offsetof(lzma_options_lzma, lc), + .u.range.min = LZMA_LCLP_MIN, + .u.range.max = LZMA_LCLP_MAX, + }, { + .name = "lp", + .offset = offsetof(lzma_options_lzma, lp), + .u.range.min = LZMA_LCLP_MIN, + .u.range.max = LZMA_LCLP_MAX, + }, { + .name = "pb", + .offset = offsetof(lzma_options_lzma, pb), + .u.range.min = LZMA_PB_MIN, + .u.range.max = LZMA_PB_MAX, + }, { + .name = "mode", + .type = OPTMAP_TYPE_LZMA_MODE, + .flags = OPTMAP_USE_NAME_VALUE_MAP, + .offset = offsetof(lzma_options_lzma, mode), + .u.map = lzma12_mode_map, + }, { + .name = "nice", + .offset = offsetof(lzma_options_lzma, nice_len), + .u.range.min = 2, + .u.range.max = 273, + }, { + .name = "mf", + .type = OPTMAP_TYPE_LZMA_MATCH_FINDER, + .flags = OPTMAP_USE_NAME_VALUE_MAP, + .offset = offsetof(lzma_options_lzma, mf), + .u.map = lzma12_mf_map, + }, { + .name = "depth", + .offset = offsetof(lzma_options_lzma, depth), + .u.range.min = 0, + .u.range.max = UINT32_MAX, + } +}; + + +static const char * +parse_lzma12(const char **const str, const char *str_end, void *filter_options) +{ + lzma_options_lzma *opts = filter_options; + + // It cannot fail. + const bool preset_ret = lzma_lzma_preset(opts, LZMA_PRESET_DEFAULT); + assert(!preset_ret); + (void)preset_ret; + + const char *errmsg = parse_options(str, str_end, filter_options, + lzma12_optmap, ARRAY_SIZE(lzma12_optmap)); + if (errmsg != NULL) + return errmsg; + + if (opts->lc + opts->lp > LZMA_LCLP_MAX) + return N_("The sum of lc and lp must not exceed 4"); + + return NULL; +} + + +///////////////////////////////////////// +// Generic parsing and stringification // +///////////////////////////////////////// + +static const struct { + /// Name of the filter + char name[NAME_LEN_MAX + 1]; + + /// For lzma_str_to_filters: + /// Size of the filter-specific options structure. + uint32_t opts_size; + + /// Filter ID + lzma_vli id; + + /// For lzma_str_to_filters: + /// Function to parse the filter-specific options. The filter_options + /// will already have been allocated using lzma_alloc_zero(). + const char *(*parse)(const char **str, const char *str_end, + void *filter_options); + + /// For lzma_str_from_filters: + /// If the flag LZMA_STR_ENCODER is used then the first + /// strfy_encoder elements of optmap are stringified. + /// With LZMA_STR_DECODER strfy_decoder is used. + /// Currently encoders use all options that decoders do but if + /// that changes then this needs to be changed too, for example, + /// add a new OPTMAP flag to skip printing some decoder-only options. + const option_map *optmap; + uint8_t strfy_encoder; + uint8_t strfy_decoder; + + /// For lzma_str_from_filters: + /// If true, lzma_filter.options is allowed to be NULL. In that case, + /// only the filter name is printed without any options. + bool allow_null; + +} filter_name_map[] = { +#if defined (HAVE_ENCODER_LZMA1) || defined(HAVE_DECODER_LZMA1) + { "lzma1", sizeof(lzma_options_lzma), LZMA_FILTER_LZMA1, + &parse_lzma12, lzma12_optmap, 9, 5, false }, +#endif + +#if defined(HAVE_ENCODER_LZMA2) || defined(HAVE_DECODER_LZMA2) + { "lzma2", sizeof(lzma_options_lzma), LZMA_FILTER_LZMA2, + &parse_lzma12, lzma12_optmap, 9, 2, false }, +#endif + +#if defined(HAVE_ENCODER_X86) || defined(HAVE_DECODER_X86) + { "x86", sizeof(lzma_options_bcj), LZMA_FILTER_X86, + &parse_bcj, bcj_optmap, 1, 1, true }, +#endif + +#if defined(HAVE_ENCODER_ARM) || defined(HAVE_DECODER_ARM) + { "arm", sizeof(lzma_options_bcj), LZMA_FILTER_ARM, + &parse_bcj, bcj_optmap, 1, 1, true }, +#endif + +#if defined(HAVE_ENCODER_ARMTHUMB) || defined(HAVE_DECODER_ARMTHUMB) + { "armthumb", sizeof(lzma_options_bcj), LZMA_FILTER_ARMTHUMB, + &parse_bcj, bcj_optmap, 1, 1, true }, +#endif + +#if defined(HAVE_ENCODER_ARM64) || defined(HAVE_DECODER_ARM64) + { "arm64", sizeof(lzma_options_bcj), LZMA_FILTER_ARM64, + &parse_bcj, bcj_optmap, 1, 1, true }, +#endif + +#if defined(HAVE_ENCODER_RISCV) || defined(HAVE_DECODER_RISCV) + { "riscv", sizeof(lzma_options_bcj), LZMA_FILTER_RISCV, + &parse_bcj, bcj_optmap, 1, 1, true }, +#endif + +#if defined(HAVE_ENCODER_POWERPC) || defined(HAVE_DECODER_POWERPC) + { "powerpc", sizeof(lzma_options_bcj), LZMA_FILTER_POWERPC, + &parse_bcj, bcj_optmap, 1, 1, true }, +#endif + +#if defined(HAVE_ENCODER_IA64) || defined(HAVE_DECODER_IA64) + { "ia64", sizeof(lzma_options_bcj), LZMA_FILTER_IA64, + &parse_bcj, bcj_optmap, 1, 1, true }, +#endif + +#if defined(HAVE_ENCODER_SPARC) || defined(HAVE_DECODER_SPARC) + { "sparc", sizeof(lzma_options_bcj), LZMA_FILTER_SPARC, + &parse_bcj, bcj_optmap, 1, 1, true }, +#endif + +#if defined(HAVE_ENCODER_DELTA) || defined(HAVE_DECODER_DELTA) + { "delta", sizeof(lzma_options_delta), LZMA_FILTER_DELTA, + &parse_delta, delta_optmap, 1, 1, false }, +#endif +}; + + +/// Decodes options from a string for one filter (name1=value1,name2=value2). +/// Caller must have allocated memory for filter_options already and set +/// the initial default values. This is called from the filter-specific +/// parse_* functions. +/// +/// The input string starts at *str and the address in str_end is the first +/// char that is not part of the string anymore. So no '\0' terminator is +/// used. *str is advanced every time something has been decoded successfully. +static const char * +parse_options(const char **const str, const char *str_end, + void *filter_options, + const option_map *const optmap, const size_t optmap_size) +{ + while (*str < str_end && **str != '\0') { + // Each option is of the form name=value. + // Commas (',') separate options. Extra commas are ignored. + // Ignoring extra commas makes it simpler if an optional + // option stored in a shell variable which can be empty. + if (**str == ',') { + ++*str; + continue; + } + + // Find where the next name=value ends. + const size_t str_len = (size_t)(str_end - *str); + const char *name_eq_value_end = memchr(*str, ',', str_len); + if (name_eq_value_end == NULL) + name_eq_value_end = str_end; + + const char *equals_sign = memchr(*str, '=', + (size_t)(name_eq_value_end - *str)); + + // Fail if the '=' wasn't found or the option name is missing + // (the first char is '='). + if (equals_sign == NULL || **str == '=') + return N_("Options must be 'name=value' pairs " + "separated with commas"); + + // Reject a too long option name so that the memcmp() + // in the loop below won't read past the end of the + // string in optmap[i].name. + const size_t name_len = (size_t)(equals_sign - *str); + if (name_len > NAME_LEN_MAX) + return N_("Unknown option name"); + + // Find the option name from optmap[]. + size_t i = 0; + while (true) { + if (i == optmap_size) + return N_("Unknown option name"); + + if (memcmp(*str, optmap[i].name, name_len) == 0 + && optmap[i].name[name_len] == '\0') + break; + + ++i; + } + + // The input string is good at least until the start of + // the option value. + *str = equals_sign + 1; + + // The code assumes that the option value isn't an empty + // string so check it here. + const size_t value_len = (size_t)(name_eq_value_end - *str); + if (value_len == 0) + return N_("Option value cannot be empty"); + + // LZMA1/2 preset has its own parsing function. + if (optmap[i].type == OPTMAP_TYPE_LZMA_PRESET) { + const char *errmsg = set_lzma12_preset(str, + name_eq_value_end, filter_options); + if (errmsg != NULL) + return errmsg; + + continue; + } + + // It's an integer value. + uint32_t v; + if (optmap[i].flags & OPTMAP_USE_NAME_VALUE_MAP) { + // The integer is picked from a string-to-integer map. + // + // Reject a too long value string so that the memcmp() + // in the loop below won't read past the end of the + // string in optmap[i].u.map[j].name. + if (value_len > NAME_LEN_MAX) + return N_("Invalid option value"); + + const name_value_map *map = optmap[i].u.map; + size_t j = 0; + while (true) { + // The array is terminated with an empty name. + if (map[j].name[0] == '\0') + return N_("Invalid option value"); + + if (memcmp(*str, map[j].name, value_len) == 0 + && map[j].name[value_len] + == '\0') { + v = map[j].value; + break; + } + + ++j; + } + } else if (**str < '0' || **str > '9') { + // Note that "max" isn't supported while it is + // supported in xz. It's not useful here. + return N_("Value is not a non-negative " + "decimal integer"); + } else { + // strtoul() has locale-specific behavior so it cannot + // be relied on to get reproducible results since we + // cannot change the locate in a thread-safe library. + // It also needs '\0'-termination. + // + // Use a temporary pointer so that *str will point + // to the beginning of the value string in case + // an error occurs. + const char *p = *str; + v = 0; + do { + if (v > UINT32_MAX / 10) + return N_("Value out of range"); + + v *= 10; + + const uint32_t add = (uint32_t)(*p - '0'); + if (UINT32_MAX - add < v) + return N_("Value out of range"); + + v += add; + ++p; + } while (p < name_eq_value_end + && *p >= '0' && *p <= '9'); + + if (p < name_eq_value_end) { + // Remember this position so that it can be + // used for error messages that are + // specifically about the suffix. (Out of + // range values are about the whole value + // and those error messages point to the + // beginning of the number part, + // not to the suffix.) + const char *multiplier_start = p; + + // If multiplier suffix shouldn't be used + // then don't allow them even if the value + // would stay within limits. This is a somewhat + // unnecessary check but it rejects silly + // things like lzma2:pb=0MiB which xz allows. + if ((optmap[i].flags & OPTMAP_USE_BYTE_SUFFIX) + == 0) { + *str = multiplier_start; + return N_("This option does not " + "support any multiplier " + "suffixes"); + } + + uint32_t shift; + + switch (*p) { + case 'k': + case 'K': + shift = 10; + break; + + case 'm': + case 'M': + shift = 20; + break; + + case 'g': + case 'G': + shift = 30; + break; + + default: + *str = multiplier_start; + + // TRANSLATORS: Don't translate the + // suffixes "KiB", "MiB", or "GiB" + // because a user can only specify + // untranslated suffixes. + return N_("Invalid multiplier suffix " + "(KiB, MiB, or GiB)"); + } + + ++p; + + // Allow "M", "Mi", "MB", "MiB" and the same + // for the other five characters from the + // switch-statement above. All are handled + // as base-2 (perhaps a mistake, perhaps not). + // Note that 'i' and 'B' are case sensitive. + if (p < name_eq_value_end && *p == 'i') + ++p; + + if (p < name_eq_value_end && *p == 'B') + ++p; + + // Now we must have no chars remaining. + if (p < name_eq_value_end) { + *str = multiplier_start; + return N_("Invalid multiplier suffix " + "(KiB, MiB, or GiB)"); + } + + if (v > (UINT32_MAX >> shift)) + return N_("Value out of range"); + + v <<= shift; + } + + if (v < optmap[i].u.range.min + || v > optmap[i].u.range.max) + return N_("Value out of range"); + } + + // Set the value in filter_options. Enums are handled + // specially since the underlying type isn't the same + // as uint32_t on all systems. + void *ptr = (char *)filter_options + optmap[i].offset; + switch (optmap[i].type) { + case OPTMAP_TYPE_LZMA_MODE: + *(lzma_mode *)ptr = (lzma_mode)v; + break; + + case OPTMAP_TYPE_LZMA_MATCH_FINDER: + *(lzma_match_finder *)ptr = (lzma_match_finder)v; + break; + + default: + *(uint32_t *)ptr = v; + break; + } + + // This option has been successfully handled. + *str = name_eq_value_end; + } + + // No errors. + return NULL; +} + + +/// Finds the name of the filter at the beginning of the string and +/// calls filter_name_map[i].parse() to decode the filter-specific options. +/// The caller must have set str_end so that exactly one filter and its +/// options are present without any trailing characters. +static const char * +parse_filter(const char **const str, const char *str_end, lzma_filter *filter, + const lzma_allocator *allocator, bool only_xz) +{ + // Search for a colon or equals sign that would separate the filter + // name from filter options. If neither is found, then the input + // string only contains a filter name and there are no options. + // + // First assume that a colon or equals sign won't be found: + const char *name_end = str_end; + const char *opts_start = str_end; + + for (const char *p = *str; p < str_end; ++p) { + if (*p == ':' || *p == '=') { + name_end = p; + + // Filter options (name1=value1,name2=value2,...) + // begin after the colon or equals sign. + opts_start = p + 1; + break; + } + } + + // Reject a too long filter name so that the memcmp() + // in the loop below won't read past the end of the + // string in filter_name_map[i].name. + const size_t name_len = (size_t)(name_end - *str); + if (name_len > NAME_LEN_MAX) + return N_("Unknown filter name"); + + for (size_t i = 0; i < ARRAY_SIZE(filter_name_map); ++i) { + if (memcmp(*str, filter_name_map[i].name, name_len) == 0 + && filter_name_map[i].name[name_len] == '\0') { + if (only_xz && filter_name_map[i].id + >= LZMA_FILTER_RESERVED_START) + return N_("This filter cannot be used in " + "the .xz format"); + + // Allocate the filter-specific options and + // initialize the memory with zeros. + void *options = lzma_alloc_zero( + filter_name_map[i].opts_size, + allocator); + if (options == NULL) + return N_("Memory allocation failed"); + + // Filter name was found so the input string is good + // at least this far. + *str = opts_start; + + const char *errmsg = filter_name_map[i].parse( + str, str_end, options); + if (errmsg != NULL) { + lzma_free(options, allocator); + return errmsg; + } + + // *filter is modified only when parsing is successful. + filter->id = filter_name_map[i].id; + filter->options = options; + return NULL; + } + } + + return N_("Unknown filter name"); +} + + +/// Converts the string to a filter chain (array of lzma_filter structures). +/// +/// *str is advanced every time something has been decoded successfully. +/// This way the caller knows where in the string a possible error occurred. +static const char * +str_to_filters(const char **const str, lzma_filter *filters, uint32_t flags, + const lzma_allocator *allocator) +{ + const char *errmsg; + + // Skip leading spaces. + while (**str == ' ') + ++*str; + + if (**str == '\0') + return N_("Empty string is not allowed, " + "try '6' if a default value is needed"); + + // Detect the type of the string. + // + // A string beginning with a digit or a string beginning with + // one dash and a digit are treated as presets. Trailing spaces + // will be ignored too (leading spaces were already ignored above). + // + // For example, "6", "7 ", "-9e", or " -3 " are treated as presets. + // Strings like "-" or "- " aren't preset. +#define MY_IS_DIGIT(c) ((c) >= '0' && (c) <= '9') + if (MY_IS_DIGIT(**str) || (**str == '-' && MY_IS_DIGIT((*str)[1]))) { + if (**str == '-') + ++*str; + + // Ignore trailing spaces. + const size_t str_len = strlen(*str); + const char *str_end = memchr(*str, ' ', str_len); + if (str_end != NULL) { + // There is at least one trailing space. Check that + // there are no chars other than spaces. + for (size_t i = 1; str_end[i] != '\0'; ++i) + if (str_end[i] != ' ') + return N_("Unsupported preset"); + } else { + // There are no trailing spaces. Use the whole string. + str_end = *str + str_len; + } + + uint32_t preset; + errmsg = parse_lzma12_preset(str, str_end, &preset); + if (errmsg != NULL) + return errmsg; + + lzma_options_lzma *opts = lzma_alloc(sizeof(*opts), allocator); + if (opts == NULL) + return N_("Memory allocation failed"); + + if (lzma_lzma_preset(opts, preset)) { + lzma_free(opts, allocator); + return N_("Unsupported preset"); + } + + filters[0].id = LZMA_FILTER_LZMA2; + filters[0].options = opts; + filters[1].id = LZMA_VLI_UNKNOWN; + filters[1].options = NULL; + + return NULL; + } + + // Not a preset so it must be a filter chain. + // + // If LZMA_STR_ALL_FILTERS isn't used we allow only filters that + // can be used in .xz. + const bool only_xz = (flags & LZMA_STR_ALL_FILTERS) == 0; + + // Use a temporary array so that we don't modify the caller-supplied + // one until we know that no errors occurred. + lzma_filter temp_filters[LZMA_FILTERS_MAX + 1]; + + size_t i = 0; + do { + if (i == LZMA_FILTERS_MAX) { + errmsg = N_("The maximum number of filters is four"); + goto error; + } + + // Skip "--" if present. + if ((*str)[0] == '-' && (*str)[1] == '-') + *str += 2; + + // Locate the end of "filter:name1=value1,name2=value2", + // stopping at the first "--" or a single space. + const char *filter_end = *str; + while (filter_end[0] != '\0') { + if ((filter_end[0] == '-' && filter_end[1] == '-') + || filter_end[0] == ' ') + break; + + ++filter_end; + } + + // Inputs that have "--" at the end or "-- " in the middle + // will result in an empty filter name. + if (filter_end == *str) { + errmsg = N_("Filter name is missing"); + goto error; + } + + errmsg = parse_filter(str, filter_end, &temp_filters[i], + allocator, only_xz); + if (errmsg != NULL) + goto error; + + // Skip trailing spaces. + while (**str == ' ') + ++*str; + + ++i; + } while (**str != '\0'); + + // Seems to be good, terminate the array so that + // basic validation can be done. + temp_filters[i].id = LZMA_VLI_UNKNOWN; + temp_filters[i].options = NULL; + + // Do basic validation if the application didn't prohibit it. + if ((flags & LZMA_STR_NO_VALIDATION) == 0) { + size_t dummy; + const lzma_ret ret = lzma_validate_chain(temp_filters, &dummy); + assert(ret == LZMA_OK || ret == LZMA_OPTIONS_ERROR); + if (ret != LZMA_OK) { + errmsg = N_("Invalid filter chain " + "('lzma2' missing at the end?)"); + goto error; + } + } + + // All good. Copy the filters to the application supplied array. + memcpy(filters, temp_filters, (i + 1) * sizeof(lzma_filter)); + return NULL; + +error: + // Free the filter options that were successfully decoded. + while (i-- > 0) + lzma_free(temp_filters[i].options, allocator); + + return errmsg; +} + + +extern LZMA_API(const char *) +lzma_str_to_filters(const char *str, int *error_pos, lzma_filter *filters, + uint32_t flags, const lzma_allocator *allocator) +{ + // If error_pos isn't NULL, *error_pos must always be set. + // liblzma <= 5.4.6 and <= 5.6.1 have a bug and don't do this + // when str == NULL or filters == NULL or flags are unsupported. + if (error_pos != NULL) + *error_pos = 0; + + if (str == NULL || filters == NULL) { + // Don't translate this because it's only shown in case of + // a programming error. + return "Unexpected NULL pointer argument(s) " + "to lzma_str_to_filters()"; + } + + // Validate the flags. + const uint32_t supported_flags + = LZMA_STR_ALL_FILTERS + | LZMA_STR_NO_VALIDATION; + + if (flags & ~supported_flags) { + // This message is possible only if the caller uses flags + // that are only supported in a newer liblzma version (or + // the flags are simply buggy). Don't translate this at least + // when liblzma itself doesn't use gettext; xz and liblzma + // are usually upgraded at the same time. + return "Unsupported flags to lzma_str_to_filters()"; + } + + const char *used = str; + const char *errmsg = str_to_filters(&used, filters, flags, allocator); + + if (error_pos != NULL) { + const size_t n = (size_t)(used - str); + *error_pos = n > INT_MAX ? INT_MAX : (int)n; + } + + return errmsg; +} + + +/// Converts options of one filter to a string. +/// +/// The caller must have already put the filter name in the destination +/// string. Since it is possible that no options will be needed, the caller +/// won't have put a delimiter character (':' or '=') in the string yet. +/// We will add it if at least one option will be added to the string. +static void +strfy_filter(lzma_str *dest, const char *delimiter, + const option_map *optmap, size_t optmap_count, + const void *filter_options) +{ + for (size_t i = 0; i < optmap_count; ++i) { + // No attempt is made to reverse LZMA1/2 preset. + if (optmap[i].type == OPTMAP_TYPE_LZMA_PRESET) + continue; + + // All options have integer values, some just are mapped + // to a string with a name_value_map. LZMA1/2 preset + // isn't reversed back to preset=PRESET form. + uint32_t v; + const void *ptr + = (const char *)filter_options + optmap[i].offset; + switch (optmap[i].type) { + case OPTMAP_TYPE_LZMA_MODE: + v = *(const lzma_mode *)ptr; + break; + + case OPTMAP_TYPE_LZMA_MATCH_FINDER: + v = *(const lzma_match_finder *)ptr; + break; + + default: + v = *(const uint32_t *)ptr; + break; + } + + // Skip this if this option should be omitted from + // the string when the value is zero. + if (v == 0 && (optmap[i].flags & OPTMAP_NO_STRFY_ZERO)) + continue; + + // Before the first option we add whatever delimiter + // the caller gave us. For later options a comma is used. + str_append_str(dest, delimiter); + delimiter = ","; + + // Add the option name and equals sign. + str_append_str(dest, optmap[i].name); + str_append_str(dest, "="); + + if (optmap[i].flags & OPTMAP_USE_NAME_VALUE_MAP) { + const name_value_map *map = optmap[i].u.map; + size_t j = 0; + while (true) { + if (map[j].name[0] == '\0') { + str_append_str(dest, "UNKNOWN"); + break; + } + + if (map[j].value == v) { + str_append_str(dest, map[j].name); + break; + } + + ++j; + } + } else { + str_append_u32(dest, v, + optmap[i].flags & OPTMAP_USE_BYTE_SUFFIX); + } + } + + return; +} + + +extern LZMA_API(lzma_ret) +lzma_str_from_filters(char **output_str, const lzma_filter *filters, + uint32_t flags, const lzma_allocator *allocator) +{ + // On error *output_str is always set to NULL. + // Do it as the very first step. + if (output_str == NULL) + return LZMA_PROG_ERROR; + + *output_str = NULL; + + if (filters == NULL) + return LZMA_PROG_ERROR; + + // Validate the flags. + const uint32_t supported_flags + = LZMA_STR_ENCODER + | LZMA_STR_DECODER + | LZMA_STR_GETOPT_LONG + | LZMA_STR_NO_SPACES; + + if (flags & ~supported_flags) + return LZMA_OPTIONS_ERROR; + + // There must be at least one filter. + if (filters[0].id == LZMA_VLI_UNKNOWN) + return LZMA_OPTIONS_ERROR; + + // Allocate memory for the output string. + lzma_str dest; + return_if_error(str_init(&dest, allocator)); + + const bool show_opts = (flags & (LZMA_STR_ENCODER | LZMA_STR_DECODER)); + + const char *opt_delim = (flags & LZMA_STR_GETOPT_LONG) ? "=" : ":"; + + for (size_t i = 0; filters[i].id != LZMA_VLI_UNKNOWN; ++i) { + // If we reach LZMA_FILTERS_MAX, then the filters array + // is too large since the ID cannot be LZMA_VLI_UNKNOWN here. + if (i == LZMA_FILTERS_MAX) { + str_free(&dest, allocator); + return LZMA_OPTIONS_ERROR; + } + + // Don't add a space between filters if the caller + // doesn't want them. + if (i > 0 && !(flags & LZMA_STR_NO_SPACES)) + str_append_str(&dest, " "); + + // Use dashes for xz getopt_long() compatible syntax but also + // use dashes to separate filters when spaces weren't wanted. + if ((flags & LZMA_STR_GETOPT_LONG) + || (i > 0 && (flags & LZMA_STR_NO_SPACES))) + str_append_str(&dest, "--"); + + size_t j = 0; + while (true) { + if (j == ARRAY_SIZE(filter_name_map)) { + // Filter ID in filters[i].id isn't supported. + str_free(&dest, allocator); + return LZMA_OPTIONS_ERROR; + } + + if (filter_name_map[j].id == filters[i].id) { + // Add the filter name. + str_append_str(&dest, filter_name_map[j].name); + + // If only the filter names were wanted then + // skip to the next filter. In this case + // .options is ignored and may be NULL even + // when the filter doesn't allow NULL options. + if (!show_opts) + break; + + if (filters[i].options == NULL) { + if (!filter_name_map[j].allow_null) { + // Filter-specific options + // are missing but with + // this filter the options + // structure is mandatory. + str_free(&dest, allocator); + return LZMA_OPTIONS_ERROR; + } + + // .options is allowed to be NULL. + // There is no need to add any + // options to the string. + break; + } + + // Options structure is available. Add + // the filter options to the string. + const size_t optmap_count + = (flags & LZMA_STR_ENCODER) + ? filter_name_map[j].strfy_encoder + : filter_name_map[j].strfy_decoder; + strfy_filter(&dest, opt_delim, + filter_name_map[j].optmap, + optmap_count, + filters[i].options); + break; + } + + ++j; + } + } + + return str_finish(output_str, &dest, allocator); +} + + +extern LZMA_API(lzma_ret) +lzma_str_list_filters(char **output_str, lzma_vli filter_id, uint32_t flags, + const lzma_allocator *allocator) +{ + // On error *output_str is always set to NULL. + // Do it as the very first step. + if (output_str == NULL) + return LZMA_PROG_ERROR; + + *output_str = NULL; + + // Validate the flags. + const uint32_t supported_flags + = LZMA_STR_ALL_FILTERS + | LZMA_STR_ENCODER + | LZMA_STR_DECODER + | LZMA_STR_GETOPT_LONG; + + if (flags & ~supported_flags) + return LZMA_OPTIONS_ERROR; + + // Allocate memory for the output string. + lzma_str dest; + return_if_error(str_init(&dest, allocator)); + + // If only listing the filter names then separate them with spaces. + // Otherwise use newlines. + const bool show_opts = (flags & (LZMA_STR_ENCODER | LZMA_STR_DECODER)); + const char *filter_delim = show_opts ? "\n" : " "; + + const char *opt_delim = (flags & LZMA_STR_GETOPT_LONG) ? "=" : ":"; + bool first_filter_printed = false; + + for (size_t i = 0; i < ARRAY_SIZE(filter_name_map); ++i) { + // If we are printing only one filter then skip others. + if (filter_id != LZMA_VLI_UNKNOWN + && filter_id != filter_name_map[i].id) + continue; + + // If we are printing only .xz filters then skip the others. + if (filter_name_map[i].id >= LZMA_FILTER_RESERVED_START + && (flags & LZMA_STR_ALL_FILTERS) == 0 + && filter_id == LZMA_VLI_UNKNOWN) + continue; + + // Add a new line if this isn't the first filter being + // written to the string. + if (first_filter_printed) + str_append_str(&dest, filter_delim); + + first_filter_printed = true; + + if (flags & LZMA_STR_GETOPT_LONG) + str_append_str(&dest, "--"); + + str_append_str(&dest, filter_name_map[i].name); + + // If only the filter names were wanted then continue + // to the next filter. + if (!show_opts) + continue; + + const option_map *optmap = filter_name_map[i].optmap; + const char *d = opt_delim; + + const size_t end = (flags & LZMA_STR_ENCODER) + ? filter_name_map[i].strfy_encoder + : filter_name_map[i].strfy_decoder; + + for (size_t j = 0; j < end; ++j) { + // The first option is delimited from the filter + // name using "=" or ":" and the rest of the options + // are separated with ",". + str_append_str(&dest, d); + d = ","; + + // optname= + str_append_str(&dest, optmap[j].name); + str_append_str(&dest, "=<"); + + if (optmap[j].type == OPTMAP_TYPE_LZMA_PRESET) { + // LZMA1/2 preset has its custom help string. + str_append_str(&dest, LZMA12_PRESET_STR); + } else if (optmap[j].flags + & OPTMAP_USE_NAME_VALUE_MAP) { + // Separate the possible option values by "|". + const name_value_map *m = optmap[j].u.map; + for (size_t k = 0; m[k].name[0] != '\0'; ++k) { + if (k > 0) + str_append_str(&dest, "|"); + + str_append_str(&dest, m[k].name); + } + } else { + // Integer range is shown as min-max. + const bool use_byte_suffix = optmap[j].flags + & OPTMAP_USE_BYTE_SUFFIX; + str_append_u32(&dest, optmap[j].u.range.min, + use_byte_suffix); + str_append_str(&dest, "-"); + str_append_u32(&dest, optmap[j].u.range.max, + use_byte_suffix); + } + + str_append_str(&dest, ">"); + } + } + + // If no filters were added to the string then it must be because + // the caller provided an unsupported Filter ID. + if (!first_filter_printed) { + str_free(&dest, allocator); + return LZMA_OPTIONS_ERROR; + } + + return str_finish(output_str, &dest, allocator); +} diff --git a/src/native/external/xz/src/liblzma/common/vli_decoder.c b/src/native/external/xz/src/liblzma/common/vli_decoder.c new file mode 100644 index 00000000000000..7fa5f47e10c47a --- /dev/null +++ b/src/native/external/xz/src/liblzma/common/vli_decoder.c @@ -0,0 +1,85 @@ +// SPDX-License-Identifier: 0BSD + +/////////////////////////////////////////////////////////////////////////////// +// +/// \file vli_decoder.c +/// \brief Decodes variable-length integers +// +// Author: Lasse Collin +// +/////////////////////////////////////////////////////////////////////////////// + +#include "common.h" + + +extern LZMA_API(lzma_ret) +lzma_vli_decode(lzma_vli *restrict vli, size_t *vli_pos, + const uint8_t *restrict in, size_t *restrict in_pos, + size_t in_size) +{ + // If we haven't been given vli_pos, work in single-call mode. + size_t vli_pos_internal = 0; + if (vli_pos == NULL) { + vli_pos = &vli_pos_internal; + *vli = 0; + + // If there's no input, use LZMA_DATA_ERROR. This way it is + // easy to decode VLIs from buffers that have known size, + // and get the correct error code in case the buffer is + // too short. + if (*in_pos >= in_size) + return LZMA_DATA_ERROR; + + } else { + // Initialize *vli when starting to decode a new integer. + if (*vli_pos == 0) + *vli = 0; + + // Validate the arguments. + if (*vli_pos >= LZMA_VLI_BYTES_MAX + || (*vli >> (*vli_pos * 7)) != 0) + return LZMA_PROG_ERROR; + + if (*in_pos >= in_size) + return LZMA_BUF_ERROR; + } + + do { + // Read the next byte. Use a temporary variable so that we + // can update *in_pos immediately. + const uint8_t byte = in[*in_pos]; + ++*in_pos; + + // Add the newly read byte to *vli. + *vli += (lzma_vli)(byte & 0x7F) << (*vli_pos * 7); + ++*vli_pos; + + // Check if this is the last byte of a multibyte integer. + if ((byte & 0x80) == 0) { + // We don't allow using variable-length integers as + // padding i.e. the encoding must use the most the + // compact form. + if (byte == 0x00 && *vli_pos > 1) + return LZMA_DATA_ERROR; + + return vli_pos == &vli_pos_internal + ? LZMA_OK : LZMA_STREAM_END; + } + + // There is at least one more byte coming. If we have already + // read maximum number of bytes, the integer is considered + // corrupt. + // + // If we need bigger integers in future, old versions liblzma + // will confusingly indicate the file being corrupt instead of + // unsupported. I suppose it's still better this way, because + // in the foreseeable future (writing this in 2008) the only + // reason why files would appear having over 63-bit integers + // is that the files are simply corrupt. + if (*vli_pos == LZMA_VLI_BYTES_MAX) + return LZMA_DATA_ERROR; + + } while (*in_pos < in_size); + + return vli_pos == &vli_pos_internal ? LZMA_DATA_ERROR : LZMA_OK; +} diff --git a/src/native/external/xz/src/liblzma/common/vli_encoder.c b/src/native/external/xz/src/liblzma/common/vli_encoder.c new file mode 100644 index 00000000000000..3859006a94f11b --- /dev/null +++ b/src/native/external/xz/src/liblzma/common/vli_encoder.c @@ -0,0 +1,68 @@ +// SPDX-License-Identifier: 0BSD + +/////////////////////////////////////////////////////////////////////////////// +// +/// \file vli_encoder.c +/// \brief Encodes variable-length integers +// +// Author: Lasse Collin +// +/////////////////////////////////////////////////////////////////////////////// + +#include "common.h" + + +extern LZMA_API(lzma_ret) +lzma_vli_encode(lzma_vli vli, size_t *vli_pos, + uint8_t *restrict out, size_t *restrict out_pos, + size_t out_size) +{ + // If we haven't been given vli_pos, work in single-call mode. + size_t vli_pos_internal = 0; + if (vli_pos == NULL) { + vli_pos = &vli_pos_internal; + + // In single-call mode, we expect that the caller has + // reserved enough output space. + if (*out_pos >= out_size) + return LZMA_PROG_ERROR; + } else { + // This never happens when we are called by liblzma, but + // may happen if called directly from an application. + if (*out_pos >= out_size) + return LZMA_BUF_ERROR; + } + + // Validate the arguments. + if (*vli_pos >= LZMA_VLI_BYTES_MAX || vli > LZMA_VLI_MAX) + return LZMA_PROG_ERROR; + + // Shift vli so that the next bits to encode are the lowest. In + // single-call mode this never changes vli since *vli_pos is zero. + vli >>= *vli_pos * 7; + + // Write the non-last bytes in a loop. + while (vli >= 0x80) { + // We don't need *vli_pos during this function call anymore, + // but update it here so that it is ready if we need to + // return before the whole integer has been decoded. + ++*vli_pos; + assert(*vli_pos < LZMA_VLI_BYTES_MAX); + + // Write the next byte. + out[*out_pos] = (uint8_t)(vli) | 0x80; + vli >>= 7; + + if (++*out_pos == out_size) + return vli_pos == &vli_pos_internal + ? LZMA_PROG_ERROR : LZMA_OK; + } + + // Write the last byte. + out[*out_pos] = (uint8_t)(vli); + ++*out_pos; + ++*vli_pos; + + return vli_pos == &vli_pos_internal ? LZMA_OK : LZMA_STREAM_END; + +} diff --git a/src/native/external/xz/src/liblzma/common/vli_size.c b/src/native/external/xz/src/liblzma/common/vli_size.c new file mode 100644 index 00000000000000..c8cb2ec10adeb5 --- /dev/null +++ b/src/native/external/xz/src/liblzma/common/vli_size.c @@ -0,0 +1,29 @@ +// SPDX-License-Identifier: 0BSD + +/////////////////////////////////////////////////////////////////////////////// +// +/// \file vli_size.c +/// \brief Calculates the encoded size of a variable-length integer +// +// Author: Lasse Collin +// +/////////////////////////////////////////////////////////////////////////////// + +#include "common.h" + + +extern LZMA_API(uint32_t) +lzma_vli_size(lzma_vli vli) +{ + if (vli > LZMA_VLI_MAX) + return 0; + + uint32_t i = 0; + do { + vli >>= 7; + ++i; + } while (vli != 0); + + assert(i <= LZMA_VLI_BYTES_MAX); + return i; +} diff --git a/src/native/external/xz/src/liblzma/delta/Makefile.inc b/src/native/external/xz/src/liblzma/delta/Makefile.inc new file mode 100644 index 00000000000000..e5b919e0ede4fb --- /dev/null +++ b/src/native/external/xz/src/liblzma/delta/Makefile.inc @@ -0,0 +1,19 @@ +## SPDX-License-Identifier: 0BSD +## Author: Lasse Collin + +liblzma_la_SOURCES += \ + delta/delta_common.c \ + delta/delta_common.h \ + delta/delta_private.h + +if COND_ENCODER_DELTA +liblzma_la_SOURCES += \ + delta/delta_encoder.c \ + delta/delta_encoder.h +endif + +if COND_DECODER_DELTA +liblzma_la_SOURCES += \ + delta/delta_decoder.c \ + delta/delta_decoder.h +endif diff --git a/src/native/external/xz/src/liblzma/delta/delta_common.c b/src/native/external/xz/src/liblzma/delta/delta_common.c new file mode 100644 index 00000000000000..5dbe253b4b3ab3 --- /dev/null +++ b/src/native/external/xz/src/liblzma/delta/delta_common.c @@ -0,0 +1,72 @@ +// SPDX-License-Identifier: 0BSD + +/////////////////////////////////////////////////////////////////////////////// +// +/// \file delta_common.c +/// \brief Common stuff for Delta encoder and decoder +// +// Author: Lasse Collin +// +/////////////////////////////////////////////////////////////////////////////// + +#include "delta_common.h" +#include "delta_private.h" + + +static void +delta_coder_end(void *coder_ptr, const lzma_allocator *allocator) +{ + lzma_delta_coder *coder = coder_ptr; + lzma_next_end(&coder->next, allocator); + lzma_free(coder, allocator); + return; +} + + +extern lzma_ret +lzma_delta_coder_init(lzma_next_coder *next, const lzma_allocator *allocator, + const lzma_filter_info *filters) +{ + // Allocate memory for the decoder if needed. + lzma_delta_coder *coder = next->coder; + if (coder == NULL) { + coder = lzma_alloc(sizeof(lzma_delta_coder), allocator); + if (coder == NULL) + return LZMA_MEM_ERROR; + + next->coder = coder; + + // End function is the same for encoder and decoder. + next->end = &delta_coder_end; + coder->next = LZMA_NEXT_CODER_INIT; + } + + // Validate the options. + if (lzma_delta_coder_memusage(filters[0].options) == UINT64_MAX) + return LZMA_OPTIONS_ERROR; + + // Set the delta distance. + const lzma_options_delta *opt = filters[0].options; + coder->distance = opt->dist; + + // Initialize the rest of the variables. + coder->pos = 0; + memzero(coder->history, LZMA_DELTA_DIST_MAX); + + // Initialize the next decoder in the chain, if any. + return lzma_next_filter_init(&coder->next, allocator, filters + 1); +} + + +extern uint64_t +lzma_delta_coder_memusage(const void *options) +{ + const lzma_options_delta *opt = options; + + if (opt == NULL || opt->type != LZMA_DELTA_TYPE_BYTE + || opt->dist < LZMA_DELTA_DIST_MIN + || opt->dist > LZMA_DELTA_DIST_MAX) + return UINT64_MAX; + + return sizeof(lzma_delta_coder); +} diff --git a/src/native/external/xz/src/liblzma/delta/delta_common.h b/src/native/external/xz/src/liblzma/delta/delta_common.h new file mode 100644 index 00000000000000..bd0912769724d1 --- /dev/null +++ b/src/native/external/xz/src/liblzma/delta/delta_common.h @@ -0,0 +1,19 @@ +// SPDX-License-Identifier: 0BSD + +/////////////////////////////////////////////////////////////////////////////// +// +/// \file delta_common.h +/// \brief Common stuff for Delta encoder and decoder +// +// Author: Lasse Collin +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef LZMA_DELTA_COMMON_H +#define LZMA_DELTA_COMMON_H + +#include "common.h" + +extern uint64_t lzma_delta_coder_memusage(const void *options); + +#endif diff --git a/src/native/external/xz/src/liblzma/delta/delta_decoder.c b/src/native/external/xz/src/liblzma/delta/delta_decoder.c new file mode 100644 index 00000000000000..9f0d49ca415a45 --- /dev/null +++ b/src/native/external/xz/src/liblzma/delta/delta_decoder.c @@ -0,0 +1,87 @@ +// SPDX-License-Identifier: 0BSD + +/////////////////////////////////////////////////////////////////////////////// +// +/// \file delta_decoder.c +/// \brief Delta filter decoder +// +// Author: Lasse Collin +// +/////////////////////////////////////////////////////////////////////////////// + +#include "delta_decoder.h" +#include "delta_private.h" + + +static void +decode_buffer(lzma_delta_coder *coder, uint8_t *buffer, size_t size) +{ + const size_t distance = coder->distance; + + for (size_t i = 0; i < size; ++i) { + buffer[i] += coder->history[(distance + coder->pos) & 0xFF]; + coder->history[coder->pos-- & 0xFF] = buffer[i]; + } +} + + +// For an unknown reason NVIDIA HPC Compiler needs this pragma +// to produce working code. +#ifdef __NVCOMPILER +# pragma routine novector +#endif +static lzma_ret +delta_decode(void *coder_ptr, const lzma_allocator *allocator, + const uint8_t *restrict in, size_t *restrict in_pos, + size_t in_size, uint8_t *restrict out, + size_t *restrict out_pos, size_t out_size, lzma_action action) +{ + lzma_delta_coder *coder = coder_ptr; + + assert(coder->next.code != NULL); + + const size_t out_start = *out_pos; + + const lzma_ret ret = coder->next.code(coder->next.coder, allocator, + in, in_pos, in_size, out, out_pos, out_size, + action); + + // out might be NULL. In that case size == 0. Null pointer + 0 is + // undefined behavior so skip the call in that case as it would + // do nothing anyway. + const size_t size = *out_pos - out_start; + if (size > 0) + decode_buffer(coder, out + out_start, size); + + return ret; +} + + +extern lzma_ret +lzma_delta_decoder_init(lzma_next_coder *next, const lzma_allocator *allocator, + const lzma_filter_info *filters) +{ + next->code = &delta_decode; + return lzma_delta_coder_init(next, allocator, filters); +} + + +extern lzma_ret +lzma_delta_props_decode(void **options, const lzma_allocator *allocator, + const uint8_t *props, size_t props_size) +{ + if (props_size != 1) + return LZMA_OPTIONS_ERROR; + + lzma_options_delta *opt + = lzma_alloc(sizeof(lzma_options_delta), allocator); + if (opt == NULL) + return LZMA_MEM_ERROR; + + opt->type = LZMA_DELTA_TYPE_BYTE; + opt->dist = props[0] + 1U; + + *options = opt; + + return LZMA_OK; +} diff --git a/src/native/external/xz/src/liblzma/delta/delta_decoder.h b/src/native/external/xz/src/liblzma/delta/delta_decoder.h new file mode 100644 index 00000000000000..e2268ed44e7264 --- /dev/null +++ b/src/native/external/xz/src/liblzma/delta/delta_decoder.h @@ -0,0 +1,25 @@ +// SPDX-License-Identifier: 0BSD + +/////////////////////////////////////////////////////////////////////////////// +// +/// \file delta_decoder.h +/// \brief Delta filter decoder +// +// Author: Lasse Collin +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef LZMA_DELTA_DECODER_H +#define LZMA_DELTA_DECODER_H + +#include "delta_common.h" + +extern lzma_ret lzma_delta_decoder_init(lzma_next_coder *next, + const lzma_allocator *allocator, + const lzma_filter_info *filters); + +extern lzma_ret lzma_delta_props_decode( + void **options, const lzma_allocator *allocator, + const uint8_t *props, size_t props_size); + +#endif diff --git a/src/native/external/xz/src/liblzma/delta/delta_encoder.c b/src/native/external/xz/src/liblzma/delta/delta_encoder.c new file mode 100644 index 00000000000000..c3f30273d4d771 --- /dev/null +++ b/src/native/external/xz/src/liblzma/delta/delta_encoder.c @@ -0,0 +1,132 @@ +// SPDX-License-Identifier: 0BSD + +/////////////////////////////////////////////////////////////////////////////// +// +/// \file delta_encoder.c +/// \brief Delta filter encoder +// +// Author: Lasse Collin +// +/////////////////////////////////////////////////////////////////////////////// + +#include "delta_encoder.h" +#include "delta_private.h" + + +/// Copies and encodes the data at the same time. This is used when Delta +/// is the first filter in the chain (and thus the last filter in the +/// encoder's filter stack). +static void +copy_and_encode(lzma_delta_coder *coder, + const uint8_t *restrict in, uint8_t *restrict out, size_t size) +{ + const size_t distance = coder->distance; + + for (size_t i = 0; i < size; ++i) { + const uint8_t tmp = coder->history[ + (distance + coder->pos) & 0xFF]; + coder->history[coder->pos-- & 0xFF] = in[i]; + out[i] = in[i] - tmp; + } +} + + +/// Encodes the data in place. This is used when we are the last filter +/// in the chain (and thus non-last filter in the encoder's filter stack). +static void +encode_in_place(lzma_delta_coder *coder, uint8_t *buffer, size_t size) +{ + const size_t distance = coder->distance; + + for (size_t i = 0; i < size; ++i) { + const uint8_t tmp = coder->history[ + (distance + coder->pos) & 0xFF]; + coder->history[coder->pos-- & 0xFF] = buffer[i]; + buffer[i] -= tmp; + } +} + + +static lzma_ret +delta_encode(void *coder_ptr, const lzma_allocator *allocator, + const uint8_t *restrict in, size_t *restrict in_pos, + size_t in_size, uint8_t *restrict out, + size_t *restrict out_pos, size_t out_size, lzma_action action) +{ + lzma_delta_coder *coder = coder_ptr; + + lzma_ret ret; + + if (coder->next.code == NULL) { + const size_t in_avail = in_size - *in_pos; + const size_t out_avail = out_size - *out_pos; + const size_t size = my_min(in_avail, out_avail); + + // in and out might be NULL. In such cases size == 0. + // Null pointer + 0 is undefined behavior so skip + // the call in that case as it would do nothing anyway. + if (size > 0) + copy_and_encode(coder, in + *in_pos, out + *out_pos, + size); + + *in_pos += size; + *out_pos += size; + + ret = action != LZMA_RUN && *in_pos == in_size + ? LZMA_STREAM_END : LZMA_OK; + + } else { + const size_t out_start = *out_pos; + + ret = coder->next.code(coder->next.coder, allocator, + in, in_pos, in_size, out, out_pos, out_size, + action); + + // Like above, avoid null pointer + 0. + const size_t size = *out_pos - out_start; + if (size > 0) + encode_in_place(coder, out + out_start, size); + } + + return ret; +} + + +static lzma_ret +delta_encoder_update(void *coder_ptr, const lzma_allocator *allocator, + const lzma_filter *filters_null lzma_attribute((__unused__)), + const lzma_filter *reversed_filters) +{ + lzma_delta_coder *coder = coder_ptr; + + // Delta doesn't and will never support changing the options in + // the middle of encoding. If the app tries to change them, we + // simply ignore them. + return lzma_next_filter_update( + &coder->next, allocator, reversed_filters + 1); +} + + +extern lzma_ret +lzma_delta_encoder_init(lzma_next_coder *next, const lzma_allocator *allocator, + const lzma_filter_info *filters) +{ + next->code = &delta_encode; + next->update = &delta_encoder_update; + return lzma_delta_coder_init(next, allocator, filters); +} + + +extern lzma_ret +lzma_delta_props_encode(const void *options, uint8_t *out) +{ + // The caller must have already validated the options, so it's + // LZMA_PROG_ERROR if they are invalid. + if (lzma_delta_coder_memusage(options) == UINT64_MAX) + return LZMA_PROG_ERROR; + + const lzma_options_delta *opt = options; + out[0] = (uint8_t)(opt->dist - LZMA_DELTA_DIST_MIN); + + return LZMA_OK; +} diff --git a/src/native/external/xz/src/liblzma/delta/delta_encoder.h b/src/native/external/xz/src/liblzma/delta/delta_encoder.h new file mode 100644 index 00000000000000..735f0ed0091ba2 --- /dev/null +++ b/src/native/external/xz/src/liblzma/delta/delta_encoder.h @@ -0,0 +1,23 @@ +// SPDX-License-Identifier: 0BSD + +/////////////////////////////////////////////////////////////////////////////// +// +/// \file delta_encoder.h +/// \brief Delta filter encoder +// +// Author: Lasse Collin +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef LZMA_DELTA_ENCODER_H +#define LZMA_DELTA_ENCODER_H + +#include "delta_common.h" + +extern lzma_ret lzma_delta_encoder_init(lzma_next_coder *next, + const lzma_allocator *allocator, + const lzma_filter_info *filters); + +extern lzma_ret lzma_delta_props_encode(const void *options, uint8_t *out); + +#endif diff --git a/src/native/external/xz/src/liblzma/delta/delta_private.h b/src/native/external/xz/src/liblzma/delta/delta_private.h new file mode 100644 index 00000000000000..e54721a8466565 --- /dev/null +++ b/src/native/external/xz/src/liblzma/delta/delta_private.h @@ -0,0 +1,36 @@ +// SPDX-License-Identifier: 0BSD + +/////////////////////////////////////////////////////////////////////////////// +// +/// \file delta_private.h +/// \brief Private common stuff for Delta encoder and decoder +// +// Author: Lasse Collin +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef LZMA_DELTA_PRIVATE_H +#define LZMA_DELTA_PRIVATE_H + +#include "delta_common.h" + +typedef struct { + /// Next coder in the chain + lzma_next_coder next; + + /// Delta distance + size_t distance; + + /// Position in history[] + uint8_t pos; + + /// Buffer to hold history of the original data + uint8_t history[LZMA_DELTA_DIST_MAX]; +} lzma_delta_coder; + + +extern lzma_ret lzma_delta_coder_init( + lzma_next_coder *next, const lzma_allocator *allocator, + const lzma_filter_info *filters); + +#endif diff --git a/src/native/external/xz/src/liblzma/liblzma.pc.in b/src/native/external/xz/src/liblzma/liblzma.pc.in new file mode 100644 index 00000000000000..a432992b707235 --- /dev/null +++ b/src/native/external/xz/src/liblzma/liblzma.pc.in @@ -0,0 +1,16 @@ +# SPDX-License-Identifier: 0BSD +# Author: Lasse Collin + +prefix=@prefix@ +exec_prefix=@exec_prefix@ +libdir=@libdir@ +includedir=@includedir@ + +Name: liblzma +Description: General purpose data compression library +URL: @PACKAGE_URL@ +Version: @PACKAGE_VERSION@ +Cflags: -I${includedir} +Cflags.private: -DLZMA_API_STATIC +Libs: -L${libdir} -llzma +Libs.private: @PTHREAD_CFLAGS@ @LIBS@ diff --git a/src/native/external/xz/src/liblzma/liblzma_generic.map b/src/native/external/xz/src/liblzma/liblzma_generic.map new file mode 100644 index 00000000000000..2bef27a8f7d75c --- /dev/null +++ b/src/native/external/xz/src/liblzma/liblzma_generic.map @@ -0,0 +1,138 @@ +/* SPDX-License-Identifier: 0BSD */ + +XZ_5.0 { +global: + lzma_alone_decoder; + lzma_alone_encoder; + lzma_auto_decoder; + lzma_block_buffer_bound; + lzma_block_buffer_decode; + lzma_block_buffer_encode; + lzma_block_compressed_size; + lzma_block_decoder; + lzma_block_encoder; + lzma_block_header_decode; + lzma_block_header_encode; + lzma_block_header_size; + lzma_block_total_size; + lzma_block_unpadded_size; + lzma_check_is_supported; + lzma_check_size; + lzma_code; + lzma_crc32; + lzma_crc64; + lzma_easy_buffer_encode; + lzma_easy_decoder_memusage; + lzma_easy_encoder; + lzma_easy_encoder_memusage; + lzma_end; + lzma_filter_decoder_is_supported; + lzma_filter_encoder_is_supported; + lzma_filter_flags_decode; + lzma_filter_flags_encode; + lzma_filter_flags_size; + lzma_filters_copy; + lzma_filters_update; + lzma_get_check; + lzma_index_append; + lzma_index_block_count; + lzma_index_buffer_decode; + lzma_index_buffer_encode; + lzma_index_cat; + lzma_index_checks; + lzma_index_decoder; + lzma_index_dup; + lzma_index_encoder; + lzma_index_end; + lzma_index_file_size; + lzma_index_hash_append; + lzma_index_hash_decode; + lzma_index_hash_end; + lzma_index_hash_init; + lzma_index_hash_size; + lzma_index_init; + lzma_index_iter_init; + lzma_index_iter_locate; + lzma_index_iter_next; + lzma_index_iter_rewind; + lzma_index_memusage; + lzma_index_memused; + lzma_index_size; + lzma_index_stream_count; + lzma_index_stream_flags; + lzma_index_stream_padding; + lzma_index_stream_size; + lzma_index_total_size; + lzma_index_uncompressed_size; + lzma_lzma_preset; + lzma_memlimit_get; + lzma_memlimit_set; + lzma_memusage; + lzma_mf_is_supported; + lzma_mode_is_supported; + lzma_physmem; + lzma_properties_decode; + lzma_properties_encode; + lzma_properties_size; + lzma_raw_buffer_decode; + lzma_raw_buffer_encode; + lzma_raw_decoder; + lzma_raw_decoder_memusage; + lzma_raw_encoder; + lzma_raw_encoder_memusage; + lzma_stream_buffer_bound; + lzma_stream_buffer_decode; + lzma_stream_buffer_encode; + lzma_stream_decoder; + lzma_stream_encoder; + lzma_stream_flags_compare; + lzma_stream_footer_decode; + lzma_stream_footer_encode; + lzma_stream_header_decode; + lzma_stream_header_encode; + lzma_version_number; + lzma_version_string; + lzma_vli_decode; + lzma_vli_encode; + lzma_vli_size; + +local: + *; +}; + +XZ_5.2 { +global: + lzma_block_uncomp_encode; + lzma_cputhreads; + lzma_get_progress; + lzma_stream_encoder_mt; + lzma_stream_encoder_mt_memusage; +} XZ_5.0; + +XZ_5.4 { +global: + lzma_file_info_decoder; + lzma_filters_free; + lzma_lzip_decoder; + lzma_microlzma_decoder; + lzma_microlzma_encoder; + lzma_stream_decoder_mt; + lzma_str_from_filters; + lzma_str_list_filters; + lzma_str_to_filters; +} XZ_5.2; + +XZ_5.6.0 { +global: + lzma_mt_block_size; +} XZ_5.4; + +XZ_5.8 { +global: + lzma_bcj_arm64_encode; + lzma_bcj_arm64_decode; + lzma_bcj_riscv_encode; + lzma_bcj_riscv_decode; + lzma_bcj_x86_encode; + lzma_bcj_x86_decode; +} XZ_5.6.0; diff --git a/src/native/external/xz/src/liblzma/liblzma_linux.map b/src/native/external/xz/src/liblzma/liblzma_linux.map new file mode 100644 index 00000000000000..50f1571de21966 --- /dev/null +++ b/src/native/external/xz/src/liblzma/liblzma_linux.map @@ -0,0 +1,153 @@ +/* SPDX-License-Identifier: 0BSD */ + +XZ_5.0 { +global: + lzma_alone_decoder; + lzma_alone_encoder; + lzma_auto_decoder; + lzma_block_buffer_bound; + lzma_block_buffer_decode; + lzma_block_buffer_encode; + lzma_block_compressed_size; + lzma_block_decoder; + lzma_block_encoder; + lzma_block_header_decode; + lzma_block_header_encode; + lzma_block_header_size; + lzma_block_total_size; + lzma_block_unpadded_size; + lzma_check_is_supported; + lzma_check_size; + lzma_code; + lzma_crc32; + lzma_crc64; + lzma_easy_buffer_encode; + lzma_easy_decoder_memusage; + lzma_easy_encoder; + lzma_easy_encoder_memusage; + lzma_end; + lzma_filter_decoder_is_supported; + lzma_filter_encoder_is_supported; + lzma_filter_flags_decode; + lzma_filter_flags_encode; + lzma_filter_flags_size; + lzma_filters_copy; + lzma_filters_update; + lzma_get_check; + lzma_index_append; + lzma_index_block_count; + lzma_index_buffer_decode; + lzma_index_buffer_encode; + lzma_index_cat; + lzma_index_checks; + lzma_index_decoder; + lzma_index_dup; + lzma_index_encoder; + lzma_index_end; + lzma_index_file_size; + lzma_index_hash_append; + lzma_index_hash_decode; + lzma_index_hash_end; + lzma_index_hash_init; + lzma_index_hash_size; + lzma_index_init; + lzma_index_iter_init; + lzma_index_iter_locate; + lzma_index_iter_next; + lzma_index_iter_rewind; + lzma_index_memusage; + lzma_index_memused; + lzma_index_size; + lzma_index_stream_count; + lzma_index_stream_flags; + lzma_index_stream_padding; + lzma_index_stream_size; + lzma_index_total_size; + lzma_index_uncompressed_size; + lzma_lzma_preset; + lzma_memlimit_get; + lzma_memlimit_set; + lzma_memusage; + lzma_mf_is_supported; + lzma_mode_is_supported; + lzma_physmem; + lzma_properties_decode; + lzma_properties_encode; + lzma_properties_size; + lzma_raw_buffer_decode; + lzma_raw_buffer_encode; + lzma_raw_decoder; + lzma_raw_decoder_memusage; + lzma_raw_encoder; + lzma_raw_encoder_memusage; + lzma_stream_buffer_bound; + lzma_stream_buffer_decode; + lzma_stream_buffer_encode; + lzma_stream_decoder; + lzma_stream_encoder; + lzma_stream_flags_compare; + lzma_stream_footer_decode; + lzma_stream_footer_encode; + lzma_stream_header_decode; + lzma_stream_header_encode; + lzma_version_number; + lzma_version_string; + lzma_vli_decode; + lzma_vli_encode; + lzma_vli_size; + +local: + *; +}; + +XZ_5.2 { +global: + lzma_block_uncomp_encode; + lzma_cputhreads; + lzma_get_progress; + lzma_stream_encoder_mt; + lzma_stream_encoder_mt_memusage; +} XZ_5.0; + +XZ_5.1.2alpha { +global: + lzma_stream_encoder_mt; + lzma_stream_encoder_mt_memusage; +} XZ_5.0; + +XZ_5.2.2 { +global: + lzma_block_uncomp_encode; + lzma_cputhreads; + lzma_get_progress; + lzma_stream_encoder_mt; + lzma_stream_encoder_mt_memusage; +} XZ_5.1.2alpha; + +XZ_5.4 { +global: + lzma_file_info_decoder; + lzma_filters_free; + lzma_lzip_decoder; + lzma_microlzma_decoder; + lzma_microlzma_encoder; + lzma_stream_decoder_mt; + lzma_str_from_filters; + lzma_str_list_filters; + lzma_str_to_filters; +} XZ_5.2; + +XZ_5.6.0 { +global: + lzma_mt_block_size; +} XZ_5.4; + +XZ_5.8 { +global: + lzma_bcj_arm64_encode; + lzma_bcj_arm64_decode; + lzma_bcj_riscv_encode; + lzma_bcj_riscv_decode; + lzma_bcj_x86_encode; + lzma_bcj_x86_decode; +} XZ_5.6.0; diff --git a/src/native/external/xz/src/liblzma/liblzma_w32res.rc b/src/native/external/xz/src/liblzma/liblzma_w32res.rc new file mode 100644 index 00000000000000..19507b6e9669e6 --- /dev/null +++ b/src/native/external/xz/src/liblzma/liblzma_w32res.rc @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: 0BSD */ + +/* + * Author: Lasse Collin + */ + +#define MY_TYPE VFT_DLL + +#if defined(__MSYS__) +# define MY_NAME "msys-lzma-5" +#elif defined(__CYGWIN__) +# define MY_NAME "cyglzma-5" +#else +# define MY_NAME "liblzma" +#endif + +#define MY_SUFFIX ".dll" +#define MY_DESC "liblzma data compression library" +#include "common_w32res.rc" diff --git a/src/native/external/xz/src/liblzma/lz/Makefile.inc b/src/native/external/xz/src/liblzma/lz/Makefile.inc new file mode 100644 index 00000000000000..15235d7d10709f --- /dev/null +++ b/src/native/external/xz/src/liblzma/lz/Makefile.inc @@ -0,0 +1,18 @@ +## SPDX-License-Identifier: 0BSD +## Author: Lasse Collin + +if COND_ENCODER_LZ +liblzma_la_SOURCES += \ + lz/lz_encoder.c \ + lz/lz_encoder.h \ + lz/lz_encoder_hash.h \ + lz/lz_encoder_hash_table.h \ + lz/lz_encoder_mf.c +endif + + +if COND_DECODER_LZ +liblzma_la_SOURCES += \ + lz/lz_decoder.c \ + lz/lz_decoder.h +endif diff --git a/src/native/external/xz/src/liblzma/lz/lz_decoder.c b/src/native/external/xz/src/liblzma/lz/lz_decoder.c new file mode 100644 index 00000000000000..1cb120ab3b0922 --- /dev/null +++ b/src/native/external/xz/src/liblzma/lz/lz_decoder.c @@ -0,0 +1,333 @@ +// SPDX-License-Identifier: 0BSD + +/////////////////////////////////////////////////////////////////////////////// +// +/// \file lz_decoder.c +/// \brief LZ out window +/// +// Authors: Igor Pavlov +// Lasse Collin +// +/////////////////////////////////////////////////////////////////////////////// + +// liblzma supports multiple LZ77-based filters. The LZ part is shared +// between these filters. The LZ code takes care of dictionary handling +// and passing the data between filters in the chain. The filter-specific +// part decodes from the input buffer to the dictionary. + + +#include "lz_decoder.h" + + +typedef struct { + /// Dictionary (history buffer) + lzma_dict dict; + + /// The actual LZ-based decoder e.g. LZMA + lzma_lz_decoder lz; + + /// Next filter in the chain, if any. Note that LZMA and LZMA2 are + /// only allowed as the last filter, but the long-range filter in + /// future can be in the middle of the chain. + lzma_next_coder next; + + /// True if the next filter in the chain has returned LZMA_STREAM_END. + bool next_finished; + + /// True if the LZ decoder (e.g. LZMA) has detected end of payload + /// marker. This may become true before next_finished becomes true. + bool this_finished; + + /// Temporary buffer needed when the LZ-based filter is not the last + /// filter in the chain. The output of the next filter is first + /// decoded into buffer[], which is then used as input for the actual + /// LZ-based decoder. + struct { + size_t pos; + size_t size; + uint8_t buffer[LZMA_BUFFER_SIZE]; + } temp; +} lzma_coder; + + +static void +lz_decoder_reset(lzma_coder *coder) +{ + coder->dict.pos = LZ_DICT_INIT_POS; + coder->dict.full = 0; + coder->dict.buf[LZ_DICT_INIT_POS - 1] = '\0'; + coder->dict.has_wrapped = false; + coder->dict.need_reset = false; + return; +} + + +static lzma_ret +decode_buffer(lzma_coder *coder, + const uint8_t *restrict in, size_t *restrict in_pos, + size_t in_size, uint8_t *restrict out, + size_t *restrict out_pos, size_t out_size) +{ + while (true) { + // Wrap the dictionary if needed. + if (coder->dict.pos == coder->dict.size) { + // See the comment of #define LZ_DICT_REPEAT_MAX. + coder->dict.pos = LZ_DICT_REPEAT_MAX; + coder->dict.has_wrapped = true; + memcpy(coder->dict.buf, coder->dict.buf + + coder->dict.size + - LZ_DICT_REPEAT_MAX, + LZ_DICT_REPEAT_MAX); + } + + // Store the current dictionary position. It is needed to know + // where to start copying to the out[] buffer. + const size_t dict_start = coder->dict.pos; + + // Calculate how much we allow coder->lz.code() to decode. + // It must not decode past the end of the dictionary + // buffer, and we don't want it to decode more than is + // actually needed to fill the out[] buffer. + coder->dict.limit = coder->dict.pos + + my_min(out_size - *out_pos, + coder->dict.size - coder->dict.pos); + + // Call the coder->lz.code() to do the actual decoding. + const lzma_ret ret = coder->lz.code( + coder->lz.coder, &coder->dict, + in, in_pos, in_size); + + // Copy the decoded data from the dictionary to the out[] + // buffer. Do it conditionally because out can be NULL + // (in which case copy_size is always 0). Calling memcpy() + // with a null-pointer is undefined even if the third + // argument is 0. + const size_t copy_size = coder->dict.pos - dict_start; + assert(copy_size <= out_size - *out_pos); + + if (copy_size > 0) + memcpy(out + *out_pos, coder->dict.buf + dict_start, + copy_size); + + *out_pos += copy_size; + + // Reset the dictionary if so requested by coder->lz.code(). + if (coder->dict.need_reset) { + lz_decoder_reset(coder); + + // Since we reset dictionary, we don't check if + // dictionary became full. + if (ret != LZMA_OK || *out_pos == out_size) + return ret; + } else { + // Return if everything got decoded or an error + // occurred, or if there's no more data to decode. + // + // Note that detecting if there's something to decode + // is done by looking if dictionary become full + // instead of looking if *in_pos == in_size. This + // is because it is possible that all the input was + // consumed already but some data is pending to be + // written to the dictionary. + if (ret != LZMA_OK || *out_pos == out_size + || coder->dict.pos < coder->dict.size) + return ret; + } + } +} + + +static lzma_ret +lz_decode(void *coder_ptr, const lzma_allocator *allocator, + const uint8_t *restrict in, size_t *restrict in_pos, + size_t in_size, uint8_t *restrict out, + size_t *restrict out_pos, size_t out_size, + lzma_action action) +{ + lzma_coder *coder = coder_ptr; + + if (coder->next.code == NULL) + return decode_buffer(coder, in, in_pos, in_size, + out, out_pos, out_size); + + // We aren't the last coder in the chain, we need to decode + // our input to a temporary buffer. + while (*out_pos < out_size) { + // Fill the temporary buffer if it is empty. + if (!coder->next_finished + && coder->temp.pos == coder->temp.size) { + coder->temp.pos = 0; + coder->temp.size = 0; + + const lzma_ret ret = coder->next.code( + coder->next.coder, + allocator, in, in_pos, in_size, + coder->temp.buffer, &coder->temp.size, + LZMA_BUFFER_SIZE, action); + + if (ret == LZMA_STREAM_END) + coder->next_finished = true; + else if (ret != LZMA_OK || coder->temp.size == 0) + return ret; + } + + if (coder->this_finished) { + if (coder->temp.size != 0) + return LZMA_DATA_ERROR; + + if (coder->next_finished) + return LZMA_STREAM_END; + + return LZMA_OK; + } + + const lzma_ret ret = decode_buffer(coder, coder->temp.buffer, + &coder->temp.pos, coder->temp.size, + out, out_pos, out_size); + + if (ret == LZMA_STREAM_END) + coder->this_finished = true; + else if (ret != LZMA_OK) + return ret; + else if (coder->next_finished && *out_pos < out_size) + return LZMA_DATA_ERROR; + } + + return LZMA_OK; +} + + +static void +lz_decoder_end(void *coder_ptr, const lzma_allocator *allocator) +{ + lzma_coder *coder = coder_ptr; + + lzma_next_end(&coder->next, allocator); + lzma_free(coder->dict.buf, allocator); + + if (coder->lz.end != NULL) + coder->lz.end(coder->lz.coder, allocator); + else + lzma_free(coder->lz.coder, allocator); + + lzma_free(coder, allocator); + return; +} + + +extern lzma_ret +lzma_lz_decoder_init(lzma_next_coder *next, const lzma_allocator *allocator, + const lzma_filter_info *filters, + lzma_ret (*lz_init)(lzma_lz_decoder *lz, + const lzma_allocator *allocator, + lzma_vli id, const void *options, + lzma_lz_options *lz_options)) +{ + // Allocate the base structure if it isn't already allocated. + lzma_coder *coder = next->coder; + if (coder == NULL) { + coder = lzma_alloc(sizeof(lzma_coder), allocator); + if (coder == NULL) + return LZMA_MEM_ERROR; + + next->coder = coder; + next->code = &lz_decode; + next->end = &lz_decoder_end; + + coder->dict.buf = NULL; + coder->dict.size = 0; + coder->lz = LZMA_LZ_DECODER_INIT; + coder->next = LZMA_NEXT_CODER_INIT; + } + + // Allocate and initialize the LZ-based decoder. It will also give + // us the dictionary size. + lzma_lz_options lz_options; + return_if_error(lz_init(&coder->lz, allocator, + filters[0].id, filters[0].options, &lz_options)); + + // If the dictionary size is very small, increase it to 4096 bytes. + // This is to prevent constant wrapping of the dictionary, which + // would slow things down. The downside is that since we don't check + // separately for the real dictionary size, we may happily accept + // corrupt files. + if (lz_options.dict_size < 4096) + lz_options.dict_size = 4096; + + // Make dictionary size a multiple of 16. Some LZ-based decoders like + // LZMA use the lowest bits lzma_dict.pos to know the alignment of the + // data. Aligned buffer is also good when memcpying from the + // dictionary to the output buffer, since applications are + // recommended to give aligned buffers to liblzma. + // + // Reserve 2 * LZ_DICT_REPEAT_MAX bytes of extra space which is + // needed for alloc_size. Reserve also LZ_DICT_EXTRA bytes of extra + // space which is *not* counted in alloc_size or coder->dict.size. + // + // Avoid integer overflow. + if (lz_options.dict_size > SIZE_MAX - 15 - 2 * LZ_DICT_REPEAT_MAX + - LZ_DICT_EXTRA) + return LZMA_MEM_ERROR; + + lz_options.dict_size = (lz_options.dict_size + 15) & ~((size_t)(15)); + + // Reserve extra space as explained in the comment + // of #define LZ_DICT_REPEAT_MAX. + const size_t alloc_size + = lz_options.dict_size + 2 * LZ_DICT_REPEAT_MAX; + + // Allocate and initialize the dictionary. + if (coder->dict.size != alloc_size) { + lzma_free(coder->dict.buf, allocator); + + // The LZ_DICT_EXTRA bytes at the end of the buffer aren't + // included in alloc_size. These extra bytes allow + // dict_repeat() to read and write more data than requested. + // Otherwise this extra space is ignored. + coder->dict.buf = lzma_alloc(alloc_size + LZ_DICT_EXTRA, + allocator); + if (coder->dict.buf == NULL) + return LZMA_MEM_ERROR; + + // NOTE: Yes, alloc_size, not lz_options.dict_size. The way + // coder->dict.full is updated will take care that we will + // still reject distances larger than lz_options.dict_size. + coder->dict.size = alloc_size; + } + + lz_decoder_reset(next->coder); + + // Use the preset dictionary if it was given to us. + if (lz_options.preset_dict != NULL + && lz_options.preset_dict_size > 0) { + // If the preset dictionary is bigger than the actual + // dictionary, copy only the tail. + const size_t copy_size = my_min(lz_options.preset_dict_size, + lz_options.dict_size); + const size_t offset = lz_options.preset_dict_size - copy_size; + memcpy(coder->dict.buf + coder->dict.pos, + lz_options.preset_dict + offset, + copy_size); + + // dict.pos isn't zero after lz_decoder_reset(). + coder->dict.pos += copy_size; + coder->dict.full = copy_size; + } + + // Miscellaneous initializations + coder->next_finished = false; + coder->this_finished = false; + coder->temp.pos = 0; + coder->temp.size = 0; + + // Initialize the next filter in the chain, if any. + return lzma_next_filter_init(&coder->next, allocator, filters + 1); +} + + +extern uint64_t +lzma_lz_decoder_memusage(size_t dictionary_size) +{ + return sizeof(lzma_coder) + (uint64_t)(dictionary_size) + + 2 * LZ_DICT_REPEAT_MAX + LZ_DICT_EXTRA; +} diff --git a/src/native/external/xz/src/liblzma/lz/lz_decoder.h b/src/native/external/xz/src/liblzma/lz/lz_decoder.h new file mode 100644 index 00000000000000..d26963a9776440 --- /dev/null +++ b/src/native/external/xz/src/liblzma/lz/lz_decoder.h @@ -0,0 +1,325 @@ +// SPDX-License-Identifier: 0BSD + +/////////////////////////////////////////////////////////////////////////////// +// +/// \file lz_decoder.h +/// \brief LZ out window +/// +// Authors: Igor Pavlov +// Lasse Collin +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef LZMA_LZ_DECODER_H +#define LZMA_LZ_DECODER_H + +#include "common.h" + +#ifdef HAVE_IMMINTRIN_H +# include +#endif + + +// dict_repeat() implementation variant: +// 0 = Byte-by-byte copying only. +// 1 = Use memcpy() for non-overlapping copies. +// 2 = Use x86 SSE2 for non-overlapping copies. +#ifndef LZMA_LZ_DECODER_CONFIG +# if defined(TUKLIB_FAST_UNALIGNED_ACCESS) \ + && defined(HAVE_IMMINTRIN_H) \ + && (defined(__SSE2__) || defined(_M_X64) \ + || (defined(_M_IX86_FP) && _M_IX86_FP >= 2)) +# define LZMA_LZ_DECODER_CONFIG 2 +# else +# define LZMA_LZ_DECODER_CONFIG 1 +# endif +#endif + +/// Byte-by-byte and memcpy() copy exactly the amount needed. Other methods +/// can copy up to LZ_DICT_EXTRA bytes more than requested, and this amount +/// of extra space is needed at the end of the allocated dictionary buffer. +/// +/// NOTE: If this is increased, update LZMA_DICT_REPEAT_MAX too. +#if LZMA_LZ_DECODER_CONFIG >= 2 +# define LZ_DICT_EXTRA 32 +#else +# define LZ_DICT_EXTRA 0 +#endif + +/// Maximum number of bytes that dict_repeat() may copy. The allocated +/// dictionary buffer will be 2 * LZ_DICT_REPEAT_MAX + LZMA_DICT_EXTRA bytes +/// larger than the actual dictionary size: +/// +/// (1) Every time the decoder reaches the end of the dictionary buffer, +/// the last LZ_DICT_REPEAT_MAX bytes will be copied to the beginning. +/// This way dict_repeat() will only need to copy from one place, +/// never from both the end and beginning of the buffer. +/// +/// (2) The other LZ_DICT_REPEAT_MAX bytes is kept as a buffer between +/// the oldest byte still in the dictionary and the current write +/// position. This way dict_repeat() with the maximum valid distance +/// won't need memmove() as the copying cannot overlap. +/// +/// (3) LZ_DICT_EXTRA bytes are required at the end of the dictionary buffer +/// so that extra copying done by dict_repeat() won't write or read past +/// the end of the allocated buffer. This amount is *not* counted as part +/// of lzma_dict.size. +/// +/// Note that memcpy() still cannot be used if distance < len. +/// +/// LZMA's longest match length is 273 bytes. The LZMA decoder looks at +/// the lowest four bits of the dictionary position, thus 273 must be +/// rounded up to the next multiple of 16 (288). In addition, optimized +/// dict_repeat() copies 32 bytes at a time, thus this must also be +/// a multiple of 32. +#define LZ_DICT_REPEAT_MAX 288 + +/// Initial position in lzma_dict.buf when the dictionary is empty. +#define LZ_DICT_INIT_POS (2 * LZ_DICT_REPEAT_MAX) + + +typedef struct { + /// Pointer to the dictionary buffer. + uint8_t *buf; + + /// Write position in dictionary. The next byte will be written to + /// buf[pos]. + size_t pos; + + /// Indicates how full the dictionary is. This is used by + /// dict_is_distance_valid() to detect corrupt files that would + /// read beyond the beginning of the dictionary. + size_t full; + + /// Write limit + size_t limit; + + /// Allocated size of buf. This is 2 * LZ_DICT_REPEAT_MAX bytes + /// larger than the actual dictionary size. This is enforced by + /// how the value for "full" is set; it can be at most + /// "size - 2 * LZ_DICT_REPEAT_MAX". + size_t size; + + /// True once the dictionary has become full and the writing position + /// has been wrapped in decode_buffer() in lz_decoder.c. + bool has_wrapped; + + /// True when dictionary should be reset before decoding more data. + bool need_reset; + +} lzma_dict; + + +typedef struct { + size_t dict_size; + const uint8_t *preset_dict; + size_t preset_dict_size; +} lzma_lz_options; + + +typedef struct { + /// Data specific to the LZ-based decoder + void *coder; + + /// Function to decode from in[] to *dict + lzma_ret (*code)(void *coder, + lzma_dict *restrict dict, const uint8_t *restrict in, + size_t *restrict in_pos, size_t in_size); + + void (*reset)(void *coder, const void *options); + + /// Set the uncompressed size. If uncompressed_size == LZMA_VLI_UNKNOWN + /// then allow_eopm will always be true. + void (*set_uncompressed)(void *coder, lzma_vli uncompressed_size, + bool allow_eopm); + + /// Free allocated resources + void (*end)(void *coder, const lzma_allocator *allocator); + +} lzma_lz_decoder; + + +#define LZMA_LZ_DECODER_INIT \ + (lzma_lz_decoder){ \ + .coder = NULL, \ + .code = NULL, \ + .reset = NULL, \ + .set_uncompressed = NULL, \ + .end = NULL, \ + } + + +extern lzma_ret lzma_lz_decoder_init(lzma_next_coder *next, + const lzma_allocator *allocator, + const lzma_filter_info *filters, + lzma_ret (*lz_init)(lzma_lz_decoder *lz, + const lzma_allocator *allocator, + lzma_vli id, const void *options, + lzma_lz_options *lz_options)); + +extern uint64_t lzma_lz_decoder_memusage(size_t dictionary_size); + + +////////////////////// +// Inline functions // +////////////////////// + +/// Get a byte from the history buffer. +static inline uint8_t +dict_get(const lzma_dict *const dict, const uint32_t distance) +{ + return dict->buf[dict->pos - distance - 1 + + (distance < dict->pos + ? 0 : dict->size - LZ_DICT_REPEAT_MAX)]; +} + + +/// Optimized version of dict_get(dict, 0) +static inline uint8_t +dict_get0(const lzma_dict *const dict) +{ + return dict->buf[dict->pos - 1]; +} + + +/// Test if dictionary is empty. +static inline bool +dict_is_empty(const lzma_dict *const dict) +{ + return dict->full == 0; +} + + +/// Validate the match distance +static inline bool +dict_is_distance_valid(const lzma_dict *const dict, const size_t distance) +{ + return dict->full > distance; +} + + +/// Repeat *len bytes at distance. +static inline bool +dict_repeat(lzma_dict *restrict dict, + uint32_t distance, uint32_t *restrict len) +{ + // Don't write past the end of the dictionary. + const size_t dict_avail = dict->limit - dict->pos; + uint32_t left = (uint32_t)my_min(dict_avail, *len); + *len -= left; + + size_t back = dict->pos - distance - 1; + if (distance >= dict->pos) + back += dict->size - LZ_DICT_REPEAT_MAX; + +#if LZMA_LZ_DECODER_CONFIG == 0 + // Minimal byte-by-byte method. This might be the least bad choice + // if memcpy() isn't fast and there's no replacement for it below. + while (left-- > 0) { + dict->buf[dict->pos++] = dict->buf[back++]; + } + +#else + // Because memcpy() or a similar method can be faster than copying + // byte by byte in a loop, the copying process is split into + // two cases. + if (distance < left) { + // Source and target areas overlap, thus we can't use + // memcpy() nor even memmove() safely. + do { + dict->buf[dict->pos++] = dict->buf[back++]; + } while (--left > 0); + } else { +# if LZMA_LZ_DECODER_CONFIG == 1 + memcpy(dict->buf + dict->pos, dict->buf + back, left); + dict->pos += left; + +# elif LZMA_LZ_DECODER_CONFIG == 2 + // This can copy up to 32 bytes more than required. + // (If left == 0, we still copy 32 bytes.) + size_t pos = dict->pos; + dict->pos += left; + do { + const __m128i x0 = _mm_loadu_si128( + (__m128i *)(dict->buf + back)); + const __m128i x1 = _mm_loadu_si128( + (__m128i *)(dict->buf + back + 16)); + back += 32; + _mm_storeu_si128( + (__m128i *)(dict->buf + pos), x0); + _mm_storeu_si128( + (__m128i *)(dict->buf + pos + 16), x1); + pos += 32; + } while (pos < dict->pos); + +# else +# error "Invalid LZMA_LZ_DECODER_CONFIG value" +# endif + } +#endif + + // Update how full the dictionary is. + if (!dict->has_wrapped) + dict->full = dict->pos - LZ_DICT_INIT_POS; + + return *len != 0; +} + + +static inline void +dict_put(lzma_dict *restrict dict, uint8_t byte) +{ + dict->buf[dict->pos++] = byte; + + if (!dict->has_wrapped) + dict->full = dict->pos - LZ_DICT_INIT_POS; +} + + +/// Puts one byte into the dictionary. Returns true if the dictionary was +/// already full and the byte couldn't be added. +static inline bool +dict_put_safe(lzma_dict *restrict dict, uint8_t byte) +{ + if (unlikely(dict->pos == dict->limit)) + return true; + + dict_put(dict, byte); + return false; +} + + +/// Copies arbitrary amount of data into the dictionary. +static inline void +dict_write(lzma_dict *restrict dict, const uint8_t *restrict in, + size_t *restrict in_pos, size_t in_size, + size_t *restrict left) +{ + // NOTE: If we are being given more data than the size of the + // dictionary, it could be possible to optimize the LZ decoder + // so that not everything needs to go through the dictionary. + // This shouldn't be very common thing in practice though, and + // the slowdown of one extra memcpy() isn't bad compared to how + // much time it would have taken if the data were compressed. + + if (in_size - *in_pos > *left) + in_size = *in_pos + *left; + + *left -= lzma_bufcpy(in, in_pos, in_size, + dict->buf, &dict->pos, dict->limit); + + if (!dict->has_wrapped) + dict->full = dict->pos - LZ_DICT_INIT_POS; + + return; +} + + +static inline void +dict_reset(lzma_dict *dict) +{ + dict->need_reset = true; + return; +} + +#endif diff --git a/src/native/external/xz/src/liblzma/lz/lz_encoder.c b/src/native/external/xz/src/liblzma/lz/lz_encoder.c new file mode 100644 index 00000000000000..0062882ad97f63 --- /dev/null +++ b/src/native/external/xz/src/liblzma/lz/lz_encoder.c @@ -0,0 +1,632 @@ +// SPDX-License-Identifier: 0BSD + +/////////////////////////////////////////////////////////////////////////////// +// +/// \file lz_encoder.c +/// \brief LZ in window +/// +// Authors: Igor Pavlov +// Lasse Collin +// +/////////////////////////////////////////////////////////////////////////////// + +#include "lz_encoder.h" +#include "lz_encoder_hash.h" + +// See lz_encoder_hash.h. This is a bit hackish but avoids making +// endianness a conditional in makefiles. +#ifdef LZMA_LZ_HASH_TABLE_IS_NEEDED +# include "lz_encoder_hash_table.h" +#endif + +#include "memcmplen.h" + + +typedef struct { + /// LZ-based encoder e.g. LZMA + lzma_lz_encoder lz; + + /// History buffer and match finder + lzma_mf mf; + + /// Next coder in the chain + lzma_next_coder next; +} lzma_coder; + + +/// \brief Moves the data in the input window to free space for new data +/// +/// mf->buffer is a sliding input window, which keeps mf->keep_size_before +/// bytes of input history available all the time. Now and then we need to +/// "slide" the buffer to make space for the new data to the end of the +/// buffer. At the same time, data older than keep_size_before is dropped. +/// +static void +move_window(lzma_mf *mf) +{ + // Align the move to a multiple of 16 bytes. Some LZ-based encoders + // like LZMA use the lowest bits of mf->read_pos to know the + // alignment of the uncompressed data. We also get better speed + // for memmove() with aligned buffers. + assert(mf->read_pos > mf->keep_size_before); + const uint32_t move_offset + = (mf->read_pos - mf->keep_size_before) & ~UINT32_C(15); + + assert(mf->write_pos > move_offset); + const size_t move_size = mf->write_pos - move_offset; + + assert(move_offset + move_size <= mf->size); + + memmove(mf->buffer, mf->buffer + move_offset, move_size); + + mf->offset += move_offset; + mf->read_pos -= move_offset; + mf->read_limit -= move_offset; + mf->write_pos -= move_offset; + + return; +} + + +/// \brief Tries to fill the input window (mf->buffer) +/// +/// If we are the last encoder in the chain, our input data is in in[]. +/// Otherwise we call the next filter in the chain to process in[] and +/// write its output to mf->buffer. +/// +/// This function must not be called once it has returned LZMA_STREAM_END. +/// +static lzma_ret +fill_window(lzma_coder *coder, const lzma_allocator *allocator, + const uint8_t *in, size_t *in_pos, size_t in_size, + lzma_action action) +{ + assert(coder->mf.read_pos <= coder->mf.write_pos); + + // Move the sliding window if needed. + if (coder->mf.read_pos >= coder->mf.size - coder->mf.keep_size_after) + move_window(&coder->mf); + + // Maybe this is ugly, but lzma_mf uses uint32_t for most things + // (which I find cleanest), but we need size_t here when filling + // the history window. + size_t write_pos = coder->mf.write_pos; + lzma_ret ret; + if (coder->next.code == NULL) { + // Not using a filter, simply memcpy() as much as possible. + lzma_bufcpy(in, in_pos, in_size, coder->mf.buffer, + &write_pos, coder->mf.size); + + ret = action != LZMA_RUN && *in_pos == in_size + ? LZMA_STREAM_END : LZMA_OK; + + } else { + ret = coder->next.code(coder->next.coder, allocator, + in, in_pos, in_size, + coder->mf.buffer, &write_pos, + coder->mf.size, action); + } + + coder->mf.write_pos = (uint32_t)write_pos; + + // Silence Valgrind. lzma_memcmplen() can read extra bytes + // and Valgrind will give warnings if those bytes are uninitialized + // because Valgrind cannot see that the values of the uninitialized + // bytes are eventually ignored. + memzero(coder->mf.buffer + write_pos, LZMA_MEMCMPLEN_EXTRA); + + // If end of stream has been reached or flushing completed, we allow + // the encoder to process all the input (that is, read_pos is allowed + // to reach write_pos). Otherwise we keep keep_size_after bytes + // available as prebuffer. + if (ret == LZMA_STREAM_END) { + assert(*in_pos == in_size); + ret = LZMA_OK; + coder->mf.action = action; + coder->mf.read_limit = coder->mf.write_pos; + + } else if (coder->mf.write_pos > coder->mf.keep_size_after) { + // This needs to be done conditionally, because if we got + // only little new input, there may be too little input + // to do any encoding yet. + coder->mf.read_limit = coder->mf.write_pos + - coder->mf.keep_size_after; + } + + // Restart the match finder after finished LZMA_SYNC_FLUSH. + if (coder->mf.pending > 0 + && coder->mf.read_pos < coder->mf.read_limit) { + // Match finder may update coder->pending and expects it to + // start from zero, so use a temporary variable. + const uint32_t pending = coder->mf.pending; + coder->mf.pending = 0; + + // Rewind read_pos so that the match finder can hash + // the pending bytes. + assert(coder->mf.read_pos >= pending); + coder->mf.read_pos -= pending; + + // Call the skip function directly instead of using + // mf_skip(), since we don't want to touch mf->read_ahead. + coder->mf.skip(&coder->mf, pending); + } + + return ret; +} + + +static lzma_ret +lz_encode(void *coder_ptr, const lzma_allocator *allocator, + const uint8_t *restrict in, size_t *restrict in_pos, + size_t in_size, + uint8_t *restrict out, size_t *restrict out_pos, + size_t out_size, lzma_action action) +{ + lzma_coder *coder = coder_ptr; + + while (*out_pos < out_size + && (*in_pos < in_size || action != LZMA_RUN)) { + // Read more data to coder->mf.buffer if needed. + if (coder->mf.action == LZMA_RUN && coder->mf.read_pos + >= coder->mf.read_limit) + return_if_error(fill_window(coder, allocator, + in, in_pos, in_size, action)); + + // Encode + const lzma_ret ret = coder->lz.code(coder->lz.coder, + &coder->mf, out, out_pos, out_size); + if (ret != LZMA_OK) { + // Setting this to LZMA_RUN for cases when we are + // flushing. It doesn't matter when finishing or if + // an error occurred. + coder->mf.action = LZMA_RUN; + return ret; + } + } + + return LZMA_OK; +} + + +static bool +lz_encoder_prepare(lzma_mf *mf, const lzma_allocator *allocator, + const lzma_lz_options *lz_options) +{ + // For now, the dictionary size is limited to 1.5 GiB. This may grow + // in the future if needed, but it needs a little more work than just + // changing this check. + if (!IS_ENC_DICT_SIZE_VALID(lz_options->dict_size) + || lz_options->nice_len > lz_options->match_len_max) + return true; + + mf->keep_size_before = (uint32_t)(lz_options->before_size + lz_options->dict_size); + + mf->keep_size_after = (uint32_t)(lz_options->after_size + + lz_options->match_len_max); + + // To avoid constant memmove()s, allocate some extra space. Since + // memmove()s become more expensive when the size of the buffer + // increases, we reserve more space when a large dictionary is + // used to make the memmove() calls rarer. + // + // This works with dictionaries up to about 3 GiB. If bigger + // dictionary is wanted, some extra work is needed: + // - Several variables in lzma_mf have to be changed from uint32_t + // to size_t. + // - Memory usage calculation needs something too, e.g. use uint64_t + // for mf->size. + uint32_t reserve = (uint32_t)(lz_options->dict_size / 2); + if (reserve > (UINT32_C(1) << 30)) + reserve /= 2; + + reserve += (uint32_t)((lz_options->before_size + lz_options->match_len_max + + lz_options->after_size) / 2 + (UINT32_C(1) << 19)); + + const uint32_t old_size = mf->size; + mf->size = mf->keep_size_before + reserve + mf->keep_size_after; + + // Deallocate the old history buffer if it exists but has different + // size than what is needed now. + if (mf->buffer != NULL && old_size != mf->size) { + lzma_free(mf->buffer, allocator); + mf->buffer = NULL; + } + + // Match finder options + mf->match_len_max = (uint32_t)lz_options->match_len_max; + mf->nice_len = (uint32_t)lz_options->nice_len; + + // cyclic_size has to stay smaller than 2 Gi. Note that this doesn't + // mean limiting dictionary size to less than 2 GiB. With a match + // finder that uses multibyte resolution (hashes start at e.g. every + // fourth byte), cyclic_size would stay below 2 Gi even when + // dictionary size is greater than 2 GiB. + // + // It would be possible to allow cyclic_size >= 2 Gi, but then we + // would need to be careful to use 64-bit types in various places + // (size_t could do since we would need bigger than 32-bit address + // space anyway). It would also require either zeroing a multigigabyte + // buffer at initialization (waste of time and RAM) or allow + // normalization in lz_encoder_mf.c to access uninitialized + // memory to keep the code simpler. The current way is simple and + // still allows pretty big dictionaries, so I don't expect these + // limits to change. + mf->cyclic_size = (uint32_t)(lz_options->dict_size + 1); + + // Validate the match finder ID and setup the function pointers. + switch (lz_options->match_finder) { +#ifdef HAVE_MF_HC3 + case LZMA_MF_HC3: + mf->find = &lzma_mf_hc3_find; + mf->skip = &lzma_mf_hc3_skip; + break; +#endif +#ifdef HAVE_MF_HC4 + case LZMA_MF_HC4: + mf->find = &lzma_mf_hc4_find; + mf->skip = &lzma_mf_hc4_skip; + break; +#endif +#ifdef HAVE_MF_BT2 + case LZMA_MF_BT2: + mf->find = &lzma_mf_bt2_find; + mf->skip = &lzma_mf_bt2_skip; + break; +#endif +#ifdef HAVE_MF_BT3 + case LZMA_MF_BT3: + mf->find = &lzma_mf_bt3_find; + mf->skip = &lzma_mf_bt3_skip; + break; +#endif +#ifdef HAVE_MF_BT4 + case LZMA_MF_BT4: + mf->find = &lzma_mf_bt4_find; + mf->skip = &lzma_mf_bt4_skip; + break; +#endif + + default: + return true; + } + + // Calculate the sizes of mf->hash and mf->son. + // + // NOTE: Since 5.3.5beta the LZMA encoder ensures that nice_len + // is big enough for the selected match finder. This makes it + // easier for applications as nice_len = 2 will always be accepted + // even though the effective value can be slightly bigger. + const uint32_t hash_bytes + = mf_get_hash_bytes(lz_options->match_finder); + assert(hash_bytes <= mf->nice_len); + + const bool is_bt = (lz_options->match_finder & 0x10) != 0; + uint32_t hs; + + if (hash_bytes == 2) { + hs = 0xFFFF; + } else { + // Round dictionary size up to the next 2^n - 1 so it can + // be used as a hash mask. + hs = (uint32_t)(lz_options->dict_size - 1); + hs |= hs >> 1; + hs |= hs >> 2; + hs |= hs >> 4; + hs |= hs >> 8; + hs >>= 1; + hs |= 0xFFFF; + + if (hs > (UINT32_C(1) << 24)) { + if (hash_bytes == 3) + hs = (UINT32_C(1) << 24) - 1; + else + hs >>= 1; + } + } + + mf->hash_mask = hs; + + ++hs; + if (hash_bytes > 2) + hs += HASH_2_SIZE; + if (hash_bytes > 3) + hs += HASH_3_SIZE; +/* + No match finder uses this at the moment. + if (mf->hash_bytes > 4) + hs += HASH_4_SIZE; +*/ + + const uint32_t old_hash_count = mf->hash_count; + const uint32_t old_sons_count = mf->sons_count; + mf->hash_count = hs; + mf->sons_count = mf->cyclic_size; + if (is_bt) + mf->sons_count *= 2; + + // Deallocate the old hash array if it exists and has different size + // than what is needed now. + if (old_hash_count != mf->hash_count + || old_sons_count != mf->sons_count) { + lzma_free(mf->hash, allocator); + mf->hash = NULL; + + lzma_free(mf->son, allocator); + mf->son = NULL; + } + + // Maximum number of match finder cycles + mf->depth = lz_options->depth; + if (mf->depth == 0) { + if (is_bt) + mf->depth = 16 + mf->nice_len / 2; + else + mf->depth = 4 + mf->nice_len / 4; + } + + return false; +} + + +static bool +lz_encoder_init(lzma_mf *mf, const lzma_allocator *allocator, + const lzma_lz_options *lz_options) +{ + // Allocate the history buffer. + if (mf->buffer == NULL) { + // lzma_memcmplen() is used for the dictionary buffer + // so we need to allocate a few extra bytes to prevent + // it from reading past the end of the buffer. + mf->buffer = lzma_alloc(mf->size + LZMA_MEMCMPLEN_EXTRA, + allocator); + if (mf->buffer == NULL) + return true; + + // Keep Valgrind happy with lzma_memcmplen() and initialize + // the extra bytes whose value may get read but which will + // effectively get ignored. + memzero(mf->buffer + mf->size, LZMA_MEMCMPLEN_EXTRA); + } + + // Use cyclic_size as initial mf->offset. This allows + // avoiding a few branches in the match finders. The downside is + // that match finder needs to be normalized more often, which may + // hurt performance with huge dictionaries. + mf->offset = mf->cyclic_size; + mf->read_pos = 0; + mf->read_ahead = 0; + mf->read_limit = 0; + mf->write_pos = 0; + mf->pending = 0; + +#if UINT32_MAX >= SIZE_MAX / 4 + // Check for integer overflow. (Huge dictionaries are not + // possible on 32-bit CPU.) + if (mf->hash_count > SIZE_MAX / sizeof(uint32_t) + || mf->sons_count > SIZE_MAX / sizeof(uint32_t)) + return true; +#endif + + // Allocate and initialize the hash table. Since EMPTY_HASH_VALUE + // is zero, we can use lzma_alloc_zero() or memzero() for mf->hash. + // + // We don't need to initialize mf->son, but not doing that may + // make Valgrind complain in normalization (see normalize() in + // lz_encoder_mf.c). Skipping the initialization is *very* good + // when big dictionary is used but only small amount of data gets + // actually compressed: most of the mf->son won't get actually + // allocated by the kernel, so we avoid wasting RAM and improve + // initialization speed a lot. + if (mf->hash == NULL) { + mf->hash = lzma_alloc_zero(mf->hash_count * sizeof(uint32_t), + allocator); + mf->son = lzma_alloc(mf->sons_count * sizeof(uint32_t), + allocator); + + if (mf->hash == NULL || mf->son == NULL) { + lzma_free(mf->hash, allocator); + mf->hash = NULL; + + lzma_free(mf->son, allocator); + mf->son = NULL; + + return true; + } + } else { +/* + for (uint32_t i = 0; i < mf->hash_count; ++i) + mf->hash[i] = EMPTY_HASH_VALUE; +*/ + memzero(mf->hash, mf->hash_count * sizeof(uint32_t)); + } + + mf->cyclic_pos = 0; + + // Handle preset dictionary. + if (lz_options->preset_dict != NULL + && lz_options->preset_dict_size > 0) { + // If the preset dictionary is bigger than the actual + // dictionary, use only the tail. + mf->write_pos = my_min(lz_options->preset_dict_size, mf->size); + memcpy(mf->buffer, lz_options->preset_dict + + lz_options->preset_dict_size - mf->write_pos, + mf->write_pos); + mf->action = LZMA_SYNC_FLUSH; + mf->skip(mf, mf->write_pos); + } + + mf->action = LZMA_RUN; + + return false; +} + + +extern uint64_t +lzma_lz_encoder_memusage(const lzma_lz_options *lz_options) +{ + // Old buffers must not exist when calling lz_encoder_prepare(). + lzma_mf mf = { + .buffer = NULL, + .hash = NULL, + .son = NULL, + .hash_count = 0, + .sons_count = 0, + }; + + // Setup the size information into mf. + if (lz_encoder_prepare(&mf, NULL, lz_options)) + return UINT64_MAX; + + // Calculate the memory usage. + return ((uint64_t)(mf.hash_count) + mf.sons_count) * sizeof(uint32_t) + + mf.size + sizeof(lzma_coder); +} + + +static void +lz_encoder_end(void *coder_ptr, const lzma_allocator *allocator) +{ + lzma_coder *coder = coder_ptr; + + lzma_next_end(&coder->next, allocator); + + lzma_free(coder->mf.son, allocator); + lzma_free(coder->mf.hash, allocator); + lzma_free(coder->mf.buffer, allocator); + + if (coder->lz.end != NULL) + coder->lz.end(coder->lz.coder, allocator); + else + lzma_free(coder->lz.coder, allocator); + + lzma_free(coder, allocator); + return; +} + + +static lzma_ret +lz_encoder_update(void *coder_ptr, const lzma_allocator *allocator, + const lzma_filter *filters_null lzma_attribute((__unused__)), + const lzma_filter *reversed_filters) +{ + lzma_coder *coder = coder_ptr; + + if (coder->lz.options_update == NULL) + return LZMA_PROG_ERROR; + + return_if_error(coder->lz.options_update( + coder->lz.coder, reversed_filters)); + + return lzma_next_filter_update( + &coder->next, allocator, reversed_filters + 1); +} + + +static lzma_ret +lz_encoder_set_out_limit(void *coder_ptr, uint64_t *uncomp_size, + uint64_t out_limit) +{ + lzma_coder *coder = coder_ptr; + + // This is supported only if there are no other filters chained. + if (coder->next.code == NULL && coder->lz.set_out_limit != NULL) + return coder->lz.set_out_limit( + coder->lz.coder, uncomp_size, out_limit); + + return LZMA_OPTIONS_ERROR; +} + + +extern lzma_ret +lzma_lz_encoder_init(lzma_next_coder *next, const lzma_allocator *allocator, + const lzma_filter_info *filters, + lzma_ret (*lz_init)(lzma_lz_encoder *lz, + const lzma_allocator *allocator, + lzma_vli id, const void *options, + lzma_lz_options *lz_options)) +{ +#if defined(HAVE_SMALL) && !defined(HAVE_FUNC_ATTRIBUTE_CONSTRUCTOR) + // The CRC32 table must be initialized. + lzma_crc32_init(); +#endif + + // Allocate and initialize the base data structure. + lzma_coder *coder = next->coder; + if (coder == NULL) { + coder = lzma_alloc(sizeof(lzma_coder), allocator); + if (coder == NULL) + return LZMA_MEM_ERROR; + + next->coder = coder; + next->code = &lz_encode; + next->end = &lz_encoder_end; + next->update = &lz_encoder_update; + next->set_out_limit = &lz_encoder_set_out_limit; + + coder->lz.coder = NULL; + coder->lz.code = NULL; + coder->lz.end = NULL; + coder->lz.options_update = NULL; + coder->lz.set_out_limit = NULL; + + // mf.size is initialized to silence Valgrind + // when used on optimized binaries (GCC may reorder + // code in a way that Valgrind gets unhappy). + coder->mf.buffer = NULL; + coder->mf.size = 0; + coder->mf.hash = NULL; + coder->mf.son = NULL; + coder->mf.hash_count = 0; + coder->mf.sons_count = 0; + + coder->next = LZMA_NEXT_CODER_INIT; + } + + // Initialize the LZ-based encoder. + lzma_lz_options lz_options; + return_if_error(lz_init(&coder->lz, allocator, + filters[0].id, filters[0].options, &lz_options)); + + // Setup the size information into coder->mf and deallocate + // old buffers if they have wrong size. + if (lz_encoder_prepare(&coder->mf, allocator, &lz_options)) + return LZMA_OPTIONS_ERROR; + + // Allocate new buffers if needed, and do the rest of + // the initialization. + if (lz_encoder_init(&coder->mf, allocator, &lz_options)) + return LZMA_MEM_ERROR; + + // Initialize the next filter in the chain, if any. + return lzma_next_filter_init(&coder->next, allocator, filters + 1); +} + + +extern LZMA_API(lzma_bool) +lzma_mf_is_supported(lzma_match_finder mf) +{ + switch (mf) { +#ifdef HAVE_MF_HC3 + case LZMA_MF_HC3: + return true; +#endif +#ifdef HAVE_MF_HC4 + case LZMA_MF_HC4: + return true; +#endif +#ifdef HAVE_MF_BT2 + case LZMA_MF_BT2: + return true; +#endif +#ifdef HAVE_MF_BT3 + case LZMA_MF_BT3: + return true; +#endif +#ifdef HAVE_MF_BT4 + case LZMA_MF_BT4: + return true; +#endif + default: + return false; + } +} diff --git a/src/native/external/xz/src/liblzma/lz/lz_encoder.h b/src/native/external/xz/src/liblzma/lz/lz_encoder.h new file mode 100644 index 00000000000000..eb197c6b6cd8ac --- /dev/null +++ b/src/native/external/xz/src/liblzma/lz/lz_encoder.h @@ -0,0 +1,352 @@ +// SPDX-License-Identifier: 0BSD + +/////////////////////////////////////////////////////////////////////////////// +// +/// \file lz_encoder.h +/// \brief LZ in window and match finder API +/// +// Authors: Igor Pavlov +// Lasse Collin +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef LZMA_LZ_ENCODER_H +#define LZMA_LZ_ENCODER_H + +#include "common.h" + + +// For now, the dictionary size is limited to 1.5 GiB. This may grow +// in the future if needed, but it needs a little more work than just +// changing this check. +#define IS_ENC_DICT_SIZE_VALID(size) \ + ((size) >= LZMA_DICT_SIZE_MIN \ + && (size) <= (UINT32_C(1) << 30) + (UINT32_C(1) << 29)) + + +/// A table of these is used by the LZ-based encoder to hold +/// the length-distance pairs found by the match finder. +typedef struct { + uint32_t len; + uint32_t dist; +} lzma_match; + + +typedef struct lzma_mf_s lzma_mf; +struct lzma_mf_s { + /////////////// + // In Window // + /////////////// + + /// Pointer to buffer with data to be compressed + uint8_t *buffer; + + /// Total size of the allocated buffer (that is, including all + /// the extra space) + uint32_t size; + + /// Number of bytes that must be kept available in our input history. + /// That is, once keep_size_before bytes have been processed, + /// buffer[read_pos - keep_size_before] is the oldest byte that + /// must be available for reading. + uint32_t keep_size_before; + + /// Number of bytes that must be kept in buffer after read_pos. + /// That is, read_pos <= write_pos - keep_size_after as long as + /// action is LZMA_RUN; when action != LZMA_RUN, read_pos is allowed + /// to reach write_pos so that the last bytes get encoded too. + uint32_t keep_size_after; + + /// Match finders store locations of matches using 32-bit integers. + /// To avoid adjusting several megabytes of integers every time the + /// input window is moved with move_window, we only adjust the + /// offset of the buffer. Thus, buffer[value_in_hash_table - offset] + /// is the byte pointed by value_in_hash_table. + uint32_t offset; + + /// buffer[read_pos] is the next byte to run through the match + /// finder. This is incremented in the match finder once the byte + /// has been processed. + uint32_t read_pos; + + /// Number of bytes that have been ran through the match finder, but + /// which haven't been encoded by the LZ-based encoder yet. + uint32_t read_ahead; + + /// As long as read_pos is less than read_limit, there is enough + /// input available in buffer for at least one encoding loop. + /// + /// Because of the stateful API, read_limit may and will get greater + /// than read_pos quite often. This is taken into account when + /// calculating the value for keep_size_after. + uint32_t read_limit; + + /// buffer[write_pos] is the first byte that doesn't contain valid + /// uncompressed data; that is, the next input byte will be copied + /// to buffer[write_pos]. + uint32_t write_pos; + + /// Number of bytes not hashed before read_pos. This is needed to + /// restart the match finder after LZMA_SYNC_FLUSH. + uint32_t pending; + + ////////////////// + // Match Finder // + ////////////////// + + /// Find matches. Returns the number of distance-length pairs written + /// to the matches array. This is called only via lzma_mf_find(). + uint32_t (*find)(lzma_mf *mf, lzma_match *matches); + + /// Skips num bytes. This is like find() but doesn't make the + /// distance-length pairs available, thus being a little faster. + /// This is called only via mf_skip(). + void (*skip)(lzma_mf *mf, uint32_t num); + + uint32_t *hash; + uint32_t *son; + uint32_t cyclic_pos; + uint32_t cyclic_size; // Must be dictionary size + 1. + uint32_t hash_mask; + + /// Maximum number of loops in the match finder + uint32_t depth; + + /// Maximum length of a match that the match finder will try to find. + uint32_t nice_len; + + /// Maximum length of a match supported by the LZ-based encoder. + /// If the longest match found by the match finder is nice_len, + /// mf_find() tries to expand it up to match_len_max bytes. + uint32_t match_len_max; + + /// When running out of input, binary tree match finders need to know + /// if it is due to flushing or finishing. The action is used also + /// by the LZ-based encoders themselves. + lzma_action action; + + /// Number of elements in hash[] + uint32_t hash_count; + + /// Number of elements in son[] + uint32_t sons_count; +}; + + +typedef struct { + /// Extra amount of data to keep available before the "actual" + /// dictionary. + size_t before_size; + + /// Size of the history buffer + size_t dict_size; + + /// Extra amount of data to keep available after the "actual" + /// dictionary. + size_t after_size; + + /// Maximum length of a match that the LZ-based encoder can accept. + /// This is used to extend matches of length nice_len to the + /// maximum possible length. + size_t match_len_max; + + /// Match finder will search matches up to this length. + /// This must be less than or equal to match_len_max. + size_t nice_len; + + /// Type of the match finder to use + lzma_match_finder match_finder; + + /// Maximum search depth + uint32_t depth; + + /// Initial dictionary for the match finder to search. + const uint8_t *preset_dict; + + /// If the preset dictionary is NULL, this value is ignored. + /// Otherwise this member must indicate the preset dictionary's + /// buffer size. If this size is larger than dict_size, then only + /// the dict_size sized tail of the preset_dict will be used. + uint32_t preset_dict_size; + +} lzma_lz_options; + + +// The total usable buffer space at any moment outside the match finder: +// before_size + dict_size + after_size + match_len_max +// +// In reality, there's some extra space allocated to prevent the number of +// memmove() calls reasonable. The bigger the dict_size is, the bigger +// this extra buffer will be since with bigger dictionaries memmove() would +// also take longer. +// +// A single encoder loop in the LZ-based encoder may call the match finder +// (mf_find() or mf_skip()) at most after_size times. In other words, +// a single encoder loop may increment lzma_mf.read_pos at most after_size +// times. Since matches are looked up to +// lzma_mf.buffer[lzma_mf.read_pos + match_len_max - 1], the total +// amount of extra buffer needed after dict_size becomes +// after_size + match_len_max. +// +// before_size has two uses. The first one is to keep literals available +// in cases when the LZ-based encoder has made some read ahead. +// TODO: Maybe this could be changed by making the LZ-based encoders to +// store the actual literals as they do with length-distance pairs. +// +// Algorithms such as LZMA2 first try to compress a chunk, and then check +// if the encoded result is smaller than the uncompressed one. If the chunk +// was incompressible, it is better to store it in uncompressed form in +// the output stream. To do this, the whole uncompressed chunk has to be +// still available in the history buffer. before_size achieves that. + + +typedef struct { + /// Data specific to the LZ-based encoder + void *coder; + + /// Function to encode from *dict to out[] + lzma_ret (*code)(void *coder, + lzma_mf *restrict mf, uint8_t *restrict out, + size_t *restrict out_pos, size_t out_size); + + /// Free allocated resources + void (*end)(void *coder, const lzma_allocator *allocator); + + /// Update the options in the middle of the encoding. + lzma_ret (*options_update)(void *coder, const lzma_filter *filter); + + /// Set maximum allowed output size + lzma_ret (*set_out_limit)(void *coder, uint64_t *uncomp_size, + uint64_t out_limit); + +} lzma_lz_encoder; + + +// Basic steps: +// 1. Input gets copied into the dictionary. +// 2. Data in dictionary gets run through the match finder byte by byte. +// 3. The literals and matches are encoded using e.g. LZMA. +// +// The bytes that have been ran through the match finder, but not encoded yet, +// are called 'read ahead'. + + +/// Get how many bytes the match finder hashes in its initial step. +/// This is also the minimum nice_len value with the match finder. +static inline uint32_t +mf_get_hash_bytes(lzma_match_finder match_finder) +{ + return (uint32_t)match_finder & 0x0F; +} + + +/// Get pointer to the first byte not ran through the match finder +static inline const uint8_t * +mf_ptr(const lzma_mf *mf) +{ + return mf->buffer + mf->read_pos; +} + + +/// Get the number of bytes that haven't been ran through the match finder yet. +static inline uint32_t +mf_avail(const lzma_mf *mf) +{ + return mf->write_pos - mf->read_pos; +} + + +/// Get the number of bytes that haven't been encoded yet (some of these +/// bytes may have been ran through the match finder though). +static inline uint32_t +mf_unencoded(const lzma_mf *mf) +{ + return mf->write_pos - mf->read_pos + mf->read_ahead; +} + + +/// Calculate the absolute offset from the beginning of the most recent +/// dictionary reset. Only the lowest four bits are important, so there's no +/// problem that we don't know the 64-bit size of the data encoded so far. +/// +/// NOTE: When moving the input window, we need to do it so that the lowest +/// bits of dict->read_pos are not modified to keep this macro working +/// as intended. +static inline uint32_t +mf_position(const lzma_mf *mf) +{ + return mf->read_pos - mf->read_ahead; +} + + +/// Since everything else begins with mf_, use it also for lzma_mf_find(). +#define mf_find lzma_mf_find + + +/// Skip the given number of bytes. This is used when a good match was found. +/// For example, if mf_find() finds a match of 200 bytes long, the first byte +/// of that match was already consumed by mf_find(), and the rest 199 bytes +/// have to be skipped with mf_skip(mf, 199). +static inline void +mf_skip(lzma_mf *mf, uint32_t amount) +{ + if (amount != 0) { + mf->skip(mf, amount); + mf->read_ahead += amount; + } +} + + +/// Copies at most *left number of bytes from the history buffer +/// to out[]. This is needed by LZMA2 to encode uncompressed chunks. +static inline void +mf_read(lzma_mf *mf, uint8_t *out, size_t *out_pos, size_t out_size, + size_t *left) +{ + const size_t out_avail = out_size - *out_pos; + const size_t copy_size = my_min(out_avail, *left); + + assert(mf->read_ahead == 0); + assert(mf->read_pos >= *left); + + memcpy(out + *out_pos, mf->buffer + mf->read_pos - *left, + copy_size); + + *out_pos += copy_size; + *left -= copy_size; + return; +} + + +extern lzma_ret lzma_lz_encoder_init( + lzma_next_coder *next, const lzma_allocator *allocator, + const lzma_filter_info *filters, + lzma_ret (*lz_init)(lzma_lz_encoder *lz, + const lzma_allocator *allocator, + lzma_vli id, const void *options, + lzma_lz_options *lz_options)); + + +extern uint64_t lzma_lz_encoder_memusage(const lzma_lz_options *lz_options); + + +// These are only for LZ encoder's internal use. +extern uint32_t lzma_mf_find( + lzma_mf *mf, uint32_t *count, lzma_match *matches); + +extern uint32_t lzma_mf_hc3_find(lzma_mf *dict, lzma_match *matches); +extern void lzma_mf_hc3_skip(lzma_mf *dict, uint32_t amount); + +extern uint32_t lzma_mf_hc4_find(lzma_mf *dict, lzma_match *matches); +extern void lzma_mf_hc4_skip(lzma_mf *dict, uint32_t amount); + +extern uint32_t lzma_mf_bt2_find(lzma_mf *dict, lzma_match *matches); +extern void lzma_mf_bt2_skip(lzma_mf *dict, uint32_t amount); + +extern uint32_t lzma_mf_bt3_find(lzma_mf *dict, lzma_match *matches); +extern void lzma_mf_bt3_skip(lzma_mf *dict, uint32_t amount); + +extern uint32_t lzma_mf_bt4_find(lzma_mf *dict, lzma_match *matches); +extern void lzma_mf_bt4_skip(lzma_mf *dict, uint32_t amount); + +#endif diff --git a/src/native/external/xz/src/liblzma/lz/lz_encoder_hash.h b/src/native/external/xz/src/liblzma/lz/lz_encoder_hash.h new file mode 100644 index 00000000000000..6d4bf837fd168e --- /dev/null +++ b/src/native/external/xz/src/liblzma/lz/lz_encoder_hash.h @@ -0,0 +1,122 @@ +// SPDX-License-Identifier: 0BSD + +/////////////////////////////////////////////////////////////////////////////// +// +/// \file lz_encoder_hash.h +/// \brief Hash macros for match finders +// +// Authors: Igor Pavlov +// Lasse Collin +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef LZMA_LZ_ENCODER_HASH_H +#define LZMA_LZ_ENCODER_HASH_H + +// We need to know if CRC32_GENERIC is defined and we may need the declaration +// of lzma_crc32_table[][]. +#include "crc_common.h" + +// If HAVE_SMALL is defined, then lzma_crc32_table[][] exists and +// it's little endian even on big endian systems. +// +// If HAVE_SMALL isn't defined, lzma_crc32_table[][] is in native endian +// but we want a little endian one so that the compressed output won't +// depend on the processor endianness. Big endian systems are less common +// so those get the burden of an extra 1 KiB table. +// +// If HAVE_SMALL isn't defined and CRC32_GENERIC isn't defined either, +// then lzma_crc32_table[][] doesn't exist. +#if defined(HAVE_SMALL) \ + || (defined(CRC32_GENERIC) && !defined(WORDS_BIGENDIAN)) +# define hash_table lzma_crc32_table[0] +#else + // lz_encoder.c takes care of including the actual table. + lzma_attr_visibility_hidden + extern const uint32_t lzma_lz_hash_table[256]; +# define hash_table lzma_lz_hash_table +# define LZMA_LZ_HASH_TABLE_IS_NEEDED 1 +#endif + +#define HASH_2_SIZE (UINT32_C(1) << 10) +#define HASH_3_SIZE (UINT32_C(1) << 16) +#define HASH_4_SIZE (UINT32_C(1) << 20) + +#define HASH_2_MASK (HASH_2_SIZE - 1) +#define HASH_3_MASK (HASH_3_SIZE - 1) +#define HASH_4_MASK (HASH_4_SIZE - 1) + +#define FIX_3_HASH_SIZE (HASH_2_SIZE) +#define FIX_4_HASH_SIZE (HASH_2_SIZE + HASH_3_SIZE) +#define FIX_5_HASH_SIZE (HASH_2_SIZE + HASH_3_SIZE + HASH_4_SIZE) + +// Endianness doesn't matter in hash_2_calc() (no effect on the output). +#ifdef TUKLIB_FAST_UNALIGNED_ACCESS +# define hash_2_calc() \ + const uint32_t hash_value = read16ne(cur) +#else +# define hash_2_calc() \ + const uint32_t hash_value \ + = (uint32_t)(cur[0]) | ((uint32_t)(cur[1]) << 8) +#endif + +#define hash_3_calc() \ + const uint32_t temp = hash_table[cur[0]] ^ cur[1]; \ + const uint32_t hash_2_value = temp & HASH_2_MASK; \ + const uint32_t hash_value \ + = (temp ^ ((uint32_t)(cur[2]) << 8)) & mf->hash_mask + +#define hash_4_calc() \ + const uint32_t temp = hash_table[cur[0]] ^ cur[1]; \ + const uint32_t hash_2_value = temp & HASH_2_MASK; \ + const uint32_t hash_3_value \ + = (temp ^ ((uint32_t)(cur[2]) << 8)) & HASH_3_MASK; \ + const uint32_t hash_value = (temp ^ ((uint32_t)(cur[2]) << 8) \ + ^ (hash_table[cur[3]] << 5)) & mf->hash_mask + + +// The following are not currently used. + +#define hash_5_calc() \ + const uint32_t temp = hash_table[cur[0]] ^ cur[1]; \ + const uint32_t hash_2_value = temp & HASH_2_MASK; \ + const uint32_t hash_3_value \ + = (temp ^ ((uint32_t)(cur[2]) << 8)) & HASH_3_MASK; \ + uint32_t hash_4_value = (temp ^ ((uint32_t)(cur[2]) << 8) ^ \ + ^ hash_table[cur[3]] << 5); \ + const uint32_t hash_value \ + = (hash_4_value ^ (hash_table[cur[4]] << 3)) \ + & mf->hash_mask; \ + hash_4_value &= HASH_4_MASK + +/* +#define hash_zip_calc() \ + const uint32_t hash_value \ + = (((uint32_t)(cur[0]) | ((uint32_t)(cur[1]) << 8)) \ + ^ hash_table[cur[2]]) & 0xFFFF +*/ + +#define hash_zip_calc() \ + const uint32_t hash_value \ + = (((uint32_t)(cur[2]) | ((uint32_t)(cur[0]) << 8)) \ + ^ hash_table[cur[1]]) & 0xFFFF + +#define mt_hash_2_calc() \ + const uint32_t hash_2_value \ + = (hash_table[cur[0]] ^ cur[1]) & HASH_2_MASK + +#define mt_hash_3_calc() \ + const uint32_t temp = hash_table[cur[0]] ^ cur[1]; \ + const uint32_t hash_2_value = temp & HASH_2_MASK; \ + const uint32_t hash_3_value \ + = (temp ^ ((uint32_t)(cur[2]) << 8)) & HASH_3_MASK + +#define mt_hash_4_calc() \ + const uint32_t temp = hash_table[cur[0]] ^ cur[1]; \ + const uint32_t hash_2_value = temp & HASH_2_MASK; \ + const uint32_t hash_3_value \ + = (temp ^ ((uint32_t)(cur[2]) << 8)) & HASH_3_MASK; \ + const uint32_t hash_4_value = (temp ^ ((uint32_t)(cur[2]) << 8) ^ \ + (hash_table[cur[3]] << 5)) & HASH_4_MASK + +#endif diff --git a/src/native/external/xz/src/liblzma/lz/lz_encoder_hash_table.h b/src/native/external/xz/src/liblzma/lz/lz_encoder_hash_table.h new file mode 100644 index 00000000000000..2b3a60e43e8016 --- /dev/null +++ b/src/native/external/xz/src/liblzma/lz/lz_encoder_hash_table.h @@ -0,0 +1,70 @@ +// SPDX-License-Identifier: 0BSD + +// This file has been generated by crc32_tablegen.c. + +const uint32_t lzma_lz_hash_table[256] = { + 0x00000000, 0x77073096, 0xEE0E612C, 0x990951BA, + 0x076DC419, 0x706AF48F, 0xE963A535, 0x9E6495A3, + 0x0EDB8832, 0x79DCB8A4, 0xE0D5E91E, 0x97D2D988, + 0x09B64C2B, 0x7EB17CBD, 0xE7B82D07, 0x90BF1D91, + 0x1DB71064, 0x6AB020F2, 0xF3B97148, 0x84BE41DE, + 0x1ADAD47D, 0x6DDDE4EB, 0xF4D4B551, 0x83D385C7, + 0x136C9856, 0x646BA8C0, 0xFD62F97A, 0x8A65C9EC, + 0x14015C4F, 0x63066CD9, 0xFA0F3D63, 0x8D080DF5, + 0x3B6E20C8, 0x4C69105E, 0xD56041E4, 0xA2677172, + 0x3C03E4D1, 0x4B04D447, 0xD20D85FD, 0xA50AB56B, + 0x35B5A8FA, 0x42B2986C, 0xDBBBC9D6, 0xACBCF940, + 0x32D86CE3, 0x45DF5C75, 0xDCD60DCF, 0xABD13D59, + 0x26D930AC, 0x51DE003A, 0xC8D75180, 0xBFD06116, + 0x21B4F4B5, 0x56B3C423, 0xCFBA9599, 0xB8BDA50F, + 0x2802B89E, 0x5F058808, 0xC60CD9B2, 0xB10BE924, + 0x2F6F7C87, 0x58684C11, 0xC1611DAB, 0xB6662D3D, + 0x76DC4190, 0x01DB7106, 0x98D220BC, 0xEFD5102A, + 0x71B18589, 0x06B6B51F, 0x9FBFE4A5, 0xE8B8D433, + 0x7807C9A2, 0x0F00F934, 0x9609A88E, 0xE10E9818, + 0x7F6A0DBB, 0x086D3D2D, 0x91646C97, 0xE6635C01, + 0x6B6B51F4, 0x1C6C6162, 0x856530D8, 0xF262004E, + 0x6C0695ED, 0x1B01A57B, 0x8208F4C1, 0xF50FC457, + 0x65B0D9C6, 0x12B7E950, 0x8BBEB8EA, 0xFCB9887C, + 0x62DD1DDF, 0x15DA2D49, 0x8CD37CF3, 0xFBD44C65, + 0x4DB26158, 0x3AB551CE, 0xA3BC0074, 0xD4BB30E2, + 0x4ADFA541, 0x3DD895D7, 0xA4D1C46D, 0xD3D6F4FB, + 0x4369E96A, 0x346ED9FC, 0xAD678846, 0xDA60B8D0, + 0x44042D73, 0x33031DE5, 0xAA0A4C5F, 0xDD0D7CC9, + 0x5005713C, 0x270241AA, 0xBE0B1010, 0xC90C2086, + 0x5768B525, 0x206F85B3, 0xB966D409, 0xCE61E49F, + 0x5EDEF90E, 0x29D9C998, 0xB0D09822, 0xC7D7A8B4, + 0x59B33D17, 0x2EB40D81, 0xB7BD5C3B, 0xC0BA6CAD, + 0xEDB88320, 0x9ABFB3B6, 0x03B6E20C, 0x74B1D29A, + 0xEAD54739, 0x9DD277AF, 0x04DB2615, 0x73DC1683, + 0xE3630B12, 0x94643B84, 0x0D6D6A3E, 0x7A6A5AA8, + 0xE40ECF0B, 0x9309FF9D, 0x0A00AE27, 0x7D079EB1, + 0xF00F9344, 0x8708A3D2, 0x1E01F268, 0x6906C2FE, + 0xF762575D, 0x806567CB, 0x196C3671, 0x6E6B06E7, + 0xFED41B76, 0x89D32BE0, 0x10DA7A5A, 0x67DD4ACC, + 0xF9B9DF6F, 0x8EBEEFF9, 0x17B7BE43, 0x60B08ED5, + 0xD6D6A3E8, 0xA1D1937E, 0x38D8C2C4, 0x4FDFF252, + 0xD1BB67F1, 0xA6BC5767, 0x3FB506DD, 0x48B2364B, + 0xD80D2BDA, 0xAF0A1B4C, 0x36034AF6, 0x41047A60, + 0xDF60EFC3, 0xA867DF55, 0x316E8EEF, 0x4669BE79, + 0xCB61B38C, 0xBC66831A, 0x256FD2A0, 0x5268E236, + 0xCC0C7795, 0xBB0B4703, 0x220216B9, 0x5505262F, + 0xC5BA3BBE, 0xB2BD0B28, 0x2BB45A92, 0x5CB36A04, + 0xC2D7FFA7, 0xB5D0CF31, 0x2CD99E8B, 0x5BDEAE1D, + 0x9B64C2B0, 0xEC63F226, 0x756AA39C, 0x026D930A, + 0x9C0906A9, 0xEB0E363F, 0x72076785, 0x05005713, + 0x95BF4A82, 0xE2B87A14, 0x7BB12BAE, 0x0CB61B38, + 0x92D28E9B, 0xE5D5BE0D, 0x7CDCEFB7, 0x0BDBDF21, + 0x86D3D2D4, 0xF1D4E242, 0x68DDB3F8, 0x1FDA836E, + 0x81BE16CD, 0xF6B9265B, 0x6FB077E1, 0x18B74777, + 0x88085AE6, 0xFF0F6A70, 0x66063BCA, 0x11010B5C, + 0x8F659EFF, 0xF862AE69, 0x616BFFD3, 0x166CCF45, + 0xA00AE278, 0xD70DD2EE, 0x4E048354, 0x3903B3C2, + 0xA7672661, 0xD06016F7, 0x4969474D, 0x3E6E77DB, + 0xAED16A4A, 0xD9D65ADC, 0x40DF0B66, 0x37D83BF0, + 0xA9BCAE53, 0xDEBB9EC5, 0x47B2CF7F, 0x30B5FFE9, + 0xBDBDF21C, 0xCABAC28A, 0x53B39330, 0x24B4A3A6, + 0xBAD03605, 0xCDD70693, 0x54DE5729, 0x23D967BF, + 0xB3667A2E, 0xC4614AB8, 0x5D681B02, 0x2A6F2B94, + 0xB40BBE37, 0xC30C8EA1, 0x5A05DF1B, 0x2D02EF8D +}; diff --git a/src/native/external/xz/src/liblzma/lz/lz_encoder_mf.c b/src/native/external/xz/src/liblzma/lz/lz_encoder_mf.c new file mode 100644 index 00000000000000..557c2612f2a24c --- /dev/null +++ b/src/native/external/xz/src/liblzma/lz/lz_encoder_mf.c @@ -0,0 +1,744 @@ +// SPDX-License-Identifier: 0BSD + +/////////////////////////////////////////////////////////////////////////////// +// +/// \file lz_encoder_mf.c +/// \brief Match finders +/// +// Authors: Igor Pavlov +// Lasse Collin +// +/////////////////////////////////////////////////////////////////////////////// + +#include "lz_encoder.h" +#include "lz_encoder_hash.h" +#include "memcmplen.h" + + +/// \brief Find matches starting from the current byte +/// +/// \return The length of the longest match found +extern uint32_t +lzma_mf_find(lzma_mf *mf, uint32_t *count_ptr, lzma_match *matches) +{ + // Call the match finder. It returns the number of length-distance + // pairs found. + // FIXME: Minimum count is zero, what _exactly_ is the maximum? + const uint32_t count = mf->find(mf, matches); + + // Length of the longest match; assume that no matches were found + // and thus the maximum length is zero. + uint32_t len_best = 0; + + if (count > 0) { +#ifndef NDEBUG + // Validate the matches. + for (uint32_t i = 0; i < count; ++i) { + assert(matches[i].len <= mf->nice_len); + assert(matches[i].dist < mf->read_pos); + assert(memcmp(mf_ptr(mf) - 1, + mf_ptr(mf) - matches[i].dist - 2, + matches[i].len) == 0); + } +#endif + + // The last used element in the array contains + // the longest match. + len_best = matches[count - 1].len; + + // If a match of maximum search length was found, try to + // extend the match to maximum possible length. + if (len_best == mf->nice_len) { + // The limit for the match length is either the + // maximum match length supported by the LZ-based + // encoder or the number of bytes left in the + // dictionary, whichever is smaller. + uint32_t limit = mf_avail(mf) + 1; + if (limit > mf->match_len_max) + limit = mf->match_len_max; + + // Pointer to the byte we just ran through + // the match finder. + const uint8_t *p1 = mf_ptr(mf) - 1; + + // Pointer to the beginning of the match. We need -1 + // here because the match distances are zero based. + const uint8_t *p2 = p1 - matches[count - 1].dist - 1; + + len_best = lzma_memcmplen(p1, p2, len_best, limit); + } + } + + *count_ptr = count; + + // Finally update the read position to indicate that match finder was + // run for this dictionary offset. + ++mf->read_ahead; + + return len_best; +} + + +/// Hash value to indicate unused element in the hash. Since we start the +/// positions from dict_size + 1, zero is always too far to qualify +/// as usable match position. +#define EMPTY_HASH_VALUE 0 + + +/// Normalization must be done when lzma_mf.offset + lzma_mf.read_pos +/// reaches MUST_NORMALIZE_POS. +#define MUST_NORMALIZE_POS UINT32_MAX + + +/// \brief Normalizes hash values +/// +/// The hash arrays store positions of match candidates. The positions are +/// relative to an arbitrary offset that is not the same as the absolute +/// offset in the input stream. The relative position of the current byte +/// is lzma_mf.offset + lzma_mf.read_pos. The distances of the matches are +/// the differences of the current read position and the position found from +/// the hash. +/// +/// To prevent integer overflows of the offsets stored in the hash arrays, +/// we need to "normalize" the stored values now and then. During the +/// normalization, we drop values that indicate distance greater than the +/// dictionary size, thus making space for new values. +static void +normalize(lzma_mf *mf) +{ + assert(mf->read_pos + mf->offset == MUST_NORMALIZE_POS); + + // In future we may not want to touch the lowest bits, because there + // may be match finders that use larger resolution than one byte. + const uint32_t subvalue + = (MUST_NORMALIZE_POS - mf->cyclic_size); + // & ~((UINT32_C(1) << 10) - 1); + + for (uint32_t i = 0; i < mf->hash_count; ++i) { + // If the distance is greater than the dictionary size, + // we can simply mark the hash element as empty. + if (mf->hash[i] <= subvalue) + mf->hash[i] = EMPTY_HASH_VALUE; + else + mf->hash[i] -= subvalue; + } + + for (uint32_t i = 0; i < mf->sons_count; ++i) { + // Do the same for mf->son. + // + // NOTE: There may be uninitialized elements in mf->son. + // Valgrind may complain that the "if" below depends on + // an uninitialized value. In this case it is safe to ignore + // the warning. See also the comments in lz_encoder_init() + // in lz_encoder.c. + if (mf->son[i] <= subvalue) + mf->son[i] = EMPTY_HASH_VALUE; + else + mf->son[i] -= subvalue; + } + + // Update offset to match the new locations. + mf->offset -= subvalue; + + return; +} + + +/// Mark the current byte as processed from point of view of the match finder. +static void +move_pos(lzma_mf *mf) +{ + if (++mf->cyclic_pos == mf->cyclic_size) + mf->cyclic_pos = 0; + + ++mf->read_pos; + assert(mf->read_pos <= mf->write_pos); + + if (unlikely(mf->read_pos + mf->offset == UINT32_MAX)) + normalize(mf); +} + + +/// When flushing, we cannot run the match finder unless there is nice_len +/// bytes available in the dictionary. Instead, we skip running the match +/// finder (indicating that no match was found), and count how many bytes we +/// have ignored this way. +/// +/// When new data is given after the flushing was completed, the match finder +/// is restarted by rewinding mf->read_pos backwards by mf->pending. Then +/// the missed bytes are added to the hash using the match finder's skip +/// function (with small amount of input, it may start using mf->pending +/// again if flushing). +/// +/// Due to this rewinding, we don't touch cyclic_pos or test for +/// normalization. It will be done when the match finder's skip function +/// catches up after a flush. +static void +move_pending(lzma_mf *mf) +{ + ++mf->read_pos; + assert(mf->read_pos <= mf->write_pos); + ++mf->pending; +} + + +/// Calculate len_limit and determine if there is enough input to run +/// the actual match finder code. Sets up "cur" and "pos". This macro +/// is used by all find functions and binary tree skip functions. Hash +/// chain skip function doesn't need len_limit so a simpler code is used +/// in them. +#define header(is_bt, len_min, ret_op) \ + uint32_t len_limit = mf_avail(mf); \ + if (mf->nice_len <= len_limit) { \ + len_limit = mf->nice_len; \ + } else if (len_limit < (len_min) \ + || (is_bt && mf->action == LZMA_SYNC_FLUSH)) { \ + assert(mf->action != LZMA_RUN); \ + move_pending(mf); \ + ret_op; \ + } \ + const uint8_t *cur = mf_ptr(mf); \ + const uint32_t pos = mf->read_pos + mf->offset + + +/// Header for find functions. "return 0" indicates that zero matches +/// were found. +#define header_find(is_bt, len_min) \ + header(is_bt, len_min, return 0); \ + uint32_t matches_count = 0 + + +/// Header for a loop in a skip function. "continue" tells to skip the rest +/// of the code in the loop. +#define header_skip(is_bt, len_min) \ + header(is_bt, len_min, continue) + + +/// Calls hc_find_func() or bt_find_func() and calculates the total number +/// of matches found. Updates the dictionary position and returns the number +/// of matches found. +#define call_find(func, len_best) \ +do { \ + matches_count = (uint32_t)(func(len_limit, pos, cur, cur_match, \ + mf->depth, mf->son, \ + mf->cyclic_pos, mf->cyclic_size, \ + matches + matches_count, len_best) \ + - matches); \ + move_pos(mf); \ + return matches_count; \ +} while (0) + + +//////////////// +// Hash Chain // +//////////////// + +#if defined(HAVE_MF_HC3) || defined(HAVE_MF_HC4) +/// +/// +/// \param len_limit Don't look for matches longer than len_limit. +/// \param pos lzma_mf.read_pos + lzma_mf.offset +/// \param cur Pointer to current byte (mf_ptr(mf)) +/// \param cur_match Start position of the current match candidate +/// \param depth Maximum length of the hash chain +/// \param son lzma_mf.son (contains the hash chain) +/// \param cyclic_pos lzma_mf.cyclic_pos +/// \param cyclic_size lzma_mf_cyclic_size +/// \param matches Array to hold the matches. +/// \param len_best The length of the longest match found so far. +static lzma_match * +hc_find_func( + const uint32_t len_limit, + const uint32_t pos, + const uint8_t *const cur, + uint32_t cur_match, + uint32_t depth, + uint32_t *const son, + const uint32_t cyclic_pos, + const uint32_t cyclic_size, + lzma_match *matches, + uint32_t len_best) +{ + son[cyclic_pos] = cur_match; + + while (true) { + const uint32_t delta = pos - cur_match; + if (depth-- == 0 || delta >= cyclic_size) + return matches; + + const uint8_t *const pb = cur - delta; + cur_match = son[cyclic_pos - delta + + (delta > cyclic_pos ? cyclic_size : 0)]; + + if (pb[len_best] == cur[len_best] && pb[0] == cur[0]) { + uint32_t len = lzma_memcmplen(pb, cur, 1, len_limit); + + if (len_best < len) { + len_best = len; + matches->len = len; + matches->dist = delta - 1; + ++matches; + + if (len == len_limit) + return matches; + } + } + } +} + + +#define hc_find(len_best) \ + call_find(hc_find_func, len_best) + + +#define hc_skip() \ +do { \ + mf->son[mf->cyclic_pos] = cur_match; \ + move_pos(mf); \ +} while (0) + +#endif + + +#ifdef HAVE_MF_HC3 +extern uint32_t +lzma_mf_hc3_find(lzma_mf *mf, lzma_match *matches) +{ + header_find(false, 3); + + hash_3_calc(); + + const uint32_t delta2 = pos - mf->hash[hash_2_value]; + const uint32_t cur_match = mf->hash[FIX_3_HASH_SIZE + hash_value]; + + mf->hash[hash_2_value] = pos; + mf->hash[FIX_3_HASH_SIZE + hash_value] = pos; + + uint32_t len_best = 2; + + if (delta2 < mf->cyclic_size && *(cur - delta2) == *cur) { + len_best = lzma_memcmplen(cur - delta2, cur, + len_best, len_limit); + + matches[0].len = len_best; + matches[0].dist = delta2 - 1; + matches_count = 1; + + if (len_best == len_limit) { + hc_skip(); + return 1; // matches_count + } + } + + hc_find(len_best); +} + + +extern void +lzma_mf_hc3_skip(lzma_mf *mf, uint32_t amount) +{ + do { + if (mf_avail(mf) < 3) { + move_pending(mf); + continue; + } + + const uint8_t *cur = mf_ptr(mf); + const uint32_t pos = mf->read_pos + mf->offset; + + hash_3_calc(); + + const uint32_t cur_match + = mf->hash[FIX_3_HASH_SIZE + hash_value]; + + mf->hash[hash_2_value] = pos; + mf->hash[FIX_3_HASH_SIZE + hash_value] = pos; + + hc_skip(); + + } while (--amount != 0); +} +#endif + + +#ifdef HAVE_MF_HC4 +extern uint32_t +lzma_mf_hc4_find(lzma_mf *mf, lzma_match *matches) +{ + header_find(false, 4); + + hash_4_calc(); + + uint32_t delta2 = pos - mf->hash[hash_2_value]; + const uint32_t delta3 + = pos - mf->hash[FIX_3_HASH_SIZE + hash_3_value]; + const uint32_t cur_match = mf->hash[FIX_4_HASH_SIZE + hash_value]; + + mf->hash[hash_2_value ] = pos; + mf->hash[FIX_3_HASH_SIZE + hash_3_value] = pos; + mf->hash[FIX_4_HASH_SIZE + hash_value] = pos; + + uint32_t len_best = 1; + + if (delta2 < mf->cyclic_size && *(cur - delta2) == *cur) { + len_best = 2; + matches[0].len = 2; + matches[0].dist = delta2 - 1; + matches_count = 1; + } + + if (delta2 != delta3 && delta3 < mf->cyclic_size + && *(cur - delta3) == *cur) { + len_best = 3; + matches[matches_count++].dist = delta3 - 1; + delta2 = delta3; + } + + if (matches_count != 0) { + len_best = lzma_memcmplen(cur - delta2, cur, + len_best, len_limit); + + matches[matches_count - 1].len = len_best; + + if (len_best == len_limit) { + hc_skip(); + return matches_count; + } + } + + if (len_best < 3) + len_best = 3; + + hc_find(len_best); +} + + +extern void +lzma_mf_hc4_skip(lzma_mf *mf, uint32_t amount) +{ + do { + if (mf_avail(mf) < 4) { + move_pending(mf); + continue; + } + + const uint8_t *cur = mf_ptr(mf); + const uint32_t pos = mf->read_pos + mf->offset; + + hash_4_calc(); + + const uint32_t cur_match + = mf->hash[FIX_4_HASH_SIZE + hash_value]; + + mf->hash[hash_2_value] = pos; + mf->hash[FIX_3_HASH_SIZE + hash_3_value] = pos; + mf->hash[FIX_4_HASH_SIZE + hash_value] = pos; + + hc_skip(); + + } while (--amount != 0); +} +#endif + + +///////////////// +// Binary Tree // +///////////////// + +#if defined(HAVE_MF_BT2) || defined(HAVE_MF_BT3) || defined(HAVE_MF_BT4) +static lzma_match * +bt_find_func( + const uint32_t len_limit, + const uint32_t pos, + const uint8_t *const cur, + uint32_t cur_match, + uint32_t depth, + uint32_t *const son, + const uint32_t cyclic_pos, + const uint32_t cyclic_size, + lzma_match *matches, + uint32_t len_best) +{ + uint32_t *ptr0 = son + (cyclic_pos << 1) + 1; + uint32_t *ptr1 = son + (cyclic_pos << 1); + + uint32_t len0 = 0; + uint32_t len1 = 0; + + while (true) { + const uint32_t delta = pos - cur_match; + if (depth-- == 0 || delta >= cyclic_size) { + *ptr0 = EMPTY_HASH_VALUE; + *ptr1 = EMPTY_HASH_VALUE; + return matches; + } + + uint32_t *const pair = son + ((cyclic_pos - delta + + (delta > cyclic_pos ? cyclic_size : 0)) + << 1); + + const uint8_t *const pb = cur - delta; + uint32_t len = my_min(len0, len1); + + if (pb[len] == cur[len]) { + len = lzma_memcmplen(pb, cur, len + 1, len_limit); + + if (len_best < len) { + len_best = len; + matches->len = len; + matches->dist = delta - 1; + ++matches; + + if (len == len_limit) { + *ptr1 = pair[0]; + *ptr0 = pair[1]; + return matches; + } + } + } + + if (pb[len] < cur[len]) { + *ptr1 = cur_match; + ptr1 = pair + 1; + cur_match = *ptr1; + len1 = len; + } else { + *ptr0 = cur_match; + ptr0 = pair; + cur_match = *ptr0; + len0 = len; + } + } +} + + +static void +bt_skip_func( + const uint32_t len_limit, + const uint32_t pos, + const uint8_t *const cur, + uint32_t cur_match, + uint32_t depth, + uint32_t *const son, + const uint32_t cyclic_pos, + const uint32_t cyclic_size) +{ + uint32_t *ptr0 = son + (cyclic_pos << 1) + 1; + uint32_t *ptr1 = son + (cyclic_pos << 1); + + uint32_t len0 = 0; + uint32_t len1 = 0; + + while (true) { + const uint32_t delta = pos - cur_match; + if (depth-- == 0 || delta >= cyclic_size) { + *ptr0 = EMPTY_HASH_VALUE; + *ptr1 = EMPTY_HASH_VALUE; + return; + } + + uint32_t *pair = son + ((cyclic_pos - delta + + (delta > cyclic_pos ? cyclic_size : 0)) + << 1); + const uint8_t *pb = cur - delta; + uint32_t len = my_min(len0, len1); + + if (pb[len] == cur[len]) { + len = lzma_memcmplen(pb, cur, len + 1, len_limit); + + if (len == len_limit) { + *ptr1 = pair[0]; + *ptr0 = pair[1]; + return; + } + } + + if (pb[len] < cur[len]) { + *ptr1 = cur_match; + ptr1 = pair + 1; + cur_match = *ptr1; + len1 = len; + } else { + *ptr0 = cur_match; + ptr0 = pair; + cur_match = *ptr0; + len0 = len; + } + } +} + + +#define bt_find(len_best) \ + call_find(bt_find_func, len_best) + +#define bt_skip() \ +do { \ + bt_skip_func(len_limit, pos, cur, cur_match, mf->depth, \ + mf->son, mf->cyclic_pos, \ + mf->cyclic_size); \ + move_pos(mf); \ +} while (0) + +#endif + + +#ifdef HAVE_MF_BT2 +extern uint32_t +lzma_mf_bt2_find(lzma_mf *mf, lzma_match *matches) +{ + header_find(true, 2); + + hash_2_calc(); + + const uint32_t cur_match = mf->hash[hash_value]; + mf->hash[hash_value] = pos; + + bt_find(1); +} + + +extern void +lzma_mf_bt2_skip(lzma_mf *mf, uint32_t amount) +{ + do { + header_skip(true, 2); + + hash_2_calc(); + + const uint32_t cur_match = mf->hash[hash_value]; + mf->hash[hash_value] = pos; + + bt_skip(); + + } while (--amount != 0); +} +#endif + + +#ifdef HAVE_MF_BT3 +extern uint32_t +lzma_mf_bt3_find(lzma_mf *mf, lzma_match *matches) +{ + header_find(true, 3); + + hash_3_calc(); + + const uint32_t delta2 = pos - mf->hash[hash_2_value]; + const uint32_t cur_match = mf->hash[FIX_3_HASH_SIZE + hash_value]; + + mf->hash[hash_2_value] = pos; + mf->hash[FIX_3_HASH_SIZE + hash_value] = pos; + + uint32_t len_best = 2; + + if (delta2 < mf->cyclic_size && *(cur - delta2) == *cur) { + len_best = lzma_memcmplen( + cur, cur - delta2, len_best, len_limit); + + matches[0].len = len_best; + matches[0].dist = delta2 - 1; + matches_count = 1; + + if (len_best == len_limit) { + bt_skip(); + return 1; // matches_count + } + } + + bt_find(len_best); +} + + +extern void +lzma_mf_bt3_skip(lzma_mf *mf, uint32_t amount) +{ + do { + header_skip(true, 3); + + hash_3_calc(); + + const uint32_t cur_match + = mf->hash[FIX_3_HASH_SIZE + hash_value]; + + mf->hash[hash_2_value] = pos; + mf->hash[FIX_3_HASH_SIZE + hash_value] = pos; + + bt_skip(); + + } while (--amount != 0); +} +#endif + + +#ifdef HAVE_MF_BT4 +extern uint32_t +lzma_mf_bt4_find(lzma_mf *mf, lzma_match *matches) +{ + header_find(true, 4); + + hash_4_calc(); + + uint32_t delta2 = pos - mf->hash[hash_2_value]; + const uint32_t delta3 + = pos - mf->hash[FIX_3_HASH_SIZE + hash_3_value]; + const uint32_t cur_match = mf->hash[FIX_4_HASH_SIZE + hash_value]; + + mf->hash[hash_2_value] = pos; + mf->hash[FIX_3_HASH_SIZE + hash_3_value] = pos; + mf->hash[FIX_4_HASH_SIZE + hash_value] = pos; + + uint32_t len_best = 1; + + if (delta2 < mf->cyclic_size && *(cur - delta2) == *cur) { + len_best = 2; + matches[0].len = 2; + matches[0].dist = delta2 - 1; + matches_count = 1; + } + + if (delta2 != delta3 && delta3 < mf->cyclic_size + && *(cur - delta3) == *cur) { + len_best = 3; + matches[matches_count++].dist = delta3 - 1; + delta2 = delta3; + } + + if (matches_count != 0) { + len_best = lzma_memcmplen( + cur, cur - delta2, len_best, len_limit); + + matches[matches_count - 1].len = len_best; + + if (len_best == len_limit) { + bt_skip(); + return matches_count; + } + } + + if (len_best < 3) + len_best = 3; + + bt_find(len_best); +} + + +extern void +lzma_mf_bt4_skip(lzma_mf *mf, uint32_t amount) +{ + do { + header_skip(true, 4); + + hash_4_calc(); + + const uint32_t cur_match + = mf->hash[FIX_4_HASH_SIZE + hash_value]; + + mf->hash[hash_2_value] = pos; + mf->hash[FIX_3_HASH_SIZE + hash_3_value] = pos; + mf->hash[FIX_4_HASH_SIZE + hash_value] = pos; + + bt_skip(); + + } while (--amount != 0); +} +#endif diff --git a/src/native/external/xz/src/liblzma/lzma/Makefile.inc b/src/native/external/xz/src/liblzma/lzma/Makefile.inc new file mode 100644 index 00000000000000..dca6b764e16f2c --- /dev/null +++ b/src/native/external/xz/src/liblzma/lzma/Makefile.inc @@ -0,0 +1,40 @@ +## SPDX-License-Identifier: 0BSD +## Author: Lasse Collin + +EXTRA_DIST += lzma/fastpos_tablegen.c + +liblzma_la_SOURCES += \ + lzma/lzma_common.h \ + lzma/lzma_encoder_presets.c + +if COND_ENCODER_LZMA1 +liblzma_la_SOURCES += \ + lzma/fastpos.h \ + lzma/lzma_encoder.h \ + lzma/lzma_encoder.c \ + lzma/lzma_encoder_private.h \ + lzma/lzma_encoder_optimum_fast.c \ + lzma/lzma_encoder_optimum_normal.c + +if !COND_SMALL +liblzma_la_SOURCES += lzma/fastpos_table.c +endif +endif + +if COND_DECODER_LZMA1 +liblzma_la_SOURCES += \ + lzma/lzma_decoder.c \ + lzma/lzma_decoder.h +endif + +if COND_ENCODER_LZMA2 +liblzma_la_SOURCES += \ + lzma/lzma2_encoder.c \ + lzma/lzma2_encoder.h +endif + +if COND_DECODER_LZMA2 +liblzma_la_SOURCES += \ + lzma/lzma2_decoder.c \ + lzma/lzma2_decoder.h +endif diff --git a/src/native/external/xz/src/liblzma/lzma/fastpos.h b/src/native/external/xz/src/liblzma/lzma/fastpos.h new file mode 100644 index 00000000000000..d3969a753fac26 --- /dev/null +++ b/src/native/external/xz/src/liblzma/lzma/fastpos.h @@ -0,0 +1,141 @@ +// SPDX-License-Identifier: 0BSD + +/////////////////////////////////////////////////////////////////////////////// +// +/// \file fastpos.h +/// \brief Kind of two-bit version of bit scan reverse +/// +// Authors: Igor Pavlov +// Lasse Collin +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef LZMA_FASTPOS_H +#define LZMA_FASTPOS_H + +// LZMA encodes match distances by storing the highest two bits using +// a six-bit value [0, 63], and then the missing lower bits. +// Dictionary size is also stored using this encoding in the .xz +// file format header. +// +// fastpos.h provides a way to quickly find out the correct six-bit +// values. The following table gives some examples of this encoding: +// +// dist return +// 0 0 +// 1 1 +// 2 2 +// 3 3 +// 4 4 +// 5 4 +// 6 5 +// 7 5 +// 8 6 +// 11 6 +// 12 7 +// ... ... +// 15 7 +// 16 8 +// 17 8 +// ... ... +// 23 8 +// 24 9 +// 25 9 +// ... ... +// +// +// Provided functions or macros +// ---------------------------- +// +// get_dist_slot(dist) is the basic version. get_dist_slot_2(dist) +// assumes that dist >= FULL_DISTANCES, thus the result is at least +// FULL_DISTANCES_BITS * 2. Using get_dist_slot(dist) instead of +// get_dist_slot_2(dist) would give the same result, but get_dist_slot_2(dist) +// should be tiny bit faster due to the assumption being made. +// +// +// Size vs. speed +// -------------- +// +// With some CPUs that have fast BSR (bit scan reverse) instruction, the +// size optimized version is slightly faster than the bigger table based +// approach. Such CPUs include Intel Pentium Pro, Pentium II, Pentium III +// and Core 2 (possibly others). AMD K7 seems to have slower BSR, but that +// would still have speed roughly comparable to the table version. Older +// x86 CPUs like the original Pentium have very slow BSR; on those systems +// the table version is a lot faster. +// +// On some CPUs, the table version is a lot faster when using position +// dependent code, but with position independent code the size optimized +// version is slightly faster. This occurs at least on 32-bit SPARC (no +// ASM optimizations). +// +// I'm making the table version the default, because that has good speed +// on all systems I have tried. The size optimized version is sometimes +// slightly faster, but sometimes it is a lot slower. + +#ifdef HAVE_SMALL +# define get_dist_slot(dist) \ + ((dist) <= 4 ? (dist) : get_dist_slot_2(dist)) + +static inline uint32_t +get_dist_slot_2(uint32_t dist) +{ + const uint32_t i = bsr32(dist); + return (i + i) + ((dist >> (i - 1)) & 1); +} + + +#else + +#define FASTPOS_BITS 13 + +lzma_attr_visibility_hidden +extern const uint8_t lzma_fastpos[1 << FASTPOS_BITS]; + + +#define fastpos_shift(extra, n) \ + ((extra) + (n) * (FASTPOS_BITS - 1)) + +#define fastpos_limit(extra, n) \ + (UINT32_C(1) << (FASTPOS_BITS + fastpos_shift(extra, n))) + +#define fastpos_result(dist, extra, n) \ + (uint32_t)(lzma_fastpos[(dist) >> fastpos_shift(extra, n)]) \ + + 2 * fastpos_shift(extra, n) + + +static inline uint32_t +get_dist_slot(uint32_t dist) +{ + // If it is small enough, we can pick the result directly from + // the precalculated table. + if (dist < fastpos_limit(0, 0)) + return lzma_fastpos[dist]; + + if (dist < fastpos_limit(0, 1)) + return fastpos_result(dist, 0, 1); + + return fastpos_result(dist, 0, 2); +} + + +#ifdef FULL_DISTANCES_BITS +static inline uint32_t +get_dist_slot_2(uint32_t dist) +{ + assert(dist >= FULL_DISTANCES); + + if (dist < fastpos_limit(FULL_DISTANCES_BITS - 1, 0)) + return fastpos_result(dist, FULL_DISTANCES_BITS - 1, 0); + + if (dist < fastpos_limit(FULL_DISTANCES_BITS - 1, 1)) + return fastpos_result(dist, FULL_DISTANCES_BITS - 1, 1); + + return fastpos_result(dist, FULL_DISTANCES_BITS - 1, 2); +} +#endif + +#endif + +#endif diff --git a/src/native/external/xz/src/liblzma/lzma/fastpos_table.c b/src/native/external/xz/src/liblzma/lzma/fastpos_table.c new file mode 100644 index 00000000000000..4e10e3795e294d --- /dev/null +++ b/src/native/external/xz/src/liblzma/lzma/fastpos_table.c @@ -0,0 +1,521 @@ +// SPDX-License-Identifier: 0BSD + +// This file has been generated by fastpos_tablegen.c. + +#include "common.h" +#include "fastpos.h" + +const uint8_t lzma_fastpos[1 << FASTPOS_BITS] = { + 0, 1, 2, 3, 4, 4, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, + 8, 8, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, + 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25 +}; diff --git a/src/native/external/xz/src/liblzma/lzma/fastpos_tablegen.c b/src/native/external/xz/src/liblzma/lzma/fastpos_tablegen.c new file mode 100644 index 00000000000000..957ccb7a643627 --- /dev/null +++ b/src/native/external/xz/src/liblzma/lzma/fastpos_tablegen.c @@ -0,0 +1,58 @@ +// SPDX-License-Identifier: 0BSD + +/////////////////////////////////////////////////////////////////////////////// +// +/// \file fastpos_tablegen.c +/// \brief Generates the lzma_fastpos[] lookup table +/// +// Authors: Igor Pavlov +// Lasse Collin +// +/////////////////////////////////////////////////////////////////////////////// + +#include +#include + +#define lzma_attr_visibility_hidden +#include "fastpos.h" + + +int +main(void) +{ + uint8_t fastpos[1 << FASTPOS_BITS]; + + const uint8_t fast_slots = 2 * FASTPOS_BITS; + uint32_t c = 2; + + fastpos[0] = 0; + fastpos[1] = 1; + + for (uint8_t slot_fast = 2; slot_fast < fast_slots; ++slot_fast) { + const uint32_t k = 1 << ((slot_fast >> 1) - 1); + for (uint32_t j = 0; j < k; ++j, ++c) + fastpos[c] = slot_fast; + } + + // Split the SPDX string so that it won't accidentally match + // when tools search for the string. + printf("// SPDX" "-License-Identifier" ": 0BSD\n\n" + "// This file has been generated by fastpos_tablegen.c.\n\n" + "#include \"common.h\"\n" + "#include \"fastpos.h\"\n\n" + "const uint8_t lzma_fastpos[1 << FASTPOS_BITS] = {"); + + for (size_t i = 0; i < (1 << FASTPOS_BITS); ++i) { + if (i % 16 == 0) + printf("\n\t"); + + printf("%3u", (unsigned int)(fastpos[i])); + + if (i != (1 << FASTPOS_BITS) - 1) + printf(","); + } + + printf("\n};\n"); + + return 0; +} diff --git a/src/native/external/xz/src/liblzma/lzma/lzma2_decoder.c b/src/native/external/xz/src/liblzma/lzma/lzma2_decoder.c new file mode 100644 index 00000000000000..37ab253f5b0a10 --- /dev/null +++ b/src/native/external/xz/src/liblzma/lzma/lzma2_decoder.c @@ -0,0 +1,310 @@ +// SPDX-License-Identifier: 0BSD + +/////////////////////////////////////////////////////////////////////////////// +// +/// \file lzma2_decoder.c +/// \brief LZMA2 decoder +/// +// Authors: Igor Pavlov +// Lasse Collin +// +/////////////////////////////////////////////////////////////////////////////// + +#include "lzma2_decoder.h" +#include "lz_decoder.h" +#include "lzma_decoder.h" + + +typedef struct { + enum sequence { + SEQ_CONTROL, + SEQ_UNCOMPRESSED_1, + SEQ_UNCOMPRESSED_2, + SEQ_COMPRESSED_0, + SEQ_COMPRESSED_1, + SEQ_PROPERTIES, + SEQ_LZMA, + SEQ_COPY, + } sequence; + + /// Sequence after the size fields have been decoded. + enum sequence next_sequence; + + /// LZMA decoder + lzma_lz_decoder lzma; + + /// Uncompressed size of LZMA chunk + size_t uncompressed_size; + + /// Compressed size of the chunk (naturally equals to uncompressed + /// size of uncompressed chunk) + size_t compressed_size; + + /// True if properties are needed. This is false before the + /// first LZMA chunk. + bool need_properties; + + /// True if dictionary reset is needed. This is false before the + /// first chunk (LZMA or uncompressed). + bool need_dictionary_reset; + + lzma_options_lzma options; +} lzma_lzma2_coder; + + +static lzma_ret +lzma2_decode(void *coder_ptr, lzma_dict *restrict dict, + const uint8_t *restrict in, size_t *restrict in_pos, + size_t in_size) +{ + lzma_lzma2_coder *restrict coder = coder_ptr; + + // With SEQ_LZMA it is possible that no new input is needed to do + // some progress. The rest of the sequences assume that there is + // at least one byte of input. + while (*in_pos < in_size || coder->sequence == SEQ_LZMA) + switch (coder->sequence) { + case SEQ_CONTROL: { + const uint32_t control = in[*in_pos]; + ++*in_pos; + + // End marker + if (control == 0x00) + return LZMA_STREAM_END; + + if (control >= 0xE0 || control == 1) { + // Dictionary reset implies that next LZMA chunk has + // to set new properties. + coder->need_properties = true; + coder->need_dictionary_reset = true; + } else if (coder->need_dictionary_reset) { + return LZMA_DATA_ERROR; + } + + if (control >= 0x80) { + // LZMA chunk. The highest five bits of the + // uncompressed size are taken from the control byte. + coder->uncompressed_size = (control & 0x1F) << 16; + coder->sequence = SEQ_UNCOMPRESSED_1; + + // See if there are new properties or if we need to + // reset the state. + if (control >= 0xC0) { + // When there are new properties, state reset + // is done at SEQ_PROPERTIES. + coder->need_properties = false; + coder->next_sequence = SEQ_PROPERTIES; + + } else if (coder->need_properties) { + return LZMA_DATA_ERROR; + + } else { + coder->next_sequence = SEQ_LZMA; + + // If only state reset is wanted with old + // properties, do the resetting here for + // simplicity. + if (control >= 0xA0) + coder->lzma.reset(coder->lzma.coder, + &coder->options); + } + } else { + // Invalid control values + if (control > 2) + return LZMA_DATA_ERROR; + + // It's uncompressed chunk + coder->sequence = SEQ_COMPRESSED_0; + coder->next_sequence = SEQ_COPY; + } + + if (coder->need_dictionary_reset) { + // Finish the dictionary reset and let the caller + // flush the dictionary to the actual output buffer. + coder->need_dictionary_reset = false; + dict_reset(dict); + return LZMA_OK; + } + + break; + } + + case SEQ_UNCOMPRESSED_1: + coder->uncompressed_size += (uint32_t)(in[(*in_pos)++]) << 8; + coder->sequence = SEQ_UNCOMPRESSED_2; + break; + + case SEQ_UNCOMPRESSED_2: + coder->uncompressed_size += in[(*in_pos)++] + 1U; + coder->sequence = SEQ_COMPRESSED_0; + coder->lzma.set_uncompressed(coder->lzma.coder, + coder->uncompressed_size, false); + break; + + case SEQ_COMPRESSED_0: + coder->compressed_size = (uint32_t)(in[(*in_pos)++]) << 8; + coder->sequence = SEQ_COMPRESSED_1; + break; + + case SEQ_COMPRESSED_1: + coder->compressed_size += in[(*in_pos)++] + 1U; + coder->sequence = coder->next_sequence; + break; + + case SEQ_PROPERTIES: + if (lzma_lzma_lclppb_decode(&coder->options, in[(*in_pos)++])) + return LZMA_DATA_ERROR; + + coder->lzma.reset(coder->lzma.coder, &coder->options); + + coder->sequence = SEQ_LZMA; + break; + + case SEQ_LZMA: { + // Store the start offset so that we can update + // coder->compressed_size later. + const size_t in_start = *in_pos; + + // Decode from in[] to *dict. + const lzma_ret ret = coder->lzma.code(coder->lzma.coder, + dict, in, in_pos, in_size); + + // Validate and update coder->compressed_size. + const size_t in_used = *in_pos - in_start; + if (in_used > coder->compressed_size) + return LZMA_DATA_ERROR; + + coder->compressed_size -= in_used; + + // Return if we didn't finish the chunk, or an error occurred. + if (ret != LZMA_STREAM_END) + return ret; + + // The LZMA decoder must have consumed the whole chunk now. + // We don't need to worry about uncompressed size since it + // is checked by the LZMA decoder. + if (coder->compressed_size != 0) + return LZMA_DATA_ERROR; + + coder->sequence = SEQ_CONTROL; + break; + } + + case SEQ_COPY: { + // Copy from input to the dictionary as is. + dict_write(dict, in, in_pos, in_size, &coder->compressed_size); + if (coder->compressed_size != 0) + return LZMA_OK; + + coder->sequence = SEQ_CONTROL; + break; + } + + default: + assert(0); + return LZMA_PROG_ERROR; + } + + return LZMA_OK; +} + + +static void +lzma2_decoder_end(void *coder_ptr, const lzma_allocator *allocator) +{ + lzma_lzma2_coder *coder = coder_ptr; + + assert(coder->lzma.end == NULL); + lzma_free(coder->lzma.coder, allocator); + + lzma_free(coder, allocator); + + return; +} + + +static lzma_ret +lzma2_decoder_init(lzma_lz_decoder *lz, const lzma_allocator *allocator, + lzma_vli id lzma_attribute((__unused__)), const void *opt, + lzma_lz_options *lz_options) +{ + lzma_lzma2_coder *coder = lz->coder; + if (coder == NULL) { + coder = lzma_alloc(sizeof(lzma_lzma2_coder), allocator); + if (coder == NULL) + return LZMA_MEM_ERROR; + + lz->coder = coder; + lz->code = &lzma2_decode; + lz->end = &lzma2_decoder_end; + + coder->lzma = LZMA_LZ_DECODER_INIT; + } + + const lzma_options_lzma *options = opt; + + coder->sequence = SEQ_CONTROL; + coder->need_properties = true; + coder->need_dictionary_reset = options->preset_dict == NULL + || options->preset_dict_size == 0; + + return lzma_lzma_decoder_create(&coder->lzma, + allocator, options, lz_options); +} + + +extern lzma_ret +lzma_lzma2_decoder_init(lzma_next_coder *next, const lzma_allocator *allocator, + const lzma_filter_info *filters) +{ + // LZMA2 can only be the last filter in the chain. This is enforced + // by the raw_decoder initialization. + assert(filters[1].init == NULL); + + return lzma_lz_decoder_init(next, allocator, filters, + &lzma2_decoder_init); +} + + +extern uint64_t +lzma_lzma2_decoder_memusage(const void *options) +{ + return sizeof(lzma_lzma2_coder) + + lzma_lzma_decoder_memusage_nocheck(options); +} + + +extern lzma_ret +lzma_lzma2_props_decode(void **options, const lzma_allocator *allocator, + const uint8_t *props, size_t props_size) +{ + if (props_size != 1) + return LZMA_OPTIONS_ERROR; + + // Check that reserved bits are unset. + if (props[0] & 0xC0) + return LZMA_OPTIONS_ERROR; + + // Decode the dictionary size. + if (props[0] > 40) + return LZMA_OPTIONS_ERROR; + + lzma_options_lzma *opt = lzma_alloc( + sizeof(lzma_options_lzma), allocator); + if (opt == NULL) + return LZMA_MEM_ERROR; + + if (props[0] == 40) { + opt->dict_size = UINT32_MAX; + } else { + opt->dict_size = 2 | (props[0] & 1U); + opt->dict_size <<= props[0] / 2U + 11; + } + + opt->preset_dict = NULL; + opt->preset_dict_size = 0; + + *options = opt; + + return LZMA_OK; +} diff --git a/src/native/external/xz/src/liblzma/lzma/lzma2_decoder.h b/src/native/external/xz/src/liblzma/lzma/lzma2_decoder.h new file mode 100644 index 00000000000000..cdd8b463abfdce --- /dev/null +++ b/src/native/external/xz/src/liblzma/lzma/lzma2_decoder.h @@ -0,0 +1,28 @@ +// SPDX-License-Identifier: 0BSD + +/////////////////////////////////////////////////////////////////////////////// +// +/// \file lzma2_decoder.h +/// \brief LZMA2 decoder +/// +// Authors: Igor Pavlov +// Lasse Collin +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef LZMA_LZMA2_DECODER_H +#define LZMA_LZMA2_DECODER_H + +#include "common.h" + +extern lzma_ret lzma_lzma2_decoder_init(lzma_next_coder *next, + const lzma_allocator *allocator, + const lzma_filter_info *filters); + +extern uint64_t lzma_lzma2_decoder_memusage(const void *options); + +extern lzma_ret lzma_lzma2_props_decode( + void **options, const lzma_allocator *allocator, + const uint8_t *props, size_t props_size); + +#endif diff --git a/src/native/external/xz/src/liblzma/lzma/lzma2_encoder.c b/src/native/external/xz/src/liblzma/lzma/lzma2_encoder.c new file mode 100644 index 00000000000000..957fa31ecf529c --- /dev/null +++ b/src/native/external/xz/src/liblzma/lzma/lzma2_encoder.c @@ -0,0 +1,413 @@ +// SPDX-License-Identifier: 0BSD + +/////////////////////////////////////////////////////////////////////////////// +// +/// \file lzma2_encoder.c +/// \brief LZMA2 encoder +/// +// Authors: Igor Pavlov +// Lasse Collin +// +/////////////////////////////////////////////////////////////////////////////// + +#include "lz_encoder.h" +#include "lzma_encoder.h" +#include "fastpos.h" +#include "lzma2_encoder.h" + + +typedef struct { + enum { + SEQ_INIT, + SEQ_LZMA_ENCODE, + SEQ_LZMA_COPY, + SEQ_UNCOMPRESSED_HEADER, + SEQ_UNCOMPRESSED_COPY, + } sequence; + + /// LZMA encoder + void *lzma; + + /// LZMA options currently in use. + lzma_options_lzma opt_cur; + + bool need_properties; + bool need_state_reset; + bool need_dictionary_reset; + + /// Uncompressed size of a chunk + size_t uncompressed_size; + + /// Compressed size of a chunk (excluding headers); this is also used + /// to indicate the end of buf[] in SEQ_LZMA_COPY. + size_t compressed_size; + + /// Read position in buf[] + size_t buf_pos; + + /// Buffer to hold the chunk header and LZMA compressed data + uint8_t buf[LZMA2_HEADER_MAX + LZMA2_CHUNK_MAX]; +} lzma_lzma2_coder; + + +static void +lzma2_header_lzma(lzma_lzma2_coder *coder) +{ + assert(coder->uncompressed_size > 0); + assert(coder->uncompressed_size <= LZMA2_UNCOMPRESSED_MAX); + assert(coder->compressed_size > 0); + assert(coder->compressed_size <= LZMA2_CHUNK_MAX); + + size_t pos; + + if (coder->need_properties) { + pos = 0; + + if (coder->need_dictionary_reset) + coder->buf[pos] = 0x80 + (3 << 5); + else + coder->buf[pos] = 0x80 + (2 << 5); + } else { + pos = 1; + + if (coder->need_state_reset) + coder->buf[pos] = 0x80 + (1 << 5); + else + coder->buf[pos] = 0x80; + } + + // Set the start position for copying. + coder->buf_pos = pos; + + // Uncompressed size + size_t size = coder->uncompressed_size - 1; + coder->buf[pos++] += (uint8_t)(size >> 16); + coder->buf[pos++] = (uint8_t)((size >> 8) & 0xFF); + coder->buf[pos++] = (uint8_t)(size & 0xFF); + + // Compressed size + size = coder->compressed_size - 1; + coder->buf[pos++] = (uint8_t)(size >> 8); + coder->buf[pos++] = (uint8_t)(size & 0xFF); + + // Properties, if needed + if (coder->need_properties) + lzma_lzma_lclppb_encode(&coder->opt_cur, coder->buf + pos); + + coder->need_properties = false; + coder->need_state_reset = false; + coder->need_dictionary_reset = false; + + // The copying code uses coder->compressed_size to indicate the end + // of coder->buf[], so we need add the maximum size of the header here. + coder->compressed_size += LZMA2_HEADER_MAX; + + return; +} + + +static void +lzma2_header_uncompressed(lzma_lzma2_coder *coder) +{ + assert(coder->uncompressed_size > 0); + assert(coder->uncompressed_size <= LZMA2_CHUNK_MAX); + + // If this is the first chunk, we need to include dictionary + // reset indicator. + if (coder->need_dictionary_reset) + coder->buf[0] = 1; + else + coder->buf[0] = 2; + + coder->need_dictionary_reset = false; + + // "Compressed" size + coder->buf[1] = (uint8_t)((coder->uncompressed_size - 1) >> 8); + coder->buf[2] = (coder->uncompressed_size - 1) & 0xFF; + + // Set the start position for copying. + coder->buf_pos = 0; + return; +} + + +static lzma_ret +lzma2_encode(void *coder_ptr, lzma_mf *restrict mf, + uint8_t *restrict out, size_t *restrict out_pos, + size_t out_size) +{ + lzma_lzma2_coder *restrict coder = coder_ptr; + + while (*out_pos < out_size) + switch (coder->sequence) { + case SEQ_INIT: + // If there's no input left and we are flushing or finishing, + // don't start a new chunk. + if (mf_unencoded(mf) == 0) { + // Write end of payload marker if finishing. + if (mf->action == LZMA_FINISH) + out[(*out_pos)++] = 0; + + return mf->action == LZMA_RUN + ? LZMA_OK : LZMA_STREAM_END; + } + + if (coder->need_state_reset) + return_if_error(lzma_lzma_encoder_reset( + coder->lzma, &coder->opt_cur)); + + coder->uncompressed_size = 0; + coder->compressed_size = 0; + coder->sequence = SEQ_LZMA_ENCODE; + FALLTHROUGH; + + case SEQ_LZMA_ENCODE: { + // Calculate how much more uncompressed data this chunk + // could accept. + const uint32_t left = (uint32_t)(LZMA2_UNCOMPRESSED_MAX + - coder->uncompressed_size); + uint32_t limit; + + if (left < mf->match_len_max) { + // Must flush immediately since the next LZMA symbol + // could make the uncompressed size of the chunk too + // big. + limit = 0; + } else { + // Calculate maximum read_limit that is OK from point + // of view of LZMA2 chunk size. + limit = mf->read_pos - mf->read_ahead + + left - mf->match_len_max; + } + + // Save the start position so that we can update + // coder->uncompressed_size. + const uint32_t read_start = mf->read_pos - mf->read_ahead; + + // Call the LZMA encoder until the chunk is finished. + const lzma_ret ret = lzma_lzma_encode(coder->lzma, mf, + coder->buf + LZMA2_HEADER_MAX, + &coder->compressed_size, + LZMA2_CHUNK_MAX, limit); + + coder->uncompressed_size += mf->read_pos - mf->read_ahead + - read_start; + + assert(coder->compressed_size <= LZMA2_CHUNK_MAX); + assert(coder->uncompressed_size <= LZMA2_UNCOMPRESSED_MAX); + + if (ret != LZMA_STREAM_END) + return LZMA_OK; + + // See if the chunk compressed. If it didn't, we encode it + // as uncompressed chunk. This saves a few bytes of space + // and makes decoding faster. + if (coder->compressed_size >= coder->uncompressed_size) { + coder->uncompressed_size += mf->read_ahead; + assert(coder->uncompressed_size + <= LZMA2_UNCOMPRESSED_MAX); + mf->read_ahead = 0; + lzma2_header_uncompressed(coder); + coder->need_state_reset = true; + coder->sequence = SEQ_UNCOMPRESSED_HEADER; + break; + } + + // The chunk did compress at least by one byte, so we store + // the chunk as LZMA. + lzma2_header_lzma(coder); + + coder->sequence = SEQ_LZMA_COPY; + FALLTHROUGH; + } + + case SEQ_LZMA_COPY: + // Copy the compressed chunk along its headers to the + // output buffer. + lzma_bufcpy(coder->buf, &coder->buf_pos, + coder->compressed_size, + out, out_pos, out_size); + if (coder->buf_pos != coder->compressed_size) + return LZMA_OK; + + coder->sequence = SEQ_INIT; + break; + + case SEQ_UNCOMPRESSED_HEADER: + // Copy the three-byte header to indicate uncompressed chunk. + lzma_bufcpy(coder->buf, &coder->buf_pos, + LZMA2_HEADER_UNCOMPRESSED, + out, out_pos, out_size); + if (coder->buf_pos != LZMA2_HEADER_UNCOMPRESSED) + return LZMA_OK; + + coder->sequence = SEQ_UNCOMPRESSED_COPY; + FALLTHROUGH; + + case SEQ_UNCOMPRESSED_COPY: + // Copy the uncompressed data as is from the dictionary + // to the output buffer. + mf_read(mf, out, out_pos, out_size, &coder->uncompressed_size); + if (coder->uncompressed_size != 0) + return LZMA_OK; + + coder->sequence = SEQ_INIT; + break; + } + + return LZMA_OK; +} + + +static void +lzma2_encoder_end(void *coder_ptr, const lzma_allocator *allocator) +{ + lzma_lzma2_coder *coder = coder_ptr; + lzma_free(coder->lzma, allocator); + lzma_free(coder, allocator); + return; +} + + +static lzma_ret +lzma2_encoder_options_update(void *coder_ptr, const lzma_filter *filter) +{ + lzma_lzma2_coder *coder = coder_ptr; + + // New options can be set only when there is no incomplete chunk. + // This is the case at the beginning of the raw stream and right + // after LZMA_SYNC_FLUSH. + if (filter->options == NULL || coder->sequence != SEQ_INIT) + return LZMA_PROG_ERROR; + + // Look if there are new options. At least for now, + // only lc/lp/pb can be changed. + const lzma_options_lzma *opt = filter->options; + if (coder->opt_cur.lc != opt->lc || coder->opt_cur.lp != opt->lp + || coder->opt_cur.pb != opt->pb) { + // Validate the options. + if (opt->lc > LZMA_LCLP_MAX || opt->lp > LZMA_LCLP_MAX + || opt->lc + opt->lp > LZMA_LCLP_MAX + || opt->pb > LZMA_PB_MAX) + return LZMA_OPTIONS_ERROR; + + // The new options will be used when the encoder starts + // a new LZMA2 chunk. + coder->opt_cur.lc = opt->lc; + coder->opt_cur.lp = opt->lp; + coder->opt_cur.pb = opt->pb; + coder->need_properties = true; + coder->need_state_reset = true; + } + + return LZMA_OK; +} + + +static lzma_ret +lzma2_encoder_init(lzma_lz_encoder *lz, const lzma_allocator *allocator, + lzma_vli id lzma_attribute((__unused__)), const void *options, + lzma_lz_options *lz_options) +{ + if (options == NULL) + return LZMA_PROG_ERROR; + + lzma_lzma2_coder *coder = lz->coder; + if (coder == NULL) { + coder = lzma_alloc(sizeof(lzma_lzma2_coder), allocator); + if (coder == NULL) + return LZMA_MEM_ERROR; + + lz->coder = coder; + lz->code = &lzma2_encode; + lz->end = &lzma2_encoder_end; + lz->options_update = &lzma2_encoder_options_update; + + coder->lzma = NULL; + } + + coder->opt_cur = *(const lzma_options_lzma *)(options); + + coder->sequence = SEQ_INIT; + coder->need_properties = true; + coder->need_state_reset = false; + coder->need_dictionary_reset + = coder->opt_cur.preset_dict == NULL + || coder->opt_cur.preset_dict_size == 0; + + // Initialize LZMA encoder + return_if_error(lzma_lzma_encoder_create(&coder->lzma, allocator, + LZMA_FILTER_LZMA2, &coder->opt_cur, lz_options)); + + // Make sure that we will always have enough history available in + // case we need to use uncompressed chunks. They are used when the + // compressed size of a chunk is not smaller than the uncompressed + // size, so we need to have at least LZMA2_COMPRESSED_MAX bytes + // history available. + if (lz_options->before_size + lz_options->dict_size < LZMA2_CHUNK_MAX) + lz_options->before_size + = LZMA2_CHUNK_MAX - lz_options->dict_size; + + return LZMA_OK; +} + + +extern lzma_ret +lzma_lzma2_encoder_init(lzma_next_coder *next, const lzma_allocator *allocator, + const lzma_filter_info *filters) +{ + return lzma_lz_encoder_init( + next, allocator, filters, &lzma2_encoder_init); +} + + +extern uint64_t +lzma_lzma2_encoder_memusage(const void *options) +{ + const uint64_t lzma_mem = lzma_lzma_encoder_memusage(options); + if (lzma_mem == UINT64_MAX) + return UINT64_MAX; + + return sizeof(lzma_lzma2_coder) + lzma_mem; +} + + +extern lzma_ret +lzma_lzma2_props_encode(const void *options, uint8_t *out) +{ + if (options == NULL) + return LZMA_PROG_ERROR; + + const lzma_options_lzma *const opt = options; + uint32_t d = my_max(opt->dict_size, LZMA_DICT_SIZE_MIN); + + // Round up to the next 2^n - 1 or 2^n + 2^(n - 1) - 1 depending + // on which one is the next: + --d; + d |= d >> 2; + d |= d >> 3; + d |= d >> 4; + d |= d >> 8; + d |= d >> 16; + + // Get the highest two bits using the proper encoding: + if (d == UINT32_MAX) + out[0] = 40; + else + out[0] = (uint8_t)(get_dist_slot(d + 1) - 24); + + return LZMA_OK; +} + + +extern uint64_t +lzma_lzma2_block_size(const void *options) +{ + const lzma_options_lzma *const opt = options; + + if (!IS_ENC_DICT_SIZE_VALID(opt->dict_size)) + return UINT64_MAX; + + // Use at least 1 MiB to keep compression ratio better. + return my_max((uint64_t)(opt->dict_size) * 3, UINT64_C(1) << 20); +} diff --git a/src/native/external/xz/src/liblzma/lzma/lzma2_encoder.h b/src/native/external/xz/src/liblzma/lzma/lzma2_encoder.h new file mode 100644 index 00000000000000..29966a66d23740 --- /dev/null +++ b/src/native/external/xz/src/liblzma/lzma/lzma2_encoder.h @@ -0,0 +1,42 @@ +// SPDX-License-Identifier: 0BSD + +/////////////////////////////////////////////////////////////////////////////// +// +/// \file lzma2_encoder.h +/// \brief LZMA2 encoder +/// +// Authors: Igor Pavlov +// Lasse Collin +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef LZMA_LZMA2_ENCODER_H +#define LZMA_LZMA2_ENCODER_H + +#include "common.h" + + +/// Maximum number of bytes of actual data per chunk (no headers) +#define LZMA2_CHUNK_MAX (UINT32_C(1) << 16) + +/// Maximum uncompressed size of LZMA chunk (no headers) +#define LZMA2_UNCOMPRESSED_MAX (UINT32_C(1) << 21) + +/// Maximum size of LZMA2 headers +#define LZMA2_HEADER_MAX 6 + +/// Size of a header for uncompressed chunk +#define LZMA2_HEADER_UNCOMPRESSED 3 + + +extern lzma_ret lzma_lzma2_encoder_init( + lzma_next_coder *next, const lzma_allocator *allocator, + const lzma_filter_info *filters); + +extern uint64_t lzma_lzma2_encoder_memusage(const void *options); + +extern lzma_ret lzma_lzma2_props_encode(const void *options, uint8_t *out); + +extern uint64_t lzma_lzma2_block_size(const void *options); + +#endif diff --git a/src/native/external/xz/src/liblzma/lzma/lzma_common.h b/src/native/external/xz/src/liblzma/lzma/lzma_common.h new file mode 100644 index 00000000000000..49d9516bd1930a --- /dev/null +++ b/src/native/external/xz/src/liblzma/lzma/lzma_common.h @@ -0,0 +1,240 @@ +// SPDX-License-Identifier: 0BSD + +/////////////////////////////////////////////////////////////////////////////// +// +/// \file lzma_common.h +/// \brief Private definitions common to LZMA encoder and decoder +/// +// Authors: Igor Pavlov +// Lasse Collin +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef LZMA_LZMA_COMMON_H +#define LZMA_LZMA_COMMON_H + +#include "common.h" +#include "range_common.h" + + +/////////////////// +// Miscellaneous // +/////////////////// + +/// Maximum number of position states. A position state is the lowest pos bits +/// number of bits of the current uncompressed offset. In some places there +/// are different sets of probabilities for different pos states. +#define POS_STATES_MAX (1 << LZMA_PB_MAX) + + +/// Validates lc, lp, and pb. +static inline bool +is_lclppb_valid(const lzma_options_lzma *options) +{ + return options->lc <= LZMA_LCLP_MAX && options->lp <= LZMA_LCLP_MAX + && options->lc + options->lp <= LZMA_LCLP_MAX + && options->pb <= LZMA_PB_MAX; +} + + +/////////// +// State // +/////////// + +/// This enum is used to track which events have occurred most recently and +/// in which order. This information is used to predict the next event. +/// +/// Events: +/// - Literal: One 8-bit byte +/// - Match: Repeat a chunk of data at some distance +/// - Long repeat: Multi-byte match at a recently seen distance +/// - Short repeat: One-byte repeat at a recently seen distance +/// +/// The event names are in from STATE_oldest_older_previous. REP means +/// either short or long repeated match, and NONLIT means any non-literal. +typedef enum { + STATE_LIT_LIT, + STATE_MATCH_LIT_LIT, + STATE_REP_LIT_LIT, + STATE_SHORTREP_LIT_LIT, + STATE_MATCH_LIT, + STATE_REP_LIT, + STATE_SHORTREP_LIT, + STATE_LIT_MATCH, + STATE_LIT_LONGREP, + STATE_LIT_SHORTREP, + STATE_NONLIT_MATCH, + STATE_NONLIT_REP, +} lzma_lzma_state; + + +/// Total number of states +#define STATES 12 + +/// The lowest 7 states indicate that the previous state was a literal. +#define LIT_STATES 7 + + +/// Indicate that the latest state was a literal. +#define update_literal(state) \ + state = ((state) <= STATE_SHORTREP_LIT_LIT \ + ? STATE_LIT_LIT \ + : ((state) <= STATE_LIT_SHORTREP \ + ? (state) - 3 \ + : (state) - 6)) + +/// Like update_literal(state) but when it is already known that +/// is_literal_state(state) is true. +#define update_literal_normal(state) \ + state = ((state) <= STATE_SHORTREP_LIT_LIT \ + ? STATE_LIT_LIT \ + : (state) - 3) + +/// Like update_literal(state) but when it is already known that +/// is_literal_state(state) is false. +#define update_literal_matched(state) \ + state = ((state) <= STATE_LIT_SHORTREP \ + ? (state) - 3 \ + : (state) - 6) + +/// Indicate that the latest state was a match. +#define update_match(state) \ + state = ((state) < LIT_STATES ? STATE_LIT_MATCH : STATE_NONLIT_MATCH) + +/// Indicate that the latest state was a long repeated match. +#define update_long_rep(state) \ + state = ((state) < LIT_STATES ? STATE_LIT_LONGREP : STATE_NONLIT_REP) + +/// Indicate that the latest state was a short match. +#define update_short_rep(state) \ + state = ((state) < LIT_STATES ? STATE_LIT_SHORTREP : STATE_NONLIT_REP) + +/// Test if the previous state was a literal. +#define is_literal_state(state) \ + ((state) < LIT_STATES) + + +///////////// +// Literal // +///////////// + +/// Each literal coder is divided in three sections: +/// - 0x001-0x0FF: Without match byte +/// - 0x101-0x1FF: With match byte; match bit is 0 +/// - 0x201-0x2FF: With match byte; match bit is 1 +/// +/// Match byte is used when the previous LZMA symbol was something else than +/// a literal (that is, it was some kind of match). +#define LITERAL_CODER_SIZE UINT32_C(0x300) + +/// Maximum number of literal coders +#define LITERAL_CODERS_MAX (1 << LZMA_LCLP_MAX) + +/// Calculates the literal_mask that literal_subcoder() needs. +#define literal_mask_calc(lc, lp) \ + ((UINT32_C(0x100) << (lp)) - (UINT32_C(0x100) >> (lc))) + +/// Locate the literal coder for the next literal byte. The choice depends on +/// - the lowest literal_pos_bits bits of the position of the current +/// byte; and +/// - the highest literal_context_bits bits of the previous byte. +#define literal_subcoder(probs, lc, literal_mask, pos, prev_byte) \ + ((probs) + UINT32_C(3) * \ + (((((pos) << 8) + (prev_byte)) & (literal_mask)) << (lc))) + + +static inline void +literal_init(probability *probs, uint32_t lc, uint32_t lp) +{ + assert(lc + lp <= LZMA_LCLP_MAX); + + const size_t coders = LITERAL_CODER_SIZE << (lc + lp); + + for (size_t i = 0; i < coders; ++i) + bit_reset(probs[i]); + + return; +} + + +////////////////// +// Match length // +////////////////// + +// Minimum length of a match is two bytes. +#define MATCH_LEN_MIN 2 + +// Match length is encoded with 4, 5, or 10 bits. +// +// Length Bits +// 2-9 4 = Choice=0 + 3 bits +// 10-17 5 = Choice=1 + Choice2=0 + 3 bits +// 18-273 10 = Choice=1 + Choice2=1 + 8 bits +#define LEN_LOW_BITS 3 +#define LEN_LOW_SYMBOLS (1 << LEN_LOW_BITS) +#define LEN_MID_BITS 3 +#define LEN_MID_SYMBOLS (1 << LEN_MID_BITS) +#define LEN_HIGH_BITS 8 +#define LEN_HIGH_SYMBOLS (1 << LEN_HIGH_BITS) +#define LEN_SYMBOLS (LEN_LOW_SYMBOLS + LEN_MID_SYMBOLS + LEN_HIGH_SYMBOLS) + +// Maximum length of a match is 273 which is a result of the encoding +// described above. +#define MATCH_LEN_MAX (MATCH_LEN_MIN + LEN_SYMBOLS - 1) + + +//////////////////// +// Match distance // +//////////////////// + +// Different sets of probabilities are used for match distances that have very +// short match length: Lengths of 2, 3, and 4 bytes have a separate set of +// probabilities for each length. The matches with longer length use a shared +// set of probabilities. +#define DIST_STATES 4 + +// Macro to get the index of the appropriate probability array. +#define get_dist_state(len) \ + ((len) < DIST_STATES + MATCH_LEN_MIN \ + ? (len) - MATCH_LEN_MIN \ + : DIST_STATES - 1) + +// The highest two bits of a match distance (distance slot) are encoded +// using six bits. See fastpos.h for more explanation. +#define DIST_SLOT_BITS 6 +#define DIST_SLOTS (1 << DIST_SLOT_BITS) + +// Match distances up to 127 are fully encoded using probabilities. Since +// the highest two bits (distance slot) are always encoded using six bits, +// the distances 0-3 don't need any additional bits to encode, since the +// distance slot itself is the same as the actual distance. DIST_MODEL_START +// indicates the first distance slot where at least one additional bit is +// needed. +#define DIST_MODEL_START 4 + +// Match distances greater than 127 are encoded in three pieces: +// - distance slot: the highest two bits +// - direct bits: 2-26 bits below the highest two bits +// - alignment bits: four lowest bits +// +// Direct bits don't use any probabilities. +// +// The distance slot value of 14 is for distances 128-191 (see the table in +// fastpos.h to understand why). +#define DIST_MODEL_END 14 + +// Distance slots that indicate a distance <= 127. +#define FULL_DISTANCES_BITS (DIST_MODEL_END / 2) +#define FULL_DISTANCES (1 << FULL_DISTANCES_BITS) + +// For match distances greater than 127, only the highest two bits and the +// lowest four bits (alignment) is encoded using probabilities. +#define ALIGN_BITS 4 +#define ALIGN_SIZE (1 << ALIGN_BITS) +#define ALIGN_MASK (ALIGN_SIZE - 1) + +// LZMA remembers the four most recent match distances. Reusing these distances +// tends to take less space than re-encoding the actual distance value. +#define REPS 4 + +#endif diff --git a/src/native/external/xz/src/liblzma/lzma/lzma_decoder.c b/src/native/external/xz/src/liblzma/lzma/lzma_decoder.c new file mode 100644 index 00000000000000..627d2ca5cd726c --- /dev/null +++ b/src/native/external/xz/src/liblzma/lzma/lzma_decoder.c @@ -0,0 +1,1263 @@ +// SPDX-License-Identifier: 0BSD + +/////////////////////////////////////////////////////////////////////////////// +// +/// \file lzma_decoder.c +/// \brief LZMA decoder +/// +// Authors: Igor Pavlov +// Lasse Collin +// Jia Tan +// +/////////////////////////////////////////////////////////////////////////////// + +#include "lz_decoder.h" +#include "lzma_common.h" +#include "lzma_decoder.h" +#include "range_decoder.h" + +// The macros unroll loops with switch statements. +// Silence warnings about missing fall-through comments. +#if TUKLIB_GNUC_REQ(7, 0) || defined(__clang__) +# pragma GCC diagnostic ignored "-Wimplicit-fallthrough" +#endif + +// Minimum number of input bytes to safely decode one LZMA symbol. +// The worst case is that we decode 22 bits using probabilities and 26 +// direct bits. This may decode at maximum 20 bytes of input. +#define LZMA_IN_REQUIRED 20 + + +// Macros for (somewhat) size-optimized code. +// This is used to decode the match length (how many bytes must be repeated +// from the dictionary). This version is used in the Resumable mode and +// does not unroll any loops. +#define len_decode(target, ld, pos_state, seq) \ +do { \ +case seq ## _CHOICE: \ + rc_if_0_safe(ld.choice, seq ## _CHOICE) { \ + rc_update_0(ld.choice); \ + probs = ld.low[pos_state];\ + limit = LEN_LOW_SYMBOLS; \ + target = MATCH_LEN_MIN; \ + } else { \ + rc_update_1(ld.choice); \ +case seq ## _CHOICE2: \ + rc_if_0_safe(ld.choice2, seq ## _CHOICE2) { \ + rc_update_0(ld.choice2); \ + probs = ld.mid[pos_state]; \ + limit = LEN_MID_SYMBOLS; \ + target = MATCH_LEN_MIN + LEN_LOW_SYMBOLS; \ + } else { \ + rc_update_1(ld.choice2); \ + probs = ld.high; \ + limit = LEN_HIGH_SYMBOLS; \ + target = MATCH_LEN_MIN + LEN_LOW_SYMBOLS \ + + LEN_MID_SYMBOLS; \ + } \ + } \ + symbol = 1; \ +case seq ## _BITTREE: \ + do { \ + rc_bit_safe(probs[symbol], , , seq ## _BITTREE); \ + } while (symbol < limit); \ + target += symbol - limit; \ +} while (0) + + +// This is the faster version of the match length decoder that does not +// worry about being resumable. It unrolls the bittree decoding loop. +#define len_decode_fast(target, ld, pos_state) \ +do { \ + symbol = 1; \ + rc_if_0(ld.choice) { \ + rc_update_0(ld.choice); \ + rc_bittree3(ld.low[pos_state], \ + -LEN_LOW_SYMBOLS + MATCH_LEN_MIN); \ + target = symbol; \ + } else { \ + rc_update_1(ld.choice); \ + rc_if_0(ld.choice2) { \ + rc_update_0(ld.choice2); \ + rc_bittree3(ld.mid[pos_state], -LEN_MID_SYMBOLS \ + + MATCH_LEN_MIN + LEN_LOW_SYMBOLS); \ + target = symbol; \ + } else { \ + rc_update_1(ld.choice2); \ + rc_bittree8(ld.high, -LEN_HIGH_SYMBOLS \ + + MATCH_LEN_MIN \ + + LEN_LOW_SYMBOLS + LEN_MID_SYMBOLS); \ + target = symbol; \ + } \ + } \ +} while (0) + + +/// Length decoder probabilities; see comments in lzma_common.h. +typedef struct { + probability choice; + probability choice2; + probability low[POS_STATES_MAX][LEN_LOW_SYMBOLS]; + probability mid[POS_STATES_MAX][LEN_MID_SYMBOLS]; + probability high[LEN_HIGH_SYMBOLS]; +} lzma_length_decoder; + + +typedef struct { + /////////////////// + // Probabilities // + /////////////////// + + /// Literals; see comments in lzma_common.h. + probability literal[LITERAL_CODERS_MAX * LITERAL_CODER_SIZE]; + + /// If 1, it's a match. Otherwise it's a single 8-bit literal. + probability is_match[STATES][POS_STATES_MAX]; + + /// If 1, it's a repeated match. The distance is one of rep0 .. rep3. + probability is_rep[STATES]; + + /// If 0, distance of a repeated match is rep0. + /// Otherwise check is_rep1. + probability is_rep0[STATES]; + + /// If 0, distance of a repeated match is rep1. + /// Otherwise check is_rep2. + probability is_rep1[STATES]; + + /// If 0, distance of a repeated match is rep2. Otherwise it is rep3. + probability is_rep2[STATES]; + + /// If 1, the repeated match has length of one byte. Otherwise + /// the length is decoded from rep_len_decoder. + probability is_rep0_long[STATES][POS_STATES_MAX]; + + /// Probability tree for the highest two bits of the match distance. + /// There is a separate probability tree for match lengths of + /// 2 (i.e. MATCH_LEN_MIN), 3, 4, and [5, 273]. + probability dist_slot[DIST_STATES][DIST_SLOTS]; + + /// Probability trees for additional bits for match distance when the + /// distance is in the range [4, 127]. + probability pos_special[FULL_DISTANCES - DIST_MODEL_END]; + + /// Probability tree for the lowest four bits of a match distance + /// that is equal to or greater than 128. + probability pos_align[ALIGN_SIZE]; + + /// Length of a normal match + lzma_length_decoder match_len_decoder; + + /// Length of a repeated match + lzma_length_decoder rep_len_decoder; + + /////////////////// + // Decoder state // + /////////////////// + + // Range coder + lzma_range_decoder rc; + + // Types of the most recently seen LZMA symbols + lzma_lzma_state state; + + uint32_t rep0; ///< Distance of the latest match + uint32_t rep1; ///< Distance of second latest match + uint32_t rep2; ///< Distance of third latest match + uint32_t rep3; ///< Distance of fourth latest match + + uint32_t pos_mask; // (1U << pb) - 1 + uint32_t literal_context_bits; + uint32_t literal_mask; + + /// Uncompressed size as bytes, or LZMA_VLI_UNKNOWN if end of + /// payload marker is expected. + lzma_vli uncompressed_size; + + /// True if end of payload marker (EOPM) is allowed even when + /// uncompressed_size is known; false if EOPM must not be present. + /// This is ignored if uncompressed_size == LZMA_VLI_UNKNOWN. + bool allow_eopm; + + //////////////////////////////// + // State of incomplete symbol // + //////////////////////////////// + + /// Position where to continue the decoder loop + enum { + SEQ_NORMALIZE, + SEQ_IS_MATCH, + SEQ_LITERAL, + SEQ_LITERAL_MATCHED, + SEQ_LITERAL_WRITE, + SEQ_IS_REP, + SEQ_MATCH_LEN_CHOICE, + SEQ_MATCH_LEN_CHOICE2, + SEQ_MATCH_LEN_BITTREE, + SEQ_DIST_SLOT, + SEQ_DIST_MODEL, + SEQ_DIRECT, + SEQ_ALIGN, + SEQ_EOPM, + SEQ_IS_REP0, + SEQ_SHORTREP, + SEQ_IS_REP0_LONG, + SEQ_IS_REP1, + SEQ_IS_REP2, + SEQ_REP_LEN_CHOICE, + SEQ_REP_LEN_CHOICE2, + SEQ_REP_LEN_BITTREE, + SEQ_COPY, + } sequence; + + /// Base of the current probability tree + probability *probs; + + /// Symbol being decoded. This is also used as an index variable in + /// bittree decoders: probs[symbol] + uint32_t symbol; + + /// Used as a loop termination condition on bittree decoders and + /// direct bits decoder. + uint32_t limit; + + /// Matched literal decoder: 0x100 or 0 to help avoiding branches. + /// Bittree reverse decoders: Offset of the next bit: 1 << offset + uint32_t offset; + + /// If decoding a literal: match byte. + /// If decoding a match: length of the match. + uint32_t len; +} lzma_lzma1_decoder; + + +static lzma_ret +lzma_decode(void *coder_ptr, lzma_dict *restrict dictptr, + const uint8_t *restrict in, + size_t *restrict in_pos, size_t in_size) +{ + lzma_lzma1_decoder *restrict coder = coder_ptr; + + //////////////////// + // Initialization // + //////////////////// + + { + const lzma_ret ret = rc_read_init( + &coder->rc, in, in_pos, in_size); + if (ret != LZMA_STREAM_END) + return ret; + } + + /////////////// + // Variables // + /////////////// + + // Making local copies of often-used variables improves both + // speed and readability. + + lzma_dict dict = *dictptr; + + const size_t dict_start = dict.pos; + + // Range decoder + rc_to_local(coder->rc, *in_pos, LZMA_IN_REQUIRED); + + // State + uint32_t state = coder->state; + uint32_t rep0 = coder->rep0; + uint32_t rep1 = coder->rep1; + uint32_t rep2 = coder->rep2; + uint32_t rep3 = coder->rep3; + + const uint32_t pos_mask = coder->pos_mask; + + // These variables are actually needed only if we last time ran + // out of input in the middle of the decoder loop. + probability *probs = coder->probs; + uint32_t symbol = coder->symbol; + uint32_t limit = coder->limit; + uint32_t offset = coder->offset; + uint32_t len = coder->len; + + const uint32_t literal_mask = coder->literal_mask; + const uint32_t literal_context_bits = coder->literal_context_bits; + + // Temporary variables + uint32_t pos_state = dict.pos & pos_mask; + + lzma_ret ret = LZMA_OK; + + // This is true when the next LZMA symbol is allowed to be EOPM. + // That is, if this is false, then EOPM is considered + // an invalid symbol and we will return LZMA_DATA_ERROR. + // + // EOPM is always required (not just allowed) when + // the uncompressed size isn't known. When uncompressed size + // is known, eopm_is_valid may be set to true later. + bool eopm_is_valid = coder->uncompressed_size == LZMA_VLI_UNKNOWN; + + // If uncompressed size is known and there is enough output space + // to decode all the data, limit the available buffer space so that + // the main loop won't try to decode past the end of the stream. + bool might_finish_without_eopm = false; + if (coder->uncompressed_size != LZMA_VLI_UNKNOWN + && coder->uncompressed_size <= dict.limit - dict.pos) { + dict.limit = dict.pos + (size_t)(coder->uncompressed_size); + might_finish_without_eopm = true; + } + + // The main decoder loop. The "switch" is used to resume the decoder at + // correct location. Once resumed, the "switch" is no longer used. + // The decoder loops is split into two modes: + // + // 1 - Non-resumable mode (fast). This is used when it is guaranteed + // there is enough input to decode the next symbol. If the output + // limit is reached, then the decoder loop will save the place + // for the resumable mode to continue. This mode is not used if + // HAVE_SMALL is defined. This is faster than Resumable mode + // because it reduces the number of branches needed and allows + // for more compiler optimizations. + // + // 2 - Resumable mode (slow). This is used when a previous decoder + // loop did not have enough space in the input or output buffers + // to complete. It uses sequence enum values to set remind + // coder->sequence where to resume in the decoder loop. This + // is the only mode used when HAVE_SMALL is defined. + + switch (coder->sequence) + while (true) { + // Calculate new pos_state. This is skipped on the first loop + // since we already calculated it when setting up the local + // variables. + pos_state = dict.pos & pos_mask; + +#ifndef HAVE_SMALL + + /////////////////////////////// + // Non-resumable Mode (fast) // + /////////////////////////////// + + // Go to Resumable mode (1) if there is not enough input to + // safely decode any possible LZMA symbol or (2) if the + // dictionary is full, which may need special checks that + // are only done in the Resumable mode. + if (unlikely(!rc_is_fast_allowed() + || dict.pos == dict.limit)) + goto slow; + + // Decode the first bit from the next LZMA symbol. + // If the bit is a 0, then we handle it as a literal. + // If the bit is a 1, then it is a match of previously + // decoded data. + rc_if_0(coder->is_match[state][pos_state]) { + ///////////////////// + // Decode literal. // + ///////////////////// + + // Update the RC that we have decoded a 0. + rc_update_0(coder->is_match[state][pos_state]); + + // Get the correct probability array from lp and + // lc params. + probs = literal_subcoder(coder->literal, + literal_context_bits, literal_mask, + dict.pos, dict_get0(&dict)); + + if (is_literal_state(state)) { + update_literal_normal(state); + + // Decode literal without match byte. + rc_bittree8(probs, 0); + } else { + update_literal_matched(state); + + // Decode literal with match byte. + rc_matched_literal(probs, + dict_get(&dict, rep0)); + } + + // Write decoded literal to dictionary + dict_put(&dict, (uint8_t)symbol); + continue; + } + + /////////////////// + // Decode match. // + /////////////////// + + // Instead of a new byte we are going to decode a + // distance-length pair. The distance represents how far + // back in the dictionary to begin copying. The length + // represents how many bytes to copy. + + rc_update_1(coder->is_match[state][pos_state]); + + rc_if_0(coder->is_rep[state]) { + /////////////////// + // Simple match. // + /////////////////// + + // Not a repeated match. In this case, + // the length (how many bytes to copy) must be + // decoded first. Then, the distance (where to + // start copying) is decoded. + // + // This is also how we know when we are done + // decoding. If the distance decodes to UINT32_MAX, + // then we know to stop decoding (end of payload + // marker). + + rc_update_0(coder->is_rep[state]); + update_match(state); + + // The latest three match distances are kept in + // memory in case there are repeated matches. + rep3 = rep2; + rep2 = rep1; + rep1 = rep0; + + // Decode the length of the match. + len_decode_fast(len, coder->match_len_decoder, + pos_state); + + // Next, decode the distance into rep0. + + // The next 6 bits determine how to decode the + // rest of the distance. + probs = coder->dist_slot[get_dist_state(len)]; + + rc_bittree6(probs, -DIST_SLOTS); + assert(symbol <= 63); + + if (symbol < DIST_MODEL_START) { + // If the decoded symbol is < DIST_MODEL_START + // then we use its value directly as the + // match distance. No other bits are needed. + // The only possible distance values + // are [0, 3]. + rep0 = symbol; + } else { + // Use the first two bits of symbol as the + // highest bits of the match distance. + + // "limit" represents the number of low bits + // to decode. + limit = (symbol >> 1) - 1; + assert(limit >= 1 && limit <= 30); + rep0 = 2 + (symbol & 1); + + if (symbol < DIST_MODEL_END) { + // When symbol is > DIST_MODEL_START, + // but symbol < DIST_MODEL_END, then + // it can decode distances between + // [4, 127]. + assert(limit <= 5); + rep0 <<= limit; + assert(rep0 <= 96); + + // -1 is fine, because we start + // decoding at probs[1], not probs[0]. + // NOTE: This violates the C standard, + // since we are doing pointer + // arithmetic past the beginning of + // the array. + assert((int32_t)(rep0 - symbol - 1) + >= -1); + assert((int32_t)(rep0 - symbol - 1) + <= 82); + probs = coder->pos_special + rep0 + - symbol - 1; + symbol = 1; + offset = 1; + + // Variable number (1-5) of bits + // from a reverse bittree. This + // isn't worth manual unrolling. + // + // NOTE: Making one or many of the + // variables (probs, symbol, offset, + // or limit) local here (instead of + // using those declared outside the + // main loop) can affect code size + // and performance which isn't a + // surprise but it's not so clear + // what is the best. + do { + rc_bit_add_if_1(probs, + rep0, offset); + offset <<= 1; + } while (--limit > 0); + } else { + // The distance is >= 128. Decode the + // lower bits without probabilities + // except the lowest four bits. + assert(symbol >= 14); + assert(limit >= 6); + + limit -= ALIGN_BITS; + assert(limit >= 2); + + rc_direct(rep0, limit); + + // Decode the lowest four bits using + // probabilities. + rep0 <<= ALIGN_BITS; + rc_bittree_rev4(coder->pos_align); + rep0 += symbol; + + // If the end of payload marker (EOPM) + // is detected, jump to the safe code. + // The EOPM handling isn't speed + // critical at all. + // + // A final normalization is needed + // after the EOPM (there can be a + // dummy byte to read in some cases). + // If the normalization was done here + // in the fast code, it would need to + // be taken into account in the value + // of LZMA_IN_REQUIRED. Using the + // safe code allows keeping + // LZMA_IN_REQUIRED as 20 instead of + // 21. + if (rep0 == UINT32_MAX) + goto eopm; + } + } + + // Validate the distance we just decoded. + if (unlikely(!dict_is_distance_valid(&dict, rep0))) { + ret = LZMA_DATA_ERROR; + goto out; + } + + } else { + rc_update_1(coder->is_rep[state]); + + ///////////////////// + // Repeated match. // + ///////////////////// + + // The match distance is a value that we have decoded + // recently. The latest four match distances are + // available as rep0, rep1, rep2 and rep3. We will + // now decode which of them is the new distance. + // + // There cannot be a match if we haven't produced + // any output, so check that first. + if (unlikely(!dict_is_distance_valid(&dict, 0))) { + ret = LZMA_DATA_ERROR; + goto out; + } + + rc_if_0(coder->is_rep0[state]) { + rc_update_0(coder->is_rep0[state]); + // The distance is rep0. + + // Decode the next bit to determine if 1 byte + // should be copied from rep0 distance or + // if the number of bytes needs to be decoded. + + // If the next bit is 0, then it is a + // "Short Rep Match" and only 1 bit is copied. + // Otherwise, the length of the match is + // decoded after the "else" statement. + rc_if_0(coder->is_rep0_long[state][pos_state]) { + rc_update_0(coder->is_rep0_long[ + state][pos_state]); + + update_short_rep(state); + dict_put(&dict, dict_get(&dict, rep0)); + continue; + } + + // Repeating more than one byte at + // distance of rep0. + rc_update_1(coder->is_rep0_long[ + state][pos_state]); + + } else { + rc_update_1(coder->is_rep0[state]); + + // The distance is rep1, rep2 or rep3. Once + // we find out which one of these three, it + // is stored to rep0 and rep1, rep2 and rep3 + // are updated accordingly. There is no + // "Short Rep Match" option, so the length + // of the match must always be decoded next. + rc_if_0(coder->is_rep1[state]) { + // The distance is rep1. + rc_update_0(coder->is_rep1[state]); + + const uint32_t distance = rep1; + rep1 = rep0; + rep0 = distance; + + } else { + rc_update_1(coder->is_rep1[state]); + + rc_if_0(coder->is_rep2[state]) { + // The distance is rep2. + rc_update_0(coder->is_rep2[ + state]); + + const uint32_t distance = rep2; + rep2 = rep1; + rep1 = rep0; + rep0 = distance; + + } else { + // The distance is rep3. + rc_update_1(coder->is_rep2[ + state]); + + const uint32_t distance = rep3; + rep3 = rep2; + rep2 = rep1; + rep1 = rep0; + rep0 = distance; + } + } + } + + update_long_rep(state); + + // Decode the length of the repeated match. + len_decode_fast(len, coder->rep_len_decoder, + pos_state); + } + + ///////////////////////////////// + // Repeat from history buffer. // + ///////////////////////////////// + + // The length is always between these limits. There is no way + // to trigger the algorithm to set len outside this range. + assert(len >= MATCH_LEN_MIN); + assert(len <= MATCH_LEN_MAX); + + // Repeat len bytes from distance of rep0. + if (unlikely(dict_repeat(&dict, rep0, &len))) { + coder->sequence = SEQ_COPY; + goto out; + } + + continue; + +slow: +#endif + /////////////////////////// + // Resumable Mode (slow) // + /////////////////////////// + + // This is very similar to Non-resumable Mode, so most of the + // comments are not repeated. The main differences are: + // - case labels are used to resume at the correct location. + // - Loops are not unrolled. + // - Range coder macros take an extra sequence argument + // so they can save to coder->sequence the location to + // resume in case there is not enough input. + case SEQ_NORMALIZE: + case SEQ_IS_MATCH: + if (unlikely(might_finish_without_eopm + && dict.pos == dict.limit)) { + // In rare cases there is a useless byte that needs + // to be read anyway. + rc_normalize_safe(SEQ_NORMALIZE); + + // If the range decoder state is such that we can + // be at the end of the LZMA stream, then the + // decoding is finished. + if (rc_is_finished(rc)) { + ret = LZMA_STREAM_END; + goto out; + } + + // If the caller hasn't allowed EOPM to be present + // together with known uncompressed size, then the + // LZMA stream is corrupt. + if (!coder->allow_eopm) { + ret = LZMA_DATA_ERROR; + goto out; + } + + // Otherwise continue decoding with the expectation + // that the next LZMA symbol is EOPM. + eopm_is_valid = true; + } + + rc_if_0_safe(coder->is_match[state][pos_state], SEQ_IS_MATCH) { + ///////////////////// + // Decode literal. // + ///////////////////// + + rc_update_0(coder->is_match[state][pos_state]); + + probs = literal_subcoder(coder->literal, + literal_context_bits, literal_mask, + dict.pos, dict_get0(&dict)); + symbol = 1; + + if (is_literal_state(state)) { + update_literal_normal(state); + + // Decode literal without match byte. + // The "slow" version does not unroll + // the loop. + case SEQ_LITERAL: + do { + rc_bit_safe(probs[symbol], , , + SEQ_LITERAL); + } while (symbol < (1 << 8)); + } else { + update_literal_matched(state); + + // Decode literal with match byte. + len = (uint32_t)(dict_get(&dict, rep0)) << 1; + + offset = 0x100; + + case SEQ_LITERAL_MATCHED: + do { + const uint32_t match_bit + = len & offset; + const uint32_t subcoder_index + = offset + match_bit + + symbol; + + rc_bit_safe(probs[subcoder_index], + offset &= ~match_bit, + offset &= match_bit, + SEQ_LITERAL_MATCHED); + + // It seems to be faster to do this + // here instead of putting it to the + // beginning of the loop and then + // putting the "case" in the middle + // of the loop. + len <<= 1; + + } while (symbol < (1 << 8)); + } + + case SEQ_LITERAL_WRITE: + if (dict_put_safe(&dict, (uint8_t)symbol)) { + coder->sequence = SEQ_LITERAL_WRITE; + goto out; + } + + continue; + } + + /////////////////// + // Decode match. // + /////////////////// + + rc_update_1(coder->is_match[state][pos_state]); + + case SEQ_IS_REP: + rc_if_0_safe(coder->is_rep[state], SEQ_IS_REP) { + /////////////////// + // Simple match. // + /////////////////// + + rc_update_0(coder->is_rep[state]); + update_match(state); + + rep3 = rep2; + rep2 = rep1; + rep1 = rep0; + + len_decode(len, coder->match_len_decoder, + pos_state, SEQ_MATCH_LEN); + + probs = coder->dist_slot[get_dist_state(len)]; + symbol = 1; + + case SEQ_DIST_SLOT: + do { + rc_bit_safe(probs[symbol], , , SEQ_DIST_SLOT); + } while (symbol < DIST_SLOTS); + + symbol -= DIST_SLOTS; + assert(symbol <= 63); + + if (symbol < DIST_MODEL_START) { + rep0 = symbol; + } else { + limit = (symbol >> 1) - 1; + assert(limit >= 1 && limit <= 30); + rep0 = 2 + (symbol & 1); + + if (symbol < DIST_MODEL_END) { + assert(limit <= 5); + rep0 <<= limit; + assert(rep0 <= 96); + // -1 is fine, because we start + // decoding at probs[1], not probs[0]. + // NOTE: This violates the C standard, + // since we are doing pointer + // arithmetic past the beginning of + // the array. + assert((int32_t)(rep0 - symbol - 1) + >= -1); + assert((int32_t)(rep0 - symbol - 1) + <= 82); + probs = coder->pos_special + rep0 + - symbol - 1; + symbol = 1; + offset = 0; + case SEQ_DIST_MODEL: + do { + rc_bit_safe(probs[symbol], , + rep0 += 1U << offset, + SEQ_DIST_MODEL); + } while (++offset < limit); + } else { + assert(symbol >= 14); + assert(limit >= 6); + limit -= ALIGN_BITS; + assert(limit >= 2); + case SEQ_DIRECT: + rc_direct_safe(rep0, limit, + SEQ_DIRECT); + + rep0 <<= ALIGN_BITS; + symbol = 0; + offset = 1; + case SEQ_ALIGN: + do { + rc_bit_last_safe( + coder->pos_align[ + offset + + symbol], + , + symbol += offset, + SEQ_ALIGN); + offset <<= 1; + } while (offset < ALIGN_SIZE); + + rep0 += symbol; + + if (rep0 == UINT32_MAX) { + // End of payload marker was + // found. It may only be + // present if + // - uncompressed size is + // unknown or + // - after known uncompressed + // size amount of bytes has + // been decompressed and + // caller has indicated + // that EOPM might be used + // (it's not allowed in + // LZMA2). +#ifndef HAVE_SMALL +eopm: +#endif + if (!eopm_is_valid) { + ret = LZMA_DATA_ERROR; + goto out; + } + + case SEQ_EOPM: + // LZMA1 stream with + // end-of-payload marker. + rc_normalize_safe(SEQ_EOPM); + ret = rc_is_finished(rc) + ? LZMA_STREAM_END + : LZMA_DATA_ERROR; + goto out; + } + } + } + + if (unlikely(!dict_is_distance_valid(&dict, rep0))) { + ret = LZMA_DATA_ERROR; + goto out; + } + + } else { + ///////////////////// + // Repeated match. // + ///////////////////// + + rc_update_1(coder->is_rep[state]); + + if (unlikely(!dict_is_distance_valid(&dict, 0))) { + ret = LZMA_DATA_ERROR; + goto out; + } + + case SEQ_IS_REP0: + rc_if_0_safe(coder->is_rep0[state], SEQ_IS_REP0) { + rc_update_0(coder->is_rep0[state]); + + case SEQ_IS_REP0_LONG: + rc_if_0_safe(coder->is_rep0_long + [state][pos_state], + SEQ_IS_REP0_LONG) { + rc_update_0(coder->is_rep0_long[ + state][pos_state]); + + update_short_rep(state); + + case SEQ_SHORTREP: + if (dict_put_safe(&dict, + dict_get(&dict, + rep0))) { + coder->sequence = SEQ_SHORTREP; + goto out; + } + + continue; + } + + rc_update_1(coder->is_rep0_long[ + state][pos_state]); + + } else { + rc_update_1(coder->is_rep0[state]); + + case SEQ_IS_REP1: + rc_if_0_safe(coder->is_rep1[state], SEQ_IS_REP1) { + rc_update_0(coder->is_rep1[state]); + + const uint32_t distance = rep1; + rep1 = rep0; + rep0 = distance; + + } else { + rc_update_1(coder->is_rep1[state]); + case SEQ_IS_REP2: + rc_if_0_safe(coder->is_rep2[state], + SEQ_IS_REP2) { + rc_update_0(coder->is_rep2[ + state]); + + const uint32_t distance = rep2; + rep2 = rep1; + rep1 = rep0; + rep0 = distance; + + } else { + rc_update_1(coder->is_rep2[ + state]); + + const uint32_t distance = rep3; + rep3 = rep2; + rep2 = rep1; + rep1 = rep0; + rep0 = distance; + } + } + } + + update_long_rep(state); + + len_decode(len, coder->rep_len_decoder, + pos_state, SEQ_REP_LEN); + } + + ///////////////////////////////// + // Repeat from history buffer. // + ///////////////////////////////// + + assert(len >= MATCH_LEN_MIN); + assert(len <= MATCH_LEN_MAX); + + case SEQ_COPY: + if (unlikely(dict_repeat(&dict, rep0, &len))) { + coder->sequence = SEQ_COPY; + goto out; + } + } + +out: + // Save state + + // NOTE: Must not copy dict.limit. + dictptr->pos = dict.pos; + dictptr->full = dict.full; + + rc_from_local(coder->rc, *in_pos); + + coder->state = state; + coder->rep0 = rep0; + coder->rep1 = rep1; + coder->rep2 = rep2; + coder->rep3 = rep3; + + coder->probs = probs; + coder->symbol = symbol; + coder->limit = limit; + coder->offset = offset; + coder->len = len; + + // Update the remaining amount of uncompressed data if uncompressed + // size was known. + if (coder->uncompressed_size != LZMA_VLI_UNKNOWN) { + coder->uncompressed_size -= dict.pos - dict_start; + + // If we have gotten all the output but the decoder wants + // to write more output, the file is corrupt. There are + // three SEQ values where output is produced. + if (coder->uncompressed_size == 0 && ret == LZMA_OK + && (coder->sequence == SEQ_LITERAL_WRITE + || coder->sequence == SEQ_SHORTREP + || coder->sequence == SEQ_COPY)) + ret = LZMA_DATA_ERROR; + } + + if (ret == LZMA_STREAM_END) { + // Reset the range decoder so that it is ready to reinitialize + // for a new LZMA2 chunk. + rc_reset(coder->rc); + coder->sequence = SEQ_IS_MATCH; + } + + return ret; +} + + +static void +lzma_decoder_uncompressed(void *coder_ptr, lzma_vli uncompressed_size, + bool allow_eopm) +{ + lzma_lzma1_decoder *coder = coder_ptr; + coder->uncompressed_size = uncompressed_size; + coder->allow_eopm = allow_eopm; +} + + +static void +lzma_decoder_reset(void *coder_ptr, const void *opt) +{ + lzma_lzma1_decoder *coder = coder_ptr; + const lzma_options_lzma *options = opt; + + // NOTE: We assume that lc/lp/pb are valid since they were + // successfully decoded with lzma_lzma_decode_properties(). + + // Calculate pos_mask. We don't need pos_bits as is for anything. + coder->pos_mask = (1U << options->pb) - 1; + + // Initialize the literal decoder. + literal_init(coder->literal, options->lc, options->lp); + + coder->literal_context_bits = options->lc; + coder->literal_mask = literal_mask_calc(options->lc, options->lp); + + // State + coder->state = STATE_LIT_LIT; + coder->rep0 = 0; + coder->rep1 = 0; + coder->rep2 = 0; + coder->rep3 = 0; + coder->pos_mask = (1U << options->pb) - 1; + + // Range decoder + rc_reset(coder->rc); + + // Bit and bittree decoders + for (uint32_t i = 0; i < STATES; ++i) { + for (uint32_t j = 0; j <= coder->pos_mask; ++j) { + bit_reset(coder->is_match[i][j]); + bit_reset(coder->is_rep0_long[i][j]); + } + + bit_reset(coder->is_rep[i]); + bit_reset(coder->is_rep0[i]); + bit_reset(coder->is_rep1[i]); + bit_reset(coder->is_rep2[i]); + } + + for (uint32_t i = 0; i < DIST_STATES; ++i) + bittree_reset(coder->dist_slot[i], DIST_SLOT_BITS); + + for (uint32_t i = 0; i < FULL_DISTANCES - DIST_MODEL_END; ++i) + bit_reset(coder->pos_special[i]); + + bittree_reset(coder->pos_align, ALIGN_BITS); + + // Len decoders (also bit/bittree) + const uint32_t num_pos_states = 1U << options->pb; + bit_reset(coder->match_len_decoder.choice); + bit_reset(coder->match_len_decoder.choice2); + bit_reset(coder->rep_len_decoder.choice); + bit_reset(coder->rep_len_decoder.choice2); + + for (uint32_t pos_state = 0; pos_state < num_pos_states; ++pos_state) { + bittree_reset(coder->match_len_decoder.low[pos_state], + LEN_LOW_BITS); + bittree_reset(coder->match_len_decoder.mid[pos_state], + LEN_MID_BITS); + + bittree_reset(coder->rep_len_decoder.low[pos_state], + LEN_LOW_BITS); + bittree_reset(coder->rep_len_decoder.mid[pos_state], + LEN_MID_BITS); + } + + bittree_reset(coder->match_len_decoder.high, LEN_HIGH_BITS); + bittree_reset(coder->rep_len_decoder.high, LEN_HIGH_BITS); + + coder->sequence = SEQ_IS_MATCH; + coder->probs = NULL; + coder->symbol = 0; + coder->limit = 0; + coder->offset = 0; + coder->len = 0; + + return; +} + + +extern lzma_ret +lzma_lzma_decoder_create(lzma_lz_decoder *lz, const lzma_allocator *allocator, + const lzma_options_lzma *options, lzma_lz_options *lz_options) +{ + if (lz->coder == NULL) { + lz->coder = lzma_alloc(sizeof(lzma_lzma1_decoder), allocator); + if (lz->coder == NULL) + return LZMA_MEM_ERROR; + + lz->code = &lzma_decode; + lz->reset = &lzma_decoder_reset; + lz->set_uncompressed = &lzma_decoder_uncompressed; + } + + // All dictionary sizes are OK here. LZ decoder will take care of + // the special cases. + lz_options->dict_size = options->dict_size; + lz_options->preset_dict = options->preset_dict; + lz_options->preset_dict_size = options->preset_dict_size; + + return LZMA_OK; +} + + +/// Allocate and initialize LZMA decoder. This is used only via LZ +/// initialization (lzma_lzma_decoder_init() passes function pointer to +/// the LZ initialization). +static lzma_ret +lzma_decoder_init(lzma_lz_decoder *lz, const lzma_allocator *allocator, + lzma_vli id, const void *options, lzma_lz_options *lz_options) +{ + if (!is_lclppb_valid(options)) + return LZMA_PROG_ERROR; + + lzma_vli uncomp_size = LZMA_VLI_UNKNOWN; + bool allow_eopm = true; + + if (id == LZMA_FILTER_LZMA1EXT) { + const lzma_options_lzma *opt = options; + + // Only one flag is supported. + if (opt->ext_flags & ~LZMA_LZMA1EXT_ALLOW_EOPM) + return LZMA_OPTIONS_ERROR; + + // FIXME? Using lzma_vli instead of uint64_t is weird because + // this has nothing to do with .xz headers and variable-length + // integer encoding. On the other hand, using LZMA_VLI_UNKNOWN + // instead of UINT64_MAX is clearer when unknown size is + // meant. A problem with using lzma_vli is that now we + // allow > LZMA_VLI_MAX which is fine in this file but + // it's still confusing. Note that alone_decoder.c also + // allows > LZMA_VLI_MAX when setting uncompressed size. + uncomp_size = opt->ext_size_low + + ((uint64_t)(opt->ext_size_high) << 32); + allow_eopm = (opt->ext_flags & LZMA_LZMA1EXT_ALLOW_EOPM) != 0 + || uncomp_size == LZMA_VLI_UNKNOWN; + } + + return_if_error(lzma_lzma_decoder_create( + lz, allocator, options, lz_options)); + + lzma_decoder_reset(lz->coder, options); + lzma_decoder_uncompressed(lz->coder, uncomp_size, allow_eopm); + + return LZMA_OK; +} + + +extern lzma_ret +lzma_lzma_decoder_init(lzma_next_coder *next, const lzma_allocator *allocator, + const lzma_filter_info *filters) +{ + // LZMA can only be the last filter in the chain. This is enforced + // by the raw_decoder initialization. + assert(filters[1].init == NULL); + + return lzma_lz_decoder_init(next, allocator, filters, + &lzma_decoder_init); +} + + +extern bool +lzma_lzma_lclppb_decode(lzma_options_lzma *options, uint8_t byte) +{ + if (byte > (4 * 5 + 4) * 9 + 8) + return true; + + // See the file format specification to understand this. + options->pb = byte / (9 * 5); + byte -= (uint8_t)(options->pb * 9 * 5); + options->lp = byte / 9; + options->lc = byte - options->lp * 9; + + return options->lc + options->lp > LZMA_LCLP_MAX; +} + + +extern uint64_t +lzma_lzma_decoder_memusage_nocheck(const void *options) +{ + const lzma_options_lzma *const opt = options; + return sizeof(lzma_lzma1_decoder) + + lzma_lz_decoder_memusage(opt->dict_size); +} + + +extern uint64_t +lzma_lzma_decoder_memusage(const void *options) +{ + if (!is_lclppb_valid(options)) + return UINT64_MAX; + + return lzma_lzma_decoder_memusage_nocheck(options); +} + + +extern lzma_ret +lzma_lzma_props_decode(void **options, const lzma_allocator *allocator, + const uint8_t *props, size_t props_size) +{ + if (props_size != 5) + return LZMA_OPTIONS_ERROR; + + lzma_options_lzma *opt + = lzma_alloc(sizeof(lzma_options_lzma), allocator); + if (opt == NULL) + return LZMA_MEM_ERROR; + + if (lzma_lzma_lclppb_decode(opt, props[0])) + goto error; + + // All dictionary sizes are accepted, including zero. LZ decoder + // will automatically use a dictionary at least a few KiB even if + // a smaller dictionary is requested. + opt->dict_size = read32le(props + 1); + + opt->preset_dict = NULL; + opt->preset_dict_size = 0; + + *options = opt; + + return LZMA_OK; + +error: + lzma_free(opt, allocator); + return LZMA_OPTIONS_ERROR; +} diff --git a/src/native/external/xz/src/liblzma/lzma/lzma_decoder.h b/src/native/external/xz/src/liblzma/lzma/lzma_decoder.h new file mode 100644 index 00000000000000..09aa16eacb2bc8 --- /dev/null +++ b/src/native/external/xz/src/liblzma/lzma/lzma_decoder.h @@ -0,0 +1,53 @@ +// SPDX-License-Identifier: 0BSD + +/////////////////////////////////////////////////////////////////////////////// +// +/// \file lzma_decoder.h +/// \brief LZMA decoder API +/// +// Authors: Igor Pavlov +// Lasse Collin +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef LZMA_LZMA_DECODER_H +#define LZMA_LZMA_DECODER_H + +#include "common.h" + + +/// Allocates and initializes LZMA decoder +extern lzma_ret lzma_lzma_decoder_init(lzma_next_coder *next, + const lzma_allocator *allocator, + const lzma_filter_info *filters); + +extern uint64_t lzma_lzma_decoder_memusage(const void *options); + +/// Gets memory usage without validating lc/lp/pb. This is used by LZMA2 +/// decoder, because raw LZMA2 decoding doesn't need lc/lp/pb. Also +/// alone_decoder.c and lzip_decoder.c use this because there lc/lp/pb +/// are known to be valid. +extern uint64_t lzma_lzma_decoder_memusage_nocheck(const void *options); + +extern lzma_ret lzma_lzma_props_decode( + void **options, const lzma_allocator *allocator, + const uint8_t *props, size_t props_size); + + +/// \brief Decodes the LZMA Properties byte (lc/lp/pb) +/// +/// \return true if error occurred, false on success +/// +extern bool lzma_lzma_lclppb_decode( + lzma_options_lzma *options, uint8_t byte); + + +#ifdef LZMA_LZ_DECODER_H +/// Allocate and setup function pointers only. This is used by LZMA1 and +/// LZMA2 decoders. +extern lzma_ret lzma_lzma_decoder_create( + lzma_lz_decoder *lz, const lzma_allocator *allocator, + const lzma_options_lzma *opt, lzma_lz_options *lz_options); +#endif + +#endif diff --git a/src/native/external/xz/src/liblzma/lzma/lzma_encoder.c b/src/native/external/xz/src/liblzma/lzma/lzma_encoder.c new file mode 100644 index 00000000000000..20dc62d3aac8bd --- /dev/null +++ b/src/native/external/xz/src/liblzma/lzma/lzma_encoder.c @@ -0,0 +1,786 @@ +// SPDX-License-Identifier: 0BSD + +/////////////////////////////////////////////////////////////////////////////// +// +/// \file lzma_encoder.c +/// \brief LZMA encoder +/// +// Authors: Igor Pavlov +// Lasse Collin +// +/////////////////////////////////////////////////////////////////////////////// + +#include "lzma2_encoder.h" +#include "lzma_encoder_private.h" +#include "fastpos.h" + + +///////////// +// Literal // +///////////// + +static inline void +literal_matched(lzma_range_encoder *rc, probability *subcoder, + uint32_t match_byte, uint32_t symbol) +{ + uint32_t offset = 0x100; + symbol += UINT32_C(1) << 8; + + do { + match_byte <<= 1; + const uint32_t match_bit = match_byte & offset; + const uint32_t subcoder_index + = offset + match_bit + (symbol >> 8); + const uint32_t bit = (symbol >> 7) & 1; + rc_bit(rc, &subcoder[subcoder_index], bit); + + symbol <<= 1; + offset &= ~(match_byte ^ symbol); + + } while (symbol < (UINT32_C(1) << 16)); +} + + +static inline void +literal(lzma_lzma1_encoder *coder, lzma_mf *mf, uint32_t position) +{ + // Locate the literal byte to be encoded and the subcoder. + const uint8_t cur_byte = mf->buffer[ + mf->read_pos - mf->read_ahead]; + probability *subcoder = literal_subcoder(coder->literal, + coder->literal_context_bits, coder->literal_mask, + position, mf->buffer[mf->read_pos - mf->read_ahead - 1]); + + if (is_literal_state(coder->state)) { + // Previous LZMA-symbol was a literal. Encode a normal + // literal without a match byte. + update_literal_normal(coder->state); + rc_bittree(&coder->rc, subcoder, 8, cur_byte); + } else { + // Previous LZMA-symbol was a match. Use the last byte of + // the match as a "match byte". That is, compare the bits + // of the current literal and the match byte. + update_literal_matched(coder->state); + const uint8_t match_byte = mf->buffer[ + mf->read_pos - coder->reps[0] - 1 + - mf->read_ahead]; + literal_matched(&coder->rc, subcoder, match_byte, cur_byte); + } +} + + +////////////////// +// Match length // +////////////////// + +static void +length_update_prices(lzma_length_encoder *lc, const uint32_t pos_state) +{ + const uint32_t table_size = lc->table_size; + lc->counters[pos_state] = table_size; + + const uint32_t a0 = rc_bit_0_price(lc->choice); + const uint32_t a1 = rc_bit_1_price(lc->choice); + const uint32_t b0 = a1 + rc_bit_0_price(lc->choice2); + const uint32_t b1 = a1 + rc_bit_1_price(lc->choice2); + uint32_t *const prices = lc->prices[pos_state]; + + uint32_t i; + for (i = 0; i < table_size && i < LEN_LOW_SYMBOLS; ++i) + prices[i] = a0 + rc_bittree_price(lc->low[pos_state], + LEN_LOW_BITS, i); + + for (; i < table_size && i < LEN_LOW_SYMBOLS + LEN_MID_SYMBOLS; ++i) + prices[i] = b0 + rc_bittree_price(lc->mid[pos_state], + LEN_MID_BITS, i - LEN_LOW_SYMBOLS); + + for (; i < table_size; ++i) + prices[i] = b1 + rc_bittree_price(lc->high, LEN_HIGH_BITS, + i - LEN_LOW_SYMBOLS - LEN_MID_SYMBOLS); + + return; +} + + +static inline void +length(lzma_range_encoder *rc, lzma_length_encoder *lc, + const uint32_t pos_state, uint32_t len, const bool fast_mode) +{ + assert(len <= MATCH_LEN_MAX); + len -= MATCH_LEN_MIN; + + if (len < LEN_LOW_SYMBOLS) { + rc_bit(rc, &lc->choice, 0); + rc_bittree(rc, lc->low[pos_state], LEN_LOW_BITS, len); + } else { + rc_bit(rc, &lc->choice, 1); + len -= LEN_LOW_SYMBOLS; + + if (len < LEN_MID_SYMBOLS) { + rc_bit(rc, &lc->choice2, 0); + rc_bittree(rc, lc->mid[pos_state], LEN_MID_BITS, len); + } else { + rc_bit(rc, &lc->choice2, 1); + len -= LEN_MID_SYMBOLS; + rc_bittree(rc, lc->high, LEN_HIGH_BITS, len); + } + } + + // Only getoptimum uses the prices so don't update the table when + // in fast mode. + if (!fast_mode) + if (--lc->counters[pos_state] == 0) + length_update_prices(lc, pos_state); +} + + +/////////// +// Match // +/////////// + +static inline void +match(lzma_lzma1_encoder *coder, const uint32_t pos_state, + const uint32_t distance, const uint32_t len) +{ + update_match(coder->state); + + length(&coder->rc, &coder->match_len_encoder, pos_state, len, + coder->fast_mode); + + const uint32_t dist_slot = get_dist_slot(distance); + const uint32_t dist_state = get_dist_state(len); + rc_bittree(&coder->rc, coder->dist_slot[dist_state], + DIST_SLOT_BITS, dist_slot); + + if (dist_slot >= DIST_MODEL_START) { + const uint32_t footer_bits = (dist_slot >> 1) - 1; + const uint32_t base = (2 | (dist_slot & 1)) << footer_bits; + const uint32_t dist_reduced = distance - base; + + if (dist_slot < DIST_MODEL_END) { + // Careful here: base - dist_slot - 1 can be -1, but + // rc_bittree_reverse starts at probs[1], not probs[0]. + rc_bittree_reverse(&coder->rc, + coder->dist_special + base - dist_slot - 1, + footer_bits, dist_reduced); + } else { + rc_direct(&coder->rc, dist_reduced >> ALIGN_BITS, + footer_bits - ALIGN_BITS); + rc_bittree_reverse( + &coder->rc, coder->dist_align, + ALIGN_BITS, dist_reduced & ALIGN_MASK); + ++coder->align_price_count; + } + } + + coder->reps[3] = coder->reps[2]; + coder->reps[2] = coder->reps[1]; + coder->reps[1] = coder->reps[0]; + coder->reps[0] = distance; + ++coder->match_price_count; +} + + +//////////////////// +// Repeated match // +//////////////////// + +static inline void +rep_match(lzma_lzma1_encoder *coder, const uint32_t pos_state, + const uint32_t rep, const uint32_t len) +{ + if (rep == 0) { + rc_bit(&coder->rc, &coder->is_rep0[coder->state], 0); + rc_bit(&coder->rc, + &coder->is_rep0_long[coder->state][pos_state], + len != 1); + } else { + const uint32_t distance = coder->reps[rep]; + rc_bit(&coder->rc, &coder->is_rep0[coder->state], 1); + + if (rep == 1) { + rc_bit(&coder->rc, &coder->is_rep1[coder->state], 0); + } else { + rc_bit(&coder->rc, &coder->is_rep1[coder->state], 1); + rc_bit(&coder->rc, &coder->is_rep2[coder->state], + rep - 2); + + if (rep == 3) + coder->reps[3] = coder->reps[2]; + + coder->reps[2] = coder->reps[1]; + } + + coder->reps[1] = coder->reps[0]; + coder->reps[0] = distance; + } + + if (len == 1) { + update_short_rep(coder->state); + } else { + length(&coder->rc, &coder->rep_len_encoder, pos_state, len, + coder->fast_mode); + update_long_rep(coder->state); + } +} + + +////////// +// Main // +////////// + +static void +encode_symbol(lzma_lzma1_encoder *coder, lzma_mf *mf, + uint32_t back, uint32_t len, uint32_t position) +{ + const uint32_t pos_state = position & coder->pos_mask; + + if (back == UINT32_MAX) { + // Literal i.e. eight-bit byte + assert(len == 1); + rc_bit(&coder->rc, + &coder->is_match[coder->state][pos_state], 0); + literal(coder, mf, position); + } else { + // Some type of match + rc_bit(&coder->rc, + &coder->is_match[coder->state][pos_state], 1); + + if (back < REPS) { + // It's a repeated match i.e. the same distance + // has been used earlier. + rc_bit(&coder->rc, &coder->is_rep[coder->state], 1); + rep_match(coder, pos_state, back, len); + } else { + // Normal match + rc_bit(&coder->rc, &coder->is_rep[coder->state], 0); + match(coder, pos_state, back - REPS, len); + } + } + + assert(mf->read_ahead >= len); + mf->read_ahead -= len; +} + + +static bool +encode_init(lzma_lzma1_encoder *coder, lzma_mf *mf) +{ + assert(mf_position(mf) == 0); + assert(coder->uncomp_size == 0); + + if (mf->read_pos == mf->read_limit) { + if (mf->action == LZMA_RUN) + return false; // We cannot do anything. + + // We are finishing (we cannot get here when flushing). + assert(mf->write_pos == mf->read_pos); + assert(mf->action == LZMA_FINISH); + } else { + // Do the actual initialization. The first LZMA symbol must + // always be a literal. + mf_skip(mf, 1); + mf->read_ahead = 0; + rc_bit(&coder->rc, &coder->is_match[0][0], 0); + rc_bittree(&coder->rc, coder->literal + 0, 8, mf->buffer[0]); + ++coder->uncomp_size; + } + + // Initialization is done (except if empty file). + coder->is_initialized = true; + + return true; +} + + +static void +encode_eopm(lzma_lzma1_encoder *coder, uint32_t position) +{ + const uint32_t pos_state = position & coder->pos_mask; + rc_bit(&coder->rc, &coder->is_match[coder->state][pos_state], 1); + rc_bit(&coder->rc, &coder->is_rep[coder->state], 0); + match(coder, pos_state, UINT32_MAX, MATCH_LEN_MIN); +} + + +/// Number of bytes that a single encoding loop in lzma_lzma_encode() can +/// consume from the dictionary. This limit comes from lzma_lzma_optimum() +/// and may need to be updated if that function is significantly modified. +#define LOOP_INPUT_MAX (OPTS + 1) + + +extern lzma_ret +lzma_lzma_encode(lzma_lzma1_encoder *restrict coder, lzma_mf *restrict mf, + uint8_t *restrict out, size_t *restrict out_pos, + size_t out_size, uint32_t limit) +{ + // Initialize the stream if no data has been encoded yet. + if (!coder->is_initialized && !encode_init(coder, mf)) + return LZMA_OK; + + // Encode pending output bytes from the range encoder. + // At the start of the stream, encode_init() encodes one literal. + // Later there can be pending output only with LZMA1 because LZMA2 + // ensures that there is always enough output space. Thus when using + // LZMA2, rc_encode() calls in this function will always return false. + if (rc_encode(&coder->rc, out, out_pos, out_size)) { + // We don't get here with LZMA2. + assert(limit == UINT32_MAX); + return LZMA_OK; + } + + // If the range encoder was flushed in an earlier call to this + // function but there wasn't enough output buffer space, those + // bytes would have now been encoded by the above rc_encode() call + // and the stream has now been finished. This can only happen with + // LZMA1 as LZMA2 always provides enough output buffer space. + if (coder->is_flushed) { + assert(limit == UINT32_MAX); + return LZMA_STREAM_END; + } + + while (true) { + // With LZMA2 we need to take care that compressed size of + // a chunk doesn't get too big. + // FIXME? Check if this could be improved. + if (limit != UINT32_MAX + && (mf->read_pos - mf->read_ahead >= limit + || *out_pos + rc_pending(&coder->rc) + >= LZMA2_CHUNK_MAX + - LOOP_INPUT_MAX)) + break; + + // Check that there is some input to process. + if (mf->read_pos >= mf->read_limit) { + if (mf->action == LZMA_RUN) + return LZMA_OK; + + if (mf->read_ahead == 0) + break; + } + + // Get optimal match (repeat position and length). + // Value ranges for pos: + // - [0, REPS): repeated match + // - [REPS, UINT32_MAX): + // match at (pos - REPS) + // - UINT32_MAX: not a match but a literal + // Value ranges for len: + // - [MATCH_LEN_MIN, MATCH_LEN_MAX] + uint32_t len; + uint32_t back; + + if (coder->fast_mode) + lzma_lzma_optimum_fast(coder, mf, &back, &len); + else + lzma_lzma_optimum_normal(coder, mf, &back, &len, + (uint32_t)(coder->uncomp_size)); + + encode_symbol(coder, mf, back, len, + (uint32_t)(coder->uncomp_size)); + + // If output size limiting is active (out_limit != 0), check + // if encoding this LZMA symbol would make the output size + // exceed the specified limit. + if (coder->out_limit != 0 && rc_encode_dummy( + &coder->rc, coder->out_limit)) { + // The most recent LZMA symbol would make the output + // too big. Throw it away. + rc_forget(&coder->rc); + + // FIXME: Tell the LZ layer to not read more input as + // it would be waste of time. This doesn't matter if + // output-size-limited encoding is done with a single + // call though. + + break; + } + + // This symbol will be encoded so update the uncompressed size. + coder->uncomp_size += len; + + // Encode the LZMA symbol. + if (rc_encode(&coder->rc, out, out_pos, out_size)) { + // Once again, this can only happen with LZMA1. + assert(limit == UINT32_MAX); + return LZMA_OK; + } + } + + // Make the uncompressed size available to the application. + if (coder->uncomp_size_ptr != NULL) + *coder->uncomp_size_ptr = coder->uncomp_size; + + // LZMA2 doesn't use EOPM at LZMA level. + // + // Plain LZMA streams without EOPM aren't supported except when + // output size limiting is enabled. + if (coder->use_eopm) + encode_eopm(coder, (uint32_t)(coder->uncomp_size)); + + // Flush the remaining bytes from the range encoder. + rc_flush(&coder->rc); + + // Copy the remaining bytes to the output buffer. If there + // isn't enough output space, we will copy out the remaining + // bytes on the next call to this function. + if (rc_encode(&coder->rc, out, out_pos, out_size)) { + // This cannot happen with LZMA2. + assert(limit == UINT32_MAX); + + coder->is_flushed = true; + return LZMA_OK; + } + + return LZMA_STREAM_END; +} + + +static lzma_ret +lzma_encode(void *coder, lzma_mf *restrict mf, + uint8_t *restrict out, size_t *restrict out_pos, + size_t out_size) +{ + // Plain LZMA has no support for sync-flushing. + if (unlikely(mf->action == LZMA_SYNC_FLUSH)) + return LZMA_OPTIONS_ERROR; + + return lzma_lzma_encode(coder, mf, out, out_pos, out_size, UINT32_MAX); +} + + +static lzma_ret +lzma_lzma_set_out_limit( + void *coder_ptr, uint64_t *uncomp_size, uint64_t out_limit) +{ + // Minimum output size is 5 bytes but that cannot hold any output + // so we use 6 bytes. + if (out_limit < 6) + return LZMA_BUF_ERROR; + + lzma_lzma1_encoder *coder = coder_ptr; + coder->out_limit = out_limit; + coder->uncomp_size_ptr = uncomp_size; + coder->use_eopm = false; + return LZMA_OK; +} + + +//////////////////// +// Initialization // +//////////////////// + +static bool +is_options_valid(const lzma_options_lzma *options) +{ + // Validate some of the options. LZ encoder validates nice_len too + // but we need a valid value here earlier. + return is_lclppb_valid(options) + && options->nice_len >= MATCH_LEN_MIN + && options->nice_len <= MATCH_LEN_MAX + && (options->mode == LZMA_MODE_FAST + || options->mode == LZMA_MODE_NORMAL); +} + + +static void +set_lz_options(lzma_lz_options *lz_options, const lzma_options_lzma *options) +{ + // LZ encoder initialization does the validation for these so we + // don't need to validate here. + lz_options->before_size = OPTS; + lz_options->dict_size = options->dict_size; + lz_options->after_size = LOOP_INPUT_MAX; + lz_options->match_len_max = MATCH_LEN_MAX; + lz_options->nice_len = my_max(mf_get_hash_bytes(options->mf), + options->nice_len); + lz_options->match_finder = options->mf; + lz_options->depth = options->depth; + lz_options->preset_dict = options->preset_dict; + lz_options->preset_dict_size = options->preset_dict_size; + return; +} + + +static void +length_encoder_reset(lzma_length_encoder *lencoder, + const uint32_t num_pos_states, const bool fast_mode) +{ + bit_reset(lencoder->choice); + bit_reset(lencoder->choice2); + + for (size_t pos_state = 0; pos_state < num_pos_states; ++pos_state) { + bittree_reset(lencoder->low[pos_state], LEN_LOW_BITS); + bittree_reset(lencoder->mid[pos_state], LEN_MID_BITS); + } + + bittree_reset(lencoder->high, LEN_HIGH_BITS); + + if (!fast_mode) + for (uint32_t pos_state = 0; pos_state < num_pos_states; + ++pos_state) + length_update_prices(lencoder, pos_state); + + return; +} + + +extern lzma_ret +lzma_lzma_encoder_reset(lzma_lzma1_encoder *coder, + const lzma_options_lzma *options) +{ + if (!is_options_valid(options)) + return LZMA_OPTIONS_ERROR; + + coder->pos_mask = (1U << options->pb) - 1; + coder->literal_context_bits = options->lc; + coder->literal_mask = literal_mask_calc(options->lc, options->lp); + + // Range coder + rc_reset(&coder->rc); + + // State + coder->state = STATE_LIT_LIT; + for (size_t i = 0; i < REPS; ++i) + coder->reps[i] = 0; + + literal_init(coder->literal, options->lc, options->lp); + + // Bit encoders + for (size_t i = 0; i < STATES; ++i) { + for (size_t j = 0; j <= coder->pos_mask; ++j) { + bit_reset(coder->is_match[i][j]); + bit_reset(coder->is_rep0_long[i][j]); + } + + bit_reset(coder->is_rep[i]); + bit_reset(coder->is_rep0[i]); + bit_reset(coder->is_rep1[i]); + bit_reset(coder->is_rep2[i]); + } + + for (size_t i = 0; i < FULL_DISTANCES - DIST_MODEL_END; ++i) + bit_reset(coder->dist_special[i]); + + // Bit tree encoders + for (size_t i = 0; i < DIST_STATES; ++i) + bittree_reset(coder->dist_slot[i], DIST_SLOT_BITS); + + bittree_reset(coder->dist_align, ALIGN_BITS); + + // Length encoders + length_encoder_reset(&coder->match_len_encoder, + 1U << options->pb, coder->fast_mode); + + length_encoder_reset(&coder->rep_len_encoder, + 1U << options->pb, coder->fast_mode); + + // Price counts are incremented every time appropriate probabilities + // are changed. price counts are set to zero when the price tables + // are updated, which is done when the appropriate price counts have + // big enough value, and lzma_mf.read_ahead == 0 which happens at + // least every OPTS (a few thousand) possible price count increments. + // + // By resetting price counts to UINT32_MAX / 2, we make sure that the + // price tables will be initialized before they will be used (since + // the value is definitely big enough), and that it is OK to increment + // price counts without risk of integer overflow (since UINT32_MAX / 2 + // is small enough). The current code doesn't increment price counts + // before initializing price tables, but it maybe done in future if + // we add support for saving the state between LZMA2 chunks. + coder->match_price_count = UINT32_MAX / 2; + coder->align_price_count = UINT32_MAX / 2; + + coder->opts_end_index = 0; + coder->opts_current_index = 0; + + return LZMA_OK; +} + + +extern lzma_ret +lzma_lzma_encoder_create(void **coder_ptr, const lzma_allocator *allocator, + lzma_vli id, const lzma_options_lzma *options, + lzma_lz_options *lz_options) +{ + assert(id == LZMA_FILTER_LZMA1 || id == LZMA_FILTER_LZMA1EXT + || id == LZMA_FILTER_LZMA2); + + // Allocate lzma_lzma1_encoder if it wasn't already allocated. + if (*coder_ptr == NULL) { + *coder_ptr = lzma_alloc(sizeof(lzma_lzma1_encoder), allocator); + if (*coder_ptr == NULL) + return LZMA_MEM_ERROR; + } + + lzma_lzma1_encoder *coder = *coder_ptr; + + // Set compression mode. Note that we haven't validated the options + // yet. Invalid options will get rejected by lzma_lzma_encoder_reset() + // call at the end of this function. + switch (options->mode) { + case LZMA_MODE_FAST: + coder->fast_mode = true; + break; + + case LZMA_MODE_NORMAL: { + coder->fast_mode = false; + + // Set dist_table_size. + // Round the dictionary size up to next 2^n. + // + // Currently the maximum encoder dictionary size + // is 1.5 GiB due to lz_encoder.c and here we need + // to be below 2 GiB to make the rounded up value + // fit in an uint32_t and avoid an infinite while-loop + // (and undefined behavior due to a too large shift). + // So do the same check as in LZ encoder, + // limiting to 1.5 GiB. + if (options->dict_size > (UINT32_C(1) << 30) + + (UINT32_C(1) << 29)) + return LZMA_OPTIONS_ERROR; + + uint32_t log_size = 0; + while ((UINT32_C(1) << log_size) < options->dict_size) + ++log_size; + + coder->dist_table_size = log_size * 2; + + // Length encoders' price table size + const uint32_t nice_len = my_max( + mf_get_hash_bytes(options->mf), + options->nice_len); + + coder->match_len_encoder.table_size + = nice_len + 1 - MATCH_LEN_MIN; + coder->rep_len_encoder.table_size + = nice_len + 1 - MATCH_LEN_MIN; + break; + } + + default: + return LZMA_OPTIONS_ERROR; + } + + // We don't need to write the first byte as literal if there is + // a non-empty preset dictionary. encode_init() wouldn't even work + // if there is a non-empty preset dictionary, because encode_init() + // assumes that position is zero and previous byte is also zero. + coder->is_initialized = options->preset_dict != NULL + && options->preset_dict_size > 0; + coder->is_flushed = false; + coder->uncomp_size = 0; + coder->uncomp_size_ptr = NULL; + + // Output size limiting is disabled by default. + coder->out_limit = 0; + + // Determine if end marker is wanted: + // - It is never used with LZMA2. + // - It is always used with LZMA_FILTER_LZMA1 (unless + // lzma_lzma_set_out_limit() is called later). + // - LZMA_FILTER_LZMA1EXT has a flag for it in the options. + coder->use_eopm = (id == LZMA_FILTER_LZMA1); + if (id == LZMA_FILTER_LZMA1EXT) { + // Check if unsupported flags are present. + if (options->ext_flags & ~LZMA_LZMA1EXT_ALLOW_EOPM) + return LZMA_OPTIONS_ERROR; + + coder->use_eopm = (options->ext_flags + & LZMA_LZMA1EXT_ALLOW_EOPM) != 0; + + // TODO? As long as there are no filters that change the size + // of the data, it is enough to look at lzma_stream.total_in + // after encoding has been finished to know the uncompressed + // size of the LZMA1 stream. But in the future there could be + // filters that change the size of the data and then total_in + // doesn't work as the LZMA1 stream size might be different + // due to another filter in the chain. The problem is simple + // to solve: Add another flag to ext_flags and then set + // coder->uncomp_size_ptr to the address stored in + // lzma_options_lzma.reserved_ptr2 (or _ptr1). + } + + set_lz_options(lz_options, options); + + return lzma_lzma_encoder_reset(coder, options); +} + + +static lzma_ret +lzma_encoder_init(lzma_lz_encoder *lz, const lzma_allocator *allocator, + lzma_vli id, const void *options, lzma_lz_options *lz_options) +{ + if (options == NULL) + return LZMA_PROG_ERROR; + + lz->code = &lzma_encode; + lz->set_out_limit = &lzma_lzma_set_out_limit; + return lzma_lzma_encoder_create( + &lz->coder, allocator, id, options, lz_options); +} + + +extern lzma_ret +lzma_lzma_encoder_init(lzma_next_coder *next, const lzma_allocator *allocator, + const lzma_filter_info *filters) +{ + return lzma_lz_encoder_init( + next, allocator, filters, &lzma_encoder_init); +} + + +extern uint64_t +lzma_lzma_encoder_memusage(const void *options) +{ + if (!is_options_valid(options)) + return UINT64_MAX; + + lzma_lz_options lz_options; + set_lz_options(&lz_options, options); + + const uint64_t lz_memusage = lzma_lz_encoder_memusage(&lz_options); + if (lz_memusage == UINT64_MAX) + return UINT64_MAX; + + return (uint64_t)(sizeof(lzma_lzma1_encoder)) + lz_memusage; +} + + +extern bool +lzma_lzma_lclppb_encode(const lzma_options_lzma *options, uint8_t *byte) +{ + if (!is_lclppb_valid(options)) + return true; + + *byte = (uint8_t)((options->pb * 5 + options->lp) * 9 + options->lc); + assert(*byte <= (4 * 5 + 4) * 9 + 8); + + return false; +} + + +#ifdef HAVE_ENCODER_LZMA1 +extern lzma_ret +lzma_lzma_props_encode(const void *options, uint8_t *out) +{ + if (options == NULL) + return LZMA_PROG_ERROR; + + const lzma_options_lzma *const opt = options; + + if (lzma_lzma_lclppb_encode(opt, out)) + return LZMA_PROG_ERROR; + + write32le(out + 1, opt->dict_size); + + return LZMA_OK; +} +#endif + + +extern LZMA_API(lzma_bool) +lzma_mode_is_supported(lzma_mode mode) +{ + return mode == LZMA_MODE_FAST || mode == LZMA_MODE_NORMAL; +} diff --git a/src/native/external/xz/src/liblzma/lzma/lzma_encoder.h b/src/native/external/xz/src/liblzma/lzma/lzma_encoder.h new file mode 100644 index 00000000000000..e8ae8079306c5a --- /dev/null +++ b/src/native/external/xz/src/liblzma/lzma/lzma_encoder.h @@ -0,0 +1,58 @@ +// SPDX-License-Identifier: 0BSD + +/////////////////////////////////////////////////////////////////////////////// +// +/// \file lzma_encoder.h +/// \brief LZMA encoder API +/// +// Authors: Igor Pavlov +// Lasse Collin +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef LZMA_LZMA_ENCODER_H +#define LZMA_LZMA_ENCODER_H + +#include "common.h" + + +typedef struct lzma_lzma1_encoder_s lzma_lzma1_encoder; + + +extern lzma_ret lzma_lzma_encoder_init(lzma_next_coder *next, + const lzma_allocator *allocator, + const lzma_filter_info *filters); + + +extern uint64_t lzma_lzma_encoder_memusage(const void *options); + +extern lzma_ret lzma_lzma_props_encode(const void *options, uint8_t *out); + + +/// Encodes lc/lp/pb into one byte. Returns false on success and true on error. +extern bool lzma_lzma_lclppb_encode( + const lzma_options_lzma *options, uint8_t *byte); + + +#ifdef LZMA_LZ_ENCODER_H + +/// Initializes raw LZMA encoder; this is used by LZMA2. +extern lzma_ret lzma_lzma_encoder_create( + void **coder_ptr, const lzma_allocator *allocator, + lzma_vli id, const lzma_options_lzma *options, + lzma_lz_options *lz_options); + + +/// Resets an already initialized LZMA encoder; this is used by LZMA2. +extern lzma_ret lzma_lzma_encoder_reset( + lzma_lzma1_encoder *coder, const lzma_options_lzma *options); + + +extern lzma_ret lzma_lzma_encode(lzma_lzma1_encoder *restrict coder, + lzma_mf *restrict mf, uint8_t *restrict out, + size_t *restrict out_pos, size_t out_size, + uint32_t read_limit); + +#endif + +#endif diff --git a/src/native/external/xz/src/liblzma/lzma/lzma_encoder_optimum_fast.c b/src/native/external/xz/src/liblzma/lzma/lzma_encoder_optimum_fast.c new file mode 100644 index 00000000000000..0f063d5be7a5a4 --- /dev/null +++ b/src/native/external/xz/src/liblzma/lzma/lzma_encoder_optimum_fast.c @@ -0,0 +1,169 @@ +// SPDX-License-Identifier: 0BSD + +/////////////////////////////////////////////////////////////////////////////// +// +/// \file lzma_encoder_optimum_fast.c +// +// Author: Igor Pavlov +// +/////////////////////////////////////////////////////////////////////////////// + +#include "lzma_encoder_private.h" +#include "memcmplen.h" + + +#define change_pair(small_dist, big_dist) \ + (((big_dist) >> 7) > (small_dist)) + + +extern void +lzma_lzma_optimum_fast(lzma_lzma1_encoder *restrict coder, + lzma_mf *restrict mf, + uint32_t *restrict back_res, uint32_t *restrict len_res) +{ + const uint32_t nice_len = mf->nice_len; + + uint32_t len_main; + uint32_t matches_count; + if (mf->read_ahead == 0) { + len_main = mf_find(mf, &matches_count, coder->matches); + } else { + assert(mf->read_ahead == 1); + len_main = coder->longest_match_length; + matches_count = coder->matches_count; + } + + const uint8_t *buf = mf_ptr(mf) - 1; + const uint32_t buf_avail = my_min(mf_avail(mf) + 1, MATCH_LEN_MAX); + + if (buf_avail < 2) { + // There's not enough input left to encode a match. + *back_res = UINT32_MAX; + *len_res = 1; + return; + } + + // Look for repeated matches; scan the previous four match distances + uint32_t rep_len = 0; + uint32_t rep_index = 0; + + for (uint32_t i = 0; i < REPS; ++i) { + // Pointer to the beginning of the match candidate + const uint8_t *const buf_back = buf - coder->reps[i] - 1; + + // If the first two bytes (2 == MATCH_LEN_MIN) do not match, + // this rep is not useful. + if (not_equal_16(buf, buf_back)) + continue; + + // The first two bytes matched. + // Calculate the length of the match. + const uint32_t len = lzma_memcmplen( + buf, buf_back, 2, buf_avail); + + // If we have found a repeated match that is at least + // nice_len long, return it immediately. + if (len >= nice_len) { + *back_res = i; + *len_res = len; + mf_skip(mf, len - 1); + return; + } + + if (len > rep_len) { + rep_index = i; + rep_len = len; + } + } + + // We didn't find a long enough repeated match. Encode it as a normal + // match if the match length is at least nice_len. + if (len_main >= nice_len) { + *back_res = coder->matches[matches_count - 1].dist + REPS; + *len_res = len_main; + mf_skip(mf, len_main - 1); + return; + } + + uint32_t back_main = 0; + if (len_main >= 2) { + back_main = coder->matches[matches_count - 1].dist; + + while (matches_count > 1 && len_main == + coder->matches[matches_count - 2].len + 1) { + if (!change_pair(coder->matches[ + matches_count - 2].dist, + back_main)) + break; + + --matches_count; + len_main = coder->matches[matches_count - 1].len; + back_main = coder->matches[matches_count - 1].dist; + } + + if (len_main == 2 && back_main >= 0x80) + len_main = 1; + } + + if (rep_len >= 2) { + if (rep_len + 1 >= len_main + || (rep_len + 2 >= len_main + && back_main > (UINT32_C(1) << 9)) + || (rep_len + 3 >= len_main + && back_main > (UINT32_C(1) << 15))) { + *back_res = rep_index; + *len_res = rep_len; + mf_skip(mf, rep_len - 1); + return; + } + } + + if (len_main < 2 || buf_avail <= 2) { + *back_res = UINT32_MAX; + *len_res = 1; + return; + } + + // Get the matches for the next byte. If we find a better match, + // the current byte is encoded as a literal. + coder->longest_match_length = mf_find(mf, + &coder->matches_count, coder->matches); + + if (coder->longest_match_length >= 2) { + const uint32_t new_dist = coder->matches[ + coder->matches_count - 1].dist; + + if ((coder->longest_match_length >= len_main + && new_dist < back_main) + || (coder->longest_match_length == len_main + 1 + && !change_pair(back_main, new_dist)) + || (coder->longest_match_length > len_main + 1) + || (coder->longest_match_length + 1 >= len_main + && len_main >= 3 + && change_pair(new_dist, back_main))) { + *back_res = UINT32_MAX; + *len_res = 1; + return; + } + } + + // In contrast to LZMA SDK, dictionary could not have been moved + // between mf_find() calls, thus it is safe to just increment + // the old buf pointer instead of recalculating it with mf_ptr(). + ++buf; + + const uint32_t limit = my_max(2, len_main - 1); + + for (uint32_t i = 0; i < REPS; ++i) { + if (memcmp(buf, buf - coder->reps[i] - 1, limit) == 0) { + *back_res = UINT32_MAX; + *len_res = 1; + return; + } + } + + *back_res = back_main + REPS; + *len_res = len_main; + mf_skip(mf, len_main - 2); + return; +} diff --git a/src/native/external/xz/src/liblzma/lzma/lzma_encoder_optimum_normal.c b/src/native/external/xz/src/liblzma/lzma/lzma_encoder_optimum_normal.c new file mode 100644 index 00000000000000..a6c0398f3af319 --- /dev/null +++ b/src/native/external/xz/src/liblzma/lzma/lzma_encoder_optimum_normal.c @@ -0,0 +1,858 @@ +// SPDX-License-Identifier: 0BSD + +/////////////////////////////////////////////////////////////////////////////// +// +/// \file lzma_encoder_optimum_normal.c +// +// Author: Igor Pavlov +// +/////////////////////////////////////////////////////////////////////////////// + +#include "lzma_encoder_private.h" +#include "fastpos.h" +#include "memcmplen.h" + + +//////////// +// Prices // +//////////// + +static uint32_t +get_literal_price(const lzma_lzma1_encoder *const coder, const uint32_t pos, + const uint32_t prev_byte, const bool match_mode, + uint32_t match_byte, uint32_t symbol) +{ + const probability *const subcoder = literal_subcoder(coder->literal, + coder->literal_context_bits, coder->literal_mask, + pos, prev_byte); + + uint32_t price = 0; + + if (!match_mode) { + price = rc_bittree_price(subcoder, 8, symbol); + } else { + uint32_t offset = 0x100; + symbol += UINT32_C(1) << 8; + + do { + match_byte <<= 1; + + const uint32_t match_bit = match_byte & offset; + const uint32_t subcoder_index + = offset + match_bit + (symbol >> 8); + const uint32_t bit = (symbol >> 7) & 1; + price += rc_bit_price(subcoder[subcoder_index], bit); + + symbol <<= 1; + offset &= ~(match_byte ^ symbol); + + } while (symbol < (UINT32_C(1) << 16)); + } + + return price; +} + + +static inline uint32_t +get_len_price(const lzma_length_encoder *const lencoder, + const uint32_t len, const uint32_t pos_state) +{ + // NOTE: Unlike the other price tables, length prices are updated + // in lzma_encoder.c + return lencoder->prices[pos_state][len - MATCH_LEN_MIN]; +} + + +static inline uint32_t +get_short_rep_price(const lzma_lzma1_encoder *const coder, + const lzma_lzma_state state, const uint32_t pos_state) +{ + return rc_bit_0_price(coder->is_rep0[state]) + + rc_bit_0_price(coder->is_rep0_long[state][pos_state]); +} + + +static inline uint32_t +get_pure_rep_price(const lzma_lzma1_encoder *const coder, const uint32_t rep_index, + const lzma_lzma_state state, uint32_t pos_state) +{ + uint32_t price; + + if (rep_index == 0) { + price = rc_bit_0_price(coder->is_rep0[state]); + price += rc_bit_1_price(coder->is_rep0_long[state][pos_state]); + } else { + price = rc_bit_1_price(coder->is_rep0[state]); + + if (rep_index == 1) { + price += rc_bit_0_price(coder->is_rep1[state]); + } else { + price += rc_bit_1_price(coder->is_rep1[state]); + price += rc_bit_price(coder->is_rep2[state], + rep_index - 2); + } + } + + return price; +} + + +static inline uint32_t +get_rep_price(const lzma_lzma1_encoder *const coder, const uint32_t rep_index, + const uint32_t len, const lzma_lzma_state state, + const uint32_t pos_state) +{ + return get_len_price(&coder->rep_len_encoder, len, pos_state) + + get_pure_rep_price(coder, rep_index, state, pos_state); +} + + +static inline uint32_t +get_dist_len_price(const lzma_lzma1_encoder *const coder, const uint32_t dist, + const uint32_t len, const uint32_t pos_state) +{ + const uint32_t dist_state = get_dist_state(len); + uint32_t price; + + if (dist < FULL_DISTANCES) { + price = coder->dist_prices[dist_state][dist]; + } else { + const uint32_t dist_slot = get_dist_slot_2(dist); + price = coder->dist_slot_prices[dist_state][dist_slot] + + coder->align_prices[dist & ALIGN_MASK]; + } + + price += get_len_price(&coder->match_len_encoder, len, pos_state); + + return price; +} + + +static void +fill_dist_prices(lzma_lzma1_encoder *coder) +{ + for (uint32_t dist_state = 0; dist_state < DIST_STATES; ++dist_state) { + + uint32_t *const dist_slot_prices + = coder->dist_slot_prices[dist_state]; + + // Price to encode the dist_slot. + for (uint32_t dist_slot = 0; + dist_slot < coder->dist_table_size; ++dist_slot) + dist_slot_prices[dist_slot] = rc_bittree_price( + coder->dist_slot[dist_state], + DIST_SLOT_BITS, dist_slot); + + // For matches with distance >= FULL_DISTANCES, add the price + // of the direct bits part of the match distance. (Align bits + // are handled by fill_align_prices()). + for (uint32_t dist_slot = DIST_MODEL_END; + dist_slot < coder->dist_table_size; + ++dist_slot) + dist_slot_prices[dist_slot] += rc_direct_price( + ((dist_slot >> 1) - 1) - ALIGN_BITS); + + // Distances in the range [0, 3] are fully encoded with + // dist_slot, so they are used for coder->dist_prices + // as is. + for (uint32_t i = 0; i < DIST_MODEL_START; ++i) + coder->dist_prices[dist_state][i] + = dist_slot_prices[i]; + } + + // Distances in the range [4, 127] depend on dist_slot and + // dist_special. We do this in a loop separate from the above + // loop to avoid redundant calls to get_dist_slot(). + for (uint32_t i = DIST_MODEL_START; i < FULL_DISTANCES; ++i) { + const uint32_t dist_slot = get_dist_slot(i); + const uint32_t footer_bits = ((dist_slot >> 1) - 1); + const uint32_t base = (2 | (dist_slot & 1)) << footer_bits; + const uint32_t price = rc_bittree_reverse_price( + coder->dist_special + base - dist_slot - 1, + footer_bits, i - base); + + for (uint32_t dist_state = 0; dist_state < DIST_STATES; + ++dist_state) + coder->dist_prices[dist_state][i] + = price + coder->dist_slot_prices[ + dist_state][dist_slot]; + } + + coder->match_price_count = 0; + return; +} + + +static void +fill_align_prices(lzma_lzma1_encoder *coder) +{ + for (uint32_t i = 0; i < ALIGN_SIZE; ++i) + coder->align_prices[i] = rc_bittree_reverse_price( + coder->dist_align, ALIGN_BITS, i); + + coder->align_price_count = 0; + return; +} + + +///////////// +// Optimal // +///////////// + +static inline void +make_literal(lzma_optimal *optimal) +{ + optimal->back_prev = UINT32_MAX; + optimal->prev_1_is_literal = false; +} + + +static inline void +make_short_rep(lzma_optimal *optimal) +{ + optimal->back_prev = 0; + optimal->prev_1_is_literal = false; +} + + +#define is_short_rep(optimal) \ + ((optimal).back_prev == 0) + + +static void +backward(lzma_lzma1_encoder *restrict coder, uint32_t *restrict len_res, + uint32_t *restrict back_res, uint32_t cur) +{ + coder->opts_end_index = cur; + + uint32_t pos_mem = coder->opts[cur].pos_prev; + uint32_t back_mem = coder->opts[cur].back_prev; + + do { + if (coder->opts[cur].prev_1_is_literal) { + make_literal(&coder->opts[pos_mem]); + coder->opts[pos_mem].pos_prev = pos_mem - 1; + + if (coder->opts[cur].prev_2) { + coder->opts[pos_mem - 1].prev_1_is_literal + = false; + coder->opts[pos_mem - 1].pos_prev + = coder->opts[cur].pos_prev_2; + coder->opts[pos_mem - 1].back_prev + = coder->opts[cur].back_prev_2; + } + } + + const uint32_t pos_prev = pos_mem; + const uint32_t back_cur = back_mem; + + back_mem = coder->opts[pos_prev].back_prev; + pos_mem = coder->opts[pos_prev].pos_prev; + + coder->opts[pos_prev].back_prev = back_cur; + coder->opts[pos_prev].pos_prev = cur; + cur = pos_prev; + + } while (cur != 0); + + coder->opts_current_index = coder->opts[0].pos_prev; + *len_res = coder->opts[0].pos_prev; + *back_res = coder->opts[0].back_prev; + + return; +} + + +////////// +// Main // +////////// + +static inline uint32_t +helper1(lzma_lzma1_encoder *restrict coder, lzma_mf *restrict mf, + uint32_t *restrict back_res, uint32_t *restrict len_res, + uint32_t position) +{ + const uint32_t nice_len = mf->nice_len; + + uint32_t len_main; + uint32_t matches_count; + + if (mf->read_ahead == 0) { + len_main = mf_find(mf, &matches_count, coder->matches); + } else { + assert(mf->read_ahead == 1); + len_main = coder->longest_match_length; + matches_count = coder->matches_count; + } + + const uint32_t buf_avail = my_min(mf_avail(mf) + 1, MATCH_LEN_MAX); + if (buf_avail < 2) { + *back_res = UINT32_MAX; + *len_res = 1; + return UINT32_MAX; + } + + const uint8_t *const buf = mf_ptr(mf) - 1; + + uint32_t rep_lens[REPS]; + uint32_t rep_max_index = 0; + + for (uint32_t i = 0; i < REPS; ++i) { + const uint8_t *const buf_back = buf - coder->reps[i] - 1; + + if (not_equal_16(buf, buf_back)) { + rep_lens[i] = 0; + continue; + } + + rep_lens[i] = lzma_memcmplen(buf, buf_back, 2, buf_avail); + + if (rep_lens[i] > rep_lens[rep_max_index]) + rep_max_index = i; + } + + if (rep_lens[rep_max_index] >= nice_len) { + *back_res = rep_max_index; + *len_res = rep_lens[rep_max_index]; + mf_skip(mf, *len_res - 1); + return UINT32_MAX; + } + + + if (len_main >= nice_len) { + *back_res = coder->matches[matches_count - 1].dist + REPS; + *len_res = len_main; + mf_skip(mf, len_main - 1); + return UINT32_MAX; + } + + const uint8_t current_byte = *buf; + const uint8_t match_byte = *(buf - coder->reps[0] - 1); + + if (len_main < 2 && current_byte != match_byte + && rep_lens[rep_max_index] < 2) { + *back_res = UINT32_MAX; + *len_res = 1; + return UINT32_MAX; + } + + coder->opts[0].state = coder->state; + + const uint32_t pos_state = position & coder->pos_mask; + + coder->opts[1].price = rc_bit_0_price( + coder->is_match[coder->state][pos_state]) + + get_literal_price(coder, position, buf[-1], + !is_literal_state(coder->state), + match_byte, current_byte); + + make_literal(&coder->opts[1]); + + const uint32_t match_price = rc_bit_1_price( + coder->is_match[coder->state][pos_state]); + const uint32_t rep_match_price = match_price + + rc_bit_1_price(coder->is_rep[coder->state]); + + if (match_byte == current_byte) { + const uint32_t short_rep_price = rep_match_price + + get_short_rep_price( + coder, coder->state, pos_state); + + if (short_rep_price < coder->opts[1].price) { + coder->opts[1].price = short_rep_price; + make_short_rep(&coder->opts[1]); + } + } + + const uint32_t len_end = my_max(len_main, rep_lens[rep_max_index]); + + if (len_end < 2) { + *back_res = coder->opts[1].back_prev; + *len_res = 1; + return UINT32_MAX; + } + + coder->opts[1].pos_prev = 0; + + for (uint32_t i = 0; i < REPS; ++i) + coder->opts[0].backs[i] = coder->reps[i]; + + uint32_t len = len_end; + do { + coder->opts[len].price = RC_INFINITY_PRICE; + } while (--len >= 2); + + + for (uint32_t i = 0; i < REPS; ++i) { + uint32_t rep_len = rep_lens[i]; + if (rep_len < 2) + continue; + + const uint32_t price = rep_match_price + get_pure_rep_price( + coder, i, coder->state, pos_state); + + do { + const uint32_t cur_and_len_price = price + + get_len_price( + &coder->rep_len_encoder, + rep_len, pos_state); + + if (cur_and_len_price < coder->opts[rep_len].price) { + coder->opts[rep_len].price = cur_and_len_price; + coder->opts[rep_len].pos_prev = 0; + coder->opts[rep_len].back_prev = i; + coder->opts[rep_len].prev_1_is_literal = false; + } + } while (--rep_len >= 2); + } + + + const uint32_t normal_match_price = match_price + + rc_bit_0_price(coder->is_rep[coder->state]); + + len = rep_lens[0] >= 2 ? rep_lens[0] + 1 : 2; + if (len <= len_main) { + uint32_t i = 0; + while (len > coder->matches[i].len) + ++i; + + for(; ; ++len) { + const uint32_t dist = coder->matches[i].dist; + const uint32_t cur_and_len_price = normal_match_price + + get_dist_len_price(coder, + dist, len, pos_state); + + if (cur_and_len_price < coder->opts[len].price) { + coder->opts[len].price = cur_and_len_price; + coder->opts[len].pos_prev = 0; + coder->opts[len].back_prev = dist + REPS; + coder->opts[len].prev_1_is_literal = false; + } + + if (len == coder->matches[i].len) + if (++i == matches_count) + break; + } + } + + return len_end; +} + + +static inline uint32_t +helper2(lzma_lzma1_encoder *coder, uint32_t *reps, const uint8_t *buf, + uint32_t len_end, uint32_t position, const uint32_t cur, + const uint32_t nice_len, const uint32_t buf_avail_full) +{ + uint32_t matches_count = coder->matches_count; + uint32_t new_len = coder->longest_match_length; + uint32_t pos_prev = coder->opts[cur].pos_prev; + lzma_lzma_state state; + + if (coder->opts[cur].prev_1_is_literal) { + --pos_prev; + + if (coder->opts[cur].prev_2) { + state = coder->opts[coder->opts[cur].pos_prev_2].state; + + if (coder->opts[cur].back_prev_2 < REPS) + update_long_rep(state); + else + update_match(state); + + } else { + state = coder->opts[pos_prev].state; + } + + update_literal(state); + + } else { + state = coder->opts[pos_prev].state; + } + + if (pos_prev == cur - 1) { + if (is_short_rep(coder->opts[cur])) + update_short_rep(state); + else + update_literal(state); + } else { + uint32_t pos; + if (coder->opts[cur].prev_1_is_literal + && coder->opts[cur].prev_2) { + pos_prev = coder->opts[cur].pos_prev_2; + pos = coder->opts[cur].back_prev_2; + update_long_rep(state); + } else { + pos = coder->opts[cur].back_prev; + if (pos < REPS) + update_long_rep(state); + else + update_match(state); + } + + if (pos < REPS) { + reps[0] = coder->opts[pos_prev].backs[pos]; + + uint32_t i; + for (i = 1; i <= pos; ++i) + reps[i] = coder->opts[pos_prev].backs[i - 1]; + + for (; i < REPS; ++i) + reps[i] = coder->opts[pos_prev].backs[i]; + + } else { + reps[0] = pos - REPS; + + for (uint32_t i = 1; i < REPS; ++i) + reps[i] = coder->opts[pos_prev].backs[i - 1]; + } + } + + coder->opts[cur].state = state; + + for (uint32_t i = 0; i < REPS; ++i) + coder->opts[cur].backs[i] = reps[i]; + + const uint32_t cur_price = coder->opts[cur].price; + + const uint8_t current_byte = *buf; + const uint8_t match_byte = *(buf - reps[0] - 1); + + const uint32_t pos_state = position & coder->pos_mask; + + const uint32_t cur_and_1_price = cur_price + + rc_bit_0_price(coder->is_match[state][pos_state]) + + get_literal_price(coder, position, buf[-1], + !is_literal_state(state), match_byte, current_byte); + + bool next_is_literal = false; + + if (cur_and_1_price < coder->opts[cur + 1].price) { + coder->opts[cur + 1].price = cur_and_1_price; + coder->opts[cur + 1].pos_prev = cur; + make_literal(&coder->opts[cur + 1]); + next_is_literal = true; + } + + const uint32_t match_price = cur_price + + rc_bit_1_price(coder->is_match[state][pos_state]); + const uint32_t rep_match_price = match_price + + rc_bit_1_price(coder->is_rep[state]); + + if (match_byte == current_byte + && !(coder->opts[cur + 1].pos_prev < cur + && coder->opts[cur + 1].back_prev == 0)) { + + const uint32_t short_rep_price = rep_match_price + + get_short_rep_price(coder, state, pos_state); + + if (short_rep_price <= coder->opts[cur + 1].price) { + coder->opts[cur + 1].price = short_rep_price; + coder->opts[cur + 1].pos_prev = cur; + make_short_rep(&coder->opts[cur + 1]); + next_is_literal = true; + } + } + + if (buf_avail_full < 2) + return len_end; + + const uint32_t buf_avail = my_min(buf_avail_full, nice_len); + + if (!next_is_literal && match_byte != current_byte) { // speed optimization + // try literal + rep0 + const uint8_t *const buf_back = buf - reps[0] - 1; + const uint32_t limit = my_min(buf_avail_full, nice_len + 1); + + const uint32_t len_test = lzma_memcmplen(buf, buf_back, 1, limit) - 1; + + if (len_test >= 2) { + lzma_lzma_state state_2 = state; + update_literal(state_2); + + const uint32_t pos_state_next = (position + 1) & coder->pos_mask; + const uint32_t next_rep_match_price = cur_and_1_price + + rc_bit_1_price(coder->is_match[state_2][pos_state_next]) + + rc_bit_1_price(coder->is_rep[state_2]); + + //for (; len_test >= 2; --len_test) { + const uint32_t offset = cur + 1 + len_test; + + while (len_end < offset) + coder->opts[++len_end].price = RC_INFINITY_PRICE; + + const uint32_t cur_and_len_price = next_rep_match_price + + get_rep_price(coder, 0, len_test, + state_2, pos_state_next); + + if (cur_and_len_price < coder->opts[offset].price) { + coder->opts[offset].price = cur_and_len_price; + coder->opts[offset].pos_prev = cur + 1; + coder->opts[offset].back_prev = 0; + coder->opts[offset].prev_1_is_literal = true; + coder->opts[offset].prev_2 = false; + } + //} + } + } + + + uint32_t start_len = 2; // speed optimization + + for (uint32_t rep_index = 0; rep_index < REPS; ++rep_index) { + const uint8_t *const buf_back = buf - reps[rep_index] - 1; + if (not_equal_16(buf, buf_back)) + continue; + + uint32_t len_test = lzma_memcmplen(buf, buf_back, 2, buf_avail); + + while (len_end < cur + len_test) + coder->opts[++len_end].price = RC_INFINITY_PRICE; + + const uint32_t len_test_temp = len_test; + const uint32_t price = rep_match_price + get_pure_rep_price( + coder, rep_index, state, pos_state); + + do { + const uint32_t cur_and_len_price = price + + get_len_price(&coder->rep_len_encoder, + len_test, pos_state); + + if (cur_and_len_price < coder->opts[cur + len_test].price) { + coder->opts[cur + len_test].price = cur_and_len_price; + coder->opts[cur + len_test].pos_prev = cur; + coder->opts[cur + len_test].back_prev = rep_index; + coder->opts[cur + len_test].prev_1_is_literal = false; + } + } while (--len_test >= 2); + + len_test = len_test_temp; + + if (rep_index == 0) + start_len = len_test + 1; + + + uint32_t len_test_2 = len_test + 1; + const uint32_t limit = my_min(buf_avail_full, + len_test_2 + nice_len); + // NOTE: len_test_2 may be greater than limit so the call to + // lzma_memcmplen() must be done conditionally. + if (len_test_2 < limit) + len_test_2 = lzma_memcmplen(buf, buf_back, len_test_2, limit); + + len_test_2 -= len_test + 1; + + if (len_test_2 >= 2) { + lzma_lzma_state state_2 = state; + update_long_rep(state_2); + + uint32_t pos_state_next = (position + len_test) & coder->pos_mask; + + const uint32_t cur_and_len_literal_price = price + + get_len_price(&coder->rep_len_encoder, + len_test, pos_state) + + rc_bit_0_price(coder->is_match[state_2][pos_state_next]) + + get_literal_price(coder, position + len_test, + buf[len_test - 1], true, + buf_back[len_test], buf[len_test]); + + update_literal(state_2); + + pos_state_next = (position + len_test + 1) & coder->pos_mask; + + const uint32_t next_rep_match_price = cur_and_len_literal_price + + rc_bit_1_price(coder->is_match[state_2][pos_state_next]) + + rc_bit_1_price(coder->is_rep[state_2]); + + //for(; len_test_2 >= 2; len_test_2--) { + const uint32_t offset = cur + len_test + 1 + len_test_2; + + while (len_end < offset) + coder->opts[++len_end].price = RC_INFINITY_PRICE; + + const uint32_t cur_and_len_price = next_rep_match_price + + get_rep_price(coder, 0, len_test_2, + state_2, pos_state_next); + + if (cur_and_len_price < coder->opts[offset].price) { + coder->opts[offset].price = cur_and_len_price; + coder->opts[offset].pos_prev = cur + len_test + 1; + coder->opts[offset].back_prev = 0; + coder->opts[offset].prev_1_is_literal = true; + coder->opts[offset].prev_2 = true; + coder->opts[offset].pos_prev_2 = cur; + coder->opts[offset].back_prev_2 = rep_index; + } + //} + } + } + + + //for (uint32_t len_test = 2; len_test <= new_len; ++len_test) + if (new_len > buf_avail) { + new_len = buf_avail; + + matches_count = 0; + while (new_len > coder->matches[matches_count].len) + ++matches_count; + + coder->matches[matches_count++].len = new_len; + } + + + if (new_len >= start_len) { + const uint32_t normal_match_price = match_price + + rc_bit_0_price(coder->is_rep[state]); + + while (len_end < cur + new_len) + coder->opts[++len_end].price = RC_INFINITY_PRICE; + + uint32_t i = 0; + while (start_len > coder->matches[i].len) + ++i; + + for (uint32_t len_test = start_len; ; ++len_test) { + const uint32_t cur_back = coder->matches[i].dist; + uint32_t cur_and_len_price = normal_match_price + + get_dist_len_price(coder, + cur_back, len_test, pos_state); + + if (cur_and_len_price < coder->opts[cur + len_test].price) { + coder->opts[cur + len_test].price = cur_and_len_price; + coder->opts[cur + len_test].pos_prev = cur; + coder->opts[cur + len_test].back_prev + = cur_back + REPS; + coder->opts[cur + len_test].prev_1_is_literal = false; + } + + if (len_test == coder->matches[i].len) { + // Try Match + Literal + Rep0 + const uint8_t *const buf_back = buf - cur_back - 1; + uint32_t len_test_2 = len_test + 1; + const uint32_t limit = my_min(buf_avail_full, + len_test_2 + nice_len); + + // NOTE: len_test_2 may be greater than limit + // so the call to lzma_memcmplen() must be + // done conditionally. + if (len_test_2 < limit) + len_test_2 = lzma_memcmplen(buf, buf_back, + len_test_2, limit); + + len_test_2 -= len_test + 1; + + if (len_test_2 >= 2) { + lzma_lzma_state state_2 = state; + update_match(state_2); + uint32_t pos_state_next + = (position + len_test) & coder->pos_mask; + + const uint32_t cur_and_len_literal_price = cur_and_len_price + + rc_bit_0_price( + coder->is_match[state_2][pos_state_next]) + + get_literal_price(coder, + position + len_test, + buf[len_test - 1], + true, + buf_back[len_test], + buf[len_test]); + + update_literal(state_2); + pos_state_next = (pos_state_next + 1) & coder->pos_mask; + + const uint32_t next_rep_match_price + = cur_and_len_literal_price + + rc_bit_1_price( + coder->is_match[state_2][pos_state_next]) + + rc_bit_1_price(coder->is_rep[state_2]); + + // for(; len_test_2 >= 2; --len_test_2) { + const uint32_t offset = cur + len_test + 1 + len_test_2; + + while (len_end < offset) + coder->opts[++len_end].price = RC_INFINITY_PRICE; + + cur_and_len_price = next_rep_match_price + + get_rep_price(coder, 0, len_test_2, + state_2, pos_state_next); + + if (cur_and_len_price < coder->opts[offset].price) { + coder->opts[offset].price = cur_and_len_price; + coder->opts[offset].pos_prev = cur + len_test + 1; + coder->opts[offset].back_prev = 0; + coder->opts[offset].prev_1_is_literal = true; + coder->opts[offset].prev_2 = true; + coder->opts[offset].pos_prev_2 = cur; + coder->opts[offset].back_prev_2 + = cur_back + REPS; + } + //} + } + + if (++i == matches_count) + break; + } + } + } + + return len_end; +} + + +extern void +lzma_lzma_optimum_normal(lzma_lzma1_encoder *restrict coder, + lzma_mf *restrict mf, + uint32_t *restrict back_res, uint32_t *restrict len_res, + uint32_t position) +{ + // If we have symbols pending, return the next pending symbol. + if (coder->opts_end_index != coder->opts_current_index) { + assert(mf->read_ahead > 0); + *len_res = coder->opts[coder->opts_current_index].pos_prev + - coder->opts_current_index; + *back_res = coder->opts[coder->opts_current_index].back_prev; + coder->opts_current_index = coder->opts[ + coder->opts_current_index].pos_prev; + return; + } + + // Update the price tables. In LZMA SDK <= 4.60 (and possibly later) + // this was done in both initialization function and in the main loop. + // In liblzma they were moved into this single place. + if (mf->read_ahead == 0) { + if (coder->match_price_count >= (1 << 7)) + fill_dist_prices(coder); + + if (coder->align_price_count >= ALIGN_SIZE) + fill_align_prices(coder); + } + + // TODO: This needs quite a bit of cleaning still. But splitting + // the original function into two pieces makes it at least a little + // more readable, since those two parts don't share many variables. + + uint32_t len_end = helper1(coder, mf, back_res, len_res, position); + if (len_end == UINT32_MAX) + return; + + uint32_t reps[REPS]; + memcpy(reps, coder->reps, sizeof(reps)); + + uint32_t cur; + for (cur = 1; cur < len_end; ++cur) { + assert(cur < OPTS); + + coder->longest_match_length = mf_find( + mf, &coder->matches_count, coder->matches); + + if (coder->longest_match_length >= mf->nice_len) + break; + + len_end = helper2(coder, reps, mf_ptr(mf) - 1, len_end, + position + cur, cur, mf->nice_len, + my_min(mf_avail(mf) + 1, OPTS - 1 - cur)); + } + + backward(coder, len_res, back_res, cur); + return; +} diff --git a/src/native/external/xz/src/liblzma/lzma/lzma_encoder_presets.c b/src/native/external/xz/src/liblzma/lzma/lzma_encoder_presets.c new file mode 100644 index 00000000000000..e53483f99582e5 --- /dev/null +++ b/src/native/external/xz/src/liblzma/lzma/lzma_encoder_presets.c @@ -0,0 +1,63 @@ +// SPDX-License-Identifier: 0BSD + +/////////////////////////////////////////////////////////////////////////////// +// +/// \file lzma_encoder_presets.c +/// \brief Encoder presets +/// \note xz needs this even when only decoding is enabled. +// +// Author: Lasse Collin +// +/////////////////////////////////////////////////////////////////////////////// + +#include "common.h" + + +extern LZMA_API(lzma_bool) +lzma_lzma_preset(lzma_options_lzma *options, uint32_t preset) +{ + const uint32_t level = preset & LZMA_PRESET_LEVEL_MASK; + const uint32_t flags = preset & ~LZMA_PRESET_LEVEL_MASK; + const uint32_t supported_flags = LZMA_PRESET_EXTREME; + + if (level > 9 || (flags & ~supported_flags)) + return true; + + options->preset_dict = NULL; + options->preset_dict_size = 0; + + options->lc = LZMA_LC_DEFAULT; + options->lp = LZMA_LP_DEFAULT; + options->pb = LZMA_PB_DEFAULT; + + static const uint8_t dict_pow2[] + = { 18, 20, 21, 22, 22, 23, 23, 24, 25, 26 }; + options->dict_size = UINT32_C(1) << dict_pow2[level]; + + if (level <= 3) { + options->mode = LZMA_MODE_FAST; + options->mf = level == 0 ? LZMA_MF_HC3 : LZMA_MF_HC4; + options->nice_len = level <= 1 ? 128 : 273; + static const uint8_t depths[] = { 4, 8, 24, 48 }; + options->depth = depths[level]; + } else { + options->mode = LZMA_MODE_NORMAL; + options->mf = LZMA_MF_BT4; + options->nice_len = level == 4 ? 16 : level == 5 ? 32 : 64; + options->depth = 0; + } + + if (flags & LZMA_PRESET_EXTREME) { + options->mode = LZMA_MODE_NORMAL; + options->mf = LZMA_MF_BT4; + if (level == 3 || level == 5) { + options->nice_len = 192; + options->depth = 0; + } else { + options->nice_len = 273; + options->depth = 512; + } + } + + return false; +} diff --git a/src/native/external/xz/src/liblzma/lzma/lzma_encoder_private.h b/src/native/external/xz/src/liblzma/lzma/lzma_encoder_private.h new file mode 100644 index 00000000000000..eeea5e9c128914 --- /dev/null +++ b/src/native/external/xz/src/liblzma/lzma/lzma_encoder_private.h @@ -0,0 +1,161 @@ +// SPDX-License-Identifier: 0BSD + +/////////////////////////////////////////////////////////////////////////////// +// +/// \file lzma_encoder_private.h +/// \brief Private definitions for LZMA encoder +/// +// Authors: Igor Pavlov +// Lasse Collin +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef LZMA_LZMA_ENCODER_PRIVATE_H +#define LZMA_LZMA_ENCODER_PRIVATE_H + +#include "lz_encoder.h" +#include "range_encoder.h" +#include "lzma_common.h" +#include "lzma_encoder.h" + + +// Macro to compare if the first two bytes in two buffers differ. This is +// needed in lzma_lzma_optimum_*() to test if the match is at least +// MATCH_LEN_MIN bytes. Unaligned access gives tiny gain so there's no +// reason to not use it when it is supported. +#ifdef TUKLIB_FAST_UNALIGNED_ACCESS +# define not_equal_16(a, b) (read16ne(a) != read16ne(b)) +#else +# define not_equal_16(a, b) \ + ((a)[0] != (b)[0] || (a)[1] != (b)[1]) +#endif + + +// Optimal - Number of entries in the optimum array. +#define OPTS (1 << 12) + + +typedef struct { + probability choice; + probability choice2; + probability low[POS_STATES_MAX][LEN_LOW_SYMBOLS]; + probability mid[POS_STATES_MAX][LEN_MID_SYMBOLS]; + probability high[LEN_HIGH_SYMBOLS]; + + uint32_t prices[POS_STATES_MAX][LEN_SYMBOLS]; + uint32_t table_size; + uint32_t counters[POS_STATES_MAX]; + +} lzma_length_encoder; + + +typedef struct { + lzma_lzma_state state; + + bool prev_1_is_literal; + bool prev_2; + + uint32_t pos_prev_2; + uint32_t back_prev_2; + + uint32_t price; + uint32_t pos_prev; // pos_next; + uint32_t back_prev; + + uint32_t backs[REPS]; + +} lzma_optimal; + + +struct lzma_lzma1_encoder_s { + /// Range encoder + lzma_range_encoder rc; + + /// Uncompressed size (doesn't include possible preset dictionary) + uint64_t uncomp_size; + + /// If non-zero, produce at most this much output. + /// Some input may then be missing from the output. + uint64_t out_limit; + + /// If the above out_limit is non-zero, *uncomp_size_ptr is set to + /// the amount of uncompressed data that we were able to fit + /// in the output buffer. + uint64_t *uncomp_size_ptr; + + /// State + lzma_lzma_state state; + + /// The four most recent match distances + uint32_t reps[REPS]; + + /// Array of match candidates + lzma_match matches[MATCH_LEN_MAX + 1]; + + /// Number of match candidates in matches[] + uint32_t matches_count; + + /// Variable to hold the length of the longest match between calls + /// to lzma_lzma_optimum_*(). + uint32_t longest_match_length; + + /// True if using getoptimumfast + bool fast_mode; + + /// True if the encoder has been initialized by encoding the first + /// byte as a literal. + bool is_initialized; + + /// True if the range encoder has been flushed, but not all bytes + /// have been written to the output buffer yet. + bool is_flushed; + + /// True if end of payload marker will be written. + bool use_eopm; + + uint32_t pos_mask; ///< (1 << pos_bits) - 1 + uint32_t literal_context_bits; + uint32_t literal_mask; + + // These are the same as in lzma_decoder.c. See comments there. + probability literal[LITERAL_CODERS_MAX * LITERAL_CODER_SIZE]; + probability is_match[STATES][POS_STATES_MAX]; + probability is_rep[STATES]; + probability is_rep0[STATES]; + probability is_rep1[STATES]; + probability is_rep2[STATES]; + probability is_rep0_long[STATES][POS_STATES_MAX]; + probability dist_slot[DIST_STATES][DIST_SLOTS]; + probability dist_special[FULL_DISTANCES - DIST_MODEL_END]; + probability dist_align[ALIGN_SIZE]; + + // These are the same as in lzma_decoder.c except that the encoders + // include also price tables. + lzma_length_encoder match_len_encoder; + lzma_length_encoder rep_len_encoder; + + // Price tables + uint32_t dist_slot_prices[DIST_STATES][DIST_SLOTS]; + uint32_t dist_prices[DIST_STATES][FULL_DISTANCES]; + uint32_t dist_table_size; + uint32_t match_price_count; + + uint32_t align_prices[ALIGN_SIZE]; + uint32_t align_price_count; + + // Optimal + uint32_t opts_end_index; + uint32_t opts_current_index; + lzma_optimal opts[OPTS]; +}; + + +extern void lzma_lzma_optimum_fast( + lzma_lzma1_encoder *restrict coder, lzma_mf *restrict mf, + uint32_t *restrict back_res, uint32_t *restrict len_res); + +extern void lzma_lzma_optimum_normal(lzma_lzma1_encoder *restrict coder, + lzma_mf *restrict mf, uint32_t *restrict back_res, + uint32_t *restrict len_res, uint32_t position); + +#endif diff --git a/src/native/external/xz/src/liblzma/rangecoder/Makefile.inc b/src/native/external/xz/src/liblzma/rangecoder/Makefile.inc new file mode 100644 index 00000000000000..9a00e6e15531ed --- /dev/null +++ b/src/native/external/xz/src/liblzma/rangecoder/Makefile.inc @@ -0,0 +1,17 @@ +## SPDX-License-Identifier: 0BSD +## Author: Lasse Collin + +EXTRA_DIST += rangecoder/price_tablegen.c + +liblzma_la_SOURCES += rangecoder/range_common.h + +if COND_ENCODER_LZMA1 +liblzma_la_SOURCES += \ + rangecoder/range_encoder.h \ + rangecoder/price.h \ + rangecoder/price_table.c +endif + +if COND_DECODER_LZMA1 +liblzma_la_SOURCES += rangecoder/range_decoder.h +endif diff --git a/src/native/external/xz/src/liblzma/rangecoder/price.h b/src/native/external/xz/src/liblzma/rangecoder/price.h new file mode 100644 index 00000000000000..cce6bdae5f93af --- /dev/null +++ b/src/native/external/xz/src/liblzma/rangecoder/price.h @@ -0,0 +1,92 @@ +// SPDX-License-Identifier: 0BSD + +/////////////////////////////////////////////////////////////////////////////// +// +/// \file price.h +/// \brief Probability price calculation +// +// Author: Igor Pavlov +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef LZMA_PRICE_H +#define LZMA_PRICE_H + + +#define RC_MOVE_REDUCING_BITS 4 +#define RC_BIT_PRICE_SHIFT_BITS 4 +#define RC_PRICE_TABLE_SIZE (RC_BIT_MODEL_TOTAL >> RC_MOVE_REDUCING_BITS) + +#define RC_INFINITY_PRICE (UINT32_C(1) << 30) + + +/// Lookup table for the inline functions defined in this file. +lzma_attr_visibility_hidden +extern const uint8_t lzma_rc_prices[RC_PRICE_TABLE_SIZE]; + + +static inline uint32_t +rc_bit_price(const probability prob, const uint32_t bit) +{ + return lzma_rc_prices[(prob ^ ((UINT32_C(0) - bit) + & (RC_BIT_MODEL_TOTAL - 1))) >> RC_MOVE_REDUCING_BITS]; +} + + +static inline uint32_t +rc_bit_0_price(const probability prob) +{ + return lzma_rc_prices[prob >> RC_MOVE_REDUCING_BITS]; +} + + +static inline uint32_t +rc_bit_1_price(const probability prob) +{ + return lzma_rc_prices[(prob ^ (RC_BIT_MODEL_TOTAL - 1)) + >> RC_MOVE_REDUCING_BITS]; +} + + +static inline uint32_t +rc_bittree_price(const probability *const probs, + const uint32_t bit_levels, uint32_t symbol) +{ + uint32_t price = 0; + symbol += UINT32_C(1) << bit_levels; + + do { + const uint32_t bit = symbol & 1; + symbol >>= 1; + price += rc_bit_price(probs[symbol], bit); + } while (symbol != 1); + + return price; +} + + +static inline uint32_t +rc_bittree_reverse_price(const probability *const probs, + uint32_t bit_levels, uint32_t symbol) +{ + uint32_t price = 0; + uint32_t model_index = 1; + + do { + const uint32_t bit = symbol & 1; + symbol >>= 1; + price += rc_bit_price(probs[model_index], bit); + model_index = (model_index << 1) + bit; + } while (--bit_levels != 0); + + return price; +} + + +static inline uint32_t +rc_direct_price(const uint32_t bits) +{ + return bits << RC_BIT_PRICE_SHIFT_BITS; +} + +#endif diff --git a/src/native/external/xz/src/liblzma/rangecoder/price_table.c b/src/native/external/xz/src/liblzma/rangecoder/price_table.c new file mode 100644 index 00000000000000..c33433f718ca32 --- /dev/null +++ b/src/native/external/xz/src/liblzma/rangecoder/price_table.c @@ -0,0 +1,24 @@ +// SPDX-License-Identifier: 0BSD + +// This file has been generated by price_tablegen.c. + +#include "range_encoder.h" + +const uint8_t lzma_rc_prices[RC_PRICE_TABLE_SIZE] = { + 128, 103, 91, 84, 78, 73, 69, 66, + 63, 61, 58, 56, 54, 52, 51, 49, + 48, 46, 45, 44, 43, 42, 41, 40, + 39, 38, 37, 36, 35, 34, 34, 33, + 32, 31, 31, 30, 29, 29, 28, 28, + 27, 26, 26, 25, 25, 24, 24, 23, + 23, 22, 22, 22, 21, 21, 20, 20, + 19, 19, 19, 18, 18, 17, 17, 17, + 16, 16, 16, 15, 15, 15, 14, 14, + 14, 13, 13, 13, 12, 12, 12, 11, + 11, 11, 11, 10, 10, 10, 10, 9, + 9, 9, 9, 8, 8, 8, 8, 7, + 7, 7, 7, 6, 6, 6, 6, 5, + 5, 5, 5, 5, 4, 4, 4, 4, + 3, 3, 3, 3, 3, 2, 2, 2, + 2, 2, 2, 1, 1, 1, 1, 1 +}; diff --git a/src/native/external/xz/src/liblzma/rangecoder/price_tablegen.c b/src/native/external/xz/src/liblzma/rangecoder/price_tablegen.c new file mode 100644 index 00000000000000..4b6ca37efadfcb --- /dev/null +++ b/src/native/external/xz/src/liblzma/rangecoder/price_tablegen.c @@ -0,0 +1,93 @@ +// SPDX-License-Identifier: 0BSD + +/////////////////////////////////////////////////////////////////////////////// +// +/// \file price_tablegen.c +/// \brief Probability price table generator +/// +/// Compiling: gcc -std=c99 -o price_tablegen price_tablegen.c +/// +// Authors: Igor Pavlov +// Lasse Collin +// +/////////////////////////////////////////////////////////////////////////////// + +#include +#include + +// Make it compile without common.h. +#define BUILDING_PRICE_TABLEGEN +#define lzma_attr_visibility_hidden + +#include "range_common.h" +#include "price.h" + + +static uint32_t rc_prices[RC_PRICE_TABLE_SIZE]; + + +static void +init_price_table(void) +{ + for (uint32_t i = (UINT32_C(1) << RC_MOVE_REDUCING_BITS) / 2; + i < RC_BIT_MODEL_TOTAL; + i += (UINT32_C(1) << RC_MOVE_REDUCING_BITS)) { + const uint32_t cycles_bits = RC_BIT_PRICE_SHIFT_BITS; + uint32_t w = i; + uint32_t bit_count = 0; + + for (uint32_t j = 0; j < cycles_bits; ++j) { + w *= w; + bit_count <<= 1; + + while (w >= (UINT32_C(1) << 16)) { + w >>= 1; + ++bit_count; + } + } + + rc_prices[i >> RC_MOVE_REDUCING_BITS] + = (RC_BIT_MODEL_TOTAL_BITS << cycles_bits) + - 15 - bit_count; + } + + return; +} + + +static void +print_price_table(void) +{ + // Split the SPDX string so that it won't accidentally match + // when tools search for the string. + printf("// SPDX" "-License-Identifier" ": 0BSD\n\n" + "// This file has been generated by price_tablegen.c.\n\n" + "#include \"range_encoder.h\"\n\n" + "const uint8_t lzma_rc_prices[" + "RC_PRICE_TABLE_SIZE] = {"); + + const size_t array_size = sizeof(lzma_rc_prices) + / sizeof(lzma_rc_prices[0]); + for (size_t i = 0; i < array_size; ++i) { + if (i % 8 == 0) + printf("\n\t"); + + printf("%4" PRIu32, rc_prices[i]); + + if (i != array_size - 1) + printf(","); + } + + printf("\n};\n"); + + return; +} + + +int +main(void) +{ + init_price_table(); + print_price_table(); + return 0; +} diff --git a/src/native/external/xz/src/liblzma/rangecoder/range_common.h b/src/native/external/xz/src/liblzma/rangecoder/range_common.h new file mode 100644 index 00000000000000..ac4dbe196f5010 --- /dev/null +++ b/src/native/external/xz/src/liblzma/rangecoder/range_common.h @@ -0,0 +1,77 @@ +// SPDX-License-Identifier: 0BSD + +/////////////////////////////////////////////////////////////////////////////// +// +/// \file range_common.h +/// \brief Common things for range encoder and decoder +/// +// Authors: Igor Pavlov +// Lasse Collin +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef LZMA_RANGE_COMMON_H +#define LZMA_RANGE_COMMON_H + +// Skip common.h if building price_tablegen.c. +#ifndef BUILDING_PRICE_TABLEGEN +# include "common.h" +#endif + + +/////////////// +// Constants // +/////////////// + +#define RC_SHIFT_BITS 8 +#define RC_TOP_BITS 24 +#define RC_TOP_VALUE (UINT32_C(1) << RC_TOP_BITS) +#define RC_BIT_MODEL_TOTAL_BITS 11 +#define RC_BIT_MODEL_TOTAL (UINT32_C(1) << RC_BIT_MODEL_TOTAL_BITS) +#define RC_MOVE_BITS 5 + + +//////////// +// Macros // +//////////// + +// Resets the probability so that both 0 and 1 have probability of 50 % +#define bit_reset(prob) \ + prob = RC_BIT_MODEL_TOTAL >> 1 + +// This does the same for a complete bit tree. +// (A tree represented as an array.) +#define bittree_reset(probs, bit_levels) \ + for (uint32_t bt_i = 0; bt_i < (1 << (bit_levels)); ++bt_i) \ + bit_reset((probs)[bt_i]) + + +////////////////////// +// Type definitions // +////////////////////// + +/// \brief Type of probabilities used with range coder +/// +/// This needs to be at least 12-bit integer, so uint16_t is a logical choice. +/// However, on some architecture and compiler combinations, a bigger type +/// may give better speed, because the probability variables are accessed +/// a lot. On the other hand, bigger probability type increases cache +/// footprint, since there are 2 to 14 thousand probability variables in +/// LZMA (assuming the limit of lc + lp <= 4; with lc + lp <= 12 there +/// would be about 1.5 million variables). +/// +/// With malicious files, the initialization speed of the LZMA decoder can +/// become important. In that case, smaller probability variables mean that +/// there is less bytes to write to RAM, which makes initialization faster. +/// With big probability type, the initialization can become so slow that it +/// can be a problem e.g. for email servers doing virus scanning. +/// +/// I will be sticking to uint16_t unless some specific architectures +/// are *much* faster (20-50 %) with uint32_t. +/// +/// Update in 2024: The branchless C and x86-64 assembly was written so that +/// probability is assumed to be uint16_t. (In contrast, LZMA SDK 23.01 +/// assembly supports both types.) +typedef uint16_t probability; + +#endif diff --git a/src/native/external/xz/src/liblzma/rangecoder/range_decoder.h b/src/native/external/xz/src/liblzma/rangecoder/range_decoder.h new file mode 100644 index 00000000000000..77cfaf7857d9b6 --- /dev/null +++ b/src/native/external/xz/src/liblzma/rangecoder/range_decoder.h @@ -0,0 +1,966 @@ +// SPDX-License-Identifier: 0BSD + +/////////////////////////////////////////////////////////////////////////////// +// +/// \file range_decoder.h +/// \brief Range Decoder +/// +// Authors: Igor Pavlov +// Lasse Collin +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef LZMA_RANGE_DECODER_H +#define LZMA_RANGE_DECODER_H + +#include "range_common.h" + + +// Choose the range decoder variants to use using a bitmask. +// If no bits are set, only the basic version is used. +// If more than one version is selected for the same feature, +// the last one on the list below is used. +// +// Bitwise-or of the following enable branchless C versions: +// 0x01 normal bittrees +// 0x02 fixed-sized reverse bittrees +// 0x04 variable-sized reverse bittrees (not faster) +// 0x08 matched literal (not faster) +// +// GCC & Clang compatible x86-64 inline assembly: +// 0x010 normal bittrees +// 0x020 fixed-sized reverse bittrees +// 0x040 variable-sized reverse bittrees +// 0x080 matched literal +// 0x100 direct bits +// +// The default can be overridden at build time by defining +// LZMA_RANGE_DECODER_CONFIG to the desired mask. +// +// 2024-02-22: Feedback from benchmarks: +// - Brancless C (0x003) can be better than basic on x86-64 but often it's +// slightly worse on other archs. Since asm is much better on x86-64, +// branchless C is not used at all. +// - With x86-64 asm, there are slight differences between GCC and Clang +// and different processors. Overall 0x1F0 seems to be the best choice. +#ifndef LZMA_RANGE_DECODER_CONFIG +# if defined(__x86_64__) && !defined(__ILP32__) \ + && !defined(__NVCOMPILER) \ + && (defined(__GNUC__) || defined(__clang__)) +# define LZMA_RANGE_DECODER_CONFIG 0x1F0 +# else +# define LZMA_RANGE_DECODER_CONFIG 0 +# endif +#endif + + +// Negative RC_BIT_MODEL_TOTAL but the lowest RC_MOVE_BITS are flipped. +// This is useful for updating probability variables in branchless decoding: +// +// uint32_t decoded_bit = ...; +// probability tmp = RC_BIT_MODEL_OFFSET; +// tmp &= decoded_bit - 1; +// prob -= (prob + tmp) >> RC_MOVE_BITS; +#define RC_BIT_MODEL_OFFSET \ + ((UINT32_C(1) << RC_MOVE_BITS) - 1 - RC_BIT_MODEL_TOTAL) + + +typedef struct { + uint32_t range; + uint32_t code; + uint32_t init_bytes_left; +} lzma_range_decoder; + + +/// Reads the first five bytes to initialize the range decoder. +static inline lzma_ret +rc_read_init(lzma_range_decoder *rc, const uint8_t *restrict in, + size_t *restrict in_pos, size_t in_size) +{ + while (rc->init_bytes_left > 0) { + if (*in_pos == in_size) + return LZMA_OK; + + // The first byte is always 0x00. It could have been omitted + // in LZMA2 but it wasn't, so one byte is wasted in every + // LZMA2 chunk. + if (rc->init_bytes_left == 5 && in[*in_pos] != 0x00) + return LZMA_DATA_ERROR; + + rc->code = (rc->code << 8) | in[*in_pos]; + ++*in_pos; + --rc->init_bytes_left; + } + + return LZMA_STREAM_END; +} + + +/// Makes local copies of range decoder and *in_pos variables. Doing this +/// improves speed significantly. The range decoder macros expect also +/// variables 'in' and 'in_size' to be defined. +#define rc_to_local(range_decoder, in_pos, fast_mode_in_required) \ + lzma_range_decoder rc = range_decoder; \ + const uint8_t *rc_in_ptr = in + (in_pos); \ + const uint8_t *rc_in_end = in + in_size; \ + const uint8_t *rc_in_fast_end \ + = (rc_in_end - rc_in_ptr) <= (fast_mode_in_required) \ + ? rc_in_ptr \ + : rc_in_end - (fast_mode_in_required); \ + (void)rc_in_fast_end; /* Silence a warning with HAVE_SMALL. */ \ + uint32_t rc_bound + + +/// Evaluates to true if there is enough input remaining to use fast mode. +#define rc_is_fast_allowed() (rc_in_ptr < rc_in_fast_end) + + +/// Stores the local copes back to the range decoder structure. +#define rc_from_local(range_decoder, in_pos) \ +do { \ + range_decoder = rc; \ + in_pos = (size_t)(rc_in_ptr - in); \ +} while (0) + + +/// Resets the range decoder structure. +#define rc_reset(range_decoder) \ +do { \ + (range_decoder).range = UINT32_MAX; \ + (range_decoder).code = 0; \ + (range_decoder).init_bytes_left = 5; \ +} while (0) + + +/// When decoding has been properly finished, rc.code is always zero unless +/// the input stream is corrupt. So checking this can catch some corrupt +/// files especially if they don't have any other integrity check. +#define rc_is_finished(range_decoder) \ + ((range_decoder).code == 0) + + +// Read the next input byte if needed. +#define rc_normalize() \ +do { \ + if (rc.range < RC_TOP_VALUE) { \ + rc.range <<= RC_SHIFT_BITS; \ + rc.code = (rc.code << RC_SHIFT_BITS) | *rc_in_ptr++; \ + } \ +} while (0) + + +/// If more input is needed but there is +/// no more input available, "goto out" is used to jump out of the main +/// decoder loop. The "_safe" macros are used in the Resumable decoder +/// mode in order to save the sequence to continue decoding from that +/// point later. +#define rc_normalize_safe(seq) \ +do { \ + if (rc.range < RC_TOP_VALUE) { \ + if (rc_in_ptr == rc_in_end) { \ + coder->sequence = seq; \ + goto out; \ + } \ + rc.range <<= RC_SHIFT_BITS; \ + rc.code = (rc.code << RC_SHIFT_BITS) | *rc_in_ptr++; \ + } \ +} while (0) + + +/// Start decoding a bit. This must be used together with rc_update_0() +/// and rc_update_1(): +/// +/// rc_if_0(prob) { +/// rc_update_0(prob); +/// // Do something +/// } else { +/// rc_update_1(prob); +/// // Do something else +/// } +/// +#define rc_if_0(prob) \ + rc_normalize(); \ + rc_bound = (rc.range >> RC_BIT_MODEL_TOTAL_BITS) * (prob); \ + if (rc.code < rc_bound) + + +#define rc_if_0_safe(prob, seq) \ + rc_normalize_safe(seq); \ + rc_bound = (rc.range >> RC_BIT_MODEL_TOTAL_BITS) * (prob); \ + if (rc.code < rc_bound) + + +/// Update the range decoder state and the used probability variable to +/// match a decoded bit of 0. +/// +/// The x86-64 assembly uses the commented method but it seems that, +/// at least on x86-64, the first version is slightly faster as C code. +#define rc_update_0(prob) \ +do { \ + rc.range = rc_bound; \ + prob += (RC_BIT_MODEL_TOTAL - (prob)) >> RC_MOVE_BITS; \ + /* prob -= ((prob) + RC_BIT_MODEL_OFFSET) >> RC_MOVE_BITS; */ \ +} while (0) + + +/// Update the range decoder state and the used probability variable to +/// match a decoded bit of 1. +#define rc_update_1(prob) \ +do { \ + rc.range -= rc_bound; \ + rc.code -= rc_bound; \ + prob -= (prob) >> RC_MOVE_BITS; \ +} while (0) + + +/// Decodes one bit and runs action0 or action1 depending on the decoded bit. +/// This macro is used as the last step in bittree reverse decoders since +/// those don't use "symbol" for anything else than indexing the probability +/// arrays. +#define rc_bit_last(prob, action0, action1) \ +do { \ + rc_if_0(prob) { \ + rc_update_0(prob); \ + action0; \ + } else { \ + rc_update_1(prob); \ + action1; \ + } \ +} while (0) + + +#define rc_bit_last_safe(prob, action0, action1, seq) \ +do { \ + rc_if_0_safe(prob, seq) { \ + rc_update_0(prob); \ + action0; \ + } else { \ + rc_update_1(prob); \ + action1; \ + } \ +} while (0) + + +/// Decodes one bit, updates "symbol", and runs action0 or action1 depending +/// on the decoded bit. +#define rc_bit(prob, action0, action1) \ + rc_bit_last(prob, \ + symbol <<= 1; action0, \ + symbol = (symbol << 1) + 1; action1) + + +#define rc_bit_safe(prob, action0, action1, seq) \ + rc_bit_last_safe(prob, \ + symbol <<= 1; action0, \ + symbol = (symbol << 1) + 1; action1, \ + seq) + +// Unroll fixed-sized bittree decoding. +// +// A compile-time constant in final_add can be used to get rid of the high bit +// from symbol that is used for the array indexing (1U << bittree_bits). +// final_add may also be used to add offset to the result (LZMA length +// decoder does that). +// +// The reason to have final_add here is that in the asm code the addition +// can be done for free: in x86-64 there is SBB instruction with -1 as +// the immediate value, and final_add is combined with that value. +#define rc_bittree_bit(prob) \ + rc_bit(prob, , ) + +#define rc_bittree3(probs, final_add) \ +do { \ + symbol = 1; \ + rc_bittree_bit(probs[symbol]); \ + rc_bittree_bit(probs[symbol]); \ + rc_bittree_bit(probs[symbol]); \ + symbol += (uint32_t)(final_add); \ +} while (0) + +#define rc_bittree6(probs, final_add) \ +do { \ + symbol = 1; \ + rc_bittree_bit(probs[symbol]); \ + rc_bittree_bit(probs[symbol]); \ + rc_bittree_bit(probs[symbol]); \ + rc_bittree_bit(probs[symbol]); \ + rc_bittree_bit(probs[symbol]); \ + rc_bittree_bit(probs[symbol]); \ + symbol += (uint32_t)(final_add); \ +} while (0) + +#define rc_bittree8(probs, final_add) \ +do { \ + symbol = 1; \ + rc_bittree_bit(probs[symbol]); \ + rc_bittree_bit(probs[symbol]); \ + rc_bittree_bit(probs[symbol]); \ + rc_bittree_bit(probs[symbol]); \ + rc_bittree_bit(probs[symbol]); \ + rc_bittree_bit(probs[symbol]); \ + rc_bittree_bit(probs[symbol]); \ + rc_bittree_bit(probs[symbol]); \ + symbol += (uint32_t)(final_add); \ +} while (0) + + +// Fixed-sized reverse bittree +#define rc_bittree_rev4(probs) \ +do { \ + symbol = 0; \ + rc_bit_last(probs[symbol + 1], , symbol += 1); \ + rc_bit_last(probs[symbol + 2], , symbol += 2); \ + rc_bit_last(probs[symbol + 4], , symbol += 4); \ + rc_bit_last(probs[symbol + 8], , symbol += 8); \ +} while (0) + + +// Decode one bit from variable-sized reverse bittree. The loop is done +// in the code that uses this macro. This could be changed if the assembly +// version benefited from having the loop done in assembly but it didn't +// seem so in early 2024. +// +// Also, if the loop was done here, the loop counter would likely be local +// to the macro so that it wouldn't modify yet another input variable. +// If a _safe version of a macro with a loop was done then a modifiable +// input variable couldn't be avoided though. +#define rc_bit_add_if_1(probs, dest, value_to_add_if_1) \ + rc_bit(probs[symbol], \ + , \ + dest += value_to_add_if_1) + + +// Matched literal +#define decode_with_match_bit \ + t_match_byte <<= 1; \ + t_match_bit = t_match_byte & t_offset; \ + t_subcoder_index = t_offset + t_match_bit + symbol; \ + rc_bit(probs[t_subcoder_index], \ + t_offset &= ~t_match_bit, \ + t_offset &= t_match_bit) + +#define rc_matched_literal(probs_base_var, match_byte) \ +do { \ + uint32_t t_match_byte = (match_byte); \ + uint32_t t_match_bit; \ + uint32_t t_subcoder_index; \ + uint32_t t_offset = 0x100; \ + symbol = 1; \ + decode_with_match_bit; \ + decode_with_match_bit; \ + decode_with_match_bit; \ + decode_with_match_bit; \ + decode_with_match_bit; \ + decode_with_match_bit; \ + decode_with_match_bit; \ + decode_with_match_bit; \ +} while (0) + + +/// Decode a bit without using a probability. +// +// NOTE: GCC 13 and Clang/LLVM 16 can, at least on x86-64, optimize the bound +// calculation to use an arithmetic right shift so there's no need to provide +// the alternative code which, according to C99/C11/C23 6.3.1.3-p3 isn't +// perfectly portable: rc_bound = (uint32_t)((int32_t)rc.code >> 31); +#define rc_direct(dest, count_var) \ +do { \ + dest = (dest << 1) + 1; \ + rc_normalize(); \ + rc.range >>= 1; \ + rc.code -= rc.range; \ + rc_bound = UINT32_C(0) - (rc.code >> 31); \ + dest += rc_bound; \ + rc.code += rc.range & rc_bound; \ +} while (--count_var > 0) + + + +#define rc_direct_safe(dest, count_var, seq) \ +do { \ + rc_normalize_safe(seq); \ + rc.range >>= 1; \ + rc.code -= rc.range; \ + rc_bound = UINT32_C(0) - (rc.code >> 31); \ + rc.code += rc.range & rc_bound; \ + dest = (dest << 1) + (rc_bound + 1); \ +} while (--count_var > 0) + + +////////////////// +// Branchless C // +////////////////// + +/// Decode a bit using a branchless method. This reduces the number of +/// mispredicted branches and thus can improve speed. +#define rc_c_bit(prob, action_bit, action_neg) \ +do { \ + probability *p = &(prob); \ + rc_normalize(); \ + rc_bound = (rc.range >> RC_BIT_MODEL_TOTAL_BITS) * *p; \ + uint32_t rc_mask = rc.code >= rc_bound; /* rc_mask = decoded bit */ \ + action_bit; /* action when rc_mask is 0 or 1 */ \ + /* rc_mask becomes 0 if bit is 0 and 0xFFFFFFFF if bit is 1: */ \ + rc_mask = 0U - rc_mask; \ + rc.range &= rc_mask; /* If bit 0: set rc.range = 0 */ \ + rc_bound ^= rc_mask; \ + rc_bound -= rc_mask; /* If bit 1: rc_bound = 0U - rc_bound */ \ + rc.range += rc_bound; \ + rc_bound &= rc_mask; \ + rc.code += rc_bound; \ + action_neg; /* action when rc_mask is 0 or 0xFFFFFFFF */ \ + rc_mask = ~rc_mask; /* If bit 0: all bits are set in rc_mask */ \ + rc_mask &= RC_BIT_MODEL_OFFSET; \ + *p -= (*p + rc_mask) >> RC_MOVE_BITS; \ +} while (0) + + +// Testing on x86-64 give an impression that only the normal bittrees and +// the fixed-sized reverse bittrees are worth the branchless C code. +// It should be tested on other archs for which there isn't assembly code +// in this file. + +// Using addition in "(symbol << 1) + rc_mask" allows use of x86 LEA +// or RISC-V SH1ADD instructions. Compilers might infer it from +// "(symbol << 1) | rc_mask" too if they see that mask is 0 or 1 but +// the use of addition doesn't require such analysis from compilers. +#if LZMA_RANGE_DECODER_CONFIG & 0x01 +#undef rc_bittree_bit +#define rc_bittree_bit(prob) \ + rc_c_bit(prob, \ + symbol = (symbol << 1) + rc_mask, \ + ) +#endif // LZMA_RANGE_DECODER_CONFIG & 0x01 + +#if LZMA_RANGE_DECODER_CONFIG & 0x02 +#undef rc_bittree_rev4 +#define rc_bittree_rev4(probs) \ +do { \ + symbol = 0; \ + rc_c_bit(probs[symbol + 1], symbol += rc_mask, ); \ + rc_c_bit(probs[symbol + 2], symbol += rc_mask << 1, ); \ + rc_c_bit(probs[symbol + 4], symbol += rc_mask << 2, ); \ + rc_c_bit(probs[symbol + 8], symbol += rc_mask << 3, ); \ +} while (0) +#endif // LZMA_RANGE_DECODER_CONFIG & 0x02 + +#if LZMA_RANGE_DECODER_CONFIG & 0x04 +#undef rc_bit_add_if_1 +#define rc_bit_add_if_1(probs, dest, value_to_add_if_1) \ + rc_c_bit(probs[symbol], \ + symbol = (symbol << 1) + rc_mask, \ + dest += (value_to_add_if_1) & rc_mask) +#endif // LZMA_RANGE_DECODER_CONFIG & 0x04 + + +#if LZMA_RANGE_DECODER_CONFIG & 0x08 +#undef decode_with_match_bit +#define decode_with_match_bit \ + t_match_byte <<= 1; \ + t_match_bit = t_match_byte & t_offset; \ + t_subcoder_index = t_offset + t_match_bit + symbol; \ + rc_c_bit(probs[t_subcoder_index], \ + symbol = (symbol << 1) + rc_mask, \ + t_offset &= ~t_match_bit ^ rc_mask) +#endif // LZMA_RANGE_DECODER_CONFIG & 0x08 + + +//////////// +// x86-64 // +//////////// + +#if LZMA_RANGE_DECODER_CONFIG & 0x1F0 + +// rc_asm_y and rc_asm_n are used as arguments to macros to control which +// strings to include or omit. +#define rc_asm_y(str) str +#define rc_asm_n(str) + +// There are a few possible variations for normalization. +// This is the smallest variant which is also used by LZMA SDK. +// +// - This has partial register write (the MOV from (%[in_ptr])). +// +// - INC saves one byte in code size over ADD. False dependency on +// partial flags from INC shouldn't become a problem on any processor +// because the instructions after normalization don't read the flags +// until SUB which sets all flags. +// +#define rc_asm_normalize \ + "cmp %[top_value], %[range]\n\t" \ + "jae 1f\n\t" \ + "shl %[shift_bits], %[code]\n\t" \ + "mov (%[in_ptr]), %b[code]\n\t" \ + "shl %[shift_bits], %[range]\n\t" \ + "inc %[in_ptr]\n" \ + "1:\n" + +// rc_asm_calc(prob) is roughly equivalent to the C version of rc_if_0(prob)... +// +// rc_bound = (rc.range >> RC_BIT_MODEL_TOTAL_BITS) * (prob); +// if (rc.code < rc_bound) +// +// ...but the bound is stored in "range": +// +// t0 = range; +// range = (range >> RC_BIT_MODEL_TOTAL_BITS) * (prob); +// t0 -= range; +// t1 = code; +// code -= range; +// +// The carry flag (CF) from the last subtraction holds the negation of +// the decoded bit (if CF==0 then the decoded bit is 1). +// The values in t0 and t1 are needed for rc_update_0(prob) and +// rc_update_1(prob). If the bit is 0, rc_update_0(prob)... +// +// rc.range = rc_bound; +// +// ...has already been done but the "code -= range" has to be reverted using +// the old value stored in t1. (Also, prob needs to be updated.) +// +// If the bit is 1, rc_update_1(prob)... +// +// rc.range -= rc_bound; +// rc.code -= rc_bound; +// +// ...is already done for "code" but the value for "range" needs to be taken +// from t0. (Also, prob needs to be updated here as well.) +// +// The assignments from t0 and t1 can be done in a branchless manner with CMOV +// after the instructions from this macro. The CF from SUB tells which moves +// are needed. +#define rc_asm_calc(prob) \ + "mov %[range], %[t0]\n\t" \ + "shr %[bit_model_total_bits], %[range]\n\t" \ + "imul %[" prob "], %[range]\n\t" \ + "sub %[range], %[t0]\n\t" \ + "mov %[code], %[t1]\n\t" \ + "sub %[range], %[code]\n\t" + +// Also, prob needs to be updated: The update math depends on the decoded bit. +// It can be expressed in a few slightly different ways but this is fairly +// convenient here: +// +// prob -= (prob + (bit ? 0 : RC_BIT_MODEL_OFFSET)) >> RC_MOVE_BITS; +// +// To do it in branchless way when the negation of the decoded bit is in CF, +// both "prob" and "prob + RC_BIT_MODEL_OFFSET" are needed. Then the desired +// value can be picked with CMOV. The addition can be done using LEA without +// affecting CF. +// +// (This prob update method is a tiny bit different from LZMA SDK 23.01. +// In the LZMA SDK a single register is reserved solely for a constant to +// be used with CMOV when updating prob. That is fine since there are enough +// free registers to do so. The method used here uses one fewer register, +// which is valuable with inline assembly.) +// +// * * * +// +// In bittree decoding, each (unrolled) loop iteration decodes one bit +// and needs one prob variable. To make it faster, the prob variable of +// the iteration N+1 is loaded during iteration N. There are two possible +// prob variables to choose from for N+1. Both are loaded from memory and +// the correct one is chosen with CMOV using the same CF as is used for +// other things described above. +// +// This preloading/prefetching requires an extra register. To avoid +// useless moves from "preloaded prob register" to "current prob register", +// the macros swap between the two registers for odd and even iterations. +// +// * * * +// +// Finally, the decoded bit has to be stored in "symbol". Since the negation +// of the bit is in CF, this can be done with SBB: symbol -= CF - 1. That is, +// if the decoded bit is 0 (CF==1) the operation is a no-op "symbol -= 0" +// and when bit is 1 (CF==0) the operation is "symbol -= 0 - 1" which is +// the same as "symbol += 1". +// +// The instructions for all things are intertwined for a few reasons: +// - freeing temporary registers for new use +// - not modifying CF too early +// - instruction scheduling +// +// The first and last iterations can cheat a little. For example, +// on the first iteration "symbol" is known to start from 1 so it +// doesn't need to be read; it can even be immediately initialized +// to 2 to prepare for the second iteration of the loop. +// +// * * * +// +// a = number of the current prob variable (0 or 1) +// b = number of the next prob variable (1 or 0) +// *_only = rc_asm_y or _n to include or exclude code marked with them +#define rc_asm_bittree(a, b, first_only, middle_only, last_only) \ + first_only( \ + "movzwl 2(%[probs_base]), %[prob" #a "]\n\t" \ + "mov $2, %[symbol]\n\t" \ + "movzwl 4(%[probs_base]), %[prob" #b "]\n\t" \ + ) \ + middle_only( \ + /* Note the scaling of 4 instead of 2: */ \ + "movzwl (%[probs_base], %q[symbol], 4), %[prob" #b "]\n\t" \ + ) \ + last_only( \ + "add %[symbol], %[symbol]\n\t" \ + ) \ + \ + rc_asm_normalize \ + rc_asm_calc("prob" #a) \ + \ + "cmovae %[t0], %[range]\n\t" \ + \ + first_only( \ + "movzwl 6(%[probs_base]), %[t0]\n\t" \ + "cmovae %[t0], %[prob" #b "]\n\t" \ + ) \ + middle_only( \ + "movzwl 2(%[probs_base], %q[symbol], 4), %[t0]\n\t" \ + "lea (%q[symbol], %q[symbol]), %[symbol]\n\t" \ + "cmovae %[t0], %[prob" #b "]\n\t" \ + ) \ + \ + "lea %c[bit_model_offset](%q[prob" #a "]), %[t0]\n\t" \ + "cmovb %[t1], %[code]\n\t" \ + "mov %[symbol], %[t1]\n\t" \ + "cmovae %[prob" #a "], %[t0]\n\t" \ + \ + first_only( \ + "sbb $-1, %[symbol]\n\t" \ + ) \ + middle_only( \ + "sbb $-1, %[symbol]\n\t" \ + ) \ + last_only( \ + "sbb %[last_sbb], %[symbol]\n\t" \ + ) \ + \ + "shr %[move_bits], %[t0]\n\t" \ + "sub %[t0], %[prob" #a "]\n\t" \ + /* Scaling of 1 instead of 2 because symbol <<= 1. */ \ + "mov %w[prob" #a "], (%[probs_base], %q[t1], 1)\n\t" + +// NOTE: The order of variables in __asm__ can affect speed and code size. +#define rc_asm_bittree_n(probs_base_var, final_add, asm_str) \ +do { \ + uint32_t t0; \ + uint32_t t1; \ + uint32_t t_prob0; \ + uint32_t t_prob1; \ + \ + __asm__( \ + asm_str \ + : \ + [range] "+&r"(rc.range), \ + [code] "+&r"(rc.code), \ + [t0] "=&r"(t0), \ + [t1] "=&r"(t1), \ + [prob0] "=&r"(t_prob0), \ + [prob1] "=&r"(t_prob1), \ + [symbol] "=&r"(symbol), \ + [in_ptr] "+&r"(rc_in_ptr) \ + : \ + [probs_base] "r"(probs_base_var), \ + [last_sbb] "n"(-1 - (final_add)), \ + [top_value] "n"(RC_TOP_VALUE), \ + [shift_bits] "n"(RC_SHIFT_BITS), \ + [bit_model_total_bits] "n"(RC_BIT_MODEL_TOTAL_BITS), \ + [bit_model_offset] "n"(RC_BIT_MODEL_OFFSET), \ + [move_bits] "n"(RC_MOVE_BITS) \ + : \ + "cc", "memory"); \ +} while (0) + + +#if LZMA_RANGE_DECODER_CONFIG & 0x010 +#undef rc_bittree3 +#define rc_bittree3(probs_base_var, final_add) \ + rc_asm_bittree_n(probs_base_var, final_add, \ + rc_asm_bittree(0, 1, rc_asm_y, rc_asm_n, rc_asm_n) \ + rc_asm_bittree(1, 0, rc_asm_n, rc_asm_y, rc_asm_n) \ + rc_asm_bittree(0, 1, rc_asm_n, rc_asm_n, rc_asm_y) \ + ) + +#undef rc_bittree6 +#define rc_bittree6(probs_base_var, final_add) \ + rc_asm_bittree_n(probs_base_var, final_add, \ + rc_asm_bittree(0, 1, rc_asm_y, rc_asm_n, rc_asm_n) \ + rc_asm_bittree(1, 0, rc_asm_n, rc_asm_y, rc_asm_n) \ + rc_asm_bittree(0, 1, rc_asm_n, rc_asm_y, rc_asm_n) \ + rc_asm_bittree(1, 0, rc_asm_n, rc_asm_y, rc_asm_n) \ + rc_asm_bittree(0, 1, rc_asm_n, rc_asm_y, rc_asm_n) \ + rc_asm_bittree(1, 0, rc_asm_n, rc_asm_n, rc_asm_y) \ + ) + +#undef rc_bittree8 +#define rc_bittree8(probs_base_var, final_add) \ + rc_asm_bittree_n(probs_base_var, final_add, \ + rc_asm_bittree(0, 1, rc_asm_y, rc_asm_n, rc_asm_n) \ + rc_asm_bittree(1, 0, rc_asm_n, rc_asm_y, rc_asm_n) \ + rc_asm_bittree(0, 1, rc_asm_n, rc_asm_y, rc_asm_n) \ + rc_asm_bittree(1, 0, rc_asm_n, rc_asm_y, rc_asm_n) \ + rc_asm_bittree(0, 1, rc_asm_n, rc_asm_y, rc_asm_n) \ + rc_asm_bittree(1, 0, rc_asm_n, rc_asm_y, rc_asm_n) \ + rc_asm_bittree(0, 1, rc_asm_n, rc_asm_y, rc_asm_n) \ + rc_asm_bittree(1, 0, rc_asm_n, rc_asm_n, rc_asm_y) \ + ) +#endif // LZMA_RANGE_DECODER_CONFIG & 0x010 + + +// Fixed-sized reverse bittree +// +// This uses the indexing that constructs the final value in symbol directly. +// add = 1, 2, 4, 8 +// dcur = -, 4, 8, 16 +// dnext0 = 4, 8, 16, - +// dnext0 = 6, 12, 24, - +#define rc_asm_bittree_rev(a, b, add, dcur, dnext0, dnext1, \ + first_only, middle_only, last_only) \ + first_only( \ + "movzwl 2(%[probs_base]), %[prob" #a "]\n\t" \ + "xor %[symbol], %[symbol]\n\t" \ + "movzwl 4(%[probs_base]), %[prob" #b "]\n\t" \ + ) \ + middle_only( \ + "movzwl " #dnext0 "(%[probs_base], %q[symbol], 2), " \ + "%[prob" #b "]\n\t" \ + ) \ + \ + rc_asm_normalize \ + rc_asm_calc("prob" #a) \ + \ + "cmovae %[t0], %[range]\n\t" \ + \ + first_only( \ + "movzwl 6(%[probs_base]), %[t0]\n\t" \ + "cmovae %[t0], %[prob" #b "]\n\t" \ + ) \ + middle_only( \ + "movzwl " #dnext1 "(%[probs_base], %q[symbol], 2), %[t0]\n\t" \ + "cmovae %[t0], %[prob" #b "]\n\t" \ + ) \ + \ + "lea " #add "(%q[symbol]), %[t0]\n\t" \ + "cmovb %[t1], %[code]\n\t" \ + middle_only( \ + "mov %[symbol], %[t1]\n\t" \ + ) \ + last_only( \ + "mov %[symbol], %[t1]\n\t" \ + ) \ + "cmovae %[t0], %[symbol]\n\t" \ + "lea %c[bit_model_offset](%q[prob" #a "]), %[t0]\n\t" \ + "cmovae %[prob" #a "], %[t0]\n\t" \ + \ + "shr %[move_bits], %[t0]\n\t" \ + "sub %[t0], %[prob" #a "]\n\t" \ + first_only( \ + "mov %w[prob" #a "], 2(%[probs_base])\n\t" \ + ) \ + middle_only( \ + "mov %w[prob" #a "], " \ + #dcur "(%[probs_base], %q[t1], 2)\n\t" \ + ) \ + last_only( \ + "mov %w[prob" #a "], " \ + #dcur "(%[probs_base], %q[t1], 2)\n\t" \ + ) + +#if LZMA_RANGE_DECODER_CONFIG & 0x020 +#undef rc_bittree_rev4 +#define rc_bittree_rev4(probs_base_var) \ +rc_asm_bittree_n(probs_base_var, 4, \ + rc_asm_bittree_rev(0, 1, 1, -, 4, 6, rc_asm_y, rc_asm_n, rc_asm_n) \ + rc_asm_bittree_rev(1, 0, 2, 4, 8, 12, rc_asm_n, rc_asm_y, rc_asm_n) \ + rc_asm_bittree_rev(0, 1, 4, 8, 16, 24, rc_asm_n, rc_asm_y, rc_asm_n) \ + rc_asm_bittree_rev(1, 0, 8, 16, -, -, rc_asm_n, rc_asm_n, rc_asm_y) \ +) +#endif // LZMA_RANGE_DECODER_CONFIG & 0x020 + + +#if LZMA_RANGE_DECODER_CONFIG & 0x040 +#undef rc_bit_add_if_1 +#define rc_bit_add_if_1(probs_base_var, dest_var, value_to_add_if_1) \ +do { \ + uint32_t t0; \ + uint32_t t1; \ + uint32_t t2 = (value_to_add_if_1); \ + uint32_t t_prob; \ + uint32_t t_index; \ + \ + __asm__( \ + "movzwl (%[probs_base], %q[symbol], 2), %[prob]\n\t" \ + "mov %[symbol], %[index]\n\t" \ + \ + "add %[dest], %[t2]\n\t" \ + "add %[symbol], %[symbol]\n\t" \ + \ + rc_asm_normalize \ + rc_asm_calc("prob") \ + \ + "cmovae %[t0], %[range]\n\t" \ + "lea %c[bit_model_offset](%q[prob]), %[t0]\n\t" \ + "cmovb %[t1], %[code]\n\t" \ + "cmovae %[prob], %[t0]\n\t" \ + \ + "cmovae %[t2], %[dest]\n\t" \ + "sbb $-1, %[symbol]\n\t" \ + \ + "sar %[move_bits], %[t0]\n\t" \ + "sub %[t0], %[prob]\n\t" \ + "mov %w[prob], (%[probs_base], %q[index], 2)" \ + : \ + [range] "+&r"(rc.range), \ + [code] "+&r"(rc.code), \ + [t0] "=&r"(t0), \ + [t1] "=&r"(t1), \ + [prob] "=&r"(t_prob), \ + [index] "=&r"(t_index), \ + [symbol] "+&r"(symbol), \ + [t2] "+&r"(t2), \ + [dest] "+&r"(dest_var), \ + [in_ptr] "+&r"(rc_in_ptr) \ + : \ + [probs_base] "r"(probs_base_var), \ + [top_value] "n"(RC_TOP_VALUE), \ + [shift_bits] "n"(RC_SHIFT_BITS), \ + [bit_model_total_bits] "n"(RC_BIT_MODEL_TOTAL_BITS), \ + [bit_model_offset] "n"(RC_BIT_MODEL_OFFSET), \ + [move_bits] "n"(RC_MOVE_BITS) \ + : \ + "cc", "memory"); \ +} while (0) +#endif // LZMA_RANGE_DECODER_CONFIG & 0x040 + + +// Literal decoding uses a normal 8-bit bittree but literal with match byte +// is more complex in picking the probability variable from the correct +// subtree. This doesn't use preloading/prefetching of the next prob because +// there are four choices instead of two. +// +// FIXME? The first iteration starts with symbol = 1 so it could be optimized +// by a tiny amount. +#define rc_asm_matched_literal(nonlast_only) \ + "add %[offset], %[symbol]\n\t" \ + "and %[offset], %[match_bit]\n\t" \ + "add %[match_bit], %[symbol]\n\t" \ + \ + "movzwl (%[probs_base], %q[symbol], 2), %[prob]\n\t" \ + \ + "add %[symbol], %[symbol]\n\t" \ + \ + nonlast_only( \ + "xor %[match_bit], %[offset]\n\t" \ + "add %[match_byte], %[match_byte]\n\t" \ + ) \ + \ + rc_asm_normalize \ + rc_asm_calc("prob") \ + \ + "cmovae %[t0], %[range]\n\t" \ + "lea %c[bit_model_offset](%q[prob]), %[t0]\n\t" \ + "cmovb %[t1], %[code]\n\t" \ + "mov %[symbol], %[t1]\n\t" \ + "cmovae %[prob], %[t0]\n\t" \ + \ + nonlast_only( \ + "cmovae %[match_bit], %[offset]\n\t" \ + "mov %[match_byte], %[match_bit]\n\t" \ + ) \ + \ + "sbb $-1, %[symbol]\n\t" \ + \ + "shr %[move_bits], %[t0]\n\t" \ + /* Undo symbol += match_bit + offset: */ \ + "and $0x1FF, %[symbol]\n\t" \ + "sub %[t0], %[prob]\n\t" \ + \ + /* Scaling of 1 instead of 2 because symbol <<= 1. */ \ + "mov %w[prob], (%[probs_base], %q[t1], 1)\n\t" + + +#if LZMA_RANGE_DECODER_CONFIG & 0x080 +#undef rc_matched_literal +#define rc_matched_literal(probs_base_var, match_byte_value) \ +do { \ + uint32_t t0; \ + uint32_t t1; \ + uint32_t t_prob; \ + uint32_t t_match_byte = (uint32_t)(match_byte_value) << 1; \ + uint32_t t_match_bit = t_match_byte; \ + uint32_t t_offset = 0x100; \ + symbol = 1; \ + \ + __asm__( \ + rc_asm_matched_literal(rc_asm_y) \ + rc_asm_matched_literal(rc_asm_y) \ + rc_asm_matched_literal(rc_asm_y) \ + rc_asm_matched_literal(rc_asm_y) \ + rc_asm_matched_literal(rc_asm_y) \ + rc_asm_matched_literal(rc_asm_y) \ + rc_asm_matched_literal(rc_asm_y) \ + rc_asm_matched_literal(rc_asm_n) \ + : \ + [range] "+&r"(rc.range), \ + [code] "+&r"(rc.code), \ + [t0] "=&r"(t0), \ + [t1] "=&r"(t1), \ + [prob] "=&r"(t_prob), \ + [match_bit] "+&r"(t_match_bit), \ + [symbol] "+&r"(symbol), \ + [match_byte] "+&r"(t_match_byte), \ + [offset] "+&r"(t_offset), \ + [in_ptr] "+&r"(rc_in_ptr) \ + : \ + [probs_base] "r"(probs_base_var), \ + [top_value] "n"(RC_TOP_VALUE), \ + [shift_bits] "n"(RC_SHIFT_BITS), \ + [bit_model_total_bits] "n"(RC_BIT_MODEL_TOTAL_BITS), \ + [bit_model_offset] "n"(RC_BIT_MODEL_OFFSET), \ + [move_bits] "n"(RC_MOVE_BITS) \ + : \ + "cc", "memory"); \ +} while (0) +#endif // LZMA_RANGE_DECODER_CONFIG & 0x080 + + +// Doing the loop in asm instead of C seems to help a little. +#if LZMA_RANGE_DECODER_CONFIG & 0x100 +#undef rc_direct +#define rc_direct(dest_var, count_var) \ +do { \ + uint32_t t0; \ + uint32_t t1; \ + \ + __asm__( \ + "2:\n\t" \ + "add %[dest], %[dest]\n\t" \ + "lea 1(%q[dest]), %[t1]\n\t" \ + \ + rc_asm_normalize \ + \ + "shr $1, %[range]\n\t" \ + "mov %[code], %[t0]\n\t" \ + "sub %[range], %[code]\n\t" \ + "cmovns %[t1], %[dest]\n\t" \ + "cmovs %[t0], %[code]\n\t" \ + "dec %[count]\n\t" \ + "jnz 2b\n\t" \ + : \ + [range] "+&r"(rc.range), \ + [code] "+&r"(rc.code), \ + [t0] "=&r"(t0), \ + [t1] "=&r"(t1), \ + [dest] "+&r"(dest_var), \ + [count] "+&r"(count_var), \ + [in_ptr] "+&r"(rc_in_ptr) \ + : \ + [top_value] "n"(RC_TOP_VALUE), \ + [shift_bits] "n"(RC_SHIFT_BITS) \ + : \ + "cc", "memory"); \ +} while (0) +#endif // LZMA_RANGE_DECODER_CONFIG & 0x100 + +#endif // x86_64 + +#endif diff --git a/src/native/external/xz/src/liblzma/rangecoder/range_encoder.h b/src/native/external/xz/src/liblzma/rangecoder/range_encoder.h new file mode 100644 index 00000000000000..8f62a47ac0a636 --- /dev/null +++ b/src/native/external/xz/src/liblzma/rangecoder/range_encoder.h @@ -0,0 +1,349 @@ +// SPDX-License-Identifier: 0BSD + +/////////////////////////////////////////////////////////////////////////////// +// +/// \file range_encoder.h +/// \brief Range Encoder +/// +// Authors: Igor Pavlov +// Lasse Collin +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef LZMA_RANGE_ENCODER_H +#define LZMA_RANGE_ENCODER_H + +#include "range_common.h" +#include "price.h" + + +/// Maximum number of symbols that can be put pending into lzma_range_encoder +/// structure between calls to lzma_rc_encode(). For LZMA, 48+5 is enough +/// (match with big distance and length followed by range encoder flush). +#define RC_SYMBOLS_MAX 53 + + +typedef struct { + uint64_t low; + uint64_t cache_size; + uint32_t range; + uint8_t cache; + + /// Number of bytes written out by rc_encode() -> rc_shift_low() + uint64_t out_total; + + /// Number of symbols in the tables + size_t count; + + /// rc_encode()'s position in the tables + size_t pos; + + /// Symbols to encode + enum { + RC_BIT_0, + RC_BIT_1, + RC_DIRECT_0, + RC_DIRECT_1, + RC_FLUSH, + } symbols[RC_SYMBOLS_MAX]; + + /// Probabilities associated with RC_BIT_0 or RC_BIT_1 + probability *probs[RC_SYMBOLS_MAX]; + +} lzma_range_encoder; + + +static inline void +rc_reset(lzma_range_encoder *rc) +{ + rc->low = 0; + rc->cache_size = 1; + rc->range = UINT32_MAX; + rc->cache = 0; + rc->out_total = 0; + rc->count = 0; + rc->pos = 0; +} + + +static inline void +rc_forget(lzma_range_encoder *rc) +{ + // This must not be called when rc_encode() is partially done. + assert(rc->pos == 0); + rc->count = 0; +} + + +static inline void +rc_bit(lzma_range_encoder *rc, probability *prob, uint32_t bit) +{ + rc->symbols[rc->count] = bit; + rc->probs[rc->count] = prob; + ++rc->count; +} + + +static inline void +rc_bittree(lzma_range_encoder *rc, probability *probs, + uint32_t bit_count, uint32_t symbol) +{ + uint32_t model_index = 1; + + do { + const uint32_t bit = (symbol >> --bit_count) & 1; + rc_bit(rc, &probs[model_index], bit); + model_index = (model_index << 1) + bit; + } while (bit_count != 0); +} + + +static inline void +rc_bittree_reverse(lzma_range_encoder *rc, probability *probs, + uint32_t bit_count, uint32_t symbol) +{ + uint32_t model_index = 1; + + do { + const uint32_t bit = symbol & 1; + symbol >>= 1; + rc_bit(rc, &probs[model_index], bit); + model_index = (model_index << 1) + bit; + } while (--bit_count != 0); +} + + +static inline void +rc_direct(lzma_range_encoder *rc, + uint32_t value, uint32_t bit_count) +{ + do { + rc->symbols[rc->count++] + = RC_DIRECT_0 + ((value >> --bit_count) & 1); + } while (bit_count != 0); +} + + +static inline void +rc_flush(lzma_range_encoder *rc) +{ + for (size_t i = 0; i < 5; ++i) + rc->symbols[rc->count++] = RC_FLUSH; +} + + +static inline bool +rc_shift_low(lzma_range_encoder *rc, + uint8_t *out, size_t *out_pos, size_t out_size) +{ + if ((uint32_t)(rc->low) < (uint32_t)(0xFF000000) + || (uint32_t)(rc->low >> 32) != 0) { + do { + if (*out_pos == out_size) + return true; + + out[*out_pos] = rc->cache + (uint8_t)(rc->low >> 32); + ++*out_pos; + ++rc->out_total; + rc->cache = 0xFF; + + } while (--rc->cache_size != 0); + + rc->cache = (rc->low >> 24) & 0xFF; + } + + ++rc->cache_size; + rc->low = (rc->low & 0x00FFFFFF) << RC_SHIFT_BITS; + + return false; +} + + +// NOTE: The last two arguments are uint64_t instead of size_t because in +// the dummy version these refer to the size of the whole range-encoded +// output stream, not just to the currently available output buffer space. +static inline bool +rc_shift_low_dummy(uint64_t *low, uint64_t *cache_size, uint8_t *cache, + uint64_t *out_pos, uint64_t out_size) +{ + if ((uint32_t)(*low) < (uint32_t)(0xFF000000) + || (uint32_t)(*low >> 32) != 0) { + do { + if (*out_pos == out_size) + return true; + + ++*out_pos; + *cache = 0xFF; + + } while (--*cache_size != 0); + + *cache = (*low >> 24) & 0xFF; + } + + ++*cache_size; + *low = (*low & 0x00FFFFFF) << RC_SHIFT_BITS; + + return false; +} + + +static inline bool +rc_encode(lzma_range_encoder *rc, + uint8_t *out, size_t *out_pos, size_t out_size) +{ + assert(rc->count <= RC_SYMBOLS_MAX); + + while (rc->pos < rc->count) { + // Normalize + if (rc->range < RC_TOP_VALUE) { + if (rc_shift_low(rc, out, out_pos, out_size)) + return true; + + rc->range <<= RC_SHIFT_BITS; + } + + // Encode a bit + switch (rc->symbols[rc->pos]) { + case RC_BIT_0: { + probability prob = *rc->probs[rc->pos]; + rc->range = (rc->range >> RC_BIT_MODEL_TOTAL_BITS) + * prob; + prob += (RC_BIT_MODEL_TOTAL - prob) >> RC_MOVE_BITS; + *rc->probs[rc->pos] = prob; + break; + } + + case RC_BIT_1: { + probability prob = *rc->probs[rc->pos]; + const uint32_t bound = prob * (rc->range + >> RC_BIT_MODEL_TOTAL_BITS); + rc->low += bound; + rc->range -= bound; + prob -= prob >> RC_MOVE_BITS; + *rc->probs[rc->pos] = prob; + break; + } + + case RC_DIRECT_0: + rc->range >>= 1; + break; + + case RC_DIRECT_1: + rc->range >>= 1; + rc->low += rc->range; + break; + + case RC_FLUSH: + // Prevent further normalizations. + rc->range = UINT32_MAX; + + // Flush the last five bytes (see rc_flush()). + do { + if (rc_shift_low(rc, out, out_pos, out_size)) + return true; + } while (++rc->pos < rc->count); + + // Reset the range encoder so we are ready to continue + // encoding if we weren't finishing the stream. + rc_reset(rc); + return false; + + default: + assert(0); + break; + } + + ++rc->pos; + } + + rc->count = 0; + rc->pos = 0; + + return false; +} + + +static inline bool +rc_encode_dummy(const lzma_range_encoder *rc, uint64_t out_limit) +{ + assert(rc->count <= RC_SYMBOLS_MAX); + + uint64_t low = rc->low; + uint64_t cache_size = rc->cache_size; + uint32_t range = rc->range; + uint8_t cache = rc->cache; + uint64_t out_pos = rc->out_total; + + size_t pos = rc->pos; + + while (true) { + // Normalize + if (range < RC_TOP_VALUE) { + if (rc_shift_low_dummy(&low, &cache_size, &cache, + &out_pos, out_limit)) + return true; + + range <<= RC_SHIFT_BITS; + } + + // This check is here because the normalization above must + // be done before flushing the last bytes. + if (pos == rc->count) + break; + + // Encode a bit + switch (rc->symbols[pos]) { + case RC_BIT_0: { + probability prob = *rc->probs[pos]; + range = (range >> RC_BIT_MODEL_TOTAL_BITS) + * prob; + break; + } + + case RC_BIT_1: { + probability prob = *rc->probs[pos]; + const uint32_t bound = prob * (range + >> RC_BIT_MODEL_TOTAL_BITS); + low += bound; + range -= bound; + break; + } + + case RC_DIRECT_0: + range >>= 1; + break; + + case RC_DIRECT_1: + range >>= 1; + low += range; + break; + + case RC_FLUSH: + default: + assert(0); + break; + } + + ++pos; + } + + // Flush the last bytes. This isn't in rc->symbols[] so we do + // it after the above loop to take into account the size of + // the flushing that will be done at the end of the stream. + for (pos = 0; pos < 5; ++pos) { + if (rc_shift_low_dummy(&low, &cache_size, + &cache, &out_pos, out_limit)) + return true; + } + + return false; +} + + +static inline uint64_t +rc_pending(const lzma_range_encoder *rc) +{ + return rc->cache_size + 5 - 1; +} + +#endif diff --git a/src/native/external/xz/src/liblzma/simple/Makefile.inc b/src/native/external/xz/src/liblzma/simple/Makefile.inc new file mode 100644 index 00000000000000..f42ad1ba513701 --- /dev/null +++ b/src/native/external/xz/src/liblzma/simple/Makefile.inc @@ -0,0 +1,51 @@ +## SPDX-License-Identifier: 0BSD +## Author: Lasse Collin + +liblzma_la_SOURCES += \ + simple/simple_coder.c \ + simple/simple_coder.h \ + simple/simple_private.h + +if COND_ENCODER_SIMPLE +liblzma_la_SOURCES += \ + simple/simple_encoder.c \ + simple/simple_encoder.h +endif + +if COND_DECODER_SIMPLE +liblzma_la_SOURCES += \ + simple/simple_decoder.c \ + simple/simple_decoder.h +endif + +if COND_FILTER_X86 +liblzma_la_SOURCES += simple/x86.c +endif + +if COND_FILTER_POWERPC +liblzma_la_SOURCES += simple/powerpc.c +endif + +if COND_FILTER_IA64 +liblzma_la_SOURCES += simple/ia64.c +endif + +if COND_FILTER_ARM +liblzma_la_SOURCES += simple/arm.c +endif + +if COND_FILTER_ARMTHUMB +liblzma_la_SOURCES += simple/armthumb.c +endif + +if COND_FILTER_ARM64 +liblzma_la_SOURCES += simple/arm64.c +endif + +if COND_FILTER_SPARC +liblzma_la_SOURCES += simple/sparc.c +endif + +if COND_FILTER_RISCV +liblzma_la_SOURCES += simple/riscv.c +endif diff --git a/src/native/external/xz/src/liblzma/simple/arm.c b/src/native/external/xz/src/liblzma/simple/arm.c new file mode 100644 index 00000000000000..159671a7b3045a --- /dev/null +++ b/src/native/external/xz/src/liblzma/simple/arm.c @@ -0,0 +1,76 @@ +// SPDX-License-Identifier: 0BSD + +/////////////////////////////////////////////////////////////////////////////// +// +/// \file arm.c +/// \brief Filter for ARM binaries +/// +// Authors: Igor Pavlov +// Lasse Collin +// +/////////////////////////////////////////////////////////////////////////////// + +#include "simple_private.h" + + +static size_t +arm_code(void *simple lzma_attribute((__unused__)), + uint32_t now_pos, bool is_encoder, + uint8_t *buffer, size_t size) +{ + size &= ~(size_t)3; + + size_t i; + for (i = 0; i < size; i += 4) { + if (buffer[i + 3] == 0xEB) { + uint32_t src = ((uint32_t)(buffer[i + 2]) << 16) + | ((uint32_t)(buffer[i + 1]) << 8) + | (uint32_t)(buffer[i + 0]); + src <<= 2; + + uint32_t dest; + if (is_encoder) + dest = now_pos + (uint32_t)(i) + 8 + src; + else + dest = src - (now_pos + (uint32_t)(i) + 8); + + dest >>= 2; + buffer[i + 2] = (uint8_t)(dest >> 16); + buffer[i + 1] = (uint8_t)(dest >> 8); + buffer[i + 0] = (uint8_t)dest; + } + } + + return i; +} + + +static lzma_ret +arm_coder_init(lzma_next_coder *next, const lzma_allocator *allocator, + const lzma_filter_info *filters, bool is_encoder) +{ + return lzma_simple_coder_init(next, allocator, filters, + &arm_code, 0, 4, 4, is_encoder); +} + + +#ifdef HAVE_ENCODER_ARM +extern lzma_ret +lzma_simple_arm_encoder_init(lzma_next_coder *next, + const lzma_allocator *allocator, + const lzma_filter_info *filters) +{ + return arm_coder_init(next, allocator, filters, true); +} +#endif + + +#ifdef HAVE_DECODER_ARM +extern lzma_ret +lzma_simple_arm_decoder_init(lzma_next_coder *next, + const lzma_allocator *allocator, + const lzma_filter_info *filters) +{ + return arm_coder_init(next, allocator, filters, false); +} +#endif diff --git a/src/native/external/xz/src/liblzma/simple/arm64.c b/src/native/external/xz/src/liblzma/simple/arm64.c new file mode 100644 index 00000000000000..2ec10d937fbdea --- /dev/null +++ b/src/native/external/xz/src/liblzma/simple/arm64.c @@ -0,0 +1,156 @@ +// SPDX-License-Identifier: 0BSD + +/////////////////////////////////////////////////////////////////////////////// +// +/// \file arm64.c +/// \brief Filter for ARM64 binaries +/// +/// This converts ARM64 relative addresses in the BL and ADRP immediates +/// to absolute values to increase redundancy of ARM64 code. +/// +/// Converting B or ADR instructions was also tested but it's not useful. +/// A majority of the jumps for the B instruction are very small (+/- 0xFF). +/// These are typical for loops and if-statements. Encoding them to their +/// absolute address reduces redundancy since many of the small relative +/// jump values are repeated, but very few of the absolute addresses are. +// +// Authors: Lasse Collin +// Jia Tan +// Igor Pavlov +// +/////////////////////////////////////////////////////////////////////////////// + +#include "simple_private.h" + + +static size_t +arm64_code(void *simple lzma_attribute((__unused__)), + uint32_t now_pos, bool is_encoder, + uint8_t *buffer, size_t size) +{ + size &= ~(size_t)3; + + size_t i; + + // Clang 14.0.6 on x86-64 makes this four times bigger and 40 % slower + // with auto-vectorization that is enabled by default with -O2. + // Such vectorization bloat happens with -O2 when targeting ARM64 too + // but performance hasn't been tested. +#ifdef __clang__ +# pragma clang loop vectorize(disable) +#endif + for (i = 0; i < size; i += 4) { + uint32_t pc = (uint32_t)(now_pos + i); + uint32_t instr = read32le(buffer + i); + + if ((instr >> 26) == 0x25) { + // BL instruction: + // The full 26-bit immediate is converted. + // The range is +/-128 MiB. + // + // Using the full range helps quite a lot with + // big executables. Smaller range would reduce false + // positives in non-code sections of the input though + // so this is a compromise that slightly favors big + // files. With the full range, only six bits of the 32 + // need to match to trigger a conversion. + const uint32_t src = instr; + instr = 0x94000000; + + pc >>= 2; + if (!is_encoder) + pc = 0U - pc; + + instr |= (src + pc) & 0x03FFFFFF; + write32le(buffer + i, instr); + + } else if ((instr & 0x9F000000) == 0x90000000) { + // ADRP instruction: + // Only values in the range +/-512 MiB are converted. + // + // Using less than the full +/-4 GiB range reduces + // false positives on non-code sections of the input + // while being excellent for executables up to 512 MiB. + // The positive effect of ADRP conversion is smaller + // than that of BL but it also doesn't hurt so much in + // non-code sections of input because, with +/-512 MiB + // range, nine bits of 32 need to match to trigger a + // conversion (two 10-bit match choices = 9 bits). + const uint32_t src = ((instr >> 29) & 3) + | ((instr >> 3) & 0x001FFFFC); + + // With the addition only one branch is needed to + // check the +/- range. This is usually false when + // processing ARM64 code so branch prediction will + // handle it well in terms of performance. + // + //if ((src & 0x001E0000) != 0 + // && (src & 0x001E0000) != 0x001E0000) + if ((src + 0x00020000) & 0x001C0000) + continue; + + instr &= 0x9000001F; + + pc >>= 12; + if (!is_encoder) + pc = 0U - pc; + + const uint32_t dest = src + pc; + instr |= (dest & 3) << 29; + instr |= (dest & 0x0003FFFC) << 3; + instr |= (0U - (dest & 0x00020000)) & 0x00E00000; + write32le(buffer + i, instr); + } + } + + return i; +} + + +static lzma_ret +arm64_coder_init(lzma_next_coder *next, const lzma_allocator *allocator, + const lzma_filter_info *filters, bool is_encoder) +{ + return lzma_simple_coder_init(next, allocator, filters, + &arm64_code, 0, 4, 4, is_encoder); +} + + +#ifdef HAVE_ENCODER_ARM64 +extern lzma_ret +lzma_simple_arm64_encoder_init(lzma_next_coder *next, + const lzma_allocator *allocator, + const lzma_filter_info *filters) +{ + return arm64_coder_init(next, allocator, filters, true); +} + + +extern LZMA_API(size_t) +lzma_bcj_arm64_encode(uint32_t start_offset, uint8_t *buf, size_t size) +{ + // start_offset must be a multiple of four. + start_offset &= ~UINT32_C(3); + return arm64_code(NULL, start_offset, true, buf, size); +} +#endif + + +#ifdef HAVE_DECODER_ARM64 +extern lzma_ret +lzma_simple_arm64_decoder_init(lzma_next_coder *next, + const lzma_allocator *allocator, + const lzma_filter_info *filters) +{ + return arm64_coder_init(next, allocator, filters, false); +} + + +extern LZMA_API(size_t) +lzma_bcj_arm64_decode(uint32_t start_offset, uint8_t *buf, size_t size) +{ + // start_offset must be a multiple of four. + start_offset &= ~UINT32_C(3); + return arm64_code(NULL, start_offset, false, buf, size); +} +#endif diff --git a/src/native/external/xz/src/liblzma/simple/armthumb.c b/src/native/external/xz/src/liblzma/simple/armthumb.c new file mode 100644 index 00000000000000..362335503b5f07 --- /dev/null +++ b/src/native/external/xz/src/liblzma/simple/armthumb.c @@ -0,0 +1,84 @@ +// SPDX-License-Identifier: 0BSD + +/////////////////////////////////////////////////////////////////////////////// +// +/// \file armthumb.c +/// \brief Filter for ARM-Thumb binaries +/// +// Authors: Igor Pavlov +// Lasse Collin +// +/////////////////////////////////////////////////////////////////////////////// + +#include "simple_private.h" + + +static size_t +armthumb_code(void *simple lzma_attribute((__unused__)), + uint32_t now_pos, bool is_encoder, + uint8_t *buffer, size_t size) +{ + if (size < 4) + return 0; + + size -= 4; + + size_t i; + for (i = 0; i <= size; i += 2) { + if ((buffer[i + 1] & 0xF8) == 0xF0 + && (buffer[i + 3] & 0xF8) == 0xF8) { + uint32_t src = (((uint32_t)(buffer[i + 1]) & 7) << 19) + | ((uint32_t)(buffer[i + 0]) << 11) + | (((uint32_t)(buffer[i + 3]) & 7) << 8) + | (uint32_t)(buffer[i + 2]); + + src <<= 1; + + uint32_t dest; + if (is_encoder) + dest = now_pos + (uint32_t)(i) + 4 + src; + else + dest = src - (now_pos + (uint32_t)(i) + 4); + + dest >>= 1; + buffer[i + 1] = 0xF0 | ((dest >> 19) & 0x7); + buffer[i + 0] = (uint8_t)(dest >> 11); + buffer[i + 3] = 0xF8 | ((dest >> 8) & 0x7); + buffer[i + 2] = (uint8_t)(dest); + i += 2; + } + } + + return i; +} + + +static lzma_ret +armthumb_coder_init(lzma_next_coder *next, const lzma_allocator *allocator, + const lzma_filter_info *filters, bool is_encoder) +{ + return lzma_simple_coder_init(next, allocator, filters, + &armthumb_code, 0, 4, 2, is_encoder); +} + + +#ifdef HAVE_ENCODER_ARMTHUMB +extern lzma_ret +lzma_simple_armthumb_encoder_init(lzma_next_coder *next, + const lzma_allocator *allocator, + const lzma_filter_info *filters) +{ + return armthumb_coder_init(next, allocator, filters, true); +} +#endif + + +#ifdef HAVE_DECODER_ARMTHUMB +extern lzma_ret +lzma_simple_armthumb_decoder_init(lzma_next_coder *next, + const lzma_allocator *allocator, + const lzma_filter_info *filters) +{ + return armthumb_coder_init(next, allocator, filters, false); +} +#endif diff --git a/src/native/external/xz/src/liblzma/simple/ia64.c b/src/native/external/xz/src/liblzma/simple/ia64.c new file mode 100644 index 00000000000000..2a4aaebb472078 --- /dev/null +++ b/src/native/external/xz/src/liblzma/simple/ia64.c @@ -0,0 +1,117 @@ +// SPDX-License-Identifier: 0BSD + +/////////////////////////////////////////////////////////////////////////////// +// +/// \file ia64.c +/// \brief Filter for IA64 (Itanium) binaries +/// +// Authors: Igor Pavlov +// Lasse Collin +// +/////////////////////////////////////////////////////////////////////////////// + +#include "simple_private.h" + + +static size_t +ia64_code(void *simple lzma_attribute((__unused__)), + uint32_t now_pos, bool is_encoder, + uint8_t *buffer, size_t size) +{ + static const uint32_t BRANCH_TABLE[32] = { + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 4, 4, 6, 6, 0, 0, 7, 7, + 4, 4, 0, 0, 4, 4, 0, 0 + }; + + size &= ~(size_t)15; + + size_t i; + for (i = 0; i < size; i += 16) { + const uint32_t instr_template = buffer[i] & 0x1F; + const uint32_t mask = BRANCH_TABLE[instr_template]; + uint32_t bit_pos = 5; + + for (size_t slot = 0; slot < 3; ++slot, bit_pos += 41) { + if (((mask >> slot) & 1) == 0) + continue; + + const size_t byte_pos = (bit_pos >> 3); + const uint32_t bit_res = bit_pos & 0x7; + uint64_t instruction = 0; + + for (size_t j = 0; j < 6; ++j) + instruction += (uint64_t)( + buffer[i + j + byte_pos]) + << (8 * j); + + uint64_t inst_norm = instruction >> bit_res; + + if (((inst_norm >> 37) & 0xF) == 0x5 + && ((inst_norm >> 9) & 0x7) == 0 + /* && (inst_norm & 0x3F)== 0 */ + ) { + uint32_t src = (uint32_t)( + (inst_norm >> 13) & 0xFFFFF); + src |= ((inst_norm >> 36) & 1) << 20; + + src <<= 4; + + uint32_t dest; + if (is_encoder) + dest = now_pos + (uint32_t)(i) + src; + else + dest = src - (now_pos + (uint32_t)(i)); + + dest >>= 4; + + inst_norm &= ~((uint64_t)(0x8FFFFF) << 13); + inst_norm |= (uint64_t)(dest & 0xFFFFF) << 13; + inst_norm |= (uint64_t)(dest & 0x100000) + << (36 - 20); + + instruction &= (1U << bit_res) - 1; + instruction |= (inst_norm << bit_res); + + for (size_t j = 0; j < 6; j++) + buffer[i + j + byte_pos] = (uint8_t)( + instruction + >> (8 * j)); + } + } + } + + return i; +} + + +static lzma_ret +ia64_coder_init(lzma_next_coder *next, const lzma_allocator *allocator, + const lzma_filter_info *filters, bool is_encoder) +{ + return lzma_simple_coder_init(next, allocator, filters, + &ia64_code, 0, 16, 16, is_encoder); +} + + +#ifdef HAVE_ENCODER_IA64 +extern lzma_ret +lzma_simple_ia64_encoder_init(lzma_next_coder *next, + const lzma_allocator *allocator, + const lzma_filter_info *filters) +{ + return ia64_coder_init(next, allocator, filters, true); +} +#endif + + +#ifdef HAVE_DECODER_IA64 +extern lzma_ret +lzma_simple_ia64_decoder_init(lzma_next_coder *next, + const lzma_allocator *allocator, + const lzma_filter_info *filters) +{ + return ia64_coder_init(next, allocator, filters, false); +} +#endif diff --git a/src/native/external/xz/src/liblzma/simple/powerpc.c b/src/native/external/xz/src/liblzma/simple/powerpc.c new file mode 100644 index 00000000000000..2bcc638b7be85d --- /dev/null +++ b/src/native/external/xz/src/liblzma/simple/powerpc.c @@ -0,0 +1,81 @@ +// SPDX-License-Identifier: 0BSD + +/////////////////////////////////////////////////////////////////////////////// +// +/// \file powerpc.c +/// \brief Filter for PowerPC (big endian) binaries +/// +// Authors: Igor Pavlov +// Lasse Collin +// +/////////////////////////////////////////////////////////////////////////////// + +#include "simple_private.h" + + +static size_t +powerpc_code(void *simple lzma_attribute((__unused__)), + uint32_t now_pos, bool is_encoder, + uint8_t *buffer, size_t size) +{ + size &= ~(size_t)3; + + size_t i; + for (i = 0; i < size; i += 4) { + // PowerPC branch 6(48) 24(Offset) 1(Abs) 1(Link) + if ((buffer[i] >> 2) == 0x12 + && ((buffer[i + 3] & 3) == 1)) { + + const uint32_t src + = (((uint32_t)(buffer[i + 0]) & 3) << 24) + | ((uint32_t)(buffer[i + 1]) << 16) + | ((uint32_t)(buffer[i + 2]) << 8) + | ((uint32_t)(buffer[i + 3]) & ~UINT32_C(3)); + + uint32_t dest; + if (is_encoder) + dest = now_pos + (uint32_t)(i) + src; + else + dest = src - (now_pos + (uint32_t)(i)); + + buffer[i + 0] = 0x48 | ((dest >> 24) & 0x03); + buffer[i + 1] = (uint8_t)(dest >> 16); + buffer[i + 2] = (uint8_t)(dest >> 8); + buffer[i + 3] &= 0x03; + buffer[i + 3] |= dest; + } + } + + return i; +} + + +static lzma_ret +powerpc_coder_init(lzma_next_coder *next, const lzma_allocator *allocator, + const lzma_filter_info *filters, bool is_encoder) +{ + return lzma_simple_coder_init(next, allocator, filters, + &powerpc_code, 0, 4, 4, is_encoder); +} + + +#ifdef HAVE_ENCODER_POWERPC +extern lzma_ret +lzma_simple_powerpc_encoder_init(lzma_next_coder *next, + const lzma_allocator *allocator, + const lzma_filter_info *filters) +{ + return powerpc_coder_init(next, allocator, filters, true); +} +#endif + + +#ifdef HAVE_DECODER_POWERPC +extern lzma_ret +lzma_simple_powerpc_decoder_init(lzma_next_coder *next, + const lzma_allocator *allocator, + const lzma_filter_info *filters) +{ + return powerpc_coder_init(next, allocator, filters, false); +} +#endif diff --git a/src/native/external/xz/src/liblzma/simple/riscv.c b/src/native/external/xz/src/liblzma/simple/riscv.c new file mode 100644 index 00000000000000..bc97ebdbb0fbfa --- /dev/null +++ b/src/native/external/xz/src/liblzma/simple/riscv.c @@ -0,0 +1,773 @@ +// SPDX-License-Identifier: 0BSD + +/////////////////////////////////////////////////////////////////////////////// +// +/// \file riscv.c +/// \brief Filter for 32-bit/64-bit little/big endian RISC-V binaries +/// +/// This converts program counter relative addresses in function calls +/// (JAL, AUIPC+JALR), address calculation of functions and global +/// variables (AUIPC+ADDI), loads (AUIPC+load), and stores (AUIPC+store). +/// +/// For AUIPC+inst2 pairs, the paired instruction checking is fairly relaxed. +/// The paired instruction opcode must only have its lowest two bits set, +/// meaning it will convert any paired instruction that is not a 16-bit +/// compressed instruction. This was shown to be enough to keep the number +/// of false matches low while improving code size and speed. +// +// Authors: Lasse Collin +// Jia Tan +// +// Special thanks: +// +// - Chien Wong provided a few early versions of RISC-V +// filter variants along with test files and benchmark results. +// +// - Igor Pavlov helped a lot in the filter design, getting it both +// faster and smaller. The implementation here is still independently +// written, not based on LZMA SDK. +// +/////////////////////////////////////////////////////////////////////////////// + +/* + +RISC-V filtering +================ + + RV32I and RV64I, possibly combined with extensions C, Zfh, F, D, + and Q, are identical enough that the same filter works for both. + + The instruction encoding is always little endian, even on systems + with big endian data access. Thus the same filter works for both + endiannesses. + + The following instructions have program counter relative + (pc-relative) behavior: + +JAL +--- + + JAL is used for function calls (including tail calls) and + unconditional jumps within functions. Jumps within functions + aren't useful to filter because the absolute addresses often + appear only once or at most a few times. Tail calls and jumps + within functions look the same to a simple filter so neither + are filtered, that is, JAL x0 is ignored (the ABI name of the + register x0 is "zero"). + + Almost all calls store the return address to register x1 (ra) + or x5 (t0). To reduce false matches when the filter is applied + to non-code data, only the JAL instructions that use x1 or x5 + are converted. JAL has pc-relative range of +/-1 MiB so longer + calls and jumps need another method (AUIPC+JALR). + +C.J and C.JAL +------------- + + C.J and C.JAL have pc-relative range of +/-2 KiB. + + C.J is for tail calls and jumps within functions and isn't + filtered for the reasons mentioned for JAL x0. + + C.JAL is an RV32C-only instruction. Its encoding overlaps with + RV64C-only C.ADDIW which is a common instruction. So if filtering + C.JAL was useful (it wasn't tested) then a separate filter would + be needed for RV32 and RV64. Also, false positives would be a + significant problem when the filter is applied to non-code data + because C.JAL needs only five bits to match. Thus, this filter + doesn't modify C.JAL instructions. + +BEQ, BNE, BLT, BGE, BLTU, BGEU, C.BEQZ, and C.BNEZ +-------------------------------------------------- + + These are conditional branches with pc-relative range + of +/-4 KiB (+/-256 B for C.*). The absolute addresses often + appear only once and very short distances are the most common, + so filtering these instructions would make compression worse. + +AUIPC with rd != x0 +------------------- + + AUIPC is paired with a second instruction (inst2) to do + pc-relative jumps, calls, loads, stores, and for taking + an address of a symbol. AUIPC has a 20-bit immediate and + the possible inst2 choices have a 12-bit immediate. + + AUIPC stores pc + 20-bit signed immediate to a register. + The immediate encodes a multiple of 4 KiB so AUIPC itself + has a pc-relative range of +/-2 GiB. AUIPC does *NOT* set + the lowest 12 bits of the result to zero! This means that + the 12-bit immediate in inst2 cannot just include the lowest + 12 bits of the absolute address as is; the immediate has to + compensate for the lowest 12 bits that AUIPC copies from the + program counter. This means that a good filter has to convert + not only AUIPC but also the paired inst2. + + A strict filter would focus on filtering the following + AUIPC+inst2 pairs: + + - AUIPC+JALR: Function calls, including tail calls. + + - AUIPC+ADDI: Calculating the address of a function + or a global variable. + + - AUIPC+load/store from the base instruction sets + (RV32I, RV64I) or from the floating point extensions + Zfh, F, D, and Q: + * RV32I: LB, LH, LW, LBU, LHU, SB, SH, SW + * RV64I has also: LD, LWU, SD + * Zfh: FLH, FSH + * F: FLW, FSW + * D: FLD, FSD + * Q: FLQ, FSQ + + NOTE: AUIPC+inst2 can only be a pair if AUIPC's rd specifies + the same register as inst2's rs1. + + Instead of strictly accepting only the above instructions as inst2, + this filter uses a much simpler condition: the lowest two bits of + inst2 must be set, that is, inst2 must not be a 16-bit compressed + instruction. So this will accept all 32-bit and possible future + extended instructions as a pair to AUIPC if the bits in AUIPC's + rd [11:7] match the bits [19:15] in inst2 (the bits that I-type and + S-type instructions use for rs1). Testing showed that this relaxed + condition for inst2 did not consistently or significantly affect + compression ratio but it reduced code size and improved speed. + + Additionally, the paired instruction is always treated as an I-type + instruction. The S-type instructions used by stores (SB, SH, SW, + etc.) place the lowest 5 bits of the immediate in a different + location than I-type instructions. AUIPC+store pairs are less + common than other pairs, and testing showed that the extra + code required to handle S-type instructions was not worth the + compression ratio gained. + + AUIPC+inst2 don't necessarily appear sequentially next to each + other although very often they do. Especially AUIPC+JALR are + sequential as that may allow instruction fusion in processors + (and perhaps help branch prediction as a fused AUIPC+JALR is + a direct branch while JALR alone is an indirect branch). + + Clang 16 can generate code where AUIPC+inst2 is split: + + - AUIPC is outside a loop and inst2 (load/store) is inside + the loop. This way the AUIPC instruction needs to be + executed only once. + + - Load-modify-store may have AUIPC for the load and the same + AUIPC-result is used for the store too. This may get combined + with AUIPC being outside the loop. + + - AUIPC is before a conditional branch and inst2 is hundreds + of bytes away at the branch target. + + - Inner and outer pair: + + auipc a1,0x2f + auipc a2,0x3d + ld a2,-500(a2) + addi a1,a1,-233 + + - Many split pairs with an untaken conditional branch between: + + auipc s9,0x1613 # Pair 1 + auipc s4,0x1613 # Pair 2 + auipc s6,0x1613 # Pair 3 + auipc s10,0x1613 # Pair 4 + beqz a5,a3baae + ld a0,0(a6) + ld a6,246(s9) # Pair 1 + ld a1,250(s4) # Pair 2 + ld a3,254(s6) # Pair 3 + ld a4,258(s10) # Pair 4 + + It's not possible to find all split pairs in a filter like this. + At least in 2024, simple sequential pairs are 99 % of AUIPC uses + so filtering only such pairs gives good results and makes the + filter simpler. However, it's possible that future compilers will + produce different code where sequential pairs aren't as common. + + This filter doesn't convert AUIPC instructions alone because: + + (1) The conversion would be off-by-one (or off-by-4096) half the + time because the lowest 12 bits from inst2 (inst2_imm12) + aren't known. We only know that the absolute address is + pc + AUIPC_imm20 + [-2048, +2047] but there is no way to + know the exact 4096-byte multiple (or 4096 * n + 2048): + there are always two possibilities because AUIPC copies + the 12 lowest bits from pc instead of zeroing them. + + NOTE: The sign-extension of inst2_imm12 adds a tiny bit + of extra complexity to AUIPC math in general but it's not + the reason for this problem. The sign-extension only changes + the relative position of the pc-relative 4096-byte window. + + (2) Matching AUIPC instruction alone requires only seven bits. + When the filter is applied to non-code data, that leads + to many false positives which make compression worse. + As long as most AUIPC+inst2 pairs appear as two consecutive + instructions, converting only such pairs gives better results. + + In assembly, AUIPC+inst2 tend to look like this: + + # Call: + auipc ra, 0x12345 + jalr ra, -42(ra) + + # Tail call: + auipc t1, 0x12345 + jalr zero, -42(t1) + + # Getting the absolute address: + auipc a0, 0x12345 + addi a0, a0, -42 + + # rd of inst2 isn't necessarily the same as rs1 even + # in cases where there is no reason to preserve rs1. + auipc a0, 0x12345 + addi a1, a0, -42 + + As of 2024, 16-bit instructions from the C extension don't + appear as inst2. The RISC-V psABI doesn't list AUIPC+C.* as + a linker relaxation type explicitly but it's not disallowed + either. Usefulness is limited as most of the time the lowest + 12 bits won't fit in a C instruction. This filter doesn't + support AUIPC+C.* combinations because this makes the filter + simpler, there are no test files, and it hopefully will never + be needed anyway. + + (Compare AUIPC to ARM64 where ADRP does set the lowest 12 bits + to zero. The paired instruction has the lowest 12 bits of the + absolute address as is in a zero-extended immediate. Thus the + ARM64 filter doesn't need to care about the instructions that + are paired with ADRP. An off-by-4096 issue can still occur if + the code section isn't aligned with the filter's start offset. + It's not a problem with standalone ELF files but Windows PE + files need start_offset=3072 for best results. Also, a .tar + stores files with 512-byte alignment so most of the time it + won't be the best for ARM64.) + +AUIPC with rd == x0 +------------------- + + AUIPC instructions with rd=x0 are reserved for HINTs in the base + instruction set. Such AUIPC instructions are never filtered. + + As of January 2024, it seems likely that AUIPC with rd=x0 will + be used for landing pads (pseudoinstruction LPAD). LPAD is used + to mark valid targets for indirect jumps (for JALR), for example, + beginnings of functions. The 20-bit immediate in LPAD instruction + is a label, not a pc-relative address. Thus it would be + counterproductive to convert AUIPC instructions with rd=x0. + + Often the next instruction after LPAD won't have rs1=x0 and thus + the filtering would be skipped for that reason alone. However, + it's not good to rely on this. For example, consider a function + that begins like this: + + int foo(int i) + { + if (i <= 234) { + ... + } + + A compiler may generate something like this: + + lpad 0x54321 + li a5, 234 + bgt a0, a5, .L2 + + Converting the pseudoinstructions to raw instructions: + + auipc x0, 0x54321 + addi x15, x0, 234 + blt x15, x10, .L2 + + In this case the filter would undesirably convert the AUIPC+ADDI + pair if the filter didn't explicitly skip AUIPC instructions + that have rd=x0. + +*/ + + +#include "simple_private.h" + + +// This checks two conditions at once: +// - AUIPC rd == inst2 rs1. +// - inst2 opcode has the lowest two bits set. +// +// The 8 bit left shift aligns the rd of AUIPC with the rs1 of inst2. +// By XORing the registers, any non-zero value in those bits indicates the +// registers are not equal and thus not an AUIPC pair. Subtracting 3 from +// inst2 will zero out the first two opcode bits only when they are set. +// The mask tests if any of the register or opcode bits are set (and thus +// not an AUIPC pair). +// +// Alternative expression: (((((auipc) << 8) ^ (inst2)) & 0xF8003) != 3) +#define NOT_AUIPC_PAIR(auipc, inst2) \ + ((((auipc) << 8) ^ ((inst2) - 3)) & 0xF8003) + +// This macro checks multiple conditions: +// (1) AUIPC rd [11:7] == x2 (special rd value). +// (2) AUIPC bits 12 and 13 set (the lowest two opcode bits of packed inst2). +// (3) inst2_rs1 doesn't equal x0 or x2 because the opposite +// conversion is only done when +// auipc_rd != x0 && +// auipc_rd != x2 && +// auipc_rd == inst2_rs1. +// +// The left-hand side takes care of (1) and (2). +// (a) The lowest 7 bits are already known to be AUIPC so subtracting 0x17 +// makes those bits zeros. +// (b) If AUIPC rd equals x2, subtracting 0x100 makes bits [11:7] zeros. +// If rd doesn't equal x2, then there will be at least one non-zero bit +// and the next step (c) is irrelevant. +// (c) If the lowest two opcode bits of the packed inst2 are set in [13:12], +// then subtracting 0x3000 will make those bits zeros. Otherwise there +// will be at least one non-zero bit. +// +// The shift by 18 removes the high bits from the final '>=' comparison and +// ensures that any non-zero result will be larger than any possible result +// from the right-hand side of the comparison. The cast ensures that the +// left-hand side didn't get promoted to a larger type than uint32_t. +// +// On the right-hand side, inst2_rs1 & 0x1D will be non-zero as long as +// inst2_rs1 is not x0 or x2. +// +// The final '>=' comparison will make the expression true if: +// - The subtraction caused any bits to be set (special AUIPC rd value not +// used or inst2 opcode bits not set). (non-zero >= non-zero or 0) +// - The subtraction did not cause any bits to be set but inst2_rs1 was +// x0 or x2. (0 >= 0) +#define NOT_SPECIAL_AUIPC(auipc, inst2_rs1) \ + ((uint32_t)(((auipc) - 0x3117) << 18) >= ((inst2_rs1) & 0x1D)) + + +// The encode and decode functions are split for this filter because of the +// AUIPC+inst2 filtering. This filter design allows a decoder-only +// implementation to be smaller than alternative designs. + +#ifdef HAVE_ENCODER_RISCV +static size_t +riscv_encode(void *simple lzma_attribute((__unused__)), + uint32_t now_pos, + bool is_encoder lzma_attribute((__unused__)), + uint8_t *buffer, size_t size) +{ + // Avoid using i + 8 <= size in the loop condition. + // + // NOTE: If there is a JAL in the last six bytes of the stream, it + // won't be converted. This is intentional to keep the code simpler. + if (size < 8) + return 0; + + size -= 8; + + size_t i; + + // The loop is advanced by 2 bytes every iteration since the + // instruction stream may include 16-bit instructions (C extension). + for (i = 0; i <= size; i += 2) { + uint32_t inst = buffer[i]; + + if (inst == 0xEF) { + // JAL + const uint32_t b1 = buffer[i + 1]; + + // Only filter rd=x1(ra) and rd=x5(t0). + if ((b1 & 0x0D) != 0) + continue; + + // The 20-bit immediate is in four pieces. + // The encoder stores it in big endian form + // since it improves compression slightly. + const uint32_t b2 = buffer[i + 2]; + const uint32_t b3 = buffer[i + 3]; + const uint32_t pc = now_pos + (uint32_t)i; + +// The following chart shows the highest three bytes of JAL, focusing on +// the 20-bit immediate field [31:12]. The first row of numbers is the +// bit position in a 32-bit little endian instruction. The second row of +// numbers shows the order of the immediate field in a J-type instruction. +// The last row is the bit number in each byte. +// +// To determine the amount to shift each bit, subtract the value in +// the last row from the value in the second last row. If the number +// is positive, shift left. If negative, shift right. +// +// For example, at the rightmost side of the chart, the bit 4 in b1 is +// the bit 12 of the address. Thus that bit needs to be shifted left +// by 12 - 4 = 8 bits to put it in the right place in the addr variable. +// +// NOTE: The immediate of a J-type instruction holds bits [20:1] of +// the address. The bit [0] is always 0 and not part of the immediate. +// +// | b3 | b2 | b1 | +// | 31 30 29 28 27 26 25 24 | 23 22 21 20 19 18 17 16 | 15 14 13 12 x x x x | +// | 20 10 9 8 7 6 5 4 | 3 2 1 11 19 18 17 16 | 15 14 13 12 x x x x | +// | 7 6 5 4 3 2 1 0 | 7 6 5 4 3 2 1 0 | 7 6 5 4 x x x x | + + uint32_t addr = ((b1 & 0xF0) << 8) + | ((b2 & 0x0F) << 16) + | ((b2 & 0x10) << 7) + | ((b2 & 0xE0) >> 4) + | ((b3 & 0x7F) << 4) + | ((b3 & 0x80) << 13); + + addr += pc; + + buffer[i + 1] = (uint8_t)((b1 & 0x0F) + | ((addr >> 13) & 0xF0)); + + buffer[i + 2] = (uint8_t)(addr >> 9); + buffer[i + 3] = (uint8_t)(addr >> 1); + + // The "-2" is included because the for-loop will + // always increment by 2. In this case, we want to + // skip an extra 2 bytes since we used 4 bytes + // of input. + i += 4 - 2; + + } else if ((inst & 0x7F) == 0x17) { + // AUIPC + inst |= (uint32_t)buffer[i + 1] << 8; + inst |= (uint32_t)buffer[i + 2] << 16; + inst |= (uint32_t)buffer[i + 3] << 24; + + // Branch based on AUIPC's rd. The bitmask test does + // the same thing as this: + // + // const uint32_t auipc_rd = (inst >> 7) & 0x1F; + // if (auipc_rd != 0 && auipc_rd != 2) { + if (inst & 0xE80) { + // AUIPC's rd doesn't equal x0 or x2. + + // Check if AUIPC+inst2 are a pair. + uint32_t inst2 = read32le(buffer + i + 4); + + if (NOT_AUIPC_PAIR(inst, inst2)) { + // The NOT_AUIPC_PAIR macro allows + // a false AUIPC+AUIPC pair if the + // bits [19:15] (where rs1 would be) + // in the second AUIPC match the rd + // of the first AUIPC. + // + // We must skip enough forward so + // that the first two bytes of the + // second AUIPC cannot get converted. + // Such a conversion could make the + // current pair become a valid pair + // which would desync the decoder. + // + // Skipping six bytes is enough even + // though the above condition looks + // at the lowest four bits of the + // buffer[i + 6] too. This is safe + // because this filter never changes + // those bits if a conversion at + // that position is done. + i += 6 - 2; + continue; + } + + // Convert AUIPC+inst2 to a special format: + // + // - The lowest 7 bits [6:0] retain the + // AUIPC opcode. + // + // - The rd [11:7] is set to x2(sp). x2 is + // used as the stack pointer so AUIPC with + // rd=x2 should be very rare in real-world + // executables. + // + // - The remaining 20 bits [31:12] (that + // normally hold the pc-relative immediate) + // are used to store the lowest 20 bits of + // inst2. That is, the 12-bit immediate of + // inst2 is not included. + // + // - The location of the original inst2 is + // used to store the 32-bit absolute + // address in big endian format. Compared + // to the 20+12-bit split encoding, this + // results in a longer uninterrupted + // sequence of identical common bytes + // when the same address is referred + // with different instruction pairs + // (like AUIPC+LD vs. AUIPC+ADDI) or + // when the occurrences of the same + // pair use different registers. When + // referring to adjacent memory locations + // (like function calls that go via the + // ELF PLT), in big endian order only the + // last 1-2 bytes differ; in little endian + // the differing 1-2 bytes would be in the + // middle of the 8-byte sequence. + // + // When reversing the transformation, the + // original rd of AUIPC can be restored + // from inst2's rs1 as they are required to + // be the same. + + // Arithmetic right shift makes sign extension + // trivial but (1) it's implementation-defined + // behavior (C99/C11/C23 6.5.7-p5) and so is + // (2) casting unsigned to signed (6.3.1.3-p3). + // + // One can check for (1) with + // + // if ((-1 >> 1) == -1) ... + // + // but (2) has to be checked from the + // compiler docs. GCC promises that (1) + // and (2) behave in the common expected + // way and thus + // + // addr += (uint32_t)( + // (int32_t)inst2 >> 20); + // + // does the same as the code below. But since + // the 100 % portable way is only a few bytes + // bigger code and there is no real speed + // difference, let's just use that, especially + // since the decoder doesn't need this at all. + uint32_t addr = inst & 0xFFFFF000; + addr += (inst2 >> 20) + - ((inst2 >> 19) & 0x1000); + + addr += now_pos + (uint32_t)i; + + // Construct the first 32 bits: + // [6:0] AUIPC opcode + // [11:7] Special AUIPC rd = x2 + // [31:12] The lowest 20 bits of inst2 + inst = 0x17 | (2 << 7) | (inst2 << 12); + + write32le(buffer + i, inst); + + // The second 32 bits store the absolute + // address in big endian order. + write32be(buffer + i + 4, addr); + } else { + // AUIPC's rd equals x0 or x2. + // + // x0 indicates a landing pad (LPAD). + // It's always skipped. + // + // AUIPC with rd == x2 is used for the special + // format as explained above. When the input + // contains a byte sequence that matches the + // special format, "fake" decoding must be + // done to keep the filter bijective (that + // is, safe to apply on arbitrary data). + // + // See the "x0 or x2" section in riscv_decode() + // for how the "real" decoding is done. The + // "fake" decoding is a simplified version + // of "real" decoding with the following + // differences (these reduce code size of + // the decoder): + // (1) The lowest 12 bits aren't sign-extended. + // (2) No address conversion is done. + // (3) Big endian format isn't used (the fake + // address is in little endian order). + + // Check if inst matches the special format. + const uint32_t fake_rs1 = inst >> 27; + + if (NOT_SPECIAL_AUIPC(inst, fake_rs1)) { + i += 4 - 2; + continue; + } + + const uint32_t fake_addr = + read32le(buffer + i + 4); + + // Construct the second 32 bits: + // [19:0] Upper 20 bits from AUIPC + // [31:20] The lowest 12 bits of fake_addr + const uint32_t fake_inst2 = (inst >> 12) + | (fake_addr << 20); + + // Construct new first 32 bits from: + // [6:0] AUIPC opcode + // [11:7] Fake AUIPC rd = fake_rs1 + // [31:12] The highest 20 bits of fake_addr + inst = 0x17 | (fake_rs1 << 7) + | (fake_addr & 0xFFFFF000); + + write32le(buffer + i, inst); + write32le(buffer + i + 4, fake_inst2); + } + + i += 8 - 2; + } + } + + return i; +} + + +extern lzma_ret +lzma_simple_riscv_encoder_init(lzma_next_coder *next, + const lzma_allocator *allocator, + const lzma_filter_info *filters) +{ + return lzma_simple_coder_init(next, allocator, filters, + &riscv_encode, 0, 8, 2, true); +} + + +extern LZMA_API(size_t) +lzma_bcj_riscv_encode(uint32_t start_offset, uint8_t *buf, size_t size) +{ + // start_offset must be a multiple of two. + start_offset &= ~UINT32_C(1); + return riscv_encode(NULL, start_offset, true, buf, size); +} +#endif + + +#ifdef HAVE_DECODER_RISCV +static size_t +riscv_decode(void *simple lzma_attribute((__unused__)), + uint32_t now_pos, + bool is_encoder lzma_attribute((__unused__)), + uint8_t *buffer, size_t size) +{ + if (size < 8) + return 0; + + size -= 8; + + size_t i; + for (i = 0; i <= size; i += 2) { + uint32_t inst = buffer[i]; + + if (inst == 0xEF) { + // JAL + const uint32_t b1 = buffer[i + 1]; + + // Only filter rd=x1(ra) and rd=x5(t0). + if ((b1 & 0x0D) != 0) + continue; + + const uint32_t b2 = buffer[i + 2]; + const uint32_t b3 = buffer[i + 3]; + const uint32_t pc = now_pos + (uint32_t)i; + +// | b3 | b2 | b1 | +// | 31 30 29 28 27 26 25 24 | 23 22 21 20 19 18 17 16 | 15 14 13 12 x x x x | +// | 20 10 9 8 7 6 5 4 | 3 2 1 11 19 18 17 16 | 15 14 13 12 x x x x | +// | 7 6 5 4 3 2 1 0 | 7 6 5 4 3 2 1 0 | 7 6 5 4 x x x x | + + uint32_t addr = ((b1 & 0xF0) << 13) + | (b2 << 9) | (b3 << 1); + + addr -= pc; + + buffer[i + 1] = (uint8_t)((b1 & 0x0F) + | ((addr >> 8) & 0xF0)); + + buffer[i + 2] = (uint8_t)(((addr >> 16) & 0x0F) + | ((addr >> 7) & 0x10) + | ((addr << 4) & 0xE0)); + + buffer[i + 3] = (uint8_t)(((addr >> 4) & 0x7F) + | ((addr >> 13) & 0x80)); + + i += 4 - 2; + + } else if ((inst & 0x7F) == 0x17) { + // AUIPC + uint32_t inst2; + + inst |= (uint32_t)buffer[i + 1] << 8; + inst |= (uint32_t)buffer[i + 2] << 16; + inst |= (uint32_t)buffer[i + 3] << 24; + + if (inst & 0xE80) { + // AUIPC's rd doesn't equal x0 or x2. + + // Check if it is a "fake" AUIPC+inst2 pair. + inst2 = read32le(buffer + i + 4); + + if (NOT_AUIPC_PAIR(inst, inst2)) { + i += 6 - 2; + continue; + } + + // Decode (or more like re-encode) the "fake" + // pair. The "fake" format doesn't do + // sign-extension, address conversion, or + // use big endian. (The use of little endian + // allows sharing the write32le() calls in + // the decoder to reduce code size when + // unaligned access isn't supported.) + uint32_t addr = inst & 0xFFFFF000; + addr += inst2 >> 20; + + inst = 0x17 | (2 << 7) | (inst2 << 12); + inst2 = addr; + } else { + // AUIPC's rd equals x0 or x2. + + // Check if inst matches the special format + // used by the encoder. + const uint32_t inst2_rs1 = inst >> 27; + + if (NOT_SPECIAL_AUIPC(inst, inst2_rs1)) { + i += 4 - 2; + continue; + } + + // Decode the "real" pair. + uint32_t addr = read32be(buffer + i + 4); + + addr -= now_pos + (uint32_t)i; + + // The second instruction: + // - Get the lowest 20 bits from inst. + // - Add the lowest 12 bits of the address + // as the immediate field. + inst2 = (inst >> 12) | (addr << 20); + + // AUIPC: + // - rd is the same as inst2_rs1. + // - The sign extension of the lowest 12 bits + // must be taken into account. + inst = 0x17 | (inst2_rs1 << 7) + | ((addr + 0x800) & 0xFFFFF000); + } + + // Both decoder branches write in little endian order. + write32le(buffer + i, inst); + write32le(buffer + i + 4, inst2); + + i += 8 - 2; + } + } + + return i; +} + + +extern lzma_ret +lzma_simple_riscv_decoder_init(lzma_next_coder *next, + const lzma_allocator *allocator, + const lzma_filter_info *filters) +{ + return lzma_simple_coder_init(next, allocator, filters, + &riscv_decode, 0, 8, 2, false); +} + + +extern LZMA_API(size_t) +lzma_bcj_riscv_decode(uint32_t start_offset, uint8_t *buf, size_t size) +{ + // start_offset must be a multiple of two. + start_offset &= ~UINT32_C(1); + return riscv_decode(NULL, start_offset, false, buf, size); +} +#endif diff --git a/src/native/external/xz/src/liblzma/simple/simple_coder.c b/src/native/external/xz/src/liblzma/simple/simple_coder.c new file mode 100644 index 00000000000000..930a532de261db --- /dev/null +++ b/src/native/external/xz/src/liblzma/simple/simple_coder.c @@ -0,0 +1,291 @@ +// SPDX-License-Identifier: 0BSD + +/////////////////////////////////////////////////////////////////////////////// +// +/// \file simple_coder.c +/// \brief Wrapper for simple filters +/// +/// Simple filters don't change the size of the data i.e. number of bytes +/// in equals the number of bytes out. +// +// Author: Lasse Collin +// +/////////////////////////////////////////////////////////////////////////////// + +#include "simple_private.h" + + +/// Copied or encodes/decodes more data to out[]. +static lzma_ret +copy_or_code(lzma_simple_coder *coder, const lzma_allocator *allocator, + const uint8_t *restrict in, size_t *restrict in_pos, + size_t in_size, uint8_t *restrict out, + size_t *restrict out_pos, size_t out_size, lzma_action action) +{ + assert(!coder->end_was_reached); + + if (coder->next.code == NULL) { + lzma_bufcpy(in, in_pos, in_size, out, out_pos, out_size); + + // Check if end of stream was reached. + if (coder->is_encoder && action == LZMA_FINISH + && *in_pos == in_size) + coder->end_was_reached = true; + + } else { + // Call the next coder in the chain to provide us some data. + const lzma_ret ret = coder->next.code( + coder->next.coder, allocator, + in, in_pos, in_size, + out, out_pos, out_size, action); + + if (ret == LZMA_STREAM_END) { + assert(!coder->is_encoder + || action == LZMA_FINISH); + coder->end_was_reached = true; + + } else if (ret != LZMA_OK) { + return ret; + } + } + + return LZMA_OK; +} + + +static size_t +call_filter(lzma_simple_coder *coder, uint8_t *buffer, size_t size) +{ + const size_t filtered = coder->filter(coder->simple, + coder->now_pos, coder->is_encoder, + buffer, size); + coder->now_pos += (uint32_t)filtered; + return filtered; +} + + +static lzma_ret +simple_code(void *coder_ptr, const lzma_allocator *allocator, + const uint8_t *restrict in, size_t *restrict in_pos, + size_t in_size, uint8_t *restrict out, + size_t *restrict out_pos, size_t out_size, lzma_action action) +{ + lzma_simple_coder *coder = coder_ptr; + + // TODO: Add partial support for LZMA_SYNC_FLUSH. We can support it + // in cases when the filter is able to filter everything. With most + // simple filters it can be done at offset that is a multiple of 2, + // 4, or 16. With x86 filter, it needs good luck, and thus cannot + // be made to work predictably. + if (action == LZMA_SYNC_FLUSH) + return LZMA_OPTIONS_ERROR; + + // Flush already filtered data from coder->buffer[] to out[]. + if (coder->pos < coder->filtered) { + lzma_bufcpy(coder->buffer, &coder->pos, coder->filtered, + out, out_pos, out_size); + + // If we couldn't flush all the filtered data, return to + // application immediately. + if (coder->pos < coder->filtered) + return LZMA_OK; + + if (coder->end_was_reached) { + assert(coder->filtered == coder->size); + return LZMA_STREAM_END; + } + } + + // If we get here, there is no filtered data left in the buffer. + coder->filtered = 0; + + assert(!coder->end_was_reached); + + // If there is more output space left than there is unfiltered data + // in coder->buffer[], flush coder->buffer[] to out[], and copy/code + // more data to out[] hopefully filling it completely. Then filter + // the data in out[]. This step is where most of the data gets + // filtered if the buffer sizes used by the application are reasonable. + const size_t out_avail = out_size - *out_pos; + const size_t buf_avail = coder->size - coder->pos; + if (out_avail > buf_avail || buf_avail == 0) { + // Store the old position so that we know from which byte + // to start filtering. + const size_t out_start = *out_pos; + + // Flush data from coder->buffer[] to out[], but don't reset + // coder->pos and coder->size yet. This way the coder can be + // restarted if the next filter in the chain returns e.g. + // LZMA_MEM_ERROR. + // + // Do the memcpy() conditionally because out can be NULL + // (in which case buf_avail is always 0). Calling memcpy() + // with a null-pointer is undefined even if the third + // argument is 0. + if (buf_avail > 0) + memcpy(out + *out_pos, coder->buffer + coder->pos, + buf_avail); + + *out_pos += buf_avail; + + // Copy/Encode/Decode more data to out[]. + { + const lzma_ret ret = copy_or_code(coder, allocator, + in, in_pos, in_size, + out, out_pos, out_size, action); + assert(ret != LZMA_STREAM_END); + if (ret != LZMA_OK) + return ret; + } + + // Filter out[] unless there is nothing to filter. + // This way we avoid null pointer + 0 (undefined behavior) + // when out == NULL. + const size_t size = *out_pos - out_start; + const size_t filtered = size == 0 ? 0 : call_filter( + coder, out + out_start, size); + + const size_t unfiltered = size - filtered; + assert(unfiltered <= coder->allocated / 2); + + // Now we can update coder->pos and coder->size, because + // the next coder in the chain (if any) was successful. + coder->pos = 0; + coder->size = unfiltered; + + if (coder->end_was_reached) { + // The last byte has been copied to out[] already. + // They are left as is. + coder->size = 0; + + } else if (unfiltered > 0) { + // There is unfiltered data left in out[]. Copy it to + // coder->buffer[] and rewind *out_pos appropriately. + *out_pos -= unfiltered; + memcpy(coder->buffer, out + *out_pos, unfiltered); + } + } else if (coder->pos > 0) { + memmove(coder->buffer, coder->buffer + coder->pos, buf_avail); + coder->size -= coder->pos; + coder->pos = 0; + } + + assert(coder->pos == 0); + + // If coder->buffer[] isn't empty, try to fill it by copying/decoding + // more data. Then filter coder->buffer[] and copy the successfully + // filtered data to out[]. It is probable, that some filtered and + // unfiltered data will be left to coder->buffer[]. + if (coder->size > 0) { + { + const lzma_ret ret = copy_or_code(coder, allocator, + in, in_pos, in_size, + coder->buffer, &coder->size, + coder->allocated, action); + assert(ret != LZMA_STREAM_END); + if (ret != LZMA_OK) + return ret; + } + + coder->filtered = call_filter( + coder, coder->buffer, coder->size); + + // Everything is considered to be filtered if coder->buffer[] + // contains the last bytes of the data. + if (coder->end_was_reached) + coder->filtered = coder->size; + + // Flush as much as possible. + lzma_bufcpy(coder->buffer, &coder->pos, coder->filtered, + out, out_pos, out_size); + } + + // Check if we got everything done. + if (coder->end_was_reached && coder->pos == coder->size) + return LZMA_STREAM_END; + + return LZMA_OK; +} + + +static void +simple_coder_end(void *coder_ptr, const lzma_allocator *allocator) +{ + lzma_simple_coder *coder = coder_ptr; + lzma_next_end(&coder->next, allocator); + lzma_free(coder->simple, allocator); + lzma_free(coder, allocator); + return; +} + + +static lzma_ret +simple_coder_update(void *coder_ptr, const lzma_allocator *allocator, + const lzma_filter *filters_null lzma_attribute((__unused__)), + const lzma_filter *reversed_filters) +{ + lzma_simple_coder *coder = coder_ptr; + + // No update support, just call the next filter in the chain. + return lzma_next_filter_update( + &coder->next, allocator, reversed_filters + 1); +} + + +extern lzma_ret +lzma_simple_coder_init(lzma_next_coder *next, const lzma_allocator *allocator, + const lzma_filter_info *filters, + size_t (*filter)(void *simple, uint32_t now_pos, + bool is_encoder, uint8_t *buffer, size_t size), + size_t simple_size, size_t unfiltered_max, + uint32_t alignment, bool is_encoder) +{ + // Allocate memory for the lzma_simple_coder structure if needed. + lzma_simple_coder *coder = next->coder; + if (coder == NULL) { + // Here we allocate space also for the temporary buffer. We + // need twice the size of unfiltered_max, because then it + // is always possible to filter at least unfiltered_max bytes + // more data in coder->buffer[] if it can be filled completely. + coder = lzma_alloc(sizeof(lzma_simple_coder) + + 2 * unfiltered_max, allocator); + if (coder == NULL) + return LZMA_MEM_ERROR; + + next->coder = coder; + next->code = &simple_code; + next->end = &simple_coder_end; + next->update = &simple_coder_update; + + coder->next = LZMA_NEXT_CODER_INIT; + coder->filter = filter; + coder->allocated = 2 * unfiltered_max; + + // Allocate memory for filter-specific data structure. + if (simple_size > 0) { + coder->simple = lzma_alloc(simple_size, allocator); + if (coder->simple == NULL) + return LZMA_MEM_ERROR; + } else { + coder->simple = NULL; + } + } + + if (filters[0].options != NULL) { + const lzma_options_bcj *simple = filters[0].options; + coder->now_pos = simple->start_offset; + if (coder->now_pos & (alignment - 1)) + return LZMA_OPTIONS_ERROR; + } else { + coder->now_pos = 0; + } + + // Reset variables. + coder->is_encoder = is_encoder; + coder->end_was_reached = false; + coder->pos = 0; + coder->filtered = 0; + coder->size = 0; + + return lzma_next_filter_init(&coder->next, allocator, filters + 1); +} diff --git a/src/native/external/xz/src/liblzma/simple/simple_coder.h b/src/native/external/xz/src/liblzma/simple/simple_coder.h new file mode 100644 index 00000000000000..2b762d50071d0e --- /dev/null +++ b/src/native/external/xz/src/liblzma/simple/simple_coder.h @@ -0,0 +1,89 @@ +// SPDX-License-Identifier: 0BSD + +/////////////////////////////////////////////////////////////////////////////// +// +/// \file simple_coder.h +/// \brief Wrapper for simple filters +// +// Author: Lasse Collin +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef LZMA_SIMPLE_CODER_H +#define LZMA_SIMPLE_CODER_H + +#include "common.h" + + +extern lzma_ret lzma_simple_x86_encoder_init(lzma_next_coder *next, + const lzma_allocator *allocator, + const lzma_filter_info *filters); + +extern lzma_ret lzma_simple_x86_decoder_init(lzma_next_coder *next, + const lzma_allocator *allocator, + const lzma_filter_info *filters); + + +extern lzma_ret lzma_simple_powerpc_encoder_init(lzma_next_coder *next, + const lzma_allocator *allocator, + const lzma_filter_info *filters); + +extern lzma_ret lzma_simple_powerpc_decoder_init(lzma_next_coder *next, + const lzma_allocator *allocator, + const lzma_filter_info *filters); + + +extern lzma_ret lzma_simple_ia64_encoder_init(lzma_next_coder *next, + const lzma_allocator *allocator, + const lzma_filter_info *filters); + +extern lzma_ret lzma_simple_ia64_decoder_init(lzma_next_coder *next, + const lzma_allocator *allocator, + const lzma_filter_info *filters); + + +extern lzma_ret lzma_simple_arm_encoder_init(lzma_next_coder *next, + const lzma_allocator *allocator, + const lzma_filter_info *filters); + +extern lzma_ret lzma_simple_arm_decoder_init(lzma_next_coder *next, + const lzma_allocator *allocator, + const lzma_filter_info *filters); + + +extern lzma_ret lzma_simple_armthumb_encoder_init(lzma_next_coder *next, + const lzma_allocator *allocator, + const lzma_filter_info *filters); + +extern lzma_ret lzma_simple_armthumb_decoder_init(lzma_next_coder *next, + const lzma_allocator *allocator, + const lzma_filter_info *filters); + + +extern lzma_ret lzma_simple_arm64_encoder_init(lzma_next_coder *next, + const lzma_allocator *allocator, + const lzma_filter_info *filters); + +extern lzma_ret lzma_simple_arm64_decoder_init(lzma_next_coder *next, + const lzma_allocator *allocator, + const lzma_filter_info *filters); + + +extern lzma_ret lzma_simple_sparc_encoder_init(lzma_next_coder *next, + const lzma_allocator *allocator, + const lzma_filter_info *filters); + +extern lzma_ret lzma_simple_sparc_decoder_init(lzma_next_coder *next, + const lzma_allocator *allocator, + const lzma_filter_info *filters); + + +extern lzma_ret lzma_simple_riscv_encoder_init(lzma_next_coder *next, + const lzma_allocator *allocator, + const lzma_filter_info *filters); + +extern lzma_ret lzma_simple_riscv_decoder_init(lzma_next_coder *next, + const lzma_allocator *allocator, + const lzma_filter_info *filters); + +#endif diff --git a/src/native/external/xz/src/liblzma/simple/simple_decoder.c b/src/native/external/xz/src/liblzma/simple/simple_decoder.c new file mode 100644 index 00000000000000..d9820ee8ed2345 --- /dev/null +++ b/src/native/external/xz/src/liblzma/simple/simple_decoder.c @@ -0,0 +1,39 @@ +// SPDX-License-Identifier: 0BSD + +/////////////////////////////////////////////////////////////////////////////// +// +/// \file simple_decoder.c +/// \brief Properties decoder for simple filters +// +// Author: Lasse Collin +// +/////////////////////////////////////////////////////////////////////////////// + +#include "simple_decoder.h" + + +extern lzma_ret +lzma_simple_props_decode(void **options, const lzma_allocator *allocator, + const uint8_t *props, size_t props_size) +{ + if (props_size == 0) + return LZMA_OK; + + if (props_size != 4) + return LZMA_OPTIONS_ERROR; + + lzma_options_bcj *opt = lzma_alloc( + sizeof(lzma_options_bcj), allocator); + if (opt == NULL) + return LZMA_MEM_ERROR; + + opt->start_offset = read32le(props); + + // Don't leave an options structure allocated if start_offset is zero. + if (opt->start_offset == 0) + lzma_free(opt, allocator); + else + *options = opt; + + return LZMA_OK; +} diff --git a/src/native/external/xz/src/liblzma/simple/simple_decoder.h b/src/native/external/xz/src/liblzma/simple/simple_decoder.h new file mode 100644 index 00000000000000..2ae87bb863288e --- /dev/null +++ b/src/native/external/xz/src/liblzma/simple/simple_decoder.h @@ -0,0 +1,21 @@ +// SPDX-License-Identifier: 0BSD + +/////////////////////////////////////////////////////////////////////////////// +// +/// \file simple_decoder.h +/// \brief Properties decoder for simple filters +// +// Author: Lasse Collin +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef LZMA_SIMPLE_DECODER_H +#define LZMA_SIMPLE_DECODER_H + +#include "simple_coder.h" + +extern lzma_ret lzma_simple_props_decode( + void **options, const lzma_allocator *allocator, + const uint8_t *props, size_t props_size); + +#endif diff --git a/src/native/external/xz/src/liblzma/simple/simple_encoder.c b/src/native/external/xz/src/liblzma/simple/simple_encoder.c new file mode 100644 index 00000000000000..d1f35096e2ae01 --- /dev/null +++ b/src/native/external/xz/src/liblzma/simple/simple_encoder.c @@ -0,0 +1,37 @@ +// SPDX-License-Identifier: 0BSD + +/////////////////////////////////////////////////////////////////////////////// +// +/// \file simple_encoder.c +/// \brief Properties encoder for simple filters +// +// Author: Lasse Collin +// +/////////////////////////////////////////////////////////////////////////////// + +#include "simple_encoder.h" + + +extern lzma_ret +lzma_simple_props_size(uint32_t *size, const void *options) +{ + const lzma_options_bcj *const opt = options; + *size = (opt == NULL || opt->start_offset == 0) ? 0 : 4; + return LZMA_OK; +} + + +extern lzma_ret +lzma_simple_props_encode(const void *options, uint8_t *out) +{ + const lzma_options_bcj *const opt = options; + + // The default start offset is zero, so we don't need to store any + // options unless the start offset is non-zero. + if (opt == NULL || opt->start_offset == 0) + return LZMA_OK; + + write32le(out, opt->start_offset); + + return LZMA_OK; +} diff --git a/src/native/external/xz/src/liblzma/simple/simple_encoder.h b/src/native/external/xz/src/liblzma/simple/simple_encoder.h new file mode 100644 index 00000000000000..bf5edbb1c3f4d8 --- /dev/null +++ b/src/native/external/xz/src/liblzma/simple/simple_encoder.h @@ -0,0 +1,22 @@ +// SPDX-License-Identifier: 0BSD + +/////////////////////////////////////////////////////////////////////////////// +// +/// \file simple_encoder.c +/// \brief Properties encoder for simple filters +// +// Author: Lasse Collin +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef LZMA_SIMPLE_ENCODER_H +#define LZMA_SIMPLE_ENCODER_H + +#include "simple_coder.h" + + +extern lzma_ret lzma_simple_props_size(uint32_t *size, const void *options); + +extern lzma_ret lzma_simple_props_encode(const void *options, uint8_t *out); + +#endif diff --git a/src/native/external/xz/src/liblzma/simple/simple_private.h b/src/native/external/xz/src/liblzma/simple/simple_private.h new file mode 100644 index 00000000000000..7aa360ff49e8ae --- /dev/null +++ b/src/native/external/xz/src/liblzma/simple/simple_private.h @@ -0,0 +1,73 @@ +// SPDX-License-Identifier: 0BSD + +/////////////////////////////////////////////////////////////////////////////// +// +/// \file simple_private.h +/// \brief Private definitions for so called simple filters +// +// Author: Lasse Collin +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef LZMA_SIMPLE_PRIVATE_H +#define LZMA_SIMPLE_PRIVATE_H + +#include "simple_coder.h" + + +typedef struct { + /// Next filter in the chain + lzma_next_coder next; + + /// True if the next coder in the chain has returned LZMA_STREAM_END. + bool end_was_reached; + + /// True if filter() should encode the data; false to decode. + /// Currently all simple filters use the same function for encoding + /// and decoding, because the difference between encoders and decoders + /// is very small. + bool is_encoder; + + /// Pointer to filter-specific function, which does + /// the actual filtering. + size_t (*filter)(void *simple, uint32_t now_pos, + bool is_encoder, uint8_t *buffer, size_t size); + + /// Pointer to filter-specific data, or NULL if filter doesn't need + /// any extra data. + void *simple; + + /// The lowest 32 bits of the current position in the data. Most + /// filters need this to do conversions between absolute and relative + /// addresses. + uint32_t now_pos; + + /// Size of the memory allocated for the buffer. + size_t allocated; + + /// Flushing position in the temporary buffer. buffer[pos] is the + /// next byte to be copied to out[]. + size_t pos; + + /// buffer[filtered] is the first unfiltered byte. When pos is smaller + /// than filtered, there is unflushed filtered data in the buffer. + size_t filtered; + + /// Total number of bytes (both filtered and unfiltered) currently + /// in the temporary buffer. + size_t size; + + /// Temporary buffer + uint8_t buffer[]; +} lzma_simple_coder; + + +extern lzma_ret lzma_simple_coder_init(lzma_next_coder *next, + const lzma_allocator *allocator, + const lzma_filter_info *filters, + size_t (*filter)(void *simple, uint32_t now_pos, + bool is_encoder, uint8_t *buffer, size_t size), + size_t simple_size, size_t unfiltered_max, + uint32_t alignment, bool is_encoder); + +#endif diff --git a/src/native/external/xz/src/liblzma/simple/sparc.c b/src/native/external/xz/src/liblzma/simple/sparc.c new file mode 100644 index 00000000000000..1fa4850458e808 --- /dev/null +++ b/src/native/external/xz/src/liblzma/simple/sparc.c @@ -0,0 +1,87 @@ +// SPDX-License-Identifier: 0BSD + +/////////////////////////////////////////////////////////////////////////////// +// +/// \file sparc.c +/// \brief Filter for SPARC binaries +/// +// Authors: Igor Pavlov +// Lasse Collin +// +/////////////////////////////////////////////////////////////////////////////// + +#include "simple_private.h" + + +static size_t +sparc_code(void *simple lzma_attribute((__unused__)), + uint32_t now_pos, bool is_encoder, + uint8_t *buffer, size_t size) +{ + size &= ~(size_t)3; + + size_t i; + for (i = 0; i < size; i += 4) { + if ((buffer[i] == 0x40 && (buffer[i + 1] & 0xC0) == 0x00) + || (buffer[i] == 0x7F + && (buffer[i + 1] & 0xC0) == 0xC0)) { + + uint32_t src = ((uint32_t)buffer[i + 0] << 24) + | ((uint32_t)buffer[i + 1] << 16) + | ((uint32_t)buffer[i + 2] << 8) + | ((uint32_t)buffer[i + 3]); + + src <<= 2; + + uint32_t dest; + if (is_encoder) + dest = now_pos + (uint32_t)(i) + src; + else + dest = src - (now_pos + (uint32_t)(i)); + + dest >>= 2; + + dest = (((0 - ((dest >> 22) & 1)) << 22) & 0x3FFFFFFF) + | (dest & 0x3FFFFF) + | 0x40000000; + + buffer[i + 0] = (uint8_t)(dest >> 24); + buffer[i + 1] = (uint8_t)(dest >> 16); + buffer[i + 2] = (uint8_t)(dest >> 8); + buffer[i + 3] = (uint8_t)(dest); + } + } + + return i; +} + + +static lzma_ret +sparc_coder_init(lzma_next_coder *next, const lzma_allocator *allocator, + const lzma_filter_info *filters, bool is_encoder) +{ + return lzma_simple_coder_init(next, allocator, filters, + &sparc_code, 0, 4, 4, is_encoder); +} + + +#ifdef HAVE_ENCODER_SPARC +extern lzma_ret +lzma_simple_sparc_encoder_init(lzma_next_coder *next, + const lzma_allocator *allocator, + const lzma_filter_info *filters) +{ + return sparc_coder_init(next, allocator, filters, true); +} +#endif + + +#ifdef HAVE_DECODER_SPARC +extern lzma_ret +lzma_simple_sparc_decoder_init(lzma_next_coder *next, + const lzma_allocator *allocator, + const lzma_filter_info *filters) +{ + return sparc_coder_init(next, allocator, filters, false); +} +#endif diff --git a/src/native/external/xz/src/liblzma/simple/x86.c b/src/native/external/xz/src/liblzma/simple/x86.c new file mode 100644 index 00000000000000..dffa7863131ae9 --- /dev/null +++ b/src/native/external/xz/src/liblzma/simple/x86.c @@ -0,0 +1,181 @@ +// SPDX-License-Identifier: 0BSD + +/////////////////////////////////////////////////////////////////////////////// +// +/// \file x86.c +/// \brief Filter for x86 binaries (BCJ filter) +/// +// Authors: Igor Pavlov +// Lasse Collin +// +/////////////////////////////////////////////////////////////////////////////// + +#include "simple_private.h" + + +#define Test86MSByte(b) ((b) == 0 || (b) == 0xFF) + + +typedef struct { + uint32_t prev_mask; + uint32_t prev_pos; +} lzma_simple_x86; + + +static size_t +x86_code(void *simple_ptr, uint32_t now_pos, bool is_encoder, + uint8_t *buffer, size_t size) +{ + static const uint32_t MASK_TO_BIT_NUMBER[5] = { 0, 1, 2, 2, 3 }; + + lzma_simple_x86 *simple = simple_ptr; + uint32_t prev_mask = simple->prev_mask; + uint32_t prev_pos = simple->prev_pos; + + if (size < 5) + return 0; + + if (now_pos - prev_pos > 5) + prev_pos = now_pos - 5; + + const size_t limit = size - 5; + size_t buffer_pos = 0; + + while (buffer_pos <= limit) { + uint8_t b = buffer[buffer_pos]; + if (b != 0xE8 && b != 0xE9) { + ++buffer_pos; + continue; + } + + const uint32_t offset = now_pos + (uint32_t)(buffer_pos) + - prev_pos; + prev_pos = now_pos + (uint32_t)(buffer_pos); + + if (offset > 5) { + prev_mask = 0; + } else { + for (uint32_t i = 0; i < offset; ++i) { + prev_mask &= 0x77; + prev_mask <<= 1; + } + } + + b = buffer[buffer_pos + 4]; + + if (Test86MSByte(b) && (prev_mask >> 1) <= 4 + && (prev_mask >> 1) != 3) { + + uint32_t src = ((uint32_t)(b) << 24) + | ((uint32_t)(buffer[buffer_pos + 3]) << 16) + | ((uint32_t)(buffer[buffer_pos + 2]) << 8) + | (buffer[buffer_pos + 1]); + + uint32_t dest; + while (true) { + if (is_encoder) + dest = src + (now_pos + (uint32_t)( + buffer_pos) + 5); + else + dest = src - (now_pos + (uint32_t)( + buffer_pos) + 5); + + if (prev_mask == 0) + break; + + const uint32_t i = MASK_TO_BIT_NUMBER[ + prev_mask >> 1]; + + b = (uint8_t)(dest >> (24 - i * 8)); + + if (!Test86MSByte(b)) + break; + + src = dest ^ ((1U << (32 - i * 8)) - 1); + } + + buffer[buffer_pos + 4] + = (uint8_t)(~(((dest >> 24) & 1) - 1)); + buffer[buffer_pos + 3] = (uint8_t)(dest >> 16); + buffer[buffer_pos + 2] = (uint8_t)(dest >> 8); + buffer[buffer_pos + 1] = (uint8_t)(dest); + buffer_pos += 5; + prev_mask = 0; + + } else { + ++buffer_pos; + prev_mask |= 1; + if (Test86MSByte(b)) + prev_mask |= 0x10; + } + } + + simple->prev_mask = prev_mask; + simple->prev_pos = prev_pos; + + return buffer_pos; +} + + +static lzma_ret +x86_coder_init(lzma_next_coder *next, const lzma_allocator *allocator, + const lzma_filter_info *filters, bool is_encoder) +{ + const lzma_ret ret = lzma_simple_coder_init(next, allocator, filters, + &x86_code, sizeof(lzma_simple_x86), 5, 1, is_encoder); + + if (ret == LZMA_OK) { + lzma_simple_coder *coder = next->coder; + lzma_simple_x86 *simple = coder->simple; + simple->prev_mask = 0; + simple->prev_pos = (uint32_t)(-5); + } + + return ret; +} + + +#ifdef HAVE_ENCODER_X86 +extern lzma_ret +lzma_simple_x86_encoder_init(lzma_next_coder *next, + const lzma_allocator *allocator, + const lzma_filter_info *filters) +{ + return x86_coder_init(next, allocator, filters, true); +} + + +extern LZMA_API(size_t) +lzma_bcj_x86_encode(uint32_t start_offset, uint8_t *buf, size_t size) +{ + lzma_simple_x86 simple = { + .prev_mask = 0, + .prev_pos = (uint32_t)(-5), + }; + + return x86_code(&simple, start_offset, true, buf, size); +} +#endif + + +#ifdef HAVE_DECODER_X86 +extern lzma_ret +lzma_simple_x86_decoder_init(lzma_next_coder *next, + const lzma_allocator *allocator, + const lzma_filter_info *filters) +{ + return x86_coder_init(next, allocator, filters, false); +} + + +extern LZMA_API(size_t) +lzma_bcj_x86_decode(uint32_t start_offset, uint8_t *buf, size_t size) +{ + lzma_simple_x86 simple = { + .prev_mask = 0, + .prev_pos = (uint32_t)(-5), + }; + + return x86_code(&simple, start_offset, false, buf, size); +} +#endif diff --git a/src/native/external/xz/src/liblzma/validate_map.sh b/src/native/external/xz/src/liblzma/validate_map.sh new file mode 100644 index 00000000000000..3b706069e0b3b8 --- /dev/null +++ b/src/native/external/xz/src/liblzma/validate_map.sh @@ -0,0 +1,163 @@ +#!/bin/sh +# SPDX-License-Identifier: 0BSD + +############################################################################### +# +# Check liblzma_*.map for certain types of errors. +# +# liblzma_generic.map is for FreeBSD and Solaris and possibly others +# except GNU/Linux. +# +# liblzma_linux.map is for GNU/Linux only. This and the matching extra code +# in the .c files make liblzma >= 5.2.7 compatible with binaries that were +# linked against ill-patched liblzma in RHEL/CentOS 7. By providing the +# compatibility in official XZ Utils release will hopefully prevent people +# from further copying the broken patch to other places when they want +# compatibility with binaries linked on RHEL/CentOS 7. The long version +# of the story: +# +# RHEL/CentOS 7 shipped with 5.1.2alpha, including the threaded +# encoder that is behind #ifdef LZMA_UNSTABLE in the API headers. +# In 5.1.2alpha these symbols are under XZ_5.1.2alpha in liblzma.map. +# API/ABI compatibility tracking isn't done between development +# releases so newer releases didn't have XZ_5.1.2alpha anymore. +# +# Later RHEL/CentOS 7 updated xz to 5.2.2 but they wanted to keep +# the exported symbols compatible with 5.1.2alpha. After checking +# the ABI changes it turned out that >= 5.2.0 ABI is backward +# compatible with the threaded encoder functions from 5.1.2alpha +# (but not vice versa as fixes and extensions to these functions +# were made between 5.1.2alpha and 5.2.0). +# +# In RHEL/CentOS 7, XZ Utils 5.2.2 was patched with +# xz-5.2.2-compat-libs.patch to modify liblzma.map: +# +# - XZ_5.1.2alpha was added with lzma_stream_encoder_mt and +# lzma_stream_encoder_mt_memusage. This matched XZ Utils 5.1.2alpha. +# +# - XZ_5.2 was replaced with XZ_5.2.2. It is clear that this was +# an error; the intention was to keep using XZ_5.2 (XZ_5.2.2 +# has never been used in XZ Utils). So XZ_5.2.2 lists all +# symbols that were listed under XZ_5.2 before the patch. +# lzma_stream_encoder_mt and _mt_memusage are included too so +# they are listed both here and under XZ_5.1.2alpha. +# +# The patch didn't add any __asm__(".symver ...") lines to the .c +# files. Thus the resulting liblzma.so exports the threaded encoder +# functions under XZ_5.1.2alpha only. Listing the two functions +# also under XZ_5.2.2 in liblzma.map has no effect without +# matching .symver lines. +# +# The lack of XZ_5.2 in RHEL/CentOS 7 means that binaries linked +# against unpatched XZ Utils 5.2.x won't run on RHEL/CentOS 7. +# This is unfortunate but this alone isn't too bad as the problem +# is contained within RHEL/CentOS 7 and doesn't affect users +# of other distributions. It could also be fixed internally in +# RHEL/CentOS 7. +# +# The second problem is more serious: In XZ Utils 5.2.2 the API +# headers don't have #ifdef LZMA_UNSTABLE for obvious reasons. +# This is true in RHEL/CentOS 7 version too. Thus now programs +# using new APIs can be compiled without an extra #define. However, +# the programs end up depending on symbol version XZ_5.1.2alpha +# (and possibly also XZ_5.2.2) instead of XZ_5.2 as they would +# with an unpatched XZ Utils 5.2.2. This means that such binaries +# won't run on other distributions shipping XZ Utils >= 5.2.0 as +# they don't provide XZ_5.1.2alpha or XZ_5.2.2; they only provide +# XZ_5.2 (and XZ_5.0). (This includes RHEL/CentOS 8 as the patch +# luckily isn't included there anymore with XZ Utils 5.2.4.) +# +# Binaries built by RHEL/CentOS 7 users get distributed and then +# people wonder why they don't run on some other distribution. +# Seems that people have found out about the patch and been copying +# it to some build scripts, seemingly curing the symptoms but +# actually spreading the illness further and outside RHEL/CentOS 7. +# Adding compatibility in an official XZ Utils release should work +# as a vaccine against this ill patch and stop it from spreading. +# The vaccine is kept GNU/Linux-only as other OSes should be immune +# (hopefully it hasn't spread via some build script to other OSes). +# +# Author: Lasse Collin +# +############################################################################### + +LC_ALL=C +export LC_ALL + +STATUS=0 + +cd "$(dirname "$0")" || exit 1 + +# Get the list of symbols that aren't defined in liblzma_generic.map. +SYMS=$(sed -n 's/^extern LZMA_API([^)]*) \([a-z0-9_]*\)(.*$/\1;/p' \ + api/lzma/*.h \ + | sort \ + | grep -Fve "$(sed '/[{}:*]/d;/^$/d;s/^ //' liblzma_generic.map)") + +# Check that there are no old alpha or beta versions listed. +VER=$(cd ../.. && sh build-aux/version.sh) || exit 1 +NAMES= +case $VER in + *alpha | *beta) + NAMES=$(sed -n 's/^.*XZ_\([^ ]*\)\(alpha\|beta\) .*$/\1\2/p' \ + liblzma_generic.map | grep -Fv "$VER") + ;; +esac + +# Check for duplicate lines. It can catch missing dependencies. +DUPS=$(sort liblzma_generic.map | sed '/^$/d;/^global:$/d' | uniq -d) + +# Check that liblzma_linux.map is in sync with liblzma_generic.map. +# The RHEL/CentOS 7 compatibility symbols are in a fixed location +# so it makes it easy to remove them for comparison with liblzma_generic.map. +# +# NOTE: Putting XZ_5.2 before the compatibility symbols XZ_5.1.2alpha +# and XZ_5.2.2 in liblzma_linux.map is important: If liblzma_linux.map is +# incorrectly used without #define HAVE_SYMBOL_VERSIONS_LINUX, only the first +# occurrence of each function name will be used from liblzma_linux.map; +# the rest are ignored by the linker. Thus having XZ_5.2 before the +# compatibility symbols means that @@XZ_5.2 will be used for the symbols +# listed under XZ_5.2 {...} and the same function names later in +# the file under XZ_5.1.2alpha {...} and XZ_5.2.2 {...} will be +# ignored (@XZ_5.1.2alpha or @XZ_5.2.2 won't be added at all when +# the #define HAVE_SYMBOL_VERSIONS_LINUX isn't used). +IN_SYNC= +if ! sed '111,125d' liblzma_linux.map \ + | cmp -s - liblzma_generic.map; then + IN_SYNC=no +fi + +# Print error messages if needed. +if test -n "$SYMS$NAMES$DUPS$IN_SYNC"; then + echo + echo 'validate_map.sh found problems from liblzma_*.map:' + echo + + if test -n "$SYMS"; then + echo 'liblzma_generic.map lacks the following symbols:' + echo "$SYMS" + echo + fi + + if test -n "$NAMES"; then + echo 'Obsolete alpha or beta version names:' + echo "$NAMES" + echo + fi + + if test -n "$DUPS"; then + echo 'Duplicate lines:' + echo "$DUPS" + echo + fi + + if test -n "$IN_SYNC"; then + echo "liblzma_generic.map and liblzma_linux.map aren't in sync" + echo + fi + + STATUS=1 +fi + +# Exit status is 1 if problems were found, 0 otherwise. +exit "$STATUS" diff --git a/src/native/external/xz/src/lzmainfo/Makefile.am b/src/native/external/xz/src/lzmainfo/Makefile.am new file mode 100644 index 00000000000000..5cb14a156acfc7 --- /dev/null +++ b/src/native/external/xz/src/lzmainfo/Makefile.am @@ -0,0 +1,66 @@ +## SPDX-License-Identifier: 0BSD +## Author: Lasse Collin + +bin_PROGRAMS = lzmainfo + +lzmainfo_SOURCES = \ + lzmainfo.c \ + ../common/tuklib_progname.c \ + ../common/tuklib_mbstr_nonprint.c \ + ../common/tuklib_mbstr_width.c \ + ../common/tuklib_mbstr_wrap.c \ + ../common/tuklib_exit.c + +if COND_W32 +lzmainfo_SOURCES += lzmainfo_w32res.rc +endif + +lzmainfo_CPPFLAGS = \ + -DLOCALEDIR=\"$(localedir)\" \ + -I$(top_srcdir)/src/common \ + -I$(top_srcdir)/src/liblzma/api + +lzmainfo_LDADD = $(top_builddir)/src/liblzma/liblzma.la + +if COND_GNULIB +lzmainfo_CPPFLAGS += \ + -I$(top_builddir)/lib \ + -I$(top_srcdir)/lib +lzmainfo_LDADD += $(top_builddir)/lib/libgnu.a +endif + +lzmainfo_LDADD += $(LTLIBINTL) + + +dist_man_MANS = lzmainfo.1 + +# Windows resource compiler support +.rc.o: + $(RC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \ + $(lzmainfo_CPPFLAGS) $(CPPFLAGS) $(RCFLAGS) -i $< -o $@ + +# The installation of translated man pages abuses Automake internals +# by calling "install-man" with redefined dist_man_MANS and man_MANS. +# If this breaks some day, don't blame Automake developers. +install-data-hook: + languages= ; \ + if test "$(USE_NLS)" = yes && test -d "$(top_srcdir)/po4a/man"; then \ + languages=`ls "$(top_srcdir)/po4a/man"`; \ + fi; \ + for lang in . $$languages; do \ + man="$(top_srcdir)/po4a/man/$$lang/lzmainfo.1" ; \ + if test -f "$$man"; then \ + $(MAKE) dist_man_MANS="$$man" man_MANS= \ + mandir="$(mandir)/$$lang" install-man; \ + fi; \ + done + +uninstall-hook: + languages= ; \ + if test "$(USE_NLS)" = yes && test -d "$(top_srcdir)/po4a/man"; then \ + languages=`ls "$(top_srcdir)/po4a/man"`; \ + fi; \ + for lang in . $$languages; do \ + name=`echo lzmainfo | sed '$(transform)'` && \ + rm -f "$(DESTDIR)$(mandir)/$$lang/man1/$$name.1"; \ + done diff --git a/src/native/external/xz/src/lzmainfo/lzmainfo.1 b/src/native/external/xz/src/lzmainfo/lzmainfo.1 new file mode 100644 index 00000000000000..114e0f30ca6ecc --- /dev/null +++ b/src/native/external/xz/src/lzmainfo/lzmainfo.1 @@ -0,0 +1,58 @@ +.\" SPDX-License-Identifier: 0BSD +.\" +.\" Author: Lasse Collin +.\" +.TH LZMAINFO 1 "2013-06-30" "Tukaani" "XZ Utils" +.SH NAME +lzmainfo \- show information stored in the .lzma file header +.SH SYNOPSIS +.B lzmainfo +.RB [ \-\-help ] +.RB [ \-\-version ] +.RI [ file... ] +.SH DESCRIPTION +.B lzmainfo +shows information stored in the +.B .lzma +file header. +It reads the first 13 bytes from the specified +.IR file , +decodes the header, and prints it to standard output in human +readable format. +If no +.I files +are given or +.I file +is +.BR \- , +standard input is read. +.PP +Usually the most interesting information is +the uncompressed size and the dictionary size. +Uncompressed size can be shown only if +the file is in the non-streamed +.B .lzma +format variant. +The amount of memory required to decompress the file is +a few dozen kilobytes plus the dictionary size. +.PP +.B lzmainfo +is included in XZ Utils primarily for +backward compatibility with LZMA Utils. +.SH "EXIT STATUS" +.TP +.B 0 +All is good. +.TP +.B 1 +An error occurred. +.SH BUGS +.B lzmainfo +uses +.B MB +while the correct suffix would be +.B MiB +(2^20 bytes). +This is to keep the output compatible with LZMA Utils. +.SH "SEE ALSO" +.BR xz (1) diff --git a/src/native/external/xz/src/lzmainfo/lzmainfo.c b/src/native/external/xz/src/lzmainfo/lzmainfo.c new file mode 100644 index 00000000000000..0b0b0d3d09a4d5 --- /dev/null +++ b/src/native/external/xz/src/lzmainfo/lzmainfo.c @@ -0,0 +1,243 @@ +// SPDX-License-Identifier: 0BSD + +/////////////////////////////////////////////////////////////////////////////// +// +/// \file lzmainfo.c +/// \brief lzmainfo tool for compatibility with LZMA Utils +// +// Author: Lasse Collin +// +/////////////////////////////////////////////////////////////////////////////// + +#include "sysdefs.h" +#include +#include + +#include "lzma.h" +#include "getopt.h" +#include "tuklib_gettext.h" +#include "tuklib_progname.h" +#include "tuklib_mbstr_nonprint.h" +#include "tuklib_mbstr_wrap.h" +#include "tuklib_exit.h" + +#ifdef TUKLIB_DOSLIKE +# include +# include +#endif + + +tuklib_attr_noreturn +static void +help(void) +{ + // A few languages use so long strings that we need automatic + // wrapping. A few strings are the same as in xz/message.c and + // should be kept in sync. + static const struct tuklib_wrap_opt wrap0 = { 0, 0, 0, 0, 79 }; + int e = 0; + + printf(_("Usage: %s [--help] [--version] [FILE]...\n"), progname); + + e |= tuklib_wraps(stdout, &wrap0, + W_("Show information stored in the .lzma file header.")); + e |= tuklib_wraps(stdout, &wrap0, + W_("With no FILE, or when FILE is -, read standard input.")); + + putchar('\n'); + + e |= tuklib_wrapf(stdout, &wrap0, + W_("Report bugs to <%s> (in English or Finnish)."), + PACKAGE_BUGREPORT); + + e |= tuklib_wrapf(stdout, &wrap0, + W_("%s home page: <%s>"), PACKAGE_NAME, PACKAGE_URL); + + if (e != 0) { + // Avoid new translatable strings by printing the message + // in pieces. + fprintf(stderr, _("%s: "), progname); + fprintf(stderr, _("Error printing the help text " + "(error code %d)"), e); + fprintf(stderr, "\n"); + } + + tuklib_exit(EXIT_SUCCESS, EXIT_FAILURE, true); +} + + +tuklib_attr_noreturn +static void +version(void) +{ + puts("lzmainfo (" PACKAGE_NAME ") " LZMA_VERSION_STRING); + tuklib_exit(EXIT_SUCCESS, EXIT_FAILURE, true); +} + + +/// Parse command line options. +static void +parse_args(int argc, char **argv) +{ + enum { + OPT_HELP, + OPT_VERSION, + }; + + static const struct option long_opts[] = { + { "help", no_argument, NULL, OPT_HELP }, + { "version", no_argument, NULL, OPT_VERSION }, + { NULL, 0, NULL, 0 } + }; + + int c; + while ((c = getopt_long(argc, argv, "", long_opts, NULL)) != -1) { + switch (c) { + case OPT_HELP: + help(); + + case OPT_VERSION: + version(); + + default: + exit(EXIT_FAILURE); + } + } + + return; +} + + +/// Primitive base-2 logarithm for integers +static uint32_t +my_log2(uint32_t n) +{ + uint32_t e; + for (e = 0; n > 1; ++e, n /= 2) ; + return e; +} + + +/// Parse the .lzma header and display information about it. +static bool +lzmainfo(const char *name, FILE *f) +{ + uint8_t buf[13]; + const size_t size = fread(buf, 1, sizeof(buf), f); + if (size != 13) { + fprintf(stderr, "%s: %s: %s\n", progname, + tuklib_mask_nonprint(name), + ferror(f) ? strerror(errno) + : _("File is too small to be a .lzma file")); + return true; + } + + lzma_filter filter = { .id = LZMA_FILTER_LZMA1 }; + + // Parse the first five bytes. + switch (lzma_properties_decode(&filter, NULL, buf, 5)) { + case LZMA_OK: + break; + + case LZMA_OPTIONS_ERROR: + fprintf(stderr, "%s: %s: %s\n", progname, + tuklib_mask_nonprint(name), + _("Not a .lzma file")); + return true; + + case LZMA_MEM_ERROR: + fprintf(stderr, "%s: %s\n", progname, strerror(ENOMEM)); + exit(EXIT_FAILURE); + + default: + fprintf(stderr, "%s: %s\n", progname, + _("Internal error (bug)")); + exit(EXIT_FAILURE); + } + + // Uncompressed size + uint64_t uncompressed_size = 0; + for (size_t i = 0; i < 8; ++i) + uncompressed_size |= (uint64_t)(buf[5 + i]) << (i * 8); + + // Display the results. We don't want to translate these and also + // will use MB instead of MiB, because someone could be parsing + // this output and we don't want to break that when people move + // from LZMA Utils to XZ Utils. + if (f != stdin) + printf("%s\n", tuklib_mask_nonprint(name)); + + printf("Uncompressed size: "); + if (uncompressed_size == UINT64_MAX) + printf("Unknown"); + else + printf("%" PRIu64 " MB (%" PRIu64 " bytes)", + (uncompressed_size / 1024 + 512) / 1024, + uncompressed_size); + + lzma_options_lzma *opt = filter.options; + + printf("\nDictionary size: " + "%" PRIu32 " MB (2^%" PRIu32 " bytes)\n" + "Literal context bits (lc): %" PRIu32 "\n" + "Literal pos bits (lp): %" PRIu32 "\n" + "Number of pos bits (pb): %" PRIu32 "\n", + (opt->dict_size / 1024 + 512) / 1024, + my_log2(opt->dict_size), opt->lc, opt->lp, opt->pb); + + free(opt); + + return false; +} + + +extern int +main(int argc, char **argv) +{ + tuklib_progname_init(argv); + tuklib_gettext_init(PACKAGE, LOCALEDIR); + + parse_args(argc, argv); + +#ifdef TUKLIB_DOSLIKE + setmode(fileno(stdin), O_BINARY); +#endif + + int ret = EXIT_SUCCESS; + + // We print empty lines around the output only when reading from + // files specified on the command line. This is due to how + // LZMA Utils did it. + if (optind == argc) { + if (lzmainfo("(stdin)", stdin)) + ret = EXIT_FAILURE; + } else { + printf("\n"); + + do { + if (strcmp(argv[optind], "-") == 0) { + if (lzmainfo("(stdin)", stdin)) + ret = EXIT_FAILURE; + } else { + FILE *f = fopen(argv[optind], "r"); + if (f == NULL) { + ret = EXIT_FAILURE; + fprintf(stderr, "%s: %s: %s\n", + progname, + tuklib_mask_nonprint( + argv[optind]), + strerror(errno)); + continue; + } + + if (lzmainfo(argv[optind], f)) + ret = EXIT_FAILURE; + + printf("\n"); + fclose(f); + } + } while (++optind < argc); + } + + tuklib_exit(ret, EXIT_FAILURE, true); +} diff --git a/src/native/external/xz/src/lzmainfo/lzmainfo_w32res.rc b/src/native/external/xz/src/lzmainfo/lzmainfo_w32res.rc new file mode 100644 index 00000000000000..f2ec2889b25cdc --- /dev/null +++ b/src/native/external/xz/src/lzmainfo/lzmainfo_w32res.rc @@ -0,0 +1,11 @@ +/* SPDX-License-Identifier: 0BSD */ + +/* + * Author: Lasse Collin + */ + +#define MY_TYPE VFT_APP +#define MY_NAME "lzmainfo" +#define MY_SUFFIX ".exe" +#define MY_DESC "lzmainfo shows information about .lzma files" +#include "common_w32res.rc" diff --git a/src/native/external/xz/src/scripts/Makefile.am b/src/native/external/xz/src/scripts/Makefile.am new file mode 100644 index 00000000000000..29c561cadb120a --- /dev/null +++ b/src/native/external/xz/src/scripts/Makefile.am @@ -0,0 +1,85 @@ +## SPDX-License-Identifier: 0BSD +## Author: Lasse Collin + +nodist_bin_SCRIPTS = xzdiff xzgrep xzmore xzless +dist_man_MANS = xzdiff.1 xzgrep.1 xzmore.1 xzless.1 + +links = \ + xzdiff-xzcmp \ + xzgrep-xzegrep \ + xzgrep-xzfgrep + +if COND_LZMALINKS +links += \ + xzdiff-lzdiff \ + xzdiff-lzcmp \ + xzgrep-lzgrep \ + xzgrep-lzegrep \ + xzgrep-lzfgrep \ + xzmore-lzmore \ + xzless-lzless +endif + +install-exec-hook: + cd "$(DESTDIR)$(bindir)" && \ + for pair in $(links); do \ + target=`echo $$pair | sed 's/-.*$$//' | sed '$(transform)'` && \ + link=`echo $$pair | sed 's/^.*-//' | sed '$(transform)'` && \ + rm -f "$$link" && \ + $(LN_S) "$$target" "$$link"; \ + done + +# The installation of translated man pages abuses Automake internals +# by calling "install-man" with redefined dist_man_MANS and man_MANS. +# If this breaks some day, don't blame Automake developers. +install-data-hook: + languages= ; \ + if test "$(USE_NLS)" = yes && test -d "$(top_srcdir)/po4a/man"; then \ + languages=`ls "$(top_srcdir)/po4a/man"`; \ + fi; \ + for lang in $$languages; do \ + mans= ; \ + for man in $(dist_man_MANS); do \ + man="$(top_srcdir)/po4a/man/$$lang/$$man" ; \ + if test -f "$$man"; then \ + mans="$$mans $$man"; \ + fi; \ + done; \ + $(MAKE) dist_man_MANS="$$mans" man_MANS= \ + mandir="$(mandir)/$$lang" install-man; \ + done; \ + for lang in . $$languages; do \ + for pair in $(links); do \ + target=`echo $$pair | sed 's/-.*$$//' \ + | sed '$(transform)'` && \ + link=`echo $$pair | sed 's/^.*-//' \ + | sed '$(transform)'` && \ + man1dir="$(DESTDIR)$(mandir)/$$lang/man1" && \ + if test -f "$$man1dir/$$target.1"; then ( \ + cd "$$man1dir" && \ + rm -f "$$link.1" && \ + $(LN_S) "$$target.1" "$$link.1" \ + ); fi; \ + done; \ + done + +uninstall-hook: + cd "$(DESTDIR)$(bindir)" && \ + for pair in $(links); do \ + link=`echo $$pair | sed 's/^.*-//' | sed '$(transform)'` && \ + rm -f "$$link"; \ + done + languages= ; \ + if test "$(USE_NLS)" = yes && test -d "$(top_srcdir)/po4a/man"; then \ + languages=`ls "$(top_srcdir)/po4a/man"`; \ + fi; \ + for lang in . $$languages; do \ + for pair in $(links); do \ + target=`echo $$pair | sed 's/-.*$$//' \ + | sed '$(transform)'` && \ + link=`echo $$pair | sed 's/^.*-//' \ + | sed '$(transform)'` && \ + rm -f "$(DESTDIR)$(mandir)/$$lang/man1/$$target.1" \ + "$(DESTDIR)$(mandir)/$$lang/man1/$$link.1"; \ + done; \ + done diff --git a/src/native/external/xz/src/scripts/xzdiff.1 b/src/native/external/xz/src/scripts/xzdiff.1 new file mode 100644 index 00000000000000..9d2608e50b429e --- /dev/null +++ b/src/native/external/xz/src/scripts/xzdiff.1 @@ -0,0 +1,99 @@ +.\" SPDX-License-Identifier: 0BSD +.\" +.\" Authors: Lasse Collin +.\" Jia Tan +.\" +.\" (Note that this file is not based on gzip's zdiff.1.) +.\" +.TH XZDIFF 1 "2025-03-06" "Tukaani" "XZ Utils" +.SH NAME +xzcmp, xzdiff, lzcmp, lzdiff \- compare compressed files +. +.SH SYNOPSIS +.B xzcmp +.RI [ option... ] +.I file1 +.RI [ file2 ] +.br +.B xzdiff +\&... +.br +.B lzcmp +\&... +(DEPRECATED) +.br +.B lzdiff +\&... +(DEPRECATED) +. +.SH DESCRIPTION +.B xzcmp +and +.B xzdiff +compare uncompressed contents of two files. +Uncompressed data and options are passed to +.BR cmp (1) +or +.BR diff (1) +unless +.B \-\-help +or +.B \-\-version +is specified. +.PP +If both +.I file1 +and +.I file2 +are specified, they can be uncompressed files or files in formats that +.BR xz (1), +.BR gzip (1), +.BR bzip2 (1), +.BR lzop (1), +.BR zstd (1), +or +.BR lz4 (1) +can decompress. +The required decompression commands are determined from +the filename suffixes of +.I file1 +and +.IR file2 . +A file with an unknown suffix is assumed to be either uncompressed +or in a format that +.BR xz (1) +can decompress. +.PP +If only one filename is provided, +.I file1 +must have a suffix of a supported compression format and the name for +.I file2 +is assumed to be +.I file1 +with the compression format suffix removed. +.PP +The commands +.B lzcmp +and +.B lzdiff +are provided for backward compatibility with LZMA Utils. +They are deprecated and will be removed in a future version. +. +.SH EXIT STATUS +If a decompression error occurs, the exit status is +.BR 2 . +Otherwise the exit status of +.BR cmp (1) +or +.BR diff (1) +is used. +. +.SH "SEE ALSO" +.BR cmp (1), +.BR diff (1), +.BR xz (1), +.BR gzip (1), +.BR bzip2 (1), +.BR lzop (1), +.BR zstd (1), +.BR lz4 (1) diff --git a/src/native/external/xz/src/scripts/xzdiff.in b/src/native/external/xz/src/scripts/xzdiff.in new file mode 100644 index 00000000000000..8e1e0a22ca0eb2 --- /dev/null +++ b/src/native/external/xz/src/scripts/xzdiff.in @@ -0,0 +1,224 @@ +#!@POSIX_SHELL@ +# SPDX-License-Identifier: GPL-2.0-or-later + +# Copyright (C) 1998, 2002, 2006, 2007 Free Software Foundation +# Copyright (C) 1993 Jean-loup Gailly + +# Modified for XZ Utils by Andrew Dudman and Lasse Collin. + +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# shellcheck shell=sh disable=SC1003,SC2016 + +@enable_path_for_scripts@ +#SET_PATH - This line is a placeholder to ease patching this script. + +# Instead of unsetting XZ_OPT, just make sure that xz will use file format +# autodetection. This way memory usage limit and thread limit can be +# specified via XZ_OPT. With gzip, bzip2, and lzop it's OK to just unset the +# environment variables. +xz='@xz@ --format=auto' +unset GZIP BZIP BZIP2 LZOP + +case ${0##*/} in + *cmp*) prog=xzcmp; cmp=${CMP:-cmp};; + *) prog=xzdiff; cmp=${DIFF:-diff};; +esac + +version="$prog (@PACKAGE_NAME@) @PACKAGE_VERSION@" + +usage="Usage: ${0##*/} [OPTION]... FILE1 [FILE2] +Compare FILE1 to FILE2, using their uncompressed contents if they are +compressed. If FILE2 is omitted, then the files compared are FILE1 and +FILE1 from which the compression format suffix has been stripped. + +Do comparisons like '$cmp' does. OPTIONs are the same as for '$cmp'. + +Report bugs to <@PACKAGE_BUGREPORT@>." + +# sed script to escape all ' for the shell, and then (to handle trailing +# newlines correctly) turn trailing X on last line into '. +escape=' + s/'\''/'\''\\'\'''\''/g + $s/X$/'\''/ +' + +while :; do + case $1 in + --h*) printf '%s\n' "$usage" || exit 2; exit;; + --v*) printf '%s\n' "$version" || exit 2; exit;; + --) shift; break;; + -*\'*) cmp="$cmp '"`printf '%sX\n' "$1" | sed "$escape"`;; + -?*) cmp="$cmp '$1'";; + *) break;; + esac + shift +done +cmp="$cmp --" + +for file; do + test "X$file" = X- || true < "$file" || exit 2 +done + +# xz needs -qQ to ignore warnings like unsupported check type. +xz1="$xz -qQ" +xz2="$xz -qQ" +xz_status=0 +exec 3>&1 + +if test $# -eq 1; then + case $1 in + *[-.]xz | *[-.]lzma | *[-.]lz | *.t[lx]z) + ;; + *[-.]bz2 | *.tbz | *.tbz2) + xz1=bzip2;; + *[-.][zZ] | *_z | *[-.]gz | *.t[ag]z) + xz1=gzip;; + *[-.]lzo | *.tzo) + xz1=lzop;; + *[-.]zst | *.tzst) + xz1='zstd -q';; + *[-.]lz4) + xz1=lz4;; + *) + printf '%s\n' "$0: $1: Unknown compressed file name suffix" >&2 + exit 2;; + esac + # The FILE variable is used with eval, and shellcheck doesn't see that. + # shellcheck disable=SC2034 + case $1 in + *[-.][zZ] | *_z | *[-.][gx]z | *[-.]bz2 | *[-.]lzma | *[-.]lz | *[-.]lzo | *[-.]zst | *[-.]lz4) + FILE=`expr "X$1" : 'X\(.*\)[-.][abglmostxzZ24]*$'`;; + *.t[abglx]z) + FILE=`expr "X$1" : 'X\(.*[-.]t\)[abglx]z$'`ar;; + *.tbz2) + FILE=`expr "X$1" : 'X\(.*[-.]t\)bz2$'`ar;; + *.tzo) + FILE=`expr "X$1" : 'X\(.*[-.]t\)zo$'`ar;; + *.tzst) + FILE=`expr "X$1" : 'X\(.*[-.]t\)zst$'`ar;; + esac + xz_status=$( + exec 4>&1 + ($xz1 -cd -- "$1" 4>&-; echo $? >&4) 3>&- | eval "$cmp" - '"$FILE"' >&3 + ) +elif test $# -eq 2; then + case $1 in + *[-.]bz2 | *.tbz | *.tbz2) xz1=bzip2;; + *[-.][zZ] | *_z | *[-.]gz | *.t[ag]z) xz1=gzip;; + *[-.]lzo | *.tzo) xz1=lzop;; + *[-.]zst | *.tzst) xz1='zstd -q';; + *[-.]lz4) xz1=lz4;; + esac + case $2 in + *[-.]bz2 | *.tbz | *.tbz2) xz2=bzip2;; + *[-.][zZ] | *_z | *[-.]gz | *.t[ag]z) xz2=gzip;; + *[-.]lzo | *.tzo) xz2=lzop;; + *[-.]zst | *.tzst) xz2='zstd -q';; + *[-.]lz4) xz2=lz4;; + esac + case $1 in + *[-.][zZ] | *_z | *[-.][gx]z | *[-.]bz2 | *[-.]lzma | *[-.]lz | *.t[abglx]z | *.tbz2 | *[-.]lzo | *.tzo | *[-.]zst | *.tzst | *[-.]lz4 | -) + case "$2" in + *[-.][zZ] | *_z | *[-.][gx]z | *[-.]bz2 | *[-.]lzma | *[-.]lz | *.t[abglx]z | *.tbz2 | *[-.]lzo | *.tzo | *[-.]zst | *.tzst | *[-.]lz4 | -) + if test "$1$2" = --; then + xz_status=$( + exec 4>&1 + ($xz1 -cdf - 4>&-; echo $? >&4) 3>&- | + eval "$cmp" - - >&3 + ) + elif # Reject Solaris 8's buggy /bin/bash 2.03. + echo X | (echo X | eval "$cmp" /dev/fd/5 - >/dev/null 2>&1) 5<&0; then + # NOTE: xz_status will contain two numbers. + xz_status=$( + exec 4>&1 + ($xz1 -cdf -- "$1" 4>&-; echo $? >&4) 3>&- | + ( ($xz2 -cdf -- "$2" 4>&-; echo $? >&4) 3>&- 5<&- &3) 5<&0 + ) + else + F=`expr "/$2" : '.*/\(.*\)[-.][ablmotxz2]*$'` || F=$prog + tmp= + trap ' + test -n "$tmp" && rm -rf "$tmp" + (exit 2); exit 2 + ' HUP INT PIPE TERM 0 + if type mktemp >/dev/null 2>&1; then + # Note that FreeBSD's mktemp isn't fully compatible with + # the implementations from mktemp.org and GNU coreutils. + # It is important that the -t argument is the last argument + # and that no "--" is used between -t and the template argument. + # This way this command works on all implementations. + tmp=`mktemp -d -t "$prog.XXXXXXXXXX"` || exit 2 + else + # Fallback code if mktemp is missing. This isn't as + # robust as using mktemp since this doesn't try with + # different file names in case of a file name conflict. + # + # There's no need to save the original umask since + # we don't create any non-temp files. Note that using + # mkdir -m 0077 isn't secure since some mkdir implementations + # create the dir with the default umask and chmod the + # the dir afterwards. + umask 0077 + mkdir -- "${TMPDIR-/tmp}/$prog.$$" || exit 2 + tmp="${TMPDIR-/tmp}/$prog.$$" + fi + $xz2 -cdf -- "$2" > "$tmp/$F" || exit 2 + xz_status=$( + exec 4>&1 + ($xz1 -cdf -- "$1" 4>&-; echo $? >&4) 3>&- | + eval "$cmp" - '"$tmp/$F"' >&3 + ) + cmp_status=$? + rm -rf "$tmp" || xz_status=$? + trap - HUP INT PIPE TERM 0 + (exit $cmp_status) + fi;; + *) + xz_status=$( + exec 4>&1 + ($xz1 -cdf -- "$1" 4>&-; echo $? >&4) 3>&- | + eval "$cmp" - '"$2"' >&3 + );; + esac;; + *) + case "$2" in + *[-.][zZ] | *_z | *[-.][gx]z | *[-.]bz2 | *[-.]lzma | *[-.]lz | *.t[abglx]z | *.tbz2 | *[-.]lzo | *.tzo | *[-.]zst | *.tzst | *[-.]lz4 | -) + xz_status=$( + exec 4>&1 + ($xz2 -cdf -- "$2" 4>&-; echo $? >&4) 3>&- | + eval "$cmp" '"$1"' - >&3 + );; + *) + eval "$cmp" '"$1"' '"$2"';; + esac;; + esac +else + printf '%s\n' "$0: Invalid number of operands; try '${0##*/} --help' for help" >&2 + exit 2 +fi + +cmp_status=$? +for num in $xz_status ; do + # 0 from decompressor means successful decompression. SIGPIPE from + # decompressor is possible when diff or cmp exits before the whole file + # has been decompressed. In that case we want to retain the exit status + # from diff or cmp. Note that using "trap '' PIPE" is not possible + # because gzip changes its behavior (including exit status) if SIGPIPE + # is ignored. + test "$num" -eq 0 && continue + test "$num" -ge 128 \ + && test "$(kill -l "$num" 2> /dev/null)" = "PIPE" \ + && continue + exit 2 +done +exit $cmp_status diff --git a/src/native/external/xz/src/scripts/xzgrep.1 b/src/native/external/xz/src/scripts/xzgrep.1 new file mode 100644 index 00000000000000..c6bb0eb41f130d --- /dev/null +++ b/src/native/external/xz/src/scripts/xzgrep.1 @@ -0,0 +1,148 @@ +.\" SPDX-License-Identifier: 0BSD +.\" +.\" Authors: Lasse Collin +.\" Jia Tan +.\" +.\" (Note that this file is not based on gzip's zgrep.1.) +.\" +.TH XZGREP 1 "2025-03-06" "Tukaani" "XZ Utils" +.SH NAME +xzgrep \- search possibly-compressed files for patterns +. +.SH SYNOPSIS +.B xzgrep +.RI [ option... ] +.RI [ pattern_list ] +.RI [ file... ] +.br +.B xzegrep +\&... +.br +.B xzfgrep +\&... +.br +.B lzgrep +\&... +(DEPRECATED) +.br +.B lzegrep +\&... +(DEPRECATED) +.br +.B lzfgrep +\&... +(DEPRECATED) +. +.SH DESCRIPTION +.B xzgrep +invokes +.BR grep (1) +on uncompressed contents of files. +The formats of the +.I files +are determined from the filename suffixes. +Any +.I file +with a suffix supported by +.BR xz (1), +.BR gzip (1), +.BR bzip2 (1), +.BR lzop (1), +.BR zstd (1), +or +.BR lz4 (1) +will be decompressed; +all other files are assumed to be uncompressed. +.PP +If no +.I files +are specified or +.I file +is +.B \- +then standard input is read. +When reading from standard input, only files supported by +.BR xz (1) +are decompressed. +Other files are assumed to be in uncompressed form already. +.PP +Most +.I options +of +.BR grep (1) +are supported. +However, the following options are not supported: +.IP "" 4 +.BR \-r , +.B \-\-recursive +.IP "" 4 +.BR \-R , +.B \-\-dereference\-recursive +.IP "" 4 +.BR \-d , +.BI \-\-directories= action +.IP "" 4 +.BR \-Z , +.B \-\-null +.IP "" 4 +.BR \-z , +.B \-\-null\-data +.IP "" 4 +.BI \-\-include= glob +.IP "" 4 +.BI \-\-exclude= glob +.IP "" 4 +.BI \-\-exclude\-from= file +.IP "" 4 +.BI \-\-exclude\-dir= glob +.PP +.B xzegrep +is an alias for +.BR "xzgrep \-E" . +.B xzfgrep +is an alias for +.BR "xzgrep \-F" . +.PP +The commands +.BR lzgrep , +.BR lzegrep , +and +.B lzfgrep +are provided for backward compatibility with LZMA Utils. +They are deprecated and will be removed in a future version. +. +.SH EXIT STATUS +.TP +0 +At least one match was found from at least one of the input files. +No errors occurred. +.TP +1 +No matches were found from any of the input files. +No errors occurred. +.TP +>1 +One or more errors occurred. +It is unknown if matches were found. +. +.SH ENVIRONMENT +.TP +.B GREP +If +.B GREP +is set to a non-empty value, +it is used instead of +.BR "grep" , +.BR "grep \-E" , +or +.BR "grep \-F" . +. +.SH "SEE ALSO" +.BR grep (1), +.BR xz (1), +.BR gzip (1), +.BR bzip2 (1), +.BR lzop (1), +.BR zstd (1), +.BR lz4 (1), +.BR zgrep (1) diff --git a/src/native/external/xz/src/scripts/xzgrep.in b/src/native/external/xz/src/scripts/xzgrep.in new file mode 100644 index 00000000000000..511ea39c7fd6ee --- /dev/null +++ b/src/native/external/xz/src/scripts/xzgrep.in @@ -0,0 +1,302 @@ +#!@POSIX_SHELL@ +# SPDX-License-Identifier: GPL-2.0-or-later + +# xzgrep -- a wrapper around a grep program that decompresses files as needed +# Adapted from a version sent by Charles Levert + +# Copyright (C) 1998, 2001, 2002, 2006, 2007 Free Software Foundation +# Copyright (C) 1993 Jean-loup Gailly + +# Modified for XZ Utils by Andrew Dudman and Lasse Collin. + +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# shellcheck shell=sh disable=SC1003,SC2016 + +@enable_path_for_scripts@ +#SET_PATH - This line is a placeholder to ease patching this script. + +# Instead of unsetting XZ_OPT, just make sure that xz will use file format +# autodetection. This way memory usage limit and thread limit can be +# specified via XZ_OPT. With gzip, bzip2, and lzop it's OK to just unset the +# environment variables. +xz='@xz@ --format=auto' +unset GZIP BZIP BZIP2 LZOP + +case ${0##*/} in + *egrep*) prog=xzegrep; grep=${GREP:-grep -E};; + *fgrep*) prog=xzfgrep; grep=${GREP:-grep -F};; + *) prog=xzgrep; grep=${GREP:-grep};; +esac + +version="$prog (@PACKAGE_NAME@) @PACKAGE_VERSION@" + +usage="Usage: ${0##*/} [OPTION]... [-e] PATTERN [FILE]... +Look for instances of PATTERN in the input FILEs, using their +uncompressed contents if they are compressed. + +OPTIONs are the same as for '$grep'. + +Report bugs to <@PACKAGE_BUGREPORT@>." + +# sed script to escape all ' for the shell, and then (to handle trailing +# newlines correctly) turn trailing X on last line into '. +escape=' + s/'\''/'\''\\'\'''\''/g + $s/X$/'\''/ +' +operands= +have_pat=0 +files_with_matches=0 +files_without_matches=0 +no_filename=0 +with_filename=0 + +# See if -H and --label options are supported (GNU and *BSDs). +if test f:x = "$(eval "echo x | $grep -H --label=f x 2> /dev/null")"; then + grep_supports_label=1 +else + grep_supports_label=0 +fi + +while test $# -ne 0; do + option=$1 + shift + optarg= + + case $option in + (-[0123456789abcdEFGhHiIKlLnoPqrRsTuUvVwxyzZ]*[!0123456789]*) + # Something like -Fiv was specified, that is, $option contains more + # than one option of which the first option (in this example -F) + # doesn't take an argument. Split the first option into a standalone + # argument and continue parsing the rest of the options (in this example, + # replace -Fiv with -iv in the argument list and set option=-F). + # + # If there are digits [0-9] they are treated as if they were a single + # option character because this syntax is an alias for -C for GNU grep. + # For example, "grep -25F" is equivalent to "grep -C25 -F". If only + # digits are specified like "grep -25" we don't get here because the + # above pattern in the case-statement doesn't match such strings. + arg2=-\'$(LC_ALL=C expr "X${option}X" : 'X-.[0-9]*\(.*\)' | + LC_ALL=C sed "$escape") + eval "set -- $arg2 "'${1+"$@"}' + option=$(LC_ALL=C expr "X$option" : 'X\(-.[0-9]*\)');; + (--binary-*=* | --[lm]a*=* | --reg*=*) + # These options require an argument and an argument has been provided + # with the --foo=argument syntax. All is good. + ;; + (-[ABCDefmX] | --binary-* | --file | --[lm]a* | --reg*) + # These options require an argument which should now be in $1. + # If it isn't, display an error and exit. + case ${1?"$option option requires an argument"} in + (*\'*) + optarg=" '"$(printf '%sX\n' "$1" | LC_ALL=C sed "$escape");; + (*) + optarg=" '$1'";; + esac + shift;; + (--) + break;; + (-?*) + ;; + (*) + case $option in + (*\'*) + operands="$operands '"$(printf '%sX\n' "$option" | + LC_ALL=C sed "$escape");; + (*) + operands="$operands '$option'";; + esac + ${POSIXLY_CORRECT+break} + continue;; + esac + + case $option in + (-[drRzZ] | --di* | --exc* | --inc* | --rec* | --nu*) + printf >&2 '%s: %s: Option not supported\n' "$0" "$option" + exit 2;; + (-[ef]* | --file | --file=* | --reg*) + have_pat=1;; + (--h | --he | --hel | --help) + printf '%s\n' "$usage" || exit 2 + exit;; + (-H | --wi | --wit | --with | --with- | --with-f | --with-fi \ + | --with-fil | --with-file | --with-filen | --with-filena | --with-filenam \ + | --with-filename) + with_filename=1 + continue;; + (-l | --files-with-*) + files_with_matches=1 + continue;; + (-L | --files-witho*) + files_without_matches=1 + continue;; + (-h | --no-f*) + no_filename=1;; + (-V | --v | --ve | --ver | --vers | --versi | --versio | --version) + printf '%s\n' "$version" || exit 2 + exit;; + esac + + case $option in + (*\'?*) + option=\'$(printf '%sX\n' "$option" | LC_ALL=C sed "$escape");; + (*) + option="'$option'";; + esac + + grep="$grep $option$optarg" +done + +eval "set -- $operands "'${1+"$@"}' + +if test $have_pat -eq 0; then + case ${1?"Missing pattern; try '${0##*/} --help' for help"} in + (*\'*) + grep="$grep -e '"$(printf '%sX\n' "$1" | LC_ALL=C sed "$escape");; + (*) + grep="$grep -e '$1'";; + esac + shift +fi + +if test $# -eq 0; then + set -- - +fi + +exec 3>&1 + +# res=1 means that no file matched yet +res=1 + +for i; do + case $i in + *[-.][zZ] | *_z | *[-.]gz | *.t[ag]z) uncompress="gzip -cdf";; + *[-.]bz2 | *[-.]tbz | *.tbz2) uncompress="bzip2 -cdf";; + *[-.]lzo | *[-.]tzo) uncompress="lzop -cdf";; + *[-.]zst | *[-.]tzst) uncompress="zstd -cdfq";; # zstd needs -q. + *[-.]lz4) uncompress="lz4 -cdf";; + *) uncompress="$xz -cdfqQ";; # -qQ to ignore warnings like unsupp. check. + esac + # xz_status will hold the decompressor's exit status. + # Exit status of grep (and in rare cases, printf or sed) is + # available as the exit status of this assignment command. + xz_status=$( + exec 5>&1 + ($uncompress -- "$i" 5>&-; echo $? >&5) 3>&- | + if test $files_with_matches -eq 1; then + eval "$grep -q" && { printf '%s\n' "$i" || exit 2; } + elif test $files_without_matches -eq 1; then + eval "$grep -q" || { + r=$? + if test $r -eq 1; then + printf '%s\n' "$i" || r=2 + fi + exit $r + } + elif test $with_filename -eq 0 && + { test $# -eq 1 || test $no_filename -eq 1; }; then + eval "$grep" + elif test $grep_supports_label -eq 1; then + # The grep implementation in use allows us to specify the filename + # that grep will prefix to the output lines. This is faster and + # less prone to security bugs than the fallback method that uses sed. + # This also avoids confusing output with GNU grep >= 3.5 (2020-09-27) + # which prints "binary file matches" to stderr instead of stdout. + # + # If reading from stdin, let grep use whatever name it prefers for + # stdin. With GNU grep it is a locale-specific translated string. + if test "x$i" = "x-"; then + eval "$grep -H" + else + eval "$grep -H --label \"\$i\"" + fi + else + # Append a colon so that the last character will never be a newline + # which would otherwise get lost in shell command substitution. + i="$i:" + + # Escape & \ | and newlines only if such characters are present + # (speed optimization). + case $i in + (*' +'* | *'&'* | *'\'* | *'|'*) + # If sed fails, set i to a known safe string to ensure that + # failing sed did not create a half-escaped dangerous string. + i=$(printf '%s\n' "$i" | LC_ALL=C sed 's/[&\|]/\\&/g; $!s/$/\\/') || + i='(unknown filename):';; + esac + + # $i already ends with a colon so do not add it here. + sed_script="s|^|$i|" + + # If grep or sed fails, pick the larger value of the two exit statuses. + # If sed fails, use at least 2 since we use >= 2 to indicate errors. + r=$( + exec 4>&1 + (eval "$grep" 4>&-; echo $? >&4) 3>&- | + LC_ALL=C sed "$sed_script" >&3 4>&- + ) || { + sed_status=$? + test "$sed_status" -lt 2 && sed_status=2 + test "$r" -lt "$sed_status" && r=$sed_status + } + exit $r + fi >&3 5>&- + ) + r=$? + + # If grep or sed or other non-decompression command failed with a signal, + # exit immediately and ignore the possible remaining files. + # + # NOTE: Instead of 128 + signal_number, some shells use + # 256 + signal_number (ksh) or 384 + signal_number (yash). + # This is fine for us since their "exit" and "kill -l" commands take + # this into account. (At least the versions I tried do but there is + # a report of an old ksh variant whose "exit" truncates the exit status + # to 8 bits without any special handling for values indicating a signal.) + test "$r" -ge 128 && exit "$r" + + if test -z "$xz_status"; then + # Something unusual happened, for example, we got a signal and + # the exit status of the decompressor was never echoed and thus + # $xz_status is empty. Exit immediately and ignore the possible + # remaining files. + exit 2 + elif test "$xz_status" -ge 128; then + # The decompressor died due to a signal. SIGPIPE is ignored since it can + # occur if grep exits before the whole file has been decompressed (grep -q + # can do that). If the decompressor died with some other signal, exit + # immediately and ignore the possible remaining files. + test "$(kill -l "$xz_status" 2> /dev/null)" != "PIPE" && exit "$xz_status" + elif test "$xz_status" -gt 0; then + # Decompression failed but we will continue with the remaining + # files anyway. Set exit status to at least 2 to indicate an error. + test "$r" -lt 2 && r=2 + fi + + # Since res=1 is the initial value, we only need to care about + # matches (r == 0) and errors (r >= 2) here; r == 1 can be ignored. + if test "$r" -ge 2; then + # An error occurred in decompressor, grep, or some other command. Update + # res unless a larger error code has been seen with an earlier file. + test "$res" -lt "$r" && res=$r + elif test "$r" -eq 0; then + # grep found a match and no errors occurred. Update res if no errors have + # occurred with earlier files. + test "$res" -eq 1 && res=0 + fi +done + +# 0: At least one file matched and no errors occurred. +# 1: No matches were found and no errors occurred. +# >=2: Error. It's unknown if matches were found. +exit "$res" diff --git a/src/native/external/xz/src/scripts/xzless.1 b/src/native/external/xz/src/scripts/xzless.1 new file mode 100644 index 00000000000000..17118b9b85cf48 --- /dev/null +++ b/src/native/external/xz/src/scripts/xzless.1 @@ -0,0 +1,69 @@ +.\" SPDX-License-Identifier: 0BSD +.\" +.\" Authors: Andrew Dudman +.\" Lasse Collin +.\" +.\" (Note that this file is not based on gzip's zless.1.) +.\" +.TH XZLESS 1 "2025-03-06" "Tukaani" "XZ Utils" +.SH NAME +xzless, lzless \- view xz or lzma compressed (text) files +.SH SYNOPSIS +.B xzless +.RI [ file ...] +.br +.B lzless +.RI [ file ...] +(DEPRECATED) +.SH DESCRIPTION +.B xzless +is a filter that displays text from compressed files to a terminal. +Files supported by +.BR xz (1) +are decompressed; +other files are assumed to be in uncompressed form already. +If no +.I files +are given, +.B xzless +reads from standard input. +.PP +.B xzless +uses +.BR less (1) +to present its output. +Unlike +.BR xzmore , +its choice of pager cannot be altered by +setting an environment variable. +Commands are based on both +.BR more (1) +and +.BR vi (1) +and allow back and forth movement and searching. +See the +.BR less (1) +manual for more information. +.PP +The command named +.B lzless +is provided for backward compatibility with LZMA Utils. +It is deprecated and will be removed in a future version. +.SH ENVIRONMENT +.TP +.B LESSMETACHARS +A list of characters special to the shell. +Set by +.B xzless +unless it is already set in the environment. +.TP +.B LESSOPEN +Set to a command line to invoke the +.BR xz (1) +decompressor for preprocessing the input files to +.BR less (1). +.SH "SEE ALSO" +.BR less (1), +.BR xz (1), +.BR xzmore (1), +.BR zless (1) diff --git a/src/native/external/xz/src/scripts/xzless.in b/src/native/external/xz/src/scripts/xzless.in new file mode 100644 index 00000000000000..47f5b0b2ed7099 --- /dev/null +++ b/src/native/external/xz/src/scripts/xzless.in @@ -0,0 +1,76 @@ +#!@POSIX_SHELL@ +# SPDX-License-Identifier: GPL-2.0-or-later + +# Copyright (C) 1998, 2002, 2006, 2007 Free Software Foundation + +# The original version for gzip was written by Paul Eggert. +# Modified for XZ Utils by Andrew Dudman and Lasse Collin. + +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# shellcheck shell=sh disable=SC2089,SC2090 + +@enable_path_for_scripts@ +#SET_PATH - This line is a placeholder to ease patching this script. + +# Instead of unsetting XZ_OPT, just make sure that xz will use file format +# autodetection. This way memory usage limit and thread limit can be +# specified via XZ_OPT. +xz='@xz@ --format=auto' + +version='xzless (@PACKAGE_NAME@) @PACKAGE_VERSION@' + +usage="Usage: ${0##*/} [OPTION]... [FILE]... +Like 'less', but operate on the uncompressed contents of xz compressed FILEs. + +Options are the same as for 'less'. + +Report bugs to <@PACKAGE_BUGREPORT@>." + +case $1 in + --help) printf '%s\n' "$usage" || exit 2; exit;; + --version) printf '%s\n' "$version" || exit 2; exit;; +esac + +if test "${LESSMETACHARS+set}" != set; then + # Work around a bug in less 394 and earlier; + # it mishandles the metacharacters '$%=~'. + space=' ' + tab=' ' + nl=' +' + LESSMETACHARS="$space$tab$nl'"';*?"()<>[|&^`#\$%=~' +fi + +VER=$(less -V | { read _ ver _ && echo ${ver%%.*}; }) +if test "$VER" -ge 451; then + # less 451 or later: If the compressed file is valid but has + # zero bytes of uncompressed data, using two vertical bars ||- makes + # "less" check the exit status of xz and if it is zero then display + # an empty file. With a single vertical bar |- and no output from xz, + # "less" would attempt to display the raw input file instead. + LESSOPEN="||-$xz -cdfqQ -- %s" +elif test "$VER" -ge 429; then + # less 429 or later: LESSOPEN pipe will be used on + # standard input if $LESSOPEN begins with |-. + LESSOPEN="|-$xz -cdfqQ -- %s" +else + LESSOPEN="|$xz -cdfqQ -- %s" +fi + +SHOW_PREPROC_ERRORS= +if test "$VER" -ge 632; then + SHOW_PREPROC_ERRORS=--show-preproc-errors +fi + +export LESSMETACHARS LESSOPEN + +exec less $SHOW_PREPROC_ERRORS "$@" diff --git a/src/native/external/xz/src/scripts/xzmore.1 b/src/native/external/xz/src/scripts/xzmore.1 new file mode 100644 index 00000000000000..8a0e21c9e16641 --- /dev/null +++ b/src/native/external/xz/src/scripts/xzmore.1 @@ -0,0 +1,69 @@ +.\" SPDX-License-Identifier: 0BSD +.\" +.\" Authors: Andrew Dudman +.\" Lasse Collin +.\" +.\" (Note that this file is based on xzless.1 instead of gzip's zmore.1.) +.\" +.TH XZMORE 1 "2025-03-06" "Tukaani" "XZ Utils" +.SH NAME +xzmore, lzmore \- view xz or lzma compressed (text) files +. +.SH SYNOPSIS +.B xzmore +.RI [ file ...] +.br +.B lzmore +.RI [ file ...] +(DEPRECATED) +. +.SH DESCRIPTION +.B xzmore +displays text from compressed files to a terminal using +.BR more (1). +Files supported by +.BR xz (1) +are decompressed; +other files are assumed to be in uncompressed form already. +If no +.I files +are given, +.B xzmore +reads from standard input. +See the +.BR more (1) +manual for the keyboard commands. +.PP +Note that scrolling backwards might not be possible +depending on the implementation of +.BR more (1). +This is because +.B xzmore +uses a pipe to pass the decompressed data to +.BR more (1). +.BR xzless (1) +uses +.BR less (1) +which provides more advanced features. +.PP +The command +.B lzmore +is provided for backward compatibility with LZMA Utils. +It is deprecated and will be removed in a future version. +. +.SH ENVIRONMENT +.TP +.\" TRANSLATORS: Don't translate the uppercase PAGER. +.\" It is a name of an environment variable. +.B PAGER +If +.B PAGER +is set, +its value is used as the pager instead of +.BR more (1). +. +.SH "SEE ALSO" +.BR more (1), +.BR xz (1), +.BR xzless (1), +.BR zmore (1) diff --git a/src/native/external/xz/src/scripts/xzmore.in b/src/native/external/xz/src/scripts/xzmore.in new file mode 100644 index 00000000000000..d9656720110cba --- /dev/null +++ b/src/native/external/xz/src/scripts/xzmore.in @@ -0,0 +1,82 @@ +#!@POSIX_SHELL@ +# SPDX-License-Identifier: GPL-2.0-or-later + +# Copyright (C) 2001, 2002, 2007 Free Software Foundation +# Copyright (C) 1992, 1993 Jean-loup Gailly + +# Modified for XZ Utils by Andrew Dudman and Lasse Collin. + +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# shellcheck shell=sh + +@enable_path_for_scripts@ +#SET_PATH - This line is a placeholder to ease patching this script. + +# Instead of unsetting XZ_OPT, just make sure that xz will use file format +# autodetection. This way memory usage limit and thread limit can be +# specified via XZ_OPT. +xz='@xz@ --format=auto' + +version='xzmore (@PACKAGE_NAME@) @PACKAGE_VERSION@' + +usage="Usage: ${0##*/} [OPTION]... [FILE]... +Like 'more', but operate on the uncompressed contents of xz compressed FILEs. + +Report bugs to <@PACKAGE_BUGREPORT@>." + +case $1 in + --help) printf '%s\n' "$usage" || exit 2; exit;; + --version) printf '%s\n' "$version" || exit 2; exit;; +esac + +oldtty=`stty -g 2>/dev/null` +if stty -cbreak 2>/dev/null; then + cb='cbreak'; ncb='-cbreak' +else + # 'stty min 1' resets eof to ^a on both SunOS and SysV! + cb='min 1 -icanon'; ncb='icanon eof ^d' +fi +if test $? -eq 0 && test -n "$oldtty"; then + trap 'stty $oldtty 2>/dev/null; exit' 0 2 3 5 10 13 15 +else + trap 'stty $ncb echo 2>/dev/null; exit' 0 2 3 5 10 13 15 +fi + +if test $# = 0; then + if test -t 0; then + printf '%s\n' "$usage"; exit 1 + else + $xz -cdfqQ | eval "${PAGER:-more}" + fi +else + FIRST=1 + for FILE; do + true < "$FILE" || continue + if test $FIRST -eq 0; then + printf "%s--More--(Next file: %s)" "" "$FILE" + stty $cb -echo 2>/dev/null + ANS=`dd bs=1 count=1 2>/dev/null` + stty $ncb echo 2>/dev/null + echo " " + case "$ANS" in + [eq]) exit;; + esac + fi + if test "$ANS" != 's'; then + printf '%s\n' "------> $FILE <------" + $xz -cdfqQ -- "$FILE" | eval "${PAGER:-more}" + fi + if test -t 1; then + FIRST=0 + fi + done +fi diff --git a/src/native/external/xz/src/xz/Makefile.am b/src/native/external/xz/src/xz/Makefile.am new file mode 100644 index 00000000000000..391d7145d76de7 --- /dev/null +++ b/src/native/external/xz/src/xz/Makefile.am @@ -0,0 +1,135 @@ +## SPDX-License-Identifier: 0BSD +## Author: Lasse Collin + +bin_PROGRAMS = xz + +xz_SOURCES = \ + args.c \ + args.h \ + coder.c \ + coder.h \ + file_io.c \ + file_io.h \ + hardware.c \ + hardware.h \ + main.c \ + main.h \ + message.c \ + message.h \ + mytime.c \ + mytime.h \ + options.c \ + options.h \ + private.h \ + sandbox.c \ + sandbox.h \ + signals.c \ + signals.h \ + suffix.c \ + suffix.h \ + util.c \ + util.h \ + ../common/tuklib_open_stdxxx.c \ + ../common/tuklib_progname.c \ + ../common/tuklib_exit.c \ + ../common/tuklib_mbstr_fw.c \ + ../common/tuklib_mbstr_nonprint.c \ + ../common/tuklib_mbstr_width.c \ + ../common/tuklib_mbstr_wrap.c + +if COND_MAIN_DECODER +xz_SOURCES += \ + list.c \ + list.h +endif + +if COND_W32 +xz_SOURCES += xz_w32res.rc +endif + +xz_CPPFLAGS = \ + -DLOCALEDIR=\"$(localedir)\" \ + -I$(top_srcdir)/src/common \ + -I$(top_srcdir)/src/liblzma/api + +xz_LDADD = $(top_builddir)/src/liblzma/liblzma.la + +if COND_GNULIB +xz_CPPFLAGS += \ + -I$(top_builddir)/lib \ + -I$(top_srcdir)/lib +xz_LDADD += $(top_builddir)/lib/libgnu.a +endif + +# libgnu.a may need these libs, so this must be after libgnu.a. +xz_LDADD += $(LTLIBINTL) + + +# Windows resource compiler support +.rc.o: + $(RC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \ + $(xz_CPPFLAGS) $(CPPFLAGS) $(RCFLAGS) -i $< -o $@ + + +dist_man_MANS = xz.1 + + +## Create symlinks for unxz and xzcat for convenience. Create symlinks also +## for lzma, unlzma, and lzcat for compatibility with LZMA Utils 4.32.x. +xzlinks = unxz xzcat + +if COND_LZMALINKS +xzlinks += lzma unlzma lzcat +endif + +install-exec-hook: + cd "$(DESTDIR)$(bindir)" && \ + target=`echo xz | sed '$(transform)'`$(EXEEXT) && \ + for name in $(xzlinks); do \ + link=`echo $$name | sed '$(transform)'`$(LN_EXEEXT) && \ + rm -f "$$link" && \ + $(LN_S) "$$target" "$$link"; \ + done + +# The installation of translated man pages abuses Automake internals +# by calling "install-man" with redefined dist_man_MANS and man_MANS. +# If this breaks some day, don't blame Automake developers. +install-data-hook: + languages= ; \ + if test "$(USE_NLS)" = yes && test -d "$(top_srcdir)/po4a/man"; then \ + languages=`ls "$(top_srcdir)/po4a/man"`; \ + fi; \ + target=`echo xz | sed '$(transform)'` && \ + for lang in . $$languages; do \ + man="$(top_srcdir)/po4a/man/$$lang/xz.1" ; \ + if test -f "$$man"; then \ + $(MAKE) dist_man_MANS="$$man" man_MANS= \ + mandir="$(mandir)/$$lang" install-man; \ + fi; \ + man1dir="$(DESTDIR)$(mandir)/$$lang/man1" && \ + if test -f "$$man1dir/$$target.1"; then ( \ + cd "$$man1dir" && \ + for name in $(xzlinks); do \ + link=`echo $$name | sed '$(transform)'` && \ + rm -f "$$link.1" && \ + $(LN_S) "$$target.1" "$$link.1"; \ + done \ + ); fi; \ + done + +uninstall-hook: + cd "$(DESTDIR)$(bindir)" && \ + for name in $(xzlinks); do \ + link=`echo $$name | sed '$(transform)'`$(LN_EXEEXT) && \ + rm -f "$$link"; \ + done + languages= ; \ + if test "$(USE_NLS)" = yes && test -d "$(top_srcdir)/po4a/man"; then \ + languages=`ls "$(top_srcdir)/po4a/man"`; \ + fi; \ + for lang in . $$languages; do \ + for name in xz $(xzlinks); do \ + name=`echo $$name | sed '$(transform)'` && \ + rm -f "$(DESTDIR)$(mandir)/$$lang/man1/$$name.1"; \ + done; \ + done diff --git a/src/native/external/xz/src/xz/args.c b/src/native/external/xz/src/xz/args.c new file mode 100644 index 00000000000000..c32ee5c11159d6 --- /dev/null +++ b/src/native/external/xz/src/xz/args.c @@ -0,0 +1,914 @@ +// SPDX-License-Identifier: 0BSD + +/////////////////////////////////////////////////////////////////////////////// +// +/// \file args.c +/// \brief Argument parsing +/// +/// \note Filter-specific options parsing is in options.c. +// +// Authors: Lasse Collin +// Jia Tan +// +/////////////////////////////////////////////////////////////////////////////// + +#include "private.h" + +#include "getopt.h" +#include + + +bool opt_stdout = false; +bool opt_force = false; +bool opt_keep_original = false; +bool opt_synchronous = true; +bool opt_robot = false; +bool opt_ignore_check = false; + +// We don't modify or free() this, but we need to assign it in some +// non-const pointers. +const char stdin_filename[] = "(stdin)"; + + +/// Parse and set the memory usage limit for compression, decompression, +/// and/or multithreaded decompression. +static void +parse_memlimit(const char *name, const char *name_percentage, const char *str, + bool set_compress, bool set_decompress, bool set_mtdec) +{ + bool is_percentage = false; + uint64_t value; + + const size_t len = strlen(str); + if (len > 0 && str[len - 1] == '%') { + // Make a copy so that we can get rid of %. + // + // In the past str wasn't const and we modified it directly + // but that modified argv[] and thus affected what was visible + // in "ps auxf" or similar tools which was confusing. For + // example, --memlimit=50% would show up as --memlimit=50 + // since the percent sign was overwritten here. + char *s = xstrdup(str); + s[len - 1] = '\0'; + is_percentage = true; + value = str_to_uint64(name_percentage, s, 1, 100); + free(s); + } else { + // On 32-bit systems, SIZE_MAX would make more sense than + // UINT64_MAX. But use UINT64_MAX still so that scripts + // that assume > 4 GiB values don't break. + value = str_to_uint64(name, str, 0, UINT64_MAX); + } + + hardware_memlimit_set(value, set_compress, set_decompress, set_mtdec, + is_percentage); + return; +} + + +static void +parse_block_list(const char *str_const) +{ + // We need a modifiable string in the for-loop. + char *str_start = xstrdup(str_const); + char *str = str_start; + + // It must be non-empty and not begin with a comma. + if (str[0] == '\0' || str[0] == ',') + message_fatal(_("%s: Invalid argument to --block-list"), str); + + // Count the number of comma-separated strings. + size_t count = 1; + for (size_t i = 0; str[i] != '\0'; ++i) + if (str[i] == ',') + ++count; + + // Prevent an unlikely integer overflow. + if (count > SIZE_MAX / sizeof(block_list_entry) - 1) + message_fatal(_("%s: Too many arguments to --block-list"), + str); + + // Allocate memory to hold all the sizes specified. + // If --block-list was specified already, its value is forgotten. + free(opt_block_list); + opt_block_list = xmalloc((count + 1) * sizeof(block_list_entry)); + + // Clear the bitmask of filter chains in use. + block_list_chain_mask = 0; + + // Reset the largest Block size found in --block-list. + block_list_largest = 0; + + for (size_t i = 0; i < count; ++i) { + // Locate the next comma and replace it with \0. + char *p = strchr(str, ','); + if (p != NULL) + *p = '\0'; + + // Use the default filter chain unless overridden. + opt_block_list[i].chain_num = 0; + + // To specify a filter chain, the block list entry may be + // prepended with "[filter-chain-number]:". The size is + // still required for every block. + // For instance: + // --block-list=2:10MiB,1:5MiB,,8MiB,0:0 + // + // Translates to: + // 1. Block of 10 MiB using filter chain 2 + // 2. Block of 5 MiB using filter chain 1 + // 3. Block of 5 MiB using filter chain 1 + // 4. Block of 8 MiB using the default filter chain + // 5. The last block uses the default filter chain + // + // The block list: + // --block-list=2:MiB,1:,0 + // + // Is not allowed because the second block does not specify + // the block size, only the filter chain. + if (str[0] >= '0' && str[0] <= '9' && str[1] == ':') { + if (str[2] == '\0') + message_fatal(_("In --block-list, block " + "size is missing after " + "filter chain number '%c:'"), + str[0]); + + const unsigned chain_num = (unsigned)(str[0] - '0'); + opt_block_list[i].chain_num = chain_num; + block_list_chain_mask |= 1U << chain_num; + str += 2; + } else { + // This Block uses the default filter chain. + block_list_chain_mask |= 1U << 0; + } + + if (str[0] == '\0') { + // There is no string, that is, a comma follows + // another comma. Use the previous value. + // + // NOTE: We checked earlier that the first char + // of the whole list cannot be a comma. + assert(i > 0); + opt_block_list[i] = opt_block_list[i - 1]; + } else { + opt_block_list[i].size = str_to_uint64("block-list", + str, 0, UINT64_MAX); + + // Zero indicates no more new Blocks. + if (opt_block_list[i].size == 0) { + if (i + 1 != count) + message_fatal(_("0 can only be used " + "as the last element " + "in --block-list")); + + opt_block_list[i].size = UINT64_MAX; + } + + // Remember the largest Block size in the list. + // + // NOTE: Do this after handling the special value 0 + // because when 0 is used, we don't want to reduce + // the Block size of the multithreaded encoder. + if (block_list_largest < opt_block_list[i].size) + block_list_largest = opt_block_list[i].size; + } + + // Be standards compliant: p + 1 is undefined behavior + // if p == NULL. That occurs on the last iteration of + // the loop when we won't care about the value of str + // anymore anyway. That is, this is done conditionally + // solely for standard conformance reasons. + if (p != NULL) + str = p + 1; + } + + // Terminate the array. + opt_block_list[count].size = 0; + + free(str_start); + return; +} + + +static void +parse_real(args_info *args, int argc, char **argv) +{ + enum { + OPT_FILTERS = INT_MIN, + OPT_FILTERS1, + OPT_FILTERS2, + OPT_FILTERS3, + OPT_FILTERS4, + OPT_FILTERS5, + OPT_FILTERS6, + OPT_FILTERS7, + OPT_FILTERS8, + OPT_FILTERS9, + OPT_FILTERS_HELP, + + OPT_X86, + OPT_POWERPC, + OPT_IA64, + OPT_ARM, + OPT_ARMTHUMB, + OPT_ARM64, + OPT_SPARC, + OPT_RISCV, + OPT_DELTA, + OPT_LZMA1, + OPT_LZMA2, + + OPT_NO_SYNC, + OPT_SINGLE_STREAM, + OPT_NO_SPARSE, + OPT_FILES, + OPT_FILES0, + OPT_BLOCK_SIZE, + OPT_BLOCK_LIST, + OPT_MEM_COMPRESS, + OPT_MEM_DECOMPRESS, + OPT_MEM_MT_DECOMPRESS, + OPT_NO_ADJUST, + OPT_INFO_MEMORY, + OPT_ROBOT, + OPT_FLUSH_TIMEOUT, + OPT_IGNORE_CHECK, + }; + + static const char short_opts[] + = "cC:defF:hHlkM:qQrS:tT:vVz0123456789"; + + static const struct option long_opts[] = { + // Operation mode + { "compress", no_argument, NULL, 'z' }, + { "decompress", no_argument, NULL, 'd' }, + { "uncompress", no_argument, NULL, 'd' }, + { "test", no_argument, NULL, 't' }, + { "list", no_argument, NULL, 'l' }, + + // Operation modifiers + { "keep", no_argument, NULL, 'k' }, + { "force", no_argument, NULL, 'f' }, + { "stdout", no_argument, NULL, 'c' }, + { "to-stdout", no_argument, NULL, 'c' }, + { "no-sync", no_argument, NULL, OPT_NO_SYNC }, + { "single-stream", no_argument, NULL, OPT_SINGLE_STREAM }, + { "no-sparse", no_argument, NULL, OPT_NO_SPARSE }, + { "suffix", required_argument, NULL, 'S' }, + { "files", optional_argument, NULL, OPT_FILES }, + { "files0", optional_argument, NULL, OPT_FILES0 }, + + // Basic compression settings + { "format", required_argument, NULL, 'F' }, + { "check", required_argument, NULL, 'C' }, + { "ignore-check", no_argument, NULL, OPT_IGNORE_CHECK }, + { "block-size", required_argument, NULL, OPT_BLOCK_SIZE }, + { "block-list", required_argument, NULL, OPT_BLOCK_LIST }, + { "memlimit-compress", required_argument, NULL, OPT_MEM_COMPRESS }, + { "memlimit-decompress", required_argument, NULL, OPT_MEM_DECOMPRESS }, + { "memlimit-mt-decompress", required_argument, NULL, OPT_MEM_MT_DECOMPRESS }, + { "memlimit", required_argument, NULL, 'M' }, + { "memory", required_argument, NULL, 'M' }, // Old alias + { "no-adjust", no_argument, NULL, OPT_NO_ADJUST }, + { "threads", required_argument, NULL, 'T' }, + { "flush-timeout", required_argument, NULL, OPT_FLUSH_TIMEOUT }, + + { "extreme", no_argument, NULL, 'e' }, + { "fast", no_argument, NULL, '0' }, + { "best", no_argument, NULL, '9' }, + + // Filters + { "filters", required_argument, NULL, OPT_FILTERS}, + { "filters1", required_argument, NULL, OPT_FILTERS1}, + { "filters2", required_argument, NULL, OPT_FILTERS2}, + { "filters3", required_argument, NULL, OPT_FILTERS3}, + { "filters4", required_argument, NULL, OPT_FILTERS4}, + { "filters5", required_argument, NULL, OPT_FILTERS5}, + { "filters6", required_argument, NULL, OPT_FILTERS6}, + { "filters7", required_argument, NULL, OPT_FILTERS7}, + { "filters8", required_argument, NULL, OPT_FILTERS8}, + { "filters9", required_argument, NULL, OPT_FILTERS9}, + { "filters-help", no_argument, NULL, OPT_FILTERS_HELP}, + + { "lzma1", optional_argument, NULL, OPT_LZMA1 }, + { "lzma2", optional_argument, NULL, OPT_LZMA2 }, + { "x86", optional_argument, NULL, OPT_X86 }, + { "powerpc", optional_argument, NULL, OPT_POWERPC }, + { "ia64", optional_argument, NULL, OPT_IA64 }, + { "arm", optional_argument, NULL, OPT_ARM }, + { "armthumb", optional_argument, NULL, OPT_ARMTHUMB }, + { "arm64", optional_argument, NULL, OPT_ARM64 }, + { "sparc", optional_argument, NULL, OPT_SPARC }, + { "riscv", optional_argument, NULL, OPT_RISCV }, + { "delta", optional_argument, NULL, OPT_DELTA }, + + // Other options + { "quiet", no_argument, NULL, 'q' }, + { "verbose", no_argument, NULL, 'v' }, + { "no-warn", no_argument, NULL, 'Q' }, + { "robot", no_argument, NULL, OPT_ROBOT }, + { "info-memory", no_argument, NULL, OPT_INFO_MEMORY }, + { "help", no_argument, NULL, 'h' }, + { "long-help", no_argument, NULL, 'H' }, + { "version", no_argument, NULL, 'V' }, + + { NULL, 0, NULL, 0 } + }; + + int c; + + while ((c = getopt_long(argc, argv, short_opts, long_opts, NULL)) + != -1) { + switch (c) { + // Compression preset (also for decompression if --format=raw) + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + coder_set_preset((uint32_t)(c - '0')); + break; + + // --memlimit-compress + case OPT_MEM_COMPRESS: + parse_memlimit("memlimit-compress", + "memlimit-compress%", optarg, + true, false, false); + break; + + // --memlimit-decompress + case OPT_MEM_DECOMPRESS: + parse_memlimit("memlimit-decompress", + "memlimit-decompress%", optarg, + false, true, false); + break; + + // --memlimit-mt-decompress + case OPT_MEM_MT_DECOMPRESS: + parse_memlimit("memlimit-mt-decompress", + "memlimit-mt-decompress%", optarg, + false, false, true); + break; + + // --memlimit + case 'M': + parse_memlimit("memlimit", "memlimit%", optarg, + true, true, true); + break; + + // --suffix + case 'S': + suffix_set(optarg); + break; + + case 'T': { + // Since xz 5.4.0: Ignore leading '+' first. + const char *s = optarg; + if (optarg[0] == '+') + ++s; + + // The max is from src/liblzma/common/common.h. + uint32_t t = str_to_uint64("threads", s, 0, 16384); + + // If leading '+' was used then use multi-threaded + // mode even if exactly one thread was specified. + if (t == 1 && optarg[0] == '+') + t = UINT32_MAX; + + hardware_threads_set(t); + break; + } + + // --version + case 'V': + // This doesn't return. + message_version(); + + // --stdout + case 'c': + opt_stdout = true; + break; + + // --decompress + case 'd': + opt_mode = MODE_DECOMPRESS; + break; + + // --extreme + case 'e': + coder_set_extreme(); + break; + + // --force + case 'f': + opt_force = true; + break; + + // --info-memory + case OPT_INFO_MEMORY: + // This doesn't return. + hardware_memlimit_show(); + + // --help + case 'h': + // This doesn't return. + message_help(false); + + // --long-help + case 'H': + // This doesn't return. + message_help(true); + + // --list + case 'l': + opt_mode = MODE_LIST; + break; + + // --keep + case 'k': + opt_keep_original = true; + break; + + // --quiet + case 'q': + message_verbosity_decrease(); + break; + + case 'Q': + set_exit_no_warn(); + break; + + case 't': + opt_mode = MODE_TEST; + break; + + // --verbose + case 'v': + message_verbosity_increase(); + break; + + // --robot + case OPT_ROBOT: + opt_robot = true; + + // This is to make sure that floating point numbers + // always have a dot as decimal separator. + setlocale(LC_NUMERIC, "C"); + break; + + case 'z': + opt_mode = MODE_COMPRESS; + break; + + // --filters + case OPT_FILTERS: + coder_add_filters_from_str(optarg); + break; + + // --filters1...--filters9 + case OPT_FILTERS1: + case OPT_FILTERS2: + case OPT_FILTERS3: + case OPT_FILTERS4: + case OPT_FILTERS5: + case OPT_FILTERS6: + case OPT_FILTERS7: + case OPT_FILTERS8: + case OPT_FILTERS9: + coder_add_block_filters(optarg, + (size_t)(c - OPT_FILTERS)); + break; + + // --filters-help + case OPT_FILTERS_HELP: + // This doesn't return. + message_filters_help(); + + case OPT_X86: + coder_add_filter(LZMA_FILTER_X86, + options_bcj(optarg)); + break; + + case OPT_POWERPC: + coder_add_filter(LZMA_FILTER_POWERPC, + options_bcj(optarg)); + break; + + case OPT_IA64: + coder_add_filter(LZMA_FILTER_IA64, + options_bcj(optarg)); + break; + + case OPT_ARM: + coder_add_filter(LZMA_FILTER_ARM, + options_bcj(optarg)); + break; + + case OPT_ARMTHUMB: + coder_add_filter(LZMA_FILTER_ARMTHUMB, + options_bcj(optarg)); + break; + + case OPT_ARM64: + coder_add_filter(LZMA_FILTER_ARM64, + options_bcj(optarg)); + break; + + case OPT_SPARC: + coder_add_filter(LZMA_FILTER_SPARC, + options_bcj(optarg)); + break; + + case OPT_RISCV: + coder_add_filter(LZMA_FILTER_RISCV, + options_bcj(optarg)); + break; + + case OPT_DELTA: + coder_add_filter(LZMA_FILTER_DELTA, + options_delta(optarg)); + break; + + case OPT_LZMA1: + coder_add_filter(LZMA_FILTER_LZMA1, + options_lzma(optarg)); + break; + + case OPT_LZMA2: + coder_add_filter(LZMA_FILTER_LZMA2, + options_lzma(optarg)); + break; + + // Other + + // --format + case 'F': { + // Just in case, support both "lzma" and "alone" since + // the latter was used for forward compatibility in + // LZMA Utils 4.32.x. + static const struct { + char str[8]; + enum format_type format; + } types[] = { + { "auto", FORMAT_AUTO }, + { "xz", FORMAT_XZ }, + { "lzma", FORMAT_LZMA }, + { "alone", FORMAT_LZMA }, +#ifdef HAVE_LZIP_DECODER + { "lzip", FORMAT_LZIP }, +#endif + { "raw", FORMAT_RAW }, + }; + + size_t i = 0; + while (strcmp(types[i].str, optarg) != 0) + if (++i == ARRAY_SIZE(types)) + message_fatal(_("%s: Unknown file " + "format type"), + optarg); + + opt_format = types[i].format; + break; + } + + // --check + case 'C': { + static const struct { + char str[8]; + lzma_check check; + } types[] = { + { "none", LZMA_CHECK_NONE }, + { "crc32", LZMA_CHECK_CRC32 }, + { "crc64", LZMA_CHECK_CRC64 }, + { "sha256", LZMA_CHECK_SHA256 }, + }; + + size_t i = 0; + while (strcmp(types[i].str, optarg) != 0) { + if (++i == ARRAY_SIZE(types)) + message_fatal(_("%s: Unsupported " + "integrity " + "check type"), optarg); + } + + // Use a separate check in case we are using different + // liblzma than what was used to compile us. + if (!lzma_check_is_supported(types[i].check)) + message_fatal(_("%s: Unsupported integrity " + "check type"), optarg); + + coder_set_check(types[i].check); + break; + } + + case OPT_IGNORE_CHECK: + opt_ignore_check = true; + break; + + case OPT_BLOCK_SIZE: + opt_block_size = str_to_uint64("block-size", optarg, + 0, LZMA_VLI_MAX); + break; + + case OPT_BLOCK_LIST: { + parse_block_list(optarg); + break; + } + + case OPT_SINGLE_STREAM: + opt_single_stream = true; + + // Since 5.7.1alpha --single-stream implies --keep. + opt_keep_original = true; + break; + + case OPT_NO_SPARSE: + io_no_sparse(); + break; + + case OPT_FILES: + args->files_delim = '\n'; + + FALLTHROUGH; + + case OPT_FILES0: + if (args->files_name != NULL) + message_fatal(_("Only one file can be " + "specified with '--files' " + "or '--files0'.")); + + if (optarg == NULL) { + args->files_name = stdin_filename; + args->files_file = stdin; + } else { + args->files_name = optarg; + args->files_file = fopen(optarg, + c == OPT_FILES ? "r" : "rb"); + if (args->files_file == NULL) + // TRANSLATORS: This is a translatable + // string because French needs a space + // before the colon ("%s : %s"). + message_fatal(_("%s: %s"), optarg, + strerror(errno)); + } + + break; + + case OPT_NO_ADJUST: + opt_auto_adjust = false; + break; + + case OPT_FLUSH_TIMEOUT: + opt_flush_timeout = str_to_uint64("flush-timeout", + optarg, 0, UINT64_MAX); + break; + + case OPT_NO_SYNC: + opt_synchronous = false; + break; + + default: + message_try_help(); + tuklib_exit(E_ERROR, E_ERROR, false); + } + } + + return; +} + + +static void +parse_environment(args_info *args, char *argv0, const char *varname) +{ + char *env = getenv(varname); + if (env == NULL) + return; + + // We modify the string, so make a copy of it. + env = xstrdup(env); + + // Calculate the number of arguments in env. argc stats at one + // to include space for the program name. + int argc = 1; + bool prev_was_space = true; + for (size_t i = 0; env[i] != '\0'; ++i) { + // NOTE: Cast to unsigned char is needed so that correct + // value gets passed to isspace(), which expects + // unsigned char cast to int. Casting to int is done + // automatically due to integer promotion, but we need to + // force char to unsigned char manually. Otherwise 8-bit + // characters would get promoted to wrong value if + // char is signed. + if (isspace((unsigned char)env[i])) { + prev_was_space = true; + } else if (prev_was_space) { + prev_was_space = false; + + // Keep argc small enough to fit into a signed int + // and to keep it usable for memory allocation. + if (++argc == my_min( + INT_MAX, SIZE_MAX / sizeof(char *))) + message_fatal(_("The environment variable " + "%s contains too many " + "arguments"), varname); + } + } + + // Allocate memory to hold pointers to the arguments. Add one to get + // space for the terminating NULL (if some systems happen to need it). + char **argv = xmalloc(((size_t)(argc) + 1) * sizeof(char *)); + argv[0] = argv0; + argv[argc] = NULL; + + // Go through the string again. Split the arguments using '\0' + // characters and add pointers to the resulting strings to argv. + argc = 1; + prev_was_space = true; + for (size_t i = 0; env[i] != '\0'; ++i) { + if (isspace((unsigned char)env[i])) { + prev_was_space = true; + env[i] = '\0'; + } else if (prev_was_space) { + prev_was_space = false; + argv[argc++] = env + i; + } + } + + // Parse the argument list we got from the environment. All non-option + // arguments i.e. filenames are ignored. + parse_real(args, argc, argv); + + // Reset the state of the getopt_long() so that we can parse the + // command line options too. There are two incompatible ways to + // do it. +#ifdef HAVE_OPTRESET + // BSD + optind = 1; + optreset = 1; +#else + // GNU, Solaris + optind = 0; +#endif + + // We don't need the argument list from environment anymore. + free(argv); + free(env); + + return; +} + + +extern void +args_parse(args_info *args, int argc, char **argv) +{ + // Initialize those parts of *args that we need later. + args->files_name = NULL; + args->files_file = NULL; + args->files_delim = '\0'; + + // Check how we were called. + { + // Remove the leading path name, if any. + const char *name = strrchr(argv[0], '/'); + if (name == NULL) + name = argv[0]; + else + ++name; + + // NOTE: It's possible that name[0] is now '\0' if argv[0] + // is weird, but it doesn't matter here. + + // Look for full command names instead of substrings like + // "un", "cat", and "lz" to reduce possibility of false + // positives when the programs have been renamed. + if (strstr(name, "xzcat") != NULL) { + opt_mode = MODE_DECOMPRESS; + opt_stdout = true; + } else if (strstr(name, "unxz") != NULL) { + opt_mode = MODE_DECOMPRESS; + } else if (strstr(name, "lzcat") != NULL) { + opt_format = FORMAT_LZMA; + opt_mode = MODE_DECOMPRESS; + opt_stdout = true; + } else if (strstr(name, "unlzma") != NULL) { + opt_format = FORMAT_LZMA; + opt_mode = MODE_DECOMPRESS; + } else if (strstr(name, "lzma") != NULL) { + opt_format = FORMAT_LZMA; + } + } + + // First the flags from the environment + parse_environment(args, argv[0], "XZ_DEFAULTS"); + parse_environment(args, argv[0], "XZ_OPT"); + + // Then from the command line + parse_real(args, argc, argv); + + // If encoder or decoder support was omitted at build time, + // show an error now so that the rest of the code can rely on + // that whatever is in opt_mode is also supported. +#ifndef HAVE_ENCODERS + if (opt_mode == MODE_COMPRESS) + message_fatal(_("Compression support was disabled " + "at build time")); +#endif +#ifndef HAVE_DECODERS + // Even MODE_LIST cannot work without decoder support so MODE_COMPRESS + // is the only valid choice. + if (opt_mode != MODE_COMPRESS) + message_fatal(_("Decompression support was disabled " + "at build time")); +#endif + +#ifdef HAVE_LZIP_DECODER + if (opt_mode == MODE_COMPRESS && opt_format == FORMAT_LZIP) + message_fatal(_("Compression of lzip files (.lz) " + "is not supported")); +#endif + + // Never remove the source file when the destination is not on disk. + // In test mode the data is written nowhere, but setting opt_stdout + // will make the rest of the code behave well. + if (opt_stdout || opt_mode == MODE_TEST) { + opt_keep_original = true; + opt_stdout = true; + } + + // Don't use fsync() if --keep is specified or implied. + // However, don't document this as "--keep implies --no-sync" + // because if syncing support was added to --flush-timeout, + // it would sync even if --keep was specified. + if (opt_keep_original) + opt_synchronous = false; + + // When compressing, if no --format flag was used, or it + // was --format=auto, we compress to the .xz format. + if (opt_mode == MODE_COMPRESS && opt_format == FORMAT_AUTO) + opt_format = FORMAT_XZ; + + // Set opt_block_list to NULL if we are not compressing to the .xz + // format. This option cannot be used outside of this case, and + // simplifies the implementation later. + if ((opt_mode != MODE_COMPRESS || opt_format != FORMAT_XZ) + && opt_block_list != NULL) { + message(V_WARNING, _("--block-list is ignored unless " + "compressing to the .xz format")); + free(opt_block_list); + opt_block_list = NULL; + } + + // If raw format is used and a custom suffix is not provided, + // then only stdout mode can be used when compressing or + // decompressing. + if (opt_format == FORMAT_RAW && !suffix_is_set() && !opt_stdout + && (opt_mode == MODE_COMPRESS + || opt_mode == MODE_DECOMPRESS)) { + if (args->files_name != NULL) + message_fatal(_("With --format=raw, " + "--suffix=.SUF is required " + "unless writing to stdout")); + + // If all of the filenames provided are "-" (more than one + // "-" could be specified) or no filenames are provided, + // then we are only going to be writing to standard out. + for (int i = optind; i < argc; i++) { + if (strcmp(argv[i], "-") != 0) + message_fatal(_("With --format=raw, " + "--suffix=.SUF is required " + "unless writing to stdout")); + } + } + + // Compression settings need to be validated (options themselves and + // their memory usage) when compressing to any file format. It has to + // be done also when uncompressing raw data, since for raw decoding + // the options given on the command line are used to know what kind + // of raw data we are supposed to decode. + if (opt_mode == MODE_COMPRESS || (opt_format == FORMAT_RAW + && opt_mode != MODE_LIST)) + coder_set_compression_settings(); + + // If no filenames are given, use stdin. + if (argv[optind] == NULL && args->files_name == NULL) { + // We don't modify or free() the "-" constant. The caller + // modifies this so don't make the struct itself const. + static char *names_stdin[2] = { (char *)"-", NULL }; + args->arg_names = names_stdin; + args->arg_count = 1; + } else { + // We got at least one filename from the command line, or + // --files or --files0 was specified. + args->arg_names = argv + optind; + args->arg_count = (unsigned int)(argc - optind); + } + + return; +} + + +#ifndef NDEBUG +extern void +args_free(void) +{ + free(opt_block_list); + return; +} +#endif diff --git a/src/native/external/xz/src/xz/args.h b/src/native/external/xz/src/xz/args.h new file mode 100644 index 00000000000000..7fdf37f1420f57 --- /dev/null +++ b/src/native/external/xz/src/xz/args.h @@ -0,0 +1,44 @@ +// SPDX-License-Identifier: 0BSD + +/////////////////////////////////////////////////////////////////////////////// +// +/// \file args.h +/// \brief Argument parsing +// +// Authors: Lasse Collin +// Jia Tan +// +/////////////////////////////////////////////////////////////////////////////// + +typedef struct { + /// Filenames from command line + char **arg_names; + + /// Number of filenames from command line + unsigned int arg_count; + + /// Name of the file from which to read filenames. This is NULL + /// if --files or --files0 was not used. + const char *files_name; + + /// File opened for reading from which filenames are read. This is + /// non-NULL only if files_name is non-NULL. + FILE *files_file; + + /// Delimiter for filenames read from files_file + char files_delim; + +} args_info; + + +extern bool opt_stdout; +extern bool opt_force; +extern bool opt_keep_original; +extern bool opt_synchronous; +extern bool opt_robot; +extern bool opt_ignore_check; + +extern const char stdin_filename[]; + +extern void args_parse(args_info *args, int argc, char **argv); +extern void args_free(void); diff --git a/src/native/external/xz/src/xz/coder.c b/src/native/external/xz/src/xz/coder.c new file mode 100644 index 00000000000000..c28f874a25f70a --- /dev/null +++ b/src/native/external/xz/src/xz/coder.c @@ -0,0 +1,1476 @@ +// SPDX-License-Identifier: 0BSD + +/////////////////////////////////////////////////////////////////////////////// +// +/// \file coder.c +/// \brief Compresses or uncompresses a file +// +// Authors: Lasse Collin +// Jia Tan +// +/////////////////////////////////////////////////////////////////////////////// + +#include "private.h" +#include "tuklib_integer.h" + + +/// Return value type for coder_init(). +enum coder_init_ret { + CODER_INIT_NORMAL, + CODER_INIT_PASSTHRU, + CODER_INIT_ERROR, +}; + + +enum operation_mode opt_mode = MODE_COMPRESS; +enum format_type opt_format = FORMAT_AUTO; +bool opt_auto_adjust = true; +bool opt_single_stream = false; +uint64_t opt_block_size = 0; +block_list_entry *opt_block_list = NULL; +uint64_t block_list_largest; +uint32_t block_list_chain_mask; + +/// Stream used to communicate with liblzma +static lzma_stream strm = LZMA_STREAM_INIT; + +/// Maximum number of filter chains. The first filter chain is the default, +/// and 9 other filter chains can be specified with --filtersX. +#define NUM_FILTER_CHAIN_MAX 10 + +/// The default filter chain is in chains[0]. It is used for encoding +/// in all supported formats and also for decdoing raw streams. The other +/// filter chains are set by --filtersX to support changing filters with +/// the --block-list option. +static lzma_filter chains[NUM_FILTER_CHAIN_MAX][LZMA_FILTERS_MAX + 1]; + +/// Bitmask indicating which filter chains are actually used when encoding +/// in the .xz format. This is needed since the filter chains specified using +/// --filtersX (or the default filter chain) might in reality be unneeded +/// if they are never used in --block-list. When --block-list isn't +/// specified, only the default filter chain is used, thus the initial +/// value of this variable is 1U << 0 (the number of the default chain is 0). +static uint32_t chains_used_mask = 1U << 0; + +/// Input and output buffers +static io_buf in_buf; +static io_buf out_buf; + +/// Number of filters in the default filter chain. Zero indicates that +/// we are using a preset. +static uint32_t filters_count = 0; + +/// Number of the preset (0-9) +static uint32_t preset_number = LZMA_PRESET_DEFAULT; + +/// True if the current default filter chain was set using the --filters +/// option. The filter chain is reset if a preset option (like -9) or an +/// old-style filter option (like --lzma2) is used after a --filters option. +static bool string_to_filter_used = false; + +/// Integrity check type +static lzma_check check; + +/// This becomes false if the --check=CHECK option is used. +static bool check_default = true; + +/// Indicates if unconsumed input is allowed to remain after +/// decoding has successfully finished. This is set for each file +/// in coder_init(). +static bool allow_trailing_input; + +#ifdef MYTHREAD_ENABLED +static lzma_mt mt_options = { + .flags = 0, + .timeout = 300, +}; +#endif + + +extern void +coder_set_check(lzma_check new_check) +{ + check = new_check; + check_default = false; + return; +} + + +static void +forget_filter_chain(void) +{ + // Setting a preset or using --filters makes us forget + // the earlier custom filter chain (if any). + if (filters_count > 0) { + lzma_filters_free(chains[0], NULL); + filters_count = 0; + } + + string_to_filter_used = false; + return; +} + + +extern void +coder_set_preset(uint32_t new_preset) +{ + preset_number &= ~LZMA_PRESET_LEVEL_MASK; + preset_number |= new_preset; + forget_filter_chain(); + return; +} + + +extern void +coder_set_extreme(void) +{ + preset_number |= LZMA_PRESET_EXTREME; + forget_filter_chain(); + return; +} + + +extern void +coder_add_filter(lzma_vli id, void *options) +{ + if (filters_count == LZMA_FILTERS_MAX) + message_fatal(_("Maximum number of filters is four")); + + if (string_to_filter_used) + forget_filter_chain(); + + chains[0][filters_count].id = id; + chains[0][filters_count].options = options; + + // Terminate the filter chain with LZMA_VLI_UNKNOWN to simplify + // implementation of forget_filter_chain(). + chains[0][++filters_count].id = LZMA_VLI_UNKNOWN; + + // Setting a custom filter chain makes us forget the preset options. + // This makes a difference if one specifies e.g. "xz -9 --lzma2 -e" + // where the custom filter chain resets the preset level back to + // the default 6, making the example equivalent to "xz -6e". + preset_number = LZMA_PRESET_DEFAULT; + + return; +} + + +static void +str_to_filters(const char *str, uint32_t index, uint32_t flags) +{ + int error_pos; + const char *err = lzma_str_to_filters(str, &error_pos, + chains[index], flags, NULL); + + if (err != NULL) { + char filter_num[2] = ""; + if (index > 0) + filter_num[0] = '0' + index; + + // liblzma doesn't translate the error messages but + // the messages are included in xz's translations. + message(V_ERROR, _("Error in --filters%s=FILTERS option:"), + filter_num); + message(V_ERROR, "%s", str); + message(V_ERROR, "%*s^", error_pos, ""); + message_fatal("%s", _(err)); + } +} + + +extern void +coder_add_filters_from_str(const char *filter_str) +{ + // Forget presets and previously defined filter chain. See + // coder_add_filter() above for why preset_number must be reset too. + forget_filter_chain(); + preset_number = LZMA_PRESET_DEFAULT; + + string_to_filter_used = true; + + // Include LZMA_STR_ALL_FILTERS so this can be used with --format=raw. + str_to_filters(filter_str, 0, LZMA_STR_ALL_FILTERS); + + // Set the filters_count to be the number of filters converted from + // the string. + for (filters_count = 0; chains[0][filters_count].id + != LZMA_VLI_UNKNOWN; + ++filters_count) ; + + assert(filters_count > 0); + return; +} + + +extern void +coder_add_block_filters(const char *str, size_t slot) +{ + // Free old filters first, if they were previously allocated. + if (chains_used_mask & (1U << slot)) + lzma_filters_free(chains[slot], NULL); + + str_to_filters(str, slot, 0); + + chains_used_mask |= 1U << slot; +} + + +tuklib_attr_noreturn +static void +memlimit_too_small(uint64_t memory_usage) +{ + message(V_ERROR, _("Memory usage limit is too low for the given " + "filter setup.")); + message_mem_needed(V_ERROR, memory_usage); + tuklib_exit(E_ERROR, E_ERROR, false); +} + + +#ifdef HAVE_ENCODERS +/// \brief Calculate the memory usage of each filter chain. +/// +/// \param chains_memusages If non-NULL, the memusage of the encoder +/// or decoder for each chain is stored in +/// this array. +/// \param mt If non-NULL, calculate memory usage of +/// multithreaded encoder. +/// \param encode Whether to calculate encoder or decoder +/// memory usage. This must be true if +/// mt != NULL. +/// +/// \return Return the highest memory usage of all of the filter chains. +static uint64_t +get_chains_memusage(uint64_t *chains_memusages, const lzma_mt *mt, bool encode) +{ + uint64_t max_memusage = 0; + +#ifdef MYTHREAD_ENABLED + // Copy multithreading options to a temporary struct since the + // "filters" member needs to be changed. + lzma_mt mt_local; + if (mt != NULL) + mt_local = *mt; +#else + (void)mt; +#endif + + for (uint32_t i = 0; i < ARRAY_SIZE(chains); i++) { + if (!(chains_used_mask & (1U << i))) + continue; + + uint64_t memusage = UINT64_MAX; +#ifdef MYTHREAD_ENABLED + if (mt != NULL) { + assert(encode); + mt_local.filters = chains[i]; + memusage = lzma_stream_encoder_mt_memusage(&mt_local); + } else +#endif + if (encode) { + memusage = lzma_raw_encoder_memusage(chains[i]); + } +#ifdef HAVE_DECODERS + else { + memusage = lzma_raw_decoder_memusage(chains[i]); + } +#endif + + if (chains_memusages != NULL) + chains_memusages[i] = memusage; + + if (memusage > max_memusage) + max_memusage = memusage; + } + + return max_memusage; +} +#endif + + +extern void +coder_set_compression_settings(void) +{ +#ifdef HAVE_LZIP_DECODER + // .lz compression isn't supported. + assert(opt_format != FORMAT_LZIP); +#endif + + // The default check type is CRC64, but fallback to CRC32 + // if CRC64 isn't supported by the copy of liblzma we are + // using. CRC32 is always supported. + if (check_default) { + check = LZMA_CHECK_CRC64; + if (!lzma_check_is_supported(check)) + check = LZMA_CHECK_CRC32; + } + +#ifdef HAVE_ENCODERS + if (opt_block_list != NULL) { + // args.c ensures these. + assert(opt_mode == MODE_COMPRESS); + assert(opt_format == FORMAT_XZ); + + // Find out if block_list_chain_mask has a bit set that + // isn't set in chains_used_mask. + const uint32_t missing_chains_mask + = (block_list_chain_mask ^ chains_used_mask) + & block_list_chain_mask; + + // If a filter chain was specified in --block-list but no + // matching --filtersX option was used, exit with an error. + if (missing_chains_mask != 0) { + // Get the number of the first missing filter chain + // and show it in the error message. + const unsigned first_missing + = (unsigned)ctz32(missing_chains_mask); + + message_fatal(_("filter chain %u used by " + "--block-list but not specified " + "with --filters%u="), + first_missing, first_missing); + } + + // Omit the unused filter chains from mask of used chains. + // + // (FIXME? When built with debugging, coder_free() will free() + // the filter chains (except the default chain) which makes + // Valgrind show fewer reachable allocations. But coder_free() + // uses this mask to determine which chains to free. Thus it + // won't free the ones that are cleared here from the mask. + // In practice this doesn't matter.) + chains_used_mask &= block_list_chain_mask; + } else { + // Reset filters used mask in case --block-list is not + // used, but --filtersX is used. + chains_used_mask = 1U << 0; + } +#endif + + // Options for LZMA1 or LZMA2 in case we are using a preset. + static lzma_options_lzma opt_lzma; + + // The first filter in the chains[] array is for the default + // filter chain. + lzma_filter *default_filters = chains[0]; + + if (filters_count == 0 && chains_used_mask & 1) { + // We are using a preset. This is not a good idea in raw mode + // except when playing around with things. Different versions + // of this software may use different options in presets, and + // thus make uncompressing the raw data difficult. + if (opt_format == FORMAT_RAW) { + // The message is shown only if warnings are allowed + // but the exit status isn't changed. + message(V_WARNING, _("Using a preset in raw mode " + "is discouraged.")); + message(V_WARNING, _("The exact options of the " + "presets may vary between software " + "versions.")); + } + + // Get the preset for LZMA1 or LZMA2. + if (lzma_lzma_preset(&opt_lzma, preset_number)) + message_bug(); + + // Use LZMA2 except with --format=lzma we use LZMA1. + default_filters[0].id = opt_format == FORMAT_LZMA + ? LZMA_FILTER_LZMA1 : LZMA_FILTER_LZMA2; + default_filters[0].options = &opt_lzma; + + filters_count = 1; + + // Terminate the filter options array. + default_filters[1].id = LZMA_VLI_UNKNOWN; + } + + // If we are using the .lzma format, allow exactly one filter + // which has to be LZMA1. There is no need to check if the default + // filter chain is being used since it can only be disabled if + // --block-list is used, which is incompatible with FORMAT_LZMA. + if (opt_format == FORMAT_LZMA && (filters_count != 1 + || default_filters[0].id != LZMA_FILTER_LZMA1)) + message_fatal(_("The .lzma format supports only " + "the LZMA1 filter")); + + // If we are using the .xz format, make sure that there is no LZMA1 + // filter to prevent LZMA_PROG_ERROR. With the chains from --filtersX + // we have already ensured this by calling lzma_str_to_filters() + // without setting the flags that would allow non-.xz filters. + if (opt_format == FORMAT_XZ && chains_used_mask & 1) + for (size_t i = 0; i < filters_count; ++i) + if (default_filters[i].id == LZMA_FILTER_LZMA1) + message_fatal(_("LZMA1 cannot be used " + "with the .xz format")); + + if (chains_used_mask & 1) { + // Print the selected default filter chain. + message_filters_show(V_DEBUG, default_filters); + } + + // The --flush-timeout option requires LZMA_SYNC_FLUSH support + // from the filter chain. Currently the threaded encoder doesn't + // support LZMA_SYNC_FLUSH so single-threaded mode must be used. + if (opt_mode == MODE_COMPRESS && opt_flush_timeout != 0) { + for (unsigned i = 0; i < ARRAY_SIZE(chains); ++i) { + if (!(chains_used_mask & (1U << i))) + continue; + + const lzma_filter *fc = chains[i]; + for (size_t j = 0; fc[j].id != LZMA_VLI_UNKNOWN; j++) { + switch (fc[j].id) { + case LZMA_FILTER_LZMA2: + case LZMA_FILTER_DELTA: + break; + + default: + message_fatal(_("Filter chain %u is " + "incompatible with " + "--flush-timeout"), + i); + } + } + } + + if (hardware_threads_is_mt()) { + message(V_WARNING, _("Switching to single-threaded " + "mode due to --flush-timeout")); + hardware_threads_set(1); + } + } + + // Get memory limit and the memory usage of the used filter chains. + // Note that if --format=raw was used, we can be decompressing + // using the default filter chain. + // + // If multithreaded .xz compression is done, the memory limit + // will be replaced. + uint64_t memory_limit = hardware_memlimit_get(opt_mode); + uint64_t memory_usage = UINT64_MAX; + +#ifdef HAVE_ENCODERS + // Memory usage for each encoder filter chain (default + // or --filtersX). The encoder options may need to be + // scaled down depending on the memory usage limit. + uint64_t encoder_memusages[ARRAY_SIZE(chains)]; +#endif + + if (opt_mode == MODE_COMPRESS) { +#ifdef HAVE_ENCODERS +# ifdef MYTHREAD_ENABLED + if (opt_format == FORMAT_XZ && hardware_threads_is_mt()) { + memory_limit = hardware_memlimit_mtenc_get(); + mt_options.threads = hardware_threads_get(); + + uint64_t block_size = opt_block_size; + + // If opt_block_size is not set, find the maximum + // recommended Block size based on the filter chains + if (block_size == 0) { + for (unsigned i = 0; i < ARRAY_SIZE(chains); + i++) { + if (!(chains_used_mask & (1U << i))) + continue; + + uint64_t size = lzma_mt_block_size( + chains[i]); + + // If this returns an error, then one + // of the filter chains in use is + // invalid, so there is no point in + // progressing further. + if (size == UINT64_MAX) + message_fatal(_("Unsupported " + "options in filter " + "chain %u"), i); + + if (size > block_size) + block_size = size; + } + + // If --block-list was used and our current + // Block size exceeds the largest size + // in --block-list, reduce the Block size of + // the multithreaded encoder. The extra size + // would only be a waste of RAM. With a + // smaller Block size we might even be able + // to use more threads in some cases. + if (block_list_largest > 0 && block_size + > block_list_largest) + block_size = block_list_largest; + } + + mt_options.block_size = block_size; + mt_options.check = check; + + memory_usage = get_chains_memusage(encoder_memusages, + &mt_options, true); + if (memory_usage != UINT64_MAX) + message(V_DEBUG, _("Using up to %" PRIu32 + " threads."), + mt_options.threads); + } else +# endif + { + memory_usage = get_chains_memusage(encoder_memusages, + NULL, true); + } +#endif + } else { +#ifdef HAVE_DECODERS + memory_usage = lzma_raw_decoder_memusage(default_filters); +#endif + } + + if (memory_usage == UINT64_MAX) + message_fatal(_("Unsupported filter chain or filter options")); + + // Print memory usage info before possible dictionary + // size auto-adjusting. + // + // NOTE: If only encoder support was built, we cannot show + // what the decoder memory usage will be. + message_mem_needed(V_DEBUG, memory_usage); + +#if defined(HAVE_ENCODERS) && defined(HAVE_DECODERS) + if (opt_mode == MODE_COMPRESS && message_verbosity_get() >= V_DEBUG) { + const uint64_t decmem = get_chains_memusage(NULL, NULL, false); + if (decmem != UINT64_MAX) + message(V_DEBUG, _("Decompression will need " + "%s MiB of memory."), uint64_to_str( + round_up_to_mib(decmem), 0)); + } +#endif + + if (memory_usage <= memory_limit) + return; + + // With --format=raw settings are never adjusted to meet + // the memory usage limit. + if (opt_format == FORMAT_RAW) + memlimit_too_small(memory_usage); + + assert(opt_mode == MODE_COMPRESS); + +#ifdef HAVE_ENCODERS +# ifdef MYTHREAD_ENABLED + if (opt_format == FORMAT_XZ && hardware_threads_is_mt()) { + // Try to reduce the number of threads before + // adjusting the compression settings down. + while (mt_options.threads > 1) { + // Reduce the number of threads by one and check + // the memory usage. + --mt_options.threads; + memory_usage = get_chains_memusage(encoder_memusages, + &mt_options, true); + if (memory_usage == UINT64_MAX) + message_bug(); + + if (memory_usage <= memory_limit) { + // The memory usage is now low enough. + // + // Since 5.6.1: This is only shown at + // V_DEBUG instead of V_WARNING because + // changing the number of threads doesn't + // affect the output. On some systems this + // message would be too common now that + // multithreaded compression is the default. + message(V_DEBUG, _("Reduced the number of " + "threads from %s to %s to not exceed " + "the memory usage limit of %s MiB"), + uint64_to_str( + hardware_threads_get(), 0), + uint64_to_str(mt_options.threads, 1), + uint64_to_str(round_up_to_mib( + memory_limit), 2)); + return; + } + } + + // If the memory usage limit is only a soft limit (automatic + // number of threads and no --memlimit-compress), the limit + // is only used to reduce the number of threads and once at + // just one thread, the limit is completely ignored. This + // way -T0 won't use insane amount of memory but at the same + // time the soft limit will never make xz fail and never make + // xz change settings that would affect the compressed output. + // + // Since 5.6.1: Like above, this is now shown at V_DEBUG + // instead of V_WARNING. + if (hardware_memlimit_mtenc_is_default()) { + message(V_DEBUG, _("Reduced the number of threads " + "from %s to one. The automatic memory usage " + "limit of %s MiB is still being exceeded. " + "%s MiB of memory is required. " + "Continuing anyway."), + uint64_to_str(hardware_threads_get(), 0), + uint64_to_str( + round_up_to_mib(memory_limit), 1), + uint64_to_str( + round_up_to_mib(memory_usage), 2)); + return; + } + + // If --no-adjust was used, we cannot drop to single-threaded + // mode since it produces different compressed output. + // + // NOTE: In xz 5.2.x, --no-adjust also prevented reducing + // the number of threads. This changed in 5.3.3alpha. + if (!opt_auto_adjust) + memlimit_too_small(memory_usage); + + // Switch to single-threaded mode. It uses + // less memory than using one thread in + // the multithreaded mode but the output + // is also different. + hardware_threads_set(1); + memory_usage = get_chains_memusage(encoder_memusages, + NULL, true); + message(V_WARNING, _("Switching to single-threaded mode " + "to not exceed the memory usage limit of %s MiB"), + uint64_to_str(round_up_to_mib(memory_limit), 0)); + } +# endif + + if (memory_usage <= memory_limit) + return; + + // Don't adjust LZMA2 or LZMA1 dictionary size if --no-adjust + // was specified as that would change the compressed output. + if (!opt_auto_adjust) + memlimit_too_small(memory_usage); + + // Adjust each filter chain that is exceeding the memory usage limit. + for (unsigned i = 0; i < ARRAY_SIZE(chains); i++) { + // Skip unused chains. + if (!(chains_used_mask & (1U << i))) + continue; + + // Skip chains that already meet the memory usage limit. + if (encoder_memusages[i] <= memory_limit) + continue; + + // Look for the last filter if it is LZMA2 or LZMA1, so we + // can make it use less RAM. We cannot adjust other filters. + unsigned j = 0; + while (chains[i][j].id != LZMA_FILTER_LZMA2 + && chains[i][j].id != LZMA_FILTER_LZMA1) { + // NOTE: This displays the too high limit of this + // particular filter chain. If multiple chains are + // specified and another one would need more then + // this message could be confusing. As long as LZMA2 + // is the only memory hungry filter in .xz this + // doesn't matter at all in practice. + // + // FIXME? However, it's sort of odd still if we had + // switched from multithreaded mode to single-threaded + // mode because single-threaded produces different + // output. So the messages could perhaps be clearer. + // Another case of this is a few lines below. + if (chains[i][j].id == LZMA_VLI_UNKNOWN) + memlimit_too_small(encoder_memusages[i]); + + ++j; + } + + // Decrease the dictionary size until we meet the memory + // usage limit. First round down to full mebibytes. + lzma_options_lzma *opt = chains[i][j].options; + const uint32_t orig_dict_size = opt->dict_size; + opt->dict_size &= ~((UINT32_C(1) << 20) - 1); + + while (true) { + // If it is below 1 MiB, auto-adjusting failed. + // + // FIXME? See the FIXME a few lines above. + if (opt->dict_size < (UINT32_C(1) << 20)) + memlimit_too_small(encoder_memusages[i]); + + encoder_memusages[i] + = lzma_raw_encoder_memusage(chains[i]); + if (encoder_memusages[i] == UINT64_MAX) + message_bug(); + + // Accept it if it is low enough. + if (encoder_memusages[i] <= memory_limit) + break; + + // Otherwise adjust it 1 MiB down and try again. + opt->dict_size -= UINT32_C(1) << 20; + } + + // Tell the user that we decreased the dictionary size. + // The message is slightly different between the default + // filter chain (0) or and chains from --filtersX. + const char lzma_num = chains[i][j].id == LZMA_FILTER_LZMA2 + ? '2' : '1'; + const char *from_size = uint64_to_str(orig_dict_size >> 20, 0); + const char *to_size = uint64_to_str(opt->dict_size >> 20, 1); + const char *limit_size = uint64_to_str(round_up_to_mib( + memory_limit), 2); + if (i == 0) + message(V_WARNING, _("Adjusted LZMA%c dictionary " + "size from %s MiB to %s MiB to not exceed the " + "memory usage limit of %s MiB"), + lzma_num, from_size, to_size, limit_size); + else + message(V_WARNING, _("Adjusted LZMA%c dictionary size " + "for --filters%u from %s MiB to %s MiB to not " + "exceed the memory usage limit of %s MiB"), + lzma_num, i, from_size, to_size, limit_size); + } +#endif + + return; +} + + +#ifdef HAVE_DECODERS +/// Return true if the data in in_buf seems to be in the .xz format. +static bool +is_format_xz(void) +{ + // Specify the magic as hex to be compatible with EBCDIC systems. + static const uint8_t magic[6] = { 0xFD, 0x37, 0x7A, 0x58, 0x5A, 0x00 }; + return strm.avail_in >= sizeof(magic) + && memcmp(in_buf.u8, magic, sizeof(magic)) == 0; +} + + +/// Return true if the data in in_buf seems to be in the .lzma format. +static bool +is_format_lzma(void) +{ + // The .lzma header is 13 bytes. + if (strm.avail_in < 13) + return false; + + // Decode the LZMA1 properties. + lzma_filter filter = { .id = LZMA_FILTER_LZMA1 }; + if (lzma_properties_decode(&filter, NULL, in_buf.u8, 5) != LZMA_OK) + return false; + + // A hack to ditch tons of false positives: We allow only dictionary + // sizes that are 2^n or 2^n + 2^(n-1) or UINT32_MAX. LZMA_Alone + // created only files with 2^n, but accepts any dictionary size. + // If someone complains, this will be reconsidered. + lzma_options_lzma *opt = filter.options; + const uint32_t dict_size = opt->dict_size; + free(opt); + + if (dict_size != UINT32_MAX) { + uint32_t d = dict_size - 1; + d |= d >> 2; + d |= d >> 3; + d |= d >> 4; + d |= d >> 8; + d |= d >> 16; + ++d; + if (d != dict_size || dict_size == 0) + return false; + } + + // Another hack to ditch false positives: Assume that if the + // uncompressed size is known, it must be less than 256 GiB. + // Again, if someone complains, this will be reconsidered. + uint64_t uncompressed_size = 0; + for (size_t i = 0; i < 8; ++i) + uncompressed_size |= (uint64_t)(in_buf.u8[5 + i]) << (i * 8); + + if (uncompressed_size != UINT64_MAX + && uncompressed_size > (UINT64_C(1) << 38)) + return false; + + return true; +} + + +#ifdef HAVE_LZIP_DECODER +/// Return true if the data in in_buf seems to be in the .lz format. +static bool +is_format_lzip(void) +{ + static const uint8_t magic[4] = { 0x4C, 0x5A, 0x49, 0x50 }; + return strm.avail_in >= sizeof(magic) + && memcmp(in_buf.u8, magic, sizeof(magic)) == 0; +} +#endif +#endif + + +/// Detect the input file type (for now, this done only when decompressing), +/// and initialize an appropriate coder. Return value indicates if a normal +/// liblzma-based coder was initialized (CODER_INIT_NORMAL), if passthru +/// mode should be used (CODER_INIT_PASSTHRU), or if an error occurred +/// (CODER_INIT_ERROR). +static enum coder_init_ret +coder_init(file_pair *pair) +{ + lzma_ret ret = LZMA_PROG_ERROR; + + // In most cases if there is input left when coding finishes, + // something has gone wrong. Exceptions are --single-stream + // and decoding .lz files which can contain trailing non-.lz data. + // These will be handled later in this function. + allow_trailing_input = false; + + // Set the first filter chain. If the --block-list option is not + // used then use the default filter chain (chains[0]). + // Otherwise, use first filter chain from the block list. + lzma_filter *active_filters = opt_block_list == NULL + ? chains[0] + : chains[opt_block_list[0].chain_num]; + + if (opt_mode == MODE_COMPRESS) { +#ifdef HAVE_ENCODERS + switch (opt_format) { + case FORMAT_AUTO: + // args.c ensures this. + assert(0); + break; + + case FORMAT_XZ: +# ifdef MYTHREAD_ENABLED + mt_options.filters = active_filters; + if (hardware_threads_is_mt()) + ret = lzma_stream_encoder_mt( + &strm, &mt_options); + else +# endif + ret = lzma_stream_encoder( + &strm, active_filters, check); + break; + + case FORMAT_LZMA: + ret = lzma_alone_encoder(&strm, + active_filters[0].options); + break; + +# ifdef HAVE_LZIP_DECODER + case FORMAT_LZIP: + // args.c should disallow this. + assert(0); + ret = LZMA_PROG_ERROR; + break; +# endif + + case FORMAT_RAW: + ret = lzma_raw_encoder(&strm, active_filters); + break; + } +#endif + } else { +#ifdef HAVE_DECODERS + uint32_t flags = 0; + + // It seems silly to warn about unsupported check if the + // check won't be verified anyway due to --ignore-check. + if (opt_ignore_check) + flags |= LZMA_IGNORE_CHECK; + else + flags |= LZMA_TELL_UNSUPPORTED_CHECK; + + if (opt_single_stream) + allow_trailing_input = true; + else + flags |= LZMA_CONCATENATED; + + // We abuse FORMAT_AUTO to indicate unknown file format, + // for which we may consider passthru mode. + enum format_type init_format = FORMAT_AUTO; + + switch (opt_format) { + case FORMAT_AUTO: + // .lz is checked before .lzma since .lzma detection + // is more complicated (no magic bytes). + if (is_format_xz()) + init_format = FORMAT_XZ; +# ifdef HAVE_LZIP_DECODER + else if (is_format_lzip()) + init_format = FORMAT_LZIP; +# endif + else if (is_format_lzma()) + init_format = FORMAT_LZMA; + break; + + case FORMAT_XZ: + if (is_format_xz()) + init_format = FORMAT_XZ; + break; + + case FORMAT_LZMA: + if (is_format_lzma()) + init_format = FORMAT_LZMA; + break; + +# ifdef HAVE_LZIP_DECODER + case FORMAT_LZIP: + if (is_format_lzip()) + init_format = FORMAT_LZIP; + break; +# endif + + case FORMAT_RAW: + init_format = FORMAT_RAW; + break; + } + + switch (init_format) { + case FORMAT_AUTO: + // Unknown file format. If --decompress --stdout + // --force have been given, then we copy the input + // as is to stdout. Checking for MODE_DECOMPRESS + // is needed, because we don't want to do use + // passthru mode with --test. + if (opt_mode == MODE_DECOMPRESS + && opt_stdout && opt_force) { + // These are needed for progress info. + strm.total_in = 0; + strm.total_out = 0; + return CODER_INIT_PASSTHRU; + } + + ret = LZMA_FORMAT_ERROR; + break; + + case FORMAT_XZ: +# ifdef MYTHREAD_ENABLED + mt_options.flags = flags; + + mt_options.threads = hardware_threads_get(); + mt_options.memlimit_stop + = hardware_memlimit_get(MODE_DECOMPRESS); + + // If single-threaded mode was requested, set the + // memlimit for threading to zero. This forces the + // decoder to use single-threaded mode which matches + // the behavior of lzma_stream_decoder(). + // + // Otherwise use the limit for threaded decompression + // which has a sane default (users are still free to + // make it insanely high though). + mt_options.memlimit_threading + = mt_options.threads == 1 + ? 0 : hardware_memlimit_mtdec_get(); + + ret = lzma_stream_decoder_mt(&strm, &mt_options); +# else + ret = lzma_stream_decoder(&strm, + hardware_memlimit_get( + MODE_DECOMPRESS), flags); +# endif + break; + + case FORMAT_LZMA: + ret = lzma_alone_decoder(&strm, + hardware_memlimit_get( + MODE_DECOMPRESS)); + break; + +# ifdef HAVE_LZIP_DECODER + case FORMAT_LZIP: + allow_trailing_input = true; + ret = lzma_lzip_decoder(&strm, + hardware_memlimit_get( + MODE_DECOMPRESS), flags); + break; +# endif + + case FORMAT_RAW: + // Memory usage has already been checked in + // coder_set_compression_settings(). + ret = lzma_raw_decoder(&strm, active_filters); + break; + } + + // Try to decode the headers. This will catch too low + // memory usage limit in case it happens in the first + // Block of the first Stream, which is where it very + // probably will happen if it is going to happen. + // + // This will also catch unsupported check type which + // we treat as a warning only. If there are empty + // concatenated Streams with unsupported check type then + // the message can be shown more than once here. The loop + // is used in case there is first a warning about + // unsupported check type and then the first Block + // would exceed the memlimit. + if (ret == LZMA_OK && init_format != FORMAT_RAW) { + strm.next_out = NULL; + strm.avail_out = 0; + while ((ret = lzma_code(&strm, LZMA_RUN)) + == LZMA_UNSUPPORTED_CHECK) + message_warning(_("%s: %s"), + tuklib_mask_nonprint(pair->src_name), + message_strm(ret)); + + // With --single-stream lzma_code won't wait for + // LZMA_FINISH and thus it can return LZMA_STREAM_END + // if the file has no uncompressed data inside. + // So treat LZMA_STREAM_END as LZMA_OK here. + // When lzma_code() is called again in coder_normal() + // it will return LZMA_STREAM_END again. + if (ret == LZMA_STREAM_END) + ret = LZMA_OK; + } +#endif + } + + if (ret != LZMA_OK) { + message_error(_("%s: %s"), + tuklib_mask_nonprint(pair->src_name), + message_strm(ret)); + if (ret == LZMA_MEMLIMIT_ERROR) + message_mem_needed(V_ERROR, lzma_memusage(&strm)); + + return CODER_INIT_ERROR; + } + + return CODER_INIT_NORMAL; +} + + +#ifdef HAVE_ENCODERS +/// Resolve conflicts between opt_block_size and opt_block_list in single +/// threaded mode. We want to default to opt_block_list, except when it is +/// larger than opt_block_size. If this is the case for the current Block +/// at *list_pos, then we break into smaller Blocks. Otherwise advance +/// to the next Block in opt_block_list, and break apart if needed. +static void +split_block(uint64_t *block_remaining, + uint64_t *next_block_remaining, + size_t *list_pos) +{ + if (*next_block_remaining > 0) { + // The Block at *list_pos has previously been split up. + assert(!hardware_threads_is_mt()); + assert(opt_block_size > 0); + assert(opt_block_list != NULL); + + if (*next_block_remaining > opt_block_size) { + // We have to split the current Block at *list_pos + // into another opt_block_size length Block. + *block_remaining = opt_block_size; + } else { + // This is the last remaining split Block for the + // Block at *list_pos. + *block_remaining = *next_block_remaining; + } + + *next_block_remaining -= *block_remaining; + + } else { + // The Block at *list_pos has been finished. Go to the next + // entry in the list. If the end of the list has been + // reached, reuse the size and filters of the last Block. + if (opt_block_list[*list_pos + 1].size != 0) { + ++*list_pos; + + // Update the filters if needed. + if (opt_block_list[*list_pos - 1].chain_num + != opt_block_list[*list_pos].chain_num) { + const unsigned chain_num + = opt_block_list[*list_pos].chain_num; + const lzma_filter *next = chains[chain_num]; + const lzma_ret ret = lzma_filters_update( + &strm, next); + + if (ret != LZMA_OK) { + // This message is only possible if + // the filter chain has unsupported + // options since the filter chain is + // validated using + // lzma_raw_encoder_memusage() or + // lzma_stream_encoder_mt_memusage(). + // Some options are not validated until + // the encoders are initialized. + message_fatal( + _("Error changing to " + "filter chain %u: %s"), + chain_num, + message_strm(ret)); + } + } + } + + *block_remaining = opt_block_list[*list_pos].size; + + // If in single-threaded mode, split up the Block if needed. + // This is not needed in multi-threaded mode because liblzma + // will do this due to how threaded encoding works. + if (!hardware_threads_is_mt() && opt_block_size > 0 + && *block_remaining > opt_block_size) { + *next_block_remaining + = *block_remaining - opt_block_size; + *block_remaining = opt_block_size; + } + } +} +#endif + + +static bool +coder_write_output(file_pair *pair) +{ + if (opt_mode != MODE_TEST) { + if (io_write(pair, &out_buf, IO_BUFFER_SIZE - strm.avail_out)) + return true; + } + + strm.next_out = out_buf.u8; + strm.avail_out = IO_BUFFER_SIZE; + return false; +} + + +/// Compress or decompress using liblzma. +static bool +coder_normal(file_pair *pair) +{ + // Encoder needs to know when we have given all the input to it. + // The decoders need to know it too when we are using + // LZMA_CONCATENATED. We need to check for src_eof here, because + // the first input chunk has been already read if decompressing, + // and that may have been the only chunk we will read. + lzma_action action = pair->src_eof ? LZMA_FINISH : LZMA_RUN; + + lzma_ret ret; + + // Assume that something goes wrong. + bool success = false; + +#ifdef HAVE_ENCODERS + // block_remaining indicates how many input bytes to encode before + // finishing the current .xz Block. The Block size is set with + // --block-size=SIZE and --block-list. They have an effect only when + // compressing to the .xz format. If block_remaining == UINT64_MAX, + // only a single block is created. + uint64_t block_remaining = UINT64_MAX; + + // next_block_remaining for when we are in single-threaded mode and + // the Block in --block-list is larger than the --block-size=SIZE. + uint64_t next_block_remaining = 0; + + // Position in opt_block_list. Unused if --block-list wasn't used. + size_t list_pos = 0; + + // Handle --block-size for single-threaded mode and the first step + // of --block-list. + if (opt_mode == MODE_COMPRESS && opt_format == FORMAT_XZ) { + // --block-size doesn't do anything here in threaded mode, + // because the threaded encoder will take care of splitting + // to fixed-sized Blocks. + if (!hardware_threads_is_mt() && opt_block_size > 0) + block_remaining = opt_block_size; + + // If --block-list was used, start with the first size. + // + // For threaded case, --block-size specifies how big Blocks + // the encoder needs to be prepared to create at maximum + // and --block-list will simultaneously cause new Blocks + // to be started at specified intervals. To keep things + // logical, the same is done in single-threaded mode. The + // output is still not identical because in single-threaded + // mode the size info isn't written into Block Headers. + if (opt_block_list != NULL) { + if (block_remaining < opt_block_list[list_pos].size) { + assert(!hardware_threads_is_mt()); + next_block_remaining = + opt_block_list[list_pos].size + - block_remaining; + } else { + block_remaining = + opt_block_list[list_pos].size; + } + } + } +#endif + + strm.next_out = out_buf.u8; + strm.avail_out = IO_BUFFER_SIZE; + + while (!user_abort) { + // Fill the input buffer if it is empty and we aren't + // flushing or finishing. + if (strm.avail_in == 0 && action == LZMA_RUN) { + strm.next_in = in_buf.u8; +#ifdef HAVE_ENCODERS + const size_t read_size = my_min(block_remaining, + IO_BUFFER_SIZE); +#else + const size_t read_size = IO_BUFFER_SIZE; +#endif + strm.avail_in = io_read(pair, &in_buf, read_size); + + if (strm.avail_in == SIZE_MAX) + break; + + if (pair->src_eof) { + action = LZMA_FINISH; + } +#ifdef HAVE_ENCODERS + else if (block_remaining != UINT64_MAX) { + // Start a new Block after every + // opt_block_size bytes of input. + block_remaining -= strm.avail_in; + if (block_remaining == 0) + action = LZMA_FULL_BARRIER; + } + + if (action == LZMA_RUN && pair->flush_needed) + action = LZMA_SYNC_FLUSH; +#endif + } + + // Let liblzma do the actual work. + ret = lzma_code(&strm, action); + + // Write out if the output buffer became full. + if (strm.avail_out == 0) { + if (coder_write_output(pair)) + break; + } + +#ifdef HAVE_ENCODERS + if (ret == LZMA_STREAM_END && (action == LZMA_SYNC_FLUSH + || action == LZMA_FULL_BARRIER)) { + if (action == LZMA_SYNC_FLUSH) { + // Flushing completed. Write the pending data + // out immediately so that the reading side + // can decompress everything compressed so far. + if (coder_write_output(pair)) + break; + + // Mark that we haven't seen any new input + // since the previous flush. + pair->src_has_seen_input = false; + pair->flush_needed = false; + } else { + // Start a new Block after LZMA_FULL_BARRIER. + if (opt_block_list == NULL) { + assert(!hardware_threads_is_mt()); + assert(opt_block_size > 0); + block_remaining = opt_block_size; + } else { + split_block(&block_remaining, + &next_block_remaining, + &list_pos); + } + } + + // Start a new Block after LZMA_FULL_FLUSH or continue + // the same block after LZMA_SYNC_FLUSH. + action = LZMA_RUN; + } else +#endif + if (ret != LZMA_OK) { + // Determine if the return value indicates that we + // won't continue coding. LZMA_NO_CHECK would be + // here too if LZMA_TELL_ANY_CHECK was used. + const bool stop = ret != LZMA_UNSUPPORTED_CHECK; + + if (stop) { + // Write the remaining bytes even if something + // went wrong, because that way the user gets + // as much data as possible, which can be good + // when trying to get at least some useful + // data out of damaged files. + if (coder_write_output(pair)) + break; + } + + if (ret == LZMA_STREAM_END) { + if (allow_trailing_input) { + io_fix_src_pos(pair, strm.avail_in); + success = true; + break; + } + + // Check that there is no trailing garbage. + // This is needed for LZMA_Alone and raw + // streams. This is *not* done with .lz files + // as that format specifically requires + // allowing trailing garbage. + if (strm.avail_in == 0 && !pair->src_eof) { + // Try reading one more byte. + // Hopefully we don't get any more + // input, and thus pair->src_eof + // becomes true. + strm.avail_in = io_read( + pair, &in_buf, 1); + if (strm.avail_in == SIZE_MAX) + break; + + assert(strm.avail_in == 0 + || strm.avail_in == 1); + } + + if (strm.avail_in == 0) { + assert(pair->src_eof); + success = true; + break; + } + + // We hadn't reached the end of the file. + ret = LZMA_DATA_ERROR; + assert(stop); + } + + // If we get here and stop is true, something went + // wrong and we print an error. Otherwise it's just + // a warning and coding can continue. + if (stop) { + message_error(_("%s: %s"), + tuklib_mask_nonprint(pair->src_name), + message_strm(ret)); + } else { + message_warning(_("%s: %s"), + tuklib_mask_nonprint(pair->src_name), + message_strm(ret)); + + // When compressing, all possible errors set + // stop to true. + assert(opt_mode != MODE_COMPRESS); + } + + if (ret == LZMA_MEMLIMIT_ERROR) { + // Display how much memory it would have + // actually needed. + message_mem_needed(V_ERROR, + lzma_memusage(&strm)); + } + + if (stop) + break; + } + + // Show progress information under certain conditions. + message_progress_update(); + } + + return success; +} + + +/// Copy from input file to output file without processing the data in any +/// way. This is used only when trying to decompress unrecognized files +/// with --decompress --stdout --force, so the output is always stdout. +static bool +coder_passthru(file_pair *pair) +{ + while (strm.avail_in != 0) { + if (user_abort) + return false; + + if (io_write(pair, &in_buf, strm.avail_in)) + return false; + + strm.total_in += strm.avail_in; + strm.total_out = strm.total_in; + message_progress_update(); + + strm.avail_in = io_read(pair, &in_buf, IO_BUFFER_SIZE); + if (strm.avail_in == SIZE_MAX) + return false; + } + + return true; +} + + +extern void +coder_run(const char *filename) +{ + // Set and possibly print the filename for the progress message. + message_filename(filename); + + // Try to open the input file. + file_pair *pair = io_open_src(filename); + if (pair == NULL) + return; + + // Assume that something goes wrong. + bool success = false; + + if (opt_mode == MODE_COMPRESS) { + strm.next_in = NULL; + strm.avail_in = 0; + } else { + // Read the first chunk of input data. This is needed + // to detect the input file type. + strm.next_in = in_buf.u8; + strm.avail_in = io_read(pair, &in_buf, IO_BUFFER_SIZE); + } + + if (strm.avail_in != SIZE_MAX) { + // Initialize the coder. This will detect the file format + // and, in decompression or testing mode, check the memory + // usage of the first Block too. This way we don't try to + // open the destination file if we see that coding wouldn't + // work at all anyway. This also avoids deleting the old + // "target" file if --force was used. + const enum coder_init_ret init_ret = coder_init(pair); + + if (init_ret != CODER_INIT_ERROR && !user_abort) { + // Don't open the destination file when --test + // is used. + if (opt_mode == MODE_TEST || !io_open_dest(pair)) { + // Remember the current time. It is needed + // for progress indicator. + mytime_set_start_time(); + + // Initialize the progress indicator. + // + // NOTE: When reading from stdin, fstat() + // isn't called on it and thus src_st.st_size + // is zero. If stdin pointed to a regular + // file, it would still be possible to know + // the file size but then we would also need + // to take into account the current reading + // position since with stdin it isn't + // necessarily at the beginning of the file. + const bool is_passthru = init_ret + == CODER_INIT_PASSTHRU; + const uint64_t in_size + = pair->src_st.st_size <= 0 + ? 0 : (uint64_t)(pair->src_st.st_size); + message_progress_start(&strm, + is_passthru, in_size); + + // Do the actual coding or passthru. + if (is_passthru) + success = coder_passthru(pair); + else + success = coder_normal(pair); + + message_progress_end(success); + } + } + } + + // Close the file pair. It needs to know if coding was successful to + // know if the source or target file should be unlinked. + io_close(pair, success); + + return; +} + + +#ifndef NDEBUG +extern void +coder_free(void) +{ + // Free starting from the second filter chain since the default + // filter chain may have its options set from a static variable + // in coder_set_compression_settings(). Since this is only run in + // debug mode and will be freed when the process ends anyway, we + // don't worry about freeing it. + for (uint32_t i = 1; i < ARRAY_SIZE(chains); i++) { + if (chains_used_mask & (1U << i)) + lzma_filters_free(chains[i], NULL); + } + + lzma_end(&strm); + return; +} +#endif diff --git a/src/native/external/xz/src/xz/coder.h b/src/native/external/xz/src/xz/coder.h new file mode 100644 index 00000000000000..96755f30d106af --- /dev/null +++ b/src/native/external/xz/src/xz/coder.h @@ -0,0 +1,107 @@ +// SPDX-License-Identifier: 0BSD + +/////////////////////////////////////////////////////////////////////////////// +// +/// \file coder.h +/// \brief Compresses or uncompresses a file +// +// Authors: Lasse Collin +// Jia Tan +// +/////////////////////////////////////////////////////////////////////////////// + +enum operation_mode { + MODE_COMPRESS, + MODE_DECOMPRESS, + MODE_TEST, + MODE_LIST, +}; + + +// NOTE: The order of these is significant in suffix.c. +enum format_type { + FORMAT_AUTO, + FORMAT_XZ, + FORMAT_LZMA, +#ifdef HAVE_LZIP_DECODER + FORMAT_LZIP, +#endif + FORMAT_RAW, +}; + + +/// Array of these hold the entries specified with --block-list. +typedef struct { + /// Uncompressed size of the Block + uint64_t size; + + /// Filter chain to use for this Block (chains[chain_num]) + unsigned chain_num; +} block_list_entry; + + +/// Operation mode of the command line tool. This is set in args.c and read +/// in several files. +extern enum operation_mode opt_mode; + +/// File format to use when encoding or what format(s) to accept when +/// decoding. This is a global because it's needed also in suffix.c. +/// This is set in args.c. +extern enum format_type opt_format; + +/// If true, the compression settings are automatically adjusted down if +/// they exceed the memory usage limit. +extern bool opt_auto_adjust; + +/// If true, stop after decoding the first stream. +extern bool opt_single_stream; + +/// If non-zero, start a new .xz Block after every opt_block_size bytes +/// of input. This has an effect only when compressing to the .xz format. +extern uint64_t opt_block_size; + +/// List of block size and filter chain pointer pairs. +extern block_list_entry *opt_block_list; + +/// Size of the largest Block that was specified in --block-list. +/// This is used to limit the block_size option of multithreaded encoder. +/// It's waste of memory to specify a too large block_size and reducing +/// it might even allow using more threads in some cases. +/// +/// NOTE: If the last entry in --block-list is the special value of 0 +/// (which gets converted to UINT64_MAX), it counts here as UINT64_MAX too. +/// This way the multithreaded encoder's Block size won't be reduced. +extern uint64_t block_list_largest; + +/// Bitmask indicating which filter chains we specified in --block-list. +extern uint32_t block_list_chain_mask; + +/// Set the integrity check type used when compressing +extern void coder_set_check(lzma_check check); + +/// Set preset number +extern void coder_set_preset(uint32_t new_preset); + +/// Enable extreme mode +extern void coder_set_extreme(void); + +/// Add a filter to the custom filter chain +extern void coder_add_filter(lzma_vli id, void *options); + +/// Set and partially validate compression settings. This can also be used +/// in decompression or test mode with the raw format. +extern void coder_set_compression_settings(void); + +/// Compress or decompress the given file +extern void coder_run(const char *filename); + +#ifndef NDEBUG +/// Free the memory allocated for the coder and kill the worker threads. +extern void coder_free(void); +#endif + +/// Create filter chain from string +extern void coder_add_filters_from_str(const char *filter_str); + +/// Add or overwrite a filter that can be used by the block-list. +extern void coder_add_block_filters(const char *str, size_t slot); diff --git a/src/native/external/xz/src/xz/file_io.c b/src/native/external/xz/src/xz/file_io.c new file mode 100644 index 00000000000000..6b1a1536fb07cf --- /dev/null +++ b/src/native/external/xz/src/xz/file_io.c @@ -0,0 +1,1484 @@ +// SPDX-License-Identifier: 0BSD + +/////////////////////////////////////////////////////////////////////////////// +// +/// \file file_io.c +/// \brief File opening, unlinking, and closing +// +// Author: Lasse Collin +// +/////////////////////////////////////////////////////////////////////////////// + +#include "private.h" + +#include + +#ifdef TUKLIB_DOSLIKE +# include +#else +# include +# include +static bool warn_fchown; +#endif + +#if defined(HAVE_FUTIMES) || defined(HAVE_FUTIMESAT) || defined(HAVE_UTIMES) +# include +#elif defined(HAVE__FUTIME) +# include +#elif defined(HAVE_UTIME) +# include +#endif + +#include "tuklib_open_stdxxx.h" + +#ifdef _MSC_VER +# ifdef _WIN64 + typedef __int64 ssize_t; +# else + typedef int ssize_t; +# endif + + typedef int mode_t; +# define S_IRUSR _S_IREAD +# define S_IWUSR _S_IWRITE + +# define setmode _setmode +# define open _open +# define close _close +# define lseek _lseeki64 +# define unlink _unlink + + // The casts are to silence warnings. + // The sizes are known to be small enough. +# define read(fd, buf, size) _read(fd, buf, (unsigned int)(size)) +# define write(fd, buf, size) _write(fd, buf, (unsigned int)(size)) + +# define S_ISDIR(m) (((m) & _S_IFMT) == _S_IFDIR) +# define S_ISREG(m) (((m) & _S_IFMT) == _S_IFREG) +#endif + +#if defined(_WIN32) && !defined(__CYGWIN__) +# define fsync _commit +#endif + +#ifndef O_BINARY +# define O_BINARY 0 +#endif + +#ifndef O_NOCTTY +# define O_NOCTTY 0 +#endif + +// In musl 1.2.5, O_SEARCH is defined to O_PATH. As of Linux 6.12, +// a file descriptor from open("dir", O_SEARCH | O_DIRECTORY) cannot be +// used with fsync() (fails with EBADF). musl 1.2.5 doesn't emulate it +// using /proc/self/fd. Even if it did, it might need to do it with +// fd = open("/proc/...", O_RDONLY); fsync(fd); which fails if the +// directory lacks read permission. Since we need a working fsync(), +// O_RDONLY imitates O_SEARCH better than O_PATH. +#if defined(O_SEARCH) && defined(O_PATH) && O_SEARCH == O_PATH +# undef O_SEARCH +#endif + +#ifndef O_SEARCH +# define O_SEARCH O_RDONLY +#endif + +#ifndef O_DIRECTORY +# define O_DIRECTORY 0 +#endif + +// Using this macro to silence a warning from gcc -Wlogical-op. +#if EAGAIN == EWOULDBLOCK +# define IS_EAGAIN_OR_EWOULDBLOCK(e) ((e) == EAGAIN) +#else +# define IS_EAGAIN_OR_EWOULDBLOCK(e) \ + ((e) == EAGAIN || (e) == EWOULDBLOCK) +#endif + + +typedef enum { + IO_WAIT_MORE, // Reading or writing is possible. + IO_WAIT_ERROR, // Error or user_abort + IO_WAIT_TIMEOUT, // poll() timed out +} io_wait_ret; + + +/// If true, try to create sparse files when decompressing. +static bool try_sparse = true; + +#ifndef TUKLIB_DOSLIKE +/// File status flags of standard input. This is used by io_open_src() +/// and io_close_src(). +static int stdin_flags; +static bool restore_stdin_flags = false; + +/// Original file status flags of standard output. This is used by +/// io_open_dest() and io_close_dest() to save and restore the flags. +static int stdout_flags; +static bool restore_stdout_flags = false; + +/// Self-pipe used together with the user_abort variable to avoid +/// race conditions with signal handling. +static int user_abort_pipe[2]; +#endif + + +static bool io_write_buf(file_pair *pair, const uint8_t *buf, size_t size); + + +extern void +io_init(void) +{ + // Make sure that stdin, stdout, and stderr are connected to + // a valid file descriptor. Exit immediately with exit code ERROR + // if we cannot make the file descriptors valid. Maybe we should + // print an error message, but our stderr could be screwed anyway. + tuklib_open_stdxxx(E_ERROR); + +#ifndef TUKLIB_DOSLIKE + // If fchown() fails setting the owner, we warn about it only if + // we are root. + warn_fchown = geteuid() == 0; + + // Create a pipe for the self-pipe trick. + if (pipe(user_abort_pipe)) + message_fatal(_("Error creating a pipe: %s"), + strerror(errno)); + + // Make both ends of the pipe non-blocking. + for (unsigned i = 0; i < 2; ++i) { + int flags = fcntl(user_abort_pipe[i], F_GETFL); + if (flags == -1 || fcntl(user_abort_pipe[i], F_SETFL, + flags | O_NONBLOCK) == -1) + message_fatal(_("Error creating a pipe: %s"), + strerror(errno)); + } +#endif + +#ifdef __DJGPP__ + // Avoid doing useless things when statting files. + // This isn't important but doesn't hurt. + _djstat_flags = _STAT_EXEC_EXT | _STAT_EXEC_MAGIC | _STAT_DIRSIZE; +#endif + + return; +} + + +#ifndef TUKLIB_DOSLIKE +extern void +io_write_to_user_abort_pipe(void) +{ + // If the write() fails, it's probably due to the pipe being full. + // Failing in that case is fine. If the reason is something else, + // there's not much we can do since this is called in a signal + // handler. So ignore the errors and try to avoid warnings with + // GCC and glibc when _FORTIFY_SOURCE=2 is used. + uint8_t b = '\0'; + const ssize_t ret = write(user_abort_pipe[1], &b, 1); + (void)ret; + return; +} +#endif + + +extern void +io_no_sparse(void) +{ + try_sparse = false; + return; +} + + +#ifndef TUKLIB_DOSLIKE +/// \brief Waits for input or output to become available or for a signal +/// +/// This uses the self-pipe trick to avoid a race condition that can occur +/// if a signal is caught after user_abort has been checked but before e.g. +/// read() has been called. In that situation read() could block unless +/// non-blocking I/O is used. With non-blocking I/O something like select() +/// or poll() is needed to avoid a busy-wait loop, and the same race condition +/// pops up again. There are pselect() (POSIX-1.2001) and ppoll() (not in +/// POSIX) but neither is portable enough in 2013. The self-pipe trick is +/// old and very portable. +static io_wait_ret +io_wait(file_pair *pair, int timeout, bool is_reading) +{ + struct pollfd pfd[2]; + + if (is_reading) { + pfd[0].fd = pair->src_fd; + pfd[0].events = POLLIN; + } else { + pfd[0].fd = pair->dest_fd; + pfd[0].events = POLLOUT; + } + + pfd[1].fd = user_abort_pipe[0]; + pfd[1].events = POLLIN; + + while (true) { + const int ret = poll(pfd, 2, timeout); + + if (user_abort) + return IO_WAIT_ERROR; + + if (ret == -1) { + if (errno == EINTR || errno == EAGAIN) + continue; + + message_error(_("%s: poll() failed: %s"), + tuklib_mask_nonprint(is_reading + ? pair->src_name + : pair->dest_name), + strerror(errno)); + return IO_WAIT_ERROR; + } + + if (ret == 0) + return IO_WAIT_TIMEOUT; + + if (pfd[0].revents != 0) + return IO_WAIT_MORE; + } +} +#endif + + +/// \brief Unlink a file +/// +/// This tries to verify that the file being unlinked really is the file that +/// we want to unlink by verifying device and inode numbers. There's still +/// a small unavoidable race, but this is much better than nothing (the file +/// could have been moved/replaced even hours earlier). +static void +io_unlink(const char *name, const struct stat *known_st) +{ +#if defined(TUKLIB_DOSLIKE) + // On DOS-like systems, st_ino is meaningless, so don't bother + // testing it. Just silence a compiler warning. + (void)known_st; +#else + struct stat new_st; + + // If --force was used, use stat() instead of lstat(). This way + // (de)compressing symlinks works correctly. However, it also means + // that xz cannot detect if a regular file foo is renamed to bar + // and then a symlink foo -> bar is created. Because of stat() + // instead of lstat(), xz will think that foo hasn't been replaced + // with another file. Thus, xz will remove foo even though it no + // longer is the same file that xz used when it started compressing. + // Probably it's not too bad though, so this doesn't need a more + // complex fix. + const int stat_ret = opt_force + ? stat(name, &new_st) : lstat(name, &new_st); + + if (stat_ret +# ifdef __VMS + // st_ino is an array, and we don't want to + // compare st_dev at all. + || memcmp(&new_st.st_ino, &known_st->st_ino, + sizeof(new_st.st_ino)) != 0 +# else + // Typical POSIX-like system + || new_st.st_dev != known_st->st_dev + || new_st.st_ino != known_st->st_ino +# endif + ) + // TRANSLATORS: When compression or decompression finishes, + // and xz is going to remove the source file, xz first checks + // if the source file still exists, and if it does, does its + // device and inode numbers match what xz saw when it opened + // the source file. If these checks fail, this message is + // shown, %s being the filename, and the file is not deleted. + // The check for device and inode numbers is there, because + // it is possible that the user has put a new file in place + // of the original file, and in that case it obviously + // shouldn't be removed. + message_warning(_("%s: File seems to have been moved, " + "not removing"), tuklib_mask_nonprint(name)); + else +#endif + // There's a race condition between lstat() and unlink() + // but at least we have tried to avoid removing wrong file. + if (unlink(name)) + message_warning(_("%s: Cannot remove: %s"), + tuklib_mask_nonprint(name), + strerror(errno)); + + return; +} + + +/// \brief Copies owner/group and permissions +/// +/// \todo ACL and EA support +/// +static void +io_copy_attrs(const file_pair *pair) +{ + // Skip chown and chmod on Windows. +#ifndef TUKLIB_DOSLIKE + // This function is more tricky than you may think at first. + // Blindly copying permissions may permit users to access the + // destination file who didn't have permission to access the + // source file. + + // Try changing the owner of the file. If we aren't root or the owner + // isn't already us, fchown() probably doesn't succeed. We warn + // about failing fchown() only if we are root. + if (fchown(pair->dest_fd, pair->src_st.st_uid, (gid_t)(-1)) + && warn_fchown) + message_warning(_("%s: Cannot set the file owner: %s"), + tuklib_mask_nonprint(pair->dest_name), + strerror(errno)); + + mode_t mode; + + // With BSD semantics the new dest file may have a group that + // does not belong to the user. If the src file has the same gid + // nothing has to be done. Nevertheless OpenBSD fchown(2) fails + // in this case which seems to be POSIX compliant. As there is + // nothing to do, skip the system call. + if (pair->dest_st.st_gid != pair->src_st.st_gid + && fchown(pair->dest_fd, (uid_t)(-1), + pair->src_st.st_gid)) { + message_warning(_("%s: Cannot set the file group: %s"), + tuklib_mask_nonprint(pair->dest_name), + strerror(errno)); + // We can still safely copy some additional permissions: + // 'group' must be at least as strict as 'other' and + // also vice versa. + // + // NOTE: After this, the owner of the source file may + // get additional permissions. This shouldn't be too bad, + // because the owner would have had permission to chmod + // the original file anyway. + mode = ((pair->src_st.st_mode & 0070) >> 3) + & (pair->src_st.st_mode & 0007); + mode = (pair->src_st.st_mode & 0700) | (mode << 3) | mode; + } else { + // Drop the setuid, setgid, and sticky bits. + mode = pair->src_st.st_mode & 0777; + } + + if (fchmod(pair->dest_fd, mode)) + message_warning(_("%s: Cannot set the file permissions: %s"), + tuklib_mask_nonprint(pair->dest_name), + strerror(errno)); +#endif + + // Copy the timestamps. We have several possible ways to do this, of + // which some are better in both security and precision. + // + // First, get the nanosecond part of the timestamps. As of writing, + // it's not standardized by POSIX, and there are several names for + // the same thing in struct stat. + long atime_nsec; + long mtime_nsec; + +# if defined(HAVE_STRUCT_STAT_ST_ATIM_TV_NSEC) + // GNU and Solaris + atime_nsec = pair->src_st.st_atim.tv_nsec; + mtime_nsec = pair->src_st.st_mtim.tv_nsec; + +# elif defined(HAVE_STRUCT_STAT_ST_ATIMESPEC_TV_NSEC) + // BSD + atime_nsec = pair->src_st.st_atimespec.tv_nsec; + mtime_nsec = pair->src_st.st_mtimespec.tv_nsec; + +# elif defined(HAVE_STRUCT_STAT_ST_ATIMENSEC) + // GNU and BSD without extensions + atime_nsec = pair->src_st.st_atimensec; + mtime_nsec = pair->src_st.st_mtimensec; + +# elif defined(HAVE_STRUCT_STAT_ST_UATIME) + // Tru64 + atime_nsec = pair->src_st.st_uatime * 1000; + mtime_nsec = pair->src_st.st_umtime * 1000; + +# elif defined(HAVE_STRUCT_STAT_ST_ATIM_ST__TIM_TV_NSEC) + // UnixWare + atime_nsec = pair->src_st.st_atim.st__tim.tv_nsec; + mtime_nsec = pair->src_st.st_mtim.st__tim.tv_nsec; + +# else + // Safe fallback + atime_nsec = 0; + mtime_nsec = 0; +# endif + + // Construct a structure to hold the timestamps and call appropriate + // function to set the timestamps. +#if defined(HAVE_FUTIMENS) + // Use nanosecond precision. + struct timespec tv[2]; + tv[0].tv_sec = pair->src_st.st_atime; + tv[0].tv_nsec = atime_nsec; + tv[1].tv_sec = pair->src_st.st_mtime; + tv[1].tv_nsec = mtime_nsec; + + (void)futimens(pair->dest_fd, tv); + +#elif defined(HAVE_FUTIMES) || defined(HAVE_FUTIMESAT) || defined(HAVE_UTIMES) + // Use microsecond precision. + struct timeval tv[2]; + tv[0].tv_sec = pair->src_st.st_atime; + tv[0].tv_usec = atime_nsec / 1000; + tv[1].tv_sec = pair->src_st.st_mtime; + tv[1].tv_usec = mtime_nsec / 1000; + +# if defined(HAVE_FUTIMES) + (void)futimes(pair->dest_fd, tv); +# elif defined(HAVE_FUTIMESAT) + (void)futimesat(pair->dest_fd, NULL, tv); +# else + // Argh, no function to use a file descriptor to set the timestamp. + (void)utimes(pair->dest_name, tv); +# endif + +#elif defined(HAVE__FUTIME) + // Use one-second precision with Windows-specific _futime(). + // We could use utime() too except that for some reason the + // timestamp will get reset at close(). With _futime() it works. + // This struct cannot be const as _futime() takes a non-const pointer. + struct _utimbuf buf = { + .actime = pair->src_st.st_atime, + .modtime = pair->src_st.st_mtime, + }; + + // Avoid warnings. + (void)atime_nsec; + (void)mtime_nsec; + + (void)_futime(pair->dest_fd, &buf); + +#elif defined(HAVE_UTIME) + // Use one-second precision. utime() doesn't support using file + // descriptor either. Some systems have broken utime() prototype + // so don't make this const. + struct utimbuf buf = { + .actime = pair->src_st.st_atime, + .modtime = pair->src_st.st_mtime, + }; + + // Avoid warnings. + (void)atime_nsec; + (void)mtime_nsec; + + (void)utime(pair->dest_name, &buf); +#endif + + return; +} + + +/// \brief Synchronizes the destination file to permanent storage +/// +/// \param pair File pair having the destination file open for writing +/// +/// \return On success, false is returned. On error, error message +/// is printed and true is returned. +static bool +io_sync_dest(file_pair *pair) +{ + assert(pair->dest_fd != -1); + assert(pair->dest_fd != STDOUT_FILENO); + + if (fsync(pair->dest_fd)) { + message_error(_("%s: Synchronizing the file failed: %s"), + tuklib_mask_nonprint(pair->dest_name), + strerror(errno)); + return true; + } + +#if !defined(TUKLIB_DOSLIKE) && !defined(_AIX) + // On AIX, this would fail with EBADF. + if (fsync(pair->dir_fd)) { + message_error(_("%s: Synchronizing the directory of " + "the file failed: %s"), + tuklib_mask_nonprint(pair->dest_name), + strerror(errno)); + return true; + } +#endif + + return false; +} + + +/// Opens the source file. Returns false on success, true on error. +static bool +io_open_src_real(file_pair *pair) +{ + // There's nothing to open when reading from stdin. + if (pair->src_name == stdin_filename) { + pair->src_fd = STDIN_FILENO; +#ifdef TUKLIB_DOSLIKE + setmode(STDIN_FILENO, O_BINARY); +#else + // Try to set stdin to non-blocking mode. It won't work + // e.g. on OpenBSD if stdout is e.g. /dev/null. In such + // case we proceed as if stdin were non-blocking anyway + // (in case of /dev/null it will be in practice). The + // same applies to stdout in io_open_dest_real(). + stdin_flags = fcntl(STDIN_FILENO, F_GETFL); + if (stdin_flags == -1) { + message_error(_("Error getting the file status flags " + "from standard input: %s"), + strerror(errno)); + return true; + } + + if ((stdin_flags & O_NONBLOCK) == 0 + && fcntl(STDIN_FILENO, F_SETFL, + stdin_flags | O_NONBLOCK) != -1) + restore_stdin_flags = true; +#endif +#ifdef HAVE_POSIX_FADVISE + // It will fail if stdin is a pipe and that's fine. + (void)posix_fadvise(STDIN_FILENO, 0, 0, + opt_mode == MODE_LIST + ? POSIX_FADV_RANDOM + : POSIX_FADV_SEQUENTIAL); +#endif + return false; + } + + // Symlinks are not followed unless writing to stdout or --force + // or --keep was used. + const bool follow_symlinks + = opt_stdout || opt_force || opt_keep_original; + + // We accept only regular files if we are writing the output + // to disk too. bzip2 allows overriding this with --force but + // gzip and xz don't. + const bool reg_files_only = !opt_stdout; + + // Flags for open() + int flags = O_RDONLY | O_BINARY | O_NOCTTY; + +#ifndef TUKLIB_DOSLIKE + // Use non-blocking I/O: + // - It prevents blocking when opening FIFOs and some other + // special files, which is good if we want to accept only + // regular files. + // - It can help avoiding some race conditions with signal handling. + flags |= O_NONBLOCK; +#endif + +#if defined(O_NOFOLLOW) + if (!follow_symlinks) + flags |= O_NOFOLLOW; +#elif !defined(TUKLIB_DOSLIKE) + // Some POSIX-like systems lack O_NOFOLLOW (it's not required + // by POSIX). Check for symlinks with a separate lstat() on + // these systems. + if (!follow_symlinks) { + struct stat st; + if (lstat(pair->src_name, &st)) { + message_error(_("%s: %s"), + tuklib_mask_nonprint(pair->src_name), + strerror(errno)); + return true; + + } else if (S_ISLNK(st.st_mode)) { + message_warning(_("%s: Is a symbolic link, " + "skipping"), + tuklib_mask_nonprint(pair->src_name)); + return true; + } + } +#else + // Avoid warnings. + (void)follow_symlinks; +#endif + + // Try to open the file. Signals have been blocked so EINTR shouldn't + // be possible. + pair->src_fd = open(pair->src_name, flags); + + if (pair->src_fd == -1) { + // Signals (that have a signal handler) have been blocked. + assert(errno != EINTR); + +#ifdef O_NOFOLLOW + // Give an understandable error message if the reason + // for failing was that the file was a symbolic link. + // + // Note that at least Linux, OpenBSD, Solaris, and Darwin + // use ELOOP to indicate that O_NOFOLLOW was the reason + // that open() failed. Because there may be + // directories in the pathname, ELOOP may occur also + // because of a symlink loop in the directory part. + // So ELOOP doesn't tell us what actually went wrong, + // and this stupidity went into POSIX-1.2008 too. + // + // FreeBSD associates EMLINK with O_NOFOLLOW and + // Tru64 uses ENOTSUP. We use these directly here + // and skip the lstat() call and the associated race. + // I want to hear if there are other kernels that + // fail with something else than ELOOP with O_NOFOLLOW. + bool was_symlink = false; + +# if defined(__FreeBSD__) || defined(__DragonFly__) + if (errno == EMLINK) + was_symlink = true; + +# elif defined(__digital__) && defined(__unix__) + if (errno == ENOTSUP) + was_symlink = true; + +# elif defined(__NetBSD__) + if (errno == EFTYPE) + was_symlink = true; + +# else + if (errno == ELOOP && !follow_symlinks) { + const int saved_errno = errno; + struct stat st; + if (lstat(pair->src_name, &st) == 0 + && S_ISLNK(st.st_mode)) + was_symlink = true; + + errno = saved_errno; + } +# endif + + if (was_symlink) + message_warning(_("%s: Is a symbolic link, " + "skipping"), + tuklib_mask_nonprint(pair->src_name)); + else +#endif + // Something else than O_NOFOLLOW failing + // (assuming that the race conditions didn't + // confuse us). + message_error(_("%s: %s"), + tuklib_mask_nonprint(pair->src_name), + strerror(errno)); + + return true; + } + + // Stat the source file. We need the result also when we copy + // the permissions, and when unlinking. + // + // NOTE: Use stat() instead of fstat() with DJGPP, because + // then we have a better chance to get st_ino value that can + // be used in io_open_dest_real() to prevent overwriting the + // source file. +#ifdef __DJGPP__ + if (stat(pair->src_name, &pair->src_st)) + goto error_msg; +#else + if (fstat(pair->src_fd, &pair->src_st)) + goto error_msg; +#endif + + if (S_ISDIR(pair->src_st.st_mode)) { + message_warning(_("%s: Is a directory, skipping"), + tuklib_mask_nonprint(pair->src_name)); + goto error; + } + + if (reg_files_only && !S_ISREG(pair->src_st.st_mode)) { + message_warning(_("%s: Not a regular file, skipping"), + tuklib_mask_nonprint(pair->src_name)); + goto error; + } + +#ifndef TUKLIB_DOSLIKE + if (reg_files_only && !opt_force && !opt_keep_original) { + if (pair->src_st.st_mode & (S_ISUID | S_ISGID)) { + // gzip rejects setuid and setgid files even + // when --force was used. bzip2 doesn't check + // for them, but calls fchown() after fchmod(), + // and many systems automatically drop setuid + // and setgid bits there. + // + // We accept setuid and setgid files if + // --force or --keep was used. We drop these bits + // explicitly in io_copy_attr(). + message_warning(_("%s: File has setuid or " + "setgid bit set, skipping"), + tuklib_mask_nonprint(pair->src_name)); + goto error; + } + + if (pair->src_st.st_mode & S_ISVTX) { + message_warning(_("%s: File has sticky bit " + "set, skipping"), + tuklib_mask_nonprint(pair->src_name)); + goto error; + } + + if (pair->src_st.st_nlink > 1) { + message_warning(_("%s: Input file has more " + "than one hard link, skipping"), + tuklib_mask_nonprint(pair->src_name)); + goto error; + } + } + + // If it is something else than a regular file, wait until + // there is input available. This way reading from FIFOs + // will work when open() is used with O_NONBLOCK. + if (!S_ISREG(pair->src_st.st_mode)) { + signals_unblock(); + const io_wait_ret ret = io_wait(pair, -1, true); + signals_block(); + + if (ret != IO_WAIT_MORE) + goto error; + } +#endif + +#ifdef HAVE_POSIX_FADVISE + // It will fail with some special files like FIFOs but that is fine. + (void)posix_fadvise(pair->src_fd, 0, 0, + opt_mode == MODE_LIST + ? POSIX_FADV_RANDOM + : POSIX_FADV_SEQUENTIAL); +#endif + + return false; + +error_msg: + message_error(_("%s: %s"), tuklib_mask_nonprint(pair->src_name), + strerror(errno)); +error: + (void)close(pair->src_fd); + return true; +} + + +extern file_pair * +io_open_src(const char *src_name) +{ + if (src_name[0] == '\0') { + message_error(_("Empty filename, skipping")); + return NULL; + } + + // Since we have only one file open at a time, we can use + // a statically allocated structure. + static file_pair pair; + + // This implicitly also initializes src_st.st_size to zero + // which is expected to be <= 0 by default. fstat() isn't + // called when reading from standard input but src_st.st_size + // is still read. + pair = (file_pair){ + .src_name = src_name, + .dest_name = NULL, + .src_fd = -1, + .dest_fd = -1, +#ifndef TUKLIB_DOSLIKE + .dir_fd = -1, +#endif + .src_eof = false, + .src_has_seen_input = false, + .flush_needed = false, + .dest_try_sparse = false, + .dest_pending_sparse = 0, + }; + + // Block the signals, for which we have a custom signal handler, so + // that we don't need to worry about EINTR. + signals_block(); + const bool error = io_open_src_real(&pair); + signals_unblock(); + +#ifdef ENABLE_SANDBOX + if (!error) + sandbox_enable_strict_if_allowed(pair.src_fd, + user_abort_pipe[0], user_abort_pipe[1]); +#endif + + return error ? NULL : &pair; +} + + +/// \brief Closes source file of the file_pair structure +/// +/// \param pair File whose src_fd should be closed +/// \param success If true, the file will be removed from the disk if +/// closing succeeds and --keep hasn't been used. +static void +io_close_src(file_pair *pair, bool success) +{ +#ifndef TUKLIB_DOSLIKE + if (restore_stdin_flags) { + assert(pair->src_fd == STDIN_FILENO); + + restore_stdin_flags = false; + + if (fcntl(STDIN_FILENO, F_SETFL, stdin_flags) == -1) + message_error(_("Error restoring the status flags " + "to standard input: %s"), + strerror(errno)); + } +#endif + + if (pair->src_fd != STDIN_FILENO && pair->src_fd != -1) { + // Close the file before possibly unlinking it. On DOS-like + // systems this is always required since unlinking will fail + // if the file is open. On POSIX systems it usually works + // to unlink open files, but in some cases it doesn't and + // one gets EBUSY in errno. + // + // xz 5.2.2 and older unlinked the file before closing it + // (except on DOS-like systems). The old code didn't handle + // EBUSY and could fail e.g. on some CIFS shares. The + // advantage of unlinking before closing is negligible + // (avoids a race between close() and stat()/lstat() and + // unlink()), so let's keep this simple. + (void)close(pair->src_fd); + + if (success && !opt_keep_original) + io_unlink(pair->src_name, &pair->src_st); + } + + return; +} + + +static bool +io_open_dest_real(file_pair *pair) +{ + if (opt_stdout || pair->src_fd == STDIN_FILENO) { + // We don't modify or free() this. + pair->dest_name = (char *)"(stdout)"; + pair->dest_fd = STDOUT_FILENO; +#ifdef TUKLIB_DOSLIKE + setmode(STDOUT_FILENO, O_BINARY); +#else + // Try to set O_NONBLOCK if it isn't already set. + // If it fails, we assume that stdout is non-blocking + // in practice. See the comments in io_open_src_real() + // for similar situation with stdin. + // + // NOTE: O_APPEND may be unset later in this function + // and it relies on stdout_flags being set here. + stdout_flags = fcntl(STDOUT_FILENO, F_GETFL); + if (stdout_flags == -1) { + message_error(_("Error getting the file status flags " + "from standard output: %s"), + strerror(errno)); + return true; + } + + if ((stdout_flags & O_NONBLOCK) == 0 + && fcntl(STDOUT_FILENO, F_SETFL, + stdout_flags | O_NONBLOCK) != -1) + restore_stdout_flags = true; +#endif + } else { + pair->dest_name = suffix_get_dest_name(pair->src_name); + if (pair->dest_name == NULL) + return true; + +#ifndef TUKLIB_DOSLIKE + if (opt_synchronous) { + // Open the directory where the destination file will + // be created (the file descriptor is needed for + // fsync()). Do this before creating the destination + // file: + // + // - We currently have no files to clean up if + // opening the directory fails. (We aren't + // reading from stdin so there are no stdin_flags + // to restore either.) + // + // - Allocating memory with xstrdup() is safe only + // when we have nothing to clean up. + char *buf = xstrdup(pair->dest_name); + const char *dir_name = dirname(buf); + + // O_NOCTTY and O_NONBLOCK are there in case + // O_DIRECTORY is 0 and dir_name doesn't refer + // to a directory. (We opened the source file + // already but directories might have been renamed + // after the source file was opened.) + pair->dir_fd = open(dir_name, O_SEARCH | O_DIRECTORY + | O_NOCTTY | O_NONBLOCK); + if (pair->dir_fd == -1) { + // Since we did open the source file + // successfully, we should rarely get here. + // Perhaps something has been renamed or + // had its permissions changed. + // + // In an odd case, the directory has write + // and search permissions but not read + // permission (d-wx------), and O_SEARCH is + // actually O_RDONLY. Then we would be able + // to create a new file and only the directory + // syncing would be impossible. But let's be + // strict about syncing and require users to + // explicitly disable it if they don't want it. + message_error(_("%s: Opening the directory " + "failed: %s"), + tuklib_mask_nonprint(dir_name), + strerror(errno)); + free(buf); + goto error; + } + + free(buf); + } +#endif + +#ifdef __DJGPP__ + struct stat st; + if (stat(pair->dest_name, &st) == 0) { + // Check that it isn't a special file like "prn". + if (st.st_dev == -1) { + message_error("%s: Refusing to write to " + "a DOS special file", + tuklib_mask_nonprint( + pair->dest_name)); + goto error; + } + + // Check that we aren't overwriting the source file. + if (st.st_dev == pair->src_st.st_dev + && st.st_ino == pair->src_st.st_ino) { + message_error("%s: Output file is the same " + "as the input file", + tuklib_mask_nonprint( + pair->dest_name)); + goto error; + } + } +#endif + + // If --force was used, unlink the target file first. + if (opt_force && unlink(pair->dest_name) && errno != ENOENT) { + message_error(_("%s: Cannot remove: %s"), + tuklib_mask_nonprint(pair->dest_name), + strerror(errno)); + goto error; + } + + // Open the file. + int flags = O_WRONLY | O_BINARY | O_NOCTTY + | O_CREAT | O_EXCL; +#ifndef TUKLIB_DOSLIKE + flags |= O_NONBLOCK; +#endif + const mode_t mode = S_IRUSR | S_IWUSR; + pair->dest_fd = open(pair->dest_name, flags, mode); + + if (pair->dest_fd == -1) { + message_error(_("%s: %s"), + tuklib_mask_nonprint(pair->dest_name), + strerror(errno)); + goto error; + } + + // We could sync dir_fd now and close it. However, performance + // can be better if this is delayed until dest_fd has been + // synced in io_sync_dest(). + } + + if (fstat(pair->dest_fd, &pair->dest_st)) { + // If fstat() really fails, we have a safe fallback here. +#if defined(__VMS) + pair->dest_st.st_ino[0] = 0; + pair->dest_st.st_ino[1] = 0; + pair->dest_st.st_ino[2] = 0; +#else + pair->dest_st.st_dev = 0; + pair->dest_st.st_ino = 0; +#endif + } +#if defined(TUKLIB_DOSLIKE) && !defined(__DJGPP__) + // Check that the output file is a regular file. We open with O_EXCL + // but that doesn't prevent open()/_open() on Windows from opening + // files like "con" or "nul". + // + // With DJGPP this check is done with stat() even before opening + // the output file. That method or a variant of it doesn't work on + // Windows because on Windows stat()/_stat64() sets st.st_mode so + // that S_ISREG(st.st_mode) will be true even for special files. + // With fstat()/_fstat64() it works. + else if (pair->dest_fd != STDOUT_FILENO + && !S_ISREG(pair->dest_st.st_mode)) { + message_error(_("%s: Destination is not a regular file"), + tuklib_mask_nonprint(pair->dest_name)); + + // dest_fd needs to be reset to -1 to keep io_close() working. + (void)close(pair->dest_fd); + pair->dest_fd = -1; + goto error; + } +#elif !defined(TUKLIB_DOSLIKE) + else if (try_sparse && opt_mode == MODE_DECOMPRESS) { + // When writing to standard output, we need to be extra + // careful: + // - It may be connected to something else than + // a regular file. + // - We aren't necessarily writing to a new empty file + // or to the end of an existing file. + // - O_APPEND may be active. + // + // TODO: I'm keeping this disabled for DOS-like systems + // for now. FAT doesn't support sparse files, but NTFS + // does, so maybe this should be enabled on Windows after + // some testing. + if (pair->dest_fd == STDOUT_FILENO) { + if (!S_ISREG(pair->dest_st.st_mode)) + return false; + + if (stdout_flags & O_APPEND) { + // Creating a sparse file is not possible + // when O_APPEND is active (it's used by + // shell's >> redirection). As I understand + // it, it is safe to temporarily disable + // O_APPEND in xz, because if someone + // happened to write to the same file at the + // same time, results would be bad anyway + // (users shouldn't assume that xz uses any + // specific block size when writing data). + // + // The write position may be something else + // than the end of the file, so we must fix + // it to start writing at the end of the file + // to imitate O_APPEND. + if (lseek(STDOUT_FILENO, 0, SEEK_END) == -1) + return false; + + // Construct the new file status flags. + // If O_NONBLOCK was set earlier in this + // function, it must be kept here too. + int flags = stdout_flags & ~O_APPEND; + if (restore_stdout_flags) + flags |= O_NONBLOCK; + + // If this fcntl() fails, we continue but won't + // try to create sparse output. The original + // flags will still be restored if needed (to + // unset O_NONBLOCK) when the file is finished. + if (fcntl(STDOUT_FILENO, F_SETFL, flags) == -1) + return false; + + // Disabling O_APPEND succeeded. Mark + // that the flags should be restored + // in io_close_dest(). (This may have already + // been set when enabling O_NONBLOCK.) + restore_stdout_flags = true; + + } else if (lseek(STDOUT_FILENO, 0, SEEK_CUR) + != pair->dest_st.st_size) { + // Writing won't start exactly at the end + // of the file. We cannot use sparse output, + // because it would probably corrupt the file. + return false; + } + } + + pair->dest_try_sparse = true; + } +#endif + + return false; + +error: +#ifndef TUKLIB_DOSLIKE + // io_close() closes pair->dir_fd but let's do it here anyway. + if (pair->dir_fd != -1) { + (void)close(pair->dir_fd); + pair->dir_fd = -1; + } +#endif + + free(pair->dest_name); + return true; +} + + +extern bool +io_open_dest(file_pair *pair) +{ + signals_block(); + const bool ret = io_open_dest_real(pair); + signals_unblock(); + return ret; +} + + +/// \brief Closes destination file of the file_pair structure +/// +/// \param pair File whose dest_fd should be closed +/// \param success If false, the file will be removed from the disk. +/// +/// \return If closing succeeds, false is returned. On error, an error +/// message is printed and true is returned. +static bool +io_close_dest(file_pair *pair, bool success) +{ +#ifndef TUKLIB_DOSLIKE + // If io_open_dest() has disabled O_APPEND, restore it here. + if (restore_stdout_flags) { + assert(pair->dest_fd == STDOUT_FILENO); + + restore_stdout_flags = false; + + if (fcntl(STDOUT_FILENO, F_SETFL, stdout_flags) == -1) { + message_error(_("Error restoring the O_APPEND flag " + "to standard output: %s"), + strerror(errno)); + return true; + } + } +#endif + + if (pair->dest_fd == -1 || pair->dest_fd == STDOUT_FILENO) + return false; + +#ifndef TUKLIB_DOSLIKE + // dir_fd was only used for syncing the directory. + // Error checking was done when syncing. + if (pair->dir_fd != -1) + (void)close(pair->dir_fd); +#endif + + if (close(pair->dest_fd)) { + message_error(_("%s: Closing the file failed: %s"), + tuklib_mask_nonprint(pair->dest_name), + strerror(errno)); + + // Closing destination file failed, so we cannot trust its + // contents. Get rid of junk: + io_unlink(pair->dest_name, &pair->dest_st); + free(pair->dest_name); + return true; + } + + // If the operation using this file wasn't successful, we git rid + // of the junk file. + if (!success) + io_unlink(pair->dest_name, &pair->dest_st); + + free(pair->dest_name); + + return false; +} + + +extern void +io_close(file_pair *pair, bool success) +{ + // Take care of sparseness at the end of the output file. + if (success && pair->dest_try_sparse + && pair->dest_pending_sparse > 0) { + // Seek forward one byte less than the size of the pending + // hole, then write one zero-byte. This way the file grows + // to its correct size. An alternative would be to use + // ftruncate() but that isn't portable enough (e.g. it + // doesn't work with FAT on Linux; FAT isn't that important + // since it doesn't support sparse files anyway, but we don't + // want to create corrupt files on it). + if (lseek(pair->dest_fd, pair->dest_pending_sparse - 1, + SEEK_CUR) == -1) { + message_error(_("%s: Seeking failed when trying " + "to create a sparse file: %s"), + tuklib_mask_nonprint(pair->dest_name), + strerror(errno)); + success = false; + } else { + const uint8_t zero[1] = { '\0' }; + if (io_write_buf(pair, zero, 1)) + success = false; + } + } + + signals_block(); + + if (success && pair->dest_fd != -1 && pair->dest_fd != STDOUT_FILENO) { + // Copy the file attributes. This may produce warnings but + // not errors so "success" isn't affected. + io_copy_attrs(pair); + + // Synchronize the file and its directory if needed. + if (opt_synchronous) + success = !io_sync_dest(pair); + } + + // Close the destination first. If it fails, we must not remove + // the source file! + if (io_close_dest(pair, success)) + success = false; + + // Close the source file, and unlink it if the operation using this + // file pair was successful and we haven't requested to keep the + // source file. + io_close_src(pair, success); + + signals_unblock(); + + return; +} + + +extern void +io_fix_src_pos(file_pair *pair, size_t rewind_size) +{ + assert(rewind_size <= IO_BUFFER_SIZE); + + if (rewind_size > 0) { + // This doesn't need to work on unseekable file descriptors, + // so just ignore possible errors. + (void)lseek(pair->src_fd, -(off_t)(rewind_size), SEEK_CUR); + } + + return; +} + + +extern size_t +io_read(file_pair *pair, io_buf *buf, size_t size) +{ + assert(size <= IO_BUFFER_SIZE); + + size_t pos = 0; + + while (pos < size) { + const ssize_t amount = read( + pair->src_fd, buf->u8 + pos, size - pos); + + if (amount == 0) { + pair->src_eof = true; + break; + } + + if (amount == -1) { + if (errno == EINTR) { + if (user_abort) + return SIZE_MAX; + + continue; + } + +#ifndef TUKLIB_DOSLIKE + if (IS_EAGAIN_OR_EWOULDBLOCK(errno)) { + // Disable the flush-timeout if no input has + // been seen since the previous flush and thus + // there would be nothing to flush after the + // timeout expires (avoids busy waiting). + const int timeout = pair->src_has_seen_input + ? mytime_get_flush_timeout() + : -1; + + switch (io_wait(pair, timeout, true)) { + case IO_WAIT_MORE: + continue; + + case IO_WAIT_ERROR: + return SIZE_MAX; + + case IO_WAIT_TIMEOUT: + pair->flush_needed = true; + return pos; + + default: + message_bug(); + } + } +#endif + + message_error(_("%s: Read error: %s"), + tuklib_mask_nonprint(pair->src_name), + strerror(errno)); + + return SIZE_MAX; + } + + pos += (size_t)(amount); + + if (!pair->src_has_seen_input) { + pair->src_has_seen_input = true; + mytime_set_flush_time(); + } + } + + return pos; +} + + +extern bool +io_seek_src(file_pair *pair, uint64_t pos) +{ + // Caller must not attempt to seek past the end of the input file + // (seeking to 100 in a 100-byte file is seeking to the end of + // the file, not past the end of the file, and thus that is allowed). + // + // This also validates that pos can be safely cast to off_t. + if (pos > (uint64_t)(pair->src_st.st_size)) + message_bug(); + + if (lseek(pair->src_fd, (off_t)(pos), SEEK_SET) == -1) { + message_error(_("%s: Error seeking the file: %s"), + tuklib_mask_nonprint(pair->src_name), + strerror(errno)); + return true; + } + + pair->src_eof = false; + + return false; +} + + +extern bool +io_pread(file_pair *pair, io_buf *buf, size_t size, uint64_t pos) +{ + // Using lseek() and read() is more portable than pread() and + // for us it is as good as real pread(). + if (io_seek_src(pair, pos)) + return true; + + const size_t amount = io_read(pair, buf, size); + if (amount == SIZE_MAX) + return true; + + if (amount != size) { + message_error(_("%s: Unexpected end of file"), + tuklib_mask_nonprint(pair->src_name)); + return true; + } + + return false; +} + + +static bool +is_sparse(const io_buf *buf) +{ + assert(IO_BUFFER_SIZE % sizeof(uint64_t) == 0); + + for (size_t i = 0; i < ARRAY_SIZE(buf->u64); ++i) + if (buf->u64[i] != 0) + return false; + + return true; +} + + +static bool +io_write_buf(file_pair *pair, const uint8_t *buf, size_t size) +{ + assert(size <= IO_BUFFER_SIZE); + + while (size > 0) { + const ssize_t amount = write(pair->dest_fd, buf, size); + if (amount == -1) { + if (errno == EINTR) { + if (user_abort) + return true; + + continue; + } + +#ifndef TUKLIB_DOSLIKE + if (IS_EAGAIN_OR_EWOULDBLOCK(errno)) { + if (io_wait(pair, -1, false) == IO_WAIT_MORE) + continue; + + return true; + } +#endif + +#if defined(_WIN32) && !defined(__CYGWIN__) + // On native Windows, broken pipe is reported as + // EINVAL. Don't show an error message in this case. + // Try: xz -dc bigfile.xz | head -n1 + if (errno == EINVAL + && pair->dest_fd == STDOUT_FILENO) { + // Emulate SIGPIPE by setting user_abort here. + user_abort = true; + set_exit_status(E_ERROR); + return true; + } +#endif + + // Handle broken pipe specially. gzip and bzip2 + // don't print anything on SIGPIPE. In addition, + // gzip --quiet uses exit status 2 (warning) on + // broken pipe instead of whatever raise(SIGPIPE) + // would make it return. It is there to hide "Broken + // pipe" message on some old shells (probably old + // GNU bash). + // + // We don't do anything special with --quiet, which + // is what bzip2 does too. If we get SIGPIPE, we + // will handle it like other signals by setting + // user_abort, and get EPIPE here. + if (errno != EPIPE) + message_error(_("%s: Write error: %s"), + tuklib_mask_nonprint(pair->dest_name), + strerror(errno)); + + return true; + } + + buf += (size_t)(amount); + size -= (size_t)(amount); + } + + return false; +} + + +extern bool +io_write(file_pair *pair, const io_buf *buf, size_t size) +{ + assert(size <= IO_BUFFER_SIZE); + + if (pair->dest_try_sparse) { + // Check if the block is sparse (contains only zeros). If it + // sparse, we just store the amount and return. We will take + // care of actually skipping over the hole when we hit the + // next data block or close the file. + // + // Since io_close() requires that dest_pending_sparse > 0 + // if the file ends with sparse block, we must also return + // if size == 0 to avoid doing the lseek(). + if (size == IO_BUFFER_SIZE) { + // Even if the block was sparse, treat it as non-sparse + // if the pending sparse amount is large compared to + // the size of off_t. In practice this only matters + // on 32-bit systems where off_t isn't always 64 bits. + const off_t pending_max + = (off_t)(1) << (sizeof(off_t) * CHAR_BIT - 2); + if (is_sparse(buf) && pair->dest_pending_sparse + < pending_max) { + pair->dest_pending_sparse += (off_t)(size); + return false; + } + } else if (size == 0) { + return false; + } + + // This is not a sparse block. If we have a pending hole, + // skip it now. + if (pair->dest_pending_sparse > 0) { + if (lseek(pair->dest_fd, pair->dest_pending_sparse, + SEEK_CUR) == -1) { + message_error(_("%s: Seeking failed when " + "trying to create a sparse " + "file: %s"), + tuklib_mask_nonprint( + pair->dest_name), + strerror(errno)); + return true; + } + + pair->dest_pending_sparse = 0; + } + } + + return io_write_buf(pair, buf->u8, size); +} diff --git a/src/native/external/xz/src/xz/file_io.h b/src/native/external/xz/src/xz/file_io.h new file mode 100644 index 00000000000000..9903f5a0adf867 --- /dev/null +++ b/src/native/external/xz/src/xz/file_io.h @@ -0,0 +1,188 @@ +// SPDX-License-Identifier: 0BSD + +/////////////////////////////////////////////////////////////////////////////// +// +/// \file file_io.h +/// \brief I/O types and functions +// +// Author: Lasse Collin +// +/////////////////////////////////////////////////////////////////////////////// + +// Some systems have suboptimal BUFSIZ. Use a bit bigger value on them. +// We also need that IO_BUFFER_SIZE is a multiple of 8 (sizeof(uint64_t)) +#if BUFSIZ <= 1024 +# define IO_BUFFER_SIZE 8192 +#else +# define IO_BUFFER_SIZE (BUFSIZ & ~7U) +#endif + +#ifdef _MSC_VER + // The first one renames both "struct stat" -> "struct _stat64" + // and stat() -> _stat64(). The documentation mentions only + // "struct __stat64", not "struct _stat64", but the latter + // works too. +# define stat _stat64 +# define fstat _fstat64 +# define off_t __int64 +#endif + + +/// is_sparse() accesses the buffer as uint64_t for maximum speed. +/// The u32 and u64 members must only be access through this union +/// to avoid strict aliasing violations. Taking a pointer of u8 +/// should be fine as long as uint8_t maps to unsigned char which +/// can alias anything. +typedef union { + uint8_t u8[IO_BUFFER_SIZE]; + uint32_t u32[IO_BUFFER_SIZE / sizeof(uint32_t)]; + uint64_t u64[IO_BUFFER_SIZE / sizeof(uint64_t)]; +} io_buf; + + +typedef struct { + /// Name of the source filename (as given on the command line) or + /// pointer to static "(stdin)" when reading from standard input. + const char *src_name; + + /// Destination filename converted from src_name or pointer to static + /// "(stdout)" when writing to standard output. + char *dest_name; + + /// File descriptor of the source file + int src_fd; + + /// File descriptor of the target file + int dest_fd; + +#ifndef TUKLIB_DOSLIKE + /// File descriptor of the directory of the target file (which is + /// also the directory of the source file) + int dir_fd; +#endif + + /// True once end of the source file has been detected. + bool src_eof; + + /// For --flush-timeout: True if at least one byte has been read + /// since the previous flush or the start of the file. + bool src_has_seen_input; + + /// For --flush-timeout: True when flushing is needed. + bool flush_needed; + + /// If true, we look for long chunks of zeros and try to create + /// a sparse file. + bool dest_try_sparse; + + /// This is used only if dest_try_sparse is true. This holds the + /// number of zero bytes we haven't written out, because we plan + /// to make that byte range a sparse chunk. + off_t dest_pending_sparse; + + /// Stat of the source file. + struct stat src_st; + + /// Stat of the destination file. + struct stat dest_st; + +} file_pair; + + +/// \brief Initialize the I/O module +extern void io_init(void); + + +#ifndef TUKLIB_DOSLIKE +/// \brief Write a byte to user_abort_pipe[1] +/// +/// This is called from a signal handler. +extern void io_write_to_user_abort_pipe(void); +#endif + + +/// \brief Disable creation of sparse files when decompressing +extern void io_no_sparse(void); + + +/// \brief Open the source file +extern file_pair *io_open_src(const char *src_name); + + +/// \brief Open the destination file +extern bool io_open_dest(file_pair *pair); + + +/// \brief Closes the file descriptors and frees possible allocated memory +/// +/// The success argument determines if source or destination file gets +/// unlinked: +/// - false: The destination file is unlinked. +/// - true: The source file is unlinked unless writing to stdout or --keep +/// was used. +extern void io_close(file_pair *pair, bool success); + + +/// \brief Reads from the source file to a buffer +/// +/// \param pair File pair having the source file open for reading +/// \param buf Destination buffer to hold the read data +/// \param size Size of the buffer; must be at most IO_BUFFER_SIZE +/// +/// \return On success, number of bytes read is returned. On end of +/// file zero is returned and pair->src_eof set to true. +/// On error, SIZE_MAX is returned and error message printed. +extern size_t io_read(file_pair *pair, io_buf *buf, size_t size); + + +/// \brief Fix the position in src_fd +/// +/// This is used when --single-thream has been specified and decompression +/// is successful. If the input file descriptor supports seeking, this +/// function fixes the input position to point to the next byte after the +/// decompressed stream. +/// +/// \param pair File pair having the source file open for reading +/// \param rewind_size How many bytes of extra have been read i.e. +/// how much to seek backwards. +extern void io_fix_src_pos(file_pair *pair, size_t rewind_size); + + +/// \brief Seek to the given absolute position in the source file +/// +/// This calls lseek() and also clears pair->src_eof. +/// +/// \param pair Seekable source file +/// \param pos Offset relative to the beginning of the file, +/// from which the data should be read. +/// +/// \return On success, false is returned. On error, error message +/// is printed and true is returned. +extern bool io_seek_src(file_pair *pair, uint64_t pos); + + +/// \brief Read from source file from given offset to a buffer +/// +/// This is remotely similar to standard pread(). This uses lseek() though, +/// so the read offset is changed on each call. +/// +/// \param pair Seekable source file +/// \param buf Destination buffer +/// \param size Amount of data to read +/// \param pos Offset relative to the beginning of the file, +/// from which the data should be read. +/// +/// \return On success, false is returned. On error, error message +/// is printed and true is returned. +extern bool io_pread(file_pair *pair, io_buf *buf, size_t size, uint64_t pos); + + +/// \brief Writes a buffer to the destination file +/// +/// \param pair File pair having the destination file open for writing +/// \param buf Buffer containing the data to be written +/// \param size Size of the buffer; must be at most IO_BUFFER_SIZE +/// +/// \return On success, false is returned. On error, error message +/// is printed and true is returned. +extern bool io_write(file_pair *pair, const io_buf *buf, size_t size); diff --git a/src/native/external/xz/src/xz/hardware.c b/src/native/external/xz/src/xz/hardware.c new file mode 100644 index 00000000000000..3a1a709f118183 --- /dev/null +++ b/src/native/external/xz/src/xz/hardware.c @@ -0,0 +1,413 @@ +// SPDX-License-Identifier: 0BSD + +/////////////////////////////////////////////////////////////////////////////// +// +/// \file hardware.c +/// \brief Detection of available hardware resources +// +// Author: Lasse Collin +// +/////////////////////////////////////////////////////////////////////////////// + +#include "private.h" + +#ifdef HAVE_GETRLIMIT +# include +#endif + + +/// Maximum number of worker threads. This can be set with +/// the --threads=NUM command line option. +static uint32_t threads_max; + +/// True when the number of threads is automatically determined based +/// on the available hardware threads. +static bool threads_are_automatic = false; + +/// If true, then try to use multi-threaded mode (if memlimit allows) +/// even if only one thread was requested explicitly (-T+1). +static bool use_mt_mode_with_one_thread = false; + +/// Memory usage limit for compression +static uint64_t memlimit_compress = 0; + +/// Memory usage limit for decompression +static uint64_t memlimit_decompress = 0; + +/// Default memory usage for multithreaded modes: +/// +/// - Default value for --memlimit-compress when automatic number of threads +/// is used. However, if the limit wouldn't allow even one thread then +/// the limit is ignored in coder.c and one thread will be used anyway. +/// This mess is a compromise: we wish to prevent -T0 from using too +/// many threads but we also don't want xz to give an error due to +/// a memlimit that the user didn't explicitly set. +/// +/// - Default value for --memlimit-mt-decompress +/// +/// This value is calculated in hardware_init() and cannot be changed later. +static uint64_t memlimit_mt_default; + +/// Memory usage limit for multithreaded decompression. This is a soft limit: +/// if reducing the number of threads to one isn't enough to keep memory +/// usage below this limit, then one thread is used and this limit is ignored. +/// memlimit_decompress is still obeyed. +/// +/// This can be set with --memlimit-mt-decompress. The default value for +/// this is memlimit_mt_default. +static uint64_t memlimit_mtdec; + +/// Total amount of physical RAM +static uint64_t total_ram; + + +extern void +hardware_threads_set(uint32_t n) +{ + // Reset these to false first and set them to true when appropriate. + threads_are_automatic = false; + use_mt_mode_with_one_thread = false; + + if (n == 0) { + // Automatic number of threads was requested. + // If there is only one hardware thread, multi-threaded + // mode will still be used if memory limit allows. + threads_are_automatic = true; + use_mt_mode_with_one_thread = true; + + // If threading support was enabled at build time, + // use the number of available CPU cores. Otherwise + // use one thread since disabling threading support + // omits lzma_cputhreads() from liblzma. +#ifdef MYTHREAD_ENABLED + threads_max = lzma_cputhreads(); + if (threads_max == 0) + threads_max = 1; +#else + threads_max = 1; +#endif + } else if (n == UINT32_MAX) { + use_mt_mode_with_one_thread = true; + threads_max = 1; + } else { + threads_max = n; + } + + return; +} + + +extern uint32_t +hardware_threads_get(void) +{ + return threads_max; +} + + +extern bool +hardware_threads_is_mt(void) +{ +#ifdef MYTHREAD_ENABLED + return threads_max > 1 || use_mt_mode_with_one_thread; +#else + return false; +#endif +} + + +extern void +hardware_memlimit_set(uint64_t new_memlimit, + bool set_compress, bool set_decompress, bool set_mtdec, + bool is_percentage) +{ + if (is_percentage) { + assert(new_memlimit > 0); + assert(new_memlimit <= 100); + new_memlimit = (uint32_t)new_memlimit * total_ram / 100; + } + + if (set_compress) { + memlimit_compress = new_memlimit; + +#if SIZE_MAX == UINT32_MAX + // FIXME? + // + // When running a 32-bit xz on a system with a lot of RAM and + // using a percentage-based memory limit, the result can be + // bigger than the 32-bit address space. Limiting the limit + // below SIZE_MAX for compression (not decompression) makes + // xz lower the compression settings (or number of threads) + // to a level that *might* work. In practice it has worked + // when using a 64-bit kernel that gives full 4 GiB address + // space to 32-bit programs. In other situations this might + // still be too high, like 32-bit kernels that may give much + // less than 4 GiB to a single application. + // + // So this is an ugly hack but I will keep it here while + // it does more good than bad. + // + // Use a value less than SIZE_MAX so that there's some room + // for the xz program and so on. Don't use 4000 MiB because + // it could look like someone mixed up base-2 and base-10. +#ifdef __mips__ + // For MIPS32, due to architectural peculiarities, + // the limit is even lower. + const uint64_t limit_max = UINT64_C(2000) << 20; +#else + const uint64_t limit_max = UINT64_C(4020) << 20; +#endif + + // UINT64_MAX is a special case for the string "max" so + // that has to be handled specially. + if (memlimit_compress != UINT64_MAX + && memlimit_compress > limit_max) + memlimit_compress = limit_max; +#endif + } + + if (set_decompress) + memlimit_decompress = new_memlimit; + + if (set_mtdec) + memlimit_mtdec = new_memlimit; + + return; +} + + +extern uint64_t +hardware_memlimit_get(enum operation_mode mode) +{ + // 0 is a special value that indicates the default. + // It disables the limit in single-threaded mode. + // + // NOTE: For multithreaded decompression, this is the hard limit + // (memlimit_stop). hardware_memlimit_mtdec_get() gives the + // soft limit (memlimit_threaded). + const uint64_t memlimit = mode == MODE_COMPRESS + ? memlimit_compress : memlimit_decompress; + return memlimit != 0 ? memlimit : UINT64_MAX; +} + + +extern uint64_t +hardware_memlimit_mtenc_get(void) +{ + return hardware_memlimit_mtenc_is_default() + ? memlimit_mt_default + : hardware_memlimit_get(MODE_COMPRESS); +} + + +extern bool +hardware_memlimit_mtenc_is_default(void) +{ + return memlimit_compress == 0 && threads_are_automatic; +} + + +extern uint64_t +hardware_memlimit_mtdec_get(void) +{ + uint64_t m = memlimit_mtdec != 0 + ? memlimit_mtdec + : memlimit_mt_default; + + // Cap the value to memlimit_decompress if it has been specified. + // This is nice for --info-memory. It wouldn't be needed for liblzma + // since it does this anyway. + if (memlimit_decompress != 0 && m > memlimit_decompress) + m = memlimit_decompress; + + return m; +} + + +/// Helper for hardware_memlimit_show() to print one human-readable info line. +static void +memlimit_show(const char *str, size_t str_columns, uint64_t value) +{ + // Calculate the field width so that str will be padded to take + // str_columns on the terminal. + // + // NOTE: If the string is invalid, this will be -1. Using -1 as + // the field width is fine here so it's not handled specially. + const int fw = tuklib_mbstr_fw(str, (int)(str_columns)); + + // The memory usage limit is considered to be disabled if value + // is 0 or UINT64_MAX. This might get a bit more complex once there + // is threading support. See the comment in hardware_memlimit_get(). + if (value == 0 || value == UINT64_MAX) + printf(" %-*s %s\n", fw, str, _("Disabled")); + else + printf(" %-*s %s MiB (%s B)\n", fw, str, + uint64_to_str(round_up_to_mib(value), 0), + uint64_to_str(value, 1)); + + return; +} + + +extern void +hardware_memlimit_show(void) +{ + uint32_t cputhreads = 1; +#ifdef MYTHREAD_ENABLED + cputhreads = lzma_cputhreads(); + if (cputhreads == 0) + cputhreads = 1; +#endif + + if (opt_robot) { + printf("%" PRIu64 "\t%" PRIu64 "\t%" PRIu64 "\t%" PRIu64 + "\t%" PRIu64 "\t%" PRIu32 "\n", + total_ram, + memlimit_compress, + memlimit_decompress, + hardware_memlimit_mtdec_get(), + memlimit_mt_default, + cputhreads); + } else { + const char *msgs[] = { + _("Amount of physical memory (RAM):"), + _("Number of processor threads:"), + _("Compression:"), + _("Decompression:"), + _("Multi-threaded decompression:"), + _("Default for -T0:"), + }; + + size_t width_max = 1; + for (unsigned i = 0; i < ARRAY_SIZE(msgs); ++i) { + size_t w = tuklib_mbstr_width(msgs[i], NULL); + + // When debugging, catch invalid strings with + // an assertion. Otherwise fallback to 1 so + // that the columns just won't be aligned. + assert(w != (size_t)-1); + if (w == (size_t)-1) + w = 1; + + if (width_max < w) + width_max = w; + } + + puts(_("Hardware information:")); + memlimit_show(msgs[0], width_max, total_ram); + printf(" %-*s %" PRIu32 "\n", + tuklib_mbstr_fw(msgs[1], (int)(width_max)), + msgs[1], cputhreads); + + putchar('\n'); + puts(_("Memory usage limits:")); + memlimit_show(msgs[2], width_max, memlimit_compress); + memlimit_show(msgs[3], width_max, memlimit_decompress); + memlimit_show(msgs[4], width_max, + hardware_memlimit_mtdec_get()); + memlimit_show(msgs[5], width_max, memlimit_mt_default); + } + + tuklib_exit(E_SUCCESS, E_ERROR, message_verbosity_get() != V_SILENT); +} + + +extern void +hardware_init(void) +{ + // Get the amount of RAM. If we cannot determine it, + // use the assumption defined by the configure script. + total_ram = lzma_physmem(); + if (total_ram == 0) + total_ram = (uint64_t)(ASSUME_RAM) * 1024 * 1024; + + // FIXME? There may be better methods to determine the default value. + // One Linux-specific suggestion is to use MemAvailable from + // /proc/meminfo as the starting point. + memlimit_mt_default = total_ram / 4; + +#ifdef HAVE_GETRLIMIT + // Try to set the default multithreaded memory usage limit so that + // we won't exceed resource limits. Exceeding the limits would result + // in allocation failures, which currently make liblzma and xz fail + // (instead of continuing by reducing the number of threads). + const int resources[] = { + RLIMIT_DATA, +# ifdef RLIMIT_AS + RLIMIT_AS, // OpenBSD 7.8 doesn't have RLIMIT_AS. +# endif +# if defined(RLIMIT_VMEM) && RLIMIT_VMEM != RLIMIT_AS + RLIMIT_VMEM, // For Solaris. On FreeBSD this is an alias. +# endif + }; + + // The resource limits cannot be passed to liblzma directly; + // some margin is required: + // - The memory usage limit counts only liblzma's memory usage, + // but xz itself needs some memory (including gettext usage etc.). + // - Memory allocation has some overhead. + // - Address space limit counts code size too. + // + // The following value is a guess based on quick testing on Linux. + const rlim_t margin = 64 << 20; + + for (size_t i = 0; i < ARRAY_SIZE(resources); ++i) { + // glibc: When GNU extensions are enabled, + // declares getrlimit() so that the first argument is an enum + // instead of int as in POSIX. GCC and Clang use unsigned int + // for enums when possible, so a sign conversion occurs when + // resources[i] is convert to the enum type. Clang warns about + // this with -Wsign-conversion but GCC doesn't. +#ifdef __clang__ +# pragma GCC diagnostic push +# pragma GCC diagnostic ignored "-Wsign-conversion" +#endif + // RLIM_SAVED_* might be used on some 32-bit OSes + // (AIX at least) when the limit doesn't fit in a 32-bit + // unsigned integer. Thus, for us these are the same thing + // as no limit at all. + struct rlimit rl; + if (getrlimit(resources[i], &rl) == 0 + && rl.rlim_cur != RLIM_INFINITY + && rl.rlim_cur != RLIM_SAVED_CUR + && rl.rlim_cur != RLIM_SAVED_MAX) { +#ifdef __clang__ +# pragma GCC diagnostic pop +#endif + // Subtract the margin from the current resource + // limit, but avoid negative results. Avoid also 0 + // because hardware_memlimit_show() (--info-memory) + // treats it specially. In practice, 1 byte is + // effectively 0 anyway. + // + // SUSv2 and POSIX.1-2024 require rlimit_t to be + // unsigned. A cast is needed to silence a compiler + // warning still because, for historical reasons, + // rlim_t is intentionally signed on FreeBSD 14. + const uint64_t rl_with_margin = rl.rlim_cur > margin + ? (uint64_t)(rl.rlim_cur - margin) : 1; + + // Lower the memory usage limit if needed. + if (memlimit_mt_default > rl_with_margin) + memlimit_mt_default = rl_with_margin; + } + } +#endif + +#if SIZE_MAX == UINT32_MAX + // A too high value may cause 32-bit xz to run out of address space. + // Use a conservative maximum value here. A few typical address space + // sizes with Linux: + // - x86-64 with 32-bit xz: 4 GiB + // - x86: 3 GiB + // - MIPS32: 2 GiB + const size_t mem_ceiling = 1400U << 20; + if (memlimit_mt_default > mem_ceiling) + memlimit_mt_default = mem_ceiling; +#endif + + // Enable threaded mode by default. xz 5.4.x and older + // used single-threaded mode by default. + hardware_threads_set(0); + + return; +} diff --git a/src/native/external/xz/src/xz/hardware.h b/src/native/external/xz/src/xz/hardware.h new file mode 100644 index 00000000000000..25b351e32b195f --- /dev/null +++ b/src/native/external/xz/src/xz/hardware.h @@ -0,0 +1,74 @@ +// SPDX-License-Identifier: 0BSD + +/////////////////////////////////////////////////////////////////////////////// +// +/// \file hardware.h +/// \brief Detection of available hardware resources +// +// Author: Lasse Collin +// +/////////////////////////////////////////////////////////////////////////////// + +/// Initialize some hardware-specific variables, which are needed by other +/// hardware_* functions. +extern void hardware_init(void); + + +/// Set the maximum number of worker threads. +/// A special value of UINT32_MAX sets one thread in multi-threaded mode. +extern void hardware_threads_set(uint32_t threadlimit); + +/// Get the maximum number of worker threads. +extern uint32_t hardware_threads_get(void); + +/// Returns true if multithreaded mode should be used for .xz compression. +/// This can be true even if the number of threads is one. +extern bool hardware_threads_is_mt(void); + + +/// Set the memory usage limit. There are separate limits for compression, +/// decompression (also includes --list), and multithreaded decompression. +/// Any combination of these can be set with a single call to this function. +/// Zero indicates resetting the limit back to the defaults. +/// The limit can also be set as a percentage of installed RAM; the +/// percentage must be in the range [1, 100]. +extern void hardware_memlimit_set(uint64_t new_memlimit, + bool set_compress, bool set_decompress, bool set_mtdec, + bool is_percentage); + +/// Get the current memory usage limit for compression or decompression. +/// This is a hard limit that will not be exceeded. This is obeyed in +/// both single-threaded and multithreaded modes. +extern uint64_t hardware_memlimit_get(enum operation_mode mode); + +/// This returns a system-specific default value if all of the following +/// conditions are true: +/// +/// - An automatic number of threads was requested (--threads=0). +/// +/// - --memlimit-compress wasn't used or it was reset to the default +/// value by setting it to 0. +/// +/// Otherwise this is identical to hardware_memlimit_get(MODE_COMPRESS). +/// +/// The idea is to keep automatic thread count reasonable so that too +/// high memory usage is avoided and, with 32-bit xz, running out of +/// address space is avoided. +extern uint64_t hardware_memlimit_mtenc_get(void); + +/// Returns true if the value returned by hardware_memlimit_mtenc_get() is +/// a system-specific default value. coder.c uses this to ignore the default +/// memlimit in case it's too small even for a single thread in multithreaded +/// mode. This way the default limit will never make xz fail or affect the +/// compressed output; it will only make xz reduce the number of threads. +extern bool hardware_memlimit_mtenc_is_default(void); + +/// Get the current memory usage limit for multithreaded decompression. +/// This is only used to reduce the number of threads. This limit can be +/// exceeded if the number of threads are reduce to one. Then the value +/// from hardware_memlimit_get() will be honored like in single-threaded mode. +extern uint64_t hardware_memlimit_mtdec_get(void); + +/// Display the amount of RAM and memory usage limits and exit. +tuklib_attr_noreturn +extern void hardware_memlimit_show(void); diff --git a/src/native/external/xz/src/xz/list.c b/src/native/external/xz/src/xz/list.c new file mode 100644 index 00000000000000..210f23ad1baa5b --- /dev/null +++ b/src/native/external/xz/src/xz/list.c @@ -0,0 +1,1357 @@ +// SPDX-License-Identifier: 0BSD + +/////////////////////////////////////////////////////////////////////////////// +// +/// \file list.c +/// \brief Listing information about .xz files +// +// Author: Lasse Collin +// +/////////////////////////////////////////////////////////////////////////////// + +#include "private.h" +#include "tuklib_integer.h" + + +/// Information about a .xz file +typedef struct { + /// Combined Index of all Streams in the file + lzma_index *idx; + + /// Total amount of Stream Padding + uint64_t stream_padding; + + /// Highest memory usage so far + uint64_t memusage_max; + + /// True if all Blocks so far have Compressed Size and + /// Uncompressed Size fields + bool all_have_sizes; + + /// Oldest XZ Utils version that will decompress the file + uint32_t min_version; + +} xz_file_info; + +#define XZ_FILE_INFO_INIT { NULL, 0, 0, true, 50000002 } + + +/// Information about a .xz Block +typedef struct { + /// Size of the Block Header + uint32_t header_size; + + /// A few of the Block Flags as a string + char flags[3]; + + /// Size of the Compressed Data field in the Block + lzma_vli compressed_size; + + /// Decoder memory usage for this Block + uint64_t memusage; + + /// The filter chain of this Block in human-readable form + char *filter_chain; + +} block_header_info; + +#define BLOCK_HEADER_INFO_INIT { .filter_chain = NULL } +#define block_header_info_end(bhi) free((bhi)->filter_chain) + + +/// Strings ending in a colon. These are used for lines like +/// " Foo: 123 MiB". These are grouped because translated strings +/// may have different maximum string length, and we want to pad all +/// strings so that the values are aligned nicely. +static const char *colon_strs[] = { + N_("Streams:"), + N_("Blocks:"), + N_("Compressed size:"), + N_("Uncompressed size:"), + N_("Ratio:"), + N_("Check:"), + N_("Stream Padding:"), + N_("Memory needed:"), + N_("Sizes in headers:"), + // This won't be aligned because it's so long: + //N_("Minimum XZ Utils version:"), + N_("Number of files:"), +}; + +/// Enum matching the above strings. +enum { + COLON_STR_STREAMS, + COLON_STR_BLOCKS, + COLON_STR_COMPRESSED_SIZE, + COLON_STR_UNCOMPRESSED_SIZE, + COLON_STR_RATIO, + COLON_STR_CHECK, + COLON_STR_STREAM_PADDING, + COLON_STR_MEMORY_NEEDED, + COLON_STR_SIZES_IN_HEADERS, + //COLON_STR_MINIMUM_XZ_VERSION, + COLON_STR_NUMBER_OF_FILES, +}; + +/// Field widths to use with printf to pad the strings to use the same number +/// of columns on a terminal. +static int colon_strs_fw[ARRAY_SIZE(colon_strs)]; + +/// Convenience macro to get the translated string and its field width +/// using a COLON_STR_foo enum. +#define COLON_STR(num) colon_strs_fw[num], _(colon_strs[num]) + + +/// Column headings +static struct { + /// Table column heading string + const char *str; + + /// Number of terminal-columns to use for this table-column. + /// If a translated string is longer than the initial value, + /// this value will be increased in init_headings(). + int columns; + + /// Field width to use for printf() to pad "str" to use "columns" + /// number of columns on a terminal. This is calculated in + /// init_headings(). + int fw; + +} headings[] = { + { N_("Stream"), 6, 0 }, + { N_("Block"), 9, 0 }, + { N_("Blocks"), 9, 0 }, + { N_("CompOffset"), 15, 0 }, + { N_("UncompOffset"), 15, 0 }, + { N_("CompSize"), 15, 0 }, + { N_("UncompSize"), 15, 0 }, + { N_("TotalSize"), 15, 0 }, + { N_("Ratio"), 5, 0 }, + { N_("Check"), 10, 0 }, + { N_("CheckVal"), 1, 0 }, + { N_("Padding"), 7, 0 }, + { N_("Header"), 5, 0 }, + { N_("Flags"), 2, 0 }, + { N_("MemUsage"), 7 + 4, 0 }, // +4 is for " MiB" + { N_("Filters"), 1, 0 }, +}; + +/// Enum matching the above strings. +enum { + HEADING_STREAM, + HEADING_BLOCK, + HEADING_BLOCKS, + HEADING_COMPOFFSET, + HEADING_UNCOMPOFFSET, + HEADING_COMPSIZE, + HEADING_UNCOMPSIZE, + HEADING_TOTALSIZE, + HEADING_RATIO, + HEADING_CHECK, + HEADING_CHECKVAL, + HEADING_PADDING, + HEADING_HEADERSIZE, + HEADING_HEADERFLAGS, + HEADING_MEMUSAGE, + HEADING_FILTERS, +}; + +#define HEADING_STR(num) headings[num].fw, _(headings[num].str) + + +/// Check ID to string mapping +static const char check_names[LZMA_CHECK_ID_MAX + 1][12] = { + // TRANSLATORS: Indicates that there is no integrity check. + // This string is used in tables. In older xz version this + // string was limited to ten columns in a fixed-width font, but + // nowadays there is no strict length restriction anymore. + N_("None"), + "CRC32", + // TRANSLATORS: Indicates that integrity check name is not known, + // but the Check ID is known (here 2). In older xz version these + // strings were limited to ten columns in a fixed-width font, but + // nowadays there is no strict length restriction anymore. + N_("Unknown-2"), + N_("Unknown-3"), + "CRC64", + N_("Unknown-5"), + N_("Unknown-6"), + N_("Unknown-7"), + N_("Unknown-8"), + N_("Unknown-9"), + "SHA-256", + N_("Unknown-11"), + N_("Unknown-12"), + N_("Unknown-13"), + N_("Unknown-14"), + N_("Unknown-15"), +}; + +/// Buffer size for get_check_names(). This may be a bit ridiculous, +/// but at least it's enough if some language needs many multibyte chars. +#define CHECKS_STR_SIZE 1024 + + +/// Value of the Check field as hexadecimal string. +/// This is set by parse_check_value(). +static char check_value[2 * LZMA_CHECK_SIZE_MAX + 1]; + + +/// Totals that are displayed if there was more than one file. +/// The "files" counter is also used in print_info_adv() to show +/// the file number. +static struct { + uint64_t files; + uint64_t streams; + uint64_t blocks; + uint64_t compressed_size; + uint64_t uncompressed_size; + uint64_t stream_padding; + uint64_t memusage_max; + uint32_t checks; + uint32_t min_version; + bool all_have_sizes; +} totals = { 0, 0, 0, 0, 0, 0, 0, 0, 50000002, true }; + + +/// Initialize colon_strs_fw[]. +static void +init_colon_strs(void) +{ + // Lengths of translated strings as bytes. + size_t lens[ARRAY_SIZE(colon_strs)]; + + // Lengths of translated strings as columns. + size_t widths[ARRAY_SIZE(colon_strs)]; + + // Maximum number of columns needed by a translated string. + size_t width_max = 0; + + for (unsigned i = 0; i < ARRAY_SIZE(colon_strs); ++i) { + widths[i] = tuklib_mbstr_width(_(colon_strs[i]), &lens[i]); + + // If debugging is enabled, catch invalid strings with + // an assertion. However, when not debugging, use the + // byte count as the fallback width. This shouldn't + // ever happen unless there is a bad string in the + // translations, but in such case I guess it's better + // to try to print something useful instead of failing + // completely. + assert(widths[i] != (size_t)-1); + if (widths[i] == (size_t)-1) + widths[i] = lens[i]; + + if (widths[i] > width_max) + width_max = widths[i]; + } + + // Calculate the field width for printf("%*s") so that the strings + // will use width_max columns on a terminal. + for (unsigned i = 0; i < ARRAY_SIZE(colon_strs); ++i) + colon_strs_fw[i] = (int)(lens[i] + width_max - widths[i]); + + return; +} + + +/// Initialize headings[]. +static void +init_headings(void) +{ + // Before going through the heading strings themselves, treat + // the Check heading specially: Look at the widths of the various + // check names and increase the width of the Check column if needed. + // The width of the heading name "Check" will then be handled normally + // with other heading names in the second loop in this function. + for (unsigned i = 0; i < ARRAY_SIZE(check_names); ++i) { + size_t len; + size_t w = tuklib_mbstr_width(_(check_names[i]), &len); + + // Error handling like in init_colon_strs(). + assert(w != (size_t)-1); + if (w == (size_t)-1) + w = len; + + // If the translated string is wider than the minimum width + // set at compile time, increase the width. + if ((size_t)(headings[HEADING_CHECK].columns) < w) + headings[HEADING_CHECK].columns = (int)w; + } + + for (unsigned i = 0; i < ARRAY_SIZE(headings); ++i) { + size_t len; + size_t w = tuklib_mbstr_width(_(headings[i].str), &len); + + // Error handling like in init_colon_strs(). + assert(w != (size_t)-1); + if (w == (size_t)-1) + w = len; + + // If the translated string is wider than the minimum width + // set at compile time, increase the width. + if ((size_t)(headings[i].columns) < w) + headings[i].columns = (int)w; + + // Calculate the field width for printf("%*s") so that + // the string uses .columns number of columns on a terminal. + headings[i].fw = (int)(len + (size_t)headings[i].columns - w); + } + + return; +} + + +/// Initialize the printf field widths that are needed to get nicely aligned +/// output with translated strings. +static void +init_field_widths(void) +{ + init_colon_strs(); + init_headings(); + return; +} + + +/// Convert XZ Utils version number to a string. +static const char * +xz_ver_to_str(uint32_t ver) +{ + static char buf[32]; + + unsigned int major = ver / 10000000U; + ver -= major * 10000000U; + + unsigned int minor = ver / 10000U; + ver -= minor * 10000U; + + unsigned int patch = ver / 10U; + ver -= patch * 10U; + + const char *stability = ver == 0 ? "alpha" : ver == 1 ? "beta" : ""; + + snprintf(buf, sizeof(buf), "%u.%u.%u%s", + major, minor, patch, stability); + return buf; +} + + +/// \brief Parse the Index(es) from the given .xz file +/// +/// \param xfi Pointer to structure where the decoded information +/// is stored. +/// \param pair Input file +/// +/// \return On success, false is returned. On error, true is returned. +/// +static bool +parse_indexes(xz_file_info *xfi, file_pair *pair) +{ + if (pair->src_st.st_size <= 0) { + message_error(_("%s: File is empty"), + tuklib_mask_nonprint(pair->src_name)); + return true; + } + + if (pair->src_st.st_size < 2 * LZMA_STREAM_HEADER_SIZE) { + message_error(_("%s: Too small to be a valid .xz file"), + tuklib_mask_nonprint(pair->src_name)); + return true; + } + + io_buf buf; + lzma_stream strm = LZMA_STREAM_INIT; + lzma_index *idx = NULL; + + lzma_ret ret = lzma_file_info_decoder(&strm, &idx, + hardware_memlimit_get(MODE_LIST), + (uint64_t)(pair->src_st.st_size)); + if (ret != LZMA_OK) { + message_error(_("%s: %s"), + tuklib_mask_nonprint(pair->src_name), + message_strm(ret)); + return true; + } + + while (true) { + if (strm.avail_in == 0) { + strm.next_in = buf.u8; + strm.avail_in = io_read(pair, &buf, IO_BUFFER_SIZE); + if (strm.avail_in == SIZE_MAX) + goto error; + } + + ret = lzma_code(&strm, LZMA_RUN); + + switch (ret) { + case LZMA_OK: + break; + + case LZMA_SEEK_NEEDED: + // liblzma won't ask us to seek past the known size + // of the input file. + assert(strm.seek_pos + <= (uint64_t)(pair->src_st.st_size)); + if (io_seek_src(pair, strm.seek_pos)) + goto error; + + // avail_in must be zero so that we will read new + // input. + strm.avail_in = 0; + break; + + case LZMA_STREAM_END: { + lzma_end(&strm); + xfi->idx = idx; + + // Calculate xfi->stream_padding. + lzma_index_iter iter; + lzma_index_iter_init(&iter, xfi->idx); + while (!lzma_index_iter_next(&iter, + LZMA_INDEX_ITER_STREAM)) + xfi->stream_padding += iter.stream.padding; + + return false; + } + + default: + message_error(_("%s: %s"), + tuklib_mask_nonprint(pair->src_name), + message_strm(ret)); + + // If the error was too low memory usage limit, + // show also how much memory would have been needed. + if (ret == LZMA_MEMLIMIT_ERROR) + message_mem_needed(V_ERROR, + lzma_memusage(&strm)); + + goto error; + } + } + +error: + lzma_end(&strm); + return true; +} + + +/// \brief Parse the Block Header +/// +/// The result is stored into *bhi. The caller takes care of initializing it. +/// +/// \return False on success, true on error. +static bool +parse_block_header(file_pair *pair, const lzma_index_iter *iter, + block_header_info *bhi, xz_file_info *xfi) +{ +#if IO_BUFFER_SIZE < LZMA_BLOCK_HEADER_SIZE_MAX +# error IO_BUFFER_SIZE < LZMA_BLOCK_HEADER_SIZE_MAX +#endif + + // Get the whole Block Header with one read, but don't read past + // the end of the Block (or even its Check field). + const uint32_t size = my_min(iter->block.total_size + - lzma_check_size(iter->stream.flags->check), + LZMA_BLOCK_HEADER_SIZE_MAX); + io_buf buf; + if (io_pread(pair, &buf, size, iter->block.compressed_file_offset)) + return true; + + // Zero would mean Index Indicator and thus not a valid Block. + if (buf.u8[0] == 0) + goto data_error; + + // Initialize the block structure and decode Block Header Size. + lzma_filter filters[LZMA_FILTERS_MAX + 1]; + lzma_block block; + block.version = 0; + block.check = iter->stream.flags->check; + block.filters = filters; + + block.header_size = lzma_block_header_size_decode(buf.u8[0]); + if (block.header_size > size) + goto data_error; + + // Decode the Block Header. + switch (lzma_block_header_decode(&block, NULL, buf.u8)) { + case LZMA_OK: + break; + + case LZMA_OPTIONS_ERROR: + message_error(_("%s: %s"), + tuklib_mask_nonprint(pair->src_name), + message_strm(LZMA_OPTIONS_ERROR)); + return true; + + case LZMA_DATA_ERROR: + goto data_error; + + default: + message_bug(); + } + + // Check the Block Flags. These must be done before calling + // lzma_block_compressed_size(), because it overwrites + // block.compressed_size. + // + // NOTE: If you add new characters here, update the minimum number of + // columns in headings[HEADING_HEADERFLAGS] to match the number of + // characters used here. + bhi->flags[0] = block.compressed_size != LZMA_VLI_UNKNOWN + ? 'c' : '-'; + bhi->flags[1] = block.uncompressed_size != LZMA_VLI_UNKNOWN + ? 'u' : '-'; + bhi->flags[2] = '\0'; + + // Collect information if all Blocks have both Compressed Size + // and Uncompressed Size fields. They can be useful e.g. for + // multi-threaded decompression so it can be useful to know it. + xfi->all_have_sizes &= block.compressed_size != LZMA_VLI_UNKNOWN + && block.uncompressed_size != LZMA_VLI_UNKNOWN; + + // Validate or set block.compressed_size. + switch (lzma_block_compressed_size(&block, + iter->block.unpadded_size)) { + case LZMA_OK: + // Validate also block.uncompressed_size if it is present. + // If it isn't present, there's no need to set it since + // we aren't going to actually decompress the Block; if + // we were decompressing, then we should set it so that + // the Block decoder could validate the Uncompressed Size + // that was stored in the Index. + if (block.uncompressed_size == LZMA_VLI_UNKNOWN + || block.uncompressed_size + == iter->block.uncompressed_size) + break; + + // If the above fails, the file is corrupt so + // LZMA_DATA_ERROR is a good error code. + FALLTHROUGH; + + case LZMA_DATA_ERROR: + // Free the memory allocated by lzma_block_header_decode(). + lzma_filters_free(filters, NULL); + goto data_error; + + default: + message_bug(); + } + + // Copy the known sizes. + bhi->header_size = block.header_size; + bhi->compressed_size = block.compressed_size; + + // Calculate the decoder memory usage and update the maximum + // memory usage of this Block. + bhi->memusage = lzma_raw_decoder_memusage(filters); + if (xfi->memusage_max < bhi->memusage) + xfi->memusage_max = bhi->memusage; + + // Determine the minimum XZ Utils version that supports this Block. + // - RISC-V filter needs 5.6.0. + // + // - ARM64 filter needs 5.4.0. + // + // - 5.0.0 doesn't support empty LZMA2 streams and thus empty + // Blocks that use LZMA2. This decoder bug was fixed in 5.0.2. + if (xfi->min_version < 50060002U) { + for (size_t i = 0; filters[i].id != LZMA_VLI_UNKNOWN; ++i) { + if (filters[i].id == LZMA_FILTER_RISCV) { + xfi->min_version = 50060002U; + break; + } + } + } + + if (xfi->min_version < 50040002U) { + for (size_t i = 0; filters[i].id != LZMA_VLI_UNKNOWN; ++i) { + if (filters[i].id == LZMA_FILTER_ARM64) { + xfi->min_version = 50040002U; + break; + } + } + } + + if (xfi->min_version < 50000022U) { + size_t i = 0; + while (filters[i + 1].id != LZMA_VLI_UNKNOWN) + ++i; + + if (filters[i].id == LZMA_FILTER_LZMA2 + && iter->block.uncompressed_size == 0) + xfi->min_version = 50000022U; + } + + // Convert the filter chain to human readable form. + const lzma_ret str_ret = lzma_str_from_filters( + &bhi->filter_chain, filters, + LZMA_STR_DECODER | LZMA_STR_GETOPT_LONG, NULL); + + // Free the memory allocated by lzma_block_header_decode(). + lzma_filters_free(filters, NULL); + + // Check if the stringification succeeded. + if (str_ret != LZMA_OK) { + message_error(_("%s: %s"), + tuklib_mask_nonprint(pair->src_name), + message_strm(str_ret)); + return true; + } + + return false; + +data_error: + // Show the error message. + message_error(_("%s: %s"), + tuklib_mask_nonprint(pair->src_name), + message_strm(LZMA_DATA_ERROR)); + return true; +} + + +/// \brief Parse the Check field and put it into check_value[] +/// +/// \return False on success, true on error. +static bool +parse_check_value(file_pair *pair, const lzma_index_iter *iter) +{ + // Don't read anything from the file if there is no integrity Check. + if (iter->stream.flags->check == LZMA_CHECK_NONE) { + snprintf(check_value, sizeof(check_value), "---"); + return false; + } + + // Locate and read the Check field. + const uint32_t size = lzma_check_size(iter->stream.flags->check); + const uint64_t offset = iter->block.compressed_file_offset + + iter->block.total_size - size; + io_buf buf; + if (io_pread(pair, &buf, size, offset)) + return true; + + // CRC32 and CRC64 are in little endian. Guess that all the future + // 32-bit and 64-bit Check values are little endian too. It shouldn't + // be a too big problem if this guess is wrong. + if (size == 4) + snprintf(check_value, sizeof(check_value), + "%08" PRIx32, conv32le(buf.u32[0])); + else if (size == 8) + snprintf(check_value, sizeof(check_value), + "%016" PRIx64, conv64le(buf.u64[0])); + else + for (size_t i = 0; i < size; ++i) + snprintf(check_value + i * 2, 3, "%02x", buf.u8[i]); + + return false; +} + + +/// \brief Parse detailed information about a Block +/// +/// Since this requires seek(s), listing information about all Blocks can +/// be slow. +/// +/// \param pair Input file +/// \param iter Location of the Block whose Check value should +/// be printed. +/// \param bhi Pointer to structure where to store the information +/// about the Block Header field. +/// \param xfi Pointer to structure where to store the information +/// about the entire .xz file. +/// +/// \return False on success, true on error. If an error occurs, +/// the error message is printed too so the caller doesn't +/// need to worry about that. +static bool +parse_details(file_pair *pair, const lzma_index_iter *iter, + block_header_info *bhi, xz_file_info *xfi) +{ + if (parse_block_header(pair, iter, bhi, xfi)) + return true; + + if (parse_check_value(pair, iter)) + return true; + + return false; +} + + +/// \brief Get the compression ratio +/// +/// This has slightly different format than that is used in message.c. +static const char * +get_ratio(uint64_t compressed_size, uint64_t uncompressed_size) +{ + if (uncompressed_size == 0) + return "---"; + + const double ratio = (double)(compressed_size) + / (double)(uncompressed_size); + if (ratio > 9.999) + return "---"; + + static char buf[16]; + snprintf(buf, sizeof(buf), "%.3f", ratio); + return buf; +} + + +/// \brief Get a comma-separated list of Check names +/// +/// The check names are translated with gettext except when in robot mode. +/// +/// \param buf Buffer to hold the resulting string +/// \param checks Bit mask of Checks to print +/// \param space_after_comma +/// It's better to not use spaces in table-like listings, +/// but in more verbose formats a space after a comma +/// is good for readability. +static void +get_check_names(char buf[CHECKS_STR_SIZE], + uint32_t checks, bool space_after_comma) +{ + // If we get called when there are no Checks to print, set checks + // to 1 so that we print "None". This can happen in the robot mode + // when printing the totals line if there are no valid input files. + if (checks == 0) + checks = 1; + + char *pos = buf; + size_t left = CHECKS_STR_SIZE; + + const char *sep = space_after_comma ? ", " : ","; + bool comma = false; + + for (size_t i = 0; i <= LZMA_CHECK_ID_MAX; ++i) { + if (checks & (UINT32_C(1) << i)) { + my_snprintf(&pos, &left, "%s%s", + comma ? sep : "", + opt_robot ? check_names[i] + : _(check_names[i])); + comma = true; + } + } + + return; +} + + +static bool +print_info_basic(const xz_file_info *xfi, file_pair *pair) +{ + static bool headings_displayed = false; + if (!headings_displayed) { + headings_displayed = true; + // TRANSLATORS: These are column headings. From Strms (Streams) + // to Ratio, the columns are right aligned. Check and Filename + // are left aligned. If you need longer words, it's OK to + // use two lines here. Test with "xz -l foo.xz". + puts(_("Strms Blocks Compressed Uncompressed Ratio " + "Check Filename")); + } + + char checks[CHECKS_STR_SIZE]; + get_check_names(checks, lzma_index_checks(xfi->idx), false); + + const char *cols[6] = { + uint64_to_str(lzma_index_stream_count(xfi->idx), 0), + uint64_to_str(lzma_index_block_count(xfi->idx), 1), + uint64_to_nicestr(lzma_index_file_size(xfi->idx), + NICESTR_B, NICESTR_TIB, false, 2), + uint64_to_nicestr(lzma_index_uncompressed_size(xfi->idx), + NICESTR_B, NICESTR_TIB, false, 3), + get_ratio(lzma_index_file_size(xfi->idx), + lzma_index_uncompressed_size(xfi->idx)), + checks, + }; + printf("%*s %*s %*s %*s %*s %-*s %s\n", + tuklib_mbstr_fw(cols[0], 5), cols[0], + tuklib_mbstr_fw(cols[1], 7), cols[1], + tuklib_mbstr_fw(cols[2], 11), cols[2], + tuklib_mbstr_fw(cols[3], 11), cols[3], + tuklib_mbstr_fw(cols[4], 5), cols[4], + tuklib_mbstr_fw(cols[5], 7), cols[5], + tuklib_mask_nonprint(pair->src_name)); + + return false; +} + + +static void +print_adv_helper(uint64_t stream_count, uint64_t block_count, + uint64_t compressed_size, uint64_t uncompressed_size, + uint32_t checks, uint64_t stream_padding) +{ + char checks_str[CHECKS_STR_SIZE]; + get_check_names(checks_str, checks, true); + + printf(" %-*s %s\n", COLON_STR(COLON_STR_STREAMS), + uint64_to_str(stream_count, 0)); + printf(" %-*s %s\n", COLON_STR(COLON_STR_BLOCKS), + uint64_to_str(block_count, 0)); + printf(" %-*s %s\n", COLON_STR(COLON_STR_COMPRESSED_SIZE), + uint64_to_nicestr(compressed_size, + NICESTR_B, NICESTR_TIB, true, 0)); + printf(" %-*s %s\n", COLON_STR(COLON_STR_UNCOMPRESSED_SIZE), + uint64_to_nicestr(uncompressed_size, + NICESTR_B, NICESTR_TIB, true, 0)); + printf(" %-*s %s\n", COLON_STR(COLON_STR_RATIO), + get_ratio(compressed_size, uncompressed_size)); + printf(" %-*s %s\n", COLON_STR(COLON_STR_CHECK), checks_str); + printf(" %-*s %s\n", COLON_STR(COLON_STR_STREAM_PADDING), + uint64_to_nicestr(stream_padding, + NICESTR_B, NICESTR_TIB, true, 0)); + return; +} + + +static bool +print_info_adv(xz_file_info *xfi, file_pair *pair) +{ + // Print the overall information. + print_adv_helper(lzma_index_stream_count(xfi->idx), + lzma_index_block_count(xfi->idx), + lzma_index_file_size(xfi->idx), + lzma_index_uncompressed_size(xfi->idx), + lzma_index_checks(xfi->idx), + xfi->stream_padding); + + // Size of the biggest Check. This is used to calculate the width + // of the CheckVal field. The table would get insanely wide if + // we always reserved space for 64-byte Check (128 chars as hex). + uint32_t check_max = 0; + + // Print information about the Streams. + // + // All except Check are right aligned; Check is left aligned. + // Test with "xz -lv foo.xz". + printf(" %s\n %*s %*s %*s %*s %*s %*s %*s %-*s %*s\n", + _(colon_strs[COLON_STR_STREAMS]), + HEADING_STR(HEADING_STREAM), + HEADING_STR(HEADING_BLOCKS), + HEADING_STR(HEADING_COMPOFFSET), + HEADING_STR(HEADING_UNCOMPOFFSET), + HEADING_STR(HEADING_COMPSIZE), + HEADING_STR(HEADING_UNCOMPSIZE), + HEADING_STR(HEADING_RATIO), + HEADING_STR(HEADING_CHECK), + HEADING_STR(HEADING_PADDING)); + + lzma_index_iter iter; + lzma_index_iter_init(&iter, xfi->idx); + + while (!lzma_index_iter_next(&iter, LZMA_INDEX_ITER_STREAM)) { + const char *cols1[4] = { + uint64_to_str(iter.stream.number, 0), + uint64_to_str(iter.stream.block_count, 1), + uint64_to_str(iter.stream.compressed_offset, 2), + uint64_to_str(iter.stream.uncompressed_offset, 3), + }; + printf(" %*s %*s %*s %*s ", + tuklib_mbstr_fw(cols1[0], + headings[HEADING_STREAM].columns), + cols1[0], + tuklib_mbstr_fw(cols1[1], + headings[HEADING_BLOCKS].columns), + cols1[1], + tuklib_mbstr_fw(cols1[2], + headings[HEADING_COMPOFFSET].columns), + cols1[2], + tuklib_mbstr_fw(cols1[3], + headings[HEADING_UNCOMPOFFSET].columns), + cols1[3]); + + const char *cols2[5] = { + uint64_to_str(iter.stream.compressed_size, 0), + uint64_to_str(iter.stream.uncompressed_size, 1), + get_ratio(iter.stream.compressed_size, + iter.stream.uncompressed_size), + _(check_names[iter.stream.flags->check]), + uint64_to_str(iter.stream.padding, 2), + }; + printf("%*s %*s %*s %-*s %*s\n", + tuklib_mbstr_fw(cols2[0], + headings[HEADING_COMPSIZE].columns), + cols2[0], + tuklib_mbstr_fw(cols2[1], + headings[HEADING_UNCOMPSIZE].columns), + cols2[1], + tuklib_mbstr_fw(cols2[2], + headings[HEADING_RATIO].columns), + cols2[2], + tuklib_mbstr_fw(cols2[3], + headings[HEADING_CHECK].columns), + cols2[3], + tuklib_mbstr_fw(cols2[4], + headings[HEADING_PADDING].columns), + cols2[4]); + + // Update the maximum Check size. + if (lzma_check_size(iter.stream.flags->check) > check_max) + check_max = lzma_check_size(iter.stream.flags->check); + } + + // Cache the verbosity level to a local variable. + const bool detailed = message_verbosity_get() >= V_DEBUG; + + // Print information about the Blocks but only if there is + // at least one Block. + if (lzma_index_block_count(xfi->idx) > 0) { + // Calculate the width of the CheckVal column. This can be + // used as is as the field width for printf() when printing + // the actual check value as it is hexadecimal. However, to + // print the column heading, further calculation is needed + // to handle a translated string (it's done a few lines later). + assert(check_max <= LZMA_CHECK_SIZE_MAX); + const int checkval_width = my_max( + headings[HEADING_CHECKVAL].columns, + (int)(2 * check_max)); + + // All except Check are right aligned; Check is left aligned. + printf(" %s\n %*s %*s %*s %*s %*s %*s %*s %-*s", + _(colon_strs[COLON_STR_BLOCKS]), + HEADING_STR(HEADING_STREAM), + HEADING_STR(HEADING_BLOCK), + HEADING_STR(HEADING_COMPOFFSET), + HEADING_STR(HEADING_UNCOMPOFFSET), + HEADING_STR(HEADING_TOTALSIZE), + HEADING_STR(HEADING_UNCOMPSIZE), + HEADING_STR(HEADING_RATIO), + detailed ? headings[HEADING_CHECK].fw : 1, + _(headings[HEADING_CHECK].str)); + + if (detailed) { + // CheckVal (Check value), Flags, and Filters are + // left aligned. Block Header Size, CompSize, and + // MemUsage are right aligned. Test with + // "xz -lvv foo.xz". + printf(" %-*s %*s %-*s %*s %*s %s", + headings[HEADING_CHECKVAL].fw + + checkval_width + - headings[HEADING_CHECKVAL].columns, + _(headings[HEADING_CHECKVAL].str), + HEADING_STR(HEADING_HEADERSIZE), + HEADING_STR(HEADING_HEADERFLAGS), + HEADING_STR(HEADING_COMPSIZE), + HEADING_STR(HEADING_MEMUSAGE), + _(headings[HEADING_FILTERS].str)); + } + + putchar('\n'); + + lzma_index_iter_init(&iter, xfi->idx); + + // Iterate over the Blocks. + while (!lzma_index_iter_next(&iter, LZMA_INDEX_ITER_BLOCK)) { + // If in detailed mode, collect the information from + // Block Header before starting to print the next line. + block_header_info bhi = BLOCK_HEADER_INFO_INIT; + if (detailed && parse_details(pair, &iter, &bhi, xfi)) + return true; + + const char *cols1[4] = { + uint64_to_str(iter.stream.number, 0), + uint64_to_str( + iter.block.number_in_stream, 1), + uint64_to_str( + iter.block.compressed_file_offset, 2), + uint64_to_str( + iter.block.uncompressed_file_offset, 3) + }; + printf(" %*s %*s %*s %*s ", + tuklib_mbstr_fw(cols1[0], + headings[HEADING_STREAM].columns), + cols1[0], + tuklib_mbstr_fw(cols1[1], + headings[HEADING_BLOCK].columns), + cols1[1], + tuklib_mbstr_fw(cols1[2], + headings[HEADING_COMPOFFSET].columns), + cols1[2], + tuklib_mbstr_fw(cols1[3], headings[ + HEADING_UNCOMPOFFSET].columns), + cols1[3]); + + const char *cols2[4] = { + uint64_to_str(iter.block.total_size, 0), + uint64_to_str(iter.block.uncompressed_size, + 1), + get_ratio(iter.block.total_size, + iter.block.uncompressed_size), + _(check_names[iter.stream.flags->check]) + }; + printf("%*s %*s %*s %-*s", + tuklib_mbstr_fw(cols2[0], + headings[HEADING_TOTALSIZE].columns), + cols2[0], + tuklib_mbstr_fw(cols2[1], + headings[HEADING_UNCOMPSIZE].columns), + cols2[1], + tuklib_mbstr_fw(cols2[2], + headings[HEADING_RATIO].columns), + cols2[2], + tuklib_mbstr_fw(cols2[3], detailed + ? headings[HEADING_CHECK].columns : 1), + cols2[3]); + + if (detailed) { + const lzma_vli compressed_size + = iter.block.unpadded_size + - bhi.header_size + - lzma_check_size( + iter.stream.flags->check); + + const char *cols3[6] = { + check_value, + uint64_to_str(bhi.header_size, 0), + bhi.flags, + uint64_to_str(compressed_size, 1), + uint64_to_str( + round_up_to_mib(bhi.memusage), + 2), + bhi.filter_chain + }; + // Show MiB for memory usage, because it + // is the only size which is not in bytes. + printf(" %-*s %*s %-*s %*s %*s MiB %s", + checkval_width, cols3[0], + tuklib_mbstr_fw(cols3[1], headings[ + HEADING_HEADERSIZE].columns), + cols3[1], + tuklib_mbstr_fw(cols3[2], headings[ + HEADING_HEADERFLAGS].columns), + cols3[2], + tuklib_mbstr_fw(cols3[3], headings[ + HEADING_COMPSIZE].columns), + cols3[3], + tuklib_mbstr_fw(cols3[4], headings[ + HEADING_MEMUSAGE].columns - 4), + cols3[4], + cols3[5]); + } + + putchar('\n'); + block_header_info_end(&bhi); + } + } + + if (detailed) { + printf(" %-*s %s MiB\n", COLON_STR(COLON_STR_MEMORY_NEEDED), + uint64_to_str( + round_up_to_mib(xfi->memusage_max), 0)); + printf(" %-*s %s\n", COLON_STR(COLON_STR_SIZES_IN_HEADERS), + xfi->all_have_sizes ? _("Yes") : _("No")); + //printf(" %-*s %s\n", COLON_STR(COLON_STR_MINIMUM_XZ_VERSION), + printf(" %s %s\n", _("Minimum XZ Utils version:"), + xz_ver_to_str(xfi->min_version)); + } + + return false; +} + + +static bool +print_info_robot(xz_file_info *xfi, file_pair *pair) +{ + char checks[CHECKS_STR_SIZE]; + get_check_names(checks, lzma_index_checks(xfi->idx), false); + + // Robot mode has to mask at least some control chars to prevent + // the output from getting out of sync if filename is malicious. + // Masking all non-printable chars is more than we need but + // perhaps this is good enough in practice. + printf("name\t%s\n", tuklib_mask_nonprint(pair->src_name)); + + printf("file\t%" PRIu64 "\t%" PRIu64 "\t%" PRIu64 "\t%" PRIu64 + "\t%s\t%s\t%" PRIu64 "\n", + lzma_index_stream_count(xfi->idx), + lzma_index_block_count(xfi->idx), + lzma_index_file_size(xfi->idx), + lzma_index_uncompressed_size(xfi->idx), + get_ratio(lzma_index_file_size(xfi->idx), + lzma_index_uncompressed_size(xfi->idx)), + checks, + xfi->stream_padding); + + if (message_verbosity_get() >= V_VERBOSE) { + lzma_index_iter iter; + lzma_index_iter_init(&iter, xfi->idx); + + while (!lzma_index_iter_next(&iter, LZMA_INDEX_ITER_STREAM)) + printf("stream\t%" PRIu64 "\t%" PRIu64 "\t%" PRIu64 + "\t%" PRIu64 "\t%" PRIu64 "\t%" PRIu64 + "\t%s\t%s\t%" PRIu64 "\n", + iter.stream.number, + iter.stream.block_count, + iter.stream.compressed_offset, + iter.stream.uncompressed_offset, + iter.stream.compressed_size, + iter.stream.uncompressed_size, + get_ratio(iter.stream.compressed_size, + iter.stream.uncompressed_size), + check_names[iter.stream.flags->check], + iter.stream.padding); + + lzma_index_iter_rewind(&iter); + + while (!lzma_index_iter_next(&iter, LZMA_INDEX_ITER_BLOCK)) { + block_header_info bhi = BLOCK_HEADER_INFO_INIT; + if (message_verbosity_get() >= V_DEBUG + && parse_details( + pair, &iter, &bhi, xfi)) + return true; + + printf("block\t%" PRIu64 "\t%" PRIu64 "\t%" PRIu64 + "\t%" PRIu64 "\t%" PRIu64 + "\t%" PRIu64 "\t%" PRIu64 "\t%s\t%s", + iter.stream.number, + iter.block.number_in_stream, + iter.block.number_in_file, + iter.block.compressed_file_offset, + iter.block.uncompressed_file_offset, + iter.block.total_size, + iter.block.uncompressed_size, + get_ratio(iter.block.total_size, + iter.block.uncompressed_size), + check_names[iter.stream.flags->check]); + + if (message_verbosity_get() >= V_DEBUG) + printf("\t%s\t%" PRIu32 "\t%s\t%" PRIu64 + "\t%" PRIu64 "\t%s", + check_value, + bhi.header_size, + bhi.flags, + bhi.compressed_size, + bhi.memusage, + bhi.filter_chain); + + putchar('\n'); + block_header_info_end(&bhi); + } + } + + if (message_verbosity_get() >= V_DEBUG) + printf("summary\t%" PRIu64 "\t%s\t%" PRIu32 "\n", + xfi->memusage_max, + xfi->all_have_sizes ? "yes" : "no", + xfi->min_version); + + return false; +} + + +static void +update_totals(const xz_file_info *xfi) +{ + // TODO: Integer overflow checks + ++totals.files; + totals.streams += lzma_index_stream_count(xfi->idx); + totals.blocks += lzma_index_block_count(xfi->idx); + totals.compressed_size += lzma_index_file_size(xfi->idx); + totals.uncompressed_size += lzma_index_uncompressed_size(xfi->idx); + totals.stream_padding += xfi->stream_padding; + totals.checks |= lzma_index_checks(xfi->idx); + + if (totals.memusage_max < xfi->memusage_max) + totals.memusage_max = xfi->memusage_max; + + if (totals.min_version < xfi->min_version) + totals.min_version = xfi->min_version; + + totals.all_have_sizes &= xfi->all_have_sizes; + + return; +} + + +static void +print_totals_basic(void) +{ + // Print a separator line. + char line[80]; + memset(line, '-', sizeof(line)); + line[sizeof(line) - 1] = '\0'; + puts(line); + + // Get the check names. + char checks[CHECKS_STR_SIZE]; + get_check_names(checks, totals.checks, false); + + // Print the totals except the file count, which needs + // special handling. + printf("%5s %7s %11s %11s %5s %-7s ", + uint64_to_str(totals.streams, 0), + uint64_to_str(totals.blocks, 1), + uint64_to_nicestr(totals.compressed_size, + NICESTR_B, NICESTR_TIB, false, 2), + uint64_to_nicestr(totals.uncompressed_size, + NICESTR_B, NICESTR_TIB, false, 3), + get_ratio(totals.compressed_size, + totals.uncompressed_size), + checks); + +#if defined(__sun) && (defined(__GNUC__) || defined(__clang__)) +# pragma GCC diagnostic push +# pragma GCC diagnostic ignored "-Wformat-nonliteral" +#endif + // Since we print totals only when there are at least two files, + // the English message will always use "%s files". But some other + // languages need different forms for different plurals so we + // have to translate this with ngettext(). + // + // TRANSLATORS: %s is an integer. Only the plural form of this + // message is used (e.g. "2 files"). Test with "xz -l foo.xz bar.xz". + printf(ngettext("%s file\n", "%s files\n", + totals.files <= ULONG_MAX ? totals.files + : (totals.files % 1000000) + 1000000), + uint64_to_str(totals.files, 0)); +#if defined(__sun) && (defined(__GNUC__) || defined(__clang__)) +# pragma GCC diagnostic pop +#endif + + return; +} + + +static void +print_totals_adv(void) +{ + putchar('\n'); + puts(_("Totals:")); + printf(" %-*s %s\n", COLON_STR(COLON_STR_NUMBER_OF_FILES), + uint64_to_str(totals.files, 0)); + print_adv_helper(totals.streams, totals.blocks, + totals.compressed_size, totals.uncompressed_size, + totals.checks, totals.stream_padding); + + if (message_verbosity_get() >= V_DEBUG) { + printf(" %-*s %s MiB\n", COLON_STR(COLON_STR_MEMORY_NEEDED), + uint64_to_str( + round_up_to_mib(totals.memusage_max), 0)); + printf(" %-*s %s\n", COLON_STR(COLON_STR_SIZES_IN_HEADERS), + totals.all_have_sizes ? _("Yes") : _("No")); + //printf(" %-*s %s\n", COLON_STR(COLON_STR_MINIMUM_XZ_VERSION), + printf(" %s %s\n", _("Minimum XZ Utils version:"), + xz_ver_to_str(totals.min_version)); + } + + return; +} + + +static void +print_totals_robot(void) +{ + char checks[CHECKS_STR_SIZE]; + get_check_names(checks, totals.checks, false); + + printf("totals\t%" PRIu64 "\t%" PRIu64 "\t%" PRIu64 "\t%" PRIu64 + "\t%s\t%s\t%" PRIu64 "\t%" PRIu64, + totals.streams, + totals.blocks, + totals.compressed_size, + totals.uncompressed_size, + get_ratio(totals.compressed_size, + totals.uncompressed_size), + checks, + totals.stream_padding, + totals.files); + + if (message_verbosity_get() >= V_DEBUG) + printf("\t%" PRIu64 "\t%s\t%" PRIu32, + totals.memusage_max, + totals.all_have_sizes ? "yes" : "no", + totals.min_version); + + putchar('\n'); + + return; +} + + +extern void +list_totals(void) +{ + if (opt_robot) { + // Always print totals in --robot mode. It can be convenient + // in some cases and doesn't complicate usage of the + // single-file case much. + print_totals_robot(); + + } else if (totals.files > 1) { + // For non-robot mode, totals are printed only if there + // is more than one file. + if (message_verbosity_get() <= V_WARNING) + print_totals_basic(); + else + print_totals_adv(); + } + + return; +} + + +extern void +list_file(const char *filename) +{ + if (opt_format != FORMAT_XZ && opt_format != FORMAT_AUTO) { + // The 'lzmainfo' message is printed only when --format=lzma + // is used (it is implied if using "lzma" as the command + // name). Thus instead of using message_fatal(), print + // the messages separately and then call tuklib_exit() + // like message_fatal() does. + message(V_ERROR, _("--list works only on .xz files " + "(--format=xz or --format=auto)")); + + if (opt_format == FORMAT_LZMA) + message(V_ERROR, + _("Try 'lzmainfo' with .lzma files.")); + + tuklib_exit(E_ERROR, E_ERROR, false); + } + + message_filename(filename); + + if (filename == stdin_filename) { + message_error(_("--list does not support reading from " + "standard input")); + return; + } + + init_field_widths(); + + // Unset opt_stdout so that io_open_src() won't accept special files. + // Set opt_force so that io_open_src() will follow symlinks. + opt_stdout = false; + opt_force = true; + file_pair *pair = io_open_src(filename); + if (pair == NULL) + return; + + xz_file_info xfi = XZ_FILE_INFO_INIT; + if (!parse_indexes(&xfi, pair)) { + bool fail; + + // We have three main modes: + // - --robot, which has submodes if --verbose is specified + // once or twice + // - Normal --list without --verbose + // - --list with one or two --verbose + if (opt_robot) + fail = print_info_robot(&xfi, pair); + else if (message_verbosity_get() <= V_WARNING) + fail = print_info_basic(&xfi, pair); + else + fail = print_info_adv(&xfi, pair); + + // Update the totals that are displayed after all + // the individual files have been listed. Don't count + // broken files. + if (!fail) + update_totals(&xfi); + + lzma_index_end(xfi.idx, NULL); + } + + io_close(pair, false); + return; +} diff --git a/src/native/external/xz/src/xz/list.h b/src/native/external/xz/src/xz/list.h new file mode 100644 index 00000000000000..805880de40958c --- /dev/null +++ b/src/native/external/xz/src/xz/list.h @@ -0,0 +1,17 @@ +// SPDX-License-Identifier: 0BSD + +/////////////////////////////////////////////////////////////////////////////// +// +/// \file list.h +/// \brief List information about .xz files +// +// Author: Lasse Collin +// +/////////////////////////////////////////////////////////////////////////////// + +/// \brief List information about the given .xz file +extern void list_file(const char *filename); + + +/// \brief Show the totals after all files have been listed +extern void list_totals(void); diff --git a/src/native/external/xz/src/xz/main.c b/src/native/external/xz/src/xz/main.c new file mode 100644 index 00000000000000..1b8b3788117269 --- /dev/null +++ b/src/native/external/xz/src/xz/main.c @@ -0,0 +1,371 @@ +// SPDX-License-Identifier: 0BSD + +/////////////////////////////////////////////////////////////////////////////// +// +/// \file main.c +/// \brief main() +// +// Author: Lasse Collin +// +/////////////////////////////////////////////////////////////////////////////// + +#include "private.h" +#include + + +/// Exit status to use. This can be changed with set_exit_status(). +static enum exit_status_type exit_status = E_SUCCESS; + +#if defined(_WIN32) && !defined(__CYGWIN__) +/// exit_status has to be protected with a critical section due to +/// how "signal handling" is done on Windows. See signals.c for details. +static CRITICAL_SECTION exit_status_cs; +#endif + +/// True if --no-warn is specified. When this is true, we don't set +/// the exit status to E_WARNING when something worth a warning happens. +static bool no_warn = false; + + +extern void +set_exit_status(enum exit_status_type new_status) +{ + assert(new_status == E_WARNING || new_status == E_ERROR); + +#if defined(_WIN32) && !defined(__CYGWIN__) + EnterCriticalSection(&exit_status_cs); +#endif + + if (exit_status != E_ERROR) + exit_status = new_status; + +#if defined(_WIN32) && !defined(__CYGWIN__) + LeaveCriticalSection(&exit_status_cs); +#endif + + return; +} + + +extern void +set_exit_no_warn(void) +{ + no_warn = true; + return; +} + + +static const char * +read_name(const args_info *args) +{ + // FIXME: Maybe we should have some kind of memory usage limit here + // like the tool has for the actual compression and decompression. + // Giving some huge text file with --files0 makes us to read the + // whole file in RAM. + static char *name = NULL; + static size_t size = 256; + + // Allocate the initial buffer. This is never freed, since after it + // is no longer needed, the program exits very soon. It is safe to + // use xmalloc() and xrealloc() in this function, because while + // executing this function, no files are open for writing, and thus + // there's no need to cleanup anything before exiting. + if (name == NULL) + name = xmalloc(size); + + // Write position in name + size_t pos = 0; + + // Read one character at a time into name. + while (!user_abort) { + const int c = fgetc(args->files_file); + + if (ferror(args->files_file)) { + // Take care of EINTR since we have established + // the signal handlers already. + if (errno == EINTR) + continue; + + message_error(_("%s: Error reading filenames: %s"), + tuklib_mask_nonprint(args->files_name), + strerror(errno)); + return NULL; + } + + if (feof(args->files_file)) { + if (pos != 0) + message_error(_("%s: Unexpected end of input " + "when reading filenames"), + tuklib_mask_nonprint( + args->files_name)); + + return NULL; + } + + if (c == args->files_delim) { + // We allow consecutive newline (--files) or '\0' + // characters (--files0), and ignore such empty + // filenames. + if (pos == 0) + continue; + + // A non-empty name was read. Terminate it with '\0' + // and return it. + name[pos] = '\0'; + return name; + } + + if (c == '\0') { + // A null character was found when using --files, + // which expects plain text input separated with + // newlines. + message_error(_("%s: Null character found when " + "reading filenames; maybe you meant " + "to use '--files0' instead " + "of '--files'?"), + tuklib_mask_nonprint( + args->files_name)); + return NULL; + } + + name[pos++] = c; + + // Allocate more memory if needed. There must always be space + // at least for one character to allow terminating the string + // with '\0'. + if (pos == size) { + size *= 2; + name = xrealloc(name, size); + } + } + + return NULL; +} + + +int +main(int argc, char **argv) +{ +#if defined(_WIN32) && !defined(__CYGWIN__) + InitializeCriticalSection(&exit_status_cs); +#endif + + // Set up the progname variable needed for messages. + tuklib_progname_init(argv); + + // Initialize the file I/O. This makes sure that + // stdin, stdout, and stderr are something valid. + // This must be done before we might open any files + // even indirectly like locale and gettext initializations. + io_init(); + +#ifdef ENABLE_SANDBOX + // Enable such sandboxing that can always be enabled. + // This requires that progname has been set up. + // It's also good that io_init() has been called because it + // might need to do things that the initial sandbox won't allow. + // Otherwise this should be called as early as possible. + // + // NOTE: Calling this before tuklib_gettext_init() means that + // translated error message won't be available if sandbox + // initialization fails. However, sandbox_init() shouldn't + // fail and this order simply feels better. + sandbox_init(); +#endif + + // Set up the locale and message translations. + tuklib_gettext_init(PACKAGE, LOCALEDIR); + + // Initialize progress message handling. It's not always needed + // but it's simpler to do this unconditionally. + message_init(); + + // Set hardware-dependent default values. These can be overridden + // on the command line, thus this must be done before args_parse(). + hardware_init(); + + // Parse the command line arguments and get an array of filenames. + // This doesn't return if something is wrong with the command line + // arguments. If there are no arguments, one filename ("-") is still + // returned to indicate stdin. + args_info args; + args_parse(&args, argc, argv); + + if (opt_mode != MODE_LIST && opt_robot) + message_fatal(_("Compression and decompression with --robot " + "are not supported yet.")); + + // Tell the message handling code how many input files there are if + // we know it. This way the progress indicator can show it. + if (args.files_name != NULL) + message_set_files(0); + else + message_set_files(args.arg_count); + + // Refuse to write compressed data to standard output if it is + // a terminal. + if (opt_mode == MODE_COMPRESS) { + if (opt_stdout || (args.arg_count == 1 + && strcmp(args.arg_names[0], "-") == 0)) { + if (is_tty_stdout()) { + message_try_help(); + tuklib_exit(E_ERROR, E_ERROR, false); + } + } + } + + // Set up the signal handlers. We don't need these before we + // start the actual action and not in --list mode, so this is + // done after parsing the command line arguments. + // + // It's good to keep signal handlers in normal compression and + // decompression modes even when only writing to stdout, because + // we might need to restore O_APPEND flag on stdout before exiting. + // In --test mode, signal handlers aren't really needed, but let's + // keep them there for consistency with normal decompression. + if (opt_mode != MODE_LIST) + signals_init(); + +#ifdef ENABLE_SANDBOX + // Read-only sandbox can be enabled if we won't create or delete + // any files: + // + // - --stdout, --test, or --list was used. Note that --test + // implies opt_stdout = true but --list doesn't. + // + // - Output goes to stdout because --files or --files0 wasn't used + // and no arguments were given on the command line or the + // arguments are all "-" (indicating standard input). + bool to_stdout_only = opt_stdout || opt_mode == MODE_LIST; + if (!to_stdout_only && args.files_name == NULL) { + // If all of the filenames provided are "-" (more than one + // "-" could be specified), then we are only going to be + // writing to standard output. Note that if no filename args + // were provided, args.c puts a single "-" in arg_names[0]. + to_stdout_only = true; + + for (unsigned i = 0; i < args.arg_count; ++i) { + if (strcmp("-", args.arg_names[i]) != 0) { + to_stdout_only = false; + break; + } + } + } + + if (to_stdout_only) { + sandbox_enable_read_only(); + + // Allow strict sandboxing if we are processing exactly one + // file to standard output. This requires that --files or + // --files0 wasn't specified (an unknown number of filenames + // could be provided that way). + if (args.files_name == NULL && args.arg_count == 1) + sandbox_allow_strict(); + } +#endif + + // coder_run() handles compression, decompression, and testing. + // list_file() is for --list. + void (*run)(const char *filename) = &coder_run; +#ifdef HAVE_DECODERS + if (opt_mode == MODE_LIST) + run = &list_file; +#endif + + // Process the files given on the command line. Note that if no names + // were given, args_parse() gave us a fake "-" filename. + for (unsigned i = 0; i < args.arg_count && !user_abort; ++i) { + if (strcmp("-", args.arg_names[i]) == 0) { + // Processing from stdin to stdout. Check that we + // aren't writing compressed data to a terminal or + // reading it from a terminal. + if (opt_mode == MODE_COMPRESS) { + if (is_tty_stdout()) + continue; + } else if (is_tty_stdin()) { + continue; + } + + // It doesn't make sense to compress data from stdin + // if we are supposed to read filenames from stdin + // too (enabled with --files or --files0). + if (args.files_name == stdin_filename) { + message_error(_("Cannot read data from " + "standard input when " + "reading filenames " + "from standard input")); + continue; + } + + // Replace the "-" with a special pointer, which is + // recognized by coder_run() and other things. + // This way error messages get a proper filename + // string and the code still knows that it is + // handling the special case of stdin. + args.arg_names[i] = (char *)stdin_filename; + } + + // Do the actual compression or decompression. + run(args.arg_names[i]); + } + + // If --files or --files0 was used, process the filenames from the + // given file or stdin. Note that here we don't consider "-" to + // indicate stdin like we do with the command line arguments. + if (args.files_name != NULL) { + // read_name() checks for user_abort so we don't need to + // check it as loop termination condition. + while (true) { + const char *name = read_name(&args); + if (name == NULL) + break; + + // read_name() doesn't return empty names. + assert(name[0] != '\0'); + run(name); + } + + if (args.files_name != stdin_filename) + (void)fclose(args.files_file); + } + +#ifdef HAVE_DECODERS + // All files have now been handled. If in --list mode, display + // the totals before exiting. We don't have signal handlers + // enabled in --list mode, so we don't need to check user_abort. + if (opt_mode == MODE_LIST) { + assert(!user_abort); + list_totals(); + } +#endif + +#ifndef NDEBUG + coder_free(); + args_free(); +#endif + + // If we have got a signal, raise it to kill the program instead + // of calling tuklib_exit(). + signals_exit(); + + // Make a local copy of exit_status to keep the Windows code + // thread safe. At this point it is fine if we miss the user + // pressing C-c and don't set the exit_status to E_ERROR on + // Windows. +#if defined(_WIN32) && !defined(__CYGWIN__) + EnterCriticalSection(&exit_status_cs); +#endif + + enum exit_status_type es = exit_status; + +#if defined(_WIN32) && !defined(__CYGWIN__) + LeaveCriticalSection(&exit_status_cs); +#endif + + // Suppress the exit status indicating a warning if --no-warn + // was specified. + if (es == E_WARNING && no_warn) + es = E_SUCCESS; + + tuklib_exit((int)es, E_ERROR, message_verbosity_get() != V_SILENT); +} diff --git a/src/native/external/xz/src/xz/main.h b/src/native/external/xz/src/xz/main.h new file mode 100644 index 00000000000000..a8a1b457760add --- /dev/null +++ b/src/native/external/xz/src/xz/main.h @@ -0,0 +1,29 @@ +// SPDX-License-Identifier: 0BSD + +/////////////////////////////////////////////////////////////////////////////// +// +/// \file main.h +/// \brief Miscellaneous declarations +// +// Author: Lasse Collin +// +/////////////////////////////////////////////////////////////////////////////// + +/// Possible exit status values. These are the same as used by gzip and bzip2. +enum exit_status_type { + E_SUCCESS = 0, + E_ERROR = 1, + E_WARNING = 2, +}; + + +/// Sets the exit status after a warning or error has occurred. If new_status +/// is E_WARNING and the old exit status was already E_ERROR, the exit +/// status is not changed. +extern void set_exit_status(enum exit_status_type new_status); + + +/// Use E_SUCCESS instead of E_WARNING if something worth a warning occurs +/// but nothing worth an error has occurred. This is called when --no-warn +/// is specified. +extern void set_exit_no_warn(void); diff --git a/src/native/external/xz/src/xz/message.c b/src/native/external/xz/src/xz/message.c new file mode 100644 index 00000000000000..7657e85648dadd --- /dev/null +++ b/src/native/external/xz/src/xz/message.c @@ -0,0 +1,1326 @@ +// SPDX-License-Identifier: 0BSD + +/////////////////////////////////////////////////////////////////////////////// +// +/// \file message.c +/// \brief Printing messages +// +// Authors: Lasse Collin +// Jia Tan +// +/////////////////////////////////////////////////////////////////////////////// + +#include "private.h" +#include "tuklib_mbstr_wrap.h" +#include + + +/// Number of the current file +static unsigned int files_pos = 0; + +/// Total number of input files; zero if unknown. +static unsigned int files_total; + +/// Verbosity level +static enum message_verbosity verbosity = V_WARNING; + +/// Filename which we will print with the verbose messages +static const char *filename; + +/// True once the a filename has been printed to stderr as part of progress +/// message. If automatic progress updating isn't enabled, this becomes true +/// after the first progress message has been printed due to user sending +/// SIGINFO, SIGUSR1, or SIGALRM. Once this variable is true, we will print +/// an empty line before the next filename to make the output more readable. +static bool first_filename_printed = false; + +/// This is set to true when we have printed the current filename to stderr +/// as part of a progress message. This variable is useful only if not +/// updating progress automatically: if user sends many SIGINFO, SIGUSR1, or +/// SIGALRM signals, we won't print the name of the same file multiple times. +static bool current_filename_printed = false; + +/// True if we should print progress indicator and update it automatically +/// if also verbose >= V_VERBOSE. +static bool progress_automatic = false; + +/// True if message_progress_start() has been called but +/// message_progress_end() hasn't been called yet. +static bool progress_started = false; + +/// This is true when a progress message was printed and the cursor is still +/// on the same line with the progress message. In that case, a newline has +/// to be printed before any error messages. +static bool progress_active = false; + +/// Pointer to lzma_stream used to do the encoding or decoding. +static lzma_stream *progress_strm; + +/// This is true if we are in passthru mode (not actually compressing or +/// decompressing) and thus cannot use lzma_get_progress(progress_strm, ...). +/// That is, we are using coder_passthru() in coder.c. +static bool progress_is_from_passthru; + +/// Expected size of the input stream is needed to show completion percentage +/// and estimate remaining time. +static uint64_t expected_in_size; + + +// Use alarm() and SIGALRM when they are supported. This has two minor +// advantages over the alternative of polling gettimeofday(): +// - It is possible for the user to send SIGINFO, SIGUSR1, or SIGALRM to +// get intermediate progress information even when --verbose wasn't used +// or stderr is not a terminal. +// - alarm() + SIGALRM seems to have slightly less overhead than polling +// gettimeofday(). +#ifdef SIGALRM + +const int message_progress_sigs[] = { + SIGALRM, +#ifdef SIGINFO + SIGINFO, +#endif +#ifdef SIGUSR1 + SIGUSR1, +#endif + 0 +}; + +/// The signal handler for SIGALRM sets this to true. It is set back to false +/// once the progress message has been updated. +static volatile sig_atomic_t progress_needs_updating = false; + +/// Signal handler for SIGALRM +static void +progress_signal_handler(int sig lzma_attribute((__unused__))) +{ + progress_needs_updating = true; + return; +} + +#else + +/// This is true when progress message printing is wanted. Using the same +/// variable name as above to avoid some ifdefs. +static bool progress_needs_updating = false; + +/// Elapsed time when the next progress message update should be done. +static uint64_t progress_next_update; + +#endif + + +extern void +message_init(void) +{ + // If --verbose is used, we use a progress indicator if and only + // if stderr is a terminal. If stderr is not a terminal, we print + // verbose information only after finishing the file. As a special + // exception, even if --verbose was not used, user can send SIGALRM + // to make us print progress information once without automatic + // updating. + progress_automatic = is_tty(STDERR_FILENO); + +#ifdef SIGALRM + // Establish the signal handlers which set a flag to tell us that + // progress info should be updated. + struct sigaction sa; + sigemptyset(&sa.sa_mask); + sa.sa_flags = 0; + sa.sa_handler = &progress_signal_handler; + + for (size_t i = 0; message_progress_sigs[i] != 0; ++i) + if (sigaction(message_progress_sigs[i], &sa, NULL)) + message_signal_handler(); +#endif + + return; +} + + +extern void +message_verbosity_increase(void) +{ + if (verbosity < V_DEBUG) + ++verbosity; + + return; +} + + +extern void +message_verbosity_decrease(void) +{ + if (verbosity > V_SILENT) + --verbosity; + + return; +} + + +extern enum message_verbosity +message_verbosity_get(void) +{ + return verbosity; +} + + +extern void +message_set_files(unsigned int files) +{ + files_total = files; + return; +} + + +/// Prints the name of the current file if it hasn't been printed already, +/// except if we are processing exactly one stream from stdin to stdout. +/// I think it looks nicer to not print "(stdin)" when --verbose is used +/// in a pipe and no other files are processed. +static void +print_filename(void) +{ + if (!opt_robot && (files_total != 1 || filename != stdin_filename)) { + signals_block(); + + FILE *file = opt_mode == MODE_LIST ? stdout : stderr; + + // If a file was already processed, put an empty line + // before the next filename to improve readability. + if (first_filename_printed) + fputc('\n', file); + + first_filename_printed = true; + current_filename_printed = true; + + // If we don't know how many files there will be due + // to usage of --files or --files0. + if (files_total == 0) + fprintf(file, "%s (%u)\n", + tuklib_mask_nonprint(filename), + files_pos); + else + fprintf(file, "%s (%u/%u)\n", + tuklib_mask_nonprint(filename), + files_pos, files_total); + + signals_unblock(); + } + + return; +} + + +extern void +message_filename(const char *src_name) +{ + // Start numbering the files starting from one. + ++files_pos; + filename = src_name; + + if (verbosity >= V_VERBOSE + && (progress_automatic || opt_mode == MODE_LIST)) + print_filename(); + else + current_filename_printed = false; + + return; +} + + +extern void +message_progress_start(lzma_stream *strm, bool is_passthru, uint64_t in_size) +{ + // Store the pointer to the lzma_stream used to do the coding. + // It is needed to find out the position in the stream. + progress_strm = strm; + progress_is_from_passthru = is_passthru; + + // Store the expected size of the file. If we aren't printing any + // statistics, then is will be unused. But since it is possible + // that the user sends us a signal to show statistics, we need + // to have it available anyway. + expected_in_size = in_size; + + // Indicate that progress info may need to be printed before + // printing error messages. + progress_started = true; + + // If progress indicator is wanted, print the filename and possibly + // the file count now. + if (verbosity >= V_VERBOSE && progress_automatic) { + // Start the timer to display the first progress message + // after one second. An alternative would be to show the + // first message almost immediately, but delaying by one + // second looks better to me, since extremely early + // progress info is pretty much useless. +#ifdef SIGALRM + // First disable a possibly existing alarm. + alarm(0); + progress_needs_updating = false; + alarm(1); +#else + progress_needs_updating = true; + progress_next_update = 1000; +#endif + } + + return; +} + + +/// Make the string indicating completion percentage. +static const char * +progress_percentage(uint64_t in_pos) +{ + // If the size of the input file is unknown or the size told us is + // clearly wrong since we have processed more data than the alleged + // size of the file, show a static string indicating that we have + // no idea of the completion percentage. + if (expected_in_size == 0 || in_pos > expected_in_size) + return "--- %"; + + // Never show 100.0 % before we actually are finished. + double percentage = (double)(in_pos) / (double)(expected_in_size) + * 99.9; + + // Use big enough buffer to hold e.g. a multibyte decimal point. + static char buf[16]; + snprintf(buf, sizeof(buf), "%.1f %%", percentage); + + return buf; +} + + +/// Make the string containing the amount of input processed, amount of +/// output produced, and the compression ratio. +static const char * +progress_sizes(uint64_t compressed_pos, uint64_t uncompressed_pos, bool final) +{ + // Use big enough buffer to hold e.g. a multibyte thousand separators. + static char buf[128]; + char *pos = buf; + size_t left = sizeof(buf); + + // Print the sizes. If this the final message, use more reasonable + // units than MiB if the file was small. + const enum nicestr_unit unit_min = final ? NICESTR_B : NICESTR_MIB; + my_snprintf(&pos, &left, "%s / %s", + uint64_to_nicestr(compressed_pos, + unit_min, NICESTR_TIB, false, 0), + uint64_to_nicestr(uncompressed_pos, + unit_min, NICESTR_TIB, false, 1)); + + // Avoid division by zero. If we cannot calculate the ratio, set + // it to some nice number greater than 10.0 so that it gets caught + // in the next if-clause. + const double ratio = uncompressed_pos > 0 + ? (double)(compressed_pos) / (double)(uncompressed_pos) + : 16.0; + + // If the ratio is very bad, just indicate that it is greater than + // 9.999. This way the length of the ratio field stays fixed. + if (ratio > 9.999) + snprintf(pos, left, " > %.3f", 9.999); + else + snprintf(pos, left, " = %.3f", ratio); + + return buf; +} + + +/// Make the string containing the processing speed of uncompressed data. +static const char * +progress_speed(uint64_t uncompressed_pos, uint64_t elapsed) +{ + // Don't print the speed immediately, since the early values look + // somewhat random. + if (elapsed < 3000) + return ""; + + // The first character of KiB/s, MiB/s, or GiB/s: + static const char unit[] = { 'K', 'M', 'G' }; + + size_t unit_index = 0; + + // Calculate the speed as KiB/s. + double speed = (double)(uncompressed_pos) + / ((double)(elapsed) * (1024.0 / 1000.0)); + + // Adjust the unit of the speed if needed. + while (speed > 999.0) { + speed /= 1024.0; + if (++unit_index == ARRAY_SIZE(unit)) + return ""; // Way too fast ;-) + } + + // Use decimal point only if the number is small. Examples: + // - 0.1 KiB/s + // - 9.9 KiB/s + // - 99 KiB/s + // - 999 KiB/s + // Use big enough buffer to hold e.g. a multibyte decimal point. + static char buf[16]; + snprintf(buf, sizeof(buf), "%.*f %ciB/s", + speed > 9.9 ? 0 : 1, speed, unit[unit_index]); + return buf; +} + + +/// Make a string indicating elapsed time. The format is either +/// M:SS or H:MM:SS depending on if the time is an hour or more. +static const char * +progress_time(uint64_t mseconds) +{ + // 9999 hours = 416 days + static char buf[sizeof("9999:59:59")]; + + // 32-bit variable is enough for elapsed time (136 years). + uint32_t seconds = (uint32_t)(mseconds / 1000); + + // Don't show anything if the time is zero or ridiculously big. + if (seconds == 0 || seconds > ((9999 * 60) + 59) * 60 + 59) + return ""; + + uint32_t minutes = seconds / 60; + seconds %= 60; + + if (minutes >= 60) { + const uint32_t hours = minutes / 60; + minutes %= 60; + snprintf(buf, sizeof(buf), + "%" PRIu32 ":%02" PRIu32 ":%02" PRIu32, + hours, minutes, seconds); + } else { + snprintf(buf, sizeof(buf), "%" PRIu32 ":%02" PRIu32, + minutes, seconds); + } + + return buf; +} + + +/// Return a string containing estimated remaining time when +/// reasonably possible. +static const char * +progress_remaining(uint64_t in_pos, uint64_t elapsed) +{ + // Don't show the estimated remaining time when it wouldn't + // make sense: + // - Input size is unknown. + // - Input has grown bigger since we started (de)compressing. + // - We haven't processed much data yet, so estimate would be + // too inaccurate. + // - Only a few seconds has passed since we started (de)compressing, + // so estimate would be too inaccurate. + if (expected_in_size == 0 || in_pos > expected_in_size + || in_pos < (UINT64_C(1) << 19) || elapsed < 8000) + return ""; + + // Calculate the estimate. Don't give an estimate of zero seconds, + // since it is possible that all the input has been already passed + // to the library, but there is still quite a bit of output pending. + uint32_t remaining = (uint32_t)((double)(expected_in_size - in_pos) + * ((double)(elapsed) / 1000.0) / (double)(in_pos)); + if (remaining < 1) + remaining = 1; + + static char buf[sizeof("9 h 55 min")]; + + // Select appropriate precision for the estimated remaining time. + if (remaining <= 10) { + // A maximum of 10 seconds remaining. + // Show the number of seconds as is. + snprintf(buf, sizeof(buf), "%" PRIu32 " s", remaining); + + } else if (remaining <= 50) { + // A maximum of 50 seconds remaining. + // Round up to the next multiple of five seconds. + remaining = (remaining + 4) / 5 * 5; + snprintf(buf, sizeof(buf), "%" PRIu32 " s", remaining); + + } else if (remaining <= 590) { + // A maximum of 9 minutes and 50 seconds remaining. + // Round up to the next multiple of ten seconds. + remaining = (remaining + 9) / 10 * 10; + snprintf(buf, sizeof(buf), "%" PRIu32 " min %" PRIu32 " s", + remaining / 60, remaining % 60); + + } else if (remaining <= 59 * 60) { + // A maximum of 59 minutes remaining. + // Round up to the next multiple of a minute. + remaining = (remaining + 59) / 60; + snprintf(buf, sizeof(buf), "%" PRIu32 " min", remaining); + + } else if (remaining <= 9 * 3600 + 50 * 60) { + // A maximum of 9 hours and 50 minutes left. + // Round up to the next multiple of ten minutes. + remaining = (remaining + 599) / 600 * 10; + snprintf(buf, sizeof(buf), "%" PRIu32 " h %" PRIu32 " min", + remaining / 60, remaining % 60); + + } else if (remaining <= 23 * 3600) { + // A maximum of 23 hours remaining. + // Round up to the next multiple of an hour. + remaining = (remaining + 3599) / 3600; + snprintf(buf, sizeof(buf), "%" PRIu32 " h", remaining); + + } else if (remaining <= 9 * 24 * 3600 + 23 * 3600) { + // A maximum of 9 days and 23 hours remaining. + // Round up to the next multiple of an hour. + remaining = (remaining + 3599) / 3600; + snprintf(buf, sizeof(buf), "%" PRIu32 " d %" PRIu32 " h", + remaining / 24, remaining % 24); + + } else if (remaining <= 999 * 24 * 3600) { + // A maximum of 999 days remaining. ;-) + // Round up to the next multiple of a day. + remaining = (remaining + 24 * 3600 - 1) / (24 * 3600); + snprintf(buf, sizeof(buf), "%" PRIu32 " d", remaining); + + } else { + // The estimated remaining time is too big. Don't show it. + return ""; + } + + return buf; +} + + +/// Get how much uncompressed and compressed data has been processed. +static void +progress_pos(uint64_t *in_pos, + uint64_t *compressed_pos, uint64_t *uncompressed_pos) +{ + uint64_t out_pos; + if (progress_is_from_passthru) { + // In passthru mode the progress info is in total_in/out but + // the *progress_strm itself isn't initialized and thus we + // cannot use lzma_get_progress(). + *in_pos = progress_strm->total_in; + out_pos = progress_strm->total_out; + } else { + lzma_get_progress(progress_strm, in_pos, &out_pos); + } + + // It cannot have processed more input than it has been given. + assert(*in_pos <= progress_strm->total_in); + + // It cannot have produced more output than it claims to have ready. + assert(out_pos >= progress_strm->total_out); + + if (opt_mode == MODE_COMPRESS) { + *compressed_pos = out_pos; + *uncompressed_pos = *in_pos; + } else { + *compressed_pos = *in_pos; + *uncompressed_pos = out_pos; + } + + return; +} + + +extern void +message_progress_update(void) +{ + if (!progress_needs_updating) + return; + + // Calculate how long we have been processing this file. + const uint64_t elapsed = mytime_get_elapsed(); + +#ifndef SIGALRM + if (progress_next_update > elapsed) + return; + + progress_next_update = elapsed + 1000; +#endif + + // Get our current position in the stream. + uint64_t in_pos; + uint64_t compressed_pos; + uint64_t uncompressed_pos; + progress_pos(&in_pos, &compressed_pos, &uncompressed_pos); + + // Block signals so that fprintf() doesn't get interrupted. + signals_block(); + + // Print the filename if it hasn't been printed yet. + if (!current_filename_printed) + print_filename(); + + // Print the actual progress message. The idea is that there is at + // least three spaces between the fields in typical situations, but + // even in rare situations there is at least one space. + const char *cols[5] = { + progress_percentage(in_pos), + progress_sizes(compressed_pos, uncompressed_pos, false), + progress_speed(uncompressed_pos, elapsed), + progress_time(elapsed), + progress_remaining(in_pos, elapsed), + }; + fprintf(stderr, "\r %*s %*s %*s %10s %10s\r", + tuklib_mbstr_fw(cols[0], 6), cols[0], + tuklib_mbstr_fw(cols[1], 35), cols[1], + tuklib_mbstr_fw(cols[2], 9), cols[2], + cols[3], + cols[4]); + +#ifdef SIGALRM + // Updating the progress info was finished. Reset + // progress_needs_updating to wait for the next SIGALRM. + // + // NOTE: This has to be done before alarm(1) or with (very) bad + // luck we could be setting this to false after the alarm has already + // been triggered. + progress_needs_updating = false; + + if (verbosity >= V_VERBOSE && progress_automatic) { + // Mark that the progress indicator is active, so if an error + // occurs, the error message gets printed cleanly. + progress_active = true; + + // Restart the timer so that progress_needs_updating gets + // set to true after about one second. + alarm(1); + } else { + // The progress message was printed because user had sent us + // SIGALRM. In this case, each progress message is printed + // on its own line. + fputc('\n', stderr); + } +#else + // When SIGALRM isn't supported and we get here, it's always due to + // automatic progress update. We set progress_active here too like + // described above. + assert(verbosity >= V_VERBOSE); + assert(progress_automatic); + progress_active = true; +#endif + + signals_unblock(); + + return; +} + + +static void +progress_flush(bool finished) +{ + if (!progress_started || verbosity < V_VERBOSE) + return; + + uint64_t in_pos; + uint64_t compressed_pos; + uint64_t uncompressed_pos; + progress_pos(&in_pos, &compressed_pos, &uncompressed_pos); + + // Avoid printing intermediate progress info if some error occurs + // in the beginning of the stream. (If something goes wrong later in + // the stream, it is sometimes useful to tell the user where the + // error approximately occurred, especially if the error occurs + // after a time-consuming operation.) + if (!finished && !progress_active + && (compressed_pos == 0 || uncompressed_pos == 0)) + return; + + progress_active = false; + + const uint64_t elapsed = mytime_get_elapsed(); + + signals_block(); + + // When using the auto-updating progress indicator, the final + // statistics are printed in the same format as the progress + // indicator itself. + if (progress_automatic) { + const char *cols[5] = { + finished ? "100 %" : progress_percentage(in_pos), + progress_sizes(compressed_pos, uncompressed_pos, true), + progress_speed(uncompressed_pos, elapsed), + progress_time(elapsed), + finished ? "" : progress_remaining(in_pos, elapsed), + }; + fprintf(stderr, "\r %*s %*s %*s %10s %10s\n", + tuklib_mbstr_fw(cols[0], 6), cols[0], + tuklib_mbstr_fw(cols[1], 35), cols[1], + tuklib_mbstr_fw(cols[2], 9), cols[2], + cols[3], + cols[4]); + } else { + // The filename is always printed. + fprintf(stderr, _("%s: "), tuklib_mask_nonprint(filename)); + + // Percentage is printed only if we didn't finish yet. + if (!finished) { + // Don't print the percentage when it isn't known + // (starts with a dash). + const char *percentage = progress_percentage(in_pos); + if (percentage[0] != '-') + fprintf(stderr, "%s, ", percentage); + } + + // Size information is always printed. + fprintf(stderr, "%s", progress_sizes( + compressed_pos, uncompressed_pos, true)); + + // The speed and elapsed time aren't always shown. + const char *speed = progress_speed(uncompressed_pos, elapsed); + if (speed[0] != '\0') + fprintf(stderr, ", %s", speed); + + const char *elapsed_str = progress_time(elapsed); + if (elapsed_str[0] != '\0') + fprintf(stderr, ", %s", elapsed_str); + + fputc('\n', stderr); + } + + signals_unblock(); + + return; +} + + +extern void +message_progress_end(bool success) +{ + assert(progress_started); + progress_flush(success); + progress_started = false; + return; +} + + +static void +vmessage(enum message_verbosity v, const char *fmt, va_list ap) +{ + if (v <= verbosity) { + signals_block(); + + progress_flush(false); + + // TRANSLATORS: This is the program name in the beginning + // of the line in messages. Usually it becomes "xz: ". + // This is a translatable string because French needs + // a space before a colon. + fprintf(stderr, _("%s: "), progname); + +#ifdef __clang__ +# pragma GCC diagnostic push +# pragma GCC diagnostic ignored "-Wformat-nonliteral" +#endif + vfprintf(stderr, fmt, ap); +#ifdef __clang__ +# pragma GCC diagnostic pop +#endif + + fputc('\n', stderr); + + signals_unblock(); + } + + return; +} + + +extern void +message(enum message_verbosity v, const char *fmt, ...) +{ + va_list ap; + va_start(ap, fmt); + vmessage(v, fmt, ap); + va_end(ap); + return; +} + + +extern void +message_warning(const char *fmt, ...) +{ + va_list ap; + va_start(ap, fmt); + vmessage(V_WARNING, fmt, ap); + va_end(ap); + + set_exit_status(E_WARNING); + return; +} + + +extern void +message_error(const char *fmt, ...) +{ + va_list ap; + va_start(ap, fmt); + vmessage(V_ERROR, fmt, ap); + va_end(ap); + + set_exit_status(E_ERROR); + return; +} + + +extern void +message_fatal(const char *fmt, ...) +{ + va_list ap; + va_start(ap, fmt); + vmessage(V_ERROR, fmt, ap); + va_end(ap); + + tuklib_exit(E_ERROR, E_ERROR, false); +} + + +extern void +message_bug(void) +{ + message_fatal(_("Internal error (bug)")); +} + + +extern void +message_signal_handler(void) +{ + message_fatal(_("Cannot establish signal handlers")); +} + + +extern const char * +message_strm(lzma_ret code) +{ + switch (code) { + case LZMA_NO_CHECK: + return _("No integrity check; not verifying file integrity"); + + case LZMA_UNSUPPORTED_CHECK: + return _("Unsupported type of integrity check; " + "not verifying file integrity"); + + case LZMA_MEM_ERROR: + return strerror(ENOMEM); + + case LZMA_MEMLIMIT_ERROR: + return _("Memory usage limit reached"); + + case LZMA_FORMAT_ERROR: + return _("File format not recognized"); + + case LZMA_OPTIONS_ERROR: + return _("Unsupported options"); + + case LZMA_DATA_ERROR: + return _("Compressed data is corrupt"); + + case LZMA_BUF_ERROR: + return _("Unexpected end of input"); + + case LZMA_OK: + case LZMA_STREAM_END: + case LZMA_GET_CHECK: + case LZMA_PROG_ERROR: + case LZMA_SEEK_NEEDED: + case LZMA_RET_INTERNAL1: + case LZMA_RET_INTERNAL2: + case LZMA_RET_INTERNAL3: + case LZMA_RET_INTERNAL4: + case LZMA_RET_INTERNAL5: + case LZMA_RET_INTERNAL6: + case LZMA_RET_INTERNAL7: + case LZMA_RET_INTERNAL8: + // Without "default", compiler will warn if new constants + // are added to lzma_ret, it is not too easy to forget to + // add the new constants to this function. + break; + } + + return _("Internal error (bug)"); +} + + +extern void +message_mem_needed(enum message_verbosity v, uint64_t memusage) +{ + if (v > verbosity) + return; + + // Convert memusage to MiB, rounding up to the next full MiB. + // This way the user can always use the displayed usage as + // the new memory usage limit. (If we rounded to the nearest, + // the user might need to +1 MiB to get high enough limit.) + memusage = round_up_to_mib(memusage); + + uint64_t memlimit = hardware_memlimit_get(opt_mode); + + // Handle the case when there is no memory usage limit. + // This way we don't print a weird message with a huge number. + if (memlimit == UINT64_MAX) { + message(v, _("%s MiB of memory is required. " + "The limiter is disabled."), + uint64_to_str(memusage, 0)); + return; + } + + // With US-ASCII: + // 2^64 with thousand separators + " MiB" suffix + '\0' = 26 + 4 + 1 + // But there may be multibyte chars so reserve enough space. + char memlimitstr[128]; + + // Show the memory usage limit as MiB unless it is less than 1 MiB. + // This way it's easy to notice errors where one has typed + // --memory=123 instead of --memory=123MiB. + if (memlimit < (UINT32_C(1) << 20)) { + snprintf(memlimitstr, sizeof(memlimitstr), "%s B", + uint64_to_str(memlimit, 1)); + } else { + // Round up just like with memusage. If this function is + // called for informational purposes (to just show the + // current usage and limit), we should never show that + // the usage is higher than the limit, which would give + // a false impression that the memory usage limit isn't + // properly enforced. + snprintf(memlimitstr, sizeof(memlimitstr), "%s MiB", + uint64_to_str(round_up_to_mib(memlimit), 1)); + } + + message(v, _("%s MiB of memory is required. The limit is %s."), + uint64_to_str(memusage, 0), memlimitstr); + + return; +} + + +extern void +message_filters_show(enum message_verbosity v, const lzma_filter *filters) +{ + if (v > verbosity) + return; + + char *buf; + const lzma_ret ret = lzma_str_from_filters(&buf, filters, + LZMA_STR_ENCODER | LZMA_STR_GETOPT_LONG, NULL); + if (ret != LZMA_OK) + message_fatal("%s", message_strm(ret)); + + fprintf(stderr, _("%s: Filter chain: %s\n"), progname, buf); + free(buf); + return; +} + + +extern void +message_try_help(void) +{ + // Print this with V_WARNING instead of V_ERROR to prevent it from + // showing up when --quiet has been specified. + message(V_WARNING, _("Try '%s --help' for more information."), + progname); + return; +} + + +extern void +message_version(void) +{ + // It is possible that liblzma version is different than the command + // line tool version, so print both. + if (opt_robot) { + printf("XZ_VERSION=%" PRIu32 "\nLIBLZMA_VERSION=%" PRIu32 "\n", + LZMA_VERSION, lzma_version_number()); + } else { + printf("xz (" PACKAGE_NAME ") " LZMA_VERSION_STRING "\n"); + printf("liblzma %s\n", lzma_version_string()); + } + + tuklib_exit(E_SUCCESS, E_ERROR, verbosity != V_SILENT); +} + + +static void +detect_wrapping_errors(int error_mask) +{ +#ifndef NDEBUG + // This might help in catching problematic strings in translations. + // It's a debug message so don't translate this. + if (error_mask & TUKLIB_WRAP_WARN_OVERLONG) + message_fatal("The help text contains overlong lines"); +#endif + + if (error_mask & ~TUKLIB_WRAP_WARN_OVERLONG) + message_fatal(_("Error printing the help text " + "(error code %d)"), error_mask); + + return; +} + + +extern void +message_help(bool long_help) +{ + static const struct tuklib_wrap_opt wrap0 = { 0, 0, 0, 0, 79 }; + static const struct tuklib_wrap_opt wrap1 = { 1, 1, 1, 1, 79 }; + static const struct tuklib_wrap_opt wrap2 = { 2, 2, 22, 22, 79 }; + static const struct tuklib_wrap_opt wrap3 = { 24, 24, 36, 36, 79 }; + + // Accumulated error codes from tuklib_wraps() and tuklib_wrapf() + int e = 0; + + printf(_("Usage: %s [OPTION]... [FILE]...\n"), progname); + e |= tuklib_wraps(stdout, &wrap0, + W_("Compress or decompress FILEs in the .xz format.")); + putchar('\n'); + + e |= tuklib_wraps(stdout, &wrap0, + W_("Mandatory arguments to long options are " + "mandatory for short options too.")); + putchar('\n'); + + if (long_help) { + e |= tuklib_wraps(stdout, &wrap1, W_("Operation mode:")); + putchar('\n'); + } + + e |= tuklib_wrapf(stdout, &wrap2, + "-z, --compress\v%s\r" + "-d, --decompress\v%s\r" + "-t, --test\v%s\r" + "-l, --list\v%s", + W_("force compression"), + W_("force decompression"), + W_("test compressed file integrity"), + W_("list information about .xz files")); + + if (long_help) { + putchar('\n'); + e |= tuklib_wraps(stdout, &wrap1, W_("Operation modifiers:")); + putchar('\n'); + } + + e |= tuklib_wrapf(stdout, &wrap2, + "-k, --keep\v%s\r" + "-f, --force\v%s\r" + "-c, --stdout\v%s", + W_("keep (don't delete) input files"), + W_("force overwrite of output file and (de)compress links"), + W_("write to standard output and don't delete input files")); + // NOTE: --to-stdout isn't included above because it's not + // the recommended spelling. It was copied from gzip but other + // compressors with gzip-like syntax don't support it. + + if (long_help) { + e |= tuklib_wrapf(stdout, &wrap2, + " --no-sync\v%s\r" + " --single-stream\v%s\r" + " --no-sparse\v%s\r" + "-S, --suffix=%s\v%s\r" + " --files[=%s]\v%s\r" + " --files0[=%s]\v%s\r", + W_("don't synchronize the output file to the storage " + "device before removing the input file"), + W_("decompress only the first stream, and silently " + "ignore possible remaining input data"), + W_("do not create sparse files when decompressing"), + _(".SUF"), + W_("use the suffix '.SUF' on compressed files"), + _("FILE"), + W_("read filenames to process from FILE; " + "if FILE is omitted, " + "filenames are read from the standard input; " + "filenames must be terminated with " + "the newline character"), + _("FILE"), + W_("like --files but use the null character as " + "terminator")); + + e |= tuklib_wraps(stdout, &wrap1, + W_("Basic file format and compression options:")); + + e |= tuklib_wrapf(stdout, &wrap2, + "\n" + "-F, --format=%s\v%s\r" + "-C, --check=%s\v%s\r" + " --ignore-check\v%s", + _("FORMAT"), + W_("file format to encode or decode; possible values " + "are 'auto' (default), 'xz', 'lzma', 'lzip', " + "and 'raw'"), + _("NAME"), + W_("integrity check type: 'none' (use with caution), " + "'crc32', 'crc64' (default), or 'sha256'"), + W_("don't verify the integrity check when " + "decompressing")); + } + + e |= tuklib_wrapf(stdout, &wrap2, + "-0 ... -9\v%s\r" + "-e, --extreme\v%s\r" + "-T, --threads=%s\v%s", + W_("compression preset; default is 6; take compressor *and* " + "decompressor memory usage into account before " + "using 7-9!"), + W_("try to improve compression ratio by using more CPU time; " + "does not affect decompressor memory requirements"), + // TRANSLATORS: Short for NUMBER. A longer string is fine but + // wider than 5 columns makes --long-help a few lines longer. + _("NUM"), + W_("use at most NUM threads; the default is 0 which uses " + "as many threads as there are processor cores")); + + if (long_help) { + e |= tuklib_wrapf(stdout, &wrap2, + " --block-size=%s\v%s\r" + " --block-list=%s\v%s\r" + " --flush-timeout=%s\v%s", + _("SIZE"), + W_("start a new .xz block after every SIZE bytes " + "of input; use this to set the block size " + "for threaded compression"), + _("BLOCKS"), + W_("start a new .xz block after the given " + "comma-separated intervals of uncompressed " + "data; optionally, specify a " + "filter chain number (0-9) followed by " + "a ':' before the uncompressed data size"), + _("NUM"), + W_("when compressing, if more than NUM " + "milliseconds has passed since the previous " + "flush and reading more input would block, " + "all pending data is flushed out")); + + e |= tuklib_wrapf(stdout, &wrap2, + " --memlimit-compress=%s\n" + " --memlimit-decompress=%s\n" + " --memlimit-mt-decompress=%s\n" + "-M, --memlimit=%s\v%s\r" + " --no-adjust\v%s", + _("LIMIT"), + _("LIMIT"), + _("LIMIT"), + _("LIMIT"), + // xgettext:no-c-format + W_("set memory usage limit for compression, " + "decompression, threaded decompression, " + "or all of these; LIMIT is in " + "bytes, % of RAM, or 0 for defaults"), + W_("if compression settings exceed the " + "memory usage limit, " + "give an error instead of adjusting " + "the settings downwards")); + } + + if (long_help) { + putchar('\n'); + + e |= tuklib_wraps(stdout, &wrap1, + W_("Custom filter chain for compression " + "(an alternative to using presets):")); + + e |= tuklib_wrapf(stdout, &wrap2, + "\n" + "--filters=%s\v%s\r" + "--filters1=%s ... --filters9=%s\v%s\r" + "--filters-help\v%s", + _("FILTERS"), + W_("set the filter chain using the " + "liblzma filter string syntax; " + "use --filters-help for more information"), + _("FILTERS"), + _("FILTERS"), + W_("set additional filter chains using the " + "liblzma filter string syntax to use " + "with --block-list"), + W_("display more information about the " + "liblzma filter string syntax and exit")); + +#if defined(HAVE_ENCODER_LZMA1) || defined(HAVE_DECODER_LZMA1) \ + || defined(HAVE_ENCODER_LZMA2) || defined(HAVE_DECODER_LZMA2) + e |= tuklib_wrapf(stdout, &wrap2, + "\n" + "--lzma1[=%s]\n" + "--lzma2[=%s]\v%s", + // TRANSLATORS: Short for OPTIONS. + _("OPTS"), + _("OPTS"), + // TRANSLATORS: Use semicolon (or its fullwidth form) + // in "(valid values; default)" even if it is weird in + // your language. There are non-translatable strings + // that look like "(foo, bar, baz; foo)" which list + // the supported values and the default value. + W_("LZMA1 or LZMA2; OPTS is a comma-separated list " + "of zero or more of the following options " + "(valid values; default):")); + + e |= tuklib_wrapf(stdout, &wrap3, + "preset=%s\v%s (0-9[e])\r" + "dict=%s\v%s \b(4KiB - 1536MiB; 8MiB)\b\r" + "lc=%s\v%s \b(0-4; 3)\b\r" + "lp=%s\v%s \b(0-4; 0)\b\r" + "pb=%s\v%s \b(0-4; 2)\b\r" + "mode=%s\v%s (fast, normal; normal)\r" + "nice=%s\v%s \b(2-273; 64)\b\r" + "mf=%s\v%s (hc3, hc4, bt2, bt3, bt4; bt4)\r" + "depth=%s\v%s", + // TRANSLATORS: Short for PRESET. A longer string is + // fine but wider than 4 columns makes --long-help + // one line longer. + _("PRE"), + W_("reset options to a preset"), + _("NUM"), W_("dictionary size"), + _("NUM"), + // TRANSLATORS: The word "literal" in "literal context + // bits" means how many "context bits" to use when + // encoding literals. A literal is a single 8-bit + // byte. It doesn't mean "literally" here. + W_("number of literal context bits"), + _("NUM"), W_("number of literal position bits"), + _("NUM"), W_("number of position bits"), + _("MODE"), W_("compression mode"), + _("NUM"), W_("nice length of a match"), + _("NAME"), W_("match finder"), + _("NUM"), W_("maximum search depth; " + "0=automatic (default)")); +#endif + + e |= tuklib_wrapf(stdout, &wrap2, + "\n" + "--x86[=%s]\v%s\r" + "--arm[=%s]\v%s\r" + "--armthumb[=%s]\v%s\r" + "--arm64[=%s]\v%s\r" + "--powerpc[=%s]\v%s\r" + "--ia64[=%s]\v%s\r" + "--sparc[=%s]\v%s\r" + "--riscv[=%s]\v%s\r" + "\v%s", + _("OPTS"), + W_("x86 BCJ filter (32-bit and 64-bit)"), + _("OPTS"), + W_("ARM BCJ filter"), + _("OPTS"), + W_("ARM-Thumb BCJ filter"), + _("OPTS"), + W_("ARM64 BCJ filter"), + _("OPTS"), + W_("PowerPC BCJ filter (big endian only)"), + _("OPTS"), + W_("IA-64 (Itanium) BCJ filter"), + _("OPTS"), + W_("SPARC BCJ filter"), + _("OPTS"), + W_("RISC-V BCJ filter"), + W_("Valid OPTS for all BCJ filters:")); + e |= tuklib_wrapf(stdout, &wrap3, + "start=%s\v%s", + _("NUM"), + W_("start offset for conversions (default=0)")); + +#if defined(HAVE_ENCODER_DELTA) || defined(HAVE_DECODER_DELTA) + e |= tuklib_wrapf(stdout, &wrap2, + "\n" + "--delta[=%s]\v%s", + _("OPTS"), + W_("Delta filter; valid OPTS " + "(valid values; default):")); + e |= tuklib_wrapf(stdout, &wrap3, + "dist=%s\v%s \b(1-256; 1)\b", + _("NUM"), + W_("distance between bytes being subtracted " + "from each other")); +#endif + } + + if (long_help) { + putchar('\n'); + e |= tuklib_wraps(stdout, &wrap1, W_("Other options:")); + putchar('\n'); + } + + e |= tuklib_wrapf(stdout, &wrap2, + "-q, --quiet\v%s\r" + "-v, --verbose\v%s", + W_("suppress warnings; specify twice to suppress errors too"), + W_("be verbose; specify twice for even more verbose")); + + if (long_help) { + e |= tuklib_wrapf(stdout, &wrap2, + "-Q, --no-warn\v%s\r" + " --robot\v%s\r" + "\n" + " --info-memory\v%s\r" + "-h, --help\v%s\r" + "-H, --long-help\v%s", + W_("make warnings not affect the exit status"), + W_("use machine-parsable messages (useful for scripts)"), + W_("display the total amount of RAM and the currently active " + "memory usage limits, and exit"), + W_("display the short help (lists only the basic options)"), + W_("display this long help and exit")); + } else { + e |= tuklib_wrapf(stdout, &wrap2, + "-h, --help\v%s\r" + "-H, --long-help\v%s", + W_("display this short help and exit"), + W_("display the long help (lists also the advanced options)")); + } + + e |= tuklib_wrapf(stdout, &wrap2, "-V, --version\v%s", + W_("display the version number and exit")); + + putchar('\n'); + e |= tuklib_wraps(stdout, &wrap0, + W_("With no FILE, or when FILE is -, read standard input.")); + putchar('\n'); + + e |= tuklib_wrapf(stdout, &wrap0, + // TRANSLATORS: This message indicates the bug reporting + // address for this package. Please add another line saying + // "\nReport translation bugs to <...>." with the email or WWW + // address for translation bugs. Thanks! + W_("Report bugs to <%s> (in English or Finnish)."), + PACKAGE_BUGREPORT); + + e |= tuklib_wrapf(stdout, &wrap0, + // TRANSLATORS: The first %s is the name of this software. + // The second <%s> is an URL. + W_("%s home page: <%s>"), PACKAGE_NAME, PACKAGE_URL); + +#if LZMA_VERSION_STABILITY != LZMA_VERSION_STABILITY_STABLE + e |= tuklib_wraps(stdout, &wrap0, W_( +"THIS IS A DEVELOPMENT VERSION NOT INTENDED FOR PRODUCTION USE.")); +#endif + + detect_wrapping_errors(e); + tuklib_exit(E_SUCCESS, E_ERROR, verbosity != V_SILENT); +} + + +extern void +message_filters_help(void) +{ + static const struct tuklib_wrap_opt wrap = { .right_margin = 76 }; + + char *encoder_options; + if (lzma_str_list_filters(&encoder_options, LZMA_VLI_UNKNOWN, + LZMA_STR_ENCODER, NULL) != LZMA_OK) + message_bug(); + + if (!opt_robot) { + int e = tuklib_wrapf(stdout, &wrap, +W_("Filter chains are set using the --filters=FILTERS or " +"--filters1=FILTERS ... --filters9=FILTERS options. " +"Each filter in the chain can be separated by spaces or '--'. " +"Alternatively a preset %s can be specified instead of a filter chain."), + "<0-9>[e]"); + putchar('\n'); + e |= tuklib_wraps(stdout, &wrap, + W_("The supported filters and their options are:")); + + detect_wrapping_errors(e); + } + + puts(encoder_options); + + tuklib_exit(E_SUCCESS, E_ERROR, verbosity != V_SILENT); +} diff --git a/src/native/external/xz/src/xz/message.h b/src/native/external/xz/src/xz/message.h new file mode 100644 index 00000000000000..5f0af7f7e8a522 --- /dev/null +++ b/src/native/external/xz/src/xz/message.h @@ -0,0 +1,168 @@ +// SPDX-License-Identifier: 0BSD + +/////////////////////////////////////////////////////////////////////////////// +// +/// \file message.h +/// \brief Printing messages to stderr +// +// Author: Lasse Collin +// +/////////////////////////////////////////////////////////////////////////////// + +/// Verbosity levels +enum message_verbosity { + V_SILENT, ///< No messages + V_ERROR, ///< Only error messages + V_WARNING, ///< Errors and warnings + V_VERBOSE, ///< Errors, warnings, and verbose statistics + V_DEBUG, ///< Very verbose +}; + + +/// \brief Signals used for progress message handling +extern const int message_progress_sigs[]; + + +/// \brief Initializes the progress message functions +/// +/// message_fatal() and such can be called even before message_init() +/// has been called. +/// +/// If an error occurs, this function doesn't return. +/// +extern void message_init(void); + + +/// Increase verbosity level by one step unless it was at maximum. +extern void message_verbosity_increase(void); + +/// Decrease verbosity level by one step unless it was at minimum. +extern void message_verbosity_decrease(void); + +/// Get the current verbosity level. +extern enum message_verbosity message_verbosity_get(void); + + +/// \brief Print a message if verbosity level is at least "verbosity" +/// +/// This doesn't touch the exit status. +lzma_attribute((__format__(__printf__, 2, 3))) +extern void message(enum message_verbosity verbosity, const char *fmt, ...); + + +/// \brief Prints a warning and possibly sets exit status +/// +/// The message is printed only if verbosity level is at least V_WARNING. +/// The exit status is set to WARNING unless it was already at ERROR. +lzma_attribute((__format__(__printf__, 1, 2))) +extern void message_warning(const char *fmt, ...); + + +/// \brief Prints an error message and sets exit status +/// +/// The message is printed only if verbosity level is at least V_ERROR. +/// The exit status is set to ERROR. +lzma_attribute((__format__(__printf__, 1, 2))) +extern void message_error(const char *fmt, ...); + + +/// \brief Prints an error message and exits with EXIT_ERROR +/// +/// The message is printed only if verbosity level is at least V_ERROR. +tuklib_attr_noreturn +lzma_attribute((__format__(__printf__, 1, 2))) +extern void message_fatal(const char *fmt, ...); + + +/// Print an error message that an internal error occurred and exit with +/// EXIT_ERROR. +tuklib_attr_noreturn +extern void message_bug(void); + + +/// Print a message that establishing signal handlers failed, and exit with +/// exit status ERROR. +tuklib_attr_noreturn +extern void message_signal_handler(void); + + +/// Convert lzma_ret to a string. +extern const char *message_strm(lzma_ret code); + + +/// Display how much memory was needed and how much the limit was. +extern void message_mem_needed(enum message_verbosity v, uint64_t memusage); + + +/// Print the filter chain. +extern void message_filters_show( + enum message_verbosity v, const lzma_filter *filters); + + +/// Print a message that user should try --help. +extern void message_try_help(void); + + +/// Prints the version number to stdout and exits with exit status SUCCESS. +tuklib_attr_noreturn +extern void message_version(void); + + +/// Print the help message. +tuklib_attr_noreturn +extern void message_help(bool long_help); + + +/// Prints a help message specifically for using the --filters and +/// --filtersX command line options. +tuklib_attr_noreturn +extern void message_filters_help(void); + + +/// \brief Set the total number of files to be processed +/// +/// Standard input is counted as a file here. This is used when printing +/// the filename via message_filename(). +extern void message_set_files(unsigned int files); + + +/// \brief Set the name of the current file and possibly print it too +/// +/// The name is printed immediately if --list was used or if --verbose +/// was used and stderr is a terminal. Even when the filename isn't printed, +/// it is stored so that it can be printed later if needed for progress +/// messages. +extern void message_filename(const char *src_name); + + +/// \brief Start progress info handling +/// +/// message_filename() must be called before this function to set +/// the filename. +/// +/// This must be paired with a call to message_progress_end() before the +/// given *strm becomes invalid. +/// +/// \param strm Pointer to lzma_stream used for the coding. +/// \param is_passthru +/// If true, we are copying input to output without +/// encoding or decoding, and thus cannot use +/// lzma_get_progress(). +/// \param in_size Size of the input file, or zero if unknown. +/// +extern void message_progress_start(lzma_stream *strm, + bool is_passthru, uint64_t in_size); + + +/// Update the progress info if in verbose mode and enough time has passed +/// since the previous update. This can be called only when +/// message_progress_start() has already been used. +extern void message_progress_update(void); + + +/// \brief Finishes the progress message if we were in verbose mode +/// +/// \param finished True if the whole stream was successfully coded +/// and output written to the output stream. +/// +extern void message_progress_end(bool finished); diff --git a/src/native/external/xz/src/xz/mytime.c b/src/native/external/xz/src/xz/mytime.c new file mode 100644 index 00000000000000..c603051dcdaed3 --- /dev/null +++ b/src/native/external/xz/src/xz/mytime.c @@ -0,0 +1,177 @@ +// SPDX-License-Identifier: 0BSD + +/////////////////////////////////////////////////////////////////////////////// +// +/// \file mytime.c +/// \brief Time handling functions +// +// Author: Lasse Collin +// +/////////////////////////////////////////////////////////////////////////////// + +#include "private.h" + +#if defined(MYTHREAD_VISTA) || defined(_MSC_VER) + // Nothing +#elif defined(HAVE_CLOCK_GETTIME) \ + && (!defined(__MINGW32__) || defined(MYTHREAD_POSIX)) +# include +#else +# include +#endif + +uint64_t opt_flush_timeout = 0; + +// start_time holds the time when the (de)compression was started. +// It's from mytime_now() and thus only useful for calculating relative +// time differences (elapsed time). start_time is initialized by calling +// mytime_set_start_time() and modified by mytime_sigtstp_handler(). +// +// When mytime_sigtstp_handler() is used, start_time is made volatile. +// I'm not sure if that is really required since access to it is guarded +// by signals_block()/signals_unblock() since accessing an uint64_t isn't +// atomic on all systems. But since the variable isn't accessed very +// frequently making it volatile doesn't hurt. +#ifdef USE_SIGTSTP_HANDLER +static volatile uint64_t start_time; +#else +static uint64_t start_time; +#endif + +static uint64_t next_flush; + + +/// \brief Get the current time as milliseconds +/// +/// It's relative to some point but not necessarily to the UNIX Epoch. +static uint64_t +mytime_now(void) +{ +#if defined(MYTHREAD_VISTA) || defined(_MSC_VER) + // Since there is no SIGALRM on Windows, this function gets + // called frequently when the progress indicator is in use. + // Progress indicator doesn't need high-resolution time. + // GetTickCount64() has very low overhead but needs at least WinVista. + // + // MinGW-w64 provides the POSIX functions clock_gettime() and + // gettimeofday() in a manner that allow xz to run on older + // than WinVista. If the threading method needs WinVista anyway, + // there's no reason to avoid a WinVista API here either. + return GetTickCount64(); + +#elif defined(HAVE_CLOCK_GETTIME) \ + && (!defined(__MINGW32__) || defined(MYTHREAD_POSIX)) + // MinGW-w64: clock_gettime() is defined in winpthreads but we need + // nothing else from winpthreads (unless, for some odd reason, POSIX + // threading has been selected). By avoiding clock_gettime(), we + // avoid the dependency on libwinpthread-1.dll or the need to link + // against the static version. The downside is that the fallback + // method, gettimeofday(), doesn't provide monotonic time. + struct timespec tv; + +# ifdef HAVE_CLOCK_MONOTONIC + // If CLOCK_MONOTONIC was available at compile time but for some + // reason isn't at runtime, fallback to CLOCK_REALTIME which + // according to POSIX is mandatory for all implementations. + static clockid_t clk_id = CLOCK_MONOTONIC; + while (clock_gettime(clk_id, &tv)) + clk_id = CLOCK_REALTIME; +# else + clock_gettime(CLOCK_REALTIME, &tv); +# endif + + return (uint64_t)tv.tv_sec * 1000 + (uint64_t)(tv.tv_nsec / 1000000); + +#else + struct timeval tv; + gettimeofday(&tv, NULL); + return (uint64_t)tv.tv_sec * 1000 + (uint64_t)(tv.tv_usec / 1000); +#endif +} + + +#ifdef USE_SIGTSTP_HANDLER +extern void +mytime_sigtstp_handler(int sig lzma_attribute((__unused__))) +{ + // Measure how long the process stays in the stopped state and add + // that amount to start_time. This way the progress indicator + // won't count the stopped time as elapsed time and the estimated + // remaining time won't be confused by the time spent in the + // stopped state. + // + // FIXME? Is raising SIGSTOP the correct thing to do? POSIX.1-2017 + // says that orphan processes shouldn't stop on SIGTSTP. So perhaps + // the most correct thing to do could be to revert to the default + // handler for SIGTSTP, unblock SIGTSTP, and then raise(SIGTSTP). + // It's quite a bit more complicated than just raising SIGSTOP though. + // + // The difference between raising SIGTSTP vs. SIGSTOP can be seen on + // the shell command line too by running "echo $?" after stopping + // a process but perhaps that doesn't matter. + const uint64_t t = mytime_now(); + raise(SIGSTOP); + start_time += mytime_now() - t; + return; +} +#endif + + +extern void +mytime_set_start_time(void) +{ +#ifdef USE_SIGTSTP_HANDLER + // Block the signals when accessing start_time so that we cannot + // end up with a garbage value. start_time is volatile but access + // to it isn't atomic at least on 32-bit systems. + signals_block(); +#endif + + start_time = mytime_now(); + +#ifdef USE_SIGTSTP_HANDLER + signals_unblock(); +#endif + + return; +} + + +extern uint64_t +mytime_get_elapsed(void) +{ +#ifdef USE_SIGTSTP_HANDLER + signals_block(); +#endif + + const uint64_t t = mytime_now() - start_time; + +#ifdef USE_SIGTSTP_HANDLER + signals_unblock(); +#endif + + return t; +} + + +extern void +mytime_set_flush_time(void) +{ + next_flush = mytime_now() + opt_flush_timeout; + return; +} + + +extern int +mytime_get_flush_timeout(void) +{ + if (opt_flush_timeout == 0 || opt_mode != MODE_COMPRESS) + return -1; + + const uint64_t now = mytime_now(); + if (now >= next_flush) + return 0; + + const uint64_t remaining = next_flush - now; + return remaining > INT_MAX ? INT_MAX : (int)remaining; +} diff --git a/src/native/external/xz/src/xz/mytime.h b/src/native/external/xz/src/xz/mytime.h new file mode 100644 index 00000000000000..6dfaeae9b683fb --- /dev/null +++ b/src/native/external/xz/src/xz/mytime.h @@ -0,0 +1,48 @@ +// SPDX-License-Identifier: 0BSD + +/////////////////////////////////////////////////////////////////////////////// +// +/// \file mytime.h +/// \brief Time handling functions +// +// Author: Lasse Collin +// +/////////////////////////////////////////////////////////////////////////////// + + +/// \brief Number of milliseconds to between LZMA_SYNC_FLUSHes +/// +/// If 0, timed flushing is disabled. Otherwise if no more input is available +/// and not at the end of the file and at least opt_flush_timeout milliseconds +/// has elapsed since the start of compression or the previous flushing +/// (LZMA_SYNC_FLUSH or LZMA_FULL_FLUSH), set LZMA_SYNC_FLUSH to flush +/// the pending data. +extern uint64_t opt_flush_timeout; + + +#ifdef USE_SIGTSTP_HANDLER +/// \brief Signal handler for SIGTSTP +extern void mytime_sigtstp_handler(int sig); +#endif + + +/// \brief Store the time when (de)compression was started +/// +/// The start time is also stored as the time of the first flush. +extern void mytime_set_start_time(void); + + +/// \brief Get the number of milliseconds since the operation started +extern uint64_t mytime_get_elapsed(void); + + +/// \brief Store the time of when compressor was flushed +extern void mytime_set_flush_time(void); + + +/// \brief Get the number of milliseconds until the next flush +/// +/// This returns -1 if no timed flushing is used. +/// +/// The return value is intended for use with poll(). +extern int mytime_get_flush_timeout(void); diff --git a/src/native/external/xz/src/xz/options.c b/src/native/external/xz/src/xz/options.c new file mode 100644 index 00000000000000..c4f56b49560928 --- /dev/null +++ b/src/native/external/xz/src/xz/options.c @@ -0,0 +1,361 @@ +// SPDX-License-Identifier: 0BSD + +/////////////////////////////////////////////////////////////////////////////// +// +/// \file options.c +/// \brief Parser for filter-specific options +// +// Author: Lasse Collin +// +/////////////////////////////////////////////////////////////////////////////// + +#include "private.h" + + +/////////////////// +// Generic stuff // +/////////////////// + +typedef struct { + const char *name; + uint64_t id; +} name_id_map; + + +typedef struct { + const char *name; + const name_id_map *map; + uint64_t min; + uint64_t max; +} option_map; + + +/// Parses option=value pairs that are separated with commas: +/// opt=val,opt=val,opt=val +/// +/// Each option is a string, that is converted to an integer using the +/// index where the option string is in the array. +/// +/// Value can be +/// - a string-id map mapping a list of possible string values to integers +/// (opts[i].map != NULL, opts[i].min and opts[i].max are ignored); +/// - a number with minimum and maximum value limit +/// (opts[i].map == NULL && opts[i].min != UINT64_MAX); +/// - a string that will be parsed by the filter-specific code +/// (opts[i].map == NULL && opts[i].min == UINT64_MAX, opts[i].max ignored) +/// +/// When parsing both option and value succeed, a filter-specific function +/// is called, which should update the given value to filter-specific +/// options structure. +/// +/// This returns only if no errors occur. +/// +/// \param str String containing the options from the command line +/// \param opts Filter-specific option map +/// \param set Filter-specific function to update filter_options +/// \param filter_options Pointer to filter-specific options structure +/// +static void +parse_options(const char *str, const option_map *opts, + void (*set)(void *filter_options, + unsigned key, uint64_t value, const char *valuestr), + void *filter_options) +{ + if (str == NULL || str[0] == '\0') + return; + + char *s = xstrdup(str); + char *name = s; + + while (*name != '\0') { + if (*name == ',') { + ++name; + continue; + } + + char *split = strchr(name, ','); + if (split != NULL) + *split = '\0'; + + char *value = strchr(name, '='); + if (value != NULL) + *value++ = '\0'; + + if (value == NULL || value[0] == '\0') + message_fatal(_("%s: %s"), tuklib_mask_nonprint(str), + _("Options must be 'name=value' " + "pairs separated with commas")); + + // Look for the option name from the option map. + unsigned i = 0; + while (true) { + if (opts[i].name == NULL) + message_fatal(_("%s: Invalid option name"), + tuklib_mask_nonprint(name)); + + if (strcmp(name, opts[i].name) == 0) + break; + + ++i; + } + + // Option was found from the map. See how we should handle it. + if (opts[i].map != NULL) { + // value is a string which we should map + // to an integer. + unsigned j; + for (j = 0; opts[i].map[j].name != NULL; ++j) { + if (strcmp(opts[i].map[j].name, value) == 0) + break; + } + + if (opts[i].map[j].name == NULL) + message_fatal(_("%s: %s"), + tuklib_mask_nonprint(value), + _("Invalid option value")); + + set(filter_options, i, opts[i].map[j].id, value); + + } else if (opts[i].min == UINT64_MAX) { + // value is a special string that will be + // parsed by set(). + set(filter_options, i, 0, value); + + } else { + // value is an integer. + const uint64_t v = str_to_uint64(name, value, + opts[i].min, opts[i].max); + set(filter_options, i, v, value); + } + + // Check if it was the last option. + if (split == NULL) + break; + + name = split + 1; + } + + free(s); + return; +} + + +/////////// +// Delta // +/////////// + +enum { + OPT_DIST, +}; + + +static void +set_delta(void *options, unsigned key, uint64_t value, + const char *valuestr lzma_attribute((__unused__))) +{ + lzma_options_delta *opt = options; + switch (key) { + case OPT_DIST: + opt->dist = value; + break; + } +} + + +extern lzma_options_delta * +options_delta(const char *str) +{ + static const option_map opts[] = { + { "dist", NULL, LZMA_DELTA_DIST_MIN, + LZMA_DELTA_DIST_MAX }, + { NULL, NULL, 0, 0 } + }; + + lzma_options_delta *options = xmalloc(sizeof(lzma_options_delta)); + *options = (lzma_options_delta){ + // It's hard to give a useful default for this. + .type = LZMA_DELTA_TYPE_BYTE, + .dist = LZMA_DELTA_DIST_MIN, + }; + + parse_options(str, opts, &set_delta, options); + + return options; +} + + +///////// +// BCJ // +///////// + +enum { + OPT_START_OFFSET, +}; + + +static void +set_bcj(void *options, unsigned key, uint64_t value, + const char *valuestr lzma_attribute((__unused__))) +{ + lzma_options_bcj *opt = options; + switch (key) { + case OPT_START_OFFSET: + opt->start_offset = value; + break; + } +} + + +extern lzma_options_bcj * +options_bcj(const char *str) +{ + static const option_map opts[] = { + { "start", NULL, 0, UINT32_MAX }, + { NULL, NULL, 0, 0 } + }; + + lzma_options_bcj *options = xmalloc(sizeof(lzma_options_bcj)); + *options = (lzma_options_bcj){ + .start_offset = 0, + }; + + parse_options(str, opts, &set_bcj, options); + + return options; +} + + +////////// +// LZMA // +////////// + +enum { + OPT_PRESET, + OPT_DICT, + OPT_LC, + OPT_LP, + OPT_PB, + OPT_MODE, + OPT_NICE, + OPT_MF, + OPT_DEPTH, +}; + + +tuklib_attr_noreturn +static void +error_lzma_preset(const char *valuestr) +{ + message_fatal(_("Unsupported LZMA1/LZMA2 preset: %s"), + tuklib_mask_nonprint(valuestr)); +} + + +static void +set_lzma(void *options, unsigned key, uint64_t value, const char *valuestr) +{ + lzma_options_lzma *opt = options; + + switch (key) { + case OPT_PRESET: { + if (valuestr[0] < '0' || valuestr[0] > '9') + error_lzma_preset(valuestr); + + uint32_t preset = (uint32_t)(valuestr[0] - '0'); + + // Currently only "e" is supported as a modifier, + // so keep this simple for now. + if (valuestr[1] != '\0') { + if (valuestr[1] == 'e') + preset |= LZMA_PRESET_EXTREME; + else + error_lzma_preset(valuestr); + + if (valuestr[2] != '\0') + error_lzma_preset(valuestr); + } + + if (lzma_lzma_preset(options, preset)) + error_lzma_preset(valuestr); + + break; + } + + case OPT_DICT: + opt->dict_size = value; + break; + + case OPT_LC: + opt->lc = value; + break; + + case OPT_LP: + opt->lp = value; + break; + + case OPT_PB: + opt->pb = value; + break; + + case OPT_MODE: + opt->mode = value; + break; + + case OPT_NICE: + opt->nice_len = value; + break; + + case OPT_MF: + opt->mf = value; + break; + + case OPT_DEPTH: + opt->depth = value; + break; + } +} + + +extern lzma_options_lzma * +options_lzma(const char *str) +{ + static const name_id_map modes[] = { + { "fast", LZMA_MODE_FAST }, + { "normal", LZMA_MODE_NORMAL }, + { NULL, 0 } + }; + + static const name_id_map mfs[] = { + { "hc3", LZMA_MF_HC3 }, + { "hc4", LZMA_MF_HC4 }, + { "bt2", LZMA_MF_BT2 }, + { "bt3", LZMA_MF_BT3 }, + { "bt4", LZMA_MF_BT4 }, + { NULL, 0 } + }; + + static const option_map opts[] = { + { "preset", NULL, UINT64_MAX, 0 }, + { "dict", NULL, LZMA_DICT_SIZE_MIN, + (UINT32_C(1) << 30) + (UINT32_C(1) << 29) }, + { "lc", NULL, LZMA_LCLP_MIN, LZMA_LCLP_MAX }, + { "lp", NULL, LZMA_LCLP_MIN, LZMA_LCLP_MAX }, + { "pb", NULL, LZMA_PB_MIN, LZMA_PB_MAX }, + { "mode", modes, 0, 0 }, + { "nice", NULL, 2, 273 }, + { "mf", mfs, 0, 0 }, + { "depth", NULL, 0, UINT32_MAX }, + { NULL, NULL, 0, 0 } + }; + + lzma_options_lzma *options = xmalloc(sizeof(lzma_options_lzma)); + if (lzma_lzma_preset(options, LZMA_PRESET_DEFAULT)) + message_bug(); + + parse_options(str, opts, &set_lzma, options); + + if (options->lc + options->lp > LZMA_LCLP_MAX) + message_fatal(_("The sum of lc and lp must not exceed 4")); + + return options; +} diff --git a/src/native/external/xz/src/xz/options.h b/src/native/external/xz/src/xz/options.h new file mode 100644 index 00000000000000..4a1314db26f089 --- /dev/null +++ b/src/native/external/xz/src/xz/options.h @@ -0,0 +1,30 @@ +// SPDX-License-Identifier: 0BSD + +/////////////////////////////////////////////////////////////////////////////// +// +/// \file options.h +/// \brief Parser for filter-specific options +// +// Author: Lasse Collin +// +/////////////////////////////////////////////////////////////////////////////// + +/// \brief Parser for Delta options +/// +/// \return Pointer to allocated options structure. +/// Doesn't return on error. +extern lzma_options_delta *options_delta(const char *str); + + +/// \brief Parser for BCJ options +/// +/// \return Pointer to allocated options structure. +/// Doesn't return on error. +extern lzma_options_bcj *options_bcj(const char *str); + + +/// \brief Parser for LZMA options +/// +/// \return Pointer to allocated options structure. +/// Doesn't return on error. +extern lzma_options_lzma *options_lzma(const char *str); diff --git a/src/native/external/xz/src/xz/private.h b/src/native/external/xz/src/xz/private.h new file mode 100644 index 00000000000000..d351a995eec410 --- /dev/null +++ b/src/native/external/xz/src/xz/private.h @@ -0,0 +1,81 @@ +// SPDX-License-Identifier: 0BSD + +/////////////////////////////////////////////////////////////////////////////// +// +/// \file private.h +/// \brief Common includes, definitions, and prototypes +// +// Author: Lasse Collin +// +/////////////////////////////////////////////////////////////////////////////// + +#include "sysdefs.h" +#include "mythread.h" + +#include "lzma.h" + +#include +#include +#include +#include +#include +#include + +#ifndef _MSC_VER +# include +#endif + +#include "tuklib_gettext.h" +#include "tuklib_progname.h" +#include "tuklib_exit.h" +#include "tuklib_mbstr_nonprint.h" +#include "tuklib_mbstr.h" + +#if defined(_WIN32) && !defined(__CYGWIN__) +# define WIN32_LEAN_AND_MEAN +# include +#endif + +#ifdef _MSC_VER +# define fileno _fileno +#endif + +#ifndef STDIN_FILENO +# define STDIN_FILENO (fileno(stdin)) +#endif + +#ifndef STDOUT_FILENO +# define STDOUT_FILENO (fileno(stdout)) +#endif + +#ifndef STDERR_FILENO +# define STDERR_FILENO (fileno(stderr)) +#endif + +// Handling SIGTSTP keeps time-keeping for progress indicator correct +// if xz is stopped. It requires use of clock_gettime() as that is +// async-signal safe in POSIX. Require also SIGALRM support since +// on systems where SIGALRM isn't available, progress indicator code +// polls the time and the SIGTSTP handling adds slight overhead to +// that code. Most (all?) systems that have SIGTSTP also have SIGALRM +// so this requirement won't exclude many systems. +#if defined(HAVE_CLOCK_GETTIME) && defined(SIGTSTP) && defined(SIGALRM) +# define USE_SIGTSTP_HANDLER 1 +#endif + +#include "main.h" +#include "mytime.h" +#include "coder.h" +#include "message.h" +#include "args.h" +#include "hardware.h" +#include "file_io.h" +#include "options.h" +#include "sandbox.h" +#include "signals.h" +#include "suffix.h" +#include "util.h" + +#ifdef HAVE_DECODERS +# include "list.h" +#endif diff --git a/src/native/external/xz/src/xz/sandbox.c b/src/native/external/xz/src/xz/sandbox.c new file mode 100644 index 00000000000000..f1826f7c6174c5 --- /dev/null +++ b/src/native/external/xz/src/xz/sandbox.c @@ -0,0 +1,317 @@ +// SPDX-License-Identifier: 0BSD + +/////////////////////////////////////////////////////////////////////////////// +// +/// \file sandbox.c +/// \brief Sandbox support +/// +/// \note When sandbox_init() is called, gettext hasn't been +/// initialized yet, and thus wrapping error messages +/// in _("...") is pointless in that function. In other +/// functions gettext can be used, but the only error message +/// we have is "Failed to enable the sandbox" which should +/// (almost) never occur. If it does occur anyway, leaving +/// the message untranslated can make it easier to find +/// bug reports about the issue. +// +// Author: Lasse Collin +// +/////////////////////////////////////////////////////////////////////////////// + +#include "private.h" + + +#ifndef ENABLE_SANDBOX + +// Prevent an empty translation unit when no sandboxing is supported. +typedef int dummy; + +#else + +/// If the conditions for strict sandboxing (described in main()) +/// have been met, sandbox_allow_strict() can be called to set this +/// variable to true. +static bool strict_sandbox_allowed = false; + + +extern void +sandbox_allow_strict(void) +{ + strict_sandbox_allowed = true; + return; +} + + +// Strict sandboxing prevents opening any files. This *tries* to ensure +// that any auxiliary files that might be required are already open. +// +// Returns true if strict sandboxing is allowed, false otherwise. +static bool +prepare_for_strict_sandbox(void) +{ + if (!strict_sandbox_allowed) + return false; + + const char dummy_str[] = "x"; + + // Try to ensure that both libc and xz locale files have been + // loaded when NLS is enabled. + snprintf(NULL, 0, "%s%s", _(dummy_str), strerror(EINVAL)); + + // Try to ensure that iconv data files needed for handling multibyte + // characters have been loaded. This is needed at least with glibc. + tuklib_mbstr_width(dummy_str, NULL); + + return true; +} + +#endif + + +#if defined(HAVE_PLEDGE) + +/////////////// +// pledge(2) // +/////////////// + +#include + + +extern void +sandbox_init(void) +{ + if (pledge("stdio rpath wpath cpath fattr", "")) + message_fatal("Failed to enable the sandbox"); + + return; +} + + +extern void +sandbox_enable_read_only(void) +{ + // We will be opening files for reading but + // won't create or remove any files. + if (pledge("stdio rpath", "")) + message_fatal("Failed to enable the sandbox"); + + return; +} + + +extern void +sandbox_enable_strict_if_allowed(int src_fd lzma_attribute((__unused__)), + int pipe_event_fd lzma_attribute((__unused__)), + int pipe_write_fd lzma_attribute((__unused__))) +{ + if (!prepare_for_strict_sandbox()) + return; + + // All files that need to be opened have already been opened. + if (pledge("stdio", "")) + message_fatal("Failed to enable the sandbox"); + + return; +} + + +#elif defined(HAVE_LINUX_LANDLOCK) + +////////////// +// Landlock // +////////////// + +#include "my_landlock.h" + + +// The required_rights should have those bits set that must not be restricted. +// This function will then bitwise-and ~required_rights with a mask matching +// the Landlock ABI version, leaving only those bits set that are supported +// by the ABI and allowed to be restricted by the function argument. +static void +enable_landlock(uint64_t required_rights) +{ + // Initialize the ruleset to forbid all actions that the available + // Landlock ABI version supports. Return if Landlock isn't supported + // at all. + struct landlock_ruleset_attr attr; + if (my_landlock_ruleset_attr_forbid_all(&attr) == -1) + return; + + // Allow the required rights. + attr.handled_access_fs &= ~required_rights; + + // Create the ruleset in the kernel. This shouldn't fail. + const int ruleset_fd = my_landlock_create_ruleset( + &attr, sizeof(attr), 0); + if (ruleset_fd < 0) + message_fatal("Failed to enable the sandbox"); + + // All files we need should have already been opened. Thus, + // we don't need to add any rules using landlock_add_rule(2) + // before activating the sandbox. + // + // NOTE: It's possible that the hack prepare_for_strict_sandbox() + // isn't be good enough. It tries to get translations and + // libc-specific files loaded but if it's not good enough + // then perhaps a Landlock rule to allow reading from /usr + // and/or the xz installation prefix would be needed. + // + // prctl(PR_SET_NO_NEW_PRIVS, ...) was already called in + // sandbox_init() so we don't do it here again. + if (my_landlock_restrict_self(ruleset_fd, 0) != 0) + message_fatal("Failed to enable the sandbox"); + + (void)close(ruleset_fd); + return; +} + + +extern void +sandbox_init(void) +{ + // Prevent the process from gaining new privileges. This must be done + // before landlock_restrict_self(2) but since we will never need new + // privileges, this call can be done here already. + // + // This is supported since Linux 3.5. Ignore the return value to + // keep compatibility with old kernels. landlock_restrict_self(2) + // will fail if the no_new_privs attribute isn't set, thus if prctl() + // fails here the error will still be detected when it matters. + (void)prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + + // These are all in ABI version 1 already. We don't need truncate + // rights because files are created with open() using O_EXCL and + // without O_TRUNC. + // + // LANDLOCK_ACCESS_FS_READ_DIR is required to synchronize the + // directory before removing the source file. + // + // LANDLOCK_ACCESS_FS_READ_DIR is also helpful to show a clear error + // message if xz is given a directory name. Without this permission + // the message would be "Permission denied" but with this permission + // it's "Is a directory, skipping". It could be worked around with + // stat()/lstat() but just giving this permission is simpler and + // shouldn't make the sandbox much weaker in practice. + const uint64_t required_rights + = LANDLOCK_ACCESS_FS_WRITE_FILE + | LANDLOCK_ACCESS_FS_READ_FILE + | LANDLOCK_ACCESS_FS_READ_DIR + | LANDLOCK_ACCESS_FS_REMOVE_FILE + | LANDLOCK_ACCESS_FS_MAKE_REG; + + enable_landlock(required_rights); + return; +} + + +extern void +sandbox_enable_read_only(void) +{ + // We will be opening files for reading but + // won't create or remove any files. + const uint64_t required_rights + = LANDLOCK_ACCESS_FS_READ_FILE + | LANDLOCK_ACCESS_FS_READ_DIR; + enable_landlock(required_rights); + return; +} + + +extern void +sandbox_enable_strict_if_allowed(int src_fd lzma_attribute((__unused__)), + int pipe_event_fd lzma_attribute((__unused__)), + int pipe_write_fd lzma_attribute((__unused__))) +{ + if (!prepare_for_strict_sandbox()) + return; + + // Allow all restrictions that the kernel supports with the + // highest Landlock ABI version that the kernel or xz supports. + // + // NOTE: LANDLOCK_ACCESS_FS_READ_DIR isn't needed here because + // the only input file has already been opened. + enable_landlock(0); + return; +} + + +#elif defined(HAVE_CAP_RIGHTS_LIMIT) + +////////////// +// Capsicum // +////////////// + +#include + + +extern void +sandbox_init(void) +{ + // Nothing to do. + return; +} + + +extern void +sandbox_enable_read_only(void) +{ + // Nothing to do. + return; +} + + +extern void +sandbox_enable_strict_if_allowed( + int src_fd, int pipe_event_fd, int pipe_write_fd) +{ + if (!prepare_for_strict_sandbox()) + return; + + // Capsicum needs FreeBSD 10.2 or later. + cap_rights_t rights; + + if (cap_enter()) + goto error; + + if (cap_rights_limit(src_fd, cap_rights_init(&rights, + CAP_EVENT, CAP_FCNTL, CAP_LOOKUP, CAP_READ, CAP_SEEK))) + goto error; + + // If not reading from stdin, remove all capabilities from it. + if (src_fd != STDIN_FILENO && cap_rights_limit( + STDIN_FILENO, cap_rights_init(&rights))) + goto error; + + if (cap_rights_limit(STDOUT_FILENO, cap_rights_init(&rights, + CAP_EVENT, CAP_FCNTL, CAP_FSTAT, CAP_LOOKUP, + CAP_WRITE, CAP_SEEK))) + goto error; + + if (cap_rights_limit(STDERR_FILENO, cap_rights_init(&rights, + CAP_WRITE))) + goto error; + + if (cap_rights_limit(pipe_event_fd, cap_rights_init(&rights, + CAP_EVENT))) + goto error; + + if (cap_rights_limit(pipe_write_fd, cap_rights_init(&rights, + CAP_WRITE))) + goto error; + + return; + +error: + // If a kernel is configured without capability mode support or + // used in an emulator that does not implement the capability + // system calls, then the Capsicum system calls will fail and set + // errno to ENOSYS. In that case xz will silently run without + // the sandbox. + if (errno == ENOSYS) + return; + + message_fatal("Failed to enable the sandbox"); +} + +#endif diff --git a/src/native/external/xz/src/xz/sandbox.h b/src/native/external/xz/src/xz/sandbox.h new file mode 100644 index 00000000000000..98b9862a1c0a4d --- /dev/null +++ b/src/native/external/xz/src/xz/sandbox.h @@ -0,0 +1,43 @@ +// SPDX-License-Identifier: 0BSD + +/////////////////////////////////////////////////////////////////////////////// +// +/// \file sandbox.h +/// \brief Sandbox support +// +// Author: Lasse Collin +// +/////////////////////////////////////////////////////////////////////////////// + +#if defined(HAVE_PLEDGE) || defined(HAVE_LINUX_LANDLOCK) \ + || defined(HAVE_CAP_RIGHTS_LIMIT) +# define ENABLE_SANDBOX 1 +#endif + + +/// \brief Enables early sandboxing that can always be enabled +/// +/// This requires that tuklib_progname() and io_init() have been called. +extern void sandbox_init(void); + + +/// \brief Enable sandboxing that only allows opening files for reading +extern void sandbox_enable_read_only(void); + + +/// \brief Tell sandboxing code that strict sandboxing can be used +/// +/// This function only sets a flag which will be read by +/// sandbox_enable_strict_if_allowed(). +extern void sandbox_allow_strict(void); + + +/// \brief Enable sandboxing that allows reading from one file +/// +/// This does nothing if sandbox_allow_strict() hasn't been called. +/// +/// \param src_fd File descriptor open for reading +/// \param pipe_event_fd user_abort_pipe[0] from file_io.c +/// \param pipe_write_fd user_abort_pipe[1] from file_io.c +extern void sandbox_enable_strict_if_allowed( + int src_fd, int pipe_event_fd, int pipe_write_fd); diff --git a/src/native/external/xz/src/xz/signals.c b/src/native/external/xz/src/xz/signals.c new file mode 100644 index 00000000000000..64643695ad4dbf --- /dev/null +++ b/src/native/external/xz/src/xz/signals.c @@ -0,0 +1,237 @@ +// SPDX-License-Identifier: 0BSD + +/////////////////////////////////////////////////////////////////////////////// +// +/// \file signals.c +/// \brief Handling signals to abort operation +// +// Author: Lasse Collin +// +/////////////////////////////////////////////////////////////////////////////// + +#include "private.h" + + +volatile sig_atomic_t user_abort = false; + + +#if !(defined(_WIN32) && !defined(__CYGWIN__)) + +/// If we were interrupted by a signal, we store the signal number so that +/// we can raise that signal to kill the program when all cleanups have +/// been done. +static volatile sig_atomic_t exit_signal = 0; + +/// Mask of signals for which we have established a signal handler to set +/// user_abort to true. +static sigset_t hooked_signals; + +/// True once signals_init() has finished. This is used to skip blocking +/// signals (with uninitialized hooked_signals) if signals_block() and +/// signals_unblock() are called before signals_init() has been called. +static bool signals_are_initialized = false; + + +static void +signal_handler(int sig) +{ + exit_signal = sig; + user_abort = true; + +#ifndef TUKLIB_DOSLIKE + io_write_to_user_abort_pipe(); +#endif + + return; +} + + +#ifdef __APPLE__ +# pragma GCC diagnostic push +# pragma GCC diagnostic ignored "-Wsign-conversion" +#endif +extern void +signals_init(void) +{ + // List of signals for which we establish the signal handler. + static const int sigs[] = { + SIGINT, + SIGTERM, +#ifdef SIGHUP + SIGHUP, +#endif +#ifdef SIGPIPE + SIGPIPE, +#endif +#ifdef SIGXCPU + SIGXCPU, +#endif +#ifdef SIGXFSZ + SIGXFSZ, +#endif + }; + + // Mask of the signals for which we have established a signal handler. + sigemptyset(&hooked_signals); + for (size_t i = 0; i < ARRAY_SIZE(sigs); ++i) + sigaddset(&hooked_signals, sigs[i]); + +#ifdef SIGALRM + // Add also the signals from message.c to hooked_signals. + for (size_t i = 0; message_progress_sigs[i] != 0; ++i) + sigaddset(&hooked_signals, message_progress_sigs[i]); +#endif + +#ifdef USE_SIGTSTP_HANDLER + // Add the SIGTSTP handler from mytime.c to hooked_signals. + sigaddset(&hooked_signals, SIGTSTP); +#endif + + // Using "my_sa" because "sa" may conflict with a sockaddr variable + // from system headers on Solaris. + struct sigaction my_sa; + + // All the signals that we handle we also blocked while the signal + // handler runs. + my_sa.sa_mask = hooked_signals; + + // Don't set SA_RESTART, because we want EINTR so that we can check + // for user_abort and cleanup before exiting. We block the signals + // for which we have established a handler when we don't want EINTR. + my_sa.sa_flags = 0; + my_sa.sa_handler = &signal_handler; + + struct sigaction old; + + for (size_t i = 0; i < ARRAY_SIZE(sigs); ++i) { + // If the parent process has left some signals ignored, + // we don't unignore them. + if (sigaction(sigs[i], NULL, &old) == 0 + && old.sa_handler == SIG_IGN) + continue; + + // Establish the signal handler. + if (sigaction(sigs[i], &my_sa, NULL)) + message_signal_handler(); + } + +#ifdef USE_SIGTSTP_HANDLER + if (!(sigaction(SIGTSTP, NULL, &old) == 0 + && old.sa_handler == SIG_IGN)) { + my_sa.sa_handler = &mytime_sigtstp_handler; + if (sigaction(SIGTSTP, &my_sa, NULL)) + message_signal_handler(); + } +#endif + + signals_are_initialized = true; + + return; +} +#ifdef __APPLE__ +# pragma GCC diagnostic pop +#endif + + +#ifndef __VMS +/// signals_block() and signals_unblock() can be called recursively. +static size_t signals_block_count = 0; + + +extern void +signals_block(void) +{ + if (signals_are_initialized) { + if (signals_block_count++ == 0) { + const int saved_errno = errno; + mythread_sigmask(SIG_BLOCK, &hooked_signals, NULL); + errno = saved_errno; + } + } + + return; +} + + +extern void +signals_unblock(void) +{ + if (signals_are_initialized) { + assert(signals_block_count > 0); + + if (--signals_block_count == 0) { + const int saved_errno = errno; + mythread_sigmask(SIG_UNBLOCK, &hooked_signals, NULL); + errno = saved_errno; + } + } + + return; +} +#endif + + +extern void +signals_exit(void) +{ + const int sig = (int)exit_signal; + + if (sig != 0) { +#if defined(TUKLIB_DOSLIKE) || defined(__VMS) + // Don't raise(), set only exit status. This avoids + // printing unwanted message about SIGINT when the user + // presses C-c. + set_exit_status(E_ERROR); +#else + struct sigaction sa; + sa.sa_handler = SIG_DFL; + sigfillset(&sa.sa_mask); + sa.sa_flags = 0; + + // This shouldn't fail, but check it anyway. + if (sigaction(sig, &sa, NULL) == 0) + raise(sig); + + // We shouldn't get here, but just in case we do, + // make main() exit with E_ERROR. + set_exit_status(E_ERROR); +#endif + } + + return; +} + +#else + +// While Windows has some very basic signal handling functions as required +// by C89, they are not really used, and e.g. SIGINT doesn't work exactly +// the way it does on POSIX (Windows creates a new thread for the signal +// handler). Instead, we use SetConsoleCtrlHandler() to catch user +// pressing C-c, because that seems to be the recommended way to do it. +// +// NOTE: This doesn't work under MSYS. Trying with SIGINT doesn't work +// either even if it appeared to work at first. So test using Windows +// console window. + +static BOOL WINAPI +signal_handler(DWORD type lzma_attribute((__unused__))) +{ + // Since we don't get a signal number which we could raise() at + // signals_exit() like on POSIX, just set the exit status to + // indicate an error, so that we cannot return with zero exit status. + set_exit_status(E_ERROR); + user_abort = true; + return TRUE; +} + + +extern void +signals_init(void) +{ + if (!SetConsoleCtrlHandler(&signal_handler, TRUE)) + message_signal_handler(); + + return; +} + +#endif diff --git a/src/native/external/xz/src/xz/signals.h b/src/native/external/xz/src/xz/signals.h new file mode 100644 index 00000000000000..629335d4c96b1f --- /dev/null +++ b/src/native/external/xz/src/xz/signals.h @@ -0,0 +1,42 @@ +// SPDX-License-Identifier: 0BSD + +/////////////////////////////////////////////////////////////////////////////// +// +/// \file signals.h +/// \brief Handling signals to abort operation +// +// Author: Lasse Collin +// +/////////////////////////////////////////////////////////////////////////////// + +/// If this is true, we will clean up the possibly incomplete output file, +/// return to main() as soon as practical. That is, the code needs to poll +/// this variable in various places. +extern volatile sig_atomic_t user_abort; + + +/// Initialize the signal handler, which will set user_abort to true when +/// user e.g. presses C-c. +extern void signals_init(void); + + +#if (defined(_WIN32) && !defined(__CYGWIN__)) || defined(__VMS) +# define signals_block() do { } while (0) +# define signals_unblock() do { } while (0) +#else +/// Block the signals which don't have SA_RESTART and which would just set +/// user_abort to true. This is handy when we don't want to handle EINTR +/// and don't want SA_RESTART either. +extern void signals_block(void); + +/// Unblock the signals blocked by signals_block(). +extern void signals_unblock(void); +#endif + +#if defined(_WIN32) && !defined(__CYGWIN__) +# define signals_exit() do { } while (0) +#else +/// If user has sent us a signal earlier to terminate the process, +/// re-raise that signal to actually terminate the process. +extern void signals_exit(void); +#endif diff --git a/src/native/external/xz/src/xz/suffix.c b/src/native/external/xz/src/xz/suffix.c new file mode 100644 index 00000000000000..2fd4c7fc9573f4 --- /dev/null +++ b/src/native/external/xz/src/xz/suffix.c @@ -0,0 +1,408 @@ +// SPDX-License-Identifier: 0BSD + +/////////////////////////////////////////////////////////////////////////////// +// +/// \file suffix.c +/// \brief Checks filename suffix and creates the destination filename +// +// Author: Lasse Collin +// +/////////////////////////////////////////////////////////////////////////////// + +#include "private.h" + +#ifdef __DJGPP__ +# include +#endif + +// For case-insensitive filename suffix on case-insensitive systems +#if defined(TUKLIB_DOSLIKE) || defined(__VMS) +# ifdef HAVE_STRINGS_H +# include +# endif +# ifdef _MSC_VER +# define suffix_strcmp _stricmp +# else +# define suffix_strcmp strcasecmp +# endif +#else +# define suffix_strcmp strcmp +#endif + + +static char *custom_suffix = NULL; + + +/// \brief Test if the char is a directory separator +static bool +is_dir_sep(char c) +{ +#ifdef TUKLIB_DOSLIKE + return c == '/' || c == '\\' || c == ':'; +#else + return c == '/'; +#endif +} + + +/// \brief Test if the string contains a directory separator +static bool +has_dir_sep(const char *str) +{ +#ifdef TUKLIB_DOSLIKE + return strpbrk(str, "/\\:") != NULL; +#else + return strchr(str, '/') != NULL; +#endif +} + + +#ifdef __DJGPP__ +/// \brief Test for special suffix used for 8.3 short filenames (SFN) +/// +/// \return If str matches *.?- or *.??-, true is returned. Otherwise +/// false is returned. +static bool +has_sfn_suffix(const char *str, size_t len) +{ + if (len >= 4 && str[len - 1] == '-' && str[len - 2] != '.' + && !is_dir_sep(str[len - 2])) { + // *.?- + if (str[len - 3] == '.') + return !is_dir_sep(str[len - 4]); + + // *.??- + if (len >= 5 && !is_dir_sep(str[len - 3]) + && str[len - 4] == '.') + return !is_dir_sep(str[len - 5]); + } + + return false; +} +#endif + + +/// \brief Checks if src_name has given compressed_suffix +/// +/// \param suffix Filename suffix to look for +/// \param src_name Input filename +/// \param src_len strlen(src_name) +/// +/// \return If src_name has the suffix, src_len - strlen(suffix) is +/// returned. It's always a positive integer. Otherwise zero +/// is returned. +static size_t +test_suffix(const char *suffix, const char *src_name, size_t src_len) +{ + const size_t suffix_len = strlen(suffix); + + // The filename must have at least one character in addition to + // the suffix. src_name may contain path to the filename, so we + // need to check for directory separator too. + if (src_len <= suffix_len + || is_dir_sep(src_name[src_len - suffix_len - 1])) + return 0; + + if (suffix_strcmp(suffix, src_name + src_len - suffix_len) == 0) + return src_len - suffix_len; + + return 0; +} + + +/// \brief Removes the filename suffix of the compressed file +/// +/// \return Name of the uncompressed file, or NULL if file has unknown +/// suffix. +static char * +uncompressed_name(const char *src_name, const size_t src_len) +{ + static const struct { + const char *compressed; + const char *uncompressed; + } suffixes[] = { + { ".xz", "" }, + { ".txz", ".tar" }, // .txz abbreviation for .txt.gz is rare. + { ".lzma", "" }, +#ifdef __DJGPP__ + { ".lzm", "" }, +#endif + { ".tlz", ".tar" }, // Both .tar.lzma and .tar.lz +#ifdef HAVE_LZIP_DECODER + { ".lz", "" }, +#endif + }; + + const char *new_suffix = ""; + size_t new_len = 0; + + if (opt_format != FORMAT_RAW) { + for (size_t i = 0; i < ARRAY_SIZE(suffixes); ++i) { + new_len = test_suffix(suffixes[i].compressed, + src_name, src_len); + if (new_len != 0) { + new_suffix = suffixes[i].uncompressed; + break; + } + } + +#ifdef __DJGPP__ + // Support also *.?- -> *.? and *.??- -> *.?? on DOS. + // This is done also when long filenames are available + // to keep it easy to decompress files created when + // long filename support wasn't available. + if (new_len == 0 && has_sfn_suffix(src_name, src_len)) { + new_suffix = ""; + new_len = src_len - 1; + } +#endif + } + + if (new_len == 0 && custom_suffix != NULL) + new_len = test_suffix(custom_suffix, src_name, src_len); + + if (new_len == 0) { + message_warning(_("%s: Filename has an unknown suffix, " + "skipping"), tuklib_mask_nonprint(src_name)); + return NULL; + } + + const size_t new_suffix_len = strlen(new_suffix); + char *dest_name = xmalloc(new_len + new_suffix_len + 1); + + memcpy(dest_name, src_name, new_len); + memcpy(dest_name + new_len, new_suffix, new_suffix_len); + dest_name[new_len + new_suffix_len] = '\0'; + + return dest_name; +} + + +static void +msg_suffix(const char *src_name, const char *suffix) +{ + char *mem = NULL; + message_warning(_("%s: File already has '%s' suffix, skipping"), + tuklib_mask_nonprint(src_name), + tuklib_mask_nonprint_r(suffix, &mem)); + free(mem); + return; +} + + +/// \brief Appends suffix to src_name +/// +/// In contrast to uncompressed_name(), we check only suffixes that are valid +/// for the specified file format. +static char * +compressed_name(const char *src_name, size_t src_len) +{ + // The order of these must match the order in args.h. + static const char *const all_suffixes[][4] = { + { + ".xz", + ".txz", + NULL + }, { + ".lzma", +#ifdef __DJGPP__ + ".lzm", +#endif + ".tlz", + NULL +#ifdef HAVE_LZIP_DECODER + // This is needed to keep the table indexing in sync with + // enum format_type from coder.h. + }, { +/* + ".lz", +*/ + NULL +#endif + }, { + // --format=raw requires specifying the suffix + // manually or using stdout. + NULL + } + }; + + // args.c ensures these. + assert(opt_format != FORMAT_AUTO); +#ifdef HAVE_LZIP_DECODER + assert(opt_format != FORMAT_LZIP); +#endif + + const size_t format = opt_format - 1; + const char *const *suffixes = all_suffixes[format]; + + // Look for known filename suffixes and refuse to compress them. + for (size_t i = 0; suffixes[i] != NULL; ++i) { + if (test_suffix(suffixes[i], src_name, src_len) != 0) { + msg_suffix(src_name, suffixes[i]); + return NULL; + } + } + +#ifdef __DJGPP__ + // Recognize also the special suffix that is used when long + // filename (LFN) support isn't available. This suffix is + // recognized on LFN systems too. + if (opt_format == FORMAT_XZ && has_sfn_suffix(src_name, src_len)) { + msg_suffix(src_name, "-"); + return NULL; + } +#endif + + if (custom_suffix != NULL) { + if (test_suffix(custom_suffix, src_name, src_len) != 0) { + msg_suffix(src_name, custom_suffix); + return NULL; + } + } + + const char *suffix = custom_suffix != NULL + ? custom_suffix : suffixes[0]; + size_t suffix_len = strlen(suffix); + +#ifdef __DJGPP__ + if (!_use_lfn(src_name)) { + // Long filename (LFN) support isn't available and we are + // limited to 8.3 short filenames (SFN). + // + // Look for suffix separator from the filename, and make sure + // that it is in the filename, not in a directory name. + const char *sufsep = strrchr(src_name, '.'); + if (sufsep == NULL || sufsep[1] == '\0' + || has_dir_sep(sufsep)) { + // src_name has no filename extension. + // + // Examples: + // xz foo -> foo.xz + // xz -F lzma foo -> foo.lzm + // xz -S x foo -> foox + // xz -S x foo. -> foo.x + // xz -S x.y foo -> foox.y + // xz -S .x foo -> foo.x + // xz -S .x foo. -> foo.x + // + // Avoid double dots: + if (sufsep != NULL && sufsep[1] == '\0' + && suffix[0] == '.') + --src_len; + + } else if (custom_suffix == NULL + && strcasecmp(sufsep, ".tar") == 0) { + // ".tar" is handled specially. + // + // Examples: + // xz foo.tar -> foo.txz + // xz -F lzma foo.tar -> foo.tlz + static const char *const tar_suffixes[] = { + ".txz", // .tar.xz + ".tlz", // .tar.lzma +/* + ".tlz", // .tar.lz +*/ + }; + suffix = tar_suffixes[format]; + suffix_len = 4; + src_len -= 4; + + } else { + if (custom_suffix == NULL && opt_format == FORMAT_XZ) { + // Instead of the .xz suffix, use a single + // character at the end of the filename + // extension. This is to minimize name + // conflicts when compressing multiple files + // with the same basename. E.g. foo.txt and + // foo.exe become foo.tx- and foo.ex-. Dash + // is rare as the last character of the + // filename extension, so it seems to be + // quite safe choice and it stands out better + // in directory listings than e.g. x. For + // comparison, gzip uses z. + suffix = "-"; + suffix_len = 1; + } + + if (suffix[0] == '.') { + // The first character of the suffix is a dot. + // Throw away the original filename extension + // and replace it with the new suffix. + // + // Examples: + // xz -F lzma foo.txt -> foo.lzm + // xz -S .x foo.txt -> foo.x + src_len = sufsep - src_name; + + } else { + // The first character of the suffix is not + // a dot. Preserve the first 0-2 characters + // of the original filename extension. + // + // Examples: + // xz foo.txt -> foo.tx- + // xz -S x foo.c -> foo.cx + // xz -S ab foo.c -> foo.cab + // xz -S ab foo.txt -> foo.tab + // xz -S abc foo.txt -> foo.abc + // + // Truncate the suffix to three chars: + if (suffix_len > 3) + suffix_len = 3; + + // If needed, overwrite 1-3 characters. + if (strlen(sufsep) > 4 - suffix_len) + src_len = sufsep - src_name + + 4 - suffix_len; + } + } + } +#endif + + char *dest_name = xmalloc(src_len + suffix_len + 1); + + memcpy(dest_name, src_name, src_len); + memcpy(dest_name + src_len, suffix, suffix_len); + dest_name[src_len + suffix_len] = '\0'; + + return dest_name; +} + + +extern char * +suffix_get_dest_name(const char *src_name) +{ + assert(src_name != NULL); + + // Length of the name is needed in all cases to locate the end of + // the string to compare the suffix, so calculate the length here. + const size_t src_len = strlen(src_name); + + return opt_mode == MODE_COMPRESS + ? compressed_name(src_name, src_len) + : uncompressed_name(src_name, src_len); +} + + +extern void +suffix_set(const char *suffix) +{ + // Empty suffix and suffixes having a directory separator are + // rejected. Such suffixes would break things later. + if (suffix[0] == '\0' || has_dir_sep(suffix)) + message_fatal(_("%s: Invalid filename suffix"), + tuklib_mask_nonprint(suffix)); + + // Replace the old custom_suffix (if any) with the new suffix. + free(custom_suffix); + custom_suffix = xstrdup(suffix); + return; +} + + +extern bool +suffix_is_set(void) +{ + return custom_suffix != NULL; +} diff --git a/src/native/external/xz/src/xz/suffix.h b/src/native/external/xz/src/xz/suffix.h new file mode 100644 index 00000000000000..f59e3123ca8dbf --- /dev/null +++ b/src/native/external/xz/src/xz/suffix.h @@ -0,0 +1,35 @@ +// SPDX-License-Identifier: 0BSD + +/////////////////////////////////////////////////////////////////////////////// +// +/// \file suffix.h +/// \brief Checks filename suffix and creates the destination filename +// +// Author: Lasse Collin +// +/////////////////////////////////////////////////////////////////////////////// + +/// \brief Get the name of the destination file +/// +/// Depending on the global variable opt_mode, this tries to find a matching +/// counterpart for src_name. If the name can be constructed, it is allocated +/// and returned (caller must free it). On error, a message is printed and +/// NULL is returned. +extern char *suffix_get_dest_name(const char *src_name); + + +/// \brief Set a custom filename suffix +/// +/// This function calls xstrdup() for the given suffix, thus the caller +/// doesn't need to keep the memory allocated. There can be only one custom +/// suffix, thus if this is called multiple times, the old suffixes are freed +/// and forgotten. +extern void suffix_set(const char *suffix); + + +/// \brief Check if a custom suffix has been set +/// +/// Returns true if the internal tracking of the suffix string has been set +/// and false if the string has not been set. This will keep the suffix +/// string encapsulated instead of extern-ing the variable. +extern bool suffix_is_set(void); diff --git a/src/native/external/xz/src/xz/util.c b/src/native/external/xz/src/xz/util.c new file mode 100644 index 00000000000000..bb9b86ad382cba --- /dev/null +++ b/src/native/external/xz/src/xz/util.c @@ -0,0 +1,315 @@ +// SPDX-License-Identifier: 0BSD + +/////////////////////////////////////////////////////////////////////////////// +// +/// \file util.c +/// \brief Miscellaneous utility functions +// +// Author: Lasse Collin +// +/////////////////////////////////////////////////////////////////////////////// + +#include "private.h" +#include + +#if defined(_WIN32) && !defined(__CYGWIN__) +# include +#endif + + +/// Buffers for uint64_to_str() and uint64_to_nicestr() +static char bufs[4][128]; + + +// Thousand separator support in uint64_to_str() and uint64_to_nicestr(): +// +// DJGPP 2.05 added support for thousands separators but it's broken +// at least under WinXP with Finnish locale that uses a non-breaking space +// as the thousands separator. Workaround by disabling thousands separators +// for DJGPP builds. +// +// MSVC doesn't support thousand separators. +// +// MinGW-w64 supports thousand separators only with its own stdio functions +// which our sysdefs.h disables when _UCRT && HAVE_SMALL. +#if defined(__DJGPP__) || defined(_MSC_VER) \ + || (defined(__MINGW32__) && __USE_MINGW_ANSI_STDIO == 0) +# define FORMAT_THOUSAND_SEP(prefix, suffix) prefix suffix +# define check_thousand_sep(slot) do { } while (0) +#else +# define FORMAT_THOUSAND_SEP(prefix, suffix) ((thousand == WORKS) \ + ? prefix "'" suffix \ + : prefix suffix) + +static enum { UNKNOWN, WORKS, BROKEN } thousand = UNKNOWN; + +/// Check if thousands separator is supported. Run-time checking is easiest +/// because it seems to be sometimes lacking even on a POSIXish system. +/// Note that trying to use thousands separators when snprintf() doesn't +/// support them results in undefined behavior. This just has happened to +/// work well enough in practice. +/// +/// This must be called before using the FORMAT_THOUSAND_SEP macro. +static void +check_thousand_sep(uint32_t slot) +{ + if (thousand == UNKNOWN) { + bufs[slot][0] = '\0'; + snprintf(bufs[slot], sizeof(bufs[slot]), "%'u", 1U); + thousand = bufs[slot][0] == '1' ? WORKS : BROKEN; + } + + return; +} +#endif + + +extern void * +xrealloc(void *ptr, size_t size) +{ + assert(size > 0); + + // Save ptr so that we can free it if realloc fails. + // The point is that message_fatal ends up calling stdio functions + // which in some libc implementations might allocate memory from + // the heap. Freeing ptr improves the chances that there's free + // memory for stdio functions if they need it. + void *p = ptr; + ptr = realloc(ptr, size); + + if (ptr == NULL) { + const int saved_errno = errno; + free(p); + message_fatal("%s", strerror(saved_errno)); + } + + return ptr; +} + + +extern char * +xstrdup(const char *src) +{ + assert(src != NULL); + const size_t size = strlen(src) + 1; + char *dest = xmalloc(size); + return memcpy(dest, src, size); +} + + +extern uint64_t +str_to_uint64(const char *name, const char *value, uint64_t min, uint64_t max) +{ + uint64_t result = 0; + + // Skip blanks. + while (*value == ' ' || *value == '\t') + ++value; + + // Accept special value "max". Supporting "min" doesn't seem useful. + if (strcmp(value, "max") == 0) + return max; + + if (*value < '0' || *value > '9') + message_fatal(_("%s: %s"), value, + _("Value is not a non-negative decimal integer")); + + do { + // Don't overflow. + if (result > UINT64_MAX / 10) + goto error; + + result *= 10; + + // Another overflow check + const uint32_t add = (uint32_t)(*value - '0'); + if (UINT64_MAX - add < result) + goto error; + + result += add; + ++value; + } while (*value >= '0' && *value <= '9'); + + if (*value != '\0') { + // Look for suffix. Originally this supported both base-2 + // and base-10, but since there seems to be little need + // for base-10 in this program, treat everything as base-2 + // and also be more relaxed about the case of the first + // letter of the suffix. + uint64_t multiplier = 0; + if (*value == 'k' || *value == 'K') + multiplier = UINT64_C(1) << 10; + else if (*value == 'm' || *value == 'M') + multiplier = UINT64_C(1) << 20; + else if (*value == 'g' || *value == 'G') + multiplier = UINT64_C(1) << 30; + + ++value; + + // Allow also e.g. Ki, KiB, and KB. + if (*value != '\0' && strcmp(value, "i") != 0 + && strcmp(value, "iB") != 0 + && strcmp(value, "B") != 0) + multiplier = 0; + + if (multiplier == 0) { + message(V_ERROR, _("%s: Invalid multiplier suffix"), + value - 1); + message_fatal(_("Valid suffixes are 'KiB' (2^10), " + "'MiB' (2^20), and 'GiB' (2^30).")); + } + + // Don't overflow here either. + if (result > UINT64_MAX / multiplier) + goto error; + + result *= multiplier; + } + + if (result < min || result > max) + goto error; + + return result; + +error: + message_fatal(_("Value of the option '%s' must be in the range " + "[%" PRIu64 ", %" PRIu64 "]"), + name, min, max); +} + + +extern uint64_t +round_up_to_mib(uint64_t n) +{ + return (n >> 20) + ((n & ((UINT32_C(1) << 20) - 1)) != 0); +} + + +extern const char * +uint64_to_str(uint64_t value, uint32_t slot) +{ + assert(slot < ARRAY_SIZE(bufs)); + + check_thousand_sep(slot); + + snprintf(bufs[slot], sizeof(bufs[slot]), + FORMAT_THOUSAND_SEP("%", PRIu64), value); + + return bufs[slot]; +} + + +extern const char * +uint64_to_nicestr(uint64_t value, enum nicestr_unit unit_min, + enum nicestr_unit unit_max, bool always_also_bytes, + uint32_t slot) +{ + assert(unit_min <= unit_max); + assert(unit_max <= NICESTR_TIB); + assert(slot < ARRAY_SIZE(bufs)); + + check_thousand_sep(slot); + + enum nicestr_unit unit = NICESTR_B; + char *pos = bufs[slot]; + size_t left = sizeof(bufs[slot]); + + if ((unit_min == NICESTR_B && value < 10000) + || unit_max == NICESTR_B) { + // The value is shown as bytes. + my_snprintf(&pos, &left, FORMAT_THOUSAND_SEP("%", "u"), + (unsigned int)value); + } else { + // Scale the value to a nicer unit. Unless unit_min and + // unit_max limit us, we will show at most five significant + // digits with one decimal place. + double d = (double)(value); + do { + d /= 1024.0; + ++unit; + } while (unit < unit_min || (d > 9999.9 && unit < unit_max)); + + my_snprintf(&pos, &left, FORMAT_THOUSAND_SEP("%", ".1f"), d); + } + + static const char suffix[5][4] = { "B", "KiB", "MiB", "GiB", "TiB" }; + my_snprintf(&pos, &left, " %s", suffix[unit]); + + if (always_also_bytes && value >= 10000) + snprintf(pos, left, FORMAT_THOUSAND_SEP(" (%", PRIu64 " B)"), + value); + + return bufs[slot]; +} + + +extern void +my_snprintf(char **pos, size_t *left, const char *fmt, ...) +{ + va_list ap; + va_start(ap, fmt); + const int len = vsnprintf(*pos, *left, fmt, ap); + va_end(ap); + + // If an error occurred, we want the caller to think that the whole + // buffer was used. This way no more data will be written to the + // buffer. We don't need better error handling here, although it + // is possible that the result looks garbage on the terminal if + // e.g. an UTF-8 character gets split. That shouldn't (easily) + // happen though, because the buffers used have some extra room. + if (len < 0 || (size_t)(len) >= *left) { + *left = 0; + } else { + *pos += len; + *left -= (size_t)(len); + } + + return; +} + + +extern bool +is_tty(int fd) +{ +#if defined(_WIN32) && !defined(__CYGWIN__) + // There is no need to check if handle == INVALID_HANDLE_VALUE + // because it will return false anyway when used in GetConsoleMode(). + // The resulting HANDLE is owned by the file descriptor. + // The HANDLE must not be closed here. + intptr_t handle = _get_osfhandle(fd); + DWORD mode; + + // GetConsoleMode() is an easy way to tell if the HANDLE is a + // console or not. We do not care about the value of mode since we + // do not plan to use any further Windows console functions. + return GetConsoleMode((HANDLE)handle, &mode); +#else + return isatty(fd); +#endif +} + + +extern bool +is_tty_stdin(void) +{ + const bool ret = is_tty(STDIN_FILENO); + + if (ret) + message_error(_("Compressed data cannot be read from " + "a terminal")); + + return ret; +} + + +extern bool +is_tty_stdout(void) +{ + const bool ret = is_tty(STDOUT_FILENO); + + if (ret) + message_error(_("Compressed data cannot be written to " + "a terminal")); + + return ret; +} diff --git a/src/native/external/xz/src/xz/util.h b/src/native/external/xz/src/xz/util.h new file mode 100644 index 00000000000000..a2fdd0593372e6 --- /dev/null +++ b/src/native/external/xz/src/xz/util.h @@ -0,0 +1,132 @@ +// SPDX-License-Identifier: 0BSD + +/////////////////////////////////////////////////////////////////////////////// +// +/// \file util.h +/// \brief Miscellaneous utility functions +// +// Author: Lasse Collin +// +/////////////////////////////////////////////////////////////////////////////// + +/// \brief Safe malloc() that never returns NULL +/// +/// \note xmalloc(), xrealloc(), and xstrdup() must not be used when +/// there are files open for writing, that should be cleaned up +/// before exiting. +#define xmalloc(size) xrealloc(NULL, size) + + +/// \brief Safe realloc() that never returns NULL +lzma_attr_alloc_size(2) +extern void *xrealloc(void *ptr, size_t size); + + +/// \brief Safe strdup() that never returns NULL +extern char *xstrdup(const char *src); + + +/// \brief Fancy version of strtoull() +/// +/// \param name Name of the option to show in case of an error +/// \param value String containing the number to be parsed; may +/// contain suffixes "k", "M", "G", "Ki", "Mi", or "Gi" +/// \param min Minimum valid value +/// \param max Maximum valid value +/// +/// \return Parsed value that is in the range [min, max]. Does not return +/// if an error occurs. +/// +extern uint64_t str_to_uint64(const char *name, const char *value, + uint64_t min, uint64_t max); + + +/// \brief Round an integer up to the next full MiB and convert to MiB +/// +/// This is used when printing memory usage and limit. +extern uint64_t round_up_to_mib(uint64_t n); + + +/// \brief Convert uint64_t to a string +/// +/// Convert the given value to a string with locale-specific thousand +/// separators, if supported by the snprintf() implementation. The string +/// is stored into an internal static buffer indicated by the slot argument. +/// A pointer to the selected buffer is returned. +/// +/// This function exists, because non-POSIX systems don't support thousand +/// separator in format strings. Solving the problem in a simple way doesn't +/// work, because it breaks gettext (specifically, the xgettext tool). +extern const char *uint64_to_str(uint64_t value, uint32_t slot); + + +enum nicestr_unit { + NICESTR_B, + NICESTR_KIB, + NICESTR_MIB, + NICESTR_GIB, + NICESTR_TIB, +}; + + +/// \brief Convert uint64_t to a nice human readable string +/// +/// This is like uint64_to_str() but uses B, KiB, MiB, GiB, or TiB suffix +/// and optionally includes the exact size in parenthesis. +/// +/// \param value Value to be printed +/// \param unit_min Smallest unit to use. This and unit_max are used +/// e.g. when showing the progress indicator to force +/// the unit to MiB. +/// \param unit_max Biggest unit to use. assert(unit_min <= unit_max). +/// \param always_also_bytes +/// Show also the exact byte value in parenthesis +/// if the nicely formatted string uses bigger unit +/// than bytes. +/// \param slot Which static buffer to use to hold the string. +/// This is shared with uint64_to_str(). +/// +/// \return Pointer to statically allocated buffer containing the string. +/// +/// \note This uses double_to_str() internally so the static buffer +/// in double_to_str() will be overwritten. +/// +extern const char *uint64_to_nicestr(uint64_t value, + enum nicestr_unit unit_min, enum nicestr_unit unit_max, + bool always_also_bytes, uint32_t slot); + + +/// \brief Wrapper for snprintf() to help constructing a string in pieces +/// +/// A maximum of *left bytes is written starting from *pos. *pos and *left +/// are updated accordingly. +lzma_attribute((__format__(__printf__, 3, 4))) +extern void my_snprintf(char **pos, size_t *left, const char *fmt, ...); + + +/// \brief Test if file descriptor is a terminal +/// +/// For POSIX systems, this is a simple wrapper around isatty(). However on +/// Windows, isatty() returns true for all character devices, not just +/// terminals. +/// +/// \param fd File descriptor to test +/// +/// \return bool: +/// - true if file descriptor is a terminal +/// - false otherwise +extern bool is_tty(int fd); + + +/// \brief Test if stdin is a terminal +/// +/// If stdin is a terminal, an error message is printed and exit status set +/// to EXIT_ERROR. +extern bool is_tty_stdin(void); + + +/// \brief Test if stdout is a terminal +/// +/// If stdout is a terminal, an error message is printed and exit status set +/// to EXIT_ERROR. +extern bool is_tty_stdout(void); diff --git a/src/native/external/xz/src/xz/xz.1 b/src/native/external/xz/src/xz/xz.1 new file mode 100644 index 00000000000000..6e37b426a3b5c8 --- /dev/null +++ b/src/native/external/xz/src/xz/xz.1 @@ -0,0 +1,3264 @@ +'\" t +.\" SPDX-License-Identifier: 0BSD +.\" +.\" Authors: Lasse Collin +.\" Jia Tan +.\" +.TH XZ 1 "2025-03-08" "Tukaani" "XZ Utils" +. +.SH NAME +xz, unxz, xzcat, lzma, unlzma, lzcat \- Compress or decompress .xz and .lzma files +. +.SH SYNOPSIS +.B xz +.RI [ option... ] +.RI [ file... ] +. +.SH COMMAND ALIASES +.B unxz +is equivalent to +.BR "xz \-\-decompress" . +.br +.B xzcat +is equivalent to +.BR "xz \-\-decompress \-\-stdout" . +.br +.B lzma +is equivalent to +.BR "xz \-\-format=lzma" . +.br +.B unlzma +is equivalent to +.BR "xz \-\-format=lzma \-\-decompress" . +.br +.B lzcat +is equivalent to +.BR "xz \-\-format=lzma \-\-decompress \-\-stdout" . +.PP +When writing scripts that need to decompress files, +it is recommended to always use the name +.B xz +with appropriate arguments +.RB ( "xz \-d" +or +.BR "xz \-dc" ) +instead of the names +.B unxz +and +.BR xzcat . +. +.SH DESCRIPTION +.B xz +is a general-purpose data compression tool with +command line syntax similar to +.BR gzip (1) +and +.BR bzip2 (1). +The native file format is the +.B .xz +format, but the legacy +.B .lzma +format used by LZMA Utils and +raw compressed streams with no container format headers +are also supported. +In addition, decompression of the +.B .lz +format used by +.B lzip +is supported. +.PP +.B xz +compresses or decompresses each +.I file +according to the selected operation mode. +If no +.I files +are given or +.I file +is +.BR \- , +.B xz +reads from standard input and writes the processed data +to standard output. +.B xz +will refuse (display an error and skip the +.IR file ) +to write compressed data to standard output if it is a terminal. +Similarly, +.B xz +will refuse to read compressed data +from standard input if it is a terminal. +.PP +Unless +.B \-\-stdout +is specified, +.I files +other than +.B \- +are written to a new file whose name is derived from the source +.I file +name: +.IP \(bu 3 +When compressing, the suffix of the target file format +.RB ( .xz +or +.BR .lzma ) +is appended to the source filename to get the target filename. +.IP \(bu 3 +When decompressing, the +.BR .xz , +.BR .lzma , +or +.B .lz +suffix is removed from the filename to get the target filename. +.B xz +also recognizes the suffixes +.B .txz +and +.BR .tlz , +and replaces them with the +.B .tar +suffix. +.PP +If the target file already exists, an error is displayed and the +.I file +is skipped. +.PP +Unless writing to standard output, +.B xz +will display a warning and skip the +.I file +if any of the following applies: +.IP \(bu 3 +.I File +is not a regular file. +Symbolic links are not followed, +and thus they are not considered to be regular files. +.IP \(bu 3 +.I File +has more than one hard link. +.IP \(bu 3 +.I File +has setuid, setgid, or sticky bit set. +.IP \(bu 3 +The operation mode is set to compress and the +.I file +already has a suffix of the target file format +.RB ( .xz +or +.B .txz +when compressing to the +.B .xz +format, and +.B .lzma +or +.B .tlz +when compressing to the +.B .lzma +format). +.IP \(bu 3 +The operation mode is set to decompress and the +.I file +doesn't have a suffix of any of the supported file formats +.RB ( .xz , +.BR .txz , +.BR .lzma , +.BR .tlz , +or +.BR .lz ). +.PP +After successfully compressing or decompressing the +.IR file , +.B xz +copies the owner, group, permissions, access time, +and modification time from the source +.I file +to the target file. +If copying the group fails, the permissions are modified +so that the target file doesn't become accessible to users +who didn't have permission to access the source +.IR file . +.B xz +doesn't support copying other metadata like access control lists +or extended attributes yet. +.PP +Once the target file has been successfully closed, the source +.I file +is removed unless +.B \-\-keep +was specified. +The source +.I file +is never removed if the output is written to standard output +or if an error occurs. +.PP +Sending +.B SIGINFO +or +.B SIGUSR1 +to the +.B xz +process makes it print progress information to standard error. +This has only limited use since when standard error +is a terminal, using +.B \-\-verbose +will display an automatically updating progress indicator. +. +.SS "Memory usage" +The memory usage of +.B xz +varies from a few hundred kilobytes to several gigabytes +depending on the compression settings. +The settings used when compressing a file determine +the memory requirements of the decompressor. +Typically the decompressor needs 5\ % to 20\ % of +the amount of memory that the compressor needed when +creating the file. +For example, decompressing a file created with +.B xz \-9 +currently requires 65\ MiB of memory. +Still, it is possible to have +.B .xz +files that require several gigabytes of memory to decompress. +.PP +Especially users of older systems may find +the possibility of very large memory usage annoying. +To prevent uncomfortable surprises, +.B xz +has a built-in memory usage limiter, which is disabled by default. +While some operating systems provide ways to limit +the memory usage of processes, relying on it +wasn't deemed to be flexible enough (for example, using +.BR ulimit (1) +to limit virtual memory tends to cripple +.BR mmap (2)). +.PP +The memory usage limiter can be enabled with +the command line option \fB\-\-memlimit=\fIlimit\fR. +Often it is more convenient to enable the limiter +by default by setting the environment variable +.\" TRANSLATORS: Don't translate the uppercase XZ_DEFAULTS. +.\" It's a name of an environment variable. +.BR XZ_DEFAULTS , +for example, +.BR XZ_DEFAULTS=\-\-memlimit=150MiB . +It is possible to set the limits separately +for compression and decompression by using +.BI \-\-memlimit\-compress= limit +and \fB\-\-memlimit\-decompress=\fIlimit\fR. +Using these two options outside +.B XZ_DEFAULTS +is rarely useful because a single run of +.B xz +cannot do both compression and decompression and +.BI \-\-memlimit= limit +(or +.B \-M +.IR limit ) +is shorter to type on the command line. +.PP +If the specified memory usage limit is exceeded when decompressing, +.B xz +will display an error and decompressing the file will fail. +If the limit is exceeded when compressing, +.B xz +will try to scale the settings down so that the limit +is no longer exceeded (except when using +.B \-\-format=raw +or +.BR \-\-no\-adjust ). +This way the operation won't fail unless the limit is very small. +The scaling of the settings is done in steps that don't +match the compression level presets, for example, if the limit is +only slightly less than the amount required for +.BR "xz \-9" , +the settings will be scaled down only a little, +not all the way down to +.BR "xz \-8" . +. +.SS "Concatenation and padding with .xz files" +It is possible to concatenate +.B .xz +files as is. +.B xz +will decompress such files as if they were a single +.B .xz +file. +.PP +It is possible to insert padding between the concatenated parts +or after the last part. +The padding must consist of null bytes and the size +of the padding must be a multiple of four bytes. +This can be useful, for example, if the +.B .xz +file is stored on a medium that measures file sizes +in 512-byte blocks. +.PP +Concatenation and padding are not allowed with +.B .lzma +files or raw streams. +. +.SH OPTIONS +. +.SS "Integer suffixes and special values" +In most places where an integer argument is expected, +an optional suffix is supported to easily indicate large integers. +There must be no space between the integer and the suffix. +.TP +.B KiB +Multiply the integer by 1,024 (2^10). +.BR Ki , +.BR k , +.BR kB , +.BR K , +and +.B KB +are accepted as synonyms for +.BR KiB . +.TP +.B MiB +Multiply the integer by 1,048,576 (2^20). +.BR Mi , +.BR m , +.BR M , +and +.B MB +are accepted as synonyms for +.BR MiB . +.TP +.B GiB +Multiply the integer by 1,073,741,824 (2^30). +.BR Gi , +.BR g , +.BR G , +and +.B GB +are accepted as synonyms for +.BR GiB . +.PP +The special value +.B max +can be used to indicate the maximum integer value +supported by the option. +. +.SS "Operation mode" +If multiple operation mode options are given, +the last one takes effect. +.TP +.BR \-z ", " \-\-compress +Compress. +This is the default operation mode when no operation mode option +is specified and no other operation mode is implied from +the command name (for example, +.B unxz +implies +.BR \-\-decompress ). +.IP "" +.\" The DESCRIPTION section already says this but it's good to repeat it +.\" here because the default behavior is a bit dangerous and new users +.\" in a hurry may skip reading the DESCRIPTION section. +After successful compression, the source file is removed +unless writing to standard output or +.B \-\-keep +was specified. +.TP +.BR \-d ", " \-\-decompress ", " \-\-uncompress +Decompress. +.\" The DESCRIPTION section already says this but it's good to repeat it +.\" here because the default behavior is a bit dangerous and new users +.\" in a hurry may skip reading the DESCRIPTION section. +After successful decompression, the source file is removed +unless writing to standard output or +.B \-\-keep +was specified. +.TP +.BR \-t ", " \-\-test +Test the integrity of compressed +.IR files . +This option is equivalent to +.B "\-\-decompress \-\-stdout" +except that the decompressed data is discarded instead of being +written to standard output. +No files are created or removed. +.TP +.BR \-l ", " \-\-list +Print information about compressed +.IR files . +No uncompressed output is produced, +and no files are created or removed. +In list mode, the program cannot read +the compressed data from standard +input or from other unseekable sources. +.IP "" +The default listing shows basic information about +.IR files , +one file per line. +To get more detailed information, use also the +.B \-\-verbose +option. +For even more information, use +.B \-\-verbose +twice, but note that this may be slow, because getting all the extra +information requires many seeks. +The width of verbose output exceeds +80 characters, so piping the output to, for example, +.B "less\ \-S" +may be convenient if the terminal isn't wide enough. +.IP "" +The exact output may vary between +.B xz +versions and different locales. +For machine-readable output, +.B \-\-robot \-\-list +should be used. +. +.SS "Operation modifiers" +.TP +.BR \-k ", " \-\-keep +Don't delete the input files. +.IP "" +Since +.B xz +5.2.6, +this option also makes +.B xz +compress or decompress even if the input is +a symbolic link to a regular file, +has more than one hard link, +or has the setuid, setgid, or sticky bit set. +The setuid, setgid, and sticky bits are not copied +to the target file. +In earlier versions this was only done with +.BR \-\-force . +.TP +.BR \-f ", " \-\-force +This option has several effects: +.RS +.IP \(bu 3 +If the target file already exists, +delete it before compressing or decompressing. +.IP \(bu 3 +Compress or decompress even if the input is +a symbolic link to a regular file, +has more than one hard link, +or has the setuid, setgid, or sticky bit set. +The setuid, setgid, and sticky bits are not copied +to the target file. +.IP \(bu 3 +When used with +.B \-\-decompress +.B \-\-stdout +and +.B xz +cannot recognize the type of the source file, +copy the source file as is to standard output. +This allows +.B xzcat +.B \-\-force +to be used like +.BR cat (1) +for files that have not been compressed with +.BR xz . +Note that in future, +.B xz +might support new compressed file formats, which may make +.B xz +decompress more types of files instead of copying them as is to +standard output. +.BI \-\-format= format +can be used to restrict +.B xz +to decompress only a single file format. +.RE +.TP +.BR \-c ", " \-\-stdout ", " \-\-to\-stdout +Write the compressed or decompressed data to +standard output instead of a file. +This implies +.BR \-\-keep . +.TP +.B \-\-single\-stream +Decompress only the first +.B .xz +stream, and +silently ignore possible remaining input data following the stream. +Normally such trailing garbage makes +.B xz +display an error. +.IP "" +.B xz +never decompresses more than one stream from +.B .lzma +files or raw streams, but this option still makes +.B xz +ignore the possible trailing data after the +.B .lzma +file or raw stream. +.IP "" +This option has no effect if the operation mode is not +.B \-\-decompress +or +.BR \-\-test . +.IP "" +Since +.B xz +5.7.1alpha, +.B \-\-single\-stream +implies +.BR \-\-keep . +.TP +.B \-\-no\-sparse +Disable creation of sparse files. +By default, if decompressing into a regular file, +.B xz +tries to make the file sparse if the decompressed data contains +long sequences of binary zeros. +It also works when writing to standard output +as long as standard output is connected to a regular file +and certain additional conditions are met to make it safe. +Creating sparse files may save disk space and speed up +the decompression by reducing the amount of disk I/O. +.TP +\fB\-S\fR \fI.suf\fR, \fB\-\-suffix=\fI.suf +When compressing, use +.I .suf +as the suffix for the target file instead of +.B .xz +or +.BR .lzma . +If not writing to standard output and +the source file already has the suffix +.IR .suf , +a warning is displayed and the file is skipped. +.IP "" +When decompressing, recognize files with the suffix +.I .suf +in addition to files with the +.BR .xz , +.BR .txz , +.BR .lzma , +.BR .tlz , +or +.B .lz +suffix. +If the source file has the suffix +.IR .suf , +the suffix is removed to get the target filename. +.IP "" +When compressing or decompressing raw streams +.RB ( \-\-format=raw ), +the suffix must always be specified unless +writing to standard output, +because there is no default suffix for raw streams. +.TP +\fB\-\-files\fR[\fB=\fIfile\fR] +Read the filenames to process from +.IR file ; +if +.I file +is omitted, filenames are read from standard input. +Filenames must be terminated with the newline character. +A dash +.RB ( \- ) +is taken as a regular filename; it doesn't mean standard input. +If filenames are given also as command line arguments, they are +processed before the filenames read from +.IR file . +.TP +\fB\-\-files0\fR[\fB=\fIfile\fR] +This is identical to \fB\-\-files\fR[\fB=\fIfile\fR] except +that each filename must be terminated with the null character. +. +.SS "Basic file format and compression options" +.TP +\fB\-F\fR \fIformat\fR, \fB\-\-format=\fIformat +Specify the file +.I format +to compress or decompress: +.RS +.TP +.\" TRANSLATORS: Don't translate bold string B. +.B auto +This is the default. +When compressing, +.B auto +is equivalent to +.BR xz . +When decompressing, +the format of the input file is automatically detected. +Note that raw streams (created with +.BR \-\-format=raw ) +cannot be auto-detected. +.TP +.B xz +Compress to the +.B .xz +file format, or accept only +.B .xz +files when decompressing. +.TP +.BR lzma ", " alone +Compress to the legacy +.B .lzma +file format, or accept only +.B .lzma +files when decompressing. +The alternative name +.B alone +is provided for backwards compatibility with LZMA Utils. +.TP +.B lzip +Accept only +.B .lz +files when decompressing. +Compression is not supported. +.IP "" +The +.B .lz +format versions 0 and 1 are supported. +Version 0 files were produced by +.B lzip +1.3 and older. +Such files aren't common but may be found from file archives +as a few source packages were released in this format. +People might have old personal files in this format too. +Decompression support for the format version 0 was removed in +.B lzip +1.18. +.B lzip +1.4 and later create files in the format version 1. +.TP +.B raw +Compress or uncompress a raw stream (no headers). +This is meant for advanced users only. +To decode raw streams, you need use +.B \-\-format=raw +and explicitly specify the filter chain, +which normally would have been stored in the container headers. +.RE +.TP +\fB\-C\fR \fIcheck\fR, \fB\-\-check=\fIcheck +Specify the type of the integrity check. +The check is calculated from the uncompressed data and +stored in the +.B .xz +file. +This option has an effect only when compressing into the +.B .xz +format; the +.B .lzma +format doesn't support integrity checks. +The integrity check (if any) is verified when the +.B .xz +file is decompressed. +.IP "" +Supported +.I check +types: +.RS +.TP +.\" TRANSLATORS: Don't translate the bold strings B, B, +.\" B, and B. The command line option --check accepts +.\" only the untranslated strings. +.B none +Don't calculate an integrity check at all. +This is usually a bad idea. +This can be useful when integrity of the data is verified +by other means anyway. +.TP +.B crc32 +Calculate CRC32 using the polynomial from IEEE-802.3 (Ethernet). +.TP +.B crc64 +Calculate CRC64 using the polynomial from ECMA-182. +This is the default, since it is slightly better than CRC32 +at detecting damaged files and the speed difference is negligible. +.TP +.B sha256 +Calculate SHA-256. +This is somewhat slower than CRC32 and CRC64. +.RE +.IP "" +Integrity of the +.B .xz +headers is always verified with CRC32. +It is not possible to change or disable it. +.TP +.B \-\-ignore\-check +Don't verify the integrity check of the compressed data when decompressing. +The CRC32 values in the +.B .xz +headers will still be verified normally. +.IP "" +.B "Do not use this option unless you know what you are doing." +Possible reasons to use this option: +.RS +.IP \(bu 3 +Trying to recover data from a corrupt .xz file. +.IP \(bu 3 +Speeding up decompression. +This matters mostly with SHA-256 or +with files that have compressed extremely well. +It's recommended to not use this option for this purpose +unless the file integrity is verified externally in some other way. +.RE +.TP +.BR \-0 " ... " \-9 +Select a compression preset level. +The default is +.BR \-6 . +If multiple preset levels are specified, +the last one takes effect. +If a custom filter chain was already specified, setting +a compression preset level clears the custom filter chain. +.IP "" +The differences between the presets are more significant than with +.BR gzip (1) +and +.BR bzip2 (1). +The selected compression settings determine +the memory requirements of the decompressor, +thus using a too high preset level might make it painful +to decompress the file on an old system with little RAM. +Specifically, +.B "it's not a good idea to blindly use \-9 for everything" +like it often is with +.BR gzip (1) +and +.BR bzip2 (1). +.RS +.TP +.BR "\-0" " ... " "\-3" +These are somewhat fast presets. +.B \-0 +is sometimes faster than +.B "gzip \-9" +while compressing much better. +The higher ones often have speed comparable to +.BR bzip2 (1) +with comparable or better compression ratio, +although the results +depend a lot on the type of data being compressed. +.TP +.BR "\-4" " ... " "\-6" +Good to very good compression while keeping +decompressor memory usage reasonable even for old systems. +.B \-6 +is the default, which is usually a good choice +for distributing files that need to be decompressible +even on systems with only 16\ MiB RAM. +.RB ( \-5e +or +.B \-6e +may be worth considering too. +See +.BR \-\-extreme .) +.TP +.B "\-7 ... \-9" +These are like +.B \-6 +but with higher compressor and decompressor memory requirements. +These are useful only when compressing files bigger than +8\ MiB, 16\ MiB, and 32\ MiB, respectively. +.RE +.IP "" +On the same hardware, the decompression speed is approximately +a constant number of bytes of compressed data per second. +In other words, the better the compression, +the faster the decompression will usually be. +This also means that the amount of uncompressed output +produced per second can vary a lot. +.IP "" +The following table summarises the features of the presets: +.RS +.RS +.PP +.TS +tab(;); +c c c c c +n n n n n. +Preset;DictSize;CompCPU;CompMem;DecMem +\-0;256 KiB;0;3 MiB;1 MiB +\-1;1 MiB;1;9 MiB;2 MiB +\-2;2 MiB;2;17 MiB;3 MiB +\-3;4 MiB;3;32 MiB;5 MiB +\-4;4 MiB;4;48 MiB;5 MiB +\-5;8 MiB;5;94 MiB;9 MiB +\-6;8 MiB;6;94 MiB;9 MiB +\-7;16 MiB;6;186 MiB;17 MiB +\-8;32 MiB;6;370 MiB;33 MiB +\-9;64 MiB;6;674 MiB;65 MiB +.TE +.RE +.RE +.IP "" +Column descriptions: +.RS +.IP \(bu 3 +DictSize is the LZMA2 dictionary size. +It is waste of memory to use a dictionary bigger than +the size of the uncompressed file. +This is why it is good to avoid using the presets +.BR \-7 " ... " \-9 +when there's no real need for them. +At +.B \-6 +and lower, the amount of memory wasted is +usually low enough to not matter. +.IP \(bu 3 +CompCPU is a simplified representation of the LZMA2 settings +that affect compression speed. +The dictionary size affects speed too, +so while CompCPU is the same for levels +.BR \-6 " ... " \-9 , +higher levels still tend to be a little slower. +To get even slower and thus possibly better compression, see +.BR \-\-extreme . +.IP \(bu 3 +CompMem contains the compressor memory requirements +in the single-threaded mode. +It may vary slightly between +.B xz +versions. +.IP \(bu 3 +DecMem contains the decompressor memory requirements. +That is, the compression settings determine +the memory requirements of the decompressor. +The exact decompressor memory usage is slightly more than +the LZMA2 dictionary size, but the values in the table +have been rounded up to the next full MiB. +.RE +.IP "" +Memory requirements of the multi-threaded mode are +significantly higher than that of the single-threaded mode. +With the default value of +.BR \-\-block\-size , +each thread needs 3*3*DictSize plus CompMem or DecMem. +For example, four threads with preset +.B \-6 +needs 660\(en670\ MiB of memory. +.TP +.BR \-e ", " \-\-extreme +Use a slower variant of the selected compression preset level +.RB ( \-0 " ... " \-9 ) +to hopefully get a little bit better compression ratio, +but with bad luck this can also make it worse. +Decompressor memory usage is not affected, +but compressor memory usage increases a little at preset levels +.BR \-0 " ... " \-3 . +.IP "" +Since there are two presets with dictionary sizes +4\ MiB and 8\ MiB, the presets +.B \-3e +and +.B \-5e +use slightly faster settings (lower CompCPU) than +.B \-4e +and +.BR \-6e , +respectively. +That way no two presets are identical. +.RS +.RS +.PP +.TS +tab(;); +c c c c c +n n n n n. +Preset;DictSize;CompCPU;CompMem;DecMem +\-0e;256 KiB;8;4 MiB;1 MiB +\-1e;1 MiB;8;13 MiB;2 MiB +\-2e;2 MiB;8;25 MiB;3 MiB +\-3e;4 MiB;7;48 MiB;5 MiB +\-4e;4 MiB;8;48 MiB;5 MiB +\-5e;8 MiB;7;94 MiB;9 MiB +\-6e;8 MiB;8;94 MiB;9 MiB +\-7e;16 MiB;8;186 MiB;17 MiB +\-8e;32 MiB;8;370 MiB;33 MiB +\-9e;64 MiB;8;674 MiB;65 MiB +.TE +.RE +.RE +.IP "" +For example, there are a total of four presets that use +8\ MiB dictionary, whose order from the fastest to the slowest is +.BR \-5 , +.BR \-6 , +.BR \-5e , +and +.BR \-6e . +.TP +.B \-\-fast +.PD 0 +.TP +.B \-\-best +.PD +These are somewhat misleading aliases for +.B \-0 +and +.BR \-9 , +respectively. +These are provided only for backwards compatibility +with LZMA Utils. +Avoid using these options. +.TP +.BI \-\-block\-size= size +When compressing to the +.B .xz +format, split the input data into blocks of +.I size +bytes. +The blocks are compressed independently from each other, +which helps with multi-threading and +makes limited random-access decompression possible. +This option is typically used to override the default +block size in multi-threaded mode, +but this option can be used in single-threaded mode too. +.IP "" +In multi-threaded mode about three times +.I size +bytes will be allocated in each thread for buffering input and output. +The default +.I size +is three times the LZMA2 dictionary size or 1 MiB, +whichever is more. +Typically a good value is 2\(en4 times +the size of the LZMA2 dictionary or at least 1 MiB. +Using +.I size +less than the LZMA2 dictionary size is waste of RAM +because then the LZMA2 dictionary buffer will never get fully used. +In multi-threaded mode, +the sizes of the blocks are stored in the block headers. +This size information is required for multi-threaded decompression. +.IP "" +In single-threaded mode no block splitting is done by default. +Setting this option doesn't affect memory usage. +No size information is stored in block headers, +thus files created in single-threaded mode +won't be identical to files created in multi-threaded mode. +The lack of size information also means that +.B xz +won't be able decompress the files in multi-threaded mode. +.TP +.BI \-\-block\-list= items +When compressing to the +.B .xz +format, start a new block with an optional custom filter chain after +the given intervals of uncompressed data. +.IP "" +The +.I items +are a comma-separated list. +Each item consists of an optional filter chain number +between 0 and 9 followed by a colon +.RB ( : ) +and a required size of uncompressed data. +Omitting an item (two or more consecutive commas) is a +shorthand to use the size and filters of the previous item. +.IP "" +If the input file is bigger than the sum of +the sizes in +.IR items , +the last item is repeated until the end of the file. +A special value of +.B 0 +may be used as the last size to indicate that +the rest of the file should be encoded as a single block. +.IP "" +An alternative filter chain for each block can be +specified in combination with the +.BI \-\-filters1= filters +\&...\& +.BI \-\-filters9= filters +options. +These options define filter chains with an identifier +between 1\(en9. +Filter chain 0 can be used to refer to the default filter chain, +which is the same as not specifying a filter chain. +The filter chain identifier can be used before the uncompressed +size, followed by a colon +.RB ( : ). +For example, if one specifies +.B \-\-block\-list=1:2MiB,3:2MiB,2:4MiB,,2MiB,0:4MiB +then blocks will be created using: +.RS +.IP \(bu 3 +The filter chain specified by +.B \-\-filters1 +and 2 MiB input +.IP \(bu 3 +The filter chain specified by +.B \-\-filters3 +and 2 MiB input +.IP \(bu 3 +The filter chain specified by +.B \-\-filters2 +and 4 MiB input +.IP \(bu 3 +The filter chain specified by +.B \-\-filters2 +and 4 MiB input +.IP \(bu 3 +The default filter chain and 2 MiB input +.IP \(bu 3 +The default filter chain and 4 MiB input for every block until +end of input. +.RE +.IP "" +If one specifies a size that exceeds the encoder's block size +(either the default value in threaded mode or +the value specified with \fB\-\-block\-size=\fIsize\fR), +the encoder will create additional blocks while +keeping the boundaries specified in +.IR items . +For example, if one specifies +.B \-\-block\-size=10MiB +.B \-\-block\-list=5MiB,10MiB,8MiB,12MiB,24MiB +and the input file is 80 MiB, +one will get 11 blocks: +5, 10, 8, 10, 2, 10, 10, 4, 10, 10, and 1 MiB. +.IP "" +In multi-threaded mode the sizes of the blocks +are stored in the block headers. +This isn't done in single-threaded mode, +so the encoded output won't be +identical to that of the multi-threaded mode. +.TP +.BI \-\-flush\-timeout= timeout +When compressing, if more than +.I timeout +milliseconds (a positive integer) has passed since the previous flush and +reading more input would block, +all the pending input data is flushed from the encoder and +made available in the output stream. +This can be useful if +.B xz +is used to compress data that is streamed over a network. +Small +.I timeout +values make the data available at the receiving end +with a small delay, but large +.I timeout +values give better compression ratio. +.IP "" +This feature is disabled by default. +If this option is specified more than once, the last one takes effect. +The special +.I timeout +value of +.B 0 +can be used to explicitly disable this feature. +.IP "" +This feature is not available on non-POSIX systems. +.IP "" +.\" FIXME +.B "This feature is still experimental." +Currently +.B xz +is unsuitable for decompressing the stream in real time due to how +.B xz +does buffering. +.TP +.B \-\-no\-sync +Do not synchronize the target file and its directory +to the storage device before removing the source file. +This can improve performance if compressing or decompressing +many small files. +However, if the system crashes soon after the deletion, +it is possible that the target file was not written +to the storage device but the delete operation was. +In that case neither the original source file +nor the target file is available. +.IP "" +This option has an effect only when +.B xz +is going to remove the source file. +In other cases synchronization is never done. +.IP "" +The synchronization and +.B \-\-no\-sync +were added in +.B xz +5.7.1alpha. +.TP +.BI \-\-memlimit\-compress= limit +Set a memory usage limit for compression. +If this option is specified multiple times, +the last one takes effect. +.IP "" +If the compression settings exceed the +.IR limit , +.B xz +will attempt to adjust the settings downwards so that +the limit is no longer exceeded and display a notice that +automatic adjustment was done. +The adjustments are done in this order: +reducing the number of threads, +switching to single-threaded mode +if even one thread in multi-threaded mode exceeds the +.IR limit , +and finally reducing the LZMA2 dictionary size. +.IP "" +When compressing with +.B \-\-format=raw +or if +.B \-\-no\-adjust +has been specified, +only the number of threads may be reduced +since it can be done without affecting the compressed output. +.IP "" +If the +.I limit +cannot be met even with the adjustments described above, +an error is displayed and +.B xz +will exit with exit status 1. +.IP "" +The +.I limit +can be specified in multiple ways: +.RS +.IP \(bu 3 +The +.I limit +can be an absolute value in bytes. +Using an integer suffix like +.B MiB +can be useful. +Example: +.B "\-\-memlimit\-compress=80MiB" +.IP \(bu 3 +The +.I limit +can be specified as a percentage of total physical memory (RAM). +This can be useful especially when setting the +.B XZ_DEFAULTS +environment variable in a shell initialization script +that is shared between different computers. +That way the limit is automatically bigger +on systems with more memory. +Example: +.B "\-\-memlimit\-compress=70%" +.IP \(bu 3 +The +.I limit +can be reset back to its default value by setting it to +.BR 0 . +This is currently equivalent to setting the +.I limit +to +.B max +(no memory usage limit). +.RE +.IP "" +For 32-bit +.B xz +there is a special case: if the +.I limit +would be over +.BR "4020\ MiB" , +the +.I limit +is set to +.BR "4020\ MiB" . +On MIPS32 +.B "2000\ MiB" +is used instead. +(The values +.B 0 +and +.B max +aren't affected by this. +A similar feature doesn't exist for decompression.) +This can be helpful when a 32-bit executable has access +to 4\ GiB address space (2 GiB on MIPS32) +while hopefully doing no harm in other situations. +.IP "" +See also the section +.BR "Memory usage" . +.TP +.BI \-\-memlimit\-decompress= limit +Set a memory usage limit for decompression. +This also affects the +.B \-\-list +mode. +If the operation is not possible without exceeding the +.IR limit , +.B xz +will display an error and decompressing the file will fail. +See +.BI \-\-memlimit\-compress= limit +for possible ways to specify the +.IR limit . +.TP +.BI \-\-memlimit\-mt\-decompress= limit +Set a memory usage limit for multi-threaded decompression. +This can only affect the number of threads; +this will never make +.B xz +refuse to decompress a file. +If +.I limit +is too low to allow any multi-threading, the +.I limit +is ignored and +.B xz +will continue in single-threaded mode. +Note that if also +.B \-\-memlimit\-decompress +is used, +it will always apply to both single-threaded and multi-threaded modes, +and so the effective +.I limit +for multi-threading will never be higher than the limit set with +.BR \-\-memlimit\-decompress . +.IP "" +In contrast to the other memory usage limit options, +.BI \-\-memlimit\-mt\-decompress= limit +has a system-specific default +.IR limit . +.B "xz \-\-info\-memory" +can be used to see the current value. +.IP "" +This option and its default value exist +because without any limit the threaded decompressor +could end up allocating an insane amount of memory with some input files. +If the default +.I limit +is too low on your system, +feel free to increase the +.I limit +but never set it to a value larger than the amount of usable RAM +as with appropriate input files +.B xz +will attempt to use that amount of memory +even with a low number of threads. +Running out of memory or swapping +will not improve decompression performance. +.IP "" +See +.BI \-\-memlimit\-compress= limit +for possible ways to specify the +.IR limit . +Setting +.I limit +to +.B 0 +resets the +.I limit +to the default system-specific value. +.TP +\fB\-M\fR \fIlimit\fR, \fB\-\-memlimit=\fIlimit\fR, \fB\-\-memory=\fIlimit +This is equivalent to specifying +.BI \-\-memlimit\-compress= limit +.BI \-\-memlimit-decompress= limit +\fB\-\-memlimit\-mt\-decompress=\fIlimit\fR. +.TP +.B \-\-no\-adjust +Display an error and exit if the memory usage limit cannot be +met without adjusting settings that affect the compressed output. +That is, this prevents +.B xz +from switching the encoder from multi-threaded mode to single-threaded mode +and from reducing the LZMA2 dictionary size. +Even when this option is used the number of threads may be reduced +to meet the memory usage limit as that won't affect the compressed output. +.IP "" +Automatic adjusting is always disabled when creating raw streams +.RB ( \-\-format=raw ). +.TP +\fB\-T\fR \fIthreads\fR, \fB\-\-threads=\fIthreads +Specify the number of worker threads to use. +Setting +.I threads +to a special value +.B 0 +makes +.B xz +use up to as many threads as the processor(s) on the system support. +The actual number of threads can be fewer than +.I threads +if the input file is not big enough +for threading with the given settings or +if using more threads would exceed the memory usage limit. +.IP "" +The single-threaded and multi-threaded compressors produce different output. +Single-threaded compressor will give the smallest file size but +only the output from the multi-threaded compressor can be decompressed +using multiple threads. +Setting +.I threads +to +.B 1 +will use the single-threaded mode. +Setting +.I threads +to any other value, including +.BR 0 , +will use the multi-threaded compressor +even if the system supports only one hardware thread. +.RB ( xz +5.2.x +used single-threaded mode in this situation.) +.IP "" +To use multi-threaded mode with only one thread, set +.I threads +to +.BR +1 . +The +.B + +prefix has no effect with values other than +.BR 1 . +A memory usage limit can still make +.B xz +switch to single-threaded mode unless +.B \-\-no\-adjust +is used. +Support for the +.B + +prefix was added in +.B xz +5.4.0. +.IP "" +If an automatic number of threads has been requested and +no memory usage limit has been specified, +then a system-specific default soft limit will be used to possibly +limit the number of threads. +It is a soft limit in sense that it is ignored +if the number of threads becomes one, +thus a soft limit will never stop +.B xz +from compressing or decompressing. +This default soft limit will not make +.B xz +switch from multi-threaded mode to single-threaded mode. +The active limits can be seen with +.BR "xz \-\-info\-memory" . +.IP "" +Currently the only threading method is to split the input into +blocks and compress them independently from each other. +The default block size depends on the compression level and +can be overridden with the +.BI \-\-block\-size= size +option. +.IP "" +Threaded decompression only works on files that contain +multiple blocks with size information in block headers. +All large enough files compressed in multi-threaded mode +meet this condition, +but files compressed in single-threaded mode don't even if +.BI \-\-block\-size= size +has been used. +.IP "" +The default value for +.I threads +is +.BR 0 . +In +.B xz +5.4.x and older the default is +.BR 1 . +. +.SS "Custom compressor filter chains" +A custom filter chain allows specifying +the compression settings in detail instead of relying on +the settings associated to the presets. +When a custom filter chain is specified, +preset options +.RB ( \-0 +\&...\& +.B \-9 +and +.BR \-\-extreme ) +earlier on the command line are forgotten. +If a preset option is specified +after one or more custom filter chain options, +the new preset takes effect and +the custom filter chain options specified earlier are forgotten. +.PP +A filter chain is comparable to piping on the command line. +When compressing, the uncompressed input goes to the first filter, +whose output goes to the next filter (if any). +The output of the last filter gets written to the compressed file. +The maximum number of filters in the chain is four, +but typically a filter chain has only one or two filters. +.PP +Many filters have limitations on where they can be +in the filter chain: +some filters can work only as the last filter in the chain, +some only as a non-last filter, and some work in any position +in the chain. +Depending on the filter, this limitation is either inherent to +the filter design or exists to prevent security issues. +.PP +A custom filter chain can be specified in two different ways. +The options +.BI \-\-filters= filters +and +.BI \-\-filters1= filters +\&...\& +.BI \-\-filters9= filters +allow specifying an entire filter chain in one option using the +liblzma filter string syntax. +Alternatively, a filter chain can be specified by using one or more +individual filter options in the order they are wanted in the filter chain. +That is, the order of the individual filter options is significant! +When decoding raw streams +.RB ( \-\-format=raw ), +the filter chain must be specified in the same order as +it was specified when compressing. +Any individual filter or preset options specified before the full +chain option +(\fB\-\-filters=\fIfilters\fR) +will be forgotten. +Individual filters specified after the full chain option will reset the +filter chain. +.PP +Both the full and individual filter options take filter-specific +.I options +as a comma-separated list. +Extra commas in +.I options +are ignored. +Every option has a default value, so +specify those you want to change. +.PP +To see the whole filter chain and +.IR options , +use +.B "xz \-vv" +(that is, use +.B \-\-verbose +twice). +This works also for viewing the filter chain options used by presets. +.TP +.BI \-\-filters= filters +Specify the full filter chain or a preset in a single option. +Each filter can be separated by spaces or two dashes +.RB ( \-\- ). +.I filters +may need to be quoted on the shell command line so it is +parsed as a single option. +To denote +.IR options , +use +.B : +or +.BR = . +A preset can be prefixed with a +.B \- +and followed with zero or more flags. +The only supported flag is +.B e +to apply the same options as +.BR \-\-extreme . +.TP +\fB\-\-filters1\fR=\fIfilters\fR ... \fB\-\-filters9\fR=\fIfilters +Specify up to nine additional filter chains that can be used with +.BR \-\-block\-list . +.IP "" +For example, when compressing an archive with executable files +followed by text files, the executable part could use a filter +chain with a BCJ filter and the text part only the LZMA2 filter. +.TP +.B \-\-filters-help +Display a help message describing how to specify presets and +custom filter chains in the +.B \-\-filters +and +.BI \-\-filters1= filters +\&...\& +.BI \-\-filters9= filters +options, and exit successfully. +.TP +\fB\-\-lzma1\fR[\fB=\fIoptions\fR] +.PD 0 +.TP +\fB\-\-lzma2\fR[\fB=\fIoptions\fR] +.PD +Add LZMA1 or LZMA2 filter to the filter chain. +These filters can be used only as the last filter in the chain. +.IP "" +LZMA1 is a legacy filter, +which is supported almost solely due to the legacy +.B .lzma +file format, which supports only LZMA1. +LZMA2 is an updated +version of LZMA1 to fix some practical issues of LZMA1. +The +.B .xz +format uses LZMA2 and doesn't support LZMA1 at all. +Compression speed and ratios of LZMA1 and LZMA2 +are practically the same. +.IP "" +LZMA1 and LZMA2 share the same set of +.IR options : +.RS +.TP +.\" TRANSLATORS: Don't translate bold strings like B, B, +.\" B, B, B, or B because those are command line +.\" options. On the other hand, do translate the italic strings like +.\" I, I, and I, because such italic strings are +.\" placeholders which a user replaces with an actual value. +.BI preset= preset +Reset all LZMA1 or LZMA2 +.I options +to +.IR preset . +.I Preset +consist of an integer, which may be followed by single-letter +preset modifiers. +The integer can be from +.B 0 +to +.BR 9 , +matching the command line options +.B \-0 +\&...\& +.BR \-9 . +The only supported modifier is currently +.BR e , +which matches +.BR \-\-extreme . +If no +.B preset +is specified, the default values of LZMA1 or LZMA2 +.I options +are taken from the preset +.BR 6 . +.TP +.BI dict= size +Dictionary (history buffer) +.I size +indicates how many bytes of the recently processed +uncompressed data is kept in memory. +The algorithm tries to find repeating byte sequences (matches) in +the uncompressed data, and replace them with references +to the data currently in the dictionary. +The bigger the dictionary, the higher is the chance +to find a match. +Thus, increasing dictionary +.I size +usually improves compression ratio, but +a dictionary bigger than the uncompressed file is waste of memory. +.IP "" +Typical dictionary +.I size +is from 64\ KiB to 64\ MiB. +The minimum is 4\ KiB. +The maximum for compression is currently 1.5\ GiB (1536\ MiB). +The decompressor already supports dictionaries up to +one byte less than 4\ GiB, which is the maximum for +the LZMA1 and LZMA2 stream formats. +.IP "" +Dictionary +.I size +and match finder +.RI ( mf ) +together determine the memory usage of the LZMA1 or LZMA2 encoder. +The same (or bigger) dictionary +.I size +is required for decompressing that was used when compressing, +thus the memory usage of the decoder is determined +by the dictionary size used when compressing. +The +.B .xz +headers store the dictionary +.I size +either as +.RI "2^" n +or +.RI "2^" n " + 2^(" n "\-1)," +so these +.I sizes +are somewhat preferred for compression. +Other +.I sizes +will get rounded up when stored in the +.B .xz +headers. +.TP +.BI lc= lc +Specify the number of literal context bits. +The minimum is 0 and the maximum is 4; the default is 3. +In addition, the sum of +.I lc +and +.I lp +must not exceed 4. +.IP "" +All bytes that cannot be encoded as matches +are encoded as literals. +That is, literals are simply 8-bit bytes +that are encoded one at a time. +.IP "" +The literal coding makes an assumption that the highest +.I lc +bits of the previous uncompressed byte correlate +with the next byte. +For example, in typical English text, an upper-case letter is +often followed by a lower-case letter, and a lower-case +letter is usually followed by another lower-case letter. +In the US-ASCII character set, the highest three bits are 010 +for upper-case letters and 011 for lower-case letters. +When +.I lc +is at least 3, the literal coding can take advantage of +this property in the uncompressed data. +.IP "" +The default value (3) is usually good. +If you want maximum compression, test +.BR lc=4 . +Sometimes it helps a little, and +sometimes it makes compression worse. +If it makes it worse, test +.B lc=2 +too. +.TP +.BI lp= lp +Specify the number of literal position bits. +The minimum is 0 and the maximum is 4; the default is 0. +.IP "" +.I Lp +affects what kind of alignment in the uncompressed data is +assumed when encoding literals. +See +.I pb +below for more information about alignment. +.TP +.BI pb= pb +Specify the number of position bits. +The minimum is 0 and the maximum is 4; the default is 2. +.IP "" +.I Pb +affects what kind of alignment in the uncompressed data is +assumed in general. +The default means four-byte alignment +.RI (2^ pb =2^2=4), +which is often a good choice when there's no better guess. +.IP "" +When the alignment is known, setting +.I pb +accordingly may reduce the file size a little. +For example, with text files having one-byte +alignment (US-ASCII, ISO-8859-*, UTF-8), setting +.B pb=0 +can improve compression slightly. +For UTF-16 text, +.B pb=1 +is a good choice. +If the alignment is an odd number like 3 bytes, +.B pb=0 +might be the best choice. +.IP "" +Even though the assumed alignment can be adjusted with +.I pb +and +.IR lp , +LZMA1 and LZMA2 still slightly favor 16-byte alignment. +It might be worth taking into account when designing file formats +that are likely to be often compressed with LZMA1 or LZMA2. +.TP +.BI mf= mf +Match finder has a major effect on encoder speed, +memory usage, and compression ratio. +Usually Hash Chain match finders are faster than Binary Tree +match finders. +The default depends on the +.IR preset : +0 uses +.BR hc3 , +1\(en3 +use +.BR hc4 , +and the rest use +.BR bt4 . +.IP "" +The following match finders are supported. +The memory usage formulas below are rough approximations, +which are closest to the reality when +.I dict +is a power of two. +.RS +.TP +.B hc3 +Hash Chain with 2- and 3-byte hashing +.br +Minimum value for +.IR nice : +3 +.br +Memory usage: +.br +.I dict +* 7.5 (if +.I dict +<= 16 MiB); +.br +.I dict +* 5.5 + 64 MiB (if +.I dict +> 16 MiB) +.TP +.B hc4 +Hash Chain with 2-, 3-, and 4-byte hashing +.br +Minimum value for +.IR nice : +4 +.br +Memory usage: +.br +.I dict +* 7.5 (if +.I dict +<= 32 MiB); +.br +.I dict +* 6.5 (if +.I dict +> 32 MiB) +.TP +.B bt2 +Binary Tree with 2-byte hashing +.br +Minimum value for +.IR nice : +2 +.br +Memory usage: +.I dict +* 9.5 +.TP +.B bt3 +Binary Tree with 2- and 3-byte hashing +.br +Minimum value for +.IR nice : +3 +.br +Memory usage: +.br +.I dict +* 11.5 (if +.I dict +<= 16 MiB); +.br +.I dict +* 9.5 + 64 MiB (if +.I dict +> 16 MiB) +.TP +.B bt4 +Binary Tree with 2-, 3-, and 4-byte hashing +.br +Minimum value for +.IR nice : +4 +.br +Memory usage: +.br +.I dict +* 11.5 (if +.I dict +<= 32 MiB); +.br +.I dict +* 10.5 (if +.I dict +> 32 MiB) +.RE +.TP +.BI mode= mode +Compression +.I mode +specifies the method to analyze +the data produced by the match finder. +Supported +.I modes +are +.B fast +and +.BR normal . +The default is +.B fast +for +.I presets +0\(en3 and +.B normal +for +.I presets +4\(en9. +.IP "" +Usually +.B fast +is used with Hash Chain match finders and +.B normal +with Binary Tree match finders. +This is also what the +.I presets +do. +.TP +.BI nice= nice +Specify what is considered to be a nice length for a match. +Once a match of at least +.I nice +bytes is found, the algorithm stops +looking for possibly better matches. +.IP "" +.I Nice +can be 2\(en273 bytes. +Higher values tend to give better compression ratio +at the expense of speed. +The default depends on the +.IR preset . +.TP +.BI depth= depth +Specify the maximum search depth in the match finder. +The default is the special value of 0, +which makes the compressor determine a reasonable +.I depth +from +.I mf +and +.IR nice . +.IP "" +Reasonable +.I depth +for Hash Chains is 4\(en100 and 16\(en1000 for Binary Trees. +Using very high values for +.I depth +can make the encoder extremely slow with some files. +Avoid setting the +.I depth +over 1000 unless you are prepared to interrupt +the compression in case it is taking far too long. +.RE +.IP "" +When decoding raw streams +.RB ( \-\-format=raw ), +LZMA2 needs only the dictionary +.IR size . +LZMA1 needs also +.IR lc , +.IR lp , +and +.IR pb . +.TP +\fB\-\-x86\fR[\fB=\fIoptions\fR] +.PD 0 +.TP +\fB\-\-arm\fR[\fB=\fIoptions\fR] +.TP +\fB\-\-armthumb\fR[\fB=\fIoptions\fR] +.TP +\fB\-\-arm64\fR[\fB=\fIoptions\fR] +.TP +\fB\-\-powerpc\fR[\fB=\fIoptions\fR] +.TP +\fB\-\-ia64\fR[\fB=\fIoptions\fR] +.TP +\fB\-\-sparc\fR[\fB=\fIoptions\fR] +.TP +\fB\-\-riscv\fR[\fB=\fIoptions\fR] +.PD +Add a branch/call/jump (BCJ) filter to the filter chain. +These filters can be used only as a non-last filter +in the filter chain. +.IP "" +A BCJ filter converts relative addresses in +the machine code to their absolute counterparts. +This doesn't change the size of the data +but it increases redundancy, +which can help LZMA2 to produce 0\(en15\ % smaller +.B .xz +file. +The BCJ filters are always reversible, +so using a BCJ filter for wrong type of data +doesn't cause any data loss, although it may make +the compression ratio slightly worse. +The BCJ filters are very fast and +use an insignificant amount of memory. +.IP "" +These BCJ filters have known problems related to +the compression ratio: +.RS +.IP \(bu 3 +Some types of files containing executable code +(for example, object files, static libraries, and Linux kernel modules) +have the addresses in the instructions filled with filler values. +These BCJ filters will still do the address conversion, +which will make the compression worse with these files. +.IP \(bu 3 +If a BCJ filter is applied on an archive, +it is possible that it makes the compression ratio +worse than not using a BCJ filter. +For example, if there are similar or even identical executables +then filtering will likely make the files less similar +and thus compression is worse. +The contents of non-executable files in the same archive can matter too. +In practice one has to try with and without a BCJ filter to see +which is better in each situation. +.RE +.IP "" +Different instruction sets have different alignment: +the executable file must be aligned to a multiple of +this value in the input data to make the filter work. +.RS +.RS +.PP +.TS +tab(;); +l n l +l n l. +Filter;Alignment;Notes +x86;1;32-bit or 64-bit x86 +ARM;4; +ARM-Thumb;2; +ARM64;4;4096-byte alignment is best +PowerPC;4;Big endian only +IA-64;16;Itanium +SPARC;4; +RISC-V;2; +.TE +.RE +.RE +.IP "" +Since the BCJ-filtered data is usually compressed with LZMA2, +the compression ratio may be improved slightly if +the LZMA2 options are set to match the +alignment of the selected BCJ filter. +Examples: +.RS +.IP \(bu 3 +IA-64 filter has 16-byte alignment so +.B pb=4,lp=4,lc=0 +is good +with LZMA2 (2^4=16). +.IP \(bu 3 +RISC-V code has 2-byte or 4-byte alignment +depending on whether the file contains +16-bit compressed instructions (the C extension). +When 16-bit instructions are used, +.B pb=2,lp=1,lc=3 +or +.B pb=1,lp=1,lc=3 +is good. +When 16-bit instructions aren't present, +.B pb=2,lp=2,lc=2 +is the best. +.B readelf \-h +can be used to check if "RVC" +appears on the "Flags" line. +.IP \(bu 3 +ARM64 is always 4-byte aligned so +.B pb=2,lp=2,lc=2 +is the best. +.IP \(bu 3 +The x86 filter is an exception. +It's usually good to stick to LZMA2's defaults +.RB ( pb=2,lp=0,lc=3 ) +when compressing x86 executables. +.RE +.IP "" +All BCJ filters support the same +.IR options : +.RS +.TP +.BI start= offset +Specify the start +.I offset +that is used when converting between relative +and absolute addresses. +The +.I offset +must be a multiple of the alignment of the filter +(see the table above). +The default is zero. +In practice, the default is good; specifying a custom +.I offset +is almost never useful. +.RE +.TP +\fB\-\-delta\fR[\fB=\fIoptions\fR] +Add the Delta filter to the filter chain. +The Delta filter can be only used as a non-last filter +in the filter chain. +.IP "" +Currently only simple byte-wise delta calculation is supported. +It can be useful when compressing, for example, uncompressed bitmap images +or uncompressed PCM audio. +However, special purpose algorithms may give significantly better +results than Delta + LZMA2. +This is true especially with audio, +which compresses faster and better, for example, with +.BR flac (1). +.IP "" +Supported +.IR options : +.RS +.TP +.BI dist= distance +Specify the +.I distance +of the delta calculation in bytes. +.I distance +must be 1\(en256. +The default is 1. +.IP "" +For example, with +.B dist=2 +and eight-byte input A1 B1 A2 B3 A3 B5 A4 B7, the output will be +A1 B1 01 02 01 02 01 02. +.RE +. +.SS "Other options" +.TP +.BR \-q ", " \-\-quiet +Suppress warnings and notices. +Specify this twice to suppress errors too. +This option has no effect on the exit status. +That is, even if a warning was suppressed, +the exit status to indicate a warning is still used. +.TP +.BR \-v ", " \-\-verbose +Be verbose. +If standard error is connected to a terminal, +.B xz +will display a progress indicator. +Specifying +.B \-\-verbose +twice will give even more verbose output. +.IP "" +The progress indicator shows the following information: +.RS +.IP \(bu 3 +Completion percentage is shown +if the size of the input file is known. +That is, the percentage cannot be shown in pipes. +.IP \(bu 3 +Amount of compressed data produced (compressing) +or consumed (decompressing). +.IP \(bu 3 +Amount of uncompressed data consumed (compressing) +or produced (decompressing). +.IP \(bu 3 +Compression ratio, which is calculated by dividing +the amount of compressed data processed so far by +the amount of uncompressed data processed so far. +.IP \(bu 3 +Compression or decompression speed. +This is measured as the amount of uncompressed data consumed +(compression) or produced (decompression) per second. +It is shown after a few seconds have passed since +.B xz +started processing the file. +.IP \(bu 3 +Elapsed time in the format M:SS or H:MM:SS. +.IP \(bu 3 +Estimated remaining time is shown +only when the size of the input file is +known and a couple of seconds have already passed since +.B xz +started processing the file. +The time is shown in a less precise format which +never has any colons, for example, 2 min 30 s. +.RE +.IP "" +When standard error is not a terminal, +.B \-\-verbose +will make +.B xz +print the filename, compressed size, uncompressed size, +compression ratio, and possibly also the speed and elapsed time +on a single line to standard error after compressing or +decompressing the file. +The speed and elapsed time are included only when +the operation took at least a few seconds. +If the operation didn't finish, for example, due to user interruption, +also the completion percentage is printed +if the size of the input file is known. +.TP +.BR \-Q ", " \-\-no\-warn +Don't set the exit status to 2 +even if a condition worth a warning was detected. +This option doesn't affect the verbosity level, thus both +.B \-\-quiet +and +.B \-\-no\-warn +have to be used to not display warnings and +to not alter the exit status. +.TP +.B \-\-robot +Print messages in a machine-parsable format. +This is intended to ease writing frontends that want to use +.B xz +instead of liblzma, which may be the case with various scripts. +The output with this option enabled is meant to be stable across +.B xz +releases. +See the section +.B "ROBOT MODE" +for details. +.TP +.B \-\-info\-memory +Display, in human-readable format, how much physical memory (RAM) +and how many processor threads +.B xz +thinks the system has and the memory usage limits for compression +and decompression, and exit successfully. +.TP +.BR \-h ", " \-\-help +Display a help message describing the most commonly used options, +and exit successfully. +.TP +.BR \-H ", " \-\-long\-help +Display a help message describing all features of +.BR xz , +and exit successfully +.TP +.BR \-V ", " \-\-version +Display the version number of +.B xz +and liblzma in human readable format. +To get machine-parsable output, specify +.B \-\-robot +before +.BR \-\-version . +. +.SH "ROBOT MODE" +The robot mode is activated with the +.B \-\-robot +option. +It makes the output of +.B xz +easier to parse by other programs. +Currently +.B \-\-robot +is supported only together with +.BR \-\-list , +.BR \-\-filters\-help , +.BR \-\-info\-memory , +and +.BR \-\-version . +It will be supported for compression and +decompression in the future. +. +.SS "List mode" +.B "xz \-\-robot \-\-list" +uses tab-separated output. +The first column of every line has a string +that indicates the type of the information found on that line: +.TP +.\" TRANSLATORS: The bold strings B, B, B, B, +.\" B, and B are produced by the xz tool for scripts to +.\" parse, thus the untranslated strings must be included in the translated +.\" man page. It may be useful to provide a translated string in parenthesis +.\" without bold, for example: "B (nimi)" +.B name +This is always the first line when starting to list a file. +The second column on the line is the filename. +.TP +.B file +This line contains overall information about the +.B .xz +file. +This line is always printed after the +.B name +line. +.TP +.B stream +This line type is used only when +.B \-\-verbose +was specified. +There are as many +.B stream +lines as there are streams in the +.B .xz +file. +.TP +.B block +This line type is used only when +.B \-\-verbose +was specified. +There are as many +.B block +lines as there are blocks in the +.B .xz +file. +The +.B block +lines are shown after all the +.B stream +lines; different line types are not interleaved. +.TP +.B summary +This line type is used only when +.B \-\-verbose +was specified twice. +This line is printed after all +.B block +lines. +Like the +.B file +line, the +.B summary +line contains overall information about the +.B .xz +file. +.TP +.B totals +This line is always the very last line of the list output. +It shows the total counts and sizes. +.PP +The columns of the +.B file +lines: +.PD 0 +.RS +.IP 2. 4 +Number of streams in the file +.IP 3. 4 +Total number of blocks in the stream(s) +.IP 4. 4 +Compressed size of the file +.IP 5. 4 +Uncompressed size of the file +.IP 6. 4 +Compression ratio, for example, +.BR 0.123 . +If ratio is over 9.999, three dashes +.RB ( \-\-\- ) +are displayed instead of the ratio. +.IP 7. 4 +Comma-separated list of integrity check names. +The following strings are used for the known check types: +.\" TRANSLATORS: Don't translate the bold strings B, B, +.\" B, B, or B here. In robot mode, xz produces +.\" them in untranslated form for scripts to parse. +.BR None , +.BR CRC32 , +.BR CRC64 , +and +.BR SHA\-256 . +For unknown check types, +.BI Unknown\- N +is used, where +.I N +is the Check ID as a decimal number (one or two digits). +.IP 8. 4 +Total size of stream padding in the file +.RE +.PD +.PP +The columns of the +.B stream +lines: +.PD 0 +.RS +.IP 2. 4 +Stream number (the first stream is 1) +.IP 3. 4 +Number of blocks in the stream +.IP 4. 4 +Compressed start offset +.IP 5. 4 +Uncompressed start offset +.IP 6. 4 +Compressed size (does not include stream padding) +.IP 7. 4 +Uncompressed size +.IP 8. 4 +Compression ratio +.IP 9. 4 +Name of the integrity check +.IP 10. 4 +Size of stream padding +.RE +.PD +.PP +The columns of the +.B block +lines: +.PD 0 +.RS +.IP 2. 4 +Number of the stream containing this block +.IP 3. 4 +Block number relative to the beginning of the stream +(the first block is 1) +.IP 4. 4 +Block number relative to the beginning of the file +.IP 5. 4 +Compressed start offset relative to the beginning of the file +.IP 6. 4 +Uncompressed start offset relative to the beginning of the file +.IP 7. 4 +Total compressed size of the block (includes headers) +.IP 8. 4 +Uncompressed size +.IP 9. 4 +Compression ratio +.IP 10. 4 +Name of the integrity check +.RE +.PD +.PP +If +.B \-\-verbose +was specified twice, additional columns are included on the +.B block +lines. +These are not displayed with a single +.BR \-\-verbose , +because getting this information requires many seeks +and can thus be slow: +.PD 0 +.RS +.IP 11. 4 +Value of the integrity check in hexadecimal +.IP 12. 4 +Block header size +.IP 13. 4 +Block flags: +.B c +indicates that compressed size is present, and +.B u +indicates that uncompressed size is present. +If the flag is not set, a dash +.RB ( \- ) +is shown instead to keep the string length fixed. +New flags may be added to the end of the string in the future. +.IP 14. 4 +Size of the actual compressed data in the block (this excludes +the block header, block padding, and check fields) +.IP 15. 4 +Amount of memory (in bytes) required to decompress +this block with this +.B xz +version +.IP 16. 4 +Filter chain. +Note that most of the options used at compression time +cannot be known, because only the options +that are needed for decompression are stored in the +.B .xz +headers. +.RE +.PD +.PP +The columns of the +.B summary +lines: +.PD 0 +.RS +.IP 2. 4 +Amount of memory (in bytes) required to decompress +this file with this +.B xz +version +.IP 3. 4 +.B yes +or +.B no +indicating if all block headers have both compressed size and +uncompressed size stored in them +.PP +.I Since +.B xz +.I 5.1.2alpha: +.IP 4. 4 +Minimum +.B xz +version required to decompress the file +.RE +.PD +.PP +The columns of the +.B totals +line: +.PD 0 +.RS +.IP 2. 4 +Number of streams +.IP 3. 4 +Number of blocks +.IP 4. 4 +Compressed size +.IP 5. 4 +Uncompressed size +.IP 6. 4 +Average compression ratio +.IP 7. 4 +Comma-separated list of integrity check names +that were present in the files +.IP 8. 4 +Stream padding size +.IP 9. 4 +Number of files. +This is here to +keep the order of the earlier columns the same as on +.B file +lines. +.PD +.RE +.PP +If +.B \-\-verbose +was specified twice, additional columns are included on the +.B totals +line: +.PD 0 +.RS +.IP 10. 4 +Maximum amount of memory (in bytes) required to decompress +the files with this +.B xz +version +.IP 11. 4 +.B yes +or +.B no +indicating if all block headers have both compressed size and +uncompressed size stored in them +.PP +.I Since +.B xz +.I 5.1.2alpha: +.IP 12. 4 +Minimum +.B xz +version required to decompress the file +.RE +.PD +.PP +Future versions may add new line types and +new columns can be added to the existing line types, +but the existing columns won't be changed. +. +.SS "Filters help" +.B "xz \-\-robot \-\-filters-help" +prints the supported filters in the following format: +.PP +\fIfilter\fB:\fIoption\fB=<\fIvalue\fB>,\fIoption\fB=<\fIvalue\fB>\fR... +.TP +.I filter +Name of the filter +.TP +.I option +Name of a filter specific option +.TP +.I value +Numeric +.I value +ranges appear as +\fB<\fImin\fB\-\fImax\fB>\fR. +String +.I value +choices are shown within +.B "< >" +and separated by a +.B | +character. +.PP +Each filter is printed on its own line. +. +.SS "Memory limit information" +.B "xz \-\-robot \-\-info\-memory" +prints a single line with multiple tab-separated columns: +.IP 1. 4 +Total amount of physical memory (RAM) in bytes. +.IP 2. 4 +Memory usage limit for compression in bytes +.RB ( \-\-memlimit\-compress ). +A special value of +.B 0 +indicates the default setting +which for single-threaded mode is the same as no limit. +.IP 3. 4 +Memory usage limit for decompression in bytes +.RB ( \-\-memlimit\-decompress ). +A special value of +.B 0 +indicates the default setting +which for single-threaded mode is the same as no limit. +.IP 4. 4 +Since +.B xz +5.3.4alpha: +Memory usage for multi-threaded decompression in bytes +.RB ( \-\-memlimit\-mt\-decompress ). +This is never zero because a system-specific default value +shown in the column 5 +is used if no limit has been specified explicitly. +This is also never greater than the value in the column 3 +even if a larger value has been specified with +.BR \-\-memlimit\-mt\-decompress . +.IP 5. 4 +Since +.B xz +5.3.4alpha: +A system-specific default memory usage limit +that is used to limit the number of threads +when compressing with an automatic number of threads +.RB ( \-\-threads=0 ) +and no memory usage limit has been specified +.RB ( \-\-memlimit\-compress ). +This is also used as the default value for +.BR \-\-memlimit\-mt\-decompress . +.IP 6. 4 +Since +.B xz +5.3.4alpha: +Number of available processor threads. +.PP +In the future, the output of +.B "xz \-\-robot \-\-info\-memory" +may have more columns, but never more than a single line. +. +.SS Version +.B "xz \-\-robot \-\-version" +prints the version number of +.B xz +and liblzma in the following format: +.PP +.\" TRANSLATORS: Don't translate the uppercase XZ_VERSION or LIBLZMA_VERSION. +.BI XZ_VERSION= XYYYZZZS +.br +.BI LIBLZMA_VERSION= XYYYZZZS +.TP +.I X +Major version. +.TP +.I YYY +Minor version. +Even numbers are stable. +Odd numbers are alpha or beta versions. +.TP +.I ZZZ +Patch level for stable releases or +just a counter for development releases. +.TP +.I S +Stability. +0 is alpha, 1 is beta, and 2 is stable. +.I S +should be always 2 when +.I YYY +is even. +.PP +.I XYYYZZZS +are the same on both lines if +.B xz +and liblzma are from the same XZ Utils release. +.PP +Examples: 4.999.9beta is +.B 49990091 +and +5.0.0 is +.BR 50000002 . +. +.SH "EXIT STATUS" +.TP +.B 0 +All is good. +.TP +.B 1 +An error occurred. +.TP +.B 2 +Something worth a warning occurred, +but no actual errors occurred. +.PP +Notices (not warnings or errors) printed on standard error +don't affect the exit status. +. +.SH ENVIRONMENT +.B xz +parses space-separated lists of options +from the environment variables +.\" TRANSLATORS: Don't translate the uppercase XZ_DEFAULTS or XZ_OPT. +.\" They are names of environment variables. +.B XZ_DEFAULTS +and +.BR XZ_OPT , +in this order, before parsing the options from the command line. +Note that only options are parsed from the environment variables; +all non-options are silently ignored. +Parsing is done with +.BR getopt_long (3) +which is used also for the command line arguments. +.PP +.B Warning: +By setting these environment variables, +one is effectively modifying programs and scripts that run +.BR xz . +Most of the time it is safe to set memory usage limits, number of threads, +and compression options via the environment variables. +However, some options can break scripts. +An obvious example is +.B \-\-help +which makes +.B xz +show the help text instead of compressing or decompressing a file. +More subtle examples are +.B \-\-quiet +and +.BR \-\-verbose . +In many cases it works well to enable the progress indicator using +.BR \-\-verbose , +but in some situations the extra messages create problems. +The verbosity level also affects the behavior of +.BR \-\-list . +.TP +.B XZ_DEFAULTS +User-specific or system-wide default options. +Typically this is set in a shell initialization script to enable +.BR xz 's +memory usage limiter by default or set the default number of threads. +Excluding shell initialization scripts +and similar special cases, scripts should never set or unset +.BR XZ_DEFAULTS . +.TP +.B XZ_OPT +This is for passing options to +.B xz +when it is not possible to set the options directly on the +.B xz +command line. +This is the case when +.B xz +is run by a script or tool, for example, GNU +.BR tar (1): +.RS +.RS +.PP +.nf +.ft CR +XZ_OPT=\-2v tar caf foo.tar.xz foo +.ft R +.fi +.RE +.RE +.IP "" +Scripts may use +.BR XZ_OPT , +for example, to set script-specific default compression options. +It is still recommended to allow users to override +.B XZ_OPT +if that is reasonable. +For example, in +.BR sh (1) +scripts one may use something like this: +.RS +.RS +.PP +.nf +.ft CR +XZ_OPT=${XZ_OPT\-"\-7e"} +export XZ_OPT +.ft R +.fi +.RE +.RE +. +.SH "LZMA UTILS COMPATIBILITY" +The command line syntax of +.B xz +is practically a superset of +.BR lzma , +.BR unlzma , +and +.B lzcat +as found from LZMA Utils 4.32.x. +In most cases, it is possible to replace +LZMA Utils with XZ Utils without breaking existing scripts. +There are some incompatibilities though, +which may sometimes cause problems. +. +.SS "Compression preset levels" +The numbering of the compression level presets is not identical in +.B xz +and LZMA Utils. +The most important difference is how dictionary sizes +are mapped to different presets. +Dictionary size is roughly equal to the decompressor memory usage. +.RS +.PP +.TS +tab(;); +c c c +c n n. +Level;xz;LZMA Utils +\-0;256 KiB;N/A +\-1;1 MiB;64 KiB +\-2;2 MiB;1 MiB +\-3;4 MiB;512 KiB +\-4;4 MiB;1 MiB +\-5;8 MiB;2 MiB +\-6;8 MiB;4 MiB +\-7;16 MiB;8 MiB +\-8;32 MiB;16 MiB +\-9;64 MiB;32 MiB +.TE +.RE +.PP +The dictionary size differences affect +the compressor memory usage too, +but there are some other differences between +LZMA Utils and XZ Utils, which +make the difference even bigger: +.RS +.PP +.TS +tab(;); +c c c +c n n. +Level;xz;LZMA Utils 4.32.x +\-0;3 MiB;N/A +\-1;9 MiB;2 MiB +\-2;17 MiB;12 MiB +\-3;32 MiB;12 MiB +\-4;48 MiB;16 MiB +\-5;94 MiB;26 MiB +\-6;94 MiB;45 MiB +\-7;186 MiB;83 MiB +\-8;370 MiB;159 MiB +\-9;674 MiB;311 MiB +.TE +.RE +.PP +The default preset level in LZMA Utils is +.B \-7 +while in XZ Utils it is +.BR \-6 , +so both use an 8 MiB dictionary by default. +. +.SS "Streamed vs. non-streamed .lzma files" +The uncompressed size of the file can be stored in the +.B .lzma +header. +LZMA Utils does that when compressing regular files. +The alternative is to mark that uncompressed size is unknown +and use end-of-payload marker to indicate +where the decompressor should stop. +LZMA Utils uses this method when uncompressed size isn't known, +which is the case, for example, in pipes. +.PP +.B xz +supports decompressing +.B .lzma +files with or without end-of-payload marker, but all +.B .lzma +files created by +.B xz +will use end-of-payload marker and have uncompressed size +marked as unknown in the +.B .lzma +header. +This may be a problem in some uncommon situations. +For example, a +.B .lzma +decompressor in an embedded device might work +only with files that have known uncompressed size. +If you hit this problem, you need to use LZMA Utils +or LZMA SDK to create +.B .lzma +files with known uncompressed size. +. +.SS "Unsupported .lzma files" +The +.B .lzma +format allows +.I lc +values up to 8, and +.I lp +values up to 4. +LZMA Utils can decompress files with any +.I lc +and +.IR lp , +but always creates files with +.B lc=3 +and +.BR lp=0 . +Creating files with other +.I lc +and +.I lp +is possible with +.B xz +and with LZMA SDK. +.PP +The implementation of the LZMA1 filter in liblzma +requires that the sum of +.I lc +and +.I lp +must not exceed 4. +Thus, +.B .lzma +files, which exceed this limitation, cannot be decompressed with +.BR xz . +.PP +LZMA Utils creates only +.B .lzma +files which have a dictionary size of +.RI "2^" n +(a power of 2) but accepts files with any dictionary size. +liblzma accepts only +.B .lzma +files which have a dictionary size of +.RI "2^" n +or +.RI "2^" n " + 2^(" n "\-1)." +This is to decrease false positives when detecting +.B .lzma +files. +.PP +These limitations shouldn't be a problem in practice, +since practically all +.B .lzma +files have been compressed with settings that liblzma will accept. +. +.SS "Trailing garbage" +When decompressing, +LZMA Utils silently ignore everything after the first +.B .lzma +stream. +In most situations, this is a bug. +This also means that LZMA Utils +don't support decompressing concatenated +.B .lzma +files. +.PP +If there is data left after the first +.B .lzma +stream, +.B xz +considers the file to be corrupt unless +.B \-\-single\-stream +was used. +This may break obscure scripts which have +assumed that trailing garbage is ignored. +. +.SH NOTES +. +.SS "Compressed output may vary" +The exact compressed output produced from +the same uncompressed input file +may vary between XZ Utils versions even if +compression options are identical. +This is because the encoder can be improved +(faster or better compression) +without affecting the file format. +The output can vary even between different +builds of the same XZ Utils version, +if different build options are used. +.PP +The above means that once +.B \-\-rsyncable +has been implemented, +the resulting files won't necessarily be rsyncable +unless both old and new files have been compressed +with the same xz version. +This problem can be fixed if a part of the encoder +implementation is frozen to keep rsyncable output +stable across xz versions. +. +.SS "Embedded .xz decompressors" +Embedded +.B .xz +decompressor implementations like XZ Embedded don't necessarily +support files created with integrity +.I check +types other than +.B none +and +.BR crc32 . +Since the default is +.BR \-\-check=crc64 , +you must use +.B \-\-check=none +or +.B \-\-check=crc32 +when creating files for embedded systems. +.PP +Outside embedded systems, all +.B .xz +format decompressors support all the +.I check +types, or at least are able to decompress +the file without verifying the +integrity check if the particular +.I check +is not supported. +.PP +XZ Embedded supports BCJ filters, +but only with the default start offset. +. +.SH EXAMPLES +. +.SS Basics +Compress the file +.I foo +into +.I foo.xz +using the default compression level +.RB ( \-6 ), +and remove +.I foo +if compression is successful: +.RS +.PP +.nf +.ft CR +xz foo +.ft R +.fi +.RE +.PP +Decompress +.I bar.xz +into +.I bar +and don't remove +.I bar.xz +even if decompression is successful: +.RS +.PP +.nf +.ft CR +xz \-dk bar.xz +.ft R +.fi +.RE +.PP +Create +.I baz.tar.xz +with the preset +.B \-4e +.RB ( "\-4 \-\-extreme" ), +which is slower than the default +.BR \-6 , +but needs less memory for compression and decompression (48\ MiB +and 5\ MiB, respectively): +.RS +.PP +.nf +.ft CR +tar cf \- baz | xz \-4e > baz.tar.xz +.ft R +.fi +.RE +.PP +A mix of compressed and uncompressed files can be decompressed +to standard output with a single command: +.RS +.PP +.nf +.ft CR +xz \-dcf a.txt b.txt.xz c.txt d.txt.lzma > abcd.txt +.ft R +.fi +.RE +. +.SS "Parallel compression of many files" +On GNU and *BSD, +.BR find (1) +and +.BR xargs (1) +can be used to parallelize compression of many files: +.RS +.PP +.nf +.ft CR +find . \-type f \e! \-name '*.xz' \-print0 \e + | xargs \-0r \-P4 \-n16 xz \-T1 +.ft R +.fi +.RE +.PP +The +.B \-P +option to +.BR xargs (1) +sets the number of parallel +.B xz +processes. +The best value for the +.B \-n +option depends on how many files there are to be compressed. +If there are only a couple of files, +the value should probably be 1; +with tens of thousands of files, +100 or even more may be appropriate to reduce the number of +.B xz +processes that +.BR xargs (1) +will eventually create. +.PP +The option +.B \-T1 +for +.B xz +is there to force it to single-threaded mode, because +.BR xargs (1) +is used to control the amount of parallelization. +. +.SS "Robot mode" +Calculate how many bytes have been saved in total +after compressing multiple files: +.RS +.PP +.nf +.ft CR +xz \-\-robot \-\-list *.xz | awk '/^totals/{print $5\-$4}' +.ft R +.fi +.RE +.PP +A script may want to know that it is using new enough +.BR xz . +The following +.BR sh (1) +script checks that the version number of the +.B xz +tool is at least 5.0.0. +This method is compatible with old beta versions, +which didn't support the +.B \-\-robot +option: +.RS +.PP +.nf +.ft CR +if ! eval "$(xz \-\-robot \-\-version 2> /dev/null)" || + [ "$XZ_VERSION" \-lt 50000002 ]; then + echo "Your xz is too old." +fi +unset XZ_VERSION LIBLZMA_VERSION +.ft R +.fi +.RE +.PP +Set a memory usage limit for decompression using +.BR XZ_OPT , +but if a limit has already been set, don't increase it: +.RS +.PP +.nf +.ft CR +NEWLIM=$((123 << 20))\ \ # 123 MiB +OLDLIM=$(xz \-\-robot \-\-info\-memory | cut \-f3) +if [ $OLDLIM \-eq 0 \-o $OLDLIM \-gt $NEWLIM ]; then + XZ_OPT="$XZ_OPT \-\-memlimit\-decompress=$NEWLIM" + export XZ_OPT +fi +.ft R +.fi +.RE +. +.SS "Custom compressor filter chains" +The simplest use for custom filter chains is +customizing a LZMA2 preset. +This can be useful, +because the presets cover only a subset of the +potentially useful combinations of compression settings. +.PP +The CompCPU columns of the tables +from the descriptions of the options +.BR "\-0" " ... " "\-9" +and +.B \-\-extreme +are useful when customizing LZMA2 presets. +Here are the relevant parts collected from those two tables: +.RS +.PP +.TS +tab(;); +c c +n n. +Preset;CompCPU +\-0;0 +\-1;1 +\-2;2 +\-3;3 +\-4;4 +\-5;5 +\-6;6 +\-5e;7 +\-6e;8 +.TE +.RE +.PP +If you know that a file requires +somewhat big dictionary (for example, 32\ MiB) to compress well, +but you want to compress it quicker than +.B "xz \-8" +would do, a preset with a low CompCPU value (for example, 1) +can be modified to use a bigger dictionary: +.RS +.PP +.nf +.ft CR +xz \-\-lzma2=preset=1,dict=32MiB foo.tar +.ft R +.fi +.RE +.PP +With certain files, the above command may be faster than +.B "xz \-6" +while compressing significantly better. +However, it must be emphasized that only some files benefit from +a big dictionary while keeping the CompCPU value low. +The most obvious situation, +where a big dictionary can help a lot, +is an archive containing very similar files +of at least a few megabytes each. +The dictionary size has to be significantly bigger +than any individual file to allow LZMA2 to take +full advantage of the similarities between consecutive files. +.PP +If very high compressor and decompressor memory usage is fine, +and the file being compressed is +at least several hundred megabytes, it may be useful +to use an even bigger dictionary than the 64 MiB that +.B "xz \-9" +would use: +.RS +.PP +.nf +.ft CR +xz \-vv \-\-lzma2=dict=192MiB big_foo.tar +.ft R +.fi +.RE +.PP +Using +.B \-vv +.RB ( "\-\-verbose \-\-verbose" ) +like in the above example can be useful +to see the memory requirements +of the compressor and decompressor. +Remember that using a dictionary bigger than +the size of the uncompressed file is waste of memory, +so the above command isn't useful for small files. +.PP +Sometimes the compression time doesn't matter, +but the decompressor memory usage has to be kept low, for example, +to make it possible to decompress the file on an embedded system. +The following command uses +.B \-6e +.RB ( "\-6 \-\-extreme" ) +as a base and sets the dictionary to only 64\ KiB. +The resulting file can be decompressed with XZ Embedded +(that's why there is +.BR \-\-check=crc32 ) +using about 100\ KiB of memory. +.RS +.PP +.nf +.ft CR +xz \-\-check=crc32 \-\-lzma2=preset=6e,dict=64KiB foo +.ft R +.fi +.RE +.PP +If you want to squeeze out as many bytes as possible, +adjusting the number of literal context bits +.RI ( lc ) +and number of position bits +.RI ( pb ) +can sometimes help. +Adjusting the number of literal position bits +.RI ( lp ) +might help too, but usually +.I lc +and +.I pb +are more important. +For example, a source code archive contains mostly US-ASCII text, +so something like the following might give +slightly (like 0.1\ %) smaller file than +.B "xz \-6e" +(try also without +.BR lc=4 ): +.RS +.PP +.nf +.ft CR +xz \-\-lzma2=preset=6e,pb=0,lc=4 source_code.tar +.ft R +.fi +.RE +.PP +Using another filter together with LZMA2 can improve +compression with certain file types. +For example, to compress a x86-32 or x86-64 shared library +using the x86 BCJ filter: +.RS +.PP +.nf +.ft CR +xz \-\-x86 \-\-lzma2 libfoo.so +.ft R +.fi +.RE +.PP +Note that the order of the filter options is significant. +If +.B \-\-x86 +is specified after +.BR \-\-lzma2 , +.B xz +will give an error, +because there cannot be any filter after LZMA2, +and also because the x86 BCJ filter cannot be used +as the last filter in the chain. +.PP +The Delta filter together with LZMA2 +can give good results with bitmap images. +It should usually beat PNG, +which has a few more advanced filters than simple +delta but uses Deflate for the actual compression. +.PP +The image has to be saved in uncompressed format, +for example, as uncompressed TIFF. +The distance parameter of the Delta filter is set +to match the number of bytes per pixel in the image. +For example, 24-bit RGB bitmap needs +.BR dist=3 , +and it is also good to pass +.B pb=0 +to LZMA2 to accommodate the three-byte alignment: +.RS +.PP +.nf +.ft CR +xz \-\-delta=dist=3 \-\-lzma2=pb=0 foo.tiff +.ft R +.fi +.RE +.PP +If multiple images have been put into a single archive (for example, +.BR .tar ), +the Delta filter will work on that too as long as all images +have the same number of bytes per pixel. +. +.SH "SEE ALSO" +.BR xzdec (1), +.BR xzdiff (1), +.BR xzgrep (1), +.BR xzless (1), +.BR xzmore (1), +.BR gzip (1), +.BR bzip2 (1), +.BR 7z (1) +.PP +XZ Utils: +.br +XZ Embedded: +.br +LZMA SDK: diff --git a/src/native/external/xz/src/xz/xz_w32res.rc b/src/native/external/xz/src/xz/xz_w32res.rc new file mode 100644 index 00000000000000..e80903eb726a2d --- /dev/null +++ b/src/native/external/xz/src/xz/xz_w32res.rc @@ -0,0 +1,11 @@ +/* SPDX-License-Identifier: 0BSD */ + +/* + * Author: Lasse Collin + */ + +#define MY_TYPE VFT_APP +#define MY_NAME "xz" +#define MY_SUFFIX ".exe" +#define MY_DESC "xz data compression tool for .xz and .lzma files" +#include "common_w32res.rc" diff --git a/src/native/external/xz/src/xzdec/Makefile.am b/src/native/external/xz/src/xzdec/Makefile.am new file mode 100644 index 00000000000000..c2810479ac28c8 --- /dev/null +++ b/src/native/external/xz/src/xzdec/Makefile.am @@ -0,0 +1,111 @@ +## SPDX-License-Identifier: 0BSD +## Author: Lasse Collin + +# Windows resource compiler support. It's fine to use xz_CPPFLAGS +# also for lzmadec. +.rc.o: + $(RC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \ + $(xzdec_CPPFLAGS) $(CPPFLAGS) $(RCFLAGS) -i $< -o $@ + + +xzdec_SOURCES = \ + xzdec.c \ + ../common/tuklib_progname.c \ + ../common/tuklib_mbstr_nonprint.c \ + ../common/tuklib_exit.c + +if COND_W32 +xzdec_SOURCES += xzdec_w32res.rc +endif + +xzdec_CPPFLAGS = \ + -DTUKLIB_GETTEXT=0 \ + -I$(top_srcdir)/src/common \ + -I$(top_srcdir)/src/liblzma/api +xzdec_LDADD = $(top_builddir)/src/liblzma/liblzma.la + +if COND_GNULIB +xzdec_CPPFLAGS += \ + -I$(top_builddir)/lib \ + -I$(top_srcdir)/lib +xzdec_LDADD += $(top_builddir)/lib/libgnu.a +endif + +xzdec_LDADD += $(LTLIBINTL) + + +lzmadec_SOURCES = \ + xzdec.c \ + ../common/tuklib_progname.c \ + ../common/tuklib_mbstr_nonprint.c \ + ../common/tuklib_exit.c + +if COND_W32 +lzmadec_SOURCES += lzmadec_w32res.rc +endif + +lzmadec_CPPFLAGS = $(xzdec_CPPFLAGS) -DLZMADEC +lzmadec_LDFLAGS = $(xzdec_LDFLAGS) +lzmadec_LDADD = $(xzdec_LDADD) + + +bin_PROGRAMS = +lzmadecmanlink = + +if COND_XZDEC +bin_PROGRAMS += xzdec +dist_man_MANS = xzdec.1 +endif + +if COND_LZMADEC +bin_PROGRAMS += lzmadec + +# Create the symlink lzmadec.1->xzdec.1 only if xzdec.1 was installed. +# This is better than creating a dangling symlink. The correct solution +# would be to install xzdec.1 as lzmadec.1 but this code is already too +# complicated so I won't do it. Installing only lzmadec is a bit unusual +# situation anyway so it's not that important. +if COND_XZDEC +lzmadecmanlink += lzmadec +endif +endif + +if COND_XZDEC +# The installation of translated man pages abuses Automake internals +# by calling "install-man" with redefined dist_man_MANS and man_MANS. +# If this breaks some day, don't blame Automake developers. +install-data-hook: + languages= ; \ + if test "$(USE_NLS)" = yes && test -d "$(top_srcdir)/po4a/man"; then \ + languages=`ls "$(top_srcdir)/po4a/man"`; \ + fi; \ + target=`echo xzdec | sed '$(transform)'` && \ + link=`echo lzmadec | sed '$(transform)'` && \ + for lang in . $$languages; do \ + man="$(top_srcdir)/po4a/man/$$lang/xzdec.1" ; \ + if test -f "$$man"; then \ + $(MAKE) dist_man_MANS="$$man" man_MANS= \ + mandir="$(mandir)/$$lang" install-man; \ + fi; \ + man1dir="$(DESTDIR)$(mandir)/$$lang/man1" && \ + if test -f "$$man1dir/$$target.1"; then \ + if test -n "$(lzmadecmanlink)"; then ( \ + cd "$$man1dir" && \ + rm -f "$$link.1" && \ + $(LN_S) "$$target.1" "$$link.1" \ + ); fi; \ + fi; \ + done + +uninstall-hook: + languages= ; \ + if test "$(USE_NLS)" = yes && test -d "$(top_srcdir)/po4a/man"; then \ + languages=`ls "$(top_srcdir)/po4a/man"`; \ + fi; \ + for lang in . $$languages; do \ + for name in xzdec $(lzmadecmanlink); do \ + name=`echo $$name | sed '$(transform)'` && \ + rm -f "$(DESTDIR)$(mandir)/$$lang/man1/$$name.1"; \ + done; \ + done +endif diff --git a/src/native/external/xz/src/xzdec/lzmadec_w32res.rc b/src/native/external/xz/src/xzdec/lzmadec_w32res.rc new file mode 100644 index 00000000000000..b4b89b1f3080a6 --- /dev/null +++ b/src/native/external/xz/src/xzdec/lzmadec_w32res.rc @@ -0,0 +1,11 @@ +/* SPDX-License-Identifier: 0BSD */ + +/* + * Author: Lasse Collin + */ + +#define MY_TYPE VFT_APP +#define MY_NAME "lzmadec" +#define MY_SUFFIX ".exe" +#define MY_DESC "lzmadec decompression tool for .lzma files" +#include "common_w32res.rc" diff --git a/src/native/external/xz/src/xzdec/xzdec.1 b/src/native/external/xz/src/xzdec/xzdec.1 new file mode 100644 index 00000000000000..5198efb4612e2b --- /dev/null +++ b/src/native/external/xz/src/xzdec/xzdec.1 @@ -0,0 +1,144 @@ +.\" SPDX-License-Identifier: 0BSD +.\" +.\" Author: Lasse Collin +.\" +.TH XZDEC 1 "2024-04-08" "Tukaani" "XZ Utils" +.SH NAME +xzdec, lzmadec \- Small .xz and .lzma decompressors +.SH SYNOPSIS +.B xzdec +.RI [ option... ] +.RI [ file... ] +.br +.B lzmadec +.RI [ option... ] +.RI [ file... ] +.SH DESCRIPTION +.B xzdec +is a liblzma-based decompression-only tool for +.B .xz +(and only +.BR .xz ) +files. +.B xzdec +is intended to work as a drop-in replacement for +.BR xz (1) +in the most common situations where a script +has been written to use +.B "xz \-\-decompress \-\-stdout" +(and possibly a few other commonly used options) to decompress +.B .xz +files. +.B lzmadec +is identical to +.B xzdec +except that +.B lzmadec +supports +.B .lzma +files instead of +.B .xz +files. +.PP +To reduce the size of the executable, +.B xzdec +doesn't support multithreading or localization, +and doesn't read options from +.B XZ_DEFAULTS +and +.B XZ_OPT +environment variables. +.B xzdec +doesn't support displaying intermediate progress information: sending +.B SIGINFO +to +.B xzdec +does nothing, but sending +.B SIGUSR1 +terminates the process instead of displaying progress information. +.SH OPTIONS +.TP +.BR \-d ", " \-\-decompress ", " \-\-uncompress +Ignored for +.BR xz (1) +compatibility. +.B xzdec +supports only decompression. +.TP +.BR \-k ", " \-\-keep +Ignored for +.BR xz (1) +compatibility. +.B xzdec +never creates or removes any files. +.TP +.BR \-c ", " \-\-stdout ", " \-\-to-stdout +Ignored for +.BR xz (1) +compatibility. +.B xzdec +always writes the decompressed data to standard output. +.TP +.BR \-q ", " \-\-quiet +Specifying this once does nothing since +.B xzdec +never displays any warnings or notices. +Specify this twice to suppress errors. +.TP +.BR \-Q ", " \-\-no-warn +Ignored for +.BR xz (1) +compatibility. +.B xzdec +never uses the exit status 2. +.TP +.BR \-h ", " \-\-help +Display a help message and exit successfully. +.TP +.BR \-V ", " \-\-version +Display the version number of +.B xzdec +and liblzma. +.SH "EXIT STATUS" +.TP +.B 0 +All was good. +.TP +.B 1 +An error occurred. +.PP +.B xzdec +doesn't have any warning messages like +.BR xz (1) +has, thus the exit status 2 is not used by +.BR xzdec . +.SH NOTES +Use +.BR xz (1) +instead of +.B xzdec +or +.B lzmadec +for normal everyday use. +.B xzdec +or +.B lzmadec +are meant only for situations where it is important to have +a smaller decompressor than the full-featured +.BR xz (1). +.PP +.B xzdec +and +.B lzmadec +are not really that small. +The size can be reduced further by dropping +features from liblzma at compile time, +but that shouldn't usually be done for executables distributed +in typical non-embedded operating system distributions. +If you need a truly small +.B .xz +decompressor, consider using XZ Embedded. +.SH "SEE ALSO" +.BR xz (1) +.PP +XZ Embedded: diff --git a/src/native/external/xz/src/xzdec/xzdec.c b/src/native/external/xz/src/xzdec/xzdec.c new file mode 100644 index 00000000000000..e1e2744911720f --- /dev/null +++ b/src/native/external/xz/src/xzdec/xzdec.c @@ -0,0 +1,489 @@ +// SPDX-License-Identifier: 0BSD + +/////////////////////////////////////////////////////////////////////////////// +// +/// \file xzdec.c +/// \brief Simple single-threaded tool to uncompress .xz or .lzma files +// +// Author: Lasse Collin +// +/////////////////////////////////////////////////////////////////////////////// + +#include "sysdefs.h" +#include "lzma.h" + +#include +#include +#include +#include + +#ifndef _MSC_VER +# include +#endif + +#ifdef HAVE_CAP_RIGHTS_LIMIT +# include +#endif + +#ifdef HAVE_LINUX_LANDLOCK +# include "my_landlock.h" +#endif + +#if defined(HAVE_CAP_RIGHTS_LIMIT) || defined(HAVE_PLEDGE) \ + || defined(HAVE_LINUX_LANDLOCK) +# define ENABLE_SANDBOX 1 +#endif + +#include "getopt.h" +#include "tuklib_progname.h" +#include "tuklib_mbstr_nonprint.h" +#include "tuklib_exit.h" + +#ifdef TUKLIB_DOSLIKE +# include +# include +# ifdef _MSC_VER +# define fileno _fileno +# define setmode _setmode +# endif +#endif + + +#ifdef LZMADEC +# define TOOL_FORMAT "lzma" +#else +# define TOOL_FORMAT "xz" +#endif + + +/// Error messages are suppressed if this is zero, which is the case when +/// --quiet has been given at least twice. +static int display_errors = 2; + + +lzma_attribute((__format__(__printf__, 1, 2))) +static void +my_errorf(const char *fmt, ...) +{ + va_list ap; + va_start(ap, fmt); + + if (display_errors) { + fprintf(stderr, "%s: ", progname); + vfprintf(stderr, fmt, ap); + fprintf(stderr, "\n"); + } + + va_end(ap); + return; +} + + +tuklib_attr_noreturn +static void +help(void) +{ + printf( +"Usage: %s [OPTION]... [FILE]...\n" +"Decompress files in the ." TOOL_FORMAT " format to standard output.\n" +"\n" +" -d, --decompress (ignored, only decompression is supported)\n" +" -k, --keep (ignored, files are never deleted)\n" +" -c, --stdout (ignored, output is always written to standard output)\n" +" -q, --quiet specify *twice* to suppress errors\n" +" -Q, --no-warn (ignored, the exit status 2 is never used)\n" +" -h, --help display this help and exit\n" +" -V, --version display the version number and exit\n" +"\n" +"With no FILE, or when FILE is -, read standard input.\n" +"\n" +"Report bugs to <" PACKAGE_BUGREPORT "> (in English or Finnish).\n" +PACKAGE_NAME " home page: <" PACKAGE_URL ">\n", progname); + + tuklib_exit(EXIT_SUCCESS, EXIT_FAILURE, display_errors); +} + + +tuklib_attr_noreturn +static void +version(void) +{ + printf(TOOL_FORMAT "dec (" PACKAGE_NAME ") " LZMA_VERSION_STRING "\n" + "liblzma %s\n", lzma_version_string()); + + tuklib_exit(EXIT_SUCCESS, EXIT_FAILURE, display_errors); +} + + +/// Parses command line options. +static void +parse_options(int argc, char **argv) +{ + static const char short_opts[] = "cdkhqQV"; + static const struct option long_opts[] = { + { "stdout", no_argument, NULL, 'c' }, + { "to-stdout", no_argument, NULL, 'c' }, + { "decompress", no_argument, NULL, 'd' }, + { "uncompress", no_argument, NULL, 'd' }, + { "keep", no_argument, NULL, 'k' }, + { "quiet", no_argument, NULL, 'q' }, + { "no-warn", no_argument, NULL, 'Q' }, + { "help", no_argument, NULL, 'h' }, + { "version", no_argument, NULL, 'V' }, + { NULL, 0, NULL, 0 } + }; + + int c; + + while ((c = getopt_long(argc, argv, short_opts, long_opts, NULL)) + != -1) { + switch (c) { + case 'c': + case 'd': + case 'k': + case 'Q': + break; + + case 'q': + if (display_errors > 0) + --display_errors; + + break; + + case 'h': + help(); + + case 'V': + version(); + + default: + exit(EXIT_FAILURE); + } + } + + return; +} + + +static void +uncompress(lzma_stream *strm, FILE *file, const char *filename) +{ + lzma_ret ret; + + // Initialize the decoder +#ifdef LZMADEC + ret = lzma_alone_decoder(strm, UINT64_MAX); +#else + ret = lzma_stream_decoder(strm, UINT64_MAX, LZMA_CONCATENATED); +#endif + + // The only reasonable error here is LZMA_MEM_ERROR. + if (ret != LZMA_OK) { + my_errorf("%s", ret == LZMA_MEM_ERROR ? strerror(ENOMEM) + : "Internal error (bug)"); + exit(EXIT_FAILURE); + } + + // Input and output buffers + uint8_t in_buf[BUFSIZ]; + uint8_t out_buf[BUFSIZ]; + + strm->avail_in = 0; + strm->next_out = out_buf; + strm->avail_out = BUFSIZ; + + lzma_action action = LZMA_RUN; + + while (true) { + if (strm->avail_in == 0) { + strm->next_in = in_buf; + strm->avail_in = fread(in_buf, 1, BUFSIZ, file); + + if (ferror(file)) { + // POSIX says that fread() sets errno if + // an error occurred. ferror() doesn't + // touch errno. + my_errorf("%s: Error reading input file: %s", + tuklib_mask_nonprint(filename), + strerror(errno)); + exit(EXIT_FAILURE); + } + +#ifndef LZMADEC + // When using LZMA_CONCATENATED, we need to tell + // liblzma when it has got all the input. + if (feof(file)) + action = LZMA_FINISH; +#endif + } + + ret = lzma_code(strm, action); + + // Write and check write error before checking decoder error. + // This way as much data as possible gets written to output + // even if decoder detected an error. + if (strm->avail_out == 0 || ret != LZMA_OK) { + const size_t write_size = BUFSIZ - strm->avail_out; + + if (fwrite(out_buf, 1, write_size, stdout) + != write_size) { + // Wouldn't be a surprise if writing to stderr + // would fail too but at least try to show an + // error message. +#if defined(_WIN32) && !defined(__CYGWIN__) + // On native Windows, broken pipe is reported + // as EINVAL. Don't show an error message + // in this case. + if (errno != EINVAL) +#endif + { + my_errorf("Cannot write to " + "standard output: " + "%s", strerror(errno)); + } + exit(EXIT_FAILURE); + } + + strm->next_out = out_buf; + strm->avail_out = BUFSIZ; + } + + if (ret != LZMA_OK) { + if (ret == LZMA_STREAM_END) { +#ifdef LZMADEC + // Check that there's no trailing garbage. + if (strm->avail_in != 0 + || fread(in_buf, 1, 1, file) + != 0 + || !feof(file)) + ret = LZMA_DATA_ERROR; + else + return; +#else + // lzma_stream_decoder() already guarantees + // that there's no trailing garbage. + assert(strm->avail_in == 0); + assert(action == LZMA_FINISH); + assert(feof(file)); + return; +#endif + } + + const char *msg; + switch (ret) { + case LZMA_MEM_ERROR: + msg = strerror(ENOMEM); + break; + + case LZMA_FORMAT_ERROR: + msg = "File format not recognized"; + break; + + case LZMA_OPTIONS_ERROR: + // FIXME: Better message? + msg = "Unsupported compression options"; + break; + + case LZMA_DATA_ERROR: + msg = "File is corrupt"; + break; + + case LZMA_BUF_ERROR: + msg = "Unexpected end of input"; + break; + + default: + msg = "Internal error (bug)"; + break; + } + + my_errorf("%s: %s", tuklib_mask_nonprint(filename), + msg); + exit(EXIT_FAILURE); + } + } +} + + +#ifdef ENABLE_SANDBOX +static void +sandbox_enter(int src_fd) +{ +#if defined(HAVE_CAP_RIGHTS_LIMIT) + // Capsicum needs FreeBSD 10.2 or later. + cap_rights_t rights; + + if (cap_enter()) + goto error; + + if (cap_rights_limit(src_fd, cap_rights_init(&rights, CAP_READ))) + goto error; + + // If not reading from stdin, remove all capabilities from it. + if (src_fd != STDIN_FILENO && cap_rights_limit( + STDIN_FILENO, cap_rights_init(&rights))) + goto error; + + if (cap_rights_limit(STDOUT_FILENO, cap_rights_init(&rights, + CAP_WRITE))) + goto error; + + if (cap_rights_limit(STDERR_FILENO, cap_rights_init(&rights, + CAP_WRITE))) + goto error; + +#elif defined(HAVE_PLEDGE) + // pledge() was introduced in OpenBSD 5.9. + if (pledge("stdio", "")) + goto error; + + (void)src_fd; + +#elif defined(HAVE_LINUX_LANDLOCK) + struct landlock_ruleset_attr attr; + if (my_landlock_ruleset_attr_forbid_all(&attr) > 0) { + const int ruleset_fd = my_landlock_create_ruleset( + &attr, sizeof(attr), 0); + if (ruleset_fd < 0) + goto error; + + // All files we need should have already been opened. Thus, + // we don't need to add any rules using landlock_add_rule(2) + // before activating the sandbox. + if (my_landlock_restrict_self(ruleset_fd, 0) != 0) + goto error; + + (void)close(ruleset_fd); + } + + (void)src_fd; + +#else +# error ENABLE_SANDBOX is defined but no sandboxing method was found. +#endif + + return; + +error: +#ifdef HAVE_CAP_RIGHTS_LIMIT + // If a kernel is configured without capability mode support or + // used in an emulator that does not implement the capability + // system calls, then the Capsicum system calls will fail and set + // errno to ENOSYS. In that case xzdec will silently run without + // the sandbox. + if (errno == ENOSYS) + return; +#endif + + my_errorf("Failed to enable the sandbox"); + exit(EXIT_FAILURE); +} +#endif + + +int +main(int argc, char **argv) +{ + // Initialize progname which will be used in error messages. + tuklib_progname_init(argv); + +#ifdef HAVE_PLEDGE + // OpenBSD's pledge(2) sandbox. + // Initially enable the sandbox slightly more relaxed so that + // the process can still open files. This allows the sandbox to + // be enabled when parsing command line arguments and decompressing + // all files (the more strict sandbox only restricts the last file + // that is decompressed). + if (pledge("stdio rpath", "")) { + my_errorf("Failed to enable the sandbox"); + exit(EXIT_FAILURE); + } +#endif + +#ifdef HAVE_LINUX_LANDLOCK + // Prevent the process from gaining new privileges. This must be done + // before landlock_restrict_self(2) but since we will never need new + // privileges, this call can be done here already. + // + // This is supported since Linux 3.5. Ignore the return value to + // keep compatibility with old kernels. landlock_restrict_self(2) + // will fail if the no_new_privs attribute isn't set, thus if prctl() + // fails here the error will still be detected when it matters. + (void)prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); +#endif + + // We need to set the locale even though we don't have any + // translated messages: + // + // - tuklib_mask_nonprint() has locale-specific behavior (LC_CTYPE). + // + // - This is needed on Windows to make non-ASCII filenames display + // properly when the active code page has been set to UTF-8 + // in the application manifest. + setlocale(LC_ALL, ""); + + // Parse the command line options. + parse_options(argc, argv); + + // The same lzma_stream is used for all files that we decode. This way + // we don't need to reallocate memory for every file if they use same + // compression settings. + lzma_stream strm = LZMA_STREAM_INIT; + + // Some systems require setting stdin and stdout to binary mode. +#ifdef TUKLIB_DOSLIKE + setmode(fileno(stdin), O_BINARY); + setmode(fileno(stdout), O_BINARY); +#endif + + if (optind == argc) { + // No filenames given, decode from stdin. +#ifdef ENABLE_SANDBOX + sandbox_enter(STDIN_FILENO); +#endif + uncompress(&strm, stdin, "(stdin)"); + } else { + // Loop through the filenames given on the command line. + do { + FILE *src_file; + const char *src_name; + + // "-" indicates stdin. + if (strcmp(argv[optind], "-") == 0) { + src_file = stdin; + src_name = "(stdin)"; + } else { + src_name = argv[optind]; + src_file = fopen(src_name, "rb"); + if (src_file == NULL) { + my_errorf("%s: %s", + tuklib_mask_nonprint( + src_name), + strerror(errno)); + exit(EXIT_FAILURE); + } + } +#ifdef ENABLE_SANDBOX + // Enable the strict sandbox for the last file. + // Then the process can no longer open additional + // files. The typical xzdec use case is to decompress + // a single file so this way the strictest sandboxing + // is used in most cases. + if (optind == argc - 1) + sandbox_enter(fileno(src_file)); +#endif + uncompress(&strm, src_file, src_name); + + if (src_file != stdin) + (void)fclose(src_file); + } while (++optind < argc); + } + +#ifndef NDEBUG + // Free the memory only when debugging. Freeing wastes some time, + // but allows detecting possible memory leaks with Valgrind. + lzma_end(&strm); +#endif + + tuklib_exit(EXIT_SUCCESS, EXIT_FAILURE, display_errors); +} diff --git a/src/native/external/xz/src/xzdec/xzdec_w32res.rc b/src/native/external/xz/src/xzdec/xzdec_w32res.rc new file mode 100644 index 00000000000000..fdbb1024392932 --- /dev/null +++ b/src/native/external/xz/src/xzdec/xzdec_w32res.rc @@ -0,0 +1,11 @@ +/* SPDX-License-Identifier: 0BSD */ + +/* + * Author: Lasse Collin + */ + +#define MY_TYPE VFT_APP +#define MY_NAME "xzdec" +#define MY_SUFFIX ".exe" +#define MY_DESC "xzdec decompression tool for .xz files" +#include "common_w32res.rc" diff --git a/src/native/libs/System.IO.Compression.Native/CMakeLists.txt b/src/native/libs/System.IO.Compression.Native/CMakeLists.txt index 2aee0a0d4ceb56..c15a77a5182313 100644 --- a/src/native/libs/System.IO.Compression.Native/CMakeLists.txt +++ b/src/native/libs/System.IO.Compression.Native/CMakeLists.txt @@ -24,10 +24,14 @@ if (NOT CLR_CMAKE_USE_SYSTEM_ZSTD) include(${CLR_SRC_NATIVE_DIR}/external/zstd.cmake) endif() +if (NOT CLR_CMAKE_USE_SYSTEM_LZMA) + include(${CLR_SRC_NATIVE_DIR}/external/xz.cmake) +endif() + if (STATIC_LIBS_ONLY) # For every vendored library that we're actually vendoring (and not referencing the system one) # mark it as "no interprocedural optimization" so that it's compatible with our NativeAOT shipping story. - foreach(VENDORED_LIB IN LISTS BROTLI_LIBRARIES ITEMS libzstd_static zlib) + foreach(VENDORED_LIB IN LISTS BROTLI_LIBRARIES ITEMS libzstd_static zlib liblzma) if (TARGET ${VENDORED_LIB}) set_target_properties(${VENDORED_LIB} PROPERTIES INTERPROCEDURAL_OPTIMIZATION OFF) endif() @@ -83,7 +87,12 @@ if (CLR_CMAKE_TARGET_UNIX OR CLR_CMAKE_TARGET_BROWSER OR CLR_CMAKE_TARGET_WASI) target_link_libraries(System.IO.Compression.Native PRIVATE libzstd_static) endif() - if (NOT CLR_CMAKE_TARGET_ARCH_WASM AND NOT CLR_CMAKE_TARGET_MACCATALYST AND NOT CLR_CMAKE_TARGET_IOS AND NOT CLR_CMAKE_TARGET_TVOS AND NOT CLR_CMAKE_TARGET_ANDROID AND NOT CLR_CMAKE_USE_SYSTEM_BROTLI AND NOT CLR_CMAKE_USE_SYSTEM_ZSTD) + if (NOT CLR_CMAKE_USE_SYSTEM_LZMA) + target_link_libraries(System.IO.Compression.Native PRIVATE liblzma) + target_include_directories(System.IO.Compression.Native PRIVATE ${LZMA_INCLUDE_DIRS}) + endif() + + if (NOT CLR_CMAKE_TARGET_ARCH_WASM AND NOT CLR_CMAKE_TARGET_MACCATALYST AND NOT CLR_CMAKE_TARGET_IOS AND NOT CLR_CMAKE_TARGET_TVOS AND NOT CLR_CMAKE_TARGET_ANDROID AND NOT CLR_CMAKE_USE_SYSTEM_BROTLI AND NOT CLR_CMAKE_USE_SYSTEM_ZSTD AND NOT CLR_CMAKE_USE_SYSTEM_LZMA) set(DEF_SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/System.IO.Compression.Native_unixexports.src) set(EXPORTS_FILE ${CMAKE_CURRENT_BINARY_DIR}/System.IO.Compression.Native.exports) generate_exports_file(${DEF_SOURCES} ${EXPORTS_FILE}) @@ -121,6 +130,11 @@ if (CLR_CMAKE_TARGET_UNIX OR CLR_CMAKE_TARGET_BROWSER OR CLR_CMAKE_TARGET_WASI) target_link_libraries(System.IO.Compression.Native-Static PUBLIC libzstd_static) endif() + if (NOT CLR_CMAKE_USE_SYSTEM_LZMA) + target_link_libraries(System.IO.Compression.Native-Static PUBLIC liblzma) + target_include_directories(System.IO.Compression.Native-Static PUBLIC ${LZMA_INCLUDE_DIRS}) + endif() + set_target_properties(System.IO.Compression.Native-Static PROPERTIES OUTPUT_NAME System.IO.Compression.Native CLEAN_DIRECT_OUTPUT 1) target_link_libraries(System.IO.Compression.Native-Static PUBLIC @@ -145,6 +159,10 @@ if (CLR_CMAKE_TARGET_UNIX OR CLR_CMAKE_TARGET_BROWSER OR CLR_CMAKE_TARGET_WASI) install (TARGETS libzstd_static DESTINATION ${STATIC_LIB_DESTINATION} COMPONENT libs) endif() + if (NOT CLR_CMAKE_USE_SYSTEM_LZMA) + install (TARGETS liblzma DESTINATION ${STATIC_LIB_DESTINATION} COMPONENT libs) + endif() + if (NOT CLR_CMAKE_USE_SYSTEM_ZLIB) install (TARGETS zlib DESTINATION ${STATIC_LIB_DESTINATION} COMPONENT libs) endif() @@ -176,6 +194,11 @@ else () target_link_libraries(System.IO.Compression.Native PUBLIC libzstd_static) endif() + if (NOT CLR_CMAKE_USE_SYSTEM_LZMA) + target_link_libraries(System.IO.Compression.Native PUBLIC liblzma) + target_include_directories(System.IO.Compression.Native PUBLIC ${LZMA_INCLUDE_DIRS}) + endif() + target_include_directories(System.IO.Compression.Native PUBLIC ${BROTLI_INCLUDE_DIRS}) target_link_libraries(System.IO.Compression.Native PUBLIC ${BROTLI_LIBRARIES}) endif () @@ -197,6 +220,11 @@ else () target_link_libraries(System.IO.Compression.Native-Static PUBLIC libzstd_static) endif() + if (NOT CLR_CMAKE_USE_SYSTEM_LZMA) + target_link_libraries(System.IO.Compression.Native-Static PUBLIC liblzma) + target_include_directories(System.IO.Compression.Native-Static PUBLIC ${LZMA_INCLUDE_DIRS}) + endif() + target_include_directories(System.IO.Compression.Native-Static PUBLIC ${BROTLI_INCLUDE_DIRS}) target_link_libraries(System.IO.Compression.Native-Static PUBLIC ${BROTLI_LIBRARIES}) @@ -214,6 +242,11 @@ else () target_link_libraries(System.IO.Compression.Native.Aot PUBLIC libzstd_static) endif() + if (NOT CLR_CMAKE_USE_SYSTEM_LZMA) + target_link_libraries(System.IO.Compression.Native.Aot PUBLIC liblzma) + target_include_directories(System.IO.Compression.Native.Aot PUBLIC ${LZMA_INCLUDE_DIRS}) + endif() + target_include_directories(System.IO.Compression.Native.Aot PUBLIC ${BROTLI_INCLUDE_DIRS}) target_link_libraries(System.IO.Compression.Native.Aot PUBLIC ${BROTLI_LIBRARIES}) set_target_properties(System.IO.Compression.Native.Aot PROPERTIES INTERPROCEDURAL_OPTIMIZATION OFF) @@ -247,6 +280,9 @@ else () if (NOT CLR_CMAKE_USE_SYSTEM_ZSTD) install_static_library(libzstd_static aotsdk nativeaot) endif() + if (NOT CLR_CMAKE_USE_SYSTEM_LZMA) + install_static_library(liblzma aotsdk nativeaot) + endif() endif() endif () diff --git a/src/native/libs/System.IO.Compression.Native/System.IO.Compression.Native.def b/src/native/libs/System.IO.Compression.Native/System.IO.Compression.Native.def index 1dd180a79a3239..534717cffd0d77 100644 --- a/src/native/libs/System.IO.Compression.Native/System.IO.Compression.Native.def +++ b/src/native/libs/System.IO.Compression.Native/System.IO.Compression.Native.def @@ -52,4 +52,6 @@ EXPORTS ZSTD_DCtx_refPrefix ZSTD_CCtx_refPrefix ZSTD_CCtx_setPledgedSrcSize - ZDICT_trainFromBuffer \ No newline at end of file + ZDICT_trainFromBuffer + lzma_stream_decoder + \ No newline at end of file diff --git a/src/native/libs/System.IO.Compression.Native/System.IO.Compression.Native_unixexports.src b/src/native/libs/System.IO.Compression.Native/System.IO.Compression.Native_unixexports.src index f22e8648ff5b42..29de108a927296 100644 --- a/src/native/libs/System.IO.Compression.Native/System.IO.Compression.Native_unixexports.src +++ b/src/native/libs/System.IO.Compression.Native/System.IO.Compression.Native_unixexports.src @@ -53,3 +53,4 @@ ZSTD_DCtx_refPrefix ZSTD_CCtx_refPrefix ZSTD_CCtx_setPledgedSrcSize ZDICT_trainFromBuffer +lzma_stream_decoder diff --git a/src/native/libs/System.IO.Compression.Native/entrypoints.c b/src/native/libs/System.IO.Compression.Native/entrypoints.c index 533ba5aabdf9a2..d54eefa16a7ee4 100644 --- a/src/native/libs/System.IO.Compression.Native/entrypoints.c +++ b/src/native/libs/System.IO.Compression.Native/entrypoints.c @@ -15,6 +15,9 @@ #define ZSTD_STATIC_LINKING_ONLY #include #include + +#include + #endif // !TARGET_WASM static const Entry s_compressionNative[] = @@ -42,6 +45,7 @@ static const Entry s_compressionNative[] = DllImportEntry(CompressionNative_InflateInit2_) DllImportEntry(CompressionNative_InflateReset2_) #if !defined(TARGET_WASM) + // zstd DllImportEntry(ZSTD_createCCtx) DllImportEntry(ZSTD_createDCtx) DllImportEntry(ZSTD_freeCCtx) @@ -74,6 +78,8 @@ static const Entry s_compressionNative[] = DllImportEntry(ZSTD_CCtx_refPrefix) DllImportEntry(ZSTD_CCtx_setPledgedSrcSize) DllImportEntry(ZDICT_trainFromBuffer) + // xz / lzma + DllImportEntry(lzma_stream_decoder) #endif // !TARGET_WASM }; diff --git a/src/native/libs/System.IO.Compression.Native/extra_libs.cmake b/src/native/libs/System.IO.Compression.Native/extra_libs.cmake index 1eb0639478e4b6..143255cc34026e 100644 --- a/src/native/libs/System.IO.Compression.Native/extra_libs.cmake +++ b/src/native/libs/System.IO.Compression.Native/extra_libs.cmake @@ -31,4 +31,9 @@ macro(append_extra_compression_libs NativeLibsExtra) find_library(ZSTD zstd REQUIRED) list(APPEND ${NativeLibsExtra} ${ZSTD}) endif () + + if (CLR_CMAKE_USE_SYSTEM_LZMA) + find_library(LZMA lzma REQUIRED) + list(APPEND ${NativeLibsExtra} ${LZMA}) + endif () endmacro()