From 861037f4fc157d50415618e902f5c37cf1467119 Mon Sep 17 00:00:00 2001
From: Gao Xiang
Date: Mon, 10 Jul 2023 19:02:48 +0800
Subject: [PATCH 1/9] erofs-utils: add a built-in DEFLATE compressor

As the Apple documentation says, "If you require interoperability with
non-Apple devices, use COMPRESSION_ZLIB. [1]"  DEFLATE has been a popular
general-purpose compression algorithm for quite a long time (many
advanced formats like zlib, gzip, zip and png are all based on it).  It
is made of LZ77 as well as Huffman coding, is fully documented in
RFC 1951 [2], and is quite easy to understand and implement.

There are several DEFLATE hardware accelerators on the market as well,
such as (s390) DFLTCC, (Intel) IAA/QAT, (HiSilicon) ZIP accelerator,
etc.  Therefore, it's useful to support DEFLATE compression in order to
use these accelerators for async I/Os and benefit from them.

Since there is _no fixed-sized output DEFLATE compression approach_
publicly available (fitblk is somewhat ineffective) and the original
zlib is developing quite slowly, let's work out one for our use cases.
Fortunately, it takes less than 1.5kLOC with lazy matching to match the
full zlib abilities.  Besides, near-optimal block splitting (based on a
price function) isn't supported yet since there is no rush for us.

In the future, more built-in optimizers might land to fulfill our needs
even further (especially for other popular algorithms without native
fixed-sized output support).  In addition, I'd be quite happy to see
more popular encoders support native fixed-sized output compression
too.

[1] https://developer.apple.com/documentation/compression/compression_algorithm
[2] https://datatracker.ietf.org/doc/html/rfc1951

Signed-off-by: Gao Xiang
Link: https://lore.kernel.org/r/20230710110251.89464-2-hsiangkao@linux.alibaba.com
---
 lib/Makefile.am    |    2 +
 lib/kite_deflate.c | 1270 ++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 1272 insertions(+)
 create mode 100644 lib/kite_deflate.c

diff --git a/lib/Makefile.am b/lib/Makefile.am
index e243c1cd..b7290982 100644
--- a/lib/Makefile.am
+++ b/lib/Makefile.am
@@ -43,3 +43,5 @@ if ENABLE_LIBLZMA
 liberofs_la_CFLAGS += ${liblzma_CFLAGS}
 liberofs_la_SOURCES += compressor_liblzma.c
 endif
+
+liberofs_la_SOURCES += kite_deflate.c
diff --git a/lib/kite_deflate.c b/lib/kite_deflate.c
new file mode 100644
index 00000000..f5bb2fd2
--- /dev/null
+++ b/lib/kite_deflate.c
@@ -0,0 +1,1270 @@
+// SPDX-License-Identifier: GPL-2.0+ OR Apache-2.0
+/*
+ * erofs-utils/lib/kite_deflate.c
+ *
+ * Copyright (C) 2023, Alibaba Cloud
+ * Copyright (C) 2023, Gao Xiang
+ */
+#include "erofs/defs.h"
+#include "erofs/print.h"
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <ctype.h>
+
+unsigned long erofs_memcmp2(const u8 *s1, const u8 *s2,
+			    unsigned long sz);
+
+#ifdef TEST
+#define kite_dbg(x, ...)	fprintf(stderr, x "\n", ##__VA_ARGS__)
+#else
+#define kite_dbg(x, ...)
+#endif
+
+#define kHistorySize32		(1U << 15)
+
+#define kNumLenSymbols32	256
+#define kNumLenSymbolsMax	kNumLenSymbols32
+
+#define kSymbolEndOfBlock	256
+#define kSymbolMatch		(kSymbolEndOfBlock + 1)
+#define kNumLenSlots		29
+#define kMainTableSize		(kSymbolMatch + kNumLenSlots)
+
+#define kFixedLenTableSize	(kSymbolMatch + 31)
+#define FixedDistTableSize	32
+
+#define kMainTableSize		(kSymbolMatch + kNumLenSlots)
+#define kDistTableSize32	30
+
+#define kNumLitLenCodesMin	257
+#define kNumDistCodesMin	1
+
+#define kNumLensCodesMin	4
+#define kLensTableSize		19
+
+#define kMatchMinLen		3
+#define kMatchMaxLen32		(kNumLenSymbols32 + kMatchMinLen - 1)
+
+static u32 kstaticHuff_mainCodes[kFixedLenTableSize];
+static const u8 kstaticHuff_litLenLevels[kFixedLenTableSize] = {
+	[0 ... 143] = 8, [144 ... 255] = 9,
+	[256 ... 279] = 7, [280 ... 287] = 8,
+};
+static u32 kstaticHuff_distCodes[FixedDistTableSize];	/* 5-bit fixed codes */
+
+const u8 kLenStart32[kNumLenSlots] =
+	{0,1,2,3,4,5,6,7,8,10,12,14,16,20,24,28,32,40,48,56,64,80,96,112,128,160,192,224, 255};
+
+const u8 kLenExtraBits32[kNumLenSlots] =
+	{0,0,0,0,0,0,0,0,1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5,
+	 5, 5, 5, 0};
+
+/* First normalized distance for each code (0 = distance of 1) */
+const u32 kDistStart[kDistTableSize32] =
+	{0,1,2,3,4,6,8,12,16,24,32,48,64,96,128,192,256,384,512,768,
+	 1024,1536,2048,3072,4096,6144,8192,12288,16384,24576};
+
+/* extra bits for each distance code */
+const u8 kDistExtraBits[kDistTableSize32] =
+	{0,0,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,11,11,12,12,13,13};
+
+const u8 kCodeLengthAlphabetOrder[kLensTableSize] =
+	{16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15};
+
+const u8 kLevelExtraBits[3] = {2, 3, 7};
+
+const unsigned int kTableDirectLevels = 16;
+const unsigned int kBitLensRepNumber_3_6 = kTableDirectLevels;
+const unsigned int kBitLens0Number_3_10 = kBitLensRepNumber_3_6 + 1;
+const unsigned int kBitLens0Number_11_138 = kBitLens0Number_3_10 + 1;
+
+#define kStored			0
+#define kFixedHuffman		1
+#define kDynamicHuffman		2
+
+struct kite_deflate_symbol {
+	u16 len, dist;	/* len < kMatchMinLen: literal whose byte is in dist */
+};
+
+struct kite_deflate_table {
+	u32 mainCodes[kMainTableSize];
+	u8 litLenLevels[kMainTableSize];
+	u32 distCodes[kDistTableSize32];
+	u8 distLevels[kDistTableSize32];
+	u32 levelCodes[kLensTableSize];
+	u8 levelLens[kLensTableSize];
+
+	u8 numdistlens, numblcodes;
+	u16 numlitlens;
+};
+
+struct kite_deflate {
+	struct kite_deflate_table *tab;
+	const u8 *in;
+	u8 *out;
+
+	u32 inlen, outlen;
+	u32 pos_in, pos_out;
+	u32 inflightbits;	/* bits queued by writebits() but not stored yet */
+	u8 bitpos;		/* number of valid bits in inflightbits (0..32) */
+	u8 numHuffBits;
+	u32 symbols;
+
+	u32 costbits, startpos;
+	u8 encode_mode;		/* kStored, kFixedHuffman or kDynamicHuffman */
+	bool freq_changed, lastblock;
+
+	/* pending symbol of the previous position (lazy matching only) */
+	bool prev_valid;
+	u16 prev_longest;
+
+	u32 mainFreqs[kMainTableSize];
+	u32 distFreqs[kDistTableSize32];
+	struct kite_deflate_table tables[2];
+
+	/* set once by kite_deflate_init(); never reset between calls */
+	struct kite_matchfinder *mf;
+	struct kite_deflate_symbol *sym;
+	u32 max_symbols;
+	bool lazy_search;
+};
+
+#define ZLIB_DISTANCE_TOO_FAR	4096
+
+static u8 g_LenSlots[kNumLenSymbolsMax];
+
+#define kNumLogBits 9		// do not change it
+static u8 g_FastPos[1 << kNumLogBits];
+
+static void writebits(struct kite_deflate *s, unsigned int v, u8 bits)
+{
+	unsigned int rem = sizeof(s->inflightbits) * 8 - s->bitpos;
+
+	s->inflightbits |= rem ? v << s->bitpos : 0;	/* never shift by 32 (UB) */
+	if (bits > rem) {
+		u8 *out = s->out + s->pos_out;
+
+		out[0] = s->inflightbits & 0xff;
+		out[1] = (s->inflightbits >> 8) & 0xff;
+		out[2] = (s->inflightbits >> 16) & 0xff;
+		out[3] = (s->inflightbits >> 24) & 0xff;
+		s->pos_out += 4;
+		DBG_BUGON(s->pos_out > s->outlen);
+		s->inflightbits = v >> rem;	/* bits which didn't fit */
+		s->bitpos = bits - rem;
+		return;
+	}
+	s->bitpos += bits;
+}
+
+static void flushbits(struct kite_deflate *s)
+{
+	u8 *out = s->out + s->pos_out;
+
+	if (!s->bitpos)
+		return;
+	out[0] = s->inflightbits & 0xff;
+	if (s->bitpos > 8) {	/* store only the bytes accounted for below */
+		out[1] = (s->inflightbits >> 8) & 0xff;
+		if (s->bitpos > 16) {
+			out[2] = (s->inflightbits >> 16) & 0xff;
+			if (s->bitpos > 24)
+				out[3] = (s->inflightbits >> 24) & 0xff;
+		}
+	}
+	s->pos_out += round_up(s->bitpos, 8) >> 3;
+	DBG_BUGON(s->pos_out > s->outlen);
+	s->bitpos = 0;
+	s->inflightbits = 0;
+}
+
+#define kMaxLen 16
+
+static void deflate_genhuffcodes(const u8 *lens, u32 *p, unsigned int nr_codes,
+				 const u32 *bl_count)
+{
+	u32 nextCodes[kMaxLen + 1] = {0};	/* [0] is hit by unused symbols */
+	unsigned int code = 0;		/* running code value */
+	unsigned int bits, k;
+
+	for (bits = 1; bits <= kMaxLen; ++bits) {
+		code = (code + bl_count[bits - 1]) << 1;
+		nextCodes[bits] = code;
+	}
+
+	DBG_BUGON(code + bl_count[kMaxLen] != 1 << kMaxLen);
+
+	for (k = 0; k < nr_codes; ++k)
+		p[k] = nextCodes[lens[k]]++;
+}
+
+static u32 deflate_reversebits_one(u32 code, u8 bits)
+{
+	unsigned int x = code;
+
+	x = ((x & 0x5555) << 1) | ((x & 0xAAAA) >> 1);
+	x = ((x & 0x3333) << 2) | ((x & 0xCCCC) >> 2);
+	x = ((x & 0x0F0F) << 4) | ((x & 0xF0F0) >> 4);
+
+	return (((x & 0x00FF) << 8) | ((x & 0xFF00) >> 8)) >> (16 - bits);
+}
+
+static void Huffman_ReverseBits(u32 *codes, const u8 *lens, unsigned int n)
+{
+	while (n) {
+		u32 code = *codes;
+
+		*codes++ = deflate_reversebits_one(code, *lens++);
+		--n;
+	}
+}
+
+static void kite_deflate_init_once(void)
+{
+	static const u32 static_bl_count[kMaxLen + 1] = {
+		[7] = 279 - 256 + 1,
+		[8] = (143 + 1) + (287 - 280 + 1),
+		[9] = 255 - 144 + 1,
+	};
+	unsigned int i, c, j, k;
+
+	if (kstaticHuff_distCodes[31])	/* non-zero once the tables are built */
+		return;
+	deflate_genhuffcodes(kstaticHuff_litLenLevels, kstaticHuff_mainCodes,
+			     kFixedLenTableSize, static_bl_count);
+	Huffman_ReverseBits(kstaticHuff_mainCodes, kstaticHuff_litLenLevels,
+			    kFixedLenTableSize);
+
+	for (i = 0; i < ARRAY_SIZE(kstaticHuff_distCodes); ++i)
+		kstaticHuff_distCodes[i] = deflate_reversebits_one(i, 5);
+
+	for (i = 0; i < kNumLenSlots; i++) {
+		c = kLenStart32[i];
+		j = 1 << kLenExtraBits32[i];
+
+		for (k = 0; k < j; k++, c++)
+			g_LenSlots[c] = (u8)i;
+	}
+
+	c = 0;
+	for (i = 0; i < /*kFastSlots*/ kNumLogBits * 2; i++) {
+		k = 1 << kDistExtraBits[i];
+		for (j = 0; j < k; j++)
+			g_FastPos[c++] = i;
+	}
+}
+
+static void kite_deflate_scanlens(unsigned int numlens, u8 *lens, u32 *freqs)
+{
+	int n;				/* iterates over all tree elements */
+	int prevlen = -1;		/* last emitted length */
+	int curlen;			/* length of current code */
+	int nextlen = lens[0];		/* length of next code */
+	int count = 0;			/* repeat count of the current code */
+	int max_count = 7;		/* max repeat count */
+	int min_count = 4;		/* min repeat count */
+
+	if (!nextlen)
+		max_count = 138, min_count = 3;
+
+	for (n = 0; n < numlens; n++) {
+		curlen = nextlen;
+		nextlen = n + 1 < numlens ? lens[n + 1] : -1;
+		++count;
+
+		if (count < max_count && curlen == nextlen)
+			continue;
+		if (count < min_count) {
+			freqs[curlen] += count;
+		} else if (curlen != 0) {
+			if (curlen != prevlen)
+				freqs[curlen]++;
+			freqs[kBitLensRepNumber_3_6]++;
+		} else if (count <= 10) {
+			freqs[kBitLens0Number_3_10]++;
+		} else {
+			freqs[kBitLens0Number_11_138]++;
+		}
+
+		count = 0;
+		prevlen = curlen;
+		if (!nextlen)
+			max_count = 138, min_count = 3;
+		else if (curlen == nextlen)
+			max_count = 6, min_count = 3;
+		else
+			max_count = 7, min_count = 4;
+	}
+}
+
+static void kite_deflate_sendtree(struct kite_deflate *s, const u8 *lens,
+				  unsigned int numlens)
+{
+	int n;				/* iterates over all tree elements */
+	int prevlen = -1;		/* last emitted length */
+	int curlen;			/* length of current code */
+	int nextlen = lens[0];		/* length of next code */
+	int count = 0;			/* repeat count of the current code */
+	int max_count = 7;		/* max repeat count */
+	int min_count = 4;		/* min repeat count */
+	const u8 *bl_lens = s->tab->levelLens;
+	const u32 *bl_codes = s->tab->levelCodes;
+
+	if (!nextlen)
+		max_count = 138, min_count = 3;
+
+	for (n = 0; n < numlens; n++) {
+		curlen = nextlen;
+		nextlen = n + 1 < numlens ? lens[n + 1] : -1;
+		++count;
+
+		if (count < max_count && curlen == nextlen)
+			continue;
+		if (count < min_count) {
+			do {
+				writebits(s, bl_codes[curlen], bl_lens[curlen]);
+			} while (--count);
+		} else if (curlen) {
+			if (curlen != prevlen) {
+				writebits(s, bl_codes[curlen], bl_lens[curlen]);
+				count--;
+			}
+			writebits(s, bl_codes[kBitLensRepNumber_3_6],
+				  bl_lens[kBitLensRepNumber_3_6]);
+			writebits(s, count - 3, 2);
+		} else if (count <= 10) {
+			writebits(s, bl_codes[kBitLens0Number_3_10],
+				  bl_lens[kBitLens0Number_3_10]);
+			writebits(s, count - 3, 3);
+		} else {
+			writebits(s, bl_codes[kBitLens0Number_11_138],
+				  bl_lens[kBitLens0Number_11_138]);
+			writebits(s, count - 11, 7);
+		}
+
+		count = 0;
+		prevlen = curlen;
+		if (!nextlen)
+			max_count = 138, min_count = 3;
+		else if (curlen == nextlen)
+			max_count = 6, min_count = 3;
+		else
+			max_count = 7, min_count = 4;
+	}
+}
+
+static void kite_deflate_setfixedtrees(struct kite_deflate *s)
+{
+	writebits(s, (kFixedHuffman << 1) + s->lastblock, 3);
+}
+
+static void kite_deflate_sendtrees(struct kite_deflate *s)
+{
+	struct kite_deflate_table *t = s->tab;
+	unsigned int i;
+
+	writebits(s, (kDynamicHuffman << 1) + s->lastblock, 3);
+	writebits(s, t->numlitlens - kNumLitLenCodesMin, 5);
+	writebits(s, t->numdistlens - kNumDistCodesMin, 5);
+	writebits(s, t->numblcodes - kNumLensCodesMin, 4);
+
+	for (i = 0; i < t->numblcodes; i++)
+		writebits(s, t->levelLens[kCodeLengthAlphabetOrder[i]], 3);
+
+	Huffman_ReverseBits(t->levelCodes, t->levelLens, kLensTableSize);
+	kite_deflate_sendtree(s, t->litLenLevels, t->numlitlens);
+	kite_deflate_sendtree(s, t->distLevels, t->numdistlens);
+}
+
+static inline unsigned int deflateDistSlot(unsigned int pos)
+{
+	const unsigned int zz = (kNumLogBits - 1) &
+		((((1U << kNumLogBits) - 1) - pos) >> (31 - 3));
+
+	return g_FastPos[pos >> zz] + (zz * 2);
+}
+
+static void kite_deflate_writeblock(struct kite_deflate *s, bool fixed)
+{
+	int i;
+	u32 *mainCodes, *distCodes;
+	const u8 *litLenLevels, *distLevels = NULL;	/* unused if fixed */
+
+	if (!fixed) {
+		struct kite_deflate_table *t = s->tab;
+
+		mainCodes = t->mainCodes; distCodes = t->distCodes;
+		litLenLevels = t->litLenLevels; distLevels = t->distLevels;
+
+		Huffman_ReverseBits(mainCodes, litLenLevels, kMainTableSize);
+		Huffman_ReverseBits(distCodes, distLevels, kDistTableSize32);
+	} else {
+		mainCodes = kstaticHuff_mainCodes;
+		distCodes = kstaticHuff_distCodes;
+
+		litLenLevels = kstaticHuff_litLenLevels;
+	}
+
+	for (i = 0; i < s->symbols; ++i) {
+		struct kite_deflate_symbol *sym = &s->sym[i];
+
+		if (sym->len < kMatchMinLen) {		/* literal */
+			writebits(s, mainCodes[sym->dist],
+				  litLenLevels[sym->dist]);
+		} else {
+			unsigned int lenSlot, distSlot;
+			unsigned int lc = sym->len - kMatchMinLen;
+
+			lenSlot = g_LenSlots[lc];
+			writebits(s, mainCodes[kSymbolMatch + lenSlot],
+				  litLenLevels[kSymbolMatch + lenSlot]);
+			writebits(s, lc - kLenStart32[lenSlot],
+				  kLenExtraBits32[lenSlot]);
+
+			distSlot = deflateDistSlot(sym->dist - 1);
+			writebits(s, distCodes[distSlot],
+				  fixed ? 5 : distLevels[distSlot]);
+			writebits(s, sym->dist - 1 - kDistStart[distSlot],
+				  kDistExtraBits[distSlot]);
+		}
+	}
+	writebits(s, mainCodes[kSymbolEndOfBlock],
+		  litLenLevels[kSymbolEndOfBlock]);
+}
+
+static u32 Huffman_GetPrice(const u32 *freqs, const u8 *lens, u32 num)
+{
+	u32 price = 0;
+
+	while (num) {
+		price += (*lens++) * (*freqs++);
+		--num;
+	}
+	return price;
+}
+
+static u32 Huffman_GetPriceEx(const u32 *freqs, const u8 *lens, u32 num,
+			      const u8 *extraBits, u32 extraBase)
+{
+	return Huffman_GetPrice(freqs, lens, num) +
+		Huffman_GetPrice(freqs + extraBase, extraBits, num - extraBase);
+}
+
+/* Adapted from C/HuffEnc.c (7zip) for now */
+#define HeapSortDown(p, k, size, temp) \
+  { for (;;) { \
+    size_t s = (k << 1); \
+    if (s > size) break; \
+    if (s < size && p[s + 1] > p[s]) s++; \
+    if (temp >= p[s]) break; \
+    p[k] = p[s]; k = s; \
+  } p[k] = temp; }
+
+static void HeapSort(u32 *p, size_t size)
+{
+	if (size <= 1)
+		return;
+	p--;
+	{
+		size_t i = size / 2;
+		do
+		{
+			u32 temp = p[i];
+			size_t k = i;
+			HeapSortDown(p, k, size, temp)
+		}
+		while (--i != 0);
+	}
+	/*
+	do
+	{
+		size_t k = 1;
+		UInt32 temp = p[size];
+		p[size--] = p[1];
+		HeapSortDown(p, k, size, temp)
+	}
+	while (size > 1);
+	*/
+	while (size > 3)
+	{
+		u32 temp = p[size];
+		size_t k = (p[3] > p[2]) ? 3 : 2;
+		p[size--] = p[1];
+		p[1] = p[k];
+		HeapSortDown(p, k, size, temp)
+	}
+	{
+		u32 temp = p[size];
+		p[size] = p[1];
+		if (size > 2 && p[2] < temp)
+		{
+			p[1] = p[2];
+			p[2] = temp;
+		}
+		else
+			p[1] = temp;
+	}
+}
+
+#define NUM_BITS 10
+#define MASK (((unsigned)1 << NUM_BITS) - 1)
+
+static void Huffman_Generate(const u32 *freqs, u32 *p, u8 *lens,
+			     unsigned int numSymbols, unsigned int maxLen)
+{
+	u32 num, i;
+
+	num = 0;
+	/* if (maxLen > 10) maxLen = 10; */
+
+	for (i = 0; i < numSymbols; i++) {
+		u32 freq = freqs[i];
+
+		if (!freq)
+			lens[i] = 0;
+		else
+			p[num++] = i | (freq << NUM_BITS);
+	}
+	HeapSort(p, num);
+
+	if (num < 2) {
+		unsigned int minCode = 0, maxCode = 1;
+
+		if (num == 1) {
+			maxCode = (unsigned int)p[0] & MASK;
+			if (!maxCode)
+				maxCode++;
+		}
+		p[minCode] = 0;
+		p[maxCode] = 1;
+		lens[minCode] = lens[maxCode] = 1;
+		return;
+	}
+
+	{
+		u32 b, e, i;
+
+		i = b = e = 0;
+		do {
+			u32 n, m, freq;
+
+			n = (i != num && (b == e || (p[i] >> NUM_BITS) <= (p[b] >> NUM_BITS))) ? i++ : b++;
+			freq = (p[n] & ~MASK);
+			p[n] = (p[n] & MASK) | (e << NUM_BITS);
+			m = (i != num && (b == e || (p[i] >> NUM_BITS) <= (p[b] >> NUM_BITS))) ? i++ : b++;
+			freq += (p[m] & ~MASK);
+			p[m] = (p[m] & MASK) | (e << NUM_BITS);
+			p[e] = (p[e] & MASK) | freq;
+			e++;
+		} while (num - e > 1);
+
+		{
+			u32 lenCounters[kMaxLen + 1];
+
+			for (i = 0; i <= kMaxLen; i++)
+				lenCounters[i] = 0;
+
+			p[--e] &= MASK;
+			lenCounters[1] = 2;
+			while (e > 0) {
+				u32 len = (p[p[--e] >> NUM_BITS] >> NUM_BITS) + 1;
+
+				p[e] = (p[e] & MASK) | (len << NUM_BITS);
+				if (len >= maxLen)
+					for (len = maxLen - 1; lenCounters[len] == 0; len--);
+				lenCounters[len]--;
+				lenCounters[(size_t)len + 1] += 2;
+			}
+
+			{
+				u32 len;
+
+				i = 0;
+				for (len = maxLen; len != 0; len--) {
+					u32 k;
+					for (k = lenCounters[len]; k != 0; k--)
+						lens[p[i++] & MASK] = (u8)len;
+				}
+			}
+			deflate_genhuffcodes(lens, p, numSymbols, lenCounters);
+		}
+	}
+}
+
+static void kite_deflate_fixdynblock(struct kite_deflate *s)
+{
+	struct kite_deflate_table *t = s->tab;
+	unsigned int numlitlens, numdistlens, numblcodes;
+	u32 levelFreqs[kLensTableSize] = {0};
+	u32 opt_mainlen;
+
+	if (!s->freq_changed)
+		return;
+
+	/* match zlib: RFC 1951 code lengths must not exceed 15 bits */
+	s->numHuffBits = kMaxLen - 1;
+//	s->numHuffBits = (s->symbols > 18000 ? 12 :
+//		(s->symbols > 7000 ? 11 : (s->symbols > 2000 ? 10 : 9)));
+
+	Huffman_Generate(s->mainFreqs, t->mainCodes, t->litLenLevels,
+			 kMainTableSize, s->numHuffBits);
+	Huffman_Generate(s->distFreqs, t->distCodes, t->distLevels,
+			 kDistTableSize32, s->numHuffBits);
+
+	/* code lengths for the literal/length alphabet */
+	numlitlens = kMainTableSize;
+	while (numlitlens > kNumLitLenCodesMin &&
+	       !t->litLenLevels[numlitlens - 1])
+		--numlitlens;
+
+	/* code lengths for the distance alphabet */
+	numdistlens = kDistTableSize32;
+	while (numdistlens > kNumDistCodesMin &&
+	       !t->distLevels[numdistlens - 1])
+		--numdistlens;
+
+	kite_deflate_scanlens(numlitlens, t->litLenLevels, levelFreqs);
+	kite_deflate_scanlens(numdistlens, t->distLevels, levelFreqs);
+	Huffman_Generate(levelFreqs, t->levelCodes, t->levelLens,
+			 kLensTableSize, 7);
+	numblcodes = kLensTableSize;
+	while (numblcodes > kNumLensCodesMin &&
+	       !t->levelLens[kCodeLengthAlphabetOrder[numblcodes - 1]])
+		--numblcodes;
+
+	t->numlitlens = numlitlens;
+	t->numdistlens = numdistlens;
+	t->numblcodes = numblcodes;
+
+	opt_mainlen = Huffman_GetPriceEx(s->mainFreqs, t->litLenLevels,
+			kMainTableSize, kLenExtraBits32, kSymbolMatch) +
+		Huffman_GetPriceEx(s->distFreqs, t->distLevels,
+			kDistTableSize32, kDistExtraBits, 0);
+	s->costbits = 3 + 5 + 5 + 4 + 3 * numblcodes +
+		Huffman_GetPriceEx(levelFreqs, t->levelLens,
+			kLensTableSize, kLevelExtraBits, kTableDirectLevels) +
+		opt_mainlen;
+	s->freq_changed = false;
+}
+
+
+/*
+ * an array used by the LZ-based encoder to hold the length-distance pairs
+ * found by LZ matchfinder.
+ */
+struct kite_match {
+	unsigned int len;	/* match length in bytes */
+	unsigned int dist;	/* backward distance; entry 0 carries the literal byte */
+};
+
+struct kite_matchfinder {
+	/* pointer to buffer with data to be compressed */
+	const u8 *buffer;
+
+	/* points to the first byte past the valid input data */
+	const u8 *end;
+
+	/* LZ matchfinder hash chain representation */
+	u32 *hash, *chain;
+
+	u32 base;	/* position value that hash/chain entries use for buffer[0] */
+
+	/* indicate the next byte to run through the match finder */
+	u32 offset;
+
+	u32 cyclic_pos;	/* chain[] slot of the next byte, wraps at wsiz */
+
+	/* maximum length of a match that the matchfinder will try to find. */
+	u16 nice_len;
+
+	/* the total sliding window size (a power of two, <= kHistorySize32) */
+	u16 wsiz;
+
+	/* maximum number of hash chain candidates examined per position */
+	u16 depth;
+
+	/* skip the lazy search once the previous match is at least this long */
+	u16 max_lazy;
+
+	/* halve the search depth once the previous match is at least this long */
+	u8 good_len;
+
+	/* matches of the current position; alternates between the two rows */
+	struct kite_match *matches;
+	struct kite_match matches_matrix[2][4];
+};
+
+/*
+ * This mysterious table is just the CRC of each possible byte. It can be
+ * computed using the standard bit-at-a-time methods. The polynomial can
+ * be seen in entry 128, 0x8408. This corresponds to x^0 + x^5 + x^12.
+ * Add the implicit x^16, and you have the standard CRC-CCITT.
+ */
+u16 const crc_ccitt_table[256] __attribute__((__aligned__(128))) = {
+	0x0000, 0x1189, 0x2312, 0x329b, 0x4624, 0x57ad, 0x6536, 0x74bf,
+	0x8c48, 0x9dc1, 0xaf5a, 0xbed3, 0xca6c, 0xdbe5, 0xe97e, 0xf8f7,
+	0x1081, 0x0108, 0x3393, 0x221a, 0x56a5, 0x472c, 0x75b7, 0x643e,
+	0x9cc9, 0x8d40, 0xbfdb, 0xae52, 0xdaed, 0xcb64, 0xf9ff, 0xe876,
+	0x2102, 0x308b, 0x0210, 0x1399, 0x6726, 0x76af, 0x4434, 0x55bd,
+	0xad4a, 0xbcc3, 0x8e58, 0x9fd1, 0xeb6e, 0xfae7, 0xc87c, 0xd9f5,
+	0x3183, 0x200a, 0x1291, 0x0318, 0x77a7, 0x662e, 0x54b5, 0x453c,
+	0xbdcb, 0xac42, 0x9ed9, 0x8f50, 0xfbef, 0xea66, 0xd8fd, 0xc974,
+	0x4204, 0x538d, 0x6116, 0x709f, 0x0420, 0x15a9, 0x2732, 0x36bb,
+	0xce4c, 0xdfc5, 0xed5e, 0xfcd7, 0x8868, 0x99e1, 0xab7a, 0xbaf3,
+	0x5285, 0x430c, 0x7197, 0x601e, 0x14a1, 0x0528, 0x37b3, 0x263a,
+	0xdecd, 0xcf44, 0xfddf, 0xec56, 0x98e9, 0x8960, 0xbbfb, 0xaa72,
+	0x6306, 0x728f, 0x4014, 0x519d, 0x2522, 0x34ab, 0x0630, 0x17b9,
+	0xef4e, 0xfec7, 0xcc5c, 0xddd5, 0xa96a, 0xb8e3, 0x8a78, 0x9bf1,
+	0x7387, 0x620e, 0x5095, 0x411c, 0x35a3, 0x242a, 0x16b1, 0x0738,
+	0xffcf, 0xee46, 0xdcdd, 0xcd54, 0xb9eb, 0xa862, 0x9af9, 0x8b70,
+	0x8408, 0x9581, 0xa71a, 0xb693, 0xc22c, 0xd3a5, 0xe13e, 0xf0b7,
+	0x0840, 0x19c9, 0x2b52, 0x3adb, 0x4e64, 0x5fed, 0x6d76, 0x7cff,
+	0x9489, 0x8500, 0xb79b, 0xa612, 0xd2ad, 0xc324, 0xf1bf, 0xe036,
+	0x18c1, 0x0948, 0x3bd3, 0x2a5a, 0x5ee5, 0x4f6c, 0x7df7, 0x6c7e,
+	0xa50a, 0xb483, 0x8618, 0x9791, 0xe32e, 0xf2a7, 0xc03c, 0xd1b5,
+	0x2942, 0x38cb, 0x0a50, 0x1bd9, 0x6f66, 0x7eef, 0x4c74, 0x5dfd,
+	0xb58b, 0xa402, 0x9699, 0x8710, 0xf3af, 0xe226, 0xd0bd, 0xc134,
+	0x39c3, 0x284a, 0x1ad1, 0x0b58, 0x7fe7, 0x6e6e, 0x5cf5, 0x4d7c,
+	0xc60c, 0xd785, 0xe51e, 0xf497, 0x8028, 0x91a1, 0xa33a, 0xb2b3,
+	0x4a44, 0x5bcd, 0x6956, 0x78df, 0x0c60, 0x1de9, 0x2f72, 0x3efb,
+	0xd68d, 0xc704, 0xf59f, 0xe416, 0x90a9, 0x8120, 0xb3bb, 0xa232,
+	0x5ac5, 0x4b4c, 0x79d7, 0x685e, 0x1ce1, 0x0d68, 0x3ff3, 0x2e7a,
+	0xe70e, 0xf687, 0xc41c, 0xd595, 0xa12a, 0xb0a3, 0x8238, 0x93b1,
+	0x6b46, 0x7acf, 0x4854, 0x59dd, 0x2d62, 0x3ceb, 0x0e70, 0x1ff9,
+	0xf78f, 0xe606, 0xd49d, 0xc514, 0xb1ab, 0xa022, 0x92b9, 0x8330,
+	0x7bc7, 0x6a4e, 0x58d5, 0x495c, 0x3de3, 0x2c6a, 0x1ef1, 0x0f78
+};
+
+int kite_mf_getmatches_hc3(struct kite_matchfinder *mf, u16 depth, u16 bestlen)
+{
+	const u8 *cur = mf->buffer + mf->offset;
+	const u8 *qbase = mf->buffer - mf->base;
+	u32 curMatch;
+	unsigned int v, hv, i, k, p, wsiz;
+
+	if (mf->end - cur < max(bestlen + 1, kMatchMinLen))	/* hashing reads 3 bytes */
+		return 0;
+
+	v = get_unaligned((u16 *)cur);
+	hv = v ^ crc_ccitt_table[cur[2]];
+	curMatch = mf->hash[hv];
+	p = mf->base + mf->offset;
+	mf->hash[hv] = p;
+	mf->chain[mf->cyclic_pos] = curMatch;
+	wsiz = mf->wsiz;
+	k = 1;		/* matches[0] is reserved for the literal */
+
+	if (depth) {
+		unsigned int wpos = wsiz + mf->cyclic_pos;
+
+		hv = min_t(unsigned int, mf->nice_len, mf->end - cur);
+		DBG_BUGON(hv > kMatchMaxLen32);
+		do {
+			unsigned int diff = p - curMatch;
+			const u8 *q;
+
+			if (diff >= wsiz)
+				break;
+
+			q = qbase + curMatch;
+			curMatch = mf->chain[(wpos - diff) & (wsiz - 1)];
+			if (v == get_unaligned((u16 *)q) && (bestlen < 3 || (
+			    get_unaligned((u16 *)(cur + bestlen - 1)) ==
+			    get_unaligned((u16 *)(q + bestlen - 1)) &&
+			    !memcmp(cur + 3, q + 3, bestlen - 3)))) {
+				DBG_BUGON(cur[2] != q[2]);
+				i = erofs_memcmp2(cur + bestlen + 1,
+					q + bestlen + 1, hv - bestlen - 1);
+				bestlen += 1 + i;
+
+				k -= (k >= ARRAY_SIZE(mf->matches_matrix[0]));
+				mf->matches[k++] = (struct kite_match) {
+					.len = bestlen,
+					.dist = diff,
+				};
+				if (bestlen >= hv)
+					break;
+			}
+		} while (--depth);
+	}
+	mf->offset++;
+	mf->cyclic_pos = (mf->cyclic_pos + 1) & (wsiz - 1);
+	return k - 1;
+}
+
+/* let's align with zlib */
+static const struct kite_matchfinder_cfg {
+	u16 good_length;	/* reduce lazy search above this match length */
+	u16 max_lazy;		/* do not perform lazy search above this match length */
+	u16 nice_length;	/* quit search above this match length */
+	u16 depth;
+	bool lazy_search;
+} kite_mfcfg[10] = {
+/*       good lazy nice depth */
+/* 0 */ {0,    0,  0,    0, false},	/* store only [unsupported] */
+/* 1 */ {4,    4,  8,    4, false},	/* maximum speed, no lazy matches */
+/* 2 */ {4,    5, 16,    8, false},
+/* 3 */ {4,    6, 32,   32, false},
+
+/* 4 */ {4,    4, 16,   16, true},	/* lazy matches */
+/* 5 */ {8,   16, 32,   32, true},
+/* 6 */ {8,   16, 128, 128, true},
+/* 7 */ {8,   32, 128, 256, true},
+/* 8 */ {32, 128, 258, 1024, true},
+/* 9 */ {32, 258, 258, 4096, true},	/* maximum compression */
+};
+
+static int kite_mf_init(struct kite_matchfinder *mf, int wsiz, int level)
+{
+	const struct kite_matchfinder_cfg *cfg;
+
+	if (!level || level >= ARRAY_SIZE(kite_mfcfg))
+		return -EINVAL;
+	cfg = &kite_mfcfg[level];
+
+	if (wsiz > kHistorySize32 || (1 << ilog2(wsiz)) != wsiz)
+		return -EINVAL;
+
+	mf->hash = calloc(0x10000, sizeof(mf->hash[0]));
+	if (!mf->hash)
+		return -ENOMEM;
+
+	mf->chain = malloc(sizeof(mf->chain[0]) * wsiz);
+	if (!mf->chain) {
+		free(mf->hash);
+		mf->hash = NULL;
+		return -ENOMEM;
+	}
+	mf->wsiz = wsiz;
+
+	mf->good_len = cfg->good_length;
+	mf->nice_len = cfg->nice_length;
+	mf->depth = cfg->depth;
+	mf->max_lazy = cfg->max_lazy;
+	return cfg->lazy_search;
+}
+
+static void kite_mf_reset(struct kite_matchfinder *mf,
+			  const void *buffer, const void *end)
+{
+	mf->buffer = buffer;
+	mf->end = end;
+
+	/*
+	 * Move the base past every position recorded so far plus the maximum
+	 * distance, so stale hash entries fail the window check uncleared.
+	 */
+	mf->base += mf->offset + kHistorySize32 + 1;
+
+	mf->offset = 0;
+	mf->cyclic_pos = 0;
+
+	mf->matches = mf->matches_matrix[0];
+	mf->matches_matrix[0][0].len =
+		mf->matches_matrix[1][0].len = kMatchMinLen - 1;
+}
+
+static bool deflate_count_code(struct kite_deflate *s, bool literal,
+			       unsigned int lenSlot, unsigned int distSlot)
+{
+	struct kite_deflate_table *t = s->tab;
+	unsigned int lenbase = (literal ? 0 : kSymbolMatch);
+	u64 rem = (s->outlen - s->pos_out) * 8 - s->bitpos;
+	bool recalc = false;
+	unsigned int bits;
+
+	s->freq_changed = true;
+	++s->mainFreqs[lenbase + lenSlot];
+	if (!literal)
+		++s->distFreqs[distSlot];
+
+	if (s->encode_mode == 1) {
+		if (literal) {
+			bits = kstaticHuff_litLenLevels[lenSlot];
+			goto out;
+		}
+		bits = kstaticHuff_litLenLevels[kSymbolMatch + lenSlot] +
+			kLenExtraBits32[lenSlot] + 5 + kDistExtraBits[distSlot];
+		goto out;
+	}
+
+	/* XXX: more ideas to be done later */
+	recalc |= (!literal && !t->distLevels[distSlot]);
+	recalc |= !t->litLenLevels[lenbase + lenSlot];
+	if (recalc) {
+		kite_dbg("recalc %c lS %u dS %u", literal ? 'l' : 'm',
+			 lenSlot, distSlot);
+		s->tab = s->tables + (s->tab == s->tables);
+		kite_deflate_fixdynblock(s);
+		bits = 0;
+		goto out;
+	}
+
+	if (literal) {
+		bits = t->litLenLevels[lenSlot];
+		goto out;
+	}
+
+	bits = t->distLevels[distSlot] + kDistExtraBits[distSlot] +
+		t->litLenLevels[kSymbolMatch + lenSlot] +
+		kLenExtraBits32[lenSlot];
+out:
+	if (rem < s->costbits + bits) {
+		--s->mainFreqs[lenbase + lenSlot];
+		if (!literal)
+			--s->distFreqs[distSlot];
+		if (recalc)
+			s->tab = s->tables + (s->tab == s->tables);
+		return false;
+	}
+	s->costbits += bits;
+	return true;
+}
+
+static bool kite_deflate_tally(struct kite_deflate *s,
+			       struct kite_match *match)
+{
+	struct kite_deflate_symbol *sym = s->sym + s->symbols;
+	u32 fixedcost = ~0;
+	bool hassp;
+
+	*sym = (struct kite_deflate_symbol) {
+		.len = match->len,
+		.dist = match->dist,
+	};
+
+retry:
+	if (sym->len < kMatchMinLen) {
+		hassp = deflate_count_code(s, true, sym->dist, 0);
+	} else {
+		unsigned int lc = sym->len - kMatchMinLen;
+		unsigned int lenSlot = g_LenSlots[lc];
+		unsigned int distSlot = deflateDistSlot(sym->dist - 1);
+
+		hassp = deflate_count_code(s, false, lenSlot, distSlot);
+	}
+
+	if (!hassp) {
+		if (s->encode_mode == 1) {
+			fixedcost = s->costbits;
+			s->encode_mode = 2;
+			goto retry;
+		}
+		s->lastblock = true;
+		if (fixedcost <= s->costbits)
+			s->encode_mode = 1;
+		return true;	/* no space left: the symbol is NOT recorded */
+	}
+	++s->symbols;
+	return false;
+}
+
+static void kite_deflate_writestore(struct kite_deflate *s)
+{
+	bool fb = !s->startpos && !s->bitpos;
+	unsigned int totalsiz = s->pos_in - s->prev_valid - s->startpos;
+
+	do {
+		unsigned int len = min_t(unsigned int, totalsiz, 65535);
+
+		totalsiz -= len;
+		writebits(s, (fb << 3) | (kStored << 1) |
+			  (s->lastblock && !totalsiz), 3 + fb);
+		flushbits(s);
+		writebits(s, len, 16);
+		writebits(s, len ^ 0xffff, 16);
+		flushbits(s);
+		memcpy(s->out + s->pos_out, s->in + s->startpos, len);
+		s->pos_out += len;
+		s->startpos += len;
+	} while (totalsiz);
+}
+
+static void kite_deflate_endblock(struct kite_deflate *s)
+{
+	if (s->encode_mode == 1) {
+		u32 fixedcost = s->costbits;
+		unsigned int storelen, storeblocks, storecost;
+
+		kite_deflate_fixdynblock(s);
+		if (fixedcost > s->costbits)
+			s->encode_mode = 2;
+		else
+			s->costbits = fixedcost;
+
+		storelen = s->pos_in - s->prev_valid - s->startpos;
+		storeblocks = max(DIV_ROUND_UP(storelen, 65535), 1U);
+		storecost = (8 - s->bitpos) + storeblocks - 1 +
+			storeblocks * 32 + storelen * 8;
+		if (s->costbits > storecost) {
+			s->costbits = storecost;
+			s->encode_mode = 0;
+		}
+	}
+
+	s->lastblock |= (s->costbits + s->bitpos >=
+			(s->outlen - s->pos_out) * 8);
+}
+
+static void kite_deflate_startblock(struct kite_deflate *s)
+{
+	memset(s->mainFreqs, 0, sizeof(s->mainFreqs));
+	memset(s->distFreqs, 0, sizeof(s->distFreqs));
+	memset(s->tables, 0, sizeof(s->tables[0]));
+	s->symbols = 0;
+	s->mainFreqs[kSymbolEndOfBlock]++;
+	s->encode_mode = 1;
+	s->tab = s->tables;
+	s->costbits = 3 + kstaticHuff_litLenLevels[kSymbolEndOfBlock];
+}
+
+static bool kite_deflate_commitblock(struct kite_deflate *s)
+{
+	if (s->encode_mode == 1) {
+		kite_deflate_setfixedtrees(s);
+		kite_deflate_writeblock(s, true);
+	} else if (s->encode_mode == 2) {
+		kite_deflate_sendtrees(s);
+		kite_deflate_writeblock(s, false);
+	} else {
+		kite_deflate_writestore(s);
+	}
+	s->startpos = s->pos_in - s->prev_valid;	/* a pending byte isn't consumed */
+	return s->lastblock;
+}
+
+static bool kite_deflate_fast(struct kite_deflate *s)
+{
+	struct kite_matchfinder *mf = s->mf;
+
+	kite_deflate_startblock(s);
+	while (1) {
+		int matches = kite_mf_getmatches_hc3(mf, mf->depth,
+				kMatchMinLen - 1);
+
+		if (matches) {
+			unsigned int len = mf->matches[matches].len;
+			unsigned int dist = mf->matches[matches].dist;
+
+			if (len == kMatchMinLen && dist > ZLIB_DISTANCE_TOO_FAR)
+				goto nomatch;
+
+			kite_dbg("%u matches found: longest [%u,%u] of distance %u",
+				 matches, s->pos_in, s->pos_in + len - 1, dist);
+
+			if (kite_deflate_tally(s, mf->matches + matches))
+				break;
+			s->pos_in += len;
+			/* feed the rest of the matched bytes to the matchfinder */
+			while (--len)
+				(void)kite_mf_getmatches_hc3(mf, 0, 0);
+		} else {
+nomatch:
+			mf->matches[0].dist = s->in[s->pos_in];
+			if (isprint(s->in[s->pos_in]))
+				kite_dbg("literal %c pos_in %u", s->in[s->pos_in], s->pos_in);
+			else
+				kite_dbg("literal %x pos_in %u", s->in[s->pos_in], s->pos_in);
+
+			if (kite_deflate_tally(s, mf->matches))
+				break;
+			++s->pos_in;
+		}
+
+		s->lastblock |= (s->pos_in >= s->inlen);
+		if (s->pos_in >= s->inlen || s->symbols >= s->max_symbols) {
+			kite_deflate_endblock(s);
+			break;
+		}
+	}
+	return kite_deflate_commitblock(s);
+}
+
+static bool kite_deflate_slow(struct kite_deflate *s)
+{
+	struct kite_matchfinder *mf = s->mf;
+	bool flush = false;
+
+	kite_deflate_startblock(s);
+	while (1) {
+		struct kite_match *prev_matches = mf->matches;
+		unsigned int len = kMatchMinLen - 1;
+		int matches;
+		unsigned int len0;
+
+		mf->matches = mf->matches_matrix[
+				mf->matches == mf->matches_matrix[0]];
+		mf->matches[0].dist = s->in[s->pos_in];
+
+		len0 = prev_matches[s->prev_longest].len;
+		if (len0 < mf->max_lazy) {
+			matches = kite_mf_getmatches_hc3(mf, mf->depth >>
+				(len0 >= mf->good_len), len0);
+			if (matches) {
+				len = mf->matches[matches].len;
+				if (len == kMatchMinLen &&
+				    mf->matches[matches].dist >
+						ZLIB_DISTANCE_TOO_FAR) {
+					matches = 0;
+					len = kMatchMinLen - 1;
+				}
+			}
+		} else {
+			matches = 0;
+			(void)kite_mf_getmatches_hc3(mf, 0, 0);
+		}
+
+		if (len < len0) {
+			if (kite_deflate_tally(s,
+					prev_matches + s->prev_longest))
+				break;
+
+			s->pos_in += --len0;
+			/* feed the rest of the matched bytes to the matchfinder */
+			while (--len0)
+				(void)kite_mf_getmatches_hc3(mf, 0, 0);
+			s->prev_valid = false;
+			s->prev_longest = 0;
+		} else {
+			if (!s->prev_valid)
+				s->prev_valid = true;
+			else if (kite_deflate_tally(s, prev_matches))
+				break;
+			++s->pos_in;
+			s->prev_longest = matches;
+		}
+
+		s->lastblock |= (s->pos_in >= s->inlen);
+		if (s->pos_in >= s->inlen) {
+			flush = true;
+			break;
+		}
+		if (s->symbols >= s->max_symbols) {
+			kite_deflate_endblock(s);
+			break;
+		}
+	}
+
+	if (flush && s->prev_valid) {
+		/* keep the pending byte unconsumed if it no longer fits */
+		s->prev_valid = kite_deflate_tally(s, mf->matches + s->prev_longest);
+	}
+	return kite_deflate_commitblock(s);
+}
+
+void kite_deflate_end(struct kite_deflate *s)
+{
+	if (s->mf) {
+		if (s->mf->hash)
+			free(s->mf->hash);
+		if (s->mf->chain)
+			free(s->mf->chain);
+		free(s->mf);
+	}
+	if (s->sym)
+		free(s->sym);
+	free(s);
+}
+
+struct kite_deflate *kite_deflate_init(int level, unsigned int dict_size)
+{
+	struct kite_deflate *s;
+	int err;
+
+	kite_deflate_init_once();
+	s = calloc(1, sizeof(*s));
+	if (!s)
+		return ERR_PTR(-ENOMEM);
+
+	s->max_symbols = 16384;		/* +1 below: the lazy flush may add one more */
+	s->sym = malloc(sizeof(s->sym[0]) * (s->max_symbols + 1));
+	if (!s->sym) {
+		err = -ENOMEM;
+		goto err_out;
+	}
+
+	s->mf = calloc(1, sizeof(*s->mf));	/* kite_mf_reset() reads base/offset */
+	if (!s->mf) {
+		err = -ENOMEM;
+		goto err_out;
+	}
+
+	if (!dict_size)
+		dict_size = kHistorySize32;
+
+	err = kite_mf_init(s->mf, dict_size, level);
+	if (err < 0)
+		goto err_out;
+
+	s->lazy_search = err;
+	return s;
+err_out:
+	if (s->mf)
+		free(s->mf);
+	if (s->sym)
+		free(s->sym);
+	free(s);
+	return ERR_PTR(err);
+}
+
+int kite_deflate_destsize(struct kite_deflate *s, const u8 *in, u8 *out,
+			  unsigned int *srcsize, unsigned int target_dstsize)
+{
+	memset(s, 0, offsetof(struct kite_deflate, mainFreqs));
+	s->in = in;
+	s->inlen = *srcsize;
+	s->out = out;
+	s->outlen = target_dstsize;
+	kite_mf_reset(s->mf, in, in + s->inlen);
+
+	if (s->lazy_search)
+		while (!kite_deflate_slow(s));
+	else
+		while (!kite_deflate_fast(s));
+	flushbits(s);
+
+	*srcsize = s->startpos;		/* number of input bytes actually encoded */
+	return s->pos_out;
+}
+
+#ifdef TEST
+#include <fcntl.h>
+#include <unistd.h>
+#include <sys/mman.h>
+
+int main(int argc, char *argv[])
+{
+	int fd;
+	u64 filelength;
+	u8 out[1048576], *buf;
+	int dstsize = 4096;
+	unsigned int srcsize, outsize;
+	struct kite_deflate *s;
+
+	fd = open(argv[1], O_RDONLY);
+	if (fd < 0)
+		return -errno;
+	if (argc > 2)
+		dstsize = atoi(argv[2]);
+	filelength = lseek(fd, 0, SEEK_END);
+
+	s = kite_deflate_init(9, 0);
+	if (IS_ERR(s))
+		return PTR_ERR(s);
+
+	filelength = lseek(fd, 0, SEEK_END);
+	buf = mmap(NULL, filelength, PROT_READ, MAP_SHARED, fd, 0);
+	if (buf == MAP_FAILED)
+		return -errno;
+	close(fd);
+
+	srcsize = filelength;
+	outsize = kite_deflate_destsize(s, buf, out, &srcsize, dstsize);
+	fd = open("out.txt", O_WRONLY | O_CREAT | O_TRUNC, 0644);
+	write(fd, out, outsize);
+	close(fd);
+	kite_deflate_end(s);
+	return 0;
+}
+#endif
From 29b9e71401621d3417599067a75bd9b13154c6a7 Mon Sep 17 00:00:00 2001
From: Gao Xiang
Date: Thu, 13 Jul 2023 07:51:43 +0800
Subject: [PATCH 2/9] erofs-utils: fuse,fsck: add DEFLATE algorithm support

This patch adds DEFLATE compression algorithm support to erofsfuse
by using zlib (by default) and libdeflate. libdeflate will be used
instead of zlib if libdeflate is enabled.
Signed-off-by: Gao Xiang Link: https://lore.kernel.org/r/20230712235143.10125-1-hsiangkao@linux.alibaba.com --- configure.ac | 45 ++++++++++++++ dump/Makefile.am | 2 +- fsck/Makefile.am | 4 +- fuse/Makefile.am | 2 +- include/erofs_fs.h | 7 +++ lib/decompress.c | 147 +++++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 203 insertions(+), 4 deletions(-) diff --git a/configure.ac b/configure.ac index 54608fbc..d6dc7afa 100644 --- a/configure.ac +++ b/configure.ac @@ -122,6 +122,15 @@ AC_ARG_ENABLE(lzma, [AS_HELP_STRING([--enable-lzma], [enable LZMA compression support @<:@default=no@:>@])], [enable_lzma="$enableval"], [enable_lzma="no"]) +AC_ARG_WITH(zlib, + [AS_HELP_STRING([--without-zlib], + [Ignore presence of zlib inflate support @<:@default=enabled@:>@])]) + +AC_ARG_WITH(libdeflate, + [AS_HELP_STRING([--with-libdeflate], + [Enable and build with libdeflate inflate support @<:@default=disabled@:>@])], [], + [with_libdeflate="no"]) + AC_ARG_ENABLE(fuse, [AS_HELP_STRING([--enable-fuse], [enable erofsfuse @<:@default=no@:>@])], [enable_fuse="$enableval"], [enable_fuse="no"]) @@ -395,6 +404,34 @@ if test "x$enable_lzma" = "xyes"; then CPPFLAGS="${saved_CPPFLAGS}" fi +# Configure zlib +AS_IF([test "x$with_zlib" != "xno"], [ + PKG_CHECK_MODULES([zlib], [zlib]) + # Paranoia: don't trust the result reported by pkgconfig before trying out + saved_LIBS="$LIBS" + saved_CPPFLAGS=${CPPFLAGS} + CPPFLAGS="${zlib_CFLAGS} ${CPPFLAGS}" + LIBS="${zlib_LIBS} $LIBS" + AC_CHECK_LIB(z, inflate, [ + have_zlib="yes" ], [ + AC_MSG_ERROR([zlib doesn't work properly])]) + LIBS="${saved_LIBS}" + CPPFLAGS="${saved_CPPFLAGS}"], [have_zlib="no"]) + +# Configure libdeflate +AS_IF([test "x$with_libdeflate" != "xno"], [ + PKG_CHECK_MODULES([libdeflate], [libdeflate]) + # Paranoia: don't trust the result reported by pkgconfig before trying out + saved_LIBS="$LIBS" + saved_CPPFLAGS=${CPPFLAGS} + CPPFLAGS="${libdeflate_CFLAGS} ${CPPFLAGS}" + LIBS="${libdeflate_LIBS} $LIBS" + 
AC_CHECK_LIB(deflate, libdeflate_deflate_decompress, [ + have_libdeflate="yes" ], [ + AC_MSG_ERROR([libdeflate doesn't work properly])]) + LIBS="${saved_LIBS}" + CPPFLAGS="${saved_CPPFLAGS}"], [have_libdeflate="no"]) + # Enable 64-bit off_t CFLAGS+=" -D_LARGEFILE_SOURCE -D_FILE_OFFSET_BITS=64" @@ -450,6 +487,14 @@ if test "x${have_liblzma}" = "xyes"; then AC_SUBST([liblzma_CFLAGS]) fi +if test "x$have_zlib" = "xyes"; then + AC_DEFINE([HAVE_ZLIB], 1, [Define to 1 if zlib is found]) +fi + +if test "x$have_libdeflate" = "xyes"; then + AC_DEFINE([HAVE_LIBDEFLATE], 1, [Define to 1 if libdeflate is found]) +fi + # Dump maximum block size AS_IF([test "x$erofs_cv_max_block_size" = "x"], [$erofs_cv_max_block_size = 4096], []) diff --git a/dump/Makefile.am b/dump/Makefile.am index 90227a57..aed20c2f 100644 --- a/dump/Makefile.am +++ b/dump/Makefile.am @@ -7,4 +7,4 @@ AM_CPPFLAGS = ${libuuid_CFLAGS} dump_erofs_SOURCES = main.c dump_erofs_CFLAGS = -Wall -I$(top_srcdir)/include dump_erofs_LDADD = $(top_builddir)/lib/liberofs.la ${libselinux_LIBS} \ - ${liblz4_LIBS} ${liblzma_LIBS} + ${liblz4_LIBS} ${liblzma_LIBS} ${zlib_LIBS} ${libdeflate_LIBS} diff --git a/fsck/Makefile.am b/fsck/Makefile.am index 369cb2f8..d024405f 100644 --- a/fsck/Makefile.am +++ b/fsck/Makefile.am @@ -7,7 +7,7 @@ AM_CPPFLAGS = ${libuuid_CFLAGS} fsck_erofs_SOURCES = main.c fsck_erofs_CFLAGS = -Wall -I$(top_srcdir)/include fsck_erofs_LDADD = $(top_builddir)/lib/liberofs.la ${libselinux_LIBS} \ - ${liblz4_LIBS} ${liblzma_LIBS} + ${liblz4_LIBS} ${liblzma_LIBS} ${zlib_LIBS} ${libdeflate_LIBS} if ENABLE_FUZZING noinst_PROGRAMS = fuzz_erofsfsck @@ -15,5 +15,5 @@ fuzz_erofsfsck_SOURCES = main.c fuzz_erofsfsck_CFLAGS = -Wall -I$(top_srcdir)/include -DFUZZING fuzz_erofsfsck_LDFLAGS = -fsanitize=address,fuzzer fuzz_erofsfsck_LDADD = $(top_builddir)/lib/liberofs.la ${libselinux_LIBS} \ - ${liblz4_LIBS} ${liblzma_LIBS} + ${liblz4_LIBS} ${liblzma_LIBS} ${zlib_LIBS} ${libdeflate_LIBS} endif diff --git a/fuse/Makefile.am 
b/fuse/Makefile.am index 3179a2b5..50be7833 100644 --- a/fuse/Makefile.am +++ b/fuse/Makefile.am @@ -7,4 +7,4 @@ erofsfuse_SOURCES = main.c erofsfuse_CFLAGS = -Wall -I$(top_srcdir)/include erofsfuse_CFLAGS += -DFUSE_USE_VERSION=26 ${libfuse_CFLAGS} ${libselinux_CFLAGS} erofsfuse_LDADD = $(top_builddir)/lib/liberofs.la ${libfuse_LIBS} ${liblz4_LIBS} \ - ${libselinux_LIBS} ${liblzma_LIBS} + ${libselinux_LIBS} ${liblzma_LIBS} ${zlib_LIBS} ${libdeflate_LIBS} diff --git a/include/erofs_fs.h b/include/erofs_fs.h index 3697882e..850438a7 100644 --- a/include/erofs_fs.h +++ b/include/erofs_fs.h @@ -297,6 +297,7 @@ enum { enum { Z_EROFS_COMPRESSION_LZ4 = 0, Z_EROFS_COMPRESSION_LZMA = 1, + Z_EROFS_COMPRESSION_DEFLATE = 2, Z_EROFS_COMPRESSION_MAX }; #define Z_EROFS_ALL_COMPR_ALGS ((1 << Z_EROFS_COMPRESSION_MAX) - 1) @@ -317,6 +318,12 @@ struct z_erofs_lzma_cfgs { #define Z_EROFS_LZMA_MAX_DICT_SIZE (8 * Z_EROFS_PCLUSTER_MAX_SIZE) +/* 6 bytes (+ length field = 8 bytes) */ +struct z_erofs_deflate_cfgs { + u8 windowbits; /* 8..15 for DEFLATE */ + u8 reserved[5]; +} __packed; + /* * bit 0 : COMPACTED_2B indexes (0 - off; 1 - on) * e.g. for 4k logical cluster size, 4B if compacted 2B is off; diff --git a/lib/decompress.c b/lib/decompress.c index 59a9ca06..0b41ff46 100644 --- a/lib/decompress.c +++ b/lib/decompress.c @@ -9,6 +9,149 @@ #include "erofs/err.h" #include "erofs/print.h" +#ifdef HAVE_LIBDEFLATE +/* if libdeflate is available, use libdeflate instead. 
*/ +#include + +static int z_erofs_decompress_deflate(struct z_erofs_decompress_req *rq) +{ + u8 *dest = (u8 *)rq->out; + u8 *src = (u8 *)rq->in; + u8 *buff = NULL; + size_t actual_out; + unsigned int inputmargin = 0; + struct libdeflate_decompressor *inf; + enum libdeflate_result ret; + + while (!src[inputmargin & (erofs_blksiz() - 1)]) + if (!(++inputmargin & (erofs_blksiz() - 1))) + break; + + if (inputmargin >= rq->inputsize) + return -EFSCORRUPTED; + + if (rq->decodedskip) { + buff = malloc(rq->decodedlength); + if (!buff) + return -ENOMEM; + dest = buff; + } + + inf = libdeflate_alloc_decompressor(); + if (!inf) + return -ENOMEM; + + if (rq->partial_decoding) { + ret = libdeflate_deflate_decompress(inf, src + inputmargin, + rq->inputsize - inputmargin, dest, + rq->decodedlength, &actual_out); + if (ret && ret != LIBDEFLATE_INSUFFICIENT_SPACE) { + ret = -EIO; + goto out_inflate_end; + } + + if (actual_out != rq->decodedlength) { + ret = -EIO; + goto out_inflate_end; + } + } else { + ret = libdeflate_deflate_decompress(inf, src + inputmargin, + rq->inputsize - inputmargin, dest, + rq->decodedlength, NULL); + if (ret) { + ret = -EIO; + goto out_inflate_end; + } + } + + if (rq->decodedskip) + memcpy(rq->out, dest + rq->decodedskip, + rq->decodedlength - rq->decodedskip); + +out_inflate_end: + libdeflate_free_decompressor(inf); + if (buff) + free(buff); + return ret; +} +#elif defined(HAVE_ZLIB) +#include + +/* report a zlib or i/o error */ +static int zerr(int ret) +{ + switch (ret) { + case Z_STREAM_ERROR: + return -EINVAL; + case Z_DATA_ERROR: + return -EIO; + case Z_MEM_ERROR: + return -ENOMEM; + case Z_ERRNO: + case Z_VERSION_ERROR: + default: + return -EFAULT; + } +} + +static int z_erofs_decompress_deflate(struct z_erofs_decompress_req *rq) +{ + int ret = 0; + u8 *dest = (u8 *)rq->out; + u8 *src = (u8 *)rq->in; + u8 *buff = NULL; + unsigned int inputmargin = 0; + z_stream strm; + + while (!src[inputmargin & (erofs_blksiz() - 1)]) + if (!(++inputmargin & 
(erofs_blksiz() - 1))) + break; + + if (inputmargin >= rq->inputsize) + return -EFSCORRUPTED; + + if (rq->decodedskip) { + buff = malloc(rq->decodedlength); + if (!buff) + return -ENOMEM; + dest = buff; + } + + /* allocate inflate state */ + strm.zalloc = Z_NULL; + strm.zfree = Z_NULL; + strm.opaque = Z_NULL; + strm.avail_in = 0; + strm.next_in = Z_NULL; + ret = inflateInit2(&strm, -15); + if (ret != Z_OK) + return zerr(ret); + + strm.next_in = src + inputmargin; + strm.avail_in = rq->inputsize - inputmargin; + strm.next_out = dest; + strm.avail_out = rq->decodedlength; + + ret = inflate(&strm, rq->partial_decoding ? Z_SYNC_FLUSH : Z_FINISH); + if (ret != Z_STREAM_END || strm.total_out != rq->decodedlength) { + if (ret != Z_OK || !rq->partial_decoding) { + ret = zerr(ret); + goto out_inflate_end; + } + } + + if (rq->decodedskip) + memcpy(rq->out, dest + rq->decodedskip, + rq->decodedlength - rq->decodedskip); + +out_inflate_end: + inflateEnd(&strm); + if (buff) + free(buff); + return ret; +} +#endif + #ifdef HAVE_LIBLZMA #include @@ -167,6 +310,10 @@ int z_erofs_decompress(struct z_erofs_decompress_req *rq) #ifdef HAVE_LIBLZMA if (rq->alg == Z_EROFS_COMPRESSION_LZMA) return z_erofs_decompress_lzma(rq); +#endif +#if defined(HAVE_ZLIB) || defined(HAVE_LIBDEFLATE) + if (rq->alg == Z_EROFS_COMPRESSION_DEFLATE) + return z_erofs_decompress_deflate(rq); #endif return -EOPNOTSUPP; } From 47e91b6b57f2626784052921e0ee4a9a83d6aba9 Mon Sep 17 00:00:00 2001 From: Gao Xiang Date: Mon, 10 Jul 2023 19:02:50 +0800 Subject: [PATCH 3/9] erofs-utils: mkfs: add DEFLATE algorithm support This patch adds DEFLATE compression algorithm support to the erofs-utils compression framework. Note that windowbits (which indicates dictionary size) is recorded in the on-disk compression configuration. Since some accelerators (e.g. Intel IAA) don't have enough on-chip memory, compressed data generated with large windowbits (e.g. > 12 for the IAA accelerator) doesn't seem to work properly on those. 
Signed-off-by: Gao Xiang Link: https://lore.kernel.org/r/20230710110251.89464-4-hsiangkao@linux.alibaba.com --- lib/Makefile.am | 2 +- lib/compress.c | 24 +++++++++++++ lib/compressor.c | 1 + lib/compressor.h | 1 + lib/compressor_deflate.c | 78 ++++++++++++++++++++++++++++++++++++++++ 5 files changed, 105 insertions(+), 1 deletion(-) create mode 100644 lib/compressor_deflate.c diff --git a/lib/Makefile.am b/lib/Makefile.am index b7290982..ae19b74c 100644 --- a/lib/Makefile.am +++ b/lib/Makefile.am @@ -44,4 +44,4 @@ liberofs_la_CFLAGS += ${liblzma_CFLAGS} liberofs_la_SOURCES += compressor_liblzma.c endif -liberofs_la_SOURCES += kite_deflate.c +liberofs_la_SOURCES += kite_deflate.c compressor_deflate.c diff --git a/lib/compress.c b/lib/compress.c index 14d228ff..318b8de6 100644 --- a/lib/compress.c +++ b/lib/compress.c @@ -1026,6 +1026,8 @@ static int erofs_get_compress_algorithm_id(const char *name) return Z_EROFS_COMPRESSION_LZ4; if (!strcmp(name, "lzma")) return Z_EROFS_COMPRESSION_LZMA; + if (!strcmp(name, "deflate")) + return Z_EROFS_COMPRESSION_DEFLATE; return -ENOTSUP; } @@ -1080,6 +1082,28 @@ int z_erofs_build_compr_cfgs(struct erofs_buffer_head *sb_bh) bh->op = &erofs_drop_directly_bhops; } #endif + if (sbi.available_compr_algs & (1 << Z_EROFS_COMPRESSION_DEFLATE)) { + struct { + __le16 size; + struct z_erofs_deflate_cfgs z; + } __packed zalg = { + .size = cpu_to_le16(sizeof(struct z_erofs_deflate_cfgs)), + .z = { + .windowbits = + cpu_to_le32(ilog2(cfg.c_dict_size)), + } + }; + + bh = erofs_battach(bh, META, sizeof(zalg)); + if (IS_ERR(bh)) { + DBG_BUGON(1); + return PTR_ERR(bh); + } + erofs_mapbh(bh->block); + ret = dev_write(&zalg, erofs_btell(bh, false), + sizeof(zalg)); + bh->op = &erofs_drop_directly_bhops; + } return ret; } diff --git a/lib/compressor.c b/lib/compressor.c index 52eb7613..ca4d3645 100644 --- a/lib/compressor.c +++ b/lib/compressor.c @@ -20,6 +20,7 @@ static const struct erofs_compressor *compressors[] = { #if HAVE_LIBLZMA 
&erofs_compressor_lzma, #endif + &erofs_compressor_deflate, }; int erofs_compress_destsize(const struct erofs_compress *c, diff --git a/lib/compressor.h b/lib/compressor.h index cf063f12..c1eee20f 100644 --- a/lib/compressor.h +++ b/lib/compressor.h @@ -44,6 +44,7 @@ struct erofs_compress { extern const struct erofs_compressor erofs_compressor_lz4; extern const struct erofs_compressor erofs_compressor_lz4hc; extern const struct erofs_compressor erofs_compressor_lzma; +extern const struct erofs_compressor erofs_compressor_deflate; int erofs_compress_destsize(const struct erofs_compress *c, const void *src, unsigned int *srcsize, diff --git a/lib/compressor_deflate.c b/lib/compressor_deflate.c new file mode 100644 index 00000000..5a7a657b --- /dev/null +++ b/lib/compressor_deflate.c @@ -0,0 +1,78 @@ +// SPDX-License-Identifier: GPL-2.0+ OR Apache-2.0 +/* + * Copyright (C) 2023, Alibaba Cloud + * Copyright (C) 2023, Gao Xiang + */ +#include "erofs/internal.h" +#include "erofs/print.h" +#include "erofs/config.h" +#include "compressor.h" + +void *kite_deflate_init(int level, unsigned int dict_size); +void kite_deflate_end(void *s); +int kite_deflate_destsize(void *s, const u8 *in, u8 *out, + unsigned int *srcsize, unsigned int target_dstsize); + +static int deflate_compress_destsize(const struct erofs_compress *c, + const void *src, unsigned int *srcsize, + void *dst, unsigned int dstsize) +{ + int rc = kite_deflate_destsize(c->private_data, src, dst, + srcsize, dstsize); + + if (rc <= 0) + return -EFAULT; + return rc; +} + +static int compressor_deflate_exit(struct erofs_compress *c) +{ + if (!c->private_data) + return -EINVAL; + + kite_deflate_end(c->private_data); + return 0; +} + +static int compressor_deflate_init(struct erofs_compress *c) +{ + c->alg = &erofs_compressor_deflate; + c->private_data = NULL; + + erofs_warn("EXPERIMENTAL DEFLATE algorithm in use. 
Use at your own risk!"); + erofs_warn("*Carefully* check filesystem data correctness to avoid corruption!"); + erofs_warn("Please send a report to if something is wrong."); + return 0; +} + +static int erofs_compressor_deflate_setlevel(struct erofs_compress *c, + int compression_level) +{ + void *s; + + if (c->private_data) { + kite_deflate_end(c->private_data); + c->private_data = NULL; + } + + if (compression_level < 0) + compression_level = erofs_compressor_deflate.default_level; + + s = kite_deflate_init(compression_level, cfg.c_dict_size); + if (IS_ERR(s)) + return PTR_ERR(s); + + c->private_data = s; + c->compression_level = compression_level; + return 0; +} + +const struct erofs_compressor erofs_compressor_deflate = { + .name = "deflate", + .default_level = 1, + .best_level = 9, + .init = compressor_deflate_init, + .exit = compressor_deflate_exit, + .setlevel = erofs_compressor_deflate_setlevel, + .compress_destsize = deflate_compress_destsize, +}; From 073633e09cb5d55c23fd07faec69a2f05b668d0d Mon Sep 17 00:00:00 2001 From: Gao Xiang Date: Mon, 10 Jul 2023 19:02:51 +0800 Subject: [PATCH 4/9] erofs-utils: mkfs: add libdeflate compressor support Eric suggests a "binary search + heuristics" way by using the current libdeflate APIs to generate fixed-sized output DEFLATE streams. Compared to the previous built-in one, it will generate smaller images (which is expected since the built-in one is roughly just the original zlib replacement), yet the total compression time might be amplified a lot especially if some larger pclusters are used by users compared to the built-in one. 
For example: $ time mkfs.erofs -zdeflate,9 -C65536 enwik8.z enwik8 real 0m9.559s user 0m9.453s sys 0m0.069s $ time mkfs.erofs -zlibdeflate,9 -C65536 enwik8.libdeflate.9.z enwik8 real 0m50.184s user 0m50.082s sys 0m0.074s $ mkfs/mkfs.erofs -zlibdeflate,6 -C65536 enwik8.libdeflate.6.z enwik8 real 0m23.428s user 0m23.329s sys 0m0.067s 37175296 enwik8.libdeflate.6.z 37142528 enwik8.z 36835328 enwik8.libdeflate.9.z Anyway, let's use the current APIs for users who need smaller image sizes for now. Besides, EROFS also supports multiple per-file algorithms in one image, so it can be used for specific files as well. Suggested-by: Eric Biggers Signed-off-by: Gao Xiang Link: https://lore.kernel.org/r/20230710110251.89464-5-hsiangkao@linux.alibaba.com --- configure.ac | 1 + lib/Makefile.am | 3 + lib/compress.c | 2 +- lib/compressor.c | 3 + lib/compressor.h | 1 + lib/compressor_libdeflate.c | 114 ++++++++++++++++++++++++++++++++++++ mkfs/Makefile.am | 2 +- 7 files changed, 124 insertions(+), 2 deletions(-) create mode 100644 lib/compressor_libdeflate.c diff --git a/configure.ac b/configure.ac index d6dc7afa..ac0b0edb 100644 --- a/configure.ac +++ b/configure.ac @@ -450,6 +450,7 @@ AM_CONDITIONAL([ENABLE_LZ4], [test "x${have_lz4}" = "xyes"]) AM_CONDITIONAL([ENABLE_LZ4HC], [test "x${have_lz4hc}" = "xyes"]) AM_CONDITIONAL([ENABLE_FUSE], [test "x${have_fuse}" = "xyes"]) AM_CONDITIONAL([ENABLE_LIBLZMA], [test "x${have_liblzma}" = "xyes"]) +AM_CONDITIONAL([ENABLE_LIBDEFLATE], [test "x${have_libdeflate}" = "xyes"]) if test "x$have_uuid" = "xyes"; then AC_DEFINE([HAVE_LIBUUID], 1, [Define to 1 if libuuid is found]) diff --git a/lib/Makefile.am b/lib/Makefile.am index ae19b74c..694888e8 100644 --- a/lib/Makefile.am +++ b/lib/Makefile.am @@ -45,3 +45,6 @@ liberofs_la_SOURCES += compressor_liblzma.c endif liberofs_la_SOURCES += kite_deflate.c compressor_deflate.c +if ENABLE_LIBDEFLATE +liberofs_la_SOURCES += compressor_libdeflate.c +endif diff --git a/lib/compress.c b/lib/compress.c 
index 318b8de6..6fb63cb9 100644 --- a/lib/compress.c +++ b/lib/compress.c @@ -1026,7 +1026,7 @@ static int erofs_get_compress_algorithm_id(const char *name) return Z_EROFS_COMPRESSION_LZ4; if (!strcmp(name, "lzma")) return Z_EROFS_COMPRESSION_LZMA; - if (!strcmp(name, "deflate")) + if (!strcmp(name, "deflate") || !strcmp(name, "libdeflate")) return Z_EROFS_COMPRESSION_DEFLATE; return -ENOTSUP; } diff --git a/lib/compressor.c b/lib/compressor.c index ca4d3645..f81db5bb 100644 --- a/lib/compressor.c +++ b/lib/compressor.c @@ -21,6 +21,9 @@ static const struct erofs_compressor *compressors[] = { &erofs_compressor_lzma, #endif &erofs_compressor_deflate, +#if HAVE_LIBDEFLATE + &erofs_compressor_libdeflate, +#endif }; int erofs_compress_destsize(const struct erofs_compress *c, diff --git a/lib/compressor.h b/lib/compressor.h index c1eee20f..f699fe7e 100644 --- a/lib/compressor.h +++ b/lib/compressor.h @@ -45,6 +45,7 @@ extern const struct erofs_compressor erofs_compressor_lz4; extern const struct erofs_compressor erofs_compressor_lz4hc; extern const struct erofs_compressor erofs_compressor_lzma; extern const struct erofs_compressor erofs_compressor_deflate; +extern const struct erofs_compressor erofs_compressor_libdeflate; int erofs_compress_destsize(const struct erofs_compress *c, const void *src, unsigned int *srcsize, diff --git a/lib/compressor_libdeflate.c b/lib/compressor_libdeflate.c new file mode 100644 index 00000000..2756dd86 --- /dev/null +++ b/lib/compressor_libdeflate.c @@ -0,0 +1,114 @@ +// SPDX-License-Identifier: GPL-2.0+ OR Apache-2.0 +#include "erofs/internal.h" +#include "erofs/print.h" +#include "erofs/config.h" +#include +#include "compressor.h" + +static int libdeflate_compress_destsize(const struct erofs_compress *c, + const void *src, unsigned int *srcsize, + void *dst, unsigned int dstsize) +{ + static size_t last_uncompressed_size = 0; + size_t l = 0; /* largest input that fits so far */ + size_t l_csize = 0; + size_t r = *srcsize + 1; /* 
smallest input that doesn't fit so far */ + size_t m; + u8 tmpbuf[dstsize + 9]; + + if (last_uncompressed_size) + m = last_uncompressed_size * 15 / 16; + else + m = dstsize * 4; + for (;;) { + size_t csize; + + m = max(m, l + 1); + m = min(m, r - 1); + + csize = libdeflate_deflate_compress(c->private_data, src, m, + tmpbuf, dstsize + 9); + /*printf("Tried %zu => %zu\n", m, csize);*/ + if (csize > 0 && csize <= dstsize) { + /* Fits */ + memcpy(dst, tmpbuf, csize); + l = m; + l_csize = csize; + if (r <= l + 1 || csize + + (22 - 2*(int)c->compression_level) >= dstsize) + break; + /* + * Estimate needed input prefix size based on current + * compression ratio. + */ + m = (dstsize * m) / csize; + } else { + /* Doesn't fit */ + r = m; + if (r <= l + 1) + break; + m = (l + r) / 2; + } + } + + /* + * Since generic EROFS on-disk compressed data will be filled with + * leading 0s (but no more than one block, 4KB for example, even the + * whole pcluster is 128KB) if not filled, it will be used to identify + * the actual compressed length as well without taking more reserved + * compressed bytes or some extra metadata to record this. + * + * DEFLATE streams can also be used in this way, if it starts from a + * non-last stored block, flag an unused bit instead to avoid the zero + * byte. It's still a valid one according to the DEFLATE specification. + */ + if (l_csize && !((u8 *)dst)[0]) + ((u8 *)dst)[0] = 1 << (2 + 1); + + /*printf("Choosing %zu => %zu\n", l, l_csize);*/ + *srcsize = l; + last_uncompressed_size = l; + return l_csize; +} + +static int compressor_libdeflate_exit(struct erofs_compress *c) +{ + if (!c->private_data) + return -EINVAL; + + libdeflate_free_compressor(c->private_data); + return 0; +} + +static int compressor_libdeflate_init(struct erofs_compress *c) +{ + c->alg = &erofs_compressor_libdeflate; + c->private_data = NULL; + + erofs_warn("EXPERIMENTAL libdeflate compressor in use. 
Use at your own risk!"); + return 0; +} + +static int erofs_compressor_libdeflate_setlevel(struct erofs_compress *c, + int compression_level) +{ + if (compression_level < 0) + compression_level = erofs_compressor_deflate.default_level; + + libdeflate_free_compressor(c->private_data); + c->private_data = libdeflate_alloc_compressor(compression_level); + if (!c->private_data) + return -ENOMEM; + c->compression_level = compression_level; + return 0; +} + +const struct erofs_compressor erofs_compressor_libdeflate = { + .name = "libdeflate", + .default_level = 1, + .best_level = 12, + .init = compressor_libdeflate_init, + .exit = compressor_libdeflate_exit, + .setlevel = erofs_compressor_libdeflate_setlevel, + .compress_destsize = libdeflate_compress_destsize, +}; diff --git a/mkfs/Makefile.am b/mkfs/Makefile.am index a08dc537..603c2f33 100644 --- a/mkfs/Makefile.am +++ b/mkfs/Makefile.am @@ -6,4 +6,4 @@ AM_CPPFLAGS = ${libselinux_CFLAGS} mkfs_erofs_SOURCES = main.c mkfs_erofs_CFLAGS = -Wall -I$(top_srcdir)/include mkfs_erofs_LDADD = $(top_builddir)/lib/liberofs.la ${libselinux_LIBS} \ - ${libuuid_LIBS} ${liblz4_LIBS} ${liblzma_LIBS} + ${libuuid_LIBS} ${liblz4_LIBS} ${liblzma_LIBS} ${libdeflate_LIBS} From 95d315fd7958886ff26f42b0051881ac7593219d Mon Sep 17 00:00:00 2001 From: Gao Xiang Date: Fri, 14 Jul 2023 14:58:51 +0800 Subject: [PATCH 5/9] erofs-utils: introduce tarerofs Let's try to add a new mode "tarerofs" for mkfs.erofs. It mainly aims at two use cases: - Convert a tarball (or later tarballs with a merged view) into a full EROFS image [--tar=f]; - Generate an EROFS manifest image to reuse tar data [--tar=i], which also enables EROFS 512-byte blocks. The second use case is mainly prepared for OCI direct mount without OCI blob unpacking. This also adds another `--aufs` option to transform aufs special files into overlayfs metadata. 
[ Note that `--tar=f` generates lots of temporary files for now which can impact performance since the original tar stream(s) may be non-seekable. ] Signed-off-by: Jingbo Xu Link: https://lore.kernel.org/r/20230714065851.70583-1-jefflexu@linux.alibaba.com Signed-off-by: Gao Xiang --- configure.ac | 1 + include/erofs/blobchunk.h | 4 +- include/erofs/inode.h | 12 + include/erofs/internal.h | 7 +- include/erofs/tar.h | 29 ++ include/erofs/xattr.h | 4 + lib/Makefile.am | 3 +- lib/blobchunk.c | 47 ++- lib/inode.c | 194 ++++++--- lib/tar.c | 809 ++++++++++++++++++++++++++++++++++++++ lib/xattr.c | 46 ++- mkfs/main.c | 134 +++++-- 12 files changed, 1184 insertions(+), 106 deletions(-) create mode 100644 include/erofs/tar.h create mode 100644 lib/tar.c diff --git a/configure.ac b/configure.ac index ac0b0edb..a8cecd08 100644 --- a/configure.ac +++ b/configure.ac @@ -176,6 +176,7 @@ AC_CHECK_HEADERS(m4_flatten([ fcntl.h getopt.h inttypes.h + linux/aufs_type.h linux/falloc.h linux/fs.h linux/types.h diff --git a/include/erofs/blobchunk.h b/include/erofs/blobchunk.h index 49cb7bf9..4269d822 100644 --- a/include/erofs/blobchunk.h +++ b/include/erofs/blobchunk.h @@ -14,8 +14,10 @@ extern "C" #include "erofs/internal.h" +struct erofs_blobchunk *erofs_get_unhashed_chunk(erofs_off_t chunksize, + unsigned int device_id, erofs_blk_t blkaddr); int erofs_blob_write_chunk_indexes(struct erofs_inode *inode, erofs_off_t off); -int erofs_blob_write_chunked_file(struct erofs_inode *inode); +int erofs_blob_write_chunked_file(struct erofs_inode *inode, int fd); int erofs_blob_remap(void); void erofs_blob_exit(void); int erofs_blob_init(const char *blobfile_path); diff --git a/include/erofs/inode.h b/include/erofs/inode.h index 058a235b..e8a56701 100644 --- a/include/erofs/inode.h +++ b/include/erofs/inode.h @@ -15,11 +15,23 @@ extern "C" #include "erofs/internal.h" +static inline struct erofs_inode *erofs_igrab(struct erofs_inode *inode) +{ + ++inode->i_count; + return inode; +} + +u32 
erofs_new_encode_dev(dev_t dev); unsigned char erofs_mode_to_ftype(umode_t mode); unsigned char erofs_ftype_to_dtype(unsigned int filetype); void erofs_inode_manager_init(void); unsigned int erofs_iput(struct erofs_inode *inode); erofs_nid_t erofs_lookupnid(struct erofs_inode *inode); +struct erofs_dentry *erofs_d_alloc(struct erofs_inode *parent, + const char *name); +int tarerofs_dump_tree(struct erofs_inode *dir); +int erofs_init_empty_dir(struct erofs_inode *dir); +struct erofs_inode *erofs_new_inode(void); struct erofs_inode *erofs_mkfs_build_tree_from_path(const char *path); struct erofs_inode *erofs_mkfs_build_special_from_fd(int fd, const char *name); diff --git a/include/erofs/internal.h b/include/erofs/internal.h index aad21151..46690f51 100644 --- a/include/erofs/internal.h +++ b/include/erofs/internal.h @@ -20,6 +20,7 @@ typedef unsigned short umode_t; #include "erofs_fs.h" #include #include /* for off_t definition */ +#include #ifndef PATH_MAX #define PATH_MAX 4096 /* # chars in a path name including nul */ @@ -170,13 +171,17 @@ struct erofs_inode { } u; char *i_srcpath; - + union { + char *i_link; + FILE *i_tmpfile; + }; unsigned char datalayout; unsigned char inode_isize; /* inline tail-end packing size */ unsigned short idata_size; bool compressed_idata; bool lazy_tailblock; + bool with_tmpfile; unsigned int xattr_isize; unsigned int extent_isize; diff --git a/include/erofs/tar.h b/include/erofs/tar.h new file mode 100644 index 00000000..268c57b0 --- /dev/null +++ b/include/erofs/tar.h @@ -0,0 +1,29 @@ +/* SPDX-License-Identifier: GPL-2.0+ OR Apache-2.0 */ +#ifndef __EROFS_TAR_H +#define __EROFS_TAR_H + +#include + +struct erofs_pax_header { + struct stat st; + bool use_mtime; + bool use_size; + bool use_uid; + bool use_gid; + char *path, *link; +}; + +struct erofs_tarfile { + struct erofs_pax_header global; + + int fd; + u64 offset; + bool index_mode, aufs; +}; + +int tarerofs_init_empty_dir(struct erofs_inode *inode); +int 
tarerofs_parse_tar(struct erofs_inode *root, struct erofs_tarfile *tar); +int tarerofs_reserve_devtable(unsigned int devices); +int tarerofs_write_devtable(struct erofs_tarfile *tar); + +#endif diff --git a/include/erofs/xattr.h b/include/erofs/xattr.h index 14fc0815..27e14bf0 100644 --- a/include/erofs/xattr.h +++ b/include/erofs/xattr.h @@ -72,6 +72,7 @@ static inline unsigned int xattrblock_offset(unsigned int xattr_id) #define XATTR_NAME_POSIX_ACL_DEFAULT "system.posix_acl_default" #endif +int erofs_scan_file_xattrs(struct erofs_inode *inode); int erofs_prepare_xattr_ibody(struct erofs_inode *inode); char *erofs_export_xattr_ibody(struct list_head *ixattrs, unsigned int size); int erofs_build_shared_xattrs_from_path(const char *path); @@ -80,6 +81,9 @@ int erofs_xattr_insert_name_prefix(const char *prefix); void erofs_xattr_cleanup_name_prefixes(void); int erofs_xattr_write_name_prefixes(FILE *f); +int erofs_setxattr(struct erofs_inode *inode, char *key, + const void *value, size_t size); + #ifdef __cplusplus } #endif diff --git a/lib/Makefile.am b/lib/Makefile.am index 694888e8..ebe466b8 100644 --- a/lib/Makefile.am +++ b/lib/Makefile.am @@ -19,6 +19,7 @@ noinst_HEADERS = $(top_srcdir)/include/erofs_fs.h \ $(top_srcdir)/include/erofs/io.h \ $(top_srcdir)/include/erofs/list.h \ $(top_srcdir)/include/erofs/print.h \ + $(top_srcdir)/include/erofs/tar.h \ $(top_srcdir)/include/erofs/trace.h \ $(top_srcdir)/include/erofs/xattr.h \ $(top_srcdir)/include/erofs/compress_hints.h \ @@ -29,7 +30,7 @@ noinst_HEADERS += compressor.h liberofs_la_SOURCES = config.c io.c cache.c super.c inode.c xattr.c exclude.c \ namei.c data.c compress.c compressor.c zmap.c decompress.c \ compress_hints.c hashmap.c sha256.c blobchunk.c dir.c \ - fragments.c rb_tree.c dedupe.c uuid_unparse.c uuid.c + fragments.c rb_tree.c dedupe.c uuid_unparse.c uuid.c tar.c liberofs_la_CFLAGS = -Wall ${libuuid_CFLAGS} -I$(top_srcdir)/include if ENABLE_LZ4 diff --git a/lib/blobchunk.c b/lib/blobchunk.c index 
6fbc15b6..1d91a67d 100644 --- a/lib/blobchunk.c +++ b/lib/blobchunk.c @@ -14,7 +14,10 @@ #include struct erofs_blobchunk { - struct hashmap_entry ent; + union { + struct hashmap_entry ent; + struct list_head list; + }; char sha256[32]; unsigned int device_id; erofs_off_t chunksize; @@ -29,6 +32,23 @@ static struct erofs_buffer_head *bh_devt; struct erofs_blobchunk erofs_holechunk = { .blkaddr = EROFS_NULL_ADDR, }; +static LIST_HEAD(unhashed_blobchunks); + +struct erofs_blobchunk *erofs_get_unhashed_chunk(erofs_off_t chunksize, + unsigned int device_id, erofs_blk_t blkaddr) +{ + struct erofs_blobchunk *chunk; + + chunk = calloc(1, sizeof(struct erofs_blobchunk)); + if (!chunk) + return ERR_PTR(-ENOMEM); + + chunk->chunksize = chunksize; + chunk->device_id = device_id; + chunk->blkaddr = blkaddr; + list_add_tail(&chunk->list, &unhashed_blobchunks); + return chunk; +} static struct erofs_blobchunk *erofs_blob_getchunk(int fd, erofs_off_t chunksize) @@ -165,17 +185,14 @@ int erofs_blob_write_chunk_indexes(struct erofs_inode *inode, return dev_write(inode->chunkindexes, off, inode->extent_isize); } -int erofs_blob_write_chunked_file(struct erofs_inode *inode) +int erofs_blob_write_chunked_file(struct erofs_inode *inode, int fd) { unsigned int chunkbits = cfg.c_chunkbits; unsigned int count, unit; struct erofs_inode_chunk_index *idx; erofs_off_t pos, len, chunksize; - int fd, ret; + int ret; - fd = open(inode->i_srcpath, O_RDONLY | O_BINARY); - if (fd < 0) - return -errno; #ifdef SEEK_DATA /* if the file is fully sparsed, use one big chunk instead */ if (lseek(fd, 0, SEEK_DATA) < 0 && errno == ENXIO) { @@ -199,10 +216,8 @@ int erofs_blob_write_chunked_file(struct erofs_inode *inode) inode->extent_isize = count * unit; idx = malloc(count * max(sizeof(*idx), sizeof(void *))); - if (!idx) { - close(fd); + if (!idx) return -ENOMEM; - } inode->chunkindexes = idx; for (pos = 0; pos < inode->i_size; pos += len) { @@ -241,10 +256,8 @@ int erofs_blob_write_chunked_file(struct 
/*
 * Tear down the blob chunk bookkeeping: close the blob file if one is
 * open, then free every chunk held by the hashmap and every chunk on the
 * unhashed_blobchunks list.
 */
void erofs_blob_exit(void)
{
	struct hashmap_iter iter;
	struct hashmap_entry *e;
	struct erofs_blobchunk *bc, *n;

	if (blobfile)
		fclose(blobfile);

	/* always restart from the first entry since each pass removes it */
	while ((e = hashmap_iter_first(&blob_hashmap, &iter))) {
		bc = container_of((struct hashmap_entry *)e,
				  struct erofs_blobchunk, ent);
		DBG_BUGON(hashmap_remove(&blob_hashmap, e) != e);
		free(bc);
	}
	DBG_BUGON(hashmap_free(&blob_hashmap));

	/* chunks queued by erofs_get_unhashed_chunk() never enter the hashmap */
	list_for_each_entry_safe(bc, n, &unhashed_blobchunks, list) {
		list_del(&bc->list);
		free(bc);
	}
}
return PTR_ERR(d); - d->inode = erofs_igrab(dir); - d->type = EROFS_FT_DIR; - - /* dotdot is pointed to the parent dir */ - d = erofs_d_alloc(dir, ".."); - if (IS_ERR(d)) - return PTR_ERR(d); - d->inode = erofs_igrab(dir->i_parent); - d->type = EROFS_FT_DIR; + unsigned int i; + unsigned int d_size = 0; - /* sort subdirs */ - nr_subdirs += 2; sorted_d = malloc(nr_subdirs * sizeof(d)); if (!sorted_d) return -ENOMEM; @@ -216,7 +200,6 @@ int erofs_prepare_dir_file(struct erofs_inode *dir, unsigned int nr_subdirs) free(sorted_d); /* let's calculate dir size */ - d_size = 0; list_for_each_entry(d, &dir->i_subdirs, d_child) { int len = strlen(d->name) + sizeof(struct erofs_dirent); @@ -234,6 +217,39 @@ int erofs_prepare_dir_file(struct erofs_inode *dir, unsigned int nr_subdirs) return 0; } +int erofs_init_empty_dir(struct erofs_inode *dir) +{ + struct erofs_dentry *d; + + /* dot is pointed to the current dir inode */ + d = erofs_d_alloc(dir, "."); + if (IS_ERR(d)) + return PTR_ERR(d); + d->inode = erofs_igrab(dir); + d->type = EROFS_FT_DIR; + + /* dotdot is pointed to the parent dir */ + d = erofs_d_alloc(dir, ".."); + if (IS_ERR(d)) + return PTR_ERR(d); + d->inode = erofs_igrab(dir->i_parent); + d->type = EROFS_FT_DIR; + return 0; +} + +int erofs_prepare_dir_file(struct erofs_inode *dir, unsigned int nr_subdirs) +{ + int ret; + + ret = erofs_init_empty_dir(dir); + if (ret) + return ret; + + /* sort subdirs */ + nr_subdirs += 2; + return erofs_prepare_dir_layout(dir, nr_subdirs); +} + static void fill_dirblock(char *buf, unsigned int size, unsigned int q, struct erofs_dentry *head, struct erofs_dentry *end) { @@ -347,7 +363,7 @@ static int erofs_write_dir_file(struct erofs_inode *dir) return 0; } -static int erofs_write_file_from_buffer(struct erofs_inode *inode, char *buf) +int erofs_write_file_from_buffer(struct erofs_inode *inode, char *buf) { const unsigned int nblocks = erofs_blknr(inode->i_size); int ret; @@ -424,14 +440,12 @@ static int 
write_uncompressed_file_from_fd(struct erofs_inode *inode, int fd) return 0; } -static int erofs_write_file(struct erofs_inode *inode) +int erofs_write_file(struct erofs_inode *inode, int fd) { - int ret, fd; + int ret; - if (!inode->i_size) { - inode->datalayout = EROFS_INODE_FLAT_PLAIN; + if (!inode->i_size) return 0; - } if (cfg.c_chunkbits) { inode->u.chunkbits = cfg.c_chunkbits; @@ -439,28 +453,21 @@ static int erofs_write_file(struct erofs_inode *inode) inode->u.chunkformat = 0; if (cfg.c_force_chunkformat == FORCE_INODE_CHUNK_INDEXES) inode->u.chunkformat = EROFS_CHUNK_FORMAT_INDEXES; - return erofs_blob_write_chunked_file(inode); + return erofs_blob_write_chunked_file(inode, fd); } if (cfg.c_compr_alg[0] && erofs_file_is_compressible(inode)) { - fd = open(inode->i_srcpath, O_RDONLY | O_BINARY); - if (fd < 0) - return -errno; ret = erofs_write_compressed_file(inode, fd); - close(fd); - if (!ret || ret != -ENOSPC) return ret; + + ret = lseek(fd, 0, SEEK_SET); + if (ret < 0) + return -errno; } /* fallback to all data uncompressed */ - fd = open(inode->i_srcpath, O_RDONLY | O_BINARY); - if (fd < 0) - return -errno; - - ret = write_uncompressed_file_from_fd(inode, fd); - close(fd); - return ret; + return write_uncompressed_file_from_fd(inode, fd); } static bool erofs_bh_flush_write_inode(struct erofs_buffer_head *bh) @@ -821,7 +828,7 @@ static bool erofs_should_use_inode_extended(struct erofs_inode *inode) return false; } -static u32 erofs_new_encode_dev(dev_t dev) +u32 erofs_new_encode_dev(dev_t dev) { const unsigned int major = major(dev); const unsigned int minor = minor(dev); @@ -963,7 +970,7 @@ static int erofs_fill_inode(struct erofs_inode *inode, struct stat *st, return 0; } -static struct erofs_inode *erofs_new_inode(void) +struct erofs_inode *erofs_new_inode(void) { struct erofs_inode *inode; @@ -973,7 +980,9 @@ static struct erofs_inode *erofs_new_inode(void) inode->i_ino[0] = sbi.inos++; /* inode serial number */ inode->i_count = 1; + 
inode->datalayout = EROFS_INODE_FLAT_PLAIN; + init_list_head(&inode->i_hash); init_list_head(&inode->i_subdirs); init_list_head(&inode->i_xattrs); return inode; @@ -1043,6 +1052,10 @@ static int erofs_mkfs_build_tree(struct erofs_inode *dir, struct list_head *dirs struct erofs_dentry *d; unsigned int nr_subdirs, i_nlink; + ret = erofs_scan_file_xattrs(dir); + if (ret < 0) + return ret; + ret = erofs_prepare_xattr_ibody(dir); if (ret < 0) return ret; @@ -1060,8 +1073,15 @@ static int erofs_mkfs_build_tree(struct erofs_inode *dir, struct list_head *dirs } ret = erofs_write_file_from_buffer(dir, symlink); free(symlink); + } else if (dir->i_size) { + int fd = open(dir->i_srcpath, O_RDONLY | O_BINARY); + if (fd < 0) + return -errno; + + ret = erofs_write_file(dir, fd); + close(fd); } else { - ret = erofs_write_file(dir); + ret = 0; } if (ret) return ret; @@ -1284,3 +1304,83 @@ struct erofs_inode *erofs_mkfs_build_special_from_fd(int fd, const char *name) erofs_write_tail_end(inode); return inode; } + +int tarerofs_dump_tree(struct erofs_inode *dir) +{ + struct erofs_dentry *d; + unsigned int nr_subdirs; + int ret; + + if (erofs_should_use_inode_extended(dir)) { + if (cfg.c_force_inodeversion == FORCE_INODE_COMPACT) { + erofs_err("file %s cannot be in compact form", + dir->i_srcpath); + return -EINVAL; + } + dir->inode_isize = sizeof(struct erofs_inode_extended); + } else { + dir->inode_isize = sizeof(struct erofs_inode_compact); + } + + ret = erofs_prepare_xattr_ibody(dir); + if (ret < 0) + return ret; + + if (!S_ISDIR(dir->i_mode)) { + if (dir->bh) + return 0; + if (S_ISLNK(dir->i_mode)) { + ret = erofs_write_file_from_buffer(dir, dir->i_link); + free(dir->i_link); + dir->i_link = NULL; + } else if (dir->i_tmpfile) { + ret = erofs_write_file(dir, fileno(dir->i_tmpfile)); + fclose(dir->i_tmpfile); + dir->i_tmpfile = NULL; + dir->with_tmpfile = false; + } else { + ret = 0; + } + if (ret) + return ret; + ret = erofs_prepare_inode_buffer(dir); + if (ret) + return ret; + 
erofs_write_tail_end(dir); + return 0; + } + + nr_subdirs = 0; + list_for_each_entry(d, &dir->i_subdirs, d_child) + ++nr_subdirs; + + ret = erofs_prepare_dir_layout(dir, nr_subdirs); + if (ret) + return ret; + + ret = erofs_prepare_inode_buffer(dir); + if (ret) + return ret; + dir->bh->op = &erofs_skip_write_bhops; + + if (IS_ROOT(dir)) + erofs_fixup_meta_blkaddr(dir); + + list_for_each_entry(d, &dir->i_subdirs, d_child) { + struct erofs_inode *inode; + + if (is_dot_dotdot(d->name)) + continue; + + inode = erofs_igrab(d->inode); + ret = tarerofs_dump_tree(inode); + dir->i_nlink += (erofs_mode_to_ftype(inode->i_mode) == EROFS_FT_DIR); + erofs_iput(inode); + if (ret) + return ret; + } + erofs_write_dir_file(dir); + erofs_write_tail_end(dir); + dir->bh->op = &erofs_write_inode_bhops; + return 0; +} diff --git a/lib/tar.c b/lib/tar.c new file mode 100644 index 00000000..8edfe758 --- /dev/null +++ b/lib/tar.c @@ -0,0 +1,809 @@ +// SPDX-License-Identifier: GPL-2.0+ OR Apache-2.0 +#include +#include +#include +#include +#ifdef HAVE_LINUX_AUFS_TYPE_H +#include +#else +#define AUFS_WH_PFX ".wh." +#define AUFS_DIROPQ_NAME AUFS_WH_PFX ".opq" +#define AUFS_WH_DIROPQ AUFS_WH_PFX AUFS_DIROPQ_NAME +#endif +#include "erofs/print.h" +#include "erofs/cache.h" +#include "erofs/inode.h" +#include "erofs/list.h" +#include "erofs/tar.h" +#include "erofs/io.h" +#include "erofs/xattr.h" +#include "erofs/blobchunk.h" + +#define OVL_XATTR_NAMESPACE "overlay." 
+#define OVL_XATTR_TRUSTED_PREFIX XATTR_TRUSTED_PREFIX OVL_XATTR_NAMESPACE +#define OVL_XATTR_OPAQUE_POSTFIX "opaque" +#define OVL_XATTR_OPAQUE OVL_XATTR_TRUSTED_PREFIX OVL_XATTR_OPAQUE_POSTFIX + +#define EROFS_WHITEOUT_DEV 0 + +static char erofs_libbuf[16384]; + +struct tar_header { + char name[100]; /* 0-99 */ + char mode[8]; /* 100-107 */ + char uid[8]; /* 108-115 */ + char gid[8]; /* 116-123 */ + char size[12]; /* 124-135 */ + char mtime[12]; /* 136-147 */ + char chksum[8]; /* 148-155 */ + char typeflag; /* 156-156 */ + char linkname[100]; /* 157-256 */ + char magic[6]; /* 257-262 */ + char version[2]; /* 263-264 */ + char uname[32]; /* 265-296 */ + char gname[32]; /* 297-328 */ + char devmajor[8]; /* 329-336 */ + char devminor[8]; /* 337-344 */ + char prefix[155]; /* 345-499 */ + char padding[12]; /* 500-512 (pad to exactly the 512 byte) */ +}; + +s64 erofs_read_from_fd(int fd, void *buf, u64 bytes) +{ + s64 i = 0; + + while (bytes) { + int len = bytes > INT_MAX ? INT_MAX : bytes; + int ret; + + ret = read(fd, buf + i, len); + if (ret < 1) { + if (ret == 0) { + break; + } else if (errno != EINTR) { + erofs_err("failed to read : %s\n", + strerror(errno)); + return -errno; + } + } + bytes -= ret; + i += ret; + } + return i; +} + +/* + * skip this many bytes of input. Return 0 for success, >0 means this much + * left after input skipped. 
/*
 * Parse an ASCII octal tar header field.
 *
 * @ptr: start of the field; it is NOT required to be NUL-terminated
 * @len: size of the field in bytes (must be smaller than 32)
 *
 * Only the first @len bytes are examined.  The number may be followed by
 * NUL or by a space.  errno is reset to 0 on entry and is set to EINVAL
 * if the field holds no digits, contains a stray character, or @len is
 * out of range; strtoll() may also leave ERANGE.  Callers test errno to
 * detect failure.
 *
 * Returns the parsed value (0 if nothing could be parsed).
 */
static long long tarerofs_otoi(const char *ptr, int len)
{
	char inp[32];
	char *endp = inp;
	long long val;

	if (len < 0 || len >= (int)sizeof(inp)) {
		errno = EINVAL;
		return 0;
	}

	/* work on a bounded, terminated copy: header fields may be unterminated */
	memcpy(inp, ptr, len);
	inp[len] = '\0';

	errno = 0;
	val = strtoll(inp, &endp, 8);
	if (endp == inp || (*endp && *endp != ' '))
		errno = EINVAL;
	return val;
}
*whout, bool *opq) +{ + struct erofs_dentry *d = NULL; + unsigned int len = strlen(path); + char *s = path; + + *whout = false; + *opq = false; + + while (s < path + len) { + char *slash = memchr(s, '/', path + len - s); + if (slash) { + if (s == slash) { + while (*++s == '/'); /* skip '//...' */ + continue; + } + *slash = '\0'; + } + + if (!memcmp(s, ".", 2)) { + /* null */ + } else if (!memcmp(s, "..", 3)) { + pwd = pwd->i_parent; + } else { + struct erofs_inode *inode = NULL; + + if (aufs && !slash) { + if (!memcmp(s, AUFS_WH_DIROPQ, sizeof(AUFS_WH_DIROPQ))) { + *opq = true; + break; + } + if (!memcmp(s, AUFS_WH_PFX, sizeof(AUFS_WH_PFX) - 1)) { + s += sizeof(AUFS_WH_PFX) - 1; + *whout = true; + } + } + + list_for_each_entry(d, &pwd->i_subdirs, d_child) { + if (!strcmp(d->name, s)) { + if (d->type != EROFS_FT_DIR && slash) + return ERR_PTR(-EIO); + inode = d->inode; + break; + } + } + + if (inode) { + pwd = inode; + } else if (!slash) { + d = erofs_d_alloc(pwd, s); + if (IS_ERR(d)) + return d; + d->type = EROFS_FT_UNKNOWN; + d->inode = pwd; + } else { + d = tarerofs_mkdir(pwd, s); + if (IS_ERR(d)) + return d; + pwd = d->inode; + } + } + if (slash) { + *slash = '/'; + s = slash + 1; + } else { + break; + } + } + return d; +} + +int tarerofs_parse_pax_header(int fd, struct erofs_pax_header *eh, u32 size) +{ + char *buf, *p; + int ret; + + buf = malloc(size); + if (!buf) + return -ENOMEM; + p = buf; + + ret = erofs_read_from_fd(fd, buf, size); + if (ret != size) + goto out; + + while (p < buf + size) { + char *kv, *value; + int len, n; + /* extended records are of the format: "LEN NAME=VALUE\n" */ + ret = sscanf(p, "%d %n", &len, &n); + if (ret < 1 || len <= n || len > buf + size - p) { + ret = -EIO; + goto out; + } + kv = p + n; + p += len; + + if (p[-1] != '\n') { + ret = -EIO; + goto out; + } + p[-1] = '\0'; + + value = memchr(kv, '=', p - kv); + if (!value) { + ret = -EIO; + goto out; + } else { + long long lln; + + value++; + + if (!strncmp(kv, "path=", 
sizeof("path=") - 1)) { + int j = p - 1 - value; + free(eh->path); + eh->path = strdup(value); + while (eh->path[j - 1] == '/') + eh->path[--j] = '\0'; + } else if (!strncmp(kv, "linkpath=", + sizeof("linkpath=") - 1)) { + free(eh->link); + eh->link = strdup(value); + } else if (!strncmp(kv, "mtime=", + sizeof("mtime=") - 1)) { + ret = sscanf(value, "%lld %n", &lln, &n); + if(ret < 1) { + ret = -EIO; + goto out; + } + eh->st.st_mtime = lln; + if (value[n] == '.') { + ret = sscanf(value + n + 1, "%d", &n); + if (ret < 1) { + ret = -EIO; + goto out; + } +#if ST_MTIM_NSEC + ST_MTIM_NSEC(&eh->st) = n; +#endif + } + eh->use_mtime = true; + } else if (!strncmp(kv, "size=", + sizeof("size=") - 1)) { + ret = sscanf(value, "%lld %n", &lln, &n); + if(ret < 1 || value[n] != '\0') { + ret = -EIO; + goto out; + } + eh->st.st_size = lln; + eh->use_size = true; + } else if (!strncmp(kv, "uid=", sizeof("uid=") - 1)) { + ret = sscanf(value, "%lld %n", &lln, &n); + if(ret < 1 || value[n] != '\0') { + ret = -EIO; + goto out; + } + eh->st.st_uid = lln; + eh->use_uid = true; + } else if (!strncmp(kv, "gid=", sizeof("gid=") - 1)) { + ret = sscanf(value, "%lld %n", &lln, &n); + if(ret < 1 || value[n] != '\0') { + ret = -EIO; + goto out; + } + eh->st.st_gid = lln; + eh->use_gid = true; + } else { + erofs_info("unrecognized pax keyword \"%s\", ignoring", kv); + } + } + } + ret = 0; +out: + free(buf); + return ret; +} + +int tarerofs_write_chunk_indexes(struct erofs_inode *inode, erofs_blk_t blkaddr) +{ + unsigned int chunkbits = ilog2(inode->i_size - 1) + 1; + unsigned int count, unit; + erofs_off_t chunksize, len, pos; + struct erofs_inode_chunk_index *idx; + + if (chunkbits < sbi.blkszbits) + chunkbits = sbi.blkszbits; + inode->u.chunkformat |= chunkbits - sbi.blkszbits; + inode->u.chunkformat |= EROFS_CHUNK_FORMAT_INDEXES; + chunksize = 1ULL << chunkbits; + count = DIV_ROUND_UP(inode->i_size, chunksize); + + unit = sizeof(struct erofs_inode_chunk_index); + inode->extent_isize = count * 
unit; + idx = calloc(count, max(sizeof(*idx), sizeof(void *))); + if (!idx) + return -ENOMEM; + inode->chunkindexes = idx; + + for (pos = 0; pos < inode->i_size; pos += len) { + struct erofs_blobchunk *chunk; + + len = min_t(erofs_off_t, inode->i_size - pos, chunksize); + + chunk = erofs_get_unhashed_chunk(chunksize, 1, blkaddr); + if (IS_ERR(chunk)) + return PTR_ERR(chunk); + + *(void **)idx++ = chunk; + blkaddr += erofs_blknr(len); + } + inode->datalayout = EROFS_INODE_CHUNK_BASED; + return 0; +} + +void tarerofs_remove_inode(struct erofs_inode *inode) +{ + struct erofs_dentry *d; + + --inode->i_nlink; + if (!S_ISDIR(inode->i_mode)) + return; + + /* remove all subdirss */ + list_for_each_entry(d, &inode->i_subdirs, d_child) { + if (!is_dot_dotdot(d->name)) + tarerofs_remove_inode(d->inode); + erofs_iput(d->inode); + d->inode = NULL; + } + --inode->i_parent->i_nlink; +} + +int tarerofs_parse_tar(struct erofs_inode *root, struct erofs_tarfile *tar) +{ + char path[PATH_MAX]; + struct erofs_pax_header eh = tar->global; + bool e, whout, opq; + struct stat st; + erofs_off_t tar_offset, data_offset; + + struct tar_header th; + struct erofs_dentry *d; + struct erofs_inode *inode; + unsigned int j, csum, cksum; + int ckksum, ret, rem; + + if (eh.path) + eh.path = strdup(eh.path); + if (eh.link) + eh.link = strdup(eh.link); + +restart: + rem = tar->offset & 511; + if (rem) { + if (erofs_lskip(tar->fd, 512 - rem)) { + ret = -EIO; + goto out; + } + tar->offset += 512 - rem; + } + + tar_offset = tar->offset; + ret = erofs_read_from_fd(tar->fd, &th, sizeof(th)); + if (ret != sizeof(th)) + goto out; + tar->offset += sizeof(th); + if (*th.name == '\0') { + if (e) { /* end of tar 2 empty blocks */ + ret = 1; + goto out; + } + e = true; /* empty jump to next block */ + goto restart; + } + + if (strncmp(th.magic, "ustar", 5)) { + erofs_err("invalid tar magic @ %llu", tar_offset); + ret = -EIO; + goto out; + } + + /* chksum field itself treated as ' ' */ + csum = 
tarerofs_otoi(th.chksum, sizeof(th.chksum)); + if (errno) { + erofs_err("invalid chksum @ %llu", tar_offset); + ret = -EBADMSG; + goto out; + } + cksum = 0; + for (j = 0; j < 8; ++j) + cksum += (unsigned int)' '; + ckksum = cksum; + for (j = 0; j < 148; ++j) { + cksum += (unsigned int)((u8*)&th)[j]; + ckksum += (int)((char*)&th)[j]; + } + for (j = 156; j < 500; ++j) { + cksum += (unsigned int)((u8*)&th)[j]; + ckksum += (int)((char*)&th)[j]; + } + if (csum != cksum && csum != ckksum) { + erofs_err("chksum mismatch @ %llu", tar_offset); + ret = -EBADMSG; + goto out; + } + + st.st_mode = tarerofs_otoi(th.mode, sizeof(th.mode)); + if (errno) + goto invalid_tar; + + if (eh.use_uid) { + st.st_uid = eh.st.st_uid; + } else { + st.st_uid = tarerofs_parsenum(th.uid, sizeof(th.uid)); + if (errno) + goto invalid_tar; + } + + if (eh.use_gid) { + st.st_gid = eh.st.st_gid; + } else { + st.st_gid = tarerofs_parsenum(th.gid, sizeof(th.gid)); + if (errno) + goto invalid_tar; + } + + if (eh.use_size) { + st.st_size = eh.st.st_size; + } else { + st.st_size = tarerofs_parsenum(th.size, sizeof(th.size)); + if (errno) + goto invalid_tar; + } + + if (eh.use_mtime) { + st.st_mtime = eh.st.st_mtime; +#if ST_MTIM_NSEC + ST_MTIM_NSEC(&st) = ST_MTIM_NSEC(&eh.st); +#endif + } else { + st.st_mtime = tarerofs_parsenum(th.mtime, sizeof(th.mtime)); + if (errno) + goto invalid_tar; + } + + if (th.typeflag <= '7' && !eh.path) { + eh.path = path; + j = 0; + if (*th.prefix) { + memcpy(path, th.prefix, sizeof(th.prefix)); + path[sizeof(th.prefix)] = '\0'; + j = strlen(path); + if (path[j - 1] != '/') { + path[j] = '/'; + path[++j] = '\0'; + } + } + memcpy(path + j, th.name, sizeof(th.name)); + path[j + sizeof(th.name)] = '\0'; + j = strlen(path); + while (path[j - 1] == '/') + path[--j] = '\0'; + } + + data_offset = tar->offset; + tar->offset += st.st_size; + if (th.typeflag == '0' || th.typeflag == '7' || th.typeflag == '1') { + st.st_mode |= S_IFREG; + } else if (th.typeflag == '2') { + st.st_mode |= 
S_IFLNK; + } else if (th.typeflag == '3') { + st.st_mode |= S_IFCHR; + } else if (th.typeflag == '4') { + st.st_mode |= S_IFBLK; + } else if (th.typeflag == '5') { + st.st_mode |= S_IFDIR; + } else if (th.typeflag == '6') { + st.st_mode |= S_IFIFO; + } else if (th.typeflag == 'g') { + ret = tarerofs_parse_pax_header(tar->fd, &tar->global, st.st_size); + if (ret) + goto out; + if (tar->global.path) { + free(eh.path); + eh.path = strdup(tar->global.path); + } + if (tar->global.link) { + free(eh.link); + eh.link = strdup(tar->global.link); + } + goto restart; + } else if (th.typeflag == 'x') { + ret = tarerofs_parse_pax_header(tar->fd, &eh, st.st_size); + if (ret) + goto out; + goto restart; + } else if (th.typeflag == 'K') { + free(eh.link); + eh.link = malloc(st.st_size + 1); + if (st.st_size > PATH_MAX || st.st_size != + erofs_read_from_fd(tar->fd, eh.link, st.st_size)) + goto invalid_tar; + eh.link[st.st_size] = '\0'; + goto restart; + } else { + erofs_info("unrecognized typeflag %xh @ %llu - ignoring", + th.typeflag, tar_offset); + (void)erofs_lskip(tar->fd, st.st_size); + ret = 0; + goto out; + } + + st.st_rdev = 0; + if (S_ISBLK(st.st_mode) || S_ISCHR(st.st_mode)) { + int major, minor; + + major = tarerofs_parsenum(th.devmajor, sizeof(th.devmajor)); + if (errno) { + erofs_err("invalid device major @ %llu", tar_offset); + goto out; + } + + minor = tarerofs_parsenum(th.devminor, sizeof(th.devminor)); + if (errno) { + erofs_err("invalid device minor @ %llu", tar_offset); + goto out; + } + + st.st_rdev = (major << 8) | (minor & 0xff) | ((minor & ~0xff) << 12); + } else if (th.typeflag == '1' || th.typeflag == '2') { + if (!eh.link) + eh.link = strndup(th.linkname, sizeof(th.linkname)); + } + + if (tar->index_mode && erofs_blkoff(tar_offset + sizeof(th))) { + erofs_err("invalid tar data alignment @ %llu", tar_offset); + ret = -EIO; + goto out; + } + + erofs_dbg("parsing %s (mode %05o)", eh.path, st.st_mode); + + d = tarerofs_get_dentry(root, eh.path, tar->aufs, 
&whout, &opq); + if (IS_ERR(d)) { + ret = PTR_ERR(d); + goto out; + } + + if (!d) { + /* some tarballs include '.' which indicates the root directory */ + if (!S_ISDIR(st.st_mode)) { + ret = -ENOTDIR; + goto out; + } + inode = root; + } else if (opq) { + DBG_BUGON(d->type == EROFS_FT_UNKNOWN); + DBG_BUGON(!d->inode); + ret = erofs_setxattr(d->inode, OVL_XATTR_OPAQUE, "y", 1); + goto out; + } else if (th.typeflag == '1') { /* hard link cases */ + struct erofs_dentry *d2; + bool dumb; + + if (S_ISDIR(st.st_mode)) { + ret = -EISDIR; + goto out; + } + + if (d->type != EROFS_FT_UNKNOWN) { + tarerofs_remove_inode(d->inode); + erofs_iput(d->inode); + } + d->inode = NULL; + + d2 = tarerofs_get_dentry(root, eh.link, tar->aufs, &dumb, &dumb); + if (IS_ERR(d2)) { + ret = PTR_ERR(d2); + goto out; + } + if (d2->type == EROFS_FT_UNKNOWN) { + ret = -ENOENT; + goto out; + } + if (S_ISDIR(d2->inode->i_mode)) { + ret = -EISDIR; + goto out; + } + inode = erofs_igrab(d2->inode); + d->inode = inode; + d->type = d2->type; + ++inode->i_nlink; + ret = 0; + goto out; + } else if (d->type != EROFS_FT_UNKNOWN) { + if (d->type != EROFS_FT_DIR || !S_ISDIR(st.st_mode)) { + struct erofs_inode *parent = d->inode->i_parent; + + tarerofs_remove_inode(d->inode); + erofs_iput(d->inode); + d->inode = parent; + goto new_inode; + } + inode = d->inode; + } else { +new_inode: + inode = erofs_new_inode(); + if (IS_ERR(inode)) { + ret = PTR_ERR(inode); + goto out; + } + inode->i_parent = d->inode; + d->inode = inode; + d->type = erofs_mode_to_ftype(st.st_mode); + } + + if (whout) { + inode->i_mode = (inode->i_mode & ~S_IFMT) | S_IFCHR; + inode->u.i_rdev = EROFS_WHITEOUT_DEV; + } else { + inode->i_mode = st.st_mode; + if (S_ISBLK(st.st_mode) || S_ISCHR(st.st_mode)) + inode->u.i_rdev = erofs_new_encode_dev(st.st_rdev); + } + inode->i_srcpath = strdup(eh.path); + inode->i_uid = st.st_uid; + inode->i_gid = st.st_gid; + inode->i_size = st.st_size; + inode->i_mtime = st.st_mtime; + + if (!S_ISDIR(inode->i_mode)) 
{ + if (S_ISLNK(inode->i_mode)) { + inode->i_size = strlen(eh.link); + inode->i_link = malloc(inode->i_size + 1); + memcpy(inode->i_link, eh.link, inode->i_size + 1); + } else if (tar->index_mode) { + ret = tarerofs_write_chunk_indexes(inode, + erofs_blknr(data_offset)); + if (ret) + goto out; + if (erofs_lskip(tar->fd, inode->i_size)) { + erofs_iput(inode); + ret = -EIO; + goto out; + } + } else { + char buf[65536]; + + if (!inode->i_tmpfile) { + inode->i_tmpfile = tmpfile(); + + if (!inode->i_tmpfile) { + erofs_iput(inode); + ret = -ENOSPC; + goto out; + } + } + + for (j = inode->i_size; j; ) { + rem = min_t(int, sizeof(buf), j); + + if (erofs_read_from_fd(tar->fd, buf, rem) != rem || + fwrite(buf, rem, 1, inode->i_tmpfile) != 1) { + erofs_iput(inode); + ret = -EIO; + goto out; + } + j -= rem; + } + fseek(inode->i_tmpfile, 0, SEEK_SET); + inode->with_tmpfile = true; + } + inode->i_nlink++; + ret = 0; + } else if (!inode->i_nlink) + ret = tarerofs_init_empty_dir(inode); + else + ret = 0; +out: + if (eh.path != path) + free(eh.path); + free(eh.link); + return ret; + +invalid_tar: + erofs_err("invalid tar @ %llu", tar_offset); + ret = -EIO; + goto out; +} + +static struct erofs_buffer_head *bh_devt; + +int tarerofs_reserve_devtable(unsigned int devices) +{ + if (!devices) + return 0; + + bh_devt = erofs_balloc(DEVT, + sizeof(struct erofs_deviceslot) * devices, 0, 0); + if (IS_ERR(bh_devt)) + return PTR_ERR(bh_devt); + + erofs_mapbh(bh_devt->block); + bh_devt->op = &erofs_skip_write_bhops; + sbi.devt_slotoff = erofs_btell(bh_devt, false) / EROFS_DEVT_SLOT_SIZE; + sbi.extra_devices = devices; + erofs_sb_set_device_table(); + return 0; +} + +int tarerofs_write_devtable(struct erofs_tarfile *tar) +{ + erofs_off_t pos_out; + unsigned int i; + + if (!sbi.extra_devices) + return 0; + pos_out = erofs_btell(bh_devt, false); + for (i = 0; i < sbi.extra_devices; ++i) { + struct erofs_deviceslot dis = { + .blocks = erofs_blknr(tar->offset), + }; + int ret; + + ret = 
dev_write(&dis, pos_out, sizeof(dis)); + if (ret) + return ret; + pos_out += sizeof(dis); + } + bh_devt->op = &erofs_drop_directly_bhops; + erofs_bdrop(bh_devt, false); + return 0; +} diff --git a/lib/xattr.c b/lib/xattr.c index 7d7dc54c..87a95c7c 100644 --- a/lib/xattr.c +++ b/lib/xattr.c @@ -403,6 +403,38 @@ static int read_xattrs_from_file(const char *path, mode_t mode, return ret; } +int erofs_setxattr(struct erofs_inode *inode, char *key, + const void *value, size_t size) +{ + char *kvbuf; + unsigned int len[2]; + struct xattr_item *item; + u8 prefix; + u16 prefixlen; + + if (!match_prefix(key, &prefix, &prefixlen)) + return -ENODATA; + + len[1] = size; + /* allocate key-value buffer */ + len[0] = strlen(key) - prefixlen; + + kvbuf = malloc(len[0] + len[1]); + if (!kvbuf) + return -ENOMEM; + + memcpy(kvbuf, key + prefixlen, len[0]); + memcpy(kvbuf + len[0], value, size); + + item = get_xattritem(prefix, kvbuf, len); + if (IS_ERR(item)) + return PTR_ERR(item); + if (!item) + return 0; + + return erofs_xattr_add(&inode->i_xattrs, item); +} + #ifdef WITH_ANDROID static int erofs_droid_xattr_set_caps(struct erofs_inode *inode) { @@ -445,10 +477,9 @@ static int erofs_droid_xattr_set_caps(struct erofs_inode *inode) } #endif -int erofs_prepare_xattr_ibody(struct erofs_inode *inode) +int erofs_scan_file_xattrs(struct erofs_inode *inode) { int ret; - struct inode_xattr_node *node; struct list_head *ixattrs = &inode->i_xattrs; /* check if xattr is disabled */ @@ -459,9 +490,14 @@ int erofs_prepare_xattr_ibody(struct erofs_inode *inode) if (ret < 0) return ret; - ret = erofs_droid_xattr_set_caps(inode); - if (ret < 0) - return ret; + return erofs_droid_xattr_set_caps(inode); +} + +int erofs_prepare_xattr_ibody(struct erofs_inode *inode) +{ + int ret; + struct inode_xattr_node *node; + struct list_head *ixattrs = &inode->i_xattrs; if (list_empty(ixattrs)) return 0; diff --git a/mkfs/main.c b/mkfs/main.c index 438bab82..7369b908 100644 --- a/mkfs/main.c +++ b/mkfs/main.c 
@@ -16,6 +16,7 @@ #include "erofs/print.h" #include "erofs/cache.h" #include "erofs/inode.h" +#include "erofs/tar.h" #include "erofs/io.h" #include "erofs/compress.h" #include "erofs/dedupe.h" @@ -53,6 +54,8 @@ static struct option long_options[] = { {"preserve-mtime", no_argument, NULL, 15}, {"uid-offset", required_argument, NULL, 16}, {"gid-offset", required_argument, NULL, 17}, + {"tar", optional_argument, NULL, 20}, + {"aufs", no_argument, NULL, 21}, {"mount-point", required_argument, NULL, 512}, {"xattr-prefix", required_argument, NULL, 19}, #ifdef WITH_ANDROID @@ -107,6 +110,8 @@ static void usage(void) " --ignore-mtime use build time instead of strict per-file modification time\n" " --max-extent-bytes=# set maximum decompressed extent size # in bytes\n" " --preserve-mtime keep per-file modification time strictly\n" + " --aufs replace aufs special files with overlayfs metadata\n" + " --tar=[fi] generate an image from tarball(s)\n" " --quiet quiet execution (do not write anything to standard output.)\n" #ifndef NDEBUG " --random-pclusterblks randomize pclusterblks for big pcluster (debugging only)\n" @@ -125,6 +130,8 @@ static void usage(void) } static unsigned int pclustersize_packed, pclustersize_max; +static struct erofs_tarfile erofstar; +static bool tar_mode; static int parse_extended_opts(const char *opts) { @@ -475,6 +482,15 @@ static int mkfs_parse_options_cfg(int argc, char *argv[]) } cfg.c_extra_ea_name_prefixes = true; break; + case 20: + if (optarg && (!strcmp(optarg, "i") || + !strcmp(optarg, "0"))) + erofstar.index_mode = true; + tar_mode = true; + break; + case 21: + erofstar.aufs = true; + break; case 1: usage(); exit(0); @@ -506,20 +522,24 @@ static int mkfs_parse_options_cfg(int argc, char *argv[]) return -ENOMEM; if (optind >= argc) { - erofs_err("missing argument: DIRECTORY"); - return -EINVAL; - } - - cfg.c_src_path = realpath(argv[optind++], NULL); - if (!cfg.c_src_path) { - erofs_err("failed to parse source directory: %s", - 
erofs_strerror(-errno)); - return -ENOENT; - } + if (!tar_mode) { + erofs_err("missing argument: DIRECTORY"); + return -EINVAL; + } else { + erofstar.fd = STDIN_FILENO; + } + }else { + cfg.c_src_path = realpath(argv[optind++], NULL); + if (!cfg.c_src_path) { + erofs_err("failed to parse source directory: %s", + erofs_strerror(-errno)); + return -ENOENT; + } - if (optind < argc) { - erofs_err("unexpected argument: %s\n", argv[optind]); - return -EINVAL; + if (optind < argc) { + erofs_err("unexpected argument: %s\n", argv[optind]); + return -EINVAL; + } } if (quiet) { cfg.c_dbg_lvl = EROFS_ERR; @@ -734,14 +754,24 @@ int main(int argc, char **argv) return 1; } - err = lstat(cfg.c_src_path, &st); - if (err) - return 1; - if (!S_ISDIR(st.st_mode)) { - erofs_err("root of the filesystem is not a directory - %s", - cfg.c_src_path); - usage(); - return 1; + if (!tar_mode) { + err = lstat(cfg.c_src_path, &st); + if (err) + return 1; + if (!S_ISDIR(st.st_mode)) { + erofs_err("root of the filesystem is not a directory - %s", + cfg.c_src_path); + usage(); + return 1; + } + erofs_set_fs_root(cfg.c_src_path); + } else if (cfg.c_src_path) { + erofstar.fd = open(cfg.c_src_path, O_RDONLY); + if (erofstar.fd < 0) { + erofs_err("failed to open file: %s", cfg.c_src_path); + usage(); + return 1; + } } if (cfg.c_unix_timestamp != -1) { @@ -792,11 +822,13 @@ int main(int argc, char **argv) } if (cfg.c_dedupe) erofs_warn("EXPERIMENTAL data deduplication feature in use. 
Use at your own risk!"); - erofs_set_fs_root(cfg.c_src_path); + #ifndef NDEBUG if (cfg.c_random_pclusterblks) srand(time(NULL)); #endif + if (tar_mode && erofstar.index_mode) + sbi.blkszbits = 9; sb_bh = erofs_buffer_init(); if (IS_ERR(sb_bh)) { err = PTR_ERR(sb_bh); @@ -852,7 +884,10 @@ int main(int argc, char **argv) return 1; } - err = erofs_generate_devtable(); + if (tar_mode && erofstar.index_mode) + err = tarerofs_reserve_devtable(1); + else + err = erofs_generate_devtable(); if (err) { erofs_err("failed to generate device table: %s", erofs_strerror(err)); @@ -863,25 +898,52 @@ int main(int argc, char **argv) erofs_inode_manager_init(); - err = erofs_build_shared_xattrs_from_path(cfg.c_src_path); - if (err) { - erofs_err("failed to build shared xattrs: %s", - erofs_strerror(err)); - goto exit; - } - - root_inode = erofs_mkfs_build_tree_from_path(cfg.c_src_path); - if (IS_ERR(root_inode)) { - err = PTR_ERR(root_inode); - goto exit; - } - if (cfg.c_extra_ea_name_prefixes) erofs_xattr_write_name_prefixes(packedfile); + if (!tar_mode) { + err = erofs_build_shared_xattrs_from_path(cfg.c_src_path); + if (err) { + erofs_err("failed to build shared xattrs: %s", + erofs_strerror(err)); + goto exit; + } + + if (cfg.c_extra_ea_name_prefixes) + erofs_xattr_write_name_prefixes(packedfile); + + root_inode = erofs_mkfs_build_tree_from_path(cfg.c_src_path); + if (IS_ERR(root_inode)) { + err = PTR_ERR(root_inode); + goto exit; + } + } else { + root_inode = erofs_new_inode(); + if (IS_ERR(root_inode)) { + err = PTR_ERR(root_inode); + goto exit; + } + root_inode->i_srcpath = strdup("/"); + root_inode->i_mode = S_IFDIR | 0777; + root_inode->i_parent = root_inode; + root_inode->i_mtime = sbi.build_time; + root_inode->i_mtime_nsec = sbi.build_time_nsec; + tarerofs_init_empty_dir(root_inode); + + while (!(err = tarerofs_parse_tar(root_inode, &erofstar))); + + if (err < 0) + goto exit; + + err = tarerofs_dump_tree(root_inode); + if (err < 0) + goto exit; + } root_nid = 
erofs_lookupnid(root_inode); erofs_iput(root_inode); + if (tar_mode) + tarerofs_write_devtable(&erofstar); if (cfg.c_chunkbits) { erofs_info("total metadata: %u blocks", erofs_mapbh(NULL)); err = erofs_blob_remap(); From 142e0da828723705893cd7321ddea4ee77ca377a Mon Sep 17 00:00:00 2001 From: Gao Xiang Date: Mon, 17 Jul 2023 15:35:31 +0800 Subject: [PATCH 6/9] erofs-utils: lib: support GNUTYPE_LONGNAME for tarerofs The 'L' entry is present in a header for a series of 1 or more 512-byte tar blocks that hold just the filename for a file or directory with a name over 100 chars. Following that series is another header block, in the traditional form: A header with type '0' (regular file) or '5' (directory), followed by the appropriate number of data blocks with the entry data. In the header for this series, the name will be truncated to the 1st 100 characters of the actual name. Tested-by: Jingbo Xu Signed-off-by: Gao Xiang Link: https://lore.kernel.org/r/20230717073531.43203-1-hsiangkao@linux.alibaba.com --- lib/tar.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/lib/tar.c b/lib/tar.c index 8edfe758..b62e562b 100644 --- a/lib/tar.c +++ b/lib/tar.c @@ -570,6 +570,14 @@ int tarerofs_parse_tar(struct erofs_inode *root, struct erofs_tarfile *tar) if (ret) goto out; goto restart; + } else if (th.typeflag == 'L') { + free(eh.path); + eh.path = malloc(st.st_size + 1); + if (st.st_size != erofs_read_from_fd(tar->fd, eh.path, + st.st_size)) + goto invalid_tar; + eh.path[st.st_size] = '\0'; + goto restart; } else if (th.typeflag == 'K') { free(eh.link); eh.link = malloc(st.st_size + 1); From de182b7e65ccc309d2eff081a7a351f81d57e883 Mon Sep 17 00:00:00 2001 From: Jingbo Xu Date: Tue, 18 Jul 2023 13:20:59 +0800 Subject: [PATCH 7/9] erofs-utils: lib: fix erofs_iterate_dir() recursion ctx->dir may have changed when ctx is reused along erofs_iterate_dir() recursion. 
Signed-off-by: Jingbo Xu Reviewed-by: Gao Xiang Link: https://lore.kernel.org/r/20230718052101.124039-3-jefflexu@linux.alibaba.com Signed-off-by: Gao Xiang --- lib/dir.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/lib/dir.c b/lib/dir.c index abbf27a3..6758b8df 100644 --- a/lib/dir.c +++ b/lib/dir.c @@ -9,6 +9,7 @@ static int traverse_dirents(struct erofs_dir_context *ctx, unsigned int next_nameoff, unsigned int maxsize, bool fsck) { + struct erofs_inode *dir = ctx->dir; struct erofs_dirent *de = dentry_blk; const struct erofs_dirent *end = dentry_blk + next_nameoff; const char *prev_name = NULL; @@ -76,7 +77,7 @@ static int traverse_dirents(struct erofs_dir_context *ctx, goto out; } ctx->flags |= EROFS_READDIR_DOTDOT_FOUND; - if (sbi.root_nid == ctx->dir->nid) { + if (sbi.root_nid == dir->nid) { ctx->pnid = sbi.root_nid; ctx->flags |= EROFS_READDIR_VALID_PNID; } @@ -95,7 +96,7 @@ static int traverse_dirents(struct erofs_dir_context *ctx, } ctx->flags |= EROFS_READDIR_DOT_FOUND; - if (fsck && ctx->de_nid != ctx->dir->nid) { + if (fsck && ctx->de_nid != dir->nid) { errmsg = "corrupted `.' dirent"; goto out; } @@ -115,7 +116,7 @@ static int traverse_dirents(struct erofs_dir_context *ctx, out: if (ret && !silent) erofs_err("%s @ nid %llu, lblk %u, index %lu", - errmsg, ctx->dir->nid | 0ULL, lblk, + errmsg, dir->nid | 0ULL, lblk, (de - (struct erofs_dirent *)dentry_blk) | 0UL); return ret; } From 2187bea8da717ade9ae1cefa65aa656f331686e4 Mon Sep 17 00:00:00 2001 From: Jingbo Xu Date: Tue, 18 Jul 2023 13:21:00 +0800 Subject: [PATCH 8/9] erofs-utils: lib: inline vle_compressmeta_capacity() The helper is not quite useful. 
Signed-off-by: Jingbo Xu Reviewed-by: Gao Xiang Link: https://lore.kernel.org/r/20230718052101.124039-4-jefflexu@linux.alibaba.com Signed-off-by: Gao Xiang --- lib/compress.c | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/lib/compress.c b/lib/compress.c index 6fb63cb9..a8713225 100644 --- a/lib/compress.c +++ b/lib/compress.c @@ -49,14 +49,6 @@ struct z_erofs_vle_compress_ctx { #define Z_EROFS_LEGACY_MAP_HEADER_SIZE Z_EROFS_FULL_INDEX_ALIGN(0) -static unsigned int vle_compressmeta_capacity(erofs_off_t filesize) -{ - const unsigned int indexsize = BLK_ROUND_UP(filesize) * - sizeof(struct z_erofs_lcluster_index); - - return Z_EROFS_LEGACY_MAP_HEADER_SIZE + indexsize; -} - static void z_erofs_write_indexes_final(struct z_erofs_vle_compress_ctx *ctx) { const unsigned int type = Z_EROFS_LCLUSTER_TYPE_PLAIN; @@ -843,7 +835,9 @@ int erofs_write_compressed_file(struct erofs_inode *inode, int fd) erofs_blk_t blkaddr, compressed_blocks; unsigned int legacymetasize; int ret; - u8 *compressmeta = malloc(vle_compressmeta_capacity(inode->i_size)); + u8 *compressmeta = malloc(BLK_ROUND_UP(inode->i_size) * + sizeof(struct z_erofs_lcluster_index) + + Z_EROFS_LEGACY_MAP_HEADER_SIZE); if (!compressmeta) return -ENOMEM; From fe6de7dd6659f12a1ecd523a35858e8162dfd8a0 Mon Sep 17 00:00:00 2001 From: Jingbo Xu Date: Wed, 19 Jul 2023 15:33:18 +0800 Subject: [PATCH 9/9] erofs-utils: simplify iloc() Let's follow kernel commit b780d3fc6107 ("erofs: simplify iloc()") by passing in inodes directly to clean up all callers. Also rename iloc() as erofs_iloc(). 
Signed-off-by: Jingbo Xu Link: https://lore.kernel.org/r/20230719073319.27996-2-jefflexu@linux.alibaba.com Signed-off-by: Gao Xiang --- fsck/main.c | 2 +- include/erofs/internal.h | 10 +++++----- lib/data.c | 4 ++-- lib/namei.c | 2 +- lib/xattr.c | 8 ++++---- lib/zmap.c | 6 +++--- 6 files changed, 16 insertions(+), 16 deletions(-) diff --git a/fsck/main.c b/fsck/main.c index 608635eb..aabfda4d 100644 --- a/fsck/main.c +++ b/fsck/main.c @@ -325,7 +325,7 @@ static int erofs_verify_xattr(struct erofs_inode *inode) } } - addr = iloc(inode->nid) + inode->inode_isize; + addr = erofs_iloc(inode) + inode->inode_isize; ret = dev_read(0, buf, addr, xattr_hdr_size); if (ret < 0) { erofs_err("failed to read xattr header @ nid %llu: %d", diff --git a/include/erofs/internal.h b/include/erofs/internal.h index 46690f51..93e3a0ba 100644 --- a/include/erofs/internal.h +++ b/include/erofs/internal.h @@ -103,11 +103,6 @@ struct erofs_sb_info { /* make sure that any user of the erofs headers has atleast 64bit off_t type */ extern int erofs_assert_largefile[sizeof(off_t)-8]; -static inline erofs_off_t iloc(erofs_nid_t nid) -{ - return erofs_pos(sbi.meta_blkaddr) + (nid << sbi.islotbits); -} - #define EROFS_FEATURE_FUNCS(name, compat, feature) \ static inline bool erofs_sb_has_##name(void) \ { \ @@ -219,6 +214,11 @@ struct erofs_inode { unsigned int fragment_size; }; +static inline erofs_off_t erofs_iloc(struct erofs_inode *inode) +{ + return erofs_pos(sbi.meta_blkaddr) + (inode->nid << sbi.islotbits); +} + static inline bool is_inode_layout_compression(struct erofs_inode *inode) { return erofs_inode_is_data_compressed(inode->datalayout); diff --git a/lib/data.c b/lib/data.c index 612112a9..86e28d9f 100644 --- a/lib/data.c +++ b/lib/data.c @@ -33,7 +33,7 @@ static int erofs_map_blocks_flatmode(struct erofs_inode *inode, map->m_plen = erofs_pos(lastblk) - offset; } else if (tailendpacking) { /* 2 - inode inline B: inode, [xattrs], inline last blk... 
*/ - map->m_pa = iloc(vi->nid) + vi->inode_isize + + map->m_pa = erofs_iloc(vi) + vi->inode_isize + vi->xattr_isize + erofs_blkoff(map->m_la); map->m_plen = inode->i_size - offset; @@ -89,7 +89,7 @@ int erofs_map_blocks(struct erofs_inode *inode, unit = EROFS_BLOCK_MAP_ENTRY_SIZE; /* block map */ chunknr = map->m_la >> vi->u.chunkbits; - pos = roundup(iloc(vi->nid) + vi->inode_isize + + pos = roundup(erofs_iloc(vi) + vi->inode_isize + vi->xattr_isize, unit) + unit * chunknr; err = blk_read(0, buf, erofs_blknr(pos), 1); diff --git a/lib/namei.c b/lib/namei.c index 37517419..423c1ddc 100644 --- a/lib/namei.c +++ b/lib/namei.c @@ -28,7 +28,7 @@ int erofs_read_inode_from_disk(struct erofs_inode *vi) char buf[sizeof(struct erofs_inode_extended)]; struct erofs_inode_compact *dic; struct erofs_inode_extended *die; - const erofs_off_t inode_loc = iloc(vi->nid); + const erofs_off_t inode_loc = erofs_iloc(vi); ret = dev_read(0, buf, inode_loc, sizeof(*dic)); if (ret < 0) diff --git a/lib/xattr.c b/lib/xattr.c index 87a95c7c..9e839353 100644 --- a/lib/xattr.c +++ b/lib/xattr.c @@ -871,8 +871,8 @@ static int init_inode_xattrs(struct erofs_inode *vi) return -ENOATTR; } - it.blkaddr = erofs_blknr(iloc(vi->nid) + vi->inode_isize); - it.ofs = erofs_blkoff(iloc(vi->nid) + vi->inode_isize); + it.blkaddr = erofs_blknr(erofs_iloc(vi) + vi->inode_isize); + it.ofs = erofs_blkoff(erofs_iloc(vi) + vi->inode_isize); ret = blk_read(0, it.page, it.blkaddr, 1); if (ret < 0) @@ -962,8 +962,8 @@ static int inline_xattr_iter_pre(struct xattr_iter *it, inline_xattr_ofs = vi->inode_isize + xattr_header_sz; - it->blkaddr = erofs_blknr(iloc(vi->nid) + inline_xattr_ofs); - it->ofs = erofs_blkoff(iloc(vi->nid) + inline_xattr_ofs); + it->blkaddr = erofs_blknr(erofs_iloc(vi) + inline_xattr_ofs); + it->ofs = erofs_blkoff(erofs_iloc(vi) + inline_xattr_ofs); ret = blk_read(0, it->page, it->blkaddr, 1); if (ret < 0) diff --git a/lib/zmap.c b/lib/zmap.c index 209b5d78..7428d115 100644 --- a/lib/zmap.c +++ 
b/lib/zmap.c @@ -39,7 +39,7 @@ static int z_erofs_fill_inode_lazy(struct erofs_inode *vi) if (vi->flags & EROFS_I_Z_INITED) return 0; - pos = round_up(iloc(vi->nid) + vi->inode_isize + vi->xattr_isize, 8); + pos = round_up(erofs_iloc(vi) + vi->inode_isize + vi->xattr_isize, 8); ret = dev_read(0, buf, pos, sizeof(buf)); if (ret < 0) return -EIO; @@ -143,7 +143,7 @@ static int legacy_load_cluster_from_disk(struct z_erofs_maprecorder *m, unsigned long lcn) { struct erofs_inode *const vi = m->inode; - const erofs_off_t ibase = iloc(vi->nid); + const erofs_off_t ibase = erofs_iloc(vi); const erofs_off_t pos = Z_EROFS_FULL_INDEX_ALIGN(ibase + vi->inode_isize + vi->xattr_isize) + lcn * sizeof(struct z_erofs_lcluster_index); @@ -342,7 +342,7 @@ static int compacted_load_cluster_from_disk(struct z_erofs_maprecorder *m, { struct erofs_inode *const vi = m->inode; const unsigned int lclusterbits = vi->z_logical_clusterbits; - const erofs_off_t ebase = round_up(iloc(vi->nid) + vi->inode_isize + + const erofs_off_t ebase = round_up(erofs_iloc(vi) + vi->inode_isize + vi->xattr_isize, 8) + sizeof(struct z_erofs_map_header); const unsigned int totalidx = BLK_ROUND_UP(vi->i_size);