From 7ac8b10344491ad05174cf8bd5186c04f2850d68 Mon Sep 17 00:00:00 2001 From: Alex Sharov Date: Sat, 27 Apr 2024 12:10:44 +0700 Subject: [PATCH] downloader: docs on MMAP for data-files r/w and experiments with bufio (#10074) Pros of bufio (files API) instead of MMAP: - it allows us to not pre-allocate files: https://github.com/ledgerwatch/erigon/issues/8688 - it allows us to avoid "sig-bus" when no space is left on disk (and return a user-friendly error instead). See: https://github.com/ledgerwatch/erigon/issues/8500 - but the DB will be MMAP anyway and may still get "sig-bus" FYI: - there seems to be no perf difference (but I tested only on cloud drives) - erigon will open the files as mmap anyway Cons of bufio (files API): - I already implemented `fsync` for mmap ( https://github.com/anacrolix/torrent/pull/755 ) - it will probably need to be implemented for bufio too: https://github.com/anacrolix/torrent/pull/937 - no zero-copy: more `alloc` memory will be held by the APP (PageCache starvation). I see 2x mem usage (at `--torrent.download.slots=500` 20gb vs 40gb) - I see the "10K threads exhausted" error earlier (on `--torrent.download.slots=500`). - what else? 
--- .../downloader/{downloader.go => diag_downloader.go} | 0 erigon-lib/downloader/downloader.go | 11 +++++++++++ 2 files changed, 11 insertions(+) rename cmd/diag/downloader/{downloader.go => diag_downloader.go} (100%) diff --git a/cmd/diag/downloader/downloader.go b/cmd/diag/downloader/diag_downloader.go similarity index 100% rename from cmd/diag/downloader/downloader.go rename to cmd/diag/downloader/diag_downloader.go diff --git a/erigon-lib/downloader/downloader.go b/erigon-lib/downloader/downloader.go index b70af3c1304..cd5c8f80d2b 100644 --- a/erigon-lib/downloader/downloader.go +++ b/erigon-lib/downloader/downloader.go @@ -2568,7 +2568,18 @@ func openClient(ctx context.Context, dbDir, snapDir string, cfg *torrent.ClientC if err != nil { return nil, nil, nil, nil, fmt.Errorf("torrentcfg.NewMdbxPieceCompletion: %w", err) } + + //Reasons why using MMAP instead of files-API: + // - i see "10K threads exchaused" error earlier (on `--torrent.download.slots=500` and `pd-ssd`) + // - "sig-bus" at disk-full - may happen anyway, because DB is mmap + // - MMAP - means less GC pressure, more zero-copy + // - MMAP files are pre-allocated - which is not cool, but: 1. we can live with it 2. maybe can just resize MMAP in future + // See also: https://github.com/ledgerwatch/erigon/pull/10074 m = storage.NewMMapWithCompletion(snapDir, c) + //m = storage.NewFileOpts(storage.NewFileClientOpts{ + // ClientBaseDir: snapDir, + // PieceCompletion: c, + //}) cfg.DefaultStorage = m torrentClient, err = torrent.NewClient(cfg)