From 5e1a193ed7203b3355c3f93ce15cbb9f0c963c58 Mon Sep 17 00:00:00 2001 From: Jaime Soriano Pastor Date: Wed, 21 Sep 2022 10:23:19 +0200 Subject: [PATCH 1/5] Use tokenizer to read packages index --- storage/index.go | 43 ++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 40 insertions(+), 3 deletions(-) diff --git a/storage/index.go b/storage/index.go index c5b84f1b9..55e211e01 100644 --- a/storage/index.go +++ b/storage/index.go @@ -38,9 +38,46 @@ func loadSearchIndexAll(ctx context.Context, storageClient *storage.Client, buck defer objectReader.Close() var sia searchIndexAll - err = json.NewDecoder(objectReader).Decode(&sia) - if err != nil { - return nil, errors.Wrapf(err, "can't decode the index file (path: %s)", rootedIndexStoragePath) + dec := json.NewDecoder(objectReader) + + for dec.More() { + // Read everything till the "packages" key in the map. + token, err := dec.Token() + if err != nil { + return nil, errors.Wrapf(err, "unexpected error while reading index file") + } + if key, ok := token.(string); !ok || key != "packages" { + continue + } + + // Read the opening array now. + token, err = dec.Token() + if err != nil { + return nil, errors.Wrapf(err, "unexpected error while reading index file") + } + if delim, ok := token.(json.Delim); !ok || delim != '[' { + return nil, errors.Errorf("expected opening array, found %v", token) + } + + // Read the array of packages one by one. + for dec.More() { + var p packageIndex + err = dec.Decode(&p) + if err != nil { + return nil, errors.Wrapf(err, "unexpected error parsing package from index file (token: %v)", token) + } + sia.Packages = append(sia.Packages, p) + } + + // Read the closing array delimiter. + token, err = dec.Token() + if err != nil { + return nil, errors.Wrapf(err, "unexpected error while reading index file") + } + // We only want an array now. + if delim, ok := token.(json.Delim); !ok || delim != ']' { + return nil, errors.Errorf("expected closing array, found %v", token) + } } return &sia, nil } From 97fcf38b0f005b6834e8645facd2fefff971e568 Mon Sep 17 00:00:00 2001 From: Jaime Soriano Pastor Date: Wed, 21 Sep 2022 10:31:34 +0200 Subject: [PATCH 2/5] Benchmark init --- storage/fakestorage.go | 14 +++++++------- storage/indexer_test.go | 13 +++++++++++++ 2 files changed, 20 insertions(+), 7 deletions(-) diff --git a/storage/fakestorage.go b/storage/fakestorage.go index afaae29a4..d5345bede 100644 --- a/storage/fakestorage.go +++ b/storage/fakestorage.go @@ -21,12 +21,12 @@ var FakeIndexerOptions = IndexerOptions{ WatchInterval: 0, } -func PrepareFakeServer(t *testing.T, indexPath string) *fakestorage.Server { +func PrepareFakeServer(tb testing.TB, indexPath string) *fakestorage.Server { indexContent, err := ioutil.ReadFile(indexPath) - require.NoError(t, err, "index file must be populated") + require.NoError(tb, err, "index file must be populated") const firstRevision = "1" - serverObjects := prepareServerObjects(t, firstRevision, indexContent) + serverObjects := prepareServerObjects(tb, firstRevision, indexContent) return fakestorage.NewServer(serverObjects) } @@ -41,11 +41,11 @@ func updateFakeServer(t *testing.T, server *fakestorage.Server, revision, indexP } } -func prepareServerObjects(t *testing.T, revision string, indexContent []byte) []fakestorage.Object { +func prepareServerObjects(tb testing.TB, revision string, indexContent []byte) []fakestorage.Object { var index searchIndexAll err := json.Unmarshal(indexContent, &index) - require.NoError(t, err, "index file must be valid") - require.NotEmpty(t, index.Packages, "index file must contain some package entries") + require.NoError(tb, err, "index file must be valid") + require.NotEmpty(tb, index.Packages, "index file must contain some package entries") var serverObjects []fakestorage.Object // Add cursor and index file @@ -61,6 +61,6 @@ func prepareServerObjects(t *testing.T, revision string, indexContent []byte) [] }, Content: indexContent, }) - t.Logf("Prepared %d packages with total %d server objects.", len(index.Packages), len(serverObjects)) + tb.Logf("Prepared %d packages with total %d server objects.", len(index.Packages), len(serverObjects)) return serverObjects } diff --git a/storage/indexer_test.go b/storage/indexer_test.go index 61fe8a569..5ec86f078 100644 --- a/storage/indexer_test.go +++ b/storage/indexer_test.go @@ -27,6 +27,19 @@ func TestInit(t *testing.T) { require.NoError(t, err) } +func BenchmarkInit(b *testing.B) { + // given + fs := PrepareFakeServer(b, "testdata/search-index-all-full.json") + defer fs.Stop() + storageClient := fs.Client() + + for i := 0; i < b.N; i++ { + indexer := NewIndexer(storageClient, FakeIndexerOptions) + err := indexer.Init(context.Background()) + require.NoError(b, err) + } +} + func TestGet_ListAllPackages(t *testing.T) { // given fs := PrepareFakeServer(t, "testdata/search-index-all-full.json") From 4de26a4af837b808eb987053fe41bcc0c1d4304a Mon Sep 17 00:00:00 2001 From: Jaime Soriano Pastor Date: Wed, 21 Sep 2022 10:35:36 +0200 Subject: [PATCH 3/5] Add TODO --- storage/index.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/storage/index.go b/storage/index.go index 55e211e01..143a39813 100644 --- a/storage/index.go +++ b/storage/index.go @@ -66,6 +66,7 @@ func loadSearchIndexAll(ctx context.Context, storageClient *storage.Client, buck if err != nil { return nil, errors.Wrapf(err, "unexpected error parsing package from index file (token: %v)", token) } + // TODO: Apply transforms for package here and directly build the `packages.Packages` array? sia.Packages = append(sia.Packages, p) } @@ -74,7 +75,6 @@ func loadSearchIndexAll(ctx context.Context, storageClient *storage.Client, buck if err != nil { return nil, errors.Wrapf(err, "unexpected error while reading index file") } - // We only want an array now. if delim, ok := token.(json.Delim); !ok || delim != ']' { return nil, errors.Errorf("expected closing array, found %v", token) } From 889b38c94eedecb38afc6fb30c1bb1f4cd2dafeb Mon Sep 17 00:00:00 2001 From: Jaime Soriano Pastor Date: Wed, 21 Sep 2022 11:05:05 +0200 Subject: [PATCH 4/5] Comments --- storage/index.go | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/storage/index.go b/storage/index.go index 143a39813..c0f879677 100644 --- a/storage/index.go +++ b/storage/index.go @@ -37,9 +37,14 @@ func loadSearchIndexAll(ctx context.Context, storageClient *storage.Client, buck } defer objectReader.Close() + // Using a decoder here as tokenizer to parse the list of packages as a stream + // instead of needing the whole document in memory at the same time. This helps + // reducing memory usage. + // Using `Unmarshal(doc, &sia)` would require to read the whole document. + // Using `dec.Decode(&sia)` would also make the decoder to keep the whole document + // in memory. var sia searchIndexAll dec := json.NewDecoder(objectReader) - for dec.More() { // Read everything till the "packages" key in the map. token, err := dec.Token() @@ -66,7 +71,7 @@ func loadSearchIndexAll(ctx context.Context, storageClient *storage.Client, buck if err != nil { return nil, errors.Wrapf(err, "unexpected error parsing package from index file (token: %v)", token) } - // TODO: Apply transforms for package here and directly build the `packages.Packages` array? + // TODO: Apply transforms for package here and directly build the `packages.Packages` array. sia.Packages = append(sia.Packages, p) } From c89fcf7b1b02e5101f149e04f40074d55e75523a Mon Sep 17 00:00:00 2001 From: Jaime Soriano Pastor Date: Wed, 21 Sep 2022 11:30:47 +0200 Subject: [PATCH 5/5] Add changelog entry --- CHANGELOG.md | 2 ++ storage/index.go | 3 ++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d2d6d3a3f..6d4995272 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Bugfixes +* Reduce peak memory footprint of recycling indices from storage. [#881](https://github.com/elastic/package-registry/pull/881) + ### Added ### Deprecated diff --git a/storage/index.go b/storage/index.go index c0f879677..d5fec160b 100644 --- a/storage/index.go +++ b/storage/index.go @@ -43,6 +43,8 @@ func loadSearchIndexAll(ctx context.Context, storageClient *storage.Client, buck // Using `Unmarshal(doc, &sia)` would require to read the whole document. // Using `dec.Decode(&sia)` would also make the decoder to keep the whole document // in memory. + // `jsoniter` seemed to be slightly faster, but to use more memory for our use case, + // and we are looking to optimize for memory use. var sia searchIndexAll dec := json.NewDecoder(objectReader) for dec.More() { @@ -71,7 +73,6 @@ func loadSearchIndexAll(ctx context.Context, storageClient *storage.Client, buck if err != nil { return nil, errors.Wrapf(err, "unexpected error parsing package from index file (token: %v)", token) } - // TODO: Apply transforms for package here and directly build the `packages.Packages` array. sia.Packages = append(sia.Packages, p) }