From a0285bb8933d562cc913ee77a92ccc19faae4c41 Mon Sep 17 00:00:00 2001 From: "Ariel Shaqed (Scolnicov)" Date: Sun, 10 Mar 2024 14:47:34 +0200 Subject: [PATCH 1/4] Count KV retries not tries in Graveler Metric `graveler_kv_retries` currently counts "tries", i.e. at least one for each try. So this includes the actual operations rate: if lakeFS performs 100 ops/sec, then it will increment graveler_kv_retries by _at least_ 100 / sec. That defeats using this metric to detect when KV retries impact performance. Instead count from 0, i.e. count "_retries_". This is changelog-worthy because people may be tracking this. But given that it's an internal metric, and that we rarely examine it, this is not a breaking change. --- pkg/graveler/graveler.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/graveler/graveler.go b/pkg/graveler/graveler.go index 15ce1e5e934..dec7929cd2f 100644 --- a/pkg/graveler/graveler.go +++ b/pkg/graveler/graveler.go @@ -1291,7 +1291,7 @@ func (g *Graveler) UpdateBranch(ctx context.Context, repository *RepositoryRecor // monitorRetries reports the number of retries of operation while updating // branchID of repository. func (g *Graveler) monitorRetries(ctx context.Context, tries int, repositoryID RepositoryID, branchID BranchID, operation string) { - kvRetriesCounter.WithLabelValues(operation).Add(float64(tries)) + kvRetriesCounter.WithLabelValues(operation).Add(float64(tries - 1)) l := g.log(ctx).WithFields(logging.Fields{ "tries": tries, "repository": repositoryID, From 092081f682406a67efa63a91bca3256a8b12c269 Mon Sep 17 00:00:00 2001 From: "Ariel Shaqed (Scolnicov)" Date: Sun, 10 Mar 2024 18:26:15 +0200 Subject: [PATCH 2/4] [CR] monitorRetries should take a "retries" parameter --- pkg/graveler/graveler.go | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/pkg/graveler/graveler.go b/pkg/graveler/graveler.go index dec7929cd2f..0875cebf3e8 100644 --- a/pkg/graveler/graveler.go +++ b/pkg/graveler/graveler.go @@ -1289,9 +1289,10 @@ func (g *Graveler) UpdateBranch(ctx context.Context, repository *RepositoryRecor } // monitorRetries reports the number of retries of operation while updating -// branchID of repository. -func (g *Graveler) monitorRetries(ctx context.Context, tries int, repositoryID RepositoryID, branchID BranchID, operation string) { - kvRetriesCounter.WithLabelValues(operation).Add(float64(tries - 1)) +// branchID of repository. Parameter retries should be 0 if the operation +// succeeded on the very first attempt. +func (g *Graveler) monitorRetries(ctx context.Context, retries int, repositoryID RepositoryID, branchID BranchID, operation string) { + kvRetriesCounter.WithLabelValues(operation).Add(float64(tries)) l := g.log(ctx).WithFields(logging.Fields{ "tries": tries, "repository": repositoryID, @@ -1744,11 +1745,7 @@ func (g *Graveler) safeBranchWrite(ctx context.Context, log logging.Logger, repo branchPreOp *Branch ) defer func() { - retries := try - if retries > options.MaxTries { - retries = options.MaxTries - } - g.monitorRetries(ctx, retries, repository.RepositoryID, branchID, operation) + g.monitorRetries(ctx, try-1, repository.RepositoryID, branchID, operation) }() for try = 1; try <= options.MaxTries; try++ { @@ -2128,7 +2125,7 @@ func (g *Graveler) CreateCommitRecord(ctx context.Context, repository *Repositor // between 1 and BranchUpdateMaxTries. func (g *Graveler) retryBranchUpdate(ctx context.Context, repository *RepositoryRecord, branchID BranchID, f BranchUpdateFunc, operation string) error { tries := 0 - defer g.monitorRetries(ctx, tries, repository.RepositoryID, branchID, operation) + defer g.monitorRetries(ctx, tries-1, repository.RepositoryID, branchID, operation) err := backoff.Retry(func() error { // TODO(eden) issue 3586 - if the branch commit id hasn't changed, update the fields instead of fail tries += 1 From 88449508b46c2809d89af9bdaaf36e34a27b7312 Mon Sep 17 00:00:00 2001 From: "Ariel Shaqed (Scolnicov)" Date: Sun, 10 Mar 2024 22:06:10 +0200 Subject: [PATCH 3/4] [CR] Fix silly bugs in previous commit That's not how you subtract 1. --- pkg/graveler/graveler.go | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/pkg/graveler/graveler.go b/pkg/graveler/graveler.go index 0875cebf3e8..7968c3f5176 100644 --- a/pkg/graveler/graveler.go +++ b/pkg/graveler/graveler.go @@ -1292,14 +1292,14 @@ func (g *Graveler) UpdateBranch(ctx context.Context, repository *RepositoryRecor // branchID of repository. Parameter retries should be 0 if the operation // succeeded on the very first attempt. func (g *Graveler) monitorRetries(ctx context.Context, retries int, repositoryID RepositoryID, branchID BranchID, operation string) { - kvRetriesCounter.WithLabelValues(operation).Add(float64(tries)) + kvRetriesCounter.WithLabelValues(operation).Add(float64(retries)) l := g.log(ctx).WithFields(logging.Fields{ - "tries": tries, + "tries": retries, "repository": repositoryID, "branch": branchID, "operation": operation, }) - if tries > 1 { + if retries > 1 { l.Info("Operation retried") } else { l.Trace("No retries needed") @@ -2125,7 +2125,9 @@ func (g *Graveler) CreateCommitRecord(ctx context.Context, repository *Repositor // between 1 and BranchUpdateMaxTries. func (g *Graveler) retryBranchUpdate(ctx context.Context, repository *RepositoryRecord, branchID BranchID, f BranchUpdateFunc, operation string) error { tries := 0 - defer g.monitorRetries(ctx, tries-1, repository.RepositoryID, branchID, operation) + defer func() { + g.monitorRetries(ctx, tries-1, repository.RepositoryID, branchID, operation) + }() err := backoff.Retry(func() error { // TODO(eden) issue 3586 - if the branch commit id hasn't changed, update the fields instead of fail tries += 1 From 51f8b8c9359aefa74467ab9d5579a47532c9195b Mon Sep 17 00:00:00 2001 From: "Ariel Shaqed (Scolnicov)" Date: Mon, 11 Mar 2024 12:39:41 +0200 Subject: [PATCH 4/4] [CR] Correctly name "retries" field --- pkg/graveler/graveler.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/graveler/graveler.go b/pkg/graveler/graveler.go index 7968c3f5176..5ce40e9e2dc 100644 --- a/pkg/graveler/graveler.go +++ b/pkg/graveler/graveler.go @@ -1294,7 +1294,7 @@ func (g *Graveler) UpdateBranch(ctx context.Context, repository *RepositoryRecor func (g *Graveler) monitorRetries(ctx context.Context, retries int, repositoryID RepositoryID, branchID BranchID, operation string) { kvRetriesCounter.WithLabelValues(operation).Add(float64(retries)) l := g.log(ctx).WithFields(logging.Fields{ - "tries": retries, + "retries": retries, "repository": repositoryID, "branch": branchID, "operation": operation,