Skip to content

Commit 329b8ca

Browse files
craig[bot]Renato Costa
andcommitted
Merge #127170
127170: server: do not initialize cluster version to minimum supported r=stevendanna a=renatolabs Previously, shared-process tenants would initialize their view of the cluster version early and set it to the `MinSupportedVersion` as a "safe" option. However, that setting can likely be wrong, especially now that we plan to support skip-version upgrades. In this commit, we remove that code and rely on the initialization that happens from storage in `preStart`. This means that we now expose a non-negligible amount of code to a state where we don't have an initialized cluster version. However, having that error out is better than have code assume a potentially wrong version. A similar discussion and approach happened for separate process tenants a while ago: see #100684 and #104859. Fixes: #126754 Release note: None Co-authored-by: Renato Costa <renato@cockroachlabs.com>
2 parents 558771f + e36acf8 commit 329b8ca

File tree

2 files changed

+41
-25
lines changed

2 files changed

+41
-25
lines changed

pkg/server/server_controller_new_server.go

Lines changed: 0 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,6 @@ import (
1818
"path/filepath"
1919

2020
"github.com/cockroachdb/cockroach/pkg/base"
21-
"github.com/cockroachdb/cockroach/pkg/clusterversion"
2221
"github.com/cockroachdb/cockroach/pkg/multitenant/mtinfopb"
2322
"github.com/cockroachdb/cockroach/pkg/roachpb"
2423
"github.com/cockroachdb/cockroach/pkg/security/clientsecopts"
@@ -189,21 +188,6 @@ func makeSharedProcessTenantServerConfig(
189188
stopper *stop.Stopper,
190189
nodeMetricsRecorder *status.MetricsRecorder,
191190
) (baseCfg BaseConfig, sqlCfg SQLConfig, err error) {
192-
// We need a value in the version setting prior to the update
193-
// coming from the system.settings table. This value must be valid
194-
// and compatible with the state of the tenant's keyspace.
195-
//
196-
// Since we don't know at which binary version the tenant
197-
// keyspace was initialized, we must be conservative and
198-
// assume it was created a long time ago; and that we may
199-
// have to run all known migrations since then. So initialize
200-
// the version setting to the minimum supported version.
201-
if err := clusterversion.Initialize(
202-
ctx, st.Version.MinSupportedVersion(), &st.SV,
203-
); err != nil {
204-
return BaseConfig{}, SQLConfig{}, err
205-
}
206-
207191
tr := tracing.NewTracerWithOpt(ctx, tracing.WithClusterSettings(&st.SV))
208192

209193
// Define a tenant store. This will be used to write the

pkg/server/server_sql.go

Lines changed: 41 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1472,16 +1472,48 @@ func (s *SQLServer) preStart(
14721472
// NB: In the context of the system tenant, we may not have
14731473
// the version setting in SQL yet even though the in memory
14741474
// setting has been initialized in.
1475-
currentVersion := s.execCfg.Settings.Version.ActiveVersionOrEmpty(ctx).Version
1476-
if currentVersion.Equal(roachpb.Version{}) {
1477-
if err := s.execCfg.DB.Txn(ctx, func(ctx context.Context, txn *kv.Txn) (err error) {
1478-
v, err := s.settingsWatcher.GetClusterVersionFromStorage(ctx, txn)
1479-
if err != nil {
1480-
return err
1475+
initializeClusterVersion := func(ctx context.Context) error {
1476+
currentVersion := s.execCfg.Settings.Version.ActiveVersionOrEmpty(ctx).Version
1477+
if currentVersion.Equal(roachpb.Version{}) {
1478+
if err := s.execCfg.DB.Txn(ctx, func(ctx context.Context, txn *kv.Txn) (err error) {
1479+
v, err := s.settingsWatcher.GetClusterVersionFromStorage(ctx, txn)
1480+
if err != nil {
1481+
return err
1482+
}
1483+
return clusterversion.Initialize(ctx, v.Version, &s.execCfg.Settings.SV)
1484+
}); err != nil {
1485+
return errors.Wrap(err, "initializing cluster version")
14811486
}
1482-
return clusterversion.Initialize(ctx, v.Version, &s.execCfg.Settings.SV)
1483-
}); err != nil {
1484-
return errors.Wrap(err, "initializing cluster version")
1487+
}
1488+
1489+
return nil
1490+
}
1491+
1492+
if s.execCfg.Codec.ForSystemTenant() {
1493+
if err := initializeClusterVersion(ctx); err != nil {
1494+
return err
1495+
}
1496+
} else {
1497+
// When initializing the cluster version for tenants, we retry if
1498+
// the error indicates that the KV authorizer hasn't seen our
1499+
// service-mode transition. This is possible because both the
1500+
// service controller and the authorizer receive tenant mode
1501+
// updates asynchronously via rangefeeds.
1502+
opts := retry.Options{MaxRetries: 5}
1503+
var nonRetryableErr error
1504+
err := opts.Do(ctx, func(ctx context.Context) error {
1505+
if err := initializeClusterVersion(ctx); err != nil {
1506+
if strings.Contains(err.Error(), `operation not allowed when in service mode "none"`) {
1507+
return err
1508+
}
1509+
nonRetryableErr = err
1510+
}
1511+
1512+
return nil
1513+
})
1514+
1515+
if err != nil || nonRetryableErr != nil {
1516+
return errors.CombineErrors(err, nonRetryableErr)
14851517
}
14861518
}
14871519

0 commit comments

Comments
 (0)