50053: Script for the PublishRelease TC build configuration r=pbardea a=jlinder

Before: the script wasn't implemented.

Now:

Part of the new release process, this script

- tags the selected SHA with the provided name
- compiles the binaries and archive and uploads them to S3 under the
  versioned name
- uploads the docker image to docker.io/cockroachdb/cockroach
- pushes the tag to github.com/cockroachdb/cockroach
- pushes all the artifacts to their respective `latest` locations as
  appropriate

Release note: None

51567: kvserver: Allow rebalances between stores on the same nodes. r=lunevalex a=lunevalex

Closes #6782

This change modifies the replica_queue to allow rebalances between multiple stores within a single node. 
This is possible because we previously introduced atomic rebalances in #12768. 

The first step was to remove the constraints in the allocator that prevented same-node rebalances and to update the validation in the replica_queue to accept these rebalance proposals. One caveat is that with 1x replication an atomic rebalance is not possible, so in that case we now support adding multiple replicas of the range to the same node.

With those constraints removed, nothing in the allocator would prevent it from placing multiple replicas of a range on the same node across multiple stores. This is not desirable, because in a simple 3x replication scenario a single node crash could make a whole range unavailable. The allocator uses locality tags to model failure domains, but a node was not previously considered a locality. It is thus natural to extend the failure-domain definition to the node and model it as a locality tier. Stores on the same node are now factored into the diversity_score and repel each other, just as nodes in the same datacenter do in a multi-region setup.
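To make the failure-domain idea concrete, here is a minimal, self-contained Go sketch of tier-based diversity scoring. The names and the exact scoring formula are illustrative, not the actual allocator code: the point is that treating the node as the innermost locality tier makes two stores on the same node score 0 (maximally similar), so the allocator avoids co-placing replicas there.

```go
package main

import "fmt"

// Tier is one level of a locality hierarchy, e.g. region=us-east.
type Tier struct{ Key, Value string }

// Locality is an ordered list of tiers, most significant first. With this
// change, the node itself acts as the innermost tier.
type Locality []Tier

// diversityScore returns 0 for identical localities and grows toward 1 the
// earlier the two localities diverge.
func diversityScore(a, b Locality) float64 {
	n := len(a)
	if len(b) < n {
		n = len(b)
	}
	if n == 0 {
		return 0
	}
	for i := 0; i < n; i++ {
		if a[i] != b[i] {
			return float64(n-i) / float64(n)
		}
	}
	return 0
}

func main() {
	storeA := Locality{{"region", "us-east"}, {"node", "1"}}
	storeB := Locality{{"region", "us-east"}, {"node", "1"}} // second store on node 1
	storeC := Locality{{"region", "us-east"}, {"node", "2"}}
	fmt.Println(diversityScore(storeA, storeB)) // 0: same node, replicas repel
	fmt.Println(diversityScore(storeA, storeC)) // 0.5: same region, different node
}
```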

Release note (performance improvement): This change removes the last roadblock to running CockroachDB with multiple stores (i.e. disks) per node. The allocation algorithm now supports intra-node rebalances, which means CRDB can fully utilize the additional stores on the same node.

52754: importccl: speed up revert of IMPORT INTO empty table r=dt a=dt

When IMPORT INTO fails, it reverts the tables to their pre-IMPORT state.
Typically this requires running a somewhat expensive RevertRange operation
that finds the keys written by the IMPORT amongst all the table data and
deletes just those keys -- we need to iterate the keys in the target table
and check each one to see if it needs to be reverted.

Non-INTO-style IMPORTs create the table into which they will IMPORT and
thus can just drop it wholesale on failure, instead of doing this expensive
revert. However, INTO-style IMPORTs could use a similarly fast/cheap
wholesale delete *if they knew the table was empty* when the IMPORT was
started.

This change tracks which tables were empty when the IMPORT started and
then deletes, rather than reverts, the table span on failure.
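A minimal Go sketch of that cleanup decision follows. The types and function names are illustrative stand-ins, not the real IMPORT code (the actual change records a WasEmpty flag in the job details, as the diff below shows): emptiness must be captured when the IMPORT starts, and on failure it selects the cheap wholesale delete over the key-by-key revert.

```go
package main

import "fmt"

// Span names a table's key range; the function fields stand in for the KV
// operations discussed above.
type Span struct{ Key, EndKey string }

type kvOps struct {
	scanOne     func(Span) (bool, error) // read at most one key in the span
	clearRange  func(Span) error         // cheap wholesale delete
	revertRange func(Span, int64) error  // expensive key-by-key revert
}

// cleanupFailedImport picks the deletion strategy. wasEmpty must be a
// pre-IMPORT snapshot of emptiness; only that makes the cheap path safe.
func cleanupFailedImport(kv kvOps, span Span, wasEmpty bool, importStart int64) error {
	if wasEmpty {
		return kv.clearRange(span) // no pre-existing keys: drop the whole span
	}
	return kv.revertRange(span, importStart) // mixed data: revert only IMPORT's keys
}

func main() {
	kv := kvOps{
		scanOne:     func(Span) (bool, error) { return false, nil },
		clearRange:  func(sp Span) error { fmt.Println("ClearRange", sp.Key); return nil },
		revertRange: func(sp Span, ts int64) error { fmt.Println("RevertRange", sp.Key); return nil },
	}
	sp := Span{Key: "/Table/52", EndKey: "/Table/53"}
	found, _ := kv.scanOne(sp) // at IMPORT start: is the table empty?
	_ = cleanupFailedImport(kv, sp, !found, 12345)
}
```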

Release note (performance improvement): Cleaning up after a failure during IMPORT INTO a table which was empty is now faster.

53023: opt: add index acceleration support for ~ and && bounding box operators r=rytaft a=rytaft

This commit adds index acceleration support for the bounding box comparison
operators, `~` and `&&`. It maps `~` to Covers and `&&` to Intersects.
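A small, self-contained Go sketch of the operator-to-relationship mapping described above; the `Relationship` type and its values are illustrative stand-ins for the optimizer's internals, not the real API:

```go
package main

import "fmt"

// Relationship stands in for the relationship the optimizer can use to
// constrain an inverted geo-index scan.
type Relationship int

const (
	Covers Relationship = iota
	Intersects
)

func (r Relationship) String() string {
	return [...]string{"Covers", "Intersects"}[r]
}

// relationshipForBBoxOp maps the bounding-box comparison operators per the
// commit message: `~` (covers) -> Covers, `&&` (overlaps) -> Intersects.
func relationshipForBBoxOp(op string) (Relationship, bool) {
	switch op {
	case "~":
		return Covers, true
	case "&&":
		return Intersects, true
	default:
		return 0, false // other operators are not index-accelerated here
	}
}

func main() {
	for _, op := range []string{"~", "&&", "<->"} {
		if rel, ok := relationshipForBBoxOp(op); ok {
			fmt.Printf("%-3s -> %s\n", op, rel)
		} else {
			fmt.Printf("%-3s -> no index acceleration\n", op)
		}
	}
}
```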

Release note (performance improvement): The ~ and && geospatial bounding
box operations can now benefit from index acceleration if one of the
operands is an indexed geometry column.

53049: bulkio: Fix transaction semantics in job scheduler. r=miretskiy a=miretskiy

Fixes #53033
Fixes #52959 

Use a transaction when querying for the schedules to run. In addition,
use transaction savepoints to ensure that a single bad schedule does not
cause all of the previous work to be wasted.
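The savepoint pattern, as a self-contained Go sketch (a toy transaction type; the method names mirror the ones in the diff below, but the API here is illustrative): wrapping each schedule in a savepoint means a failure rolls back only that schedule's writes, preserving the work already done for earlier schedules in the same transaction.

```go
package main

import (
	"errors"
	"fmt"
)

type savepoint int

// txn is a toy stand-in for a SQL transaction with savepoint support.
type txn struct{ n savepoint }

func (t *txn) CreateSavepoint() (savepoint, error)    { t.n++; return t.n, nil }
func (t *txn) RollbackToSavepoint(sp savepoint) error { return nil }
func (t *txn) ReleaseSavepoint(sp savepoint) error    { return nil }

// executeSchedules processes each schedule under its own savepoint.
func executeSchedules(t *txn, schedules []string, process func(string) error) error {
	for _, schedule := range schedules {
		sp, err := t.CreateSavepoint()
		if err != nil {
			return err
		}
		if err := process(schedule); err != nil {
			// Undo only this schedule's writes; earlier work is preserved.
			fmt.Printf("error processing schedule %s: %v\n", schedule, err)
			if err := t.RollbackToSavepoint(sp); err != nil {
				return err
			}
		}
		if err := t.ReleaseSavepoint(sp); err != nil {
			return err
		}
	}
	return nil
}

func main() {
	_ = executeSchedules(&txn{}, []string{"good", "bad"}, func(s string) error {
		if s == "bad" {
			return errors.New("boom")
		}
		return nil
	})
}
```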


Release note: None

53132: sql/opt: add implicit SELECT FOR UPDATE support for UPSERT statements r=nvanbenschoten a=nvanbenschoten

Fixes #50180.

This commit adds implicit SELECT FOR UPDATE support for UPSERT statements with a VALUES clause. This should improve throughput and latency for contended UPSERT statements in much the same way that 435fa43 did for UPDATE statements. However, this only has an effect on UPSERT statements into tables with multiple indexes, because UPSERT statements into single-index tables hit a fast path where they perform a blind write without doing an initial row scan.

Conceptually, if we picture an UPSERT statement as the composition of a SELECT statement and an INSERT statement (with loosened semantics around existing rows), then this change performs the following transformation:
```
UPSERT t = SELECT FROM t + INSERT INTO t
=>
UPSERT t = SELECT FROM t FOR UPDATE + INSERT INTO t
```

I plan to test this out on a contended `indexes` workload at some point in the future.

Release note (sql change): UPSERT statements now acquire locks using the FOR UPDATE locking mode during their initial row scan, which improves performance for contended workloads when UPSERTing into tables with multiple indexes. This behavior is configurable using the enable_implicit_select_for_update session variable and the sql.defaults.implicit_select_for_update.enabled cluster setting.
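A usage sketch in Go, assuming a local insecure cluster and the lib/pq driver (the connection string and driver choice are illustrative); the session variable and cluster setting names are the ones from the release note:

```go
package main

import (
	"database/sql"
	"log"

	_ "github.com/lib/pq" // CockroachDB speaks the Postgres wire protocol
)

func main() {
	// Connection string assumes a local insecure cluster; adjust as needed.
	db, err := sql.Open("postgres", "postgresql://root@localhost:26257/defaultdb?sslmode=disable")
	if err != nil {
		log.Fatal(err)
	}
	defer db.Close()

	// The implicit SELECT FOR UPDATE behavior is on by default; it can be
	// disabled per session...
	if _, err := db.Exec("SET enable_implicit_select_for_update = false"); err != nil {
		log.Fatal(err)
	}
	// ...or cluster-wide.
	if _, err := db.Exec("SET CLUSTER SETTING sql.defaults.implicit_select_for_update.enabled = false"); err != nil {
		log.Fatal(err)
	}
}
```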

Co-authored-by: James H. Linder <jamesl@cockroachlabs.com>
Co-authored-by: Alex Lunev <alexl@cockroachlabs.com>
Co-authored-by: David Taylor <tinystatemachine@gmail.com>
Co-authored-by: Rebecca Taft <becca@cockroachlabs.com>
Co-authored-by: Yevgeniy Miretskiy <yevgeniy@cockroachlabs.com>
Co-authored-by: Nathan VanBenschoten <nvanbenschoten@gmail.com>
7 people committed Aug 20, 2020
7 parents 1f49884 + d73710d + ed34965 + d8ac1f1 + 19b1480 + aab9608 + d67890c commit 979127c
Showing 50 changed files with 2,394 additions and 770 deletions.
151 changes: 150 additions & 1 deletion build/release/teamcity-publish-release.sh
@@ -1,4 +1,153 @@
#!/usr/bin/env bash

echo "Implement me!"
set -euxo pipefail

source "$(dirname "${0}")/teamcity-support.sh"


if [[ -n "${PUBLISH_LATEST}" && -n "$PRE_RELEASE" ]]; then
echo "Invalid parameter combination: PUBLISH_LATEST and PRE_RELEASE can't both be set."
exit 6
fi


tc_start_block "Variable Setup"
export BUILDER_HIDE_GOPATH_SRC=1

# Matching the version name regex from within the cockroach code except
# for the `metadata` part at the end because Docker tags don't support
# `+` in the tag name.
# https://github.com/cockroachdb/cockroach/blob/4c6864b44b9044874488cfedee3a31e6b23a6790/pkg/util/version/version.go#L75
build_name="$(echo "${NAME}" | grep -E -o '^v(0|[1-9][0-9]*)\.(0|[1-9][0-9]*)\.(0|[1-9][0-9]*)(-[-.0-9A-Za-z]+)?$')"
# ^major ^minor ^patch ^preRelease

if [[ -z "$build_name" ]] ; then
echo "Invalid NAME \"${NAME}\". Must be of the format \"vMAJOR.MINOR.PATCH(-PRERELEASE)?\"."
exit 1
fi

release_branch=$(echo ${build_name} | grep -E -o '^v[0-9]+\.[0-9]+')

if [[ -z "${DRY_RUN}" ]] ; then
bucket="${BUCKET:-binaries.cockroachdb.com}"
google_credentials="$GOOGLE_COCKROACH_CLOUD_IMAGES_CREDENTIALS"
dockerhub_repository="docker.io/cockroachdb/cockroach"
gcr_repository="us.gcr.io/cockroach-cloud-images/cockroach"
s3_download_hostname="${bucket}"
git_repo_for_tag="cockroachdb/cockroach"
else
bucket="${BUCKET:-cockroach-builds-test}"
google_credentials="$GOOGLE_COCKROACH_RELEASE_CREDENTIALS"
dockerhub_repository="docker.io/cockroachdb/cockroach-misc"
gcr_repository="us.gcr.io/cockroach-release/cockroach-test"
s3_download_hostname="${bucket}.s3.amazonaws.com"
git_repo_for_tag="cockroachlabs/release-staging"
if [[ -z "$(echo ${build_name} | grep -E -o '^v[0-9]+\.[0-9]+\.[0-9]+$')" ]] ; then
# Using `.` to match how we usually format the pre-release portion of the
# version string using '.' separators.
# ex: v20.2.0-rc.2.dryrun
build_name="${build_name}.dryrun"
else
# Using `-` to put dryrun in the pre-release portion of the version string.
# ex: v20.2.0-dryrun
build_name="${build_name}-dryrun"
fi
fi

# Used for docker login for gcloud
gcr_hostname="us.gcr.io"

tc_end_block "Variable Setup"


tc_start_block "Tag the release"
git tag "${build_name}"
tc_end_block "Tag the release"


tc_start_block "Compile publish-artifacts"
build/builder.sh go install ./pkg/cmd/publish-artifacts
tc_end_block "Compile publish-artifacts"


tc_start_block "Compile publish-provisional-artifacts"
build/builder.sh go install ./pkg/cmd/publish-provisional-artifacts
tc_end_block "Compile publish-provisional-artifacts"


tc_start_block "Make and publish release S3 artifacts"
build/builder.sh env \
  AWS_ACCESS_KEY_ID="$AWS_ACCESS_KEY_ID" \
  AWS_SECRET_ACCESS_KEY="$AWS_SECRET_ACCESS_KEY" \
  TC_BUILD_BRANCH="$build_name" \
  publish-artifacts -release -bucket "$bucket"
tc_end_block "Make and publish release S3 artifacts"


tc_start_block "Make and push docker images"
configure_docker_creds
docker_login_with_google
docker_login

# TODO: update publish-artifacts with option to leave one or more cockroach binaries in the local filesystem
curl -f -s -S -o- "https://${s3_download_hostname}/cockroach-${build_name}.linux-amd64.tgz" | tar ixfz - --strip-components 1
cp cockroach build/deploy

docker build --no-cache --tag=${dockerhub_repository}:{"$build_name",latest,latest-"${release_branch}"} --tag=${gcr_repository}:${build_name} build/deploy

docker push "${dockerhub_repository}:${build_name}"
docker push "${gcr_repository}:${build_name}"
tc_end_block "Make and push docker images"


tc_start_block "Push release tag to GitHub"
github_ssh_key="${GITHUB_COCKROACH_TEAMCITY_PRIVATE_SSH_KEY}"
configure_git_ssh_key
push_to_git "ssh://git@github.com/${git_repo_for_tag}.git" "$build_name"
tc_end_block "Push release tag to GitHub"


tc_start_block "Publish S3 binaries and archive as latest-RELEASE_BRANCH"
# example: v20.1-latest
if [[ -z "$PRE_RELEASE" ]]; then
#TODO: implement me!
echo "Pushing latest-RELEASE_BRANCH S3 binaries and archive is not implemented."
else
echo "Pushing latest-RELEASE_BRANCH S3 binaries and archive is not implemented."
fi
tc_end_block "Publish S3 binaries and archive as latest-RELEASE_BRANCH"


tc_start_block "Publish S3 binaries and archive as latest"
# Only push the "latest" for our most recent release branch.
# https://github.com/cockroachdb/cockroach/issues/41067
if [[ -n "${PUBLISH_LATEST}" && -z "${PRE_RELEASE}" ]]; then
build/builder.sh env \
AWS_ACCESS_KEY_ID="$AWS_ACCESS_KEY_ID" \
AWS_SECRET_ACCESS_KEY="$AWS_SECRET_ACCESS_KEY" \
TC_BUILD_BRANCH="$build_name" \
publish-provisional-artifacts -bless -release -bucket "${bucket}"
else
echo "The latest S3 binaries and archive were _not_ updated."
fi
tc_end_block "Publish S3 binaries and archive as latest"


tc_start_block "Tag docker image as latest-RELEASE_BRANCH"
if [[ -z "$PRE_RELEASE" ]]; then
docker push "${dockerhub_repository}:latest-${release_branch}"
else
echo "The ${dockerhub_repository}:latest-${release_branch} docker image tag was _not_ pushed."
fi
tc_end_block "Tag docker image as latest-RELEASE_BRANCH"


tc_start_block "Tag docker image as latest"
# Only push the "latest" tag for our most recent release branch.
# https://github.com/cockroachdb/cockroach/issues/41067
if [[ -n "${PUBLISH_LATEST}" && -z "$PRE_RELEASE" ]]; then
docker push "${dockerhub_repository}:latest"
else
echo "The ${dockerhub_repository}:latest docker image tag was _not_ pushed."
fi
tc_end_block "Tag docker image as latest"
2 changes: 1 addition & 1 deletion build/release/teamcity-support.sh
@@ -29,7 +29,7 @@ docker_login_with_google() {
}

docker_login() {
echo "${DOCKER_AUTH}" | docker login --username "${DOCKER_ID}" --password-stdin
echo "${DOCKER_ACCESS_TOKEN}" | docker login --username "${DOCKER_ID}" --password-stdin
}

configure_docker_creds() {
36 changes: 33 additions & 3 deletions pkg/ccl/importccl/import_stmt.go
@@ -34,6 +34,7 @@ import (
	"github.com/cockroachdb/cockroach/pkg/sql/catalog/catalogkv"
	"github.com/cockroachdb/cockroach/pkg/sql/catalog/descpb"
	"github.com/cockroachdb/cockroach/pkg/sql/execinfrapb"
	"github.com/cockroachdb/cockroach/pkg/sql/gcjob"
	"github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgcode"
	"github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgerror"
	"github.com/cockroachdb/cockroach/pkg/sql/privilege"
@@ -1206,12 +1207,30 @@ func (r *importResumer) Resume(
	if details.Walltime == 0 {
		// TODO(dt): update job status to mention waiting for tables to go offline.
		for _, i := range details.Tables {
			if _, err := p.ExecCfg().LeaseManager.WaitForOneVersion(ctx, i.Desc.ID, retry.Options{}); err != nil {
				return err
			if !i.IsNew {
				if _, err := p.ExecCfg().LeaseManager.WaitForOneVersion(ctx, i.Desc.ID, retry.Options{}); err != nil {
					return err
				}
			}
		}

		// Now that we know all the tables are offline, pick a walltime at which we
		// will write.
		details.Walltime = p.ExecCfg().Clock.Now().WallTime

		// Check if the tables being imported into are starting empty, in which
		// case we can cheaply clear-range instead of revert-range to cleanup.
		for i := range details.Tables {
			if !details.Tables[i].IsNew {
				tblSpan := sqlbase.NewImmutableTableDescriptor(*details.Tables[i].Desc).TableSpan(keys.TODOSQLCodec)
				res, err := p.ExecCfg().DB.Scan(ctx, tblSpan.Key, tblSpan.EndKey, 1 /* maxRows */)
				if err != nil {
					return errors.Wrap(err, "checking if existing table is empty")
				}
				details.Tables[i].WasEmpty = len(res) == 0
			}
		}

		if err := r.job.WithTxn(nil).SetDetails(ctx, details); err != nil {
			return err
		}
@@ -1432,9 +1451,14 @@ func (r *importResumer) dropTables(
	}

	var revert []*sqlbase.ImmutableTableDescriptor
	var empty []*sqlbase.ImmutableTableDescriptor
	for _, tbl := range details.Tables {
		if !tbl.IsNew {
			revert = append(revert, sqlbase.NewImmutableTableDescriptor(*tbl.Desc))
			if tbl.WasEmpty {
				empty = append(empty, sqlbase.NewImmutableTableDescriptor(*tbl.Desc))
			} else {
				revert = append(revert, sqlbase.NewImmutableTableDescriptor(*tbl.Desc))
			}
		}
	}

@@ -1457,6 +1481,12 @@
		}
	}

	for i := range empty {
		if err := gcjob.ClearTableData(ctx, execCfg.DB, execCfg.DistSender, execCfg.Codec, empty[i]); err != nil {
			return errors.Wrapf(err, "clearing data for table %d", empty[i].ID)
		}
	}

	b := txn.NewBatch()
	dropTime := int64(1)
	tablesToGC := make([]descpb.ID, 0, len(details.Tables))
22 changes: 18 additions & 4 deletions pkg/jobs/job_scheduler.go
@@ -72,7 +72,7 @@ const allSchedules = 0
// scheduled jobs that should be started.
func getFindSchedulesStatement(env scheduledjobs.JobSchedulerEnv, maxSchedules int64) string {
	limitClause := ""
	if maxSchedules > 0 {
	if maxSchedules != allSchedules {
		limitClause = fmt.Sprintf("LIMIT %d", maxSchedules)
	}

@@ -236,8 +236,10 @@ func (s *jobScheduler) executeSchedules(
	defer stats.updateMetrics(&s.metrics)

	findSchedulesStmt := getFindSchedulesStatement(s.env, maxSchedules)
	rows, cols, err := s.InternalExecutor.QueryWithCols(ctx, "find-scheduled-jobs", nil,
		sqlbase.InternalExecutorSessionDataOverride{User: security.RootUser},
	rows, cols, err := s.InternalExecutor.QueryWithCols(
		ctx, "find-scheduled-jobs",
		txn,
		sqlbase.InternalExecutorSessionDataOverride{User: security.NodeUser},
		findSchedulesStmt)

	if err != nil {
@@ -252,8 +254,20 @@
			continue
		}

		sp, err := txn.CreateSavepoint(ctx)
		if err != nil {
			return err
		}

		if err := s.processSchedule(ctx, schedule, numRunning, stats, txn); err != nil {
			// We don't know if txn is good at this point, so bail out.
			log.Errorf(ctx, "error processing schedule %d: %+v", schedule.ScheduleID(), err)

			if err := txn.RollbackToSavepoint(ctx, sp); err != nil {
				return errors.Wrapf(err, "failed to rollback savepoint for schedule %d", schedule.ScheduleID())
			}
		}

		if err := txn.ReleaseSavepoint(ctx, sp); err != nil {
			return err
		}
	}