Skip to content
This repository has been archived by the owner on Sep 30, 2024. It is now read-only.

Commit

Permalink
Order all bulk modifications to code intelligence tables. (#22932)
Browse files Browse the repository at this point in the history
  • Loading branch information
efritz authored Jul 16, 2021
1 parent 50ec75d commit f1fef7f
Show file tree
Hide file tree
Showing 6 changed files with 134 additions and 58 deletions.
39 changes: 20 additions & 19 deletions enterprise/internal/codeintel/stores/dbstore/dumps.go
Original file line number Diff line number Diff line change
Expand Up @@ -164,7 +164,8 @@ func (s *Store) FindClosestDumps(ctx context.Context, repositoryID int, commit,

const findClosestDumpsQuery = `
-- source: enterprise/internal/codeintel/stores/dbstore/dumps.go:FindClosestDumps
WITH visible_uploads AS (%s)
WITH
visible_uploads AS (%s)
SELECT
u.id,
u.commit,
Expand Down Expand Up @@ -245,7 +246,8 @@ func (s *Store) FindClosestDumpsFromGraphFragment(ctx context.Context, repositor

const findClosestDumpsFromGraphFragmentCommitGraphQuery = `
-- source: enterprise/internal/codeintel/stores/dbstore/dumps.go:FindClosestDumpsFromGraphFragment
WITH visible_uploads AS (%s)
WITH
visible_uploads AS (%s)
SELECT
vu.upload_id,
encode(vu.commit_bytea, 'hex'),
Expand Down Expand Up @@ -379,26 +381,25 @@ func (s *Store) DeleteOverlappingDumps(ctx context.Context, repositoryID int, co

const deleteOverlappingDumpsQuery = `
-- source: enterprise/internal/codeintel/stores/dbstore/dumps.go:DeleteOverlappingDumps
WITH overlapping_dumps AS (
SELECT id
FROM lsif_uploads
WITH
candidates AS (
SELECT u.id
FROM lsif_uploads u
WHERE
state = 'completed' AND
repository_id = %s AND
commit = %s AND
root = %s AND
indexer = %s
-- Lock these rows in a deterministic order before the update
-- below. If we don't do this then we run into a pretty high
-- deadlock rate during upload processing as multiple workers
-- issue commands for the same set of records, but upload locks
-- records nondeterministically.
ORDER BY id FOR UPDATE
u.state = 'completed' AND
u.repository_id = %s AND
u.commit = %s AND
u.root = %s AND
u.indexer = %s
-- Lock these rows in a deterministic order so that we don't
-- deadlock with other processes updating the lsif_uploads table.
ORDER BY u.id FOR UPDATE
),
updated AS (
UPDATE lsif_uploads SET state = 'deleted'
WHERE id IN (SELECT id FROM overlapping_dumps)
UPDATE lsif_uploads u
SET state = 'deleted'
WHERE id IN (SELECT id FROM candidates)
RETURNING 1
)
SELECT COUNT(*) FROM updated
Expand Down
39 changes: 28 additions & 11 deletions enterprise/internal/codeintel/stores/dbstore/indexes.go
Original file line number Diff line number Diff line change
Expand Up @@ -499,17 +499,23 @@ func (s *Store) DeleteIndexesWithoutRepository(ctx context.Context, now time.Tim

const deleteIndexesWithoutRepositoryQuery = `
-- source: enterprise/internal/codeintel/stores/dbstore/indexes.go:DeleteIndexesWithoutRepository
WITH deleted_repos AS (
SELECT r.id AS id FROM repo r
WHERE
%s - r.deleted_at >= %s * interval '1 second' AND
EXISTS (SELECT 1 from lsif_indexes u WHERE u.repository_id = r.id)
WITH
candidates AS (
SELECT u.id
FROM repo r
JOIN lsif_indexes u ON u.repository_id = r.id
WHERE %s - r.deleted_at >= %s * interval '1 second'
-- Lock these rows in a deterministic order so that we don't
-- deadlock with other processes updating the lsif_indexes table.
ORDER BY u.id FOR UPDATE
),
deleted_uploads AS (
DELETE FROM lsif_indexes u WHERE repository_id IN (SELECT id FROM deleted_repos)
deleted AS (
DELETE FROM lsif_indexes u
WHERE id IN (SELECT id FROM candidates)
RETURNING u.id, u.repository_id
)
SELECT d.repository_id, COUNT(*) FROM deleted_uploads d GROUP BY d.repository_id
SELECT d.repository_id, COUNT(*) FROM deleted d GROUP BY d.repository_id
`

// DeleteOldIndexes deletes indexes older than the given age.
Expand Down Expand Up @@ -543,9 +549,20 @@ func (s *Store) DeleteOldIndexes(ctx context.Context, maxAge time.Duration, now

const deleteOldIndexesQuery = `
-- source: enterprise/internal/codeintel/stores/dbstore/indexes.go:DeleteOldIndexes
WITH deleted_indexes AS (
DELETE FROM lsif_indexes u WHERE %s - u.queued_at > (%s || ' second')::interval
WITH
candidates AS (
SELECT u.id
FROM lsif_indexes u
WHERE %s - u.queued_at > (%s || ' second')::interval
-- Lock these rows in a deterministic order so that we don't
-- deadlock with other processes updating the lsif_indexes table.
ORDER BY u.id FOR UPDATE
),
deleted AS (
DELETE FROM lsif_indexes u
WHERE id IN (SELECT id FROM candidates)
RETURNING u.id, u.repository_id
)
SELECT d.repository_id, COUNT(*) FROM deleted_indexes d GROUP BY d.repository_id
SELECT d.repository_id, COUNT(*) FROM deleted d GROUP BY d.repository_id
`
39 changes: 34 additions & 5 deletions enterprise/internal/codeintel/stores/dbstore/janitor.go
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,8 @@ func (s *Store) StaleSourcedCommits(ctx context.Context, minimumTimeSinceLastChe

const staleSourcedCommitsQuery = `
-- source: enterprise/internal/codeintel/stores/dbstore/janitor.go:StaleSourcedCommits
WITH candidates AS (%s UNION %s)
WITH
candidates AS (%s UNION %s)
SELECT r.id, r.name, c.commit
FROM candidates c
JOIN repo r ON r.id = c.repository_id
Expand Down Expand Up @@ -141,8 +142,8 @@ func (s *Store) RefreshCommitResolvability(ctx context.Context, repositoryID int

rows, err := s.Query(ctx, sqlf.Sprintf(
refreshCommitResolvabilityQuery,
assignmentExpression, repositoryID, commit,
assignmentExpression, repositoryID, commit,
repositoryID, commit, assignmentExpression,
repositoryID, commit, assignmentExpression,
))
if err != nil {
return 0, 0, err
Expand All @@ -167,8 +168,36 @@ func (s *Store) RefreshCommitResolvability(ctx context.Context, repositoryID int
const refreshCommitResolvabilityQuery = `
-- source: enterprise/internal/codeintel/stores/dbstore/janitor.go:RefreshCommitResolvability
WITH
update_uploads AS (UPDATE lsif_uploads SET %s WHERE repository_id = %s AND commit = %s RETURNING 1),
update_indexes AS (UPDATE lsif_indexes SET %s WHERE repository_id = %s AND commit = %s RETURNING 1)
candidate_uploads AS (
SELECT u.id
FROM lsif_uploads u
WHERE u.repository_id = %s AND u.commit = %s
-- Lock these rows in a deterministic order so that we don't
-- deadlock with other processes updating the lsif_uploads table.
ORDER BY u.id FOR UPDATE
),
update_uploads AS (
UPDATE lsif_uploads u
SET %s
WHERE id IN (SELECT id FROM candidate_uploads)
RETURNING 1
),
candidate_indexes AS (
SELECT u.id
FROM lsif_indexes u
WHERE u.repository_id = %s AND u.commit = %s
-- Lock these rows in a deterministic order so that we don't
-- deadlock with other processes updating the lsif_indexes table.
ORDER BY u.id FOR UPDATE
),
update_indexes AS (
UPDATE lsif_indexes u
SET %s
WHERE id IN (SELECT id FROM candidate_indexes)
RETURNING 1
)
SELECT
(SELECT COUNT(*) FROM update_uploads) AS num_uploads,
(SELECT COUNT(*) FROM update_indexes) AS num_indexes
Expand Down
66 changes: 44 additions & 22 deletions enterprise/internal/codeintel/stores/dbstore/uploads.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package dbstore
import (
"context"
"database/sql"
"sort"
"strconv"
"strings"
"time"
Expand Down Expand Up @@ -257,11 +258,21 @@ func (s *Store) DeleteUploadsStuckUploading(ctx context.Context, uploadedBefore

const deleteUploadsStuckUploadingQuery = `
-- source: enterprise/internal/codeintel/stores/dbstore/uploads.go:DeleteUploadsStuckUploading
WITH deleted AS (
UPDATE lsif_uploads
WITH
candidates AS (
SELECT u.id
FROM lsif_uploads u
WHERE u.state = 'uploading' AND u.uploaded_at < %s
-- Lock these rows in a deterministic order so that we don't
-- deadlock with other processes updating the lsif_uploads table.
ORDER BY u.id FOR UPDATE
),
deleted AS (
UPDATE lsif_uploads u
SET state = 'deleted'
WHERE state = 'uploading' AND uploaded_at < %s
RETURNING repository_id
WHERE id IN (SELECT id FROM candidates)
RETURNING u.repository_id
)
SELECT count(*) FROM deleted
`
Expand Down Expand Up @@ -604,19 +615,24 @@ func (s *Store) DeleteUploadsWithoutRepository(ctx context.Context, now time.Tim

const deleteUploadsWithoutRepositoryQuery = `
-- source: enterprise/internal/codeintel/stores/dbstore/uploads.go:DeleteUploadsWithoutRepository
WITH deleted_repos AS (
SELECT r.id AS id FROM repo r
WHERE
%s - r.deleted_at >= %s * interval '1 second' AND
EXISTS (SELECT 1 from lsif_uploads u WHERE u.repository_id = r.id)
WITH
candidates AS (
SELECT u.id
FROM repo r
JOIN lsif_uploads u ON u.repository_id = r.id
WHERE %s - r.deleted_at >= %s * interval '1 second'
-- Lock these rows in a deterministic order so that we don't
-- deadlock with other processes updating the lsif_uploads table.
ORDER BY u.id FOR UPDATE
),
deleted_uploads AS (
deleted AS (
UPDATE lsif_uploads u
SET state = 'deleted'
WHERE u.repository_id IN (SELECT id FROM deleted_repos)
WHERE u.id IN (SELECT id FROM candidates)
RETURNING u.id, u.repository_id
)
SELECT d.repository_id, COUNT(*) FROM deleted_uploads d GROUP BY d.repository_id
SELECT d.repository_id, COUNT(*) FROM deleted d GROUP BY d.repository_id
`

// HardDeleteUploadByID deletes the upload record with the given identifier.
Expand All @@ -631,6 +647,11 @@ func (s *Store) HardDeleteUploadByID(ctx context.Context, ids ...int) (err error
return nil
}

// Ensure ids are sorted so that we take row locks during the
// DELETE query in a deterministic order. This should prevent
// deadlocks with other queries that mass update lsif_uploads.
sort.Ints(ids)

var idQueries []*sqlf.Query
for _, id := range ids {
idQueries = append(idQueries, sqlf.Sprintf("%s", id))
Expand Down Expand Up @@ -710,19 +731,20 @@ protected_uploads AS (
candidates AS (
-- Find the inverse of protected_uploads, which contains each upload record
-- that is older than the configured retention age and is not reachable via
-- the dependencies of any upload in protected_uploads. We also order the
-- candidates here to try to acquire the locks in the following update in
-- a deterministic order so that we do not deadlock with another query updating
-- overlapping records.
(SELECT id FROM lsif_uploads EXCEPT SELECT id FROM protected_uploads) ORDER BY id
-- the dependencies of any upload in protected_uploads.
SELECT u.id
FROM lsif_uploads u
WHERE u.id NOT IN (SELECT id FROM protected_uploads)
-- Lock these rows in a deterministic order so that we don't
-- deadlock with other processes updating the lsif_uploads table.
ORDER BY u.id FOR UPDATE
),
updated AS (
UPDATE lsif_uploads u
SET state = 'deleted'
WHERE
u.id IN (SELECT id FROM candidates)
RETURNING id, repository_id
SET state = 'deleted'
WHERE u.id IN (SELECT id FROM candidates)
RETURNING u.id, u.repository_id
)
SELECT u.repository_id, count(*) FROM updated u GROUP BY u.repository_id
`
Expand Down
3 changes: 2 additions & 1 deletion enterprise/internal/codeintel/stores/dbstore/xrepo.go
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,8 @@ func (s *Store) ReferenceIDsAndFilters(ctx context.Context, repositoryID int, co

const referenceIDsAndFiltersCTEDefinitions = `
-- source: enterprise/internal/codeintel/stores/dbstore/xrepo.go:ReferenceIDsAndFilters
WITH visible_uploads AS (
WITH
visible_uploads AS (
(%s)
UNION
(SELECT uvt.upload_id FROM lsif_uploads_visible_at_tip uvt WHERE uvt.repository_id != %s AND uvt.is_default_branch)
Expand Down
6 changes: 6 additions & 0 deletions enterprise/internal/codeintel/stores/lsifstore/clear.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package lsifstore

import (
"context"
"sort"
"strconv"
"strings"

Expand Down Expand Up @@ -33,6 +34,11 @@ func (s *Store) Clear(ctx context.Context, bundleIDs ...int) (err error) {
return nil
}

// Ensure ids are sorted so that we take row locks during the
// DELETE query in a deterministic order. This should prevent
// deadlocks with other queries that mass update the same table.
sort.Ints(bundleIDs)

var ids []*sqlf.Query
for _, bundleID := range bundleIDs {
ids = append(ids, sqlf.Sprintf("%d", bundleID))
Expand Down

0 comments on commit f1fef7f

Please sign in to comment.