Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add document hard deletion functionality to housekeeping tasks. #718

Open
wants to merge 30 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 29 commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
5f32970
Add document hard deletion functionality to housekeeping tasks.
fourjae Dec 10, 2023
d4b61a6
Modified function for document removal, added memdb-based function, r…
fourjae Jan 6, 2024
5429670
Add document hard deletion functionality to housekeeping tasks.
fourjae Dec 10, 2023
c310a64
Modified function for document removal, added memdb-based function, r…
fourjae Jan 6, 2024
30c95b2
Added HardDeletion testCode for MongoDB, Changed function return type…
fourjae Jan 28, 2024
6eadd87
Added Hard Deletion test Code for MemoryDB and MemoryDB Based Functio…
fourjae Jan 28, 2024
89ad3f7
Conflict Resolution
fourjae Jan 28, 2024
5367c34
update default Hard Deletion Limit Per Project Value 100 -> 500
fourjae Jan 28, 2024
8053340
second Conflict Resolution
fourjae Jan 29, 2024
7daf5de
Changed and Deleted all functions and test code to match the changed …
fourjae Jan 29, 2024
764babd
Change housekeeping Structure for intervals, Changed and Deleted all …
fourjae Jan 29, 2024
0066e8b
Fix missing housekeeping config for TestMain
fourjae Jan 29, 2024
696031a
Fix missing housekeeping config for Test Config
fourjae Jan 29, 2024
e26ba6b
Fix missing housekeeping config for Test Config
fourjae Jan 29, 2024
b158ee9
Revert 'Change housekeeping Structure for intervals'
fourjae Jan 30, 2024
1a4d2c3
Added DeleteAfterTime value to housekeeping config setting and modifi…
fourjae Jan 30, 2024
78a83f0
Addition of missing housekeeping config
fourjae Jan 31, 2024
ad81cfe
All comments have been reflected, some function changes and comment t…
fourjae Feb 12, 2024
d45f2ab
Change the FindDocumentHardDeletionCandidatesPerProject condition fro…
fourjae Feb 12, 2024
3bb69e0
merge solve
fourjae Jul 27, 2024
96f70c4
Changed document hard deletion code to improve housekeeping structure
fourjae Jul 29, 2024
6162e71
merge conflict resolved
fourjae Jul 30, 2024
c171085
DocumentHardDeletionGracefulPeriod type and value change
fourjae Jul 30, 2024
13369fc
coderabbitai comments reflected,
fourjae Aug 1, 2024
6cc0e19
Resolving race conditions and separating gocron task code
fourjae Aug 2, 2024
7d636a3
testConfig ProjectFetchSize change
fourjae Aug 3, 2024
ce7abac
Merge branch 'main' into DB-hard-delete
fourjae Aug 3, 2024
9bef472
Merge branch 'main' into DB-hard-delete
fourjae Aug 27, 2024
16d8749
Comment reflection, large-scale editing
fourjae Sep 4, 2024
86a3f5d
Merge remote-tracking branch 'upstream/main' into DB-hard-delete
hackerwins Sep 12, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 32 additions & 10 deletions cmd/yorkie/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,9 +38,11 @@ var (
flagConfPath string
flagLogLevel string

adminTokenDuration time.Duration
housekeepingInterval time.Duration
clientDeactivateThreshold string
adminTokenDuration time.Duration
housekeepingDeactivateCandidatesInterval time.Duration
housekeepingDeleteDocumentsInterval time.Duration
documentHardDeletionGracefulPeriod time.Duration
clientDeactivateThreshold string

mongoConnectionURI string
mongoConnectionTimeout time.Duration
Expand Down Expand Up @@ -69,7 +71,9 @@ func newServerCmd() *cobra.Command {
conf.Backend.AuthWebhookCacheUnauthTTL = authWebhookCacheUnauthTTL.String()
conf.Backend.ProjectInfoCacheTTL = projectInfoCacheTTL.String()

conf.Housekeeping.Interval = housekeepingInterval.String()
conf.Housekeeping.DeactivateCandidatesInterval = housekeepingDeactivateCandidatesInterval.String()
conf.Housekeeping.DeleteDocumentsInterval = housekeepingDeleteDocumentsInterval.String()
conf.Housekeeping.DocumentHardDeletionGracefulPeriod = documentHardDeletionGracefulPeriod

if mongoConnectionURI != "" {
conf.Mongo = &mongo.Config{
Expand Down Expand Up @@ -193,18 +197,36 @@ func init() {
false,
"Enable runtime profiling data via HTTP server.",
)
cmd.Flags().StringVar(
&conf.Housekeeping.DeactivateCandidatesInterval,
"housekeeping-interval-Deactivate-Candidates",
server.DefaultHousekeepingDeactivateCandidatesInterval.String(),
"housekeeping Interval deactivate candidates between housekeeping runs",
)
cmd.Flags().StringVar(
&conf.Housekeeping.DeleteDocumentsInterval,
"housekeeping-interval-Delete-Documents",
server.DefaultHousekeepingDeleteDocumentsInterval.String(),
"housekeeping Interval delete documents between housekeeping runs",
)
cmd.Flags().DurationVar(
&housekeepingInterval,
"housekeeping-interval",
server.DefaultHousekeepingInterval,
"housekeeping interval between housekeeping runs",
&conf.Housekeeping.DocumentHardDeletionGracefulPeriod,
"housekeeping-DocumentHardDeletion-delete-graceful-period",
server.DefaultHousekeepingDocumentHardDeletionGracefulPeriod,
"Document deletion over time after a single housekeeping run",
)
cmd.Flags().IntVar(
&conf.Housekeeping.CandidatesLimitPerProject,
&conf.Housekeeping.ClientDeactivationCandidateLimitPerProject,
"housekeeping-candidates-limit-per-project",
server.DefaultHousekeepingCandidatesLimitPerProject,
server.DefaultHousekeepingClientDeactivationCandidateLimitPerProject,
"candidates limit per project for a single housekeeping run",
)
cmd.Flags().IntVar(
&conf.Housekeeping.DocumentHardDeletionCandidateLimitPerProject,
"housekeeping-DocumentHardDeletion-limit-per-project",
server.DefaultHousekeepingDocumentHardDeletionCandidateLimitPerProject,
"Document Deletion limit per project for a single housekeeping run",
)
cmd.Flags().IntVar(
&conf.Housekeeping.ProjectFetchSize,
"housekeeping-project-fetch-size",
Expand Down
15 changes: 15 additions & 0 deletions server/backend/database/database.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ package database
import (
"context"
"errors"
gotime "time"

"github.com/yorkie-team/yorkie/api/types"
"github.com/yorkie-team/yorkie/pkg/document"
Expand Down Expand Up @@ -163,6 +164,20 @@ type Database interface {
candidatesLimit int,
) ([]*ClientInfo, error)

// FindDocumentHardDeletionCandidatesPerProject finds the documents that housekeeping should hard-delete per project.
FindDocumentHardDeletionCandidatesPerProject(
ctx context.Context,
project *ProjectInfo,
candidatesLimit int,
documentHardDeletionGracefulPeriod gotime.Duration,
) ([]*DocInfo, error)

// DeleteDocuments permanently deletes the given candidate documents and returns the number of deleted documents.
DeleteDocuments(
ctx context.Context,
candidates []*DocInfo,
) (int64, error)

// FindDocInfoByKey finds the document of the given key.
FindDocInfoByKey(
ctx context.Context,
Expand Down
62 changes: 62 additions & 0 deletions server/backend/database/memory/database.go
Original file line number Diff line number Diff line change
Expand Up @@ -673,6 +673,44 @@ func (d *DB) UpdateClientInfoAfterPushPull(
return nil
}

// FindDocumentHardDeletionCandidatesPerProject finds the documents of the given
// project that are eligible for hard deletion, i.e. whose RemovedAt is at or
// before now minus documentHardDeletionGracefulPeriod. At most candidatesLimit
// documents are returned; a candidatesLimit of 0 means no limit.
func (d *DB) FindDocumentHardDeletionCandidatesPerProject(
	_ context.Context,
	project *database.ProjectInfo,
	candidatesLimit int,
	documentHardDeletionGracefulPeriod gotime.Duration,
) ([]*database.DocInfo, error) {
	txn := d.db.Txn(false)
	defer txn.Abort()

	// Documents removed at or before this instant have outlived the grace period.
	threshold := gotime.Now().Add(-documentHardDeletionGracefulPeriod)

	iterator, err := txn.ReverseLowerBound(
		tblDocuments,
		"project_id_removed_at",
		project.ID.String(),
		threshold,
	)
	if err != nil {
		return nil, fmt.Errorf("fetch hard deletion candidates: %w", err)
	}

	var documents []*database.DocInfo
	for raw := iterator.Next(); raw != nil; raw = iterator.Next() {
		if candidatesLimit != 0 && candidatesLimit <= len(documents) {
			break
		}

		document := raw.(*database.DocInfo)

		// ReverseLowerBound yields every entry at or below the bound, not only
		// the entries that share the project prefix, so documents belonging to
		// other projects must be filtered out explicitly.
		if document.ProjectID != project.ID {
			continue
		}

		// NOTE(review): a zero RemovedAt presumably marks a document that has
		// not been removed at all; it must never become a deletion candidate.
		if document.RemovedAt.IsZero() {
			continue
		}

		if !document.RemovedAt.After(threshold) {
			documents = append(documents, document)
		}
	}

	return documents, nil
}

// FindDeactivateCandidatesPerProject finds the clients that need housekeeping per project.
func (d *DB) FindDeactivateCandidatesPerProject(
_ context.Context,
Expand Down Expand Up @@ -717,6 +755,30 @@ func (d *DB) FindDeactivateCandidatesPerProject(
return infos, nil
}

// DeleteDocuments deletes the given documents completely from the documents
// table and returns the number of deleted documents. All deletions happen in a
// single write transaction: if any deletion fails, nothing is committed and
// 0 is returned together with the error.
func (d *DB) DeleteDocuments(
	_ context.Context,
	candidates []*database.DocInfo,
) (int64, error) {
	if len(candidates) == 0 {
		return 0, nil
	}

	txn := d.db.Txn(true)
	// Rolls back on the early-return error path below.
	defer txn.Abort()

	var deletedCount int64
	for _, candidate := range candidates {
		if err := txn.Delete(tblDocuments, candidate); err != nil {
			return 0, fmt.Errorf("delete document: %w", err)
		}
		deletedCount++
	}
	txn.Commit()

	return deletedCount, nil
}

// FindDocInfoByKeyAndOwner finds the document of the given key. If the
// createDocIfNotExist condition is true, create the document if it does not
// exist.
Expand Down
4 changes: 4 additions & 0 deletions server/backend/database/memory/database_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -107,4 +107,8 @@ func TestDB(t *testing.T) {
t.Run("IsDocumentAttached test", func(t *testing.T) {
testcases.RunIsDocumentAttachedTest(t, db, projectID)
})

t.Run("DocumentHardDeletion test", func(t *testing.T) {
testcases.RunDocumentHardDeletionTest(t, db)
})
}
9 changes: 9 additions & 0 deletions server/backend/database/memory/indexes.go
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,15 @@ var schema = &memdb.DBSchema{
},
},
},
"project_id_removed_at": {
Name: "project_id_removed_at",
Indexer: &memdb.CompoundIndex{
Indexes: []memdb.Indexer{
&memdb.StringFieldIndex{Field: "ProjectID"},
&memdb.TimeFieldIndex{Field: "RemovedAt"},
},
},
},
"project_id_key_removed_at": {
Name: "project_id_key_removed_at",
Indexer: &memdb.CompoundIndex{
Expand Down
63 changes: 63 additions & 0 deletions server/backend/database/mongo/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -227,6 +227,41 @@ func (c *Client) CreateProjectInfo(
return info, nil
}

// DeleteDocuments deletes the given documents completely from the documents
// collection and returns the number of deleted documents. It returns an error
// without deleting anything if any candidate has an empty ID.
func (c *Client) DeleteDocuments(
	ctx context.Context,
	candidates []*database.DocInfo,
) (int64, error) {
	if len(candidates) == 0 {
		return 0, nil
	}

	// Validate and collect the IDs in a single pass; nothing is sent to the
	// database unless every candidate carries an ID.
	idList := make([]types.ID, 0, len(candidates))
	for _, docInfo := range candidates {
		if docInfo.ID == "" {
			return 0, fmt.Errorf("invalid document ID")
		}
		idList = append(idList, docInfo.ID)
	}

	result, err := c.collection(ColDocuments).DeleteMany(
		ctx,
		bson.M{"_id": bson.M{"$in": idList}},
	)
	if err != nil {
		// The result may be nil when DeleteMany fails, so it must not be
		// dereferenced on this path.
		return 0, fmt.Errorf("failed to delete documents: %w", err)
	}

	return result.DeletedCount, nil
}

// FindNextNCyclingProjectInfos finds the next N cycling projects from the given projectID.
func (c *Client) FindNextNCyclingProjectInfos(
ctx context.Context,
Expand Down Expand Up @@ -672,6 +707,34 @@ func (c *Client) UpdateClientInfoAfterPushPull(
return nil
}

// FindDocumentHardDeletionCandidatesPerProject finds the documents of the given
// project that are eligible for hard deletion, i.e. whose removed_at is at or
// before now minus documentHardDeletionGracefulPeriod. candidatesLimit is
// passed to the query as its limit.
func (c *Client) FindDocumentHardDeletionCandidatesPerProject(
	ctx context.Context,
	project *database.ProjectInfo,
	candidatesLimit int,
	documentHardDeletionGracefulPeriod gotime.Duration,
) ([]*database.DocInfo, error) {
	// Documents removed at or before this instant have outlived the grace period.
	threshold := gotime.Now().Add(-documentHardDeletionGracefulPeriod)

	cursor, err := c.collection(ColDocuments).Find(ctx, bson.M{
		"project_id": project.ID,
		"removed_at": bson.M{"$lte": threshold},
	}, options.Find().SetLimit(int64(candidatesLimit)))
	if err != nil {
		return nil, fmt.Errorf("find hard deletion candidates: %w", err)
	}

	var docInfos []*database.DocInfo
	if err := cursor.All(ctx, &docInfos); err != nil {
		return nil, fmt.Errorf("fetch hard deletion candidates: %w", err)
	}

	return docInfos, nil
}

// FindDeactivateCandidatesPerProject finds the clients that need housekeeping per project.
func (c *Client) FindDeactivateCandidatesPerProject(
ctx context.Context,
Expand Down
4 changes: 4 additions & 0 deletions server/backend/database/mongo/client_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -124,4 +124,8 @@ func TestClient(t *testing.T) {
t.Run("IsDocumentAttached test", func(t *testing.T) {
testcases.RunIsDocumentAttachedTest(t, cli, dummyProjectID)
})

t.Run("DocumentHardDeletion test", func(t *testing.T) {
testcases.RunDocumentHardDeletionTest(t, cli)
})
}
56 changes: 56 additions & 0 deletions server/backend/database/testcases/testcases.go
Original file line number Diff line number Diff line change
Expand Up @@ -1410,3 +1410,59 @@ func AssertKeys(t *testing.T, expectedKeys []key.Key, infos []*database.DocInfo)
}
assert.EqualValues(t, expectedKeys, keys)
}

// RunDocumentHardDeletionTest runs the DocumentHardDeletion tests for the given db.
// It removes a document, looks it up as a hard deletion candidate, deletes the
// candidates and verifies that the document can no longer be found.
func RunDocumentHardDeletionTest(t *testing.T, db database.Database) {
	t.Run("housekeeping DocumentHardDeletion test", func(t *testing.T) {
		ctx := context.Background()
		docKey := helper.TestDocKey(t)

		// 00. Create a project.
		projectInfo, err := db.CreateProjectInfo(ctx, t.Name(), dummyOwnerID, clientDeactivateThreshold)
		assert.NoError(t, err)

		// 01. Create a client and a document then attach the document to the client.
		clientInfo, err := db.ActivateClient(ctx, projectInfo.ID, t.Name())
		assert.NoError(t, err)
		docInfo, err := db.FindDocInfoByKeyAndOwner(ctx, clientInfo.RefKey(), docKey, true)
		assert.NoError(t, err)
		docRefKey := docInfo.RefKey()
		assert.NoError(t, clientInfo.AttachDocument(docInfo.ID, false))
		assert.NoError(t, db.UpdateClientInfoAfterPushPull(ctx, clientInfo, docInfo))

		doc := document.New(key.Key(t.Name()))
		pack := doc.CreateChangePack()

		// 02. Set removed_at in docInfo and store changes.
		assert.NoError(t, clientInfo.RemoveDocument(docInfo.ID))
		err = db.CreateChangeInfos(ctx, projectInfo.ID, docInfo, 0, pack.Changes, true)
		assert.NoError(t, err)

		// 03. Use a negative grace period (-1s). The deletion threshold is
		// "now minus grace period", so it lies one second in the future and
		// the document removed just above qualifies immediately.
		gracePeriod := "-1s"
		documentHardDeletionGracefulPeriod, err := gotime.ParseDuration(gracePeriod)
		assert.NoError(t, err)

		// 04. Find documents whose removed_at time is less than or equal to
		// the current time minus the grace period.
		fetchSize := 100
		candidates, err := db.FindDocumentHardDeletionCandidatesPerProject(
			ctx,
			projectInfo,
			fetchSize,
			documentHardDeletionGracefulPeriod,
		)
		assert.NoError(t, err)

		// 05. Delete the candidates and compare the number of candidates for
		// deletion (expected) with the number of deleted documents (actual).
		deletedDocumentsCount, err := db.DeleteDocuments(ctx, candidates)
		assert.NoError(t, err)
		assert.Equal(t, len(candidates), int(deletedDocumentsCount))

		// 06. The removed document must no longer exist.
		_, err = db.FindDocInfoByRefKey(ctx, docRefKey)
		assert.ErrorIs(t, err, database.ErrDocumentNotFound)
	})
}
Loading
Loading