Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add document hard deletion functionality to housekeeping tasks. #718

Draft
wants to merge 30 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from 27 commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
5f32970
Add document hard deletion functionality to housekeeping tasks.
fourjae Dec 10, 2023
d4b61a6
Modified function for document removal, added memdb-based function, r…
fourjae Jan 6, 2024
5429670
Add document hard deletion functionality to housekeeping tasks.
fourjae Dec 10, 2023
c310a64
Modified function for document removal, added memdb-based function, r…
fourjae Jan 6, 2024
30c95b2
Added HardDeletion testCode for MongoDB, Changed function return type…
fourjae Jan 28, 2024
6eadd87
Added Hard Deletion test Code for MemoryDB and MemoryDB Based Functio…
fourjae Jan 28, 2024
89ad3f7
Conflict Resolution
fourjae Jan 28, 2024
5367c34
update default Hard Deletion Limit Per Project Value 100 -> 500
fourjae Jan 28, 2024
8053340
second Conflict Resolution
fourjae Jan 29, 2024
7daf5de
Changed and Deleted all functions and test code to match the changed …
fourjae Jan 29, 2024
764babd
Change housekeeping Structure for intervals, Changed and Deleted all …
fourjae Jan 29, 2024
0066e8b
Fix missing housekeeping config for TestMain
fourjae Jan 29, 2024
696031a
Fix missing housekeeping config for Test Config
fourjae Jan 29, 2024
e26ba6b
Fix missing housekeeping config for Test Config
fourjae Jan 29, 2024
b158ee9
Revert 'Change housekeeping Structure for intervals'
fourjae Jan 30, 2024
1a4d2c3
Added DeleteAfterTime value to housekeeping config setting and modifi…
fourjae Jan 30, 2024
78a83f0
Addition of missing housekeeping config
fourjae Jan 31, 2024
ad81cfe
All comments have been reflected, some function changes and comment t…
fourjae Feb 12, 2024
d45f2ab
Change the FindDocumentHardDeletionCandidatesPerProject condition fro…
fourjae Feb 12, 2024
3bb69e0
merge solve
fourjae Jul 27, 2024
96f70c4
Changed document hard deletion code to improve housekeeping structure
fourjae Jul 29, 2024
6162e71
merge conflict resolved
fourjae Jul 30, 2024
c171085
DocumentHardDeletionGracefulPeriod type and value change
fourjae Jul 30, 2024
13369fc
coderabbitai comments reflected,
fourjae Aug 1, 2024
6cc0e19
Resolving race conditions and separating gocron task code
fourjae Aug 2, 2024
7d636a3
testConfig ProjectFetchSize change
fourjae Aug 3, 2024
ce7abac
Merge branch 'main' into DB-hard-delete
fourjae Aug 3, 2024
9bef472
Merge branch 'main' into DB-hard-delete
fourjae Aug 27, 2024
16d8749
Comment reflection, large-scale editing
fourjae Sep 4, 2024
86a3f5d
Merge remote-tracking branch 'upstream/main' into DB-hard-delete
hackerwins Sep 12, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 32 additions & 10 deletions cmd/yorkie/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,9 +38,11 @@ var (
flagConfPath string
flagLogLevel string

adminTokenDuration time.Duration
housekeepingInterval time.Duration
clientDeactivateThreshold string
adminTokenDuration time.Duration
housekeepingIntervalDeactivateCandidates time.Duration
housekeepingIntervalDeleteDocuments time.Duration
documentHardDeletionGracefulPeriod time.Duration
clientDeactivateThreshold string

mongoConnectionURI string
mongoConnectionTimeout time.Duration
Expand Down Expand Up @@ -69,7 +71,9 @@ func newServerCmd() *cobra.Command {
conf.Backend.AuthWebhookCacheUnauthTTL = authWebhookCacheUnauthTTL.String()
conf.Backend.ProjectInfoCacheTTL = projectInfoCacheTTL.String()

conf.Housekeeping.Interval = housekeepingInterval.String()
conf.Housekeeping.IntervalDeactivateCandidates = housekeepingIntervalDeactivateCandidates.String()
conf.Housekeeping.IntervalDeleteDocuments = housekeepingIntervalDeleteDocuments.String()
conf.Housekeeping.DocumentHardDeletionGracefulPeriod = documentHardDeletionGracefulPeriod

if mongoConnectionURI != "" {
conf.Mongo = &mongo.Config{
Expand Down Expand Up @@ -193,18 +197,36 @@ func init() {
false,
"Enable runtime profiling data via HTTP server.",
)
cmd.Flags().StringVar(
&conf.Housekeeping.IntervalDeactivateCandidates,
"housekeeping-interval-Deactivate-Candidates",
server.DefaultHousekeepingIntervalDeactivateCandidates.String(),
"housekeeping Interval deactivate candidates between housekeeping runs",
)
cmd.Flags().StringVar(
&conf.Housekeeping.IntervalDeleteDocuments,
"housekeeping-interval-Delete-Documents",
server.DefaultHousekeepingIntervalDeleteDocuments.String(),
"housekeeping Interval delete documents between housekeeping runs",
)
cmd.Flags().DurationVar(
&housekeepingInterval,
"housekeeping-interval",
server.DefaultHousekeepingInterval,
"housekeeping interval between housekeeping runs",
&conf.Housekeeping.DocumentHardDeletionGracefulPeriod,
"housekeeping-DocumentHardDeletion-delete-graceful-period",
server.DefaultHousekeepingDocumentHardDeletionGracefulPeriod,
"Document deletion over time after a single housekeeping run",
)
cmd.Flags().IntVar(
&conf.Housekeeping.CandidatesLimitPerProject,
&conf.Housekeeping.ClientDeactivationCandidateLimitPerProject,
"housekeeping-candidates-limit-per-project",
server.DefaultHousekeepingCandidatesLimitPerProject,
server.DefaultHousekeepingClientDeactivationCandidateLimitPerProject,
"candidates limit per project for a single housekeeping run",
)
cmd.Flags().IntVar(
&conf.Housekeeping.DocumentHardDeletionCandidateLimitPerProject,
"housekeeping-DocumentHardDeletion-limit-per-project",
server.DefaultHousekeepingDocumentHardDeletionCandidateLimitPerProject,
"Document Deletion limit per project for a single housekeeping run",
)
cmd.Flags().IntVar(
&conf.Housekeeping.ProjectFetchSize,
"housekeeping-project-fetch-size",
Expand Down
15 changes: 15 additions & 0 deletions server/backend/database/database.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ package database
import (
"context"
"errors"
gotime "time"

"github.com/yorkie-team/yorkie/api/types"
"github.com/yorkie-team/yorkie/pkg/document"
Expand Down Expand Up @@ -163,6 +164,20 @@ type Database interface {
candidatesLimit int,
) ([]*ClientInfo, error)

// FindDocumentHardDeletionCandidatesPerProject finds the documents of the
// given project that are candidates for hard deletion by housekeeping.
FindDocumentHardDeletionCandidatesPerProject(
ctx context.Context,
project *ProjectInfo,
candidatesLimit int,
documentHardDeletionGracefulPeriod gotime.Duration,
) ([]*DocInfo, error)

// DeleteDocument permanently deletes the given documents.
fourjae marked this conversation as resolved.
Show resolved Hide resolved
DeleteDocument(
fourjae marked this conversation as resolved.
Show resolved Hide resolved
ctx context.Context,
candidates []*DocInfo,
) (int64, error)

// FindDocInfoByKey finds the document of the given key.
FindDocInfoByKey(
ctx context.Context,
Expand Down
62 changes: 62 additions & 0 deletions server/backend/database/memory/database.go
Original file line number Diff line number Diff line change
Expand Up @@ -673,6 +673,44 @@ func (d *DB) UpdateClientInfoAfterPushPull(
return nil
}

// FindDocumentHardDeletionCandidatesPerProject finds the documents that need housekeeping per project.
func (d *DB) FindDocumentHardDeletionCandidatesPerProject(
_ context.Context,
project *database.ProjectInfo,
candidatesLimit int,
documentHardDeletionGracefulPeriod gotime.Duration,
) ([]*database.DocInfo, error) {

txn := d.db.Txn(false)
defer txn.Abort()

var documents []*database.DocInfo
iterator, err := txn.Get(
tblDocuments,
"project_id",
project.ID.String(),
)

if err != nil {
return nil, fmt.Errorf("fetch hard deletion candidates: %w", err)
}

currentTime := gotime.Now()
conditionDocumentHardDeletionGracefulPeriod := currentTime.Add(-documentHardDeletionGracefulPeriod)
fourjae marked this conversation as resolved.
Show resolved Hide resolved
for raw := iterator.Next(); raw != nil; raw = iterator.Next() {
document := raw.(*database.DocInfo)
if candidatesLimit <= len(documents) && candidatesLimit != 0 {
break
}

if !document.RemovedAt.After(conditionDocumentHardDeletionGracefulPeriod) {
documents = append(documents, document)
}
}

return documents, nil
}

// FindDeactivateCandidatesPerProject finds the clients that need housekeeping per project.
func (d *DB) FindDeactivateCandidatesPerProject(
_ context.Context,
Expand Down Expand Up @@ -717,6 +755,30 @@ func (d *DB) FindDeactivateCandidatesPerProject(
return infos, nil
}

// DeleteDocument permanently deletes the given documents from the documents
// table in a single write transaction. It returns the number of deleted
// documents. If any deletion fails, the transaction is aborted, nothing is
// deleted, and 0 is returned together with the error.
func (d *DB) DeleteDocument(
	_ context.Context,
	candidates []*database.DocInfo,
) (int64, error) {
	if len(candidates) == 0 {
		return 0, nil
	}

	txn := d.db.Txn(true)
	// Rolls back on the early-return error path; the commit below is reached
	// only when every deletion succeeded.
	defer txn.Abort()

	for _, candidate := range candidates {
		if err := txn.Delete(tblDocuments, candidate); err != nil {
			return 0, fmt.Errorf("delete document: %w", err)
		}
	}
	txn.Commit()

	return int64(len(candidates)), nil
}

// FindDocInfoByKeyAndOwner finds the document of the given key. If the
// createDocIfNotExist condition is true, create the document if it does not
// exist.
Expand Down
4 changes: 4 additions & 0 deletions server/backend/database/memory/database_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -107,4 +107,8 @@ func TestDB(t *testing.T) {
t.Run("IsDocumentAttached test", func(t *testing.T) {
testcases.RunIsDocumentAttachedTest(t, db, projectID)
})

t.Run("DocumentHardDeletion test", func(t *testing.T) {
testcases.RunDocumentHardDeletionTest(t, db)
})
}
5 changes: 5 additions & 0 deletions server/backend/database/memory/indexes.go
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,11 @@ var schema = &memdb.DBSchema{
Unique: true,
Indexer: &memdb.StringFieldIndex{Field: "ID"},
},
"project_id": {
Name: "project_id",
Unique: true,
Indexer: &memdb.StringFieldIndex{Field: "ProjectID"},
},
"project_id_id": {
Name: "project_id_id",
Unique: true,
Expand Down
94 changes: 94 additions & 0 deletions server/backend/database/mongo/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -227,6 +227,41 @@ func (c *Client) CreateProjectInfo(
return info, nil
}

// DeleteDocument permanently deletes the given documents from the documents
// collection and returns the number of documents that were deleted. It returns
// an error without deleting anything if any candidate has an empty ID.
func (c *Client) DeleteDocument(
	ctx context.Context,
	candidates []*database.DocInfo,
) (int64, error) {
	if len(candidates) == 0 {
		return 0, nil
	}

	// Validate and collect the IDs in one pass; nothing is sent to the
	// database unless every candidate carries an ID.
	ids := make([]types.ID, 0, len(candidates))
	for _, docInfo := range candidates {
		if docInfo.ID == "" {
			return 0, fmt.Errorf("invalid document ID")
		}
		ids = append(ids, docInfo.ID)
	}

	result, err := c.collection(ColDocuments).DeleteMany(
		ctx,
		bson.M{"_id": bson.M{"$in": ids}},
	)
	if err != nil {
		// The driver may return a nil result together with the error, so the
		// count must not be read through the pointer unconditionally.
		var deletedCount int64
		if result != nil {
			deletedCount = result.DeletedCount
		}
		return deletedCount, fmt.Errorf("failed to delete documents: %w", err)
	}

	return result.DeletedCount, nil
}

// FindNextNCyclingProjectInfos finds the next N cycling projects from the given projectID.
func (c *Client) FindNextNCyclingProjectInfos(
ctx context.Context,
Expand Down Expand Up @@ -672,6 +707,34 @@ func (c *Client) UpdateClientInfoAfterPushPull(
return nil
}

// FindDocumentHardDeletionCandidatesPerProject finds the documents of the given
// project whose removed_at is at or before the current time minus
// documentHardDeletionGracefulPeriod. candidatesLimit is passed to the query as
// its limit.
func (c *Client) FindDocumentHardDeletionCandidatesPerProject(
	ctx context.Context,
	project *database.ProjectInfo,
	candidatesLimit int,
	documentHardDeletionGracefulPeriod gotime.Duration,
) ([]*database.DocInfo, error) {
	// Documents removed at or before this instant have outlived the grace period.
	threshold := gotime.Now().Add(-documentHardDeletionGracefulPeriod)

	cursor, err := c.collection(ColDocuments).Find(ctx, bson.M{
		"project_id": project.ID,
		"removed_at": bson.M{"$lte": threshold},
	}, options.Find().SetLimit(int64(candidatesLimit)))
	if err != nil {
		return nil, fmt.Errorf("find hard deletion candidates: %w", err)
	}

	var docInfos []*database.DocInfo
	if err := cursor.All(ctx, &docInfos); err != nil {
		return nil, fmt.Errorf("fetch hard deletion candidates: %w", err)
	}

	return docInfos, nil
}

// FindDeactivateCandidatesPerProject finds the clients that need housekeeping per project.
func (c *Client) FindDeactivateCandidatesPerProject(
ctx context.Context,
Expand Down Expand Up @@ -703,6 +766,37 @@ func (c *Client) FindDeactivateCandidatesPerProject(
return clientInfos, nil
}

// FindDeactivateCandidates finds the clients that need housekeeping.
func (c *Client) FindDeactivateCandidates(
fourjae marked this conversation as resolved.
Show resolved Hide resolved
ctx context.Context,
documentHardDeletionCandidateLimitPerProject int,
projectFetchSize int,
lastProjectID types.ID,
) (types.ID, []*database.ClientInfo, error) {
projects, err := c.FindNextNCyclingProjectInfos(ctx, projectFetchSize, lastProjectID)
if err != nil {
return database.DefaultProjectID, nil, fmt.Errorf("failed to find cycling project infos: %w", err)
}

var candidates []*database.ClientInfo
for _, project := range projects {
clientInfos, err := c.FindDeactivateCandidatesPerProject(ctx, project, documentHardDeletionCandidateLimitPerProject)
if err != nil {
return database.DefaultProjectID, nil, err
}

candidates = append(candidates, clientInfos...)
}

var topProjectID types.ID
if len(projects) < projectFetchSize {
topProjectID = database.DefaultProjectID
} else {
topProjectID = projects[len(projects)-1].ID
}
return topProjectID, candidates, nil
}

// FindDocInfoByKeyAndOwner finds the document of the given key. If the
// createDocIfNotExist condition is true, create the document if it does not
// exist.
Expand Down
4 changes: 4 additions & 0 deletions server/backend/database/mongo/client_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -124,4 +124,8 @@ func TestClient(t *testing.T) {
t.Run("IsDocumentAttached test", func(t *testing.T) {
testcases.RunIsDocumentAttachedTest(t, cli, dummyProjectID)
})

t.Run("DocumentHardDeletion test", func(t *testing.T) {
testcases.RunDocumentHardDeletionTest(t, cli)
})
}
51 changes: 51 additions & 0 deletions server/backend/database/testcases/testcases.go
Original file line number Diff line number Diff line change
Expand Up @@ -1410,3 +1410,54 @@ func AssertKeys(t *testing.T, expectedKeys []key.Key, infos []*database.DocInfo)
}
assert.EqualValues(t, expectedKeys, keys)
}

// RunDocumentHardDeletionTest runs Delete document permanently
fourjae marked this conversation as resolved.
Show resolved Hide resolved
func RunDocumentHardDeletionTest(t *testing.T, db database.Database) {
t.Run("housekeeping DocumentHardDeletion test", func(t *testing.T) {
ctx := context.Background()
projectInfo, err := db.CreateProjectInfo(ctx, t.Name(), dummyOwnerID, clientDeactivateThreshold)
assert.NoError(t, err)
projectID := projectInfo.ID

// Create a client and two documents
clientInfo, err := db.ActivateClient(ctx, projectID, t.Name())
assert.NoError(t, err)
docInfo, err := db.FindDocInfoByKeyAndOwner(ctx, clientInfo.RefKey(), helper.TestDocKey(t), true)
assert.NoError(t, err)
assert.NoError(t, clientInfo.AttachDocument(docInfo.ID, false))
assert.NoError(t, db.UpdateClientInfoAfterPushPull(ctx, clientInfo, docInfo))

assert.NoError(t, clientInfo.RemoveDocument(docInfo.ID))

doc := document.New(key.Key(t.Name()))
pack := doc.CreateChangePack()
err = db.CreateChangeInfos(ctx, projectID, docInfo, 0, pack.Changes, true)
assert.NoError(t, err)

fetchSize := 100
lastProjectID := database.DefaultProjectID

var candidates []*database.DocInfo
GracePeriod := "0s"
documentHardDeletionGracefulPeriod, err := gotime.ParseDuration(GracePeriod)
assert.NoError(t, err)

candidates, err = db.FindDocumentHardDeletionCandidatesPerProject(
ctx,
projectInfo,
fetchSize,
documentHardDeletionGracefulPeriod,
)
assert.NoError(t, err)
assert.Equal(t, database.DefaultProjectID, lastProjectID)

deletedDocumentsCount, err := db.DeleteDocument(ctx, candidates)
assert.NoError(t, err)
assert.Equal(t, int(deletedDocumentsCount), len(candidates))

_, err = db.FindDocInfoByRefKey(ctx, docInfo.RefKey())
assert.ErrorIs(t, err, database.ErrDocumentNotFound)

})

}
Loading
Loading