Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add document hard deletion functionality to housekeeping tasks. #718

Draft
wants to merge 30 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from 17 commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
5f32970
Add document hard deletion functionality to housekeeping tasks.
fourjae Dec 10, 2023
d4b61a6
Modified function for document removal, added memdb-based function, r…
fourjae Jan 6, 2024
5429670
Add document hard deletion functionality to housekeeping tasks.
fourjae Dec 10, 2023
c310a64
Modified function for document removal, added memdb-based function, r…
fourjae Jan 6, 2024
30c95b2
Added HardDeletion testCode for MongoDB, Changed function return type…
fourjae Jan 28, 2024
6eadd87
Added Hard Deletion test Code for MemoryDB and MemoryDB Based Functio…
fourjae Jan 28, 2024
89ad3f7
Conflict Resolution
fourjae Jan 28, 2024
5367c34
update default Hard Deletion Limit Per Project Value 100 -> 500
fourjae Jan 28, 2024
8053340
second Conflict Resolution
fourjae Jan 29, 2024
7daf5de
Changed and Deleted all functions and test code to match the changed …
fourjae Jan 29, 2024
764babd
Change housekeeping Structure for intervals, Changed and Deleted all …
fourjae Jan 29, 2024
0066e8b
Fix missing housekeeping config for TestMain
fourjae Jan 29, 2024
696031a
Fix missing housekeeping config for Test Config
fourjae Jan 29, 2024
e26ba6b
Fix missing housekeeping config for Test Config
fourjae Jan 29, 2024
b158ee9
Revert 'Change housekeeping Structure for intervals'
fourjae Jan 30, 2024
1a4d2c3
Added DeleteAfterTime value to housekeeping config setting and modifi…
fourjae Jan 30, 2024
78a83f0
Addition of missing housekeeping config
fourjae Jan 31, 2024
ad81cfe
All comments have been reflected, some function changes and comment t…
fourjae Feb 12, 2024
d45f2ab
Change the FindDocumentHardDeletionCandidatesPerProject condition fro…
fourjae Feb 12, 2024
3bb69e0
merge solve
fourjae Jul 27, 2024
96f70c4
Changed document hard deletion code to improve housekeeping structure
fourjae Jul 29, 2024
6162e71
merge conflict resolved
fourjae Jul 30, 2024
c171085
DocumentHardDeletionGracefulPeriod type and value change
fourjae Jul 30, 2024
13369fc
coderabbitai comments reflected,
fourjae Aug 1, 2024
6cc0e19
Resolving race conditions and separating gocron task code
fourjae Aug 2, 2024
7d636a3
testConfig ProjectFetchSize change
fourjae Aug 3, 2024
ce7abac
Merge branch 'main' into DB-hard-delete
fourjae Aug 3, 2024
9bef472
Merge branch 'main' into DB-hard-delete
fourjae Aug 27, 2024
16d8749
Comment reflection, large-scale editing
fourjae Sep 4, 2024
86a3f5d
Merge remote-tracking branch 'upstream/main' into DB-hard-delete
hackerwins Sep 12, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions cmd/yorkie/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ var (

adminTokenDuration time.Duration
housekeepingInterval time.Duration
deleteAfterTime time.Duration
krapie marked this conversation as resolved.
Show resolved Hide resolved
clientDeactivateThreshold string

mongoConnectionURI string
Expand Down Expand Up @@ -70,6 +71,7 @@ func newServerCmd() *cobra.Command {
conf.Backend.ProjectInfoCacheTTL = projectInfoCacheTTL.String()

conf.Housekeeping.Interval = housekeepingInterval.String()
conf.Housekeeping.DeleteAfterTime = deleteAfterTime.String()

if mongoConnectionURI != "" {
conf.Mongo = &mongo.Config{
Expand Down Expand Up @@ -205,6 +207,18 @@ func init() {
server.DefaultHousekeepingCandidatesLimitPerProject,
"candidates limit per project for a single housekeeping run",
)
cmd.Flags().IntVar(
&conf.Housekeeping.DocumentHardDeletionLimitPerProject,
"housekeeping-DocumentHardDeletion-limit-per-project",
server.DefaultHousekeepingDocumentHardDeletionLimitPerProject,
krapie marked this conversation as resolved.
Show resolved Hide resolved
"DocumentHardDeletion limit per project for a single housekeeping run",
)
cmd.Flags().DurationVar(
&deleteAfterTime,
"housekeeping-DocumentHardDeletion-delete-after-time",
server.DefaultHousekeepingDeleteAfterTime,
"DocumentHardDeletion delete after time for a single housekeeping run",
krapie marked this conversation as resolved.
Show resolved Hide resolved
)
cmd.Flags().IntVar(
&conf.Housekeeping.ProjectFetchSize,
"housekeeping-project-fetch-size",
Expand Down
16 changes: 16 additions & 0 deletions server/backend/database/database.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ package database
import (
"context"
"errors"
gotime "time"

"github.com/yorkie-team/yorkie/api/types"
"github.com/yorkie-team/yorkie/pkg/document"
Expand Down Expand Up @@ -157,6 +158,21 @@ type Database interface {
candidatesLimit int,
) ([]*ClientInfo, error)

// FindDocumentHardDeletionCandidates finds the documents that are candidates
// for hard deletion by housekeeping. Projects are fetched in pages of
// projectFetchSize starting from lastProjectID, and up to
// candidatesLimitPerProject documents are collected from each project.
// It returns a project ID together with the collected candidates.
// NOTE(review): the returned ID looks like the cursor to pass back as
// lastProjectID on the next housekeeping run — confirm against the caller.
FindDocumentHardDeletionCandidates(
ctx context.Context,
candidatesLimitPerProject int,
projectFetchSize int,
deleteAfterTime gotime.Duration,
lastProjectID types.ID,
) (types.ID, []*DocInfo, error)

// DocumentHardDeletion permanently deletes the given documents.
DocumentHardDeletion(
krapie marked this conversation as resolved.
Show resolved Hide resolved
ctx context.Context,
candidate []*DocInfo,
krapie marked this conversation as resolved.
Show resolved Hide resolved
) error

// FindDocInfoByKey finds the document of the given key.
FindDocInfoByKey(
ctx context.Context,
Expand Down
98 changes: 98 additions & 0 deletions server/backend/database/memory/database.go
Original file line number Diff line number Diff line change
Expand Up @@ -619,6 +619,44 @@ func (d *DB) UpdateClientInfoAfterPushPull(
return nil
}

// FindDocumentHardDeletionCandidatesPerProject finds the removed documents of
// the given project that are candidates for hard deletion.
//
// A document is a candidate when it has been removed (its RemovedAt is set)
// and its RemovedAt is not after now+deleteAfterTime. At most candidatesLimit
// documents are returned; a candidatesLimit of 0 disables the limit.
func (d *DB) FindDocumentHardDeletionCandidatesPerProject(
	_ context.Context,
	project *database.ProjectInfo,
	candidatesLimit int,
	deleteAfterTime gotime.Duration,
) ([]*database.DocInfo, error) {
	txn := d.db.Txn(false)
	defer txn.Abort()

	iterator, err := txn.Get(
		tblDocuments,
		"project_id",
		project.ID.String(),
	)
	if err != nil {
		return nil, fmt.Errorf("fetch hard deletion candidates: %w", err)
	}

	// NOTE(review): the threshold is now+deleteAfterTime, so a positive
	// duration makes every already-removed document eligible immediately.
	// If deleteAfterTime is meant as a grace period it should probably be
	// subtracted instead — confirm the sign convention of the config value.
	threshold := gotime.Now().Add(deleteAfterTime)

	var documents []*database.DocInfo
	for raw := iterator.Next(); raw != nil; raw = iterator.Next() {
		document := raw.(*database.DocInfo)
		if candidatesLimit != 0 && candidatesLimit <= len(documents) {
			break
		}

		// A zero RemovedAt is taken to mean the document has not been
		// removed. Without this guard the zero time is never after the
		// threshold, so live documents would be selected for hard deletion.
		if document.RemovedAt.IsZero() {
			continue
		}

		if !document.RemovedAt.After(threshold) {
			documents = append(documents, document)
		}
	}

	return documents, nil
}

// FindDeactivateCandidatesPerProject finds the clients that need housekeeping per project.
func (d *DB) FindDeactivateCandidatesPerProject(
_ context.Context,
Expand Down Expand Up @@ -663,6 +701,66 @@ func (d *DB) FindDeactivateCandidatesPerProject(
return infos, nil
}

// FindDocumentHardDeletionCandidates finds the documents that are candidates
// for hard deletion.
//
// It fetches up to projectFetchSize projects following lastProjectID and
// collects up to candidatesLimitPerProject candidates from each of them.
// The returned ID is database.DefaultProjectID when fewer projects than
// requested were fetched, and the ID of the last fetched project otherwise.
func (d *DB) FindDocumentHardDeletionCandidates(
	ctx context.Context,
	candidatesLimitPerProject int,
	projectFetchSize int,
	deletedAfterTime gotime.Duration,
	lastProjectID types.ID,
) (types.ID, []*database.DocInfo, error) {
	projects, err := d.FindNextNCyclingProjectInfos(ctx, projectFetchSize, lastProjectID)
	if err != nil {
		return database.DefaultProjectID, nil, err
	}

	var candidates []*database.DocInfo
	for _, project := range projects {
		infos, err := d.FindDocumentHardDeletionCandidatesPerProject(
			ctx,
			project,
			candidatesLimitPerProject,
			deletedAfterTime,
		)
		if err != nil {
			return database.DefaultProjectID, nil, err
		}

		candidates = append(candidates, infos...)
	}

	// The n > 0 check keeps an empty page from indexing projects[-1], which
	// the plain size comparison allowed when projectFetchSize <= 0.
	topProjectID := database.DefaultProjectID
	if n := len(projects); n > 0 && n >= projectFetchSize {
		topProjectID = projects[n-1].ID
	}

	return topProjectID, candidates, nil
}

// DocumentHardDeletion permanently deletes the given documents from the
// documents table.
//
// All deletions run in one write transaction: if any delete fails the
// function returns before Commit and the deferred Abort discards the
// transaction. An empty candidate list is a no-op.
// NOTE(review): only rows in tblDocuments are removed here; data stored in
// other tables for these documents is not touched by this function.
func (d *DB) DocumentHardDeletion(
	_ context.Context,
	candidates []*database.DocInfo,
) error {
	if len(candidates) == 0 {
		return nil
	}

	txn := d.db.Txn(true)
	defer txn.Abort()

	for _, candidate := range candidates {
		if err := txn.Delete(tblDocuments, candidate); err != nil {
			// The previous message was copied from the lookup path and
			// wrongly reported a fetch failure.
			return fmt.Errorf("delete hard deletion candidate: %w", err)
		}
	}
	txn.Commit()

	return nil
}

// FindDocInfoByKeyAndOwner finds the document of the given key. If the
// createDocIfNotExist condition is true, create the document if it does not
// exist.
Expand Down
4 changes: 4 additions & 0 deletions server/backend/database/memory/database_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -103,4 +103,8 @@ func TestDB(t *testing.T) {
t.Run("IsDocumentAttached test", func(t *testing.T) {
testcases.RunIsDocumentAttachedTest(t, db, projectID)
})

t.Run("DocumentHardDeletion test", func(t *testing.T) {
testcases.RunDocumentHardDeletion(t, db)
})
}
5 changes: 5 additions & 0 deletions server/backend/database/memory/indexes.go
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,11 @@ var schema = &memdb.DBSchema{
Unique: true,
Indexer: &memdb.StringFieldIndex{Field: "ID"},
},
// project_id indexes documents by their owning project. It must not be
// unique: many documents share one project, and a unique index would keep
// only a single entry per project ID.
"project_id": {
	Name:    "project_id",
	Indexer: &memdb.StringFieldIndex{Field: "ProjectID"},
},
"project_id_id": {
Name: "project_id_id",
Unique: true,
Expand Down
129 changes: 129 additions & 0 deletions server/backend/database/mongo/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -227,6 +227,36 @@ func (c *Client) CreateProjectInfo(
return info, nil
}

// DocumentHardDeletion Deletes the documents completely.
func (c *Client) DocumentHardDeletion(
ctx context.Context,
candidates []*database.DocInfo,
) error {
if len(candidates) <= 0 {
return nil
}

var idList []primitive.ObjectID
for _, docInfo := range candidates {
encodedID, err := encodeID(docInfo.ID)
krapie marked this conversation as resolved.
Show resolved Hide resolved
if err != nil {
return err
}
idList = append(idList, encodedID)
}

_, err := c.collection(ColDocuments).DeleteMany(
ctx,
bson.M{"_id": bson.M{"$in": idList}},
)

if err != nil {
return fmt.Errorf("deletion Error : %w", err)
}

return nil
}

// FindNextNCyclingProjectInfos finds the next N cycling projects from the given projectID.
func (c *Client) FindNextNCyclingProjectInfos(
ctx context.Context,
Expand Down Expand Up @@ -619,6 +649,38 @@ func (c *Client) UpdateClientInfoAfterPushPull(
return nil
}

// FindDocumentHardDeletionCandidatesPerProject finds the clients that need housekeeping per project.
func (c *Client) FindDocumentHardDeletionCandidatesPerProject(
ctx context.Context,
project *database.ProjectInfo,
candidatesLimit int,
deleteAfterTime gotime.Duration,
) ([]*database.DocInfo, error) {
encodedProjectID, err := encodeID(project.ID)
krapie marked this conversation as resolved.
Show resolved Hide resolved
if err != nil {
return nil, err
}

currentTime := gotime.Now()
conditionDeleteAfterTime := currentTime.Add(deleteAfterTime)

var DocInfos []*database.DocInfo
cursor, err := c.collection(ColDocuments).Find(ctx, bson.M{
"project_id": encodedProjectID,
"removed_at": bson.M{"$lt": conditionDeleteAfterTime},
krapie marked this conversation as resolved.
Show resolved Hide resolved
}, options.Find().SetLimit(int64(candidatesLimit)))

if err != nil {
return nil, err
}

if err := cursor.All(ctx, &DocInfos); err != nil {
return nil, fmt.Errorf("fetch hard deletion candidates: %w", err)
}

return DocInfos, nil
}

// FindDeactivateCandidatesPerProject finds the clients that need housekeeping per project.
func (c *Client) FindDeactivateCandidatesPerProject(
ctx context.Context,
Expand Down Expand Up @@ -650,6 +712,73 @@ func (c *Client) FindDeactivateCandidatesPerProject(
return clientInfos, nil
}

// FindDeactivateCandidates finds the clients that need housekeeping.
func (c *Client) FindDeactivateCandidates(
fourjae marked this conversation as resolved.
Show resolved Hide resolved
ctx context.Context,
candidatesLimitPerProject int,
projectFetchSize int,
lastProjectID types.ID,
) (types.ID, []*database.ClientInfo, error) {
projects, err := c.FindNextNCyclingProjectInfos(ctx, projectFetchSize, lastProjectID)
if err != nil {
return database.DefaultProjectID, nil, err
fourjae marked this conversation as resolved.
Show resolved Hide resolved
}

var candidates []*database.ClientInfo
for _, project := range projects {
clientInfos, err := c.FindDeactivateCandidatesPerProject(ctx, project, candidatesLimitPerProject)
if err != nil {
return database.DefaultProjectID, nil, err
}

candidates = append(candidates, clientInfos...)
}

var topProjectID types.ID
if len(projects) < projectFetchSize {
topProjectID = database.DefaultProjectID
} else {
topProjectID = projects[len(projects)-1].ID
}
return topProjectID, candidates, nil
}

// FindDocumentHardDeletionCandidates finds the documents that are candidates
// for hard deletion.
//
// It fetches up to projectFetchSize projects following lastProjectID and
// collects up to candidatesLimitPerProject candidates from each of them.
// The returned ID is database.DefaultProjectID when fewer projects than
// requested were fetched, and the ID of the last fetched project otherwise.
func (c *Client) FindDocumentHardDeletionCandidates(
	ctx context.Context,
	candidatesLimitPerProject int,
	projectFetchSize int,
	deletedAfterTime gotime.Duration,
	lastProjectID types.ID,
) (types.ID, []*database.DocInfo, error) {
	projects, err := c.FindNextNCyclingProjectInfos(ctx, projectFetchSize, lastProjectID)
	if err != nil {
		return database.DefaultProjectID, nil, err
	}

	var candidates []*database.DocInfo
	for _, project := range projects {
		docInfos, err := c.FindDocumentHardDeletionCandidatesPerProject(
			ctx,
			project,
			candidatesLimitPerProject,
			deletedAfterTime,
		)
		if err != nil {
			return database.DefaultProjectID, nil, err
		}

		candidates = append(candidates, docInfos...)
	}

	// The n > 0 check keeps an empty page from indexing projects[-1], which
	// the plain size comparison allowed when projectFetchSize <= 0.
	topProjectID := database.DefaultProjectID
	if n := len(projects); n > 0 && n >= projectFetchSize {
		topProjectID = projects[n-1].ID
	}

	return topProjectID, candidates, nil
}

// FindDocInfoByKeyAndOwner finds the document of the given key. If the
// createDocIfNotExist condition is true, create the document if it does not
// exist.
Expand Down
4 changes: 4 additions & 0 deletions server/backend/database/mongo/client_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -120,4 +120,8 @@ func TestClient(t *testing.T) {
t.Run("IsDocumentAttached test", func(t *testing.T) {
testcases.RunIsDocumentAttachedTest(t, cli, dummyProjectID)
})

t.Run("DocumentHardDeletion test", func(t *testing.T) {
testcases.RunDocumentHardDeletion(t, cli)
})
}
Loading
Loading