Skip to content

Commit

Permalink
🎨 Support for cleaning up unreferenced data snapshots siyuan-note/siy…
Browse files Browse the repository at this point in the history
  • Loading branch information
88250 committed Apr 20, 2023
1 parent 782a2db commit 43494aa
Show file tree
Hide file tree
Showing 3 changed files with 180 additions and 2 deletions.
15 changes: 14 additions & 1 deletion repo.go
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,20 @@ func NewRepo(dataPath, repoPath, historyPath, tempPath, deviceID string, aesKey

var ErrRepoFatalErr = errors.New("repo fatal error")

// lock is the repository lock: operations such as Checkout, Index and Sync
// must not run at the same time.
var lock sync.Mutex

// PurgeStat summarizes the result of a purge.
type PurgeStat struct {
	// Objects is the number of unreferenced objects that were found, and
	// Indexes is the number of unreferenced indexes that were found.
	Objects, Indexes int
	// Size is the sum of the sizes reported for the unreferenced objects
	// (presumably in bytes; confirm against Store.Stat).
	Size int64
}

// Purge cleans up all unreferenced data.
//
// The package-level repository lock is held for the whole call; the actual
// work is delegated to the underlying store and its result is returned as is.
func (repo *Repo) Purge() (ret *PurgeStat, err error) {
	lock.Lock()
	defer lock.Unlock()

	ret, err = repo.store.Purge()
	return ret, err
}

// GetIndex 从仓库根据 id 获取索引。
func (repo *Repo) GetIndex(id string) (index *entity.Index, err error) {
Expand Down
14 changes: 14 additions & 0 deletions repo_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,20 @@ const (
testDataCheckoutPath = "testdata/data-checkout"
)

// TestPurge resets the test data, obtains a repo from initIndex and checks
// that Purge on it completes without an error, logging the returned statistics.
func TestPurge(t *testing.T) {
	clearTestdata(t)
	subscribeEvents(t)

	repo, _ := initIndex(t) // the second result is not needed by this test
	stat, err := repo.Purge()
	if err != nil {
		// Fatalf ends the test on its own, so no explicit return follows.
		t.Fatalf("purge failed: %s", err)
	}

	t.Logf("purge stat: %#v", stat)
}

func TestIndexCheckout(t *testing.T) {
clearTestdata(t)
subscribeEvents(t)
Expand Down
153 changes: 152 additions & 1 deletion store.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,10 @@ package dejavu

import (
"errors"
"github.com/siyuan-note/logging"
"os"
"path/filepath"
"strings"

"github.com/88250/gulu"
"github.com/dgraph-io/ristretto"
Expand Down Expand Up @@ -55,6 +57,155 @@ func NewStore(path string, aesKey []byte) (ret *Store, err error) {
return
}

// Purge scans the store for indexes and objects that are not reachable from
// any ref and reports how many were found and how much space the objects take.
//
// Steps:
//  1. Every entry under <store.Path>/objects/<dir>/<name> is recorded as an
//     object whose ID is the directory name followed by the entry name.
//  2. Every entry under <store.Path>/indexes whose name is 40 characters long
//     is recorded as an index ID (a missing indexes directory is tolerated).
//  3. The index IDs named by the ref files are read with readRefs; any
//     recorded index that is not among them counts as unreferenced.
//  4. For each referenced index, its files and the chunks of those files are
//     marked as referenced objects.
//  5. Every recorded object that was not marked is stat'ed, counted and logged.
//
// NOTE: the actual deletion is still commented out below, so at the moment
// this is a dry run that only gathers statistics.
//
// Returns (nil, nil) when the objects directory does not exist. On any error
// before step 5 the stat is nil; if Stat fails in step 5 the partially filled
// stat is returned together with the error.
func (store *Store) Purge() (ret *PurgeStat, err error) {
	objectsDir := filepath.Join(store.Path, "objects")
	if !gulu.File.IsDir(objectsDir) {
		return nil, nil
	}

	buckets, err := os.ReadDir(objectsDir)
	if err != nil {
		return nil, err
	}

	// Step 1: collect the IDs of all stored objects.
	allObjects := map[string]bool{}
	for _, bucket := range buckets {
		if !bucket.IsDir() {
			continue
		}

		prefix := bucket.Name()
		children, readErr := os.ReadDir(filepath.Join(objectsDir, prefix))
		if readErr != nil {
			return nil, readErr
		}

		for _, child := range children {
			allObjects[prefix+child.Name()] = true
		}
	}

	// Step 2: collect the IDs of all stored indexes.
	allIndexes := map[string]bool{}
	if indexesDir := filepath.Join(store.Path, "indexes"); gulu.File.IsDir(indexesDir) {
		indexEntries, readErr := os.ReadDir(indexesDir)
		if readErr != nil {
			return nil, readErr
		}

		for _, indexEntry := range indexEntries {
			// Only names of exactly 40 characters are treated as index IDs.
			if name := indexEntry.Name(); len(name) == 40 {
				allIndexes[name] = true
			}
		}
	}

	// Step 3: indexes that are pointed to by a ref are the live ones.
	liveIndexes, err := store.readRefs()
	if err != nil {
		return nil, err
	}

	orphanIndexes := 0
	for indexID := range allIndexes {
		if !liveIndexes[indexID] {
			orphanIndexes++
		}
	}

	// Step 4: everything reachable from a live index is a live object.
	liveObjects := map[string]bool{}
	for indexID := range liveIndexes {
		index, indexErr := store.GetIndex(indexID)
		if indexErr != nil {
			return nil, indexErr
		}

		for _, fileID := range index.Files {
			liveObjects[fileID] = true

			file, fileErr := store.GetFile(fileID)
			if fileErr != nil {
				return nil, fileErr
			}

			for _, chunkID := range file.Chunks {
				liveObjects[chunkID] = true
			}
		}
	}

	// Step 5: account for every object that is not live.
	ret = &PurgeStat{Indexes: orphanIndexes}
	for objID := range allObjects {
		if liveObjects[objID] {
			continue
		}

		info, statErr := store.Stat(objID)
		if statErr != nil {
			return ret, statErr
		}

		ret.Size += info.Size()
		ret.Objects++

		logging.LogInfof("removing unreferenced object [%s]", objID)
		// Deletion is intentionally disabled for now:
		//if err = store.Remove(objID); nil != err {
		//	return
		//}
	}
	return ret, nil
}

// readRefs walks <store.Path>/refs and returns the set of index IDs stored in
// the ref files found there.
//
// Each regular file is expected to hold a single 40-character ID, optionally
// surrounded by whitespace. Files larger than 42 bytes are skipped without
// being read, and files whose trimmed content is not 40 characters long are
// skipped as well; both cases are logged as warnings rather than treated as
// errors. A missing refs directory yields an empty set and no error.
//
// The returned map is never nil. If the walk or a read fails, the error is
// returned along with whatever IDs were collected up to that point.
func (store *Store) readRefs() (ret map[string]bool, err error) {
	ret = map[string]bool{}

	refsDir := filepath.Join(store.Path, "refs")
	if !gulu.File.IsDir(refsDir) {
		return ret, nil
	}

	visit := func(path string, info os.FileInfo, walkErr error) error {
		if walkErr != nil {
			return walkErr
		}
		if info.IsDir() {
			return nil
		}

		// Cheap size check first so that oversized files are never read.
		if info.Size() > 42 {
			logging.LogWarnf("ref file [%s] is invalid", path)
			return nil
		}

		data, readErr := filelock.ReadFile(path)
		if readErr != nil {
			return readErr
		}

		id := strings.TrimSpace(string(data))
		if len(id) != 40 {
			logging.LogWarnf("ref file [%s] is invalid", path)
			return nil
		}

		ret[id] = true
		return nil
	}

	err = filepath.Walk(refsDir, visit)
	return ret, err
}

func (store *Store) PutIndex(index *entity.Index) (err error) {
if "" == index.ID {
return errors.New("invalid id")
Expand Down Expand Up @@ -204,7 +355,7 @@ func (store *Store) GetChunk(id string) (ret *entity.Chunk, err error) {

// Remove deletes the file that backs the object with the given id.
//
// The file path is resolved with store.AbsPath and the deletion goes through
// filelock.Remove, the same package this file uses for reading ref files.
// The file is removed exactly once: removing it a second time would fail on
// the now-missing file and mask the outcome of the first removal.
func (store *Store) Remove(id string) (err error) {
	_, file := store.AbsPath(id)
	return filelock.Remove(file)
}

Expand Down

0 comments on commit 43494aa

Please sign in to comment.