diff --git a/cleaner_test.go b/cleaner_test.go index 78cc8e7600..4889225bf4 100644 --- a/cleaner_test.go +++ b/cleaner_test.go @@ -16,7 +16,7 @@ import ( "github.com/stretchr/testify/require" ) -func TestArchiveCleaner(t *testing.T) { +func TestCleaner(t *testing.T) { dbs := make(map[string]*DB) defer func() { for _, db := range dbs { @@ -26,19 +26,14 @@ func TestArchiveCleaner(t *testing.T) { mem := vfs.NewMem() var memLog base.InMemLogger - opts := (&Options{ - Cleaner: ArchiveCleaner{}, - FS: vfs.WithLogging(mem, memLog.Infof), - WALDir: "wal", - }).WithFSDefaults() - + fs := vfs.WithLogging(mem, memLog.Infof) datadriven.RunTest(t, "testdata/cleaner", func(t *testing.T, td *datadriven.TestData) string { + memLog.Reset() switch td.Cmd { case "batch": if len(td.CmdArgs) != 1 { return "batch " } - memLog.Reset() d := dbs[td.CmdArgs[0].String()] b := d.NewBatch() if err := runBatchDefineCmd(td, b); err != nil { @@ -53,7 +48,6 @@ func TestArchiveCleaner(t *testing.T) { if len(td.CmdArgs) != 1 { return "compact " } - memLog.Reset() d := dbs[td.CmdArgs[0].String()] if err := d.Compact(nil, []byte("\xff"), false); err != nil { return err.Error() @@ -64,13 +58,24 @@ func TestArchiveCleaner(t *testing.T) { if len(td.CmdArgs) != 1 { return "flush " } - memLog.Reset() d := dbs[td.CmdArgs[0].String()] if err := d.Flush(); err != nil { return err.Error() } return memLog.String() + case "close": + if len(td.CmdArgs) != 1 { + return "close " + } + dbDir := td.CmdArgs[0].String() + d := dbs[dbDir] + if err := d.Close(); err != nil { + return err.Error() + } + delete(dbs, dbDir) + return memLog.String() + case "list": if len(td.CmdArgs) != 1 { return "list " @@ -83,19 +88,25 @@ func TestArchiveCleaner(t *testing.T) { return fmt.Sprintf("%s\n", strings.Join(paths, "\n")) case "open": - if len(td.CmdArgs) != 1 && len(td.CmdArgs) != 2 { - return "open [readonly]" + if len(td.CmdArgs) < 1 || len(td.CmdArgs) > 3 { + return "open [archive] [readonly]" } - opts.ReadOnly = false - if len(td.CmdArgs) == 2 { - if td.CmdArgs[1].String() != "readonly" { - return "open [readonly]" + dir := td.CmdArgs[0].String() + opts := (&Options{ + FS: fs, + WALDir: dir + "_wal", + }).WithFSDefaults() + + for i := 1; i < len(td.CmdArgs); i++ { + switch td.CmdArgs[i].String() { + case "readonly": + opts.ReadOnly = true + case "archive": + opts.Cleaner = ArchiveCleaner{} + default: + return "open [archive] [readonly]" } - opts.ReadOnly = true } - - memLog.Reset() - dir := td.CmdArgs[0].String() d, err := Open(dir, opts) if err != nil { return err.Error() @@ -103,6 +114,18 @@ func TestArchiveCleaner(t *testing.T) { dbs[dir] = d return memLog.String() + case "create-bogus-file": + if len(td.CmdArgs) != 1 { + return "create-bogus-file " + } + dst, err := fs.Create(td.CmdArgs[0].String()) + require.NoError(t, err) + _, err = dst.Write([]byte("bogus data")) + require.NoError(t, err) + require.NoError(t, dst.Sync()) + require.NoError(t, dst.Close()) + return memLog.String() + default: return fmt.Sprintf("unknown command: %s", td.Cmd) } diff --git a/compaction.go b/compaction.go index e0d820a6b8..ed52281f67 100644 --- a/compaction.go +++ b/compaction.go @@ -3268,20 +3268,29 @@ func (d *DB) scanObsoleteFiles(list []string) { } obsoleteOptions = append(obsoleteOptions, fi) case fileTypeTable: - // TODO(radu): use the objstorage provider to find obsolete tables instead. - if _, ok := liveFileNums[fileNum]; ok { + // Objects are handled through the objstorage provider below. + default: + // Don't delete files we don't know about. + } + } + + objects := d.objProvider.List() + for _, obj := range objects { + switch obj.FileType { + case fileTypeTable: + if _, ok := liveFileNums[obj.FileNum]; ok { continue } fileMeta := &fileMetadata{ - FileNum: fileNum, + FileNum: obj.FileNum, } - if stat, err := d.opts.FS.Stat(filename); err == nil { - fileMeta.Size = uint64(stat.Size()) + if size, err := d.objProvider.Size(obj); err == nil { + fileMeta.Size = uint64(size) } obsoleteTables = append(obsoleteTables, fileMeta) + default: - // Don't delete files we don't know about. - continue + // Ignore object types we don't know about. } } diff --git a/objstorage/provider.go b/objstorage/provider.go index f531fc1541..2a5561aea6 100644 --- a/objstorage/provider.go +++ b/objstorage/provider.go @@ -7,6 +7,7 @@ package objstorage import ( "io" "os" + "sort" "sync" "github.com/cockroachdb/errors" @@ -306,15 +307,6 @@ func (p *Provider) LinkOrCopyFromLocal( panic("unimplemented") } -// Path returns an internal, implementation-dependent path for the object. It is -// meant to be used for informational purposes (like logging). -func (p *Provider) Path(meta ObjectMetadata) string { - if !meta.IsShared() { - return p.vfsPath(meta.FileType, meta.FileNum) - } - panic("unimplemented") -} - // Lookup returns the metadata of an object that is already known to the Provider. // Does not perform any I/O. func (p *Provider) Lookup(fileType base.FileType, fileNum base.FileNum) (ObjectMetadata, error) { @@ -337,6 +329,37 @@ func (p *Provider) Lookup(fileType base.FileType, fileNum base.FileNum) (ObjectM return meta, nil } +// Path returns an internal, implementation-dependent path for the object. It is +// meant to be used for informational purposes (like logging). +func (p *Provider) Path(meta ObjectMetadata) string { + if !meta.IsShared() { + return p.vfsPath(meta.FileType, meta.FileNum) + } + panic("unimplemented") +} + +// Size returns the size of the object. +func (p *Provider) Size(meta ObjectMetadata) (int64, error) { + if !meta.IsShared() { + return p.vfsSize(meta.FileType, meta.FileNum) + } + panic("unimplemented") +} + +// List returns the objects currently known to the provider. Does not perform any I/O. +func (p *Provider) List() []ObjectMetadata { + p.mu.RLock() + defer p.mu.RUnlock() + res := make([]ObjectMetadata, len(p.mu.knownObjects)) + for _, meta := range p.mu.knownObjects { + res = append(res, meta) + } + sort.Slice(res, func(i, j int) bool { + return res[i].FileNum < res[j].FileNum + }) + return res +} + func (p *Provider) addMetadata(meta ObjectMetadata) { p.mu.Lock() defer p.mu.Unlock() diff --git a/objstorage/vfs.go b/objstorage/vfs.go index 013b96cbdd..c91bef1515 100644 --- a/objstorage/vfs.go +++ b/objstorage/vfs.go @@ -63,3 +63,12 @@ func (p *Provider) vfsFindExisting(ls []string) []ObjectMetadata { } return res } + +func (p *Provider) vfsSize(fileType base.FileType, fileNum base.FileNum) (int64, error) { + filename := p.vfsPath(fileType, fileNum) + stat, err := p.st.FS.Stat(filename) + if err != nil { + return 0, err + } + return stat.Size(), nil +} diff --git a/testdata/cleaner b/testdata/cleaner index 886edeee4b..753cdce5cc 100644 --- a/testdata/cleaner +++ b/testdata/cleaner @@ -1,9 +1,10 @@ -open db +# Test archive cleaner. +open db archive ---- mkdir-all: db 0755 -mkdir-all: wal 0755 +mkdir-all: db_wal 0755 open-dir: db -open-dir: wal +open-dir: db_wal lock: db/LOCK open-dir: db open-dir: db @@ -17,8 +18,8 @@ rename: db/temporary.000001.dbtmp -> db/CURRENT sync: db open-dir: db sync: db/MANIFEST-000001 -create: wal/000002.log -sync: wal +create: db_wal/000002.log +sync: db_wal create: db/temporary.000003.dbtmp sync: db/temporary.000003.dbtmp close: db/temporary.000003.dbtmp @@ -30,40 +31,40 @@ set a 1 set b 2 set c 3 ---- -sync-data: wal/000002.log +sync-data: db_wal/000002.log flush db ---- -sync-data: wal/000002.log -close: wal/000002.log -create: wal/000004.log -sync: wal +sync-data: db_wal/000002.log +close: db_wal/000002.log +create: db_wal/000004.log +sync: db_wal create: db/000005.sst sync-data: db/000005.sst close: db/000005.sst sync: db sync: db/MANIFEST-000001 -mkdir-all: wal/archive 0755 -rename: wal/000002.log -> wal/archive/000002.log +mkdir-all: db_wal/archive 0755 +rename: db_wal/000002.log -> db_wal/archive/000002.log batch db set d 4 ---- -sync-data: wal/000004.log +sync-data: db_wal/000004.log compact db ---- -sync-data: wal/000004.log -close: wal/000004.log -create: wal/000006.log -sync: wal +sync-data: db_wal/000004.log +close: db_wal/000004.log +create: db_wal/000006.log +sync: db_wal create: db/000007.sst sync-data: db/000007.sst close: db/000007.sst sync: db sync: db/MANIFEST-000001 -mkdir-all: wal/archive 0755 -rename: wal/000004.log -> wal/archive/000004.log +mkdir-all: db_wal/archive 0755 +rename: db_wal/000004.log -> db_wal/archive/000004.log create: db/000008.sst sync-data: db/000008.sst close: db/000008.sst @@ -83,7 +84,7 @@ MANIFEST-000001 OPTIONS-000003 archive -list wal +list db_wal ---- 000006.log archive @@ -93,7 +94,118 @@ list db/archive 000005.sst 000007.sst -list wal/archive +list db_wal/archive ---- 000002.log 000004.log + +# Test cleanup of extra sstables on open. +open db1 +---- +mkdir-all: db1 0755 +mkdir-all: db1_wal 0755 +open-dir: db1 +open-dir: db1_wal +lock: db1/LOCK +open-dir: db1 +open-dir: db1 +create: db1/MANIFEST-000001 +sync: db1/MANIFEST-000001 +remove: db1/temporary.000001.dbtmp +create: db1/temporary.000001.dbtmp +sync: db1/temporary.000001.dbtmp +close: db1/temporary.000001.dbtmp +rename: db1/temporary.000001.dbtmp -> db1/CURRENT +sync: db1 +open-dir: db1 +sync: db1/MANIFEST-000001 +create: db1_wal/000002.log +sync: db1_wal +create: db1/temporary.000003.dbtmp +sync: db1/temporary.000003.dbtmp +close: db1/temporary.000003.dbtmp +rename: db1/temporary.000003.dbtmp -> db1/OPTIONS-000003 +sync: db1 + +batch db1 +set a 1 +set b 2 +set c 3 +---- +sync-data: db1_wal/000002.log + +flush db1 +---- +sync-data: db1_wal/000002.log +close: db1_wal/000002.log +create: db1_wal/000004.log +sync: db1_wal +create: db1/000005.sst +sync-data: db1/000005.sst +close: db1/000005.sst +sync: db1 +sync: db1/MANIFEST-000001 + +close db1 +---- +close: db1 +sync-data: db1_wal/000004.log +close: db1_wal/000004.log +close: db1/MANIFEST-000001 +close: db1 +close: db1 +close: db1_wal +close: db1 + +create-bogus-file db1/000123.sst +---- +create: db1/000123.sst +sync: db1/000123.sst +close: db1/000123.sst + +create-bogus-file db1/000456.sst +---- +create: db1/000456.sst +sync: db1/000456.sst +close: db1/000456.sst + +open db1 +---- +mkdir-all: db1 0755 +mkdir-all: db1_wal 0755 +open-dir: db1 +open-dir: db1_wal +lock: db1/LOCK +open-dir: db1 +open-dir: db1 +open-dir: db1 +sync: db1 +create: db1/MANIFEST-000458 +sync: db1/MANIFEST-000458 +remove: db1/temporary.000458.dbtmp +create: db1/temporary.000458.dbtmp +sync: db1/temporary.000458.dbtmp +close: db1/temporary.000458.dbtmp +rename: db1/temporary.000458.dbtmp -> db1/CURRENT +sync: db1 +create: db1_wal/000457.log +sync: db1_wal +create: db1/temporary.000459.dbtmp +sync: db1/temporary.000459.dbtmp +close: db1/temporary.000459.dbtmp +rename: db1/temporary.000459.dbtmp -> db1/OPTIONS-000459 +sync: db1 +remove: db1_wal/000002.log +remove: db1_wal/000004.log +remove: db1/000123.sst +remove: db1/000456.sst +remove: db1/OPTIONS-000003 + +list db1 +---- +000005.sst +CURRENT +LOCK +MANIFEST-000001 +MANIFEST-000458 +OPTIONS-000459