Skip to content

Commit

Permalink
dump: skip files in trash when backup metadata (#4479)
Browse files Browse the repository at this point in the history
  • Loading branch information
davies authored Mar 12, 2024
1 parent c4a7bd9 commit 4d97cdb
Show file tree
Hide file tree
Showing 17 changed files with 46 additions and 25 deletions.
6 changes: 5 additions & 1 deletion cmd/dump.go
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,10 @@ Details: https://juicefs.com/docs/community/metadata_dump_load`,
Name: "fast",
Usage: "speedup dump by load all metadata into memory",
},
&cli.BoolFlag{
Name: "skip-trash",
Usage: "skip files in trash",
},
},
}
}
Expand Down Expand Up @@ -105,7 +109,7 @@ func dump(ctx *cli.Context) (err error) {
if st := m.Chroot(meta.Background, metaConf.Subdir); st != 0 {
return st
}
if err := m.DumpMeta(w, 1, ctx.Bool("keep-secret-key"), ctx.Bool("fast")); err != nil {
if err := m.DumpMeta(w, 1, ctx.Bool("keep-secret-key"), ctx.Bool("fast"), ctx.Bool("skip-trash")); err != nil {
return err
}
logger.Infof("Dump metadata into %s succeed", dst)
Expand Down
4 changes: 4 additions & 0 deletions cmd/flags.go
Original file line number Diff line number Diff line change
Expand Up @@ -245,6 +245,10 @@ func metaFlags() []cli.Flag {
Value: "3600",
Usage: "interval (in seconds) to automatically backup metadata in the object storage (0 means disable backup)",
},
&cli.BoolFlag{
Name: "backup-skip-trash",
Usage: "skip files in trash when backup metadata",
},
&cli.StringFlag{
Name: "heartbeat",
Value: "12",
Expand Down
21 changes: 11 additions & 10 deletions cmd/mount.go
Original file line number Diff line number Diff line change
Expand Up @@ -246,15 +246,16 @@ func expandPathForEmbedded(addr string) string {

func getVfsConf(c *cli.Context, metaConf *meta.Config, format *meta.Format, chunkConf *chunk.Config) *vfs.Config {
cfg := &vfs.Config{
Meta: metaConf,
Format: *format,
Version: version.Version(),
Chunk: chunkConf,
BackupMeta: duration(c.String("backup-meta")),
Port: &vfs.Port{DebugAgent: debugAgent, PyroscopeAddr: c.String("pyroscope")},
PrefixInternal: c.Bool("prefix-internal"),
Pid: os.Getpid(),
PPid: os.Getppid(),
Meta: metaConf,
Format: *format,
Version: version.Version(),
Chunk: chunkConf,
BackupMeta: duration(c.String("backup-meta")),
BackupSkipTrash: c.Bool("backup-skip-trash"),
Port: &vfs.Port{DebugAgent: debugAgent, PyroscopeAddr: c.String("pyroscope")},
PrefixInternal: c.Bool("prefix-internal"),
Pid: os.Getpid(),
PPid: os.Getppid(),
}
skip_check := os.Getenv("SKIP_BACKUP_META_CHECK") == "true"
if !skip_check && cfg.BackupMeta > 0 && cfg.BackupMeta < time.Minute*5 {
Expand Down Expand Up @@ -362,7 +363,7 @@ func initBackgroundTasks(c *cli.Context, vfsConf *vfs.Config, metaConf *meta.Con
if !metaConf.ReadOnly && !metaConf.NoBGJob && vfsConf.BackupMeta > 0 {
registerer.MustRegister(vfs.LastBackupTimeG)
registerer.MustRegister(vfs.LastBackupDurationG)
go vfs.Backup(m, blob, vfsConf.BackupMeta)
go vfs.Backup(m, blob, vfsConf.BackupMeta, vfsConf.BackupSkipTrash)
}
if !c.Bool("no-usage-report") {
go usage.ReportUsage(m, version.Version())
Expand Down
1 change: 1 addition & 0 deletions docs/en/deployment/hadoop_java_sdk.md
Original file line number Diff line number Diff line change
Expand Up @@ -209,6 +209,7 @@ Please refer to the following table to set the relevant parameters of the JuiceF
| `juicefs.no-usage-report` | `false` | Whether to disable usage reporting. JuiceFS only collects anonymous usage data (e.g. version number); no user data or other sensitive data is collected. |
| `juicefs.no-bgjob` | `false` | Disable background jobs (clean-up, backup, etc.) |
| `juicefs.backup-meta` | 3600 | Interval (in seconds) to automatically back up metadata to the object storage (0 means disable backup) |
| `juicefs.backup-skip-trash` | `false` | Skip files and directories in trash when backing up metadata. |
| `juicefs.heartbeat` | 12 | Heartbeat interval (in seconds) between client and metadata engine. It's recommended that all clients use the same value. |

#### Multiple file systems configuration
Expand Down
3 changes: 3 additions & 0 deletions docs/en/reference/command_reference.md
Original file line number Diff line number Diff line change
Expand Up @@ -384,6 +384,8 @@ juicefs dump redis://localhost sub-meta-dump.json --subdir /dir/in/jfs
|`FILE`|Export file path, if not specified, it will be exported to standard output. If the filename ends with `.gz`, it will be automatically compressed.|
|`--subdir=path`|Only export metadata for the specified subdirectory.|
|`--keep-secret-key` <VersionAdd>1.1</VersionAdd> |Export object storage authentication information, the default is `false`. Since it is exported in plain text, pay attention to data security when using it. If the export file does not contain object storage authentication information, you need to use [`juicefs config`](#config) to reconfigure object storage authentication information after the subsequent import is completed.|
|`--fast` <VersionAdd>1.2</VersionAdd>|Use more memory to speed up the dump.|
|`--skip-trash` <VersionAdd>1.2</VersionAdd>|Skip files and directories in trash.|

### `juicefs load` {#load}

Expand Down Expand Up @@ -628,6 +630,7 @@ juicefs mount redis://localhost /mnt/jfs --backup-meta 0
|-|-|
|`--subdir=value`|mount a sub-directory as root (default: "")|
|`--backup-meta=3600`|interval (in seconds) to automatically back up metadata to the object storage (0 means disable backup) (default: "3600")|
|`--backup-skip-trash` <VersionAdd>1.2</VersionAdd>|skip files and directories in trash when backing up metadata (default: false)|
|`--heartbeat=12`|interval (in seconds) to send heartbeat; it's recommended that all clients use the same heartbeat value (default: "12")|
|`--read-only`|allow lookup/read operations only (default: false)|
|`--no-bgjob`|Disable background jobs, default to false, which means clients by default carry out background jobs, including:<br/><ul><li>Clean up expired files in Trash (look for `cleanupDeletedFiles`, `cleanupTrash` in [`pkg/meta/base.go`](https://github.com/juicedata/juicefs/blob/main/pkg/meta/base.go))</li><li>Delete slices that are not referenced (look for `cleanupSlices` in [`pkg/meta/base.go`](https://github.com/juicedata/juicefs/blob/main/pkg/meta/base.go))</li><li>Clean up stale client sessions (look for `CleanStaleSessions` in [`pkg/meta/base.go`](https://github.com/juicedata/juicefs/blob/main/pkg/meta/base.go))</li></ul>Note that compaction isn't affected by this option; it happens automatically with file reads and writes: the client checks whether compaction is needed and runs it in the background (take Redis for example, look for `compactChunk` in [`pkg/meta/base.go`](https://github.com/juicedata/juicefs/blob/main/pkg/meta/redis.go)).|
Expand Down
1 change: 1 addition & 0 deletions docs/zh_cn/deployment/hadoop_java_sdk.md
Original file line number Diff line number Diff line change
Expand Up @@ -211,6 +211,7 @@ make win
| `juicefs.file.checksum` | `false` | DistCp 使用 `-update` 参数时,是否计算文件 Checksum |
| `juicefs.no-bgjob` | `false` | 是否关闭后台任务(清理、备份等) |
| `juicefs.backup-meta` | 3600 | 自动将 JuiceFS 元数据备份到对象存储间隔(单位:秒),设置为 0 关闭自动备份 |
|`juicefs.backup-skip-trash`| `false` | 备份元数据时忽略回收站中的文件和目录。 |
| `juicefs.heartbeat` | 12 | 客户端和元数据引擎之间的心跳间隔(单位:秒),建议所有客户端都设置一样 |

#### 多文件系统配置
Expand Down
3 changes: 3 additions & 0 deletions docs/zh_cn/reference/command_reference.md
Original file line number Diff line number Diff line change
Expand Up @@ -384,6 +384,8 @@ juicefs dump redis://localhost sub-meta-dump.json --subdir /dir/in/jfs
|`FILE`|导出文件路径,如果不指定,则会导出到标准输出。如果文件名以 `.gz` 结尾,将会自动压缩。|
|`--subdir=path`|只导出指定子目录的元数据。|
|`--keep-secret-key` <VersionAdd>1.1</VersionAdd>|导出对象存储认证信息,默认为 `false`。由于是明文导出,使用时注意数据安全。如果导出文件不包含对象存储认证信息,后续的导入完成后,需要用 [`juicefs config`](#config) 重新配置对象存储认证信息。|
|`--fast` <VersionAdd>1.2</VersionAdd>|使用更多内存来加速导出。|
|`--skip-trash` <VersionAdd>1.2</VersionAdd>|跳过回收站中的文件和目录。|

### `juicefs load` {#load}

Expand Down Expand Up @@ -628,6 +630,7 @@ juicefs mount redis://localhost /mnt/jfs --backup-meta 0
|-|-|
|`--subdir=value`|挂载指定的子目录,默认挂载整个文件系统。|
|`--backup-meta=3600`|自动备份元数据到对象存储的间隔时间;单位秒,默认 3600,设为 0 表示不备份。|
|`--backup-skip-trash` <VersionAdd>1.2</VersionAdd>|备份元数据时跳过回收站中的文件和目录。|
|`--heartbeat=12`|发送心跳的间隔(单位秒),建议所有客户端使用相同的心跳值 (默认:12)|
|`--read-only`|启用只读模式挂载。|
|`--no-bgjob`|禁用后台任务,默认为 false,也就是说客户端会默认运行后台任务。后台任务包含:<br/><ul><li>清理回收站中过期的文件(在 [`pkg/meta/base.go`](https://github.com/juicedata/juicefs/blob/main/pkg/meta/base.go) 中搜索 `cleanupDeletedFiles` 和 `cleanupTrash`)</li><li>清理引用计数为 0 的 Slice(在 [`pkg/meta/base.go`](https://github.com/juicedata/juicefs/blob/main/pkg/meta/base.go) 中搜索 `cleanupSlices`)</li><li>清理过期的客户端会话(在 [`pkg/meta/base.go`](https://github.com/juicedata/juicefs/blob/main/pkg/meta/base.go) 中搜索 `CleanStaleSessions`)</li></ul>特别地,与[企业版](https://juicefs.com/docs/zh/cloud/guide/background-job)不同,社区版碎片合并(Compaction)不受该选项的影响,而是随着文件读写操作,自动判断是否需要合并,然后异步执行(以 Redis 为例,在 [`pkg/meta/base.go`](https://github.com/juicedata/juicefs/blob/main/pkg/meta/redis.go) 中搜索 `compactChunk`)|
Expand Down
2 changes: 1 addition & 1 deletion pkg/meta/interface.go
Original file line number Diff line number Diff line change
Expand Up @@ -468,7 +468,7 @@ type Meta interface {
HandleQuota(ctx Context, cmd uint8, dpath string, quotas map[string]*Quota, strict, repair bool) error

// Dump the tree under root, which may be modified by checkRoot
DumpMeta(w io.Writer, root Ino, keepSecret, fast bool) error
DumpMeta(w io.Writer, root Ino, keepSecret, fast, skipTrash bool) error
LoadMeta(r io.Reader) error

// getBase return the base engine.
Expand Down
2 changes: 1 addition & 1 deletion pkg/meta/load_dump_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -248,7 +248,7 @@ func testDump(t *testing.T, m Meta, root Ino, expect, result string) {
if _, err = m.Load(true); err != nil {
t.Fatalf("load setting: %s", err)
}
if err = m.DumpMeta(fp, root, false, true); err != nil {
if err = m.DumpMeta(fp, root, false, true, false); err != nil {
t.Fatalf("dump meta: %s", err)
}
cmd := exec.Command("diff", expect, result)
Expand Down
4 changes: 2 additions & 2 deletions pkg/meta/redis.go
Original file line number Diff line number Diff line change
Expand Up @@ -3890,7 +3890,7 @@ func (m *redisMeta) dumpDir(inode Ino, tree *DumpedEntry, bw *bufio.Writer, dept
return nil
}

func (m *redisMeta) DumpMeta(w io.Writer, root Ino, keepSecret, fast bool) (err error) {
func (m *redisMeta) DumpMeta(w io.Writer, root Ino, keepSecret, fast, skipTrash bool) (err error) {
defer func() {
if p := recover(); p != nil {
if e, ok := p.(error); ok {
Expand Down Expand Up @@ -4011,7 +4011,7 @@ func (m *redisMeta) DumpMeta(w io.Writer, root Ino, keepSecret, fast bool) (err
if err = m.dumpDir(root, tree, bw, 1, bar); err != nil {
return err
}
if root == RootInode {
if root == RootInode && !skipTrash {
trash := &DumpedEntry{
Name: "Trash",
Attr: &DumpedAttr{
Expand Down
4 changes: 2 additions & 2 deletions pkg/meta/sql.go
Original file line number Diff line number Diff line change
Expand Up @@ -3577,7 +3577,7 @@ func (m *dbMeta) makeSnap(ses *xorm.Session, bar *utils.Bar) error {
return nil
}

func (m *dbMeta) DumpMeta(w io.Writer, root Ino, keepSecret, fast bool) (err error) {
func (m *dbMeta) DumpMeta(w io.Writer, root Ino, keepSecret, fast, skipTrash bool) (err error) {
defer func() {
if p := recover(); p != nil {
if e, ok := p.(error); ok {
Expand Down Expand Up @@ -3605,7 +3605,7 @@ func (m *dbMeta) DumpMeta(w io.Writer, root Ino, keepSecret, fast bool) (err err
if tree, err = m.dumpEntry(s, root, TypeDirectory); err != nil {
return err
}
if root == 1 {
if root == 1 && !skipTrash {
if trash, err = m.dumpEntry(s, TrashInode, TypeDirectory); err != nil {
return err
}
Expand Down
4 changes: 2 additions & 2 deletions pkg/meta/tkv.go
Original file line number Diff line number Diff line change
Expand Up @@ -2999,7 +2999,7 @@ func (m *kvMeta) dumpDir(inode Ino, tree *DumpedEntry, bw *bufio.Writer, depth i
return nil
}

func (m *kvMeta) DumpMeta(w io.Writer, root Ino, keepSecret, fast bool) (err error) {
func (m *kvMeta) DumpMeta(w io.Writer, root Ino, keepSecret, fast, skipTrash bool) (err error) {
defer func() {
if p := recover(); p != nil {
debug.PrintStack()
Expand Down Expand Up @@ -3115,7 +3115,7 @@ func (m *kvMeta) DumpMeta(w io.Writer, root Ino, keepSecret, fast bool) (err err
if err = m.dumpEntry(root, tree); err != nil {
return err
}
if root == 1 {
if root == 1 && !skipTrash {
trash = &DumpedEntry{
Attr: &DumpedAttr{
Inode: TrashInode,
Expand Down
8 changes: 4 additions & 4 deletions pkg/vfs/backup.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ var (
)

// Backup metadata periodically in the object storage
func Backup(m meta.Meta, blob object.ObjectStorage, interval time.Duration) {
func Backup(m meta.Meta, blob object.ObjectStorage, interval time.Duration, skipTrash bool) {
ctx := meta.Background
key := "lastBackup"
for {
Expand Down Expand Up @@ -77,7 +77,7 @@ func Backup(m meta.Meta, blob object.ObjectStorage, interval time.Duration) {
}
go cleanupBackups(blob, now)
logger.Debugf("backup metadata started")
if err = backup(m, blob, now); err == nil {
if err = backup(m, blob, now, skipTrash); err == nil {
LastBackupTimeG.Set(float64(now.UnixNano()) / 1e9)
logger.Infof("backup metadata succeed, used %s", time.Since(now))
} else {
Expand All @@ -88,7 +88,7 @@ func Backup(m meta.Meta, blob object.ObjectStorage, interval time.Duration) {
}
}

func backup(m meta.Meta, blob object.ObjectStorage, now time.Time) error {
func backup(m meta.Meta, blob object.ObjectStorage, now time.Time, skipTrash bool) error {
name := "dump-" + now.UTC().Format("2006-01-02-150405") + ".json.gz"
fp, err := os.CreateTemp("", "juicefs-meta-*")
if err != nil {
Expand All @@ -97,7 +97,7 @@ func backup(m meta.Meta, blob object.ObjectStorage, now time.Time) error {
defer os.Remove(fp.Name())
defer fp.Close()
zw := gzip.NewWriter(fp)
err = m.DumpMeta(zw, 0, false, false) // force dump the whole tree
err = m.DumpMeta(zw, 0, false, false, skipTrash) // force dump the whole tree
_ = zw.Close()
if err != nil {
return err
Expand Down
2 changes: 1 addition & 1 deletion pkg/vfs/backup_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ func TestRotate(t *testing.T) {

func TestBackup(t *testing.T) {
v, blob := createTestVFS()
go Backup(v.Meta, blob, time.Millisecond*100)
go Backup(v.Meta, blob, time.Millisecond*100, false)
time.Sleep(time.Millisecond * 100)

blob = object.WithPrefix(blob, "meta/")
Expand Down
1 change: 1 addition & 0 deletions pkg/vfs/vfs.go
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,7 @@ type Config struct {
DirEntryTimeout time.Duration
EntryTimeout time.Duration
BackupMeta time.Duration
BackupSkipTrash bool
FastResolve bool `json:",omitempty"`
AccessLog string `json:",omitempty"`
PrefixInternal bool
Expand Down
4 changes: 3 additions & 1 deletion sdk/java/libjfs/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -279,6 +279,7 @@ type javaConf struct {
NoBGJob bool `json:"noBGJob"`
OpenCache float64 `json:"openCache"`
BackupMeta int64 `json:"backupMeta"`
BackupSkipTrash bool `json:"backupSkipTrash"`
Heartbeat int `json:"heartbeat"`
CacheDir string `json:"cacheDir"`
CacheSize int64 `json:"cacheSize"`
Expand Down Expand Up @@ -576,9 +577,10 @@ func jfs_init(cname, jsonConf, user, group, superuser, supergroup *C.char) int64
AccessLog: jConf.AccessLog,
FastResolve: jConf.FastResolve,
BackupMeta: time.Second * time.Duration(jConf.BackupMeta),
BackupSkipTrash: jConf.BackupSkipTrash,
}
if !jConf.ReadOnly && !jConf.NoSession && !jConf.NoBGJob && conf.BackupMeta > 0 {
go vfs.Backup(m, blob, conf.BackupMeta)
go vfs.Backup(m, blob, conf.BackupMeta, conf.BackupSkipTrash)
}
if !jConf.NoUsageReport && !jConf.NoSession {
go usage.ReportUsage(m, "java-sdk "+version.Version())
Expand Down
1 change: 1 addition & 0 deletions sdk/java/src/main/java/io/juicefs/JuiceFileSystemImpl.java
Original file line number Diff line number Diff line change
Expand Up @@ -372,6 +372,7 @@ public void initialize(URI uri, Configuration conf) throws IOException {
obj.put("cacheSize", Integer.valueOf(getConf(conf, "cache-size", "100")));
obj.put("openCache", Float.valueOf(getConf(conf, "open-cache", "0.0")));
obj.put("backupMeta", Integer.valueOf(getConf(conf, "backup-meta", "3600")));
obj.put("backupSkipTrash", Boolean.valueOf(getConf(conf, "backup-skip-trash", "false")));
obj.put("heartbeat", Integer.valueOf(getConf(conf, "heartbeat", "12")));
obj.put("attrTimeout", Float.valueOf(getConf(conf, "attr-cache", "0.0")));
obj.put("entryTimeout", Float.valueOf(getConf(conf, "entry-cache", "0.0")));
Expand Down

0 comments on commit 4d97cdb

Please sign in to comment.