diff --git a/cmd/dump.go b/cmd/dump.go index baf57ac17983..a56b2dabeebe 100644 --- a/cmd/dump.go +++ b/cmd/dump.go @@ -59,6 +59,10 @@ Details: https://juicefs.com/docs/community/metadata_dump_load`, Name: "fast", Usage: "speedup dump by load all metadata into memory", }, + &cli.BoolFlag{ + Name: "skip-trash", + Usage: "skip files in trash", + }, }, } } @@ -105,7 +109,7 @@ func dump(ctx *cli.Context) (err error) { if st := m.Chroot(meta.Background, metaConf.Subdir); st != 0 { return st } - if err := m.DumpMeta(w, 1, ctx.Bool("keep-secret-key"), ctx.Bool("fast")); err != nil { + if err := m.DumpMeta(w, 1, ctx.Bool("keep-secret-key"), ctx.Bool("fast"), ctx.Bool("skip-trash")); err != nil { return err } logger.Infof("Dump metadata into %s succeed", dst) diff --git a/cmd/flags.go b/cmd/flags.go index a9b624b94115..d1dd26edafd5 100644 --- a/cmd/flags.go +++ b/cmd/flags.go @@ -245,6 +245,10 @@ func metaFlags() []cli.Flag { Value: "3600", Usage: "interval (in seconds) to automatically backup metadata in the object storage (0 means disable backup)", }, + &cli.BoolFlag{ + Name: "backup-skip-trash", + Usage: "skip files in trash when backup metadata", + }, &cli.StringFlag{ Name: "heartbeat", Value: "12", diff --git a/cmd/mount.go b/cmd/mount.go index 5468a881196c..e981a278b9da 100644 --- a/cmd/mount.go +++ b/cmd/mount.go @@ -246,15 +246,16 @@ func expandPathForEmbedded(addr string) string { func getVfsConf(c *cli.Context, metaConf *meta.Config, format *meta.Format, chunkConf *chunk.Config) *vfs.Config { cfg := &vfs.Config{ - Meta: metaConf, - Format: *format, - Version: version.Version(), - Chunk: chunkConf, - BackupMeta: duration(c.String("backup-meta")), - Port: &vfs.Port{DebugAgent: debugAgent, PyroscopeAddr: c.String("pyroscope")}, - PrefixInternal: c.Bool("prefix-internal"), - Pid: os.Getpid(), - PPid: os.Getppid(), + Meta: metaConf, + Format: *format, + Version: version.Version(), + Chunk: chunkConf, + BackupMeta: duration(c.String("backup-meta")), + BackupSkipTrash: 
c.Bool("backup-skip-trash"), + Port: &vfs.Port{DebugAgent: debugAgent, PyroscopeAddr: c.String("pyroscope")}, + PrefixInternal: c.Bool("prefix-internal"), + Pid: os.Getpid(), + PPid: os.Getppid(), } skip_check := os.Getenv("SKIP_BACKUP_META_CHECK") == "true" if !skip_check && cfg.BackupMeta > 0 && cfg.BackupMeta < time.Minute*5 { @@ -362,7 +363,7 @@ func initBackgroundTasks(c *cli.Context, vfsConf *vfs.Config, metaConf *meta.Con if !metaConf.ReadOnly && !metaConf.NoBGJob && vfsConf.BackupMeta > 0 { registerer.MustRegister(vfs.LastBackupTimeG) registerer.MustRegister(vfs.LastBackupDurationG) - go vfs.Backup(m, blob, vfsConf.BackupMeta) + go vfs.Backup(m, blob, vfsConf.BackupMeta, vfsConf.BackupSkipTrash) } if !c.Bool("no-usage-report") { go usage.ReportUsage(m, version.Version()) diff --git a/docs/en/deployment/hadoop_java_sdk.md b/docs/en/deployment/hadoop_java_sdk.md index 107b9f434276..fe714e233388 100644 --- a/docs/en/deployment/hadoop_java_sdk.md +++ b/docs/en/deployment/hadoop_java_sdk.md @@ -209,6 +209,7 @@ Please refer to the following table to set the relevant parameters of the JuiceF | `juicefs.no-usage-report` | `false` | Whether disable usage reporting. JuiceFS only collects anonymous usage data (e.g. version number), no user or any sensitive data will be collected. | | `juicefs.no-bgjob` | `false` | Disable background jobs (clean-up, backup, etc.) | | `juicefs.backup-meta` | 3600 | Interval (in seconds) to automatically backup metadata in the object storage (0 means disable backup) | +|`juicefs.backup-skip-trash`| `false` | Skip files and directories in trash when backup metadata. | | `juicefs.heartbeat` | 12 | Heartbeat interval (in seconds) between client and metadata engine. It's recommended that all clients use the same value. 
| #### Multiple file systems configuration diff --git a/docs/en/reference/command_reference.md b/docs/en/reference/command_reference.md index 0cc881848b33..d7ca95ae75d7 100644 --- a/docs/en/reference/command_reference.md +++ b/docs/en/reference/command_reference.md @@ -384,6 +384,8 @@ juicefs dump redis://localhost sub-meta-dump.json --subdir /dir/in/jfs |`FILE`|Export file path, if not specified, it will be exported to standard output. If the filename ends with `.gz`, it will be automatically compressed.| |`--subdir=path`|Only export metadata for the specified subdirectory.| |`--keep-secret-key` 1.1 |Export object storage authentication information, the default is `false`. Since it is exported in plain text, pay attention to data security when using it. If the export file does not contain object storage authentication information, you need to use [`juicefs config`](#config) to reconfigure object storage authentication information after the subsequent import is completed.| +|`--fast` 1.2|Use more memory to speed up the dump.| +|`--skip-trash` 1.2|Skip files and directories in trash.| ### `juicefs load` {#load} @@ -628,6 +630,7 @@ juicefs mount redis://localhost /mnt/jfs --backup-meta 0 |-|-| |`--subdir=value`|mount a sub-directory as root (default: "")| |`--backup-meta=3600`|interval (in seconds) to automatically backup metadata in the object storage (0 means disable backup) (default: "3600")| +|`--backup-skip-trash` 1.2|skip files and directories in trash when backing up metadata.| |`--heartbeat=12`|interval (in seconds) to send heartbeat; it's recommended that all clients use the same heartbeat value (default: "12")| |`--read-only`|allow lookup/read operations only (default: false)| |`--no-bgjob`|Disable background jobs, default to false, which means clients by default carry out background jobs, including:
Note that compaction isn't affected by this option, it happens automatically with file reads and writes, client will check if compaction is in need, and run in background (take Redis for example, look for `compactChunk` in [`pkg/meta/base.go`](https://github.com/juicedata/juicefs/blob/main/pkg/meta/redis.go)).| diff --git a/docs/zh_cn/deployment/hadoop_java_sdk.md b/docs/zh_cn/deployment/hadoop_java_sdk.md index 88a51bb7590c..84487881b71b 100644 --- a/docs/zh_cn/deployment/hadoop_java_sdk.md +++ b/docs/zh_cn/deployment/hadoop_java_sdk.md @@ -211,6 +211,7 @@ make win | `juicefs.file.checksum` | `false` | DistCp 使用 `-update` 参数时,是否计算文件 Checksum | | `juicefs.no-bgjob` | `false` | 是否关闭后台任务(清理、备份等) | | `juicefs.backup-meta` | 3600 | 自动将 JuiceFS 元数据备份到对象存储间隔(单位:秒),设置为 0 关闭自动备份 | +|`juicefs.backup-skip-trash`| `false` | 备份元数据时忽略回收站中的文件和目录。 | | `juicefs.heartbeat` | 12 | 客户端和元数据引擎之间的心跳间隔(单位:秒),建议所有客户端都设置一样 | #### 多文件系统配置 diff --git a/docs/zh_cn/reference/command_reference.md b/docs/zh_cn/reference/command_reference.md index a7361df16c64..1b8ae35245f7 100644 --- a/docs/zh_cn/reference/command_reference.md +++ b/docs/zh_cn/reference/command_reference.md @@ -384,6 +384,8 @@ juicefs dump redis://localhost sub-meta-dump.json --subdir /dir/in/jfs |`FILE`|导出文件路径,如果不指定,则会导出到标准输出。如果文件名以 `.gz` 结尾,将会自动压缩。| |`--subdir=path`|只导出指定子目录的元数据。| |`--keep-secret-key` 1.1|导出对象存储认证信息,默认为 `false`。由于是明文导出,使用时注意数据安全。如果导出文件不包含对象存储认证信息,后续的导入完成后,需要用 [`juicefs config`](#config) 重新配置对象存储认证信息。| +|`--fast` 1.2|使用更多内存来加速导出。| +|`--skip-trash` 1.2|跳过回收站中的文件和目录。| ### `juicefs load` {#load} @@ -628,6 +630,7 @@ juicefs mount redis://localhost /mnt/jfs --backup-meta 0 |-|-| |`--subdir=value`|挂载指定的子目录,默认挂载整个文件系统。| |`--backup-meta=3600`|自动备份元数据到对象存储的间隔时间;单位秒,默认 3600,设为 0 表示不备份。| +|`--backup-skip-trash` 1.2|备份元数据时跳过回收站中的文件和目录。| |`--heartbeat=12`|发送心跳的间隔(单位秒),建议所有客户端使用相同的心跳值 (默认:12)| |`--read-only`|启用只读模式挂载。| |`--no-bgjob`|禁用后台任务,默认为 false,也就是说客户端会默认运行后台任务。后台任务包含:
特别地,与[企业版](https://juicefs.com/docs/zh/cloud/guide/background-job)不同,社区版碎片合并(Compaction)不受该选项的影响,而是随着文件读写操作,自动判断是否需要合并,然后异步执行(以 Redis 为例,在 [`pkg/meta/base.go`](https://github.com/juicedata/juicefs/blob/main/pkg/meta/redis.go) 中搜索 `compactChunk`)| diff --git a/pkg/meta/interface.go b/pkg/meta/interface.go index 7ccfcab01311..ba5869f45e66 100644 --- a/pkg/meta/interface.go +++ b/pkg/meta/interface.go @@ -468,7 +468,7 @@ type Meta interface { HandleQuota(ctx Context, cmd uint8, dpath string, quotas map[string]*Quota, strict, repair bool) error // Dump the tree under root, which may be modified by checkRoot - DumpMeta(w io.Writer, root Ino, keepSecret, fast bool) error + DumpMeta(w io.Writer, root Ino, keepSecret, fast, skipTrash bool) error LoadMeta(r io.Reader) error // getBase return the base engine. diff --git a/pkg/meta/load_dump_test.go b/pkg/meta/load_dump_test.go index 303ba33bce8b..820f53fc7759 100644 --- a/pkg/meta/load_dump_test.go +++ b/pkg/meta/load_dump_test.go @@ -248,7 +248,7 @@ func testDump(t *testing.T, m Meta, root Ino, expect, result string) { if _, err = m.Load(true); err != nil { t.Fatalf("load setting: %s", err) } - if err = m.DumpMeta(fp, root, false, true); err != nil { + if err = m.DumpMeta(fp, root, false, true, false); err != nil { t.Fatalf("dump meta: %s", err) } cmd := exec.Command("diff", expect, result) diff --git a/pkg/meta/redis.go b/pkg/meta/redis.go index 0a0cfbe2e0a9..2823859f3b67 100644 --- a/pkg/meta/redis.go +++ b/pkg/meta/redis.go @@ -3890,7 +3890,7 @@ func (m *redisMeta) dumpDir(inode Ino, tree *DumpedEntry, bw *bufio.Writer, dept return nil } -func (m *redisMeta) DumpMeta(w io.Writer, root Ino, keepSecret, fast bool) (err error) { +func (m *redisMeta) DumpMeta(w io.Writer, root Ino, keepSecret, fast, skipTrash bool) (err error) { defer func() { if p := recover(); p != nil { if e, ok := p.(error); ok { @@ -4011,7 +4011,7 @@ func (m *redisMeta) DumpMeta(w io.Writer, root Ino, keepSecret, fast bool) (err if err = m.dumpDir(root, 
tree, bw, 1, bar); err != nil { return err } - if root == RootInode { + if root == RootInode && !skipTrash { trash := &DumpedEntry{ Name: "Trash", Attr: &DumpedAttr{ diff --git a/pkg/meta/sql.go b/pkg/meta/sql.go index 7cbd3b54c554..2a9b50e20ca3 100644 --- a/pkg/meta/sql.go +++ b/pkg/meta/sql.go @@ -3577,7 +3577,7 @@ func (m *dbMeta) makeSnap(ses *xorm.Session, bar *utils.Bar) error { return nil } -func (m *dbMeta) DumpMeta(w io.Writer, root Ino, keepSecret, fast bool) (err error) { +func (m *dbMeta) DumpMeta(w io.Writer, root Ino, keepSecret, fast, skipTrash bool) (err error) { defer func() { if p := recover(); p != nil { if e, ok := p.(error); ok { @@ -3605,7 +3605,7 @@ func (m *dbMeta) DumpMeta(w io.Writer, root Ino, keepSecret, fast bool) (err err if tree, err = m.dumpEntry(s, root, TypeDirectory); err != nil { return err } - if root == 1 { + if root == 1 && !skipTrash { if trash, err = m.dumpEntry(s, TrashInode, TypeDirectory); err != nil { return err } diff --git a/pkg/meta/tkv.go b/pkg/meta/tkv.go index 50c68fc64f07..fe45a9f6a651 100644 --- a/pkg/meta/tkv.go +++ b/pkg/meta/tkv.go @@ -2999,7 +2999,7 @@ func (m *kvMeta) dumpDir(inode Ino, tree *DumpedEntry, bw *bufio.Writer, depth i return nil } -func (m *kvMeta) DumpMeta(w io.Writer, root Ino, keepSecret, fast bool) (err error) { +func (m *kvMeta) DumpMeta(w io.Writer, root Ino, keepSecret, fast, skipTrash bool) (err error) { defer func() { if p := recover(); p != nil { debug.PrintStack() @@ -3115,7 +3115,7 @@ func (m *kvMeta) DumpMeta(w io.Writer, root Ino, keepSecret, fast bool) (err err if err = m.dumpEntry(root, tree); err != nil { return err } - if root == 1 { + if root == 1 && !skipTrash { trash = &DumpedEntry{ Attr: &DumpedAttr{ Inode: TrashInode, diff --git a/pkg/vfs/backup.go b/pkg/vfs/backup.go index 33bb5342e5ff..3b214a4dfb32 100644 --- a/pkg/vfs/backup.go +++ b/pkg/vfs/backup.go @@ -42,7 +42,7 @@ var ( ) // Backup metadata periodically in the object storage -func Backup(m meta.Meta, blob 
object.ObjectStorage, interval time.Duration) { +func Backup(m meta.Meta, blob object.ObjectStorage, interval time.Duration, skipTrash bool) { ctx := meta.Background key := "lastBackup" for { @@ -77,7 +77,7 @@ func Backup(m meta.Meta, blob object.ObjectStorage, interval time.Duration) { } go cleanupBackups(blob, now) logger.Debugf("backup metadata started") - if err = backup(m, blob, now); err == nil { + if err = backup(m, blob, now, skipTrash); err == nil { LastBackupTimeG.Set(float64(now.UnixNano()) / 1e9) logger.Infof("backup metadata succeed, used %s", time.Since(now)) } else { @@ -88,7 +88,7 @@ func Backup(m meta.Meta, blob object.ObjectStorage, interval time.Duration) { } } -func backup(m meta.Meta, blob object.ObjectStorage, now time.Time) error { +func backup(m meta.Meta, blob object.ObjectStorage, now time.Time, skipTrash bool) error { name := "dump-" + now.UTC().Format("2006-01-02-150405") + ".json.gz" fp, err := os.CreateTemp("", "juicefs-meta-*") if err != nil { @@ -97,7 +97,7 @@ func backup(m meta.Meta, blob object.ObjectStorage, now time.Time) error { defer os.Remove(fp.Name()) defer fp.Close() zw := gzip.NewWriter(fp) - err = m.DumpMeta(zw, 0, false, false) // force dump the whole tree + err = m.DumpMeta(zw, 0, false, false, skipTrash) // force dump the whole tree _ = zw.Close() if err != nil { return err diff --git a/pkg/vfs/backup_test.go b/pkg/vfs/backup_test.go index ad72290e5ce5..0838de968100 100644 --- a/pkg/vfs/backup_test.go +++ b/pkg/vfs/backup_test.go @@ -69,7 +69,7 @@ func TestRotate(t *testing.T) { func TestBackup(t *testing.T) { v, blob := createTestVFS() - go Backup(v.Meta, blob, time.Millisecond*100) + go Backup(v.Meta, blob, time.Millisecond*100, false) time.Sleep(time.Millisecond * 100) blob = object.WithPrefix(blob, "meta/") diff --git a/pkg/vfs/vfs.go b/pkg/vfs/vfs.go index eac28658b9ae..71ee3cdbd2d8 100644 --- a/pkg/vfs/vfs.go +++ b/pkg/vfs/vfs.go @@ -117,6 +117,7 @@ type Config struct { DirEntryTimeout time.Duration EntryTimeout 
time.Duration BackupMeta time.Duration + BackupSkipTrash bool FastResolve bool `json:",omitempty"` AccessLog string `json:",omitempty"` PrefixInternal bool diff --git a/sdk/java/libjfs/main.go b/sdk/java/libjfs/main.go index cd985339885f..adf971d09a3c 100644 --- a/sdk/java/libjfs/main.go +++ b/sdk/java/libjfs/main.go @@ -279,6 +279,7 @@ type javaConf struct { NoBGJob bool `json:"noBGJob"` OpenCache float64 `json:"openCache"` BackupMeta int64 `json:"backupMeta"` + BackupSkipTrash bool `json:"backupSkipTrash"` Heartbeat int `json:"heartbeat"` CacheDir string `json:"cacheDir"` CacheSize int64 `json:"cacheSize"` @@ -576,9 +577,10 @@ func jfs_init(cname, jsonConf, user, group, superuser, supergroup *C.char) int64 AccessLog: jConf.AccessLog, FastResolve: jConf.FastResolve, BackupMeta: time.Second * time.Duration(jConf.BackupMeta), + BackupSkipTrash: jConf.BackupSkipTrash, } if !jConf.ReadOnly && !jConf.NoSession && !jConf.NoBGJob && conf.BackupMeta > 0 { - go vfs.Backup(m, blob, conf.BackupMeta) + go vfs.Backup(m, blob, conf.BackupMeta, conf.BackupSkipTrash) } if !jConf.NoUsageReport && !jConf.NoSession { go usage.ReportUsage(m, "java-sdk "+version.Version()) diff --git a/sdk/java/src/main/java/io/juicefs/JuiceFileSystemImpl.java b/sdk/java/src/main/java/io/juicefs/JuiceFileSystemImpl.java index b4d65a7b489e..05d87230a52c 100644 --- a/sdk/java/src/main/java/io/juicefs/JuiceFileSystemImpl.java +++ b/sdk/java/src/main/java/io/juicefs/JuiceFileSystemImpl.java @@ -372,6 +372,7 @@ public void initialize(URI uri, Configuration conf) throws IOException { obj.put("cacheSize", Integer.valueOf(getConf(conf, "cache-size", "100"))); obj.put("openCache", Float.valueOf(getConf(conf, "open-cache", "0.0"))); obj.put("backupMeta", Integer.valueOf(getConf(conf, "backup-meta", "3600"))); + obj.put("backupSkipTrash", Boolean.valueOf(getConf(conf, "backup-skip-trash", "false"))); obj.put("heartbeat", Integer.valueOf(getConf(conf, "heartbeat", "12"))); obj.put("attrTimeout", 
Float.valueOf(getConf(conf, "attr-cache", "0.0"))); obj.put("entryTimeout", Float.valueOf(getConf(conf, "entry-cache", "0.0")));