From 7293cffbf1ad9ed3fadcf0d9dfab83b9a2c9fa26 Mon Sep 17 00:00:00 2001 From: Olivier Dalle Date: Thu, 20 Apr 2023 00:09:10 +0200 Subject: [PATCH 1/4] Added no-skip-clone option --- syncoid | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/syncoid b/syncoid index e5046f37..c60e24a4 100755 --- a/syncoid +++ b/syncoid @@ -25,7 +25,7 @@ GetOptions(\%args, "no-command-checks", "monitor-version", "compress=s", "dumpsn "source-bwlimit=s", "target-bwlimit=s", "sshconfig=s", "sshkey=s", "sshport=i", "sshcipher|c=s", "sshoption|o=s@", "debug", "quiet", "no-stream", "no-sync-snap", "no-resume", "exclude=s@", "skip-parent", "identifier=s", "no-clone-handling", "no-privilege-elevation", "force-delete", "no-rollback", "create-bookmark", - "pv-options=s" => \$pvoptions, "keep-sync-snap", "preserve-recordsize", "mbuffer-size=s" => \$mbuffer_size) + "pv-options=s" => \$pvoptions, "keep-sync-snap", "preserve-recordsize", "mbuffer-size=s" => \$mbuffer_size, "no-skip-clone") or pod2usage(2); my %compressargs = %{compressargset($args{'compress'} || 'default')}; # Can't be done with GetOptions arg, as default still needs to be set @@ -184,6 +184,15 @@ if (!defined $args{'recursive'}) { if ($found == 0) { # clone source is not replicated, do a full replication + if (defined $args{'no-skip-clone'}) { + # In case a clone is missing because its transfer was interrupted + # we dont want a full clone. We want to be able to finish the + # interrupted transfer and then do the clone. + # Also, it is unclear why a clone origin could be missing + # for a good reason. + print("SKIPPING: $dataset because origin clone is missing. 
Retry running syncoid.\n"); + next; + } $origin = undef; } else { # clone source is replicated, defer until all non clones are replicated From ec8b648bde27782a86e603fe9528adc2e230b350 Mon Sep 17 00:00:00 2001 From: Olivier Dalle Date: Thu, 20 Apr 2023 00:29:15 +0200 Subject: [PATCH 2/4] Added manual --- syncoid | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/syncoid b/syncoid index c60e24a4..b177995f 100755 --- a/syncoid +++ b/syncoid @@ -25,7 +25,7 @@ GetOptions(\%args, "no-command-checks", "monitor-version", "compress=s", "dumpsn "source-bwlimit=s", "target-bwlimit=s", "sshconfig=s", "sshkey=s", "sshport=i", "sshcipher|c=s", "sshoption|o=s@", "debug", "quiet", "no-stream", "no-sync-snap", "no-resume", "exclude=s@", "skip-parent", "identifier=s", "no-clone-handling", "no-privilege-elevation", "force-delete", "no-rollback", "create-bookmark", - "pv-options=s" => \$pvoptions, "keep-sync-snap", "preserve-recordsize", "mbuffer-size=s" => \$mbuffer_size, "no-skip-clone") + "pv-options=s" => \$pvoptions, "keep-sync-snap", "preserve-recordsize", "mbuffer-size=s" => \$mbuffer_size, "no-full-clone") or pod2usage(2); my %compressargs = %{compressargset($args{'compress'} || 'default')}; # Can't be done with GetOptions arg, as default still needs to be set @@ -184,12 +184,15 @@ if (!defined $args{'recursive'}) { if ($found == 0) { # clone source is not replicated, do a full replication - if (defined $args{'no-skip-clone'}) { - # In case a clone is missing because its transfer was interrupted - # we dont want a full clone. We want to be able to finish the + if (defined $args{'no-full-clone'}) { + # In case a clone origin is missing because its transfer was interrupted + # we may not want a full clone. We want to be able to finish the # interrupted transfer and then do the clone. - # Also, it is unclear why a clone origin could be missing - # for a good reason. + # Example worst case scenario: + # A 1TB dataset has 500 identical clones. 
Overall it uses just 1 TB storage space. + # Assume the recursive transfer is started and the origin dataset transfer + # ends up failing. The recursive transfer proceeeds to next and because origin + # is missing, each clone becomes a full clone and we end up trying to transfer 500TB ! print("SKIPPING: $dataset because origin clone is missing. Retry running syncoid.\n"); next; } @@ -2027,5 +2030,6 @@ Options: --no-resume Don't use the ZFS resume feature if available --no-clone-handling Don't try to recreate clones on target --no-privilege-elevation Bypass the root check, for use with ZFS permission delegation + --no-full-clone Don't convert a clone into a full clone if the clone's origin is missing at destination --force-delete Remove target datasets recursively, if there are no matching snapshots/bookmarks (also overwrites conflicting named snapshots) From b53f09c127d9680ccfc2773cbb07a5223b0d19b5 Mon Sep 17 00:00:00 2001 From: Olivier Dalle Date: Fri, 21 Apr 2023 12:02:05 +0200 Subject: [PATCH 3/4] Fixed no-full-clone option --- syncoid | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/syncoid b/syncoid index b177995f..e05e6ade 100755 --- a/syncoid +++ b/syncoid @@ -503,8 +503,21 @@ sub syncdataset { system($synccmd) == 0 or do { if (defined $origin) { print "INFO: clone creation failed, trying ordinary replication as fallback\n"; - syncdataset($sourcehost, $sourcefs, $targethost, $targetfs, undef, 1); - return 0; + if (defined $args{'no-full-clone'}) { + # In case a clone origin is missing because its transfer was interrupted + # we may not want a full clone. We want to be able to finish the + # interrupted transfer and then do the clone. + # Example worst case scenario: + # A 1TB dataset has 500 identical clones. Overall it uses just 1 TB storage space. + # Assume the recursive transfer is started and the origin dataset transfer + # ends up failing.
The recursive transfer proceeeds to next and because origin + # is missing, each clone becomes a full clone and we end up trying to transfer 500TB ! + print("SKIPPING: $synccmd because origin clone is missing. Retry running syncoid.\n"); + # Fall through to normal error + } else { + syncdataset($sourcehost, $sourcefs, $targethost, $targetfs, undef, 1); + return 0; + } } warn "CRITICAL ERROR: $synccmd failed: $?"; From a672ee015cb981d86dada93bacac7c2105af7fcc Mon Sep 17 00:00:00 2001 From: Olivier Dalle Date: Fri, 21 Apr 2023 23:16:19 +0200 Subject: [PATCH 4/4] Improved doc --- README.md | 12 ++++++++++++ syncoid | 20 ++------------------ 2 files changed, 14 insertions(+), 18 deletions(-) diff --git a/README.md b/README.md index 1cb2f17c..0cc7d041 100644 --- a/README.md +++ b/README.md @@ -344,6 +344,18 @@ As of 1.4.18, syncoid also automatically supports and enables resume of interrup This argument tells syncoid to not recreate clones on the target on initial sync, and do a normal replication instead. ++ --no-full-clone + Only applies to recursive transfers when clone handling is active. + In case a clone origin is missing (on destination) because its transfer was interrupted, + don't switch to a full clone sync. + Example worst-case scenario this may help prevent: + A 1 TB dataset has 500 identical clones. Overall it uses just 1 TB of storage space at the source, because + all clones share their storage with the origin. + Assume a recursive transfer is started that needs to send all datasets (origin and 500 clones). + Assume also that the origin dataset transfer ends up failing. Without this option, the recursive + transfer proceeds to the next dataset, and because the origin is missing (receive not completed), + each clone becomes a full clone and we end up trying to transfer 500 TB! + + --dumpsnaps This prints a list of snapshots during the run.
diff --git a/syncoid b/syncoid index e05e6ade..c9b4cf03 100755 --- a/syncoid +++ b/syncoid @@ -183,19 +183,11 @@ if (!defined $args{'recursive'}) { } if ($found == 0) { - # clone source is not replicated, do a full replication if (defined $args{'no-full-clone'}) { - # In case a clone origin is missing because its transfer was interrupted - # we may not want a full clone. We want to be able to finish the - # interrupted transfer and then do the clone. - # Example worst case scenario: - # A 1TB dataset has 500 identical clones. Overall it uses just 1 TB storage space. - # Assume the recursive transfer is started and the origin dataset transfer - # ends up failing. The recursive transfer proceeeds to next and because origin - # is missing, each clone becomes a full clone and we end up trying to transfer 500TB ! print("SKIPPING: $dataset because origin clone is missing. Retry running syncoid.\n"); next; } + # clone source is not replicated, do a full replication $origin = undef; } else { # clone source is replicated, defer until all non clones are replicated @@ -503,15 +495,7 @@ sub syncdataset { system($synccmd) == 0 or do { if (defined $origin) { print "INFO: clone creation failed, trying ordinary replication as fallback\n"; - if (defined $args{'no-full-clone'}) { - # In case a clone origin is missing because its transfer was interrupted - # we may not want a full clone. We want to be able to finish the - # interrupted transfer and then do the clone. - # Example worst case scenario: - # A 1TB dataset has 500 identical clones. Overall it uses just 1 TB storage space. - # Assume the recursive transfer is started and the origin dataset transfer - # ends up failing. The recursive transfer proceeeds to next and because origin - # is missing, each clone becomes a full clone and we end up trying to transfer 500TB ! + if (defined $args{'no-full-clone'}) { print("SKIPPING: $synccmd because origin clone is missing. 
Retry running syncoid.\n"); # Fall through to normal error } else {