From 12d263bfd3ee9e1e44a8d3704088edd038fd8883 Mon Sep 17 00:00:00 2001 From: Dan Nicholson Date: Tue, 4 Jan 2022 15:15:41 -0700 Subject: [PATCH 1/3] apply: Use staged deployment on booted systems When finalizing an OSTree deployment, the current `/etc` is merged with the new commit's `/usr/etc`. Any changes that happen in the current `/etc` after the deployment has been finalized will not appear in the new deployment. Since eos-updater is often run in the background, it's likely the user will make changes in `/etc` (such as creating a new user) long before the new deployment is booted into. To address this issue, OSTree has provided the concept of a staged deployment since 2018.5. The new deployment is initialized but not finalized until shutdown via the `ostree-finalize-staged.service` systemd unit. Since staged deployments only work on OSTree booted systems that can initiate systemd units, this can't really work in the current test suite. The old full deployment method is kept for that case. Note that staged deployment finalization depends on the `ostree-finalize-staged.path` systemd unit being activated. Currently, OSTree does this on demand but in the future it may require the OS to explicitly activate the unit via a systemd preset or similar mechanism. 
https://phabricator.endlessm.com/T5658 --- eos-updater/apply.c | 84 +++++++++++++++++++++++++++++---------------- 1 file changed, 54 insertions(+), 30 deletions(-) diff --git a/eos-updater/apply.c b/eos-updater/apply.c index eadedb3cd..a2bb1bc9a 100644 --- a/eos-updater/apply.c +++ b/eos-updater/apply.c @@ -241,6 +241,7 @@ apply_internal (ApplyData *apply_data, g_autoptr(GKeyFile) origin = NULL; g_autoptr(OstreeSysroot) sysroot = NULL; const gchar *osname = get_test_osname (); + gboolean staged_deploy; g_autoptr(GError) local_error = NULL; sysroot = ostree_sysroot_new_default (); @@ -269,21 +270,62 @@ apply_internal (ApplyData *apply_data, origin = ostree_sysroot_origin_new_from_refspec (sysroot, update_refspec); - if (!ostree_sysroot_deploy_tree (sysroot, - osname, - update_id, - origin, - booted_deployment, - NULL, - &new_deployment, - cancellable, - error)) - return FALSE; + /* When booted into an OSTree system, stage the deployment so that the + * /etc merge happens during shutdown. Otherwise (primarily the test + * suite), deploy the finalized tree immediately. 
+ */ + staged_deploy = ostree_sysroot_is_booted (sysroot); + if (staged_deploy) + { + g_message ("Creating staged deployment for revision %s", update_id); + if (!ostree_sysroot_stage_tree (sysroot, + osname, + update_id, + origin, + booted_deployment, + NULL, + &new_deployment, + cancellable, + error)) + return FALSE; + } + else + { + g_message ("Creating finalized deployment for revision %s", update_id); + if (!ostree_sysroot_deploy_tree (sysroot, + osname, + update_id, + origin, + booted_deployment, + NULL, + &new_deployment, + cancellable, + error)) + return FALSE; + + if (!ostree_sysroot_simple_write_deployment (sysroot, + osname, + new_deployment, + booted_deployment, + OSTREE_SYSROOT_SIMPLE_WRITE_DEPLOYMENT_FLAGS_NO_CLEAN, + cancellable, + error)) + return FALSE; + } + + g_message ("New deployment: index: %d, OS name: %s, deploy serial: %d, " + "checksum: %s, boot checksum: %s, boot serial: %d", + ostree_deployment_get_index (new_deployment), + ostree_deployment_get_osname (new_deployment), + ostree_deployment_get_deployserial (new_deployment), + ostree_deployment_get_csum (new_deployment), + ostree_deployment_get_bootcsum (new_deployment), + ostree_deployment_get_bootserial (new_deployment)); /* If the original refspec is not the update refspec, then we may have * a ref to a no longer needed tree. Delete that remote ref so the - * cleanup done in simple_write_deployment() really removes that tree - * if no deployments point to it anymore. + * sysroot cleanup below really removes that tree if no deployments + * point to it anymore. 
*/ if (g_strcmp0 (update_refspec, orig_refspec) != 0) { @@ -304,24 +346,6 @@ apply_internal (ApplyData *apply_data, } } - if (!ostree_sysroot_simple_write_deployment (sysroot, - osname, - new_deployment, - booted_deployment, - OSTREE_SYSROOT_SIMPLE_WRITE_DEPLOYMENT_FLAGS_NO_CLEAN, - cancellable, - error)) - return FALSE; - - g_message ("New deployment: index: %d, OS name: %s, deploy serial: %d, " - "checksum: %s, boot checksum: %s, boot serial: %d", - ostree_deployment_get_index (new_deployment), - ostree_deployment_get_osname (new_deployment), - ostree_deployment_get_deployserial (new_deployment), - ostree_deployment_get_csum (new_deployment), - ostree_deployment_get_bootcsum (new_deployment), - ostree_deployment_get_bootserial (new_deployment)); - /* FIXME: Cleaning up after update should be non-fatal, since we've * already successfully deployed the new OS. This clearly is a * workaround for a more serious issue, likely related to concurrent From a49c5e9ec814010f483c41b8994b46acdbf2ffee Mon Sep 17 00:00:00 2001 From: Dan Nicholson Date: Wed, 5 Jan 2022 14:55:40 -0700 Subject: [PATCH 2/3] tests: Bump slow test timeout to 600 seconds On our CI and package builders `test-update-install-flatpaks` often exceeds the 360 second timeout. Even on my fast laptop it routinely takes nearly 300 seconds. Bump the timeout for slow tests to 600 seconds to ensure it has time to complete. https://phabricator.endlessm.com/T5658 --- tests/meson.build | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/meson.build b/tests/meson.build index da3beeaba..6e5083f51 100644 --- a/tests/meson.build +++ b/tests/meson.build @@ -106,7 +106,7 @@ foreach test_name, extra_args : test_programs exe, env: envs, suite: ['eos-updater'] + extra_args.get('suite', []), - timeout: extra_args.get('slow', false) ? 360 : 60, + timeout: extra_args.get('slow', false) ? 
600 : 60, is_parallel: extra_args.get('parallel', true), ) endforeach From 9fa6754cfbcf6a50a5846d77bef7ba79ba7b275e Mon Sep 17 00:00:00 2001 From: Dan Nicholson Date: Mon, 10 Jan 2022 10:37:04 -0700 Subject: [PATCH 3/3] eos-updater: Add support for OSTree sysroot autocleanup When OSTree staged deployments are used, the old rollback deployment is deleted during system shutdown. To keep from slowing down shutdown, the OSTree repo is not pruned at that time. That means that even though the deployment was deleted, the objects are still on disk. Since that may be a significant amount of wasted disk space, the full cleanup with repo pruning needs to be run at some time after rebooting. See https://github.com/ostreedev/ostree/issues/2510 for details. To detect when cleanup is necessary, a systemd drop-in is added to touch the `/sysroot/.cleanup` file after `ostree-finalize-staged.service` has finalized the new deployment. The reason to use a drop-in for `ostree-finalize-staged.service` rather than creating the file from `eos-updater` is to avoid the situation where an unclean shutdown occurs and the new deployment is not finalized. In that case, cleanup would be run unnecessarily on the next boot. A new systemd service, `eos-updater-autocleanup.service`, is added to run `ostree admin cleanup` when `/sysroot/.cleanup` exists and then delete it afterwards. This adds a dependency on the `ostree` CLI but a separate program could be provided calling the `ostree_sysroot_cleanup` API and deleting the `/sysroot/.cleanup` file itself. 
https://phabricator.endlessm.com/T5658 --- debian/control | 1 + debian/eos-updater.install | 2 + eos-updater/apply.c | 9 ++++- .../eos-updater-autocleanup.service.in | 37 +++++++++++++++++++ eos-updater/finalize-autocleanup.conf.in | 9 +++++ eos-updater/meson.build | 15 ++++++++ 6 files changed, 72 insertions(+), 1 deletion(-) create mode 100644 eos-updater/eos-updater-autocleanup.service.in create mode 100644 eos-updater/finalize-autocleanup.conf.in diff --git a/debian/control b/debian/control index 7158d342b..1750c3c7d 100644 --- a/debian/control +++ b/debian/control @@ -39,6 +39,7 @@ Multi-arch: no Depends: gir1.2-glib-2.0, mogwai-scheduled, + ostree, python3-gi, systemd (>= 200), ${misc:Depends}, diff --git a/debian/eos-updater.install b/debian/eos-updater.install index fed070954..144e100c3 100644 --- a/debian/eos-updater.install +++ b/debian/eos-updater.install @@ -2,12 +2,14 @@ lib/systemd/system/eos-autoupdater.service lib/systemd/system/eos-autoupdater.timer lib/systemd/system/eos-updater-avahi.path lib/systemd/system/eos-updater-avahi.service +lib/systemd/system/eos-updater-autocleanup.service lib/systemd/system/eos-updater-flatpak-installer.service lib/systemd/system/eos-updater-flatpak-installer-fallback.service lib/systemd/system/eos-updater-flatpak-installer-fallback.timer lib/systemd/system/eos-updater.service lib/systemd/system/eos-update-server.service lib/systemd/system/eos-update-server.socket +lib/systemd/system/ostree-finalize-staged.service.d/autocleanup.conf usr/libexec/eos-updater usr/libexec/eos-autoupdater usr/libexec/eos-updater-avahi diff --git a/eos-updater/apply.c b/eos-updater/apply.c index a2bb1bc9a..374d76e4f 100644 --- a/eos-updater/apply.c +++ b/eos-updater/apply.c @@ -349,7 +349,14 @@ apply_internal (ApplyData *apply_data, /* FIXME: Cleaning up after update should be non-fatal, since we've * already successfully deployed the new OS. 
This clearly is a * workaround for a more serious issue, likely related to concurrent - * prunes (https://phabricator.endlessm.com/T16736). */ + * prunes (https://phabricator.endlessm.com/T16736). + * + * TODO: When using staged deployments, there's likely nothing to + * prune since the old rollback deployment isn't removed until the + * staged deployment is finalized during system shutdown. Pruning + * happens during the subsequent boot, so this cleanup could be + * skipped for staged deploys. + */ if (!ostree_sysroot_cleanup (sysroot, cancellable, &local_error)) g_warning ("Failed to clean up the sysroot after successful deployment: %s", local_error->message); diff --git a/eos-updater/eos-updater-autocleanup.service.in b/eos-updater/eos-updater-autocleanup.service.in new file mode 100644 index 000000000..0d20e2e44 --- /dev/null +++ b/eos-updater/eos-updater-autocleanup.service.in @@ -0,0 +1,37 @@ +[Unit] +Description=Automatically cleanup after staged Endless OS Updater deployment +Documentation=man:ostree-admin-cleanup(1) man:eos-updater(8) + +# Run ostree admin cleanup only if /sysroot/.cleanup is present and then +# delete it when cleanup is successful. +# +# FIXME: Drop this when https://github.com/ostreedev/ostree/issues/2510 +# is resolved. +ConditionPathExists=/sysroot/.cleanup + +# We want this to be triggered by multi-user.target but not block it via +# the default After added to target units since pruning the repo can be +# slow. See the Default Dependencies sections in systemd.service(5) and +# systemd.target(5). +DefaultDependencies=no +Requires=sysinit.target +After=sysinit.target basic.target +Conflicts=shutdown.target +Before=shutdown.target + +[Service] +Type=oneshot +RemainAfterExit=yes +ExecStart=@ostree@ admin cleanup +ExecStart=/bin/rm -f /sysroot/.cleanup + +# Only /sysroot and /boot need to be written to. 
+ProtectSystem=strict +ReadWritePaths=/sysroot /boot + +# This will be allowed to run in the background, so try to make it less +# disruptive while it prunes the repo. +IOSchedulingClass=idle + +[Install] +WantedBy=multi-user.target diff --git a/eos-updater/finalize-autocleanup.conf.in b/eos-updater/finalize-autocleanup.conf.in new file mode 100644 index 000000000..f1804947d --- /dev/null +++ b/eos-updater/finalize-autocleanup.conf.in @@ -0,0 +1,9 @@ +# This is a drop-in file for ostree-finalize-staged.service. +# +# FIXME: Drop this when https://github.com/ostreedev/ostree/issues/2510 +# is resolved. + +[Service] +# After finalizing the staged deployment, touch the .cleanup file so +# that the cleanup can be completed on the next boot. +ExecStop=-/bin/touch /sysroot/.cleanup diff --git a/eos-updater/meson.build b/eos-updater/meson.build index dd4d5a19a..61156308e 100644 --- a/eos-updater/meson.build +++ b/eos-updater/meson.build @@ -105,6 +105,7 @@ install_data( # systemd files config = configuration_data() config.set('libexecdir', join_paths(get_option('prefix'), get_option('libexecdir'))) +config.set('ostree', find_program('ostree').path()) configure_file( input: 'eos-updater.service.in', @@ -113,6 +114,20 @@ configure_file( configuration: config, ) +configure_file( + input: 'eos-updater-autocleanup.service.in', + output: 'eos-updater-autocleanup.service', + install_dir: dependency('systemd').get_pkgconfig_variable('systemdsystemunitdir'), + configuration: config, +) + +configure_file( + input: 'finalize-autocleanup.conf.in', + output: 'autocleanup.conf', + install_dir: join_paths(dependency('systemd').get_pkgconfig_variable('systemdsystemunitdir'), 'ostree-finalize-staged.service.d'), + configuration: config, +) + # Example configuration install_data( files('eos-updater.conf'),