update: support upgrading a subset of nodes
In a large cluster deployment it can be useful to split the upgrade and
only upgrade a group of nodes at a time.
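
For example, with the play tags added by this change you can upgrade only the
monitors, then a single OSD node (the inventory file "hosts" and the node name
"osd1" below are illustrative placeholders; the actual CI invocations are in
tox-subset_update.ini further down):

    ansible-playbook -vv -i hosts infrastructure-playbooks/rolling_update.yml --tags=mons -e ireallymeanit=yes
    ansible-playbook -vv -i hosts infrastructure-playbooks/rolling_update.yml --tags=osds --limit=osd1 -e ireallymeanit=yes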

Closes: https://bugzilla.redhat.com/show_bug.cgi?id=2014304

Signed-off-by: Guillaume Abrioux <gabrioux@redhat.com>
(cherry picked from commit e5cf9db)
guits committed Oct 25, 2021
1 parent 3edc6ac commit ca25ebb
Showing 2 changed files with 123 additions and 5 deletions.
32 changes: 27 additions & 5 deletions infrastructure-playbooks/rolling_update.yml
@@ -32,6 +32,7 @@

- name: confirm whether user really meant to upgrade the cluster
hosts: localhost
tags: always
become: false
gather_facts: false
vars:
@@ -65,7 +66,6 @@


- name: gather facts and check the init system

hosts:
- "{{ mon_group_name|default('mons') }}"
- "{{ osd_group_name|default('osds') }}"
@@ -77,7 +77,7 @@
- "{{ client_group_name|default('clients') }}"
- "{{ iscsi_gw_group_name|default('iscsigws') }}"
- "{{ monitoring_group_name|default('monitoring') }}"

tags: always
any_errors_fatal: True
become: True
gather_facts: False
@@ -154,6 +154,7 @@
rolling_update: true

- name: upgrade ceph mon cluster
tags: mons
vars:
health_mon_check_retries: 5
health_mon_check_delay: 15
@@ -333,6 +334,7 @@

- name: reset mon_host
hosts: "{{ mon_group_name|default('mons') }}"
tags: always
become: True
gather_facts: false
tasks:
@@ -350,6 +352,7 @@
health_mon_check_delay: 15
upgrade_ceph_packages: True
hosts: "{{ mon_group_name|default('mons') }}"
tags: mgrs
serial: 1
become: True
gather_facts: false
@@ -385,6 +388,7 @@
upgrade_ceph_packages: True
ceph_release: "{{ ceph_stable_release }}"
hosts: "{{ mgr_group_name|default('mgrs') }}"
tags: mgrs
serial: 1
become: True
gather_facts: false
@@ -420,6 +424,7 @@

- name: set osd flags
hosts: "{{ mon_group_name | default('mons') }}[0]"
tags: osds
become: True
gather_facts: false
tasks:
@@ -481,8 +486,8 @@
health_osd_check_retries: 40
health_osd_check_delay: 30
upgrade_ceph_packages: True

hosts: "{{ osd_group_name|default('osds') }}"
tags: osds
serial: 1
become: True
gather_facts: false
@@ -558,6 +563,7 @@

- name: complete osd upgrade
hosts: "{{ mon_group_name|default('mons') }}[0]"
tags: osds
become: True
gather_facts: false
tasks:
@@ -599,6 +605,7 @@

- name: upgrade ceph mdss cluster, deactivate all rank > 0
hosts: "{{ mon_group_name | default('mons') }}[0]"
tags: mdss
become: true
gather_facts: false
tasks:
@@ -700,6 +707,7 @@
vars:
upgrade_ceph_packages: True
hosts: active_mdss
tags: mdss
become: true
gather_facts: false
tasks:
@@ -746,6 +754,7 @@
vars:
upgrade_ceph_packages: True
hosts: standby_mdss
tags: mdss
become: True
gather_facts: false

@@ -794,6 +803,7 @@
vars:
upgrade_ceph_packages: True
hosts: "{{ rgw_group_name|default('rgws') }}"
tags: rgws
serial: 1
become: True
gather_facts: false
@@ -838,6 +848,7 @@
vars:
upgrade_ceph_packages: True
hosts: "{{ rbdmirror_group_name|default('rbdmirrors') }}"
tags: rbdmirrors
serial: 1
become: True
gather_facts: false
@@ -871,6 +882,7 @@
vars:
upgrade_ceph_packages: True
hosts: "{{ nfs_group_name|default('nfss') }}"
tags: nfss
serial: 1
become: True
gather_facts: false
@@ -919,8 +931,8 @@
- name: upgrade ceph iscsi gateway node
vars:
upgrade_ceph_packages: True
hosts:
- "{{ iscsi_gw_group_name|default('iscsigws') }}"
hosts: "{{ iscsi_gw_group_name|default('iscsigws') }}"
tags: iscsigws
serial: 1
become: True
gather_facts: false
@@ -962,6 +974,7 @@
vars:
upgrade_ceph_packages: True
hosts: "{{ client_group_name|default('clients') }}"
tags: clients
serial: "{{ client_update_batch | default(20) }}"
become: True
gather_facts: false
@@ -993,6 +1006,9 @@
- "{{ rgw_group_name | default('rgws') }}"
- "{{ rbdmirror_group_name | default('rbdmirrors') }}"
- "{{ mgr_group_name | default('mgrs') }}"
tags:
- post_upgrade
- crash
gather_facts: false
become: true
tasks:
@@ -1020,6 +1036,7 @@

- name: complete upgrade
hosts: "{{ mon_group_name | default('mons') }}"
tags: post_upgrade
become: True
gather_facts: false
tasks:
@@ -1056,6 +1073,7 @@
- "{{ nfs_group_name|default('nfss') }}"
- "{{ iscsi_gw_group_name|default('iscsigws') }}"
- "{{ monitoring_group_name|default('monitoring') }}"
tags: monitoring
gather_facts: false
become: true
tasks:
@@ -1086,6 +1104,7 @@

- name: upgrade monitoring node
hosts: "{{ monitoring_group_name }}"
tags: monitoring
gather_facts: false
become: true
tasks:
@@ -1117,6 +1136,7 @@

- name: upgrade ceph dashboard
hosts: "{{ groups[mgr_group_name] | default(groups[mon_group_name]) | default(omit) }}"
tags: monitoring
gather_facts: false
become: true
tasks:
@@ -1136,6 +1156,7 @@

- name: switch any existing crush buckets to straw2
hosts: "{{ mon_group_name | default('mons') }}[0]"
tags: post_upgrade
become: true
any_errors_fatal: true
gather_facts: false
@@ -1177,6 +1198,7 @@

- name: show ceph status
hosts: "{{ mon_group_name|default('mons') }}"
tags: always
become: True
gather_facts: false
tasks:
96 changes: 96 additions & 0 deletions tox-subset_update.ini
@@ -0,0 +1,96 @@
[tox]
envlist = centos-{container,non_container}-subset_update

skipsdist = True

[testenv]
whitelist_externals =
vagrant
bash
git
pip
passenv=*
setenv=
ANSIBLE_SSH_ARGS = -F {changedir}/vagrant_ssh_config -o ControlMaster=auto -o ControlPersist=600s -o PreferredAuthentications=publickey
ANSIBLE_CONFIG = {toxinidir}/ansible.cfg
ANSIBLE_CALLBACK_WHITELIST = profile_tasks
ANSIBLE_CACHE_PLUGIN = memory
ANSIBLE_GATHERING = implicit
# only available for ansible >= 2.5
ANSIBLE_STDOUT_CALLBACK = yaml
# non_container: DEV_SETUP = True
# Set the vagrant box image to use
centos-non_container: CEPH_ANSIBLE_VAGRANT_BOX = centos/8
centos-container: CEPH_ANSIBLE_VAGRANT_BOX = centos/8

INVENTORY = {env:_INVENTORY:hosts}
container: CONTAINER_DIR = /container
container: PLAYBOOK = site-container.yml.sample
non_container: PLAYBOOK = site.yml.sample

UPDATE_CEPH_DOCKER_IMAGE_TAG = latest-pacific
UPDATE_CEPH_DEV_BRANCH = master
UPDATE_CEPH_DEV_SHA1 = latest
ROLLING_UPDATE = True
deps= -r{toxinidir}/tests/requirements.txt
changedir={toxinidir}/tests/functional/subset_update{env:CONTAINER_DIR:}
commands=
bash {toxinidir}/tests/scripts/vagrant_up.sh --no-provision {posargs:--provider=virtualbox}
bash {toxinidir}/tests/scripts/generate_ssh_config.sh {changedir}

ansible-playbook -vv -i {changedir}/{env:INVENTORY} {toxinidir}/tests/functional/setup.yml

ansible-playbook -vv -i {changedir}/{env:INVENTORY} {toxinidir}/{env:PLAYBOOK:site.yml.sample} --extra-vars "\
delegate_facts_host={env:DELEGATE_FACTS_HOST:True} \
ceph_docker_registry_auth=True \
ceph_docker_registry_username={env:DOCKER_HUB_USERNAME} \
ceph_docker_registry_password={env:DOCKER_HUB_PASSWORD} \
"

# upgrade mons
ansible-playbook -vv -i {changedir}/{env:INVENTORY} {toxinidir}/infrastructure-playbooks/rolling_update.yml --tags=mons --extra-vars "\
ireallymeanit=yes \
ceph_docker_registry_auth=True \
ceph_docker_registry_username={env:DOCKER_HUB_USERNAME} \
ceph_docker_registry_password={env:DOCKER_HUB_PASSWORD} \
"
# upgrade mgrs
ansible-playbook -vv -i {changedir}/{env:INVENTORY} {toxinidir}/infrastructure-playbooks/rolling_update.yml --tags=mgrs --extra-vars "\
ireallymeanit=yes \
ceph_docker_registry_auth=True \
ceph_docker_registry_username={env:DOCKER_HUB_USERNAME} \
ceph_docker_registry_password={env:DOCKER_HUB_PASSWORD} \
"
# upgrade osd1
ansible-playbook -vv -i {changedir}/{env:INVENTORY} {toxinidir}/infrastructure-playbooks/rolling_update.yml --limit=osd1 --tags=osds --extra-vars "\
ireallymeanit=yes \
ceph_docker_registry_auth=True \
ceph_docker_registry_username={env:DOCKER_HUB_USERNAME} \
ceph_docker_registry_password={env:DOCKER_HUB_PASSWORD} \
"
# upgrade remaining osds (serially)
ansible-playbook -vv -i {changedir}/{env:INVENTORY} {toxinidir}/infrastructure-playbooks/rolling_update.yml --limit='osds:!osd1' --tags=osds --extra-vars "\
ireallymeanit=yes \
ceph_docker_registry_auth=True \
ceph_docker_registry_username={env:DOCKER_HUB_USERNAME} \
ceph_docker_registry_password={env:DOCKER_HUB_PASSWORD} \
"
# upgrade rgws
ansible-playbook -vv -i {changedir}/{env:INVENTORY} {toxinidir}/infrastructure-playbooks/rolling_update.yml --tags=rgws --extra-vars "\
ireallymeanit=yes \
ceph_docker_registry_auth=True \
ceph_docker_registry_username={env:DOCKER_HUB_USERNAME} \
ceph_docker_registry_password={env:DOCKER_HUB_PASSWORD} \
"
# post upgrade actions
ansible-playbook -vv -i {changedir}/{env:INVENTORY} {toxinidir}/infrastructure-playbooks/rolling_update.yml --tags=post_upgrade --extra-vars "\
ireallymeanit=yes \
ceph_docker_registry_auth=True \
ceph_docker_registry_username={env:DOCKER_HUB_USERNAME} \
ceph_docker_registry_password={env:DOCKER_HUB_PASSWORD} \
"


bash -c "CEPH_STABLE_RELEASE=quincy py.test --reruns 5 --reruns-delay 1 -n 8 --durations=0 --sudo -v --connection=ansible --ansible-inventory={changedir}/{env:INVENTORY} --ssh-config={changedir}/vagrant_ssh_config {toxinidir}/tests/functional/tests"

vagrant destroy --force
