cephadm-adopt: make the playbook idempotent
If the cephadm-adopt.yml playbook fails during its first execution and some
daemons have already been adopted by cephadm, then the playbook can't be
rerun because the old containers no longer exist.

Error: no container with name or ID ceph-mon-xxx found: no such container

Once a daemon has been adopted, its old systemd unit no longer exists, so
any systemd call against that unit will fail.

Closes: https://bugzilla.redhat.com/show_bug.cgi?id=1918424

Signed-off-by: Dimitri Savineau <dsavinea@redhat.com>
(cherry picked from commit 6886700)
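
The first half of the fix consolidates every ceph CLI call behind a single
ceph_cmd fact that spawns a throwaway container ('run --rm') instead of
exec'ing into the existing ceph-mon container, so the calls keep working
once that container has been adopted and removed. A minimal sketch of the
pattern, using a placeholder image reference in place of the playbook's
registry/image variables:

    - name: set_fact ceph_cmd
      set_fact:
        # 'run --rm' starts a fresh container per call, so nothing here
        # depends on the old ceph-mon-<hostname> container still existing;
        # plain 'ceph' is used on non-containerized deployments.
        # quay.io/ceph/ceph:v16 is a placeholder image reference.
        ceph_cmd: "{{ container_binary + ' run --rm --net=host -v /etc/ceph:/etc/ceph:z --entrypoint=ceph quay.io/ceph/ceph:v16' if containerized_deployment | bool else 'ceph' }} --cluster {{ cluster }}"

    - name: get current fsid
      command: "{{ ceph_cmd }} fsid"
      register: current_fsid
      changed_when: false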
dsavineau authored and guits committed Oct 18, 2021
1 parent 360cfb1 commit 864acaa
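
The second half makes the legacy stop-and-disable tasks tolerant of systemd
units that a previous partial run has already removed: each of them gains
failed_when: false. A minimal sketch of that pattern, with ceph-mds standing
in for the various units it is applied to:

    - name: stop and disable ceph-mds systemd service
      service:
        name: "ceph-mds@{{ ansible_facts['hostname'] }}"
        state: stopped
        enabled: false
      # The unit disappears once cephadm has adopted the daemon, so a
      # missing unit must not abort a re-run of the playbook.
      failed_when: false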
Showing 1 changed file with 47 additions and 28 deletions.

infrastructure-playbooks/cephadm-adopt.yml
@@ -169,47 +169,47 @@

- name: set_fact ceph_cmd
set_fact:
-container_exec_cmd: "{{ container_binary }} exec ceph-mon-{{ hostvars[groups[mon_group_name][0]]['ansible_facts']['hostname'] }}"
-when: containerized_deployment | bool
+ceph_cmd: "{{ container_binary + ' run --rm --net=host -v /etc/ceph:/etc/ceph:z -v /var/lib/ceph:/var/lib/ceph:z -v /var/run/ceph:/var/run/ceph:z --entrypoint=ceph ' + ceph_docker_registry + '/' + ceph_docker_image + ':' + ceph_docker_image_tag if containerized_deployment | bool else 'ceph' }} --cluster {{ cluster }}"

- name: get current fsid
command: "{{ container_exec_cmd | default('') }} ceph --admin-daemon /var/run/ceph/{{ cluster }}-mon.{{ hostvars[groups[mon_group_name][0]]['ansible_facts']['hostname'] }}.asok config get fsid --format json"
command: "{{ ceph_cmd }} fsid"
register: current_fsid
run_once: true
changed_when: false
delegate_to: "{{ groups[mon_group_name][0] }}"

- name: get a minimal ceph configuration
command: "{{ container_exec_cmd }} ceph config generate-minimal-conf"
command: "{{ ceph_cmd }} config generate-minimal-conf"
register: minimal_config
run_once: true
changed_when: false
delegate_to: "{{ groups[mon_group_name][0] }}"

- name: set_fact fsid
set_fact:
fsid: "{{ (current_fsid.stdout | from_json).fsid }}"
fsid: "{{ current_fsid.stdout }}"
run_once: true

- name: enable cephadm mgr module
command: "{{ container_exec_cmd | default('') }} ceph --cluster {{ cluster }} mgr module enable cephadm"
command: "{{ ceph_cmd }} mgr module enable cephadm"
changed_when: false
run_once: true
delegate_to: '{{ groups[mon_group_name][0] }}'

- name: set cephadm as orchestrator backend
command: "{{ container_exec_cmd | default('') }} ceph --cluster {{ cluster }} orch set backend cephadm"
command: "{{ ceph_cmd }} orch set backend cephadm"
changed_when: false
run_once: true
delegate_to: '{{ groups[mon_group_name][0] }}'

- name: generate cephadm ssh key
command: "{{ container_exec_cmd | default('') }} ceph --cluster {{ cluster }} cephadm generate-key"
command: "{{ ceph_cmd }} cephadm generate-key"
changed_when: false
run_once: true
delegate_to: '{{ groups[mon_group_name][0] }}'

- name: get the cephadm ssh pub key
command: "{{ container_exec_cmd | default('') }} ceph --cluster {{ cluster }} cephadm get-pub-key"
command: "{{ ceph_cmd }} cephadm get-pub-key"
changed_when: false
run_once: true
register: cephadm_pubpkey
@@ -221,7 +221,7 @@
key: '{{ cephadm_pubpkey.stdout }}'

- name: set cephadm ssh user to {{ cephadm_ssh_user | default('root') }}
command: "{{ container_exec_cmd | default('') }} ceph --cluster {{ cluster }} cephadm set-user {{ cephadm_ssh_user | default('root') }}"
command: "{{ ceph_cmd }} cephadm set-user {{ cephadm_ssh_user | default('root') }}"
changed_when: false
run_once: true
delegate_to: "{{ groups[mon_group_name][0] }}"
@@ -233,13 +233,13 @@
CEPHADM_IMAGE: '{{ ceph_docker_registry }}/{{ ceph_docker_image }}:{{ ceph_docker_image_tag }}'

- name: set default container image in ceph configuration
command: "{{ container_exec_cmd | default('') }} ceph --cluster {{ cluster }} config set global container_image {{ ceph_docker_registry }}/{{ ceph_docker_image }}:{{ ceph_docker_image_tag }}"
command: "{{ ceph_cmd }} config set global container_image {{ ceph_docker_registry }}/{{ ceph_docker_image }}:{{ ceph_docker_image_tag }}"
changed_when: false
run_once: true
delegate_to: '{{ groups[mon_group_name][0] }}'

- name: set container image base in ceph configuration
command: "{{ container_exec_cmd | default('') }} ceph --cluster {{ cluster }} config set mgr mgr/cephadm/container_image_base {{ ceph_docker_registry }}/{{ ceph_docker_image }}"
command: "{{ ceph_cmd }} config set mgr mgr/cephadm/container_image_base {{ ceph_docker_registry }}/{{ ceph_docker_image }}"
changed_when: false
run_once: true
delegate_to: '{{ groups[mon_group_name][0] }}'
@@ -249,32 +249,32 @@
run_once: true
block:
- name: set alertmanager container image in ceph configuration
command: "{{ container_exec_cmd | default('') }} ceph --cluster {{ cluster }} config set mgr mgr/cephadm/container_image_alertmanager {{ alertmanager_container_image }}"
command: "{{ ceph_cmd }} config set mgr mgr/cephadm/container_image_alertmanager {{ alertmanager_container_image }}"
changed_when: false
delegate_to: '{{ groups[mon_group_name][0] }}'

- name: set grafana container image in ceph configuration
command: "{{ container_exec_cmd | default('') }} ceph --cluster {{ cluster }} config set mgr mgr/cephadm/container_image_grafana {{ grafana_container_image }}"
command: "{{ ceph_cmd }} config set mgr mgr/cephadm/container_image_grafana {{ grafana_container_image }}"
changed_when: false
delegate_to: '{{ groups[mon_group_name][0] }}'

- name: set node-exporter container image in ceph configuration
command: "{{ container_exec_cmd | default('') }} ceph --cluster {{ cluster }} config set mgr mgr/cephadm/container_image_node_exporter {{ node_exporter_container_image }}"
command: "{{ ceph_cmd }} config set mgr mgr/cephadm/container_image_node_exporter {{ node_exporter_container_image }}"
changed_when: false
delegate_to: '{{ groups[mon_group_name][0] }}'

- name: set prometheus container image in ceph configuration
command: "{{ container_exec_cmd | default('') }} ceph --cluster {{ cluster }} config set mgr mgr/cephadm/container_image_prometheus {{ prometheus_container_image }}"
command: "{{ ceph_cmd }} config set mgr mgr/cephadm/container_image_prometheus {{ prometheus_container_image }}"
changed_when: false
delegate_to: '{{ groups[mon_group_name][0] }}'

- name: manage nodes with cephadm
command: "{{ container_exec_cmd | default('') }} ceph --cluster {{ cluster }} orch host add {{ ansible_facts['hostname'] }} {{ ansible_facts['default_ipv4']['address'] }} {{ group_names | join(' ') }}"
command: "{{ ceph_cmd }} orch host add {{ ansible_facts['hostname'] }} {{ ansible_facts['default_ipv4']['address'] }} {{ group_names | join(' ') }}"
changed_when: false
delegate_to: '{{ groups[mon_group_name][0] }}'

- name: add ceph label for core component
command: "{{ container_exec_cmd | default('') }} ceph --cluster {{ cluster }} orch host label add {{ ansible_facts['hostname'] }} ceph"
command: "{{ ceph_cmd }} orch host label add {{ ansible_facts['hostname'] }} ceph"
changed_when: false
delegate_to: '{{ groups[mon_group_name][0] }}'
when: inventory_hostname in groups.get(mon_group_name, []) or
@@ -284,12 +284,8 @@
inventory_hostname in groups.get(mgr_group_name, []) or
inventory_hostname in groups.get(rbdmirror_group_name, [])

-- name: set_fact ceph_cmd
-set_fact:
-ceph_cmd: "{{ container_binary + ' run --rm --net=host -v /etc/ceph:/etc/ceph:z -v /var/lib/ceph:/var/lib/ceph:z -v /var/run/ceph:/var/run/ceph:z --entrypoint=ceph ' + ceph_docker_registry + '/' + ceph_docker_image + ':' + ceph_docker_image_tag if containerized_deployment | bool else 'ceph' }}"
-
- name: get the client.admin keyring
command: "{{ ceph_cmd }} --cluster {{ cluster }} auth get client.admin"
command: "{{ ceph_cmd }} auth get client.admin"
changed_when: false
run_once: true
delegate_to: '{{ groups[mon_group_name][0] }}'
@@ -312,7 +308,7 @@
- "{{ groups.get(rbdmirror_group_name, []) }}"

- name: assimilate ceph configuration
command: "{{ ceph_cmd }} --cluster {{ cluster }} config assimilate-conf -i /etc/ceph/{{ cluster }}.conf"
command: "{{ ceph_cmd }} config assimilate-conf -i /etc/ceph/{{ cluster }}.conf"
changed_when: false
when: inventory_hostname in groups.get(mon_group_name, []) or
inventory_hostname in groups.get(osd_group_name, []) or
@@ -483,15 +479,15 @@
name: ceph-defaults

- name: get pool list
command: "{{ ceph_cmd }} --cluster {{ cluster }} osd pool ls detail -f json"
command: "{{ ceph_cmd }} osd pool ls detail -f json"
register: pool_list
run_once: true
delegate_to: "{{ groups[mon_group_name][0] }}"
changed_when: false
check_mode: false

- name: get balancer module status
command: "{{ ceph_cmd }} --cluster {{ cluster }} balancer status -f json"
command: "{{ ceph_cmd }} balancer status -f json"
register: balancer_status_adopt
run_once: true
delegate_to: "{{ groups[mon_group_name][0] }}"
Expand All @@ -505,7 +501,7 @@
with_items: "{{ pool_list.stdout | default('{}') | from_json }}"

- name: disable balancer
command: "{{ ceph_cmd }} --cluster {{ cluster }} balancer off"
command: "{{ ceph_cmd }} balancer off"
run_once: true
delegate_to: "{{ groups[mon_group_name][0] }}"
changed_when: false
@@ -599,6 +595,12 @@
- /etc/systemd/system/ceph-osd.target
- "{{ ceph_osd_docker_run_script_path | default('/usr/share') }}/ceph-osd-run.sh"

+- name: remove osd directory
+file:
+path: "/var/lib/ceph/osd/{{ cluster }}-{{ item }}"
+state: absent
+loop: '{{ (osd_list.stdout | from_json).keys() | list }}'

- name: waiting for clean pgs...
command: "{{ cephadm_cmd }} shell --fsid {{ fsid }} -- ceph --cluster {{ cluster }} pg stat --format json"
changed_when: false
@@ -647,7 +649,7 @@
CEPHADM_IMAGE: '{{ ceph_docker_registry }}/{{ ceph_docker_image }}:{{ ceph_docker_image_tag }}'

- name: re-enable balancer
command: "{{ ceph_cmd }} --cluster {{ cluster }} balancer on"
command: "{{ ceph_cmd }} balancer on"
run_once: true
delegate_to: "{{ groups[mon_group_name][0] }}"
changed_when: false
@@ -685,6 +687,7 @@
name: "ceph-mds@{{ ansible_facts['hostname'] }}"
state: stopped
enabled: false
+failed_when: false

- name: stop and disable ceph-mds systemd target
service:
@@ -796,6 +799,7 @@
name: "ceph-radosgw@rgw.{{ ansible_facts['hostname'] }}.{{ item.instance_name }}"
state: stopped
enabled: false
+failed_when: false
loop: '{{ rgw_instances }}'

- name: stop and disable ceph-radosgw systemd target
@@ -1001,6 +1005,7 @@
name: "ceph-rbd-mirror@rbd-mirror.{{ ansible_facts['hostname'] }}"
state: stopped
enabled: false
+failed_when: false

- name: stop and disable rbd-mirror systemd target
service:
@@ -1088,12 +1093,23 @@
- name: with dashboard enabled
when: dashboard_enabled | bool
block:
+- name: ensure alertmanager/prometheus data directories are present
+file:
+path: "{{ item }}"
+state: directory
+owner: "{{ prometheus_user_id }}"
+group: "{{ prometheus_user_id }}"
+with_items:
+- "{{ alertmanager_data_dir }}"
+- "{{ prometheus_data_dir }}"

# (workaround) cephadm adopt alertmanager only stops prometheus-alertmanager systemd service
- name: stop and disable alertmanager systemd unit
service:
name: alertmanager
state: stopped
enabled: false
+failed_when: false

# (workaround) cephadm adopt alertmanager only uses /etc/prometheus/alertmanager.yml
- name: create alertmanager config symlink
@@ -1133,6 +1149,7 @@
name: prometheus
state: stopped
enabled: false
+failed_when: false

- name: remove alertmanager data symlink
file:
@@ -1196,6 +1213,7 @@
name: grafana-server
state: stopped
enabled: false
+failed_when: false

- name: adopt grafana daemon
cephadm_adopt:
@@ -1242,6 +1260,7 @@
name: node_exporter
state: stopped
enabled: false
+failed_when: false

- name: remove node_exporter systemd unit file
file:
