---
# =============================================================================
# proxmox_ceph — tasks
# Manages CEPH noout flag and health checks during maintenance.
# Skips gracefully if CEPH is not configured on this cluster.
#
# Variables read by these tasks (not defined in this file):
#   ceph_action           one of 'status', 'set_noout', 'clear_noout',
#                         'check_health' — selects which section runs
#   ceph_abort_on_error   truthy => fail the play when health is HEALTH_ERR
#   ceph_warn_on_warning  truthy => print a warning when health is HEALTH_WARN
#   ceph_health_retries   retry count for the "wait for healthy" loops
#   ceph_health_delay     delay between those retries
#
# Every task uses run_once, so each ceph command is issued from a single host.
# =============================================================================

# ── Detect CEPH ───────────────────────────────────────────────────────────────

# Probe only: failed_when is disabled, so ANY non-zero exit of `ceph status`
# is recorded rather than failing the play.
# NOTE(review): a missing binary and an unreachable cluster are
# indistinguishable here — both end up as "CEPH not configured".
- name: "CEPH | Detect if CEPH is configured"
  ansible.builtin.command: ceph status
  register: ceph_detect
  changed_when: false
  failed_when: false
  run_once: true

- name: "CEPH | Set CEPH enabled fact"
  ansible.builtin.set_fact:
    ceph_is_enabled: "{{ ceph_detect.rc == 0 }}"
  run_once: true

- name: "CEPH | Skip — CEPH not configured on this cluster"
  ansible.builtin.debug:
    msg: "CEPH is not configured on this cluster — skipping all CEPH tasks."
  when: not ceph_is_enabled
  run_once: true

# ── CEPH status ───────────────────────────────────────────────────────────────

- name: "CEPH | Get cluster status"
  ansible.builtin.command: ceph status --format json
  register: ceph_status_raw
  changed_when: false
  run_once: true
  when: ceph_is_enabled

- name: "CEPH | Parse status"
  ansible.builtin.set_fact:
    ceph_status: "{{ ceph_status_raw.stdout | from_json }}"
  run_once: true
  when: ceph_is_enabled

- name: "CEPH | Log health"
  ansible.builtin.debug:
    msg: "CEPH health: {{ ceph_status.health.status }}"
  run_once: true
  when:
    - ceph_is_enabled
    - ceph_action == 'status'

# ── Health check ──────────────────────────────────────────────────────────────

# The two health-gate tasks below are not filtered by ceph_action: they are
# evaluated on every invocation where CEPH is enabled.
#
# `| bool` is required so the override documented in the message works when
# passed on the command line (-e ceph_abort_on_error=false), where the value
# arrives as the string "false" rather than a YAML boolean.
- name: "CEPH | Check health | Abort if HEALTH_ERR"
  ansible.builtin.fail:
    msg: >-
      CEPH is in HEALTH_ERR state — aborting to prevent data loss.
      Run 'ceph status' to investigate.
      Set ceph_abort_on_error=false to override.
  when:
    - ceph_is_enabled
    - ceph_abort_on_error | bool
    - ceph_status.health.status == 'HEALTH_ERR'
  run_once: true

# Lists the names of the active health checks (keys of health.checks); an
# absent `checks` mapping is treated as empty.
- name: "CEPH | Check health | Warn on HEALTH_WARN"
  ansible.builtin.debug:
    msg: >-
      WARNING: CEPH is in HEALTH_WARN state.
      Proceeding — set ceph_warn_on_warning=false to suppress this message.
      Checks: {{ ceph_status.health.checks | default({}) | dict2items | map(attribute='key') | list }}
  when:
    - ceph_is_enabled
    - ceph_warn_on_warning | bool
    - ceph_status.health.status == 'HEALTH_WARN'
  run_once: true

# ── Set noout ─────────────────────────────────────────────────────────────────

- name: "CEPH | Set noout flag"
  ansible.builtin.command: ceph osd set noout
  changed_when: true
  run_once: true
  when:
    - ceph_is_enabled
    - ceph_action == 'set_noout'

- name: "CEPH | Confirm noout set"
  ansible.builtin.debug:
    msg: "✓ CEPH noout flag SET — OSDs will not be marked out during maintenance."
  when:
    - ceph_is_enabled
    - ceph_action == 'set_noout'
  run_once: true

# ── Clear noout ───────────────────────────────────────────────────────────────

- name: "CEPH | Clear noout flag"
  ansible.builtin.command: ceph osd unset noout
  changed_when: true
  run_once: true
  when:
    - ceph_is_enabled
    - ceph_action == 'clear_noout'

# Despite the task name, HEALTH_WARN is accepted as well as HEALTH_OK.
# The rc check comes first so that a failed `ceph status` call (empty stdout)
# is simply retried instead of breaking the from_json parse in the next
# condition.
- name: "CEPH | Wait for HEALTH_OK after clearing noout"
  ansible.builtin.command: ceph status --format json
  register: ceph_recovery_check
  changed_when: false
  until:
    - ceph_recovery_check.rc | default(1) == 0
    - "(ceph_recovery_check.stdout | from_json).health.status in ['HEALTH_OK', 'HEALTH_WARN']"
  retries: "{{ ceph_health_retries }}"
  delay: "{{ ceph_health_delay }}"
  run_once: true
  when:
    - ceph_is_enabled
    - ceph_action == 'clear_noout'

- name: "CEPH | Log recovery status"
  ansible.builtin.debug:
    msg: >-
      ✓ CEPH noout CLEARED — health:
      {{ (ceph_recovery_check.stdout | from_json).health.status }}
  when:
    - ceph_is_enabled
    - ceph_action == 'clear_noout'
  run_once: true

# ── check_health action ───────────────────────────────────────────────────────

# Polls until the cluster reports HEALTH_OK or HEALTH_WARN. As above, the rc
# check keeps a transient command failure inside the retry loop.
- name: "CEPH | Wait for healthy state"
  ansible.builtin.command: ceph status --format json
  register: ceph_health_wait
  changed_when: false
  until:
    - ceph_health_wait.rc | default(1) == 0
    - "(ceph_health_wait.stdout | from_json).health.status in ['HEALTH_OK', 'HEALTH_WARN']"
  retries: "{{ ceph_health_retries }}"
  delay: "{{ ceph_health_delay }}"
  run_once: true
  when:
    - ceph_is_enabled
    - ceph_action == 'check_health'

- name: "CEPH | Health check result"
  ansible.builtin.debug:
    msg: "CEPH health: {{ (ceph_health_wait.stdout | from_json).health.status }}"
  when:
    - ceph_is_enabled
    - ceph_action == 'check_health'
  run_once: true