---
# =============================================================================
# proxmox_upgrade — preflight.yml
# Check cluster health before starting any upgrade work.
# All API checks are done in single tasks on localhost to avoid variable
# scope issues.
#
# Variables read by this file:
#   api_host, api_port              - endpoint of the Proxmox API
#   api_token_id, api_token_secret  - API token used for the Authorization header
#   api_timeout (optional)          - HTTP timeout in seconds, defaults to 30
#   ceph_enabled                    - when truthy, CEPH health is also checked
#
# Variables registered by this file:
#   quorum_check, node_check, ceph_health
# =============================================================================

# Succeeds only when `pvecm status` prints a line containing both "quorate"
# and "yes" (case-insensitive); the last grep's exit code decides the result.
- name: Preflight | Check quorum via pvecm
  ansible.builtin.shell: |
    pvecm status 2>/dev/null | grep -i "quorate" | grep -i "yes"
  register: quorum_check
  changed_when: false
  failed_when: quorum_check.rc != 0

# Queries /api2/json/nodes once from the controller and fails (rc 1) when any
# node reports a status other than "online". The summary line is printed to
# stdout as "OK: ..." or "FAIL: ..." and kept in node_check.stdout.
- name: Preflight | Check all cluster nodes online via API
  ansible.builtin.shell: |
    python3 << 'PYEOF'
    import json, os, ssl, sys, urllib.request

    # NOTE(review): TLS certificate verification is switched off here,
    # presumably because the API uses a self-signed certificate — confirm
    # this is acceptable for the target environment.
    ctx = ssl.create_default_context()
    ctx.check_hostname = False
    ctx.verify_mode = ssl.CERT_NONE

    # The Authorization header comes from the task environment so the token
    # secret is not embedded in the command text reported by Ansible.
    req = urllib.request.Request(
        "https://{{ api_host }}:{{ api_port }}/api2/json/nodes",
        headers={"Authorization": os.environ["PVE_API_AUTH"]},
    )

    try:
        # The timeout keeps an unresponsive API from hanging the preflight.
        with urllib.request.urlopen(
            req, context=ctx, timeout={{ api_timeout | default(30) | int }}
        ) as r:
            data = json.loads(r.read())["data"]
    except OSError as exc:
        # URLError, HTTPError and socket timeouts are all OSError subclasses.
        print(f"FAIL: API request failed: {exc}")
        sys.exit(1)

    offline = [n for n in data if n["status"] != "online"]
    if offline:
        print("FAIL: " + ", ".join(f"{n['node']}={n['status']}" for n in offline))
        sys.exit(1)

    print("OK: " + ", ".join(f"{n['node']}={n['status']}" for n in data))
    sys.exit(0)
    PYEOF
  environment:
    PVE_API_AUTH: "PVEAPIToken={{ api_token_id }}={{ api_token_secret }}"
  register: node_check
  changed_when: false
  failed_when: node_check.rc != 0
  delegate_to: localhost
  run_once: true

- name: Preflight | Log node status
  ansible.builtin.debug:
    msg: "{{ node_check.stdout }}"
  delegate_to: localhost
  run_once: true

# CEPH checks run only when ceph_enabled evaluates to true.
# HEALTH_OK passes silently, HEALTH_WARN passes with a warning, and any other
# output (including empty output) aborts the play.
- name: Preflight | Check CEPH health
  when: ceph_enabled | bool
  block:
    - name: Preflight | Get CEPH health status
      ansible.builtin.shell: ceph health 2>/dev/null
      register: ceph_health
      changed_when: false

    - name: Preflight | Abort if CEPH is in error state
      ansible.builtin.fail:
        msg: >
          CEPH health check FAILED — status: {{ ceph_health.stdout }}.
          Aborting upgrade. Resolve CEPH issues before retrying.
      # List items are AND-ed: abort only if neither marker is present.
      when:
        - "'HEALTH_OK' not in ceph_health.stdout"
        - "'HEALTH_WARN' not in ceph_health.stdout"

    - name: Preflight | Warn if CEPH has warnings
      ansible.builtin.debug:
        msg: "WARNING — CEPH has warnings: {{ ceph_health.stdout }}. Proceeding but monitor closely."
      when: "'HEALTH_WARN' in ceph_health.stdout"

# ceph_enabled is cast with `| bool` so the summary agrees with the condition
# that gates the CEPH block above (a string such as "false" is truthy in
# plain Jinja and would otherwise claim CEPH was checked).
- name: Preflight | Cluster health check passed
  ansible.builtin.debug:
    msg: "Cluster health check passed — all nodes online, quorum OK{{ ', CEPH checked' if (ceph_enabled | bool) else '' }}"
  delegate_to: localhost
  run_once: true