Files
ansible-msp-automations/roles/proxmox_upgrade/tasks/preflight.yml

76 lines
2.6 KiB
YAML

---
# =============================================================================
# proxmox_upgrade — preflight.yml
# Check cluster health before starting any upgrade work
# All API checks done in single tasks on localhost to avoid variable scope issues
# =============================================================================
# Runs `pvecm status` on the target host and keeps only a line that mentions
# both "quorate" and "yes" (case-insensitive). The pipeline's exit status is
# that of the last grep, so a missing match makes the task fail.
- name: Preflight | Check quorum via pvecm
  ansible.builtin.shell:
    cmd: pvecm status 2>/dev/null | grep -i "quorate" | grep -i "yes"
  register: quorum_check
  # Read-only check: never report "changed".
  changed_when: false
  failed_when: quorum_check.rc != 0
# Queries /api2/json/nodes once, from the control node, and fails when any
# returned node has a status other than "online" (or when no node is returned
# at all). stdout carries a one-line "OK: ..." / "FAIL: ..." summary that is
# registered as node_check.
- name: Preflight | Check all cluster nodes online via API
  ansible.builtin.shell: |
    python3 << 'PYEOF'
    import json
    import os
    import ssl
    import sys
    import urllib.request

    # NOTE(review): certificate verification is switched off; presumably the
    # API endpoint uses a self-signed certificate. Confirm this is intended.
    ctx = ssl.create_default_context()
    ctx.check_hostname = False
    ctx.verify_mode = ssl.CERT_NONE

    # The Authorization header is read from the environment so that the token
    # secret is not part of the command text Ansible prints on failure.
    req = urllib.request.Request(
        "https://{{ api_host }}:{{ api_port }}/api2/json/nodes",
        headers={"Authorization": os.environ["PVE_API_AUTH"]},
    )
    # Bounded wait: an unreachable API must fail the preflight, not hang it.
    with urllib.request.urlopen(req, context=ctx, timeout=30) as r:
        data = json.loads(r.read())["data"]

    # An empty list means nothing was actually verified; treat it as a failure
    # instead of printing an empty "OK: " line.
    if not data:
        print("FAIL: API returned no nodes")
        sys.exit(1)

    offline = [n for n in data if n["status"] != "online"]
    if offline:
        print("FAIL: " + ", ".join(f"{n['node']}={n['status']}" for n in offline))
        sys.exit(1)

    print("OK: " + ", ".join(f"{n['node']}={n['status']}" for n in data))
    sys.exit(0)
    PYEOF
  environment:
    PVE_API_AUTH: "PVEAPIToken={{ api_token_id }}={{ api_token_secret }}"
  register: node_check
  # Read-only check: never report "changed".
  changed_when: false
  failed_when: node_check.rc != 0
  delegate_to: localhost
  run_once: true
# Prints the stdout captured in node_check, once, from the control node.
- name: Preflight | Log node status
  run_once: true
  delegate_to: localhost
  ansible.builtin.debug:
    msg: "{{ node_check.stdout }}"
# CEPH gate, skipped entirely unless ceph_enabled evaluates to true.
# The output of `ceph health` must contain HEALTH_OK or HEALTH_WARN to
# continue; HEALTH_WARN additionally prints a warning.
- name: Preflight | Check CEPH health
  when: ceph_enabled | bool
  block:
    # No shell features are needed, so the command module is used. stderr is
    # left intact so that a failing `ceph` call reports why it failed.
    - name: Preflight | Get CEPH health status
      ansible.builtin.command:
        cmd: ceph health
      register: ceph_health
      changed_when: false

    # Both conditions must hold (a list under `when` is AND-ed): the output
    # names neither of the two accepted states.
    - name: Preflight | Abort if CEPH is in error state
      ansible.builtin.fail:
        msg: >
          CEPH health check FAILED — status: {{ ceph_health.stdout }}.
          Aborting upgrade. Resolve CEPH issues before retrying.
      when:
        - "'HEALTH_OK' not in ceph_health.stdout"
        - "'HEALTH_WARN' not in ceph_health.stdout"

    - name: Preflight | Warn if CEPH has warnings
      ansible.builtin.debug:
        msg: "WARNING — CEPH has warnings: {{ ceph_health.stdout }}. Proceeding but monitor closely."
      when: "'HEALTH_WARN' in ceph_health.stdout"
# Final summary, printed once from the control node. The CEPH suffix uses the
# same `ceph_enabled | bool` test that gates the CEPH checks, so a string
# value such as "false" does not produce a misleading ", CEPH checked".
- name: Preflight | Cluster health check passed
  ansible.builtin.debug:
    msg: "Cluster health check passed — all nodes online, quorum OK{{ ', CEPH checked' if (ceph_enabled | bool) else '' }}"
  delegate_to: localhost
  run_once: true