77 lines
2.6 KiB
YAML
77 lines
2.6 KiB
YAML
---
|
|
# =============================================================================
|
|
# proxmox_upgrade — preflight.yml
|
|
# Check cluster health before starting any upgrade work
|
|
# All API checks are done in single tasks on localhost to avoid variable scope issues
|
|
# =============================================================================
|
|
|
|
# Quorum gate: the pipeline exits 0 only when `pvecm status` prints a line
# that matches both "quorate" and "yes" (case-insensitive). stderr from
# pvecm is discarded. The check is read-only, so it never reports "changed".
- name: Preflight | Check quorum via pvecm
  ansible.builtin.shell:
    cmd: |
      pvecm status 2>/dev/null | grep -i "quorate" | grep -i "yes"
  changed_when: false
  register: quorum_check
  # A non-zero exit status from the final grep fails the task.
  failed_when: quorum_check.rc != 0
|
|
|
|
# Queries /api2/json/nodes once from the controller and fails when any node
# reports a status other than "online".
#
# stdout is a single line, consumed by later tasks through node_check.stdout:
#   OK: <node>=<status>, ...    every node is online (exit 0)
#   FAIL: <node>=<status>, ...  the listed nodes are not online (exit 1)
#   FAIL: API query failed: ... the request or response parsing failed (exit 1)
#
# The URL and token reach the script through the task environment rather than
# being templated into the command text. This keeps the secret out of the
# registered `cmd` result and the process arguments, and stops quote
# characters in a value from breaking the Python source.
- name: Preflight | Check all cluster nodes online via API
  ansible.builtin.shell:
    cmd: |
      python3 << 'PYEOF'
      import json
      import os
      import ssl
      import sys
      import urllib.request

      # NOTE(review): TLS verification is disabled here, presumably because
      # the API endpoint uses a self-signed certificate; confirm this is intended.
      ctx = ssl.create_default_context()
      ctx.check_hostname = False
      ctx.verify_mode = ssl.CERT_NONE

      req = urllib.request.Request(
          os.environ["PVE_API_URL"],
          headers={"Authorization": "PVEAPIToken=" + os.environ["PVE_API_TOKEN"]},
      )

      try:
          # The timeout keeps an unresponsive API from hanging the play.
          with urllib.request.urlopen(req, context=ctx, timeout=30) as r:
              data = json.loads(r.read())["data"]
      except (OSError, ValueError, KeyError) as exc:
          # URLError/HTTPError derive from OSError; JSON errors from ValueError.
          print(f"FAIL: API query failed: {exc}")
          sys.exit(1)

      offline = [n for n in data if n["status"] != "online"]
      if offline:
          print("FAIL: " + ", ".join(f"{n['node']}={n['status']}" for n in offline))
          sys.exit(1)

      print("OK: " + ", ".join(f"{n['node']}={n['status']}" for n in data))
      sys.exit(0)
      PYEOF
  environment:
    PVE_API_URL: "https://{{ api_host }}:{{ api_port }}/api2/json/nodes"
    PVE_API_TOKEN: "{{ api_token_id }}={{ api_token_secret }}"
  register: node_check
  changed_when: false
  failed_when: node_check.rc != 0
  delegate_to: localhost
  run_once: true
|
|
|
|
# Prints the stdout registered in node_check, once, from the controller.
- name: Preflight | Log node status
  run_once: true
  delegate_to: localhost
  ansible.builtin.debug:
    msg: "{{ node_check.stdout }}"
|
|
|
|
# CEPH gate, evaluated only when ceph_enabled is truthy after the bool filter.
#   - output contains HEALTH_OK   -> continue silently
#   - output contains HEALTH_WARN -> continue, but print a warning
#   - output contains neither     -> abort the play
- name: Preflight | Check CEPH health
  block:
    # Read-only query; stderr from the ceph CLI is discarded.
    - name: Preflight | Get CEPH health status
      ansible.builtin.shell:
        cmd: ceph health 2>/dev/null
      changed_when: false
      register: ceph_health

    # List entries under `when` are ANDed: abort only if neither marker appears.
    - name: Preflight | Abort if CEPH is in error state
      when:
        - "'HEALTH_OK' not in ceph_health.stdout"
        - "'HEALTH_WARN' not in ceph_health.stdout"
      ansible.builtin.fail:
        msg: >
          CEPH health check FAILED — status: {{ ceph_health.stdout }}.
          Aborting upgrade. Resolve CEPH issues before retrying.

    # Warnings do not block the upgrade; they are only surfaced in the log.
    - name: Preflight | Warn if CEPH has warnings
      when: "'HEALTH_WARN' in ceph_health.stdout"
      ansible.builtin.debug:
        msg: "WARNING — CEPH has warnings: {{ ceph_health.stdout }}. Proceeding but monitor closely."
  when: ceph_enabled | bool
|
|
|
|
# Final summary, printed once from the controller after the checks above.
# The CEPH suffix applies the same `ceph_enabled | bool` test that gates the
# CEPH block, so a string value such as "false" cannot make the message claim
# CEPH was checked when that block was skipped.
- name: Preflight | Cluster health check passed
  ansible.builtin.debug:
    msg: "Cluster health check passed — all nodes online, quorum OK{{ ', CEPH checked' if (ceph_enabled | bool) else '' }}"
  delegate_to: localhost
  run_once: true
|
|
|