Files
ansible-msp-automations/roles/proxmox_upgrade/tasks/preflight.yml
2026-03-14 14:05:40 -07:00

65 lines
2.3 KiB
YAML

---
# =============================================================================
# proxmox_upgrade — preflight.yml
# Check cluster health before starting any upgrade work
# Runs with delegate_to set to the first node in upgrade_order
# =============================================================================
# Capture the "Nodes" and "Quorate" summary lines printed by `pvecm status`
# and store the result in pvecm_status. This is a read-only probe, so it never
# reports "changed". stderr from pvecm is discarded by the redirect.
- name: Preflight | Check all cluster nodes are online
  changed_when: false
  register: pvecm_status
  ansible.builtin.shell:
    cmd: |
      pvecm status 2>/dev/null | grep -E "^Nodes|Quorate"
# Fetch the node list from the Proxmox API (/api2/json/nodes) and store the
# response in cluster_nodes. The request is sent from the controller
# (delegate_to: localhost) and authenticates with an API token header.
- name: Preflight | Get cluster node status via API
  ansible.builtin.uri:
    url: "https://{{ api_host }}:{{ api_port }}/api2/json/nodes"
    method: GET
    headers:
      Authorization: "PVEAPIToken={{ api_token_id }}={{ api_token_secret }}"
    # TLS verification stays disabled by default (the previously hard-coded
    # value), but can be switched on by setting api_validate_certs: true so
    # the API token is not sent to an unverified endpoint.
    validate_certs: "{{ api_validate_certs | default(false) | bool }}"
  register: cluster_nodes
  delegate_to: localhost
# Walk every node entry returned by the API and fail for any node whose
# status is not 'online'. Runs on the controller, where cluster_nodes was
# collected.
- name: Preflight | Check all nodes are online
  ansible.builtin.fail:
    msg: >
      Cluster health check FAILED — node {{ item.node }} is {{ item.status }}.
      Aborting upgrade to prevent data loss. Investigate before retrying.
  loop: "{{ cluster_nodes.json.data }}"
  loop_control:
    # Show only the node name per iteration instead of the full API record.
    label: "{{ item.node }}"
  when: item.status != 'online'
  delegate_to: localhost
# Quorum gate: the pipeline exits 0 only when a `pvecm status` line mentions
# "quorate" together with "yes" (both matched case-insensitively). Any other
# exit status fails the task. Read-only, so it never reports "changed".
- name: Preflight | Check quorum via pvecm
  register: quorum_check
  failed_when: quorum_check.rc != 0
  changed_when: false
  ansible.builtin.shell:
    cmd: |
      pvecm status 2>/dev/null | grep -i "quorate" | grep -i "yes"
# CEPH health gate, evaluated only when ceph_enabled is truthy.
#   - HEALTH_OK   -> continue silently
#   - HEALTH_WARN -> continue, but print a warning
#   - anything else in stdout -> abort the upgrade
- name: Preflight | Check CEPH health
  when: ceph_enabled | bool
  block:
    # Uses the command module (no shell needed for a single binary call).
    # stderr is captured by Ansible rather than discarded, so a failing
    # `ceph health` call shows its diagnostic; stdout is unaffected.
    - name: Preflight | Get CEPH health status
      ansible.builtin.command:
        cmd: ceph health
      register: ceph_health
      changed_when: false

    - name: Preflight | Abort if CEPH is not healthy
      ansible.builtin.fail:
        msg: >
          CEPH health check FAILED — status: {{ ceph_health.stdout }}.
          Aborting upgrade. Resolve CEPH issues before retrying.
      when: "'HEALTH_OK' not in ceph_health.stdout and 'HEALTH_WARN' not in ceph_health.stdout"

    - name: Preflight | Warn if CEPH has warnings
      ansible.builtin.debug:
        msg: "WARNING — CEPH has warnings: {{ ceph_health.stdout }}. Proceeding but monitor closely."
      when: "'HEALTH_WARN' in ceph_health.stdout"
# Final summary once every preflight gate has passed. The CEPH suffix uses the
# same `ceph_enabled | bool` coercion as the CEPH block's `when`, so a string
# value such as "false" does not produce a misleading "CEPH checked" note.
- name: Preflight | Cluster health check passed
  ansible.builtin.debug:
    msg: "Cluster health check passed — all nodes online, quorum OK{{ ', CEPH checked' if (ceph_enabled | bool) else '' }}"