Files
ansible-msp-automations/roles/proxmox_upgrade/tasks/preflight.yml

67 lines
2.3 KiB
YAML

---
# =============================================================================
# proxmox_upgrade — preflight.yml
# Cluster health check before starting any upgrade work
# pvecm runs on the node directly (SSH) — no delegation needed
# API node check runs delegate_to: localhost via community.proxmox
# =============================================================================
# Confirm the cluster is quorate before any upgrade work starts.
# Runs `pvecm status` directly (no shell pipeline) so that stdout/stderr stay
# available in the registered result when the check fails.
# The task fails when the command itself fails, or when no single output line
# contains "quorate" followed by "yes" (case-insensitive).
# NOTE(review): quorum_check.stdout now holds the full `pvecm status` output
# rather than only the matching line — confirm no other task relies on that.
- name: Preflight | Check quorum via pvecm
  ansible.builtin.command: pvecm status
  register: quorum_check
  changed_when: false
  failed_when: >-
    quorum_check.rc != 0 or
    quorum_check.stdout is not search('quorate.*yes', ignorecase=true)
  run_once: true
# Ask the Proxmox API for node information, once, from the control node.
# The result is registered as cluster_node_info for the tasks that follow.
- name: Preflight | Get all cluster nodes via API
  run_once: true
  delegate_to: localhost
  community.proxmox.proxmox_node_info:
    api_host: "{{ api_host }}"
    api_port: "{{ api_port }}"
    api_token_id: "{{ api_token_id }}"
    api_token_secret: "{{ api_token_secret }}"
  register: cluster_node_info
# Abort the play as soon as a node returned by the API reports a status other
# than 'online'. Iterates over cluster_node_info.proxmox_nodes.
- name: Preflight | Fail if any node is offline
  ansible.builtin.fail:
    msg: >
      Cluster health check FAILED — node {{ item.node }} is {{ item.status }}.
      Aborting upgrade to prevent data loss. Investigate before retrying.
  loop: "{{ cluster_node_info.proxmox_nodes }}"
  loop_control:
    # Show only the node name per iteration instead of the whole node dict.
    label: "{{ item.node }}"
  when: item.status != 'online'
  delegate_to: localhost
  run_once: true
# CEPH checks, executed only when ceph_enabled evaluates to true.
#   1. Capture the output of `ceph health`.
#   2. Abort unless the output mentions HEALTH_OK or HEALTH_WARN.
#   3. Emit a warning (but continue) when the output mentions HEALTH_WARN.
- name: Preflight | Check CEPH health
  when: ceph_enabled | bool
  block:
    # No shell features are needed here, so use command rather than shell.
    - name: Preflight | Get CEPH health
      ansible.builtin.command: ceph health
      register: ceph_health
      changed_when: false
      run_once: true

    # Anything that is neither OK nor WARN is treated as an error state.
    - name: Preflight | Abort if CEPH is in error state
      ansible.builtin.fail:
        msg: >
          CEPH health check FAILED — {{ ceph_health.stdout }}.
          Resolve CEPH issues before retrying.
      when: "'HEALTH_OK' not in ceph_health.stdout and 'HEALTH_WARN' not in ceph_health.stdout"
      run_once: true

    # Warnings do not block the upgrade; they are only surfaced to the operator.
    - name: Preflight | Warn if CEPH has warnings
      ansible.builtin.debug:
        msg: "WARNING — CEPH has warnings: {{ ceph_health.stdout }}. Proceeding but monitor closely."
      when: "'HEALTH_WARN' in ceph_health.stdout"
      run_once: true
# Final summary, printed once from the control node after all checks passed.
# ceph_enabled is cast with `| bool` so the ", CEPH checked" suffix agrees with
# the `when: ceph_enabled | bool` guard on the CEPH block (a string value such
# as "false" would otherwise be truthy here).
- name: Preflight | Cluster health check passed
  ansible.builtin.debug:
    msg: >-
      Cluster health OK — {{ cluster_node_info.proxmox_nodes | length }} nodes online,
      quorum confirmed{{ ', CEPH checked' if (ceph_enabled | bool) else '' }}
  delegate_to: localhost
  run_once: true