feat: proxmox_upgrade role and playbook
This commit is contained in:
64
roles/proxmox_upgrade/tasks/preflight.yml
Normal file
64
roles/proxmox_upgrade/tasks/preflight.yml
Normal file
@@ -0,0 +1,64 @@
|
||||
---
|
||||
# =============================================================================
|
||||
# proxmox_upgrade — preflight.yml
|
||||
# Check cluster health before starting any upgrade work
|
||||
# Runs delegated (delegate_to) to the first node in upgrade_order
|
||||
# =============================================================================
|
||||
|
||||
# Capture the node-count and quorum lines from `pvecm status`.
# Read-only (never reports "changed"). The pipeline's exit status is grep's,
# so the task fails when no matching line is produced.
# NOTE(review): pvecm_status is not read by any other task visible in this
# file — confirm whether a later include consumes it.
- name: Preflight | Check all cluster nodes are online
  ansible.builtin.shell:
    cmd: |
      pvecm status 2>/dev/null | grep -E "^Nodes|Quorate"
  changed_when: false
  register: pvecm_status
|
||||
|
||||
# Query the Proxmox API for the list of cluster nodes.
# The request is sent from the control node (delegate_to: localhost) and the
# response is registered as cluster_nodes; the per-node status check that
# follows reads cluster_nodes.json.data.
- name: Preflight | Get cluster node status via API
  ansible.builtin.uri:
    url: "https://{{ api_host }}:{{ api_port }}/api2/json/nodes"
    method: GET
    headers:
      Authorization: "PVEAPIToken={{ api_token_id }}={{ api_token_secret }}"
    # NOTE(review): TLS certificate verification is disabled here; confirm
    # this is intentional (e.g. self-signed node certificates).
    validate_certs: false
  register: cluster_nodes
  delegate_to: localhost
  # The Authorization header embeds the API token secret, so keep the module
  # arguments and result out of verbose output and logs.
  no_log: true
|
||||
|
||||
# Walk every node record returned by the API call and abort the play on the
# first one whose status is anything other than 'online'.
# Evaluated on the control node; items that are online are skipped.
- name: Preflight | Check all nodes are online
  delegate_to: localhost
  loop: "{{ cluster_nodes.json.data }}"
  when:
    - item.status != 'online'
  ansible.builtin.fail:
    msg: >
      Cluster health check FAILED — node {{ item.node }} is {{ item.status }}.
      Aborting upgrade to prevent data loss. Investigate before retrying.
|
||||
|
||||
# Verify quorum: the pipeline succeeds only if `pvecm status` prints a line
# containing both "quorate" and "yes" (case-insensitive).
# Read-only; a non-zero exit status from the pipeline fails the task.
- name: Preflight | Check quorum via pvecm
  ansible.builtin.shell:
    cmd: |
      pvecm status 2>/dev/null | grep -i "quorate" | grep -i "yes"
  register: quorum_check
  failed_when: quorum_check.rc != 0
  changed_when: false
|
||||
|
||||
# CEPH checks, executed only when ceph_enabled evaluates to true:
#   1. read the `ceph health` summary (read-only),
#   2. abort unless the summary contains HEALTH_OK or HEALTH_WARN,
#   3. print a warning, but continue, when it contains HEALTH_WARN.
- name: Preflight | Check CEPH health
  when: ceph_enabled | bool
  block:
    - name: Preflight | Get CEPH health status
      ansible.builtin.shell:
        cmd: |
          ceph health 2>/dev/null
      changed_when: false
      register: ceph_health

    # Both conditions must hold (list entries are AND-ed): the output names
    # neither of the two states under which the upgrade may proceed.
    - name: Preflight | Abort if CEPH is not healthy
      when:
        - "'HEALTH_OK' not in ceph_health.stdout"
        - "'HEALTH_WARN' not in ceph_health.stdout"
      ansible.builtin.fail:
        msg: >
          CEPH health check FAILED — status: {{ ceph_health.stdout }}.
          Aborting upgrade. Resolve CEPH issues before retrying.

    # Informational only: a warning state does not stop the upgrade.
    - name: Preflight | Warn if CEPH has warnings
      when: "'HEALTH_WARN' in ceph_health.stdout"
      ansible.builtin.debug:
        msg: "WARNING — CEPH has warnings: {{ ceph_health.stdout }}. Proceeding but monitor closely."
|
||||
|
||||
# Final summary, reached only if none of the preceding checks failed.
# ceph_enabled is coerced with `| bool`, matching the condition that gates the
# CEPH block, so a string value such as "false" (e.g. passed via -e) does not
# produce a ", CEPH checked" suffix when the CEPH checks were skipped.
- name: Preflight | Cluster health check passed
  ansible.builtin.debug:
    msg: "Cluster health check passed — all nodes online, quorum OK{{ ', CEPH checked' if (ceph_enabled | bool) else '' }}"
|
||||
|
||||
Reference in New Issue
Block a user