Files
ansible-msp-automations/roles/proxmox_ha/tasks/main.yml
2026-03-15 15:48:59 -07:00

97 lines
3.5 KiB
YAML

---
# =============================================================================
# proxmox_ha — tasks
# Manages Proxmox HA group membership and maintenance mode.
# Proxmox HA is self-managing during migrations — this role handles
# cases where you need to explicitly pause or resume HA for a node.
# =============================================================================
# ── Detect HA ─────────────────────────────────────────────────────────────────
# Probe the cluster once with `ha-manager status` and derive `ha_is_enabled`,
# which gates every later task in this file.
#
# The probe is read-only (changed_when: false), so it is forced to run even
# under --check; otherwise check mode would skip it and the role would report
# "HA not configured" regardless of the real cluster state.
- name: "HA | Detect if HA is configured"
  ansible.builtin.command:
    cmd: ha-manager status
  register: ha_detect
  changed_when: false
  # A failing probe means "no HA", not a play failure.
  failed_when: false
  check_mode: false
  run_once: true

# HA counts as enabled only when the probe exited 0 AND printed something.
# The default() guards keep the expression valid if the result carries no
# rc/stdout (e.g. the binary could not be executed); trim makes whitespace-only
# output count as empty.
- name: "HA | Set HA enabled fact"
  ansible.builtin.set_fact:
    ha_is_enabled: >-
      {{
        ((ha_detect.rc | default(1)) == 0
         and (ha_detect.stdout | default('') | trim | length) > 0) | bool
      }}
  run_once: true

- name: "HA | Skip — HA not configured"
  ansible.builtin.debug:
    msg: "HA is not configured on this cluster — skipping."
  when: not ha_is_enabled
  run_once: true
# ── HA status ─────────────────────────────────────────────────────────────────
# Runs only when ha_action == 'status' and HA was detected: query the HA
# manager once, then print its output line by line.
- name: "HA | Get status"
  run_once: true
  when:
    - ha_is_enabled
    - ha_action == 'status'
  ansible.builtin.command:
    cmd: ha-manager status
  # Pure query — never reported as a change.
  changed_when: false
  register: ha_status

- name: "HA | Log status"
  run_once: true
  when:
    - ha_is_enabled
    - ha_action == 'status'
  ansible.builtin.debug:
    msg: "{{ ha_status.stdout_lines }}"
# ── Put node in maintenance mode ──────────────────────────────────────────────
# Proxmox uses node maintenance mode via ha-manager to gracefully migrate
# HA-managed VMs before maintenance. This is the correct HA-aware drain.
#
# Selected with ha_action == 'disable' (i.e. "disable HA handling for the
# node"). Requires `current_node` and `ha_timeout` (seconds) to be set by the
# caller.
- name: "HA | Enable maintenance mode for {{ current_node }}"
  ansible.builtin.command:
    # argv passes the templated node name as exactly one argument, with no
    # word-splitting or quoting surprises.
    argv:
      - ha-manager
      - crm-command
      - node-maintenance
      - enable
      - "{{ current_node }}"
  changed_when: true
  run_once: true
  when:
    - ha_is_enabled
    - ha_action == 'disable'

# Poll `ha-manager status` every 5 s until the LRM line of *this* node shows a
# maintenance state. Matching the bare word 'maintenance' anywhere in the
# output would also succeed when some other node is in maintenance.
# NOTE(review): assumes the status line looks like
# `lrm <node> (maintenance mode, <timestamp>)` — confirm on your PVE release.
- name: "HA | Wait for {{ current_node }} maintenance mode to be acknowledged"
  ansible.builtin.command:
    cmd: ha-manager status
  register: ha_maintenance_check
  changed_when: false
  until: >-
    (ha_maintenance_check.stdout | default(''))
    is search('lrm +' ~ (current_node | regex_escape) ~ ' +[(]maintenance')
  # Cast first so a string ha_timeout (e.g. from -e ha_timeout=300) still works.
  retries: "{{ (((ha_timeout | int) / 5) | int) }}"
  delay: 5
  run_once: true
  when:
    - ha_is_enabled
    - ha_action == 'disable'
    # In check mode nothing was enabled above, so polling could only time out.
    - not ansible_check_mode

- name: "HA | Maintenance mode enabled for {{ current_node }}"
  ansible.builtin.debug:
    msg: "✓ HA maintenance mode enabled for {{ current_node }} — HA will not restart VMs on this node."
  when:
    - ha_is_enabled
    - ha_action == 'disable'
  run_once: true
# ── Resume HA management ──────────────────────────────────────────────────────
# Selected with ha_action == 'enable': take `current_node` out of maintenance
# mode and report it. No wait/verification step follows the command.
- name: "HA | Disable maintenance mode for {{ current_node }}"
  when:
    - ha_is_enabled
    - ha_action == 'enable'
  run_once: true
  ansible.builtin.command:
    cmd: >-
      ha-manager crm-command node-maintenance disable {{ current_node }}
  # The command always alters cluster state, so always report "changed".
  changed_when: true

- name: "HA | Maintenance mode disabled for {{ current_node }}"
  when:
    - ha_is_enabled
    - ha_action == 'enable'
  run_once: true
  ansible.builtin.debug:
    msg: "✓ HA management resumed for {{ current_node }}."