Files
ansible-msp-automations/roles/proxmox_upgrade/tasks/upgrade.yml
2026-03-14 14:05:40 -07:00

96 lines
3.6 KiB
YAML

---
# =============================================================================
# proxmox_upgrade — upgrade.yml
# Run apt dist-upgrade and reboot, wait for node to rejoin cluster
# =============================================================================
# Sets the CEPH "noout" flag before any packages are touched.
# Skipped entirely unless `ceph_enabled` evaluates to true.
- name: Upgrade | Set CEPH noout flag before upgrade
  # No pipes, redirects or variable expansion are needed here, so the binary
  # is invoked directly through `command` instead of going through a shell.
  ansible.builtin.command:
    cmd: ceph osd set noout
  when: ceph_enabled | bool
  # The command output is not inspected; the task always reports "changed".
  changed_when: true
# Refreshes the apt package index (`-q` reduces progress output).
- name: Upgrade | Run apt update
  # Plain command with no shell features, so `command` is used rather than
  # `shell`.
  ansible.builtin.command:
    cmd: apt-get update -q
  # Refreshing the index is never reported as a change.
  changed_when: false
# Runs the upgrade command supplied in `apt_upgrade_cmd` and stores the result
# in `apt_upgrade_result`, which later tasks read for logging.
- name: Upgrade | Run apt dist-upgrade
  # `shell` is kept on purpose: the contents of `apt_upgrade_cmd` are defined
  # elsewhere and may rely on shell syntax (e.g. inline environment variables).
  ansible.builtin.shell: "{{ apt_upgrade_cmd }}"
  register: apt_upgrade_result
  # Report "changed" unless the apt summary says exactly "0 upgraded".
  # A plain substring test for '0 upgraded' also matches "10 upgraded",
  # "20 upgraded", ... and would wrongly report those runs as unchanged, so
  # the zero must not be preceded by another digit.
  changed_when: >-
    apt_upgrade_result.stdout is not search('(^|[^0-9])0 upgraded')
# Prints the first line of the upgrade output that mentions "upgraded",
# or the text 'No output' when no such line exists.
- name: Upgrade | Log packages upgraded
  ansible.builtin.debug:
    msg: >-
      {{ apt_upgrade_result.stdout_lines
      | select('match', '.*upgraded.*')
      | list | first | default('No output') }}
# Removes packages that are no longer required.
# Only runs when `apt_autoremove` evaluates to true.
- name: Upgrade | Run apt autoremove
  # No shell features are needed once the environment variable is passed via
  # `environment`, so the command is executed directly.
  ansible.builtin.command:
    cmd: apt-get autoremove -y
  environment:
    DEBIAN_FRONTEND: noninteractive
  register: apt_autoremove_result
  when: apt_autoremove | bool
  # Previously hard-coded to "not changed", which hid real package removals.
  # Report "changed" unless the apt summary says exactly "0 to remove"
  # (the zero must not be preceded by another digit, e.g. "10 to remove").
  changed_when: >-
    apt_autoremove_result.stdout is not search('(^|[^0-9])0 to remove')
# Looks for the /var/run/reboot-required marker file; the stat result is kept
# in `reboot_required` and drives the reboot decision in later tasks.
- name: Upgrade | Check if reboot is required
  register: reboot_required
  ansible.builtin.stat:
    path: /var/run/reboot-required
# Reports whether the marker file was found, i.e. whether a reboot follows.
- name: Upgrade | Log reboot status
  ansible.builtin.debug:
    msg: >-
      {{ 'Reboot required — rebooting node'
      if reboot_required.stat.exists
      else 'No reboot required — skipping reboot' }}
# Reboots the node, but only when the reboot-required marker file exists.
- name: Upgrade | Reboot node
  when: reboot_required.stat.exists
  ansible.builtin.reboot:
    msg: "Rebooting for Proxmox upgrade"
    # Delays (in seconds) applied before issuing and after completing the
    # reboot.
    pre_reboot_delay: 5
    post_reboot_delay: 30
    # Upper bound for the reboot, shared with the cluster-rejoin wait.
    reboot_timeout: "{{ node_rejoin_timeout }}"
# ── Wait for node to rejoin cluster ──────────────────────────────────────────
# Polls the Proxmox API node list from the controller (delegated to localhost)
# until `current_node` is listed with status "online".
- name: Upgrade | Wait for node to appear online in cluster
  ansible.builtin.uri:
    url: "https://{{ api_host }}:{{ api_port }}/api2/json/nodes"
    method: GET
    headers:
      Authorization: "PVEAPIToken={{ api_token_id }}={{ api_token_secret }}"
    # NOTE(review): TLS certificate verification is disabled for this call;
    # confirm this is intentional for the target environment.
    validate_certs: false
  register: nodes_status
  # A failed request (connection refused, non-JSON reply, error status) leaves
  # `nodes_status.json` undefined. Dereferencing it directly makes the `until`
  # expression raise an error and fail the task instead of retrying, so both
  # `json` and `data` fall back to empty values and the poll simply continues.
  until: >-
    ((nodes_status.json | default({})).data | default([], true))
    | selectattr('node', 'equalto', current_node)
    | selectattr('status', 'equalto', 'online')
    | list | length > 0
  # One attempt every 10 seconds, for roughly `node_rejoin_timeout` seconds.
  retries: "{{ (node_rejoin_timeout | int / 10) | int }}"
  delay: 10
  delegate_to: localhost
# Informational message emitted once the cluster-rejoin wait has succeeded.
- name: Upgrade | Node {{ current_node }} back online
  ansible.builtin.debug:
    msg: >-
      Node {{ current_node }} has rejoined the cluster
# ── CEPH recovery wait ────────────────────────────────────────────────────────
# CEPH post-upgrade handling; the whole block is skipped unless `ceph_enabled`
# evaluates to true. Steps: wait for an acceptable health state, clear the
# noout flag, then log the last health output.
#
# NOTE(review): there is no `rescue`/`always` section, so if the health wait
# exhausts its retries the noout flag is left set — confirm that manual
# clean-up in that case is the intended behaviour.
- name: Upgrade | Wait for CEPH to recover
  when: ceph_enabled | bool
  block:
    - name: Upgrade | CEPH | Wait for HEALTH_OK or HEALTH_WARN
      # Plain command without shell features, so `command` replaces `shell`.
      ansible.builtin.command:
        cmd: ceph health
      register: ceph_health_post
      # Both HEALTH_OK and HEALTH_WARN are accepted as "recovered".
      until: >-
        'HEALTH_OK' in ceph_health_post.stdout
        or 'HEALTH_WARN' in ceph_health_post.stdout
      # One attempt every 10 seconds, for roughly `ceph_recover_timeout`
      # seconds.
      retries: "{{ (ceph_recover_timeout | int / 10) | int }}"
      delay: 10
      # Read-only health query; never reported as a change.
      changed_when: false

    - name: Upgrade | CEPH | Clear noout flag
      ansible.builtin.command:
        cmd: ceph osd unset noout
      # Output is not inspected; the task always reports "changed".
      changed_when: true

    - name: Upgrade | CEPH | Log recovery status
      ansible.builtin.debug:
        msg: "CEPH recovered: {{ ceph_health_post.stdout }}"
# Final summary for the node: the first "upgraded" line from the apt output
# (or 'packages updated' when none is found), followed by whether the
# reboot-required marker file was present.
- name: Upgrade | Node {{ current_node }} upgrade complete
  ansible.builtin.debug:
    msg: >-
      Node {{ current_node }} upgrade complete —
      {{ apt_upgrade_result.stdout_lines
      | select('match', '.*upgraded.*')
      | list | first | default('packages updated') }}
      {{ '— rebooted' if reboot_required.stat.exists
      else '— no reboot needed' }}