Files

95 lines
3.5 KiB
YAML

---
# =============================================================================
# proxmox_upgrade — upgrade.yml
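# apt dist-upgrade, reboot if required, wait for node to rejoin cluster;
# sets/clears the CEPH noout flag around the upgrade when ceph_enabled is true
# Runs directly on the node via SSH; only the cluster rejoin check is
# delegated to localhost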
# =============================================================================
# Set the CEPH noout flag before the upgrade starts. Skipped unless
# ceph_enabled evaluates to true. The flag is unset again later in this file
# by the CEPH recovery block.
- name: "Upgrade | {{ current_node }} | Set CEPH noout flag"
  when: ceph_enabled | bool
  ansible.builtin.command:
    cmd: ceph osd set noout
  # Always reported as changed, regardless of the command output.
  changed_when: true
# Refresh the apt package cache on the node.
- name: "Upgrade | {{ current_node }} | apt-get update"
  # A cache refresh is never reported as a change.
  changed_when: false
  ansible.builtin.apt:
    update_cache: true
# Run the dist-upgrade. The result is registered as apt_result so the next
# task can log a summary line from its output.
- name: "Upgrade | {{ current_node }} | apt dist-upgrade"
  register: apt_result
  ansible.builtin.apt:
    # apt_autoremove is coerced to a boolean before being handed to the module.
    autoremove: "{{ apt_autoremove | bool }}"
    upgrade: dist
# Print the first line of the dist-upgrade output that mentions "upgraded".
# Falls back to a fixed message when no such line exists or when apt_result
# carries no stdout_lines at all; default([]) keeps the template from failing
# on an undefined attribute in that case.
- name: "Upgrade | {{ current_node }} | Log upgraded packages"
  ansible.builtin.debug:
    msg: >-
      {{ apt_result.stdout_lines | default([])
      | select('search', 'upgraded') | list | first
      | default('apt dist-upgrade complete') }}
# Look for the reboot-required marker file. The registered result
# (reboot_required.stat.exists) gates the reboot and rejoin tasks below.
- name: "Upgrade | {{ current_node }} | Check if reboot required"
  register: reboot_required
  ansible.builtin.stat:
    path: /var/run/reboot-required
# Reboot the node, but only when the reboot-required marker file was found.
- name: "Upgrade | {{ current_node }} | Reboot node"
  when: reboot_required.stat.exists
  ansible.builtin.reboot:
    msg: "Rebooting for Proxmox upgrade"
    pre_reboot_delay: 5
    post_reboot_delay: 30
    # The same timeout variable also sizes the retry budget of the rejoin wait.
    reboot_timeout: "{{ node_rejoin_timeout }}"
# Informational counterpart of the reboot task: runs only when the
# reboot-required marker file is absent.
- name: "Upgrade | {{ current_node }} | Skip reboot (not required)"
  when: not reboot_required.stat.exists
  ansible.builtin.debug:
    msg: "No reboot required — skipping"
# ── Wait for node to rejoin cluster ──────────────────────────────────────────
# Poll the Proxmox API (from localhost) until current_node is listed with
# status "online". Only runs when a reboot was performed.
#
# The until expression defaults proxmox_nodes to an empty list: while the API
# is still unreachable the module call fails and the registered result has no
# proxmox_nodes key. Without the default, the undefined attribute would make
# the conditional itself error out instead of triggering another retry.
- name: "Upgrade | {{ current_node }} | Wait for node to rejoin cluster"
  community.proxmox.proxmox_node_info:
    api_host: "{{ api_host }}"
    api_user: "{{ api_user }}"
    api_token_id: "{{ api_token_id }}"
    api_token_secret: "{{ api_token_secret }}"
    api_port: "{{ api_port }}"
  register: rejoin_check
  until: >-
    rejoin_check.proxmox_nodes | default([])
    | selectattr('node', 'equalto', current_node)
    | selectattr('status', 'equalto', 'online')
    | list | length > 0
  # One attempt every 10 seconds, sized from node_rejoin_timeout.
  retries: "{{ (node_rejoin_timeout | int / 10) | int }}"
  delay: 10
  delegate_to: localhost
  when: reboot_required.stat.exists
# Confirm the rejoin. Gated on the same condition as the rejoin wait so the
# message is not printed for a node that was never rebooted.
- name: "Upgrade | {{ current_node }} | Node back online"
  ansible.builtin.debug:
    msg: "Node {{ current_node }} has rejoined the cluster"
  when: reboot_required.stat.exists
# ── CEPH recovery ─────────────────────────────────────────────────────────────
# CEPH post-upgrade handling, skipped unless ceph_enabled evaluates to true:
#   1. poll `ceph health` until it reports HEALTH_OK or HEALTH_WARN,
#   2. unset the noout flag,
#   3. print the health output captured in step 1.
# NOTE(review): the noout flag is only cleared when the health wait succeeds;
# if the wait exhausts its retries the flag stays set. Confirm this is the
# intended behaviour.
- name: "Upgrade | {{ current_node }} | Wait for CEPH to recover"
  when: ceph_enabled | bool
  block:
    - name: "Upgrade | CEPH | Wait for healthy status"
      # Plain command module: `ceph health` needs no shell features.
      ansible.builtin.command:
        cmd: ceph health
      register: ceph_health_post
      # HEALTH_WARN is accepted as well as HEALTH_OK; any other output
      # triggers another retry.
      until: >-
        'HEALTH_OK' in ceph_health_post.stdout or
        'HEALTH_WARN' in ceph_health_post.stdout
      # One attempt every 10 seconds, sized from ceph_recover_timeout.
      retries: "{{ (ceph_recover_timeout | int / 10) | int }}"
      delay: 10
      changed_when: false

    - name: "Upgrade | CEPH | Clear noout flag"
      ansible.builtin.command:
        cmd: ceph osd unset noout
      # Always reported as changed, regardless of the command output.
      changed_when: true

    - name: "Upgrade | CEPH | Status"
      ansible.builtin.debug:
        # Shows the health output captured before noout was cleared.
        msg: "CEPH recovered: {{ ceph_health_post.stdout }}"
# Final summary line for the node, stating whether a reboot took place.
- name: "Upgrade | {{ current_node }} | Upgrade complete"
  ansible.builtin.debug:
    msg: >-
      Node {{ current_node }} upgrade complete
      {{ reboot_required.stat.exists | ternary('— rebooted', '— no reboot needed') }}