refactor: proxmox_upgrade use inline community.proxmox for API calls.

This commit is contained in:
Semaphore
2026-03-14 15:23:47 -07:00
parent 464fba619f
commit 9bb8e97c82
9 changed files with 274 additions and 384 deletions

View File

@@ -1,8 +1,9 @@
---
# =============================================================================
# proxmox_upgrade — preflight.yml
# Cluster health check before starting any upgrade work
# pvecm runs on the node directly (SSH) — no delegation needed
# API node checks run as single tasks on localhost (delegate_to: localhost,
# via community.proxmox) to avoid variable scope issues
# =============================================================================
- name: Preflight | Check quorum via pvecm
@@ -11,65 +12,55 @@
register: quorum_check
changed_when: false
failed_when: quorum_check.rc != 0
run_once: true
# Ask the Proxmox API for the node list and verify that every node reports
# "online". An inline, stdlib-only Python script does the request; it prints
# an "OK: ..." or "FAIL: ..." summary and exits 0 or 1, which drives
# failed_when through the registered node_check result.
- name: Preflight | Check all cluster nodes online via API
  ansible.builtin.shell: |
    python3 << 'PYEOF'
    import json
    import ssl
    import urllib.request

    # TLS context with hostname checking and certificate validation disabled
    tls = ssl.create_default_context()
    tls.check_hostname = False
    tls.verify_mode = ssl.CERT_NONE

    request = urllib.request.Request(
        "https://{{ api_host }}:{{ api_port }}/api2/json/nodes",
        headers={"Authorization": "PVEAPIToken={{ api_token_id }}={{ api_token_secret }}"},
    )
    with urllib.request.urlopen(request, context=tls) as response:
        nodes = json.loads(response.read())["data"]

    def summary(entries):
        return ", ".join(f"{n['node']}={n['status']}" for n in entries)

    not_online = [n for n in nodes if n["status"] != "online"]
    if not_online:
        print("FAIL: " + summary(not_online))
        raise SystemExit(1)
    print("OK: " + summary(nodes))
    raise SystemExit(0)
    PYEOF
  register: node_check
  changed_when: false
  failed_when: node_check.rc != 0
# Fetch node information through the Proxmox API with the community.proxmox
# collection. Executed once and delegated to localhost; the result is
# registered as cluster_node_info.
# NOTE(review): community.proxmox API modules normally also expect api_user
# (or the PROXMOX_USER environment variable) — confirm it is supplied.
- name: Preflight | Get all cluster nodes via API
  community.proxmox.proxmox_node_info:
    api_host: "{{ api_host }}"
    api_port: "{{ api_port }}"
    api_token_id: "{{ api_token_id }}"
    api_token_secret: "{{ api_token_secret }}"
  delegate_to: localhost
  run_once: true
  register: cluster_node_info
# Echo the stdout captured in node_check into the play output.
- name: Preflight | Log node status
  ansible.builtin.debug:
    msg: "{{ node_check.stdout }}"
# Abort the play when any entry in cluster_node_info.proxmox_nodes has a
# status other than 'online'. The loop evaluates each node so the failure
# message names the offending node and its reported status.
- name: Preflight | Fail if any node is offline
  ansible.builtin.fail:
    msg: >
      Cluster health check FAILED — node {{ item.node }} is {{ item.status }}.
      Aborting upgrade to prevent data loss. Investigate before retrying.
  when: item.status != 'online'
  loop: "{{ cluster_node_info.proxmox_nodes }}"
  run_once: true
  delegate_to: localhost
# CEPH health gate — only evaluated when ceph_enabled is truthy.
# Reads `ceph health` once, aborts unless the output contains HEALTH_OK or
# HEALTH_WARN, and prints a warning when HEALTH_WARN is present.
- name: Preflight | Check CEPH health
  when: ceph_enabled | bool
  block:
    # Single registered read of the health status. Plain command module:
    # no pipes or redirects are used, so no shell is needed.
    - name: Preflight | Get CEPH health
      ansible.builtin.command: ceph health
      register: ceph_health
      changed_when: false
      run_once: true

    # Anything that is neither HEALTH_OK nor HEALTH_WARN stops the upgrade.
    - name: Preflight | Abort if CEPH is in error state
      ansible.builtin.fail:
        msg: >
          CEPH health check FAILED — {{ ceph_health.stdout }}.
          Resolve CEPH issues before retrying.
      when: "'HEALTH_OK' not in ceph_health.stdout and 'HEALTH_WARN' not in ceph_health.stdout"
      run_once: true

    # HEALTH_WARN is tolerated but surfaced in the play output.
    - name: Preflight | Warn if CEPH has warnings
      ansible.builtin.debug:
        msg: "WARNING — CEPH has warnings: {{ ceph_health.stdout }}. Proceeding but monitor closely."
      when: "'HEALTH_WARN' in ceph_health.stdout"
      run_once: true
# Final confirmation, printed once and delegated to localhost.
# Reports how many nodes the API returned and whether CEPH was checked.
# ceph_enabled is cast with `| bool` so string values such as "false"
# are handled the same way as in the CEPH block's `when` condition.
- name: Preflight | Cluster health check passed
  ansible.builtin.debug:
    msg: >-
      Cluster health OK — {{ cluster_node_info.proxmox_nodes | length }} nodes online,
      quorum confirmed{{ ', CEPH checked' if (ceph_enabled | bool) else '' }}
  delegate_to: localhost
  run_once: true