---
# =============================================================================
# proxmox_upgrade — drain.yml
# Migrate all VMs/LXCs off a node before upgrading it
# Uses inline Python for API calls to avoid cross-task variable scope issues
#
# Variables read by this file (defined elsewhere):
#   current_node, api_host, api_port, api_token_id, api_token_secret,
#   shared_storage, migrate_exclude_tags, live_migrate_fallback,
#   migration_bulk
# Facts set by this file:
#   drain_data, migration_plan, migration_targets
#
# Conventions used by the inline scripts:
#   * The API token is handed over through the task environment
#     (PVE_API_TOKEN) so the secret is not part of the script text.
#   * Structured Ansible values are embedded as JSON string literals and
#     decoded with json.loads(); pasting raw JSON into Python source breaks
#     on true/false/null.
# =============================================================================

# ── Build migration plan via API ──────────────────────────────────────────────

- name: "Drain | Build migration plan for {{ current_node }}"
  ansible.builtin.shell: |
    python3 << 'PYEOF'
    import json, os, re, ssl, sys, urllib.request

    # NOTE(review): TLS certificate verification is switched off for the API
    # connection; confirm this is acceptable for the target environment.
    ctx = ssl.create_default_context()
    ctx.check_hostname = False
    ctx.verify_mode = ssl.CERT_NONE

    api_base = "https://{{ api_host }}:{{ api_port }}/api2/json"
    headers = {"Authorization": "PVEAPIToken=" + os.environ["PVE_API_TOKEN"]}
    node = "{{ current_node }}"
    # Decoded from JSON so booleans arrive as True/False instead of the bare
    # words true/false, which are undefined names in Python.
    shared = json.loads({{ shared_storage | bool | to_json | to_json }})
    exclude_tags = json.loads({{ migrate_exclude_tags | to_json | to_json }})

    def api_get(path):
        """GET an API path and return the 'data' member of the response."""
        req = urllib.request.Request(f"{api_base}{path}", headers=headers)
        with urllib.request.urlopen(req, context=ctx) as r:
            return json.loads(r.read())["data"]

    def guest_tags(guest):
        """Return the guest's tags as a list.

        Tags may be separated by semicolons, commas or whitespace, so all
        three are accepted; a missing or empty 'tags' field yields [].
        """
        return [t for t in re.split(r"[;,\s]+", guest.get("tags") or "") if t]

    def is_excluded(guest):
        """True when the guest carries at least one excluded tag."""
        return any(t in exclude_tags for t in guest_tags(guest))

    # Get all online nodes except current
    all_nodes = api_get("/nodes")
    targets = [n["node"] for n in all_nodes if n["status"] == "online" and n["node"] != node]

    if not targets:
        # Exit 0 on purpose: the error is reported through the JSON payload
        # and turned into a failure by the "Fail if error building plan"
        # task. A non-zero exit would fail this task first and make that
        # task unreachable.
        print(json.dumps({"error": f"No online nodes available to migrate guests to from {node}"}))
        sys.exit(0)

    # Get VMs and LXCs on this node
    vms = api_get(f"/nodes/{node}/qemu")
    lxcs = api_get(f"/nodes/{node}/lxc")

    plan = []
    for vm in vms:
        if is_excluded(vm):
            continue
        cfg = api_get(f"/nodes/{node}/qemu/{vm['vmid']}/config")
        # Any hostpci*/usb* config key is treated as device passthrough.
        has_passthrough = any(k.startswith("hostpci") or k.startswith("usb") for k in cfg)
        # Heuristic: a string config value mentioning both "local" and ".iso".
        has_local_cdrom = any(
            isinstance(v, str) and "local" in v and ".iso" in v
            for v in cfg.values()
        )
        needs_fallback = has_passthrough or not shared or has_local_cdrom
        # Only the first matching reason is reported, in this priority order.
        if has_passthrough:
            reason = "passthrough"
        elif not shared:
            reason = "local_disk"
        elif has_local_cdrom:
            reason = "local_cdrom"
        else:
            reason = ""
        plan.append({
            "vmid": vm["vmid"],
            "name": vm.get("name", str(vm["vmid"])),
            "type": "qemu",
            "status": vm["status"],
            "needs_fallback": needs_fallback,
            "fallback_reason": reason
        })

    # Containers are never marked as needing the fallback path.
    for lxc in lxcs:
        if is_excluded(lxc):
            continue
        plan.append({
            "vmid": lxc["vmid"],
            "name": lxc.get("name", str(lxc["vmid"])),
            "type": "lxc",
            "status": lxc["status"],
            "needs_fallback": False,
            "fallback_reason": ""
        })

    print(json.dumps({"plan": plan, "targets": targets}))
    PYEOF
  environment:
    PVE_API_TOKEN: "{{ api_token_id }}={{ api_token_secret }}"
  register: drain_plan_raw
  delegate_to: localhost
  changed_when: false

- name: "Drain | Parse migration plan"
  ansible.builtin.set_fact:
    drain_data: "{{ drain_plan_raw.stdout | from_json }}"
  delegate_to: localhost

# The plan script reports "no migration target" as {"error": "..."}.
- name: "Drain | Fail if error building plan"
  ansible.builtin.fail:
    msg: "{{ drain_data.error }}"
  when: drain_data.error is defined
  delegate_to: localhost

- name: "Drain | Set migration plan and targets"
  ansible.builtin.set_fact:
    migration_plan: "{{ drain_data.plan }}"
    migration_targets: "{{ drain_data.targets }}"
  delegate_to: localhost

- name: "Drain | Log migration plan for {{ current_node }}"
  ansible.builtin.debug:
    msg: >-
      Migration plan for {{ current_node }}
      ({{ migration_plan | length }} guests → {{ migration_targets | first }}):
      {% for g in migration_plan %}
        - {{ g.type | upper }} {{ g.vmid }} ({{ g.name }}) [{{ g.status }}]{% if g.needs_fallback %} ⚠ fallback={{ live_migrate_fallback }} reason={{ g.fallback_reason }}{% endif %}

      {% endfor %}
  delegate_to: localhost

- name: "Drain | Warn about non-live-migratable guests"
  ansible.builtin.debug:
    msg: >-
      WARNING — {{ item.type | upper }} {{ item.vmid }} ({{ item.name }})
      cannot be live migrated ({{ item.fallback_reason }}).
      {% if live_migrate_fallback == 'skip' %}THIS VM WILL GO DOWN DURING NODE REBOOT.
      {% elif live_migrate_fallback == 'shutdown' %}Will be shut down, cold migrated, and restarted.
      {% else %}Will attempt live migrate anyway (may fail).{% endif %}
  loop: "{{ migration_plan | selectattr('needs_fallback') | list }}"
  loop_control:
    loop_var: item
  delegate_to: localhost

# ── Sequential migrations ─────────────────────────────────────────────────────

- name: "Drain | Migrate guests sequentially"
  when: not migration_bulk | bool
  block:
    - name: "Drain | Sequential | Migrate live-migratable guests"
      ansible.builtin.include_tasks: migrate_guest.yml
      loop: "{{ migration_plan | rejectattr('needs_fallback') | list }}"
      loop_control:
        loop_var: guest

    # Fallback guests are left alone entirely when the policy is 'skip'.
    - name: "Drain | Sequential | Handle fallback guests"
      ansible.builtin.include_tasks: migrate_guest.yml
      loop: "{{ migration_plan | selectattr('needs_fallback') | list }}"
      loop_control:
        loop_var: guest
      when: live_migrate_fallback != 'skip'

# ── Bulk migrations ───────────────────────────────────────────────────────────

- name: "Drain | Migrate guests in bulk"
  when: migration_bulk | bool
  block:
    # Starts one migration per live-migratable guest, all towards the first
    # target node. stdout holds one "Triggered migration" line per guest
    # followed by a final JSON line {"task_ids": [...]} that the wait task
    # below consumes.
    - name: "Drain | Bulk | Trigger all live migrations"
      ansible.builtin.shell: |
        python3 << 'PYEOF'
        import json, os, ssl, urllib.request

        # NOTE(review): TLS certificate verification is switched off for the
        # API connection; confirm this is acceptable for the environment.
        ctx = ssl.create_default_context()
        ctx.check_hostname = False
        ctx.verify_mode = ssl.CERT_NONE

        api_base = "https://{{ api_host }}:{{ api_port }}/api2/json"
        headers = {"Authorization": "PVEAPIToken=" + os.environ["PVE_API_TOKEN"]}
        node = "{{ current_node }}"
        target = "{{ migration_targets | first }}"
        # Decoded from a JSON string literal: the plan contains booleans,
        # which are not valid Python when pasted as raw JSON.
        plan = json.loads({{ migration_plan | rejectattr('needs_fallback') | list | to_json | to_json }})

        task_ids = []
        for guest in plan:
            gtype = "qemu" if guest["type"] == "qemu" else "lxc"
            url = f"{api_base}/nodes/{node}/{gtype}/{guest['vmid']}/migrate"
            # NOTE(review): containers get the same online=1 request as VMs;
            # confirm the API accepts this for running LXCs (they may need
            # restart mode instead).
            body = json.dumps({"target": target, "online": 1}).encode()
            req = urllib.request.Request(
                url,
                data=body,
                headers={**headers, "Content-Type": "application/json"},
                method="POST",
            )
            with urllib.request.urlopen(req, context=ctx) as r:
                task_id = json.loads(r.read())["data"]
            task_ids.append({"vmid": guest["vmid"], "name": guest["name"], "task": task_id})
            print(f"Triggered migration: {guest['type'].upper()} {guest['vmid']} ({guest['name']}) → {target} task={task_id}")

        print(json.dumps({"task_ids": task_ids}))
        PYEOF
      environment:
        PVE_API_TOKEN: "{{ api_token_id }}={{ api_token_secret }}"
      register: bulk_trigger_raw
      delegate_to: localhost
      changed_when: true

    # Polls each triggered task every 10 seconds, up to 60 times per task,
    # and fails if any task ends with an exit status other than "OK" or never
    # reaches the "stopped" state.
    - name: "Drain | Bulk | Wait for all migration tasks to complete"
      ansible.builtin.shell: |
        python3 << 'PYEOF'
        import json, os, ssl, sys, time, urllib.request

        # NOTE(review): TLS certificate verification is switched off for the
        # API connection; confirm this is acceptable for the environment.
        ctx = ssl.create_default_context()
        ctx.check_hostname = False
        ctx.verify_mode = ssl.CERT_NONE

        api_base = "https://{{ api_host }}:{{ api_port }}/api2/json"
        headers = {"Authorization": "PVEAPIToken=" + os.environ["PVE_API_TOKEN"]}
        node = "{{ current_node }}"

        # The trigger output is embedded as an escaped string literal so that
        # quotes or backslashes in guest names cannot break the script.
        trigger_stdout = {{ bulk_trigger_raw.stdout | to_json }}
        lines = trigger_stdout.strip().split('\n')
        # The task list is the last line that starts with a JSON object.
        last_line = [l for l in lines if l.startswith('{')][-1]
        task_ids = json.loads(last_line)["task_ids"]

        failed = []
        for t in task_ids:
            retries = 60
            while retries > 0:
                url = f"{api_base}/nodes/{node}/tasks/{t['task']}/status"
                req = urllib.request.Request(url, headers=headers)
                with urllib.request.urlopen(req, context=ctx) as r:
                    status = json.loads(r.read())["data"]
                if status["status"] == "stopped":
                    if status.get("exitstatus") != "OK":
                        failed.append(f"{t['name']} ({t['vmid']}): {status.get('exitstatus')}")
                    else:
                        print(f"OK: {t['name']} ({t['vmid']}) migrated successfully")
                    break
                time.sleep(10)
                retries -= 1
            else:
                # Reached only when the retries ran out without a break.
                failed.append(f"{t['name']} ({t['vmid']}): timed out")

        if failed:
            print("FAILED: " + ", ".join(failed))
            sys.exit(1)
        print("All bulk migrations completed successfully")
        PYEOF
      environment:
        PVE_API_TOKEN: "{{ api_token_id }}={{ api_token_secret }}"
      register: bulk_wait_result
      delegate_to: localhost
      changed_when: false

    # Fallback guests are left alone entirely when the policy is 'skip'.
    - name: "Drain | Bulk | Handle fallback guests sequentially"
      ansible.builtin.include_tasks: migrate_guest.yml
      loop: "{{ migration_plan | selectattr('needs_fallback') | list }}"
      loop_control:
        loop_var: guest
      when: live_migrate_fallback != 'skip'