refactor: proxmox_upgrade use inline Python for all API calls
This commit is contained in:
@@ -2,185 +2,222 @@
|
|||||||
# =============================================================================
|
# =============================================================================
|
||||||
# proxmox_upgrade — drain.yml
|
# proxmox_upgrade — drain.yml
|
||||||
# Migrate all VMs/LXCs off a node before upgrading it
|
# Migrate all VMs/LXCs off a node before upgrading it
|
||||||
# Uses Proxmox API — runs delegate_to: localhost
|
# Uses inline Python for API calls to avoid cross-task variable scope issues
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
|
|
||||||
# ── Get all guests on this node ───────────────────────────────────────────────
|
# ── Build migration plan via API ──────────────────────────────────────────────
|
||||||
- name: Drain | Get all VMs on node {{ current_node }}
|
- name: "Drain | Build migration plan for {{ current_node }}"
|
||||||
ansible.builtin.uri:
|
ansible.builtin.shell: |
|
||||||
url: "https://{{ api_host }}:{{ api_port }}/api2/json/nodes/{{ current_node }}/qemu"
|
python3 << 'PYEOF'
|
||||||
method: GET
|
import urllib.request, urllib.error, json, ssl
|
||||||
headers:
|
|
||||||
Authorization: "PVEAPIToken={{ api_token_id }}={{ api_token_secret }}"
|
|
||||||
validate_certs: false
|
|
||||||
register: node_vms
|
|
||||||
delegate_to: localhost
|
|
||||||
|
|
||||||
- name: Drain | Get all LXCs on node {{ current_node }}
|
ctx = ssl.create_default_context()
|
||||||
ansible.builtin.uri:
|
ctx.check_hostname = False
|
||||||
url: "https://{{ api_host }}:{{ api_port }}/api2/json/nodes/{{ current_node }}/lxc"
|
ctx.verify_mode = ssl.CERT_NONE
|
||||||
method: GET
|
|
||||||
headers:
|
|
||||||
Authorization: "PVEAPIToken={{ api_token_id }}={{ api_token_secret }}"
|
|
||||||
validate_certs: false
|
|
||||||
register: node_lxcs
|
|
||||||
delegate_to: localhost
|
|
||||||
|
|
||||||
- name: Drain | Get available target nodes
|
api_base = "https://{{ api_host }}:{{ api_port }}/api2/json"
|
||||||
ansible.builtin.uri:
|
headers = {"Authorization": "PVEAPIToken={{ api_token_id }}={{ api_token_secret }}"}
|
||||||
url: "https://{{ api_host }}:{{ api_port }}/api2/json/nodes"
|
node = "{{ current_node }}"
|
||||||
method: GET
|
# Render a Python literal (True/False). The previous `| lower` emitted
# `true`/`false`, which are undefined names once this script runs.
shared = {{ shared_storage | bool }}
|
||||||
headers:
|
exclude_tags = {{ migrate_exclude_tags | to_json }}
|
||||||
Authorization: "PVEAPIToken={{ api_token_id }}={{ api_token_secret }}"
|
|
||||||
validate_certs: false
|
|
||||||
register: all_nodes
|
|
||||||
delegate_to: localhost
|
|
||||||
|
|
||||||
- name: Drain | Build target node list (exclude current node)
|
def api_get(path):
|
||||||
|
req = urllib.request.Request(f"{api_base}{path}", headers=headers)
|
||||||
|
with urllib.request.urlopen(req, context=ctx) as r:
|
||||||
|
return json.loads(r.read())["data"]
|
||||||
|
|
||||||
|
# Get all online nodes except current
|
||||||
|
all_nodes = api_get("/nodes")
|
||||||
|
targets = [n["node"] for n in all_nodes if n["status"] == "online" and n["node"] != node]
|
||||||
|
if not targets:
|
||||||
|
print(json.dumps({"error": f"No online nodes available to migrate guests to from {node}"}))
|
||||||
|
# Exit 0 so the JSON error printed above reaches the follow-up
# "Fail if error building plan" task; a non-zero exit fails this shell
# task first and that dedicated check would never run.
raise SystemExit(0)
|
||||||
|
|
||||||
|
# Get VMs and LXCs on this node
|
||||||
|
vms = api_get(f"/nodes/{node}/qemu")
|
||||||
|
lxcs = api_get(f"/nodes/{node}/lxc")
|
||||||
|
|
||||||
|
plan = []
|
||||||
|
|
||||||
|
for vm in vms:
|
||||||
|
tags = [t.strip() for t in (vm.get("tags") or "").split(",") if t.strip()]
|
||||||
|
if any(t in exclude_tags for t in tags):
|
||||||
|
continue
|
||||||
|
cfg = api_get(f"/nodes/{node}/qemu/{vm['vmid']}/config")
|
||||||
|
has_passthrough = any(k.startswith("hostpci") or k.startswith("usb") for k in cfg)
|
||||||
|
has_local_cdrom = any(
|
||||||
|
isinstance(v, str) and "local" in v and ".iso" in v
|
||||||
|
for v in cfg.values()
|
||||||
|
)
|
||||||
|
needs_fallback = has_passthrough or not shared or has_local_cdrom
|
||||||
|
reason = "passthrough" if has_passthrough else ("local_disk" if not shared else ("local_cdrom" if has_local_cdrom else ""))
|
||||||
|
plan.append({
|
||||||
|
"vmid": vm["vmid"],
|
||||||
|
"name": vm.get("name", str(vm["vmid"])),
|
||||||
|
"type": "qemu",
|
||||||
|
"status": vm["status"],
|
||||||
|
"needs_fallback": needs_fallback,
|
||||||
|
"fallback_reason": reason
|
||||||
|
})
|
||||||
|
|
||||||
|
for lxc in lxcs:
|
||||||
|
tags = [t.strip() for t in (lxc.get("tags") or "").split(",") if t.strip()]
|
||||||
|
if any(t in exclude_tags for t in tags):
|
||||||
|
continue
|
||||||
|
plan.append({
|
||||||
|
"vmid": lxc["vmid"],
|
||||||
|
"name": lxc.get("name", str(lxc["vmid"])),
|
||||||
|
"type": "lxc",
|
||||||
|
"status": lxc["status"],
|
||||||
|
"needs_fallback": False,
|
||||||
|
"fallback_reason": ""
|
||||||
|
})
|
||||||
|
|
||||||
|
print(json.dumps({"plan": plan, "targets": targets}))
|
||||||
|
PYEOF
|
||||||
|
register: drain_plan_raw
|
||||||
|
delegate_to: localhost
|
||||||
|
changed_when: false
|
||||||
|
|
||||||
|
- name: "Drain | Parse migration plan"
|
||||||
ansible.builtin.set_fact:
|
ansible.builtin.set_fact:
|
||||||
migration_targets: >-
|
drain_data: "{{ drain_plan_raw.stdout | from_json }}"
|
||||||
{{ all_nodes.json.data
|
|
||||||
| selectattr('status', 'equalto', 'online')
|
|
||||||
| rejectattr('node', 'equalto', current_node)
|
|
||||||
| map(attribute='node')
|
|
||||||
| list }}
|
|
||||||
delegate_to: localhost
|
delegate_to: localhost
|
||||||
|
|
||||||
- name: Drain | Fail if no migration targets available
|
- name: "Drain | Fail if error building plan"
|
||||||
ansible.builtin.fail:
|
ansible.builtin.fail:
|
||||||
msg: "No online nodes available to migrate guests to. Cannot drain {{ current_node }}."
|
msg: "{{ drain_data.error }}"
|
||||||
when: migration_targets | length == 0
|
when: drain_data.error is defined
|
||||||
delegate_to: localhost
|
delegate_to: localhost
|
||||||
|
|
||||||
# ── Classify VMs — live migratable vs needs fallback ─────────────────────────
|
- name: "Drain | Set migration plan and targets"
|
||||||
- name: Drain | Get VM configs to check migratability
|
|
||||||
ansible.builtin.uri:
|
|
||||||
url: "https://{{ api_host }}:{{ api_port }}/api2/json/nodes/{{ current_node }}/qemu/{{ item.vmid }}/config"
|
|
||||||
method: GET
|
|
||||||
headers:
|
|
||||||
Authorization: "PVEAPIToken={{ api_token_id }}={{ api_token_secret }}"
|
|
||||||
validate_certs: false
|
|
||||||
register: vm_configs
|
|
||||||
loop: "{{ node_vms.json.data }}"
|
|
||||||
delegate_to: localhost
|
|
||||||
|
|
||||||
- name: Drain | Build guest migration plan
|
|
||||||
ansible.builtin.set_fact:
|
ansible.builtin.set_fact:
|
||||||
migration_plan: >-
|
migration_plan: "{{ drain_data.plan }}"
|
||||||
{%- set plan = [] -%}
|
migration_targets: "{{ drain_data.targets }}"
|
||||||
{%- for vm in node_vms.json.data -%}
|
|
||||||
{%- set cfg = vm_configs.results[loop.index0].json.data -%}
|
|
||||||
{%- set tags = (vm.tags | default('')) .split(',') | map('trim') | list -%}
|
|
||||||
{%- set excluded = tags | select('in', migrate_exclude_tags) | list | length > 0 -%}
|
|
||||||
{%- set has_passthrough = 'hostpci0' in cfg or 'usb0' in cfg -%}
|
|
||||||
{%- set has_local_disk = shared_storage == false -%}
|
|
||||||
{%- set has_local_cdrom = cfg.values() | select('string') | select('match', '.*local.*\\.iso.*') | list | length > 0 -%}
|
|
||||||
{%- set needs_fallback = has_passthrough or has_local_disk or has_local_cdrom -%}
|
|
||||||
{%- if not excluded -%}
|
|
||||||
{%- set _ = plan.append({
|
|
||||||
'vmid': vm.vmid,
|
|
||||||
'name': vm.name,
|
|
||||||
'type': 'qemu',
|
|
||||||
'status': vm.status,
|
|
||||||
'needs_fallback': needs_fallback,
|
|
||||||
'fallback_reason': ('passthrough' if has_passthrough else ('local_disk' if has_local_disk else ('local_cdrom' if has_local_cdrom else '')))
|
|
||||||
}) -%}
|
|
||||||
{%- endif -%}
|
|
||||||
{%- endfor -%}
|
|
||||||
{%- for lxc in node_lxcs.json.data -%}
|
|
||||||
{%- set tags = (lxc.tags | default('')) .split(',') | map('trim') | list -%}
|
|
||||||
{%- set excluded = tags | select('in', migrate_exclude_tags) | list | length > 0 -%}
|
|
||||||
{%- if not excluded -%}
|
|
||||||
{%- set _ = plan.append({
|
|
||||||
'vmid': lxc.vmid,
|
|
||||||
'name': lxc.name,
|
|
||||||
'type': 'lxc',
|
|
||||||
'status': lxc.status,
|
|
||||||
'needs_fallback': false,
|
|
||||||
'fallback_reason': ''
|
|
||||||
}) -%}
|
|
||||||
{%- endif -%}
|
|
||||||
{%- endfor -%}
|
|
||||||
{{ plan }}
|
|
||||||
delegate_to: localhost
|
delegate_to: localhost
|
||||||
|
|
||||||
- name: Drain | Log migration plan
|
- name: "Drain | Log migration plan for {{ current_node }}"
|
||||||
ansible.builtin.debug:
|
ansible.builtin.debug:
|
||||||
msg: >-
|
msg: >-
|
||||||
Migration plan for {{ current_node }}:
|
Migration plan for {{ current_node }} ({{ migration_plan | length }} guests → {{ migration_targets | first }}):
|
||||||
{% for g in migration_plan %}
|
{% for g in migration_plan %}
|
||||||
- {{ g.type | upper }} {{ g.vmid }} ({{ g.name }}) [{{ g.status }}]
|
- {{ g.type | upper }} {{ g.vmid }} ({{ g.name }}) [{{ g.status }}]{% if g.needs_fallback %} ⚠ fallback={{ live_migrate_fallback }} reason={{ g.fallback_reason }}{% endif %}
|
||||||
{% if g.needs_fallback %} ⚠ needs fallback ({{ g.fallback_reason }}) — action: {{ live_migrate_fallback }}{% endif %}
|
|
||||||
{% endfor %}
|
{% endfor %}
|
||||||
delegate_to: localhost
|
delegate_to: localhost
|
||||||
|
|
||||||
# ── Abort if any guests need fallback and live_migrate_fallback is 'migrate' ──
|
- name: "Drain | Warn about non-live-migratable guests"
|
||||||
- name: Drain | Warn about non-migratable guests
|
|
||||||
ansible.builtin.debug:
|
ansible.builtin.debug:
|
||||||
msg: >-
|
msg: >-
|
||||||
WARNING — {{ item.type | upper }} {{ item.vmid }} ({{ item.name }})
|
WARNING — {{ item.type | upper }} {{ item.vmid }} ({{ item.name }})
|
||||||
cannot be live migrated ({{ item.fallback_reason }}).
|
cannot be live migrated ({{ item.fallback_reason }}).
|
||||||
live_migrate_fallback={{ live_migrate_fallback }} —
|
{% if live_migrate_fallback == 'skip' %}THIS VM WILL GO DOWN DURING NODE REBOOT.
|
||||||
{% if live_migrate_fallback == 'skip' %}
|
{% elif live_migrate_fallback == 'shutdown' %}Will be shut down, cold migrated, and restarted.
|
||||||
THIS VM WILL GO DOWN DURING NODE REBOOT.
|
{% else %}Will attempt live migrate anyway (may fail).{% endif %}
|
||||||
{% elif live_migrate_fallback == 'shutdown' %}
|
|
||||||
Will be shut down, cold migrated, and restarted.
|
|
||||||
{% else %}
|
|
||||||
Will attempt live migrate anyway (may fail).
|
|
||||||
{% endif %}
|
|
||||||
loop: "{{ migration_plan | selectattr('needs_fallback') | list }}"
|
loop: "{{ migration_plan | selectattr('needs_fallback') | list }}"
|
||||||
|
loop_control:
|
||||||
|
loop_var: item
|
||||||
delegate_to: localhost
|
delegate_to: localhost
|
||||||
|
|
||||||
# ── Perform migrations ────────────────────────────────────────────────────────
|
# ── Sequential migrations ─────────────────────────────────────────────────────
|
||||||
- name: Drain | Migrate guests (sequential)
|
- name: "Drain | Migrate guests sequentially"
|
||||||
when: not migration_bulk | bool
|
when: not migration_bulk | bool
|
||||||
include_tasks: migrate_guest.yml
|
|
||||||
loop: "{{ migration_plan | rejectattr('needs_fallback') | list + migration_plan | selectattr('needs_fallback') | rejectattr('needs_fallback' if live_migrate_fallback == 'skip' else 'nonexistent') | list }}"
|
|
||||||
loop_control:
|
|
||||||
loop_var: guest
|
|
||||||
|
|
||||||
- name: Drain | Migrate guests (bulk — fire all at once)
|
|
||||||
when: migration_bulk | bool
|
|
||||||
block:
|
block:
|
||||||
- name: Drain | Bulk | Trigger all live migrations simultaneously
|
- name: "Drain | Sequential | Migrate live-migratable guests"
|
||||||
ansible.builtin.uri:
|
include_tasks: migrate_guest.yml
|
||||||
url: "https://{{ api_host }}:{{ api_port }}/api2/json/nodes/{{ current_node }}/{{ 'qemu' if guest.type == 'qemu' else 'lxc' }}/{{ guest.vmid }}/migrate"
|
|
||||||
method: POST
|
|
||||||
headers:
|
|
||||||
Authorization: "PVEAPIToken={{ api_token_id }}={{ api_token_secret }}"
|
|
||||||
body_format: json
|
|
||||||
body:
|
|
||||||
target: "{{ migration_targets | first }}"
|
|
||||||
online: "{{ 1 if not guest.needs_fallback else 0 }}"
|
|
||||||
validate_certs: false
|
|
||||||
register: bulk_migration_tasks
|
|
||||||
loop: "{{ migration_plan | rejectattr('needs_fallback') | list }}"
|
loop: "{{ migration_plan | rejectattr('needs_fallback') | list }}"
|
||||||
loop_control:
|
loop_control:
|
||||||
loop_var: guest
|
loop_var: guest
|
||||||
delegate_to: localhost
|
|
||||||
|
|
||||||
- name: Drain | Bulk | Wait for all migrations to complete
|
- name: "Drain | Sequential | Handle fallback guests"
|
||||||
ansible.builtin.uri:
|
include_tasks: migrate_guest.yml
|
||||||
url: "https://{{ api_host }}:{{ api_port }}/api2/json/nodes/{{ current_node }}/tasks/{{ item.json.data }}/status"
|
loop: "{{ migration_plan | selectattr('needs_fallback') | list }}"
|
||||||
method: GET
|
loop_control:
|
||||||
headers:
|
loop_var: guest
|
||||||
Authorization: "PVEAPIToken={{ api_token_id }}={{ api_token_secret }}"
|
when: live_migrate_fallback != 'skip'
|
||||||
validate_certs: false
|
|
||||||
register: task_status
|
# ── Bulk migrations ───────────────────────────────────────────────────────────
|
||||||
until: task_status.json.data.status == 'stopped'
|
- name: "Drain | Migrate guests in bulk"
|
||||||
retries: 60
|
when: migration_bulk | bool
|
||||||
delay: 10
|
block:
|
||||||
loop: "{{ bulk_migration_tasks.results }}"
|
- name: "Drain | Bulk | Trigger all live migrations"
|
||||||
delegate_to: localhost
|
ansible.builtin.shell: |
|
||||||
|
python3 << 'PYEOF'
|
||||||
- name: Drain | Bulk | Check all migrations succeeded
|
import urllib.request, json, ssl
|
||||||
ansible.builtin.fail:
|
|
||||||
msg: "Migration task failed for VMID — exitstatus: {{ item.json.data.exitstatus }}"
|
ctx = ssl.create_default_context()
|
||||||
loop: "{{ task_status.results }}"
|
ctx.check_hostname = False
|
||||||
when: item.json.data.exitstatus != 'OK'
|
ctx.verify_mode = ssl.CERT_NONE
|
||||||
delegate_to: localhost
|
|
||||||
|
api_base = "https://{{ api_host }}:{{ api_port }}/api2/json"
|
||||||
- name: Drain | Bulk | Handle fallback guests sequentially
|
headers = {"Authorization": "PVEAPIToken={{ api_token_id }}={{ api_token_secret }}"}
|
||||||
|
node = "{{ current_node }}"
|
||||||
|
target = "{{ migration_targets | first }}"
|
||||||
|
# The second to_json renders the JSON document as a quoted string literal;
# json.loads then maps JSON true/false/null to Python values. Pasting the raw
# JSON as Python source raises NameError on `false` for any non-empty plan.
plan = json.loads({{ migration_plan | rejectattr('needs_fallback') | list | to_json | to_json }})
|
||||||
|
|
||||||
|
task_ids = []
|
||||||
|
for guest in plan:
|
||||||
|
gtype = "qemu" if guest["type"] == "qemu" else "lxc"
|
||||||
|
url = f"{api_base}/nodes/{node}/{gtype}/{guest['vmid']}/migrate"
|
||||||
|
body = json.dumps({"target": target, "online": 1}).encode()
|
||||||
|
req = urllib.request.Request(url, data=body, headers={**headers, "Content-Type": "application/json"}, method="POST")
|
||||||
|
with urllib.request.urlopen(req, context=ctx) as r:
|
||||||
|
task_id = json.loads(r.read())["data"]
|
||||||
|
task_ids.append({"vmid": guest["vmid"], "name": guest["name"], "task": task_id})
|
||||||
|
print(f"Triggered migration: {guest['type'].upper()} {guest['vmid']} ({guest['name']}) → {target} task={task_id}")
|
||||||
|
|
||||||
|
print(json.dumps({"task_ids": task_ids}))
|
||||||
|
PYEOF
|
||||||
|
register: bulk_trigger_raw
|
||||||
|
delegate_to: localhost
|
||||||
|
changed_when: true
|
||||||
|
|
||||||
|
- name: "Drain | Bulk | Wait for all migration tasks to complete"
|
||||||
|
ansible.builtin.shell: |
|
||||||
|
python3 << 'PYEOF'
|
||||||
|
import urllib.request, json, ssl, time
|
||||||
|
|
||||||
|
ctx = ssl.create_default_context()
|
||||||
|
ctx.check_hostname = False
|
||||||
|
ctx.verify_mode = ssl.CERT_NONE
|
||||||
|
|
||||||
|
api_base = "https://{{ api_host }}:{{ api_port }}/api2/json"
|
||||||
|
headers = {"Authorization": "PVEAPIToken={{ api_token_id }}={{ api_token_secret }}"}
|
||||||
|
node = "{{ current_node }}"
|
||||||
|
|
||||||
|
lines = """{{ bulk_trigger_raw.stdout }}""".strip().split('\n')
|
||||||
|
last_line = [l for l in lines if l.startswith('{')][-1]
|
||||||
|
task_ids = json.loads(last_line)["task_ids"]
|
||||||
|
|
||||||
|
failed = []
|
||||||
|
for t in task_ids:
|
||||||
|
retries = 60
|
||||||
|
while retries > 0:
|
||||||
|
url = f"{api_base}/nodes/{node}/tasks/{t['task']}/status"
|
||||||
|
req = urllib.request.Request(url, headers=headers)
|
||||||
|
with urllib.request.urlopen(req, context=ctx) as r:
|
||||||
|
status = json.loads(r.read())["data"]
|
||||||
|
if status["status"] == "stopped":
|
||||||
|
if status.get("exitstatus") != "OK":
|
||||||
|
failed.append(f"{t['name']} ({t['vmid']}): {status.get('exitstatus')}")
|
||||||
|
else:
|
||||||
|
print(f"OK: {t['name']} ({t['vmid']}) migrated successfully")
|
||||||
|
break
|
||||||
|
time.sleep(10)
|
||||||
|
retries -= 1
|
||||||
|
else:
|
||||||
|
failed.append(f"{t['name']} ({t['vmid']}): timed out")
|
||||||
|
|
||||||
|
if failed:
|
||||||
|
print("FAILED: " + ", ".join(failed))
|
||||||
|
exit(1)
|
||||||
|
print("All bulk migrations completed successfully")
|
||||||
|
PYEOF
|
||||||
|
register: bulk_wait_result
|
||||||
|
delegate_to: localhost
|
||||||
|
changed_when: false
|
||||||
|
|
||||||
|
- name: "Drain | Bulk | Handle fallback guests sequentially"
|
||||||
include_tasks: migrate_guest.yml
|
include_tasks: migrate_guest.yml
|
||||||
loop: "{{ migration_plan | selectattr('needs_fallback') | list }}"
|
loop: "{{ migration_plan | selectattr('needs_fallback') | list }}"
|
||||||
loop_control:
|
loop_control:
|
||||||
|
|||||||
@@ -6,111 +6,103 @@
|
|||||||
# guest = { vmid, name, type, status, needs_fallback, fallback_reason }
|
# guest = { vmid, name, type, status, needs_fallback, fallback_reason }
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
|
|
||||||
- name: "Migrate | {{ guest.type | upper }} {{ guest.vmid }} ({{ guest.name }}) — skip check"
|
- name: "Migrate | {{ guest.type | upper }} {{ guest.vmid }} ({{ guest.name }}) — skipping (live_migrate_fallback=skip)"
|
||||||
ansible.builtin.debug:
|
ansible.builtin.debug:
|
||||||
msg: "SKIPPING {{ guest.type | upper }} {{ guest.vmid }} ({{ guest.name }}) — live_migrate_fallback=skip, will go down during reboot"
|
msg: "SKIPPING {{ guest.type | upper }} {{ guest.vmid }} ({{ guest.name }}) — will go down during reboot"
|
||||||
when: guest.needs_fallback and live_migrate_fallback == 'skip'
|
when: guest.needs_fallback and live_migrate_fallback == 'skip'
|
||||||
delegate_to: localhost
|
delegate_to: localhost
|
||||||
|
|
||||||
- name: "Migrate | {{ guest.type | upper }} {{ guest.vmid }} ({{ guest.name }})"
|
- name: "Migrate | {{ guest.type | upper }} {{ guest.vmid }} ({{ guest.name }})"
|
||||||
when: not (guest.needs_fallback and live_migrate_fallback == 'skip')
|
when: not (guest.needs_fallback and live_migrate_fallback == 'skip')
|
||||||
block:
|
block:
|
||||||
# ── Cold migration: shutdown first ───────────────────────────────────────
|
- name: "Migrate | {{ guest.vmid }} | Execute migration"
|
||||||
- name: "Migrate | {{ guest.vmid }} | Shutdown for cold migration"
|
ansible.builtin.shell: |
|
||||||
ansible.builtin.uri:
|
python3 << 'PYEOF'
|
||||||
url: "https://{{ api_host }}:{{ api_port }}/api2/json/nodes/{{ current_node }}/{{ 'qemu' if guest.type == 'qemu' else 'lxc' }}/{{ guest.vmid }}/status/shutdown"
|
import urllib.request, json, ssl, time
|
||||||
method: POST
|
|
||||||
headers:
|
|
||||||
Authorization: "PVEAPIToken={{ api_token_id }}={{ api_token_secret }}"
|
|
||||||
body_format: json
|
|
||||||
body:
|
|
||||||
timeout: "{{ vm_shutdown_timeout }}"
|
|
||||||
forceStop: 1
|
|
||||||
validate_certs: false
|
|
||||||
when: guest.needs_fallback and live_migrate_fallback == 'shutdown' and guest.status == 'running'
|
|
||||||
delegate_to: localhost
|
|
||||||
|
|
||||||
- name: "Migrate | {{ guest.vmid }} | Wait for shutdown"
|
ctx = ssl.create_default_context()
|
||||||
ansible.builtin.uri:
|
ctx.check_hostname = False
|
||||||
url: "https://{{ api_host }}:{{ api_port }}/api2/json/nodes/{{ current_node }}/{{ 'qemu' if guest.type == 'qemu' else 'lxc' }}/{{ guest.vmid }}/status/current"
|
ctx.verify_mode = ssl.CERT_NONE
|
||||||
method: GET
|
|
||||||
headers:
|
|
||||||
Authorization: "PVEAPIToken={{ api_token_id }}={{ api_token_secret }}"
|
|
||||||
validate_certs: false
|
|
||||||
register: vm_status
|
|
||||||
until: vm_status.json.data.status == 'stopped'
|
|
||||||
retries: "{{ (vm_shutdown_timeout | int / 5) | int }}"
|
|
||||||
delay: 5
|
|
||||||
when: guest.needs_fallback and live_migrate_fallback == 'shutdown' and guest.status == 'running'
|
|
||||||
delegate_to: localhost
|
|
||||||
|
|
||||||
# ── Trigger migration ─────────────────────────────────────────────────────
|
api_base = "https://{{ api_host }}:{{ api_port }}/api2/json"
|
||||||
- name: "Migrate | {{ guest.vmid }} | Trigger migration to {{ migration_targets | first }}"
|
headers = {"Authorization": "PVEAPIToken={{ api_token_id }}={{ api_token_secret }}"}
|
||||||
ansible.builtin.uri:
|
node = "{{ current_node }}"
|
||||||
url: "https://{{ api_host }}:{{ api_port }}/api2/json/nodes/{{ current_node }}/{{ 'qemu' if guest.type == 'qemu' else 'lxc' }}/{{ guest.vmid }}/migrate"
|
target = "{{ migration_targets | first }}"
|
||||||
method: POST
|
vmid = {{ guest.vmid }}
|
||||||
headers:
|
gtype = "{{ guest.type }}"
|
||||||
Authorization: "PVEAPIToken={{ api_token_id }}={{ api_token_secret }}"
|
name = "{{ guest.name }}"
|
||||||
body_format: json
|
status = "{{ guest.status }}"
|
||||||
body:
|
# Render a Python literal (True/False). The previous `| lower` emitted
# `true`/`false`, which are undefined names once this script runs.
needs_fallback = {{ guest.needs_fallback | bool }}
|
||||||
target: "{{ migration_targets | first }}"
|
fallback = "{{ live_migrate_fallback }}"
|
||||||
online: "{{ 0 if (guest.needs_fallback and live_migrate_fallback == 'shutdown') else 1 }}"
|
shutdown_timeout = {{ vm_shutdown_timeout }}
|
||||||
validate_certs: false
|
start_timeout = {{ vm_start_timeout }}
|
||||||
register: migration_task
|
|
||||||
delegate_to: localhost
|
|
||||||
|
|
||||||
# ── Wait for migration to complete ────────────────────────────────────────
|
def api_req(path, method="GET", body=None):
|
||||||
- name: "Migrate | {{ guest.vmid }} | Wait for migration task to complete"
|
url = f"{api_base}{path}"
|
||||||
ansible.builtin.uri:
|
data = json.dumps(body).encode() if body else None
|
||||||
url: "https://{{ api_host }}:{{ api_port }}/api2/json/nodes/{{ current_node }}/tasks/{{ migration_task.json.data }}/status"
|
hdrs = {**headers}
|
||||||
method: GET
|
if data:
|
||||||
headers:
|
hdrs["Content-Type"] = "application/json"
|
||||||
Authorization: "PVEAPIToken={{ api_token_id }}={{ api_token_secret }}"
|
req = urllib.request.Request(url, data=data, headers=hdrs, method=method)
|
||||||
validate_certs: false
|
with urllib.request.urlopen(req, context=ctx) as r:
|
||||||
register: task_status
|
return json.loads(r.read())["data"]
|
||||||
until: task_status.json.data.status == 'stopped'
|
|
||||||
retries: 60
|
|
||||||
delay: 10
|
|
||||||
delegate_to: localhost
|
|
||||||
|
|
||||||
- name: "Migrate | {{ guest.vmid }} | Verify migration succeeded"
|
# ── Cold migration: shutdown first ────────────────────────────────────
|
||||||
ansible.builtin.fail:
|
if needs_fallback and fallback == "shutdown" and status == "running":
|
||||||
msg: "Migration of {{ guest.type | upper }} {{ guest.vmid }} ({{ guest.name }}) failed — {{ task_status.json.data.exitstatus }}"
|
print(f"Shutting down {gtype.upper()} {vmid} ({name})...")
|
||||||
when: task_status.json.data.exitstatus != 'OK'
|
api_req(f"/nodes/{node}/{gtype}/{vmid}/status/shutdown", "POST",
|
||||||
delegate_to: localhost
|
{"timeout": shutdown_timeout, "forceStop": 1})
|
||||||
|
# Wait for stop
|
||||||
|
for _ in range(shutdown_timeout // 5):
|
||||||
|
s = api_req(f"/nodes/{node}/{gtype}/{vmid}/status/current")
|
||||||
|
if s["status"] == "stopped":
|
||||||
|
print(f" {vmid} stopped")
|
||||||
|
break
|
||||||
|
time.sleep(5)
|
||||||
|
else:
|
||||||
|
print(f"ERROR: {vmid} did not stop within {shutdown_timeout}s")
|
||||||
|
exit(1)
|
||||||
|
|
||||||
# ── Cold migration: restart on target ────────────────────────────────────
|
# ── Trigger migration ─────────────────────────────────────────────────
|
||||||
- name: "Migrate | {{ guest.vmid }} | Start on target node after cold migration"
|
online = 0 if (needs_fallback and fallback == "shutdown") else 1
|
||||||
ansible.builtin.uri:
|
print(f"Migrating {gtype.upper()} {vmid} ({name}) → {target} (online={online})...")
|
||||||
url: "https://{{ api_host }}:{{ api_port }}/api2/json/nodes/{{ migration_targets | first }}/{{ 'qemu' if guest.type == 'qemu' else 'lxc' }}/{{ guest.vmid }}/status/start"
|
task_id = api_req(f"/nodes/{node}/{gtype}/{vmid}/migrate", "POST",
|
||||||
method: POST
|
{"target": target, "online": online})
|
||||||
headers:
|
|
||||||
Authorization: "PVEAPIToken={{ api_token_id }}={{ api_token_secret }}"
|
|
||||||
validate_certs: false
|
|
||||||
when: guest.needs_fallback and live_migrate_fallback == 'shutdown' and guest.status == 'running'
|
|
||||||
delegate_to: localhost
|
|
||||||
|
|
||||||
- name: "Migrate | {{ guest.vmid }} | Wait for VM to start on target"
|
# ── Wait for migration task ───────────────────────────────────────────
|
||||||
ansible.builtin.uri:
|
for _ in range(60):
|
||||||
url: "https://{{ api_host }}:{{ api_port }}/api2/json/nodes/{{ migration_targets | first }}/{{ 'qemu' if guest.type == 'qemu' else 'lxc' }}/{{ guest.vmid }}/status/current"
|
t = api_req(f"/nodes/{node}/tasks/{task_id}/status")
|
||||||
method: GET
|
if t["status"] == "stopped":
|
||||||
headers:
|
if t.get("exitstatus") != "OK":
|
||||||
Authorization: "PVEAPIToken={{ api_token_id }}={{ api_token_secret }}"
|
print(f"ERROR: migration failed — {t.get('exitstatus')}")
|
||||||
validate_certs: false
|
exit(1)
|
||||||
register: vm_start_status
|
print(f" Migration complete: {t.get('exitstatus')}")
|
||||||
until: vm_start_status.json.data.status == 'running'
|
break
|
||||||
retries: "{{ (vm_start_timeout | int / 5) | int }}"
|
time.sleep(10)
|
||||||
delay: 5
|
else:
|
||||||
when: guest.needs_fallback and live_migrate_fallback == 'shutdown' and guest.status == 'running'
|
print(f"ERROR: migration task timed out")
|
||||||
delegate_to: localhost
|
exit(1)
|
||||||
|
|
||||||
- name: "Migrate | {{ guest.vmid }} ({{ guest.name }}) | Migration complete"
|
# ── Cold migration: restart on target ─────────────────────────────────
|
||||||
|
if needs_fallback and fallback == "shutdown" and status == "running":
|
||||||
|
print(f"Starting {vmid} on {target}...")
|
||||||
|
api_req(f"/nodes/{target}/{gtype}/{vmid}/status/start", "POST")
|
||||||
|
for _ in range(start_timeout // 5):
|
||||||
|
s = api_req(f"/nodes/{target}/{gtype}/{vmid}/status/current")
|
||||||
|
if s["status"] == "running":
|
||||||
|
print(f" {vmid} running on {target}")
|
||||||
|
break
|
||||||
|
time.sleep(5)
|
||||||
|
else:
|
||||||
|
print(f"WARNING: {vmid} did not start within {start_timeout}s — check manually")
|
||||||
|
|
||||||
|
print(f"Done: {gtype.upper()} {vmid} ({name}) → {target}")
|
||||||
|
PYEOF
|
||||||
|
register: migrate_result
|
||||||
|
delegate_to: localhost
|
||||||
|
changed_when: true
|
||||||
|
|
||||||
|
- name: "Migrate | {{ guest.vmid }} ({{ guest.name }}) | Log result"
|
||||||
ansible.builtin.debug:
|
ansible.builtin.debug:
|
||||||
msg: >-
|
msg: "{{ migrate_result.stdout_lines }}"
|
||||||
{{ guest.type | upper }} {{ guest.vmid }} ({{ guest.name }})
|
|
||||||
{% if guest.needs_fallback and live_migrate_fallback == 'shutdown' %}
|
|
||||||
cold migrated to {{ migration_targets | first }} and restarted
|
|
||||||
{% else %}
|
|
||||||
live migrated to {{ migration_targets | first }}
|
|
||||||
{% endif %}
|
|
||||||
delegate_to: localhost
|
delegate_to: localhost
|
||||||
|
|||||||
@@ -5,59 +5,77 @@
|
|||||||
# Only runs if migration_restore: true
|
# Only runs if migration_restore: true
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
|
|
||||||
- name: Restore | Skip restore
|
- name: "Restore | Skip — migration_restore=false"
|
||||||
ansible.builtin.debug:
|
ansible.builtin.debug:
|
||||||
msg: "migration_restore=false — leaving guests on their current nodes"
|
msg: "migration_restore=false — leaving guests on their current nodes"
|
||||||
when: not migration_restore | bool
|
when: not migration_restore | bool
|
||||||
|
delegate_to: localhost
|
||||||
|
|
||||||
- name: Restore | Migrate guests back to {{ current_node }}
|
- name: "Restore | Migrate guests back to {{ current_node }}"
|
||||||
when: migration_restore | bool
|
when: migration_restore | bool
|
||||||
block:
|
block:
|
||||||
- name: Restore | Get guests currently on other nodes that originated from {{ current_node }}
|
- name: "Restore | Migrate all guests back to {{ current_node }}"
|
||||||
|
ansible.builtin.shell: |
|
||||||
|
python3 << 'PYEOF'
|
||||||
|
import urllib.request, json, ssl, time
|
||||||
|
|
||||||
|
ctx = ssl.create_default_context()
|
||||||
|
ctx.check_hostname = False
|
||||||
|
ctx.verify_mode = ssl.CERT_NONE
|
||||||
|
|
||||||
|
api_base = "https://{{ api_host }}:{{ api_port }}/api2/json"
|
||||||
|
headers = {"Authorization": "PVEAPIToken={{ api_token_id }}={{ api_token_secret }}"}
|
||||||
|
node = "{{ current_node }}"
|
||||||
|
source = "{{ migration_targets | first }}"
|
||||||
|
# The second to_json renders the JSON document as a quoted string literal;
# json.loads then maps JSON true/false/null to Python values. Pasting the raw
# JSON as Python source raises NameError on `true`/`false` in the plan entries.
plan = json.loads({{ migration_plan | to_json | to_json }})
|
||||||
|
fallback = "{{ live_migrate_fallback }}"
|
||||||
|
|
||||||
|
def api_req(path, method="GET", body=None):
|
||||||
|
url = f"{api_base}{path}"
|
||||||
|
data = json.dumps(body).encode() if body else None
|
||||||
|
hdrs = {**headers}
|
||||||
|
if data:
|
||||||
|
hdrs["Content-Type"] = "application/json"
|
||||||
|
req = urllib.request.Request(url, data=data, headers=hdrs, method=method)
|
||||||
|
with urllib.request.urlopen(req, context=ctx) as r:
|
||||||
|
return json.loads(r.read())["data"]
|
||||||
|
|
||||||
|
task_ids = []
|
||||||
|
for guest in plan:
|
||||||
|
if guest["needs_fallback"] and fallback == "skip":
|
||||||
|
print(f"SKIP restore: {guest['type'].upper()} {guest['vmid']} ({guest['name']}) — was skipped during drain")
|
||||||
|
continue
|
||||||
|
gtype = guest["type"]
|
||||||
|
online = 0 if (guest["needs_fallback"] and fallback == "shutdown") else 1
|
||||||
|
print(f"Restoring {gtype.upper()} {guest['vmid']} ({guest['name']}) → {node} (online={online})...")
|
||||||
|
task_id = api_req(f"/nodes/{source}/{gtype}/{guest['vmid']}/migrate", "POST",
|
||||||
|
{"target": node, "online": online})
|
||||||
|
task_ids.append({"vmid": guest["vmid"], "name": guest["name"], "task": task_id, "type": gtype})
|
||||||
|
|
||||||
|
failed = []
|
||||||
|
for t in task_ids:
|
||||||
|
for _ in range(60):
|
||||||
|
status = api_req(f"/nodes/{source}/tasks/{t['task']}/status")
|
||||||
|
if status["status"] == "stopped":
|
||||||
|
if status.get("exitstatus") != "OK":
|
||||||
|
failed.append(f"{t['name']} ({t['vmid']}): {status.get('exitstatus')}")
|
||||||
|
else:
|
||||||
|
print(f"OK: {t['name']} ({t['vmid']}) restored to {node}")
|
||||||
|
break
|
||||||
|
time.sleep(10)
|
||||||
|
else:
|
||||||
|
failed.append(f"{t['name']} ({t['vmid']}): timed out")
|
||||||
|
|
||||||
|
if failed:
|
||||||
|
print("FAILED restores: " + ", ".join(failed))
|
||||||
|
exit(1)
|
||||||
|
print(f"All guests restored to {node}")
|
||||||
|
PYEOF
|
||||||
|
register: restore_result
|
||||||
|
delegate_to: localhost
|
||||||
|
changed_when: true
|
||||||
|
|
||||||
|
- name: "Restore | Log result"
|
||||||
ansible.builtin.debug:
|
ansible.builtin.debug:
|
||||||
msg: >-
|
msg: "{{ restore_result.stdout_lines }}"
|
||||||
Restoring {{ migration_plan | rejectattr('needs_fallback') | list | length +
|
|
||||||
(migration_plan | selectattr('needs_fallback') | list | length if live_migrate_fallback != 'skip' else 0) }}
|
|
||||||
guests back to {{ current_node }}
|
|
||||||
|
|
||||||
- name: Restore | Migrate each guest back
|
|
||||||
ansible.builtin.uri:
|
|
||||||
url: "https://{{ api_host }}:{{ api_port }}/api2/json/nodes/{{ migration_targets | first }}/{{ 'qemu' if guest.type == 'qemu' else 'lxc' }}/{{ guest.vmid }}/migrate"
|
|
||||||
method: POST
|
|
||||||
headers:
|
|
||||||
Authorization: "PVEAPIToken={{ api_token_id }}={{ api_token_secret }}"
|
|
||||||
body_format: json
|
|
||||||
body:
|
|
||||||
target: "{{ current_node }}"
|
|
||||||
online: "{{ 0 if (guest.needs_fallback and live_migrate_fallback == 'shutdown') else 1 }}"
|
|
||||||
validate_certs: false
|
|
||||||
register: restore_task
|
|
||||||
loop: "{{ migration_plan | rejectattr('needs_fallback') | list }}"
|
|
||||||
loop_control:
|
|
||||||
loop_var: guest
|
|
||||||
delegate_to: localhost
|
delegate_to: localhost
|
||||||
|
|
||||||
- name: Restore | Wait for all restore migrations to complete
|
|
||||||
ansible.builtin.uri:
|
|
||||||
url: "https://{{ api_host }}:{{ api_port }}/api2/json/nodes/{{ migration_targets | first }}/tasks/{{ item.json.data }}/status"
|
|
||||||
method: GET
|
|
||||||
headers:
|
|
||||||
Authorization: "PVEAPIToken={{ api_token_id }}={{ api_token_secret }}"
|
|
||||||
validate_certs: false
|
|
||||||
register: restore_status
|
|
||||||
until: restore_status.json.data.status == 'stopped'
|
|
||||||
retries: 60
|
|
||||||
delay: 10
|
|
||||||
loop: "{{ restore_task.results }}"
|
|
||||||
delegate_to: localhost
|
|
||||||
|
|
||||||
- name: Restore | Check all restores succeeded
|
|
||||||
ansible.builtin.fail:
|
|
||||||
msg: "Restore migration failed — {{ item.json.data.exitstatus }}"
|
|
||||||
loop: "{{ restore_status.results }}"
|
|
||||||
when: item.json.data.exitstatus != 'OK'
|
|
||||||
delegate_to: localhost
|
|
||||||
|
|
||||||
- name: Restore | Complete
|
|
||||||
ansible.builtin.debug:
|
|
||||||
msg: "All guests restored to {{ current_node }}"
|
|
||||||
|
|||||||
Reference in New Issue
Block a user