Files
ansible-msp-automations/playbooks/proxmox_migrate_vms.yml
2026-03-15 15:48:59 -07:00

372 lines
15 KiB
YAML

---
# =============================================================================
# proxmox_migrate_vms.yml
# Flexible VM migration playbook supporting four modes:
#
# drain — move all VMs off a specific node (pre-maintenance)
# rebalance — redistribute VMs evenly across all online nodes by resources
# restore — return VMs to their origin nodes using a drain state file
# targeted — migrate specific VMIDs or tagged VMs to a specified target
#
# Usage examples:
# # Drain a node before maintenance
# ansible-playbook proxmox_migrate_vms.yml -e "migrate_mode=drain migrate_source_node=pm-node-01"
#
# # Rebalance the cluster
# ansible-playbook proxmox_migrate_vms.yml -e "migrate_mode=rebalance"
#
# # Restore VMs to origin after maintenance
# ansible-playbook proxmox_migrate_vms.yml -e "migrate_mode=restore migrate_source_node=pm-node-01"
#
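# # Migrate specific VMIDs to a target node
# # (lists must be passed as JSON extra-vars; key=value extra-vars are always strings)
# ansible-playbook proxmox_migrate_vms.yml -e '{"migrate_mode": "targeted", "migrate_vmids": [100, 101], "migrate_target_node": "pm-node-02"}'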
#
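# # Migrate VMs by tag
# # (lists must be passed as JSON extra-vars; key=value extra-vars are always strings)
# ansible-playbook proxmox_migrate_vms.yml -e '{"migrate_mode": "targeted", "migrate_tags": ["win11"], "migrate_target_node": "pm-node-02"}'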
# =============================================================================
# Play definition. Runs once against the proxmox_cluster group and dispatches
# to one of four migration modes chosen by `migrate_mode`. Every variable
# below is a default that can be overridden with extra-vars.
- name: "Proxmox | Migrate VMs"
  hosts: proxmox_cluster
  gather_facts: true
  run_once: true
  vars:
    # Mode: drain | rebalance | restore | targeted
    migrate_mode: drain
    # Source node (required for drain and restore modes)
    migrate_source_node: ""
    # Target node (optional; drain and targeted select one by resources when empty)
    migrate_target_node: ""
    # Targeted mode filters
    migrate_vmids: []  # list of VMIDs to migrate
    migrate_tags: []  # list of tags to match
    # Rebalance threshold — don't migrate if imbalance is below this % of total memory
    rebalance_threshold_pct: 10
    # Shared drain role vars: an explicit target wins, otherwise the strategy
    # is 'resources'.
    drain_target_strategy: "{{ 'explicit' if migrate_target_node != '' else 'resources' }}"
    drain_target_node: "{{ migrate_target_node }}"
    # NOTE(review): /tmp is commonly cleared on reboot; if the drain state must
    # survive node maintenance, confirm where this directory lives.
    drain_state_dir: "/tmp/proxmox_drain_state"
    # Restore vars — derived from drain_state_dir so that overriding the drain
    # location automatically points restore at the same state files.
    restore_state_dir: "{{ drain_state_dir }}"
pre_tasks:
  # Guard: stop immediately when migrate_mode is not one of the four
  # supported values.
  - name: "Migrate | Validate mode"
    when:
      - migrate_mode not in ['drain', 'rebalance', 'restore', 'targeted']
    ansible.builtin.fail:
      msg: >-
        Invalid migrate_mode '{{ migrate_mode }}'.
        Must be one of: drain, rebalance, restore, targeted.
# Source-node guards for the two modes that act on one specific node.
# A single task covers drain and restore (the rendered message is the same as
# before); blank or null values are treated as "not provided".
- name: "Migrate | Validate drain/restore — source node required"
  ansible.builtin.fail:
    msg: "migrate_source_node is required for {{ migrate_mode }} mode."
  when:
    - migrate_mode in ['drain', 'restore']
    - migrate_source_node | default('', true) | trim | length == 0
# Draining a node onto itself can never succeed, so reject it before any
# role runs.
- name: "Migrate | Validate drain — target must differ from source"
  ansible.builtin.fail:
    msg: >-
      migrate_target_node '{{ migrate_target_node }}' is the node being drained.
      Choose a different target or leave it empty to select one by resources.
  when:
    - migrate_mode == 'drain'
    - migrate_target_node != ''
    - migrate_target_node == migrate_source_node
# Targeted mode needs at least one selector: fail when both the VMID list and
# the tag list are empty.
- name: "Migrate | Validate targeted — VMIDs or tags required"
  when: >-
    migrate_mode == 'targeted'
    and migrate_vmids | length == 0
    and migrate_tags | length == 0
  ansible.builtin.fail:
    msg: "migrate_vmids or migrate_tags must be set for targeted mode."
# Prints a summary of the requested operation. client_name falls back to
# 'Unknown' when undefined; the source, target, vmids and tags fields are only
# rendered when the corresponding variable is non-empty.
- name: "Migrate | Log operation"
ansible.builtin.debug:
msg: >-
Proxmox VM migration —
client={{ client_name | default('Unknown') }}
mode={{ migrate_mode }}
{% if migrate_source_node != '' %}source={{ migrate_source_node }}{% endif %}
{% if migrate_target_node != '' %}target={{ migrate_target_node }}{% endif %}
{% if migrate_vmids | length > 0 %}vmids={{ migrate_vmids }}{% endif %}
{% if migrate_tags | length > 0 %}tags={{ migrate_tags }}{% endif %}
# Roles applied by this play (Ansible runs them after pre_tasks and before tasks).
roles:
  - proxmox_preflight
tasks:
  # ── DRAIN mode ─────────────────────────────────────────────────────────────
  # Hands off to the proxmox_drain role, passing the source node as
  # current_node. Only runs when migrate_mode is 'drain'.
  - name: "Migrate | DRAIN mode"
    when: migrate_mode == 'drain'
    vars:
      current_node: "{{ migrate_source_node }}"
    ansible.builtin.include_role:
      name: proxmox_drain
# ── RESTORE mode ───────────────────────────────────────────────────────────
# Hands off to the proxmox_restore role, passing the source node as
# current_node. Only runs when migrate_mode is 'restore'.
- name: "Migrate | RESTORE mode"
  when: migrate_mode == 'restore'
  vars:
    current_node: "{{ migrate_source_node }}"
  ansible.builtin.include_role:
    name: proxmox_restore
# ── REBALANCE mode ─────────────────────────────────────────────────────────
# Queries the Proxmox API from localhost for node information and registers
# the result as rebalance_nodes. api_port defaults to 8006 and validate_certs
# to false when not set.
- name: "Migrate | REBALANCE | Get all node info"
  when: migrate_mode == 'rebalance'
  delegate_to: localhost
  register: rebalance_nodes
  community.proxmox.proxmox_node_info:
    validate_certs: "{{ validate_certs | default(false) }}"
    api_port: "{{ api_port | default(8006) }}"
    api_host: "{{ api_host }}"
    api_user: "{{ api_user }}"
    api_token_id: "{{ api_token_id }}"
    api_token_secret: "{{ api_token_secret }}"
# Loops over every node reported as 'online' in rebalance_nodes and asks the
# Proxmox API (from localhost) for that node's guests. The per-node results
# are registered as rebalance_vms_per_node.
- name: "Migrate | REBALANCE | Get all VM info per node"
  when: migrate_mode == 'rebalance'
  delegate_to: localhost
  register: rebalance_vms_per_node
  loop_control:
    label: "{{ item.node }}"
  loop: "{{ rebalance_nodes.proxmox_nodes | selectattr('status', 'equalto', 'online') | list }}"
  community.proxmox.proxmox_vm_info:
    node: "{{ item.node }}"
    validate_certs: "{{ validate_certs | default(false) }}"
    api_port: "{{ api_port | default(8006) }}"
    api_host: "{{ api_host }}"
    api_user: "{{ api_user }}"
    api_token_id: "{{ api_token_id }}"
    api_token_secret: "{{ api_token_secret }}"
# Builds rebalance_node_loads: one dict per queried node with the keys
# node, used_mem, max_mem, free_mem, load_pct, vm_count and vms, sorted by
# load_pct with the most loaded node first. Templates are excluded from
# vm_count and vms.
#
# Every Jinja tag uses whitespace control so the rendered value begins with
# '[' rather than with padding; the unused per-node VM memory sum from the
# earlier version has been dropped.
- name: "Migrate | REBALANCE | Calculate node loads"
  ansible.builtin.set_fact:
    rebalance_node_loads: >-
      {%- set loads = [] -%}
      {%- for result in rebalance_vms_per_node.results -%}
      {%-   set node_name = result.item.node -%}
      {%-   set node_info = rebalance_nodes.proxmox_nodes
              | selectattr('node', 'equalto', node_name)
              | first -%}
      {%-   set guests = result.proxmox_vms
              | rejectattr('template', 'equalto', true)
              | list -%}
      {#-   Guard the division: a node reporting maxmem 0 gets load 0. -#}
      {%-   set load_pct = ((node_info.mem / node_info.maxmem * 100) | round(1))
              if node_info.maxmem else 0 -%}
      {%-   set _ = loads.append({
              'node': node_name,
              'used_mem': node_info.mem,
              'max_mem': node_info.maxmem,
              'free_mem': node_info.maxmem - node_info.mem,
              'load_pct': load_pct,
              'vm_count': guests | length,
              'vms': guests
            }) -%}
      {%- endfor -%}
      {{ loads | sort(attribute='load_pct', reverse=true) }}
  delegate_to: localhost
  when: migrate_mode == 'rebalance'
# Prints node, load_pct and vm_count for every entry of rebalance_node_loads.
# Only runs in rebalance mode.
- name: "Migrate | REBALANCE | Log current distribution"
ansible.builtin.debug:
msg: >-
Current cluster load:
{% for n in rebalance_node_loads %}
{{ n.node }}: {{ n.load_pct }}% memory used, {{ n.vm_count }} VMs
{% endfor %}
when: migrate_mode == 'rebalance'
# Builds rebalance_migrations: a list of {vmid, name, type, status, from, to}
# moves for guests that are not stopped.
#
# A guest is planned for a move when its node's projected memory load exceeds
# the cluster average (relative to that node's max memory) by more than
# rebalance_threshold_pct, and the least-loaded other node is more than the
# threshold below the source.
#
# Unlike a single pass over static load figures, the projected load of source
# and target is updated after every planned move, so planning stops once the
# source is back within the threshold instead of sending every guest of an
# overloaded node to the same target. A move is also skipped when it would
# leave the target more loaded than the source currently is.
#
# Guest memory is taken from `mem`, falling back to `maxmem` when `mem` is
# missing or zero. The threshold is cast to float so it also works when
# supplied as a key=value extra-var. With fewer than two usable nodes the plan
# is empty.
- name: "Migrate | REBALANCE | Build migration plan"
  ansible.builtin.set_fact:
    rebalance_migrations: >-
      {%- set moves = [] -%}
      {%- set threshold = rebalance_threshold_pct | float -%}
      {%- set projected = {} -%}
      {%- for n in rebalance_node_loads if n.max_mem | float > 0 -%}
      {%-   set _ = projected.update({
              n.node: {'used': n.used_mem | float, 'max': n.max_mem | float}
            }) -%}
      {%- endfor -%}
      {%- if projected | length > 1 -%}
      {%-   set avg_used = (projected.values() | map(attribute='used') | sum)
              / (projected | length) -%}
      {%-   for vm in (rebalance_node_loads | map(attribute='vms') | flatten
              | rejectattr('status', 'equalto', 'stopped')
              | list) if vm.node in projected -%}
      {%-     set src = projected[vm.node] -%}
      {%-     set src_pct = src.used / src.max * 100 -%}
      {%-     if src_pct > (avg_used / src.max * 100 + threshold) -%}
      {%-       set best = namespace(node=none, pct=0) -%}
      {%-       for cand_name, cand in projected.items() if cand_name != vm.node -%}
      {%-         set cand_pct = cand.used / cand.max * 100 -%}
      {%-         if best.node is none or cand_pct < best.pct -%}
      {%-           set best.node = cand_name -%}
      {%-           set best.pct = cand_pct -%}
      {%-         endif -%}
      {%-       endfor -%}
      {%-       set tgt = projected[best.node] -%}
      {%-       set vm_mem = (vm.mem | default(0, true) | float)
                  or (vm.maxmem | default(0, true) | float) -%}
      {%-       if best.pct < (src_pct - threshold)
                  and ((tgt.used + vm_mem) / tgt.max * 100) < src_pct -%}
      {%-         set _ = moves.append({
                    'vmid': vm.vmid,
                    'name': vm.name | default(vm.vmid),
                    'type': vm.type,
                    'status': vm.status,
                    'from': vm.node,
                    'to': best.node
                  }) -%}
      {%-         set _ = src.update({'used': src.used - vm_mem}) -%}
      {%-         set _ = tgt.update({'used': tgt.used + vm_mem}) -%}
      {%-       endif -%}
      {%-     endif -%}
      {%-   endfor -%}
      {%- endif -%}
      {{ moves }}
  delegate_to: localhost
  when: migrate_mode == 'rebalance'
# Prints the number of planned migrations. With an empty plan it reports that
# the cluster is within rebalance_threshold_pct; otherwise it lists each move
# as name, VMID, source node and destination node.
- name: "Migrate | REBALANCE | Log migration plan"
ansible.builtin.debug:
msg: >-
Rebalance plan ({{ rebalance_migrations | length }} migration(s)):
{% if rebalance_migrations | length == 0 %}
Cluster is already balanced within {{ rebalance_threshold_pct }}% threshold — no migrations needed.
{% else %}
{% for m in rebalance_migrations %}
{{ m.name }} (VMID {{ m.vmid }}) {{ m.from }} → {{ m.to }}
{% endfor %}
{% endif %}
when: migrate_mode == 'rebalance'
# Runs `qm migrate` on the source node (delegate_to item.from) for every
# planned move whose type is 'qemu'. --online is added only for guests whose
# status is 'running'. The task always reports changed.
- name: "Migrate | REBALANCE | Execute KVM migrations"
  when:
    - migrate_mode == 'rebalance'
    - rebalance_migrations | length > 0
  delegate_to: "{{ item.from }}"
  changed_when: true
  loop_control:
    label: "{{ item.name }} ({{ item.from }} → {{ item.to }})"
  loop: "{{ rebalance_migrations | selectattr('type', 'equalto', 'qemu') | list }}"
  ansible.builtin.command:
    cmd: >-
      qm migrate {{ item.vmid }} {{ item.to }}
      {% if item.status == 'running' %}--online{% endif %}
      --with-local-disks 0
# Runs `pct migrate --restart --timeout 120` on the source node
# (delegate_to item.from) for every planned move whose type is 'lxc'.
# The task always reports changed.
- name: "Migrate | REBALANCE | Execute LXC migrations"
  when:
    - migrate_mode == 'rebalance'
    - rebalance_migrations | length > 0
  delegate_to: "{{ item.from }}"
  changed_when: true
  loop_control:
    label: "{{ item.name | default(item.vmid) }} ({{ item.from }} → {{ item.to }})"
  loop: "{{ rebalance_migrations | selectattr('type', 'equalto', 'lxc') | list }}"
  ansible.builtin.command:
    cmd: >-
      pct migrate {{ item.vmid }} {{ item.to }} --restart --timeout 120
# Final rebalance summary: reports the number of entries in
# rebalance_migrations (the planned moves).
- name: "Migrate | REBALANCE | Complete"
ansible.builtin.debug:
msg: >-
✓ Rebalance complete —
{{ rebalance_migrations | length }} VM(s) redistributed.
when: migrate_mode == 'rebalance'
# ── TARGETED mode ──────────────────────────────────────────────────────────
# Queries the Proxmox API from localhost for guest information (no node
# filter is passed) and registers the result as targeted_all_vms.
- name: "Migrate | TARGETED | Get all VMs"
  when: migrate_mode == 'targeted'
  delegate_to: localhost
  register: targeted_all_vms
  community.proxmox.proxmox_vm_info:
    validate_certs: "{{ validate_certs | default(false) }}"
    api_port: "{{ api_port | default(8006) }}"
    api_host: "{{ api_host }}"
    api_user: "{{ api_user }}"
    api_token_id: "{{ api_token_id }}"
    api_token_secret: "{{ api_token_secret }}"
# Sets targeted_vms to the guests whose vmid appears in migrate_vmids.
#
# migrate_vmids is normalised first: a string such as "[100,101]" or
# "100,101" (which is what key=value extra-vars produce) is split into its
# numeric parts, and every entry is cast to int.
# NOTE(review): assumes the API result reports vmid as an integer — confirm.
- name: "Migrate | TARGETED | Filter VMs by VMID"
  ansible.builtin.set_fact:
    targeted_vms: >-
      {{ targeted_all_vms.proxmox_vms
         | selectattr('vmid', 'in',
             (migrate_vmids | regex_findall('[0-9]+')
              if migrate_vmids is string else migrate_vmids)
             | map('int') | list)
         | list }}
  delegate_to: localhost
  when:
    - migrate_mode == 'targeted'
    - migrate_vmids | length > 0
# Sets targeted_vms to the guests carrying at least one of migrate_tags.
# Only used when no VMIDs were given.
#
# Tags are matched as whole tags: each requested tag is regex-escaped and the
# pattern is anchored on tag delimiters, so 'win1' no longer matches 'win11'.
# A string value for migrate_tags (as produced by key=value extra-vars, e.g.
# "[win11]") is split into its individual tag names first.
# NOTE(review): assumes `tags` is a single string delimited by ';', ',' or
# space — confirm against the module's return format.
- name: "Migrate | TARGETED | Filter VMs by tag"
  ansible.builtin.set_fact:
    targeted_vms: >-
      {{ targeted_all_vms.proxmox_vms
         | selectattr('tags', 'string')
         | selectattr('tags', 'search', targeted_tag_pattern)
         | list }}
  vars:
    targeted_tag_list: >-
      {{ migrate_tags | regex_findall('[A-Za-z0-9_.+-]+')
         if migrate_tags is string else migrate_tags }}
    targeted_tag_pattern: "(^|[;, ])({{ targeted_tag_list | map('regex_escape') | join('|') }})($|[;, ])"
  delegate_to: localhost
  when:
    - migrate_mode == 'targeted'
    - migrate_tags | length > 0
    - migrate_vmids | length == 0
# When a target node was given explicitly, use it as-is for
# targeted_resolved_target.
- name: "Migrate | TARGETED | Resolve target node"
  when: >-
    migrate_mode == 'targeted'
    and migrate_target_node != ''
  ansible.builtin.set_fact:
    targeted_resolved_target: "{{ migrate_target_node }}"
# Used when no explicit target was given: queries node information and sets
# targeted_resolved_target to the online node with the most free memory
# (maxmem - mem).
#
# Selecting by free memory rather than by lowest absolute used memory avoids
# picking a small, nearly full node on clusters with mixed node sizes. A
# dedicated failure is raised when no node is online, instead of an opaque
# templating error from `first` on an empty list.
- name: "Migrate | TARGETED | Auto-select target by resources"
  when:
    - migrate_mode == 'targeted'
    - migrate_target_node == ''
  block:
    - name: "Migrate | TARGETED | Get node resources"
      community.proxmox.proxmox_node_info:
        api_host: "{{ api_host }}"
        api_user: "{{ api_user }}"
        api_token_id: "{{ api_token_id }}"
        api_token_secret: "{{ api_token_secret }}"
        api_port: "{{ api_port | default(8006) }}"
        validate_certs: "{{ validate_certs | default(false) }}"
      register: targeted_nodes
      delegate_to: localhost
    - name: "Migrate | TARGETED | Require an online node"
      ansible.builtin.fail:
        msg: "No online Proxmox node is available to auto-select as migration target."
      when: >-
        targeted_nodes.proxmox_nodes
        | selectattr('status', 'equalto', 'online')
        | list | length == 0
    - name: "Migrate | TARGETED | Pick best target"
      ansible.builtin.set_fact:
        targeted_resolved_target: >-
          {%- set best = namespace(node='', free=-1) -%}
          {%- for n in targeted_nodes.proxmox_nodes
                | selectattr('status', 'equalto', 'online') -%}
          {%-   set free = (n.maxmem | default(0) | int) - (n.mem | default(0) | int) -%}
          {%-   if free > best.free -%}
          {%-     set best.node = n.node -%}
          {%-     set best.free = free -%}
          {%-   endif -%}
          {%- endfor -%}
          {{ best.node }}
      delegate_to: localhost
# Prints the number of selected guests, the resolved target node and the list
# of selected VMIDs. Only runs in targeted mode.
- name: "Migrate | TARGETED | Log plan"
ansible.builtin.debug:
msg: >-
Targeted migration: {{ targeted_vms | length }} VM(s) → {{ targeted_resolved_target }}
VMIDs: {{ targeted_vms | map(attribute='vmid') | list }}
when: migrate_mode == 'targeted'
# Runs `qm migrate` on each selected guest's current node (delegate_to
# item.node) for every entry of targeted_vms whose type is 'qemu'. --online is
# added only for guests whose status is 'running'. The task always reports
# changed.
#
# Guests that already live on the resolved target are dropped from the loop:
# asking a node to migrate a guest to itself fails and would abort the run,
# which is easy to hit with tag selection or an auto-selected target.
- name: "Migrate | TARGETED | Migrate KVM VMs"
  ansible.builtin.command: >
    qm migrate {{ item.vmid }} {{ targeted_resolved_target }}
    {% if item.status == 'running' %}--online{% endif %}
    --with-local-disks 0
  loop: >-
    {{ targeted_vms
       | selectattr('type', 'equalto', 'qemu')
       | rejectattr('node', 'equalto', targeted_resolved_target)
       | list }}
  loop_control:
    label: "{{ item.name }} (VMID {{ item.vmid }}) → {{ targeted_resolved_target }}"
  changed_when: true
  delegate_to: "{{ item.node }}"
  when: migrate_mode == 'targeted'
# Runs `pct migrate --restart --timeout 120` on each selected container's
# current node (delegate_to item.node) for every entry of targeted_vms whose
# type is 'lxc'. The task always reports changed.
#
# Containers that already live on the resolved target are dropped from the
# loop: asking a node to migrate a guest to itself fails and would abort the
# run.
- name: "Migrate | TARGETED | Migrate LXC containers"
  ansible.builtin.command: >
    pct migrate {{ item.vmid }} {{ targeted_resolved_target }} --restart --timeout 120
  loop: >-
    {{ targeted_vms
       | selectattr('type', 'equalto', 'lxc')
       | rejectattr('node', 'equalto', targeted_resolved_target)
       | list }}
  loop_control:
    label: "{{ item.name | default(item.vmid) }} (VMID {{ item.vmid }}) → {{ targeted_resolved_target }}"
  changed_when: true
  delegate_to: "{{ item.node }}"
  when: migrate_mode == 'targeted'
# Final targeted summary: reports the number of entries in targeted_vms (the
# selected guests) and the resolved target node.
- name: "Migrate | TARGETED | Complete"
ansible.builtin.debug:
msg: >-
✓ Targeted migration complete —
{{ targeted_vms | length }} VM(s) moved to {{ targeted_resolved_target }}.
when: migrate_mode == 'targeted'