Files
2026-03-15 15:48:59 -07:00

218 lines
9.1 KiB
YAML

---
# =============================================================================
# proxmox_drain — tasks
# Migrates all VMs/LXCs off current_node to the best available target.
# Writes a state file so proxmox_restore can return VMs to origin.
#
# Required vars:
# current_node — the node being drained
# =============================================================================
# ── Discover guests on this node ──────────────────────────────────────────────
# Ask the Proxmox API (from the controller) for the guests on the node being
# drained. The result is registered as drain_node_guests; later tasks read
# its `proxmox_vms` list.
- name: "Drain | {{ current_node }} | Discover guests"
  delegate_to: localhost
  register: drain_node_guests
  community.proxmox.proxmox_vm_info:
    node: "{{ current_node }}"
    api_host: "{{ api_host }}"
    api_port: "{{ api_port }}"
    api_user: "{{ api_user }}"
    api_token_id: "{{ api_token_id }}"
    api_token_secret: "{{ api_token_secret }}"
    validate_certs: "{{ validate_certs }}"
# Split the discovered guests by type:
#   drain_kvm_guests — entries with type `qemu`, minus those flagged as templates
#   drain_lxc_guests — entries with type `lxc`
- name: "Drain | {{ current_node }} | Separate KVM and LXC guests"
  delegate_to: localhost
  ansible.builtin.set_fact:
    drain_kvm_guests: >-
      {{ drain_node_guests.proxmox_vms
         | selectattr('type', 'eq', 'qemu')
         | rejectattr('template', 'eq', true)
         | list }}
    drain_lxc_guests: >-
      {{ drain_node_guests.proxmox_vms | selectattr('type', 'eq', 'lxc') | list }}
# Remove guests that carry any tag listed in drain_exclude_tags.
#
# Guests without a `tags` attribute are always kept. For tagged guests the
# tag string is tested against an anchored pattern so only whole tags match
# (an exclude tag `db` must not exclude a guest tagged `mongodb`), and every
# exclude tag is regex-escaped so characters such as `.` or `+` are literal.
#
# The task is skipped when the exclude list is empty or undefined: joining an
# empty list produces an empty pattern, which `search` finds in every string
# and which would therefore drop every tagged guest.
- name: "Drain | {{ current_node }} | Filter excluded tags"
  vars:
    # NOTE(review): assumes `tags` is a single string with `;` between tags
    # (`,` and space are tolerated as separators too) — confirm against the
    # proxmox_vm_info output of your cluster.
    drain_exclude_tag_pattern: >-
      (^|[;, ])({{ drain_exclude_tags | map('regex_escape') | join('|') }})($|[;, ])
  ansible.builtin.set_fact:
    drain_kvm_guests: >-
      {{ (drain_kvm_guests
          | rejectattr('tags', 'defined')
          | list)
         + (drain_kvm_guests
            | selectattr('tags', 'defined')
            | rejectattr('tags', 'search', drain_exclude_tag_pattern)
            | list) }}
    drain_lxc_guests: >-
      {{ (drain_lxc_guests
          | rejectattr('tags', 'defined')
          | list)
         + (drain_lxc_guests
            | selectattr('tags', 'defined')
            | rejectattr('tags', 'search', drain_exclude_tag_pattern)
            | list) }}
  delegate_to: localhost
  when: drain_exclude_tags | default([]) | length > 0
# Report how many guests of each kind remain after filtering, together with
# the combined list of their VMIDs (KVM guests first, then LXC).
- name: "Drain | {{ current_node }} | Log guest inventory"
  ansible.builtin.debug:
    msg: >-
      {{ current_node }} has {{ drain_kvm_guests | length }} KVM guest(s)
      and {{ drain_lxc_guests | length }} LXC guest(s) to migrate.
      VMIDs: {{ (drain_kvm_guests + drain_lxc_guests) | map(attribute='vmid') | list }}
# ── Skip if nothing to migrate ────────────────────────────────────────────────
# Emit a notice when both guest lists are empty (nothing left to migrate).
- name: "Drain | {{ current_node }} | Skip — no guests to migrate"
  when: ((drain_kvm_guests | length) + (drain_lxc_guests | length)) == 0
  ansible.builtin.debug:
    msg: "Node {{ current_node }} has no guests — skipping drain."
# Stop here when there is nothing to migrate.
# NOTE(review): `end_play` ends the whole play, not just this tasks file —
# confirm that no later task or role in the calling play still needs to run
# for a node that has no guests.
- name: "Drain | {{ current_node }} | End play if no guests"
  when: not (drain_kvm_guests | length > 0 or drain_lxc_guests | length > 0)
  ansible.builtin.meta: end_play
# ── Select migration target ───────────────────────────────────────────────────
# Only for the `resources` strategy: fetch per-node information from the
# Proxmox API (from the controller). The scoring task reads
# drain_all_nodes.proxmox_nodes.
- name: "Drain | {{ current_node }} | Get all node resource info"
  when: drain_target_strategy == 'resources'
  delegate_to: localhost
  register: drain_all_nodes
  community.proxmox.proxmox_node_info:
    api_host: "{{ api_host }}"
    api_port: "{{ api_port }}"
    api_user: "{{ api_user }}"
    api_token_id: "{{ api_token_id }}"
    api_token_secret: "{{ api_token_secret }}"
    validate_certs: "{{ validate_certs }}"
# Only for the `resources` strategy: build drain_scored_nodes, a list of
# {node, score, free_mem, free_cpu} dicts for every online node other than
# current_node, sorted by score with the best candidate first.
#
#   free_mem = maxmem - mem
#   free_cpu = 1.0 - cpu            (cpu defaults to 0 when absent)
#   score    = free_mem * drain_score_mem_weight
#            + free_cpu * 1000000000 * drain_score_cpu_weight
#
# NOTE(review): the 1000000000 factor presumably lifts the CPU fraction onto
# a scale comparable with free_mem — confirm the intended weighting.
#
# Every statement tag uses Jinja whitespace control (`{%- ... -%}`) so the
# rendered value begins directly with `[`; stray leading whitespace from the
# statement lines can otherwise leave the result as a plain string instead of
# a list. Missing `status`/`mem`/`maxmem` fields fall back to safe defaults
# so an incomplete node entry cannot abort the templating.
- name: "Drain | {{ current_node }} | Score nodes by available resources"
  ansible.builtin.set_fact:
    drain_scored_nodes: >-
      {%- set candidates = [] -%}
      {%- for node in drain_all_nodes.proxmox_nodes | default([]) -%}
      {%-   if (node.status | default('')) == 'online' and node.node != current_node -%}
      {%-     set free_mem = (node.maxmem | default(0)) - (node.mem | default(0)) -%}
      {%-     set free_cpu = 1.0 - (node.cpu | default(0)) -%}
      {%-     set score = (free_mem * drain_score_mem_weight | float) + (free_cpu * 1000000000 * drain_score_cpu_weight | float) -%}
      {%-     set _ = candidates.append({'node': node.node, 'score': score, 'free_mem': free_mem, 'free_cpu': free_cpu}) -%}
      {%-   endif -%}
      {%- endfor -%}
      {{ candidates | sort(attribute='score', reverse=true) }}
  delegate_to: localhost
  when: drain_target_strategy == 'resources'
# Only for the `resources` strategy: pick the highest-scored candidate.
#
#   drain_resolved_target — the full candidate dict (node, free_mem, free_cpu, ...)
#   drain_target          — just the node name
#
# When no candidate exists, both facts fall back to a placeholder with an
# empty node name, so that the dedicated "Fail if no target available" task
# reports the problem instead of this task dying on an undefined attribute.
- name: "Drain | {{ current_node }} | Set migration target (resources)"
  vars:
    drain_no_target:
      node: ''
      free_mem: 0
      free_cpu: 0
  ansible.builtin.set_fact:
    drain_resolved_target: "{{ drain_scored_nodes | default([]) | first | default(drain_no_target) }}"
    drain_target: "{{ (drain_scored_nodes | default([]) | first | default(drain_no_target)).node }}"
  delegate_to: localhost
  when: drain_target_strategy == 'resources'
# Only for the `explicit` strategy: use the operator-supplied
# drain_target_node as the migration target. An unset drain_target_node
# becomes an empty target so the following "Fail if no target available"
# task can report it, rather than this task failing on an undefined variable.
- name: "Drain | {{ current_node }} | Set migration target (explicit)"
  ansible.builtin.set_fact:
    drain_target: "{{ drain_target_node | default('') }}"
  delegate_to: localhost
  when: drain_target_strategy == 'explicit'
# Abort before anything is migrated when there is no usable target:
#   - drain_target was never set (e.g. drain_target_strategy matched neither
#     `resources` nor `explicit`) or is empty/blank, or
#   - drain_target is the node being drained.
# The first clause is evaluated first, so an undefined drain_target is
# reported here instead of raising a templating error.
- name: "Drain | {{ current_node }} | Fail if no target available"
  ansible.builtin.fail:
    msg: >-
      No valid migration target found for node {{ current_node }}.
      All other nodes may be offline or no nodes configured.
  when: >-
    (drain_target | default('') | string | trim | length == 0)
    or drain_target == current_node
  delegate_to: localhost
# Print the chosen target. For the `resources` strategy the message also
# shows the winner's free memory (bytes divided by 1073741824, i.e. GiB-sized
# units labelled "GB") and free CPU as a percentage.
- name: "Drain | {{ current_node }} | Log migration target"
  ansible.builtin.debug:
    msg: >-
      Migration target for {{ current_node }}: {{ drain_target }}
      {% if drain_target_strategy == 'resources' %}
      (free_mem={{ (drain_resolved_target.free_mem / 1073741824) | round(1) }}GB,
      free_cpu={{ (drain_resolved_target.free_cpu * 100) | round(1) }}%)
      {% endif %}
# ── Write state file for restore ──────────────────────────────────────────────
# Create drain_state_dir on the controller (owner-only access) so the origin
# state file can be written into it.
- name: "Drain | {{ current_node }} | Ensure state directory exists"
  delegate_to: localhost
  ansible.builtin.file:
    state: directory
    path: "{{ drain_state_dir }}"
    mode: "0700"
# Persist the list of guests about to be migrated, each annotated with
# `origin_node`, as JSON on the controller. Per the file header, this is the
# state file that proxmox_restore uses to return guests to their origin.
#
# The timestamp in the file name comes from `now()` rather than from the
# `ansible_date_time` fact: the previous `when: ansible_date_time is defined`
# guard silently skipped this task whenever facts were not gathered, while
# the migration itself still went ahead — leaving nothing to restore from.
# The format string reproduces the `iso8601_basic_short` layout
# (YYYYMMDDTHHMMSS), so existing file-name expectations still hold.
- name: "Drain | {{ current_node }} | Write VM origin state"
  ansible.builtin.copy:
    content: >-
      {{ (drain_kvm_guests + drain_lxc_guests)
         | map('combine', {'origin_node': current_node})
         | list
         | to_nice_json }}
    dest: "{{ drain_state_dir }}/{{ current_node }}_{{ now(fmt='%Y%m%dT%H%M%S') }}.json"
    mode: '0600'
  delegate_to: localhost
# ── Migrate KVM guests ────────────────────────────────────────────────────────
# Migrate every KVM guest to drain_target with `qm migrate`.
#   --online             is added only for guests whose status is `running`
#   --with-local-disks 0 is always passed
# The task is not delegated, so it runs on the play's target host.
# NOTE(review): presumably that host is the node being drained — confirm.
#
# The loop label falls back to the VMID when a guest has no `name`, matching
# the LXC migration task, so a nameless guest cannot break label rendering.
# Any non-zero `qm` exit code fails the task.
- name: "Drain | {{ current_node }} | KVM | Live migrate to {{ drain_target }}"
  ansible.builtin.command:
    cmd: >-
      qm migrate {{ item.vmid }} {{ drain_target }}
      {% if item.status == 'running' %}--online{% endif %}
      --with-local-disks 0
  loop: "{{ drain_kvm_guests }}"
  loop_control:
    label: "{{ item.name | default(item.vmid) }} (VMID {{ item.vmid }}) — {{ item.status }}"
  changed_when: true
  register: drain_kvm_results
  failed_when: drain_kvm_results.rc is defined and drain_kvm_results.rc != 0
# Re-query the target node after the KVM migrations and confirm that every
# migrated VMID is now listed there. The query result is registered as
# drain_verify_guests; the assertion that follows is what actually performs
# the verification (previously the result was registered but never checked).
- name: "Drain | {{ current_node }} | KVM | Verify guests moved"
  community.proxmox.proxmox_vm_info:
    api_host: "{{ api_host }}"
    api_user: "{{ api_user }}"
    api_token_id: "{{ api_token_id }}"
    api_token_secret: "{{ api_token_secret }}"
    api_port: "{{ api_port }}"
    validate_certs: "{{ validate_certs }}"
    node: "{{ drain_target }}"
  register: drain_verify_guests
  delegate_to: localhost
  when: drain_kvm_guests | length > 0

# Fail with the list of missing VMIDs if any migrated KVM guest is not
# reported on the target node.
- name: "Drain | {{ current_node }} | KVM | Assert guests are on {{ drain_target }}"
  vars:
    drain_missing_vmids: >-
      {{ drain_kvm_guests | map(attribute='vmid') | map('int') | list
         | difference(drain_verify_guests.proxmox_vms | default([])
                      | map(attribute='vmid') | map('int') | list) }}
  ansible.builtin.assert:
    that:
      - drain_missing_vmids | length == 0
    fail_msg: >-
      KVM guest(s) {{ drain_missing_vmids }} were not found on
      {{ drain_target }} after migration from {{ current_node }}.
    quiet: true
  delegate_to: localhost
  when: drain_kvm_guests | length > 0
# Summarise the KVM migrations; skipped when there were no KVM guests.
- name: "Drain | {{ current_node }} | KVM | Log migration results"
  when: drain_kvm_guests | length > 0
  ansible.builtin.debug:
    msg: >-
      KVM migrations complete — {{ drain_kvm_guests | length }} guest(s)
      moved to {{ drain_target }}.
# ── Migrate LXC guests ────────────────────────────────────────────────────────
# Migrate every LXC container to drain_target with `pct migrate`.
#   --restart  is added when drain_lxc_restart is truthy
#   --timeout  is always passed, set to drain_vm_shutdown_timeout
# The task is not delegated, so it runs on the play's target host.
# Any non-zero `pct` exit code fails the task.
- name: "Drain | {{ current_node }} | LXC | Migrate to {{ drain_target }}"
  register: drain_lxc_results
  changed_when: true
  failed_when: drain_lxc_results.rc is defined and drain_lxc_results.rc != 0
  loop: "{{ drain_lxc_guests }}"
  loop_control:
    label: "{{ item.name | default(item.vmid) }} (VMID {{ item.vmid }}) — {{ item.status }}"
  ansible.builtin.command:
    cmd: >-
      pct migrate {{ item.vmid }} {{ drain_target }}
      {% if drain_lxc_restart %}--restart{% endif %}
      --timeout {{ drain_vm_shutdown_timeout }}
# Summarise the LXC migrations; skipped when there were no containers.
- name: "Drain | {{ current_node }} | LXC | Log migration results"
  when: drain_lxc_guests | length > 0
  ansible.builtin.debug:
    msg: >-
      LXC migrations complete — {{ drain_lxc_guests | length }} container(s)
      moved to {{ drain_target }}.
# ── Final summary ─────────────────────────────────────────────────────────────
# Final one-line summary: guest counts per type and the node they went to.
- name: "Drain | {{ current_node }} | Complete"
  ansible.builtin.debug:
    msg: >-
      ✓ Node {{ current_node }} drained — {{ drain_kvm_guests | length }} KVM +
      {{ drain_lxc_guests | length }} LXC guests migrated to {{ drain_target }}.