# File: ansible-msp-automations/playbooks/proxmox_snapshot.yml
# Last modified: 2026-03-15 15:48:59 -07:00
# Size: 299 lines, 12 KiB (YAML)

---
# =============================================================================
# proxmox_snapshot.yml
# Pre/post maintenance VM snapshot management.
#
# Actions:
#   create   — snapshot all running VMs across the cluster before maintenance
#   verify   — report, per target VM, whether a snapshot carrying the automation prefix exists
#   cleanup  — remove automation snapshots older than snapshot_max_age_hours
#   rollback — roll back a specific VMID to its most recent automation snapshot
#
# Snapshots are named with a consistent prefix for easy identification and cleanup:
#   <snapshot_prefix>_<YYYYMMDD>_<HHMM>   (default prefix: auto_pre, e.g. auto_pre_20260315_1548)
#
# Usage:
# # Snapshot all running VMs before upgrade
# ansible-playbook proxmox_snapshot.yml -e "snapshot_action=create"
#
# # Verify snapshots exist
# ansible-playbook proxmox_snapshot.yml -e "snapshot_action=verify"
#
# # Clean up snapshots older than 48 hours
# ansible-playbook proxmox_snapshot.yml -e "snapshot_action=cleanup snapshot_max_age_hours=48"
#
# # Rollback a specific VM
# ansible-playbook proxmox_snapshot.yml -e "snapshot_action=rollback snapshot_rollback_vmid=100"
# =============================================================================
- name: "Proxmox | VM Snapshot Management"
  # All tasks run once (run_once) for the whole proxmox_cluster group. API
  # calls are delegated to localhost; `qm` commands are delegated to the
  # Proxmox node that hosts the VM.
  # NOTE(review): delegating to `item.node` assumes Proxmox node names are
  # valid inventory hostnames — confirm against the inventory.
  hosts: proxmox_cluster
  gather_facts: true  # ansible_date_time is used to build the snapshot name
  run_once: true

  vars:
    snapshot_action: create
    snapshot_prefix: "auto_pre"
    snapshot_description: "Pre-maintenance snapshot — managed by ansible-msp"
    snapshot_max_age_hours: 72
    snapshot_include_ram: false  # include RAM state in snapshot (slower, more disk)
    snapshot_target_vmids: []  # empty = all running VMs
    snapshot_exclude_tags:
      - nosnap
      - nosnapshot
    snapshot_rollback_vmid: ""  # required for rollback action

    # API connection.
    # These are deliberately NOT named api_port / validate_certs: a play var
    # defined in terms of itself ("{{ api_port | default(8006) }}") makes
    # Ansible fail with "recursive loop detected in template string".
    # The external api_port / validate_certs variables (inventory, extra
    # vars) are still honoured; the defaults apply only when they are unset.
    snapshot_api_port: "{{ api_port | default(8006) }}"
    # NOTE(review): certificate validation is off by default — confirm this is
    # acceptable for the target environments.
    snapshot_validate_certs: "{{ validate_certs | default(false) }}"

    # snapshot_target_vmids normalised to a list of integers so it compares
    # equal to the integer VMIDs returned by the API. Accepts a YAML/JSON
    # list (of ints or strings) or a comma-separated string such as
    # "100,101" (the form produced by `-e snapshot_target_vmids=100,101`).
    snapshot_target_vmid_list: >-
      {{ (snapshot_target_vmids.split(',')
          if snapshot_target_vmids is string
          else snapshot_target_vmids)
         | map('string') | map('trim') | reject('equalto', '')
         | map('int') | list }}

    # Regex matching any excluded tag as a WHOLE tag. The boundary class is
    # "any character that cannot appear inside a Proxmox tag", so "nosnap"
    # no longer matches e.g. "nosnapper". Only used when
    # snapshot_exclude_tags is non-empty.
    snapshot_exclude_tags_regex: >-
      {{ '(^|[^A-Za-z0-9_+.-])('
         ~ (snapshot_exclude_tags | map('regex_escape') | join('|'))
         ~ ')($|[^A-Za-z0-9_+.-])' }}

  pre_tasks:
    - name: "Snapshot | Validate action"
      ansible.builtin.fail:
        msg: >-
          Invalid snapshot_action '{{ snapshot_action }}'.
          Must be one of: create, verify, cleanup, rollback.
      when: snapshot_action not in ['create', 'verify', 'cleanup', 'rollback']

    - name: "Snapshot | Validate rollback — VMID required"
      ansible.builtin.fail:
        msg: "snapshot_rollback_vmid is required for rollback action."
      when:
        - snapshot_action == 'rollback'
        # Cast to string so integer VMIDs passed as JSON are handled too.
        - snapshot_rollback_vmid | string | trim == ''

    # Name format: <prefix>_<YYYYMMDD>_<HHMM>. The cleanup and rollback tasks
    # below rely on this exact layout.
    - name: "Snapshot | Set snapshot name"
      ansible.builtin.set_fact:
        snapshot_name: "{{ snapshot_prefix }}_{{ ansible_date_time.date | replace('-', '') }}_{{ ansible_date_time.hour }}{{ ansible_date_time.minute }}"
      when: snapshot_action == 'create'

    - name: "Snapshot | Log operation"
      ansible.builtin.debug:
        msg: >-
          Snapshot {{ snapshot_action }} —
          client={{ client_name | default('Unknown') }}
          {% if snapshot_action == 'create' %}name={{ snapshot_name }}{% endif %}
          {% if snapshot_action == 'cleanup' %}max_age={{ snapshot_max_age_hours }}h{% endif %}
          {% if snapshot_action == 'rollback' %}vmid={{ snapshot_rollback_vmid }}{% endif %}

  roles:
    - role: proxmox_preflight

  tasks:
    # ── Get all VMs ──────────────────────────────────────────────────────────
    - name: "Snapshot | Get all node info"
      community.proxmox.proxmox_node_info:
        api_host: "{{ api_host }}"
        api_user: "{{ api_user }}"
        api_token_id: "{{ api_token_id }}"
        api_token_secret: "{{ api_token_secret }}"
        api_port: "{{ snapshot_api_port }}"
        validate_certs: "{{ snapshot_validate_certs }}"
      register: snapshot_nodes
      delegate_to: localhost

    # Only online nodes are queried.
    - name: "Snapshot | Get all VMs per node"
      community.proxmox.proxmox_vm_info:
        api_host: "{{ api_host }}"
        api_user: "{{ api_user }}"
        api_token_id: "{{ api_token_id }}"
        api_token_secret: "{{ api_token_secret }}"
        api_port: "{{ snapshot_api_port }}"
        validate_certs: "{{ snapshot_validate_certs }}"
        node: "{{ item.node }}"
      loop: >-
        {{ snapshot_nodes.proxmox_nodes
           | selectattr('status', 'equalto', 'online')
           | list }}
      loop_control:
        label: "{{ item.node }}"
      register: snapshot_vms_per_node
      delegate_to: localhost

    # Flatten the per-node results; keep QEMU guests that are not templates.
    - name: "Snapshot | Build VM list"
      ansible.builtin.set_fact:
        snapshot_all_vms: >-
          {{ snapshot_vms_per_node.results
             | map(attribute='proxmox_vms')
             | flatten
             | rejectattr('template', 'equalto', true)
             | selectattr('type', 'equalto', 'qemu')
             | list }}

    # Explicit VMID targeting: status and exclude tags are NOT applied.
    - name: "Snapshot | Filter by VMID list"
      ansible.builtin.set_fact:
        snapshot_target_vms: >-
          {{ snapshot_all_vms
             | selectattr('vmid', 'in', snapshot_target_vmid_list)
             | list }}
      when: snapshot_target_vmid_list | length > 0

    # Default targeting: every running VM, minus those carrying an excluded
    # tag. Untagged VMs are listed first, then tagged ones. With an empty
    # snapshot_exclude_tags list no tag filtering happens at all (an empty
    # regex would otherwise match — and exclude — every tagged VM).
    - name: "Snapshot | Filter running VMs (no VMID filter)"
      ansible.builtin.set_fact:
        snapshot_target_vms: >-
          {{ (snapshot_running_vms | rejectattr('tags', 'defined') | list)
             + ((snapshot_running_vms
                 | selectattr('tags', 'defined')
                 | rejectattr('tags', 'search', snapshot_exclude_tags_regex)
                 | list)
                if snapshot_exclude_tags | length > 0
                else (snapshot_running_vms | selectattr('tags', 'defined') | list)) }}
      vars:
        snapshot_running_vms: >-
          {{ snapshot_all_vms
             | selectattr('status', 'equalto', 'running')
             | list }}
      when: snapshot_target_vmid_list | length == 0

    # ── CREATE ───────────────────────────────────────────────────────────────
    - name: "Snapshot | CREATE | Log plan"
      ansible.builtin.debug:
        msg: >-
          Creating snapshot '{{ snapshot_name }}' for
          {{ snapshot_target_vms | length }} VM(s):
          {{ snapshot_target_vms | map(attribute='name') | list }}
      when: snapshot_action == 'create'

    - name: "Snapshot | CREATE | Take snapshots"
      community.proxmox.proxmox_snap:
        api_host: "{{ api_host }}"
        api_user: "{{ api_user }}"
        api_token_id: "{{ api_token_id }}"
        api_token_secret: "{{ api_token_secret }}"
        api_port: "{{ snapshot_api_port }}"
        validate_certs: "{{ snapshot_validate_certs }}"
        vmid: "{{ item.vmid }}"
        snapname: "{{ snapshot_name }}"
        description: "{{ snapshot_description }}"
        vmstate: "{{ snapshot_include_ram }}"
        state: present
      loop: "{{ snapshot_target_vms }}"
      loop_control:
        label: "{{ item.name }} (VMID {{ item.vmid }}) on {{ item.node }}"
      delegate_to: localhost
      when: snapshot_action == 'create'

    - name: "Snapshot | CREATE | Complete"
      ansible.builtin.debug:
        msg: "✓ Snapshots created: '{{ snapshot_name }}' on {{ snapshot_target_vms | length }} VM(s)."
      when: snapshot_action == 'create'

    # ── VERIFY ───────────────────────────────────────────────────────────────
    # Read-only: lists snapshots on the node hosting each target VM.
    - name: "Snapshot | VERIFY | Check snapshots exist"
      ansible.builtin.command: >
        qm listsnapshot {{ item.vmid }}
      loop: "{{ snapshot_target_vms }}"
      loop_control:
        label: "{{ item.name }} (VMID {{ item.vmid }})"
      register: snapshot_verify_results
      changed_when: false
      delegate_to: "{{ item.node }}"
      when: snapshot_action == 'verify'

    # Reports only; a VM without an automation snapshot does not fail the play.
    - name: "Snapshot | VERIFY | Report"
      ansible.builtin.debug:
        msg: >-
          {{ item.item.name }} (VMID {{ item.item.vmid }}):
          {{ 'HAS snapshot' if snapshot_prefix in item.stdout else 'NO automation snapshot found' }}
      loop: "{{ snapshot_verify_results.results | default([]) }}"
      loop_control:
        label: "{{ item.item.name | default(item.item.vmid) }}"
      when: snapshot_action == 'verify'

    # ── CLEANUP ──────────────────────────────────────────────────────────────
    # Deletes snapshots named exactly <prefix>_<YYYYMMDD>_<HHMM> whose
    # timestamp (date AND time) is older than snapshot_max_age_hours.
    #   * Runs under bash: the script uses [[ =~ ]] / BASH_REMATCH, which the
    #     default /bin/sh does not provide.
    #   * Names that carry the prefix but do not parse as a timestamp are
    #     skipped, never deleted.
    #   * Only the snapshot-name column ($2) is inspected, so a description
    #     that merely mentions the prefix cannot select a snapshot.
    #   * A failed `qm delsnapshot` makes the item fail even if later
    #     deletions succeed.
    - name: "Snapshot | CLEANUP | Remove old snapshots"
      ansible.builtin.shell: |
        prefix={{ (snapshot_prefix ~ '_') | quote }}
        vmid={{ item.vmid | int }}
        cutoff=$(date -d {{ (snapshot_max_age_hours ~ ' hours ago') | quote }} +%s) || exit 1
        rc=0
        for snap in $(qm listsnapshot "$vmid" 2>/dev/null | awk '{print $2}'); do
          [[ $snap == "$prefix"* ]] || continue
          stamp=${snap#"$prefix"}
          [[ $stamp =~ ^([0-9]{4})([0-9]{2})([0-9]{2})_([0-9]{2})([0-9]{2})$ ]] || continue
          snap_epoch=$(date -d "${BASH_REMATCH[1]}-${BASH_REMATCH[2]}-${BASH_REMATCH[3]} ${BASH_REMATCH[4]}:${BASH_REMATCH[5]}" +%s 2>/dev/null) || continue
          if [ "$snap_epoch" -lt "$cutoff" ]; then
            echo "Removing snapshot: $snap from VMID $vmid"
            qm delsnapshot "$vmid" "$snap" || rc=1
          fi
        done
        exit "$rc"
      args:
        executable: /bin/bash
      loop: "{{ snapshot_target_vms }}"
      loop_control:
        label: "{{ item.name }} (VMID {{ item.vmid }})"
      register: snapshot_cleanup_result
      # Changed only when at least one snapshot removal was attempted.
      changed_when: "'Removing snapshot:' in snapshot_cleanup_result.stdout"
      delegate_to: "{{ item.node }}"
      when: snapshot_action == 'cleanup'

    - name: "Snapshot | CLEANUP | Complete"
      ansible.builtin.debug:
        msg: "✓ Snapshot cleanup complete — removed snapshots older than {{ snapshot_max_age_hours }} hours."
      when: snapshot_action == 'cleanup'

    # ── ROLLBACK ─────────────────────────────────────────────────────────────
    # Resolve the hosting node once; empty string when the VMID is unknown.
    - name: "Snapshot | ROLLBACK | Resolve node hosting the VM"
      ansible.builtin.set_fact:
        snapshot_rollback_node: >-
          {{ snapshot_all_vms
             | selectattr('vmid', 'equalto', snapshot_rollback_vmid | int)
             | map(attribute='node')
             | first
             | default('') }}
      when: snapshot_action == 'rollback'

    - name: "Snapshot | ROLLBACK | Fail if VMID is not in the cluster"
      ansible.builtin.fail:
        msg: >-
          VMID {{ snapshot_rollback_vmid }} was not found among the
          non-template QEMU VMs on online nodes.
      when:
        - snapshot_action == 'rollback'
        - snapshot_rollback_node | default('') == ''

    # Picks the lexicographically greatest name of the form
    # <prefix>_<digits>_<digits>; with the fixed-width YYYYMMDD_HHMM layout
    # that is the most recent snapshot. Matching is on the name column only.
    # The default() on delegate_to keeps the expression defined when the task
    # is skipped for non-rollback actions.
    - name: "Snapshot | ROLLBACK | Find most recent automation snapshot"
      ansible.builtin.shell: |
        qm listsnapshot {{ snapshot_rollback_vmid | int }} 2>/dev/null \
          | awk -v p={{ (snapshot_prefix ~ '_') | quote }} \
              'index($2, p) == 1 && substr($2, length(p) + 1) ~ /^[0-9]+_[0-9]+$/ { print $2 }' \
          | sort -r \
          | head -n 1
      register: snapshot_rollback_name
      changed_when: false
      delegate_to: "{{ snapshot_rollback_node | default(inventory_hostname) }}"
      when: snapshot_action == 'rollback'

    - name: "Snapshot | ROLLBACK | Fail if no snapshot found"
      ansible.builtin.fail:
        msg: >-
          No automation snapshot found for VMID {{ snapshot_rollback_vmid }}.
          Run snapshot_action=create first.
      when:
        - snapshot_action == 'rollback'
        - snapshot_rollback_name.stdout | trim == ''

    # Forced stop with a 60 s timeout before rolling back.
    - name: "Snapshot | ROLLBACK | Stop VM before rollback"
      community.proxmox.proxmox_kvm:
        api_host: "{{ api_host }}"
        api_user: "{{ api_user }}"
        api_token_id: "{{ api_token_id }}"
        api_token_secret: "{{ api_token_secret }}"
        api_port: "{{ snapshot_api_port }}"
        validate_certs: "{{ snapshot_validate_certs }}"
        vmid: "{{ snapshot_rollback_vmid }}"
        state: stopped
        force: true
        timeout: 60
      delegate_to: localhost
      when: snapshot_action == 'rollback'

    - name: "Snapshot | ROLLBACK | Execute rollback"
      ansible.builtin.command: >
        qm rollback {{ snapshot_rollback_vmid }} {{ snapshot_rollback_name.stdout | trim }}
      changed_when: true
      delegate_to: "{{ snapshot_rollback_node | default(inventory_hostname) }}"
      when: snapshot_action == 'rollback'

    - name: "Snapshot | ROLLBACK | Start VM after rollback"
      community.proxmox.proxmox_kvm:
        api_host: "{{ api_host }}"
        api_user: "{{ api_user }}"
        api_token_id: "{{ api_token_id }}"
        api_token_secret: "{{ api_token_secret }}"
        api_port: "{{ snapshot_api_port }}"
        validate_certs: "{{ snapshot_validate_certs }}"
        vmid: "{{ snapshot_rollback_vmid }}"
        state: started
      delegate_to: localhost
      when: snapshot_action == 'rollback'

    - name: "Snapshot | ROLLBACK | Complete"
      ansible.builtin.debug:
        msg: >-
          ✓ VMID {{ snapshot_rollback_vmid }} rolled back to
          '{{ snapshot_rollback_name.stdout | trim }}'.
      when: snapshot_action == 'rollback'