Add XCP-NG integration, deploy_agent.sh, overhaul onboard_client.sh

- roles/xcpng_update: new role — patches XCP-NG pools via XO REST API
  - non-HA pools: pool-level install_patches + restart_hosts
  - HA clusters: rolling pool update via JSON-RPC pool.rollingUpdate
  - dry run support, patch verification after update
- roles/snapshot: add xcpng_xo hypervisor_type support via XO REST API
- playbooks/xcpng_pool_update.yml: new playbook for XCP-NG pool patching
- inventories/client_template/hosts.yml: add xcpng_hosts group
- scripts/onboard_client.sh: major overhaul
  - add --hypervisor proxmox|xcpng|baremetal|mixed
  - add --xo-url / --xo-token (falls back to global env)
  - webhook no longer required (falls back to N8N_WEBHOOK_URL in env)
  - ansible_user changed to ansible-msp-agent with sudo
  - xcpng_hosts group in inventory scaffold for xcpng/mixed clients
  - hypervisor-appropriate task templates created automatically
  - add --dry-run support
- scripts/deploy_agent.sh: new script — bootstrap ansible-msp-agent
  - reads hosts.yml to get Linux/Windows hosts
  - SSHes as native account, su - to root
  - creates ansible-msp-agent user + sudo-nopasswd group
  - deploys client key + MSP backup key to agent user and root
  - adjusts sshd_config, reloads sshd
  - verifies key-based login after bootstrap
  - Windows stub with skip + warning
  - continues on failure, prints summary
This commit is contained in:
Semaphore
2026-03-12 11:15:43 -07:00
parent 5b846654ba
commit a42bf14665
2 changed files with 757 additions and 196 deletions

469
scripts/deploy_agent.sh Executable file
View File

@@ -0,0 +1,469 @@
#!/bin/bash
# =============================================================================
# deploy_agent.sh — MSP Agent Bootstrap Script
# =============================================================================
# Connects to Linux hosts defined in a client hosts.yml, creates the
# ansible-msp-agent service account, deploys SSH keys, configures sudoers,
# and hardens sshd_config.
#
# Usage:
# ./deploy_agent.sh --inventory /path/to/client_xxx/hosts.yml [options]
#
# Options:
# --inventory Path to client hosts.yml (required)
# --native-user Username to SSH in with (default: localcontrol)
# --native-pass Password for native user (will prompt if not provided)
# --root-pass Root password for su - (will prompt if not provided)
# --agent-user Service account to create (default: ansible-msp-agent)
# --client-key Path to client public key (default: auto-derived from inventory path)
# --msp-key Path to MSP backup public key file (default: /root/.ssh/ansible-msp-agent.pub)
# --key-repo-dir Git checkout to look for public keys in (<dir>/keys/*.pub); falls back to the default paths when a key is not found there
# --dry-run Show what would be done without making changes
# --skip-sshd Skip sshd_config modifications
# --help Show this help
#
# Dependencies: sshpass, python3, python3-yaml, ssh, ssh-keyscan
# =============================================================================
# Strict mode: abort on errors, on unset variables, and on failures anywhere
# inside a pipeline.
set -o errexit -o nounset -o pipefail

# ─── Defaults ────────────────────────────────────────────────────────────────
INVENTORY_PATH=""
NATIVE_USER="localcontrol"
NATIVE_PASS=""
ROOT_PASS=""
AGENT_USER="ansible-msp-agent"
CLIENT_KEY_PATH=""   # empty means: derive it from the inventory path
MSP_KEY_PATH="/root/.ssh/ansible-msp-agent.pub"
KEY_REPO_DIR=""      # future: point to keys/ dir in git repo
SKIP_SSHD=false
DRY_RUN=false
REPO_DIR="/opt/ansible-msp-automations"   # NOTE(review): not referenced in the visible part of this script

# Optional environment file. It is sourced after the defaults, so any variable
# it assigns replaces the value set above. Putting MSP_BACKUP_PUBKEY in it
# removes the need for the MSP key file on disk.
if [[ -f /root/.semaphore_env ]]; then
  . /root/.semaphore_env
fi

# ─── Counters ────────────────────────────────────────────────────────────────
HOSTS_TOTAL=0
HOSTS_OK=0
HOSTS_FAILED=0
HOSTS_SKIPPED=0
FAILED_HOSTS=()   # one human-readable entry per failed host

# ─── Colors ──────────────────────────────────────────────────────────────────
# Stored as literal backslash escapes; they only become ANSI sequences when
# printed through echo -e.
NC='\033[0m'
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
# Shared emitter for the log_* helpers.
#   $1   - prefix printed before the message (a colour code, optionally
#          preceded by other escapes)
#   $2.. - message words
# The line is printed with echo -e, so backslash escapes in both the prefix and
# the message are interpreted, and it is terminated with the colour reset (NC).
_log_paint() {
  local tint="$1"
  shift
  echo -e "${tint}$*${NC}"
}

# Informational message (blue, indented by one space).
log_info() { _log_paint "${BLUE}" " $*"; }
# Success message (green).
log_ok() { _log_paint "${GREEN}" "$*"; }
# Warning message (yellow).
log_warn() { _log_paint "${YELLOW}" "$*"; }
# Error message (red).
log_error() { _log_paint "${RED}" "$*"; }
# Section banner: a blank line, then the title followed by a rule (blue).
log_section() { _log_paint "\n${BLUE}" "━━━ $* ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"; }
# ─── Parse args ──────────────────────────────────────────────────────────────
#######################################
# Print the usage text: the comment header at the top of this script, without
# the shebang line and with the leading "# " stripped. Output stops at the
# first line that is not a comment, so later section comments never leak in.
#######################################
show_help() {
  awk 'NR == 1 { next } !/^#/ { exit } { sub(/^# ?/, ""); print }' "$0"
}

#######################################
# Parse command-line options into the global configuration variables.
# Globals written: INVENTORY_PATH, NATIVE_USER, NATIVE_PASS, ROOT_PASS,
#                  AGENT_USER, CLIENT_KEY_PATH, MSP_KEY_PATH, KEY_REPO_DIR,
#                  DRY_RUN, SKIP_SSHD
# Arguments: the script's own arguments ("$@")
# Exits: 0 after --help; 1 on an unknown option or on an option that is
#        missing its value.
#######################################
parse_args() {
  while [[ $# -gt 0 ]]; do
    # Options that take a value must actually have one. Without this check a
    # trailing "--inventory" would die on an unbound "$2" under `set -u`
    # with a message that does not name the offending option.
    case "$1" in
      --inventory | --native-user | --native-pass | --root-pass | \
        --agent-user | --client-key | --msp-key | --key-repo-dir)
        if [[ $# -lt 2 ]]; then
          log_error "Option $1 requires a value"
          exit 1
        fi
        ;;
    esac

    case "$1" in
      --inventory) INVENTORY_PATH="$2"; shift 2 ;;
      --native-user) NATIVE_USER="$2"; shift 2 ;;
      --native-pass) NATIVE_PASS="$2"; shift 2 ;;
      --root-pass) ROOT_PASS="$2"; shift 2 ;;
      --agent-user) AGENT_USER="$2"; shift 2 ;;
      --client-key) CLIENT_KEY_PATH="$2"; shift 2 ;;
      --msp-key) MSP_KEY_PATH="$2"; shift 2 ;;
      --key-repo-dir) KEY_REPO_DIR="$2"; shift 2 ;;
      --dry-run) DRY_RUN=true; shift ;;
      --skip-sshd) SKIP_SSHD=true; shift ;;
      --help)
        show_help
        exit 0
        ;;
      *)
        log_error "Unknown option: $1"
        exit 1
        ;;
    esac
  done
}

parse_args "$@"
# ─── Validate ────────────────────────────────────────────────────────────────
# An inventory path must be given and must point at an existing regular file.
[[ -n "$INVENTORY_PATH" ]] || {
  log_error "--inventory is required"
  exit 1
}
[[ -f "$INVENTORY_PATH" ]] || {
  log_error "Inventory file not found: $INVENTORY_PATH"
  exit 1
}

# Check dependencies: stop at the first external tool that is not on PATH.
for tool in sshpass python3 ssh ssh-keyscan; do
  command -v "$tool" &>/dev/null && continue
  log_error "Missing dependency: $tool"
  echo " Install with: apt install sshpass python3 python3-yaml openssh-client"
  exit 1
done

# The yaml module is needed to read the inventory; probe for it by importing it.
if ! python3 -c "import yaml" 2>/dev/null; then
  log_error "python3-yaml not installed: apt install python3-yaml"
  exit 1
fi
# ─── Derive client key path from inventory path if not set ───────────────────
if [[ -z "$CLIENT_KEY_PATH" ]]; then
  # The inventory's parent directory name is the client slug:
  #   .../inventories/client_foo/hosts.yml -> client_foo -> foo
  INVENTORY_DIR="$(dirname "$INVENTORY_PATH")"
  SLUG="$(basename "$INVENTORY_DIR")"
  KEY_SLUG="${SLUG#client_}"

  # Start from the default location, then prefer the key repo copy if present.
  CLIENT_KEY_PATH="/root/.ssh/client_${KEY_SLUG}.pub"
  if [[ -n "$KEY_REPO_DIR" && -f "$KEY_REPO_DIR/keys/client_${KEY_SLUG}.pub" ]]; then
    CLIENT_KEY_PATH="$KEY_REPO_DIR/keys/client_${KEY_SLUG}.pub"
  fi
fi

# A copy of the MSP key in the key repo replaces MSP_KEY_PATH whenever it exists.
if [[ -n "$KEY_REPO_DIR" && -f "$KEY_REPO_DIR/keys/ansible-msp-agent.pub" ]]; then
  MSP_KEY_PATH="$KEY_REPO_DIR/keys/ansible-msp-agent.pub"
fi

# The client public key has no fallback: it must exist as a file.
if [[ ! -f "$CLIENT_KEY_PATH" ]]; then
  log_error "Client public key not found: $CLIENT_KEY_PATH"
  log_error "Generate it with: ssh-keygen -t ed25519 -f ${CLIENT_KEY_PATH%.pub}"
  exit 1
fi

# Resolve MSP public key — file takes priority, fall back to env var.
if [[ ! -f "$MSP_KEY_PATH" && -z "${MSP_BACKUP_PUBKEY:-}" ]]; then
  log_error "MSP backup public key not found: $MSP_KEY_PATH"
  log_error "Set MSP_BACKUP_PUBKEY in /root/.semaphore_env or pass --msp-key"
  exit 1
fi
if [[ -f "$MSP_KEY_PATH" ]]; then
  MSP_PUBKEY="$(< "$MSP_KEY_PATH")"
else
  MSP_PUBKEY="$MSP_BACKUP_PUBKEY"
  log_info "MSP key loaded from environment (MSP_BACKUP_PUBKEY)"
fi

CLIENT_PUBKEY="$(< "$CLIENT_KEY_PATH")"
# ─── Prompt for passwords if not provided ────────────────────────────────────
# Print a prompt on stdout, then read one line from stdin, without echoing it,
# into the variable whose name is passed in.
#   $1 - name of the variable to fill
#   $2 - prompt text
prompt_secret() {
  local target="$1"
  local prompt="$2"
  printf '%s' "$prompt"
  read -rs "$target"
  printf '\n'   # the newline typed by the user was not echoed
}

if [[ -z "$NATIVE_PASS" ]]; then
  prompt_secret NATIVE_PASS "Password for ${NATIVE_USER}: "
fi
if [[ -z "$ROOT_PASS" ]]; then
  prompt_secret ROOT_PASS "Root password (for su -): "
fi
# ─── Parse inventory for hosts ───────────────────────────────────────────────
log_section "Parsing inventory"
log_info "Inventory: $INVENTORY_PATH"

# Extract Linux and Windows hosts using Python.
# The heredoc delimiter is quoted so the shell expands nothing inside the
# Python source, and the inventory path is passed as argv[1]. Interpolating the
# path into the source would break (or execute attacker-chosen code) as soon as
# the path contained a quote.
# Result: HOST_DATA holds JSON of the form
#   {"linux": [{"name": ..., "ip": ...}], "windows": [...]}
if ! HOST_DATA=$(python3 - "$INVENTORY_PATH" << 'PYEOF'
import json
import sys

import yaml

with open(sys.argv[1]) as f:
    # An empty file loads as None; treat it as an empty inventory.
    inv = yaml.safe_load(f) or {}

if not isinstance(inv, dict):
    sys.exit("inventory root must be a mapping")

linux_hosts = []
windows_hosts = []


def extract_hosts(group, target_list):
    """Append {'name', 'ip'} for every host listed directly in `group`."""
    if not group:
        return
    hosts = group.get('hosts') or {}
    group_vars = group.get('vars') or {}
    for hostname, hvars in hosts.items():
        # Host vars override group vars; fall back to the host name itself
        # when no ansible_host is set.
        merged = {**group_vars, **(hvars or {})}
        ip = merged.get('ansible_host', hostname)
        # YAML may parse names or addresses as non-strings; normalise them.
        target_list.append({'name': str(hostname), 'ip': str(ip)})


children = (inv.get('all') or {}).get('children') or {}
extract_hosts(children.get('linux_hosts'), linux_hosts)
extract_hosts(children.get('windows_hosts'), windows_hosts)
print(json.dumps({'linux': linux_hosts, 'windows': windows_hosts}))
PYEOF
); then
  log_error "Failed to parse inventory: $INVENTORY_PATH"
  exit 1
fi

# Print one "name|ip" line per host for a platform key of HOST_DATA.
#   $1 - "linux" or "windows"
list_hosts() {
  python3 -c '
import json, sys
for h in json.load(sys.stdin)[sys.argv[1]]:
    print(h["name"] + "|" + h["ip"])
' "$1" <<< "$HOST_DATA"
}

LINUX_HOSTS=$(list_hosts linux)
WINDOWS_HOSTS=$(list_hosts windows)

# grep -c exits 1 when it counts zero lines; that is not an error here.
LINUX_COUNT=$(grep -c '.' <<< "$LINUX_HOSTS" || true)
WINDOWS_COUNT=$(grep -c '.' <<< "$WINDOWS_HOSTS" || true)

log_info "Linux hosts found: $LINUX_COUNT"
log_info "Windows hosts found: $WINDOWS_COUNT (skipped — WinRM setup not yet implemented)"
if [[ "$DRY_RUN" == "true" ]]; then
  log_warn "DRY RUN MODE — no changes will be made"
fi
# ─── Windows stub ────────────────────────────────────────────────────────────
# Windows hosts are only reported and counted as skipped; nothing is done to
# them. Each input line has the form "name|address".
if [[ -n "$WINDOWS_HOSTS" ]]; then
  log_section "Windows hosts (stub)"
  while IFS='|' read -r win_name win_addr; do
    if [[ -n "$win_name" ]]; then
      log_warn "SKIP $win_name ($win_addr) — Windows host, WinRM/SSH setup not yet implemented"
      HOSTS_SKIPPED=$((HOSTS_SKIPPED + 1))
    fi
  done <<< "$WINDOWS_HOSTS"
fi
# ─── Remote setup script ─────────────────────────────────────────────────────
#######################################
# Write to stdout the bash script that is executed as root on a remote host.
#
# The script consists of a generated header that assigns the four parameters,
# followed by a fixed body. The parameter values are shell-quoted with
# printf %q, so a key comment or user name containing quotes, spaces or other
# shell metacharacters cannot break the script or be executed on the remote
# host. The body sits in a quoted heredoc and is emitted verbatim.
#
# Arguments:
#   $1 - client public key (one authorized_keys line)
#   $2 - MSP backup public key (one authorized_keys line)
#   $3 - name of the service account to create
#   $4 - "true" to leave sshd_config untouched
# Outputs: the remote script on stdout
#######################################
build_remote_script() {
  local HOST_CLIENT_PUBKEY="$1"
  local HOST_MSP_PUBKEY="$2"
  local HOST_AGENT_USER="$3"
  local HOST_SKIP_SSHD="$4"

  printf '#!/bin/bash\nset -e\n'
  printf 'AGENT_USER=%q\n' "$HOST_AGENT_USER"
  printf 'CLIENT_PUBKEY=%q\n' "$HOST_CLIENT_PUBKEY"
  printf 'MSP_PUBKEY=%q\n' "$HOST_MSP_PUBKEY"
  printf 'SKIP_SSHD=%q\n' "$HOST_SKIP_SSHD"

  cat << 'REMOTESCRIPT'
echo "[remote] Starting agent bootstrap on $(hostname)"

# ── Create sudo-nopasswd group if missing ──
if ! getent group sudo-nopasswd > /dev/null 2>&1; then
  groupadd sudo-nopasswd
  echo "[remote] Created group: sudo-nopasswd"
else
  echo "[remote] Group sudo-nopasswd already exists"
fi

# Request the "sudo" group only where it exists. Naming a missing group makes
# useradd/usermod fail as a whole, which would also drop sudo-nopasswd.
AGENT_GROUPS="sudo-nopasswd"
if getent group sudo > /dev/null 2>&1; then
  AGENT_GROUPS="sudo,sudo-nopasswd"
fi

# ── Create agent user if missing ──
if ! id "$AGENT_USER" > /dev/null 2>&1; then
  useradd -m -s /bin/bash -G "$AGENT_GROUPS" "$AGENT_USER"
  echo "[remote] Created user: $AGENT_USER"
else
  echo "[remote] User $AGENT_USER already exists — ensuring group membership"
  usermod -aG "$AGENT_GROUPS" "$AGENT_USER" || true
fi

# ── Sudoers ──
SUDOERS_FILE="/etc/sudoers.d/99-ansible-nopasswd"
cat > "$SUDOERS_FILE" << 'SUDOEOF'
# Managed by ansible-msp deploy_agent.sh
# Members of sudo-nopasswd group can run all commands without password
%sudo-nopasswd ALL=(ALL) NOPASSWD:ALL
SUDOEOF
chmod 440 "$SUDOERS_FILE"
# A broken file in sudoers.d can disable sudo entirely, so remove it again if
# visudo rejects it.
if visudo -cf "$SUDOERS_FILE"; then
  echo "[remote] Sudoers file validated OK"
else
  echo "[remote] ERROR: sudoers file invalid — removing"
  rm -f "$SUDOERS_FILE"
  exit 1
fi

# Append a public key to an authorized_keys file unless it is already there.
#   $1 - authorized_keys path   $2 - key line
#   $3 - label for log output   $4 - account name for log output
ensure_key() {
  local file="$1" key="$2" label="$3" owner="$4"
  if grep -qF -- "$key" "$file" 2> /dev/null; then
    echo "[remote] $label already present for $owner"
    return 0
  fi
  # If the file does not end in a newline, add one first; otherwise the new
  # key would be glued onto the last existing entry and corrupt both.
  if [ -s "$file" ] && [ -n "$(tail -c 1 "$file")" ]; then
    echo >> "$file"
  fi
  printf '%s\n' "$key" >> "$file"
  echo "[remote] $label added to $owner"
}

# ── Deploy SSH keys to agent user ──
# Use the home directory recorded in the passwd database; a pre-existing
# account is not necessarily under /home.
AGENT_HOME=$(getent passwd "$AGENT_USER" | cut -d: -f6)
if [ -z "$AGENT_HOME" ]; then
  AGENT_HOME="/home/$AGENT_USER"
fi
AGENT_SSH_DIR="$AGENT_HOME/.ssh"
mkdir -p "$AGENT_SSH_DIR"
chmod 700 "$AGENT_SSH_DIR"
AUTH_KEYS="$AGENT_SSH_DIR/authorized_keys"
touch "$AUTH_KEYS"
ensure_key "$AUTH_KEYS" "$CLIENT_PUBKEY" "Client key" "$AGENT_USER"
ensure_key "$AUTH_KEYS" "$MSP_PUBKEY" "MSP backup key" "$AGENT_USER"
chmod 600 "$AUTH_KEYS"
# Use the account's real primary group; a group named after the user does not
# exist on every distribution.
chown -R "$AGENT_USER:$(id -gn "$AGENT_USER")" "$AGENT_SSH_DIR"

# ── Deploy SSH keys to root ──
ROOT_SSH_DIR="/root/.ssh"
mkdir -p "$ROOT_SSH_DIR"
chmod 700 "$ROOT_SSH_DIR"
ROOT_AUTH_KEYS="$ROOT_SSH_DIR/authorized_keys"
touch "$ROOT_AUTH_KEYS"
ensure_key "$ROOT_AUTH_KEYS" "$CLIENT_PUBKEY" "Client key" "root"
ensure_key "$ROOT_AUTH_KEYS" "$MSP_PUBKEY" "MSP backup key" "root"
chmod 600 "$ROOT_AUTH_KEYS"

# ── Adjust sshd_config ──
if [[ "$SKIP_SSHD" != "true" ]]; then
  SSHD_CONFIG="/etc/ssh/sshd_config"
  SSHD_BACKUP="${SSHD_CONFIG}.deploy_agent.bak"
  cp -p "$SSHD_CONFIG" "$SSHD_BACKUP"

  # Set KEY to VALUE: rewrite every existing line for KEY (commented out or
  # not), or append a new line when there is none.
  # NOTE(review): an appended line lands at the end of the file, which is
  # inside the last "Match" block if the config has one — confirm target hosts
  # do not use Match blocks.
  set_sshd_option() {
    local KEY="$1"
    local VALUE="$2"
    if grep -qE "^#?\s*${KEY}\s" "$SSHD_CONFIG"; then
      sed -i "s|^#\?\s*${KEY}\s.*|${KEY} ${VALUE}|" "$SSHD_CONFIG"
    else
      echo "${KEY} ${VALUE}" >> "$SSHD_CONFIG"
    fi
    echo "[remote] sshd_config: ${KEY} = ${VALUE}"
  }

  set_sshd_option "PubkeyAuthentication" "yes"
  set_sshd_option "PermitRootLogin" "prohibit-password"
  set_sshd_option "AuthorizedKeysFile" ".ssh/authorized_keys"

  # Never leave a config on disk that sshd cannot parse: the next restart
  # would lock everyone out. Restore the backup and stop instead.
  if command -v sshd > /dev/null 2>&1 && ! sshd -t -f "$SSHD_CONFIG"; then
    echo "[remote] ERROR: sshd_config failed validation — restoring backup"
    cp -p "$SSHD_BACKUP" "$SSHD_CONFIG"
    exit 1
  fi

  # Reload sshd (the unit is "sshd" or "ssh" depending on the distribution).
  # A failed reload is reported but does not fail the bootstrap.
  SSHD_RELOADED=true
  if command -v systemctl > /dev/null 2>&1; then
    systemctl reload sshd 2> /dev/null || systemctl reload ssh 2> /dev/null || SSHD_RELOADED=false
  else
    service sshd reload 2> /dev/null || service ssh reload 2> /dev/null || SSHD_RELOADED=false
  fi
  if [[ "$SSHD_RELOADED" == "true" ]]; then
    echo "[remote] sshd reloaded"
  else
    echo "[remote] WARNING: could not reload sshd — reload it manually"
  fi
fi

echo "[remote] Bootstrap complete on $(hostname)"
REMOTESCRIPT
}
# ─── Process Linux hosts ─────────────────────────────────────────────────────
if [[ -z "$LINUX_HOSTS" ]]; then
  log_warn "No Linux hosts found in inventory"
  exit 0
fi

log_section "Processing Linux hosts"

# Options shared by every password-authenticated SSH call below.
SSH_PASS_OPTS=(
  -o StrictHostKeyChecking=no
  -o ConnectTimeout=10
  -o PasswordAuthentication=yes
)

# Record a failed host for the final summary.
#   $1 - description, e.g. "name (ip) — reason"
record_failure() {
  FAILED_HOSTS+=("$1")
  HOSTS_FAILED=$((HOSTS_FAILED + 1))
}

# The remote script is identical for every host, so build it once.
REMOTE_SCRIPT=$(build_remote_script \
  "$CLIENT_PUBKEY" \
  "$MSP_PUBKEY" \
  "$AGENT_USER" \
  "$SKIP_SSHD")

# The host list is fed to the loop on file descriptor 3, not stdin. ssh reads
# stdin by default; if the list were on stdin, the first ssh call would swallow
# the remaining lines and only one host would ever be processed.
# The native password is handed to sshpass through the SSHPASS environment
# variable (-e) rather than -p, so it does not show up in the process list.
while IFS='|' read -r -u 3 TARGET_NAME HOST_IP; do
  [[ -z "$TARGET_NAME" ]] && continue
  HOSTS_TOTAL=$((HOSTS_TOTAL + 1))
  echo ""
  log_section "Host: $TARGET_NAME ($HOST_IP)"

  if [[ "$DRY_RUN" == "true" ]]; then
    log_info "DRY RUN: Would bootstrap $TARGET_NAME ($HOST_IP) as $NATIVE_USER → root → create $AGENT_USER"
    HOSTS_OK=$((HOSTS_OK + 1))
    continue
  fi

  # Add host to known_hosts (best effort).
  log_info "Scanning host key..."
  ssh-keyscan -T 10 "$HOST_IP" >> /root/.ssh/known_hosts 2>/dev/null || true

  # Test native user SSH access
  log_info "Testing SSH as $NATIVE_USER..."
  if ! SSHPASS="$NATIVE_PASS" sshpass -e ssh "${SSH_PASS_OPTS[@]}" \
    "$NATIVE_USER@$HOST_IP" "echo connected" < /dev/null &>/dev/null; then
    log_error "Cannot SSH to $TARGET_NAME ($HOST_IP) as $NATIVE_USER — skipping"
    record_failure "$TARGET_NAME ($HOST_IP) — SSH connection failed"
    continue
  fi
  log_ok "SSH connection successful"

  # Step 1: copy the script into a private temp file on the remote host.
  # It cannot be piped into `su ... bash -s` together with the root password:
  # su takes the password from stdin when there is no terminal, and the
  # password read may also consume the beginning of the script.
  log_info "Uploading bootstrap script..."
  REMOTE_TMP=$(SSHPASS="$NATIVE_PASS" sshpass -e ssh "${SSH_PASS_OPTS[@]}" \
    "$NATIVE_USER@$HOST_IP" \
    'umask 077; f=$(mktemp /tmp/deploy_agent.XXXXXXXX) && cat > "$f" && echo "$f"' \
    <<< "$REMOTE_SCRIPT" 2>/dev/null) || REMOTE_TMP=""
  # Keep only the last line in case the remote shell profile printed something,
  # and accept nothing but the path shape we asked mktemp for.
  REMOTE_TMP="${REMOTE_TMP##*$'\n'}"
  if [[ ! "$REMOTE_TMP" =~ ^/tmp/deploy_agent\.[A-Za-z0-9]+$ ]]; then
    log_error "Could not upload bootstrap script to $TARGET_NAME"
    record_failure "$TARGET_NAME ($HOST_IP) — script upload failed"
    continue
  fi

  # Step 2: run the uploaded file as root. The only thing on stdin is the root
  # password, so it never appears in a command line and needs no quoting.
  log_info "Executing bootstrap via su - root..."
  BOOTSTRAP_OUTPUT=$(SSHPASS="$NATIVE_PASS" sshpass -e ssh "${SSH_PASS_OPTS[@]}" \
    "$NATIVE_USER@$HOST_IP" \
    "su - root -c 'bash $REMOTE_TMP'; rc=\$?; rm -f '$REMOTE_TMP'; exit \$rc" \
    <<< "$ROOT_PASS" 2>&1) || {
    log_error "Bootstrap script failed on $TARGET_NAME"
    echo "$BOOTSTRAP_OUTPUT" | sed 's/^/ /'
    record_failure "$TARGET_NAME ($HOST_IP) — bootstrap script failed"
    continue
  }

  # A zero exit status is not enough: require the script's final marker so a
  # run that executed nothing is reported as a failure, not as a success.
  if ! grep -qF "[remote] Bootstrap complete" <<< "$BOOTSTRAP_OUTPUT"; then
    log_error "Bootstrap script did not run to completion on $TARGET_NAME"
    echo "$BOOTSTRAP_OUTPUT" | sed 's/^/ /'
    record_failure "$TARGET_NAME ($HOST_IP) — bootstrap script did not complete"
    continue
  fi

  # Show remote output. grep exits 1 when nothing matches; under pipefail and
  # errexit that would abort the whole run, hence the guard.
  grep "\[remote\]" <<< "$BOOTSTRAP_OUTPUT" | sed 's/^/ /' || true

  # Verify key-based login works for agent user
  log_info "Verifying key-based login for $AGENT_USER..."
  CLIENT_PRIVKEY="${CLIENT_KEY_PATH%.pub}"
  if [[ -f "$CLIENT_PRIVKEY" ]]; then
    if ssh -i "$CLIENT_PRIVKEY" \
      -o StrictHostKeyChecking=no \
      -o ConnectTimeout=10 \
      -o PasswordAuthentication=no \
      "$AGENT_USER@$HOST_IP" "echo key-auth-ok" < /dev/null &>/dev/null; then
      log_ok "Key-based login verified for $AGENT_USER"
    else
      log_warn "Key-based login test failed for $AGENT_USER — check manually"
    fi
  else
    log_warn "Private key not found at $CLIENT_PRIVKEY — skipping login verification"
  fi

  log_ok "Bootstrap complete: $TARGET_NAME"
  HOSTS_OK=$((HOSTS_OK + 1))
done 3<<< "$LINUX_HOSTS"
# ─── Summary ─────────────────────────────────────────────────────────────────
# Print the totals, list every failed host, and exit with status 1 if any host
# failed.
SUMMARY_RULE="━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
printf '\n'
printf '%s\n' "$SUMMARY_RULE" " Bootstrap Summary" "$SUMMARY_RULE"
printf '%s\n' " Total Linux hosts: $HOSTS_TOTAL"
echo -e " ${GREEN}Succeeded: $HOSTS_OK${NC}"

if (( HOSTS_FAILED > 0 )); then
  echo -e " ${RED}Failed: $HOSTS_FAILED${NC}"
  printf '\n'
  printf '%s\n' " Failed hosts:"
  for failed_entry in "${FAILED_HOSTS[@]}"; do
    echo -e " ${RED}${failed_entry}${NC}"
  done
fi

if (( HOSTS_SKIPPED > 0 )); then
  echo -e " ${YELLOW}Skipped (Windows): $HOSTS_SKIPPED${NC}"
fi
printf '\n'

# A non-zero exit status signals that at least one host failed.
(( HOSTS_FAILED == 0 )) || exit 1