Files
ansible-msp-automations/scripts/onboard_client.sh
Semaphore a42bf14665 Add XCP-NG integration, deploy_agent.sh, overhaul onboard_client.sh
- roles/xcpng_update: new role — patches XCP-NG pools via XO REST API
  - non-HA pools: pool-level install_patches + restart_hosts
  - HA clusters: rolling pool update via JSON-RPC pool.rollingUpdate
  - dry run support, patch verification after update
- roles/snapshot: add xcpng_xo hypervisor_type support via XO REST API
- playbooks/xcpng_pool_update.yml: new playbook for XCP-NG pool patching
- inventories/client_template/hosts.yml: add xcpng_hosts group
- scripts/onboard_client.sh: major overhaul
  - add --hypervisor proxmox|xcpng|baremetal|mixed
  - add --xo-url / --xo-token (falls back to global env)
  - webhook no longer required (falls back to N8N_WEBHOOK_URL in env)
  - ansible_user changed to ansible-msp-agent with sudo
  - xcpng_hosts group in inventory scaffold for xcpng/mixed clients
  - hypervisor-appropriate task templates created automatically
  - add --dry-run support
- scripts/deploy_agent.sh: new script — bootstrap ansible-msp-agent
  - reads hosts.yml to get Linux/Windows hosts
  - SSHes as native account, su - to root
  - creates ansible-msp-agent user + sudo-nopasswd group
  - deploys client key + MSP backup key to agent user and root
  - adjusts sshd_config, reloads sshd
  - verifies key-based login after bootstrap
  - Windows stub with skip + warning
  - continues on failure, prints summary
2026-03-12 11:15:43 -07:00

446 lines
19 KiB
Bash
Executable File
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/bin/bash
set -e
# =============================================================================
# scripts/onboard_client.sh — MSP Client Onboarding
# =============================================================================
# Creates a new client project in Semaphore, generates SSH keys, scaffolds
# the inventory, and creates all task templates.
#
# Usage:
# ./onboard_client.sh -i CLIENT-001 -n "Client Name" -s client_slug [options]
#
# Options:
# -i, --id Client ID (e.g. SRH-001) [required]
# -n, --name Client name (e.g. 'Sanrufo Homes') [required]
# -s, --slug Inventory slug (e.g. sanrufo_homes) [required]
# -b, --billing Billing model (default: hybrid)
# -e, --estimate Human time estimate seconds (default: 2700)
# -H, --hypervisor Hypervisor type: proxmox|xcpng|baremetal|mixed
# (default: xcpng)
# Use 'mixed' when a client has multiple hypervisor types
# Use 'baremetal' when no snapshots are possible
# -w, --webhook n8n webhook URL override (default: global from env)
# --proxmox-host Proxmox host IP
# --proxmox-token-id Proxmox API token ID
# --proxmox-token-secret Proxmox API token secret
# --xo-url XO URL override (default: global XO_URL from env)
# --xo-token XO token override (default: global XO_TOKEN from env)
# --semaphore-url Semaphore base URL (default: http://localhost:3000)
# --semaphore-token Semaphore API token (default: from /root/.semaphore_env)
# --gitea-url Gitea repo SSH URL
# --project-name Override Semaphore project name
# --dry-run Show what would be done without making changes
# =============================================================================
# ─── Defaults ────────────────────────────────────────────────────────────────
# Baseline settings. Anything assigned here can still be replaced by the env
# file sourced at the end of this section, and later by command-line flags.
BILLING='hybrid'
ESTIMATE='2700'
HYPERVISOR='xcpng'
DRY_RUN=false

# Locations of Semaphore, the automation repo and the Gitea deploy key.
SEMAPHORE_URL='http://localhost:3000'
REPO_DIR='/opt/ansible-msp-automations'
GITEA_DEPLOY_KEY='/root/.ssh/gitea_ansible'
GITEA_REPO_URL='ssh://git@172.31.10.8:2222/VOICE1/ansible-msp-automations.git'

# Optional per-client values; an empty string means "not supplied".
PROXMOX_HOST=''
PROXMOX_TOKEN_ID=''
PROXMOX_TOKEN_SECRET=''
XO_URL_OVERRIDE=''
XO_TOKEN_OVERRIDE=''
WEBHOOK_URL_OVERRIDE=''
PROJECT_NAME_OVERRIDE=''

# Load global defaults from the env file when it exists. It is sourced after
# the assignments above, so whatever it sets takes precedence over them.
[[ ! -f /root/.semaphore_env ]] || . /root/.semaphore_env
# ─── Colors / logging ────────────────────────────────────────────────────────
# ANSI color sequences, stored as literal backslash escapes; `echo -e` turns
# them into real escape characters when a message is printed.
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m'

# _log_paint PREFIX TEXT — print PREFIX, TEXT and the color reset on stdout.
_log_paint() {
  echo -e "${1}${2}${NC}"
}

# One helper per severity; each joins its arguments with "$*".
log_info() {
  _log_paint "${BLUE}" " $*"
}

log_ok() {
  _log_paint "${GREEN}" "$*"
}

log_warn() {
  _log_paint "${YELLOW}" "$*"
}

log_error() {
  _log_paint "${RED}" "$*"
}

# Section headings are preceded by a blank line and wrapped in "[ ... ]".
log_section() {
  _log_paint "\n${BLUE}" "[ $* ]"
}
# dry MESSAGE...
# Dry-run gate. When DRY_RUN is "true", announce the action being skipped and
# return 0; otherwise print nothing and return 1. Call sites read
#   if ! dry "what would happen"; then <real action>; fi
# The return status depends only on DRY_RUN. A failed announcement (for
# example a closed stdout) must never make a dry run fall through to the real
# action, which the previous `&& ... || return 1` chain allowed.
dry() {
  if [[ "$DRY_RUN" == "true" ]]; then
    echo -e "${YELLOW} DRY-RUN: $*${NC}" || true
    return 0
  fi
  return 1
}
# ─── Usage ───────────────────────────────────────────────────────────────────
# usage [EXIT_CODE]
# Print the help text kept in this script's header comment, then exit.
#   EXIT_CODE  status to exit with; defaults to 1, as used on error paths.
# Only the header is printed: everything from the first rule line (a comment
# made of "=" characters) through the third one, with the leading "# "
# removed. The shebang and the section-divider comments further down the
# file are therefore no longer part of the output.
usage() {
  awk '
    /^# =====/ { rules++ }
    rules >= 1 && /^#/ { sub(/^# ?/, ""); print }
    rules >= 3 { exit }
  ' "$0"
  exit "${1:-1}"
}
# ─── Parse args ──────────────────────────────────────────────────────────────
# Consume the command line left to right. Unknown options print an error and
# the usage text; -h/--help prints the usage text.
while [[ $# -gt 0 ]]; do
  # Every option except --dry-run and -h/--help takes the next word as its
  # value. Check that the word exists before reading it: with `set -e`, a
  # `shift 2` when only one argument is left fails and would end the script
  # without telling the operator what was wrong.
  case "$1" in
    -i|--id|-n|--name|-s|--slug|-b|--billing|-e|--estimate|-H|--hypervisor|\
    -w|--webhook|--proxmox-host|--proxmox-token-id|--proxmox-token-secret|\
    --xo-url|--xo-token|--semaphore-url|--semaphore-token|--gitea-url|\
    --project-name)
      if [[ $# -lt 2 ]]; then
        log_error "Option $1 requires a value"
        usage
      fi
      ;;
  esac

  case "$1" in
    -i|--id)                 CLIENT_ID="$2"; shift 2 ;;
    -n|--name)               CLIENT_NAME="$2"; shift 2 ;;
    -s|--slug)               CLIENT_SLUG="$2"; shift 2 ;;
    -b|--billing)            BILLING="$2"; shift 2 ;;
    -e|--estimate)           ESTIMATE="$2"; shift 2 ;;
    -H|--hypervisor)         HYPERVISOR="$2"; shift 2 ;;
    -w|--webhook)            WEBHOOK_URL_OVERRIDE="$2"; shift 2 ;;
    --proxmox-host)          PROXMOX_HOST="$2"; shift 2 ;;
    --proxmox-token-id)      PROXMOX_TOKEN_ID="$2"; shift 2 ;;
    --proxmox-token-secret)  PROXMOX_TOKEN_SECRET="$2"; shift 2 ;;
    --xo-url)                XO_URL_OVERRIDE="$2"; shift 2 ;;
    --xo-token)              XO_TOKEN_OVERRIDE="$2"; shift 2 ;;
    --semaphore-url)         SEMAPHORE_URL="$2"; shift 2 ;;
    --semaphore-token)       SEMAPHORE_TOKEN="$2"; shift 2 ;;
    --gitea-url)             GITEA_REPO_URL="$2"; shift 2 ;;
    --project-name)          PROJECT_NAME_OVERRIDE="$2"; shift 2 ;;
    --dry-run)               DRY_RUN=true; shift ;;
    -h|--help)               usage ;;
    *)                       log_error "Unknown option: $1"; usage ;;
  esac
done
# ─── Validate required args ──────────────────────────────────────────────────
# Collect every missing mandatory flag first so they are reported together.
MISSING=()
if [[ -z "${CLIENT_ID:-}" ]]; then MISSING+=("--id"); fi
if [[ -z "${CLIENT_NAME:-}" ]]; then MISSING+=("--name"); fi
if [[ -z "${CLIENT_SLUG:-}" ]]; then MISSING+=("--slug"); fi
if [[ ${#MISSING[@]} -gt 0 ]]; then
  log_error "Missing required arguments: ${MISSING[*]}"
  usage
fi

# The slug is embedded in the key file path, the inventory directory name and
# Semaphore object names below, so reject anything that could change a path
# (slashes, dots, whitespace) or break quoting.
if [[ ! "$CLIENT_SLUG" =~ ^[A-Za-z0-9_-]+$ ]]; then
  log_error "Invalid slug: $CLIENT_SLUG (use only letters, digits, '_' and '-')"
  exit 1
fi

# The estimate is written into hosts.yml as a bare (unquoted) number.
if [[ ! "$ESTIMATE" =~ ^[0-9]+$ ]]; then
  log_error "Invalid estimate: $ESTIMATE (must be a whole number of seconds)"
  exit 1
fi

case "$HYPERVISOR" in
  proxmox|xcpng|baremetal|mixed) ;;
  *)
    log_error "Invalid hypervisor type: $HYPERVISOR (use: proxmox|xcpng|baremetal|mixed)"
    exit 1
    ;;
esac

if [[ -z "${SEMAPHORE_TOKEN:-}" ]]; then
  log_error "No SEMAPHORE_TOKEN available. Set in /root/.semaphore_env or pass --semaphore-token"
  exit 1
fi

# Resolve XO vars — per-client override takes priority, then global env
EFFECTIVE_XO_URL="${XO_URL_OVERRIDE:-${XO_URL:-}}"
EFFECTIVE_XO_TOKEN="${XO_TOKEN_OVERRIDE:-${XO_TOKEN:-}}"

# Resolve webhook — per-client override takes priority, then global env
EFFECTIVE_WEBHOOK="${WEBHOOK_URL_OVERRIDE:-${N8N_WEBHOOK_URL:-}}"
if [[ -z "$EFFECTIVE_WEBHOOK" ]]; then
  log_error "No webhook URL. Set N8N_WEBHOOK_URL in /root/.semaphore_env or pass --webhook"
  exit 1
fi

# Values derived from the validated inputs and used by every later step.
PROJECT_NAME="${PROJECT_NAME_OVERRIDE:-Client - ${CLIENT_NAME}}"
KEY_FILE="/root/.ssh/client_${CLIENT_SLUG}"
INVENTORY_DIR="$REPO_DIR/inventories/client_${CLIENT_SLUG}"
INVENTORY_REPO_PATH="inventories/client_${CLIENT_SLUG}/hosts.yml"
# Opening banner: which client is being onboarded and with which settings.
printf '\n'
printf '%s\n' \
  "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" \
  " Onboarding: $CLIENT_NAME ($CLIENT_ID)" \
  " Slug: client_${CLIENT_SLUG}" \
  " Hypervisor: $HYPERVISOR" \
  " Billing: $BILLING"
# The dry-run notice is the only colored line of the banner.
if [[ "$DRY_RUN" == "true" ]]; then
  echo -e " ${YELLOW}DRY RUN MODE — no changes will be made${NC}"
fi
printf '%s\n' "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
# ─── Step 1: Generate SSH key ─────────────────────────────────────────────────
# Create the per-client ed25519 key pair unless a key file is already present,
# then show the public half so it can be installed on the client's hosts.
log_section "1/6 — SSH key"
if [[ -f "$KEY_FILE" ]]; then
  log_warn "Key already exists at $KEY_FILE — skipping generation"
elif ! dry "ssh-keygen -t ed25519 -C ansible-${CLIENT_SLUG} -f $KEY_FILE -N ''"; then
  # Empty passphrase (-N ""), comment "ansible-<slug>".
  ssh-keygen -t ed25519 -C "ansible-${CLIENT_SLUG}" -f "$KEY_FILE" -N ""
  log_ok "Key generated: $KEY_FILE"
fi

printf '\n'
printf '%s\n' \
  " ┌─ Public key to deploy to all client hosts ──────────────────────────" \
  " │"
# When the .pub file cannot be read, a placeholder line is printed instead.
if ! sed 's/^/ │ /' "$KEY_FILE.pub" 2>/dev/null; then
  echo " │ (dry run — key not yet generated)"
fi
printf '%s\n' \
  " │" \
  " └─────────────────────────────────────────────────────────────────────"
# ─── Step 2: Create inventory from template ───────────────────────────────────
# Scaffold $INVENTORY_DIR by copying inventories/client_template and then
# overwriting hosts.yml and group_vars/all.yml with client-specific content.
# An inventory directory that already exists is left untouched. In dry-run
# mode only the planned copy is announced and nothing is written.
log_section "2/6 — Inventory scaffold"
if [[ -d "$INVENTORY_DIR" ]]; then
log_warn "Inventory already exists at $INVENTORY_DIR — skipping"
else
if ! dry "cp -r $REPO_DIR/inventories/client_template $INVENTORY_DIR"; then
cp -r "$REPO_DIR/inventories/client_template" "$INVENTORY_DIR"
# Determine which hypervisor groups to include
# NOTE(review): INCLUDE_PROXMOX is assigned here but not read anywhere in the
# visible script; only INCLUDE_XCPNG influences the generated file.
INCLUDE_XCPNG=false
INCLUDE_PROXMOX=false
case "$HYPERVISOR" in
xcpng) INCLUDE_XCPNG=true ;;
proxmox) INCLUDE_PROXMOX=true ;;
mixed) INCLUDE_XCPNG=true; INCLUDE_PROXMOX=true ;;
baremetal) ;;
esac
# Optional xcpng_hosts group, spliced into hosts.yml via ${XCPNG_BLOCK}; stays
# empty for clients without XCP-NG.
XCPNG_BLOCK=""
if [[ "$INCLUDE_XCPNG" == "true" ]]; then
# The quoted delimiter ('XCPNGEOF') disables expansion, so the text below is
# captured literally.
XCPNG_BLOCK=$(cat << 'XCPNGEOF'
# XCP-NG pool entries — one entry per pool (not per hypervisor host)
# Each entry triggers xcpng_pool_update.yml against that pool via XO REST API
# Required per host: xo_pool_uuid
# XO_URL and XO_TOKEN come from Semaphore variable group (override here if needed)
xcpng_hosts:
hosts: {}
vars:
ansible_connection: local
XCPNGEOF
)
fi
# hosts.yml: the unquoted delimiter means ${...} and $(date ...) are expanded
# while the file is written. The body is emitted verbatim otherwise, so its
# leading whitespace is what defines the YAML nesting.
# NOTE(review): CLIENT_NAME, CLIENT_ID and BILLING are placed inside
# double-quoted YAML scalars without escaping; a value containing a double
# quote would presumably produce invalid YAML — confirm.
cat > "$INVENTORY_DIR/hosts.yml" << HOSTSEOF
---
# Client: ${CLIENT_NAME} (${CLIENT_ID})
# Onboarded: $(date +%Y-%m-%d)
# Hypervisor: ${HYPERVISOR}
# Billing: ${BILLING}
#
# ansible_user: ansible-msp-agent (deployed by scripts/deploy_agent.sh)
# Do NOT use root as ansible_user for day-to-day operations.
all:
vars:
client_id: "${CLIENT_ID}"
client_name: "${CLIENT_NAME}"
billing_model: "${BILLING}"
maintenance_window_start: "02:00"
maintenance_window_end: "05:00"
maintenance_window_tz: "UTC"
change_freeze: false
hypervisor_type: "${HYPERVISOR}"
auto_reboot: false
human_estimate_seconds: ${ESTIMATE}
children:
linux_hosts:
hosts: {}
vars:
ansible_user: ansible-msp-agent
ansible_become: true
ansible_become_method: sudo
# Per-host vars to set:
# ansible_host: <ip>
# xcpng_vm_uuid: <uuid> (if hypervisor is xcpng or mixed)
# proxmox_vmid: <id> (if hypervisor is proxmox or mixed)
windows_hosts:
hosts: {}
vars:
ansible_user: Administrator
ansible_connection: winrm
ansible_winrm_transport: ntlm
ansible_winrm_server_cert_validation: validate
ansible_port: 5986
# Windows patching not yet implemented — hosts listed for inventory completeness
# Per-host vars to set:
# ansible_host: <ip>
# xcpng_vm_uuid: <uuid> (if hypervisor is xcpng or mixed)
${XCPNG_BLOCK}
HOSTSEOF
# group_vars/all.yml: a comments-only placeholder for client-specific overrides.
# NOTE(review): no mkdir precedes this write, so it relies on the copied
# template already containing a group_vars/ directory — confirm.
cat > "$INVENTORY_DIR/group_vars/all.yml" << VARSEOF
---
# Client: ${CLIENT_NAME} (${CLIENT_ID})
# Onboarded: $(date +%Y-%m-%d)
# Client-specific variable overrides go here.
# Global vars (XO_URL, XO_TOKEN, N8N_WEBHOOK_URL) come from Semaphore variable group.
# Override here only if this client uses a different XO instance or webhook.
VARSEOF
log_ok "Inventory created at $INVENTORY_DIR"
fi
fi
# ─── Step 3: Commit and push ──────────────────────────────────────────────────
# Commit the client's inventory scaffold and push it to origin/main so that
# Semaphore can check it out. Skipped entirely in dry-run mode.
log_section "3/6 — Git commit"
if ! dry "git add inventories/client_${CLIENT_SLUG} && git commit && git push"; then
  cd "$REPO_DIR"
  # Stage only this client's inventory. A blanket `git add .` would also pick
  # up unrelated edits or stray untracked files in the working tree and push
  # them under the onboarding commit.
  git add -- "inventories/client_${CLIENT_SLUG}"
  # `git diff --cached --quiet` succeeds when nothing is staged. An explicit
  # if/else keeps "nothing to commit" and "commit" mutually exclusive.
  if git diff --cached --quiet; then
    log_warn "Nothing to commit"
  else
    git commit -m "Onboard client: ${CLIENT_NAME} (${CLIENT_ID}) — inventory scaffold"
  fi
  git push origin main
  log_ok "Pushed to Gitea"
fi
# ─── Step 4: Semaphore project ────────────────────────────────────────────────
# Create the Semaphore project and the objects templates depend on: two SSH
# keys, a "None" key, the repository, the variable group and the inventory.
# Every request body is assembled with jq, so client names, URLs and key
# material are JSON-escaped instead of being spliced into a string by hand.
# Every response must yield an id, otherwise the run stops at that point.
log_section "4/6 — Semaphore project"

# semaphore_create API_PATH LABEL
# POST the JSON document read from stdin to $SEMAPHORE_URL/api/API_PATH and
# print the "id" field of the response on stdout.
#   API_PATH  path below /api/ (e.g. "projects")
#   LABEL     human-readable object name used in error messages
# Returns 1 when the request fails or the response carries no id. Errors go
# to stderr because stdout is captured by the caller. Reading the body from
# stdin keeps private keys out of curl's argument list.
semaphore_create() {
  local api_path="$1"
  local label="$2"
  local response new_id

  if ! response=$(curl -s -X POST "$SEMAPHORE_URL/api/$api_path" \
    -H "Authorization: Bearer $SEMAPHORE_TOKEN" \
    -H "Content-Type: application/json" \
    --data-binary @-); then
    log_error "Request to create $label failed" >&2
    return 1
  fi

  # `// empty` maps a missing or null id to an empty string; a response that
  # is not JSON at all makes jq fail, which is treated the same way.
  new_id=$(jq -r '.id // empty' <<<"$response" 2>/dev/null) || new_id=""
  if [[ -z "$new_id" ]]; then
    log_error "Failed to create $label: $response" >&2
    return 1
  fi
  printf '%s\n' "$new_id"
}

if dry "Create Semaphore project + keys + repo + env + inventory"; then
  # Placeholder ids so later steps can still expand these variables.
  PROJECT_ID=0; GITEA_KEY_ID=0; CLIENT_KEY_ID=0
  NONE_KEY_ID=0; REPO_ID=0; ENV_ID=0; INVENTORY_ID=0
else
  # 4a. Project
  PROJECT_ID=$(jq -n --arg name "$PROJECT_NAME" \
    '{name: $name, alert: false, max_parallel_tasks: 0}' \
    | semaphore_create "projects" "project") || exit 1
  log_ok "Project: $PROJECT_NAME (ID: $PROJECT_ID)"

  # 4b. Gitea deploy key (-Rs reads the whole key file as one JSON string,
  # trailing newline included)
  GITEA_KEY_ID=$(jq -Rs --argjson pid "$PROJECT_ID" \
    '{name: "gitea-deploy", type: "ssh", project_id: $pid,
      ssh: {login: "", passphrase: "", private_key: .}}' < "$GITEA_DEPLOY_KEY" \
    | semaphore_create "project/$PROJECT_ID/keys" "gitea-deploy key") || exit 1
  log_ok "gitea-deploy key (ID: $GITEA_KEY_ID)"

  # 4c. Client SSH key
  CLIENT_KEY_ID=$(jq -Rs --argjson pid "$PROJECT_ID" \
    --arg name "client-${CLIENT_SLUG}-ssh" \
    '{name: $name, type: "ssh", project_id: $pid,
      ssh: {login: "", passphrase: "", private_key: .}}' < "$KEY_FILE" \
    | semaphore_create "project/$PROJECT_ID/keys" "client SSH key") || exit 1
  log_ok "Client SSH key (ID: $CLIENT_KEY_ID)"

  # 4d. None key
  NONE_KEY_ID=$(jq -n --argjson pid "$PROJECT_ID" \
    '{name: "None", type: "none", project_id: $pid}' \
    | semaphore_create "project/$PROJECT_ID/keys" "None key") || exit 1
  log_ok "None key (ID: $NONE_KEY_ID)"

  # 4e. Repository
  REPO_ID=$(jq -n --argjson pid "$PROJECT_ID" \
    --argjson key "$GITEA_KEY_ID" \
    --arg url "$GITEA_REPO_URL" \
    '{name: "ansible-msp-automations", project_id: $pid,
      git_url: $url, git_branch: "main", ssh_key_id: $key}' \
    | semaphore_create "project/$PROJECT_ID/repositories" "repository") || exit 1
  log_ok "Repository (ID: $REPO_ID)"

  # 4f. Variable group — only include hypervisor vars that are set
  VARS_JSON=$(jq -n \
    --arg webhook "$EFFECTIVE_WEBHOOK" \
    --arg cid "$CLIENT_ID" \
    --arg cname "$CLIENT_NAME" \
    --arg billing "$BILLING" \
    --arg estimate "$ESTIMATE" \
    --arg phost "$PROXMOX_HOST" \
    --arg ptid "$PROXMOX_TOKEN_ID" \
    --arg ptsecret "$PROXMOX_TOKEN_SECRET" \
    --arg xourl "$EFFECTIVE_XO_URL" \
    --arg xotoken "$EFFECTIVE_XO_TOKEN" \
    '{
      N8N_WEBHOOK_URL: $webhook,
      CLIENT_ID: $cid,
      CLIENT_NAME: $cname,
      BILLING_MODEL: $billing,
      HUMAN_ESTIMATE_SECONDS: $estimate
    }
    | if $phost != "" then . + {PROXMOX_HOST: $phost} else . end
    | if $ptid != "" then . + {PROXMOX_TOKEN_ID: $ptid} else . end
    | if $ptsecret != "" then . + {PROXMOX_TOKEN_SECRET: $ptsecret} else . end
    | if $xourl != "" then . + {XO_URL: $xourl} else . end
    | if $xotoken != "" then . + {XO_TOKEN: $xotoken} else . end
    ')
  # The API takes the variables as a JSON document encoded inside a string.
  ENV_ID=$(jq -n --argjson pid "$PROJECT_ID" \
    --arg name "${CLIENT_SLUG}-vars" \
    --arg vars "$VARS_JSON" \
    '{name: $name, project_id: $pid, json: $vars, env: "{}"}' \
    | semaphore_create "project/$PROJECT_ID/environment" "variable group") || exit 1
  log_ok "Variable group (ID: $ENV_ID)"

  # 4g. Inventory
  INVENTORY_ID=$(jq -n --argjson pid "$PROJECT_ID" \
    --arg name "client-${CLIENT_SLUG}" \
    --arg path "$INVENTORY_REPO_PATH" \
    --argjson ssh "$CLIENT_KEY_ID" \
    --argjson become "$NONE_KEY_ID" \
    --argjson repo "$REPO_ID" \
    '{name: $name, project_id: $pid, inventory: $path,
      ssh_key_id: $ssh, become_key_id: $become, type: "file",
      repository_id: $repo}' \
    | semaphore_create "project/$PROJECT_ID/inventory" "inventory") || exit 1
  log_ok "Inventory (ID: $INVENTORY_ID)"
fi # end dry run block
# ─── Step 5: Task templates ───────────────────────────────────────────────────
log_section "5/6 — Task templates"

# create_template NAME PLAYBOOK DESCRIPTION
# Create one Ansible task template in the current Semaphore project.
#   NAME         template name shown in Semaphore
#   PLAYBOOK     playbook path relative to the repository root
#   DESCRIPTION  free-text description
# Reads PROJECT_ID, INVENTORY_ID, REPO_ID, ENV_ID, SEMAPHORE_URL and
# SEMAPHORE_TOKEN. In dry-run mode it only announces the template.
# The request body is built with jq so quotes or backslashes in NAME or
# DESCRIPTION cannot corrupt the JSON. A response without an id is reported
# through log_error instead of being logged as a success. The function still
# returns 0 in that case, so callers keep creating the remaining templates
# exactly as before.
create_template() {
  local TNAME="$1"
  local PLAYBOOK="$2"
  local DESC="$3"
  local payload result template_id

  if dry "Template: $TNAME → $PLAYBOOK"; then
    return 0
  fi

  # --argjson requires the ids to be valid JSON values (normally numbers).
  if ! payload=$(jq -n \
    --argjson project_id "$PROJECT_ID" \
    --argjson inventory_id "$INVENTORY_ID" \
    --argjson repository_id "$REPO_ID" \
    --argjson environment_id "$ENV_ID" \
    --arg name "$TNAME" \
    --arg playbook "$PLAYBOOK" \
    --arg description "$DESC" \
    '{
      project_id: $project_id,
      inventory_id: $inventory_id,
      repository_id: $repository_id,
      environment_id: $environment_id,
      name: $name,
      playbook: $playbook,
      arguments: "[]",
      allow_override_args_in_task: false,
      description: $description,
      app: "ansible"
    }'); then
    log_error "Template: $TNAME — could not build request (check project/inventory/repository/environment IDs)"
    return 0
  fi

  result=$(curl -s -X POST "$SEMAPHORE_URL/api/project/$PROJECT_ID/templates" \
    -H "Authorization: Bearer $SEMAPHORE_TOKEN" \
    -H "Content-Type: application/json" \
    -d "$payload") || result=""

  # A missing or null id, or a body that is not JSON, counts as a failure.
  template_id=$(jq -r '.id // empty' <<<"$result" 2>/dev/null) || template_id=""
  if [[ -z "$template_id" ]]; then
    log_error "Template: $TNAME — creation failed: $result"
    return 0
  fi
  log_ok "Template: $(jq -r '.name' <<<"$result") (ID: $template_id)"
}
# Templates that every client receives, whatever the hypervisor.
create_template "Preflight Check" "playbooks/site_preflight.yml" "Safety checks on all hosts before maintenance"
create_template "Linux Patch" "playbooks/linux_patch.yml" "Full Linux patch run with version tracking"
create_template "Full Maintenance" "playbooks/site_maintenance.yml" "Full maintenance: snapshot, preflight, patch"

# Proxmox-backed clients (including mixed estates) get a snapshot template.
if [[ "$HYPERVISOR" == "proxmox" || "$HYPERVISOR" == "mixed" ]]; then
  create_template "Snapshot (Proxmox)" "playbooks/snapshot_pre.yml" "Pre-patch VM snapshots via Proxmox API"
fi

# XCP-NG-backed clients (including mixed estates) get pool update + snapshot.
if [[ "$HYPERVISOR" == "xcpng" || "$HYPERVISOR" == "mixed" ]]; then
  create_template "XCP-NG Pool Update" "playbooks/xcpng_pool_update.yml" "Patch XCP-NG hypervisor pools via XO REST API"
  create_template "Snapshot (XCP-NG)" "playbooks/snapshot_pre.yml" "Pre-patch VM snapshots via XO REST API"
fi

# No hypervisor means no snapshots; remind the operator.
if [[ "$HYPERVISOR" == "baremetal" ]]; then
  log_warn "Baremetal — no snapshot templates created. Ensure change approval before patching."
fi
# ─── Step 6: Summary ──────────────────────────────────────────────────────────
# Closing report with the operator's next steps. Two messages were inaccurate
# before: a dry run was reported as "onboarded", and step 4 pointed Proxmox
# clients at an "XCP-NG Pool Update" template that is only created for
# xcpng/mixed clients. Step 4 now lists the templates that exist for the
# selected hypervisor type.
log_section "6/6 — Done"
echo ""
echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
if [[ "$DRY_RUN" == "true" ]]; then
  echo "${CLIENT_NAME} (${CLIENT_ID}) — dry run complete, no changes were made"
else
  echo "${CLIENT_NAME} (${CLIENT_ID}) onboarded"
fi
echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
echo ""
if [[ "$DRY_RUN" != "true" ]]; then
  echo " Semaphore project ID : $PROJECT_ID"
fi
echo " Inventory : $INVENTORY_DIR/hosts.yml"
echo " Hypervisor : $HYPERVISOR"
echo ""
echo " Next steps:"
echo " 1. Add hosts to inventory then git push"
echo " 2. bash scripts/deploy_agent.sh --inventory $INVENTORY_REPO_PATH"
echo " 3. Semaphore → $PROJECT_NAME → Preflight Check → ▶ Run"
case "$HYPERVISOR" in
  proxmox)
    echo " 4. Semaphore → $PROJECT_NAME → Snapshot (Proxmox) → ▶ Run"
    ;;
  xcpng)
    echo " 4. Semaphore → $PROJECT_NAME → XCP-NG Pool Update / Snapshot (XCP-NG) → ▶ Run"
    ;;
  mixed)
    echo " 4. Semaphore → $PROJECT_NAME → XCP-NG Pool Update / Snapshot (Proxmox) / Snapshot (XCP-NG) → ▶ Run"
    ;;
  baremetal)
    echo " NOTE: Baremetal — no snapshots. Get explicit change approval before patching."
    ;;
esac
echo ""
# Repeat the public key at the end so it is easy to copy after a real run.
if [[ "$DRY_RUN" != "true" && -f "$KEY_FILE.pub" ]]; then
  echo " Client public key:"
  sed 's/^/ /' "$KEY_FILE.pub"
fi
echo ""