Files
ansible-msp-automations/scripts/deploy_agent.sh

518 lines
19 KiB
Bash
Executable File
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/bin/bash
# =============================================================================
# deploy_agent.sh — MSP Agent Bootstrap Script
# =============================================================================
# Connects to Linux hosts defined in a client hosts.yml, creates the
# ansible-msp-agent service account, deploys SSH keys, configures sudoers,
# and hardens sshd_config.
#
# Usage:
# ./deploy_agent.sh --inventory /path/to/client_xxx/hosts.yml [options]
#
# Options:
# --inventory Path to client hosts.yml (required)
# --native-user Username to SSH in with (default: localcontrol)
# --native-pass Password for native user (will prompt if not provided)
# --root-pass Root password for su - (will prompt if not provided)
# --agent-user Service account to create (default: ansible-msp-agent)
# --client-key Path to client public key (default: auto-derived from inventory path)
# --msp-key Path to MSP backup public key file (default: /root/.ssh/ansible-msp-agent.pub)
# --key-repo-dir If set, look for public keys in this git repo dir instead
# --dry-run Show what would be done without making changes
# --skip-sshd Skip sshd_config modifications
# --help Show this help
#
# Dependencies: sshpass, python3, python3-yaml, ssh, ssh-keyscan, base64 (GNU coreutils, needs -w)
# =============================================================================
# Strict mode: abort on errors, on unset variables, and on failures anywhere
# in a pipeline.
set -o errexit -o nounset -o pipefail
# ─── Defaults ────────────────────────────────────────────────────────────────
# Run-mode switches (overridden by --dry-run / --skip-sshd).
DRY_RUN=false
SKIP_SSHD=false
# Where the client inventory and this repository live.
INVENTORY_PATH=''
REPO_DIR='/opt/ansible-msp-automations'
# Login credentials; empty passwords are prompted for later.
NATIVE_USER='localcontrol'
NATIVE_PASS=''
ROOT_PASS=''
# Service account that gets created on each host.
AGENT_USER='ansible-msp-agent'
# Public key locations.
CLIENT_KEY_PATH=''   # auto-derived from the inventory path when left empty
MSP_KEY_PATH='/root/.ssh/ansible-msp-agent.pub'
KEY_REPO_DIR=''      # future: point to keys/ dir in git repo
# Pull in /root/.semaphore_env when it exists. It may define
# MSP_BACKUP_PUBKEY, which serves as a fallback when the MSP key file is
# not present on disk.
if [[ -f /root/.semaphore_env ]]; then
  # shellcheck source=/dev/null
  . /root/.semaphore_env
fi
# ─── Counters ────────────────────────────────────────────────────────────────
# Running tallies for the end-of-run summary; all start at zero.
HOSTS_TOTAL=0 HOSTS_OK=0 HOSTS_FAILED=0 HOSTS_SKIPPED=0
# One human-readable entry per host that could not be bootstrapped.
declare -a FAILED_HOSTS=()
# ─── Colors ──────────────────────────────────────────────────────────────────
# ANSI color escape sequences (stored unexpanded; rendered via printf %b).
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m'

# Logging helpers. Each prints all of its arguments (joined by spaces) on one
# colored line on stdout.
#
# Only the color variables are rendered with %b; the message itself is printed
# with %s so that backslashes inside paths, host names or other data are shown
# literally instead of being interpreted as escape sequences.
log_info() { printf '%b %s%b\n' "$BLUE" "$*" "$NC"; }
log_ok() { printf '%b%s%b\n' "$GREEN" "$*" "$NC"; }
log_warn() { printf '%b%s%b\n' "$YELLOW" "$*" "$NC"; }
log_error() { printf '%b%s%b\n' "$RED" "$*" "$NC"; }
# Section banner: a blank line followed by the title framed with rule characters.
log_section() { printf '\n%b━━━ %s ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━%b\n' "$BLUE" "$*" "$NC"; }
# ─── Parse args ──────────────────────────────────────────────────────────────
#######################################
# Parse the command-line options into the script's global settings.
# Globals:
#   Writes INVENTORY_PATH, NATIVE_USER, NATIVE_PASS, ROOT_PASS, AGENT_USER,
#   CLIENT_KEY_PATH, MSP_KEY_PATH, KEY_REPO_DIR, DRY_RUN, SKIP_SSHD.
# Arguments:
#   The script's own "$@".
# Outputs:
#   --help prints the comment lines found in the first 30 lines of the script
#   (leading "# " removed). Errors are reported through log_error.
# Exits:
#   0 after --help; 1 on an unknown option or when a value-taking option is
#   the last argument (previously this died with "unbound variable").
#######################################
parse_args() {
  while [[ $# -gt 0 ]]; do
    # Options that consume a value must actually be followed by one.
    case "$1" in
      --inventory | --native-user | --native-pass | --root-pass | \
        --agent-user | --client-key | --msp-key | --key-repo-dir)
        if [[ $# -lt 2 ]]; then
          log_error "Option $1 requires a value"
          exit 1
        fi
        ;;
    esac
    case "$1" in
      --inventory) INVENTORY_PATH="$2"; shift 2 ;;
      --native-user) NATIVE_USER="$2"; shift 2 ;;
      --native-pass) NATIVE_PASS="$2"; shift 2 ;;
      --root-pass) ROOT_PASS="$2"; shift 2 ;;
      --agent-user) AGENT_USER="$2"; shift 2 ;;
      --client-key) CLIENT_KEY_PATH="$2"; shift 2 ;;
      --msp-key) MSP_KEY_PATH="$2"; shift 2 ;;
      --key-repo-dir) KEY_REPO_DIR="$2"; shift 2 ;;
      --dry-run) DRY_RUN=true; shift ;;
      --skip-sshd) SKIP_SSHD=true; shift ;;
      --help)
        head -30 "$0" | grep "^#" | sed 's/^# \?//'
        exit 0
        ;;
      *)
        log_error "Unknown option: $1"
        exit 1
        ;;
    esac
  done
}
parse_args "$@"
# ─── Validate ────────────────────────────────────────────────────────────────
# The inventory is mandatory and must be an existing regular file.
if [[ -z "$INVENTORY_PATH" ]]; then
  log_error "--inventory is required"
  exit 1
fi
if [[ ! -f "$INVENTORY_PATH" ]]; then
  log_error "Inventory file not found: $INVENTORY_PATH"
  exit 1
fi
# Check dependencies up front so a missing tool is reported before any host
# is touched. base64 is included because the remote script is shipped to the
# hosts base64-encoded.
for dep in sshpass python3 ssh ssh-keyscan base64; do
  if ! command -v "$dep" &> /dev/null; then
    log_error "Missing dependency: $dep"
    echo " Install with: apt install sshpass python3 python3-yaml openssh-client coreutils"
    exit 1
  fi
done
# The inventory is parsed with PyYAML; make sure the module is importable.
if ! python3 -c "import yaml" 2> /dev/null; then
  log_error "python3-yaml not installed: apt install python3-yaml"
  exit 1
fi
# ─── Derive client key path from inventory path if not set ───────────────────
# No explicit --client-key: derive the key name from the inventory directory,
# e.g. .../inventories/client_foo/hosts.yml -> client_foo.pub
if [[ -z "$CLIENT_KEY_PATH" ]]; then
  INVENTORY_DIR=$(dirname "$INVENTORY_PATH")
  SLUG=$(basename "$INVENTORY_DIR")
  # Drop a leading "client_" so it is not doubled in the file name below.
  KEY_SLUG="${SLUG#client_}"
  # Start from the default location under /root/.ssh ...
  CLIENT_KEY_PATH="/root/.ssh/client_${KEY_SLUG}.pub"
  # ... and prefer the copy in the key repo when one is configured and present.
  if [[ -n "$KEY_REPO_DIR" ]] && [[ -f "$KEY_REPO_DIR/keys/client_${KEY_SLUG}.pub" ]]; then
    CLIENT_KEY_PATH="$KEY_REPO_DIR/keys/client_${KEY_SLUG}.pub"
  fi
fi
# A key repo that ships the MSP key takes over the MSP key path as well.
if [[ -n "$KEY_REPO_DIR" ]]; then
  if [[ -f "$KEY_REPO_DIR/keys/ansible-msp-agent.pub" ]]; then
    MSP_KEY_PATH="$KEY_REPO_DIR/keys/ansible-msp-agent.pub"
  fi
fi
# The client public key has no fallback; it must exist on disk.
[[ -f "$CLIENT_KEY_PATH" ]] || {
  log_error "Client public key not found: $CLIENT_KEY_PATH"
  log_error "Generate it with: ssh-keygen -t ed25519 -f ${CLIENT_KEY_PATH%.pub}"
  exit 1
}
# MSP public key: the file wins; MSP_BACKUP_PUBKEY from the environment is the
# fallback. With neither available there is nothing to deploy, so stop.
if [[ ! -f "$MSP_KEY_PATH" && -z "${MSP_BACKUP_PUBKEY:-}" ]]; then
  log_error "MSP backup public key not found: $MSP_KEY_PATH"
  log_error "Set MSP_BACKUP_PUBKEY in /root/.semaphore_env or pass --msp-key"
  exit 1
fi
if [[ -f "$MSP_KEY_PATH" ]]; then
  MSP_PUBKEY=$(cat "$MSP_KEY_PATH")
else
  MSP_PUBKEY="$MSP_BACKUP_PUBKEY"
  log_info "MSP key loaded from environment (MSP_BACKUP_PUBKEY)"
fi
CLIENT_PUBKEY=$(cat "$CLIENT_KEY_PATH")
# ─── Prompt for passwords if not provided ────────────────────────────────────
# Ask interactively for any password that was not supplied on the command
# line. Input is not echoed (-s) and backslashes are kept literally (-r).
# IFS is emptied for the read so that leading or trailing whitespace that is
# part of the password is preserved instead of being silently trimmed.
if [[ -z "$NATIVE_PASS" ]]; then
  printf '%s' "Password for ${NATIVE_USER}: "
  IFS= read -rs NATIVE_PASS
  echo
fi
if [[ -z "$ROOT_PASS" ]]; then
  printf '%s' "Root password (for su -): "
  IFS= read -rs ROOT_PASS
  echo
fi
# ─── Parse inventory for hosts ───────────────────────────────────────────────
log_section "Parsing inventory"
log_info "Inventory: $INVENTORY_PATH"
# Extract Linux and Windows hosts using Python.
#
# The inventory path is handed over as argv[1] and the heredoc delimiter is
# quoted, so the path is never pasted into Python source (a quote in the path
# used to break the program) and nothing in the Python text is expanded by
# the shell.
#
# Output: one JSON object {"linux": [...], "windows": [...]} where every host
# entry has the string fields name, ip, native_user, native_pass, root_pass.
# Host variables override group variables of the same name.
#
# NOTE: the entries are flattened to '|'-separated lines below and split on
# '|' again by the host loop, so values containing '|' or a newline are not
# supported by this format.
HOST_DATA=$(python3 - "$INVENTORY_PATH" << 'PYEOF'
import json
import sys

import yaml

with open(sys.argv[1]) as f:
    # An empty YAML document loads as None; treat it as an empty inventory.
    inv = yaml.safe_load(f) or {}

linux_hosts = []
windows_hosts = []


def as_text(value):
    # YAML scalars can be ints, bools or null (e.g. an all-digit password);
    # the shell side joins the fields as strings, so normalise them here.
    return '' if value is None else str(value)


def extract_hosts(group, target_list):
    if not group:
        return
    hosts = group.get('hosts') or {}
    group_vars = group.get('vars') or {}
    for hostname, hvars in hosts.items():
        hvars = hvars or {}
        merged = {**group_vars, **hvars}
        target_list.append({
            'name': as_text(hostname),
            # Fall back to the inventory name when ansible_host is absent or empty.
            'ip': as_text(merged.get('ansible_host') or hostname),
            'native_user': as_text(merged.get('deploy_native_user', '')),
            'native_pass': as_text(merged.get('deploy_native_pass', '')),
            'root_pass': as_text(merged.get('deploy_root_pass', '')),
        })


children = (inv.get('all') or {}).get('children') or {}
extract_hosts(children.get('linux_hosts'), linux_hosts)
extract_hosts(children.get('windows_hosts'), windows_hosts)
print(json.dumps({'linux': linux_hosts, 'windows': windows_hosts}))
PYEOF
)
# Flatten the JSON into one '|'-separated line per host for the shell loops.
LINUX_HOSTS=$(python3 -c "import json,sys; d=json.load(sys.stdin); [print('|'.join([h['name'],h['ip'],h['native_user'],h['native_pass'],h['root_pass']])) for h in d['linux']]" <<< "$HOST_DATA")
WINDOWS_HOSTS=$(python3 -c "import json,sys; d=json.load(sys.stdin); [print(h['name']+'|'+h['ip']) for h in d['windows']]" <<< "$HOST_DATA")
# grep -c exits non-zero when it counts 0 lines; "|| true" keeps set -e quiet
# while the printed "0" is still captured.
LINUX_COUNT=$(grep -c '.' <<< "$LINUX_HOSTS" || true)
WINDOWS_COUNT=$(grep -c '.' <<< "$WINDOWS_HOSTS" || true)
log_info "Linux hosts found: $LINUX_COUNT"
log_info "Windows hosts found: $WINDOWS_COUNT (skipped — WinRM setup not yet implemented)"
# Make it obvious up front when nothing will be changed.
[[ "$DRY_RUN" != "true" ]] || log_warn "DRY RUN MODE — no changes will be made"
# ─── Windows stub ────────────────────────────────────────────────────────────
# Windows hosts are only reported and counted as skipped; each input line is
# "name|ip" and blank lines are ignored.
if [[ -n "$WINDOWS_HOSTS" ]]; then
  log_section "Windows hosts (stub)"
  while IFS='|' read -r win_name win_ip; do
    if [[ -n "$win_name" ]]; then
      log_warn "SKIP $win_name ($win_ip) — Windows host, WinRM/SSH setup not yet implemented"
      HOSTS_SKIPPED=$((HOSTS_SKIPPED + 1))
    fi
  done <<< "$WINDOWS_HOSTS"
fi
# ─── Remote setup script ─────────────────────────────────────────────────────
#######################################
# Print the bash script that is executed as root on each remote host.
#
# The generated script (see the heredoc below):
#   - creates the sudo-nopasswd group and the agent user if missing, and adds
#     the user to the sudo and sudo-nopasswd groups;
#   - writes /etc/sudoers.d/99-ansible-nopasswd and validates it with visudo
#     when visudo is available;
#   - appends the client and MSP public keys to the authorized_keys of the
#     agent user and of root unless they are already present;
#   - unless skipping is requested, sets PubkeyAuthentication,
#     PermitRootLogin and AuthorizedKeysFile in /etc/ssh/sshd_config and
#     reloads sshd.
#
# Arguments:
#   $1 - client public key text
#   $2 - MSP backup public key text
#   $3 - name of the agent user to create
#   $4 - "true" to leave sshd_config untouched
# Outputs:
#   The remote script on stdout.
#
# The four values are embedded shell-quoted (printf %q), so a quote, "$" or
# backtick inside a key comment or user name cannot terminate the assignment
# or inject commands into the generated script. In the heredoc, "\$" defers
# expansion to the remote host; unescaped "${...}" is expanded here.
#######################################
build_remote_script() {
  local HOST_CLIENT_PUBKEY="$1"
  local HOST_MSP_PUBKEY="$2"
  local HOST_AGENT_USER="$3"
  local HOST_SKIP_SSHD="$4"
  local client_pubkey_q msp_pubkey_q agent_user_q skip_sshd_q
  printf -v client_pubkey_q '%q' "$HOST_CLIENT_PUBKEY"
  printf -v msp_pubkey_q '%q' "$HOST_MSP_PUBKEY"
  printf -v agent_user_q '%q' "$HOST_AGENT_USER"
  printf -v skip_sshd_q '%q' "$HOST_SKIP_SSHD"
  cat << REMOTESCRIPT
#!/bin/bash
set -e
AGENT_USER=${agent_user_q}
CLIENT_PUBKEY=${client_pubkey_q}
MSP_PUBKEY=${msp_pubkey_q}
SKIP_SSHD=${skip_sshd_q}
echo "[remote] Starting agent bootstrap on \$(hostname)"
# ── Create sudo-nopasswd group if missing ──
if ! getent group sudo-nopasswd > /dev/null 2>&1; then
groupadd sudo-nopasswd
echo "[remote] Created group: sudo-nopasswd"
else
echo "[remote] Group sudo-nopasswd already exists"
fi
# ── Create agent user if missing ──
if ! id "\$AGENT_USER" > /dev/null 2>&1; then
useradd -m -s /bin/bash -G sudo,sudo-nopasswd "\$AGENT_USER"
echo "[remote] Created user: \$AGENT_USER"
else
echo "[remote] User \$AGENT_USER already exists — ensuring group membership"
usermod -aG sudo,sudo-nopasswd "\$AGENT_USER" || true
fi
# ── Sudoers ──
SUDOERS_FILE="/etc/sudoers.d/99-ansible-nopasswd"
cat > "\$SUDOERS_FILE" << SUDOEOF
# Managed by ansible-msp deploy_agent.sh
# Members of sudo-nopasswd group can run all commands without password
%sudo-nopasswd ALL=(ALL) NOPASSWD:ALL
SUDOEOF
chmod 440 "\$SUDOERS_FILE"
if command -v visudo &>/dev/null; then
visudo -cf "\$SUDOERS_FILE" && echo "[remote] Sudoers file validated OK" || {
echo "[remote] ERROR: sudoers file invalid — removing"
rm -f "\$SUDOERS_FILE"
exit 1
}
else
echo "[remote] visudo not found — skipping validation (file written with chmod 440)"
fi
# ── Deploy SSH keys to agent user ──
AGENT_SSH_DIR="/home/\$AGENT_USER/.ssh"
mkdir -p "\$AGENT_SSH_DIR"
chmod 700 "\$AGENT_SSH_DIR"
AUTH_KEYS="\$AGENT_SSH_DIR/authorized_keys"
touch "\$AUTH_KEYS"
# Add client key if not present
if ! grep -qF "\$CLIENT_PUBKEY" "\$AUTH_KEYS" 2>/dev/null; then
echo "\$CLIENT_PUBKEY" >> "\$AUTH_KEYS"
echo "[remote] Client key added to \$AGENT_USER"
else
echo "[remote] Client key already present for \$AGENT_USER"
fi
# Add MSP backup key if not present
if ! grep -qF "\$MSP_PUBKEY" "\$AUTH_KEYS" 2>/dev/null; then
echo "\$MSP_PUBKEY" >> "\$AUTH_KEYS"
echo "[remote] MSP backup key added to \$AGENT_USER"
else
echo "[remote] MSP backup key already present for \$AGENT_USER"
fi
chmod 600 "\$AUTH_KEYS"
chown -R "\$AGENT_USER:\$AGENT_USER" "\$AGENT_SSH_DIR"
# ── Deploy SSH keys to root ──
ROOT_SSH_DIR="/root/.ssh"
mkdir -p "\$ROOT_SSH_DIR"
chmod 700 "\$ROOT_SSH_DIR"
ROOT_AUTH_KEYS="\$ROOT_SSH_DIR/authorized_keys"
touch "\$ROOT_AUTH_KEYS"
if ! grep -qF "\$CLIENT_PUBKEY" "\$ROOT_AUTH_KEYS" 2>/dev/null; then
echo "\$CLIENT_PUBKEY" >> "\$ROOT_AUTH_KEYS"
echo "[remote] Client key added to root"
else
echo "[remote] Client key already present for root"
fi
if ! grep -qF "\$MSP_PUBKEY" "\$ROOT_AUTH_KEYS" 2>/dev/null; then
echo "\$MSP_PUBKEY" >> "\$ROOT_AUTH_KEYS"
echo "[remote] MSP backup key added to root"
else
echo "[remote] MSP backup key already present for root"
fi
chmod 600 "\$ROOT_AUTH_KEYS"
# ── Adjust sshd_config ──
if [[ "\$SKIP_SSHD" != "true" ]]; then
SSHD_CONFIG="/etc/ssh/sshd_config"
set_sshd_option() {
local KEY="\$1"
local VALUE="\$2"
if grep -qE "^#?\s*\${KEY}\s" "\$SSHD_CONFIG"; then
sed -i "s|^#\?\s*\${KEY}\s.*|\${KEY} \${VALUE}|" "\$SSHD_CONFIG"
else
echo "\${KEY} \${VALUE}" >> "\$SSHD_CONFIG"
fi
echo "[remote] sshd_config: \${KEY} = \${VALUE}"
}
set_sshd_option "PubkeyAuthentication" "yes"
set_sshd_option "PermitRootLogin" "prohibit-password"
set_sshd_option "AuthorizedKeysFile" ".ssh/authorized_keys"
# Reload sshd
if command -v systemctl &>/dev/null; then
systemctl reload sshd 2>/dev/null || systemctl reload ssh 2>/dev/null || true
else
service sshd reload 2>/dev/null || service ssh reload 2>/dev/null || true
fi
echo "[remote] sshd reloaded"
fi
echo "[remote] Bootstrap complete on \$(hostname)"
REMOTESCRIPT
}
# ─── Process Linux hosts ─────────────────────────────────────────────────────
# Nothing to do when the inventory lists no Linux hosts.
if [[ -z "$LINUX_HOSTS" ]]; then
  log_warn "No Linux hosts found in inventory"
  exit 0
fi
log_section "Processing Linux hosts"

# Options shared by every password-authenticated SSH call below.
# NOTE(review): StrictHostKeyChecking=no accepts whatever host key is
# presented, so the ssh-keyscan step only pre-populates known_hosts; it does
# not authenticate the host.
SSH_PASSWORD_OPTS=(
  -o StrictHostKeyChecking=no
  -o ConnectTimeout=10
  -o PasswordAuthentication=yes
)

# The remote script is the same for every host, so build and encode it once.
# Skipped in dry-run mode, where it is never sent anywhere.
REMOTE_SCRIPT_B64=""
if [[ "$DRY_RUN" != "true" ]]; then
  REMOTE_SCRIPT=$(build_remote_script \
    "$CLIENT_PUBKEY" \
    "$MSP_PUBKEY" \
    "$AGENT_USER" \
    "$SKIP_SSHD")
  REMOTE_SCRIPT_B64=$(printf '%s\n' "$REMOTE_SCRIPT" | base64 -w 0)
fi
# Private key matching the client public key; used only to verify key login.
CLIENT_PRIVKEY="${CLIENT_KEY_PATH%.pub}"

# The loop reads the host list from a temp file on its own stdin. Every
# ssh/ssh-keyscan call inside the loop is given an explicit stdin, so none of
# them can swallow the remaining host lines.
LINUX_HOSTS_FILE=$(mktemp)
trap 'rm -f "$LINUX_HOSTS_FILE"' EXIT
printf '%s\n' "$LINUX_HOSTS" > "$LINUX_HOSTS_FILE"

# Each line is: name|ip|native_user|native_pass|root_pass (fields may be empty).
# HOST_NAME is used instead of HOSTNAME because bash maintains HOSTNAME itself.
while IFS='|' read -r HOST_NAME HOST_IP HOST_NATIVE_USER HOST_NATIVE_PASS HOST_ROOT_PASS; do
  if [[ -z "$HOST_NAME" ]]; then
    continue
  fi
  HOSTS_TOTAL=$((HOSTS_TOTAL + 1))

  # Per-host credentials from the inventory override the global ones.
  EFFECTIVE_NATIVE_USER="${HOST_NATIVE_USER:-$NATIVE_USER}"
  EFFECTIVE_NATIVE_PASS="${HOST_NATIVE_PASS:-$NATIVE_PASS}"
  EFFECTIVE_ROOT_PASS="${HOST_ROOT_PASS:-$ROOT_PASS}"

  echo ""
  log_section "Host: $HOST_NAME ($HOST_IP)"
  if [[ "$DRY_RUN" == "true" ]]; then
    log_info "DRY RUN: Would bootstrap $HOST_NAME ($HOST_IP) as $EFFECTIVE_NATIVE_USER → root → create $AGENT_USER"
    HOSTS_OK=$((HOSTS_OK + 1))
    continue
  fi

  # Add host to known_hosts (best effort; a failed scan is not fatal).
  log_info "Scanning host key..."
  ssh-keyscan -T 10 "$HOST_IP" >> /root/.ssh/known_hosts 2> /dev/null < /dev/null || true

  # Test native user SSH access. The password is handed to sshpass through
  # the SSHPASS environment variable (-e) rather than -p, so it does not show
  # up in the local process list.
  log_info "Testing SSH as $EFFECTIVE_NATIVE_USER..."
  if ! SSHPASS="$EFFECTIVE_NATIVE_PASS" sshpass -e ssh "${SSH_PASSWORD_OPTS[@]}" \
    "$EFFECTIVE_NATIVE_USER@$HOST_IP" "echo connected" < /dev/null &> /dev/null; then
    log_error "Cannot SSH to $HOST_NAME ($HOST_IP) as $EFFECTIVE_NATIVE_USER — skipping"
    FAILED_HOSTS+=("$HOST_NAME ($HOST_IP) — SSH connection failed")
    HOSTS_FAILED=$((HOSTS_FAILED + 1))
    continue
  fi
  log_ok "SSH connection successful"

  # Execute — if native user is root, run directly; otherwise use su -
  log_info "Executing bootstrap..."
  BOOTSTRAP_RC=0
  if [[ "$EFFECTIVE_NATIVE_USER" == "root" ]]; then
    BOOTSTRAP_OUTPUT=$(SSHPASS="$EFFECTIVE_NATIVE_PASS" sshpass -e ssh \
      "${SSH_PASSWORD_OPTS[@]}" \
      "root@$HOST_IP" \
      "echo $REMOTE_SCRIPT_B64 | base64 -d | bash" \
      < /dev/null 2>&1) || BOOTSTRAP_RC=$?
  else
    log_info "Escalating via su - root..."
    # The root password travels over ssh's stdin to the remote su, exactly as
    # the remote "echo ... | su" pipe did before, but without being pasted
    # into the remote command line: a quote in the password can no longer
    # break or alter the command, and the password is not visible in the
    # remote process list.
    BOOTSTRAP_OUTPUT=$(printf '%s\n' "$EFFECTIVE_ROOT_PASS" \
      | SSHPASS="$EFFECTIVE_NATIVE_PASS" sshpass -e ssh \
        "${SSH_PASSWORD_OPTS[@]}" \
        "$EFFECTIVE_NATIVE_USER@$HOST_IP" \
        "su - root -c 'echo $REMOTE_SCRIPT_B64 | base64 -d | bash'" \
        2>&1) || BOOTSTRAP_RC=$?
  fi
  if [[ "$BOOTSTRAP_RC" -ne 0 ]]; then
    log_error "Bootstrap script failed on $HOST_NAME"
    echo "$BOOTSTRAP_OUTPUT" | sed 's/^/ /'
    FAILED_HOSTS+=("$HOST_NAME ($HOST_IP) — bootstrap script failed")
    HOSTS_FAILED=$((HOSTS_FAILED + 1))
    continue
  fi

  # Show remote output. grep exits non-zero when nothing matches, which under
  # "set -e -o pipefail" would abort the whole run; tolerate that here.
  echo "$BOOTSTRAP_OUTPUT" | grep "\[remote\]" | sed 's/^/ /' || true

  # Verify key-based login works for agent user (warning only on failure).
  log_info "Verifying key-based login for $AGENT_USER..."
  if [[ -f "$CLIENT_PRIVKEY" ]]; then
    if ssh -i "$CLIENT_PRIVKEY" \
      -o StrictHostKeyChecking=no \
      -o ConnectTimeout=10 \
      -o PasswordAuthentication=no \
      "$AGENT_USER@$HOST_IP" "echo key-auth-ok" \
      < /dev/null &> /dev/null; then
      log_ok "Key-based login verified for $AGENT_USER"
    else
      log_warn "Key-based login test failed for $AGENT_USER — check manually"
    fi
  else
    log_warn "Private key not found at $CLIENT_PRIVKEY — skipping login verification"
  fi
  log_ok "Bootstrap complete: $HOST_NAME"
  HOSTS_OK=$((HOSTS_OK + 1))
done < "$LINUX_HOSTS_FILE"
# ─── Summary ─────────────────────────────────────────────────────────────────
# Final report: totals first, then the failed hosts (if any), then the number
# of skipped Windows hosts.
summary_rule="━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
printf '\n%s\n' "$summary_rule"
printf '%s\n' " Bootstrap Summary" "$summary_rule" " Total Linux hosts: $HOSTS_TOTAL"
echo -e " ${GREEN}Succeeded: $HOSTS_OK${NC}"
if ((HOSTS_FAILED > 0)); then
  echo -e " ${RED}Failed: $HOSTS_FAILED${NC}"
  printf '\n%s\n' " Failed hosts:"
  for failed_entry in "${FAILED_HOSTS[@]}"; do
    echo -e " ${RED}${failed_entry}${NC}"
  done
fi
if ((HOSTS_SKIPPED > 0)); then
  echo -e " ${YELLOW}Skipped (Windows): $HOSTS_SKIPPED${NC}"
fi
echo ""
# A non-zero exit status tells the caller that at least one host failed.
if ((HOSTS_FAILED > 0)); then
  exit 1
fi