#!/usr/bin/env bash
# =============================================================================
# k8s-safe-shutdown.sh
# Safe shutdown/reboot for Talos K8s cluster before powering off Proxmox host
#
# Usage (from Proxmox host):
#   chmod +x k8s-safe-shutdown.sh
#   ./k8s-safe-shutdown.sh            # graceful shutdown
#   ./k8s-safe-shutdown.sh --reboot   # graceful reboot
#   ./k8s-safe-shutdown.sh --test     # connectivity checks only, no changes
#
# Requirements on Proxmox host:
#   - SSH key access to your cluster management node
#   - talosctl + kubectl installed on that node
#     (`talos` on that node is an alias to a function that runs
#        talosctl --talosconfig ~/.config/talosconfig \
#          -n "${TALOS_DEFAULT_NODE:-10.0.30.21}" "${args[@]}")
#     NOTE(review): shells normally do not expand aliases in non-interactive
#     sessions — confirm that `ssh <mgmt-node> talos ...` resolves `talos`.
#   - TALOSCONFIG / KUBECONFIG set on that node
# =============================================================================
set -euo pipefail

# ─── Arguments ───────────────────────────────────────────────────────────────
ACTION="shutdown"   # "shutdown" or "reboot": what the host does at the very end
TEST_ONLY=false     # true when --test was given: run checks only, change nothing

# Print the command-line help text to stdout.
usage() {
  echo "Usage: $0 [--shutdown|--reboot|--test]"
  echo " --shutdown Gracefully stop cluster and power off host (default)"
  echo " --reboot Gracefully stop cluster and reboot host"
  echo " --test Check all connectivity only — no changes made"
}

# Parse the command-line flags into ACTION / TEST_ONLY.
# Arguments: the script's own "$@".
# Exits 0 after printing help; exits 1 on an unknown flag, with the
# diagnostic on stderr so it is not mistaken for normal output.
parse_args() {
  local arg
  for arg in "$@"; do
    case "$arg" in
      --reboot)   ACTION="reboot" ;;
      --shutdown) ACTION="shutdown" ;;
      --test)     TEST_ONLY=true ;;
      --help|-h)
        usage
        exit 0
        ;;
      *)
        echo "Unknown argument: $arg" >&2
        usage >&2
        exit 1
        ;;
    esac
  done
}
parse_args "$@"

# ─── Configuration ────────────────────────────────────────────────────────────
MGMT_HOST="10.0.30.40"            # SSH target: your cluster mgmt node / jumpbox
MGMT_USER="sysadmin"              # SSH user
SSH_KEY="$HOME/.ssh/id_ed25519"   # SSH key on this Proxmox host
SSH_PORT="3333"

# Talos node IPs (workers first, control plane last)
WORKER_NODES=(
  "10.0.30.30"   # worker-1
  "10.0.30.31"   # worker-2
  "10.0.30.32"   # worker-3
)
CONTROL_PLANE="10.0.30.21"        # control-plane node

# How long to wait for pods to drain before forcing (seconds)
DRAIN_TIMEOUT=120

# How long to wait for Longhorn volumes to detach (seconds)
LONGHORN_TIMEOUT=120

# How long to wait for other VMs to shut down gracefully (seconds)
VM_SHUTDOWN_TIMEOUT=120

# ─── Colours ─────────────────────────────────────────────────────────────────
RED='\033[0;31m'; YELLOW='\033[1;33m'; GREEN='\033[0;32m'
CYAN='\033[0;36m'; BOLD='\033[1m'; RESET='\033[0m'

# Timestamped, colourised log helpers. `fail` prints to stderr and terminates
# the script with status 1.
log()  { echo -e "${CYAN}[$(date +%H:%M:%S)]${RESET} $*"; }
ok()   { echo -e "${GREEN}[$(date +%H:%M:%S)] ✓${RESET} $*"; }
warn() { echo -e "${YELLOW}[$(date +%H:%M:%S)] ⚠${RESET} $*"; }
fail() { echo -e "${RED}[$(date +%H:%M:%S)] ✗${RESET} $*" >&2; exit 1; }

# Helper: run a command on the management node over SSH.
# Arguments: the remote command and its arguments.
# Returns:   ssh's exit status.
# `-n` keeps ssh from reading stdin, so calling this inside a
# `while read ...` loop cannot swallow the loop's remaining input.
# NOTE(review): StrictHostKeyChecking=no skips host-key verification —
# confirm this is acceptable on the management network.
mgmt() {
  ssh -n -i "$SSH_KEY" -p "$SSH_PORT" -o StrictHostKeyChecking=no \
    "${MGMT_USER}@${MGMT_HOST}" "$@"
}

# ─── Preflight ────────────────────────────────────────────────────────────────
# --test wins over --reboot/--shutdown for the banner because test mode is
# what actually runs when both are given.
if [[ "$TEST_ONLY" == true ]]; then
  BANNER_TITLE="║ K8S CONNECTIVITY TEST — no changes ║"
elif [[ "$ACTION" == "reboot" ]]; then
  BANNER_TITLE="║ K8S SAFE REBOOT — Talos / Proxmox ║"
else
  BANNER_TITLE="║ K8S SAFE SHUTDOWN — Talos / Proxmox ║"
fi
echo ""
echo -e "${BOLD}╔══════════════════════════════════════════════════════╗${RESET}"
echo -e "${BOLD}${BANNER_TITLE}${RESET}"
echo -e "${BOLD}╚══════════════════════════════════════════════════════╝${RESET}"
echo ""

if [[ "$TEST_ONLY" == true ]]; then
  log "TEST MODE — checking connectivity only, nothing will be changed"
  echo ""
  TEST_WARNINGS=0   # number of checks that warned; drives the final summary

  log "① SSH to management node ($MGMT_USER@$MGMT_HOST)..."
  if mgmt echo "ok" &>/dev/null; then
    ok " SSH connection successful"
  else
    fail " Cannot SSH to $MGMT_HOST — check MGMT_HOST, MGMT_USER and SSH_KEY"
  fi

  log "② kubectl — cluster nodes..."
  # Capture first so a kubectl failure is reported instead of silently
  # killing the script through `set -e` + pipefail.
  if ! NODE_LIST=$(mgmt kubectl get nodes --no-headers); then
    fail " kubectl could not list nodes via $MGMT_HOST — check KUBECONFIG on the management node"
  fi
  while read -r name status _; do
    [[ -n "$name" ]] || continue
    if [[ "$status" == "Ready" ]]; then
      ok " $name → $status"
    else
      warn " $name → $status"
      TEST_WARNINGS=$((TEST_WARNINGS + 1))
    fi
  done <<< "$NODE_LIST"

  log "③ talosctl — worker nodes..."
  for node in "${WORKER_NODES[@]}"; do
    if mgmt talos version --nodes "$node" --short &>/dev/null; then
      ok " talosctl → $node reachable"
    else
      warn " talosctl → $node NOT reachable"
      TEST_WARNINGS=$((TEST_WARNINGS + 1))
    fi
  done

  log "④ talosctl — control plane ($CONTROL_PLANE)..."
  if mgmt talos version --nodes "$CONTROL_PLANE" --short &>/dev/null; then
    ok " talosctl → $CONTROL_PLANE reachable"
  else
    warn " talosctl → $CONTROL_PLANE NOT reachable"
    TEST_WARNINGS=$((TEST_WARNINGS + 1))
  fi

  log "⑤ Longhorn volumes..."
  # One query serves both counts; a failed query is treated as "no volumes".
  VOLUME_LIST=$(mgmt kubectl get volumes -n longhorn-system --no-headers 2>/dev/null || true)
  ATTACHED=$(grep -c "attached" <<< "$VOLUME_LIST" || true)
  TOTAL=$(grep -c . <<< "$VOLUME_LIST" || true)
  ok " $ATTACHED/$TOTAL volumes currently attached"

  log "⑥ Proxmox guests..."
  # awk does the counting and always prints exactly one number, even when
  # qm/pct is missing or fails.
  VM_COUNT=$({ qm list 2>/dev/null || true; } | awk 'NR>1 && $3=="running" {n++} END {print n+0}')
  CT_COUNT=$({ pct list 2>/dev/null || true; } | awk 'NR>1 && $2=="running" {n++} END {print n+0}')
  ok " $VM_COUNT running VMs, $CT_COUNT running CTs (excluding K8s nodes)"

  echo ""
  if (( TEST_WARNINGS == 0 )); then
    ok "Test complete — all checks passed. Ready to run with --shutdown or --reboot."
  else
    warn "Test complete — $TEST_WARNINGS check(s) reported warnings. Review them before running with --shutdown or --reboot."
  fi
  exit 0
fi

echo ""
warn "This will gracefully stop your entire Kubernetes cluster."
warn "All workloads will be stopped and volumes detached cleanly."
warn "Then all other Proxmox VMs will be stopped before the host ${ACTION}s."
echo ""
# `|| true`: on EOF (no terminal attached) fall through to the explicit
# "Aborted." path instead of dying silently under `set -e`.
CONFIRM=""
read -rp "$(echo -e "${YELLOW}Action: ${ACTION^^} — Type YES to continue: ${RESET}")" CONFIRM || true
[[ "$CONFIRM" == "YES" ]] || { echo "Aborted."; exit 0; }
echo ""

# ─── Step 1: Verify cluster is reachable ─────────────────────────────────────
log "Step 1/7 — Checking cluster connectivity..."
# Capture the node list first so an unreachable cluster aborts with a clear
# message before anything has been changed.
if ! NODE_STATUS=$(mgmt kubectl get nodes --no-headers); then
  fail " Cannot list cluster nodes via $MGMT_HOST — aborting before any changes are made"
fi
while read -r name status _; do
  [[ -n "$name" ]] || continue
  if [[ "$status" == "Ready" ]]; then
    ok " $name → $status"
  else
    warn " $name → $status (not Ready — proceeding anyway)"
  fi
done <<< "$NODE_STATUS"
echo ""

# ─── Step 2: Suspend Flux reconciliation ─────────────────────────────────────
log "Step 2/7 — Suspending Flux to prevent reconcile loops during shutdown..."
if KUSTOMIZATIONS=$(mgmt kubectl get kustomizations -A --no-headers 2>/dev/null) \
  && [[ -n "$KUSTOMIZATIONS" ]]; then
  while read -r ns name _; do
    [[ -n "$name" ]] || continue
    # </dev/null: ssh must not read the kustomization list that feeds this
    # loop, otherwise only the first entry would ever be processed.
    if mgmt flux suspend kustomization "$name" -n "$ns" </dev/null 2>/dev/null; then
      ok " Suspended kustomization: $ns/$name"
    else
      warn " Could not suspend $ns/$name (flux CLI may not be installed — skipping)"
    fi
  done <<< "$KUSTOMIZATIONS"
else
  warn " Flux not found or no kustomizations — skipping"
fi
echo ""

# ─── Step 3: Cordon all worker nodes ─────────────────────────────────────────
log "Step 3/7 — Cordoning all worker nodes (no new scheduling)..."
# One name/IP listing serves every worker lookup below.
NODE_IP_MAP=$(mgmt kubectl get nodes --no-headers -o custom-columns="NAME:.metadata.name,IP:.status.addresses[0].address" || true)
for node in "${WORKER_NODES[@]}"; do
  # Exact field comparison: a regex/substring match would let one IP match
  # another that merely contains it (e.g. 10.0.30.3 vs 10.0.30.30).
  NODE_NAME=$(awk -v ip="$node" '$2 == ip && !seen++ {print $1}' <<< "$NODE_IP_MAP")
  if [[ -n "$NODE_NAME" ]]; then
    if mgmt kubectl cordon "$NODE_NAME"; then
      ok " Cordoned $NODE_NAME ($node)"
    else
      warn " Could not cordon $NODE_NAME"
    fi
  else
    warn " Could not find node for IP $node — skipping cordon"
  fi
done
echo ""

# ─── Helper: nuke all Longhorn PDBs ──────────────────────────────────────────
# Delete every PodDisruptionBudget in the longhorn-system namespace.
# Best effort: lookup and delete errors are ignored, and the function always
# returns 0 so it can be called repeatedly while a drain is in progress.
delete_longhorn_pdbs() {
  local pdbs pdb
  pdbs=$(mgmt kubectl get pdb -n longhorn-system --no-headers -o custom-columns="NAME:.metadata.name" 2>/dev/null || true)
  [[ -n "$pdbs" ]] || return 0
  while read -r pdb; do
    [[ -n "$pdb" ]] || continue
    # </dev/null: keep ssh from consuming the rest of the PDB list.
    if mgmt kubectl delete pdb "$pdb" -n longhorn-system </dev/null 2>/dev/null; then
      ok " Deleted PDB: $pdb"
    fi
  done <<< "$pdbs"
  return 0
}

# ─── Step 4: Drain workloads ─────────────────────────────────────────────────
log "Step 4/7 — Draining workloads from worker nodes (timeout: ${DRAIN_TIMEOUT}s)..."
for node in "${WORKER_NODES[@]}"; do
  # Resolve the Kubernetes node name for this IP (exact field match, so one
  # IP cannot match another that merely contains it).
  NODE_NAME=$(mgmt kubectl get nodes --no-headers -o custom-columns="NAME:.metadata.name,IP:.status.addresses[0].address" \
    | awk -v ip="$node" '$2 == ip && !seen++ {print $1}' || true)
  if [[ -n "$NODE_NAME" ]]; then
    log " Clearing Longhorn PDBs before draining $NODE_NAME..."
    delete_longhorn_pdbs
    log " Draining $NODE_NAME..."
    # Run drain in background, keep killing PDBs while it works
    mgmt kubectl drain "$NODE_NAME" \
      --ignore-daemonsets \
      --delete-emptydir-data \
      --force \
      --timeout="${DRAIN_TIMEOUT}s" \
      --grace-period=30 &
    DRAIN_PID=$!
    # Poll and delete any new PDBs while drain is running
    while kill -0 "$DRAIN_PID" 2>/dev/null; do
      sleep 4
      delete_longhorn_pdbs
    done
    # `wait` in an `if` so a failed drain is reported instead of tripping set -e.
    if wait "$DRAIN_PID"; then
      ok " $NODE_NAME drained"
    else
      warn " $NODE_NAME drain had warnings (DaemonSets left behind is normal)"
    fi
  else
    warn " Could not find node for IP $node — skipping"
  fi
done
echo ""

# ─── Step 5: Wait for Longhorn volumes to detach ─────────────────────────────
log "Step 5/7 — Waiting for Longhorn volumes to detach..."
ELAPSED=0
INTERVAL=10
while true; do
  # A failed query counts as 0 attached (grep -c prints 0 on empty input).
  ATTACHED=$(mgmt kubectl get volumes -n longhorn-system --no-headers 2>/dev/null | grep -c "attached" || true)
  if [[ "$ATTACHED" -eq 0 ]]; then
    ok " All Longhorn volumes detached"
    break
  fi
  if [[ "$ELAPSED" -ge "$LONGHORN_TIMEOUT" ]]; then
    warn " Timeout waiting for Longhorn — $ATTACHED volume(s) still attached"
    warn " Proceeding anyway (Longhorn will recover on next boot)"
    break
  fi
  log " $ATTACHED volume(s) still attached — waiting ${INTERVAL}s... (${ELAPSED}s elapsed)"
  sleep "$INTERVAL"
  ELAPSED=$((ELAPSED + INTERVAL))
done
echo ""

# ─── Helper: wait for a Talos node to disappear from kubectl ─────────────────
# Poll kubectl every 5 s until the node with the given IP is no longer
# listed or reports a NotReady status, for at most 120 s.
# Arguments: $1 - node IP as listed in WORKER_NODES / CONTROL_PLANE
# Returns:   always 0; a timeout only produces a warning.
wait_for_node_gone() {
  local node_ip="$1"
  local timeout=120
  local elapsed=0
  local interval=5
  local node_name node_state
  while true; do
    node_name=$(mgmt kubectl get nodes --no-headers -o custom-columns="NAME:.metadata.name,IP:.status.addresses[0].address" \
      2>/dev/null | awk -v ip="$node_ip" '$2 == ip && !seen++ {print $1}' || true)
    if [[ -z "$node_name" ]]; then
      ok " $node_ip — node gone from cluster"
      return 0
    fi
    # Match the node name exactly (worker-1 must not match worker-10).
    node_state=$(mgmt kubectl get nodes --no-headers 2>/dev/null \
      | awk -v n="$node_name" '$1 == n && !seen++ {print $2}' || true)
    # Prefix match: a cordoned node reports "NotReady,SchedulingDisabled",
    # which an exact comparison with "NotReady" would never accept.
    if [[ "$node_state" == NotReady* ]]; then
      ok " $node_ip — node is NotReady (safe to proceed)"
      return 0
    fi
    if [[ "$elapsed" -ge "$timeout" ]]; then
      warn " $node_ip — still Ready after ${timeout}s, proceeding anyway"
      return 0
    fi
    log " $node_ip — waiting for NotReady... (${elapsed}s)"
    sleep "$interval"
    elapsed=$((elapsed + interval))
  done
}

# ─── Step 6: Shut down Talos nodes ───────────────────────────────────────────
log "Step 6/7 — Initiating Talos shutdown sequence..."
log " Shutting down worker nodes first..."
for node in "${WORKER_NODES[@]}"; do
  log " Sending shutdown to $node..."
  if mgmt talos shutdown --nodes "$node" --force --wait=false 2>/dev/null; then
    ok " $node shutdown initiated"
  else
    warn " talosctl shutdown failed for $node — trying SSH poweroff"
    # Best-effort fallback; any failure here is deliberately ignored.
    ssh -o StrictHostKeyChecking=no -o ConnectTimeout=5 "root@$node" "poweroff" 2>/dev/null || true
  fi
  # Wait for the node to actually go NotReady before moving to the next one
  wait_for_node_gone "$node"
  sleep 5
done

log " Waiting 15s for workers to fully power off before stopping control plane..."
sleep 15

log " Shutting down control plane node ($CONTROL_PLANE)..."
if mgmt talos shutdown --nodes "$CONTROL_PLANE" --force --wait=false 2>/dev/null; then
  ok " Control plane shutdown initiated"
else
  warn " talosctl shutdown failed for control plane — trying SSH poweroff"
  # Best-effort fallback; any failure here is deliberately ignored.
  ssh -o StrictHostKeyChecking=no -o ConnectTimeout=5 "root@$CONTROL_PLANE" "poweroff" 2>/dev/null || true
fi
wait_for_node_gone "$CONTROL_PLANE"
echo ""

# ─── Step 7: Stop remaining Proxmox VMs/CTs, then host action ────────────────
log "Step 7/7 — Stopping remaining Proxmox VMs and containers..."

# IDs of all still-running guests, one array element per ID. A failing or
# missing qm/pct simply yields an empty list.
mapfile -t RUNNING_VMS < <(qm list 2>/dev/null | awk 'NR>1 && $3=="running" {print $1}' || true)
mapfile -t RUNNING_CTS < <(pct list 2>/dev/null | awk 'NR>1 && $2=="running" {print $1}' || true)

# Shutdowns run in parallel; remember each job's PID and a label so the
# result of every single guest can be checked afterwards.
GUEST_PIDS=()
GUEST_LABELS=()

if (( ${#RUNNING_VMS[@]} > 0 )); then
  for vmid in "${RUNNING_VMS[@]}"; do
    log " Stopping VM $vmid..."
    qm shutdown "$vmid" --timeout "$VM_SHUTDOWN_TIMEOUT" &
    GUEST_PIDS+=("$!")
    GUEST_LABELS+=("VM $vmid")
  done
else
  ok " No running VMs found"
fi

if (( ${#RUNNING_CTS[@]} > 0 )); then
  for ctid in "${RUNNING_CTS[@]}"; do
    log " Stopping CT $ctid..."
    pct shutdown "$ctid" --timeout "$VM_SHUTDOWN_TIMEOUT" &
    GUEST_PIDS+=("$!")
    GUEST_LABELS+=("CT $ctid")
  done
else
  ok " No running CTs found"
fi

log " Waiting for all guests to stop (up to ${VM_SHUTDOWN_TIMEOUT}s)..."
GUEST_FAILURES=0
for (( i = 0; i < ${#GUEST_PIDS[@]}; i++ )); do
  # `wait` inside `if`: a failed shutdown is reported rather than ignored
  # (bare `wait`) or fatal (`set -e`).
  if ! wait "${GUEST_PIDS[i]}"; then
    warn " ${GUEST_LABELS[i]} did not shut down cleanly"
    GUEST_FAILURES=$((GUEST_FAILURES + 1))
  fi
done
if (( GUEST_FAILURES == 0 )); then
  ok " All guests stopped"
else
  warn " $GUEST_FAILURES guest(s) did not stop cleanly — continuing with host ${ACTION}"
fi
echo ""

# ─── Final action ─────────────────────────────────────────────────────────────
if [[ "$ACTION" == "reboot" ]]; then
  ok "All done! Rebooting host now."
  echo ""
  shutdown -r now
else
  ok "All done! Powering off host now."
  echo ""
  shutdown -h now
fi