350 lines
15 KiB
Bash
350 lines
15 KiB
Bash
#!/usr/bin/env bash
|
|
# =============================================================================
|
|
# k8s-safe-shutdown.sh
|
|
# Safe shutdown/reboot for Talos K8s cluster before powering off Proxmox host
|
|
#
|
|
# Usage (from Proxmox host):
|
|
# chmod +x k8s-safe-shutdown.sh
|
|
# ./k8s-safe-shutdown.sh # graceful shutdown
# ./k8s-safe-shutdown.sh --reboot # graceful reboot
# ./k8s-safe-shutdown.sh --test # connectivity checks only, no changes
|
|
#
|
|
# Requirements on Proxmox host:
|
|
# - SSH key access to your cluster management node
|
|
# - talosctl + kubectl installed on that node
|
|
# (on that node, "talos" is an alias for a function that runs: talosctl --talosconfig ~/.config/talosconfig -n "${TALOS_DEFAULT_NODE:-10.0.30.21}" "${args[@]}")
|
|
# - TALOSCONFIG / KUBECONFIG set on that node
|
|
# =============================================================================
|
|
|
|
set -euo pipefail

# ─── Arguments ───────────────────────────────────────────────────────────────
# ACTION    : what happens to the host once the cluster is stopped
#             ("shutdown" or "reboot"); when both flags are given the last wins.
# TEST_ONLY : set to true by --test.
ACTION="shutdown"
TEST_ONLY=false

# Print the command-line help text on stdout.
usage() {
  echo "Usage: $0 [--shutdown|--reboot|--test]"
  echo " --shutdown Gracefully stop cluster and power off host (default)"
  echo " --reboot Gracefully stop cluster and reboot host"
  echo " --test Check all connectivity only — no changes made"
}

# Parse the command-line flags into ACTION / TEST_ONLY.
# Arguments: the script's positional parameters.
# Exits 0 after printing help; exits 1 on an unknown argument.
parse_args() {
  local arg
  for arg in "$@"; do
    case "$arg" in
      --reboot)   ACTION="reboot" ;;
      --shutdown) ACTION="shutdown" ;;
      --test)     TEST_ONLY=true ;;
      --help | -h)
        usage
        exit 0
        ;;
      *)
        # Diagnostics belong on stderr so they never mix with normal output.
        echo "Unknown argument: $arg" >&2
        usage >&2
        exit 1
        ;;
    esac
  done
}

parse_args "$@"
|
|
|
|
# ─── Configuration ────────────────────────────────────────────────────────────
# SSH endpoint of the cluster management node / jumpbox, reached from this
# Proxmox host.
MGMT_USER="sysadmin"
MGMT_HOST="10.0.30.40"
SSH_PORT="3333"
SSH_KEY="${HOME}/.ssh/id_ed25519"   # private key on this Proxmox host

# Talos node IPs. Workers (worker-1, worker-2, worker-3) are handled first,
# the control-plane node last.
WORKER_NODES=("10.0.30.30" "10.0.30.31" "10.0.30.32")
CONTROL_PLANE="10.0.30.21"

# Timeouts, all in seconds:
DRAIN_TIMEOUT=120         # pod drain per node before forcing
LONGHORN_TIMEOUT=120      # Longhorn volumes to detach
VM_SHUTDOWN_TIMEOUT=120   # graceful shutdown of the other VMs
|
|
|
|
# ─── Colours ─────────────────────────────────────────────────────────────────
# ANSI colour sequences, stored with a literal backslash. They only become
# escape bytes when expanded through printf's %b (or echo -e).
RED='\033[0;31m'; YELLOW='\033[1;33m'; GREEN='\033[0;32m'
CYAN='\033[0;36m'; BOLD='\033[1m'; RESET='\033[0m'

# Print one timestamped, colour-tagged line on stdout.
#   $1   colour sequence for the tag
#   $2   marker appended after the timestamp (may be empty)
#   $3.. message words, joined with spaces
# The message goes through %s, so backslashes in it are printed literally
# instead of being interpreted as escapes.
_emit() {
  local colour="$1" marker="$2"
  shift 2
  printf '%b[%s]%s%b %s\n' "$colour" "$(date +%H:%M:%S)" "$marker" "$RESET" "$*"
}

# log: progress message. ok: success. warn: non-fatal problem.
# fail: fatal problem — prints the message, then exits the script with status 1.
log()  { _emit "$CYAN" "" "$@"; }
ok()   { _emit "$GREEN" " ✓" "$@"; }
warn() { _emit "$YELLOW" " ⚠" "$@"; }
fail() { _emit "$RED" " ✗" "$@"; exit 1; }
|
|
|
|
# Helper: run a command on the management node over SSH.
#   $@  command and arguments; ssh joins them with spaces and the remote shell
#       re-parses the resulting string.
# Returns the exit status of ssh.
# stdin is tied to /dev/null: otherwise ssh would read (and swallow) whatever
# the caller's stdin is, which silently ends any `while read` loop that calls
# mgmt from its body after the first iteration.
# ConnectTimeout keeps an unreachable management node from hanging the run.
# NOTE(review): StrictHostKeyChecking=no disables host-key verification —
# confirm this is acceptable on the management network.
mgmt() {
  ssh -i "$SSH_KEY" -p "$SSH_PORT" \
    -o StrictHostKeyChecking=no \
    -o ConnectTimeout=10 \
    "${MGMT_USER}@${MGMT_HOST}" "$@" </dev/null
}
|
|
|
|
# ─── Preflight ────────────────────────────────────────────────────────────────
# Pick the banner title. Test mode is checked first: when --test is combined
# with --reboot, the test branch further down is the one that runs, so the
# banner must announce the test rather than a reboot.
if [[ "$TEST_ONLY" == true ]]; then
  banner_title="║ K8S CONNECTIVITY TEST — no changes ║"
elif [[ "$ACTION" == "reboot" ]]; then
  banner_title="║ K8S SAFE REBOOT — Talos / Proxmox ║"
else
  banner_title="║ K8S SAFE SHUTDOWN — Talos / Proxmox ║"
fi

echo ""
echo -e "${BOLD}╔══════════════════════════════════════════════════════╗${RESET}"
echo -e "${BOLD}${banner_title}${RESET}"
echo -e "${BOLD}╚══════════════════════════════════════════════════════╝${RESET}"
echo ""
|
|
|
|
# Test mode (--test): probe each dependency of the shutdown sequence, report
# the result, and exit without running any of the shutdown steps below.
# Exit status: 0 when every check was clean, 1 when any check produced a warning
# (or the SSH check failed outright).
if [[ "$TEST_ONLY" == true ]]; then
  log "TEST MODE — checking connectivity only, nothing will be changed"
  echo ""
  issues=0   # number of checks that produced a warning

  log "① SSH to management node ($MGMT_USER@$MGMT_HOST)..."
  if mgmt echo "ok" &>/dev/null; then
    ok " SSH connection successful"
  else
    fail " Cannot SSH to $MGMT_HOST — check MGMT_HOST, MGMT_USER and SSH_KEY"
  fi

  log "② kubectl — cluster nodes..."
  # Capture first so a kubectl failure is reported instead of aborting the
  # script through set -e / pipefail without any explanation.
  if node_rows=$(mgmt kubectl get nodes --no-headers); then
    while read -r name status _; do
      [[ -n "$name" ]] || continue
      if [[ "$status" == "Ready" ]]; then
        ok " $name → $status"
      else
        warn " $name → $status"
        issues=$((issues + 1))
      fi
    done <<<"$node_rows"
  else
    warn " kubectl could not list cluster nodes"
    issues=$((issues + 1))
  fi

  log "③ talosctl — worker nodes..."
  for node in "${WORKER_NODES[@]}"; do
    if mgmt talos version --nodes "$node" --short &>/dev/null; then
      ok " talosctl → $node reachable"
    else
      warn " talosctl → $node NOT reachable"
      issues=$((issues + 1))
    fi
  done

  log "④ talosctl — control plane ($CONTROL_PLANE)..."
  if mgmt talos version --nodes "$CONTROL_PLANE" --short &>/dev/null; then
    ok " talosctl → $CONTROL_PLANE reachable"
  else
    warn " talosctl → $CONTROL_PLANE NOT reachable"
    issues=$((issues + 1))
  fi

  log "⑤ Longhorn volumes..."
  # One query feeds both counters; a failed query is reported as 0/0.
  volume_rows=$(mgmt kubectl get volumes -n longhorn-system --no-headers 2>/dev/null || true)
  if [[ -n "$volume_rows" ]]; then
    ATTACHED=$(grep -c "attached" <<<"$volume_rows" || true)
    TOTAL=$(wc -l <<<"$volume_rows")
  else
    ATTACHED=0
    TOTAL=0
  fi
  ok " $ATTACHED/$TOTAL volumes currently attached"

  log "⑥ Proxmox guests..."
  # `|| true`, not `|| echo "0"`: wc already prints 0 when qm/pct fail, so a
  # second echo would turn the count into two lines.
  VM_COUNT=$(qm list 2>/dev/null | awk 'NR>1 && $3=="running"' | wc -l || true)
  CT_COUNT=$(pct list 2>/dev/null | awk 'NR>1 && $2=="running"' | wc -l || true)
  ok " $VM_COUNT running VMs, $CT_COUNT running CTs (excluding K8s nodes)"

  echo ""
  if (( issues == 0 )); then
    ok "Test complete — all checks passed. Ready to run with --shutdown or --reboot."
    exit 0
  fi
  warn "Test complete — $issues check(s) reported problems. Review the warnings above before running --shutdown or --reboot."
  exit 1
fi
|
|
echo ""

# Last warning, then require the operator to type YES before continuing.
for notice in \
  "This will gracefully stop your entire Kubernetes cluster." \
  "All workloads will be stopped and volumes detached cleanly." \
  "Then all other Proxmox VMs will be stopped before the host ${ACTION}s."; do
  warn "$notice"
done
echo ""

# Build the coloured prompt once; %b expands the stored colour escapes.
printf -v confirm_prompt '%b' "${YELLOW}Action: ${ACTION^^} — Type YES to continue: ${RESET}"
read -rp "$confirm_prompt" CONFIRM
if [[ "$CONFIRM" != "YES" ]]; then
  echo "Aborted."
  exit 0
fi
echo ""
|
|
|
|
# ─── Step 1: Verify cluster is reachable ─────────────────────────────────────
log "Step 1/7 — Checking cluster connectivity..."
# Fetch the node list up front: if kubectl cannot reach the cluster the run
# stops here with an explicit message rather than an unexplained exit caused by
# set -e / pipefail.
if ! cluster_nodes=$(mgmt kubectl get nodes --no-headers); then
  fail " Cannot list cluster nodes via kubectl on $MGMT_HOST — aborting before any changes are made"
fi
# Columns used: 1 = node name, 2 = status; the rest is ignored.
while read -r name status _; do
  [[ -n "$name" ]] || continue
  if [[ "$status" == "Ready" ]]; then
    ok " $name → $status"
  else
    warn " $name → $status (not Ready — proceeding anyway)"
  fi
done <<<"$cluster_nodes"
echo ""
|
|
|
|
# ─── Step 2: Suspend Flux reconciliation ─────────────────────────────────────
log "Step 2/7 — Suspending Flux to prevent reconcile loops during shutdown..."
# The kustomization list is captured before looping, and the loop reads it from
# a here-string while the inner ssh call gets /dev/null as stdin. Piping the
# list straight into the loop lets the inner ssh consume the remaining lines,
# so only the first kustomization would ever be suspended.
if kustomizations=$(mgmt kubectl get kustomizations -A --no-headers 2>/dev/null) \
  && [[ -n "$kustomizations" ]]; then
  # Columns used: 1 = namespace, 2 = name.
  while read -r ns name _; do
    [[ -n "$name" ]] || continue
    if mgmt flux suspend kustomization "$name" -n "$ns" </dev/null 2>/dev/null; then
      ok " Suspended kustomization: $ns/$name"
    else
      warn " Could not suspend $ns/$name (flux CLI may not be installed — skipping)"
    fi
  done <<<"$kustomizations"
else
  warn " Flux not found or no kustomizations — skipping"
fi
echo ""
|
|
|
|
# ─── Step 3: Cordon all worker nodes ─────────────────────────────────────────
log "Step 3/7 — Cordoning all worker nodes (no new scheduling)..."
# Fetch the name/IP table once for all workers. A failed lookup leaves the
# table empty, so every worker is then reported as "not found".
node_table=$(mgmt kubectl get nodes --no-headers \
  -o custom-columns="NAME:.metadata.name,IP:.status.addresses[0].address" || true)
for node in "${WORKER_NODES[@]}"; do
  # Exact comparison on the IP column: an unanchored grep treats the IP as a
  # regex and would also match longer addresses such as 10.0.30.300.
  NODE_NAME=$(awk -v ip="$node" '$2 == ip { print $1 }' <<<"$node_table")
  if [[ -n "$NODE_NAME" ]]; then
    if mgmt kubectl cordon "$NODE_NAME"; then
      ok " Cordoned $NODE_NAME ($node)"
    else
      warn " Could not cordon $NODE_NAME"
    fi
  else
    warn " Could not find node for IP $node — skipping cordon"
  fi
done
echo ""
|
|
|
|
# ─── Helper: nuke all Longhorn PDBs ──────────────────────────────────────────
# Delete every PodDisruptionBudget in the longhorn-system namespace.
# Takes no arguments. Best effort: lookup and delete failures are ignored and
# the function always returns 0; each successful delete is reported via ok.
delete_longhorn_pdbs() {
  local pdb_list pdb
  pdb_list=$(mgmt kubectl get pdb -n longhorn-system --no-headers \
    -o custom-columns="NAME:.metadata.name" 2>/dev/null || true)
  [[ -n "$pdb_list" ]] || return 0

  # Read names from a here-string and give the inner ssh call /dev/null as
  # stdin; with `echo | while read` the ssh inside the loop drains the rest of
  # the list and only the first PDB is ever deleted.
  while read -r pdb _; do
    [[ -n "$pdb" ]] || continue
    if mgmt kubectl delete pdb "$pdb" -n longhorn-system </dev/null 2>/dev/null; then
      ok " Deleted PDB: $pdb"
    fi
  done <<<"$pdb_list"
  return 0
}
|
|
|
|
# ─── Step 4: Drain workloads ─────────────────────────────────────────────────
log "Step 4/7 — Draining workloads from worker nodes (timeout: ${DRAIN_TIMEOUT}s)..."
for node in "${WORKER_NODES[@]}"; do
  # Resolve the node name from its IP with an exact match on the IP column
  # (an unanchored grep treats the IP as a regex and can match other rows).
  NODE_NAME=$(mgmt kubectl get nodes --no-headers \
    -o custom-columns="NAME:.metadata.name,IP:.status.addresses[0].address" \
    | awk -v ip="$node" '$2 == ip { print $1 }' || true)
  if [[ -z "$NODE_NAME" ]]; then
    warn " Could not find node for IP $node — skipping"
    continue
  fi

  log " Clearing Longhorn PDBs before draining $NODE_NAME..."
  delete_longhorn_pdbs

  log " Draining $NODE_NAME..."
  # Run the drain in the background so PDBs can keep being deleted while it works.
  mgmt kubectl drain "$NODE_NAME" \
    --ignore-daemonsets \
    --delete-emptydir-data \
    --force \
    --timeout="${DRAIN_TIMEOUT}s" \
    --grace-period=30 &
  DRAIN_PID=$!

  # Poll every 4s for as long as the drain process is alive.
  while kill -0 "$DRAIN_PID" 2>/dev/null; do
    sleep 4
    delete_longhorn_pdbs
  done

  # Collect the drain's exit status; a failure is only a warning.
  if wait "$DRAIN_PID"; then
    ok " $NODE_NAME drained"
  else
    warn " $NODE_NAME drain had warnings (DaemonSets left behind is normal)"
  fi
done
echo ""
|
|
|
|
# ─── Step 5: Wait for Longhorn volumes to detach ─────────────────────────────
log "Step 5/7 — Waiting for Longhorn volumes to detach..."
ELAPSED=0
INTERVAL=10
while true; do
  # A failed query is recorded as "?" rather than 0, so an unreachable API is
  # never reported as "all volumes detached"; the loop keeps retrying until
  # LONGHORN_TIMEOUT instead.
  if volume_rows=$(mgmt kubectl get volumes -n longhorn-system --no-headers 2>/dev/null); then
    ATTACHED=$(grep -c "attached" <<<"$volume_rows" || true)
  else
    ATTACHED="?"
  fi

  if [[ "$ATTACHED" == "0" ]]; then
    ok " All Longhorn volumes detached"
    break
  fi

  if (( ELAPSED >= LONGHORN_TIMEOUT )); then
    if [[ "$ATTACHED" == "?" ]]; then
      warn " Timeout waiting for Longhorn — volume state could not be queried"
    else
      warn " Timeout waiting for Longhorn — $ATTACHED volume(s) still attached"
    fi
    warn " Proceeding anyway (Longhorn will recover on next boot)"
    break
  fi

  if [[ "$ATTACHED" == "?" ]]; then
    log " Could not query Longhorn volumes — retrying in ${INTERVAL}s... (${ELAPSED}s elapsed)"
  else
    log " $ATTACHED volume(s) still attached — waiting ${INTERVAL}s... (${ELAPSED}s elapsed)"
  fi
  sleep "$INTERVAL"
  ELAPSED=$((ELAPSED + INTERVAL))
done
echo ""
|
|
|
|
|
|
# ─── Helper: wait for a Talos node to disappear from kubectl ─────────────────
# Poll kubectl every 5s until the node with the given IP is no longer listed or
# reports a NotReady status, for at most 120s.
#   $1  node IP address
# Always returns 0. A failed kubectl query yields no rows and is therefore
# treated the same as "node gone".
wait_for_node_gone() {
  local node_ip="$1"
  local timeout=120 elapsed=0 interval=5
  local node_name node_state

  while true; do
    # Exact match on the IP column (no regex / substring matching).
    node_name=$(mgmt kubectl get nodes --no-headers \
      -o custom-columns="NAME:.metadata.name,IP:.status.addresses[0].address" 2>/dev/null \
      | awk -v ip="$node_ip" '$2 == ip { print $1 }' || true)
    if [[ -z "$node_name" ]]; then
      ok " $node_ip — node gone from cluster"
      return 0
    fi

    # Exact match on the name column, so "worker-1" does not also pick up
    # "worker-10".
    node_state=$(mgmt kubectl get nodes --no-headers 2>/dev/null \
      | awk -v n="$node_name" '$1 == n { print $2 }' || true)
    # Prefix match: a cordoned node reports "NotReady,SchedulingDisabled", which
    # a plain equality test against "NotReady" never accepts.
    if [[ "$node_state" == NotReady* ]]; then
      ok " $node_ip — node is NotReady (safe to proceed)"
      return 0
    fi

    if (( elapsed >= timeout )); then
      warn " $node_ip — still Ready after ${timeout}s, proceeding anyway"
      return 0
    fi

    log " $node_ip — waiting for NotReady... (${elapsed}s)"
    sleep "$interval"
    elapsed=$((elapsed + interval))
  done
}
|
|
|
|
# ─── Step 6: Shut down Talos nodes ───────────────────────────────────────────
# Ask one Talos node to shut down through talosctl on the management node;
# if that fails, fall back to a direct `ssh root@<node> poweroff` (best effort).
#   $1 node IP   $2 message for ok on success   $3 message for warn on fallback
request_talos_shutdown() {
  local target="$1" success_msg="$2" fallback_msg="$3"
  if mgmt talos shutdown --nodes "$target" --force --wait=false 2>/dev/null; then
    ok "$success_msg"
    return 0
  fi
  warn "$fallback_msg"
  ssh -o StrictHostKeyChecking=no -o ConnectTimeout=5 "root@$target" "poweroff" 2>/dev/null || true
}

log "Step 6/7 — Initiating Talos shutdown sequence..."
log " Shutting down worker nodes first..."
for node in "${WORKER_NODES[@]}"; do
  log " Sending shutdown to $node..."
  request_talos_shutdown "$node" \
    " $node shutdown initiated" \
    " talosctl shutdown failed for $node — trying SSH poweroff"

  # Move on to the next worker only once this one is gone or NotReady.
  wait_for_node_gone "$node"
  sleep 5
done

log " Waiting 15s for workers to fully power off before stopping control plane..."
sleep 15

log " Shutting down control plane node ($CONTROL_PLANE)..."
request_talos_shutdown "$CONTROL_PLANE" \
  " Control plane shutdown initiated" \
  " talosctl shutdown failed for control plane — trying SSH poweroff"
wait_for_node_gone "$CONTROL_PLANE"
echo ""
|
|
|
|
|
|
|
|
# ─── Step 7: Stop remaining Proxmox VMs/CTs, then host action ────────────────
log "Step 7/7 — Stopping remaining Proxmox VMs and containers..."

# IDs of every guest still reported as running (empty when qm/pct fail).
RUNNING_VMS=$(qm list 2>/dev/null | awk 'NR>1 && $3=="running" {print $1}' || true)
RUNNING_CTS=$(pct list 2>/dev/null | awk 'NR>1 && $2=="running" {print $1}' || true)

# Background job bookkeeping: guest_pids[i] belongs to guest_labels[i].
guest_pids=()
guest_labels=()

# Shut down all still-running VMs (in parallel).
if [[ -n "$RUNNING_VMS" ]]; then
  for vmid in $RUNNING_VMS; do   # intentionally unquoted: one ID per word
    log " Stopping VM $vmid..."
    qm shutdown "$vmid" --timeout "$VM_SHUTDOWN_TIMEOUT" &
    guest_pids+=("$!")
    guest_labels+=("VM $vmid")
  done
else
  ok " No running VMs found"
fi

if [[ -n "$RUNNING_CTS" ]]; then
  for ctid in $RUNNING_CTS; do   # intentionally unquoted: one ID per word
    log " Stopping CT $ctid..."
    pct shutdown "$ctid" --timeout "$VM_SHUTDOWN_TIMEOUT" &
    guest_pids+=("$!")
    guest_labels+=("CT $ctid")
  done
else
  ok " No running CTs found"
fi

log " Waiting for all guests to stop (up to ${VM_SHUTDOWN_TIMEOUT}s)..."
# Wait on each job by PID: a bare `wait` discards the jobs' exit codes, so a
# guest that failed to shut down would still be reported as stopped.
failed_guests=0
if (( ${#guest_pids[@]} > 0 )); then
  for i in "${!guest_pids[@]}"; do
    if ! wait "${guest_pids[$i]}"; then
      warn " ${guest_labels[$i]} did not shut down cleanly"
      failed_guests=$((failed_guests + 1))
    fi
  done
fi

if (( failed_guests == 0 )); then
  ok " All guests stopped"
else
  warn " $failed_guests guest(s) did not stop cleanly — continuing with host ${ACTION}"
fi
echo ""
|
|
|
|
# ─── Final action ─────────────────────────────────────────────────────────────
# Reboot or power off the host according to ACTION. Each power-state change is
# tied to an explicit value; an unset or unexpected ACTION is refused rather
# than falling through to a power-off.
case "${ACTION:-}" in
  reboot)
    ok "All done! Rebooting host now."
    echo ""
    shutdown -r now
    ;;
  shutdown)
    ok "All done! Powering off host now."
    echo ""
    shutdown -h now
    ;;
  *)
    fail "Unexpected ACTION '${ACTION:-}' — refusing to change host power state"
    ;;
esac