Add pve/k8s-talos-safe-shutdown.sh

2026-04-18 23:39:40 +00:00
parent 24e5a1dc57
commit 9e87de6974
1 changed files with 278 additions and 0 deletions
--- a/pve/k8s-talos-safe-shutdown.sh
+++ b/pve/k8s-talos-safe-shutdown.sh
@@ -0,0 +1,278 @@
+#!/usr/bin/env bash
+# =============================================================================
+# k8s-talos-safe-shutdown.sh
+# Safe shutdown/reboot for Talos K8s cluster before powering off Proxmox host
+#
+# Usage (from Proxmox host):
+#   chmod +x k8s-talos-safe-shutdown.sh
+#   ./k8s-talos-safe-shutdown.sh             # graceful shutdown (default)
+#   ./k8s-talos-safe-shutdown.sh --reboot    # graceful reboot
+#   ./k8s-talos-safe-shutdown.sh --test      # connectivity checks only, no changes
+#
+# Requirements on Proxmox host:
+#   - SSH key access to your cluster management node
+#   - talosctl + kubectl installed on that node (flux CLI optional, used to suspend Flux)
+#   - TALOSCONFIG / KUBECONFIG set on that node
+# =============================================================================
+
+set -euo pipefail
+
+# ─── Arguments ───────────────────────────────────────────────────────────────
+ACTION="shutdown"
+TEST_ONLY=false
+for arg in "$@"; do
+  case "$arg" in
+    --reboot) ACTION="reboot" ;;
+    --shutdown) ACTION="shutdown" ;;
+    --test) TEST_ONLY=true ;;
+    --help|-h)
+      echo "Usage: $0 [--shutdown|--reboot|--test]"
+      echo "  --shutdown  Gracefully stop cluster and power off host (default)"
+      echo "  --reboot    Gracefully stop cluster and reboot host"
+      echo "  --test      Check all connectivity only — no changes made"
+      exit 0 ;;
+    *) echo "Unknown argument: $arg"; exit 1 ;;
+  esac
+done
+
+# ─── Configuration ────────────────────────────────────────────────────────────
+# Management node: every kubectl / talosctl / flux command in this script is
+# executed there over SSH.
+MGMT_USER="sysadmin"                # SSH login on the management node
+MGMT_HOST="10.0.30.40"              # address of the management node
+SSH_KEY="${HOME}/.ssh/id_ed25519"   # private key on this Proxmox host
+
+# Talos node IPs. Workers are shut down first, the control plane last.
+CONTROL_PLANE="10.0.30.21"
+declare -a WORKER_NODES=(
+  "10.0.30.30"   # worker-1
+  "10.0.30.31"   # worker-2
+  "10.0.30.32"   # worker-3
+)
+
+# Timeouts, all in seconds.
+DRAIN_TIMEOUT=120         # passed to `kubectl drain --timeout` for each worker
+LONGHORN_TIMEOUT=120      # max wait for Longhorn volumes to detach
+VM_SHUTDOWN_TIMEOUT=120   # passed to `qm shutdown` / `pct shutdown --timeout`
+
+# ─── Colours ─────────────────────────────────────────────────────────────────
+# ANSI escape sequences kept as literal backslash strings; they are turned into
+# real escapes by `echo -e` at print time.
+RED='\033[0;31m'
+GREEN='\033[0;32m'
+YELLOW='\033[1;33m'
+CYAN='\033[0;36m'
+BOLD='\033[1m'
+RESET='\033[0m'
+
+log()  { echo -e "${CYAN}[$(date +%H:%M:%S)]${RESET} $*"; }
+ok()   { echo -e "${GREEN}[$(date +%H:%M:%S)] ✓${RESET} $*"; }
+warn() { echo -e "${YELLOW}[$(date +%H:%M:%S)] ⚠${RESET} $*"; }
+fail() { echo -e "${RED}[$(date +%H:%M:%S)] ✗${RESET} $*"; exit 1; }
+
+# Helper: run a command on the management node over SSH
+mgmt() { ssh -i "$SSH_KEY" -o StrictHostKeyChecking=no "${MGMT_USER}@${MGMT_HOST}" "$@"; }
+
+# ─── Preflight ────────────────────────────────────────────────────────────────
+# Choose the banner title. --test is checked first: a test run changes nothing,
+# so it must not be announced as a reboot when combined with --reboot.
+if [[ "$TEST_ONLY" == true ]]; then
+  BANNER_TITLE="║        K8S CONNECTIVITY TEST — no changes            ║"
+elif [[ "$ACTION" == "reboot" ]]; then
+  BANNER_TITLE="║        K8S SAFE REBOOT — Talos / Proxmox             ║"
+else
+  BANNER_TITLE="║        K8S SAFE SHUTDOWN — Talos / Proxmox           ║"
+fi
+
+echo ""
+echo -e "${BOLD}╔══════════════════════════════════════════════════════╗${RESET}"
+echo -e "${BOLD}${BANNER_TITLE}${RESET}"
+echo -e "${BOLD}╚══════════════════════════════════════════════════════╝${RESET}"
+echo ""
+
+if [[ "$TEST_ONLY" == true ]]; then
+  log "TEST MODE — checking connectivity only, nothing will be changed"
+  echo ""
+
+  log "① SSH to management node ($MGMT_USER@$MGMT_HOST)..."
+  if mgmt echo "ok" &>/dev/null; then
+    ok "  SSH connection successful"
+  else
+    fail "  Cannot SSH to $MGMT_HOST — check MGMT_HOST, MGMT_USER and SSH_KEY"
+  fi
+
+  log "② kubectl — cluster nodes..."
+  mgmt kubectl get nodes --no-headers | awk '{print $1, $2}' | while read name status; do
+    if [[ "$status" == "Ready" ]]; then
+      ok "  $name → $status"
+    else
+      warn "  $name → $status"
+    fi
+  done
+
+  log "③ talos — worker nodes..."
+  for node in "${WORKER_NODES[@]}"; do
+    if mgmt talosctl version --nodes "$node" --short &>/dev/null; then
+      ok "  talos → $node reachable"
+    else
+      warn "  talos → $node NOT reachable"
+    fi
+  done
+
+  log "④ talos — control plane ($CONTROL_PLANE)..."
+  if mgmt talosctl version --nodes "$CONTROL_PLANE" --short &>/dev/null; then
+    ok "  talos → $CONTROL_PLANE reachable"
+  else
+    warn "  talos → $CONTROL_PLANE NOT reachable"
+  fi
+
+  log "⑤ Longhorn volumes..."
+  ATTACHED=$(mgmt kubectl get volumes -n longhorn-system --no-headers 2>/dev/null | grep -c "attached" || true)
+  TOTAL=$(mgmt kubectl get volumes -n longhorn-system --no-headers 2>/dev/null | wc -l || true)
+  ok "  $ATTACHED/$TOTAL volumes currently attached"
+
+  log "⑥ Proxmox guests..."
+  VM_COUNT=$(qm list 2>/dev/null | awk 'NR>1 && $3=="running"' | wc -l || echo "0")
+  CT_COUNT=$(pct list 2>/dev/null | awk 'NR>1 && $2=="running"' | wc -l || echo "0")
+  ok "  $VM_COUNT running VMs, $CT_COUNT running CTs (excluding K8s nodes)"
+
+  echo ""
+  ok "Test complete — all checks passed. Ready to run with --shutdown or --reboot."
+  exit 0
+fi
+echo ""
+
+warn "This will gracefully stop your entire Kubernetes cluster."
+warn "All workloads will be stopped and volumes detached cleanly."
+warn "Then all other Proxmox VMs will be stopped before the host ${ACTION}s."
+echo ""
+read -rp "$(echo -e "${YELLOW}Action: ${ACTION^^} — Type YES to continue: ${RESET}")" CONFIRM
+[[ "$CONFIRM" == "YES" ]] || { echo "Aborted."; exit 0; }
+echo ""
+
+# ─── Step 1: Verify cluster is reachable ─────────────────────────────────────
+log "Step 1/7 — Checking cluster connectivity..."
+mgmt kubectl get nodes --no-headers | awk '{print $1, $2}' | while read name status; do
+  if [[ "$status" == "Ready" ]]; then
+    ok "  $name → $status"
+  else
+    warn "  $name → $status (not Ready — proceeding anyway)"
+  fi
+done
+echo ""
+
+# ─── Step 2: Suspend Flux reconciliation ─────────────────────────────────────
+log "Step 2/7 — Suspending Flux to prevent reconcile loops during shutdown..."
+mgmt kubectl get kustomizations -A --no-headers 2>/dev/null | while read ns name rest; do
+  mgmt flux suspend kustomization "$name" -n "$ns" 2>/dev/null && \
+    ok "  Suspended kustomization: $ns/$name" || \
+    warn "  Could not suspend $ns/$name (flux CLI may not be installed — skipping)"
+done || warn "  Flux not found or no kustomizations — skipping"
+echo ""
+
+# ─── Step 3: Cordon all worker nodes ─────────────────────────────────────────
+log "Step 3/7 — Cordoning all worker nodes (no new scheduling)..."
+for node in "${WORKER_NODES[@]}"; do
+  NODE_NAME=$(mgmt kubectl get nodes --no-headers -o custom-columns="NAME:.metadata.name,IP:.status.addresses[0].address" | grep "$node" | awk '{print $1}' || true)
+  if [[ -n "$NODE_NAME" ]]; then
+    mgmt kubectl cordon "$NODE_NAME" && ok "  Cordoned $NODE_NAME ($node)" || warn "  Could not cordon $NODE_NAME"
+  else
+    warn "  Could not find node for IP $node — skipping cordon"
+  fi
+done
+echo ""
+
+# ─── Step 4: Drain workloads ─────────────────────────────────────────────────
+log "Step 4/7 — Draining workloads from worker nodes (timeout: ${DRAIN_TIMEOUT}s)..."
+for node in "${WORKER_NODES[@]}"; do
+  NODE_NAME=$(mgmt kubectl get nodes --no-headers -o custom-columns="NAME:.metadata.name,IP:.status.addresses[0].address" | grep "$node" | awk '{print $1}' || true)
+  if [[ -n "$NODE_NAME" ]]; then
+    log "  Draining $NODE_NAME..."
+    mgmt kubectl drain "$NODE_NAME" \
+      --ignore-daemonsets \
+      --delete-emptydir-data \
+      --force \
+      --timeout="${DRAIN_TIMEOUT}s" \
+      --grace-period=30 && \
+      ok "  $NODE_NAME drained" || \
+      warn "  $NODE_NAME drain had warnings (DaemonSets left behind is normal)"
+  fi
+done
+echo ""
+
+# ─── Step 5: Wait for Longhorn volumes to detach ─────────────────────────────
+# Polls the Longhorn volume list every INTERVAL seconds until no volume is
+# "attached" or LONGHORN_TIMEOUT has elapsed. Never aborts the run.
+log "Step 5/7 — Waiting for Longhorn volumes to detach..."
+ELAPSED=0
+INTERVAL=10
+while true; do
+  # A failed query (Longhorn absent, cluster unreachable) must not be mistaken
+  # for "zero volumes attached", so the status is checked before counting.
+  if ! VOLUMES=$(mgmt kubectl get volumes -n longhorn-system --no-headers 2>/dev/null); then
+    warn "  Could not query Longhorn volumes (Longhorn not installed or cluster unreachable) — skipping wait"
+    break
+  fi
+  # -w matches "attached" as a whole word only; grep -c exits 1 on a zero
+  # count, hence the `|| true` under set -e.
+  ATTACHED=$(grep -cw "attached" <<< "$VOLUMES" || true)
+  if [[ "$ATTACHED" -eq 0 ]]; then
+    ok "  All Longhorn volumes detached"
+    break
+  fi
+  if [[ "$ELAPSED" -ge "$LONGHORN_TIMEOUT" ]]; then
+    warn "  Timeout waiting for Longhorn — $ATTACHED volume(s) still attached"
+    warn "  Proceeding anyway (Longhorn will recover on next boot)"
+    break
+  fi
+  log "  $ATTACHED volume(s) still attached — waiting ${INTERVAL}s... (${ELAPSED}s elapsed)"
+  sleep "$INTERVAL"
+  ELAPSED=$((ELAPSED + INTERVAL))
+done
+echo ""
+
+# ─── Step 6: Shut down Talos nodes (workers first, then control plane) ────────
+# Workers go down first; the control plane follows after a fixed 30s pause.
+# Failures are reported as warnings and never abort the run.
+log "Step 6/7 — Initiating Talos shutdown sequence..."
+log "  Shutting down worker nodes first..."
+for node in "${WORKER_NODES[@]}"; do
+  log "  Sending shutdown to $node..."
+  if mgmt talosctl shutdown --nodes "$node" --force 2>/dev/null; then
+    ok "  $node shutdown initiated"
+  else
+    warn "  talos shutdown failed for $node — trying SSH poweroff"
+    # Fallback, now reached only when talosctl failed (it used to run for every
+    # node, successful or not).
+    # NOTE(review): stock Talos Linux ships no SSH daemon, so this probably
+    # only helps for non-Talos nodes — confirm whether it is still wanted.
+    ssh -o StrictHostKeyChecking=no -o ConnectTimeout=5 "root@$node" "poweroff" 2>/dev/null || true
+  fi
+  sleep 5
+done
+
+log "  Waiting 30s for workers to power off before stopping control plane..."
+sleep 30
+
+log "  Shutting down control plane node ($CONTROL_PLANE)..."
+if mgmt talosctl shutdown --nodes "$CONTROL_PLANE" --force 2>/dev/null; then
+  ok "  Control plane shutdown initiated"
+else
+  warn "  talos shutdown failed for control plane"
+fi
+echo ""
+
+# ─── Step 7: Stop remaining Proxmox VMs/CTs, then host action ────────────────
+# Sends a graceful shutdown to every guest still reported as running, in
+# parallel, then waits for each request and reports the ones that failed.
+log "Step 7/7 — Stopping remaining Proxmox VMs and containers..."
+
+# IDs of guests still running (`qm list`: status is column 3; `pct list`: column 2).
+RUNNING_VMS=$(qm list 2>/dev/null | awk 'NR>1 && $3=="running" {print $1}' || true)
+RUNNING_CTS=$(pct list 2>/dev/null | awk 'NR>1 && $2=="running" {print $1}' || true)
+
+# Background job PIDs and matching labels, kept so every shutdown request can
+# be waited for individually and its result reported.
+GUEST_PIDS=()
+GUEST_LABELS=()
+
+if [[ -n "$RUNNING_VMS" ]]; then
+  # Unquoted on purpose: the list holds one numeric ID per line.
+  for vmid in $RUNNING_VMS; do
+    log "  Stopping VM $vmid..."
+    qm shutdown "$vmid" --timeout "$VM_SHUTDOWN_TIMEOUT" &
+    GUEST_PIDS+=("$!")
+    GUEST_LABELS+=("VM $vmid")
+  done
+else
+  ok "  No running VMs found"
+fi
+
+if [[ -n "$RUNNING_CTS" ]]; then
+  for ctid in $RUNNING_CTS; do
+    log "  Stopping CT $ctid..."
+    pct shutdown "$ctid" --timeout "$VM_SHUTDOWN_TIMEOUT" &
+    GUEST_PIDS+=("$!")
+    GUEST_LABELS+=("CT $ctid")
+  done
+else
+  ok "  No running CTs found"
+fi
+
+log "  Waiting for all guests to stop (up to ${VM_SHUTDOWN_TIMEOUT}s)..."
+# A bare `wait` always returns 0 and hides failed or timed-out shutdowns, so
+# each job is waited for by PID and its exit status inspected.
+FAILED_GUESTS=0
+if (( ${#GUEST_PIDS[@]} > 0 )); then
+  for i in "${!GUEST_PIDS[@]}"; do
+    if ! wait "${GUEST_PIDS[$i]}"; then
+      warn "  ${GUEST_LABELS[$i]} did not shut down cleanly"
+      FAILED_GUESTS=$((FAILED_GUESTS + 1))
+    fi
+  done
+fi
+
+if (( FAILED_GUESTS == 0 )); then
+  ok "  All guests stopped"
+else
+  warn "  $FAILED_GUESTS guest(s) did not stop cleanly — continuing with host ${ACTION} anyway"
+fi
+echo ""
+
+# ─── Final action ─────────────────────────────────────────────────────────────
+if [[ "$ACTION" == "reboot" ]]; then
+  ok "All done! Rebooting host now."
+  echo ""
+  shutdown -r now
+else
+  ok "All done! Powering off host now."
+  echo ""
+  shutdown -h now
+fi