This commit captures the infrastructure state immediately following the "Post-Tyranny Tech" workshop on January 23rd, 2026.

Infrastructure Status:
- 14 client servers deployed (white, valk, zwaan, specht, das, uil, vos, haas, wolf, ree, mees, mus, mol, kikker)
- Services: Authentik SSO, Nextcloud, Collabora Office, Traefik
- Private network architecture with edge NAT gateway
- OIDC integration between Authentik and Nextcloud
- Automated recovery flows and invitation system
- Container update monitoring with Diun
- Uptime monitoring with Uptime Kuma

Changes include:
- Multiple new client host configurations
- Network architecture improvements (private IPs + NAT)
- DNS management automation
- Container update notifications
- Email configuration via Mailgun
- SSH key generation for all clients
- Encrypted secrets for all deployments
- Health check and diagnostic scripts

Known Issues to Address:
- Nextcloud version pinned to v30 (should use 'latest' or v32)
- Zitadel references in templates (migrated to Authentik, but the templates were not updated)
- Traefik dynamic config has obsolete static routes

🤖 Generated with Claude Code (https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
116 lines
3.4 KiB
Bash
Executable file
116 lines
3.4 KiB
Bash
Executable file
#!/bin/bash
#
# Health check script for client servers.
#
# Usage: ./health-check.sh <client-name>
#
# The single positional argument names the client to inspect. When it is
# missing or empty, the usage text is printed and the script exits with 1.

# Abort on failing commands, unset variables and failing pipeline stages.
set -euo pipefail

CLIENT="${1:-}"

# Guard clause: a client name is mandatory.
[[ -n "$CLIENT" ]] || {
  printf 'Usage: %s <client-name>\n' "$0"
  printf 'Example: %s black\n' "$0"
  exit 1
}
|
|
|
|
# Colors
#
# ANSI escape sequences for the status lines. They are stored as literal
# backslash sequences and only become real escapes when printed with
# `echo -e`. Marked readonly because they are constants and nothing in the
# script reassigns them.
readonly RED='\033[0;31m'
readonly GREEN='\033[0;32m'
readonly YELLOW='\033[1;33m'
readonly NC='\033[0m' # No Color
|
|
|
|
# Get client IP
#
# Look the client up in the `client_ips` output of the tofu project located
# in ../tofu relative to this script. The script stays in that directory
# afterwards, so later relative paths (../keys/ssh/...) resolve from there.
cd "$(dirname "$0")/../tofu"

# The client name is handed to jq as data (--arg) instead of being spliced
# into the filter text: a name such as "my-client" is not a valid `.name`
# path expression and would make jq fail.
# `|| true` keeps `set -e` + `pipefail` from aborting the script silently when
# tofu or jq fails; the empty result is reported by the check below instead.
IP=$(tofu output -json client_ips 2>/dev/null \
  | jq -r --arg name "$CLIENT" '.[$name]' 2>/dev/null) || true

# jq prints the string "null" when the key does not exist.
if [[ -z "$IP" || "$IP" == "null" ]]; then
  echo -e "${RED}✗ ERROR: Client '$CLIENT' not found${NC}"
  exit 1
fi
|
|
|
|
# Report header: names the client and the address being checked,
# followed by one blank line.
printf '%s\n' \
  "========================================" \
  "Health Check: $CLIENT ($IP)" \
  "========================================" \
  ""
|
|
|
|
# Container Status
#
# Print the `docker ps` table from the server, keeping the header row and the
# rows whose text matches one of the expected service names.
echo "Container Status:"
echo "----------------"

# ConnectTimeout bounds the wait when the server is down, so the health check
# reports the failure instead of hanging on the TCP connect.
# NOTE(review): StrictHostKeyChecking=no skips host key verification —
# presumably because servers get rebuilt; confirm this is intended.
container_rc=0
ssh -i "../keys/ssh/$CLIENT" -o StrictHostKeyChecking=no -o ConnectTimeout=10 "root@$IP" \
  "docker ps --format 'table {{.Names}}\t{{.Status}}' | grep -E 'NAME|traefik|authentik|nextcloud|collabora|diun|redis|db'" \
  2>/dev/null || container_rc=$?

if (( container_rc == 255 )); then
  # ssh itself exits with 255 when the connection or authentication fails.
  echo -e "${RED}✗ Cannot connect to server${NC}"
  exit 1
elif (( container_rc != 0 )); then
  # ssh worked but the remote pipeline failed (e.g. docker produced no table).
  echo -e "${RED}✗ Could not list containers (exit code $container_rc)${NC}"
  exit 1
fi
echo ""
|
|
|
|
# Service URLs
#
# Probe each public endpoint with an HTTP HEAD request and report the status.
echo "Service Accessibility:"
echo "---------------------"

#######################################
# Print the HTTP status code answered to a HEAD request.
# Arguments: $1 - URL to probe
# Outputs:   the status code on stdout, or nothing when no response was
#            received (DNS failure, connection refused, timeout, ...)
# Returns:   always 0, so callers are safe under `set -e`
#######################################
http_status() {
  local url=$1
  local code
  # curl exits non-zero on connection errors; `|| true` keeps `set -e` from
  # aborting the whole script so the failure can be reported instead.
  # --max-time bounds the request so an unresponsive host cannot hang the check.
  code=$(curl -sI -o /dev/null -w '%{http_code}' --max-time 10 "$url" 2>/dev/null) || true
  # curl writes 000 when it never received a status line.
  if [[ "$code" == "000" ]]; then
    code=""
  fi
  printf '%s' "$code"
}

# Authentik: 200 or 302 counts as healthy.
AUTH_STATUS=$(http_status "https://auth.$CLIENT.vrije.cloud")
if [[ "$AUTH_STATUS" == "200" || "$AUTH_STATUS" == "302" ]]; then
  echo -e "Authentik: ${GREEN}✓ OK${NC} (HTTP $AUTH_STATUS)"
else
  echo -e "Authentik: ${RED}✗ FAIL${NC} (HTTP ${AUTH_STATUS:-timeout})"
fi

# Nextcloud: 200 or 302 counts as healthy.
NC_STATUS=$(http_status "https://nextcloud.$CLIENT.vrije.cloud")
if [[ "$NC_STATUS" == "200" || "$NC_STATUS" == "302" ]]; then
  echo -e "Nextcloud: ${GREEN}✓ OK${NC} (HTTP $NC_STATUS)"
else
  echo -e "Nextcloud: ${RED}✗ FAIL${NC} (HTTP ${NC_STATUS:-timeout})"
fi

# Collabora: only 200 is OK; anything else is reported as a warning.
COLLAB_STATUS=$(http_status "https://office.$CLIENT.vrije.cloud")
if [[ "$COLLAB_STATUS" == "200" ]]; then
  echo -e "Collabora: ${GREEN}✓ OK${NC} (HTTP $COLLAB_STATUS)"
else
  echo -e "Collabora: ${YELLOW}⚠ WARNING${NC} (HTTP ${COLLAB_STATUS:-timeout})"
fi
echo ""
|
|
|
|
# Disk Usage
#
# Show the `df -h` line for the Nextcloud data mount on the server.
echo "Disk Usage:"
echo "-----------"
# The remote `df | tail` pipeline reports tail's exit status, so it can exit 0
# with empty output when df fails (e.g. the path does not exist). Both an ssh
# failure and empty output are therefore shown as "N/A".
# ConnectTimeout keeps an unreachable server from hanging the check.
DISK_USAGE=$(ssh -i "../keys/ssh/$CLIENT" -o StrictHostKeyChecking=no -o ConnectTimeout=10 "root@$IP" \
  "df -h /mnt/nextcloud-data 2>/dev/null | tail -1") || DISK_USAGE=""
echo "${DISK_USAGE:-N/A}"
echo ""
|
|
|
|
# fail2ban
#
# Show the "Currently banned" line of the sshd jail on the server.
echo "Security (fail2ban):"
echo "--------------------"
# The remote grep exits non-zero when fail2ban-client prints no such line
# (not installed, jail missing); that and any ssh failure are shown as "N/A".
# The ssh target is quoted and ConnectTimeout keeps an unreachable server
# from hanging the check.
BANNED=$(ssh -i "../keys/ssh/$CLIENT" -o StrictHostKeyChecking=no -o ConnectTimeout=10 "root@$IP" \
  "fail2ban-client status sshd 2>/dev/null | grep 'Currently banned'") || BANNED="N/A"
echo "$BANNED"
echo ""
|
|
|
|
# SSL Certificate Expiry
#
# Fetch the certificate served for the Authentik hostname and print its
# notAfter date.
echo "SSL Certificate:"
echo "----------------"
CERT_HOST="auth.$CLIENT.vrije.cloud"
# -servername sends the hostname via SNI explicitly, so the certificate for
# this hostname is requested. NOTE(review): some openssl builds may not send
# SNI by default — confirm against the versions operators use.
# stdin comes from /dev/null so s_client exits once the handshake is done.
# `|| true`: with `pipefail` a failed s_client fails the whole pipeline, and
# `set -e` would then abort the script before the error branch below can run.
CERT_EXPIRY=$(openssl s_client -connect "$CERT_HOST:443" -servername "$CERT_HOST" \
    </dev/null 2>/dev/null \
  | openssl x509 -noout -enddate 2>/dev/null \
  | cut -d= -f2) || true

if [[ -n "$CERT_EXPIRY" ]]; then
  echo -e "Expires: ${GREEN}$CERT_EXPIRY${NC}"
else
  echo -e "${RED}✗ Cannot retrieve certificate${NC}"
fi
echo ""
|
|
|
|
# Diun Status (if installed)
#
# Report the status of a running container whose name matches "diun".
echo "Monitoring (Diun):"
echo "------------------"
# `docker ps --filter` exits 0 and prints nothing when no running container
# matches, so an empty result must be treated as "not installed" too —
# otherwise a missing Diun would be shown as a green "✓ Diun: ".
# Note that `docker ps` without -a lists running containers only, so a
# stopped Diun container is reported the same way.
DIUN_STATUS=$(ssh -i "../keys/ssh/$CLIENT" -o StrictHostKeyChecking=no -o ConnectTimeout=10 "root@$IP" \
  "docker ps --filter 'name=diun' --format '{{.Status}}' 2>/dev/null") || DIUN_STATUS=""

if [[ -z "$DIUN_STATUS" ]]; then
  echo -e "${YELLOW}⚠ Diun not installed${NC}"
else
  echo -e "${GREEN}✓ Diun: $DIUN_STATUS${NC}"
fi
echo ""
|
|
|
|
# Closing banner. %b expands the backslash escapes stored in the color
# variables, exactly as `echo -e` does.
printf '%s\n' "========================================"
printf '%b\n' "${GREEN}Health check complete!${NC}"
printf '%s\n' "========================================"
|