From bf4659f6622bfcbae04f33563f8841ea11d56555 Mon Sep 17 00:00:00 2001 From: Pieter Date: Sat, 17 Jan 2026 20:24:53 +0100 Subject: [PATCH] feat: Implement client registry system (issue #12) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add comprehensive client registry for tracking all deployed infrastructure: Registry System: - Single source of truth in clients/registry.yml - Tracks status, server specs, versions, maintenance history - Supports canary deployment workflow - Automatic updates via deployment scripts New Scripts: - scripts/list-clients.sh: List/filter clients (table/json/csv/summary) - scripts/client-status.sh: Detailed client info with health checks - scripts/update-registry.sh: Manual registry updates Updated Scripts: - scripts/deploy-client.sh: Auto-updates registry on deploy - scripts/rebuild-client.sh: Auto-updates registry on rebuild - scripts/destroy-client.sh: Marks clients as destroyed Documentation: - docs/client-registry.md: Complete registry reference - clients/README.md: Quick start guide Status tracking: pending → deployed → maintenance → destroyed Role support: canary (dev) and production clients 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- clients/README.md | 97 +++++++++++ clients/registry.yml | 89 ++++++++++ docs/client-registry.md | 325 +++++++++++++++++++++++++++++++++++++ scripts/client-status.sh | 237 +++++++++++++++++++++++++++ scripts/deploy-client.sh | 35 +++- scripts/destroy-client.sh | 11 +- scripts/list-clients.sh | 231 ++++++++++++++++++++++++++ scripts/rebuild-client.sh | 37 ++++- scripts/update-registry.sh | 183 +++++++++++++++++++++ 9 files changed, 1237 insertions(+), 8 deletions(-) create mode 100644 clients/README.md create mode 100644 clients/registry.yml create mode 100644 docs/client-registry.md create mode 100755 scripts/client-status.sh create mode 100755 scripts/list-clients.sh create mode 100755 scripts/update-registry.sh 
diff --git a/clients/README.md b/clients/README.md new file mode 100644 index 0000000..b6433c5 --- /dev/null +++ b/clients/README.md @@ -0,0 +1,97 @@ +# Client Registry + +This directory contains the client registry system for tracking all deployed infrastructure. + +## Files + +- **[registry.yml](registry.yml)** - Single source of truth for all clients + - Deployment status and lifecycle + - Server specifications + - Application versions + - Maintenance history + - Access URLs + +## Management Scripts + +All scripts are located in [`../scripts/`](../scripts/): + +### View Clients + +```bash +# List all clients +../scripts/list-clients.sh + +# Filter by status +../scripts/list-clients.sh --status=deployed + +# Filter by role +../scripts/list-clients.sh --role=canary + +# Different formats +../scripts/list-clients.sh --format=table # Default +../scripts/list-clients.sh --format=json # JSON +../scripts/list-clients.sh --format=csv # CSV export +../scripts/list-clients.sh --format=summary # Statistics +``` + +### View Client Details + +```bash +# Show detailed status with live health checks +../scripts/client-status.sh +``` + +### Update Registry + +The registry is **automatically updated** by deployment scripts: +- `deploy-client.sh` - Creates/updates entry on deployment +- `rebuild-client.sh` - Updates entry on rebuild +- `destroy-client.sh` - Marks as destroyed + +For manual updates: +```bash +../scripts/update-registry.sh [options] +``` + +## Registry Structure + +Each client entry tracks: +- **Status**: `pending` → `deployed` → `maintenance` → `offboarding` → `destroyed` +- **Role**: `canary` (testing) or `production` (live) +- **Server**: Type, location, IP, Hetzner ID +- **Apps**: Installed applications +- **Versions**: Application and OS versions +- **Maintenance**: Update and backup history +- **URLs**: Access endpoints +- **Notes**: Operational documentation + +## Canary Deployment + +The `dev` client has role `canary` and is used for testing: + +```bash +# 
1. Test on canary first +../scripts/deploy-client.sh dev + +# 2. Verify it works +../scripts/client-status.sh dev + +# 3. Roll out to production +for client in $(../scripts/list-clients.sh --role=production --format=csv | tail -n +2 | cut -d, -f1); do + ../scripts/rebuild-client.sh "$client" +done +``` + +## Documentation + +See [docs/client-registry.md](../docs/client-registry.md) for: +- Complete registry structure reference +- Management script usage +- Best practices +- Integration examples +- Troubleshooting guide + +## Requirements + +- **yq**: YAML processor (`brew install yq`) +- **jq**: JSON processor (`brew install jq`) diff --git a/clients/registry.yml b/clients/registry.yml new file mode 100644 index 0000000..6e09c6c --- /dev/null +++ b/clients/registry.yml @@ -0,0 +1,89 @@ +# Client Registry +# +# Single source of truth for all clients in the infrastructure. +# This file tracks client lifecycle, deployment state, and versions. +# +# Status values: +# - pending: Client configuration created, not yet deployed +# - deployed: Client is live and operational +# - maintenance: Under maintenance, may be temporarily unavailable +# - offboarding: Being decommissioned +# - destroyed: Infrastructure removed, secrets archived +# +# Role values: +# - canary: Used for testing updates before production rollout +# - production: Live client serving real users + +clients: + dev: + status: deployed + role: canary + deployed_date: 2026-01-17 + destroyed_date: null + + server: + type: cpx22 # 3 vCPU, 4 GB RAM, 80 GB SSD + location: fsn1 # Falkenstein, Germany + ip: 78.47.191.38 + id: "117714358" # Hetzner server ID + + apps: + - authentik + - nextcloud + + versions: + authentik: "2025.10.3" + nextcloud: "30.0.17" + traefik: "v3.0" + ubuntu: "24.04" + + maintenance: + last_full_update: 2026-01-17 + last_security_patch: 2026-01-17 + last_os_update: 2026-01-17 + last_backup_verified: null + + urls: + authentik: "https://auth.dev.vrije.cloud" + nextcloud: 
"https://nextcloud.dev.vrije.cloud" + + notes: | + Canary/test server. Used for testing updates before production rollout. + Server was recreated on 2026-01-17 for per-client SSH key implementation. + +# Add new clients here as they are deployed +# Template: +# +# clientname: +# status: deployed +# role: production +# deployed_date: YYYY-MM-DD +# destroyed_date: null +# +# server: +# type: cx22 +# location: nbg1 +# ip: 1.2.3.4 +# id: "12345678" +# +# apps: +# - authentik +# - nextcloud +# +# versions: +# authentik: "2025.10.3" +# nextcloud: "30.0.17" +# traefik: "v3.0" +# ubuntu: "24.04" +# +# maintenance: +# last_full_update: YYYY-MM-DD +# last_security_patch: YYYY-MM-DD +# last_os_update: YYYY-MM-DD +# last_backup_verified: null +# +# urls: +# authentik: "https://auth.clientname.vrije.cloud" +# nextcloud: "https://nextcloud.clientname.vrije.cloud" +# +# notes: "" diff --git a/docs/client-registry.md b/docs/client-registry.md new file mode 100644 index 0000000..aaf1532 --- /dev/null +++ b/docs/client-registry.md @@ -0,0 +1,325 @@ +# Client Registry + +The client registry is the single source of truth for tracking all deployed clients, their configuration, status, and maintenance history. 
+ +## Overview + +The registry is stored in [`clients/registry.yml`](../clients/registry.yml) and tracks: +- Deployment status and lifecycle +- Server specifications and location +- Installed applications and versions +- Maintenance history +- Access URLs +- Operational notes + +## Registry Structure + +```yaml +clients: + clientname: + status: deployed # pending | deployed | maintenance | offboarding | destroyed + role: production # canary | production + deployed_date: 2026-01-17 + destroyed_date: null + + server: + type: cx22 # Hetzner server type + location: nbg1 # Data center location + ip: 1.2.3.4 + id: "12345678" # Hetzner server ID + + apps: + - authentik + - nextcloud + + versions: + authentik: "2025.10.3" + nextcloud: "30.0.17" + traefik: "v3.0" + ubuntu: "24.04" + + maintenance: + last_full_update: 2026-01-17 + last_security_patch: 2026-01-17 + last_os_update: 2026-01-17 + last_backup_verified: null + + urls: + authentik: "https://auth.clientname.vrije.cloud" + nextcloud: "https://nextcloud.clientname.vrije.cloud" + + notes: "" +``` + +## Status Values + +- **pending**: Client configuration created, not yet deployed +- **deployed**: Client is live and operational +- **maintenance**: Under maintenance, may be temporarily unavailable +- **offboarding**: Being decommissioned +- **destroyed**: Infrastructure removed, secrets archived + +## Role Values + +- **canary**: Used for testing updates before production rollout (e.g., `dev`) +- **production**: Live client serving real users + +## Management Scripts + +### List All Clients + +```bash +# List all clients in table format +./scripts/list-clients.sh + +# Filter by status +./scripts/list-clients.sh --status=deployed +./scripts/list-clients.sh --status=destroyed + +# Filter by role +./scripts/list-clients.sh --role=canary +./scripts/list-clients.sh --role=production + +# Different output formats +./scripts/list-clients.sh --format=table # Default, colorized table +./scripts/list-clients.sh --format=json # 
JSON output +./scripts/list-clients.sh --format=csv # CSV export +./scripts/list-clients.sh --format=summary # Summary statistics +``` + +### View Client Details + +```bash +# Show detailed status for a specific client +./scripts/client-status.sh dev + +# Includes: +# - Deployment status and metadata +# - Server specifications +# - Application versions +# - Maintenance history +# - Access URLs +# - Live health checks (if deployed) +``` + +### Update Registry Manually + +```bash +# Mark client as deployed +./scripts/update-registry.sh myclient deploy \ + --role=production \ + --server-ip=1.2.3.4 \ + --server-id=12345678 \ + --server-type=cx22 \ + --server-location=nbg1 + +# Mark client as destroyed +./scripts/update-registry.sh myclient destroy + +# Update status +./scripts/update-registry.sh myclient status --status=maintenance +``` + +## Automatic Updates + +The registry is **automatically updated** by deployment scripts: + +### Deploy Script + +When running `./scripts/deploy-client.sh myclient`: +1. Creates registry entry if doesn't exist +2. Sets status to `deployed` +3. Records server details from OpenTofu state +4. Sets deployment date +5. Initializes maintenance tracking + +### Rebuild Script + +When running `./scripts/rebuild-client.sh myclient`: +1. Updates existing registry entry +2. Refreshes server details (IP, ID may change) +3. Updates `last_full_update` date +4. Maintains historical data + +### Destroy Script + +When running `./scripts/destroy-client.sh myclient`: +1. Sets status to `destroyed` +2. Records destruction date +3. Preserves all historical data +4. Keeps entry for audit trail + +## Canary Deployment Workflow + +The registry supports canary deployments for safe rollouts: + +```bash +# 1. Test on canary server first +./scripts/deploy-client.sh dev + +# 2. Verify canary is working +./scripts/client-status.sh dev + +# 3. 
If successful, roll out to production +for client in $(./scripts/list-clients.sh --role=production --format=csv | tail -n +2 | cut -d, -f1); do + ./scripts/rebuild-client.sh "$client" +done +``` + +## Best Practices + +### 1. Always Review Registry Before Changes + +```bash +# Check current state +./scripts/list-clients.sh + +# Review specific client +./scripts/client-status.sh myclient +``` + +### 2. Use Status Field for Coordination + +Mark clients as `maintenance` before disruptive changes: + +```bash +./scripts/update-registry.sh myclient status --status=maintenance +# Perform maintenance... +./scripts/update-registry.sh myclient status --status=deployed +``` + +### 3. Track Maintenance History + +Update maintenance fields after significant operations: + +```bash +# After security patches +yq eval -i ".clients.myclient.maintenance.last_security_patch = \"$(date +%Y-%m-%d)\"" clients/registry.yml + +# After OS updates +yq eval -i ".clients.myclient.maintenance.last_os_update = \"$(date +%Y-%m-%d)\"" clients/registry.yml + +# After backup verification +yq eval -i ".clients.myclient.maintenance.last_backup_verified = \"$(date +%Y-%m-%d)\"" clients/registry.yml +``` + +### 4. Add Operational Notes + +Document important events: + +```bash +yq eval -i ".clients.myclient.notes = \"Upgraded to Nextcloud 31 on 2026-01-20. Migration successful.\"" clients/registry.yml +``` + +### 5. 
Export for Reporting + +```bash +# Generate CSV report for management +./scripts/list-clients.sh --format=csv > reports/clients-$(date +%Y%m%d).csv + +# Get summary statistics +./scripts/list-clients.sh --format=summary +``` + +## Version Control + +The registry is **version controlled** in Git: + +- All changes are tracked +- Audit trail of client lifecycle +- Easy rollback if needed +- Collaborative management + +Always commit registry changes: + +```bash +git add clients/registry.yml +git commit -m "chore: Update client registry after deployment" +git push +``` + +## Querying with yq + +For advanced queries, use `yq` directly: + +```bash +# Find all deployed clients +yq eval '.clients | to_entries | map(select(.value.status == "deployed")) | .[].key' clients/registry.yml + +# Find canary clients +yq eval '.clients | to_entries | map(select(.value.role == "canary")) | .[].key' clients/registry.yml + +# Get all IPs +yq eval '.clients | to_entries | .[] | "\(.key): \(.value.server.ip)"' clients/registry.yml + +# Find clients needing updates (no update in 30+ days) +# (requires date arithmetic with external tools) +``` + +## Integration with Monitoring + +The registry can feed into monitoring systems: + +```bash +# Export as JSON for consumption by monitoring tools +./scripts/list-clients.sh --format=json > /var/monitoring/clients.json + +# Check health of all deployed clients +for client in $(./scripts/list-clients.sh --status=deployed --format=csv | tail -n +2 | cut -d, -f1); do + ./scripts/client-status.sh "$client" +done +``` + +## Troubleshooting + +### Registry Out of Sync + +If registry doesn't match reality: + +```bash +# Get actual state from OpenTofu +cd tofu +tofu state list + +# Get actual server details +tofu state show 'hcloud_server.client["myclient"]' + +# Update registry manually +./scripts/update-registry.sh myclient deploy \ + --server-ip= \ + --server-id= +``` + +### Missing Registry Entry + +If a client exists but not in registry: + +```bash +# 
Create entry manually +./scripts/update-registry.sh myclient deploy + +# Or rebuild to auto-create +./scripts/rebuild-client.sh myclient +``` + +### Corrupted Registry File + +If YAML is invalid: + +```bash +# Check syntax +yq eval . clients/registry.yml + +# Restore from Git +git checkout clients/registry.yml + +# Or restore from backup +cp clients/registry.yml.backup clients/registry.yml +``` + +## Related Documentation + +- [SSH Key Management](ssh-key-management.md) - Per-client SSH keys +- [Secrets Management](../secrets/clients/README.md) - SOPS-encrypted secrets +- [Deployment Guide](deployment.md) - Full deployment procedures +- [Maintenance Guide](maintenance.md) - Update and patching procedures diff --git a/scripts/client-status.sh b/scripts/client-status.sh new file mode 100755 index 0000000..4bf8a49 --- /dev/null +++ b/scripts/client-status.sh @@ -0,0 +1,237 @@ +#!/usr/bin/env bash +# +# Show detailed status for a specific client +# +# Usage: ./scripts/client-status.sh +# +# Displays: +# - Deployment status and metadata +# - Server information +# - Application versions +# - Maintenance history +# - URLs and access information +# - Live health checks (optional) + +set -euo pipefail + +# Colors for output +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +BLUE='\033[0;34m' +CYAN='\033[0;36m' +NC='\033[0m' # No Color + +# Script directory +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +PROJECT_ROOT="$(dirname "$SCRIPT_DIR")" +REGISTRY_FILE="$PROJECT_ROOT/clients/registry.yml" + +# Check arguments +if [ $# -ne 1 ]; then + echo -e "${RED}Error: Client name required${NC}" + echo "Usage: $0 " + echo "" + echo "Example: $0 dev" + exit 1 +fi + +CLIENT_NAME="$1" + +# Check if yq is available +if ! command -v yq &> /dev/null; then + echo -e "${RED}Error: 'yq' not found. Install with: brew install yq${NC}" + exit 1 +fi + +# Check if registry exists +if [ ! 
-f "$REGISTRY_FILE" ]; then + echo -e "${RED}Error: Registry file not found: $REGISTRY_FILE${NC}" + exit 1 +fi + +# Check if client exists (compare the whole node to "null"; a substring grep also matches entries that merely contain null fields such as destroyed_date) +if [ "$(yq eval ".clients.\"$CLIENT_NAME\"" "$REGISTRY_FILE")" = "null" ]; then + echo -e "${RED}Error: Client '$CLIENT_NAME' not found in registry${NC}" + echo "" + echo "Available clients:" + yq eval '.clients | keys | .[]' "$REGISTRY_FILE" + exit 1 +fi + +# Extract client information +STATUS=$(yq eval ".clients.\"$CLIENT_NAME\".status" "$REGISTRY_FILE") +ROLE=$(yq eval ".clients.\"$CLIENT_NAME\".role" "$REGISTRY_FILE") +DEPLOYED_DATE=$(yq eval ".clients.\"$CLIENT_NAME\".deployed_date" "$REGISTRY_FILE") +DESTROYED_DATE=$(yq eval ".clients.\"$CLIENT_NAME\".destroyed_date" "$REGISTRY_FILE") + +SERVER_TYPE=$(yq eval ".clients.\"$CLIENT_NAME\".server.type" "$REGISTRY_FILE") +SERVER_LOCATION=$(yq eval ".clients.\"$CLIENT_NAME\".server.location" "$REGISTRY_FILE") +SERVER_IP=$(yq eval ".clients.\"$CLIENT_NAME\".server.ip" "$REGISTRY_FILE") +SERVER_ID=$(yq eval ".clients.\"$CLIENT_NAME\".server.id" "$REGISTRY_FILE") + +APPS=$(yq eval ".clients.\"$CLIENT_NAME\".apps | join(\", \")" "$REGISTRY_FILE") + +AUTHENTIK_VERSION=$(yq eval ".clients.\"$CLIENT_NAME\".versions.authentik" "$REGISTRY_FILE") +NEXTCLOUD_VERSION=$(yq eval ".clients.\"$CLIENT_NAME\".versions.nextcloud" "$REGISTRY_FILE") +TRAEFIK_VERSION=$(yq eval ".clients.\"$CLIENT_NAME\".versions.traefik" "$REGISTRY_FILE") +UBUNTU_VERSION=$(yq eval ".clients.\"$CLIENT_NAME\".versions.ubuntu" "$REGISTRY_FILE") + +LAST_FULL_UPDATE=$(yq eval ".clients.\"$CLIENT_NAME\".maintenance.last_full_update" "$REGISTRY_FILE") +LAST_SECURITY_PATCH=$(yq eval ".clients.\"$CLIENT_NAME\".maintenance.last_security_patch" "$REGISTRY_FILE") +LAST_OS_UPDATE=$(yq eval ".clients.\"$CLIENT_NAME\".maintenance.last_os_update" "$REGISTRY_FILE") +LAST_BACKUP_VERIFIED=$(yq eval ".clients.\"$CLIENT_NAME\".maintenance.last_backup_verified" "$REGISTRY_FILE") + +AUTHENTIK_URL=$(yq eval 
".clients.\"$CLIENT_NAME\".urls.authentik" "$REGISTRY_FILE") +NEXTCLOUD_URL=$(yq eval ".clients.\"$CLIENT_NAME\".urls.nextcloud" "$REGISTRY_FILE") + +NOTES=$(yq eval ".clients.\"$CLIENT_NAME\".notes" "$REGISTRY_FILE") + +# Display header +echo -e "${BLUE}═══════════════════════════════════════════════════════════${NC}" +echo -e "${BLUE} CLIENT STATUS: $CLIENT_NAME${NC}" +echo -e "${BLUE}═══════════════════════════════════════════════════════════${NC}" +echo "" + +# Status section +echo -e "${CYAN}━━━ Deployment Status ━━━${NC}" +echo "" + +# Color status +STATUS_COLOR=$NC +case $STATUS in + deployed) STATUS_COLOR=$GREEN ;; + pending) STATUS_COLOR=$YELLOW ;; + maintenance) STATUS_COLOR=$CYAN ;; + offboarding) STATUS_COLOR=$RED ;; + destroyed) STATUS_COLOR=$RED ;; +esac + +# Color role +ROLE_COLOR=$NC +case $ROLE in + canary) ROLE_COLOR=$YELLOW ;; + production) ROLE_COLOR=$GREEN ;; +esac + +echo -e "Status: ${STATUS_COLOR}$STATUS${NC}" +echo -e "Role: ${ROLE_COLOR}$ROLE${NC}" +echo -e "Deployed: $DEPLOYED_DATE" +if [ "$DESTROYED_DATE" != "null" ]; then + echo -e "Destroyed: ${RED}$DESTROYED_DATE${NC}" +fi +echo "" + +# Server section +echo -e "${CYAN}━━━ Server Information ━━━${NC}" +echo "" +echo -e "Server Type: $SERVER_TYPE" +echo -e "Location: $SERVER_LOCATION" +echo -e "IP Address: $SERVER_IP" +echo -e "Server ID: $SERVER_ID" +echo "" + +# Applications section +echo -e "${CYAN}━━━ Applications ━━━${NC}" +echo "" +echo -e "Installed: $APPS" +echo "" + +# Versions section +echo -e "${CYAN}━━━ Versions ━━━${NC}" +echo "" +echo -e "Authentik: $AUTHENTIK_VERSION" +echo -e "Nextcloud: $NEXTCLOUD_VERSION" +echo -e "Traefik: $TRAEFIK_VERSION" +echo -e "Ubuntu: $UBUNTU_VERSION" +echo "" + +# Maintenance section +echo -e "${CYAN}━━━ Maintenance History ━━━${NC}" +echo "" +echo -e "Last Full Update: $LAST_FULL_UPDATE" +echo -e "Last Security Patch: $LAST_SECURITY_PATCH" +echo -e "Last OS Update: $LAST_OS_UPDATE" +if [ "$LAST_BACKUP_VERIFIED" != "null" ]; then + echo -e 
"Last Backup Verified: $LAST_BACKUP_VERIFIED" +else + echo -e "Last Backup Verified: ${YELLOW}Never${NC}" +fi +echo "" + +# URLs section +echo -e "${CYAN}━━━ Access URLs ━━━${NC}" +echo "" +echo -e "Authentik: $AUTHENTIK_URL" +echo -e "Nextcloud: $NEXTCLOUD_URL" +echo "" + +# Notes section +if [ "$NOTES" != "null" ] && [ -n "$NOTES" ]; then + echo -e "${CYAN}━━━ Notes ━━━${NC}" + echo "" + echo "$NOTES" | sed 's/^/ /' + echo "" +fi + +# Live health check (if server is deployed and reachable) +if [ "$STATUS" = "deployed" ]; then + echo -e "${CYAN}━━━ Live Health Check ━━━${NC}" + echo "" + + # Check if server is reachable via SSH (if Ansible is configured) + if command -v ansible &> /dev/null && [ -n "${HCLOUD_TOKEN:-}" ]; then + cd "$PROJECT_ROOT/ansible" + if timeout 10 ~/.local/bin/ansible -i hcloud.yml "$CLIENT_NAME" -m ping -o &>/dev/null; then + echo -e "SSH Access: ${GREEN}✓ Reachable${NC}" + + # Get Docker status + DOCKER_STATUS=$(~/.local/bin/ansible -i hcloud.yml "$CLIENT_NAME" -m shell -a "docker ps --format '{{.Names}}' 2>/dev/null | wc -l" -o 2>/dev/null | tail -1 | awk '{print $NF}' || echo "0") + if [ "$DOCKER_STATUS" != "0" ]; then + echo -e "Docker: ${GREEN}✓ Running ($DOCKER_STATUS containers)${NC}" + else + echo -e "Docker: ${RED}✗ No containers running${NC}" + fi + else + echo -e "SSH Access: ${RED}✗ Not reachable${NC}" + fi + else + echo -e "${YELLOW}Note: Install Ansible and set HCLOUD_TOKEN for live health checks${NC}" + fi + + echo "" + + # Check HTTPS endpoints + echo -e "HTTPS Endpoints:" + + # Check Authentik + if command -v curl &> /dev/null; then + if timeout 10 curl -sSf -o /dev/null "$AUTHENTIK_URL" 2>/dev/null; then + echo -e " Authentik: ${GREEN}✓ Responding${NC}" + else + echo -e " Authentik: ${RED}✗ Not responding${NC}" + fi + + # Check Nextcloud + if timeout 10 curl -sSf -o /dev/null "$NEXTCLOUD_URL" 2>/dev/null; then + echo -e " Nextcloud: ${GREEN}✓ Responding${NC}" + else + echo -e " Nextcloud: ${RED}✗ Not responding${NC}" + fi 
+ else + echo -e " ${YELLOW}Install curl for endpoint checks${NC}" + fi + + echo "" +fi + +# Management commands section +echo -e "${CYAN}━━━ Management Commands ━━━${NC}" +echo "" +echo -e "View secrets: ${BLUE}sops secrets/clients/${CLIENT_NAME}.sops.yaml${NC}" +echo -e "Rebuild server: ${BLUE}./scripts/rebuild-client.sh $CLIENT_NAME${NC}" +echo -e "Destroy server: ${BLUE}./scripts/destroy-client.sh $CLIENT_NAME${NC}" +echo -e "List all: ${BLUE}./scripts/list-clients.sh${NC}" +echo "" + +echo -e "${BLUE}═══════════════════════════════════════════════════════════${NC}" diff --git a/scripts/deploy-client.sh b/scripts/deploy-client.sh index e131dca..22360d8 100755 --- a/scripts/deploy-client.sh +++ b/scripts/deploy-client.sh @@ -139,7 +139,7 @@ echo -e "${BLUE}========================================${NC}" echo "" # Step 1: Provision infrastructure -echo -e "${YELLOW}[1/3] Provisioning infrastructure with OpenTofu...${NC}" +echo -e "${YELLOW}[1/4] Provisioning infrastructure with OpenTofu...${NC}" cd "$PROJECT_ROOT/tofu" @@ -163,7 +163,7 @@ fi echo "" # Step 2: Setup base system -echo -e "${YELLOW}[2/3] Setting up base system (Docker, Traefik)...${NC}" +echo -e "${YELLOW}[2/4] Setting up base system (Docker, Traefik)...${NC}" cd "$PROJECT_ROOT/ansible" @@ -174,7 +174,7 @@ echo -e "${GREEN}✓ Base system configured${NC}" echo "" # Step 3: Deploy applications -echo -e "${YELLOW}[3/3] Deploying applications (Authentik, Nextcloud, SSO)...${NC}" +echo -e "${YELLOW}[3/4] Deploying applications (Authentik, Nextcloud, SSO)...${NC}" ~/.local/bin/ansible-playbook -i hcloud.yml playbooks/deploy.yml --limit "$CLIENT_NAME" @@ -182,6 +182,35 @@ echo "" echo -e "${GREEN}✓ Applications deployed${NC}" echo "" +# Step 4: Update client registry +echo -e "${YELLOW}[4/4] Updating client registry...${NC}" + +cd "$PROJECT_ROOT/tofu" + +# Get server information from Terraform state +SERVER_IP=$(tofu output -json client_ips 2>/dev/null | jq -r ".\"$CLIENT_NAME\"" || echo "") 
+SERVER_ID=$(tofu state show "hcloud_server.client[\"$CLIENT_NAME\"]" 2>/dev/null | grep "^[[:space:]]*id[[:space:]]*=" | awk '{print $3}' | tr -d '"' || echo "") +SERVER_TYPE=$(tofu state show "hcloud_server.client[\"$CLIENT_NAME\"]" 2>/dev/null | grep "^[[:space:]]*server_type[[:space:]]*=" | awk '{print $3}' | tr -d '"' || echo "") +SERVER_LOCATION=$(tofu state show "hcloud_server.client[\"$CLIENT_NAME\"]" 2>/dev/null | grep "^[[:space:]]*location[[:space:]]*=" | awk '{print $3}' | tr -d '"' || echo "") + +# Determine role (dev is canary, everything else is production by default) +ROLE="production" +if [ "$CLIENT_NAME" = "dev" ]; then + ROLE="canary" +fi + +# Update registry +"$SCRIPT_DIR/update-registry.sh" "$CLIENT_NAME" deploy \ + --role="$ROLE" \ + --server-ip="$SERVER_IP" \ + --server-id="$SERVER_ID" \ + --server-type="$SERVER_TYPE" \ + --server-location="$SERVER_LOCATION" + +echo "" +echo -e "${GREEN}✓ Registry updated${NC}" +echo "" + # Calculate duration END_TIME=$(date +%s) DURATION=$((END_TIME - START_TIME)) diff --git a/scripts/destroy-client.sh b/scripts/destroy-client.sh index 00dcc80..9f2d2b9 100755 --- a/scripts/destroy-client.sh +++ b/scripts/destroy-client.sh @@ -113,7 +113,7 @@ fi echo "" # Step 3: Destroy infrastructure with OpenTofu -echo -e "${YELLOW}[3/3] Destroying infrastructure with OpenTofu...${NC}" +echo -e "${YELLOW}[3/4] Destroying infrastructure with OpenTofu...${NC}" cd "$PROJECT_ROOT/tofu" @@ -128,6 +128,15 @@ tofu apply destroy.tfplan # Cleanup plan file rm -f destroy.tfplan +echo "" + +# Step 4: Update client registry +echo -e "${YELLOW}[4/4] Updating client registry...${NC}" + +"$SCRIPT_DIR/update-registry.sh" "$CLIENT_NAME" destroy + +echo "" +echo -e "${GREEN}✓ Registry updated${NC}" echo "" echo -e "${GREEN}========================================${NC}" echo -e "${GREEN}✓ Client '$CLIENT_NAME' destroyed successfully${NC}" diff --git a/scripts/list-clients.sh b/scripts/list-clients.sh new file mode 100755 index 
0000000..628a197 --- /dev/null +++ b/scripts/list-clients.sh @@ -0,0 +1,231 @@ +#!/usr/bin/env bash +# +# List all clients from the registry +# +# Usage: ./scripts/list-clients.sh [--status=] [--role=] [--format=] +# +# Options: +# --status= Filter by status (deployed, pending, maintenance, offboarding, destroyed) +# --role= Filter by role (canary, production) +# --format= Output format: table (default), json, csv, summary +# +# Examples: +# ./scripts/list-clients.sh # List all clients +# ./scripts/list-clients.sh --status=deployed # Only deployed clients +# ./scripts/list-clients.sh --role=production # Only production clients +# ./scripts/list-clients.sh --format=json # JSON output + +set -euo pipefail + +# Script directory +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +PROJECT_ROOT="$(dirname "$SCRIPT_DIR")" +REGISTRY_FILE="$PROJECT_ROOT/clients/registry.yml" + +# Colors +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +BLUE='\033[0;34m' +CYAN='\033[0;36m' +NC='\033[0m' # No Color + +# Parse arguments +FILTER_STATUS="" +FILTER_ROLE="" +FORMAT="table" + +for arg in "$@"; do + case $arg in + --status=*) + FILTER_STATUS="${arg#*=}" + ;; + --role=*) + FILTER_ROLE="${arg#*=}" + ;; + --format=*) + FORMAT="${arg#*=}" + ;; + --help|-h) + echo "Usage: $0 [--status=] [--role=] [--format=]" + echo "" + echo "Options:" + echo " --status= Filter by status (deployed, pending, maintenance, offboarding, destroyed)" + echo " --role= Filter by role (canary, production)" + echo " --format= Output format: table (default), json, csv, summary" + exit 0 + ;; + esac +done + +# Check if registry exists +if [ ! -f "$REGISTRY_FILE" ]; then + echo -e "${RED}Error: Registry file not found: $REGISTRY_FILE${NC}" + exit 1 +fi + +# Check if yq is available (for YAML parsing) +if ! command -v yq &> /dev/null; then + echo -e "${YELLOW}Warning: 'yq' not found. Install with: brew install yq${NC}" + echo "Falling back to basic grep parsing..." 
+ USE_YQ=false +else + USE_YQ=true +fi + +# Function to get clients using yq +list_clients_yq() { + local clients=$(yq eval '.clients | keys | .[]' "$REGISTRY_FILE") + + for client in $clients; do + local status=$(yq eval ".clients.\"$client\".status" "$REGISTRY_FILE") + local role=$(yq eval ".clients.\"$client\".role" "$REGISTRY_FILE") + + # Apply filters + if [ -n "$FILTER_STATUS" ] && [ "$status" != "$FILTER_STATUS" ]; then + continue + fi + if [ -n "$FILTER_ROLE" ] && [ "$role" != "$FILTER_ROLE" ]; then + continue + fi + + # Get other fields + local deployed_date=$(yq eval ".clients.\"$client\".deployed_date" "$REGISTRY_FILE") + local server_ip=$(yq eval ".clients.\"$client\".server.ip" "$REGISTRY_FILE") + local server_type=$(yq eval ".clients.\"$client\".server.type" "$REGISTRY_FILE") + local apps=$(yq eval ".clients.\"$client\".apps | join(\", \")" "$REGISTRY_FILE") + + echo "$client|$status|$role|$deployed_date|$server_type|$server_ip|$apps" + done +} + +# Function to output in table format +output_table() { + echo -e "${BLUE}╔════════════════════════════════════════════════════════════════════════════════════╗${NC}" + echo -e "${BLUE}║ CLIENT REGISTRY ║${NC}" + echo -e "${BLUE}╠════════════════════════════════════════════════════════════════════════════════════╣${NC}" + + printf "${CYAN}%-15s ${GREEN}%-12s ${YELLOW}%-10s ${NC}%-12s %-10s %-15s %-20s\n" \ + "CLIENT" "STATUS" "ROLE" "DEPLOYED" "TYPE" "IP" "APPS" + echo -e "${BLUE}────────────────────────────────────────────────────────────────────────────────────${NC}" + + local count=0 + while IFS='|' read -r client status role deployed_date server_type server_ip apps; do + # Color status + local status_color=$NC + case $status in + deployed) status_color=$GREEN ;; + pending) status_color=$YELLOW ;; + maintenance) status_color=$CYAN ;; + offboarding) status_color=$RED ;; + destroyed) status_color=$RED ;; + esac + + # Color role + local role_color=$NC + case $role in + canary) role_color=$YELLOW ;; + 
production) role_color=$GREEN ;; + esac + + printf "%-15s ${status_color}%-12s${NC} ${role_color}%-10s${NC} %-12s %-10s %-15s %-20s\n" \ + "$client" "$status" "$role" "$deployed_date" "$server_type" "$server_ip" "${apps:0:20}" + count=$((count + 1)) # not ((count++)): it returns status 1 when count is 0 and aborts under set -e + done + + echo -e "${BLUE}────────────────────────────────────────────────────────────────────────────────────${NC}" + echo -e "${BLUE}║${NC} Total clients: $count ${BLUE}║${NC}" + echo -e "${BLUE}╚════════════════════════════════════════════════════════════════════════════════════╝${NC}" +} + +# Function to output summary +output_summary() { + local total=0 + local deployed=0 + local pending=0 + local maintenance=0 + local canary=0 + local production=0 + + while IFS='|' read -r client status role deployed_date server_type server_ip apps; do + total=$((total + 1)) # assignment form always succeeds; ((total++)) fails on the first row under set -e + case $status in + deployed) deployed=$((deployed + 1)) ;; + pending) pending=$((pending + 1)) ;; + maintenance) maintenance=$((maintenance + 1)) ;; + esac + case $role in + canary) canary=$((canary + 1)) ;; + production) production=$((production + 1)) ;; + esac + done + + echo -e "${BLUE}═══════════════════════════════════${NC}" + echo -e "${BLUE} CLIENT REGISTRY SUMMARY${NC}" + echo -e "${BLUE}═══════════════════════════════════${NC}" + echo "" + echo -e "Total Clients: ${CYAN}$total${NC}" + echo "" + echo -e "By Status:" + echo -e " Deployed: ${GREEN}$deployed${NC}" + echo -e " Pending: ${YELLOW}$pending${NC}" + echo -e " Maintenance: ${CYAN}$maintenance${NC}" + echo "" + echo -e "By Role:" + echo -e " Canary: ${YELLOW}$canary${NC}" + echo -e " Production: ${GREEN}$production${NC}" + echo "" +} + +# Function to output JSON +output_json() { + if $USE_YQ; then + yq eval -o=json '.clients' "$REGISTRY_FILE" + else + echo "{}" + fi +} + +# Function to output CSV +output_csv() { + echo "client,status,role,deployed_date,server_type,server_ip,apps" + while IFS='|' read -r client status role deployed_date server_type server_ip apps; do + echo "$client,$status,$role,$deployed_date,$server_type,$server_ip,\"$apps\"" + done +} + +# Main 
execution +if $USE_YQ; then + DATA=$(list_clients_yq) +else + echo -e "${RED}Error: yq is required for this script${NC}" + echo "Install with: brew install yq" + exit 1 +fi + +# Check if any clients found +if [ -z "$DATA" ]; then + echo -e "${YELLOW}No clients found matching criteria${NC}" + exit 0 +fi + +# Output based on format +case $FORMAT in + table) + echo "$DATA" | output_table + ;; + json) + output_json + ;; + csv) + echo "$DATA" | output_csv + ;; + summary) + echo "$DATA" | output_summary + ;; + *) + echo -e "${RED}Unknown format: $FORMAT${NC}" + echo "Valid formats: table, json, csv, summary" + exit 1 + ;; +esac diff --git a/scripts/rebuild-client.sh b/scripts/rebuild-client.sh index d1376a3..24a53be 100755 --- a/scripts/rebuild-client.sh +++ b/scripts/rebuild-client.sh @@ -118,7 +118,7 @@ echo -e "${BLUE}========================================${NC}" echo "" # Step 1: Check if infrastructure exists and destroy it -echo -e "${YELLOW}[1/4] Checking existing infrastructure...${NC}" +echo -e "${YELLOW}[1/5] Checking existing infrastructure...${NC}" cd "$PROJECT_ROOT/tofu" @@ -146,7 +146,7 @@ fi echo "" # Step 2: Provision infrastructure -echo -e "${YELLOW}[2/4] Provisioning infrastructure with OpenTofu...${NC}" +echo -e "${YELLOW}[2/5] Provisioning infrastructure with OpenTofu...${NC}" cd "$PROJECT_ROOT/tofu" @@ -164,7 +164,7 @@ sleep 60 echo "" # Step 3: Setup base system -echo -e "${YELLOW}[3/4] Setting up base system (Docker, Traefik)...${NC}" +echo -e "${YELLOW}[3/5] Setting up base system (Docker, Traefik)...${NC}" cd "$PROJECT_ROOT/ansible" @@ -175,7 +175,7 @@ echo -e "${GREEN}✓ Base system configured${NC}" echo "" # Step 4: Deploy applications -echo -e "${YELLOW}[4/4] Deploying applications (Authentik, Nextcloud, SSO)...${NC}" +echo -e "${YELLOW}[4/5] Deploying applications (Authentik, Nextcloud, SSO)...${NC}" ~/.local/bin/ansible-playbook -i hcloud.yml playbooks/deploy.yml --limit "$CLIENT_NAME" @@ -183,6 +183,35 @@ echo "" echo -e "${GREEN}✓ 
Applications deployed${NC}" echo "" +# Step 5: Update client registry +echo -e "${YELLOW}[5/5] Updating client registry...${NC}" + +cd "$PROJECT_ROOT/tofu" + +# Get server information from Terraform state +SERVER_IP=$(tofu output -json client_ips 2>/dev/null | jq -r ".\"$CLIENT_NAME\"" || echo "") +SERVER_ID=$(tofu state show "hcloud_server.client[\"$CLIENT_NAME\"]" 2>/dev/null | grep "^[[:space:]]*id[[:space:]]*=" | awk '{print $3}' | tr -d '"' || echo "") +SERVER_TYPE=$(tofu state show "hcloud_server.client[\"$CLIENT_NAME\"]" 2>/dev/null | grep "^[[:space:]]*server_type[[:space:]]*=" | awk '{print $3}' | tr -d '"' || echo "") +SERVER_LOCATION=$(tofu state show "hcloud_server.client[\"$CLIENT_NAME\"]" 2>/dev/null | grep "^[[:space:]]*location[[:space:]]*=" | awk '{print $3}' | tr -d '"' || echo "") + +# Determine role (dev is canary, everything else is production by default) +ROLE="production" +if [ "$CLIENT_NAME" = "dev" ]; then + ROLE="canary" +fi + +# Update registry +"$SCRIPT_DIR/update-registry.sh" "$CLIENT_NAME" deploy \ + --role="$ROLE" \ + --server-ip="$SERVER_IP" \ + --server-id="$SERVER_ID" \ + --server-type="$SERVER_TYPE" \ + --server-location="$SERVER_LOCATION" + +echo "" +echo -e "${GREEN}✓ Registry updated${NC}" +echo "" + # Calculate duration END_TIME=$(date +%s) DURATION=$((END_TIME - START_TIME)) diff --git a/scripts/update-registry.sh b/scripts/update-registry.sh new file mode 100755 index 0000000..969864c --- /dev/null +++ b/scripts/update-registry.sh @@ -0,0 +1,183 @@ +#!/usr/bin/env bash +# +# Update the client registry with deployment information +# +# Usage: ./scripts/update-registry.sh [options] +# +# Actions: +# deploy - Mark client as deployed (creates/updates entry) +# destroy - Mark client as destroyed +# status - Update status field +# +# Options: +# --status= Set status (pending|deployed|maintenance|offboarding|destroyed) +# --role= Set role (canary|production) +# --server-ip= Set server IP +# --server-id= Set server ID +# 
#   --server-type=TYPE           Set server type
#   --server-location=LOCATION   Set server location
#
# Exit status: 0 on success (a "destroy" of an unknown client only warns),
# 1 on usage errors, a missing yq, an invalid status, or an unknown client
# for the "status" action.

set -euo pipefail

# Resolve paths relative to this script so it works from any directory.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_ROOT="$(dirname "$SCRIPT_DIR")"
REGISTRY_FILE="$PROJECT_ROOT/clients/registry.yml"

# yq performs every read and write of the registry.
if ! command -v yq &> /dev/null; then
    echo "Error: 'yq' not found. Install with: brew install yq" >&2
    exit 1
fi

# Client name and action are mandatory positional arguments.
if [ $# -lt 2 ]; then
    echo "Usage: $0 CLIENT_NAME ACTION [options]" >&2
    echo "Actions: deploy, destroy, status" >&2
    exit 1
fi

CLIENT_NAME="$1"
ACTION="$2"
shift 2

# The client name is spliced into yq expressions as a double-quoted key, so a
# name containing a quote or backslash would change the expression itself.
case $CLIENT_NAME in
    '' | *\"* | *\\*)
        echo "Error: invalid client name '$CLIENT_NAME'" >&2
        exit 1
        ;;
esac

# Parse --key=value options.
STATUS=""
ROLE=""
SERVER_IP=""
SERVER_ID=""
SERVER_TYPE=""
SERVER_LOCATION=""

for arg in "$@"; do
    case $arg in
        --status=*)          STATUS="${arg#*=}" ;;
        --role=*)            ROLE="${arg#*=}" ;;
        --server-ip=*)       SERVER_IP="${arg#*=}" ;;
        --server-id=*)       SERVER_ID="${arg#*=}" ;;
        --server-type=*)     SERVER_TYPE="${arg#*=}" ;;
        --server-location=*) SERVER_LOCATION="${arg#*=}" ;;
        *)
            # Still ignored, as before, but no longer silently: a typo such
            # as --server_ip= would otherwise go unnoticed.
            echo "Warning: ignoring unrecognised option '$arg'" >&2
            ;;
    esac
done

# Ensure the registry file (and its directory) exists.
if [ ! -f "$REGISTRY_FILE" ]; then
    mkdir -p "$(dirname "$REGISTRY_FILE")"
    cat > "$REGISTRY_FILE" <<'EOF'
# Client Registry
#
# Single source of truth for all clients in the infrastructure.

clients: {}
EOF
fi

TODAY=$(date +%Y-%m-%d)

# yq path of this client's registry entry.
ENTRY=".clients.\"$CLIENT_NAME\""

# Succeeds when the registry has a key for CLIENT_NAME.
# The key is tested with has() instead of grepping the rendered entry for
# "null": entries written by this script always contain null fields
# (destroyed_date, maintenance.last_backup_verified), so the grep reported
# every existing client as missing.
client_exists() {
    local found
    found=$(CLIENT_KEY="$CLIENT_NAME" \
        yq eval '(.clients // {}) | has(strenv(CLIENT_KEY))' "$REGISTRY_FILE")
    [ "$found" = "true" ]
}

# set_expr SUFFIX: apply the yq expression "<entry>SUFFIX" in place.
# Only used with fixed, script-controlled suffixes.
set_expr() {
    yq eval -i "${ENTRY}$1" "$REGISTRY_FILE"
}

# set_string FIELD VALUE: store VALUE as a string at <entry>.FIELD.
# VALUE travels through the environment and strenv(), so quotes or
# backslashes in it cannot alter the yq expression.
set_string() {
    FIELD_VALUE="$2" yq eval -i "${ENTRY}.$1 = strenv(FIELD_VALUE)" "$REGISTRY_FILE"
}

# set_string_if_given FIELD VALUE: like set_string, but skip empty values and
# the literal "null" that `jq -r` prints when the caller's lookup found nothing.
set_string_if_given() {
    if [ -n "$2" ] && [ "$2" != "null" ]; then
        set_string "$1" "$2"
    fi
}

case $ACTION in
    deploy)
        if ! client_exists; then
            echo "Creating new registry entry for $CLIENT_NAME"

            # Start with minimal structure.
            set_expr ' = {}'
            set_string status "deployed"
            set_string deployed_date "$TODAY"
            set_expr '.destroyed_date = null'

            # Role defaults to production when not supplied.
            set_string role "${ROLE:-production}"

            # Server info: only the fields that were provided.
            set_expr '.server = {}'
            set_string_if_given server.type "$SERVER_TYPE"
            set_string_if_given server.location "$SERVER_LOCATION"
            set_string_if_given server.ip "$SERVER_IP"
            set_string_if_given server.id "$SERVER_ID"

            # Apps.
            set_expr '.apps = ["authentik", "nextcloud"]'

            # Maintenance tracking.
            set_expr '.maintenance = {}'
            set_string maintenance.last_full_update "$TODAY"
            set_string maintenance.last_security_patch "$TODAY"
            set_string maintenance.last_os_update "$TODAY"
            set_expr '.maintenance.last_backup_verified = null'

            # URLs follow the fixed <app>.<client>.vrije.cloud scheme.
            set_expr '.urls = {}'
            set_string urls.authentik "https://auth.$CLIENT_NAME.vrije.cloud"
            set_string urls.nextcloud "https://nextcloud.$CLIENT_NAME.vrije.cloud"

            # Notes.
            set_expr '.notes = ""'
        else
            echo "Updating registry entry for $CLIENT_NAME"

            set_string status "deployed"

            # NOTE(review): --role is not applied to existing entries.
            # rebuild-client.sh always passes a computed default role, so
            # applying it here would overwrite manual role edits. Confirm
            # this is the intended behaviour.

            # Update server info if provided.
            set_string_if_given server.ip "$SERVER_IP"
            set_string_if_given server.id "$SERVER_ID"
            set_string_if_given server.type "$SERVER_TYPE"
            set_string_if_given server.location "$SERVER_LOCATION"

            # Update maintenance date.
            set_string maintenance.last_full_update "$TODAY"
        fi
        ;;

    destroy)
        echo "Marking $CLIENT_NAME as destroyed in registry"

        # Destroying an unregistered client is not an error for callers.
        if ! client_exists; then
            echo "Warning: Client $CLIENT_NAME not found in registry" >&2
            exit 0
        fi

        set_string status "destroyed"
        set_string destroyed_date "$TODAY"
        ;;

    status)
        if [ -z "$STATUS" ]; then
            echo "Error: --status=STATUS required for status action" >&2
            exit 1
        fi

        # Only the statuses documented in the usage header are accepted.
        case $STATUS in
            pending | deployed | maintenance | offboarding | destroyed) ;;
            *)
                echo "Error: invalid status '$STATUS'" >&2
                echo "Valid statuses: pending, deployed, maintenance, offboarding, destroyed" >&2
                exit 1
                ;;
        esac

        echo "Updating status of $CLIENT_NAME to $STATUS"

        if ! client_exists; then
            echo "Error: Client $CLIENT_NAME not found in registry" >&2
            exit 1
        fi

        set_string status "$STATUS"
        ;;

    *)
        echo "Error: Unknown action '$ACTION'" >&2
        echo "Valid actions: deploy, destroy, status" >&2
        exit 1
        ;;
esac

echo "✓ Registry updated successfully"