This commit captures the infrastructure state immediately following the "Post-Tyranny Tech" workshop on January 23rd, 2026. Infrastructure Status: - 14 client servers deployed (white, valk, zwaan, specht, das, uil, vos, haas, wolf, ree, mees, mus, mol, kikker) - Services: Authentik SSO, Nextcloud, Collabora Office, Traefik - Private network architecture with edge NAT gateway - OIDC integration between Authentik and Nextcloud - Automated recovery flows and invitation system - Container update monitoring with Diun - Uptime monitoring with Uptime Kuma Changes include: - Multiple new client host configurations - Network architecture improvements (private IPs + NAT) - DNS management automation - Container update notifications - Email configuration via Mailgun - SSH key generation for all clients - Encrypted secrets for all deployments - Health check and diagnostic scripts Known Issues to Address: - Nextcloud version pinned to v30 (should use 'latest' or v32) - Zitadel references in templates (migrated to Authentik but templates not updated) - Traefik dynamic config has obsolete static routes 🤖 Generated with Claude Code (https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
311 lines
9.9 KiB
YAML
311 lines
9.9 KiB
YAML
---
# Playbook: Update Docker containers across clients
# Usage:
#   # Update single client
#   ansible-playbook -i hcloud.yml playbooks/update-containers.yml --limit black
#
#   # Update specific service only
#   ansible-playbook -i hcloud.yml playbooks/update-containers.yml --limit black --tags authentik
#
#   # Dry run (check mode)
#   ansible-playbook -i hcloud.yml playbooks/update-containers.yml --limit black --check
#
#   # Update multiple clients in sequence
#   ansible-playbook -i hcloud.yml playbooks/update-containers.yml --limit "dev,test"

- name: Update Docker containers
  # Rolling update of the per-client Docker stacks. For every service listed in
  # services_to_update the play optionally takes a backup, pulls images,
  # restarts the compose project under /opt/docker/<service>, and waits for the
  # containers to report running/healthy before moving on.
  #
  # Variables read from outside this play:
  #   client_name - used to build the auth./nextcloud. health-check URLs.
  hosts: all
  become: true
  serial: 1  # Process one host at a time for safety

  vars:
    # Services to update. Extra vars given as key=value are passed as plain
    # strings, so override this list with JSON syntax, e.g.:
    #   -e '{"services_to_update": ["authentik"]}'
    services_to_update:
      - traefik
      - authentik
      - nextcloud
      - diun

    # Backup before update
    create_backup: true

    # Wait time between service updates (seconds)
    update_delay: 30

  pre_tasks:
    - name: Display update plan
      ansible.builtin.debug:
        msg: |
          Updating {{ inventory_hostname }}
          Services: {{ services_to_update | join(', ') }}
          Backup enabled: {{ create_backup }}
      tags: always

    - name: Check if host is reachable
      ansible.builtin.ping:
      tags: always

    - name: Get current container status (before)
      # The Go-template placeholders are wrapped in raw/endraw so Jinja2 passes
      # them to docker untouched instead of trying to evaluate them.
      ansible.builtin.command: >-
        docker ps --format
        'table {% raw %}{{.Names}}\t{{.Status}}\t{{.Image}}{% endraw %}'
      register: containers_before
      changed_when: false
      # Read-only; run it in --check mode too so the debug task below has output.
      check_mode: false
      tags: always

    - name: Display current containers
      ansible.builtin.debug:
        msg: "{{ containers_before.stdout_lines }}"
      tags: always

  tasks:
    # ==========================================
    # Traefik Updates
    # ==========================================
    - name: Update Traefik
      when: "'traefik' in services_to_update"
      tags: traefik
      block:
        - name: Create Traefik backup
          # Best effort: tar errors (e.g. a missing file) are deliberately
          # ignored. umask 077 keeps the archive, which includes acme.json,
          # readable by the owner only while it sits in /tmp.
          ansible.builtin.shell: |
            umask 077
            cd /opt/docker/traefik
            tar -czf /tmp/traefik-backup-$(date +%Y%m%d-%H%M%S).tar.gz \
              acme.json docker-compose.yml traefik.yml 2>/dev/null || true
          # "| bool" so that -e create_backup=false (a string) disables backups.
          when: create_backup | bool

        - name: Pull latest Traefik image
          community.docker.docker_image:
            name: traefik:latest
            source: pull
            force_source: true

        - name: Restart Traefik
          # NOTE(review): docker_compose is the Compose v1 module and is, as far
          # as I know, removed in community.docker 4.x; confirm the installed
          # collection version before upgrading it.
          community.docker.docker_compose:
            project_src: /opt/docker/traefik
            restarted: true
            pull: true

        - name: Wait for Traefik to be healthy
          ansible.builtin.command: >-
            docker inspect
            --format='{% raw %}{{.State.Status}}{% endraw %}'
            traefik
          register: traefik_status
          until: traefik_status.stdout == "running"
          retries: 10
          delay: 5
          changed_when: false

        - name: Verify Traefik SSL certificates
          ansible.builtin.command: docker exec traefik ls -la /acme.json
          register: traefik_certs
          changed_when: false
          failed_when: traefik_certs.rc != 0

        - name: Delay between services
          ansible.builtin.pause:
            seconds: "{{ update_delay }}"

    # ==========================================
    # Authentik Updates
    # ==========================================
    - name: Update Authentik
      when: "'authentik' in services_to_update"
      tags: authentik
      block:
        - name: Create Authentik database backup
          # pipefail makes a failing pg_dump fail the task instead of being
          # masked by gzip's exit status; umask 077 keeps the dump private.
          ansible.builtin.shell: |
            set -o pipefail
            umask 077
            docker exec authentik-db pg_dump -U authentik authentik | \
              gzip > /tmp/authentik-backup-$(date +%Y%m%d-%H%M%S).sql.gz
          args:
            executable: /bin/bash
          when: create_backup | bool

        - name: Pull latest Authentik images
          community.docker.docker_image:
            name: "{{ item }}"
            source: pull
            force_source: true
          loop:
            - ghcr.io/goauthentik/server:latest
            - postgres:16-alpine
            - redis:alpine

        - name: Restart Authentik services
          community.docker.docker_compose:
            project_src: /opt/docker/authentik
            restarted: true
            pull: true

        - name: Wait for Authentik server to be healthy
          ansible.builtin.command: >-
            docker inspect
            --format='{% raw %}{{.State.Health.Status}}{% endraw %}'
            authentik-server
          register: authentik_status
          until: authentik_status.stdout == "healthy"
          retries: 20
          delay: 10
          changed_when: false

        - name: Wait for Authentik worker to be healthy
          ansible.builtin.command: >-
            docker inspect
            --format='{% raw %}{{.State.Health.Status}}{% endraw %}'
            authentik-worker
          register: authentik_worker_status
          until: authentik_worker_status.stdout == "healthy"
          retries: 20
          delay: 10
          changed_when: false

        - name: Verify Authentik web interface
          ansible.builtin.uri:
            url: "https://auth.{{ client_name }}.vrije.cloud/if/flow/default-authentication-flow/"
            validate_certs: true
            status_code: 200
          register: authentik_health
          # An explicit until makes the retries effective on every Ansible
          # version rather than only where retries alone implies a retry loop.
          until: authentik_health is succeeded
          retries: 5
          delay: 10

        - name: Delay between services
          ansible.builtin.pause:
            seconds: "{{ update_delay }}"

    # ==========================================
    # Nextcloud Updates
    # ==========================================
    - name: Update Nextcloud
      when: "'nextcloud' in services_to_update"
      tags: nextcloud
      block:
        - name: Create Nextcloud database backup
          # The dump runs inside the container, so the password is read there
          # and never appears on the host command line. mariadb-dump is
          # preferred because newer MariaDB images appear to ship only the
          # mariadb-* client names; mysqldump remains the fallback.
          # NOTE(review): the literal 'password' fallback is kept from the
          # original; confirm it is still wanted.
          ansible.builtin.shell: |
            set -o pipefail
            umask 077
            docker exec nextcloud-db sh -c '
              dump=mysqldump
              command -v mariadb-dump >/dev/null 2>&1 && dump=mariadb-dump
              exec "$dump" -u nextcloud -p"$(cat /run/secrets/db_password 2>/dev/null || echo password)" nextcloud
            ' | gzip > /tmp/nextcloud-backup-$(date +%Y%m%d-%H%M%S).sql.gz
          args:
            executable: /bin/bash
          when: create_backup | bool
          # Backup stays best effort: a failure is reported but does not abort.
          ignore_errors: true

        - name: Enable Nextcloud maintenance mode
          ansible.builtin.command: docker exec -u www-data nextcloud php occ maintenance:mode --on
          register: maintenance_mode
          changed_when: "'Maintenance mode enabled' in maintenance_mode.stdout"

        - name: Pull latest Nextcloud images
          community.docker.docker_image:
            name: "{{ item }}"
            source: pull
            force_source: true
          loop:
            - nextcloud:latest
            - mariadb:11
            - redis:alpine
            - collabora/code:latest

        - name: Restart Nextcloud services
          community.docker.docker_compose:
            project_src: /opt/docker/nextcloud
            restarted: true
            pull: true

        - name: Wait for Nextcloud to be ready
          # Polls the database container only; uses mariadb-admin when the
          # image provides it and falls back to mysqladmin otherwise.
          ansible.builtin.command: >-
            docker exec nextcloud-db sh -c
            'if command -v mariadb-admin >/dev/null 2>&1;
            then exec mariadb-admin ping -h localhost -u root --silent;
            else exec mysqladmin ping -h localhost -u root --silent; fi'
          register: nc_db_status
          until: nc_db_status.rc == 0
          retries: 20
          delay: 5
          changed_when: false

        - name: Run Nextcloud upgrade (if needed)
          ansible.builtin.command: docker exec -u www-data nextcloud php occ upgrade
          register: nc_upgrade
          changed_when: "'Updated database' in nc_upgrade.stdout"
          # A non-zero exit is tolerated only when occ reports nothing to do.
          failed_when:
            - nc_upgrade.rc != 0
            - "'already latest version' not in nc_upgrade.stdout"

        - name: Disable Nextcloud maintenance mode
          ansible.builtin.command: docker exec -u www-data nextcloud php occ maintenance:mode --off
          register: maintenance_off
          changed_when: "'Maintenance mode disabled' in maintenance_off.stdout"

        - name: Verify Nextcloud web interface
          ansible.builtin.uri:
            url: "https://nextcloud.{{ client_name }}.vrije.cloud/status.php"
            validate_certs: true
            status_code: 200
          register: nc_health
          until: nc_health is succeeded
          retries: 10
          delay: 10

        - name: Verify Nextcloud installed status
          ansible.builtin.uri:
            url: "https://nextcloud.{{ client_name }}.vrije.cloud/status.php"
            validate_certs: true
            return_content: true
          register: nc_status_check
          failed_when: >-
            '"installed":true' not in nc_status_check.content

        - name: Delay between services
          ansible.builtin.pause:
            seconds: "{{ update_delay }}"

    # ==========================================
    # Diun Updates
    # ==========================================
    - name: Update Diun
      when: "'diun' in services_to_update"
      tags: diun
      block:
        - name: Pull latest Diun image
          community.docker.docker_image:
            name: crazymax/diun:latest
            source: pull
            force_source: true

        - name: Restart Diun
          community.docker.docker_compose:
            project_src: /opt/docker/diun
            restarted: true
            pull: true

        - name: Wait for Diun to be running
          ansible.builtin.command: >-
            docker inspect
            --format='{% raw %}{{.State.Status}}{% endraw %}'
            diun
          register: diun_status
          until: diun_status.stdout == "running"
          retries: 5
          delay: 3
          changed_when: false

  post_tasks:
    - name: Get final container status
      ansible.builtin.command: >-
        docker ps --format
        'table {% raw %}{{.Names}}\t{{.Status}}\t{{.Image}}{% endraw %}'
      register: containers_after
      changed_when: false
      check_mode: false
      tags: always

    - name: Display final container status
      ansible.builtin.debug:
        msg: "{{ containers_after.stdout_lines }}"
      tags: always

    - name: Verify all expected containers are running
      # Lists running container names; the summary counts the lines, so no
      # shell pipeline (and no masked docker failure) is needed.
      ansible.builtin.command: >-
        docker ps --filter "status=running"
        --format '{% raw %}{{.Names}}{% endraw %}'
      register: running_count
      changed_when: false
      check_mode: false
      tags: always

    - name: Check for unhealthy containers
      ansible.builtin.command: >-
        docker ps --filter "health=unhealthy"
        --format '{% raw %}{{.Names}}{% endraw %}'
      register: unhealthy_containers
      changed_when: false
      check_mode: false
      # Any name printed here means a container failed its health check.
      failed_when: unhealthy_containers.stdout != ""
      tags: always

    - name: Update summary
      ansible.builtin.debug:
        msg: |
          ========================================
          Update Summary for {{ inventory_hostname }}
          ========================================
          Running containers: {{ running_count.stdout_lines | length }}
          Unhealthy containers: {{ unhealthy_containers.stdout or 'None' }}

          Services updated: {{ services_to_update | join(', ') }}
          Status: SUCCESS
      tags: always
- name: Post-update validation
  # Closing play: targets the same host pattern as the update play and prints
  # one completion message per host. It performs no checks of its own.
  hosts: all
  gather_facts: false
  become: true

  tasks:
    - name: Final health check
      ansible.builtin.debug:
        msg: "All updates completed successfully on {{ inventory_hostname }}"