feat: Add infrastructure roles for multi-tenant architecture

Add new Ansible roles and configuration for the edge proxy and
private network architecture:

## New Roles:
- **edge-traefik**: Edge reverse proxy that routes to private clients
  - Dynamic routing configuration for multiple clients
  - SSL termination at the edge
  - Routes traffic to private IPs (10.0.0.x)

- **nat-gateway**: NAT/gateway configuration for edge server
  - IP forwarding and masquerading
  - Allows private network clients to access internet
  - iptables rules for Docker integration

- **diun**: Docker Image Update Notifier
  - Monitors containers for available updates
  - Email notifications via Mailgun
  - Per-client configuration

- **kuma**: Uptime monitoring integration
  - Registers HTTP monitors for client services
  - Automated monitor creation via API
  - Checks Authentik, Nextcloud, Collabora endpoints

## New Playbooks:
- **setup-edge.yml**: Configure edge server with proxy and NAT

## Configuration:
- **host_vars**: Per-client Ansible configuration (valk, white)
  - SSH bastion configuration for private IPs
  - Client-specific secrets file references

This enables a scalable multi-tenant architecture (a minimal inventory sketch follows this list) where:
- The edge server holds the public IP and routes all inbound traffic
- Client servers use private IPs only (cost savings)
- All traffic flows through the edge proxy, with SSL terminated at the edge
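
A minimal inventory sketch of this layout (the group and host names below are illustrative assumptions, not files in this commit):

```yaml
# Hypothetical inventory sketch
all:
  children:
    edge:
      hosts:
        edge1:
          ansible_host: 78.47.191.38   # public IP; terminates SSL and provides NAT
    clients:
      hosts:
        valk:    # private IP only, reached through the edge bastion (see host_vars)
        white:   # temporarily on a public IP (see host_vars)
```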

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
Pieter 2026-01-20 19:05:51 +01:00
parent f40acee0a3
commit 13685eb454
19 changed files with 752 additions and 0 deletions

@@ -0,0 +1,11 @@
---
# valk server - behind edge proxy (private network only)
# SSH via edge server as bastion/jump host
ansible_host: 10.0.0.41
ansible_ssh_common_args: '-o ProxyCommand="ssh -i ../keys/ssh/edge -W %h:%p -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null root@78.47.191.38" -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no'
# Client identification
client_name: valk
client_domain: valk.vrije.cloud
client_secrets_file: valk.sops.yaml

@@ -0,0 +1,11 @@
---
# White server - behind edge proxy
# Note: Currently has public IP for initial setup
# SSH directly via public IP (temporary)
ansible_host: 159.69.182.238
# Client identification
client_name: white
client_domain: white.vrije.cloud
client_secrets_file: white.sops.yaml
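
If white later moves behind the edge as the note above anticipates, its host_vars would presumably mirror valk's bastion setup. A hypothetical sketch, assuming white keeps the 10.0.0.40 address used in the edge routing configuration:

```yaml
# Hypothetical future host_vars for white (not part of this commit)
ansible_host: 10.0.0.40
ansible_ssh_common_args: '-o ProxyCommand="ssh -i ../keys/ssh/edge -W %h:%p -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null root@78.47.191.38" -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no'
```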

@@ -0,0 +1,20 @@
---
# Setup Edge Server
# Configures the edge server with Traefik reverse proxy
- name: Setup edge server
  hosts: edge
  become: yes

  roles:
    - role: common
      tags: [common, setup]

    - role: docker
      tags: [docker, setup]

    - role: nat-gateway
      tags: [nat, gateway]

    - role: edge-traefik
      tags: [traefik, edge]

@@ -0,0 +1,28 @@
---
# Diun default configuration
diun_version: "latest"
diun_schedule: "0 6 * * *" # Daily at 6am UTC
diun_log_level: "info"
diun_watch_workers: 10
# Notification configuration
diun_notif_enabled: true
diun_notif_type: "webhook" # Options: webhook, slack, discord, email, gotify
diun_webhook_endpoint: "" # Set per environment or via secrets
diun_webhook_method: "POST"
diun_webhook_headers: {}
# Optional: Slack notification
diun_slack_webhook_url: ""
# Optional: Email notification (Mailgun)
# Note: Uses per-client SMTP credentials from mailgun role
diun_email_enabled: true
diun_smtp_host: "smtp.eu.mailgun.org"
diun_smtp_port: 587
diun_smtp_from: "{{ client_name }}@mg.vrije.cloud"
diun_smtp_to: "pieter@postxsociety.org"
# Which containers to watch
diun_watch_all: true
diun_exclude_containers: []
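
These defaults are meant to be overridden per client (the webhook endpoint and SMTP credentials come from host vars or secrets). A hypothetical override, purely as an illustration:

```yaml
# Hypothetical per-client override (e.g. in host_vars); not part of this commit
diun_webhook_endpoint: "https://example.com/diun-hook"   # illustrative URL
diun_exclude_containers:
  - diun   # skip notifications about the notifier itself
```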

@@ -0,0 +1,5 @@
---
- name: Restart Diun
  community.docker.docker_compose_v2:
    project_src: /opt/docker/diun
    state: restarted

@@ -0,0 +1,57 @@
---
- name: Set SMTP credentials from mailgun role facts or client_secrets
  set_fact:
    diun_smtp_username_final: "{{ mailgun_smtp_user | default(client_secrets.mailgun_smtp_user | default(client_name ~ '@mg.vrije.cloud')) }}"
    diun_smtp_password_final: "{{ mailgun_smtp_password | default(client_secrets.mailgun_smtp_password | default('')) }}"
  when: mailgun_smtp_user is defined or client_secrets.mailgun_smtp_user is defined or client_name is defined
  no_log: true

- name: Create monitoring Docker network
  community.docker.docker_network:
    name: monitoring
    state: present

- name: Create Diun directory
  file:
    path: /opt/docker/diun
    state: directory
    mode: '0755'

- name: Create Diun data directory
  file:
    path: /opt/docker/diun/data
    state: directory
    mode: '0755'

- name: Deploy Diun configuration
  template:
    src: diun.yml.j2
    dest: /opt/docker/diun/diun.yml
    mode: '0644'
  notify: Restart Diun

- name: Deploy Diun docker-compose.yml
  template:
    src: docker-compose.yml.j2
    dest: /opt/docker/diun/docker-compose.yml
    mode: '0644'
  notify: Restart Diun

- name: Start Diun container
  community.docker.docker_compose_v2:
    project_src: /opt/docker/diun
    state: present
    pull: always
  register: diun_deploy

- name: Wait for Diun to be healthy
  shell: docker inspect --format='{{"{{"}} .State.Status {{"}}"}}' diun
  register: diun_status
  until: diun_status.stdout == "running"
  retries: 5
  delay: 3
  changed_when: false

- name: Display Diun status
  debug:
    msg: "Diun is {{ diun_status.stdout }} on {{ inventory_hostname }}"

@@ -0,0 +1,58 @@
---
# Diun configuration for {{ inventory_hostname }}
# Documentation: https://crazymax.dev/diun/
db:
  path: /data/diun.db

watch:
  workers: {{ diun_watch_workers }}
  schedule: "{{ diun_schedule }}"
  firstCheckNotif: false

defaults:
  watchRepo: true
  notifyOn:
    - new
    - update

providers:
  docker:
    watchByDefault: {{ diun_watch_all | lower }}
{% if diun_exclude_containers | length > 0 %}
    excludeContainers:
{% for container in diun_exclude_containers %}
      - {{ container }}
{% endfor %}
{% endif %}

notif:
{% if diun_notif_enabled and diun_notif_type == 'webhook' and diun_webhook_endpoint %}
  webhook:
    endpoint: {{ diun_webhook_endpoint }}
    method: {{ diun_webhook_method }}
    timeout: 10s
{% if diun_webhook_headers | length > 0 %}
    headers:
{% for key, value in diun_webhook_headers.items() %}
      {{ key }}: {{ value }}
{% endfor %}
{% endif %}
{% endif %}
{% if diun_slack_webhook_url %}
  slack:
    webhookURL: {{ diun_slack_webhook_url }}
{% endif %}
{% if diun_email_enabled and diun_smtp_username_final is defined and diun_smtp_password_final is defined and diun_smtp_password_final != '' %}
  mail:
    host: {{ diun_smtp_host }}
    port: {{ diun_smtp_port }}
    ssl: false
    insecureSkipVerify: false
    username: {{ diun_smtp_username_final }}
    password: {{ diun_smtp_password_final }}
    from: {{ diun_smtp_from }}
    to: {{ diun_smtp_to }}
{% endif %}
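
For orientation: with the role defaults above (webhook selected but no endpoint set, email enabled) and Mailgun SMTP credentials resolved by the tasks, the notif block of this template should render roughly as the sketch below for the valk client; the credential values are placeholders.

```yaml
notif:
  mail:
    host: smtp.eu.mailgun.org
    port: 587
    ssl: false
    insecureSkipVerify: false
    username: <resolved Mailgun SMTP user>
    password: <resolved Mailgun SMTP password>
    from: valk@mg.vrije.cloud
    to: pieter@postxsociety.org
```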

@@ -0,0 +1,24 @@
version: '3.8'
services:
  diun:
    image: crazymax/diun:{{ diun_version }}
    container_name: diun
    restart: unless-stopped
    command: serve
    volumes:
      - "./data:/data"
      - "./diun.yml:/diun.yml:ro"
      - "/var/run/docker.sock:/var/run/docker.sock:ro"
    environment:
      - TZ=UTC
      - LOG_LEVEL={{ diun_log_level }}
    labels:
      - "diun.enable=true"
    networks:
      - monitoring

networks:
  monitoring:
    name: monitoring
    external: true

@@ -0,0 +1,13 @@
---
# Edge Traefik Default Variables
# This Traefik instance acts as a reverse proxy for private network clients
traefik_version: "v3.3"
traefik_network: "web"
traefik_docker_socket: "/var/run/docker.sock"
traefik_acme_email: "admin@vrije.cloud"
traefik_acme_staging: false
traefik_dashboard_enabled: false
# Backend client servers (populated from inventory)
backend_clients: []

@@ -0,0 +1,7 @@
---
# Edge Traefik Handlers
- name: Restart Traefik
  community.docker.docker_compose_v2:
    project_src: /opt/docker/traefik
    state: restarted

@@ -0,0 +1,60 @@
---
# Edge Traefik Installation Tasks
# Sets up Traefik as edge reverse proxy for private network clients
- name: Ensure Traefik configuration directory exists
  file:
    path: /opt/docker/traefik
    state: directory
    mode: '0755'
  tags: [traefik, edge]

- name: Create Let's Encrypt storage directory
  file:
    path: /opt/docker/traefik/letsencrypt
    state: directory
    mode: '0600'
  tags: [traefik, edge]

- name: Create Traefik log directory
  file:
    path: /var/log/traefik
    state: directory
    mode: '0755'
  tags: [traefik, edge]

- name: Deploy Traefik static configuration
  template:
    src: traefik.yml.j2
    dest: /opt/docker/traefik/traefik.yml
    mode: '0644'
  notify: Restart Traefik
  tags: [traefik, edge, config]

- name: Deploy Traefik dynamic configuration (routing rules)
  template:
    src: dynamic.yml.j2
    dest: /opt/docker/traefik/dynamic.yml
    mode: '0644'
  notify: Restart Traefik
  tags: [traefik, edge, config]

- name: Deploy Traefik Docker Compose file
  template:
    src: docker-compose.yml.j2
    dest: /opt/docker/traefik/docker-compose.yml
    mode: '0644'
  tags: [traefik, edge]

- name: Start Traefik container
  community.docker.docker_compose_v2:
    project_src: /opt/docker/traefik
    state: present
  tags: [traefik, edge]

- name: Wait for Traefik to be ready
  wait_for:
    port: 443
    delay: 5
    timeout: 60
  tags: [traefik, edge]

@@ -0,0 +1,24 @@
# Edge Traefik Docker Compose
# Managed by Ansible - do not edit manually
services:
  traefik:
    image: traefik:{{ traefik_version }}
    container_name: traefik
    restart: unless-stopped
    security_opt:
      - no-new-privileges:true
    ports:
      - "80:80"
      - "443:443"
{% if traefik_dashboard_enabled %}
      - "8080:8080"
{% endif %}
    volumes:
      - /etc/localtime:/etc/localtime:ro
      - ./traefik.yml:/etc/traefik/traefik.yml:ro
      - ./dynamic.yml:/etc/traefik/dynamic.yml:ro
      - ./letsencrypt:/letsencrypt
      - /var/log/traefik:/var/log/traefik
    labels:
      - "traefik.enable=false"

@@ -0,0 +1,97 @@
# Edge Traefik Dynamic Configuration
# Managed by Ansible - do not edit manually
# Routes traffic to backend servers on private network
http:
  # Routers for white client
  routers:
    white-auth:
      rule: "Host(`auth.white.vrije.cloud`)"
      service: white-auth
      entryPoints:
        - websecure
      tls:
        certResolver: letsencrypt

    white-nextcloud:
      rule: "Host(`nextcloud.white.vrije.cloud`)"
      service: white-nextcloud
      entryPoints:
        - websecure
      tls:
        certResolver: letsencrypt

    white-collabora:
      rule: "Host(`office.white.vrije.cloud`)"
      service: white-collabora
      entryPoints:
        - websecure
      tls:
        certResolver: letsencrypt

    valk-auth:
      rule: "Host(`auth.valk.vrije.cloud`)"
      service: valk-auth
      entryPoints:
        - websecure
      tls:
        certResolver: letsencrypt

    valk-nextcloud:
      rule: "Host(`nextcloud.valk.vrije.cloud`)"
      service: valk-nextcloud
      entryPoints:
        - websecure
      tls:
        certResolver: letsencrypt

    valk-collabora:
      rule: "Host(`office.valk.vrije.cloud`)"
      service: valk-collabora
      entryPoints:
        - websecure
      tls:
        certResolver: letsencrypt

  # Services (backend servers)
  services:
    white-auth:
      loadBalancer:
        servers:
          - url: "https://10.0.0.40:443"
        serversTransport: insecureTransport

    white-nextcloud:
      loadBalancer:
        servers:
          - url: "https://10.0.0.40:443"
        serversTransport: insecureTransport

    white-collabora:
      loadBalancer:
        servers:
          - url: "https://10.0.0.40:443"
        serversTransport: insecureTransport

    valk-auth:
      loadBalancer:
        servers:
          - url: "https://10.0.0.41:443"
        serversTransport: insecureTransport

    valk-nextcloud:
      loadBalancer:
        servers:
          - url: "https://10.0.0.41:443"
        serversTransport: insecureTransport

    valk-collabora:
      loadBalancer:
        servers:
          - url: "https://10.0.0.41:443"
        serversTransport: insecureTransport

  # Server transport (allow self-signed certs from backends)
  serversTransports:
    insecureTransport:
      insecureSkipVerify: true
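
A quick smoke test of this routing, sketched as a hypothetical ad-hoc task (not part of this commit); the URL and accepted status codes mirror the Kuma monitor definitions added later in this commit:

```yaml
# Hypothetical check, run from any machine that resolves the client domains
- name: Verify Nextcloud on valk answers through the edge proxy
  ansible.builtin.uri:
    url: https://nextcloud.valk.vrije.cloud
    status_code: [200, 302]
    follow_redirects: none   # accept the 302 itself rather than following it
```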

@@ -0,0 +1,47 @@
# Edge Traefik Static Configuration
# Managed by Ansible - do not edit manually
# This configuration proxies to backend servers on private network
api:
  dashboard: {{ traefik_dashboard_enabled | lower }}
{% if traefik_dashboard_enabled %}
  insecure: false
{% endif %}

entryPoints:
  web:
    address: ":80"
    http:
      redirections:
        entryPoint:
          to: websecure
          scheme: https

  websecure:
    address: ":443"
    http:
      tls:
        certResolver: letsencrypt

providers:
  # File provider for static backend configurations
  file:
    filename: /etc/traefik/dynamic.yml
    watch: true

certificatesResolvers:
  letsencrypt:
    acme:
      email: {{ traefik_acme_email }}
      storage: /letsencrypt/acme.json
{% if traefik_acme_staging %}
      caServer: https://acme-staging-v02.api.letsencrypt.org/directory
{% endif %}
      httpChallenge:
        entryPoint: web

log:
  level: INFO

accessLog:
  filePath: /var/log/traefik/access.log

@@ -0,0 +1,41 @@
---
# Uptime Kuma monitoring registration
kuma_enabled: true
kuma_url: "https://status.vrije.cloud"
# Authentication options:
# Option 1: Username/Password (required for Socket.io API used by Python library)
kuma_username: "" # Set this for automated registration
kuma_password: "" # Set this for automated registration
# Option 2: API Key (only for REST endpoints like /metrics, not for monitor management)
kuma_api_key: "uk1_H2YjQsSG8em8GG9G9c0arQogSizXI1CRPNgTEUlU"
# Monitors to create for each client
kuma_monitors:
- name: "{{ client_name }} - Authentik SSO"
type: "http"
url: "https://auth.{{ client_domain }}"
method: "GET"
interval: 60
maxretries: 3
retry_interval: 60
expected_status: "200,302"
- name: "{{ client_name }} - Nextcloud"
type: "http"
url: "https://nextcloud.{{ client_domain }}"
method: "GET"
interval: 60
maxretries: 3
retry_interval: 60
expected_status: "200,302"
- name: "{{ client_name }} - Collabora Office"
type: "http"
url: "https://office.{{ client_domain }}"
method: "GET"
interval: 60
maxretries: 3
retry_interval: 60
expected_status: "200"
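
The kuma role is not referenced by setup-edge.yml; a hypothetical per-client play that would apply it (the `clients` group name is an assumption) might look like:

```yaml
# Hypothetical usage, not part of this commit
- name: Register client monitors in Uptime Kuma
  hosts: clients
  roles:
    - role: kuma
      tags: [kuma, monitoring]
```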

@@ -0,0 +1,49 @@
---
# Register client services with Uptime Kuma monitoring
# Uses uptime-kuma-api Python library with Socket.io
- name: Set Kuma credentials from shared secrets
  set_fact:
    kuma_username: "{{ shared_secrets.kuma_username | default('') }}"
    kuma_password: "{{ shared_secrets.kuma_password | default('') }}"
  when: shared_secrets is defined

- name: Check if Kuma monitoring is enabled
  set_fact:
    kuma_registration_enabled: "{{ (kuma_enabled | bool) and (kuma_url | length > 0) and (kuma_username | length > 0) and (kuma_password | length > 0) }}"

- name: Kuma registration block
  when: kuma_registration_enabled
  delegate_to: localhost
  become: false
  block:
    - name: Ensure uptime-kuma-api Python package is installed
      pip:
        name: uptime-kuma-api
        state: present

    - name: Create Kuma registration script
      template:
        src: register_monitors.py.j2
        dest: /tmp/kuma_register_{{ client_name }}.py
        mode: '0700'

    - name: Register monitors with Uptime Kuma
      command: "{{ ansible_playbook_python }} /tmp/kuma_register_{{ client_name }}.py"
      register: kuma_result
      changed_when: "'Added' in kuma_result.stdout or 'Updated' in kuma_result.stdout"
      failed_when: kuma_result.rc != 0

    - name: Display Kuma registration result
      debug:
        msg: "{{ kuma_result.stdout_lines }}"

    - name: Cleanup registration script
      file:
        path: /tmp/kuma_register_{{ client_name }}.py
        state: absent

- name: Skip Kuma registration message
  debug:
    msg: "Kuma monitoring registration skipped (not enabled or missing credentials)"
  when: not kuma_registration_enabled

@@ -0,0 +1,128 @@
#!/usr/bin/env python3
"""
Uptime Kuma Monitor Registration Script
Auto-generated for client: {{ client_name }}
"""
import sys

from uptime_kuma_api import UptimeKumaApi, MonitorType

# Configuration
KUMA_URL = "{{ kuma_url }}"
KUMA_USERNAME = "{{ kuma_username | default('') }}"
KUMA_PASSWORD = "{{ kuma_password | default('') }}"
CLIENT_NAME = "{{ client_name }}"
CLIENT_DOMAIN = "{{ client_domain }}"

# Monitor definitions
MONITORS = {{ kuma_monitors | to_json }}

# Monitor type mapping
TYPE_MAP = {
    "http": MonitorType.HTTP,
    "https": MonitorType.HTTP,
    "ping": MonitorType.PING,
    "tcp": MonitorType.PORT,
    "dns": MonitorType.DNS,
}


def main():
    """Register monitors with Uptime Kuma"""
    # Check if credentials are provided
    if not KUMA_USERNAME or not KUMA_PASSWORD:
        print("⚠️ Kuma registration skipped: No credentials provided")
        print("")
        print("To enable automated monitor registration, add to your secrets:")
        print("  kuma_username: your_username")
        print("  kuma_password: your_password")
        print("")
        print("Note: API keys (uk1_*) are only for REST endpoints, not monitor management")
        print("Manual registration required at: https://status.vrije.cloud")
        sys.exit(0)  # Exit with success (not a failure, just skipped)

    try:
        # Connect to Uptime Kuma (Socket.io connection)
        print(f"🔌 Connecting to Uptime Kuma at {KUMA_URL}...")
        api = UptimeKumaApi(KUMA_URL)

        # Login with username/password
        print(f"🔐 Authenticating as {KUMA_USERNAME}...")
        api.login(KUMA_USERNAME, KUMA_PASSWORD)

        # Get existing monitors
        print("📋 Fetching existing monitors...")
        existing_monitors = api.get_monitors()
        existing_names = {m['name']: m['id'] for m in existing_monitors}

        # Register each monitor
        added_count = 0
        updated_count = 0
        skipped_count = 0

        for monitor_config in MONITORS:
            monitor_name = monitor_config['name']
            monitor_type_str = monitor_config.get('type', 'http').lower()
            monitor_type = TYPE_MAP.get(monitor_type_str, MonitorType.HTTP)

            # Build monitor parameters
            params = {
                'type': monitor_type,
                'name': monitor_name,
                'interval': monitor_config.get('interval', 60),
                'maxretries': monitor_config.get('maxretries', 3),
                'retryInterval': monitor_config.get('retry_interval', 60),
            }

            # Add type-specific parameters
            if monitor_type == MonitorType.HTTP:
                params['url'] = monitor_config['url']
                params['method'] = monitor_config.get('method', 'GET')
                if 'expected_status' in monitor_config:
                    params['accepted_statuscodes'] = monitor_config['expected_status'].split(',')
            elif monitor_type == MonitorType.PING:
                params['hostname'] = monitor_config.get('hostname', monitor_config.get('url', ''))

            # Check if monitor already exists
            if monitor_name in existing_names:
                print(f"⚠️ Monitor '{monitor_name}' already exists (ID: {existing_names[monitor_name]})")
                print(f"   Skipping (update not implemented)")
                skipped_count += 1
            else:
                print(f"  Adding monitor: {monitor_name}")
                try:
                    result = api.add_monitor(**params)
                    print(f"  ✓ Added (ID: {result.get('monitorID', 'unknown')})")
                    added_count += 1
                except Exception as e:
                    print(f"  ✗ Failed: {e}")

        # Disconnect
        api.disconnect()

        # Summary
        print("")
        print("=" * 60)
        print(f"📊 Registration Summary for {CLIENT_NAME}:")
        print(f"  Added: {added_count}")
        print(f"  Skipped (already exist): {skipped_count}")
        print(f"  Total monitors: {len(MONITORS)}")
        print("=" * 60)

        if added_count > 0:
            print(f"✅ Successfully registered {added_count} new monitor(s)")

    except Exception as e:
        print(f"❌ ERROR: Failed to register monitors: {e}")
        print("")
        print("Troubleshooting:")
        print(f"  1. Verify Kuma is accessible: {KUMA_URL}")
        print("  2. Check username/password are correct")
        print("  3. Ensure uptime-kuma-api Python package is installed")
        print("  4. Check network connectivity from deployment machine")
        sys.exit(1)


if __name__ == "__main__":
    main()

@@ -0,0 +1,6 @@
---
# NAT Gateway Handlers
- name: Save iptables rules
  shell: |
    iptables-save > /etc/iptables/rules.v4

@@ -0,0 +1,66 @@
---
# NAT Gateway Configuration
# Enables internet access for private network clients via edge server
- name: Enable IP forwarding
  sysctl:
    name: net.ipv4.ip_forward
    value: '1'
    state: present
    reload: yes
  tags: [nat, gateway]

- name: Install iptables-persistent
  apt:
    name: iptables-persistent
    state: present
    update_cache: yes
  tags: [nat, gateway]

- name: Configure NAT (masquerading) for private network
  iptables:
    table: nat
    chain: POSTROUTING
    out_interface: eth0
    source: 10.0.0.0/16
    jump: MASQUERADE
    comment: NAT for private network clients
  notify: Save iptables rules
  tags: [nat, gateway]

- name: Allow forwarding from private network (in DOCKER-USER chain)
  iptables:
    chain: DOCKER-USER
    in_interface: enp7s0
    out_interface: eth0
    source: 10.0.0.0/16
    jump: ACCEPT
    comment: Allow forwarding from private network
  notify: Save iptables rules
  tags: [nat, gateway]

- name: Allow established connections back to private network (in DOCKER-USER chain)
  iptables:
    chain: DOCKER-USER
    in_interface: eth0
    out_interface: enp7s0
    ctstate: ESTABLISHED,RELATED
    jump: ACCEPT
    comment: Allow established connections to private network
  notify: Save iptables rules
  tags: [nat, gateway]

- name: Return from DOCKER-USER chain for other traffic
  iptables:
    chain: DOCKER-USER
    jump: RETURN
    comment: Let Docker handle other traffic
  notify: Save iptables rules
  tags: [nat, gateway]

- name: Save iptables rules
  shell: |
    iptables-save > /etc/iptables/rules.v4
  args:
    creates: /etc/iptables/rules.v4
  tags: [nat, gateway]
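
A hedged verification sketch (not part of this commit) that could be run against the edge host to confirm forwarding and masquerading are in place:

```yaml
# Hypothetical verification play for the NAT gateway
- name: Verify NAT gateway on the edge server
  hosts: edge
  become: yes
  tasks:
    - name: Confirm IP forwarding is enabled
      ansible.builtin.command: sysctl -n net.ipv4.ip_forward
      register: ip_forward
      changed_when: false
      failed_when: ip_forward.stdout != '1'

    - name: Show POSTROUTING NAT rules (expect a MASQUERADE entry for 10.0.0.0/16)
      ansible.builtin.command: iptables -t nat -L POSTROUTING -n -v
      register: nat_rules
      changed_when: false

    - name: Display NAT rules
      ansible.builtin.debug:
        var: nat_rules.stdout_lines
```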