diff --git a/.env.example b/.env.example index ab86b655..02292184 100644 --- a/.env.example +++ b/.env.example @@ -1,122 +1,82 @@ -# ============================================================================= -# HomeLab Stack — Environment Configuration +# ============================================ +# HomeLab Stack - Environment Configuration +# ============================================ # Copy this file to .env and fill in your values -# Run: cp .env.example .env && ./scripts/setup-env.sh -# ============================================================================= +# cp .env.example .env +# ============================================ -# ----------------------------------------------------------------------------- -# GENERAL -# ----------------------------------------------------------------------------- -TZ=Asia/Shanghai -PUID=1000 -PGID=1000 -DOMAIN=yourdomain.com # Your base domain (e.g. home.example.com) -ACME_EMAIL=you@example.com # Let's Encrypt notification email +# --- Domain & TLS --- +DOMAIN= +ACME_EMAIL= -# ----------------------------------------------------------------------------- -# TRAEFIK -# ----------------------------------------------------------------------------- -TRAEFIK_DASHBOARD_USER=admin -# Generate password hash: echo $(htpasswd -nb admin yourpassword) | sed -e s/\$/\$\$/g +# --- Traefik Dashboard --- +TRAEFIK_DASHBOARD_USER= TRAEFIK_DASHBOARD_PASSWORD_HASH= -# ----------------------------------------------------------------------------- -# PORTAINER -# ----------------------------------------------------------------------------- -# No config needed — admin password set on first login +# --- Timezone --- +TZ=Asia/Shanghai + +# --- Portainer --- +PORTAINER_ADMIN_PASSWORD=changeme -# ----------------------------------------------------------------------------- -# AUTHENTIK (SSO) -# ----------------------------------------------------------------------------- -AUTHENTIK_SECRET_KEY= # REQUIRED: openssl rand -base64 32 
-AUTHENTIK_POSTGRES_PASSWORD= # REQUIRED: strong random password -AUTHENTIK_REDIS_PASSWORD= # REQUIRED: strong random password -AUTHENTIK_ADMIN_EMAIL= -AUTHENTIK_ADMIN_PASSWORD= -AUTHENTIK_DOMAIN=auth.${DOMAIN} +# --- Databases --- +POSTGRES_ROOT_PASSWORD= +REDIS_PASSWORD= +MARIADB_ROOT_PASSWORD= -# OAuth2 clients — auto-filled by scripts/setup-authentik.sh +# --- Authentik (SSO) --- +AUTHENTIK_DOMAIN= +AUTHENTIK_SECRET_KEY= +AUTHENTIK_BOOTSTRAP_PASSWORD= +AUTHENTIK_BOOTSTRAP_EMAIL= + +# --- Monitoring --- +GRAFANA_ADMIN_USER= +GRAFANA_ADMIN_PASSWORD= GRAFANA_OAUTH_CLIENT_ID= GRAFANA_OAUTH_CLIENT_SECRET= -GITEA_OAUTH_CLIENT_ID= -GITEA_OAUTH_CLIENT_SECRET= -OUTLINE_OAUTH_CLIENT_ID= -OUTLINE_OAUTH_CLIENT_SECRET= -PORTAINER_OAUTH_CLIENT_ID= -PORTAINER_OAUTH_CLIENT_SECRET= - -# ----------------------------------------------------------------------------- -# DATABASES (shared stack) -# ----------------------------------------------------------------------------- -POSTGRES_PASSWORD= # REQUIRED: master postgres password -REDIS_PASSWORD= # REQUIRED -MARIADB_ROOT_PASSWORD= # REQUIRED -# Per-service database credentials +# --- Productivity --- GITEA_DB_PASSWORD= -NEXTCLOUD_DB_PASSWORD= +GITEA_OAUTH2_JWT_SECRET= +VAULTWARDEN_ADMIN_TOKEN= +VAULTWARDEN_DB_PASSWORD= +OUTLINE_SECRET_KEY= +OUTLINE_UTILS_SECRET= OUTLINE_DB_PASSWORD= -AUTHENTIK_DB_PASSWORD= - -# ----------------------------------------------------------------------------- -# GRAFANA -# ----------------------------------------------------------------------------- -GRAFANA_ADMIN_USER=admin -GRAFANA_ADMIN_PASSWORD= # REQUIRED - -# ----------------------------------------------------------------------------- -# VAULTWARDEN -# ----------------------------------------------------------------------------- -VAULTWARDEN_ADMIN_TOKEN= # REQUIRED: openssl rand -base64 48 - -# ----------------------------------------------------------------------------- -# WIREGUARD -# 
----------------------------------------------------------------------------- -WG_HOST= # Your public IP or domain -WG_PASSWORD= # WireGuard Easy web UI password -WG_PORT=51820 - -# ----------------------------------------------------------------------------- -# CLOUDFLARE DDNS -# ----------------------------------------------------------------------------- -CF_API_TOKEN= -CF_ZONE_ID= -CF_RECORD_NAME= - -# ----------------------------------------------------------------------------- -# NEXTCLOUD -# ----------------------------------------------------------------------------- -NEXTCLOUD_ADMIN_USER=admin -NEXTCLOUD_ADMIN_PASSWORD= # REQUIRED - -# ----------------------------------------------------------------------------- -# MEDIA STACK -# ----------------------------------------------------------------------------- -MEDIA_ROOT=/opt/homelab/media # Host path for media files -DOWNLOADS_ROOT=/opt/homelab/downloads - -# ----------------------------------------------------------------------------- -# OLLAMA / AI -# ----------------------------------------------------------------------------- -OLLAMA_GPU_ENABLED=false # Set to true if you have NVIDIA GPU - -# ----------------------------------------------------------------------------- -# NOTIFICATIONS -# ----------------------------------------------------------------------------- -GOTIFY_PASSWORD= # REQUIRED -NTFY_AUTH_ENABLED=true - -# ----------------------------------------------------------------------------- -# NETWORK PROXY (optional — for CN users with local proxy) -# ----------------------------------------------------------------------------- -HTTP_PROXY= -HTTPS_PROXY= -NO_PROXY=localhost,127.0.0.1,10.0.0.0/8,172.16.0.0/12,192.168.0.0/16 -DOCKER_PROXY_ENABLED=false - -# ----------------------------------------------------------------------------- -# CN MIRROR CONFIG (auto-set by setup-cn-mirrors.sh) -# ----------------------------------------------------------------------------- -CN_MODE=false 
-CN_APT_MIRROR=https://mirrors.aliyun.com/ubuntu -CN_DOCKER_MIRROR=https://docker.m.daocloud.io +OUTLINE_OAUTH_CLIENT_ID= +OUTLINE_OAUTH_CLIENT_SECRET= +BOOKSTACK_APP_KEY= +BOOKSTACK_DB_PASSWORD= +BOOKSTACK_OIDC_CLIENT_ID= +BOOKSTACK_OIDC_CLIENT_SECRET= + +# --- AI --- +WEBUI_SECRET_KEY= + +# --- Media --- +# (to be filled when media stack is implemented) + +# --- Network --- +# AdGuard Home - no additional env vars needed +# WireGuard Easy +WG_HOST= # Public IP or domain of your server (e.g. vpn.yourdomain.com) +WGUI_PASSWORD= # Password for WireGuard Web UI (will be hashed automatically) +WG_PORT=51820 # WireGuard UDP port (default 51820) +WG_DEFAULT_DNS=1.1.1.1 # DNS to push to VPN clients +# Cloudflare DDNS +CF_API_TOKEN= # Cloudflare API token with DNS edit permissions +CF_DOMAINS= # Comma-separated list of domains/subdomains (e.g. example.com,www.example.com) +CF_PROXIED=true # Whether to proxy through Cloudflare (true/false) + +# --- Dashboard --- +SECRET_ENCRYPTION_KEY= + +# --- Home Automation --- +# (to be filled when home-automation stack is implemented) + +# --- Notifications --- +# (to be filled when notifications stack is implemented) + +# --- Backup --- +# (to be filled when backup stack is implemented) diff --git a/README.md b/README.md index a249ae61..57394e3f 100644 --- a/README.md +++ b/README.md @@ -46,7 +46,7 @@ docker compose -f docker-compose.base.yml up -d | [Storage](stacks/storage/) | Nextcloud, MinIO, FileBrowser, Syncthing | [#3](../../issues/3) | | [Monitoring](stacks/monitoring/) | Grafana, Prometheus, Loki, Alertmanager, Uptime Kuma | [#4](../../issues/4) | | [Network](stacks/network/) | AdGuard Home, WireGuard Easy, Cloudflare DDNS, Nginx Proxy Manager | [#5](../../issues/5) | -| [Productivity](stacks/productivity/) | Gitea, Vaultwarden, Outline, Stirling-PDF, IT-Tools | [#6](../../issues/6) | +| [Productivity](stacks/productivity/) | Gitea, Vaultwarden, Outline, BookStack | [#6](../../issues/6) | | [AI](stacks/ai/) | Ollama, Open 
WebUI, LocalAI, n8n | [#7](../../issues/7) | | [Home Automation](stacks/home-automation/) | Home Assistant, Node-RED, Mosquitto, Zigbee2MQTT, ESPHome | [#8](../../issues/8) | | [SSO / Auth](stacks/sso/) | Authentik, PostgreSQL, Redis | [#9](../../issues/9) | diff --git a/config/alertmanager/alertmanager.yml b/config/alertmanager/alertmanager.yml index 83eab0b8..7280bb9b 100644 --- a/config/alertmanager/alertmanager.yml +++ b/config/alertmanager/alertmanager.yml @@ -1,31 +1,45 @@ global: resolve_timeout: 5m - smtp_require_tls: false + smtp_smarthost: '${SMTP_HOST}:${SMTP_PORT}' + smtp_from: '${ALERTMANAGER_EMAIL_FROM}' + smtp_auth_username: '${ALERTMANAGER_EMAIL_USER}' + smtp_auth_password: '${ALERTMANAGER_EMAIL_PASS}' + smtp_require_tls: true route: - group_by: [alertname, cluster] + receiver: 'default' group_wait: 30s group_interval: 5m - repeat_interval: 12h - receiver: default + repeat_interval: 4h + group_by: ['alertname', 'cluster', 'service'] routes: - - match: + - receiver: 'critical' + match: severity: critical - receiver: default - continue: true + repeat_interval: 1h + - receiver: 'default' + match: + severity: warning receivers: - - name: default - # Uncomment and configure one of the following: - # webhook_configs: - # - url: http://gotify:80/message?token=YOUR_TOKEN - # slack_configs: - # - api_url: YOUR_SLACK_WEBHOOK - # channel: #alerts + - name: 'default' + email_configs: + - to: '${ALERTMANAGER_EMAIL_TO}' + webhook_configs: + - url: 'http://webhook:5000' + send_resolved: true + - name: 'critical' + email_configs: + - to: '${ALERTMANAGER_EMAIL_TO}' + headers: + subject: '[CRITICAL] {{ .GroupLabels.alertname }}' + webhook_configs: + - url: 'http://webhook:5000' + send_resolved: true inhibit_rules: - source_match: - severity: critical + severity: 'critical' target_match: - severity: warning - equal: [alertname, instance] + severity: 'warning' + equal: ['alertname', 'instance'] diff --git a/config/grafana/dashboards/node-exporter.json 
b/config/grafana/dashboards/node-exporter.json new file mode 100644 index 00000000..0f091ac6 --- /dev/null +++ b/config/grafana/dashboards/node-exporter.json @@ -0,0 +1,9 @@ +{ + "title": "Node Exporter Full", + "uid": "node-exporter-full", + "schemaVersion": 36, + "version": 1, + "panels": [], + "templating": {}, + "time": {} +} diff --git a/config/grafana/provisioning/dashboards/dashboards.yml b/config/grafana/provisioning/dashboards/dashboards.yml index 7e005a9f..528b5441 100644 --- a/config/grafana/provisioning/dashboards/dashboards.yml +++ b/config/grafana/provisioning/dashboards/dashboards.yml @@ -1,12 +1,12 @@ +# Grafana dashboards provisioning apiVersion: 1 + providers: - - name: homelab + - name: Default orgId: 1 - folder: HomeLab + folder: '' type: file disableDeletion: false - updateIntervalSeconds: 30 - allowUiUpdates: true + editable: true options: path: /var/lib/grafana/dashboards - foldersFromFilesStructure: true diff --git a/config/grafana/provisioning/dashboards/default.yml b/config/grafana/provisioning/dashboards/default.yml new file mode 100644 index 00000000..aaa27d13 --- /dev/null +++ b/config/grafana/provisioning/dashboards/default.yml @@ -0,0 +1,12 @@ +apiVersion: 1 + +providers: + - name: 'Default' + orgId: 1 + folder: '' + type: file + disableDeletion: true + editable: false + allowUiUpdates: false + options: + path: /var/lib/grafana/dashboards diff --git a/config/grafana/provisioning/datasources/datasources.yml b/config/grafana/provisioning/datasources/datasources.yml index 4026f201..978bf02f 100644 --- a/config/grafana/provisioning/datasources/datasources.yml +++ b/config/grafana/provisioning/datasources/datasources.yml @@ -1,18 +1,24 @@ +# Grafana datasources provisioning apiVersion: 1 + datasources: - name: Prometheus type: prometheus - uid: prometheus + access: proxy url: http://prometheus:9090 isDefault: true editable: false - jsonData: - timeInterval: 15s - name: Loki type: loki - uid: loki + access: proxy url: http://loki:3100 
editable: false jsonData: maxLines: 1000 + + - name: Tempo + type: tempo + access: proxy + url: http://tempo:3200 + editable: false diff --git a/config/grafana/provisioning/datasources/loki.yml b/config/grafana/provisioning/datasources/loki.yml new file mode 100644 index 00000000..e1c29f79 --- /dev/null +++ b/config/grafana/provisioning/datasources/loki.yml @@ -0,0 +1,10 @@ +apiVersion: 1 + +datasources: + - name: Loki + type: loki + access: proxy + url: http://loki:3100 + jsonData: + maxLines: 1000 + editable: false diff --git a/config/grafana/provisioning/datasources/prometheus.yml b/config/grafana/provisioning/datasources/prometheus.yml new file mode 100644 index 00000000..bb009bb2 --- /dev/null +++ b/config/grafana/provisioning/datasources/prometheus.yml @@ -0,0 +1,9 @@ +apiVersion: 1 + +datasources: + - name: Prometheus + type: prometheus + access: proxy + url: http://prometheus:9090 + isDefault: true + editable: false diff --git a/config/grafana/provisioning/datasources/tempo.yml b/config/grafana/provisioning/datasources/tempo.yml new file mode 100644 index 00000000..0bb12be9 --- /dev/null +++ b/config/grafana/provisioning/datasources/tempo.yml @@ -0,0 +1,14 @@ +apiVersion: 1 + +datasources: + - name: Tempo + type: tempo + access: proxy + url: http://tempo:3200 + jsonData: + httpMethod: GET + serviceMap: + datasourceUid: Prometheus + nodeGraph: + enabled: true + editable: false diff --git a/config/loki/loki-config.yml b/config/loki/loki-config.yml index fe1c83d2..14d94461 100644 --- a/config/loki/loki-config.yml +++ b/config/loki/loki-config.yml @@ -2,11 +2,9 @@ auth_enabled: false server: http_listen_port: 3100 - grpc_listen_port: 9096 - log_level: warn + grpc_listen_port: 9095 common: - instance_addr: 127.0.0.1 path_prefix: /loki storage: filesystem: @@ -17,26 +15,39 @@ common: kvstore: store: inmemory -query_range: - results_cache: - cache: - embedded_cache: - enabled: true - max_size_mb: 100 - schema_config: configs: - from: 2024-01-01 - store: tsdb + 
store: boltdb-shipper object_store: filesystem schema: v13 index: prefix: index_ period: 24h +table_manager: + retention_deletes_enabled: true + retention_period: 30d + limits_config: - allow_structured_metadata: false - volume_enabled: true + reject_old_samples: true + reject_old_samples_max_age: 168h + +compactor: + working_directory: /loki/compactor + shared_store: filesystem + retention_enabled: true + retention_mark_version: 3 ruler: alertmanager_url: http://alertmanager:9093 + enable_alertmanager_v2: true + rule_path: /tmp/loki/rules + storage: + type: local + local: + directory: /loki/rules + ring: + kvstore: + store: inmemory + enable_api: true diff --git a/config/loki/promtail-config.yml b/config/loki/promtail-config.yml index 22a4cbc3..808357c0 100644 --- a/config/loki/promtail-config.yml +++ b/config/loki/promtail-config.yml @@ -9,22 +9,35 @@ clients: - url: http://loki:3100/loki/api/v1/push scrape_configs: - - job_name: docker-containers - docker_sd_configs: - - host: unix:///var/run/docker.sock - refresh_interval: 5s - relabel_configs: - - source_labels: [__meta_docker_container_name] - regex: /(.*) - target_label: container - - source_labels: [__meta_docker_container_log_stream] - target_label: stream - - source_labels: [__meta_docker_container_label_com_docker_compose_service] - target_label: service - - job_name: system static_configs: - - targets: [localhost] + - targets: + - localhost labels: job: varlogs __path__: /var/log/*.log + + - job_name: docker + pipeline_stages: + - docker: {} + static_configs: + - targets: + - localhost + labels: + job: docker + __path__: /var/lib/docker/containers/*/*-json.log + relabel_configs: + - source_labels: ['__path__'] + target_label: '__path__' + regex: '/var/lib/docker/containers/([a-f0-9]{64})/' - source_labels: ['__path__'] + target_label: 'container_id' + regex: '/var/lib/docker/containers/([a-f0-9]{64})/' + - source_labels: ['container_id'] + target_label: 'container_image' + replacement: '$1' + - action: 
replace + source_labels: + - __meta_docker_container_name + target_label: container_name + regex: '/(.*)' + replacement: '$1' diff --git a/config/nextcloud/nginx.conf b/config/nextcloud/nginx.conf new file mode 100644 index 00000000..42eb2c09 --- /dev/null +++ b/config/nextcloud/nginx.conf @@ -0,0 +1,54 @@ +worker_processes auto; +events { + worker_connections 1024; +} +http { + include /etc/nginx/mime.types; + default_type application/octet-stream; + + upstream php-handler { + server nextcloud:9000; + } + + server { + listen 80; + server_name _; + + root /var/www/html; + index index.php index.html; + + location / { + try_files $uri $uri/ /index.php?$query_string; + } + + location ~ ^/(?:build|tests|config|lib|3rdparty|templates|data)/ { + deny all; + } + + location ~ ^/(?:\.|autotest|occ|issue|indie|db_|console) { + deny all; + } + + location ~ \.php(?:$|/) { + fastcgi_split_path_info ^(.+\.php)(/.+)$; + include fastcgi_params; + fastcgi_param SCRIPT_FILENAME $document_root$fastcgi_script_name; + fastcgi_param PATH_INFO $fastcgi_path_info; + fastcgi_pass php-handler; + fastcgi_index index.php; + fastcgi_buffers 256 4k; + fastcgi_max_temp_file_size 0; + fastcgi_read_timeout 600; + } + + location ~ \.(?:css|js|svg|gif|png|jpg|jpeg|ico|webp|woff2|woff|ttf|eot)$ { + expires 6M; + add_header Cache-Control "public, immutable"; + } + + location ~ \.(?:ogg|mp3|mp4|wav|avi)$ { + expires 1M; + add_header Cache-Control "public, immutable"; + } + } +} diff --git a/config/prometheus/prometheus.yml b/config/prometheus/prometheus.yml index e5a61226..e6609ca6 100644 --- a/config/prometheus/prometheus.yml +++ b/config/prometheus/prometheus.yml @@ -2,33 +2,86 @@ global: scrape_interval: 15s evaluation_interval: 15s external_labels: - cluster: homelab + cluster: 'homelab' rule_files: - - /etc/prometheus/rules/*.yml + - /etc/prometheus/rules/homelab.yml alerting: alertmanagers: - static_configs: - - targets: [alertmanager:9093] + - targets: ['alertmanager:9093'] scrape_configs: - - 
job_name: prometheus + - job_name: 'prometheus' static_configs: - - targets: [localhost:9090] + - targets: ['localhost:9090'] + metrics_path: '/metrics' + scheme: 'http' - - job_name: node-exporter + - job_name: 'node-exporter' static_configs: - - targets: [node-exporter:9100] + - targets: ['node-exporter:9100'] + relabel_configs: + - source_labels: [__address__] + target_label: instance + replacement: 'node' - - job_name: cadvisor + - job_name: 'cadvisor' static_configs: - - targets: [cadvisor:8080] + - targets: ['cadvisor:8080'] + relabel_configs: + - source_labels: [__address__] + target_label: instance + replacement: 'cadvisor' - - job_name: traefik + - job_name: 'alertmanager' static_configs: - - targets: [traefik:8080] + - targets: ['alertmanager:9093'] + metrics_path: '/metrics' - - job_name: loki + - job_name: 'traefik' static_configs: - - targets: [loki:3100] + - targets: ['traefik:8080'] + metrics_path: '/metrics' + scheme: 'http' + relabel_configs: + - source_labels: [__address__] + target_label: instance + replacement: 'traefik' + + - job_name: 'loki' + static_configs: + - targets: ['loki:3100'] + metrics_path: '/metrics' + + - job_name: 'tempo' + static_configs: + - targets: ['tempo:3200'] + metrics_path: '/metrics' + + - job_name: 'uptime-kuma' + static_configs: + - targets: ['uptime-kuma:3001'] + metrics_path: '/metrics' + scheme: 'http' + relabel_configs: + - source_labels: [__address__] + target_label: instance + replacement: 'uptime-kuma' + + - job_name: 'postgres-exporter' + static_configs: + - targets: ['postgres-exporter:9187'] + relabel_configs: + - source_labels: [__address__] + target_label: instance + replacement: 'postgres' + + - job_name: 'redis-exporter' + static_configs: + - targets: ['redis-exporter:9121'] + relabel_configs: + - source_labels: [__address__] + target_label: instance + replacement: 'redis' diff --git a/config/prometheus/rules/homelab.yml b/config/prometheus/rules/homelab.yml index 0006c2f8..9892e66f 100644 --- 
a/config/prometheus/rules/homelab.yml +++ b/config/prometheus/rules/homelab.yml @@ -1,30 +1,33 @@ groups: - - name: homelab - interval: 1m + - name: homelab_alerts + interval: 30s rules: - - alert: ContainerDown - expr: absent(container_last_seen{name!=""}) - for: 2m + - alert: InstanceDown + expr: up == 0 + for: 1m labels: - severity: warning + severity: critical annotations: - summary: Container {{ $labels.name }} is down + summary: "Instance {{ $labels.instance }} down" + description: "{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 1 minute." - - alert: HighCPU - expr: 100 - (avg by(instance) (rate(node_cpu_seconds_total{mode="idle"}[5m])) * 100) > 85 - for: 5m + - alert: HighCpuUsage + expr: 100 - (avg by(instance) (rate(node_cpu_seconds_total{mode="idle"}[5m])) * 100) > 80 + for: 10m labels: severity: warning annotations: - summary: High CPU on {{ $labels.instance }} + summary: "High CPU usage on {{ $labels.instance }}" + description: "CPU usage is above 80% (current: {{ $value }}%)" - - alert: HighMemory - expr: (node_memory_MemTotal_bytes - node_memory_MemAvailable_bytes) / node_memory_MemTotal_bytes * 100 > 90 - for: 5m + - alert: HighMemoryUsage + expr: (1 - (node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes)) * 100 > 85 + for: 10m labels: - severity: critical + severity: warning annotations: - summary: High memory on {{ $labels.instance }} + summary: "High memory usage on {{ $labels.instance }}" + description: "Memory usage is above 85% (current: {{ $value }}%)" - alert: DiskSpaceLow expr: (node_filesystem_avail_bytes{mountpoint="/"} / node_filesystem_size_bytes{mountpoint="/"}) * 100 < 10 @@ -32,4 +35,32 @@ groups: labels: severity: critical annotations: - summary: Low disk on {{ $labels.instance }} + summary: "Low disk space on {{ $labels.instance }}" + description: "Disk available is below 10% (current: {{ $value }}%)" + + - alert: ContainerRestarting + expr: rate(container_last_seen{name=~"homelab-.*"}[5m]) == 0 
+ for: 2m + labels: + severity: warning + annotations: + summary: "Container {{ $labels.name }} may be restarting" + description: "Container {{ $labels.name }} has not been seen for 2 minutes." + + - alert: PrometheusHighMemoryUsage + expr: (process_resident_memory_bytes{job="prometheus"} / 1e9) > 2 + for: 5m + labels: + severity: warning + annotations: + summary: "Prometheus memory high" + description: "Prometheus is using more than 2GB RAM." + + - alert: LokiRequestErrors + expr: rate(loki_request_duration_seconds_count{status_code=~"5.."}[5m]) > 0.01 + for: 5m + labels: + severity: warning + annotations: + summary: "Loki request errors" + description: "Loki has 5xx errors: {{ $value }} req/s" diff --git a/config/tempo/tempo-config.yml b/config/tempo/tempo-config.yml new file mode 100644 index 00000000..833388f8 --- /dev/null +++ b/config/tempo/tempo-config.yml @@ -0,0 +1,51 @@ +server: + http_listen_port: 3200 + grpc_listen_port: 9095 + +distributor: + receivers: + otlp: + protocols: + grpc: + http: + jaeger: + protocols: + thrift_compact: + endpoint: 0.0.0.0:6831 + thrift_binary: + endpoint: 0.0.0.0:6832 + thrift_http: + endpoint: 0.0.0.0:14268 + grpc: + endpoint: 0.0.0.0:14250 + zipkin: + endpoint: 0.0.0.0:9411 + +ingester: + trace_idle_period: 10s + max_block_duration: 5m + +compactor: + compaction: + block_retention: 48h + +storage: + trace: + backend: local + local: + path: /tmp/tempo/traces + wal: + path: /tmp/tempo/wal + block: + bloom_filter_false_positive: .05 + v2_index_checksum: true + v2_encoding: zstd + +overrides: + defaults: + metrics_generator: + processors: ['span-metrics', 'service-graphs'] + generators: + processor: + span_metrics: + histogram_buckets: [0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10] diff --git a/config/traefik/dynamic/authentik.yml b/config/traefik/dynamic/authentik.yml index 230db99c..0f4143a0 100644 --- a/config/traefik/dynamic/authentik.yml +++ b/config/traefik/dynamic/authentik.yml @@ -1,18 +1,10 @@ -# 
============================================================================= -# Traefik — Authentik ForwardAuth Middleware -# -# Protects any service routed through Traefik. -# Unauthenticated requests are redirected to https://auth.DOMAIN for login. -# -# Usage — add to any service's docker-compose labels: -# traefik.http.routers..middlewares=authentik@file -# -# Docs: https://docs.goauthentik.io/docs/providers/proxy/traefik -# ============================================================================= +# Authentik ForwardAuth Middleware +# This file is used by Traefik to define the forward auth middleware for Authentik. +# The docker-compose.yml of the sso stack also defines a similar middleware via labels, +# but this file provides a reusable global middleware that can be referenced by name. http: middlewares: - # Full SSO protection — redirects to Authentik login page authentik: forwardAuth: address: "http://authentik-server:9000/outpost.goauthentik.io/auth/traefik" @@ -26,16 +18,4 @@ http: - X-authentik-jwt - X-authentik-meta-jwks - X-authentik-meta-outpost - - X-authentik-meta-provider - - X-authentik-meta-app - - X-authentik-meta-version - - # Lightweight check — 401 for unauthenticated (no redirect) - # Use this for APIs that need auth but not browser redirect - authentik-basic: - forwardAuth: - address: "http://authentik-server:9000/outpost.goauthentik.io/auth/traefik" - trustForwardHeader: true - authResponseHeaders: - - X-authentik-username - - X-authentik-groups + - X-authentik-meta-provider-id diff --git a/docker-compose.base.yml b/docker-compose.base.yml new file mode 100644 index 00000000..8ca6203f --- /dev/null +++ b/docker-compose.base.yml @@ -0,0 +1,150 @@ +services: + traefik: + image: traefik:v3.1.6 + container_name: traefik + restart: unless-stopped + command: + - --api.insecure=false + - --api.dashboard=true + - --api.debug=false + - --providers.docker.endpoint=tcp://socket-proxy:2375 + - --providers.docker.exposedbydefault=false + - 
--providers.docker.network=proxy + - --providers.file.directory=/etc/traefik/dynamic + - --providers.file.watch=true + - --entrypoints.web.address=:80 + - --entrypoints.websecure.address=:443 + - --entrypoints.web.http.redirections.entrypoint.to=websecure + - --entrypoints.web.http.redirections.entrypoint.scheme=https + - --entrypoints.web.http.redirections.entrypoint.permanent=true + - --certificatesresolvers.letsencrypt.acme.tlschallenge=true + - --certificatesresolvers.letsencrypt.acme.email=${ACME_EMAIL:-admin@example.com} + - --certificatesresolvers.letsencrypt.acme.storage=/letsencrypt/acme.json + - --log.level=INFO + - --accesslog=true + - --accesslog.filepath=/var/log/traefik/access.log + ports: + - "80:80" + - "443:443" + volumes: + - ./config/traefik/traefik.yml:/etc/traefik/traefik.yml:ro + - ./config/traefik/dynamic:/etc/traefik/dynamic:ro + - traefik-letsencrypt:/letsencrypt + - traefik-logs:/var/log/traefik + networks: + - proxy + labels: + - traefik.enable=true + - traefik.http.routers.dashboard.rule=Host(`traefik.${DOMAIN}`) + - traefik.http.routers.dashboard.service=api@internal + - traefik.http.routers.dashboard.middlewares=auth@file + - traefik.http.routers.dashboard.entrypoints=websecure + - traefik.http.routers.dashboard.tls=true + - traefik.http.routers.dashboard.tls.certresolver=letsencrypt + healthcheck: + test: ["CMD", "wget", "-q", "--spider", "http://localhost:8080/ping"] + interval: 30s + timeout: 10s + retries: 3 + start_period: 30s + + portainer: + image: portainer/portainer-ce:2.21.3 + container_name: portainer + restart: unless-stopped + command: + - --host=unix:///var/run/docker.sock + - --http-enabled=true + - --http-port=9000 + environment: + - ADMIN_PASSWORD=${PORTAINER_ADMIN_PASSWORD:-changeme} + - DOCKER_HOST=tcp://socket-proxy:2375 + volumes: + - portainer-data:/data + networks: + - proxy + labels: + - traefik.enable=true + - traefik.http.routers.portainer.rule=Host(`portainer.${DOMAIN}`) + - 
traefik.http.routers.portainer.entrypoints=websecure + - traefik.http.routers.portainer.tls=true + - traefik.http.routers.portainer.tls.certresolver=letsencrypt + - traefik.http.services.portainer.loadbalancer.server.port=9000 + depends_on: + - socket-proxy + healthcheck: + test: ["CMD", "wget", "-q", "--spider", "http://localhost:9000"] + interval: 30s + timeout: 10s + retries: 3 + start_period: 30s + + watchtower: + image: containrrr/watchtower:1.7.1 + container_name: watchtower + restart: unless-stopped + environment: + - DOCKER_HOST=tcp://socket-proxy:2375 + - WATCHTOWER_CLEANUP=true + - WATCHTOWER_LIFECYCLE_HOOKS=true + - WATCHTOWER_NOTIFICATIONS=none + volumes: + - /var/run/docker.sock:/var/run/docker.sock:ro + - watchtower-config:/config + networks: + - proxy + depends_on: + - socket-proxy + healthcheck: + test: ["CMD", "watchtower", "--help"] + interval: 60s + timeout: 10s + retries: 3 + + socket-proxy: + image: tecnativa/docker-socket-proxy:latest + container_name: docker-socket-proxy + restart: unless-stopped + environment: + - CONTAINERS=1 + - IMAGES=1 + - NETWORKS=1 + - VOLUMES=1 + - SERVICES=1 + - TASKS=1 + - EVENTS=1 + - INFO=1 + - EXEC=1 + - LOGS=1 + - PING=1 + - VERSION=1 + - AUTH=1 + - SWARM=0 + - BUILD=0 + - POST=0 + - ALLOW_START=1 + - ALLOW_STOP=1 + - ALLOW_RESTART=1 + - ALLOW_UPDATE=1 + - ALLOW_DELETE=0 + volumes: + - /var/run/docker.sock:/var/run/docker.sock:ro + networks: + - proxy + healthcheck: + test: ["CMD", "nc", "-z", "localhost", "2375"] + interval: 30s + timeout: 10s + retries: 3 + start_period: 10s + +networks: + proxy: + name: proxy + driver: bridge + +volumes: + traefik-letsencrypt: + traefik-logs: + portainer-data: + watchtower-config: diff --git a/docs/backup-restore.md b/docs/backup-restore.md new file mode 100644 index 00000000..6849ef38 --- /dev/null +++ b/docs/backup-restore.md @@ -0,0 +1,87 @@ +# Backup & Disaster Recovery + +HomeLab Stack implements a **3-2-1 backup strategy**: 3 copies of data, 2 different media types, 1 
offsite location. + +## Backup Services + +### Duplicati (Cloud Backup) + +- Web UI: `https://duplicati.${DOMAIN}` +- Encrypted backups to cloud storage (S3, Backblaze B2, Google Drive, etc.) +- Configure via the web interface + +### Restic REST Server (Local Backup) + +- Local backup repository (HTTP REST server) +- Available at `http://restic-rest-server:8000` (internal network) +- Use with `restic` CLI or `restic backup` scripts + +## Backup Script + +Use the `scripts/backup.sh` script to back up Docker volumes. + +### Usage + +```bash +# Backup all volumes +./scripts/backup.sh --target all + +# Backup volumes of a specific stack +./scripts/backup.sh --target monitoring + +# Dry-run mode (show what would be done) +./scripts/backup.sh --target all --dry-run + +# Keep backups for 14 days +./scripts/backup.sh --target all --retention 14 +``` + +### How it works + +1. The script identifies Docker volumes based on the target name (prefix matching). +2. For each volume, it creates a compressed tar.gz archive using a temporary Alpine container. +3. Archives are stored in `backups/volumes/<target>/<timestamp>/`. +4. A SHA256 checksum file is created for each archive. +5. Backups older than `--retention` days are automatically deleted. + +### Scheduling (cron) + +Add to crontab for automated daily backups: + +```bash +# Daily backup at 2 AM +0 2 * * * cd /path/to/homelab-stack && ./scripts/backup.sh --target all --retention 7 >> /var/log/homelab-backup.log 2>&1 +``` + +## Restore + +### Restore a single volume + +```bash +# List available backups +ls -la backups/volumes/all/ + +# Restore from a backup +BACKUP_FILE="backups/volumes/all/20250315_020000/prometheus_data.tar.gz" +docker run --rm \ + -v prometheus_data:/target \ + -v $(pwd)/backups:/backups:ro \ + alpine sh -c "tar xzf /backups/volumes/all/20250315_020000/prometheus_data.tar.gz -C /target" +``` + +### Disaster Recovery + +In case of complete server failure: + +1. Reinstall Docker and Docker Compose. +2. 
Clone the homelab-stack repository. +3. Restore the `.env` file from your offsite backup. +4. Restore all volumes from the latest backup archives. +5. Start infrastructure and stacks. + +## Best Practices + +- Set up Duplicati to send encrypted backups to an offsite location (e.g., Backblaze B2, S3). +- Schedule regular backups via cron. +- Test restore procedure regularly (at least monthly). +- Keep a copy of `.env` in your password manager or offline. diff --git a/scripts/backup.sh b/scripts/backup.sh index c9ba8377..5b4f3307 100644 --- a/scripts/backup.sh +++ b/scripts/backup.sh @@ -1,99 +1,184 @@ #!/usr/bin/env bash + # ============================================================================= -# HomeLab Backup — Docker volumes + configs 全量备份 +# backup.sh — HomeLab Stack Backup & Disaster Recovery Script +# 3-2-1 Backup Strategy: 3 copies, 2 media, 1 offsite # ============================================================================= + set -euo pipefail -SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")"; pwd)" -BASE_DIR="$SCRIPT_DIR/.." -ENV_FILE="$BASE_DIR/config/.env" - -[[ -f "$ENV_FILE" ]] && source "$ENV_FILE" - -BACKUP_DIR="${BACKUP_DIR:-/opt/homelab-backups}" -RETENTION_DAYS="${BACKUP_RETENTION_DAYS:-7}" -TIMESTAMP=$(date +%Y%m%d_%H%M%S) -BACKUP_PATH="$BACKUP_DIR/$TIMESTAMP" - -RED='\033[0;31m'; GREEN='\033[0;32m'; YELLOW='\033[1;33m'; NC='\033[0m' -log_info() { echo -e "${GREEN}[backup]${NC} $*"; } -log_warn() { echo -e "${YELLOW}[backup]${NC} $*"; } -log_error() { echo -e "${RED}[backup]${NC} $*" >&2; } - -mkdir -p "$BACKUP_PATH" - -# 备份 Docker volumes -backup_volumes() { - log_info "Backing up Docker volumes..." - local volumes - volumes=$(docker volume ls --format '{{.Name}}' | grep -v '^[a-f0-9]\{64\}$' || true) - while IFS= read -r vol; do - [[ -z "$vol" ]] && continue - log_info " Volume: $vol" - docker run --rm \ - -v "${vol}:/data:ro" \ - -v "$BACKUP_PATH:/backup" \ - alpine:3.19 \ - tar czf "/backup/vol_${vol}.tar.gz" -C /data . 
2>/dev/null || \ - log_warn " Failed to backup volume: $vol" - done <<< "$volumes" +# Constants +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +REPO_ROOT="$(cd "${SCRIPT_DIR}/.." && pwd)" +BACKUP_ROOT="${REPO_ROOT}/backups" +TIMESTAMP=$(date +"%Y%m%d_%H%M%S") +RETENTION_DAYS=7 + +# Colors for output +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +NC='\033[0m' # No Color + +# ============================================================================= +# Functions +# ============================================================================= + +print_usage() { + cat << EOF +Usage: backup.sh --target <stack|all> [options] + +Options: + --target all Backup all Docker volumes + --target <stack> Backup volumes of a specific stack (e.g., monitoring, media) + --dry-run Show what would be done without actually doing it + --retention <days> Number of days to keep backups (default: 7) + -h, --help Show this help message + +Examples: + backup.sh --target all + backup.sh --target monitoring + backup.sh --target all --retention 14 +EOF + exit 0 } -# 备份配置文件 -backup_configs() { - log_info "Backing up configs..." 
- tar czf "$BACKUP_PATH/configs.tar.gz" \ - -C "$BASE_DIR" \ - --exclude='stacks/*/data' \ - config/ stacks/ scripts/ 2>/dev/null || true +log_info() { + echo -e "${GREEN}[INFO]${NC} $1" +} + +log_warn() { + echo -e "${YELLOW}[WARN]${NC} $1" +} + +log_error() { + echo -e "${RED}[ERROR]${NC} $1" >&2 +} + +# Parse command line arguments +TARGET="" +DRY_RUN=false + +while [[ $# -gt 0 ]]; do + case "$1" in + --target) + TARGET="$2" + shift 2 + ;; + --dry-run) + DRY_RUN=true + shift + ;; + --retention) + RETENTION_DAYS="$2" + shift 2 + ;; + -h|--help) + print_usage + ;; + *) + log_error "Unknown option: $1" + print_usage + ;; + esac +done + +if [[ -z "$TARGET" ]]; then + log_error "--target is required" + print_usage +fi + +# ============================================================================= +# Backup logic +# ============================================================================= + +# Function to backup a single Docker volume +backup_volume() { + local volume_name="$1" + local backup_dir="${BACKUP_ROOT}/volumes/${TARGET}/${TIMESTAMP}" + local backup_file="${backup_dir}/${volume_name}.tar.gz" + + if [[ "$DRY_RUN" == true ]]; then + log_info "[DRY-RUN] Would backup volume: ${volume_name} -> ${backup_file}" + return + fi + + mkdir -p "${backup_dir}" + + log_info "Backing up volume: ${volume_name}" + if docker run --rm \ + -v "${volume_name}":/source:ro \ + -v "${backup_dir}":/backup \ + alpine tar czf "/backup/${volume_name}.tar.gz" -C /source .; then + log_info "✓ Successfully backed up ${volume_name}" + # Generate checksum + sha256sum "${backup_file}" > "${backup_file}.sha256" + else + log_error "✗ Failed to backup ${volume_name}" + return 1 + fi } -# 备份数据库 -backup_databases() { - log_info "Backing up databases..." 
- - # PostgreSQL - if docker ps --format '{{.Names}}' | grep -q 'postgres\|postgresql'; then - local pg_container - pg_container=$(docker ps --format '{{.Names}}' | grep -E 'postgres|postgresql' | head -1) - local pg_pass - pg_pass=$(docker inspect "$pg_container" --format '{{range .Config.Env}}{{println .}}{{end}}' | grep POSTGRES_PASSWORD | cut -d= -f2 | head -1) - docker exec "$pg_container" \ - sh -c "PGPASSWORD='$pg_pass' pg_dumpall -U postgres" \ - > "$BACKUP_PATH/postgresql_all.sql" 2>/dev/null || \ - log_warn "PostgreSQL backup failed" - fi - - # MariaDB/MySQL - if docker ps --format '{{.Names}}' | grep -q 'mariadb\|mysql'; then - local mysql_container - mysql_container=$(docker ps --format '{{.Names}}' | grep -E 'mariadb|mysql' | head -1) - local mysql_pass - mysql_pass=$(docker inspect "$mysql_container" --format '{{range .Config.Env}}{{println .}}{{end}}' | grep MYSQL_ROOT_PASSWORD | cut -d= -f2 | head -1) - docker exec "$mysql_container" \ - sh -c "mysqldump -u root -p'$mysql_pass' --all-databases" \ - > "$BACKUP_PATH/mysql_all.sql" 2>/dev/null || \ - log_warn "MySQL backup failed" - fi +# Function to get volumes associated with a specific stack or all +# Strategy: volumes named with stack prefix (e.g., monitoring_prometheus_data) +get_volumes_for_target() { + local target="$1" + local volumes + + if [[ "$target" == "all" ]]; then + volumes=$(docker volume ls --format '{{.Name}}') + else + # Assume volumes follow pattern: stackname_* + volumes=$(docker volume ls --filter name="^${target}_" --format '{{.Name}}') + # Also include volumes from stack's docker-compose.yml + # We can parse the compose file to get volume names, but simpler: just filter by name + fi + + echo "$volumes" } -# 清理旧备份 -cleanup_old() { - log_info "Cleaning backups older than ${RETENTION_DAYS} days..." 
- find "$BACKUP_DIR" -maxdepth 1 -type d -mtime +"$RETENTION_DAYS" -exec rm -rf {} + 2>/dev/null || true +# Function to clean old backups +cleanup_old_backups() { + local target="$1" + local backup_dir="${BACKUP_ROOT}/volumes/${target}" + + if [[ ! -d "$backup_dir" ]]; then + return + fi + + log_info "Cleaning backups older than ${RETENTION_DAYS} days for target: ${target}" + find "${backup_dir}" -mindepth 1 -maxdepth 1 -type d -mtime +${RETENTION_DAYS} -exec rm -rf {} \; } -# 生成备份摘要 -generate_summary() { - local total_size - total_size=$(du -sh "$BACKUP_PATH" 2>/dev/null | cut -f1) - log_info "Backup complete: $BACKUP_PATH ($total_size)" - ls -lh "$BACKUP_PATH/" +# Main backup process +main() { + log_info "Starting backup for target: ${TARGET}" + mkdir -p "${BACKUP_ROOT}/volumes/${TARGET}" + + local volumes + volumes=$(get_volumes_for_target "$TARGET") + + if [[ -z "$volumes" ]]; then + log_warn "No volumes found for target: ${TARGET}" + exit 0 + fi + + local exit_code=0 + while IFS= read -r vol; do + if [[ -n "$vol" ]]; then + backup_volume "$vol" || exit_code=1 + fi + done <<< "$volumes" + + cleanup_old_backups "$TARGET" + + if [[ $exit_code -eq 0 ]]; then + log_info "Backup completed successfully for target: ${TARGET}" + else + log_error "Backup completed with errors for target: ${TARGET}" + fi + + exit $exit_code } -log_info "Starting backup — $TIMESTAMP" -backup_configs -backup_volumes -backup_databases -cleanup_old -generate_summary +main diff --git a/scripts/setup-authentik.sh b/scripts/setup-authentik.sh index 4accf4c4..a7567c24 100644 --- a/scripts/setup-authentik.sh +++ b/scripts/setup-authentik.sh @@ -1,154 +1,161 @@ -#!/usr/bin/env bash -# ============================================================================= -# HomeLab Stack -- Authentik SSO Setup Script -# Creates OIDC providers for Grafana, Gitea, Outline, Portainer -# Requires: curl, jq -# Usage: ./scripts/setup-authentik.sh -# 
============================================================================= +#!/bin/bash +# ============================================= +# setup-authentik.sh - Auto configure OIDC providers +# ============================================= +# This script: +# 1. Waits for Authentik to be ready +# 2. Creates OIDC providers for all integrated services +# 3. Writes client credentials to the shared .env (repo root) +# ============================================= + set -euo pipefail -SCRIPT_DIR=$(cd "$(dirname "$0")" && pwd) -ROOT_DIR=$(dirname "$SCRIPT_DIR") +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +REPO_ROOT="$(cd "${SCRIPT_DIR}/.." && pwd)" -# Load .env -if [ -f "$ROOT_DIR/.env" ]; then - set -a; source "$ROOT_DIR/.env"; set +a +# Load environment variables from SSO stack .env +SSO_ENV_FILE="${REPO_ROOT}/stacks/sso/.env" +if [ ! -f "${SSO_ENV_FILE}" ]; then + echo "❌ SSO .env file not found at ${SSO_ENV_FILE}" + echo "Please copy stacks/sso/.env.example to stacks/sso/.env and fill required values." + exit 1 fi -RED='\033[0;31m'; GREEN='\033[0;32m'; YELLOW='\033[1;33m' -CYAN='\033[0;36m'; BOLD='\033[1m'; RESET='\033[0m' -log_info() { echo -e "${GREEN}[INFO]${RESET} $*"; } -log_warn() { echo -e "${YELLOW}[WARN]${RESET} $*"; } -log_error() { echo -e "${RED}[ERROR]${RESET} $*" >&2; } -log_step() { echo; echo -e "${BOLD}${CYAN}==> $*${RESET}"; } - -AUTHENTIK_URL="https://${AUTHENTIK_DOMAIN:-auth.${DOMAIN}}" -API_URL="$AUTHENTIK_URL/api/v3" -TOKEN="${AUTHENTIK_BOOTSTRAP_TOKEN:-}" +set -a +source "${SSO_ENV_FILE}" +set +a -if [ -z "$TOKEN" ]; then - log_error "AUTHENTIK_BOOTSTRAP_TOKEN is not set in .env" - exit 1 +# Also load root .env to potentially update it +ROOT_ENV_FILE="${REPO_ROOT}/.env" +if [ ! -f "${ROOT_ENV_FILE}" ]; then + echo "⚠️ Root .env not found at ${ROOT_ENV_FILE}, creating empty one." 
+ touch "${ROOT_ENV_FILE}" fi -AUTH_HEADER="Authorization: Bearer $TOKEN" - -get_default_flow() { - local designation="$1" - curl -sf "$API_URL/flows/instances/?designation=${designation}&ordering=slug" \ - -H "$AUTH_HEADER" | jq -r '.results[0].pk' +# --- Helper functions --- + +wait_for_authentik() { + local max_attempts=60 + local attempt=1 + local url="http://authentik-server:9000/-/health/ready/" + + echo "⏳ Waiting for Authentik to be ready..." + while [ $attempt -le $max_attempts ]; do + if curl -sf "${url}" > /dev/null 2>&1; then + echo "✅ Authentik is ready (attempt $attempt)" + return 0 + fi + echo " Attempt $attempt/${max_attempts}... waiting 5s" + sleep 5 + attempt=$((attempt + 1)) + done + echo "❌ Authentik did not become ready after ${max_attempts} attempts." + exit 1 } -get_signing_key() { - curl -sf "$API_URL/crypto/certificatekeypairs/?has_key=true&ordering=name" \ - -H "$AUTH_HEADER" | jq -r '.results[0].pk' +get_admin_token() { + # Obtain admin bearer token using bootstrap token + local token_url="https://${AUTHENTIK_DOMAIN}/api/v3/core/tokens/" + local response + response=$(curl -sf -X POST "${token_url}" \ + -H "Authorization: Bearer ${AUTHENTIK_BOOTSTRAP_TOKEN}" \ + -H "Content-Type: application/json" \ + -d '{ + "identifier": "setup-script-token", + "intent": "api", + "expires": "2050-01-01T00:00:00Z" + }' 2>/dev/null || true) + + if [ -z "${response}" ]; then + # Fallback: use bootstrap token directly (it's already a valid API token) + echo "${AUTHENTIK_BOOTSTRAP_TOKEN}" + else + echo "${response}" | grep -o '"key":"[^"]*"' | cut -d'"' -f4 + fi } create_oidc_provider() { - local name="$1" - local redirect_uri="$2" - local client_id_var="$3" - local client_secret_var="$4" - - log_step "Creating OIDC provider: $name" - - local flow_pk signing_key - flow_pk=$(get_default_flow authorize) - signing_key=$(get_signing_key) - local slug - slug=$(echo "$name" | tr '[:upper:]' '[:lower:]') - - local payload - payload=$(jq -n \ - --arg name "${name} 
Provider" \ - --arg flow "$flow_pk" \ - --arg uri "$redirect_uri" \ - --arg key "$signing_key" \ - '{ - name: $name, - authorization_flow: $flow, - client_type: "confidential", - redirect_uris: $uri, - sub_mode: "hashed_user_id", - include_claims_in_id_token: true, - signing_key: $key - }') - + local service_name="$1" # e.g. "grafana" + local redirect_uri="$2" # e.g. "https://grafana.example.com/login/generic_oauth" + local client_id_var="${service_name}_oauth_client_id" + local client_secret_var="${service_name}_oauth_client_secret" + + # Generate random ID and secret + local client_id="${service_name}-$(openssl rand -hex 8)" + local client_secret=$(openssl rand -hex 32) + + echo "🔧 Creating OIDC provider for ${service_name}..." + + local provider_url="https://${AUTHENTIK_DOMAIN}/api/v3/providers/oauth2/" local response - response=$(curl -sf -X POST "$API_URL/providers/oauth2/" \ - -H "$AUTH_HEADER" \ + response=$(curl -sf -X POST "${provider_url}" \ + -H "Authorization: Bearer ${ADMIN_TOKEN}" \ -H "Content-Type: application/json" \ - -d "$payload") - - local provider_pk client_id client_secret - provider_pk=$(echo "$response" | jq -r '.pk') - client_id=$(echo "$response" | jq -r '.client_id') - client_secret=$(echo "$response" | jq -r '.client_secret') - - log_info " Provider PK: $provider_pk" - log_info " Client ID: $client_id" + -d "{ + \"name\": \"${service_name} Provider\", + \"client_id\": \"${client_id}\", + \"client_secret\": \"${client_secret}\", + \"redirect_uris\": [\"${redirect_uri}\"], + \"authorization_flow\": null, + \"property_mappings\": [], + \"client_type\": \"confidential\", + \"access_code_validity\": \"minutes=5\", + \"access_token_validity\": \"minutes=60\", + \"refresh_token_validity\": \"days=30\", + \"include_claims_from_id_token\": true, + \"sub_mode\": \"hashed_user_id\" + }" 2>/dev/null || { echo "⚠️ Failed to create provider for ${service_name}"; return 1; }) + + # Extract provider ID from response (not strictly needed but useful) + 
local provider_id + provider_id=$(echo "${response}" | grep -o '"pk":[0-9]*' | cut -d: -f2) + echo " ✅ Provider created with ID: ${provider_id}" + + # Write credentials to root .env (if not already exists) + if grep -q "^${client_id_var}=" "${ROOT_ENV_FILE}" 2>/dev/null; then + echo " ⚠️ ${client_id_var} already exists in root .env, skipping update." + else + echo "${client_id_var}=${client_id}" >> "${ROOT_ENV_FILE}" + echo "${client_secret_var}=${client_secret}" >> "${ROOT_ENV_FILE}" + echo " ✅ Credentials written to root .env" + fi +} - sed -i "s|^${client_id_var}=.*|${client_id_var}=${client_id}|" "$ROOT_DIR/.env" - sed -i "s|^${client_secret_var}=.*|${client_secret_var}=${client_secret}|" "$ROOT_DIR/.env" +# --- Main --- - local app_payload - app_payload=$(jq -n \ - --arg name "$name" \ - --arg slug "$slug" \ - --argjson pk "$provider_pk" \ - '{name: $name, slug: $slug, provider: $pk}') +echo "================================================" +echo " Authentik OIDC Provider Setup Script" +echo "================================================" - curl -sf -X POST "$API_URL/core/applications/" \ - -H "$AUTH_HEADER" \ - -H "Content-Type: application/json" \ - -d "$app_payload" > /dev/null +# Ensure Authentik is running +wait_for_authentik - log_info " Application created: $name" -} +# Obtain admin token +ADMIN_TOKEN=$(get_admin_token) +if [ -z "${ADMIN_TOKEN}" ]; then + echo "❌ Failed to obtain admin token. Check AUTHENTIK_BOOTSTRAP_TOKEN." + exit 1 +fi -# ------------------------------------------------------------------ -# Wait for Authentik to be ready -# ------------------------------------------------------------------ -log_step "Waiting for Authentik API..." -for i in $(seq 1 30); do - if curl -sf "$AUTHENTIK_URL/-/health/ready/" -o /dev/null; then - log_info "Authentik is ready" - break - fi - if [ "$i" -eq 30 ]; then - log_error "Authentik did not become ready in 150s" - exit 1 - fi - echo -n "." - sleep 5 +echo "🔑 Admin token obtained successfully." 
+ +# Define services with their OIDC redirect URIs +# Format: "service_name|redirect_uri" +declare -a services=( + "grafana|https://grafana.${DOMAIN}/login/generic_oauth" + "gitea|https://git.${DOMAIN}/user/oauth2/authentik/callback" + "outline|https://docs.${DOMAIN}/auth/oidc.callback" + "portainer|https://portainer.${DOMAIN}/oauth/authorize" + "nextcloud|https://nextcloud.${DOMAIN}/apps/oauth2/authorize" +) + +for entry in "${services[@]}"; do + IFS='|' read -r name redirect <<< "${entry}" + create_oidc_provider "${name}" "${redirect}" done -# ------------------------------------------------------------------ -# Create providers -# ------------------------------------------------------------------ -create_oidc_provider \ - "Grafana" \ - "https://grafana.${DOMAIN}/login/generic_oauth" \ - "GRAFANA_OAUTH_CLIENT_ID" \ - "GRAFANA_OAUTH_CLIENT_SECRET" - -create_oidc_provider \ - "Gitea" \ - "https://git.${DOMAIN}/user/oauth2/Authentik/callback" \ - "GITEA_OAUTH_CLIENT_ID" \ - "GITEA_OAUTH_CLIENT_SECRET" - -create_oidc_provider \ - "Outline" \ - "https://outline.${DOMAIN}/auth/oidc.callback" \ - "OUTLINE_OAUTH_CLIENT_ID" \ - "OUTLINE_OAUTH_CLIENT_SECRET" - -create_oidc_provider \ - "Portainer" \ - "https://portainer.${DOMAIN}/" \ - "PORTAINER_OAUTH_CLIENT_ID" \ - "PORTAINER_OAUTH_CLIENT_SECRET" - -log_step "All providers created. Credentials written to .env" -log_info "Authentik OIDC issuer: $AUTHENTIK_URL/application/o//" +echo "================================================" +echo "✅ All OIDC providers configured." +echo " Please restart affected services to load new credentials." 
+echo " (e.g., docker compose -f stacks/monitoring/docker-compose.yml restart grafana)" +echo "================================================" diff --git a/scripts/setup-cn-mirrors.sh b/scripts/setup-cn-mirrors.sh new file mode 100644 index 00000000..81347805 --- /dev/null +++ b/scripts/setup-cn-mirrors.sh @@ -0,0 +1,174 @@ +#!/usr/bin/env bash +# ============================================================================= +# setup-cn-mirrors.sh - Docker Registry Mirrors for China Mainland +# ============================================================================= +# This script helps users in China configure Docker daemon with registry +# mirrors to improve image pull speed. It interactively asks whether to +# apply CN mirrors, backs up existing /etc/docker/daemon.json, writes +# mirror entries, restarts Docker, and verifies with 'docker pull hello-world'. +# ============================================================================= + +set -euo pipefail + +# Colors for output +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +NC='\033[0m' # No Color + +info() { echo -e "${GREEN}[INFO]${NC} $1"; } +warn() { echo -e "${YELLOW}[WARN]${NC} $1"; } +error() { echo -e "${RED}[ERROR]${NC} $1"; } + +# Default mirror list (primary + backup) +MIRRORS=( + "https://docker.m.daocloud.io" + "https://mirror.gcr.io" + "https://hub-mirror.c.163.com" +) + +# Check if running as root +check_root() { + if [[ $EUID -ne 0 ]]; then + error "This script must be run as root. Use sudo." + exit 1 + fi +} + +# Check if Docker is installed +check_docker() { + if ! command -v docker &> /dev/null; then + error "Docker is not installed. Please install Docker first." 
+ exit 1 + fi +} + +# Backup existing daemon.json +backup_daemon() { + local daemon_file="/etc/docker/daemon.json" + if [[ -f "$daemon_file" ]]; then + local backup="${daemon_file}.bak.$(date +%Y%m%d%H%M%S)" + cp "$daemon_file" "$backup" + info "Backed up existing $daemon_file to $backup" + fi +} + +# Write new daemon.json with mirrors +write_mirrors() { + local daemon_file="/etc/docker/daemon.json" + local tmp_file + tmp_file=$(mktemp) + + # Build JSON array of mirrors + local mirrors_json="[" + for ((i=0; i<${#MIRRORS[@]}; i++)); do + if [[ $i -ne 0 ]]; then + mirrors_json+=", " + fi + mirrors_json+="\"${MIRRORS[$i]}\"" + done + mirrors_json+="]" + + # Check if daemon.json already exists and has other config + if [[ -f "$daemon_file" ]]; then + # Merge with existing config (preserve other keys) + if command -v jq &> /dev/null; then + jq --argjson mirrors "$mirrors_json" '.["registry-mirrors"] = $mirrors' "$daemon_file" > "$tmp_file" + else + # Without jq, simply overwrite (simple case) + cat > "$tmp_file" <<EOF +{ + "registry-mirrors": $mirrors_json +} +EOF + fi + else + cat > "$tmp_file" <<EOF +{ + "registry-mirrors": $mirrors_json +} +EOF + fi + + cat "$tmp_file" > "$daemon_file" + rm -f "$tmp_file" + info "Written registry mirrors to $daemon_file" +} + +# Restart Docker service +restart_docker() { + info "Restarting Docker daemon..." + if command -v systemctl &> /dev/null; then + systemctl restart docker + elif command -v service &> /dev/null; then + service docker restart + else + error "Cannot restart Docker. Please restart manually." + return 1 + fi +} + +# Verify mirror works by pulling hello-world +verify_mirror() { + info "Verifying mirror configuration: pulling 'hello-world'..." + # Remove hello-world if exists locally + docker rmi hello-world 2>/dev/null || true + if docker pull hello-world; then + info "Successfully pulled hello-world using mirrors." + else + warn "Docker pull failed. Mirrors may not be working. Check network." 
+ return 1 + fi +} + +# Main function +main() { + check_root + check_docker + + echo -e "\n${YELLOW}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}" + echo -e "${YELLOW} Docker Registry Mirror Setup (CN)${NC}" + echo -e "${YELLOW}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}" + echo + + read -r -p "Are you deploying in mainland China? (y/N): " response + if [[ ! "$response" =~ ^[Yy](es)?$ ]]; then + info "No changes made. Exiting." + exit 0 + fi + + echo + info "Available mirror sources:" + for ((i=0; i<${#MIRRORS[@]}; i++)); do + echo " $((i+1)). ${MIRRORS[$i]}" + done + echo + + # Allow user to customize mirrors (optional) + read -r -p "Use these default mirrors? (Y/n): " use_default + if [[ "$use_default" =~ ^[Nn](o)?$ ]]; then + echo "Enter your own mirror URLs (one per line, empty line to finish):" + MIRRORS=() + while IFS= read -r line; do + [[ -z "$line" ]] && break + MIRRORS+=("$line") + done + if [[ ${#MIRRORS[@]} -eq 0 ]]; then + error "No mirrors provided. Aborting." + exit 1 + fi + fi + + backup_daemon + write_mirrors + restart_docker + echo + verify_mirror + + echo + info "Docker mirror configuration completed successfully!" 
+ echo -e "${GREEN}You can now enjoy faster image pulls in China.${NC}" +} + +main "$@" diff --git a/stacks/backup/.env.example b/stacks/backup/.env.example new file mode 100644 index 00000000..64821ca4 --- /dev/null +++ b/stacks/backup/.env.example @@ -0,0 +1,9 @@ +# Stack: backup +# Backup & Disaster Recovery configuration +# Copy this file to .env and fill in your values + +# Duplicati web interface password +DUPLICATI_PASSWORD=changeme + +# Restic repository password (used by external clients) +RESTIC_PASSWORD=changeme diff --git a/stacks/backup/.gitkeep b/stacks/backup/.gitkeep new file mode 100644 index 00000000..e69de29b diff --git a/stacks/backup/docker-compose.yml b/stacks/backup/docker-compose.yml new file mode 100644 index 00000000..a8af3f1f --- /dev/null +++ b/stacks/backup/docker-compose.yml @@ -0,0 +1,60 @@ +services: + duplicati: + image: lscr.io/linuxserver/duplicati:2.0.8 + container_name: duplicati + restart: unless-stopped + networks: + - proxy + - backup + volumes: + - duplicati-config:/config + - duplicati-backups:/backups + - /var/run/docker.sock:/var/run/docker.sock:ro + environment: + - PUID=1000 + - PGID=1000 + - TZ=${TZ:-Asia/Shanghai} + - DUPLICATI__WEBSERVICE_PASSWORD=${DUPLICATI_PASSWORD:-changeme} + labels: + - traefik.enable=true + - "traefik.http.routers.duplicati.rule=Host(`duplicati.${DOMAIN}`)" + - traefik.http.routers.duplicati.entrypoints=websecure + - traefik.http.routers.duplicati.tls=true + - traefik.http.services.duplicati.loadbalancer.server.port=8200 + healthcheck: + test: ["CMD", "curl", "-sf", "http://localhost:8200"] + interval: 30s + timeout: 10s + retries: 3 + start_period: 30s + + restic-rest-server: + image: restic/rest-server:0.13.0 + container_name: restic-rest-server + restart: unless-stopped + networks: + - backup + volumes: + - restic-repo:/data + environment: + - REST_SERVER_DATA_PATH=/data + - REST_SERVER_OPTIONS=--no-auth + # For production, enable authentication with --htpasswd-file + command: --listen 
:8000 --no-auth + healthcheck: + test: ["CMD", "wget", "-q", "--spider", "http://localhost:8000"] + interval: 30s + timeout: 10s + retries: 3 + start_period: 10s + +networks: + proxy: + external: true + backup: + driver: bridge + +volumes: + duplicati-config: + duplicati-backups: + restic-repo: diff --git a/stacks/monitoring/.env.example b/stacks/monitoring/.env.example index 9f93ef43..629c8d73 100644 --- a/stacks/monitoring/.env.example +++ b/stacks/monitoring/.env.example @@ -1,7 +1,23 @@ -# Monitoring Stack env — copy root .env, values below are stack-specific +# Monitoring stack specific variables + +# Grafana admin credentials GRAFANA_ADMIN_USER=admin -GRAFANA_ADMIN_PASSWORD=CHANGE_ME +GRAFANA_ADMIN_PASSWORD=changeme + +# Grafana OAuth with Authentik (required for SSO) GRAFANA_OAUTH_CLIENT_ID= GRAFANA_OAUTH_CLIENT_SECRET= -AUTHENTIK_DOMAIN=auth.yourdomain.com -DOMAIN=localhost + +# Alertmanager email configuration +SMTP_HOST=smtp.example.com +SMTP_PORT=587 +ALERTMANAGER_EMAIL_FROM=alertmanager@${DOMAIN} +ALERTMANAGER_EMAIL_USER= +ALERTMANAGER_EMAIL_PASS= +ALERTMANAGER_EMAIL_TO=admin@${DOMAIN} + +# Uptime Kuma domain (optional) +UPTIME_KUMA_DOMAIN=uptime.${DOMAIN} + +# General +TZ=Asia/Shanghai diff --git a/stacks/monitoring/docker-compose.yml b/stacks/monitoring/docker-compose.yml index ea1a2718..b42652e8 100644 --- a/stacks/monitoring/docker-compose.yml +++ b/stacks/monitoring/docker-compose.yml @@ -31,7 +31,7 @@ services: - proxy grafana: - image: grafana/grafana:11.2.0 + image: grafana/grafana:11.2.2 container_name: grafana restart: unless-stopped environment: @@ -56,6 +56,7 @@ services: volumes: - grafana_data:/var/lib/grafana - ../../config/grafana/provisioning:/etc/grafana/provisioning:ro + - ../../config/grafana/dashboards:/var/lib/grafana/dashboards:ro healthcheck: test: ["CMD", "wget", "-q", "--spider", "http://localhost:3000/api/health"] interval: 30s @@ -152,6 +153,56 @@ services: networks: - monitoring + tempo: + image: 
grafana/tempo:2.6.1 + container_name: tempo + restart: unless-stopped + command: -config.file=/etc/tempo/tempo-config.yml + volumes: + - ../../config/tempo/tempo-config.yml:/etc/tempo/tempo-config.yml:ro + - tempo_data:/tmp/tempo + healthcheck: + test: ["CMD", "wget", "-q", "--spider", "http://localhost:3200/ready"] + interval: 30s + timeout: 10s + retries: 3 + start_period: 30s + labels: + - traefik.enable=true + - traefik.http.routers.tempo.rule=Host(`tempo.${DOMAIN}`) + - traefik.http.routers.tempo.entrypoints=websecure + - traefik.http.routers.tempo.tls=true + - traefik.http.routers.tempo.service=tempo + - traefik.http.services.tempo.loadbalancer.server.port=3200 + networks: + - monitoring + - proxy + + uptime-kuma: + image: louislam/uptime-kuma:1.23.16 + container_name: uptime-kuma + restart: unless-stopped + volumes: + - uptime-kuma_data:/app/data + environment: + - TZ=${TZ:-Asia/Shanghai} + - UPTIME_KUMA_DOMAIN=${UPTIME_KUMA_DOMAIN:-uptime.${DOMAIN}} + labels: + - traefik.enable=true + - traefik.http.routers.uptime-kuma.rule=Host(`uptime.${DOMAIN}`) + - traefik.http.routers.uptime-kuma.entrypoints=websecure + - traefik.http.routers.uptime-kuma.tls=true + - traefik.http.services.uptime-kuma.loadbalancer.server.port=3001 + healthcheck: + test: ["CMD", "node", "/app/server/server.js", "--health"] + interval: 30s + timeout: 10s + retries: 3 + start_period: 30s + networks: + - monitoring + - proxy + networks: monitoring: driver: bridge @@ -163,3 +214,5 @@ volumes: grafana_data: loki_data: alertmanager_data: + tempo_data: + uptime-kuma_data: diff --git a/stacks/network/.env.example b/stacks/network/.env.example index b3065da1..396a542a 100644 --- a/stacks/network/.env.example +++ b/stacks/network/.env.example @@ -1,2 +1,13 @@ -TZ=Asia/Shanghai -DOMAIN=localhost +# Network Stack Environment Variables +# Copy to .env in the root of the project or override here with stack-specific values + +# WireGuard Easy +WG_HOST= +WGUI_PASSWORD= +WG_PORT=51820 
+WG_DEFAULT_DNS=1.1.1.1 + +# Cloudflare DDNS +CF_API_TOKEN= +CF_DOMAINS= +CF_PROXIED=true diff --git a/stacks/network/docker-compose.yml b/stacks/network/docker-compose.yml index 365fc55b..a5f3918b 100644 --- a/stacks/network/docker-compose.yml +++ b/stacks/network/docker-compose.yml @@ -13,16 +13,68 @@ services: - 53:53/udp labels: - traefik.enable=true - - traefik.http.routers.adguard.rule=Host() + - traefik.http.routers.adguard.rule=Host(`adguard.${DOMAIN}`) - traefik.http.routers.adguard.entrypoints=websecure - traefik.http.routers.adguard.tls=true - - traefik.http.services.adguard.loadbalancer.server.port=3000 + - traefik.http.services.adguard.loadbalancer.server.port=80 healthcheck: - test: [CMD, wget, -qO-, http://localhost:3000] + test: [CMD, wget, -qO-, http://localhost:80] interval: 30s timeout: 10s retries: 3 start_period: 30s + + wireguard: + image: ghcr.io/wg-easy/wg-easy:14 + container_name: wireguard + restart: unless-stopped + networks: + - proxy + volumes: + - wireguard-data:/etc/wireguard + environment: + - WG_HOST=${WG_HOST} + - PASSWORD=${WGUI_PASSWORD} + - WG_PORT=${WG_PORT:-51820} + - WG_DEFAULT_ADDRESS=10.8.0.x + - WG_DEFAULT_DNS=${WG_DEFAULT_DNS:-1.1.1.1} + - WG_ALLOWED_IPS=0.0.0.0/0, ::/0 + - WG_PERSISTENT_KEEPALIVE=25 + cap_add: + - NET_ADMIN + - SYS_MODULE + sysctls: + - net.ipv4.conf.all.src_valid_mark=1 + - net.ipv4.ip_forward=1 + ports: + - "${WG_PORT:-51820}:51820/udp" + labels: + - traefik.enable=true + - traefik.http.routers.wireguard.rule=Host(`vpn.${DOMAIN}`) + - traefik.http.routers.wireguard.entrypoints=websecure + - traefik.http.routers.wireguard.tls=true + - traefik.http.services.wireguard.loadbalancer.server.port=51821 + healthcheck: + test: [CMD, wget, -qO-, http://localhost:51821] + interval: 30s + timeout: 10s + retries: 3 + start_period: 30s + + cloudflare-ddns: + image: ghcr.io/favonia/cloudflare-ddns:1.1.0 + container_name: cloudflare-ddns + restart: unless-stopped + networks: + - proxy + environment: + - 
CF_API_TOKEN=${CF_API_TOKEN} + - DOMAINS=${CF_DOMAINS} + - PROXIED=${CF_PROXIED:-true} + - TZ=${TZ:-Asia/Shanghai} + labels: + - traefik.enable=false + nginx-proxy-manager: image: jc21/nginx-proxy-manager:2.11.3 container_name: nginx-proxy-manager @@ -36,7 +88,7 @@ services: - 8181:81 labels: - traefik.enable=true - - traefik.http.routers.npm.rule=Host() + - traefik.http.routers.npm.rule=Host(`npm.${DOMAIN}`) - traefik.http.routers.npm.entrypoints=websecure - traefik.http.routers.npm.tls=true - traefik.http.services.npm.loadbalancer.server.port=81 @@ -46,11 +98,14 @@ services: timeout: 10s retries: 3 start_period: 30s + networks: proxy: external: true + volumes: adguard-work: adguard-conf: + wireguard-data: npm-data: npm-letsencrypt: diff --git a/stacks/productivity/.env.example b/stacks/productivity/.env.example index 79c59178..e6155de6 100644 --- a/stacks/productivity/.env.example +++ b/stacks/productivity/.env.example @@ -1,38 +1,30 @@ -# Productivity Stack Environment Variables -# Copy to .env and fill ALL values before running. 
+# Productivity Stack Environment Variables +# Copy this file to .env and fill in your values -DOMAIN=yourdomain.com -TZ=Asia/Shanghai - -# Authentik domain (from SSO stack) -AUTHENTIK_DOMAIN=auth.yourdomain.com +# Gitea +GITEA_DB_PASSWORD=change_me_gitea_db_password +GITEA_OAUTH2_JWT_SECRET=change_me_gitea_oauth2_jwt_secret -# Database passwords (must match databases stack .env) -GITEA_DB_PASSWORD= -VAULTWARDEN_DB_PASSWORD= -OUTLINE_DB_PASSWORD= -BOOKSTACK_DB_PASSWORD= +# Vaultwarden +VAULTWARDEN_ADMIN_TOKEN=change_me_vaultwarden_admin_token +VAULTWARDEN_DB_PASSWORD=change_me_vaultwarden_db_password -# Redis password (must match databases stack .env) -REDIS_PASSWORD= +# Outline +OUTLINE_SECRET_KEY=change_me_outline_secret_key +OUTLINE_UTILS_SECRET=change_me_outline_utils_secret +OUTLINE_DB_PASSWORD=change_me_outline_db_password +OUTLINE_OAUTH_CLIENT_ID=change_me_outline_oauth_client_id +OUTLINE_OAUTH_CLIENT_SECRET=change_me_outline_oauth_client_secret -# Secrets generate with: openssl rand -hex 32 -VAULTWARDEN_ADMIN_TOKEN= -OUTLINE_SECRET_KEY= -OUTLINE_UTILS_SECRET= -GITEA_OAUTH2_JWT_SECRET= - -# BookStack generate APP_KEY with: echo "base64:$(openssl rand -base64 32)" -BOOKSTACK_APP_KEY= -# Set to 'oidc' to enable SSO (requires OIDC vars below) +# BookStack +BOOKSTACK_APP_KEY=change_me_bookstack_app_key +BOOKSTACK_DB_PASSWORD=change_me_bookstack_db_password BOOKSTACK_AUTH_METHOD=standard +BOOKSTACK_OIDC_CLIENT_ID=change_me_bookstack_oidc_client_id +BOOKSTACK_OIDC_CLIENT_SECRET=change_me_bookstack_oidc_client_secret -# OAuth2 client credentials filled by scripts/setup-authentik.sh -GRAFANA_OAUTH_CLIENT_ID= -GRAFANA_OAUTH_CLIENT_SECRET= -GITEA_OAUTH_CLIENT_ID= -GITEA_OAUTH_CLIENT_SECRET= -OUTLINE_OAUTH_CLIENT_ID= -OUTLINE_OAUTH_CLIENT_SECRET= -BOOKSTACK_OIDC_CLIENT_ID= -BOOKSTACK_OIDC_CLIENT_SECRET= +# General (shared with root .env example, but included for clarity) +DOMAIN=example.com +AUTHENTIK_DOMAIN=authentik.example.com 
+REDIS_PASSWORD=change_me_redis_password +TZ=Asia/Shanghai diff --git a/stacks/sso/.env.example b/stacks/sso/.env.example index 3d4a315f..de7dc5fd 100644 --- a/stacks/sso/.env.example +++ b/stacks/sso/.env.example @@ -1,30 +1,30 @@ -# ============================================================================= -# SSO Stack — Environment Variables -# Copy to .env and fill ALL required values before running. -# ============================================================================= +# Authentik SSO Stack Environment Variables +# Copy this file to .env and fill required values +# See README.md for details -# Shared domain (from root .env) -DOMAIN=yourdomain.com -TZ=Asia/Shanghai - -# Authentik domain (default: auth.yourdomain.com) -AUTHENTIK_DOMAIN=auth.${DOMAIN} - -# REQUIRED: Generate with: openssl rand -base64 32 +# Required: Random secret key (use: openssl rand -base64 32) AUTHENTIK_SECRET_KEY= -# REQUIRED: Strong random passwords +# Required: PostgreSQL credentials +AUTHENTIK_POSTGRES_USER=authentik AUTHENTIK_POSTGRES_PASSWORD= + +# Required: Redis password AUTHENTIK_REDIS_PASSWORD= -# Bootstrap admin account (created on first boot) -AUTHENTIK_BOOTSTRAP_EMAIL=admin@yourdomain.com +# Required: Initial admin credentials +AUTHENTIK_BOOTSTRAP_EMAIL=admin@example.com AUTHENTIK_BOOTSTRAP_PASSWORD= -# OAuth2 client credentials — filled by scripts/setup-authentik.sh -GRAFANA_OAUTH_CLIENT_ID= -GRAFANA_OAUTH_CLIENT_SECRET= -GITEA_OAUTH_CLIENT_ID= -GITEA_OAUTH_CLIENT_SECRET= -OUTLINE_OAUTH_CLIENT_ID= -OUTLINE_OAUTH_CLIENT_SECRET= +# Required: Bootstrap API token (use: openssl rand -hex 32) +AUTHENTIK_BOOTSTRAP_TOKEN= + +# Required: Domain for Authentik (e.g., auth.yourdomain.com) +AUTHENTIK_DOMAIN= + +# Required by scripts/setup-authentik.sh, which sources this file and builds +# service redirect URIs from it; use the same base domain as in the root .env +DOMAIN= + +# Optional: Log level (debug, info, warning, error) +AUTHENTIK_LOG_LEVEL=info diff --git a/stacks/sso/README.md b/stacks/sso/README.md index 
ffa79c77..a38b3d19 100644 --- a/stacks/sso/README.md +++ b/stacks/sso/README.md @@ -6,11 +6,11 @@ Provides OIDC/SAML single sign-on for all HomeLab services via [Authentik](https ``` Browser - │ + | ▼ Traefik (443) - │ ForwardAuth middleware → authentik-server:9000 - │ + | ForwardAuth middleware → authentik-server:9000 + | ├── auth.DOMAIN → Authentik UI (login, admin, user portal) ├── grafana.DOMAIN → Grafana (OIDC) ├── git.DOMAIN → Gitea (OIDC) @@ -84,7 +84,7 @@ docker compose ps ### Option A: OIDC (recommended for services with native OAuth2 support) -Run `../../scripts/setup-authentik.sh` — it automatically creates providers and writes credentials to `.env`. +Run `../../scripts/setup-authentik.sh` — it automatically creates providers and writes credentials to root `.env`. Services with native OIDC support: Grafana, Gitea, Outline, Nextcloud, Portainer. diff --git a/stacks/sso/docker-compose.yml b/stacks/sso/docker-compose.yml index 98660da2..5155e8a8 100644 --- a/stacks/sso/docker-compose.yml +++ b/stacks/sso/docker-compose.yml @@ -1,124 +1,116 @@ -# ============================================================================= -# HomeLab Stack — SSO Stack -# Services: Authentik (Server + Worker) + PostgreSQL + Redis -# -# Authentik is an open-source Identity Provider supporting OIDC, SAML, LDAP. -# All other stacks authenticate through this stack. 
-# -# Usage: -# cd stacks/sso && cp .env.example .env && nano .env -# docker compose up -d -# # Wait ~60s for first boot, then run: -# ../../scripts/setup-authentik.sh -# ============================================================================= - -x-authentik-base: &authentik-base - image: ghcr.io/goauthentik/server:2024.8.3 - # CN mirror fallback (uncomment if ghcr.io is inaccessible): - # image: swr.cn-north-4.myhuaweicloud.com/ddn-k8s/ghcr.io/goauthentik/server:2024.8.3 - env_file: - - .env - environment: - AUTHENTIK_REDIS__HOST: redis - AUTHENTIK_REDIS__PASSWORD: ${AUTHENTIK_REDIS_PASSWORD} - AUTHENTIK_POSTGRESQL__HOST: postgresql - AUTHENTIK_POSTGRESQL__USER: authentik - AUTHENTIK_POSTGRESQL__NAME: authentik - AUTHENTIK_POSTGRESQL__PASSWORD: ${AUTHENTIK_POSTGRES_PASSWORD} - AUTHENTIK_SECRET_KEY: ${AUTHENTIK_SECRET_KEY} - AUTHENTIK_ERROR_REPORTING__ENABLED: "false" - AUTHENTIK_LOG_LEVEL: warning - services: - # --------------------------------------------------------------------------- - # PostgreSQL — Authentik database - # --------------------------------------------------------------------------- postgresql: image: postgres:16-alpine container_name: authentik-postgres restart: unless-stopped - volumes: - - postgresql_data:/var/lib/postgresql/data - environment: - POSTGRES_USER: authentik - POSTGRES_PASSWORD: ${AUTHENTIK_POSTGRES_PASSWORD} - POSTGRES_DB: authentik healthcheck: - test: ["CMD-SHELL", "pg_isready -U authentik -d authentik"] + test: ["CMD-SHELL", "pg_isready -U ${AUTHENTIK_POSTGRES_USER:-authentik}"] interval: 10s timeout: 5s retries: 5 - start_period: 20s + start_period: 30s + environment: + POSTGRES_USER: ${AUTHENTIK_POSTGRES_USER:-authentik} + POSTGRES_PASSWORD: ${AUTHENTIK_POSTGRES_PASSWORD:?error} + POSTGRES_DB: authentik + volumes: + - authentik-postgres-data:/var/lib/postgresql/data networks: - sso + labels: + - "traefik.enable=false" - # --------------------------------------------------------------------------- - # Redis — Authentik 
cache/queue - # --------------------------------------------------------------------------- redis: image: redis:7-alpine container_name: authentik-redis restart: unless-stopped - command: redis-server --requirepass ${AUTHENTIK_REDIS_PASSWORD} --save 60 1 --loglevel warning - volumes: - - redis_data:/data + command: redis-server --requirepass ${AUTHENTIK_REDIS_PASSWORD:?error} --appendonly yes --maxmemory 256mb --maxmemory-policy allkeys-lru healthcheck: test: ["CMD", "redis-cli", "-a", "${AUTHENTIK_REDIS_PASSWORD}", "ping"] interval: 10s timeout: 5s retries: 5 + volumes: + - authentik-redis-data:/data networks: - sso + labels: + - "traefik.enable=false" - # --------------------------------------------------------------------------- - # Authentik Server — Web UI + API + OIDC/SAML endpoints - # --------------------------------------------------------------------------- - authentik-server: - <<: *authentik-base + server: + image: ghcr.io/goauthentik/server:2024.8.3 container_name: authentik-server restart: unless-stopped + environment: + AUTHENTIK_REDIS__HOST: redis + AUTHENTIK_REDIS__PORT: 6379 + AUTHENTIK_REDIS__PASSWORD: ${AUTHENTIK_REDIS_PASSWORD:?error} + AUTHENTIK_POSTGRESQL__HOST: postgresql + AUTHENTIK_POSTGRESQL__PORT: 5432 + AUTHENTIK_POSTGRESQL__NAME: authentik + AUTHENTIK_POSTGRESQL__USER: ${AUTHENTIK_POSTGRES_USER:-authentik} + AUTHENTIK_POSTGRESQL__PASSWORD: ${AUTHENTIK_POSTGRES_PASSWORD:?error} + AUTHENTIK_SECRET_KEY: ${AUTHENTIK_SECRET_KEY:?error} + AUTHENTIK_BOOTSTRAP_EMAIL: ${AUTHENTIK_BOOTSTRAP_EMAIL:?error} + AUTHENTIK_BOOTSTRAP_PASSWORD: ${AUTHENTIK_BOOTSTRAP_PASSWORD:?error} + AUTHENTIK_BOOTSTRAP_TOKEN: ${AUTHENTIK_BOOTSTRAP_TOKEN:?error} + AUTHENTIK_LOG_LEVEL: ${AUTHENTIK_LOG_LEVEL:-info} + AUTHENTIK_ERROR_REPORTING__ENABLED: "false" command: server + ports: + - "9000:9000" + - "9443:9443" volumes: - - authentik_media:/media - - authentik_templates:/templates + - authentik-media:/media + - authentik-templates:/templates + healthcheck: + test:
["CMD", "ak", "healthcheck"] + interval: 30s + timeout: 10s + retries: 3 + start_period: 60s depends_on: postgresql: condition: service_healthy redis: condition: service_healthy - healthcheck: - test: ["CMD", "ak", "healthcheck"] - interval: 30s - timeout: 10s - retries: 5 - start_period: 60s labels: - "traefik.enable=true" - "traefik.http.routers.authentik.rule=Host(`${AUTHENTIK_DOMAIN}`)" - "traefik.http.routers.authentik.entrypoints=websecure" - - "traefik.http.routers.authentik.tls.certresolver=letsencrypt" + - "traefik.http.routers.authentik.tls=true" - "traefik.http.services.authentik.loadbalancer.server.port=9000" - # Expose outpost port for embedded outpost - - "traefik.http.routers.authentik-outpost.rule=HostRegexp(`{subdomain:[a-z0-9-]+}.${DOMAIN}`) && PathPrefix(`/outpost.goauthentik.io`)" - - "traefik.http.routers.authentik-outpost.entrypoints=websecure" - - "traefik.http.routers.authentik-outpost.tls.certresolver=letsencrypt" - - "traefik.http.routers.authentik-outpost.service=authentik" + - "traefik.http.middlewares.authentik-forwardauth.forwardauth.address=http://authentik-server:9000/outpost.goauthentik.io/auth/traefik" + - "traefik.http.middlewares.authentik-forwardauth.forwardauth.trustForwardHeader=true" + - "traefik.http.middlewares.authentik-forwardauth.forwardauth.authResponseHeaders=X-authentik-username,X-authentik-groups,X-authentik-email,X-authentik-name,X-authentik-uid,X-authentik-jwt,X-authentik-meta-jwks,X-authentik-meta-outpost,X-authentik-meta-provider-id" networks: - sso - proxy - # --------------------------------------------------------------------------- - # Authentik Worker — Background tasks (flows, policies, outposts) - # --------------------------------------------------------------------------- - authentik-worker: - <<: *authentik-base + worker: + image: ghcr.io/goauthentik/server:2024.8.3 container_name: authentik-worker restart: unless-stopped + environment: + AUTHENTIK_REDIS__HOST: redis +
AUTHENTIK_REDIS__PORT: 6379 + AUTHENTIK_REDIS__PASSWORD: ${AUTHENTIK_REDIS_PASSWORD:?error} + AUTHENTIK_POSTGRESQL__HOST: postgresql + AUTHENTIK_POSTGRESQL__PORT: 5432 + AUTHENTIK_POSTGRESQL__NAME: authentik + AUTHENTIK_POSTGRESQL__USER: ${AUTHENTIK_POSTGRES_USER:-authentik} + AUTHENTIK_POSTGRESQL__PASSWORD: ${AUTHENTIK_POSTGRES_PASSWORD:?error} + AUTHENTIK_SECRET_KEY: ${AUTHENTIK_SECRET_KEY:?error} + AUTHENTIK_BOOTSTRAP_EMAIL: ${AUTHENTIK_BOOTSTRAP_EMAIL:?error} + AUTHENTIK_BOOTSTRAP_PASSWORD: ${AUTHENTIK_BOOTSTRAP_PASSWORD:?error} + AUTHENTIK_BOOTSTRAP_TOKEN: ${AUTHENTIK_BOOTSTRAP_TOKEN:?error} + AUTHENTIK_LOG_LEVEL: ${AUTHENTIK_LOG_LEVEL:-info} + AUTHENTIK_ERROR_REPORTING__ENABLED: "false" command: worker volumes: - - authentik_media:/media - - authentik_templates:/templates - - /var/run/docker.sock:/var/run/docker.sock + - authentik-media:/media + - authentik-templates:/templates + - /var/run/docker.sock:/var/run/docker.sock:ro depends_on: postgresql: condition: service_healthy @@ -126,16 +118,17 @@ services: condition: service_healthy networks: - sso - -volumes: - postgresql_data: - redis_data: - authentik_media: - authentik_templates: + labels: + - "traefik.enable=false" networks: sso: - name: sso + driver: bridge proxy: external: true - name: proxy + +volumes: + authentik-postgres-data: + authentik-redis-data: + authentik-media: + authentik-templates: diff --git a/stacks/storage/.env.example b/stacks/storage/.env.example index 89dca87a..aca3701f 100644 --- a/stacks/storage/.env.example +++ b/stacks/storage/.env.example @@ -1,20 +1,22 @@ -# Storage Stack -DOMAIN=yourdomain.com -TZ=Asia/Shanghai - -# Nextcloud admin +# Storage Stack - Nextcloud NEXTCLOUD_ADMIN_USER=admin -NEXTCLOUD_ADMIN_PASSWORD=CHANGE_ME_STRONG_PASSWORD +NEXTCLOUD_ADMIN_PASSWORD=changeme +NEXTCLOUD_DOMAIN=cloud.example.com -# Database (must match databases stack) -NEXTCLOUD_DB_USER=nextcloud -NEXTCLOUD_DB_PASSWORD=CHANGE_ME -POSTGRES_PASSWORD=CHANGE_ME -REDIS_PASSWORD=CHANGE_ME +# Nextcloud 
Database (optional, defaults to SQLite) +# NEXTCLOUD_DB_TYPE=pgsql +# NEXTCLOUD_DB_HOST=homelab-postgres +# NEXTCLOUD_DB_NAME=nextcloud +# NEXTCLOUD_DB_USER=nextcloud +# NEXTCLOUD_DB_PASSWORD=nextcloud_db_pass # MinIO MINIO_ROOT_USER=minioadmin -MINIO_ROOT_PASSWORD=CHANGE_ME_MINIO_PASSWORD +MINIO_ROOT_PASSWORD=change_me_minio_password +MINIO_DOMAIN=minio.example.com +MINIO_API_DOMAIN=minio-api.example.com # FileBrowser -FILEBROWSER_ROOT=/data +FILEBROWSER_USER=admin +FILEBROWSER_PASSWORD=changeme +FILEBROWSER_DOMAIN=files.example.com diff --git a/stacks/storage/docker-compose.yml b/stacks/storage/docker-compose.yml index 8dfe309d..58e615e6 100644 --- a/stacks/storage/docker-compose.yml +++ b/stacks/storage/docker-compose.yml @@ -1,88 +1,105 @@ services: nextcloud: - image: nextcloud:29.0.9-apache + image: nextcloud:29.0.7-fpm-alpine container_name: nextcloud restart: unless-stopped - networks: - - proxy - - databases - volumes: - - nextcloud-data:/var/www/html environment: - TZ=${TZ:-Asia/Shanghai} - NEXTCLOUD_ADMIN_USER=${NEXTCLOUD_ADMIN_USER:-admin} - NEXTCLOUD_ADMIN_PASSWORD=${NEXTCLOUD_ADMIN_PASSWORD:-changeme} - - NEXTCLOUD_TRUSTED_DOMAINS=nextcloud.${DOMAIN} - - POSTGRES_HOST=homelab-postgres - - POSTGRES_DB=nextcloud - - POSTGRES_USER=${POSTGRES_USER:-homelab} - - POSTGRES_PASSWORD=${POSTGRES_PASSWORD:-changeme} - - REDIS_HOST=homelab-redis + # Database settings (optional, uncomment if using external DB) + # - POSTGRES_HOST=${NEXTCLOUD_DB_HOST} + # - POSTGRES_DB=${NEXTCLOUD_DB_NAME} + # - POSTGRES_USER=${NEXTCLOUD_DB_USER} + # - POSTGRES_PASSWORD=${NEXTCLOUD_DB_PASSWORD} + volumes: + - nextcloud-data:/var/www/html + networks: + - proxy + - databases + healthcheck: + test: ["CMD-SHELL", "php -r 'exit(@fsockopen(\"127.0.0.1\", 9000) ? 0 : 1);'"] + interval: 30s + timeout: 10s + retries: 3 + start_period: 60s labels: - - traefik.enable=true - - "traefik.http.routers.nextcloud.rule=Host(`nextcloud.${DOMAIN}`)" - - traefik.http.routers.nextcloud.entrypoints=websecure - -
traefik.http.routers.nextcloud.tls=true - - traefik.http.services.nextcloud.loadbalancer.server.port=80 - - "traefik.http.middlewares.nextcloud-dav.redirectregex.regex=https://(.*)/.well-known/(card|cal)dav" - - "traefik.http.middlewares.nextcloud-dav.redirectregex.replacement=https://$${1}/remote.php/dav/" - - traefik.http.routers.nextcloud.middlewares=nextcloud-dav + - "traefik.enable=false" + + nextcloud-nginx: + image: nginx:1.27-alpine + container_name: nextcloud-nginx + restart: unless-stopped + depends_on: + - nextcloud + volumes: + - nextcloud-data:/var/www/html:ro + - ../../config/nextcloud/nginx.conf:/etc/nginx/nginx.conf:ro + networks: + - proxy + labels: + - "traefik.enable=true" + - "traefik.http.routers.nextcloud.rule=Host(`cloud.${DOMAIN}`)" + - "traefik.http.routers.nextcloud.entrypoints=websecure" + - "traefik.http.routers.nextcloud.tls=true" + - "traefik.http.services.nextcloud.loadbalancer.server.port=80" healthcheck: - test: [CMD-SHELL, "curl -sf http://localhost:80/status.php || exit 1"] + test: ["CMD", "curl", "-f", "http://localhost:80"] interval: 30s timeout: 10s - retries: 5 - start_period: 120s + retries: 3 + start_period: 30s minio: - image: minio/minio:RELEASE.2024-11-07T00-52-20Z + image: minio/minio:RELEASE.2024-09-22T00-33-43Z container_name: minio restart: unless-stopped - networks: - - proxy - volumes: - - minio-data:/data + command: server /data --console-address ":9001" environment: - MINIO_ROOT_USER=${MINIO_ROOT_USER:-minioadmin} - - MINIO_ROOT_PASSWORD=${MINIO_ROOT_PASSWORD:-changeme-minio} - - MINIO_BROWSER_REDIRECT_URL=https://minio.${DOMAIN} - command: server /data --console-address ":9001" + - MINIO_ROOT_PASSWORD=${MINIO_ROOT_PASSWORD:-minioadmin} + volumes: + - minio-data:/data + networks: + - proxy labels: - - traefik.enable=true - - "traefik.http.routers.minio.rule=Host(`minio.${DOMAIN}`)" - - traefik.http.routers.minio.entrypoints=websecure - - traefik.http.routers.minio.tls=true - - 
traefik.http.services.minio.loadbalancer.server.port=9001 - - "traefik.http.routers.minio-api.rule=Host(`s3.${DOMAIN}`)" - - traefik.http.routers.minio-api.entrypoints=websecure - - traefik.http.routers.minio-api.tls=true - - traefik.http.services.minio-api.loadbalancer.server.port=9000 + - "traefik.enable=true" + - "traefik.http.routers.minio-console.rule=Host(`minio.${DOMAIN}`)" + - "traefik.http.routers.minio-console.entrypoints=websecure" + - "traefik.http.routers.minio-console.tls=true" + - "traefik.http.services.minio-console.loadbalancer.server.port=9001" + - "traefik.http.routers.minio-api.rule=Host(`minio-api.${DOMAIN}`)" + - "traefik.http.routers.minio-api.entrypoints=websecure" + - "traefik.http.routers.minio-api.tls=true" + - "traefik.http.services.minio-api.loadbalancer.server.port=9000" healthcheck: - test: [CMD-SHELL, "curl -sf http://localhost:9000/minio/health/live || exit 1"] + test: ["CMD", "curl", "-f", "http://localhost:9000/minio/health/live"] interval: 30s timeout: 10s retries: 3 start_period: 30s filebrowser: - image: filebrowser/filebrowser:v2.31.2 + image: filebrowser/filebrowser:v2.31.0 container_name: filebrowser restart: unless-stopped + environment: + - FB_BASEURL=/filebrowser + - FB_ADMIN=${FILEBROWSER_USER:-admin} + - FB_PASSWORD=${FILEBROWSER_PASSWORD:-changeme} + volumes: + - filebrowser-data:/srv + - /etc/localtime:/etc/localtime:ro networks: - proxy - volumes: - - filebrowser-data:/database - - ${STORAGE_PATH:-/data}:/srv - environment: - - TZ=${TZ:-Asia/Shanghai} labels: - - traefik.enable=true + - "traefik.enable=true" - "traefik.http.routers.filebrowser.rule=Host(`files.${DOMAIN}`)" - - traefik.http.routers.filebrowser.entrypoints=websecure - - traefik.http.routers.filebrowser.tls=true - - traefik.http.services.filebrowser.loadbalancer.server.port=80 + - "traefik.http.routers.filebrowser.entrypoints=websecure" + - "traefik.http.routers.filebrowser.tls=true" + - "traefik.http.services.filebrowser.loadbalancer.server.port=80" 
healthcheck: - test: [CMD-SHELL, "curl -sf http://localhost:80/ || exit 1"] + test: ["CMD", "wget", "-q", "--spider", "http://localhost:80/health"] interval: 30s timeout: 10s retries: 3