diff --git a/infrastructure/README.md b/infrastructure/README.md new file mode 100644 index 0000000..ade9028 --- /dev/null +++ b/infrastructure/README.md @@ -0,0 +1,173 @@ +# Infrastructure Services + +Core infrastructure components for the automa self-hosted platform. + +## Quick Start + +### 1. Create Networks + +```bash +docker network create automa-proxy +docker network create automa-monitoring +``` + +### 2. Setup Environment + +```bash +# Copy global env file +cp ../.env.example ../.env + +# Edit with your values +vim ../.env +``` + +Required variables: +```bash +DOMAIN=example.com +GRAFANA_ADMIN_PASSWORD=changeme +TZ=Asia/Shanghai +``` + +### 3. Start Infrastructure + +```bash +# Start all at once (pass the repo-level .env explicitly; Compose only auto-loads a .env next to each compose.yml) +cd caddy && docker compose --env-file ../../.env up -d && cd .. +cd monitoring && docker compose --env-file ../../.env up -d && cd .. +cd watchtower && docker compose --env-file ../../.env up -d && cd .. +cd duplicati && docker compose --env-file ../../.env up -d && cd .. +cd fail2ban && docker compose --env-file ../../.env up -d && cd .. + +# Or use Makefile +make infra-up +``` + +### 4. Verify + +```bash +docker ps +docker network ls | grep automa +``` + +## Services + +### Caddy (Reverse Proxy) +- **Port**: 80, 443 +- **Web**: N/A (proxy only) +- **Config**: `caddy/Caddyfile` +- Auto HTTPS via Let's Encrypt + +### Grafana (Monitoring Dashboard) +- **Port**: 3000 (internal) +- **Web**: https://grafana.example.com +- **User**: admin +- **Pass**: (from .env) + +Import dashboards: +- 11074 - Node Exporter +- 193 - Docker +- 12486 - Loki Logs + +### Prometheus (Metrics) +- **Port**: 9090 (localhost only) +- **Web**: http://localhost:9090 +- **Config**: `monitoring/prometheus.yml` + +### Loki (Logs) +- **Port**: 3100 (internal) +- No direct web UI (use Grafana) + +### Duplicati (Remote Backup) +- **Port**: 8200 (localhost only) +- **Web**: http://localhost:8200 +- Setup backup jobs via web UI + +### Watchtower (Auto Update) +- No ports exposed +- Checks for updates every 24 hours (`WATCHTOWER_POLL_INTERVAL=86400`) +- Only updates containers with label: + ```yaml + labels: + - "com.centurylinklabs.watchtower.enable=true" + 
``` + +### Fail2ban (Security) +- No ports exposed +- Monitors logs and bans IPs +- Config: `/data/jail.d/` inside the `automa_fail2ban_data` volume + +## Network Architecture + +``` +Internet + ↓ +Caddy (80/443) + ↓ + ├─→ automa-proxy ─→ Nextcloud, Grafana + └─→ automa-monitoring ─→ Prometheus, Loki, etc. +``` + +## Updating Services + +### Manual Update +```bash +cd monitoring +docker compose pull +docker compose --env-file ../../.env up -d +``` + +### Auto Update (via Watchtower) +- Runs daily automatically +- Only updates labeled containers +- To disable for a service, set label to `false` + +## Troubleshooting + +### Check logs +```bash +docker logs automa-caddy +docker logs automa-prometheus +``` + +### Restart service +```bash +cd monitoring +docker compose restart grafana +``` + +### Reset service +```bash +cd monitoring +docker compose down +docker compose --env-file ../../.env up -d +``` + +### Test Caddy config +```bash +docker exec -it automa-caddy caddy validate --config /etc/caddy/Caddyfile +``` + +## Resource Usage + +Typical usage per service: + +| Service | CPU | RAM | Disk | +|---------|-----|-----|------| +| Caddy | 0.1 | 50M | 50M | +| Prometheus | 0.5 | 500M | 10G | +| Grafana | 0.1 | 200M | 500M | +| Loki | 0.2 | 300M | 5G | +| Promtail | 0.02 | 50M | 10M | +| cAdvisor | 0.1 | 100M | 10M | +| Watchtower | 0.01 | 30M | 10M | +| Duplicati | 0.05 | 100M | 100M | +| Fail2ban | 0.02 | 50M | 100M | +| **Total** | **~1.1** | **~1.4G** | **~16G** | + +## Security Notes + +- Prometheus and Duplicati are only accessible via localhost; Grafana is published through Caddy at `grafana.<DOMAIN>` +- Add firewall rules to restrict access +- Change default passwords +- Enable 2FA where supported +- Review logs regularly diff --git a/infrastructure/caddy/Caddyfile b/infrastructure/caddy/Caddyfile new file mode 100644 index 0000000..5da9558 --- /dev/null +++ b/infrastructure/caddy/Caddyfile @@ -0,0 +1,39 @@ +# Global options +{ + # ACME email for Let's Encrypt + email admin@{$DOMAIN} + + # Disable admin API in production + admin off +} + +# Nextcloud +cloud.{$DOMAIN} { + reverse_proxy nextcloud:80 { + 
header_up X-Forwarded-Proto {scheme} + header_up X-Real-IP {remote_host} + } + + encode gzip + + # Security headers + header Strict-Transport-Security "max-age=31536000;" + header X-Content-Type-Options "nosniff" + header X-Frame-Options "SAMEORIGIN" +} + +# Grafana (monitoring dashboard) +grafana.{$DOMAIN} { + reverse_proxy grafana:3000 + encode gzip +} + +# Health check endpoint (no SSL) +http://health.{$DOMAIN} { + respond "OK" 200 +} + +# Default catch-all +{$DOMAIN} { + respond "Automa Services" 404 +} diff --git a/infrastructure/caddy/compose.yml b/infrastructure/caddy/compose.yml new file mode 100644 index 0000000..73743af --- /dev/null +++ b/infrastructure/caddy/compose.yml @@ -0,0 +1,42 @@ +services: + caddy: + image: caddy:2-alpine + container_name: automa-caddy + restart: unless-stopped + + ports: + - "80:80" + - "443:443" + - "443:443/udp" # HTTP/3 + + volumes: + - ./Caddyfile:/etc/caddy/Caddyfile:ro + - caddy_data:/data + - caddy_config:/config + + environment: + - DOMAIN=${DOMAIN:-example.com} + + networks: + - automa-proxy + + labels: + - "com.automa.service=caddy" + - "com.centurylinklabs.watchtower.enable=true" + + healthcheck: + test: ["CMD", "wget", "--spider", "-q", "--header", "Host: health.${DOMAIN:-example.com}", "http://127.0.0.1:80/"] # the Caddyfile sets "admin off", so :2019 never answers; probe the plain-HTTP health.<DOMAIN> site instead + interval: 30s + timeout: 10s + retries: 3 + +volumes: + caddy_data: + name: automa_caddy_data + caddy_config: + name: automa_caddy_config + +networks: + automa-proxy: + name: automa-proxy + external: true diff --git a/infrastructure/duplicati/compose.yml b/infrastructure/duplicati/compose.yml new file mode 100644 index 0000000..d781f59 --- /dev/null +++ b/infrastructure/duplicati/compose.yml @@ -0,0 +1,33 @@ +services: + duplicati: + image: lscr.io/linuxserver/duplicati:latest + container_name: automa-duplicati + restart: unless-stopped + + environment: + - PUID=1000 + - PGID=1000 + - TZ=${TZ:-Asia/Shanghai} + + volumes: + - duplicati_config:/config + - ../../backups:/source:ro # Read-only access to local backups + + ports: + - "127.0.0.1:8200:8200" 
# Only accessible locally + + labels: + - "com.automa.service=duplicati" + - "com.centurylinklabs.watchtower.enable=true" + +volumes: + duplicati_config: + name: automa_duplicati_config + +# Setup: +# 1. Open http://localhost:8200 +# 2. Add backup job +# 3. Source: /source (local backups) +# 4. Destination: S3/SFTP/WebDAV/etc +# 5. Schedule: Daily at 3 AM +# 6. Retention: Keep 30 days diff --git a/infrastructure/fail2ban/compose.yml b/infrastructure/fail2ban/compose.yml new file mode 100644 index 0000000..1d43a19 --- /dev/null +++ b/infrastructure/fail2ban/compose.yml @@ -0,0 +1,26 @@ +services: + fail2ban: + image: crazymax/fail2ban:latest + container_name: automa-fail2ban + restart: unless-stopped + + network_mode: host + + cap_add: + - NET_ADMIN + - NET_RAW + + environment: + - TZ=${TZ:-Asia/Shanghai} + - F2B_LOG_LEVEL=INFO + + volumes: + - fail2ban_data:/data + - /var/log:/var/log:ro + + labels: + - "com.automa.service=fail2ban" + +volumes: + fail2ban_data: + name: automa_fail2ban_data diff --git a/infrastructure/monitoring/compose.yml b/infrastructure/monitoring/compose.yml new file mode 100644 index 0000000..f5a0789 --- /dev/null +++ b/infrastructure/monitoring/compose.yml @@ -0,0 +1,137 @@ +services: + # Prometheus - Metrics collection + prometheus: + image: prom/prometheus:v2.48.0 # pinned release tag; prom/prometheus publishes no "-alpine" variant + container_name: automa-prometheus + restart: unless-stopped + + ports: + - "127.0.0.1:9090:9090" + + volumes: + - ./prometheus.yml:/etc/prometheus/prometheus.yml:ro + - prometheus_data:/prometheus + + command: + - '--config.file=/etc/prometheus/prometheus.yml' + - '--storage.tsdb.path=/prometheus' + - '--storage.tsdb.retention.time=30d' + - '--storage.tsdb.retention.size=10GB' + - '--web.enable-lifecycle' + + networks: + - automa-monitoring + - automa-proxy + + labels: + - "com.automa.service=prometheus" + - "com.centurylinklabs.watchtower.enable=false" + + # Grafana - Visualization + grafana: + image: grafana/grafana:10.2.2 # pinned release tag; Grafana publishes full version tags, not "10-alpine" + container_name: automa-grafana + restart: 
unless-stopped + + ports: + - "127.0.0.1:3000:3000" + + volumes: + - grafana_data:/var/lib/grafana + - ./grafana-datasources.yml:/etc/grafana/provisioning/datasources/datasources.yml:ro + + environment: + - GF_SECURITY_ADMIN_PASSWORD=${GRAFANA_ADMIN_PASSWORD:-changeme} + - GF_ANALYTICS_REPORTING_ENABLED=false + - GF_SERVER_ROOT_URL=https://grafana.${DOMAIN:-example.com} + + networks: + - automa-monitoring + - automa-proxy + + labels: + - "com.automa.service=grafana" + - "com.centurylinklabs.watchtower.enable=true" + + # Loki - Log aggregation + loki: + image: grafana/loki:2.9.2 # pinned 2.x release tag; no "2-alpine" tag is published + container_name: automa-loki + restart: unless-stopped + + ports: + - "127.0.0.1:3100:3100" + + volumes: + - ./loki-config.yml:/etc/loki/loki-config.yml:ro + - loki_data:/loki + + command: -config.file=/etc/loki/loki-config.yml + + networks: + - automa-monitoring + + labels: + - "com.automa.service=loki" + + # Promtail - Log collection + promtail: + image: grafana/promtail:2.9.2 # pinned release tag; keep in step with the loki image version + container_name: automa-promtail + restart: unless-stopped + + volumes: + - ./promtail-config.yml:/etc/promtail/promtail-config.yml:ro + - /var/log:/var/log:ro + - /var/run/docker.sock:/var/run/docker.sock:ro + + command: -config.file=/etc/promtail/promtail-config.yml + + networks: + - automa-monitoring + + labels: + - "com.automa.service=promtail" + + # cAdvisor - Container metrics + cadvisor: + image: gcr.io/cadvisor/cadvisor:latest + container_name: automa-cadvisor + restart: unless-stopped + + ports: + - "127.0.0.1:8080:8080" + + volumes: + - /:/rootfs:ro + - /var/run:/var/run:ro + - /sys:/sys:ro + - /var/lib/docker:/var/lib/docker:ro + + privileged: true + + networks: + - automa-monitoring + + labels: + - "com.automa.service=cadvisor" + + command: + - '--docker_only=true' + - '--housekeeping_interval=30s' + +volumes: + prometheus_data: + name: automa_prometheus_data + grafana_data: + name: automa_grafana_data + loki_data: + name: automa_loki_data + +networks: + automa-monitoring: + name: 
automa-monitoring + external: true + automa-proxy: + name: automa-proxy + external: true diff --git a/infrastructure/monitoring/grafana-datasources.yml b/infrastructure/monitoring/grafana-datasources.yml new file mode 100644 index 0000000..2c0808d --- /dev/null +++ b/infrastructure/monitoring/grafana-datasources.yml @@ -0,0 +1,15 @@ +apiVersion: 1 + +datasources: + - name: Prometheus + type: prometheus + access: proxy + url: http://prometheus:9090 + isDefault: true + editable: false + + - name: Loki + type: loki + access: proxy + url: http://loki:3100 + editable: false diff --git a/infrastructure/monitoring/loki-config.yml b/infrastructure/monitoring/loki-config.yml new file mode 100644 index 0000000..e80c964 --- /dev/null +++ b/infrastructure/monitoring/loki-config.yml @@ -0,0 +1,34 @@ +auth_enabled: false + +server: + http_listen_port: 3100 + +common: + path_prefix: /loki # the loki_data volume is mounted at /loki in compose.yml + storage: + filesystem: + chunks_directory: /loki/chunks + rules_directory: /loki/rules + replication_factor: 1 + ring: + kvstore: + store: inmemory + +schema_config: + configs: + - from: 2023-01-01 + store: boltdb-shipper + object_store: filesystem + schema: v11 + index: + prefix: index_ + period: 24h + +limits_config: + retention_period: 30d + max_query_length: 721h # 30d (720h) + 1h, i.e. just over retention_period + +compactor: + working_directory: /loki/compactor + shared_store: filesystem + retention_enabled: true diff --git a/infrastructure/monitoring/prometheus.yml b/infrastructure/monitoring/prometheus.yml new file mode 100644 index 0000000..d106fbd --- /dev/null +++ b/infrastructure/monitoring/prometheus.yml @@ -0,0 +1,23 @@ +global: + scrape_interval: 30s + evaluation_interval: 30s + +scrape_configs: + # Prometheus self-monitoring + - job_name: 'prometheus' + static_configs: + - targets: ['localhost:9090'] + labels: + service: 'prometheus' + + # Container metrics + - job_name: 'cadvisor' + static_configs: + - targets: ['cadvisor:8080'] # the cadvisor service from compose.yml, on the shared automa-monitoring network + labels: + service: 'cadvisor' + + # Add more targets as needed + # - job_name: 'nextcloud' + # 
static_configs: + # - targets: ['nextcloud-exporter:9205'] diff --git a/infrastructure/monitoring/promtail-config.yml b/infrastructure/monitoring/promtail-config.yml new file mode 100644 index 0000000..5f672a7 --- /dev/null +++ b/infrastructure/monitoring/promtail-config.yml @@ -0,0 +1,35 @@ +server: + http_listen_port: 9080 + grpc_listen_port: 0 + +positions: + filename: /tmp/positions.yaml # NOTE(review): compose.yml mounts no volume over /tmp, so positions are presumably lost when the container is recreated - confirm + +clients: + - url: http://loki:3100/loki/api/v1/push + +scrape_configs: + # Docker containers + - job_name: docker + docker_sd_configs: + - host: unix:///var/run/docker.sock + refresh_interval: 5s + + relabel_configs: + - source_labels: ['__meta_docker_container_name'] + regex: '/(.*)' # capture the container name without its leading "/" + target_label: 'container' + - source_labels: ['__meta_docker_container_label_com_automa_service'] + target_label: 'service' + + pipeline_stages: + - docker: {} + + # System logs + - job_name: system + static_configs: + - targets: + - localhost + labels: + job: syslog + __path__: /var/log/syslog # host /var/log is bind-mounted read-only into the container by compose.yml diff --git a/infrastructure/watchtower/compose.yml b/infrastructure/watchtower/compose.yml new file mode 100644 index 0000000..a719f26 --- /dev/null +++ b/infrastructure/watchtower/compose.yml @@ -0,0 +1,23 @@ +services: + watchtower: + image: containrrr/watchtower:latest + container_name: automa-watchtower + restart: unless-stopped + + environment: + - WATCHTOWER_CLEANUP=true # Remove old images + - WATCHTOWER_POLL_INTERVAL=86400 # Check every 24 hours + - WATCHTOWER_LABEL_ENABLE=true # Only update labeled containers + - WATCHTOWER_INCLUDE_STOPPED=false # Skip stopped containers + - TZ=${TZ:-Asia/Shanghai} + + volumes: + - /var/run/docker.sock:/var/run/docker.sock + + labels: + - "com.automa.service=watchtower" + - "com.centurylinklabs.watchtower.enable=false" # Don't update itself + +# Add this label to containers you want to auto-update: +# labels: +# - "com.centurylinklabs.watchtower.enable=true"