{
  "schemaVersion": "1.0",
  "item": {
    "slug": "afrexai-self-hosting-mastery",
    "name": "Self-Hosting Mastery",
    "source": "tencent",
    "type": "skill",
    "category": "其他",
    "sourceUrl": "https://clawhub.ai/1kalin/afrexai-self-hosting-mastery",
    "canonicalUrl": "https://clawhub.ai/1kalin/afrexai-self-hosting-mastery",
    "targetPlatform": "OpenClaw"
  },
  "install": {
    "downloadMode": "redirect",
    "downloadUrl": "/downloads/afrexai-self-hosting-mastery",
    "sourceDownloadUrl": "https://wry-manatee-359.convex.site/api/v1/download?slug=afrexai-self-hosting-mastery",
    "sourcePlatform": "tencent",
    "targetPlatform": "OpenClaw",
    "installMethod": "Manual import",
    "extraction": "Extract archive",
    "prerequisites": [
      "OpenClaw"
    ],
    "packageFormat": "ZIP package",
    "includedAssets": [
      "README.md",
      "SKILL.md"
    ],
    "primaryDoc": "SKILL.md",
    "quickSetup": [
      "Download the package from Yavira.",
      "Extract the archive and review SKILL.md first.",
      "Import or place the package into your OpenClaw setup."
    ],
    "agentAssist": {
      "summary": "Hand the extracted package to your coding agent with a concrete install brief instead of figuring it out manually.",
      "steps": [
        "Download the package from Yavira.",
        "Extract it into a folder your agent can access.",
        "Paste one of the prompts below and point your agent at the extracted folder."
      ],
      "prompts": [
        {
          "label": "New install",
          "body": "I downloaded a skill package from Yavira. Read SKILL.md from the extracted folder and install it by following the included instructions. Then review README.md for any prerequisites, environment setup, or post-install checks. Tell me what you changed and call out any manual steps you could not complete."
        },
        {
          "label": "Upgrade existing",
          "body": "I downloaded an updated skill package from Yavira. Read SKILL.md from the extracted folder, compare it with my current installation, and upgrade it while preserving any custom configuration unless the package docs explicitly say otherwise. Then review README.md for any prerequisites, environment setup, or post-install checks. Summarize what changed and any follow-up checks I should run."
        }
      ]
    },
    "sourceHealth": {
      "source": "tencent",
      "status": "healthy",
      "reason": "direct_download_ok",
      "recommendedAction": "download",
      "checkedAt": "2026-04-23T16:43:11.935Z",
      "expiresAt": "2026-04-30T16:43:11.935Z",
      "httpStatus": 200,
      "finalUrl": "https://wry-manatee-359.convex.site/api/v1/download?slug=4claw-imageboard",
      "contentType": "application/zip",
      "probeMethod": "head",
      "details": {
        "probeUrl": "https://wry-manatee-359.convex.site/api/v1/download?slug=4claw-imageboard",
        "contentDisposition": "attachment; filename=\"4claw-imageboard-1.0.1.zip\"",
        "redirectLocation": null,
        "bodySnippet": null
      },
      "scope": "source",
      "summary": "Source download looks usable.",
      "detail": "Yavira can redirect you to the upstream package for this source.",
      "primaryActionLabel": "Download for OpenClaw",
      "primaryActionHref": "/downloads/afrexai-self-hosting-mastery"
    },
    "validation": {
      "installChecklist": [
        "Use the Yavira download entry.",
        "Review SKILL.md after the package is downloaded.",
        "Confirm the extracted package contains the expected setup assets."
      ],
      "postInstallChecks": [
        "Confirm the extracted package includes the expected docs or setup files.",
        "Validate the skill or prompts are available in your target agent workspace.",
        "Capture any manual follow-up steps the agent could not complete."
      ]
    },
    "downloadPageUrl": "https://openagent3.xyz/downloads/afrexai-self-hosting-mastery",
    "agentPageUrl": "https://openagent3.xyz/skills/afrexai-self-hosting-mastery/agent",
    "manifestUrl": "https://openagent3.xyz/skills/afrexai-self-hosting-mastery/agent.json",
    "briefUrl": "https://openagent3.xyz/skills/afrexai-self-hosting-mastery/agent.md"
  },
  "agentAssist": {
    "summary": "Hand the extracted package to your coding agent with a concrete install brief instead of figuring it out manually.",
    "steps": [
      "Download the package from Yavira.",
      "Extract it into a folder your agent can access.",
      "Paste one of the prompts below and point your agent at the extracted folder."
    ],
    "prompts": [
      {
        "label": "New install",
        "body": "I downloaded a skill package from Yavira. Read SKILL.md from the extracted folder and install it by following the included instructions. Then review README.md for any prerequisites, environment setup, or post-install checks. Tell me what you changed and call out any manual steps you could not complete."
      },
      {
        "label": "Upgrade existing",
        "body": "I downloaded an updated skill package from Yavira. Read SKILL.md from the extracted folder, compare it with my current installation, and upgrade it while preserving any custom configuration unless the package docs explicitly say otherwise. Then review README.md for any prerequisites, environment setup, or post-install checks. Summarize what changed and any follow-up checks I should run."
      }
    ]
  },
  "documentation": {
    "source": "clawhub",
    "primaryDoc": "SKILL.md",
    "sections": [
      {
        "title": "Self-Hosting Mastery",
        "body": "Complete system for building and operating reliable self-hosted infrastructure — from first server to multi-node homelab."
      },
      {
        "title": "Server Profile YAML",
        "body": "server_profile:\n  name: \"\"\n  hardware:\n    cpu: \"\"              # e.g., \"Intel i5-12400\" or \"Raspberry Pi 5\"\n    ram_gb: 0\n    storage:\n      - device: \"\"       # e.g., \"/dev/sda\"\n        type: \"\"         # ssd | hdd | nvme\n        size_gb: 0\n        role: \"\"         # boot | data | backup\n    network: \"\"          # 1gbe | 2.5gbe | 10gbe\n  os: \"\"                 # debian | ubuntu | proxmox | unraid | truenas\n  location: \"\"           # home | closet | rack | colo | vps\n  power:\n    ups: false\n    wattage_idle: 0\n    wattage_load: 0\n    monthly_cost_estimate: \"\"  # electricity\n  network:\n    public_ip: \"\"        # static | dynamic | cgnat\n    domain: \"\"\n    dns_provider: \"\"     # cloudflare | duckdns | custom\n    isp_ports_open: true # some ISPs block 80/443\n  goals:\n    - \"\"                 # media server, smart home, dev environment, etc.\n  budget_monthly: \"\"     # electricity + domain + any VPS"
      },
      {
        "title": "Hardware Decision Matrix",
        "body": "| Budget | RAM | Storage | Good For | Example Hardware |\n| --- | --- | --- | --- | --- |\n| $0 | 4-8GB | 64GB+ | Pi-hole, AdGuard, small tools | Raspberry Pi 4/5 |\n| $50-150 | 8-16GB | 256GB+ | Docker host, 5-10 services | Used SFF PC (Dell Optiplex, Lenovo Tiny) |\n| $150-400 | 16-32GB | 1TB+ | NAS + services, media server | Mini PC (Intel NUC, Beelink) |\n| $400-800 | 32-64GB | 4TB+ | Full homelab, VMs + containers | Used enterprise (Dell R720, HP DL380) |\n| $800+ | 64GB+ | 10TB+ | Multi-node, Proxmox cluster | Multiple nodes, dedicated NAS |"
      },
      {
        "title": "Self-Host vs SaaS Decision",
        "body": "Ask before self-hosting anything:\n\nData sensitivity — Does keeping data local matter? (passwords, health, finance = yes)\nReliability need — Can you tolerate occasional downtime? (email = risky, media = fine)\nMaintenance budget — Do you have 2-4 hours/month for updates?\nSkill level — Can you debug Docker/networking issues?\nCost comparison — Is the SaaS < $10/mo? Often not worth self-hosting for trivial savings.\n\nAlways self-host: Password manager, DNS/ad-blocking, VPN, bookmarks, notes\nUsually self-host: Media server, file sync, photo backup, monitoring, git\nThink twice: Email (deliverability hell), calendar (sync complexity), chat (uptime expectations)\nRarely worth it: Search engine (resource hungry), social media (no network effect)"
      },
      {
        "title": "OS Selection Guide",
        "body": "| OS | Best For | Learning Curve | Notes |\n| --- | --- | --- | --- |\n| Debian 12 | Docker-only host | Low | Stable, minimal, just works |\n| Ubuntu Server 24.04 | Beginners, wide docs | Low | More packages, snap controversy |\n| Proxmox VE | VMs + containers | Medium | Free, enterprise features, ZFS |\n| Unraid | NAS + Docker + VMs | Medium | $59-129, great UI, parity array |\n| TrueNAS Scale | ZFS NAS + Docker | Medium | Free, ZFS-first, apps improving |\n| NixOS | Reproducible configs | High | Declarative, steep learning curve |"
      },
      {
        "title": "Proxmox Quick Setup",
        "body": "# Post-install essentials\n# 1. Remove enterprise repo (if no subscription)\nsed -i 's/^deb/#deb/' /etc/apt/sources.list.d/pve-enterprise.list\necho \"deb http://download.proxmox.com/debian/pve bookworm pve-no-subscription\" > /etc/apt/sources.list.d/pve-no-subscription.list\napt update && apt upgrade -y\n\n# 2. Create a Docker LXC (lightweight container)\n# Download template: Datacenter → Storage → CT Templates → Download → debian-12\n# Create CT: 2 cores, 2GB RAM, 32GB disk, bridge vmbr0\n# Inside CT: install Docker\napt install -y curl\ncurl -fsSL https://get.docker.com | sh\n\n# 3. Enable IOMMU for GPU passthrough (if needed)\n# Edit /etc/default/grub: GRUB_CMDLINE_LINUX_DEFAULT=\"quiet intel_iommu=on\"\n# update-grub && reboot"
      },
      {
        "title": "VM vs LXC vs Docker Decision",
        "body": "| Factor | VM | LXC | Docker |\n| --- | --- | --- | --- |\n| Isolation | Full (own kernel) | Partial (shared kernel) | Process-level |\n| Overhead | High (1-2GB base) | Low (50-200MB) | Minimal |\n| Use when | Different OS, GPU passthrough, untrusted workloads | Dedicated service host, ZFS datasets | Most services |\n| Avoid when | RAM-constrained | Need Windows, custom kernel | Stateful databases (use LXC/VM) |\n\nRule: Docker for 90% of services. LXC for Docker hosts or isolated environments. VM for Windows, different kernel needs, or GPU passthrough."
      },
      {
        "title": "Docker Compose Project Structure",
        "body": "/opt/stacks/           # or ~/docker/\n├── traefik/\n│   ├── docker-compose.yml\n│   ├── .env\n│   ├── config/\n│   │   └── traefik.yml\n│   └── data/\n│       ├── acme.json          # chmod 600\n│       └── dynamic/\n├── monitoring/\n│   ├── docker-compose.yml\n│   ├── .env\n│   └── config/\n├── media/\n│   ├── docker-compose.yml\n│   ├── .env\n│   └── config/\n├── productivity/\n│   ├── docker-compose.yml\n│   ├── .env\n│   └── config/\n└── scripts/\n    ├── backup.sh\n    ├── update-all.sh\n    └── health-check.sh"
      },
      {
        "title": "Docker Compose Best Practices",
        "body": "# Template: production-grade service\nservices:\n  app:\n    image: vendor/app:1.2.3           # ALWAYS pin version\n    container_name: app               # Explicit name\n    restart: unless-stopped           # Auto-restart\n    networks:\n      - proxy                         # Traefik network\n      - internal                      # Backend network\n    volumes:\n      - ./config:/config              # Bind mount for config\n      - app-data:/data                # Named volume for data\n    environment:\n      - TZ=Europe/London              # Always set timezone\n      - PUID=1000                     # Match host user\n      - PGID=1000\n    env_file:\n      - .env                          # Secrets in .env (gitignored)\n    labels:\n      - \"traefik.enable=true\"\n      - \"traefik.http.routers.app.rule=Host(`app.example.com`)\"\n      - \"traefik.http.routers.app.tls.certresolver=letsencrypt\"\n      - \"traefik.http.services.app.loadbalancer.server.port=8080\"\n    healthcheck:\n      test: [\"CMD\", \"curl\", \"-f\", \"http://localhost:8080/health\"]\n      interval: 30s\n      timeout: 10s\n      retries: 3\n      start_period: 40s\n    deploy:\n      resources:\n        limits:\n          memory: 512M               # Prevent OOM cascades\n    security_opt:\n      - no-new-privileges:true        # Security hardening\n    read_only: true                   # Where possible\n    tmpfs:\n      - /tmp\n\nvolumes:\n  app-data:\n\nnetworks:\n  proxy:\n    external: true\n  internal:"
      },
      {
        "title": "Docker Security Checklist",
        "body": "Pin all image versions (never :latest in production)\n Set restart: unless-stopped on all services\n Use .env files for secrets (never hardcode in compose)\n Set memory limits on all containers\n Use security_opt: no-new-privileges:true\n Use read_only: true where possible + tmpfs for /tmp\n Create separate Docker networks per stack\n Never expose database ports to 0.0.0.0\n Run containers as non-root (PUID/PGID or user:)\n Enable Docker content trust: export DOCKER_CONTENT_TRUST=1\n Prune unused images, stopped containers and networks monthly: docker system prune -af (volumes are only removed if you add --volumes; confirm backups first)\n Use named volumes (not anonymous) for all persistent data\n Set TZ environment variable on every container"
      },
      {
        "title": "Reverse Proxy Selection",
        "body": "| Proxy | Best For | SSL | Config Style | Learning Curve |\n| --- | --- | --- | --- | --- |\n| Traefik | Docker-native, auto-discovery | Auto (ACME) | Labels + YAML | Medium |\n| Caddy | Simplicity, auto-SSL | Auto (built-in) | Caddyfile | Low |\n| Nginx Proxy Manager | GUI preference | Auto (UI) | Web UI | Very Low |\n| Nginx (manual) | Maximum control | Manual/certbot | Config files | High |\n\nRecommendation: Traefik for Docker power users. Caddy for simplicity. NPM for beginners."
      },
      {
        "title": "Traefik Production Config",
        "body": "# traefik/config/traefik.yml\napi:\n  dashboard: true\n  insecure: false\n\nentryPoints:\n  web:\n    address: \":80\"\n    http:\n      redirections:\n        entryPoint:\n          to: websecure\n          scheme: https\n  websecure:\n    address: \":443\"\n    http:\n      tls:\n        certResolver: letsencrypt\n\ncertificatesResolvers:\n  letsencrypt:\n    acme:\n      email: you@example.com\n      storage: /data/acme.json\n      # Use DNS challenge if ISP blocks port 80\n      # dnsChallenge:\n      #   provider: cloudflare\n      httpChallenge:\n        entryPoint: web\n\nproviders:\n  docker:\n    exposedByDefault: false    # Explicit opt-in per service\n    network: proxy\n  file:\n    directory: /data/dynamic\n    watch: true\n\nlog:\n  level: WARN\n\naccessLog:\n  filePath: /data/access.log\n  bufferingSize: 100"
      },
      {
        "title": "Cloudflare Tunnel (Zero Port Forwarding)",
        "body": "For CGNAT or ISPs blocking ports — expose services without opening firewall:\n\n# cloudflared/docker-compose.yml\nservices:\n  cloudflared:\n    image: cloudflare/cloudflared:2024.1.0\n    container_name: cloudflared\n    restart: unless-stopped\n    command: tunnel run\n    environment:\n      - TUNNEL_TOKEN=${CF_TUNNEL_TOKEN}\n    networks:\n      - proxy\n\nWhen to use Cloudflare Tunnel vs port forwarding:\n\nCGNAT (no public IP) → Tunnel (only option)\nISP blocks 80/443 → Tunnel or DNS challenge + non-standard ports\nSecurity-first → Tunnel (no open ports)\nPerformance-first → Direct (lower latency)\nLAN-only access → Neither (use Tailscale/WireGuard)"
      },
      {
        "title": "Tier 1 — Deploy First (Foundation)",
        "body": "| Service | Purpose | Image | RAM | Notes |\n| --- | --- | --- | --- | --- |\n| Traefik/Caddy | Reverse proxy + SSL | traefik:v3.0 | 64MB | Gateway to everything |\n| Pi-hole/AdGuard | DNS + ad blocking | pihole/pihole | 128MB | Network-wide ad blocking |\n| Authelia/Authentik | SSO + 2FA | authelia/authelia | 128MB | Protect services without built-in auth |\n| Uptime Kuma | Monitoring | louislam/uptime-kuma | 128MB | Know when things break |\n| Watchtower | Auto-updates | containrrr/watchtower | 32MB | Optional — some prefer manual |"
      },
      {
        "title": "Tier 2 — Core Services",
        "body": "| Service | Purpose | Alt | RAM |\n| --- | --- | --- | --- |\n| Vaultwarden | Password manager | Bitwarden | 64MB |\n| Nextcloud | File sync + office | Seafile (lighter) | 512MB |\n| Immich | Photo backup | PhotoPrism | 1-4GB |\n| Jellyfin | Media server | Plex (less free) | 512MB-2GB |\n| Paperless-ngx | Document management | - | 256MB |\n| Home Assistant | Smart home | - | 512MB |"
      },
      {
        "title": "Tier 3 — Power User",
        "body": "| Service | Purpose | RAM |\n| --- | --- | --- |\n| Gitea/Forgejo | Git hosting | 256MB |\n| n8n | Workflow automation | 256MB |\n| Grafana + Prometheus | Metrics & dashboards | 512MB |\n| Tandoor | Recipe management | 256MB |\n| Mealie | Meal planning | 128MB |\n| Linkwarden/Hoarder | Bookmark manager | 256MB |\n| Stirling PDF | PDF tools | 512MB |\n| IT-Tools | Developer utilities | 64MB |"
      },
      {
        "title": "RAM Planning",
        "body": "Total RAM needed ≈ OS base (1-2GB) + sum of service RAM + 20% headroom\nExample 16GB server:\n  OS + Docker:     2 GB\n  Traefik:         0.1 GB\n  Pi-hole:         0.1 GB\n  Authelia:        0.1 GB\n  Uptime Kuma:     0.1 GB\n  Vaultwarden:     0.1 GB\n  Nextcloud:       0.5 GB\n  Immich:          2.0 GB\n  Jellyfin:        1.0 GB\n  Paperless:       0.3 GB\n  Home Assistant:  0.5 GB\n  ──────────────────────\n  Total:           6.8 GB → 8.2 GB with headroom\n  Available:       ~7.8 GB free for more services"
      },
      {
        "title": "DNS Architecture",
        "body": "Internet → Cloudflare DNS → Your Public IP → Router → Server\n                                                        ↓\n                                             Reverse Proxy (Traefik)\n                                                        ↓\n                                     ┌──────────────────┼──────────────────┐\n                                     ↓                  ↓                  ↓\n                                app.domain.com   files.domain.com   media.domain.com"
      },
      {
        "title": "Split DNS (Access Services Locally Without Hairpin NAT)",
        "body": "# Pi-hole/AdGuard: Local DNS rewrites\n# Point *.home.example.com → 192.168.1.100 (server LAN IP)\n# External: Cloudflare points to public IP\n# Result: LAN traffic stays local, external goes through internet"
      },
      {
        "title": "VPN for Remote Access",
        "body": "| Solution | Type | Best For | Complexity |\n| --- | --- | --- | --- |\n| Tailscale | Mesh VPN | Easiest setup, multi-device | Very Low |\n| WireGuard | Point-to-point | Performance, full control | Medium |\n| Headscale | Self-hosted Tailscale | Privacy, no vendor lock | Medium-High |\n\nRecommendation: Start with Tailscale (free for 3 users). Move to Headscale when you want full control."
      },
      {
        "title": "Firewall Rules (UFW)",
        "body": "# Default deny incoming\nufw default deny incoming\nufw default allow outgoing\n\n# Allow SSH (change port from 22!)\nufw allow 2222/tcp comment 'SSH'\n\n# Allow HTTP/HTTPS for reverse proxy\nufw allow 80/tcp comment 'HTTP redirect'\nufw allow 443/tcp comment 'HTTPS'\n\n# Allow local network for discovery\nufw allow from 192.168.1.0/24 comment 'LAN'\n\n# Enable\nufw enable"
      },
      {
        "title": "3-2-1 Rule Implementation",
        "body": "3 copies:  Live data + Local backup + Remote backup\n2 media:   SSD/HDD (server) + External drive or NAS\n1 offsite: Cloud (Backblaze B2, Wasabi) or second location"
      },
      {
        "title": "Backup Script Template",
        "body": "#!/bin/bash\n# /opt/stacks/scripts/backup.sh\nset -euo pipefail\n\nBACKUP_DIR=\"/mnt/backup/docker\"\nSTACKS_DIR=\"/opt/stacks\"\nDATE=$(date +%Y-%m-%d_%H%M)\nRETENTION_DAYS=30\n\nlog() { echo \"[$(date '+%Y-%m-%d %H:%M:%S')] $1\"; }\n\n# 1. Stop services that need consistent backups\nlog \"Stopping database services...\"\ncd \"$STACKS_DIR/productivity\" && docker compose stop db\n\n# 2. Backup Docker volumes\nlog \"Backing up volumes...\"\nfor vol in $(docker volume ls -q); do\n    docker run --rm \\\n        -v \"$vol\":/source:ro \\\n        -v \"$BACKUP_DIR/volumes\":/backup \\\n        alpine tar czf \"/backup/${vol}_${DATE}.tar.gz\" -C /source .\ndone\n\n# 3. Backup compose files and configs\nlog \"Backing up configs...\"\ntar czf \"$BACKUP_DIR/configs/stacks_${DATE}.tar.gz\" \\\n    --exclude='*.log' \\\n    --exclude='node_modules' \\\n    \"$STACKS_DIR\"\n\n# 4. Restart services\nlog \"Restarting services...\"\ncd \"$STACKS_DIR/productivity\" && docker compose start db\n\n# 5. Cleanup old backups\nlog \"Cleaning up backups older than ${RETENTION_DAYS} days...\"\nfind \"$BACKUP_DIR\" -name \"*.tar.gz\" -mtime +$RETENTION_DAYS -delete\n\n# 6. Sync to remote (Backblaze B2 example)\n# rclone sync \"$BACKUP_DIR\" b2:my-backups/docker/ --transfers 4\n\n# 7. Verify\nBACKUP_SIZE=$(du -sh \"$BACKUP_DIR\" | cut -f1)\nlog \"Backup complete. Total size: $BACKUP_SIZE\"\n\n# 8. Send notification (optional)\n# curl -s \"https://ntfy.sh/my-backups\" -d \"Backup complete: $BACKUP_SIZE\""
      },
      {
        "title": "Backup Schedule",
        "body": "| What | Frequency | Retention | Method |\n| --- | --- | --- | --- |\n| Docker volumes | Daily 3 AM | 30 days | Script + cron |\n| Compose files + configs | Daily 3 AM | 90 days | Script + cron |\n| Database dumps | Every 6 hours | 7 days | pg_dump/mysqldump |\n| Full disk image | Monthly | 3 months | Clonezilla/dd |\n| Offsite sync | Daily 5 AM | 60 days | rclone to B2/Wasabi |"
      },
      {
        "title": "Backup Verification (Monthly)",
        "body": "Pick a random backup from last week\n Restore to a test VM/container\n Verify data integrity (check file counts, DB row counts)\n Time the restore process (document RTO)\n Log results in backup-verification.md"
      },
      {
        "title": "Monitoring Stack (Docker Compose)",
        "body": "# monitoring/docker-compose.yml\nservices:\n  uptime-kuma:\n    image: louislam/uptime-kuma:1\n    container_name: uptime-kuma\n    restart: unless-stopped\n    volumes:\n      - uptime-data:/app/data\n    labels:\n      - \"traefik.enable=true\"\n      - \"traefik.http.routers.uptime.rule=Host(`status.example.com`)\"\n\n  prometheus:\n    image: prom/prometheus:v2.49.0\n    container_name: prometheus\n    restart: unless-stopped\n    volumes:\n      - ./config/prometheus.yml:/etc/prometheus/prometheus.yml:ro\n      - prometheus-data:/prometheus\n    command:\n      - '--config.file=/etc/prometheus/prometheus.yml'\n      - '--storage.tsdb.retention.time=30d'\n\n  grafana:\n    image: grafana/grafana:10.3.0\n    container_name: grafana\n    restart: unless-stopped\n    volumes:\n      - grafana-data:/var/lib/grafana\n    environment:\n      - GF_SECURITY_ADMIN_PASSWORD=${GRAFANA_PASSWORD}\n\n  node-exporter:\n    image: prom/node-exporter:v1.7.0\n    container_name: node-exporter\n    restart: unless-stopped\n    pid: host\n    volumes:\n      - /proc:/host/proc:ro\n      - /sys:/host/sys:ro\n      - /:/rootfs:ro\n    command:\n      - '--path.procfs=/host/proc'\n      - '--path.sysfs=/host/sys'\n      - '--path.rootfs=/rootfs'\n\n  cadvisor:\n    image: gcr.io/cadvisor/cadvisor:v0.49.0\n    container_name: cadvisor\n    restart: unless-stopped\n    volumes:\n      - /:/rootfs:ro\n      - /var/run:/var/run:ro\n      - /sys:/sys:ro\n      - /var/lib/docker/:/var/lib/docker:ro\n\nvolumes:\n  uptime-data:\n  prometheus-data:\n  grafana-data:"
      },
      {
        "title": "Alert Rules",
        "body": "| Metric | Warning | Critical | Action |\n| --- | --- | --- | --- |\n| Disk usage | >80% | >90% | Cleanup or expand |\n| RAM usage | >85% | >95% | Identify memory leak, add RAM |\n| CPU sustained | >80% 5min | >95% 5min | Check runaway process |\n| Container restart | >2/hour | >5/hour | Check logs, fix root cause |\n| SSL cert expiry | <14 days | <3 days | Renew cert |\n| Backup age | >26 hours | >48 hours | Check backup script/cron |\n| Service down | >2 min | >10 min | Investigate, restart |"
      },
      {
        "title": "Notification Channels",
        "body": "| Channel | Service | Best For |\n| --- | --- | --- |\n| Push notification | ntfy.sh (self-hosted) | Mobile alerts |\n| Chat | Discord/Slack webhook | Team alerts |\n| Email | Uptime Kuma built-in | Formal notifications |\n| Dashboard | Grafana + Uptime Kuma | Visual monitoring |"
      },
      {
        "title": "Server Hardening Checklist",
        "body": "# 1. SSH hardening\n# /etc/ssh/sshd_config\nPort 2222                          # Change default port\nPermitRootLogin no                 # No root SSH\nPasswordAuthentication no          # Key-only\nMaxAuthTries 3\nAllowUsers yourusername\n\n# 2. Install fail2ban\napt install fail2ban -y\nsystemctl enable fail2ban\n\n# 3. Automatic security updates\napt install unattended-upgrades -y\ndpkg-reconfigure -plow unattended-upgrades\n\n# 4. Disable unused services\nsystemctl list-unit-files --state=enabled\n# Disable anything you don't need"
      },
      {
        "title": "Authentication Architecture",
        "body": "Internet → Traefik → Authelia/Authentik → Service\n                         ↓\n                    Check: authenticated?\n                    Yes → Forward to service\n                    No → Redirect to login page + 2FA\n\nAuthelia (lightweight, YAML config) — good for smaller setups\nAuthentik (full IdP, web UI) — good for many users/services, SAML/OIDC"
      },
      {
        "title": "Security Scoring (0-100)",
        "body": "| Dimension | Weight | Score Guide |\n| --- | --- | --- |\n| SSH hardened (keys, non-root, non-22) | 15 | 0=default, 15=fully hardened |\n| Firewall active (deny-by-default) | 15 | 0=none, 15=UFW/iptables configured |\n| Reverse proxy (no direct port exposure) | 15 | 0=ports exposed, 15=all behind proxy |\n| SSL/TLS on all services | 10 | 0=HTTP, 10=HTTPS everywhere |\n| Auth on all public services | 15 | 0=open, 15=SSO/2FA on everything |\n| Container security (non-root, limits) | 10 | 0=default, 10=hardened |\n| Auto-updates enabled | 10 | 0=manual, 10=automated |\n| Secrets management (.env, not hardcoded) | 10 | 0=in compose, 10=.env + restricted perms |\n\nScore: 0-40 = Vulnerable, 41-70 = Acceptable, 71-90 = Good, 91-100 = Hardened"
      },
      {
        "title": "Update Strategy",
        "body": "Option A: Manual (Recommended for critical services)\n\n# Update script: /opt/stacks/scripts/update-all.sh\n#!/bin/bash\nset -euo pipefail\n\nSTACKS_DIR=\"/opt/stacks\"\nLOG=\"/var/log/docker-updates.log\"\n\nfor stack in \"$STACKS_DIR\"/*/; do\n    if [ -f \"$stack/docker-compose.yml\" ]; then\n        echo \"[$(date)] Updating $(basename $stack)...\" | tee -a \"$LOG\"\n        cd \"$stack\"\n        docker compose pull 2>&1 | tee -a \"$LOG\"\n        docker compose up -d 2>&1 | tee -a \"$LOG\"\n    fi\ndone\n\ndocker image prune -f | tee -a \"$LOG\"\necho \"[$(date)] Update complete\" | tee -a \"$LOG\"\n\nOption B: Watchtower (Automated — use with caution)\n\nservices:\n  watchtower:\n    image: containrrr/watchtower:1.7.1\n    container_name: watchtower\n    restart: unless-stopped\n    volumes:\n      - /var/run/docker.sock:/var/run/docker.sock\n    environment:\n      - WATCHTOWER_SCHEDULE=0 0 4 * * MON  # Monday 4 AM\n      - WATCHTOWER_CLEANUP=true\n      - WATCHTOWER_NOTIFICATIONS=shoutrrr\n      - WATCHTOWER_NOTIFICATION_URL=discord://webhook\n      - WATCHTOWER_LABEL_ENABLE=true    # Only update labeled containers\n    # Add label to containers: com.centurylinklabs.watchtower.enable=true"
      },
      {
        "title": "Weekly Maintenance Checklist",
        "body": "Check Uptime Kuma for any downtime events\n Review disk usage (df -h)\n Check container health (docker ps --filter health=unhealthy)\n Review fail2ban bans (fail2ban-client status)\n Check backup logs (last successful backup)\n Review Docker logs for errors (docker logs --since 168h <container>)\n Prune unused resources (docker system prune -f)"
      },
      {
        "title": "Monthly Maintenance",
        "body": "Update all container images (read changelogs first!)\n Update host OS (apt update && apt upgrade)\n Test a backup restore\n Review and rotate secrets/passwords\n Check SSL certificate expiry dates\n Review Grafana dashboards for trends\n Clean up unused Docker networks/volumes"
      },
      {
        "title": "Multi-Node Architecture",
        "body": "┌─────────────┐     ┌─────────────┐     ┌─────────────┐\n│   Node 1    │     │   Node 2    │     │   Node 3    │\n│ (Proxy/DNS) │────│ (Services)  │────│   (NAS)     │\n│ Traefik     │     │ Apps        │     │ TrueNAS     │\n│ Pi-hole     │     │ Databases   │     │ NFS/SMB     │\n│ Authelia    │     │ Media       │     │ Backup      │\n└─────────────┘     └─────────────┘     └─────────────┘\n       ↑                   ↑                   ↑\n       └───────── Tailscale Mesh ──────────────┘"
      },
      {
        "title": "Docker Compose Includes (Compose v2.20+)",
        "body": "# Shared fragments\ninclude:\n  - path: ../common/traefik-labels.yml\n  - path: ../common/logging.yml\n\nservices:\n  app:\n    # inherits common configs"
      },
      {
        "title": "GitOps for Homelab",
        "body": "homelab-configs/           # Git repo\n├── .github/\n│   └── workflows/\n│       └── deploy.yml     # CI: lint + push to server\n├── stacks/\n│   ├── traefik/\n│   ├── monitoring/\n│   └── media/\n├── scripts/\n└── README.md\n\nWorkflow: Edit compose locally → commit → push → CI deploys to server\nTools: Flux/ArgoCD (overkill), or simple git pull && docker compose up -d via webhook"
      },
      {
        "title": "Hardware Redundancy",
        "body": "| Component | Solution | Cost |\n| --- | --- | --- |\n| Power | UPS (APC Back-UPS 600VA+) | $60-150 |\n| Storage | RAID1/ZFS mirror (not RAID0!) | 2x disk cost |\n| Network | Dual NIC, managed switch | $30-100 |\n| Server | Second node (cold spare or active) | $100-400 |\n\nRule: RAID is NOT backup. It protects against disk failure only, not ransomware/deletion/corruption."
      },
      {
        "title": "Common Issues Decision Tree",
        "body": "Service not accessible?\n├── Can you ping the server? → No → Network/firewall issue\n├── Is the container running? (`docker ps`) → No → Check logs: `docker logs <name>`\n├── Is the port exposed? (`docker port <name>`) → No → Check compose ports/networks\n├── Is Traefik routing? (Check Traefik dashboard) → No → Check labels, network\n├── Is DNS resolving? (`dig app.example.com`) → No → Check DNS provider\n└── SSL error? → Check acme.json permissions (chmod 600), cert resolver logs"
      },
      {
        "title": "Docker Debug Commands",
        "body": "# Container not starting\ndocker logs <name> --tail 50\ndocker inspect <name> | jq '.[0].State'\n\n# Network issues\ndocker network ls\ndocker network inspect <network>\ndocker exec <name> ping other-container\n\n# Resource issues\ndocker stats                          # Live resource usage\ndocker system df                      # Disk usage\ndocker volume ls -f dangling=true     # Orphaned volumes\n\n# Nuclear options (use carefully)\ndocker compose down && docker compose up -d    # Full restart\ndocker system prune -af --volumes              # Clean EVERYTHING"
      },
      {
        "title": "Performance Optimization",
        "body": "| Symptom | Likely Cause | Fix |\n| --- | --- | --- |\n| Slow file access | HDD for database | Move DB to SSD |\n| High CPU idle | Monitoring too frequent | Increase scrape intervals |\n| OOM kills | No memory limits | Set deploy.resources.limits.memory |\n| Slow Nextcloud | Missing Redis cache | Add Redis container |\n| Jellyfin buffering | No hardware transcoding | Enable GPU passthrough |\n| Slow Docker builds | No layer caching | Use multi-stage + .dockerignore |"
      },
      {
        "title": "Vaultwarden (Password Manager)",
        "body": "services:\n  vaultwarden:\n    image: vaultwarden/server:1.30.5\n    container_name: vaultwarden\n    restart: unless-stopped\n    volumes:\n      - vaultwarden-data:/data\n    environment:\n      - SIGNUPS_ALLOWED=false       # Disable after creating your account\n      - WEBSOCKET_ENABLED=true\n      - ADMIN_TOKEN=${ADMIN_TOKEN}  # Generate: openssl rand -base64 48\n    labels:\n      - \"traefik.enable=true\"\n      - \"traefik.http.routers.vault.rule=Host(`vault.example.com`)\""
      },
      {
        "title": "Immich (Photo Backup)",
        "body": "# Use their official docker-compose.yml from:\n# https://github.com/immich-app/immich/releases/latest/download/docker-compose.yml\n# Key settings:\n# - Set UPLOAD_LOCATION to a large storage mount\n# - Enable hardware transcoding if GPU available\n# - Set IMMICH_MACHINE_LEARNING_URL for face detection"
      },
      {
        "title": "Paperless-ngx (Document Management)",
        "body": "services:\n  paperless:\n    image: ghcr.io/paperless-ngx/paperless-ngx:2.4\n    container_name: paperless\n    restart: unless-stopped\n    volumes:\n      - paperless-data:/usr/src/paperless/data\n      - paperless-media:/usr/src/paperless/media\n      - ./consume:/usr/src/paperless/consume  # Drop PDFs here\n      - ./export:/usr/src/paperless/export\n    environment:\n      - PAPERLESS_OCR_LANGUAGE=eng\n      - PAPERLESS_TIME_ZONE=Europe/London\n      - PAPERLESS_ADMIN_USER=${ADMIN_USER}\n      - PAPERLESS_ADMIN_PASSWORD=${ADMIN_PASS}"
      },
      {
        "title": "Homelab Quality Rubric (0-100)",
        "body": "DimensionWeight0 (Poor)50 (Decent)100 (Excellent)Security20%Default passwords, open portsFirewall + SSLHardened SSH, SSO/2FA, no-new-privilegesBackups20%NoneLocal only, untested3-2-1, automated, verified monthlyMonitoring15%NoneUptime Kuma onlyFull stack: metrics + logs + alertsDocumentation10%Nothing writtenREADME per stackGitOps, full runbook, diagramsUpdates10%Never updatedManual quarterlyScheduled weekly, changelogs reviewedReliability10%Frequent crashesMostly stableUPS, auto-restart, health checksPerformance10%Slow, OOM killsAdequateResource limits, SSD, HW transcodingScalability5%Single machine, no planCompose organizedMulti-node ready, IaC"
      },
      {
        "title": "10 Self-Hosting Mistakes",
        "body": "#MistakeFix1Using :latest tagPin versions: image:1.2.32No backups3-2-1 backup rule, test restores3Exposing ports directlyEverything behind reverse proxy4Default passwordsChange immediately, use password manager5No monitoringUptime Kuma minimum, Grafana for depth6RAID = backup mentalityRAID protects disks, not data7Over-engineering day 1Start small, add complexity as needed8No documentationDocument every service, every port, every cron9Ignoring updatesSecurity patches matter, schedule updates10Running as rootNon-root containers, restricted SSH"
      },
      {
        "title": "Natural Language Commands",
        "body": "SayAgent Does\"Set up a new service\"Guide through compose file creation with security best practices\"Audit my homelab security\"Run through security scoring checklist\"Plan my backup strategy\"Design 3-2-1 backup plan for your setup\"What should I self-host?\"Assess needs and recommend services by tier\"My container keeps crashing\"Walk through troubleshooting decision tree\"Help me set up Traefik\"Generate production Traefik config with SSL\"Compare NAS options\"Compare TrueNAS vs Unraid vs DIY for your needs\"Optimize my Docker setup\"Review compose files for security and performance\"Set up monitoring\"Deploy Uptime Kuma + Prometheus + Grafana stack\"Plan a hardware upgrade\"Assess current usage, recommend hardware by budget\"Migrate from cloud to self-hosted\"Plan migration with data export and service mapping\"Set up remote access\"Compare and deploy VPN/Tailscale for secure remote access"
      }
    ],
    "body": "Self-Hosting Mastery\n\nComplete system for building and operating reliable self-hosted infrastructure — from first server to multi-node homelab.\n\nPhase 1: Infrastructure Assessment\nServer Profile YAML\nserver_profile:\n  name: \"\"\n  hardware:\n    cpu: \"\"              # e.g., \"Intel i5-12400\" or \"Raspberry Pi 5\"\n    ram_gb: 0\n    storage:\n      - device: \"\"       # e.g., \"/dev/sda\"\n        type: \"\"         # ssd | hdd | nvme\n        size_gb: 0\n        role: \"\"         # boot | data | backup\n    network: \"\"          # 1gbe | 2.5gbe | 10gbe\n  os: \"\"                 # debian | ubuntu | proxmox | unraid | truenas\n  location: \"\"           # home | closet | rack | colo | vps\n  power:\n    ups: false\n    wattage_idle: 0\n    wattage_load: 0\n    monthly_cost_estimate: \"\"  # electricity\n  network:\n    public_ip: \"\"        # static | dynamic | cgnat\n    domain: \"\"\n    dns_provider: \"\"     # cloudflare | duckdns | custom\n    isp_ports_open: true # some ISPs block 80/443\n  goals:\n    - \"\"                 # media server, smart home, dev environment, etc.\n  budget_monthly: \"\"     # electricity + domain + any VPS\n\nHardware Decision Matrix\nBudget\tRAM\tStorage\tGood For\tExample Hardware\n$0\t4-8GB\t64GB+\tPi-hole, AdGuard, small tools\tRaspberry Pi 4/5\n$50-150\t8-16GB\t256GB+\tDocker host, 5-10 services\tUsed SFF PC (Dell Optiplex, Lenovo Tiny)\n$150-400\t16-32GB\t1TB+\tNAS + services, media server\tMini PC (Intel NUC, Beelink)\n$400-800\t32-64GB\t4TB+\tFull homelab, VMs + containers\tUsed enterprise (Dell R720, HP DL380)\n$800+\t64GB+\t10TB+\tMulti-node, Proxmox cluster\tMultiple nodes, dedicated NAS\nSelf-Host vs SaaS Decision\n\nAsk before self-hosting anything:\n\nData sensitivity — Does keeping data local matter? (passwords, health, finance = yes)\nReliability need — Can you tolerate occasional downtime? 
(email = risky, media = fine)\nMaintenance budget — Do you have 2-4 hours/month for updates?\nSkill level — Can you debug Docker/networking issues?\nCost comparison — Is the SaaS < $10/mo? Often not worth self-hosting for trivial savings.\n\nAlways self-host: Password manager, DNS/ad-blocking, VPN, bookmarks, notes Usually self-host: Media server, file sync, photo backup, monitoring, git Think twice: Email (deliverability hell), calendar (sync complexity), chat (uptime expectations) Rarely worth it: Search engine (resource hungry), social media (no network effect)\n\nPhase 2: OS & Virtualization\nOS Selection Guide\nOS\tBest For\tLearning Curve\tNotes\nDebian 12\tDocker-only host\tLow\tStable, minimal, just works\nUbuntu Server 24.04\tBeginners, wide docs\tLow\tMore packages, snap controversy\nProxmox VE\tVMs + containers\tMedium\tFree, enterprise features, ZFS\nUnraid\tNAS + Docker + VMs\tMedium\t$59-129, great UI, parity array\nTrueNAS Scale\tZFS NAS + Docker\tMedium\tFree, ZFS-first, apps improving\nNixOS\tReproducible configs\tHigh\tDeclarative, steep learning curve\nProxmox Quick Setup\n# Post-install essentials\n# 1. Remove enterprise repo (if no subscription)\nsed -i 's/^deb/#deb/' /etc/apt/sources.list.d/pve-enterprise.list\necho \"deb http://download.proxmox.com/debian/pve bookworm pve-no-subscription\" > /etc/apt/sources.list.d/pve-no-subscription.list\napt update && apt upgrade -y\n\n# 2. Create a Docker LXC (lightweight container)\n# Download template: Datacenter → Storage → CT Templates → Download → debian-12\n# Create CT: 2 cores, 2GB RAM, 32GB disk, bridge vmbr0\n# Inside CT: install Docker\napt install -y curl\ncurl -fsSL https://get.docker.com | sh\n\n# 3. 
Enable IOMMU for GPU passthrough (if needed)\n# Edit /etc/default/grub: GRUB_CMDLINE_LINUX_DEFAULT=\"quiet intel_iommu=on\"\n# update-grub && reboot\n\nVM vs LXC vs Docker Decision\nFactor\tVM\tLXC\tDocker\nIsolation\tFull (own kernel)\tPartial (shared kernel)\tProcess-level\nOverhead\tHigh (1-2GB base)\tLow (50-200MB)\tMinimal\nUse when\tDifferent OS, GPU passthrough, untrusted workloads\tDedicated service host, ZFS datasets\tMost services\nAvoid when\tRAM-constrained\tNeed Windows, custom kernel\tStateful databases (use LXC/VM)\n\nRule: Docker for 90% of services. LXC for Docker hosts or isolated environments. VM for Windows, different kernel needs, or GPU passthrough.\n\nPhase 3: Docker Infrastructure\nDocker Compose Project Structure\n/opt/stacks/           # or ~/docker/\n├── traefik/\n│   ├── docker-compose.yml\n│   ├── .env\n│   ├── config/\n│   │   └── traefik.yml\n│   └── data/\n│       ├── acme.json          # chmod 600\n│       └── dynamic/\n├── monitoring/\n│   ├── docker-compose.yml\n│   ├── .env\n│   └── config/\n├── media/\n│   ├── docker-compose.yml\n│   ├── .env\n│   └── config/\n├── productivity/\n│   ├── docker-compose.yml\n│   ├── .env\n│   └── config/\n└── scripts/\n    ├── backup.sh\n    ├── update-all.sh\n    └── health-check.sh\n\nDocker Compose Best Practices\n# Template: production-grade service\nservices:\n  app:\n    image: vendor/app:1.2.3           # ALWAYS pin version\n    container_name: app               # Explicit name\n    restart: unless-stopped           # Auto-restart\n    networks:\n      - proxy                         # Traefik network\n      - internal                      # Backend network\n    volumes:\n      - ./config:/config              # Bind mount for config\n      - app-data:/data                # Named volume for data\n    environment:\n      - TZ=Europe/London              # Always set timezone\n      - PUID=1000                     # Match host user\n      - PGID=1000\n    env_file:\n      - .env                 
         # Secrets in .env (gitignored)\n    labels:\n      - \"traefik.enable=true\"\n      - \"traefik.http.routers.app.rule=Host(`app.example.com`)\"\n      - \"traefik.http.routers.app.tls.certresolver=letsencrypt\"\n      - \"traefik.http.services.app.loadbalancer.server.port=8080\"\n    healthcheck:\n      test: [\"CMD\", \"curl\", \"-f\", \"http://localhost:8080/health\"]\n      interval: 30s\n      timeout: 10s\n      retries: 3\n      start_period: 40s\n    deploy:\n      resources:\n        limits:\n          memory: 512M               # Prevent OOM cascades\n    security_opt:\n      - no-new-privileges:true        # Security hardening\n    read_only: true                   # Where possible\n    tmpfs:\n      - /tmp\n\nvolumes:\n  app-data:\n\nnetworks:\n  proxy:\n    external: true\n  internal:\n\nDocker Security Checklist\n Pin all image versions (never :latest in production)\n Set restart: unless-stopped on all services\n Use .env files for secrets (never hardcode in compose)\n Set memory limits on all containers\n Use security_opt: no-new-privileges:true\n Use read_only: true where possible + tmpfs for /tmp\n Create separate Docker networks per stack\n Never expose database ports to 0.0.0.0\n Run containers as non-root (PUID/PGID or user:)\n Enable Docker content trust: export DOCKER_CONTENT_TRUST=1\n Prune unused images/volumes monthly: docker system prune -af\n Use named volumes (not anonymous) for all persistent data\n Set TZ environment variable on every container\nPhase 4: Reverse Proxy & SSL\nReverse Proxy Selection\nProxy\tBest For\tSSL\tConfig Style\tLearning Curve\nTraefik\tDocker-native, auto-discovery\tAuto (ACME)\tLabels + YAML\tMedium\nCaddy\tSimplicity, auto-SSL\tAuto (built-in)\tCaddyfile\tLow\nNginx Proxy Manager\tGUI preference\tAuto (UI)\tWeb UI\tVery Low\nNginx (manual)\tMaximum control\tManual/certbot\tConfig files\tHigh\n\nRecommendation: Traefik for Docker power users. Caddy for simplicity. 
NPM for beginners.\n\nTraefik Production Config\n# traefik/config/traefik.yml\napi:\n  dashboard: true\n  insecure: false\n\nentryPoints:\n  web:\n    address: \":80\"\n    http:\n      redirections:\n        entryPoint:\n          to: websecure\n          scheme: https\n  websecure:\n    address: \":443\"\n    http:\n      tls:\n        certResolver: letsencrypt\n\ncertificatesResolvers:\n  letsencrypt:\n    acme:\n      email: you@example.com\n      storage: /data/acme.json\n      # Use DNS challenge if ISP blocks port 80\n      # dnsChallenge:\n      #   provider: cloudflare\n      httpChallenge:\n        entryPoint: web\n\nproviders:\n  docker:\n    exposedByDefault: false    # Explicit opt-in per service\n    network: proxy\n  file:\n    directory: /data/dynamic\n    watch: true\n\nlog:\n  level: WARN\n\naccessLog:\n  filePath: /data/access.log\n  bufferingSize: 100\n\nCloudflare Tunnel (Zero Port Forwarding)\n\nFor CGNAT or ISPs blocking ports — expose services without opening firewall:\n\n# cloudflared/docker-compose.yml\nservices:\n  cloudflared:\n    image: cloudflare/cloudflared:2024.1.0\n    container_name: cloudflared\n    restart: unless-stopped\n    command: tunnel run\n    environment:\n      - TUNNEL_TOKEN=${CF_TUNNEL_TOKEN}\n    networks:\n      - proxy\n\n\nWhen to use Cloudflare Tunnel vs port forwarding:\n\nCGNAT (no public IP) → Tunnel (only option)\nISP blocks 80/443 → Tunnel or DNS challenge + non-standard ports\nSecurity-first → Tunnel (no open ports)\nPerformance-first → Direct (lower latency)\nLAN-only access → Neither (use Tailscale/WireGuard)\nPhase 5: Essential Services Stack\nTier 1 — Deploy First (Foundation)\nService\tPurpose\tImage\tRAM\tNotes\nTraefik/Caddy\tReverse proxy + SSL\ttraefik:v3.0\t64MB\tGateway to everything\nPi-hole/AdGuard\tDNS + ad blocking\tpihole/pihole\t128MB\tNetwork-wide ad blocking\nAuthelia/Authentik\tSSO + 2FA\tauthelia/authelia\t128MB\tProtect services without built-in auth\nUptime 
Kuma\tMonitoring\tlouislam/uptime-kuma\t128MB\tKnow when things break\nWatchtower\tAuto-updates\tcontainrrr/watchtower\t32MB\tOptional — some prefer manual\nTier 2 — Core Services\nService\tPurpose\tAlt\tRAM\nVaultwarden\tPassword manager\tBitwarden\t64MB\nNextcloud\tFile sync + office\tSeafile (lighter)\t512MB\nImmich\tPhoto backup\tPhotoPrism\t1-4GB\nJellyfin\tMedia server\tPlex (less free)\t512MB-2GB\nPaperless-ngx\tDocument management\t-\t256MB\nHome Assistant\tSmart home\t-\t512MB\nTier 3 — Power User\nService\tPurpose\tRAM\nGitea/Forgejo\tGit hosting\t256MB\nn8n\tWorkflow automation\t256MB\nGrafana + Prometheus\tMetrics & dashboards\t512MB\nTandoor\tRecipe management\t256MB\nMealie\tMeal planning\t128MB\nLinkwarden/Hoarder\tBookmark manager\t256MB\nStirling PDF\tPDF tools\t512MB\nIT-Tools\tDeveloper utilities\t64MB\nRAM Planning\nTotal RAM needed ≈ OS base (1-2GB) + sum of service RAM + 20% headroom\nExample 16GB server:\n  OS + Docker:     2 GB\n  Traefik:         0.1 GB\n  Pi-hole:         0.1 GB\n  Authelia:        0.1 GB\n  Uptime Kuma:     0.1 GB\n  Vaultwarden:     0.1 GB\n  Nextcloud:       0.5 GB\n  Immich:          2.0 GB\n  Jellyfin:        1.0 GB\n  Paperless:       0.3 GB\n  Home Assistant:  0.5 GB\n  ──────────────────────\n  Total:           6.8 GB → 8.2 GB with headroom\n  Available:       ~7.8 GB free for more services\n\nPhase 6: Networking & DNS\nDNS Architecture\nInternet → Cloudflare DNS → Your Public IP → Router → Server\n                                                        ↓\n                                             Reverse Proxy (Traefik)\n                                                        ↓\n                                     ┌──────────────────┼──────────────────┐\n                                     ↓                  ↓                  ↓\n                                app.domain.com   files.domain.com   media.domain.com\n\nSplit DNS (Access Services Locally Without Hairpin NAT)\n# Pi-hole/AdGuard: Local DNS 
rewrites\n# Point *.home.example.com → 192.168.1.100 (server LAN IP)\n# External: Cloudflare points to public IP\n# Result: LAN traffic stays local, external goes through internet\n\nVPN for Remote Access\nSolution\tType\tBest For\tComplexity\nTailscale\tMesh VPN\tEasiest setup, multi-device\tVery Low\nWireGuard\tPoint-to-point\tPerformance, full control\tMedium\nHeadscale\tSelf-hosted Tailscale\tPrivacy, no vendor lock\tMedium-High\n\nRecommendation: Start with Tailscale (free for 3 users). Move to Headscale when you want full control.\n\nFirewall Rules (UFW)\n# Default deny incoming\nufw default deny incoming\nufw default allow outgoing\n\n# Allow SSH (change port from 22!)\nufw allow 2222/tcp comment 'SSH'\n\n# Allow HTTP/HTTPS for reverse proxy\nufw allow 80/tcp comment 'HTTP redirect'\nufw allow 443/tcp comment 'HTTPS'\n\n# Allow local network for discovery\nufw allow from 192.168.1.0/24 comment 'LAN'\n\n# Enable\nufw enable\n\nPhase 7: Backup Strategy\n3-2-1 Rule Implementation\n3 copies:  Live data + Local backup + Remote backup\n2 media:   SSD/HDD (server) + External drive or NAS\n1 offsite: Cloud (Backblaze B2, Wasabi) or second location\n\nBackup Script Template\n#!/bin/bash\n# /opt/stacks/scripts/backup.sh\nset -euo pipefail\n\nBACKUP_DIR=\"/mnt/backup/docker\"\nSTACKS_DIR=\"/opt/stacks\"\nDATE=$(date +%Y-%m-%d_%H%M)\nRETENTION_DAYS=30\n\nlog() { echo \"[$(date '+%Y-%m-%d %H:%M:%S')] $1\"; }\n\n# 1. Stop services that need consistent backups\nlog \"Stopping database services...\"\ncd \"$STACKS_DIR/productivity\" && docker compose stop db\n\n# 2. Backup Docker volumes\nlog \"Backing up volumes...\"\nfor vol in $(docker volume ls -q); do\n    docker run --rm \\\n        -v \"$vol\":/source:ro \\\n        -v \"$BACKUP_DIR/volumes\":/backup \\\n        alpine tar czf \"/backup/${vol}_${DATE}.tar.gz\" -C /source .\ndone\n\n# 3. 
Backup compose files and configs\nlog \"Backing up configs...\"\ntar czf \"$BACKUP_DIR/configs/stacks_${DATE}.tar.gz\" \\\n    --exclude='*.log' \\\n    --exclude='node_modules' \\\n    \"$STACKS_DIR\"\n\n# 4. Restart services\nlog \"Restarting services...\"\ncd \"$STACKS_DIR/productivity\" && docker compose start db\n\n# 5. Cleanup old backups\nlog \"Cleaning up backups older than ${RETENTION_DAYS} days...\"\nfind \"$BACKUP_DIR\" -name \"*.tar.gz\" -mtime +$RETENTION_DAYS -delete\n\n# 6. Sync to remote (Backblaze B2 example)\n# rclone sync \"$BACKUP_DIR\" b2:my-backups/docker/ --transfers 4\n\n# 7. Verify\nBACKUP_SIZE=$(du -sh \"$BACKUP_DIR\" | cut -f1)\nlog \"Backup complete. Total size: $BACKUP_SIZE\"\n\n# 8. Send notification (optional)\n# curl -s \"https://ntfy.sh/my-backups\" -d \"Backup complete: $BACKUP_SIZE\"\n\nBackup Schedule\nWhat\tFrequency\tRetention\tMethod\nDocker volumes\tDaily 3 AM\t30 days\tScript + cron\nCompose files + configs\tDaily 3 AM\t90 days\tScript + cron\nDatabase dumps\tEvery 6 hours\t7 days\tpg_dump/mysqldump\nFull disk image\tMonthly\t3 months\tClonezilla/dd\nOffsite sync\tDaily 5 AM\t60 days\trclone to B2/Wasabi\nBackup Verification (Monthly)\n Pick a random backup from last week\n Restore to a test VM/container\n Verify data integrity (check file counts, DB row counts)\n Time the restore process (document RTO)\n Log results in backup-verification.md\nPhase 8: Monitoring & Alerting\nMonitoring Stack (Docker Compose)\n# monitoring/docker-compose.yml\nservices:\n  uptime-kuma:\n    image: louislam/uptime-kuma:1\n    container_name: uptime-kuma\n    restart: unless-stopped\n    volumes:\n      - uptime-data:/app/data\n    labels:\n      - \"traefik.enable=true\"\n      - \"traefik.http.routers.uptime.rule=Host(`status.example.com`)\"\n\n  prometheus:\n    image: prom/prometheus:v2.49.0\n    container_name: prometheus\n    restart: unless-stopped\n    volumes:\n      - ./config/prometheus.yml:/etc/prometheus/prometheus.yml:ro\n      - 
prometheus-data:/prometheus\n    command:\n      - '--config.file=/etc/prometheus/prometheus.yml'\n      - '--storage.tsdb.retention.time=30d'\n\n  grafana:\n    image: grafana/grafana:10.3.0\n    container_name: grafana\n    restart: unless-stopped\n    volumes:\n      - grafana-data:/var/lib/grafana\n    environment:\n      - GF_SECURITY_ADMIN_PASSWORD=${GRAFANA_PASSWORD}\n\n  node-exporter:\n    image: prom/node-exporter:v1.7.0\n    container_name: node-exporter\n    restart: unless-stopped\n    pid: host\n    volumes:\n      - /proc:/host/proc:ro\n      - /sys:/host/sys:ro\n      - /:/rootfs:ro\n    command:\n      - '--path.procfs=/host/proc'\n      - '--path.sysfs=/host/sys'\n      - '--path.rootfs=/rootfs'\n\n  cadvisor:\n    image: gcr.io/cadvisor/cadvisor:v0.49.0\n    container_name: cadvisor\n    restart: unless-stopped\n    volumes:\n      - /:/rootfs:ro\n      - /var/run:/var/run:ro\n      - /sys:/sys:ro\n      - /var/lib/docker/:/var/lib/docker:ro\n\nvolumes:\n  uptime-data:\n  prometheus-data:\n  grafana-data:\n\nAlert Rules\nMetric\tWarning\tCritical\tAction\nDisk usage\t>80%\t>90%\tCleanup or expand\nRAM usage\t>85%\t>95%\tIdentify memory leak, add RAM\nCPU sustained\t>80% 5min\t>95% 5min\tCheck runaway process\nContainer restart\t>2/hour\t>5/hour\tCheck logs, fix root cause\nSSL cert expiry\t<14 days\t<3 days\tRenew cert\nBackup age\t>26 hours\t>48 hours\tCheck backup script/cron\nService down\t>2 min\t>10 min\tInvestigate, restart\nNotification Channels\nChannel\tService\tBest For\nPush notification\tntfy.sh (self-hosted)\tMobile alerts\nChat\tDiscord/Slack webhook\tTeam alerts\nEmail\tUptime Kuma built-in\tFormal notifications\nDashboard\tGrafana + Uptime Kuma\tVisual monitoring\nPhase 9: Security Hardening\nServer Hardening Checklist\n# 1. 
SSH hardening\n# /etc/ssh/sshd_config\nPort 2222                          # Change default port\nPermitRootLogin no                 # No root SSH\nPasswordAuthentication no          # Key-only\nMaxAuthTries 3\nAllowUsers yourusername\n\n# 2. Install fail2ban\napt install fail2ban -y\nsystemctl enable fail2ban\n\n# 3. Automatic security updates\napt install unattended-upgrades -y\ndpkg-reconfigure -plow unattended-upgrades\n\n# 4. Disable unused services\nsystemctl list-unit-files --state=enabled\n# Disable anything you don't need\n\nAuthentication Architecture\nInternet → Traefik → Authelia/Authentik → Service\n                         ↓\n                    Check: authenticated?\n                    Yes → Forward to service\n                    No → Redirect to login page + 2FA\n\n\nAuthelia (lightweight, YAML config) — good for smaller setups Authentik (full IdP, web UI) — good for many users/services, SAML/OIDC\n\nSecurity Scoring (0-100)\nDimension\tWeight\tScore Guide\nSSH hardened (keys, non-root, non-22)\t15\t0=default, 15=fully hardened\nFirewall active (deny-by-default)\t15\t0=none, 15=UFW/iptables configured\nReverse proxy (no direct port exposure)\t15\t0=ports exposed, 15=all behind proxy\nSSL/TLS on all services\t10\t0=HTTP, 10=HTTPS everywhere\nAuth on all public services\t15\t0=open, 15=SSO/2FA on everything\nContainer security (non-root, limits)\t10\t0=default, 10=hardened\nAuto-updates enabled\t10\t0=manual, 10=automated\nSecrets management (.env, not hardcoded)\t10\t0=in compose, 10=.env + restricted perms\n\nScore: 0-40 = Vulnerable, 41-70 = Acceptable, 71-90 = Good, 91-100 = Hardened\n\nPhase 10: Maintenance & Updates\nUpdate Strategy\n\nOption A: Manual (Recommended for critical services)\n\n# Update script: /opt/stacks/scripts/update-all.sh\n#!/bin/bash\nset -euo pipefail\n\nSTACKS_DIR=\"/opt/stacks\"\nLOG=\"/var/log/docker-updates.log\"\n\nfor stack in \"$STACKS_DIR\"/*/; do\n    if [ -f \"$stack/docker-compose.yml\" ]; then\n        echo 
\"[$(date)] Updating $(basename $stack)...\" | tee -a \"$LOG\"\n        cd \"$stack\"\n        docker compose pull 2>&1 | tee -a \"$LOG\"\n        docker compose up -d 2>&1 | tee -a \"$LOG\"\n    fi\ndone\n\ndocker image prune -f | tee -a \"$LOG\"\necho \"[$(date)] Update complete\" | tee -a \"$LOG\"\n\n\nOption B: Watchtower (Automated — use with caution)\n\nservices:\n  watchtower:\n    image: containrrr/watchtower:1.7.1\n    container_name: watchtower\n    restart: unless-stopped\n    volumes:\n      - /var/run/docker.sock:/var/run/docker.sock\n    environment:\n      - WATCHTOWER_SCHEDULE=0 0 4 * * MON  # Monday 4 AM\n      - WATCHTOWER_CLEANUP=true\n      - WATCHTOWER_NOTIFICATIONS=shoutrrr\n      - WATCHTOWER_NOTIFICATION_URL=discord://webhook\n      - WATCHTOWER_LABEL_ENABLE=true    # Only update labeled containers\n    # Add label to containers: com.centurylinklabs.watchtower.enable=true\n\nWeekly Maintenance Checklist\n Check Uptime Kuma for any downtime events\n Review disk usage (df -h)\n Check container health (docker ps --filter health=unhealthy)\n Review fail2ban bans (fail2ban-client status)\n Check backup logs (last successful backup)\n Review Docker logs for errors (docker logs --since 7d <container>)\n Prune unused resources (docker system prune -f)\nMonthly Maintenance\n Update all container images (read changelogs first!)\n Update host OS (apt update && apt upgrade)\n Test a backup restore\n Review and rotate secrets/passwords\n Check SSL certificate expiry dates\n Review Grafana dashboards for trends\n Clean up unused Docker networks/volumes\nPhase 11: Advanced Patterns\nMulti-Node Architecture\n┌─────────────┐     ┌─────────────┐     ┌─────────────┐\n│   Node 1    │     │   Node 2    │     │   Node 3    │\n│ (Proxy/DNS) │────│ (Services)  │────│   (NAS)     │\n│ Traefik     │     │ Apps        │     │ TrueNAS     │\n│ Pi-hole     │     │ Databases   │     │ NFS/SMB     │\n│ Authelia    │     │ Media       │     │ Backup      │\n└─────────────┘ 
    └─────────────┘     └─────────────┘\n       ↑                   ↑                   ↑\n       └───────── Tailscale Mesh ──────────────┘\n\nDocker Compose Includes (Compose v2.20+)\n# Shared fragments\ninclude:\n  - path: ../common/traefik-labels.yml\n  - path: ../common/logging.yml\n\nservices:\n  app:\n    # inherits common configs\n\nGitOps for Homelab\nhomelab-configs/           # Git repo\n├── .github/\n│   └── workflows/\n│       └── deploy.yml     # CI: lint + push to server\n├── stacks/\n│   ├── traefik/\n│   ├── monitoring/\n│   └── media/\n├── scripts/\n└── README.md\n\n\nWorkflow: Edit compose locally → commit → push → CI deploys to server Tools: Flux/ArgoCD (overkill), or simple git pull && docker compose up -d via webhook\n\nHardware Redundancy\nComponent\tSolution\tCost\nPower\tUPS (APC Back-UPS 600VA+)\t$60-150\nStorage\tRAID1/ZFS mirror (not RAID0!)\t2x disk cost\nNetwork\tDual NIC, managed switch\t$30-100\nServer\tSecond node (cold spare or active)\t$100-400\n\nRule: RAID is NOT backup. It protects against disk failure only, not ransomware/deletion/corruption.\n\nPhase 12: Troubleshooting\nCommon Issues Decision Tree\nService not accessible?\n├── Can you ping the server? → No → Network/firewall issue\n├── Is the container running? (`docker ps`) → No → Check logs: `docker logs <name>`\n├── Is the port exposed? (`docker port <name>`) → No → Check compose ports/networks\n├── Is Traefik routing? (Check Traefik dashboard) → No → Check labels, network\n├── Is DNS resolving? (`dig app.example.com`) → No → Check DNS provider\n└── SSL error? 
→ Check acme.json permissions (chmod 600), cert resolver logs\n\nDocker Debug Commands\n# Container not starting\ndocker logs <name> --tail 50\ndocker inspect <name> | jq '.[0].State'\n\n# Network issues\ndocker network ls\ndocker network inspect <network>\ndocker exec <name> ping other-container\n\n# Resource issues\ndocker stats                          # Live resource usage\ndocker system df                      # Disk usage\ndocker volume ls -f dangling=true     # Orphaned volumes\n\n# Nuclear options (use carefully)\ndocker compose down && docker compose up -d    # Full restart\ndocker system prune -af --volumes              # Clean EVERYTHING\n\nPerformance Optimization\nSymptom\tLikely Cause\tFix\nSlow file access\tHDD for database\tMove DB to SSD\nHigh CPU idle\tMonitoring too frequent\tIncrease scrape intervals\nOOM kills\tNo memory limits\tSet deploy.resources.limits.memory\nSlow Nextcloud\tMissing Redis cache\tAdd Redis container\nJellyfin buffering\tNo hardware transcoding\tEnable GPU passthrough\nSlow Docker builds\tNo layer caching\tUse multi-stage + .dockerignore\nService Configuration Quick Reference\nVaultwarden (Password Manager)\nservices:\n  vaultwarden:\n    image: vaultwarden/server:1.30.5\n    container_name: vaultwarden\n    restart: unless-stopped\n    volumes:\n      - vaultwarden-data:/data\n    environment:\n      - SIGNUPS_ALLOWED=false       # Disable after creating your account\n      - WEBSOCKET_ENABLED=true\n      - ADMIN_TOKEN=${ADMIN_TOKEN}  # Generate: openssl rand -base64 48\n    labels:\n      - \"traefik.enable=true\"\n      - \"traefik.http.routers.vault.rule=Host(`vault.example.com`)\"\n\nImmich (Photo Backup)\n# Use their official docker-compose.yml from:\n# https://github.com/immich-app/immich/releases/latest/download/docker-compose.yml\n# Key settings:\n# - Set UPLOAD_LOCATION to a large storage mount\n# - Enable hardware transcoding if GPU available\n# - Set IMMICH_MACHINE_LEARNING_URL for face detection\n\nPaperless-ngx 
(Document Management)\nservices:\n  paperless:\n    image: ghcr.io/paperless-ngx/paperless-ngx:2.4\n    container_name: paperless\n    restart: unless-stopped\n    volumes:\n      - paperless-data:/usr/src/paperless/data\n      - paperless-media:/usr/src/paperless/media\n      - ./consume:/usr/src/paperless/consume  # Drop PDFs here\n      - ./export:/usr/src/paperless/export\n    environment:\n      - PAPERLESS_OCR_LANGUAGE=eng\n      - PAPERLESS_TIME_ZONE=Europe/London\n      - PAPERLESS_ADMIN_USER=${ADMIN_USER}\n      - PAPERLESS_ADMIN_PASSWORD=${ADMIN_PASS}\n\nHomelab Quality Rubric (0-100)\nDimension\tWeight\t0 (Poor)\t50 (Decent)\t100 (Excellent)\nSecurity\t20%\tDefault passwords, open ports\tFirewall + SSL\tHardened SSH, SSO/2FA, no-new-privileges\nBackups\t20%\tNone\tLocal only, untested\t3-2-1, automated, verified monthly\nMonitoring\t15%\tNone\tUptime Kuma only\tFull stack: metrics + logs + alerts\nDocumentation\t10%\tNothing written\tREADME per stack\tGitOps, full runbook, diagrams\nUpdates\t10%\tNever updated\tManual quarterly\tScheduled weekly, changelogs reviewed\nReliability\t10%\tFrequent crashes\tMostly stable\tUPS, auto-restart, health checks\nPerformance\t10%\tSlow, OOM kills\tAdequate\tResource limits, SSD, HW transcoding\nScalability\t5%\tSingle machine, no plan\tCompose organized\tMulti-node ready, IaC\n10 Self-Hosting Mistakes\n#\tMistake\tFix\n1\tUsing :latest tag\tPin versions: image:1.2.3\n2\tNo backups\t3-2-1 backup rule, test restores\n3\tExposing ports directly\tEverything behind reverse proxy\n4\tDefault passwords\tChange immediately, use password manager\n5\tNo monitoring\tUptime Kuma minimum, Grafana for depth\n6\tRAID = backup mentality\tRAID protects disks, not data\n7\tOver-engineering day 1\tStart small, add complexity as needed\n8\tNo documentation\tDocument every service, every port, every cron\n9\tIgnoring updates\tSecurity patches matter, schedule updates\n10\tRunning as root\tNon-root containers, restricted SSH\nNatural 
Language Commands\nSay\tAgent Does\n\"Set up a new service\"\tGuide through compose file creation with security best practices\n\"Audit my homelab security\"\tRun through security scoring checklist\n\"Plan my backup strategy\"\tDesign 3-2-1 backup plan for your setup\n\"What should I self-host?\"\tAssess needs and recommend services by tier\n\"My container keeps crashing\"\tWalk through troubleshooting decision tree\n\"Help me set up Traefik\"\tGenerate production Traefik config with SSL\n\"Compare NAS options\"\tCompare TrueNAS vs Unraid vs DIY for your needs\n\"Optimize my Docker setup\"\tReview compose files for security and performance\n\"Set up monitoring\"\tDeploy Uptime Kuma + Prometheus + Grafana stack\n\"Plan a hardware upgrade\"\tAssess current usage, recommend hardware by budget\n\"Migrate from cloud to self-hosted\"\tPlan migration with data export and service mapping\n\"Set up remote access\"\tCompare and deploy VPN/Tailscale for secure remote access"
  },
  "trust": {
    "sourceLabel": "tencent",
    "provenanceUrl": "https://clawhub.ai/1kalin/afrexai-self-hosting-mastery",
    "publisherUrl": "https://clawhub.ai/1kalin/afrexai-self-hosting-mastery",
    "owner": "1kalin",
    "version": "1.0.0",
    "license": null,
    "verificationStatus": "Indexed source record"
  },
  "links": {
    "detailUrl": "https://openagent3.xyz/skills/afrexai-self-hosting-mastery",
    "downloadUrl": "https://openagent3.xyz/downloads/afrexai-self-hosting-mastery",
    "agentUrl": "https://openagent3.xyz/skills/afrexai-self-hosting-mastery/agent",
    "manifestUrl": "https://openagent3.xyz/skills/afrexai-self-hosting-mastery/agent.json",
    "briefUrl": "https://openagent3.xyz/skills/afrexai-self-hosting-mastery/agent.md"
  }
}