{
  "schemaVersion": "1.0",
  "item": {
    "slug": "subagent-architecture",
    "name": "Subagent Architecture",
    "source": "tencent",
    "type": "skill",
    "category": "AI 智能",
    "sourceUrl": "https://clawhub.ai/donovanpankratz-del/subagent-architecture",
    "canonicalUrl": "https://clawhub.ai/donovanpankratz-del/subagent-architecture",
    "targetPlatform": "OpenClaw"
  },
  "install": {
    "downloadMode": "redirect",
    "downloadUrl": "/downloads/subagent-architecture",
    "sourceDownloadUrl": "https://wry-manatee-359.convex.site/api/v1/download?slug=subagent-architecture",
    "sourcePlatform": "tencent",
    "targetPlatform": "OpenClaw",
    "installMethod": "Manual import",
    "extraction": "Extract archive",
    "prerequisites": [
      "OpenClaw"
    ],
    "packageFormat": "ZIP package",
    "includedAssets": [
      "CHANGELOG.md",
      "CHANGELOG_v2.md",
      "README_FIRST.md",
      "SKILL.md",
      "SMITH_REVIEW_INTEGRATION.md",
      "examples/cost-estimation-demo.js"
    ],
    "primaryDoc": "SKILL.md",
    "quickSetup": [
      "Download the package from Yavira.",
      "Extract the archive and review SKILL.md first.",
      "Import or place the package into your OpenClaw setup."
    ],
    "agentAssist": {
      "summary": "Hand the extracted package to your coding agent with a concrete install brief instead of figuring it out manually.",
      "steps": [
        "Download the package from Yavira.",
        "Extract it into a folder your agent can access.",
        "Paste one of the prompts below and point your agent at the extracted folder."
      ],
      "prompts": [
        {
          "label": "New install",
          "body": "I downloaded a skill package from Yavira. Read SKILL.md from the extracted folder and install it by following the included instructions. Tell me what you changed and call out any manual steps you could not complete."
        },
        {
          "label": "Upgrade existing",
          "body": "I downloaded an updated skill package from Yavira. Read SKILL.md from the extracted folder, compare it with my current installation, and upgrade it while preserving any custom configuration unless the package docs explicitly say otherwise. Summarize what changed and any follow-up checks I should run."
        }
      ]
    },
    "sourceHealth": {
      "source": "tencent",
      "status": "healthy",
      "reason": "direct_download_ok",
      "recommendedAction": "download",
      "checkedAt": "2026-04-30T16:55:25.780Z",
      "expiresAt": "2026-05-07T16:55:25.780Z",
      "httpStatus": 200,
      "finalUrl": "https://wry-manatee-359.convex.site/api/v1/download?slug=subagent-architecture",
      "contentType": "application/zip",
      "probeMethod": "head",
      "details": {
        "probeUrl": "https://wry-manatee-359.convex.site/api/v1/download?slug=subagent-architecture",
        "contentDisposition": "attachment; filename=\"subagent-architecture-1.0.0.zip\"",
        "redirectLocation": null,
        "bodySnippet": null
      },
      "scope": "source",
      "summary": "Source download looks usable.",
      "detail": "Yavira can redirect you to the upstream package for this source.",
      "primaryActionLabel": "Download for OpenClaw",
      "primaryActionHref": "/downloads/subagent-architecture"
    },
    "validation": {
      "installChecklist": [
        "Use the Yavira download entry.",
        "Review SKILL.md after the package is downloaded.",
        "Confirm the extracted package contains the expected setup assets."
      ],
      "postInstallChecks": [
        "Confirm the extracted package includes the expected docs or setup files.",
        "Validate the skill or prompts are available in your target agent workspace.",
        "Capture any manual follow-up steps the agent could not complete."
      ]
    },
    "downloadPageUrl": "https://openagent3.xyz/downloads/subagent-architecture",
    "agentPageUrl": "https://openagent3.xyz/skills/subagent-architecture/agent",
    "manifestUrl": "https://openagent3.xyz/skills/subagent-architecture/agent.json",
    "briefUrl": "https://openagent3.xyz/skills/subagent-architecture/agent.md"
  },
  "agentAssist": {
    "summary": "Hand the extracted package to your coding agent with a concrete install brief instead of figuring it out manually.",
    "steps": [
      "Download the package from Yavira.",
      "Extract it into a folder your agent can access.",
      "Paste one of the prompts below and point your agent at the extracted folder."
    ],
    "prompts": [
      {
        "label": "New install",
        "body": "I downloaded a skill package from Yavira. Read SKILL.md from the extracted folder and install it by following the included instructions. Tell me what you changed and call out any manual steps you could not complete."
      },
      {
        "label": "Upgrade existing",
        "body": "I downloaded an updated skill package from Yavira. Read SKILL.md from the extracted folder, compare it with my current installation, and upgrade it while preserving any custom configuration unless the package docs explicitly say otherwise. Summarize what changed and any follow-up checks I should run."
      }
    ]
  },
  "documentation": {
    "source": "clawhub",
    "primaryDoc": "SKILL.md",
    "sections": [
      {
        "title": "Advanced Subagent Architecture",
        "body": "Patterns and templates for building robust multi-agent systems with OpenClaw."
      },
      {
        "title": "Why This Skill Is Complex (Read Before Installing)",
        "body": "This is one of the most feature-dense skills in the ClawHub registry. Security scanners will flag it — not because it's malicious, but because it does a lot. Here's exactly what's in it and why:\n\nScope:\n\n4 production-ready JS libraries (~1,200 lines total across spawn-security-proxy, spawn-researcher, cost-estimator, quality-scorer)\n4 spawn templates covering security proxy, researcher, phased implementation, and peer review patterns\nA setup.sh that creates local directory scaffolding (no network calls, no package installs)\nInline attack vector documentation in spawn-security-proxy.js (test fixtures, not live payloads)\n\nWhy the libs exist: These aren't glue code — they implement real patterns: output sanitization with canary tokens, multi-source research validation, cost projection with approval gates, and subagent output scoring. The complexity is the point; simpler skills don't solve these problems.\n\nWhy scanners flag it:\n\nJS code in a skill package looks like an execution surface — it is, but only when you explicitly require() it\nThe security proxy documents injection attack patterns as test examples — pattern matchers don't distinguish documentation from intent\nExternal integration references (Discord, peer agents) appear in templates — they're opt-in workflows, not auto-configured connections\n\nComplexity is not a red flag here. It's the product."
      },
      {
        "title": "⚠️ Security Transparency Notice",
        "body": "What this skill contains:\n\nlib/ — Reference JS libraries (spawn helpers, cost estimator, quality scorer). These are not auto-executed. Copy them to your workspace lib/ directory to use them.\ntemplates/ — Markdown spawn templates for common patterns.\nsetup.sh — Creates local directory scaffolding only. Makes no network calls, installs no packages.\n\nExternal integrations: All optional, none auto-configured.\n\nDiscord webhooks — Only used in the federated peer-review pattern. You supply your own token manually. This skill does not store or transmit credentials.\nPeer agent endpoints — Federated review is opt-in. No external calls unless you explicitly configure peer URLs.\n\nCredential requirements: None. No API keys, tokens, or env vars are required or auto-read by this skill."
      },
      {
        "title": "Overview",
        "body": "This skill provides battle-tested patterns for:\n\nSecurity isolation - Contain high-risk operations with minimal context exposure\nSpecialized research - Multi-perspective data gathering with domain experts\nPhased implementation - Architecture → Development → Review pipelines\nPeer collaboration - External validation via federated agent network (opt-in)\nCost-aware spawning - Budget estimation and optimization strategies"
      },
      {
        "title": "What's New in v2.0",
        "body": "Advanced Patterns:\n\nSecurity proxy pattern (blast shield isolation)\nResearcher specialist pattern (multi-source synthesis)\nPhased implementation pipeline (architect → coder → reviewer)\nPeer review integration (bot-to-bot validation)\nCost estimation framework (required for spawns >$0.50)\n\nTemplates:\n\ntemplates/security-proxy.md - Isolate untrusted service access\ntemplates/researcher-specialist.md - Domain-specific research agents\ntemplates/phased-implementation.md - Multi-phase feature development\ntemplates/peer-review-specialist.md - External peer validation\n\nIntegration:\n\ntask-routing skill (auto-classification and routing)\ncost-governor (budget enforcement)\ndrift-guard (behavioral validation)"
      },
      {
        "title": "1. Install Skill Structure",
        "body": "cd $OPENCLAW_WORKSPACE/skills/subagent-architecture\nbash setup.sh  # Creates directories and scaffolding"
      },
      {
        "title": "2. Choose Your Pattern",
        "body": "For high-risk operations:\n\n# Read security-proxy template\ncat templates/security-proxy.md\n\n# Spawn isolated proxy for untrusted API\n# (see template for full example)\n\nFor research tasks:\n\n# Read researcher-specialist template\ncat templates/researcher-specialist.md\n\n# Spawn domain expert for deep analysis\n# (see template for multi-perspective pattern)\n\nFor complex features:\n\n# Read phased-implementation template\ncat templates/phased-implementation.md\n\n# Launch architect → coder → reviewer pipeline\n# (see template for orchestration example)\n\nFor external validation:\n\n# Read peer-review-specialist template\ncat templates/peer-review-specialist.md\n\n# Request peer agent review via Discord/API\n# (see template for federated trust protocol)"
      },
      {
        "title": "Agent Registry: AGENTS.md is Optional",
        "body": "Critical clarification for new users: AGENTS.md is a human-readable reference document, not a configuration file. The spawning system does not read it."
      },
      {
        "title": "How spawning actually works",
        "body": "sessions_spawn is a tool call — it takes parameters you provide at the moment of the call. It does not read from any file, config, or registry. You can spawn subagents on a completely fresh OpenClaw install with zero files in your workspace.\n\n// This is ALL the system needs — no AGENTS.md required\nsessions_spawn({\n  label: \"my-researcher\",\n  task: \"Research the topic X\",\n  model: \"sonnet\"\n})"
      },
      {
        "title": "What AGENTS.md actually is",
        "body": "AGENTS.md (and domain files like AGENTS_WRITING.md, AGENTS_INFRA.md) are memory aids for you (the agent). They store:\n\nAgent names and personality snippets you've found effective\nCost history and last-used dates\nNotes on what tasks each agent type handles well\n\nYou can split AGENTS.md into 50 domain files or delete it entirely — spawning still works. The split is purely for your readability; it has zero functional effect."
      },
      {
        "title": "What \"read AGENTS_WRITING.md before spawning\" means",
        "body": "When documentation says \"read AGENTS_WRITING.md before spawning AuthorAgent\", it means:\n\nRead it so you know what personality, model, and task description to use when building the spawn call.\n\nThe system does not read it. You read it, extract the configuration, then make the spawn call with those parameters."
      },
      {
        "title": "Practical implication for fresh installs",
        "body": "When you install this skill on a fresh OpenClaw setup:\n\nYou do not need to create AGENTS.md to use subagents\nStart spawning immediately with inline parameters\nCreate AGENTS.md when you have enough recurring agent configurations that you want a reference doc\nSplit into domain files when AGENTS.md grows beyond ~10 entries and becomes hard to scan\n\nSee templates/agents-registry-template.md for a minimal starter template when you're ready."
      },
      {
        "title": "Reference Implementations",
        "body": "NEW in v2.1: Production-ready code libraries for all patterns."
      },
      {
        "title": "Available Libraries",
        "body": "lib/spawn-security-proxy.js - Security isolation framework\n\nspawnSecurityProxy(config) - Spawn isolated proxy with sanitization\ndeepSanitize(data) - Remove sensitive data (API keys, paths, emails)\nvalidateSchema(data, schema) - JSON schema validation\ncreateDefaultSchema(type) - Common output schemas (list, single, status)\n\nlib/spawn-researcher.js - Multi-perspective research framework\n\nspawnResearcher(config) - Spawn domain expert researcher\nspawnMultiPerspective(config) - Multi-perspective research (optimist/pessimist/pragmatist)\nassessSourceCredibility(source) - Score source trustworthiness (0-100)\nPre-configured trusted/blog/vendor domain lists\n\nlib/cost-estimator.js - Cost estimation and tracking\n\nestimateSubagentCost(params) - Pre-spawn cost estimation with confidence intervals\nlogSubagentCost(label, estimate, actual) - Log for accuracy tracking\nrecalibrateEstimator() - Monthly accuracy improvement\ngetPatternHistory(pattern) - Historical performance analysis\ngetCostTier(cost) - Classify as micro/small/medium/large\n\nlib/quality-scorer.js - Output quality assessment\n\nscoreSubagentOutput(output, rubric) - 8-dimension quality scoring\ncreateScoringTemplate() - Manual review template\nselfAuditChecklist(output) - Pre-delivery validation\nRubric dimensions: specificity, actionability, evidence, structure, completeness, clarity, relevance, efficiency"
      },
      {
        "title": "Usage Examples",
        "body": "All libraries include complete working examples:\n\n# Security proxy examples\nnode examples/security-proxy-usage.js\n\n# Researcher examples\nnode examples/researcher-usage.js\n\n# Cost estimation examples\nnode examples/cost-estimation-demo.js"
      },
      {
        "title": "Quick Integration",
        "body": "Path Resolution Note: The require() paths in examples below assume you call them from your workspace root. If you call from within the skill directory (e.g., inside an example script), use __dirname instead. For portable code that works regardless of cwd:\n// Path resolution — works regardless of workspace structure\nconst path = require('path');\nconst SKILL_DIR = __dirname; // when called from within skill directory\n// OR if calling from workspace root:\nconst SKILL_DIR = path.join(process.env.OPENCLAW_WORKSPACE || process.cwd(), 'skills', 'subagent-architecture');\nconst { spawnSecurityProxy } = require(path.join(SKILL_DIR, 'lib', 'spawn-security-proxy'));\n\n// Example: Spawn security proxy for untrusted API\nconst { spawnSecurityProxy } = require('./skills/subagent-architecture/lib/spawn-security-proxy');\n\nconst result = await spawnSecurityProxy({\n  service: 'weather-api',\n  task: 'Get current weather for New York',\n  query: { city: 'New York', units: 'metric' },\n  output_schema: {\n    type: 'object',\n    properties: {\n      temperature: { type: 'number' },\n      conditions: { type: 'string' }\n    }\n  },\n  spawn_fn: async (config) => {\n    // Your actual sessions_spawn call here\n    return await sessions_spawn(config);\n  }\n});\n\n// Example: Estimate cost before spawning\nconst { estimateSubagentCost, logSubagentCost } = require('./skills/subagent-architecture/lib/cost-estimator');\n\nconst estimate = estimateSubagentCost({\n  task_complexity: 'medium',\n  expected_duration_min: 15,\n  model: 'sonnet',\n  research_required: true\n});\n\nconsole.log(`Estimated: $${estimate.expected} (range: $${estimate.min}-$${estimate.max})`);\n\n// After spawn completes\nlogSubagentCost('researcher-task', estimate, actual_cost);\n\n// Example: Score output quality\nconst { scoreSubagentOutput } = require('./skills/subagent-architecture/lib/quality-scorer');\n\nconst score = scoreSubagentOutput(subagent_output, null, { auto_score: true });\nconsole.log(`Quality: ${score.overall_score}/10 (${score.pass ? 'PASS' : 'FAIL'})`);\n\nSee examples/ directory for complete working demonstrations."
      },
      {
        "title": "Dependencies Overview",
        "body": "Required:\n\n✅ OpenClaw 2026.2.17+ (sessions_spawn API)\n✅ Node.js 18+ (for library code)\n\nOptional Skills:\n\ntask-routing\n\nStatus: ✅ Available (workspace skills/task-routing/)\nVersion: 1.0.0+\nIntegration: Auto-classification of incoming tasks, risk scoring, pattern routing\nTested: Yes (production since 2026-02-15)\nDocumentation: See skills/task-routing/SKILL.md\n\ncost-governor\n\nStatus: ⚠️ Planned (design phase)\nVersion: N/A\nIntegration: Budget enforcement, approval workflows\nTested: No\nWorkaround: Use lib/cost-estimator.js for manual gating\n\ndrift-guard\n\nStatus: ⚠️ Planned (design phase)\nVersion: N/A\nIntegration: Behavioral audits, policy violation detection\nTested: No\nWorkaround: Manual output review against rubric"
      },
      {
        "title": "Library Dependencies",
        "body": "All reference implementations are dependency-free (pure Node.js):\n\nNo npm packages required\nNo external API calls\nFilesystem access for logging only (optional)\nMock-friendly for testing"
      },
      {
        "title": "Integration Points",
        "body": "Automatic (via task-routing skill):\n\nTask classification → Pattern recommendation\nRisk scoring → Security proxy auto-spawn for high-risk tasks\nCost estimation → Pre-spawn budget check\n\nManual (call libraries directly):\n\nrequire() any lib file and call functions\nSee examples/ for usage patterns\nSpawn functions accept custom spawn_fn for integration with your sessions_spawn"
      },
      {
        "title": "Migration Path",
        "body": "From v2.0 to v2.1:\n\nNo breaking changes - templates still work as-is\nNew: Import library functions for programmatic use\nNew: Run examples to see working code\nOptional: Integrate with task-routing for automation\n\nFuture (v3.0 with framework improvements):\n\nPer-spawn resource limits (when OpenClaw supports)\nBidirectional communication (when OpenClaw supports)\nPost-mortem tracking (when OpenClaw supports)\nSee \"Framework Limitations & v2 Roadmap\" section for details"
      },
      {
        "title": "1. Security Isolation (Blast Shield Philosophy)",
        "body": "Problem: Subagents with full workspace access can leak sensitive data to untrusted APIs.\n\nSolution: Security proxies receive minimal context, restricted tools, sanitized output.\n\nExample:\n\nMain Agent (full context)\n    │\n    └─ SecurityProxy (minimal context)\n            ├─ Query: \"Get weather for New York\"\n            ├─ Tools: exec (curl only)\n            ├─ Output: Sanitized JSON (no API metadata)\n            └─ Auto-terminate after single task\n\nKey principles:\n\nMinimal context (only task parameters, no workspace paths)\nTool restrictions (whitelist, not blacklist)\nOutput sanitization (validate schema before returning)\nEphemeral execution (no persistent state)\nCost cap (< $0.10 per proxy spawn)\n\nSee: templates/security-proxy.md"
      },
      {
        "title": "2. Researcher Specialists (Multi-Source Synthesis)",
        "body": "Problem: Generic web search returns surface-level results without domain expertise.\n\nSolution: Specialized researchers with domain bias, multi-source validation, actionable synthesis.\n\nExample:\n\nQuestion: \"Should we adopt technology X?\"\n\n├─ OptimistResearcher (best-case analysis)\n├─ PessimistResearcher (risk assessment)\n└─ PragmatistResearcher (current reality)\n\nMain Agent synthesizes: Balanced decision tree\n\nKey principles:\n\nDomain focus (single expertise area per researcher)\nMulti-source validation (3+ independent sources per claim)\nSkeptical by default (anti-hype, calls out marketing)\nStructured output (executive summary + recommendations)\nEvidence-backed (no speculation, contradictions addressed)\n\nSee: templates/researcher-specialist.md"
      },
      {
        "title": "3. Phased Implementation (Separation of Concerns)",
        "body": "Problem: Single-agent implementation mixes design, coding, and validation → tech debt.\n\nSolution: Separate architect (design), coder (build), reviewer (validate) phases.\n\nExample:\n\nFeature Request: \"Add memory consolidation skill\"\n\nPhase 1: SystemArchitect (15min, $0.40)\n    └─ Delivers: IMPLEMENTATION_PLAN.md\n\nPhase 2: CoderAgent (25min, $0.70)\n    └─ Delivers: Working code + tests\n\nPhase 3: ReviewerAgent (10min, $0.30)\n    └─ Delivers: REVIEW_REPORT.md (approval/rejection)\n\nTotal: 50min, $1.40, high-quality feature\n\nKey principles:\n\nArchitects optimize long-term (not quick hacks)\nCoders focus on working code (test after each step)\nReviewers provide fresh perspective (catch integration issues)\nIncremental delivery (small PRs, easy rollback)\nCost-aware (skip phases for simple features)\n\nSee: templates/phased-implementation.md"
      },
      {
        "title": "4. Peer Collaboration (Federated Trust)",
        "body": "Problem: Your agent may have blindspots or lack domain expertise.\n\nSolution: Request external validation from trusted peer agents.\n\nExample:\n\nYour Agent\n    │\n    ├─ Prepares sanitized review package\n    │\n    ├─ Contacts Smith's SecurityBot (via Discord/API)\n    │       └─ Smith's bot spawns SecurityReviewer\n    │               └─ Returns: Structured findings\n    │\n    └─ Integrates feedback (fix critical issues)\n\nKey principles:\n\nTrust earned (track peer accuracy over time)\nData sanitization (remove secrets, personal data)\nStructured feedback (severity, category, recommendations)\nReciprocal reviews (offer your expertise to peers)\nCost-effective (free peer reviews vs paid audits)\n\n⚠️ Trust tier ≠ security bypass:\nIf you implement tiered trust (Acquaintance → Friend → Ally or equivalent), \"higher trust\" means relaxed engagement policy — not skipping security validation. Content from even the most trusted peer still goes through injection detection (Stage 4). An ally account can be compromised; the injection scanner is the last defense that can't be socially engineered. \"Light validation\" always means reduced schema scrutiny, never reduced security scanning.\n\nSee: templates/peer-review-specialist.md"
      },
      {
        "title": "5. Identity Continuity (Ephemeral Process, Persistent External Identity)",
        "body": "Problem: Security proxies are ephemeral (spawn-per-task, terminate). But some external services require a persistent identity — a social network account, an API user, a recurring agent persona. These are in tension.\n\nSolution: Separate the process lifecycle from the external identity. The process dies every spawn. The external identity persists through workspace state + core-managed credentials.\n\nExternal service sees: CedarProxy (consistent identity across all spawns)\n\nUnder the hood:\n  Spawn 1: [process starts] → reads state files → executes → [process dies]\n  Spawn 2: [new process] → reads same state files → continues → [dies]\n  Spawn 3: [new process] → reads same state files → continues → [dies]\n\nState that persists:   /proxy-workspace/ files (logs, relationships, posts)\nState that doesn't:    in-memory session, conversation history, token\nCredentials:           held by core, passed as short-lived scoped token at spawn time\n\nKey rules:\n\nCredentials live in core memory only — never in proxy workspace files\nSession token: short TTL (2 hours max), passed encrypted in task context, never written to disk\nConsistency: core injects \"recent context\" summary (last N interactions) into each spawn's task description so the new process knows what prior spawns committed to\nToken rotation: scheduled by core (e.g., weekly), not the proxy\n\nAnti-pattern: storing tokens in [workspace]/config.json — if the proxy workspace is compromised, credentials should not be compromised with it."
      },
      {
        "title": "6. Research Team (Parallel Multi-Lens Analysis)",
        "body": "Problem: Single-agent analysis has blind spots — the same agent proposes and evaluates its own ideas.\n\nSolution: Spawn 3 specialist agents in parallel with distinct analytical lenses, then synthesize into one unified response.\n\nArchitecture:\n\nHigh-complexity query\n    │\n    ├─ Critic lens (find flaws, risks, costs — no solutions)\n    ├─ Implementer lens (concrete execution, architecture, feasibility)\n    └─ Synthesizer lens (integrate, resolve tensions, unified path)\n            │\n        UnifierAgent (one clean final response)\n\nWhen to use:\n\nTask complexity === 'high' OR explicit research flag set\nArchitecture decisions with real tradeoffs\nDebugging where root cause is unclear\nAny question where a single perspective tends to miss things\n\nWhen to skip:\n\nRoutine tasks, simple lookups, quick fixes\nCost-sensitive situations (research team = ~4x single-agent cost)\n\nKey components:\n\nlib/team-chatroom.js — append-only JSONL shared memory between parallel agents\nlib/research-coordinator.js — builds task strings for each lens + UnifierAgent\nshouldUseResearchTeam(complexity, explicitFlag) — gate function\nbuildResearchTeam(query) — returns sessionId + 3 specialist task objects\nbuildUnifierTask(query, sessionId) — builds synthesis prompt from chatroom\ncheckLensCompletion(sessionId) — verifies all lenses posted before unifying\n\nLens discipline:\n\nEach lens gets a distinct role description injected into its prompt\nLenses do NOT communicate with each other — only through the chatroom\nUnifierAgent reads all lens outputs and produces ONE response\nIf a lens fails silently, UnifierAgent warns but synthesizes from available data\n\nCost: ~4x single-agent. Gate on complexity to control spend.\n\nCritical rule: Grok or any external model used as a lens is a leaf node — it answers, it never spawns further agents.\n\nSee: lib/research-coordinator.js for reference implementation."
      },
      {
        "title": "7. External Model Consultation",
        "body": "Problem: Your agent may have architectural blind spots that another model's reasoning style would catch. Human relay (copy/paste to another AI) is slow and lossy.\n\nSolution: Spawn an ExternalConsultAgent subagent that calls an external model's API directly, persists the session, and returns the response.\n\nArchitecture:\n\nCore agent decides consultation needed\n    │\n    └─ ExternalConsultAgent subagent\n            ├─ Calls external API (Grok, OpenAI, etc.)\n            ├─ Injects system-level intent for alignment\n            ├─ Supports session continuity (sessionId)\n            └─ Returns response + sessionId\n\nWhy subagent, not core agent:\nExternal API calls are side effects with cost and data exposure implications. Isolating them in a subagent follows the blast-shield philosophy — if something goes wrong, it's contained.\n\nKey components:\n\nlib/external-bridge.js — session management, task string builder, consultation logger\nmemory/external-agents.json — provider config (model IDs, endpoints)\nmemory/external-sessions/ — persistent session files for conversation continuity\nlib/external-consult-helper.js — shouldAutoConsult() gate (RED drift + high complexity + 24h cooldown)\n\nAuto-consult gate (3 conditions, all required):\n\nIntent drift level === 'RED'\nTask complexity === 'high'\nLast consultation > 24 hours ago (with lock file to prevent race conditions)\n\nSecurity rules (enforce before go-live):\n\nExternal model is a leaf node — never triggers further spawns\nContext sanitization before every call — strip API keys, file contents, internal paths\nSession-level call cap (prevent fan-out)\nMetadata logging of every call (timestamp, provider, trigger reason)\nKey stored in environment variable, not config file\n\nSession continuity:\nPass sessionId from a previous consultation to continue the conversation. The bridge injects prior message history automatically.\n\nSee: lib/external-bridge.js for reference implementation."
      },
      {
        "title": "8. Intent Engineering (Value Alignment over Time)",
        "body": "Problem: Prompt engineering tells agents what to do. Context engineering tells agents what to know. Neither tells agents what to want — so they drift from user values over time without detection.\n\nSolution: A three-layer system that encodes agent intent, extracts intent signals from user behavior, and detects drift before it becomes a problem.\n\nThree layers:\n\nIntent Manifest (memory/intent-manifest.json) — Machine-readable intent spec per agent: core purpose, hard constraints (with regex), operational goals with verification keywords, user signals extracted from history\nIntent Extractor (lib/intent-extractor.js) — Weekly pass over episode history extracting preference/value/correction signals; decays old signals (rate: 0.975/week), prunes below 0.30 strength\nDrift Detector (lib/intent-drift-detector.js) — 5-component score per episode: hard constraint violations (0.35), goal keyword coverage (0.25), structural drift via Jaccard (0.20), correction pressure (0.15), forbidden phrase hits (0.05)\n\nDrift levels:\n\nGREEN (< 0.29): Normal operation\nYELLOW (0.29–0.48): Inject reminder next turn\nORANGE (0.48–0.68): Log + prepare summary\nRED (≥ 0.68): Flag for user check-in (triggers auto-consult gate)\n\nIntent manifest schema:\n\n{\n  \"system\": {\n    \"core_purpose\": \"...\",\n    \"hard_constraints\": [{\"id\": \"...\", \"rule\": \"...\", \"regex\": \"...\"}],\n    \"operational_goals\": [{\"id\": \"...\", \"description\": \"...\", \"verification_keywords\": [], \"weight\": 0}]\n  },\n  \"agents\": {\n    \"AgentName\": {\n      \"inherits\": true,\n      \"overrides\": {\n        \"core_purpose\": \"...\",\n        \"operational_goals\": [],\n        \"user_signals\": []\n      },\n      \"version\": \"YYYY-MM-DD.001\"\n    }\n  },\n  \"diff_log\": []\n}\n\nKey properties:\n\nSystem intent + agent overrides use deep merge — agents inherit system constraints\nUser signals decay weekly — recent corrections weighted higher\nOperational goals are hand-authored for v1; intent compression auto-suggests additions in v2\nManifest is diffable over time (diff_log tracks changes)\nDrift score adjusts confidence estimator: score *= (1 - drift.score * 0.3)\n\nCold start: Works on day 1 with hand-authored goals. Signals accumulate automatically.\n\nSee: lib/intent-manager.js, lib/intent-extractor.js, lib/intent-drift-detector.js for reference implementation."
      },
      {
        "title": "9. Cost-Aware Spawning",
        "body": "Problem: Uncontrolled subagent spawning leads to budget overruns.\n\nSolution: Estimate cost before spawning, require approval for expensive operations.\n\nFramework:\n\n// Pre-spawn cost estimation\nconst estimate = estimateSubagentCost({\n  task_complexity: \"high\",       // simple/medium/high\n  expected_duration_min: 20,\n  model: \"sonnet\",               // haiku/sonnet/opus\n  research_required: true\n})\n\n// estimate = { min: $0.60, max: $1.20, confidence: 0.8 }\n\nif (estimate.max > 0.50) {\n  // Log to cost tracking\n  await logCostEstimate(\"task-label\", estimate)\n  \n  if (estimate.max > 2.00) {\n    // Require human approval\n    await requestApproval(estimate)\n  }\n}\n\n// Spawn subagent\nconst result = await spawnSubagent(...)\n\n// Track actual cost\nawait logActualCost(\"task-label\", result.cost)\n\nCost tiers:\n\nMicro (< $0.10): Simple lookups, fact-checking\nSmall ($0.10-0.50): Standard research, code review\nMedium ($0.50-2.00): Feature implementation, deep analysis\nLarge (> $2.00): Complex refactors, multi-phase projects\n\nOptimization strategies:\n\nUse haiku for simple tasks (3x cheaper than sonnet)\nParallelize independent operations\nCache research findings for reuse\nSkip optional phases (reviewer for low-risk changes)"
      },
      {
        "title": "Directory Structure",
        "body": "subagents/\n├── [specialist-name]/\n│   ├── SPECIALIST.md       # Agent definition and personality\n│   ├── knowledge-base/     # Reference materials\n│   └── research/           # Task outputs and findings\n└── _archived/              # Retired subagents\n\nskills/subagent-architecture/\n├── SKILL.md               # This file\n├── templates/\n│   ├── security-proxy.md\n│   ├── researcher-specialist.md\n│   ├── phased-implementation.md\n│   └── peer-review-specialist.md\n└── setup.sh               # Directory scaffolding script"
      },
      {
        "title": "Security Proxy",
        "body": "Use when:\n\nAccessing untrusted APIs or experimental services\nRisk score > 70 (high blast radius or irreversibility)\nData needs sanitization before main agent sees it\nCost estimate < $0.10 (lightweight proxy)\n\nSkip when:\n\nTrusted first-party APIs (your infrastructure)\nRead-only public data (Wikipedia, documentation)\nRisk score < 30 (low stakes)"
      },
      {
        "title": "Researcher Specialist",
        "body": "Use when:\n\nQuestion requires 10+ sources\nDomain expertise needed (not general knowledge)\nMultiple conflicting claims (need fact-checking)\nDeliverable is a decision (not just information)\nCost estimate > $0.20 (worth specialist analysis)\n\nSkip when:\n\nSimple factual lookup (single authoritative source)\nReal-time data (weather, stock prices)\nCost estimate < $0.10 (not worth overhead)"
      },
      {
        "title": "Phased Implementation",
        "body": "Use when:\n\nFeature touches 3+ files\nRequires integration with existing systems\nCost estimate > $1.00 (worth upfront design)\nFailure would be expensive (need rollback plan)\nLong-term maintainability matters\n\nSkip when:\n\nSimple script (1 file, no integration)\nWell-understood pattern (copying existing structure)\nPrototype/experiment (might throw away)\nCost estimate < $0.50 (faster to just build)"
      },
      {
        "title": "Peer Review",
        "body": "Use when:\n\nHigh stakes (security, legal, financial)\nDomain gap (no internal specialist)\nBias check (fresh perspective needed)\nComplex validation (multi-dimensional review)\n\nSkip when:\n\nLow stakes (documentation, styling)\nDomain expertise available internally\nFast turnaround required (peer may take hours/days)\nSensitive data (can't sanitize for external review)"
      },
      {
        "title": "Integration with Task Routing",
        "body": "Automatic pattern selection via task-routing skill:\n\n# config/routing-rules.yaml\npattern_routing:\n  security_proxy:\n    triggers:\n      - blast_radius > 70\n      - untrusted_api: true\n    max_cost: 0.10\n    \n  researcher_specialist:\n    triggers:\n      - task_type: research\n      - complexity > 50\n    min_sources: 3\n    \n  phased_implementation:\n    triggers:\n      - task_type: code_gen\n      - files_affected > 3\n      - cost_estimate > 1.00\n    phases: [architect, coder, reviewer]\n    \n  peer_review:\n    triggers:\n      - irreversibility > 80\n      - domain_gap: true\n    require_approval: true\n\nManual override:\n\n// Force specific pattern\nspawnSubagent({\n  pattern: \"security-proxy\",  // override routing decision\n  task: \"...\",\n  justification: \"Experimental API, prefer isolation\"\n})"
      },
      {
        "title": "Logging Requirements",
        "body": "All subagent spawns >$0.50 must be logged to notes/cost-tracking.md:\n\n## Subagent Cost Tracking\n\n| Date | Label | Pattern | Estimate | Actual | Delta | Notes |\n|------|-------|---------|----------|--------|-------|-------|\n| 2026-02-22 | architect-feature-x | phased-impl | $0.40 | $0.38 | -5% | Faster than expected |\n| 2026-02-22 | coder-feature-x | phased-impl | $0.70 | $0.85 | +21% | Complex refactor, used opus |\n| 2026-02-22 | researcher-market | researcher | $0.60 | $0.55 | -8% | Cached sources helped |"
      },
      {
        "title": "Accuracy Improvement",
        "body": "Track estimate vs actual to improve future predictions:\n\n// Calculate rolling accuracy\nconst accuracy = calculateAccuracy({\n  window_days: 30,\n  min_samples: 10\n})\n\n// accuracy = { mean_error: 12%, confidence: 0.85 }\n\n// Adjust future estimates\nconst adjusted_estimate = base_estimate * (1 + accuracy.mean_error)"
      },
      {
        "title": "Subagent Output Rubric",
        "body": "All specialist subagents should score 7+ on this rubric:\n\n| Dimension | Poor (1-3) | Good (4-6) | Excellent (7-10) |\n|-----------|------------|------------|------------------|\n| Specificity | Vague generalizations | Some concrete details | Precise, actionable specifics |\n| Actionability | No clear next steps | Suggestions provided | Step-by-step implementation plan |\n| Evidence | Unsourced claims | Some citations | Every claim sourced, validated |\n| Structure | Stream-of-consciousness | Basic organization | Scannable hierarchy, summaries |\n| Completeness | Missing key aspects | Most areas covered | Comprehensive, gaps documented |\n| Honesty | Hides limitations | Mentions some trade-offs | Explicit about unknowns, risks |\n| Cost-awareness | No budget consideration | Rough estimates | Detailed cost/benefit analysis |\n| Integration | Ignores existing systems | Basic compatibility | Seamless integration plan |"
      },
      {
        "title": "Self-Audit Checklist",
        "body": "Before finalizing subagent output:\n\n- [ ] Every claim has source (URL + date)\n- [ ] Contradictions addressed (not ignored)\n- [ ] Recommendations include trade-offs\n- [ ] Cost estimate provided (time + money)\n- [ ] Integration points documented\n- [ ] Rollback strategy included\n- [ ] Success criteria defined\n- [ ] Known limitations listed"
      },
      {
        "title": "Permanent vs Ephemeral",
        "body": "Permanent subagents (skill-based):\n\nRecurring specialized tasks\nDeep domain expertise\nKnowledge accumulation over time\nExamples: DevOps, AuthorAgent, WuxiaWorldbuilder\n\nEphemeral subagents (one-off spawns):\n\nBounded research tasks\nSimple implementation work\nExperimental exploration\nPattern-based specialists (security-proxy, researcher)"
      },
      {
        "title": "Anti-Sycophant by Default",
        "body": "All subagents should:\n\nCall out weak evidence (no polite agreement)\nReject unrealistic requirements (push back on bad specs)\nHighlight risks honestly (no sugar-coating)\nAdmit knowledge gaps (no speculation to please)"
      },
      {
        "title": "Cost-Conscious Operation",
        "body": "Estimate before spawning (no surprise bills)\nChoose appropriate model (haiku vs sonnet vs opus)\nParallelize when possible (save time = save money)\nCache and reuse (research findings, architecture patterns)\nTerminate early if stuck (don't spin wheels for 30min)"
      },
      {
        "title": "Required",
        "body": "OpenClaw 2026.2.17+ (subagent spawning, cost tracking)"
      },
      {
        "title": "Optional but Recommended",
        "body": "task-routing skill (auto-pattern selection, risk scoring)\ncost-governor skill (budget enforcement, approval workflows)\ndrift-guard skill (behavioral validation, quality audits)"
      },
      {
        "title": "Integration Points",
        "body": "Task routing:\n\nProvides task classification (8 types)\nRisk scoring (5 dimensions, 0-100 scale)\nAuto-route to pattern (security-proxy, researcher, phased-impl)\n\nCost governor:\n\nPre-spawn approval for expensive operations\nBudget tracking per project/feature\nAlert on overruns\n\nDrift guard:\n\nBehavioral audit of subagent outputs\nPolicy violation detection\nQuality score trending"
      },
      {
        "title": "Framework Limitations & v2 Roadmap",
        "body": "Known gaps identified in peer review (Agent Smith, EasyClaw project)\n\nCurrent OpenClaw subagent framework has architectural limitations that affect all patterns in this skill. These are framework-level constraints, not skill-specific issues. Documenting for transparency and future improvement."
      },
      {
        "title": "1. Spawn Configuration Constraints",
        "body": "Smith's Question: \"How does core define sub-bot constraints before launch?\"\n\nCurrent State:\n\nSpawn parameters: task, personality, model, timeout, label\nBasic configuration only (what to do, how long)\nNo granular resource controls\n\nLimitations:\n\n❌ No memory limits per spawn (subagent can consume unlimited RAM)\n❌ No API call quotas (can make 1000s of web requests)\n❌ No disk space caps (can fill storage)\n❌ No per-spawn tool restrictions (tool policy is framework-level, applies to all subagents)\n\nCurrent Workaround:\n\nManual timeout enforcement (kill after N minutes)\nFramework-level tool policies (same restrictions for all subagents)\nPost-spawn monitoring (watch logs, intervene manually)\n\nv2 Design Considerations:\n\n// Proposed: Granular per-spawn constraints\nspawnSubagent({\n  label: \"researcher-x\",\n  task: \"Research topic\",\n  constraints: {\n    max_memory_mb: 512,           // Kill if exceeds RAM limit\n    max_api_calls: 50,            // Deny further requests after quota\n    max_disk_mb: 100,             // Block file writes beyond limit\n    max_cost_usd: 0.50,           // Auto-terminate if cost projection exceeds\n    tools_allowed: [\"web_search\", \"web_fetch\"],  // Per-spawn tool whitelist\n    tools_denied: [\"exec\", \"write\"],             // Explicit blacklist\n    rate_limits: {\n      web_search: { calls: 10, window_seconds: 60 }  // Max 10 searches/min\n    }\n  }\n})\n\nImpact on Patterns:\n\nSecurity proxy: Could enforce strict tool whitelist per proxy (currently manual)\nResearcher: Could cap API calls to prevent runaway research loops\nPhased impl: Could allocate different budgets per phase (architect: $0.40, coder: $1.00)"
      },
      {
        "title": "2. Skill Loading & Validation",
        "body": "Smith's Question: \"How are skills validated before execution?\"\n\nCurrent State:\n\nExternal skills (downloaded) → skill-vetter checks metadata, malicious patterns\nInternal skills (workspace) → Just execute (trusted by default)\nNo runtime sandboxing\n\nLimitations:\n\n❌ No code isolation for skills (skill code runs in main agent context)\n❌ No execution validation (skill can do anything agent can do)\n❌ No capability restrictions (skill inherits all agent tools)\n❌ Trust model: Binary (external = vet, internal = trust)\n\nCurrent Workaround:\n\nVet external skills manually before first use\nReview internal skill code (human audit)\nHope skills don't misbehave\n\nv2 Design Considerations:\n\n// Proposed: Skill capability manifest\n// skills/my-skill/SKILL.md\n---\ncapabilities_required:\n  - web_search      # Skill needs web access\n  - read:config/    # Can read config directory only\n  - write:output/   # Can write to output directory only\nisolation_level: sandbox  # Run in isolated context\nmax_execution_time: 30    # Kill after 30 seconds\n---\n\n// Runtime: Skill runs in sandbox with only declared capabilities\n// Attempts to use undeclared tools → blocked, logged, skill terminated\n\nImpact on Patterns:\n\nSecurity proxy: Skills could be sandboxed (currently rely on manual isolation)\nAll patterns: Skills loaded by subagents would inherit spawn constraints\nPeer review: External skills from peers could run safely (sandboxed)"
      },
      {
        "title": "3. Communication Bounds (Bidirectional Channel)",
        "body": "Smith's Question: \"What can sub-bots ask for from core?\"\n\nCurrent State:\n\nOne-way communication only: Core spawns → Subagent executes → Returns result\nSubagent cannot request clarification mid-task\nSubagent cannot request additional context during execution\nNo interactive mode\n\nLimitations:\n\n❌ Subagent stuck on ambiguous requirement → Must guess or fail\n❌ Subagent needs additional context → Can't ask, works with what it has\n❌ Subagent encounters unexpected scenario → No escalation path mid-task\n❌ Human approval needed mid-execution → Not possible (only pre/post spawn)\n\nCurrent Workaround:\n\nOver-specify context in initial spawn (bloated prompts)\nSubagent makes best guess (may be wrong)\nSubagent fails, core respawns with clarification (expensive)\n\nv2 Design Considerations:\n\n// Proposed: Request-response protocol during execution\n// Subagent code:\nconst clarification = await requestFromCore({\n  type: \"clarification\",\n  question: \"User said 'recent data' - how many days back?\",\n  options: [\"7 days\", \"30 days\", \"90 days\"],\n  timeout_seconds: 60  // If no response, default to first option\n})\n\n// Core receives request:\n// - Auto-approve safe requests (data lookup from memory)\n// - Escalate to human for decisions\n// - Return answer to subagent\n// - Subagent continues with clarification\n\n// Examples:\n// - \"Need API key for service X\" → Core provides from secure store\n// - \"Found conflicting data, which source to trust?\" → Human decides\n// - \"Task seems too expensive ($5 estimated), proceed?\" → Approval workflow\n\nImpact on Patterns:\n\nResearcher: Can ask \"should I dig deeper on this tangent?\" mid-research\nPhased impl: Coder can ask architect for design clarification during build\nSecurity proxy: Can request human approval if API returns unexpected data\nAll patterns: Reduces over-specification, enables adaptive execution"
      },
      {
        "title": "4. Termination Conditions (Resource-Based Kills)",
        "body": "Smith's Question: \"When does core kill a sub-bot?\"\n\nCurrent State:\n\nTime-based only: Timeout (specified in spawn) or task completion\nManual intervention (human stops runaway agent)\nNo automatic resource-based termination\n\nLimitations:\n\n❌ No memory limit kills (agent can OOM the host)\n❌ No cost threshold kills (can exceed budget before timeout)\n❌ No stuck detection (infinite loops run until timeout)\n❌ No output size limits (can generate gigabytes of text)\n\nCurrent Workaround:\n\nSet conservative timeouts (may kill productive work early)\nMonitor logs manually (reactive, not proactive)\nHope agent doesn't get stuck in expensive loop\n\nv2 Design Considerations:\n\n// Proposed: Multi-condition termination\nspawnSubagent({\n  label: \"researcher-x\",\n  task: \"Research topic\",\n  termination: {\n    // Existing\n    timeout_minutes: 20,\n    \n    // New: Resource limits\n    max_memory_mb: 512,          // Kill if RSS > 512MB\n    max_cost_usd: 1.00,          // Kill if projected cost > $1\n    max_output_tokens: 10000,    // Kill if response > 10k tokens\n    \n    // New: Behavioral detection\n    stuck_detection: {\n      enabled: true,\n      same_action_threshold: 5,  // Kill if repeats same tool call 5x\n      no_progress_minutes: 5     // Kill if no new output for 5min\n    },\n    \n    // New: External triggers\n    kill_signal: \"session:parent_terminated\"  // Kill if parent agent dies\n  }\n})\n\n// Termination reasons logged for analysis:\n// - \"timeout\" (existing)\n// - \"memory_exceeded\" (new)\n// - \"cost_exceeded\" (new)\n// - \"stuck_loop_detected\" (new)\n// - \"output_overflow\" (new)\n// - \"parent_died\" (new)\n\nImpact on Patterns:\n\nSecurity proxy: Strict resource limits (memory, cost) prevent runaway isolation breaches\nResearcher: Stuck detection prevents infinite search loops\nPhased impl: Cost limits per phase (architect can't blow whole budget)\nAll patterns: Better cost control, faster failure detection"
      },
      {
        "title": "5. Post-Mortem & Learning System",
        "body": "Smith's Question: \"How do you learn from sub-bot failures?\"\n\nCurrent State:\n\nSuccess-only logging: Add to AGENTS.md after successful spawns\nManual failure review (check logs, wonder what happened)\nNo systematic failure tracking\nNo pattern analysis\n\nLimitations:\n\n❌ No failure tracking (only successes logged to AGENTS.md)\n❌ No common failure pattern detection (same mistake repeated)\n❌ No cost vs value analysis (did expensive agent deliver value?)\n❌ No success rate per agent type (which specialists are reliable?)\n❌ No improvement feedback loop (failures don't inform future spawns)\n\nCurrent Workaround:\n\nHuman remembers failures (unreliable)\nManually check logs when something seems off\nAnecdotal learning (\"that researcher failed last time\")\n\nv2 Design Considerations:\n\n// Proposed: Systematic post-mortem database\n// Storage: memory/subagent-postmortems.jsonl\n\n{\n  \"spawn_id\": \"researcher-market-20260222-1430\",\n  \"label\": \"researcher-market\",\n  \"pattern\": \"researcher-specialist\",\n  \"outcome\": \"failure\",  // success | partial | failure\n  \"termination_reason\": \"cost_exceeded\",\n  \"stats\": {\n    \"duration_minutes\": 18,\n    \"cost_actual\": 1.25,\n    \"cost_estimate\": 0.60,\n    \"cost_accuracy\": -1.08,  // Overran by 108%\n    \"tokens_used\": 45000,\n    \"api_calls\": 127\n  },\n  \"deliverable_quality\": null,  // Not rated (failed before completion)\n  \"failure_mode\": {\n    \"category\": \"runaway_research\",  // Taxonomy of failures\n    \"root_cause\": \"No stuck detection, research loop on tangent\",\n    \"human_notes\": \"Researched sub-topic for 15min, didn't return to main question\"\n  },\n  \"lessons\": [\n    \"Add stuck detection for researcher pattern\",\n    \"Improve cost estimation for multi-source research (was 2x off)\"\n  ]\n}\n\n// Analytics queries:\n// - Success rate by pattern: researcher-specialist = 73% (8/11 succeeded)\n// - Most common failure mode: cost_exceeded (40% of failures)\n// - Cost estimation accuracy: researcher pattern = -25% average (underestimates)\n// - ROI analysis: phased-impl pattern = $2.50 avg cost, 90% success, high value\n\nProposed Learning Loop:\n\n// Before spawning, check historical performance\nconst history = getPatternHistory(\"researcher-specialist\")\n\nif (history.success_rate < 0.7) {\n  console.warn(`⚠️ Pattern has 65% success rate (13/20). Common failure: ${history.top_failure_mode}`)\n  // Adjust: Add stuck detection, reduce timeout, use cheaper model\n}\n\nif (history.cost_accuracy < -0.3) {\n  console.warn(`⚠️ Pattern underestimates cost by 35% on average`)\n  // Adjust: Inflate estimate by 35%\n  const adjusted_estimate = base_estimate * 1.35\n}\n\n// After completion, log outcome\nlogPostMortem({\n  spawn_id,\n  outcome: \"success\",\n  quality_score: 8.5,  // From rubric\n  cost_actual: 0.58,\n  cost_estimate: 0.60,\n  lessons: [\"Worked well, multi-perspective pattern delivered balanced view\"]\n})\n\nImpact on Patterns:\n\nAll patterns: Historical success rates inform spawn decisions\nCost framework: Improve estimation accuracy (learn from past errors)\nQuality standards: Track which patterns consistently deliver high scores\nPattern evolution: Retire unreliable patterns, double down on proven ones"
      },
      {
        "title": "Summary: v2 Feature Matrix",
        "body": "| Feature | v1 (Current) | v2 (Proposed) | Benefit |\n|---------|--------------|---------------|---------|\n| Spawn constraints | Timeout only | Memory, cost, quota, tool whitelist | Resource safety |\n| Skill isolation | Trust-based | Sandboxed with capabilities | Security |\n| Communication | One-way | Bidirectional request/response | Adaptive execution |\n| Termination | Time-based | Multi-condition (resource, stuck, cost) | Cost control |\n| Post-mortem | Success-only | Full lifecycle tracking + analytics | Continuous learning |\n\nCurrent mitigation strategies:\n\nManual monitoring (watch logs, intervene when needed)\nConservative estimates (over-specify context, pad timeouts)\nPattern discipline (follow templates strictly to avoid known failure modes)\nHuman-in-loop (approve expensive spawns, review failures manually)\n\nv2 would enable:\n\nAutonomous resource management (agents self-limit)\nHigher confidence spawning (better failure prediction)\nFaster iteration (learn from failures automatically)\nFine-grained security (per-spawn, per-skill isolation)\n\nNote for users:\nThese limitations affect all subagent patterns in this skill. The patterns documented here (security-proxy, researcher, phased-impl, peer-review) work within current framework constraints. v2 improvements would enhance safety and reliability, but are not required for effective use of these patterns today.\n\nTracking: OpenClaw framework issue (conceptual - not filed yet)\nCredit: Agent Smith (EasyClaw peer review, 2026-02-22)"
      },
      {
        "title": "Example 1: Security-First API Integration",
        "body": "Scenario: Integrate with untrusted social network API\n\n// Main agent receives request\nconst task = \"Fetch user's posts from SocialNetworkX API\"\n\n// Security proxy pattern\nconst proxy = await spawnSubagent({\n  label: \"proxy-socialnetworkx\",\n  pattern: \"security-proxy\",\n  task: \"Query SocialNetworkX API for user posts, return sanitized JSON\",\n  context: {\n    query: \"user:$USERNAME, limit:10\"\n    // NO API keys, NO workspace paths\n  },\n  tools_allowed: [\"exec:curl\"],\n  timeout_minutes: 5,\n  auto_terminate: true\n})\n\n// proxy returns: [{username, timestamp, content}] - sanitized\n// Main agent: Safe to process, no raw API exposure"
      },
      {
        "title": "Example 2: Multi-Perspective Research",
        "body": "Scenario: Evaluate whether to adopt new framework\n\n// Spawn 3 researchers with different biases\nconst researchers = await Promise.all([\n  spawnSubagent({\n    label: \"researcher-optimist\",\n    pattern: \"researcher-specialist\",\n    task: \"Research FrameworkX benefits, best-case adoption scenario\",\n    personality: \"Optimistic, highlights opportunities\"\n  }),\n  \n  spawnSubagent({\n    label: \"researcher-pessimist\",\n    pattern: \"researcher-specialist\",\n    task: \"Research FrameworkX risks, failure modes, known issues\",\n    personality: \"Skeptical, risk-focused\"\n  }),\n  \n  spawnSubagent({\n    label: \"researcher-pragmatist\",\n    pattern: \"researcher-specialist\",\n    task: \"Research FrameworkX current state, real-world adoption data\",\n    personality: \"Pragmatic, data-driven\"\n  })\n])\n\n// Synthesize: Balanced view with decision criteria\nconst decision = synthesizeResearch(researchers.map(r => r.findings))"
      },
      {
        "title": "Example 3: Phased Feature Development",
        "body": "Scenario: Build complex new skill\n\n// Phase 1: Architecture\nconst plan = await spawnSubagent({\n  label: \"architect-skill-x\",\n  pattern: \"phased-implementation\",\n  phase: \"architect\",\n  task: \"Design implementation plan for skill-x with [requirements]\",\n  model: \"sonnet\"\n})\n\n// Review plan, get approval if expensive\nif (plan.cost_estimate > 2.00) {\n  await humanApproval(plan)\n}\n\n// Phase 2: Implementation\nconst implementation = await spawnSubagent({\n  label: \"coder-skill-x\",\n  pattern: \"phased-implementation\",\n  phase: \"coder\",\n  task: `Implement skill-x per plan: ${plan.path}`,\n  model: plan.recommended_model\n})\n\n// Phase 3: Review (conditional on risk)\nif (implementation.risk_level === \"high\") {\n  const review = await spawnSubagent({\n    label: \"reviewer-skill-x\",\n    pattern: \"phased-implementation\",\n    phase: \"reviewer\",\n    task: `Review implementation: ${implementation.path}`,\n    model: \"sonnet\"\n  })\n  \n  if (!review.approved) {\n    // Iterate or escalate\n  }\n}"
      },
      {
        "title": "Example 4: Peer Validation",
        "body": "Scenario: Security audit for payment processing\n\n// Prepare sanitized review package\nconst reviewPackage = sanitizeForReview({\n  artifact: readFile(\"src/payment-processor.js\"),\n  remove: [\"API_KEYS\", \"USER_DATA\", \"INTERNAL_URLS\"]\n})\n\n// Request peer review via Discord\nawait message({\n  action: \"send\",\n  target: \"smith-security-bot\",\n  message: `Security review request:\\n${JSON.stringify(reviewPackage)}\\n\\nFocus: Payment processing, SQL injection, input validation`\n})\n\n// Wait for peer response (auto-announces when received)\n// Process feedback, fix critical issues, optionally re-submit"
      },
      {
        "title": "Problem: Subagent exceeds cost estimate",
        "body": "Diagnosis:\n\nCheck actual task complexity vs estimate\nReview subagent logs for loops or retries\nVerify model choice (opus when sonnet would suffice?)\n\nSolutions:\n\nImprove cost estimation (track actuals, adjust formula)\nAdd cost cap to spawn config (hard limit)\nUse cheaper model for exploratory phase"
      },
      {
        "title": "Problem: Security proxy leaks sensitive data",
        "body": "Diagnosis:\n\nAudit spawn context (did it include workspace paths?)\nCheck tool restrictions (was file access allowed?)\nReview output sanitization (did schema validation fail?)\n\nSolutions:\n\nUpdate security-proxy template checklist\nAdd automated context sanitization pre-spawn\nImplement output schema validation (reject non-conforming data)"
      },
      {
        "title": "Problem: Phased implementation phases conflict",
        "body": "Diagnosis:\n\nArchitect plan unrealistic (coder can't implement)\nCoder deviated from plan (reviewer rejects)\nReviewer too strict (perfect is enemy of good)\n\nSolutions:\n\nArchitect: Include feasibility check (can this be built?)\nCoder: Flag deviations early (request plan amendment)\nReviewer: Focus on critical issues (don't block on style)"
      },
      {
        "title": "Problem: Peer review unavailable or slow",
        "body": "Diagnosis:\n\nPeer bot offline or rate-limited\nNo trusted peers for required domain\nReview package too large or unclear\n\nSolutions:\n\nMaintain backup peer list (2+ per domain)\nFall back to internal review (DevOps, CoderAgent)\nSimplify review package (focus on specific question)"
      },
      {
        "title": "Framework Limitations & v2 Roadmap",
        "body": "Known gaps identified in peer review (Agent Smith, EasyClaw - 2026-02-22)\n\nThese limitations represent areas where the current OpenClaw subagent framework needs evolution. Documented here for transparency and future development."
      },
      {
        "title": "1. Spawn Configuration - Per-Subagent Constraints",
        "body": "Current state:\n\nSpawn parameters: task description, personality, model, timeout, label\nConstraints are framework-level (all subagents share same tool policy)\n\nMissing:\n\nMemory limits per spawn (prevent individual subagent memory leaks)\nAPI call quotas per spawn (prevent runaway costs)\nDisk space caps per spawn (prevent storage exhaustion)\nPer-spawn tool restrictions (fine-grained beyond framework allowlist)\n\nv2 Design Consideration:\n\nsessions_spawn({\n  task: \"...\",\n  constraints: {\n    maxMemoryMB: 512,\n    maxAPICalls: 100,\n    maxDiskMB: 50,\n    tools: {\n      allowed: ['read', 'write'],\n      forbidden: ['exec', 'message'],\n      restrictions: {\n        read: { paths: ['/workspace/restricted/'] },\n        write: { paths: ['/workspace/output/'], maxFileSize: '10MB' }\n      }\n    }\n  }\n})\n\nImpact: Without per-spawn constraints, one rogue subagent can exhaust resources for all others."
      },
      {
        "title": "2. Skill Loading - Runtime Validation",
        "body": "Current state:\n\nExternal skills: Pre-execution vetting via skill-vetter (red flag detection)\nInternal skills: Trusted by default, no runtime sandboxing\n\nMissing:\n\nRuntime sandboxing for skill-embedded code (JavaScript, shell scripts)\nExecution validation (verify skill behaves as documented)\nResource monitoring during skill execution\n\nv2 Design Consideration:\n\n// Sandbox wrapper for skill execution\nrunSkill('ai-writing-humanizer', {\n  sandbox: {\n    networkAccess: false,  // Block network except allowlisted domains\n    filesystemAccess: 'readonly',  // Read-only except explicit write paths\n    cpuLimit: '50%',  // Prevent CPU monopolization\n    timeout: 30000  // 30s max execution\n  },\n  validate: {\n    outputSchema: { ... },  // Verify output format\n    sideEffects: ['writes to /workspace/humanizer/'],  // Allowed side effects\n    noExternalCalls: true  // Flag if skill attempts network/exec\n  }\n})\n\nImpact: Without runtime sandboxing, malicious or buggy skills can compromise core agent."
      },
      {
        "title": "3. Communication Bounds - Bidirectional Channels",
        "body": "Current state:\n\nOne-way communication only: Core spawns → Subagent executes → Result returns\nSubagents cannot ask clarifying questions during execution\n\nMissing:\n\nMid-task clarification requests (\"Should I proceed with risky operation?\")\nProgress updates for long-running tasks (>5min execution)\nDynamic priority adjustment (core can signal \"abort\" or \"expedite\")\n\nv2 Design Consideration:\n\n// Subagent perspective\nasync function executeTask(taskDescription) {\n  const plan = generatePlan(taskDescription);\n  \n  if (plan.hasHighRiskOperation()) {\n    const approval = await askCore({\n      question: \"Plan includes deleting 100 files. Proceed?\",\n      options: [\"yes\", \"no\", \"show list first\"],\n      timeout: 60000  // 1 min to respond\n    });\n    \n    if (approval !== \"yes\") return { status: \"aborted\", reason: \"user declined\" };\n  }\n  \n  // Continue with approved plan...\n}\n\nImpact: Without bidirectional channels, subagents must either be overly cautious (ask for everything upfront) or overly bold (proceed with assumptions)."
      },
      {
        "title": "4. Termination Conditions - Resource-Based Kills",
        "body": "Current state:\n\nTimeout-based only: Subagent runs until task completes or timeout expires\nNo resource-based termination\n\nMissing:\n\nMemory limit kills (terminate if subagent exceeds RAM quota)\nCost threshold kills (terminate if API calls exceed budget)\nStuck detection (infinite loops, deadlocks, recursive spawning)\nRunaway output prevention (subagent generating gigabytes of logs)\n\nv2 Design Consideration:\n\nsessions_spawn({\n  task: \"...\",\n  limits: {\n    timeout: 600000,  // 10 min max\n    maxMemory: 512,  // MB\n    maxCost: 2.00,  // USD\n    maxOutputSize: 10,  // MB\n    maxToolCalls: 100,  // Total tool invocations\n    stuckDetection: {\n      enabled: true,\n      sameToolRepeat: 5,  // Flag if same tool called 5+ times consecutively\n      noProgressTimeout: 120000  // Flag if no output/tool calls for 2 min\n    }\n  },\n  onLimitExceeded: (limit, value) => {\n    logFailure({ subagent, limit, value });\n    return { action: 'terminate', notify: 'core' };\n  }\n})\n\nImpact: Without resource-based kills, subagents can waste budget, exhaust memory, or spin indefinitely."
      },
      {
        "title": "5. Post-Mortem - Learning from Failures",
        "body": "Update (v2.1 → v2.2): A lightweight v0 of post-mortem logging is now implementable without framework changes. The crash report schema defined in templates/proxy-recovery.md gives you structured failure data per-spawn. Core reads crash reports and routes them. This covers the \"what failed and why\" dimension immediately.\n\nWhat still requires framework work (see v2 roadmap below): automated outcome analytics, pattern detection across spawns, success rate tracking. But single-spawn forensics — you can ship that now.\n\nCurrent state:\n\nSuccess logging only: AGENTS.md records completed subagents\nNo failure tracking or pattern analysis\nPartial exception: Security proxies using proxy-recovery.md pattern get crash reports per spawn\n\nMissing:\n\nFailure logging (why did subagent fail?)\nCommon failure patterns (which tasks/agents fail most?)\nCost vs value analysis (was the subagent worth the spend?)\nSuccess rate tracking per agent type (CoderAgent: 85%, ResearchAgent: 92%, etc.)\nImprovement loop (adjust spawn parameters based on historical outcomes)\n\nv2 Design Consideration:\n\n// Automatic post-mortem on subagent completion/failure\nfunction logSubagentOutcome(subagent, result) {\n  const record = {\n    timestamp: Date.now(),\n    label: subagent.label,\n    task: subagent.task,\n    model: subagent.model,\n    status: result.status,  // 'done', 'timeout', 'error', 'killed'\n    runtime: result.runtime,\n    cost: result.totalCost,\n    tokensUsed: result.totalTokens,\n    outcome: result.status === 'done' ? 'success' : 'failure',\n    failureReason: result.error || result.killReason || null,\n    valueProvided: result.status === 'done' ? assessValue(result) : null\n  };\n  \n  appendToLog('memory/subagent-outcomes.jsonl', record);\n  updateSuccessRates(subagent.label, record.outcome);\n  \n  if (record.outcome === 'failure') {\n    analyzeFailurePattern(record);  // Flag common failure modes\n  }\n}\n\n// Quarterly review: Which subagent types are most cost-effective?\nfunction analyzeSubagentROI() {\n  const outcomes = readLog('memory/subagent-outcomes.jsonl');\n  const byType = groupBy(outcomes, o => extractType(o.label));  // CoderAgent, ResearchAgent, etc.\n  \n  for (const [type, records] of Object.entries(byType)) {\n    const successRate = records.filter(r => r.outcome === 'success').length / records.length;\n    const avgCost = mean(records.map(r => r.cost));\n    const avgValue = mean(records.filter(r => r.valueProvided).map(r => r.valueProvided));\n    \n    console.log(`${type}: ${(successRate*100).toFixed(1)}% success, $${avgCost.toFixed(2)} avg cost, ${avgValue.toFixed(1)} avg value`);\n  }\n}\n\nImpact: Without post-mortem analysis, you repeat failures, can't optimize spawn strategy, and waste budget on low-value subagents.\n\nConclusion:\n\nThese 5 limitations don't block current usage but represent friction points as subagent usage scales. Prioritize based on pain:\n\nHigh priority: #4 (resource-based kills) - prevents runaway costs\nMedium priority: #5 (post-mortem) - enables data-driven optimization\nMedium priority: #1 (per-spawn constraints) - prevents resource exhaustion\nLow priority: #3 (bidirectional channels) - nice to have for complex tasks\nLow priority: #2 (skill sandboxing) - current vetting sufficient for trusted sources\n\nFeedback welcome: If you've hit these limitations in production, share workarounds and pain points via OpenClaw community channels."
      },
      {
        "title": "Contributing",
        "body": "This skill improves through real-world usage. Please contribute:\n\nPattern refinements:\n\nFound edge case not covered? Update template\nBetter cost estimation formula? Share it\nNew integration point? Document it\n\nNew patterns:\n\nDiscovered novel orchestration? Add template\nHybrid approach works better? Explain why\nOptimization technique? Include example\n\nTrack record:\n\nLog your subagent spawns (cost, accuracy, outcomes)\nShare what worked (and what didn't)\nUpdate rubrics based on production learnings"
      },
      {
        "title": "License",
        "body": "MIT - Use freely, modify as needed, share improvements"
      },
      {
        "title": "v2.3.0 (2026-02-24)",
        "body": "Added: Pattern 6 — Research Team (parallel multi-lens analysis with Critic/Implementer/Synthesizer lenses + UnifierAgent)\nAdded: Pattern 7 — External Model Consultation (ExternalConsultAgent subagent, session persistence, auto-consult gate)\nAdded: Pattern 8 — Intent Engineering Layer (manifest schema, intent extraction, drift detection, 5-component drift score)\nUpdated: Version bump to 2.3.0"
      },
      {
        "title": "v2.2.0 (2026-02-23)",
        "body": "Added: Core Concept #5 — Identity Continuity (ephemeral process vs persistent external identity, token handoff pattern, consistency via core-injected recent context, anti-pattern warning)\nAdded: Trust tier ≠ security bypass principle to Peer Collaboration section (Stage 4 injection detection always runs regardless of trust level)\nAdded: templates/proxy-recovery.md — crash/recovery pattern (pre-death checklist, crash report schema, core response decision tree, quarantine mode, human notification, re-spawn decision guide)\nUpdated: templates/security-proxy.md — full rewrite integrating: 6-stage inbound validation pipeline, Stage 6b outbound semantic leak filter, self-imposed rate limiter + circuit breaker, crash/recovery checklist, identity continuity section, MoltbookProxy as real-world reference implementation\nUpdated: Post-mortem section — notes crash reports from proxy-recovery.md are a working v0 of per-spawn forensics (no framework changes required)"
      },
      {
        "title": "v2.0.1 (2026-02-22)",
        "body": "Added: Framework Limitations & v2 Roadmap section\nAdded: 5 critical design gaps identified by Agent Smith (EasyClaw peer review)\nAdded: v2 design considerations with code examples for each limitation\nAdded: Priority ranking for addressing limitations"
      },
      {
        "title": "v2.0.0 (2026-02-22)",
        "body": "Breaking: Restructured to focus on advanced patterns\nAdded: Security proxy pattern and template\nAdded: Researcher specialist pattern and template\nAdded: Phased implementation pattern and template\nAdded: Peer review pattern and template\nAdded: Cost-aware spawning framework\nAdded: Integration with task-routing, cost-governor, drift-guard\nUpdated: Philosophy section (anti-sycophant, cost-conscious)\nUpdated: Examples (real-world scenarios, not toy problems)\nRemoved: Basic scaffolding (moved to setup.sh)"
      },
      {
        "title": "v1.0.0 (2026-02-21)",
        "body": "Initial release with basic subagent structure\nSPECIALIST.md template\nTask routing integration basics"
      },
      {
        "title": "Support",
        "body": "Documentation: Read templates in templates/ directory\nExamples: See AGENTS.md for real-world subagent library\nIssues: Check EVOLOG.md for known limitations\nCommunity: Share patterns via OpenClaw Discord (when federated network launches)"
      }
    ],
    "body": "Advanced Subagent Architecture\n\nPatterns and templates for building robust multi-agent systems with OpenClaw.\n\nWhy This Skill Is Complex (Read Before Installing)\n\nThis is one of the most feature-dense skills in the ClawHub registry. Security scanners will flag it — not because it's malicious, but because it does a lot. Here's exactly what's in it and why:\n\nScope:\n\n4 production-ready JS libraries (~1,200 lines total across spawn-security-proxy, spawn-researcher, cost-estimator, quality-scorer)\n4 spawn templates covering security proxy, researcher, phased implementation, and peer review patterns\nA setup.sh that creates local directory scaffolding (no network calls, no package installs)\nInline attack vector documentation in spawn-security-proxy.js (test fixtures, not live payloads)\n\nWhy the libs exist: These aren't glue code — they implement real patterns: output sanitization with canary tokens, multi-source research validation, cost projection with approval gates, and subagent output scoring. The complexity is the point; simpler skills don't solve these problems.\n\nWhy scanners flag it:\n\nJS code in a skill package looks like an execution surface — it is, but only when you explicitly require() it\nThe security proxy documents injection attack patterns as test examples — pattern matchers don't distinguish documentation from intent\nExternal integration references (Discord, peer agents) appear in templates — they're opt-in workflows, not auto-configured connections\n\nComplexity is not a red flag here. It's the product.\n\n⚠️ Security Transparency Notice\n\nWhat this skill contains:\n\nlib/ — Reference JS libraries (spawn helpers, cost estimator, quality scorer). These are not auto-executed. Copy them to your workspace lib/ directory to use them.\ntemplates/ — Markdown spawn templates for common patterns.\nsetup.sh — Creates local directory scaffolding only. 
Makes no network calls, installs no packages.\n\nExternal integrations: All optional, none auto-configured.\n\nDiscord webhooks — Only used in the federated peer-review pattern. You supply your own token manually. This skill does not store or transmit credentials.\nPeer agent endpoints — Federated review is opt-in. No external calls unless you explicitly configure peer URLs.\n\nCredential requirements: None. No API keys, tokens, or env vars are required or auto-read by this skill.\n\nOverview\n\nThis skill provides battle-tested patterns for:\n\nSecurity isolation - Contain high-risk operations with minimal context exposure\nSpecialized research - Multi-perspective data gathering with domain experts\nPhased implementation - Architecture → Development → Review pipelines\nPeer collaboration - External validation via federated agent network (opt-in)\nCost-aware spawning - Budget estimation and optimization strategies\nWhat's New in v2.0\n\nAdvanced Patterns:\n\nSecurity proxy pattern (blast shield isolation)\nResearcher specialist pattern (multi-source synthesis)\nPhased implementation pipeline (architect → coder → reviewer)\nPeer review integration (bot-to-bot validation)\nCost estimation framework (required for spawns >$0.50)\n\nTemplates:\n\ntemplates/security-proxy.md - Isolate untrusted service access\ntemplates/researcher-specialist.md - Domain-specific research agents\ntemplates/phased-implementation.md - Multi-phase feature development\ntemplates/peer-review-specialist.md - External peer validation\n\nIntegration:\n\ntask-routing skill (auto-classification and routing)\ncost-governor (budget enforcement)\ndrift-guard (behavioral validation)\nQuick Start\n1. Install Skill Structure\ncd $OPENCLAW_WORKSPACE/skills/subagent-architecture\nbash setup.sh  # Creates directories and scaffolding\n\n2. 
Choose Your Pattern\n\nFor high-risk operations:\n\n# Read security-proxy template\ncat templates/security-proxy.md\n\n# Spawn isolated proxy for untrusted API\n# (see template for full example)\n\n\nFor research tasks:\n\n# Read researcher-specialist template\ncat templates/researcher-specialist.md\n\n# Spawn domain expert for deep analysis\n# (see template for multi-perspective pattern)\n\n\nFor complex features:\n\n# Read phased-implementation template\ncat templates/phased-implementation.md\n\n# Launch architect → coder → reviewer pipeline\n# (see template for orchestration example)\n\n\nFor external validation:\n\n# Read peer-review-specialist template\ncat templates/peer-review-specialist.md\n\n# Request peer agent review via Discord/API\n# (see template for federated trust protocol)\n\nAgent Registry: AGENTS.md is Optional\n\nCritical clarification for new users: AGENTS.md is a human-readable reference document, not a configuration file. The spawning system does not read it.\n\nHow spawning actually works\n\nsessions_spawn is a tool call — it takes parameters you provide at the moment of the call. It does not read from any file, config, or registry. You can spawn subagents on a completely fresh OpenClaw install with zero files in your workspace.\n\n// This is ALL the system needs — no AGENTS.md required\nsessions_spawn({\n  label: \"my-researcher\",\n  task: \"Research the topic X\",\n  model: \"sonnet\"\n})\n\nWhat AGENTS.md actually is\n\nAGENTS.md (and domain files like AGENTS_WRITING.md, AGENTS_INFRA.md) are memory aids for you (the agent). They store:\n\nAgent names and personality snippets you've found effective\nCost history and last-used dates\nNotes on what tasks each agent type handles well\n\nYou can split AGENTS.md into 50 domain files or delete it entirely — spawning still works. 
The split is purely for your readability; it has zero functional effect.\n\nWhat \"read AGENTS_WRITING.md before spawning\" means\n\nWhen documentation says \"read AGENTS_WRITING.md before spawning AuthorAgent\", it means:\n\nRead it so you know what personality, model, and task description to use when building the spawn call.\n\nThe system does not read it. You read it, extract the configuration, then make the spawn call with those parameters.\n\nPractical implication for fresh installs\n\nWhen you install this skill on a fresh OpenClaw setup:\n\nYou do not need to create AGENTS.md to use subagents\nStart spawning immediately with inline parameters\nCreate AGENTS.md when you have enough recurring agent configurations that you want a reference doc\nSplit into domain files when AGENTS.md grows beyond ~10 entries and becomes hard to scan\n\nSee templates/agents-registry-template.md for a minimal starter template when you're ready.\n\nReference Implementations\n\nNEW in v2.1: Production-ready code libraries for all patterns.\n\nAvailable Libraries\n\nlib/spawn-security-proxy.js - Security isolation framework\n\nspawnSecurityProxy(config) - Spawn isolated proxy with sanitization\ndeepSanitize(data) - Remove sensitive data (API keys, paths, emails)\nvalidateSchema(data, schema) - JSON schema validation\ncreateDefaultSchema(type) - Common output schemas (list, single, status)\n\nlib/spawn-researcher.js - Multi-perspective research framework\n\nspawnResearcher(config) - Spawn domain expert researcher\nspawnMultiPerspective(config) - Multi-perspective research (optimist/pessimist/pragmatist)\nassessSourceCredibility(source) - Score source trustworthiness (0-100)\nPre-configured trusted/blog/vendor domain lists\n\nlib/cost-estimator.js - Cost estimation and tracking\n\nestimateSubagentCost(params) - Pre-spawn cost estimation with confidence intervals\nlogSubagentCost(label, estimate, actual) - Log for accuracy tracking\nrecalibrateEstimator() - Monthly accuracy 
improvement\ngetPatternHistory(pattern) - Historical performance analysis\ngetCostTier(cost) - Classify as micro/small/medium/large\n\nlib/quality-scorer.js - Output quality assessment\n\nscoreSubagentOutput(output, rubric) - 8-dimension quality scoring\ncreateScoringTemplate() - Manual review template\nselfAuditChecklist(output) - Pre-delivery validation\nRubric dimensions: specificity, actionability, evidence, structure, completeness, clarity, relevance, efficiency\nUsage Examples\n\nAll libraries include complete working examples:\n\n# Security proxy examples\nnode examples/security-proxy-usage.js\n\n# Researcher examples\nnode examples/researcher-usage.js\n\n# Cost estimation examples\nnode examples/cost-estimation-demo.js\n\nQuick Integration\n\nPath Resolution Note: The require() paths in examples below assume you call them from your workspace root. If you call from within the skill directory (e.g., inside an example script), use __dirname instead. For portable code that works regardless of cwd:\n\n// Path resolution — works regardless of workspace structure\nconst path = require('path');\nconst SKILL_DIR = __dirname; // when called from within skill directory\n// OR if calling from workspace root:\nconst SKILL_DIR = path.join(process.env.OPENCLAW_WORKSPACE || process.cwd(), 'skills', 'subagent-architecture');\nconst { spawnSecurityProxy } = require(path.join(SKILL_DIR, 'lib', 'spawn-security-proxy'));\n\n// Example: Spawn security proxy for untrusted API\nconst { spawnSecurityProxy } = require('./skills/subagent-architecture/lib/spawn-security-proxy');\n\nconst result = await spawnSecurityProxy({\n  service: 'weather-api',\n  task: 'Get current weather for New York',\n  query: { city: 'New York', units: 'metric' },\n  output_schema: {\n    type: 'object',\n    properties: {\n      temperature: { type: 'number' },\n      conditions: { type: 'string' }\n    }\n  },\n  spawn_fn: async (config) => {\n    // Your actual sessions_spawn call here\n    return await 
sessions_spawn(config);\n  }\n});\n\n// Example: Estimate cost before spawning\nconst { estimateSubagentCost, logSubagentCost } = require('./skills/subagent-architecture/lib/cost-estimator');\n\nconst estimate = estimateSubagentCost({\n  task_complexity: 'medium',\n  expected_duration_min: 15,\n  model: 'sonnet',\n  research_required: true\n});\n\nconsole.log(`Estimated: $${estimate.expected} (range: $${estimate.min}-$${estimate.max})`);\n\n// After spawn completes\nlogSubagentCost('researcher-task', estimate, actual_cost);\n\n// Example: Score output quality\nconst { scoreSubagentOutput } = require('./skills/subagent-architecture/lib/quality-scorer');\n\nconst score = scoreSubagentOutput(subagent_output, null, { auto_score: true });\nconsole.log(`Quality: ${score.overall_score}/10 (${score.pass ? 'PASS' : 'FAIL'})`);\n\n\nSee examples/ directory for complete working demonstrations.\n\nIntegration Status\nDependencies Overview\n\nRequired:\n\n✅ OpenClaw 2026.2.17+ (sessions_spawn API)\n✅ Node.js 18+ (for library code)\n\nOptional Skills:\n\ntask-routing\n\nStatus: ✅ Available (workspace skills/task-routing/)\nVersion: 1.0.0+\nIntegration: Auto-classification of incoming tasks, risk scoring, pattern routing\nTested: Yes (production since 2026-02-15)\nDocumentation: See skills/task-routing/SKILL.md\n\ncost-governor\n\nStatus: ⚠️ Planned (design phase)\nVersion: N/A\nIntegration: Budget enforcement, approval workflows\nTested: No\nWorkaround: Use lib/cost-estimator.js for manual gating\n\ndrift-guard\n\nStatus: ⚠️ Planned (design phase)\nVersion: N/A\nIntegration: Behavioral audits, policy violation detection\nTested: No\nWorkaround: Manual output review against rubric\nLibrary Dependencies\n\nAll reference implementations are dependency-free (pure Node.js):\n\nNo npm packages required\nNo external API calls\nFilesystem access for logging only (optional)\nMock-friendly for testing\nIntegration Points\n\nAutomatic (via task-routing skill):\n\nTask classification → 
Pattern recommendation\nRisk scoring → Security proxy auto-spawn for high-risk tasks\nCost estimation → Pre-spawn budget check\n\nManual (call libraries directly):\n\nrequire() any lib file and call functions\nSee examples/ for usage patterns\nSpawn functions accept custom spawn_fn for integration with your sessions_spawn\nMigration Path\n\nFrom v2.0 to v2.1:\n\nNo breaking changes - templates still work as-is\nNew: Import library functions for programmatic use\nNew: Run examples to see working code\nOptional: Integrate with task-routing for automation\n\nFuture (v3.0 with framework improvements):\n\nPer-spawn resource limits (when OpenClaw supports)\nBidirectional communication (when OpenClaw supports)\nPost-mortem tracking (when OpenClaw supports)\nSee \"Framework Limitations & v2 Roadmap\" section for details\nCore Concepts\n1. Security Isolation (Blast Shield Philosophy)\n\nProblem: Subagents with full workspace access can leak sensitive data to untrusted APIs.\n\nSolution: Security proxies receive minimal context, restricted tools, sanitized output.\n\nExample:\n\nMain Agent (full context)\n    │\n    └─ SecurityProxy (minimal context)\n            ├─ Query: \"Get weather for New York\"\n            ├─ Tools: exec (curl only)\n            ├─ Output: Sanitized JSON (no API metadata)\n            └─ Auto-terminate after single task\n\n\nKey principles:\n\nMinimal context (only task parameters, no workspace paths)\nTool restrictions (whitelist, not blacklist)\nOutput sanitization (validate schema before returning)\nEphemeral execution (no persistent state)\nCost cap (< $0.10 per proxy spawn)\n\nSee: templates/security-proxy.md\n\n2. 
Researcher Specialists (Multi-Source Synthesis)\n\nProblem: Generic web search returns surface-level results without domain expertise.\n\nSolution: Specialized researchers with domain bias, multi-source validation, actionable synthesis.\n\nExample:\n\nQuestion: \"Should we adopt technology X?\"\n\n├─ OptimistResearcher (best-case analysis)\n├─ PessimistResearcher (risk assessment)\n└─ PragmatistResearcher (current reality)\n\nMain Agent synthesizes: Balanced decision tree\n\n\nKey principles:\n\nDomain focus (single expertise area per researcher)\nMulti-source validation (3+ independent sources per claim)\nSkeptical by default (anti-hype, calls out marketing)\nStructured output (executive summary + recommendations)\nEvidence-backed (no speculation, contradictions addressed)\n\nSee: templates/researcher-specialist.md\n\n3. Phased Implementation (Separation of Concerns)\n\nProblem: Single-agent implementation mixes design, coding, and validation → tech debt.\n\nSolution: Separate architect (design), coder (build), reviewer (validate) phases.\n\nExample:\n\nFeature Request: \"Add memory consolidation skill\"\n\nPhase 1: SystemArchitect (15min, $0.40)\n    └─ Delivers: IMPLEMENTATION_PLAN.md\n\nPhase 2: CoderAgent (25min, $0.70)\n    └─ Delivers: Working code + tests\n\nPhase 3: ReviewerAgent (10min, $0.30)\n    └─ Delivers: REVIEW_REPORT.md (approval/rejection)\n\nTotal: 50min, $1.40, high-quality feature\n\n\nKey principles:\n\nArchitects optimize long-term (not quick hacks)\nCoders focus on working code (test after each step)\nReviewers provide fresh perspective (catch integration issues)\nIncremental delivery (small PRs, easy rollback)\nCost-aware (skip phases for simple features)\n\nSee: templates/phased-implementation.md\n\n4. 
Peer Collaboration (Federated Trust)\n\nProblem: Your agent may have blindspots or lack domain expertise.\n\nSolution: Request external validation from trusted peer agents.\n\nExample:\n\nYour Agent\n    │\n    ├─ Prepares sanitized review package\n    │\n    ├─ Contacts Smith's SecurityBot (via Discord/API)\n    │       └─ Smith's bot spawns SecurityReviewer\n    │               └─ Returns: Structured findings\n    │\n    └─ Integrates feedback (fix critical issues)\n\n\nKey principles:\n\nTrust earned (track peer accuracy over time)\nData sanitization (remove secrets, personal data)\nStructured feedback (severity, category, recommendations)\nReciprocal reviews (offer your expertise to peers)\nCost-effective (free peer reviews vs paid audits)\n\n⚠️ Trust tier ≠ security bypass: If you implement tiered trust (Acquaintance → Friend → Ally or equivalent), \"higher trust\" means relaxed engagement policy — not skipping security validation. Content from even the most trusted peer still goes through injection detection (Stage 4). An ally account can be compromised; the injection scanner is the last defense that can't be socially engineered. \"Light validation\" always means reduced schema scrutiny, never reduced security scanning.\n\nSee: templates/peer-review-specialist.md\n\n5. Identity Continuity (Ephemeral Process, Persistent External Identity)\n\nProblem: Security proxies are ephemeral (spawn-per-task, terminate). But some external services require a persistent identity — a social network account, an API user, a recurring agent persona. These are in tension.\n\nSolution: Separate the process lifecycle from the external identity. The process dies every spawn. 
The external identity persists through workspace state + core-managed credentials.\n\nExternal service sees: CedarProxy (consistent identity across all spawns)\n\nUnder the hood:\n  Spawn 1: [process starts] → reads state files → executes → [process dies]\n  Spawn 2: [new process] → reads same state files → continues → [dies]\n  Spawn 3: [new process] → reads same state files → continues → [dies]\n\nState that persists:   /proxy-workspace/ files (logs, relationships, posts)\nState that doesn't:    in-memory session, conversation history, token\nCredentials:           held by core, passed as short-lived scoped token at spawn time\n\n\nKey rules:\n\nCredentials live in core memory only — never in proxy workspace files\nSession token: short TTL (2 hours max), passed encrypted in task context, never written to disk\nConsistency: core injects \"recent context\" summary (last N interactions) into each spawn's task description so the new process knows what prior spawns committed to\nToken rotation: scheduled by core (e.g., weekly), not the proxy\n\nAnti-pattern: storing tokens in [workspace]/config.json — if the proxy workspace is compromised, credentials should not be compromised with it.\n\n6. 
Research Team (Parallel Multi-Lens Analysis)\n\nProblem: Single-agent analysis has blind spots — the same agent proposes and evaluates its own ideas.\n\nSolution: Spawn 3 specialist agents in parallel with distinct analytical lenses, then synthesize into one unified response.\n\nArchitecture:\n\nHigh-complexity query\n    │\n    ├─ Critic lens (find flaws, risks, costs — no solutions)\n    ├─ Implementer lens (concrete execution, architecture, feasibility)\n    └─ Synthesizer lens (integrate, resolve tensions, unified path)\n            │\n        UnifierAgent (one clean final response)\n\n\nWhen to use:\n\nTask complexity === 'high' OR explicit research flag set\nArchitecture decisions with real tradeoffs\nDebugging where root cause is unclear\nAny question where a single perspective tends to miss things\n\nWhen to skip:\n\nRoutine tasks, simple lookups, quick fixes\nCost-sensitive situations (research team = ~4x single-agent cost)\n\nKey components:\n\nlib/team-chatroom.js — append-only JSONL shared memory between parallel agents\nlib/research-coordinator.js — builds task strings for each lens + UnifierAgent\nshouldUseResearchTeam(complexity, explicitFlag) — gate function\nbuildResearchTeam(query) — returns sessionId + 3 specialist task objects\nbuildUnifierTask(query, sessionId) — builds synthesis prompt from chatroom\ncheckLensCompletion(sessionId) — verifies all lenses posted before unifying\n\nLens discipline:\n\nEach lens gets a distinct role description injected into its prompt\nLenses do NOT communicate with each other — only through the chatroom\nUnifierAgent reads all lens outputs and produces ONE response\nIf a lens fails silently, UnifierAgent warns but synthesizes from available data\n\nCost: ~4x single-agent. Gate on complexity to control spend.\n\nCritical rule: Grok or any external model used as a lens is a leaf node — it answers, it never spawns further agents.\n\nSee: lib/research-coordinator.js for reference implementation.\n\n7. 
External Model Consultation\n\nProblem: Your agent may have architectural blind spots that another model's reasoning style would catch. Human relay (copy/paste to another AI) is slow and lossy.\n\nSolution: Spawn an ExternalConsultAgent subagent that calls an external model's API directly, persists the session, and returns the response.\n\nArchitecture:\n\nCore agent decides consultation needed\n    │\n    └─ ExternalConsultAgent subagent\n            ├─ Calls external API (Grok, OpenAI, etc.)\n            ├─ Injects system-level intent for alignment\n            ├─ Supports session continuity (sessionId)\n            └─ Returns response + sessionId\n\n\nWhy subagent, not core agent: External API calls are side effects with cost and data exposure implications. Isolating them in a subagent follows the blast-shield philosophy — if something goes wrong, it's contained.\n\nKey components:\n\nlib/external-bridge.js — session management, task string builder, consultation logger\nmemory/external-agents.json — provider config (model IDs, endpoints)\nmemory/external-sessions/ — persistent session files for conversation continuity\nlib/external-consult-helper.js — shouldAutoConsult() gate (RED drift + high complexity + 24h cooldown)\n\nAuto-consult gate (3 conditions, all required):\n\nIntent drift level === 'RED'\nTask complexity === 'high'\nLast consultation > 24 hours ago (with lock file to prevent race conditions)\n\nSecurity rules (enforce before go-live):\n\nExternal model is a leaf node — never triggers further spawns\nContext sanitization before every call — strip API keys, file contents, internal paths\nSession-level call cap (prevent fan-out)\nMetadata logging of every call (timestamp, provider, trigger reason)\nKey stored in environment variable, not config file\n\nSession continuity: Pass sessionId from a previous consultation to continue the conversation. 
The bridge injects prior message history automatically.\n\nSee: lib/external-bridge.js for reference implementation.\n\n8. Intent Engineering (Value Alignment over Time)\n\nProblem: Prompt engineering tells agents what to do. Context engineering tells agents what to know. Neither tells agents what to want — so they drift from user values over time without detection.\n\nSolution: A three-layer system that encodes agent intent, extracts intent signals from user behavior, and detects drift before it becomes a problem.\n\nThree layers:\n\nIntent Manifest (memory/intent-manifest.json) — Machine-readable intent spec per agent: core purpose, hard constraints (with regex), operational goals with verification keywords, user signals extracted from history\nIntent Extractor (lib/intent-extractor.js) — Weekly pass over episode history extracting preference/value/correction signals; decays old signals (rate: 0.975/week), prunes below 0.30 strength\nDrift Detector (lib/intent-drift-detector.js) — 5-component score per episode: hard constraint violations (0.35), goal keyword coverage (0.25), structural drift via Jaccard (0.20), correction pressure (0.15), forbidden phrase hits (0.05)\n\nDrift levels:\n\nGREEN (< 0.29): Normal operation\nYELLOW (0.29–0.48): Inject reminder next turn\nORANGE (0.48–0.68): Log + prepare summary\nRED (≥ 0.68): Flag for user check-in (triggers auto-consult gate)\n\nIntent manifest schema:\n\n{\n  \"system\": {\n    \"core_purpose\": \"...\",\n    \"hard_constraints\": [{\"id\": \"...\", \"rule\": \"...\", \"regex\": \"...\"}],\n    \"operational_goals\": [{\"id\": \"...\", \"description\": \"...\", \"verification_keywords\": [], \"weight\": 0}]\n  },\n  \"agents\": {\n    \"AgentName\": {\n      \"inherits\": true,\n      \"overrides\": {\n        \"core_purpose\": \"...\",\n        \"operational_goals\": [],\n        \"user_signals\": []\n      },\n      \"version\": \"YYYY-MM-DD.001\"\n    }\n  },\n  \"diff_log\": []\n}\n\n\nKey properties:\n\nSystem 
intent + agent overrides use deep merge — agents inherit system constraints\nUser signals decay weekly — recent corrections weighted higher\nOperational goals are hand-authored for v1; intent compression auto-suggests additions in v2\nManifest is diffable over time (diff_log tracks changes)\nDrift score adjusts confidence estimator: score *= (1 - drift.score * 0.3)\n\nCold start: Works on day 1 with hand-authored goals. Signals accumulate automatically.\n\nSee: lib/intent-manager.js, lib/intent-extractor.js, lib/intent-drift-detector.js for reference implementation.\n\n9. Cost-Aware Spawning\n\nProblem: Uncontrolled subagent spawning leads to budget overruns.\n\nSolution: Estimate cost before spawning, require approval for expensive operations.\n\nFramework:\n\n// Pre-spawn cost estimation\nconst estimate = estimateSubagentCost({\n  task_complexity: \"high\",       // simple/medium/high\n  expected_duration_min: 20,\n  model: \"sonnet\",               // haiku/sonnet/opus\n  research_required: true\n})\n\n// estimate = { min: $0.60, max: $1.20, confidence: 0.8 }\n\nif (estimate.max > 0.50) {\n  // Log to cost tracking\n  await logCostEstimate(\"task-label\", estimate)\n  \n  if (estimate.max > 2.00) {\n    // Require human approval\n    await requestApproval(estimate)\n  }\n}\n\n// Spawn subagent\nconst result = await spawnSubagent(...)\n\n// Track actual cost\nawait logActualCost(\"task-label\", result.cost)\n\n\nCost tiers:\n\nMicro (< $0.10): Simple lookups, fact-checking\nSmall ($0.10-0.50): Standard research, code review\nMedium ($0.50-2.00): Feature implementation, deep analysis\nLarge (> $2.00): Complex refactors, multi-phase projects\n\nOptimization strategies:\n\nUse haiku for simple tasks (3x cheaper than sonnet)\nParallelize independent operations\nCache research findings for reuse\nSkip optional phases (reviewer for low-risk changes)\nDirectory Structure\nsubagents/\n├── [specialist-name]/\n│   ├── SPECIALIST.md       # Agent definition and 
personality\n│   ├── knowledge-base/     # Reference materials\n│   └── research/           # Task outputs and findings\n└── _archived/              # Retired subagents\n\nskills/subagent-architecture/\n├── SKILL.md               # This file\n├── templates/\n│   ├── security-proxy.md\n│   ├── researcher-specialist.md\n│   ├── phased-implementation.md\n│   └── peer-review-specialist.md\n└── setup.sh               # Directory scaffolding script\n\nWhen to Use Each Pattern\nSecurity Proxy\n\nUse when:\n\nAccessing untrusted APIs or experimental services\nRisk score > 70 (high blast radius or irreversibility)\nData needs sanitization before main agent sees it\nCost estimate < $0.10 (lightweight proxy)\n\nSkip when:\n\nTrusted first-party APIs (your infrastructure)\nRead-only public data (Wikipedia, documentation)\nRisk score < 30 (low stakes)\nResearcher Specialist\n\nUse when:\n\nQuestion requires 10+ sources\nDomain expertise needed (not general knowledge)\nMultiple conflicting claims (need fact-checking)\nDeliverable is a decision (not just information)\nCost estimate > $0.20 (worth specialist analysis)\n\nSkip when:\n\nSimple factual lookup (single authoritative source)\nReal-time data (weather, stock prices)\nCost estimate < $0.10 (not worth overhead)\nPhased Implementation\n\nUse when:\n\nFeature touches 3+ files\nRequires integration with existing systems\nCost estimate > $1.00 (worth upfront design)\nFailure would be expensive (need rollback plan)\nLong-term maintainability matters\n\nSkip when:\n\nSimple script (1 file, no integration)\nWell-understood pattern (copying existing structure)\nPrototype/experiment (might throw away)\nCost estimate < $0.50 (faster to just build)\nPeer Review\n\nUse when:\n\nHigh stakes (security, legal, financial)\nDomain gap (no internal specialist)\nBias check (fresh perspective needed)\nComplex validation (multi-dimensional review)\n\nSkip when:\n\nLow stakes (documentation, styling)\nDomain expertise available internally\nFast 
turnaround required (peer may take hours/days)\nSensitive data (can't sanitize for external review)\nIntegration with Task Routing\n\nAutomatic pattern selection via task-routing skill:\n\n# config/routing-rules.yaml\npattern_routing:\n  security_proxy:\n    triggers:\n      - blast_radius > 70\n      - untrusted_api: true\n    max_cost: 0.10\n    \n  researcher_specialist:\n    triggers:\n      - task_type: research\n      - complexity > 50\n    min_sources: 3\n    \n  phased_implementation:\n    triggers:\n      - task_type: code_gen\n      - files_affected > 3\n      - cost_estimate > 1.00\n    phases: [architect, coder, reviewer]\n    \n  peer_review:\n    triggers:\n      - irreversibility > 80\n      - domain_gap: true\n    require_approval: true\n\n\nManual override:\n\n// Force specific pattern\nspawnSubagent({\n  pattern: \"security-proxy\",  // override routing decision\n  task: \"...\",\n  justification: \"Experimental API, prefer isolation\"\n})\n\nCost Tracking & Optimization\nLogging Requirements\n\nAll subagent spawns >$0.50 must be logged to notes/cost-tracking.md:\n\n## Subagent Cost Tracking\n\n| Date | Label | Pattern | Estimate | Actual | Delta | Notes |\n|------|-------|---------|----------|--------|-------|-------|\n| 2026-02-22 | architect-feature-x | phased-impl | $0.40 | $0.38 | -5% | Faster than expected |\n| 2026-02-22 | coder-feature-x | phased-impl | $0.70 | $0.85 | +21% | Complex refactor, used opus |\n| 2026-02-22 | researcher-market | researcher | $0.60 | $0.55 | -8% | Cached sources helped |\n\nAccuracy Improvement\n\nTrack estimate vs actual to improve future predictions:\n\n// Calculate rolling accuracy\nconst accuracy = calculateAccuracy({\n  window_days: 30,\n  min_samples: 10\n})\n\n// accuracy = { mean_error: 12%, confidence: 0.85 }\n\n// Adjust future estimates\nconst adjusted_estimate = base_estimate * (1 + accuracy.mean_error)\n\nQuality Standards\nSubagent Output Rubric\n\nAll specialist subagents should score 7+ on this 
rubric:\n\nDimension\tPoor (1-3)\tGood (4-6)\tExcellent (7-10)\nSpecificity\tVague generalizations\tSome concrete details\tPrecise, actionable specifics\nActionability\tNo clear next steps\tSuggestions provided\tStep-by-step implementation plan\nEvidence\tUnsourced claims\tSome citations\tEvery claim sourced, validated\nStructure\tStream-of-consciousness\tBasic organization\tScannable hierarchy, summaries\nCompleteness\tMissing key aspects\tMost areas covered\tComprehensive, gaps documented\nHonesty\tHides limitations\tMentions some trade-offs\tExplicit about unknowns, risks\nCost-awareness\tNo budget consideration\tRough estimates\tDetailed cost/benefit analysis\nIntegration\tIgnores existing systems\tBasic compatibility\tSeamless integration plan\nSelf-Audit Checklist\n\nBefore finalizing subagent output:\n\n Every claim has source (URL + date)\n Contradictions addressed (not ignored)\n Recommendations include trade-offs\n Cost estimate provided (time + money)\n Integration points documented\n Rollback strategy included\n Success criteria defined\n Known limitations listed\nPhilosophy\nPermanent vs Ephemeral\n\nPermanent subagents (skill-based):\n\nRecurring specialized tasks\nDeep domain expertise\nKnowledge accumulation over time\nExamples: DevOps, AuthorAgent, WuxiaWorldbuilder\n\nEphemeral subagents (one-off spawns):\n\nBounded research tasks\nSimple implementation work\nExperimental exploration\nPattern-based specialists (security-proxy, researcher)\nAnti-Sycophant by Default\n\nAll subagents should:\n\nCall out weak evidence (no polite agreement)\nReject unrealistic requirements (push back on bad specs)\nHighlight risks honestly (no sugar-coating)\nAdmit knowledge gaps (no speculation to please)\nCost-Conscious Operation\nEstimate before spawning (no surprise bills)\nChoose appropriate model (haiku vs sonnet vs opus)\nParallelize when possible (save time = save money)\nCache and reuse (research findings, architecture patterns)\nTerminate early if stuck 
(don't spin wheels for 30min)\nDependencies\nRequired\nOpenClaw 2026.2.17+ (subagent spawning, cost tracking)\nOptional but Recommended\ntask-routing skill (auto-pattern selection, risk scoring)\ncost-governor skill (budget enforcement, approval workflows)\ndrift-guard skill (behavioral validation, quality audits)\nIntegration Points\n\nTask routing:\n\nProvides task classification (8 types)\nRisk scoring (5 dimensions, 0-100 scale)\nAuto-route to pattern (security-proxy, researcher, phased-impl)\n\nCost governor:\n\nPre-spawn approval for expensive operations\nBudget tracking per project/feature\nAlert on overruns\n\nDrift guard:\n\nBehavioral audit of subagent outputs\nPolicy violation detection\nQuality score trending\nFramework Limitations & v2 Roadmap\n\nKnown gaps identified in peer review (Agent Smith, EasyClaw project)\n\nCurrent OpenClaw subagent framework has architectural limitations that affect all patterns in this skill. These are framework-level constraints, not skill-specific issues. Documenting for transparency and future improvement.\n\n1. 
Spawn Configuration Constraints\n\nSmith's Question: \"How does core define sub-bot constraints before launch?\"\n\nCurrent State:\n\nSpawn parameters: task, personality, model, timeout, label\nBasic configuration only (what to do, how long)\nNo granular resource controls\n\nLimitations:\n\n❌ No memory limits per spawn (subagent can consume unlimited RAM)\n❌ No API call quotas (can make 1000s of web requests)\n❌ No disk space caps (can fill storage)\n❌ No per-spawn tool restrictions (tool policy is framework-level, applies to all subagents)\n\nCurrent Workaround:\n\nManual timeout enforcement (kill after N minutes)\nFramework-level tool policies (same restrictions for all subagents)\nPost-spawn monitoring (watch logs, intervene manually)\n\nv2 Design Considerations:\n\n// Proposed: Granular per-spawn constraints\nspawnSubagent({\n  label: \"researcher-x\",\n  task: \"Research topic\",\n  constraints: {\n    max_memory_mb: 512,           // Kill if exceeds RAM limit\n    max_api_calls: 50,            // Deny further requests after quota\n    max_disk_mb: 100,             // Block file writes beyond limit\n    max_cost_usd: 0.50,           // Auto-terminate if cost projection exceeds\n    tools_allowed: [\"web_search\", \"web_fetch\"],  // Per-spawn tool whitelist\n    tools_denied: [\"exec\", \"write\"],             // Explicit blacklist\n    rate_limits: {\n      web_search: { calls: 10, window_seconds: 60 }  // Max 10 searches/min\n    }\n  }\n})\n\n\nImpact on Patterns:\n\nSecurity proxy: Could enforce strict tool whitelist per proxy (currently manual)\nResearcher: Could cap API calls to prevent runaway research loops\nPhased impl: Could allocate different budgets per phase (architect: $0.40, coder: $1.00)\n2. 
Skill Loading & Validation\n\nSmith's Question: \"How are skills validated before execution?\"\n\nCurrent State:\n\nExternal skills (downloaded) → skill-vetter checks metadata, malicious patterns\nInternal skills (workspace) → Just execute (trusted by default)\nNo runtime sandboxing\n\nLimitations:\n\n❌ No code isolation for skills (skill code runs in main agent context)\n❌ No execution validation (skill can do anything agent can do)\n❌ No capability restrictions (skill inherits all agent tools)\n❌ Trust model: Binary (external = vet, internal = trust)\n\nCurrent Workaround:\n\nVet external skills manually before first use\nReview internal skill code (human audit)\nHope skills don't misbehave\n\nv2 Design Considerations:\n\n// Proposed: Skill capability manifest\n// skills/my-skill/SKILL.md\n---\ncapabilities_required:\n  - web_search      # Skill needs web access\n  - read:config/    # Can read config directory only\n  - write:output/   # Can write to output directory only\nisolation_level: sandbox  # Run in isolated context\nmax_execution_time: 30    # Kill after 30 seconds\n---\n\n// Runtime: Skill runs in sandbox with only declared capabilities\n// Attempts to use undeclared tools → blocked, logged, skill terminated\n\n\nImpact on Patterns:\n\nSecurity proxy: Skills could be sandboxed (currently rely on manual isolation)\nAll patterns: Skills loaded by subagents would inherit spawn constraints\nPeer review: External skills from peers could run safely (sandboxed)\n3. 
Communication Bounds (Bidirectional Channel)\n\nSmith's Question: \"What can sub-bots ask for from core?\"\n\nCurrent State:\n\nOne-way communication only: Core spawns → Subagent executes → Returns result\nSubagent cannot request clarification mid-task\nSubagent cannot request additional context during execution\nNo interactive mode\n\nLimitations:\n\n❌ Subagent stuck on ambiguous requirement → Must guess or fail\n❌ Subagent needs additional context → Can't ask, works with what it has\n❌ Subagent encounters unexpected scenario → No escalation path mid-task\n❌ Human approval needed mid-execution → Not possible (only pre/post spawn)\n\nCurrent Workaround:\n\nOver-specify context in initial spawn (bloated prompts)\nSubagent makes best guess (may be wrong)\nSubagent fails, core respawns with clarification (expensive)\n\nv2 Design Considerations:\n\n// Proposed: Request-response protocol during execution\n// Subagent code:\nconst clarification = await requestFromCore({\n  type: \"clarification\",\n  question: \"User said 'recent data' - how many days back?\",\n  options: [\"7 days\", \"30 days\", \"90 days\"],\n  timeout_seconds: 60  // If no response, default to first option\n})\n\n// Core receives request:\n// - Auto-approve safe requests (data lookup from memory)\n// - Escalate to human for decisions\n// - Return answer to subagent\n// - Subagent continues with clarification\n\n// Examples:\n// - \"Need API key for service X\" → Core provides from secure store\n// - \"Found conflicting data, which source to trust?\" → Human decides\n// - \"Task seems too expensive ($5 estimated), proceed?\" → Approval workflow\n\n\nImpact on Patterns:\n\nResearcher: Can ask \"should I dig deeper on this tangent?\" mid-research\nPhased impl: Coder can ask architect for design clarification during build\nSecurity proxy: Can request human approval if API returns unexpected data\nAll patterns: Reduces over-specification, enables adaptive execution\n4. 
Termination Conditions (Resource-Based Kills)\n\nSmith's Question: \"When does core kill a sub-bot?\"\n\nCurrent State:\n\nTime-based only: Timeout (specified in spawn) or task completion\nManual intervention (human stops runaway agent)\nNo automatic resource-based termination\n\nLimitations:\n\n❌ No memory limit kills (agent can OOM the host)\n❌ No cost threshold kills (can exceed budget before timeout)\n❌ No stuck detection (infinite loops run until timeout)\n❌ No output size limits (can generate gigabytes of text)\n\nCurrent Workaround:\n\nSet conservative timeouts (may kill productive work early)\nMonitor logs manually (reactive, not proactive)\nHope agent doesn't get stuck in expensive loop\n\nv2 Design Considerations:\n\n// Proposed: Multi-condition termination\nspawnSubagent({\n  label: \"researcher-x\",\n  task: \"Research topic\",\n  termination: {\n    // Existing\n    timeout_minutes: 20,\n    \n    // New: Resource limits\n    max_memory_mb: 512,          // Kill if RSS > 512MB\n    max_cost_usd: 1.00,          // Kill if projected cost > $1\n    max_output_tokens: 10000,    // Kill if response > 10k tokens\n    \n    // New: Behavioral detection\n    stuck_detection: {\n      enabled: true,\n      same_action_threshold: 5,  // Kill if repeats same tool call 5x\n      no_progress_minutes: 5     // Kill if no new output for 5min\n    },\n    \n    // New: External triggers\n    kill_signal: \"session:parent_terminated\"  // Kill if parent agent dies\n  }\n})\n\n// Termination reasons logged for analysis:\n// - \"timeout\" (existing)\n// - \"memory_exceeded\" (new)\n// - \"cost_exceeded\" (new)\n// - \"stuck_loop_detected\" (new)\n// - \"output_overflow\" (new)\n// - \"parent_died\" (new)\n\n\nImpact on Patterns:\n\nSecurity proxy: Strict resource limits (memory, cost) prevent runaway isolation breaches\nResearcher: Stuck detection prevents infinite search loops\nPhased impl: Cost limits per phase (architect can't blow whole budget)\nAll patterns: Better 
cost control, faster failure detection\n5. Post-Mortem & Learning System\n\nSmith's Question: \"How do you learn from sub-bot failures?\"\n\nCurrent State:\n\nSuccess-only logging: Add to AGENTS.md after successful spawns\nManual failure review (check logs, wonder what happened)\nNo systematic failure tracking\nNo pattern analysis\n\nLimitations:\n\n❌ No failure tracking (only successes logged to AGENTS.md)\n❌ No common failure pattern detection (same mistake repeated)\n❌ No cost vs value analysis (did expensive agent deliver value?)\n❌ No success rate per agent type (which specialists are reliable?)\n❌ No improvement feedback loop (failures don't inform future spawns)\n\nCurrent Workaround:\n\nHuman remembers failures (unreliable)\nManually check logs when something seems off\nAnecdotal learning (\"that researcher failed last time\")\n\nv2 Design Considerations:\n\n// Proposed: Systematic post-mortem database\n// Storage: memory/subagent-postmortems.jsonl\n\n{\n  \"spawn_id\": \"researcher-market-20260222-1430\",\n  \"label\": \"researcher-market\",\n  \"pattern\": \"researcher-specialist\",\n  \"outcome\": \"failure\",  // success | partial | failure\n  \"termination_reason\": \"cost_exceeded\",\n  \"stats\": {\n    \"duration_minutes\": 18,\n    \"cost_actual\": 1.25,\n    \"cost_estimate\": 0.60,\n    \"cost_accuracy\": -108%,  // Overran by 108%\n    \"tokens_used\": 45000,\n    \"api_calls\": 127\n  },\n  \"deliverable_quality\": null,  // Not rated (failed before completion)\n  \"failure_mode\": {\n    \"category\": \"runaway_research\",  // Taxonomy of failures\n    \"root_cause\": \"No stuck detection, research loop on tangent\",\n    \"human_notes\": \"Researched sub-topic for 15min, didn't return to main question\"\n  },\n  \"lessons\": [\n    \"Add stuck detection for researcher pattern\",\n    \"Improve cost estimation for multi-source research (was 2x off)\"\n  ]\n}\n\n// Analytics queries:\n// - Success rate by pattern: researcher-specialist = 73% 
(8/11 succeeded)\n// - Most common failure mode: cost_exceeded (40% of failures)\n// - Cost estimation accuracy: researcher pattern = -25% average (underestimates)\n// - ROI analysis: phased-impl pattern = $2.50 avg cost, 90% success, high value\n\n\nProposed Learning Loop:\n\n// Before spawning, check historical performance\nconst history = getPatternHistory(\"researcher-specialist\")\n\nif (history.success_rate < 0.7) {\n  console.warn(`⚠️ Pattern has 65% success rate (13/20). Common failure: ${history.top_failure_mode}`)\n  // Adjust: Add stuck detection, reduce timeout, use cheaper model\n}\n\nif (history.cost_accuracy < -0.3) {\n  console.warn(`⚠️ Pattern underestimates cost by 35% on average`)\n  // Adjust: Inflate estimate by 35%\n  const adjusted_estimate = base_estimate * 1.35\n}\n\n// After completion, log outcome\nlogPostMortem({\n  spawn_id,\n  outcome: \"success\",\n  quality_score: 8.5,  // From rubric\n  cost_actual: 0.58,\n  cost_estimate: 0.60,\n  lessons: [\"Worked well, multi-perspective pattern delivered balanced view\"]\n})\n\n\nImpact on Patterns:\n\nAll patterns: Historical success rates inform spawn decisions\nCost framework: Improve estimation accuracy (learn from past errors)\nQuality standards: Track which patterns consistently deliver high scores\nPattern evolution: Retire unreliable patterns, double down on proven ones\nSummary: v2 Feature Matrix\nFeature\tv1 (Current)\tv2 (Proposed)\tBenefit\nSpawn constraints\tTimeout only\tMemory, cost, quota, tool whitelist\tResource safety\nSkill isolation\tTrust-based\tSandboxed with capabilities\tSecurity\nCommunication\tOne-way\tBidirectional request/response\tAdaptive execution\nTermination\tTime-based\tMulti-condition (resource, stuck, cost)\tCost control\nPost-mortem\tSuccess-only\tFull lifecycle tracking + analytics\tContinuous learning\n\nCurrent mitigation strategies:\n\nManual monitoring (watch logs, intervene when needed)\nConservative estimates (over-specify context, pad 
timeouts)\nPattern discipline (follow templates strictly to avoid known failure modes)\nHuman-in-loop (approve expensive spawns, review failures manually)\n\nv2 would enable:\n\nAutonomous resource management (agents self-limit)\nHigher confidence spawning (better failure prediction)\nFaster iteration (learn from failures automatically)\nFine-grained security (per-spawn, per-skill isolation)\n\nNote for users: These limitations affect all subagent patterns in this skill. The patterns documented here (security-proxy, researcher, phased-impl, peer-review) work within current framework constraints. v2 improvements would enhance safety and reliability, but are not required for effective use of these patterns today.\n\nTracking: OpenClaw framework issue (conceptual - not filed yet) Credit: Agent Smith (EasyClaw peer review, 2026-02-22)\n\nExamples\nExample 1: Security-First API Integration\n\nScenario: Integrate with untrusted social network API\n\n// Main agent receives request\nconst task = \"Fetch user's posts from SocialNetworkX API\"\n\n// Security proxy pattern\nconst proxy = await spawnSubagent({\n  label: \"proxy-socialnetworkx\",\n  pattern: \"security-proxy\",\n  task: \"Query SocialNetworkX API for user posts, return sanitized JSON\",\n  context: {\n    query: \"user:$USERNAME, limit:10\"\n    // NO API keys, NO workspace paths\n  },\n  tools_allowed: [\"exec:curl\"],\n  timeout_minutes: 5,\n  auto_terminate: true\n})\n\n// proxy returns: [{username, timestamp, content}] - sanitized\n// Main agent: Safe to process, no raw API exposure\n\nExample 2: Multi-Perspective Research\n\nScenario: Evaluate whether to adopt new framework\n\n// Spawn 3 researchers with different biases\nconst researchers = await Promise.all([\n  spawnSubagent({\n    label: \"researcher-optimist\",\n    pattern: \"researcher-specialist\",\n    task: \"Research FrameworkX benefits, best-case adoption scenario\",\n    personality: \"Optimistic, highlights opportunities\"\n  }),\n  \n  
spawnSubagent({\n    label: \"researcher-pessimist\",\n    pattern: \"researcher-specialist\",\n    task: \"Research FrameworkX risks, failure modes, known issues\",\n    personality: \"Skeptical, risk-focused\"\n  }),\n  \n  spawnSubagent({\n    label: \"researcher-pragmatist\",\n    pattern: \"researcher-specialist\",\n    task: \"Research FrameworkX current state, real-world adoption data\",\n    personality: \"Pragmatic, data-driven\"\n  })\n])\n\n// Synthesize: Balanced view with decision criteria\nconst decision = synthesizeResearch(researchers.map(r => r.findings))\n\nExample 3: Phased Feature Development\n\nScenario: Build complex new skill\n\n// Phase 1: Architecture\nconst plan = await spawnSubagent({\n  label: \"architect-skill-x\",\n  pattern: \"phased-implementation\",\n  phase: \"architect\",\n  task: \"Design implementation plan for skill-x with [requirements]\",\n  model: \"sonnet\"\n})\n\n// Review plan, get approval if expensive\nif (plan.cost_estimate > 2.00) {\n  await humanApproval(plan)\n}\n\n// Phase 2: Implementation\nconst implementation = await spawnSubagent({\n  label: \"coder-skill-x\",\n  pattern: \"phased-implementation\",\n  phase: \"coder\",\n  task: `Implement skill-x per plan: ${plan.path}`,\n  model: plan.recommended_model\n})\n\n// Phase 3: Review (conditional on risk)\nif (implementation.risk_level === \"high\") {\n  const review = await spawnSubagent({\n    label: \"reviewer-skill-x\",\n    pattern: \"phased-implementation\",\n    phase: \"reviewer\",\n    task: `Review implementation: ${implementation.path}`,\n    model: \"sonnet\"\n  })\n  \n  if (!review.approved) {\n    // Iterate or escalate\n  }\n}\n\nExample 4: Peer Validation\n\nScenario: Security audit for payment processing\n\n// Prepare sanitized review package\nconst package = sanitizeForReview({\n  artifact: readFile(\"src/payment-processor.js\"),\n  remove: [\"API_KEYS\", \"USER_DATA\", \"INTERNAL_URLS\"]\n})\n\n// Request peer review via Discord\nawait 
message({\n  action: \"send\",\n  target: \"smith-security-bot\",\n  message: `Security review request:\\n${JSON.stringify(package)}\\n\\nFocus: Payment processing, SQL injection, input validation`\n})\n\n// Wait for peer response (auto-announces when received)\n// Process feedback, fix critical issues, optionally re-submit\n\nTroubleshooting\nProblem: Subagent exceeds cost estimate\n\nDiagnosis:\n\nCheck actual task complexity vs estimate\nReview subagent logs for loops or retries\nVerify model choice (opus when sonnet would suffice?)\n\nSolutions:\n\nImprove cost estimation (track actuals, adjust formula)\nAdd cost cap to spawn config (hard limit)\nUse cheaper model for exploratory phase\nProblem: Security proxy leaks sensitive data\n\nDiagnosis:\n\nAudit spawn context (did it include workspace paths?)\nCheck tool restrictions (was file access allowed?)\nReview output sanitization (did schema validation fail?)\n\nSolutions:\n\nUpdate security-proxy template checklist\nAdd automated context sanitization pre-spawn\nImplement output schema validation (reject non-conforming data)\nProblem: Phased implementation phases conflict\n\nDiagnosis:\n\nArchitect plan unrealistic (coder can't implement)\nCoder deviated from plan (reviewer rejects)\nReviewer too strict (perfect is enemy of good)\n\nSolutions:\n\nArchitect: Include feasibility check (can this be built?)\nCoder: Flag deviations early (request plan amendment)\nReviewer: Focus on critical issues (don't block on style)\nProblem: Peer review unavailable or slow\n\nDiagnosis:\n\nPeer bot offline or rate-limited\nNo trusted peers for required domain\nReview package too large or unclear\n\nSolutions:\n\nMaintain backup peer list (2+ per domain)\nFall back to internal review (DevOps, CoderAgent)\nSimplify review package (focus on specific question)\nFramework Limitations & v2 Roadmap\n\nKnown gaps identified in peer review (Agent Smith, EasyClaw - 2026-02-22)\n\nThese limitations represent areas where the current 
OpenClaw subagent framework needs evolution. Documented here for transparency and future development.\n\n1. Spawn Configuration - Per-Subagent Constraints\n\nCurrent state:\n\nSpawn parameters: task description, personality, model, timeout, label\nConstraints are framework-level (all subagents share same tool policy)\n\nMissing:\n\nMemory limits per spawn (prevent individual subagent memory leaks)\nAPI call quotas per spawn (prevent runaway costs)\nDisk space caps per spawn (prevent storage exhaustion)\nPer-spawn tool restrictions (fine-grained beyond framework allowlist)\n\nv2 Design Consideration:\n\nsessions_spawn({\n  task: \"...\",\n  constraints: {\n    maxMemoryMB: 512,\n    maxAPICalls: 100,\n    maxDiskMB: 50,\n    tools: {\n      allowed: ['read', 'write'],\n      forbidden: ['exec', 'message'],\n      restrictions: {\n        read: { paths: ['/workspace/restricted/'] },\n        write: { paths: ['/workspace/output/'], maxFileSize: '10MB' }\n      }\n    }\n  }\n})\n\n\nImpact: Without per-spawn constraints, one rogue subagent can exhaust resources for all others.\n\n2. Skill Loading - Runtime Validation\n\nCurrent state:\n\nExternal skills: Pre-execution vetting via skill-vetter (red flag detection)\nInternal skills: Trusted by default, no runtime sandboxing\n\nMissing:\n\nRuntime sandboxing for skill-embedded code (JavaScript, shell scripts)\nExecution validation (verify skill behaves as documented)\nResource monitoring during skill execution\n\nv2 Design Consideration:\n\n// Sandbox wrapper for skill execution\nrunSkill('ai-writing-humanizer', {\n  sandbox: {\n    networkAccess: false,  // Block network except allowlisted domains\n    filesystemAccess: 'readonly',  // Read-only except explicit write paths\n    cpuLimit: '50%',  // Prevent CPU monopolization\n    timeout: 30000  // 30s max execution\n  },\n  validate: {\n    outputSchema: { ... 
},  // Verify output format\n    sideEffects: ['writes to /workspace/humanizer/'],  // Allowed side effects\n    noExternalCalls: true  // Flag if skill attempts network/exec\n  }\n})\n\n\nImpact: Without runtime sandboxing, malicious or buggy skills can compromise core agent.\n\n3. Communication Bounds - Bidirectional Channels\n\nCurrent state:\n\nOne-way communication only: Core spawns → Subagent executes → Result returns\nSubagents cannot ask clarifying questions during execution\n\nMissing:\n\nMid-task clarification requests (\"Should I proceed with risky operation?\")\nProgress updates for long-running tasks (>5min execution)\nDynamic priority adjustment (core can signal \"abort\" or \"expedite\")\n\nv2 Design Consideration:\n\n// Subagent perspective\nasync function executeTask(taskDescription) {\n  const plan = generatePlan(taskDescription);\n  \n  if (plan.hasHighRiskOperation()) {\n    const approval = await askCore({\n      question: \"Plan includes deleting 100 files. Proceed?\",\n      options: [\"yes\", \"no\", \"show list first\"],\n      timeout: 60000  // 1 min to respond\n    });\n    \n    if (approval !== \"yes\") return { status: \"aborted\", reason: \"user declined\" };\n  }\n  \n  // Continue with approved plan...\n}\n\n\nImpact: Without bidirectional channels, subagents must either be overly cautious (ask for everything upfront) or overly bold (proceed with assumptions).\n\n4. 
Termination Conditions - Resource-Based Kills\n\nCurrent state:\n\nTimeout-based only: Subagent runs until task completes or timeout expires\nNo resource-based termination\n\nMissing:\n\nMemory limit kills (terminate if subagent exceeds RAM quota)\nCost threshold kills (terminate if API calls exceed budget)\nStuck detection (infinite loops, deadlocks, recursive spawning)\nRunaway output prevention (subagent generating gigabytes of logs)\n\nv2 Design Consideration:\n\nsessions_spawn({\n  task: \"...\",\n  limits: {\n    timeout: 600000,  // 10 min max\n    maxMemory: 512,  // MB\n    maxCost: 2.00,  // USD\n    maxOutputSize: 10,  // MB\n    maxToolCalls: 100,  // Total tool invocations\n    stuckDetection: {\n      enabled: true,\n      sameToolRepeat: 5,  // Flag if same tool called 5+ times consecutively\n      noProgressTimeout: 120000  // Flag if no output/tool calls for 2 min\n    }\n  },\n  onLimitExceeded: (limit, value) => {\n    logFailure({ subagent, limit, value });\n    return { action: 'terminate', notify: 'core' };\n  }\n})\n\n\nImpact: Without resource-based kills, subagents can waste budget, exhaust memory, or spin indefinitely.\n\n5. Post-Mortem - Learning from Failures\n\nUpdate (v2.1 → v2.2): A lightweight v0 of post-mortem logging is now implementable without framework changes. The crash report schema defined in templates/proxy-recovery.md gives you structured failure data per-spawn. Core reads crash reports and routes them. This covers the \"what failed and why\" dimension immediately.\n\nWhat still requires framework work (see v2 roadmap below): automated outcome analytics, pattern detection across spawns, success rate tracking. 
But single-spawn forensics — you can ship that now.\n\nCurrent state:\n\nSuccess logging only: AGENTS.md records completed subagents\nNo failure tracking or pattern analysis\nPartial exception: Security proxies using proxy-recovery.md pattern get crash reports per spawn\n\nMissing:\n\nFailure logging (why did subagent fail?)\nCommon failure patterns (which tasks/agents fail most?)\nCost vs value analysis (was the subagent worth the spend?)\nSuccess rate tracking per agent type (CoderAgent: 85%, ResearchAgent: 92%, etc.)\nImprovement loop (adjust spawn parameters based on historical outcomes)\n\nv2 Design Consideration:\n\n// Automatic post-mortem on subagent completion/failure\nfunction logSubagentOutcome(subagent, result) {\n  const record = {\n    timestamp: Date.now(),\n    label: subagent.label,\n    task: subagent.task,\n    model: subagent.model,\n    status: result.status,  // 'done', 'timeout', 'error', 'killed'\n    runtime: result.runtime,\n    cost: result.totalCost,\n    tokensUsed: result.totalTokens,\n    outcome: result.status === 'done' ? 'success' : 'failure',\n    failureReason: result.error || result.killReason || null,\n    valueProvided: result.status === 'done' ? 
assessValue(result) : null\n  };\n  \n  appendToLog('memory/subagent-outcomes.jsonl', record);\n  updateSuccessRates(subagent.label, record.outcome);\n  \n  if (record.outcome === 'failure') {\n    analyzeFailurePattern(record);  // Flag common failure modes\n  }\n}\n\n// Quarterly review: Which subagent types are most cost-effective?\nfunction analyzeSubagentROI() {\n  const outcomes = readLog('memory/subagent-outcomes.jsonl');\n  const byType = groupBy(outcomes, o => extractType(o.label));  // CoderAgent, ResearchAgent, etc.\n  \n  for (const [type, records] of Object.entries(byType)) {\n    const successRate = records.filter(r => r.outcome === 'success').length / records.length;\n    const avgCost = mean(records.map(r => r.cost));\n    const avgValue = mean(records.filter(r => r.valueProvided).map(r => r.valueProvided));\n    \n    console.log(`${type}: ${(successRate*100).toFixed(1)}% success, $${avgCost.toFixed(2)} avg cost, ${avgValue.toFixed(1)} avg value`);\n  }\n}\n\n\nImpact: Without post-mortem analysis, you repeat failures, can't optimize spawn strategy, and waste budget on low-value subagents.\n\nConclusion:\n\nThese 5 limitations don't block current usage but represent friction points as subagent usage scales. Prioritize based on pain:\n\nHigh priority: #4 (resource-based kills) - prevents runaway costs\nMedium priority: #5 (post-mortem) - enables data-driven optimization\nMedium priority: #1 (per-spawn constraints) - prevents resource exhaustion\nLow priority: #3 (bidirectional channels) - nice to have for complex tasks\nLow priority: #2 (skill sandboxing) - current vetting sufficient for trusted sources\n\nFeedback welcome: If you've hit these limitations in production, share workarounds and pain points via OpenClaw community channels.\n\nContributing\n\nThis skill improves through real-world usage. Please contribute:\n\nPattern refinements:\n\nFound edge case not covered? Update template\nBetter cost estimation formula? 
Share it\nNew integration point? Document it\n\nNew patterns:\n\nDiscovered novel orchestration? Add template\nHybrid approach works better? Explain why\nOptimization technique? Include example\n\nTrack record:\n\nLog your subagent spawns (cost, accuracy, outcomes)\nShare what worked (and what didn't)\nUpdate rubrics based on production learnings\nLicense\n\nMIT - Use freely, modify as needed, share improvements\n\nChangelog\nv2.3.0 (2026-02-24)\nAdded: Pattern 6 — Research Team (parallel multi-lens analysis with Critic/Implementer/Synthesizer lenses + UnifierAgent)\nAdded: Pattern 7 — External Model Consultation (ExternalConsultAgent subagent, session persistence, auto-consult gate)\nAdded: Pattern 8 — Intent Engineering Layer (manifest schema, intent extraction, drift detection, 5-component drift score)\nUpdated: Version bump to 2.3.0\nv2.2.0 (2026-02-23)\nAdded: Core Concept #5 — Identity Continuity (ephemeral process vs persistent external identity, token handoff pattern, consistency via core-injected recent context, anti-pattern warning)\nAdded: Trust tier ≠ security bypass principle to Peer Collaboration section (Stage 4 injection detection always runs regardless of trust level)\nAdded: templates/proxy-recovery.md — crash/recovery pattern (pre-death checklist, crash report schema, core response decision tree, quarantine mode, human notification, re-spawn decision guide)\nUpdated: templates/security-proxy.md — full rewrite integrating: 6-stage inbound validation pipeline, Stage 6b outbound semantic leak filter, self-imposed rate limiter + circuit breaker, crash/recovery checklist, identity continuity section, MoltbookProxy as real-world reference implementation\nUpdated: Post-mortem section — notes crash reports from proxy-recovery.md are a working v0 of per-spawn forensics (no framework changes required)\nv2.0.1 (2026-02-22)\nAdded: Framework Limitations & v2 Roadmap section\nAdded: 5 critical design gaps identified by Agent Smith (EasyClaw peer 
review)\nAdded: v2 design considerations with code examples for each limitation\nAdded: Priority ranking for addressing limitations\nv2.0.0 (2026-02-22)\nBreaking: Restructured to focus on advanced patterns\nAdded: Security proxy pattern and template\nAdded: Researcher specialist pattern and template\nAdded: Phased implementation pattern and template\nAdded: Peer review pattern and template\nAdded: Cost-aware spawning framework\nAdded: Integration with task-routing, cost-governor, drift-guard\nUpdated: Philosophy section (anti-sycophant, cost-conscious)\nUpdated: Examples (real-world scenarios, not toy problems)\nRemoved: Basic scaffolding (moved to setup.sh)\nv1.0.0 (2026-02-21)\nInitial release with basic subagent structure\nSPECIALIST.md template\nTask routing integration basics\nSupport\nDocumentation: Read templates in templates/ directory\nExamples: See AGENTS.md for real-world subagent library\nIssues: Check EVOLOG.md for known limitations\nCommunity: Share patterns via OpenClaw Discord (when federated network launches)"
  },
  "trust": {
    "sourceLabel": "tencent",
    "provenanceUrl": "https://clawhub.ai/donovanpankratz-del/subagent-architecture",
    "publisherUrl": "https://clawhub.ai/donovanpankratz-del/subagent-architecture",
    "owner": "donovanpankratz-del",
    "version": "2.3.5",
    "license": null,
    "verificationStatus": "Indexed source record"
  },
  "links": {
    "detailUrl": "https://openagent3.xyz/skills/subagent-architecture",
    "downloadUrl": "https://openagent3.xyz/downloads/subagent-architecture",
    "agentUrl": "https://openagent3.xyz/skills/subagent-architecture/agent",
    "manifestUrl": "https://openagent3.xyz/skills/subagent-architecture/agent.json",
    "briefUrl": "https://openagent3.xyz/skills/subagent-architecture/agent.md"
  }
}