{
  "schemaVersion": "1.0",
  "item": {
    "slug": "smart-router",
    "name": "A.I. Smart Router",
    "source": "tencent",
    "type": "skill",
    "category": "AI 智能",
    "sourceUrl": "https://clawhub.ai/c0nSpIc0uS7uRk3r/smart-router",
    "canonicalUrl": "https://clawhub.ai/c0nSpIc0uS7uRk3r/smart-router",
    "targetPlatform": "OpenClaw"
  },
  "install": {
    "downloadMode": "redirect",
    "downloadUrl": "/downloads/smart-router",
    "sourceDownloadUrl": "https://wry-manatee-359.convex.site/api/v1/download?slug=smart-router",
    "sourcePlatform": "tencent",
    "targetPlatform": "OpenClaw",
    "installMethod": "Manual import",
    "extraction": "Extract archive",
    "prerequisites": [
      "OpenClaw"
    ],
    "packageFormat": "ZIP package",
    "includedAssets": [
      "README.md",
      "SKILL.md",
      "STATE.md",
      "compactor.py",
      "context_guard.py",
      "dashboard.py"
    ],
    "primaryDoc": "SKILL.md",
    "quickSetup": [
      "Download the package from Yavira.",
      "Extract the archive and review SKILL.md first.",
      "Import or place the package into your OpenClaw setup."
    ],
    "agentAssist": {
      "summary": "Hand the extracted package to your coding agent with a concrete install brief instead of figuring it out manually.",
      "steps": [
        "Download the package from Yavira.",
        "Extract it into a folder your agent can access.",
        "Paste one of the prompts below and point your agent at the extracted folder."
      ],
      "prompts": [
        {
          "label": "New install",
          "body": "I downloaded a skill package from Yavira. Read SKILL.md from the extracted folder and install it by following the included instructions. Then review README.md for any prerequisites, environment setup, or post-install checks. Tell me what you changed and call out any manual steps you could not complete."
        },
        {
          "label": "Upgrade existing",
          "body": "I downloaded an updated skill package from Yavira. Read SKILL.md from the extracted folder, compare it with my current installation, and upgrade it while preserving any custom configuration unless the package docs explicitly say otherwise. Then review README.md for any prerequisites, environment setup, or post-install checks. Summarize what changed and any follow-up checks I should run."
        }
      ]
    },
    "sourceHealth": {
      "source": "tencent",
      "status": "healthy",
      "reason": "direct_download_ok",
      "recommendedAction": "download",
      "checkedAt": "2026-04-23T16:43:11.935Z",
      "expiresAt": "2026-04-30T16:43:11.935Z",
      "httpStatus": 200,
      "finalUrl": "https://wry-manatee-359.convex.site/api/v1/download?slug=4claw-imageboard",
      "contentType": "application/zip",
      "probeMethod": "head",
      "details": {
        "probeUrl": "https://wry-manatee-359.convex.site/api/v1/download?slug=4claw-imageboard",
        "contentDisposition": "attachment; filename=\"4claw-imageboard-1.0.1.zip\"",
        "redirectLocation": null,
        "bodySnippet": null
      },
      "scope": "source",
      "summary": "Source download looks usable.",
      "detail": "Yavira can redirect you to the upstream package for this source.",
      "primaryActionLabel": "Download for OpenClaw",
      "primaryActionHref": "/downloads/smart-router"
    },
    "validation": {
      "installChecklist": [
        "Use the Yavira download entry.",
        "Review SKILL.md after the package is downloaded.",
        "Confirm the extracted package contains the expected setup assets."
      ],
      "postInstallChecks": [
        "Confirm the extracted package includes the expected docs or setup files.",
        "Validate the skill or prompts are available in your target agent workspace.",
        "Capture any manual follow-up steps the agent could not complete."
      ]
    },
    "downloadPageUrl": "https://openagent3.xyz/downloads/smart-router",
    "agentPageUrl": "https://openagent3.xyz/skills/smart-router/agent",
    "manifestUrl": "https://openagent3.xyz/skills/smart-router/agent.json",
    "briefUrl": "https://openagent3.xyz/skills/smart-router/agent.md"
  },
  "agentAssist": {
    "summary": "Hand the extracted package to your coding agent with a concrete install brief instead of figuring it out manually.",
    "steps": [
      "Download the package from Yavira.",
      "Extract it into a folder your agent can access.",
      "Paste one of the prompts below and point your agent at the extracted folder."
    ],
    "prompts": [
      {
        "label": "New install",
        "body": "I downloaded a skill package from Yavira. Read SKILL.md from the extracted folder and install it by following the included instructions. Then review README.md for any prerequisites, environment setup, or post-install checks. Tell me what you changed and call out any manual steps you could not complete."
      },
      {
        "label": "Upgrade existing",
        "body": "I downloaded an updated skill package from Yavira. Read SKILL.md from the extracted folder, compare it with my current installation, and upgrade it while preserving any custom configuration unless the package docs explicitly say otherwise. Then review README.md for any prerequisites, environment setup, or post-install checks. Summarize what changed and any follow-up checks I should run."
      }
    ]
  },
  "documentation": {
    "source": "clawhub",
    "primaryDoc": "SKILL.md",
    "sections": [
      {
        "title": "A.I. Smart-Router",
        "body": "Intelligently route requests to the optimal AI model using tiered classification with automatic fallback handling and cost optimization."
      },
      {
        "title": "How It Works (Silent by Default)",
        "body": "The router operates transparently—users send messages normally and get responses from the best model for their task. No special commands needed.\n\nOptional visibility: Include [show routing] in any message to see the routing decision."
      },
      {
        "title": "Tiered Classification System",
        "body": "The router uses a three-tier decision process:\n\n┌─────────────────────────────────────────────────────────────────┐\n│                    TIER 1: INTENT DETECTION                      │\n│  Classify the primary purpose of the request                     │\n├─────────────────────────────────────────────────────────────────┤\n│  CODE        │ ANALYSIS    │ CREATIVE   │ REALTIME  │ GENERAL   │\n│  write/debug │ research    │ writing    │ news/live │ Q&A/chat  │\n│  refactor    │ explain     │ stories    │ X/Twitter │ translate │\n│  review      │ compare     │ brainstorm │ prices    │ summarize │\n└──────┬───────┴──────┬──────┴─────┬──────┴─────┬─────┴─────┬─────┘\n       │              │            │            │           │\n       ▼              ▼            ▼            ▼           ▼\n┌─────────────────────────────────────────────────────────────────┐\n│                  TIER 2: COMPLEXITY ESTIMATION                   │\n├─────────────────────────────────────────────────────────────────┤\n│  SIMPLE (Tier $)        │ MEDIUM (Tier $$)    │ COMPLEX (Tier $$$)│\n│  • One-step task        │ • Multi-step task   │ • Deep reasoning  │\n│  • Short response OK    │ • Some nuance       │ • Extensive output│\n│  • Factual lookup       │ • Moderate context  │ • Critical task   │\n│  → Haiku/Flash          │ → Sonnet/Grok/GPT   │ → Opus/GPT-5      │\n└──────────────────────────┴─────────────────────┴───────────────────┘\n                                    │\n                                    ▼\n┌─────────────────────────────────────────────────────────────────┐\n│                TIER 3: SPECIAL CASE OVERRIDES                    │\n├─────────────────────────────────────────────────────────────────┤\n│  CONDITION                           │ OVERRIDE TO              │\n│  ─────────────────────────────────────┼─────────────────────────│\n│  Context >100K tokens                │ → Gemini Pro (1M ctx)    │\n│  Context >500K tokens                │ → Gemini Pro ONLY        │\n│  Needs real-time data                │ → Grok (regardless)      │\n│  Image/vision input                  │ → Opus or Gemini Pro     │\n│  User explicit override              │ → Requested model        │\n└──────────────────────────────────────┴──────────────────────────┘"
      },
      {
        "title": "CODE Intent",
        "body": "Keywords: write, code, debug, fix, refactor, implement, function, class, script, API, bug, error, compile, test, PR, commit\nFile extensions mentioned: .py, .js, .ts, .go, .rs, .java, etc.\nCode blocks in input"
      },
      {
        "title": "ANALYSIS Intent",
        "body": "Keywords: analyze, explain, compare, research, understand, why, how does, evaluate, assess, review, investigate, examine\nLong-form questions\n\"Help me understand...\""
      },
      {
        "title": "CREATIVE Intent",
        "body": "Keywords: write (story/poem/essay), create, brainstorm, imagine, design, draft, compose\nFiction/narrative requests\nMarketing/copy requests"
      },
      {
        "title": "REALTIME Intent",
        "body": "Keywords: now, today, current, latest, trending, news, happening, live, price, score, weather\nX/Twitter mentions\nStock/crypto tickers\nSports scores"
      },
      {
        "title": "GENERAL Intent (Default)",
        "body": "Simple Q&A\nTranslations\nSummaries\nConversational"
      },
      {
        "title": "MIXED Intent (Multiple Intents Detected)",
        "body": "When a request contains multiple clear intents (e.g., \"Write code to analyze this data and explain it creatively\"):\n\nIdentify primary intent — What's the main deliverable?\nRoute to highest-capability model — Mixed tasks need versatility\nDefault to COMPLEX complexity — Multi-intent = multi-step\n\nExamples:\n\n\"Write code AND explain how it works\" → CODE (primary) + ANALYSIS → Route to Opus\n\"Summarize this AND what's the latest news on it\" → REALTIME takes precedence → Grok\n\"Creative story using real current events\" → REALTIME + CREATIVE → Grok (real-time wins)"
      },
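      {
        "title": "Intent Classification (Sketch)",
        "body": "A minimal sketch of how the keyword patterns above could be encoded. The names here (classify_intent, INTENT_KEYWORDS, CODE_EXTENSIONS) and the trimmed keyword sets are illustrative assumptions, not the package's actual implementation:\n\nimport re\n\n# Illustrative keyword subsets drawn from the intent patterns above (assumption)\nINTENT_KEYWORDS = {\n    \"CODE\": [\"debug\", \"refactor\", \"implement\", \"function\", \"compile\", \"bug\"],\n    \"ANALYSIS\": [\"analyze\", \"explain\", \"compare\", \"research\", \"evaluate\", \"investigate\"],\n    \"CREATIVE\": [\"story\", \"poem\", \"essay\", \"brainstorm\", \"imagine\", \"compose\"],\n    \"REALTIME\": [\"today\", \"latest\", \"trending\", \"news\", \"live\", \"price\", \"weather\"],\n}\nCODE_EXTENSIONS = re.compile(r\"\\.(py|js|ts|go|rs|java)\\b\")\n\ndef classify_intent(request: str) -> str:\n    \"\"\"Return the primary intent; REALTIME wins mixed requests, GENERAL is the default.\"\"\"\n    text = request.lower()\n    matched = [\n        intent for intent, words in INTENT_KEYWORDS.items()\n        if any(word in text for word in words)\n    ]\n    # File extensions or code blocks imply CODE regardless of keywords\n    if \"```\" in request or CODE_EXTENSIONS.search(text):\n        matched.insert(0, \"CODE\")\n    if \"REALTIME\" in matched:\n        return \"REALTIME\"  # real-time takes precedence in mixed requests\n    return matched[0] if matched else \"GENERAL\""
      },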
      {
        "title": "Language Handling",
        "body": "Non-English requests are handled normally — all supported models have multilingual capabilities:\n\nModelNon-English SupportOpus/Sonnet/HaikuExcellent (100+ languages)GPT-5Excellent (100+ languages)Gemini Pro/FlashExcellent (100+ languages)GrokGood (major languages)\n\nIntent detection still works because:\n\nKeyword patterns include common non-English equivalents\nCode intent detected by file extensions, code blocks (language-agnostic)\nComplexity estimated by query length (works across languages)\n\nEdge case: If intent unclear due to language, default to GENERAL intent with MEDIUM complexity."
      },
      {
        "title": "Simple Complexity ($)",
        "body": "Short query (<50 words)\nSingle question mark\n\"Quick question\", \"Just tell me\", \"Briefly\"\nYes/no format\nUnit conversions, definitions"
      },
      {
        "title": "Medium Complexity ($$)",
        "body": "Moderate query (50-200 words)\nMultiple aspects to address\n\"Explain\", \"Describe\", \"Compare\"\nSome context provided"
      },
      {
        "title": "Complex Complexity ($$$)",
        "body": "Long query (>200 words) or complex task\n\"Step by step\", \"Thoroughly\", \"In detail\"\nMulti-part questions\nCritical/important qualifier\nResearch, analysis, or creative work"
      },
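      {
        "title": "Complexity Estimation (Sketch)",
        "body": "A minimal sketch mapping the signals above to a complexity level. The thresholds mirror the word counts listed; the estimate_complexity name matches what the routing flow below assumes, and the exact phrase lists are illustrative:\n\ndef estimate_complexity(request: str) -> str:\n    \"\"\"Map word count and phrasing signals to SIMPLE / MEDIUM / COMPLEX.\"\"\"\n    words = len(request.split())\n    text = request.lower()\n    # Explicit depth cues or a long query push the request to COMPLEX\n    if words > 200 or any(p in text for p in (\"step by step\", \"thoroughly\", \"in detail\")):\n        return \"COMPLEX\"\n    # Brevity cues or a short query keep it SIMPLE\n    if words < 50 or any(p in text for p in (\"quick question\", \"just tell me\", \"briefly\")):\n        return \"SIMPLE\"\n    return \"MEDIUM\""
      },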
      {
        "title": "Routing Matrix",
        "body": "IntentSimpleMediumComplexCODESonnetOpusOpusANALYSISFlashGPT-5OpusCREATIVESonnetOpusOpusREALTIMEGrokGrokGrok-3GENERALFlashSonnetOpus"
      },
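      {
        "title": "Routing Matrix as Code (Sketch)",
        "body": "One possible encoding of the matrix above as a lookup table. Keys are (intent, complexity) pairs; plain \"Grok\" is rendered as grok-2 per the real-time fallback chain, the lowercase model ids are assumptions, and the cost-aware flow later in this document uses ordered ROUTING_PREFERENCES lists rather than this single-winner table:\n\nROUTING_MATRIX = {\n    (\"CODE\", \"SIMPLE\"): \"sonnet\",    (\"CODE\", \"MEDIUM\"): \"opus\",      (\"CODE\", \"COMPLEX\"): \"opus\",\n    (\"ANALYSIS\", \"SIMPLE\"): \"flash\", (\"ANALYSIS\", \"MEDIUM\"): \"gpt-5\", (\"ANALYSIS\", \"COMPLEX\"): \"opus\",\n    (\"CREATIVE\", \"SIMPLE\"): \"sonnet\", (\"CREATIVE\", \"MEDIUM\"): \"opus\",  (\"CREATIVE\", \"COMPLEX\"): \"opus\",\n    (\"REALTIME\", \"SIMPLE\"): \"grok-2\", (\"REALTIME\", \"MEDIUM\"): \"grok-2\", (\"REALTIME\", \"COMPLEX\"): \"grok-3\",\n    (\"GENERAL\", \"SIMPLE\"): \"flash\",  (\"GENERAL\", \"MEDIUM\"): \"sonnet\",  (\"GENERAL\", \"COMPLEX\"): \"opus\",\n}\n\ndef lookup_route(intent: str, complexity: str) -> str:\n    # Sonnet as a mid-tier default for unknown pairs (assumption)\n    return ROUTING_MATRIX.get((intent, complexity), \"sonnet\")"
      },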
      {
        "title": "Token Exhaustion & Automatic Model Switching",
        "body": "When a model becomes unavailable mid-session (token quota exhausted, rate limit hit, API error), the router automatically switches to the next best available model and notifies the user."
      },
      {
        "title": "Notification Format",
        "body": "When a model switch occurs due to exhaustion, the user receives a notification:\n\n┌─────────────────────────────────────────────────────────────────┐\n│  ⚠️ MODEL SWITCH NOTICE                                         │\n│                                                                  │\n│  Your request could not be completed on claude-opus-4-5         │\n│  (reason: token quota exhausted).                               │\n│                                                                  │\n│  ✅ Request completed using: anthropic/claude-sonnet-4-5        │\n│                                                                  │\n│  The response below was generated by the fallback model.        │\n└─────────────────────────────────────────────────────────────────┘"
      },
      {
        "title": "Switch Reasons",
        "body": "ReasonDescriptiontoken quota exhaustedDaily/monthly token limit reachedrate limit exceededToo many requests per minutecontext window exceededInput too large for modelAPI timeoutModel took too long to respondAPI errorProvider returned an errormodel unavailableModel temporarily offline"
      },
      {
        "title": "Implementation",
        "body": "def execute_with_fallback(primary_model: str, fallback_chain: list[str], request: str) -> Response:\n    \"\"\"\n    Execute request with automatic fallback and user notification.\n    \"\"\"\n    attempted_models = []\n    switch_reason = None\n    \n    # Try primary model first\n    models_to_try = [primary_model] + fallback_chain\n    \n    for model in models_to_try:\n        try:\n            response = call_model(model, request)\n            \n            # If we switched models, prepend notification\n            if attempted_models:\n                notification = build_switch_notification(\n                    failed_model=attempted_models[0],\n                    reason=switch_reason,\n                    success_model=model\n                )\n                return Response(\n                    content=notification + \"\\n\\n---\\n\\n\" + response.content,\n                    model_used=model,\n                    switched=True\n                )\n            \n            return Response(content=response.content, model_used=model, switched=False)\n            \n        except TokenQuotaExhausted:\n            attempted_models.append(model)\n            switch_reason = \"token quota exhausted\"\n            log_fallback(model, switch_reason)\n            continue\n            \n        except RateLimitExceeded:\n            attempted_models.append(model)\n            switch_reason = \"rate limit exceeded\"\n            log_fallback(model, switch_reason)\n            continue\n            \n        except ContextWindowExceeded:\n            attempted_models.append(model)\n            switch_reason = \"context window exceeded\"\n            log_fallback(model, switch_reason)\n            continue\n            \n        except APITimeout:\n            attempted_models.append(model)\n            switch_reason = \"API timeout\"\n            log_fallback(model, switch_reason)\n            continue\n            \n        except APIError as e:\n            attempted_models.append(model)\n            switch_reason = f\"API error: {e.code}\"\n            log_fallback(model, switch_reason)\n            continue\n    \n    # All models exhausted\n    return build_exhaustion_error(attempted_models)\n\n\ndef build_switch_notification(failed_model: str, reason: str, success_model: str) -> str:\n    \"\"\"Build user-facing notification when model switch occurs.\"\"\"\n    return f\"\"\"⚠️ **MODEL SWITCH NOTICE**\n\nYour request could not be completed on `{failed_model}` (reason: {reason}).\n\n✅ **Request completed using:** `{success_model}`\n\nThe response below was generated by the fallback model.\"\"\"\n\n\ndef build_exhaustion_error(attempted_models: list[str]) -> Response:\n    \"\"\"Build error when all models are exhausted.\"\"\"\n    models_tried = \", \".join(attempted_models)\n    return Response(\n        content=f\"\"\"❌ **REQUEST FAILED**\n\nUnable to complete your request. All available models have been exhausted.\n\n**Models attempted:** {models_tried}\n\n**What you can do:**\n1. **Wait** — Token quotas typically reset hourly or daily\n2. **Simplify** — Try a shorter or simpler request\n3. **Check status** — Run `/router status` to see model availability\n\nIf this persists, your human may need to check API quotas or add additional providers.\"\"\",\n        model_used=None,\n        switched=False,\n        failed=True\n    )"
      },
      {
        "title": "Fallback Priority for Token Exhaustion",
        "body": "When a model is exhausted, the router selects the next best model for the same task type:\n\nOriginal ModelFallback Priority (same capability)OpusSonnet → GPT-5 → Grok-3 → Gemini ProSonnetGPT-5 → Grok-3 → Opus → HaikuGPT-5Sonnet → Opus → Grok-3 → Gemini ProGemini ProFlash → GPT-5 → Opus → SonnetGrok-2/3(warn: no real-time fallback available)"
      },
      {
        "title": "User Acknowledgment",
        "body": "After a model switch, the agent should note in the response that:\n\nThe original model was unavailable\nWhich model actually completed the request\nThe response quality may differ from the original model's typical output\n\nThis ensures transparency and sets appropriate expectations."
      },
      {
        "title": "Streaming Responses with Fallback",
        "body": "When using streaming responses, fallback handling requires special consideration:\n\nasync def execute_with_streaming_fallback(primary_model: str, fallback_chain: list[str], request: str):\n    \"\"\"\n    Handle streaming responses with mid-stream fallback.\n    \n    If a model fails DURING streaming (not before), the partial response is lost.\n    Strategy: Don't start streaming until first chunk received successfully.\n    \"\"\"\n    models_to_try = [primary_model] + fallback_chain\n    \n    for model in models_to_try:\n        try:\n            # Test with non-streaming ping first (optional, adds latency)\n            # await test_model_availability(model)\n            \n            # Start streaming\n            stream = await call_model_streaming(model, request)\n            first_chunk = await stream.get_first_chunk(timeout=10_000)  # 10s timeout for first chunk\n            \n            # If we got here, model is responding — continue streaming\n            yield first_chunk\n            async for chunk in stream:\n                yield chunk\n            return  # Success\n            \n        except (FirstChunkTimeout, StreamError) as e:\n            log_fallback(model, str(e))\n            continue  # Try next model\n    \n    # All models failed\n    yield build_exhaustion_error(models_to_try)\n\nKey insight: Wait for the first chunk before committing to a model. If the first chunk times out, fall back before any partial response is shown to the user."
      },
      {
        "title": "Retry Timing Configuration",
        "body": "RETRY_CONFIG = {\n    \"initial_timeout_ms\": 30_000,     # 30s for first attempt\n    \"fallback_timeout_ms\": 20_000,    # 20s for fallback attempts (faster fail)\n    \"max_retries_per_model\": 1,       # Don't retry same model\n    \"backoff_multiplier\": 1.5,        # Not used (no same-model retry)\n    \"circuit_breaker_threshold\": 3,   # Failures before skipping model entirely\n    \"circuit_breaker_reset_ms\": 300_000  # 5 min before trying failed model again\n}\n\nCircuit breaker: If a model fails 3 times in 5 minutes, skip it entirely for the next 5 minutes. This prevents repeatedly hitting a down service."
      },
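      {
        "title": "Circuit Breaker (Sketch)",
        "body": "A minimal sketch of the circuit-breaker rule described above, driven by the RETRY_CONFIG values from the previous section. The in-memory failure log and function names are illustrative assumptions:\n\nimport time\nfrom collections import defaultdict, deque\n\n_failures: dict = defaultdict(deque)  # model id -> recent failure timestamps\n\ndef record_failure(model: str) -> None:\n    _failures[model].append(time.monotonic())\n\ndef circuit_open(model: str) -> bool:\n    \"\"\"True if the model should be skipped (threshold failures within the reset window).\"\"\"\n    window_s = RETRY_CONFIG[\"circuit_breaker_reset_ms\"] / 1000\n    recent = _failures[model]\n    now = time.monotonic()\n    while recent and now - recent[0] > window_s:\n        recent.popleft()  # drop failures older than the window\n    return len(recent) >= RETRY_CONFIG[\"circuit_breaker_threshold\"]"
      },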
      {
        "title": "Fallback Chains",
        "body": "When the preferred model fails (rate limit, API down, error), cascade to the next option:"
      },
      {
        "title": "Code Tasks",
        "body": "Opus → Sonnet → GPT-5 → Gemini Pro"
      },
      {
        "title": "Analysis Tasks",
        "body": "Opus → GPT-5 → Gemini Pro → Sonnet"
      },
      {
        "title": "Creative Tasks",
        "body": "Opus → GPT-5 → Sonnet → Gemini Pro"
      },
      {
        "title": "Real-time Tasks",
        "body": "Grok-2 → Grok-3 → (warn: no real-time fallback)"
      },
      {
        "title": "General Tasks",
        "body": "Flash → Haiku → Sonnet → GPT-5"
      },
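      {
        "title": "Fallback Chains as Code (Sketch)",
        "body": "The chains above as a lookup table. MASTER_FALLBACK_CHAINS is the name the cost-aware routing flow below expects; the task-type keys and lowercase model ids are illustrative assumptions, and long-context requests are handled separately (next section):\n\nMASTER_FALLBACK_CHAINS = {\n    \"code\": [\"opus\", \"sonnet\", \"gpt-5\", \"gemini-pro\"],\n    \"analysis\": [\"opus\", \"gpt-5\", \"gemini-pro\", \"sonnet\"],\n    \"creative\": [\"opus\", \"gpt-5\", \"sonnet\", \"gemini-pro\"],\n    \"realtime\": [\"grok-2\", \"grok-3\"],  # no non-Grok real-time fallback: warn instead\n    \"general\": [\"flash\", \"haiku\", \"sonnet\", \"gpt-5\"],\n}"
      },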
      {
        "title": "Long Context (Tiered by Size)",
        "body": "┌─────────────────────────────────────────────────────────────────┐\n│                  LONG CONTEXT FALLBACK CHAIN                     │\n├─────────────────────────────────────────────────────────────────┤\n│  TOKEN COUNT        │ FALLBACK CHAIN                            │\n│  ───────────────────┼───────────────────────────────────────────│\n│  128K - 200K        │ Opus (200K) → Sonnet (200K) → Gemini Pro  │\n│  200K - 1M          │ Gemini Pro → Flash (1M) → ERROR_MESSAGE   │\n│  > 1M               │ ERROR_MESSAGE (no model supports this)    │\n└─────────────────────┴───────────────────────────────────────────┘\n\nImplementation:\n\ndef handle_long_context(token_count: int, available_models: dict) -> str | ErrorMessage:\n    \"\"\"Route long-context requests with graceful degradation.\"\"\"\n    \n    # Tier 1: 128K - 200K tokens (Opus/Sonnet can handle)\n    if token_count <= 200_000:\n        for model in [\"opus\", \"sonnet\", \"haiku\", \"gemini-pro\", \"flash\"]:\n            if model in available_models and get_context_limit(model) >= token_count:\n                return model\n    \n    # Tier 2: 200K - 1M tokens (only Gemini)\n    elif token_count <= 1_000_000:\n        for model in [\"gemini-pro\", \"flash\"]:\n            if model in available_models:\n                return model\n    \n    # Tier 3: > 1M tokens (nothing available)\n    # Fall through to error\n    \n    # No suitable model found — return helpful error\n    return build_context_error(token_count, available_models)\n\n\ndef build_context_error(token_count: int, available_models: dict) -> ErrorMessage:\n    \"\"\"Build a helpful error message when no model can handle the input.\"\"\"\n    \n    # Find the largest available context window\n    max_available = max(\n        (get_context_limit(m) for m in available_models),\n        default=0\n    )\n    \n    # Determine what's missing\n    missing_models = []\n    if \"gemini-pro\" not in available_models and \"flash\" not in available_models:\n        missing_models.append(\"Gemini Pro/Flash (1M context)\")\n    if token_count <= 200_000 and \"opus\" not in available_models:\n        missing_models.append(\"Opus (200K context)\")\n    \n    # Format token count for readability\n    if token_count >= 1_000_000:\n        token_display = f\"{token_count / 1_000_000:.1f}M\"\n    else:\n        token_display = f\"{token_count // 1000}K\"\n    \n    return ErrorMessage(\n        title=\"Context Window Exceeded\",\n        message=f\"\"\"Your input is approximately **{token_display} tokens**, which exceeds the context window of all currently available models.\n\n**Required:** Gemini Pro (1M context) {\"— currently unavailable\" if \"gemini-pro\" not in available_models else \"\"}\n**Your max available:** {max_available // 1000}K tokens\n\n**Options:**\n1. **Wait and retry** — Gemini may be temporarily down\n2. **Reduce input size** — Remove unnecessary content to fit within {max_available // 1000}K tokens\n3. **Split into chunks** — I can process your input sequentially in smaller pieces\n\nWould you like me to help split this into manageable chunks?\"\"\",\n        \n        recoverable=True,\n        suggested_action=\"split_chunks\"\n    )\n\nExample Error Output:\n\n⚠️ Context Window Exceeded\n\nYour input is approximately **340K tokens**, which exceeds the context \nwindow of all currently available models.\n\nRequired: Gemini Pro (1M context) — currently unavailable\nYour max available: 200K tokens\n\nOptions:\n1. 
Wait and retry — Gemini may be temporarily down\n2. Reduce input size — Remove unnecessary content to fit within 200K tokens\n3. Split into chunks — I can process your input sequentially in smaller pieces\n\nWould you like me to help split this into manageable chunks?"
      },
      {
        "title": "Dynamic Model Discovery",
        "body": "The router auto-detects available providers at runtime:\n\n1. Check configured auth profiles\n2. Build available model list from authenticated providers\n3. Construct routing table using ONLY available models\n4. If preferred model unavailable, use best available alternative\n\nExample: If only Anthropic and Google are configured:\n\nCode tasks → Opus (Anthropic available ✓)\nReal-time tasks → ⚠️ No Grok → Fall back to Opus + warn user\nLong docs → Gemini Pro (Google available ✓)"
      },
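      {
        "title": "Provider Discovery (Sketch)",
        "body": "A minimal sketch of runtime discovery. The real router reads configured auth profiles; checking environment variables is a stand-in assumption, as are the env-var names and per-provider model lists:\n\nimport os\n\nPROVIDER_MODELS = {\n    \"ANTHROPIC_API_KEY\": [\"opus\", \"sonnet\", \"haiku\"],\n    \"OPENAI_API_KEY\": [\"gpt-5\"],\n    \"GOOGLE_API_KEY\": [\"gemini-pro\", \"flash\"],\n    \"XAI_API_KEY\": [\"grok-2\", \"grok-3\"],\n}\n\ndef discover_providers() -> dict:\n    \"\"\"Build the available-model table from whichever providers are configured.\"\"\"\n    available = {}\n    for env_var, models in PROVIDER_MODELS.items():\n        if os.environ.get(env_var):\n            for model in models:\n                available[model] = {\"provider\": env_var}\n    return available"
      },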
      {
        "title": "Cost Optimization",
        "body": "The router considers cost when complexity is LOW:\n\nModelCost TierUse WhenGemini Flash$Simple tasks, high volumeClaude Haiku$Simple tasks, quick responsesClaude Sonnet$$Medium complexityGrok 2$$Real-time needs onlyGPT-5$$General fallbackGemini Pro$$$Long context needsClaude Opus$$$$Complex/critical tasks\n\nRule: Never use Opus ($$$) for tasks that Flash ($) can handle."
      },
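      {
        "title": "Cost Tiers as Code (Sketch)",
        "body": "The cost table above as the COST_TIERS mapping the routing flow below filters against, plus the select_cheapest helper it falls back on. The lowercase model ids are assumptions:\n\nCOST_TIERS = {\n    \"flash\": \"$\", \"haiku\": \"$\",\n    \"sonnet\": \"$$\", \"grok-2\": \"$$\", \"gpt-5\": \"$$\",\n    \"gemini-pro\": \"$$$\",\n    \"opus\": \"$$$$\",\n}\n\ndef select_cheapest(models: dict) -> str:\n    \"\"\"Pick the cheapest model in a pool (used when no routing preference matches).\"\"\"\n    rank = {\"$\": 0, \"$$\": 1, \"$$$\": 2, \"$$$$\": 3}\n    return min(models, key=lambda m: rank.get(COST_TIERS.get(m, \"$$\"), 1))"
      },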
      {
        "title": "Show Routing Decision",
        "body": "Add [show routing] to any message:\n\n[show routing] What's the weather in NYC?\n\nOutput includes:\n\n[Routed → xai/grok-2-latest | Reason: REALTIME intent detected | Fallback: none available]"
      },
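      {
        "title": "Routing Decision Format (Sketch)",
        "body": "A small sketch of how the routing line above could be rendered; the function name and signature are assumptions:\n\ndef format_routing_decision(model_id: str, reason: str, fallbacks: list) -> str:\n    \"\"\"Render the line shown when [show routing] is present in a message.\"\"\"\n    chain = \" → \".join(fallbacks) if fallbacks else \"none available\"\n    return f\"[Routed → {model_id} | Reason: {reason} | Fallback: {chain}]\"\n\nFor example, format_routing_decision(\"xai/grok-2-latest\", \"REALTIME intent detected\", []) reproduces the output shown above."
      },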
      {
        "title": "Force Specific Model",
        "body": "Explicit overrides:\n\n\"use grok: ...\" → Forces Grok\n\"use claude: ...\" → Forces Opus\n\"use gemini: ...\" → Forces Gemini Pro\n\"use flash: ...\" → Forces Gemini Flash\n\"use gpt: ...\" → Forces GPT-5"
      },
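      {
        "title": "Override Parsing (Sketch)",
        "body": "A minimal parser for the overrides above, mapping each alias to the model it forces. get_user_model_override is the name the routing flow below assumes; the regex and alias table are illustrative:\n\nimport re\n\nOVERRIDE_ALIASES = {\n    \"grok\": \"grok-2\",\n    \"claude\": \"opus\",\n    \"gemini\": \"gemini-pro\",\n    \"flash\": \"flash\",\n    \"gpt\": \"gpt-5\",\n}\n\ndef get_user_model_override(request: str):\n    \"\"\"Return the forced model id, or None when no override prefix is present.\"\"\"\n    match = re.match(r\"\\s*use (\\w+):\", request.lower())\n    return OVERRIDE_ALIASES.get(match.group(1)) if match else None"
      },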
      {
        "title": "Check Router Status",
        "body": "Ask: \"router status\" or \"/router\" to see:\n\nAvailable providers\nConfigured models\nCurrent routing table\nRecent routing decisions"
      },
      {
        "title": "For Agent Implementation",
        "body": "When processing a request:\n\n1. DETECT available models (check auth profiles)\n2. CLASSIFY intent (code/analysis/creative/realtime/general)\n3. ESTIMATE complexity (simple/medium/complex)\n4. CHECK special cases (context size, vision, explicit override)\n5. FILTER by cost tier based on complexity ← BEFORE model selection\n6. SELECT model from filtered pool using routing matrix\n7. VERIFY model available, else use fallback chain (also cost-filtered)\n8. EXECUTE request with selected model\n9. IF failure, try next in fallback chain\n10. LOG routing decision (for debugging)"
      },
      {
        "title": "Cost-Aware Routing Flow (Critical Order)",
        "body": "def route_with_fallback(request):\n    \"\"\"\n    Main routing function with CORRECT execution order.\n    Cost filtering MUST happen BEFORE routing table lookup.\n    \"\"\"\n    \n    # Step 1: Discover available models\n    available_models = discover_providers()\n    \n    # Step 2: Classify intent\n    intent = classify_intent(request)\n    \n    # Step 3: Estimate complexity\n    complexity = estimate_complexity(request)\n    \n    # Step 4: Check special-case overrides (these bypass cost filtering)\n    if user_override := get_user_model_override(request):\n        return execute_with_fallback(user_override, [])  # No cost filter for explicit override\n    \n    if token_count > 128_000:\n        return handle_long_context(token_count, available_models)  # Special handling\n    \n    if needs_realtime(request):\n        return execute_with_fallback(\"grok-2\", [\"grok-3\"])  # Realtime bypasses cost\n    \n    # ┌─────────────────────────────────────────────────────────────┐\n    # │  STEP 5: FILTER BY COST TIER — THIS MUST COME FIRST!       │\n    # │                                                             │\n    # │  Cost filtering happens BEFORE the routing table lookup,   │\n    # │  NOT after. This ensures \"what's 2+2?\" never considers     │\n    # │  Opus even momentarily.                                    │\n    # └─────────────────────────────────────────────────────────────┘\n    \n    allowed_tiers = get_allowed_tiers(complexity)\n    # SIMPLE  → [\"$\"]\n    # MEDIUM  → [\"$\", \"$$\"]\n    # COMPLEX → [\"$\", \"$$\", \"$$$\"]\n    \n    cost_filtered_models = {\n        model: meta for model, meta in available_models.items()\n        if COST_TIERS.get(model) in allowed_tiers\n    }\n    \n    # Step 6: NOW select from cost-filtered pool using routing preferences\n    preferences = ROUTING_PREFERENCES.get((intent, complexity), [])\n    \n    for model in preferences:\n        if model in cost_filtered_models:  # Only consider cost-appropriate models\n            selected_model = model\n            break\n    else:\n        # No preferred model in cost-filtered pool — use cheapest available\n        selected_model = select_cheapest(cost_filtered_models)\n    \n    # Step 7: Build cost-filtered fallback chain\n    task_type = get_task_type(intent, complexity)\n    full_chain = MASTER_FALLBACK_CHAINS.get(task_type, [])\n    filtered_chain = [m for m in full_chain if m in cost_filtered_models and m != selected_model]\n    \n    # Step 8-10: Execute with fallback + logging\n    return execute_with_fallback(selected_model, filtered_chain)\n\n\ndef get_allowed_tiers(complexity: str) -> list[str]:\n    \"\"\"Return allowed cost tiers for a given complexity level.\"\"\"\n    return {\n        \"SIMPLE\":  [\"$\"],                      # Budget only — no exceptions\n        \"MEDIUM\":  [\"$\", \"$$\"],                # Budget + standard\n        \"COMPLEX\": [\"$\", \"$$\", \"$$$\", \"$$$$\"], # All tiers — complex tasks deserve the best\n    }.get(complexity, [\"$\", \"$$\"])\n\n\n# Example flow for \"what's 2+2?\":\n#\n# 1. available_models = {opus, sonnet, haiku, flash, grok-2, ...}\n# 2. intent = GENERAL\n# 3. complexity = SIMPLE\n# 4. (no special cases)\n# 5. allowed_tiers = [\"$\"]  ← SIMPLE means $ only\n#    cost_filtered_models = {haiku, flash, grok-2}  ← Opus/Sonnet EXCLUDED\n# 6. preferences for (GENERAL, SIMPLE) = [flash, haiku, grok-2, sonnet]\n#    first match in cost_filtered = flash ✓\n# 7. 
fallback_chain = [haiku, grok-2]  ← Also cost-filtered\n# 8. execute with flash\n#\n# Result: Opus is NEVER considered, not even momentarily."
      },
      {
        "title": "Cost Optimization: Two Approaches",
        "body": "┌─────────────────────────────────────────────────────────────────┐\n│           COST OPTIMIZATION IMPLEMENTATION OPTIONS               │\n├─────────────────────────────────────────────────────────────────┤\n│                                                                  │\n│  APPROACH 1: Explicit filter_by_cost() (shown above)            │\n│  ─────────────────────────────────────────────────────────────  │\n│  • Calls get_allowed_tiers(complexity) explicitly               │\n│  • Filters available_models BEFORE routing table lookup         │\n│  • Most defensive — impossible to route wrong tier              │\n│  • Recommended for security-critical deployments                │\n│                                                                  │\n│  APPROACH 2: Preference ordering (implicit)                     │\n│  ─────────────────────────────────────────────────────────────  │\n│  • ROUTING_PREFERENCES lists cheapest capable models first      │\n│  • For SIMPLE tasks: [flash, haiku, grok-2, sonnet]            │\n│  • First available match wins → naturally picks cheapest        │\n│  • Simpler code, relies on correct preference ordering          │\n│                                                                  │\n│  This implementation uses BOTH for defense-in-depth:            │\n│  • Preference ordering provides first line of cost awareness    │\n│  • Explicit filter_by_cost() guarantees tier enforcement        │\n│                                                                  │\n│  For alternative implementations that rely solely on            │\n│  preference ordering, see references/models.md for the          │\n│  filter_by_cost() function if explicit enforcement is needed.   │\n│                                                                  │\n└─────────────────────────────────────────────────────────────────┘"
      },
      {
        "title": "Spawning with Different Models",
        "body": "Use sessions_spawn for model routing:\n\nsessions_spawn(\n  task: \"user's request\",\n  model: \"selected/model-id\",\n  label: \"task-type-query\"\n)"
      },
      {
        "title": "Security",
        "body": "Never send sensitive data to untrusted models\nAPI keys handled via environment/auth profiles only\nSee references/security.md for full security guidance"
      },
      {
        "title": "Model Details",
        "body": "See references/models.md for detailed capabilities and pricing."
      }
    ]
  },
  "trust": {
    "sourceLabel": "tencent",
    "provenanceUrl": "https://clawhub.ai/c0nSpIc0uS7uRk3r/smart-router",
    "publisherUrl": "https://clawhub.ai/c0nSpIc0uS7uRk3r/smart-router",
    "owner": "c0nSpIc0uS7uRk3r",
    "version": "0.1.2",
    "license": null,
    "verificationStatus": "Indexed source record"
  },
  "links": {
    "detailUrl": "https://openagent3.xyz/skills/smart-router",
    "downloadUrl": "https://openagent3.xyz/downloads/smart-router",
    "agentUrl": "https://openagent3.xyz/skills/smart-router/agent",
    "manifestUrl": "https://openagent3.xyz/skills/smart-router/agent.json",
    "briefUrl": "https://openagent3.xyz/skills/smart-router/agent.md"
  }
}