{
  "schemaVersion": "1.0",
  "item": {
    "slug": "anydocs",
    "name": "anydocs - Generic Documentation Indexing & Search",
    "source": "tencent",
    "type": "skill",
    "category": "效率提升",
    "sourceUrl": "https://clawhub.ai/Pektech/anydocs",
    "canonicalUrl": "https://clawhub.ai/Pektech/anydocs",
    "targetPlatform": "OpenClaw"
  },
  "install": {
    "downloadMode": "redirect",
    "downloadUrl": "/downloads/anydocs",
    "sourceDownloadUrl": "https://wry-manatee-359.convex.site/api/v1/download?slug=anydocs",
    "sourcePlatform": "tencent",
    "targetPlatform": "OpenClaw",
    "installMethod": "Manual import",
    "extraction": "Extract archive",
    "prerequisites": [
      "OpenClaw"
    ],
    "packageFormat": "ZIP package",
    "includedAssets": [
      "anydocs.py",
      "cli.py",
      "setup.sh",
      "manifest.json",
      "README.md",
      "SKILL.md"
    ],
    "primaryDoc": "SKILL.md",
    "quickSetup": [
      "Download the package from Yavira.",
      "Extract the archive and review SKILL.md first.",
      "Import or place the package into your OpenClaw setup."
    ],
    "agentAssist": {
      "summary": "Hand the extracted package to your coding agent with a concrete install brief instead of figuring it out manually.",
      "steps": [
        "Download the package from Yavira.",
        "Extract it into a folder your agent can access.",
        "Paste one of the prompts below and point your agent at the extracted folder."
      ],
      "prompts": [
        {
          "label": "New install",
          "body": "I downloaded a skill package from Yavira. Read SKILL.md from the extracted folder and install it by following the included instructions. Then review README.md for any prerequisites, environment setup, or post-install checks. Tell me what you changed and call out any manual steps you could not complete."
        },
        {
          "label": "Upgrade existing",
          "body": "I downloaded an updated skill package from Yavira. Read SKILL.md from the extracted folder, compare it with my current installation, and upgrade it while preserving any custom configuration unless the package docs explicitly say otherwise. Then review README.md for any prerequisites, environment setup, or post-install checks. Summarize what changed and any follow-up checks I should run."
        }
      ]
    },
    "sourceHealth": {
      "source": "tencent",
      "status": "healthy",
      "reason": "direct_download_ok",
      "recommendedAction": "download",
      "checkedAt": "2026-04-30T16:55:25.780Z",
      "expiresAt": "2026-05-07T16:55:25.780Z",
      "httpStatus": 200,
      "finalUrl": "https://wry-manatee-359.convex.site/api/v1/download?slug=network",
      "contentType": "application/zip",
      "probeMethod": "head",
      "details": {
        "probeUrl": "https://wry-manatee-359.convex.site/api/v1/download?slug=network",
        "contentDisposition": "attachment; filename=\"network-1.0.0.zip\"",
        "redirectLocation": null,
        "bodySnippet": null
      },
      "scope": "source",
      "summary": "Source download looks usable.",
      "detail": "Yavira can redirect you to the upstream package for this source.",
      "primaryActionLabel": "Download for OpenClaw",
      "primaryActionHref": "/downloads/anydocs"
    },
    "validation": {
      "installChecklist": [
        "Use the Yavira download entry.",
        "Review SKILL.md after the package is downloaded.",
        "Confirm the extracted package contains the expected setup assets."
      ],
      "postInstallChecks": [
        "Confirm the extracted package includes the expected docs or setup files.",
        "Validate the skill or prompts are available in your target agent workspace.",
        "Capture any manual follow-up steps the agent could not complete."
      ]
    },
    "downloadPageUrl": "https://openagent3.xyz/downloads/anydocs",
    "agentPageUrl": "https://openagent3.xyz/skills/anydocs/agent",
    "manifestUrl": "https://openagent3.xyz/skills/anydocs/agent.json",
    "briefUrl": "https://openagent3.xyz/skills/anydocs/agent.md"
  },
  "agentAssist": {
    "summary": "Hand the extracted package to your coding agent with a concrete install brief instead of figuring it out manually.",
    "steps": [
      "Download the package from Yavira.",
      "Extract it into a folder your agent can access.",
      "Paste one of the prompts below and point your agent at the extracted folder."
    ],
    "prompts": [
      {
        "label": "New install",
        "body": "I downloaded a skill package from Yavira. Read SKILL.md from the extracted folder and install it by following the included instructions. Then review README.md for any prerequisites, environment setup, or post-install checks. Tell me what you changed and call out any manual steps you could not complete."
      },
      {
        "label": "Upgrade existing",
        "body": "I downloaded an updated skill package from Yavira. Read SKILL.md from the extracted folder, compare it with my current installation, and upgrade it while preserving any custom configuration unless the package docs explicitly say otherwise. Then review README.md for any prerequisites, environment setup, or post-install checks. Summarize what changed and any follow-up checks I should run."
      }
    ]
  },
  "documentation": {
    "source": "clawhub",
    "primaryDoc": "SKILL.md",
    "sections": [
      {
        "title": "anydocs - Generic Documentation Indexing & Search",
        "body": "A powerful, reusable skill for indexing and searching ANY documentation site."
      },
      {
        "title": "What It Does",
        "body": "anydocs solves a real problem: accessing documentation from code or CLI. Instead of opening a browser every time, you can:\n\nIndex any documentation site (Discord, OpenClaw, internal docs, etc.)\nSearch instantly from the command line or Python API\nCache pages locally to avoid repeated network calls\nConfigure multiple profiles for different doc sites"
      },
      {
        "title": "When to Use It",
        "body": "Use anydocs when you need to:\n\nQuickly look up API documentation without leaving the terminal\nBuild agents that need to reference docs\nExtract specific information from documentation\nSearch across multiple documentation sites\nIntegrate docs into your workflow"
      },
      {
        "title": "🔍 Multi-Method Search",
        "body": "Keyword search: Fast, term-based matching with BM25-style scoring\nHybrid search: Keyword + phrase proximity for better relevance\nRegex search: Advanced pattern matching for power users"
      },
      {
        "title": "🌐 Works with Any Docs Site",
        "body": "Sitemap-based discovery (standard XML sitemap)\nFallback crawling from base URL\nHTML content extraction with smart selector detection\nAutomatic rate limiting to be respectful"
      },
      {
        "title": "💾 Smart Caching",
        "body": "Pages cached locally with 7-day TTL (configurable)\nSearch indexes cached for instant second searches\nCache statistics and cleanup commands\nRespects cache invalidation"
      },
      {
        "title": "⚙️ Profile-Based Configuration",
        "body": "Support multiple doc sites simultaneously\nPer-profile search methods and cache TTLs\nConfiguration stored in ~/.anydocs/config.json\nExamples for Discord, OpenClaw, and custom sites"
      },
      {
        "title": "🌐 JavaScript Rendering (Optional)",
        "body": "Uses Playwright to render client-side SPAs (Single Page Apps)\nAutomatically discovers links on JS-heavy sites like Discord docs\nGracefully falls back to standard HTTP if Playwright unavailable\nConfigure per-discovery session or globally per profile"
      },
      {
        "title": "Installation",
        "body": "cd /path/to/skills/anydocs\npip install -r requirements.txt\nchmod +x anydocs.py"
      },
      {
        "title": "Optional: Browser-based rendering (for JavaScript-heavy sites)",
        "body": "For sites like Discord that use client-side rendering, install Playwright:\n\npip install playwright==1.40.0\nplaywright install  # Downloads Chromium\n\nIf Playwright is unavailable, anydocs gracefully falls back to standard HTTP fetching."
      },
      {
        "title": "1. Configure a Documentation Site",
        "body": "python anydocs.py config vuejs \\\n  https://vuejs.org \\\n  https://vuejs.org/sitemap.xml"
      },
      {
        "title": "2. Build the Index",
        "body": "python anydocs.py index vuejs\n\nThis discovers all pages via sitemap, scrapes content, and builds a searchable index."
      },
      {
        "title": "3. Search",
        "body": "python anydocs.py search \"composition api\" --profile vuejs\npython anydocs.py search \"reactivity\" --profile vuejs --limit 5"
      },
      {
        "title": "4. Fetch a Specific Page",
        "body": "python anydocs.py fetch \"guide/introduction\" --profile vuejs"
      },
      {
        "title": "Configuration",
        "body": "# Add or update a profile\nanydocs config <profile> <base_url> <sitemap_url> [--search-method hybrid] [--ttl-days 7]\n\n# List configured profiles\nanydocs list-profiles"
      },
      {
        "title": "Indexing",
        "body": "# Build index for a profile\nanydocs index <profile>\n\n# Force re-index (skip cache)\nanydocs index <profile> --force"
      },
      {
        "title": "Search",
        "body": "# Basic keyword search\nanydocs search \"query\" --profile discord\n\n# Limit results\nanydocs search \"query\" --profile discord --limit 5\n\n# Regex search\nanydocs search \"^API\" --profile discord --regex"
      },
      {
        "title": "Fetch",
        "body": "# Fetch a specific page (URL or path)\nanydocs fetch \"https://discord.com/developers/docs/resources/webhook\"\nanydocs fetch \"resources/webhook\" --profile discord"
      },
      {
        "title": "Cache Management",
        "body": "# Show cache statistics\nanydocs cache status\n\n# Clear all cache\nanydocs cache clear\n\n# Clear specific profile's cache\nanydocs cache clear --profile discord"
      },
      {
        "title": "Python API",
        "body": "For use in agents and scripts:\n\nfrom lib.config import ConfigManager\nfrom lib.scraper import DiscoveryEngine\nfrom lib.indexer import SearchIndex\n\n# Load configuration\nconfig_mgr = ConfigManager()\nconfig = config_mgr.get_profile(\"discord\")\n\n# Scrape documentation\nscraper = DiscoveryEngine(config[\"base_url\"], config[\"sitemap_url\"])\npages = scraper.fetch_all()\n\n# Build search index\nindex = SearchIndex()\nindex.build(pages)\n\n# Search\nresults = index.search(\"webhooks\", limit=10)\nfor result in results:\n    print(f\"{result['title']} ({result['relevance_score']})\")\n    print(f\"  {result['url']}\")"
      },
      {
        "title": "Configuration File Format",
        "body": "Configuration is stored in ~/.anydocs/config.json:\n\n{\n  \"discord\": {\n    \"name\": \"discord\",\n    \"base_url\": \"https://discord.com/developers/docs\",\n    \"sitemap_url\": \"https://discord.com/developers/docs/sitemap.xml\",\n    \"search_method\": \"hybrid\",\n    \"cache_ttl_days\": 7\n  },\n  \"openclaw\": {\n    \"name\": \"openclaw\",\n    \"base_url\": \"https://docs.openclaw.ai\",\n    \"sitemap_url\": \"https://docs.openclaw.ai/sitemap.xml\",\n    \"search_method\": \"hybrid\",\n    \"cache_ttl_days\": 7\n  }\n}"
      },
      {
        "title": "Keyword Search",
        "body": "Speed: Fast\nBest for: Common terms, exact matches\nHow it works: Term matching with position weighting (title > tags > content)\nExample: anydocs search \"webhooks\""
      },
      {
        "title": "Hybrid Search (Default)",
        "body": "Speed: Fast\nBest for: Natural language queries\nHow it works: Keyword search + phrase proximity scoring\nExample: anydocs search \"how to set up webhooks\""
      },
      {
        "title": "Regex Search",
        "body": "Speed: Medium\nBest for: Complex patterns\nHow it works: Compiled regex pattern matching across all content\nExample: anydocs search \"^(GET|POST)\" --regex"
      },
      {
        "title": "Caching Behavior",
        "body": "Pages: Cached as JSON with 7-day TTL (configurable)\nIndexes: Cached after indexing, invalidated on TTL expiry\nCache location: ~/.anydocs/cache/\nManual refresh: Use --force flag or clear cache"
      },
      {
        "title": "Performance Notes",
        "body": "First index build takes 2-10 minutes depending on site size\nSubsequent searches are instant (cached indexes)\nRate limit: 0.5s per page to be respectful\nTypical search returns ~100 results in <100ms"
      },
      {
        "title": "\"No index for 'profile'\" error",
        "body": "Run anydocs index <profile> first to build the index."
      },
      {
        "title": "Sitemap not found",
        "body": "Check the sitemap URL. Falls back to crawling from base_url if unavailable."
      },
      {
        "title": "Slow indexing",
        "body": "This is normal for large sites. Rate limiting prevents overwhelming servers."
      },
      {
        "title": "Cache grows too large",
        "body": "Run anydocs cache clear or set --ttl-days to a smaller value."
      },
      {
        "title": "Vue.js Framework Docs (SPA Example)",
        "body": "anydocs config vuejs \\\n  https://vuejs.org \\\n  https://vuejs.org/sitemap.xml\nanydocs index vuejs\nanydocs search \"composition api\""
      },
      {
        "title": "Next.js API Docs",
        "body": "anydocs config nextjs \\\n  https://nextjs.org \\\n  https://nextjs.org/sitemap.xml\nanydocs index nextjs\nanydocs search \"app router\" --profile nextjs"
      },
      {
        "title": "Internal Company Documentation",
        "body": "anydocs config internal \\\n  https://docs.company.local \\\n  https://docs.company.local/sitemap.xml\nanydocs index internal --force\nanydocs search \"deployment\" --profile internal"
      },
      {
        "title": "Architecture",
        "body": "scraper.py: Discovers URLs via sitemap, fetches and parses HTML\nindexer.py: Builds searchable indexes, implements multiple search strategies\nconfig.py: Manages configuration profiles\ncache.py: TTL-based file caching for pages and indexes\ncli.py: Click-based command-line interface"
      },
      {
        "title": "Contributing",
        "body": "To add new documentation sites, run:\n\nanydocs config <profile> <base_url> <sitemap_url>\n\nTo extend search functionality, modify lib/indexer.py."
      },
      {
        "title": "License",
        "body": "Part of the OpenClaw system."
      }
    ],
    "body": "anydocs - Generic Documentation Indexing & Search\n\nA powerful, reusable skill for indexing and searching ANY documentation site.\n\nWhat It Does\n\nanydocs solves a real problem: accessing documentation from code or CLI. Instead of opening a browser every time, you can:\n\nIndex any documentation site (Discord, OpenClaw, internal docs, etc.)\nSearch instantly from the command line or Python API\nCache pages locally to avoid repeated network calls\nConfigure multiple profiles for different doc sites\nWhen to Use It\n\nUse anydocs when you need to:\n\nQuickly look up API documentation without leaving the terminal\nBuild agents that need to reference docs\nExtract specific information from documentation\nSearch across multiple documentation sites\nIntegrate docs into your workflow\nKey Features\n🔍 Multi-Method Search\nKeyword search: Fast, term-based matching with BM25-style scoring\nHybrid search: Keyword + phrase proximity for better relevance\nRegex search: Advanced pattern matching for power users\n🌐 Works with Any Docs Site\nSitemap-based discovery (standard XML sitemap)\nFallback crawling from base URL\nHTML content extraction with smart selector detection\nAutomatic rate limiting to be respectful\n💾 Smart Caching\nPages cached locally with 7-day TTL (configurable)\nSearch indexes cached for instant second searches\nCache statistics and cleanup commands\nRespects cache invalidation\n⚙️ Profile-Based Configuration\nSupport multiple doc sites simultaneously\nPer-profile search methods and cache TTLs\nConfiguration stored in ~/.anydocs/config.json\nExamples for Discord, OpenClaw, and custom sites\n🌐 JavaScript Rendering (Optional)\nUses Playwright to render client-side SPAs (Single Page Apps)\nAutomatically discovers links on JS-heavy sites like Discord docs\nGracefully falls back to standard HTTP if Playwright unavailable\nConfigure per-discovery session or globally per profile\nInstallation\ncd /path/to/skills/anydocs\npip install -r requirements.txt\nchmod +x anydocs.py\n\nOptional: Browser-based rendering (for JavaScript-heavy sites)\n\nFor sites like Discord that use client-side rendering, install Playwright:\n\npip install playwright==1.40.0\nplaywright install  # Downloads Chromium\n\n\nIf Playwright is unavailable, anydocs gracefully falls back to standard HTTP fetching.\n\nQuick Start\n1. Configure a Documentation Site\npython anydocs.py config vuejs \\\n  https://vuejs.org \\\n  https://vuejs.org/sitemap.xml\n\n2. Build the Index\npython anydocs.py index vuejs\n\n\nThis discovers all pages via sitemap, scrapes content, and builds a searchable index.\n\n3. Search\npython anydocs.py search \"composition api\" --profile vuejs\npython anydocs.py search \"reactivity\" --profile vuejs --limit 5\n\n4. Fetch a Specific Page\npython anydocs.py fetch \"guide/introduction\" --profile vuejs\n\nCLI Commands\nConfiguration\n# Add or update a profile\nanydocs config <profile> <base_url> <sitemap_url> [--search-method hybrid] [--ttl-days 7]\n\n# List configured profiles\nanydocs list-profiles\n\nIndexing\n# Build index for a profile\nanydocs index <profile>\n\n# Force re-index (skip cache)\nanydocs index <profile> --force\n\nSearch\n# Basic keyword search\nanydocs search \"query\" --profile discord\n\n# Limit results\nanydocs search \"query\" --profile discord --limit 5\n\n# Regex search\nanydocs search \"^API\" --profile discord --regex\n\nFetch\n# Fetch a specific page (URL or path)\nanydocs fetch \"https://discord.com/developers/docs/resources/webhook\"\nanydocs fetch \"resources/webhook\" --profile discord\n\nCache Management\n# Show cache statistics\nanydocs cache status\n\n# Clear all cache\nanydocs cache clear\n\n# Clear specific profile's cache\nanydocs cache clear --profile discord\n\nPython API\n\nFor use in agents and scripts:\n\nfrom lib.config import ConfigManager\nfrom lib.scraper import DiscoveryEngine\nfrom lib.indexer import SearchIndex\n\n# Load configuration\nconfig_mgr = ConfigManager()\nconfig = config_mgr.get_profile(\"discord\")\n\n# Scrape documentation\nscraper = DiscoveryEngine(config[\"base_url\"], config[\"sitemap_url\"])\npages = scraper.fetch_all()\n\n# Build search index\nindex = SearchIndex()\nindex.build(pages)\n\n# Search\nresults = index.search(\"webhooks\", limit=10)\nfor result in results:\n    print(f\"{result['title']} ({result['relevance_score']})\")\n    print(f\"  {result['url']}\")\n\nConfiguration File Format\n\nConfiguration is stored in ~/.anydocs/config.json:\n\n{\n  \"discord\": {\n    \"name\": \"discord\",\n    \"base_url\": \"https://discord.com/developers/docs\",\n    \"sitemap_url\": \"https://discord.com/developers/docs/sitemap.xml\",\n    \"search_method\": \"hybrid\",\n    \"cache_ttl_days\": 7\n  },\n  \"openclaw\": {\n    \"name\": \"openclaw\",\n    \"base_url\": \"https://docs.openclaw.ai\",\n    \"sitemap_url\": \"https://docs.openclaw.ai/sitemap.xml\",\n    \"search_method\": \"hybrid\",\n    \"cache_ttl_days\": 7\n  }\n}\n\nSearch Methods\nKeyword Search\nSpeed: Fast\nBest for: Common terms, exact matches\nHow it works: Term matching with position weighting (title > tags > content)\nExample: anydocs search \"webhooks\"\nHybrid Search (Default)\nSpeed: Fast\nBest for: Natural language queries\nHow it works: Keyword search + phrase proximity scoring\nExample: anydocs search \"how to set up webhooks\"\nRegex Search\nSpeed: Medium\nBest for: Complex patterns\nHow it works: Compiled regex pattern matching across all content\nExample: anydocs search \"^(GET|POST)\" --regex\nCaching Behavior\nPages: Cached as JSON with 7-day TTL (configurable)\nIndexes: Cached after indexing, invalidated on TTL expiry\nCache location: ~/.anydocs/cache/\nManual refresh: Use --force flag or clear cache\nPerformance Notes\nFirst index build takes 2-10 minutes depending on site size\nSubsequent searches are instant (cached indexes)\nRate limit: 0.5s per page to be respectful\nTypical search returns ~100 results in <100ms\nTroubleshooting\n\"No index for 'profile'\" error\n\nRun anydocs index <profile> first to build the index.\n\nSitemap not found\n\nCheck the sitemap URL. Falls back to crawling from base_url if unavailable.\n\nSlow indexing\n\nThis is normal for large sites. Rate limiting prevents overwhelming servers.\n\nCache grows too large\n\nRun anydocs cache clear or set --ttl-days to a smaller value.\n\nExamples\nVue.js Framework Docs (SPA Example)\nanydocs config vuejs \\\n  https://vuejs.org \\\n  https://vuejs.org/sitemap.xml\nanydocs index vuejs\nanydocs search \"composition api\"\n\nNext.js API Docs\nanydocs config nextjs \\\n  https://nextjs.org \\\n  https://nextjs.org/sitemap.xml\nanydocs index nextjs\nanydocs search \"app router\" --profile nextjs\n\nInternal Company Documentation\nanydocs config internal \\\n  https://docs.company.local \\\n  https://docs.company.local/sitemap.xml\nanydocs index internal --force\nanydocs search \"deployment\" --profile internal\n\nArchitecture\nscraper.py: Discovers URLs via sitemap, fetches and parses HTML\nindexer.py: Builds searchable indexes, implements multiple search strategies\nconfig.py: Manages configuration profiles\ncache.py: TTL-based file caching for pages and indexes\ncli.py: Click-based command-line interface\nContributing\n\nTo add new documentation sites, run:\n\nanydocs config <profile> <base_url> <sitemap_url>\n\n\nTo extend search functionality, modify lib/indexer.py.\n\nLicense\n\nPart of the OpenClaw system."
  },
  "trust": {
    "sourceLabel": "tencent",
    "provenanceUrl": "https://clawhub.ai/Pektech/anydocs",
    "publisherUrl": "https://clawhub.ai/Pektech/anydocs",
    "owner": "Pektech",
    "version": "1.0.2",
    "license": null,
    "verificationStatus": "Indexed source record"
  },
  "links": {
    "detailUrl": "https://openagent3.xyz/skills/anydocs",
    "downloadUrl": "https://openagent3.xyz/downloads/anydocs",
    "agentUrl": "https://openagent3.xyz/skills/anydocs/agent",
    "manifestUrl": "https://openagent3.xyz/skills/anydocs/agent.json",
    "briefUrl": "https://openagent3.xyz/skills/anydocs/agent.md"
  }
}