{
  "schemaVersion": "1.0",
  "item": {
    "slug": "tabstack-extractor",
    "name": "Tabstack Extractor",
    "source": "tencent",
    "type": "skill",
    "category": "开发工具",
    "sourceUrl": "https://clawhub.ai/noblepayne/tabstack-extractor",
    "canonicalUrl": "https://clawhub.ai/noblepayne/tabstack-extractor",
    "targetPlatform": "OpenClaw"
  },
  "install": {
    "downloadMode": "redirect",
    "downloadUrl": "/downloads/tabstack-extractor",
    "sourceDownloadUrl": "https://wry-manatee-359.convex.site/api/v1/download?slug=tabstack-extractor",
    "sourcePlatform": "tencent",
    "targetPlatform": "OpenClaw",
    "installMethod": "Manual import",
    "extraction": "Extract archive",
    "prerequisites": [
      "OpenClaw"
    ],
    "packageFormat": "ZIP package",
    "includedAssets": [
      "SKILL.md",
      "references/api_reference.md",
      "references/job_schema.json",
      "references/news_schema.json",
      "references/schema_guide.md",
      "references/simple_article.json"
    ],
    "primaryDoc": "SKILL.md",
    "quickSetup": [
      "Download the package from Yavira.",
      "Extract the archive and review SKILL.md first.",
      "Import or place the package into your OpenClaw setup."
    ],
    "agentAssist": {
      "summary": "Hand the extracted package to your coding agent with a concrete install brief instead of figuring it out manually.",
      "steps": [
        "Download the package from Yavira.",
        "Extract it into a folder your agent can access.",
        "Paste one of the prompts below and point your agent at the extracted folder."
      ],
      "prompts": [
        {
          "label": "New install",
          "body": "I downloaded a skill package from Yavira. Read SKILL.md from the extracted folder and install it by following the included instructions. Tell me what you changed and call out any manual steps you could not complete."
        },
        {
          "label": "Upgrade existing",
          "body": "I downloaded an updated skill package from Yavira. Read SKILL.md from the extracted folder, compare it with my current installation, and upgrade it while preserving any custom configuration unless the package docs explicitly say otherwise. Summarize what changed and any follow-up checks I should run."
        }
      ]
    },
    "sourceHealth": {
      "source": "tencent",
      "slug": "tabstack-extractor",
      "status": "healthy",
      "reason": "direct_download_ok",
      "recommendedAction": "download",
      "checkedAt": "2026-05-09T16:13:46.584Z",
      "expiresAt": "2026-05-16T16:13:46.584Z",
      "httpStatus": 200,
      "finalUrl": "https://wry-manatee-359.convex.site/api/v1/download?slug=tabstack-extractor",
      "contentType": "application/zip",
      "probeMethod": "head",
      "details": {
        "probeUrl": "https://wry-manatee-359.convex.site/api/v1/download?slug=tabstack-extractor",
        "contentDisposition": "attachment; filename=\"tabstack-extractor-0.1.0.zip\"",
        "redirectLocation": null,
        "bodySnippet": null,
        "slug": "tabstack-extractor"
      },
      "scope": "item",
      "summary": "Item download looks usable.",
      "detail": "Yavira can redirect you to the upstream package for this item.",
      "primaryActionLabel": "Download for OpenClaw",
      "primaryActionHref": "/downloads/tabstack-extractor"
    },
    "validation": {
      "installChecklist": [
        "Use the Yavira download entry.",
        "Review SKILL.md after the package is downloaded.",
        "Confirm the extracted package contains the expected setup assets."
      ],
      "postInstallChecks": [
        "Confirm the extracted package includes the expected docs or setup files.",
        "Validate the skill or prompts are available in your target agent workspace.",
        "Capture any manual follow-up steps the agent could not complete."
      ]
    },
    "downloadPageUrl": "https://openagent3.xyz/downloads/tabstack-extractor",
    "agentPageUrl": "https://openagent3.xyz/skills/tabstack-extractor/agent",
    "manifestUrl": "https://openagent3.xyz/skills/tabstack-extractor/agent.json",
    "briefUrl": "https://openagent3.xyz/skills/tabstack-extractor/agent.md"
  },
  "agentAssist": {
    "summary": "Hand the extracted package to your coding agent with a concrete install brief instead of figuring it out manually.",
    "steps": [
      "Download the package from Yavira.",
      "Extract it into a folder your agent can access.",
      "Paste one of the prompts below and point your agent at the extracted folder."
    ],
    "prompts": [
      {
        "label": "New install",
        "body": "I downloaded a skill package from Yavira. Read SKILL.md from the extracted folder and install it by following the included instructions. Tell me what you changed and call out any manual steps you could not complete."
      },
      {
        "label": "Upgrade existing",
        "body": "I downloaded an updated skill package from Yavira. Read SKILL.md from the extracted folder, compare it with my current installation, and upgrade it while preserving any custom configuration unless the package docs explicitly say otherwise. Summarize what changed and any follow-up checks I should run."
      }
    ]
  },
  "documentation": {
    "source": "clawhub",
    "primaryDoc": "SKILL.md",
    "sections": [
      {
        "title": "Overview",
        "body": "This skill enables structured data extraction from websites using the Tabstack API. It's ideal for web scraping tasks where you need consistent, schema-based data extraction from job boards, news sites, product pages, or any structured content."
      },
      {
        "title": "1. Install Babashka (if needed)",
        "body": "# Option A: From GitHub (recommended for sharing)\ncurl -s https://raw.githubusercontent.com/babashka/babashka/master/install | bash\n\n# Option B: From Nix\nnix-shell -p babashka\n\n# Option C: From Homebrew\nbrew install borkdude/brew/babashka"
      },
      {
        "title": "2. Set up API Key",
        "body": "Option A: Environment variable (recommended)\n\nexport TABSTACK_API_KEY=\"your_api_key_here\"\n\nOption B: Configuration file\n\nmkdir -p ~/.config/tabstack\necho '{:api-key \"your_api_key_here\"}' > ~/.config/tabstack/config.edn\n\nGet an API key: Sign up at Tabstack Console"
      },
      {
        "title": "3. Test Connection",
        "body": "bb scripts/tabstack.clj test"
      },
      {
        "title": "4. Extract Markdown (Simple)",
        "body": "bb scripts/tabstack.clj markdown \"https://example.com\""
      },
      {
        "title": "5. Extract JSON (Start Simple)",
        "body": "# Start with simple schema (fast, reliable)\nbb scripts/tabstack.clj json \"https://example.com\" references/simple_article.json\n\n# Try more complex schemas (may be slower)\nbb scripts/tabstack.clj json \"https://news.site\" references/news_schema.json"
      },
      {
        "title": "6. Advanced Features",
        "body": "# Extract with retry logic (3 retries, 1s delay)\nbb scripts/tabstack.clj json-retry \"https://example.com\" references/simple_article.json\n\n# Extract with caching (24-hour cache)\nbb scripts/tabstack.clj json-cache \"https://example.com\" references/simple_article.json\n\n# Batch extract from URLs file\necho \"https://example.com\" > urls.txt\necho \"https://example.org\" >> urls.txt\nbb scripts/tabstack.clj batch urls.txt references/simple_article.json"
      },
      {
        "title": "1. Markdown Extraction",
        "body": "Extract clean, readable markdown from any webpage. Useful for content analysis, summarization, or archiving.\n\nWhen to use: When you need the textual content of a page without the HTML clutter.\n\nExample use cases:\n\nExtract article content for summarization\nArchive webpage content\nAnalyze blog post content"
      },
      {
        "title": "2. JSON Schema Extraction",
        "body": "Extract structured data using JSON schemas. Define exactly what data you want and get it in a consistent format.\n\nWhen to use: When scraping job listings, product pages, news articles, or any structured data.\n\nExample use cases:\n\nScrape job listings from BuiltIn/LinkedIn\nExtract product details from e-commerce sites\nGather news articles with consistent metadata"
      },
      {
        "title": "3. Schema Templates",
        "body": "Pre-built schemas for common scraping tasks. See references/ directory for templates.\n\nAvailable schemas:\n\nJob listing schema (see references/job_schema.json)\nNews article schema\nProduct page schema\nContact information schema"
      },
      {
        "title": "Workflow: Job Scraping Example",
        "body": "Follow this workflow to scrape job listings:\n\nIdentify target sites - BuiltIn, LinkedIn, company career pages\nChoose or create schema - Use references/job_schema.json or customize\nTest extraction - Run a single page to verify schema works\nScale up - Process multiple URLs\nStore results - Save to database or file\n\nExample job schema:\n\n{\n  \"type\": \"object\",\n  \"properties\": {\n    \"title\": {\"type\": \"string\"},\n    \"company\": {\"type\": \"string\"},\n    \"location\": {\"type\": \"string\"},\n    \"description\": {\"type\": \"string\"},\n    \"salary\": {\"type\": \"string\"},\n    \"apply_url\": {\"type\": \"string\"},\n    \"posted_date\": {\"type\": \"string\"},\n    \"requirements\": {\"type\": \"array\", \"items\": {\"type\": \"string\"}}\n  }\n}"
      },
      {
        "title": "Combine with Web Search",
        "body": "Use web_search to find relevant URLs\nUse Tabstack to extract structured data from those URLs\nStore results in Datalevin (future skill)"
      },
      {
        "title": "Combine with Browser Automation",
        "body": "Use browser tool to navigate complex sites\nExtract page URLs\nUse Tabstack for structured extraction"
      },
      {
        "title": "Error Handling",
        "body": "Common issues and solutions:\n\nAuthentication failed - Check TABSTACK_API_KEY environment variable\nInvalid URL - Ensure URL is accessible and correct\nSchema mismatch - Adjust schema to match page structure\nRate limiting - Add delays between requests"
      },
      {
        "title": "scripts/",
        "body": "tabstack.clj - Main API wrapper in Babashka (recommended, has retry logic, caching, batch processing)\ntabstack_curl.sh - Bash/curl fallback (simple, no dependencies)\ntabstack_api.py - Python API wrapper (requires requests module)"
      },
      {
        "title": "references/",
        "body": "job_schema.json - Template schema for job listings\napi_reference.md - Tabstack API documentation"
      },
      {
        "title": "Best Practices",
        "body": "Start small - Test with single pages before scaling\nRespect robots.txt - Check site scraping policies\nAdd delays - Avoid overwhelming target sites\nValidate schemas - Test schemas on sample pages\nHandle errors gracefully - Implement retry logic for failed requests"
      },
      {
        "title": "Teaching Focus: How to Create Schemas",
        "body": "This skill is designed to teach agents how to use Tabstack API effectively. The key is learning to create appropriate JSON schemas for different websites."
      },
      {
        "title": "Learning Path",
        "body": "Start Simple - Use references/simple_article.json (4 basic fields)\nTest Extensively - Try schemas on multiple page types\nIterate - Add fields based on what the page actually contains\nOptimize - Remove unnecessary fields for speed\n\nSee Schema Creation Guide for detailed instructions and examples."
      },
      {
        "title": "Common Mistakes to Avoid",
        "body": "Over-complex schemas - Start with 2-3 fields, not 20\nMissing fields - Don't require fields that don't exist on the page\nNo testing - Always test with example.com first, then target sites\nIgnoring timeouts - Complex schemas take longer (45s timeout)"
      },
      {
        "title": "Babashka Advantages",
        "body": "Using Babashka for this skill provides:\n\nSingle binary - Easy to share/install (GitHub releases, brew, nix)\nFast startup - No JVM warmup, ~50ms startup time\nBuilt-in HTTP client - No external dependencies\nClojure syntax - Familiar to you (Wes), expressive\nRetry logic & caching - Built into the skill\nBatch processing - Parallel extraction for multiple URLs"
      },
      {
        "title": "Example User Requests",
        "body": "For this skill to trigger:\n\n\"Scrape job listings from Docker careers page\"\n\"Extract the main content from this article\"\n\"Get structured product data from this e-commerce page\"\n\"Pull all the news articles from this site\"\n\"Extract contact information from this company page\"\n\"Batch extract job listings from these 20 URLs\"\n\"Get cached results for this page (avoid API calls)\""
      }
    ],
    "body": "Tabstack Extractor\nOverview\n\nThis skill enables structured data extraction from websites using the Tabstack API. It's ideal for web scraping tasks where you need consistent, schema-based data extraction from job boards, news sites, product pages, or any structured content.\n\nQuick Start\n1. Install Babashka (if needed)\n# Option A: From GitHub (recommended for sharing)\ncurl -s https://raw.githubusercontent.com/babashka/babashka/master/install | bash\n\n# Option B: From Nix\nnix-shell -p babashka\n\n# Option C: From Homebrew\nbrew install borkdude/brew/babashka\n\n2. Set up API Key\n\nOption A: Environment variable (recommended)\n\nexport TABSTACK_API_KEY=\"your_api_key_here\"\n\n\nOption B: Configuration file\n\nmkdir -p ~/.config/tabstack\necho '{:api-key \"your_api_key_here\"}' > ~/.config/tabstack/config.edn\n\n\nGet an API key: Sign up at Tabstack Console\n\n3. Test Connection\nbb scripts/tabstack.clj test\n\n4. Extract Markdown (Simple)\nbb scripts/tabstack.clj markdown \"https://example.com\"\n\n5. Extract JSON (Start Simple)\n# Start with simple schema (fast, reliable)\nbb scripts/tabstack.clj json \"https://example.com\" references/simple_article.json\n\n# Try more complex schemas (may be slower)\nbb scripts/tabstack.clj json \"https://news.site\" references/news_schema.json\n\n6. Advanced Features\n# Extract with retry logic (3 retries, 1s delay)\nbb scripts/tabstack.clj json-retry \"https://example.com\" references/simple_article.json\n\n# Extract with caching (24-hour cache)\nbb scripts/tabstack.clj json-cache \"https://example.com\" references/simple_article.json\n\n# Batch extract from URLs file\necho \"https://example.com\" > urls.txt\necho \"https://example.org\" >> urls.txt\nbb scripts/tabstack.clj batch urls.txt references/simple_article.json\n\nCore Capabilities\n1. Markdown Extraction\n\nExtract clean, readable markdown from any webpage. Useful for content analysis, summarization, or archiving.\n\nWhen to use: When you need the textual content of a page without the HTML clutter.\n\nExample use cases:\n\nExtract article content for summarization\nArchive webpage content\nAnalyze blog post content\n2. JSON Schema Extraction\n\nExtract structured data using JSON schemas. Define exactly what data you want and get it in a consistent format.\n\nWhen to use: When scraping job listings, product pages, news articles, or any structured data.\n\nExample use cases:\n\nScrape job listings from BuiltIn/LinkedIn\nExtract product details from e-commerce sites\nGather news articles with consistent metadata\n3. Schema Templates\n\nPre-built schemas for common scraping tasks. See references/ directory for templates.\n\nAvailable schemas:\n\nJob listing schema (see references/job_schema.json)\nNews article schema\nProduct page schema\nContact information schema\nWorkflow: Job Scraping Example\n\nFollow this workflow to scrape job listings:\n\nIdentify target sites - BuiltIn, LinkedIn, company career pages\nChoose or create schema - Use references/job_schema.json or customize\nTest extraction - Run a single page to verify schema works\nScale up - Process multiple URLs\nStore results - Save to database or file\n\nExample job schema:\n\n{\n  \"type\": \"object\",\n  \"properties\": {\n    \"title\": {\"type\": \"string\"},\n    \"company\": {\"type\": \"string\"},\n    \"location\": {\"type\": \"string\"},\n    \"description\": {\"type\": \"string\"},\n    \"salary\": {\"type\": \"string\"},\n    \"apply_url\": {\"type\": \"string\"},\n    \"posted_date\": {\"type\": \"string\"},\n    \"requirements\": {\"type\": \"array\", \"items\": {\"type\": \"string\"}}\n  }\n}\n\nIntegration with Other Skills\nCombine with Web Search\nUse web_search to find relevant URLs\nUse Tabstack to extract structured data from those URLs\nStore results in Datalevin (future skill)\nCombine with Browser Automation\nUse browser tool to navigate complex sites\nExtract page URLs\nUse Tabstack for structured extraction\nError Handling\n\nCommon issues and solutions:\n\nAuthentication failed - Check TABSTACK_API_KEY environment variable\nInvalid URL - Ensure URL is accessible and correct\nSchema mismatch - Adjust schema to match page structure\nRate limiting - Add delays between requests\nResources\nscripts/\ntabstack.clj - Main API wrapper in Babashka (recommended, has retry logic, caching, batch processing)\ntabstack_curl.sh - Bash/curl fallback (simple, no dependencies)\ntabstack_api.py - Python API wrapper (requires requests module)\nreferences/\njob_schema.json - Template schema for job listings\napi_reference.md - Tabstack API documentation\nBest Practices\nStart small - Test with single pages before scaling\nRespect robots.txt - Check site scraping policies\nAdd delays - Avoid overwhelming target sites\nValidate schemas - Test schemas on sample pages\nHandle errors gracefully - Implement retry logic for failed requests\nTeaching Focus: How to Create Schemas\n\nThis skill is designed to teach agents how to use Tabstack API effectively. The key is learning to create appropriate JSON schemas for different websites.\n\nLearning Path\nStart Simple - Use references/simple_article.json (4 basic fields)\nTest Extensively - Try schemas on multiple page types\nIterate - Add fields based on what the page actually contains\nOptimize - Remove unnecessary fields for speed\n\nSee Schema Creation Guide for detailed instructions and examples.\n\nCommon Mistakes to Avoid\nOver-complex schemas - Start with 2-3 fields, not 20\nMissing fields - Don't require fields that don't exist on the page\nNo testing - Always test with example.com first, then target sites\nIgnoring timeouts - Complex schemas take longer (45s timeout)\nBabashka Advantages\n\nUsing Babashka for this skill provides:\n\nSingle binary - Easy to share/install (GitHub releases, brew, nix)\nFast startup - No JVM warmup, ~50ms startup time\nBuilt-in HTTP client - No external dependencies\nClojure syntax - Familiar to you (Wes), expressive\nRetry logic & caching - Built into the skill\nBatch processing - Parallel extraction for multiple URLs\nExample User Requests\n\nFor this skill to trigger:\n\n\"Scrape job listings from Docker careers page\"\n\"Extract the main content from this article\"\n\"Get structured product data from this e-commerce page\"\n\"Pull all the news articles from this site\"\n\"Extract contact information from this company page\"\n\"Batch extract job listings from these 20 URLs\"\n\"Get cached results for this page (avoid API calls)\""
  },
  "trust": {
    "sourceLabel": "tencent",
    "provenanceUrl": "https://clawhub.ai/noblepayne/tabstack-extractor",
    "publisherUrl": "https://clawhub.ai/noblepayne/tabstack-extractor",
    "owner": "noblepayne",
    "version": "0.1.0",
    "license": null,
    "verificationStatus": "Indexed source record"
  },
  "links": {
    "detailUrl": "https://openagent3.xyz/skills/tabstack-extractor",
    "downloadUrl": "https://openagent3.xyz/downloads/tabstack-extractor",
    "agentUrl": "https://openagent3.xyz/skills/tabstack-extractor/agent",
    "manifestUrl": "https://openagent3.xyz/skills/tabstack-extractor/agent.json",
    "briefUrl": "https://openagent3.xyz/skills/tabstack-extractor/agent.md"
  }
}