{
  "schemaVersion": "1.0",
  "item": {
    "slug": "openclaw-scrapling",
    "name": "OpenClaw Scrapling",
    "source": "tencent",
    "type": "skill",
    "category": "开发工具",
    "sourceUrl": "https://clawhub.ai/cryptos3c/openclaw-scrapling",
    "canonicalUrl": "https://clawhub.ai/cryptos3c/openclaw-scrapling",
    "targetPlatform": "OpenClaw"
  },
  "install": {
    "downloadMode": "redirect",
    "downloadUrl": "/downloads/openclaw-scrapling",
    "sourceDownloadUrl": "https://wry-manatee-359.convex.site/api/v1/download?slug=openclaw-scrapling",
    "sourcePlatform": "tencent",
    "targetPlatform": "OpenClaw",
    "installMethod": "Manual import",
    "extraction": "Extract archive",
    "prerequisites": [
      "OpenClaw"
    ],
    "packageFormat": "ZIP package",
    "includedAssets": [
      "INSTALLATION_GUIDE.md",
      "QUICK_START.md",
      "README.md",
      "SKILL.md",
      "examples/README.md",
      "examples/adaptive.py"
    ],
    "primaryDoc": "SKILL.md",
    "quickSetup": [
      "Download the package from Yavira.",
      "Extract the archive and review SKILL.md first.",
      "Import or place the package into your OpenClaw setup."
    ],
    "agentAssist": {
      "summary": "Hand the extracted package to your coding agent with a concrete install brief instead of figuring it out manually.",
      "steps": [
        "Download the package from Yavira.",
        "Extract it into a folder your agent can access.",
        "Paste one of the prompts below and point your agent at the extracted folder."
      ],
      "prompts": [
        {
          "label": "New install",
          "body": "I downloaded a skill package from Yavira. Read SKILL.md from the extracted folder and install it by following the included instructions. Then review README.md for any prerequisites, environment setup, or post-install checks. Tell me what you changed and call out any manual steps you could not complete."
        },
        {
          "label": "Upgrade existing",
          "body": "I downloaded an updated skill package from Yavira. Read SKILL.md from the extracted folder, compare it with my current installation, and upgrade it while preserving any custom configuration unless the package docs explicitly say otherwise. Then review README.md for any prerequisites, environment setup, or post-install checks. Summarize what changed and any follow-up checks I should run."
        }
      ]
    },
    "sourceHealth": {
      "source": "tencent",
      "status": "healthy",
      "reason": "direct_download_ok",
      "recommendedAction": "download",
      "checkedAt": "2026-04-30T16:55:25.780Z",
      "expiresAt": "2026-05-07T16:55:25.780Z",
      "httpStatus": 200,
      "finalUrl": "https://wry-manatee-359.convex.site/api/v1/download?slug=openclaw-scrapling",
      "contentType": "application/zip",
      "probeMethod": "head",
      "details": {
        "probeUrl": "https://wry-manatee-359.convex.site/api/v1/download?slug=openclaw-scrapling",
        "contentDisposition": "attachment; filename=\"openclaw-scrapling-1.0.0.zip\"",
        "redirectLocation": null,
        "bodySnippet": null
      },
      "scope": "source",
      "summary": "Source download looks usable.",
      "detail": "Yavira can redirect you to the upstream package for this source.",
      "primaryActionLabel": "Download for OpenClaw",
      "primaryActionHref": "/downloads/openclaw-scrapling"
    },
    "validation": {
      "installChecklist": [
        "Use the Yavira download entry.",
        "Review SKILL.md after the package is downloaded.",
        "Confirm the extracted package contains the expected setup assets."
      ],
      "postInstallChecks": [
        "Confirm the extracted package includes the expected docs or setup files.",
        "Validate the skill or prompts are available in your target agent workspace.",
        "Capture any manual follow-up steps the agent could not complete."
      ]
    },
    "downloadPageUrl": "https://openagent3.xyz/downloads/openclaw-scrapling",
    "agentPageUrl": "https://openagent3.xyz/skills/openclaw-scrapling/agent",
    "manifestUrl": "https://openagent3.xyz/skills/openclaw-scrapling/agent.json",
    "briefUrl": "https://openagent3.xyz/skills/openclaw-scrapling/agent.md"
  },
  "agentAssist": {
    "summary": "Hand the extracted package to your coding agent with a concrete install brief instead of figuring it out manually.",
    "steps": [
      "Download the package from Yavira.",
      "Extract it into a folder your agent can access.",
      "Paste one of the prompts below and point your agent at the extracted folder."
    ],
    "prompts": [
      {
        "label": "New install",
        "body": "I downloaded a skill package from Yavira. Read SKILL.md from the extracted folder and install it by following the included instructions. Then review README.md for any prerequisites, environment setup, or post-install checks. Tell me what you changed and call out any manual steps you could not complete."
      },
      {
        "label": "Upgrade existing",
        "body": "I downloaded an updated skill package from Yavira. Read SKILL.md from the extracted folder, compare it with my current installation, and upgrade it while preserving any custom configuration unless the package docs explicitly say otherwise. Then review README.md for any prerequisites, environment setup, or post-install checks. Summarize what changed and any follow-up checks I should run."
      }
    ]
  },
  "documentation": {
    "source": "clawhub",
    "primaryDoc": "SKILL.md",
    "sections": [
      {
        "title": "Scrapling Web Scraping Skill",
        "body": "Use Scrapling to scrape modern websites, including those with anti-bot protection, JavaScript-rendered content, and adaptive element tracking."
      },
      {
        "title": "When to Use This Skill",
        "body": "User asks to scrape a website or extract data from a URL\nNeed to bypass Cloudflare, bot detection, or anti-scraping measures\nNeed to handle JavaScript-rendered/dynamic content (React, Vue, etc.)\nWebsite requires login or session management\nWebsite structure changes frequently (adaptive selectors)\nNeed to scrape multiple pages with rate limiting"
      },
      {
        "title": "Commands",
        "body": "All commands use the scrape.py script in this skill's directory."
      },
      {
        "title": "Basic HTTP Scraping (Fast)",
        "body": "python scrape.py \\\n  --url \"https://example.com\" \\\n  --selector \".product\" \\\n  --output products.json\n\nUse when: Static HTML, no JavaScript, no bot protection"
      },
      {
        "title": "Stealth Mode (Bypass Anti-Bot)",
        "body": "python scrape.py \\\n  --url \"https://nopecha.com/demo/cloudflare\" \\\n  --stealth \\\n  --selector \"#content\" \\\n  --output data.json\n\nUse when: Cloudflare protection, bot detection, fingerprinting\n\nFeatures:\n\nBypasses Cloudflare Turnstile automatically\nBrowser fingerprint spoofing\nHeadless browser mode"
      },
      {
        "title": "Dynamic/JavaScript Content",
        "body": "python scrape.py \\\n  --url \"https://spa-website.com\" \\\n  --dynamic \\\n  --selector \".loaded-content\" \\\n  --wait-for \".loaded-content\" \\\n  --output data.json\n\nUse when: React/Vue/Angular apps, lazy-loaded content, AJAX\n\nFeatures:\n\nFull Playwright browser automation\nWait for elements to load\nNetwork idle detection"
      },
      {
        "title": "Adaptive Selectors (Survives Website Changes)",
        "body": "# First time - save the selector pattern\npython scrape.py \\\n  --url \"https://example.com\" \\\n  --selector \".product-card\" \\\n  --adaptive-save \\\n  --output products.json\n\n# Later, if website structure changes\npython scrape.py \\\n  --url \"https://example.com\" \\\n  --adaptive \\\n  --output products.json\n\nUse when: Website frequently redesigns, need robust scraping\n\nHow it works:\n\nFirst run: Saves element patterns/structure\nLater runs: Uses similarity algorithms to relocate moved elements\nAuto-updates selector cache"
      },
      {
        "title": "Session Management (Login Required)",
        "body": "# Login and save session\npython scrape.py \\\n  --url \"https://example.com/dashboard\" \\\n  --stealth \\\n  --login \\\n  --username \"user@example.com\" \\\n  --password \"password123\" \\\n  --session-name \"my-session\" \\\n  --selector \".protected-data\" \\\n  --output data.json\n\n# Reuse saved session (no login needed)\npython scrape.py \\\n  --url \"https://example.com/another-page\" \\\n  --stealth \\\n  --session-name \"my-session\" \\\n  --selector \".more-data\" \\\n  --output more_data.json\n\nUse when: Content requires authentication, multi-step scraping"
      },
      {
        "title": "Extract Specific Data Types",
        "body": "Text only:\n\npython scrape.py \\\n  --url \"https://example.com\" \\\n  --selector \".content\" \\\n  --extract text \\\n  --output content.txt\n\nMarkdown:\n\npython scrape.py \\\n  --url \"https://docs.example.com\" \\\n  --selector \"article\" \\\n  --extract markdown \\\n  --output article.md\n\nAttributes:\n\n# Extract href links\npython scrape.py \\\n  --url \"https://example.com\" \\\n  --selector \"a.product-link\" \\\n  --extract attr:href \\\n  --output links.json\n\nMultiple fields:\n\npython scrape.py \\\n  --url \"https://example.com/products\" \\\n  --selector \".product\" \\\n  --fields \"title:.title::text,price:.price::text,link:a::attr(href)\" \\\n  --output products.json"
      },
      {
        "title": "Advanced Options",
        "body": "Proxy support:\n\npython scrape.py \\\n  --url \"https://example.com\" \\\n  --proxy \"http://user:pass@proxy.com:8080\" \\\n  --selector \".content\"\n\nRate limiting:\n\npython scrape.py \\\n  --url \"https://example.com\" \\\n  --selector \".content\" \\\n  --delay 2  # 2 seconds between requests\n\nCustom headers:\n\npython scrape.py \\\n  --url \"https://api.example.com\" \\\n  --headers '{\"Authorization\": \"Bearer token123\"}' \\\n  --selector \"body\"\n\nScreenshot (for debugging):\n\npython scrape.py \\\n  --url \"https://example.com\" \\\n  --stealth \\\n  --screenshot debug.png"
      },
      {
        "title": "Python API (For Custom Scripts)",
        "body": "You can also use Scrapling directly in Python scripts:\n\nfrom scrapling.fetchers import Fetcher, StealthyFetcher, DynamicFetcher\n\n# Basic HTTP request\npage = Fetcher.get('https://example.com')\nproducts = page.css('.product')\nfor product in products:\n    title = product.css('.title::text').get()\n    price = product.css('.price::text').get()\n    print(f\"{title}: {price}\")\n\n# Stealth mode (bypass anti-bot)\npage = StealthyFetcher.fetch('https://protected-site.com', headless=True)\ndata = page.css('.content').getall()\n\n# Dynamic content (full browser)\npage = DynamicFetcher.fetch('https://spa-app.com', network_idle=True)\nitems = page.css('.loaded-item').getall()\n\n# Sessions (login)\nfrom scrapling.fetchers import StealthySession\n\nwith StealthySession(headless=True) as session:\n    # Login\n    login_page = session.fetch('https://example.com/login')\n    login_page.fill('#username', 'user@example.com')\n    login_page.fill('#password', 'password123')\n    login_page.click('#submit')\n    \n    # Access protected content\n    protected_page = session.fetch('https://example.com/dashboard')\n    data = protected_page.css('.private-data').getall()"
      },
      {
        "title": "Output Formats",
        "body": "JSON (default): --output data.json\nJSONL (streaming): --output data.jsonl\nCSV: --output data.csv\nTXT (text only): --output data.txt\nMD (markdown): --output data.md\nHTML (raw): --output data.html"
      },
      {
        "title": "Selector Types",
        "body": "Scrapling supports multiple selector formats:\n\nCSS selectors:\n\n--selector \".product\"\n--selector \"div.container > p.text\"\n--selector \"a[href*='product']\"\n\nXPath selectors:\n\n--selector \"//div[@class='product']\"\n--selector \"//a[contains(@href, 'product')]\"\n\nPseudo-elements (like Scrapy):\n\n--selector \".product::text\"          # Text content\n--selector \"a::attr(href)\"           # Attribute value\n--selector \".price::text::strip\"     # Text with whitespace removed\n\nCombined selectors:\n\n--selector \".product .title::text\"   # Nested elements"
      },
      {
        "title": "Troubleshooting",
        "body": "Issue: \"Element not found\"\n\nTry --dynamic if content is JavaScript-loaded\nUse --wait-for SELECTOR to wait for element\nUse --screenshot to debug what's visible\n\nIssue: \"Cloudflare blocking\"\n\nUse --stealth mode\nAdd --solve-cloudflare flag (enabled by default in stealth)\nTry --delay 2 to slow down requests\n\nIssue: \"Login not working\"\n\nUse --headless false to see browser interaction\nCheck credentials are correct\nWebsite might use CAPTCHA (manual intervention needed)\n\nIssue: \"Selector broke after website update\"\n\nUse --adaptive mode to auto-relocate elements\nRe-run with --adaptive-save to update saved patterns"
      },
      {
        "title": "Scrape Hacker News Front Page",
        "body": "python scrape.py \\\n  --url \"https://news.ycombinator.com\" \\\n  --selector \".athing\" \\\n  --fields \"title:.titleline>a::text,link:.titleline>a::attr(href)\" \\\n  --output hn_stories.json"
      },
      {
        "title": "Scrape Protected Site with Login",
        "body": "python scrape.py \\\n  --url \"https://example.com/data\" \\\n  --stealth \\\n  --login \\\n  --username \"user@example.com\" \\\n  --password \"secret\" \\\n  --session-name \"example-session\" \\\n  --selector \".data-table tr\" \\\n  --output protected_data.json"
      },
      {
        "title": "Monitor Price Changes",
        "body": "# Save initial selector pattern\npython scrape.py \\\n  --url \"https://store.com/product/123\" \\\n  --selector \".price\" \\\n  --adaptive-save \\\n  --output price.txt\n\n# Later, check price (even if page redesigned)\npython scrape.py \\\n  --url \"https://store.com/product/123\" \\\n  --adaptive \\\n  --output price_new.txt"
      },
      {
        "title": "Scrape Dynamic JavaScript App",
        "body": "python scrape.py \\\n  --url \"https://react-app.com/data\" \\\n  --dynamic \\\n  --wait-for \".loaded-content\" \\\n  --selector \".item\" \\\n  --fields \"name:.name::text,value:.value::text\" \\\n  --output app_data.json"
      },
      {
        "title": "Notes",
        "body": "First run: Scrapling downloads browsers (~500MB). This is automatic.\nSessions: Saved in sessions/ directory, reusable across runs\nAdaptive cache: Saved in selector_cache.json, auto-updated\nRate limiting: Always respect robots.txt and add delays for ethical scraping\nLegal: Use only on sites you have permission to scrape"
      },
      {
        "title": "Dependencies",
        "body": "Installed automatically when skill is installed:\n\nscrapling[all] - Main library with all features\npyyaml - For config file support"
      },
      {
        "title": "Skill Structure",
        "body": "scrapling/\n├── SKILL.md           # This file\n├── scrape.py          # Main CLI script\n├── requirements.txt   # Python dependencies\n├── sessions/          # Saved browser sessions\n├── selector_cache.json # Adaptive selector patterns\n└── examples/          # Example scripts\n    ├── basic.py\n    ├── stealth.py\n    ├── dynamic.py\n    └── adaptive.py"
      },
      {
        "title": "Advanced: Custom Python Scripts",
        "body": "For complex scraping tasks, you can create custom Python scripts in this directory:\n\n# custom_scraper.py\nfrom scrapling.fetchers import StealthyFetcher\nfrom scrapling.spiders import Spider, Response\nimport json\n\nclass MySpider(Spider):\n    name = \"custom\"\n    start_urls = [\"https://example.com/page1\"]\n    \n    async def parse(self, response: Response):\n        for item in response.css('.product'):\n            yield {\n                \"title\": item.css('.title::text').get(),\n                \"price\": item.css('.price::text').get()\n            }\n        \n        # Follow pagination\n        next_page = response.css('.next-page::attr(href)').get()\n        if next_page:\n            yield response.follow(next_page)\n\n# Run spider\nresult = MySpider().start()\nwith open('output.json', 'w') as f:\n    json.dump(result.items, f, indent=2)\n\nRun with:\n\npython custom_scraper.py\n\nQuestions? Check Scrapling docs: https://scrapling.readthedocs.io"
      }
    ],
    "body": "Scrapling Web Scraping Skill\n\nUse Scrapling to scrape modern websites, including those with anti-bot protection, JavaScript-rendered content, and adaptive element tracking.\n\nWhen to Use This Skill\nUser asks to scrape a website or extract data from a URL\nNeed to bypass Cloudflare, bot detection, or anti-scraping measures\nNeed to handle JavaScript-rendered/dynamic content (React, Vue, etc.)\nWebsite requires login or session management\nWebsite structure changes frequently (adaptive selectors)\nNeed to scrape multiple pages with rate limiting\nCommands\n\nAll commands use the scrape.py script in this skill's directory.\n\nBasic HTTP Scraping (Fast)\npython scrape.py \\\n  --url \"https://example.com\" \\\n  --selector \".product\" \\\n  --output products.json\n\n\nUse when: Static HTML, no JavaScript, no bot protection\n\nStealth Mode (Bypass Anti-Bot)\npython scrape.py \\\n  --url \"https://nopecha.com/demo/cloudflare\" \\\n  --stealth \\\n  --selector \"#content\" \\\n  --output data.json\n\n\nUse when: Cloudflare protection, bot detection, fingerprinting\n\nFeatures:\n\nBypasses Cloudflare Turnstile automatically\nBrowser fingerprint spoofing\nHeadless browser mode\nDynamic/JavaScript Content\npython scrape.py \\\n  --url \"https://spa-website.com\" \\\n  --dynamic \\\n  --selector \".loaded-content\" \\\n  --wait-for \".loaded-content\" \\\n  --output data.json\n\n\nUse when: React/Vue/Angular apps, lazy-loaded content, AJAX\n\nFeatures:\n\nFull Playwright browser automation\nWait for elements to load\nNetwork idle detection\nAdaptive Selectors (Survives Website Changes)\n# First time - save the selector pattern\npython scrape.py \\\n  --url \"https://example.com\" \\\n  --selector \".product-card\" \\\n  --adaptive-save \\\n  --output products.json\n\n# Later, if website structure changes\npython scrape.py \\\n  --url \"https://example.com\" \\\n  --adaptive \\\n  --output products.json\n\n\nUse when: Website frequently redesigns, need robust scraping\n\nHow it works:\n\nFirst run: Saves element patterns/structure\nLater runs: Uses similarity algorithms to relocate moved elements\nAuto-updates selector cache\nSession Management (Login Required)\n# Login and save session\npython scrape.py \\\n  --url \"https://example.com/dashboard\" \\\n  --stealth \\\n  --login \\\n  --username \"user@example.com\" \\\n  --password \"password123\" \\\n  --session-name \"my-session\" \\\n  --selector \".protected-data\" \\\n  --output data.json\n\n# Reuse saved session (no login needed)\npython scrape.py \\\n  --url \"https://example.com/another-page\" \\\n  --stealth \\\n  --session-name \"my-session\" \\\n  --selector \".more-data\" \\\n  --output more_data.json\n\n\nUse when: Content requires authentication, multi-step scraping\n\nExtract Specific Data Types\n\nText only:\n\npython scrape.py \\\n  --url \"https://example.com\" \\\n  --selector \".content\" \\\n  --extract text \\\n  --output content.txt\n\n\nMarkdown:\n\npython scrape.py \\\n  --url \"https://docs.example.com\" \\\n  --selector \"article\" \\\n  --extract markdown \\\n  --output article.md\n\n\nAttributes:\n\n# Extract href links\npython scrape.py \\\n  --url \"https://example.com\" \\\n  --selector \"a.product-link\" \\\n  --extract attr:href \\\n  --output links.json\n\n\nMultiple fields:\n\npython scrape.py \\\n  --url \"https://example.com/products\" \\\n  --selector \".product\" \\\n  --fields \"title:.title::text,price:.price::text,link:a::attr(href)\" \\\n  --output products.json\n\nAdvanced Options\n\nProxy support:\n\npython scrape.py \\\n  --url \"https://example.com\" \\\n  --proxy \"http://user:pass@proxy.com:8080\" \\\n  --selector \".content\"\n\n\nRate limiting:\n\npython scrape.py \\\n  --url \"https://example.com\" \\\n  --selector \".content\" \\\n  --delay 2  # 2 seconds between requests\n\n\nCustom headers:\n\npython scrape.py \\\n  --url \"https://api.example.com\" \\\n  --headers '{\"Authorization\": \"Bearer token123\"}' \\\n  --selector \"body\"\n\n\nScreenshot (for debugging):\n\npython scrape.py \\\n  --url \"https://example.com\" \\\n  --stealth \\\n  --screenshot debug.png\n\nPython API (For Custom Scripts)\n\nYou can also use Scrapling directly in Python scripts:\n\nfrom scrapling.fetchers import Fetcher, StealthyFetcher, DynamicFetcher\n\n# Basic HTTP request\npage = Fetcher.get('https://example.com')\nproducts = page.css('.product')\nfor product in products:\n    title = product.css('.title::text').get()\n    price = product.css('.price::text').get()\n    print(f\"{title}: {price}\")\n\n# Stealth mode (bypass anti-bot)\npage = StealthyFetcher.fetch('https://protected-site.com', headless=True)\ndata = page.css('.content').getall()\n\n# Dynamic content (full browser)\npage = DynamicFetcher.fetch('https://spa-app.com', network_idle=True)\nitems = page.css('.loaded-item').getall()\n\n# Sessions (login)\nfrom scrapling.fetchers import StealthySession\n\nwith StealthySession(headless=True) as session:\n    # Login\n    login_page = session.fetch('https://example.com/login')\n    login_page.fill('#username', 'user@example.com')\n    login_page.fill('#password', 'password123')\n    login_page.click('#submit')\n    \n    # Access protected content\n    protected_page = session.fetch('https://example.com/dashboard')\n    data = protected_page.css('.private-data').getall()\n\nOutput Formats\nJSON (default): --output data.json\nJSONL (streaming): --output data.jsonl\nCSV: --output data.csv\nTXT (text only): --output data.txt\nMD (markdown): --output data.md\nHTML (raw): --output data.html\nSelector Types\n\nScrapling supports multiple selector formats:\n\nCSS selectors:\n\n--selector \".product\"\n--selector \"div.container > p.text\"\n--selector \"a[href*='product']\"\n\n\nXPath selectors:\n\n--selector \"//div[@class='product']\"\n--selector \"//a[contains(@href, 'product')]\"\n\n\nPseudo-elements (like Scrapy):\n\n--selector \".product::text\"          # Text content\n--selector \"a::attr(href)\"           # Attribute value\n--selector \".price::text::strip\"     # Text with whitespace removed\n\n\nCombined selectors:\n\n--selector \".product .title::text\"   # Nested elements\n\nTroubleshooting\n\nIssue: \"Element not found\"\n\nTry --dynamic if content is JavaScript-loaded\nUse --wait-for SELECTOR to wait for element\nUse --screenshot to debug what's visible\n\nIssue: \"Cloudflare blocking\"\n\nUse --stealth mode\nAdd --solve-cloudflare flag (enabled by default in stealth)\nTry --delay 2 to slow down requests\n\nIssue: \"Login not working\"\n\nUse --headless false to see browser interaction\nCheck credentials are correct\nWebsite might use CAPTCHA (manual intervention needed)\n\nIssue: \"Selector broke after website update\"\n\nUse --adaptive mode to auto-relocate elements\nRe-run with --adaptive-save to update saved patterns\nExamples\nScrape Hacker News Front Page\npython scrape.py \\\n  --url \"https://news.ycombinator.com\" \\\n  --selector \".athing\" \\\n  --fields \"title:.titleline>a::text,link:.titleline>a::attr(href)\" \\\n  --output hn_stories.json\n\nScrape Protected Site with Login\npython scrape.py \\\n  --url \"https://example.com/data\" \\\n  --stealth \\\n  --login \\\n  --username \"user@example.com\" \\\n  --password \"secret\" \\\n  --session-name \"example-session\" \\\n  --selector \".data-table tr\" \\\n  --output protected_data.json\n\nMonitor Price Changes\n# Save initial selector pattern\npython scrape.py \\\n  --url \"https://store.com/product/123\" \\\n  --selector \".price\" \\\n  --adaptive-save \\\n  --output price.txt\n\n# Later, check price (even if page redesigned)\npython scrape.py \\\n  --url \"https://store.com/product/123\" \\\n  --adaptive \\\n  --output price_new.txt\n\nScrape Dynamic JavaScript App\npython scrape.py \\\n  --url \"https://react-app.com/data\" \\\n  --dynamic \\\n  --wait-for \".loaded-content\" \\\n  --selector \".item\" \\\n  --fields \"name:.name::text,value:.value::text\" \\\n  --output app_data.json\n\nNotes\nFirst run: Scrapling downloads browsers (~500MB). This is automatic.\nSessions: Saved in sessions/ directory, reusable across runs\nAdaptive cache: Saved in selector_cache.json, auto-updated\nRate limiting: Always respect robots.txt and add delays for ethical scraping\nLegal: Use only on sites you have permission to scrape\nDependencies\n\nInstalled automatically when skill is installed:\n\nscrapling[all] - Main library with all features\npyyaml - For config file support\nSkill Structure\nscrapling/\n├── SKILL.md           # This file\n├── scrape.py          # Main CLI script\n├── requirements.txt   # Python dependencies\n├── sessions/          # Saved browser sessions\n├── selector_cache.json # Adaptive selector patterns\n└── examples/          # Example scripts\n    ├── basic.py\n    ├── stealth.py\n    ├── dynamic.py\n    └── adaptive.py\n\nAdvanced: Custom Python Scripts\n\nFor complex scraping tasks, you can create custom Python scripts in this directory:\n\n# custom_scraper.py\nfrom scrapling.fetchers import StealthyFetcher\nfrom scrapling.spiders import Spider, Response\nimport json\n\nclass MySpider(Spider):\n    name = \"custom\"\n    start_urls = [\"https://example.com/page1\"]\n    \n    async def parse(self, response: Response):\n        for item in response.css('.product'):\n            yield {\n                \"title\": item.css('.title::text').get(),\n                \"price\": item.css('.price::text').get()\n            }\n        \n        # Follow pagination\n        next_page = response.css('.next-page::attr(href)').get()\n        if next_page:\n            yield response.follow(next_page)\n\n# Run spider\nresult = MySpider().start()\nwith open('output.json', 'w') as f:\n    json.dump(result.items, f, indent=2)\n\n\nRun with:\n\npython custom_scraper.py\n\n\nQuestions? Check Scrapling docs: https://scrapling.readthedocs.io"
  },
  "trust": {
    "sourceLabel": "tencent",
    "provenanceUrl": "https://clawhub.ai/cryptos3c/openclaw-scrapling",
    "publisherUrl": "https://clawhub.ai/cryptos3c/openclaw-scrapling",
    "owner": "cryptos3c",
    "version": "1.0.0",
    "license": null,
    "verificationStatus": "Indexed source record"
  },
  "links": {
    "detailUrl": "https://openagent3.xyz/skills/openclaw-scrapling",
    "downloadUrl": "https://openagent3.xyz/downloads/openclaw-scrapling",
    "agentUrl": "https://openagent3.xyz/skills/openclaw-scrapling/agent",
    "manifestUrl": "https://openagent3.xyz/skills/openclaw-scrapling/agent.json",
    "briefUrl": "https://openagent3.xyz/skills/openclaw-scrapling/agent.md"
  }
}