{
  "schemaVersion": "1.0",
  "item": {
    "slug": "scrapling-web-scraping",
    "name": "Scrapling Web Scraping",
    "source": "tencent",
    "type": "skill",
    "category": "开发工具",
    "sourceUrl": "https://clawhub.ai/DevBD1/scrapling-web-scraping",
    "canonicalUrl": "https://clawhub.ai/DevBD1/scrapling-web-scraping",
    "targetPlatform": "OpenClaw"
  },
  "install": {
    "downloadMode": "redirect",
    "downloadUrl": "/downloads/scrapling-web-scraping",
    "sourceDownloadUrl": "https://wry-manatee-359.convex.site/api/v1/download?slug=scrapling-web-scraping",
    "sourcePlatform": "tencent",
    "targetPlatform": "OpenClaw",
    "installMethod": "Manual import",
    "extraction": "Extract archive",
    "prerequisites": [
      "OpenClaw"
    ],
    "packageFormat": "ZIP package",
    "includedAssets": [
      "SKILL.md",
      "_meta.json",
      "references/anti-bot.md",
      "references/api_reference.md",
      "references/links.md",
      "references/mcp-setup.md"
    ],
    "primaryDoc": "SKILL.md",
    "quickSetup": [
      "Download the package from Yavira.",
      "Extract the archive and review SKILL.md first.",
      "Import or place the package into your OpenClaw setup."
    ],
    "agentAssist": {
      "summary": "Hand the extracted package to your coding agent with a concrete install brief instead of figuring it out manually.",
      "steps": [
        "Download the package from Yavira.",
        "Extract it into a folder your agent can access.",
        "Paste one of the prompts below and point your agent at the extracted folder."
      ],
      "prompts": [
        {
          "label": "New install",
          "body": "I downloaded a skill package from Yavira. Read SKILL.md from the extracted folder and install it by following the included instructions. Tell me what you changed and call out any manual steps you could not complete."
        },
        {
          "label": "Upgrade existing",
          "body": "I downloaded an updated skill package from Yavira. Read SKILL.md from the extracted folder, compare it with my current installation, and upgrade it while preserving any custom configuration unless the package docs explicitly say otherwise. Summarize what changed and any follow-up checks I should run."
        }
      ]
    },
    "sourceHealth": {
      "source": "tencent",
      "status": "healthy",
      "reason": "direct_download_ok",
      "recommendedAction": "download",
      "checkedAt": "2026-05-07T17:22:31.273Z",
      "expiresAt": "2026-05-14T17:22:31.273Z",
      "httpStatus": 200,
      "finalUrl": "https://wry-manatee-359.convex.site/api/v1/download?slug=afrexai-annual-report",
      "contentType": "application/zip",
      "probeMethod": "head",
      "details": {
        "probeUrl": "https://wry-manatee-359.convex.site/api/v1/download?slug=afrexai-annual-report",
        "contentDisposition": "attachment; filename=\"afrexai-annual-report-1.0.0.zip\"",
        "redirectLocation": null,
        "bodySnippet": null
      },
      "scope": "source",
      "summary": "Source download looks usable.",
      "detail": "Yavira can redirect you to the upstream package for this source.",
      "primaryActionLabel": "Download for OpenClaw",
      "primaryActionHref": "/downloads/scrapling-web-scraping"
    },
    "validation": {
      "installChecklist": [
        "Use the Yavira download entry.",
        "Review SKILL.md after the package is downloaded.",
        "Confirm the extracted package contains the expected setup assets."
      ],
      "postInstallChecks": [
        "Confirm the extracted package includes the expected docs or setup files.",
        "Validate the skill or prompts are available in your target agent workspace.",
        "Capture any manual follow-up steps the agent could not complete."
      ]
    },
    "downloadPageUrl": "https://openagent3.xyz/downloads/scrapling-web-scraping",
    "agentPageUrl": "https://openagent3.xyz/skills/scrapling-web-scraping/agent",
    "manifestUrl": "https://openagent3.xyz/skills/scrapling-web-scraping/agent.json",
    "briefUrl": "https://openagent3.xyz/skills/scrapling-web-scraping/agent.md"
  },
  "agentAssist": {
    "summary": "Hand the extracted package to your coding agent with a concrete install brief instead of figuring it out manually.",
    "steps": [
      "Download the package from Yavira.",
      "Extract it into a folder your agent can access.",
      "Paste one of the prompts below and point your agent at the extracted folder."
    ],
    "prompts": [
      {
        "label": "New install",
        "body": "I downloaded a skill package from Yavira. Read SKILL.md from the extracted folder and install it by following the included instructions. Tell me what you changed and call out any manual steps you could not complete."
      },
      {
        "label": "Upgrade existing",
        "body": "I downloaded an updated skill package from Yavira. Read SKILL.md from the extracted folder, compare it with my current installation, and upgrade it while preserving any custom configuration unless the package docs explicitly say otherwise. Summarize what changed and any follow-up checks I should run."
      }
    ]
  },
  "documentation": {
    "source": "clawhub",
    "primaryDoc": "SKILL.md",
    "sections": [
      {
        "title": "Scrapling MCP — Web Scraping Guidance",
        "body": "Guidance Layer + MCP Integration\nUse this skill for strategy and patterns. For execution, call Scrapling's MCP server via mcporter."
      },
      {
        "title": "1. Install Scrapling with MCP support",
        "body": "pip install scrapling[mcp]\n# Or for full features:\npip install scrapling[mcp,playwright]\npython -m playwright install chromium"
      },
      {
        "title": "2. Add to OpenClaw MCP config",
        "body": "{\n  \"mcpServers\": {\n    \"scrapling\": {\n      \"command\": \"python\",\n      \"args\": [\"-m\", \"scrapling.mcp\"]\n    }\n  }\n}"
      },
      {
        "title": "3. Call via mcporter",
        "body": "mcporter call scrapling fetch_page --url \"https://example.com\""
      },
      {
        "title": "Execution vs Guidance",
        "body": "TaskToolExampleFetch a pagemcportermcporter call scrapling fetch_page --url URLExtract with CSSmcportermcporter call scrapling css_select --selector \".title::text\"Which fetcher to use?This skillSee \"Fetcher Selection Guide\" belowAnti-bot strategy?This skillSee \"Anti-Bot Escalation Ladder\"Complex crawl patterns?This skillSee \"Spider Recipes\""
      },
      {
        "title": "Fetcher Selection Guide",
        "body": "┌─────────────────┐     ┌──────────────────┐     ┌──────────────────┐\n│   Fetcher       │────▶│ DynamicFetcher   │────▶│ StealthyFetcher  │\n│   (HTTP)        │     │ (Browser/JS)     │     │ (Anti-bot)       │\n└─────────────────┘     └──────────────────┘     └──────────────────┘\n     Fastest              JS-rendered               Cloudflare, \n     Static pages         SPAs, React/Vue          Turnstile, etc."
      },
      {
        "title": "Decision Tree",
        "body": "Static HTML? → Fetcher (10-100x faster)\nNeed JS execution? → DynamicFetcher\nGetting blocked? → StealthyFetcher\nComplex session? → Use Session variants"
      },
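      {
        "title": "Decision Tree as Code (Sketch)",
        "body": "A minimal Python sketch of the escalation path above, not a canonical recipe: Fetcher.get and StealthyFetcher.fetch appear elsewhere in this skill, while DynamicFetcher.fetch is assumed to mirror the StealthyFetcher.fetch signature.\n\nfrom scrapling.fetchers import Fetcher, DynamicFetcher, StealthyFetcher\n\nurl = \"https://example.com\"\n\n# Level 1: static HTML over plain HTTP (fastest)\npage = Fetcher.get(url)\n\n# Level 2: content rendered by JS? Use a real browser\nif not page.css('.product'):\n    page = DynamicFetcher.fetch(url)  # assumed to mirror StealthyFetcher.fetch\n\n# Level 3: still blocked? Escalate to stealth mode\nif not page.css('.product'):\n    page = StealthyFetcher.fetch(url, headless=True, solve_cloudflare=True)\n\ntitles = page.css('h1::text').getall()"
      },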
      {
        "title": "MCP Fetch Modes",
        "body": "fetch_page — HTTP fetcher\nfetch_dynamic — Browser-based with Playwright\nfetch_stealthy — Anti-bot bypass mode"
      },
      {
        "title": "Level 1: Polite HTTP",
        "body": "# MCP call: fetch_page with options\n{\n  \"url\": \"https://example.com\",\n  \"headers\": {\"User-Agent\": \"...\"},\n  \"delay\": 2.0\n}"
      },
      {
        "title": "Level 2: Session Persistence",
        "body": "# Use sessions for cookie/state across requests\nFetcherSession(impersonate=\"chrome\")  # TLS fingerprint spoofing"
      },
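      {
        "title": "Session Persistence Sketch",
        "body": "A minimal sketch of cookie reuse across requests, assuming session.get behaves as in the Proxy Rotation example below; the /account path is hypothetical and only illustrates state carrying over.\n\nfrom scrapling.fetchers import FetcherSession\n\n# One session keeps cookies and the spoofed TLS fingerprint across requests\nwith FetcherSession(impersonate=\"chrome\") as session:\n    session.get(\"https://example.com/\")                 # first hit sets cookies\n    page = session.get(\"https://example.com/account\")   # same cookies reused"
      },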
      {
        "title": "Level 3: Stealth Mode",
        "body": "# MCP: fetch_stealthy\nStealthyFetcher.fetch(\n    url,\n    headless=True,\n    solve_cloudflare=True,  # Auto-solve Turnstile\n    network_idle=True\n)"
      },
      {
        "title": "Level 4: Proxy Rotation",
        "body": "See references/proxy-rotation.md"
      },
      {
        "title": "Adaptive Scraping (Anti-Fragile)",
        "body": "Scrapling can survive website redesigns using adaptive selectors:\n\n# First run — save fingerprints\nproducts = page.css('.product', auto_save=True)\n\n# Later runs — auto-relocate if DOM changed\nproducts = page.css('.product', adaptive=True)\n\nMCP usage:\n\nmcporter call scrapling css_select \\\\\n  --selector \".product\" \\\\\n  --adaptive true \\\\\n  --auto-save true"
      },
      {
        "title": "Spider Framework (Large Crawls)",
        "body": "When to use Spiders vs direct fetching:\n\n✅ Spider: 10+ pages, concurrency needed, resume capability, proxy rotation\n✅ Direct: 1-5 pages, quick extraction, simple flow"
      },
      {
        "title": "Basic Spider Pattern",
        "body": "from scrapling.spiders import Spider, Response\n\nclass ProductSpider(Spider):\n    name = \"products\"\n    start_urls = [\"https://example.com/products\"]\n    concurrent_requests = 10\n    download_delay = 1.0\n    \n    async def parse(self, response: Response):\n        for product in response.css('.product'):\n            yield {\n                \"name\": product.css('h2::text').get(),\n                \"price\": product.css('.price::text').get(),\n                \"url\": response.url\n            }\n        \n        # Follow pagination\n        next_page = response.css('.next a::attr(href)').get()\n        if next_page:\n            yield response.follow(next_page)\n\n# Run with resume capability\nresult = ProductSpider(crawldir=\"./crawl_data\").start()\nresult.items.to_jsonl(\"products.jsonl\")"
      },
      {
        "title": "Advanced: Multi-Session Spider",
        "body": "from scrapling.spiders import Spider, Request, Response\nfrom scrapling.fetchers import FetcherSession, AsyncStealthySession\n\nclass MultiSessionSpider(Spider):\n    name = \"multi\"\n    start_urls = [\"https://example.com/\"]\n    \n    def configure_sessions(self, manager):\n        manager.add(\"fast\", FetcherSession(impersonate=\"chrome\"))\n        manager.add(\"stealth\", AsyncStealthySession(headless=True), lazy=True)\n    \n    async def parse(self, response: Response):\n        for link in response.css('a::attr(href)').getall():\n            if \"/protected/\" in link:\n                yield Request(link, sid=\"stealth\")\n            else:\n                yield Request(link, sid=\"fast\")"
      },
      {
        "title": "Spider Features",
        "body": "Pause/Resume: crawldir parameter saves checkpoints\nStreaming: async for item in spider.stream() for real-time processing\nAuto-retry: Configurable retry on blocked requests\nExport: Built-in to_json(), to_jsonl()"
      },
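      {
        "title": "Streaming Sketch",
        "body": "A hedged sketch of the streaming feature listed above, assuming spider.stream() yields items as they are scraped; ProductSpider is the class from the Basic Spider Pattern.\n\nimport asyncio\n\nasync def main():\n    spider = ProductSpider(crawldir=\"./crawl_data\")\n    # Process items in real time instead of waiting for start() to finish\n    async for item in spider.stream():\n        print(item[\"name\"], item[\"price\"])\n\nasyncio.run(main())"
      },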
      {
        "title": "Terminal Extraction (No Code)",
        "body": "# Extract to markdown\nscrapling extract get 'https://example.com' content.md\n\n# Extract specific element\nscrapling extract get 'https://example.com' content.txt \\\\\n  --css-selector '.article' \\\\\n  --impersonate 'chrome'\n\n# Stealth mode\nscrapling extract stealthy-fetch 'https://protected.com' content.md \\\\\n  --no-headless \\\\\n  --solve-cloudflare"
      },
      {
        "title": "Interactive Shell",
        "body": "scrapling shell\n\n# Inside shell:\n>>> page = Fetcher.get('https://example.com')\n>>> page.css('h1::text').get()\n>>> page.find_all('div', class_='item')"
      },
      {
        "title": "BeautifulSoup-Style Methods",
        "body": "# Find by attributes\npage.find_all('div', {'class': 'product', 'data-id': True})\npage.find_all('div', class_='product', id=re.compile(r'item-\\\\d+'))\n\n# Text search\npage.find_by_text('Add to Cart', tag='button')\npage.find_by_regex(r'\\\\$\\\\d+\\\\.\\\\d{2}')\n\n# Navigation\nfirst = page.css('.product')[0]\nparent = first.parent\nsiblings = first.next_siblings\nchildren = first.children\n\n# Similarity\nsimilar = first.find_similar()  # Find visually/structurally similar elements\nbelow = first.below_elements()  # Elements below in DOM"
      },
      {
        "title": "Auto-Generated Selectors",
        "body": "# Get robust selector for any element\nelement = page.css('.product')[0]\nselector = element.auto_css_selector()  # Returns stable CSS path\nxpath = element.auto_xpath()"
      },
      {
        "title": "Proxy Rotation",
        "body": "from scrapling.spiders import ProxyRotator\n\n# Cyclic rotation\nrotator = ProxyRotator([\n    \"http://proxy1:8080\",\n    \"http://proxy2:8080\",\n    \"http://user:pass@proxy3:8080\"\n], strategy=\"cyclic\")\n\n# Use with any session\nwith FetcherSession(proxy=rotator.next()) as session:\n    page = session.get('https://example.com')"
      },
      {
        "title": "Pagination Patterns",
        "body": "# Page numbers\nfor page_num in range(1, 11):\n    url = f\"https://example.com/products?page={page_num}\"\n    ...\n\n# Next button\nwhile next_page := response.css('.next a::attr(href)').get():\n    yield response.follow(next_page)\n\n# Infinite scroll (DynamicFetcher)\nwith DynamicSession() as session:\n    page = session.fetch(url)\n    page.scroll_to_bottom()\n    items = page.css('.item').getall()"
      },
      {
        "title": "Login Sessions",
        "body": "with StealthySession(headless=False) as session:\n    # Login\n    login_page = session.fetch('https://example.com/login')\n    login_page.fill('input[name=\"username\"]', 'user')\n    login_page.fill('input[name=\"password\"]', 'pass')\n    login_page.click('button[type=\"submit\"]')\n    \n    # Now session has cookies\n    protected_page = session.fetch('https://example.com/dashboard')"
      },
      {
        "title": "Next.js Data Extraction",
        "body": "# Extract JSON from __NEXT_DATA__\nimport json\nimport re\n\nnext_data = json.loads(\n    re.search(\n        r'__NEXT_DATA__\" type=\"application/json\">(.*?)</script>',\n        page.html_content,\n        re.S\n    ).group(1)\n)\nprops = next_data['props']['pageProps']"
      },
      {
        "title": "Output Formats",
        "body": "# JSON (pretty)\nresult.items.to_json('output.json')\n\n# JSONL (streaming, one per line)\nresult.items.to_jsonl('output.jsonl')\n\n# Python objects\nfor item in result.items:\n    print(item['title'])"
      },
      {
        "title": "Performance Tips",
        "body": "Use HTTP fetcher when possible — 10-100x faster than browser\nImpersonate browsers — impersonate='chrome' for TLS fingerprinting\nHTTP/3 support — FetcherSession(http3=True)\nLimit resources — disable_resources=True in Dynamic/Stealthy\nConnection pooling — Reuse sessions across requests"
      },
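      {
        "title": "Performance Tips in Practice (Sketch)",
        "body": "A sketch combining the tips above. Each keyword argument appears individually elsewhere in this skill; passing them together in one session is an assumption.\n\nfrom scrapling.fetchers import FetcherSession\n\nurls = [f\"https://example.com/products?page={n}\" for n in range(1, 6)]\n\n# One pooled session: browser TLS fingerprint plus HTTP/3 where the server supports it\nwith FetcherSession(impersonate=\"chrome\", http3=True) as session:\n    pages = [session.get(u) for u in urls]"
      },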
      {
        "title": "Guardrails (Always)",
        "body": "Only scrape content you're authorized to access\nRespect robots.txt and ToS\nAdd delays (download_delay) for large crawls\nDon't bypass paywalls or authentication without permission\nNever scrape personal/sensitive data"
      },
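      {
        "title": "Polite Crawl Defaults (Sketch)",
        "body": "A sketch of conservative spider settings for the delay guidance above, using only attributes shown in the Basic Spider Pattern; the exact values are illustrative.\n\nfrom scrapling.spiders import Spider, Response\n\nclass PoliteSpider(Spider):\n    name = \"polite\"\n    start_urls = [\"https://example.com/\"]\n    concurrent_requests = 2   # keep load on the target low\n    download_delay = 2.0      # seconds between requests\n\n    async def parse(self, response: Response):\n        yield {\"url\": response.url}"
      },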
      {
        "title": "References",
        "body": "references/mcp-setup.md — Detailed MCP configuration\nreferences/anti-bot.md — Anti-bot handling strategies\nreferences/proxy-rotation.md — Proxy setup and rotation\nreferences/spider-recipes.md — Advanced crawling patterns\nreferences/api-reference.md — Quick API reference\nreferences/links.md — Official docs links"
      },
      {
        "title": "Scripts",
        "body": "scripts/scrapling_scrape.py — Quick one-off extraction\nscripts/scrapling_smoke_test.py — Test connectivity and anti-bot indicators"
      }
    ]
  },
  "trust": {
    "sourceLabel": "tencent",
    "provenanceUrl": "https://clawhub.ai/DevBD1/scrapling-web-scraping",
    "publisherUrl": "https://clawhub.ai/DevBD1/scrapling-web-scraping",
    "owner": "DevBD1",
    "version": "1.2.0",
    "license": null,
    "verificationStatus": "Indexed source record"
  },
  "links": {
    "detailUrl": "https://openagent3.xyz/skills/scrapling-web-scraping",
    "downloadUrl": "https://openagent3.xyz/downloads/scrapling-web-scraping",
    "agentUrl": "https://openagent3.xyz/skills/scrapling-web-scraping/agent",
    "manifestUrl": "https://openagent3.xyz/skills/scrapling-web-scraping/agent.json",
    "briefUrl": "https://openagent3.xyz/skills/scrapling-web-scraping/agent.md"
  }
}