{
  "schemaVersion": "1.0",
  "item": {
    "slug": "wechat-article-extractor",
    "name": "WeChat Article Extractor",
    "source": "tencent",
    "type": "skill",
    "category": "通讯协作",
    "sourceUrl": "https://clawhub.ai/chunhualiao/wechat-article-extractor",
    "canonicalUrl": "https://clawhub.ai/chunhualiao/wechat-article-extractor",
    "targetPlatform": "OpenClaw"
  },
  "install": {
    "downloadMode": "redirect",
    "downloadUrl": "/downloads/wechat-article-extractor",
    "sourceDownloadUrl": "https://wry-manatee-359.convex.site/api/v1/download?slug=wechat-article-extractor",
    "sourcePlatform": "tencent",
    "targetPlatform": "OpenClaw",
    "installMethod": "Manual import",
    "extraction": "Extract archive",
    "prerequisites": [
      "OpenClaw"
    ],
    "packageFormat": "ZIP package",
    "includedAssets": [
      "CHANGELOG.md",
      "README.md",
      "SKILL.md",
      "scripts/extract_wechat.py",
      "skill.yml",
      "tests/test_triggers.md"
    ],
    "primaryDoc": "SKILL.md",
    "quickSetup": [
      "Download the package from Yavira.",
      "Extract the archive and review SKILL.md first.",
      "Import or place the package into your OpenClaw setup."
    ],
    "agentAssist": {
      "summary": "Hand the extracted package to your coding agent with a concrete install brief instead of figuring it out manually.",
      "steps": [
        "Download the package from Yavira.",
        "Extract it into a folder your agent can access.",
        "Paste one of the prompts below and point your agent at the extracted folder."
      ],
      "prompts": [
        {
          "label": "New install",
          "body": "I downloaded a skill package from Yavira. Read SKILL.md from the extracted folder and install it by following the included instructions. Then review README.md for any prerequisites, environment setup, or post-install checks. Tell me what you changed and call out any manual steps you could not complete."
        },
        {
          "label": "Upgrade existing",
          "body": "I downloaded an updated skill package from Yavira. Read SKILL.md from the extracted folder, compare it with my current installation, and upgrade it while preserving any custom configuration unless the package docs explicitly say otherwise. Then review README.md for any prerequisites, environment setup, or post-install checks. Summarize what changed and any follow-up checks I should run."
        }
      ]
    },
    "sourceHealth": {
      "source": "tencent",
      "status": "healthy",
      "reason": "direct_download_ok",
      "recommendedAction": "download",
      "checkedAt": "2026-04-30T16:55:25.780Z",
      "expiresAt": "2026-05-07T16:55:25.780Z",
      "httpStatus": 200,
      "finalUrl": "https://wry-manatee-359.convex.site/api/v1/download?slug=network",
      "contentType": "application/zip",
      "probeMethod": "head",
      "details": {
        "probeUrl": "https://wry-manatee-359.convex.site/api/v1/download?slug=network",
        "contentDisposition": "attachment; filename=\"network-1.0.0.zip\"",
        "redirectLocation": null,
        "bodySnippet": null
      },
      "scope": "source",
      "summary": "Source download looks usable.",
      "detail": "Yavira can redirect you to the upstream package for this source.",
      "primaryActionLabel": "Download for OpenClaw",
      "primaryActionHref": "/downloads/wechat-article-extractor"
    },
    "validation": {
      "installChecklist": [
        "Use the Yavira download entry.",
        "Review SKILL.md after the package is downloaded.",
        "Confirm the extracted package contains the expected setup assets."
      ],
      "postInstallChecks": [
        "Confirm the extracted package includes the expected docs or setup files.",
        "Validate the skill or prompts are available in your target agent workspace.",
        "Capture any manual follow-up steps the agent could not complete."
      ]
    },
    "downloadPageUrl": "https://openagent3.xyz/downloads/wechat-article-extractor",
    "agentPageUrl": "https://openagent3.xyz/skills/wechat-article-extractor/agent",
    "manifestUrl": "https://openagent3.xyz/skills/wechat-article-extractor/agent.json",
    "briefUrl": "https://openagent3.xyz/skills/wechat-article-extractor/agent.md"
  },
  "agentAssist": {
    "summary": "Hand the extracted package to your coding agent with a concrete install brief instead of figuring it out manually.",
    "steps": [
      "Download the package from Yavira.",
      "Extract it into a folder your agent can access.",
      "Paste one of the prompts below and point your agent at the extracted folder."
    ],
    "prompts": [
      {
        "label": "New install",
        "body": "I downloaded a skill package from Yavira. Read SKILL.md from the extracted folder and install it by following the included instructions. Then review README.md for any prerequisites, environment setup, or post-install checks. Tell me what you changed and call out any manual steps you could not complete."
      },
      {
        "label": "Upgrade existing",
        "body": "I downloaded an updated skill package from Yavira. Read SKILL.md from the extracted folder, compare it with my current installation, and upgrade it while preserving any custom configuration unless the package docs explicitly say otherwise. Then review README.md for any prerequisites, environment setup, or post-install checks. Summarize what changed and any follow-up checks I should run."
      }
    ]
  },
  "documentation": {
    "source": "clawhub",
    "primaryDoc": "SKILL.md",
    "sections": [
      {
        "title": "WeChat Article Extractor",
        "body": "Extract WeChat public account articles to clean Markdown. WeChat blocks headless browsers (环境异常 CAPTCHA) and web_fetch gets empty JS-rendered pages, so the reliable approach is: find a mirror on aggregator sites, then extract content."
      },
      {
        "title": "Scope & Boundaries",
        "body": "This skill handles:\n\nExtracting article text, images, and metadata from WeChat article URLs\nFinding mirror copies when direct access is blocked\nConverting HTML to clean Markdown\nSaving output as .md files\n\nThis skill does NOT handle:\n\nPublishing or syncing to note-taking apps (that's the user's workflow)\nBatch extraction of multiple articles (handle one at a time)\nWeChat login, authentication, or account management\nTranslating article content"
      },
      {
        "title": "Inputs",
        "body": "Input\tRequired\tDescription\nWeChat URL\tYes\tAn mp.weixin.qq.com link\nOutput filename\tNo\tDefaults to kebab-case of article title\nSave location\tNo\tDefaults to /tmp/"
      },
      {
        "title": "Outputs",
        "body": "A Markdown file with full article content, images, and metadata header\nConsole confirmation with file path and character count"
      },
      {
        "title": "Step 1 — Try direct fetch (fast path)",
        "body": "web_fetch(url, extractMode=\"markdown\", maxChars=50000)\n\nSuccess check: If result rawLength > 500 AND content has real paragraphs (not just nav/footer text) → skip to Step 4 Option B.\n\nFailure indicators: rawLength < 500, content is navigation/boilerplate only, or contains \"环境异常\" → go to Step 2."
      },
      {
        "title": "Step 2 — Extract article metadata",
        "body": "From the URL or any partial content, identify:\n\nArticle title (from <title> or og:title)\nAuthor / account name (from og:description or page content)\n\nIf metadata is unavailable from the URL, ask the user for the article title."
      },
      {
        "title": "Step 3 — Search for mirrors",
        "body": "web_search(\"<article title> <author/account name>\")\n\nMirror site priority (ranked by content quality and reliability):\n\n53ai.com — full content, reliable formatting\nmp.ofweek.com — tech articles\njuejin.cn — developer content\nwoshipm.com — product/business content\n36kr.com — tech/business news\n\nIf title is unknown, try: web_search(\"site:53ai.com <keywords from URL path>\")\n\nIf no mirrors found: Try the Chrome Extension Relay fallback (see Fallback section)."
      },
      {
        "title": "Step 4 — Download and extract",
        "body": "Option A — Mirror found:\n\ncurl -s -L \"<mirror_url>\" -o /tmp/wechat-article.html\n\nVerify file size > 10KB (smaller usually means redirect/error page).\n\nRun the extraction script:\n\npython3 <skill_dir>/scripts/extract_wechat.py /tmp/wechat-article.html /tmp/<output-filename>.md\n\nReplace <skill_dir> with the directory containing this SKILL.md.\n\nOption B — Direct fetch succeeded (Step 1):\nFormat the fetched markdown with the header template below."
      },
      {
        "title": "Step 5 — Verify output quality",
        "body": "Check the output file:\n\nHas a title (not \"WeChat Article\")\nHas multiple paragraphs of real content\nImages have valid URLs (not broken/placeholder)\nNo excessive HTML artifacts remaining\n\nIf output looks truncated or garbled, try a different mirror site (return to Step 3)."
      },
      {
        "title": "Step 6 — Deliver to user",
        "body": "Report:\n\nFile saved at: <path>\nTitle: <title>\nSize: <char count> characters\nImage count: <N> images\n\nIf the user wants it saved to a specific location (e.g., Obsidian), follow their instructions for the final copy."
      },
      {
        "title": "Markdown Header Template",
        "body": "Every extracted article must include this header:\n\n# <title>\n\n**作者：** <author>\n**来源：** 微信公众号「<account_name>」\n**日期：** <date>\n**原文：** <original_wechat_url>\n\n---\n\n> **摘要：** <1-2 sentence summary generated from content>\n\n---\n\nFields that cannot be determined should be omitted (don't write \"Unknown\")."
      },
      {
        "title": "Fallback: Chrome Extension Relay",
        "body": "If no mirror exists (very new or niche article):\n\nTell the user (in Chinese if they wrote in Chinese):\n\n\"没有找到镜像。请在 Chrome 中打开这篇文章，然后点击 OpenClaw Browser Relay 扩展图标（badge 亮起），我就能直接读取内容。\"\n\nThen use:\n\nbrowser(action=\"snapshot\", profile=\"chrome\")\n\nExtract content from the snapshot and format with the header template."
      },
      {
        "title": "Error Handling",
        "body": "Problem\tDetection\tAction\nWeChat blocks access\trawLength < 500 or \"环境异常\"\tSearch for mirrors (Step 3)\nNo mirrors found\tSearch returns 0 relevant results\tTry Chrome Relay fallback\nMirror content truncated\tOutput < 1000 chars when original is long\tTry next mirror site\nScript extraction fails\tPython error or empty output\tFall back to web_fetch on mirror URL\nImages broken\tImage URLs return 404\tNote in output; images may expire"
      },
      {
        "title": "Success Criteria",
        "body": "Output Markdown contains the full article text (not truncated)\nTitle and metadata are correctly extracted\nImages are preserved with working URLs\nNo HTML artifacts or navigation junk in output\nFile is saved at the specified location"
      },
      {
        "title": "Notes",
        "body": "WeChat image URLs from mirrors (e.g., api.ibos.cn proxy) are generally valid and render in most Markdown viewers\nMirror sites typically publish within minutes of the original\nThe · · · section dividers are WeChat style — preserve them\nFor very long articles (>50K chars), the script handles them fine but web_fetch may truncate"
      },
      {
        "title": "Configuration",
        "body": "No persistent configuration required. The skill uses standard OpenClaw tools (web_fetch, web_search, exec) and optionally browser for the Chrome Relay fallback.\n\nRequired tools:\n\nTool\tPurpose\nweb_fetch\tDirect article fetch attempt\nweb_search\tMirror site discovery\nexec\tRun curl and Python extraction script\n\nOptional tools:\n\nTool\tPurpose\nbrowser\tChrome Extension Relay fallback\n\nSystem dependencies:\n\nDependency\tPurpose\nPython 3.8+\tExtraction script\ncurl\tMirror page download"
      }
    ],
    "body": "WeChat Article Extractor\n\nExtract WeChat public account articles to clean Markdown. WeChat blocks headless browsers (环境异常 CAPTCHA) and web_fetch gets empty JS-rendered pages, so the reliable approach is: find a mirror on aggregator sites, then extract content.\n\nScope & Boundaries\n\nThis skill handles:\n\nExtracting article text, images, and metadata from WeChat article URLs\nFinding mirror copies when direct access is blocked\nConverting HTML to clean Markdown\nSaving output as .md files\n\nThis skill does NOT handle:\n\nPublishing or syncing to note-taking apps (that's the user's workflow)\nBatch extraction of multiple articles (handle one at a time)\nWeChat login, authentication, or account management\nTranslating article content\nInputs\nInput\tRequired\tDescription\nWeChat URL\tYes\tAn mp.weixin.qq.com link\nOutput filename\tNo\tDefaults to kebab-case of article title\nSave location\tNo\tDefaults to /tmp/\nOutputs\nA Markdown file with full article content, images, and metadata header\nConsole confirmation with file path and character count\nWorkflow\nStep 1 — Try direct fetch (fast path)\nweb_fetch(url, extractMode=\"markdown\", maxChars=50000)\n\n\nSuccess check: If result rawLength > 500 AND content has real paragraphs (not just nav/footer text) → skip to Step 4 Option B.\n\nFailure indicators: rawLength < 500, content is navigation/boilerplate only, or contains \"环境异常\" → go to Step 2.\n\nStep 2 — Extract article metadata\n\nFrom the URL or any partial content, identify:\n\nArticle title (from <title> or og:title)\nAuthor / account name (from og:description or page content)\n\nIf metadata is unavailable from the URL, ask the user for the article title.\n\nStep 3 — Search for mirrors\nweb_search(\"<article title> <author/account name>\")\n\n\nMirror site priority (ranked by content quality and reliability):\n\n53ai.com — full content, reliable formatting\nmp.ofweek.com — tech articles\njuejin.cn — developer content\nwoshipm.com — 
product/business content\n36kr.com — tech/business news\n\nIf title is unknown, try: web_search(\"site:53ai.com <keywords from URL path>\")\n\nIf no mirrors found: Try the Chrome Extension Relay fallback (see Fallback section).\n\nStep 4 — Download and extract\n\nOption A — Mirror found:\n\ncurl -s -L \"<mirror_url>\" -o /tmp/wechat-article.html\n\n\nVerify file size > 10KB (smaller usually means redirect/error page).\n\nRun the extraction script:\n\npython3 <skill_dir>/scripts/extract_wechat.py /tmp/wechat-article.html /tmp/<output-filename>.md\n\n\nReplace <skill_dir> with the directory containing this SKILL.md.\n\nOption B — Direct fetch succeeded (Step 1): Format the fetched markdown with the header template below.\n\nStep 5 — Verify output quality\n\nCheck the output file:\n\nHas a title (not \"WeChat Article\")\nHas multiple paragraphs of real content\nImages have valid URLs (not broken/placeholder)\nNo excessive HTML artifacts remaining\n\nIf output looks truncated or garbled, try a different mirror site (return to Step 3).\n\nStep 6 — Deliver to user\n\nReport:\n\nFile saved at: <path>\nTitle: <title>\nSize: <char count> characters\nImage count: <N> images\n\nIf the user wants it saved to a specific location (e.g., Obsidian), follow their instructions for the final copy.\n\nMarkdown Header Template\n\nEvery extracted article must include this header:\n\n# <title>\n\n**作者：** <author>\n**来源：** 微信公众号「<account_name>」\n**日期：** <date>\n**原文：** <original_wechat_url>\n\n---\n\n> **摘要：** <1-2 sentence summary generated from content>\n\n---\n\n\nFields that cannot be determined should be omitted (don't write \"Unknown\").\n\nFallback: Chrome Extension Relay\n\nIf no mirror exists (very new or niche article):\n\nTell the user (in Chinese if they wrote in Chinese):\n\n\"没有找到镜像。请在 Chrome 中打开这篇文章，然后点击 OpenClaw Browser Relay 扩展图标（badge 亮起），我就能直接读取内容。\"\n\nThen use:\n\nbrowser(action=\"snapshot\", profile=\"chrome\")\n\n\nExtract content from the snapshot and format with 
the header template.\n\nError Handling\nProblem\tDetection\tAction\nWeChat blocks access\trawLength < 500 or \"环境异常\"\tSearch for mirrors (Step 3)\nNo mirrors found\tSearch returns 0 relevant results\tTry Chrome Relay fallback\nMirror content truncated\tOutput < 1000 chars when original is long\tTry next mirror site\nScript extraction fails\tPython error or empty output\tFall back to web_fetch on mirror URL\nImages broken\tImage URLs return 404\tNote in output; images may expire\nSuccess Criteria\nOutput Markdown contains the full article text (not truncated)\nTitle and metadata are correctly extracted\nImages are preserved with working URLs\nNo HTML artifacts or navigation junk in output\nFile is saved at the specified location\nNotes\nWeChat image URLs from mirrors (e.g., api.ibos.cn proxy) are generally valid and render in most Markdown viewers\nMirror sites typically publish within minutes of the original\nThe · · · section dividers are WeChat style — preserve them\nFor very long articles (>50K chars), the script handles them fine but web_fetch may truncate\nConfiguration\n\nNo persistent configuration required. The skill uses standard OpenClaw tools (web_fetch, web_search, exec) and optionally browser for the Chrome Relay fallback.\n\nRequired tools:\n\nTool\tPurpose\nweb_fetch\tDirect article fetch attempt\nweb_search\tMirror site discovery\nexec\tRun curl and Python extraction script\n\nOptional tools:\n\nTool\tPurpose\nbrowser\tChrome Extension Relay fallback\n\nSystem dependencies:\n\nDependency\tPurpose\nPython 3.8+\tExtraction script\ncurl\tMirror page download"
  },
  "trust": {
    "sourceLabel": "tencent",
    "provenanceUrl": "https://clawhub.ai/chunhualiao/wechat-article-extractor",
    "publisherUrl": "https://clawhub.ai/chunhualiao/wechat-article-extractor",
    "owner": "chunhualiao",
    "version": "1.0.0",
    "license": null,
    "verificationStatus": "Indexed source record"
  },
  "links": {
    "detailUrl": "https://openagent3.xyz/skills/wechat-article-extractor",
    "downloadUrl": "https://openagent3.xyz/downloads/wechat-article-extractor",
    "agentUrl": "https://openagent3.xyz/skills/wechat-article-extractor/agent",
    "manifestUrl": "https://openagent3.xyz/skills/wechat-article-extractor/agent.json",
    "briefUrl": "https://openagent3.xyz/skills/wechat-article-extractor/agent.md"
  }
}