{
  "schemaVersion": "1.0",
  "item": {
    "slug": "x-extract",
    "name": "X Extract",
    "source": "tencent",
    "type": "skill",
    "category": "开发工具",
    "sourceUrl": "https://clawhub.ai/chunhualiao/x-extract",
    "canonicalUrl": "https://clawhub.ai/chunhualiao/x-extract",
    "targetPlatform": "OpenClaw"
  },
  "install": {
    "downloadMode": "redirect",
    "downloadUrl": "/downloads/x-extract",
    "sourceDownloadUrl": "https://wry-manatee-359.convex.site/api/v1/download?slug=x-extract",
    "sourcePlatform": "tencent",
    "targetPlatform": "OpenClaw",
    "installMethod": "Manual import",
    "extraction": "Extract archive",
    "prerequisites": [
      "OpenClaw"
    ],
    "packageFormat": "ZIP package",
    "includedAssets": [
      "CHANGELOG.md",
      "README.md",
      "SKILL.md",
      "references/selectors.md",
      "scripts/extract.mjs",
      "skill.yml"
    ],
    "primaryDoc": "SKILL.md",
    "quickSetup": [
      "Download the package from Yavira.",
      "Extract the archive and review SKILL.md first.",
      "Import or place the package into your OpenClaw setup."
    ],
    "agentAssist": {
      "summary": "Hand the extracted package to your coding agent with a concrete install brief instead of figuring it out manually.",
      "steps": [
        "Download the package from Yavira.",
        "Extract it into a folder your agent can access.",
        "Paste one of the prompts below and point your agent at the extracted folder."
      ],
      "prompts": [
        {
          "label": "New install",
          "body": "I downloaded a skill package from Yavira. Read SKILL.md from the extracted folder and install it by following the included instructions. Then review README.md for any prerequisites, environment setup, or post-install checks. Tell me what you changed and call out any manual steps you could not complete."
        },
        {
          "label": "Upgrade existing",
          "body": "I downloaded an updated skill package from Yavira. Read SKILL.md from the extracted folder, compare it with my current installation, and upgrade it while preserving any custom configuration unless the package docs explicitly say otherwise. Then review README.md for any prerequisites, environment setup, or post-install checks. Summarize what changed and any follow-up checks I should run."
        }
      ]
    },
    "sourceHealth": {
      "source": "tencent",
      "status": "healthy",
      "reason": "direct_download_ok",
      "recommendedAction": "download",
      "checkedAt": "2026-05-07T17:22:31.273Z",
      "expiresAt": "2026-05-14T17:22:31.273Z",
      "httpStatus": 200,
      "finalUrl": "https://wry-manatee-359.convex.site/api/v1/download?slug=afrexai-annual-report",
      "contentType": "application/zip",
      "probeMethod": "head",
      "details": {
        "probeUrl": "https://wry-manatee-359.convex.site/api/v1/download?slug=afrexai-annual-report",
        "contentDisposition": "attachment; filename=\"afrexai-annual-report-1.0.0.zip\"",
        "redirectLocation": null,
        "bodySnippet": null
      },
      "scope": "source",
      "summary": "Source download looks usable.",
      "detail": "Yavira can redirect you to the upstream package for this source.",
      "primaryActionLabel": "Download for OpenClaw",
      "primaryActionHref": "/downloads/x-extract"
    },
    "validation": {
      "installChecklist": [
        "Use the Yavira download entry.",
        "Review SKILL.md after the package is downloaded.",
        "Confirm the extracted package contains the expected setup assets."
      ],
      "postInstallChecks": [
        "Confirm the extracted package includes the expected docs or setup files.",
        "Validate the skill or prompts are available in your target agent workspace.",
        "Capture any manual follow-up steps the agent could not complete."
      ]
    },
    "downloadPageUrl": "https://openagent3.xyz/downloads/x-extract",
    "agentPageUrl": "https://openagent3.xyz/skills/x-extract/agent",
    "manifestUrl": "https://openagent3.xyz/skills/x-extract/agent.json",
    "briefUrl": "https://openagent3.xyz/skills/x-extract/agent.md"
  },
  "agentAssist": {
    "summary": "Hand the extracted package to your coding agent with a concrete install brief instead of figuring it out manually.",
    "steps": [
      "Download the package from Yavira.",
      "Extract it into a folder your agent can access.",
      "Paste one of the prompts below and point your agent at the extracted folder."
    ],
    "prompts": [
      {
        "label": "New install",
        "body": "I downloaded a skill package from Yavira. Read SKILL.md from the extracted folder and install it by following the included instructions. Then review README.md for any prerequisites, environment setup, or post-install checks. Tell me what you changed and call out any manual steps you could not complete."
      },
      {
        "label": "Upgrade existing",
        "body": "I downloaded an updated skill package from Yavira. Read SKILL.md from the extracted folder, compare it with my current installation, and upgrade it while preserving any custom configuration unless the package docs explicitly say otherwise. Then review README.md for any prerequisites, environment setup, or post-install checks. Summarize what changed and any follow-up checks I should run."
      }
    ]
  },
  "documentation": {
    "source": "clawhub",
    "primaryDoc": "SKILL.md",
    "sections": [
      {
        "title": "X.com Tweet Extraction",
        "body": "Extract tweet content (text, media, author, metadata) from x.com URLs without requiring Twitter/X credentials."
      },
      {
        "title": "How It Works",
        "body": "Uses OpenClaw's browser tool to load the tweet page, then extracts content from the rendered HTML."
      },
      {
        "title": "1. Validate URL",
        "body": "Check that the URL is a valid x.com/twitter.com tweet:\n\nMust contain x.com/*/status/ or twitter.com/*/status/\nExtract tweet ID from URL pattern: /status/(\\d+)"
      },
      {
        "title": "2. Open in Browser",
        "body": "browser action=open profile=openclaw targetUrl=<x.com-url>\n\nWait for page load (targetId returned)."
      },
      {
        "title": "3. Capture Snapshot",
        "body": "browser action=snapshot targetId=<TARGET_ID> snapshotFormat=aria"
      },
      {
        "title": "4. Extract Content",
        "body": "From the snapshot, extract:\n\nRequired fields:\n\nTweet text: Look for role=article containing the main tweet content\nAuthor: role=link with author name/handle (usually @username format)\nTimestamp: role=time element\n\nOptional fields:\n\nMedia: role=img or role=link containing /photo/, /video/\nEngagement: Like count, retweet count, reply count (in role=group or role=button)\nThread context: If tweet is part of thread, note previous/next tweet references"
      },
      {
        "title": "5. Format Output",
        "body": "Output as structured markdown:\n\n# Tweet by @username\n\n**Author:** Full Name (@handle)  \n**Posted:** YYYY-MM-DD HH:MM  \n**Source:** <original-url>\n\n---\n\n<Tweet text content here>\n\n---\n\n**Media:**\n- ![Image 1](<media-url-1>)\n- ![Image 2](<media-url-2>)\n\n**Engagement:**\n- 👍 Likes: 1,234\n- 🔄 Retweets: 567\n- 💬 Replies: 89\n\n**Thread:** [Part 2/5] | [View full thread](<thread-url>)"
      },
      {
        "title": "6. Download Media (Optional)",
        "body": "If user requests --download-media or \"download images\":\n\nExtract all media URLs from snapshot\nUse exec with curl or wget to download:\ncurl -L -o \"tweet-{tweetId}-image-{n}.jpg\" \"<media-url>\"\n\n\nReport downloaded files with paths"
      },
      {
        "title": "Error Handling",
        "body": "If page fails to load:\n\nCheck if URL is valid\nTry alternative: replace x.com with twitter.com (still works)\nSome tweets may require login (controversial, age-restricted) - report to user\n\nIf content extraction fails:\n\nX.com layout may have changed - check references/selectors.md\nProvide raw snapshot to user for manual review\nReport which fields were successfully extracted"
      },
      {
        "title": "Common Selectors",
        "body": "See references/selectors.md for detailed CSS/ARIA selectors used by x.com (updated as layout changes)."
      },
      {
        "title": "Limitations",
        "body": "No credentials: Cannot access protected tweets, DMs, or login-required content\nRate limiting: X.com may block excessive automated requests\nLayout changes: Selectors may break if X updates their HTML structure\nDynamic content: Some content (comments, threads) may load lazily"
      },
      {
        "title": "Examples",
        "body": "Extract single tweet:\n\nUser: \"Extract this tweet: https://x.com/vista8/status/2019651804062241077\"\nAgent: [Opens browser, captures snapshot, formats markdown output]\n\nExtract with media download:\n\nUser: \"Get the tweet text and download all images from https://x.com/user/status/123\"\nAgent: [Extracts content, downloads images to ./downloads/, reports paths]\n\nThread extraction:\n\nUser: \"Extract this thread: https://x.com/user/status/456\"\nAgent: [Detects thread, extracts all tweets in sequence, formats as numbered list]"
      }
    ],
    "body": "X.com Tweet Extraction\n\nExtract tweet content (text, media, author, metadata) from x.com URLs without requiring Twitter/X credentials.\n\nHow It Works\n\nUses OpenClaw's browser tool to load the tweet page, then extracts content from the rendered HTML.\n\nWorkflow\n1. Validate URL\n\nCheck that the URL is a valid x.com/twitter.com tweet:\n\nMust contain x.com/*/status/ or twitter.com/*/status/\nExtract tweet ID from URL pattern: /status/(\\d+)\n2. Open in Browser\nbrowser action=open profile=openclaw targetUrl=<x.com-url>\n\n\nWait for page load (targetId returned).\n\n3. Capture Snapshot\nbrowser action=snapshot targetId=<TARGET_ID> snapshotFormat=aria\n\n4. Extract Content\n\nFrom the snapshot, extract:\n\nRequired fields:\n\nTweet text: Look for role=article containing the main tweet content\nAuthor: role=link with author name/handle (usually @username format)\nTimestamp: role=time element\n\nOptional fields:\n\nMedia: role=img or role=link containing /photo/, /video/\nEngagement: Like count, retweet count, reply count (in role=group or role=button)\nThread context: If tweet is part of thread, note previous/next tweet references\n5. Format Output\n\nOutput as structured markdown:\n\n# Tweet by @username\n\n**Author:** Full Name (@handle)  \n**Posted:** YYYY-MM-DD HH:MM  \n**Source:** <original-url>\n\n---\n\n<Tweet text content here>\n\n---\n\n**Media:**\n- ![Image 1](<media-url-1>)\n- ![Image 2](<media-url-2>)\n\n**Engagement:**\n- 👍 Likes: 1,234\n- 🔄 Retweets: 567\n- 💬 Replies: 89\n\n**Thread:** [Part 2/5] | [View full thread](<thread-url>)\n\n6. Download Media (Optional)\n\nIf user requests --download-media or \"download images\":\n\nExtract all media URLs from snapshot\nUse exec with curl or wget to download:\ncurl -L -o \"tweet-{tweetId}-image-{n}.jpg\" \"<media-url>\"\n\nReport downloaded files with paths\nError Handling\n\nIf page fails to load:\n\nCheck if URL is valid\nTry alternative: replace x.com with twitter.com (still works)\nSome tweets may require login (controversial, age-restricted) - report to user\n\nIf content extraction fails:\n\nX.com layout may have changed - check references/selectors.md\nProvide raw snapshot to user for manual review\nReport which fields were successfully extracted\nCommon Selectors\n\nSee references/selectors.md for detailed CSS/ARIA selectors used by x.com (updated as layout changes).\n\nLimitations\nNo credentials: Cannot access protected tweets, DMs, or login-required content\nRate limiting: X.com may block excessive automated requests\nLayout changes: Selectors may break if X updates their HTML structure\nDynamic content: Some content (comments, threads) may load lazily\nExamples\n\nExtract single tweet:\n\nUser: \"Extract this tweet: https://x.com/vista8/status/2019651804062241077\"\nAgent: [Opens browser, captures snapshot, formats markdown output]\n\n\nExtract with media download:\n\nUser: \"Get the tweet text and download all images from https://x.com/user/status/123\"\nAgent: [Extracts content, downloads images to ./downloads/, reports paths]\n\n\nThread extraction:\n\nUser: \"Extract this thread: https://x.com/user/status/456\"\nAgent: [Detects thread, extracts all tweets in sequence, formats as numbered list]"
  },
  "trust": {
    "sourceLabel": "tencent",
    "provenanceUrl": "https://clawhub.ai/chunhualiao/x-extract",
    "publisherUrl": "https://clawhub.ai/chunhualiao/x-extract",
    "owner": "chunhualiao",
    "version": "1.0.0",
    "license": null,
    "verificationStatus": "Indexed source record"
  },
  "links": {
    "detailUrl": "https://openagent3.xyz/skills/x-extract",
    "downloadUrl": "https://openagent3.xyz/downloads/x-extract",
    "agentUrl": "https://openagent3.xyz/skills/x-extract/agent",
    "manifestUrl": "https://openagent3.xyz/skills/x-extract/agent.json",
    "briefUrl": "https://openagent3.xyz/skills/x-extract/agent.md"
  }
}