{
  "schemaVersion": "1.0",
  "item": {
    "slug": "haibo-jina-reader",
    "name": "Jina Reader",
    "source": "tencent",
    "type": "skill",
    "category": "开发工具",
    "sourceUrl": "https://clawhub.ai/smile-xuc/haibo-jina-reader",
    "canonicalUrl": "https://clawhub.ai/smile-xuc/haibo-jina-reader",
    "targetPlatform": "OpenClaw"
  },
  "install": {
    "downloadMode": "redirect",
    "downloadUrl": "/downloads/haibo-jina-reader",
    "sourceDownloadUrl": "https://wry-manatee-359.convex.site/api/v1/download?slug=haibo-jina-reader",
    "sourcePlatform": "tencent",
    "targetPlatform": "OpenClaw",
    "installMethod": "Manual import",
    "extraction": "Extract archive",
    "prerequisites": [
      "OpenClaw"
    ],
    "packageFormat": "ZIP package",
    "includedAssets": [
      "SKILL.md",
      "scripts/jina-reader.py",
      "scripts/jina-reader.sh"
    ],
    "primaryDoc": "SKILL.md",
    "quickSetup": [
      "Download the package from Yavira.",
      "Extract the archive and review SKILL.md first.",
      "Import or place the package into your OpenClaw setup."
    ],
    "agentAssist": {
      "summary": "Hand the extracted package to your coding agent with a concrete install brief instead of figuring it out manually.",
      "steps": [
        "Download the package from Yavira.",
        "Extract it into a folder your agent can access.",
        "Paste one of the prompts below and point your agent at the extracted folder."
      ],
      "prompts": [
        {
          "label": "New install",
          "body": "I downloaded a skill package from Yavira. Read SKILL.md from the extracted folder and install it by following the included instructions. Tell me what you changed and call out any manual steps you could not complete."
        },
        {
          "label": "Upgrade existing",
          "body": "I downloaded an updated skill package from Yavira. Read SKILL.md from the extracted folder, compare it with my current installation, and upgrade it while preserving any custom configuration unless the package docs explicitly say otherwise. Summarize what changed and any follow-up checks I should run."
        }
      ]
    },
    "sourceHealth": {
      "source": "tencent",
      "status": "healthy",
      "reason": "direct_download_ok",
      "recommendedAction": "download",
      "checkedAt": "2026-04-30T16:55:25.780Z",
      "expiresAt": "2026-05-07T16:55:25.780Z",
      "httpStatus": 200,
      "finalUrl": "https://wry-manatee-359.convex.site/api/v1/download?slug=network",
      "contentType": "application/zip",
      "probeMethod": "head",
      "details": {
        "probeUrl": "https://wry-manatee-359.convex.site/api/v1/download?slug=network",
        "contentDisposition": "attachment; filename=\"network-1.0.0.zip\"",
        "redirectLocation": null,
        "bodySnippet": null
      },
      "scope": "source",
      "summary": "Source download looks usable.",
      "detail": "Yavira can redirect you to the upstream package for this source.",
      "primaryActionLabel": "Download for OpenClaw",
      "primaryActionHref": "/downloads/haibo-jina-reader"
    },
    "validation": {
      "installChecklist": [
        "Use the Yavira download entry.",
        "Review SKILL.md after the package is downloaded.",
        "Confirm the extracted package contains the expected setup assets."
      ],
      "postInstallChecks": [
        "Confirm the extracted package includes the expected docs or setup files.",
        "Validate the skill or prompts are available in your target agent workspace.",
        "Capture any manual follow-up steps the agent could not complete."
      ]
    },
    "downloadPageUrl": "https://openagent3.xyz/downloads/haibo-jina-reader",
    "agentPageUrl": "https://openagent3.xyz/skills/haibo-jina-reader/agent",
    "manifestUrl": "https://openagent3.xyz/skills/haibo-jina-reader/agent.json",
    "briefUrl": "https://openagent3.xyz/skills/haibo-jina-reader/agent.md"
  },
  "agentAssist": {
    "summary": "Hand the extracted package to your coding agent with a concrete install brief instead of figuring it out manually.",
    "steps": [
      "Download the package from Yavira.",
      "Extract it into a folder your agent can access.",
      "Paste one of the prompts below and point your agent at the extracted folder."
    ],
    "prompts": [
      {
        "label": "New install",
        "body": "I downloaded a skill package from Yavira. Read SKILL.md from the extracted folder and install it by following the included instructions. Tell me what you changed and call out any manual steps you could not complete."
      },
      {
        "label": "Upgrade existing",
        "body": "I downloaded an updated skill package from Yavira. Read SKILL.md from the extracted folder, compare it with my current installation, and upgrade it while preserving any custom configuration unless the package docs explicitly say otherwise. Summarize what changed and any follow-up checks I should run."
      }
    ]
  },
  "documentation": {
    "source": "clawhub",
    "primaryDoc": "SKILL.md",
    "sections": [
      {
        "title": "Overview",
        "body": "Jina Reader provides clean markdown extraction from any URL, bypassing HTML complexity, JavaScript rendering, and many paywalls. It returns structured text content with metadata (title, URL, published time) that's perfect for AI analysis."
      },
      {
        "title": "Extract markdown content",
        "body": "scripts/jina-reader.py <url>"
      },
      {
        "title": "Extract with JSON metadata",
        "body": "scripts/jina-reader.py <url> --format json"
      },
      {
        "title": "Save to file",
        "body": "scripts/jina-reader.py <url> -o output.md"
      },
      {
        "title": "1. Basic Extraction",
        "body": "Extract clean markdown from any URL:\n\nscripts/jina-reader.py https://example.com/article\n\nReturns: Full markdown content including title, metadata headers, and structured text.\n\nUse when: You need readable text from a webpage for summarization, analysis, or content processing."
      },
      {
        "title": "2. JSON Format",
        "body": "Get structured data with metadata:\n\nscripts/jina-reader.py https://example.com/article --format json\n\nReturns:\n\n{\n  \"status\": \"success\",\n  \"metadata\": {\n    \"title\": \"Article Title\",\n    \"url\": \"https://example.com/article\",\n    \"published\": \"Mon, 10 Feb 2026 12:00:00 GMT\"\n  },\n  \"content\": \"Markdown content...\"\n}\n\nUse when: You need programmatic access to metadata or want to integrate with other tools."
      },
      {
        "title": "3. Shell Script Quick Access",
        "body": "For simple one-liners:\n\nscripts/jina-reader.sh https://example.com/article\n\nReturns: Raw markdown content directly to stdout.\n\nUse when: Quick extraction without arguments or when piping to other commands."
      },
      {
        "title": "Combining with Search",
        "body": "When using tavily-search, web_search, or searxng skills:\n\nGet search results with relevant URLs\nExtract content from top results using jina-reader\nProcess and summarize the extracted content\n\n# Example workflow\nURL=\"https://example.com/article\"\nscripts/jina-reader.py \"$URL\" --format json | jq -r '.content'"
      },
      {
        "title": "Batch Processing",
        "body": "Extract from multiple URLs:\n\nfor url in $(cat urls.txt); do\n  scripts/jina-reader.py \"$url\" -o \"output/$(basename $url).md\"\ndone"
      },
      {
        "title": "Content Analysis",
        "body": "Pipe extracted content to analysis tools:\n\nscripts/jina-reader.py https://example.com/article | wc -w\nscripts/jina-reader.py https://example.com/article | grep -i \"keyword\""
      },
      {
        "title": "Python Script (jina-reader.py)",
        "body": "url (required): The URL to extract content from\n-f, --format: Output format - markdown or json (default: markdown)\n-t, --timeout: Request timeout in seconds (default: 30)\n-o, --output: Save output to file instead of stdout"
      },
      {
        "title": "Shell Script (jina-reader.sh)",
        "body": "url (required): The URL to extract content from"
      },
      {
        "title": "Limitations",
        "body": "Timeout: Default 30 seconds. Increase with -t for slow-loading pages\nRate limits: Jina Reader API has rate limits. Use batching strategically\nDynamic content: Can't extract content generated by client-side JavaScript after page load\nAuthentication: Can't access pages requiring login or special headers"
      },
      {
        "title": "Timeout errors",
        "body": "scripts/jina-reader.py <url> -t 60  # Increase timeout"
      },
      {
        "title": "Invalid URLs",
        "body": "The tool auto-prepends https:// if missing. Use fully qualified URLs for reliability."
      },
      {
        "title": "Empty content",
        "body": "Some pages may block scraping. Try the shell script as fallback, or verify the URL is accessible."
      },
      {
        "title": "scripts/jina-reader.py",
        "body": "Full-featured Python tool with JSON output, metadata extraction, and file saving."
      },
      {
        "title": "scripts/jina-reader.sh",
        "body": "Lightweight shell script for quick markdown extraction."
      }
    ],
    "body": "Jina Reader\nOverview\n\nJina Reader provides clean markdown extraction from any URL, bypassing HTML complexity, JavaScript rendering, and many paywalls. It returns structured text content with metadata (title, URL, published time) that's perfect for AI analysis.\n\nQuick Start\nExtract markdown content\nscripts/jina-reader.py <url>\n\nExtract with JSON metadata\nscripts/jina-reader.py <url> --format json\n\nSave to file\nscripts/jina-reader.py <url> -o output.md\n\nCore Operations\n1. Basic Extraction\n\nExtract clean markdown from any URL:\n\nscripts/jina-reader.py https://example.com/article\n\n\nReturns: Full markdown content including title, metadata headers, and structured text.\n\nUse when: You need readable text from a webpage for summarization, analysis, or content processing.\n\n2. JSON Format\n\nGet structured data with metadata:\n\nscripts/jina-reader.py https://example.com/article --format json\n\n\nReturns:\n\n{\n  \"status\": \"success\",\n  \"metadata\": {\n    \"title\": \"Article Title\",\n    \"url\": \"https://example.com/article\",\n    \"published\": \"Mon, 10 Feb 2026 12:00:00 GMT\"\n  },\n  \"content\": \"Markdown content...\"\n}\n\n\nUse when: You need programmatic access to metadata or want to integrate with other tools.\n\n3. Shell Script Quick Access\n\nFor simple one-liners:\n\nscripts/jina-reader.sh https://example.com/article\n\n\nReturns: Raw markdown content directly to stdout.\n\nUse when: Quick extraction without arguments or when piping to other commands.\n\nUsage Patterns\nCombining with Search\n\nWhen using tavily-search, web_search, or searxng skills:\n\nGet search results with relevant URLs\nExtract content from top results using jina-reader\nProcess and summarize the extracted content\n# Example workflow\nURL=\"https://example.com/article\"\nscripts/jina-reader.py \"$URL\" --format json | jq -r '.content'\n\nBatch Processing\n\nExtract from multiple URLs:\n\nfor url in $(cat urls.txt); do\n  scripts/jina-reader.py \"$url\" -o \"output/$(basename $url).md\"\ndone\n\nContent Analysis\n\nPipe extracted content to analysis tools:\n\nscripts/jina-reader.py https://example.com/article | wc -w\nscripts/jina-reader.py https://example.com/article | grep -i \"keyword\"\n\nOptions\nPython Script (jina-reader.py)\nurl (required): The URL to extract content from\n-f, --format: Output format - markdown or json (default: markdown)\n-t, --timeout: Request timeout in seconds (default: 30)\n-o, --output: Save output to file instead of stdout\nShell Script (jina-reader.sh)\nurl (required): The URL to extract content from\nLimitations\nTimeout: Default 30 seconds. Increase with -t for slow-loading pages\nRate limits: Jina Reader API has rate limits. Use batching strategically\nDynamic content: Can't extract content generated by client-side JavaScript after page load\nAuthentication: Can't access pages requiring login or special headers\nTroubleshooting\nTimeout errors\nscripts/jina-reader.py <url> -t 60  # Increase timeout\n\nInvalid URLs\n\nThe tool auto-prepends https:// if missing. Use fully qualified URLs for reliability.\n\nEmpty content\n\nSome pages may block scraping. Try the shell script as fallback, or verify the URL is accessible.\n\nResources\nscripts/jina-reader.py\n\nFull-featured Python tool with JSON output, metadata extraction, and file saving.\n\nscripts/jina-reader.sh\n\nLightweight shell script for quick markdown extraction."
  },
  "trust": {
    "sourceLabel": "tencent",
    "provenanceUrl": "https://clawhub.ai/smile-xuc/haibo-jina-reader",
    "publisherUrl": "https://clawhub.ai/smile-xuc/haibo-jina-reader",
    "owner": "smile-xuc",
    "version": "1.0.0",
    "license": null,
    "verificationStatus": "Indexed source record"
  },
  "links": {
    "detailUrl": "https://openagent3.xyz/skills/haibo-jina-reader",
    "downloadUrl": "https://openagent3.xyz/downloads/haibo-jina-reader",
    "agentUrl": "https://openagent3.xyz/skills/haibo-jina-reader/agent",
    "manifestUrl": "https://openagent3.xyz/skills/haibo-jina-reader/agent.json",
    "briefUrl": "https://openagent3.xyz/skills/haibo-jina-reader/agent.md"
  }
}