{
  "schemaVersion": "1.0",
  "item": {
    "slug": "mineru",
    "name": "MinerU PDF Parser",
    "source": "tencent",
    "type": "skill",
    "category": "开发工具",
    "sourceUrl": "https://clawhub.ai/EasonAI-5589/mineru",
    "canonicalUrl": "https://clawhub.ai/EasonAI-5589/mineru",
    "targetPlatform": "OpenClaw"
  },
  "install": {
    "downloadMode": "redirect",
    "downloadUrl": "/downloads/mineru",
    "sourceDownloadUrl": "https://wry-manatee-359.convex.site/api/v1/download?slug=mineru",
    "sourcePlatform": "tencent",
    "targetPlatform": "OpenClaw",
    "installMethod": "Manual import",
    "extraction": "Extract archive",
    "prerequisites": [
      "OpenClaw"
    ],
    "packageFormat": "ZIP package",
    "includedAssets": [
      "SKILL.md"
    ],
    "primaryDoc": "SKILL.md",
    "quickSetup": [
      "Download the package from Yavira.",
      "Extract the archive and review SKILL.md first.",
      "Import or place the package into your OpenClaw setup."
    ],
    "agentAssist": {
      "summary": "Hand the extracted package to your coding agent with a concrete install brief instead of figuring it out manually.",
      "steps": [
        "Download the package from Yavira.",
        "Extract it into a folder your agent can access.",
        "Paste one of the prompts below and point your agent at the extracted folder."
      ],
      "prompts": [
        {
          "label": "New install",
          "body": "I downloaded a skill package from Yavira. Read SKILL.md from the extracted folder and install it by following the included instructions. Tell me what you changed and call out any manual steps you could not complete."
        },
        {
          "label": "Upgrade existing",
          "body": "I downloaded an updated skill package from Yavira. Read SKILL.md from the extracted folder, compare it with my current installation, and upgrade it while preserving any custom configuration unless the package docs explicitly say otherwise. Summarize what changed and any follow-up checks I should run."
        }
      ]
    },
    "sourceHealth": {
      "source": "tencent",
      "status": "healthy",
      "reason": "direct_download_ok",
      "recommendedAction": "download",
      "checkedAt": "2026-04-30T16:55:25.780Z",
      "expiresAt": "2026-05-07T16:55:25.780Z",
      "httpStatus": 200,
      "finalUrl": "https://wry-manatee-359.convex.site/api/v1/download?slug=network",
      "contentType": "application/zip",
      "probeMethod": "head",
      "details": {
        "probeUrl": "https://wry-manatee-359.convex.site/api/v1/download?slug=network",
        "contentDisposition": "attachment; filename=\"network-1.0.0.zip\"",
        "redirectLocation": null,
        "bodySnippet": null
      },
      "scope": "source",
      "summary": "Source download looks usable.",
      "detail": "Yavira can redirect you to the upstream package for this source.",
      "primaryActionLabel": "Download for OpenClaw",
      "primaryActionHref": "/downloads/mineru"
    },
    "validation": {
      "installChecklist": [
        "Use the Yavira download entry.",
        "Review SKILL.md after the package is downloaded.",
        "Confirm the extracted package contains the expected setup assets."
      ],
      "postInstallChecks": [
        "Confirm the extracted package includes the expected docs or setup files.",
        "Validate the skill or prompts are available in your target agent workspace.",
        "Capture any manual follow-up steps the agent could not complete."
      ]
    },
    "downloadPageUrl": "https://openagent3.xyz/downloads/mineru",
    "agentPageUrl": "https://openagent3.xyz/skills/mineru/agent",
    "manifestUrl": "https://openagent3.xyz/skills/mineru/agent.json",
    "briefUrl": "https://openagent3.xyz/skills/mineru/agent.md"
  },
  "agentAssist": {
    "summary": "Hand the extracted package to your coding agent with a concrete install brief instead of figuring it out manually.",
    "steps": [
      "Download the package from Yavira.",
      "Extract it into a folder your agent can access.",
      "Paste one of the prompts below and point your agent at the extracted folder."
    ],
    "prompts": [
      {
        "label": "New install",
        "body": "I downloaded a skill package from Yavira. Read SKILL.md from the extracted folder and install it by following the included instructions. Tell me what you changed and call out any manual steps you could not complete."
      },
      {
        "label": "Upgrade existing",
        "body": "I downloaded an updated skill package from Yavira. Read SKILL.md from the extracted folder, compare it with my current installation, and upgrade it while preserving any custom configuration unless the package docs explicitly say otherwise. Summarize what changed and any follow-up checks I should run."
      }
    ]
  },
  "documentation": {
    "source": "clawhub",
    "primaryDoc": "SKILL.md",
    "sections": [
      {
        "title": "📄 MinerU - 文档解析神器",
        "body": "OpenDataLab 出品\n\nPDF/Word/PPT/图片 → 结构化 Markdown，公式表格全保留！"
      },
      {
        "title": "🔗 资源链接",
        "body": "资源\t链接\n官网\thttps://mineru.net/\nAPI 文档\thttps://mineru.net/apiManage/docs\nGitHub\thttps://github.com/opendatalab/MinerU"
      },
      {
        "title": "支持的文件类型",
        "body": "类型\t格式\n📕 PDF\t论文、书籍、扫描件\n📝 Word\t.docx\n📊 PPT\t.pptx\n🖼️ 图片\t.jpg, .png (OCR)"
      },
      {
        "title": "核心优势",
        "body": "公式完美保留 - LaTeX 格式输出\n表格结构识别 - 复杂表格也能搞定\n多语言 OCR - 中英文混排无压力\n版面分析 - 多栏、图文混排自动处理"
      },
      {
        "title": "认证",
        "body": "# Header 认证\nAuthorization: Bearer {YOUR_API_KEY}"
      },
      {
        "title": "单文件解析",
        "body": "# 1. 提交任务\ncurl -X POST \"https://mineru.net/api/v4/extract/task\" \\\n  -H \"Authorization: Bearer $MINERU_TOKEN\" \\\n  -H \"Content-Type: application/json\" \\\n  -d '{\n    \"url\": \"https://arxiv.org/pdf/2410.17247\",\n    \"enable_formula\": true,\n    \"enable_table\": true,\n    \"layout_model\": \"doclayout_yolo\",\n    \"language\": \"en\"\n  }'\n\n# 返回: {\"task_id\": \"xxx\", \"status\": \"pending\"}\n\n# 2. 轮询结果\ncurl \"https://mineru.net/api/v4/extract/task/{task_id}\" \\\n  -H \"Authorization: Bearer $MINERU_TOKEN\"\n\n# 返回: {\"status\": \"done\", \"result\": {...}}"
      },
      {
        "title": "批量解析",
        "body": "# 1. 获取上传 URL\ncurl -X POST \"https://mineru.net/api/v4/file-urls/batch\" \\\n  -H \"Authorization: Bearer $MINERU_TOKEN\" \\\n  -d '{\"file_names\": [\"paper1.pdf\", \"paper2.pdf\"]}'\n\n# 2. 上传文件到返回的 presigned URLs\n\n# 3. 批量提交任务\ncurl -X POST \"https://mineru.net/api/v4/extract/task/batch\" \\\n  -H \"Authorization: Bearer $MINERU_TOKEN\" \\\n  -d '{\"files\": [{\"url\": \"...\", \"name\": \"paper1.pdf\"}, ...]}'"
      },
      {
        "title": "⚙️ 参数说明",
        "body": "参数\t类型\t说明\nurl\tstring\t文件 URL (支持 http/https)\nenable_formula\tbool\t启用公式识别 (默认 true)\nenable_table\tbool\t启用表格识别 (默认 true)\nlayout_model\tstring\tdoclayout_yolo (快) / layoutlmv3 (准)\nlanguage\tstring\ten / ch / auto\nmodel_version\tstring\tpipeline / vlm / MinerU-HTML"
      },
      {
        "title": "模型版本对比",
        "body": "版本\t速度\t准确度\t适用场景\npipeline\t⚡ 快\t高\t常规文档\nvlm\t🐢 慢\t最高\t复杂版面\nMinerU-HTML\t⚡ 快\t高\t网页样式输出"
      },
      {
        "title": "📂 输出结构",
        "body": "解析完成后下载的 ZIP 包含：\n\noutput/\n├── full.md           # 完整 Markdown\n├── content_list.json # 结构化内容\n├── images/           # 提取的图片\n└── layout.json       # 版面分析结果"
      },
      {
        "title": "论文解析流程",
        "body": "# 1. 创建论文目录\nmkdir -p \"./paper-reading/[CVPR 2025] NewPaper\"\ncd \"./paper-reading/[CVPR 2025] NewPaper\"\n\n# 2. 提交解析任务\nTASK_ID=$(curl -s -X POST \"https://mineru.net/api/v4/extract/task\" \\\n  -H \"Authorization: Bearer $MINERU_TOKEN\" \\\n  -H \"Content-Type: application/json\" \\\n  -d '{\"url\": \"https://arxiv.org/pdf/XXXX.XXXXX\"}' | jq -r '.task_id')\n\n# 3. 等待完成 & 下载\n# (轮询 status 直到 done，然后下载 result.zip)\n\n# 4. 解压\nunzip result.zip -d ."
      },
      {
        "title": "环境变量",
        "body": "在 ~/.bashrc 或 OpenClaw config 中设置：\n\nexport MINERU_TOKEN=\"your_api_key_here\""
      },
      {
        "title": "⚠️ 限制",
        "body": "限制\t数值\n单文件大小\t200 MB\n单文件页数\t600 页\n并发任务数\t根据套餐"
      },
      {
        "title": "💡 使用技巧",
        "body": "arXiv 论文直接用 URL\nhttps://arxiv.org/pdf/2410.17247\n\n\n\n中文论文用 language: ch\n\n\n复杂表格用 vlm 模型\n\n\n批量处理省 quota\n\n一次提交多个文件，比单个提交更高效"
      },
      {
        "title": "📚 相关资源",
        "body": "Paper Banana Skill - 论文配图生成\n\n论文解析不再手动复制粘贴！📖"
      }
    ],
    "body": "📄 MinerU - 文档解析神器\n\nOpenDataLab 出品\n\nPDF/Word/PPT/图片 → 结构化 Markdown，公式表格全保留！\n\n🔗 资源链接\n资源\t链接\n官网\thttps://mineru.net/\nAPI 文档\thttps://mineru.net/apiManage/docs\nGitHub\thttps://github.com/opendatalab/MinerU\n🎯 功能\n支持的文件类型\n类型\t格式\n📕 PDF\t论文、书籍、扫描件\n📝 Word\t.docx\n📊 PPT\t.pptx\n🖼️ 图片\t.jpg, .png (OCR)\n核心优势\n公式完美保留 - LaTeX 格式输出\n表格结构识别 - 复杂表格也能搞定\n多语言 OCR - 中英文混排无压力\n版面分析 - 多栏、图文混排自动处理\n🚀 API 使用 (v4)\n认证\n# Header 认证\nAuthorization: Bearer {YOUR_API_KEY}\n\n单文件解析\n# 1. 提交任务\ncurl -X POST \"https://mineru.net/api/v4/extract/task\" \\\n  -H \"Authorization: Bearer $MINERU_TOKEN\" \\\n  -H \"Content-Type: application/json\" \\\n  -d '{\n    \"url\": \"https://arxiv.org/pdf/2410.17247\",\n    \"enable_formula\": true,\n    \"enable_table\": true,\n    \"layout_model\": \"doclayout_yolo\",\n    \"language\": \"en\"\n  }'\n\n# 返回: {\"task_id\": \"xxx\", \"status\": \"pending\"}\n\n# 2. 轮询结果\ncurl \"https://mineru.net/api/v4/extract/task/{task_id}\" \\\n  -H \"Authorization: Bearer $MINERU_TOKEN\"\n\n# 返回: {\"status\": \"done\", \"result\": {...}}\n\n批量解析\n# 1. 获取上传 URL\ncurl -X POST \"https://mineru.net/api/v4/file-urls/batch\" \\\n  -H \"Authorization: Bearer $MINERU_TOKEN\" \\\n  -d '{\"file_names\": [\"paper1.pdf\", \"paper2.pdf\"]}'\n\n# 2. 上传文件到返回的 presigned URLs\n\n# 3. 批量提交任务\ncurl -X POST \"https://mineru.net/api/v4/extract/task/batch\" \\\n  -H \"Authorization: Bearer $MINERU_TOKEN\" \\\n  -d '{\"files\": [{\"url\": \"...\", \"name\": \"paper1.pdf\"}, ...]}'\n\n⚙️ 参数说明\n参数\t类型\t说明\nurl\tstring\t文件 URL (支持 http/https)\nenable_formula\tbool\t启用公式识别 (默认 true)\nenable_table\tbool\t启用表格识别 (默认 true)\nlayout_model\tstring\tdoclayout_yolo (快) / layoutlmv3 (准)\nlanguage\tstring\ten / ch / auto\nmodel_version\tstring\tpipeline / vlm / MinerU-HTML\n模型版本对比\n版本\t速度\t准确度\t适用场景\npipeline\t⚡ 快\t高\t常规文档\nvlm\t🐢 慢\t最高\t复杂版面\nMinerU-HTML\t⚡ 快\t高\t网页样式输出\n📂 输出结构\n\n解析完成后下载的 ZIP 包含：\n\noutput/\n├── full.md           # 完整 Markdown\n├── content_list.json # 结构化内容\n├── images/           # 提取的图片\n└── layout.json       # 版面分析结果\n\n🔧 OpenClaw 集成工作流\n论文解析流程\n# 1. 创建论文目录\nmkdir -p \"./paper-reading/[CVPR 2025] NewPaper\"\ncd \"./paper-reading/[CVPR 2025] NewPaper\"\n\n# 2. 提交解析任务\nTASK_ID=$(curl -s -X POST \"https://mineru.net/api/v4/extract/task\" \\\n  -H \"Authorization: Bearer $MINERU_TOKEN\" \\\n  -H \"Content-Type: application/json\" \\\n  -d '{\"url\": \"https://arxiv.org/pdf/XXXX.XXXXX\"}' | jq -r '.task_id')\n\n# 3. 等待完成 & 下载\n# (轮询 status 直到 done，然后下载 result.zip)\n\n# 4. 解压\nunzip result.zip -d .\n\n环境变量\n\n在 ~/.bashrc 或 OpenClaw config 中设置：\n\nexport MINERU_TOKEN=\"your_api_key_here\"\n\n⚠️ 限制\n限制\t数值\n单文件大小\t200 MB\n单文件页数\t600 页\n并发任务数\t根据套餐\n💡 使用技巧\n\narXiv 论文直接用 URL\n\nhttps://arxiv.org/pdf/2410.17247\n\n\n中文论文用 language: ch\n\n复杂表格用 vlm 模型\n\n批量处理省 quota\n\n一次提交多个文件，比单个提交更高效\n📚 相关资源\nPaper Banana Skill - 论文配图生成\n\n论文解析不再手动复制粘贴！📖"
  },
  "trust": {
    "sourceLabel": "tencent",
    "provenanceUrl": "https://clawhub.ai/EasonAI-5589/mineru",
    "publisherUrl": "https://clawhub.ai/EasonAI-5589/mineru",
    "owner": "EasonAI-5589",
    "version": "1.0.1",
    "license": null,
    "verificationStatus": "Indexed source record"
  },
  "links": {
    "detailUrl": "https://openagent3.xyz/skills/mineru",
    "downloadUrl": "https://openagent3.xyz/downloads/mineru",
    "agentUrl": "https://openagent3.xyz/skills/mineru/agent",
    "manifestUrl": "https://openagent3.xyz/skills/mineru/agent.json",
    "briefUrl": "https://openagent3.xyz/skills/mineru/agent.md"
  }
}