{
  "schemaVersion": "1.0",
  "item": {
    "slug": "pdf-cn",
    "name": "Pdf Cn",
    "source": "tencent",
    "type": "skill",
    "category": "数据分析",
    "sourceUrl": "https://clawhub.ai/guohongbin-git/pdf-cn",
    "canonicalUrl": "https://clawhub.ai/guohongbin-git/pdf-cn",
    "targetPlatform": "OpenClaw"
  },
  "install": {
    "downloadMode": "redirect",
    "downloadUrl": "/downloads/pdf-cn",
    "sourceDownloadUrl": "https://wry-manatee-359.convex.site/api/v1/download?slug=pdf-cn",
    "sourcePlatform": "tencent",
    "targetPlatform": "OpenClaw",
    "installMethod": "Manual import",
    "extraction": "Extract archive",
    "prerequisites": [
      "OpenClaw"
    ],
    "packageFormat": "ZIP package",
    "includedAssets": [
      "LICENSE.txt",
      "SKILL.md",
      "_meta.json",
      "forms.md",
      "reference.md",
      "scripts/check_bounding_boxes.py"
    ],
    "primaryDoc": "SKILL.md",
    "quickSetup": [
      "Download the package from Yavira.",
      "Extract the archive and review SKILL.md first.",
      "Import or place the package into your OpenClaw setup."
    ],
    "agentAssist": {
      "summary": "Hand the extracted package to your coding agent with a concrete install brief instead of figuring it out manually.",
      "steps": [
        "Download the package from Yavira.",
        "Extract it into a folder your agent can access.",
        "Paste one of the prompts below and point your agent at the extracted folder."
      ],
      "prompts": [
        {
          "label": "New install",
          "body": "I downloaded a skill package from Yavira. Read SKILL.md from the extracted folder and install it by following the included instructions. Tell me what you changed and call out any manual steps you could not complete."
        },
        {
          "label": "Upgrade existing",
          "body": "I downloaded an updated skill package from Yavira. Read SKILL.md from the extracted folder, compare it with my current installation, and upgrade it while preserving any custom configuration unless the package docs explicitly say otherwise. Summarize what changed and any follow-up checks I should run."
        }
      ]
    },
    "sourceHealth": {
      "source": "tencent",
      "status": "healthy",
      "reason": "direct_download_ok",
      "recommendedAction": "download",
      "checkedAt": "2026-04-23T16:43:11.935Z",
      "expiresAt": "2026-04-30T16:43:11.935Z",
      "httpStatus": 200,
      "finalUrl": "https://wry-manatee-359.convex.site/api/v1/download?slug=4claw-imageboard",
      "contentType": "application/zip",
      "probeMethod": "head",
      "details": {
        "probeUrl": "https://wry-manatee-359.convex.site/api/v1/download?slug=4claw-imageboard",
        "contentDisposition": "attachment; filename=\"4claw-imageboard-1.0.1.zip\"",
        "redirectLocation": null,
        "bodySnippet": null
      },
      "scope": "source",
      "summary": "Source download looks usable.",
      "detail": "Yavira can redirect you to the upstream package for this source.",
      "primaryActionLabel": "Download for OpenClaw",
      "primaryActionHref": "/downloads/pdf-cn"
    },
    "validation": {
      "installChecklist": [
        "Use the Yavira download entry.",
        "Review SKILL.md after the package is downloaded.",
        "Confirm the extracted package contains the expected setup assets."
      ],
      "postInstallChecks": [
        "Confirm the extracted package includes the expected docs or setup files.",
        "Validate the skill or prompts are available in your target agent workspace.",
        "Capture any manual follow-up steps the agent could not complete."
      ]
    },
    "downloadPageUrl": "https://openagent3.xyz/downloads/pdf-cn",
    "agentPageUrl": "https://openagent3.xyz/skills/pdf-cn/agent",
    "manifestUrl": "https://openagent3.xyz/skills/pdf-cn/agent.json",
    "briefUrl": "https://openagent3.xyz/skills/pdf-cn/agent.md"
  },
  "agentAssist": {
    "summary": "Hand the extracted package to your coding agent with a concrete install brief instead of figuring it out manually.",
    "steps": [
      "Download the package from Yavira.",
      "Extract it into a folder your agent can access.",
      "Paste one of the prompts below and point your agent at the extracted folder."
    ],
    "prompts": [
      {
        "label": "New install",
        "body": "I downloaded a skill package from Yavira. Read SKILL.md from the extracted folder and install it by following the included instructions. Tell me what you changed and call out any manual steps you could not complete."
      },
      {
        "label": "Upgrade existing",
        "body": "I downloaded an updated skill package from Yavira. Read SKILL.md from the extracted folder, compare it with my current installation, and upgrade it while preserving any custom configuration unless the package docs explicitly say otherwise. Summarize what changed and any follow-up checks I should run."
      }
    ]
  },
  "documentation": {
    "source": "clawhub",
    "primaryDoc": "SKILL.md",
    "sections": [
      {
        "title": "Overview",
        "body": "This guide covers essential PDF processing operations using Python libraries and command-line tools. For advanced features, JavaScript libraries, and detailed examples, see REFERENCE.md. If you need to fill out a PDF form, read FORMS.md and follow its instructions."
      },
      {
        "title": "Quick Start",
        "body": "from pypdf import PdfReader, PdfWriter\n\n# Read a PDF\nreader = PdfReader(\"document.pdf\")\nprint(f\"Pages: {len(reader.pages)}\")\n\n# Extract text\ntext = \"\"\nfor page in reader.pages:\n    text += page.extract_text()"
      },
      {
        "title": "pypdf - Basic Operations",
        "body": "Merge PDFs\n\nfrom pypdf import PdfWriter, PdfReader\n\nwriter = PdfWriter()\nfor pdf_file in [\"doc1.pdf\", \"doc2.pdf\", \"doc3.pdf\"]:\n    reader = PdfReader(pdf_file)\n    for page in reader.pages:\n        writer.add_page(page)\n\nwith open(\"merged.pdf\", \"wb\") as output:\n    writer.write(output)\n\nSplit PDF\n\nreader = PdfReader(\"input.pdf\")\nfor i, page in enumerate(reader.pages):\n    writer = PdfWriter()\n    writer.add_page(page)\n    with open(f\"page_{i+1}.pdf\", \"wb\") as output:\n        writer.write(output)\n\nExtract Metadata\n\nreader = PdfReader(\"document.pdf\")\nmeta = reader.metadata\nprint(f\"Title: {meta.title}\")\nprint(f\"Author: {meta.author}\")\nprint(f\"Subject: {meta.subject}\")\nprint(f\"Creator: {meta.creator}\")\n\nRotate Pages\n\nreader = PdfReader(\"input.pdf\")\nwriter = PdfWriter()\n\npage = reader.pages[0]\npage.rotate(90)  # Rotate 90 degrees clockwise\nwriter.add_page(page)\n\nwith open(\"rotated.pdf\", \"wb\") as output:\n    writer.write(output)"
      },
      {
        "title": "pdfplumber - Text and Table Extraction",
        "body": "Extract Text with Layout\n\nimport pdfplumber\n\nwith pdfplumber.open(\"document.pdf\") as pdf:\n    for page in pdf.pages:\n        text = page.extract_text()\n        print(text)\n\nExtract Tables\n\nwith pdfplumber.open(\"document.pdf\") as pdf:\n    for i, page in enumerate(pdf.pages):\n        tables = page.extract_tables()\n        for j, table in enumerate(tables):\n            print(f\"Table {j+1} on page {i+1}:\")\n            for row in table:\n                print(row)\n\nAdvanced Table Extraction\n\nimport pandas as pd\n\nwith pdfplumber.open(\"document.pdf\") as pdf:\n    all_tables = []\n    for page in pdf.pages:\n        tables = page.extract_tables()\n        for table in tables:\n            if table:  # Check if table is not empty\n                df = pd.DataFrame(table[1:], columns=table[0])\n                all_tables.append(df)\n\n# Combine all tables\nif all_tables:\n    combined_df = pd.concat(all_tables, ignore_index=True)\n    combined_df.to_excel(\"extracted_tables.xlsx\", index=False)"
      },
      {
        "title": "reportlab - Create PDFs",
        "body": "Basic PDF Creation\n\nfrom reportlab.lib.pagesizes import letter\nfrom reportlab.pdfgen import canvas\n\nc = canvas.Canvas(\"hello.pdf\", pagesize=letter)\nwidth, height = letter\n\n# Add text\nc.drawString(100, height - 100, \"Hello World!\")\nc.drawString(100, height - 120, \"This is a PDF created with reportlab\")\n\n# Add a line\nc.line(100, height - 140, 400, height - 140)\n\n# Save\nc.save()\n\nCreate PDF with Multiple Pages\n\nfrom reportlab.lib.pagesizes import letter\nfrom reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, PageBreak\nfrom reportlab.lib.styles import getSampleStyleSheet\n\ndoc = SimpleDocTemplate(\"report.pdf\", pagesize=letter)\nstyles = getSampleStyleSheet()\nstory = []\n\n# Add content\ntitle = Paragraph(\"Report Title\", styles['Title'])\nstory.append(title)\nstory.append(Spacer(1, 12))\n\nbody = Paragraph(\"This is the body of the report. \" * 20, styles['Normal'])\nstory.append(body)\nstory.append(PageBreak())\n\n# Page 2\nstory.append(Paragraph(\"Page 2\", styles['Heading1']))\nstory.append(Paragraph(\"Content for page 2\", styles['Normal']))\n\n# Build PDF\ndoc.build(story)\n\nSubscripts and Superscripts\n\nIMPORTANT: Never use Unicode subscript/superscript characters (₀₁₂₃₄₅₆₇₈₉, ⁰¹²³⁴⁵⁶⁷⁸⁹) in ReportLab PDFs. The built-in fonts do not include these glyphs, causing them to render as solid black boxes.\n\nInstead, use ReportLab's XML markup tags in Paragraph objects:\n\nfrom reportlab.platypus import Paragraph\nfrom reportlab.lib.styles import getSampleStyleSheet\n\nstyles = getSampleStyleSheet()\n\n# Subscripts: use <sub> tag\nchemical = Paragraph(\"H<sub>2</sub>O\", styles['Normal'])\n\n# Superscripts: use <super> tag\nsquared = Paragraph(\"x<super>2</super> + y<super>2</super>\", styles['Normal'])\n\nFor canvas-drawn text (not Paragraph objects), manually adjust font the size and position rather than using Unicode subscripts/superscripts."
      },
      {
        "title": "pdftotext (poppler-utils)",
        "body": "# Extract text\npdftotext input.pdf output.txt\n\n# Extract text preserving layout\npdftotext -layout input.pdf output.txt\n\n# Extract specific pages\npdftotext -f 1 -l 5 input.pdf output.txt  # Pages 1-5"
      },
      {
        "title": "qpdf",
        "body": "# Merge PDFs\nqpdf --empty --pages file1.pdf file2.pdf -- merged.pdf\n\n# Split pages\nqpdf input.pdf --pages . 1-5 -- pages1-5.pdf\nqpdf input.pdf --pages . 6-10 -- pages6-10.pdf\n\n# Rotate pages\nqpdf input.pdf output.pdf --rotate=+90:1  # Rotate page 1 by 90 degrees\n\n# Remove password\nqpdf --password=mypassword --decrypt encrypted.pdf decrypted.pdf"
      },
      {
        "title": "pdftk (if available)",
        "body": "# Merge\npdftk file1.pdf file2.pdf cat output merged.pdf\n\n# Split\npdftk input.pdf burst\n\n# Rotate\npdftk input.pdf rotate 1east output rotated.pdf"
      },
      {
        "title": "Extract Text from Scanned PDFs",
        "body": "# Requires: pip install pytesseract pdf2image\nimport pytesseract\nfrom pdf2image import convert_from_path\n\n# Convert PDF to images\nimages = convert_from_path('scanned.pdf')\n\n# OCR each page\ntext = \"\"\nfor i, image in enumerate(images):\n    text += f\"Page {i+1}:\\n\"\n    text += pytesseract.image_to_string(image)\n    text += \"\\n\\n\"\n\nprint(text)"
      },
      {
        "title": "Add Watermark",
        "body": "from pypdf import PdfReader, PdfWriter\n\n# Create watermark (or load existing)\nwatermark = PdfReader(\"watermark.pdf\").pages[0]\n\n# Apply to all pages\nreader = PdfReader(\"document.pdf\")\nwriter = PdfWriter()\n\nfor page in reader.pages:\n    page.merge_page(watermark)\n    writer.add_page(page)\n\nwith open(\"watermarked.pdf\", \"wb\") as output:\n    writer.write(output)"
      },
      {
        "title": "Extract Images",
        "body": "# Using pdfimages (poppler-utils)\npdfimages -j input.pdf output_prefix\n\n# This extracts all images as output_prefix-000.jpg, output_prefix-001.jpg, etc."
      },
      {
        "title": "Password Protection",
        "body": "from pypdf import PdfReader, PdfWriter\n\nreader = PdfReader(\"input.pdf\")\nwriter = PdfWriter()\n\nfor page in reader.pages:\n    writer.add_page(page)\n\n# Add password\nwriter.encrypt(\"userpassword\", \"ownerpassword\")\n\nwith open(\"encrypted.pdf\", \"wb\") as output:\n    writer.write(output)"
      },
      {
        "title": "Quick Reference",
        "body": "TaskBest ToolCommand/CodeMerge PDFspypdfwriter.add_page(page)Split PDFspypdfOne page per fileExtract textpdfplumberpage.extract_text()Extract tablespdfplumberpage.extract_tables()Create PDFsreportlabCanvas or PlatypusCommand line mergeqpdfqpdf --empty --pages ...OCR scanned PDFspytesseractConvert to image firstFill PDF formspdf-lib or pypdf (see FORMS.md)See FORMS.md"
      },
      {
        "title": "Next Steps",
        "body": "For advanced pypdfium2 usage, see REFERENCE.md\nFor JavaScript libraries (pdf-lib), see REFERENCE.md\nIf you need to fill out a PDF form, follow the instructions in FORMS.md\nFor troubleshooting guides, see REFERENCE.md"
      }
    ],
    "body": "PDF Processing Guide\nOverview\n\nThis guide covers essential PDF processing operations using Python libraries and command-line tools. For advanced features, JavaScript libraries, and detailed examples, see REFERENCE.md. If you need to fill out a PDF form, read FORMS.md and follow its instructions.\n\nQuick Start\nfrom pypdf import PdfReader, PdfWriter\n\n# Read a PDF\nreader = PdfReader(\"document.pdf\")\nprint(f\"Pages: {len(reader.pages)}\")\n\n# Extract text\ntext = \"\"\nfor page in reader.pages:\n    text += page.extract_text()\n\nPython Libraries\npypdf - Basic Operations\nMerge PDFs\nfrom pypdf import PdfWriter, PdfReader\n\nwriter = PdfWriter()\nfor pdf_file in [\"doc1.pdf\", \"doc2.pdf\", \"doc3.pdf\"]:\n    reader = PdfReader(pdf_file)\n    for page in reader.pages:\n        writer.add_page(page)\n\nwith open(\"merged.pdf\", \"wb\") as output:\n    writer.write(output)\n\nSplit PDF\nreader = PdfReader(\"input.pdf\")\nfor i, page in enumerate(reader.pages):\n    writer = PdfWriter()\n    writer.add_page(page)\n    with open(f\"page_{i+1}.pdf\", \"wb\") as output:\n        writer.write(output)\n\nExtract Metadata\nreader = PdfReader(\"document.pdf\")\nmeta = reader.metadata\nprint(f\"Title: {meta.title}\")\nprint(f\"Author: {meta.author}\")\nprint(f\"Subject: {meta.subject}\")\nprint(f\"Creator: {meta.creator}\")\n\nRotate Pages\nreader = PdfReader(\"input.pdf\")\nwriter = PdfWriter()\n\npage = reader.pages[0]\npage.rotate(90)  # Rotate 90 degrees clockwise\nwriter.add_page(page)\n\nwith open(\"rotated.pdf\", \"wb\") as output:\n    writer.write(output)\n\npdfplumber - Text and Table Extraction\nExtract Text with Layout\nimport pdfplumber\n\nwith pdfplumber.open(\"document.pdf\") as pdf:\n    for page in pdf.pages:\n        text = page.extract_text()\n        print(text)\n\nExtract Tables\nwith pdfplumber.open(\"document.pdf\") as pdf:\n    for i, page in enumerate(pdf.pages):\n        tables = page.extract_tables()\n        for j, table in enumerate(tables):\n            print(f\"Table {j+1} on page {i+1}:\")\n            for row in table:\n                print(row)\n\nAdvanced Table Extraction\nimport pandas as pd\n\nwith pdfplumber.open(\"document.pdf\") as pdf:\n    all_tables = []\n    for page in pdf.pages:\n        tables = page.extract_tables()\n        for table in tables:\n            if table:  # Check if table is not empty\n                df = pd.DataFrame(table[1:], columns=table[0])\n                all_tables.append(df)\n\n# Combine all tables\nif all_tables:\n    combined_df = pd.concat(all_tables, ignore_index=True)\n    combined_df.to_excel(\"extracted_tables.xlsx\", index=False)\n\nreportlab - Create PDFs\nBasic PDF Creation\nfrom reportlab.lib.pagesizes import letter\nfrom reportlab.pdfgen import canvas\n\nc = canvas.Canvas(\"hello.pdf\", pagesize=letter)\nwidth, height = letter\n\n# Add text\nc.drawString(100, height - 100, \"Hello World!\")\nc.drawString(100, height - 120, \"This is a PDF created with reportlab\")\n\n# Add a line\nc.line(100, height - 140, 400, height - 140)\n\n# Save\nc.save()\n\nCreate PDF with Multiple Pages\nfrom reportlab.lib.pagesizes import letter\nfrom reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, PageBreak\nfrom reportlab.lib.styles import getSampleStyleSheet\n\ndoc = SimpleDocTemplate(\"report.pdf\", pagesize=letter)\nstyles = getSampleStyleSheet()\nstory = []\n\n# Add content\ntitle = Paragraph(\"Report Title\", styles['Title'])\nstory.append(title)\nstory.append(Spacer(1, 12))\n\nbody = Paragraph(\"This is the body of the report. \" * 20, styles['Normal'])\nstory.append(body)\nstory.append(PageBreak())\n\n# Page 2\nstory.append(Paragraph(\"Page 2\", styles['Heading1']))\nstory.append(Paragraph(\"Content for page 2\", styles['Normal']))\n\n# Build PDF\ndoc.build(story)\n\nSubscripts and Superscripts\n\nIMPORTANT: Never use Unicode subscript/superscript characters (₀₁₂₃₄₅₆₇₈₉, ⁰¹²³⁴⁵⁶⁷⁸⁹) in ReportLab PDFs. The built-in fonts do not include these glyphs, causing them to render as solid black boxes.\n\nInstead, use ReportLab's XML markup tags in Paragraph objects:\n\nfrom reportlab.platypus import Paragraph\nfrom reportlab.lib.styles import getSampleStyleSheet\n\nstyles = getSampleStyleSheet()\n\n# Subscripts: use <sub> tag\nchemical = Paragraph(\"H<sub>2</sub>O\", styles['Normal'])\n\n# Superscripts: use <super> tag\nsquared = Paragraph(\"x<super>2</super> + y<super>2</super>\", styles['Normal'])\n\n\nFor canvas-drawn text (not Paragraph objects), manually adjust font the size and position rather than using Unicode subscripts/superscripts.\n\nCommand-Line Tools\npdftotext (poppler-utils)\n# Extract text\npdftotext input.pdf output.txt\n\n# Extract text preserving layout\npdftotext -layout input.pdf output.txt\n\n# Extract specific pages\npdftotext -f 1 -l 5 input.pdf output.txt  # Pages 1-5\n\nqpdf\n# Merge PDFs\nqpdf --empty --pages file1.pdf file2.pdf -- merged.pdf\n\n# Split pages\nqpdf input.pdf --pages . 1-5 -- pages1-5.pdf\nqpdf input.pdf --pages . 6-10 -- pages6-10.pdf\n\n# Rotate pages\nqpdf input.pdf output.pdf --rotate=+90:1  # Rotate page 1 by 90 degrees\n\n# Remove password\nqpdf --password=mypassword --decrypt encrypted.pdf decrypted.pdf\n\npdftk (if available)\n# Merge\npdftk file1.pdf file2.pdf cat output merged.pdf\n\n# Split\npdftk input.pdf burst\n\n# Rotate\npdftk input.pdf rotate 1east output rotated.pdf\n\nCommon Tasks\nExtract Text from Scanned PDFs\n# Requires: pip install pytesseract pdf2image\nimport pytesseract\nfrom pdf2image import convert_from_path\n\n# Convert PDF to images\nimages = convert_from_path('scanned.pdf')\n\n# OCR each page\ntext = \"\"\nfor i, image in enumerate(images):\n    text += f\"Page {i+1}:\\n\"\n    text += pytesseract.image_to_string(image)\n    text += \"\\n\\n\"\n\nprint(text)\n\nAdd Watermark\nfrom pypdf import PdfReader, PdfWriter\n\n# Create watermark (or load existing)\nwatermark = PdfReader(\"watermark.pdf\").pages[0]\n\n# Apply to all pages\nreader = PdfReader(\"document.pdf\")\nwriter = PdfWriter()\n\nfor page in reader.pages:\n    page.merge_page(watermark)\n    writer.add_page(page)\n\nwith open(\"watermarked.pdf\", \"wb\") as output:\n    writer.write(output)\n\nExtract Images\n# Using pdfimages (poppler-utils)\npdfimages -j input.pdf output_prefix\n\n# This extracts all images as output_prefix-000.jpg, output_prefix-001.jpg, etc.\n\nPassword Protection\nfrom pypdf import PdfReader, PdfWriter\n\nreader = PdfReader(\"input.pdf\")\nwriter = PdfWriter()\n\nfor page in reader.pages:\n    writer.add_page(page)\n\n# Add password\nwriter.encrypt(\"userpassword\", \"ownerpassword\")\n\nwith open(\"encrypted.pdf\", \"wb\") as output:\n    writer.write(output)\n\nQuick Reference\nTask\tBest Tool\tCommand/Code\nMerge PDFs\tpypdf\twriter.add_page(page)\nSplit PDFs\tpypdf\tOne page per file\nExtract text\tpdfplumber\tpage.extract_text()\nExtract tables\tpdfplumber\tpage.extract_tables()\nCreate PDFs\treportlab\tCanvas or Platypus\nCommand line merge\tqpdf\tqpdf --empty --pages ...\nOCR scanned PDFs\tpytesseract\tConvert to image first\nFill PDF forms\tpdf-lib or pypdf (see FORMS.md)\tSee FORMS.md\nNext Steps\nFor advanced pypdfium2 usage, see REFERENCE.md\nFor JavaScript libraries (pdf-lib), see REFERENCE.md\nIf you need to fill out a PDF form, follow the instructions in FORMS.md\nFor troubleshooting guides, see REFERENCE.md"
  },
  "trust": {
    "sourceLabel": "tencent",
    "provenanceUrl": "https://clawhub.ai/guohongbin-git/pdf-cn",
    "publisherUrl": "https://clawhub.ai/guohongbin-git/pdf-cn",
    "owner": "guohongbin-git",
    "version": "1.0.1",
    "license": null,
    "verificationStatus": "Indexed source record"
  },
  "links": {
    "detailUrl": "https://openagent3.xyz/skills/pdf-cn",
    "downloadUrl": "https://openagent3.xyz/downloads/pdf-cn",
    "agentUrl": "https://openagent3.xyz/skills/pdf-cn/agent",
    "manifestUrl": "https://openagent3.xyz/skills/pdf-cn/agent.json",
    "briefUrl": "https://openagent3.xyz/skills/pdf-cn/agent.md"
  }
}