{
  "schemaVersion": "1.0",
  "item": {
    "slug": "deepread-ocr",
    "name": "DeepRead OCR",
    "source": "tencent",
    "type": "skill",
    "category": "AI 智能",
    "sourceUrl": "https://clawhub.ai/uday390/deepread-ocr",
    "canonicalUrl": "https://clawhub.ai/uday390/deepread-ocr",
    "targetPlatform": "OpenClaw"
  },
  "install": {
    "downloadMode": "redirect",
    "downloadUrl": "/downloads/deepread-ocr",
    "sourceDownloadUrl": "https://wry-manatee-359.convex.site/api/v1/download?slug=deepread-ocr",
    "sourcePlatform": "tencent",
    "targetPlatform": "OpenClaw",
    "installMethod": "Manual import",
    "extraction": "Extract archive",
    "prerequisites": [
      "OpenClaw"
    ],
    "packageFormat": "ZIP package",
    "includedAssets": [
      "SKILL.md",
      "package.json"
    ],
    "primaryDoc": "SKILL.md",
    "quickSetup": [
      "Download the package from Yavira.",
      "Extract the archive and review SKILL.md first.",
      "Import or place the package into your OpenClaw setup."
    ],
    "agentAssist": {
      "summary": "Hand the extracted package to your coding agent with a concrete install brief instead of figuring it out manually.",
      "steps": [
        "Download the package from Yavira.",
        "Extract it into a folder your agent can access.",
        "Paste one of the prompts below and point your agent at the extracted folder."
      ],
      "prompts": [
        {
          "label": "New install",
          "body": "I downloaded a skill package from Yavira. Read SKILL.md from the extracted folder and install it by following the included instructions. Tell me what you changed and call out any manual steps you could not complete."
        },
        {
          "label": "Upgrade existing",
          "body": "I downloaded an updated skill package from Yavira. Read SKILL.md from the extracted folder, compare it with my current installation, and upgrade it while preserving any custom configuration unless the package docs explicitly say otherwise. Summarize what changed and any follow-up checks I should run."
        }
      ]
    },
    "sourceHealth": {
      "source": "tencent",
      "status": "healthy",
      "reason": "direct_download_ok",
      "recommendedAction": "download",
      "checkedAt": "2026-04-30T16:55:25.780Z",
      "expiresAt": "2026-05-07T16:55:25.780Z",
      "httpStatus": 200,
      "finalUrl": "https://wry-manatee-359.convex.site/api/v1/download?slug=network",
      "contentType": "application/zip",
      "probeMethod": "head",
      "details": {
        "probeUrl": "https://wry-manatee-359.convex.site/api/v1/download?slug=network",
        "contentDisposition": "attachment; filename=\"network-1.0.0.zip\"",
        "redirectLocation": null,
        "bodySnippet": null
      },
      "scope": "source",
      "summary": "Source download looks usable.",
      "detail": "Yavira can redirect you to the upstream package for this source.",
      "primaryActionLabel": "Download for OpenClaw",
      "primaryActionHref": "/downloads/deepread-ocr"
    },
    "validation": {
      "installChecklist": [
        "Use the Yavira download entry.",
        "Review SKILL.md after the package is downloaded.",
        "Confirm the extracted package contains the expected setup assets."
      ],
      "postInstallChecks": [
        "Confirm the extracted package includes the expected docs or setup files.",
        "Validate the skill or prompts are available in your target agent workspace.",
        "Capture any manual follow-up steps the agent could not complete."
      ]
    },
    "downloadPageUrl": "https://openagent3.xyz/downloads/deepread-ocr",
    "agentPageUrl": "https://openagent3.xyz/skills/deepread-ocr/agent",
    "manifestUrl": "https://openagent3.xyz/skills/deepread-ocr/agent.json",
    "briefUrl": "https://openagent3.xyz/skills/deepread-ocr/agent.md"
  },
  "agentAssist": {
    "summary": "Hand the extracted package to your coding agent with a concrete install brief instead of figuring it out manually.",
    "steps": [
      "Download the package from Yavira.",
      "Extract it into a folder your agent can access.",
      "Paste one of the prompts below and point your agent at the extracted folder."
    ],
    "prompts": [
      {
        "label": "New install",
        "body": "I downloaded a skill package from Yavira. Read SKILL.md from the extracted folder and install it by following the included instructions. Tell me what you changed and call out any manual steps you could not complete."
      },
      {
        "label": "Upgrade existing",
        "body": "I downloaded an updated skill package from Yavira. Read SKILL.md from the extracted folder, compare it with my current installation, and upgrade it while preserving any custom configuration unless the package docs explicitly say otherwise. Summarize what changed and any follow-up checks I should run."
      }
    ]
  },
  "documentation": {
    "source": "clawhub",
    "primaryDoc": "SKILL.md",
    "sections": [
      {
        "title": "DeepRead - Production OCR API",
        "body": "DeepRead is an AI-native OCR platform that turns documents into high-accuracy data in minutes. Using multi-model consensus, DeepRead achieves 97%+ accuracy and flags only uncertain fields for Human-in-the-Loop (HIL) review—reducing manual work from 100% to 5-10%. Zero prompt engineering required."
      },
      {
        "title": "What This Skill Does",
        "body": "DeepRead is a production-grade document processing API that gives you high-accuracy structured data output in minutes with human review flagging so manual review is limited to the flagged exceptions\n\nCore Features:\n\nText Extraction: Convert PDFs and images to clean markdown\nStructured Data: Extract JSON fields with confidence scores\nHIL Interface: Built-in Human-in-the-Loop review — uncertain fields are flagged (hil_flag) so only exceptions need manual review\nMulti-Pass Processing: Multiple validation passes for maximum accuracy\nMulti-Model Consensus: Cross-validation between models for reliability\nFree Tier: 2,000 pages/month (no credit card required)"
      },
      {
        "title": "1. Get Your API Key",
        "body": "Sign up and create an API key:\n\n# Visit the dashboard\nhttps://www.deepread.tech/dashboard\n\n# Or use this direct link\nhttps://www.deepread.tech/dashboard/?utm_source=clawdhub\n\nSave your API key:\n\nexport DEEPREAD_API_KEY=\"sk_live_your_key_here\""
      },
      {
        "title": "2. Clawdbot Configuration (Optional)",
        "body": "Add to your clawdbot.config.json5:\n\n{\n  skills: {\n    entries: {\n      \"deepread\": {\n        enabled: true\n        // API key is read from DEEPREAD_API_KEY environment variable\n        // Do NOT hardcode your API key here\n      }\n    }\n  }\n}"
      },
      {
        "title": "3. Process Your First Document",
        "body": "Option A: With Webhook (Recommended)\n\n# Upload PDF with webhook notification\ncurl -X POST https://api.deepread.tech/v1/process \\\n  -H \"X-API-Key: $DEEPREAD_API_KEY\" \\\n  -F \"file=@document.pdf\" \\\n  -F \"webhook_url=https://your-app.com/webhooks/deepread\"\n\n# Returns immediately\n{\n  \"id\": \"550e8400-e29b-41d4-a716-446655440000\",\n  \"status\": \"queued\"\n}\n\n# Your webhook receives results when processing completes (2-5 minutes)\n\nOption B: Poll for Results\n\n# Upload PDF without webhook\ncurl -X POST https://api.deepread.tech/v1/process \\\n  -H \"X-API-Key: $DEEPREAD_API_KEY\" \\\n  -F \"file=@document.pdf\"\n\n# Returns immediately\n{\n  \"id\": \"550e8400-e29b-41d4-a716-446655440000\",\n  \"status\": \"queued\"\n}\n\n# Poll until completed\ncurl https://api.deepread.tech/v1/jobs/550e8400-e29b-41d4-a716-446655440000 \\\n  -H \"X-API-Key: $DEEPREAD_API_KEY\""
      },
      {
        "title": "Basic OCR (Text Only)",
        "body": "Extract text as clean markdown:\n\n# With webhook (recommended)\ncurl -X POST https://api.deepread.tech/v1/process \\\n  -H \"X-API-Key: $DEEPREAD_API_KEY\" \\\n  -F \"file=@invoice.pdf\" \\\n  -F \"webhook_url=https://your-app.com/webhook\"\n\n# OR poll for completion\ncurl -X POST https://api.deepread.tech/v1/process \\\n  -H \"X-API-Key: $DEEPREAD_API_KEY\" \\\n  -F \"file=@invoice.pdf\"\n\n# Then poll\ncurl https://api.deepread.tech/v1/jobs/JOB_ID \\\n  -H \"X-API-Key: $DEEPREAD_API_KEY\"\n\nResponse when completed:\n\n{\n  \"id\": \"550e8400-...\",\n  \"status\": \"completed\",\n  \"result\": {\n    \"text\": \"# INVOICE\\n\\n**Vendor:** Acme Corp\\n**Total:** $1,250.00...\"\n  }\n}"
      },
      {
        "title": "Structured Data Extraction",
        "body": "Extract specific fields with confidence scoring:\n\ncurl -X POST https://api.deepread.tech/v1/process \\\n  -H \"X-API-Key: $DEEPREAD_API_KEY\" \\\n  -F \"file=@invoice.pdf\" \\\n  -F 'schema={\n    \"type\": \"object\",\n    \"properties\": {\n      \"vendor\": {\n        \"type\": \"string\",\n        \"description\": \"Vendor company name\"\n      },\n      \"total\": {\n        \"type\": \"number\",\n        \"description\": \"Total invoice amount\"\n      },\n      \"invoice_date\": {\n        \"type\": \"string\",\n        \"description\": \"Invoice date in MM/DD/YYYY format\"\n      }\n    }\n  }'\n\nResponse includes confidence flags:\n\n{\n  \"status\": \"completed\",\n  \"result\": {\n    \"text\": \"# INVOICE\\n\\n**Vendor:** Acme Corp...\",\n    \"data\": {\n      \"vendor\": {\n        \"value\": \"Acme Corp\",\n        \"hil_flag\": false,\n        \"found_on_page\": 1\n      },\n      \"total\": {\n        \"value\": 1250.00,\n        \"hil_flag\": false,\n        \"found_on_page\": 1\n      },\n      \"invoice_date\": {\n        \"value\": \"2024-10-??\",\n        \"hil_flag\": true,\n        \"reason\": \"Date partially obscured\",\n        \"found_on_page\": 1\n      }\n    },\n    \"metadata\": {\n      \"fields_requiring_review\": 1,\n      \"total_fields\": 3,\n      \"review_percentage\": 33.3\n    }\n  }\n}"
      },
      {
        "title": "Complex Schemas (Nested Data)",
        "body": "Extract arrays and nested objects:\n\ncurl -X POST https://api.deepread.tech/v1/process \\\n  -H \"X-API-Key: $DEEPREAD_API_KEY\" \\\n  -F \"file=@invoice.pdf\" \\\n  -F 'schema={\n    \"type\": \"object\",\n    \"properties\": {\n      \"vendor\": {\"type\": \"string\"},\n      \"total\": {\"type\": \"number\"},\n      \"line_items\": {\n        \"type\": \"array\",\n        \"items\": {\n          \"type\": \"object\",\n          \"properties\": {\n            \"description\": {\"type\": \"string\"},\n            \"quantity\": {\"type\": \"number\"},\n            \"price\": {\"type\": \"number\"}\n          }\n        }\n      }\n    }\n  }'"
      },
      {
        "title": "Page-by-Page Breakdown",
        "body": "Get per-page OCR results with quality flags:\n\ncurl -X POST https://api.deepread.tech/v1/process \\\n  -H \"X-API-Key: $DEEPREAD_API_KEY\" \\\n  -F \"file=@contract.pdf\" \\\n  -F \"include_pages=true\"\n\nResponse:\n\n{\n  \"result\": {\n    \"text\": \"Combined text from all pages...\",\n    \"pages\": [\n      {\n        \"page_number\": 1,\n        \"text\": \"# Contract Agreement\\n\\n...\",\n        \"hil_flag\": false\n      },\n      {\n        \"page_number\": 2,\n        \"text\": \"Terms and C??diti??s...\",\n        \"hil_flag\": true,\n        \"reason\": \"Multiple unrecognized characters\"\n      }\n    ],\n    \"metadata\": {\n      \"pages_requiring_review\": 1,\n      \"total_pages\": 2\n      }\n  }\n}"
      },
      {
        "title": "✅ Use DeepRead For:",
        "body": "Invoice Processing: Extract vendor, totals, line items\nReceipt OCR: Parse merchant, items, totals\nContract Analysis: Extract parties, dates, terms\nForm Digitization: Convert paper forms to structured data\nDocument Workflows: Any process requiring OCR + data extraction\nQuality-Critical Apps: When you need to know which extractions are uncertain"
      },
      {
        "title": "❌ Don't Use For:",
        "body": "Real-time Processing: Processing takes 2-5 minutes (async workflow)\nBatch >2,000 pages/month: Upgrade to PRO or SCALE tier"
      },
      {
        "title": "Multi-Pass Pipeline",
        "body": "PDF → Convert → Rotate Correction → OCR → Multi-Model Validation → Extract → Done\n\nThe pipeline automatically handles:\n\nDocument rotation and orientation correction\nMulti-pass validation for accuracy\nCross-model consensus for reliability\nField-level confidence scoring"
      },
      {
        "title": "Human-in-the-Loop (HIL) Interface",
        "body": "DeepRead includes a built-in Human-in-the-Loop (HIL) review system. The AI compares extracted text to the original image and sets hil_flag on each field:\n\nhil_flag: false = Clear, confident extraction → Auto-process\nhil_flag: true = Uncertain extraction → Routed to human review\n\nHow HIL works:\n\nFields extracted with high confidence are auto-approved\nUncertain fields are flagged with hil_flag: true and a reason\nOnly flagged fields need human review (typically 5-10% of total fields)\nReview flagged fields in DeepRead Preview (preview.deepread.tech) — a dedicated HIL review interface where reviewers can see the original document side-by-side with extracted data, correct flagged fields, and approve results\nOr integrate with your own review queue using the hil_flag data in the API response\n\nAI flags extractions when:\n\nText is handwritten, blurry, or low quality\nMultiple possible interpretations exist\nCharacters are partially visible or unclear\nField not found in document\n\nThis is multimodal AI determination, not rule-based."
      },
      {
        "title": "1. Blueprints (Optimized Schemas)",
        "body": "Create reusable, optimized schemas for specific document types:\n\n# List your blueprints\ncurl https://api.deepread.tech/v1/blueprints \\\n  -H \"X-API-Key: $DEEPREAD_API_KEY\"\n\n# Use blueprint instead of inline schema\ncurl -X POST https://api.deepread.tech/v1/process \\\n  -H \"X-API-Key: $DEEPREAD_API_KEY\" \\\n  -F \"file=@invoice.pdf\" \\\n  -F \"blueprint_id=660e8400-e29b-41d4-a716-446655440001\"\n\nBenefits:\n\n20-30% accuracy improvement over baseline schemas\nReusable across similar documents\nVersioned with rollback support\n\nHow to create blueprints:\n\n# Create a blueprint from training data\ncurl -X POST https://api.deepread.tech/v1/optimize \\\n  -H \"X-API-Key: $DEEPREAD_API_KEY\" \\\n  -H \"Content-Type: application/json\" \\\n  -d '{\n    \"name\": \"utility_invoice\",\n    \"description\": \"Optimized for utility invoices\",\n    \"document_type\": \"invoice\",\n    \"initial_schema\": {\n      \"type\": \"object\",\n      \"properties\": {\n        \"vendor\": {\"type\": \"string\", \"description\": \"Vendor name\"},\n        \"total\": {\"type\": \"number\", \"description\": \"Total amount\"}\n      }\n    },\n    \"training_documents\": [\"doc1.pdf\", \"doc2.pdf\", \"doc3.pdf\"],\n    \"ground_truth_data\": [\n      {\"vendor\": \"Acme Power\", \"total\": 125.50},\n      {\"vendor\": \"City Electric\", \"total\": 89.25}\n    ],\n    \"target_accuracy\": 95.0,\n    \"max_iterations\": 5\n  }'\n\n# Returns: {\"job_id\": \"...\", \"blueprint_id\": \"...\", \"status\": \"pending\"}\n\n# Check optimization status\ncurl https://api.deepread.tech/v1/blueprints/jobs/JOB_ID \\\n  -H \"X-API-Key: $DEEPREAD_API_KEY\"\n\n# Use blueprint (once completed)\ncurl -X POST https://api.deepread.tech/v1/process \\\n  -H \"X-API-Key: $DEEPREAD_API_KEY\" \\\n  -F \"file=@invoice.pdf\" \\\n  -F \"blueprint_id=BLUEPRINT_ID\""
      },
      {
        "title": "2. Webhooks (Recommended for Production)",
        "body": "Get notified when processing completes instead of polling:\n\ncurl -X POST https://api.deepread.tech/v1/process \\\n  -H \"X-API-Key: $DEEPREAD_API_KEY\" \\\n  -F \"file=@invoice.pdf\" \\\n  -F \"webhook_url=https://your-app.com/webhooks/deepread\"\n\nYour webhook receives this payload when processing completes:\n\n{\n  \"job_id\": \"550e8400-...\",\n  \"status\": \"completed\",\n  \"created_at\": \"2025-01-27T10:00:00Z\",\n  \"completed_at\": \"2025-01-27T10:02:30Z\",\n  \"result\": {\n    \"text\": \"...\",\n    \"data\": {...}\n  },\n  \"preview_url\": \"https://preview.deepread.tech/abc1234\"\n}\n\nBenefits:\n\nNo polling required\nInstant notification when done\nLower latency\nBetter for production workflows"
      },
      {
        "title": "3. Preview (HIL Review Interface)",
        "body": "DeepRead Preview (preview.deepread.tech) is the built-in Human-in-the-Loop review interface. Reviewers can view the original document alongside extracted data, correct flagged fields, and approve results. Preview URLs can also be shared without authentication:\n\n# Request preview URL\ncurl -X POST https://api.deepread.tech/v1/process \\\n  -H \"X-API-Key: $DEEPREAD_API_KEY\" \\\n  -F \"file=@document.pdf\" \\\n  -F \"include_images=true\"\n\n# Get preview URL in response\n{\n  \"result\": {\n    \"text\": \"...\",\n    \"data\": {...}\n  },\n  \"preview_url\": \"https://preview.deepread.tech/Xy9aB12\"\n}\n\nPublic Preview Endpoint:\n\n# No authentication required\ncurl https://api.deepread.tech/v1/preview/Xy9aB12"
      },
      {
        "title": "Free Tier (No Credit Card)",
        "body": "2,000 pages/month\n10 requests/minute\nFull feature access (OCR + structured extraction + blueprints)"
      },
      {
        "title": "Paid Plans",
        "body": "PRO: 50,000 pages/month, 100 requests/minute @ $99/mo\nSCALE: Custom volume pricing (contact sales)\n\nUpgrade: https://www.deepread.tech/dashboard/billing?utm_source=clawdhub"
      },
      {
        "title": "Rate Limit Headers",
        "body": "Every response includes quota information:\n\nX-RateLimit-Limit: 2000\nX-RateLimit-Remaining: 1847\nX-RateLimit-Used: 153\nX-RateLimit-Reset: 1730419200"
      },
      {
        "title": "1. Use Webhooks for Production",
        "body": "✅ Recommended: Webhook notifications\n\ncurl -X POST https://api.deepread.tech/v1/process \\\n  -H \"X-API-Key: $DEEPREAD_API_KEY\" \\\n  -F \"file=@document.pdf\" \\\n  -F \"webhook_url=https://your-app.com/webhook\"\n\nOnly use polling if:\n\nTesting/development\nCannot expose a webhook endpoint\nNeed synchronous response"
      },
      {
        "title": "2. Schema Design",
        "body": "✅ Good: Descriptive field descriptions\n\n{\n  \"vendor\": {\n    \"type\": \"string\",\n    \"description\": \"Vendor company name. Usually in header or top-left of invoice.\"\n  }\n}\n\n❌ Bad: No description\n\n{\n  \"vendor\": {\"type\": \"string\"}\n}"
      },
      {
        "title": "3. Polling Strategy (If Needed)",
        "body": "Only if you can't use webhooks, poll every 5-10 seconds:\n\nimport time\nimport requests\n\ndef wait_for_result(job_id, api_key):\n    while True:\n        response = requests.get(\n            f\"https://api.deepread.tech/v1/jobs/{job_id}\",\n            headers={\"X-API-Key\": api_key}\n        )\n        result = response.json()\n\n        if result[\"status\"] == \"completed\":\n            return result[\"result\"]\n        elif result[\"status\"] == \"failed\":\n            raise Exception(f\"Job failed: {result.get('error')}\")\n\n        time.sleep(5)"
      },
      {
        "title": "4. Handling Quality Flags",
        "body": "Separate confident fields from uncertain ones:\n\ndef process_extraction(data):\n    confident = {}\n    needs_review = []\n\n    for field, field_data in data.items():\n        if field_data[\"hil_flag\"]:\n            needs_review.append({\n                \"field\": field,\n                \"value\": field_data[\"value\"],\n                \"reason\": field_data.get(\"reason\")\n            })\n        else:\n            confident[field] = field_data[\"value\"]\n\n    # Auto-process confident fields\n    save_to_database(confident)\n\n    # Send uncertain fields to review queue\n    if needs_review:\n        send_to_review_queue(needs_review)"
      },
      {
        "title": "Error: quota_exceeded",
        "body": "{\"detail\": \"Monthly page quota exceeded\"}\n\nSolution: Upgrade to PRO or wait until next billing cycle."
      },
      {
        "title": "Error: invalid_schema",
        "body": "{\"detail\": \"Schema must be valid JSON Schema\"}\n\nSolution: Ensure schema is valid JSON and includes type and properties."
      },
      {
        "title": "Error: file_too_large",
        "body": "{\"detail\": \"File size exceeds 50MB limit\"}\n\nSolution: Compress PDF or split into smaller files."
      },
      {
        "title": "Job Status: failed",
        "body": "{\"status\": \"failed\", \"error\": \"PDF could not be processed\"}\n\nCommon causes:\n\nCorrupted PDF file\nPassword-protected PDF\nUnsupported PDF version\nImage quality too low for OCR"
      },
      {
        "title": "Invoice Schema",
        "body": "{\n  \"type\": \"object\",\n  \"properties\": {\n    \"invoice_number\": {\n      \"type\": \"string\",\n      \"description\": \"Unique invoice ID\"\n    },\n    \"invoice_date\": {\n      \"type\": \"string\",\n      \"description\": \"Invoice date in MM/DD/YYYY format\"\n    },\n    \"vendor\": {\n      \"type\": \"string\",\n      \"description\": \"Vendor company name\"\n    },\n    \"total\": {\n      \"type\": \"number\",\n      \"description\": \"Total amount due including tax\"\n    },\n    \"line_items\": {\n      \"type\": \"array\",\n      \"items\": {\n        \"type\": \"object\",\n        \"properties\": {\n          \"description\": {\"type\": \"string\"},\n          \"quantity\": {\"type\": \"number\"},\n          \"price\": {\"type\": \"number\"}\n        }\n      }\n    }\n  }\n}"
      },
      {
        "title": "Receipt Schema",
        "body": "{\n  \"type\": \"object\",\n  \"properties\": {\n    \"merchant\": {\n      \"type\": \"string\",\n      \"description\": \"Store or merchant name\"\n    },\n    \"date\": {\n      \"type\": \"string\",\n      \"description\": \"Transaction date\"\n    },\n    \"total\": {\n      \"type\": \"number\",\n      \"description\": \"Total amount paid\"\n    },\n    \"items\": {\n      \"type\": \"array\",\n      \"items\": {\n        \"type\": \"object\",\n        \"properties\": {\n          \"name\": {\"type\": \"string\"},\n          \"price\": {\"type\": \"number\"}\n        }\n      }\n    }\n  }\n}"
      },
      {
        "title": "Contract Schema",
        "body": "{\n  \"type\": \"object\",\n  \"properties\": {\n    \"parties\": {\n      \"type\": \"array\",\n      \"items\": {\"type\": \"string\"},\n      \"description\": \"Names of all parties in the contract\"\n    },\n    \"effective_date\": {\n      \"type\": \"string\",\n      \"description\": \"Contract start date\"\n    },\n    \"term_length\": {\n      \"type\": \"string\",\n      \"description\": \"Duration of contract\"\n    },\n    \"termination_clause\": {\n      \"type\": \"string\",\n      \"description\": \"Conditions for termination\"\n    }\n  }\n}"
      },
      {
        "title": "Support & Resources",
        "body": "GitHub: https://github.com/deepread-tech\nIssues: https://github.com/deepread-tech/deep-read-service/issues\nEmail:  hello@deepread.tech"
      },
      {
        "title": "Important Notes",
        "body": "Processing Time: 2-5 minutes (async, not real-time)\nAsync Workflow: Use webhooks (recommended) or polling\nRate Limits: 10 req/min on free tier\nFile Size Limit: 50MB per file\nSupported Formats: PDF, JPG, JPEG, PNG\n\nReady to start? Get your free API key at https://www.deepread.tech/dashboard/?utm_source=clawdhub"
      }
    ],
    "body": "DeepRead - Production OCR API\n\nDeepRead is an AI-native OCR platform that turns documents into high-accuracy data in minutes. Using multi-model consensus, DeepRead achieves 97%+ accuracy and flags only uncertain fields for Human-in-the-Loop (HIL) review—reducing manual work from 100% to 5-10%. Zero prompt engineering required.\n\nWhat This Skill Does\n\nDeepRead is a production-grade document processing API that gives you high-accuracy structured data output in minutes with human review flagging so manual review is limited to the flagged exceptions\n\nCore Features:\n\nText Extraction: Convert PDFs and images to clean markdown\nStructured Data: Extract JSON fields with confidence scores\nHIL Interface: Built-in Human-in-the-Loop review — uncertain fields are flagged (hil_flag) so only exceptions need manual review\nMulti-Pass Processing: Multiple validation passes for maximum accuracy\nMulti-Model Consensus: Cross-validation between models for reliability\nFree Tier: 2,000 pages/month (no credit card required)\nSetup\n1. Get Your API Key\n\nSign up and create an API key:\n\n# Visit the dashboard\nhttps://www.deepread.tech/dashboard\n\n# Or use this direct link\nhttps://www.deepread.tech/dashboard/?utm_source=clawdhub\n\n\nSave your API key:\n\nexport DEEPREAD_API_KEY=\"sk_live_your_key_here\"\n\n2. Clawdbot Configuration (Optional)\n\nAdd to your clawdbot.config.json5:\n\n{\n  skills: {\n    entries: {\n      \"deepread\": {\n        enabled: true\n        // API key is read from DEEPREAD_API_KEY environment variable\n        // Do NOT hardcode your API key here\n      }\n    }\n  }\n}\n\n3. Process Your First Document\n\nOption A: With Webhook (Recommended)\n\n# Upload PDF with webhook notification\ncurl -X POST https://api.deepread.tech/v1/process \\\n  -H \"X-API-Key: $DEEPREAD_API_KEY\" \\\n  -F \"file=@document.pdf\" \\\n  -F \"webhook_url=https://your-app.com/webhooks/deepread\"\n\n# Returns immediately\n{\n  \"id\": \"550e8400-e29b-41d4-a716-446655440000\",\n  \"status\": \"queued\"\n}\n\n# Your webhook receives results when processing completes (2-5 minutes)\n\n\nOption B: Poll for Results\n\n# Upload PDF without webhook\ncurl -X POST https://api.deepread.tech/v1/process \\\n  -H \"X-API-Key: $DEEPREAD_API_KEY\" \\\n  -F \"file=@document.pdf\"\n\n# Returns immediately\n{\n  \"id\": \"550e8400-e29b-41d4-a716-446655440000\",\n  \"status\": \"queued\"\n}\n\n# Poll until completed\ncurl https://api.deepread.tech/v1/jobs/550e8400-e29b-41d4-a716-446655440000 \\\n  -H \"X-API-Key: $DEEPREAD_API_KEY\"\n\nUsage Examples\nBasic OCR (Text Only)\n\nExtract text as clean markdown:\n\n# With webhook (recommended)\ncurl -X POST https://api.deepread.tech/v1/process \\\n  -H \"X-API-Key: $DEEPREAD_API_KEY\" \\\n  -F \"file=@invoice.pdf\" \\\n  -F \"webhook_url=https://your-app.com/webhook\"\n\n# OR poll for completion\ncurl -X POST https://api.deepread.tech/v1/process \\\n  -H \"X-API-Key: $DEEPREAD_API_KEY\" \\\n  -F \"file=@invoice.pdf\"\n\n# Then poll\ncurl https://api.deepread.tech/v1/jobs/JOB_ID \\\n  -H \"X-API-Key: $DEEPREAD_API_KEY\"\n\n\nResponse when completed:\n\n{\n  \"id\": \"550e8400-...\",\n  \"status\": \"completed\",\n  \"result\": {\n    \"text\": \"# INVOICE\\n\\n**Vendor:** Acme Corp\\n**Total:** $1,250.00...\"\n  }\n}\n\nStructured Data Extraction\n\nExtract specific fields with confidence scoring:\n\ncurl -X POST https://api.deepread.tech/v1/process \\\n  -H \"X-API-Key: $DEEPREAD_API_KEY\" \\\n  -F \"file=@invoice.pdf\" \\\n  -F 'schema={\n    \"type\": \"object\",\n    \"properties\": {\n      \"vendor\": {\n        \"type\": \"string\",\n        \"description\": \"Vendor company name\"\n      },\n      \"total\": {\n        \"type\": \"number\",\n        \"description\": \"Total invoice amount\"\n      },\n      \"invoice_date\": {\n        \"type\": \"string\",\n        \"description\": \"Invoice date in MM/DD/YYYY format\"\n      }\n    }\n  }'\n\n\nResponse includes confidence flags:\n\n{\n  \"status\": \"completed\",\n  \"result\": {\n    \"text\": \"# INVOICE\\n\\n**Vendor:** Acme Corp...\",\n    \"data\": {\n      \"vendor\": {\n        \"value\": \"Acme Corp\",\n        \"hil_flag\": false,\n        \"found_on_page\": 1\n      },\n      \"total\": {\n        \"value\": 1250.00,\n        \"hil_flag\": false,\n        \"found_on_page\": 1\n      },\n      \"invoice_date\": {\n        \"value\": \"2024-10-??\",\n        \"hil_flag\": true,\n        \"reason\": \"Date partially obscured\",\n        \"found_on_page\": 1\n      }\n    },\n    \"metadata\": {\n      \"fields_requiring_review\": 1,\n      \"total_fields\": 3,\n      \"review_percentage\": 33.3\n    }\n  }\n}\n\nComplex Schemas (Nested Data)\n\nExtract arrays and nested objects:\n\ncurl -X POST https://api.deepread.tech/v1/process \\\n  -H \"X-API-Key: $DEEPREAD_API_KEY\" \\\n  -F \"file=@invoice.pdf\" \\\n  -F 'schema={\n    \"type\": \"object\",\n    \"properties\": {\n      \"vendor\": {\"type\": \"string\"},\n      \"total\": {\"type\": \"number\"},\n      \"line_items\": {\n        \"type\": \"array\",\n        \"items\": {\n          \"type\": \"object\",\n          \"properties\": {\n            \"description\": {\"type\": \"string\"},\n            \"quantity\": {\"type\": \"number\"},\n            \"price\": {\"type\": \"number\"}\n          }\n        }\n      }\n    }\n  }'\n\nPage-by-Page Breakdown\n\nGet per-page OCR results with quality flags:\n\ncurl -X POST https://api.deepread.tech/v1/process \\\n  -H \"X-API-Key: $DEEPREAD_API_KEY\" \\\n  -F \"file=@contract.pdf\" \\\n  -F \"include_pages=true\"\n\n\nResponse:\n\n{\n  \"result\": {\n    \"text\": \"Combined text from all pages...\",\n    \"pages\": [\n      {\n        \"page_number\": 1,\n        \"text\": \"# Contract Agreement\\n\\n...\",\n        \"hil_flag\": false\n      },\n      {\n        \"page_number\": 2,\n        \"text\": \"Terms and C??diti??s...\",\n        \"hil_flag\": true,\n        \"reason\": \"Multiple unrecognized characters\"\n      }\n    ],\n    \"metadata\": {\n      \"pages_requiring_review\": 1,\n      \"total_pages\": 2\n      }\n  }\n}\n\nWhen to Use This Skill\n✅ Use DeepRead For:\nInvoice Processing: Extract vendor, totals, line items\nReceipt OCR: Parse merchant, items, totals\nContract Analysis: Extract parties, dates, terms\nForm Digitization: Convert paper forms to structured data\nDocument Workflows: Any process requiring OCR + data extraction\nQuality-Critical Apps: When you need to know which extractions are uncertain\n❌ Don't Use For:\nReal-time Processing: Processing takes 2-5 minutes (async workflow)\nBatch >2,000 pages/month: Upgrade to PRO or SCALE tier\nHow It Works\nMulti-Pass Pipeline\nPDF → Convert → Rotate Correction → OCR → Multi-Model Validation → Extract → Done\n\n\nThe pipeline automatically handles:\n\nDocument rotation and orientation correction\nMulti-pass validation for accuracy\nCross-model consensus for reliability\nField-level confidence scoring\nHuman-in-the-Loop (HIL) Interface\n\nDeepRead includes a built-in Human-in-the-Loop (HIL) review system. The AI compares extracted text to the original image and sets hil_flag on each field:\n\nhil_flag: false = Clear, confident extraction → Auto-process\nhil_flag: true = Uncertain extraction → Routed to human review\n\nHow HIL works:\n\nFields extracted with high confidence are auto-approved\nUncertain fields are flagged with hil_flag: true and a reason\nOnly flagged fields need human review (typically 5-10% of total fields)\nReview flagged fields in DeepRead Preview (preview.deepread.tech) — a dedicated HIL review interface where reviewers can see the original document side-by-side with extracted data, correct flagged fields, and approve results\nOr integrate with your own review queue using the hil_flag data in the API response\n\nAI flags extractions when:\n\nText is handwritten, blurry, or low quality\nMultiple possible interpretations exist\nCharacters are partially visible or unclear\nField not found in document\n\nThis is multimodal AI determination, not rule-based.\n\nAdvanced Features\n1. Blueprints (Optimized Schemas)\n\nCreate reusable, optimized schemas for specific document types:\n\n# List your blueprints\ncurl https://api.deepread.tech/v1/blueprints \\\n  -H \"X-API-Key: $DEEPREAD_API_KEY\"\n\n# Use blueprint instead of inline schema\ncurl -X POST https://api.deepread.tech/v1/process \\\n  -H \"X-API-Key: $DEEPREAD_API_KEY\" \\\n  -F \"file=@invoice.pdf\" \\\n  -F \"blueprint_id=660e8400-e29b-41d4-a716-446655440001\"\n\n\nBenefits:\n\n20-30% accuracy improvement over baseline schemas\nReusable across similar documents\nVersioned with rollback support\n\nHow to create blueprints:\n\n# Create a blueprint from training data\ncurl -X POST https://api.deepread.tech/v1/optimize \\\n  -H \"X-API-Key: $DEEPREAD_API_KEY\" \\\n  -H \"Content-Type: application/json\" \\\n  -d '{\n    \"name\": \"utility_invoice\",\n    \"description\": \"Optimized for utility invoices\",\n    \"document_type\": \"invoice\",\n    \"initial_schema\": {\n      \"type\": \"object\",\n      \"properties\": {\n        \"vendor\": {\"type\": \"string\", \"description\": \"Vendor name\"},\n        \"total\": {\"type\": \"number\", \"description\": \"Total amount\"}\n      }\n    },\n    \"training_documents\": [\"doc1.pdf\", \"doc2.pdf\", \"doc3.pdf\"],\n    \"ground_truth_data\": [\n      {\"vendor\": \"Acme Power\", \"total\": 125.50},\n      {\"vendor\": \"City Electric\", \"total\": 89.25}\n    ],\n    \"target_accuracy\": 95.0,\n    \"max_iterations\": 5\n  }'\n\n# Returns: {\"job_id\": \"...\", \"blueprint_id\": \"...\", \"status\": \"pending\"}\n\n# Check optimization status\ncurl https://api.deepread.tech/v1/blueprints/jobs/JOB_ID \\\n  -H \"X-API-Key: $DEEPREAD_API_KEY\"\n\n# Use blueprint (once completed)\ncurl -X POST https://api.deepread.tech/v1/process \\\n  -H \"X-API-Key: $DEEPREAD_API_KEY\" \\\n  -F \"file=@invoice.pdf\" \\\n  -F \"blueprint_id=BLUEPRINT_ID\"\n\n2. Webhooks (Recommended for Production)\n\nGet notified when processing completes instead of polling:\n\ncurl -X POST https://api.deepread.tech/v1/process \\\n  -H \"X-API-Key: $DEEPREAD_API_KEY\" \\\n  -F \"file=@invoice.pdf\" \\\n  -F \"webhook_url=https://your-app.com/webhooks/deepread\"\n\n\nYour webhook receives this payload when processing completes:\n\n{\n  \"job_id\": \"550e8400-...\",\n  \"status\": \"completed\",\n  \"created_at\": \"2025-01-27T10:00:00Z\",\n  \"completed_at\": \"2025-01-27T10:02:30Z\",\n  \"result\": {\n    \"text\": \"...\",\n    \"data\": {...}\n  },\n  \"preview_url\": \"https://preview.deepread.tech/abc1234\"\n}\n\n\nBenefits:\n\nNo polling required\nInstant notification when done\nLower latency\nBetter for production workflows\n3. Preview (HIL Review Interface)\n\nDeepRead Preview (preview.deepread.tech) is the built-in Human-in-the-Loop review interface. Reviewers can view the original document alongside extracted data, correct flagged fields, and approve results. Preview URLs can also be shared without authentication:\n\n# Request preview URL\ncurl -X POST https://api.deepread.tech/v1/process \\\n  -H \"X-API-Key: $DEEPREAD_API_KEY\" \\\n  -F \"file=@document.pdf\" \\\n  -F \"include_images=true\"\n\n# Get preview URL in response\n{\n  \"result\": {\n    \"text\": \"...\",\n    \"data\": {...}\n  },\n  \"preview_url\": \"https://preview.deepread.tech/Xy9aB12\"\n}\n\n\nPublic Preview Endpoint:\n\n# No authentication required\ncurl https://api.deepread.tech/v1/preview/Xy9aB12\n\nRate Limits & Pricing\nFree Tier (No Credit Card)\n2,000 pages/month\n10 requests/minute\nFull feature access (OCR + structured extraction + blueprints)\nPaid Plans\nPRO: 50,000 pages/month, 100 requests/minute @ $99/mo\nSCALE: Custom volume pricing (contact sales)\n\nUpgrade: https://www.deepread.tech/dashboard/billing?utm_source=clawdhub\n\nRate Limit Headers\n\nEvery response includes quota information:\n\nX-RateLimit-Limit: 2000\nX-RateLimit-Remaining: 1847\nX-RateLimit-Used: 153\nX-RateLimit-Reset: 1730419200\n\nBest Practices\n1. Use Webhooks for Production\n\n✅ Recommended: Webhook notifications\n\ncurl -X POST https://api.deepread.tech/v1/process \\\n  -H \"X-API-Key: $DEEPREAD_API_KEY\" \\\n  -F \"file=@document.pdf\" \\\n  -F \"webhook_url=https://your-app.com/webhook\"\n\n\nOnly use polling if:\n\nTesting/development\nCannot expose a webhook endpoint\nNeed synchronous response\n2. Schema Design\n\n✅ Good: Descriptive field descriptions\n\n{\n  \"vendor\": {\n    \"type\": \"string\",\n    \"description\": \"Vendor company name. Usually in header or top-left of invoice.\"\n  }\n}\n\n\n❌ Bad: No description\n\n{\n  \"vendor\": {\"type\": \"string\"}\n}\n\n3. Polling Strategy (If Needed)\n\nOnly if you can't use webhooks, poll every 5-10 seconds:\n\nimport time\nimport requests\n\ndef wait_for_result(job_id, api_key):\n    while True:\n        response = requests.get(\n            f\"https://api.deepread.tech/v1/jobs/{job_id}\",\n            headers={\"X-API-Key\": api_key}\n        )\n        result = response.json()\n\n        if result[\"status\"] == \"completed\":\n            return result[\"result\"]\n        elif result[\"status\"] == \"failed\":\n            raise Exception(f\"Job failed: {result.get('error')}\")\n\n        time.sleep(5)\n\n4. Handling Quality Flags\n\nSeparate confident fields from uncertain ones:\n\ndef process_extraction(data):\n    confident = {}\n    needs_review = []\n\n    for field, field_data in data.items():\n        if field_data[\"hil_flag\"]:\n            needs_review.append({\n                \"field\": field,\n                \"value\": field_data[\"value\"],\n                \"reason\": field_data.get(\"reason\")\n            })\n        else:\n            confident[field] = field_data[\"value\"]\n\n    # Auto-process confident fields\n    save_to_database(confident)\n\n    # Send uncertain fields to review queue\n    if needs_review:\n        send_to_review_queue(needs_review)\n\nTroubleshooting\nError: quota_exceeded\n{\"detail\": \"Monthly page quota exceeded\"}\n\n\nSolution: Upgrade to PRO or wait until next billing cycle.\n\nError: invalid_schema\n{\"detail\": \"Schema must be valid JSON Schema\"}\n\n\nSolution: Ensure schema is valid JSON and includes type and properties.\n\nError: file_too_large\n{\"detail\": \"File size exceeds 50MB limit\"}\n\n\nSolution: Compress PDF or split into smaller files.\n\nJob Status: failed\n{\"status\": \"failed\", \"error\": \"PDF could not be processed\"}\n\n\nCommon causes:\n\nCorrupted PDF file\nPassword-protected PDF\nUnsupported PDF version\nImage quality too low for OCR\nExample Schema Templates\nInvoice Schema\n{\n  \"type\": \"object\",\n  \"properties\": {\n    \"invoice_number\": {\n      \"type\": \"string\",\n      \"description\": \"Unique invoice ID\"\n    },\n    \"invoice_date\": {\n      \"type\": \"string\",\n      \"description\": \"Invoice date in MM/DD/YYYY format\"\n    },\n    \"vendor\": {\n      \"type\": \"string\",\n      \"description\": \"Vendor company name\"\n    },\n    \"total\": {\n      \"type\": \"number\",\n      \"description\": \"Total amount due including tax\"\n    },\n    \"line_items\": {\n      \"type\": \"array\",\n      \"items\": {\n        \"type\": \"object\",\n        \"properties\": {\n          \"description\": {\"type\": \"string\"},\n          \"quantity\": {\"type\": \"number\"},\n          \"price\": {\"type\": \"number\"}\n        }\n      }\n    }\n  }\n}\n\nReceipt Schema\n{\n  \"type\": \"object\",\n  \"properties\": {\n    \"merchant\": {\n      \"type\": \"string\",\n      \"description\": \"Store or merchant name\"\n    },\n    \"date\": {\n      \"type\": \"string\",\n      \"description\": \"Transaction date\"\n    },\n    \"total\": {\n      \"type\": \"number\",\n      \"description\": \"Total amount paid\"\n    },\n    \"items\": {\n      \"type\": \"array\",\n      \"items\": {\n        \"type\": \"object\",\n        \"properties\": {\n          \"name\": {\"type\": \"string\"},\n          \"price\": {\"type\": \"number\"}\n        }\n      }\n    }\n  }\n}\n\nContract Schema\n{\n  \"type\": \"object\",\n  \"properties\": {\n    \"parties\": {\n      \"type\": \"array\",\n      \"items\": {\"type\": \"string\"},\n      \"description\": \"Names of all parties in the contract\"\n    },\n    \"effective_date\": {\n      \"type\": \"string\",\n      \"description\": \"Contract start date\"\n    },\n    \"term_length\": {\n      \"type\": \"string\",\n      \"description\": \"Duration of contract\"\n    },\n    \"termination_clause\": {\n      \"type\": \"string\",\n      \"description\": \"Conditions for termination\"\n    }\n  }\n}\n\nSupport & Resources\nGitHub: https://github.com/deepread-tech\nIssues: https://github.com/deepread-tech/deep-read-service/issues\nEmail: hello@deepread.tech\nImportant Notes\nProcessing Time: 2-5 minutes (async, not real-time)\nAsync Workflow: Use webhooks (recommended) or polling\nRate Limits: 10 req/min on free tier\nFile Size Limit: 50MB per file\nSupported Formats: PDF, JPG, JPEG, PNG\n\nReady to start? Get your free API key at https://www.deepread.tech/dashboard/?utm_source=clawdhub"
  },
  "trust": {
    "sourceLabel": "tencent",
    "provenanceUrl": "https://clawhub.ai/uday390/deepread-ocr",
    "publisherUrl": "https://clawhub.ai/uday390/deepread-ocr",
    "owner": "uday390",
    "version": "1.0.6",
    "license": null,
    "verificationStatus": "Indexed source record"
  },
  "links": {
    "detailUrl": "https://openagent3.xyz/skills/deepread-ocr",
    "downloadUrl": "https://openagent3.xyz/downloads/deepread-ocr",
    "agentUrl": "https://openagent3.xyz/skills/deepread-ocr/agent",
    "manifestUrl": "https://openagent3.xyz/skills/deepread-ocr/agent.json",
    "briefUrl": "https://openagent3.xyz/skills/deepread-ocr/agent.md"
  }
}