{
  "schemaVersion": "1.0",
  "item": {
    "slug": "clawd-cursor",
    "name": "ClawdCursor",
    "source": "tencent",
    "type": "skill",
    "category": "开发工具",
    "sourceUrl": "https://clawhub.ai/AmrDab/clawd-cursor",
    "canonicalUrl": "https://clawhub.ai/AmrDab/clawd-cursor",
    "targetPlatform": "OpenClaw"
  },
  "install": {
    "downloadMode": "redirect",
    "downloadUrl": "/downloads/clawd-cursor",
    "sourceDownloadUrl": "https://wry-manatee-359.convex.site/api/v1/download?slug=clawd-cursor",
    "sourcePlatform": "tencent",
    "targetPlatform": "OpenClaw",
    "installMethod": "Manual import",
    "extraction": "Extract archive",
    "prerequisites": [
      "OpenClaw"
    ],
    "packageFormat": "ZIP package",
    "includedAssets": [
      "SKILL.md"
    ],
    "primaryDoc": "SKILL.md",
    "quickSetup": [
      "Download the package from Yavira.",
      "Extract the archive and review SKILL.md first.",
      "Import or place the package into your OpenClaw setup."
    ],
    "agentAssist": {
      "summary": "Hand the extracted package to your coding agent with a concrete install brief instead of figuring it out manually.",
      "steps": [
        "Download the package from Yavira.",
        "Extract it into a folder your agent can access.",
        "Paste one of the prompts below and point your agent at the extracted folder."
      ],
      "prompts": [
        {
          "label": "New install",
          "body": "I downloaded a skill package from Yavira. Read SKILL.md from the extracted folder and install it by following the included instructions. Tell me what you changed and call out any manual steps you could not complete."
        },
        {
          "label": "Upgrade existing",
          "body": "I downloaded an updated skill package from Yavira. Read SKILL.md from the extracted folder, compare it with my current installation, and upgrade it while preserving any custom configuration unless the package docs explicitly say otherwise. Summarize what changed and any follow-up checks I should run."
        }
      ]
    },
    "sourceHealth": {
      "source": "tencent",
      "slug": "clawd-cursor",
      "status": "healthy",
      "reason": "direct_download_ok",
      "recommendedAction": "download",
      "checkedAt": "2026-05-01T15:59:48.331Z",
      "expiresAt": "2026-05-08T15:59:48.331Z",
      "httpStatus": 200,
      "finalUrl": "https://wry-manatee-359.convex.site/api/v1/download?slug=clawd-cursor",
      "contentType": "application/zip",
      "probeMethod": "head",
      "details": {
        "probeUrl": "https://wry-manatee-359.convex.site/api/v1/download?slug=clawd-cursor",
        "contentDisposition": "attachment; filename=\"clawd-cursor-0.6.3.zip\"",
        "redirectLocation": null,
        "bodySnippet": null,
        "slug": "clawd-cursor"
      },
      "scope": "item",
      "summary": "Item download looks usable.",
      "detail": "Yavira can redirect you to the upstream package for this item.",
      "primaryActionLabel": "Download for OpenClaw",
      "primaryActionHref": "/downloads/clawd-cursor"
    },
    "validation": {
      "installChecklist": [
        "Use the Yavira download entry.",
        "Review SKILL.md after the package is downloaded.",
        "Confirm the extracted package contains the expected setup assets."
      ],
      "postInstallChecks": [
        "Confirm the extracted package includes the expected docs or setup files.",
        "Validate the skill or prompts are available in your target agent workspace.",
        "Capture any manual follow-up steps the agent could not complete."
      ]
    },
    "downloadPageUrl": "https://openagent3.xyz/downloads/clawd-cursor",
    "agentPageUrl": "https://openagent3.xyz/skills/clawd-cursor/agent",
    "manifestUrl": "https://openagent3.xyz/skills/clawd-cursor/agent.json",
    "briefUrl": "https://openagent3.xyz/skills/clawd-cursor/agent.md"
  },
  "agentAssist": {
    "summary": "Hand the extracted package to your coding agent with a concrete install brief instead of figuring it out manually.",
    "steps": [
      "Download the package from Yavira.",
      "Extract it into a folder your agent can access.",
      "Paste one of the prompts below and point your agent at the extracted folder."
    ],
    "prompts": [
      {
        "label": "New install",
        "body": "I downloaded a skill package from Yavira. Read SKILL.md from the extracted folder and install it by following the included instructions. Tell me what you changed and call out any manual steps you could not complete."
      },
      {
        "label": "Upgrade existing",
        "body": "I downloaded an updated skill package from Yavira. Read SKILL.md from the extracted folder, compare it with my current installation, and upgrade it while preserving any custom configuration unless the package docs explicitly say otherwise. Summarize what changed and any follow-up checks I should run."
      }
    ]
  },
  "documentation": {
    "source": "clawhub",
    "primaryDoc": "SKILL.md",
    "sections": [
      {
        "title": "Clawd Cursor",
        "body": "If a human can do it on a screen, you can too. No API? No integration? No problem.\nEvery app already has a UI — Clawd Cursor gives you eyes and hands to use all of them.\n\nNo task is impossible. If it's on the screen, it's within reach.\n\nIMPORTANT — READ THIS BEFORE ANYTHING ELSE\nYou have a desktop AI agent over GUI running on the user's machine. This means:\n\nYou CAN open apps, click buttons, type text, fill forms\nYou CAN see what's on screen (via Clawd Cursor's screenshot pipeline)\nYou CAN interact with applications — browser, desktop apps, settings\nYou MUST respect safety tiers — 🟢 Auto actions run freely, 🔴 Confirm actions require user approval\nYou MUST ask before accessing sensitive apps (email, banking, private messages, password managers)\nYou SHOULD prefer direct tools (file read, web_fetch, CLI) over desktop automation when available\n\nThink of Clawd Cursor as your hands and eyes on the user's computer — used with their permission."
      },
      {
        "title": "When to Use This (AGENTS: READ THIS)",
        "body": "You have a desktop UI agent available. Use it when the user asks you to interact with their desktop, or when no other tool can accomplish the task."
      },
      {
        "title": "Tool vs Skill (OpenClaw terminology)",
        "body": "Tool = direct capability (API call, filesystem, shell, web fetch, browser command).\nSkill = packaged workflow/domain logic that may call one or more tools.\nThis skill (Clawd Cursor) = GUI execution skill. Use it after OpenClaw tools/skills that can complete the same work without GUI."
      },
      {
        "title": "Use Clawd Cursor for (examples, not limits):",
        "body": "Clawd Cursor can perform any action that is visible and interactable in the GUI (subject to safety policy).\n\nUser-requested desktop tasks — \"open Gmail and send an email\", \"check my calendar\"\nRead a webpage — when web_fetch or browser tools aren't available\nInteract with desktop apps — click buttons, fill forms, read results\nBrowser tasks — search, navigate, fill forms (when browser tool unavailable)\nVisual verification — did the page load? what does the UI show?\nCross-app workflows — copy from one app, paste in another\nSettings changes — when the user explicitly asks"
      },
      {
        "title": "⚠️ Sensitive App Policy",
        "body": "Always ask the user before accessing:\n\nEmail clients (Gmail, Outlook)\nBanking or financial apps\nPrivate messaging (WhatsApp, Signal, Telegram)\nPassword managers\nAdmin panels or cloud consoles"
      },
      {
        "title": "Don't use Clawd Cursor when:",
        "body": "You can do it with a direct API call or CLI command (faster)\nThe task is purely computational (math, text generation, code writing)\nYou can already read/write the file directly\nThe browser tool or web_fetch can handle it"
      },
      {
        "title": "OpenClaw + Clawd Cursor Routing Contract (Avoid Overlap)",
        "body": "Clawd Cursor should be treated as OpenClaw's GUI execution layer, not a competing planner."
      },
      {
        "title": "Route tasks in this order:",
        "body": "OpenClaw native tools first (filesystem, API, shell, provider-native skills)\nBrowser-native automation next (Playwright/CDP direct) for browser-only reads/clicks\nClawd Cursor API task (POST /task) only when desktop/UI-level interaction is required"
      },
      {
        "title": "Practical rule",
        "body": "If OpenClaw already has a reliable skill/tool for the domain, use it.\nUse Clawd Cursor to bridge gaps where no API/tool exists or when the user explicitly asks for GUI interaction.\n\nThis keeps behavior predictable, lowers latency/cost, and avoids duplicated logic between the main OpenClaw agent and this skill."
      },
      {
        "title": "Universal task pattern",
        "body": "For broad \"get it done\" requests, split into three phases:\n\nPlan in OpenClaw: break work into API/CLI/browser/GUI subtasks.\nExecute cheap paths first: API + CLI + browser direct.\nEscalate only residual UI steps to Clawd Cursor.\n\nThink: \"OpenClaw decides, Clawd Cursor acts on GUI when needed.\""
      },
      {
        "title": "Direct Browser Access (Fast Path)",
        "body": "For quick page reads without a full task, connect to Chrome via Playwright CDP:\n\nconst pw = require('playwright');\nconst browser = await pw.chromium.connectOverCDP('http://127.0.0.1:9222');\nconst pages = browser.contexts()[0].pages();\nconst text = await pages[0].innerText('body');\n\nUse this when you just need page content — faster than sending a task.\n\nScenarioUseWhyRead page content/textCDP DirectInstant, freeFill a web formAPI task (POST /task)Clawd handles multi-step planningCheck if a page loadedCDP DirectJust read the title/URLClick through a complex UI flowAPI task (POST /task)Clawd handles planningGet a list of elements on pageCDP DirectFast DOM queryInteract with a desktop appAPI task (POST /task)CDP is browser-only"
      },
      {
        "title": "REST API Reference",
        "body": "Base URL: http://127.0.0.1:3847\n\nNote: On Windows PowerShell, use curl.exe (with .exe) or Invoke-RestMethod. Bare curl is aliased to Invoke-WebRequest which behaves differently."
      },
      {
        "title": "Pre-flight Check",
        "body": "Before your first task, verify Clawd Cursor is running:\n\ncurl.exe -s http://127.0.0.1:3847/health\n\nExpected: {\"status\":\"ok\",\"version\":\"0.6.0\"}\n\nIf connection refused — start it yourself (don't ask the user):\n\n# Find the skill directory and start the server\nStart-Process -FilePath \"node\" -ArgumentList \"dist/index.js\",\"start\" -WorkingDirectory \"<clawd-cursor-directory>\" -WindowStyle Hidden\nStart-Sleep 3\n# Verify it's running\ncurl.exe -s http://127.0.0.1:3847/health\n\nThe skill directory is wherever SKILL.md lives (the parent of this file). Use that path as the working directory."
      },
      {
        "title": "Sending a Task (Async — Returns Immediately)",
        "body": "POST /task accepts the task and returns immediately. The task runs in the background. You must poll /status to know when it's done.\n\ncurl.exe -s -X POST http://127.0.0.1:3847/task -H \"Content-Type: application/json\" -d \"{\\\"task\\\": \\\"YOUR_TASK_HERE\\\"}\"\n\nPowerShell:\n\nInvoke-RestMethod -Uri http://127.0.0.1:3847/task -Method POST -ContentType \"application/json\" -Body '{\"task\": \"YOUR_TASK_HERE\"}'"
      },
      {
        "title": "Polling Pattern (Follow This)",
        "body": "1. POST /task → get accepted\n2. Wait 2 seconds\n3. GET /status\n4. If status is \"idle\" → done\n5. If status is \"waiting_confirm\" → ASK THE USER, then POST /confirm based on their answer\n6. If still running → wait 2 more seconds, go to step 3\n7. If 60+ seconds → POST /abort and retry with clearer instructions"
      },
      {
        "title": "Checking Status",
        "body": "curl.exe -s http://127.0.0.1:3847/status"
      },
      {
        "title": "Confirming Safety-Gated Actions",
        "body": "Some actions (sending messages, deleting) require approval. 🔴 NEVER self-approve these. Always ask the user for confirmation before POST /confirm. These exist to protect the user — do not bypass them.\n\ncurl.exe -s -X POST http://127.0.0.1:3847/confirm -H \"Content-Type: application/json\" -d \"{\\\"approved\\\": true}\""
      },
      {
        "title": "Aborting a Task",
        "body": "curl.exe -s -X POST http://127.0.0.1:3847/abort"
      },
      {
        "title": "Reading Logs (Debugging)",
        "body": "curl.exe -s http://127.0.0.1:3847/logs\n\nReturns last 200 log entries. Check for error or warn entries when tasks fail."
      },
      {
        "title": "Response States",
        "body": "StateResponseWhat to doAccepted{\"accepted\": true, \"task\": \"...\"}Start pollingRunning{\"status\": \"acting\", \"currentTask\": \"...\", \"stepsCompleted\": 2}Keep pollingWaiting confirm{\"status\": \"waiting_confirm\", \"currentStep\": \"...\"}POST /confirmDone{\"status\": \"idle\"}Task completeBusy{\"error\": \"Agent is busy\", \"state\": {...}}Wait or POST /abort first"
      },
      {
        "title": "CDP Direct Reference",
        "body": "Chrome must be running with --remote-debugging-port=9222."
      },
      {
        "title": "Quick check:",
        "body": "curl.exe -s http://127.0.0.1:9222/json/version\n\nIf this returns JSON, Chrome is ready."
      },
      {
        "title": "Connecting via Playwright:",
        "body": "const { chromium } = require('playwright');\nconst browser = await chromium.connectOverCDP('http://127.0.0.1:9222');\nconst context = browser.contexts()[0];\nconst page = context.pages()[0];\n\n// Read page content\nconst title = await page.title();\nconst url = page.url();\nconst text = await page.textContent('body');\n\n// Click by role\nawait page.getByRole('button', { name: 'Submit' }).click();\n\n// Fill a field\nawait page.getByLabel('Email').fill('user@example.com');\n\n// Read specific elements\nconst buttons = await page.$$eval('button', els => els.map(e => e.textContent));"
      },
      {
        "title": "Task Writing Guidelines",
        "body": "Be specific — include app names, URLs, exact text to type, button names\nOne task at a time — wait for completion before sending the next\nDescribe the goal, not the clicks — say \"Send an email to john@example.com about the meeting\" not \"click compose, click to field...\"\nCheck status if a task seems to hang\nDon't include credentials in task text — tasks are logged"
      },
      {
        "title": "Task Examples",
        "body": "GoalTask to sendSimple navigationOpen Chrome and go to github.comRead screen contentWhat text is currently displayed in Notepad?Cross-app workflowCopy the email address from the Chrome tab and paste it into the To field in OutlookForm fillingIn the open Chrome tab, fill the contact form: name \"John Doe\", email \"john@example.com\"App interactionOpen Spotify and play the Discover Weekly playlistSettings changeOpen Windows Settings and turn on Dark ModeData extractionRead the stock price shown in the Bloomberg tab in ChromeComplex browserOpen YouTube, search for \"Adele Hello\", and play the first video resultVerificationCheck if the deployment succeeded — look at the Vercel dashboard in ChromeSend emailOpen Gmail, compose email to john@example.com, subject: Meeting Tomorrow, body: Confirming 2pm. Best regards.Take screenshotTake a screenshot"
      },
      {
        "title": "Error Recovery",
        "body": "ProblemSolutionConnection refused on :3847Start Clawd Cursor: cd clawd-cursor && npm startConnection refused on :9222Start Chrome with CDP: Start-Process chrome -ArgumentList \"--remote-debugging-port=9222\"Agent returns \"busy\"Poll /status — wait for idle, or POST /abortTask fails with no detailsCheck /logs for error entriesTask completes but wrong resultRephrase with more specifics: exact app name, button text, field labelsSame task fails repeatedlyBreak into smaller tasks (one action per task)Safety confirmation pendingPOST /confirm with {\"approved\": true} or {\"approved\": false}Task hangs > 60 secondsPOST /abort, then retry with simpler phrasing"
      },
      {
        "title": "How It Works — 5-Layer Pipeline",
        "body": "LayerWhatSpeedCost0: Browser LayerURL detection → direct navigationInstantFree1: Action Router + ShortcutsRegex + UI Automation + keyboard shortcutsInstantFree1.5: Smart Interaction1 LLM plan → CDP/UIDriver executes~2-5s1 LLM call2: Accessibility ReasonerUI tree → text LLM decides~1sCheap3: Computer UseScreenshot → vision LLM~5-8sExpensive\n\nLayer 1 includes keyboard shortcuts — common actions execute as direct keystrokes (0 LLM calls).\n\n80%+ of tasks handled by Layer 0-1 (free, instant). Vision model is last resort only."
      },
      {
        "title": "Safety Tiers",
        "body": "TierActionsBehavior🟢 AutoNavigation, reading, opening appsRuns immediately🟡 PreviewTyping, form fillingLogs before executing🔴 ConfirmSending messages, deletingPauses — ask the user before POST /confirm. Never self-approve."
      },
      {
        "title": "Network Isolation",
        "body": "API binds to 127.0.0.1 only — not network accessible. Verify: netstat -an | findstr 3847 should show 127.0.0.1:3847\nScreenshots stay in memory, never saved to disk (unless --debug)\nNo telemetry, no analytics, no phone-home calls"
      },
      {
        "title": "Data Flow",
        "body": "With Ollama (local): 100% offline — zero external network calls. No data leaves the machine.\nWith cloud providers: screenshots/text are sent to the user's chosen provider API only. No data goes to skill authors, ClawHub, or third parties.\nOpenClaw users: credentials auto-discovered from local config files — no keys stored in skill directory.\nThe user controls data flow by choosing their provider. Ollama = fully private."
      },
      {
        "title": "Agent Autonomy Controls",
        "body": "🟢 Auto actions (navigation, reading, opening apps) run without prompting\n🟡 Preview actions (typing, form filling) are logged before executing\n🔴 Confirm actions (sending messages, deleting, purchases) always pause for user approval\nAgents must ask the user before accessing sensitive apps (email, banking, messaging, passwords)\nAgents must never self-approve 🔴 Confirm actions"
      },
      {
        "title": "Setup (User Reference)",
        "body": "Setup is handled by the user. If Clawd Cursor isn't running, start it yourself using the exec tool:\n\nStart-Process -FilePath \"node\" -ArgumentList \"dist/index.js\",\"start\" -WorkingDirectory \"<skill-directory>\" -WindowStyle Hidden\n\nOnly ask the user if you cannot start it (e.g., node not installed, build missing).\n\ngit clone https://github.com/AmrDab/clawd-cursor.git\ncd clawd-cursor\nnpm install && npm run build\nnpx clawd-cursor doctor    # auto-detects and configures everything\nnpm start                  # starts on port 3847\n\nmacOS: Grant Accessibility permission to terminal: System Settings → Privacy & Security → Accessibility\n\nProviderSetupCostOllama (free)ollama pull <model>$0 (fully offline)Any cloud providerSet AI_API_KEY=your-keyVaries by providerOpenClaw usersAutomatic — no setup neededUses configured provider"
      },
      {
        "title": "Performance Optimization",
        "body": "Proven optimizations applied to reduce task execution latency and LLM API costs. Reference files in perf/references/patches/."
      },
      {
        "title": "Applied Optimizations",
        "body": "#NameImpact1Screenshot hash cache90% fewer LLM calls on static screens2Parallel screenshot+a11y30-40% per-step latency cut3A11y context cache (2s TTL)Eliminates redundant PS spawns4Screenshot compression52% smaller payload (58KB vs 120KB)5Async debug writes94% less event loop blocking6Streaming LLM responses1-3s faster per LLM call7Trimmed system prompts~60% fewer prompt tokens8A11y tree filteringInteractive elements only, 3000 char cap9Combined PS script1 spawn instead of 310Taskbar cache (30s TTL)Skip expensive taskbar query11Delay reduction50-150ms vs 200-1500ms"
      },
      {
        "title": "Benchmarks (2560x1440)",
        "body": "Metricv0.3 (VNC)v0.4 (Native)v0.4.1+ (Optimized)Screenshot capture~850ms~50ms~57msScreenshot size~200KB~120KB~58KBA11y context (uncached)N/A~600ms~462msA11y context (cached)N/A0ms0ms (2s TTL)Delays (per step)N/A200-1500ms50-600msSystem prompt tokensN/A~800~300"
      },
      {
        "title": "Perf Tools",
        "body": "perf/apply-optimizations.ps1 — apply all patches\nperf/perf-test.ts — benchmark harness (npx ts-node perf/perf-test.ts)"
      }
    ],
    "body": "Clawd Cursor\n\nIf a human can do it on a screen, you can too. No API? No integration? No problem. Every app already has a UI — Clawd Cursor gives you eyes and hands to use all of them.\n\nNo task is impossible. If it's on the screen, it's within reach.\n\nIMPORTANT — READ THIS BEFORE ANYTHING ELSE\n\nYou have a desktop AI agent over GUI running on the user's machine. This means:\n\nYou CAN open apps, click buttons, type text, fill forms\nYou CAN see what's on screen (via Clawd Cursor's screenshot pipeline)\nYou CAN interact with applications — browser, desktop apps, settings\nYou MUST respect safety tiers — 🟢 Auto actions run freely, 🔴 Confirm actions require user approval\nYou MUST ask before accessing sensitive apps (email, banking, private messages, password managers)\nYou SHOULD prefer direct tools (file read, web_fetch, CLI) over desktop automation when available\n\nThink of Clawd Cursor as your hands and eyes on the user's computer — used with their permission.\n\nWhen to Use This (AGENTS: READ THIS)\n\nYou have a desktop UI agent available. Use it when the user asks you to interact with their desktop, or when no other tool can accomplish the task.\n\nTool vs Skill (OpenClaw terminology)\nTool = direct capability (API call, filesystem, shell, web fetch, browser command).\nSkill = packaged workflow/domain logic that may call one or more tools.\nThis skill (Clawd Cursor) = GUI execution skill. Use it after OpenClaw tools/skills that can complete the same work without GUI.\nUse Clawd Cursor for (examples, not limits):\n\nClawd Cursor can perform any action that is visible and interactable in the GUI (subject to safety policy).\n\nUser-requested desktop tasks — \"open Gmail and send an email\", \"check my calendar\"\nRead a webpage — when web_fetch or browser tools aren't available\nInteract with desktop apps — click buttons, fill forms, read results\nBrowser tasks — search, navigate, fill forms (when browser tool unavailable)\nVisual verification — did the page load? what does the UI show?\nCross-app workflows — copy from one app, paste in another\nSettings changes — when the user explicitly asks\n⚠️ Sensitive App Policy\n\nAlways ask the user before accessing:\n\nEmail clients (Gmail, Outlook)\nBanking or financial apps\nPrivate messaging (WhatsApp, Signal, Telegram)\nPassword managers\nAdmin panels or cloud consoles\nDon't use Clawd Cursor when:\nYou can do it with a direct API call or CLI command (faster)\nThe task is purely computational (math, text generation, code writing)\nYou can already read/write the file directly\nThe browser tool or web_fetch can handle it\nOpenClaw + Clawd Cursor Routing Contract (Avoid Overlap)\n\nClawd Cursor should be treated as OpenClaw's GUI execution layer, not a competing planner.\n\nRoute tasks in this order:\nOpenClaw native tools first (filesystem, API, shell, provider-native skills)\nBrowser-native automation next (Playwright/CDP direct) for browser-only reads/clicks\nClawd Cursor API task (POST /task) only when desktop/UI-level interaction is required\nPractical rule\nIf OpenClaw already has a reliable skill/tool for the domain, use it.\nUse Clawd Cursor to bridge gaps where no API/tool exists or when the user explicitly asks for GUI interaction.\n\nThis keeps behavior predictable, lowers latency/cost, and avoids duplicated logic between the main OpenClaw agent and this skill.\n\nUniversal task pattern\n\nFor broad \"get it done\" requests, split into three phases:\n\nPlan in OpenClaw: break work into API/CLI/browser/GUI subtasks.\nExecute cheap paths first: API + CLI + browser direct.\nEscalate only residual UI steps to Clawd Cursor.\n\nThink: \"OpenClaw decides, Clawd Cursor acts on GUI when needed.\"\n\nDirect Browser Access (Fast Path)\n\nFor quick page reads without a full task, connect to Chrome via Playwright CDP:\n\nconst pw = require('playwright');\nconst browser = await pw.chromium.connectOverCDP('http://127.0.0.1:9222');\nconst pages = browser.contexts()[0].pages();\nconst text = await pages[0].innerText('body');\n\n\nUse this when you just need page content — faster than sending a task.\n\nScenario\tUse\tWhy\nRead page content/text\tCDP Direct\tInstant, free\nFill a web form\tAPI task (POST /task)\tClawd handles multi-step planning\nCheck if a page loaded\tCDP Direct\tJust read the title/URL\nClick through a complex UI flow\tAPI task (POST /task)\tClawd handles planning\nGet a list of elements on page\tCDP Direct\tFast DOM query\nInteract with a desktop app\tAPI task (POST /task)\tCDP is browser-only\nREST API Reference\n\nBase URL: http://127.0.0.1:3847\n\nNote: On Windows PowerShell, use curl.exe (with .exe) or Invoke-RestMethod. Bare curl is aliased to Invoke-WebRequest which behaves differently.\n\nPre-flight Check\n\nBefore your first task, verify Clawd Cursor is running:\n\ncurl.exe -s http://127.0.0.1:3847/health\n\n\nExpected: {\"status\":\"ok\",\"version\":\"0.6.0\"}\n\nIf connection refused — start it yourself (don't ask the user):\n\n# Find the skill directory and start the server\nStart-Process -FilePath \"node\" -ArgumentList \"dist/index.js\",\"start\" -WorkingDirectory \"<clawd-cursor-directory>\" -WindowStyle Hidden\nStart-Sleep 3\n# Verify it's running\ncurl.exe -s http://127.0.0.1:3847/health\n\n\nThe skill directory is wherever SKILL.md lives (the parent of this file). Use that path as the working directory.\n\nSending a Task (Async — Returns Immediately)\n\nPOST /task accepts the task and returns immediately. The task runs in the background. You must poll /status to know when it's done.\n\ncurl.exe -s -X POST http://127.0.0.1:3847/task -H \"Content-Type: application/json\" -d \"{\\\"task\\\": \\\"YOUR_TASK_HERE\\\"}\"\n\n\nPowerShell:\n\nInvoke-RestMethod -Uri http://127.0.0.1:3847/task -Method POST -ContentType \"application/json\" -Body '{\"task\": \"YOUR_TASK_HERE\"}'\n\nPolling Pattern (Follow This)\n1. POST /task → get accepted\n2. Wait 2 seconds\n3. GET /status\n4. If status is \"idle\" → done\n5. If status is \"waiting_confirm\" → ASK THE USER, then POST /confirm based on their answer\n6. If still running → wait 2 more seconds, go to step 3\n7. If 60+ seconds → POST /abort and retry with clearer instructions\n\nChecking Status\ncurl.exe -s http://127.0.0.1:3847/status\n\nConfirming Safety-Gated Actions\n\nSome actions (sending messages, deleting) require approval. 🔴 NEVER self-approve these. Always ask the user for confirmation before POST /confirm. These exist to protect the user — do not bypass them.\n\ncurl.exe -s -X POST http://127.0.0.1:3847/confirm -H \"Content-Type: application/json\" -d \"{\\\"approved\\\": true}\"\n\nAborting a Task\ncurl.exe -s -X POST http://127.0.0.1:3847/abort\n\nReading Logs (Debugging)\ncurl.exe -s http://127.0.0.1:3847/logs\n\n\nReturns last 200 log entries. Check for error or warn entries when tasks fail.\n\nResponse States\nState\tResponse\tWhat to do\nAccepted\t{\"accepted\": true, \"task\": \"...\"}\tStart polling\nRunning\t{\"status\": \"acting\", \"currentTask\": \"...\", \"stepsCompleted\": 2}\tKeep polling\nWaiting confirm\t{\"status\": \"waiting_confirm\", \"currentStep\": \"...\"}\tPOST /confirm\nDone\t{\"status\": \"idle\"}\tTask complete\nBusy\t{\"error\": \"Agent is busy\", \"state\": {...}}\tWait or POST /abort first\nCDP Direct Reference\n\nChrome must be running with --remote-debugging-port=9222.\n\nQuick check:\ncurl.exe -s http://127.0.0.1:9222/json/version\n\n\nIf this returns JSON, Chrome is ready.\n\nConnecting via Playwright:\nconst { chromium } = require('playwright');\nconst browser = await chromium.connectOverCDP('http://127.0.0.1:9222');\nconst context = browser.contexts()[0];\nconst page = context.pages()[0];\n\n// Read page content\nconst title = await page.title();\nconst url = page.url();\nconst text = await page.textContent('body');\n\n// Click by role\nawait page.getByRole('button', { name: 'Submit' }).click();\n\n// Fill a field\nawait page.getByLabel('Email').fill('user@example.com');\n\n// Read specific elements\nconst buttons = await page.$$eval('button', els => els.map(e => e.textContent));\n\nTask Writing Guidelines\nBe specific — include app names, URLs, exact text to type, button names\nOne task at a time — wait for completion before sending the next\nDescribe the goal, not the clicks — say \"Send an email to john@example.com about the meeting\" not \"click compose, click to field...\"\nCheck status if a task seems to hang\nDon't include credentials in task text — tasks are logged\nTask Examples\nGoal\tTask to send\nSimple navigation\tOpen Chrome and go to github.com\nRead screen content\tWhat text is currently displayed in Notepad?\nCross-app workflow\tCopy the email address from the Chrome tab and paste it into the To field in Outlook\nForm filling\tIn the open Chrome tab, fill the contact form: name \"John Doe\", email \"john@example.com\"\nApp interaction\tOpen Spotify and play the Discover Weekly playlist\nSettings change\tOpen Windows Settings and turn on Dark Mode\nData extraction\tRead the stock price shown in the Bloomberg tab in Chrome\nComplex browser\tOpen YouTube, search for \"Adele Hello\", and play the first video result\nVerification\tCheck if the deployment succeeded — look at the Vercel dashboard in Chrome\nSend email\tOpen Gmail, compose email to john@example.com, subject: Meeting Tomorrow, body: Confirming 2pm. Best regards.\nTake screenshot\tTake a screenshot\nError Recovery\nProblem\tSolution\nConnection refused on :3847\tStart Clawd Cursor: cd clawd-cursor && npm start\nConnection refused on :9222\tStart Chrome with CDP: Start-Process chrome -ArgumentList \"--remote-debugging-port=9222\"\nAgent returns \"busy\"\tPoll /status — wait for idle, or POST /abort\nTask fails with no details\tCheck /logs for error entries\nTask completes but wrong result\tRephrase with more specifics: exact app name, button text, field labels\nSame task fails repeatedly\tBreak into smaller tasks (one action per task)\nSafety confirmation pending\tPOST /confirm with {\"approved\": true} or {\"approved\": false}\nTask hangs > 60 seconds\tPOST /abort, then retry with simpler phrasing\nHow It Works — 5-Layer Pipeline\nLayer\tWhat\tSpeed\tCost\n0: Browser Layer\tURL detection → direct navigation\tInstant\tFree\n1: Action Router + Shortcuts\tRegex + UI Automation + keyboard shortcuts\tInstant\tFree\n1.5: Smart Interaction\t1 LLM plan → CDP/UIDriver executes\t~2-5s\t1 LLM call\n2: Accessibility Reasoner\tUI tree → text LLM decides\t~1s\tCheap\n3: Computer Use\tScreenshot → vision LLM\t~5-8s\tExpensive\n\nLayer 1 includes keyboard shortcuts — common actions execute as direct keystrokes (0 LLM calls).\n\n80%+ of tasks handled by Layer 0-1 (free, instant). Vision model is last resort only.\n\nSafety Tiers\nTier\tActions\tBehavior\n🟢 Auto\tNavigation, reading, opening apps\tRuns immediately\n🟡 Preview\tTyping, form filling\tLogs before executing\n🔴 Confirm\tSending messages, deleting\tPauses — ask the user before POST /confirm. Never self-approve.\nSecurity & Privacy\nNetwork Isolation\nAPI binds to 127.0.0.1 only — not network accessible. Verify: netstat -an | findstr 3847 should show 127.0.0.1:3847\nScreenshots stay in memory, never saved to disk (unless --debug)\nNo telemetry, no analytics, no phone-home calls\nData Flow\nWith Ollama (local): 100% offline — zero external network calls. No data leaves the machine.\nWith cloud providers: screenshots/text are sent to the user's chosen provider API only. No data goes to skill authors, ClawHub, or third parties.\nOpenClaw users: credentials auto-discovered from local config files — no keys stored in skill directory.\nThe user controls data flow by choosing their provider. Ollama = fully private.\nAgent Autonomy Controls\n🟢 Auto actions (navigation, reading, opening apps) run without prompting\n🟡 Preview actions (typing, form filling) are logged before executing\n🔴 Confirm actions (sending messages, deleting, purchases) always pause for user approval\nAgents must ask the user before accessing sensitive apps (email, banking, messaging, passwords)\nAgents must never self-approve 🔴 Confirm actions\nSetup (User Reference)\n\nSetup is handled by the user. If Clawd Cursor isn't running, start it yourself using the exec tool:\n\nStart-Process -FilePath \"node\" -ArgumentList \"dist/index.js\",\"start\" -WorkingDirectory \"<skill-directory>\" -WindowStyle Hidden\n\n\nOnly ask the user if you cannot start it (e.g., node not installed, build missing).\n\ngit clone https://github.com/AmrDab/clawd-cursor.git\ncd clawd-cursor\nnpm install && npm run build\nnpx clawd-cursor doctor    # auto-detects and configures everything\nnpm start                  # starts on port 3847\n\n\nmacOS: Grant Accessibility permission to terminal: System Settings → Privacy & Security → Accessibility\n\nProvider\tSetup\tCost\nOllama (free)\tollama pull <model>\t$0 (fully offline)\nAny cloud provider\tSet AI_API_KEY=your-key\tVaries by provider\nOpenClaw users\tAutomatic — no setup needed\tUses configured provider\nPerformance Optimization\n\nProven optimizations applied to reduce task execution latency and LLM API costs. Reference files in perf/references/patches/.\n\nApplied Optimizations\n#\tName\tImpact\n1\tScreenshot hash cache\t90% fewer LLM calls on static screens\n2\tParallel screenshot+a11y\t30-40% per-step latency cut\n3\tA11y context cache (2s TTL)\tEliminates redundant PS spawns\n4\tScreenshot compression\t52% smaller payload (58KB vs 120KB)\n5\tAsync debug writes\t94% less event loop blocking\n6\tStreaming LLM responses\t1-3s faster per LLM call\n7\tTrimmed system prompts\t~60% fewer prompt tokens\n8\tA11y tree filtering\tInteractive elements only, 3000 char cap\n9\tCombined PS script\t1 spawn instead of 3\n10\tTaskbar cache (30s TTL)\tSkip expensive taskbar query\n11\tDelay reduction\t50-150ms vs 200-1500ms\nBenchmarks (2560x1440)\nMetric\tv0.3 (VNC)\tv0.4 (Native)\tv0.4.1+ (Optimized)\nScreenshot capture\t~850ms\t~50ms\t~57ms\nScreenshot size\t~200KB\t~120KB\t~58KB\nA11y context (uncached)\tN/A\t~600ms\t~462ms\nA11y context (cached)\tN/A\t0ms\t0ms (2s TTL)\nDelays (per step)\tN/A\t200-1500ms\t50-600ms\nSystem prompt tokens\tN/A\t~800\t~300\nPerf Tools\nperf/apply-optimizations.ps1 — apply all patches\nperf/perf-test.ts — benchmark harness (npx ts-node perf/perf-test.ts)"
  },
  "trust": {
    "sourceLabel": "tencent",
    "provenanceUrl": "https://clawhub.ai/AmrDab/clawd-cursor",
    "publisherUrl": "https://clawhub.ai/AmrDab/clawd-cursor",
    "owner": "AmrDab",
    "version": "0.6.3",
    "license": null,
    "verificationStatus": "Indexed source record"
  },
  "links": {
    "detailUrl": "https://openagent3.xyz/skills/clawd-cursor",
    "downloadUrl": "https://openagent3.xyz/downloads/clawd-cursor",
    "agentUrl": "https://openagent3.xyz/skills/clawd-cursor/agent",
    "manifestUrl": "https://openagent3.xyz/skills/clawd-cursor/agent.json",
    "briefUrl": "https://openagent3.xyz/skills/clawd-cursor/agent.md"
  }
}