{
  "schemaVersion": "1.0",
  "item": {
    "slug": "stealthy-auto-browse",
    "name": "stealthy-auto-browse",
    "source": "tencent",
    "type": "skill",
    "category": "开发工具",
    "sourceUrl": "https://clawhub.ai/psyb0t/stealthy-auto-browse",
    "canonicalUrl": "https://clawhub.ai/psyb0t/stealthy-auto-browse",
    "targetPlatform": "OpenClaw"
  },
  "install": {
    "downloadMode": "redirect",
    "downloadUrl": "/downloads/stealthy-auto-browse",
    "sourceDownloadUrl": "https://wry-manatee-359.convex.site/api/v1/download?slug=stealthy-auto-browse",
    "sourcePlatform": "tencent",
    "targetPlatform": "OpenClaw",
    "installMethod": "Manual import",
    "extraction": "Extract archive",
    "prerequisites": [
      "OpenClaw"
    ],
    "packageFormat": "ZIP package",
    "includedAssets": [
      "SKILL.md"
    ],
    "primaryDoc": "SKILL.md",
    "quickSetup": [
      "Download the package from Yavira.",
      "Extract the archive and review SKILL.md first.",
      "Import or place the package into your OpenClaw setup."
    ],
    "agentAssist": {
      "summary": "Hand the extracted package to your coding agent with a concrete install brief instead of figuring it out manually.",
      "steps": [
        "Download the package from Yavira.",
        "Extract it into a folder your agent can access.",
        "Paste one of the prompts below and point your agent at the extracted folder."
      ],
      "prompts": [
        {
          "label": "New install",
          "body": "I downloaded a skill package from Yavira. Read SKILL.md from the extracted folder and install it by following the included instructions. Tell me what you changed and call out any manual steps you could not complete."
        },
        {
          "label": "Upgrade existing",
          "body": "I downloaded an updated skill package from Yavira. Read SKILL.md from the extracted folder, compare it with my current installation, and upgrade it while preserving any custom configuration unless the package docs explicitly say otherwise. Summarize what changed and any follow-up checks I should run."
        }
      ]
    },
    "sourceHealth": {
      "source": "tencent",
      "status": "healthy",
      "reason": "direct_download_ok",
      "recommendedAction": "download",
      "checkedAt": "2026-05-07T17:22:31.273Z",
      "expiresAt": "2026-05-14T17:22:31.273Z",
      "httpStatus": 200,
      "finalUrl": "https://wry-manatee-359.convex.site/api/v1/download?slug=afrexai-annual-report",
      "contentType": "application/zip",
      "probeMethod": "head",
      "details": {
        "probeUrl": "https://wry-manatee-359.convex.site/api/v1/download?slug=afrexai-annual-report",
        "contentDisposition": "attachment; filename=\"afrexai-annual-report-1.0.0.zip\"",
        "redirectLocation": null,
        "bodySnippet": null
      },
      "scope": "source",
      "summary": "Source download looks usable.",
      "detail": "Yavira can redirect you to the upstream package for this source.",
      "primaryActionLabel": "Download for OpenClaw",
      "primaryActionHref": "/downloads/stealthy-auto-browse"
    },
    "validation": {
      "installChecklist": [
        "Use the Yavira download entry.",
        "Review SKILL.md after the package is downloaded.",
        "Confirm the extracted package contains the expected setup assets."
      ],
      "postInstallChecks": [
        "Confirm the extracted package includes the expected docs or setup files.",
        "Validate the skill or prompts are available in your target agent workspace.",
        "Capture any manual follow-up steps the agent could not complete."
      ]
    },
    "downloadPageUrl": "https://openagent3.xyz/downloads/stealthy-auto-browse",
    "agentPageUrl": "https://openagent3.xyz/skills/stealthy-auto-browse/agent",
    "manifestUrl": "https://openagent3.xyz/skills/stealthy-auto-browse/agent.json",
    "briefUrl": "https://openagent3.xyz/skills/stealthy-auto-browse/agent.md"
  },
  "agentAssist": {
    "summary": "Hand the extracted package to your coding agent with a concrete install brief instead of figuring it out manually.",
    "steps": [
      "Download the package from Yavira.",
      "Extract it into a folder your agent can access.",
      "Paste one of the prompts below and point your agent at the extracted folder."
    ],
    "prompts": [
      {
        "label": "New install",
        "body": "I downloaded a skill package from Yavira. Read SKILL.md from the extracted folder and install it by following the included instructions. Tell me what you changed and call out any manual steps you could not complete."
      },
      {
        "label": "Upgrade existing",
        "body": "I downloaded an updated skill package from Yavira. Read SKILL.md from the extracted folder, compare it with my current installation, and upgrade it while preserving any custom configuration unless the package docs explicitly say otherwise. Summarize what changed and any follow-up checks I should run."
      }
    ]
  },
  "documentation": {
    "source": "clawhub",
    "primaryDoc": "SKILL.md",
    "sections": [
      {
        "title": "stealthy-auto-browse",
        "body": "A stealth browser running in Docker. It uses Camoufox (a custom Firefox fork) instead of Chromium, so there are zero Chrome DevTools Protocol (CDP) signals for bot detectors to find. Mouse and keyboard input happens at the OS level via PyAutoGUI — the browser itself doesn't know it's being automated, which means behavioral analysis can't detect it either."
      },
      {
        "title": "Why This Exists",
        "body": "Standard browser automation (Playwright + Chromium, Puppeteer, Selenium) exposes CDP signals that bot detection services (Cloudflare, DataDome, PerimeterX, Akamai) catch instantly. Even with stealth plugins, the CDP protocol is still there and detectable. This skill eliminates that entirely by using Firefox (no CDP at all) and generating input events at the OS level rather than through the browser's automation API."
      },
      {
        "title": "When To Use This Skill",
        "body": "Site has bot detection (Cloudflare challenge pages, DataDome, PerimeterX, Akamai)\nSite blocks headless browsers or serves CAPTCHAs\nYou need a logged-in session that doesn't get banned\nAnother browser skill is getting 403s or empty/blocked responses\nYou're scraping a site that actively fights automation"
      },
      {
        "title": "When NOT To Use This Skill",
        "body": "Simple fetches with no bot protection — use curl or WebFetch\nSites that don't care about automation — use a regular browser skill, it's faster to set up\nYou only need static HTML — use curl"
      },
      {
        "title": "Setup",
        "body": "1. Start the container:\n\ndocker run -d -p 8080:8080 -p 5900:5900 psyb0t/stealthy-auto-browse\n\nPort 8080 is the HTTP API. Port 5900 is a noVNC web viewer where you can watch the browser in real time.\n\n2. Set the environment variable:\n\nexport STEALTHY_AUTO_BROWSE_URL=http://localhost:8080\n\nOr via OpenClaw config (~/.openclaw/openclaw.json):\n\n{\n  \"skills\": {\n    \"entries\": {\n      \"stealthy-auto-browse\": {\n        \"env\": {\n          \"STEALTHY_AUTO_BROWSE_URL\": \"http://localhost:8080\"\n        }\n      }\n    }\n  }\n}\n\n3. Verify: curl $STEALTHY_AUTO_BROWSE_URL/health returns ok when the browser is ready."
      },
      {
        "title": "How It Works",
        "body": "The container runs a virtual X display (Xvfb at 1920x1080), the Camoufox browser, and an HTTP API server. You send JSON commands to the API and get JSON responses back. All commands go to POST $STEALTHY_AUTO_BROWSE_URL/ with {\"action\": \"<name>\", ...params}.\n\nEvery response has this shape:\n\n{\n  \"success\": true,\n  \"timestamp\": 1234567890.123,\n  \"data\": { ... },\n  \"error\": \"only present when success is false\"\n}\n\nThe data field contents vary by action — documented below for each one."
      },
      {
        "title": "Understanding the Two Input Modes",
        "body": "This is the most important concept. There are two ways to interact with pages:"
      },
      {
        "title": "System Input (Undetectable)",
        "body": "Actions: system_click, mouse_move, mouse_click, system_type, send_key, scroll\n\nThese use PyAutoGUI to generate real OS-level mouse movements and keystrokes. The browser receives these as genuine user input — there is no way for any website JavaScript to distinguish these from a real human. Use these for stealth.\n\nSystem input works with viewport coordinates (x, y pixel positions within the browser content area). Get these coordinates from get_interactive_elements."
      },
      {
        "title": "Playwright Input (Detectable)",
        "body": "Actions: click, fill, type\n\nThese use Playwright's DOM automation to interact with elements by CSS selector or XPath. They're faster and more reliable (no coordinate math), but they inject events through the browser's automation layer. Sophisticated behavioral analysis can potentially detect the timing patterns. Use these when speed matters more than stealth, or when you have a selector but no coordinates."
      },
      {
        "title": "When to Use Which",
        "body": "Stealth-critical sites (Cloudflare, login forms, anything with bot detection): Always use system input.\nSimple scraping where the site isn't actively fighting you: Playwright input is fine and easier.\nForm filling: Use system_click to focus the field, then system_type to enter text. This is undetectable. Using fill is faster but detectable.\nClicking buttons: If you have coordinates from get_interactive_elements, use system_click. If you only have a CSS selector, use click."
      },
      {
        "title": "Workflow",
        "body": "This is the typical sequence for interacting with a page:\n\nNavigate: goto to load the URL\nRead the page: get_text returns all visible text — usually enough to understand the page\nIf text isn't clear: get_html gives you the full DOM structure\nIf still confused: Take a screenshot (GET /screenshot/browser?whLargest=512)\nFind interactive elements: get_interactive_elements returns all buttons, links, inputs with their x,y coordinates\nInteract: system_click to click, system_type to type, send_key for Enter/Tab/Escape\nWait for results: wait_for_element or wait_for_text instead of sleeping\nVerify: get_text again to confirm the page changed as expected"
      },
      {
        "title": "Navigation",
        "body": "goto\n\nNavigates to a URL. This is how you load pages.\n\n{\"action\": \"goto\", \"url\": \"https://example.com\"}\n{\"action\": \"goto\", \"url\": \"https://example.com\", \"wait_until\": \"networkidle\"}\n\nParameters:\n\nurl (required): The URL to navigate to.\nwait_until (optional, default \"domcontentloaded\"): When to consider the page loaded. Options: \"domcontentloaded\" (DOM parsed, fast), \"load\" (all resources loaded), \"networkidle\" (no network activity for 500ms, slowest but most complete).\n\nResponse data: {\"url\": \"https://example.com/\", \"title\": \"Example Domain\"}\n\nNote: If a page loader matches the URL (see Page Loaders section), the loader's steps execute instead of the default navigation. The response will include \"loader\": \"loader name\" when this happens.\n\nrefresh\n\nReloads the current page.\n\n{\"action\": \"refresh\"}\n{\"action\": \"refresh\", \"wait_until\": \"networkidle\"}\n\nParameters:\n\nwait_until (optional, default \"domcontentloaded\"): Same options as goto.\n\nResponse data: {\"url\": \"https://example.com/current-page\", \"title\": \"Current Page\"}"
      },
      {
        "title": "System Input (Undetectable)",
        "body": "system_click\n\nMoves the mouse to viewport coordinates with a human-like curve (random jitter, eased acceleration), then clicks. This is the primary way to click things stealthily.\n\n{\"action\": \"system_click\", \"x\": 500, \"y\": 300}\n{\"action\": \"system_click\", \"x\": 500, \"y\": 300, \"duration\": 0.5}\n\nParameters:\n\nx, y (required): Viewport coordinates — get these from get_interactive_elements.\nduration (optional): How long the mouse movement takes in seconds. If omitted, a random duration between 0.2-0.6s is used for realism.\n\nResponse data: {\"system_clicked\": {\"x\": 500, \"y\": 300}}\n\nHow it differs from mouse_click: system_click always moves the mouse first (smooth human-like path), then clicks. mouse_click can click at a position instantly without the smooth movement, or click wherever the mouse currently is.\n\nmouse_move\n\nMoves the mouse to viewport coordinates with human-like movement (jitter, eased curve) but does NOT click. Use this to hover over elements (to trigger hover menus, tooltips) or to simulate natural mouse behavior between actions.\n\n{\"action\": \"mouse_move\", \"x\": 500, \"y\": 300}\n{\"action\": \"mouse_move\", \"x\": 500, \"y\": 300, \"duration\": 0.4}\n\nParameters:\n\nx, y (required): Viewport coordinates.\nduration (optional): Movement time in seconds. Random 0.2-0.6s if omitted.\n\nResponse data: {\"moved_to\": {\"x\": 500, \"y\": 300}}\n\nmouse_click\n\nClicks at a position or at the current mouse location. Unlike system_click, this does NOT do a smooth mouse movement first — it's a direct click via PyAutoGUI.\n\n{\"action\": \"mouse_click\"}\n{\"action\": \"mouse_click\", \"x\": 500, \"y\": 300}\n\nParameters:\n\nx, y (optional): If provided, clicks at that viewport position directly. If omitted, clicks wherever the mouse currently is.\n\nResponse data: {\"clicked_at\": {\"x\": 500, \"y\": 300}} or {\"clicked_at\": \"current\"}\n\nWhen to use: After a mouse_move when you want to separate the movement and click into two steps. Or when the mouse is already positioned and you just need to click.\n\nsystem_type\n\nTypes text character-by-character via real OS keystrokes. Each keystroke has a randomized delay (jittered around the interval) to mimic human typing speed. Completely undetectable.\n\n{\"action\": \"system_type\", \"text\": \"hello world\"}\n{\"action\": \"system_type\", \"text\": \"hello world\", \"interval\": 0.12}\n\nParameters:\n\ntext (required): The text to type. Must click/focus an input field first.\ninterval (optional, default 0.08): Base delay between keystrokes in seconds. Actual delay is randomized +-30ms around this value.\n\nResponse data: {\"typed_len\": 11}\n\nImportant: You must click on the input field first (using system_click or click) before calling system_type. This action types into whatever is currently focused.\n\nsend_key\n\nSends a single keyboard key or key combination via OS-level input. Use this for pressing Enter to submit forms, Tab to move between fields, Escape to close dialogs, or any key combos like Ctrl+A, Ctrl+C, etc.\n\n{\"action\": \"send_key\", \"key\": \"enter\"}\n{\"action\": \"send_key\", \"key\": \"tab\"}\n{\"action\": \"send_key\", \"key\": \"escape\"}\n{\"action\": \"send_key\", \"key\": \"ctrl+a\"}\n{\"action\": \"send_key\", \"key\": \"ctrl+shift+t\"}\n\nParameters:\n\nkey (required): Key name or combo with + separator. Key names follow PyAutoGUI naming: enter, tab, escape, backspace, delete, up, down, left, right, home, end, pageup, pagedown, f1-f12, ctrl, alt, shift, space, etc.\n\nResponse data: {\"send_key\": \"enter\"}\n\nscroll\n\nScrolls the page using the mouse scroll wheel. Generates real OS-level scroll events.\n\n{\"action\": \"scroll\", \"amount\": -3}\n{\"action\": \"scroll\", \"amount\": 5, \"x\": 500, \"y\": 300}\n\nParameters:\n\namount (optional, default -3): Scroll amount. Negative = scroll down, positive = scroll up. Each unit is roughly one \"click\" of a mouse wheel.\nx, y (optional): If provided, moves the mouse to these viewport coordinates first, then scrolls. Useful for scrolling inside a specific scrollable element rather than the whole page.\n\nResponse data: {\"scrolled\": -3}"
      },
      {
        "title": "Playwright Input (Detectable)",
        "body": "These are faster and more convenient but use Playwright's DOM event injection, which is detectable by sophisticated behavioral analysis.\n\nclick\n\nClicks an element by CSS selector or XPath. Playwright finds the element in the DOM, scrolls it into view if needed, and dispatches click events.\n\n{\"action\": \"click\", \"selector\": \"#submit-btn\"}\n{\"action\": \"click\", \"selector\": \"button.primary\"}\n{\"action\": \"click\", \"selector\": \"xpath=//button[@id='submit-btn']\"}\n\nParameters:\n\nselector (required): CSS selector or XPath (prefix with xpath=).\n\nResponse data: {\"clicked\": \"#submit-btn\"}\n\nWhen to use over system_click: When you have a selector but don't want to bother getting coordinates. When the element might move around and coordinates aren't reliable. When stealth isn't critical.\n\nfill\n\nFills an input field by selector. Clears any existing content first, then sets the value. This is the fastest way to fill forms but is detectable because it doesn't generate individual keystroke events.\n\n{\"action\": \"fill\", \"selector\": \"input[name='email']\", \"value\": \"user@example.com\"}\n\nParameters:\n\nselector (required): CSS selector or XPath of the input element.\nvalue (required): Text to fill in.\n\nResponse data: {\"filled\": \"input[name='email']\"}\n\ntype\n\nTypes text into an element character-by-character via Playwright (NOT the OS). Each keystroke has a configurable delay. This is a middle ground between fill (instant but obviously automated) and system_type (OS-level, undetectable). The typing pattern is more realistic than fill but still comes through Playwright's event system.\n\n{\"action\": \"type\", \"selector\": \"#search\", \"text\": \"query\", \"delay\": 0.05}\n\nParameters:\n\nselector (required): CSS selector or XPath of the element.\ntext (required): Text to type.\ndelay (optional, default 0.05): Delay between keystrokes in seconds.\n\nResponse data: {\"typed\": \"#search\"}"
      },
      {
        "title": "Screenshots",
        "body": "Screenshots are GET requests (not POST actions).\n\nGET /screenshot/browser\n\nCaptures the browser viewport as a PNG image. This is what the page looks like to a user.\n\ncurl -s \"$STEALTHY_AUTO_BROWSE_URL/screenshot/browser?whLargest=512\" -o screenshot.png\n\nAlways resize screenshots to avoid huge images. Resize query parameters (all optional):\n\nParameterWhat it doeswhLargest=512Scales so the largest dimension is 512px, keeps aspect ratio. Use this by default.width=800Scales to 800px wide, keeps aspect ratioheight=300Scales to 300px tall, keeps aspect ratiowidth=400&height=400Forces exact 400x400 dimensions\n\nGET /screenshot/desktop\n\nCaptures the entire virtual desktop (including window chrome, taskbar, etc.) using scrot. Same resize parameters as above. Useful when you need to see things outside the browser viewport.\n\ncurl -s \"$STEALTHY_AUTO_BROWSE_URL/screenshot/desktop?whLargest=512\" -o desktop.png"
      },
      {
        "title": "Page Inspection",
        "body": "get_interactive_elements\n\nScans the page and returns every interactive element (buttons, links, inputs, selects, textareas, etc.) with their viewport coordinates. This is how you find what to click and where.\n\n{\"action\": \"get_interactive_elements\"}\n{\"action\": \"get_interactive_elements\", \"visible_only\": true}\n\nParameters:\n\nvisible_only (optional, default true): Only return elements that are currently visible on screen.\n\nResponse data:\n\n{\n  \"count\": 5,\n  \"elements\": [\n    {\n      \"i\": 0,\n      \"tag\": \"button\",\n      \"id\": \"submit-btn\",\n      \"text\": \"Submit\",\n      \"selector\": \"#submit-btn\",\n      \"x\": 400,\n      \"y\": 250,\n      \"w\": 120,\n      \"h\": 40,\n      \"visible\": true\n    },\n    {\n      \"i\": 1,\n      \"tag\": \"input\",\n      \"id\": null,\n      \"text\": \"\",\n      \"selector\": \"input[name='email']\",\n      \"x\": 300,\n      \"y\": 180,\n      \"w\": 250,\n      \"h\": 35,\n      \"visible\": true\n    }\n  ]\n}\n\nThe x, y are the center of the element — pass these directly to system_click. The selector can be used with Playwright actions like click or fill. The w, h give you the element dimensions.\n\nThis is your primary tool for understanding what you can interact with on a page. Call this before clicking anything.\n\nget_text\n\nReturns all visible text content of the page body. Text is truncated to 10,000 characters.\n\n{\"action\": \"get_text\"}\n\nResponse data: {\"text\": \"Page title\\nSome content here...\", \"length\": 1234}\n\nThis is usually the first thing to call after navigating — it tells you what's on the page without needing a screenshot.\n\nget_html\n\nReturns the full HTML source of the current page.\n\n{\"action\": \"get_html\"}\n\nResponse data: {\"html\": \"<!DOCTYPE html>...\", \"length\": 45678}\n\nUse when get_text doesn't give enough structure to understand the page layout, or when you need to find specific elements in the DOM.\n\neval\n\nExecutes arbitrary JavaScript in the page context and returns the result. The expression is evaluated via page.evaluate().\n\n{\"action\": \"eval\", \"expression\": \"document.title\"}\n{\"action\": \"eval\", \"expression\": \"document.querySelectorAll('a').length\"}\n{\"action\": \"eval\", \"expression\": \"JSON.stringify(performance.timing)\"}\n\nParameters:\n\nexpression (required): JavaScript expression to evaluate. Must return a JSON-serializable value.\n\nResponse data: {\"result\": \"Example Domain\"} — the result is whatever the expression returns."
      },
      {
        "title": "Wait Conditions",
        "body": "Use these instead of sleep to wait for page content. They're more reliable because they wait for the exact condition rather than an arbitrary time.\n\nwait_for_element\n\nWaits for an element matching a CSS selector or XPath to reach a certain state (visible, hidden, attached to DOM, detached).\n\n{\"action\": \"wait_for_element\", \"selector\": \"#results\", \"timeout\": 10}\n{\"action\": \"wait_for_element\", \"selector\": \"xpath=//div[@class='loaded']\", \"timeout\": 15}\n{\"action\": \"wait_for_element\", \"selector\": \".spinner\", \"state\": \"hidden\", \"timeout\": 10}\n\nParameters:\n\nselector (required): CSS selector or XPath (prefix with xpath=).\nstate (optional, default \"visible\"): What state to wait for. Options: \"visible\" (rendered and not hidden), \"hidden\" (not visible), \"attached\" (in DOM regardless of visibility), \"detached\" (removed from DOM).\ntimeout (optional, default 30): Max wait time in seconds. Throws error if exceeded.\n\nResponse data: {\"selector\": \"#results\", \"state\": \"visible\"}\n\nwait_for_text\n\nWaits for specific text to appear anywhere in the page body.\n\n{\"action\": \"wait_for_text\", \"text\": \"Search results\", \"timeout\": 10}\n\nParameters:\n\ntext (required): Exact text to look for (substring match on document.body.innerText).\ntimeout (optional, default 30): Max wait time in seconds.\n\nResponse data: {\"text\": \"Search results\", \"found\": true}\n\nwait_for_url\n\nWaits for the page URL to match a pattern. Useful after form submissions or redirects.\n\n{\"action\": \"wait_for_url\", \"url\": \"**/dashboard\", \"timeout\": 10}\n{\"action\": \"wait_for_url\", \"url\": \"https://example.com/success*\", \"timeout\": 15}\n\nParameters:\n\nurl (required): URL pattern to match. Supports * (any chars except /) and ** (any chars including /) glob patterns. Can also be a full URL for exact match.\ntimeout (optional, default 30): Max wait time in seconds.\n\nResponse data: {\"url\": \"https://example.com/dashboard\"}\n\nwait_for_network_idle\n\nWaits until there are no network requests in flight for 500ms. Useful for pages that load content dynamically after the initial page load.\n\n{\"action\": \"wait_for_network_idle\", \"timeout\": 30}\n\nParameters:\n\ntimeout (optional, default 30): Max wait time in seconds.\n\nResponse data: {\"idle\": true}"
      },
      {
        "title": "Tab Management",
        "body": "The browser can have multiple tabs open. One tab is \"active\" at a time — all actions operate on the active tab.\n\nlist_tabs\n\nReturns all open tabs with their URLs and which one is active.\n\n{\"action\": \"list_tabs\"}\n\nResponse data:\n\n{\n  \"count\": 2,\n  \"tabs\": [\n    {\"index\": 0, \"url\": \"https://example.com/\", \"active\": false},\n    {\"index\": 1, \"url\": \"https://other.com/\", \"active\": true}\n  ]\n}\n\nnew_tab\n\nOpens a new browser tab. Optionally navigates it to a URL. The new tab becomes the active tab.\n\n{\"action\": \"new_tab\"}\n{\"action\": \"new_tab\", \"url\": \"https://example.com\"}\n\nParameters:\n\nurl (optional): URL to navigate to in the new tab.\nwait_until (optional, default \"domcontentloaded\"): Same as goto.\n\nResponse data: {\"index\": 1, \"url\": \"https://example.com/\"}\n\nswitch_tab\n\nSwitches the active tab by index (0-based). All subsequent actions will operate on this tab.\n\n{\"action\": \"switch_tab\", \"index\": 0}\n\nParameters:\n\nindex (required): Tab index from list_tabs.\n\nResponse data: {\"index\": 0, \"url\": \"https://example.com/\"}\n\nclose_tab\n\nCloses a tab. After closing, the last remaining tab becomes active.\n\n{\"action\": \"close_tab\"}\n{\"action\": \"close_tab\", \"index\": 1}\n\nParameters:\n\nindex (optional): Tab index to close. If omitted, closes the currently active tab.\n\nResponse data: {\"closed\": true, \"remaining\": 1}"
      },
      {
        "title": "Dialog Handling",
        "body": "Browsers have modal dialogs (alert, confirm, prompt). By default, dialogs are auto-accepted (clicks OK). Use handle_dialog if you need to dismiss a dialog or provide text for a prompt.\n\nhandle_dialog\n\nCall BEFORE the action that triggers the dialog if you want to dismiss it or provide prompt text. If you don't call this, the dialog is auto-accepted (clicks OK).\n\n{\"action\": \"handle_dialog\", \"accept\": true}\n{\"action\": \"handle_dialog\", \"accept\": false}\n{\"action\": \"handle_dialog\", \"accept\": true, \"text\": \"my response\"}\n\nParameters:\n\naccept (optional, default true): true clicks OK/Accept, false clicks Cancel/Dismiss.\ntext (optional): Response text for prompt dialogs. Ignored for alert/confirm.\n\nResponse data: {\"configured\": {\"accept\": true, \"text\": null}}\n\nExample — handling a confirm dialog:\n\n# Step 1: Tell the browser to accept the next dialog\ncurl -X POST $API -H 'Content-Type: application/json' -d '{\"action\": \"handle_dialog\", \"accept\": true}'\n# Step 2: Now click the button that triggers the confirm\ncurl -X POST $API -H 'Content-Type: application/json' -d '{\"action\": \"system_click\", \"x\": 300, \"y\": 200}'\n\nget_last_dialog\n\nReturns information about the most recent dialog that appeared.\n\n{\"action\": \"get_last_dialog\"}\n\nResponse data:\n\n{\n  \"dialog\": {\n    \"type\": \"confirm\",\n    \"message\": \"Are you sure you want to delete this?\",\n    \"default_value\": \"\",\n    \"buttons\": [\"ok\", \"cancel\"]\n  }\n}\n\nReturns {\"dialog\": null} if no dialog has appeared yet. The type field is one of: \"alert\", \"confirm\", \"prompt\", \"beforeunload\"."
      },
      {
        "title": "Cookies",
        "body": "get_cookies\n\nReturns all cookies for the browser context, or cookies for specific URLs.\n\n{\"action\": \"get_cookies\"}\n{\"action\": \"get_cookies\", \"urls\": [\"https://example.com\"]}\n\nParameters:\n\nurls (optional): Array of URLs to filter cookies by. If omitted, returns all cookies.\n\nResponse data:\n\n{\n  \"count\": 3,\n  \"cookies\": [\n    {\"name\": \"session\", \"value\": \"abc123\", \"domain\": \".example.com\", \"path\": \"/\", \"httpOnly\": true, \"secure\": true, ...}\n  ]\n}\n\nset_cookie\n\nSets a cookie in the browser context.\n\n{\"action\": \"set_cookie\", \"name\": \"session\", \"value\": \"abc123\", \"url\": \"https://example.com\"}\n{\"action\": \"set_cookie\", \"name\": \"pref\", \"value\": \"dark\", \"domain\": \".example.com\", \"path\": \"/\", \"httpOnly\": false, \"secure\": true}\n\nParameters: Any standard cookie fields — name, value, url, domain, path, httpOnly, secure, sameSite, expires. At minimum you need name, value, and either url or domain.\n\nResponse data: {\"set\": \"session\"}\n\ndelete_cookies\n\nClears all cookies from the browser context.\n\n{\"action\": \"delete_cookies\"}\n\nResponse data: {\"cleared\": true}"
      },
      {
        "title": "Storage",
        "body": "Access the page's localStorage and sessionStorage. These are per-origin — you must be on the right page for the storage to be accessible.\n\nget_storage\n\nReturns all items from localStorage or sessionStorage as a key-value object.\n\n{\"action\": \"get_storage\", \"type\": \"local\"}\n{\"action\": \"get_storage\", \"type\": \"session\"}\n\nParameters:\n\ntype (optional, default \"local\"): \"local\" for localStorage, \"session\" for sessionStorage.\n\nResponse data: {\"items\": {\"theme\": \"dark\", \"lang\": \"en\"}, \"type\": \"local\"}\n\nset_storage\n\nSets a single key-value pair in localStorage or sessionStorage.\n\n{\"action\": \"set_storage\", \"type\": \"local\", \"key\": \"theme\", \"value\": \"dark\"}\n\nParameters:\n\ntype (optional, default \"local\"): \"local\" or \"session\".\nkey (required): Storage key.\nvalue (required): Storage value (string).\n\nResponse data: {\"set\": \"theme\", \"type\": \"local\"}\n\nclear_storage\n\nClears all items from localStorage or sessionStorage.\n\n{\"action\": \"clear_storage\", \"type\": \"local\"}\n{\"action\": \"clear_storage\", \"type\": \"session\"}\n\nResponse data: {\"cleared\": \"local\"}"
      },
      {
        "title": "Downloads",
        "body": "The browser automatically tracks file downloads triggered by page interactions (clicking download links, form submissions that return files, etc.).\n\nget_last_download\n\nReturns information about the most recently downloaded file.\n\n{\"action\": \"get_last_download\"}\n\nResponse data:\n\n{\n  \"download\": {\n    \"url\": \"https://example.com/file.pdf\",\n    \"filename\": \"file.pdf\",\n    \"path\": \"/tmp/playwright-downloads/abc123/file.pdf\"\n  }\n}\n\nReturns {\"download\": null} if nothing has been downloaded yet. The path is the local path inside the container where the file was saved. The filename is what the server suggested as the download name."
      },
      {
        "title": "Uploads",
        "body": "upload_file\n\nProgrammatically sets a file on an <input type=\"file\"> element without opening the OS file picker. The file must exist inside the container — use docker cp to copy files in if needed.\n\n{\"action\": \"upload_file\", \"selector\": \"#file-input\", \"file_path\": \"/tmp/document.pdf\"}\n\nParameters:\n\nselector (required): CSS selector of the file input element.\nfile_path (required): Absolute path to the file inside the container.\n\nResponse data: {\"selector\": \"#file-input\", \"file\": \"document.pdf\", \"size\": 12345}\n\nNote: After setting the file, you still need to submit the form (click the submit button) for the upload to actually happen."
      },
      {
        "title": "Network Logging",
        "body": "Capture all HTTP requests and responses the page makes. Useful for debugging, finding API endpoints the page calls, or verifying that certain resources loaded.\n\nenable_network_log\n\nStarts recording all HTTP requests and responses from the active page.\n\n{\"action\": \"enable_network_log\"}\n\nResponse data: {\"enabled\": true}\n\ndisable_network_log\n\nStops recording network activity. Already-captured entries remain.\n\n{\"action\": \"disable_network_log\"}\n\nResponse data: {\"enabled\": false}\n\nget_network_log\n\nReturns all captured network entries since logging was enabled (or last cleared).\n\n{\"action\": \"get_network_log\"}\n\nResponse data:\n\n{\n  \"count\": 4,\n  \"log\": [\n    {\"type\": \"request\", \"url\": \"https://api.example.com/data\", \"method\": \"GET\", \"resource_type\": \"fetch\", \"timestamp\": 1234567890.123},\n    {\"type\": \"response\", \"url\": \"https://api.example.com/data\", \"status\": 200, \"timestamp\": 1234567890.456},\n    {\"type\": \"request\", \"url\": \"https://cdn.example.com/style.css\", \"method\": \"GET\", \"resource_type\": \"stylesheet\", \"timestamp\": 1234567890.789},\n    {\"type\": \"response\", \"url\": \"https://cdn.example.com/style.css\", \"status\": 200, \"timestamp\": 1234567890.999}\n  ]\n}\n\nEach entry is either a \"request\" or \"response\". Requests include method and resource_type (fetch, document, stylesheet, script, image, etc.). Responses include status code.\n\nclear_network_log\n\nDeletes all captured network entries but keeps logging enabled if it was on.\n\n{\"action\": \"clear_network_log\"}\n\nResponse data: {\"cleared\": true}"
      },
      {
        "title": "Scrolling",
        "body": "scroll_to_bottom\n\nScrolls the entire page from top to bottom using JavaScript window.scrollBy(). Scrolls one viewport height at a time with a fixed delay between scrolls. When it reaches the bottom (scroll position stops changing), it scrolls back to the top. Useful for triggering lazy-loaded content.\n\n{\"action\": \"scroll_to_bottom\"}\n{\"action\": \"scroll_to_bottom\", \"delay\": 0.6}\n\nParameters:\n\ndelay (optional, default 0.4): Seconds to wait between each scroll step.\n\nResponse data: {\"scrolled\": \"bottom\"}\n\nscroll_to_bottom_humanized\n\nSame as scroll_to_bottom but uses real OS-level mouse wheel scrolling (via PyAutoGUI) with randomized scroll amounts and jittered delays to look like a human scrolling. Undetectable by behavioral analysis.\n\n{\"action\": \"scroll_to_bottom_humanized\"}\n{\"action\": \"scroll_to_bottom_humanized\", \"min_clicks\": 3, \"max_clicks\": 8, \"delay\": 0.7}\n\nParameters:\n\nmin_clicks (optional, default 2): Minimum mouse wheel clicks per scroll step.\nmax_clicks (optional, default 6): Maximum mouse wheel clicks per scroll step. A random value between min and max is chosen each time.\ndelay (optional, default 0.5): Base delay between scroll steps. Actual delay is jittered +-30%.\n\nResponse data: {\"scrolled\": \"bottom_humanized\"}"
      },
      {
        "title": "Display",
        "body": "calibrate\n\nRecalculates the mapping between viewport coordinates (what get_interactive_elements returns) and screen coordinates (what PyAutoGUI uses). The browser has window chrome (title bar, address bar) that offsets the viewport from the screen origin.\n\n{\"action\": \"calibrate\"}\n\nResponse data: {\"window_offset\": {\"x\": 0, \"y\": 74}}\n\nWhen to call this: After entering/exiting fullscreen, after the browser window is resized, or if system_click coordinates seem off. The offset is auto-calculated at startup, so you rarely need this.\n\nget_resolution\n\nReturns the virtual display resolution (from the XVFB_RESOLUTION environment variable).\n\n{\"action\": \"get_resolution\"}\n\nResponse data: {\"width\": 1920, \"height\": 1080}\n\nenter_fullscreen / exit_fullscreen\n\nToggles browser fullscreen mode (hides address bar and window chrome). In fullscreen, the viewport takes up the entire screen, so coordinates map differently.\n\n{\"action\": \"enter_fullscreen\"}\n{\"action\": \"exit_fullscreen\"}\n\nResponse data: {\"fullscreen\": true, \"changed\": true} — changed is false if already in the requested state.\n\nImportant: Call calibrate after entering/exiting fullscreen to update the coordinate mapping."
      },
      {
        "title": "Utility",
        "body": "ping\n\nHealth check that returns the current page URL. Use to verify the API is responding and the browser is alive.\n\n{\"action\": \"ping\"}\n\nResponse data: {\"message\": \"pong\", \"url\": \"https://example.com/\"}\n\nsleep\n\nPauses execution for a specified duration. Prefer wait_for_element or wait_for_text when waiting for page content — use sleep only for fixed timing needs.\n\n{\"action\": \"sleep\", \"duration\": 2}\n\nParameters:\n\nduration (optional, default 1): Seconds to sleep.\n\nResponse data: {\"slept\": 2}\n\nclose\n\nShuts down the browser. The container will stop after this.\n\n{\"action\": \"close\"}\n\nResponse data: {\"message\": \"closing\"}"
      },
      {
        "title": "State Endpoints (GET)",
        "body": "GET /state\n\nReturns the current browser state.\n\ncurl -s \"$STEALTHY_AUTO_BROWSE_URL/state\"\n\nResponse:\n\n{\n  \"status\": \"ready\",\n  \"url\": \"https://example.com/\",\n  \"title\": \"Example Domain\",\n  \"window_offset\": {\"x\": 0, \"y\": 74}\n}\n\nGET /health\n\nSimple health check. Returns ok as plain text when the API is ready.\n\ncurl -s \"$STEALTHY_AUTO_BROWSE_URL/health\""
      },
      {
        "title": "Container Options",
        "body": "# Custom display resolution\ndocker run -d -p 8080:8080 -e XVFB_RESOLUTION=1280x720 psyb0t/stealthy-auto-browse\n\n# Match timezone to your IP's geographic location (important for stealth — mismatched\n# timezone is a common bot detection signal)\ndocker run -d -p 8080:8080 -e TZ=Europe/Bucharest psyb0t/stealthy-auto-browse\n\n# Route browser traffic through an HTTP proxy\ndocker run -d -p 8080:8080 -e PROXY_URL=http://user:pass@proxy:8888 psyb0t/stealthy-auto-browse\n\n# Persistent browser profile — cookies, sessions, and fingerprint survive container restarts\ndocker run -d -p 8080:8080 -v ./profile:/userdata psyb0t/stealthy-auto-browse\n\n# Open a URL automatically on startup\ndocker run -d -p 8080:8080 psyb0t/stealthy-auto-browse https://example.com"
      },
      {
        "title": "Page Loaders (URL-Triggered Automation)",
        "body": "Page loaders are like Greasemonkey/Tampermonkey userscripts but for the HTTP API. You define a set of actions that automatically run whenever the browser navigates to a matching URL. Instead of manually sending a sequence of commands every time you visit a site, you write it once as a YAML file and the container handles it.\n\nThis is useful for things like: removing cookie popups, dismissing overlays, waiting for dynamic content, cleaning up pages before scraping, or any repetitive setup you'd otherwise do manually every time."
      },
      {
        "title": "How They Work",
        "body": "You create YAML files that define URL patterns and a list of steps\nMount those files into the container at /loaders\nWhenever goto navigates to a URL that matches a loader's pattern, the loader's steps run automatically instead of the default navigation\n\nThe steps are the exact same actions as the HTTP API. Every action you can send via POST / (goto, eval, click, system_click, sleep, scroll, wait_for_element, etc.) works as a loader step. Same names, same parameters."
      },
      {
        "title": "Setup",
        "body": "docker run -d -p 8080:8080 -p 5900:5900 \\\n  -v ./my-loaders:/loaders \\\n  psyb0t/stealthy-auto-browse"
      },
      {
        "title": "Loader Format",
        "body": "name: Human-readable name for this loader\nmatch:\n  domain: example.com         # Exact hostname match (www. is stripped automatically)\n  path_prefix: /articles      # URL path must start with this\n  regex: \"article/\\\\d+\"       # Full URL must match this regex\nsteps:\n  - action: goto              # Same actions as the HTTP API\n    url: \"${url}\"             # ${url} is replaced with the original URL\n    wait_until: networkidle\n  - action: eval\n    expression: \"document.querySelector('.cookie-banner')?.remove()\"\n  - action: wait_for_element\n    selector: \"#main-content\"\n    timeout: 10"
      },
      {
        "title": "Match Rules",
        "body": "All match fields are optional, but at least one is required. If you specify multiple fields, all of them must match for the loader to trigger:\n\ndomain: Exact hostname. www. is stripped from both sides before comparing, so domain: example.com matches www.example.com too.\npath_prefix: The URL path must start with this string. path_prefix: /blog matches /blog, /blog/post-1, /blog/archive, etc.\nregex: The full URL is tested against this regular expression."
      },
      {
        "title": "The ${url} Placeholder",
        "body": "In any string value within a step, ${url} is replaced with the original URL that was passed to goto. This lets you navigate to the URL with custom wait settings, or pass it to JavaScript:\n\nsteps:\n  - action: goto\n    url: \"${url}\"\n    wait_until: networkidle\n  - action: eval\n    expression: \"console.log('Loaded:', '${url}')\""
      },
      {
        "title": "Practical Example: Clean Scraping",
        "body": "Say you're scraping a news site that has cookie popups, newsletter modals, and lazy-loaded content. Without a loader, you'd send 5+ commands after every goto. With a loader:\n\n# loaders/news_site.yaml\nname: News Site Cleanup\nmatch:\n  domain: news-site.com\nsteps:\n  # Navigate with full network wait so everything loads\n  - action: goto\n    url: \"${url}\"\n    wait_until: networkidle\n\n  # Wait for the main content to be there\n  - action: wait_for_element\n    selector: \"article\"\n    timeout: 10\n\n  # Kill the cookie popup\n  - action: eval\n    expression: \"document.querySelector('.cookie-consent')?.remove()\"\n\n  # Kill the newsletter modal\n  - action: eval\n    expression: \"document.querySelector('.newsletter-overlay')?.remove()\"\n\n  # Scroll to trigger lazy-loaded images\n  - action: scroll_to_bottom\n    delay: 0.3\n\n  # Small pause for everything to settle\n  - action: sleep\n    duration: 1\n\nNow when you goto any URL on news-site.com, all of this happens automatically. Your response includes \"loader\": \"News Site Cleanup\" so you know it triggered."
      },
      {
        "title": "Response When a Loader Triggers",
        "body": "{\n  \"success\": true,\n  \"data\": {\n    \"loader\": \"News Site Cleanup\",\n    \"steps_executed\": 6,\n    \"last_result\": { \"success\": true, \"timestamp\": 1234567890.456, \"data\": { \"slept\": 1 } }\n  }\n}"
      },
      {
        "title": "Pre-installed Extensions",
        "body": "The browser comes with these extensions pre-installed:\n\nuBlock Origin: Ad and tracker blocking\nLocalCDN: Serves common CDN resources locally to prevent tracking\nClearURLs: Strips tracking parameters from URLs\nConsent-O-Matic: Automatically handles cookie consent popups (clicks \"reject all\" or minimal consent)"
      },
      {
        "title": "Example: Full Login Flow (Undetectable)",
        "body": "API=$STEALTHY_AUTO_BROWSE_URL\n\n# Navigate to login page\ncurl -s -X POST $API -H 'Content-Type: application/json' \\\n  -d '{\"action\": \"goto\", \"url\": \"https://example.com/login\"}'\n\n# See what's on the page\ncurl -s -X POST $API -H 'Content-Type: application/json' \\\n  -d '{\"action\": \"get_text\"}'\n\n# Find all interactive elements and their coordinates\ncurl -s -X POST $API -H 'Content-Type: application/json' \\\n  -d '{\"action\": \"get_interactive_elements\"}'\n\n# Click the email field (coordinates from get_interactive_elements)\ncurl -s -X POST $API -H 'Content-Type: application/json' \\\n  -d '{\"action\": \"system_click\", \"x\": 400, \"y\": 200}'\n\n# Type email with human-like keystrokes\ncurl -s -X POST $API -H 'Content-Type: application/json' \\\n  -d '{\"action\": \"system_type\", \"text\": \"user@example.com\"}'\n\n# Tab to password field\ncurl -s -X POST $API -H 'Content-Type: application/json' \\\n  -d '{\"action\": \"send_key\", \"key\": \"tab\"}'\n\n# Type password\ncurl -s -X POST $API -H 'Content-Type: application/json' \\\n  -d '{\"action\": \"system_type\", \"text\": \"secretpassword\"}'\n\n# Press Enter to submit\ncurl -s -X POST $API -H 'Content-Type: application/json' \\\n  -d '{\"action\": \"send_key\", \"key\": \"enter\"}'\n\n# Wait for redirect to dashboard\ncurl -s -X POST $API -H 'Content-Type: application/json' \\\n  -d '{\"action\": \"wait_for_url\", \"url\": \"**/dashboard\", \"timeout\": 15}'\n\n# Verify we're logged in\ncurl -s -X POST $API -H 'Content-Type: application/json' \\\n  -d '{\"action\": \"get_text\"}'"
      },
      {
        "title": "Tips",
        "body": "Always call get_interactive_elements before clicking — don't guess coordinates\nUse system methods for stealth — system_click, system_type, send_key are undetectable\nUse get_text first, screenshots second — text is faster and smaller\nMatch TZ to your IP location — timezone mismatch is a common bot detection signal\nResize screenshots with ?whLargest=512 — full resolution is unnecessarily large\nMount /userdata for persistent sessions — cookies, fingerprint, and profile survive restarts\nUse wait conditions instead of sleep — wait_for_element, wait_for_text, wait_for_url\nCall handle_dialog BEFORE the action that triggers it — if you need to dismiss or provide prompt text (dialogs are auto-accepted otherwise)\nCall calibrate after fullscreen changes — coordinate mapping shifts\nAdd slight delays between actions for realism — sleep with 0.5-1.5s between clicks looks more human"
      }
    ],
    "body": "stealthy-auto-browse\n\nA stealth browser running in Docker. It uses Camoufox (a custom Firefox fork) instead of Chromium, so there are zero Chrome DevTools Protocol (CDP) signals for bot detectors to find. Mouse and keyboard input happens at the OS level via PyAutoGUI — the browser itself doesn't know it's being automated, which means behavioral analysis can't detect it either.\n\nWhy This Exists\n\nStandard browser automation (Playwright + Chromium, Puppeteer, Selenium) exposes CDP signals that bot detection services (Cloudflare, DataDome, PerimeterX, Akamai) catch instantly. Even with stealth plugins, the CDP protocol is still there and detectable. This skill eliminates that entirely by using Firefox (no CDP at all) and generating input events at the OS level rather than through the browser's automation API.\n\nWhen To Use This Skill\nSite has bot detection (Cloudflare challenge pages, DataDome, PerimeterX, Akamai)\nSite blocks headless browsers or serves CAPTCHAs\nYou need a logged-in session that doesn't get banned\nAnother browser skill is getting 403s or empty/blocked responses\nYou're scraping a site that actively fights automation\nWhen NOT To Use This Skill\nSimple fetches with no bot protection — use curl or WebFetch\nSites that don't care about automation — use a regular browser skill, it's faster to set up\nYou only need static HTML — use curl\nSetup\n\n1. Start the container:\n\ndocker run -d -p 8080:8080 -p 5900:5900 psyb0t/stealthy-auto-browse\n\n\nPort 8080 is the HTTP API. Port 5900 is a noVNC web viewer where you can watch the browser in real time.\n\n2. Set the environment variable:\n\nexport STEALTHY_AUTO_BROWSE_URL=http://localhost:8080\n\n\nOr via OpenClaw config (~/.openclaw/openclaw.json):\n\n{\n  \"skills\": {\n    \"entries\": {\n      \"stealthy-auto-browse\": {\n        \"env\": {\n          \"STEALTHY_AUTO_BROWSE_URL\": \"http://localhost:8080\"\n        }\n      }\n    }\n  }\n}\n\n\n3. Verify: curl $STEALTHY_AUTO_BROWSE_URL/health returns ok when the browser is ready.\n\nHow It Works\n\nThe container runs a virtual X display (Xvfb at 1920x1080), the Camoufox browser, and an HTTP API server. You send JSON commands to the API and get JSON responses back. All commands go to POST $STEALTHY_AUTO_BROWSE_URL/ with {\"action\": \"<name>\", ...params}.\n\nEvery response has this shape:\n\n{\n  \"success\": true,\n  \"timestamp\": 1234567890.123,\n  \"data\": { ... },\n  \"error\": \"only present when success is false\"\n}\n\n\nThe data field contents vary by action — documented below for each one.\n\nUnderstanding the Two Input Modes\n\nThis is the most important concept. There are two ways to interact with pages:\n\nSystem Input (Undetectable)\n\nActions: system_click, mouse_move, mouse_click, system_type, send_key, scroll\n\nThese use PyAutoGUI to generate real OS-level mouse movements and keystrokes. The browser receives these as genuine user input — there is no way for any website JavaScript to distinguish these from a real human. Use these for stealth.\n\nSystem input works with viewport coordinates (x, y pixel positions within the browser content area). Get these coordinates from get_interactive_elements.\n\nPlaywright Input (Detectable)\n\nActions: click, fill, type\n\nThese use Playwright's DOM automation to interact with elements by CSS selector or XPath. They're faster and more reliable (no coordinate math), but they inject events through the browser's automation layer. Sophisticated behavioral analysis can potentially detect the timing patterns. Use these when speed matters more than stealth, or when you have a selector but no coordinates.\n\nWhen to Use Which\nStealth-critical sites (Cloudflare, login forms, anything with bot detection): Always use system input.\nSimple scraping where the site isn't actively fighting you: Playwright input is fine and easier.\nForm filling: Use system_click to focus the field, then system_type to enter text. This is undetectable. Using fill is faster but detectable.\nClicking buttons: If you have coordinates from get_interactive_elements, use system_click. If you only have a CSS selector, use click.\nWorkflow\n\nThis is the typical sequence for interacting with a page:\n\nNavigate: goto to load the URL\nRead the page: get_text returns all visible text — usually enough to understand the page\nIf text isn't clear: get_html gives you the full DOM structure\nIf still confused: Take a screenshot (GET /screenshot/browser?whLargest=512)\nFind interactive elements: get_interactive_elements returns all buttons, links, inputs with their x,y coordinates\nInteract: system_click to click, system_type to type, send_key for Enter/Tab/Escape\nWait for results: wait_for_element or wait_for_text instead of sleeping\nVerify: get_text again to confirm the page changed as expected\nActions Reference\nNavigation\ngoto\n\nNavigates to a URL. This is how you load pages.\n\n{\"action\": \"goto\", \"url\": \"https://example.com\"}\n{\"action\": \"goto\", \"url\": \"https://example.com\", \"wait_until\": \"networkidle\"}\n\n\nParameters:\n\nurl (required): The URL to navigate to.\nwait_until (optional, default \"domcontentloaded\"): When to consider the page loaded. Options: \"domcontentloaded\" (DOM parsed, fast), \"load\" (all resources loaded), \"networkidle\" (no network activity for 500ms, slowest but most complete).\n\nResponse data: {\"url\": \"https://example.com/\", \"title\": \"Example Domain\"}\n\nNote: If a page loader matches the URL (see Page Loaders section), the loader's steps execute instead of the default navigation. The response will include \"loader\": \"loader name\" when this happens.\n\nrefresh\n\nReloads the current page.\n\n{\"action\": \"refresh\"}\n{\"action\": \"refresh\", \"wait_until\": \"networkidle\"}\n\n\nParameters:\n\nwait_until (optional, default \"domcontentloaded\"): Same options as goto.\n\nResponse data: {\"url\": \"https://example.com/current-page\", \"title\": \"Current Page\"}\n\nSystem Input (Undetectable)\nsystem_click\n\nMoves the mouse to viewport coordinates with a human-like curve (random jitter, eased acceleration), then clicks. This is the primary way to click things stealthily.\n\n{\"action\": \"system_click\", \"x\": 500, \"y\": 300}\n{\"action\": \"system_click\", \"x\": 500, \"y\": 300, \"duration\": 0.5}\n\n\nParameters:\n\nx, y (required): Viewport coordinates — get these from get_interactive_elements.\nduration (optional): How long the mouse movement takes in seconds. If omitted, a random duration between 0.2-0.6s is used for realism.\n\nResponse data: {\"system_clicked\": {\"x\": 500, \"y\": 300}}\n\nHow it differs from mouse_click: system_click always moves the mouse first (smooth human-like path), then clicks. mouse_click can click at a position instantly without the smooth movement, or click wherever the mouse currently is.\n\nmouse_move\n\nMoves the mouse to viewport coordinates with human-like movement (jitter, eased curve) but does NOT click. Use this to hover over elements (to trigger hover menus, tooltips) or to simulate natural mouse behavior between actions.\n\n{\"action\": \"mouse_move\", \"x\": 500, \"y\": 300}\n{\"action\": \"mouse_move\", \"x\": 500, \"y\": 300, \"duration\": 0.4}\n\n\nParameters:\n\nx, y (required): Viewport coordinates.\nduration (optional): Movement time in seconds. Random 0.2-0.6s if omitted.\n\nResponse data: {\"moved_to\": {\"x\": 500, \"y\": 300}}\n\nmouse_click\n\nClicks at a position or at the current mouse location. Unlike system_click, this does NOT do a smooth mouse movement first — it's a direct click via PyAutoGUI.\n\n{\"action\": \"mouse_click\"}\n{\"action\": \"mouse_click\", \"x\": 500, \"y\": 300}\n\n\nParameters:\n\nx, y (optional): If provided, clicks at that viewport position directly. If omitted, clicks wherever the mouse currently is.\n\nResponse data: {\"clicked_at\": {\"x\": 500, \"y\": 300}} or {\"clicked_at\": \"current\"}\n\nWhen to use: After a mouse_move when you want to separate the movement and click into two steps. Or when the mouse is already positioned and you just need to click.\n\nsystem_type\n\nTypes text character-by-character via real OS keystrokes. Each keystroke has a randomized delay (jittered around the interval) to mimic human typing speed. Completely undetectable.\n\n{\"action\": \"system_type\", \"text\": \"hello world\"}\n{\"action\": \"system_type\", \"text\": \"hello world\", \"interval\": 0.12}\n\n\nParameters:\n\ntext (required): The text to type. Must click/focus an input field first.\ninterval (optional, default 0.08): Base delay between keystrokes in seconds. Actual delay is randomized +-30ms around this value.\n\nResponse data: {\"typed_len\": 11}\n\nImportant: You must click on the input field first (using system_click or click) before calling system_type. This action types into whatever is currently focused.\n\nsend_key\n\nSends a single keyboard key or key combination via OS-level input. Use this for pressing Enter to submit forms, Tab to move between fields, Escape to close dialogs, or any key combos like Ctrl+A, Ctrl+C, etc.\n\n{\"action\": \"send_key\", \"key\": \"enter\"}\n{\"action\": \"send_key\", \"key\": \"tab\"}\n{\"action\": \"send_key\", \"key\": \"escape\"}\n{\"action\": \"send_key\", \"key\": \"ctrl+a\"}\n{\"action\": \"send_key\", \"key\": \"ctrl+shift+t\"}\n\n\nParameters:\n\nkey (required): Key name or combo with + separator. Key names follow PyAutoGUI naming: enter, tab, escape, backspace, delete, up, down, left, right, home, end, pageup, pagedown, f1-f12, ctrl, alt, shift, space, etc.\n\nResponse data: {\"send_key\": \"enter\"}\n\nscroll\n\nScrolls the page using the mouse scroll wheel. Generates real OS-level scroll events.\n\n{\"action\": \"scroll\", \"amount\": -3}\n{\"action\": \"scroll\", \"amount\": 5, \"x\": 500, \"y\": 300}\n\n\nParameters:\n\namount (optional, default -3): Scroll amount. Negative = scroll down, positive = scroll up. Each unit is roughly one \"click\" of a mouse wheel.\nx, y (optional): If provided, moves the mouse to these viewport coordinates first, then scrolls. Useful for scrolling inside a specific scrollable element rather than the whole page.\n\nResponse data: {\"scrolled\": -3}\n\nPlaywright Input (Detectable)\n\nThese are faster and more convenient but use Playwright's DOM event injection, which is detectable by sophisticated behavioral analysis.\n\nclick\n\nClicks an element by CSS selector or XPath. Playwright finds the element in the DOM, scrolls it into view if needed, and dispatches click events.\n\n{\"action\": \"click\", \"selector\": \"#submit-btn\"}\n{\"action\": \"click\", \"selector\": \"button.primary\"}\n{\"action\": \"click\", \"selector\": \"xpath=//button[@id='submit-btn']\"}\n\n\nParameters:\n\nselector (required): CSS selector or XPath (prefix with xpath=).\n\nResponse data: {\"clicked\": \"#submit-btn\"}\n\nWhen to use over system_click: When you have a selector but don't want to bother getting coordinates. When the element might move around and coordinates aren't reliable. When stealth isn't critical.\n\nfill\n\nFills an input field by selector. Clears any existing content first, then sets the value. This is the fastest way to fill forms but is detectable because it doesn't generate individual keystroke events.\n\n{\"action\": \"fill\", \"selector\": \"input[name='email']\", \"value\": \"user@example.com\"}\n\n\nParameters:\n\nselector (required): CSS selector or XPath of the input element.\nvalue (required): Text to fill in.\n\nResponse data: {\"filled\": \"input[name='email']\"}\n\ntype\n\nTypes text into an element character-by-character via Playwright (NOT the OS). Each keystroke has a configurable delay. This is a middle ground between fill (instant but obviously automated) and system_type (OS-level, undetectable). The typing pattern is more realistic than fill but still comes through Playwright's event system.\n\n{\"action\": \"type\", \"selector\": \"#search\", \"text\": \"query\", \"delay\": 0.05}\n\n\nParameters:\n\nselector (required): CSS selector or XPath of the element.\ntext (required): Text to type.\ndelay (optional, default 0.05): Delay between keystrokes in seconds.\n\nResponse data: {\"typed\": \"#search\"}\n\nScreenshots\n\nScreenshots are GET requests (not POST actions).\n\nGET /screenshot/browser\n\nCaptures the browser viewport as a PNG image. This is what the page looks like to a user.\n\ncurl -s \"$STEALTHY_AUTO_BROWSE_URL/screenshot/browser?whLargest=512\" -o screenshot.png\n\n\nAlways resize screenshots to avoid huge images. Resize query parameters (all optional):\n\nParameter\tWhat it does\nwhLargest=512\tScales so the largest dimension is 512px, keeps aspect ratio. Use this by default.\nwidth=800\tScales to 800px wide, keeps aspect ratio\nheight=300\tScales to 300px tall, keeps aspect ratio\nwidth=400&height=400\tForces exact 400x400 dimensions\nGET /screenshot/desktop\n\nCaptures the entire virtual desktop (including window chrome, taskbar, etc.) using scrot. Same resize parameters as above. Useful when you need to see things outside the browser viewport.\n\ncurl -s \"$STEALTHY_AUTO_BROWSE_URL/screenshot/desktop?whLargest=512\" -o desktop.png\n\nPage Inspection\nget_interactive_elements\n\nScans the page and returns every interactive element (buttons, links, inputs, selects, textareas, etc.) with their viewport coordinates. This is how you find what to click and where.\n\n{\"action\": \"get_interactive_elements\"}\n{\"action\": \"get_interactive_elements\", \"visible_only\": true}\n\n\nParameters:\n\nvisible_only (optional, default true): Only return elements that are currently visible on screen.\n\nResponse data:\n\n{\n  \"count\": 5,\n  \"elements\": [\n    {\n      \"i\": 0,\n      \"tag\": \"button\",\n      \"id\": \"submit-btn\",\n      \"text\": \"Submit\",\n      \"selector\": \"#submit-btn\",\n      \"x\": 400,\n      \"y\": 250,\n      \"w\": 120,\n      \"h\": 40,\n      \"visible\": true\n    },\n    {\n      \"i\": 1,\n      \"tag\": \"input\",\n      \"id\": null,\n      \"text\": \"\",\n      \"selector\": \"input[name='email']\",\n      \"x\": 300,\n      \"y\": 180,\n      \"w\": 250,\n      \"h\": 35,\n      \"visible\": true\n    }\n  ]\n}\n\n\nThe x, y are the center of the element — pass these directly to system_click. The selector can be used with Playwright actions like click or fill. The w, h give you the element dimensions.\n\nThis is your primary tool for understanding what you can interact with on a page. Call this before clicking anything.\n\nget_text\n\nReturns all visible text content of the page body. Text is truncated to 10,000 characters.\n\n{\"action\": \"get_text\"}\n\n\nResponse data: {\"text\": \"Page title\\nSome content here...\", \"length\": 1234}\n\nThis is usually the first thing to call after navigating — it tells you what's on the page without needing a screenshot.\n\nget_html\n\nReturns the full HTML source of the current page.\n\n{\"action\": \"get_html\"}\n\n\nResponse data: {\"html\": \"<!DOCTYPE html>...\", \"length\": 45678}\n\nUse when get_text doesn't give enough structure to understand the page layout, or when you need to find specific elements in the DOM.\n\neval\n\nExecutes arbitrary JavaScript in the page context and returns the result. The expression is evaluated via page.evaluate().\n\n{\"action\": \"eval\", \"expression\": \"document.title\"}\n{\"action\": \"eval\", \"expression\": \"document.querySelectorAll('a').length\"}\n{\"action\": \"eval\", \"expression\": \"JSON.stringify(performance.timing)\"}\n\n\nParameters:\n\nexpression (required): JavaScript expression to evaluate. Must return a JSON-serializable value.\n\nResponse data: {\"result\": \"Example Domain\"} — the result is whatever the expression returns.\n\nWait Conditions\n\nUse these instead of sleep to wait for page content. They're more reliable because they wait for the exact condition rather than an arbitrary time.\n\nwait_for_element\n\nWaits for an element matching a CSS selector or XPath to reach a certain state (visible, hidden, attached to DOM, detached).\n\n{\"action\": \"wait_for_element\", \"selector\": \"#results\", \"timeout\": 10}\n{\"action\": \"wait_for_element\", \"selector\": \"xpath=//div[@class='loaded']\", \"timeout\": 15}\n{\"action\": \"wait_for_element\", \"selector\": \".spinner\", \"state\": \"hidden\", \"timeout\": 10}\n\n\nParameters:\n\nselector (required): CSS selector or XPath (prefix with xpath=).\nstate (optional, default \"visible\"): What state to wait for. Options: \"visible\" (rendered and not hidden), \"hidden\" (not visible), \"attached\" (in DOM regardless of visibility), \"detached\" (removed from DOM).\ntimeout (optional, default 30): Max wait time in seconds. Throws error if exceeded.\n\nResponse data: {\"selector\": \"#results\", \"state\": \"visible\"}\n\nwait_for_text\n\nWaits for specific text to appear anywhere in the page body.\n\n{\"action\": \"wait_for_text\", \"text\": \"Search results\", \"timeout\": 10}\n\n\nParameters:\n\ntext (required): Exact text to look for (substring match on document.body.innerText).\ntimeout (optional, default 30): Max wait time in seconds.\n\nResponse data: {\"text\": \"Search results\", \"found\": true}\n\nwait_for_url\n\nWaits for the page URL to match a pattern. Useful after form submissions or redirects.\n\n{\"action\": \"wait_for_url\", \"url\": \"**/dashboard\", \"timeout\": 10}\n{\"action\": \"wait_for_url\", \"url\": \"https://example.com/success*\", \"timeout\": 15}\n\n\nParameters:\n\nurl (required): URL pattern to match. Supports * (any chars except /) and ** (any chars including /) glob patterns. Can also be a full URL for exact match.\ntimeout (optional, default 30): Max wait time in seconds.\n\nResponse data: {\"url\": \"https://example.com/dashboard\"}\n\nwait_for_network_idle\n\nWaits until there are no network requests in flight for 500ms. Useful for pages that load content dynamically after the initial page load.\n\n{\"action\": \"wait_for_network_idle\", \"timeout\": 30}\n\n\nParameters:\n\ntimeout (optional, default 30): Max wait time in seconds.\n\nResponse data: {\"idle\": true}\n\nTab Management\n\nThe browser can have multiple tabs open. One tab is \"active\" at a time — all actions operate on the active tab.\n\nlist_tabs\n\nReturns all open tabs with their URLs and which one is active.\n\n{\"action\": \"list_tabs\"}\n\n\nResponse data:\n\n{\n  \"count\": 2,\n  \"tabs\": [\n    {\"index\": 0, \"url\": \"https://example.com/\", \"active\": false},\n    {\"index\": 1, \"url\": \"https://other.com/\", \"active\": true}\n  ]\n}\n\nnew_tab\n\nOpens a new browser tab. Optionally navigates it to a URL. The new tab becomes the active tab.\n\n{\"action\": \"new_tab\"}\n{\"action\": \"new_tab\", \"url\": \"https://example.com\"}\n\n\nParameters:\n\nurl (optional): URL to navigate to in the new tab.\nwait_until (optional, default \"domcontentloaded\"): Same as goto.\n\nResponse data: {\"index\": 1, \"url\": \"https://example.com/\"}\n\nswitch_tab\n\nSwitches the active tab by index (0-based). All subsequent actions will operate on this tab.\n\n{\"action\": \"switch_tab\", \"index\": 0}\n\n\nParameters:\n\nindex (required): Tab index from list_tabs.\n\nResponse data: {\"index\": 0, \"url\": \"https://example.com/\"}\n\nclose_tab\n\nCloses a tab. After closing, the last remaining tab becomes active.\n\n{\"action\": \"close_tab\"}\n{\"action\": \"close_tab\", \"index\": 1}\n\n\nParameters:\n\nindex (optional): Tab index to close. If omitted, closes the currently active tab.\n\nResponse data: {\"closed\": true, \"remaining\": 1}\n\nDialog Handling\n\nBrowsers have modal dialogs (alert, confirm, prompt). By default, dialogs are auto-accepted (clicks OK). Use handle_dialog if you need to dismiss a dialog or provide text for a prompt.\n\nhandle_dialog\n\nCall BEFORE the action that triggers the dialog if you want to dismiss it or provide prompt text. If you don't call this, the dialog is auto-accepted (clicks OK).\n\n{\"action\": \"handle_dialog\", \"accept\": true}\n{\"action\": \"handle_dialog\", \"accept\": false}\n{\"action\": \"handle_dialog\", \"accept\": true, \"text\": \"my response\"}\n\n\nParameters:\n\naccept (optional, default true): true clicks OK/Accept, false clicks Cancel/Dismiss.\ntext (optional): Response text for prompt dialogs. Ignored for alert/confirm.\n\nResponse data: {\"configured\": {\"accept\": true, \"text\": null}}\n\nExample — handling a confirm dialog:\n\n# Step 1: Tell the browser to accept the next dialog\ncurl -X POST $API -H 'Content-Type: application/json' -d '{\"action\": \"handle_dialog\", \"accept\": true}'\n# Step 2: Now click the button that triggers the confirm\ncurl -X POST $API -H 'Content-Type: application/json' -d '{\"action\": \"system_click\", \"x\": 300, \"y\": 200}'\n\nget_last_dialog\n\nReturns information about the most recent dialog that appeared.\n\n{\"action\": \"get_last_dialog\"}\n\n\nResponse data:\n\n{\n  \"dialog\": {\n    \"type\": \"confirm\",\n    \"message\": \"Are you sure you want to delete this?\",\n    \"default_value\": \"\",\n    \"buttons\": [\"ok\", \"cancel\"]\n  }\n}\n\n\nReturns {\"dialog\": null} if no dialog has appeared yet. The type field is one of: \"alert\", \"confirm\", \"prompt\", \"beforeunload\".\n\nCookies\nget_cookies\n\nReturns all cookies for the browser context, or cookies for specific URLs.\n\n{\"action\": \"get_cookies\"}\n{\"action\": \"get_cookies\", \"urls\": [\"https://example.com\"]}\n\n\nParameters:\n\nurls (optional): Array of URLs to filter cookies by. If omitted, returns all cookies.\n\nResponse data:\n\n{\n  \"count\": 3,\n  \"cookies\": [\n    {\"name\": \"session\", \"value\": \"abc123\", \"domain\": \".example.com\", \"path\": \"/\", \"httpOnly\": true, \"secure\": true, ...}\n  ]\n}\n\nset_cookie\n\nSets a cookie in the browser context.\n\n{\"action\": \"set_cookie\", \"name\": \"session\", \"value\": \"abc123\", \"url\": \"https://example.com\"}\n{\"action\": \"set_cookie\", \"name\": \"pref\", \"value\": \"dark\", \"domain\": \".example.com\", \"path\": \"/\", \"httpOnly\": false, \"secure\": true}\n\n\nParameters: Any standard cookie fields — name, value, url, domain, path, httpOnly, secure, sameSite, expires. At minimum you need name, value, and either url or domain.\n\nResponse data: {\"set\": \"session\"}\n\ndelete_cookies\n\nClears all cookies from the browser context.\n\n{\"action\": \"delete_cookies\"}\n\n\nResponse data: {\"cleared\": true}\n\nStorage\n\nAccess the page's localStorage and sessionStorage. These are per-origin — you must be on the right page for the storage to be accessible.\n\nget_storage\n\nReturns all items from localStorage or sessionStorage as a key-value object.\n\n{\"action\": \"get_storage\", \"type\": \"local\"}\n{\"action\": \"get_storage\", \"type\": \"session\"}\n\n\nParameters:\n\ntype (optional, default \"local\"): \"local\" for localStorage, \"session\" for sessionStorage.\n\nResponse data: {\"items\": {\"theme\": \"dark\", \"lang\": \"en\"}, \"type\": \"local\"}\n\nset_storage\n\nSets a single key-value pair in localStorage or sessionStorage.\n\n{\"action\": \"set_storage\", \"type\": \"local\", \"key\": \"theme\", \"value\": \"dark\"}\n\n\nParameters:\n\ntype (optional, default \"local\"): \"local\" or \"session\".\nkey (required): Storage key.\nvalue (required): Storage value (string).\n\nResponse data: {\"set\": \"theme\", \"type\": \"local\"}\n\nclear_storage\n\nClears all items from localStorage or sessionStorage.\n\n{\"action\": \"clear_storage\", \"type\": \"local\"}\n{\"action\": \"clear_storage\", \"type\": \"session\"}\n\n\nResponse data: {\"cleared\": \"local\"}\n\nDownloads\n\nThe browser automatically tracks file downloads triggered by page interactions (clicking download links, form submissions that return files, etc.).\n\nget_last_download\n\nReturns information about the most recently downloaded file.\n\n{\"action\": \"get_last_download\"}\n\n\nResponse data:\n\n{\n  \"download\": {\n    \"url\": \"https://example.com/file.pdf\",\n    \"filename\": \"file.pdf\",\n    \"path\": \"/tmp/playwright-downloads/abc123/file.pdf\"\n  }\n}\n\n\nReturns {\"download\": null} if nothing has been downloaded yet. The path is the local path inside the container where the file was saved. The filename is what the server suggested as the download name.\n\nUploads\nupload_file\n\nProgrammatically sets a file on an <input type=\"file\"> element without opening the OS file picker. The file must exist inside the container — use docker cp to copy files in if needed.\n\n{\"action\": \"upload_file\", \"selector\": \"#file-input\", \"file_path\": \"/tmp/document.pdf\"}\n\n\nParameters:\n\nselector (required): CSS selector of the file input element.\nfile_path (required): Absolute path to the file inside the container.\n\nResponse data: {\"selector\": \"#file-input\", \"file\": \"document.pdf\", \"size\": 12345}\n\nNote: After setting the file, you still need to submit the form (click the submit button) for the upload to actually happen.\n\nNetwork Logging\n\nCapture all HTTP requests and responses the page makes. Useful for debugging, finding API endpoints the page calls, or verifying that certain resources loaded.\n\nenable_network_log\n\nStarts recording all HTTP requests and responses from the active page.\n\n{\"action\": \"enable_network_log\"}\n\n\nResponse data: {\"enabled\": true}\n\ndisable_network_log\n\nStops recording network activity. Already-captured entries remain.\n\n{\"action\": \"disable_network_log\"}\n\n\nResponse data: {\"enabled\": false}\n\nget_network_log\n\nReturns all captured network entries since logging was enabled (or last cleared).\n\n{\"action\": \"get_network_log\"}\n\n\nResponse data:\n\n{\n  \"count\": 4,\n  \"log\": [\n    {\"type\": \"request\", \"url\": \"https://api.example.com/data\", \"method\": \"GET\", \"resource_type\": \"fetch\", \"timestamp\": 1234567890.123},\n    {\"type\": \"response\", \"url\": \"https://api.example.com/data\", \"status\": 200, \"timestamp\": 1234567890.456},\n    {\"type\": \"request\", \"url\": \"https://cdn.example.com/style.css\", \"method\": \"GET\", \"resource_type\": \"stylesheet\", \"timestamp\": 1234567890.789},\n    {\"type\": \"response\", \"url\": \"https://cdn.example.com/style.css\", \"status\": 200, \"timestamp\": 1234567890.999}\n  ]\n}\n\n\nEach entry is either a \"request\" or \"response\". Requests include method and resource_type (fetch, document, stylesheet, script, image, etc.). Responses include status code.\n\nclear_network_log\n\nDeletes all captured network entries but keeps logging enabled if it was on.\n\n{\"action\": \"clear_network_log\"}\n\n\nResponse data: {\"cleared\": true}\n\nScrolling\nscroll_to_bottom\n\nScrolls the entire page from top to bottom using JavaScript window.scrollBy(). Scrolls one viewport height at a time with a fixed delay between scrolls. When it reaches the bottom (scroll position stops changing), it scrolls back to the top. Useful for triggering lazy-loaded content.\n\n{\"action\": \"scroll_to_bottom\"}\n{\"action\": \"scroll_to_bottom\", \"delay\": 0.6}\n\n\nParameters:\n\ndelay (optional, default 0.4): Seconds to wait between each scroll step.\n\nResponse data: {\"scrolled\": \"bottom\"}\n\nscroll_to_bottom_humanized\n\nSame as scroll_to_bottom but uses real OS-level mouse wheel scrolling (via PyAutoGUI) with randomized scroll amounts and jittered delays to look like a human scrolling. Undetectable by behavioral analysis.\n\n{\"action\": \"scroll_to_bottom_humanized\"}\n{\"action\": \"scroll_to_bottom_humanized\", \"min_clicks\": 3, \"max_clicks\": 8, \"delay\": 0.7}\n\n\nParameters:\n\nmin_clicks (optional, default 2): Minimum mouse wheel clicks per scroll step.\nmax_clicks (optional, default 6): Maximum mouse wheel clicks per scroll step. A random value between min and max is chosen each time.\ndelay (optional, default 0.5): Base delay between scroll steps. Actual delay is jittered +-30%.\n\nResponse data: {\"scrolled\": \"bottom_humanized\"}\n\nDisplay\ncalibrate\n\nRecalculates the mapping between viewport coordinates (what get_interactive_elements returns) and screen coordinates (what PyAutoGUI uses). The browser has window chrome (title bar, address bar) that offsets the viewport from the screen origin.\n\n{\"action\": \"calibrate\"}\n\n\nResponse data: {\"window_offset\": {\"x\": 0, \"y\": 74}}\n\nWhen to call this: After entering/exiting fullscreen, after the browser window is resized, or if system_click coordinates seem off. The offset is auto-calculated at startup, so you rarely need this.\n\nget_resolution\n\nReturns the virtual display resolution (from the XVFB_RESOLUTION environment variable).\n\n{\"action\": \"get_resolution\"}\n\n\nResponse data: {\"width\": 1920, \"height\": 1080}\n\nenter_fullscreen / exit_fullscreen\n\nToggles browser fullscreen mode (hides address bar and window chrome). In fullscreen, the viewport takes up the entire screen, so coordinates map differently.\n\n{\"action\": \"enter_fullscreen\"}\n{\"action\": \"exit_fullscreen\"}\n\n\nResponse data: {\"fullscreen\": true, \"changed\": true} — changed is false if already in the requested state.\n\nImportant: Call calibrate after entering/exiting fullscreen to update the coordinate mapping.\n\nUtility\nping\n\nHealth check that returns the current page URL. Use to verify the API is responding and the browser is alive.\n\n{\"action\": \"ping\"}\n\n\nResponse data: {\"message\": \"pong\", \"url\": \"https://example.com/\"}\n\nsleep\n\nPauses execution for a specified duration. Prefer wait_for_element or wait_for_text when waiting for page content — use sleep only for fixed timing needs.\n\n{\"action\": \"sleep\", \"duration\": 2}\n\n\nParameters:\n\nduration (optional, default 1): Seconds to sleep.\n\nResponse data: {\"slept\": 2}\n\nclose\n\nShuts down the browser. The container will stop after this.\n\n{\"action\": \"close\"}\n\n\nResponse data: {\"message\": \"closing\"}\n\nState Endpoints (GET)\nGET /state\n\nReturns the current browser state.\n\ncurl -s \"$STEALTHY_AUTO_BROWSE_URL/state\"\n\n\nResponse:\n\n{\n  \"status\": \"ready\",\n  \"url\": \"https://example.com/\",\n  \"title\": \"Example Domain\",\n  \"window_offset\": {\"x\": 0, \"y\": 74}\n}\n\nGET /health\n\nSimple health check. Returns ok as plain text when the API is ready.\n\ncurl -s \"$STEALTHY_AUTO_BROWSE_URL/health\"\n\nContainer Options\n# Custom display resolution\ndocker run -d -p 8080:8080 -e XVFB_RESOLUTION=1280x720 psyb0t/stealthy-auto-browse\n\n# Match timezone to your IP's geographic location (important for stealth — mismatched\n# timezone is a common bot detection signal)\ndocker run -d -p 8080:8080 -e TZ=Europe/Bucharest psyb0t/stealthy-auto-browse\n\n# Route browser traffic through an HTTP proxy\ndocker run -d -p 8080:8080 -e PROXY_URL=http://user:pass@proxy:8888 psyb0t/stealthy-auto-browse\n\n# Persistent browser profile — cookies, sessions, and fingerprint survive container restarts\ndocker run -d -p 8080:8080 -v ./profile:/userdata psyb0t/stealthy-auto-browse\n\n# Open a URL automatically on startup\ndocker run -d -p 8080:8080 psyb0t/stealthy-auto-browse https://example.com\n\nPage Loaders (URL-Triggered Automation)\n\nPage loaders are like Greasemonkey/Tampermonkey userscripts but for the HTTP API. You define a set of actions that automatically run whenever the browser navigates to a matching URL. Instead of manually sending a sequence of commands every time you visit a site, you write it once as a YAML file and the container handles it.\n\nThis is useful for things like: removing cookie popups, dismissing overlays, waiting for dynamic content, cleaning up pages before scraping, or any repetitive setup you'd otherwise do manually every time.\n\nHow They Work\nYou create YAML files that define URL patterns and a list of steps\nMount those files into the container at /loaders\nWhenever goto navigates to a URL that matches a loader's pattern, the loader's steps run automatically instead of the default navigation\n\nThe steps are the exact same actions as the HTTP API. Every action you can send via POST / (goto, eval, click, system_click, sleep, scroll, wait_for_element, etc.) works as a loader step. Same names, same parameters.\n\nSetup\ndocker run -d -p 8080:8080 -p 5900:5900 \\\n  -v ./my-loaders:/loaders \\\n  psyb0t/stealthy-auto-browse\n\nLoader Format\nname: Human-readable name for this loader\nmatch:\n  domain: example.com         # Exact hostname match (www. is stripped automatically)\n  path_prefix: /articles      # URL path must start with this\n  regex: \"article/\\\\d+\"       # Full URL must match this regex\nsteps:\n  - action: goto              # Same actions as the HTTP API\n    url: \"${url}\"             # ${url} is replaced with the original URL\n    wait_until: networkidle\n  - action: eval\n    expression: \"document.querySelector('.cookie-banner')?.remove()\"\n  - action: wait_for_element\n    selector: \"#main-content\"\n    timeout: 10\n\nMatch Rules\n\nAll match fields are optional, but at least one is required. If you specify multiple fields, all of them must match for the loader to trigger:\n\ndomain: Exact hostname. www. is stripped from both sides before comparing, so domain: example.com matches www.example.com too.\npath_prefix: The URL path must start with this string. path_prefix: /blog matches /blog, /blog/post-1, /blog/archive, etc.\nregex: The full URL is tested against this regular expression.\nThe ${url} Placeholder\n\nIn any string value within a step, ${url} is replaced with the original URL that was passed to goto. This lets you navigate to the URL with custom wait settings, or pass it to JavaScript:\n\nsteps:\n  - action: goto\n    url: \"${url}\"\n    wait_until: networkidle\n  - action: eval\n    expression: \"console.log('Loaded:', '${url}')\"\n\nPractical Example: Clean Scraping\n\nSay you're scraping a news site that has cookie popups, newsletter modals, and lazy-loaded content. Without a loader, you'd send 5+ commands after every goto. With a loader:\n\n# loaders/news_site.yaml\nname: News Site Cleanup\nmatch:\n  domain: news-site.com\nsteps:\n  # Navigate with full network wait so everything loads\n  - action: goto\n    url: \"${url}\"\n    wait_until: networkidle\n\n  # Wait for the main content to be there\n  - action: wait_for_element\n    selector: \"article\"\n    timeout: 10\n\n  # Kill the cookie popup\n  - action: eval\n    expression: \"document.querySelector('.cookie-consent')?.remove()\"\n\n  # Kill the newsletter modal\n  - action: eval\n    expression: \"document.querySelector('.newsletter-overlay')?.remove()\"\n\n  # Scroll to trigger lazy-loaded images\n  - action: scroll_to_bottom\n    delay: 0.3\n\n  # Small pause for everything to settle\n  - action: sleep\n    duration: 1\n\n\nNow when you goto any URL on news-site.com, all of this happens automatically. Your response includes \"loader\": \"News Site Cleanup\" so you know it triggered.\n\nResponse When a Loader Triggers\n{\n  \"success\": true,\n  \"data\": {\n    \"loader\": \"News Site Cleanup\",\n    \"steps_executed\": 6,\n    \"last_result\": { \"success\": true, \"timestamp\": 1234567890.456, \"data\": { \"slept\": 1 } }\n  }\n}\n\nPre-installed Extensions\n\nThe browser comes with these extensions pre-installed:\n\nuBlock Origin: Ad and tracker blocking\nLocalCDN: Serves common CDN resources locally to prevent tracking\nClearURLs: Strips tracking parameters from URLs\nConsent-O-Matic: Automatically handles cookie consent popups (clicks \"reject all\" or minimal consent)\nExample: Full Login Flow (Undetectable)\nAPI=$STEALTHY_AUTO_BROWSE_URL\n\n# Navigate to login page\ncurl -s -X POST $API -H 'Content-Type: application/json' \\\n  -d '{\"action\": \"goto\", \"url\": \"https://example.com/login\"}'\n\n# See what's on the page\ncurl -s -X POST $API -H 'Content-Type: application/json' \\\n  -d '{\"action\": \"get_text\"}'\n\n# Find all interactive elements and their coordinates\ncurl -s -X POST $API -H 'Content-Type: application/json' \\\n  -d '{\"action\": \"get_interactive_elements\"}'\n\n# Click the email field (coordinates from get_interactive_elements)\ncurl -s -X POST $API -H 'Content-Type: application/json' \\\n  -d '{\"action\": \"system_click\", \"x\": 400, \"y\": 200}'\n\n# Type email with human-like keystrokes\ncurl -s -X POST $API -H 'Content-Type: application/json' \\\n  -d '{\"action\": \"system_type\", \"text\": \"user@example.com\"}'\n\n# Tab to password field\ncurl -s -X POST $API -H 'Content-Type: application/json' \\\n  -d '{\"action\": \"send_key\", \"key\": \"tab\"}'\n\n# Type password\ncurl -s -X POST $API -H 'Content-Type: application/json' \\\n  -d '{\"action\": \"system_type\", \"text\": \"secretpassword\"}'\n\n# Press Enter to submit\ncurl -s -X POST $API -H 'Content-Type: application/json' \\\n  -d '{\"action\": \"send_key\", \"key\": \"enter\"}'\n\n# Wait for redirect to dashboard\ncurl -s -X POST $API -H 'Content-Type: application/json' \\\n  -d '{\"action\": \"wait_for_url\", \"url\": \"**/dashboard\", \"timeout\": 15}'\n\n# Verify we're logged in\ncurl -s -X POST $API -H 'Content-Type: application/json' \\\n  -d '{\"action\": \"get_text\"}'\n\nTips\nAlways call get_interactive_elements before clicking — don't guess coordinates\nUse system methods for stealth — system_click, system_type, send_key are undetectable\nUse get_text first, screenshots second — text is faster and smaller\nMatch TZ to your IP location — timezone mismatch is a common bot detection signal\nResize screenshots with ?whLargest=512 — full resolution is unnecessarily large\nMount /userdata for persistent sessions — cookies, fingerprint, and profile survive restarts\nUse wait conditions instead of sleep — wait_for_element, wait_for_text, wait_for_url\nCall handle_dialog BEFORE the action that triggers it — if you need to dismiss or provide prompt text (dialogs are auto-accepted otherwise)\nCall calibrate after fullscreen changes — coordinate mapping shifts\nAdd slight delays between actions for realism — sleep with 0.5-1.5s between clicks looks more human"
  },
  "trust": {
    "sourceLabel": "tencent",
    "provenanceUrl": "https://clawhub.ai/psyb0t/stealthy-auto-browse",
    "publisherUrl": "https://clawhub.ai/psyb0t/stealthy-auto-browse",
    "owner": "psyb0t",
    "version": "1.3.0",
    "license": null,
    "verificationStatus": "Indexed source record"
  },
  "links": {
    "detailUrl": "https://openagent3.xyz/skills/stealthy-auto-browse",
    "downloadUrl": "https://openagent3.xyz/downloads/stealthy-auto-browse",
    "agentUrl": "https://openagent3.xyz/skills/stealthy-auto-browse/agent",
    "manifestUrl": "https://openagent3.xyz/skills/stealthy-auto-browse/agent.json",
    "briefUrl": "https://openagent3.xyz/skills/stealthy-auto-browse/agent.md"
  }
}