{
  "schemaVersion": "1.0",
  "item": {
    "slug": "agent-takeover",
    "name": "Clawfinger Agent Takeover",
    "source": "tencent",
    "type": "skill",
    "category": "AI 智能",
    "sourceUrl": "https://clawhub.ai/TracSystems/agent-takeover",
    "canonicalUrl": "https://clawhub.ai/TracSystems/agent-takeover",
    "targetPlatform": "OpenClaw"
  },
  "install": {
    "downloadMode": "redirect",
    "downloadUrl": "/downloads/agent-takeover",
    "sourceDownloadUrl": "https://wry-manatee-359.convex.site/api/v1/download?slug=agent-takeover",
    "sourcePlatform": "tencent",
    "targetPlatform": "OpenClaw",
    "installMethod": "Manual import",
    "extraction": "Extract archive",
    "prerequisites": [
      "OpenClaw"
    ],
    "packageFormat": "ZIP package",
    "includedAssets": [
      "SKILL.md"
    ],
    "primaryDoc": "SKILL.md",
    "quickSetup": [
      "Download the package from Yavira.",
      "Extract the archive and review SKILL.md first.",
      "Import or place the package into your OpenClaw setup."
    ],
    "agentAssist": {
      "summary": "Hand the extracted package to your coding agent with a concrete install brief instead of figuring it out manually.",
      "steps": [
        "Download the package from Yavira.",
        "Extract it into a folder your agent can access.",
        "Paste one of the prompts below and point your agent at the extracted folder."
      ],
      "prompts": [
        {
          "label": "New install",
          "body": "I downloaded a skill package from Yavira. Read SKILL.md from the extracted folder and install it by following the included instructions. Tell me what you changed and call out any manual steps you could not complete."
        },
        {
          "label": "Upgrade existing",
          "body": "I downloaded an updated skill package from Yavira. Read SKILL.md from the extracted folder, compare it with my current installation, and upgrade it while preserving any custom configuration unless the package docs explicitly say otherwise. Summarize what changed and any follow-up checks I should run."
        }
      ]
    },
    "sourceHealth": {
      "source": "tencent",
      "status": "healthy",
      "reason": "direct_download_ok",
      "recommendedAction": "download",
      "checkedAt": "2026-04-23T16:43:11.935Z",
      "expiresAt": "2026-04-30T16:43:11.935Z",
      "httpStatus": 200,
      "finalUrl": "https://wry-manatee-359.convex.site/api/v1/download?slug=4claw-imageboard",
      "contentType": "application/zip",
      "probeMethod": "head",
      "details": {
        "probeUrl": "https://wry-manatee-359.convex.site/api/v1/download?slug=4claw-imageboard",
        "contentDisposition": "attachment; filename=\"4claw-imageboard-1.0.1.zip\"",
        "redirectLocation": null,
        "bodySnippet": null
      },
      "scope": "source",
      "summary": "Source download looks usable.",
      "detail": "Yavira can redirect you to the upstream package for this source.",
      "primaryActionLabel": "Download for OpenClaw",
      "primaryActionHref": "/downloads/agent-takeover"
    },
    "validation": {
      "installChecklist": [
        "Use the Yavira download entry.",
        "Review SKILL.md after the package is downloaded.",
        "Confirm the extracted package contains the expected setup assets."
      ],
      "postInstallChecks": [
        "Confirm the extracted package includes the expected docs or setup files.",
        "Validate the skill or prompts are available in your target agent workspace.",
        "Capture any manual follow-up steps the agent could not complete."
      ]
    },
    "downloadPageUrl": "https://openagent3.xyz/downloads/agent-takeover",
    "agentPageUrl": "https://openagent3.xyz/skills/agent-takeover/agent",
    "manifestUrl": "https://openagent3.xyz/skills/agent-takeover/agent.json",
    "briefUrl": "https://openagent3.xyz/skills/agent-takeover/agent.md"
  },
  "agentAssist": {
    "summary": "Hand the extracted package to your coding agent with a concrete install brief instead of figuring it out manually.",
    "steps": [
      "Download the package from Yavira.",
      "Extract it into a folder your agent can access.",
      "Paste one of the prompts below and point your agent at the extracted folder."
    ],
    "prompts": [
      {
        "label": "New install",
        "body": "I downloaded a skill package from Yavira. Read SKILL.md from the extracted folder and install it by following the included instructions. Tell me what you changed and call out any manual steps you could not complete."
      },
      {
        "label": "Upgrade existing",
        "body": "I downloaded an updated skill package from Yavira. Read SKILL.md from the extracted folder, compare it with my current installation, and upgrade it while preserving any custom configuration unless the package docs explicitly say otherwise. Summarize what changed and any follow-up checks I should run."
      }
    ]
  },
  "documentation": {
    "source": "clawhub",
    "primaryDoc": "SKILL.md",
    "sections": [
      {
        "title": "Agent Takeover — Full Lifecycle Guide",
        "body": "How an external agent (OpenClaw plugin, custom script, or any WebSocket client) takes control of a live phone call, handles conversation turns directly, and hands back to the local LLM."
      },
      {
        "title": "Architecture Overview",
        "body": "Caller  <-->  Phone App  <-->  Gateway /api/turn  <-->  Local LLM\n                                      |\n                                      +-- (takeover) --> Agent WS\n\nNormal flow: phone sends audio to /api/turn, gateway runs ASR → LLM → TTS, returns audio.\n\nTakeover flow: after takeover, gateway sends turn.request to the agent WebSocket instead of calling the local LLM. The agent replies with text, gateway runs TTS, returns audio to phone."
      },
      {
        "title": "WebSocket (primary — full bidirectional control)",
        "body": "WS /api/agent/ws — No authentication required on the WebSocket itself. Connects, receives all bus events, and sends commands.\n\nSend (agent → gateway)\tFields\tDescription\ndial\tnumber\tDial outbound call via ADB\ninject\ttext, session_id\tQueue TTS message for next turn poll\ntakeover\tsession_id\tTake over LLM for this session\nrelease\tsession_id\tHand back to local LLM\nhangup\tsession_id (optional)\tForce hang up call + end session\nget_call_state\tsession_id\tQuery conversation history and state\nend_session\tsession_id\tMark session ended without phone hangup\ninject_context\tsession_id, context\tPush knowledge into LLM context\nclear_context\tsession_id\tRemove injected knowledge\nping\t—\tHeartbeat\n\nReceive (gateway → agent)\tFields\tDescription\ndial.ack\tok, detail\tDial result\ntakeover.ack\tok, session_id\tTakeover confirmed\nrelease.ack\tok, session_id\tRelease confirmed\nhangup.ack\tok, detail, session_id\tHangup result\nturn.request\tsession_id, transcript, request_id\tTakeover only — caller spoke, agent must reply\nturn.started\tsession_id\tTurn processing began\nturn.transcript\ttranscript\tASR result\nturn.reply\treply\tLLM/agent reply text\nturn.complete\tmetrics, transcript, reply, model\tTurn finished\nsession.ended\tsession_id\tSession ended (stale sweep, hangup, or explicit end)"
      },
      {
        "title": "REST (alternative — no persistent connection needed)",
        "body": "Method\tPath\tPurpose\nPOST\t/api/call/dial\t{\"number\": \"+49...\"} — dial via ADB\nPOST\t/api/call/hangup\t{\"session_id\": \"...\"} — force hangup\nPOST\t/api/call/inject\t{\"text\": \"...\", \"session_id\": \"...\"} — inject TTS\nGET\t/api/agent/sessions\tList active session IDs\nGET\t/api/agent/call/{sid}\tFull call state (history, instructions, takeover)\nPOST\t/api/agent/context/{sid}\t{\"context\": \"...\"} — inject knowledge\n\nREST cannot do takeover. Takeover requires the WebSocket for real-time turn.request / reply exchange. REST is fine for dial, inject, hangup, and observation."
      },
      {
        "title": "Takeover Turn Protocol",
        "body": "During takeover, the gateway replaces the local LLM with the agent for response generation:\n\nPhone → /api/turn (audio) → Gateway ASR → transcript\n                                              ↓\n                           Gateway sends to Agent WS:\n                           {\"type\": \"turn.request\",\n                            \"session_id\": \"abc123\",\n                            \"transcript\": \"what caller said\",\n                            \"request_id\": \"unique-id\"}\n                                              ↓\n                           Agent replies on same WS:\n                           {\"reply\": \"agent's response\",\n                            \"request_id\": \"unique-id\"}\n                                              ↓\n                           Gateway TTS → audio → Phone"
      },
      {
        "title": "Critical: request_id correlation",
        "body": "The agent must echo back the request_id from the turn.request. Without it, the gateway cannot match the reply to the pending turn and the request times out.\n\n// Gateway sends:\n{\"type\": \"turn.request\", \"session_id\": \"abc\", \"transcript\": \"hello\", \"request_id\": \"a1b2c3\"}\n\n// Agent must reply:\n{\"reply\": \"Hi there!\", \"request_id\": \"a1b2c3\"}\n\nNo type field needed in the reply — just reply + request_id."
      },
      {
        "title": "Timeout and fallback",
        "body": "If the agent doesn't reply within the timeout (default 60s, configurable via agent_takeover_timeout in config), the gateway falls back to the local LLM for that single turn. The takeover remains active — the next turn will try the agent again."
      },
      {
        "title": "Timing Model",
        "body": "Understanding timing is critical for a smooth takeover experience."
      },
      {
        "title": "Phone polling cadence",
        "body": "The phone app polls /api/turn in a tight loop:\n\n1. Record audio chunk (~2-5s of speech)\n2. POST to /api/turn\n3. Wait for response (ASR + LLM/agent + TTS)\n4. Play response audio\n5. Go to step 1\n\nThe phone does NOT poll on a fixed interval — it sends the next turn as soon as playback finishes and new audio is captured. Typical turn cycle: 3-8 seconds."
      },
      {
        "title": "Inject timing",
        "body": "inject queues a pre-synthesized TTS message. It's delivered on the next /api/turn poll, before ASR/LLM processing:\n\nAgent injects \"Hello!\" at T=0\n  ↓\nPhone polls /api/turn at T=3  (next natural poll)\n  ↓\nGateway sees pending inject → returns inject audio immediately (skips ASR/LLM)\n  ↓\nPhone plays \"Hello!\" → polls again\n\nKey implications:\n\nInject is NOT instant — there's a delay of up to one poll cycle (3-8s)\nDuring takeover, the phone is usually waiting for the agent's reply, so the next poll happens quickly after the agent responds\nMultiple injects queue up — each delivered on successive polls\nInject skips ASR entirely — the phone's recorded audio is ignored for that poll"
      },
      {
        "title": "Takeover timing",
        "body": "T=0    Agent sends {\"type\": \"takeover\", \"session_id\": \"...\"}\nT=0    Gateway immediately routes future turns to agent\nT=0    Agent gets {\"type\": \"takeover.ack\", \"ok\": true}\nT=3-8  Phone polls /api/turn → gateway ASR → turn.request sent to agent\nT=3-8  Agent replies → gateway TTS → phone plays agent's response\n\nTakeover takes effect instantly on the gateway side. The first turn.request arrives on the next phone poll."
      },
      {
        "title": "Release timing",
        "body": "T=0    Agent sends {\"type\": \"release\", \"session_id\": \"...\"}\nT=0    Gateway removes takeover → local LLM handles future turns\nT=0    Agent gets {\"type\": \"release.ack\", \"ok\": true}\nT=3-8  Phone polls /api/turn → local LLM responds (no more turn.requests to agent)\n\nRelease is also instant. The agent continues receiving bus events (turn.complete, etc.) but no longer gets turn.request messages."
      },
      {
        "title": "Inject + Takeover ordering",
        "body": "When you inject a greeting AND takeover in quick succession:\n\nT=0.0  Agent injects greeting text\nT=0.5  Agent sends takeover\nT=3    Phone polls → gets inject (greeting plays) — takeover is active but no turn.request yet\nT=8    Phone polls again → NOW it's a takeover turn → turn.request sent to agent\n\nThe inject is consumed first (it takes priority in the turn endpoint), then takeover kicks in on the subsequent poll. This is the correct order for \"inject greeting then take over.\""
      },
      {
        "title": "Dial-to-first-turn latency",
        "body": "T=0     Agent sends dial command\nT=0     ADB broadcast sent to phone\nT=1-3   Phone initiates outbound call\nT=5-30  Callee picks up (depends on the person)\nT=+1    Phone detects call connected, sends first /api/turn (greeting)\nT=+2    Gateway processes greeting (forced_reply → TTS only, no ASR/LLM)\nT=+5    Greeting plays, phone captures first real audio\nT=+8    First real turn arrives at gateway\n\nTotal dial-to-first-real-turn: 10-40 seconds depending on pickup time."
      },
      {
        "title": "Session Lifecycle",
        "body": "Sessions have a TTL of 60 seconds of inactivity (configurable via session_ttl). The phone polls every 3-8s during a call, so active calls never hit the TTL. But if:\n\n- The phone crashes or loses USB connection\n- The caller hangs up and the phone doesn't send /api/session/end\n\n...the session auto-ends after 60s of no polls.\n\nSession detection after dial: After dialing, the agent needs the session ID. Options:\n\n1. Watch bus events — a turn.started event with session_id arrives when the call connects\n2. Poll GET /api/agent/sessions — check for new session IDs\n3. Send get_call_state — if you know the session ID\n\nOption 1 (events) is most reliable and fastest."
      },
      {
        "title": "Complete Takeover Lifecycle",
        "body": "1. CONNECT     ws://gateway:8996/api/agent/ws\n2. DIAL        {\"type\": \"dial\", \"number\": \"+49...\"}\n   WAIT        for dial.ack (ok: true)\n3. DISCOVER    watch events for session_id (turn.started or session.started)\n4. INJECT      {\"type\": \"inject\", \"session_id\": \"...\", \"text\": \"Custom greeting\"}\n5. TAKEOVER    {\"type\": \"takeover\", \"session_id\": \"...\"}\n   WAIT        for takeover.ack (ok: true)\n6. HANDLE      receive turn.request → reply with {reply, request_id}\n   REPEAT      for N turns\n7. RELEASE     {\"type\": \"release\", \"session_id\": \"...\"}\n   WAIT        for release.ack (ok: true)\n8. OBSERVE     watch turn.complete events → model != \"agent\" confirms local LLM resumed\n9. HANGUP      {\"type\": \"hangup\", \"session_id\": \"...\"} (optional — end call)"
      },
      {
        "title": "Error Handling",
        "body": "Scenario\tWhat happens\nAgent WS disconnects during takeover\tAll takeovers auto-released, local LLM resumes\nAgent doesn't reply within 60s\tThat turn falls back to local LLM; takeover stays active for next turn\nSession ends during takeover\tsession.ended event; further turn.requests stop\nMultiple agents take over same session\tLast takeover wins (overwrites previous)\nTakeover of non-existent session\ttakeover.ack with ok: false\nRelease without takeover\trelease.ack with ok: false"
      },
      {
        "title": "OpenClaw Plugin (Clawfinger)",
        "body": "If using the Clawfinger OpenClaw plugin, you don't need raw WebSocket code. The plugin tools map directly:\n\nLifecycle step\tPlugin tool\nDial\tclawfinger_dial\nCheck sessions\tclawfinger_sessions\nInspect state\tclawfinger_call_state\nInject greeting\tclawfinger_inject\nTake over\tclawfinger_takeover\nWait for caller\tclawfinger_turn_wait — blocks until caller speaks, returns transcript + request_id\nReply to caller\tclawfinger_turn_reply — send response text with the request_id\nRelease\tclawfinger_release\nHang up\tclawfinger_hangup\n\nTakeover tool workflow:\n\nclawfinger_takeover(session_id)     → \"Takeover active.\"\nclawfinger_turn_wait()              → {transcript: \"...\", request_id: \"abc123\"}\nclawfinger_turn_reply(request_id, reply)  → \"Reply sent.\"\nclawfinger_turn_wait()              → next turn...\nclawfinger_turn_reply(...)          → ...\nclawfinger_release(session_id)      → \"Released.\"\n\nSlash commands: /clawfinger dial +49..., /clawfinger takeover <sid>, /clawfinger release <sid>, /clawfinger hangup."
      },
      {
        "title": "Test Case: Human-Guided Takeover (3 turns)",
        "body": "A complete test walkthrough. Requires: gateway running, phone connected via ADB, a real phone number to call."
      },
      {
        "title": "Prerequisites",
        "body": "Gateway running on 127.0.0.1:8996\nPhone connected via ADB (adb devices shows device)\nADB reverse active (adb reverse tcp:8996 tcp:8996)\nwebsockets Python package installed (pip install websockets)"
      },
      {
        "title": "Test Script",
        "body": "Save as test_takeover.py in the gateway directory:\n\n#!/usr/bin/env python3\n\"\"\"Live takeover test: dial, inject greeting, handle 3 turns, release.\"\"\"\n\nimport asyncio\nimport json\nimport time\n\nimport websockets\n\nGW = \"ws://127.0.0.1:8996/api/agent/ws\"\nDIAL_NUMBER = \"+49123456789\"  # <-- change to your number\nGREETING = (\n    \"Hello! This is a test from the agent takeover system. \"\n    \"I am now controlling this call. Please say something \"\n    \"and I will respond for three turns, then hand back to the local assistant.\"\n)\nREPLIES = [\n    \"That's interesting! I heard you clearly. This is turn one of three. Say something else.\",\n    \"Got it! That was turn two. One more turn, then I hand back to the local assistant.\",\n    \"Perfect, that was turn three! Releasing control now. You should notice a change. Goodbye from the agent!\",\n]\n\n\nasync def main():\n    print(f\"[*] Connecting to agent WS: {GW}\")\n    async with websockets.connect(GW) as ws:\n        print(\"[+] Connected\")\n\n        # Helper: drain pending events\n        async def drain(timeout=1.0):\n            events = []\n            try:\n                while True:\n                    raw = await asyncio.wait_for(ws.recv(), timeout=timeout)\n                    ev = json.loads(raw)\n                    events.append(ev)\n                    print(f\"    [event] {ev.get('type', '?')}\")\n            except asyncio.TimeoutError:\n                pass\n            return events\n\n        await drain(0.5)\n\n        # 1. 
Dial\n        print(f\"\\n[1] Dialing {DIAL_NUMBER}...\")\n        await ws.send(json.dumps({\"type\": \"dial\", \"number\": DIAL_NUMBER}))\n        while True:\n            ev = json.loads(await asyncio.wait_for(ws.recv(), timeout=15))\n            print(f\"    [event] {ev.get('type')}\")\n            if ev.get(\"type\") == \"dial.ack\":\n                if not ev.get(\"ok\"):\n                    print(f\"    FAIL: {ev.get('detail')}\")\n                    return\n                print(\"    OK: dial succeeded\")\n                break\n\n        # 2. Wait for session\n        print(\"\\n[2] Waiting for call pickup (up to 60s)...\")\n        session_id = None\n        start = time.time()\n        while time.time() - start < 60:\n            try:\n                ev = json.loads(await asyncio.wait_for(ws.recv(), timeout=2))\n                print(f\"    [event] {ev.get('type')}\")\n                if ev.get(\"session_id\"):\n                    session_id = ev[\"session_id\"]\n                    print(f\"    OK: session = {session_id}\")\n                    break\n            except asyncio.TimeoutError:\n                pass\n        if not session_id:\n            events = await drain(5.0)\n            for ev in events:\n                if ev.get(\"session_id\"):\n                    session_id = ev[\"session_id\"]\n                    break\n        if not session_id:\n            print(\"    FAIL: no session appeared\")\n            return\n\n        # 3. Inject greeting\n        print(f\"\\n[3] Injecting greeting...\")\n        await ws.send(json.dumps({\n            \"type\": \"inject\", \"session_id\": session_id, \"text\": GREETING,\n        }))\n        print(\"    OK: greeting queued (delivers on next turn poll)\")\n        await asyncio.sleep(0.5)\n\n        # 4. 
Takeover\n        print(f\"\\n[4] Taking over session {session_id[:12]}...\")\n        await ws.send(json.dumps({\"type\": \"takeover\", \"session_id\": session_id}))\n        while True:\n            ev = json.loads(await asyncio.wait_for(ws.recv(), timeout=10))\n            print(f\"    [event] {ev.get('type')}\")\n            if ev.get(\"type\") == \"takeover.ack\":\n                print(f\"    OK: takeover {'succeeded' if ev.get('ok') else 'FAILED'}\")\n                if not ev.get(\"ok\"):\n                    return\n                break\n\n        # 5. Handle 3 turns\n        print(f\"\\n[5] Handling {len(REPLIES)} turns...\")\n        turns = 0\n        while turns < len(REPLIES):\n            try:\n                ev = json.loads(await asyncio.wait_for(ws.recv(), timeout=90))\n                if ev.get(\"type\") == \"turn.request\":\n                    turns += 1\n                    rid = ev.get(\"request_id\", \"\")\n                    print(f\"\\n    Turn {turns}/{len(REPLIES)}\")\n                    print(f\"      Caller: {ev.get('transcript', '')!r}\")\n                    print(f\"      Reply:  {REPLIES[turns-1]!r}\")\n                    await ws.send(json.dumps({\n                        \"reply\": REPLIES[turns - 1], \"request_id\": rid,\n                    }))\n                    print(f\"      Sent (request_id={rid[:8]}...)\")\n                else:\n                    print(f\"    [event] {ev.get('type')}\")\n            except asyncio.TimeoutError:\n                print(f\"    TIMEOUT after {turns} turns\")\n                break\n\n        # 6. 
Release\n        print(f\"\\n[6] Releasing takeover...\")\n        await ws.send(json.dumps({\"type\": \"release\", \"session_id\": session_id}))\n        while True:\n            ev = json.loads(await asyncio.wait_for(ws.recv(), timeout=10))\n            print(f\"    [event] {ev.get('type')}\")\n            if ev.get(\"type\") == \"release.ack\":\n                print(f\"    OK: released, local LLM resumes\")\n                break\n\n        # 7. Observe post-release\n        print(f\"\\n[7] Observing post-release (30s)...\")\n        t0 = time.time()\n        while time.time() - t0 < 30:\n            try:\n                ev = json.loads(await asyncio.wait_for(ws.recv(), timeout=5))\n                etype = ev.get(\"type\", \"\")\n                if etype == \"turn.complete\":\n                    model = ev.get(\"model\", ev.get(\"metrics\", {}).get(\"llm_model\", \"\"))\n                    print(f\"    [turn] model={model} reply={ev.get('reply', '')[:60]!r}\")\n                elif etype == \"session.ended\":\n                    print(f\"    [session ended]\")\n                    break\n                else:\n                    print(f\"    [event] {etype}\")\n            except asyncio.TimeoutError:\n                print(\"    (quiet)\")\n\n        print(\"\\n[*] Test complete!\")\n\n\nif __name__ == \"__main__\":\n    try:\n        asyncio.run(main())\n    except KeyboardInterrupt:\n        print(\"\\n[*] Interrupted\")\n    except Exception as e:\n        print(f\"\\n[!] Error: {e}\")\n        raise"
      },
      {
        "title": "Running the Test",
        "body": "cd /path/to/gateway\npython3 test_takeover.py"
      },
      {
        "title": "Expected Output",
        "body": "[*] Connecting to agent WS: ws://127.0.0.1:8996/api/agent/ws\n[+] Connected\n    [event] agent.connected\n\n[1] Dialing +49123456789...\n    [event] dial.ack\n    OK: dial succeeded\n\n[2] Waiting for call pickup (up to 60s)...\n    [event] call.dial\n    [event] turn.started\n    OK: session = d56a80fc30dc42b5ab2cecd2484ff847\n\n[3] Injecting greeting...\n    OK: greeting queued (delivers on next turn poll)\n\n[4] Taking over session d56a80fc30dc...\n    [event] turn.reply\n    [event] turn.complete\n    [event] agent.inject\n    [event] agent.takeover\n    [event] takeover.ack\n    OK: takeover succeeded\n\n[5] Handling 3 turns...\n    [event] turn.started\n    [event] turn.reply          <-- this is the injected greeting being delivered\n    [event] turn.complete\n    [event] turn.started\n    [event] turn.transcript\n\n    Turn 1/3\n      Caller: \"Okay, I'm saying something.\"\n      Reply:  \"That's interesting! I heard you clearly. ...\"\n      Sent (request_id=36fcc0e4...)\n    [event] turn.reply\n    [event] turn.complete\n    [event] turn.started\n    [event] turn.transcript\n\n    Turn 2/3\n      Caller: \"Okay, I'm saying something else.\"\n      Reply:  \"Got it! That was turn two. ...\"\n      Sent (request_id=3582e133...)\n\n    ...turn 3 similar...\n\n[6] Releasing takeover...\n    [event] agent.release\n    [event] release.ack\n    OK: released, local LLM resumes\n\n[7] Observing post-release (30s)...\n    [event] turn.reply\n    [turn] model=local/... reply='Hello! How can I help you?'\n\n[*] Test complete!"
      },
      {
        "title": "What to Verify (Human)",
        "body": "Greeting: You hear the custom greeting text spoken by the TTS voice\nTurns 1-3: You speak, and the canned agent replies play back (not the local LLM's responses)\nPost-release: After turn 3, the next time you speak, the response is clearly different — it's the local LLM's personality/style, not the canned replies\nLatency: Each turn should complete in 2-8 seconds (ASR + TTS, no LLM inference during takeover)\nNo errors: No timeouts, no dropped turns, no silence gaps"
      },
      {
        "title": "Troubleshooting",
        "body": "Symptom\tCause\tFix\ndial.ack ok but no session appears\tPhone not picking up (DIALER role lost, app not running)\tCheck DIALER role, restart app\nturn.request never arrives\tTakeover didn't register, or phone isn't polling\tCheck takeover.ack was ok: true; check phone logs\nGreeting doesn't play\tInject arrived after takeover consumed the poll\tInject BEFORE takeover, add 0.5s delay\nAgent reply not heard\tMissing request_id in reply\tAlways echo back the exact request_id\n60s timeout on turn\tAgent reply too slow\tCheck network, agent processing time\nLocal LLM doesn't resume after release\tRelease failed\tCheck release.ack was ok: true\nPhone stops picking up calls\tDIALER role reset (reboot) or USB issue\tRun post-reboot recovery"
      }
    ],
    "body": "Agent Takeover — Full Lifecycle Guide\n\nHow an external agent (OpenClaw plugin, custom script, or any WebSocket client) takes control of a live phone call, handles conversation turns directly, and hands back to the local LLM.\n\nArchitecture Overview\nCaller  <-->  Phone App  <-->  Gateway /api/turn  <-->  Local LLM\n                                      |\n                                      +-- (takeover) --> Agent WS\n\n\nNormal flow: phone sends audio to /api/turn, gateway runs ASR → LLM → TTS, returns audio.\n\nTakeover flow: after takeover, gateway sends turn.request to the agent WebSocket instead of calling the local LLM. The agent replies with text, gateway runs TTS, returns audio to phone.\n\nEndpoints Used\nWebSocket (primary — full bidirectional control)\n\nWS /api/agent/ws — No authentication required on the WebSocket itself. Connects, receives all bus events, and sends commands.\n\nSend (agent → gateway)\tFields\tDescription\ndial\tnumber\tDial outbound call via ADB\ninject\ttext, session_id\tQueue TTS message for next turn poll\ntakeover\tsession_id\tTake over LLM for this session\nrelease\tsession_id\tHand back to local LLM\nhangup\tsession_id (optional)\tForce hang up call + end session\nget_call_state\tsession_id\tQuery conversation history and state\nend_session\tsession_id\tMark session ended without phone hangup\ninject_context\tsession_id, context\tPush knowledge into LLM context\nclear_context\tsession_id\tRemove injected knowledge\nping\t—\tHeartbeat\nReceive (gateway → agent)\tFields\tDescription\ndial.ack\tok, detail\tDial result\ntakeover.ack\tok, session_id\tTakeover confirmed\nrelease.ack\tok, session_id\tRelease confirmed\nhangup.ack\tok, detail, session_id\tHangup result\nturn.request\tsession_id, transcript, request_id\tTakeover only — caller spoke, agent must reply\nturn.started\tsession_id\tTurn processing began\nturn.transcript\ttranscript\tASR result\nturn.reply\treply\tLLM/agent reply text\nturn.complete\tmetrics, 
transcript, reply, model\tTurn finished\nsession.ended\tsession_id\tSession ended (stale sweep, hangup, or explicit end)\nREST (alternative — no persistent connection needed)\nMethod\tPath\tPurpose\nPOST\t/api/call/dial\t{\"number\": \"+49...\"} — dial via ADB\nPOST\t/api/call/hangup\t{\"session_id\": \"...\"} — force hangup\nPOST\t/api/call/inject\t{\"text\": \"...\", \"session_id\": \"...\"} — inject TTS\nGET\t/api/agent/sessions\tList active session IDs\nGET\t/api/agent/call/{sid}\tFull call state (history, instructions, takeover)\nPOST\t/api/agent/context/{sid}\t{\"context\": \"...\"} — inject knowledge\n\nREST cannot do takeover. Takeover requires the WebSocket for real-time turn.request / reply exchange. REST is fine for dial, inject, hangup, and observation.\n\nTakeover Turn Protocol\n\nDuring takeover, the gateway replaces the local LLM with the agent for response generation:\n\nPhone → /api/turn (audio) → Gateway ASR → transcript\n                                              ↓\n                           Gateway sends to Agent WS:\n                           {\"type\": \"turn.request\",\n                            \"session_id\": \"abc123\",\n                            \"transcript\": \"what caller said\",\n                            \"request_id\": \"unique-id\"}\n                                              ↓\n                           Agent replies on same WS:\n                           {\"reply\": \"agent's response\",\n                            \"request_id\": \"unique-id\"}\n                                              ↓\n                           Gateway TTS → audio → Phone\n\nCritical: request_id correlation\n\nThe agent must echo back the request_id from the turn.request. 
Without it, the gateway cannot match the reply to the pending turn and the request times out.\n\n// Gateway sends:\n{\"type\": \"turn.request\", \"session_id\": \"abc\", \"transcript\": \"hello\", \"request_id\": \"a1b2c3\"}\n\n// Agent must reply:\n{\"reply\": \"Hi there!\", \"request_id\": \"a1b2c3\"}\n\n\nNo type field needed in the reply — just reply + request_id.\n\nTimeout and fallback\n\nIf the agent doesn't reply within the timeout (default 60s, configurable via agent_takeover_timeout in config), the gateway falls back to the local LLM for that single turn. The takeover remains active — the next turn will try the agent again.\n\nTiming Model\n\nUnderstanding timing is critical for a smooth takeover experience.\n\nPhone polling cadence\n\nThe phone app polls /api/turn in a tight loop:\n\nRecord audio chunk (~2-5s of speech)\nPOST to /api/turn\nWait for response (ASR + LLM/agent + TTS)\nPlay response audio\nGo to step 1\n\nThe phone does NOT poll on a fixed interval — it sends the next turn as soon as playback finishes and new audio is captured. Typical turn cycle: 3-8 seconds.\n\nInject timing\n\ninject queues a pre-synthesized TTS message. 
It's delivered on the next /api/turn poll, before ASR/LLM processing:\n\nAgent injects \"Hello!\" at T=0\n  ↓\nPhone polls /api/turn at T=3  (next natural poll)\n  ↓\nGateway sees pending inject → returns inject audio immediately (skips ASR/LLM)\n  ↓\nPhone plays \"Hello!\" → polls again\n\n\nKey implications:\n\nInject is NOT instant — there's a delay of up to one poll cycle (3-8s)\nDuring takeover, the phone is usually waiting for the agent's reply, so the next poll happens quickly after the agent responds\nMultiple injects queue up — each delivered on successive polls\nInject skips ASR entirely — the phone's recorded audio is ignored for that poll\nTakeover timing\nT=0    Agent sends {\"type\": \"takeover\", \"session_id\": \"...\"}\nT=0    Gateway immediately routes future turns to agent\nT=0    Agent gets {\"type\": \"takeover.ack\", \"ok\": true}\nT=3-8  Phone polls /api/turn → gateway ASR → turn.request sent to agent\nT=3-8  Agent replies → gateway TTS → phone plays agent's response\n\n\nTakeover takes effect instantly on the gateway side. The first turn.request arrives on the next phone poll.\n\nRelease timing\nT=0    Agent sends {\"type\": \"release\", \"session_id\": \"...\"}\nT=0    Gateway removes takeover → local LLM handles future turns\nT=0    Agent gets {\"type\": \"release.ack\", \"ok\": true}\nT=3-8  Phone polls /api/turn → local LLM responds (no more turn.requests to agent)\n\n\nRelease is also instant. The agent continues receiving bus events (turn.complete, etc.) 
but no longer gets turn.request messages.\n\nInject + Takeover ordering\n\nWhen you inject a greeting AND takeover in quick succession:\n\nT=0.0  Agent injects greeting text\nT=0.5  Agent sends takeover\nT=3    Phone polls → gets inject (greeting plays) — takeover is active but no turn.request yet\nT=8    Phone polls again → NOW it's a takeover turn → turn.request sent to agent\n\n\nThe inject is consumed first (it takes priority in the turn endpoint), then takeover kicks in on the subsequent poll. This is the correct order for \"inject greeting then take over.\"\n\nDial-to-first-turn latency\nT=0     Agent sends dial command\nT=0     ADB broadcast sent to phone\nT=1-3   Phone initiates outbound call\nT=5-30  Callee picks up (depends on the person)\nT=+1    Phone detects call connected, sends first /api/turn (greeting)\nT=+2    Gateway processes greeting (forced_reply → TTS only, no ASR/LLM)\nT=+5    Greeting plays, phone captures first real audio\nT=+8    First real turn arrives at gateway\n\n\nTotal dial-to-first-real-turn: 10-40 seconds depending on pickup time.\n\nSession Lifecycle\n\nSessions have a TTL of 60 seconds of inactivity (configurable via session_ttl). The phone polls every 3-8s during a call, so active calls never hit the TTL. But if:\n\nThe phone crashes or loses USB connection\nThe caller hangs up and the phone doesn't send /api/session/end\n\n...the session auto-ends after 60s of no polls.\n\nSession detection after dial: After dialing, the agent needs the session ID. Options:\n\nWatch bus events — a turn.started event with session_id arrives when the call connects\nPoll GET /api/agent/sessions — check for new session IDs\nSend get_call_state — if you know the session ID\n\nOption 1 (events) is most reliable and fastest.\n\nComplete Takeover Lifecycle\n1. CONNECT     ws://gateway:8996/api/agent/ws\n2. DIAL        {\"type\": \"dial\", \"number\": \"+49...\"}\n   WAIT        for dial.ack (ok: true)\n3. 
DISCOVER    watch events for session_id (turn.started or session.started)\n4. INJECT      {\"type\": \"inject\", \"session_id\": \"...\", \"text\": \"Custom greeting\"}\n5. TAKEOVER    {\"type\": \"takeover\", \"session_id\": \"...\"}\n   WAIT        for takeover.ack (ok: true)\n6. HANDLE      receive turn.request → reply with {reply, request_id}\n   REPEAT      for N turns\n7. RELEASE     {\"type\": \"release\", \"session_id\": \"...\"}\n   WAIT        for release.ack (ok: true)\n8. OBSERVE     watch turn.complete events → model != \"agent\" confirms local LLM resumed\n9. HANGUP      {\"type\": \"hangup\", \"session_id\": \"...\"} (optional — end call)\n\nError Handling\nScenario\tWhat happens\nAgent WS disconnects during takeover\tAll takeovers auto-released, local LLM resumes\nAgent doesn't reply within 60s\tThat turn falls back to local LLM; takeover stays active for next turn\nSession ends during takeover\tsession.ended event; further turn.requests stop\nMultiple agents take over same session\tLast takeover wins (overwrites previous)\nTakeover of non-existent session\ttakeover.ack with ok: false\nRelease without takeover\trelease.ack with ok: false\nOpenClaw Plugin (Clawfinger)\n\nIf using the Clawfinger OpenClaw plugin, you don't need raw WebSocket code. 
The plugin tools map directly:\n\nLifecycle step\tPlugin tool\nDial\tclawfinger_dial\nCheck sessions\tclawfinger_sessions\nInspect state\tclawfinger_call_state\nInject greeting\tclawfinger_inject\nTake over\tclawfinger_takeover\nWait for caller\tclawfinger_turn_wait — blocks until caller speaks, returns transcript + request_id\nReply to caller\tclawfinger_turn_reply — send response text with the request_id\nRelease\tclawfinger_release\nHang up\tclawfinger_hangup\n\nTakeover tool workflow:\n\nclawfinger_takeover(session_id)     → \"Takeover active.\"\nclawfinger_turn_wait()              → {transcript: \"...\", request_id: \"abc123\"}\nclawfinger_turn_reply(request_id, reply)  → \"Reply sent.\"\nclawfinger_turn_wait()              → next turn...\nclawfinger_turn_reply(...)          → ...\nclawfinger_release(session_id)      → \"Released.\"\n\n\nSlash commands: /clawfinger dial +49..., /clawfinger takeover <sid>, /clawfinger release <sid>, /clawfinger hangup.\n\nTest Case: Human-Guided Takeover (3 turns)\n\nA complete test walkthrough. Requires: gateway running, phone connected via ADB, a real phone number to call.\n\nPrerequisites\nGateway running on 127.0.0.1:8996\nPhone connected via ADB (adb devices shows device)\nADB reverse active (adb reverse tcp:8996 tcp:8996)\nwebsockets Python package installed (pip install websockets)\nTest Script\n\nSave as test_takeover.py in the gateway directory:\n\n#!/usr/bin/env python3\n\"\"\"Live takeover test: dial, inject greeting, handle 3 turns, release.\"\"\"\n\nimport asyncio\nimport json\nimport time\n\nimport websockets\n\nGW = \"ws://127.0.0.1:8996/api/agent/ws\"\nDIAL_NUMBER = \"+49123456789\"  # <-- change to your number\nGREETING = (\n    \"Hello! This is a test from the agent takeover system. \"\n    \"I am now controlling this call. Please say something \"\n    \"and I will respond for three turns, then hand back to the local assistant.\"\n)\nREPLIES = [\n    \"That's interesting! I heard you clearly. 
This is turn one of three. Say something else.\",\n    \"Got it! That was turn two. One more turn, then I hand back to the local assistant.\",\n    \"Perfect, that was turn three! Releasing control now. You should notice a change. Goodbye from the agent!\",\n]\n\n\nasync def main():\n    print(f\"[*] Connecting to agent WS: {GW}\")\n    async with websockets.connect(GW) as ws:\n        print(\"[+] Connected\")\n\n        # Helper: drain pending events\n        async def drain(timeout=1.0):\n            events = []\n            try:\n                while True:\n                    raw = await asyncio.wait_for(ws.recv(), timeout=timeout)\n                    ev = json.loads(raw)\n                    events.append(ev)\n                    print(f\"    [event] {ev.get('type', '?')}\")\n            except asyncio.TimeoutError:\n                pass\n            return events\n\n        await drain(0.5)\n\n        # 1. Dial\n        print(f\"\\n[1] Dialing {DIAL_NUMBER}...\")\n        await ws.send(json.dumps({\"type\": \"dial\", \"number\": DIAL_NUMBER}))\n        while True:\n            ev = json.loads(await asyncio.wait_for(ws.recv(), timeout=15))\n            print(f\"    [event] {ev.get('type')}\")\n            if ev.get(\"type\") == \"dial.ack\":\n                if not ev.get(\"ok\"):\n                    print(f\"    FAIL: {ev.get('detail')}\")\n                    return\n                print(\"    OK: dial succeeded\")\n                break\n\n        # 2. 
Wait for session\n        print(\"\\n[2] Waiting for call pickup (up to 60s)...\")\n        session_id = None\n        start = time.time()\n        while time.time() - start < 60:\n            try:\n                ev = json.loads(await asyncio.wait_for(ws.recv(), timeout=2))\n                print(f\"    [event] {ev.get('type')}\")\n                if ev.get(\"session_id\"):\n                    session_id = ev[\"session_id\"]\n                    print(f\"    OK: session = {session_id}\")\n                    break\n            except asyncio.TimeoutError:\n                pass\n        if not session_id:\n            events = await drain(5.0)\n            for ev in events:\n                if ev.get(\"session_id\"):\n                    session_id = ev[\"session_id\"]\n                    break\n        if not session_id:\n            print(\"    FAIL: no session appeared\")\n            return\n\n        # 3. Inject greeting\n        print(f\"\\n[3] Injecting greeting...\")\n        await ws.send(json.dumps({\n            \"type\": \"inject\", \"session_id\": session_id, \"text\": GREETING,\n        }))\n        print(\"    OK: greeting queued (delivers on next turn poll)\")\n        await asyncio.sleep(0.5)\n\n        # 4. Takeover\n        print(f\"\\n[4] Taking over session {session_id[:12]}...\")\n        await ws.send(json.dumps({\"type\": \"takeover\", \"session_id\": session_id}))\n        while True:\n            ev = json.loads(await asyncio.wait_for(ws.recv(), timeout=10))\n            print(f\"    [event] {ev.get('type')}\")\n            if ev.get(\"type\") == \"takeover.ack\":\n                print(f\"    OK: takeover {'succeeded' if ev.get('ok') else 'FAILED'}\")\n                if not ev.get(\"ok\"):\n                    return\n                break\n\n        # 5. 
Handle 3 turns\n        print(f\"\\n[5] Handling {len(REPLIES)} turns...\")\n        turns = 0\n        while turns < len(REPLIES):\n            try:\n                ev = json.loads(await asyncio.wait_for(ws.recv(), timeout=90))\n                if ev.get(\"type\") == \"turn.request\":\n                    turns += 1\n                    rid = ev.get(\"request_id\", \"\")\n                    print(f\"\\n    Turn {turns}/{len(REPLIES)}\")\n                    print(f\"      Caller: {ev.get('transcript', '')!r}\")\n                    print(f\"      Reply:  {REPLIES[turns-1]!r}\")\n                    await ws.send(json.dumps({\n                        \"reply\": REPLIES[turns - 1], \"request_id\": rid,\n                    }))\n                    print(f\"      Sent (request_id={rid[:8]}...)\")\n                else:\n                    print(f\"    [event] {ev.get('type')}\")\n            except asyncio.TimeoutError:\n                print(f\"    TIMEOUT after {turns} turns\")\n                break\n\n        # 6. Release\n        print(f\"\\n[6] Releasing takeover...\")\n        await ws.send(json.dumps({\"type\": \"release\", \"session_id\": session_id}))\n        while True:\n            ev = json.loads(await asyncio.wait_for(ws.recv(), timeout=10))\n            print(f\"    [event] {ev.get('type')}\")\n            if ev.get(\"type\") == \"release.ack\":\n                print(f\"    OK: released, local LLM resumes\")\n                break\n\n        # 7. 
Observe post-release\n        print(f\"\\n[7] Observing post-release (30s)...\")\n        t0 = time.time()\n        while time.time() - t0 < 30:\n            try:\n                ev = json.loads(await asyncio.wait_for(ws.recv(), timeout=5))\n                etype = ev.get(\"type\", \"\")\n                if etype == \"turn.complete\":\n                    model = ev.get(\"model\", ev.get(\"metrics\", {}).get(\"llm_model\", \"\"))\n                    print(f\"    [turn] model={model} reply={ev.get('reply', '')[:60]!r}\")\n                elif etype == \"session.ended\":\n                    print(f\"    [session ended]\")\n                    break\n                else:\n                    print(f\"    [event] {etype}\")\n            except asyncio.TimeoutError:\n                print(\"    (quiet)\")\n\n        print(\"\\n[*] Test complete!\")\n\n\nif __name__ == \"__main__\":\n    try:\n        asyncio.run(main())\n    except KeyboardInterrupt:\n        print(\"\\n[*] Interrupted\")\n    except Exception as e:\n        print(f\"\\n[!] 
Error: {e}\")\n        raise\n\nRunning the Test\ncd /path/to/gateway\npython3 test_takeover.py\n\nExpected Output\n[*] Connecting to agent WS: ws://127.0.0.1:8996/api/agent/ws\n[+] Connected\n    [event] agent.connected\n\n[1] Dialing +49123456789...\n    [event] dial.ack\n    OK: dial succeeded\n\n[2] Waiting for call pickup (up to 60s)...\n    [event] call.dial\n    [event] turn.started\n    OK: session = d56a80fc30dc42b5ab2cecd2484ff847\n\n[3] Injecting greeting...\n    OK: greeting queued (delivers on next turn poll)\n\n[4] Taking over session d56a80fc30dc...\n    [event] turn.reply\n    [event] turn.complete\n    [event] agent.inject\n    [event] agent.takeover\n    [event] takeover.ack\n    OK: takeover succeeded\n\n[5] Handling 3 turns...\n    [event] turn.started\n    [event] turn.reply          <-- this is the injected greeting being delivered\n    [event] turn.complete\n    [event] turn.started\n    [event] turn.transcript\n\n    Turn 1/3\n      Caller: \"Okay, I'm saying something.\"\n      Reply:  \"That's interesting! I heard you clearly. ...\"\n      Sent (request_id=36fcc0e4...)\n    [event] turn.reply\n    [event] turn.complete\n    [event] turn.started\n    [event] turn.transcript\n\n    Turn 2/3\n      Caller: \"Okay, I'm saying something else.\"\n      Reply:  \"Got it! That was turn two. ...\"\n      Sent (request_id=3582e133...)\n\n    ...turn 3 similar...\n\n[6] Releasing takeover...\n    [event] agent.release\n    [event] release.ack\n    OK: released, local LLM resumes\n\n[7] Observing post-release (30s)...\n    [event] turn.reply\n    [turn] model=local/... reply='Hello! 
How can I help you?'\n\n[*] Test complete!\n\nWhat to Verify (Human)\nGreeting: You hear the custom greeting text spoken by the TTS voice\nTurns 1-3: You speak, and the canned agent replies play back (not the local LLM's responses)\nPost-release: After turn 3, the next time you speak, the response is clearly different — it's the local LLM's personality/style, not the canned replies\nLatency: Each turn should complete in 2-8 seconds (ASR + TTS, no LLM inference during takeover)\nNo errors: No timeouts, no dropped turns, no silence gaps\nTroubleshooting\nSymptom\tCause\tFix\ndial.ack ok but no session appears\tPhone not picking up (DIALER role lost, app not running)\tCheck DIALER role, restart app\nturn.request never arrives\tTakeover didn't register, or phone isn't polling\tCheck takeover.ack was ok: true; check phone logs\nGreeting doesn't play\tInject arrived after takeover consumed the poll\tInject BEFORE takeover, add 0.5s delay\nAgent reply not heard\tMissing request_id in reply\tAlways echo back the exact request_id\n60s timeout on turn\tAgent reply too slow\tCheck network, agent processing time\nLocal LLM doesn't resume after release\tRelease failed\tCheck release.ack was ok: true\nPhone stops picking up calls\tDIALER role reset (reboot) or USB issue\tRun post-reboot recovery"
  },
  "trust": {
    "sourceLabel": "tencent",
    "provenanceUrl": "https://clawhub.ai/TracSystems/agent-takeover",
    "publisherUrl": "https://clawhub.ai/TracSystems/agent-takeover",
    "owner": "TracSystems",
    "version": "0.1.4",
    "license": null,
    "verificationStatus": "Indexed source record"
  },
  "links": {
    "detailUrl": "https://openagent3.xyz/skills/agent-takeover",
    "downloadUrl": "https://openagent3.xyz/downloads/agent-takeover",
    "agentUrl": "https://openagent3.xyz/skills/agent-takeover/agent",
    "manifestUrl": "https://openagent3.xyz/skills/agent-takeover/agent.json",
    "briefUrl": "https://openagent3.xyz/skills/agent-takeover/agent.md"
  }
}