{
  "schemaVersion": "1.0",
  "item": {
    "slug": "jetson-cuda-voice",
    "name": "Jetson CUDA Voice Pipeline",
    "source": "tencent",
    "type": "skill",
    "category": "AI 智能",
    "sourceUrl": "https://clawhub.ai/nikil511/jetson-cuda-voice",
    "canonicalUrl": "https://clawhub.ai/nikil511/jetson-cuda-voice",
    "targetPlatform": "OpenClaw"
  },
  "install": {
    "downloadMode": "redirect",
    "downloadUrl": "/downloads/jetson-cuda-voice",
    "sourceDownloadUrl": "https://wry-manatee-359.convex.site/api/v1/download?slug=jetson-cuda-voice",
    "sourcePlatform": "tencent",
    "targetPlatform": "OpenClaw",
    "installMethod": "Manual import",
    "extraction": "Extract archive",
    "prerequisites": [
      "OpenClaw"
    ],
    "packageFormat": "ZIP package",
    "includedAssets": [
      "BUILD.md",
      "SKILL.md",
      "pipeline/led.py",
      "pipeline/manage.sh",
      "pipeline/setup.sh",
      "pipeline/voice_pipeline.py"
    ],
    "primaryDoc": "SKILL.md",
    "quickSetup": [
      "Download the package from Yavira.",
      "Extract the archive and review SKILL.md first.",
      "Import or place the package into your OpenClaw setup."
    ],
    "agentAssist": {
      "summary": "Hand the extracted package to your coding agent with a concrete install brief instead of figuring it out manually.",
      "steps": [
        "Download the package from Yavira.",
        "Extract it into a folder your agent can access.",
        "Paste one of the prompts below and point your agent at the extracted folder."
      ],
      "prompts": [
        {
          "label": "New install",
          "body": "I downloaded a skill package from Yavira. Read SKILL.md from the extracted folder and install it by following the included instructions. Tell me what you changed and call out any manual steps you could not complete."
        },
        {
          "label": "Upgrade existing",
          "body": "I downloaded an updated skill package from Yavira. Read SKILL.md from the extracted folder, compare it with my current installation, and upgrade it while preserving any custom configuration unless the package docs explicitly say otherwise. Summarize what changed and any follow-up checks I should run."
        }
      ]
    },
    "sourceHealth": {
      "source": "tencent",
      "slug": "jetson-cuda-voice",
      "status": "healthy",
      "reason": "direct_download_ok",
      "recommendedAction": "download",
      "checkedAt": "2026-05-02T00:59:55.947Z",
      "expiresAt": "2026-05-09T00:59:55.947Z",
      "httpStatus": 200,
      "finalUrl": "https://wry-manatee-359.convex.site/api/v1/download?slug=jetson-cuda-voice",
      "contentType": "application/zip",
      "probeMethod": "head",
      "details": {
        "probeUrl": "https://wry-manatee-359.convex.site/api/v1/download?slug=jetson-cuda-voice",
        "contentDisposition": "attachment; filename=\"jetson-cuda-voice-1.1.0.zip\"",
        "redirectLocation": null,
        "bodySnippet": null,
        "slug": "jetson-cuda-voice"
      },
      "scope": "item",
      "summary": "Item download looks usable.",
      "detail": "Yavira can redirect you to the upstream package for this item.",
      "primaryActionLabel": "Download for OpenClaw",
      "primaryActionHref": "/downloads/jetson-cuda-voice"
    },
    "validation": {
      "installChecklist": [
        "Use the Yavira download entry.",
        "Review SKILL.md after the package is downloaded.",
        "Confirm the extracted package contains the expected setup assets."
      ],
      "postInstallChecks": [
        "Confirm the extracted package includes the expected docs or setup files.",
        "Validate the skill or prompts are available in your target agent workspace.",
        "Capture any manual follow-up steps the agent could not complete."
      ]
    },
    "downloadPageUrl": "https://openagent3.xyz/downloads/jetson-cuda-voice",
    "agentPageUrl": "https://openagent3.xyz/skills/jetson-cuda-voice/agent",
    "manifestUrl": "https://openagent3.xyz/skills/jetson-cuda-voice/agent.json",
    "briefUrl": "https://openagent3.xyz/skills/jetson-cuda-voice/agent.md"
  },
  "agentAssist": {
    "summary": "Hand the extracted package to your coding agent with a concrete install brief instead of figuring it out manually.",
    "steps": [
      "Download the package from Yavira.",
      "Extract it into a folder your agent can access.",
      "Paste one of the prompts below and point your agent at the extracted folder."
    ],
    "prompts": [
      {
        "label": "New install",
        "body": "I downloaded a skill package from Yavira. Read SKILL.md from the extracted folder and install it by following the included instructions. Tell me what you changed and call out any manual steps you could not complete."
      },
      {
        "label": "Upgrade existing",
        "body": "I downloaded an updated skill package from Yavira. Read SKILL.md from the extracted folder, compare it with my current installation, and upgrade it while preserving any custom configuration unless the package docs explicitly say otherwise. Summarize what changed and any follow-up checks I should run."
      }
    ]
  },
  "documentation": {
    "source": "clawhub",
    "primaryDoc": "SKILL.md",
    "sections": [
      {
        "title": "Jetson CUDA Voice Pipeline",
        "body": "Fully offline, GPU-accelerated local voice assistant for NVIDIA Jetson devices.\nNo cloud for STT or TTS — only the LLM call uses the internet (OpenRouter or any OpenAI-compatible endpoint)."
      },
      {
        "title": "Architecture",
        "body": "ReSpeaker mic (hw:Array,0, S24_3LE, 16kHz)\n    ↓ arecord raw stream — never restarted mid-conversation\nopenWakeWord — \"Hey Jarvis\" detection (~32ms chunks)\n    ↓ wake word triggered → two-tone beep\n_measure_ambient() — 480ms median RMS → dynamic VAD thresholds\n    ↓\ntranscribe_stream() — VAD + whisper.cpp CUDA HTTP (~2-4s per utterance)\n    ↓\nask_llm() — OpenRouter or local OpenAI-compatible API (~1-2s)\n    ↓\nPiper TTS — offline neural TTS, hot-loaded at startup → aplay\n    ↓\nReSpeaker LEDs: 🔵 blue=listening  🩵 cyan=thinking  ⚫ off=done  🔴 red=error\n\nTotal latency: ~5-8 seconds from wake word to first spoken word."
      },
      {
        "title": "Key Features",
        "body": "Zero mic-restart gap — same arecord pipe feeds wake word detection and STT\nDynamic ambient calibration — measures room noise floor on every wake word trigger (adapts to fans, AC, time of day)\nConversation history — 20-turn rolling context for natural follow-ups\nAuto language detection — whisper -l auto, works multilingual\nReSpeaker LED ring — visual state feedback (silent no-op if device not present)\nFully configurable — all paths and thresholds via environment variables"
      },
      {
        "title": "Hardware Requirements",
        "body": "Component\tTested\tNotes\nJetson Xavier NX\t✅\tARM64, sm_72, 8GB, JetPack 5.1.4\nReSpeaker USB Mic Array v1.0\t✅\t2886:0007, S24_3LE, 16kHz\nAny ALSA speaker\t✅\ttested with Creative MUVO 2c\nOther Jetson models\t✅\tchange CMAKE_CUDA_ARCHITECTURES"
      },
      {
        "title": "Quick Start",
        "body": "# 1. Install Python deps\npip install openwakeword piper-tts numpy requests pyusb\n\n# 2. Build whisper.cpp with CUDA (see BUILD.md — ~45 min, one-time)\n#    Then place binary at ~/.local/bin/whisper-server-gpu\n\n# 3. Download Piper voice model\nmkdir -p ~/.local/share/piper/voices && cd ~/.local/share/piper/voices\nwget https://huggingface.co/rhasspy/piper-voices/resolve/main/en/en_US/lessac/medium/en_US-lessac-medium.onnx\nwget https://huggingface.co/rhasspy/piper-voices/resolve/main/en/en_US/lessac/medium/en_US-lessac-medium.onnx.json\n\n# 4. Install and start services\nexport OPENROUTER_API_KEY=your-key-here\nbash pipeline/setup.sh\nbash pipeline/manage.sh start\n\n# Say \"Hey Jarvis\" — blue LED = listening"
      },
      {
        "title": "Build whisper.cpp with CUDA",
        "body": "See BUILD.md for full instructions. Critical flag:\n\ncmake .. -DGGML_CUDA=ON -DCMAKE_CUDA_ARCHITECTURES=72 -DCMAKE_BUILD_TYPE=Release\nmake -j4   # ~45 min — detach with nohup if needed\n\n⚠️ CMAKE_CUDA_ARCHITECTURES=72 (sm_72 = Xavier NX) is critical.\nDefault multi-arch compilation OOMs on 8GB Jetson.\n\nArchitecture map:\n\nXavier NX / AGX Xavier → 72\nOrin → 87\nTX2 → 62\nNano → 53"
      },
      {
        "title": "Piper Voice Models",
        "body": "mkdir -p ~/.local/share/piper/voices && cd \"$_\"\n\n# English (required)\nwget https://huggingface.co/rhasspy/piper-voices/resolve/main/en/en_US/lessac/medium/en_US-lessac-medium.onnx\nwget https://huggingface.co/rhasspy/piper-voices/resolve/main/en/en_US/lessac/medium/en_US-lessac-medium.onnx.json\n\n# Greek (optional — any language from huggingface.co/rhasspy/piper-voices works)\nwget https://huggingface.co/rhasspy/piper-voices/resolve/main/el/el_GR/rapunzelina/medium/el_GR-rapunzelina-medium.onnx\nwget https://huggingface.co/rhasspy/piper-voices/resolve/main/el/el_GR/rapunzelina/medium/el_GR-rapunzelina-medium.onnx.json"
      },
      {
        "title": "Service Install",
        "body": "setup.sh writes and enables the systemd user services automatically:\n\nbash pipeline/setup.sh [/path/to/voice_pipeline.py] [API_KEY]\n\nOr with env var:\n\nOPENROUTER_API_KEY=sk-... bash pipeline/setup.sh\n\nRe-run to update an existing install."
      },
      {
        "title": "ReSpeaker Mic Gain & USB Autosuspend",
        "body": "# Optimal gain (no clipping, RMS ~180 ambient)\namixer -c 0 set Mic 90\n\n# Prevent USB autosuspend (mic sleeps after 2s idle without this)\nsudo tee /etc/udev/rules.d/99-usb-audio-nosuspend.rules << 'EOF'\nACTION==\"add\", SUBSYSTEM==\"usb\", ATTR{idVendor}==\"2886\", ATTR{idProduct}==\"0007\", \\\n  ATTR{power/control}=\"on\", ATTR{power/autosuspend}=\"-1\"\nEOF\nsudo udevadm control --reload-rules"
      },
      {
        "title": "Management",
        "body": "bash pipeline/manage.sh start     # start both services\nbash pipeline/manage.sh stop      # stop both services\nbash pipeline/manage.sh restart   # restart both\nbash pipeline/manage.sh status    # systemd status\nbash pipeline/manage.sh logs      # tail live log\nbash pipeline/manage.sh test-mic  # record 4s + play back\nbash pipeline/manage.sh test-stt  # record 4s + transcribe\nbash pipeline/manage.sh test-tts  # speak a test phrase"
      },
      {
        "title": "Environment Variables",
        "body": "Variable\tDefault\tDescription\nOPENROUTER_API_KEY\t(required)\tAPI key for OpenRouter (or any OpenAI-compatible provider)\nVOICE_MIC\thw:Array,0\tALSA mic device name\nVOICE_SPEAKER\thw:C2c,0\tALSA speaker device name\nVOICE_LLM_URL\tOpenRouter\tLLM API endpoint\nVOICE_LLM_MODEL\tanthropic/claude-3.5-haiku\tModel name\nVOICE_WAKE_THRESHOLD\t0.5\tWake word confidence (0.0–1.0)\nVOICE_SPEECH_RMS\t400\tFallback speech RMS threshold\nVOICE_SILENCE_RMS\t250\tFallback silence RMS threshold\nVOICE_UTC_OFFSET\t0\tTimezone offset hours for LLM context\nPIPER_VOICES_DIR\t~/.local/share/piper/voices\tPiper voice models directory\nWHISPER_URL\thttp://127.0.0.1:8181/inference\twhisper-server endpoint\nWHISPER_BIN\t~/.local/bin/whisper-server-gpu\twhisper-server binary (used by setup.sh)\nWHISPER_MODEL\t~/.local/share/whisper/models/ggml-base.bin\tWhisper model (used by setup.sh)"
      },
      {
        "title": "Troubleshooting",
        "body": "Mic records silence\n\nCheck gain: amixer -c 0 set Mic 90\nUse card name not number (hw:Array,0 not hw:0,0) — numbers shift on reboot\nReSpeaker requires S24_3LE format, not S16_LE\nDisable USB autosuspend (see setup above)\n\nRecords full 6s timeout, never cuts off\n\nRoom ambient noise > VOICE_SILENCE_RMS fallback. Dynamic calibration handles this automatically.\nIf still an issue, set VOICE_SILENCE_RMS slightly above your measured ambient floor.\n\n[BEEPING] or (bell dings) in transcript\n\nSpeaker beep being picked up by mic. The 0.3s drain buffer after beep handles this.\nCheck speaker/mic distance and speaker volume.\n\nWhisper OOM during build\n\nMust use -DCMAKE_CUDA_ARCHITECTURES=72 — default multi-arch build exhausts 8GB RAM.\nUse -j4 not -j6.\n\nLED not lighting up\n\nInstall pyusb: pip install pyusb\nOnly supported on ReSpeaker USB Mic Array v1.0 (2886:0007)\nAll LED errors are silent — pipeline continues without it.\n\nWake word triggers constantly (false positives)\n\nRaise VOICE_WAKE_THRESHOLD to 0.7 or higher.\nEnsure no TV/radio playing phrases close to \"Hey Jarvis\"."
      },
      {
        "title": "File Structure",
        "body": "jetson-cuda-voice/\n├── SKILL.md                  ← this file\n├── BUILD.md                  ← whisper.cpp CUDA build guide\n└── pipeline/\n    ├── voice_pipeline.py     ← main pipeline\n    ├── led.py                ← ReSpeaker LED control (optional)\n    ├── setup.sh              ← one-command service installer\n    └── manage.sh             ← start/stop/status/test"
      }
    ],
    "body": "Jetson CUDA Voice Pipeline\n\nFully offline, GPU-accelerated local voice assistant for NVIDIA Jetson devices. No cloud for STT or TTS — only the LLM call uses the internet (OpenRouter or any OpenAI-compatible endpoint).\n\nArchitecture\nReSpeaker mic (hw:Array,0, S24_3LE, 16kHz)\n    ↓ arecord raw stream — never restarted mid-conversation\nopenWakeWord — \"Hey Jarvis\" detection (~32ms chunks)\n    ↓ wake word triggered → two-tone beep\n_measure_ambient() — 480ms median RMS → dynamic VAD thresholds\n    ↓\ntranscribe_stream() — VAD + whisper.cpp CUDA HTTP (~2-4s per utterance)\n    ↓\nask_llm() — OpenRouter or local OpenAI-compatible API (~1-2s)\n    ↓\nPiper TTS — offline neural TTS, hot-loaded at startup → aplay\n    ↓\nReSpeaker LEDs: 🔵 blue=listening  🩵 cyan=thinking  ⚫ off=done  🔴 red=error\n\n\nTotal latency: ~5-8 seconds from wake word to first spoken word.\n\nKey Features\nZero mic-restart gap — same arecord pipe feeds wake word detection and STT\nDynamic ambient calibration — measures room noise floor on every wake word trigger (adapts to fans, AC, time of day)\nConversation history — 20-turn rolling context for natural follow-ups\nAuto language detection — whisper -l auto, works multilingual\nReSpeaker LED ring — visual state feedback (silent no-op if device not present)\nFully configurable — all paths and thresholds via environment variables\nHardware Requirements\nComponent\tTested\tNotes\nJetson Xavier NX\t✅\tARM64, sm_72, 8GB, JetPack 5.1.4\nReSpeaker USB Mic Array v1.0\t✅\t2886:0007, S24_3LE, 16kHz\nAny ALSA speaker\t✅\ttested with Creative MUVO 2c\nOther Jetson models\t✅\tchange CMAKE_CUDA_ARCHITECTURES\nQuick Start\n# 1. Install Python deps\npip install openwakeword piper-tts numpy requests pyusb\n\n# 2. Build whisper.cpp with CUDA (see BUILD.md — ~45 min, one-time)\n#    Then place binary at ~/.local/bin/whisper-server-gpu\n\n# 3. 
Download Piper voice model\nmkdir -p ~/.local/share/piper/voices && cd ~/.local/share/piper/voices\nwget https://huggingface.co/rhasspy/piper-voices/resolve/main/en/en_US/lessac/medium/en_US-lessac-medium.onnx\nwget https://huggingface.co/rhasspy/piper-voices/resolve/main/en/en_US/lessac/medium/en_US-lessac-medium.onnx.json\n\n# 4. Install and start services\nexport OPENROUTER_API_KEY=your-key-here\nbash pipeline/setup.sh\nbash pipeline/manage.sh start\n\n# Say \"Hey Jarvis\" — blue LED = listening\n\nSetup Details\nBuild whisper.cpp with CUDA\n\nSee BUILD.md for full instructions. Critical flag:\n\ncmake .. -DGGML_CUDA=ON -DCMAKE_CUDA_ARCHITECTURES=72 -DCMAKE_BUILD_TYPE=Release\nmake -j4   # ~45 min — detach with nohup if needed\n\n\n⚠️ CMAKE_CUDA_ARCHITECTURES=72 (sm_72 = Xavier NX) is critical. Default multi-arch compilation OOMs on 8GB Jetson.\n\nArchitecture map:\n\nXavier NX / AGX Xavier → 72\nOrin → 87\nTX2 → 62\nNano → 53\nPiper Voice Models\nmkdir -p ~/.local/share/piper/voices && cd \"$_\"\n\n# English (required)\nwget https://huggingface.co/rhasspy/piper-voices/resolve/main/en/en_US/lessac/medium/en_US-lessac-medium.onnx\nwget https://huggingface.co/rhasspy/piper-voices/resolve/main/en/en_US/lessac/medium/en_US-lessac-medium.onnx.json\n\n# Greek (optional — any language from huggingface.co/rhasspy/piper-voices works)\nwget https://huggingface.co/rhasspy/piper-voices/resolve/main/el/el_GR/rapunzelina/medium/el_GR-rapunzelina-medium.onnx\nwget https://huggingface.co/rhasspy/piper-voices/resolve/main/el/el_GR/rapunzelina/medium/el_GR-rapunzelina-medium.onnx.json\n\nService Install\n\nsetup.sh writes and enables the systemd user services automatically:\n\nbash pipeline/setup.sh [/path/to/voice_pipeline.py] [API_KEY]\n\n\nOr with env var:\n\nOPENROUTER_API_KEY=sk-... 
bash pipeline/setup.sh\n\n\nRe-run to update an existing install.\n\nReSpeaker Mic Gain & USB Autosuspend\n# Optimal gain (no clipping, RMS ~180 ambient)\namixer -c 0 set Mic 90\n\n# Prevent USB autosuspend (mic sleeps after 2s idle without this)\nsudo tee /etc/udev/rules.d/99-usb-audio-nosuspend.rules << 'EOF'\nACTION==\"add\", SUBSYSTEM==\"usb\", ATTR{idVendor}==\"2886\", ATTR{idProduct}==\"0007\", \\\n  ATTR{power/control}=\"on\", ATTR{power/autosuspend}=\"-1\"\nEOF\nsudo udevadm control --reload-rules\n\nManagement\nbash pipeline/manage.sh start     # start both services\nbash pipeline/manage.sh stop      # stop both services\nbash pipeline/manage.sh restart   # restart both\nbash pipeline/manage.sh status    # systemd status\nbash pipeline/manage.sh logs      # tail live log\nbash pipeline/manage.sh test-mic  # record 4s + play back\nbash pipeline/manage.sh test-stt  # record 4s + transcribe\nbash pipeline/manage.sh test-tts  # speak a test phrase\n\nEnvironment Variables\nVariable\tDefault\tDescription\nOPENROUTER_API_KEY\t(required)\tAPI key for OpenRouter (or any OpenAI-compatible provider)\nVOICE_MIC\thw:Array,0\tALSA mic device name\nVOICE_SPEAKER\thw:C2c,0\tALSA speaker device name\nVOICE_LLM_URL\tOpenRouter\tLLM API endpoint\nVOICE_LLM_MODEL\tanthropic/claude-3.5-haiku\tModel name\nVOICE_WAKE_THRESHOLD\t0.5\tWake word confidence (0.0–1.0)\nVOICE_SPEECH_RMS\t400\tFallback speech RMS threshold\nVOICE_SILENCE_RMS\t250\tFallback silence RMS threshold\nVOICE_UTC_OFFSET\t0\tTimezone offset hours for LLM context\nPIPER_VOICES_DIR\t~/.local/share/piper/voices\tPiper voice models directory\nWHISPER_URL\thttp://127.0.0.1:8181/inference\twhisper-server endpoint\nWHISPER_BIN\t~/.local/bin/whisper-server-gpu\twhisper-server binary (used by setup.sh)\nWHISPER_MODEL\t~/.local/share/whisper/models/ggml-base.bin\tWhisper model (used by setup.sh)\nTroubleshooting\n\nMic records silence\n\nCheck gain: amixer -c 0 set Mic 90\nUse card name not number (hw:Array,0 not 
hw:0,0) — numbers shift on reboot\nReSpeaker requires S24_3LE format, not S16_LE\nDisable USB autosuspend (see setup above)\n\nRecords full 6s timeout, never cuts off\n\nRoom ambient noise > VOICE_SILENCE_RMS fallback. Dynamic calibration handles this automatically.\nIf still an issue, set VOICE_SILENCE_RMS slightly above your measured ambient floor.\n\n[BEEPING] or (bell dings) in transcript\n\nSpeaker beep being picked up by mic. The 0.3s drain buffer after beep handles this.\nCheck speaker/mic distance and speaker volume.\n\nWhisper OOM during build\n\nMust use -DCMAKE_CUDA_ARCHITECTURES=72 — default multi-arch build exhausts 8GB RAM.\nUse -j4 not -j6.\n\nLED not lighting up\n\nInstall pyusb: pip install pyusb\nOnly supported on ReSpeaker USB Mic Array v1.0 (2886:0007)\nAll LED errors are silent — pipeline continues without it.\n\nWake word triggers constantly (false positives)\n\nRaise VOICE_WAKE_THRESHOLD to 0.7 or higher.\nEnsure no TV/radio playing phrases close to \"Hey Jarvis\".\nFile Structure\njetson-cuda-voice/\n├── SKILL.md                  ← this file\n├── BUILD.md                  ← whisper.cpp CUDA build guide\n└── pipeline/\n    ├── voice_pipeline.py     ← main pipeline\n    ├── led.py                ← ReSpeaker LED control (optional)\n    ├── setup.sh              ← one-command service installer\n    └── manage.sh             ← start/stop/status/test"
  },
  "trust": {
    "sourceLabel": "tencent",
    "provenanceUrl": "https://clawhub.ai/nikil511/jetson-cuda-voice",
    "publisherUrl": "https://clawhub.ai/nikil511/jetson-cuda-voice",
    "owner": "nikil511",
    "version": "1.1.0",
    "license": null,
    "verificationStatus": "Indexed source record"
  },
  "links": {
    "detailUrl": "https://openagent3.xyz/skills/jetson-cuda-voice",
    "downloadUrl": "https://openagent3.xyz/downloads/jetson-cuda-voice",
    "agentUrl": "https://openagent3.xyz/skills/jetson-cuda-voice/agent",
    "manifestUrl": "https://openagent3.xyz/skills/jetson-cuda-voice/agent.json",
    "briefUrl": "https://openagent3.xyz/skills/jetson-cuda-voice/agent.md"
  }
}