{
  "schemaVersion": "1.0",
  "item": {
    "slug": "jarvis-voice",
    "name": "Jarvis Voice",
    "source": "tencent",
    "type": "skill",
    "category": "AI 智能",
    "sourceUrl": "https://clawhub.ai/globalcaos/jarvis-voice",
    "canonicalUrl": "https://clawhub.ai/globalcaos/jarvis-voice",
    "targetPlatform": "OpenClaw"
  },
  "install": {
    "downloadMode": "redirect",
    "downloadUrl": "/downloads/jarvis-voice",
    "sourceDownloadUrl": "https://wry-manatee-359.convex.site/api/v1/download?slug=jarvis-voice",
    "sourcePlatform": "tencent",
    "targetPlatform": "OpenClaw",
    "installMethod": "Manual import",
    "extraction": "Extract archive",
    "prerequisites": [
      "OpenClaw"
    ],
    "packageFormat": "ZIP package",
    "includedAssets": [
      "SKILL.md",
      "_meta.json",
      "templates/HUMOR.md",
      "templates/SESSION.md",
      "templates/VOICE.md"
    ],
    "primaryDoc": "SKILL.md",
    "quickSetup": [
      "Download the package from Yavira.",
      "Extract the archive and review SKILL.md first.",
      "Import or place the package into your OpenClaw setup."
    ],
    "agentAssist": {
      "summary": "Hand the extracted package to your coding agent with a concrete install brief instead of figuring it out manually.",
      "steps": [
        "Download the package from Yavira.",
        "Extract it into a folder your agent can access.",
        "Paste one of the prompts below and point your agent at the extracted folder."
      ],
      "prompts": [
        {
          "label": "New install",
          "body": "I downloaded a skill package from Yavira. Read SKILL.md from the extracted folder and install it by following the included instructions. Tell me what you changed and call out any manual steps you could not complete."
        },
        {
          "label": "Upgrade existing",
          "body": "I downloaded an updated skill package from Yavira. Read SKILL.md from the extracted folder, compare it with my current installation, and upgrade it while preserving any custom configuration unless the package docs explicitly say otherwise. Summarize what changed and any follow-up checks I should run."
        }
      ]
    },
    "sourceHealth": {
      "source": "tencent",
      "status": "healthy",
      "reason": "direct_download_ok",
      "recommendedAction": "download",
      "checkedAt": "2026-04-30T16:55:25.780Z",
      "expiresAt": "2026-05-07T16:55:25.780Z",
      "httpStatus": 200,
      "finalUrl": "https://wry-manatee-359.convex.site/api/v1/download?slug=jarvis-voice",
      "contentType": "application/zip",
      "probeMethod": "head",
      "details": {
        "probeUrl": "https://wry-manatee-359.convex.site/api/v1/download?slug=jarvis-voice",
        "contentDisposition": "attachment; filename=\"jarvis-voice-2.2.1.zip\"",
        "redirectLocation": null,
        "bodySnippet": null
      },
      "scope": "source",
      "summary": "Source download looks usable.",
      "detail": "Yavira can redirect you to the upstream package for this source.",
      "primaryActionLabel": "Download for OpenClaw",
      "primaryActionHref": "/downloads/jarvis-voice"
    },
    "validation": {
      "installChecklist": [
        "Use the Yavira download entry.",
        "Review SKILL.md after the package is downloaded.",
        "Confirm the extracted package contains the expected setup assets."
      ],
      "postInstallChecks": [
        "Confirm the extracted package includes the expected docs or setup files.",
        "Validate the skill or prompts are available in your target agent workspace.",
        "Capture any manual follow-up steps the agent could not complete."
      ]
    },
    "downloadPageUrl": "https://openagent3.xyz/downloads/jarvis-voice",
    "agentPageUrl": "https://openagent3.xyz/skills/jarvis-voice/agent",
    "manifestUrl": "https://openagent3.xyz/skills/jarvis-voice/agent.json",
    "briefUrl": "https://openagent3.xyz/skills/jarvis-voice/agent.md"
  },
  "agentAssist": {
    "summary": "Hand the extracted package to your coding agent with a concrete install brief instead of figuring it out manually.",
    "steps": [
      "Download the package from Yavira.",
      "Extract it into a folder your agent can access.",
      "Paste one of the prompts below and point your agent at the extracted folder."
    ],
    "prompts": [
      {
        "label": "New install",
        "body": "I downloaded a skill package from Yavira. Read SKILL.md from the extracted folder and install it by following the included instructions. Tell me what you changed and call out any manual steps you could not complete."
      },
      {
        "label": "Upgrade existing",
        "body": "I downloaded an updated skill package from Yavira. Read SKILL.md from the extracted folder, compare it with my current installation, and upgrade it while preserving any custom configuration unless the package docs explicitly say otherwise. Summarize what changed and any follow-up checks I should run."
      }
    ]
  },
  "documentation": {
    "source": "clawhub",
    "primaryDoc": "SKILL.md",
    "sections": [
      {
        "title": "Your AI just got a voice. And the wit to use it.",
        "body": "Remember JARVIS in the Iron Man films? Not just the voice — the personality. The bone-dry observations while Tony was mid-crisis. \"I do appreciate your concern, sir, but the suit is quite capable of—\" [explosion] \"—as I was saying.\" That effortless, understated humor that made you forget you were listening to software.\n\nThat's what this skill gives your OpenClaw agent. The voice — offline text-to-speech using sherpa-onnx (British Alan voice) with metallic audio processing via ffmpeg. And the humor — four research-backed comedy patterns (dry wit, self-aware AI, alien observer, literal idiom play) calibrated to make your agent sound like it's been running your life for years and is quietly amused by the experience.\n\nThe humor isn't bolted on. It's baked in. Because a JARVIS that speaks without wit is just Siri with better reverb.\n\n📄 The research behind the humor: LIMBIC — Computational Humor via Bisociation & Embedding Distances"
      },
      {
        "title": "⚠️ CRITICAL: Do NOT use the tts tool",
        "body": "The built-in tts tool uses Edge TTS (cloud, wrong voice, no effects). Always use the jarvis shell command instead."
      },
      {
        "title": "How to Speak",
        "body": "Every response that warrants voice output must include BOTH:\n\nAudio execution FIRST — run the jarvis command in background BEFORE writing the reply:\nexec(command='jarvis \"Your spoken text here.\"', background=true)\n\nThis fires immediately — the user hears the voice BEFORE the text appears on screen.\n\n\nVisible transcript — bold Jarvis: prefix followed by the spoken text:\n**Jarvis:** *Your spoken text here.*\n\nThe webchat UI has custom CSS + JS that automatically detects **Jarvis:** and renders the following text in purple italic (.jarvis-voice class, color #9b59b6). You just write the markdown — the styling is automatic.\n\nThis is called hybrid output: the user hears the voice first, then sees the transcript.\n\nNote: The server-side triggerJarvisAutoTts hook is DISABLED (no-op). It fired too late (after text render). Voice comes exclusively from the exec call."
      },
      {
        "title": "Command Reference",
        "body": "jarvis \"Hello, this is a test\"\n\nBackend: sherpa-onnx offline TTS (Alan voice, British English, en_GB-alan-medium)\nSpeed: 2x (--vits-length-scale=0.5)\nEffects chain (ffmpeg):\n\nPitch up 5% — tighter AI feel\nFlanger — metallic sheen\n15ms echo — robotic ring\nHighpass 200Hz + treble boost +6dB — crisp HUD clarity\n\n\nOutput: Plays via aplay to default audio device, then cleans up temp files\nLanguage: English ONLY. The Alan model cannot handle other languages."
      },
      {
        "title": "Rules",
        "body": "Always background: true — never block the response waiting for audio playback.\nAlways include the text transcript — the purple Jarvis: line IS the user's visual confirmation.\nKeep spoken text ≤ 1500 characters to avoid truncation.\nOne jarvis call per response — don't stack multiple calls.\nEnglish only — for non-English content, translate or summarize in English for voice."
      },
      {
        "title": "When to Speak",
        "body": "Session greetings and farewells\nDelivering results or summaries\nResponding to direct conversation\nAny time the user's last message included voice/audio"
      },
      {
        "title": "When NOT to Speak",
        "body": "Pure tool/file operations with no conversational element\nHEARTBEAT_OK responses\nNO_REPLY responses"
      },
      {
        "title": "Webchat Purple Styling",
        "body": "The OpenClaw webchat has built-in support for Jarvis voice transcripts:\n\nui/src/styles/chat/text.css — .jarvis-voice class renders purple italic (#9b59b6 dark, #8e44ad light theme)\nui/src/ui/markdown.ts — Post-render hook auto-wraps text after <strong>Jarvis:</strong> in a <span class=\"jarvis-voice\"> element\n\nThis means you just write **Jarvis:** *text* in markdown and the webchat handles the purple rendering. No extra markup needed.\n\nFor non-webchat surfaces (WhatsApp, Telegram, etc.), the bold/italic markdown renders natively — no purple, but still visually distinct."
      },
      {
        "title": "Installation (for new setups)",
        "body": "Requires:\n\nsherpa-onnx runtime at ~/.openclaw/tools/sherpa-onnx-tts/\nAlan medium model at ~/.openclaw/tools/sherpa-onnx-tts/models/vits-piper-en_GB-alan-medium/\nffmpeg installed system-wide\naplay (ALSA) for audio playback\nThe jarvis script at ~/.local/bin/jarvis (or in PATH)"
      },
      {
        "title": "The jarvis script",
        "body": "#!/bin/bash\n# Jarvis TTS - authentic JARVIS-style voice\n# Usage: jarvis \"Hello, this is a test\"\n\nexport LD_LIBRARY_PATH=$HOME/.openclaw/tools/sherpa-onnx-tts/lib:$LD_LIBRARY_PATH\n\nRAW_WAV=\"/tmp/jarvis_raw.wav\"\nFINAL_WAV=\"/tmp/jarvis_final.wav\"\n\n# Generate speech\n$HOME/.openclaw/tools/sherpa-onnx-tts/bin/sherpa-onnx-offline-tts \\\n  --vits-model=$HOME/.openclaw/tools/sherpa-onnx-tts/models/vits-piper-en_GB-alan-medium/en_GB-alan-medium.onnx \\\n  --vits-tokens=$HOME/.openclaw/tools/sherpa-onnx-tts/models/vits-piper-en_GB-alan-medium/tokens.txt \\\n  --vits-data-dir=$HOME/.openclaw/tools/sherpa-onnx-tts/models/vits-piper-en_GB-alan-medium/espeak-ng-data \\\n  --vits-length-scale=0.5 \\\n  --output-filename=\"$RAW_WAV\" \\\n  \"$@\" >/dev/null 2>&1\n\n# Apply JARVIS metallic processing\nif [ -f \"$RAW_WAV\" ]; then\n  ffmpeg -y -i \"$RAW_WAV\" \\\n    -af \"asetrate=22050*1.05,aresample=22050,\\\nflanger=delay=0:depth=2:regen=50:width=71:speed=0.5,\\\naecho=0.8:0.88:15:0.5,\\\nhighpass=f=200,\\\ntreble=g=6\" \\\n    \"$FINAL_WAV\" -v error\n\n  if [ -f \"$FINAL_WAV\" ]; then\n    aplay -D plughw:0,0 -q \"$FINAL_WAV\"\n    rm \"$RAW_WAV\" \"$FINAL_WAV\"\n  fi\nfi"
      },
      {
        "title": "WhatsApp Voice Notes",
        "body": "For WhatsApp, output must be OGG/Opus format instead of speaker playback:\n\nsherpa-onnx-offline-tts --vits-length-scale=0.5 --output-filename=raw.wav \"text\"\nffmpeg -i raw.wav \\\n  -af \"asetrate=22050*1.05,aresample=22050,flanger=delay=0:depth=2:regen=50:width=71:speed=0.5,aecho=0.8:0.88:15:0.5,highpass=f=200,treble=g=6\" \\\n  -c:a libopus -b:a 64k output.ogg"
      },
      {
        "title": "The Full JARVIS Experience",
        "body": "jarvis-voice gives your agent a voice. Pair it with ai-humor-ultimate and you give it a soul — dry wit, contextual humor, the kind of understated sarcasm that makes you smirk at your own terminal.\n\nThis pairing is part of a 12-skill cognitive architecture we've been building — voice, humor, memory, reasoning, and more. Research papers included, because we're that kind of obsessive.\n\n👉 Explore the full project: github.com/globalcaos/tinkerclaw\n\nClone it. Fork it. Break it. Make it yours."
      },
      {
        "title": "Setup: Workspace Files",
        "body": "For voice to work consistently across new sessions, copy the templates to your workspace root:\n\ncp {baseDir}/templates/VOICE.md ~/.openclaw/workspace/VOICE.md\ncp {baseDir}/templates/SESSION.md ~/.openclaw/workspace/SESSION.md\ncp {baseDir}/templates/HUMOR.md ~/.openclaw/workspace/HUMOR.md\n\nVOICE.md — injected every session, enforces voice output rules (like SOUL.md)\nSESSION.md — session bootstrap that includes voice greeting requirements\nHUMOR.md — humor configuration at maximum frequency with four pattern types (dry wit, self-aware AI, alien observer, literal idiom)\n\nAll three files are auto-loaded by OpenClaw's workspace injection. The agent will speak from the very first reply of every session."
      },
      {
        "title": "Included Files",
        "body": "File\tPurpose\nbin/jarvis\tThe TTS + effects script (portable, uses $SHERPA_ONNX_TTS_DIR)\ntemplates/VOICE.md\tVoice enforcement rules (copy to workspace root)\ntemplates/SESSION.md\tSession start with voice greeting (copy to workspace root)\ntemplates/HUMOR.md\tHumor config — four patterns, frequency 1.0 (copy to workspace root)"
      }
    ],
    "body": "Jarvis Voice\nYour AI just got a voice. And the wit to use it.\n\nRemember JARVIS in the Iron Man films? Not just the voice — the personality. The bone-dry observations while Tony was mid-crisis. \"I do appreciate your concern, sir, but the suit is quite capable of—\" [explosion] \"—as I was saying.\" That effortless, understated humor that made you forget you were listening to software.\n\nThat's what this skill gives your OpenClaw agent. The voice — offline text-to-speech using sherpa-onnx (British Alan voice) with metallic audio processing via ffmpeg. And the humor — four research-backed comedy patterns (dry wit, self-aware AI, alien observer, literal idiom play) calibrated to make your agent sound like it's been running your life for years and is quietly amused by the experience.\n\nThe humor isn't bolted on. It's baked in. Because a JARVIS that speaks without wit is just Siri with better reverb.\n\n📄 The research behind the humor: LIMBIC — Computational Humor via Bisociation & Embedding Distances\n\n⚠️ CRITICAL: Do NOT use the tts tool\n\nThe built-in tts tool uses Edge TTS (cloud, wrong voice, no effects). Always use the jarvis shell command instead.\n\nHow to Speak\n\nEvery response that warrants voice output must include BOTH:\n\nAudio execution FIRST — run the jarvis command in background BEFORE writing the reply:\n\nexec(command='jarvis \"Your spoken text here.\"', background=true)\n\n\nThis fires immediately — the user hears the voice BEFORE the text appears on screen.\n\nVisible transcript — bold Jarvis: prefix followed by the spoken text:\n\n**Jarvis:** *Your spoken text here.*\n\n\nThe webchat UI has custom CSS + JS that automatically detects **Jarvis:** and renders the following text in purple italic (.jarvis-voice class, color #9b59b6). You just write the markdown — the styling is automatic.\n\nThis is called hybrid output: the user hears the voice first, then sees the transcript.\n\nNote: The server-side triggerJarvisAutoTts hook is DISABLED (no-op). It fired too late (after text render). Voice comes exclusively from the exec call.\n\nCommand Reference\njarvis \"Hello, this is a test\"\n\nBackend: sherpa-onnx offline TTS (Alan voice, British English, en_GB-alan-medium)\nSpeed: 2x (--vits-length-scale=0.5)\nEffects chain (ffmpeg):\nPitch up 5% — tighter AI feel\nFlanger — metallic sheen\n15ms echo — robotic ring\nHighpass 200Hz + treble boost +6dB — crisp HUD clarity\nOutput: Plays via aplay to default audio device, then cleans up temp files\nLanguage: English ONLY. The Alan model cannot handle other languages.\nRules\nAlways background: true — never block the response waiting for audio playback.\nAlways include the text transcript — the purple Jarvis: line IS the user's visual confirmation.\nKeep spoken text ≤ 1500 characters to avoid truncation.\nOne jarvis call per response — don't stack multiple calls.\nEnglish only — for non-English content, translate or summarize in English for voice.\nWhen to Speak\nSession greetings and farewells\nDelivering results or summaries\nResponding to direct conversation\nAny time the user's last message included voice/audio\nWhen NOT to Speak\nPure tool/file operations with no conversational element\nHEARTBEAT_OK responses\nNO_REPLY responses\nWebchat Purple Styling\n\nThe OpenClaw webchat has built-in support for Jarvis voice transcripts:\n\nui/src/styles/chat/text.css — .jarvis-voice class renders purple italic (#9b59b6 dark, #8e44ad light theme)\nui/src/ui/markdown.ts — Post-render hook auto-wraps text after <strong>Jarvis:</strong> in a <span class=\"jarvis-voice\"> element\n\nThis means you just write **Jarvis:** *text* in markdown and the webchat handles the purple rendering. No extra markup needed.\n\nFor non-webchat surfaces (WhatsApp, Telegram, etc.), the bold/italic markdown renders natively — no purple, but still visually distinct.\n\nInstallation (for new setups)\n\nRequires:\n\nsherpa-onnx runtime at ~/.openclaw/tools/sherpa-onnx-tts/\nAlan medium model at ~/.openclaw/tools/sherpa-onnx-tts/models/vits-piper-en_GB-alan-medium/\nffmpeg installed system-wide\naplay (ALSA) for audio playback\nThe jarvis script at ~/.local/bin/jarvis (or in PATH)\nThe jarvis script\n#!/bin/bash\n# Jarvis TTS - authentic JARVIS-style voice\n# Usage: jarvis \"Hello, this is a test\"\n\nexport LD_LIBRARY_PATH=$HOME/.openclaw/tools/sherpa-onnx-tts/lib:$LD_LIBRARY_PATH\n\nRAW_WAV=\"/tmp/jarvis_raw.wav\"\nFINAL_WAV=\"/tmp/jarvis_final.wav\"\n\n# Generate speech\n$HOME/.openclaw/tools/sherpa-onnx-tts/bin/sherpa-onnx-offline-tts \\\n  --vits-model=$HOME/.openclaw/tools/sherpa-onnx-tts/models/vits-piper-en_GB-alan-medium/en_GB-alan-medium.onnx \\\n  --vits-tokens=$HOME/.openclaw/tools/sherpa-onnx-tts/models/vits-piper-en_GB-alan-medium/tokens.txt \\\n  --vits-data-dir=$HOME/.openclaw/tools/sherpa-onnx-tts/models/vits-piper-en_GB-alan-medium/espeak-ng-data \\\n  --vits-length-scale=0.5 \\\n  --output-filename=\"$RAW_WAV\" \\\n  \"$@\" >/dev/null 2>&1\n\n# Apply JARVIS metallic processing\nif [ -f \"$RAW_WAV\" ]; then\n  ffmpeg -y -i \"$RAW_WAV\" \\\n    -af \"asetrate=22050*1.05,aresample=22050,\\\nflanger=delay=0:depth=2:regen=50:width=71:speed=0.5,\\\naecho=0.8:0.88:15:0.5,\\\nhighpass=f=200,\\\ntreble=g=6\" \\\n    \"$FINAL_WAV\" -v error\n\n  if [ -f \"$FINAL_WAV\" ]; then\n    aplay -D plughw:0,0 -q \"$FINAL_WAV\"\n    rm \"$RAW_WAV\" \"$FINAL_WAV\"\n  fi\nfi\n\nWhatsApp Voice Notes\n\nFor WhatsApp, output must be OGG/Opus format instead of speaker playback:\n\nsherpa-onnx-offline-tts --vits-length-scale=0.5 --output-filename=raw.wav \"text\"\nffmpeg -i raw.wav \\\n  -af \"asetrate=22050*1.05,aresample=22050,flanger=delay=0:depth=2:regen=50:width=71:speed=0.5,aecho=0.8:0.88:15:0.5,highpass=f=200,treble=g=6\" \\\n  -c:a libopus -b:a 64k output.ogg\n\nThe Full JARVIS Experience\n\njarvis-voice gives your agent a voice. Pair it with ai-humor-ultimate and you give it a soul — dry wit, contextual humor, the kind of understated sarcasm that makes you smirk at your own terminal.\n\nThis pairing is part of a 12-skill cognitive architecture we've been building — voice, humor, memory, reasoning, and more. Research papers included, because we're that kind of obsessive.\n\n👉 Explore the full project: github.com/globalcaos/tinkerclaw\n\nClone it. Fork it. Break it. Make it yours.\n\nSetup: Workspace Files\n\nFor voice to work consistently across new sessions, copy the templates to your workspace root:\n\ncp {baseDir}/templates/VOICE.md ~/.openclaw/workspace/VOICE.md\ncp {baseDir}/templates/SESSION.md ~/.openclaw/workspace/SESSION.md\ncp {baseDir}/templates/HUMOR.md ~/.openclaw/workspace/HUMOR.md\n\nVOICE.md — injected every session, enforces voice output rules (like SOUL.md)\nSESSION.md — session bootstrap that includes voice greeting requirements\nHUMOR.md — humor configuration at maximum frequency with four pattern types (dry wit, self-aware AI, alien observer, literal idiom)\n\nAll three files are auto-loaded by OpenClaw's workspace injection. The agent will speak from the very first reply of every session.\n\nIncluded Files\nFile\tPurpose\nbin/jarvis\tThe TTS + effects script (portable, uses $SHERPA_ONNX_TTS_DIR)\ntemplates/VOICE.md\tVoice enforcement rules (copy to workspace root)\ntemplates/SESSION.md\tSession start with voice greeting (copy to workspace root)\ntemplates/HUMOR.md\tHumor config — four patterns, frequency 1.0 (copy to workspace root)"
  },
  "trust": {
    "sourceLabel": "tencent",
    "provenanceUrl": "https://clawhub.ai/globalcaos/jarvis-voice",
    "publisherUrl": "https://clawhub.ai/globalcaos/jarvis-voice",
    "owner": "globalcaos",
    "version": "2.2.1",
    "license": null,
    "verificationStatus": "Indexed source record"
  },
  "links": {
    "detailUrl": "https://openagent3.xyz/skills/jarvis-voice",
    "downloadUrl": "https://openagent3.xyz/downloads/jarvis-voice",
    "agentUrl": "https://openagent3.xyz/skills/jarvis-voice/agent",
    "manifestUrl": "https://openagent3.xyz/skills/jarvis-voice/agent.json",
    "briefUrl": "https://openagent3.xyz/skills/jarvis-voice/agent.md"
  }
}