{
  "schemaVersion": "1.0",
  "item": {
    "slug": "cosyvoice3-macos",
    "name": "CosyVoice3 macOS",
    "source": "tencent",
    "type": "skill",
    "category": "AI 智能",
    "sourceUrl": "https://clawhub.ai/lhuaizhong/cosyvoice3-macos",
    "canonicalUrl": "https://clawhub.ai/lhuaizhong/cosyvoice3-macos",
    "targetPlatform": "OpenClaw"
  },
  "install": {
    "downloadMode": "redirect",
    "downloadUrl": "/downloads/cosyvoice3-macos",
    "sourceDownloadUrl": "https://wry-manatee-359.convex.site/api/v1/download?slug=cosyvoice3-macos",
    "sourcePlatform": "tencent",
    "targetPlatform": "OpenClaw",
    "installMethod": "Manual import",
    "extraction": "Extract archive",
    "prerequisites": [
      "OpenClaw"
    ],
    "packageFormat": "ZIP package",
    "includedAssets": [
      "SKILL.md",
      "scripts/download_models.py",
      "scripts/install.sh",
      "scripts/tts.py"
    ],
    "primaryDoc": "SKILL.md",
    "quickSetup": [
      "Download the package from Yavira.",
      "Extract the archive and review SKILL.md first.",
      "Import or place the package into your OpenClaw setup."
    ],
    "agentAssist": {
      "summary": "Hand the extracted package to your coding agent with a concrete install brief instead of figuring it out manually.",
      "steps": [
        "Download the package from Yavira.",
        "Extract it into a folder your agent can access.",
        "Paste one of the prompts below and point your agent at the extracted folder."
      ],
      "prompts": [
        {
          "label": "New install",
          "body": "I downloaded a skill package from Yavira. Read SKILL.md from the extracted folder and install it by following the included instructions. Tell me what you changed and call out any manual steps you could not complete."
        },
        {
          "label": "Upgrade existing",
          "body": "I downloaded an updated skill package from Yavira. Read SKILL.md from the extracted folder, compare it with my current installation, and upgrade it while preserving any custom configuration unless the package docs explicitly say otherwise. Summarize what changed and any follow-up checks I should run."
        }
      ]
    },
    "sourceHealth": {
      "source": "tencent",
      "status": "healthy",
      "reason": "direct_download_ok",
      "recommendedAction": "download",
      "checkedAt": "2026-04-30T16:55:25.780Z",
      "expiresAt": "2026-05-07T16:55:25.780Z",
      "httpStatus": 200,
      "finalUrl": "https://wry-manatee-359.convex.site/api/v1/download?slug=network",
      "contentType": "application/zip",
      "probeMethod": "head",
      "details": {
        "probeUrl": "https://wry-manatee-359.convex.site/api/v1/download?slug=network",
        "contentDisposition": "attachment; filename=\"network-1.0.0.zip\"",
        "redirectLocation": null,
        "bodySnippet": null
      },
      "scope": "source",
      "summary": "Source download looks usable.",
      "detail": "Yavira can redirect you to the upstream package for this source.",
      "primaryActionLabel": "Download for OpenClaw",
      "primaryActionHref": "/downloads/cosyvoice3-macos"
    },
    "validation": {
      "installChecklist": [
        "Use the Yavira download entry.",
        "Review SKILL.md after the package is downloaded.",
        "Confirm the extracted package contains the expected setup assets."
      ],
      "postInstallChecks": [
        "Confirm the extracted package includes the expected docs or setup files.",
        "Validate the skill or prompts are available in your target agent workspace.",
        "Capture any manual follow-up steps the agent could not complete."
      ]
    },
    "downloadPageUrl": "https://openagent3.xyz/downloads/cosyvoice3-macos",
    "agentPageUrl": "https://openagent3.xyz/skills/cosyvoice3-macos/agent",
    "manifestUrl": "https://openagent3.xyz/skills/cosyvoice3-macos/agent.json",
    "briefUrl": "https://openagent3.xyz/skills/cosyvoice3-macos/agent.md"
  },
  "agentAssist": {
    "summary": "Hand the extracted package to your coding agent with a concrete install brief instead of figuring it out manually.",
    "steps": [
      "Download the package from Yavira.",
      "Extract it into a folder your agent can access.",
      "Paste one of the prompts below and point your agent at the extracted folder."
    ],
    "prompts": [
      {
        "label": "New install",
        "body": "I downloaded a skill package from Yavira. Read SKILL.md from the extracted folder and install it by following the included instructions. Tell me what you changed and call out any manual steps you could not complete."
      },
      {
        "label": "Upgrade existing",
        "body": "I downloaded an updated skill package from Yavira. Read SKILL.md from the extracted folder, compare it with my current installation, and upgrade it while preserving any custom configuration unless the package docs explicitly say otherwise. Summarize what changed and any follow-up checks I should run."
      }
    ]
  },
  "documentation": {
    "source": "clawhub",
    "primaryDoc": "SKILL.md",
    "sections": [
      {
        "title": "CosyVoice3 TTS",
        "body": "Local text-to-speech using Alibaba's CosyVoice3 on macOS Apple Silicon."
      },
      {
        "title": "Overview",
        "body": "CosyVoice3 is an advanced TTS system based on large language models, supporting:\n\n9 languages: Chinese, English, Japanese, Korean, German, Spanish, French, Italian, Russian\n18+ Chinese dialects: Cantonese, Sichuan, Dongbei, Shanghai, etc.\nZero-shot voice cloning: Clone any voice from 3-10 seconds of audio\nCross-lingual synthesis: Speak Chinese with English voice or vice versa\nFine-grained control: Emotions, speed, volume via text tags"
      },
      {
        "title": "Prerequisites",
        "body": "macOS with Apple Silicon (M1/M2/M3)\nPython 3.10\nConda installed\n~5GB disk space for models"
      },
      {
        "title": "Installation",
        "body": "Run the installation script:\n\ncd /Users/lhz/.openclaw/workspace/skills/cosyvoice3/scripts\nbash install.sh\n\nThis will:\n\nCreate conda environment cosyvoice\nInstall PyTorch (CPU version for Apple Silicon)\nInstall CosyVoice dependencies\nDownload Fun-CosyVoice3-0.5B model (~2GB)"
      },
      {
        "title": "Quick Start - Basic TTS",
        "body": "重要：CosyVoice3 需要在参考文本中添加 <|endofprompt|> 标记！\n\ncd /Users/lhz/.openclaw/workspace/cosyvoice3-repo\nexport PATH=\"$HOME/miniconda3/bin:$PATH\"\nconda activate cosyvoice\n\npython -c \"\nimport sys\nsys.path.append('third_party/Matcha-TTS')\nfrom cosyvoice.cli.cosyvoice import AutoModel\nimport torchaudio\n\ncosyvoice = AutoModel(model_dir='pretrained_models/Fun-CosyVoice3-0.5B')\nfor i, j in enumerate(cosyvoice.inference_zero_shot(\n    '你好，这是CosyVoice3语音合成测试。', \n    '希望你以后能够做的比我还好呦。<|endofprompt|>',  # 注意这个标记！\n    'asset/zero_shot_prompt.wav'\n)):\n    torchaudio.save('output.wav', j['tts_speech'], cosyvoice.sample_rate)\nprint('Generated: output.wav')\n\""
      },
      {
        "title": "Using the TTS Script",
        "body": "Generate speech from text:\n\ncd /Users/lhz/.openclaw/workspace/skills/cosyvoice3/scripts\nconda activate cosyvoice\n\n# Basic TTS with default voice\npython tts.py \"你好，这是一个测试。\"\n\n# With custom reference audio for voice cloning\npython tts.py \"你好，这是克隆的声音。\" --reference /path/to/reference.wav\n\n# Cross-lingual (English text with Chinese voice)\npython tts.py \"Hello, this is cross-lingual synthesis.\" --reference asset/zero_shot_prompt.wav --lang en\n\n# With speed control\npython tts.py \"这是一段快速的语音。\" --speed 1.5\n\n# Save to specific path\npython tts.py \"你好。\" --output ~/Desktop/greeting.wav"
      },
      {
        "title": "Available Assets",
        "body": "Reference audio files in cosyvoice3-repo/asset/:\n\nzero_shot_prompt.wav - Default Chinese female voice\ncross_lingual_prompt.wav - English prompt for cross-lingual"
      },
      {
        "title": "Voice Cloning",
        "body": "Clone a voice from 3-10 seconds of reference audio:\n\nfrom cosyvoice.cli.cosyvoice import AutoModel\nimport torchaudio\n\ncosyvoice = AutoModel(model_dir='pretrained_models/Fun-CosyVoice3-0.5B')\n\n# Clone voice and generate\nfor i, j in enumerate(cosyvoice.inference_zero_shot(\n    '这是克隆后的声音在说话。',\n    'Reference text transcription',\n    '/path/to/reference.wav'\n)):\n    torchaudio.save('cloned.wav', j['tts_speech'], cosyvoice.sample_rate)"
      },
      {
        "title": "Fine-Grained Control",
        "body": "Control prosody with special tags:\n\n# Add laughter\n\"他突然[laughter]笑了起来[laughter]。\"\n\n# Add breathing\n\"他说完这句话[breath]，深吸一口气。\"\n\n# Strong emphasis\n\"这是<strong>非常重要</strong>的。\"\n\n# Combined\n\"在面对挑战时，他展现了非凡的<strong>勇气</strong>与<strong>智慧</strong>[breath]。\""
      },
      {
        "title": "Dialect Support",
        "body": "Use instruct mode for dialects:\n\ncosyvoice = AutoModel(model_dir='pretrained_models/CosyVoice-300M-Instruct')\n\nfor i, j in enumerate(cosyvoice.inference_instruct(\n    '你好，这是测试语音。',\n    '中文男',\n    '用四川话说这句话<|endofprompt|>'\n)):\n    torchaudio.save('sichuan.wav', j['tts_speech'], cosyvoice.sample_rate)"
      },
      {
        "title": "Model not found",
        "body": "If you get \"model not found\" errors, download models manually:\n\ncd /Users/lhz/.openclaw/workspace/cosyvoice3-repo\nexport PATH=\"$HOME/miniconda3/bin:$PATH\"\nconda activate cosyvoice\n\npython -c \"\nfrom modelscope import snapshot_download\nsnapshot_download('FunAudioLLM/Fun-CosyVoice3-0.5B-2512', local_dir='pretrained_models/Fun-CosyVoice3-0.5B')\n\""
      },
      {
        "title": "Memory issues",
        "body": "For long text, split into sentences:\n\ntext = \"很长的文本...\"\nsentences = text.split('。')\nfor sent in sentences:\n    if sent.strip():\n        # Process each sentence"
      },
      {
        "title": "Audio format",
        "body": "Reference audio requirements:\n\nFormat: WAV, MP3\nSample rate: 16kHz+ (automatically resampled)\nDuration: 3-10 seconds optimal\nContent: Clear speech, minimal background noise"
      },
      {
        "title": "Scripts",
        "body": "install.sh - Installation script for macOS\ntts.py - Main TTS script with CLI interface\ndownload_models.py - Download pretrained models"
      },
      {
        "title": "References",
        "body": "CosyVoice GitHub\nFun-CosyVoice3 Demo"
      },
      {
        "title": "Model Files",
        "body": "Located in cosyvoice3-repo/pretrained_models/:\n\nFun-CosyVoice3-0.5B/ - Main model (recommended)\nCosyVoice2-0.5B/ - Previous version\nCosyVoice-300M/ - Lighter model\nCosyVoice-300M-SFT/ - SFT version\nCosyVoice-300M-Instruct/ - Instruct version"
      },
      {
        "title": "Notes",
        "body": "First inference takes ~30 seconds (model warmup)\nSubsequent inferences are faster\nApple Silicon uses CPU mode (no CUDA)\nRTF (real-time factor) ~0.3-0.5 on M-series chips\nModel files are cached locally after first download"
      }
    ],
    "body": "CosyVoice3 TTS\n\nLocal text-to-speech using Alibaba's CosyVoice3 on macOS Apple Silicon.\n\nOverview\n\nCosyVoice3 is an advanced TTS system based on large language models, supporting:\n\n9 languages: Chinese, English, Japanese, Korean, German, Spanish, French, Italian, Russian\n18+ Chinese dialects: Cantonese, Sichuan, Dongbei, Shanghai, etc.\nZero-shot voice cloning: Clone any voice from 3-10 seconds of audio\nCross-lingual synthesis: Speak Chinese with English voice or vice versa\nFine-grained control: Emotions, speed, volume via text tags\nPrerequisites\nmacOS with Apple Silicon (M1/M2/M3)\nPython 3.10\nConda installed\n~5GB disk space for models\nInstallation\n\nRun the installation script:\n\ncd /Users/lhz/.openclaw/workspace/skills/cosyvoice3/scripts\nbash install.sh\n\n\nThis will:\n\nCreate conda environment cosyvoice\nInstall PyTorch (CPU version for Apple Silicon)\nInstall CosyVoice dependencies\nDownload Fun-CosyVoice3-0.5B model (~2GB)\nUsage\nQuick Start - Basic TTS\n\n重要：CosyVoice3 需要在参考文本中添加 <|endofprompt|> 标记！\n\ncd /Users/lhz/.openclaw/workspace/cosyvoice3-repo\nexport PATH=\"$HOME/miniconda3/bin:$PATH\"\nconda activate cosyvoice\n\npython -c \"\nimport sys\nsys.path.append('third_party/Matcha-TTS')\nfrom cosyvoice.cli.cosyvoice import AutoModel\nimport torchaudio\n\ncosyvoice = AutoModel(model_dir='pretrained_models/Fun-CosyVoice3-0.5B')\nfor i, j in enumerate(cosyvoice.inference_zero_shot(\n    '你好，这是CosyVoice3语音合成测试。', \n    '希望你以后能够做的比我还好呦。<|endofprompt|>',  # 注意这个标记！\n    'asset/zero_shot_prompt.wav'\n)):\n    torchaudio.save('output.wav', j['tts_speech'], cosyvoice.sample_rate)\nprint('Generated: output.wav')\n\"\n\nUsing the TTS Script\n\nGenerate speech from text:\n\ncd /Users/lhz/.openclaw/workspace/skills/cosyvoice3/scripts\nconda activate cosyvoice\n\n# Basic TTS with default voice\npython tts.py \"你好，这是一个测试。\"\n\n# With custom reference audio for voice cloning\npython tts.py \"你好，这是克隆的声音。\" --reference /path/to/reference.wav\n\n# Cross-lingual (English text with Chinese voice)\npython tts.py \"Hello, this is cross-lingual synthesis.\" --reference asset/zero_shot_prompt.wav --lang en\n\n# With speed control\npython tts.py \"这是一段快速的语音。\" --speed 1.5\n\n# Save to specific path\npython tts.py \"你好。\" --output ~/Desktop/greeting.wav\n\nAvailable Assets\n\nReference audio files in cosyvoice3-repo/asset/:\n\nzero_shot_prompt.wav - Default Chinese female voice\ncross_lingual_prompt.wav - English prompt for cross-lingual\nAdvanced Features\nVoice Cloning\n\nClone a voice from 3-10 seconds of reference audio:\n\nfrom cosyvoice.cli.cosyvoice import AutoModel\nimport torchaudio\n\ncosyvoice = AutoModel(model_dir='pretrained_models/Fun-CosyVoice3-0.5B')\n\n# Clone voice and generate\nfor i, j in enumerate(cosyvoice.inference_zero_shot(\n    '这是克隆后的声音在说话。',\n    'Reference text transcription',\n    '/path/to/reference.wav'\n)):\n    torchaudio.save('cloned.wav', j['tts_speech'], cosyvoice.sample_rate)\n\nFine-Grained Control\n\nControl prosody with special tags:\n\n# Add laughter\n\"他突然[laughter]笑了起来[laughter]。\"\n\n# Add breathing\n\"他说完这句话[breath]，深吸一口气。\"\n\n# Strong emphasis\n\"这是<strong>非常重要</strong>的。\"\n\n# Combined\n\"在面对挑战时，他展现了非凡的<strong>勇气</strong>与<strong>智慧</strong>[breath]。\"\n\nDialect Support\n\nUse instruct mode for dialects:\n\ncosyvoice = AutoModel(model_dir='pretrained_models/CosyVoice-300M-Instruct')\n\nfor i, j in enumerate(cosyvoice.inference_instruct(\n    '你好，这是测试语音。',\n    '中文男',\n    '用四川话说这句话<|endofprompt|>'\n)):\n    torchaudio.save('sichuan.wav', j['tts_speech'], cosyvoice.sample_rate)\n\nTroubleshooting\nModel not found\n\nIf you get \"model not found\" errors, download models manually:\n\ncd /Users/lhz/.openclaw/workspace/cosyvoice3-repo\nexport PATH=\"$HOME/miniconda3/bin:$PATH\"\nconda activate cosyvoice\n\npython -c \"\nfrom modelscope import snapshot_download\nsnapshot_download('FunAudioLLM/Fun-CosyVoice3-0.5B-2512', local_dir='pretrained_models/Fun-CosyVoice3-0.5B')\n\"\n\nMemory issues\n\nFor long text, split into sentences:\n\ntext = \"很长的文本...\"\nsentences = text.split('。')\nfor sent in sentences:\n    if sent.strip():\n        # Process each sentence\n\nAudio format\n\nReference audio requirements:\n\nFormat: WAV, MP3\nSample rate: 16kHz+ (automatically resampled)\nDuration: 3-10 seconds optimal\nContent: Clear speech, minimal background noise\nResources\nScripts\ninstall.sh - Installation script for macOS\ntts.py - Main TTS script with CLI interface\ndownload_models.py - Download pretrained models\nReferences\nCosyVoice GitHub\nFun-CosyVoice3 Demo\nModel Files\n\nLocated in cosyvoice3-repo/pretrained_models/:\n\nFun-CosyVoice3-0.5B/ - Main model (recommended)\nCosyVoice2-0.5B/ - Previous version\nCosyVoice-300M/ - Lighter model\nCosyVoice-300M-SFT/ - SFT version\nCosyVoice-300M-Instruct/ - Instruct version\nNotes\nFirst inference takes ~30 seconds (model warmup)\nSubsequent inferences are faster\nApple Silicon uses CPU mode (no CUDA)\nRTF (real-time factor) ~0.3-0.5 on M-series chips\nModel files are cached locally after first download"
  },
  "trust": {
    "sourceLabel": "tencent",
    "provenanceUrl": "https://clawhub.ai/lhuaizhong/cosyvoice3-macos",
    "publisherUrl": "https://clawhub.ai/lhuaizhong/cosyvoice3-macos",
    "owner": "lhuaizhong",
    "version": "1.0.0",
    "license": null,
    "verificationStatus": "Indexed source record"
  },
  "links": {
    "detailUrl": "https://openagent3.xyz/skills/cosyvoice3-macos",
    "downloadUrl": "https://openagent3.xyz/downloads/cosyvoice3-macos",
    "agentUrl": "https://openagent3.xyz/skills/cosyvoice3-macos/agent",
    "manifestUrl": "https://openagent3.xyz/skills/cosyvoice3-macos/agent.json",
    "briefUrl": "https://openagent3.xyz/skills/cosyvoice3-macos/agent.md"
  }
}