{
  "schemaVersion": "1.0",
  "item": {
    "slug": "qwen3-tts-voicedesign",
    "name": "Qwen3-TTS VoiceDesign",
    "source": "tencent",
    "type": "skill",
    "category": "AI 智能",
    "sourceUrl": "https://clawhub.ai/xiaoyaner0201/qwen3-tts-voicedesign",
    "canonicalUrl": "https://clawhub.ai/xiaoyaner0201/qwen3-tts-voicedesign",
    "targetPlatform": "OpenClaw"
  },
  "install": {
    "downloadMode": "redirect",
    "downloadUrl": "/downloads/qwen3-tts-voicedesign",
    "sourceDownloadUrl": "https://wry-manatee-359.convex.site/api/v1/download?slug=qwen3-tts-voicedesign",
    "sourcePlatform": "tencent",
    "targetPlatform": "OpenClaw",
    "installMethod": "Manual import",
    "extraction": "Extract archive",
    "prerequisites": [
      "OpenClaw"
    ],
    "packageFormat": "ZIP package",
    "includedAssets": [
      "SKILL.md",
      "scripts/batch_seeds.sh",
      "scripts/say.sh",
      "scripts/setup.sh",
      "scripts/tts_server.py"
    ],
    "primaryDoc": "SKILL.md",
    "quickSetup": [
      "Download the package from Yavira.",
      "Extract the archive and review SKILL.md first.",
      "Import or place the package into your OpenClaw setup."
    ],
    "agentAssist": {
      "summary": "Hand the extracted package to your coding agent with a concrete install brief instead of figuring it out manually.",
      "steps": [
        "Download the package from Yavira.",
        "Extract it into a folder your agent can access.",
        "Paste one of the prompts below and point your agent at the extracted folder."
      ],
      "prompts": [
        {
          "label": "New install",
          "body": "I downloaded a skill package from Yavira. Read SKILL.md from the extracted folder and install it by following the included instructions. Tell me what you changed and call out any manual steps you could not complete."
        },
        {
          "label": "Upgrade existing",
          "body": "I downloaded an updated skill package from Yavira. Read SKILL.md from the extracted folder, compare it with my current installation, and upgrade it while preserving any custom configuration unless the package docs explicitly say otherwise. Summarize what changed and any follow-up checks I should run."
        }
      ]
    },
    "sourceHealth": {
      "source": "tencent",
      "status": "healthy",
      "reason": "direct_download_ok",
      "recommendedAction": "download",
      "checkedAt": "2026-04-30T16:55:25.780Z",
      "expiresAt": "2026-05-07T16:55:25.780Z",
      "httpStatus": 200,
      "finalUrl": "https://wry-manatee-359.convex.site/api/v1/download?slug=network",
      "contentType": "application/zip",
      "probeMethod": "head",
      "details": {
        "probeUrl": "https://wry-manatee-359.convex.site/api/v1/download?slug=network",
        "contentDisposition": "attachment; filename=\"network-1.0.0.zip\"",
        "redirectLocation": null,
        "bodySnippet": null
      },
      "scope": "source",
      "summary": "Source download looks usable.",
      "detail": "Yavira can redirect you to the upstream package for this source.",
      "primaryActionLabel": "Download for OpenClaw",
      "primaryActionHref": "/downloads/qwen3-tts-voicedesign"
    },
    "validation": {
      "installChecklist": [
        "Use the Yavira download entry.",
        "Review SKILL.md after the package is downloaded.",
        "Confirm the extracted package contains the expected setup assets."
      ],
      "postInstallChecks": [
        "Confirm the extracted package includes the expected docs or setup files.",
        "Validate the skill or prompts are available in your target agent workspace.",
        "Capture any manual follow-up steps the agent could not complete."
      ]
    },
    "downloadPageUrl": "https://openagent3.xyz/downloads/qwen3-tts-voicedesign",
    "agentPageUrl": "https://openagent3.xyz/skills/qwen3-tts-voicedesign/agent",
    "manifestUrl": "https://openagent3.xyz/skills/qwen3-tts-voicedesign/agent.json",
    "briefUrl": "https://openagent3.xyz/skills/qwen3-tts-voicedesign/agent.md"
  },
  "agentAssist": {
    "summary": "Hand the extracted package to your coding agent with a concrete install brief instead of figuring it out manually.",
    "steps": [
      "Download the package from Yavira.",
      "Extract it into a folder your agent can access.",
      "Paste one of the prompts below and point your agent at the extracted folder."
    ],
    "prompts": [
      {
        "label": "New install",
        "body": "I downloaded a skill package from Yavira. Read SKILL.md from the extracted folder and install it by following the included instructions. Tell me what you changed and call out any manual steps you could not complete."
      },
      {
        "label": "Upgrade existing",
        "body": "I downloaded an updated skill package from Yavira. Read SKILL.md from the extracted folder, compare it with my current installation, and upgrade it while preserving any custom configuration unless the package docs explicitly say otherwise. Summarize what changed and any follow-up checks I should run."
      }
    ]
  },
  "documentation": {
    "source": "clawhub",
    "primaryDoc": "SKILL.md",
    "sections": [
      {
        "title": "Qwen3-TTS VoiceDesign",
        "body": "Text → Speech with natural language voice descriptions + seed-based timbre fixation."
      },
      {
        "title": "Quick Start",
        "body": "# Generate speech (uses server defaults)\nTTS_URL=http://your-server:8881 scripts/say.sh \"Hello world!\"\n\n# Save to file\nscripts/say.sh \"Save this\" output.mp3\n\n# Batch compare seeds (voice exploration)\nscripts/batch_seeds.sh \"Hello world!\" 42 123 201 456 789 /tmp/seeds"
      },
      {
        "title": "Environment Variables",
        "body": "All config via env vars — text is the only required argument:\n\nVariable\tDefault\tDescription\nTTS_URL\thttp://localhost:8881\tServer base URL (client side)\nTTS_SEED\t4096\tRandom seed → controls timbre\nTTS_INSTRUCT\t(generic female voice)\tVoice description prompt\nTTS_MODEL_PATH\tQwen/Qwen3-TTS-12Hz-1.7B-VoiceDesign\tModel weights path\nTTS_PORT\t8881\tServer listen port\nTTS_HOST\t0.0.0.0\tServer bind address\nTTS_FORMAT\tmp3\tOutput format: mp3 / wav\n\nServer reads from .env file in its directory. Client scripts read from shell env."
      },
      {
        "title": "Voice Description Example",
        "body": "30岁男性播音员，声音低沉磁性，\n语速稳重从容，咬字清晰标准，\n像新闻联播主播的专业感，又带一点温暖。\n\nTip: Once you've found your perfect voice (description + seed), set them as server defaults in .env. Then client calls only need to pass text."
      },
      {
        "title": "OpenAI-Compatible",
        "body": "curl -X POST $TTS_URL/v1/audio/speech \\\n  -H \"Content-Type: application/json\" \\\n  -d '{\"input\": \"Hello!\"}' -o speech.mp3"
      },
      {
        "title": "Custom (seed + instruct override)",
        "body": "curl -X POST $TTS_URL/tts \\\n  -H \"Content-Type: application/json\" \\\n  -d '{\"text\": \"Hello!\", \"seed\": 201, \"instruct\": \"温柔女生\"}' -o speech.mp3"
      },
      {
        "title": "GET (quick test)",
        "body": "curl \"$TTS_URL/tts?text=Hello&seed=201\" -o test.mp3"
      },
      {
        "title": "Seed Mechanics",
        "body": "Same (description + seed) → same timbre. Different seeds → completely different voices.\n\n⚠️ Seeds are purely random — seed 42 and 43 can sound completely different. Finding a voice = opening blind boxes.\n\nWorkflow: fix description → batch 30-40 seeds → listen → shortlist 2-3 → compare across scenarios → pick."
      },
      {
        "title": "Deploy Your Own",
        "body": "# One-click setup (Python 3.10+ and CUDA GPU required)\nbash scripts/setup.sh ./my-tts\n\n# Configure voice in .env\necho 'TTS_SEED=201' >> ./my-tts/.env\necho 'TTS_INSTRUCT=Your voice description here' >> ./my-tts/.env\n\n# Start server\nbash scripts/setup.sh start ./my-tts\n\nSetup installs: qwen-tts, soundfile, pydub, uvicorn, fastapi, torch (CUDA).\nDownloads VoiceDesign model (~3.5GB) via ModelScope (China) or HuggingFace.\n\nRequirements: CUDA GPU with 4GB+ VRAM, Python 3.10+, ~4GB disk."
      },
      {
        "title": "Scripts",
        "body": "Script\tPurpose\nscripts/say.sh\tGenerate speech — say.sh \"text\" [output.mp3]\nscripts/batch_seeds.sh\tCompare seeds — batch_seeds.sh \"text\" seed1 seed2 ...\nscripts/tts_server.py\tFastAPI server (fully env-configurable)\nscripts/setup.sh\tOne-click deploy (venv + deps + model download)"
      },
      {
        "title": "OpenClaw Integration",
        "body": "In openclaw.json:\n\n{\n  \"env\": { \"OPENAI_TTS_BASE_URL\": \"http://<your-server>:8881/v1\" },\n  \"messages\": {\n    \"tts\": {\n      \"provider\": \"openai\",\n      \"openai\": { \"apiKey\": \"dummy\", \"model\": \"qwen3-tts\", \"voice\": \"default\" },\n      \"timeoutMs\": 120000\n    }\n  }\n}"
      },
      {
        "title": "Server Management",
        "body": "# Health check\ncurl -s $TTS_URL/health\n\n# Start (foreground)\npython tts_server.py\n\n# Start (background, Linux/macOS)\nnohup python tts_server.py > server.log 2>&1 &\n\n# Auto-restart (Windows — scheduled task + guard script)\n# Create tts_guard.bat:\n#   @echo off\n#   :loop\n#   python tts_server.py\n#   timeout /t 10\n#   goto loop\n# Register: schtasks /create /tn \"TTS-Guard\" /tr \"tts_guard.bat\" /sc onlogon /rl highest\n\n# Auto-restart (Linux — systemd)\n# See setup.sh output for systemd unit template\n\n# Stop\n# Linux/macOS: kill $(lsof -ti:8881)\n# Windows: for /f \"tokens=5\" %a in ('netstat -aon ^| findstr :8881') do taskkill /PID %a /F"
      },
      {
        "title": "Troubleshooting",
        "body": "Connection refused → Server not running; start it\n30s+ first request → Cold start (model loading ~60s); subsequent requests 10-15s\nBehind proxy → Set NO_PROXY=<server_ip> on client side\nWindows firewall → netsh advfirewall firewall add rule name=\"TTS\" dir=in action=allow protocol=TCP localport=8881\nNo flash-attn on Windows → Expected; falls back to PyTorch SDPA (slower but works)\nPowerShell corrupts Chinese → Edit .env/config via Python or SCP, not PowerShell Set-Content\nProcess dies on SSH disconnect → Use scheduled task (Windows) or systemd (Linux) instead of foreground"
      },
      {
        "title": "Voice Design Tips",
        "body": "Describe like casting a voice actor:\n\nAge/gender: \"18岁女大学生\" / \"30岁男性播音员\"\nTexture: \"柔和温暖\" / \"清脆明亮\" / \"低沉磁性\"\nEmotion: \"轻柔细腻\" / \"活泼开朗\"\nAccent: \"南方口音软糯\" / \"台湾腔\" / \"东北大碴子味\"\nMetaphor: \"像棉花糖\" / \"像播音主持\" (helps the model capture feeling)\n\n⚠️ Timbre ≠ description. Description controls style/emotion; seed controls timbre. Don't put personality traits (\"灵动俏皮\") in description — that's the seed's job."
      }
    ],
    "body": "Qwen3-TTS VoiceDesign\n\nText → Speech with natural language voice descriptions + seed-based timbre fixation.\n\nQuick Start\n# Generate speech (uses server defaults)\nTTS_URL=http://your-server:8881 scripts/say.sh \"Hello world!\"\n\n# Save to file\nscripts/say.sh \"Save this\" output.mp3\n\n# Batch compare seeds (voice exploration)\nscripts/batch_seeds.sh \"Hello world!\" 42 123 201 456 789 /tmp/seeds\n\nEnvironment Variables\n\nAll config via env vars — text is the only required argument:\n\nVariable\tDefault\tDescription\nTTS_URL\thttp://localhost:8881\tServer base URL (client side)\nTTS_SEED\t4096\tRandom seed → controls timbre\nTTS_INSTRUCT\t(generic female voice)\tVoice description prompt\nTTS_MODEL_PATH\tQwen/Qwen3-TTS-12Hz-1.7B-VoiceDesign\tModel weights path\nTTS_PORT\t8881\tServer listen port\nTTS_HOST\t0.0.0.0\tServer bind address\nTTS_FORMAT\tmp3\tOutput format: mp3 / wav\n\nServer reads from .env file in its directory. Client scripts read from shell env.\n\nVoice Description Example\n30岁男性播音员，声音低沉磁性，\n语速稳重从容，咬字清晰标准，\n像新闻联播主播的专业感，又带一点温暖。\n\n\nTip: Once you've found your perfect voice (description + seed), set them as server defaults in .env. Then client calls only need to pass text.\n\nAPI\nOpenAI-Compatible\ncurl -X POST $TTS_URL/v1/audio/speech \\\n  -H \"Content-Type: application/json\" \\\n  -d '{\"input\": \"Hello!\"}' -o speech.mp3\n\nCustom (seed + instruct override)\ncurl -X POST $TTS_URL/tts \\\n  -H \"Content-Type: application/json\" \\\n  -d '{\"text\": \"Hello!\", \"seed\": 201, \"instruct\": \"温柔女生\"}' -o speech.mp3\n\nGET (quick test)\ncurl \"$TTS_URL/tts?text=Hello&seed=201\" -o test.mp3\n\nSeed Mechanics\n\nSame (description + seed) → same timbre. Different seeds → completely different voices.\n\n⚠️ Seeds are purely random — seed 42 and 43 can sound completely different. Finding a voice = opening blind boxes.\n\nWorkflow: fix description → batch 30-40 seeds → listen → shortlist 2-3 → compare across scenarios → pick.\n\nDeploy Your Own\n# One-click setup (Python 3.10+ and CUDA GPU required)\nbash scripts/setup.sh ./my-tts\n\n# Configure voice in .env\necho 'TTS_SEED=201' >> ./my-tts/.env\necho 'TTS_INSTRUCT=Your voice description here' >> ./my-tts/.env\n\n# Start server\nbash scripts/setup.sh start ./my-tts\n\n\nSetup installs: qwen-tts, soundfile, pydub, uvicorn, fastapi, torch (CUDA). Downloads VoiceDesign model (~3.5GB) via ModelScope (China) or HuggingFace.\n\nRequirements: CUDA GPU with 4GB+ VRAM, Python 3.10+, ~4GB disk.\n\nScripts\nScript\tPurpose\nscripts/say.sh\tGenerate speech — say.sh \"text\" [output.mp3]\nscripts/batch_seeds.sh\tCompare seeds — batch_seeds.sh \"text\" seed1 seed2 ...\nscripts/tts_server.py\tFastAPI server (fully env-configurable)\nscripts/setup.sh\tOne-click deploy (venv + deps + model download)\nOpenClaw Integration\n\nIn openclaw.json:\n\n{\n  \"env\": { \"OPENAI_TTS_BASE_URL\": \"http://<your-server>:8881/v1\" },\n  \"messages\": {\n    \"tts\": {\n      \"provider\": \"openai\",\n      \"openai\": { \"apiKey\": \"dummy\", \"model\": \"qwen3-tts\", \"voice\": \"default\" },\n      \"timeoutMs\": 120000\n    }\n  }\n}\n\nServer Management\n# Health check\ncurl -s $TTS_URL/health\n\n# Start (foreground)\npython tts_server.py\n\n# Start (background, Linux/macOS)\nnohup python tts_server.py > server.log 2>&1 &\n\n# Auto-restart (Windows — scheduled task + guard script)\n# Create tts_guard.bat:\n#   @echo off\n#   :loop\n#   python tts_server.py\n#   timeout /t 10\n#   goto loop\n# Register: schtasks /create /tn \"TTS-Guard\" /tr \"tts_guard.bat\" /sc onlogon /rl highest\n\n# Auto-restart (Linux — systemd)\n# See setup.sh output for systemd unit template\n\n# Stop\n# Linux/macOS: kill $(lsof -ti:8881)\n# Windows: for /f \"tokens=5\" %a in ('netstat -aon ^| findstr :8881') do taskkill /PID %a /F\n\nTroubleshooting\nConnection refused → Server not running; start it\n30s+ first request → Cold start (model loading ~60s); subsequent requests 10-15s\nBehind proxy → Set NO_PROXY=<server_ip> on client side\nWindows firewall → netsh advfirewall firewall add rule name=\"TTS\" dir=in action=allow protocol=TCP localport=8881\nNo flash-attn on Windows → Expected; falls back to PyTorch SDPA (slower but works)\nPowerShell corrupts Chinese → Edit .env/config via Python or SCP, not PowerShell Set-Content\nProcess dies on SSH disconnect → Use scheduled task (Windows) or systemd (Linux) instead of foreground\nVoice Design Tips\n\nDescribe like casting a voice actor:\n\nAge/gender: \"18岁女大学生\" / \"30岁男性播音员\"\nTexture: \"柔和温暖\" / \"清脆明亮\" / \"低沉磁性\"\nEmotion: \"轻柔细腻\" / \"活泼开朗\"\nAccent: \"南方口音软糯\" / \"台湾腔\" / \"东北大碴子味\"\nMetaphor: \"像棉花糖\" / \"像播音主持\" (helps the model capture feeling)\n\n⚠️ Timbre ≠ description. Description controls style/emotion; seed controls timbre. Don't put personality traits (\"灵动俏皮\") in description — that's the seed's job."
  },
  "trust": {
    "sourceLabel": "tencent",
    "provenanceUrl": "https://clawhub.ai/xiaoyaner0201/qwen3-tts-voicedesign",
    "publisherUrl": "https://clawhub.ai/xiaoyaner0201/qwen3-tts-voicedesign",
    "owner": "xiaoyaner0201",
    "version": "1.0.0",
    "license": null,
    "verificationStatus": "Indexed source record"
  },
  "links": {
    "detailUrl": "https://openagent3.xyz/skills/qwen3-tts-voicedesign",
    "downloadUrl": "https://openagent3.xyz/downloads/qwen3-tts-voicedesign",
    "agentUrl": "https://openagent3.xyz/skills/qwen3-tts-voicedesign/agent",
    "manifestUrl": "https://openagent3.xyz/skills/qwen3-tts-voicedesign/agent.json",
    "briefUrl": "https://openagent3.xyz/skills/qwen3-tts-voicedesign/agent.md"
  }
}