{
  "schemaVersion": "1.0",
  "item": {
    "slug": "qwen3-tts-mlx",
    "name": "Qwen3 Tts Mlx",
    "source": "tencent",
    "type": "skill",
    "category": "AI 智能",
    "sourceUrl": "https://clawhub.ai/h1bomb/qwen3-tts-mlx",
    "canonicalUrl": "https://clawhub.ai/h1bomb/qwen3-tts-mlx",
    "targetPlatform": "OpenClaw"
  },
  "install": {
    "downloadMode": "redirect",
    "downloadUrl": "/downloads/qwen3-tts-mlx",
    "sourceDownloadUrl": "https://wry-manatee-359.convex.site/api/v1/download?slug=qwen3-tts-mlx",
    "sourcePlatform": "tencent",
    "targetPlatform": "OpenClaw",
    "installMethod": "Manual import",
    "extraction": "Extract archive",
    "prerequisites": [
      "OpenClaw"
    ],
    "packageFormat": "ZIP package",
    "includedAssets": [
      "SKILL.md",
      "references/dubbing_format.md",
      "scripts/batch_dubbing.py",
      "scripts/run_tts.py"
    ],
    "primaryDoc": "SKILL.md",
    "quickSetup": [
      "Download the package from Yavira.",
      "Extract the archive and review SKILL.md first.",
      "Import or place the package into your OpenClaw setup."
    ],
    "agentAssist": {
      "summary": "Hand the extracted package to your coding agent with a concrete install brief instead of figuring it out manually.",
      "steps": [
        "Download the package from Yavira.",
        "Extract it into a folder your agent can access.",
        "Paste one of the prompts below and point your agent at the extracted folder."
      ],
      "prompts": [
        {
          "label": "New install",
          "body": "I downloaded a skill package from Yavira. Read SKILL.md from the extracted folder and install it by following the included instructions. Tell me what you changed and call out any manual steps you could not complete."
        },
        {
          "label": "Upgrade existing",
          "body": "I downloaded an updated skill package from Yavira. Read SKILL.md from the extracted folder, compare it with my current installation, and upgrade it while preserving any custom configuration unless the package docs explicitly say otherwise. Summarize what changed and any follow-up checks I should run."
        }
      ]
    },
    "sourceHealth": {
      "source": "tencent",
      "status": "healthy",
      "reason": "direct_download_ok",
      "recommendedAction": "download",
      "checkedAt": "2026-04-30T16:55:25.780Z",
      "expiresAt": "2026-05-07T16:55:25.780Z",
      "httpStatus": 200,
      "finalUrl": "https://wry-manatee-359.convex.site/api/v1/download?slug=network",
      "contentType": "application/zip",
      "probeMethod": "head",
      "details": {
        "probeUrl": "https://wry-manatee-359.convex.site/api/v1/download?slug=network",
        "contentDisposition": "attachment; filename=\"network-1.0.0.zip\"",
        "redirectLocation": null,
        "bodySnippet": null
      },
      "scope": "source",
      "summary": "Source download looks usable.",
      "detail": "Yavira can redirect you to the upstream package for this source.",
      "primaryActionLabel": "Download for OpenClaw",
      "primaryActionHref": "/downloads/qwen3-tts-mlx"
    },
    "validation": {
      "installChecklist": [
        "Use the Yavira download entry.",
        "Review SKILL.md after the package is downloaded.",
        "Confirm the extracted package contains the expected setup assets."
      ],
      "postInstallChecks": [
        "Confirm the extracted package includes the expected docs or setup files.",
        "Validate the skill or prompts are available in your target agent workspace.",
        "Capture any manual follow-up steps the agent could not complete."
      ]
    },
    "downloadPageUrl": "https://openagent3.xyz/downloads/qwen3-tts-mlx",
    "agentPageUrl": "https://openagent3.xyz/skills/qwen3-tts-mlx/agent",
    "manifestUrl": "https://openagent3.xyz/skills/qwen3-tts-mlx/agent.json",
    "briefUrl": "https://openagent3.xyz/skills/qwen3-tts-mlx/agent.md"
  },
  "agentAssist": {
    "summary": "Hand the extracted package to your coding agent with a concrete install brief instead of figuring it out manually.",
    "steps": [
      "Download the package from Yavira.",
      "Extract it into a folder your agent can access.",
      "Paste one of the prompts below and point your agent at the extracted folder."
    ],
    "prompts": [
      {
        "label": "New install",
        "body": "I downloaded a skill package from Yavira. Read SKILL.md from the extracted folder and install it by following the included instructions. Tell me what you changed and call out any manual steps you could not complete."
      },
      {
        "label": "Upgrade existing",
        "body": "I downloaded an updated skill package from Yavira. Read SKILL.md from the extracted folder, compare it with my current installation, and upgrade it while preserving any custom configuration unless the package docs explicitly say otherwise. Summarize what changed and any follow-up checks I should run."
      }
    ]
  },
  "documentation": {
    "source": "clawhub",
    "primaryDoc": "SKILL.md",
    "sections": [
      {
        "title": "Qwen3-TTS MLX",
        "body": "Run Qwen3-TTS locally on Apple Silicon (M1/M2/M3/M4) using MLX. Supports 11 languages, 9 built-in voices, voice cloning, and voice design from text descriptions."
      },
      {
        "title": "When to Use",
        "body": "Generate speech fully offline on a Mac\nProduce narration, audiobooks, podcasts, or video voiceovers\nCreate multilingual TTS with controllable style and emotion\nClone any voice from a short audio sample\nDesign custom voices from text descriptions"
      },
      {
        "title": "Install",
        "body": "pip install mlx-audio\nbrew install ffmpeg"
      },
      {
        "title": "Basic Usage",
        "body": "python scripts/run_tts.py custom-voice \\\n  --text \"Hello, welcome to local text to speech.\" \\\n  --voice Ryan \\\n  --output output.wav"
      },
      {
        "title": "With Style Control",
        "body": "python scripts/run_tts.py custom-voice \\\n  --text \"Breaking news: local AI model achieves human-level speech.\" \\\n  --voice Uncle_Fu \\\n  --instruct \"news anchor tone, calm and authoritative\" \\\n  --output news.wav"
      },
      {
        "title": "Model Variants",
        "body": "VariantModelSizeMemoryUse CaseCustomVoicemlx-community/Qwen3-TTS-12Hz-0.6B-CustomVoice-4bit~1GB~4GBBuilt-in voices + style control (recommended)VoiceDesignmlx-community/Qwen3-TTS-12Hz-1.7B-VoiceDesign-5bit~2GB~5GBCreate voices from text descriptionsBasemlx-community/Qwen3-TTS-12Hz-0.6B-Base-4bit~1GB~4GBVoice cloning from reference audio"
      },
      {
        "title": "Supported Languages",
        "body": "LanguageCodeNotesAuto-detectautoDefault, detects from textChineseChineseMandarinEnglishEnglishJapaneseJapaneseKoreanKoreanFrenchFrenchGermanGermanSpanishSpanishPortuguesePortugueseItalianItalianRussianRussian"
      },
      {
        "title": "Built-in Voices",
        "body": "VoiceLanguageCharacterVivianChineseFemale, bright, youngSerenaChineseFemale, gentle, softUncle_FuChineseMale, authoritative, news anchorDylanChineseMale, Beijing dialectEricChineseMale, Sichuan dialectRyanEnglishMale, energeticAidenEnglishMale, clear, neutralOno_AnnaJapaneseFemaleSoheeKoreanFemale\n\nVoice Selection Guide:\n\nScenarioRecommended VoiceChinese news/narrationUncle_FuChinese casual/livelyEricChinese female, professionalVivianChinese female, storytellingSerenaEnglish energetic contentRyanEnglish neutral/educationalAidenJapanese contentOno_AnnaKorean contentSohee"
      },
      {
        "title": "1) CustomVoice",
        "body": "Use built-in voices with optional emotion/style control via --instruct.\n\npython scripts/run_tts.py custom-voice \\\n  --text \"This is amazing news!\" \\\n  --voice Vivian \\\n  --instruct \"excited and happy\" \\\n  --output excited.wav\n\nStyle instruction examples:\n\n\"calm and warm\" - Soft, friendly delivery\n\"news anchor, authoritative\" - Professional broadcast style\n\"excited and energetic\" - High energy, enthusiastic\n\"sad and melancholic\" - Emotional, somber tone\n\"whispering, intimate\" - Quiet, close-mic feel"
      },
      {
        "title": "2) VoiceDesign",
        "body": "Create a completely new voice by describing it in natural language.\n\npython scripts/run_tts.py voice-design \\\n  --text \"Welcome to our podcast.\" \\\n  --instruct \"warm, mature male narrator with low pitch and gentle tone\" \\\n  --output podcast_intro.wav\n\nVoice description examples:\n\n\"young cheerful female with high pitch\"\n\"elderly wise male with deep resonant voice\"\n\"professional female news anchor, clear articulation\"\n\"friendly young male, casual and relaxed\""
      },
      {
        "title": "3) VoiceClone",
        "body": "Clone any voice from a reference audio sample (5-10 seconds recommended).\n\npython scripts/run_tts.py voice-clone \\\n  --text \"This is my cloned voice speaking new content.\" \\\n  --ref_audio reference.wav \\\n  --ref_text \"The exact transcript of the reference audio\" \\\n  --output cloned.wav\n\nTips for voice cloning:\n\nUse clean audio without background noise\n5-10 seconds of speech works best\nProvide accurate transcript of the reference\nReference and output language should match"
      },
      {
        "title": "CLI Parameters",
        "body": "ParameterRequiredDefaultDescription--textYes-Text to synthesize--voiceNoVivianBuilt-in voice (CustomVoice only)--lang_codeNoautoLanguage code--instructNo-Style control or voice description--speedNo1.0Speech speed multiplier--temperatureNo0.7Sampling temperature (higher = more variation)--modelNo(per mode)Override default model--outputNo-Output file path--out-dirNo./outputsOutput directory when --output not set--ref_audioVoiceClone-Reference audio file--ref_textVoiceClone-Reference audio transcript"
      },
      {
        "title": "Using generate_audio (recommended)",
        "body": "from mlx_audio.tts.generate import generate_audio\n\n# CustomVoice with style control\ngenerate_audio(\n    text=\"Hello from Qwen3-TTS!\",\n    model=\"mlx-community/Qwen3-TTS-12Hz-0.6B-CustomVoice-4bit\",\n    voice=\"Ryan\",\n    lang_code=\"english\",\n    instruct=\"friendly and warm\",\n    output_path=\".\",\n    file_prefix=\"hello\",\n    audio_format=\"wav\",\n    join_audio=True,\n    verbose=True,\n)"
      },
      {
        "title": "Using Model directly",
        "body": "from mlx_audio.tts.utils import load\nimport soundfile as sf\nimport numpy as np\n\n# Load model\nmodel = load(\"mlx-community/Qwen3-TTS-12Hz-0.6B-CustomVoice-4bit\")\n\n# Generate audio (returns a generator)\naudio_chunks = []\nfor chunk in model.generate_custom_voice(\n    text=\"Hello from Qwen3-TTS.\",\n    speaker=\"Ryan\",\n    language=\"english\",\n    instruct=\"clear, steady delivery\"\n):\n    if hasattr(chunk, 'audio') and chunk.audio is not None:\n        audio_chunks.append(chunk.audio)\n\n# Combine and save\naudio = np.concatenate(audio_chunks)\nsf.write(\"output.wav\", audio, 24000)"
      },
      {
        "title": "VoiceDesign",
        "body": "from mlx_audio.tts.generate import generate_audio\n\ngenerate_audio(\n    text=\"Welcome to the show.\",\n    model=\"mlx-community/Qwen3-TTS-12Hz-1.7B-VoiceDesign-5bit\",\n    instruct=\"warm, friendly female narrator with medium pitch\",\n    lang_code=\"english\",\n    output_path=\".\",\n    file_prefix=\"voice_design\",\n    join_audio=True,\n)"
      },
      {
        "title": "VoiceClone",
        "body": "from mlx_audio.tts.generate import generate_audio\n\ngenerate_audio(\n    text=\"New content in the cloned voice.\",\n    model=\"mlx-community/Qwen3-TTS-12Hz-0.6B-Base-4bit\",\n    ref_audio=\"reference.wav\",\n    ref_text=\"Transcript of the reference audio\",\n    output_path=\".\",\n    file_prefix=\"cloned\",\n    join_audio=True,\n)"
      },
      {
        "title": "Batch Processing",
        "body": "Use scripts/batch_dubbing.py for processing multiple lines:\n\npython scripts/batch_dubbing.py \\\n  --input dubbing.json \\\n  --out-dir outputs\n\nSee references/dubbing_format.md for the JSON format."
      },
      {
        "title": "Performance",
        "body": "MetricValueSample rate24,000 HzReal-time factor~0.7x (faster than real-time)Peak memory~4-6 GBFirst runDownloads model (~1-2GB)"
      },
      {
        "title": "Troubleshooting",
        "body": "IssueSolutionSlow generationUse 4-bit CustomVoice modelUnnatural pausesAdd punctuation, keep sentences shortWrong language detectedSpecify --lang_code explicitlyVoice cloning qualityUse cleaner reference audio, accurate transcriptTokenizer warningsHarmless, can be ignoredOut of memoryClose other apps, use 4-bit model"
      }
    ],
    "body": "Qwen3-TTS MLX\n\nRun Qwen3-TTS locally on Apple Silicon (M1/M2/M3/M4) using MLX. Supports 11 languages, 9 built-in voices, voice cloning, and voice design from text descriptions.\n\nWhen to Use\nGenerate speech fully offline on a Mac\nProduce narration, audiobooks, podcasts, or video voiceovers\nCreate multilingual TTS with controllable style and emotion\nClone any voice from a short audio sample\nDesign custom voices from text descriptions\nQuick Start\nInstall\npip install mlx-audio\nbrew install ffmpeg\n\nBasic Usage\npython scripts/run_tts.py custom-voice \\\n  --text \"Hello, welcome to local text to speech.\" \\\n  --voice Ryan \\\n  --output output.wav\n\nWith Style Control\npython scripts/run_tts.py custom-voice \\\n  --text \"Breaking news: local AI model achieves human-level speech.\" \\\n  --voice Uncle_Fu \\\n  --instruct \"news anchor tone, calm and authoritative\" \\\n  --output news.wav\n\nModel Variants\nVariant\tModel\tSize\tMemory\tUse Case\nCustomVoice\tmlx-community/Qwen3-TTS-12Hz-0.6B-CustomVoice-4bit\t~1GB\t~4GB\tBuilt-in voices + style control (recommended)\nVoiceDesign\tmlx-community/Qwen3-TTS-12Hz-1.7B-VoiceDesign-5bit\t~2GB\t~5GB\tCreate voices from text descriptions\nBase\tmlx-community/Qwen3-TTS-12Hz-0.6B-Base-4bit\t~1GB\t~4GB\tVoice cloning from reference audio\nSupported Languages\nLanguage\tCode\tNotes\nAuto-detect\tauto\tDefault, detects from text\nChinese\tChinese\tMandarin\nEnglish\tEnglish\t\nJapanese\tJapanese\t\nKorean\tKorean\t\nFrench\tFrench\t\nGerman\tGerman\t\nSpanish\tSpanish\t\nPortuguese\tPortuguese\t\nItalian\tItalian\t\nRussian\tRussian\t\nBuilt-in Voices\nVoice\tLanguage\tCharacter\nVivian\tChinese\tFemale, bright, young\nSerena\tChinese\tFemale, gentle, soft\nUncle_Fu\tChinese\tMale, authoritative, news anchor\nDylan\tChinese\tMale, Beijing dialect\nEric\tChinese\tMale, Sichuan dialect\nRyan\tEnglish\tMale, energetic\nAiden\tEnglish\tMale, clear, neutral\nOno_Anna\tJapanese\tFemale\nSohee\tKorean\tFemale\n\nVoice Selection Guide:\n\nScenario\tRecommended Voice\nChinese news/narration\tUncle_Fu\nChinese casual/lively\tEric\nChinese female, professional\tVivian\nChinese female, storytelling\tSerena\nEnglish energetic content\tRyan\nEnglish neutral/educational\tAiden\nJapanese content\tOno_Anna\nKorean content\tSohee\nModes\n1) CustomVoice\n\nUse built-in voices with optional emotion/style control via --instruct.\n\npython scripts/run_tts.py custom-voice \\\n  --text \"This is amazing news!\" \\\n  --voice Vivian \\\n  --instruct \"excited and happy\" \\\n  --output excited.wav\n\n\nStyle instruction examples:\n\n\"calm and warm\" - Soft, friendly delivery\n\"news anchor, authoritative\" - Professional broadcast style\n\"excited and energetic\" - High energy, enthusiastic\n\"sad and melancholic\" - Emotional, somber tone\n\"whispering, intimate\" - Quiet, close-mic feel\n2) VoiceDesign\n\nCreate a completely new voice by describing it in natural language.\n\npython scripts/run_tts.py voice-design \\\n  --text \"Welcome to our podcast.\" \\\n  --instruct \"warm, mature male narrator with low pitch and gentle tone\" \\\n  --output podcast_intro.wav\n\n\nVoice description examples:\n\n\"young cheerful female with high pitch\"\n\"elderly wise male with deep resonant voice\"\n\"professional female news anchor, clear articulation\"\n\"friendly young male, casual and relaxed\"\n3) VoiceClone\n\nClone any voice from a reference audio sample (5-10 seconds recommended).\n\npython scripts/run_tts.py voice-clone \\\n  --text \"This is my cloned voice speaking new content.\" \\\n  --ref_audio reference.wav \\\n  --ref_text \"The exact transcript of the reference audio\" \\\n  --output cloned.wav\n\n\nTips for voice cloning:\n\nUse clean audio without background noise\n5-10 seconds of speech works best\nProvide accurate transcript of the reference\nReference and output language should match\nCLI Parameters\nParameter\tRequired\tDefault\tDescription\n--text\tYes\t-\tText to synthesize\n--voice\tNo\tVivian\tBuilt-in voice (CustomVoice only)\n--lang_code\tNo\tauto\tLanguage code\n--instruct\tNo\t-\tStyle control or voice description\n--speed\tNo\t1.0\tSpeech speed multiplier\n--temperature\tNo\t0.7\tSampling temperature (higher = more variation)\n--model\tNo\t(per mode)\tOverride default model\n--output\tNo\t-\tOutput file path\n--out-dir\tNo\t./outputs\tOutput directory when --output not set\n--ref_audio\tVoiceClone\t-\tReference audio file\n--ref_text\tVoiceClone\t-\tReference audio transcript\nPython API\nUsing generate_audio (recommended)\nfrom mlx_audio.tts.generate import generate_audio\n\n# CustomVoice with style control\ngenerate_audio(\n    text=\"Hello from Qwen3-TTS!\",\n    model=\"mlx-community/Qwen3-TTS-12Hz-0.6B-CustomVoice-4bit\",\n    voice=\"Ryan\",\n    lang_code=\"english\",\n    instruct=\"friendly and warm\",\n    output_path=\".\",\n    file_prefix=\"hello\",\n    audio_format=\"wav\",\n    join_audio=True,\n    verbose=True,\n)\n\nUsing Model directly\nfrom mlx_audio.tts.utils import load\nimport soundfile as sf\nimport numpy as np\n\n# Load model\nmodel = load(\"mlx-community/Qwen3-TTS-12Hz-0.6B-CustomVoice-4bit\")\n\n# Generate audio (returns a generator)\naudio_chunks = []\nfor chunk in model.generate_custom_voice(\n    text=\"Hello from Qwen3-TTS.\",\n    speaker=\"Ryan\",\n    language=\"english\",\n    instruct=\"clear, steady delivery\"\n):\n    if hasattr(chunk, 'audio') and chunk.audio is not None:\n        audio_chunks.append(chunk.audio)\n\n# Combine and save\naudio = np.concatenate(audio_chunks)\nsf.write(\"output.wav\", audio, 24000)\n\nVoiceDesign\nfrom mlx_audio.tts.generate import generate_audio\n\ngenerate_audio(\n    text=\"Welcome to the show.\",\n    model=\"mlx-community/Qwen3-TTS-12Hz-1.7B-VoiceDesign-5bit\",\n    instruct=\"warm, friendly female narrator with medium pitch\",\n    lang_code=\"english\",\n    output_path=\".\",\n    file_prefix=\"voice_design\",\n    join_audio=True,\n)\n\nVoiceClone\nfrom mlx_audio.tts.generate import generate_audio\n\ngenerate_audio(\n    text=\"New content in the cloned voice.\",\n    model=\"mlx-community/Qwen3-TTS-12Hz-0.6B-Base-4bit\",\n    ref_audio=\"reference.wav\",\n    ref_text=\"Transcript of the reference audio\",\n    output_path=\".\",\n    file_prefix=\"cloned\",\n    join_audio=True,\n)\n\nBatch Processing\n\nUse scripts/batch_dubbing.py for processing multiple lines:\n\npython scripts/batch_dubbing.py \\\n  --input dubbing.json \\\n  --out-dir outputs\n\n\nSee references/dubbing_format.md for the JSON format.\n\nPerformance\nMetric\tValue\nSample rate\t24,000 Hz\nReal-time factor\t~0.7x (faster than real-time)\nPeak memory\t~4-6 GB\nFirst run\tDownloads model (~1-2GB)\nTroubleshooting\nIssue\tSolution\nSlow generation\tUse 4-bit CustomVoice model\nUnnatural pauses\tAdd punctuation, keep sentences short\nWrong language detected\tSpecify --lang_code explicitly\nVoice cloning quality\tUse cleaner reference audio, accurate transcript\nTokenizer warnings\tHarmless, can be ignored\nOut of memory\tClose other apps, use 4-bit model"
  },
  "trust": {
    "sourceLabel": "tencent",
    "provenanceUrl": "https://clawhub.ai/h1bomb/qwen3-tts-mlx",
    "publisherUrl": "https://clawhub.ai/h1bomb/qwen3-tts-mlx",
    "owner": "h1bomb",
    "version": "2.1.0",
    "license": null,
    "verificationStatus": "Indexed source record"
  },
  "links": {
    "detailUrl": "https://openagent3.xyz/skills/qwen3-tts-mlx",
    "downloadUrl": "https://openagent3.xyz/downloads/qwen3-tts-mlx",
    "agentUrl": "https://openagent3.xyz/skills/qwen3-tts-mlx/agent",
    "manifestUrl": "https://openagent3.xyz/skills/qwen3-tts-mlx/agent.json",
    "briefUrl": "https://openagent3.xyz/skills/qwen3-tts-mlx/agent.md"
  }
}