{
  "schemaVersion": "1.0",
  "item": {
    "slug": "qwenspeak",
    "name": "qwenspeak",
    "source": "tencent",
    "type": "skill",
    "category": "AI 智能",
    "sourceUrl": "https://clawhub.ai/psyb0t/qwenspeak",
    "canonicalUrl": "https://clawhub.ai/psyb0t/qwenspeak",
    "targetPlatform": "OpenClaw"
  },
  "install": {
    "downloadMode": "redirect",
    "downloadUrl": "/downloads/qwenspeak",
    "sourceDownloadUrl": "https://wry-manatee-359.convex.site/api/v1/download?slug=qwenspeak",
    "sourcePlatform": "tencent",
    "targetPlatform": "OpenClaw",
    "installMethod": "Manual import",
    "extraction": "Extract archive",
    "prerequisites": [
      "OpenClaw"
    ],
    "packageFormat": "ZIP package",
    "includedAssets": [
      "SKILL.md",
      "references/setup.md",
      "scripts/qwenspeak.sh"
    ],
    "primaryDoc": "SKILL.md",
    "quickSetup": [
      "Download the package from Yavira.",
      "Extract the archive and review SKILL.md first.",
      "Import or place the package into your OpenClaw setup."
    ],
    "agentAssist": {
      "summary": "Hand the extracted package to your coding agent with a concrete install brief instead of figuring it out manually.",
      "steps": [
        "Download the package from Yavira.",
        "Extract it into a folder your agent can access.",
        "Paste one of the prompts below and point your agent at the extracted folder."
      ],
      "prompts": [
        {
          "label": "New install",
          "body": "I downloaded a skill package from Yavira. Read SKILL.md from the extracted folder and install it by following the included instructions. Tell me what you changed and call out any manual steps you could not complete."
        },
        {
          "label": "Upgrade existing",
          "body": "I downloaded an updated skill package from Yavira. Read SKILL.md from the extracted folder, compare it with my current installation, and upgrade it while preserving any custom configuration unless the package docs explicitly say otherwise. Summarize what changed and any follow-up checks I should run."
        }
      ]
    },
    "sourceHealth": {
      "source": "tencent",
      "status": "healthy",
      "reason": "direct_download_ok",
      "recommendedAction": "download",
      "checkedAt": "2026-04-30T16:55:25.780Z",
      "expiresAt": "2026-05-07T16:55:25.780Z",
      "httpStatus": 200,
      "finalUrl": "https://wry-manatee-359.convex.site/api/v1/download?slug=network",
      "contentType": "application/zip",
      "probeMethod": "head",
      "details": {
        "probeUrl": "https://wry-manatee-359.convex.site/api/v1/download?slug=network",
        "contentDisposition": "attachment; filename=\"network-1.0.0.zip\"",
        "redirectLocation": null,
        "bodySnippet": null
      },
      "scope": "source",
      "summary": "Source download looks usable.",
      "detail": "Yavira can redirect you to the upstream package for this source.",
      "primaryActionLabel": "Download for OpenClaw",
      "primaryActionHref": "/downloads/qwenspeak"
    },
    "validation": {
      "installChecklist": [
        "Use the Yavira download entry.",
        "Review SKILL.md after the package is downloaded.",
        "Confirm the extracted package contains the expected setup assets."
      ],
      "postInstallChecks": [
        "Confirm the extracted package includes the expected docs or setup files.",
        "Validate the skill or prompts are available in your target agent workspace.",
        "Capture any manual follow-up steps the agent could not complete."
      ]
    },
    "downloadPageUrl": "https://openagent3.xyz/downloads/qwenspeak",
    "agentPageUrl": "https://openagent3.xyz/skills/qwenspeak/agent",
    "manifestUrl": "https://openagent3.xyz/skills/qwenspeak/agent.json",
    "briefUrl": "https://openagent3.xyz/skills/qwenspeak/agent.md"
  },
  "agentAssist": {
    "summary": "Hand the extracted package to your coding agent with a concrete install brief instead of figuring it out manually.",
    "steps": [
      "Download the package from Yavira.",
      "Extract it into a folder your agent can access.",
      "Paste one of the prompts below and point your agent at the extracted folder."
    ],
    "prompts": [
      {
        "label": "New install",
        "body": "I downloaded a skill package from Yavira. Read SKILL.md from the extracted folder and install it by following the included instructions. Tell me what you changed and call out any manual steps you could not complete."
      },
      {
        "label": "Upgrade existing",
        "body": "I downloaded an updated skill package from Yavira. Read SKILL.md from the extracted folder, compare it with my current installation, and upgrade it while preserving any custom configuration unless the package docs explicitly say otherwise. Summarize what changed and any follow-up checks I should run."
      }
    ]
  },
  "documentation": {
    "source": "clawhub",
    "primaryDoc": "SKILL.md",
    "sections": [
      {
        "title": "qwenspeak",
        "body": "YAML-driven text-to-speech over SSH using Qwen3-TTS models.\n\nFor installation and deployment, see references/setup.md."
      },
      {
        "title": "SSH Wrapper",
        "body": "Use scripts/qwenspeak.sh for all commands. It handles host, port, and host key acceptance via QWENSPEAK_HOST and QWENSPEAK_PORT env vars.\n\nscripts/qwenspeak.sh <command> [args]\nscripts/qwenspeak.sh <command> < input_file\nscripts/qwenspeak.sh <command> > output_file"
      },
      {
        "title": "TTS Generation",
        "body": "Submit YAML, get a job UUID back immediately, poll for progress. Jobs run sequentially — one at a time, the rest queue up.\n\n# Get the YAML template\nscripts/qwenspeak.sh \"tts print-yaml\" > job.yaml\n\n# Submit job\nscripts/qwenspeak.sh \"tts\" < job.yaml\n# {\"id\": \"550e8400-...\", \"status\": \"queued\", \"total_steps\": 3, \"total_generations\": 7}\n\n# Check progress\nscripts/qwenspeak.sh \"tts get-job 550e8400\"\n\n# Follow job log\nscripts/qwenspeak.sh \"tts get-job-log 550e8400 -f\"\n\n# Download result\nscripts/qwenspeak.sh \"get hello.wav\" > hello.wav"
      },
      {
        "title": "YAML Structure",
        "body": "Global settings + list of steps. Each step loads a model, runs all its generations, then unloads. Settings cascade: global > step > generation.\n\nsteps:\n  - mode: custom-voice\n    model_size: 1.7b\n    speaker: Ryan\n    language: English\n    generate:\n      - text: \"Hello world\"\n        output: hello.wav\n      - text: \"I cannot believe this!\"\n        speaker: Vivian\n        instruct: \"Speak angrily\"\n        output: angry.wav\n\n  - mode: voice-design\n    generate:\n      - text: \"Welcome to our store.\"\n        instruct: \"A warm, friendly young female voice with a cheerful tone\"\n        output: welcome.wav\n\n  - mode: voice-clone\n    model_size: 1.7b\n    ref_audio: ref.wav\n    ref_text: \"Transcript of reference\"\n    generate:\n      - text: \"First line in cloned voice\"\n        output: clone1.wav\n      - text: \"Second line\"\n        output: clone2.wav"
      },
      {
        "title": "Modes",
        "body": "custom-voice — Pick from 9 preset speakers. 1.7B supports emotion/style via instruct.\n\nvoice-design — Describe the voice in natural language via instruct. 1.7B only.\n\nvoice-clone — Clone from reference audio. Set ref_audio and ref_text at step level to reuse across generations. x_vector_only: true skips transcript."
      },
      {
        "title": "Emotion trick for cloned voices",
        "body": "Upload references with different emotions, use separate steps:\n\nscripts/qwenspeak.sh \"create-dir refs\"\nscripts/qwenspeak.sh \"put refs/happy.wav\" < me_happy.wav\nscripts/qwenspeak.sh \"put refs/angry.wav\" < me_angry.wav\n\nsteps:\n  - mode: voice-clone\n    ref_audio: refs/happy.wav\n    ref_text: \"transcript of happy ref\"\n    generate:\n      - text: \"Great news everyone!\"\n        output: happy1.wav\n\n  - mode: voice-clone\n    ref_audio: refs/angry.wav\n    ref_text: \"transcript of angry ref\"\n    generate:\n      - text: \"This is unacceptable\"\n        output: angry1.wav"
      },
      {
        "title": "Job Management",
        "body": "scripts/qwenspeak.sh \"tts list-jobs\"              # list all\nscripts/qwenspeak.sh \"tts list-jobs --json\"        # JSON output\nscripts/qwenspeak.sh \"tts get-job <id>\"            # job details\nscripts/qwenspeak.sh \"tts get-job-log <id>\"        # view log\nscripts/qwenspeak.sh \"tts get-job-log <id> -f\"     # follow log\nscripts/qwenspeak.sh \"tts cancel-job <id>\"         # cancel\n\nStatuses: queued → running → completed | failed | cancelled\n\nCompleted jobs auto-cleaned after 1 day, all jobs after 1 week. UUID prefixes work (e.g. first 8 chars)."
      },
      {
        "title": "File Operations",
        "body": "All paths relative to the work directory. Traversal blocked.\n\nCommand\tDescription\nput <path>\tUpload file from stdin\nget <path>\tDownload file to stdout\nlist-files [--json]\tList directory\nremove-file <path>\tDelete a file\ncreate-dir <path>\tCreate directory\nremove-dir <path>\tRemove empty directory\nmove-file <src> <dst>\tMove or rename\ncopy-file <src> <dst>\tCopy a file\nfile-exists <path>\tCheck if file exists (true/false)\nsearch-files <glob>\tGlob search (** recursive)"
      },
      {
        "title": "Speakers",
        "body": "Speaker\tGender\tLanguage\tDescription\nVivian\tFemale\tChinese\tBright, slightly edgy young voice\nSerena\tFemale\tChinese\tWarm, gentle young voice\nUncle_Fu\tMale\tChinese\tSeasoned, low mellow timbre\nDylan\tMale\tChinese\tYouthful Beijing dialect, clear natural timbre\nEric\tMale\tChinese\tLively Chengdu/Sichuan dialect, slightly husky\nRyan\tMale\tEnglish\tDynamic with strong rhythmic drive\nAiden\tMale\tEnglish\tSunny American, clear midrange\nOno_Anna\tFemale\tJapanese\tPlayful, light nimble timbre\nSohee\tFemale\tKorean\tWarm with rich emotion"
      },
      {
        "title": "YAML Options",
        "body": "All settings cascade: global > step > generation.\n\nField\tDefault\tDescription\ndtype\tfloat32\tfloat32, float16, bfloat16 (float16/bfloat16 GPU only)\nflash_attn\tauto\tFlashAttention-2: auto-detects, auto-switches float32→bfloat16\ntemperature\t0.9\tSampling temperature\ntop_k\t50\tTop-k sampling\ntop_p\t1.0\tTop-p / nucleus sampling\nrepetition_penalty\t1.05\tRepetition penalty\nmax_new_tokens\t2048\tMax codec tokens to generate\nno_sample\tfalse\tGreedy decoding\nstreaming\tfalse\tStreaming mode (lower latency)\nmode\trequired\tStep only: custom-voice, voice-design, or voice-clone\nmodel_size\t1.7b\tStep only: 1.7b or 0.6b\ntext\trequired\tText to synthesize\noutput\trequired\tOutput file path\nspeaker\tVivian\tcustom-voice: speaker name\nlanguage\tAuto\tLanguage for synthesis\ninstruct\t-\tcustom-voice: emotion/style; voice-design: voice description\nref_audio\t-\tvoice-clone: reference audio file path\nref_text\t-\tvoice-clone: transcript of reference audio\nx_vector_only\tfalse\tvoice-clone: use speaker embedding only"
      }
    ],
    "body": "qwenspeak\n\nYAML-driven text-to-speech over SSH using Qwen3-TTS models.\n\nFor installation and deployment, see references/setup.md.\n\nSSH Wrapper\n\nUse scripts/qwenspeak.sh for all commands. It handles host, port, and host key acceptance via QWENSPEAK_HOST and QWENSPEAK_PORT env vars.\n\nscripts/qwenspeak.sh <command> [args]\nscripts/qwenspeak.sh <command> < input_file\nscripts/qwenspeak.sh <command> > output_file\n\nTTS Generation\n\nSubmit YAML, get a job UUID back immediately, poll for progress. Jobs run sequentially — one at a time, the rest queue up.\n\n# Get the YAML template\nscripts/qwenspeak.sh \"tts print-yaml\" > job.yaml\n\n# Submit job\nscripts/qwenspeak.sh \"tts\" < job.yaml\n# {\"id\": \"550e8400-...\", \"status\": \"queued\", \"total_steps\": 3, \"total_generations\": 7}\n\n# Check progress\nscripts/qwenspeak.sh \"tts get-job 550e8400\"\n\n# Follow job log\nscripts/qwenspeak.sh \"tts get-job-log 550e8400 -f\"\n\n# Download result\nscripts/qwenspeak.sh \"get hello.wav\" > hello.wav\n\nYAML Structure\n\nGlobal settings + list of steps. Each step loads a model, runs all its generations, then unloads. Settings cascade: global > step > generation.\n\nsteps:\n  - mode: custom-voice\n    model_size: 1.7b\n    speaker: Ryan\n    language: English\n    generate:\n      - text: \"Hello world\"\n        output: hello.wav\n      - text: \"I cannot believe this!\"\n        speaker: Vivian\n        instruct: \"Speak angrily\"\n        output: angry.wav\n\n  - mode: voice-design\n    generate:\n      - text: \"Welcome to our store.\"\n        instruct: \"A warm, friendly young female voice with a cheerful tone\"\n        output: welcome.wav\n\n  - mode: voice-clone\n    model_size: 1.7b\n    ref_audio: ref.wav\n    ref_text: \"Transcript of reference\"\n    generate:\n      - text: \"First line in cloned voice\"\n        output: clone1.wav\n      - text: \"Second line\"\n        output: clone2.wav\n\nModes\n\ncustom-voice — Pick from 9 preset speakers. 1.7B supports emotion/style via instruct.\n\nvoice-design — Describe the voice in natural language via instruct. 1.7B only.\n\nvoice-clone — Clone from reference audio. Set ref_audio and ref_text at step level to reuse across generations. x_vector_only: true skips transcript.\n\nEmotion trick for cloned voices\n\nUpload references with different emotions, use separate steps:\n\nscripts/qwenspeak.sh \"create-dir refs\"\nscripts/qwenspeak.sh \"put refs/happy.wav\" < me_happy.wav\nscripts/qwenspeak.sh \"put refs/angry.wav\" < me_angry.wav\n\nsteps:\n  - mode: voice-clone\n    ref_audio: refs/happy.wav\n    ref_text: \"transcript of happy ref\"\n    generate:\n      - text: \"Great news everyone!\"\n        output: happy1.wav\n\n  - mode: voice-clone\n    ref_audio: refs/angry.wav\n    ref_text: \"transcript of angry ref\"\n    generate:\n      - text: \"This is unacceptable\"\n        output: angry1.wav\n\nJob Management\n\nscripts/qwenspeak.sh \"tts list-jobs\"              # list all\nscripts/qwenspeak.sh \"tts list-jobs --json\"        # JSON output\nscripts/qwenspeak.sh \"tts get-job <id>\"            # job details\nscripts/qwenspeak.sh \"tts get-job-log <id>\"        # view log\nscripts/qwenspeak.sh \"tts get-job-log <id> -f\"     # follow log\nscripts/qwenspeak.sh \"tts cancel-job <id>\"         # cancel\n\nStatuses: queued → running → completed | failed | cancelled\n\nCompleted jobs auto-cleaned after 1 day, all jobs after 1 week. UUID prefixes work (e.g. first 8 chars).\n\nFile Operations\n\nAll paths relative to the work directory. Traversal blocked.\n\nCommand\tDescription\nput <path>\tUpload file from stdin\nget <path>\tDownload file to stdout\nlist-files [--json]\tList directory\nremove-file <path>\tDelete a file\ncreate-dir <path>\tCreate directory\nremove-dir <path>\tRemove empty directory\nmove-file <src> <dst>\tMove or rename\ncopy-file <src> <dst>\tCopy a file\nfile-exists <path>\tCheck if file exists (true/false)\nsearch-files <glob>\tGlob search (** recursive)\n\nSpeakers\n\nSpeaker\tGender\tLanguage\tDescription\nVivian\tFemale\tChinese\tBright, slightly edgy young voice\nSerena\tFemale\tChinese\tWarm, gentle young voice\nUncle_Fu\tMale\tChinese\tSeasoned, low mellow timbre\nDylan\tMale\tChinese\tYouthful Beijing dialect, clear natural timbre\nEric\tMale\tChinese\tLively Chengdu/Sichuan dialect, slightly husky\nRyan\tMale\tEnglish\tDynamic with strong rhythmic drive\nAiden\tMale\tEnglish\tSunny American, clear midrange\nOno_Anna\tFemale\tJapanese\tPlayful, light nimble timbre\nSohee\tFemale\tKorean\tWarm with rich emotion\n\nYAML Options\n\nAll settings cascade: global > step > generation.\n\nField\tDefault\tDescription\ndtype\tfloat32\tfloat32, float16, bfloat16 (float16/bfloat16 GPU only)\nflash_attn\tauto\tFlashAttention-2: auto-detects, auto-switches float32→bfloat16\ntemperature\t0.9\tSampling temperature\ntop_k\t50\tTop-k sampling\ntop_p\t1.0\tTop-p / nucleus sampling\nrepetition_penalty\t1.05\tRepetition penalty\nmax_new_tokens\t2048\tMax codec tokens to generate\nno_sample\tfalse\tGreedy decoding\nstreaming\tfalse\tStreaming mode (lower latency)\nmode\trequired\tStep only: custom-voice, voice-design, or voice-clone\nmodel_size\t1.7b\tStep only: 1.7b or 0.6b\ntext\trequired\tText to synthesize\noutput\trequired\tOutput file path\nspeaker\tVivian\tcustom-voice: speaker name\nlanguage\tAuto\tLanguage for synthesis\ninstruct\t-\tcustom-voice: emotion/style; voice-design: voice description\nref_audio\t-\tvoice-clone: reference audio file path\nref_text\t-\tvoice-clone: transcript of reference audio\nx_vector_only\tfalse\tvoice-clone: use speaker embedding only"
  },
  "trust": {
    "sourceLabel": "tencent",
    "provenanceUrl": "https://clawhub.ai/psyb0t/qwenspeak",
    "publisherUrl": "https://clawhub.ai/psyb0t/qwenspeak",
    "owner": "psyb0t",
    "version": "1.5.0",
    "license": null,
    "verificationStatus": "Indexed source record"
  },
  "links": {
    "detailUrl": "https://openagent3.xyz/skills/qwenspeak",
    "downloadUrl": "https://openagent3.xyz/downloads/qwenspeak",
    "agentUrl": "https://openagent3.xyz/skills/qwenspeak/agent",
    "manifestUrl": "https://openagent3.xyz/skills/qwenspeak/agent.json",
    "briefUrl": "https://openagent3.xyz/skills/qwenspeak/agent.md"
  }
}