{
  "schemaVersion": "1.0",
  "item": {
    "slug": "listenhub-official-skills",
    "name": "Listenhub",
    "source": "tencent",
    "type": "skill",
    "category": "内容创作",
    "sourceUrl": "https://clawhub.ai/0xFANGO/listenhub-official-skills",
    "canonicalUrl": "https://clawhub.ai/0xFANGO/listenhub-official-skills",
    "targetPlatform": "OpenClaw"
  },
  "install": {
    "downloadMode": "redirect",
    "downloadUrl": "/downloads/listenhub-official-skills",
    "sourceDownloadUrl": "https://wry-manatee-359.convex.site/api/v1/download?slug=listenhub-official-skills",
    "sourcePlatform": "tencent",
    "targetPlatform": "OpenClaw",
    "installMethod": "Manual import",
    "extraction": "Extract archive",
    "prerequisites": [
      "OpenClaw"
    ],
    "packageFormat": "ZIP package",
    "includedAssets": [
      "SKILL.md",
      "scripts/check-status.sh",
      "scripts/create-explainer.sh",
      "scripts/create-podcast-audio.sh",
      "scripts/create-podcast-text.sh",
      "scripts/create-podcast.sh"
    ],
    "primaryDoc": "SKILL.md",
    "quickSetup": [
      "Download the package from Yavira.",
      "Extract the archive and review SKILL.md first.",
      "Import or place the package into your OpenClaw setup."
    ],
    "agentAssist": {
      "summary": "Hand the extracted package to your coding agent with a concrete install brief instead of figuring it out manually.",
      "steps": [
        "Download the package from Yavira.",
        "Extract it into a folder your agent can access.",
        "Paste one of the prompts below and point your agent at the extracted folder."
      ],
      "prompts": [
        {
          "label": "New install",
          "body": "I downloaded a skill package from Yavira. Read SKILL.md from the extracted folder and install it by following the included instructions. Tell me what you changed and call out any manual steps you could not complete."
        },
        {
          "label": "Upgrade existing",
          "body": "I downloaded an updated skill package from Yavira. Read SKILL.md from the extracted folder, compare it with my current installation, and upgrade it while preserving any custom configuration unless the package docs explicitly say otherwise. Summarize what changed and any follow-up checks I should run."
        }
      ]
    },
    "sourceHealth": {
      "source": "tencent",
      "status": "healthy",
      "reason": "direct_download_ok",
      "recommendedAction": "download",
      "checkedAt": "2026-04-23T16:43:11.935Z",
      "expiresAt": "2026-04-30T16:43:11.935Z",
      "httpStatus": 200,
      "finalUrl": "https://wry-manatee-359.convex.site/api/v1/download?slug=listenhub-official-skills",
      "contentType": "application/zip",
      "probeMethod": "head",
      "details": {
        "probeUrl": "https://wry-manatee-359.convex.site/api/v1/download?slug=listenhub-official-skills",
        "contentDisposition": "attachment; filename=\"listenhub-official-skills-1.0.1.zip\"",
        "redirectLocation": null,
        "bodySnippet": null
      },
      "scope": "source",
      "summary": "Source download looks usable.",
      "detail": "Yavira can redirect you to the upstream package for this source.",
      "primaryActionLabel": "Download for OpenClaw",
      "primaryActionHref": "/downloads/listenhub-official-skills"
    },
    "validation": {
      "installChecklist": [
        "Use the Yavira download entry.",
        "Review SKILL.md after the package is downloaded.",
        "Confirm the extracted package contains the expected setup assets."
      ],
      "postInstallChecks": [
        "Confirm the extracted package includes the expected docs or setup files.",
        "Validate the skill or prompts are available in your target agent workspace.",
        "Capture any manual follow-up steps the agent could not complete."
      ]
    },
    "downloadPageUrl": "https://openagent3.xyz/downloads/listenhub-official-skills",
    "agentPageUrl": "https://openagent3.xyz/skills/listenhub-official-skills/agent",
    "manifestUrl": "https://openagent3.xyz/skills/listenhub-official-skills/agent.json",
    "briefUrl": "https://openagent3.xyz/skills/listenhub-official-skills/agent.md"
  },
  "agentAssist": {
    "summary": "Hand the extracted package to your coding agent with a concrete install brief instead of figuring it out manually.",
    "steps": [
      "Download the package from Yavira.",
      "Extract it into a folder your agent can access.",
      "Paste one of the prompts below and point your agent at the extracted folder."
    ],
    "prompts": [
      {
        "label": "New install",
        "body": "I downloaded a skill package from Yavira. Read SKILL.md from the extracted folder and install it by following the included instructions. Tell me what you changed and call out any manual steps you could not complete."
      },
      {
        "label": "Upgrade existing",
        "body": "I downloaded an updated skill package from Yavira. Read SKILL.md from the extracted folder, compare it with my current installation, and upgrade it while preserving any custom configuration unless the package docs explicitly say otherwise. Summarize what changed and any follow-up checks I should run."
      }
    ]
  },
  "documentation": {
    "source": "clawhub",
    "primaryDoc": "SKILL.md",
    "sections": [
      {
        "title": "⛔ Hard Constraints (Inviolable)",
        "body": "The scripts are the ONLY interface. Period.\n\n┌─────────────────────────────────────────────────────────┐\n│  AI Agent  ──▶  ./scripts/*.sh  ──▶  ListenHub API     │\n│                      ▲                                  │\n│                      │                                  │\n│            This is the ONLY path.                       │\n│            Direct API calls are FORBIDDEN.              │\n└─────────────────────────────────────────────────────────┘\n\nMUST:\n\nExecute functionality ONLY through provided scripts in **/skills/listenhub/scripts/\nPass user intent as script arguments exactly as documented\nTrust script outputs; do not second-guess internal logic\n\nMUST NOT:\n\nWrite curl commands to ListenHub/Marswave API directly\nConstruct JSON bodies for API calls manually\nGuess or fabricate speakerIds, endpoints, or API parameters\nAssume API structure based on patterns or web searches\nHallucinate features not exposed by existing scripts\n\nWhy: The API is proprietary. Endpoints, parameters, and speakerIds are NOT publicly documented. Web searches will NOT find this information. Any attempt to bypass scripts will produce incorrect, non-functional code."
      },
      {
        "title": "Script Location",
        "body": "Scripts are located at **/skills/listenhub/scripts/ relative to your working context.\n\nDifferent AI clients use different dot-directories:\n\nClaude Code: .claude/skills/listenhub/scripts/\nOther clients: may vary (.cursor/, .windsurf/, etc.)\n\nResolution: Use glob pattern **/skills/listenhub/scripts/*.sh to locate scripts reliably, or resolve from the SKILL.md file's own path."
      },
      {
        "title": "Private Data (Cannot Be Searched)",
        "body": "The following are internal implementation details that AI cannot reliably know:\n\nCategoryExamplesHow to ObtainAPI Base URLapi.marswave.ai/...✗ Cannot — internal to scriptsEndpointspodcast/episodes, etc.✗ Cannot — internal to scriptsSpeaker IDscozy-man-english, etc.✓ Call get-speakers.shRequest schemasJSON body structure✗ Cannot — internal to scriptsResponse formatsEpisode ID, status codes✓ Documented per script\n\nRule: If information is not in this SKILL.md or retrievable via a script (like get-speakers.sh), assume you don't know it."
      },
      {
        "title": "Design Philosophy",
        "body": "Hide complexity, reveal magic.\n\nUsers don't need to know: Episode IDs, API structure, polling mechanisms, credits, endpoint differences.\nUsers only need: Say idea → wait a moment → get the link."
      },
      {
        "title": "Security",
        "body": "User-provided content (text, URLs) is transmitted to the ListenHub API (api.marswave.ai) for processing. Do not pass sensitive or confidential information as input.\nThe --source-url parameter accepts external URLs whose content is fetched and processed by the backend. Only use trusted URLs.\nAPI keys are stored locally in environment variables and transmitted via HTTPS. Never log or display full API keys.\nVersion checks connect to raw.githubusercontent.com (read-only, no code execution). Set LISTENHUB_SKIP_VERSION_CHECK=1 to disable."
      },
      {
        "title": "ListenHub API Key",
        "body": "API key stored in $LISTENHUB_API_KEY. Check on first use:\n\nsource ~/.zshrc 2>/dev/null; [ -n \"$LISTENHUB_API_KEY\" ] && echo \"ready\" || echo \"need_setup\"\n\nIf setup needed, guide user:\n\nVisit https://listenhub.ai/settings/api-keys\nPaste key (only the lh_sk_... part)\nAuto-save to ~/.zshrc"
      },
      {
        "title": "Image Generation API Key",
        "body": "Image generation uses the same ListenHub API key stored in $LISTENHUB_API_KEY.\nImage generation output path defaults to the user downloads directory, stored in $LISTENHUB_OUTPUT_DIR.\n\nOn first image generation, the script auto-guides configuration:\n\nVisit https://listenhub.ai/settings/api-keys (requires subscription)\nPaste API key\nConfigure output path (default: ~/Downloads)\nAuto-save to shell rc file\n\nSecurity: Never expose full API keys in output."
      },
      {
        "title": "Mode Detection",
        "body": "Auto-detect mode from user input:\n\n→ Podcast (1-2 speakers)\nSupports single-speaker or dual-speaker podcasts. Debate mode requires 2 speakers.\nDefault mode: quick unless explicitly requested.\nIf speakers are not specified, call get-speakers.sh and select the first speakerId\nmatching the chosen language.\nIf reference materials are provided, pass them as --source-url or --source-text.\nWhen the user only provides a topic (e.g., \"I want a podcast about X\"), proceed with:\n\ndetect language from user input,\nset mode=quick,\nchoose one speaker via get-speakers.sh matching the language,\ncreate a single-speaker podcast without further clarification.\n\nKeywords: \"podcast\", \"chat about\", \"discuss\", \"debate\", \"dialogue\"\nUse case: Topic exploration, opinion exchange, deep analysis\n\nFeature: Two voices, interactive feel\n\n→ Explain (Explainer video)\n\nKeywords: \"explain\", \"introduce\", \"video\", \"explainer\", \"tutorial\"\nUse case: Product intro, concept explanation, tutorials\nFeature: Single narrator + AI-generated visuals, can export video\n\n→ TTS (Text-to-speech)\nTTS defaults to FlowSpeech direct for single-pass text or URL narration.\nScript arrays and multi-speaker dialogue belong to Speech as an advanced path, not the default TTS entry.\nText-to-speech input is limited to 10,000 characters; split or use a URL when longer.\n\nKeywords: \"read aloud\", \"convert to speech\", \"tts\", \"voice\"\nUse case: Article to audio, note review, document narration\nFeature: Fastest (1-2 min), pure audio"
      },
      {
        "title": "Ambiguous \"Convert to speech\" Guidance",
        "body": "When the request is ambiguous (e.g., \"convert to speech\", \"read aloud\"), apply:\n\nDefault to FlowSpeech and prioritize direct to avoid altering content.\nInput type: URL uses type=url, plain text uses type=text.\nSpeaker: if not specified, call get-speakers and pick the first speakerId matching language.\nSwitch to Speech only when multi-line scripts or multi-speaker dialogue is explicitly requested, and require scripts.\n\nExample guidance:\n\n“This request can use FlowSpeech with the default direct mode; switch to smart for grammar and punctuation fixes. For per-line speaker assignment, provide scripts and switch to Speech.”\n\n→ Image Generation\n\nKeywords: \"generate image\", \"draw\", \"create picture\", \"visualize\"\nUse case: Creative visualization, concept art, illustrations\nFeature: AI image generation via Labnana API, multiple resolutions and aspect ratios\n\nReference Images via Image Hosts\nWhen reference images are local files, upload to a known image host and use the direct image URL in --reference-images.\nRecommended hosts: imgbb.com, sm.ms, postimages.org, imgur.com.\nDirect image URLs should end with .jpg, .png, .webp, or .gif.\n\nDefault: If unclear, ask user which format they prefer.\n\nExplicit override: User can say \"make it a podcast\" / \"I want explainer video\" / \"just voice\" / \"generate image\" to override auto-detection."
      },
      {
        "title": "Step 1: Receive input + detect mode",
        "body": "→ Got it! Preparing...\n  Mode: Two-person podcast\n  Topic: Latest developments in Manus AI\n\nFor URLs, identify type:\n\nyoutu.be/XXX → convert to https://www.youtube.com/watch?v=XXX\nOther URLs → use directly"
      },
      {
        "title": "Step 2: Submit generation",
        "body": "→ Generation submitted\n\n  Estimated time:\n  • Podcast: 2-3 minutes\n  • Explain: 3-5 minutes\n  • TTS: 1-2 minutes\n\n  You can:\n  • Wait and ask \"done yet?\"\n  • Use check-status via scripts\n  • View outputs in product pages:\n    - Podcast: https://listenhub.ai/app/podcast\n    - Explain: https://listenhub.ai/app/explainer\n    - Text-to-Speech: https://listenhub.ai/app/text-to-speech\n  • Do other things, ask later\n\nInternally remember Episode ID for status queries."
      },
      {
        "title": "Step 3: Query status",
        "body": "When user says \"done yet?\" / \"ready?\" / \"check status\":\n\nSuccess: Show result + next options\nProcessing: \"Still generating, wait another minute?\"\nFailed: \"Generation failed, content might be unparseable. Try another?\""
      },
      {
        "title": "Step 4: Show results",
        "body": "Podcast result:\n\n✓ Podcast generated!\n\n  \"{title}\"\n\n  Episode: https://listenhub.ai/app/episode/{episodeId}\n\n  Duration: ~{duration} minutes\n\n  Download audio: provide audioUrl or audioStreamUrl on request\n\nOne-stage podcast creation generates an online task. When status is success,\nthe episode detail already includes scripts and audio URLs. Download uses the\nreturned audioUrl or audioStreamUrl without a second create call. Two-stage\ncreation is only for script review or manual edits before audio generation.\n\nExplain result:\n\n✓ Explainer video generated!\n\n  \"{title}\"\n\n  Watch: https://listenhub.ai/app/explainer\n\n  Duration: ~{duration} minutes\n\n  Need to download audio? Just say so.\n\nImage result:\n\n✓ Image generated!\n\n  ~/Downloads/labnana-{timestamp}.jpg\n\nImage results are file-only and not shown in the web UI.\n\nImportant: Prioritize web experience. Only provide download URLs when user explicitly requests."
      },
      {
        "title": "Script Reference",
        "body": "Scripts are shell-based. Locate via **/skills/listenhub/scripts/.\nDependency: jq is required for request construction.\nThe AI must ensure curl and jq are installed before invoking scripts.\n\n⚠️ Long-running Tasks: Generation may take 1-5 minutes. Use your CLI client's native background execution feature:\n\nClaude Code: set run_in_background: true in Bash tool\nOther CLIs: use built-in async/background job management if available\n\nInvocation pattern:\n\n$SCRIPTS/script-name.sh [args]\n\nWhere $SCRIPTS = resolved path to **/skills/listenhub/scripts/"
      },
      {
        "title": "Podcast (One-Stage)",
        "body": "Default path. Use unless script review or manual editing is required.\n\n$SCRIPTS/create-podcast.sh --query \"The future of AI development\" --language en --mode deep --speakers cozy-man-english\n$SCRIPTS/create-podcast.sh --query \"Analyze this article\" --language en --mode deep --speakers cozy-man-english --source-url \"https://example.com/article\"\n\nMultiple --source-url and --source-text arguments are supported to combine several references in one request."
      },
      {
        "title": "Podcast (Two-Stage: Text → Review → Audio)",
        "body": "Advanced path. Use only when script review or edits are explicitly requested.\n\nThe entire value of two-stage generation is human review between stages.\nSkipping review reduces it to one-stage with extra latency — never do this.\n\nStage 1: Generate text content.\n\n$SCRIPTS/create-podcast-text.sh --query \"AI history\" --language en --mode deep --speakers cozy-man-english,travel-girl-english\n\nReview Gate (mandatory): After text generation completes, the agent MUST:\n\nRun check-status.sh --wait to poll until completion. On exit code 2 (timeout or rate-limited), wait briefly and retry.\nSave two files from the response:\n\n~/Downloads/podcast-draft-<episode-id>.md — human-readable version assembled from the response fields (title, outline, sourceProcessResult.content, and the scripts array formatted as readable dialogue). This is for the user to review.\n~/Downloads/podcast-scripts-<episode-id>.json — the raw {\"scripts\": [...]} object extracted from the response, exactly in the format that create-podcast-audio.sh --scripts expects. This is the machine-readable source of truth for Stage 2.\n\n\nInform the user that both files have been saved, and offer to open the markdown draft for review (use the open command on macOS).\nSTOP and wait for explicit user approval before proceeding to Stage 2.\nOn user approval:\n\nNo changes: run create-podcast-audio.sh --episode <id> without --scripts (server uses original).\nWith edits: the user may edit the JSON file directly, or describe changes for the agent to apply. Pass the modified file via --scripts.\n\nThe agent MUST NOT proceed to Stage 2 automatically. This is a hard constraint, not a suggestion.\n\nStage 2: Generate audio from reviewed/approved text.\n\n# User approved without changes:\n$SCRIPTS/create-podcast-audio.sh --episode \"<episode-id>\"\n\n# User provided edits:\n$SCRIPTS/create-podcast-audio.sh --episode \"<episode-id>\" --scripts modified-scripts.json"
      },
      {
        "title": "Speech (Multi-Speaker)",
        "body": "$SCRIPTS/create-speech.sh --scripts scripts.json\necho '{\"scripts\":[{\"content\":\"Hello\",\"speakerId\":\"cozy-man-english\"}]}' | $SCRIPTS/create-speech.sh --scripts -\n\n# scripts.json format:\n# {\n#   \"scripts\": [\n#     {\"content\": \"Script content here\", \"speakerId\": \"speaker-id\"},\n#     ...\n#   ]\n# }"
      },
      {
        "title": "Get Available Speakers",
        "body": "$SCRIPTS/get-speakers.sh --language zh\n$SCRIPTS/get-speakers.sh --language en\n\nGuidance:\n\n若用户未指定音色，必须先调用 get-speakers.sh 获取可用列表。\n默认值兜底：取与 language 匹配的列表首个 speakerId 作为默认音色。\n\nResponse structure (for AI parsing):\n\n{\n  \"code\": 0,\n  \"data\": {\n    \"items\": [\n      {\n        \"name\": \"Yuanye\",\n        \"speakerId\": \"cozy-man-english\",\n        \"gender\": \"male\",\n        \"language\": \"zh\"\n      }\n    ]\n  }\n}\n\nUsage: When user requests specific voice characteristics (gender, style), call this script first to discover available speakerId values. NEVER hardcode or assume speakerIds."
      },
      {
        "title": "Explain",
        "body": "$SCRIPTS/create-explainer.sh --content \"Introduce ListenHub\" --language en --mode info --speakers cozy-man-english\n$SCRIPTS/generate-video.sh --episode \"<episode-id>\""
      },
      {
        "title": "TTS",
        "body": "$SCRIPTS/create-tts.sh --type text --content \"Welcome to ListenHub\" --language en --mode smart --speakers cozy-man-english"
      },
      {
        "title": "Image Generation",
        "body": "$SCRIPTS/generate-image.sh --prompt \"sunset over mountains\" --size 2K --ratio 16:9\n$SCRIPTS/generate-image.sh --prompt \"style reference\" --reference-images \"https://example.com/ref1.jpg,https://example.com/ref2.png\"\n\nSupported sizes: 1K | 2K | 4K (default: 2K).\nSupported aspect ratios: 16:9 | 1:1 | 9:16 | 2:3 | 3:2 | 3:4 | 4:3 | 21:9 (default: 16:9).\nReference images: comma-separated URLs, maximum 14."
      },
      {
        "title": "Check Status",
        "body": "# Single-shot query\n$SCRIPTS/check-status.sh --episode \"<episode-id>\" --type podcast\n\n# Wait mode (recommended for automated polling)\n$SCRIPTS/check-status.sh --episode \"<episode-id>\" --type podcast --wait\n$SCRIPTS/check-status.sh --episode \"<episode-id>\" --type flow-speech --wait --timeout 60\n$SCRIPTS/check-status.sh --episode \"<episode-id>\" --type explainer --wait --timeout 600\n\ntts is accepted as an alias for flow-speech.\n\n--wait mode handles polling internally with configurable limits.\nAgents SHOULD use --wait instead of manual polling loops. On exit code 2, wait briefly and retry the command.\n\nOptionDefaultDescription--waitoffEnable polling mode--max-polls30Maximum poll attempts--timeout300Maximum total wait (seconds)--interval10Base poll interval (seconds)\n\nExit codes: 0 = completed, 1 = failed, 2 = timeout or rate-limited (still pending, safe to retry after a short wait)."
      },
      {
        "title": "Language Adaptation",
        "body": "Automatic Language Detection: Adapt output language based on user input and context.\n\nDetection Rules:\n\nUser Input Language: If user writes in Chinese, respond in Chinese. If user writes in English, respond in English.\nContext Consistency: Maintain the same language throughout the interaction unless user explicitly switches.\nCLAUDE.md Override: If project-level CLAUDE.md specifies a default language, respect it unless user input indicates otherwise.\nMixed Input: If user mixes languages, prioritize the dominant language (>50% of content).\n\nApplication:\n\nStatus messages: \"→ Got it! Preparing...\" (English) vs \"→ 收到！准备中...\" (Chinese)\nError messages: Match user's language\nResult summaries: Match user's language\nScript outputs: Pass through as-is (scripts handle their own language)\n\nExample:\n\nUser (Chinese): \"生成一个关于 AI 的播客\"\nAI (Chinese): \"→ 收到！准备双人播客...\"\n\nUser (English): \"Make a podcast about AI\"\nAI (English): \"→ Got it! Preparing two-person podcast...\"\n\nPrinciple: Language is interface, not barrier. Adapt seamlessly to user's natural expression."
      },
      {
        "title": "Black Box Principle",
        "body": "You are a dispatcher, not an implementer.\n\nYour job is to:\n\nUnderstand user intent (what do they want to create?)\nSelect the correct script (which tool fits?)\nFormat arguments correctly (what parameters?)\nExecute and relay results (what happened?)\n\nYour job is NOT to:\n\nUnderstand or modify script internals\nConstruct API calls directly\nGuess parameters not documented here\nInvent features that scripts don't expose"
      },
      {
        "title": "Mode-Specific Behavior",
        "body": "ListenHub modes (passthrough):\n\nPodcast/Explain/TTS/Speech → pass user input directly\nServer has full AI capability to process content\nIf user needs specific speakers → call get-speakers.sh first to list options\n\nLabnana mode (passthrough by default):\n\nImage Generation → pass the user's prompt through as-is by default\nThe generation model handles prompt interpretation; client-side rewriting is not required"
      },
      {
        "title": "Prompt Handling (Image Generation)",
        "body": "Default behavior: transparent forwarding. Pass the user's prompt directly to the script without modification.\n\nWhen to offer optimization:\n\nThe user provides only a short topic or phrase (e.g., \"a cat\"), AND\nThe user has not explicitly stated they want verbatim generation\n\nIn this case, ask whether the user would like help enriching the prompt. Do not optimize without confirmation.\n\nWhen to never modify:\n\nThe user pastes a long, structured, or detailed prompt — treat them as experienced\nThe user explicitly says \"use this prompt exactly\" or similar\n\nIf the user agrees to optimization, the following techniques are available as reference:\n\nStyle: \"cyberpunk\" → add \"neon lights, futuristic, dystopian\"; \"ink painting\" → add \"Chinese ink painting, traditional art style\"\n\nScene: time of day, lighting conditions, weather\n\nQuality: \"highly detailed\", \"8K quality\", \"cinematic composition\"\n\nRules when optimizing:\n\nUse English keywords (models trained on English)\nShow the optimized prompt transparently before submitting\nKeep the user's core intent unchanged\nDo not over-stack terminology or add unwanted elements\n\n→ Generation submitted, about 2-3 minutes\n\nYou can:\n• Wait and ask \"done yet?\"\n• Check listenhub.ai/app/library\n</response>\n</example>\n\n→ Generation submitted, explainer videos take 3-5 minutes\n\nIncludes: Script + narration + AI visuals\n</response>\n</example>\n\n→ TTS submitted, about 1-2 minutes\n\nWait a moment, or ask \"done yet?\" to check\n</response>\n</example>\n\nPrompt: Cyberpunk city at night, neon lights reflecting on wet streets,\ntowering skyscrapers with holographic ads, flying vehicles,\ncinematic composition, highly detailed, 8K quality\n\nResolution: 2K (16:9)\n\n✓ Image generated!\n~/Downloads/labnana-20260121-143145.jpg\n</response>\n</example>\n\nPrompt: a futuristic car\nReference images: 1\nReference image URL: https://example.com/style-ref.jpg\nResolution: 2K (16:9)\n\n✓ Image generated!\n~/Downloads/labnana-20260122-154230.jpg\n</response>\n</example>\n\n\"AI Revolution: From GPT to AGI\"\n\nListen: https://listenhub.ai/app/podcast\n\nDuration: ~8 minutes\n\nNeed to download? Just say so.\n</response>\n</example>"
      }
    ],
    "body": "<purpose> **The Hook**: Paste content, get audio/video/image. That simple.\n\nFour modes, one entry point:\n\nPodcast — Two-person dialogue, ideal for deep discussions\nExplain — Single narrator + AI visuals, ideal for product intros\nTTS/Flow Speech — Pure voice reading, ideal for articles\nImage Generation — AI image creation, ideal for creative visualization\n\nUsers don't need to remember APIs, modes, or parameters. Just say what you want. </purpose>\n\n<instructions>\n⛔ Hard Constraints (Inviolable)\n\nThe scripts are the ONLY interface. Period.\n\n┌─────────────────────────────────────────────────────────┐\n│  AI Agent  ──▶  ./scripts/*.sh  ──▶  ListenHub API     │\n│                      ▲                                  │\n│                      │                                  │\n│            This is the ONLY path.                       │\n│            Direct API calls are FORBIDDEN.              │\n└─────────────────────────────────────────────────────────┘\n\n\nMUST:\n\nExecute functionality ONLY through provided scripts in **/skills/listenhub/scripts/\nPass user intent as script arguments exactly as documented\nTrust script outputs; do not second-guess internal logic\n\nMUST NOT:\n\nWrite curl commands to ListenHub/Marswave API directly\nConstruct JSON bodies for API calls manually\nGuess or fabricate speakerIds, endpoints, or API parameters\nAssume API structure based on patterns or web searches\nHallucinate features not exposed by existing scripts\n\nWhy: The API is proprietary. Endpoints, parameters, and speakerIds are NOT publicly documented. Web searches will NOT find this information. 
Any attempt to bypass scripts will produce incorrect, non-functional code.\n\nScript Location\n\nScripts are located at **/skills/listenhub/scripts/ relative to your working context.\n\nDifferent AI clients use different dot-directories:\n\nClaude Code: .claude/skills/listenhub/scripts/\nOther clients: may vary (.cursor/, .windsurf/, etc.)\n\nResolution: Use glob pattern **/skills/listenhub/scripts/*.sh to locate scripts reliably, or resolve from the SKILL.md file's own path.\n\nPrivate Data (Cannot Be Searched)\n\nThe following are internal implementation details that AI cannot reliably know:\n\nCategory\tExamples\tHow to Obtain\nAPI Base URL\tapi.marswave.ai/...\t✗ Cannot — internal to scripts\nEndpoints\tpodcast/episodes, etc.\t✗ Cannot — internal to scripts\nSpeaker IDs\tcozy-man-english, etc.\t✓ Call get-speakers.sh\nRequest schemas\tJSON body structure\t✗ Cannot — internal to scripts\nResponse formats\tEpisode ID, status codes\t✓ Documented per script\n\nRule: If information is not in this SKILL.md or retrievable via a script (like get-speakers.sh), assume you don't know it.\n\nDesign Philosophy\n\nHide complexity, reveal magic.\n\nUsers don't need to know: Episode IDs, API structure, polling mechanisms, credits, endpoint differences. Users only need: Say idea → wait a moment → get the link.\n\nSecurity\nUser-provided content (text, URLs) is transmitted to the ListenHub API (api.marswave.ai) for processing. Do not pass sensitive or confidential information as input.\nThe --source-url parameter accepts external URLs whose content is fetched and processed by the backend. Only use trusted URLs.\nAPI keys are stored locally in environment variables and transmitted via HTTPS. Never log or display full API keys.\nVersion checks connect to raw.githubusercontent.com (read-only, no code execution). Set LISTENHUB_SKIP_VERSION_CHECK=1 to disable.\nEnvironment\nListenHub API Key\n\nAPI key stored in $LISTENHUB_API_KEY. 
Check on first use:\n\nsource ~/.zshrc 2>/dev/null; [ -n \"$LISTENHUB_API_KEY\" ] && echo \"ready\" || echo \"need_setup\"\n\n\nIf setup needed, guide user:\n\nVisit https://listenhub.ai/settings/api-keys\nPaste key (only the lh_sk_... part)\nAuto-save to ~/.zshrc\nImage Generation API Key\n\nImage generation uses the same ListenHub API key stored in $LISTENHUB_API_KEY. Image generation output path defaults to the user downloads directory, stored in $LISTENHUB_OUTPUT_DIR.\n\nOn first image generation, the script auto-guides configuration:\n\nVisit https://listenhub.ai/settings/api-keys (requires subscription)\nPaste API key\nConfigure output path (default: ~/Downloads)\nAuto-save to shell rc file\n\nSecurity: Never expose full API keys in output.\n\nMode Detection\n\nAuto-detect mode from user input:\n\n→ Podcast (1-2 speakers) Supports single-speaker or dual-speaker podcasts. Debate mode requires 2 speakers. Default mode: quick unless explicitly requested. If speakers are not specified, call get-speakers.sh and select the first speakerId matching the chosen language. If reference materials are provided, pass them as --source-url or --source-text. When the user only provides a topic (e.g., \"I want a podcast about X\"), proceed with:\n\ndetect language from user input,\nset mode=quick,\nchoose one speaker via get-speakers.sh matching the language,\ncreate a single-speaker podcast without further clarification.\nKeywords: \"podcast\", \"chat about\", \"discuss\", \"debate\", \"dialogue\"\nUse case: Topic exploration, opinion exchange, deep analysis\nFeature: Two voices, interactive feel\n\n→ Explain (Explainer video)\n\nKeywords: \"explain\", \"introduce\", \"video\", \"explainer\", \"tutorial\"\nUse case: Product intro, concept explanation, tutorials\nFeature: Single narrator + AI-generated visuals, can export video\n\n→ TTS (Text-to-speech) TTS defaults to FlowSpeech direct for single-pass text or URL narration. 
Script arrays and multi-speaker dialogue belong to Speech as an advanced path, not the default TTS entry. Text-to-speech input is limited to 10,000 characters; split or use a URL when longer.\n\nKeywords: \"read aloud\", \"convert to speech\", \"tts\", \"voice\"\nUse case: Article to audio, note review, document narration\nFeature: Fastest (1-2 min), pure audio\nAmbiguous \"Convert to speech\" Guidance\n\nWhen the request is ambiguous (e.g., \"convert to speech\", \"read aloud\"), apply:\n\nDefault to FlowSpeech and prioritize direct to avoid altering content.\nInput type: URL uses type=url, plain text uses type=text.\nSpeaker: if not specified, call get-speakers and pick the first speakerId matching language.\nSwitch to Speech only when multi-line scripts or multi-speaker dialogue is explicitly requested, and require scripts.\n\nExample guidance:\n\n“This request can use FlowSpeech with the default direct mode; switch to smart for grammar and punctuation fixes. For per-line speaker assignment, provide scripts and switch to Speech.”\n\n→ Image Generation\n\nKeywords: \"generate image\", \"draw\", \"create picture\", \"visualize\"\nUse case: Creative visualization, concept art, illustrations\nFeature: AI image generation via Labnana API, multiple resolutions and aspect ratios\n\nReference Images via Image Hosts When reference images are local files, upload to a known image host and use the direct image URL in --reference-images. Recommended hosts: imgbb.com, sm.ms, postimages.org, imgur.com. Direct image URLs should end with .jpg, .png, .webp, or .gif.\n\nDefault: If unclear, ask user which format they prefer.\n\nExplicit override: User can say \"make it a podcast\" / \"I want explainer video\" / \"just voice\" / \"generate image\" to override auto-detection.\n\nInteraction Flow\nStep 1: Receive input + detect mode\n→ Got it! 
Preparing...\n  Mode: Two-person podcast\n  Topic: Latest developments in Manus AI\n\n\nFor URLs, identify type:\n\nyoutu.be/XXX → convert to https://www.youtube.com/watch?v=XXX\nOther URLs → use directly\nStep 2: Submit generation\n→ Generation submitted\n\n  Estimated time:\n  • Podcast: 2-3 minutes\n  • Explain: 3-5 minutes\n  • TTS: 1-2 minutes\n\n  You can:\n  • Wait and ask \"done yet?\"\n  • Use check-status via scripts\n  • View outputs in product pages:\n    - Podcast: https://listenhub.ai/app/podcast\n    - Explain: https://listenhub.ai/app/explainer\n    - Text-to-Speech: https://listenhub.ai/app/text-to-speech\n  • Do other things, ask later\n\n\nInternally remember Episode ID for status queries.\n\nStep 3: Query status\n\nWhen user says \"done yet?\" / \"ready?\" / \"check status\":\n\nSuccess: Show result + next options\nProcessing: \"Still generating, wait another minute?\"\nFailed: \"Generation failed, content might be unparseable. Try another?\"\nStep 4: Show results\n\nPodcast result:\n\n✓ Podcast generated!\n\n  \"{title}\"\n\n  Episode: https://listenhub.ai/app/episode/{episodeId}\n\n  Duration: ~{duration} minutes\n\n  Download audio: provide audioUrl or audioStreamUrl on request\n\n\nOne-stage podcast creation generates an online task. When status is success, the episode detail already includes scripts and audio URLs. Download uses the returned audioUrl or audioStreamUrl without a second create call. Two-stage creation is only for script review or manual edits before audio generation.\n\nExplain result:\n\n✓ Explainer video generated!\n\n  \"{title}\"\n\n  Watch: https://listenhub.ai/app/explainer\n\n  Duration: ~{duration} minutes\n\n  Need to download audio? Just say so.\n\n\nImage result:\n\n✓ Image generated!\n\n  ~/Downloads/labnana-{timestamp}.jpg\n\n\nImage results are file-only and not shown in the web UI.\n\nImportant: Prioritize web experience. 
Only provide download URLs when user explicitly requests.\n\nScript Reference\n\nScripts are shell-based. Locate via **/skills/listenhub/scripts/. Dependency: jq is required for request construction. The AI must ensure curl and jq are installed before invoking scripts.\n\n⚠️ Long-running Tasks: Generation may take 1-5 minutes. Use your CLI client's native background execution feature:\n\nClaude Code: set run_in_background: true in Bash tool\nOther CLIs: use built-in async/background job management if available\n\nInvocation pattern:\n\n$SCRIPTS/script-name.sh [args]\n\n\nWhere $SCRIPTS = resolved path to **/skills/listenhub/scripts/\n\nPodcast (One-Stage)\n\nDefault path. Use unless script review or manual editing is required.\n\n$SCRIPTS/create-podcast.sh --query \"The future of AI development\" --language en --mode deep --speakers cozy-man-english\n$SCRIPTS/create-podcast.sh --query \"Analyze this article\" --language en --mode deep --speakers cozy-man-english --source-url \"https://example.com/article\"\n\n\nMultiple --source-url and --source-text arguments are supported to combine several references in one request.\n\nPodcast (Two-Stage: Text → Review → Audio)\n\nAdvanced path. Use only when script review or edits are explicitly requested.\n\nThe entire value of two-stage generation is human review between stages. Skipping review reduces it to one-stage with extra latency — never do this.\n\nStage 1: Generate text content.\n\n$SCRIPTS/create-podcast-text.sh --query \"AI history\" --language en --mode deep --speakers cozy-man-english,travel-girl-english\n\n\nReview Gate (mandatory): After text generation completes, the agent MUST:\n\nRun check-status.sh --wait to poll until completion. 
On exit code 2 (timeout or rate-limited), wait briefly and retry.\nSave two files from the response:\n~/Downloads/podcast-draft-<episode-id>.md — human-readable version assembled from the response fields (title, outline, sourceProcessResult.content, and the scripts array formatted as readable dialogue). This is for the user to review.\n~/Downloads/podcast-scripts-<episode-id>.json — the raw {\"scripts\": [...]} object extracted from the response, exactly in the format that create-podcast-audio.sh --scripts expects. This is the machine-readable source of truth for Stage 2.\nInform the user that both files have been saved, and offer to open the markdown draft for review (use the open command on macOS).\nSTOP and wait for explicit user approval before proceeding to Stage 2.\nOn user approval:\nNo changes: run create-podcast-audio.sh --episode <id> without --scripts (server uses original).\nWith edits: the user may edit the JSON file directly, or describe changes for the agent to apply. Pass the modified file via --scripts.\n\nThe agent MUST NOT proceed to Stage 2 automatically. 
This is a hard constraint, not a suggestion.\n\nStage 2: Generate audio from reviewed/approved text.\n\n# User approved without changes:\n$SCRIPTS/create-podcast-audio.sh --episode \"<episode-id>\"\n\n# User provided edits:\n$SCRIPTS/create-podcast-audio.sh --episode \"<episode-id>\" --scripts modified-scripts.json\n\nSpeech (Multi-Speaker)\n$SCRIPTS/create-speech.sh --scripts scripts.json\necho '{\"scripts\":[{\"content\":\"Hello\",\"speakerId\":\"cozy-man-english\"}]}' | $SCRIPTS/create-speech.sh --scripts -\n\n# scripts.json format:\n# {\n#   \"scripts\": [\n#     {\"content\": \"Script content here\", \"speakerId\": \"speaker-id\"},\n#     ...\n#   ]\n# }\n\nGet Available Speakers\n$SCRIPTS/get-speakers.sh --language zh\n$SCRIPTS/get-speakers.sh --language en\n\n\nGuidance:\n\n若用户未指定音色，必须先调用 get-speakers.sh 获取可用列表。\n默认值兜底：取与 language 匹配的列表首个 speakerId 作为默认音色。\n\nResponse structure (for AI parsing):\n\n{\n  \"code\": 0,\n  \"data\": {\n    \"items\": [\n      {\n        \"name\": \"Yuanye\",\n        \"speakerId\": \"cozy-man-english\",\n        \"gender\": \"male\",\n        \"language\": \"zh\"\n      }\n    ]\n  }\n}\n\n\nUsage: When user requests specific voice characteristics (gender, style), call this script first to discover available speakerId values. NEVER hardcode or assume speakerIds.\n\nExplain\n$SCRIPTS/create-explainer.sh --content \"Introduce ListenHub\" --language en --mode info --speakers cozy-man-english\n$SCRIPTS/generate-video.sh --episode \"<episode-id>\"\n\nTTS\n$SCRIPTS/create-tts.sh --type text --content \"Welcome to ListenHub\" --language en --mode smart --speakers cozy-man-english\n\nImage Generation\n$SCRIPTS/generate-image.sh --prompt \"sunset over mountains\" --size 2K --ratio 16:9\n$SCRIPTS/generate-image.sh --prompt \"style reference\" --reference-images \"https://example.com/ref1.jpg,https://example.com/ref2.png\"\n\n\nSupported sizes: 1K | 2K | 4K (default: 2K). 
Supported aspect ratios: 16:9 | 1:1 | 9:16 | 2:3 | 3:2 | 3:4 | 4:3 | 21:9 (default: 16:9). Reference images: comma-separated URLs, maximum 14.\n\nCheck Status\n# Single-shot query\n$SCRIPTS/check-status.sh --episode \"<episode-id>\" --type podcast\n\n# Wait mode (recommended for automated polling)\n$SCRIPTS/check-status.sh --episode \"<episode-id>\" --type podcast --wait\n$SCRIPTS/check-status.sh --episode \"<episode-id>\" --type flow-speech --wait --timeout 60\n$SCRIPTS/check-status.sh --episode \"<episode-id>\" --type explainer --wait --timeout 600\n\n\ntts is accepted as an alias for flow-speech.\n\n--wait mode handles polling internally with configurable limits. Agents SHOULD use --wait instead of manual polling loops. On exit code 2, wait briefly and retry the command.\n\nOption\tDefault\tDescription\n--wait\toff\tEnable polling mode\n--max-polls\t30\tMaximum poll attempts\n--timeout\t300\tMaximum total wait (seconds)\n--interval\t10\tBase poll interval (seconds)\n\nExit codes: 0 = completed, 1 = failed, 2 = timeout or rate-limited (still pending, safe to retry after a short wait).\n\nLanguage Adaptation\n\nAutomatic Language Detection: Adapt output language based on user input and context.\n\nDetection Rules:\n\nUser Input Language: If user writes in Chinese, respond in Chinese. If user writes in English, respond in English.\nContext Consistency: Maintain the same language throughout the interaction unless user explicitly switches.\nCLAUDE.md Override: If project-level CLAUDE.md specifies a default language, respect it unless user input indicates otherwise.\nMixed Input: If user mixes languages, prioritize the dominant language (>50% of content).\n\nApplication:\n\nStatus messages: \"→ Got it! 
Preparing...\" (English) vs \"→ 收到！准备中...\" (Chinese)\nError messages: Match user's language\nResult summaries: Match user's language\nScript outputs: Pass through as-is (scripts handle their own language)\n\nExample:\n\nUser (Chinese): \"生成一个关于 AI 的播客\"\nAI (Chinese): \"→ 收到！准备双人播客...\"\n\nUser (English): \"Make a podcast about AI\"\nAI (English): \"→ Got it! Preparing two-person podcast...\"\n\n\nPrinciple: Language is interface, not barrier. Adapt seamlessly to user's natural expression.\n\nAI Responsibilities\nBlack Box Principle\n\nYou are a dispatcher, not an implementer.\n\nYour job is to:\n\nUnderstand user intent (what do they want to create?)\nSelect the correct script (which tool fits?)\nFormat arguments correctly (what parameters?)\nExecute and relay results (what happened?)\n\nYour job is NOT to:\n\nUnderstand or modify script internals\nConstruct API calls directly\nGuess parameters not documented here\nInvent features that scripts don't expose\nMode-Specific Behavior\n\nListenHub modes (passthrough):\n\nPodcast/Explain/TTS/Speech → pass user input directly\nServer has full AI capability to process content\nIf user needs specific speakers → call get-speakers.sh first to list options\n\nLabnana mode (passthrough by default):\n\nImage Generation → pass the user's prompt through as-is by default\nThe generation model handles prompt interpretation; client-side rewriting is not required\nPrompt Handling (Image Generation)\n\nDefault behavior: transparent forwarding. Pass the user's prompt directly to the script without modification.\n\nWhen to offer optimization:\n\nThe user provides only a short topic or phrase (e.g., \"a cat\"), AND\nThe user has not explicitly stated they want verbatim generation\n\nIn this case, ask whether the user would like help enriching the prompt. 
Do not optimize without confirmation.\n\nWhen to never modify:\n\nThe user pastes a long, structured, or detailed prompt — treat them as experienced\nThe user explicitly says \"use this prompt exactly\" or similar\n\nIf the user agrees to optimization, the following techniques are available as reference:\n\nStyle: \"cyberpunk\" → add \"neon lights, futuristic, dystopian\"; \"ink painting\" → add \"Chinese ink painting, traditional art style\"\n\nScene: time of day, lighting conditions, weather\n\nQuality: \"highly detailed\", \"8K quality\", \"cinematic composition\"\n\nRules when optimizing:\n\nUse English keywords (models trained on English)\nShow the optimized prompt transparently before submitting\nKeep the user's core intent unchanged\nDo not over-stack terminology or add unwanted elements\n</instructions> <examples> <example name=\"podcast-request\"> <user>Make a podcast about the latest AI developments</user> <response> → Got it! Preparing two-person podcast... Topic: Latest AI developments\n\n→ Generation submitted, about 2-3 minutes\n\nYou can: • Wait and ask \"done yet?\" • Check listenhub.ai/app/library </response> </example>\n\n<example name=\"explainer-request\"> <user>Create an explainer video introducing Claude Code</user> <response> → Got it! Preparing explainer video... Topic: Claude Code introduction\n\n→ Generation submitted, explainer videos take 3-5 minutes\n\nIncludes: Script + narration + AI visuals </response> </example>\n\n<example name=\"tts-request\"> <user>Convert this article to speech https://blog.example.com/article</user> <response> → Got it! Parsing article...\n\n→ TTS submitted, about 1-2 minutes\n\nWait a moment, or ask \"done yet?\" to check </response> </example>\n\n<example name=\"image-generation-short-prompt\"> <user>Generate an image: cyberpunk city at night</user> <response> → Short prompt detected. Would you like help enriching it with style/lighting/composition details, or use it as-is? 
</response> </example> <example name=\"image-generation-detailed-prompt\"> <user>Generate an image: \"Cyberpunk city at night, neon lights reflecting on wet streets, towering skyscrapers with holographic ads, flying vehicles, cinematic composition, highly detailed, 8K quality\"</user> <response> → Generating image...\n\nPrompt: Cyberpunk city at night, neon lights reflecting on wet streets, towering skyscrapers with holographic ads, flying vehicles, cinematic composition, highly detailed, 8K quality\n\nResolution: 2K (16:9)\n\n✓ Image generated! ~/Downloads/labnana-20260121-143145.jpg </response> </example>\n\n<example name=\"image-with-reference\"> <user>Generate an image in this style: https://example.com/style-ref.jpg, prompt: \"a futuristic car\"</user> <response> → Generating image with reference...\n\nPrompt: a futuristic car Reference images: 1 Reference image URL: https://example.com/style-ref.jpg Resolution: 2K (16:9)\n\n✓ Image generated! ~/Downloads/labnana-20260122-154230.jpg </response> </example>\n\n<example name=\"status-check\"> <user>Done yet?</user> <response> ✓ Podcast generated!\n\n\"AI Revolution: From GPT to AGI\"\n\nListen: https://listenhub.ai/app/podcast\n\nDuration: ~8 minutes\n\nNeed to download? Just say so. </response> </example>\n\n</examples>"
  },
  "trust": {
    "sourceLabel": "tencent",
    "provenanceUrl": "https://clawhub.ai/0xFANGO/listenhub-official-skills",
    "publisherUrl": "https://clawhub.ai/0xFANGO/listenhub-official-skills",
    "owner": "0xFANGO",
    "version": "0.1.1",
    "license": null,
    "verificationStatus": "Indexed source record"
  },
  "links": {
    "detailUrl": "https://openagent3.xyz/skills/listenhub-official-skills",
    "downloadUrl": "https://openagent3.xyz/downloads/listenhub-official-skills",
    "agentUrl": "https://openagent3.xyz/skills/listenhub-official-skills/agent",
    "manifestUrl": "https://openagent3.xyz/skills/listenhub-official-skills/agent.json",
    "briefUrl": "https://openagent3.xyz/skills/listenhub-official-skills/agent.md"
  }
}