{
  "schemaVersion": "1.0",
  "item": {
    "slug": "faster-whisper-gpu",
    "name": "Faster Whisper GPU",
    "source": "tencent",
    "type": "skill",
    "category": "AI 智能",
    "sourceUrl": "https://clawhub.ai/FelipeOFF/faster-whisper-gpu",
    "canonicalUrl": "https://clawhub.ai/FelipeOFF/faster-whisper-gpu",
    "targetPlatform": "OpenClaw"
  },
  "install": {
    "downloadMode": "redirect",
    "downloadUrl": "/downloads/faster-whisper-gpu",
    "sourceDownloadUrl": "https://wry-manatee-359.convex.site/api/v1/download?slug=faster-whisper-gpu",
    "sourcePlatform": "tencent",
    "targetPlatform": "OpenClaw",
    "installMethod": "Manual import",
    "extraction": "Extract archive",
    "prerequisites": [
      "OpenClaw"
    ],
    "packageFormat": "ZIP package",
    "includedAssets": [
      "EXAMPLES.md",
      "README.md",
      "SKILL.md",
      "requirements.txt",
      "transcribe.py"
    ],
    "primaryDoc": "SKILL.md",
    "quickSetup": [
      "Download the package from Yavira.",
      "Extract the archive and review SKILL.md first.",
      "Import or place the package into your OpenClaw setup."
    ],
    "agentAssist": {
      "summary": "Hand the extracted package to your coding agent with a concrete install brief instead of figuring it out manually.",
      "steps": [
        "Download the package from Yavira.",
        "Extract it into a folder your agent can access.",
        "Paste one of the prompts below and point your agent at the extracted folder."
      ],
      "prompts": [
        {
          "label": "New install",
          "body": "I downloaded a skill package from Yavira. Read SKILL.md from the extracted folder and install it by following the included instructions. Then review README.md for any prerequisites, environment setup, or post-install checks. Tell me what you changed and call out any manual steps you could not complete."
        },
        {
          "label": "Upgrade existing",
          "body": "I downloaded an updated skill package from Yavira. Read SKILL.md from the extracted folder, compare it with my current installation, and upgrade it while preserving any custom configuration unless the package docs explicitly say otherwise. Then review README.md for any prerequisites, environment setup, or post-install checks. Summarize what changed and any follow-up checks I should run."
        }
      ]
    },
    "sourceHealth": {
      "source": "tencent",
      "status": "healthy",
      "reason": "direct_download_ok",
      "recommendedAction": "download",
      "checkedAt": "2026-04-30T16:55:25.780Z",
      "expiresAt": "2026-05-07T16:55:25.780Z",
      "httpStatus": 200,
      "finalUrl": "https://wry-manatee-359.convex.site/api/v1/download?slug=network",
      "contentType": "application/zip",
      "probeMethod": "head",
      "details": {
        "probeUrl": "https://wry-manatee-359.convex.site/api/v1/download?slug=network",
        "contentDisposition": "attachment; filename=\"network-1.0.0.zip\"",
        "redirectLocation": null,
        "bodySnippet": null
      },
      "scope": "source",
      "summary": "Source download looks usable.",
      "detail": "Yavira can redirect you to the upstream package for this source.",
      "primaryActionLabel": "Download for OpenClaw",
      "primaryActionHref": "/downloads/faster-whisper-gpu"
    },
    "validation": {
      "installChecklist": [
        "Use the Yavira download entry.",
        "Review SKILL.md after the package is downloaded.",
        "Confirm the extracted package contains the expected setup assets."
      ],
      "postInstallChecks": [
        "Confirm the extracted package includes the expected docs or setup files.",
        "Validate the skill or prompts are available in your target agent workspace.",
        "Capture any manual follow-up steps the agent could not complete."
      ]
    },
    "downloadPageUrl": "https://openagent3.xyz/downloads/faster-whisper-gpu",
    "agentPageUrl": "https://openagent3.xyz/skills/faster-whisper-gpu/agent",
    "manifestUrl": "https://openagent3.xyz/skills/faster-whisper-gpu/agent.json",
    "briefUrl": "https://openagent3.xyz/skills/faster-whisper-gpu/agent.md"
  },
  "agentAssist": {
    "summary": "Hand the extracted package to your coding agent with a concrete install brief instead of figuring it out manually.",
    "steps": [
      "Download the package from Yavira.",
      "Extract it into a folder your agent can access.",
      "Paste one of the prompts below and point your agent at the extracted folder."
    ],
    "prompts": [
      {
        "label": "New install",
        "body": "I downloaded a skill package from Yavira. Read SKILL.md from the extracted folder and install it by following the included instructions. Then review README.md for any prerequisites, environment setup, or post-install checks. Tell me what you changed and call out any manual steps you could not complete."
      },
      {
        "label": "Upgrade existing",
        "body": "I downloaded an updated skill package from Yavira. Read SKILL.md from the extracted folder, compare it with my current installation, and upgrade it while preserving any custom configuration unless the package docs explicitly say otherwise. Then review README.md for any prerequisites, environment setup, or post-install checks. Summarize what changed and any follow-up checks I should run."
      }
    ]
  },
  "documentation": {
    "source": "clawhub",
    "primaryDoc": "SKILL.md",
    "sections": [
      {
        "title": "🎙️ Faster Whisper GPU",
        "body": "High-performance local speech-to-text transcription using Faster Whisper with NVIDIA GPU acceleration."
      },
      {
        "title": "✨ Features",
        "body": "🚀 GPU Accelerated: Uses NVIDIA CUDA for blazing-fast transcription\n🔒 100% Local: No data leaves your machine. Complete privacy.\n💰 Free Forever: No API costs. Run unlimited transcriptions.\n🌍 Multilingual: Supports 99 languages with automatic detection\n📁 Multiple Formats: Input: MP3, WAV, FLAC, OGG, M4A. Output: TXT, SRT, JSON\n🎯 Multiple Models: From tiny (fast) to large-v3 (most accurate)\n🎬 Subtitle Generation: Create SRT files with word-level timestamps"
      },
      {
        "title": "Hardware",
        "body": "NVIDIA GPU with CUDA support (recommended: 4GB+ VRAM)\nOr CPU-only mode (slower but works on any machine)"
      },
      {
        "title": "Software",
        "body": "Python 3.8+\nNVIDIA drivers (for GPU support)\nCUDA Toolkit 11.8+ or 12.x"
      },
      {
        "title": "Installation",
        "body": "# Install dependencies\npip install faster-whisper torch\n\n# Verify GPU is available\npython -c \"import torch; print(f'CUDA available: {torch.cuda.is_available()}')\""
      },
      {
        "title": "Basic Usage",
        "body": "# Transcribe an audio file (auto-detects GPU)\npython transcribe.py audio.mp3\n\n# Specify language explicitly\npython transcribe.py audio.mp3 --language pt\n\n# Output as SRT subtitles\npython transcribe.py audio.mp3 --format srt --output subtitles.srt\n\n# Use larger model for better accuracy\npython transcribe.py audio.mp3 --model large-v3"
      },
      {
        "title": "Command Line Options",
        "body": "python transcribe.py <audio_file> [options]\n\nOptions:\n  --model {tiny,base,small,medium,large-v1,large-v2,large-v3}\n                        Model size to use (default: base)\n  --language LANG       Language code (e.g., 'pt', 'en', 'es'). Auto-detect if not specified.\n  --format {txt,srt,json,vtt}\n                        Output format (default: txt)\n  --output FILE         Output file path (default: stdout)\n  --device {cuda,cpu}   Device to use (default: cuda if available)\n  --compute_type {int8,int8_float16,int16,float16,float32}\n                        Computation precision (default: float16)\n  --task {transcribe,translate}\n                        Task: transcribe or translate to English (default: transcribe)\n  --vad_filter          Enable voice activity detection filter\n  --vad_parameters MIN_DURATION_ON,MIN_DURATION_OFF\n                        VAD parameters as comma-separated values\n  --condition_on_previous_text\n                        Condition on previous text (default: True)\n  --initial_prompt PROMPT\n                        Initial prompt to guide transcription\n  --word_timestamps     Include word-level timestamps (for SRT/JSON)\n  --hotwords WORDS      Comma-separated hotwords to boost recognition"
      },
      {
        "title": "Examples",
        "body": "Portuguese Transcription with SRT Output\n\npython transcribe.py meeting.mp3 --language pt --format srt --output meeting.srt\n\nEnglish Translation from Any Language\n\npython transcribe.py japanese_audio.mp3 --task translate --format txt\n\nHigh-Accuracy Mode with Large Model\n\npython transcribe.py podcast.mp3 --model large-v3 --vad_filter --word_timestamps\n\nCPU-Only Mode (no GPU)\n\npython transcribe.py audio.mp3 --device cpu --compute_type int8"
      },
      {
        "title": "🐍 Python API",
        "body": "from faster_whisper import WhisperModel\n\n# Load model\nmodel = WhisperModel(\"base\", device=\"cuda\", compute_type=\"float16\")\n\n# Transcribe\nsegments, info = model.transcribe(\"audio.mp3\", language=\"pt\")\n\nprint(f\"Detected language: {info.language} (probability: {info.language_probability:.2f})\")\n\nfor segment in segments:\n    print(f\"[{segment.start:.2f}s -> {segment.end:.2f}s] {segment.text}\")"
      },
      {
        "title": "📊 Model Sizes & VRAM Requirements",
        "body": "Model\tParameters\tVRAM Required\tRelative Speed\tAccuracy\ntiny\t39 M\t~1 GB\t~32x\tBasic\nbase\t74 M\t~1 GB\t~16x\tGood\nsmall\t244 M\t~2 GB\t~6x\tBetter\nmedium\t769 M\t~5 GB\t~2x\tGreat\nlarge-v3\t1550 M\t~10 GB\t1x\tBest\n\nBenchmarks measured on NVIDIA RTX 4090"
      },
      {
        "title": "🔍 Supported Languages",
        "body": "Faster Whisper supports 99 languages including:\n\nPortuguese (pt)\nEnglish (en)\nSpanish (es)\nFrench (fr)\nGerman (de)\nItalian (it)\nJapanese (ja)\nChinese (zh)\nRussian (ru)\nAnd 90+ more..."
      },
      {
        "title": "CUDA Out of Memory",
        "body": "# Use smaller model\npython transcribe.py audio.mp3 --model tiny\n\n# Or use CPU\npython transcribe.py audio.mp3 --device cpu\n\n# Or reduce precision\npython transcribe.py audio.mp3 --compute_type int8"
      },
      {
        "title": "Model Download Issues",
        "body": "Models are automatically downloaded on first use to ~/.cache/huggingface/hub/.\nIf behind a proxy, set:\n\nexport HF_HOME=/path/to/custom/cache"
      },
      {
        "title": "Slow Transcription",
        "body": "Ensure GPU is being used: check nvidia-smi during transcription\nUse smaller model for faster results\nEnable VAD filter to skip silent parts"
      },
      {
        "title": "🤝 Contributing",
        "body": "Contributions are welcome! Please:\n\nFork the repository\nCreate a feature branch\nSubmit a pull request"
      },
      {
        "title": "📜 License",
        "body": "MIT License - See LICENSE for details.\n\nFaster Whisper is developed by SYSTRAN and based on OpenAI's Whisper."
      },
      {
        "title": "🙏 Acknowledgments",
        "body": "OpenAI Whisper - Original model\nFaster Whisper - Optimized implementation\nCTranslate2 - Fast inference engine\n\nMade with ❤️ for the OpenClaw community"
      }
    ],
    "body": "🎙️ Faster Whisper GPU\n\nHigh-performance local speech-to-text transcription using Faster Whisper with NVIDIA GPU acceleration.\n\n✨ Features\n🚀 GPU Accelerated: Uses NVIDIA CUDA for blazing-fast transcription\n🔒 100% Local: No data leaves your machine. Complete privacy.\n💰 Free Forever: No API costs. Run unlimited transcriptions.\n🌍 Multilingual: Supports 99 languages with automatic detection\n📁 Multiple Formats: Input: MP3, WAV, FLAC, OGG, M4A. Output: TXT, SRT, JSON\n🎯 Multiple Models: From tiny (fast) to large-v3 (most accurate)\n🎬 Subtitle Generation: Create SRT files with word-level timestamps\n📋 Requirements\nHardware\nNVIDIA GPU with CUDA support (recommended: 4GB+ VRAM)\nOr CPU-only mode (slower but works on any machine)\nSoftware\nPython 3.8+\nNVIDIA drivers (for GPU support)\nCUDA Toolkit 11.8+ or 12.x\n🚀 Quick Start\nInstallation\n# Install dependencies\npip install faster-whisper torch\n\n# Verify GPU is available\npython -c \"import torch; print(f'CUDA available: {torch.cuda.is_available()}')\"\n\nBasic Usage\n# Transcribe an audio file (auto-detects GPU)\npython transcribe.py audio.mp3\n\n# Specify language explicitly\npython transcribe.py audio.mp3 --language pt\n\n# Output as SRT subtitles\npython transcribe.py audio.mp3 --format srt --output subtitles.srt\n\n# Use larger model for better accuracy\npython transcribe.py audio.mp3 --model large-v3\n\n🔧 Advanced Usage\nCommand Line Options\npython transcribe.py <audio_file> [options]\n\nOptions:\n  --model {tiny,base,small,medium,large-v1,large-v2,large-v3}\n                        Model size to use (default: base)\n  --language LANG       Language code (e.g., 'pt', 'en', 'es'). Auto-detect if not specified.\n  --format {txt,srt,json,vtt}\n                        Output format (default: txt)\n  --output FILE         Output file path (default: stdout)\n  --device {cuda,cpu}   Device to use (default: cuda if available)\n  --compute_type {int8,int8_float16,int16,float16,float32}\n                        Computation precision (default: float16)\n  --task {transcribe,translate}\n                        Task: transcribe or translate to English (default: transcribe)\n  --vad_filter          Enable voice activity detection filter\n  --vad_parameters MIN_DURATION_ON,MIN_DURATION_OFF\n                        VAD parameters as comma-separated values\n  --condition_on_previous_text\n                        Condition on previous text (default: True)\n  --initial_prompt PROMPT\n                        Initial prompt to guide transcription\n  --word_timestamps     Include word-level timestamps (for SRT/JSON)\n  --hotwords WORDS      Comma-separated hotwords to boost recognition\n\nExamples\nPortuguese Transcription with SRT Output\npython transcribe.py meeting.mp3 --language pt --format srt --output meeting.srt\n\nEnglish Translation from Any Language\npython transcribe.py japanese_audio.mp3 --task translate --format txt\n\nHigh-Accuracy Mode with Large Model\npython transcribe.py podcast.mp3 --model large-v3 --vad_filter --word_timestamps\n\nCPU-Only Mode (no GPU)\npython transcribe.py audio.mp3 --device cpu --compute_type int8\n\n🐍 Python API\nfrom faster_whisper import WhisperModel\n\n# Load model\nmodel = WhisperModel(\"base\", device=\"cuda\", compute_type=\"float16\")\n\n# Transcribe\nsegments, info = model.transcribe(\"audio.mp3\", language=\"pt\")\n\nprint(f\"Detected language: {info.language} (probability: {info.language_probability:.2f})\")\n\nfor segment in segments:\n    print(f\"[{segment.start:.2f}s -> {segment.end:.2f}s] {segment.text}\")\n\n📊 Model Sizes & VRAM Requirements\nModel\tParameters\tVRAM Required\tRelative Speed\tAccuracy\ntiny\t39 M\t~1 GB\t~32x\tBasic\nbase\t74 M\t~1 GB\t~16x\tGood\nsmall\t244 M\t~2 GB\t~6x\tBetter\nmedium\t769 M\t~5 GB\t~2x\tGreat\nlarge-v3\t1550 M\t~10 GB\t1x\tBest\n\nBenchmarks measured on NVIDIA RTX 4090\n\n🔍 Supported Languages\n\nFaster Whisper supports 99 languages including:\n\nPortuguese (pt)\nEnglish (en)\nSpanish (es)\nFrench (fr)\nGerman (de)\nItalian (it)\nJapanese (ja)\nChinese (zh)\nRussian (ru)\nAnd 90+ more...\n🛠️ Troubleshooting\nCUDA Out of Memory\n# Use smaller model\npython transcribe.py audio.mp3 --model tiny\n\n# Or use CPU\npython transcribe.py audio.mp3 --device cpu\n\n# Or reduce precision\npython transcribe.py audio.mp3 --compute_type int8\n\nModel Download Issues\n\nModels are automatically downloaded on first use to ~/.cache/huggingface/hub/. If behind a proxy, set:\n\nexport HF_HOME=/path/to/custom/cache\n\nSlow Transcription\nEnsure GPU is being used: check nvidia-smi during transcription\nUse smaller model for faster results\nEnable VAD filter to skip silent parts\n🤝 Contributing\n\nContributions are welcome! Please:\n\nFork the repository\nCreate a feature branch\nSubmit a pull request\n📜 License\n\nMIT License - See LICENSE for details.\n\nFaster Whisper is developed by SYSTRAN and based on OpenAI's Whisper.\n\n🙏 Acknowledgments\nOpenAI Whisper - Original model\nFaster Whisper - Optimized implementation\nCTranslate2 - Fast inference engine\n\nMade with ❤️ for the OpenClaw community"
  },
  "trust": {
    "sourceLabel": "tencent",
    "provenanceUrl": "https://clawhub.ai/FelipeOFF/faster-whisper-gpu",
    "publisherUrl": "https://clawhub.ai/FelipeOFF/faster-whisper-gpu",
    "owner": "FelipeOFF",
    "version": "0.1.0",
    "license": null,
    "verificationStatus": "Indexed source record"
  },
  "links": {
    "detailUrl": "https://openagent3.xyz/skills/faster-whisper-gpu",
    "downloadUrl": "https://openagent3.xyz/downloads/faster-whisper-gpu",
    "agentUrl": "https://openagent3.xyz/skills/faster-whisper-gpu/agent",
    "manifestUrl": "https://openagent3.xyz/skills/faster-whisper-gpu/agent.json",
    "briefUrl": "https://openagent3.xyz/skills/faster-whisper-gpu/agent.md"
  }
}