{
  "schemaVersion": "1.0",
  "item": {
    "slug": "voice-note-to-midi",
    "name": "Voice Note to MIDI",
    "source": "tencent",
    "type": "skill",
    "category": "AI 智能",
    "sourceUrl": "https://clawhub.ai/DanBennettUK/voice-note-to-midi",
    "canonicalUrl": "https://clawhub.ai/DanBennettUK/voice-note-to-midi",
    "targetPlatform": "OpenClaw"
  },
  "install": {
    "downloadMode": "redirect",
    "downloadUrl": "/downloads/voice-note-to-midi",
    "sourceDownloadUrl": "https://wry-manatee-359.convex.site/api/v1/download?slug=voice-note-to-midi",
    "sourcePlatform": "tencent",
    "targetPlatform": "OpenClaw",
    "installMethod": "Manual import",
    "extraction": "Extract archive",
    "prerequisites": [
      "OpenClaw"
    ],
    "packageFormat": "ZIP package",
    "includedAssets": [
      "QUICKSTART.md",
      "README.md",
      "SKILL.md",
      "setup.sh"
    ],
    "primaryDoc": "SKILL.md",
    "quickSetup": [
      "Download the package from Yavira.",
      "Extract the archive and review SKILL.md first.",
      "Import or place the package into your OpenClaw setup."
    ],
    "agentAssist": {
      "summary": "Hand the extracted package to your coding agent with a concrete install brief instead of figuring it out manually.",
      "steps": [
        "Download the package from Yavira.",
        "Extract it into a folder your agent can access.",
        "Paste one of the prompts below and point your agent at the extracted folder."
      ],
      "prompts": [
        {
          "label": "New install",
          "body": "I downloaded a skill package from Yavira. Read SKILL.md from the extracted folder and install it by following the included instructions. Then review README.md for any prerequisites, environment setup, or post-install checks. Tell me what you changed and call out any manual steps you could not complete."
        },
        {
          "label": "Upgrade existing",
          "body": "I downloaded an updated skill package from Yavira. Read SKILL.md from the extracted folder, compare it with my current installation, and upgrade it while preserving any custom configuration unless the package docs explicitly say otherwise. Then review README.md for any prerequisites, environment setup, or post-install checks. Summarize what changed and any follow-up checks I should run."
        }
      ]
    },
    "sourceHealth": {
      "source": "tencent",
      "status": "healthy",
      "reason": "direct_download_ok",
      "recommendedAction": "download",
      "checkedAt": "2026-04-30T16:55:25.780Z",
      "expiresAt": "2026-05-07T16:55:25.780Z",
      "httpStatus": 200,
      "finalUrl": "https://wry-manatee-359.convex.site/api/v1/download?slug=network",
      "contentType": "application/zip",
      "probeMethod": "head",
      "details": {
        "probeUrl": "https://wry-manatee-359.convex.site/api/v1/download?slug=network",
        "contentDisposition": "attachment; filename=\"network-1.0.0.zip\"",
        "redirectLocation": null,
        "bodySnippet": null
      },
      "scope": "source",
      "summary": "Source download looks usable.",
      "detail": "Yavira can redirect you to the upstream package for this source.",
      "primaryActionLabel": "Download for OpenClaw",
      "primaryActionHref": "/downloads/voice-note-to-midi"
    },
    "validation": {
      "installChecklist": [
        "Use the Yavira download entry.",
        "Review SKILL.md after the package is downloaded.",
        "Confirm the extracted package contains the expected setup assets."
      ],
      "postInstallChecks": [
        "Confirm the extracted package includes the expected docs or setup files.",
        "Validate the skill or prompts are available in your target agent workspace.",
        "Capture any manual follow-up steps the agent could not complete."
      ]
    },
    "downloadPageUrl": "https://openagent3.xyz/downloads/voice-note-to-midi",
    "agentPageUrl": "https://openagent3.xyz/skills/voice-note-to-midi/agent",
    "manifestUrl": "https://openagent3.xyz/skills/voice-note-to-midi/agent.json",
    "briefUrl": "https://openagent3.xyz/skills/voice-note-to-midi/agent.md"
  },
  "agentAssist": {
    "summary": "Hand the extracted package to your coding agent with a concrete install brief instead of figuring it out manually.",
    "steps": [
      "Download the package from Yavira.",
      "Extract it into a folder your agent can access.",
      "Paste one of the prompts below and point your agent at the extracted folder."
    ],
    "prompts": [
      {
        "label": "New install",
        "body": "I downloaded a skill package from Yavira. Read SKILL.md from the extracted folder and install it by following the included instructions. Then review README.md for any prerequisites, environment setup, or post-install checks. Tell me what you changed and call out any manual steps you could not complete."
      },
      {
        "label": "Upgrade existing",
        "body": "I downloaded an updated skill package from Yavira. Read SKILL.md from the extracted folder, compare it with my current installation, and upgrade it while preserving any custom configuration unless the package docs explicitly say otherwise. Then review README.md for any prerequisites, environment setup, or post-install checks. Summarize what changed and any follow-up checks I should run."
      }
    ]
  },
  "documentation": {
    "source": "clawhub",
    "primaryDoc": "SKILL.md",
    "sections": [
      {
        "title": "🎵 Voice Note to MIDI",
        "body": "Transform your voice memos, humming, and melodic recordings into clean, quantized MIDI files ready for your DAW."
      },
      {
        "title": "What It Does",
        "body": "This skill provides a complete audio-to-MIDI conversion pipeline that:\n\nStem Separation - Uses HPSS (Harmonic-Percussive Source Separation) to isolate melodic content from drums, noise, and background sounds\nML-Powered Pitch Detection - Leverages Spotify's Basic Pitch model for accurate fundamental frequency extraction\nKey Detection - Automatically detects the musical key of your recording using Krumhansl-Kessler key profiles\nIntelligent Quantization - Snaps notes to a configurable timing grid with optional key-aware pitch correction\nPost-Processing - Applies octave pruning, overlap-based harmonic removal, and legato note merging for clean output"
      },
      {
        "title": "Pipeline Architecture",
        "body": "Audio Input (WAV/M4A/MP3)\n    ↓\n┌─────────────────────────────────────┐\n│ Step 1: Stem Separation (HPSS)     │\n│ - Isolate harmonic content          │\n│ - Remove drums/percussion           │\n│ - Noise gating                      │\n└─────────────────────────────────────┘\n    ↓\n┌─────────────────────────────────────┐\n│ Step 2: Pitch Detection             │\n│ - Basic Pitch ML model (Spotify)    │\n│ - Polyphonic note detection         │\n│ - Onset/offset estimation           │\n└─────────────────────────────────────┘\n    ↓\n┌─────────────────────────────────────┐\n│ Step 3: Analysis                    │\n│ - Pitch class distribution          │\n│ - Key detection                     │\n│ - Dominant note identification      │\n└─────────────────────────────────────┘\n    ↓\n┌─────────────────────────────────────┐\n│ Step 4: Quantization & Cleanup      │\n│ - Timing grid snap                  │\n│ - Key-aware pitch correction        │\n│ - Octave pruning (harmonic removal) │\n│ - Overlap-based pruning             │\n│ - Note merging (legato)             │\n│ - Velocity normalization            │\n└─────────────────────────────────────┘\n    ↓\nMIDI Output (Standard MIDI File)"
      },
      {
        "title": "Prerequisites",
        "body": "Python 3.11+ (Python 3.14+ recommended)\nFFmpeg (for audio format support)\npip"
      },
      {
        "title": "Installation",
        "body": "Quick Install (Recommended):\n\ncd /path/to/voice-note-to-midi\n./setup.sh\n\nThis automated script will:\n\nCheck Python 3.11+ is installed\nCreate the ~/melody-pipeline directory\nSet up the virtual environment\nInstall all dependencies (basic-pitch, librosa, music21, etc.)\nDownload and configure the hum2midi script\nAdd melody-pipeline to your PATH\n\nManual Install:\n\nIf you prefer manual setup:\n\nmkdir -p ~/melody-pipeline\ncd ~/melody-pipeline\npython3 -m venv venv-bp\nsource venv-bp/bin/activate\npip install basic-pitch librosa soundfile mido music21\nchmod +x ~/melody-pipeline/hum2midi\n\nAdd to your PATH (optional):\n\necho 'export PATH=\"$HOME/melody-pipeline:$PATH\"' >> ~/.bashrc\nsource ~/.bashrc"
      },
      {
        "title": "Verify Installation",
        "body": "cd ~/melody-pipeline\n./hum2midi --help"
      },
      {
        "title": "Basic Usage",
        "body": "Convert a voice memo to MIDI:\n\n./hum2midi my_humming.wav\n\nThis creates my_humming.mid with 16th-note quantization."
      },
      {
        "title": "Specify Output File",
        "body": "./hum2midi input.wav output.mid"
      },
      {
        "title": "Command-Line Options",
        "body": "Option\tDescription\tDefault\n--grid <value>\tQuantization grid: 1/4, 1/8, 1/16, 1/32\t1/16\n--min-note <ms>\tMinimum note duration in milliseconds\t50\n--no-quantize\tSkip quantization (output raw Basic Pitch MIDI)\tdisabled\n--key-aware\tEnable key-aware pitch correction\tdisabled\n--no-analysis\tSkip pitch analysis and key detection\tdisabled"
      },
      {
        "title": "Usage Examples",
        "body": "Quantize to eighth notes\n\n./hum2midi melody.wav --grid 1/8\n\nKey-aware quantization (recommended for tonal music)\n\n./hum2midi song.wav --key-aware\n\nRequire longer minimum notes\n\n./hum2midi humming.wav --min-note 100\n\nSkip analysis for faster processing\n\n./hum2midi quick.wav --no-analysis\n\nCombine options\n\n./hum2midi recording.wav output.mid --grid 1/8 --key-aware --min-note 80"
      },
      {
        "title": "Processing MIDI Input",
        "body": "You can also process existing MIDI files through the quantization pipeline:\n\n./hum2midi input.mid output.mid --grid 1/16 --key-aware\n\nThis skips the audio processing steps and goes directly to analysis and quantization."
      },
      {
        "title": "Sample Output",
        "body": "═══════════════════════════════════════════════════════════════\n  hum2midi - Melody-to-MIDI Pipeline (Basic Pitch Edition)\n  [Key-Aware Mode Enabled]\n═══════════════════════════════════════════════════════════════\n\nInput:  my_humming.wav\nOutput: my_humming.mid\n\n→ Step 1: Stem Separation (HPSS)\n  Isolating melodic content...\n  Loaded: 5.23s @ 44100Hz\n  ✓ Melody stem extracted → 5.23s\n\n→ Step 2: Audio-to-MIDI Conversion (Basic Pitch)\n  Running Spotify's Basic Pitch ML model on melody stem...\n  ✓ Raw MIDI generated (Basic Pitch)\n\n→ Step 3: Pitch Analysis & Key Detection\n  Notes detected: 42 total, 7 unique\n  Note range: C3 - G4\n  Pitch classes: C3, E3, G3, A3, C4, D4, G4\n  Dominant note: G3 (23.8% of notes)\n  Detected key: G major\n\n→ Step 4: Quantization & Cleanup\n  Octave pruning: removed 3 harmonic notes above 67 (median+12)\n  Overlap pruning: removed 2 harmonic notes at overlapping positions\n  Note merging: merged 5 staccato chunks into legato notes (gap<=60 ticks)\n  Grid:   240 ticks (1/16)\n  Notes:  38 notes\n  Key:    G major\n  Key-aware: 2 notes corrected to scale\n  Tempo:  120 BPM\n  ✓ Quantized MIDI saved\n\n═══════════════════════════════════════════════════════════════\n  ✓ Done! Output: my_humming.mid\n═══════════════════════════════════════════════════════════════\n\n📊 ANALYSIS SUMMARY\n─────────────────────────────────────────────────────────────\n  Detected Notes: C3, E3, G3, A3, C4, D4, G4\n  Detected Key:   G major\n  Quantization:   Key-aware mode (notes snapped to scale)\n\nMIDI Info: 38 notes, 7 unique pitches, 120 BPM\nPitches: C3, E3, G3, A3, C4, D4, G4"
      },
      {
        "title": "Audio Quality Matters",
        "body": "Clear, loud melody produces the best results\nBackground noise can cause false note detection\nReverb and effects may confuse pitch detection\nClose-mic'd vocals work significantly better than room recordings"
      },
      {
        "title": "Musical Considerations",
        "body": "Monophonic sources work best (single melody line)\nPolyphonic audio (chords, multiple instruments) will produce messy results\nVibrato and pitch bends may be quantized to stepped pitches\nRapid note passages may be missed or merged"
      },
      {
        "title": "Technical Limitations",
        "body": "Tempo is fixed at 120 BPM in output (time positions are preserved, but tempo may need adjustment in your DAW)\nNote velocities are normalized but may need manual adjustment\nVery short notes (<50ms) may be filtered out by default\nExtreme pitch ranges may cause octave detection issues"
      },
      {
        "title": "Post-Processing Recommendations",
        "body": "After generating MIDI, you may want to:\n\nImport into your DAW and adjust tempo to match your original recording\nQuantize further if stricter timing is needed\nAdjust note velocities for dynamics\nApply swing/groove templates if the rigid grid sounds too mechanical\nEdit individual notes that were misdetected (common with fast runs)"
      },
      {
        "title": "Supported Audio Formats",
        "body": "Input formats supported via FFmpeg:\n\nWAV, AIFF, FLAC (uncompressed, best quality)\nMP3, M4A, AAC (compressed, acceptable)\nOGG, OPUS (open source formats)\nMost other formats FFmpeg supports"
      },
      {
        "title": "No notes detected",
        "body": "Check that input file isn't silent or corrupted\nTry increasing --min-note threshold\nVerify audio has clear melodic content (not just noise)"
      },
      {
        "title": "Too many notes / messy output",
        "body": "Enable octave pruning and overlap pruning (on by default)\nUse --key-aware to constrain to musical scale\nCheck for background noise in source audio"
      },
      {
        "title": "Wrong key detected",
        "body": "Key detection works best with at least 8-10 measures of music\nChromatic passages may confuse the detector\nManually review and adjust in your DAW if needed"
      },
      {
        "title": "Notes in wrong octave",
        "body": "Basic Pitch sometimes detects harmonics instead of fundamentals\nThe pipeline includes pruning, but some may slip through\nUse your DAW's transpose function for simple octave shifts"
      },
      {
        "title": "References",
        "body": "Basic Pitch - Spotify's polyphonic pitch detection model\nlibrosa HPSS - Harmonic-Percussive Source Separation\nKrumhansl-Kessler Key Profiles - Key detection algorithm"
      },
      {
        "title": "License",
        "body": "This skill integrates Basic Pitch by Spotify, which is licensed under Apache 2.0. The pipeline script and documentation are provided under MIT license."
      }
    ],
    "body": "🎵 Voice Note to MIDI\n\nTransform your voice memos, humming, and melodic recordings into clean, quantized MIDI files ready for your DAW.\n\nWhat It Does\n\nThis skill provides a complete audio-to-MIDI conversion pipeline that:\n\nStem Separation - Uses HPSS (Harmonic-Percussive Source Separation) to isolate melodic content from drums, noise, and background sounds\nML-Powered Pitch Detection - Leverages Spotify's Basic Pitch model for accurate fundamental frequency extraction\nKey Detection - Automatically detects the musical key of your recording using Krumhansl-Kessler key profiles\nIntelligent Quantization - Snaps notes to a configurable timing grid with optional key-aware pitch correction\nPost-Processing - Applies octave pruning, overlap-based harmonic removal, and legato note merging for clean output\nPipeline Architecture\nAudio Input (WAV/M4A/MP3)\n    ↓\n┌─────────────────────────────────────┐\n│ Step 1: Stem Separation (HPSS)     │\n│ - Isolate harmonic content          │\n│ - Remove drums/percussion           │\n│ - Noise gating                      │\n└─────────────────────────────────────┘\n    ↓\n┌─────────────────────────────────────┐\n│ Step 2: Pitch Detection             │\n│ - Basic Pitch ML model (Spotify)    │\n│ - Polyphonic note detection         │\n│ - Onset/offset estimation           │\n└─────────────────────────────────────┘\n    ↓\n┌─────────────────────────────────────┐\n│ Step 3: Analysis                    │\n│ - Pitch class distribution          │\n│ - Key detection                     │\n│ - Dominant note identification      │\n└─────────────────────────────────────┘\n    ↓\n┌─────────────────────────────────────┐\n│ Step 4: Quantization & Cleanup      │\n│ - Timing grid snap                  │\n│ - Key-aware pitch correction        │\n│ - Octave pruning (harmonic removal) │\n│ - Overlap-based pruning             │\n│ - Note merging (legato)             │\n│ - Velocity normalization            
│\n└─────────────────────────────────────┘\n    ↓\nMIDI Output (Standard MIDI File)\n\nSetup\nPrerequisites\nPython 3.11+ (Python 3.14+ recommended)\nFFmpeg (for audio format support)\npip\nInstallation\n\nQuick Install (Recommended):\n\ncd /path/to/voice-note-to-midi\n./setup.sh\n\n\nThis automated script will:\n\nCheck Python 3.11+ is installed\nCreate the ~/melody-pipeline directory\nSet up the virtual environment\nInstall all dependencies (basic-pitch, librosa, music21, etc.)\nDownload and configure the hum2midi script\nAdd melody-pipeline to your PATH\n\nManual Install:\n\nIf you prefer manual setup:\n\nmkdir -p ~/melody-pipeline\ncd ~/melody-pipeline\npython3 -m venv venv-bp\nsource venv-bp/bin/activate\npip install basic-pitch librosa soundfile mido music21\nchmod +x ~/melody-pipeline/hum2midi\n\nAdd to your PATH (optional):\necho 'export PATH=\"$HOME/melody-pipeline:$PATH\"' >> ~/.bashrc\nsource ~/.bashrc\n\nVerify Installation\ncd ~/melody-pipeline\n./hum2midi --help\n\nUsage\nBasic Usage\n\nConvert a voice memo to MIDI:\n\n./hum2midi my_humming.wav\n\n\nThis creates my_humming.mid with 16th-note quantization.\n\nSpecify Output File\n./hum2midi input.wav output.mid\n\nCommand-Line Options\nOption\tDescription\tDefault\n--grid <value>\tQuantization grid: 1/4, 1/8, 1/16, 1/32\t1/16\n--min-note <ms>\tMinimum note duration in milliseconds\t50\n--no-quantize\tSkip quantization (output raw Basic Pitch MIDI)\tdisabled\n--key-aware\tEnable key-aware pitch correction\tdisabled\n--no-analysis\tSkip pitch analysis and key detection\tdisabled\nUsage Examples\nQuantize to eighth notes\n./hum2midi melody.wav --grid 1/8\n\nKey-aware quantization (recommended for tonal music)\n./hum2midi song.wav --key-aware\n\nRequire longer minimum notes\n./hum2midi humming.wav --min-note 100\n\nSkip analysis for faster processing\n./hum2midi quick.wav --no-analysis\n\nCombine options\n./hum2midi recording.wav output.mid --grid 1/8 --key-aware --min-note 80\n\nProcessing MIDI 
Input\n\nYou can also process existing MIDI files through the quantization pipeline:\n\n./hum2midi input.mid output.mid --grid 1/16 --key-aware\n\n\nThis skips the audio processing steps and goes directly to analysis and quantization.\n\nSample Output\n═══════════════════════════════════════════════════════════════\n  hum2midi - Melody-to-MIDI Pipeline (Basic Pitch Edition)\n  [Key-Aware Mode Enabled]\n═══════════════════════════════════════════════════════════════\n\nInput:  my_humming.wav\nOutput: my_humming.mid\n\n→ Step 1: Stem Separation (HPSS)\n  Isolating melodic content...\n  Loaded: 5.23s @ 44100Hz\n  ✓ Melody stem extracted → 5.23s\n\n→ Step 2: Audio-to-MIDI Conversion (Basic Pitch)\n  Running Spotify's Basic Pitch ML model on melody stem...\n  ✓ Raw MIDI generated (Basic Pitch)\n\n→ Step 3: Pitch Analysis & Key Detection\n  Notes detected: 42 total, 7 unique\n  Note range: C3 - G4\n  Pitch classes: C3, E3, G3, A3, C4, D4, G4\n  Dominant note: G3 (23.8% of notes)\n  Detected key: G major\n\n→ Step 4: Quantization & Cleanup\n  Octave pruning: removed 3 harmonic notes above 67 (median+12)\n  Overlap pruning: removed 2 harmonic notes at overlapping positions\n  Note merging: merged 5 staccato chunks into legato notes (gap<=60 ticks)\n  Grid:   240 ticks (1/16)\n  Notes:  38 notes\n  Key:    G major\n  Key-aware: 2 notes corrected to scale\n  Tempo:  120 BPM\n  ✓ Quantized MIDI saved\n\n═══════════════════════════════════════════════════════════════\n  ✓ Done! 
Output: my_humming.mid\n═══════════════════════════════════════════════════════════════\n\n📊 ANALYSIS SUMMARY\n─────────────────────────────────────────────────────────────\n  Detected Notes: C3, E3, G3, A3, C4, D4, G4\n  Detected Key:   G major\n  Quantization:   Key-aware mode (notes snapped to scale)\n\nMIDI Info: 38 notes, 7 unique pitches, 120 BPM\nPitches: C3, E3, G3, A3, C4, D4, G4\n\nNotes & Limitations\nAudio Quality Matters\nClear, loud melody produces the best results\nBackground noise can cause false note detection\nReverb and effects may confuse pitch detection\nClose-mic'd vocals work significantly better than room recordings\nMusical Considerations\nMonophonic sources work best (single melody line)\nPolyphonic audio (chords, multiple instruments) will produce messy results\nVibrato and pitch bends may be quantized to stepped pitches\nRapid note passages may be missed or merged\nTechnical Limitations\nTempo is fixed at 120 BPM in output (time positions are preserved, but tempo may need adjustment in your DAW)\nNote velocities are normalized but may need manual adjustment\nVery short notes (<50ms) may be filtered out by default\nExtreme pitch ranges may cause octave detection issues\nPost-Processing Recommendations\n\nAfter generating MIDI, you may want to:\n\nImport into your DAW and adjust tempo to match your original recording\nQuantize further if stricter timing is needed\nAdjust note velocities for dynamics\nApply swing/groove templates if the rigid grid sounds too mechanical\nEdit individual notes that were misdetected (common with fast runs)\nSupported Audio Formats\n\nInput formats supported via FFmpeg:\n\nWAV, AIFF, FLAC (uncompressed, best quality)\nMP3, M4A, AAC (compressed, acceptable)\nOGG, OPUS (open source formats)\nMost other formats FFmpeg supports\nTroubleshooting\nNo notes detected\nCheck that input file isn't silent or corrupted\nTry increasing --min-note threshold\nVerify audio has clear melodic content (not just noise)\nToo many 
notes / messy output\nEnable octave pruning and overlap pruning (on by default)\nUse --key-aware to constrain to musical scale\nCheck for background noise in source audio\nWrong key detected\nKey detection works best with at least 8-10 measures of music\nChromatic passages may confuse the detector\nManually review and adjust in your DAW if needed\nNotes in wrong octave\nBasic Pitch sometimes detects harmonics instead of fundamentals\nThe pipeline includes pruning, but some may slip through\nUse your DAW's transpose function for simple octave shifts\nReferences\nBasic Pitch - Spotify's polyphonic pitch detection model\nlibrosa HPSS - Harmonic-Percussive Source Separation\nKrumhansl-Kessler Key Profiles - Key detection algorithm\nLicense\n\nThis skill integrates Basic Pitch by Spotify, which is licensed under Apache 2.0. The pipeline script and documentation are provided under MIT license."
  },
  "trust": {
    "sourceLabel": "tencent",
    "provenanceUrl": "https://clawhub.ai/DanBennettUK/voice-note-to-midi",
    "publisherUrl": "https://clawhub.ai/DanBennettUK/voice-note-to-midi",
    "owner": "DanBennettUK",
    "version": "0.1.0",
    "license": null,
    "verificationStatus": "Indexed source record"
  },
  "links": {
    "detailUrl": "https://openagent3.xyz/skills/voice-note-to-midi",
    "downloadUrl": "https://openagent3.xyz/downloads/voice-note-to-midi",
    "agentUrl": "https://openagent3.xyz/skills/voice-note-to-midi/agent",
    "manifestUrl": "https://openagent3.xyz/skills/voice-note-to-midi/agent.json",
    "briefUrl": "https://openagent3.xyz/skills/voice-note-to-midi/agent.md"
  }
}