{
  "schemaVersion": "1.0",
  "item": {
    "slug": "youtube-video-analyzer",
    "name": "Youtube Video Analyzer",
    "source": "tencent",
    "type": "skill",
    "category": "数据分析",
    "sourceUrl": "https://clawhub.ai/sdrabent/youtube-video-analyzer",
    "canonicalUrl": "https://clawhub.ai/sdrabent/youtube-video-analyzer",
    "targetPlatform": "OpenClaw"
  },
  "install": {
    "downloadMode": "redirect",
    "downloadUrl": "/downloads/youtube-video-analyzer",
    "sourceDownloadUrl": "https://wry-manatee-359.convex.site/api/v1/download?slug=youtube-video-analyzer",
    "sourcePlatform": "tencent",
    "targetPlatform": "OpenClaw",
    "installMethod": "Manual import",
    "extraction": "Extract archive",
    "prerequisites": [
      "OpenClaw"
    ],
    "packageFormat": "ZIP package",
    "includedAssets": [
      "SKILL.md"
    ],
    "primaryDoc": "SKILL.md",
    "quickSetup": [
      "Download the package from Yavira.",
      "Extract the archive and review SKILL.md first.",
      "Import or place the package into your OpenClaw setup."
    ],
    "agentAssist": {
      "summary": "Hand the extracted package to your coding agent with a concrete install brief instead of figuring it out manually.",
      "steps": [
        "Download the package from Yavira.",
        "Extract it into a folder your agent can access.",
        "Paste one of the prompts below and point your agent at the extracted folder."
      ],
      "prompts": [
        {
          "label": "New install",
          "body": "I downloaded a skill package from Yavira. Read SKILL.md from the extracted folder and install it by following the included instructions. Tell me what you changed and call out any manual steps you could not complete."
        },
        {
          "label": "Upgrade existing",
          "body": "I downloaded an updated skill package from Yavira. Read SKILL.md from the extracted folder, compare it with my current installation, and upgrade it while preserving any custom configuration unless the package docs explicitly say otherwise. Summarize what changed and any follow-up checks I should run."
        }
      ]
    },
    "sourceHealth": {
      "source": "tencent",
      "status": "healthy",
      "reason": "direct_download_ok",
      "recommendedAction": "download",
      "checkedAt": "2026-04-23T16:43:11.935Z",
      "expiresAt": "2026-04-30T16:43:11.935Z",
      "httpStatus": 200,
      "finalUrl": "https://wry-manatee-359.convex.site/api/v1/download?slug=4claw-imageboard",
      "contentType": "application/zip",
      "probeMethod": "head",
      "details": {
        "probeUrl": "https://wry-manatee-359.convex.site/api/v1/download?slug=4claw-imageboard",
        "contentDisposition": "attachment; filename=\"4claw-imageboard-1.0.1.zip\"",
        "redirectLocation": null,
        "bodySnippet": null
      },
      "scope": "source",
      "summary": "Source download looks usable.",
      "detail": "Yavira can redirect you to the upstream package for this source.",
      "primaryActionLabel": "Download for OpenClaw",
      "primaryActionHref": "/downloads/youtube-video-analyzer"
    },
    "validation": {
      "installChecklist": [
        "Use the Yavira download entry.",
        "Review SKILL.md after the package is downloaded.",
        "Confirm the extracted package contains the expected setup assets."
      ],
      "postInstallChecks": [
        "Confirm the extracted package includes the expected docs or setup files.",
        "Validate the skill or prompts are available in your target agent workspace.",
        "Capture any manual follow-up steps the agent could not complete."
      ]
    },
    "downloadPageUrl": "https://openagent3.xyz/downloads/youtube-video-analyzer",
    "agentPageUrl": "https://openagent3.xyz/skills/youtube-video-analyzer/agent",
    "manifestUrl": "https://openagent3.xyz/skills/youtube-video-analyzer/agent.json",
    "briefUrl": "https://openagent3.xyz/skills/youtube-video-analyzer/agent.md"
  },
  "agentAssist": {
    "summary": "Hand the extracted package to your coding agent with a concrete install brief instead of figuring it out manually.",
    "steps": [
      "Download the package from Yavira.",
      "Extract it into a folder your agent can access.",
      "Paste one of the prompts below and point your agent at the extracted folder."
    ],
    "prompts": [
      {
        "label": "New install",
        "body": "I downloaded a skill package from Yavira. Read SKILL.md from the extracted folder and install it by following the included instructions. Tell me what you changed and call out any manual steps you could not complete."
      },
      {
        "label": "Upgrade existing",
        "body": "I downloaded an updated skill package from Yavira. Read SKILL.md from the extracted folder, compare it with my current installation, and upgrade it while preserving any custom configuration unless the package docs explicitly say otherwise. Summarize what changed and any follow-up checks I should run."
      }
    ]
  },
  "documentation": {
    "source": "clawhub",
    "primaryDoc": "SKILL.md",
    "sections": [
      {
        "title": "YouTube Video Analyzer — Multimodal",
        "body": "This skill performs deep analysis of YouTube videos through both information channels:\n\nAudio channel: Transcript with timestamps (what is SAID)\nVisual channel: Frame extraction + image analysis (what is SHOWN)\n\nMost YouTube skills only extract transcripts. This skill closes the gap by synchronizing visual frames with spoken content, enabling accurate step-by-step guides where \"click the blue button\" is matched with the actual screenshot showing which button."
      },
      {
        "title": "Workflow Overview",
        "body": "YouTube URL\n    |\n    +---> 1. Get metadata (title, duration, video ID)\n    |\n    +---> 2. Extract transcript (yt-dlp --dump-json + curl)\n    |         -> Timestamped segments\n    |\n    +---> 3. Extract frames (yt-dlp + ffmpeg)\n    |         -> Keyframes at strategic intervals\n    |\n    +---> 4. Synchronize frames <-> transcript\n    |         -> Match frames to spoken content by timestamp\n    |\n    +---> 5. Multimodal analysis\n              -> Read each frame image, combine with transcript\n              -> Generate structured output"
      },
      {
        "title": "Step 1: Setup Working Directory",
        "body": "VIDEO_URL=\"<YOUTUBE_URL>\"\nWORK_DIR=$(mktemp -d /tmp/yt-analysis-XXXXXX)\nmkdir -p \"$WORK_DIR/frames\""
      },
      {
        "title": "Step 2: Get Video Metadata",
        "body": "yt-dlp --print title --print duration --print id \"$VIDEO_URL\" 2>/dev/null\n\nThis returns three lines: title, duration in seconds, video ID. Store these for later use."
      },
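      {
        "title": "Example: Capture Metadata into Shell Variables (sketch)",
        "body": "A minimal sketch of one way to store the three metadata lines in shell variables; the variable names are illustrative and not part of the original skill:\n\n# Read title, duration (seconds), and video ID, one line each\n{ IFS= read -r VIDEO_TITLE; IFS= read -r VIDEO_DURATION; IFS= read -r VIDEO_ID; } < <(yt-dlp --print title --print duration --print id \"$VIDEO_URL\" 2>/dev/null)\necho \"Analyzing: $VIDEO_TITLE ($VIDEO_DURATION s, id=$VIDEO_ID)\""
      },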
      {
        "title": "Step 3: Extract Transcript",
        "body": "IMPORTANT: Direct subtitle download via --write-sub frequently hits YouTube rate limits (HTTP 429).\nUse the reliable two-step method below instead."
      },
      {
        "title": "Step 3a: Get subtitle URL from video JSON",
        "body": "yt-dlp --dump-json \"$VIDEO_URL\" 2>/dev/null | python3 -c \"\nimport json, sys\ndata = json.load(sys.stdin)\nauto = data.get('automatic_captions', {})\nsubs = data.get('subtitles', {})\n\n# Priority: manual subs > auto subs. Prefer user's language, fallback chain.\nfor source in [subs, auto]:\n    for lang in ['en', 'de', 'en-orig', 'fr', 'es']:\n        if lang in source:\n            for fmt in source[lang]:\n                if fmt.get('ext') == 'json3':\n                    print(fmt['url'])\n                    sys.exit(0)\n\n# Fallback: take first available auto-caption, get json3 URL\nfor lang in sorted(auto.keys()):\n    for fmt in auto[lang]:\n        if fmt.get('ext') == 'json3':\n            url = fmt['url']\n            # Remove translation param to get original language\n            import re\n            url = re.sub(r'&tlang=[^&]+', '', url)\n            print(url)\n            sys.exit(0)\n\nprint('NO_SUBS', file=sys.stderr)\nsys.exit(1)\n\" > \"$WORK_DIR/sub_url.txt\""
      },
      {
        "title": "Step 3b: Download and parse transcript",
        "body": "curl -s \"$(cat \"$WORK_DIR/sub_url.txt\")\" -o \"$WORK_DIR/transcript.json3\"\n\nVerify it is valid JSON (not an HTML error page):\n\nhead -c 20 \"$WORK_DIR/transcript.json3\"\n# Should start with { — if it starts with <html, retry after 10s sleep"
      },
      {
        "title": "Step 3c: Parse json3 into readable timestamped segments",
        "body": "python3 -c \"\nimport json\n\nwith open('$WORK_DIR/transcript.json3') as f:\n    data = json.load(f)\n\nfor event in data.get('events', []):\n    segs = event.get('segs', [])\n    if not segs:\n        continue\n    start_ms = event.get('tStartMs', 0)\n    duration_ms = event.get('dDurationMs', 0)\n    text = ''.join(s.get('utf8', '') for s in segs).strip()\n    if not text or text == '\\n':\n        continue\n    s = start_ms / 1000\n    e = (start_ms + duration_ms) / 1000\n    print(f'[{int(s//60):02d}:{int(s%60):02d} - {int(e//60):02d}:{int(e%60):02d}] {text}')\n\" > \"$WORK_DIR/transcript.txt\"\n\nRead $WORK_DIR/transcript.txt to get the full transcript with timestamps."
      },
      {
        "title": "Fallback: No transcript available",
        "body": "If no subtitles exist at all, inform the user and proceed with visual-only analysis."
      },
      {
        "title": "Step 4a: Download video (720p is sufficient for frame analysis)",
        "body": "yt-dlp -f \"bestvideo[height<=720]+bestaudio/best[height<=720]\" \\\n       -o \"$WORK_DIR/video.mp4\" \"$VIDEO_URL\""
      },
      {
        "title": "Step 4b: Get exact duration",
        "body": "DURATION=$(ffprobe -v quiet -show_entries format=duration -of csv=p=0 \"$WORK_DIR/video.mp4\")"
      },
      {
        "title": "Step 4c: Extract frames using adaptive interval strategy",
        "body": "Choose interval based on video length:\n\nDurationIntervalApprox. FramesRationale< 5 min10s20-30Dense enough for detailed analysis5-20 min20s15-60Good balance of coverage vs. volume20-60 min30-45s30-120Focus on key moments> 60 min60s60-120+Ask user if they want to focus on specific sections\n\n# Example for a 5-20 minute video (interval=20):\nffmpeg -i \"$WORK_DIR/video.mp4\" -vf \"fps=1/20\" -q:v 3 \"$WORK_DIR/frames/frame_%04d.jpg\" 2>&1\n\nFor scene-change-detection (software HowTos, UI demos):\n\nffmpeg -i \"$WORK_DIR/video.mp4\" \\\n       -vf \"select='gt(scene,0.3)',showinfo\" \\\n       -vsync vfr -q:v 3 \"$WORK_DIR/frames/scene_%04d.jpg\" 2>&1"
      },
      {
        "title": "Step 4d: Calculate timestamps for each frame",
        "body": "For fixed-interval extraction: frame N has timestamp (N-1) * interval seconds.\n\nframe_0001.jpg -> 0:00\nframe_0002.jpg -> 0:20\nframe_0003.jpg -> 0:40\n..."
      },
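      {
        "title": "Example: Compute Frame Timestamps (sketch)",
        "body": "A small sketch applying the (N-1) * interval rule above to the extracted frames; INTERVAL is an assumption and must match the value actually used in Step 4c:\n\nINTERVAL=20  # seconds, matches fps=1/20 from Step 4c\nfor f in \"$WORK_DIR\"/frames/frame_*.jpg; do\n    n=$(basename \"$f\" .jpg)\n    n=${n#frame_}\n    secs=$(( (10#$n - 1) * INTERVAL ))  # 10# avoids octal parsing of zero-padded numbers\n    printf '%s -> %02d:%02d\\n' \"$(basename \"$f\")\" $((secs / 60)) $((secs % 60))\ndone"
      },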
      {
        "title": "Step 5: Synchronize Frames with Transcript",
        "body": "For each extracted frame:\n\nCalculate the frame's timestamp in seconds\nFind the transcript segment(s) covering that timestamp\nCreate a synchronized pair: {timestamp, transcript_text, frame_path}\n\nThis is done mentally or via a simple lookup — no external script needed."
      },
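      {
        "title": "Example: Frame-to-Transcript Lookup (sketch)",
        "body": "A minimal Python sketch of the lookup described in Step 5; it assumes the transcript.txt format produced in Step 3c and a 20s fixed interval, both of which you may need to adjust:\n\npython3 -c \"\nimport re, glob\n\nINTERVAL = 20  # must match the frame-extraction interval from Step 4c\npat = re.compile(r'\\[(\\d+):(\\d+) - (\\d+):(\\d+)\\] (.*)')\n\n# Parse '[MM:SS - MM:SS] text' lines into (start, end, text) tuples\nsegments = []\nwith open('$WORK_DIR/transcript.txt') as f:\n    for line in f:\n        m = pat.match(line.strip())\n        if m:\n            start = int(m[1]) * 60 + int(m[2])\n            end = int(m[3]) * 60 + int(m[4])\n            segments.append((start, end, m[5]))\n\n# Pair each frame with the transcript segments covering its timestamp\nfor i, frame in enumerate(sorted(glob.glob('$WORK_DIR/frames/frame_*.jpg'))):\n    ts = i * INTERVAL\n    said = ' '.join(t for s, e, t in segments if s <= ts <= e)\n    print(frame, '->', said or '(no speech)')\n\""
      },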
      {
        "title": "Step 6a: Read and analyze each frame",
        "body": "Use the Read tool (or view tool) to look at each frame image. For each frame, consider:\n\nUI elements: Buttons, menus, dialogs, settings panels visible\nText on screen: Code, labels, error messages, URLs, terminal output\nDiagrams/graphics: Charts, flow diagrams, architecture drawings\nPhysical actions: Hand positions, tool usage (for physical HowTos)\nChanges: What changed compared to the previous frame?"
      },
      {
        "title": "Step 6b: Synthesize both channels",
        "body": "For each key moment, combine audio and visual:\n\nSegment [TIMESTAMP]:\n  SAID: \"Click the blue button in the top right\"\n  SHOWN: Settings page screenshot, blue \"Save\" button highlighted\n         in top-right corner, cursor pointing at it\n  SYNTHESIS: -> On the Settings page, click the blue \"Save\" button\n               in the top-right corner"
      },
      {
        "title": "Step 6c: Identify visual-only information",
        "body": "Flag moments where the visual channel provides information NOT present in audio:\n\nSpecific button names, menu paths, exact UI locations\nCode that is shown but not read aloud\nError messages visible on screen\nBefore/after comparisons"
      },
      {
        "title": "Output Formats",
        "body": "Generate the appropriate format based on the user's request:"
      },
      {
        "title": "Format A: Step-by-Step Guide (most common)",
        "body": "# [Video Title] — Guide\n\n## Step 1: [Action] (00:15)\n[Description based on transcript + frame analysis]\n> Visual: [What the screen/image shows at this point]\n\n## Step 2: [Action] (00:42)\n[...]"
      },
      {
        "title": "Format B: Comprehensive Summary with Visual Anchors",
        "body": "# [Video Title] — Summary\n\n## Overview\n[2-3 sentence summary of the entire video]\n\n## Key Sections\n\n### [Section Name] (00:00 - 02:30)\n[Summary of this section]\n- Key visual: [Description of what's shown]\n- Key quote: \"[Important spoken content]\"\n\n### [Section Name] (02:30 - 05:00)\n[...]\n\n## Key Takeaways\n- [Takeaway 1]\n- [Takeaway 2]"
      },
      {
        "title": "Format C: Technical Detail Analysis",
        "body": "Separate analysis of both channels plus discrepancy detection:\n\n# [Video Title] — Technical Analysis\n\n## Audio Channel Analysis\n[What was said, key points, structure]\n\n## Visual Channel Analysis\n[What was shown, UI flows, code, diagrams]\n\n## Channel Synchronization\n[Where audio and visual complement each other]\n\n## Visual-Only Information\n[Important details only visible in frames, not mentioned in speech]"
      },
      {
        "title": "Error Handling & Edge Cases",
        "body": "ProblemSolutionHTTP 429 on subtitle downloadUse --dump-json method (Step 3a). If curl also gets blocked, wait 10-15 seconds and retry with different User-AgentNo subtitles available at allProceed with visual-only analysis, inform userOriginal audio language not in auto-captions listThe original language is the source — auto-captions are translations. Remove &tlang=XX from any auto-caption URL to get the originaltranscript.json3 contains HTML instead of JSONYouTube returned an error page. Wait 10s, retry with: curl -s --user-agent \"Mozilla/5.0 (Windows NT 10.0; Win64; x64)\" \"$URL\"Video > 60 minAsk user if they want to focus on specific time ranges or chaptersPoor video quality / blurry framesExtract more frames at tighter intervals to compensateVideo is age-restricted or privateInform user that the video cannot be accessed. Suggest using --cookies-from-browser if they have accessyt-dlp download failsTry alternative format: -f \"best[height<=720]\" without separate audio+video streams"
      },
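      {
        "title": "Example: Transcript Download with Retry (sketch)",
        "body": "A small bash sketch combining the 429/HTML-error remedies from the table above; the retry count and sleep duration are illustrative values, not part of the original skill:\n\nSUB_URL=$(cat \"$WORK_DIR/sub_url.txt\")\nfor attempt in 1 2 3; do\n    curl -s --user-agent \"Mozilla/5.0 (Windows NT 10.0; Win64; x64)\" \"$SUB_URL\" -o \"$WORK_DIR/transcript.json3\"\n    # Accept only if the response starts with { (valid JSON, not an HTML error page)\n    if [ \"$(head -c 1 \"$WORK_DIR/transcript.json3\")\" = '{' ]; then\n        break\n    fi\n    sleep 10\ndone"
      },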
      {
        "title": "Cleanup",
        "body": "After analysis is complete, remove temporary files:\n\nrm -rf \"$WORK_DIR\""
      },
      {
        "title": "Tips for Best Results",
        "body": "Software HowTos: Use scene-change detection — UI transitions create clear visual breaks\nPhysical HowTos: Use tighter frame intervals (10-15s) — movements are subtler\nRead the transcript first: Identify \"interesting timestamps\" before extracting frames. Look for phrases like \"as you can see here\", \"let me show you\", \"on the screen\" — these signal important visual moments\nContext-aware frame analysis: When analyzing a frame, always provide the transcript context. The speaker often explains what's about to be shown\nBatch frame reading: Read frames in batches of 8-10 to maintain context across sequential frames and detect visual changes\nAlways extract both channels in parallel: Start the video download while processing the transcript to save time"
      }
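      ,
      {
        "title": "Example: Parallel Channel Extraction (sketch)",
        "body": "A minimal sketch of the parallel-extraction tip above: start the video download in the background, run the transcript pipeline in the foreground, and wait before extracting frames. The commands repeat Steps 3-4; only the backgrounding is new:\n\n# Kick off the video download (Step 4a) in the background\nyt-dlp -f \"bestvideo[height<=720]+bestaudio/best[height<=720]\" --merge-output-format mp4 \\\n       -o \"$WORK_DIR/video.mp4\" \"$VIDEO_URL\" &\nDL_PID=$!\n\n# Meanwhile, run the transcript pipeline (Steps 3a-3c)\n# ...\n\n# Block until the download finishes, then extract frames (Step 4c)\nwait \"$DL_PID\""
      }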
    ]
  },
  "trust": {
    "sourceLabel": "tencent",
    "provenanceUrl": "https://clawhub.ai/sdrabent/youtube-video-analyzer",
    "publisherUrl": "https://clawhub.ai/sdrabent/youtube-video-analyzer",
    "owner": "sdrabent",
    "version": "1.0.0",
    "license": null,
    "verificationStatus": "Indexed source record"
  },
  "links": {
    "detailUrl": "https://openagent3.xyz/skills/youtube-video-analyzer",
    "downloadUrl": "https://openagent3.xyz/downloads/youtube-video-analyzer",
    "agentUrl": "https://openagent3.xyz/skills/youtube-video-analyzer/agent",
    "manifestUrl": "https://openagent3.xyz/skills/youtube-video-analyzer/agent.json",
    "briefUrl": "https://openagent3.xyz/skills/youtube-video-analyzer/agent.md"
  }
}