{
  "schemaVersion": "1.0",
  "item": {
    "slug": "cross-ref",
    "name": "cross-ref",
    "source": "tencent",
    "type": "skill",
    "category": "开发工具",
    "sourceUrl": "https://clawhub.ai/Glucksberg/cross-ref",
    "canonicalUrl": "https://clawhub.ai/Glucksberg/cross-ref",
    "targetPlatform": "OpenClaw"
  },
  "install": {
    "downloadMode": "redirect",
    "downloadUrl": "/downloads/cross-ref",
    "sourceDownloadUrl": "https://wry-manatee-359.convex.site/api/v1/download?slug=cross-ref",
    "sourcePlatform": "tencent",
    "targetPlatform": "OpenClaw",
    "installMethod": "Manual import",
    "extraction": "Extract archive",
    "prerequisites": [
      "OpenClaw"
    ],
    "packageFormat": "ZIP package",
    "includedAssets": [
      "README.md",
      "ROADMAP.md",
      "SKILL.md",
      "references/commenting-strategy.md",
      "references/principles.md",
      "scripts/fetch-data.sh"
    ],
    "primaryDoc": "SKILL.md",
    "quickSetup": [
      "Download the package from Yavira.",
      "Extract the archive and review SKILL.md first.",
      "Import or place the package into your OpenClaw setup."
    ],
    "agentAssist": {
      "summary": "Hand the extracted package to your coding agent with a concrete install brief instead of figuring it out manually.",
      "steps": [
        "Download the package from Yavira.",
        "Extract it into a folder your agent can access.",
        "Paste one of the prompts below and point your agent at the extracted folder."
      ],
      "prompts": [
        {
          "label": "New install",
          "body": "I downloaded a skill package from Yavira. Read SKILL.md from the extracted folder and install it by following the included instructions. Then review README.md for any prerequisites, environment setup, or post-install checks. Tell me what you changed and call out any manual steps you could not complete."
        },
        {
          "label": "Upgrade existing",
          "body": "I downloaded an updated skill package from Yavira. Read SKILL.md from the extracted folder, compare it with my current installation, and upgrade it while preserving any custom configuration unless the package docs explicitly say otherwise. Then review README.md for any prerequisites, environment setup, or post-install checks. Summarize what changed and any follow-up checks I should run."
        }
      ]
    },
    "sourceHealth": {
      "source": "tencent",
      "status": "healthy",
      "reason": "direct_download_ok",
      "recommendedAction": "download",
      "checkedAt": "2026-05-07T17:22:31.273Z",
      "expiresAt": "2026-05-14T17:22:31.273Z",
      "httpStatus": 200,
      "finalUrl": "https://wry-manatee-359.convex.site/api/v1/download?slug=afrexai-annual-report",
      "contentType": "application/zip",
      "probeMethod": "head",
      "details": {
        "probeUrl": "https://wry-manatee-359.convex.site/api/v1/download?slug=afrexai-annual-report",
        "contentDisposition": "attachment; filename=\"afrexai-annual-report-1.0.0.zip\"",
        "redirectLocation": null,
        "bodySnippet": null
      },
      "scope": "source",
      "summary": "Source download looks usable.",
      "detail": "Yavira can redirect you to the upstream package for this source.",
      "primaryActionLabel": "Download for OpenClaw",
      "primaryActionHref": "/downloads/cross-ref"
    },
    "validation": {
      "installChecklist": [
        "Use the Yavira download entry.",
        "Review SKILL.md after the package is downloaded.",
        "Confirm the extracted package contains the expected setup assets."
      ],
      "postInstallChecks": [
        "Confirm the extracted package includes the expected docs or setup files.",
        "Validate the skill or prompts are available in your target agent workspace.",
        "Capture any manual follow-up steps the agent could not complete."
      ]
    },
    "downloadPageUrl": "https://openagent3.xyz/downloads/cross-ref",
    "agentPageUrl": "https://openagent3.xyz/skills/cross-ref/agent",
    "manifestUrl": "https://openagent3.xyz/skills/cross-ref/agent.json",
    "briefUrl": "https://openagent3.xyz/skills/cross-ref/agent.md"
  },
  "agentAssist": {
    "summary": "Hand the extracted package to your coding agent with a concrete install brief instead of figuring it out manually.",
    "steps": [
      "Download the package from Yavira.",
      "Extract it into a folder your agent can access.",
      "Paste one of the prompts below and point your agent at the extracted folder."
    ],
    "prompts": [
      {
        "label": "New install",
        "body": "I downloaded a skill package from Yavira. Read SKILL.md from the extracted folder and install it by following the included instructions. Then review README.md for any prerequisites, environment setup, or post-install checks. Tell me what you changed and call out any manual steps you could not complete."
      },
      {
        "label": "Upgrade existing",
        "body": "I downloaded an updated skill package from Yavira. Read SKILL.md from the extracted folder, compare it with my current installation, and upgrade it while preserving any custom configuration unless the package docs explicitly say otherwise. Then review README.md for any prerequisites, environment setup, or post-install checks. Summarize what changed and any follow-up checks I should run."
      }
    ]
  },
  "documentation": {
    "source": "clawhub",
    "primaryDoc": "SKILL.md",
    "sections": [
      {
        "title": "Cross-Ref: PR & Issue Linker",
        "body": "You find hidden connections between PRs and issues that humans miss at scale.\nThe core loop is: fetch → analyze in parallel → cluster → verify → report → act.\n\nBefore doing anything, read references/principles.md. Those rules override\neverything in this file when there's a conflict."
      },
      {
        "title": "Overview",
        "body": "Repos accumulate duplicate PRs and orphaned issue→PR links over time. Manual\ncross-referencing doesn't scale past a few dozen items. This skill uses parallel\nSonnet subagents to analyze up to 1000 PRs and 1000 issues simultaneously,\nfinding two kinds of links:\n\nDuplicate PRs — PRs that address the same bug or feature (even with\ndifferent approaches or wording)\nIssue→PR links — Open issues that already have a PR solving them but\nno explicit \"fixes #N\" reference\n\nResults are grouped into thematic clusters, scored by actionability,\nand presented with available actions (comment, close, label) — not just\nas a flat list of pairs."
      },
      {
        "title": "Configuration",
        "body": "The user provides these at invocation time (ask if not given):\n\nParameterDefaultDescriptionrepo(ask)GitHub owner/repo to analyzepr_count1000How many recent PRs to scanissue_count1000How many recent issues to scanpr_stateallPR state filter: open, closed, allissue_stateopenIssue state filter: open, closed, allbatch_size50PRs per subagent batchconfidence_thresholdmediumMinimum confidence to include in report: low, medium, highmodeplanplan = report only (default, always start here). execute = act on findings.\n\nDefault mode is plan (dry-run). The skill always starts by generating\nthe report. The user must explicitly choose to execute actions after reviewing\nthe findings. This matters because actions can't be undone."
      },
      {
        "title": "Phase 1: Data Collection",
        "body": "Fetch PR and issue metadata from the GitHub API. This phase is deterministic\nand uses the shell script — no AI needed.\n\nscripts/fetch-data.sh <owner/repo> <workspace_dir> [pr_count] [issue_count] [pr_state] [issue_state]\n\nThis produces:\n\nworkspace/prs.json — Full PR metadata\nworkspace/issues.json — Full issue metadata (PRs filtered out)\nworkspace/existing-refs.json — Pre-extracted explicit cross-references\nworkspace/pr-index.txt — Compact one-line-per-PR index\nworkspace/issue-index.txt — Compact one-line-per-issue index\n\nThe existing references map captures what's already linked (via \"fixes #N\",\n\"closes #N\", etc.) so subagents can focus on what's missing."
      },
      {
        "title": "Phase 2: Parallel Analysis (Sonnet Subagents)",
        "body": "This is where the intelligence happens. Split PRs into batches and spawn\nparallel Sonnet subagents. Each subagent receives:\n\nIts batch of PRs (full metadata from prs.json, ~50 PRs)\nThe complete issue index (compact, ~60KB)\nThe complete PR index (compact, ~60KB) — for duplicate detection\nThe existing references map (so it skips already-linked items)\n\nSpawn subagents using the Task tool:\n\nFor each batch B of {batch_size} PRs:\n  Task(\n    subagent_type=\"general-purpose\",\n    model=\"sonnet\",\n    prompt=<see below>\n  )\n\nSubagent prompt template:\n\nImportant: When building each subagent prompt, paste the FULL contents of\nreferences/principles.md into the \"Decision Principles\" section below.\nDo not summarize or condense — include the complete text. This ensures\nsubagents always use the latest principles without drift.\n\nYou are a cross-reference analyst for a GitHub repository. Your job is to find\nconnections between PRs and issues that aren't explicitly linked yet.\n\n## Decision Principles (these override everything else)\n\n{paste full contents of references/principles.md here}\n\n## Your Batch\nYou are analyzing PRs {start_num} through {end_num} of {total_prs}.\n\n## PR Details (your batch)\n{full PR metadata for this batch from prs.json}\n\n## Complete Issue Index\n{issue-index.txt content}\n\n## Complete PR Index\n{pr-index.txt content}\n\n## Already Known References\n{existing-refs.json content}\n\n## Your Task\n\nFind TWO types of connections:\n\n### 1. Issue→PR Links\nFor each PR in your batch, determine if it resolves any issue in the index.\nEvidence must include at least one of:\n- Same error message or failure path described in both\n- PR modifies the component/module that the issue describes as broken\n- PR body explicitly references the problem the issue describes (even without #N)\n\nTitle similarity alone is NOT sufficient. Skip any links that already exist\nin the known references.\n\n### 2. Duplicate PRs\nFor each PR in your batch, check if any OTHER PR in the full PR index\naddresses the same problem. Evidence must include at least one of:\n- Both modify the same files for the same reason\n- Both fix the same error/behavior (even with different approaches)\n- One is a resubmission or continuation of the other (same branch, similar body)\n\nSame area of code is NOT enough — the PRs must address the same specific problem.\n\n### 3. Flagging Uncertainty\n\nIf you encounter a pair where the evidence is ambiguous — you can see a\nplausible connection but can't confirm it from the available data — mark it\nwith `\"status\": \"manual_review_required\"` instead of guessing a confidence\nlevel. Include what's missing (e.g., \"need to see full diff to confirm\nfile overlap\").\n\n### Output Format\nReturn ONLY a JSON array. 
No other text.\n\n[\n  {\n    \"type\": \"issue_link\",\n    \"pr\": 5678,\n    \"pr_author\": \"@username\",\n    \"issue\": 1234,\n    \"confidence\": \"high|medium|low\",\n    \"status\": \"confirmed|manual_review_required\",\n    \"root_cause\": \"One sentence: what shared problem connects these\",\n    \"evidence\": \"Specific: same error message, same file, same component, etc.\",\n    \"missing_evidence\": null or \"What would be needed to confirm this\"\n  },\n  {\n    \"type\": \"duplicate_pr\",\n    \"pr_a\": 5678,\n    \"pr_b\": 5679,\n    \"pr_a_author\": \"@username_a\",\n    \"pr_b_author\": \"@username_b\",\n    \"confidence\": \"high|medium|low\",\n    \"status\": \"confirmed|manual_review_required\",\n    \"root_cause\": \"One sentence: what shared problem connects these\",\n    \"evidence\": \"Specific: same files modified, same branch, resubmission, etc.\",\n    \"missing_evidence\": null or \"What would be needed to confirm this\"\n  }\n]\n\nParallelism: Spawn ALL batch subagents simultaneously. With batch_size=50\nand 1000 PRs, that's 20 parallel subagents. This is the power of the skill —\nwhat would take hours sequentially completes in minutes."
      },
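      {
        "title": "Sketch: Assembling the Batch Subagent Prompts",
        "body": "A minimal Python sketch of the batching step above, shown for illustration only; it is not one of the packaged scripts. It assumes the Phase 1 outputs exist in a cross-ref-workspace/ directory and pastes the full principles file into every prompt. Spawning the Sonnet subagents themselves happens via the agent's Task tool and is not shown here.\n\nimport json\nfrom pathlib import Path\n\nworkspace = Path('cross-ref-workspace')      # assumed Phase 1 workspace\nbatch_size = 50\n\nprs = json.loads((workspace / 'prs.json').read_text())\nissue_index = (workspace / 'issue-index.txt').read_text()\npr_index = (workspace / 'pr-index.txt').read_text()\nexisting_refs = (workspace / 'existing-refs.json').read_text()\nprinciples = Path('references/principles.md').read_text()   # pasted in full, never summarized\n\nbatches = [prs[i:i + batch_size] for i in range(0, len(prs), batch_size)]\n\nprompts = []\nfor n, batch in enumerate(batches, start=1):\n    start_num = (n - 1) * batch_size + 1\n    end_num = start_num + len(batch) - 1\n    sections = [\n        'You are a cross-reference analyst for a GitHub repository.',\n        '## Decision Principles (these override everything else)', principles,\n        '## Your Batch',\n        f'You are analyzing PRs {start_num} through {end_num} of {len(prs)}.',\n        '## PR Details (your batch)', json.dumps(batch, indent=2),\n        '## Complete Issue Index', issue_index,\n        '## Complete PR Index', pr_index,\n        '## Already Known References', existing_refs,\n        # ...followed by the task instructions and output format from SKILL.md\n    ]\n    prompts.append('\\n\\n'.join(sections))\n\n# Each prompt goes to one Sonnet subagent via the Task tool, all spawned in parallel.\nprint(f'{len(prompts)} batch prompts ready')"
      },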
      {
        "title": "Phase 3: Merge, Deduplicate & Cluster",
        "body": "After all subagents return:\n\nCollect all JSON results into a single array\nDeduplicate duplicate_pr entries (A→B and B→A are the same link)\nMerge confidence — if two subagents found the same link, take the\nhigher confidence and merge both evidence strings\nFilter by confidence_threshold\nBuild clusters — group related findings into thematic clusters (see below)\nScore clusters by actionability (see below)\nSort clusters by score (highest first)\n\nSave to workspace/results-unverified.json.\n\nClustering Algorithm\n\nInstead of reporting isolated pairs, group connected findings into clusters.\nTwo findings belong to the same cluster if they share any PR or issue number.\n\nExample: If you find PR#100 ↔ PR#101 (duplicate) and PR#100 ↔ Issue#50\n(link), these form a single cluster: \"Cluster: Issue#50 + PR#100 + PR#101\".\n\nCluster structure:\n\n{\n  \"cluster_id\": 1,\n  \"theme\": \"Onboard token mismatch — OPENCLAW_GATEWAY_TOKEN ignored\",\n  \"items\": [\"PR#22662\", \"PR#22658\", \"Issue#22638\"],\n  \"findings\": [ ...individual findings in this cluster... ],\n  \"score\": 8.5,\n  \"cluster_status\": \"actionable|needs_review|manual_review_required\",\n  \"suggested_actions\": [ ...see Phase 4b... ]\n}\n\nThe theme is a one-line summary that describes what this cluster is about\n— the shared root cause or feature area. Generate it from the root_cause\nfields of the cluster's findings.\n\nActionability Scoring\n\nEach cluster gets a score based on these signals (clamp result to 0-10):\n\nSignalPointsWhy it mattersAll items open+3Can still be acted onAt least one high-confidence finding+2Strong evidenceMultiple findings in cluster+1More connections = more valueIssue has >5 reactions/comments+1High community interestPR is not draft+1Ready for reviewCluster has a clear canonical PR+1Easy to pick a winnerAny manual_review_required-2Needs human judgmentAll items closed-3Low urgency\n\nClusters scoring 7+ are actionable (green in report).\nClusters scoring 4-6 need review (yellow).\nClusters scoring 0-3 are low priority (gray)."
      },
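      {
        "title": "Sketch: Merging, Clustering and Scoring",
        "body": "A minimal Python sketch of the merge, dedup, clustering, and scoring logic described above; illustrative only, not a packaged script. Only the signals computable from the findings alone are scored here; the open/closed, draft, and reaction signals need the metadata in prs.json and issues.json.\n\nimport json\nfrom pathlib import Path\n\nRANK = {'low': 0, 'medium': 1, 'high': 2}\nTHRESHOLD = 'medium'                       # confidence_threshold\n\ndef key_of(f):\n    # duplicate_pr findings are undirected: A/B and B/A collapse to one key\n    if f['type'] == 'duplicate_pr':\n        return ('duplicate_pr',) + tuple(sorted((f['pr_a'], f['pr_b'])))\n    return ('issue_link', f['pr'], f['issue'])\n\ndef items_of(f):\n    if f['type'] == 'duplicate_pr':\n        return {'PR#' + str(f['pr_a']), 'PR#' + str(f['pr_b'])}\n    return {'PR#' + str(f['pr']), 'Issue#' + str(f['issue'])}\n\n# 1. Merge batch results, keeping the higher confidence and both evidence strings.\nmerged = {}\nfor path in sorted(Path('cross-ref-workspace/batches').glob('batch-*-results.json')):\n    for f in json.loads(path.read_text()):\n        k = key_of(f)\n        if k not in merged:\n            merged[k] = f\n            continue\n        old = merged[k]\n        if RANK.get(f.get('confidence'), 0) > RANK.get(old.get('confidence'), 0):\n            old['confidence'] = f['confidence']\n        old['evidence'] = old['evidence'] + ' / ' + f['evidence']\n\n# 2. Filter by confidence_threshold; keep manual-review items for the report.\nfindings = [f for f in merged.values()\n            if f.get('status') == 'manual_review_required'\n            or RANK.get(f.get('confidence'), 0) >= RANK[THRESHOLD]]\n\n# 3. Cluster: findings that share any PR or issue number end up together.\nclusters = []\nfor f in findings:\n    ids = items_of(f)\n    hits = [c for c in clusters if c['ids'] & ids]\n    if not hits:\n        hits = [{'ids': set(), 'findings': []}]\n        clusters.append(hits[0])\n    target = hits[0]\n    for other in hits[1:]:                 # this finding bridges clusters: merge them\n        target['ids'] |= other['ids']\n        target['findings'] += other['findings']\n        clusters.remove(other)\n    target['ids'] |= ids\n    target['findings'].append(f)\n\n# 4. Partial actionability score (findings-only signals), clamped to 0-10.\nfor c in clusters:\n    score = 0\n    if any(f.get('confidence') == 'high' for f in c['findings']):\n        score += 2\n    if len(c['findings']) > 1:\n        score += 1\n    if any(f.get('status') == 'manual_review_required' for f in c['findings']):\n        score -= 2\n    c['score'] = max(0, min(10, score))\n\nclusters.sort(key=lambda c: c['score'], reverse=True)"
      },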
      {
        "title": "Phase 3b: Evidence Verification",
        "body": "The batch subagents work from truncated bodies (500 chars) and compact indexes.\nThat's good enough for discovery but not for final decisions. This phase takes\nthe candidates and verifies them against deeper data.\n\nSpawn a single verification subagent (Sonnet) that:\n\nReads workspace/results-unverified.json\nFor each high/medium candidate, fetches deeper evidence via gh:\n\nDuplicate PRs: gh pr diff {id} --name-only for both PRs to confirm\nthey actually touch the same files. If the file lists don't overlap at all,\ndowngrade to low or remove.\nIssue→PR links: gh issue view {id} --json body,comments to read the\nfull issue body (not truncated) and check if any commenter already noted\nthe connection.\nFor both: gh pr view {id} --json body to read the full PR body\nwhen the truncated version was ambiguous.\n\n\nFor manual_review_required items: attempt to resolve with deeper data.\nIf still ambiguous after deep check, keep the flag — it goes to the user.\nUpgrades, downgrades, or removes candidates based on the deeper evidence.\nRecalculates cluster scores after confidence changes.\nWrites the verified results to workspace/results.json.\n\nVerification subagent prompt:\n\nYou are an evidence verification agent. You received candidate cross-references\nbetween GitHub PRs and issues from a discovery pass. Your job is to verify or\nreject each candidate using deeper data.\n\n## Principles\n- A candidate stays only if deeper evidence confirms the connection.\n- If file diffs don't overlap for duplicate PRs, downgrade or remove.\n- If the full issue body reveals the problem is actually different, remove.\n- If someone already commented the link, exclude the candidate from results entirely.\n- You may upgrade \"medium\" to \"high\" if deeper evidence is strong.\n- For \"manual_review_required\" items: try to resolve with the deeper data.\n  If you can confirm or deny, update status to \"confirmed\" with the new\n  confidence. If still ambiguous, keep \"manual_review_required\".\n- Add a \"verified_evidence\" field with what you found in the deep check.\n\n## Candidates to verify\n{contents of results-unverified.json}\n\n## Commands available\nRun these via bash to fetch deeper data:\n- gh pr diff {number} --name-only --repo {owner/repo}\n- gh pr view {number} --json body --repo {owner/repo}\n- gh issue view {number} --json body,comments --repo {owner/repo}\n\n## Output\nWrite verified results to {workspace}/results.json as a JSON array.\nSame structure as input, but with:\n- Updated confidence levels and status fields\n- Added \"verified_evidence\" field\n- Removed any candidates that didn't survive verification\n- Added \"verification_note\" for anything noteworthy\n\nThis phase catches false positives that slipped through the discovery phase.\nThe batch subagents are optimized for recall (find everything plausible); the\nverifier is optimized for precision (keep only what's real).\n\nSkip this phase if the total candidate count is under 5 — the cost of\nverification outweighs the benefit for small result sets."
      },
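      {
        "title": "Sketch: Checking File Overlap for Duplicate Candidates",
        "body": "The verification pass above is run by a Sonnet subagent, but the core file-overlap check is mechanical. A minimal Python sketch of that single check, using the same gh command the verification prompt lists; illustrative only. The repo value is a placeholder, and the candidates file is treated as a flat list of findings for brevity.\n\nimport json\nimport subprocess\n\nREPO = 'owner/repo'                         # placeholder, use the real repo here\n\ndef changed_files(pr_number):\n    # Same command the verification prompt uses: gh pr diff --name-only\n    out = subprocess.run(\n        ['gh', 'pr', 'diff', str(pr_number), '--name-only', '--repo', REPO],\n        capture_output=True, text=True, check=True)\n    return {line for line in out.stdout.splitlines() if line}\n\nwith open('cross-ref-workspace/results-unverified.json') as fh:\n    candidates = json.load(fh)\n\nfor f in candidates:\n    if f.get('type') != 'duplicate_pr':\n        continue\n    overlap = changed_files(f['pr_a']) & changed_files(f['pr_b'])\n    if overlap:\n        f['verified_evidence'] = 'shared files: ' + ', '.join(sorted(overlap))\n    else:\n        # No shared files at all: downgrade, per the verification principles.\n        f['confidence'] = 'low'\n        f['verified_evidence'] = 'no file overlap between the two diffs'\n\nwith open('cross-ref-workspace/results.json', 'w') as fh:\n    json.dump(candidates, fh, indent=2)"
      },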
      {
        "title": "Phase 4: Generate Report",
        "body": "Present the report to the user organized by clusters, not flat pairs.\n\nReport structure:\n\n# Cross-Reference Report: {owner}/{repo}\n\n**Scanned**: {N} PRs, {M} issues\n**Found**: {X} clusters containing {Y} findings\n**Already linked**: {Z} existing references (skipped)\n**Mode**: plan (review only — no actions taken)\n\n## Clusters (sorted by actionability score)\n\n### Cluster 1: Onboard token mismatch (Score: 8.5 🟢)\n**Theme**: OPENCLAW_GATEWAY_TOKEN env var ignored during onboard setup\n**Items**: PR#22662 (@aiworks451), PR#22658 (@otherdev), Issue#22638\n**Status**: Actionable\n\n| Finding | Type | Confidence | Root Cause |\n|---------|------|------------|------------|\n| PR#22662 ↔ PR#22658 | duplicate_pr | high | Both fix token mismatch in onboard wizard |\n| PR#22658 → Issue#22638 | issue_link | high | PR explicitly closes the issue |\n\n**Suggested actions** (choose per cluster):\n- 💬 Comment on PR#22662 noting PR#22658 covers the same fix more broadly\n- 🏷️ Label PR#22662 as `duplicate`\n- ❌ Close PR#22662 as duplicate of PR#22658\n\n---\n\n### Cluster 2: i18n Portuguese translations (Score: 6.0 🟡)\n**Theme**: Competing pt-BR translation implementations\n**Items**: PR#22637 (@dev1), PR#22628 (@dev2)\n**Status**: Needs review — different approaches, human must choose\n\n| Finding | Type | Confidence | Root Cause |\n|---------|------|------------|------------|\n| PR#22637 ↔ PR#22628 | duplicate_pr | medium | Same feature, different implementations |\n\n**Suggested actions**:\n- 💬 Comment linking the two PRs for coordination\n- ⚠️ Manual review required: different i18n architectures, maintainer must decide\n\n---\n\n### ⚠️ Items Requiring Manual Review\n\nThese findings had ambiguous evidence that couldn't be resolved automatically:\n\n| Finding | Reason | What's Missing |\n|---------|--------|----------------|\n| PR#1234 ↔ Issue#5678 | Keyword overlap but no shared error path | Need to check if PR touches the auth module |\n\n## Summary\n- **Actionable clusters**: {count} (score 7+, ready for bulk action)\n- **Needs review**: {count} (score 4-6, human judgment needed)\n- **Manual review required**: {count} (ambiguous, flagged for human)\n- **Next step**: Choose actions per cluster, then select a commenting/action strategy."
      },
      {
        "title": "Phase 4b: Suggested Actions Per Cluster",
        "body": "For each cluster, suggest appropriate actions based on confidence and item states.\n\nFor duplicate PRs (high confidence, both open):\n\n💬 Comment — link the PRs so authors can coordinate\n🏷️ Label — add duplicate label to the weaker PR\n❌ Close — close the weaker PR as duplicate (only if very clear)\n\nFor duplicate PRs (one open, one closed):\n\n💬 Comment — note the connection for context (lower priority)\n\nFor issue→PR links (high confidence):\n\n💬 Comment on issue — note that a PR addresses this\n🏷️ Label issue — add has-pr or similar\n\nFor manual_review_required items:\n\n⚠️ Flag for human — present in a separate section, no automated action\n\nAction rules:\n\nNever suggest closing without high confidence + verification\nNever suggest labeling without at least medium confidence\nAlways suggest commenting as the minimum action (it's the safest)\nFor clusters with mixed confidence, suggest the action matching the\nlowest-confidence finding (conservative)"
      },
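      {
        "title": "Sketch: Picking Suggested Actions for a Cluster",
        "body": "A minimal Python sketch of the action rules above; illustrative only. is_open() is a hypothetical lookup backed by the prs.json / issues.json metadata.\n\nRANK = {'low': 0, 'medium': 1, 'high': 2}\n\ndef suggest_actions(cluster, is_open):\n    confirmed = [f for f in cluster['findings'] if f.get('status') == 'confirmed']\n    manual = [f for f in cluster['findings'] if f.get('status') == 'manual_review_required']\n    actions = []\n    if manual:\n        # Ambiguous findings go to the separate manual-review section of the report.\n        actions.append('flag ' + str(len(manual)) + ' finding(s) for human review')\n    if not confirmed:\n        return actions\n    # Conservative rule: gate label/close on the lowest confidence in the cluster.\n    floor = min(RANK.get(f.get('confidence', 'low'), 0) for f in confirmed)\n    all_open = all(is_open(item) for item in cluster['items'])\n    actions.append('comment linking the related items')      # always the safe minimum\n    if floor >= RANK['medium'] and all_open:\n        actions.append('add labels (duplicate / has-pr)')\n    if floor >= RANK['high'] and all_open:\n        actions.append('close the weaker PR as duplicate (verified findings only)')\n    return actions"
      },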
      {
        "title": "Phase 5: Interactive Action Strategy",
        "body": "After presenting the report, ask the user how they want to proceed.\nRead references/commenting-strategy.md for rate-limiting details.\n\nPresent action choices per cluster:\n\nFor each actionable cluster, let the user pick:\n\nComment only — just link the items\nComment + label — link and add labels\nComment + close — link and close duplicates (high confidence only)\nSkip — do nothing for this cluster\nManual — I'll handle this one myself\n\nThen present the timing strategy. Read references/commenting-strategy.md for\nthe full tier definitions, rate calculations, and daily budget math. Present\nthe user with the strategy table from that file, populated with the actual\ncounts from the report. If total actions exceed the daily budget, show the\nmulti-day plan as described in commenting-strategy.md.\n\nAlways offer Dry Run (report only, no actions) as the default choice.\nAlso offer Skip — save the report but don't act at all."
      },
      {
        "title": "Phase 6: Execute Actions",
        "body": "If the user chooses to act, build workspace/approved-comments.json and\nexecute with rate limiting via the shell script.\n\napproved-comments.json schema (array of objects):\n\n[\n  {\n    \"target_number\": 1234,\n    \"type\": \"issue_link|duplicate_pr\",\n    \"body\": \"The full comment text to post\",\n    \"cluster_id\": 1,\n    \"finding_index\": 0\n  }\n]\n\ntarget_number — the issue or PR number to comment on (used by post-comments.sh)\ntype — finding type, used for logging only\nbody — the complete comment text\ncluster_id and finding_index — traceability back to the report\n\nscripts/post-comments.sh <owner/repo> <workspace_dir> [jitter_min] [jitter_max] [daily_max]\n\nFor label and close actions, execute them inline (not via the script)\nsince they don't need the same rate limiting as comments:\n\n# Label (works for both issues and PRs — GitHub treats PRs as issues for labels)\ngh issue edit {number} --add-label duplicate --repo {owner/repo}\n# Close PR as duplicate (use heredoc for safe body passing)\ngh pr close {number} --comment \"$(cat <<'EOF'\nClosing in favor of #{canonical_pr_number} by @{canonical_author}, which covers the same change ({root_cause_sentence}).\n\nThanks for the contribution, @{closed_pr_author} — your work helped confirm this was worth fixing.\n\n_If this closure is wrong, reopen and let me know._\nEOF\n)\" --repo {owner/repo}\n\nAlways execute in this order within a cluster:\n\nPost comments first (so the context exists before close/label)\nAdd labels\nClose (only after comment is posted)\n\nComment style: Comments should feel like they're from a helpful maintainer,\nnot a bot. Vary the opener and closer for each comment to avoid sounding\nrepetitive. Always mention the PR author by name.\n\nComment templates (vary the opener each time):\n\nOpeners (rotate through these, never use the same one twice in a row):\n\n\"Heads up — this might be related.\"\n\"Worth a look:\"\n\"Noticed a possible connection here.\"\n\"This could be relevant to what you're working on.\"\n\nFor issue→PR links (comment on the issue):\n\n{opener}\n\nPR #{pr_number} by @{author} ({pr_title}) appears to address this issue.\n\n{root_cause_sentence}\n\n_If this doesn't look right, let me know and I'll correct the link._\n\nFor duplicate PRs (comment on the newer PR):\n\n{opener}\n\nPR #{other_pr_number} by @{other_author} ({other_pr_title}) seems to address\nthe same problem.\n\n{root_cause_sentence}\n\nBoth approaches have merit — might be worth coordinating.\n\n_If these aren't actually related, let me know and I'll correct this._\n\nEvery comment includes a correction path because wrong links erode trust.\n\nSave progress to workspace/comment-progress.json for resume support."
      },
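      {
        "title": "Sketch: Building approved-comments.json",
        "body": "A minimal Python sketch of how the approved-comments.json payload can be assembled before handing off to scripts/post-comments.sh; illustrative only. The comment bodies here are abbreviated stand-ins for the full templates above, and the sketch assumes results.json holds the cluster list already filtered to the clusters the user approved in Phase 5.\n\nimport itertools\nimport json\n\nOPENERS = itertools.cycle([\n    'Heads up - this might be related.',\n    'Worth a look:',\n    'Noticed a possible connection here.',\n    'This could be relevant to what you are working on.',\n])\n\ndef comment_body(finding, opener):\n    if finding['type'] == 'issue_link':\n        lead = 'PR #' + str(finding['pr']) + ' by ' + finding['pr_author'] + ' appears to address this issue.'\n    else:\n        lead = 'PR #' + str(finding['pr_b']) + ' by ' + finding['pr_b_author'] + ' seems to address the same problem.'\n    closer = '_If this link is wrong, let me know and I will correct it._'\n    return '\\n\\n'.join([opener, lead, finding['root_cause'], closer])\n\nwith open('cross-ref-workspace/results.json') as fh:\n    clusters = json.load(fh)               # assumed already filtered to approved clusters\n\napproved = []\nfor cluster in clusters:\n    for i, finding in enumerate(cluster['findings']):\n        # issue_link comments go on the issue; duplicate_pr comments go on the\n        # newer of the two PRs (assumed to be pr_a here).\n        target = finding['issue'] if finding['type'] == 'issue_link' else finding['pr_a']\n        approved.append({\n            'target_number': target,\n            'type': finding['type'],\n            'body': comment_body(finding, next(OPENERS)),\n            'cluster_id': cluster['cluster_id'],\n            'finding_index': i,\n        })\n\nwith open('cross-ref-workspace/approved-comments.json', 'w') as fh:\n    json.dump(approved, fh, indent=2)\n# Next: scripts/post-comments.sh owner/repo cross-ref-workspace"
      },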
      {
        "title": "Error Handling",
        "body": "API rate limit hit: Pause, show remaining reset time, save progress.\nSubagent returns invalid JSON: Log the error, skip that batch, warn user.\nDon't retry — the batch results are lost but other batches continue.\nPR/issue not found (deleted): Skip silently, note in report.\nNetwork error during commenting: Save progress immediately, offer resume.\nSubagent returns empty results: Normal — not every batch has links.\nClose/label fails: Log the error, continue with remaining actions.\nNever retry a close — the user should investigate manually."
      },
      {
        "title": "Workspace Structure",
        "body": "cross-ref-workspace/\n├── prs.json                  # Raw PR metadata\n├── issues.json               # Raw issue metadata\n├── pr-index.txt              # Compact PR index (one line per PR)\n├── issue-index.txt           # Compact issue index (one line per issue)\n├── existing-refs.json        # Pre-extracted explicit references\n├── batches/\n│   ├── batch-01-results.json # Subagent results per batch\n│   ├── batch-02-results.json\n│   └── ...\n├── results-unverified.json   # Raw merged findings (before verification)\n├── results.json              # Verified findings with clusters\n├── report.md                 # Human-readable report\n├── approved-comments.json    # Comments approved for posting\n├── comment-progress.json     # Commenting progress tracker\n└── pending-comments.json     # Links not yet commented (if day limit hit)"
      },
      {
        "title": "Resume Support",
        "body": "If a previous run exists in the workspace:\n\nPhase 1-3: Skip if results.json exists and user confirms\nPhase 4: Skip if report.md exists and user confirms\nPhase 5-6: Resume from comment-progress.json if commenting was interrupted\nAsk: \"Found a previous run with {N} results. Resume commenting or start fresh?\""
      },
      {
        "title": "Tips for Operators",
        "body": "Start with a smaller count (100 PRs, 100 issues) to validate before scaling\nAlways review the report in plan mode before executing actions\nThe compact index approach keeps memory usage manageable — don't fetch full\nPR bodies (500 char truncation is intentional)\nFor very active repos (>10K PRs), increase batch_size to reduce subagent count\nToken costs: ~20 subagent calls for 1000 PRs at batch_size=50, each with\n~120KB context. Plan accordingly.\nThe gh CLI token needs repo scope (private) or public_repo (public),\nplus issues:write for posting comments."
      }
    ],
    "body": "Cross-Ref: PR & Issue Linker\n\nYou find hidden connections between PRs and issues that humans miss at scale. The core loop is: fetch → analyze in parallel → cluster → verify → report → act.\n\nBefore doing anything, read references/principles.md. Those rules override everything in this file when there's a conflict.\n\nOverview\n\nRepos accumulate duplicate PRs and orphaned issue→PR links over time. Manual cross-referencing doesn't scale past a few dozen items. This skill uses parallel Sonnet subagents to analyze up to 1000 PRs and 1000 issues simultaneously, finding two kinds of links:\n\nDuplicate PRs — PRs that address the same bug or feature (even with different approaches or wording)\nIssue→PR links — Open issues that already have a PR solving them but no explicit \"fixes #N\" reference\n\nResults are grouped into thematic clusters, scored by actionability, and presented with available actions (comment, close, label) — not just as a flat list of pairs.\n\nConfiguration\n\nThe user provides these at invocation time (ask if not given):\n\nParameter\tDefault\tDescription\nrepo\t(ask)\tGitHub owner/repo to analyze\npr_count\t1000\tHow many recent PRs to scan\nissue_count\t1000\tHow many recent issues to scan\npr_state\tall\tPR state filter: open, closed, all\nissue_state\topen\tIssue state filter: open, closed, all\nbatch_size\t50\tPRs per subagent batch\nconfidence_threshold\tmedium\tMinimum confidence to include in report: low, medium, high\nmode\tplan\tplan = report only (default, always start here). execute = act on findings.\n\nDefault mode is plan (dry-run). The skill always starts by generating the report. The user must explicitly choose to execute actions after reviewing the findings. This matters because actions can't be undone.\n\nWorkflow\nPhase 1: Data Collection\n\nFetch PR and issue metadata from the GitHub API. This phase is deterministic and uses the shell script — no AI needed.\n\nscripts/fetch-data.sh <owner/repo> <workspace_dir> [pr_count] [issue_count] [pr_state] [issue_state]\n\n\nThis produces:\n\nworkspace/prs.json — Full PR metadata\nworkspace/issues.json — Full issue metadata (PRs filtered out)\nworkspace/existing-refs.json — Pre-extracted explicit cross-references\nworkspace/pr-index.txt — Compact one-line-per-PR index\nworkspace/issue-index.txt — Compact one-line-per-issue index\n\nThe existing references map captures what's already linked (via \"fixes #N\", \"closes #N\", etc.) so subagents can focus on what's missing.\n\nPhase 2: Parallel Analysis (Sonnet Subagents)\n\nThis is where the intelligence happens. Split PRs into batches and spawn parallel Sonnet subagents. Each subagent receives:\n\nIts batch of PRs (full metadata from prs.json, ~50 PRs)\nThe complete issue index (compact, ~60KB)\nThe complete PR index (compact, ~60KB) — for duplicate detection\nThe existing references map (so it skips already-linked items)\n\nSpawn subagents using the Task tool:\n\nFor each batch B of {batch_size} PRs:\n  Task(\n    subagent_type=\"general-purpose\",\n    model=\"sonnet\",\n    prompt=<see below>\n  )\n\n\nSubagent prompt template:\n\nImportant: When building each subagent prompt, paste the FULL contents of references/principles.md into the \"Decision Principles\" section below. Do not summarize or condense — include the complete text. This ensures subagents always use the latest principles without drift.\n\nYou are a cross-reference analyst for a GitHub repository. 
Your job is to find\nconnections between PRs and issues that aren't explicitly linked yet.\n\n## Decision Principles (these override everything else)\n\n{paste full contents of references/principles.md here}\n\n## Your Batch\nYou are analyzing PRs {start_num} through {end_num} of {total_prs}.\n\n## PR Details (your batch)\n{full PR metadata for this batch from prs.json}\n\n## Complete Issue Index\n{issue-index.txt content}\n\n## Complete PR Index\n{pr-index.txt content}\n\n## Already Known References\n{existing-refs.json content}\n\n## Your Task\n\nFind TWO types of connections:\n\n### 1. Issue→PR Links\nFor each PR in your batch, determine if it resolves any issue in the index.\nEvidence must include at least one of:\n- Same error message or failure path described in both\n- PR modifies the component/module that the issue describes as broken\n- PR body explicitly references the problem the issue describes (even without #N)\n\nTitle similarity alone is NOT sufficient. Skip any links that already exist\nin the known references.\n\n### 2. Duplicate PRs\nFor each PR in your batch, check if any OTHER PR in the full PR index\naddresses the same problem. Evidence must include at least one of:\n- Both modify the same files for the same reason\n- Both fix the same error/behavior (even with different approaches)\n- One is a resubmission or continuation of the other (same branch, similar body)\n\nSame area of code is NOT enough — the PRs must address the same specific problem.\n\n### 3. Flagging Uncertainty\n\nIf you encounter a pair where the evidence is ambiguous — you can see a\nplausible connection but can't confirm it from the available data — mark it\nwith `\"status\": \"manual_review_required\"` instead of guessing a confidence\nlevel. Include what's missing (e.g., \"need to see full diff to confirm\nfile overlap\").\n\n### Output Format\nReturn ONLY a JSON array. No other text.\n\n[\n  {\n    \"type\": \"issue_link\",\n    \"pr\": 5678,\n    \"pr_author\": \"@username\",\n    \"issue\": 1234,\n    \"confidence\": \"high|medium|low\",\n    \"status\": \"confirmed|manual_review_required\",\n    \"root_cause\": \"One sentence: what shared problem connects these\",\n    \"evidence\": \"Specific: same error message, same file, same component, etc.\",\n    \"missing_evidence\": null or \"What would be needed to confirm this\"\n  },\n  {\n    \"type\": \"duplicate_pr\",\n    \"pr_a\": 5678,\n    \"pr_b\": 5679,\n    \"pr_a_author\": \"@username_a\",\n    \"pr_b_author\": \"@username_b\",\n    \"confidence\": \"high|medium|low\",\n    \"status\": \"confirmed|manual_review_required\",\n    \"root_cause\": \"One sentence: what shared problem connects these\",\n    \"evidence\": \"Specific: same files modified, same branch, resubmission, etc.\",\n    \"missing_evidence\": null or \"What would be needed to confirm this\"\n  }\n]\n\n\nParallelism: Spawn ALL batch subagents simultaneously. With batch_size=50 and 1000 PRs, that's 20 parallel subagents. 
This is the power of the skill — what would take hours sequentially completes in minutes.\n\nPhase 3: Merge, Deduplicate & Cluster\n\nAfter all subagents return:\n\nCollect all JSON results into a single array\nDeduplicate duplicate_pr entries (A→B and B→A are the same link)\nMerge confidence — if two subagents found the same link, take the higher confidence and merge both evidence strings\nFilter by confidence_threshold\nBuild clusters — group related findings into thematic clusters (see below)\nScore clusters by actionability (see below)\nSort clusters by score (highest first)\n\nSave to workspace/results-unverified.json.\n\nClustering Algorithm\n\nInstead of reporting isolated pairs, group connected findings into clusters. Two findings belong to the same cluster if they share any PR or issue number.\n\nExample: If you find PR#100 ↔ PR#101 (duplicate) and PR#100 ↔ Issue#50 (link), these form a single cluster: \"Cluster: Issue#50 + PR#100 + PR#101\".\n\nCluster structure:\n\n{\n  \"cluster_id\": 1,\n  \"theme\": \"Onboard token mismatch — OPENCLAW_GATEWAY_TOKEN ignored\",\n  \"items\": [\"PR#22662\", \"PR#22658\", \"Issue#22638\"],\n  \"findings\": [ ...individual findings in this cluster... ],\n  \"score\": 8.5,\n  \"cluster_status\": \"actionable|needs_review|manual_review_required\",\n  \"suggested_actions\": [ ...see Phase 4b... ]\n}\n\n\nThe theme is a one-line summary that describes what this cluster is about — the shared root cause or feature area. Generate it from the root_cause fields of the cluster's findings.\n\nActionability Scoring\n\nEach cluster gets a score based on these signals (clamp result to 0-10):\n\nSignal\tPoints\tWhy it matters\nAll items open\t+3\tCan still be acted on\nAt least one high-confidence finding\t+2\tStrong evidence\nMultiple findings in cluster\t+1\tMore connections = more value\nIssue has >5 reactions/comments\t+1\tHigh community interest\nPR is not draft\t+1\tReady for review\nCluster has a clear canonical PR\t+1\tEasy to pick a winner\nAny manual_review_required\t-2\tNeeds human judgment\nAll items closed\t-3\tLow urgency\n\nClusters scoring 7+ are actionable (green in report). Clusters scoring 4-6 need review (yellow). Clusters scoring 0-3 are low priority (gray).\n\nPhase 3b: Evidence Verification\n\nThe batch subagents work from truncated bodies (500 chars) and compact indexes. That's good enough for discovery but not for final decisions. This phase takes the candidates and verifies them against deeper data.\n\nSpawn a single verification subagent (Sonnet) that:\n\nReads workspace/results-unverified.json\nFor each high/medium candidate, fetches deeper evidence via gh:\nDuplicate PRs: gh pr diff {id} --name-only for both PRs to confirm they actually touch the same files. If the file lists don't overlap at all, downgrade to low or remove.\nIssue→PR links: gh issue view {id} --json body,comments to read the full issue body (not truncated) and check if any commenter already noted the connection.\nFor both: gh pr view {id} --json body to read the full PR body when the truncated version was ambiguous.\nFor manual_review_required items: attempt to resolve with deeper data. If still ambiguous after deep check, keep the flag — it goes to the user.\nUpgrades, downgrades, or removes candidates based on the deeper evidence.\nRecalculates cluster scores after confidence changes.\nWrites the verified results to workspace/results.json.\n\nVerification subagent prompt:\n\nYou are an evidence verification agent. 
You received candidate cross-references\nbetween GitHub PRs and issues from a discovery pass. Your job is to verify or\nreject each candidate using deeper data.\n\n## Principles\n- A candidate stays only if deeper evidence confirms the connection.\n- If file diffs don't overlap for duplicate PRs, downgrade or remove.\n- If the full issue body reveals the problem is actually different, remove.\n- If someone already commented the link, exclude the candidate from results entirely.\n- You may upgrade \"medium\" to \"high\" if deeper evidence is strong.\n- For \"manual_review_required\" items: try to resolve with the deeper data.\n  If you can confirm or deny, update status to \"confirmed\" with the new\n  confidence. If still ambiguous, keep \"manual_review_required\".\n- Add a \"verified_evidence\" field with what you found in the deep check.\n\n## Candidates to verify\n{contents of results-unverified.json}\n\n## Commands available\nRun these via bash to fetch deeper data:\n- gh pr diff {number} --name-only --repo {owner/repo}\n- gh pr view {number} --json body --repo {owner/repo}\n- gh issue view {number} --json body,comments --repo {owner/repo}\n\n## Output\nWrite verified results to {workspace}/results.json as a JSON array.\nSame structure as input, but with:\n- Updated confidence levels and status fields\n- Added \"verified_evidence\" field\n- Removed any candidates that didn't survive verification\n- Added \"verification_note\" for anything noteworthy\n\n\nThis phase catches false positives that slipped through the discovery phase. The batch subagents are optimized for recall (find everything plausible); the verifier is optimized for precision (keep only what's real).\n\nSkip this phase if the total candidate count is under 5 — the cost of verification outweighs the benefit for small result sets.\n\nPhase 4: Generate Report\n\nPresent the report to the user organized by clusters, not flat pairs.\n\nReport structure:\n\n# Cross-Reference Report: {owner}/{repo}\n\n**Scanned**: {N} PRs, {M} issues\n**Found**: {X} clusters containing {Y} findings\n**Already linked**: {Z} existing references (skipped)\n**Mode**: plan (review only — no actions taken)\n\n## Clusters (sorted by actionability score)\n\n### Cluster 1: Onboard token mismatch (Score: 8.5 🟢)\n**Theme**: OPENCLAW_GATEWAY_TOKEN env var ignored during onboard setup\n**Items**: PR#22662 (@aiworks451), PR#22658 (@otherdev), Issue#22638\n**Status**: Actionable\n\n| Finding | Type | Confidence | Root Cause |\n|---------|------|------------|------------|\n| PR#22662 ↔ PR#22658 | duplicate_pr | high | Both fix token mismatch in onboard wizard |\n| PR#22658 → Issue#22638 | issue_link | high | PR explicitly closes the issue |\n\n**Suggested actions** (choose per cluster):\n- 💬 Comment on PR#22662 noting PR#22658 covers the same fix more broadly\n- 🏷️ Label PR#22662 as `duplicate`\n- ❌ Close PR#22662 as duplicate of PR#22658\n\n---\n\n### Cluster 2: i18n Portuguese translations (Score: 6.0 🟡)\n**Theme**: Competing pt-BR translation implementations\n**Items**: PR#22637 (@dev1), PR#22628 (@dev2)\n**Status**: Needs review — different approaches, human must choose\n\n| Finding | Type | Confidence | Root Cause |\n|---------|------|------------|------------|\n| PR#22637 ↔ PR#22628 | duplicate_pr | medium | Same feature, different implementations |\n\n**Suggested actions**:\n- 💬 Comment linking the two PRs for coordination\n- ⚠️ Manual review required: different i18n architectures, maintainer must decide\n\n---\n\n### ⚠️ Items Requiring Manual 
Review\n\nThese findings had ambiguous evidence that couldn't be resolved automatically:\n\n| Finding | Reason | What's Missing |\n|---------|--------|----------------|\n| PR#1234 ↔ Issue#5678 | Keyword overlap but no shared error path | Need to check if PR touches the auth module |\n\n## Summary\n- **Actionable clusters**: {count} (score 7+, ready for bulk action)\n- **Needs review**: {count} (score 4-6, human judgment needed)\n- **Manual review required**: {count} (ambiguous, flagged for human)\n- **Next step**: Choose actions per cluster, then select a commenting/action strategy.\n\nPhase 4b: Suggested Actions Per Cluster\n\nFor each cluster, suggest appropriate actions based on confidence and item states.\n\nFor duplicate PRs (high confidence, both open):\n\n💬 Comment — link the PRs so authors can coordinate\n🏷️ Label — add duplicate label to the weaker PR\n❌ Close — close the weaker PR as duplicate (only if very clear)\n\nFor duplicate PRs (one open, one closed):\n\n💬 Comment — note the connection for context (lower priority)\n\nFor issue→PR links (high confidence):\n\n💬 Comment on issue — note that a PR addresses this\n🏷️ Label issue — add has-pr or similar\n\nFor manual_review_required items:\n\n⚠️ Flag for human — present in a separate section, no automated action\n\nAction rules:\n\nNever suggest closing without high confidence + verification\nNever suggest labeling without at least medium confidence\nAlways suggest commenting as the minimum action (it's the safest)\nFor clusters with mixed confidence, suggest the action matching the lowest-confidence finding (conservative)\nPhase 5: Interactive Action Strategy\n\nAfter presenting the report, ask the user how they want to proceed. Read references/commenting-strategy.md for rate-limiting details.\n\nPresent action choices per cluster:\n\nFor each actionable cluster, let the user pick:\n\nComment only — just link the items\nComment + label — link and add labels\nComment + close — link and close duplicates (high confidence only)\nSkip — do nothing for this cluster\nManual — I'll handle this one myself\n\nThen present the timing strategy. Read references/commenting-strategy.md for the full tier definitions, rate calculations, and daily budget math. Present the user with the strategy table from that file, populated with the actual counts from the report. If total actions exceed the daily budget, show the multi-day plan as described in commenting-strategy.md.\n\nAlways offer Dry Run (report only, no actions) as the default choice. 
Also offer Skip — save the report but don't act at all.\n\nPhase 6: Execute Actions\n\nIf the user chooses to act, build workspace/approved-comments.json and execute with rate limiting via the shell script.\n\napproved-comments.json schema (array of objects):\n\n[\n  {\n    \"target_number\": 1234,\n    \"type\": \"issue_link|duplicate_pr\",\n    \"body\": \"The full comment text to post\",\n    \"cluster_id\": 1,\n    \"finding_index\": 0\n  }\n]\n\ntarget_number — the issue or PR number to comment on (used by post-comments.sh)\ntype — finding type, used for logging only\nbody — the complete comment text\ncluster_id and finding_index — traceability back to the report\nscripts/post-comments.sh <owner/repo> <workspace_dir> [jitter_min] [jitter_max] [daily_max]\n\n\nFor label and close actions, execute them inline (not via the script) since they don't need the same rate limiting as comments:\n\n# Label (works for both issues and PRs — GitHub treats PRs as issues for labels)\ngh issue edit {number} --add-label duplicate --repo {owner/repo}\n# Close PR as duplicate (use heredoc for safe body passing)\ngh pr close {number} --comment \"$(cat <<'EOF'\nClosing in favor of #{canonical_pr_number} by @{canonical_author}, which covers the same change ({root_cause_sentence}).\n\nThanks for the contribution, @{closed_pr_author} — your work helped confirm this was worth fixing.\n\n_If this closure is wrong, reopen and let me know._\nEOF\n)\" --repo {owner/repo}\n\n\nAlways execute in this order within a cluster:\n\nPost comments first (so the context exists before close/label)\nAdd labels\nClose (only after comment is posted)\n\nComment style: Comments should feel like they're from a helpful maintainer, not a bot. Vary the opener and closer for each comment to avoid sounding repetitive. Always mention the PR author by name.\n\nComment templates (vary the opener each time):\n\nOpeners (rotate through these, never use the same one twice in a row):\n\n\"Heads up — this might be related.\"\n\"Worth a look:\"\n\"Noticed a possible connection here.\"\n\"This could be relevant to what you're working on.\"\n\nFor issue→PR links (comment on the issue):\n\n{opener}\n\nPR #{pr_number} by @{author} ({pr_title}) appears to address this issue.\n\n{root_cause_sentence}\n\n_If this doesn't look right, let me know and I'll correct the link._\n\n\nFor duplicate PRs (comment on the newer PR):\n\n{opener}\n\nPR #{other_pr_number} by @{other_author} ({other_pr_title}) seems to address\nthe same problem.\n\n{root_cause_sentence}\n\nBoth approaches have merit — might be worth coordinating.\n\n_If these aren't actually related, let me know and I'll correct this._\n\n\nEvery comment includes a correction path because wrong links erode trust.\n\nSave progress to workspace/comment-progress.json for resume support.\n\nError Handling\nAPI rate limit hit: Pause, show remaining reset time, save progress.\nSubagent returns invalid JSON: Log the error, skip that batch, warn user. Don't retry — the batch results are lost but other batches continue.\nPR/issue not found (deleted): Skip silently, note in report.\nNetwork error during commenting: Save progress immediately, offer resume.\nSubagent returns empty results: Normal — not every batch has links.\nClose/label fails: Log the error, continue with remaining actions. 
Never retry a close — the user should investigate manually.\nWorkspace Structure\ncross-ref-workspace/\n├── prs.json                  # Raw PR metadata\n├── issues.json               # Raw issue metadata\n├── pr-index.txt              # Compact PR index (one line per PR)\n├── issue-index.txt           # Compact issue index (one line per issue)\n├── existing-refs.json        # Pre-extracted explicit references\n├── batches/\n│   ├── batch-01-results.json # Subagent results per batch\n│   ├── batch-02-results.json\n│   └── ...\n├── results-unverified.json   # Raw merged findings (before verification)\n├── results.json              # Verified findings with clusters\n├── report.md                 # Human-readable report\n├── approved-comments.json    # Comments approved for posting\n├── comment-progress.json     # Commenting progress tracker\n└── pending-comments.json     # Links not yet commented (if day limit hit)\n\nResume Support\n\nIf a previous run exists in the workspace:\n\nPhase 1-3: Skip if results.json exists and user confirms\nPhase 4: Skip if report.md exists and user confirms\nPhase 5-6: Resume from comment-progress.json if commenting was interrupted\nAsk: \"Found a previous run with {N} results. Resume commenting or start fresh?\"\nTips for Operators\nStart with a smaller count (100 PRs, 100 issues) to validate before scaling\nAlways review the report in plan mode before executing actions\nThe compact index approach keeps memory usage manageable — don't fetch full PR bodies (500 char truncation is intentional)\nFor very active repos (>10K PRs), increase batch_size to reduce subagent count\nToken costs: ~20 subagent calls for 1000 PRs at batch_size=50, each with ~120KB context. Plan accordingly.\nThe gh CLI token needs repo scope (private) or public_repo (public), plus issues:write for posting comments."
  },
  "trust": {
    "sourceLabel": "tencent",
    "provenanceUrl": "https://clawhub.ai/Glucksberg/cross-ref",
    "publisherUrl": "https://clawhub.ai/Glucksberg/cross-ref",
    "owner": "Glucksberg",
    "version": "1.1.0",
    "license": null,
    "verificationStatus": "Indexed source record"
  },
  "links": {
    "detailUrl": "https://openagent3.xyz/skills/cross-ref",
    "downloadUrl": "https://openagent3.xyz/downloads/cross-ref",
    "agentUrl": "https://openagent3.xyz/skills/cross-ref/agent",
    "manifestUrl": "https://openagent3.xyz/skills/cross-ref/agent.json",
    "briefUrl": "https://openagent3.xyz/skills/cross-ref/agent.md"
  }
}