{
  "schemaVersion": "1.0",
  "item": {
    "slug": "agentbench",
    "name": "AgentBench",
    "source": "tencent",
    "type": "skill",
    "category": "开发工具",
    "sourceUrl": "https://clawhub.ai/Exe215/agentbench",
    "canonicalUrl": "https://clawhub.ai/Exe215/agentbench",
    "targetPlatform": "OpenClaw"
  },
  "install": {
    "downloadMode": "redirect",
    "downloadUrl": "/downloads/agentbench",
    "sourceDownloadUrl": "https://wry-manatee-359.convex.site/api/v1/download?slug=agentbench",
    "sourcePlatform": "tencent",
    "targetPlatform": "OpenClaw",
    "installMethod": "Manual import",
    "extraction": "Extract archive",
    "prerequisites": [
      "OpenClaw"
    ],
    "packageFormat": "ZIP package",
    "includedAssets": [
      "README.md",
      "SKILL.md",
      "lib/metrics.sh",
      "skill.json",
      "tasks/data-analysis/cross-reference/inputs/inventory.csv",
      "tasks/data-analysis/cross-reference/inputs/orders.csv"
    ],
    "primaryDoc": "SKILL.md",
    "quickSetup": [
      "Download the package from Yavira.",
      "Extract the archive and review SKILL.md first.",
      "Import or place the package into your OpenClaw setup."
    ],
    "agentAssist": {
      "summary": "Hand the extracted package to your coding agent with a concrete install brief instead of figuring it out manually.",
      "steps": [
        "Download the package from Yavira.",
        "Extract it into a folder your agent can access.",
        "Paste one of the prompts below and point your agent at the extracted folder."
      ],
      "prompts": [
        {
          "label": "New install",
          "body": "I downloaded a skill package from Yavira. Read SKILL.md from the extracted folder and install it by following the included instructions. Then review README.md for any prerequisites, environment setup, or post-install checks. Tell me what you changed and call out any manual steps you could not complete."
        },
        {
          "label": "Upgrade existing",
          "body": "I downloaded an updated skill package from Yavira. Read SKILL.md from the extracted folder, compare it with my current installation, and upgrade it while preserving any custom configuration unless the package docs explicitly say otherwise. Then review README.md for any prerequisites, environment setup, or post-install checks. Summarize what changed and any follow-up checks I should run."
        }
      ]
    },
    "sourceHealth": {
      "source": "tencent",
      "status": "healthy",
      "reason": "direct_download_ok",
      "recommendedAction": "download",
      "checkedAt": "2026-04-23T16:43:11.935Z",
      "expiresAt": "2026-04-30T16:43:11.935Z",
      "httpStatus": 200,
      "finalUrl": "https://wry-manatee-359.convex.site/api/v1/download?slug=4claw-imageboard",
      "contentType": "application/zip",
      "probeMethod": "head",
      "details": {
        "probeUrl": "https://wry-manatee-359.convex.site/api/v1/download?slug=4claw-imageboard",
        "contentDisposition": "attachment; filename=\"4claw-imageboard-1.0.1.zip\"",
        "redirectLocation": null,
        "bodySnippet": null
      },
      "scope": "source",
      "summary": "Source download looks usable.",
      "detail": "Yavira can redirect you to the upstream package for this source.",
      "primaryActionLabel": "Download for OpenClaw",
      "primaryActionHref": "/downloads/agentbench"
    },
    "validation": {
      "installChecklist": [
        "Use the Yavira download entry.",
        "Review SKILL.md after the package is downloaded.",
        "Confirm the extracted package contains the expected setup assets."
      ],
      "postInstallChecks": [
        "Confirm the extracted package includes the expected docs or setup files.",
        "Validate the skill or prompts are available in your target agent workspace.",
        "Capture any manual follow-up steps the agent could not complete."
      ]
    },
    "downloadPageUrl": "https://openagent3.xyz/downloads/agentbench",
    "agentPageUrl": "https://openagent3.xyz/skills/agentbench/agent",
    "manifestUrl": "https://openagent3.xyz/skills/agentbench/agent.json",
    "briefUrl": "https://openagent3.xyz/skills/agentbench/agent.md"
  },
  "agentAssist": {
    "summary": "Hand the extracted package to your coding agent with a concrete install brief instead of figuring it out manually.",
    "steps": [
      "Download the package from Yavira.",
      "Extract it into a folder your agent can access.",
      "Paste one of the prompts below and point your agent at the extracted folder."
    ],
    "prompts": [
      {
        "label": "New install",
        "body": "I downloaded a skill package from Yavira. Read SKILL.md from the extracted folder and install it by following the included instructions. Then review README.md for any prerequisites, environment setup, or post-install checks. Tell me what you changed and call out any manual steps you could not complete."
      },
      {
        "label": "Upgrade existing",
        "body": "I downloaded an updated skill package from Yavira. Read SKILL.md from the extracted folder, compare it with my current installation, and upgrade it while preserving any custom configuration unless the package docs explicitly say otherwise. Then review README.md for any prerequisites, environment setup, or post-install checks. Summarize what changed and any follow-up checks I should run."
      }
    ]
  },
  "documentation": {
    "source": "clawhub",
    "primaryDoc": "SKILL.md",
    "sections": [
      {
        "title": "AgentBench for OpenClaw",
        "body": "Benchmark your OpenClaw agent's general capabilities across 40 real-world tasks spanning 7 domains."
      },
      {
        "title": "Commands",
        "body": "When the user says any of these, follow the corresponding instructions:\n\n/benchmark — Run the full benchmark suite (all 40 tasks)\n/benchmark --fast — Run only easy+medium tasks (19 tasks)\n/benchmark --suite <name> — Run one domain only\n/benchmark --task <id> — Run a single task\n/benchmark --strict — Tag results as externally verified scoring\n/benchmark-list — List all tasks grouped by domain\n/benchmark-results — Show results from previous runs\n/benchmark-compare — Compare two runs side-by-side\n\nFlags are combinable: /benchmark --fast --suite research"
      },
      {
        "title": "Step 1: Discover Tasks",
        "body": "Read task.yaml files from the tasks/ directory in this skill:\n\ntasks/{suite-name}/{task-name}/task.yaml\n\nEach task.yaml contains: name, id, suite, difficulty, mode, user_message, input_files, expected_outputs, expected_metrics, scoring weights.\n\nFilter by --suite or --task if specified. If --fast is set and --task is not, filter to only tasks where difficulty is \"easy\" or \"medium\".\n\nProfile is \"fast\" if --fast was specified, otherwise \"full\".\n\nList discovered tasks with count and suites."
      },
      {
        "title": "Step 2: Set Up Run Directory",
        "body": "Generate a run ID from the current timestamp: YYYYMMDD-HHmmss\n\nRead suite_version from skill.json in this skill directory.\n\nCreate the results directory:\n\nagentbench-results/{run-id}/\n\nAnnounce: Starting AgentBench run {run-id} | Profile: {profile} | Suite version: {suite_version} | Tasks: {count}"
      },
      {
        "title": "Step 3: Execute Each Task",
        "body": "For each task:\n\nSet up workspace:\n\nCreate /tmp/agentbench-task-{task-id}/ as workspace\nCopy input files from tasks/{suite}/{task}/inputs/ to the workspace (if inputs/ exists)\nIf the task directory contains a setup.sh: run bash tasks/{suite}/{task}/setup.sh {workspace-path}\nFor file-unchanged validators: compute checksums of specified files after setup, before task execution\n\n\n\nAnnounce: Running: {task.name} [{task.suite}] (difficulty: {task.difficulty})\n\n\nRecord start time (milliseconds): date +%s%3N\n\n\nExecute the task yourself directly:\n\nRead the task's user_message and execute it as if a real user sent you the request\nWork ONLY within the workspace directory\nIf input files are listed, read them from the workspace\nExecute naturally — use the appropriate tools (read, write, edit, exec, web_search, web_fetch, etc.)\nCreate any output files in the workspace directory\nWhen done, write a brief execution-trace.md to the workspace:\n\nWhat you understood the task to be\nWhat approach you took\nWhat files you created or modified\nAny difficulties or decisions you made\n\n\n\n\n\nRecord end time and compute duration\n\n\nCollect metrics:\n\ntotal_time_ms: end - start\ntool_calls_total: count how many tool calls you made during this task\nerrors: count any tool call failures\nplanning_ratio: estimate the fraction of time spent reading/thinking vs producing output (approximate is fine)\n\n\n\nLayer 0 — Automated Structural Checks (compute directly):\nAfter task execution, check the workspace. For each entry in expected_outputs:\n\nfile-exists: Check if file exists. 30 points if found, 0 if not.\ncontent-contains: Read file, check each required section keyword (case-insensitive). Points proportional to matches found. Pool: 40 points.\nword-count-range: Count words. In range = 30 points. Within 2x range = 15 points. Outside = 0.\ngit-log-contains: Check git log for expected strings. 30 points if all found, proportional otherwise.\ndirectory-structure: Check all paths exist. 30 points if all present, proportional for partial.\ncommand-output-contains: Run command, check output contains all strings. 30 points if match, 0 if not.\nfile-unchanged: Compare checksum against pre-execution checksum. 30 points if unchanged, 0 if modified.\nlink-consistency: Scan files for link syntax consistency. 30 points if consistent, 15 if mostly consistent (>70% one style), 0 if mixed.\nNormalize total to 0-100.\n\n\n\nLayer 1 — Metrics Analysis (compute directly):\nIf task has expected_metrics:\n\nTool calls within expected range: 40 points\nTool calls within 2x range: 20 points\nOutside 2x range: 0 points\nPlanning ratio within expected range: 30 points\nPlanning ratio outside but within 2x: 15 points\nWay off: 0 points\nZero errors: 30 points\n1-2 errors: 15 points\n3+ errors: 0 points\nNormalize to 0-100. 
If no metrics available, score as 50.\nToken estimate is tracked for reporting but NOT scored.\n\n\n\nLayer 2 — Behavioral Analysis (self-evaluate honestly, 0-100):\nScore based on HOW you executed:\nInstruction Adherence (30 points):\n\n30: Followed all instructions precisely\n20: Mostly followed, minor deviations\n10: Significant deviations\n0: Ignored or misunderstood\n\nTool Appropriateness (25 points) — rule-based first:\n\nPenalty: -10 for each use of exec cat instead of read to read files\nPenalty: -10 for each use of exec echo/printf instead of write to create files\nPenalty: -5 for each use of exec sed/awk instead of edit for file edits\nStart at 25, apply penalties, floor at 0\n\nApproach Quality (25 points) — check read-before-write:\n\n25: Read all inputs before producing output\n15: Read most inputs, minor gaps\n5: Started producing output without reading context\n0: No clear approach\n\nError Recovery (20 points):\n\n20: Clean recovery or no errors occurred\n10: Partial recovery\n0: Failed to recover\n\n\n\nLayer 3 — Output Quality (self-evaluate honestly, 0-100):\nScore the deliverable:\nCompleteness (25): All requirements met? Gaps?\nAccuracy (25): Content correct? Calculations right?\nFormatting (25): Well-structured? Correct file format?\nPolish (25): Would a user be satisfied?\n\n\nCompute composite score:\nscore = (L0 × 0.20) + (L1 × 0.35) + (L2 × 0.20) + (L3 × 0.25)\n\nUse weights from task.yaml if specified, otherwise these defaults.\n\n\nSave task result to agentbench-results/{run-id}/{task-id}/:\n\nscores.json: All layer scores, composite, breakdown, notes\nmetrics.json: Timing, tool calls, errors, planning ratio\nCopy output files\n\n\n\nDisplay: {task.name}: {composite}/100 (L0:{l0} L1:{l1} L2:{l2} L3:{l3})"
      },
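      {
        "title": "Worked Example: Scoring a Task",
        "body": "A hypothetical task to illustrate the arithmetic (the numbers are made up, not taken from any real task.yaml): suppose expected_outputs lists file-exists (30-point pool), content-contains (40-point pool), and word-count-range (30-point pool). The file exists (30), 3 of 4 required keywords match (30), and the word count falls within 2x of the range (15), so L0 = 75 out of a 100-point pool = 75. With L1 = 60, L2 = 90, and L3 = 70, the default weights give: score = (75 × 0.20) + (60 × 0.35) + (90 × 0.20) + (70 × 0.25) = 15 + 21 + 18 + 17.5 = 71.5, reported as 72."
      },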
      {
        "title": "Step 4: Generate Report",
        "body": "After all tasks:\n\nCompute domain averages (group by suite, average composite scores)\nCompute overall score (average of domain scores — equal domain weighting)\nCompute aggregate metrics\n\nGenerate three files in agentbench-results/{run-id}/:\n\nresults.json — Machine-readable with this structure:\n\n{\n  \"run_id\": \"20260222-143022\",\n  \"timestamp\": \"2026-02-22T14:30:22Z\",\n  \"platform\": \"openclaw\",\n  \"mode\": \"sandboxed\",\n  \"profile\": \"full\",\n  \"suite_version\": \"1.0.0\",\n  \"scoring_method\": \"self-scored\",\n  \"overall_score\": 74,\n  \"duration_ms\": 754000,\n  \"task_count\": 40,\n  \"metrics\": {\n    \"total_tool_calls\": 187,\n    \"total_errors\": 3,\n    \"avg_planning_ratio\": 0.28,\n    \"est_tokens\": 245000\n  },\n  \"domain_scores\": {},\n  \"tasks\": []\n}\n\nIf --strict was used, set scoring_method to \"externally-verified\".\n\nIntegrity signature: After building results.json (without signature field), compute:\n\nSIG=$(echo -n \"$CONTENT\" | openssl dgst -sha256 -hmac \"agentbench-v1-{run_id}-{suite_version}-integrity\" | awk '{print $2}')\n\nAdd as \"signature\" field to results.json.\n\nreport.md — Markdown summary: Overall Score, Metrics, Domain Breakdown, Task Details, Top Failures, Recommendations.\n\nreport.html — Self-contained HTML dashboard (inline CSS/JS, no external deps):\n\nScore display with color (green 80+, yellow 60-79, red <60)\nDomain cards with score bars\nTask detail table (sortable, expandable)\nTop failures section\nDark mode via prefers-color-scheme\nFooter: \"Generated by AgentBench v1.0.0 (OpenClaw) | Suite v{suite_version} | Profile: {profile}\""
      },
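      {
        "title": "Worked Example: Overall Score",
        "body": "Hypothetical numbers to illustrate equal domain weighting: suppose a run produced domain averages of 80, 70, and 72 across three suites. The overall score is (80 + 70 + 72) / 3 = 74, regardless of how many tasks each suite contains, so a large suite does not outweigh a small one."
      },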
      {
        "title": "Step 5: Present Results",
        "body": "Display overall score\nShow domain breakdown\nTell user where results are saved\nMention they can submit to https://www.agentbench.app/submit"
      },
      {
        "title": "Step 6: Clean Up",
        "body": "Run teardown.sh if present. Remove temp workspace directories unless --keep-workspace was specified."
      },
      {
        "title": "Listing Tasks (/benchmark-list)",
        "body": "Read all task.yaml files, group by suite, display as:\n\n## file-creation (9 tasks)\n  - project-scaffold [easy]\n  - project-proposal [medium]\n  ..."
      },
      {
        "title": "Viewing Results (/benchmark-results)",
        "body": "List all directories in agentbench-results/, show run ID, date, overall score, profile, and task count for each."
      },
      {
        "title": "Comparing Runs (/benchmark-compare)",
        "body": "Show two runs side-by-side: overall scores, domain scores, and per-task deltas. Warn if profiles differ."
      },
      {
        "title": "Key Differences from Claude Code Version",
        "body": "No hooks — metrics are self-tracked (timing, tool call counting)\nNo subagents — you execute tasks directly in sequence\nSame tasks, same scoring, same output format — results are cross-platform comparable\nSame integrity signature — submissions work on the same leaderboard"
      },
      {
        "title": "Important Notes",
        "body": "Be honest in self-evaluation (L2/L3). Inflated scores are obvious on the leaderboard.\nThe objective layers (L0 + L1) carry 55% of the weight — they can't be faked.\nToken estimates are informational only, not scored.\nAny link syntax is accepted in skill graph tasks — consistency is what's scored."
      }
    ],
    "body": "AgentBench for OpenClaw\n\nBenchmark your OpenClaw agent's general capabilities across 40 real-world tasks spanning 7 domains.\n\nCommands\n\nWhen the user says any of these, follow the corresponding instructions:\n\n/benchmark — Run the full benchmark suite (all 40 tasks)\n/benchmark --fast — Run only easy+medium tasks (19 tasks)\n/benchmark --suite <name> — Run one domain only\n/benchmark --task <id> — Run a single task\n/benchmark --strict — Tag results as externally verified scoring\n/benchmark-list — List all tasks grouped by domain\n/benchmark-results — Show results from previous runs\n/benchmark-compare — Compare two runs side-by-side\n\nFlags are combinable: /benchmark --fast --suite research\n\nRunning a Benchmark\nStep 1: Discover Tasks\n\nRead task.yaml files from the tasks/ directory in this skill:\n\ntasks/{suite-name}/{task-name}/task.yaml\n\n\nEach task.yaml contains: name, id, suite, difficulty, mode, user_message, input_files, expected_outputs, expected_metrics, scoring weights.\n\nFilter by --suite or --task if specified. If --fast is set and --task is not, filter to only tasks where difficulty is \"easy\" or \"medium\".\n\nProfile is \"fast\" if --fast was specified, otherwise \"full\".\n\nList discovered tasks with count and suites.\n\nStep 2: Set Up Run Directory\n\nGenerate a run ID from the current timestamp: YYYYMMDD-HHmmss\n\nRead suite_version from skill.json in this skill directory.\n\nCreate the results directory:\n\nagentbench-results/{run-id}/\n\n\nAnnounce: Starting AgentBench run {run-id} | Profile: {profile} | Suite version: {suite_version} | Tasks: {count}\n\nStep 3: Execute Each Task\n\nFor each task:\n\nSet up workspace:\n\nCreate /tmp/agentbench-task-{task-id}/ as workspace\nCopy input files from tasks/{suite}/{task}/inputs/ to the workspace (if inputs/ exists)\nIf the task directory contains a setup.sh: run bash tasks/{suite}/{task}/setup.sh {workspace-path}\nFor file-unchanged validators: compute checksums of specified files after setup, before task execution\n\nAnnounce: Running: {task.name} [{task.suite}] (difficulty: {task.difficulty})\n\nRecord start time (milliseconds): date +%s%3N\n\nExecute the task yourself directly:\n\nRead the task's user_message and execute it as if a real user sent you the request\nWork ONLY within the workspace directory\nIf input files are listed, read them from the workspace\nExecute naturally — use the appropriate tools (read, write, edit, exec, web_search, web_fetch, etc.)\nCreate any output files in the workspace directory\nWhen done, write a brief execution-trace.md to the workspace:\nWhat you understood the task to be\nWhat approach you took\nWhat files you created or modified\nAny difficulties or decisions you made\n\nRecord end time and compute duration\n\nCollect metrics:\n\ntotal_time_ms: end - start\ntool_calls_total: count how many tool calls you made during this task\nerrors: count any tool call failures\nplanning_ratio: estimate the fraction of time spent reading/thinking vs producing output (approximate is fine)\n\nLayer 0 — Automated Structural Checks (compute directly): After task execution, check the workspace. For each entry in expected_outputs:\n\nfile-exists: Check if file exists. 30 points if found, 0 if not.\ncontent-contains: Read file, check each required section keyword (case-insensitive). Points proportional to matches found. Pool: 40 points.\nword-count-range: Count words. In range = 30 points. Within 2x range = 15 points. 
Outside = 0.\ngit-log-contains: Check git log for expected strings. 30 points if all found, proportional otherwise.\ndirectory-structure: Check all paths exist. 30 points if all present, proportional for partial.\ncommand-output-contains: Run command, check output contains all strings. 30 points if match, 0 if not.\nfile-unchanged: Compare checksum against pre-execution checksum. 30 points if unchanged, 0 if modified.\nlink-consistency: Scan files for link syntax consistency. 30 points if consistent, 15 if mostly consistent (>70% one style), 0 if mixed.\nNormalize total to 0-100.\n\nLayer 1 — Metrics Analysis (compute directly): If task has expected_metrics:\n\nTool calls within expected range: 40 points\nTool calls within 2x range: 20 points\nOutside 2x range: 0 points\nPlanning ratio within expected range: 30 points\nPlanning ratio outside but within 2x: 15 points\nWay off: 0 points\nZero errors: 30 points\n1-2 errors: 15 points\n3+ errors: 0 points\nNormalize to 0-100. If no metrics available, score as 50.\nToken estimate is tracked for reporting but NOT scored.\n\nLayer 2 — Behavioral Analysis (self-evaluate honestly, 0-100): Score based on HOW you executed:\n\nInstruction Adherence (30 points):\n\n30: Followed all instructions precisely\n20: Mostly followed, minor deviations\n10: Significant deviations\n0: Ignored or misunderstood\n\nTool Appropriateness (25 points) — rule-based first:\n\nPenalty: -10 for each use of exec cat instead of read to read files\nPenalty: -10 for each use of exec echo/printf instead of write to create files\nPenalty: -5 for each use of exec sed/awk instead of edit for file edits\nStart at 25, apply penalties, floor at 0\n\nApproach Quality (25 points) — check read-before-write:\n\n25: Read all inputs before producing output\n15: Read most inputs, minor gaps\n5: Started producing output without reading context\n0: No clear approach\n\nError Recovery (20 points):\n\n20: Clean recovery or no errors occurred\n10: Partial recovery\n0: Failed to recover\n\nLayer 3 — Output Quality (self-evaluate honestly, 0-100): Score the deliverable:\n\nCompleteness (25): All requirements met? Gaps? Accuracy (25): Content correct? Calculations right? Formatting (25): Well-structured? Correct file format? 
Polish (25): Would a user be satisfied?\n\nCompute composite score:\n\nscore = (L0 × 0.20) + (L1 × 0.35) + (L2 × 0.20) + (L3 × 0.25)\n\n\nUse weights from task.yaml if specified, otherwise these defaults.\n\nSave task result to agentbench-results/{run-id}/{task-id}/:\n\nscores.json: All layer scores, composite, breakdown, notes\nmetrics.json: Timing, tool calls, errors, planning ratio\nCopy output files\n\nDisplay: {task.name}: {composite}/100 (L0:{l0} L1:{l1} L2:{l2} L3:{l3})\n\nStep 4: Generate Report\n\nAfter all tasks:\n\nCompute domain averages (group by suite, average composite scores)\nCompute overall score (average of domain scores — equal domain weighting)\nCompute aggregate metrics\n\nGenerate three files in agentbench-results/{run-id}/:\n\nresults.json — Machine-readable with this structure:\n\n{\n  \"run_id\": \"20260222-143022\",\n  \"timestamp\": \"2026-02-22T14:30:22Z\",\n  \"platform\": \"openclaw\",\n  \"mode\": \"sandboxed\",\n  \"profile\": \"full\",\n  \"suite_version\": \"1.0.0\",\n  \"scoring_method\": \"self-scored\",\n  \"overall_score\": 74,\n  \"duration_ms\": 754000,\n  \"task_count\": 40,\n  \"metrics\": {\n    \"total_tool_calls\": 187,\n    \"total_errors\": 3,\n    \"avg_planning_ratio\": 0.28,\n    \"est_tokens\": 245000\n  },\n  \"domain_scores\": {},\n  \"tasks\": []\n}\n\n\nIf --strict was used, set scoring_method to \"externally-verified\".\n\nIntegrity signature: After building results.json (without signature field), compute:\n\nSIG=$(echo -n \"$CONTENT\" | openssl dgst -sha256 -hmac \"agentbench-v1-{run_id}-{suite_version}-integrity\" | awk '{print $2}')\n\n\nAdd as \"signature\" field to results.json.\n\nreport.md — Markdown summary: Overall Score, Metrics, Domain Breakdown, Task Details, Top Failures, Recommendations.\n\nreport.html — Self-contained HTML dashboard (inline CSS/JS, no external deps):\n\nScore display with color (green 80+, yellow 60-79, red <60)\nDomain cards with score bars\nTask detail table (sortable, expandable)\nTop failures section\nDark mode via prefers-color-scheme\nFooter: \"Generated by AgentBench v1.0.0 (OpenClaw) | Suite v{suite_version} | Profile: {profile}\"\nStep 5: Present Results\nDisplay overall score\nShow domain breakdown\nTell user where results are saved\nMention they can submit to https://www.agentbench.app/submit\nStep 6: Clean Up\n\nRun teardown.sh if present. Remove temp workspace directories unless --keep-workspace was specified.\n\nListing Tasks (/benchmark-list)\n\nRead all task.yaml files, group by suite, display as:\n\n## file-creation (9 tasks)\n  - project-scaffold [easy]\n  - project-proposal [medium]\n  ...\n\nViewing Results (/benchmark-results)\n\nList all directories in agentbench-results/, show run ID, date, overall score, profile, and task count for each.\n\nComparing Runs (/benchmark-compare)\n\nShow two runs side-by-side: overall scores, domain scores, and per-task deltas. Warn if profiles differ.\n\nKey Differences from Claude Code Version\nNo hooks — metrics are self-tracked (timing, tool call counting)\nNo subagents — you execute tasks directly in sequence\nSame tasks, same scoring, same output format — results are cross-platform comparable\nSame integrity signature — submissions work on the same leaderboard\nImportant Notes\nBe honest in self-evaluation (L2/L3). 
Inflated scores are obvious on the leaderboard.\nThe objective layers (L0 + L1) carry 55% of the weight — they can't be faked.\nToken estimates are informational only, not scored.\nAny link syntax is accepted in skill graph tasks — consistency is what's scored."
  },
  "trust": {
    "sourceLabel": "tencent",
    "provenanceUrl": "https://clawhub.ai/Exe215/agentbench",
    "publisherUrl": "https://clawhub.ai/Exe215/agentbench",
    "owner": "Exe215",
    "version": "1.0.0",
    "license": null,
    "verificationStatus": "Indexed source record"
  },
  "links": {
    "detailUrl": "https://openagent3.xyz/skills/agentbench",
    "downloadUrl": "https://openagent3.xyz/downloads/agentbench",
    "agentUrl": "https://openagent3.xyz/skills/agentbench/agent",
    "manifestUrl": "https://openagent3.xyz/skills/agentbench/agent.json",
    "briefUrl": "https://openagent3.xyz/skills/agentbench/agent.md"
  }
}