{
  "schemaVersion": "1.0",
  "item": {
    "slug": "hadoop",
    "name": "Hadoop",
    "source": "tencent",
    "type": "skill",
    "category": "开发工具",
    "sourceUrl": "https://clawhub.ai/ivangdavila/hadoop",
    "canonicalUrl": "https://clawhub.ai/ivangdavila/hadoop",
    "targetPlatform": "OpenClaw"
  },
  "install": {
    "downloadMode": "redirect",
    "downloadUrl": "/downloads/hadoop",
    "sourceDownloadUrl": "https://wry-manatee-359.convex.site/api/v1/download?slug=hadoop",
    "sourcePlatform": "tencent",
    "targetPlatform": "OpenClaw",
    "installMethod": "Manual import",
    "extraction": "Extract archive",
    "prerequisites": [
      "OpenClaw"
    ],
    "packageFormat": "ZIP package",
    "includedAssets": [
      "SKILL.md",
      "hdfs.md",
      "memory-template.md",
      "setup.md",
      "troubleshooting.md",
      "yarn.md"
    ],
    "primaryDoc": "SKILL.md",
    "quickSetup": [
      "Download the package from Yavira.",
      "Extract the archive and review SKILL.md first.",
      "Import or place the package into your OpenClaw setup."
    ],
    "agentAssist": {
      "summary": "Hand the extracted package to your coding agent with a concrete install brief instead of figuring it out manually.",
      "steps": [
        "Download the package from Yavira.",
        "Extract it into a folder your agent can access.",
        "Paste one of the prompts below and point your agent at the extracted folder."
      ],
      "prompts": [
        {
          "label": "New install",
          "body": "I downloaded a skill package from Yavira. Read SKILL.md from the extracted folder and install it by following the included instructions. Tell me what you changed and call out any manual steps you could not complete."
        },
        {
          "label": "Upgrade existing",
          "body": "I downloaded an updated skill package from Yavira. Read SKILL.md from the extracted folder, compare it with my current installation, and upgrade it while preserving any custom configuration unless the package docs explicitly say otherwise. Summarize what changed and any follow-up checks I should run."
        }
      ]
    },
    "sourceHealth": {
      "source": "tencent",
      "status": "healthy",
      "reason": "direct_download_ok",
      "recommendedAction": "download",
      "checkedAt": "2026-04-30T16:55:25.780Z",
      "expiresAt": "2026-05-07T16:55:25.780Z",
      "httpStatus": 200,
      "finalUrl": "https://wry-manatee-359.convex.site/api/v1/download?slug=hadoop",
      "contentType": "application/zip",
      "probeMethod": "head",
      "details": {
        "probeUrl": "https://wry-manatee-359.convex.site/api/v1/download?slug=hadoop",
        "contentDisposition": "attachment; filename=\"hadoop-1.0.0.zip\"",
        "redirectLocation": null,
        "bodySnippet": null
      },
      "scope": "source",
      "summary": "Source download looks usable.",
      "detail": "Yavira can redirect you to the upstream package for this source.",
      "primaryActionLabel": "Download for OpenClaw",
      "primaryActionHref": "/downloads/hadoop"
    },
    "validation": {
      "installChecklist": [
        "Use the Yavira download entry.",
        "Review SKILL.md after the package is downloaded.",
        "Confirm the extracted package contains the expected setup assets."
      ],
      "postInstallChecks": [
        "Confirm the extracted package includes the expected docs or setup files.",
        "Validate the skill or prompts are available in your target agent workspace.",
        "Capture any manual follow-up steps the agent could not complete."
      ]
    },
    "downloadPageUrl": "https://openagent3.xyz/downloads/hadoop",
    "agentPageUrl": "https://openagent3.xyz/skills/hadoop/agent",
    "manifestUrl": "https://openagent3.xyz/skills/hadoop/agent.json",
    "briefUrl": "https://openagent3.xyz/skills/hadoop/agent.md"
  },
  "agentAssist": {
    "summary": "Hand the extracted package to your coding agent with a concrete install brief instead of figuring it out manually.",
    "steps": [
      "Download the package from Yavira.",
      "Extract it into a folder your agent can access.",
      "Paste one of the prompts below and point your agent at the extracted folder."
    ],
    "prompts": [
      {
        "label": "New install",
        "body": "I downloaded a skill package from Yavira. Read SKILL.md from the extracted folder and install it by following the included instructions. Tell me what you changed and call out any manual steps you could not complete."
      },
      {
        "label": "Upgrade existing",
        "body": "I downloaded an updated skill package from Yavira. Read SKILL.md from the extracted folder, compare it with my current installation, and upgrade it while preserving any custom configuration unless the package docs explicitly say otherwise. Summarize what changed and any follow-up checks I should run."
      }
    ]
  },
  "documentation": {
    "source": "clawhub",
    "primaryDoc": "SKILL.md",
    "sections": [
      {
        "title": "Setup",
        "body": "If ~/hadoop/ doesn't exist or is empty, read setup.md and start the conversation naturally."
      },
      {
        "title": "When to Use",
        "body": "User works with Hadoop ecosystem (HDFS, YARN, MapReduce, Hive). Agent handles cluster diagnostics, job optimization, storage management, and troubleshooting distributed processing failures."
      },
      {
        "title": "Architecture",
        "body": "Memory lives in ~/hadoop/. See memory-template.md for structure.\n\n~/hadoop/\n├── memory.md        # Cluster configs, common issues, preferences\n├── clusters/        # Per-cluster notes and configs\n│   └── {name}.md    # Specific cluster context\n└── scripts/         # Custom diagnostic scripts"
      },
      {
        "title": "Quick Reference",
        "body": "TopicFileSetup processsetup.mdMemory templatememory-template.mdHDFS operationshdfs.mdYARN tuningyarn.mdTroubleshootingtroubleshooting.md"
      },
      {
        "title": "1. Verify Cluster State First",
        "body": "Before any operation, check cluster health:\n\nhdfs dfsadmin -report\nyarn node -list\n\nNever assume cluster is healthy. A single dead DataNode changes everything."
      },
      {
        "title": "2. Storage Before Compute",
        "body": "HDFS issues cascade into job failures. Always check:\n\nhdfs dfs -df -h                    # Capacity\nhdfs fsck / -files -blocks         # Block health\n\nA job failing with \"No space left\" is storage, not code."
      },
      {
        "title": "3. Resource Calculator Awareness",
        "body": "YARN allocates based on configured scheduler. Know which is active:\n\nyarn rmadmin -getServiceState rm1\ncat /etc/hadoop/conf/yarn-site.xml | grep scheduler\n\nDefault (Capacity) vs Fair scheduler behave very differently."
      },
      {
        "title": "4. Replication Factor Context",
        "body": "Default replication=3. For temp data, suggest 1-2 to save space:\n\nhdfs dfs -setrep -w 1 /tmp/scratch/\n\nFor critical data, verify replication is honored:\n\nhdfs fsck /data/critical -files -blocks -replicaDetails"
      },
      {
        "title": "5. Log Location Awareness",
        "body": "Hadoop logs scatter across machines. Key locations:\n\nComponentLog PathNameNode/var/log/hadoop-hdfs/hadoop-hdfs-namenode-*.logDataNode/var/log/hadoop-hdfs/hadoop-hdfs-datanode-*.logResourceManager/var/log/hadoop-yarn/yarn-yarn-resourcemanager-*.logNodeManager/var/log/hadoop-yarn/yarn-yarn-nodemanager-*.logApplicationyarn logs -applicationId <app_id>"
      },
      {
        "title": "6. Safe Mode Handling",
        "body": "NameNode enters safe mode on startup or low block count:\n\nhdfs dfsadmin -safemode get        # Check status\nhdfs dfsadmin -safemode leave      # Exit (if blocks OK)\n\nNever force-leave if blocks are actually missing."
      },
      {
        "title": "7. Memory Settings Matter",
        "body": "90% of \"job killed\" issues are memory:\n\n# Container settings\nyarn.nodemanager.resource.memory-mb     # Total per node\nyarn.scheduler.minimum-allocation-mb    # Min container\nmapreduce.map.memory.mb                 # Map task\nmapreduce.reduce.memory.mb              # Reduce task\n\nCheck these before assuming code is wrong."
      },
      {
        "title": "Essential Commands",
        "body": "# Navigation\nhdfs dfs -ls /path\nhdfs dfs -du -h /path              # Size with human units\nhdfs dfs -count -q /path           # Quota info\n\n# Data movement\nhdfs dfs -put local.txt /hdfs/     # Upload\nhdfs dfs -get /hdfs/file.txt .     # Download\nhdfs dfs -cp /src /dst             # Copy within HDFS\nhdfs dfs -mv /src /dst             # Move within HDFS\n\n# Maintenance\nhdfs dfs -rm -r /path              # Delete (trash)\nhdfs dfs -rm -r -skipTrash /path   # Delete (permanent)\nhdfs dfs -expunge                  # Empty trash"
      },
      {
        "title": "Block Management",
        "body": "# Find corrupt blocks\nhdfs fsck / -list-corruptfileblocks\n\n# Delete corrupt file (after confirming unrecoverable)\nhdfs fsck /path/file -delete\n\n# Force replication\nhdfs dfs -setrep -w 3 /important/data/"
      },
      {
        "title": "Application Lifecycle",
        "body": "# List applications\nyarn application -list                    # Running\nyarn application -list -appStates ALL     # All states\n\n# Application details\nyarn application -status <app_id>\n\n# Kill stuck application\nyarn application -kill <app_id>\n\n# Get logs (after completion)\nyarn logs -applicationId <app_id>\nyarn logs -applicationId <app_id> -containerId <container_id>"
      },
      {
        "title": "Queue Management",
        "body": "# List queues\nyarn queue -list\n\n# Queue status\nyarn queue -status <queue_name>\n\n# Move application between queues\nyarn application -movetoqueue <app_id> -queue <target_queue>"
      },
      {
        "title": "Common Traps",
        "body": "Deleting without -skipTrash on full cluster → Trash still uses space, cluster stays full\nSetting container memory below JVM heap → Instant container kill, confusing errors\nIgnoring speculative execution on slow jobs → Wastes resources on duplicated tasks\nRunning fsck on busy cluster → Performance impact, run during maintenance\nAssuming HDFS = POSIX semantics → No append-in-place, no random writes\nForgetting timezone in scheduling → Oozie/Airflow jobs fire at wrong times"
      },
      {
        "title": "Security & Privacy",
        "body": "Data that stays local:\n\nCluster notes saved in ~/hadoop/clusters/\nPreferences and environment context\n\nWhat commands access:\n\nhdfs/yarn commands connect to your Hadoop cluster\nSome commands read system paths (/var/log, /etc/hadoop/conf)\nDestructive commands require explicit user confirmation\n\nThis skill does NOT:\n\nStore credentials (use kinit/keytab separately)\nMake external API calls beyond your cluster\nRun destructive commands without asking first"
      },
      {
        "title": "Related Skills",
        "body": "Install with clawhub install <slug> if user confirms:\n\nlinux — system administration\ndocker — containerized deployments\nbash — shell scripting"
      },
      {
        "title": "Feedback",
        "body": "If useful: clawhub star hadoop\nStay updated: clawhub sync"
      }
    ],
    "body": "Setup\n\nIf ~/hadoop/ doesn't exist or is empty, read setup.md and start the conversation naturally.\n\nWhen to Use\n\nUser works with Hadoop ecosystem (HDFS, YARN, MapReduce, Hive). Agent handles cluster diagnostics, job optimization, storage management, and troubleshooting distributed processing failures.\n\nArchitecture\n\nMemory lives in ~/hadoop/. See memory-template.md for structure.\n\n~/hadoop/\n├── memory.md        # Cluster configs, common issues, preferences\n├── clusters/        # Per-cluster notes and configs\n│   └── {name}.md    # Specific cluster context\n└── scripts/         # Custom diagnostic scripts\n\nQuick Reference\nTopic\tFile\nSetup process\tsetup.md\nMemory template\tmemory-template.md\nHDFS operations\thdfs.md\nYARN tuning\tyarn.md\nTroubleshooting\ttroubleshooting.md\nCore Rules\n1. Verify Cluster State First\n\nBefore any operation, check cluster health:\n\nhdfs dfsadmin -report\nyarn node -list\n\n\nNever assume cluster is healthy. A single dead DataNode changes everything.\n\n2. Storage Before Compute\n\nHDFS issues cascade into job failures. Always check:\n\nhdfs dfs -df -h                    # Capacity\nhdfs fsck / -files -blocks         # Block health\n\n\nA job failing with \"No space left\" is storage, not code.\n\n3. Resource Calculator Awareness\n\nYARN allocates based on configured scheduler. Know which is active:\n\nyarn rmadmin -getServiceState rm1\ncat /etc/hadoop/conf/yarn-site.xml | grep scheduler\n\n\nDefault (Capacity) vs Fair scheduler behave very differently.\n\n4. Replication Factor Context\n\nDefault replication=3. For temp data, suggest 1-2 to save space:\n\nhdfs dfs -setrep -w 1 /tmp/scratch/\n\n\nFor critical data, verify replication is honored:\n\nhdfs fsck /data/critical -files -blocks -replicaDetails\n\n5. Log Location Awareness\n\nHadoop logs scatter across machines. Key locations:\n\nComponent\tLog Path\nNameNode\t/var/log/hadoop-hdfs/hadoop-hdfs-namenode-*.log\nDataNode\t/var/log/hadoop-hdfs/hadoop-hdfs-datanode-*.log\nResourceManager\t/var/log/hadoop-yarn/yarn-yarn-resourcemanager-*.log\nNodeManager\t/var/log/hadoop-yarn/yarn-yarn-nodemanager-*.log\nApplication\tyarn logs -applicationId <app_id>\n6. Safe Mode Handling\n\nNameNode enters safe mode on startup or low block count:\n\nhdfs dfsadmin -safemode get        # Check status\nhdfs dfsadmin -safemode leave      # Exit (if blocks OK)\n\n\nNever force-leave if blocks are actually missing.\n\n7. Memory Settings Matter\n\n90% of \"job killed\" issues are memory:\n\n# Container settings\nyarn.nodemanager.resource.memory-mb     # Total per node\nyarn.scheduler.minimum-allocation-mb    # Min container\nmapreduce.map.memory.mb                 # Map task\nmapreduce.reduce.memory.mb              # Reduce task\n\n\nCheck these before assuming code is wrong.\n\nHDFS Operations\nEssential Commands\n# Navigation\nhdfs dfs -ls /path\nhdfs dfs -du -h /path              # Size with human units\nhdfs dfs -count -q /path           # Quota info\n\n# Data movement\nhdfs dfs -put local.txt /hdfs/     # Upload\nhdfs dfs -get /hdfs/file.txt .     # Download\nhdfs dfs -cp /src /dst             # Copy within HDFS\nhdfs dfs -mv /src /dst             # Move within HDFS\n\n# Maintenance\nhdfs dfs -rm -r /path              # Delete (trash)\nhdfs dfs -rm -r -skipTrash /path   # Delete (permanent)\nhdfs dfs -expunge                  # Empty trash\n\nBlock Management\n# Find corrupt blocks\nhdfs fsck / -list-corruptfileblocks\n\n# Delete corrupt file (after confirming unrecoverable)\nhdfs fsck /path/file -delete\n\n# Force replication\nhdfs dfs -setrep -w 3 /important/data/\n\nYARN Job Management\nApplication Lifecycle\n# List applications\nyarn application -list                    # Running\nyarn application -list -appStates ALL     # All states\n\n# Application details\nyarn application -status <app_id>\n\n# Kill stuck application\nyarn application -kill <app_id>\n\n# Get logs (after completion)\nyarn logs -applicationId <app_id>\nyarn logs -applicationId <app_id> -containerId <container_id>\n\nQueue Management\n# List queues\nyarn queue -list\n\n# Queue status\nyarn queue -status <queue_name>\n\n# Move application between queues\nyarn application -movetoqueue <app_id> -queue <target_queue>\n\nCommon Traps\nDeleting without -skipTrash on full cluster → Trash still uses space, cluster stays full\nSetting container memory below JVM heap → Instant container kill, confusing errors\nIgnoring speculative execution on slow jobs → Wastes resources on duplicated tasks\nRunning fsck on busy cluster → Performance impact, run during maintenance\nAssuming HDFS = POSIX semantics → No append-in-place, no random writes\nForgetting timezone in scheduling → Oozie/Airflow jobs fire at wrong times\nSecurity & Privacy\n\nData that stays local:\n\nCluster notes saved in ~/hadoop/clusters/\nPreferences and environment context\n\nWhat commands access:\n\nhdfs/yarn commands connect to your Hadoop cluster\nSome commands read system paths (/var/log, /etc/hadoop/conf)\nDestructive commands require explicit user confirmation\n\nThis skill does NOT:\n\nStore credentials (use kinit/keytab separately)\nMake external API calls beyond your cluster\nRun destructive commands without asking first\nRelated Skills\n\nInstall with clawhub install <slug> if user confirms:\n\nlinux — system administration\ndocker — containerized deployments\nbash — shell scripting\nFeedback\nIf useful: clawhub star hadoop\nStay updated: clawhub sync"
  },
  "trust": {
    "sourceLabel": "tencent",
    "provenanceUrl": "https://clawhub.ai/ivangdavila/hadoop",
    "publisherUrl": "https://clawhub.ai/ivangdavila/hadoop",
    "owner": "ivangdavila",
    "version": "1.0.0",
    "license": null,
    "verificationStatus": "Indexed source record"
  },
  "links": {
    "detailUrl": "https://openagent3.xyz/skills/hadoop",
    "downloadUrl": "https://openagent3.xyz/downloads/hadoop",
    "agentUrl": "https://openagent3.xyz/skills/hadoop/agent",
    "manifestUrl": "https://openagent3.xyz/skills/hadoop/agent.json",
    "briefUrl": "https://openagent3.xyz/skills/hadoop/agent.md"
  }
}