{
  "schemaVersion": "1.0",
  "item": {
    "slug": "file-deduplicator",
    "name": "File Deduplicator",
    "source": "tencent",
    "type": "skill",
    "category": "开发工具",
    "sourceUrl": "https://clawhub.ai/Michael-laffin/file-deduplicator",
    "canonicalUrl": "https://clawhub.ai/Michael-laffin/file-deduplicator",
    "targetPlatform": "OpenClaw"
  },
  "install": {
    "downloadMode": "redirect",
    "downloadUrl": "/downloads/file-deduplicator",
    "sourceDownloadUrl": "https://wry-manatee-359.convex.site/api/v1/download?slug=file-deduplicator",
    "sourcePlatform": "tencent",
    "targetPlatform": "OpenClaw",
    "installMethod": "Manual import",
    "extraction": "Extract archive",
    "prerequisites": [
      "OpenClaw"
    ],
    "packageFormat": "ZIP package",
    "includedAssets": [
      "README.md",
      "SKILL.md",
      "config.json",
      "index.js",
      "package.json",
      "test.js"
    ],
    "primaryDoc": "SKILL.md",
    "quickSetup": [
      "Download the package from Yavira.",
      "Extract the archive and review SKILL.md first.",
      "Import or place the package into your OpenClaw setup."
    ],
    "agentAssist": {
      "summary": "Hand the extracted package to your coding agent with a concrete install brief instead of figuring it out manually.",
      "steps": [
        "Download the package from Yavira.",
        "Extract it into a folder your agent can access.",
        "Paste one of the prompts below and point your agent at the extracted folder."
      ],
      "prompts": [
        {
          "label": "New install",
          "body": "I downloaded a skill package from Yavira. Read SKILL.md from the extracted folder and install it by following the included instructions. Then review README.md for any prerequisites, environment setup, or post-install checks. Tell me what you changed and call out any manual steps you could not complete."
        },
        {
          "label": "Upgrade existing",
          "body": "I downloaded an updated skill package from Yavira. Read SKILL.md from the extracted folder, compare it with my current installation, and upgrade it while preserving any custom configuration unless the package docs explicitly say otherwise. Then review README.md for any prerequisites, environment setup, or post-install checks. Summarize what changed and any follow-up checks I should run."
        }
      ]
    },
    "sourceHealth": {
      "source": "tencent",
      "slug": "file-deduplicator",
      "status": "healthy",
      "reason": "direct_download_ok",
      "recommendedAction": "download",
      "checkedAt": "2026-05-03T05:27:00.670Z",
      "expiresAt": "2026-05-10T05:27:00.670Z",
      "httpStatus": 200,
      "finalUrl": "https://wry-manatee-359.convex.site/api/v1/download?slug=file-deduplicator",
      "contentType": "application/zip",
      "probeMethod": "head",
      "details": {
        "probeUrl": "https://wry-manatee-359.convex.site/api/v1/download?slug=file-deduplicator",
        "contentDisposition": "attachment; filename=\"file-deduplicator-1.0.0.zip\"",
        "redirectLocation": null,
        "bodySnippet": null,
        "slug": "file-deduplicator"
      },
      "scope": "item",
      "summary": "Item download looks usable.",
      "detail": "Yavira can redirect you to the upstream package for this item.",
      "primaryActionLabel": "Download for OpenClaw",
      "primaryActionHref": "/downloads/file-deduplicator"
    },
    "validation": {
      "installChecklist": [
        "Use the Yavira download entry.",
        "Review SKILL.md after the package is downloaded.",
        "Confirm the extracted package contains the expected setup assets."
      ],
      "postInstallChecks": [
        "Confirm the extracted package includes the expected docs or setup files.",
        "Validate the skill or prompts are available in your target agent workspace.",
        "Capture any manual follow-up steps the agent could not complete."
      ]
    },
    "downloadPageUrl": "https://openagent3.xyz/downloads/file-deduplicator",
    "agentPageUrl": "https://openagent3.xyz/skills/file-deduplicator/agent",
    "manifestUrl": "https://openagent3.xyz/skills/file-deduplicator/agent.json",
    "briefUrl": "https://openagent3.xyz/skills/file-deduplicator/agent.md"
  },
  "agentAssist": {
    "summary": "Hand the extracted package to your coding agent with a concrete install brief instead of figuring it out manually.",
    "steps": [
      "Download the package from Yavira.",
      "Extract it into a folder your agent can access.",
      "Paste one of the prompts below and point your agent at the extracted folder."
    ],
    "prompts": [
      {
        "label": "New install",
        "body": "I downloaded a skill package from Yavira. Read SKILL.md from the extracted folder and install it by following the included instructions. Then review README.md for any prerequisites, environment setup, or post-install checks. Tell me what you changed and call out any manual steps you could not complete."
      },
      {
        "label": "Upgrade existing",
        "body": "I downloaded an updated skill package from Yavira. Read SKILL.md from the extracted folder, compare it with my current installation, and upgrade it while preserving any custom configuration unless the package docs explicitly say otherwise. Then review README.md for any prerequisites, environment setup, or post-install checks. Summarize what changed and any follow-up checks I should run."
      }
    ]
  },
  "documentation": {
    "source": "clawhub",
    "primaryDoc": "SKILL.md",
    "sections": [
      {
        "title": "File-Deduplicator - Find and Remove Duplicates",
        "body": "Vernox Utility Skill - Clean up your digital hoard."
      },
      {
        "title": "Overview",
        "body": "File-Deduplicator is an intelligent file duplicate finder and remover. Uses content hashing to identify identical files across directories, then provides options to remove duplicates safely."
      },
      {
        "title": "✅ Duplicate Detection",
        "body": "Content-based hashing (MD5) for fast comparison\nSize-based detection (exact match, near match)\nName-based detection (similar filenames)\nDirectory scanning (recursive)\nExclude patterns (.git, node_modules, etc.)"
      },
      {
        "title": "✅ Removal Options",
        "body": "Auto-delete duplicates (keep newest/oldest)\nInteractive review before deletion\nMove to archive instead of delete\nPreserve permissions and metadata\nDry-run mode (preview changes)"
      },
      {
        "title": "✅ Analysis Tools",
        "body": "Duplicate count summary\nSpace savings estimation\nLargest duplicate files\nMost common duplicate patterns\nDetailed report generation"
      },
      {
        "title": "✅ Safety Features",
        "body": "Confirmation prompts before deletion\nBackup to archive folder\nSize threshold (don't remove huge files by mistake)\nWhitelist important directories\nUndo functionality (log for recovery)"
      },
      {
        "title": "Installation",
        "body": "clawhub install file-deduplicator"
      },
      {
        "title": "Find Duplicates in Directory",
        "body": "const result = await findDuplicates({\n  directories: ['./documents', './downloads', './projects'],\n  options: {\n    method: 'content',  // content-based comparison\n    includeSubdirs: true\n  }\n});\n\nconsole.log(`Found ${result.duplicateCount} duplicate groups`);\nconsole.log(`Potential space savings: ${result.spaceSaved}`);"
      },
      {
        "title": "Remove Duplicates Automatically",
        "body": "const result = await removeDuplicates({\n  directories: ['./documents', './downloads'],\n  options: {\n    method: 'content',\n    keep: 'newest',  // keep newest, delete oldest\n    action: 'delete',  // or 'move' to archive\n    autoConfirm: false  // show confirmation for each\n  }\n});\n\nconsole.log(`Removed ${result.filesRemoved} duplicates`);\nconsole.log(`Space saved: ${result.spaceSaved}`);"
      },
      {
        "title": "Dry-Run Preview",
        "body": "const result = await removeDuplicates({\n  directories: ['./documents', './downloads'],\n  options: {\n    method: 'content',\n    keep: 'newest',\n    action: 'delete',\n    dryRun: true  // Preview without actual deletion\n  }\n});\n\nconsole.log('Would remove:');\nresult.duplicates.forEach((dup, i) => {\n  console.log(`${i+1}. ${dup.file}`);\n});"
      },
      {
        "title": "findDuplicates",
        "body": "Find duplicate files across directories.\n\nParameters:\n\ndirectories (array|string, required): Directory paths to scan\noptions (object, optional):\n\nmethod (string): 'content' | 'size' | 'name' - comparison method\nincludeSubdirs (boolean): Scan recursively (default: true)\nminSize (number): Minimum size in bytes (default: 0)\nmaxSize (number): Maximum size in bytes (default: 0)\nexcludePatterns (array): Glob patterns to exclude (default: ['.git', 'node_modules'])\nwhitelist (array): Directories to never scan (default: [])\n\nReturns:\n\nduplicates (array): Array of duplicate groups\n\nduplicateCount (number): Number of duplicate groups found\ntotalFiles (number): Total files scanned\nscanDuration (number): Time taken to scan (ms)\nspaceWasted (number): Total bytes wasted by duplicates\nspaceSaved (number): Potential savings if duplicates removed"
      },
      {
        "title": "removeDuplicates",
        "body": "Remove duplicate files based on findings.\n\nParameters:\n\ndirectories (array|string, required): Same as findDuplicates\noptions (object, optional):\n\nkeep (string): 'newest' | 'oldest' | 'smallest' | 'largest' - which to keep\naction (string): 'delete' | 'move' | 'archive'\narchivePath (string): Where to move files when action='move'\ndryRun (boolean): Preview without actual action\nautoConfirm (boolean): Auto-confirm deletions\nsizeThreshold (number): Don't remove files larger than this\n\nReturns:\n\nfilesRemoved (number): Number of files removed/moved\nspaceSaved (number): Bytes saved\ngroupsProcessed (number): Number of duplicate groups handled\nlogPath (string): Path to action log\nerrors (array): Any errors encountered"
      },
      {
        "title": "analyzeDirectory",
        "body": "Analyze a single directory for duplicates.\n\nParameters:\n\ndirectory (string, required): Path to directory\noptions (object, optional): Same as findDuplicates options\n\nReturns:\n\nfileCount (number): Total files in directory\ntotalSize (number): Total bytes in directory\nduplicateSize (number): Bytes in duplicate files\nduplicateRatio (number): Percentage of files that are duplicates"
      },
      {
        "title": "Digital Hoarder Cleanup",
        "body": "Find duplicate photos/videos\nIdentify wasted storage space\nRemove old duplicates, keep newest\nClean up download folders"
      },
      {
        "title": "Document Management",
        "body": "Find duplicate PDFs, docs, reports\nKeep latest version, archive old versions\nPrevent version confusion\nReduce backup bloat"
      },
      {
        "title": "Project Cleanup",
        "body": "Find duplicate source files\nRemove duplicate build artifacts\nClean up node_modules duplicates\nSave storage on SSD/HDD"
      },
      {
        "title": "Backup Optimization",
        "body": "Find duplicate backup files\nRemove redundant backups\nIdentify what's actually duplicated\nSave space on backup drives"
      },
      {
        "title": "Edit config.json:",
        "body": "{\n  \"detection\": {\n    \"defaultMethod\": \"content\",\n    \"sizeTolerancePercent\": 0,  // exact match only\n    \"nameSimilarity\": 0.7,  // 0-1, lower = more similar\n    \"includeSubdirs\": true\n  },\n  \"removal\": {\n    \"defaultAction\": \"delete\",\n    \"defaultKeep\": \"newest\",\n    \"archivePath\": \"./archive\",\n    \"sizeThreshold\": 10485760,  // 10MB threshold\n    \"autoConfirm\": false,\n    \"dryRunDefault\": false\n  },\n  \"exclude\": {\n    \"patterns\": [\".git\", \"node_modules\", \".vscode\", \".idea\"],\n    \"whitelist\": [\"important\", \"work\", \"projects\"]\n  }\n}"
      },
      {
        "title": "Content-Based (Recommended)",
        "body": "Fast MD5 hashing\nDetects exact duplicates regardless of filename\nWorks across renamed files\nPerfect for documents, code, archives"
      },
      {
        "title": "Size-Based",
        "body": "Compares file sizes\nFaster than content hashing\nGood for media files where content hashing is slow\nFinds near-duplicates (similar but not exact)"
      },
      {
        "title": "Name-Based",
        "body": "Compares filenames\nDetects similar named files\nGood for finding version duplicates (file_v1, file_v2)"
      },
      {
        "title": "Find Duplicates in Documents",
        "body": "const result = await findDuplicates({\n  directories: '~/Documents',\n  options: {\n    method: 'content',\n    includeSubdirs: true\n  }\n});\n\nconsole.log(`Found ${result.duplicateCount} duplicate sets`);\nresult.duplicates.slice(0, 5).forEach((set, i) => {\n  console.log(`Set ${i+1}: ${set.files.length} files`);\n  console.log(`  Total size: ${set.totalSize} bytes`);\n});"
      },
      {
        "title": "Remove Duplicates, Keep Newest",
        "body": "const result = await removeDuplicates({\n  directories: '~/Documents',\n  options: {\n    keep: 'newest',\n    action: 'delete'\n  }\n});\n\nconsole.log(`Removed ${result.filesRemoved} files`);\nconsole.log(`Saved ${result.spaceSaved} bytes`);"
      },
      {
        "title": "Move to Archive Instead of Delete",
        "body": "const result = await removeDuplicates({\n  directories: '~/Downloads',\n  options: {\n    keep: 'newest',\n    action: 'move',\n    archivePath: '~/Documents/Archive'\n  }\n});\n\nconsole.log(`Archived ${result.filesRemoved} files`);\nconsole.log(`Safe in: ~/Documents/Archive`);"
      },
      {
        "title": "Dry-Run Preview Changes",
        "body": "const result = await removeDuplicates({\n  directories: '~/Documents',\n  options: {\n    dryRun: true  // Just show what would happen\n  }\n});\n\nconsole.log('=== Dry Run Preview ===');\nresult.duplicates.forEach((set, i) => {\n  console.log(`Would delete: ${set.toDelete.join(', ')}`);\n});"
      },
      {
        "title": "Scanning Speed",
        "body": "Small directories (<1000 files): <1s\nMedium directories (1000-10000 files): 1-5s\nLarge directories (10000+ files): 5-20s"
      },
      {
        "title": "Detection Accuracy",
        "body": "Content-based: 100% (exact duplicates)\nSize-based: Fast but may miss renamed files\nName-based: Detects naming patterns only"
      },
      {
        "title": "Memory Usage",
        "body": "Hash cache: ~1MB per 100,000 files\nBatch processing: Processes 1000 files at a time\nPeak memory: ~200MB for 1M files"
      },
      {
        "title": "Size Thresholding",
        "body": "Won't remove files larger than configurable threshold (default: 10MB). Prevents accidental deletion of important large files."
      },
      {
        "title": "Archive Mode",
        "body": "Move files to archive directory instead of deleting. No data loss, full recoverability."
      },
      {
        "title": "Action Logging",
        "body": "All deletions/moves are logged to file for recovery and audit."
      },
      {
        "title": "Undo Functionality",
        "body": "Log file can be used to restore accidentally deleted files (limited undo window)."
      },
      {
        "title": "Permission Errors",
        "body": "Clear error message\nSuggest running with sudo\nSkip files that can't be accessed"
      },
      {
        "title": "File Lock Errors",
        "body": "Detect locked files\nSkip and report\nSuggest closing applications using files"
      },
      {
        "title": "Space Errors",
        "body": "Check available disk space before deletion\nWarn if space is critically low\nPrevent disk-full scenarios"
      },
      {
        "title": "Not Finding Expected Duplicates",
        "body": "Check detection method (content vs size vs name)\nVerify exclude patterns aren't too broad\nCheck if files are in whitelisted directories\nTry with includeSubdirs: false"
      },
      {
        "title": "Deletion Not Working",
        "body": "Check write permissions on directories\nVerify action isn't 'delete' with autoConfirm: true\nCheck size threshold isn't blocking all deletions\nCheck file locks (is another program using files?)"
      },
      {
        "title": "Slow Scanning",
        "body": "Reduce includeSubdirs scope\nUse size-based detection (faster)\nExclude large directories (node_modules, .git)\nProcess directories individually instead of batch"
      },
      {
        "title": "Best Results",
        "body": "Use content-based detection for documents (100% accurate)\nRun dry-run first to preview changes\nArchive instead of delete for important files\nCheck logs if anything unexpected deleted"
      },
      {
        "title": "Performance Optimization",
        "body": "Process frequently used directories first\nUse size threshold to skip large media files\nExclude hidden directories from scan\nProcess directories in parallel when possible"
      },
      {
        "title": "Space Management",
        "body": "Regular duplicate cleanup prevents storage bloat\nDelete temp directories regularly\nClear download folders of installers\nEmpty trash before large scans"
      },
      {
        "title": "Roadmap",
        "body": "Duplicate detection by image similarity\nNear-duplicate detection (similar but not exact)\nDuplicate detection across network drives\nCloud storage integration (S3, Google Drive)\nAutomatic scheduling of scans\nHeuristic duplicate detection (ML-based)\nRecover deleted files from backup\nDuplicate detection by file content similarity (not just hash)"
      },
      {
        "title": "License",
        "body": "MIT\n\nFind duplicates. Save space. Keep your system clean. 🔮"
      }
    ],
    "body": "File-Deduplicator - Find and Remove Duplicates\n\nVernox Utility Skill - Clean up your digital hoard.\n\nOverview\n\nFile-Deduplicator is an intelligent file duplicate finder and remover. Uses content hashing to identify identical files across directories, then provides options to remove duplicates safely.\n\nFeatures\n✅ Duplicate Detection\nContent-based hashing (MD5) for fast comparison\nSize-based detection (exact match, near match)\nName-based detection (similar filenames)\nDirectory scanning (recursive)\nExclude patterns (.git, node_modules, etc.)\n✅ Removal Options\nAuto-delete duplicates (keep newest/oldest)\nInteractive review before deletion\nMove to archive instead of delete\nPreserve permissions and metadata\nDry-run mode (preview changes)\n✅ Analysis Tools\nDuplicate count summary\nSpace savings estimation\nLargest duplicate files\nMost common duplicate patterns\nDetailed report generation\n✅ Safety Features\nConfirmation prompts before deletion\nBackup to archive folder\nSize threshold (don't remove huge files by mistake)\nWhitelist important directories\nUndo functionality (log for recovery)\nInstallation\nclawhub install file-deduplicator\n\nQuick Start\nFind Duplicates in Directory\nconst result = await findDuplicates({\n  directories: ['./documents', './downloads', './projects'],\n  options: {\n    method: 'content',  // content-based comparison\n    includeSubdirs: true\n  }\n});\n\nconsole.log(`Found ${result.duplicateCount} duplicate groups`);\nconsole.log(`Potential space savings: ${result.spaceSaved}`);\n\nRemove Duplicates Automatically\nconst result = await removeDuplicates({\n  directories: ['./documents', './downloads'],\n  options: {\n    method: 'content',\n    keep: 'newest',  // keep newest, delete oldest\n    action: 'delete',  // or 'move' to archive\n    autoConfirm: false  // show confirmation for each\n  }\n});\n\nconsole.log(`Removed ${result.filesRemoved} duplicates`);\nconsole.log(`Space saved: ${result.spaceSaved}`);\n\nDry-Run Preview\nconst result = await removeDuplicates({\n  directories: ['./documents', './downloads'],\n  options: {\n    method: 'content',\n    keep: 'newest',\n    action: 'delete',\n    dryRun: true  // Preview without actual deletion\n  }\n});\n\nconsole.log('Would remove:');\nresult.duplicates.forEach((dup, i) => {\n  console.log(`${i+1}. ${dup.file}`);\n});\n\nTool Functions\nfindDuplicates\n\nFind duplicate files across directories.\n\nParameters:\n\ndirectories (array|string, required): Directory paths to scan\noptions (object, optional):\nmethod (string): 'content' | 'size' | 'name' - comparison method\nincludeSubdirs (boolean): Scan recursively (default: true)\nminSize (number): Minimum size in bytes (default: 0)\nmaxSize (number): Maximum size in bytes (default: 0)\nexcludePatterns (array): Glob patterns to exclude (default: ['.git', 'node_modules'])\nwhitelist (array): Directories to never scan (default: [])\n\nReturns:\n\nduplicates (array): Array of duplicate groups\nduplicateCount (number): Number of duplicate groups found\ntotalFiles (number): Total files scanned\nscanDuration (number): Time taken to scan (ms)\nspaceWasted (number): Total bytes wasted by duplicates\nspaceSaved (number): Potential savings if duplicates removed\nremoveDuplicates\n\nRemove duplicate files based on findings.\n\nParameters:\n\ndirectories (array|string, required): Same as findDuplicates\noptions (object, optional):\nkeep (string): 'newest' | 'oldest' | 'smallest' | 'largest' - which to keep\naction (string): 'delete' | 'move' | 'archive'\narchivePath (string): Where to move files when action='move'\ndryRun (boolean): Preview without actual action\nautoConfirm (boolean): Auto-confirm deletions\nsizeThreshold (number): Don't remove files larger than this\n\nReturns:\n\nfilesRemoved (number): Number of files removed/moved\nspaceSaved (number): Bytes saved\ngroupsProcessed (number): Number of duplicate groups handled\nlogPath (string): Path to action log\nerrors (array): Any errors encountered\nanalyzeDirectory\n\nAnalyze a single directory for duplicates.\n\nParameters:\n\ndirectory (string, required): Path to directory\noptions (object, optional): Same as findDuplicates options\n\nReturns:\n\nfileCount (number): Total files in directory\ntotalSize (number): Total bytes in directory\nduplicateSize (number): Bytes in duplicate files\nduplicateRatio (number): Percentage of files that are duplicates\nUse Cases\nDigital Hoarder Cleanup\nFind duplicate photos/videos\nIdentify wasted storage space\nRemove old duplicates, keep newest\nClean up download folders\nDocument Management\nFind duplicate PDFs, docs, reports\nKeep latest version, archive old versions\nPrevent version confusion\nReduce backup bloat\nProject Cleanup\nFind duplicate source files\nRemove duplicate build artifacts\nClean up node_modules duplicates\nSave storage on SSD/HDD\nBackup Optimization\nFind duplicate backup files\nRemove redundant backups\nIdentify what's actually duplicated\nSave space on backup drives\nConfiguration\nEdit config.json:\n{\n  \"detection\": {\n    \"defaultMethod\": \"content\",\n    \"sizeTolerancePercent\": 0,  // exact match only\n    \"nameSimilarity\": 0.7,  // 0-1, lower = more similar\n    \"includeSubdirs\": true\n  },\n  \"removal\": {\n    \"defaultAction\": \"delete\",\n    \"defaultKeep\": \"newest\",\n    \"archivePath\": \"./archive\",\n    \"sizeThreshold\": 10485760,  // 10MB threshold\n    \"autoConfirm\": false,\n    \"dryRunDefault\": false\n  },\n  \"exclude\": {\n    \"patterns\": [\".git\", \"node_modules\", \".vscode\", \".idea\"],\n    \"whitelist\": [\"important\", \"work\", \"projects\"]\n  }\n}\n\nMethods\nContent-Based (Recommended)\nFast MD5 hashing\nDetects exact duplicates regardless of filename\nWorks across renamed files\nPerfect for documents, code, archives\nSize-Based\nCompares file sizes\nFaster than content hashing\nGood for media files where content hashing is slow\nFinds near-duplicates (similar but not exact)\nName-Based\nCompares filenames\nDetects similar named files\nGood for finding version duplicates (file_v1, file_v2)\nExamples\nFind Duplicates in Documents\nconst result = await findDuplicates({\n  directories: '~/Documents',\n  options: {\n    method: 'content',\n    includeSubdirs: true\n  }\n});\n\nconsole.log(`Found ${result.duplicateCount} duplicate sets`);\nresult.duplicates.slice(0, 5).forEach((set, i) => {\n  console.log(`Set ${i+1}: ${set.files.length} files`);\n  console.log(`  Total size: ${set.totalSize} bytes`);\n});\n\nRemove Duplicates, Keep Newest\nconst result = await removeDuplicates({\n  directories: '~/Documents',\n  options: {\n    keep: 'newest',\n    action: 'delete'\n  }\n});\n\nconsole.log(`Removed ${result.filesRemoved} files`);\nconsole.log(`Saved ${result.spaceSaved} bytes`);\n\nMove to Archive Instead of Delete\nconst result = await removeDuplicates({\n  directories: '~/Downloads',\n  options: {\n    keep: 'newest',\n    action: 'move',\n    archivePath: '~/Documents/Archive'\n  }\n});\n\nconsole.log(`Archived ${result.filesRemoved} files`);\nconsole.log(`Safe in: ~/Documents/Archive`);\n\nDry-Run Preview Changes\nconst result = await removeDuplicates({\n  directories: '~/Documents',\n  options: {\n    dryRun: true  // Just show what would happen\n  }\n});\n\nconsole.log('=== Dry Run Preview ===');\nresult.duplicates.forEach((set, i) => {\n  console.log(`Would delete: ${set.toDelete.join(', ')}`);\n});\n\nPerformance\nScanning Speed\nSmall directories (<1000 files): <1s\nMedium directories (1000-10000 files): 1-5s\nLarge directories (10000+ files): 5-20s\nDetection Accuracy\nContent-based: 100% (exact duplicates)\nSize-based: Fast but may miss renamed files\nName-based: Detects naming patterns only\nMemory Usage\nHash cache: ~1MB per 100,000 files\nBatch processing: Processes 1000 files at a time\nPeak memory: ~200MB for 1M files\nSafety Features\nSize Thresholding\n\nWon't remove files larger than configurable threshold (default: 10MB). Prevents accidental deletion of important large files.\n\nArchive Mode\n\nMove files to archive directory instead of deleting. No data loss, full recoverability.\n\nAction Logging\n\nAll deletions/moves are logged to file for recovery and audit.\n\nUndo Functionality\n\nLog file can be used to restore accidentally deleted files (limited undo window).\n\nError Handling\nPermission Errors\nClear error message\nSuggest running with sudo\nSkip files that can't be accessed\nFile Lock Errors\nDetect locked files\nSkip and report\nSuggest closing applications using files\nSpace Errors\nCheck available disk space before deletion\nWarn if space is critically low\nPrevent disk-full scenarios\nTroubleshooting\nNot Finding Expected Duplicates\nCheck detection method (content vs size vs name)\nVerify exclude patterns aren't too broad\nCheck if files are in whitelisted directories\nTry with includeSubdirs: false\nDeletion Not Working\nCheck write permissions on directories\nVerify action isn't 'delete' with autoConfirm: true\nCheck size threshold isn't blocking all deletions\nCheck file locks (is another program using files?)\nSlow Scanning\nReduce includeSubdirs scope\nUse size-based detection (faster)\nExclude large directories (node_modules, .git)\nProcess directories individually instead of batch\nTips\nBest Results\nUse content-based detection for documents (100% accurate)\nRun dry-run first to preview changes\nArchive instead of delete for important files\nCheck logs if anything unexpected deleted\nPerformance Optimization\nProcess frequently used directories first\nUse size threshold to skip large media files\nExclude hidden directories from scan\nProcess directories in parallel when possible\nSpace Management\nRegular duplicate cleanup prevents storage bloat\nDelete temp directories regularly\nClear download folders of installers\nEmpty trash before large scans\nRoadmap\n Duplicate detection by image similarity\n Near-duplicate detection (similar but not exact)\n Duplicate detection across network drives\n Cloud storage integration (S3, Google Drive)\n Automatic scheduling of scans\n Heuristic duplicate detection (ML-based)\n Recover deleted files from backup\n Duplicate detection by file content similarity (not just hash)\nLicense\n\nMIT\n\nFind duplicates. Save space. Keep your system clean. 🔮"
  },
  "trust": {
    "sourceLabel": "tencent",
    "provenanceUrl": "https://clawhub.ai/Michael-laffin/file-deduplicator",
    "publisherUrl": "https://clawhub.ai/Michael-laffin/file-deduplicator",
    "owner": "Michael-laffin",
    "version": "1.0.0",
    "license": null,
    "verificationStatus": "Indexed source record"
  },
  "links": {
    "detailUrl": "https://openagent3.xyz/skills/file-deduplicator",
    "downloadUrl": "https://openagent3.xyz/downloads/file-deduplicator",
    "agentUrl": "https://openagent3.xyz/skills/file-deduplicator/agent",
    "manifestUrl": "https://openagent3.xyz/skills/file-deduplicator/agent.json",
    "briefUrl": "https://openagent3.xyz/skills/file-deduplicator/agent.md"
  }
}