{
  "schemaVersion": "1.0",
  "item": {
    "slug": "afrexai-performance-engineering",
    "name": "Performance Engineering System",
    "source": "tencent",
    "type": "skill",
    "category": "开发工具",
    "sourceUrl": "https://clawhub.ai/1kalin/afrexai-performance-engineering",
    "canonicalUrl": "https://clawhub.ai/1kalin/afrexai-performance-engineering",
    "targetPlatform": "OpenClaw"
  },
  "install": {
    "downloadMode": "redirect",
    "downloadUrl": "/downloads/afrexai-performance-engineering",
    "sourceDownloadUrl": "https://wry-manatee-359.convex.site/api/v1/download?slug=afrexai-performance-engineering",
    "sourcePlatform": "tencent",
    "targetPlatform": "OpenClaw",
    "installMethod": "Manual import",
    "extraction": "Extract archive",
    "prerequisites": [
      "OpenClaw"
    ],
    "packageFormat": "ZIP package",
    "includedAssets": [
      "README.md",
      "SKILL.md"
    ],
    "primaryDoc": "SKILL.md",
    "quickSetup": [
      "Download the package from Yavira.",
      "Extract the archive and review SKILL.md first.",
      "Import or place the package into your OpenClaw setup."
    ],
    "agentAssist": {
      "summary": "Hand the extracted package to your coding agent with a concrete install brief instead of figuring it out manually.",
      "steps": [
        "Download the package from Yavira.",
        "Extract it into a folder your agent can access.",
        "Paste one of the prompts below and point your agent at the extracted folder."
      ],
      "prompts": [
        {
          "label": "New install",
          "body": "I downloaded a skill package from Yavira. Read SKILL.md from the extracted folder and install it by following the included instructions. Then review README.md for any prerequisites, environment setup, or post-install checks. Tell me what you changed and call out any manual steps you could not complete."
        },
        {
          "label": "Upgrade existing",
          "body": "I downloaded an updated skill package from Yavira. Read SKILL.md from the extracted folder, compare it with my current installation, and upgrade it while preserving any custom configuration unless the package docs explicitly say otherwise. Then review README.md for any prerequisites, environment setup, or post-install checks. Summarize what changed and any follow-up checks I should run."
        }
      ]
    },
    "sourceHealth": {
      "source": "tencent",
      "status": "healthy",
      "reason": "direct_download_ok",
      "recommendedAction": "download",
      "checkedAt": "2026-04-30T16:55:25.780Z",
      "expiresAt": "2026-05-07T16:55:25.780Z",
      "httpStatus": 200,
      "finalUrl": "https://wry-manatee-359.convex.site/api/v1/download?slug=afrexai-performance-engineering",
      "contentType": "application/zip",
      "probeMethod": "head",
      "details": {
        "probeUrl": "https://wry-manatee-359.convex.site/api/v1/download?slug=afrexai-performance-engineering",
        "contentDisposition": "attachment; filename=\"afrexai-performance-engineering-1.0.0.zip\"",
        "redirectLocation": null,
        "bodySnippet": null
      },
      "scope": "source",
      "summary": "Source download looks usable.",
      "detail": "Yavira can redirect you to the upstream package for this source.",
      "primaryActionLabel": "Download for OpenClaw",
      "primaryActionHref": "/downloads/afrexai-performance-engineering"
    },
    "validation": {
      "installChecklist": [
        "Use the Yavira download entry.",
        "Review SKILL.md after the package is downloaded.",
        "Confirm the extracted package contains the expected setup assets."
      ],
      "postInstallChecks": [
        "Confirm the extracted package includes the expected docs or setup files.",
        "Validate the skill or prompts are available in your target agent workspace.",
        "Capture any manual follow-up steps the agent could not complete."
      ]
    },
    "downloadPageUrl": "https://openagent3.xyz/downloads/afrexai-performance-engineering",
    "agentPageUrl": "https://openagent3.xyz/skills/afrexai-performance-engineering/agent",
    "manifestUrl": "https://openagent3.xyz/skills/afrexai-performance-engineering/agent.json",
    "briefUrl": "https://openagent3.xyz/skills/afrexai-performance-engineering/agent.md"
  },
  "agentAssist": {
    "summary": "Hand the extracted package to your coding agent with a concrete install brief instead of figuring it out manually.",
    "steps": [
      "Download the package from Yavira.",
      "Extract it into a folder your agent can access.",
      "Paste one of the prompts below and point your agent at the extracted folder."
    ],
    "prompts": [
      {
        "label": "New install",
        "body": "I downloaded a skill package from Yavira. Read SKILL.md from the extracted folder and install it by following the included instructions. Then review README.md for any prerequisites, environment setup, or post-install checks. Tell me what you changed and call out any manual steps you could not complete."
      },
      {
        "label": "Upgrade existing",
        "body": "I downloaded an updated skill package from Yavira. Read SKILL.md from the extracted folder, compare it with my current installation, and upgrade it while preserving any custom configuration unless the package docs explicitly say otherwise. Then review README.md for any prerequisites, environment setup, or post-install checks. Summarize what changed and any follow-up checks I should run."
      }
    ]
  },
  "documentation": {
    "source": "clawhub",
    "primaryDoc": "SKILL.md",
    "sections": [
      {
        "title": "Performance Engineering System",
        "body": "From \"it's slow\" to \"here's why and here's the fix\" — a complete methodology for measuring, diagnosing, optimizing, and preventing performance problems."
      },
      {
        "title": "Phase 1: Performance Investigation Brief",
        "body": "Before touching anything, define the problem.\n\n# performance-brief.yaml\ninvestigation:\n  reported_by: \"\"\n  reported_date: \"\"\n  system: \"\"              # service/app name\n  environment: \"\"         # production, staging, dev\n\nproblem_statement:\n  symptom: \"\"             # \"API response time increased 3x\"\n  impact: \"\"              # \"15% of users seeing timeouts\"\n  since_when: \"\"          # \"After deploy v2.14 on Feb 20\"\n  affected_scope: \"\"      # \"All endpoints\" | \"Only /search\" | \"Users in EU\"\n\nbaselines:\n  target_p50: \"\"          # e.g., \"200ms\"\n  target_p95: \"\"          # e.g., \"500ms\"\n  target_p99: \"\"          # e.g., \"1000ms\"\n  current_p50: \"\"\n  current_p95: \"\"\n  current_p99: \"\"\n  throughput_target: \"\"   # e.g., \"1000 rps\"\n  error_rate_target: \"\"   # e.g., \"<0.1%\"\n\nconstraints:\n  budget: \"\"              # time/money for optimization\n  risk_tolerance: \"\"      # \"Can we change the schema?\" \"Can we add caching?\"\n  deadline: \"\"            # \"Must fix before Black Friday\"\n\nhypothesis:\n  primary: \"\"             # \"N+1 queries in the new recommendation engine\"\n  secondary: \"\"           # \"Connection pool exhaustion under load\"\n  evidence: \"\"            # \"Slow query log shows 200+ queries per request\""
      },
      {
        "title": "Performance Budget Framework",
        "body": "Set budgets BEFORE building, not after complaints:\n\nMetric\tWeb App\tAPI\tMobile\tBatch Job\nP50 response\t<200ms\t<100ms\t<300ms\tN/A\nP95 response\t<500ms\t<250ms\t<800ms\tN/A\nP99 response\t<1s\t<500ms\t<1.5s\tN/A\nError rate\t<0.1%\t<0.01%\t<0.5%\t<0.001%\nTime to Interactive\t<3s\tN/A\t<2s\tN/A\nMemory per request\t<50MB\t<20MB\t<100MB\t<1GB\nCPU per request\t<100ms\t<50ms\t<200ms\tN/A\nThroughput\t100+ rps\t500+ rps\tN/A\titems/min"
      },
      {
        "title": "The Golden Rule",
        "body": "Never optimize without measuring first. Never measure without a hypothesis."
      },
      {
        "title": "Profiling Decision Tree",
        "body": "Is it slow?\n├── YES → Where is time spent?\n│   ├── CPU-bound → Profile CPU (flame graph)\n│   │   ├── Hot function found → Optimize algorithm/data structure\n│   │   └── Spread evenly → Architecture problem (too many layers)\n│   ├── I/O-bound → Profile I/O\n│   │   ├── Database → Query analysis (Phase 4)\n│   │   ├── Network → Connection profiling\n│   │   ├── Disk → I/O scheduler + buffering\n│   │   └── External API → Caching + async + circuit breaker\n│   ├── Memory-bound → Profile allocations\n│   │   ├── GC pressure → Reduce allocations, pool objects\n│   │   ├── Memory leak → Heap snapshot comparison\n│   │   └── Cache thrashing → Resize or eviction policy\n│   └── Concurrency-bound → Profile locks/contention\n│       ├── Lock contention → Reduce critical section, lock-free structures\n│       ├── Thread starvation → Pool sizing\n│       └── Deadlock → Lock ordering analysis\n└── NO → Define \"fast enough\" (see budgets above)"
      },
      {
        "title": "CPU Profiling by Language",
        "body": "Node.js\n\n# Built-in profiler (V8)\nnode --prof app.js\nnode --prof-process isolate-*.log > profile.txt\n\n# Inspector-based (connect Chrome DevTools)\nnode --inspect app.js\n# Open chrome://inspect → Profiler → Start\n\n# Clinic.js (best overall Node.js profiler)\nnpx clinic doctor -- node app.js\nnpx clinic flame -- node app.js    # Flame graph\nnpx clinic bubbleprof -- node app.js  # Async bottlenecks\n\n# 0x (flame graphs)\nnpx 0x app.js\n\nPython\n\n# cProfile (built-in)\nimport cProfile\nimport pstats\n\nprofiler = cProfile.Profile()\nprofiler.enable()\n# ... code to profile ...\nprofiler.disable()\n\nstats = pstats.Stats(profiler)\nstats.sort_stats('cumulative')\nstats.print_stats(20)  # Top 20\n\n# Line profiler (pip install line-profiler)\n# Add @profile decorator, then:\n# kernprof -l -v script.py\n\n# py-spy (sampling profiler, no code changes)\n# pip install py-spy\n# py-spy top --pid <PID>\n# py-spy record -o profile.svg --pid <PID>  # Flame graph\n\n# Scalene (CPU + memory + GPU)\n# pip install scalene\n# scalene script.py\n\nGo\n\n// Built-in pprof\nimport (\n    \"net/http\"\n    _ \"net/http/pprof\"\n    \"runtime/pprof\"\n)\n\n// HTTP server (add to existing server)\n// Access: http://localhost:6060/debug/pprof/\ngo func() { http.ListenAndServe(\":6060\", nil) }()\n\n// CLI analysis\n// go tool pprof http://localhost:6060/debug/pprof/profile?seconds=30\n// go tool pprof -http=:8080 profile.out  # Web UI\n\nJava\n\n# async-profiler (best for JVM)\n# https://github.com/async-profiler/async-profiler\n./asprof -d 30 -f profile.html <PID>\n\n# JFR (built-in since JDK 11)\njava -XX:StartFlightRecording=duration=60s,filename=rec.jfr MyApp\njfr print --events CPULoad rec.jfr\n\n# jstack (thread dump)\njstack <PID> > threads.txt"
      },
      {
        "title": "Memory Profiling",
        "body": "Leak Detection Pattern (any language)\n\n1. Take heap snapshot at T0\n2. Run suspected operation N times\n3. Force GC\n4. Take heap snapshot at T1\n5. Compare: objects that grew = potential leak\n6. Check: are they reachable? From where? (retention path)\n\nNode.js Memory\n\n// Heap snapshot\nconst v8 = require('v8');\nconst fs = require('fs');\n\nfunction takeSnapshot(label) {\n  const snapshotStream = v8.writeHeapSnapshot();\n  console.log(`Heap snapshot written to ${snapshotStream}`);\n}\n\n// Process memory monitoring\nsetInterval(() => {\n  const mem = process.memoryUsage();\n  console.log({\n    rss_mb: (mem.rss / 1048576).toFixed(1),\n    heap_used_mb: (mem.heapUsed / 1048576).toFixed(1),\n    heap_total_mb: (mem.heapTotal / 1048576).toFixed(1),\n    external_mb: (mem.external / 1048576).toFixed(1),\n  });\n}, 10000);\n\nPython Memory\n\n# tracemalloc (built-in)\nimport tracemalloc\n\ntracemalloc.start()\n# ... code ...\nsnapshot = tracemalloc.take_snapshot()\ntop = snapshot.statistics('lineno')\nfor stat in top[:10]:\n    print(stat)\n\n# objgraph (pip install objgraph)\nimport objgraph\nobjgraph.show_most_common_types(limit=20)\nobjgraph.show_growth(limit=10)  # Call twice to see what's growing"
      },
      {
        "title": "Flame Graph Interpretation",
        "body": "Reading a flame graph:\n┌─────────────────────────────────────────────┐\n│                  main()                      │  ← Entry point (bottom)\n├──────────────────────┬──────────────────────┤\n│     processData()    │    renderOutput()     │  ← Width = time spent\n├──────────┬───────────┤                      │\n│ parseCSV │ validate  │                      │  ← Tall = deep call stack\n├──────────┤           │                      │\n│ readline │           │                      │  ← Top = where CPU burns\n└──────────┴───────────┴──────────────────────┘\n\nWHAT TO LOOK FOR:\n1. Wide plateaus at top → CPU-intensive leaf function (optimize this!)\n2. Many thin towers → excessive function calls (batch or reduce)\n3. Recursive patterns → potential stack overflow risk\n4. Unexpected width → function taking more time than expected\n5. GC/runtime frames → memory pressure\n\nACTION RULES:\n- Plateau >20% width → must investigate\n- Plateau >40% width → almost certainly the bottleneck\n- If top 3 functions = 80% of time → focused optimization will work\n- If evenly distributed → architectural change needed"
      },
      {
        "title": "Algorithm & Data Structure Optimizations",
        "body": "Problem\tBad O()\tFix\tGood O()\nSearch unsorted array\tO(n)\tSort + binary search, or use Set/Map\tO(log n) or O(1)\nNested loop matching\tO(n²)\tHash map lookup\tO(n)\nRepeated string concat\tO(n²)\tStringBuilder/join array\tO(n)\nSorting already-sorted data\tO(n log n)\tCheck if sorted first\tO(n)\nFinding duplicates\tO(n²)\tSet-based detection\tO(n)\nFrequent min/max of changing data\tO(n) per query\tHeap/priority queue\tO(log n)"
      },
      {
        "title": "Caching Strategy Decision Matrix",
        "body": "Should you cache this?\n├── Does the same input always produce the same output?\n│   ├── YES → Cache candidate ✓\n│   └── NO → Can you define a valid TTL?\n│       ├── YES → Cache with TTL ✓\n│       └── NO → Don't cache ✗\n├── Is it called frequently?\n│   ├── <10x/min → Probably not worth caching\n│   └── >10x/min → Cache ✓\n├── Is the source data expensive to compute/fetch?\n│   ├── <10ms → Probably not worth caching\n│   └── >10ms → Cache ✓\n└── Does staleness cause problems?\n    ├── Critical (financial, auth) → Short TTL or cache-aside with invalidation\n    ├── Important (user data) → 1-5 min TTL with invalidation\n    └── Tolerant (content, search) → 5-60 min TTL\n\nCACHE LAYERS (use in order):\n1. In-process (Map/LRU) → <1μs, limited by memory, per-instance\n2. Shared cache (Redis/Memcached) → <1ms, shared across instances\n3. CDN/edge cache → <10ms, geographic distribution\n4. Browser cache → 0ms for user, stale risk\n\nINVALIDATION STRATEGIES:\n- TTL-based: simplest, best for read-heavy + staleness-tolerant\n- Event-based: publish cache-invalidate on write, best for consistency\n- Write-through: update cache on every write, best for write-read patterns\n- Cache-aside: app manages cache explicitly, most flexible"
      },
      {
        "title": "Connection Pooling",
        "body": "# Sizing formula\npool_size: min(available_cores * 2 + effective_spindle_count, max_connections / num_instances)\n\n# Rules of thumb:\n# - PostgreSQL: connections = cores * 2 + 1 (per pgBouncer docs)\n# - MySQL: keep total connections < 150 for most workloads\n# - HTTP clients: match to concurrent request volume\n# - Redis: usually 5-10 per instance is enough\n\n# Warning signs of pool problems:\n# - \"connection timeout\" errors under load\n# - Response time spikes at regular intervals\n# - Idle connections holding resources\n# - Connection count hitting max_connections"
      },
      {
        "title": "Async & Concurrency Patterns",
        "body": "// BAD: Sequential when independent\nconst user = await getUser(id);\nconst orders = await getOrders(id);\nconst prefs = await getPreferences(id);\n// Total: user_time + orders_time + prefs_time\n\n// GOOD: Parallel when independent\nconst [user, orders, prefs] = await Promise.all([\n  getUser(id),\n  getOrders(id),\n  getPreferences(id),\n]);\n// Total: max(user_time, orders_time, prefs_time)\n\n// GOOD: Controlled concurrency for many items\n// (npm: p-limit, p-map, or manual semaphore)\nimport pLimit from 'p-limit';\nconst limit = pLimit(10); // Max 10 concurrent\nconst results = await Promise.all(\n  items.map(item => limit(() => processItem(item)))\n);\n\n# Python: asyncio for I/O-bound\nimport asyncio\n\nasync def fetch_all(ids):\n    # Parallel\n    tasks = [fetch_one(id) for id in ids]\n    return await asyncio.gather(*tasks)\n\n# Python: ProcessPoolExecutor for CPU-bound\nfrom concurrent.futures import ProcessPoolExecutor\nwith ProcessPoolExecutor(max_workers=4) as pool:\n    results = list(pool.map(cpu_intensive_fn, items))"
      },
      {
        "title": "N+1 Query Detection & Fix",
        "body": "SYMPTOM: Response time scales linearly with result count\nDETECTION: Enable query logging, count queries per request\n\n# Bad: N+1\nusers = db.query(\"SELECT * FROM users LIMIT 100\")\nfor user in users:\n    orders = db.query(f\"SELECT * FROM orders WHERE user_id = {user.id}\")\n# Result: 1 + 100 = 101 queries\n\n# Fix 1: JOIN\nSELECT u.*, o.* FROM users u\nLEFT JOIN orders o ON o.user_id = u.id\nLIMIT 100\n\n# Fix 2: Batch load (better for large datasets)\nusers = db.query(\"SELECT * FROM users LIMIT 100\")\nuser_ids = [u.id for u in users]\norders = db.query(f\"SELECT * FROM orders WHERE user_id IN ({','.join(user_ids)})\")\n# Result: 2 queries regardless of count\n\n# Fix 3: ORM eager loading\n# Drizzle: .with(users.orders)\n# SQLAlchemy: joinedload(User.orders)\n# Prisma: include: { orders: true }"
      },
      {
        "title": "Query Optimization Checklist",
        "body": "For every slow query:\n□ Run EXPLAIN ANALYZE (not just EXPLAIN)\n□ Check: is it doing a sequential scan on a large table?\n□ Check: is the row estimate accurate? (bad stats = bad plan)\n□ Check: are there implicit type casts preventing index use?\n□ Check: is it sorting more data than needed? (add LIMIT earlier)\n□ Check: is it joining in the right order?\n□ Check: can a covering index eliminate table lookups?\n□ Check: is the query running during peak hours? (schedule if batch)"
      },
      {
        "title": "EXPLAIN ANALYZE Interpretation",
        "body": "-- PostgreSQL EXPLAIN output reading guide:\nEXPLAIN (ANALYZE, BUFFERS, FORMAT TEXT) SELECT ...;\n\n-- Key metrics to check:\n-- 1. Actual time vs estimated time (large gap = stale stats → ANALYZE)\n-- 2. Rows actual vs estimated (>10x off = bad stats)\n-- 3. Seq Scan on large table (>10K rows) = needs index\n-- 4. Sort with external merge = needs more work_mem or index\n-- 5. Nested Loop with large outer = consider hash/merge join\n-- 6. Buffers shared hit vs read (low hit ratio = needs more shared_buffers)"
      },
      {
        "title": "Index Strategy Guide",
        "body": "WHEN TO ADD AN INDEX:\n✓ WHERE clause column (equality or range)\n✓ JOIN condition column\n✓ ORDER BY column (if query is index-only scan candidate)\n✓ Foreign key column (prevents table lock on parent delete)\n✓ Column in a unique constraint\n\nWHEN NOT TO ADD AN INDEX:\n✗ Table has <1000 rows (seq scan is fine)\n✗ Column has very low cardinality (boolean, status with 3 values)\n✗ Write-heavy table where reads are rare\n✗ You already have 8+ indexes on the table (diminishing returns, write penalty)\n\nINDEX TYPES:\n- B-tree (default): equality, range, sorting, LIKE 'prefix%'\n- Hash: equality only (rarely better than B-tree)\n- GIN: arrays, JSONB, full-text search\n- GiST: geometry, range types, full-text\n- BRIN: large tables with natural ordering (timestamps, sequential IDs)\n\nCOMPOSITE INDEX RULES:\n1. Equality columns first, then range columns\n2. Most selective column first (if all equality)\n3. Index on (a, b) works for WHERE a=1 AND b=2 AND for WHERE a=1 alone\n4. Index on (a, b) does NOT work for WHERE b=2 alone"
      },
      {
        "title": "Load Test Design",
        "body": "# load-test-plan.yaml\ntest_name: \"\"\ntarget: \"\"              # URL/endpoint\ndate: \"\"\n\nscenarios:\n  - name: \"Baseline\"\n    description: \"Normal traffic pattern\"\n    vus: 50               # Virtual users\n    duration: \"5m\"\n    ramp_up: \"30s\"\n    think_time: \"1-3s\"    # Pause between requests\n\n  - name: \"Peak\"\n    description: \"2x normal traffic (expected peak)\"\n    vus: 100\n    duration: \"10m\"\n    ramp_up: \"1m\"\n\n  - name: \"Stress\"\n    description: \"Find the breaking point\"\n    vus_start: 50\n    vus_end: 500\n    step_duration: \"2m\"   # Add users every 2 min\n    step_size: 50\n\n  - name: \"Soak\"\n    description: \"Memory leaks, connection exhaustion\"\n    vus: 50\n    duration: \"2h\"\n\npass_criteria:\n  p95_response_ms: 500\n  error_rate_pct: 0.1\n  throughput_rps: 200"
      },
      {
        "title": "k6 Load Test Template",
        "body": "// load-test.js (run: k6 run load-test.js)\nimport http from 'k6/http';\nimport { check, sleep } from 'k6';\nimport { Rate, Trend } from 'k6/metrics';\n\nconst errorRate = new Rate('errors');\nconst responseTime = new Trend('response_time');\n\nexport const options = {\n  stages: [\n    { duration: '30s', target: 20 },   // Ramp up\n    { duration: '3m', target: 20 },    // Steady\n    { duration: '30s', target: 50 },   // Peak\n    { duration: '3m', target: 50 },    // Steady peak\n    { duration: '30s', target: 0 },    // Ramp down\n  ],\n  thresholds: {\n    http_req_duration: ['p(95)<500'],   // 95% under 500ms\n    errors: ['rate<0.01'],              // <1% error rate\n  },\n};\n\nexport default function () {\n  const res = http.get('https://api.example.com/endpoint');\n\n  check(res, {\n    'status 200': (r) => r.status === 200,\n    'response < 500ms': (r) => r.timings.duration < 500,\n  });\n\n  errorRate.add(res.status !== 200);\n  responseTime.add(res.timings.duration);\n\n  sleep(Math.random() * 2 + 1); // 1-3s think time\n}"
      },
      {
        "title": "Load Test Results Analysis",
        "body": "READING RESULTS:\n┌──────────────────────────────────────────┐\n│ Metric          │ Healthy │ Warning │ Bad│\n├──────────────────────────────────────────┤\n│ p50/p95 ratio   │ <2x     │ 2-5x    │>5x│  ← High ratio = tail latency problem\n│ p95/p99 ratio   │ <2x     │ 2-3x    │>3x│  ← Outliers affecting some users\n│ Error rate      │ <0.1%   │ 0.1-1%  │>1%│  ← Above 1% = user-visible\n│ Throughput drop  │ <5%     │ 5-20%   │>20%│ ← System under stress\n│ CPU at peak     │ <70%    │ 70-85%  │>85%│ ← No headroom\n│ Memory at peak  │ <75%    │ 75-90%  │>90%│ ← Risk of OOM\n│ GC pause time   │ <50ms   │ 50-200ms│>200ms│ ← GC storm\n└──────────────────────────────────────────┘\n\nBOTTLENECK IDENTIFICATION:\n- Throughput plateaus but CPU is low → I/O bound (DB, network, disk)\n- Throughput plateaus and CPU is high → CPU bound (optimize hot path)\n- Response time climbs linearly → Queue building (capacity limit)\n- Response time climbs exponentially → Resource exhaustion (connection pool, memory)\n- Errors spike at specific VU count → Hard limit hit (max connections, file descriptors)"
      },
      {
        "title": "Core Web Vitals Optimization",
        "body": "METRIC      │ GOOD    │ NEEDS WORK │ POOR   │ HOW TO FIX\n────────────┼─────────┼────────────┼────────┼────────────────────────\nLCP         │ <2.5s   │ 2.5-4s     │ >4s    │ Optimize largest image/text\nFID/INP     │ <100ms  │ 100-300ms  │ >300ms │ Break up long tasks, defer JS\nCLS         │ <0.1    │ 0.1-0.25   │ >0.25  │ Set dimensions, font-display\n\nLCP FIXES (in priority order):\n1. Preload the LCP image: <link rel=\"preload\" as=\"image\" href=\"...\">\n2. Use responsive images: srcset with correct sizes\n3. Serve WebP/AVIF (30-50% smaller)\n4. Remove render-blocking CSS/JS from <head>\n5. Use CDN for static assets\n6. Server-side render the above-fold content\n\nINP FIXES:\n1. Break long tasks (>50ms) with requestIdleCallback or setTimeout(0)\n2. Use web workers for CPU-intensive work\n3. Debounce/throttle event handlers\n4. Defer non-critical JS: <script defer> or dynamic import()\n5. Avoid layout thrashing (batch DOM reads, then batch writes)\n\nCLS FIXES:\n1. Always set width/height on <img> and <video>\n2. Use aspect-ratio CSS for dynamic content\n3. Reserve space for ads/embeds\n4. Use font-display: swap with size-adjusted fallback\n5. Never insert content above existing content"
      },
      {
        "title": "Bundle Optimization",
        "body": "ANALYSIS:\n- Webpack: npx webpack-bundle-analyzer stats.json\n- Vite: npx vite-bundle-visualizer\n- Next.js: @next/bundle-analyzer\n\nREDUCTION STRATEGIES (in order of impact):\n1. Code splitting: dynamic import() for routes and heavy components\n2. Tree shaking: use ESM imports, avoid barrel files (index.ts re-exports)\n3. Replace heavy libraries:\n   - moment.js (330KB) → date-fns (tree-shakeable) or dayjs (2KB)\n   - lodash (530KB) → lodash-es (tree-shakeable) or native JS\n   - chart.js → lightweight alternative for simple charts\n4. Lazy load below-fold components\n5. Externalize large deps to CDN (React, etc.)\n6. Compress: Brotli > gzip (15-20% smaller)"
      },
      {
        "title": "Scaling Decision Framework",
        "body": "VERTICAL SCALING (scale up):\n✓ Quick fix, no code changes\n✓ Database servers (often best first move)\n✓ Memory-bound workloads\n✗ Diminishing returns past 8-16 cores\n✗ Single point of failure\n✗ Expensive at high end\n\nHORIZONTAL SCALING (scale out):\n✓ Stateless services (APIs, workers)\n✓ Read-heavy workloads (read replicas)\n✓ Geographic distribution\n✗ Requires stateless design\n✗ Adds complexity (load balancing, session management)\n✗ Not all workloads parallelize\n\nSCALING CHECKLIST:\n□ Can we optimize the code first? (cheapest option)\n□ Can we add caching? (often 10-100x improvement)\n□ Can we add a read replica? (if read-heavy)\n□ Can we queue and process async? (if latency-tolerant)\n□ Can we scale vertically? (if CPU/memory bound)\n□ Do we need horizontal scaling? (if all above exhausted)"
      },
      {
        "title": "Auto-scaling Configuration",
        "body": "# Kubernetes HPA example\napiVersion: autoscaling/v2\nkind: HorizontalPodAutoscaler\nmetadata:\n  name: api-server\nspec:\n  scaleTargetRef:\n    apiVersion: apps/v1\n    kind: Deployment\n    name: api-server\n  minReplicas: 2\n  maxReplicas: 20\n  metrics:\n    - type: Resource\n      resource:\n        name: cpu\n        target:\n          type: Utilization\n          averageUtilization: 70    # Scale at 70% CPU\n    - type: Resource\n      resource:\n        name: memory\n        target:\n          type: Utilization\n          averageUtilization: 80\n  behavior:\n    scaleUp:\n      stabilizationWindowSeconds: 60    # Wait 1m before scaling up\n      policies:\n        - type: Percent\n          value: 50                      # Max 50% increase per step\n          periodSeconds: 60\n    scaleDown:\n      stabilizationWindowSeconds: 300   # Wait 5m before scaling down\n      policies:\n        - type: Percent\n          value: 25                      # Max 25% decrease per step\n          periodSeconds: 120"
      },
      {
        "title": "Capacity Model Template",
        "body": "# capacity-model.yaml\nservice: \"\"\nlast_updated: \"\"\n\ncurrent_state:\n  daily_requests: 0\n  peak_rps: 0\n  avg_response_ms: 0\n  instances: 0\n  cpu_peak_pct: 0\n  memory_peak_pct: 0\n  db_connections_peak: 0\n  storage_used_gb: 0\n\ngrowth_model:\n  request_growth_monthly_pct: 0    # e.g., 15%\n  storage_growth_monthly_gb: 0\n  seasonal_peak_multiplier: 0      # e.g., 3x for Black Friday\n\nprojections:\n  # Formula: current * (1 + growth_rate)^months * seasonal_multiplier\n  3_month:\n    daily_requests: 0\n    peak_rps: 0\n    instances_needed: 0\n    storage_gb: 0\n    estimated_cost: \"\"\n  6_month:\n    daily_requests: 0\n    peak_rps: 0\n    instances_needed: 0\n    storage_gb: 0\n    estimated_cost: \"\"\n  12_month:\n    daily_requests: 0\n    peak_rps: 0\n    instances_needed: 0\n    storage_gb: 0\n    estimated_cost: \"\"\n\nheadroom_rules:\n  cpu: \"Scale when sustained >70% for 5m\"\n  memory: \"Scale when >80%\"\n  storage: \"Alert when >75%, expand when >85%\"\n  db_connections: \"Alert when >80% of max\""
      },
      {
        "title": "Cost-Performance Tradeoff Analysis",
        "body": "For every optimization, calculate:\n\nROI = (time_saved_per_month × cost_per_hour) / implementation_cost\n\nEXAMPLE:\n- P95 latency: 800ms → 200ms after optimization\n- Requests/month: 10M\n- Time saved: 600ms × 10M = 1,667 hours of compute\n- Compute cost: $0.05/hour = $83/month savings\n- Implementation: 16 hours × $150/hr = $2,400\n- Payback: 29 months ← NOT WORTH IT for cost alone\n\nBUT ALSO CONSIDER:\n- User experience improvement → conversion rate\n- Reduced infrastructure needs → fewer instances\n- Headroom for growth → delayed scaling investment\n- Developer productivity → faster local dev cycles"
      },
      {
        "title": "Automated Performance Gates",
        "body": "# .github/workflows/perf-gate.yml\nname: Performance Gate\non: pull_request\n\njobs:\n  benchmark:\n    runs-on: ubuntu-latest\n    steps:\n      - uses: actions/checkout@v4\n\n      - name: Run benchmarks\n        run: |\n          # Run your benchmark suite\n          npm run benchmark -- --json > bench-results.json\n\n      - name: Compare with baseline\n        run: |\n          # Compare against main branch baseline\n          node scripts/compare-benchmarks.js \\\n            --baseline benchmarks/baseline.json \\\n            --current bench-results.json \\\n            --threshold 10  # Fail if >10% regression\n\n      - name: Load test (on staging)\n        if: github.base_ref == 'main'\n        run: |\n          k6 run --out json=load-results.json tests/load-test.js\n          # Check thresholds automatically via k6\n\n      - name: Bundle size check\n        run: |\n          npm run build\n          node scripts/check-bundle-size.js \\\n            --max-size 250KB \\\n            --max-increase 5%"
      },
      {
        "title": "Performance Regression Detection",
        "body": "AUTOMATED CHECKS (run on every PR):\n□ Unit benchmarks: critical path functions < threshold\n□ Bundle size: total and per-chunk limits\n□ Lighthouse CI: Core Web Vitals pass\n□ Query count: no N+1 regressions (count queries per test)\n□ Memory: no leak patterns in test suite\n\nWEEKLY CHECKS (cron job):\n□ Production p50/p95/p99 trends (compare to 4-week average)\n□ Error rate trends\n□ Database slow query log review\n□ Infrastructure cost vs traffic ratio\n□ Cache hit rates\n\nMONTHLY REVIEW:\n□ Capacity model update\n□ Performance budget review\n□ Top 10 slowest endpoints → optimization candidates\n□ Cost-performance analysis\n□ Load test full suite against staging"
      },
      {
        "title": "Performance Review Checklist",
        "body": "Score your system (0-100):\n\nMEASUREMENT (25 points):\n□ (5) Performance budgets defined for all key metrics\n□ (5) Real User Monitoring (RUM) in production\n□ (5) Alerting on p95 degradation\n□ (5) Dashboards visible to team\n□ (5) Regular load testing\n\nPREVENTION (25 points):\n□ (5) Performance gates in CI/CD\n□ (5) Bundle size limits enforced\n□ (5) Query count checks in tests\n□ (5) Code review includes perf review\n□ (5) Capacity planning model maintained\n\nOPTIMIZATION (25 points):\n□ (5) Caching strategy documented\n□ (5) Database indexes reviewed quarterly\n□ (5) No known N+1 queries\n□ (5) Connection pools properly sized\n□ (5) Async patterns used for I/O\n\nOPERATIONS (25 points):\n□ (5) Auto-scaling configured and tested\n□ (5) Slow query logging enabled\n□ (5) Memory leak monitoring\n□ (5) Performance incident runbook exists\n□ (5) Monthly performance review"
      },
      {
        "title": "Common Anti-Patterns",
        "body": "1. PREMATURE OPTIMIZATION\n   Problem: Optimizing before measuring\n   Fix: Profile first, optimize the measured bottleneck\n\n2. MICRO-BENCHMARKING IN ISOLATION\n   Problem: Function is fast alone but slow in context (cache, contention)\n   Fix: Always benchmark in realistic conditions with realistic data\n\n3. OPTIMIZING THE WRONG LAYER\n   Problem: Tuning app code when the DB is the bottleneck\n   Fix: Use distributed tracing to find the actual bottleneck\n\n4. CACHING EVERYTHING\n   Problem: Cache invalidation bugs, stale data, memory pressure\n   Fix: Cache selectively using the decision matrix (Phase 3)\n\n5. PREMATURE HORIZONTAL SCALING\n   Problem: Adding instances when single instance is underoptimized\n   Fix: Vertical optimization first, scale second\n\n6. IGNORING TAIL LATENCY\n   Problem: p50 is fine but p99 is terrible\n   Fix: Investigate outliers — they're often the most important users\n\n7. LOAD TESTING IN DEV\n   Problem: Dev environment doesn't match production\n   Fix: Load test against staging with production-like data\n\n8. OPTIMIZING COLD PATHS\n   Problem: Spending time on rarely-executed code\n   Fix: Profile in production to find actual hot paths"
      },
      {
        "title": "Quick Reference: Tool Selection",
        "body": "TaskRecommended ToolAlternativeHTTP benchmarkingk6wrk, ab, heyCPU profiling (Node)clinic flame0x, --profCPU profiling (Python)py-spyScalene, cProfileCPU profiling (Go)pprofgo tool traceCPU profiling (Java)async-profilerJFR, VisualVMMemory profilinglanguage-specific (see Phase 2)CLI benchmarkinghyperfinetimeBundle analysiswebpack-bundle-analyzersource-map-explorerWeb performanceLighthouseWebPageTestDB query analysisEXPLAIN ANALYZEpgMustard, pganalyzeDistributed tracingJaeger, ZipkinOpenTelemetryAPMDatadog, New RelicGrafana + PrometheusContinuous profilingPyroscopeParca"
      },
      {
        "title": "Natural Language Commands",
        "body": "\"Profile this function\"     → CPU profiling with flame graph\n\"Why is this endpoint slow\" → Full investigation brief + profiling\n\"Load test the API\"         → k6 test design and execution\n\"Check for memory leaks\"    → Heap snapshot comparison workflow\n\"Optimize this query\"       → EXPLAIN ANALYZE + index recommendations\n\"Review frontend perf\"      → Core Web Vitals audit + bundle analysis\n\"Plan capacity for 10x\"     → Capacity model with projections\n\"Set up perf monitoring\"    → CI/CD gates + dashboards + alerts\n\"Find the bottleneck\"       → Profiling decision tree walkthrough\n\"Score our performance\"     → Performance review checklist (0-100)\n\"Compare before and after\"  → Benchmark comparison methodology\n\"Reduce bundle size\"        → Bundle analysis + reduction strategies"
      }
    ],
    "body": "Performance Engineering System\n\nFrom \"it's slow\" to \"here's why and here's the fix\" — a complete methodology for measuring, diagnosing, optimizing, and preventing performance problems.\n\nPhase 1: Performance Investigation Brief\n\nBefore touching anything, define the problem.\n\n# performance-brief.yaml\ninvestigation:\n  reported_by: \"\"\n  reported_date: \"\"\n  system: \"\"              # service/app name\n  environment: \"\"         # production, staging, dev\n\nproblem_statement:\n  symptom: \"\"             # \"API response time increased 3x\"\n  impact: \"\"              # \"15% of users seeing timeouts\"\n  since_when: \"\"          # \"After deploy v2.14 on Feb 20\"\n  affected_scope: \"\"      # \"All endpoints\" | \"Only /search\" | \"Users in EU\"\n\nbaselines:\n  target_p50: \"\"          # e.g., \"200ms\"\n  target_p95: \"\"          # e.g., \"500ms\"\n  target_p99: \"\"          # e.g., \"1000ms\"\n  current_p50: \"\"\n  current_p95: \"\"\n  current_p99: \"\"\n  throughput_target: \"\"   # e.g., \"1000 rps\"\n  error_rate_target: \"\"   # e.g., \"<0.1%\"\n\nconstraints:\n  budget: \"\"              # time/money for optimization\n  risk_tolerance: \"\"      # \"Can we change the schema?\" \"Can we add caching?\"\n  deadline: \"\"            # \"Must fix before Black Friday\"\n\nhypothesis:\n  primary: \"\"             # \"N+1 queries in the new recommendation engine\"\n  secondary: \"\"           # \"Connection pool exhaustion under load\"\n  evidence: \"\"            # \"Slow query log shows 200+ queries per request\"\n\nPerformance Budget Framework\n\nSet budgets BEFORE building, not after complaints:\n\nMetric\tWeb App\tAPI\tMobile\tBatch Job\nP50 response\t<200ms\t<100ms\t<300ms\tN/A\nP95 response\t<500ms\t<250ms\t<800ms\tN/A\nP99 response\t<1s\t<500ms\t<1.5s\tN/A\nError rate\t<0.1%\t<0.01%\t<0.5%\t<0.001%\nTime to Interactive\t<3s\tN/A\t<2s\tN/A\nMemory per request\t<50MB\t<20MB\t<100MB\t<1GB\nCPU per 
request\t<100ms\t<50ms\t<200ms\tN/A\nThroughput\t100+ rps\t500+ rps\tN/A\titems/min\nPhase 2: Measurement & Profiling\nThe Golden Rule\n\nNever optimize without measuring first. Never measure without a hypothesis.\n\nProfiling Decision Tree\nIs it slow?\n├── YES → Where is time spent?\n│   ├── CPU-bound → Profile CPU (flame graph)\n│   │   ├── Hot function found → Optimize algorithm/data structure\n│   │   └── Spread evenly → Architecture problem (too many layers)\n│   ├── I/O-bound → Profile I/O\n│   │   ├── Database → Query analysis (Phase 4)\n│   │   ├── Network → Connection profiling\n│   │   ├── Disk → I/O scheduler + buffering\n│   │   └── External API → Caching + async + circuit breaker\n│   ├── Memory-bound → Profile allocations\n│   │   ├── GC pressure → Reduce allocations, pool objects\n│   │   ├── Memory leak → Heap snapshot comparison\n│   │   └── Cache thrashing → Resize or eviction policy\n│   └── Concurrency-bound → Profile locks/contention\n│       ├── Lock contention → Reduce critical section, lock-free structures\n│       ├── Thread starvation → Pool sizing\n│       └── Deadlock → Lock ordering analysis\n└── NO → Define \"fast enough\" (see budgets above)\n\nCPU Profiling by Language\nNode.js\n# Built-in profiler (V8)\nnode --prof app.js\nnode --prof-process isolate-*.log > profile.txt\n\n# Inspector-based (connect Chrome DevTools)\nnode --inspect app.js\n# Open chrome://inspect → Profiler → Start\n\n# Clinic.js (best overall Node.js profiler)\nnpx clinic doctor -- node app.js\nnpx clinic flame -- node app.js    # Flame graph\nnpx clinic bubbleprof -- node app.js  # Async bottlenecks\n\n# 0x (flame graphs)\nnpx 0x app.js\n\nPython\n# cProfile (built-in)\nimport cProfile\nimport pstats\n\nprofiler = cProfile.Profile()\nprofiler.enable()\n# ... 
code to profile ...\nprofiler.disable()\n\nstats = pstats.Stats(profiler)\nstats.sort_stats('cumulative')\nstats.print_stats(20)  # Top 20\n\n# Line profiler (pip install line-profiler)\n# Add @profile decorator, then:\n# kernprof -l -v script.py\n\n# py-spy (sampling profiler, no code changes)\n# pip install py-spy\n# py-spy top --pid <PID>\n# py-spy record -o profile.svg --pid <PID>  # Flame graph\n\n# Scalene (CPU + memory + GPU)\n# pip install scalene\n# scalene script.py\n\nGo\n// Built-in pprof\nimport (\n    \"net/http\"\n    _ \"net/http/pprof\"\n    \"runtime/pprof\"\n)\n\n// HTTP server (add to existing server)\n// Access: http://localhost:6060/debug/pprof/\ngo func() { http.ListenAndServe(\":6060\", nil) }()\n\n// CLI analysis\n// go tool pprof http://localhost:6060/debug/pprof/profile?seconds=30\n// go tool pprof -http=:8080 profile.out  # Web UI\n\nJava\n# async-profiler (best for JVM)\n# https://github.com/async-profiler/async-profiler\n./asprof -d 30 -f profile.html <PID>\n\n# JFR (built-in since JDK 11)\njava -XX:StartFlightRecording=duration=60s,filename=rec.jfr MyApp\njfr print --events CPULoad rec.jfr\n\n# jstack (thread dump)\njstack <PID> > threads.txt\n\nMemory Profiling\nLeak Detection Pattern (any language)\n1. Take heap snapshot at T0\n2. Run suspected operation N times\n3. Force GC\n4. Take heap snapshot at T1\n5. Compare: objects that grew = potential leak\n6. Check: are they reachable? From where? 
(retention path)\n\nNode.js Memory\n// Heap snapshot\nconst v8 = require('v8');\nconst fs = require('fs');\n\nfunction takeSnapshot(label) {\n  const snapshotStream = v8.writeHeapSnapshot();\n  console.log(`Heap snapshot written to ${snapshotStream}`);\n}\n\n// Process memory monitoring\nsetInterval(() => {\n  const mem = process.memoryUsage();\n  console.log({\n    rss_mb: (mem.rss / 1048576).toFixed(1),\n    heap_used_mb: (mem.heapUsed / 1048576).toFixed(1),\n    heap_total_mb: (mem.heapTotal / 1048576).toFixed(1),\n    external_mb: (mem.external / 1048576).toFixed(1),\n  });\n}, 10000);\n\nPython Memory\n# tracemalloc (built-in)\nimport tracemalloc\n\ntracemalloc.start()\n# ... code ...\nsnapshot = tracemalloc.take_snapshot()\ntop = snapshot.statistics('lineno')\nfor stat in top[:10]:\n    print(stat)\n\n# objgraph (pip install objgraph)\nimport objgraph\nobjgraph.show_most_common_types(limit=20)\nobjgraph.show_growth(limit=10)  # Call twice to see what's growing\n\nFlame Graph Interpretation\nReading a flame graph:\n┌─────────────────────────────────────────────┐\n│                  main()                      │  ← Entry point (bottom)\n├──────────────────────┬──────────────────────┤\n│     processData()    │    renderOutput()     │  ← Width = time spent\n├──────────┬───────────┤                      │\n│ parseCSV │ validate  │                      │  ← Tall = deep call stack\n├──────────┤           │                      │\n│ readline │           │                      │  ← Top = where CPU burns\n└──────────┴───────────┴──────────────────────┘\n\nWHAT TO LOOK FOR:\n1. Wide plateaus at top → CPU-intensive leaf function (optimize this!)\n2. Many thin towers → excessive function calls (batch or reduce)\n3. Recursive patterns → potential stack overflow risk\n4. Unexpected width → function taking more time than expected\n5. 
GC/runtime frames → memory pressure\n\nACTION RULES:\n- Plateau >20% width → must investigate\n- Plateau >40% width → almost certainly the bottleneck\n- If top 3 functions = 80% of time → focused optimization will work\n- If evenly distributed → architectural change needed\n\nPhase 3: Common Optimization Patterns\nAlgorithm & Data Structure Optimizations\nProblem\tBad O()\tFix\tGood O()\nSearch unsorted array\tO(n)\tSort + binary search, or use Set/Map\tO(log n) or O(1)\nNested loop matching\tO(n²)\tHash map lookup\tO(n)\nRepeated string concat\tO(n²)\tStringBuilder/join array\tO(n)\nSorting already-sorted data\tO(n log n)\tCheck if sorted first\tO(n)\nFinding duplicates\tO(n²)\tSet-based detection\tO(n)\nFrequent min/max of changing data\tO(n) per query\tHeap/priority queue\tO(log n)\nCaching Strategy Decision Matrix\nShould you cache this?\n├── Does the same input always produce the same output?\n│   ├── YES → Cache candidate ✓\n│   └── NO → Can you define a valid TTL?\n│       ├── YES → Cache with TTL ✓\n│       └── NO → Don't cache ✗\n├── Is it called frequently?\n│   ├── <10x/min → Probably not worth caching\n│   └── >10x/min → Cache ✓\n├── Is the source data expensive to compute/fetch?\n│   ├── <10ms → Probably not worth caching\n│   └── >10ms → Cache ✓\n└── Does staleness cause problems?\n    ├── Critical (financial, auth) → Short TTL or cache-aside with invalidation\n    ├── Important (user data) → 1-5 min TTL with invalidation\n    └── Tolerant (content, search) → 5-60 min TTL\n\nCACHE LAYERS (use in order):\n1. In-process (Map/LRU) → <1μs, limited by memory, per-instance\n2. Shared cache (Redis/Memcached) → <1ms, shared across instances\n3. CDN/edge cache → <10ms, geographic distribution\n4. 
Browser cache → 0ms for user, stale risk\n\nINVALIDATION STRATEGIES:\n- TTL-based: simplest, best for read-heavy + staleness-tolerant\n- Event-based: publish cache-invalidate on write, best for consistency\n- Write-through: update cache on every write, best for write-read patterns\n- Cache-aside: app manages cache explicitly, most flexible\n\nConnection Pooling\n# Sizing formula\npool_size: min(available_cores * 2 + effective_spindle_count, max_connections / num_instances)\n\n# Rules of thumb:\n# - PostgreSQL: connections = cores * 2 + 1 (per pgBouncer docs)\n# - MySQL: keep total connections < 150 for most workloads\n# - HTTP clients: match to concurrent request volume\n# - Redis: usually 5-10 per instance is enough\n\n# Warning signs of pool problems:\n# - \"connection timeout\" errors under load\n# - Response time spikes at regular intervals\n# - Idle connections holding resources\n# - Connection count hitting max_connections\n\nAsync & Concurrency Patterns\n// BAD: Sequential when independent\nconst user = await getUser(id);\nconst orders = await getOrders(id);\nconst prefs = await getPreferences(id);\n// Total: user_time + orders_time + prefs_time\n\n// GOOD: Parallel when independent\nconst [user, orders, prefs] = await Promise.all([\n  getUser(id),\n  getOrders(id),\n  getPreferences(id),\n]);\n// Total: max(user_time, orders_time, prefs_time)\n\n// GOOD: Controlled concurrency for many items\n// (npm: p-limit, p-map, or manual semaphore)\nimport pLimit from 'p-limit';\nconst limit = pLimit(10); // Max 10 concurrent\nconst results = await Promise.all(\n  items.map(item => limit(() => processItem(item)))\n);\n\n# Python: asyncio for I/O-bound\nimport asyncio\n\nasync def fetch_all(ids):\n    # Parallel\n    tasks = [fetch_one(id) for id in ids]\n    return await asyncio.gather(*tasks)\n\n# Python: ProcessPoolExecutor for CPU-bound\nfrom concurrent.futures import ProcessPoolExecutor\nwith ProcessPoolExecutor(max_workers=4) as pool:\n    results = 
list(pool.map(cpu_intensive_fn, items))\n\nN+1 Query Detection & Fix\nSYMPTOM: Response time scales linearly with result count\nDETECTION: Enable query logging, count queries per request\n\n# Bad: N+1\nusers = db.query(\"SELECT * FROM users LIMIT 100\")\nfor user in users:\n    orders = db.query(f\"SELECT * FROM orders WHERE user_id = {user.id}\")\n# Result: 1 + 100 = 101 queries\n\n# Fix 1: JOIN\nSELECT u.*, o.* FROM users u\nLEFT JOIN orders o ON o.user_id = u.id\nLIMIT 100\n\n# Fix 2: Batch load (better for large datasets)\nusers = db.query(\"SELECT * FROM users LIMIT 100\")\nuser_ids = [u.id for u in users]\norders = db.query(f\"SELECT * FROM orders WHERE user_id IN ({','.join(user_ids)})\")\n# Result: 2 queries regardless of count\n\n# Fix 3: ORM eager loading\n# Drizzle: .with(users.orders)\n# SQLAlchemy: joinedload(User.orders)\n# Prisma: include: { orders: true }\n\nPhase 4: Database Performance\nQuery Optimization Checklist\nFor every slow query:\n□ Run EXPLAIN ANALYZE (not just EXPLAIN)\n□ Check: is it doing a sequential scan on a large table?\n□ Check: is the row estimate accurate? (bad stats = bad plan)\n□ Check: are there implicit type casts preventing index use?\n□ Check: is it sorting more data than needed? (add LIMIT earlier)\n□ Check: is it joining in the right order?\n□ Check: can a covering index eliminate table lookups?\n□ Check: is the query running during peak hours? (schedule if batch)\n\nEXPLAIN ANALYZE Interpretation\n-- PostgreSQL EXPLAIN output reading guide:\nEXPLAIN (ANALYZE, BUFFERS, FORMAT TEXT) SELECT ...;\n\n-- Key metrics to check:\n-- 1. Actual time vs estimated time (large gap = stale stats → ANALYZE)\n-- 2. Rows actual vs estimated (>10x off = bad stats)\n-- 3. Seq Scan on large table (>10K rows) = needs index\n-- 4. Sort with external merge = needs more work_mem or index\n-- 5. Nested Loop with large outer = consider hash/merge join\n-- 6. 
Buffers shared hit vs read (low hit ratio = needs more shared_buffers)\n\nIndex Strategy Guide\nWHEN TO ADD AN INDEX:\n✓ WHERE clause column (equality or range)\n✓ JOIN condition column\n✓ ORDER BY column (if query is index-only scan candidate)\n✓ Foreign key column (prevents table lock on parent delete)\n✓ Column in a unique constraint\n\nWHEN NOT TO ADD AN INDEX:\n✗ Table has <1000 rows (seq scan is fine)\n✗ Column has very low cardinality (boolean, status with 3 values)\n✗ Write-heavy table where reads are rare\n✗ You already have 8+ indexes on the table (diminishing returns, write penalty)\n\nINDEX TYPES:\n- B-tree (default): equality, range, sorting, LIKE 'prefix%'\n- Hash: equality only (rarely better than B-tree)\n- GIN: arrays, JSONB, full-text search\n- GiST: geometry, range types, full-text\n- BRIN: large tables with natural ordering (timestamps, sequential IDs)\n\nCOMPOSITE INDEX RULES:\n1. Equality columns first, then range columns\n2. Most selective column first (if all equality)\n3. Index on (a, b) works for WHERE a=1 AND b=2 AND for WHERE a=1 alone\n4. 
Index on (a, b) does NOT work for WHERE b=2 alone\n\nPhase 5: Load Testing\nLoad Test Design\n# load-test-plan.yaml\ntest_name: \"\"\ntarget: \"\"              # URL/endpoint\ndate: \"\"\n\nscenarios:\n  - name: \"Baseline\"\n    description: \"Normal traffic pattern\"\n    vus: 50               # Virtual users\n    duration: \"5m\"\n    ramp_up: \"30s\"\n    think_time: \"1-3s\"    # Pause between requests\n\n  - name: \"Peak\"\n    description: \"2x normal traffic (expected peak)\"\n    vus: 100\n    duration: \"10m\"\n    ramp_up: \"1m\"\n\n  - name: \"Stress\"\n    description: \"Find the breaking point\"\n    vus_start: 50\n    vus_end: 500\n    step_duration: \"2m\"   # Add users every 2 min\n    step_size: 50\n\n  - name: \"Soak\"\n    description: \"Memory leaks, connection exhaustion\"\n    vus: 50\n    duration: \"2h\"\n\npass_criteria:\n  p95_response_ms: 500\n  error_rate_pct: 0.1\n  throughput_rps: 200\n\nk6 Load Test Template\n// load-test.js (run: k6 run load-test.js)\nimport http from 'k6/http';\nimport { check, sleep } from 'k6';\nimport { Rate, Trend } from 'k6/metrics';\n\nconst errorRate = new Rate('errors');\nconst responseTime = new Trend('response_time');\n\nexport const options = {\n  stages: [\n    { duration: '30s', target: 20 },   // Ramp up\n    { duration: '3m', target: 20 },    // Steady\n    { duration: '30s', target: 50 },   // Peak\n    { duration: '3m', target: 50 },    // Steady peak\n    { duration: '30s', target: 0 },    // Ramp down\n  ],\n  thresholds: {\n    http_req_duration: ['p(95)<500'],   // 95% under 500ms\n    errors: ['rate<0.01'],              // <1% error rate\n  },\n};\n\nexport default function () {\n  const res = http.get('https://api.example.com/endpoint');\n\n  check(res, {\n    'status 200': (r) => r.status === 200,\n    'response < 500ms': (r) => r.timings.duration < 500,\n  });\n\n  errorRate.add(res.status !== 200);\n  responseTime.add(res.timings.duration);\n\n  sleep(Math.random() * 2 + 1); // 1-3s think 
time\n}\n\nLoad Test Results Analysis\nREADING RESULTS:\n┌──────────────────────────────────────────┐\n│ Metric          │ Healthy │ Warning │ Bad│\n├──────────────────────────────────────────┤\n│ p50/p95 ratio   │ <2x     │ 2-5x    │>5x│  ← High ratio = tail latency problem\n│ p95/p99 ratio   │ <2x     │ 2-3x    │>3x│  ← Outliers affecting some users\n│ Error rate      │ <0.1%   │ 0.1-1%  │>1%│  ← Above 1% = user-visible\n│ Throughput drop  │ <5%     │ 5-20%   │>20%│ ← System under stress\n│ CPU at peak     │ <70%    │ 70-85%  │>85%│ ← No headroom\n│ Memory at peak  │ <75%    │ 75-90%  │>90%│ ← Risk of OOM\n│ GC pause time   │ <50ms   │ 50-200ms│>200ms│ ← GC storm\n└──────────────────────────────────────────┘\n\nBOTTLENECK IDENTIFICATION:\n- Throughput plateaus but CPU is low → I/O bound (DB, network, disk)\n- Throughput plateaus and CPU is high → CPU bound (optimize hot path)\n- Response time climbs linearly → Queue building (capacity limit)\n- Response time climbs exponentially → Resource exhaustion (connection pool, memory)\n- Errors spike at specific VU count → Hard limit hit (max connections, file descriptors)\n\nPhase 6: Frontend Performance\nCore Web Vitals Optimization\nMETRIC      │ GOOD    │ NEEDS WORK │ POOR   │ HOW TO FIX\n────────────┼─────────┼────────────┼────────┼────────────────────────\nLCP         │ <2.5s   │ 2.5-4s     │ >4s    │ Optimize largest image/text\nFID/INP     │ <100ms  │ 100-300ms  │ >300ms │ Break up long tasks, defer JS\nCLS         │ <0.1    │ 0.1-0.25   │ >0.25  │ Set dimensions, font-display\n\nLCP FIXES (in priority order):\n1. Preload the LCP image: <link rel=\"preload\" as=\"image\" href=\"...\">\n2. Use responsive images: srcset with correct sizes\n3. Serve WebP/AVIF (30-50% smaller)\n4. Remove render-blocking CSS/JS from <head>\n5. Use CDN for static assets\n6. Server-side render the above-fold content\n\nINP FIXES:\n1. Break long tasks (>50ms) with requestIdleCallback or setTimeout(0)\n2. 
Use web workers for CPU-intensive work\n3. Debounce/throttle event handlers\n4. Defer non-critical JS: <script defer> or dynamic import()\n5. Avoid layout thrashing (batch DOM reads, then batch writes)\n\nCLS FIXES:\n1. Always set width/height on <img> and <video>\n2. Use aspect-ratio CSS for dynamic content\n3. Reserve space for ads/embeds\n4. Use font-display: swap with size-adjusted fallback\n5. Never insert content above existing content\n\nBundle Optimization\nANALYSIS:\n- Webpack: npx webpack-bundle-analyzer stats.json\n- Vite: npx vite-bundle-visualizer\n- Next.js: @next/bundle-analyzer\n\nREDUCTION STRATEGIES (in order of impact):\n1. Code splitting: dynamic import() for routes and heavy components\n2. Tree shaking: use ESM imports, avoid barrel files (index.ts re-exports)\n3. Replace heavy libraries:\n   - moment.js (330KB) → date-fns (tree-shakeable) or dayjs (2KB)\n   - lodash (530KB) → lodash-es (tree-shakeable) or native JS\n   - chart.js → lightweight alternative for simple charts\n4. Lazy load below-fold components\n5. Externalize large deps to CDN (React, etc.)\n6. Compress: Brotli > gzip (15-20% smaller)\n\nPhase 7: Infrastructure & Scaling\nScaling Decision Framework\nVERTICAL SCALING (scale up):\n✓ Quick fix, no code changes\n✓ Database servers (often best first move)\n✓ Memory-bound workloads\n✗ Diminishing returns past 8-16 cores\n✗ Single point of failure\n✗ Expensive at high end\n\nHORIZONTAL SCALING (scale out):\n✓ Stateless services (APIs, workers)\n✓ Read-heavy workloads (read replicas)\n✓ Geographic distribution\n✗ Requires stateless design\n✗ Adds complexity (load balancing, session management)\n✗ Not all workloads parallelize\n\nSCALING CHECKLIST:\n□ Can we optimize the code first? (cheapest option)\n□ Can we add caching? (often 10-100x improvement)\n□ Can we add a read replica? (if read-heavy)\n□ Can we queue and process async? (if latency-tolerant)\n□ Can we scale vertically? (if CPU/memory bound)\n□ Do we need horizontal scaling? 
(if all above exhausted)\n\nAuto-scaling Configuration\n# Kubernetes HPA example\napiVersion: autoscaling/v2\nkind: HorizontalPodAutoscaler\nmetadata:\n  name: api-server\nspec:\n  scaleTargetRef:\n    apiVersion: apps/v1\n    kind: Deployment\n    name: api-server\n  minReplicas: 2\n  maxReplicas: 20\n  metrics:\n    - type: Resource\n      resource:\n        name: cpu\n        target:\n          type: Utilization\n          averageUtilization: 70    # Scale at 70% CPU\n    - type: Resource\n      resource:\n        name: memory\n        target:\n          type: Utilization\n          averageUtilization: 80\n  behavior:\n    scaleUp:\n      stabilizationWindowSeconds: 60    # Wait 1m before scaling up\n      policies:\n        - type: Percent\n          value: 50                      # Max 50% increase per step\n          periodSeconds: 60\n    scaleDown:\n      stabilizationWindowSeconds: 300   # Wait 5m before scaling down\n      policies:\n        - type: Percent\n          value: 25                      # Max 25% decrease per step\n          periodSeconds: 120\n\nPhase 8: Capacity Planning\nCapacity Model Template\n# capacity-model.yaml\nservice: \"\"\nlast_updated: \"\"\n\ncurrent_state:\n  daily_requests: 0\n  peak_rps: 0\n  avg_response_ms: 0\n  instances: 0\n  cpu_peak_pct: 0\n  memory_peak_pct: 0\n  db_connections_peak: 0\n  storage_used_gb: 0\n\ngrowth_model:\n  request_growth_monthly_pct: 0    # e.g., 15%\n  storage_growth_monthly_gb: 0\n  seasonal_peak_multiplier: 0      # e.g., 3x for Black Friday\n\nprojections:\n  # Formula: current * (1 + growth_rate)^months * seasonal_multiplier\n  3_month:\n    daily_requests: 0\n    peak_rps: 0\n    instances_needed: 0\n    storage_gb: 0\n    estimated_cost: \"\"\n  6_month:\n    daily_requests: 0\n    peak_rps: 0\n    instances_needed: 0\n    storage_gb: 0\n    estimated_cost: \"\"\n  12_month:\n    daily_requests: 0\n    peak_rps: 0\n    instances_needed: 0\n    storage_gb: 0\n    estimated_cost: 
\"\"\n\nheadroom_rules:\n  cpu: \"Scale when sustained >70% for 5m\"\n  memory: \"Scale when >80%\"\n  storage: \"Alert when >75%, expand when >85%\"\n  db_connections: \"Alert when >80% of max\"\n\nCost-Performance Tradeoff Analysis\nFor every optimization, calculate:\n\nROI = (time_saved_per_month × cost_per_hour) / implementation_cost\n\nEXAMPLE:\n- P95 latency: 800ms → 200ms after optimization\n- Requests/month: 10M\n- Time saved: 600ms × 10M = 1,667 hours of compute\n- Compute cost: $0.05/hour = $83/month savings\n- Implementation: 16 hours × $150/hr = $2,400\n- Payback: 29 months ← NOT WORTH IT for cost alone\n\nBUT ALSO CONSIDER:\n- User experience improvement → conversion rate\n- Reduced infrastructure needs → fewer instances\n- Headroom for growth → delayed scaling investment\n- Developer productivity → faster local dev cycles\n\nPhase 9: Performance in CI/CD\nAutomated Performance Gates\n# .github/workflows/perf-gate.yml\nname: Performance Gate\non: pull_request\n\njobs:\n  benchmark:\n    runs-on: ubuntu-latest\n    steps:\n      - uses: actions/checkout@v4\n\n      - name: Run benchmarks\n        run: |\n          # Run your benchmark suite\n          npm run benchmark -- --json > bench-results.json\n\n      - name: Compare with baseline\n        run: |\n          # Compare against main branch baseline\n          node scripts/compare-benchmarks.js \\\n            --baseline benchmarks/baseline.json \\\n            --current bench-results.json \\\n            --threshold 10  # Fail if >10% regression\n\n      - name: Load test (on staging)\n        if: github.base_ref == 'main'\n        run: |\n          k6 run --out json=load-results.json tests/load-test.js\n          # Check thresholds automatically via k6\n\n      - name: Bundle size check\n        run: |\n          npm run build\n          node scripts/check-bundle-size.js \\\n            --max-size 250KB \\\n            --max-increase 5%\n\nPerformance Regression Detection\nAUTOMATED CHECKS (run on 
every PR):\n□ Unit benchmarks: critical path functions < threshold\n□ Bundle size: total and per-chunk limits\n□ Lighthouse CI: Core Web Vitals pass\n□ Query count: no N+1 regressions (count queries per test)\n□ Memory: no leak patterns in test suite\n\nWEEKLY CHECKS (cron job):\n□ Production p50/p95/p99 trends (compare to 4-week average)\n□ Error rate trends\n□ Database slow query log review\n□ Infrastructure cost vs traffic ratio\n□ Cache hit rates\n\nMONTHLY REVIEW:\n□ Capacity model update\n□ Performance budget review\n□ Top 10 slowest endpoints → optimization candidates\n□ Cost-performance analysis\n□ Load test full suite against staging\n\nPhase 10: Performance Culture\nPerformance Review Checklist\n\nScore your system (0-100):\n\nMEASUREMENT (25 points):\n□ (5) Performance budgets defined for all key metrics\n□ (5) Real User Monitoring (RUM) in production\n□ (5) Alerting on p95 degradation\n□ (5) Dashboards visible to team\n□ (5) Regular load testing\n\nPREVENTION (25 points):\n□ (5) Performance gates in CI/CD\n□ (5) Bundle size limits enforced\n□ (5) Query count checks in tests\n□ (5) Code review includes perf review\n□ (5) Capacity planning model maintained\n\nOPTIMIZATION (25 points):\n□ (5) Caching strategy documented\n□ (5) Database indexes reviewed quarterly\n□ (5) No known N+1 queries\n□ (5) Connection pools properly sized\n□ (5) Async patterns used for I/O\n\nOPERATIONS (25 points):\n□ (5) Auto-scaling configured and tested\n□ (5) Slow query logging enabled\n□ (5) Memory leak monitoring\n□ (5) Performance incident runbook exists\n□ (5) Monthly performance review\n\nCommon Anti-Patterns\n1. PREMATURE OPTIMIZATION\n   Problem: Optimizing before measuring\n   Fix: Profile first, optimize the measured bottleneck\n\n2. MICRO-BENCHMARKING IN ISOLATION\n   Problem: Function is fast alone but slow in context (cache, contention)\n   Fix: Always benchmark in realistic conditions with realistic data\n\n3. 
OPTIMIZING THE WRONG LAYER\n   Problem: Tuning app code when the DB is the bottleneck\n   Fix: Use distributed tracing to find the actual bottleneck\n\n4. CACHING EVERYTHING\n   Problem: Cache invalidation bugs, stale data, memory pressure\n   Fix: Cache selectively using the decision matrix (Phase 3)\n\n5. PREMATURE HORIZONTAL SCALING\n   Problem: Adding instances when single instance is underoptimized\n   Fix: Vertical optimization first, scale second\n\n6. IGNORING TAIL LATENCY\n   Problem: p50 is fine but p99 is terrible\n   Fix: Investigate outliers — they're often the most important users\n\n7. LOAD TESTING IN DEV\n   Problem: Dev environment doesn't match production\n   Fix: Load test against staging with production-like data\n\n8. OPTIMIZING COLD PATHS\n   Problem: Spending time on rarely-executed code\n   Fix: Profile in production to find actual hot paths\n\nQuick Reference: Tool Selection\nTask\tRecommended Tool\tAlternative\nHTTP benchmarking\tk6\twrk, ab, hey\nCPU profiling (Node)\tclinic flame\t0x, --prof\nCPU profiling (Python)\tpy-spy\tScalene, cProfile\nCPU profiling (Go)\tpprof\tgo tool trace\nCPU profiling (Java)\tasync-profiler\tJFR, VisualVM\nMemory profiling\tlanguage-specific (see Phase 2)\t\nCLI benchmarking\thyperfine\ttime\nBundle analysis\twebpack-bundle-analyzer\tsource-map-explorer\nWeb performance\tLighthouse\tWebPageTest\nDB query analysis\tEXPLAIN ANALYZE\tpgMustard, pganalyze\nDistributed tracing\tJaeger, Zipkin\tOpenTelemetry\nAPM\tDatadog, New Relic\tGrafana + Prometheus\nContinuous profiling\tPyroscope\tParca\nNatural Language Commands\n\"Profile this function\"     → CPU profiling with flame graph\n\"Why is this endpoint slow\" → Full investigation brief + profiling\n\"Load test the API\"         → k6 test design and execution\n\"Check for memory leaks\"    → Heap snapshot comparison workflow\n\"Optimize this query\"       → EXPLAIN ANALYZE + index recommendations\n\"Review frontend perf\"      → Core Web Vitals audit + bundle 
analysis\n\"Plan capacity for 10x\"     → Capacity model with projections\n\"Set up perf monitoring\"    → CI/CD gates + dashboards + alerts\n\"Find the bottleneck\"       → Profiling decision tree walkthrough\n\"Score our performance\"     → Performance review checklist (0-100)\n\"Compare before and after\"  → Benchmark comparison methodology\n\"Reduce bundle size\"        → Bundle analysis + reduction strategies"
  },
  "trust": {
    "sourceLabel": "tencent",
    "provenanceUrl": "https://clawhub.ai/1kalin/afrexai-performance-engineering",
    "publisherUrl": "https://clawhub.ai/1kalin/afrexai-performance-engineering",
    "owner": "1kalin",
    "version": "1.0.0",
    "license": null,
    "verificationStatus": "Indexed source record"
  },
  "links": {
    "detailUrl": "https://openagent3.xyz/skills/afrexai-performance-engineering",
    "downloadUrl": "https://openagent3.xyz/downloads/afrexai-performance-engineering",
    "agentUrl": "https://openagent3.xyz/skills/afrexai-performance-engineering/agent",
    "manifestUrl": "https://openagent3.xyz/skills/afrexai-performance-engineering/agent.json",
    "briefUrl": "https://openagent3.xyz/skills/afrexai-performance-engineering/agent.md"
  }
}