{
  "schemaVersion": "1.0",
  "item": {
    "slug": "afrexai-database-engineer",
    "name": "Database Engineering Mastery",
    "source": "tencent",
    "type": "skill",
    "category": "数据分析",
    "sourceUrl": "https://clawhub.ai/1kalin/afrexai-database-engineer",
    "canonicalUrl": "https://clawhub.ai/1kalin/afrexai-database-engineer",
    "targetPlatform": "OpenClaw"
  },
  "install": {
    "downloadMode": "redirect",
    "downloadUrl": "/downloads/afrexai-database-engineer",
    "sourceDownloadUrl": "https://wry-manatee-359.convex.site/api/v1/download?slug=afrexai-database-engineer",
    "sourcePlatform": "tencent",
    "targetPlatform": "OpenClaw",
    "installMethod": "Manual import",
    "extraction": "Extract archive",
    "prerequisites": [
      "OpenClaw"
    ],
    "packageFormat": "ZIP package",
    "includedAssets": [
      "README.md",
      "SKILL.md"
    ],
    "primaryDoc": "SKILL.md",
    "quickSetup": [
      "Download the package from Yavira.",
      "Extract the archive and review SKILL.md first.",
      "Import or place the package into your OpenClaw setup."
    ],
    "agentAssist": {
      "summary": "Hand the extracted package to your coding agent with a concrete install brief instead of figuring it out manually.",
      "steps": [
        "Download the package from Yavira.",
        "Extract it into a folder your agent can access.",
        "Paste one of the prompts below and point your agent at the extracted folder."
      ],
      "prompts": [
        {
          "label": "New install",
          "body": "I downloaded a skill package from Yavira. Read SKILL.md from the extracted folder and install it by following the included instructions. Then review README.md for any prerequisites, environment setup, or post-install checks. Tell me what you changed and call out any manual steps you could not complete."
        },
        {
          "label": "Upgrade existing",
          "body": "I downloaded an updated skill package from Yavira. Read SKILL.md from the extracted folder, compare it with my current installation, and upgrade it while preserving any custom configuration unless the package docs explicitly say otherwise. Then review README.md for any prerequisites, environment setup, or post-install checks. Summarize what changed and any follow-up checks I should run."
        }
      ]
    },
    "sourceHealth": {
      "source": "tencent",
      "status": "healthy",
      "reason": "direct_download_ok",
      "recommendedAction": "download",
      "checkedAt": "2026-04-23T16:43:11.935Z",
      "expiresAt": "2026-04-30T16:43:11.935Z",
      "httpStatus": 200,
      "finalUrl": "https://wry-manatee-359.convex.site/api/v1/download?slug=4claw-imageboard",
      "contentType": "application/zip",
      "probeMethod": "head",
      "details": {
        "probeUrl": "https://wry-manatee-359.convex.site/api/v1/download?slug=4claw-imageboard",
        "contentDisposition": "attachment; filename=\"4claw-imageboard-1.0.1.zip\"",
        "redirectLocation": null,
        "bodySnippet": null
      },
      "scope": "source",
      "summary": "Source download looks usable.",
      "detail": "Yavira can redirect you to the upstream package for this source.",
      "primaryActionLabel": "Download for OpenClaw",
      "primaryActionHref": "/downloads/afrexai-database-engineer"
    },
    "validation": {
      "installChecklist": [
        "Use the Yavira download entry.",
        "Review SKILL.md after the package is downloaded.",
        "Confirm the extracted package contains the expected setup assets."
      ],
      "postInstallChecks": [
        "Confirm the extracted package includes the expected docs or setup files.",
        "Validate the skill or prompts are available in your target agent workspace.",
        "Capture any manual follow-up steps the agent could not complete."
      ]
    },
    "downloadPageUrl": "https://openagent3.xyz/downloads/afrexai-database-engineer",
    "agentPageUrl": "https://openagent3.xyz/skills/afrexai-database-engineer/agent",
    "manifestUrl": "https://openagent3.xyz/skills/afrexai-database-engineer/agent.json",
    "briefUrl": "https://openagent3.xyz/skills/afrexai-database-engineer/agent.md"
  },
  "agentAssist": {
    "summary": "Hand the extracted package to your coding agent with a concrete install brief instead of figuring it out manually.",
    "steps": [
      "Download the package from Yavira.",
      "Extract it into a folder your agent can access.",
      "Paste one of the prompts below and point your agent at the extracted folder."
    ],
    "prompts": [
      {
        "label": "New install",
        "body": "I downloaded a skill package from Yavira. Read SKILL.md from the extracted folder and install it by following the included instructions. Then review README.md for any prerequisites, environment setup, or post-install checks. Tell me what you changed and call out any manual steps you could not complete."
      },
      {
        "label": "Upgrade existing",
        "body": "I downloaded an updated skill package from Yavira. Read SKILL.md from the extracted folder, compare it with my current installation, and upgrade it while preserving any custom configuration unless the package docs explicitly say otherwise. Then review README.md for any prerequisites, environment setup, or post-install checks. Summarize what changed and any follow-up checks I should run."
      }
    ]
  },
  "documentation": {
    "source": "clawhub",
    "primaryDoc": "SKILL.md",
    "sections": [
      {
        "title": "Database Engineering Mastery",
        "body": "Complete database design, optimization, migration, and operations system. From schema design to production monitoring — covers PostgreSQL, MySQL, SQLite, and general SQL patterns."
      },
      {
        "title": "Design Brief",
        "body": "Before writing any DDL, fill this out:\n\nproject: \"\"\ndomain: \"\"\nprimary_use_case: \"OLTP | OLAP | mixed\"\nexpected_scale:\n  rows_year_1: \"\"\n  rows_year_3: \"\"\n  concurrent_users: \"\"\n  read_write_ratio: \"80:20 | 50:50 | 20:80\"\ncompliance: [] # GDPR, HIPAA, PCI-DSS, SOX\nmulti_tenancy: \"none | schema-per-tenant | row-level | database-per-tenant\""
      },
      {
        "title": "Normalization Decision Framework",
        "body": "FormRuleWhen to Denormalize1NFNo repeating groups, atomic valuesNever skip2NFNo partial dependencies on composite keysNever skip3NFNo transitive dependenciesReporting tables, read-heavy aggregationsBCNFEvery determinant is a candidate keyRarely needed unless complex key relationships\n\nDenormalization triggers:\n\nQuery joins > 4 tables consistently\nRead latency > 100ms on indexed queries\nCache invalidation complexity exceeds denormalization maintenance\nReporting queries block OLTP workloads"
      },
      {
        "title": "Naming Conventions",
        "body": "Tables:      snake_case, plural (users, order_items, payment_methods)\nColumns:     snake_case, singular (first_name, created_at, is_active)\nPKs:         id (bigint/uuid) or {table_singular}_id\nFKs:         {referenced_table_singular}_id\nIndexes:     idx_{table}_{columns}\nConstraints: chk_{table}_{rule}, uq_{table}_{columns}, fk_{table}_{ref}\nEnums:       Use VARCHAR + CHECK, not DB enums (easier to migrate)\nBooleans:    is_, has_, can_ prefix (is_active, has_subscription)\nTimestamps:  _at suffix (created_at, updated_at, deleted_at)"
      },
      {
        "title": "Column Type Decision Tree",
        "body": "Text < 255 chars, fixed set?     → VARCHAR(N) + CHECK\nText < 255 chars, variable?      → VARCHAR(255)\nText > 255 chars?                → TEXT\nWhole numbers < 2B?              → INTEGER\nWhole numbers > 2B?              → BIGINT\nMoney/financial?                 → NUMERIC(precision, scale) — NEVER float\nTrue/false?                      → BOOLEAN\nDate only?                       → DATE\nDate + time?                     → TIMESTAMPTZ (always with timezone)\nUnique identifier?               → UUID (distributed) or BIGSERIAL (single DB)\nJSON/flexible schema?            → JSONB (Postgres) or JSON (MySQL)\nBinary/file?                     → Store in object storage, reference by URL\nIP address?                      → INET (Postgres) or VARCHAR(45)\nGeospatial?                      → PostGIS geometry/geography types"
      },
      {
        "title": "Essential Table Template",
        "body": "CREATE TABLE {table_name} (\n    id          BIGSERIAL PRIMARY KEY,\n    -- domain columns here --\n    created_at  TIMESTAMPTZ NOT NULL DEFAULT NOW(),\n    updated_at  TIMESTAMPTZ NOT NULL DEFAULT NOW(),\n    created_by  BIGINT REFERENCES users(id),\n    version     INTEGER NOT NULL DEFAULT 1,  -- optimistic locking\n    \n    -- soft delete (optional)\n    deleted_at  TIMESTAMPTZ,\n    \n    -- multi-tenant (optional)  \n    tenant_id   BIGINT NOT NULL REFERENCES tenants(id)\n);\n\n-- Updated_at trigger (PostgreSQL)\nCREATE OR REPLACE FUNCTION update_modified_column()\nRETURNS TRIGGER AS $$\nBEGIN\n    NEW.updated_at = NOW();\n    NEW.version = OLD.version + 1;\n    RETURN NEW;\nEND;\n$$ LANGUAGE plpgsql;\n\nCREATE TRIGGER trg_{table_name}_updated\n    BEFORE UPDATE ON {table_name}\n    FOR EACH ROW\n    EXECUTE FUNCTION update_modified_column();"
      },
      {
        "title": "Relationship Patterns",
        "body": "One-to-Many:\n\n-- Parent\nCREATE TABLE departments (id BIGSERIAL PRIMARY KEY, name VARCHAR(100) NOT NULL);\n-- Child  \nCREATE TABLE employees (\n    id BIGSERIAL PRIMARY KEY,\n    department_id BIGINT NOT NULL REFERENCES departments(id) ON DELETE RESTRICT,\n    -- ON DELETE options: RESTRICT (safe default), CASCADE (children die), SET NULL\n);\nCREATE INDEX idx_employees_department_id ON employees(department_id);\n\nMany-to-Many:\n\nCREATE TABLE user_roles (\n    user_id BIGINT NOT NULL REFERENCES users(id) ON DELETE CASCADE,\n    role_id BIGINT NOT NULL REFERENCES roles(id) ON DELETE CASCADE,\n    granted_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),\n    granted_by BIGINT REFERENCES users(id),\n    PRIMARY KEY (user_id, role_id)\n);\n\nSelf-Referencing (hierarchy):\n\nCREATE TABLE categories (\n    id BIGSERIAL PRIMARY KEY,\n    parent_id BIGINT REFERENCES categories(id) ON DELETE CASCADE,\n    name VARCHAR(100) NOT NULL,\n    depth INTEGER NOT NULL DEFAULT 0,\n    path TEXT NOT NULL DEFAULT ''  -- materialized path: '/1/5/12/'\n);\nCREATE INDEX idx_categories_parent ON categories(parent_id);\nCREATE INDEX idx_categories_path ON categories(path text_pattern_ops);\n\nPolymorphic (avoid if possible, use if you must):\n\n-- Preferred: separate FKs\nCREATE TABLE comments (\n    id BIGSERIAL PRIMARY KEY,\n    post_id BIGINT REFERENCES posts(id),\n    ticket_id BIGINT REFERENCES tickets(id),\n    body TEXT NOT NULL,\n    CONSTRAINT chk_one_parent CHECK (\n        (post_id IS NOT NULL)::int + (ticket_id IS NOT NULL)::int = 1\n    )\n);"
      },
      {
        "title": "Index Type Selection",
        "body": "Index TypeUse WhenExampleB-tree (default)Equality, range, sorting, LIKE 'prefix%'CREATE INDEX idx_users_email ON users(email)HashEquality only, no rangeCREATE INDEX idx_sessions_token ON sessions USING hash(token)GINJSONB, full-text search, arrays, tsvectorCREATE INDEX idx_products_tags ON products USING gin(tags)GiSTGeospatial, range types, nearest-neighborCREATE INDEX idx_locations_geom ON locations USING gist(geom)BRINVery large tables with natural ordering (time-series)CREATE INDEX idx_events_created ON events USING brin(created_at)PartialSubset of rowsCREATE INDEX idx_orders_pending ON orders(created_at) WHERE status = 'pending'CoveringInclude columns to avoid table lookupCREATE INDEX idx_orders_user ON orders(user_id) INCLUDE (status, total)"
      },
      {
        "title": "Indexing Rules",
        "body": "Always index: Foreign keys, columns in WHERE/JOIN/ORDER BY\nNever index: Low-cardinality columns alone (boolean, status with 3 values) — combine in composite\nComposite order: Most selective column first, then left-to-right matches query patterns\nWatch write overhead: Each index slows INSERT/UPDATE. >8 indexes on a write-heavy table = review\nUnused index audit: Run monthly — drop indexes with 0 scans"
      },
      {
        "title": "Find Unused Indexes (PostgreSQL)",
        "body": "SELECT schemaname, tablename, indexname, idx_scan, \n       pg_size_pretty(pg_relation_size(indexrelid)) as size\nFROM pg_stat_user_indexes\nWHERE idx_scan = 0 AND indexrelid NOT IN (\n    SELECT conindid FROM pg_constraint WHERE contype IN ('p', 'u')\n)\nORDER BY pg_relation_size(indexrelid) DESC;"
      },
      {
        "title": "Find Missing Indexes (PostgreSQL)",
        "body": "SELECT relname, seq_scan, seq_tup_read, \n       idx_scan, seq_tup_read / GREATEST(seq_scan, 1) as avg_tuples_per_scan\nFROM pg_stat_user_tables\nWHERE seq_scan > 100 AND seq_tup_read > 10000\nORDER BY seq_tup_read DESC;\n-- High seq_scan + high seq_tup_read = missing index candidate"
      },
      {
        "title": "EXPLAIN Interpretation",
        "body": "EXPLAIN (ANALYZE, BUFFERS, FORMAT TEXT) SELECT ...;\n\nRed flags in query plans:\n\nPatternProblemFixSeq Scan on large tableMissing indexAdd appropriate indexNested Loop with large outerO(n×m) joinAdd index on join column, consider Hash JoinSort with high costMissing index for ORDER BYAdd index matching sort orderHash Join spilling to diskwork_mem too lowIncrease work_mem or reduce result setBitmap Heap Scan with many recheckLow selectivity indexMore selective index or partial indexSubPlan (correlated subquery)Executes per rowRewrite as JOIN or lateralRows estimate wildly wrongStale statisticsANALYZE table"
      },
      {
        "title": "Query Anti-Patterns & Fixes",
        "body": "1. SELECT * in production:\n\n-- Bad: fetches all columns, breaks covering indexes\nSELECT * FROM orders WHERE user_id = 123;\n-- Good: explicit columns\nSELECT id, status, total, created_at FROM orders WHERE user_id = 123;\n\n2. N+1 queries:\n\n-- Bad: 1 query for users + N queries for orders\nSELECT id FROM users WHERE active = true;  -- returns 100 rows\nSELECT * FROM orders WHERE user_id = ?;     -- called 100 times\n\n-- Good: single JOIN or IN\nSELECT u.id, o.id, o.total \nFROM users u\nJOIN orders o ON o.user_id = u.id\nWHERE u.active = true;\n\n3. Functions on indexed columns:\n\n-- Bad: can't use index on created_at\nWHERE EXTRACT(YEAR FROM created_at) = 2025\n-- Good: range scan uses index\nWHERE created_at >= '2025-01-01' AND created_at < '2026-01-01'\n\n-- Bad: can't use index on email  \nWHERE LOWER(email) = 'user@example.com'\n-- Good: expression index\nCREATE INDEX idx_users_email_lower ON users(LOWER(email));\n\n4. OR conditions killing indexes:\n\n-- Bad: often causes Seq Scan\nWHERE status = 'pending' OR status = 'processing'\n-- Good: IN uses index\nWHERE status IN ('pending', 'processing')\n\n5. Pagination with OFFSET:\n\n-- Bad: OFFSET 10000 scans and discards 10000 rows\nSELECT * FROM products ORDER BY id LIMIT 20 OFFSET 10000;\n-- Good: keyset pagination\nSELECT * FROM products WHERE id > :last_seen_id ORDER BY id LIMIT 20;\n\n6. COUNT(*) on large tables:\n\n-- Bad: full table scan\nSELECT COUNT(*) FROM events;\n-- Good: approximate count (PostgreSQL)\nSELECT reltuples::bigint FROM pg_class WHERE relname = 'events';\n-- Or maintain a counter cache table"
      },
      {
        "title": "Window Functions Reference",
        "body": "-- Running total\nSELECT id, amount, SUM(amount) OVER (ORDER BY created_at) as running_total FROM payments;\n\n-- Rank within group\nSELECT *, RANK() OVER (PARTITION BY department_id ORDER BY salary DESC) as dept_rank FROM employees;\n\n-- Previous/next row\nSELECT *, LAG(amount) OVER (ORDER BY created_at) as prev_amount,\n          LEAD(amount) OVER (ORDER BY created_at) as next_amount FROM payments;\n\n-- Moving average\nSELECT *, AVG(amount) OVER (ORDER BY created_at ROWS BETWEEN 6 PRECEDING AND CURRENT ROW) as ma_7 FROM daily_sales;\n\n-- Percent of total\nSELECT *, amount / SUM(amount) OVER () * 100 as pct_of_total FROM line_items WHERE order_id = 1;"
      },
      {
        "title": "CTE Patterns",
        "body": "-- Recursive: org chart traversal\nWITH RECURSIVE org AS (\n    SELECT id, name, manager_id, 1 as depth FROM employees WHERE manager_id IS NULL\n    UNION ALL\n    SELECT e.id, e.name, e.manager_id, o.depth + 1\n    FROM employees e JOIN org o ON e.manager_id = o.id\n    WHERE o.depth < 10  -- safety limit\n)\nSELECT * FROM org ORDER BY depth, name;\n\n-- Data pipeline: clean → transform → aggregate\nWITH cleaned AS (\n    SELECT *, TRIM(LOWER(email)) as clean_email FROM raw_signups WHERE email IS NOT NULL\n),\ndeduped AS (\n    SELECT DISTINCT ON (clean_email) * FROM cleaned ORDER BY clean_email, created_at DESC\n)\nSELECT DATE_TRUNC('week', created_at) as week, COUNT(*) FROM deduped GROUP BY 1 ORDER BY 1;"
      },
      {
        "title": "Migration Safety Rules",
        "body": "Never rename columns/tables in production without a multi-step process\nNever add NOT NULL without a DEFAULT on existing tables with data\nNever drop columns that application code still references\nAlways test migrations on a copy of production data first\nAlways have a rollback plan (down migration)\nAlways take a backup before schema changes in production"
      },
      {
        "title": "Safe Migration Patterns",
        "body": "Add column (safe):\n\n-- Step 1: Add nullable column\nALTER TABLE users ADD COLUMN phone VARCHAR(20);\n-- Step 2: Backfill (in batches!)\nUPDATE users SET phone = '' WHERE phone IS NULL AND id BETWEEN 1 AND 10000;\n-- Step 3: Add NOT NULL after backfill\nALTER TABLE users ALTER COLUMN phone SET NOT NULL;\nALTER TABLE users ALTER COLUMN phone SET DEFAULT '';\n\nRename column (safe multi-step):\n\n-- Step 1: Add new column\nALTER TABLE users ADD COLUMN full_name VARCHAR(200);\n-- Step 2: Dual-write in application code (write to both old + new)\n-- Step 3: Backfill\nUPDATE users SET full_name = name WHERE full_name IS NULL;\n-- Step 4: Switch application to read from new column\n-- Step 5: Drop old column (after confirming no reads)\nALTER TABLE users DROP COLUMN name;\n\nAdd index without locking (PostgreSQL):\n\nCREATE INDEX CONCURRENTLY idx_orders_customer ON orders(customer_id);\n-- Takes longer but doesn't lock the table\n\nLarge table backfill (batched):\n\n-- Don't: UPDATE millions of rows in one transaction\n-- Do: batch it\nDO $$\nDECLARE\n    batch_size INT := 5000;\n    affected INT;\nBEGIN\n    LOOP\n        UPDATE users SET normalized_email = LOWER(email)\n        WHERE normalized_email IS NULL AND id IN (\n            SELECT id FROM users WHERE normalized_email IS NULL LIMIT batch_size\n        );\n        GET DIAGNOSTICS affected = ROW_COUNT;\n        RAISE NOTICE 'Updated % rows', affected;\n        EXIT WHEN affected = 0;\n        COMMIT;\n    END LOOP;\nEND $$;"
      },
      {
        "title": "Migration File Template",
        "body": "-- Migration: YYYYMMDDHHMMSS_description.sql\n-- Author: [name]\n-- Ticket: [JIRA/Linear ID]\n-- Risk: low|medium|high\n-- Rollback: see DOWN section\n-- Estimated time: [for production data volume]\n-- Requires: [prerequisite migrations]\n\n-- ========== UP ==========\nBEGIN;\n\n-- [DDL/DML here]\n\nCOMMIT;\n\n-- ========== DOWN ==========\n-- BEGIN;\n-- [Rollback DDL/DML here]\n-- COMMIT;\n\n-- ========== VERIFY ==========\n-- [Queries to confirm migration succeeded]\n-- SELECT COUNT(*) FROM ... WHERE ...;"
      },
      {
        "title": "Key Metrics Dashboard",
        "body": "health_metrics:\n  connections:\n    active: \"SELECT count(*) FROM pg_stat_activity WHERE state = 'active'\"\n    idle: \"SELECT count(*) FROM pg_stat_activity WHERE state = 'idle'\"\n    max: \"SHOW max_connections\"\n    threshold: \"active > 80% of max = ALERT\"\n    \n  cache_hit_ratio:\n    query: |\n      SELECT ROUND(100.0 * sum(heap_blks_hit) / \n             NULLIF(sum(heap_blks_hit) + sum(heap_blks_read), 0), 2) as ratio\n      FROM pg_statio_user_tables\n    healthy: \"> 99%\"\n    warning: \"< 95%\"\n    critical: \"< 90%\"\n    \n  index_hit_ratio:\n    query: |\n      SELECT ROUND(100.0 * sum(idx_blks_hit) / \n             NULLIF(sum(idx_blks_hit) + sum(idx_blks_read), 0), 2) as ratio\n      FROM pg_statio_user_indexes\n    healthy: \"> 99%\"\n    \n  table_bloat:\n    query: |\n      SELECT relname, n_dead_tup, n_live_tup,\n             ROUND(100.0 * n_dead_tup / NULLIF(n_live_tup, 0), 2) as dead_pct\n      FROM pg_stat_user_tables WHERE n_dead_tup > 10000\n      ORDER BY n_dead_tup DESC LIMIT 10\n    action: \"VACUUM ANALYZE {table} when dead_pct > 20%\"\n    \n  slow_queries:\n    query: |\n      SELECT query, calls, mean_exec_time, total_exec_time\n      FROM pg_stat_statements\n      ORDER BY mean_exec_time DESC LIMIT 20\n    action: \"Optimize top 5 by total_exec_time first\"\n    \n  replication_lag:\n    query: |\n      SELECT EXTRACT(EPOCH FROM replay_lag) as lag_seconds\n      FROM pg_stat_replication\n    warning: \"> 5 seconds\"\n    critical: \"> 30 seconds\""
      },
      {
        "title": "Table Size Analysis",
        "body": "SELECT \n    relname as table,\n    pg_size_pretty(pg_total_relation_size(relid)) as total_size,\n    pg_size_pretty(pg_relation_size(relid)) as table_size,\n    pg_size_pretty(pg_total_relation_size(relid) - pg_relation_size(relid)) as index_size,\n    n_live_tup as row_count\nFROM pg_stat_user_tables\nORDER BY pg_total_relation_size(relid) DESC\nLIMIT 20;"
      },
      {
        "title": "Lock Monitoring",
        "body": "-- Find blocking queries\nSELECT \n    blocked.pid as blocked_pid,\n    blocked.query as blocked_query,\n    blocking.pid as blocking_pid,\n    blocking.query as blocking_query,\n    NOW() - blocked.query_start as blocked_duration\nFROM pg_stat_activity blocked\nJOIN pg_locks bl ON bl.pid = blocked.pid\nJOIN pg_locks kl ON kl.locktype = bl.locktype AND kl.relation = bl.relation AND kl.pid != bl.pid\nJOIN pg_stat_activity blocking ON blocking.pid = kl.pid\nWHERE NOT bl.granted;"
      },
      {
        "title": "Backup Strategy Decision",
        "body": "MethodRPOSpeedUse Whenpg_dump (logical)Point-in-timeSlow for >50GBSmall-medium DBs, cross-version migrationpg_basebackup (physical)Continuous (with WAL)FastLarge DBs, same-version restoreWAL archiving (PITR)SecondsN/A (continuous)Production with near-zero RPOReplica promotionSecondsInstantHA failover"
      },
      {
        "title": "Backup Commands",
        "body": "# Logical backup (compressed)\npg_dump -Fc -Z 9 -j 4 -d mydb -f backup_$(date +%Y%m%d_%H%M%S).dump\n\n# Restore\npg_restore -d mydb -j 4 --clean --if-exists backup_20260216.dump\n\n# Schema only\npg_dump -s -d mydb -f schema.sql\n\n# Single table\npg_dump -t orders -d mydb -f orders_backup.dump\n\n# Physical backup\npg_basebackup -D /backup/base -Ft -z -P -X stream"
      },
      {
        "title": "Backup Verification Checklist",
        "body": "Backup completes without errors\n Backup file size is within expected range (not suspiciously small)\n Restore to a test database succeeds\n Row counts match production (spot check 5 tables)\n Application can connect and query the restored database\n Run automated test suite against restored backup\n Backup encryption verified (if required)\n Offsite copy confirmed"
      },
      {
        "title": "Access Control Checklist",
        "body": "-- Create application role (least privilege)\nCREATE ROLE app_user LOGIN PASSWORD 'use-vault-not-plaintext';\nGRANT CONNECT ON DATABASE mydb TO app_user;\nGRANT USAGE ON SCHEMA public TO app_user;\nGRANT SELECT, INSERT, UPDATE, DELETE ON ALL TABLES IN SCHEMA public TO app_user;\n-- NO: GRANT ALL, superuser, CREATE, DROP\n\n-- Read-only role for analytics\nCREATE ROLE analyst LOGIN PASSWORD 'use-vault';\nGRANT CONNECT ON DATABASE mydb TO analyst;\nGRANT USAGE ON SCHEMA public TO analyst;\nGRANT SELECT ON ALL TABLES IN SCHEMA public TO analyst;\n\n-- Row-Level Security (multi-tenant)\nALTER TABLE orders ENABLE ROW LEVEL SECURITY;\nCREATE POLICY tenant_isolation ON orders\n    USING (tenant_id = current_setting('app.tenant_id')::bigint);"
      },
      {
        "title": "SQL Injection Prevention",
        "body": "RULE 1: NEVER concatenate user input into SQL strings\nRULE 2: Always use parameterized queries / prepared statements\nRULE 3: Validate and whitelist table/column names if dynamic\nRULE 4: Use ORMs for CRUD, raw SQL only for complex queries\nRULE 5: Audit logs for unusual query patterns (UNION, DROP, --)"
      },
      {
        "title": "Data Protection",
        "body": "-- Encrypt sensitive columns (application-level)\n-- Store: pgp_sym_encrypt(data, key) \n-- Read: pgp_sym_decrypt(encrypted_col, key)\n\n-- Audit trail table\nCREATE TABLE audit_log (\n    id BIGSERIAL PRIMARY KEY,\n    table_name VARCHAR(100) NOT NULL,\n    record_id BIGINT NOT NULL,\n    action VARCHAR(10) NOT NULL, -- INSERT, UPDATE, DELETE\n    old_data JSONB,\n    new_data JSONB,\n    changed_by BIGINT REFERENCES users(id),\n    changed_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),\n    ip_address INET\n);\n\n-- Generic audit trigger\nCREATE OR REPLACE FUNCTION audit_trigger() RETURNS TRIGGER AS $$\nBEGIN\n    INSERT INTO audit_log (table_name, record_id, action, old_data, new_data, changed_by)\n    VALUES (\n        TG_TABLE_NAME,\n        COALESCE(NEW.id, OLD.id),\n        TG_OP,\n        CASE WHEN TG_OP != 'INSERT' THEN to_jsonb(OLD) END,\n        CASE WHEN TG_OP != 'DELETE' THEN to_jsonb(NEW) END,\n        current_setting('app.user_id', true)::bigint\n    );\n    RETURN COALESCE(NEW, OLD);\nEND;\n$$ LANGUAGE plpgsql;"
      },
      {
        "title": "Essential Settings by Server Size",
        "body": "SettingSmall (4GB RAM)Medium (16GB)Large (64GB+)shared_buffers1GB4GB16GBeffective_cache_size3GB12GB48GBwork_mem16MB64MB256MBmaintenance_work_mem256MB1GB2GBmax_connections100200300wal_buffers64MB128MB256MBrandom_page_cost1.1 (SSD)1.1 (SSD)1.1 (SSD)effective_io_concurrency200 (SSD)200 (SSD)200 (SSD)max_parallel_workers_per_gather248"
      },
      {
        "title": "Connection Pooling (PgBouncer)",
        "body": "[databases]\nmydb = host=127.0.0.1 port=5432 dbname=mydb\n\n[pgbouncer]\npool_mode = transaction          # transaction pooling (best for most apps)\nmax_client_conn = 1000           # accept up to 1000 app connections\ndefault_pool_size = 25           # 25 actual DB connections per database\nreserve_pool_size = 5            # extra connections for burst\nreserve_pool_timeout = 3         # seconds before using reserve\nserver_idle_timeout = 300        # close idle server connections after 5 min"
      },
      {
        "title": "Soft Delete",
        "body": "-- Add to table\nALTER TABLE users ADD COLUMN deleted_at TIMESTAMPTZ;\nCREATE INDEX idx_users_active ON users(id) WHERE deleted_at IS NULL;\n\n-- Application queries always filter\nSELECT * FROM users WHERE deleted_at IS NULL AND ...;\n\n-- Or use a view\nCREATE VIEW active_users AS SELECT * FROM users WHERE deleted_at IS NULL;"
      },
      {
        "title": "Optimistic Locking",
        "body": "UPDATE products SET \n    price = 29.99, \n    version = version + 1, \n    updated_at = NOW()\nWHERE id = 123 AND version = 5;  -- expected version\n-- If 0 rows affected → concurrent modification → retry or error"
      },
      {
        "title": "Event Sourcing Table",
        "body": "CREATE TABLE events (\n    id BIGSERIAL PRIMARY KEY,\n    aggregate_type VARCHAR(50) NOT NULL,\n    aggregate_id UUID NOT NULL,\n    event_type VARCHAR(100) NOT NULL,\n    event_data JSONB NOT NULL,\n    metadata JSONB DEFAULT '{}',\n    version INTEGER NOT NULL,\n    created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),\n    UNIQUE (aggregate_id, version)\n);\nCREATE INDEX idx_events_aggregate ON events(aggregate_id, version);\nCREATE INDEX idx_events_type ON events(event_type, created_at);"
      },
      {
        "title": "Time-Series Optimization",
        "body": "-- Partitioned by month\nCREATE TABLE metrics (\n    id BIGSERIAL,\n    sensor_id INTEGER NOT NULL,\n    value NUMERIC(12,4) NOT NULL,\n    recorded_at TIMESTAMPTZ NOT NULL\n) PARTITION BY RANGE (recorded_at);\n\nCREATE TABLE metrics_2026_01 PARTITION OF metrics\n    FOR VALUES FROM ('2026-01-01') TO ('2026-02-01');\nCREATE TABLE metrics_2026_02 PARTITION OF metrics\n    FOR VALUES FROM ('2026-02-01') TO ('2026-03-01');\n\n-- Auto-create future partitions via cron or pg_partman\n-- Use BRIN index for time-series\nCREATE INDEX idx_metrics_time ON metrics USING brin(recorded_at);"
      },
      {
        "title": "Full-Text Search (PostgreSQL)",
        "body": "-- Add search column\nALTER TABLE articles ADD COLUMN search_vector tsvector;\nCREATE INDEX idx_articles_search ON articles USING gin(search_vector);\n\n-- Populate\nUPDATE articles SET search_vector = \n    setweight(to_tsvector('english', COALESCE(title, '')), 'A') ||\n    setweight(to_tsvector('english', COALESCE(body, '')), 'B');\n\n-- Search with ranking\nSELECT id, title, ts_rank(search_vector, query) as rank\nFROM articles, plainto_tsquery('english', 'database optimization') query\nWHERE search_vector @@ query\nORDER BY rank DESC LIMIT 20;"
      },
      {
        "title": "JSONB Patterns",
        "body": "-- Store flexible attributes\nCREATE TABLE products (\n    id BIGSERIAL PRIMARY KEY,\n    name VARCHAR(200) NOT NULL,\n    attributes JSONB NOT NULL DEFAULT '{}',\n    created_at TIMESTAMPTZ NOT NULL DEFAULT NOW()\n);\n\n-- Index specific JSON paths\nCREATE INDEX idx_products_color ON products((attributes->>'color'));\n-- Or GIN for any key lookups\nCREATE INDEX idx_products_attrs ON products USING gin(attributes);\n\n-- Query patterns\nSELECT * FROM products WHERE attributes->>'color' = 'red';\nSELECT * FROM products WHERE attributes @> '{\"size\": \"large\"}';\nSELECT * FROM products WHERE attributes ? 'warranty';"
      },
      {
        "title": "Emergency: Database Overloaded",
        "body": "-- 1. Find and kill long-running queries\nSELECT pid, NOW() - query_start as duration, query \nFROM pg_stat_activity WHERE state = 'active' AND query_start < NOW() - INTERVAL '5 minutes'\nORDER BY duration DESC;\n\n-- Kill a specific query\nSELECT pg_cancel_backend(pid);    -- graceful\nSELECT pg_terminate_backend(pid); -- force\n\n-- 2. Check for lock contention (see Phase 5)\n\n-- 3. Reduce max connections temporarily\n-- In pgbouncer: pause database, reduce pool, resume\n\n-- 4. Check if VACUUM is needed\nSELECT relname, n_dead_tup, last_autovacuum FROM pg_stat_user_tables \nWHERE n_dead_tup > 100000 ORDER BY n_dead_tup DESC;"
      },
      {
        "title": "Emergency: Disk Full",
        "body": "# 1. Check what's consuming space\ndu -sh /var/lib/postgresql/*/main/ 2>/dev/null || du -sh /var/lib/mysql/\n\n# 2. Clean up WAL files (PostgreSQL) — CAREFUL\n# Check replication slot status first\nSELECT slot_name, active FROM pg_replication_slots;\n# Drop inactive slots consuming WAL\nSELECT pg_drop_replication_slot('unused_slot');\n\n# 3. VACUUM FULL largest tables (locks table!)\nVACUUM FULL large_table;\n\n# 4. Remove old backups / logs\nfind /backups -name \"*.dump\" -mtime +7 -delete"
      },
      {
        "title": "Weekly Maintenance Checklist",
        "body": "Review slow query log (top 10 by total time)\n Check index usage stats — drop unused, add missing\n Verify backup success and test restore\n Check table bloat — schedule VACUUM where needed\n Review connection count trends\n Check disk space trajectory\n Review replication lag\n Update table statistics: ANALYZE;"
      },
      {
        "title": "Phase 11 — Database Comparison Quick Reference",
        "body": "FeaturePostgreSQLMySQL (InnoDB)SQLiteBest forComplex queries, extensionsWeb apps, read-heavyEmbedded, dev, small appsMax sizeUnlimited (practical)Unlimited (practical)281 TB (practical ~1TB)JSON supportJSONB (indexable, fast)JSON (limited indexing)JSON1 extensionFull-text searchBuilt-in (tsvector)Built-in (FULLTEXT)FTS5 extensionWindow functionsFull supportFull support (8.0+)Full support (3.25+)CTEsRecursive + materializedRecursive (8.0+)Recursive (3.8+)PartitioningDeclarative + list/range/hashRange/list/hash/keyNoneRow-level securityYesNo (use views)NoReplicationStreaming + logicalBinary logNone (use Litestream)Connection modelProcess per connectionThread per connectionIn-process"
      },
      {
        "title": "Quality Scoring Rubric (0-100)",
        "body": "Dimension\tWeight\t0 (Poor)\t5 (Good)\t10 (Excellent)\nSchema Design\t20%\tNo normalization, no constraints\t3NF, FKs, proper types\tOptimal normal form, all constraints, audit fields\nIndexing\t15%\tNo indexes beyond PK\tIndexes on FKs and common queries\tCovering indexes, partials, no unused indexes\nQuery Quality\t20%\tSELECT *, N+1, no EXPLAIN\tSpecific columns, JOINs, basic optimization\tKeyset pagination, window functions, optimized plans\nMigration Safety\t10%\tRaw DDL, no rollback\tVersioned files, up/down\tZero-downtime, batched backfills, concurrent indexes\nSecurity\t15%\tSuperuser access, no audit\tLeast privilege, parameterized queries\tRLS, encryption, audit triggers, regular access review\nMonitoring\t10%\tNo monitoring\tBasic alerts on connections/disk\tFull dashboard, slow query analysis, proactive tuning\nBackup/Recovery\t10%\tNo backups\tDaily dumps\tPITR, tested restores, offsite copies\n\nScore interpretation: <40 = Critical risk | 40-60 = Needs work | 60-80 = Solid | 80-90 = Professional | 90+ = Expert"
      },
      {
        "title": "Natural Language Commands",
        "body": "\"Design a schema for [domain]\" → Phase 1 full design process\n\"Optimize this query: [SQL]\" → EXPLAIN analysis + rewrite\n\"Add an index for [query pattern]\" → Index type selection + creation\n\"Write a migration to [change]\" → Safe migration with rollback\n\"Audit this database\" → Full scoring across all dimensions\n\"Set up monitoring for [database]\" → Phase 5 dashboard queries\n\"Review this schema\" → Naming, types, constraints, relationships check\n\"Help me with [PostgreSQL/MySQL/SQLite] [topic]\" → Platform-specific guidance\n\"Troubleshoot slow queries\" → pg_stat_statements analysis + top fixes\n\"Plan a backup strategy\" → Phase 6 decision framework\n\"Make this table multi-tenant\" → RLS + tenant_id pattern\n\"Convert this to use partitioning\" → Phase 9 time-series pattern"
      }
    ],
    "body": "Database Engineering Mastery\n\nComplete database design, optimization, migration, and operations system. From schema design to production monitoring — covers PostgreSQL, MySQL, SQLite, and general SQL patterns.\n\nPhase 1 — Schema Design\nDesign Brief\n\nBefore writing any DDL, fill this out:\n\nproject: \"\"\ndomain: \"\"\nprimary_use_case: \"OLTP | OLAP | mixed\"\nexpected_scale:\n  rows_year_1: \"\"\n  rows_year_3: \"\"\n  concurrent_users: \"\"\n  read_write_ratio: \"80:20 | 50:50 | 20:80\"\ncompliance: [] # GDPR, HIPAA, PCI-DSS, SOX\nmulti_tenancy: \"none | schema-per-tenant | row-level | database-per-tenant\"\n\nNormalization Decision Framework\nForm\tRule\tWhen to Denormalize\n1NF\tNo repeating groups, atomic values\tNever skip\n2NF\tNo partial dependencies on composite keys\tNever skip\n3NF\tNo transitive dependencies\tReporting tables, read-heavy aggregations\nBCNF\tEvery determinant is a candidate key\tRarely needed unless complex key relationships\n\nDenormalization triggers:\n\nQuery joins > 4 tables consistently\nRead latency > 100ms on indexed queries\nCache invalidation complexity exceeds denormalization maintenance\nReporting queries block OLTP workloads\nNaming Conventions\nTables:      snake_case, plural (users, order_items, payment_methods)\nColumns:     snake_case, singular (first_name, created_at, is_active)\nPKs:         id (bigint/uuid) or {table_singular}_id\nFKs:         {referenced_table_singular}_id\nIndexes:     idx_{table}_{columns}\nConstraints: chk_{table}_{rule}, uq_{table}_{columns}, fk_{table}_{ref}\nEnums:       Use VARCHAR + CHECK, not DB enums (easier to migrate)\nBooleans:    is_, has_, can_ prefix (is_active, has_subscription)\nTimestamps:  _at suffix (created_at, updated_at, deleted_at)\n\nColumn Type Decision Tree\nText < 255 chars, fixed set?     → VARCHAR(N) + CHECK\nText < 255 chars, variable?      → VARCHAR(255)\nText > 255 chars?                → TEXT\nWhole numbers < 2B?              
→ INTEGER\nWhole numbers > 2B?              → BIGINT\nMoney/financial?                 → NUMERIC(precision, scale) — NEVER float\nTrue/false?                      → BOOLEAN\nDate only?                       → DATE\nDate + time?                     → TIMESTAMPTZ (always with timezone)\nUnique identifier?               → UUID (distributed) or BIGSERIAL (single DB)\nJSON/flexible schema?            → JSONB (Postgres) or JSON (MySQL)\nBinary/file?                     → Store in object storage, reference by URL\nIP address?                      → INET (Postgres) or VARCHAR(45)\nGeospatial?                      → PostGIS geometry/geography types\n\nEssential Table Template\nCREATE TABLE {table_name} (\n    id          BIGSERIAL PRIMARY KEY,\n    -- domain columns here --\n    created_at  TIMESTAMPTZ NOT NULL DEFAULT NOW(),\n    updated_at  TIMESTAMPTZ NOT NULL DEFAULT NOW(),\n    created_by  BIGINT REFERENCES users(id),\n    version     INTEGER NOT NULL DEFAULT 1,  -- optimistic locking\n    \n    -- soft delete (optional)\n    deleted_at  TIMESTAMPTZ,\n    \n    -- multi-tenant (optional)  \n    tenant_id   BIGINT NOT NULL REFERENCES tenants(id)\n);\n\n-- Updated_at trigger (PostgreSQL)\nCREATE OR REPLACE FUNCTION update_modified_column()\nRETURNS TRIGGER AS $$\nBEGIN\n    NEW.updated_at = NOW();\n    NEW.version = OLD.version + 1;\n    RETURN NEW;\nEND;\n$$ LANGUAGE plpgsql;\n\nCREATE TRIGGER trg_{table_name}_updated\n    BEFORE UPDATE ON {table_name}\n    FOR EACH ROW\n    EXECUTE FUNCTION update_modified_column();\n\nRelationship Patterns\n\nOne-to-Many:\n\n-- Parent\nCREATE TABLE departments (id BIGSERIAL PRIMARY KEY, name VARCHAR(100) NOT NULL);\n-- Child  \nCREATE TABLE employees (\n    id BIGSERIAL PRIMARY KEY,\n    department_id BIGINT NOT NULL REFERENCES departments(id) ON DELETE RESTRICT,\n    -- ON DELETE options: RESTRICT (safe default), CASCADE (children die), SET NULL\n);\nCREATE INDEX idx_employees_department_id ON 
employees(department_id);\n\n\nMany-to-Many:\n\nCREATE TABLE user_roles (\n    user_id BIGINT NOT NULL REFERENCES users(id) ON DELETE CASCADE,\n    role_id BIGINT NOT NULL REFERENCES roles(id) ON DELETE CASCADE,\n    granted_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),\n    granted_by BIGINT REFERENCES users(id),\n    PRIMARY KEY (user_id, role_id)\n);\n\n\nSelf-Referencing (hierarchy):\n\nCREATE TABLE categories (\n    id BIGSERIAL PRIMARY KEY,\n    parent_id BIGINT REFERENCES categories(id) ON DELETE CASCADE,\n    name VARCHAR(100) NOT NULL,\n    depth INTEGER NOT NULL DEFAULT 0,\n    path TEXT NOT NULL DEFAULT ''  -- materialized path: '/1/5/12/'\n);\nCREATE INDEX idx_categories_parent ON categories(parent_id);\nCREATE INDEX idx_categories_path ON categories(path text_pattern_ops);\n\n\nPolymorphic (avoid if possible, use if you must):\n\n-- Preferred: separate FKs\nCREATE TABLE comments (\n    id BIGSERIAL PRIMARY KEY,\n    post_id BIGINT REFERENCES posts(id),\n    ticket_id BIGINT REFERENCES tickets(id),\n    body TEXT NOT NULL,\n    CONSTRAINT chk_one_parent CHECK (\n        (post_id IS NOT NULL)::int + (ticket_id IS NOT NULL)::int = 1\n    )\n);\n\nPhase 2 — Indexing Strategy\nIndex Type Selection\nIndex Type\tUse When\tExample\nB-tree (default)\tEquality, range, sorting, LIKE 'prefix%'\tCREATE INDEX idx_users_email ON users(email)\nHash\tEquality only, no range\tCREATE INDEX idx_sessions_token ON sessions USING hash(token)\nGIN\tJSONB, full-text search, arrays, tsvector\tCREATE INDEX idx_products_tags ON products USING gin(tags)\nGiST\tGeospatial, range types, nearest-neighbor\tCREATE INDEX idx_locations_geom ON locations USING gist(geom)\nBRIN\tVery large tables with natural ordering (time-series)\tCREATE INDEX idx_events_created ON events USING brin(created_at)\nPartial\tSubset of rows\tCREATE INDEX idx_orders_pending ON orders(created_at) WHERE status = 'pending'\nCovering\tInclude columns to avoid table lookup\tCREATE INDEX idx_orders_user ON orders(user_id) 
INCLUDE (status, total)\nIndexing Rules\nAlways index: Foreign keys, columns in WHERE/JOIN/ORDER BY\nNever index: Low-cardinality columns alone (boolean, status with 3 values) — combine in composite\nComposite order: Most selective column first, then left-to-right matches query patterns\nWatch write overhead: Each index slows INSERT/UPDATE. >8 indexes on a write-heavy table = review\nUnused index audit: Run monthly — drop indexes with 0 scans\nFind Unused Indexes (PostgreSQL)\nSELECT schemaname, tablename, indexname, idx_scan, \n       pg_size_pretty(pg_relation_size(indexrelid)) as size\nFROM pg_stat_user_indexes\nWHERE idx_scan = 0 AND indexrelid NOT IN (\n    SELECT conindid FROM pg_constraint WHERE contype IN ('p', 'u')\n)\nORDER BY pg_relation_size(indexrelid) DESC;\n\nFind Missing Indexes (PostgreSQL)\nSELECT relname, seq_scan, seq_tup_read, \n       idx_scan, seq_tup_read / GREATEST(seq_scan, 1) as avg_tuples_per_scan\nFROM pg_stat_user_tables\nWHERE seq_scan > 100 AND seq_tup_read > 10000\nORDER BY seq_tup_read DESC;\n-- High seq_scan + high seq_tup_read = missing index candidate\n\nPhase 3 — Query Optimization\nEXPLAIN Interpretation\nEXPLAIN (ANALYZE, BUFFERS, FORMAT TEXT) SELECT ...;\n\n\nRed flags in query plans:\n\nPattern\tProblem\tFix\nSeq Scan on large table\tMissing index\tAdd appropriate index\nNested Loop with large outer\tO(n×m) join\tAdd index on join column, consider Hash Join\nSort with high cost\tMissing index for ORDER BY\tAdd index matching sort order\nHash Join spilling to disk\twork_mem too low\tIncrease work_mem or reduce result set\nBitmap Heap Scan with many recheck\tLow selectivity index\tMore selective index or partial index\nSubPlan (correlated subquery)\tExecutes per row\tRewrite as JOIN or lateral\nRows estimate wildly wrong\tStale statistics\tANALYZE table\nQuery Anti-Patterns & Fixes\n\n1. 
SELECT * in production:\n\n-- Bad: fetches all columns, breaks covering indexes\nSELECT * FROM orders WHERE user_id = 123;\n-- Good: explicit columns\nSELECT id, status, total, created_at FROM orders WHERE user_id = 123;\n\n\n2. N+1 queries:\n\n-- Bad: 1 query for users + N queries for orders\nSELECT id FROM users WHERE active = true;  -- returns 100 rows\nSELECT * FROM orders WHERE user_id = ?;     -- called 100 times\n\n-- Good: single JOIN or IN\nSELECT u.id, o.id, o.total \nFROM users u\nJOIN orders o ON o.user_id = u.id\nWHERE u.active = true;\n\n\n3. Functions on indexed columns:\n\n-- Bad: can't use index on created_at\nWHERE EXTRACT(YEAR FROM created_at) = 2025\n-- Good: range scan uses index\nWHERE created_at >= '2025-01-01' AND created_at < '2026-01-01'\n\n-- Bad: can't use index on email  \nWHERE LOWER(email) = 'user@example.com'\n-- Good: expression index\nCREATE INDEX idx_users_email_lower ON users(LOWER(email));\n\n\n4. OR conditions killing indexes:\n\n-- Bad: often causes Seq Scan\nWHERE status = 'pending' OR status = 'processing'\n-- Good: IN uses index\nWHERE status IN ('pending', 'processing')\n\n\n5. Pagination with OFFSET:\n\n-- Bad: OFFSET 10000 scans and discards 10000 rows\nSELECT * FROM products ORDER BY id LIMIT 20 OFFSET 10000;\n-- Good: keyset pagination\nSELECT * FROM products WHERE id > :last_seen_id ORDER BY id LIMIT 20;\n\n\n6. 
COUNT(*) on large tables:\n\n-- Bad: full table scan\nSELECT COUNT(*) FROM events;\n-- Good: approximate count (PostgreSQL)\nSELECT reltuples::bigint FROM pg_class WHERE relname = 'events';\n-- Or maintain a counter cache table\n\nWindow Functions Reference\n-- Running total\nSELECT id, amount, SUM(amount) OVER (ORDER BY created_at) as running_total FROM payments;\n\n-- Rank within group\nSELECT *, RANK() OVER (PARTITION BY department_id ORDER BY salary DESC) as dept_rank FROM employees;\n\n-- Previous/next row\nSELECT *, LAG(amount) OVER (ORDER BY created_at) as prev_amount,\n          LEAD(amount) OVER (ORDER BY created_at) as next_amount FROM payments;\n\n-- Moving average\nSELECT *, AVG(amount) OVER (ORDER BY created_at ROWS BETWEEN 6 PRECEDING AND CURRENT ROW) as ma_7 FROM daily_sales;\n\n-- Percent of total\nSELECT *, amount / SUM(amount) OVER () * 100 as pct_of_total FROM line_items WHERE order_id = 1;\n\nCTE Patterns\n-- Recursive: org chart traversal\nWITH RECURSIVE org AS (\n    SELECT id, name, manager_id, 1 as depth FROM employees WHERE manager_id IS NULL\n    UNION ALL\n    SELECT e.id, e.name, e.manager_id, o.depth + 1\n    FROM employees e JOIN org o ON e.manager_id = o.id\n    WHERE o.depth < 10  -- safety limit\n)\nSELECT * FROM org ORDER BY depth, name;\n\n-- Data pipeline: clean → transform → aggregate\nWITH cleaned AS (\n    SELECT *, TRIM(LOWER(email)) as clean_email FROM raw_signups WHERE email IS NOT NULL\n),\ndeduped AS (\n    SELECT DISTINCT ON (clean_email) * FROM cleaned ORDER BY clean_email, created_at DESC\n)\nSELECT DATE_TRUNC('week', created_at) as week, COUNT(*) FROM deduped GROUP BY 1 ORDER BY 1;\n\nPhase 4 — Migrations\nMigration Safety Rules\nNever rename columns/tables in production without a multi-step process\nNever add NOT NULL without a DEFAULT on existing tables with data\nNever drop columns that application code still references\nAlways test migrations on a copy of production data first\nAlways have a rollback plan (down 
migration)\nAlways take a backup before schema changes in production\nSafe Migration Patterns\n\nAdd column (safe):\n\n-- Step 1: Add nullable column\nALTER TABLE users ADD COLUMN phone VARCHAR(20);\n-- Step 2: Backfill (in batches!)\nUPDATE users SET phone = '' WHERE phone IS NULL AND id BETWEEN 1 AND 10000;\n-- Step 3: Add NOT NULL after backfill\nALTER TABLE users ALTER COLUMN phone SET NOT NULL;\nALTER TABLE users ALTER COLUMN phone SET DEFAULT '';\n\n\nRename column (safe multi-step):\n\n-- Step 1: Add new column\nALTER TABLE users ADD COLUMN full_name VARCHAR(200);\n-- Step 2: Dual-write in application code (write to both old + new)\n-- Step 3: Backfill\nUPDATE users SET full_name = name WHERE full_name IS NULL;\n-- Step 4: Switch application to read from new column\n-- Step 5: Drop old column (after confirming no reads)\nALTER TABLE users DROP COLUMN name;\n\n\nAdd index without locking (PostgreSQL):\n\nCREATE INDEX CONCURRENTLY idx_orders_customer ON orders(customer_id);\n-- Takes longer but doesn't lock the table\n\n\nLarge table backfill (batched):\n\n-- Don't: UPDATE millions of rows in one transaction\n-- Do: batch it\nDO $$\nDECLARE\n    batch_size INT := 5000;\n    affected INT;\nBEGIN\n    LOOP\n        UPDATE users SET normalized_email = LOWER(email)\n        WHERE normalized_email IS NULL AND id IN (\n            SELECT id FROM users WHERE normalized_email IS NULL LIMIT batch_size\n        );\n        GET DIAGNOSTICS affected = ROW_COUNT;\n        RAISE NOTICE 'Updated % rows', affected;\n        EXIT WHEN affected = 0;\n        COMMIT;\n    END LOOP;\nEND $$;\n\nMigration File Template\n-- Migration: YYYYMMDDHHMMSS_description.sql\n-- Author: [name]\n-- Ticket: [JIRA/Linear ID]\n-- Risk: low|medium|high\n-- Rollback: see DOWN section\n-- Estimated time: [for production data volume]\n-- Requires: [prerequisite migrations]\n\n-- ========== UP ==========\nBEGIN;\n\n-- [DDL/DML here]\n\nCOMMIT;\n\n-- ========== DOWN ==========\n-- BEGIN;\n-- [Rollback 
DDL/DML here]\n-- COMMIT;\n\n-- ========== VERIFY ==========\n-- [Queries to confirm migration succeeded]\n-- SELECT COUNT(*) FROM ... WHERE ...;\n\nPhase 5 — Performance Monitoring\nKey Metrics Dashboard\nhealth_metrics:\n  connections:\n    active: \"SELECT count(*) FROM pg_stat_activity WHERE state = 'active'\"\n    idle: \"SELECT count(*) FROM pg_stat_activity WHERE state = 'idle'\"\n    max: \"SHOW max_connections\"\n    threshold: \"active > 80% of max = ALERT\"\n    \n  cache_hit_ratio:\n    query: |\n      SELECT ROUND(100.0 * sum(heap_blks_hit) / \n             NULLIF(sum(heap_blks_hit) + sum(heap_blks_read), 0), 2) as ratio\n      FROM pg_statio_user_tables\n    healthy: \"> 99%\"\n    warning: \"< 95%\"\n    critical: \"< 90%\"\n    \n  index_hit_ratio:\n    query: |\n      SELECT ROUND(100.0 * sum(idx_blks_hit) / \n             NULLIF(sum(idx_blks_hit) + sum(idx_blks_read), 0), 2) as ratio\n      FROM pg_statio_user_indexes\n    healthy: \"> 99%\"\n    \n  table_bloat:\n    query: |\n      SELECT relname, n_dead_tup, n_live_tup,\n             ROUND(100.0 * n_dead_tup / NULLIF(n_live_tup, 0), 2) as dead_pct\n      FROM pg_stat_user_tables WHERE n_dead_tup > 10000\n      ORDER BY n_dead_tup DESC LIMIT 10\n    action: \"VACUUM ANALYZE {table} when dead_pct > 20%\"\n    \n  slow_queries:\n    query: |\n      SELECT query, calls, mean_exec_time, total_exec_time\n      FROM pg_stat_statements\n      ORDER BY mean_exec_time DESC LIMIT 20\n    action: \"Optimize top 5 by total_exec_time first\"\n    \n  replication_lag:\n    query: |\n      SELECT EXTRACT(EPOCH FROM replay_lag) as lag_seconds\n      FROM pg_stat_replication\n    warning: \"> 5 seconds\"\n    critical: \"> 30 seconds\"\n\nTable Size Analysis\nSELECT \n    relname as table,\n    pg_size_pretty(pg_total_relation_size(relid)) as total_size,\n    pg_size_pretty(pg_relation_size(relid)) as table_size,\n    pg_size_pretty(pg_total_relation_size(relid) - pg_relation_size(relid)) as index_size,\n    
n_live_tup as row_count\nFROM pg_stat_user_tables\nORDER BY pg_total_relation_size(relid) DESC\nLIMIT 20;\n\nLock Monitoring\n-- Find blocking queries\nSELECT \n    blocked.pid as blocked_pid,\n    blocked.query as blocked_query,\n    blocking.pid as blocking_pid,\n    blocking.query as blocking_query,\n    NOW() - blocked.query_start as blocked_duration\nFROM pg_stat_activity blocked\nJOIN pg_locks bl ON bl.pid = blocked.pid\nJOIN pg_locks kl ON kl.locktype = bl.locktype AND kl.relation = bl.relation AND kl.pid != bl.pid\nJOIN pg_stat_activity blocking ON blocking.pid = kl.pid\nWHERE NOT bl.granted;\n\nPhase 6 — Backup & Recovery\nBackup Strategy Decision\nMethod\tRPO\tSpeed\tUse When\npg_dump (logical)\tPoint-in-time\tSlow for >50GB\tSmall-medium DBs, cross-version migration\npg_basebackup (physical)\tContinuous (with WAL)\tFast\tLarge DBs, same-version restore\nWAL archiving (PITR)\tSeconds\tN/A (continuous)\tProduction with near-zero RPO\nReplica promotion\tSeconds\tInstant\tHA failover\nBackup Commands\n# Logical backup (compressed)\npg_dump -Fc -Z 9 -j 4 -d mydb -f backup_$(date +%Y%m%d_%H%M%S).dump\n\n# Restore\npg_restore -d mydb -j 4 --clean --if-exists backup_20260216.dump\n\n# Schema only\npg_dump -s -d mydb -f schema.sql\n\n# Single table\npg_dump -t orders -d mydb -f orders_backup.dump\n\n# Physical backup\npg_basebackup -D /backup/base -Ft -z -P -X stream\n\nBackup Verification Checklist\n Backup completes without errors\n Backup file size is within expected range (not suspiciously small)\n Restore to a test database succeeds\n Row counts match production (spot check 5 tables)\n Application can connect and query the restored database\n Run automated test suite against restored backup\n Backup encryption verified (if required)\n Offsite copy confirmed\nPhase 7 — Security\nAccess Control Checklist\n-- Create application role (least privilege)\nCREATE ROLE app_user LOGIN PASSWORD 'use-vault-not-plaintext';\nGRANT CONNECT ON DATABASE mydb TO 
app_user;\nGRANT USAGE ON SCHEMA public TO app_user;\nGRANT SELECT, INSERT, UPDATE, DELETE ON ALL TABLES IN SCHEMA public TO app_user;\n-- NO: GRANT ALL, superuser, CREATE, DROP\n\n-- Read-only role for analytics\nCREATE ROLE analyst LOGIN PASSWORD 'use-vault';\nGRANT CONNECT ON DATABASE mydb TO analyst;\nGRANT USAGE ON SCHEMA public TO analyst;\nGRANT SELECT ON ALL TABLES IN SCHEMA public TO analyst;\n\n-- Row-Level Security (multi-tenant)\nALTER TABLE orders ENABLE ROW LEVEL SECURITY;\nCREATE POLICY tenant_isolation ON orders\n    USING (tenant_id = current_setting('app.tenant_id')::bigint);\n\nSQL Injection Prevention\nRULE 1: NEVER concatenate user input into SQL strings\nRULE 2: Always use parameterized queries / prepared statements\nRULE 3: Validate and whitelist table/column names if dynamic\nRULE 4: Use ORMs for CRUD, raw SQL only for complex queries\nRULE 5: Audit logs for unusual query patterns (UNION, DROP, --)\n\nData Protection\n-- Encrypt sensitive columns (application-level)\n-- Store: pgp_sym_encrypt(data, key) \n-- Read: pgp_sym_decrypt(encrypted_col, key)\n\n-- Audit trail table\nCREATE TABLE audit_log (\n    id BIGSERIAL PRIMARY KEY,\n    table_name VARCHAR(100) NOT NULL,\n    record_id BIGINT NOT NULL,\n    action VARCHAR(10) NOT NULL, -- INSERT, UPDATE, DELETE\n    old_data JSONB,\n    new_data JSONB,\n    changed_by BIGINT REFERENCES users(id),\n    changed_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),\n    ip_address INET\n);\n\n-- Generic audit trigger\nCREATE OR REPLACE FUNCTION audit_trigger() RETURNS TRIGGER AS $$\nBEGIN\n    INSERT INTO audit_log (table_name, record_id, action, old_data, new_data, changed_by)\n    VALUES (\n        TG_TABLE_NAME,\n        COALESCE(NEW.id, OLD.id),\n        TG_OP,\n        CASE WHEN TG_OP != 'INSERT' THEN to_jsonb(OLD) END,\n        CASE WHEN TG_OP != 'DELETE' THEN to_jsonb(NEW) END,\n        current_setting('app.user_id', true)::bigint\n    );\n    RETURN COALESCE(NEW, OLD);\nEND;\n$$ LANGUAGE plpgsql;\n\nPhase 
8 — PostgreSQL Configuration Tuning\nEssential Settings by Server Size\nSetting\tSmall (4GB RAM)\tMedium (16GB)\tLarge (64GB+)\nshared_buffers\t1GB\t4GB\t16GB\neffective_cache_size\t3GB\t12GB\t48GB\nwork_mem\t16MB\t64MB\t256MB\nmaintenance_work_mem\t256MB\t1GB\t2GB\nmax_connections\t100\t200\t300\nwal_buffers\t64MB\t128MB\t256MB\nrandom_page_cost\t1.1 (SSD)\t1.1 (SSD)\t1.1 (SSD)\neffective_io_concurrency\t200 (SSD)\t200 (SSD)\t200 (SSD)\nmax_parallel_workers_per_gather\t2\t4\t8\nConnection Pooling (PgBouncer)\n[databases]\nmydb = host=127.0.0.1 port=5432 dbname=mydb\n\n[pgbouncer]\npool_mode = transaction          # transaction pooling (best for most apps)\nmax_client_conn = 1000           # accept up to 1000 app connections\ndefault_pool_size = 25           # 25 actual DB connections per database\nreserve_pool_size = 5            # extra connections for burst\nreserve_pool_timeout = 3         # seconds before using reserve\nserver_idle_timeout = 300        # close idle server connections after 5 min\n\nPhase 9 — Common Patterns\nSoft Delete\n-- Add to table\nALTER TABLE users ADD COLUMN deleted_at TIMESTAMPTZ;\nCREATE INDEX idx_users_active ON users(id) WHERE deleted_at IS NULL;\n\n-- Application queries always filter\nSELECT * FROM users WHERE deleted_at IS NULL AND ...;\n\n-- Or use a view\nCREATE VIEW active_users AS SELECT * FROM users WHERE deleted_at IS NULL;\n\nOptimistic Locking\nUPDATE products SET \n    price = 29.99, \n    version = version + 1, \n    updated_at = NOW()\nWHERE id = 123 AND version = 5;  -- expected version\n-- If 0 rows affected → concurrent modification → retry or error\n\nEvent Sourcing Table\nCREATE TABLE events (\n    id BIGSERIAL PRIMARY KEY,\n    aggregate_type VARCHAR(50) NOT NULL,\n    aggregate_id UUID NOT NULL,\n    event_type VARCHAR(100) NOT NULL,\n    event_data JSONB NOT NULL,\n    metadata JSONB DEFAULT '{}',\n    version INTEGER NOT NULL,\n    created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),\n    UNIQUE (aggregate_id, 
version)\n);\nCREATE INDEX idx_events_aggregate ON events(aggregate_id, version);\nCREATE INDEX idx_events_type ON events(event_type, created_at);\n\nTime-Series Optimization\n-- Partitioned by month\nCREATE TABLE metrics (\n    id BIGSERIAL,\n    sensor_id INTEGER NOT NULL,\n    value NUMERIC(12,4) NOT NULL,\n    recorded_at TIMESTAMPTZ NOT NULL\n) PARTITION BY RANGE (recorded_at);\n\nCREATE TABLE metrics_2026_01 PARTITION OF metrics\n    FOR VALUES FROM ('2026-01-01') TO ('2026-02-01');\nCREATE TABLE metrics_2026_02 PARTITION OF metrics\n    FOR VALUES FROM ('2026-02-01') TO ('2026-03-01');\n\n-- Auto-create future partitions via cron or pg_partman\n-- Use BRIN index for time-series\nCREATE INDEX idx_metrics_time ON metrics USING brin(recorded_at);\n\nFull-Text Search (PostgreSQL)\n-- Add search column\nALTER TABLE articles ADD COLUMN search_vector tsvector;\nCREATE INDEX idx_articles_search ON articles USING gin(search_vector);\n\n-- Populate\nUPDATE articles SET search_vector = \n    setweight(to_tsvector('english', COALESCE(title, '')), 'A') ||\n    setweight(to_tsvector('english', COALESCE(body, '')), 'B');\n\n-- Search with ranking\nSELECT id, title, ts_rank(search_vector, query) as rank\nFROM articles, plainto_tsquery('english', 'database optimization') query\nWHERE search_vector @@ query\nORDER BY rank DESC LIMIT 20;\n\nJSONB Patterns\n-- Store flexible attributes\nCREATE TABLE products (\n    id BIGSERIAL PRIMARY KEY,\n    name VARCHAR(200) NOT NULL,\n    attributes JSONB NOT NULL DEFAULT '{}',\n    created_at TIMESTAMPTZ NOT NULL DEFAULT NOW()\n);\n\n-- Index specific JSON paths\nCREATE INDEX idx_products_color ON products((attributes->>'color'));\n-- Or GIN for any key lookups\nCREATE INDEX idx_products_attrs ON products USING gin(attributes);\n\n-- Query patterns\nSELECT * FROM products WHERE attributes->>'color' = 'red';\nSELECT * FROM products WHERE attributes @> '{\"size\": \"large\"}';\nSELECT * FROM products WHERE attributes ? 
'warranty';\n\nPhase 10 — Operational Runbooks\nEmergency: Database Overloaded\n-- 1. Find and kill long-running queries\nSELECT pid, NOW() - query_start as duration, query \nFROM pg_stat_activity WHERE state = 'active' AND query_start < NOW() - INTERVAL '5 minutes'\nORDER BY duration DESC;\n\n-- Kill a specific query\nSELECT pg_cancel_backend(pid);    -- graceful\nSELECT pg_terminate_backend(pid); -- force\n\n-- 2. Check for lock contention (see Phase 5)\n\n-- 3. Reduce max connections temporarily\n-- In pgbouncer: pause database, reduce pool, resume\n\n-- 4. Check if VACUUM is needed\nSELECT relname, n_dead_tup, last_autovacuum FROM pg_stat_user_tables \nWHERE n_dead_tup > 100000 ORDER BY n_dead_tup DESC;\n\nEmergency: Disk Full\n# 1. Check what's consuming space\ndu -sh /var/lib/postgresql/*/main/ 2>/dev/null || du -sh /var/lib/mysql/\n\n# 2. Clean up WAL files (PostgreSQL) — CAREFUL\n# Check replication slot status first\nSELECT slot_name, active FROM pg_replication_slots;\n# Drop inactive slots consuming WAL\nSELECT pg_drop_replication_slot('unused_slot');\n\n# 3. VACUUM FULL largest tables (locks table!)\nVACUUM FULL large_table;\n\n# 4. 
Remove old backups / logs\nfind /backups -name \"*.dump\" -mtime +7 -delete\n\nWeekly Maintenance Checklist\n Review slow query log (top 10 by total time)\n Check index usage stats — drop unused, add missing\n Verify backup success and test restore\n Check table bloat — schedule VACUUM where needed\n Review connection count trends\n Check disk space trajectory\n Review replication lag\n Update table statistics: ANALYZE;\nPhase 11 — Database Comparison Quick Reference\nFeature\tPostgreSQL\tMySQL (InnoDB)\tSQLite\nBest for\tComplex queries, extensions\tWeb apps, read-heavy\tEmbedded, dev, small apps\nMax size\tUnlimited (practical)\tUnlimited (practical)\t281 TB (practical ~1TB)\nJSON support\tJSONB (indexable, fast)\tJSON (limited indexing)\tJSON1 extension\nFull-text search\tBuilt-in (tsvector)\tBuilt-in (FULLTEXT)\tFTS5 extension\nWindow functions\tFull support\tFull support (8.0+)\tFull support (3.25+)\nCTEs\tRecursive + materialized\tRecursive (8.0+)\tRecursive (3.8+)\nPartitioning\tDeclarative + list/range/hash\tRange/list/hash/key\tNone\nRow-level security\tYes\tNo (use views)\tNo\nReplication\tStreaming + logical\tBinary log\tNone (use Litestream)\nConnection model\tProcess per connection\tThread per connection\tIn-process\nQuality Scoring Rubric (0-100)\nDimension\tWeight\t0 (Poor)\t5 (Good)\t10 (Excellent)\nSchema Design\t20%\tNo normalization, no constraints\t3NF, FKs, proper types\tOptimal normal form, all constraints, audit fields\nIndexing\t15%\tNo indexes beyond PK\tIndexes on FKs and common queries\tCovering indexes, partials, no unused indexes\nQuery Quality\t20%\tSELECT *, N+1, no EXPLAIN\tSpecific columns, JOINs, basic optimization\tKeyset pagination, window functions, optimized plans\nMigration Safety\t10%\tRaw DDL, no rollback\tVersioned files, up/down\tZero-downtime, batched backfills, concurrent indexes\nSecurity\t15%\tSuperuser access, no audit\tLeast privilege, parameterized queries\tRLS, encryption, audit triggers, regular access 
review\nMonitoring\t10%\tNo monitoring\tBasic alerts on connections/disk\tFull dashboard, slow query analysis, proactive tuning\nBackup/Recovery\t10%\tNo backups\tDaily dumps\tPITR, tested restores, offsite copies\n\nScore interpretation: <40 = Critical risk | 40-60 = Needs work | 60-80 = Solid | 80-90 = Professional | 90+ = Expert\n\nNatural Language Commands\n\"Design a schema for [domain]\" → Phase 1 full design process\n\"Optimize this query: [SQL]\" → EXPLAIN analysis + rewrite\n\"Add an index for [query pattern]\" → Index type selection + creation\n\"Write a migration to [change]\" → Safe migration with rollback\n\"Audit this database\" → Full scoring across all dimensions\n\"Set up monitoring for [database]\" → Phase 5 dashboard queries\n\"Review this schema\" → Naming, types, constraints, relationships check\n\"Help me with [PostgreSQL/MySQL/SQLite] [topic]\" → Platform-specific guidance\n\"Troubleshoot slow queries\" → pg_stat_statements analysis + top fixes\n\"Plan a backup strategy\" → Phase 6 decision framework\n\"Make this table multi-tenant\" → RLS + tenant_id pattern\n\"Convert this to use partitioning\" → Phase 9 time-series pattern"
  },
  "trust": {
    "sourceLabel": "tencent",
    "provenanceUrl": "https://clawhub.ai/1kalin/afrexai-database-engineer",
    "publisherUrl": "https://clawhub.ai/1kalin/afrexai-database-engineer",
    "owner": "1kalin",
    "version": "1.0.0",
    "license": null,
    "verificationStatus": "Indexed source record"
  },
  "links": {
    "detailUrl": "https://openagent3.xyz/skills/afrexai-database-engineer",
    "downloadUrl": "https://openagent3.xyz/downloads/afrexai-database-engineer",
    "agentUrl": "https://openagent3.xyz/skills/afrexai-database-engineer/agent",
    "manifestUrl": "https://openagent3.xyz/skills/afrexai-database-engineer/agent.json",
    "briefUrl": "https://openagent3.xyz/skills/afrexai-database-engineer/agent.md"
  }
}