{
  "schemaVersion": "1.0",
  "item": {
    "slug": "data-validation",
    "name": "Data Validation",
    "source": "tencent",
    "type": "skill",
    "category": "开发工具",
    "sourceUrl": "https://clawhub.ai/gitgoodordietrying/data-validation",
    "canonicalUrl": "https://clawhub.ai/gitgoodordietrying/data-validation",
    "targetPlatform": "OpenClaw"
  },
  "install": {
    "downloadMode": "redirect",
    "downloadUrl": "/downloads/data-validation",
    "sourceDownloadUrl": "https://wry-manatee-359.convex.site/api/v1/download?slug=data-validation",
    "sourcePlatform": "tencent",
    "targetPlatform": "OpenClaw",
    "installMethod": "Manual import",
    "extraction": "Extract archive",
    "prerequisites": [
      "OpenClaw"
    ],
    "packageFormat": "ZIP package",
    "includedAssets": [
      "SKILL.md"
    ],
    "primaryDoc": "SKILL.md",
    "quickSetup": [
      "Download the package from Yavira.",
      "Extract the archive and review SKILL.md first.",
      "Import or place the package into your OpenClaw setup."
    ],
    "agentAssist": {
      "summary": "Hand the extracted package to your coding agent with a concrete install brief instead of figuring it out manually.",
      "steps": [
        "Download the package from Yavira.",
        "Extract it into a folder your agent can access.",
        "Paste one of the prompts below and point your agent at the extracted folder."
      ],
      "prompts": [
        {
          "label": "New install",
          "body": "I downloaded a skill package from Yavira. Read SKILL.md from the extracted folder and install it by following the included instructions. Tell me what you changed and call out any manual steps you could not complete."
        },
        {
          "label": "Upgrade existing",
          "body": "I downloaded an updated skill package from Yavira. Read SKILL.md from the extracted folder, compare it with my current installation, and upgrade it while preserving any custom configuration unless the package docs explicitly say otherwise. Summarize what changed and any follow-up checks I should run."
        }
      ]
    },
    "sourceHealth": {
      "source": "tencent",
      "slug": "data-validation",
      "status": "healthy",
      "reason": "direct_download_ok",
      "recommendedAction": "download",
      "checkedAt": "2026-05-02T08:18:44.416Z",
      "expiresAt": "2026-05-09T08:18:44.416Z",
      "httpStatus": 200,
      "finalUrl": "https://wry-manatee-359.convex.site/api/v1/download?slug=data-validation",
      "contentType": "application/zip",
      "probeMethod": "head",
      "details": {
        "probeUrl": "https://wry-manatee-359.convex.site/api/v1/download?slug=data-validation",
        "contentDisposition": "attachment; filename=\"data-validation-1.0.0.zip\"",
        "redirectLocation": null,
        "bodySnippet": null,
        "slug": "data-validation"
      },
      "scope": "item",
      "summary": "Item download looks usable.",
      "detail": "Yavira can redirect you to the upstream package for this item.",
      "primaryActionLabel": "Download for OpenClaw",
      "primaryActionHref": "/downloads/data-validation"
    },
    "validation": {
      "installChecklist": [
        "Use the Yavira download entry.",
        "Review SKILL.md after the package is downloaded.",
        "Confirm the extracted package contains the expected setup assets."
      ],
      "postInstallChecks": [
        "Confirm the extracted package includes the expected docs or setup files.",
        "Verify that the skill or prompts are available in your target agent workspace.",
        "Capture any manual follow-up steps the agent could not complete."
      ]
    },
    "downloadPageUrl": "https://openagent3.xyz/downloads/data-validation",
    "agentPageUrl": "https://openagent3.xyz/skills/data-validation/agent",
    "manifestUrl": "https://openagent3.xyz/skills/data-validation/agent.json",
    "briefUrl": "https://openagent3.xyz/skills/data-validation/agent.md"
  },
  "agentAssist": {
    "summary": "Hand the extracted package to your coding agent with a concrete install brief instead of figuring it out manually.",
    "steps": [
      "Download the package from Yavira.",
      "Extract it into a folder your agent can access.",
      "Paste one of the prompts below and point your agent at the extracted folder."
    ],
    "prompts": [
      {
        "label": "New install",
        "body": "I downloaded a skill package from Yavira. Read SKILL.md from the extracted folder and install it by following the included instructions. Tell me what you changed and call out any manual steps you could not complete."
      },
      {
        "label": "Upgrade existing",
        "body": "I downloaded an updated skill package from Yavira. Read SKILL.md from the extracted folder, compare it with my current installation, and upgrade it while preserving any custom configuration unless the package docs explicitly say otherwise. Summarize what changed and any follow-up checks I should run."
      }
    ]
  },
  "documentation": {
    "source": "clawhub",
    "primaryDoc": "SKILL.md",
    "sections": [
      {
        "title": "Data Validation",
        "body": "Schema-based data validation across languages and formats. Covers JSON Schema, Zod (TypeScript), Pydantic (Python), API boundary validation, data contracts, and integrity checking."
      },
      {
        "title": "When to Use",
        "body": "Defining the shape of API request/response bodies\nValidating user input before processing\nSetting up data contracts between services\nChecking CSV/JSON file integrity before import\nMigrating data (did the ETL preserve everything?)\nGenerating types or documentation from schemas"
      },
      {
        "title": "Basic schema",
        "body": "{\n  \"$schema\": \"https://json-schema.org/draft/2020-12/schema\",\n  \"type\": \"object\",\n  \"required\": [\"name\", \"email\", \"age\"],\n  \"properties\": {\n    \"name\": {\n      \"type\": \"string\",\n      \"minLength\": 1,\n      \"maxLength\": 100\n    },\n    \"email\": {\n      \"type\": \"string\",\n      \"format\": \"email\"\n    },\n    \"age\": {\n      \"type\": \"integer\",\n      \"minimum\": 0,\n      \"maximum\": 150\n    },\n    \"role\": {\n      \"type\": \"string\",\n      \"enum\": [\"user\", \"admin\", \"moderator\"],\n      \"default\": \"user\"\n    },\n    \"tags\": {\n      \"type\": \"array\",\n      \"items\": { \"type\": \"string\" },\n      \"uniqueItems\": true,\n      \"maxItems\": 10\n    },\n    \"address\": {\n      \"type\": \"object\",\n      \"properties\": {\n        \"street\": { \"type\": \"string\" },\n        \"city\": { \"type\": \"string\" },\n        \"zip\": { \"type\": \"string\", \"pattern\": \"^\\\\d{5}(-\\\\d{4})?$\" }\n      },\n      \"required\": [\"street\", \"city\"]\n    }\n  },\n  \"additionalProperties\": false\n}"
      },
      {
        "title": "Common patterns",
        "body": "// Nullable field\n{ \"type\": [\"string\", \"null\"] }\n\n// Union type (string or number)\n{ \"oneOf\": [{ \"type\": \"string\" }, { \"type\": \"number\" }] }\n\n// Conditional: if role is admin, require permissions\n{\n  \"if\": { \"properties\": { \"role\": { \"const\": \"admin\" } } },\n  \"then\": { \"required\": [\"permissions\"] }\n}\n\n// Pattern properties (dynamic keys)\n{\n  \"type\": \"object\",\n  \"patternProperties\": {\n    \"^env_\": { \"type\": \"string\" }\n  }\n}\n\n// Reusable definitions\n{\n  \"$defs\": {\n    \"address\": {\n      \"type\": \"object\",\n      \"properties\": {\n        \"street\": { \"type\": \"string\" },\n        \"city\": { \"type\": \"string\" }\n      }\n    }\n  },\n  \"properties\": {\n    \"home\": { \"$ref\": \"#/$defs/address\" },\n    \"work\": { \"$ref\": \"#/$defs/address\" }\n  }\n}"
      },
      {
        "title": "Validate with command line",
        "body": "# Using ajv-cli (Node.js)\nnpx ajv-cli validate -s schema.json -d data.json\n\n# Using jsonschema (Python)\npip install jsonschema\npython3 -c \"\nimport json, jsonschema\nschema = json.load(open('schema.json'))\ndata = json.load(open('data.json'))\njsonschema.validate(data, schema)\nprint('Valid')\n\"\n\n# Validate multiple files\nfor f in data/*.json; do\n  npx ajv-cli validate -s schema.json -d \"$f\" 2>&1 || echo \"INVALID: $f\"\ndone"
      },
      {
        "title": "Basic schemas",
        "body": "import { z } from 'zod';\n\n// Primitives\nconst nameSchema = z.string().min(1).max(100);\nconst ageSchema = z.number().int().min(0).max(150);\nconst emailSchema = z.string().email();\nconst urlSchema = z.string().url();\n\n// Objects\nconst userSchema = z.object({\n  name: z.string().min(1),\n  email: z.string().email(),\n  age: z.number().int().min(0),\n  role: z.enum(['user', 'admin', 'moderator']).default('user'),\n  tags: z.array(z.string()).max(10).default([]),\n  createdAt: z.string().datetime(),\n});\n\n// Infer TypeScript type from schema\ntype User = z.infer<typeof userSchema>;\n// { name: string; email: string; age: number; role: \"user\" | \"admin\" | \"moderator\"; ... }\n\n// Validate\nconst result = userSchema.safeParse(data);\nif (result.success) {\n  console.log(result.data); // typed as User\n} else {\n  console.log(result.error.issues); // validation errors\n}\n\n// Parse (throws on invalid)\nconst user = userSchema.parse(data);"
      },
      {
        "title": "Advanced patterns",
        "body": "// Optional and nullable\nconst schema = z.object({\n  name: z.string(),\n  nickname: z.string().optional(),       // string | undefined\n  middleName: z.string().nullable(),     // string | null\n  suffix: z.string().nullish(),          // string | null | undefined\n});\n\n// Transforms (validate then transform)\nconst dateSchema = z.string().datetime().transform(s => new Date(s));\nconst trimmed = z.string().trim().toLowerCase();\nconst parsed = z.string().transform(s => parseInt(s, 10)).pipe(z.number().int());\n\n// Discriminated unions (tagged unions)\nconst eventSchema = z.discriminatedUnion('type', [\n  z.object({ type: z.literal('click'), x: z.number(), y: z.number() }),\n  z.object({ type: z.literal('keypress'), key: z.string() }),\n  z.object({ type: z.literal('scroll'), delta: z.number() }),\n]);\n\n// Recursive types\nconst categorySchema: z.ZodType<Category> = z.object({\n  name: z.string(),\n  children: z.lazy(() => z.array(categorySchema)).default([]),\n});\n\n// Refinements (custom validation)\nconst passwordSchema = z.string()\n  .min(8)\n  .refine(s => /[A-Z]/.test(s), 'Must contain uppercase')\n  .refine(s => /[0-9]/.test(s), 'Must contain digit')\n  .refine(s => /[^a-zA-Z0-9]/.test(s), 'Must contain special character');\n\n// Extend/merge objects\nconst baseUser = z.object({ name: z.string(), email: z.string() });\nconst adminUser = baseUser.extend({ permissions: z.array(z.string()) });\n\n// Pick/omit\nconst createUser = userSchema.omit({ createdAt: true });\nconst userSummary = userSchema.pick({ name: true, email: true });\n\n// Passthrough (allow extra fields)\nconst flexible = userSchema.passthrough();\n\n// Strict (reject unknown fields; the default strips them)\nconst strict = userSchema.strict(); // Error on extra fields"
      },
      {
        "title": "API validation with Zod",
        "body": "// Express middleware\nimport { z } from 'zod';\n\nconst createUserBody = z.object({\n  name: z.string().min(1),\n  email: z.string().email(),\n  password: z.string().min(8),\n});\n\napp.post('/api/users', (req, res) => {\n  const result = createUserBody.safeParse(req.body);\n  if (!result.success) {\n    return res.status(400).json({ errors: result.error.issues });\n  }\n  const { name, email, password } = result.data;\n  // ... create user\n});\n\n// Query parameter validation\nconst listParams = z.object({\n  page: z.coerce.number().int().min(1).default(1),\n  limit: z.coerce.number().int().min(1).max(100).default(20),\n  sort: z.enum(['newest', 'oldest', 'name']).default('newest'),\n  q: z.string().optional(),\n});\n\napp.get('/api/users', (req, res) => {\n  const params = listParams.parse(req.query);\n  // params.page is a number, params.sort is typed\n});"
      },
      {
        "title": "Basic models",
        "body": "from pydantic import BaseModel, Field, EmailStr, field_validator\nfrom typing import Optional\nfrom datetime import datetime\nfrom enum import Enum\n\nclass Role(str, Enum):\n    USER = \"user\"\n    ADMIN = \"admin\"\n    MODERATOR = \"moderator\"\n\nclass Address(BaseModel):\n    street: str\n    city: str\n    zip_code: str = Field(pattern=r\"^\\d{5}(-\\d{4})?$\")\n\nclass User(BaseModel):\n    name: str = Field(min_length=1, max_length=100)\n    email: EmailStr\n    age: int = Field(ge=0, le=150)\n    role: Role = Role.USER\n    tags: list[str] = Field(default_factory=list, max_length=10)\n    address: Optional[Address] = None\n    created_at: datetime = Field(default_factory=datetime.now)\n\n    @field_validator(\"name\")\n    @classmethod\n    def name_must_not_be_empty(cls, v: str) -> str:\n        if not v.strip():\n            raise ValueError(\"name cannot be blank\")\n        return v.strip()\n\n# Validate\nuser = User(name=\"Alice\", email=\"alice@example.com\", age=30)\nprint(user.model_dump())      # dict\nprint(user.model_dump_json())  # JSON string\n\n# Validation errors\ntry:\n    User(name=\"\", email=\"bad\", age=-1)\nexcept Exception as e:\n    print(e)  # Detailed validation errors"
      },
      {
        "title": "Advanced patterns",
        "body": "from pydantic import BaseModel, model_validator, ConfigDict\nfrom typing import Literal, Union, Annotated\n\n# Discriminated union\nclass ClickEvent(BaseModel):\n    type: Literal[\"click\"]\n    x: int\n    y: int\n\nclass KeypressEvent(BaseModel):\n    type: Literal[\"keypress\"]\n    key: str\n\nEvent = Annotated[Union[ClickEvent, KeypressEvent], Field(discriminator=\"type\")]\n\n# Model-level validation (cross-field)\nclass DateRange(BaseModel):\n    start: datetime\n    end: datetime\n\n    @model_validator(mode=\"after\")\n    def end_after_start(self):\n        if self.end <= self.start:\n            raise ValueError(\"end must be after start\")\n        return self\n\n# Strict mode (no type coercion)\nclass StrictUser(BaseModel):\n    model_config = ConfigDict(strict=True)\n    age: int  # \"30\" will be rejected, must be int 30\n\n# Alias (accept different field names in input)\nclass APIResponse(BaseModel):\n    user_name: str = Field(alias=\"userName\")\n    created_at: datetime = Field(alias=\"createdAt\")\n\n    model_config = ConfigDict(populate_by_name=True)\n\n# Computed fields\nfrom pydantic import computed_field\n\nclass Order(BaseModel):\n    items: list[dict]\n    tax_rate: float = 0.08\n\n    @computed_field\n    @property\n    def total(self) -> float:\n        subtotal = sum(i.get(\"price\", 0) * i.get(\"qty\", 1) for i in self.items)\n        return round(subtotal * (1 + self.tax_rate), 2)\n\n# Generate JSON Schema\nprint(User.model_json_schema())"
      },
      {
        "title": "FastAPI integration",
        "body": "from fastapi import FastAPI, Query\nfrom pydantic import BaseModel\n\napp = FastAPI()\n\nclass CreateUser(BaseModel):\n    name: str = Field(min_length=1)\n    email: EmailStr\n    password: str = Field(min_length=8)\n\nclass UserResponse(BaseModel):\n    id: int\n    name: str\n    email: str\n\n@app.post(\"/api/users\", response_model=UserResponse)\nasync def create_user(body: CreateUser):\n    # body is already validated\n    return {\"id\": 1, \"name\": body.name, \"email\": body.email}\n\n@app.get(\"/api/users\")\nasync def list_users(\n    page: int = Query(default=1, ge=1),\n    limit: int = Query(default=20, ge=1, le=100),\n    q: str | None = Query(default=None),\n):\n    # All params validated and typed\n    pass"
      },
      {
        "title": "CSV validation",
        "body": "#!/bin/bash\n# validate-csv.sh — Check CSV structure and data quality\nFILE=\"${1:?Usage: validate-csv.sh <file.csv>}\"\n\necho \"=== CSV Validation: $FILE ===\"\n\n# Row count\nROWS=$(wc -l < \"$FILE\")\necho \"Rows: $ROWS (including header)\"\n\n# Column count consistency\nHEADER_COLS=$(head -1 \"$FILE\" | awk -F',' '{print NF}')\necho \"Columns (header): $HEADER_COLS\"\n\nBAD_ROWS=$(awk -F',' -v expected=\"$HEADER_COLS\" 'NR>1 && NF!=expected {count++} END {print count+0}' \"$FILE\")\nif [ \"$BAD_ROWS\" -gt 0 ]; then\n    echo \"ERROR: $BAD_ROWS rows have wrong column count\"\n    awk -F',' -v expected=\"$HEADER_COLS\" 'NR>1 && NF!=expected {print \"  Line \"NR\": \"NF\" columns (expected \"expected\")\"}' \"$FILE\" | head -5\nelse\n    echo \"Column count: consistent\"\nfi\n\n# Empty fields\nEMPTY=$(awk -F',' '{for(i=1;i<=NF;i++) if($i==\"\") count++} END {print count}' \"$FILE\")\necho \"Empty fields: $EMPTY\"\n\n# Duplicate rows\nDUPES=$(($(sort \"$FILE\" | uniq -d | wc -l)))\necho \"Duplicate rows: $DUPES\"\n\necho \"=== Done ===\""
      },
      {
        "title": "JSON validation",
        "body": "# Check if file is valid JSON\njq empty data.json && echo \"Valid JSON\" || echo \"Invalid JSON\"\n\n# Validate structure of each object in an array\njq -e '\n  .[] |\n  select(\n    (.name | type) != \"string\" or\n    (.email | type) != \"string\" or\n    (.age | type) != \"number\" or\n    .age < 0\n  )\n' data.json && echo \"INVALID records found\" || echo \"All records valid\"\n\n# Check for required fields\njq -e '.[] | select(.id == null or .name == null)' data.json\n\n# Check for unique IDs\njq '[.[].id] | length != (. | unique | length)' data.json\n# true = duplicates exist\n\n# Compare record counts between source and target\nSRC=$(jq length source.json)\nTGT=$(jq length target.json)\necho \"Source: $SRC, Target: $TGT, Match: $([ \"$SRC\" = \"$TGT\" ] && echo yes || echo NO)\""
      },
      {
        "title": "Migration validation",
        "body": "#!/usr/bin/env python3\n\"\"\"Validate that a data migration preserved all records.\"\"\"\nimport json\nimport sys\n\ndef validate_migration(source_path, target_path, key_field=\"id\"):\n    with open(source_path) as f:\n        source = {r[key_field]: r for r in json.load(f)}\n    with open(target_path) as f:\n        target = {r[key_field]: r for r in json.load(f)}\n\n    missing = set(source) - set(target)\n    extra = set(target) - set(source)\n    changed = []\n\n    for key in set(source) & set(target):\n        if source[key] != target[key]:\n            changed.append(key)\n\n    print(f\"Source records: {len(source)}\")\n    print(f\"Target records: {len(target)}\")\n    print(f\"Missing in target: {len(missing)}\")\n    print(f\"Extra in target: {len(extra)}\")\n    print(f\"Changed: {len(changed)}\")\n\n    if missing:\n        print(f\"\\nMissing IDs (first 10): {list(missing)[:10]}\")\n    if extra:\n        print(f\"\\nExtra IDs (first 10): {list(extra)[:10]}\")\n    if changed:\n        print(f\"\\nChanged IDs (first 5): {changed[:5]}\")\n        for key in changed[:3]:\n            print(f\"\\n  {key}:\")\n            for field in set(source[key]) | set(target[key]):\n                s = source[key].get(field)\n                t = target[key].get(field)\n                if s != t:\n                    print(f\"    {field}: {s!r} → {t!r}\")\n\n    return len(missing) == 0 and len(extra) == 0\n\nif __name__ == \"__main__\":\n    ok = validate_migration(sys.argv[1], sys.argv[2], sys.argv[3] if len(sys.argv) > 3 else \"id\")\n    sys.exit(0 if ok else 1)"
      },
      {
        "title": "Tips",
        "body": "Validate at system boundaries (API endpoints, file imports, message queues), not deep inside business logic. Trust internal data.\nZod and Pydantic both generate JSON Schema from their definitions. Use this for documentation, OpenAPI specs, and cross-language contracts.\nadditionalProperties: false in JSON Schema catches typos in field names. Use it for strict APIs.\nPydantic v2 is significantly faster than v1. Use model_config = ConfigDict(strict=True) when you want no implicit type coercion.\nZod's .safeParse() returns a result object; .parse() throws. Use safeParse in API handlers to return structured errors.\nFor CSV validation, always check column count consistency first — most downstream errors trace back to misaligned columns.\nData migration validation should compare record counts, check for missing/extra records, and sample-check field values. Counting alone isn't enough."
      }
    ],
    "body": "Data Validation\n\nSchema-based data validation across languages and formats. Covers JSON Schema, Zod (TypeScript), Pydantic (Python), API boundary validation, data contracts, and integrity checking.\n\nWhen to Use\nDefining the shape of API request/response bodies\nValidating user input before processing\nSetting up data contracts between services\nChecking CSV/JSON file integrity before import\nMigrating data (did the ETL preserve everything?)\nGenerating types or documentation from schemas\nJSON Schema\nBasic schema\n{\n  \"$schema\": \"https://json-schema.org/draft/2020-12/schema\",\n  \"type\": \"object\",\n  \"required\": [\"name\", \"email\", \"age\"],\n  \"properties\": {\n    \"name\": {\n      \"type\": \"string\",\n      \"minLength\": 1,\n      \"maxLength\": 100\n    },\n    \"email\": {\n      \"type\": \"string\",\n      \"format\": \"email\"\n    },\n    \"age\": {\n      \"type\": \"integer\",\n      \"minimum\": 0,\n      \"maximum\": 150\n    },\n    \"role\": {\n      \"type\": \"string\",\n      \"enum\": [\"user\", \"admin\", \"moderator\"],\n      \"default\": \"user\"\n    },\n    \"tags\": {\n      \"type\": \"array\",\n      \"items\": { \"type\": \"string\" },\n      \"uniqueItems\": true,\n      \"maxItems\": 10\n    },\n    \"address\": {\n      \"type\": \"object\",\n      \"properties\": {\n        \"street\": { \"type\": \"string\" },\n        \"city\": { \"type\": \"string\" },\n        \"zip\": { \"type\": \"string\", \"pattern\": \"^\\\\d{5}(-\\\\d{4})?$\" }\n      },\n      \"required\": [\"street\", \"city\"]\n    }\n  },\n  \"additionalProperties\": false\n}\n\nCommon patterns\n// Nullable field\n{ \"type\": [\"string\", \"null\"] }\n\n// Union type (string or number)\n{ \"oneOf\": [{ \"type\": \"string\" }, { \"type\": \"number\" }] }\n\n// Conditional: if role is admin, require permissions\n{\n  \"if\": { \"properties\": { \"role\": { \"const\": \"admin\" } } },\n  \"then\": { \"required\": [\"permissions\"] 
}\n}\n\n// Pattern properties (dynamic keys)\n{\n  \"type\": \"object\",\n  \"patternProperties\": {\n    \"^env_\": { \"type\": \"string\" }\n  }\n}\n\n// Reusable definitions\n{\n  \"$defs\": {\n    \"address\": {\n      \"type\": \"object\",\n      \"properties\": {\n        \"street\": { \"type\": \"string\" },\n        \"city\": { \"type\": \"string\" }\n      }\n    }\n  },\n  \"properties\": {\n    \"home\": { \"$ref\": \"#/$defs/address\" },\n    \"work\": { \"$ref\": \"#/$defs/address\" }\n  }\n}\n\nValidate with command line\n# Using ajv-cli (Node.js)\nnpx ajv-cli validate -s schema.json -d data.json\n\n# Using jsonschema (Python)\npip install jsonschema\npython3 -c \"\nimport json, jsonschema\nschema = json.load(open('schema.json'))\ndata = json.load(open('data.json'))\njsonschema.validate(data, schema)\nprint('Valid')\n\"\n\n# Validate multiple files\nfor f in data/*.json; do\n  npx ajv-cli validate -s schema.json -d \"$f\" 2>&1 || echo \"INVALID: $f\"\ndone\n\nZod (TypeScript)\nBasic schemas\nimport { z } from 'zod';\n\n// Primitives\nconst nameSchema = z.string().min(1).max(100);\nconst ageSchema = z.number().int().min(0).max(150);\nconst emailSchema = z.string().email();\nconst urlSchema = z.string().url();\n\n// Objects\nconst userSchema = z.object({\n  name: z.string().min(1),\n  email: z.string().email(),\n  age: z.number().int().min(0),\n  role: z.enum(['user', 'admin', 'moderator']).default('user'),\n  tags: z.array(z.string()).max(10).default([]),\n  createdAt: z.string().datetime(),\n});\n\n// Infer TypeScript type from schema\ntype User = z.infer<typeof userSchema>;\n// { name: string; email: string; age: number; role: \"user\" | \"admin\" | \"moderator\"; ... 
}\n\n// Validate\nconst result = userSchema.safeParse(data);\nif (result.success) {\n  console.log(result.data); // typed as User\n} else {\n  console.log(result.error.issues); // validation errors\n}\n\n// Parse (throws on invalid)\nconst user = userSchema.parse(data);\n\nAdvanced patterns\n// Optional and nullable\nconst schema = z.object({\n  name: z.string(),\n  nickname: z.string().optional(),       // string | undefined\n  middleName: z.string().nullable(),     // string | null\n  suffix: z.string().nullish(),          // string | null | undefined\n});\n\n// Transforms (validate then transform)\nconst dateSchema = z.string().datetime().transform(s => new Date(s));\nconst trimmed = z.string().trim().toLowerCase();\nconst parsed = z.string().transform(s => parseInt(s, 10)).pipe(z.number().int());\n\n// Discriminated unions (tagged unions)\nconst eventSchema = z.discriminatedUnion('type', [\n  z.object({ type: z.literal('click'), x: z.number(), y: z.number() }),\n  z.object({ type: z.literal('keypress'), key: z.string() }),\n  z.object({ type: z.literal('scroll'), delta: z.number() }),\n]);\n\n// Recursive types\nconst categorySchema: z.ZodType<Category> = z.object({\n  name: z.string(),\n  children: z.lazy(() => z.array(categorySchema)).default([]),\n});\n\n// Refinements (custom validation)\nconst passwordSchema = z.string()\n  .min(8)\n  .refine(s => /[A-Z]/.test(s), 'Must contain uppercase')\n  .refine(s => /[0-9]/.test(s), 'Must contain digit')\n  .refine(s => /[^a-zA-Z0-9]/.test(s), 'Must contain special character');\n\n// Extend/merge objects\nconst baseUser = z.object({ name: z.string(), email: z.string() });\nconst adminUser = baseUser.extend({ permissions: z.array(z.string()) });\n\n// Pick/omit\nconst createUser = userSchema.omit({ createdAt: true });\nconst userSummary = userSchema.pick({ name: true, email: true });\n\n// Passthrough (allow extra fields)\nconst flexible = userSchema.passthrough();\n\n// Strict (reject unknown fields; the default strips them)\nconst strict = 
userSchema.strict(); // Error on extra fields\n\nAPI validation with Zod\n// Express middleware\nimport { z } from 'zod';\n\nconst createUserBody = z.object({\n  name: z.string().min(1),\n  email: z.string().email(),\n  password: z.string().min(8),\n});\n\napp.post('/api/users', (req, res) => {\n  const result = createUserBody.safeParse(req.body);\n  if (!result.success) {\n    return res.status(400).json({ errors: result.error.issues });\n  }\n  const { name, email, password } = result.data;\n  // ... create user\n});\n\n// Query parameter validation\nconst listParams = z.object({\n  page: z.coerce.number().int().min(1).default(1),\n  limit: z.coerce.number().int().min(1).max(100).default(20),\n  sort: z.enum(['newest', 'oldest', 'name']).default('newest'),\n  q: z.string().optional(),\n});\n\napp.get('/api/users', (req, res) => {\n  const params = listParams.parse(req.query);\n  // params.page is a number, params.sort is typed\n});\n\nPydantic (Python)\nBasic models\nfrom pydantic import BaseModel, Field, EmailStr, field_validator\nfrom typing import Optional\nfrom datetime import datetime\nfrom enum import Enum\n\nclass Role(str, Enum):\n    USER = \"user\"\n    ADMIN = \"admin\"\n    MODERATOR = \"moderator\"\n\nclass Address(BaseModel):\n    street: str\n    city: str\n    zip_code: str = Field(pattern=r\"^\\d{5}(-\\d{4})?$\")\n\nclass User(BaseModel):\n    name: str = Field(min_length=1, max_length=100)\n    email: EmailStr\n    age: int = Field(ge=0, le=150)\n    role: Role = Role.USER\n    tags: list[str] = Field(default_factory=list, max_length=10)\n    address: Optional[Address] = None\n    created_at: datetime = Field(default_factory=datetime.now)\n\n    @field_validator(\"name\")\n    @classmethod\n    def name_must_not_be_empty(cls, v: str) -> str:\n        if not v.strip():\n            raise ValueError(\"name cannot be blank\")\n        return v.strip()\n\n# Validate\nuser = User(name=\"Alice\", email=\"alice@example.com\", 
age=30)\nprint(user.model_dump())      # dict\nprint(user.model_dump_json())  # JSON string\n\n# Validation errors\ntry:\n    User(name=\"\", email=\"bad\", age=-1)\nexcept Exception as e:\n    print(e)  # Detailed validation errors\n\nAdvanced patterns\nfrom pydantic import BaseModel, model_validator, ConfigDict\nfrom typing import Literal, Union, Annotated\n\n# Discriminated union\nclass ClickEvent(BaseModel):\n    type: Literal[\"click\"]\n    x: int\n    y: int\n\nclass KeypressEvent(BaseModel):\n    type: Literal[\"keypress\"]\n    key: str\n\nEvent = Annotated[Union[ClickEvent, KeypressEvent], Field(discriminator=\"type\")]\n\n# Model-level validation (cross-field)\nclass DateRange(BaseModel):\n    start: datetime\n    end: datetime\n\n    @model_validator(mode=\"after\")\n    def end_after_start(self):\n        if self.end <= self.start:\n            raise ValueError(\"end must be after start\")\n        return self\n\n# Strict mode (no type coercion)\nclass StrictUser(BaseModel):\n    model_config = ConfigDict(strict=True)\n    age: int  # \"30\" will be rejected, must be int 30\n\n# Alias (accept different field names in input)\nclass APIResponse(BaseModel):\n    user_name: str = Field(alias=\"userName\")\n    created_at: datetime = Field(alias=\"createdAt\")\n\n    model_config = ConfigDict(populate_by_name=True)\n\n# Computed fields\nfrom pydantic import computed_field\n\nclass Order(BaseModel):\n    items: list[dict]\n    tax_rate: float = 0.08\n\n    @computed_field\n    @property\n    def total(self) -> float:\n        subtotal = sum(i.get(\"price\", 0) * i.get(\"qty\", 1) for i in self.items)\n        return round(subtotal * (1 + self.tax_rate), 2)\n\n# Generate JSON Schema\nprint(User.model_json_schema())\n\nFastAPI integration\nfrom fastapi import FastAPI, Query\nfrom pydantic import BaseModel\n\napp = FastAPI()\n\nclass CreateUser(BaseModel):\n    name: str = Field(min_length=1)\n    email: EmailStr\n    password: str = 
Field(min_length=8)\n\nclass UserResponse(BaseModel):\n    id: int\n    name: str\n    email: str\n\n@app.post(\"/api/users\", response_model=UserResponse)\nasync def create_user(body: CreateUser):\n    # body is already validated\n    return {\"id\": 1, \"name\": body.name, \"email\": body.email}\n\n@app.get(\"/api/users\")\nasync def list_users(\n    page: int = Query(default=1, ge=1),\n    limit: int = Query(default=20, ge=1, le=100),\n    q: str | None = Query(default=None),\n):\n    # All params validated and typed\n    pass\n\nData Integrity Checks\nCSV validation\n#!/bin/bash\n# validate-csv.sh — Check CSV structure and data quality\nFILE=\"${1:?Usage: validate-csv.sh <file.csv>}\"\n\necho \"=== CSV Validation: $FILE ===\"\n\n# Row count\nROWS=$(wc -l < \"$FILE\")\necho \"Rows: $ROWS (including header)\"\n\n# Column count consistency\nHEADER_COLS=$(head -1 \"$FILE\" | awk -F',' '{print NF}')\necho \"Columns (header): $HEADER_COLS\"\n\nBAD_ROWS=$(awk -F',' -v expected=\"$HEADER_COLS\" 'NR>1 && NF!=expected {count++} END {print count+0}' \"$FILE\")\nif [ \"$BAD_ROWS\" -gt 0 ]; then\n    echo \"ERROR: $BAD_ROWS rows have wrong column count\"\n    awk -F',' -v expected=\"$HEADER_COLS\" 'NR>1 && NF!=expected {print \"  Line \"NR\": \"NF\" columns (expected \"expected\")\"}' \"$FILE\" | head -5\nelse\n    echo \"Column count: consistent\"\nfi\n\n# Empty fields\nEMPTY=$(awk -F',' '{for(i=1;i<=NF;i++) if($i==\"\") count++} END {print count}' \"$FILE\")\necho \"Empty fields: $EMPTY\"\n\n# Duplicate rows\nDUPES=$(($(sort \"$FILE\" | uniq -d | wc -l)))\necho \"Duplicate rows: $DUPES\"\n\necho \"=== Done ===\"\n\nJSON validation\n# Check if file is valid JSON\njq empty data.json && echo \"Valid JSON\" || echo \"Invalid JSON\"\n\n# Validate structure of each object in an array\njq -e '\n  .[] |\n  select(\n    (.name | type) != \"string\" or\n    (.email | type) != \"string\" or\n    (.age | type) != \"number\" or\n    .age < 0\n  )\n' data.json && echo \"INVALID records 
found\" || echo \"All records valid\"\n\n# Check for required fields\njq -e '.[] | select(.id == null or .name == null)' data.json\n\n# Check for unique IDs\njq '[.[].id] | length != (. | unique | length)' data.json\n# true = duplicates exist\n\n# Compare record counts between source and target\nSRC=$(jq length source.json)\nTGT=$(jq length target.json)\necho \"Source: $SRC, Target: $TGT, Match: $([ \"$SRC\" = \"$TGT\" ] && echo yes || echo NO)\"\n\nMigration validation\n#!/usr/bin/env python3\n\"\"\"Validate that a data migration preserved all records.\"\"\"\nimport json\nimport sys\n\ndef validate_migration(source_path, target_path, key_field=\"id\"):\n    with open(source_path) as f:\n        source = {r[key_field]: r for r in json.load(f)}\n    with open(target_path) as f:\n        target = {r[key_field]: r for r in json.load(f)}\n\n    missing = set(source) - set(target)\n    extra = set(target) - set(source)\n    changed = []\n\n    for key in set(source) & set(target):\n        if source[key] != target[key]:\n            changed.append(key)\n\n    print(f\"Source records: {len(source)}\")\n    print(f\"Target records: {len(target)}\")\n    print(f\"Missing in target: {len(missing)}\")\n    print(f\"Extra in target: {len(extra)}\")\n    print(f\"Changed: {len(changed)}\")\n\n    if missing:\n        print(f\"\\nMissing IDs (first 10): {list(missing)[:10]}\")\n    if extra:\n        print(f\"\\nExtra IDs (first 10): {list(extra)[:10]}\")\n    if changed:\n        print(f\"\\nChanged IDs (first 5): {changed[:5]}\")\n        for key in changed[:3]:\n            print(f\"\\n  {key}:\")\n            for field in set(source[key]) | set(target[key]):\n                s = source[key].get(field)\n                t = target[key].get(field)\n                if s != t:\n                    print(f\"    {field}: {s!r} → {t!r}\")\n\n    return len(missing) == 0 and len(extra) == 0\n\nif __name__ == \"__main__\":\n    ok = validate_migration(sys.argv[1], sys.argv[2], 
sys.argv[3] if len(sys.argv) > 3 else \"id\")\n    sys.exit(0 if ok else 1)\n\nTips\nValidate at system boundaries (API endpoints, file imports, message queues), not deep inside business logic. Trust internal data.\nZod and Pydantic both generate JSON Schema from their definitions. Use this for documentation, OpenAPI specs, and cross-language contracts.\nadditionalProperties: false in JSON Schema catches typos in field names. Use it for strict APIs.\nPydantic v2 is significantly faster than v1. Use model_config = ConfigDict(strict=True) when you want no implicit type coercion.\nZod's .safeParse() returns a result object; .parse() throws. Use safeParse in API handlers to return structured errors.\nFor CSV validation, always check column count consistency first — most downstream errors trace back to misaligned columns.\nData migration validation should compare record counts, check for missing/extra records, and sample-check field values. Counting alone isn't enough."
  },
  "trust": {
    "sourceLabel": "tencent",
    "provenanceUrl": "https://clawhub.ai/gitgoodordietrying/data-validation",
    "publisherUrl": "https://clawhub.ai/gitgoodordietrying/data-validation",
    "owner": "gitgoodordietrying",
    "version": "1.0.0",
    "license": null,
    "verificationStatus": "Indexed source record"
  },
  "links": {
    "detailUrl": "https://openagent3.xyz/skills/data-validation",
    "downloadUrl": "https://openagent3.xyz/downloads/data-validation",
    "agentUrl": "https://openagent3.xyz/skills/data-validation/agent",
    "manifestUrl": "https://openagent3.xyz/skills/data-validation/agent.json",
    "briefUrl": "https://openagent3.xyz/skills/data-validation/agent.md"
  }
}