{
  "schemaVersion": "1.0",
  "item": {
    "slug": "data-lineage-tracker",
    "name": "Data Lineage Tracker",
    "source": "tencent",
    "type": "skill",
    "category": "数据分析",
    "sourceUrl": "https://clawhub.ai/datadrivenconstruction/data-lineage-tracker",
    "canonicalUrl": "https://clawhub.ai/datadrivenconstruction/data-lineage-tracker",
    "targetPlatform": "OpenClaw"
  },
  "install": {
    "downloadMode": "redirect",
    "downloadUrl": "/downloads/data-lineage-tracker",
    "sourceDownloadUrl": "https://wry-manatee-359.convex.site/api/v1/download?slug=data-lineage-tracker",
    "sourcePlatform": "tencent",
    "targetPlatform": "OpenClaw",
    "installMethod": "Manual import",
    "extraction": "Extract archive",
    "prerequisites": [
      "OpenClaw"
    ],
    "packageFormat": "ZIP package",
    "includedAssets": [
      "claw.json",
      "instructions.md",
      "SKILL.md"
    ],
    "primaryDoc": "SKILL.md",
    "quickSetup": [
      "Download the package from Yavira.",
      "Extract the archive and review SKILL.md first.",
      "Import or place the package into your OpenClaw setup."
    ],
    "agentAssist": {
      "summary": "Hand the extracted package to your coding agent with a concrete install brief instead of figuring it out manually.",
      "steps": [
        "Download the package from Yavira.",
        "Extract it into a folder your agent can access.",
        "Paste one of the prompts below and point your agent at the extracted folder."
      ],
      "prompts": [
        {
          "label": "New install",
          "body": "I downloaded a skill package from Yavira. Read SKILL.md from the extracted folder and install it by following the included instructions. Tell me what you changed and call out any manual steps you could not complete."
        },
        {
          "label": "Upgrade existing",
          "body": "I downloaded an updated skill package from Yavira. Read SKILL.md from the extracted folder, compare it with my current installation, and upgrade it while preserving any custom configuration unless the package docs explicitly say otherwise. Summarize what changed and any follow-up checks I should run."
        }
      ]
    },
    "sourceHealth": {
      "source": "tencent",
      "status": "healthy",
      "reason": "direct_download_ok",
      "recommendedAction": "download",
      "checkedAt": "2026-04-23T16:43:11.935Z",
      "expiresAt": "2026-04-30T16:43:11.935Z",
      "httpStatus": 200,
      "finalUrl": "https://wry-manatee-359.convex.site/api/v1/download?slug=4claw-imageboard",
      "contentType": "application/zip",
      "probeMethod": "head",
      "details": {
        "probeUrl": "https://wry-manatee-359.convex.site/api/v1/download?slug=4claw-imageboard",
        "contentDisposition": "attachment; filename=\"4claw-imageboard-1.0.1.zip\"",
        "redirectLocation": null,
        "bodySnippet": null
      },
      "scope": "source",
      "summary": "Source download looks usable.",
      "detail": "Yavira can redirect you to the upstream package for this source.",
      "primaryActionLabel": "Download for OpenClaw",
      "primaryActionHref": "/downloads/data-lineage-tracker"
    },
    "validation": {
      "installChecklist": [
        "Use the Yavira download entry.",
        "Review SKILL.md after the package is downloaded.",
        "Confirm the extracted package contains the expected setup assets."
      ],
      "postInstallChecks": [
        "Confirm the extracted package includes the expected docs or setup files.",
        "Validate the skill or prompts are available in your target agent workspace.",
        "Capture any manual follow-up steps the agent could not complete."
      ]
    },
    "downloadPageUrl": "https://openagent3.xyz/downloads/data-lineage-tracker",
    "agentPageUrl": "https://openagent3.xyz/skills/data-lineage-tracker/agent",
    "manifestUrl": "https://openagent3.xyz/skills/data-lineage-tracker/agent.json",
    "briefUrl": "https://openagent3.xyz/skills/data-lineage-tracker/agent.md"
  },
  "agentAssist": {
    "summary": "Hand the extracted package to your coding agent with a concrete install brief instead of figuring it out manually.",
    "steps": [
      "Download the package from Yavira.",
      "Extract it into a folder your agent can access.",
      "Paste one of the prompts below and point your agent at the extracted folder."
    ],
    "prompts": [
      {
        "label": "New install",
        "body": "I downloaded a skill package from Yavira. Read SKILL.md from the extracted folder and install it by following the included instructions. Tell me what you changed and call out any manual steps you could not complete."
      },
      {
        "label": "Upgrade existing",
        "body": "I downloaded an updated skill package from Yavira. Read SKILL.md from the extracted folder, compare it with my current installation, and upgrade it while preserving any custom configuration unless the package docs explicitly say otherwise. Summarize what changed and any follow-up checks I should run."
      }
    ]
  },
  "documentation": {
    "source": "clawhub",
    "primaryDoc": "SKILL.md",
    "sections": [
      {
        "title": "Overview",
        "body": "Track the origin, transformations, and flow of construction data through systems. Provides audit trails for compliance, helps debug data issues, and ensures data governance."
      },
      {
        "title": "Business Case",
        "body": "Construction projects require data accountability:\n\nAudit Compliance: Know where every number came from\nIssue Resolution: Trace data problems to their source\nChange Impact: Understand what downstream systems are affected\nRegulatory Requirements: Maintain data provenance for legal/insurance"
      },
      {
        "title": "Technical Implementation",
        "body": "from dataclasses import dataclass, field\nfrom typing import List, Dict, Any, Optional, Set\nfrom datetime import datetime\nfrom enum import Enum\nimport json\nimport hashlib\nimport uuid\n\nclass TransformationType(Enum):\n    EXTRACT = \"extract\"\n    TRANSFORM = \"transform\"\n    LOAD = \"load\"\n    AGGREGATE = \"aggregate\"\n    JOIN = \"join\"\n    FILTER = \"filter\"\n    CALCULATE = \"calculate\"\n    MANUAL_EDIT = \"manual_edit\"\n    IMPORT = \"import\"\n    EXPORT = \"export\"\n\n@dataclass\nclass DataSource:\n    id: str\n    name: str\n    system: str\n    location: str\n    owner: str\n    created_at: datetime\n\n@dataclass\nclass TransformationStep:\n    id: str\n    transformation_type: TransformationType\n    description: str\n    input_entities: List[str]\n    output_entities: List[str]\n    logic: str  # SQL, Python, or description\n    performed_by: str  # user or system\n    performed_at: datetime\n    parameters: Dict[str, Any] = field(default_factory=dict)\n\n@dataclass\nclass DataEntity:\n    id: str\n    name: str\n    source_id: str\n    entity_type: str  # table, file, field, record\n    created_at: datetime\n    version: int = 1\n    checksum: Optional[str] = None\n    parent_entities: List[str] = field(default_factory=list)\n    metadata: Dict[str, Any] = field(default_factory=dict)\n\n@dataclass\nclass LineageRecord:\n    id: str\n    entity_id: str\n    transformation_id: str\n    upstream_entities: List[str]\n    downstream_entities: List[str]\n    recorded_at: datetime\n\nclass ConstructionDataLineageTracker:\n    \"\"\"Track data lineage for construction data flows.\"\"\"\n\n    def __init__(self, project_id: str):\n        self.project_id = project_id\n        self.sources: Dict[str, DataSource] = {}\n        self.entities: Dict[str, DataEntity] = {}\n        self.transformations: Dict[str, TransformationStep] = {}\n        self.lineage_records: List[LineageRecord] = []\n\n    def register_source(self, name: str, system: str, location: str, owner: str) -> DataSource:\n        \"\"\"Register a new data source.\"\"\"\n        source = DataSource(\n            id=f\"SRC-{uuid.uuid4().hex[:8]}\",\n            name=name,\n            system=system,\n            location=location,\n            owner=owner,\n            created_at=datetime.now()\n        )\n        self.sources[source.id] = source\n        return source\n\n    def register_entity(self, name: str, source_id: str, entity_type: str,\n                       parent_entities: List[str] = None,\n                       metadata: Dict = None) -> DataEntity:\n        \"\"\"Register a data entity (table, file, field).\"\"\"\n        entity = DataEntity(\n            id=f\"ENT-{uuid.uuid4().hex[:8]}\",\n            name=name,\n            source_id=source_id,\n            entity_type=entity_type,\n            created_at=datetime.now(),\n            parent_entities=parent_entities or [],\n            metadata=metadata or {}\n        )\n        self.entities[entity.id] = entity\n        return entity\n\n    def calculate_checksum(self, data: Any) -> str:\n        \"\"\"Calculate checksum for data verification.\"\"\"\n        if isinstance(data, str):\n            content = data\n        else:\n            content = json.dumps(data, sort_keys=True, default=str)\n        return hashlib.sha256(content.encode()).hexdigest()[:16]\n\n    def record_transformation(self,\n                             transformation_type: TransformationType,\n                             description: str,\n                             input_entities: List[str],\n                             output_entities: List[str],\n                             logic: str,\n                             performed_by: str,\n                             parameters: Dict = None) -> TransformationStep:\n        \"\"\"Record a data transformation.\"\"\"\n        transformation = TransformationStep(\n            id=f\"TRF-{uuid.uuid4().hex[:8]}\",\n            transformation_type=transformation_type,\n            description=description,\n            input_entities=input_entities,\n            output_entities=output_entities,\n            logic=logic,\n            performed_by=performed_by,\n            performed_at=datetime.now(),\n            parameters=parameters or {}\n        )\n        self.transformations[transformation.id] = transformation\n\n        # Create lineage records\n        for output_id in output_entities:\n            record = LineageRecord(\n                id=f\"LIN-{uuid.uuid4().hex[:8]}\",\n                entity_id=output_id,\n                transformation_id=transformation.id,\n                upstream_entities=input_entities,\n                downstream_entities=[],\n                recorded_at=datetime.now()\n            )\n            self.lineage_records.append(record)\n\n            # Update downstream references for input entities\n            for input_id in input_entities:\n                for existing_record in self.lineage_records:\n                    if existing_record.entity_id == input_id:\n                        existing_record.downstream_entities.append(output_id)\n\n        return transformation\n\n    def trace_upstream(self, entity_id: str, depth: int = None) -> List[Dict]:\n        \"\"\"Trace all upstream sources of an entity.\"\"\"\n        visited = set()\n        lineage = []\n\n        def trace(eid: str, current_depth: int):\n            if eid in visited:\n                return\n            if depth is not None and current_depth > depth:\n                return\n\n            visited.add(eid)\n\n            entity = self.entities.get(eid)\n            if not entity:\n                return\n\n            # Find transformations that produced this entity\n            for record in self.lineage_records:\n                if record.entity_id == eid:\n                    transformation = self.transformations.get(record.transformation_id)\n                    if transformation:\n                        lineage.append({\n                            'entity': entity.name,\n                            'entity_id': eid,\n                            'depth': current_depth,\n                            'transformation': transformation.description,\n                            'transformation_type': transformation.transformation_type.value,\n                            'performed_at': transformation.performed_at.isoformat(),\n                            'performed_by': transformation.performed_by,\n                            'upstream': record.upstream_entities\n                        })\n\n                        for upstream_id in record.upstream_entities:\n                            trace(upstream_id, current_depth + 1)\n\n        trace(entity_id, 0)\n        return sorted(lineage, key=lambda x: x['depth'])\n\n    def trace_downstream(self, entity_id: str, depth: int = None) -> List[Dict]:\n        \"\"\"Trace all downstream dependencies of an entity.\"\"\"\n        visited = set()\n        dependencies = []\n\n        def trace(eid: str, current_depth: int):\n            if eid in visited:\n                return\n            if depth is not None and current_depth > depth:\n                return\n\n            visited.add(eid)\n\n            entity = self.entities.get(eid)\n            if not entity:\n                return\n\n            # Find entities that use this entity\n            for record in self.lineage_records:\n                if eid in record.upstream_entities:\n                    transformation = self.transformations.get(record.transformation_id)\n                    if transformation:\n                        dependencies.append({\n                            'entity': self.entities[record.entity_id].name if record.entity_id in self.entities else record.entity_id,\n                            'entity_id': record.entity_id,\n                            'depth': current_depth,\n                            'transformation': transformation.description,\n                            'transformation_type': transformation.transformation_type.value\n                        })\n\n                        trace(record.entity_id, current_depth + 1)\n\n        trace(entity_id, 0)\n        return sorted(dependencies, key=lambda x: x['depth'])\n\n    def get_entity_history(self, entity_id: str) -> List[Dict]:\n        \"\"\"Get complete history of changes to an entity.\"\"\"\n        history = []\n\n        for record in self.lineage_records:\n            if record.entity_id == entity_id:\n                transformation = self.transformations.get(record.transformation_id)\n                if transformation:\n                    history.append({\n                        'timestamp': transformation.performed_at.isoformat(),\n                        'action': transformation.transformation_type.value,\n                        'description': transformation.description,\n                        'performed_by': transformation.performed_by,\n                        'inputs': [\n                            self.entities[eid].name if eid in self.entities else eid\n                            for eid in record.upstream_entities\n                        ]\n                    })\n\n        return sorted(history, key=lambda x: x['timestamp'])\n\n    def impact_analysis(self, entity_id: str) -> Dict:\n        \"\"\"Analyze impact of changes to an entity.\"\"\"\n        downstream = self.trace_downstream(entity_id)\n\n        impact = {\n            'entity': self.entities[entity_id].name if entity_id in self.entities else entity_id,\n            'total_affected': len(downstream),\n            'affected_by_depth': {},\n            'affected_entities': downstream\n        }\n\n        for dep in downstream:\n            depth = dep['depth']\n            impact['affected_by_depth'][depth] = impact['affected_by_depth'].get(depth, 0) + 1\n\n        return impact\n\n    def validate_lineage(self) -> List[str]:\n        \"\"\"Validate lineage for completeness and consistency.\"\"\"\n        issues = []\n\n        # Check for orphan entities (no source or transformation)\n        for eid, entity in self.entities.items():\n            has_lineage = any(r.entity_id == eid for r in self.lineage_records)\n            if not has_lineage and entity.entity_type != 'source':\n                issues.append(f\"Entity '{entity.name}' has no lineage record\")\n\n        # Check for broken references\n        all_entity_ids = set(self.entities.keys())\n        for record in self.lineage_records:\n            for upstream_id in record.upstream_entities:\n                if upstream_id not in all_entity_ids:\n                    issues.append(f\"Lineage references unknown entity: {upstream_id}\")\n\n        # Check for circular dependencies\n        for eid in self.entities:\n            upstream = set()\n            to_check = [eid]\n            while to_check:\n                current = to_check.pop()\n                if current in upstream:\n                    issues.append(f\"Circular dependency detected involving entity: {self.entities[eid].name}\")\n                    break\n                upstream.add(current)\n                for record in self.lineage_records:\n                    if record.entity_id == current:\n                        to_check.extend(record.upstream_entities)\n\n        return issues\n\n    def generate_lineage_graph(self, entity_id: str) -> str:\n        \"\"\"Generate Mermaid diagram of lineage.\"\"\"\n        lines = [\"```mermaid\", \"graph LR\"]\n\n        upstream = self.trace_upstream(entity_id, depth=5)\n        downstream = self.trace_downstream(entity_id, depth=5)\n\n        # Add nodes\n        added_nodes = set()\n        for item in upstream + downstream:\n            node_id = item['entity_id'].replace('-', '_')\n            if node_id not in added_nodes:\n                entity = self.entities.get(item['entity_id'])\n                name = entity.name if entity else item['entity_id']\n                lines.append(f\"    {node_id}[{name}]\")\n                added_nodes.add(node_id)\n\n        # Add target node\n        target_node = entity_id.replace('-', '_')\n        if target_node not in added_nodes:\n            entity = self.entities.get(entity_id)\n            name = entity.name if entity else entity_id\n            lines.append(f\"    {target_node}[{name}]:::target\")\n\n        # Add edges\n        for item in upstream:\n            for upstream_id in item.get('upstream', []):\n                from_node = upstream_id.replace('-', '_')\n                to_node = item['entity_id'].replace('-', '_')\n                lines.append(f\"    {from_node} --> {to_node}\")\n\n        for item in downstream:\n            from_node = entity_id.replace('-', '_')\n            to_node = item['entity_id'].replace('-', '_')\n            if to_node != from_node:\n                lines.append(f\"    {from_node} --> {to_node}\")\n\n        lines.append(\"    classDef target fill:#f96\")\n        lines.append(\"```\")\n\n        return \"\\n\".join(lines)\n\n    def export_lineage(self) -> Dict:\n        \"\"\"Export complete lineage data.\"\"\"\n        return {\n            'project_id': self.project_id,\n            'exported_at': datetime.now().isoformat(),\n            'sources': {k: {\n                'id': v.id,\n                'name': v.name,\n                'system': v.system,\n                'location': v.location,\n                'owner': v.owner\n            } for k, v in self.sources.items()},\n            'entities': {k: {\n                'id': v.id,\n                'name': v.name,\n                'source_id': v.source_id,\n                'entity_type': v.entity_type,\n                'parent_entities': v.parent_entities\n            } for k, v in self.entities.items()},\n            'transformations': {k: {\n                'id': v.id,\n                'type': v.transformation_type.value,\n                'description': v.description,\n                'input_entities': v.input_entities,\n                'output_entities': v.output_entities,\n                'performed_by': v.performed_by,\n                'performed_at': v.performed_at.isoformat()\n            } for k, v in self.transformations.items()},\n            'lineage_records': [{\n                'id': r.id,\n                'entity_id': r.entity_id,\n                'transformation_id': r.transformation_id,\n                'upstream_entities': r.upstream_entities\n            } for r in self.lineage_records]\n        }\n\n    def generate_report(self) -> str:\n        \"\"\"Generate lineage report.\"\"\"\n        lines = [f\"# Data Lineage Report: {self.project_id}\", \"\"]\n        lines.append(f\"**Generated:** {datetime.now().strftime('%Y-%m-%d %H:%M')}\")\n        lines.append(f\"**Sources:** {len(self.sources)}\")\n        lines.append(f\"**Entities:** {len(self.entities)}\")\n        lines.append(f\"**Transformations:** {len(self.transformations)}\")\n        lines.append(\"\")\n\n        # Sources\n        lines.append(\"## Data Sources\")\n        for source in self.sources.values():\n            lines.append(f\"- **{source.name}** ({source.system})\")\n            lines.append(f\"  - Location: {source.location}\")\n            lines.append(f\"  - Owner: {source.owner}\")\n        lines.append(\"\")\n\n        # Validation\n        issues = self.validate_lineage()\n        if issues:\n            lines.append(\"## Lineage Issues\")\n            for issue in issues:\n                lines.append(f\"- ⚠️ {issue}\")\n            lines.append(\"\")\n\n        # Transformation summary\n        lines.append(\"## Transformation Summary\")\n        type_counts = {}\n        for t in self.transformations.values():\n            type_counts[t.transformation_type.value] = type_counts.get(t.transformation_type.value, 0) + 1\n        for t_type, count in sorted(type_counts.items()):\n            lines.append(f\"- {t_type}: {count}\")\n\n        return \"\\n\".join(lines)"
      },
      {
        "title": "Quick Start",
        "body": "# Initialize tracker\ntracker = ConstructionDataLineageTracker(\"PROJECT-001\")\n\n# Register sources\nprocore = tracker.register_source(\"Procore\", \"SaaS\", \"cloud\", \"PM Team\")\nsage = tracker.register_source(\"Sage 300\", \"Database\", \"on-prem\", \"Finance\")\n\n# Register entities\nbudget = tracker.register_entity(\"Project Budget\", procore.id, \"table\")\ncosts = tracker.register_entity(\"Job Costs\", sage.id, \"table\")\nreport = tracker.register_entity(\"Cost Variance Report\", procore.id, \"file\")\n\n# Record transformation\ntracker.record_transformation(\n    transformation_type=TransformationType.JOIN,\n    description=\"Join budget and actual costs for variance calculation\",\n    input_entities=[budget.id, costs.id],\n    output_entities=[report.id],\n    logic=\"SELECT b.*, c.actual, (b.budget - c.actual) as variance FROM budget b JOIN costs c ON b.cost_code = c.cost_code\",\n    performed_by=\"ETL Pipeline\"\n)\n\n# Trace lineage\nupstream = tracker.trace_upstream(report.id)\nprint(\"Upstream lineage:\", upstream)\n\n# Generate graph\nprint(tracker.generate_lineage_graph(report.id))\n\n# Export for audit\nlineage_data = tracker.export_lineage()"
      },
      {
        "title": "Resources",
        "body": "Data Governance: DAMA DMBOK lineage guidelines\nAudit Requirements: SOX, ISO compliance"
      }
    ],
    "body": "Data Lineage Tracker for Construction\nOverview\n\nTrack the origin, transformations, and flow of construction data through systems. Provides audit trails for compliance, helps debug data issues, and ensures data governance.\n\nBusiness Case\n\nConstruction projects require data accountability:\n\nAudit Compliance: Know where every number came from\nIssue Resolution: Trace data problems to their source\nChange Impact: Understand what downstream systems are affected\nRegulatory Requirements: Maintain data provenance for legal/insurance\nTechnical Implementation\nfrom dataclasses import dataclass, field\nfrom typing import List, Dict, Any, Optional, Set\nfrom datetime import datetime\nfrom enum import Enum\nimport json\nimport hashlib\nimport uuid\n\nclass TransformationType(Enum):\n    EXTRACT = \"extract\"\n    TRANSFORM = \"transform\"\n    LOAD = \"load\"\n    AGGREGATE = \"aggregate\"\n    JOIN = \"join\"\n    FILTER = \"filter\"\n    CALCULATE = \"calculate\"\n    MANUAL_EDIT = \"manual_edit\"\n    IMPORT = \"import\"\n    EXPORT = \"export\"\n\n@dataclass\nclass DataSource:\n    id: str\n    name: str\n    system: str\n    location: str\n    owner: str\n    created_at: datetime\n\n@dataclass\nclass TransformationStep:\n    id: str\n    transformation_type: TransformationType\n    description: str\n    input_entities: List[str]\n    output_entities: List[str]\n    logic: str  # SQL, Python, or description\n    performed_by: str  # user or system\n    performed_at: datetime\n    parameters: Dict[str, Any] = field(default_factory=dict)\n\n@dataclass\nclass DataEntity:\n    id: str\n    name: str\n    source_id: str\n    entity_type: str  # table, file, field, record\n    created_at: datetime\n    version: int = 1\n    checksum: Optional[str] = None\n    parent_entities: List[str] = field(default_factory=list)\n    metadata: Dict[str, Any] = field(default_factory=dict)\n\n@dataclass\nclass LineageRecord:\n    id: str\n    entity_id: str\n    transformation_id: str\n    upstream_entities: List[str]\n    downstream_entities: List[str]\n    recorded_at: datetime\n\nclass ConstructionDataLineageTracker:\n    \"\"\"Track data lineage for construction data flows.\"\"\"\n\n    def __init__(self, project_id: str):\n        self.project_id = project_id\n        self.sources: Dict[str, DataSource] = {}\n        self.entities: Dict[str, DataEntity] = {}\n        self.transformations: Dict[str, TransformationStep] = {}\n        self.lineage_records: List[LineageRecord] = []\n\n    def register_source(self, name: str, system: str, location: str, owner: str) -> DataSource:\n        \"\"\"Register a new data source.\"\"\"\n        source = DataSource(\n            id=f\"SRC-{uuid.uuid4().hex[:8]}\",\n            name=name,\n            system=system,\n            location=location,\n            owner=owner,\n            created_at=datetime.now()\n        )\n        self.sources[source.id] = source\n        return source\n\n    def register_entity(self, name: str, source_id: str, entity_type: str,\n                       parent_entities: List[str] = None,\n                       metadata: Dict = None) -> DataEntity:\n        \"\"\"Register a data entity (table, file, field).\"\"\"\n        entity = DataEntity(\n            id=f\"ENT-{uuid.uuid4().hex[:8]}\",\n            name=name,\n            source_id=source_id,\n            entity_type=entity_type,\n            created_at=datetime.now(),\n            parent_entities=parent_entities or [],\n            metadata=metadata or {}\n        )\n        self.entities[entity.id] = entity\n        return entity\n\n    def calculate_checksum(self, data: Any) -> str:\n        \"\"\"Calculate checksum for data verification.\"\"\"\n        if isinstance(data, str):\n            content = data\n        else:\n            content = json.dumps(data, sort_keys=True, default=str)\n        return hashlib.sha256(content.encode()).hexdigest()[:16]\n\n    def record_transformation(self,\n                             transformation_type: TransformationType,\n                             description: str,\n                             input_entities: List[str],\n                             output_entities: List[str],\n                             logic: str,\n                             performed_by: str,\n                             parameters: Dict = None) -> TransformationStep:\n        \"\"\"Record a data transformation.\"\"\"\n        transformation = TransformationStep(\n            id=f\"TRF-{uuid.uuid4().hex[:8]}\",\n            transformation_type=transformation_type,\n            description=description,\n            input_entities=input_entities,\n            output_entities=output_entities,\n            logic=logic,\n            performed_by=performed_by,\n            performed_at=datetime.now(),\n            parameters=parameters or {}\n        )\n        self.transformations[transformation.id] = transformation\n\n        # Create lineage records\n        for output_id in output_entities:\n            record = LineageRecord(\n                id=f\"LIN-{uuid.uuid4().hex[:8]}\",\n                entity_id=output_id,\n                transformation_id=transformation.id,\n                upstream_entities=input_entities,\n                downstream_entities=[],\n                recorded_at=datetime.now()\n            )\n            self.lineage_records.append(record)\n\n            # Update downstream references for input entities\n            for input_id in input_entities:\n                for existing_record in self.lineage_records:\n                    if existing_record.entity_id == input_id:\n                        existing_record.downstream_entities.append(output_id)\n\n        return transformation\n\n    def trace_upstream(self, entity_id: str, depth: int = None) -> List[Dict]:\n        \"\"\"Trace all upstream sources of an entity.\"\"\"\n        visited = set()\n        lineage = []\n\n        def trace(eid: str, current_depth: int):\n            if eid in visited:\n                return\n            if depth is not None and current_depth > depth:\n                return\n\n            visited.add(eid)\n\n            entity = self.entities.get(eid)\n            if not entity:\n                return\n\n            # Find transformations that produced this entity\n            for record in self.lineage_records:\n                if record.entity_id == eid:\n                    transformation = self.transformations.get(record.transformation_id)\n                    if transformation:\n                        lineage.append({\n                            'entity': entity.name,\n                            'entity_id': eid,\n                            'depth': current_depth,\n                            'transformation': transformation.description,\n                            'transformation_type': transformation.transformation_type.value,\n                            'performed_at': transformation.performed_at.isoformat(),\n                            'performed_by': transformation.performed_by,\n                            'upstream': record.upstream_entities\n                        })\n\n                        for upstream_id in record.upstream_entities:\n                            trace(upstream_id, current_depth + 1)\n\n        trace(entity_id, 0)\n        return sorted(lineage, key=lambda x: x['depth'])\n\n    def trace_downstream(self, entity_id: str, depth: int = None) -> List[Dict]:\n        \"\"\"Trace all downstream dependencies of an entity.\"\"\"\n        visited = set()\n        dependencies = []\n\n        def trace(eid: str, current_depth: int):\n            if eid in visited:\n                return\n            if depth is not None and current_depth > depth:\n                return\n\n            visited.add(eid)\n\n            entity = self.entities.get(eid)\n            if not entity:\n                return\n\n            # Find entities that use this entity\n            for record in self.lineage_records:\n                if eid in record.upstream_entities:\n                    transformation = self.transformations.get(record.transformation_id)\n                    if transformation:\n                        dependencies.append({\n                            'entity': self.entities[record.entity_id].name if record.entity_id in self.entities else record.entity_id,\n                            'entity_id': record.entity_id,\n                            'depth': current_depth,\n                            'transformation': transformation.description,\n                            'transformation_type': transformation.transformation_type.value\n                        })\n\n                        trace(record.entity_id, current_depth + 1)\n\n        trace(entity_id, 0)\n        return sorted(dependencies, key=lambda x: x['depth'])\n\n    def get_entity_history(self, entity_id: str) -> List[Dict]:\n        \"\"\"Get complete history of changes to an entity.\"\"\"\n        history = []\n\n        for record in self.lineage_records:\n            if record.entity_id == entity_id:\n                transformation = self.transformations.get(record.transformation_id)\n                if transformation:\n                    history.append({\n                        'timestamp': transformation.performed_at.isoformat(),\n                        'action': transformation.transformation_type.value,\n                        'description': transformation.description,\n                        'performed_by': transformation.performed_by,\n                        'inputs': [\n                            self.entities[eid].name if eid in self.entities else eid\n                            for eid in record.upstream_entities\n                        ]\n                    })\n\n        return sorted(history, key=lambda x: x['timestamp'])\n\n    def impact_analysis(self, entity_id: str) -> Dict:\n        \"\"\"Analyze impact of changes to an entity.\"\"\"\n        downstream = self.trace_downstream(entity_id)\n\n        impact = {\n            'entity': self.entities[entity_id].name if entity_id in self.entities else entity_id,\n            'total_affected': len(downstream),\n            'affected_by_depth': {},\n            'affected_entities': downstream\n        }\n\n        for dep in downstream:\n            depth = dep['depth']\n            impact['affected_by_depth'][depth] = impact['affected_by_depth'].get(depth, 0) + 1\n\n        return impact\n\n    def validate_lineage(self) -> List[str]:\n        \"\"\"Validate lineage for completeness and consistency.\"\"\"\n        issues = []\n\n        # Check for orphan entities (no source or transformation)\n        for eid, entity in self.entities.items():\n            has_lineage = any(r.entity_id == eid for r in self.lineage_records)\n            if not has_lineage and entity.entity_type != 'source':\n                issues.append(f\"Entity '{entity.name}' has no lineage record\")\n\n        # Check for broken references\n        all_entity_ids = set(self.entities.keys())\n        for record in self.lineage_records:\n            for upstream_id in record.upstream_entities:\n                if upstream_id not in all_entity_ids:\n                    issues.append(f\"Lineage references unknown entity: {upstream_id}\")\n\n        # Check for circular dependencies\n        for eid in self.entities:\n            upstream = set()\n            to_check = [eid]\n            while to_check:\n                current = to_check.pop()\n                if current in upstream:\n                    issues.append(f\"Circular dependency detected involving entity: {self.entities[eid].name}\")\n                    break\n                upstream.add(current)\n                for record in self.lineage_records:\n                    if record.entity_id == current:\n                        to_check.extend(record.upstream_entities)\n\n        return issues\n\n    def generate_lineage_graph(self, entity_id: str) -> str:\n        \"\"\"Generate Mermaid diagram of lineage.\"\"\"\n        lines = [\"```mermaid\", \"graph LR\"]\n\n        upstream = self.trace_upstream(entity_id, depth=5)\n        downstream = self.trace_downstream(entity_id, depth=5)\n\n        # Add nodes\n        added_nodes = set()\n        for item in upstream + downstream:\n            node_id = item['entity_id'].replace('-', '_')\n            if node_id not in added_nodes:\n                entity = self.entities.get(item['entity_id'])\n                name = entity.name if entity else item['entity_id']\n                lines.append(f\"    {node_id}[{name}]\")\n                added_nodes.add(node_id)\n\n        # Add target node\n        target_node = entity_id.replace('-', '_')\n        if target_node not in added_nodes:\n            entity = self.entities.get(entity_id)\n            name = entity.name if entity else entity_id\n            lines.append(f\"    {target_node}[{name}]:::target\")\n\n        # Add edges\n        for item in upstream:\n            for upstream_id in item.get('upstream', []):\n                from_node = upstream_id.replace('-', '_')\n                to_node = item['entity_id'].replace('-', '_')\n                lines.append(f\"    {from_node} --> {to_node}\")\n\n        for item in downstream:\n            from_node = entity_id.replace('-', '_')\n            to_node = item['entity_id'].replace('-', '_')\n            if to_node != from_node:\n                lines.append(f\"    {from_node} --> {to_node}\")\n\n        lines.append(\"    classDef target fill:#f96\")\n        lines.append(\"```\")\n\n        return \"\\n\".join(lines)\n\n    def export_lineage(self) -> Dict:\n        \"\"\"Export complete lineage data.\"\"\"\n        return {\n            'project_id': self.project_id,\n            'exported_at': datetime.now().isoformat(),\n            'sources': {k: {\n                'id': v.id,\n                'name': v.name,\n                'system': v.system,\n                'location': v.location,\n                'owner': v.owner\n            } for k, v in self.sources.items()},\n            'entities': {k: {\n                'id': v.id,\n                'name': v.name,\n                'source_id': v.source_id,\n                'entity_type': v.entity_type,\n                'parent_entities': v.parent_entities\n            } for k, v in self.entities.items()},\n            'transformations': {k: {\n                'id': v.id,\n                'type': v.transformation_type.value,\n                'description': v.description,\n                'input_entities': v.input_entities,\n                'output_entities': v.output_entities,\n                'performed_by': v.performed_by,\n                'performed_at': v.performed_at.isoformat()\n            } for k, v in self.transformations.items()},\n            'lineage_records': [{\n                'id': r.id,\n                'entity_id': r.entity_id,\n                'transformation_id': r.transformation_id,\n                'upstream_entities': r.upstream_entities\n            } for r in self.lineage_records]\n        }\n\n    def generate_report(self) -> str:\n        \"\"\"Generate lineage report.\"\"\"\n        lines = [f\"# Data Lineage Report: {self.project_id}\", \"\"]\n        lines.append(f\"**Generated:** {datetime.now().strftime('%Y-%m-%d %H:%M')}\")\n        lines.append(f\"**Sources:** {len(self.sources)}\")\n        lines.append(f\"**Entities:** {len(self.entities)}\")\n        lines.append(f\"**Transformations:** {len(self.transformations)}\")\n        lines.append(\"\")\n\n        # Sources\n        lines.append(\"## Data Sources\")\n        for source in self.sources.values():\n            lines.append(f\"- **{source.name}** ({source.system})\")\n            lines.append(f\"  - Location: {source.location}\")\n            lines.append(f\"  - Owner: {source.owner}\")\n        lines.append(\"\")\n\n        # Validation\n        issues = self.validate_lineage()\n        if issues:\n            lines.append(\"## Lineage Issues\")\n            for issue in issues:\n                lines.append(f\"- ⚠️ {issue}\")\n            lines.append(\"\")\n\n        # Transformation summary\n        lines.append(\"## Transformation Summary\")\n        type_counts = {}\n        for t in self.transformations.values():\n            type_counts[t.transformation_type.value] = type_counts.get(t.transformation_type.value, 0) + 1\n        for t_type, count in sorted(type_counts.items()):\n            lines.append(f\"- {t_type}: {count}\")\n\n        return \"\\n\".join(lines)\n\nQuick Start\n# Initialize tracker\ntracker = ConstructionDataLineageTracker(\"PROJECT-001\")\n\n# Register sources\nprocore = tracker.register_source(\"Procore\", \"SaaS\", \"cloud\", \"PM Team\")\nsage = tracker.register_source(\"Sage 300\", \"Database\", \"on-prem\", \"Finance\")\n\n# Register entities\nbudget = tracker.register_entity(\"Project Budget\", procore.id, \"table\")\ncosts = tracker.register_entity(\"Job Costs\", sage.id, \"table\")\nreport = tracker.register_entity(\"Cost Variance Report\", procore.id, \"file\")\n\n# Record transformation\ntracker.record_transformation(\n    transformation_type=TransformationType.JOIN,\n    description=\"Join budget and actual costs for variance calculation\",\n    input_entities=[budget.id, costs.id],\n    output_entities=[report.id],\n    logic=\"SELECT b.*, c.actual, (b.budget - c.actual) as variance FROM budget b JOIN costs c ON b.cost_code = c.cost_code\",\n    performed_by=\"ETL Pipeline\"\n)\n\n# Trace lineage\nupstream = tracker.trace_upstream(report.id)\nprint(\"Upstream lineage:\", upstream)\n\n# Generate graph\nprint(tracker.generate_lineage_graph(report.id))\n\n# Export for audit\nlineage_data = tracker.export_lineage()\n\nResources\nData Governance: DAMA DMBOK lineage guidelines\nAudit Requirements: SOX, ISO compliance"
  },
  "trust": {
    "sourceLabel": "tencent",
    "provenanceUrl": "https://clawhub.ai/datadrivenconstruction/data-lineage-tracker",
    "publisherUrl": "https://clawhub.ai/datadrivenconstruction/data-lineage-tracker",
    "owner": "datadrivenconstruction",
    "version": "2.1.0",
    "license": null,
    "verificationStatus": "Indexed source record"
  },
  "links": {
    "detailUrl": "https://openagent3.xyz/skills/data-lineage-tracker",
    "downloadUrl": "https://openagent3.xyz/downloads/data-lineage-tracker",
    "agentUrl": "https://openagent3.xyz/skills/data-lineage-tracker/agent",
    "manifestUrl": "https://openagent3.xyz/skills/data-lineage-tracker/agent.json",
    "briefUrl": "https://openagent3.xyz/skills/data-lineage-tracker/agent.md"
  }
}