{
  "schemaVersion": "1.0",
  "item": {
    "slug": "data-silo-detection",
    "name": "Data Silo Detection",
    "source": "tencent",
    "type": "skill",
    "category": "数据分析",
    "sourceUrl": "https://clawhub.ai/datadrivenconstruction/data-silo-detection",
    "canonicalUrl": "https://clawhub.ai/datadrivenconstruction/data-silo-detection",
    "targetPlatform": "OpenClaw"
  },
  "install": {
    "downloadMode": "redirect",
    "downloadUrl": "/downloads/data-silo-detection",
    "sourceDownloadUrl": "https://wry-manatee-359.convex.site/api/v1/download?slug=data-silo-detection",
    "sourcePlatform": "tencent",
    "targetPlatform": "OpenClaw",
    "installMethod": "Manual import",
    "extraction": "Extract archive",
    "prerequisites": [
      "OpenClaw"
    ],
    "packageFormat": "ZIP package",
    "includedAssets": [
      "claw.json",
      "instructions.md",
      "SKILL.md"
    ],
    "primaryDoc": "SKILL.md",
    "quickSetup": [
      "Download the package from Yavira.",
      "Extract the archive and review SKILL.md first.",
      "Import or place the package into your OpenClaw setup."
    ],
    "agentAssist": {
      "summary": "Hand the extracted package to your coding agent with a concrete install brief instead of figuring it out manually.",
      "steps": [
        "Download the package from Yavira.",
        "Extract it into a folder your agent can access.",
        "Paste one of the prompts below and point your agent at the extracted folder."
      ],
      "prompts": [
        {
          "label": "New install",
          "body": "I downloaded a skill package from Yavira. Read SKILL.md from the extracted folder and install it by following the included instructions. Tell me what you changed and call out any manual steps you could not complete."
        },
        {
          "label": "Upgrade existing",
          "body": "I downloaded an updated skill package from Yavira. Read SKILL.md from the extracted folder, compare it with my current installation, and upgrade it while preserving any custom configuration unless the package docs explicitly say otherwise. Summarize what changed and any follow-up checks I should run."
        }
      ]
    },
    "sourceHealth": {
      "source": "tencent",
      "status": "healthy",
      "reason": "direct_download_ok",
      "recommendedAction": "download",
      "checkedAt": "2026-04-23T16:43:11.935Z",
      "expiresAt": "2026-04-30T16:43:11.935Z",
      "httpStatus": 200,
      "finalUrl": "https://wry-manatee-359.convex.site/api/v1/download?slug=4claw-imageboard",
      "contentType": "application/zip",
      "probeMethod": "head",
      "details": {
        "probeUrl": "https://wry-manatee-359.convex.site/api/v1/download?slug=4claw-imageboard",
        "contentDisposition": "attachment; filename=\"4claw-imageboard-1.0.1.zip\"",
        "redirectLocation": null,
        "bodySnippet": null
      },
      "scope": "source",
      "summary": "Source download looks usable.",
      "detail": "Yavira can redirect you to the upstream package for this source.",
      "primaryActionLabel": "Download for OpenClaw",
      "primaryActionHref": "/downloads/data-silo-detection"
    },
    "validation": {
      "installChecklist": [
        "Use the Yavira download entry.",
        "Review SKILL.md after the package is downloaded.",
        "Confirm the extracted package contains the expected setup assets."
      ],
      "postInstallChecks": [
        "Confirm the extracted package includes the expected docs or setup files.",
        "Validate the skill or prompts are available in your target agent workspace.",
        "Capture any manual follow-up steps the agent could not complete."
      ]
    },
    "downloadPageUrl": "https://openagent3.xyz/downloads/data-silo-detection",
    "agentPageUrl": "https://openagent3.xyz/skills/data-silo-detection/agent",
    "manifestUrl": "https://openagent3.xyz/skills/data-silo-detection/agent.json",
    "briefUrl": "https://openagent3.xyz/skills/data-silo-detection/agent.md"
  },
  "agentAssist": {
    "summary": "Hand the extracted package to your coding agent with a concrete install brief instead of figuring it out manually.",
    "steps": [
      "Download the package from Yavira.",
      "Extract it into a folder your agent can access.",
      "Paste one of the prompts below and point your agent at the extracted folder."
    ],
    "prompts": [
      {
        "label": "New install",
        "body": "I downloaded a skill package from Yavira. Read SKILL.md from the extracted folder and install it by following the included instructions. Tell me what you changed and call out any manual steps you could not complete."
      },
      {
        "label": "Upgrade existing",
        "body": "I downloaded an updated skill package from Yavira. Read SKILL.md from the extracted folder, compare it with my current installation, and upgrade it while preserving any custom configuration unless the package docs explicitly say otherwise. Summarize what changed and any follow-up checks I should run."
      }
    ]
  },
  "documentation": {
    "source": "clawhub",
    "primaryDoc": "SKILL.md",
    "sections": [
      {
        "title": "Overview",
        "body": "Based on DDC methodology (Chapter 1.2), this skill detects and maps data silos in construction organizations, identifying disconnected data sources, duplicate data, and integration opportunities.\n\nBook Reference: \"Технологии и системы управления в современном строительстве\" / \"Technologies and Management Systems in Modern Construction\""
      },
      {
        "title": "Quick Start",
        "body": "from dataclasses import dataclass, field\nfrom enum import Enum\nfrom typing import List, Dict, Optional, Set, Tuple\nfrom datetime import datetime\nimport json\nfrom collections import defaultdict\n\nclass DataDomain(Enum):\n    \"\"\"Construction data domains\"\"\"\n    DESIGN = \"design\"\n    COST = \"cost\"\n    SCHEDULE = \"schedule\"\n    QUALITY = \"quality\"\n    SAFETY = \"safety\"\n    PROCUREMENT = \"procurement\"\n    SITE = \"site\"\n    DOCUMENT = \"document\"\n    FINANCIAL = \"financial\"\n    HR = \"hr\"\n\nclass SiloSeverity(Enum):\n    \"\"\"Severity level of data silo\"\"\"\n    CRITICAL = \"critical\"      # Major business impact\n    HIGH = \"high\"              # Significant inefficiency\n    MEDIUM = \"medium\"          # Noticeable issues\n    LOW = \"low\"                # Minor inconvenience\n\nclass DataSourceType(Enum):\n    \"\"\"Types of data sources\"\"\"\n    DATABASE = \"database\"\n    SPREADSHEET = \"spreadsheet\"\n    FILE_SHARE = \"file_share\"\n    CLOUD_APP = \"cloud_app\"\n    DESKTOP_APP = \"desktop_app\"\n    PAPER = \"paper\"\n    EMAIL = \"email\"\n    PERSONAL = \"personal\"\n\n@dataclass\nclass DataSource:\n    \"\"\"Represents a data source in the organization\"\"\"\n    id: str\n    name: str\n    type: DataSourceType\n    domain: DataDomain\n    owner: str\n    department: str\n    users: List[str]\n    data_entities: List[str]\n    connections: List[str] = field(default_factory=list)\n    update_frequency: str = \"unknown\"\n    access_level: str = \"department\"  # personal, department, organization\n    has_api: bool = False\n    last_modified: Optional[datetime] = None\n\n@dataclass\nclass DataSilo:\n    \"\"\"Detected data silo\"\"\"\n    id: str\n    sources: List[DataSource]\n    domain: DataDomain\n    severity: SiloSeverity\n    issue_type: str\n    description: str\n    impact: str\n    affected_users: int\n    affected_processes: List[str]\n    recommendations: List[str]\n    estimated_cost: Optional[float] = None\n\n@dataclass\nclass DuplicateData:\n    \"\"\"Detected duplicate data across sources\"\"\"\n    entity_name: str\n    sources: List[str]\n    discrepancy_rate: float  # 0-1\n    master_source: Optional[str] = None\n    issues: List[str] = field(default_factory=list)\n\n@dataclass\nclass SiloAnalysis:\n    \"\"\"Complete silo analysis results\"\"\"\n    organization: str\n    analysis_date: datetime\n    total_sources: int\n    silos_detected: List[DataSilo]\n    duplicates: List[DuplicateData]\n    connectivity_score: float\n    data_flow_gaps: List[Dict]\n    priority_actions: List[str]\n    integration_roadmap: Dict\n\n\nclass DataSiloDetector:\n    \"\"\"\n    Detect and analyze data silos in construction organizations.\n    Based on DDC methodology Chapter 1.2.\n    \"\"\"\n\n    def __init__(self):\n        self.domain_relationships = self._define_domain_relationships()\n        self.critical_entities = self._define_critical_entities()\n\n    def _define_domain_relationships(self) -> Dict[DataDomain, List[DataDomain]]:\n        \"\"\"Define expected relationships between domains\"\"\"\n        return {\n            DataDomain.DESIGN: [\n                DataDomain.COST, DataDomain.SCHEDULE,\n                DataDomain.PROCUREMENT, DataDomain.QUALITY\n            ],\n            DataDomain.COST: [\n                DataDomain.DESIGN, DataDomain.SCHEDULE,\n                DataDomain.FINANCIAL, DataDomain.PROCUREMENT\n            ],\n            DataDomain.SCHEDULE: [\n                
DataDomain.DESIGN, DataDomain.COST,\n                DataDomain.SITE, DataDomain.HR\n            ],\n            DataDomain.PROCUREMENT: [\n                DataDomain.COST, DataDomain.DESIGN,\n                DataDomain.SITE, DataDomain.FINANCIAL\n            ],\n            DataDomain.SITE: [\n                DataDomain.SCHEDULE, DataDomain.SAFETY,\n                DataDomain.QUALITY, DataDomain.HR\n            ],\n            DataDomain.QUALITY: [\n                DataDomain.DESIGN, DataDomain.SITE,\n                DataDomain.DOCUMENT\n            ],\n            DataDomain.SAFETY: [\n                DataDomain.SITE, DataDomain.HR,\n                DataDomain.DOCUMENT\n            ],\n            DataDomain.FINANCIAL: [\n                DataDomain.COST, DataDomain.PROCUREMENT,\n                DataDomain.HR\n            ]\n        }\n\n    def _define_critical_entities(self) -> Dict[str, List[DataDomain]]:\n        \"\"\"Define entities that should be shared across domains\"\"\"\n        return {\n            \"project\": [DataDomain.DESIGN, DataDomain.COST, DataDomain.SCHEDULE],\n            \"budget\": [DataDomain.COST, DataDomain.FINANCIAL, DataDomain.PROCUREMENT],\n            \"schedule\": [DataDomain.SCHEDULE, DataDomain.SITE, DataDomain.PROCUREMENT],\n            \"material\": [DataDomain.DESIGN, DataDomain.COST, DataDomain.PROCUREMENT],\n            \"labor\": [DataDomain.HR, DataDomain.COST, DataDomain.SCHEDULE],\n            \"subcontractor\": [DataDomain.PROCUREMENT, DataDomain.COST, DataDomain.SCHEDULE],\n            \"rfi\": [DataDomain.DESIGN, DataDomain.DOCUMENT, DataDomain.SITE],\n            \"change_order\": [DataDomain.COST, DataDomain.DESIGN, DataDomain.SCHEDULE]\n        }\n\n    def detect_silos(\n        self,\n        organization: str,\n        data_sources: List[DataSource],\n        process_flows: Optional[List[Dict]] = None\n    ) -> SiloAnalysis:\n        \"\"\"\n        Detect data silos in the organization.\n\n        Args:\n            organization: Organization name\n            data_sources: List of data sources to analyze\n            process_flows: Optional business process flows\n\n        Returns:\n            Complete silo analysis\n        \"\"\"\n        # Build connectivity graph\n        connectivity = self._build_connectivity_graph(data_sources)\n\n        # Detect isolated sources\n        isolated_silos = self._detect_isolated_sources(\n            data_sources, connectivity\n        )\n\n        # Detect domain silos\n        domain_silos = self._detect_domain_silos(data_sources)\n\n        # Detect duplicate data\n        duplicates = self._detect_duplicates(data_sources)\n\n        # Detect data flow gaps\n        flow_gaps = self._detect_flow_gaps(\n            data_sources, process_flows\n        )\n\n        # Calculate connectivity score\n        connectivity_score = self._calculate_connectivity_score(\n            data_sources, connectivity\n        )\n\n        # Combine all silos\n        all_silos = isolated_silos + domain_silos\n\n        # Prioritize silos\n        prioritized_silos = self._prioritize_silos(all_silos)\n\n        # Generate priority actions\n        priority_actions = self._generate_priority_actions(\n            prioritized_silos, duplicates\n        )\n\n        # Create integration roadmap\n        roadmap = self._create_integration_roadmap(\n            prioritized_silos, flow_gaps\n        )\n\n        return SiloAnalysis(\n            organization=organization,\n            analysis_date=datetime.now(),\n   
         total_sources=len(data_sources),\n            silos_detected=prioritized_silos,\n            duplicates=duplicates,\n            connectivity_score=connectivity_score,\n            data_flow_gaps=flow_gaps,\n            priority_actions=priority_actions,\n            integration_roadmap=roadmap\n        )\n\n    def _build_connectivity_graph(\n        self,\n        sources: List[DataSource]\n    ) -> Dict[str, Set[str]]:\n        \"\"\"Build graph of source connections\"\"\"\n        graph = defaultdict(set)\n\n        for source in sources:\n            for connection in source.connections:\n                graph[source.id].add(connection)\n                graph[connection].add(source.id)\n\n        return graph\n\n    def _detect_isolated_sources(\n        self,\n        sources: List[DataSource],\n        connectivity: Dict[str, Set[str]]\n    ) -> List[DataSilo]:\n        \"\"\"Detect sources with no connections\"\"\"\n        silos = []\n\n        for source in sources:\n            connections = len(connectivity.get(source.id, set()))\n\n            if connections == 0:\n                severity = SiloSeverity.CRITICAL if source.domain in [\n                    DataDomain.COST, DataDomain.SCHEDULE\n                ] else SiloSeverity.HIGH\n\n                silos.append(DataSilo(\n                    id=f\"isolated_{source.id}\",\n                    sources=[source],\n                    domain=source.domain,\n                    severity=severity,\n                    issue_type=\"isolated_source\",\n                    description=f\"{source.name} has no connections to other systems\",\n                    impact=\"Data must be manually transferred, risking errors and delays\",\n                    affected_users=len(source.users),\n                    affected_processes=self._get_affected_processes(source.domain),\n                    recommendations=[\n                        f\"Connect {source.name} via API or ETL to related systems\",\n                        \"Establish data synchronization schedule\",\n                        \"Define master data source for shared entities\"\n                    ]\n                ))\n            elif connections == 1 and source.access_level == \"personal\":\n                silos.append(DataSilo(\n                    id=f\"personal_{source.id}\",\n                    sources=[source],\n                    domain=source.domain,\n                    severity=SiloSeverity.MEDIUM,\n                    issue_type=\"personal_silo\",\n                    description=f\"{source.name} is a personal data store with limited access\",\n                    impact=\"Data not accessible to team, knowledge loss risk\",\n                    affected_users=1,\n                    affected_processes=self._get_affected_processes(source.domain),\n                    recommendations=[\n                        \"Move data to shared organizational repository\",\n                        \"Implement access controls instead of isolation\",\n                        \"Document data structure and usage\"\n                    ]\n                ))\n\n        return silos\n\n    def _detect_domain_silos(\n        self,\n        sources: List[DataSource]\n    ) -> List[DataSilo]:\n        \"\"\"Detect silos between domains that should be connected\"\"\"\n        silos = []\n\n        # Group sources by domain\n        domain_sources = defaultdict(list)\n        for source in sources:\n            domain_sources[source.domain].append(source)\n\n        # Check for 
missing domain connections\n        for domain, related_domains in self.domain_relationships.items():\n            domain_srcs = domain_sources.get(domain, [])\n\n            for related in related_domains:\n                related_srcs = domain_sources.get(related, [])\n\n                if domain_srcs and related_srcs:\n                    # Check if any connections exist between domains\n                    has_connection = False\n                    for src in domain_srcs:\n                        for rel_src in related_srcs:\n                            if rel_src.id in src.connections:\n                                has_connection = True\n                                break\n\n                    if not has_connection:\n                        silos.append(DataSilo(\n                            id=f\"domain_gap_{domain.value}_{related.value}\",\n                            sources=domain_srcs + related_srcs,\n                            domain=domain,\n                            severity=SiloSeverity.HIGH,\n                            issue_type=\"domain_disconnect\",\n                            description=f\"No data flow between {domain.value} and {related.value}\",\n                            impact=\"Related information not synchronized, decision delays\",\n                            affected_users=sum(len(s.users) for s in domain_srcs + related_srcs),\n                            affected_processes=self._get_affected_processes(domain) +\n                                              self._get_affected_processes(related),\n                            recommendations=[\n                                f\"Establish integration between {domain.value} and {related.value} systems\",\n                                \"Define shared data entities and master sources\",\n                                \"Implement automated data synchronization\"\n                            ]\n                        ))\n\n        return silos\n\n    def _detect_duplicates(\n        self,\n        sources: List[DataSource]\n    ) -> List[DuplicateData]:\n        \"\"\"Detect duplicate data across sources\"\"\"\n        duplicates = []\n\n        # Map entities to sources\n        entity_sources = defaultdict(list)\n        for source in sources:\n            for entity in source.data_entities:\n                entity_sources[entity].append(source.id)\n\n        # Find duplicates\n        for entity, source_ids in entity_sources.items():\n            if len(source_ids) > 1:\n                # Check if it's a critical entity\n                is_critical = entity.lower() in self.critical_entities\n\n                duplicate = DuplicateData(\n                    entity_name=entity,\n                    sources=source_ids,\n                    discrepancy_rate=0.0,  # Would need actual data to calculate\n                    issues=[]\n                )\n\n                if is_critical and len(source_ids) > 2:\n                    duplicate.issues.append(\n                        \"Critical entity duplicated in multiple systems\"\n                    )\n\n                if not any(s for s in sources if s.id in source_ids and \"master\" in s.name.lower()):\n                    duplicate.issues.append(\"No clear master source defined\")\n\n                duplicates.append(duplicate)\n\n        return duplicates\n\n    def _detect_flow_gaps(\n        self,\n        sources: List[DataSource],\n        process_flows: Optional[List[Dict]]\n    ) -> List[Dict]:\n        \"\"\"Detect gaps in expected data 
flows\"\"\"\n        gaps = []\n\n        # Check critical entity coverage\n        for entity, required_domains in self.critical_entities.items():\n            entity_domains = set()\n            for source in sources:\n                if entity in [e.lower() for e in source.data_entities]:\n                    entity_domains.add(source.domain)\n\n            missing = set(required_domains) - entity_domains\n            if missing:\n                gaps.append({\n                    \"entity\": entity,\n                    \"missing_domains\": [d.value for d in missing],\n                    \"impact\": f\"{entity} data not available in {len(missing)} domains\"\n                })\n\n        return gaps\n\n    def _calculate_connectivity_score(\n        self,\n        sources: List[DataSource],\n        connectivity: Dict[str, Set[str]]\n    ) -> float:\n        \"\"\"Calculate overall connectivity score\"\"\"\n        if not sources:\n            return 0.0\n\n        # Calculate average connections per source\n        total_connections = sum(len(conns) for conns in connectivity.values())\n        avg_connections = total_connections / len(sources)\n\n        # Ideal connections per source\n        ideal_connections = 3\n\n        # Score based on average connections\n        connection_score = min(1.0, avg_connections / ideal_connections)\n\n        # Penalize for isolated sources\n        isolated = sum(1 for s in sources if s.id not in connectivity or not connectivity[s.id])\n        isolation_penalty = isolated / len(sources)\n\n        # API availability bonus\n        api_count = sum(1 for s in sources if s.has_api)\n        api_bonus = (api_count / len(sources)) * 0.2\n\n        return max(0, min(1.0, connection_score - isolation_penalty + api_bonus))\n\n    def _get_affected_processes(self, domain: DataDomain) -> List[str]:\n        \"\"\"Get business processes affected by domain\"\"\"\n        process_map = {\n            DataDomain.DESIGN: [\"Design Review\", \"RFI Processing\", \"Drawing Distribution\"],\n            DataDomain.COST: [\"Budgeting\", \"Cost Tracking\", \"Invoice Processing\"],\n            DataDomain.SCHEDULE: [\"Planning\", \"Progress Tracking\", \"Resource Allocation\"],\n            DataDomain.PROCUREMENT: [\"Vendor Selection\", \"Purchase Orders\", \"Material Tracking\"],\n            DataDomain.SITE: [\"Daily Reports\", \"Progress Photos\", \"Issue Management\"],\n            DataDomain.QUALITY: [\"Inspections\", \"Defect Tracking\", \"Compliance\"],\n            DataDomain.SAFETY: [\"Incident Reporting\", \"Safety Inspections\", \"Training\"],\n            DataDomain.FINANCIAL: [\"Billing\", \"Payments\", \"Financial Reporting\"],\n            DataDomain.HR: [\"Timekeeping\", \"Resource Management\", \"Certifications\"]\n        }\n        return process_map.get(domain, [])\n\n    def _prioritize_silos(\n        self,\n        silos: List[DataSilo]\n    ) -> List[DataSilo]:\n        \"\"\"Prioritize silos by severity and impact\"\"\"\n        severity_order = {\n            SiloSeverity.CRITICAL: 0,\n            SiloSeverity.HIGH: 1,\n            SiloSeverity.MEDIUM: 2,\n            SiloSeverity.LOW: 3\n        }\n\n        return sorted(\n            silos,\n            key=lambda s: (severity_order[s.severity], -s.affected_users)\n        )\n\n    def _generate_priority_actions(\n        self,\n        silos: List[DataSilo],\n        duplicates: List[DuplicateData]\n    ) -> List[str]:\n        \"\"\"Generate prioritized action items\"\"\"\n        
actions = []\n\n        # Critical silos first\n        critical_silos = [s for s in silos if s.severity == SiloSeverity.CRITICAL]\n        for silo in critical_silos[:3]:\n            actions.append(f\"URGENT: {silo.recommendations[0]}\")\n\n        # Duplicate data issues\n        critical_dups = [d for d in duplicates if d.issues]\n        for dup in critical_dups[:2]:\n            actions.append(\n                f\"Define master source for '{dup.entity_name}' \"\n                f\"(currently in {len(dup.sources)} sources)\"\n            )\n\n        # High priority silos\n        high_silos = [s for s in silos if s.severity == SiloSeverity.HIGH]\n        for silo in high_silos[:3]:\n            if silo.recommendations:\n                actions.append(silo.recommendations[0])\n\n        return actions[:10]\n\n    def _create_integration_roadmap(\n        self,\n        silos: List[DataSilo],\n        gaps: List[Dict]\n    ) -> Dict:\n        \"\"\"Create phased integration roadmap\"\"\"\n        roadmap = {\n            \"Phase 1 - Quick Wins (0-3 months)\": [],\n            \"Phase 2 - Core Integration (3-6 months)\": [],\n            \"Phase 3 - Advanced Integration (6-12 months)\": [],\n            \"Phase 4 - Optimization (12+ months)\": []\n        }\n\n        # Phase 1: Address personal silos and easy integrations\n        for silo in silos:\n            if silo.issue_type == \"personal_silo\":\n                roadmap[\"Phase 1 - Quick Wins (0-3 months)\"].append(\n                    f\"Migrate {silo.sources[0].name} to shared repository\"\n                )\n\n        # Phase 2: Core domain integrations\n        domain_gaps = [s for s in silos if s.issue_type == \"domain_disconnect\"]\n        for silo in domain_gaps[:3]:\n            roadmap[\"Phase 2 - Core Integration (3-6 months)\"].append(\n                silo.recommendations[0] if silo.recommendations else silo.description\n            )\n\n        # Phase 3: Critical entity master data\n        roadmap[\"Phase 3 - Advanced Integration (6-12 months)\"].extend([\n            \"Implement master data management for shared entities\",\n            \"Deploy integration middleware/ESB\",\n            \"Establish data governance policies\"\n        ])\n\n        # Phase 4: Optimization\n        roadmap[\"Phase 4 - Optimization (12+ months)\"].extend([\n            \"Implement real-time data synchronization\",\n            \"Deploy integration monitoring and alerting\",\n            \"Continuous improvement based on metrics\"\n        ])\n\n        return roadmap\n\n    def generate_report(self, analysis: SiloAnalysis) -> str:\n        \"\"\"Generate silo analysis report\"\"\"\n        report = f\"\"\"\n# Data Silo Analysis Report\n## {analysis.organization}\n\n**Analysis Date:** {analysis.analysis_date.strftime('%Y-%m-%d')}\n**Data Sources Analyzed:** {analysis.total_sources}\n**Connectivity Score:** {analysis.connectivity_score:.0%}\n\n## Executive Summary\n\nDetected **{len(analysis.silos_detected)}** data silos and **{len(analysis.duplicates)}** duplicate data issues.\n\n### Silos by Severity\n\"\"\"\n        severity_counts = defaultdict(int)\n        for silo in analysis.silos_detected:\n            severity_counts[silo.severity.value] += 1\n\n        for severity in [\"critical\", \"high\", \"medium\", \"low\"]:\n            count = severity_counts.get(severity, 0)\n            if count > 0:\n                report += f\"- **{severity.title()}**: {count}\\n\"\n\n        report += \"\\n## Priority Actions\\n\\n\"\n      
  for i, action in enumerate(analysis.priority_actions, 1):\n            report += f\"{i}. {action}\\n\"\n\n        report += \"\\n## Detected Silos\\n\\n\"\n        for silo in analysis.silos_detected[:5]:\n            report += f\"\"\"\n### {silo.id}\n- **Type:** {silo.issue_type}\n- **Severity:** {silo.severity.value}\n- **Impact:** {silo.impact}\n- **Affected Users:** {silo.affected_users}\n\"\"\"\n\n        report += \"\\n## Integration Roadmap\\n\"\n        for phase, items in analysis.integration_roadmap.items():\n            report += f\"\\n### {phase}\\n\"\n            for item in items:\n                report += f\"- {item}\\n\"\n\n        return report"
      },
      {
        "title": "Detect Data Silos",
        "body": "detector = DataSiloDetector()\n\n# Define data sources\nsources = [\n    DataSource(\n        id=\"revit\",\n        name=\"Revit Models\",\n        type=DataSourceType.DESKTOP_APP,\n        domain=DataDomain.DESIGN,\n        owner=\"Design Team\",\n        department=\"Engineering\",\n        users=[\"architect1\", \"engineer1\", \"engineer2\"],\n        data_entities=[\"building_model\", \"drawings\", \"schedules\"],\n        connections=[\"navisworks\"],\n        has_api=True\n    ),\n    DataSource(\n        id=\"excel_estimates\",\n        name=\"Excel Cost Estimates\",\n        type=DataSourceType.SPREADSHEET,\n        domain=DataDomain.COST,\n        owner=\"Estimator\",\n        department=\"Pre-construction\",\n        users=[\"estimator1\"],\n        data_entities=[\"costs\", \"quantities\", \"labor_rates\"],\n        connections=[],  # No connections - silo!\n        access_level=\"personal\"\n    ),\n    DataSource(\n        id=\"procore\",\n        name=\"Procore\",\n        type=DataSourceType.CLOUD_APP,\n        domain=DataDomain.SITE,\n        owner=\"Project Manager\",\n        department=\"Operations\",\n        users=[\"pm1\", \"pm2\", \"super1\"],\n        data_entities=[\"daily_reports\", \"photos\", \"punch_list\"],\n        connections=[\"primavera\"],\n        has_api=True\n    )\n]\n\nanalysis = detector.detect_silos(\n    organization=\"ABC Construction\",\n    data_sources=sources\n)\n\nprint(f\"Silos detected: {len(analysis.silos_detected)}\")\nprint(f\"Connectivity score: {analysis.connectivity_score:.0%}\")"
      },
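      {
        "title": "Connectivity Score, Worked Example",
        "body": "A quick hand check of the score the three sources above produce, mirroring the arithmetic in _calculate_connectivity_score; this is an illustration, not extra package code, and navisworks/primavera exist only as connection targets.\n\n# Bidirectional graph from _build_connectivity_graph:\n# revit <-> navisworks, procore <-> primavera; excel_estimates has no edges.\n# total_connections = 4 (each edge counted from both ends), avg = 4 / 3 sources\nconnection_score = min(1.0, (4 / 3) / 3)   # ~0.44 against the ideal of 3 connections\nisolation_penalty = 1 / 3                  # one isolated source out of three\napi_bonus = (2 / 3) * 0.2                  # revit and procore expose APIs\nscore = max(0, min(1.0, connection_score - isolation_penalty + api_bonus))\nprint(f\"{score:.0%}\")  # roughly 24%, matching analysis.connectivity_score"
      },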
      {
        "title": "Generate Silo Report",
        "body": "report = detector.generate_report(analysis)\nprint(report)\n\n# Save to file\nwith open(\"silo_report.md\", \"w\") as f:\n    f.write(report)"
      },
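      {
        "title": "Export Analysis as JSON (Sketch)",
        "body": "The Quick Start imports json but never shows a serialization path, so this is a minimal sketch rather than an official API: dataclasses.asdict flattens SiloAnalysis, and the default hook for Enum and datetime values is an assumption added here.\n\nimport json\nfrom dataclasses import asdict\nfrom datetime import datetime\nfrom enum import Enum\n\ndef _jsonify(value):\n    # Fallback encoder: enums become their string values, timestamps ISO 8601.\n    if isinstance(value, Enum):\n        return value.value\n    if isinstance(value, datetime):\n        return value.isoformat()\n    raise TypeError(f\"Unserializable type: {type(value)!r}\")\n\nwith open(\"silo_analysis.json\", \"w\") as f:\n    json.dump(asdict(analysis), f, default=_jsonify, indent=2)"
      },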
      {
        "title": "View Priority Actions",
        "body": "print(\"Priority Actions:\")\nfor i, action in enumerate(analysis.priority_actions, 1):\n    print(f\"{i}. {action}\")\n\nprint(\"\\nIntegration Roadmap:\")\nfor phase, items in analysis.integration_roadmap.items():\n    print(f\"\\n{phase}:\")\n    for item in items:\n        print(f\"  - {item}\")"
      },
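      {
        "title": "Inspect Duplicates and Flow Gaps",
        "body": "The analysis also carries duplicate-entity and coverage findings that the report only summarizes. A short sketch for reading them directly; field names come from the DuplicateData dataclass and _detect_flow_gaps above.\n\nfor dup in analysis.duplicates:\n    # Each duplicate lists every source holding the entity plus any issues found.\n    print(f\"{dup.entity_name}: {len(dup.sources)} sources\")\n    for issue in dup.issues:\n        print(f\"  ! {issue}\")\n\nfor gap in analysis.data_flow_gaps:\n    # Gaps flag critical entities missing from domains that should carry them.\n    print(f\"{gap['entity']} missing in: {', '.join(gap['missing_domains'])}\")"
      },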
      {
        "title": "Quick Reference",
        "body": "ComponentPurposeDataSiloDetectorMain detection engineDataSourceData source definitionDataSiloDetected silo with detailsDuplicateDataDuplicate data detectionSiloAnalysisComplete analysis resultsSiloSeveritySeverity classification"
      },
      {
        "title": "Resources",
        "body": "Book: \"Data-Driven Construction\" by Artem Boiko, Chapter 1.2\nWebsite: https://datadrivenconstruction.io"
      },
      {
        "title": "Next Steps",
        "body": "Use erp-integration-analysis for system integration\nUse data-evolution-analysis for maturity assessment\nUse etl-pipeline to connect silos"
      }
    ],
    "body": "Data Silo Detection\nOverview\n\nBased on DDC methodology (Chapter 1.2), this skill detects and maps data silos in construction organizations, identifying disconnected data sources, duplicate data, and integration opportunities.\n\nBook Reference: \"Технологии и системы управления в современном строительстве\" / \"Technologies and Management Systems in Modern Construction\"\n\nQuick Start\nfrom dataclasses import dataclass, field\nfrom enum import Enum\nfrom typing import List, Dict, Optional, Set, Tuple\nfrom datetime import datetime\nimport json\nfrom collections import defaultdict\n\nclass DataDomain(Enum):\n    \"\"\"Construction data domains\"\"\"\n    DESIGN = \"design\"\n    COST = \"cost\"\n    SCHEDULE = \"schedule\"\n    QUALITY = \"quality\"\n    SAFETY = \"safety\"\n    PROCUREMENT = \"procurement\"\n    SITE = \"site\"\n    DOCUMENT = \"document\"\n    FINANCIAL = \"financial\"\n    HR = \"hr\"\n\nclass SiloSeverity(Enum):\n    \"\"\"Severity level of data silo\"\"\"\n    CRITICAL = \"critical\"      # Major business impact\n    HIGH = \"high\"              # Significant inefficiency\n    MEDIUM = \"medium\"          # Noticeable issues\n    LOW = \"low\"                # Minor inconvenience\n\nclass DataSourceType(Enum):\n    \"\"\"Types of data sources\"\"\"\n    DATABASE = \"database\"\n    SPREADSHEET = \"spreadsheet\"\n    FILE_SHARE = \"file_share\"\n    CLOUD_APP = \"cloud_app\"\n    DESKTOP_APP = \"desktop_app\"\n    PAPER = \"paper\"\n    EMAIL = \"email\"\n    PERSONAL = \"personal\"\n\n@dataclass\nclass DataSource:\n    \"\"\"Represents a data source in the organization\"\"\"\n    id: str\n    name: str\n    type: DataSourceType\n    domain: DataDomain\n    owner: str\n    department: str\n    users: List[str]\n    data_entities: List[str]\n    connections: List[str] = field(default_factory=list)\n    update_frequency: str = \"unknown\"\n    access_level: str = \"department\"  # personal, department, organization\n    has_api: bool = False\n    last_modified: Optional[datetime] = None\n\n@dataclass\nclass DataSilo:\n    \"\"\"Detected data silo\"\"\"\n    id: str\n    sources: List[DataSource]\n    domain: DataDomain\n    severity: SiloSeverity\n    issue_type: str\n    description: str\n    impact: str\n    affected_users: int\n    affected_processes: List[str]\n    recommendations: List[str]\n    estimated_cost: Optional[float] = None\n\n@dataclass\nclass DuplicateData:\n    \"\"\"Detected duplicate data across sources\"\"\"\n    entity_name: str\n    sources: List[str]\n    discrepancy_rate: float  # 0-1\n    master_source: Optional[str] = None\n    issues: List[str] = field(default_factory=list)\n\n@dataclass\nclass SiloAnalysis:\n    \"\"\"Complete silo analysis results\"\"\"\n    organization: str\n    analysis_date: datetime\n    total_sources: int\n    silos_detected: List[DataSilo]\n    duplicates: List[DuplicateData]\n    connectivity_score: float\n    data_flow_gaps: List[Dict]\n    priority_actions: List[str]\n    integration_roadmap: Dict\n\n\nclass DataSiloDetector:\n    \"\"\"\n    Detect and analyze data silos in construction organizations.\n    Based on DDC methodology Chapter 1.2.\n    \"\"\"\n\n    def __init__(self):\n        self.domain_relationships = self._define_domain_relationships()\n        self.critical_entities = self._define_critical_entities()\n\n    def _define_domain_relationships(self) -> Dict[DataDomain, List[DataDomain]]:\n        \"\"\"Define expected relationships between domains\"\"\"\n        return {\n            
DataDomain.DESIGN: [\n                DataDomain.COST, DataDomain.SCHEDULE,\n                DataDomain.PROCUREMENT, DataDomain.QUALITY\n            ],\n            DataDomain.COST: [\n                DataDomain.DESIGN, DataDomain.SCHEDULE,\n                DataDomain.FINANCIAL, DataDomain.PROCUREMENT\n            ],\n            DataDomain.SCHEDULE: [\n                DataDomain.DESIGN, DataDomain.COST,\n                DataDomain.SITE, DataDomain.HR\n            ],\n            DataDomain.PROCUREMENT: [\n                DataDomain.COST, DataDomain.DESIGN,\n                DataDomain.SITE, DataDomain.FINANCIAL\n            ],\n            DataDomain.SITE: [\n                DataDomain.SCHEDULE, DataDomain.SAFETY,\n                DataDomain.QUALITY, DataDomain.HR\n            ],\n            DataDomain.QUALITY: [\n                DataDomain.DESIGN, DataDomain.SITE,\n                DataDomain.DOCUMENT\n            ],\n            DataDomain.SAFETY: [\n                DataDomain.SITE, DataDomain.HR,\n                DataDomain.DOCUMENT\n            ],\n            DataDomain.FINANCIAL: [\n                DataDomain.COST, DataDomain.PROCUREMENT,\n                DataDomain.HR\n            ]\n        }\n\n    def _define_critical_entities(self) -> Dict[str, List[DataDomain]]:\n        \"\"\"Define entities that should be shared across domains\"\"\"\n        return {\n            \"project\": [DataDomain.DESIGN, DataDomain.COST, DataDomain.SCHEDULE],\n            \"budget\": [DataDomain.COST, DataDomain.FINANCIAL, DataDomain.PROCUREMENT],\n            \"schedule\": [DataDomain.SCHEDULE, DataDomain.SITE, DataDomain.PROCUREMENT],\n            \"material\": [DataDomain.DESIGN, DataDomain.COST, DataDomain.PROCUREMENT],\n            \"labor\": [DataDomain.HR, DataDomain.COST, DataDomain.SCHEDULE],\n            \"subcontractor\": [DataDomain.PROCUREMENT, DataDomain.COST, DataDomain.SCHEDULE],\n            \"rfi\": [DataDomain.DESIGN, DataDomain.DOCUMENT, DataDomain.SITE],\n            \"change_order\": [DataDomain.COST, DataDomain.DESIGN, DataDomain.SCHEDULE]\n        }\n\n    def detect_silos(\n        self,\n        organization: str,\n        data_sources: List[DataSource],\n        process_flows: Optional[List[Dict]] = None\n    ) -> SiloAnalysis:\n        \"\"\"\n        Detect data silos in the organization.\n\n        Args:\n            organization: Organization name\n            data_sources: List of data sources to analyze\n            process_flows: Optional business process flows\n\n        Returns:\n            Complete silo analysis\n        \"\"\"\n        # Build connectivity graph\n        connectivity = self._build_connectivity_graph(data_sources)\n\n        # Detect isolated sources\n        isolated_silos = self._detect_isolated_sources(\n            data_sources, connectivity\n        )\n\n        # Detect domain silos\n        domain_silos = self._detect_domain_silos(data_sources)\n\n        # Detect duplicate data\n        duplicates = self._detect_duplicates(data_sources)\n\n        # Detect data flow gaps\n        flow_gaps = self._detect_flow_gaps(\n            data_sources, process_flows\n        )\n\n        # Calculate connectivity score\n        connectivity_score = self._calculate_connectivity_score(\n            data_sources, connectivity\n        )\n\n        # Combine all silos\n        all_silos = isolated_silos + domain_silos\n\n        # Prioritize silos\n        prioritized_silos = self._prioritize_silos(all_silos)\n\n        # Generate priority actions\n       
 priority_actions = self._generate_priority_actions(\n            prioritized_silos, duplicates\n        )\n\n        # Create integration roadmap\n        roadmap = self._create_integration_roadmap(\n            prioritized_silos, flow_gaps\n        )\n\n        return SiloAnalysis(\n            organization=organization,\n            analysis_date=datetime.now(),\n            total_sources=len(data_sources),\n            silos_detected=prioritized_silos,\n            duplicates=duplicates,\n            connectivity_score=connectivity_score,\n            data_flow_gaps=flow_gaps,\n            priority_actions=priority_actions,\n            integration_roadmap=roadmap\n        )\n\n    def _build_connectivity_graph(\n        self,\n        sources: List[DataSource]\n    ) -> Dict[str, Set[str]]:\n        \"\"\"Build graph of source connections\"\"\"\n        graph = defaultdict(set)\n\n        for source in sources:\n            for connection in source.connections:\n                graph[source.id].add(connection)\n                graph[connection].add(source.id)\n\n        return graph\n\n    def _detect_isolated_sources(\n        self,\n        sources: List[DataSource],\n        connectivity: Dict[str, Set[str]]\n    ) -> List[DataSilo]:\n        \"\"\"Detect sources with no connections\"\"\"\n        silos = []\n\n        for source in sources:\n            connections = len(connectivity.get(source.id, set()))\n\n            if connections == 0:\n                severity = SiloSeverity.CRITICAL if source.domain in [\n                    DataDomain.COST, DataDomain.SCHEDULE\n                ] else SiloSeverity.HIGH\n\n                silos.append(DataSilo(\n                    id=f\"isolated_{source.id}\",\n                    sources=[source],\n                    domain=source.domain,\n                    severity=severity,\n                    issue_type=\"isolated_source\",\n                    description=f\"{source.name} has no connections to other systems\",\n                    impact=\"Data must be manually transferred, risking errors and delays\",\n                    affected_users=len(source.users),\n                    affected_processes=self._get_affected_processes(source.domain),\n                    recommendations=[\n                        f\"Connect {source.name} via API or ETL to related systems\",\n                        \"Establish data synchronization schedule\",\n                        \"Define master data source for shared entities\"\n                    ]\n                ))\n            elif connections == 1 and source.access_level == \"personal\":\n                silos.append(DataSilo(\n                    id=f\"personal_{source.id}\",\n                    sources=[source],\n                    domain=source.domain,\n                    severity=SiloSeverity.MEDIUM,\n                    issue_type=\"personal_silo\",\n                    description=f\"{source.name} is a personal data store with limited access\",\n                    impact=\"Data not accessible to team, knowledge loss risk\",\n                    affected_users=1,\n                    affected_processes=self._get_affected_processes(source.domain),\n                    recommendations=[\n                        \"Move data to shared organizational repository\",\n                        \"Implement access controls instead of isolation\",\n                        \"Document data structure and usage\"\n                    ]\n                ))\n\n        return silos\n\n    def 
_detect_domain_silos(\n        self,\n        sources: List[DataSource]\n    ) -> List[DataSilo]:\n        \"\"\"Detect silos between domains that should be connected\"\"\"\n        silos = []\n\n        # Group sources by domain\n        domain_sources = defaultdict(list)\n        for source in sources:\n            domain_sources[source.domain].append(source)\n\n        # Check for missing domain connections\n        for domain, related_domains in self.domain_relationships.items():\n            domain_srcs = domain_sources.get(domain, [])\n\n            for related in related_domains:\n                related_srcs = domain_sources.get(related, [])\n\n                if domain_srcs and related_srcs:\n                    # Check if any connections exist between domains\n                    has_connection = False\n                    for src in domain_srcs:\n                        for rel_src in related_srcs:\n                            if rel_src.id in src.connections:\n                                has_connection = True\n                                break\n\n                    if not has_connection:\n                        silos.append(DataSilo(\n                            id=f\"domain_gap_{domain.value}_{related.value}\",\n                            sources=domain_srcs + related_srcs,\n                            domain=domain,\n                            severity=SiloSeverity.HIGH,\n                            issue_type=\"domain_disconnect\",\n                            description=f\"No data flow between {domain.value} and {related.value}\",\n                            impact=\"Related information not synchronized, decision delays\",\n                            affected_users=sum(len(s.users) for s in domain_srcs + related_srcs),\n                            affected_processes=self._get_affected_processes(domain) +\n                                              self._get_affected_processes(related),\n                            recommendations=[\n                                f\"Establish integration between {domain.value} and {related.value} systems\",\n                                \"Define shared data entities and master sources\",\n                                \"Implement automated data synchronization\"\n                            ]\n                        ))\n\n        return silos\n\n    def _detect_duplicates(\n        self,\n        sources: List[DataSource]\n    ) -> List[DuplicateData]:\n        \"\"\"Detect duplicate data across sources\"\"\"\n        duplicates = []\n\n        # Map entities to sources\n        entity_sources = defaultdict(list)\n        for source in sources:\n            for entity in source.data_entities:\n                entity_sources[entity].append(source.id)\n\n        # Find duplicates\n        for entity, source_ids in entity_sources.items():\n            if len(source_ids) > 1:\n                # Check if it's a critical entity\n                is_critical = entity.lower() in self.critical_entities\n\n                duplicate = DuplicateData(\n                    entity_name=entity,\n                    sources=source_ids,\n                    discrepancy_rate=0.0,  # Would need actual data to calculate\n                    issues=[]\n                )\n\n                if is_critical and len(source_ids) > 2:\n                    duplicate.issues.append(\n                        \"Critical entity duplicated in multiple systems\"\n                    )\n\n                if not any(s for s in sources if s.id in 
source_ids and \"master\" in s.name.lower()):\n                    duplicate.issues.append(\"No clear master source defined\")\n\n                duplicates.append(duplicate)\n\n        return duplicates\n\n    def _detect_flow_gaps(\n        self,\n        sources: List[DataSource],\n        process_flows: Optional[List[Dict]]\n    ) -> List[Dict]:\n        \"\"\"Detect gaps in expected data flows\"\"\"\n        gaps = []\n\n        # Check critical entity coverage\n        for entity, required_domains in self.critical_entities.items():\n            entity_domains = set()\n            for source in sources:\n                if entity in [e.lower() for e in source.data_entities]:\n                    entity_domains.add(source.domain)\n\n            missing = set(required_domains) - entity_domains\n            if missing:\n                gaps.append({\n                    \"entity\": entity,\n                    \"missing_domains\": [d.value for d in missing],\n                    \"impact\": f\"{entity} data not available in {len(missing)} domains\"\n                })\n\n        return gaps\n\n    def _calculate_connectivity_score(\n        self,\n        sources: List[DataSource],\n        connectivity: Dict[str, Set[str]]\n    ) -> float:\n        \"\"\"Calculate overall connectivity score\"\"\"\n        if not sources:\n            return 0.0\n\n        # Calculate average connections per source\n        total_connections = sum(len(conns) for conns in connectivity.values())\n        avg_connections = total_connections / len(sources)\n\n        # Ideal connections per source\n        ideal_connections = 3\n\n        # Score based on average connections\n        connection_score = min(1.0, avg_connections / ideal_connections)\n\n        # Penalize for isolated sources\n        isolated = sum(1 for s in sources if s.id not in connectivity or not connectivity[s.id])\n        isolation_penalty = isolated / len(sources)\n\n        # API availability bonus\n        api_count = sum(1 for s in sources if s.has_api)\n        api_bonus = (api_count / len(sources)) * 0.2\n\n        return max(0, min(1.0, connection_score - isolation_penalty + api_bonus))\n\n    def _get_affected_processes(self, domain: DataDomain) -> List[str]:\n        \"\"\"Get business processes affected by domain\"\"\"\n        process_map = {\n            DataDomain.DESIGN: [\"Design Review\", \"RFI Processing\", \"Drawing Distribution\"],\n            DataDomain.COST: [\"Budgeting\", \"Cost Tracking\", \"Invoice Processing\"],\n            DataDomain.SCHEDULE: [\"Planning\", \"Progress Tracking\", \"Resource Allocation\"],\n            DataDomain.PROCUREMENT: [\"Vendor Selection\", \"Purchase Orders\", \"Material Tracking\"],\n            DataDomain.SITE: [\"Daily Reports\", \"Progress Photos\", \"Issue Management\"],\n            DataDomain.QUALITY: [\"Inspections\", \"Defect Tracking\", \"Compliance\"],\n            DataDomain.SAFETY: [\"Incident Reporting\", \"Safety Inspections\", \"Training\"],\n            DataDomain.FINANCIAL: [\"Billing\", \"Payments\", \"Financial Reporting\"],\n            DataDomain.HR: [\"Timekeeping\", \"Resource Management\", \"Certifications\"]\n        }\n        return process_map.get(domain, [])\n\n    def _prioritize_silos(\n        self,\n        silos: List[DataSilo]\n    ) -> List[DataSilo]:\n        \"\"\"Prioritize silos by severity and impact\"\"\"\n        severity_order = {\n            SiloSeverity.CRITICAL: 0,\n            SiloSeverity.HIGH: 1,\n            SiloSeverity.MEDIUM: 
2,\n            SiloSeverity.LOW: 3\n        }\n\n        return sorted(\n            silos,\n            key=lambda s: (severity_order[s.severity], -s.affected_users)\n        )\n\n    def _generate_priority_actions(\n        self,\n        silos: List[DataSilo],\n        duplicates: List[DuplicateData]\n    ) -> List[str]:\n        \"\"\"Generate prioritized action items\"\"\"\n        actions = []\n\n        # Critical silos first\n        critical_silos = [s for s in silos if s.severity == SiloSeverity.CRITICAL]\n        for silo in critical_silos[:3]:\n            actions.append(f\"URGENT: {silo.recommendations[0]}\")\n\n        # Duplicate data issues\n        critical_dups = [d for d in duplicates if d.issues]\n        for dup in critical_dups[:2]:\n            actions.append(\n                f\"Define master source for '{dup.entity_name}' \"\n                f\"(currently in {len(dup.sources)} sources)\"\n            )\n\n        # High priority silos\n        high_silos = [s for s in silos if s.severity == SiloSeverity.HIGH]\n        for silo in high_silos[:3]:\n            if silo.recommendations:\n                actions.append(silo.recommendations[0])\n\n        return actions[:10]\n\n    def _create_integration_roadmap(\n        self,\n        silos: List[DataSilo],\n        gaps: List[Dict]\n    ) -> Dict:\n        \"\"\"Create phased integration roadmap\"\"\"\n        roadmap = {\n            \"Phase 1 - Quick Wins (0-3 months)\": [],\n            \"Phase 2 - Core Integration (3-6 months)\": [],\n            \"Phase 3 - Advanced Integration (6-12 months)\": [],\n            \"Phase 4 - Optimization (12+ months)\": []\n        }\n\n        # Phase 1: Address personal silos and easy integrations\n        for silo in silos:\n            if silo.issue_type == \"personal_silo\":\n                roadmap[\"Phase 1 - Quick Wins (0-3 months)\"].append(\n                    f\"Migrate {silo.sources[0].name} to shared repository\"\n                )\n\n        # Phase 2: Core domain integrations\n        domain_gaps = [s for s in silos if s.issue_type == \"domain_disconnect\"]\n        for silo in domain_gaps[:3]:\n            roadmap[\"Phase 2 - Core Integration (3-6 months)\"].append(\n                silo.recommendations[0] if silo.recommendations else silo.description\n            )\n\n        # Phase 3: Critical entity master data\n        roadmap[\"Phase 3 - Advanced Integration (6-12 months)\"].extend([\n            \"Implement master data management for shared entities\",\n            \"Deploy integration middleware/ESB\",\n            \"Establish data governance policies\"\n        ])\n\n        # Phase 4: Optimization\n        roadmap[\"Phase 4 - Optimization (12+ months)\"].extend([\n            \"Implement real-time data synchronization\",\n            \"Deploy integration monitoring and alerting\",\n            \"Continuous improvement based on metrics\"\n        ])\n\n        return roadmap\n\n    def generate_report(self, analysis: SiloAnalysis) -> str:\n        \"\"\"Generate silo analysis report\"\"\"\n        report = f\"\"\"\n# Data Silo Analysis Report\n## {analysis.organization}\n\n**Analysis Date:** {analysis.analysis_date.strftime('%Y-%m-%d')}\n**Data Sources Analyzed:** {analysis.total_sources}\n**Connectivity Score:** {analysis.connectivity_score:.0%}\n\n## Executive Summary\n\nDetected **{len(analysis.silos_detected)}** data silos and **{len(analysis.duplicates)}** duplicate data issues.\n\n### Silos by Severity\n\"\"\"\n        severity_counts = 
defaultdict(int)\n        for silo in analysis.silos_detected:\n            severity_counts[silo.severity.value] += 1\n\n        for severity in [\"critical\", \"high\", \"medium\", \"low\"]:\n            count = severity_counts.get(severity, 0)\n            if count > 0:\n                report += f\"- **{severity.title()}**: {count}\\n\"\n\n        report += \"\\n## Priority Actions\\n\\n\"\n        for i, action in enumerate(analysis.priority_actions, 1):\n            report += f\"{i}. {action}\\n\"\n\n        report += \"\\n## Detected Silos\\n\\n\"\n        for silo in analysis.silos_detected[:5]:\n            report += f\"\"\"\n### {silo.id}\n- **Type:** {silo.issue_type}\n- **Severity:** {silo.severity.value}\n- **Impact:** {silo.impact}\n- **Affected Users:** {silo.affected_users}\n\"\"\"\n\n        report += \"\\n## Integration Roadmap\\n\"\n        for phase, items in analysis.integration_roadmap.items():\n            report += f\"\\n### {phase}\\n\"\n            for item in items:\n                report += f\"- {item}\\n\"\n\n        return report\n\nCommon Use Cases\nDetect Data Silos\ndetector = DataSiloDetector()\n\n# Define data sources\nsources = [\n    DataSource(\n        id=\"revit\",\n        name=\"Revit Models\",\n        type=DataSourceType.DESKTOP_APP,\n        domain=DataDomain.DESIGN,\n        owner=\"Design Team\",\n        department=\"Engineering\",\n        users=[\"architect1\", \"engineer1\", \"engineer2\"],\n        data_entities=[\"building_model\", \"drawings\", \"schedules\"],\n        connections=[\"navisworks\"],\n        has_api=True\n    ),\n    DataSource(\n        id=\"excel_estimates\",\n        name=\"Excel Cost Estimates\",\n        type=DataSourceType.SPREADSHEET,\n        domain=DataDomain.COST,\n        owner=\"Estimator\",\n        department=\"Pre-construction\",\n        users=[\"estimator1\"],\n        data_entities=[\"costs\", \"quantities\", \"labor_rates\"],\n        connections=[],  # No connections - silo!\n        access_level=\"personal\"\n    ),\n    DataSource(\n        id=\"procore\",\n        name=\"Procore\",\n        type=DataSourceType.CLOUD_APP,\n        domain=DataDomain.SITE,\n        owner=\"Project Manager\",\n        department=\"Operations\",\n        users=[\"pm1\", \"pm2\", \"super1\"],\n        data_entities=[\"daily_reports\", \"photos\", \"punch_list\"],\n        connections=[\"primavera\"],\n        has_api=True\n    )\n]\n\nanalysis = detector.detect_silos(\n    organization=\"ABC Construction\",\n    data_sources=sources\n)\n\nprint(f\"Silos detected: {len(analysis.silos_detected)}\")\nprint(f\"Connectivity score: {analysis.connectivity_score:.0%}\")\n\nGenerate Silo Report\nreport = detector.generate_report(analysis)\nprint(report)\n\n# Save to file\nwith open(\"silo_report.md\", \"w\") as f:\n    f.write(report)\n\nView Priority Actions\nprint(\"Priority Actions:\")\nfor i, action in enumerate(analysis.priority_actions, 1):\n    print(f\"{i}. 
{action}\")\n\nprint(\"\\nIntegration Roadmap:\")\nfor phase, items in analysis.integration_roadmap.items():\n    print(f\"\\n{phase}:\")\n    for item in items:\n        print(f\"  - {item}\")\n\nQuick Reference\nComponent\tPurpose\nDataSiloDetector\tMain detection engine\nDataSource\tData source definition\nDataSilo\tDetected silo with details\nDuplicateData\tDuplicate data detection\nSiloAnalysis\tComplete analysis results\nSiloSeverity\tSeverity classification\nResources\nBook: \"Data-Driven Construction\" by Artem Boiko, Chapter 1.2\nWebsite: https://datadrivenconstruction.io\nNext Steps\nUse erp-integration-analysis for system integration\nUse data-evolution-analysis for maturity assessment\nUse etl-pipeline to connect silos"
  },
  "trust": {
    "sourceLabel": "tencent",
    "provenanceUrl": "https://clawhub.ai/datadrivenconstruction/data-silo-detection",
    "publisherUrl": "https://clawhub.ai/datadrivenconstruction/data-silo-detection",
    "owner": "datadrivenconstruction",
    "version": "2.1.0",
    "license": null,
    "verificationStatus": "Indexed source record"
  },
  "links": {
    "detailUrl": "https://openagent3.xyz/skills/data-silo-detection",
    "downloadUrl": "https://openagent3.xyz/downloads/data-silo-detection",
    "agentUrl": "https://openagent3.xyz/skills/data-silo-detection/agent",
    "manifestUrl": "https://openagent3.xyz/skills/data-silo-detection/agent.json",
    "briefUrl": "https://openagent3.xyz/skills/data-silo-detection/agent.md"
  }
}