{
  "schemaVersion": "1.0",
  "item": {
    "slug": "open-data-integrator",
    "name": "Open Data Integrator",
    "source": "tencent",
    "type": "skill",
    "category": "数据分析",
    "sourceUrl": "https://clawhub.ai/datadrivenconstruction/open-data-integrator",
    "canonicalUrl": "https://clawhub.ai/datadrivenconstruction/open-data-integrator",
    "targetPlatform": "OpenClaw"
  },
  "install": {
    "downloadMode": "redirect",
    "downloadUrl": "/downloads/open-data-integrator",
    "sourceDownloadUrl": "https://wry-manatee-359.convex.site/api/v1/download?slug=open-data-integrator",
    "sourcePlatform": "tencent",
    "targetPlatform": "OpenClaw",
    "installMethod": "Manual import",
    "extraction": "Extract archive",
    "prerequisites": [
      "OpenClaw"
    ],
    "packageFormat": "ZIP package",
    "includedAssets": [
      "claw.json",
      "instructions.md",
      "SKILL.md"
    ],
    "primaryDoc": "SKILL.md",
    "quickSetup": [
      "Download the package from Yavira.",
      "Extract the archive and review SKILL.md first.",
      "Import or place the package into your OpenClaw setup."
    ],
    "agentAssist": {
      "summary": "Hand the extracted package to your coding agent with a concrete install brief instead of figuring it out manually.",
      "steps": [
        "Download the package from Yavira.",
        "Extract it into a folder your agent can access.",
        "Paste one of the prompts below and point your agent at the extracted folder."
      ],
      "prompts": [
        {
          "label": "New install",
          "body": "I downloaded a skill package from Yavira. Read SKILL.md from the extracted folder and install it by following the included instructions. Tell me what you changed and call out any manual steps you could not complete."
        },
        {
          "label": "Upgrade existing",
          "body": "I downloaded an updated skill package from Yavira. Read SKILL.md from the extracted folder, compare it with my current installation, and upgrade it while preserving any custom configuration unless the package docs explicitly say otherwise. Summarize what changed and any follow-up checks I should run."
        }
      ]
    },
    "sourceHealth": {
      "source": "tencent",
      "status": "healthy",
      "reason": "direct_download_ok",
      "recommendedAction": "download",
      "checkedAt": "2026-04-23T16:43:11.935Z",
      "expiresAt": "2026-04-30T16:43:11.935Z",
      "httpStatus": 200,
      "finalUrl": "https://wry-manatee-359.convex.site/api/v1/download?slug=4claw-imageboard",
      "contentType": "application/zip",
      "probeMethod": "head",
      "details": {
        "probeUrl": "https://wry-manatee-359.convex.site/api/v1/download?slug=4claw-imageboard",
        "contentDisposition": "attachment; filename=\"4claw-imageboard-1.0.1.zip\"",
        "redirectLocation": null,
        "bodySnippet": null
      },
      "scope": "source",
      "summary": "Source download looks usable.",
      "detail": "Yavira can redirect you to the upstream package for this source.",
      "primaryActionLabel": "Download for OpenClaw",
      "primaryActionHref": "/downloads/open-data-integrator"
    },
    "validation": {
      "installChecklist": [
        "Use the Yavira download entry.",
        "Review SKILL.md after the package is downloaded.",
        "Confirm the extracted package contains the expected setup assets."
      ],
      "postInstallChecks": [
        "Confirm the extracted package includes the expected docs or setup files.",
        "Validate the skill or prompts are available in your target agent workspace.",
        "Capture any manual follow-up steps the agent could not complete."
      ]
    },
    "downloadPageUrl": "https://openagent3.xyz/downloads/open-data-integrator",
    "agentPageUrl": "https://openagent3.xyz/skills/open-data-integrator/agent",
    "manifestUrl": "https://openagent3.xyz/skills/open-data-integrator/agent.json",
    "briefUrl": "https://openagent3.xyz/skills/open-data-integrator/agent.md"
  },
  "agentAssist": {
    "summary": "Hand the extracted package to your coding agent with a concrete install brief instead of figuring it out manually.",
    "steps": [
      "Download the package from Yavira.",
      "Extract it into a folder your agent can access.",
      "Paste one of the prompts below and point your agent at the extracted folder."
    ],
    "prompts": [
      {
        "label": "New install",
        "body": "I downloaded a skill package from Yavira. Read SKILL.md from the extracted folder and install it by following the included instructions. Tell me what you changed and call out any manual steps you could not complete."
      },
      {
        "label": "Upgrade existing",
        "body": "I downloaded an updated skill package from Yavira. Read SKILL.md from the extracted folder, compare it with my current installation, and upgrade it while preserving any custom configuration unless the package docs explicitly say otherwise. Summarize what changed and any follow-up checks I should run."
      }
    ]
  },
  "documentation": {
    "source": "clawhub",
    "primaryDoc": "SKILL.md",
    "sections": [
      {
        "title": "Overview",
        "body": "Based on DDC methodology (Chapter 2.2), this skill integrates open construction datasets from various sources like government databases, industry benchmarks, weather services, and geospatial data.\n\nBook Reference: \"Доминирование открытых данных\" / \"Open Data Dominance\""
      },
      {
        "title": "Quick Start",
        "body": "from dataclasses import dataclass, field\nfrom enum import Enum\nfrom typing import List, Dict, Optional, Any, Callable\nfrom datetime import datetime, date\nimport json\nimport requests\nfrom abc import ABC, abstractmethod\n\nclass DataSourceType(Enum):\n    \"\"\"Types of open data sources\"\"\"\n    GOVERNMENT = \"government\"           # Government statistics\n    INDUSTRY_BENCHMARK = \"benchmark\"    # Industry benchmarks\n    WEATHER = \"weather\"                 # Weather data\n    GEOSPATIAL = \"geospatial\"           # Geographic data\n    MATERIAL_PRICES = \"material_prices\" # Material cost indices\n    LABOR_RATES = \"labor_rates\"         # Labor cost data\n    BUILDING_PERMITS = \"permits\"        # Permit data\n    ENERGY = \"energy\"                   # Energy prices/data\n    ECONOMIC = \"economic\"               # Economic indicators\n\nclass UpdateFrequency(Enum):\n    \"\"\"Data update frequency\"\"\"\n    REALTIME = \"realtime\"\n    HOURLY = \"hourly\"\n    DAILY = \"daily\"\n    WEEKLY = \"weekly\"\n    MONTHLY = \"monthly\"\n    QUARTERLY = \"quarterly\"\n    ANNUAL = \"annual\"\n\n@dataclass\nclass OpenDataSource:\n    \"\"\"Definition of an open data source\"\"\"\n    id: str\n    name: str\n    source_type: DataSourceType\n    url: str\n    api_key_required: bool = False\n    update_frequency: UpdateFrequency = UpdateFrequency.DAILY\n    format: str = \"json\"\n    license: str = \"open\"\n    description: Optional[str] = None\n    fields: List[str] = field(default_factory=list)\n\n@dataclass\nclass DataRecord:\n    \"\"\"A single data record from a source\"\"\"\n    source_id: str\n    timestamp: datetime\n    data: Dict[str, Any]\n    metadata: Dict[str, Any] = field(default_factory=dict)\n\n@dataclass\nclass IntegrationResult:\n    \"\"\"Result of data integration\"\"\"\n    source: str\n    records_fetched: int\n    records_processed: int\n    errors: List[str]\n    last_updated: datetime\n    sample_data: List[Dict]\n\n@dataclass\nclass EnrichedData:\n    \"\"\"Data enriched with open data\"\"\"\n    original_data: Dict[str, Any]\n    enrichments: Dict[str, Any]\n    sources_used: List[str]\n    confidence: float\n\n\nclass OpenDataConnector(ABC):\n    \"\"\"Base class for open data connectors\"\"\"\n\n    @abstractmethod\n    def fetch(self, params: Dict) -> List[DataRecord]:\n        pass\n\n    @abstractmethod\n    def get_metadata(self) -> Dict:\n        pass\n\n\nclass WeatherDataConnector(OpenDataConnector):\n    \"\"\"Connector for weather data (e.g., OpenWeatherMap)\"\"\"\n\n    def __init__(self, api_key: Optional[str] = None):\n        self.api_key = api_key\n        self.base_url = \"https://api.openweathermap.org/data/2.5\"\n\n    def fetch(\n        self,\n        params: Dict\n    ) -> List[DataRecord]:\n        \"\"\"Fetch weather data for location\"\"\"\n        lat = params.get(\"lat\")\n        lon = params.get(\"lon\")\n        start_date = params.get(\"start_date\")\n        end_date = params.get(\"end_date\")\n\n        # Simulate API call (in production, use actual API)\n        records = []\n\n        # Generate sample historical data\n        current = start_date\n        while current <= end_date:\n            records.append(DataRecord(\n                source_id=\"openweathermap\",\n                timestamp=datetime.combine(current, datetime.min.time()),\n                data={\n                    \"date\": current.isoformat(),\n                    \"temp_max\": 25.0,\n                    \"temp_min\": 15.0,\n                    \"precipitation\": 0.0,\n                    \"wind_speed\": 10.0,\n                    \"weather_code\": \"clear\"\n                },\n                metadata={\"lat\": lat, \"lon\": lon}\n            ))\n            current = date(current.year, current.month, current.day + 1) if current.day < 28 else date(current.year, current.month + 1 if current.month < 12 else 1, 1)\n\n        return records[:30]  # Limit for demo\n\n    def get_metadata(self) -> Dict:\n        return {\n            \"source\": \"OpenWeatherMap\",\n            \"type\": DataSourceType.WEATHER.value,\n            \"frequency\": UpdateFrequency.HOURLY.value,\n            \"fields\": [\"temp_max\", \"temp_min\", \"precipitation\", \"wind_speed\"]\n        }\n\n\nclass MaterialPriceConnector(OpenDataConnector):\n    \"\"\"Connector for material price indices\"\"\"\n\n    def __init__(self, region: str = \"US\"):\n        self.region = region\n        self.price_indices = self._load_indices()\n\n    def _load_indices(self) -> Dict[str, Dict]:\n        \"\"\"Load material price indices\"\"\"\n        return {\n            \"concrete\": {\"base\": 100, \"current\": 125, \"trend\": \"up\"},\n            \"steel\": {\"base\": 100, \"current\": 145, \"trend\": \"up\"},\n            \"lumber\": {\"base\": 100, \"current\": 180, \"trend\": \"stable\"},\n            \"copper\": {\"base\": 100, \"current\": 135, \"trend\": \"up\"},\n            \"asphalt\": {\"base\": 100, \"current\": 115, \"trend\": \"stable\"},\n            \"gypsum\": {\"base\": 100, \"current\": 110, \"trend\": \"stable\"},\n            \"glass\": {\"base\": 100, \"current\": 105, \"trend\": \"down\"},\n            \"cement\": {\"base\": 100, \"current\": 120, \"trend\": \"up\"},\n        }\n\n    def fetch(self, params: Dict) -> List[DataRecord]:\n        \"\"\"Fetch material price data\"\"\"\n        materials = params.get(\"materials\", list(self.price_indices.keys()))\n\n        records = []\n        for material in materials:\n            if material in self.price_indices:\n                records.append(DataRecord(\n                    source_id=\"material_prices\",\n                    timestamp=datetime.now(),\n                    data={\n                        \"material\": material,\n                        \"region\": self.region,\n                        **self.price_indices[material]\n                    }\n                ))\n        return records\n\n    def get_metadata(self) -> Dict:\n        return {\n            \"source\": \"Material Price Index\",\n            \"type\": DataSourceType.MATERIAL_PRICES.value,\n            \"frequency\": UpdateFrequency.MONTHLY.value,\n            \"materials\": list(self.price_indices.keys())\n        }\n\n\nclass LaborRateConnector(OpenDataConnector):\n    \"\"\"Connector for labor rate data\"\"\"\n\n    def __init__(self, region: str = \"US\"):\n        self.region = region\n        self.labor_rates = self._load_rates()\n\n    def _load_rates(self) -> Dict[str, Dict]:\n        \"\"\"Load labor rates by trade\"\"\"\n        return {\n            \"carpenter\": {\"hourly\": 45.00, \"burden_rate\": 1.35},\n            \"electrician\": {\"hourly\": 55.00, \"burden_rate\": 1.40},\n            \"plumber\": {\"hourly\": 52.00, \"burden_rate\": 1.38},\n            \"ironworker\": {\"hourly\": 58.00, \"burden_rate\": 1.42},\n            \"laborer\": {\"hourly\": 32.00, \"burden_rate\": 1.30},\n            \"operator\": {\"hourly\": 48.00, \"burden_rate\": 1.35},\n            \"mason\": {\"hourly\": 50.00, \"burden_rate\": 1.36},\n            \"painter\": {\"hourly\": 38.00, \"burden_rate\": 1.32},\n            \"hvac_tech\": {\"hourly\": 54.00, \"burden_rate\": 1.38},\n            \"welder\": {\"hourly\": 52.00, \"burden_rate\": 1.40},\n        }\n\n    def fetch(self, params: Dict) -> List[DataRecord]:\n        \"\"\"Fetch labor rate data\"\"\"\n        trades = params.get(\"trades\", list(self.labor_rates.keys()))\n\n        records = []\n        for trade in trades:\n            if trade in self.labor_rates:\n                rate_data = self.labor_rates[trade]\n                records.append(DataRecord(\n                    source_id=\"labor_rates\",\n                    timestamp=datetime.now(),\n                    data={\n                        \"trade\": trade,\n                        \"region\": self.region,\n                        \"hourly_rate\": rate_data[\"hourly\"],\n                        \"burden_rate\": rate_data[\"burden_rate\"],\n                        \"fully_loaded\": rate_data[\"hourly\"] * rate_data[\"burden_rate\"]\n                    }\n                ))\n        return records\n\n    def get_metadata(self) -> Dict:\n        return {\n            \"source\": \"Labor Rate Database\",\n            \"type\": DataSourceType.LABOR_RATES.value,\n            \"frequency\": UpdateFrequency.QUARTERLY.value,\n            \"trades\": list(self.labor_rates.keys())\n        }\n\n\nclass BuildingPermitConnector(OpenDataConnector):\n    \"\"\"Connector for building permit data\"\"\"\n\n    def __init__(self, jurisdiction: str = \"default\"):\n        self.jurisdiction = jurisdiction\n\n    def fetch(self, params: Dict) -> List[DataRecord]:\n        \"\"\"Fetch permit data\"\"\"\n        # Simulate permit data\n        permit_types = [\"new_construction\", \"renovation\", \"addition\", \"demolition\"]\n\n        records = []\n        for ptype in permit_types:\n            records.append(DataRecord(\n                source_id=\"building_permits\",\n                timestamp=datetime.now(),\n                data={\n                    \"permit_type\": ptype,\n                    \"jurisdiction\": self.jurisdiction,\n                    \"count_ytd\": 150,\n                    \"total_value\": 25000000,\n                    \"avg_processing_days\": 21\n                }\n            ))\n        return records\n\n    def get_metadata(self) -> Dict:\n        return {\n            \"source\": \"Building Permit Database\",\n            \"type\": DataSourceType.BUILDING_PERMITS.value,\n            \"frequency\": UpdateFrequency.DAILY.value\n        }\n\n\nclass OpenDataIntegrator:\n    \"\"\"\n    Integrate open construction datasets.\n    Based on DDC methodology Chapter 2.2.\n    \"\"\"\n\n    def __init__(self, region: str = \"US\"):\n        self.region = region\n        self.connectors: Dict[str, OpenDataConnector] = {}\n        self.cache: Dict[str, List[DataRecord]] = {}\n        self.cache_expiry: Dict[str, datetime] = {}\n        self._register_default_connectors()\n\n    def _register_default_connectors(self):\n        \"\"\"Register default data connectors\"\"\"\n        self.register_connector(\"weather\", WeatherDataConnector())\n        self.register_connector(\"material_prices\", MaterialPriceConnector(self.region))\n        self.register_connector(\"labor_rates\", LaborRateConnector(self.region))\n        self.register_connector(\"permits\", BuildingPermitConnector())\n\n    def register_connector(\n        self,\n        name: str,\n        connector: OpenDataConnector\n    ):\n        \"\"\"Register a data connector\"\"\"\n        self.connectors[name] = connector\n\n    def fetch_data(\n        self,\n        source: str,\n        params: Optional[Dict] = None,\n        use_cache: bool = True\n    ) -> IntegrationResult:\n        \"\"\"\n        Fetch data from a source.\n\n        Args:\n            source: Name of the data source\n            params: Query parameters\n            use_cache: Whether to use cached data\n\n        Returns:\n            Integration result with fetched data\n        \"\"\"\n        if source not in self.connectors:\n            return IntegrationResult(\n                source=source,\n                records_fetched=0,\n                records_processed=0,\n                errors=[f\"Unknown source: {source}\"],\n                last_updated=datetime.now(),\n                sample_data=[]\n            )\n\n        # Check cache\n        cache_key = f\"{source}_{json.dumps(params or {}, sort_keys=True)}\"\n        if use_cache and cache_key in self.cache:\n            expiry = self.cache_expiry.get(cache_key)\n            if expiry and expiry > datetime.now():\n                cached = self.cache[cache_key]\n                return IntegrationResult(\n                    source=source,\n                    records_fetched=len(cached),\n                    records_processed=len(cached),\n                    errors=[],\n                    last_updated=expiry,\n                    sample_data=[r.data for r in cached[:5]]\n                )\n\n        # Fetch fresh data\n        connector = self.connectors[source]\n        errors = []\n\n        try:\n            records = connector.fetch(params or {})\n\n            # Cache the results\n            self.cache[cache_key] = records\n            self.cache_expiry[cache_key] = datetime.now()\n\n            return IntegrationResult(\n                source=source,\n                records_fetched=len(records),\n                records_processed=len(records),\n                errors=errors,\n                last_updated=datetime.now(),\n                sample_data=[r.data for r in records[:5]]\n            )\n\n        except Exception as e:\n            errors.append(str(e))\n            return IntegrationResult(\n                source=source,\n                records_fetched=0,\n                records_processed=0,\n                errors=errors,\n                last_updated=datetime.now(),\n                sample_data=[]\n            )\n\n    def enrich_project_data(\n        self,\n        project_data: Dict[str, Any],\n        enrichment_sources: Optional[List[str]] = None\n    ) -> EnrichedData:\n        \"\"\"\n        Enrich project data with open data.\n\n        Args:\n            project_data: Original project data\n            enrichment_sources: Sources to use for enrichment\n\n        Returns:\n            Enriched data\n        \"\"\"\n        sources = enrichment_sources or [\"material_prices\", \"labor_rates\", \"weather\"]\n        enrichments = {}\n        sources_used = []\n\n        # Material price enrichment\n        if \"material_prices\" in sources and \"materials\" in project_data:\n            materials = project_data[\"materials\"]\n            result = self.fetch_data(\"material_prices\", {\"materials\": materials})\n            if result.records_fetched > 0:\n                enrichments[\"material_price_indices\"] = result.sample_data\n                sources_used.append(\"material_prices\")\n\n        # Labor rate enrichment\n        if \"labor_rates\" in sources and \"trades\" in project_data:\n            trades = project_data[\"trades\"]\n            result = self.fetch_data(\"labor_rates\", {\"trades\": trades})\n            if result.records_fetched > 0:\n                enrichments[\"labor_rates\"] = result.sample_data\n                sources_used.append(\"labor_rates\")\n\n        # Weather enrichment\n        if \"weather\" in sources and \"location\" in project_data:\n            loc = project_data[\"location\"]\n            params = {\n                \"lat\": loc.get(\"lat\"),\n                \"lon\": loc.get(\"lon\"),\n                \"start_date\": project_data.get(\"start_date\", date.today()),\n                \"end_date\": project_data.get(\"end_date\", date.today())\n            }\n            result = self.fetch_data(\"weather\", params)\n            if result.records_fetched > 0:\n                enrichments[\"weather_forecast\"] = result.sample_data\n                sources_used.append(\"weather\")\n\n        # Calculate confidence based on enrichment success\n        confidence = len(sources_used) / len(sources) if sources else 0\n\n        return EnrichedData(\n            original_data=project_data,\n            enrichments=enrichments,\n            sources_used=sources_used,\n            confidence=confidence\n        )\n\n    def get_cost_indices(\n        self,\n        materials: Optional[List[str]] = None,\n        trades: Optional[List[str]] = None\n    ) -> Dict:\n        \"\"\"Get current cost indices\"\"\"\n        indices = {\n            \"timestamp\": datetime.now().isoformat(),\n            \"region\": self.region\n        }\n\n        if materials:\n            result = self.fetch_data(\"material_prices\", {\"materials\": materials})\n            indices[\"materials\"] = result.sample_data\n\n        if trades:\n            result = self.fetch_data(\"labor_rates\", {\"trades\": trades})\n            indices[\"labor\"] = result.sample_data\n\n        return indices\n\n    def get_weather_risk(\n        self,\n        lat: float,\n        lon: float,\n        start_date: date,\n        end_date: date\n    ) -> Dict:\n        \"\"\"Assess weather risk for project period\"\"\"\n        result = self.fetch_data(\"weather\", {\n            \"lat\": lat,\n            \"lon\": lon,\n            \"start_date\": start_date,\n            \"end_date\": end_date\n        })\n\n        if result.records_fetched == 0:\n            return {\"error\": \"No weather data available\"}\n\n        # Calculate risk metrics\n        rain_days = sum(1 for d in result.sample_data\n                       if d.get(\"precipitation\", 0) > 5)\n        extreme_temp_days = sum(1 for d in result.sample_data\n                               if d.get(\"temp_max\", 0) > 35 or d.get(\"temp_min\", 0) < 0)\n\n        total_days = len(result.sample_data)\n        risk_score = (rain_days + extreme_temp_days) / total_days if total_days > 0 else 0\n\n        return {\n            \"total_days\": total_days,\n            \"rain_days\": rain_days,\n            \"extreme_temperature_days\": extreme_temp_days,\n            \"risk_score\": risk_score,\n            \"risk_level\": \"high\" if risk_score > 0.3 else \"medium\" if risk_score > 0.1 else \"low\"\n        }\n\n    def list_sources(self) -> List[Dict]:\n        \"\"\"List all available data sources\"\"\"\n        sources = []\n        for name, connector in self.connectors.items():\n            meta = connector.get_metadata()\n            sources.append({\n                \"name\": name,\n                **meta\n            })\n        return sources\n\n    def generate_report(self) -> str:\n        \"\"\"Generate data availability report\"\"\"\n        output = \"\"\"\n# Open Data Integration Report\n\n## Available Sources\n\"\"\"\n        for source in self.list_sources():\n            output += f\"\"\"\n### {source['name'].title()}\n- **Type:** {source['type']}\n- **Update Frequency:** {source['frequency']}\n\"\"\"\n\n        output += \"\"\"\n## Cache Status\n\"\"\"\n        for key, expiry in self.cache_expiry.items():\n            status = \"valid\" if expiry > datetime.now() else \"expired\"\n            output += f\"- {key}: {status}\\n\"\n\n        return output"
      },
      {
        "title": "Fetch Material Prices",
        "body": "integrator = OpenDataIntegrator(region=\"US\")\n\n# Get material price indices\nresult = integrator.fetch_data(\"material_prices\", {\n    \"materials\": [\"concrete\", \"steel\", \"lumber\"]\n})\n\nprint(f\"Fetched: {result.records_fetched} records\")\nfor record in result.sample_data:\n    print(f\"  {record['material']}: index={record['current']}, trend={record['trend']}\")"
      },
      {
        "title": "Enrich Project Data",
        "body": "project = {\n    \"name\": \"Office Building\",\n    \"materials\": [\"concrete\", \"steel\", \"glass\"],\n    \"trades\": [\"carpenter\", \"electrician\", \"plumber\"],\n    \"location\": {\"lat\": 40.7128, \"lon\": -74.0060},\n    \"start_date\": date(2024, 6, 1),\n    \"end_date\": date(2024, 12, 31)\n}\n\nenriched = integrator.enrich_project_data(project)\n\nprint(f\"Sources used: {enriched.sources_used}\")\nprint(f\"Confidence: {enriched.confidence:.0%}\")\nprint(f\"Material indices: {enriched.enrichments.get('material_price_indices')}\")"
      },
      {
        "title": "Assess Weather Risk",
        "body": "risk = integrator.get_weather_risk(\n    lat=40.7128,\n    lon=-74.0060,\n    start_date=date(2024, 6, 1),\n    end_date=date(2024, 8, 31)\n)\n\nprint(f\"Risk Level: {risk['risk_level']}\")\nprint(f\"Rain Days: {risk['rain_days']}\")"
      },
      {
        "title": "Quick Reference",
        "body": "ComponentPurposeOpenDataIntegratorMain integration engineOpenDataConnectorBase connector classWeatherDataConnectorWeather API connectorMaterialPriceConnectorMaterial price indicesLaborRateConnectorLabor rate dataEnrichedDataEnriched data result"
      },
      {
        "title": "Resources",
        "body": "Book: \"Data-Driven Construction\" by Artem Boiko, Chapter 2.2\nWebsite: https://datadrivenconstruction.io"
      },
      {
        "title": "Next Steps",
        "body": "Use ontology-mapper for semantic mapping\nUse cost-prediction with indices\nUse weather-impact-analysis for scheduling"
      }
    ],
    "body": "Open Data Integrator\nOverview\n\nBased on DDC methodology (Chapter 2.2), this skill integrates open construction datasets from various sources like government databases, industry benchmarks, weather services, and geospatial data.\n\nBook Reference: \"Доминирование открытых данных\" / \"Open Data Dominance\"\n\nQuick Start\nfrom dataclasses import dataclass, field\nfrom enum import Enum\nfrom typing import List, Dict, Optional, Any, Callable\nfrom datetime import datetime, date\nimport json\nimport requests\nfrom abc import ABC, abstractmethod\n\nclass DataSourceType(Enum):\n    \"\"\"Types of open data sources\"\"\"\n    GOVERNMENT = \"government\"           # Government statistics\n    INDUSTRY_BENCHMARK = \"benchmark\"    # Industry benchmarks\n    WEATHER = \"weather\"                 # Weather data\n    GEOSPATIAL = \"geospatial\"           # Geographic data\n    MATERIAL_PRICES = \"material_prices\" # Material cost indices\n    LABOR_RATES = \"labor_rates\"         # Labor cost data\n    BUILDING_PERMITS = \"permits\"        # Permit data\n    ENERGY = \"energy\"                   # Energy prices/data\n    ECONOMIC = \"economic\"               # Economic indicators\n\nclass UpdateFrequency(Enum):\n    \"\"\"Data update frequency\"\"\"\n    REALTIME = \"realtime\"\n    HOURLY = \"hourly\"\n    DAILY = \"daily\"\n    WEEKLY = \"weekly\"\n    MONTHLY = \"monthly\"\n    QUARTERLY = \"quarterly\"\n    ANNUAL = \"annual\"\n\n@dataclass\nclass OpenDataSource:\n    \"\"\"Definition of an open data source\"\"\"\n    id: str\n    name: str\n    source_type: DataSourceType\n    url: str\n    api_key_required: bool = False\n    update_frequency: UpdateFrequency = UpdateFrequency.DAILY\n    format: str = \"json\"\n    license: str = \"open\"\n    description: Optional[str] = None\n    fields: List[str] = field(default_factory=list)\n\n@dataclass\nclass DataRecord:\n    \"\"\"A single data record from a source\"\"\"\n    source_id: str\n    timestamp: datetime\n    data: Dict[str, Any]\n    metadata: Dict[str, Any] = field(default_factory=dict)\n\n@dataclass\nclass IntegrationResult:\n    \"\"\"Result of data integration\"\"\"\n    source: str\n    records_fetched: int\n    records_processed: int\n    errors: List[str]\n    last_updated: datetime\n    sample_data: List[Dict]\n\n@dataclass\nclass EnrichedData:\n    \"\"\"Data enriched with open data\"\"\"\n    original_data: Dict[str, Any]\n    enrichments: Dict[str, Any]\n    sources_used: List[str]\n    confidence: float\n\n\nclass OpenDataConnector(ABC):\n    \"\"\"Base class for open data connectors\"\"\"\n\n    @abstractmethod\n    def fetch(self, params: Dict) -> List[DataRecord]:\n        pass\n\n    @abstractmethod\n    def get_metadata(self) -> Dict:\n        pass\n\n\nclass WeatherDataConnector(OpenDataConnector):\n    \"\"\"Connector for weather data (e.g., OpenWeatherMap)\"\"\"\n\n    def __init__(self, api_key: Optional[str] = None):\n        self.api_key = api_key\n        self.base_url = \"https://api.openweathermap.org/data/2.5\"\n\n    def fetch(\n        self,\n        params: Dict\n    ) -> List[DataRecord]:\n        \"\"\"Fetch weather data for location\"\"\"\n        lat = params.get(\"lat\")\n        lon = params.get(\"lon\")\n        start_date = params.get(\"start_date\")\n        end_date = params.get(\"end_date\")\n\n        # Simulate API call (in production, use actual API)\n        records = []\n\n        # Generate sample historical data\n        current = start_date\n        while current <= end_date:\n            records.append(DataRecord(\n                source_id=\"openweathermap\",\n                timestamp=datetime.combine(current, datetime.min.time()),\n                data={\n                    \"date\": current.isoformat(),\n                    \"temp_max\": 25.0,\n                    \"temp_min\": 15.0,\n                    \"precipitation\": 0.0,\n                    \"wind_speed\": 10.0,\n                    \"weather_code\": \"clear\"\n                },\n                metadata={\"lat\": lat, \"lon\": lon}\n            ))\n            current = date(current.year, current.month, current.day + 1) if current.day < 28 else date(current.year, current.month + 1 if current.month < 12 else 1, 1)\n\n        return records[:30]  # Limit for demo\n\n    def get_metadata(self) -> Dict:\n        return {\n            \"source\": \"OpenWeatherMap\",\n            \"type\": DataSourceType.WEATHER.value,\n            \"frequency\": UpdateFrequency.HOURLY.value,\n            \"fields\": [\"temp_max\", \"temp_min\", \"precipitation\", \"wind_speed\"]\n        }\n\n\nclass MaterialPriceConnector(OpenDataConnector):\n    \"\"\"Connector for material price indices\"\"\"\n\n    def __init__(self, region: str = \"US\"):\n        self.region = region\n        self.price_indices = self._load_indices()\n\n    def _load_indices(self) -> Dict[str, Dict]:\n        \"\"\"Load material price indices\"\"\"\n        return {\n            \"concrete\": {\"base\": 100, \"current\": 125, \"trend\": \"up\"},\n            \"steel\": {\"base\": 100, \"current\": 145, \"trend\": \"up\"},\n            \"lumber\": {\"base\": 100, \"current\": 180, \"trend\": \"stable\"},\n            \"copper\": {\"base\": 100, \"current\": 135, \"trend\": \"up\"},\n            \"asphalt\": {\"base\": 100, \"current\": 115, \"trend\": \"stable\"},\n            \"gypsum\": {\"base\": 100, \"current\": 110, \"trend\": \"stable\"},\n            \"glass\": {\"base\": 100, \"current\": 105, \"trend\": \"down\"},\n            \"cement\": {\"base\": 100, \"current\": 120, \"trend\": \"up\"},\n        }\n\n    def fetch(self, params: Dict) -> List[DataRecord]:\n        \"\"\"Fetch material price data\"\"\"\n        materials = params.get(\"materials\", list(self.price_indices.keys()))\n\n        records = []\n        for material in materials:\n            if material in self.price_indices:\n                records.append(DataRecord(\n                    source_id=\"material_prices\",\n                    timestamp=datetime.now(),\n                    data={\n                        \"material\": material,\n                        \"region\": self.region,\n                        **self.price_indices[material]\n                    }\n                ))\n        return records\n\n    def get_metadata(self) -> Dict:\n        return {\n            \"source\": \"Material Price Index\",\n            \"type\": DataSourceType.MATERIAL_PRICES.value,\n            \"frequency\": UpdateFrequency.MONTHLY.value,\n            \"materials\": list(self.price_indices.keys())\n        }\n\n\nclass LaborRateConnector(OpenDataConnector):\n    \"\"\"Connector for labor rate data\"\"\"\n\n    def __init__(self, region: str = \"US\"):\n        self.region = region\n        self.labor_rates = self._load_rates()\n\n    def _load_rates(self) -> Dict[str, Dict]:\n        \"\"\"Load labor rates by trade\"\"\"\n        return {\n            \"carpenter\": {\"hourly\": 45.00, \"burden_rate\": 1.35},\n            \"electrician\": {\"hourly\": 55.00, \"burden_rate\": 1.40},\n            \"plumber\": {\"hourly\": 52.00, \"burden_rate\": 1.38},\n            \"ironworker\": {\"hourly\": 58.00, \"burden_rate\": 1.42},\n            \"laborer\": {\"hourly\": 32.00, \"burden_rate\": 1.30},\n            \"operator\": {\"hourly\": 48.00, \"burden_rate\": 1.35},\n            \"mason\": {\"hourly\": 50.00, \"burden_rate\": 1.36},\n            \"painter\": {\"hourly\": 38.00, \"burden_rate\": 1.32},\n            \"hvac_tech\": {\"hourly\": 54.00, \"burden_rate\": 1.38},\n            \"welder\": {\"hourly\": 52.00, \"burden_rate\": 1.40},\n        }\n\n    def fetch(self, params: Dict) -> List[DataRecord]:\n        \"\"\"Fetch labor rate data\"\"\"\n        trades = params.get(\"trades\", list(self.labor_rates.keys()))\n\n        records = []\n        for trade in trades:\n            if trade in self.labor_rates:\n                rate_data = self.labor_rates[trade]\n                records.append(DataRecord(\n                    source_id=\"labor_rates\",\n                    timestamp=datetime.now(),\n                    data={\n                        \"trade\": trade,\n                        \"region\": self.region,\n                        \"hourly_rate\": rate_data[\"hourly\"],\n                        \"burden_rate\": rate_data[\"burden_rate\"],\n                        \"fully_loaded\": rate_data[\"hourly\"] * rate_data[\"burden_rate\"]\n                    }\n                ))\n        return records\n\n    def get_metadata(self) -> Dict:\n        return {\n            \"source\": \"Labor Rate Database\",\n            \"type\": DataSourceType.LABOR_RATES.value,\n            \"frequency\": UpdateFrequency.QUARTERLY.value,\n            \"trades\": list(self.labor_rates.keys())\n        }\n\n\nclass BuildingPermitConnector(OpenDataConnector):\n    \"\"\"Connector for building permit data\"\"\"\n\n    def __init__(self, jurisdiction: str = \"default\"):\n        self.jurisdiction = jurisdiction\n\n    def fetch(self, params: Dict) -> List[DataRecord]:\n        \"\"\"Fetch permit data\"\"\"\n        # Simulate permit data\n        permit_types = [\"new_construction\", \"renovation\", \"addition\", \"demolition\"]\n\n        records = []\n        for ptype in permit_types:\n            records.append(DataRecord(\n                source_id=\"building_permits\",\n                timestamp=datetime.now(),\n                data={\n                    \"permit_type\": ptype,\n                    \"jurisdiction\": self.jurisdiction,\n                    \"count_ytd\": 150,\n                    \"total_value\": 25000000,\n                    \"avg_processing_days\": 21\n                }\n            ))\n        return records\n\n    def get_metadata(self) -> Dict:\n        return {\n            \"source\": \"Building Permit Database\",\n            \"type\": DataSourceType.BUILDING_PERMITS.value,\n            \"frequency\": UpdateFrequency.DAILY.value\n        }\n\n\nclass OpenDataIntegrator:\n    \"\"\"\n    Integrate open construction datasets.\n    Based on DDC methodology Chapter 2.2.\n    \"\"\"\n\n    def __init__(self, region: str = \"US\"):\n        self.region = region\n        self.connectors: Dict[str, OpenDataConnector] = {}\n        self.cache: Dict[str, List[DataRecord]] = {}\n        self.cache_expiry: Dict[str, datetime] = {}\n        self._register_default_connectors()\n\n    def _register_default_connectors(self):\n        \"\"\"Register default data connectors\"\"\"\n        self.register_connector(\"weather\", WeatherDataConnector())\n        self.register_connector(\"material_prices\", MaterialPriceConnector(self.region))\n        self.register_connector(\"labor_rates\", LaborRateConnector(self.region))\n        self.register_connector(\"permits\", BuildingPermitConnector())\n\n    def register_connector(\n        self,\n        name: str,\n        connector: OpenDataConnector\n    ):\n        \"\"\"Register a data connector\"\"\"\n        self.connectors[name] = connector\n\n    def fetch_data(\n        self,\n        source: str,\n        params: Optional[Dict] = None,\n        use_cache: bool = True\n    ) -> IntegrationResult:\n        \"\"\"\n        Fetch data from a source.\n\n        Args:\n            source: Name of the data source\n            params: Query parameters\n            use_cache: Whether to use cached data\n\n        Returns:\n            Integration result with fetched data\n        \"\"\"\n        if source not in self.connectors:\n            return IntegrationResult(\n                source=source,\n                records_fetched=0,\n                records_processed=0,\n                errors=[f\"Unknown source: {source}\"],\n                last_updated=datetime.now(),\n                sample_data=[]\n            )\n\n        # Check cache\n        cache_key = f\"{source}_{json.dumps(params or {}, sort_keys=True)}\"\n        if use_cache and cache_key in self.cache:\n            expiry = self.cache_expiry.get(cache_key)\n            if expiry and expiry > datetime.now():\n                cached = self.cache[cache_key]\n                return IntegrationResult(\n                    source=source,\n                    records_fetched=len(cached),\n                    records_processed=len(cached),\n                    errors=[],\n                    last_updated=expiry,\n                    sample_data=[r.data for r in cached[:5]]\n                )\n\n        # Fetch fresh data\n        connector = self.connectors[source]\n        errors = []\n\n        try:\n            records = connector.fetch(params or {})\n\n            # Cache the results\n            self.cache[cache_key] = records\n            self.cache_expiry[cache_key] = datetime.now()\n\n            return IntegrationResult(\n                source=source,\n                records_fetched=len(records),\n                records_processed=len(records),\n                errors=errors,\n                last_updated=datetime.now(),\n                sample_data=[r.data for r in records[:5]]\n            )\n\n        except Exception as e:\n            errors.append(str(e))\n            return IntegrationResult(\n                source=source,\n                records_fetched=0,\n                records_processed=0,\n                errors=errors,\n                last_updated=datetime.now(),\n                sample_data=[]\n            )\n\n    def enrich_project_data(\n        self,\n        project_data: Dict[str, Any],\n        enrichment_sources: Optional[List[str]] = None\n    ) -> EnrichedData:\n        \"\"\"\n        Enrich project data with open data.\n\n        Args:\n            project_data: Original project data\n            enrichment_sources: Sources to use for enrichment\n\n        Returns:\n            Enriched data\n        \"\"\"\n        sources = enrichment_sources or [\"material_prices\", \"labor_rates\", \"weather\"]\n        enrichments = {}\n        sources_used = []\n\n        # Material price enrichment\n        if \"material_prices\" in sources and \"materials\" in project_data:\n            materials = project_data[\"materials\"]\n            result = self.fetch_data(\"material_prices\", {\"materials\": materials})\n            if result.records_fetched > 0:\n                enrichments[\"material_price_indices\"] = result.sample_data\n                sources_used.append(\"material_prices\")\n\n        # Labor rate enrichment\n        if \"labor_rates\" in sources and \"trades\" in project_data:\n            trades = project_data[\"trades\"]\n            result = self.fetch_data(\"labor_rates\", {\"trades\": trades})\n            if result.records_fetched > 0:\n                enrichments[\"labor_rates\"] = result.sample_data\n                sources_used.append(\"labor_rates\")\n\n        # Weather enrichment\n        if \"weather\" in sources and \"location\" in project_data:\n            loc = project_data[\"location\"]\n            params = {\n                \"lat\": loc.get(\"lat\"),\n                \"lon\": loc.get(\"lon\"),\n                \"start_date\": project_data.get(\"start_date\", date.today()),\n                \"end_date\": project_data.get(\"end_date\", date.today())\n            }\n            result = self.fetch_data(\"weather\", params)\n            if result.records_fetched > 0:\n                enrichments[\"weather_forecast\"] = result.sample_data\n                sources_used.append(\"weather\")\n\n        # Calculate confidence based on enrichment success\n        confidence = len(sources_used) / len(sources) if sources else 0\n\n        return EnrichedData(\n            original_data=project_data,\n            enrichments=enrichments,\n            sources_used=sources_used,\n            confidence=confidence\n        )\n\n    def get_cost_indices(\n        self,\n        materials: Optional[List[str]] = None,\n        trades: Optional[List[str]] = None\n    ) -> Dict:\n        \"\"\"Get current cost indices\"\"\"\n        indices = {\n            \"timestamp\": datetime.now().isoformat(),\n            \"region\": self.region\n        }\n\n        if materials:\n            result = self.fetch_data(\"material_prices\", {\"materials\": materials})\n            indices[\"materials\"] = result.sample_data\n\n        if trades:\n            result = self.fetch_data(\"labor_rates\", {\"trades\": trades})\n            indices[\"labor\"] = result.sample_data\n\n        return indices\n\n    def get_weather_risk(\n        self,\n        lat: float,\n        lon: float,\n        start_date: date,\n        end_date: date\n    ) -> Dict:\n        \"\"\"Assess weather risk for project period\"\"\"\n        result = self.fetch_data(\"weather\", {\n            \"lat\": lat,\n            \"lon\": lon,\n            \"start_date\": start_date,\n            \"end_date\": end_date\n        })\n\n        if result.records_fetched == 0:\n            return {\"error\": \"No weather data available\"}\n\n        # Calculate risk metrics\n        rain_days = sum(1 for d in result.sample_data\n                       if d.get(\"precipitation\", 0) > 5)\n        extreme_temp_days = sum(1 for d in result.sample_data\n                               if d.get(\"temp_max\", 0) > 35 or d.get(\"temp_min\", 0) < 0)\n\n        total_days = len(result.sample_data)\n        risk_score = (rain_days + extreme_temp_days) / total_days if total_days > 0 else 0\n\n        return {\n            \"total_days\": total_days,\n            \"rain_days\": rain_days,\n            \"extreme_temperature_days\": extreme_temp_days,\n            \"risk_score\": risk_score,\n            \"risk_level\": \"high\" if risk_score > 0.3 else \"medium\" if risk_score > 0.1 else \"low\"\n        }\n\n    def list_sources(self) -> List[Dict]:\n        \"\"\"List all available data sources\"\"\"\n        sources = []\n        for name, connector in self.connectors.items():\n            meta = connector.get_metadata()\n            sources.append({\n                \"name\": name,\n                **meta\n            })\n        return sources\n\n    def generate_report(self) -> str:\n        \"\"\"Generate data availability report\"\"\"\n        output = \"\"\"\n# Open Data Integration Report\n\n## Available Sources\n\"\"\"\n        for source in self.list_sources():\n            output += f\"\"\"\n### {source['name'].title()}\n- **Type:** {source['type']}\n- **Update Frequency:** {source['frequency']}\n\"\"\"\n\n        output += \"\"\"\n## Cache Status\n\"\"\"\n        for key, expiry in self.cache_expiry.items():\n            status = \"valid\" if expiry > datetime.now() else \"expired\"\n            output += f\"- {key}: {status}\\n\"\n\n        return output\n\nCommon Use Cases\nFetch Material Prices\nintegrator = OpenDataIntegrator(region=\"US\")\n\n# Get material price indices\nresult = integrator.fetch_data(\"material_prices\", {\n    \"materials\": [\"concrete\", \"steel\", \"lumber\"]\n})\n\nprint(f\"Fetched: {result.records_fetched} records\")\nfor record in result.sample_data:\n    print(f\"  {record['material']}: index={record['current']}, trend={record['trend']}\")\n\nEnrich Project Data\nproject = {\n    \"name\": \"Office Building\",\n    \"materials\": [\"concrete\", \"steel\", \"glass\"],\n    \"trades\": [\"carpenter\", \"electrician\", \"plumber\"],\n    \"location\": {\"lat\": 40.7128, \"lon\": -74.0060},\n    \"start_date\": date(2024, 6, 1),\n    \"end_date\": date(2024, 12, 31)\n}\n\nenriched = integrator.enrich_project_data(project)\n\nprint(f\"Sources used: {enriched.sources_used}\")\nprint(f\"Confidence: {enriched.confidence:.0%}\")\nprint(f\"Material indices: {enriched.enrichments.get('material_price_indices')}\")\n\nAssess Weather Risk\nrisk = integrator.get_weather_risk(\n    lat=40.7128,\n    lon=-74.0060,\n    start_date=date(2024, 6, 1),\n    end_date=date(2024, 8, 31)\n)\n\nprint(f\"Risk Level: {risk['risk_level']}\")\nprint(f\"Rain Days: {risk['rain_days']}\")\n\nQuick Reference\nComponent\tPurpose\nOpenDataIntegrator\tMain integration engine\nOpenDataConnector\tBase connector class\nWeatherDataConnector\tWeather API connector\nMaterialPriceConnector\tMaterial price indices\nLaborRateConnector\tLabor rate data\nEnrichedData\tEnriched data result\nResources\nBook: \"Data-Driven Construction\" by Artem Boiko, Chapter 2.2\nWebsite: https://datadrivenconstruction.io\nNext Steps\nUse ontology-mapper for semantic mapping\nUse cost-prediction with indices\nUse weather-impact-analysis for scheduling"
  },
  "trust": {
    "sourceLabel": "tencent",
    "provenanceUrl": "https://clawhub.ai/datadrivenconstruction/open-data-integrator",
    "publisherUrl": "https://clawhub.ai/datadrivenconstruction/open-data-integrator",
    "owner": "datadrivenconstruction",
    "version": "2.1.0",
    "license": null,
    "verificationStatus": "Indexed source record"
  },
  "links": {
    "detailUrl": "https://openagent3.xyz/skills/open-data-integrator",
    "downloadUrl": "https://openagent3.xyz/downloads/open-data-integrator",
    "agentUrl": "https://openagent3.xyz/skills/open-data-integrator/agent",
    "manifestUrl": "https://openagent3.xyz/skills/open-data-integrator/agent.json",
    "briefUrl": "https://openagent3.xyz/skills/open-data-integrator/agent.md"
  }
}