{
  "schemaVersion": "1.0",
  "item": {
    "slug": "historical-data-manager",
    "name": "Historical Data Manager",
    "source": "tencent",
    "type": "skill",
    "category": "数据分析",
    "sourceUrl": "https://clawhub.ai/datadrivenconstruction/historical-data-manager",
    "canonicalUrl": "https://clawhub.ai/datadrivenconstruction/historical-data-manager",
    "targetPlatform": "OpenClaw"
  },
  "install": {
    "downloadMode": "redirect",
    "downloadUrl": "/downloads/historical-data-manager",
    "sourceDownloadUrl": "https://wry-manatee-359.convex.site/api/v1/download?slug=historical-data-manager",
    "sourcePlatform": "tencent",
    "targetPlatform": "OpenClaw",
    "installMethod": "Manual import",
    "extraction": "Extract archive",
    "prerequisites": [
      "OpenClaw"
    ],
    "packageFormat": "ZIP package",
    "includedAssets": [
      "claw.json",
      "instructions.md",
      "SKILL.md"
    ],
    "primaryDoc": "SKILL.md",
    "quickSetup": [
      "Download the package from Yavira.",
      "Extract the archive and review SKILL.md first.",
      "Import or place the package into your OpenClaw setup."
    ],
    "agentAssist": {
      "summary": "Hand the extracted package to your coding agent with a concrete install brief instead of figuring it out manually.",
      "steps": [
        "Download the package from Yavira.",
        "Extract it into a folder your agent can access.",
        "Paste one of the prompts below and point your agent at the extracted folder."
      ],
      "prompts": [
        {
          "label": "New install",
          "body": "I downloaded a skill package from Yavira. Read SKILL.md from the extracted folder and install it by following the included instructions. Tell me what you changed and call out any manual steps you could not complete."
        },
        {
          "label": "Upgrade existing",
          "body": "I downloaded an updated skill package from Yavira. Read SKILL.md from the extracted folder, compare it with my current installation, and upgrade it while preserving any custom configuration unless the package docs explicitly say otherwise. Summarize what changed and any follow-up checks I should run."
        }
      ]
    },
    "sourceHealth": {
      "source": "tencent",
      "status": "healthy",
      "reason": "direct_download_ok",
      "recommendedAction": "download",
      "checkedAt": "2026-04-23T16:43:11.935Z",
      "expiresAt": "2026-04-30T16:43:11.935Z",
      "httpStatus": 200,
      "finalUrl": "https://wry-manatee-359.convex.site/api/v1/download?slug=4claw-imageboard",
      "contentType": "application/zip",
      "probeMethod": "head",
      "details": {
        "probeUrl": "https://wry-manatee-359.convex.site/api/v1/download?slug=4claw-imageboard",
        "contentDisposition": "attachment; filename=\"4claw-imageboard-1.0.1.zip\"",
        "redirectLocation": null,
        "bodySnippet": null
      },
      "scope": "source",
      "summary": "Source download looks usable.",
      "detail": "Yavira can redirect you to the upstream package for this source.",
      "primaryActionLabel": "Download for OpenClaw",
      "primaryActionHref": "/downloads/historical-data-manager"
    },
    "validation": {
      "installChecklist": [
        "Use the Yavira download entry.",
        "Review SKILL.md after the package is downloaded.",
        "Confirm the extracted package contains the expected setup assets."
      ],
      "postInstallChecks": [
        "Confirm the extracted package includes the expected docs or setup files.",
        "Verify that the skill or prompts are available in your target agent workspace.",
        "Capture any manual follow-up steps the agent could not complete."
      ]
    },
    "downloadPageUrl": "https://openagent3.xyz/downloads/historical-data-manager",
    "agentPageUrl": "https://openagent3.xyz/skills/historical-data-manager/agent",
    "manifestUrl": "https://openagent3.xyz/skills/historical-data-manager/agent.json",
    "briefUrl": "https://openagent3.xyz/skills/historical-data-manager/agent.md"
  },
  "agentAssist": {
    "summary": "Hand the extracted package to your coding agent with a concrete install brief instead of figuring it out manually.",
    "steps": [
      "Download the package from Yavira.",
      "Extract it into a folder your agent can access.",
      "Paste one of the prompts below and point your agent at the extracted folder."
    ],
    "prompts": [
      {
        "label": "New install",
        "body": "I downloaded a skill package from Yavira. Read SKILL.md from the extracted folder and install it by following the included instructions. Tell me what you changed and call out any manual steps you could not complete."
      },
      {
        "label": "Upgrade existing",
        "body": "I downloaded an updated skill package from Yavira. Read SKILL.md from the extracted folder, compare it with my current installation, and upgrade it while preserving any custom configuration unless the package docs explicitly say otherwise. Summarize what changed and any follow-up checks I should run."
      }
    ]
  },
  "documentation": {
    "source": "clawhub",
    "primaryDoc": "SKILL.md",
    "sections": [
      {
        "title": "Overview",
        "body": "Manage legacy construction data from archives, old systems, and historical records. Extract, clean, normalize, and migrate data into modern formats for analysis and benchmarking."
      },
      {
        "title": "Business Case",
        "body": "Construction companies accumulate decades of project data in various formats:\n\n- Paper records scanned to PDF\n- Legacy database exports (Access, dBase, FoxPro)\n- Old spreadsheet formats (Lotus 1-2-3, early Excel)\n- Proprietary software exports\n- Project closeout documentation\n\nThis skill helps extract value from historical data for:\n\n- Cost benchmarking and trending\n- Productivity analysis over time\n- Risk pattern identification\n- Estimating improvement"
      },
      {
        "title": "Historical Data Extractor",
        "body": "from dataclasses import dataclass, field\nfrom typing import List, Dict, Any, Optional\nfrom datetime import datetime\nfrom pathlib import Path\nimport pandas as pd\nimport re\nimport json\n\n@dataclass\nclass HistoricalRecord:\n    project_id: str\n    project_name: str\n    year: int\n    data_type: str  # cost, schedule, labor, material\n    original_format: str\n    extracted_data: Dict[str, Any]\n    quality_score: float\n    notes: List[str] = field(default_factory=list)\n\nclass HistoricalDataManager:\n    \"\"\"Manage extraction and normalization of historical construction data.\"\"\"\n\n    def __init__(self, archive_path: str):\n        self.archive_path = Path(archive_path)\n        self.records: List[HistoricalRecord] = []\n        self.normalization_rules = self._load_normalization_rules()\n\n    def scan_archive(self) -> Dict[str, int]:\n        \"\"\"Scan archive and categorize files by type.\"\"\"\n        file_types = {}\n\n        for file_path in self.archive_path.rglob('*'):\n            if file_path.is_file():\n                ext = file_path.suffix.lower()\n                file_types[ext] = file_types.get(ext, 0) + 1\n\n        return file_types\n\n    def extract_from_legacy_excel(self, file_path: str, year: int) -> List[HistoricalRecord]:\n        \"\"\"Extract data from legacy Excel files.\"\"\"\n        records = []\n\n        try:\n            # Try different engines for old formats\n            try:\n                df = pd.read_excel(file_path, engine='openpyxl')\n            except:\n                df = pd.read_excel(file_path, engine='xlrd')\n\n            # Detect data type from content\n            data_type = self._detect_data_type(df)\n\n            # Normalize column names\n            df = self._normalize_columns(df)\n\n            # Extract project info\n            project_info = self._extract_project_info(df, file_path)\n\n            record = HistoricalRecord(\n                
project_id=project_info.get('id', f'LEGACY-{year}-{hash(file_path) % 10000}'),\n                project_name=project_info.get('name', Path(file_path).stem),\n                year=year,\n                data_type=data_type,\n                original_format='excel',\n                extracted_data=df.to_dict('records'),\n                quality_score=self._assess_quality(df)\n            )\n            records.append(record)\n\n        except Exception as e:\n            print(f\"Error extracting {file_path}: {e}\")\n\n        return records\n\n    def extract_from_csv(self, file_path: str, year: int) -> HistoricalRecord:\n        \"\"\"Extract data from CSV files with encoding detection.\"\"\"\n        # Try different encodings\n        encodings = ['utf-8', 'latin-1', 'cp1252', 'iso-8859-1']\n\n        for encoding in encodings:\n            try:\n                df = pd.read_csv(file_path, encoding=encoding)\n                break\n            except:\n                continue\n\n        df = self._normalize_columns(df)\n        data_type = self._detect_data_type(df)\n\n        return HistoricalRecord(\n            project_id=f'CSV-{year}-{hash(file_path) % 10000}',\n            project_name=Path(file_path).stem,\n            year=year,\n            data_type=data_type,\n            original_format='csv',\n            extracted_data=df.to_dict('records'),\n            quality_score=self._assess_quality(df)\n        )\n\n    def extract_from_database_export(self, file_path: str, db_type: str) -> List[HistoricalRecord]:\n        \"\"\"Extract data from legacy database exports.\"\"\"\n        records = []\n\n        if db_type == 'access':\n            # Read Access MDB/ACCDB files\n            import pyodbc\n            conn_str = f'DRIVER={{Microsoft Access Driver (*.mdb, *.accdb)}};DBQ={file_path}'\n            conn = pyodbc.connect(conn_str)\n\n            # Get all tables\n            cursor = conn.cursor()\n            tables = [row.table_name for row in 
cursor.tables(tableType='TABLE')]\n\n            for table in tables:\n                df = pd.read_sql(f'SELECT * FROM [{table}]', conn)\n                # Process each table...\n\n            conn.close()\n\n        return records\n\n    def normalize_cost_data(self, records: List[HistoricalRecord], base_year: int = 2026) -> pd.DataFrame:\n        \"\"\"Normalize historical cost data to current dollars.\"\"\"\n        # RSMeans historical cost indices (example values)\n        cost_indices = {\n            2015: 0.82, 2016: 0.84, 2017: 0.87, 2018: 0.90,\n            2019: 0.93, 2020: 0.95, 2021: 0.98, 2022: 1.02,\n            2023: 1.06, 2024: 1.10, 2025: 1.14, 2026: 1.18\n        }\n\n        normalized_data = []\n\n        for record in records:\n            if record.data_type == 'cost':\n                year_index = cost_indices.get(record.year, 1.0)\n                base_index = cost_indices.get(base_year, 1.18)\n                escalation_factor = base_index / year_index\n\n                for item in record.extracted_data:\n                    if 'amount' in item or 'cost' in item:\n                        original_cost = item.get('amount') or item.get('cost', 0)\n                        normalized_item = item.copy()\n                        normalized_item['original_cost'] = original_cost\n                        normalized_item['normalized_cost'] = original_cost * escalation_factor\n                        normalized_item['escalation_factor'] = escalation_factor\n                        normalized_item['original_year'] = record.year\n                        normalized_item['project_id'] = record.project_id\n                        normalized_data.append(normalized_item)\n\n        return pd.DataFrame(normalized_data)\n\n    def _detect_data_type(self, df: pd.DataFrame) -> str:\n        \"\"\"Detect type of data from column names and content.\"\"\"\n        columns_lower = [c.lower() for c in df.columns]\n\n        if any(c in columns_lower for c in 
['cost', 'amount', 'price', 'total', 'budget']):\n            return 'cost'\n        elif any(c in columns_lower for c in ['start', 'finish', 'duration', 'task', 'activity']):\n            return 'schedule'\n        elif any(c in columns_lower for c in ['hours', 'labor', 'worker', 'crew']):\n            return 'labor'\n        elif any(c in columns_lower for c in ['material', 'quantity', 'unit', 'supplier']):\n            return 'material'\n        else:\n            return 'unknown'\n\n    def _normalize_columns(self, df: pd.DataFrame) -> pd.DataFrame:\n        \"\"\"Normalize column names to standard format.\"\"\"\n        column_mapping = {\n            r'proj.*id': 'project_id',\n            r'proj.*name': 'project_name',\n            r'desc.*': 'description',\n            r'qty|quantity': 'quantity',\n            r'unit.*cost|unit.*price': 'unit_cost',\n            r'total|amount': 'amount',\n            r'start.*date': 'start_date',\n            r'end.*date|finish.*date': 'end_date',\n            r'dur.*': 'duration',\n        }\n\n        new_columns = {}\n        for col in df.columns:\n            col_lower = col.lower().strip()\n            for pattern, new_name in column_mapping.items():\n                if re.match(pattern, col_lower):\n                    new_columns[col] = new_name\n                    break\n\n        return df.rename(columns=new_columns)\n\n    def _assess_quality(self, df: pd.DataFrame) -> float:\n        \"\"\"Assess data quality score (0-1).\"\"\"\n        if df.empty:\n            return 0.0\n\n        scores = []\n\n        # Completeness: % of non-null values\n        completeness = 1 - (df.isnull().sum().sum() / df.size)\n        scores.append(completeness)\n\n        # Column quality: has meaningful column names\n        meaningful_cols = sum(1 for c in df.columns if len(c) > 2 and not c.startswith('Unnamed'))\n        col_quality = meaningful_cols / len(df.columns)\n        scores.append(col_quality)\n\n        # Row count: 
more data is better (capped at 1.0)\n        row_score = min(len(df) / 100, 1.0)\n        scores.append(row_score)\n\n        return sum(scores) / len(scores)\n\n    def _extract_project_info(self, df: pd.DataFrame, file_path: str) -> Dict[str, str]:\n        \"\"\"Extract project info from data or filename.\"\"\"\n        info = {}\n\n        # Try to find project info in data\n        for col in df.columns:\n            if 'project' in col.lower() and 'id' in col.lower():\n                info['id'] = str(df[col].iloc[0]) if not df[col].empty else None\n            if 'project' in col.lower() and 'name' in col.lower():\n                info['name'] = str(df[col].iloc[0]) if not df[col].empty else None\n\n        # Fallback to filename\n        if 'name' not in info:\n            info['name'] = Path(file_path).stem\n\n        return info\n\n    def _load_normalization_rules(self) -> Dict:\n        \"\"\"Load rules for normalizing legacy data.\"\"\"\n        return {\n            'unit_conversions': {\n                'M': 1000,  # Thousand\n                'C': 100,   # Hundred\n                'LF': 1,    # Linear Foot\n                'SF': 1,    # Square Foot\n                'CY': 1,    # Cubic Yard\n            },\n            'date_formats': [\n                '%m/%d/%Y', '%m/%d/%y', '%Y-%m-%d',\n                '%d-%b-%Y', '%B %d, %Y'\n            ]\n        }\n\n    def generate_migration_report(self) -> str:\n        \"\"\"Generate report on migrated data.\"\"\"\n        report = [\"# Historical Data Migration Report\", \"\"]\n\n        # Summary\n        report.append(\"## Summary\")\n        report.append(f\"- Total Records: {len(self.records)}\")\n\n        by_type = {}\n        by_year = {}\n        for r in self.records:\n            by_type[r.data_type] = by_type.get(r.data_type, 0) + 1\n            by_year[r.year] = by_year.get(r.year, 0) + 1\n\n        report.append(\"\\n### By Data Type\")\n        for dt, count in sorted(by_type.items()):\n      
      report.append(f\"- {dt}: {count}\")\n\n        report.append(\"\\n### By Year\")\n        for year, count in sorted(by_year.items()):\n            report.append(f\"- {year}: {count}\")\n\n        # Quality Assessment\n        report.append(\"\\n## Data Quality\")\n        avg_quality = sum(r.quality_score for r in self.records) / len(self.records) if self.records else 0\n        report.append(f\"- Average Quality Score: {avg_quality:.2%}\")\n\n        low_quality = [r for r in self.records if r.quality_score < 0.5]\n        if low_quality:\n            report.append(f\"\\n### Low Quality Records ({len(low_quality)})\")\n            for r in low_quality[:10]:\n                report.append(f\"- {r.project_name} ({r.year}): {r.quality_score:.2%}\")\n\n        return \"\\n\".join(report)"
      },
      {
        "title": "Legacy System Connectors",
        "body": "class LegacySystemConnector:\n    \"\"\"Connect to various legacy construction systems.\"\"\"\n\n    @staticmethod\n    def read_timberline_export(file_path: str) -> pd.DataFrame:\n        \"\"\"Read Sage Timberline (now Sage 300) export files.\"\"\"\n        # Timberline exports typically have specific format\n        df = pd.read_csv(file_path, encoding='cp1252')\n\n        # Map Timberline columns to standard\n        column_map = {\n            'JOB': 'project_id',\n            'PHASE': 'phase_code',\n            'CATEGORY': 'cost_code',\n            'DESCRIPTION': 'description',\n            'ESTIMATE': 'estimated_cost',\n            'ACTUAL': 'actual_cost',\n            'COMMITTED': 'committed_cost'\n        }\n\n        return df.rename(columns=column_map)\n\n    @staticmethod\n    def read_primavera_xer(file_path: str) -> Dict[str, pd.DataFrame]:\n        \"\"\"Read Primavera P6 XER export files.\"\"\"\n        tables = {}\n        current_table = None\n        current_data = []\n        columns = []\n\n        with open(file_path, 'r', encoding='utf-8') as f:\n            for line in f:\n                line = line.strip()\n                if line.startswith('%T'):\n                    # Save previous table\n                    if current_table and current_data:\n                        tables[current_table] = pd.DataFrame(current_data, columns=columns)\n                    # Start new table\n                    current_table = line.split('\\t')[1] if '\\t' in line else None\n                    current_data = []\n                    columns = []\n                elif line.startswith('%F'):\n                    # Field definitions\n                    columns = line.split('\\t')[1:]\n                elif line.startswith('%R'):\n                    # Data row\n                    current_data.append(line.split('\\t')[1:])\n\n        # Save last table\n        if current_table and current_data:\n            tables[current_table] = 
pd.DataFrame(current_data, columns=columns)\n\n        return tables\n\n    @staticmethod\n    def read_mc2_ice(file_path: str) -> pd.DataFrame:\n        \"\"\"Read MC2 ICE estimating export.\"\"\"\n        # MC2 ICE format handling\n        pass"
      },
      {
        "title": "Quick Start",
        "body": "# Initialize manager\nmanager = HistoricalDataManager('/archive/projects')\n\n# Scan archive\nfile_types = manager.scan_archive()\nprint(f\"Found: {file_types}\")\n\n# Extract from legacy Excel files\nfor year in range(2015, 2024):\n    year_path = f'/archive/projects/{year}'\n    for file in Path(year_path).glob('*.xls*'):\n        records = manager.extract_from_legacy_excel(str(file), year)\n        manager.records.extend(records)\n\n# Normalize cost data to 2026 dollars\ncost_records = [r for r in manager.records if r.data_type == 'cost']\nnormalized_costs = manager.normalize_cost_data(cost_records, base_year=2026)\n\n# Generate migration report\nreport = manager.generate_migration_report()\nprint(report)\n\n# Export for analysis\nnormalized_costs.to_excel('historical_costs_normalized.xlsx', index=False)"
      },
      {
        "title": "Common Use Cases",
        "body": "- Cost Benchmarking: Normalize historical costs for comparison\n- Productivity Analysis: Track labor productivity over time\n- Risk Identification: Find patterns in historical project issues\n- Estimating Calibration: Improve estimates with historical data"
      },
      {
        "title": "Dependencies",
        "body": "pip install pandas openpyxl xlrd pyodbc"
      },
      {
        "title": "Resources",
        "body": "- RSMeans Historical Cost Index: For cost escalation\n- ENR Construction Cost Index: Alternative escalation source\n- Legacy Format Documentation: Vendor-specific export formats"
      }
    ],
    "body": "Historical Data Manager for Construction\nOverview\n\nManage legacy construction data from archives, old systems, and historical records. Extract, clean, normalize, and migrate data into modern formats for analysis and benchmarking.\n\nBusiness Case\n\nConstruction companies accumulate decades of project data in various formats:\n\nPaper records scanned to PDF\nLegacy database exports (Access, dBase, FoxPro)\nOld spreadsheet formats (Lotus 1-2-3, early Excel)\nProprietary software exports\nProject closeout documentation\n\nThis skill helps extract value from historical data for:\n\nCost benchmarking and trending\nProductivity analysis over time\nRisk pattern identification\nEstimating improvement\nTechnical Implementation\nHistorical Data Extractor\nfrom dataclasses import dataclass, field\nfrom typing import List, Dict, Any, Optional\nfrom datetime import datetime\nfrom pathlib import Path\nimport pandas as pd\nimport re\nimport json\n\n@dataclass\nclass HistoricalRecord:\n    project_id: str\n    project_name: str\n    year: int\n    data_type: str  # cost, schedule, labor, material\n    original_format: str\n    extracted_data: Dict[str, Any]\n    quality_score: float\n    notes: List[str] = field(default_factory=list)\n\nclass HistoricalDataManager:\n    \"\"\"Manage extraction and normalization of historical construction data.\"\"\"\n\n    def __init__(self, archive_path: str):\n        self.archive_path = Path(archive_path)\n        self.records: List[HistoricalRecord] = []\n        self.normalization_rules = self._load_normalization_rules()\n\n    def scan_archive(self) -> Dict[str, int]:\n        \"\"\"Scan archive and categorize files by type.\"\"\"\n        file_types = {}\n\n        for file_path in self.archive_path.rglob('*'):\n            if file_path.is_file():\n                ext = file_path.suffix.lower()\n                file_types[ext] = file_types.get(ext, 0) + 1\n\n        return file_types\n\n    def 
extract_from_legacy_excel(self, file_path: str, year: int) -> List[HistoricalRecord]:\n        \"\"\"Extract data from legacy Excel files.\"\"\"\n        records = []\n\n        try:\n            # Try different engines for old formats\n            try:\n                df = pd.read_excel(file_path, engine='openpyxl')\n            except:\n                df = pd.read_excel(file_path, engine='xlrd')\n\n            # Detect data type from content\n            data_type = self._detect_data_type(df)\n\n            # Normalize column names\n            df = self._normalize_columns(df)\n\n            # Extract project info\n            project_info = self._extract_project_info(df, file_path)\n\n            record = HistoricalRecord(\n                project_id=project_info.get('id', f'LEGACY-{year}-{hash(file_path) % 10000}'),\n                project_name=project_info.get('name', Path(file_path).stem),\n                year=year,\n                data_type=data_type,\n                original_format='excel',\n                extracted_data=df.to_dict('records'),\n                quality_score=self._assess_quality(df)\n            )\n            records.append(record)\n\n        except Exception as e:\n            print(f\"Error extracting {file_path}: {e}\")\n\n        return records\n\n    def extract_from_csv(self, file_path: str, year: int) -> HistoricalRecord:\n        \"\"\"Extract data from CSV files with encoding detection.\"\"\"\n        # Try different encodings\n        encodings = ['utf-8', 'latin-1', 'cp1252', 'iso-8859-1']\n\n        for encoding in encodings:\n            try:\n                df = pd.read_csv(file_path, encoding=encoding)\n                break\n            except:\n                continue\n\n        df = self._normalize_columns(df)\n        data_type = self._detect_data_type(df)\n\n        return HistoricalRecord(\n            project_id=f'CSV-{year}-{hash(file_path) % 10000}',\n            project_name=Path(file_path).stem,\n          
  year=year,\n            data_type=data_type,\n            original_format='csv',\n            extracted_data=df.to_dict('records'),\n            quality_score=self._assess_quality(df)\n        )\n\n    def extract_from_database_export(self, file_path: str, db_type: str) -> List[HistoricalRecord]:\n        \"\"\"Extract data from legacy database exports.\"\"\"\n        records = []\n\n        if db_type == 'access':\n            # Read Access MDB/ACCDB files\n            import pyodbc\n            conn_str = f'DRIVER={{Microsoft Access Driver (*.mdb, *.accdb)}};DBQ={file_path}'\n            conn = pyodbc.connect(conn_str)\n\n            # Get all tables\n            cursor = conn.cursor()\n            tables = [row.table_name for row in cursor.tables(tableType='TABLE')]\n\n            for table in tables:\n                df = pd.read_sql(f'SELECT * FROM [{table}]', conn)\n                # Process each table...\n\n            conn.close()\n\n        return records\n\n    def normalize_cost_data(self, records: List[HistoricalRecord], base_year: int = 2026) -> pd.DataFrame:\n        \"\"\"Normalize historical cost data to current dollars.\"\"\"\n        # RSMeans historical cost indices (example values)\n        cost_indices = {\n            2015: 0.82, 2016: 0.84, 2017: 0.87, 2018: 0.90,\n            2019: 0.93, 2020: 0.95, 2021: 0.98, 2022: 1.02,\n            2023: 1.06, 2024: 1.10, 2025: 1.14, 2026: 1.18\n        }\n\n        normalized_data = []\n\n        for record in records:\n            if record.data_type == 'cost':\n                year_index = cost_indices.get(record.year, 1.0)\n                base_index = cost_indices.get(base_year, 1.18)\n                escalation_factor = base_index / year_index\n\n                for item in record.extracted_data:\n                    if 'amount' in item or 'cost' in item:\n                        original_cost = item.get('amount') or item.get('cost', 0)\n                        normalized_item = item.copy()\n     
                   normalized_item['original_cost'] = original_cost\n                        normalized_item['normalized_cost'] = original_cost * escalation_factor\n                        normalized_item['escalation_factor'] = escalation_factor\n                        normalized_item['original_year'] = record.year\n                        normalized_item['project_id'] = record.project_id\n                        normalized_data.append(normalized_item)\n\n        return pd.DataFrame(normalized_data)\n\n    def _detect_data_type(self, df: pd.DataFrame) -> str:\n        \"\"\"Detect type of data from column names and content.\"\"\"\n        columns_lower = [c.lower() for c in df.columns]\n\n        if any(c in columns_lower for c in ['cost', 'amount', 'price', 'total', 'budget']):\n            return 'cost'\n        elif any(c in columns_lower for c in ['start', 'finish', 'duration', 'task', 'activity']):\n            return 'schedule'\n        elif any(c in columns_lower for c in ['hours', 'labor', 'worker', 'crew']):\n            return 'labor'\n        elif any(c in columns_lower for c in ['material', 'quantity', 'unit', 'supplier']):\n            return 'material'\n        else:\n            return 'unknown'\n\n    def _normalize_columns(self, df: pd.DataFrame) -> pd.DataFrame:\n        \"\"\"Normalize column names to standard format.\"\"\"\n        column_mapping = {\n            r'proj.*id': 'project_id',\n            r'proj.*name': 'project_name',\n            r'desc.*': 'description',\n            r'qty|quantity': 'quantity',\n            r'unit.*cost|unit.*price': 'unit_cost',\n            r'total|amount': 'amount',\n            r'start.*date': 'start_date',\n            r'end.*date|finish.*date': 'end_date',\n            r'dur.*': 'duration',\n        }\n\n        new_columns = {}\n        for col in df.columns:\n            col_lower = col.lower().strip()\n            for pattern, new_name in column_mapping.items():\n                if re.match(pattern, 
col_lower):\n                    new_columns[col] = new_name\n                    break\n\n        return df.rename(columns=new_columns)\n\n    def _assess_quality(self, df: pd.DataFrame) -> float:\n        \"\"\"Assess data quality score (0-1).\"\"\"\n        if df.empty:\n            return 0.0\n\n        scores = []\n\n        # Completeness: % of non-null values\n        completeness = 1 - (df.isnull().sum().sum() / df.size)\n        scores.append(completeness)\n\n        # Column quality: has meaningful column names\n        meaningful_cols = sum(1 for c in df.columns if len(c) > 2 and not c.startswith('Unnamed'))\n        col_quality = meaningful_cols / len(df.columns)\n        scores.append(col_quality)\n\n        # Row count: more data is better (capped at 1.0)\n        row_score = min(len(df) / 100, 1.0)\n        scores.append(row_score)\n\n        return sum(scores) / len(scores)\n\n    def _extract_project_info(self, df: pd.DataFrame, file_path: str) -> Dict[str, str]:\n        \"\"\"Extract project info from data or filename.\"\"\"\n        info = {}\n\n        # Try to find project info in data\n        for col in df.columns:\n            if 'project' in col.lower() and 'id' in col.lower():\n                info['id'] = str(df[col].iloc[0]) if not df[col].empty else None\n            if 'project' in col.lower() and 'name' in col.lower():\n                info['name'] = str(df[col].iloc[0]) if not df[col].empty else None\n\n        # Fallback to filename\n        if 'name' not in info:\n            info['name'] = Path(file_path).stem\n\n        return info\n\n    def _load_normalization_rules(self) -> Dict:\n        \"\"\"Load rules for normalizing legacy data.\"\"\"\n        return {\n            'unit_conversions': {\n                'M': 1000,  # Thousand\n                'C': 100,   # Hundred\n                'LF': 1,    # Linear Foot\n                'SF': 1,    # Square Foot\n                'CY': 1,    # Cubic Yard\n            },\n            
'date_formats': [\n                '%m/%d/%Y', '%m/%d/%y', '%Y-%m-%d',\n                '%d-%b-%Y', '%B %d, %Y'\n            ]\n        }\n\n    def generate_migration_report(self) -> str:\n        \"\"\"Generate report on migrated data.\"\"\"\n        report = [\"# Historical Data Migration Report\", \"\"]\n\n        # Summary\n        report.append(\"## Summary\")\n        report.append(f\"- Total Records: {len(self.records)}\")\n\n        by_type = {}\n        by_year = {}\n        for r in self.records:\n            by_type[r.data_type] = by_type.get(r.data_type, 0) + 1\n            by_year[r.year] = by_year.get(r.year, 0) + 1\n\n        report.append(\"\\n### By Data Type\")\n        for dt, count in sorted(by_type.items()):\n            report.append(f\"- {dt}: {count}\")\n\n        report.append(\"\\n### By Year\")\n        for year, count in sorted(by_year.items()):\n            report.append(f\"- {year}: {count}\")\n\n        # Quality Assessment\n        report.append(\"\\n## Data Quality\")\n        avg_quality = sum(r.quality_score for r in self.records) / len(self.records) if self.records else 0\n        report.append(f\"- Average Quality Score: {avg_quality:.2%}\")\n\n        low_quality = [r for r in self.records if r.quality_score < 0.5]\n        if low_quality:\n            report.append(f\"\\n### Low Quality Records ({len(low_quality)})\")\n            for r in low_quality[:10]:\n                report.append(f\"- {r.project_name} ({r.year}): {r.quality_score:.2%}\")\n\n        return \"\\n\".join(report)\n\nLegacy System Connectors\nclass LegacySystemConnector:\n    \"\"\"Connect to various legacy construction systems.\"\"\"\n\n    @staticmethod\n    def read_timberline_export(file_path: str) -> pd.DataFrame:\n        \"\"\"Read Sage Timberline (now Sage 300) export files.\"\"\"\n        # Timberline exports typically have specific format\n        df = pd.read_csv(file_path, encoding='cp1252')\n\n        # Map Timberline columns to standard\n     
   column_map = {\n            'JOB': 'project_id',\n            'PHASE': 'phase_code',\n            'CATEGORY': 'cost_code',\n            'DESCRIPTION': 'description',\n            'ESTIMATE': 'estimated_cost',\n            'ACTUAL': 'actual_cost',\n            'COMMITTED': 'committed_cost'\n        }\n\n        return df.rename(columns=column_map)\n\n    @staticmethod\n    def read_primavera_xer(file_path: str) -> Dict[str, pd.DataFrame]:\n        \"\"\"Read Primavera P6 XER export files.\"\"\"\n        tables = {}\n        current_table = None\n        current_data = []\n        columns = []\n\n        with open(file_path, 'r', encoding='utf-8') as f:\n            for line in f:\n                line = line.strip()\n                if line.startswith('%T'):\n                    # Save previous table\n                    if current_table and current_data:\n                        tables[current_table] = pd.DataFrame(current_data, columns=columns)\n                    # Start new table\n                    current_table = line.split('\\t')[1] if '\\t' in line else None\n                    current_data = []\n                    columns = []\n                elif line.startswith('%F'):\n                    # Field definitions\n                    columns = line.split('\\t')[1:]\n                elif line.startswith('%R'):\n                    # Data row\n                    current_data.append(line.split('\\t')[1:])\n\n        # Save last table\n        if current_table and current_data:\n            tables[current_table] = pd.DataFrame(current_data, columns=columns)\n\n        return tables\n\n    @staticmethod\n    def read_mc2_ice(file_path: str) -> pd.DataFrame:\n        \"\"\"Read MC2 ICE estimating export.\"\"\"\n        # MC2 ICE format handling\n        pass\n\nQuick Start\n# Initialize manager\nmanager = HistoricalDataManager('/archive/projects')\n\n# Scan archive\nfile_types = manager.scan_archive()\nprint(f\"Found: {file_types}\")\n\n# Extract from 
legacy Excel files\nfor year in range(2015, 2024):\n    year_path = f'/archive/projects/{year}'\n    for file in Path(year_path).glob('*.xls*'):\n        records = manager.extract_from_legacy_excel(str(file), year)\n        manager.records.extend(records)\n\n# Normalize cost data to 2026 dollars\ncost_records = [r for r in manager.records if r.data_type == 'cost']\nnormalized_costs = manager.normalize_cost_data(cost_records, base_year=2026)\n\n# Generate migration report\nreport = manager.generate_migration_report()\nprint(report)\n\n# Export for analysis\nnormalized_costs.to_excel('historical_costs_normalized.xlsx', index=False)\n\nCommon Use Cases\nCost Benchmarking: Normalize historical costs for comparison\nProductivity Analysis: Track labor productivity over time\nRisk Identification: Find patterns in historical project issues\nEstimating Calibration: Improve estimates with historical data\nDependencies\npip install pandas openpyxl xlrd pyodbc\n\nResources\nRSMeans Historical Cost Index: For cost escalation\nENR Construction Cost Index: Alternative escalation source\nLegacy Format Documentation: Vendor-specific export formats"
  },
  "trust": {
    "sourceLabel": "tencent",
    "provenanceUrl": "https://clawhub.ai/datadrivenconstruction/historical-data-manager",
    "publisherUrl": "https://clawhub.ai/datadrivenconstruction/historical-data-manager",
    "owner": "datadrivenconstruction",
    "version": "2.1.0",
    "license": null,
    "verificationStatus": "Indexed source record"
  },
  "links": {
    "detailUrl": "https://openagent3.xyz/skills/historical-data-manager",
    "downloadUrl": "https://openagent3.xyz/downloads/historical-data-manager",
    "agentUrl": "https://openagent3.xyz/skills/historical-data-manager/agent",
    "manifestUrl": "https://openagent3.xyz/skills/historical-data-manager/agent.json",
    "briefUrl": "https://openagent3.xyz/skills/historical-data-manager/agent.md"
  }
}