{
  "schemaVersion": "1.0",
  "item": {
    "slug": "pandas-construction-analysis",
    "name": "Pandas Construction Analysis",
    "source": "tencent",
    "type": "skill",
    "category": "数据分析",
    "sourceUrl": "https://clawhub.ai/datadrivenconstruction/pandas-construction-analysis",
    "canonicalUrl": "https://clawhub.ai/datadrivenconstruction/pandas-construction-analysis",
    "targetPlatform": "OpenClaw"
  },
  "install": {
    "downloadMode": "redirect",
    "downloadUrl": "/downloads/pandas-construction-analysis",
    "sourceDownloadUrl": "https://wry-manatee-359.convex.site/api/v1/download?slug=pandas-construction-analysis",
    "sourcePlatform": "tencent",
    "targetPlatform": "OpenClaw",
    "installMethod": "Manual import",
    "extraction": "Extract archive",
    "prerequisites": [
      "OpenClaw"
    ],
    "packageFormat": "ZIP package",
    "includedAssets": [
      "claw.json",
      "instructions.md",
      "SKILL.md"
    ],
    "primaryDoc": "SKILL.md",
    "quickSetup": [
      "Download the package from Yavira.",
      "Extract the archive and review SKILL.md first.",
      "Import or place the package into your OpenClaw setup."
    ],
    "agentAssist": {
      "summary": "Hand the extracted package to your coding agent with a concrete install brief instead of figuring it out manually.",
      "steps": [
        "Download the package from Yavira.",
        "Extract it into a folder your agent can access.",
        "Paste one of the prompts below and point your agent at the extracted folder."
      ],
      "prompts": [
        {
          "label": "New install",
          "body": "I downloaded a skill package from Yavira. Read SKILL.md from the extracted folder and install it by following the included instructions. Tell me what you changed and call out any manual steps you could not complete."
        },
        {
          "label": "Upgrade existing",
          "body": "I downloaded an updated skill package from Yavira. Read SKILL.md from the extracted folder, compare it with my current installation, and upgrade it while preserving any custom configuration unless the package docs explicitly say otherwise. Summarize what changed and any follow-up checks I should run."
        }
      ]
    },
    "sourceHealth": {
      "source": "tencent",
      "status": "healthy",
      "reason": "direct_download_ok",
      "recommendedAction": "download",
      "checkedAt": "2026-04-23T16:43:11.935Z",
      "expiresAt": "2026-04-30T16:43:11.935Z",
      "httpStatus": 200,
      "finalUrl": "https://wry-manatee-359.convex.site/api/v1/download?slug=4claw-imageboard",
      "contentType": "application/zip",
      "probeMethod": "head",
      "details": {
        "probeUrl": "https://wry-manatee-359.convex.site/api/v1/download?slug=4claw-imageboard",
        "contentDisposition": "attachment; filename=\"4claw-imageboard-1.0.1.zip\"",
        "redirectLocation": null,
        "bodySnippet": null
      },
      "scope": "source",
      "summary": "Source download looks usable.",
      "detail": "Yavira can redirect you to the upstream package for this source.",
      "primaryActionLabel": "Download for OpenClaw",
      "primaryActionHref": "/downloads/pandas-construction-analysis"
    },
    "validation": {
      "installChecklist": [
        "Use the Yavira download entry.",
        "Review SKILL.md after the package is downloaded.",
        "Confirm the extracted package contains the expected setup assets."
      ],
      "postInstallChecks": [
        "Confirm the extracted package includes the expected docs or setup files.",
        "Validate the skill or prompts are available in your target agent workspace.",
        "Capture any manual follow-up steps the agent could not complete."
      ]
    },
    "downloadPageUrl": "https://openagent3.xyz/downloads/pandas-construction-analysis",
    "agentPageUrl": "https://openagent3.xyz/skills/pandas-construction-analysis/agent",
    "manifestUrl": "https://openagent3.xyz/skills/pandas-construction-analysis/agent.json",
    "briefUrl": "https://openagent3.xyz/skills/pandas-construction-analysis/agent.md"
  },
  "agentAssist": {
    "summary": "Hand the extracted package to your coding agent with a concrete install brief instead of figuring it out manually.",
    "steps": [
      "Download the package from Yavira.",
      "Extract it into a folder your agent can access.",
      "Paste one of the prompts below and point your agent at the extracted folder."
    ],
    "prompts": [
      {
        "label": "New install",
        "body": "I downloaded a skill package from Yavira. Read SKILL.md from the extracted folder and install it by following the included instructions. Tell me what you changed and call out any manual steps you could not complete."
      },
      {
        "label": "Upgrade existing",
        "body": "I downloaded an updated skill package from Yavira. Read SKILL.md from the extracted folder, compare it with my current installation, and upgrade it while preserving any custom configuration unless the package docs explicitly say otherwise. Summarize what changed and any follow-up checks I should run."
      }
    ]
  },
  "documentation": {
    "source": "clawhub",
    "primaryDoc": "SKILL.md",
    "sections": [
      {
        "title": "Overview",
        "body": "Based on DDC methodology (Chapter 2.3), this skill provides comprehensive Pandas operations for construction data processing. Pandas is the Swiss Army knife for data analysts - handling everything from simple data filtering to complex aggregations across millions of rows.\n\nBook Reference: \"Pandas DataFrame и LLM ChatGPT\" / \"Pandas DataFrame and LLM ChatGPT\"\n\n\"Используя Pandas, вы можете управлять и анализировать наборы данных, намного превосходящие возможности Excel. В то время как Excel способен обрабатывать до 1 миллиона строк данных, Pandas может без труда работать с наборами данных, содержащими десятки миллионов строк.\"\n— DDC Book, Chapter 2.3"
      },
      {
        "title": "Quick Start",
        "body": "import pandas as pd\n\n# Read construction data\ndf = pd.read_excel(\"bim_export.xlsx\")\n\n# Basic operations\nprint(df.head())           # First 5 rows\nprint(df.info())           # Column types and memory\nprint(df.describe())       # Statistics for numeric columns\n\n# Filter structural elements\nstructural = df[df['Category'] == 'Structural']\n\n# Calculate total volume\ntotal_volume = df['Volume'].sum()\nprint(f\"Total volume: {total_volume:.2f} m³\")"
      },
      {
        "title": "Creating DataFrames",
        "body": "import pandas as pd\n\n# From dictionary (construction elements)\nelements = pd.DataFrame({\n    'ElementId': ['E001', 'E002', 'E003', 'E004'],\n    'Category': ['Wall', 'Floor', 'Wall', 'Column'],\n    'Material': ['Concrete', 'Concrete', 'Brick', 'Steel'],\n    'Volume_m3': [45.5, 120.0, 32.0, 8.5],\n    'Level': ['Level 1', 'Level 1', 'Level 2', 'Level 1']\n})\n\n# From CSV\ndf_csv = pd.read_csv(\"construction_data.csv\")\n\n# From Excel\ndf_excel = pd.read_excel(\"project_data.xlsx\", sheet_name=\"Elements\")\n\n# From multiple Excel sheets\nall_sheets = pd.read_excel(\"project.xlsx\", sheet_name=None)  # Dict of DataFrames"
      },
      {
        "title": "Data Types in Construction",
        "body": "# Common data types for construction\ndf = pd.DataFrame({\n    'element_id': pd.Series(['W001', 'W002'], dtype='string'),\n    'quantity': pd.Series([10, 20], dtype='int64'),\n    'volume': pd.Series([45.5, 32.0], dtype='float64'),\n    'is_structural': pd.Series([True, False], dtype='bool'),\n    'created_date': pd.to_datetime(['2024-01-15', '2024-01-16']),\n    'category': pd.Categorical(['Wall', 'Slab'])\n})\n\n# Check data types\nprint(df.dtypes)\n\n# Convert types\ndf['quantity'] = df['quantity'].astype('float64')\ndf['volume'] = pd.to_numeric(df['volume'], errors='coerce')"
      },
      {
        "title": "Basic Filtering",
        "body": "# Single condition\nwalls = df[df['Category'] == 'Wall']\n\n# Multiple conditions (AND)\nlarge_concrete = df[(df['Material'] == 'Concrete') & (df['Volume_m3'] > 50)]\n\n# Multiple conditions (OR)\nwalls_or_floors = df[(df['Category'] == 'Wall') | (df['Category'] == 'Floor')]\n\n# Using isin for multiple values\nstructural = df[df['Category'].isin(['Wall', 'Column', 'Beam', 'Foundation'])]\n\n# String contains\ninsulated = df[df['Description'].str.contains('insulated', case=False, na=False)]\n\n# Null value filtering\nincomplete = df[df['Cost'].isna()]\ncomplete = df[df['Cost'].notna()]"
      },
      {
        "title": "Advanced Selection",
        "body": "# Select columns\nvolumes = df[['ElementId', 'Category', 'Volume_m3']]\n\n# Query syntax (SQL-like)\nresult = df.query(\"Category == 'Wall' and Volume_m3 > 30\")\n\n# Loc and iloc\nspecific_row = df.loc[0]                    # By label\nrange_rows = df.iloc[0:10]                  # By position\nspecific_cell = df.loc[0, 'Volume_m3']      # Row and column\nsubset = df.loc[0:5, ['Category', 'Volume_m3']]  # Range with columns"
      },
      {
        "title": "GroupBy Operations",
        "body": "# Basic groupby\nby_category = df.groupby('Category')['Volume_m3'].sum()\n\n# Multiple aggregations\nsummary = df.groupby('Category').agg({\n    'Volume_m3': ['sum', 'mean', 'count'],\n    'Cost': ['sum', 'mean']\n})\n\n# Named aggregations (cleaner output)\nsummary = df.groupby('Category').agg(\n    total_volume=('Volume_m3', 'sum'),\n    avg_volume=('Volume_m3', 'mean'),\n    element_count=('ElementId', 'count'),\n    total_cost=('Cost', 'sum')\n).reset_index()\n\n# Multiple grouping columns\nby_level_cat = df.groupby(['Level', 'Category']).agg({\n    'Volume_m3': 'sum',\n    'Cost': 'sum'\n}).reset_index()"
      },
      {
        "title": "Pivot Tables",
        "body": "# Create pivot table\npivot = pd.pivot_table(\n    df,\n    values='Volume_m3',\n    index='Level',\n    columns='Category',\n    aggfunc='sum',\n    fill_value=0,\n    margins=True,           # Add totals\n    margins_name='Total'\n)\n\n# Multiple values\npivot_detailed = pd.pivot_table(\n    df,\n    values=['Volume_m3', 'Cost'],\n    index='Level',\n    columns='Category',\n    aggfunc={'Volume_m3': 'sum', 'Cost': 'mean'}\n)"
      },
      {
        "title": "Adding Calculated Columns",
        "body": "# Simple calculation\ndf['Cost_Total'] = df['Volume_m3'] * df['Unit_Price']\n\n# Conditional column\ndf['Size_Category'] = df['Volume_m3'].apply(\n    lambda x: 'Large' if x > 50 else ('Medium' if x > 20 else 'Small')\n)\n\n# Using np.where for binary conditions\nimport numpy as np\ndf['Is_Large'] = np.where(df['Volume_m3'] > 50, True, False)\n\n# Using cut for binning\ndf['Volume_Bin'] = pd.cut(\n    df['Volume_m3'],\n    bins=[0, 10, 50, 100, float('inf')],\n    labels=['XS', 'S', 'M', 'L']\n)"
      },
      {
        "title": "String Operations",
        "body": "# Extract from strings\ndf['Level_Number'] = df['Level'].str.extract(r'(\\d+)').astype(int)\n\n# Split and expand\ndf[['Building', 'Floor']] = df['Location'].str.split('-', expand=True)\n\n# Clean strings\ndf['Category'] = df['Category'].str.strip().str.lower().str.title()\n\n# Replace values\ndf['Material'] = df['Material'].str.replace('Reinforced Concrete', 'RC')"
      },
      {
        "title": "Date Operations",
        "body": "# Parse dates\ndf['Start_Date'] = pd.to_datetime(df['Start_Date'])\n\n# Extract components\ndf['Year'] = df['Start_Date'].dt.year\ndf['Month'] = df['Start_Date'].dt.month\ndf['Week'] = df['Start_Date'].dt.isocalendar().week\ndf['DayOfWeek'] = df['Start_Date'].dt.day_name()\n\n# Calculate duration\ndf['Duration_Days'] = (df['End_Date'] - df['Start_Date']).dt.days\n\n# Filter by date range\nrecent = df[df['Start_Date'] >= '2024-01-01']"
      },
      {
        "title": "Merge DataFrames",
        "body": "# Elements data\nelements = pd.DataFrame({\n    'ElementId': ['E001', 'E002', 'E003'],\n    'Category': ['Wall', 'Floor', 'Column'],\n    'Volume_m3': [45.5, 120.0, 8.5]\n})\n\n# Unit prices\nprices = pd.DataFrame({\n    'Category': ['Wall', 'Floor', 'Column', 'Beam'],\n    'Unit_Price': [150, 80, 450, 200]\n})\n\n# Inner join (only matching)\nmerged = elements.merge(prices, on='Category', how='inner')\n\n# Left join (keep all elements)\nmerged = elements.merge(prices, on='Category', how='left')\n\n# Join on different column names\nresult = df1.merge(df2, left_on='elem_id', right_on='ElementId')"
      },
      {
        "title": "Concatenating DataFrames",
        "body": "# Vertical concatenation (stacking)\nall_floors = pd.concat([floor1_df, floor2_df, floor3_df], ignore_index=True)\n\n# Horizontal concatenation\ncombined = pd.concat([quantities, costs, schedule], axis=1)\n\n# Append new rows\nnew_elements = pd.DataFrame({'ElementId': ['E004'], 'Category': ['Beam']})\ndf = pd.concat([df, new_elements], ignore_index=True)"
      },
      {
        "title": "Quantity Take-Off (QTO)",
        "body": "def generate_qto_report(df):\n    \"\"\"Generate Quantity Take-Off summary by category\"\"\"\n    qto = df.groupby(['Category', 'Material']).agg(\n        count=('ElementId', 'count'),\n        total_volume=('Volume_m3', 'sum'),\n        total_area=('Area_m2', 'sum'),\n        avg_volume=('Volume_m3', 'mean')\n    ).round(2)\n\n    # Add percentage column\n    qto['volume_pct'] = (qto['total_volume'] /\n                          qto['total_volume'].sum() * 100).round(1)\n\n    return qto.sort_values('total_volume', ascending=False)\n\n# Usage\nqto_report = generate_qto_report(df)\nqto_report.to_excel(\"qto_report.xlsx\")"
      },
      {
        "title": "Cost Estimation",
        "body": "def calculate_project_cost(elements_df, prices_df, markup=0.15):\n    \"\"\"Calculate total project cost with markup\"\"\"\n    # Merge with prices\n    df = elements_df.merge(prices_df, on='Category', how='left')\n\n    # Calculate base cost\n    df['Base_Cost'] = df['Volume_m3'] * df['Unit_Price']\n\n    # Apply markup\n    df['Total_Cost'] = df['Base_Cost'] * (1 + markup)\n\n    # Summary by category\n    summary = df.groupby('Category').agg(\n        volume=('Volume_m3', 'sum'),\n        base_cost=('Base_Cost', 'sum'),\n        total_cost=('Total_Cost', 'sum')\n    ).round(2)\n\n    return df, summary, summary['total_cost'].sum()\n\n# Usage\ndetailed, summary, total = calculate_project_cost(elements, prices)\nprint(f\"Project Total: ${total:,.2f}\")"
      },
      {
        "title": "Material Summary",
        "body": "def material_summary(df):\n    \"\"\"Summarize materials across project\"\"\"\n    summary = df.groupby('Material').agg({\n        'Volume_m3': 'sum',\n        'Weight_kg': 'sum',\n        'ElementId': 'nunique'\n    }).rename(columns={'ElementId': 'Element_Count'})\n\n    summary['Volume_Pct'] = (summary['Volume_m3'] /\n                              summary['Volume_m3'].sum() * 100).round(1)\n\n    return summary.sort_values('Volume_m3', ascending=False)"
      },
      {
        "title": "Level-by-Level Analysis",
        "body": "def analyze_by_level(df):\n    \"\"\"Analyze construction quantities by building level\"\"\"\n    level_summary = df.pivot_table(\n        values=['Volume_m3', 'Cost'],\n        index='Level',\n        columns='Category',\n        aggfunc='sum',\n        fill_value=0\n    )\n\n    level_summary['Total_Volume'] = level_summary['Volume_m3'].sum(axis=1)\n    level_summary['Total_Cost'] = level_summary['Cost'].sum(axis=1)\n\n    return level_summary"
      },
      {
        "title": "Export to Excel with Multiple Sheets",
        "body": "def export_to_excel_formatted(df, summary, filepath):\n    \"\"\"Export with multiple sheets\"\"\"\n    with pd.ExcelWriter(filepath, engine='openpyxl') as writer:\n        df.to_excel(writer, sheet_name='Details', index=False)\n        summary.to_excel(writer, sheet_name='Summary')\n\n        pivot = pd.pivot_table(df, values='Volume_m3',\n                               index='Level', columns='Category')\n        pivot.to_excel(writer, sheet_name='By_Level')\n\n# Usage\nexport_to_excel_formatted(elements, qto_summary, \"project_report.xlsx\")"
      },
      {
        "title": "Export to CSV",
        "body": "# Basic export\ndf.to_csv(\"output.csv\", index=False)\n\n# With encoding for special characters\ndf.to_csv(\"output.csv\", index=False, encoding='utf-8-sig')\n\n# Specific columns\ndf[['ElementId', 'Category', 'Volume_m3']].to_csv(\"volumes.csv\", index=False)"
      },
      {
        "title": "Performance Tips",
        "body": "# Use categories for string columns with few unique values\ndf['Category'] = df['Category'].astype('category')\n\n# Read only needed columns\ndf = pd.read_csv(\"large_file.csv\", usecols=['ElementId', 'Category', 'Volume'])\n\n# Use chunking for very large files\nchunks = pd.read_csv(\"huge_file.csv\", chunksize=100000)\nresult = pd.concat([chunk[chunk['Category'] == 'Wall'] for chunk in chunks])\n\n# Check memory usage\nprint(df.memory_usage(deep=True).sum() / 1024**2, \"MB\")"
      },
      {
        "title": "Quick Reference",
        "body": "OperationCodeRead Excelpd.read_excel(\"file.xlsx\")Read CSVpd.read_csv(\"file.csv\")Filter rowsdf[df['Column'] == 'Value']Select columnsdf[['Col1', 'Col2']]Group and sumdf.groupby('Cat')['Vol'].sum()Pivot tablepd.pivot_table(df, values='Vol', index='Level')Mergedf1.merge(df2, on='key')Add columndf['New'] = df['A'] * df['B']Export Exceldf.to_excel(\"out.xlsx\", index=False)"
      },
      {
        "title": "Resources",
        "body": "Book: \"Data-Driven Construction\" by Artem Boiko, Chapter 2.3\nWebsite: https://datadrivenconstruction.io\nPandas Docs: https://pandas.pydata.org/docs/"
      },
      {
        "title": "Next Steps",
        "body": "See llm-data-automation for generating Pandas code with AI\nSee qto-report for specialized QTO calculations\nSee cost-estimation-resource for detailed cost calculations"
      }
    ],
    "body": "Pandas Construction Data Analysis\nOverview\n\nBased on DDC methodology (Chapter 2.3), this skill provides comprehensive Pandas operations for construction data processing. Pandas is the Swiss Army knife for data analysts - handling everything from simple data filtering to complex aggregations across millions of rows.\n\nBook Reference: \"Pandas DataFrame и LLM ChatGPT\" / \"Pandas DataFrame and LLM ChatGPT\"\n\n\"Используя Pandas, вы можете управлять и анализировать наборы данных, намного превосходящие возможности Excel. В то время как Excel способен обрабатывать до 1 миллиона строк данных, Pandas может без труда работать с наборами данных, содержащими десятки миллионов строк.\" — DDC Book, Chapter 2.3\n\nQuick Start\nimport pandas as pd\n\n# Read construction data\ndf = pd.read_excel(\"bim_export.xlsx\")\n\n# Basic operations\nprint(df.head())           # First 5 rows\nprint(df.info())           # Column types and memory\nprint(df.describe())       # Statistics for numeric columns\n\n# Filter structural elements\nstructural = df[df['Category'] == 'Structural']\n\n# Calculate total volume\ntotal_volume = df['Volume'].sum()\nprint(f\"Total volume: {total_volume:.2f} m³\")\n\nDataFrame Fundamentals\nCreating DataFrames\nimport pandas as pd\n\n# From dictionary (construction elements)\nelements = pd.DataFrame({\n    'ElementId': ['E001', 'E002', 'E003', 'E004'],\n    'Category': ['Wall', 'Floor', 'Wall', 'Column'],\n    'Material': ['Concrete', 'Concrete', 'Brick', 'Steel'],\n    'Volume_m3': [45.5, 120.0, 32.0, 8.5],\n    'Level': ['Level 1', 'Level 1', 'Level 2', 'Level 1']\n})\n\n# From CSV\ndf_csv = pd.read_csv(\"construction_data.csv\")\n\n# From Excel\ndf_excel = pd.read_excel(\"project_data.xlsx\", sheet_name=\"Elements\")\n\n# From multiple Excel sheets\nall_sheets = pd.read_excel(\"project.xlsx\", sheet_name=None)  # Dict of DataFrames\n\nData Types in Construction\n# Common data types for construction\ndf = pd.DataFrame({\n    'element_id': pd.Series(['W001', 'W002'], dtype='string'),\n    'quantity': pd.Series([10, 20], dtype='int64'),\n    'volume': pd.Series([45.5, 32.0], dtype='float64'),\n    'is_structural': pd.Series([True, False], dtype='bool'),\n    'created_date': pd.to_datetime(['2024-01-15', '2024-01-16']),\n    'category': pd.Categorical(['Wall', 'Slab'])\n})\n\n# Check data types\nprint(df.dtypes)\n\n# Convert types\ndf['quantity'] = df['quantity'].astype('float64')\ndf['volume'] = pd.to_numeric(df['volume'], errors='coerce')\n\nFiltering and Selection\nBasic Filtering\n# Single condition\nwalls = df[df['Category'] == 'Wall']\n\n# Multiple conditions (AND)\nlarge_concrete = df[(df['Material'] == 'Concrete') & (df['Volume_m3'] > 50)]\n\n# Multiple conditions (OR)\nwalls_or_floors = df[(df['Category'] == 'Wall') | (df['Category'] == 'Floor')]\n\n# Using isin for multiple values\nstructural = df[df['Category'].isin(['Wall', 'Column', 'Beam', 'Foundation'])]\n\n# String contains\ninsulated = df[df['Description'].str.contains('insulated', case=False, na=False)]\n\n# Null value filtering\nincomplete = df[df['Cost'].isna()]\ncomplete = df[df['Cost'].notna()]\n\nAdvanced Selection\n# Select columns\nvolumes = df[['ElementId', 'Category', 'Volume_m3']]\n\n# Query syntax (SQL-like)\nresult = df.query(\"Category == 'Wall' and Volume_m3 > 30\")\n\n# Loc and iloc\nspecific_row = df.loc[0]                    # By label\nrange_rows = df.iloc[0:10]                  # By position\nspecific_cell = df.loc[0, 'Volume_m3']      # Row and column\nsubset = df.loc[0:5, ['Category', 'Volume_m3']]  # Range with columns\n\nGrouping and Aggregation\nGroupBy Operations\n# Basic groupby\nby_category = df.groupby('Category')['Volume_m3'].sum()\n\n# Multiple aggregations\nsummary = df.groupby('Category').agg({\n    'Volume_m3': ['sum', 'mean', 'count'],\n    'Cost': ['sum', 'mean']\n})\n\n# Named aggregations (cleaner output)\nsummary = df.groupby('Category').agg(\n    total_volume=('Volume_m3', 'sum'),\n    avg_volume=('Volume_m3', 'mean'),\n    element_count=('ElementId', 'count'),\n    total_cost=('Cost', 'sum')\n).reset_index()\n\n# Multiple grouping columns\nby_level_cat = df.groupby(['Level', 'Category']).agg({\n    'Volume_m3': 'sum',\n    'Cost': 'sum'\n}).reset_index()\n\nPivot Tables\n# Create pivot table\npivot = pd.pivot_table(\n    df,\n    values='Volume_m3',\n    index='Level',\n    columns='Category',\n    aggfunc='sum',\n    fill_value=0,\n    margins=True,           # Add totals\n    margins_name='Total'\n)\n\n# Multiple values\npivot_detailed = pd.pivot_table(\n    df,\n    values=['Volume_m3', 'Cost'],\n    index='Level',\n    columns='Category',\n    aggfunc={'Volume_m3': 'sum', 'Cost': 'mean'}\n)\n\nData Transformation\nAdding Calculated Columns\n# Simple calculation\ndf['Cost_Total'] = df['Volume_m3'] * df['Unit_Price']\n\n# Conditional column\ndf['Size_Category'] = df['Volume_m3'].apply(\n    lambda x: 'Large' if x > 50 else ('Medium' if x > 20 else 'Small')\n)\n\n# Using np.where for binary conditions\nimport numpy as np\ndf['Is_Large'] = np.where(df['Volume_m3'] > 50, True, False)\n\n# Using cut for binning\ndf['Volume_Bin'] = pd.cut(\n    df['Volume_m3'],\n    bins=[0, 10, 50, 100, float('inf')],\n    labels=['XS', 'S', 'M', 'L']\n)\n\nString Operations\n# Extract from strings\ndf['Level_Number'] = df['Level'].str.extract(r'(\\d+)').astype(int)\n\n# Split and expand\ndf[['Building', 'Floor']] = df['Location'].str.split('-', expand=True)\n\n# Clean strings\ndf['Category'] = df['Category'].str.strip().str.lower().str.title()\n\n# Replace values\ndf['Material'] = df['Material'].str.replace('Reinforced Concrete', 'RC')\n\nDate Operations\n# Parse dates\ndf['Start_Date'] = pd.to_datetime(df['Start_Date'])\n\n# Extract components\ndf['Year'] = df['Start_Date'].dt.year\ndf['Month'] = df['Start_Date'].dt.month\ndf['Week'] = df['Start_Date'].dt.isocalendar().week\ndf['DayOfWeek'] = df['Start_Date'].dt.day_name()\n\n# Calculate duration\ndf['Duration_Days'] = (df['End_Date'] - df['Start_Date']).dt.days\n\n# Filter by date range\nrecent = df[df['Start_Date'] >= '2024-01-01']\n\nMerging and Joining\nMerge DataFrames\n# Elements data\nelements = pd.DataFrame({\n    'ElementId': ['E001', 'E002', 'E003'],\n    'Category': ['Wall', 'Floor', 'Column'],\n    'Volume_m3': [45.5, 120.0, 8.5]\n})\n\n# Unit prices\nprices = pd.DataFrame({\n    'Category': ['Wall', 'Floor', 'Column', 'Beam'],\n    'Unit_Price': [150, 80, 450, 200]\n})\n\n# Inner join (only matching)\nmerged = elements.merge(prices, on='Category', how='inner')\n\n# Left join (keep all elements)\nmerged = elements.merge(prices, on='Category', how='left')\n\n# Join on different column names\nresult = df1.merge(df2, left_on='elem_id', right_on='ElementId')\n\nConcatenating DataFrames\n# Vertical concatenation (stacking)\nall_floors = pd.concat([floor1_df, floor2_df, floor3_df], ignore_index=True)\n\n# Horizontal concatenation\ncombined = pd.concat([quantities, costs, schedule], axis=1)\n\n# Append new rows\nnew_elements = pd.DataFrame({'ElementId': ['E004'], 'Category': ['Beam']})\ndf = pd.concat([df, new_elements], ignore_index=True)\n\nConstruction-Specific Analyses\nQuantity Take-Off (QTO)\ndef generate_qto_report(df):\n    \"\"\"Generate Quantity Take-Off summary by category\"\"\"\n    qto = df.groupby(['Category', 'Material']).agg(\n        count=('ElementId', 'count'),\n        total_volume=('Volume_m3', 'sum'),\n        total_area=('Area_m2', 'sum'),\n        avg_volume=('Volume_m3', 'mean')\n    ).round(2)\n\n    # Add percentage column\n    qto['volume_pct'] = (qto['total_volume'] /\n                          qto['total_volume'].sum() * 100).round(1)\n\n    return qto.sort_values('total_volume', ascending=False)\n\n# Usage\nqto_report = generate_qto_report(df)\nqto_report.to_excel(\"qto_report.xlsx\")\n\nCost Estimation\ndef calculate_project_cost(elements_df, prices_df, markup=0.15):\n    \"\"\"Calculate total project cost with markup\"\"\"\n    # Merge with prices\n    df = elements_df.merge(prices_df, on='Category', how='left')\n\n    # Calculate base cost\n    df['Base_Cost'] = df['Volume_m3'] * df['Unit_Price']\n\n    # Apply markup\n    df['Total_Cost'] = df['Base_Cost'] * (1 + markup)\n\n    # Summary by category\n    summary = df.groupby('Category').agg(\n        volume=('Volume_m3', 'sum'),\n        base_cost=('Base_Cost', 'sum'),\n        total_cost=('Total_Cost', 'sum')\n    ).round(2)\n\n    return df, summary, summary['total_cost'].sum()\n\n# Usage\ndetailed, summary, total = calculate_project_cost(elements, prices)\nprint(f\"Project Total: ${total:,.2f}\")\n\nMaterial Summary\ndef material_summary(df):\n    \"\"\"Summarize materials across project\"\"\"\n    summary = df.groupby('Material').agg({\n        'Volume_m3': 'sum',\n        'Weight_kg': 'sum',\n        'ElementId': 'nunique'\n    }).rename(columns={'ElementId': 'Element_Count'})\n\n    summary['Volume_Pct'] = (summary['Volume_m3'] /\n                              summary['Volume_m3'].sum() * 100).round(1)\n\n    return summary.sort_values('Volume_m3', ascending=False)\n\nLevel-by-Level Analysis\ndef analyze_by_level(df):\n    \"\"\"Analyze construction quantities by building level\"\"\"\n    level_summary = df.pivot_table(\n        values=['Volume_m3', 'Cost'],\n        index='Level',\n        columns='Category',\n        aggfunc='sum',\n        fill_value=0\n    )\n\n    level_summary['Total_Volume'] = level_summary['Volume_m3'].sum(axis=1)\n    level_summary['Total_Cost'] = level_summary['Cost'].sum(axis=1)\n\n    return level_summary\n\nData Export\nExport to Excel with Multiple Sheets\ndef export_to_excel_formatted(df, summary, filepath):\n    \"\"\"Export with multiple sheets\"\"\"\n    with pd.ExcelWriter(filepath, engine='openpyxl') as writer:\n        df.to_excel(writer, sheet_name='Details', index=False)\n        summary.to_excel(writer, sheet_name='Summary')\n\n        pivot = pd.pivot_table(df, values='Volume_m3',\n                               index='Level', columns='Category')\n        pivot.to_excel(writer, sheet_name='By_Level')\n\n# Usage\nexport_to_excel_formatted(elements, qto_summary, \"project_report.xlsx\")\n\nExport to CSV\n# Basic export\ndf.to_csv(\"output.csv\", index=False)\n\n# With encoding for special characters\ndf.to_csv(\"output.csv\", index=False, encoding='utf-8-sig')\n\n# Specific columns\ndf[['ElementId', 'Category', 'Volume_m3']].to_csv(\"volumes.csv\", index=False)\n\nPerformance Tips\n# Use categories for string columns with few unique values\ndf['Category'] = df['Category'].astype('category')\n\n# Read only needed columns\ndf = pd.read_csv(\"large_file.csv\", usecols=['ElementId', 'Category', 'Volume'])\n\n# Use chunking for very large files\nchunks = pd.read_csv(\"huge_file.csv\", chunksize=100000)\nresult = pd.concat([chunk[chunk['Category'] == 'Wall'] for chunk in chunks])\n\n# Check memory usage\nprint(df.memory_usage(deep=True).sum() / 1024**2, \"MB\")\n\nQuick Reference\nOperation\tCode\nRead Excel\tpd.read_excel(\"file.xlsx\")\nRead CSV\tpd.read_csv(\"file.csv\")\nFilter rows\tdf[df['Column'] == 'Value']\nSelect columns\tdf[['Col1', 'Col2']]\nGroup and sum\tdf.groupby('Cat')['Vol'].sum()\nPivot table\tpd.pivot_table(df, values='Vol', index='Level')\nMerge\tdf1.merge(df2, on='key')\nAdd column\tdf['New'] = df['A'] * df['B']\nExport Excel\tdf.to_excel(\"out.xlsx\", index=False)\nResources\nBook: \"Data-Driven Construction\" by Artem Boiko, Chapter 2.3\nWebsite: https://datadrivenconstruction.io\nPandas Docs: https://pandas.pydata.org/docs/\nNext Steps\nSee llm-data-automation for generating Pandas code with AI\nSee qto-report for specialized QTO calculations\nSee cost-estimation-resource for detailed cost calculations"
  },
  "trust": {
    "sourceLabel": "tencent",
    "provenanceUrl": "https://clawhub.ai/datadrivenconstruction/pandas-construction-analysis",
    "publisherUrl": "https://clawhub.ai/datadrivenconstruction/pandas-construction-analysis",
    "owner": "datadrivenconstruction",
    "version": "2.1.0",
    "license": null,
    "verificationStatus": "Indexed source record"
  },
  "links": {
    "detailUrl": "https://openagent3.xyz/skills/pandas-construction-analysis",
    "downloadUrl": "https://openagent3.xyz/downloads/pandas-construction-analysis",
    "agentUrl": "https://openagent3.xyz/skills/pandas-construction-analysis/agent",
    "manifestUrl": "https://openagent3.xyz/skills/pandas-construction-analysis/agent.json",
    "briefUrl": "https://openagent3.xyz/skills/pandas-construction-analysis/agent.md"
  }
}