{
  "schemaVersion": "1.0",
  "item": {
    "slug": "llm-data-automation",
    "name": "LLM Data Automation",
    "source": "tencent",
    "type": "skill",
    "category": "通讯协作",
    "sourceUrl": "https://clawhub.ai/datadrivenconstruction/llm-data-automation",
    "canonicalUrl": "https://clawhub.ai/datadrivenconstruction/llm-data-automation",
    "targetPlatform": "OpenClaw"
  },
  "install": {
    "downloadMode": "redirect",
    "downloadUrl": "/downloads/llm-data-automation",
    "sourceDownloadUrl": "https://wry-manatee-359.convex.site/api/v1/download?slug=llm-data-automation",
    "sourcePlatform": "tencent",
    "targetPlatform": "OpenClaw",
    "installMethod": "Manual import",
    "extraction": "Extract archive",
    "prerequisites": [
      "OpenClaw"
    ],
    "packageFormat": "ZIP package",
    "includedAssets": [
      "claw.json",
      "instructions.md",
      "SKILL.md"
    ],
    "primaryDoc": "SKILL.md",
    "quickSetup": [
      "Download the package from Yavira.",
      "Extract the archive and review SKILL.md first.",
      "Import or place the package into your OpenClaw setup."
    ],
    "agentAssist": {
      "summary": "Hand the extracted package to your coding agent with a concrete install brief instead of figuring it out manually.",
      "steps": [
        "Download the package from Yavira.",
        "Extract it into a folder your agent can access.",
        "Paste one of the prompts below and point your agent at the extracted folder."
      ],
      "prompts": [
        {
          "label": "New install",
          "body": "I downloaded a skill package from Yavira. Read SKILL.md from the extracted folder and install it by following the included instructions. Tell me what you changed and call out any manual steps you could not complete."
        },
        {
          "label": "Upgrade existing",
          "body": "I downloaded an updated skill package from Yavira. Read SKILL.md from the extracted folder, compare it with my current installation, and upgrade it while preserving any custom configuration unless the package docs explicitly say otherwise. Summarize what changed and any follow-up checks I should run."
        }
      ]
    },
    "sourceHealth": {
      "source": "tencent",
      "status": "healthy",
      "reason": "direct_download_ok",
      "recommendedAction": "download",
      "checkedAt": "2026-04-23T16:43:11.935Z",
      "expiresAt": "2026-04-30T16:43:11.935Z",
      "httpStatus": 200,
      "finalUrl": "https://wry-manatee-359.convex.site/api/v1/download?slug=llm-data-automation",
      "contentType": "application/zip",
      "probeMethod": "head",
      "details": {
        "probeUrl": "https://wry-manatee-359.convex.site/api/v1/download?slug=llm-data-automation",
        "contentDisposition": "attachment; filename=\"llm-data-automation-2.1.0.zip\"",
        "redirectLocation": null,
        "bodySnippet": null
      },
      "scope": "source",
      "summary": "Source download looks usable.",
      "detail": "Yavira can redirect you to the upstream package for this source.",
      "primaryActionLabel": "Download for OpenClaw",
      "primaryActionHref": "/downloads/llm-data-automation"
    },
    "validation": {
      "installChecklist": [
        "Use the Yavira download entry.",
        "Review SKILL.md after the package is downloaded.",
        "Confirm the extracted package contains the expected setup assets."
      ],
      "postInstallChecks": [
        "Confirm the extracted package includes the expected docs or setup files.",
        "Validate the skill or prompts are available in your target agent workspace.",
        "Capture any manual follow-up steps the agent could not complete."
      ]
    },
    "downloadPageUrl": "https://openagent3.xyz/downloads/llm-data-automation",
    "agentPageUrl": "https://openagent3.xyz/skills/llm-data-automation/agent",
    "manifestUrl": "https://openagent3.xyz/skills/llm-data-automation/agent.json",
    "briefUrl": "https://openagent3.xyz/skills/llm-data-automation/agent.md"
  },
  "agentAssist": {
    "summary": "Hand the extracted package to your coding agent with a concrete install brief instead of figuring it out manually.",
    "steps": [
      "Download the package from Yavira.",
      "Extract it into a folder your agent can access.",
      "Paste one of the prompts below and point your agent at the extracted folder."
    ],
    "prompts": [
      {
        "label": "New install",
        "body": "I downloaded a skill package from Yavira. Read SKILL.md from the extracted folder and install it by following the included instructions. Tell me what you changed and call out any manual steps you could not complete."
      },
      {
        "label": "Upgrade existing",
        "body": "I downloaded an updated skill package from Yavira. Read SKILL.md from the extracted folder, compare it with my current installation, and upgrade it while preserving any custom configuration unless the package docs explicitly say otherwise. Summarize what changed and any follow-up checks I should run."
      }
    ]
  },
  "documentation": {
    "source": "clawhub",
    "primaryDoc": "SKILL.md",
    "sections": [
      {
        "title": "Overview",
        "body": "Based on DDC methodology (Chapter 2.3), this skill enables automation of construction data processing using Large Language Models (LLM). Instead of manually coding data transformations, you describe what you need in natural language, and the LLM generates the necessary Python/Pandas code.\n\nBook Reference: \"Pandas DataFrame и LLM ChatGPT\" / \"Pandas DataFrame and LLM ChatGPT\"\n\n\"LLM-модели, такие как ChatGPT и LLaMA, позволяют специалистам без глубоких знаний программирования внести свой вклад в автоматизацию и улучшение бизнес-процессов компании.\"\n— DDC Book, Chapter 2.3"
      },
      {
        "title": "Option 1: Use ChatGPT/Claude Online",
        "body": "Simply describe your data processing task in natural language:\n\nPrompt: \"Write Python code to read an Excel file with construction materials,\nfilter rows where quantity > 100, and save to CSV.\""
      },
      {
        "title": "Option 2: Run Local LLM (Ollama)",
        "body": "# Install Ollama from ollama.com\nollama pull mistral\n\n# Run a query\nollama run mistral \"Write Pandas code to calculate total cost from quantity * unit_price\""
      },
      {
        "title": "Option 3: Use LM Studio (GUI)",
        "body": "Download from lmstudio.ai\nInstall and select a model (e.g., Mistral, LLaMA)\nStart chatting with your local AI"
      },
      {
        "title": "DataFrame as Universal Format",
        "body": "import pandas as pd\n\n# Construction project as DataFrame\n# Rows = elements, Columns = attributes\ndf = pd.DataFrame({\n    'element_id': ['W001', 'W002', 'C001'],\n    'category': ['Wall', 'Wall', 'Column'],\n    'material': ['Concrete', 'Brick', 'Steel'],\n    'volume_m3': [45.5, 32.0, 8.2],\n    'cost_per_m3': [150, 80, 450]\n})\n\n# Calculate total cost\ndf['total_cost'] = df['volume_m3'] * df['cost_per_m3']\nprint(df)"
      },
      {
        "title": "LLM Prompts for Construction Tasks",
        "body": "Data Import:\n\n\"Write code to import Excel file with construction schedule,\nparse dates, and create a Pandas DataFrame\"\n\nData Filtering:\n\n\"Filter construction elements where category is 'Structural'\nand cost exceeds budget limit of 50000\"\n\nData Aggregation:\n\n\"Group construction data by floor level,\ncalculate total volume and cost for each floor\"\n\nReport Generation:\n\n\"Create summary report with material quantities grouped by category,\nexport to Excel with formatting\""
      },
      {
        "title": "1. Extract Data from PDF Documents",
        "body": "# Prompt to ChatGPT:\n# \"Write code to extract tables from PDF and convert to DataFrame\"\n\nimport pdfplumber\nimport pandas as pd\n\ndef pdf_to_dataframe(pdf_path):\n    \"\"\"Extract tables from PDF file\"\"\"\n    all_tables = []\n    with pdfplumber.open(pdf_path) as pdf:\n        for page in pdf.pages:\n            tables = page.extract_tables()\n            for table in tables:\n                if table:\n                    df = pd.DataFrame(table[1:], columns=table[0])\n                    all_tables.append(df)\n\n    if all_tables:\n        return pd.concat(all_tables, ignore_index=True)\n    return pd.DataFrame()\n\n# Usage\ndf = pdf_to_dataframe(\"construction_spec.pdf\")\ndf.to_excel(\"extracted_data.xlsx\", index=False)"
      },
      {
        "title": "2. Process BIM Element Data",
        "body": "# Prompt: \"Analyze BIM elements, group by category, calculate volumes\"\n\nimport pandas as pd\n\ndef analyze_bim_elements(csv_path):\n    \"\"\"Analyze BIM element data from CSV export\"\"\"\n    df = pd.read_csv(csv_path)\n\n    # Group by category\n    summary = df.groupby('Category').agg({\n        'Volume': 'sum',\n        'Area': 'sum',\n        'ElementId': 'count'\n    }).rename(columns={'ElementId': 'Count'})\n\n    return summary\n\n# Usage\nsummary = analyze_bim_elements(\"revit_export.csv\")\nprint(summary)"
      },
      {
        "title": "3. Cost Estimation Pipeline",
        "body": "# Prompt: \"Create cost estimation from quantities and unit prices\"\n\nimport pandas as pd\n\ndef calculate_cost_estimate(quantities_df, prices_df):\n    \"\"\"\n    Calculate project cost estimate\n\n    Args:\n        quantities_df: DataFrame with columns [item_code, quantity]\n        prices_df: DataFrame with columns [item_code, unit_price, unit]\n\n    Returns:\n        DataFrame with cost calculations\n    \"\"\"\n    # Merge quantities with prices\n    result = quantities_df.merge(prices_df, on='item_code', how='left')\n\n    # Calculate costs\n    result['total_cost'] = result['quantity'] * result['unit_price']\n\n    # Add summary\n    result['cost_percentage'] = (result['total_cost'] /\n                                  result['total_cost'].sum() * 100).round(2)\n\n    return result\n\n# Usage\nquantities = pd.DataFrame({\n    'item_code': ['C001', 'S001', 'W001'],\n    'quantity': [150, 2000, 500]\n})\n\nprices = pd.DataFrame({\n    'item_code': ['C001', 'S001', 'W001'],\n    'unit_price': [120, 45, 85],\n    'unit': ['m3', 'kg', 'm2']\n})\n\nestimate = calculate_cost_estimate(quantities, prices)\nprint(estimate)"
      },
      {
        "title": "4. Schedule Data Processing",
        "body": "# Prompt: \"Parse construction schedule, calculate durations, identify delays\"\n\nimport pandas as pd\nfrom datetime import datetime\n\ndef analyze_schedule(schedule_path):\n    \"\"\"Analyze construction schedule for delays\"\"\"\n    df = pd.read_excel(schedule_path)\n\n    # Parse dates\n    df['start_date'] = pd.to_datetime(df['start_date'])\n    df['end_date'] = pd.to_datetime(df['end_date'])\n    df['actual_end'] = pd.to_datetime(df['actual_end'])\n\n    # Calculate durations\n    df['planned_duration'] = (df['end_date'] - df['start_date']).dt.days\n    df['actual_duration'] = (df['actual_end'] - df['start_date']).dt.days\n\n    # Identify delays\n    df['delay_days'] = df['actual_duration'] - df['planned_duration']\n    df['is_delayed'] = df['delay_days'] > 0\n\n    return df\n\n# Usage\nschedule = analyze_schedule(\"project_schedule.xlsx\")\ndelayed_tasks = schedule[schedule['is_delayed']]\nprint(f\"Delayed tasks: {len(delayed_tasks)}\")"
      },
      {
        "title": "Using Ollama",
        "body": "# Install\ncurl -fsSL https://ollama.com/install.sh | sh\n\n# Download models\nollama pull mistral      # General purpose, 7B params\nollama pull codellama    # Code-focused\nollama pull deepseek-coder  # Best for coding tasks\n\n# Run\nollama run mistral \"Write Pandas code to merge two DataFrames on project_id\""
      },
      {
        "title": "Using LlamaIndex for Company Documents",
        "body": "# Load company documents into local LLM\nfrom llama_index import SimpleDirectoryReader, VectorStoreIndex\n\n# Read all PDFs from folder\nreader = SimpleDirectoryReader(\"company_documents/\")\ndocuments = reader.load_data()\n\n# Create searchable index\nindex = VectorStoreIndex.from_documents(documents)\n\n# Query your documents\nquery_engine = index.as_query_engine()\nresponse = query_engine.query(\n    \"What are the standard concrete mix specifications?\"\n)\nprint(response)"
      },
      {
        "title": "IDE Recommendations",
        "body": "IDE\tBest For\tFeatures\nJupyter Notebook\tLearning, experiments\tInteractive cells, visualizations\nGoogle Colab\tFree GPU, quick start\tCloud-based, pre-installed libs\nVS Code\tProfessional development\tExtensions, GitHub Copilot\nPyCharm\tLarge projects\tAdvanced debugging, refactoring"
      },
      {
        "title": "Quick Setup with Jupyter",
        "body": "pip install jupyter pandas openpyxl pdfplumber\njupyter notebook"
      },
      {
        "title": "Best Practices",
        "body": "Start Simple: Begin with clear, specific prompts\nIterate: Refine prompts based on results\nValidate: Always check generated code before running\nDocument: Save working prompts for reuse\nSecure: Use local LLM for sensitive company data"
      },
      {
        "title": "Data Import",
        "body": "\"Read Excel file and show first 10 rows\"\n\"Import CSV with custom delimiter and encoding\"\n\"Load multiple Excel sheets into dictionary of DataFrames\""
      },
      {
        "title": "Data Cleaning",
        "body": "\"Remove duplicate rows based on element_id\"\n\"Fill missing values with column mean\"\n\"Convert column to numeric, handling errors\""
      },
      {
        "title": "Data Analysis",
        "body": "\"Calculate descriptive statistics for numeric columns\"\n\"Find correlation between cost and duration\"\n\"Identify outliers using IQR method\""
      },
      {
        "title": "Data Export",
        "body": "\"Export to Excel with multiple sheets\"\n\"Save to CSV with specific encoding\"\n\"Generate formatted PDF report\""
      },
      {
        "title": "Resources",
        "body": "Book: \"Data-Driven Construction\" by Artem Boiko, Chapter 2.3\nWebsite: https://datadrivenconstruction.io\nPandas Documentation: https://pandas.pydata.org/docs/\nOllama: https://ollama.com\nLM Studio: https://lmstudio.ai\nGoogle Colab: https://colab.research.google.com"
      },
      {
        "title": "Next Steps",
        "body": "See pandas-construction-analysis for advanced Pandas operations\nSee pdf-to-structured for document processing\nSee etl-pipeline for automated data pipelines\nSee rag-construction for RAG implementation with construction documents"
      }
    ],
    "body": "LLM Data Automation for Construction\nOverview\n\nBased on DDC methodology (Chapter 2.3), this skill enables automation of construction data processing using Large Language Models (LLM). Instead of manually coding data transformations, you describe what you need in natural language, and the LLM generates the necessary Python/Pandas code.\n\nBook Reference: \"Pandas DataFrame и LLM ChatGPT\" / \"Pandas DataFrame and LLM ChatGPT\"\n\n\"LLM-модели, такие как ChatGPT и LLaMA, позволяют специалистам без глубоких знаний программирования внести свой вклад в автоматизацию и улучшение бизнес-процессов компании.\" — DDC Book, Chapter 2.3\n\nQuick Start\nOption 1: Use ChatGPT/Claude Online\n\nSimply describe your data processing task in natural language:\n\nPrompt: \"Write Python code to read an Excel file with construction materials,\nfilter rows where quantity > 100, and save to CSV.\"\n\nOption 2: Run Local LLM (Ollama)\n# Install Ollama from ollama.com\nollama pull mistral\n\n# Run a query\nollama run mistral \"Write Pandas code to calculate total cost from quantity * unit_price\"\n\nOption 3: Use LM Studio (GUI)\nDownload from lmstudio.ai\nInstall and select a model (e.g., Mistral, LLaMA)\nStart chatting with your local AI\nCore Concepts\nDataFrame as Universal Format\nimport pandas as pd\n\n# Construction project as DataFrame\n# Rows = elements, Columns = attributes\ndf = pd.DataFrame({\n    'element_id': ['W001', 'W002', 'C001'],\n    'category': ['Wall', 'Wall', 'Column'],\n    'material': ['Concrete', 'Brick', 'Steel'],\n    'volume_m3': [45.5, 32.0, 8.2],\n    'cost_per_m3': [150, 80, 450]\n})\n\n# Calculate total cost\ndf['total_cost'] = df['volume_m3'] * df['cost_per_m3']\nprint(df)\n\nLLM Prompts for Construction Tasks\n\nData Import:\n\n\"Write code to import Excel file with construction schedule,\nparse dates, and create a Pandas DataFrame\"\n\n\nData Filtering:\n\n\"Filter construction elements where category is 'Structural'\nand cost exceeds budget limit of 50000\"\n\n\nData Aggregation:\n\n\"Group construction data by floor level,\ncalculate total volume and cost for each floor\"\n\n\nReport Generation:\n\n\"Create summary report with material quantities grouped by category,\nexport to Excel with formatting\"\n\nCommon Use Cases\n1. Extract Data from PDF Documents\n# Prompt to ChatGPT:\n# \"Write code to extract tables from PDF and convert to DataFrame\"\n\nimport pdfplumber\nimport pandas as pd\n\ndef pdf_to_dataframe(pdf_path):\n    \"\"\"Extract tables from PDF file\"\"\"\n    all_tables = []\n    with pdfplumber.open(pdf_path) as pdf:\n        for page in pdf.pages:\n            tables = page.extract_tables()\n            for table in tables:\n                if table:\n                    df = pd.DataFrame(table[1:], columns=table[0])\n                    all_tables.append(df)\n\n    if all_tables:\n        return pd.concat(all_tables, ignore_index=True)\n    return pd.DataFrame()\n\n# Usage\ndf = pdf_to_dataframe(\"construction_spec.pdf\")\ndf.to_excel(\"extracted_data.xlsx\", index=False)\n\n2. Process BIM Element Data\n# Prompt: \"Analyze BIM elements, group by category, calculate volumes\"\n\nimport pandas as pd\n\ndef analyze_bim_elements(csv_path):\n    \"\"\"Analyze BIM element data from CSV export\"\"\"\n    df = pd.read_csv(csv_path)\n\n    # Group by category\n    summary = df.groupby('Category').agg({\n        'Volume': 'sum',\n        'Area': 'sum',\n        'ElementId': 'count'\n    }).rename(columns={'ElementId': 'Count'})\n\n    return summary\n\n# Usage\nsummary = analyze_bim_elements(\"revit_export.csv\")\nprint(summary)\n\n3. Cost Estimation Pipeline\n# Prompt: \"Create cost estimation from quantities and unit prices\"\n\nimport pandas as pd\n\ndef calculate_cost_estimate(quantities_df, prices_df):\n    \"\"\"\n    Calculate project cost estimate\n\n    Args:\n        quantities_df: DataFrame with columns [item_code, quantity]\n        prices_df: DataFrame with columns [item_code, unit_price, unit]\n\n    Returns:\n        DataFrame with cost calculations\n    \"\"\"\n    # Merge quantities with prices\n    result = quantities_df.merge(prices_df, on='item_code', how='left')\n\n    # Calculate costs\n    result['total_cost'] = result['quantity'] * result['unit_price']\n\n    # Add summary\n    result['cost_percentage'] = (result['total_cost'] /\n                                  result['total_cost'].sum() * 100).round(2)\n\n    return result\n\n# Usage\nquantities = pd.DataFrame({\n    'item_code': ['C001', 'S001', 'W001'],\n    'quantity': [150, 2000, 500]\n})\n\nprices = pd.DataFrame({\n    'item_code': ['C001', 'S001', 'W001'],\n    'unit_price': [120, 45, 85],\n    'unit': ['m3', 'kg', 'm2']\n})\n\nestimate = calculate_cost_estimate(quantities, prices)\nprint(estimate)\n\n4. Schedule Data Processing\n# Prompt: \"Parse construction schedule, calculate durations, identify delays\"\n\nimport pandas as pd\nfrom datetime import datetime\n\ndef analyze_schedule(schedule_path):\n    \"\"\"Analyze construction schedule for delays\"\"\"\n    df = pd.read_excel(schedule_path)\n\n    # Parse dates\n    df['start_date'] = pd.to_datetime(df['start_date'])\n    df['end_date'] = pd.to_datetime(df['end_date'])\n    df['actual_end'] = pd.to_datetime(df['actual_end'])\n\n    # Calculate durations\n    df['planned_duration'] = (df['end_date'] - df['start_date']).dt.days\n    df['actual_duration'] = (df['actual_end'] - df['start_date']).dt.days\n\n    # Identify delays\n    df['delay_days'] = df['actual_duration'] - df['planned_duration']\n    df['is_delayed'] = df['delay_days'] > 0\n\n    return df\n\n# Usage\nschedule = analyze_schedule(\"project_schedule.xlsx\")\ndelayed_tasks = schedule[schedule['is_delayed']]\nprint(f\"Delayed tasks: {len(delayed_tasks)}\")\n\nLocal LLM Setup (No Internet Required)\nUsing Ollama\n# Install\ncurl -fsSL https://ollama.com/install.sh | sh\n\n# Download models\nollama pull mistral      # General purpose, 7B params\nollama pull codellama    # Code-focused\nollama pull deepseek-coder  # Best for coding tasks\n\n# Run\nollama run mistral \"Write Pandas code to merge two DataFrames on project_id\"\n\nUsing LlamaIndex for Company Documents\n# Load company documents into local LLM\nfrom llama_index import SimpleDirectoryReader, VectorStoreIndex\n\n# Read all PDFs from folder\nreader = SimpleDirectoryReader(\"company_documents/\")\ndocuments = reader.load_data()\n\n# Create searchable index\nindex = VectorStoreIndex.from_documents(documents)\n\n# Query your documents\nquery_engine = index.as_query_engine()\nresponse = query_engine.query(\n    \"What are the standard concrete mix specifications?\"\n)\nprint(response)\n\nIDE Recommendations\nIDE\tBest For\tFeatures\nJupyter Notebook\tLearning, experiments\tInteractive cells, visualizations\nGoogle Colab\tFree GPU, quick start\tCloud-based, pre-installed libs\nVS Code\tProfessional development\tExtensions, GitHub Copilot\nPyCharm\tLarge projects\tAdvanced debugging, refactoring\nQuick Setup with Jupyter\npip install jupyter pandas openpyxl pdfplumber\njupyter notebook\n\nBest Practices\nStart Simple: Begin with clear, specific prompts\nIterate: Refine prompts based on results\nValidate: Always check generated code before running\nDocument: Save working prompts for reuse\nSecure: Use local LLM for sensitive company data\nCommon Prompts Library\nData Import\n\"Read Excel file and show first 10 rows\"\n\"Import CSV with custom delimiter and encoding\"\n\"Load multiple Excel sheets into dictionary of DataFrames\"\nData Cleaning\n\"Remove duplicate rows based on element_id\"\n\"Fill missing values with column mean\"\n\"Convert column to numeric, handling errors\"\nData Analysis\n\"Calculate descriptive statistics for numeric columns\"\n\"Find correlation between cost and duration\"\n\"Identify outliers using IQR method\"\nData Export\n\"Export to Excel with multiple sheets\"\n\"Save to CSV with specific encoding\"\n\"Generate formatted PDF report\"\nResources\nBook: \"Data-Driven Construction\" by Artem Boiko, Chapter 2.3\nWebsite: https://datadrivenconstruction.io\nPandas Documentation: https://pandas.pydata.org/docs/\nOllama: https://ollama.com\nLM Studio: https://lmstudio.ai\nGoogle Colab: https://colab.research.google.com\nNext Steps\nSee pandas-construction-analysis for advanced Pandas operations\nSee pdf-to-structured for document processing\nSee etl-pipeline for automated data pipelines\nSee rag-construction for RAG implementation with construction documents"
  },
  "trust": {
    "sourceLabel": "tencent",
    "provenanceUrl": "https://clawhub.ai/datadrivenconstruction/llm-data-automation",
    "publisherUrl": "https://clawhub.ai/datadrivenconstruction/llm-data-automation",
    "owner": "datadrivenconstruction",
    "version": "2.1.0",
    "license": null,
    "verificationStatus": "Indexed source record"
  },
  "links": {
    "detailUrl": "https://openagent3.xyz/skills/llm-data-automation",
    "downloadUrl": "https://openagent3.xyz/downloads/llm-data-automation",
    "agentUrl": "https://openagent3.xyz/skills/llm-data-automation/agent",
    "manifestUrl": "https://openagent3.xyz/skills/llm-data-automation/agent.json",
    "briefUrl": "https://openagent3.xyz/skills/llm-data-automation/agent.md"
  }
}