{
  "schemaVersion": "1.0",
  "item": {
    "slug": "specification-extractor",
    "name": "Specification Extractor",
    "source": "tencent",
    "type": "skill",
    "category": "开发工具",
    "sourceUrl": "https://clawhub.ai/datadrivenconstruction/specification-extractor",
    "canonicalUrl": "https://clawhub.ai/datadrivenconstruction/specification-extractor",
    "targetPlatform": "OpenClaw"
  },
  "install": {
    "downloadMode": "redirect",
    "downloadUrl": "/downloads/specification-extractor",
    "sourceDownloadUrl": "https://wry-manatee-359.convex.site/api/v1/download?slug=specification-extractor",
    "sourcePlatform": "tencent",
    "targetPlatform": "OpenClaw",
    "installMethod": "Manual import",
    "extraction": "Extract archive",
    "prerequisites": [
      "OpenClaw"
    ],
    "packageFormat": "ZIP package",
    "includedAssets": [
      "claw.json",
      "instructions.md",
      "SKILL.md"
    ],
    "primaryDoc": "SKILL.md",
    "quickSetup": [
      "Download the package from Yavira.",
      "Extract the archive and review SKILL.md first.",
      "Import or place the package into your OpenClaw setup."
    ],
    "agentAssist": {
      "summary": "Hand the extracted package to your coding agent with a concrete install brief instead of figuring it out manually.",
      "steps": [
        "Download the package from Yavira.",
        "Extract it into a folder your agent can access.",
        "Paste one of the prompts below and point your agent at the extracted folder."
      ],
      "prompts": [
        {
          "label": "New install",
          "body": "I downloaded a skill package from Yavira. Read SKILL.md from the extracted folder and install it by following the included instructions. Tell me what you changed and call out any manual steps you could not complete."
        },
        {
          "label": "Upgrade existing",
          "body": "I downloaded an updated skill package from Yavira. Read SKILL.md from the extracted folder, compare it with my current installation, and upgrade it while preserving any custom configuration unless the package docs explicitly say otherwise. Summarize what changed and any follow-up checks I should run."
        }
      ]
    },
    "sourceHealth": {
      "source": "tencent",
      "slug": "specification-extractor",
      "status": "healthy",
      "reason": "direct_download_ok",
      "recommendedAction": "download",
      "checkedAt": "2026-05-09T07:35:49.407Z",
      "expiresAt": "2026-05-16T07:35:49.407Z",
      "httpStatus": 200,
      "finalUrl": "https://wry-manatee-359.convex.site/api/v1/download?slug=specification-extractor",
      "contentType": "application/zip",
      "probeMethod": "head",
      "details": {
        "probeUrl": "https://wry-manatee-359.convex.site/api/v1/download?slug=specification-extractor",
        "contentDisposition": "attachment; filename=\"specification-extractor-2.1.0.zip\"",
        "redirectLocation": null,
        "bodySnippet": null,
        "slug": "specification-extractor"
      },
      "scope": "item",
      "summary": "Item download looks usable.",
      "detail": "Yavira can redirect you to the upstream package for this item.",
      "primaryActionLabel": "Download for OpenClaw",
      "primaryActionHref": "/downloads/specification-extractor"
    },
    "validation": {
      "installChecklist": [
        "Use the Yavira download entry.",
        "Review SKILL.md after the package is downloaded.",
        "Confirm the extracted package contains the expected setup assets."
      ],
      "postInstallChecks": [
        "Confirm the extracted package includes the expected docs or setup files.",
        "Validate the skill or prompts are available in your target agent workspace.",
        "Capture any manual follow-up steps the agent could not complete."
      ]
    },
    "downloadPageUrl": "https://openagent3.xyz/downloads/specification-extractor",
    "agentPageUrl": "https://openagent3.xyz/skills/specification-extractor/agent",
    "manifestUrl": "https://openagent3.xyz/skills/specification-extractor/agent.json",
    "briefUrl": "https://openagent3.xyz/skills/specification-extractor/agent.md"
  },
  "agentAssist": {
    "summary": "Hand the extracted package to your coding agent with a concrete install brief instead of figuring it out manually.",
    "steps": [
      "Download the package from Yavira.",
      "Extract it into a folder your agent can access.",
      "Paste one of the prompts below and point your agent at the extracted folder."
    ],
    "prompts": [
      {
        "label": "New install",
        "body": "I downloaded a skill package from Yavira. Read SKILL.md from the extracted folder and install it by following the included instructions. Tell me what you changed and call out any manual steps you could not complete."
      },
      {
        "label": "Upgrade existing",
        "body": "I downloaded an updated skill package from Yavira. Read SKILL.md from the extracted folder, compare it with my current installation, and upgrade it while preserving any custom configuration unless the package docs explicitly say otherwise. Summarize what changed and any follow-up checks I should run."
      }
    ]
  },
  "documentation": {
    "source": "clawhub",
    "primaryDoc": "SKILL.md",
    "sections": [
      {
        "title": "Overview",
        "body": "Extract structured data from construction specification documents. Parse CSI MasterFormat sections, identify requirements, submittals, product standards, and compile actionable data for estimating and procurement."
      },
      {
        "title": "Business Case",
        "body": "Automated spec extraction enables:\n\nFaster Estimating: Quickly identify scope and requirements\nProcurement Accuracy: Extract exact product specifications\nSubmittal Tracking: Identify all required submittals\nCompliance Checking: Verify specs against standards"
      },
      {
        "title": "Technical Implementation",
        "body": "from dataclasses import dataclass, field\nfrom typing import List, Dict, Any, Optional\nimport re\nimport pdfplumber\nfrom pathlib import Path\n\n@dataclass\nclass SpecSection:\n    number: str  # e.g., \"03 30 00\"\n    title: str\n    part1_general: Dict[str, Any]\n    part2_products: Dict[str, Any]\n    part3_execution: Dict[str, Any]\n    raw_text: str\n\n@dataclass\nclass ProductRequirement:\n    section: str\n    manufacturer: str\n    product_name: str\n    model: str\n    standards: List[str]\n    properties: Dict[str, str]\n\n@dataclass\nclass SubmittalRequirement:\n    section: str\n    submittal_type: str  # shop drawings, samples, product data, etc.\n    description: str\n    timing: str\n    copies: int\n\n@dataclass\nclass SpecExtractionResult:\n    document_name: str\n    total_pages: int\n    sections: List[SpecSection]\n    products: List[ProductRequirement]\n    submittals: List[SubmittalRequirement]\n    standards_referenced: List[str]\n\nclass SpecificationExtractor:\n    \"\"\"Extract structured data from construction specifications.\"\"\"\n\n    # CSI MasterFormat patterns\n    CSI_SECTION_PATTERN = r'^(\\d{2}\\s?\\d{2}\\s?\\d{2})\\s*[-–]\\s*(.+?)$'\n    PART_PATTERN = r'^PART\\s+(\\d+)\\s*[-–]\\s*(.+?)$'\n    ARTICLE_PATTERN = r'^(\\d+\\.\\d+)\\s+([A-Z][A-Z\\s]+)$'\n\n    # Submittal type keywords\n    SUBMITTAL_TYPES = {\n        'shop drawings': 'Shop Drawings',\n        'product data': 'Product Data',\n        'samples': 'Samples',\n        'certificates': 'Certificates',\n        'test reports': 'Test Reports',\n        'manufacturer instructions': 'Manufacturer Instructions',\n        'warranty': 'Warranty',\n        'maintenance data': 'Maintenance Data',\n        'mock-ups': 'Mock-ups',\n    }\n\n    # Common standard organizations\n    STANDARD_PATTERNS = [\n        r'ASTM\\s+[A-Z]\\d+',\n        r'ANSI\\s+[A-Z]?\\d+',\n        r'ACI\\s+\\d+',\n        r'AISC\\s+\\d+',\n        r'AWS\\s+[A-Z]\\d+',\n        r'ASCE\\s+\\d+',\n        r'UL\\s+\\d+',\n        r'FM\\s+\\d+',\n        r'NFPA\\s+\\d+',\n        r'IBC\\s+\\d+',\n    ]\n\n    def __init__(self):\n        self.sections: Dict[str, SpecSection] = {}\n\n    def extract_from_pdf(self, pdf_path: str) -> SpecExtractionResult:\n        \"\"\"Extract specification data from PDF.\"\"\"\n        path = Path(pdf_path)\n\n        all_text = \"\"\n        page_count = 0\n\n        with pdfplumber.open(pdf_path) as pdf:\n            page_count = len(pdf.pages)\n            for page in pdf.pages:\n                text = page.extract_text() or \"\"\n                all_text += text + \"\\n\\n\"\n\n        # Parse sections\n        sections = self._parse_sections(all_text)\n\n        # Extract products\n        products = self._extract_products(sections)\n\n        # Extract submittals\n        submittals = self._extract_submittals(sections)\n\n        # Extract standards\n        standards = self._extract_standards(all_text)\n\n        return SpecExtractionResult(\n            document_name=path.name,\n            total_pages=page_count,\n            sections=sections,\n            products=products,\n            submittals=submittals,\n            standards_referenced=standards\n        )\n\n    def _parse_sections(self, text: str) -> List[SpecSection]:\n        \"\"\"Parse CSI sections from specification text.\"\"\"\n        sections = []\n        lines = text.split('\\n')\n\n        current_section = None\n        current_part = None\n        current_content = []\n\n        for line in lines:\n            line = line.strip()\n            if not line:\n                continue\n\n            # Check for section header\n            section_match = re.match(self.CSI_SECTION_PATTERN, line, re.IGNORECASE)\n            if section_match:\n                # Save previous section\n                if current_section:\n                    sections.append(self._finalize_section(current_section, current_content))\n\n                current_section = {\n                    'number': section_match.group(1).replace(' ', ''),\n                    'title': section_match.group(2).strip(),\n                    'parts': {}\n                }\n                current_content = []\n                current_part = None\n                continue\n\n            # Check for part header\n            part_match = re.match(self.PART_PATTERN, line, re.IGNORECASE)\n            if part_match and current_section:\n                part_num = part_match.group(1)\n                part_name = part_match.group(2).strip()\n                current_part = f\"part{part_num}\"\n                current_section['parts'][current_part] = {\n                    'name': part_name,\n                    'content': []\n                }\n                continue\n\n            # Add content to current part\n            if current_section and current_part:\n                current_section['parts'][current_part]['content'].append(line)\n            elif current_section:\n                current_content.append(line)\n\n        # Save last section\n        if current_section:\n            sections.append(self._finalize_section(current_section, current_content))\n\n        return sections\n\n    def _finalize_section(self, section_data: Dict, general_content: List[str]) -> SpecSection:\n        \"\"\"Finalize a section with parsed parts.\"\"\"\n        parts = section_data.get('parts', {})\n\n        part1 = self._parse_part_content(parts.get('part1', {}).get('content', []))\n        part2 = self._parse_part_content(parts.get('part2', {}).get('content', []))\n        part3 = self._parse_part_content(parts.get('part3', {}).get('content', []))\n\n        return SpecSection(\n            number=section_data['number'],\n            title=section_data['title'],\n            part1_general=part1,\n            part2_products=part2,\n            part3_execution=part3,\n            raw_text='\\n'.join(general_content)\n        )\n\n    def _parse_part_content(self, content: List[str]) -> Dict[str, Any]:\n        \"\"\"Parse part content into structured data.\"\"\"\n        result = {\n            'articles': {},\n            'items': []\n        }\n\n        current_article = None\n\n        for line in content:\n            # Check for article header\n            article_match = re.match(self.ARTICLE_PATTERN, line)\n            if article_match:\n                current_article = article_match.group(1)\n                result['articles'][current_article] = {\n                    'title': article_match.group(2),\n                    'items': []\n                }\n                continue\n\n            # Add to current article or general items\n            if current_article and current_article in result['articles']:\n                result['articles'][current_article]['items'].append(line)\n            else:\n                result['items'].append(line)\n\n        return result\n\n    def _extract_products(self, sections: List[SpecSection]) -> List[ProductRequirement]:\n        \"\"\"Extract product requirements from Part 2.\"\"\"\n        products = []\n\n        for section in sections:\n            part2 = section.part2_products\n\n            for article_num, article in part2.get('articles', {}).items():\n                if 'MANUFACTURERS' in article['title'].upper():\n                    for item in article['items']:\n                        # Extract manufacturer names\n                        if item.strip().startswith(('A.', 'B.', 'C.', '1.', '2.', '3.')):\n                            mfr_name = re.sub(r'^[A-Z\\d]+\\.\\s*', '', item).strip()\n                            products.append(ProductRequirement(\n                                section=section.number,\n                                manufacturer=mfr_name,\n                                product_name='',\n                                model='',\n                                standards=[],\n                                properties={}\n                            ))\n\n                elif 'MATERIALS' in article['title'].upper() or 'PRODUCTS' in article['title'].upper():\n                    for item in article['items']:\n                        # Extract material requirements\n                        standards = self._extract_standards(item)\n                        if standards:\n                            products.append(ProductRequirement(\n                                section=section.number,\n                                manufacturer='',\n                                product_name=item[:100],\n                                model='',\n                                standards=standards,\n                                properties={}\n                            ))\n\n        return products\n\n    def _extract_submittals(self, sections: List[SpecSection]) -> List[SubmittalRequirement]:\n        \"\"\"Extract submittal requirements from Part 1.\"\"\"\n        submittals = []\n\n        for section in sections:\n            part1 = section.part1_general\n\n            for article_num, article in part1.get('articles', {}).items():\n                if 'SUBMITTAL' in article['title'].upper():\n                    for item in article['items']:\n                        item_lower = item.lower()\n\n                        for keyword, submittal_type in self.SUBMITTAL_TYPES.items():\n                            if keyword in item_lower:\n                                submittals.append(SubmittalRequirement(\n                                    section=section.number,\n                                    submittal_type=submittal_type,\n                                    description=item.strip(),\n                                    timing='Prior to fabrication',\n                                    copies=3\n                                ))\n                                break\n\n        return submittals\n\n    def _extract_standards(self, text: str) -> List[str]:\n        \"\"\"Extract referenced standards from text.\"\"\"\n        standards = []\n\n        for pattern in self.STANDARD_PATTERNS:\n            matches = re.findall(pattern, text, re.IGNORECASE)\n            standards.extend(matches)\n\n        return list(set(standards))\n\n    def generate_submittal_log(self, result: SpecExtractionResult) -> str:\n        \"\"\"Generate submittal log from extraction results.\"\"\"\n        lines = [\"# Submittal Log\", \"\"]\n        lines.append(f\"**Project Specs:** {result.document_name}\")\n        lines.append(f\"**Total Submittals:** {len(result.submittals)}\")\n        lines.append(\"\")\n\n        lines.append(\"| # | Section | Type | Description | Status |\")\n        lines.append(\"|---|---------|------|-------------|--------|\")\n\n        for i, sub in enumerate(result.submittals, 1):\n            desc = sub.description[:50] + \"...\" if len(sub.description) > 50 else sub.description\n            lines.append(f\"| {i} | {sub.section} | {sub.submittal_type} | {desc} | Pending |\")\n\n        return \"\\n\".join(lines)\n\n    def generate_product_schedule(self, result: SpecExtractionResult) -> str:\n        \"\"\"Generate product schedule from extraction results.\"\"\"\n        lines = [\"# Product Schedule\", \"\"]\n\n        # Group by section\n        by_section = {}\n        for prod in result.products:\n            if prod.section not in by_section:\n                by_section[prod.section] = []\n            by_section[prod.section].append(prod)\n\n        for section, products in sorted(by_section.items()):\n            lines.append(f\"## Section {section}\")\n            lines.append(\"\")\n\n            for prod in products:\n                if prod.manufacturer:\n                    lines.append(f\"- **Manufacturer:** {prod.manufacturer}\")\n                if prod.product_name:\n                    lines.append(f\"- **Product:** {prod.product_name}\")\n                if prod.standards:\n                    lines.append(f\"- **Standards:** {', '.join(prod.standards)}\")\n                lines.append(\"\")\n\n        return \"\\n\".join(lines)\n\n    def generate_report(self, result: SpecExtractionResult) -> str:\n        \"\"\"Generate comprehensive extraction report.\"\"\"\n        lines = [\"# Specification Extraction Report\", \"\"]\n        lines.append(f\"**Document:** {result.document_name}\")\n        lines.append(f\"**Pages:** {result.total_pages}\")\n        lines.append(f\"**Sections Found:** {len(result.sections)}\")\n        lines.append(\"\")\n\n        # Sections summary\n        lines.append(\"## Sections Extracted\")\n        for section in result.sections:\n            lines.append(f\"- **{section.number}** - {section.title}\")\n        lines.append(\"\")\n\n        # Standards\n        if result.standards_referenced:\n            lines.append(\"## Standards Referenced\")\n            for std in sorted(set(result.standards_referenced)):\n                lines.append(f\"- {std}\")\n            lines.append(\"\")\n\n        # Submittals summary\n        lines.append(\"## Submittals Required\")\n        lines.append(f\"Total: {len(result.submittals)}\")\n        by_type = {}\n        for sub in result.submittals:\n            by_type[sub.submittal_type] = by_type.get(sub.submittal_type, 0) + 1\n        for t, count in sorted(by_type.items()):\n            lines.append(f\"- {t}: {count}\")\n        lines.append(\"\")\n\n        # Products summary\n        lines.append(\"## Products/Manufacturers\")\n        lines.append(f\"Total: {len(result.products)}\")\n\n        return \"\\n\".join(lines)"
      },
      {
        "title": "Quick Start",
        "body": "# Initialize extractor\nextractor = SpecificationExtractor()\n\n# Extract from PDF\nresult = extractor.extract_from_pdf(\"Project_Specifications.pdf\")\n\nprint(f\"Found {len(result.sections)} sections\")\nprint(f\"Found {len(result.submittals)} submittals\")\nprint(f\"Found {len(result.products)} product requirements\")\n\n# Generate submittal log\nsubmittal_log = extractor.generate_submittal_log(result)\nprint(submittal_log)\n\n# Generate product schedule\nproduct_schedule = extractor.generate_product_schedule(result)\nprint(product_schedule)\n\n# Full report\nreport = extractor.generate_report(result)\nprint(report)"
      },
      {
        "title": "Dependencies",
        "body": "pip install pdfplumber"
      }
    ],
    "body": "Specification Extractor for Construction\nOverview\n\nExtract structured data from construction specification documents. Parse CSI MasterFormat sections, identify requirements, submittals, product standards, and compile actionable data for estimating and procurement.\n\nBusiness Case\n\nAutomated spec extraction enables:\n\nFaster Estimating: Quickly identify scope and requirements\nProcurement Accuracy: Extract exact product specifications\nSubmittal Tracking: Identify all required submittals\nCompliance Checking: Verify specs against standards\nTechnical Implementation\nfrom dataclasses import dataclass, field\nfrom typing import List, Dict, Any, Optional\nimport re\nimport pdfplumber\nfrom pathlib import Path\n\n@dataclass\nclass SpecSection:\n    number: str  # e.g., \"03 30 00\"\n    title: str\n    part1_general: Dict[str, Any]\n    part2_products: Dict[str, Any]\n    part3_execution: Dict[str, Any]\n    raw_text: str\n\n@dataclass\nclass ProductRequirement:\n    section: str\n    manufacturer: str\n    product_name: str\n    model: str\n    standards: List[str]\n    properties: Dict[str, str]\n\n@dataclass\nclass SubmittalRequirement:\n    section: str\n    submittal_type: str  # shop drawings, samples, product data, etc.\n    description: str\n    timing: str\n    copies: int\n\n@dataclass\nclass SpecExtractionResult:\n    document_name: str\n    total_pages: int\n    sections: List[SpecSection]\n    products: List[ProductRequirement]\n    submittals: List[SubmittalRequirement]\n    standards_referenced: List[str]\n\nclass SpecificationExtractor:\n    \"\"\"Extract structured data from construction specifications.\"\"\"\n\n    # CSI MasterFormat patterns\n    CSI_SECTION_PATTERN = r'^(\\d{2}\\s?\\d{2}\\s?\\d{2})\\s*[-–]\\s*(.+?)$'\n    PART_PATTERN = r'^PART\\s+(\\d+)\\s*[-–]\\s*(.+?)$'\n    ARTICLE_PATTERN = r'^(\\d+\\.\\d+)\\s+([A-Z][A-Z\\s]+)$'\n\n    # Submittal type keywords\n    SUBMITTAL_TYPES = {\n        'shop drawings': 'Shop Drawings',\n        'product data': 'Product Data',\n        'samples': 'Samples',\n        'certificates': 'Certificates',\n        'test reports': 'Test Reports',\n        'manufacturer instructions': 'Manufacturer Instructions',\n        'warranty': 'Warranty',\n        'maintenance data': 'Maintenance Data',\n        'mock-ups': 'Mock-ups',\n    }\n\n    # Common standard organizations\n    STANDARD_PATTERNS = [\n        r'ASTM\\s+[A-Z]\\d+',\n        r'ANSI\\s+[A-Z]?\\d+',\n        r'ACI\\s+\\d+',\n        r'AISC\\s+\\d+',\n        r'AWS\\s+[A-Z]\\d+',\n        r'ASCE\\s+\\d+',\n        r'UL\\s+\\d+',\n        r'FM\\s+\\d+',\n        r'NFPA\\s+\\d+',\n        r'IBC\\s+\\d+',\n    ]\n\n    def __init__(self):\n        self.sections: Dict[str, SpecSection] = {}\n\n    def extract_from_pdf(self, pdf_path: str) -> SpecExtractionResult:\n        \"\"\"Extract specification data from PDF.\"\"\"\n        path = Path(pdf_path)\n\n        all_text = \"\"\n        page_count = 0\n\n        with pdfplumber.open(pdf_path) as pdf:\n            page_count = len(pdf.pages)\n            for page in pdf.pages:\n                text = page.extract_text() or \"\"\n                all_text += text + \"\\n\\n\"\n\n        # Parse sections\n        sections = self._parse_sections(all_text)\n\n        # Extract products\n        products = self._extract_products(sections)\n\n        # Extract submittals\n        submittals = self._extract_submittals(sections)\n\n        # Extract standards\n        standards = self._extract_standards(all_text)\n\n        return SpecExtractionResult(\n            document_name=path.name,\n            total_pages=page_count,\n            sections=sections,\n            products=products,\n            submittals=submittals,\n            standards_referenced=standards\n        )\n\n    def _parse_sections(self, text: str) -> List[SpecSection]:\n        \"\"\"Parse CSI sections from specification text.\"\"\"\n        sections = []\n        lines = text.split('\\n')\n\n        current_section = None\n        current_part = None\n        current_content = []\n\n        for line in lines:\n            line = line.strip()\n            if not line:\n                continue\n\n            # Check for section header\n            section_match = re.match(self.CSI_SECTION_PATTERN, line, re.IGNORECASE)\n            if section_match:\n                # Save previous section\n                if current_section:\n                    sections.append(self._finalize_section(current_section, current_content))\n\n                current_section = {\n                    'number': section_match.group(1).replace(' ', ''),\n                    'title': section_match.group(2).strip(),\n                    'parts': {}\n                }\n                current_content = []\n                current_part = None\n                continue\n\n            # Check for part header\n            part_match = re.match(self.PART_PATTERN, line, re.IGNORECASE)\n            if part_match and current_section:\n                part_num = part_match.group(1)\n                part_name = part_match.group(2).strip()\n                current_part = f\"part{part_num}\"\n                current_section['parts'][current_part] = {\n                    'name': part_name,\n                    'content': []\n                }\n                continue\n\n            # Add content to current part\n            if current_section and current_part:\n                current_section['parts'][current_part]['content'].append(line)\n            elif current_section:\n                current_content.append(line)\n\n        # Save last section\n        if current_section:\n            sections.append(self._finalize_section(current_section, current_content))\n\n        return sections\n\n    def _finalize_section(self, section_data: Dict, general_content: List[str]) -> SpecSection:\n        \"\"\"Finalize a section with parsed parts.\"\"\"\n        parts = section_data.get('parts', {})\n\n        part1 = self._parse_part_content(parts.get('part1', {}).get('content', []))\n        part2 = self._parse_part_content(parts.get('part2', {}).get('content', []))\n        part3 = self._parse_part_content(parts.get('part3', {}).get('content', []))\n\n        return SpecSection(\n            number=section_data['number'],\n            title=section_data['title'],\n            part1_general=part1,\n            part2_products=part2,\n            part3_execution=part3,\n            raw_text='\\n'.join(general_content)\n        )\n\n    def _parse_part_content(self, content: List[str]) -> Dict[str, Any]:\n        \"\"\"Parse part content into structured data.\"\"\"\n        result = {\n            'articles': {},\n            'items': []\n        }\n\n        current_article = None\n\n        for line in content:\n            # Check for article header\n            article_match = re.match(self.ARTICLE_PATTERN, line)\n            if article_match:\n                current_article = article_match.group(1)\n                result['articles'][current_article] = {\n                    'title': article_match.group(2),\n                    'items': []\n                }\n                continue\n\n            # Add to current article or general items\n            if current_article and current_article in result['articles']:\n                result['articles'][current_article]['items'].append(line)\n            else:\n                result['items'].append(line)\n\n        return result\n\n    def _extract_products(self, sections: List[SpecSection]) -> List[ProductRequirement]:\n        \"\"\"Extract product requirements from Part 2.\"\"\"\n        products = []\n\n        for section in sections:\n            part2 = section.part2_products\n\n            for article_num, article in part2.get('articles', {}).items():\n                if 'MANUFACTURERS' in article['title'].upper():\n                    for item in article['items']:\n                        # Extract manufacturer names\n                        if item.strip().startswith(('A.', 'B.', 'C.', '1.', '2.', '3.')):\n                            mfr_name = re.sub(r'^[A-Z\\d]+\\.\\s*', '', item).strip()\n                            products.append(ProductRequirement(\n                                section=section.number,\n                                manufacturer=mfr_name,\n                                product_name='',\n                                model='',\n                                standards=[],\n                                properties={}\n                            ))\n\n                elif 'MATERIALS' in article['title'].upper() or 'PRODUCTS' in article['title'].upper():\n                    for item in article['items']:\n                        # Extract material requirements\n                        standards = self._extract_standards(item)\n                        if standards:\n                            products.append(ProductRequirement(\n                                section=section.number,\n                                manufacturer='',\n                                product_name=item[:100],\n                                model='',\n                                standards=standards,\n                                properties={}\n                            ))\n\n        return products\n\n    def _extract_submittals(self, sections: List[SpecSection]) -> List[SubmittalRequirement]:\n        \"\"\"Extract submittal requirements from Part 1.\"\"\"\n        submittals = []\n\n        for section in sections:\n            part1 = section.part1_general\n\n            for article_num, article in part1.get('articles', {}).items():\n                if 'SUBMITTAL' in article['title'].upper():\n                    for item in article['items']:\n                        item_lower = item.lower()\n\n                        for keyword, submittal_type in self.SUBMITTAL_TYPES.items():\n                            if keyword in item_lower:\n                                submittals.append(SubmittalRequirement(\n                                    section=section.number,\n                                    submittal_type=submittal_type,\n                                    description=item.strip(),\n                                    timing='Prior to fabrication',\n                                    copies=3\n                                ))\n                                break\n\n        return submittals\n\n    def _extract_standards(self, text: str) -> List[str]:\n        \"\"\"Extract referenced standards from text.\"\"\"\n        standards = []\n\n        for pattern in self.STANDARD_PATTERNS:\n            matches = re.findall(pattern, text, re.IGNORECASE)\n            standards.extend(matches)\n\n        return list(set(standards))\n\n    def generate_submittal_log(self, result: SpecExtractionResult) -> str:\n        \"\"\"Generate submittal log from extraction results.\"\"\"\n        lines = [\"# Submittal Log\", \"\"]\n        lines.append(f\"**Project Specs:** {result.document_name}\")\n        lines.append(f\"**Total Submittals:** {len(result.submittals)}\")\n        lines.append(\"\")\n\n        lines.append(\"| # | Section | Type | Description | Status |\")\n        lines.append(\"|---|---------|------|-------------|--------|\")\n\n        for i, sub in enumerate(result.submittals, 1):\n            desc = sub.description[:50] + \"...\" if len(sub.description) > 50 else sub.description\n            lines.append(f\"| {i} | {sub.section} | {sub.submittal_type} | {desc} | Pending |\")\n\n        return \"\\n\".join(lines)\n\n    def generate_product_schedule(self, result: SpecExtractionResult) -> str:\n        \"\"\"Generate product schedule from extraction results.\"\"\"\n        lines = [\"# Product Schedule\", \"\"]\n\n        # Group by section\n        by_section = {}\n        for prod in result.products:\n            if prod.section not in by_section:\n                by_section[prod.section] = []\n            by_section[prod.section].append(prod)\n\n        for section, products in sorted(by_section.items()):\n            lines.append(f\"## Section {section}\")\n            lines.append(\"\")\n\n            for prod in products:\n                if prod.manufacturer:\n                    lines.append(f\"- **Manufacturer:** {prod.manufacturer}\")\n                if prod.product_name:\n                    lines.append(f\"- **Product:** {prod.product_name}\")\n                if prod.standards:\n                    lines.append(f\"- **Standards:** {', '.join(prod.standards)}\")\n                lines.append(\"\")\n\n        return \"\\n\".join(lines)\n\n    def generate_report(self, result: SpecExtractionResult) -> str:\n        \"\"\"Generate comprehensive extraction report.\"\"\"\n        lines = [\"# Specification Extraction Report\", \"\"]\n        lines.append(f\"**Document:** {result.document_name}\")\n        lines.append(f\"**Pages:** {result.total_pages}\")\n        lines.append(f\"**Sections Found:** {len(result.sections)}\")\n        lines.append(\"\")\n\n        # Sections summary\n        lines.append(\"## Sections Extracted\")\n        for section in result.sections:\n            lines.append(f\"- **{section.number}** - {section.title}\")\n        lines.append(\"\")\n\n        # Standards\n        if result.standards_referenced:\n            lines.append(\"## Standards Referenced\")\n            for std in sorted(set(result.standards_referenced)):\n                lines.append(f\"- {std}\")\n            lines.append(\"\")\n\n        # Submittals summary\n        lines.append(\"## Submittals Required\")\n        lines.append(f\"Total: {len(result.submittals)}\")\n        by_type = {}\n        for sub in result.submittals:\n            by_type[sub.submittal_type] = by_type.get(sub.submittal_type, 0) + 1\n        for t, count in sorted(by_type.items()):\n            lines.append(f\"- {t}: {count}\")\n        lines.append(\"\")\n\n        # Products summary\n        lines.append(\"## Products/Manufacturers\")\n        lines.append(f\"Total: {len(result.products)}\")\n\n        return \"\\n\".join(lines)\n\nQuick Start\n# Initialize extractor\nextractor = SpecificationExtractor()\n\n# Extract from PDF\nresult = extractor.extract_from_pdf(\"Project_Specifications.pdf\")\n\nprint(f\"Found {len(result.sections)} sections\")\nprint(f\"Found {len(result.submittals)} submittals\")\nprint(f\"Found {len(result.products)} product requirements\")\n\n# Generate submittal log\nsubmittal_log = extractor.generate_submittal_log(result)\nprint(submittal_log)\n\n# Generate product schedule\nproduct_schedule = extractor.generate_product_schedule(result)\nprint(product_schedule)\n\n# Full report\nreport = extractor.generate_report(result)\nprint(report)\n\nDependencies\npip install pdfplumber"
  },
  "trust": {
    "sourceLabel": "tencent",
    "provenanceUrl": "https://clawhub.ai/datadrivenconstruction/specification-extractor",
    "publisherUrl": "https://clawhub.ai/datadrivenconstruction/specification-extractor",
    "owner": "datadrivenconstruction",
    "version": "2.1.0",
    "license": null,
    "verificationStatus": "Indexed source record"
  },
  "links": {
    "detailUrl": "https://openagent3.xyz/skills/specification-extractor",
    "downloadUrl": "https://openagent3.xyz/downloads/specification-extractor",
    "agentUrl": "https://openagent3.xyz/skills/specification-extractor/agent",
    "manifestUrl": "https://openagent3.xyz/skills/specification-extractor/agent.json",
    "briefUrl": "https://openagent3.xyz/skills/specification-extractor/agent.md"
  }
}